aboutsummaryrefslogtreecommitdiffstats
path: root/searchlib
diff options
context:
space:
mode:
authorJon Bratseth <bratseth@yahoo-inc.com>2016-06-15 23:09:44 +0200
committerJon Bratseth <bratseth@yahoo-inc.com>2016-06-15 23:09:44 +0200
commit72231250ed81e10d66bfe70701e64fa5fe50f712 (patch)
tree2728bba1131a6f6e5bdf95afec7d7ff9358dac50 /searchlib
Publish
Diffstat (limited to 'searchlib')
-rw-r--r--searchlib/.gitignore9
-rw-r--r--searchlib/CMakeLists.txt203
-rw-r--r--searchlib/OWNERS4
-rw-r--r--searchlib/README3
-rw-r--r--searchlib/README-gbdt1
-rw-r--r--searchlib/README-treenet1
-rw-r--r--searchlib/pom.xml98
-rw-r--r--searchlib/src/.gitignore7
-rw-r--r--searchlib/src/Doxyfile1162
-rw-r--r--searchlib/src/apps/docstore/.gitignore6
-rw-r--r--searchlib/src/apps/docstore/CMakeLists.txt33
-rw-r--r--searchlib/src/apps/docstore/benchmarkdatastore.cpp114
-rw-r--r--searchlib/src/apps/docstore/create-idx-from-dat.cpp157
-rw-r--r--searchlib/src/apps/docstore/documentstoreinspect.cpp114
-rw-r--r--searchlib/src/apps/docstore/verifylogdatastore.cpp59
-rw-r--r--searchlib/src/apps/expgolomb/.gitignore3
-rw-r--r--searchlib/src/apps/expgolomb/CMakeLists.txt9
-rw-r--r--searchlib/src/apps/expgolomb/expgolomb.cpp175
-rw-r--r--searchlib/src/apps/fileheaderinspect/.gitignore3
-rw-r--r--searchlib/src/apps/fileheaderinspect/CMakeLists.txt9
-rw-r--r--searchlib/src/apps/fileheaderinspect/fileheaderinspect.cpp223
-rw-r--r--searchlib/src/apps/loadattribute/.gitignore3
-rw-r--r--searchlib/src/apps/loadattribute/CMakeLists.txt9
-rw-r--r--searchlib/src/apps/loadattribute/loadattribute.cpp216
-rw-r--r--searchlib/src/apps/loadattribute/loadattribute.rb43
-rw-r--r--searchlib/src/apps/tests/.gitignore8
-rw-r--r--searchlib/src/apps/tests/CMakeLists.txt22
-rw-r--r--searchlib/src/apps/tests/biglogtest.cpp243
-rw-r--r--searchlib/src/apps/tests/btreestress_test.cpp224
-rw-r--r--searchlib/src/apps/tests/memoryindexstress_test.cpp537
-rw-r--r--searchlib/src/apps/uniform/.gitignore3
-rw-r--r--searchlib/src/apps/uniform/CMakeLists.txt9
-rw-r--r--searchlib/src/apps/uniform/uniform.cpp153
-rw-r--r--searchlib/src/apps/vespa-index-inspect/.gitignore3
-rw-r--r--searchlib/src/apps/vespa-index-inspect/CMakeLists.txt9
-rw-r--r--searchlib/src/apps/vespa-index-inspect/vespa-index-inspect.cpp965
-rw-r--r--searchlib/src/apps/vespa-ranking-expression-analyzer/.gitignore3
-rw-r--r--searchlib/src/apps/vespa-ranking-expression-analyzer/CMakeLists.txt9
-rw-r--r--searchlib/src/apps/vespa-ranking-expression-analyzer/illegal.expression1
-rw-r--r--searchlib/src/apps/vespa-ranking-expression-analyzer/vespa-ranking-expression-analyzer.cpp386
-rwxr-xr-xsearchlib/src/forcelink.sh45
-rw-r--r--searchlib/src/main/OWNERS1
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/aggregation/AggregationResult.java161
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/aggregation/AverageAggregationResult.java157
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/aggregation/CountAggregationResult.java99
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/aggregation/ExpressionCountAggregationResult.java116
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/aggregation/FS4Hit.java132
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/aggregation/ForceLoad.java39
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/aggregation/Group.java518
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/aggregation/Grouping.java445
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/aggregation/GroupingLevel.java184
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/aggregation/Hit.java104
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/aggregation/HitsAggregationResult.java218
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/aggregation/MaxAggregationResult.java103
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/aggregation/MinAggregationResult.java103
-rwxr-xr-xsearchlib/src/main/java/com/yahoo/searchlib/aggregation/RawData.java130
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/aggregation/SumAggregationResult.java103
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/aggregation/VdsHit.java91
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/aggregation/XorAggregationResult.java99
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/aggregation/hll/BiasEstimator.java131
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/aggregation/hll/HyperLogLog.java18
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/aggregation/hll/HyperLogLogEstimator.java172
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/aggregation/hll/NormalSketch.java190
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/aggregation/hll/Sketch.java32
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/aggregation/hll/SketchMerger.java60
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/aggregation/hll/SparseSketch.java105
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/aggregation/hll/UniqueCountEstimator.java12
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/aggregation/package-info.java4
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/document/package-info.java5
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/expression/AddFunctionNode.java23
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/expression/AggregationRefNode.java115
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/expression/AndFunctionNode.java22
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/expression/ArithmeticTypeConversion.java66
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/expression/ArrayAtLookupNode.java94
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/expression/AttributeNode.java90
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/expression/BitFunctionNode.java36
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/expression/BucketResultNode.java47
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/expression/CatFunctionNode.java42
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/expression/ConstantNode.java82
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/expression/DebugWaitFunctionNode.java104
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/expression/DivideFunctionNode.java23
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/expression/DocumentAccessorNode.java19
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/expression/DocumentFieldNode.java116
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/expression/ExpressionNode.java104
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/expression/FixedWidthBucketFunctionNode.java82
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/expression/FloatBucketResultNode.java118
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/expression/FloatBucketResultNodeVector.java80
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/expression/FloatResultNode.java182
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/expression/FloatResultNodeVector.java80
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/expression/ForceLoad.java89
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/expression/FunctionNode.java74
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/expression/GetDocIdNamespaceSpecificFunctionNode.java88
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/expression/GetYMUMChecksumFunctionNode.java60
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/expression/Int16ResultNode.java149
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/expression/Int16ResultNodeVector.java79
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/expression/Int32ResultNode.java149
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/expression/Int32ResultNodeVector.java80
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/expression/Int8ResultNode.java149
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/expression/Int8ResultNodeVector.java80
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/expression/IntegerBucketResultNode.java102
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/expression/IntegerBucketResultNodeVector.java80
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/expression/IntegerResultNode.java183
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/expression/IntegerResultNodeVector.java80
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/expression/InterpolatedLookupNode.java94
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/expression/MD5BitFunctionNode.java35
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/expression/MathFunctionNode.java185
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/expression/MaxFunctionNode.java23
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/expression/MinFunctionNode.java23
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/expression/ModuloFunctionNode.java23
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/expression/MultiArgFunctionNode.java176
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/expression/MultiplyFunctionNode.java23
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/expression/NegateFunctionNode.java52
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/expression/NormalizeSubjectFunctionNode.java65
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/expression/NullResultNode.java56
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/expression/NumElemFunctionNode.java50
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/expression/NumericFunctionNode.java31
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/expression/NumericResultNode.java52
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/expression/OrFunctionNode.java22
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/expression/PositiveInfinityResultNode.java44
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/expression/RangeBucketPreDefFunctionNode.java82
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/expression/RawBucketResultNode.java101
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/expression/RawBucketResultNodeVector.java75
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/expression/RawResultNode.java184
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/expression/RawResultNodeVector.java80
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/expression/RelevanceNode.java72
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/expression/ResultNode.java82
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/expression/ResultNodeVector.java45
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/expression/ReverseFunctionNode.java39
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/expression/SingleResultNode.java38
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/expression/SortFunctionNode.java36
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/expression/StrCatFunctionNode.java42
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/expression/StrLenFunctionNode.java55
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/expression/StringBucketResultNode.java114
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/expression/StringBucketResultNodeVector.java80
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/expression/StringResultNode.java177
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/expression/StringResultNodeVector.java80
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/expression/TimeStampFunctionNode.java116
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/expression/ToFloatFunctionNode.java39
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/expression/ToIntFunctionNode.java44
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/expression/ToRawFunctionNode.java38
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/expression/ToStringFunctionNode.java51
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/expression/UcaFunctionNode.java84
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/expression/UnaryBitFunctionNode.java89
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/expression/UnaryFunctionNode.java44
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/expression/XorBitFunctionNode.java38
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/expression/XorFunctionNode.java22
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/expression/ZCurveFunctionNode.java92
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/expression/package-info.java4
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/gbdt/CategoryFeatureNode.java34
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/gbdt/FeatureNode.java95
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/gbdt/GbdtConverter.java34
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/gbdt/GbdtModel.java92
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/gbdt/NumericFeatureNode.java34
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/gbdt/ResponseNode.java33
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/gbdt/TreeNode.java43
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/gbdt/XmlHelper.java110
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/CaseList.java15
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/Evolvable.java26
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/Individual.java69
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/KeyboardChecker.java50
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/Main.java73
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/Population.java60
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/PrintingTracker.java91
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/RankingExpressionCaseList.java33
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/Recombiner.java200
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/Species.java93
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/SpeciesName.java54
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/Tracker.java25
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/Trainer.java57
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/TrainingEnvironment.java31
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/TrainingParameters.java100
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/TrainingSet.java122
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/caselist/CsvFileCaseList.java56
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/caselist/FileCaseList.java73
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/caselist/FvFileCaseList.java59
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/mlr/gbdt/ExpressionAnalysis.java425
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/package-info.java5
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/ranking/features/ElementCompleteness.java96
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/ranking/features/Features.java30
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/ranking/features/FieldTermMatch.java48
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/ranking/features/fieldmatch/Field.java60
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/ranking/features/fieldmatch/FieldMatchMetrics.java536
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/ranking/features/fieldmatch/FieldMatchMetricsComputer.java433
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/ranking/features/fieldmatch/FieldMatchMetricsParameters.java198
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/ranking/features/fieldmatch/Main.java39
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/ranking/features/fieldmatch/Query.java72
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/ranking/features/fieldmatch/QueryTerm.java67
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/ranking/features/fieldmatch/SegmentStartPoint.java145
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/ranking/features/fieldmatch/Trace.java22
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/ranking/features/fieldmatch/package-info.java12
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/ranking/features/package-info.java10
-rwxr-xr-xsearchlib/src/main/java/com/yahoo/searchlib/rankingexpression/ExpressionFunction.java139
-rwxr-xr-xsearchlib/src/main/java/com/yahoo/searchlib/rankingexpression/FeatureList.java140
-rwxr-xr-xsearchlib/src/main/java/com/yahoo/searchlib/rankingexpression/RankingExpression.java250
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/AbstractArrayContext.java131
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/ArrayContext.java120
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/BooleanValue.java61
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/Context.java107
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/DoubleCompatibleValue.java51
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/DoubleOnlyArrayContext.java96
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/DoubleValue.java158
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/ExpressionOptimizer.java55
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/MapContext.java95
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/OptimizationReport.java63
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/Optimizer.java23
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/StringValue.java108
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/TensorValue.java168
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/Value.java96
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/gbdtoptimization/.gitignore0
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/gbdtoptimization/GBDTForestNode.java43
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/gbdtoptimization/GBDTForestOptimizer.java124
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/gbdtoptimization/GBDTNode.java98
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/gbdtoptimization/GBDTOptimizer.java184
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/gbdtoptimization/test/.gitignore0
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/package-info.java10
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/mlr/.gitignore0
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/package-info.java10
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/parser/package-info.java10
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/Arguments.java81
-rwxr-xr-xsearchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/ArithmeticNode.java129
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/ArithmeticOperator.java62
-rwxr-xr-xsearchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/BooleanNode.java11
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/ComparisonNode.java62
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/CompositeNode.java27
-rwxr-xr-xsearchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/ConstantNode.java54
-rwxr-xr-xsearchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/EmbracedNode.java57
-rwxr-xr-xsearchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/ExpressionNode.java51
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/Function.java55
-rwxr-xr-xsearchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/FunctionNode.java90
-rwxr-xr-xsearchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/IfNode.java86
-rwxr-xr-xsearchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/NameNode.java37
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/NegativeNode.java49
-rwxr-xr-xsearchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/ReferenceNode.java119
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/SerializationContext.java116
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/SetMembershipNode.java72
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/TensorMatchNode.java59
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/TensorSumNode.java65
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/TruthOperator.java48
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/package-info.java7
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/transform/ConstantDereferencer.java62
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/transform/ExpressionTransformer.java38
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/transform/Simplifier.java131
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/transform/package-info.java6
-rwxr-xr-xsearchlib/src/main/java/com/yahoo/searchlib/treenet/TreeNetConverter.java35
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/treenet/package-info.java5
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/treenet/parser/package-info.java5
-rwxr-xr-xsearchlib/src/main/java/com/yahoo/searchlib/treenet/rule/ComparisonCondition.java39
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/treenet/rule/Condition.java54
-rwxr-xr-xsearchlib/src/main/java/com/yahoo/searchlib/treenet/rule/Response.java45
-rwxr-xr-xsearchlib/src/main/java/com/yahoo/searchlib/treenet/rule/SetMembershipCondition.java57
-rwxr-xr-xsearchlib/src/main/java/com/yahoo/searchlib/treenet/rule/Tree.java110
-rwxr-xr-xsearchlib/src/main/java/com/yahoo/searchlib/treenet/rule/TreeNet.java63
-rwxr-xr-xsearchlib/src/main/java/com/yahoo/searchlib/treenet/rule/TreeNode.java34
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/treenet/rule/package-info.java5
-rwxr-xr-xsearchlib/src/main/javacc/RankingExpressionParser.jj479
-rwxr-xr-xsearchlib/src/main/javacc/TreeNetParser.jj362
-rwxr-xr-xsearchlib/src/main/sh/evaluation-benchmark1
-rw-r--r--searchlib/src/main/sh/ga69
-rwxr-xr-xsearchlib/src/main/sh/gbdt-analysis1
-rwxr-xr-xsearchlib/src/main/sh/vespa-gbdt-converter63
-rwxr-xr-xsearchlib/src/main/sh/vespa-treenet-converter63
-rw-r--r--searchlib/src/test/OWNERS1
-rw-r--r--searchlib/src/test/files/features01.expression1
-rw-r--r--searchlib/src/test/files/features02.expression1
-rw-r--r--searchlib/src/test/files/features03.expression4
-rw-r--r--searchlib/src/test/files/features04.expression1
-rw-r--r--searchlib/src/test/files/gbdt.expression10
-rw-r--r--searchlib/src/test/files/gbdt.ext.xml284
-rw-r--r--searchlib/src/test/files/gbdt.xml614
-rw-r--r--searchlib/src/test/files/gbdt_empty_tree.xml46
-rw-r--r--searchlib/src/test/files/gbdt_err.xml3
-rw-r--r--searchlib/src/test/files/gbdt_set_inclusion_test.xml119
-rw-r--r--searchlib/src/test/files/gbdt_tree_response.xml35
-rw-r--r--searchlib/src/test/files/mlr/cases-illegal1.csv5
-rw-r--r--searchlib/src/test/files/mlr/cases-illegal2.csv2
-rw-r--r--searchlib/src/test/files/mlr/cases-linear.csv7
-rw-r--r--searchlib/src/test/files/mlr/cases.csv6
-rw-r--r--searchlib/src/test/files/ranking01.expression10
-rw-r--r--searchlib/src/test/files/ranking02.expression90
-rw-r--r--searchlib/src/test/files/ranking03.expression97
-rw-r--r--searchlib/src/test/files/ranking04.expression103
-rw-r--r--searchlib/src/test/files/ranking05.expression77
-rw-r--r--searchlib/src/test/files/ranking06.expression85
-rw-r--r--searchlib/src/test/files/ranking07.expression200
-rw-r--r--searchlib/src/test/files/ranking08.expression5
-rw-r--r--searchlib/src/test/files/s-expression.vre1
-rw-r--r--searchlib/src/test/files/simple.expression1
-rw-r--r--searchlib/src/test/files/testAggregatorResultsbin0 -> 310 bytes
-rw-r--r--searchlib/src/test/files/testFunctionNodesbin0 -> 1025 bytes
-rw-r--r--searchlib/src/test/files/testGroupbin0 -> 427 bytes
-rw-r--r--searchlib/src/test/files/testGroupingbin0 -> 828 bytes
-rw-r--r--searchlib/src/test/files/testGroupingLevelbin0 -> 159 bytes
-rw-r--r--searchlib/src/test/files/testHitCollectionbin0 -> 681 bytes
-rw-r--r--searchlib/src/test/files/testResultTypesbin0 -> 374 bytes
-rw-r--r--searchlib/src/test/files/testSpecialNodesbin0 -> 93 bytes
-rw-r--r--searchlib/src/test/files/treenet01.model531
-rw-r--r--searchlib/src/test/files/treenet02.model11784
-rw-r--r--searchlib/src/test/files/treenet03.model5880
-rw-r--r--searchlib/src/test/files/treenet04.model6247
-rw-r--r--searchlib/src/test/files/treenet05.model4684
-rw-r--r--searchlib/src/test/files/treenet06.model3799
-rw-r--r--searchlib/src/test/files/treenet07.model13275
-rw-r--r--searchlib/src/test/files/treenet08.model227
-rwxr-xr-xsearchlib/src/test/java/com/yahoo/searchlib/aggregation/AggregationTestCase.java346
-rw-r--r--searchlib/src/test/java/com/yahoo/searchlib/aggregation/ExpressionCountAggregationResultTest.java82
-rwxr-xr-xsearchlib/src/test/java/com/yahoo/searchlib/aggregation/ForceLoadTestCase.java19
-rw-r--r--searchlib/src/test/java/com/yahoo/searchlib/aggregation/GroupTestCase.java229
-rw-r--r--searchlib/src/test/java/com/yahoo/searchlib/aggregation/GroupingSerializationTest.java387
-rw-r--r--searchlib/src/test/java/com/yahoo/searchlib/aggregation/GroupingTestCase.java227
-rwxr-xr-xsearchlib/src/test/java/com/yahoo/searchlib/aggregation/MergeTestCase.java735
-rw-r--r--searchlib/src/test/java/com/yahoo/searchlib/aggregation/hll/BiasEstimatorTest.java70
-rw-r--r--searchlib/src/test/java/com/yahoo/searchlib/aggregation/hll/HyperLogLogEstimatorTest.java89
-rw-r--r--searchlib/src/test/java/com/yahoo/searchlib/aggregation/hll/HyperLogLogPrecisionBenchmark.java70
-rw-r--r--searchlib/src/test/java/com/yahoo/searchlib/aggregation/hll/NormalSketchTest.java121
-rw-r--r--searchlib/src/test/java/com/yahoo/searchlib/aggregation/hll/SketchMergerTest.java69
-rw-r--r--searchlib/src/test/java/com/yahoo/searchlib/aggregation/hll/SketchUtils.java46
-rw-r--r--searchlib/src/test/java/com/yahoo/searchlib/aggregation/hll/SparseSketchTest.java62
-rwxr-xr-xsearchlib/src/test/java/com/yahoo/searchlib/expression/ExpressionTestCase.java932
-rw-r--r--searchlib/src/test/java/com/yahoo/searchlib/expression/FixedWidthBucketFunctionTestCase.java21
-rw-r--r--searchlib/src/test/java/com/yahoo/searchlib/expression/FloatBucketResultNodeTestCase.java44
-rwxr-xr-xsearchlib/src/test/java/com/yahoo/searchlib/expression/ForceLoadTestCase.java19
-rw-r--r--searchlib/src/test/java/com/yahoo/searchlib/expression/IntegerBucketResultNodeTestCase.java35
-rw-r--r--searchlib/src/test/java/com/yahoo/searchlib/expression/IntegerResultNodeTestCase.java118
-rw-r--r--searchlib/src/test/java/com/yahoo/searchlib/expression/NullResultNodeTestCase.java36
-rwxr-xr-xsearchlib/src/test/java/com/yahoo/searchlib/expression/ObjectVisitorTestCase.java61
-rw-r--r--searchlib/src/test/java/com/yahoo/searchlib/expression/RangeBucketPreDefFunctionTestCase.java21
-rw-r--r--searchlib/src/test/java/com/yahoo/searchlib/expression/RawBucketResultNodeTestCase.java46
-rw-r--r--searchlib/src/test/java/com/yahoo/searchlib/expression/ResultNodeTest.java43
-rw-r--r--searchlib/src/test/java/com/yahoo/searchlib/expression/ResultNodeVectorTestCase.java167
-rw-r--r--searchlib/src/test/java/com/yahoo/searchlib/expression/StringBucketResultNodeTestCase.java57
-rw-r--r--searchlib/src/test/java/com/yahoo/searchlib/expression/TimeStampFunctionTestCase.java29
-rw-r--r--searchlib/src/test/java/com/yahoo/searchlib/expression/ZCurveFunctionTestCase.java25
-rw-r--r--searchlib/src/test/java/com/yahoo/searchlib/gbdt/GbdtConverterTestCase.java169
-rw-r--r--searchlib/src/test/java/com/yahoo/searchlib/gbdt/GbdtModelTestCase.java65
-rw-r--r--searchlib/src/test/java/com/yahoo/searchlib/gbdt/ReferenceNodeTestCase.java101
-rw-r--r--searchlib/src/test/java/com/yahoo/searchlib/gbdt/ResponseNodeTestCase.java40
-rw-r--r--searchlib/src/test/java/com/yahoo/searchlib/gbdt/TreeNodeTestCase.java57
-rw-r--r--searchlib/src/test/java/com/yahoo/searchlib/gbdt/XmlHelperTestCase.java153
-rw-r--r--searchlib/src/test/java/com/yahoo/searchlib/mlr/ga/test/CsvFileCaseListTestCase.java81
-rw-r--r--searchlib/src/test/java/com/yahoo/searchlib/mlr/ga/test/ExampleLearningSessions.java110
-rw-r--r--searchlib/src/test/java/com/yahoo/searchlib/mlr/ga/test/MainTestCase.java57
-rw-r--r--searchlib/src/test/java/com/yahoo/searchlib/mlr/ga/test/MockTrainingSetTestCase.java46
-rw-r--r--searchlib/src/test/java/com/yahoo/searchlib/mlr/ga/test/TripAdvisorFileCaseList.java99
-rw-r--r--searchlib/src/test/java/com/yahoo/searchlib/mlr/gbdt/ExpressionAnalysisRunner.java19
-rw-r--r--searchlib/src/test/java/com/yahoo/searchlib/ranking/features/ElementCompletenessTestCase.java80
-rw-r--r--searchlib/src/test/java/com/yahoo/searchlib/ranking/features/FieldTermMatchTestCase.java30
-rw-r--r--searchlib/src/test/java/com/yahoo/searchlib/ranking/features/fieldmatch/SemanticDistanceTestCase.java140
-rw-r--r--searchlib/src/test/java/com/yahoo/searchlib/ranking/features/fieldmatch/reference/OptimalStringAlignmentDistance.java201
-rw-r--r--searchlib/src/test/java/com/yahoo/searchlib/ranking/features/fieldmatch/reference/TextbookLevenshteinDistance.java38
-rw-r--r--searchlib/src/test/java/com/yahoo/searchlib/ranking/features/fieldmatch/reference/test/OptimalStringAlignmentTestCase.java58
-rw-r--r--searchlib/src/test/java/com/yahoo/searchlib/ranking/features/fieldmatch/test/FieldMatchMetricsTestCase.java757
-rwxr-xr-xsearchlib/src/test/java/com/yahoo/searchlib/rankingexpression/FeatureListTestCase.java77
-rwxr-xr-xsearchlib/src/test/java/com/yahoo/searchlib/rankingexpression/RankingExpressionTestCase.java281
-rw-r--r--searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/evaluation/Benchmark.java144
-rw-r--r--searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/evaluation/EvaluationBenchmark.java474
-rw-r--r--searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/evaluation/EvaluationTestCase.java399
-rw-r--r--searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/evaluation/NeuralNetEvaluationTestCase.java49
-rw-r--r--searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/evaluation/StreamEvaluationBenchmark.java160
-rw-r--r--searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/evaluation/gbdtoptimization/ContextReuseTestCase.java61
-rw-r--r--searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/evaluation/gbdtoptimization/GBDTForestOptimizerTestCase.java109
-rw-r--r--searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/evaluation/gbdtoptimization/GBDTOptimizerTestCase.java105
-rw-r--r--searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/rule/ArgumentsTestCase.java42
-rw-r--r--searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/rule/ReferenceNodeTestCase.java35
-rw-r--r--searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/transform/ConstantDereferencerTestCase.java30
-rw-r--r--searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/transform/SimplifierTestCase.java80
-rwxr-xr-xsearchlib/src/test/java/com/yahoo/searchlib/treenet/TreeNetParserTestCase.java79
-rw-r--r--searchlib/src/testlist.txt137
-rw-r--r--searchlib/src/tests/.gitignore3
-rw-r--r--searchlib/src/tests/aggregator/.gitignore7
-rw-r--r--searchlib/src/tests/aggregator/CMakeLists.txt15
-rw-r--r--searchlib/src/tests/aggregator/DESC1
-rw-r--r--searchlib/src/tests/aggregator/FILES1
-rw-r--r--searchlib/src/tests/aggregator/attr_test.cpp285
-rw-r--r--searchlib/src/tests/aggregator/perdocexpr.cpp1693
-rw-r--r--searchlib/src/tests/alignment/.gitignore4
-rw-r--r--searchlib/src/tests/alignment/CMakeLists.txt8
-rw-r--r--searchlib/src/tests/alignment/DESC1
-rw-r--r--searchlib/src/tests/alignment/FILES1
-rw-r--r--searchlib/src/tests/alignment/alignment.cpp68
-rw-r--r--searchlib/src/tests/attribute/.gitignore11
-rw-r--r--searchlib/src/tests/attribute/CMakeLists.txt29
-rw-r--r--searchlib/src/tests/attribute/DESC1
-rw-r--r--searchlib/src/tests/attribute/FILES2
-rw-r--r--searchlib/src/tests/attribute/attribute_test.cpp2200
-rw-r--r--searchlib/src/tests/attribute/attribute_test.sh7
-rw-r--r--searchlib/src/tests/attribute/attributebenchmark.cpp678
-rw-r--r--searchlib/src/tests/attribute/attributebenchmark.rb22
-rw-r--r--searchlib/src/tests/attribute/attributefilewriter/.gitignore1
-rw-r--r--searchlib/src/tests/attribute/attributefilewriter/CMakeLists.txt8
-rw-r--r--searchlib/src/tests/attribute/attributefilewriter/attributefilewriter_test.cpp116
-rw-r--r--searchlib/src/tests/attribute/attributeguard.cpp32
-rw-r--r--searchlib/src/tests/attribute/attributeguard_test.sh7
-rw-r--r--searchlib/src/tests/attribute/attributemanager/.gitignore4
-rw-r--r--searchlib/src/tests/attribute/attributemanager/CMakeLists.txt8
-rw-r--r--searchlib/src/tests/attribute/attributemanager/attributemanager_test.cpp422
-rw-r--r--searchlib/src/tests/attribute/attributesearcher.h265
-rw-r--r--searchlib/src/tests/attribute/attributeupdater.h299
-rw-r--r--searchlib/src/tests/attribute/benchmarkplotter.rb134
-rw-r--r--searchlib/src/tests/attribute/bitvector/.gitignore1
-rw-r--r--searchlib/src/tests/attribute/bitvector/CMakeLists.txt9
-rw-r--r--searchlib/src/tests/attribute/bitvector/bitvector_test.cpp632
-rw-r--r--searchlib/src/tests/attribute/changevector_test.cpp92
-rw-r--r--searchlib/src/tests/attribute/changevector_test.sh7
-rw-r--r--searchlib/src/tests/attribute/comparator/.gitignore4
-rw-r--r--searchlib/src/tests/attribute/comparator/CMakeLists.txt8
-rw-r--r--searchlib/src/tests/attribute/comparator/DESC1
-rw-r--r--searchlib/src/tests/attribute/comparator/FILES1
-rw-r--r--searchlib/src/tests/attribute/comparator/comparator_test.cpp169
-rw-r--r--searchlib/src/tests/attribute/document_weight_iterator/.gitignore1
-rw-r--r--searchlib/src/tests/attribute/document_weight_iterator/CMakeLists.txt9
-rw-r--r--searchlib/src/tests/attribute/document_weight_iterator/FILES1
-rw-r--r--searchlib/src/tests/attribute/document_weight_iterator/document_weight_iterator_test.cpp189
-rw-r--r--searchlib/src/tests/attribute/enumeratedsave/.gitignore127
-rw-r--r--searchlib/src/tests/attribute/enumeratedsave/CMakeLists.txt8
-rw-r--r--searchlib/src/tests/attribute/enumeratedsave/enumeratedsave_test.cpp944
-rw-r--r--searchlib/src/tests/attribute/enumstore/.gitignore4
-rw-r--r--searchlib/src/tests/attribute/enumstore/CMakeLists.txt8
-rw-r--r--searchlib/src/tests/attribute/enumstore/DESC1
-rw-r--r--searchlib/src/tests/attribute/enumstore/FILES1
-rw-r--r--searchlib/src/tests/attribute/enumstore/enumstore_test.cpp879
-rw-r--r--searchlib/src/tests/attribute/extendattributes/.gitignore4
-rw-r--r--searchlib/src/tests/attribute/extendattributes/CMakeLists.txt8
-rw-r--r--searchlib/src/tests/attribute/extendattributes/DESC1
-rw-r--r--searchlib/src/tests/attribute/extendattributes/FILES1
-rw-r--r--searchlib/src/tests/attribute/extendattributes/extendattribute.cpp176
-rwxr-xr-xsearchlib/src/tests/attribute/extendattributes/extendattribute_test.sh3
-rw-r--r--searchlib/src/tests/attribute/gidmapattribute/.gitignore0
-rw-r--r--searchlib/src/tests/attribute/multivaluemapping/.gitignore4
-rw-r--r--searchlib/src/tests/attribute/multivaluemapping/CMakeLists.txt8
-rw-r--r--searchlib/src/tests/attribute/multivaluemapping/DESC1
-rw-r--r--searchlib/src/tests/attribute/multivaluemapping/FILES1
-rw-r--r--searchlib/src/tests/attribute/multivaluemapping/multivaluemapping_test.cpp836
-rw-r--r--searchlib/src/tests/attribute/postinglist/.gitignore4
-rw-r--r--searchlib/src/tests/attribute/postinglist/CMakeLists.txt8
-rw-r--r--searchlib/src/tests/attribute/postinglist/DESC1
-rw-r--r--searchlib/src/tests/attribute/postinglist/FILES1
-rw-r--r--searchlib/src/tests/attribute/postinglist/postinglist.cpp707
-rw-r--r--searchlib/src/tests/attribute/postinglistattribute/.gitignore4
-rw-r--r--searchlib/src/tests/attribute/postinglistattribute/CMakeLists.txt8
-rw-r--r--searchlib/src/tests/attribute/postinglistattribute/DESC1
-rw-r--r--searchlib/src/tests/attribute/postinglistattribute/FILES1
-rw-r--r--searchlib/src/tests/attribute/postinglistattribute/postinglistattribute_test.cpp1021
-rwxr-xr-xsearchlib/src/tests/attribute/postinglistattribute/postinglistattribute_test.sh5
-rw-r--r--searchlib/src/tests/attribute/runnable.h43
-rw-r--r--searchlib/src/tests/attribute/searchable/.gitignore4
-rw-r--r--searchlib/src/tests/attribute/searchable/CMakeLists.txt22
-rw-r--r--searchlib/src/tests/attribute/searchable/attribute_searchable_adapter_test.cpp689
-rwxr-xr-xsearchlib/src/tests/attribute/searchable/attribute_searchable_adapter_test.sh4
-rw-r--r--searchlib/src/tests/attribute/searchable/attribute_weighted_set_blueprint_test.cpp231
-rw-r--r--searchlib/src/tests/attribute/searchable/attributeblueprint_test.cpp240
-rw-r--r--searchlib/src/tests/attribute/searchcontext/.gitignore4
-rw-r--r--searchlib/src/tests/attribute/searchcontext/CMakeLists.txt9
-rw-r--r--searchlib/src/tests/attribute/searchcontext/DESC1
-rw-r--r--searchlib/src/tests/attribute/searchcontext/FILES1
-rw-r--r--searchlib/src/tests/attribute/searchcontext/searchcontext.cpp1900
-rwxr-xr-xsearchlib/src/tests/attribute/searchcontext/searchcontext_test.sh5
-rw-r--r--searchlib/src/tests/attribute/sourceselector/.gitignore4
-rw-r--r--searchlib/src/tests/attribute/sourceselector/CMakeLists.txt8
-rw-r--r--searchlib/src/tests/attribute/sourceselector/DESC1
-rw-r--r--searchlib/src/tests/attribute/sourceselector/FILES1
-rw-r--r--searchlib/src/tests/attribute/sourceselector/sourceselector_test.cpp216
-rw-r--r--searchlib/src/tests/attribute/stringattribute/.gitignore4
-rw-r--r--searchlib/src/tests/attribute/stringattribute/CMakeLists.txt8
-rw-r--r--searchlib/src/tests/attribute/stringattribute/DESC1
-rw-r--r--searchlib/src/tests/attribute/stringattribute/FILES1
-rw-r--r--searchlib/src/tests/attribute/stringattribute/stringattribute_test.cpp453
-rwxr-xr-xsearchlib/src/tests/attribute/stringattribute/stringattribute_test.sh3
-rw-r--r--searchlib/src/tests/attribute/tensorattribute/.gitignore1
-rw-r--r--searchlib/src/tests/attribute/tensorattribute/CMakeLists.txt8
-rw-r--r--searchlib/src/tests/attribute/tensorattribute/DESC1
-rw-r--r--searchlib/src/tests/attribute/tensorattribute/FILES1
-rw-r--r--searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp217
-rw-r--r--searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.sh3
-rw-r--r--searchlib/src/tests/bitcompression/expgolomb/.gitignore1
-rw-r--r--searchlib/src/tests/bitcompression/expgolomb/CMakeLists.txt8
-rw-r--r--searchlib/src/tests/bitcompression/expgolomb/DESC1
-rw-r--r--searchlib/src/tests/bitcompression/expgolomb/FILES1
-rw-r--r--searchlib/src/tests/bitcompression/expgolomb/expgolomb_test.cpp621
-rw-r--r--searchlib/src/tests/bitvector/.gitignore4
-rw-r--r--searchlib/src/tests/bitvector/CMakeLists.txt8
-rw-r--r--searchlib/src/tests/bitvector/DESC1
-rw-r--r--searchlib/src/tests/bitvector/FILES1
-rw-r--r--searchlib/src/tests/bitvector/bitvectorbenchmark.cpp225
-rw-r--r--searchlib/src/tests/btree/.gitignore3
-rw-r--r--searchlib/src/tests/btree/CMakeLists.txt15
-rw-r--r--searchlib/src/tests/btree/DESC1
-rw-r--r--searchlib/src/tests/btree/FILES1
-rw-r--r--searchlib/src/tests/btree/btreeaggregation_test.cpp1146
-rw-r--r--searchlib/src/tests/btree/iteratespeed.cpp213
-rw-r--r--searchlib/src/tests/bytecomplens/.gitignore5
-rw-r--r--searchlib/src/tests/bytecomplens/CMakeLists.txt8
-rw-r--r--searchlib/src/tests/bytecomplens/DESC1
-rw-r--r--searchlib/src/tests/bytecomplens/FILES1
-rw-r--r--searchlib/src/tests/bytecomplens/bytecomp.cpp102
-rw-r--r--searchlib/src/tests/bytecomplens/example.txt122
-rw-r--r--searchlib/src/tests/bytecomplens/tblprint.cpp357
-rw-r--r--searchlib/src/tests/common/bitvector/.gitignore8
-rw-r--r--searchlib/src/tests/common/bitvector/CMakeLists.txt22
-rw-r--r--searchlib/src/tests/common/bitvector/DESC1
-rw-r--r--searchlib/src/tests/common/bitvector/FILES1
-rw-r--r--searchlib/src/tests/common/bitvector/bitvector_benchmark.cpp37
-rw-r--r--searchlib/src/tests/common/bitvector/bitvector_test.cpp541
-rw-r--r--searchlib/src/tests/common/bitvector/condensedbitvector_test.cpp49
-rw-r--r--searchlib/src/tests/common/foregroundtaskexecutor/.gitignore1
-rw-r--r--searchlib/src/tests/common/foregroundtaskexecutor/CMakeLists.txt8
-rw-r--r--searchlib/src/tests/common/foregroundtaskexecutor/DESC1
-rw-r--r--searchlib/src/tests/common/foregroundtaskexecutor/FILES1
-rw-r--r--searchlib/src/tests/common/foregroundtaskexecutor/foregroundtaskexecutor_test.cpp124
-rw-r--r--searchlib/src/tests/common/location/.gitignore1
-rw-r--r--searchlib/src/tests/common/location/CMakeLists.txt8
-rw-r--r--searchlib/src/tests/common/location/FILES1
-rw-r--r--searchlib/src/tests/common/location/location_test.cpp119
-rw-r--r--searchlib/src/tests/common/packets/.gitignore4
-rw-r--r--searchlib/src/tests/common/packets/CMakeLists.txt8
-rw-r--r--searchlib/src/tests/common/packets/DESC1
-rw-r--r--searchlib/src/tests/common/packets/FILES1
-rw-r--r--searchlib/src/tests/common/packets/packets_test.cpp705
-rw-r--r--searchlib/src/tests/common/rcuvector/.gitignore4
-rw-r--r--searchlib/src/tests/common/rcuvector/CMakeLists.txt8
-rw-r--r--searchlib/src/tests/common/rcuvector/DESC1
-rw-r--r--searchlib/src/tests/common/rcuvector/FILES1
-rw-r--r--searchlib/src/tests/common/rcuvector/rcuvector_test.cpp284
-rw-r--r--searchlib/src/tests/common/resultset/.gitignore1
-rw-r--r--searchlib/src/tests/common/resultset/CMakeLists.txt8
-rw-r--r--searchlib/src/tests/common/resultset/resultset_test.cpp109
-rw-r--r--searchlib/src/tests/common/sequencedtaskexecutor/.gitignore1
-rw-r--r--searchlib/src/tests/common/sequencedtaskexecutor/CMakeLists.txt8
-rw-r--r--searchlib/src/tests/common/sequencedtaskexecutor/DESC1
-rw-r--r--searchlib/src/tests/common/sequencedtaskexecutor/FILES1
-rw-r--r--searchlib/src/tests/common/sequencedtaskexecutor/sequencedtaskexecutor_test.cpp194
-rw-r--r--searchlib/src/tests/common/summaryfeatures/.gitignore4
-rw-r--r--searchlib/src/tests/common/summaryfeatures/CMakeLists.txt8
-rw-r--r--searchlib/src/tests/common/summaryfeatures/DESC1
-rw-r--r--searchlib/src/tests/common/summaryfeatures/FILES1
-rw-r--r--searchlib/src/tests/common/summaryfeatures/summaryfeatures.cpp152
-rwxr-xr-xsearchlib/src/tests/create-test.sh52
-rw-r--r--searchlib/src/tests/datastore/.gitignore8
-rw-r--r--searchlib/src/tests/datastore/CMakeLists.txt8
-rw-r--r--searchlib/src/tests/datastore/DESC1
-rw-r--r--searchlib/src/tests/datastore/FILES1
-rw-r--r--searchlib/src/tests/datastore/bad.datbin0 -> 4096 bytes
-rw-r--r--searchlib/src/tests/datastore/bug-7257706/1422358701368384000.datbin0 -> 94208 bytes
-rw-r--r--searchlib/src/tests/datastore/bug-7257706/1422358701368384000.idxbin0 -> 4384 bytes
-rw-r--r--searchlib/src/tests/datastore/dangling/1425506005745465000.datbin0 -> 4096 bytes
-rw-r--r--searchlib/src/tests/datastore/dangling/1425506005745465000.idxbin0 -> 480 bytes
-rw-r--r--searchlib/src/tests/datastore/dangling/2425506005745465000.datbin0 -> 4096 bytes
-rw-r--r--searchlib/src/tests/datastore/dangling/2425506005745465000.idxbin0 -> 480 bytes
-rw-r--r--searchlib/src/tests/datastore/dangling/3425506005745465000.datbin0 -> 4096 bytes
-rw-r--r--searchlib/src/tests/datastore/dangling/4425506005745465000.datbin0 -> 4096 bytes
-rw-r--r--searchlib/src/tests/datastore/dangling/4425506005745465000.idx0
-rw-r--r--searchlib/src/tests/datastore/datastore.datbin0 -> 5120 bytes
-rw-r--r--searchlib/src/tests/datastore/logdatastore_test.cpp468
-rwxr-xr-xsearchlib/src/tests/datastore/logdatastore_test.sh10
-rw-r--r--searchlib/src/tests/diskindex/bitvector/.gitignore6
-rw-r--r--searchlib/src/tests/diskindex/bitvector/CMakeLists.txt8
-rw-r--r--searchlib/src/tests/diskindex/bitvector/DESC1
-rw-r--r--searchlib/src/tests/diskindex/bitvector/FILES1
-rw-r--r--searchlib/src/tests/diskindex/bitvector/bitvector_test.cpp221
-rw-r--r--searchlib/src/tests/diskindex/diskindex/.gitignore5
-rw-r--r--searchlib/src/tests/diskindex/diskindex/CMakeLists.txt9
-rw-r--r--searchlib/src/tests/diskindex/diskindex/DESC1
-rw-r--r--searchlib/src/tests/diskindex/diskindex/FILES1
-rw-r--r--searchlib/src/tests/diskindex/diskindex/diskindex_test.cpp330
-rw-r--r--searchlib/src/tests/diskindex/fieldwriter/.gitignore3
-rw-r--r--searchlib/src/tests/diskindex/fieldwriter/CMakeLists.txt9
-rw-r--r--searchlib/src/tests/diskindex/fieldwriter/fieldwriter_test.cpp972
-rwxr-xr-xsearchlib/src/tests/diskindex/fieldwriter/runtests.sh66
-rw-r--r--searchlib/src/tests/diskindex/fusion/.gitignore37
-rw-r--r--searchlib/src/tests/diskindex/fusion/CMakeLists.txt8
-rw-r--r--searchlib/src/tests/diskindex/fusion/DESC1
-rw-r--r--searchlib/src/tests/diskindex/fusion/FILES1
-rw-r--r--searchlib/src/tests/diskindex/fusion/fusion_test.cpp506
-rwxr-xr-xsearchlib/src/tests/diskindex/fusion/fusion_test.sh15
-rw-r--r--searchlib/src/tests/diskindex/pagedict4/.gitignore5
-rw-r--r--searchlib/src/tests/diskindex/pagedict4/CMakeLists.txt9
-rw-r--r--searchlib/src/tests/diskindex/pagedict4/pagedict4test.cpp876
-rw-r--r--searchlib/src/tests/document_store/.gitignore1
-rw-r--r--searchlib/src/tests/document_store/CMakeLists.txt8
-rw-r--r--searchlib/src/tests/document_store/FILES1
-rw-r--r--searchlib/src/tests/document_store/document_store_test.cpp58
-rw-r--r--searchlib/src/tests/document_store/visitor/.gitignore1
-rw-r--r--searchlib/src/tests/document_store/visitor/CMakeLists.txt8
-rw-r--r--searchlib/src/tests/document_store/visitor/DESC1
-rw-r--r--searchlib/src/tests/document_store/visitor/FILES1
-rw-r--r--searchlib/src/tests/document_store/visitor/document_store_visitor_test.cpp466
-rw-r--r--searchlib/src/tests/engine/docsumapi/.gitignore4
-rw-r--r--searchlib/src/tests/engine/docsumapi/CMakeLists.txt8
-rw-r--r--searchlib/src/tests/engine/docsumapi/DESC1
-rw-r--r--searchlib/src/tests/engine/docsumapi/FILES1
-rw-r--r--searchlib/src/tests/engine/docsumapi/docsumapi_test.cpp185
-rw-r--r--searchlib/src/tests/engine/monitorapi/.gitignore4
-rw-r--r--searchlib/src/tests/engine/monitorapi/CMakeLists.txt8
-rw-r--r--searchlib/src/tests/engine/monitorapi/DESC1
-rw-r--r--searchlib/src/tests/engine/monitorapi/FILES1
-rw-r--r--searchlib/src/tests/engine/monitorapi/monitorapi_test.cpp126
-rw-r--r--searchlib/src/tests/engine/searchapi/.gitignore4
-rw-r--r--searchlib/src/tests/engine/searchapi/CMakeLists.txt8
-rw-r--r--searchlib/src/tests/engine/searchapi/DESC1
-rw-r--r--searchlib/src/tests/engine/searchapi/FILES1
-rw-r--r--searchlib/src/tests/engine/searchapi/searchapi_test.cpp267
-rw-r--r--searchlib/src/tests/engine/transportserver/.gitignore5
-rw-r--r--searchlib/src/tests/engine/transportserver/CMakeLists.txt12
-rw-r--r--searchlib/src/tests/engine/transportserver/DESC1
-rw-r--r--searchlib/src/tests/engine/transportserver/FILES1
-rw-r--r--searchlib/src/tests/engine/transportserver/transportserver_test.cpp187
-rw-r--r--searchlib/src/tests/features/.gitignore11
-rw-r--r--searchlib/src/tests/features/CMakeLists.txt19
-rw-r--r--searchlib/src/tests/features/DESC1
-rw-r--r--searchlib/src/tests/features/FILES3
-rw-r--r--searchlib/src/tests/features/benchmark/dotproduct/c-100000-1000-array-double.txt7
-rw-r--r--searchlib/src/tests/features/benchmark/dotproduct/c-100000-1000-array-float.txt7
-rw-r--r--searchlib/src/tests/features/benchmark/dotproduct/c-100000-1000-array-int.txt7
-rw-r--r--searchlib/src/tests/features/benchmark/dotproduct/c-100000-1000-array-long.txt7
-rw-r--r--searchlib/src/tests/features/benchmark/dotproduct/c-100000-1000-wset.txt7
-rw-r--r--searchlib/src/tests/features/benchmark/fieldmatch/c-100-1.txt7
-rw-r--r--searchlib/src/tests/features/benchmark/fieldmatch/c-100-10.txt7
-rw-r--r--searchlib/src/tests/features/benchmark/fieldmatch/c-100-100.txt7
-rw-r--r--searchlib/src/tests/features/benchmark/fieldmatch/c-100-1000.txt7
-rw-r--r--searchlib/src/tests/features/benchmark/fieldmatch/c-100-10000.txt7
-rw-r--r--searchlib/src/tests/features/benchmark/fieldmatch/c-100-5.txt7
-rw-r--r--searchlib/src/tests/features/benchmark/fieldmatch/c-100-50.txt7
-rw-r--r--searchlib/src/tests/features/benchmark/fieldmatch/c-100-500.txt7
-rw-r--r--searchlib/src/tests/features/benchmark/fieldmatch/c-100.txt6
-rw-r--r--searchlib/src/tests/features/benchmark/fieldmatch/c-1000-1-callgrind.txt7
-rw-r--r--searchlib/src/tests/features/benchmark/fieldmatch/c-1000-1.txt7
-rw-r--r--searchlib/src/tests/features/benchmark/fieldmatch/c-1000-10.txt7
-rw-r--r--searchlib/src/tests/features/benchmark/fieldmatch/c-1000-100-callgrind.txt7
-rw-r--r--searchlib/src/tests/features/benchmark/fieldmatch/c-1000-100.txt7
-rw-r--r--searchlib/src/tests/features/benchmark/fieldmatch/c-1000-1000.txt7
-rw-r--r--searchlib/src/tests/features/benchmark/fieldmatch/c-1000-10000.txt7
-rw-r--r--searchlib/src/tests/features/benchmark/fieldmatch/c-1000-5.txt7
-rw-r--r--searchlib/src/tests/features/benchmark/fieldmatch/c-1000-50.txt7
-rw-r--r--searchlib/src/tests/features/benchmark/fieldmatch/c-1000-500.txt7
-rw-r--r--searchlib/src/tests/features/benchmark/fieldmatch/c-1000.txt6
-rw-r--r--searchlib/src/tests/features/benchmark/fieldmatch/c-10000-1.txt7
-rw-r--r--searchlib/src/tests/features/benchmark/fieldmatch/c-10000-10.txt7
-rw-r--r--searchlib/src/tests/features/benchmark/fieldmatch/c-10000-100.txt7
-rw-r--r--searchlib/src/tests/features/benchmark/fieldmatch/c-10000-1000.txt7
-rw-r--r--searchlib/src/tests/features/benchmark/fieldmatch/c-10000-10000.txt7
-rw-r--r--searchlib/src/tests/features/benchmark/fieldmatch/c-10000-5.txt7
-rw-r--r--searchlib/src/tests/features/benchmark/fieldmatch/c-10000-50.txt7
-rw-r--r--searchlib/src/tests/features/benchmark/fieldmatch/c-10000-500.txt7
-rw-r--r--searchlib/src/tests/features/benchmark/fieldmatch/c-10000.txt6
-rw-r--r--searchlib/src/tests/features/benchmark/fieldmatch/c-20-1.txt7
-rw-r--r--searchlib/src/tests/features/benchmark/fieldmatch/c-20-10.txt7
-rw-r--r--searchlib/src/tests/features/benchmark/fieldmatch/c-20-100.txt7
-rw-r--r--searchlib/src/tests/features/benchmark/fieldmatch/c-20-1000.txt7
-rw-r--r--searchlib/src/tests/features/benchmark/fieldmatch/c-20-10000.txt7
-rw-r--r--searchlib/src/tests/features/benchmark/fieldmatch/c-20-5.txt7
-rw-r--r--searchlib/src/tests/features/benchmark/fieldmatch/c-20-50.txt7
-rw-r--r--searchlib/src/tests/features/benchmark/fieldmatch/c-20-500.txt7
-rw-r--r--searchlib/src/tests/features/benchmark/fieldmatch/c-20.txt6
-rw-r--r--searchlib/src/tests/features/benchmark/fieldmatch/phrase-02.txt7
-rw-r--r--searchlib/src/tests/features/benchmark/fieldmatch/phrase-10.txt7
-rw-r--r--searchlib/src/tests/features/benchmark/fieldmatch/phrase-50.txt7
-rw-r--r--searchlib/src/tests/features/benchmark/fieldmatch/plot.rb30
-rw-r--r--searchlib/src/tests/features/benchmark/fieldmatch/readme.txt22
-rw-r--r--searchlib/src/tests/features/benchmark/fieldmatch/run.rb17
-rw-r--r--searchlib/src/tests/features/benchmark/plotlib.rb36
-rw-r--r--searchlib/src/tests/features/benchmark/rankingexpression/c-1.txt4
-rw-r--r--searchlib/src/tests/features/benchmark/rankingexpression/c-10.txt4
-rw-r--r--searchlib/src/tests/features/benchmark/rankingexpression/c-100.txt4
-rw-r--r--searchlib/src/tests/features/benchmark/rankingexpression/c-200.txt4
-rw-r--r--searchlib/src/tests/features/benchmark/rankingexpression/c-400.txt4
-rw-r--r--searchlib/src/tests/features/benchmark/rankingexpression/c-5.txt4
-rw-r--r--searchlib/src/tests/features/benchmark/rankingexpression/c-50.txt4
-rw-r--r--searchlib/src/tests/features/benchmark/rankingexpression/c-800.txt4
-rw-r--r--searchlib/src/tests/features/benchmark/rankingexpression/plot.rb22
-rw-r--r--searchlib/src/tests/features/benchmark/rankingexpression/run.rb14
-rw-r--r--searchlib/src/tests/features/beta/.gitignore1
-rw-r--r--searchlib/src/tests/features/beta/CMakeLists.txt12
-rw-r--r--searchlib/src/tests/features/beta/beta_features.cpp726
-rw-r--r--searchlib/src/tests/features/element_completeness/.gitignore1
-rw-r--r--searchlib/src/tests/features/element_completeness/CMakeLists.txt8
-rw-r--r--searchlib/src/tests/features/element_completeness/FILES1
-rw-r--r--searchlib/src/tests/features/element_completeness/element_completeness_test.cpp201
-rw-r--r--searchlib/src/tests/features/element_similarity_feature/.gitignore1
-rw-r--r--searchlib/src/tests/features/element_similarity_feature/CMakeLists.txt8
-rw-r--r--searchlib/src/tests/features/element_similarity_feature/element_similarity_feature_test.cpp371
-rw-r--r--searchlib/src/tests/features/euclidean_distance/.gitignore1
-rw-r--r--searchlib/src/tests/features/euclidean_distance/CMakeLists.txt8
-rw-r--r--searchlib/src/tests/features/euclidean_distance/FILES1
-rw-r--r--searchlib/src/tests/features/euclidean_distance/euclidean_distance_test.cpp115
-rw-r--r--searchlib/src/tests/features/featurebenchmark.cpp657
-rw-r--r--searchlib/src/tests/features/item_raw_score/.gitignore1
-rw-r--r--searchlib/src/tests/features/item_raw_score/CMakeLists.txt8
-rw-r--r--searchlib/src/tests/features/item_raw_score/FILES1
-rw-r--r--searchlib/src/tests/features/item_raw_score/item_raw_score_test.cpp158
-rw-r--r--searchlib/src/tests/features/native_dot_product/.gitignore1
-rw-r--r--searchlib/src/tests/features/native_dot_product/CMakeLists.txt8
-rw-r--r--searchlib/src/tests/features/native_dot_product/FILES1
-rw-r--r--searchlib/src/tests/features/native_dot_product/native_dot_product_test.cpp191
-rw-r--r--searchlib/src/tests/features/prod_features.cpp1937
-rw-r--r--searchlib/src/tests/features/prod_features.h175
-rw-r--r--searchlib/src/tests/features/prod_features_attributematch.cpp300
-rw-r--r--searchlib/src/tests/features/prod_features_fieldmatch.cpp1079
-rw-r--r--searchlib/src/tests/features/prod_features_fieldtermmatch.cpp113
-rw-r--r--searchlib/src/tests/features/prod_features_framework.cpp174
-rwxr-xr-xsearchlib/src/tests/features/prod_features_test.sh3
-rw-r--r--searchlib/src/tests/features/ranking_expression/.gitignore1
-rw-r--r--searchlib/src/tests/features/ranking_expression/CMakeLists.txt8
-rw-r--r--searchlib/src/tests/features/ranking_expression/ranking_expression_test.cpp90
-rw-r--r--searchlib/src/tests/features/raw_score/.gitignore1
-rw-r--r--searchlib/src/tests/features/raw_score/CMakeLists.txt8
-rw-r--r--searchlib/src/tests/features/raw_score/FILES1
-rw-r--r--searchlib/src/tests/features/raw_score/raw_score_test.cpp151
-rw-r--r--searchlib/src/tests/features/subqueries/.gitignore1
-rw-r--r--searchlib/src/tests/features/subqueries/CMakeLists.txt8
-rw-r--r--searchlib/src/tests/features/subqueries/subqueries_test.cpp162
-rw-r--r--searchlib/src/tests/features/tensor/.gitignore1
-rw-r--r--searchlib/src/tests/features/tensor/CMakeLists.txt8
-rw-r--r--searchlib/src/tests/features/tensor/FILES1
-rw-r--r--searchlib/src/tests/features/tensor/tensor_test.cpp237
-rw-r--r--searchlib/src/tests/features/tensor_from_labels/.gitignore1
-rw-r--r--searchlib/src/tests/features/tensor_from_labels/CMakeLists.txt8
-rw-r--r--searchlib/src/tests/features/tensor_from_labels/FILES1
-rw-r--r--searchlib/src/tests/features/tensor_from_labels/tensor_from_labels_test.cpp211
-rw-r--r--searchlib/src/tests/features/tensor_from_weighted_set/.gitignore1
-rw-r--r--searchlib/src/tests/features/tensor_from_weighted_set/CMakeLists.txt8
-rw-r--r--searchlib/src/tests/features/tensor_from_weighted_set/FILES1
-rw-r--r--searchlib/src/tests/features/tensor_from_weighted_set/tensor_from_weighted_set_test.cpp198
-rw-r--r--searchlib/src/tests/features/text_similarity_feature/.gitignore1
-rw-r--r--searchlib/src/tests/features/text_similarity_feature/CMakeLists.txt8
-rw-r--r--searchlib/src/tests/features/text_similarity_feature/FILES1
-rw-r--r--searchlib/src/tests/features/text_similarity_feature/text_similarity_feature_test.cpp245
-rw-r--r--searchlib/src/tests/features/util/.gitignore1
-rw-r--r--searchlib/src/tests/features/util/CMakeLists.txt8
-rw-r--r--searchlib/src/tests/features/util/FILES1
-rw-r--r--searchlib/src/tests/features/util/util_test.cpp40
-rw-r--r--searchlib/src/tests/fef/.gitignore4
-rw-r--r--searchlib/src/tests/fef/CMakeLists.txt8
-rw-r--r--searchlib/src/tests/fef/DESC1
-rw-r--r--searchlib/src/tests/fef/FILES1
-rw-r--r--searchlib/src/tests/fef/attributecontent/.gitignore4
-rw-r--r--searchlib/src/tests/fef/attributecontent/CMakeLists.txt8
-rw-r--r--searchlib/src/tests/fef/attributecontent/DESC1
-rw-r--r--searchlib/src/tests/fef/attributecontent/FILES1
-rw-r--r--searchlib/src/tests/fef/attributecontent/attributecontent_test.cpp106
-rw-r--r--searchlib/src/tests/fef/featurenamebuilder/.gitignore4
-rw-r--r--searchlib/src/tests/fef/featurenamebuilder/CMakeLists.txt8
-rw-r--r--searchlib/src/tests/fef/featurenamebuilder/DESC1
-rw-r--r--searchlib/src/tests/fef/featurenamebuilder/FILES1
-rw-r--r--searchlib/src/tests/fef/featurenamebuilder/featurenamebuilder_test.cpp78
-rw-r--r--searchlib/src/tests/fef/featurenameparser/.gitignore4
-rw-r--r--searchlib/src/tests/fef/featurenameparser/CMakeLists.txt8
-rw-r--r--searchlib/src/tests/fef/featurenameparser/DESC1
-rw-r--r--searchlib/src/tests/fef/featurenameparser/FILES1
-rw-r--r--searchlib/src/tests/fef/featurenameparser/featurenameparser_test.cpp151
-rw-r--r--searchlib/src/tests/fef/featurenameparser/parsetest.txt55
-rw-r--r--searchlib/src/tests/fef/featureoverride/.gitignore4
-rw-r--r--searchlib/src/tests/fef/featureoverride/CMakeLists.txt8
-rw-r--r--searchlib/src/tests/fef/featureoverride/DESC1
-rw-r--r--searchlib/src/tests/fef/featureoverride/FILES1
-rw-r--r--searchlib/src/tests/fef/featureoverride/featureoverride.cpp175
-rw-r--r--searchlib/src/tests/fef/fef_test.cpp116
-rw-r--r--searchlib/src/tests/fef/object_passing/.gitignore1
-rw-r--r--searchlib/src/tests/fef/object_passing/CMakeLists.txt8
-rw-r--r--searchlib/src/tests/fef/object_passing/object_passing_test.cpp128
-rw-r--r--searchlib/src/tests/fef/parameter/.gitignore4
-rw-r--r--searchlib/src/tests/fef/parameter/CMakeLists.txt8
-rw-r--r--searchlib/src/tests/fef/parameter/DESC1
-rw-r--r--searchlib/src/tests/fef/parameter/FILES1
-rw-r--r--searchlib/src/tests/fef/parameter/parameter_test.cpp267
-rw-r--r--searchlib/src/tests/fef/phrasesplitter/.gitignore6
-rw-r--r--searchlib/src/tests/fef/phrasesplitter/CMakeLists.txt15
-rw-r--r--searchlib/src/tests/fef/phrasesplitter/DESC1
-rw-r--r--searchlib/src/tests/fef/phrasesplitter/FILES1
-rw-r--r--searchlib/src/tests/fef/phrasesplitter/benchmark.cpp84
-rw-r--r--searchlib/src/tests/fef/phrasesplitter/phrasesplitter_test.cpp242
-rw-r--r--searchlib/src/tests/fef/properties/.gitignore4
-rw-r--r--searchlib/src/tests/fef/properties/CMakeLists.txt8
-rw-r--r--searchlib/src/tests/fef/properties/DESC1
-rw-r--r--searchlib/src/tests/fef/properties/FILES1
-rw-r--r--searchlib/src/tests/fef/properties/properties_test.cpp425
-rw-r--r--searchlib/src/tests/fef/rank_program/.gitignore1
-rw-r--r--searchlib/src/tests/fef/rank_program/CMakeLists.txt8
-rw-r--r--searchlib/src/tests/fef/rank_program/FILES1
-rw-r--r--searchlib/src/tests/fef/rank_program/rank_program_test.cpp172
-rw-r--r--searchlib/src/tests/fef/resolver/.gitignore4
-rw-r--r--searchlib/src/tests/fef/resolver/CMakeLists.txt8
-rw-r--r--searchlib/src/tests/fef/resolver/DESC1
-rw-r--r--searchlib/src/tests/fef/resolver/FILES1
-rw-r--r--searchlib/src/tests/fef/resolver/resolver_test.cpp93
-rw-r--r--searchlib/src/tests/fef/table/.gitignore4
-rw-r--r--searchlib/src/tests/fef/table/CMakeLists.txt8
-rw-r--r--searchlib/src/tests/fef/table/DESC1
-rw-r--r--searchlib/src/tests/fef/table/FILES1
-rw-r--r--searchlib/src/tests/fef/table/table_test.cpp159
-rw-r--r--searchlib/src/tests/fef/table/tables1/a3
-rw-r--r--searchlib/src/tests/fef/table/tables2/a3
-rw-r--r--searchlib/src/tests/fef/table/tables2/b3
-rw-r--r--searchlib/src/tests/fef/termfieldmodel/.gitignore4
-rw-r--r--searchlib/src/tests/fef/termfieldmodel/CMakeLists.txt8
-rw-r--r--searchlib/src/tests/fef/termfieldmodel/DESC1
-rw-r--r--searchlib/src/tests/fef/termfieldmodel/FILES1
-rw-r--r--searchlib/src/tests/fef/termfieldmodel/termfieldmodel_test.cpp209
-rw-r--r--searchlib/src/tests/fef/termmatchdatamerger/.gitignore4
-rw-r--r--searchlib/src/tests/fef/termmatchdatamerger/CMakeLists.txt8
-rw-r--r--searchlib/src/tests/fef/termmatchdatamerger/DESC1
-rw-r--r--searchlib/src/tests/fef/termmatchdatamerger/FILES1
-rw-r--r--searchlib/src/tests/fef/termmatchdatamerger/termmatchdatamerger_test.cpp281
-rw-r--r--searchlib/src/tests/fileheaderinspect/.gitignore6
-rw-r--r--searchlib/src/tests/fileheaderinspect/CMakeLists.txt8
-rw-r--r--searchlib/src/tests/fileheaderinspect/DESC1
-rw-r--r--searchlib/src/tests/fileheaderinspect/FILES1
-rw-r--r--searchlib/src/tests/fileheaderinspect/fileheaderinspect.cpp131
-rw-r--r--searchlib/src/tests/fileheadertk/.gitignore6
-rw-r--r--searchlib/src/tests/fileheadertk/CMakeLists.txt8
-rw-r--r--searchlib/src/tests/fileheadertk/DESC1
-rw-r--r--searchlib/src/tests/fileheadertk/FILES1
-rw-r--r--searchlib/src/tests/fileheadertk/fileheadertk_test.cpp47
-rw-r--r--searchlib/src/tests/forcelink/.gitignore4
-rw-r--r--searchlib/src/tests/forcelink/CMakeLists.txt8
-rw-r--r--searchlib/src/tests/forcelink/DESC1
-rw-r--r--searchlib/src/tests/forcelink/FILES1
-rw-r--r--searchlib/src/tests/forcelink/forcelink.cpp18
-rw-r--r--searchlib/src/tests/grouping/.gitignore11
-rw-r--r--searchlib/src/tests/grouping/CMakeLists.txt29
-rw-r--r--searchlib/src/tests/grouping/DESC1
-rw-r--r--searchlib/src/tests/grouping/FILES4
-rw-r--r--searchlib/src/tests/grouping/grouping_serialization_test.cpp339
-rw-r--r--searchlib/src/tests/grouping/grouping_test.cpp1912
-rw-r--r--searchlib/src/tests/grouping/hyperloglog_test.cpp92
-rw-r--r--searchlib/src/tests/grouping/sketch_test.cpp151
-rw-r--r--searchlib/src/tests/groupingengine/.gitignore7
-rw-r--r--searchlib/src/tests/groupingengine/CMakeLists.txt15
-rw-r--r--searchlib/src/tests/groupingengine/DESC1
-rw-r--r--searchlib/src/tests/groupingengine/FILES4
-rw-r--r--searchlib/src/tests/groupingengine/groupingengine_benchmark.cpp292
-rw-r--r--searchlib/src/tests/groupingengine/groupingengine_test.cpp1985
-rw-r--r--searchlib/src/tests/hitcollector/.gitignore4
-rw-r--r--searchlib/src/tests/hitcollector/CMakeLists.txt8
-rw-r--r--searchlib/src/tests/hitcollector/DESC1
-rw-r--r--searchlib/src/tests/hitcollector/FILES1
-rw-r--r--searchlib/src/tests/hitcollector/hitcollector_test.cpp493
-rw-r--r--searchlib/src/tests/index/docbuilder/.gitignore5
-rw-r--r--searchlib/src/tests/index/docbuilder/CMakeLists.txt8
-rw-r--r--searchlib/src/tests/index/docbuilder/DESC1
-rw-r--r--searchlib/src/tests/index/docbuilder/FILES1
-rw-r--r--searchlib/src/tests/index/docbuilder/docbuilder_test.cpp531
-rw-r--r--searchlib/src/tests/index/doctypebuilder/.gitignore5
-rw-r--r--searchlib/src/tests/index/doctypebuilder/CMakeLists.txt8
-rw-r--r--searchlib/src/tests/index/doctypebuilder/DESC1
-rw-r--r--searchlib/src/tests/index/doctypebuilder/FILES1
-rw-r--r--searchlib/src/tests/index/doctypebuilder/doctypebuilder_test.cpp88
-rw-r--r--searchlib/src/tests/indexmetainfo/.gitignore5
-rw-r--r--searchlib/src/tests/indexmetainfo/CMakeLists.txt8
-rw-r--r--searchlib/src/tests/indexmetainfo/DESC2
-rw-r--r--searchlib/src/tests/indexmetainfo/FILES1
-rw-r--r--searchlib/src/tests/indexmetainfo/bogus1.txt1
-rw-r--r--searchlib/src/tests/indexmetainfo/bogus10.txt4
-rw-r--r--searchlib/src/tests/indexmetainfo/bogus2.txt1
-rw-r--r--searchlib/src/tests/indexmetainfo/bogus3.txt1
-rw-r--r--searchlib/src/tests/indexmetainfo/bogus4.txt1
-rw-r--r--searchlib/src/tests/indexmetainfo/bogus5.txt7
-rw-r--r--searchlib/src/tests/indexmetainfo/bogus6.txt7
-rw-r--r--searchlib/src/tests/indexmetainfo/bogus7.txt4
-rw-r--r--searchlib/src/tests/indexmetainfo/bogus8.txt4
-rw-r--r--searchlib/src/tests/indexmetainfo/bogus9.txt4
-rw-r--r--searchlib/src/tests/indexmetainfo/indexmetainfo_test.cpp127
-rw-r--r--searchlib/src/tests/indexmetainfo/meta-info.txt12
-rw-r--r--searchlib/src/tests/ld-library-path/.gitignore4
-rw-r--r--searchlib/src/tests/ld-library-path/CMakeLists.txt7
-rw-r--r--searchlib/src/tests/ld-library-path/ld-library-path.cpp12
-rw-r--r--searchlib/src/tests/memoryindex/btree/.gitignore6
-rw-r--r--searchlib/src/tests/memoryindex/btree/CMakeLists.txt15
-rw-r--r--searchlib/src/tests/memoryindex/btree/DESC1
-rw-r--r--searchlib/src/tests/memoryindex/btree/FILES1
-rw-r--r--searchlib/src/tests/memoryindex/btree/btree_test.cpp1282
-rw-r--r--searchlib/src/tests/memoryindex/btree/frozenbtree_test.cpp513
-rw-r--r--searchlib/src/tests/memoryindex/compact_document_words_store/.gitignore1
-rw-r--r--searchlib/src/tests/memoryindex/compact_document_words_store/CMakeLists.txt8
-rw-r--r--searchlib/src/tests/memoryindex/compact_document_words_store/DESC1
-rw-r--r--searchlib/src/tests/memoryindex/compact_document_words_store/FILES1
-rw-r--r--searchlib/src/tests/memoryindex/compact_document_words_store/compact_document_words_store_test.cpp157
-rw-r--r--searchlib/src/tests/memoryindex/datastore/.gitignore8
-rw-r--r--searchlib/src/tests/memoryindex/datastore/CMakeLists.txt22
-rw-r--r--searchlib/src/tests/memoryindex/datastore/DESC1
-rw-r--r--searchlib/src/tests/memoryindex/datastore/FILES2
-rw-r--r--searchlib/src/tests/memoryindex/datastore/datastore_test.cpp432
-rw-r--r--searchlib/src/tests/memoryindex/datastore/featurestore_test.cpp245
-rw-r--r--searchlib/src/tests/memoryindex/datastore/wordstore_test.cpp104
-rw-r--r--searchlib/src/tests/memoryindex/dictionary/.gitignore6
-rw-r--r--searchlib/src/tests/memoryindex/dictionary/CMakeLists.txt9
-rw-r--r--searchlib/src/tests/memoryindex/dictionary/DESC1
-rw-r--r--searchlib/src/tests/memoryindex/dictionary/FILES1
-rw-r--r--searchlib/src/tests/memoryindex/dictionary/dictionary_test.cpp1528
-rw-r--r--searchlib/src/tests/memoryindex/document_remover/.gitignore1
-rw-r--r--searchlib/src/tests/memoryindex/document_remover/CMakeLists.txt8
-rw-r--r--searchlib/src/tests/memoryindex/document_remover/DESC1
-rw-r--r--searchlib/src/tests/memoryindex/document_remover/FILES1
-rw-r--r--searchlib/src/tests/memoryindex/document_remover/document_remover_test.cpp144
-rw-r--r--searchlib/src/tests/memoryindex/documentinverter/.gitignore1
-rw-r--r--searchlib/src/tests/memoryindex/documentinverter/CMakeLists.txt9
-rw-r--r--searchlib/src/tests/memoryindex/documentinverter/DESC1
-rw-r--r--searchlib/src/tests/memoryindex/documentinverter/FILES1
-rw-r--r--searchlib/src/tests/memoryindex/documentinverter/documentinverter_test.cpp294
-rw-r--r--searchlib/src/tests/memoryindex/fieldinverter/.gitignore1
-rw-r--r--searchlib/src/tests/memoryindex/fieldinverter/CMakeLists.txt9
-rw-r--r--searchlib/src/tests/memoryindex/fieldinverter/DESC1
-rw-r--r--searchlib/src/tests/memoryindex/fieldinverter/FILES1
-rw-r--r--searchlib/src/tests/memoryindex/fieldinverter/fieldinverter_test.cpp338
-rw-r--r--searchlib/src/tests/memoryindex/memoryindex/.gitignore5
-rw-r--r--searchlib/src/tests/memoryindex/memoryindex/CMakeLists.txt8
-rw-r--r--searchlib/src/tests/memoryindex/memoryindex/DESC1
-rw-r--r--searchlib/src/tests/memoryindex/memoryindex/FILES1
-rw-r--r--searchlib/src/tests/memoryindex/memoryindex/memoryindex_test.cpp438
-rw-r--r--searchlib/src/tests/memoryindex/urlfieldinverter/.gitignore1
-rw-r--r--searchlib/src/tests/memoryindex/urlfieldinverter/CMakeLists.txt9
-rw-r--r--searchlib/src/tests/memoryindex/urlfieldinverter/DESC1
-rw-r--r--searchlib/src/tests/memoryindex/urlfieldinverter/FILES1
-rw-r--r--searchlib/src/tests/memoryindex/urlfieldinverter/urlfieldinverter_test.cpp579
-rw-r--r--searchlib/src/tests/memorytub/.gitignore4
-rw-r--r--searchlib/src/tests/memorytub/CMakeLists.txt8
-rw-r--r--searchlib/src/tests/memorytub/memorytub_test.cpp205
-rw-r--r--searchlib/src/tests/nativerank/.gitignore2
-rw-r--r--searchlib/src/tests/nativerank/CMakeLists.txt12
-rw-r--r--searchlib/src/tests/nativerank/nativerank.cpp828
-rw-r--r--searchlib/src/tests/nearsearch/.gitignore4
-rw-r--r--searchlib/src/tests/nearsearch/CMakeLists.txt8
-rw-r--r--searchlib/src/tests/nearsearch/DESC1
-rw-r--r--searchlib/src/tests/nearsearch/FILES1
-rw-r--r--searchlib/src/tests/nearsearch/nearsearch_test.cpp247
-rw-r--r--searchlib/src/tests/postinglistbm/.gitignore10
-rw-r--r--searchlib/src/tests/postinglistbm/CMakeLists.txt10
-rw-r--r--searchlib/src/tests/postinglistbm/andstress.cpp536
-rw-r--r--searchlib/src/tests/postinglistbm/andstress.h43
-rw-r--r--searchlib/src/tests/postinglistbm/postinglistbm.cpp491
-rw-r--r--searchlib/src/tests/postinglistbm/skip.txt75
-rw-r--r--searchlib/src/tests/predicate/.gitignore13
-rw-r--r--searchlib/src/tests/predicate/CMakeLists.txt92
-rw-r--r--searchlib/src/tests/predicate/OWNERS1
-rw-r--r--searchlib/src/tests/predicate/document_features_store_test.cpp225
-rw-r--r--searchlib/src/tests/predicate/predicate_bounds_posting_list_test.cpp107
-rw-r--r--searchlib/src/tests/predicate/predicate_index_test.cpp363
-rw-r--r--searchlib/src/tests/predicate/predicate_interval_posting_list_test.cpp80
-rw-r--r--searchlib/src/tests/predicate/predicate_interval_store_test.cpp152
-rw-r--r--searchlib/src/tests/predicate/predicate_range_term_expander_test.cpp332
-rw-r--r--searchlib/src/tests/predicate/predicate_ref_cache_test.cpp106
-rw-r--r--searchlib/src/tests/predicate/predicate_tree_analyzer_test.cpp157
-rw-r--r--searchlib/src/tests/predicate/predicate_tree_annotator_test.cpp381
-rw-r--r--searchlib/src/tests/predicate/predicate_zero_constraint_posting_list_test.cpp58
-rw-r--r--searchlib/src/tests/predicate/predicate_zstar_compressed_posting_list_test.cpp95
-rw-r--r--searchlib/src/tests/predicate/simple_index_test.cpp333
-rw-r--r--searchlib/src/tests/predicate/tree_crumbs_test.cpp65
-rw-r--r--searchlib/src/tests/prettyfloat/.gitignore4
-rw-r--r--searchlib/src/tests/prettyfloat/CMakeLists.txt8
-rw-r--r--searchlib/src/tests/prettyfloat/DESC1
-rw-r--r--searchlib/src/tests/prettyfloat/FILES1
-rw-r--r--searchlib/src/tests/prettyfloat/prettyfloat.cpp32
-rw-r--r--searchlib/src/tests/query/.gitignore10
-rw-r--r--searchlib/src/tests/query/CMakeLists.txt50
-rw-r--r--searchlib/src/tests/query/DESC1
-rw-r--r--searchlib/src/tests/query/FILES2
-rw-r--r--searchlib/src/tests/query/customtypevisitor_test.cpp157
-rw-r--r--searchlib/src/tests/query/query-old-large.cpp51
-rw-r--r--searchlib/src/tests/query/query-old.cpp650
-rw-r--r--searchlib/src/tests/query/query_visitor_test.cpp114
-rw-r--r--searchlib/src/tests/query/querybuilder_test.cpp615
-rw-r--r--searchlib/src/tests/query/stackdumpquerycreator_test.cpp116
-rw-r--r--searchlib/src/tests/query/templatetermvisitor_test.cpp87
-rw-r--r--searchlib/src/tests/queryeval/.gitignore5
-rw-r--r--searchlib/src/tests/queryeval/CMakeLists.txt9
-rw-r--r--searchlib/src/tests/queryeval/DESC1
-rw-r--r--searchlib/src/tests/queryeval/FILES1
-rw-r--r--searchlib/src/tests/queryeval/blueprint/.cvsignore3
-rw-r--r--searchlib/src/tests/queryeval/blueprint/.gitignore8
-rw-r--r--searchlib/src/tests/queryeval/blueprint/CMakeLists.txt23
-rw-r--r--searchlib/src/tests/queryeval/blueprint/DESC1
-rw-r--r--searchlib/src/tests/queryeval/blueprint/FILES1
-rw-r--r--searchlib/src/tests/queryeval/blueprint/blueprint_test.cpp766
-rw-r--r--searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp1332
-rw-r--r--searchlib/src/tests/queryeval/blueprint/leaf_blueprints_test.cpp125
-rw-r--r--searchlib/src/tests/queryeval/blueprint/mysearch.h155
-rw-r--r--searchlib/src/tests/queryeval/booleanmatchiteratorwrapper/.cvsignore3
-rw-r--r--searchlib/src/tests/queryeval/booleanmatchiteratorwrapper/.gitignore4
-rw-r--r--searchlib/src/tests/queryeval/booleanmatchiteratorwrapper/CMakeLists.txt9
-rw-r--r--searchlib/src/tests/queryeval/booleanmatchiteratorwrapper/DESC1
-rw-r--r--searchlib/src/tests/queryeval/booleanmatchiteratorwrapper/FILES1
-rw-r--r--searchlib/src/tests/queryeval/booleanmatchiteratorwrapper/booleanmatchiteratorwrapper_test.cpp133
-rw-r--r--searchlib/src/tests/queryeval/dot_product/.gitignore1
-rw-r--r--searchlib/src/tests/queryeval/dot_product/CMakeLists.txt9
-rw-r--r--searchlib/src/tests/queryeval/dot_product/FILES1
-rw-r--r--searchlib/src/tests/queryeval/dot_product/dot_product_test.cpp219
-rw-r--r--searchlib/src/tests/queryeval/equiv/.cvsignore3
-rw-r--r--searchlib/src/tests/queryeval/equiv/.gitignore4
-rw-r--r--searchlib/src/tests/queryeval/equiv/CMakeLists.txt8
-rw-r--r--searchlib/src/tests/queryeval/equiv/DESC1
-rw-r--r--searchlib/src/tests/queryeval/equiv/FILES1
-rw-r--r--searchlib/src/tests/queryeval/equiv/equiv_test.cpp130
-rw-r--r--searchlib/src/tests/queryeval/fake_searchable/.cvsignore3
-rw-r--r--searchlib/src/tests/queryeval/fake_searchable/.gitignore4
-rw-r--r--searchlib/src/tests/queryeval/fake_searchable/CMakeLists.txt8
-rw-r--r--searchlib/src/tests/queryeval/fake_searchable/DESC1
-rw-r--r--searchlib/src/tests/queryeval/fake_searchable/FILES1
-rw-r--r--searchlib/src/tests/queryeval/fake_searchable/fake_searchable_test.cpp379
-rw-r--r--searchlib/src/tests/queryeval/getnodeweight/.gitignore1
-rw-r--r--searchlib/src/tests/queryeval/getnodeweight/CMakeLists.txt8
-rw-r--r--searchlib/src/tests/queryeval/getnodeweight/getnodeweight_test.cpp49
-rw-r--r--searchlib/src/tests/queryeval/monitoring_search_iterator/.gitignore1
-rw-r--r--searchlib/src/tests/queryeval/monitoring_search_iterator/CMakeLists.txt9
-rw-r--r--searchlib/src/tests/queryeval/monitoring_search_iterator/DESC1
-rw-r--r--searchlib/src/tests/queryeval/monitoring_search_iterator/FILES1
-rw-r--r--searchlib/src/tests/queryeval/monitoring_search_iterator/monitoring_search_iterator_test.cpp325
-rw-r--r--searchlib/src/tests/queryeval/multibitvectoriterator/.gitignore2
-rw-r--r--searchlib/src/tests/queryeval/multibitvectoriterator/CMakeLists.txt15
-rw-r--r--searchlib/src/tests/queryeval/multibitvectoriterator/DESC1
-rw-r--r--searchlib/src/tests/queryeval/multibitvectoriterator/FILES2
-rw-r--r--searchlib/src/tests/queryeval/multibitvectoriterator/multibitvectoriterator_bench.cpp138
-rw-r--r--searchlib/src/tests/queryeval/multibitvectoriterator/multibitvectoriterator_test.cpp531
-rw-r--r--searchlib/src/tests/queryeval/parallel_weak_and/.gitignore1
-rw-r--r--searchlib/src/tests/queryeval/parallel_weak_and/CMakeLists.txt9
-rw-r--r--searchlib/src/tests/queryeval/parallel_weak_and/DESC1
-rw-r--r--searchlib/src/tests/queryeval/parallel_weak_and/FILES2
-rw-r--r--searchlib/src/tests/queryeval/parallel_weak_and/parallel_weak_and_test.cpp681
-rw-r--r--searchlib/src/tests/queryeval/predicate/.gitignore2
-rw-r--r--searchlib/src/tests/queryeval/predicate/CMakeLists.txt15
-rw-r--r--searchlib/src/tests/queryeval/predicate/predicate_blueprint_test.cpp241
-rw-r--r--searchlib/src/tests/queryeval/predicate/predicate_search_test.cpp370
-rw-r--r--searchlib/src/tests/queryeval/queryeval.cpp691
-rw-r--r--searchlib/src/tests/queryeval/simple_phrase/.cvsignore3
-rw-r--r--searchlib/src/tests/queryeval/simple_phrase/.gitignore4
-rw-r--r--searchlib/src/tests/queryeval/simple_phrase/CMakeLists.txt8
-rw-r--r--searchlib/src/tests/queryeval/simple_phrase/DESC1
-rw-r--r--searchlib/src/tests/queryeval/simple_phrase/FILES1
-rw-r--r--searchlib/src/tests/queryeval/simple_phrase/simple_phrase_test.cpp341
-rw-r--r--searchlib/src/tests/queryeval/sourceblender/.gitignore4
-rw-r--r--searchlib/src/tests/queryeval/sourceblender/CMakeLists.txt9
-rw-r--r--searchlib/src/tests/queryeval/sourceblender/DESC1
-rw-r--r--searchlib/src/tests/queryeval/sourceblender/FILES1
-rw-r--r--searchlib/src/tests/queryeval/sourceblender/sourceblender.cpp169
-rw-r--r--searchlib/src/tests/queryeval/sparse_vector_benchmark/.gitignore6
-rw-r--r--searchlib/src/tests/queryeval/sparse_vector_benchmark/CMakeLists.txt8
-rw-r--r--searchlib/src/tests/queryeval/sparse_vector_benchmark/FILES1
-rw-r--r--searchlib/src/tests/queryeval/sparse_vector_benchmark/sparse_vector_benchmark_test.cpp429
-rw-r--r--searchlib/src/tests/queryeval/termwise_eval/.gitignore1
-rw-r--r--searchlib/src/tests/queryeval/termwise_eval/CMakeLists.txt9
-rw-r--r--searchlib/src/tests/queryeval/termwise_eval/termwise_eval_test.cpp641
-rw-r--r--searchlib/src/tests/queryeval/weak_and/.gitignore7
-rw-r--r--searchlib/src/tests/queryeval/weak_and/CMakeLists.txt30
-rw-r--r--searchlib/src/tests/queryeval/weak_and/FILES2
-rw-r--r--searchlib/src/tests/queryeval/weak_and/parallel_weak_and_bench.cpp19
-rw-r--r--searchlib/src/tests/queryeval/weak_and/rise_wand.h132
-rw-r--r--searchlib/src/tests/queryeval/weak_and/rise_wand.hpp238
-rw-r--r--searchlib/src/tests/queryeval/weak_and/wand_bench_setup.hpp248
-rw-r--r--searchlib/src/tests/queryeval/weak_and/weak_and_bench.cpp19
-rw-r--r--searchlib/src/tests/queryeval/weak_and/weak_and_test.cpp128
-rw-r--r--searchlib/src/tests/queryeval/weak_and/weak_and_test_expensive.cpp102
-rw-r--r--searchlib/src/tests/queryeval/weak_and_heap/.gitignore1
-rw-r--r--searchlib/src/tests/queryeval/weak_and_heap/CMakeLists.txt8
-rw-r--r--searchlib/src/tests/queryeval/weak_and_heap/DESC1
-rw-r--r--searchlib/src/tests/queryeval/weak_and_heap/FILES1
-rw-r--r--searchlib/src/tests/queryeval/weak_and_heap/weak_and_heap_test.cpp101
-rw-r--r--searchlib/src/tests/queryeval/weak_and_scorers/.gitignore1
-rw-r--r--searchlib/src/tests/queryeval/weak_and_scorers/CMakeLists.txt8
-rw-r--r--searchlib/src/tests/queryeval/weak_and_scorers/DESC1
-rw-r--r--searchlib/src/tests/queryeval/weak_and_scorers/FILES1
-rw-r--r--searchlib/src/tests/queryeval/weak_and_scorers/weak_and_scorers_test.cpp67
-rw-r--r--searchlib/src/tests/queryeval/weighted_set_term/.gitignore1
-rw-r--r--searchlib/src/tests/queryeval/weighted_set_term/CMakeLists.txt9
-rw-r--r--searchlib/src/tests/queryeval/weighted_set_term/DESC1
-rw-r--r--searchlib/src/tests/queryeval/weighted_set_term/FILES1
-rw-r--r--searchlib/src/tests/queryeval/weighted_set_term/weighted_set_term_test.cpp240
-rw-r--r--searchlib/src/tests/rankingexpression/feature_name_extractor/.gitignore1
-rw-r--r--searchlib/src/tests/rankingexpression/feature_name_extractor/CMakeLists.txt8
-rw-r--r--searchlib/src/tests/rankingexpression/feature_name_extractor/FILES1
-rw-r--r--searchlib/src/tests/rankingexpression/feature_name_extractor/feature_name_extractor_test.cpp79
-rw-r--r--searchlib/src/tests/rankingexpression/rankingexpressionlist160
-rw-r--r--searchlib/src/tests/ranksetup/.gitignore5
-rw-r--r--searchlib/src/tests/ranksetup/CMakeLists.txt8
-rw-r--r--searchlib/src/tests/ranksetup/DESC1
-rw-r--r--searchlib/src/tests/ranksetup/FILES1
-rw-r--r--searchlib/src/tests/ranksetup/ranksetup_test.cpp922
-rw-r--r--searchlib/src/tests/ranksetup/verify_feature/.gitignore1
-rw-r--r--searchlib/src/tests/ranksetup/verify_feature/CMakeLists.txt8
-rw-r--r--searchlib/src/tests/ranksetup/verify_feature/FILES1
-rw-r--r--searchlib/src/tests/ranksetup/verify_feature/verify_feature_test.cpp58
-rw-r--r--searchlib/src/tests/sha1/.gitignore0
-rw-r--r--searchlib/src/tests/sort/.gitignore8
-rw-r--r--searchlib/src/tests/sort/CMakeLists.txt22
-rw-r--r--searchlib/src/tests/sort/DESC1
-rw-r--r--searchlib/src/tests/sort/FILES1
-rw-r--r--searchlib/src/tests/sort/javaorder.zh158
-rw-r--r--searchlib/src/tests/sort/sort_test.cpp295
-rw-r--r--searchlib/src/tests/sort/sortbenchmark.cpp115
-rw-r--r--searchlib/src/tests/sort/uca.cpp121
-rw-r--r--searchlib/src/tests/sortresults/.gitignore7
-rw-r--r--searchlib/src/tests/sortresults/CMakeLists.txt8
-rw-r--r--searchlib/src/tests/sortresults/sorttest.cpp99
-rw-r--r--searchlib/src/tests/sortspec/.gitignore4
-rw-r--r--searchlib/src/tests/sortspec/CMakeLists.txt8
-rw-r--r--searchlib/src/tests/sortspec/multilevelsort.cpp413
-rw-r--r--searchlib/src/tests/stackdumpiterator/.gitignore7
-rw-r--r--searchlib/src/tests/stackdumpiterator/CMakeLists.txt8
-rw-r--r--searchlib/src/tests/stackdumpiterator/stackdumpiteratortest.cpp316
-rw-r--r--searchlib/src/tests/stackdumpiterator/stackdumpiteratortest.h17
-rw-r--r--searchlib/src/tests/stackdumpiterator/testowner.ATS1
-rw-r--r--searchlib/src/tests/stringenum/.gitignore8
-rw-r--r--searchlib/src/tests/stringenum/CMakeLists.txt8
-rw-r--r--searchlib/src/tests/stringenum/stringenum_test.cpp147
-rw-r--r--searchlib/src/tests/transactionlog/.gitignore7
-rw-r--r--searchlib/src/tests/transactionlog/CMakeLists.txt15
-rw-r--r--searchlib/src/tests/transactionlog/DESC1
-rw-r--r--searchlib/src/tests/transactionlog/FILES2
-rw-r--r--searchlib/src/tests/transactionlog/translogclient_test.cpp926
-rwxr-xr-xsearchlib/src/tests/transactionlog/translogclient_test.sh4
-rw-r--r--searchlib/src/tests/transactionlog/translogserver_test.cpp19
-rw-r--r--searchlib/src/tests/transactionlogstress/.gitignore4
-rw-r--r--searchlib/src/tests/transactionlogstress/CMakeLists.txt8
-rw-r--r--searchlib/src/tests/transactionlogstress/DESC1
-rw-r--r--searchlib/src/tests/transactionlogstress/FILES1
-rw-r--r--searchlib/src/tests/transactionlogstress/translogstress.cpp875
-rw-r--r--searchlib/src/tests/true/.gitignore4
-rw-r--r--searchlib/src/tests/true/CMakeLists.txt7
-rw-r--r--searchlib/src/tests/true/DESC1
-rw-r--r--searchlib/src/tests/true/FILES1
-rw-r--r--searchlib/src/tests/true/true.cpp15
-rw-r--r--searchlib/src/tests/url/.gitignore7
-rw-r--r--searchlib/src/tests/url/CMakeLists.txt8
-rwxr-xr-xsearchlib/src/tests/url/dotest.sh13
-rw-r--r--searchlib/src/tests/url/testurl.cpp750
-rw-r--r--searchlib/src/tests/util/.gitignore4
-rw-r--r--searchlib/src/tests/util/CMakeLists.txt8
-rw-r--r--searchlib/src/tests/util/bufferwriter/.gitignore3
-rw-r--r--searchlib/src/tests/util/bufferwriter/CMakeLists.txt16
-rw-r--r--searchlib/src/tests/util/bufferwriter/bm.cpp95
-rw-r--r--searchlib/src/tests/util/bufferwriter/bufferwriter_test.cpp158
-rw-r--r--searchlib/src/tests/util/bufferwriter/work.cpp93
-rw-r--r--searchlib/src/tests/util/bufferwriter/work.h19
-rw-r--r--searchlib/src/tests/util/ioerrorhandler/.gitignore1
-rw-r--r--searchlib/src/tests/util/ioerrorhandler/CMakeLists.txt9
-rw-r--r--searchlib/src/tests/util/ioerrorhandler/DESC1
-rw-r--r--searchlib/src/tests/util/ioerrorhandler/FILES1
-rw-r--r--searchlib/src/tests/util/ioerrorhandler/ioerrorhandler_test.cpp358
-rw-r--r--searchlib/src/tests/util/rawbuf_test.cpp198
-rw-r--r--searchlib/src/tests/util/searchable_stats/.gitignore4
-rw-r--r--searchlib/src/tests/util/searchable_stats/CMakeLists.txt8
-rw-r--r--searchlib/src/tests/util/searchable_stats/DESC1
-rw-r--r--searchlib/src/tests/util/searchable_stats/FILES1
-rw-r--r--searchlib/src/tests/util/searchable_stats/searchable_stats_test.cpp42
-rw-r--r--searchlib/src/tests/util/sigbushandler/.gitignore1
-rw-r--r--searchlib/src/tests/util/sigbushandler/CMakeLists.txt9
-rw-r--r--searchlib/src/tests/util/sigbushandler/DESC1
-rw-r--r--searchlib/src/tests/util/sigbushandler/FILES1
-rw-r--r--searchlib/src/tests/util/sigbushandler/sigbushandler_test.cpp131
-rw-r--r--searchlib/src/tests/util/slime_output_raw_buf_adapter/.gitignore1
-rw-r--r--searchlib/src/tests/util/slime_output_raw_buf_adapter/CMakeLists.txt8
-rw-r--r--searchlib/src/tests/util/slime_output_raw_buf_adapter/FILES1
-rw-r--r--searchlib/src/tests/util/slime_output_raw_buf_adapter/slime_output_raw_buf_adapter_test.cpp25
-rw-r--r--searchlib/src/tests/util/statebuf/.gitignore1
-rw-r--r--searchlib/src/tests/util/statebuf/CMakeLists.txt8
-rw-r--r--searchlib/src/tests/util/statebuf/DESC1
-rw-r--r--searchlib/src/tests/util/statebuf/FILES1
-rw-r--r--searchlib/src/tests/util/statebuf/statebuf_test.cpp109
-rw-r--r--searchlib/src/tests/util/statefile/.gitignore1
-rw-r--r--searchlib/src/tests/util/statefile/CMakeLists.txt9
-rw-r--r--searchlib/src/tests/util/statefile/DESC1
-rw-r--r--searchlib/src/tests/util/statefile/FILES1
-rw-r--r--searchlib/src/tests/util/statefile/statefile_test.cpp294
-rw-r--r--searchlib/src/vespa/searchlib/.gitignore4
-rw-r--r--searchlib/src/vespa/searchlib/CMakeLists.txt31
-rw-r--r--searchlib/src/vespa/searchlib/aggregation/.gitignore6
-rw-r--r--searchlib/src/vespa/searchlib/aggregation/CMakeLists.txt16
-rw-r--r--searchlib/src/vespa/searchlib/aggregation/OWNERS1
-rw-r--r--searchlib/src/vespa/searchlib/aggregation/aggregation.cpp448
-rw-r--r--searchlib/src/vespa/searchlib/aggregation/aggregation.h22
-rw-r--r--searchlib/src/vespa/searchlib/aggregation/aggregationresult.h116
-rw-r--r--searchlib/src/vespa/searchlib/aggregation/averageaggregationresult.h27
-rw-r--r--searchlib/src/vespa/searchlib/aggregation/countaggregationresult.h27
-rw-r--r--searchlib/src/vespa/searchlib/aggregation/expressioncountaggregationresult.h36
-rw-r--r--searchlib/src/vespa/searchlib/aggregation/forcelink.hpp29
-rw-r--r--searchlib/src/vespa/searchlib/aggregation/fs4hit.cpp61
-rw-r--r--searchlib/src/vespa/searchlib/aggregation/fs4hit.h39
-rw-r--r--searchlib/src/vespa/searchlib/aggregation/group.cpp671
-rw-r--r--searchlib/src/vespa/searchlib/aggregation/group.h201
-rw-r--r--searchlib/src/vespa/searchlib/aggregation/grouping.cpp357
-rw-r--r--searchlib/src/vespa/searchlib/aggregation/grouping.h93
-rw-r--r--searchlib/src/vespa/searchlib/aggregation/groupinglevel.cpp109
-rw-r--r--searchlib/src/vespa/searchlib/aggregation/groupinglevel.h121
-rw-r--r--searchlib/src/vespa/searchlib/aggregation/hit.cpp46
-rw-r--r--searchlib/src/vespa/searchlib/aggregation/hit.h34
-rw-r--r--searchlib/src/vespa/searchlib/aggregation/hitlist.cpp152
-rw-r--r--searchlib/src/vespa/searchlib/aggregation/hitlist.h74
-rw-r--r--searchlib/src/vespa/searchlib/aggregation/hitsaggregationresult.cpp119
-rw-r--r--searchlib/src/vespa/searchlib/aggregation/hitsaggregationresult.h76
-rw-r--r--searchlib/src/vespa/searchlib/aggregation/maxaggregationresult.h24
-rw-r--r--searchlib/src/vespa/searchlib/aggregation/minaggregationresult.h22
-rw-r--r--searchlib/src/vespa/searchlib/aggregation/modifiers.cpp54
-rw-r--r--searchlib/src/vespa/searchlib/aggregation/modifiers.h19
-rw-r--r--searchlib/src/vespa/searchlib/aggregation/perdocexpression.h46
-rw-r--r--searchlib/src/vespa/searchlib/aggregation/predicates.h47
-rw-r--r--searchlib/src/vespa/searchlib/aggregation/rawrank.cpp51
-rw-r--r--searchlib/src/vespa/searchlib/aggregation/rawrank.h35
-rw-r--r--searchlib/src/vespa/searchlib/aggregation/sumaggregationresult.h24
-rw-r--r--searchlib/src/vespa/searchlib/aggregation/vdshit.cpp45
-rw-r--r--searchlib/src/vespa/searchlib/aggregation/vdshit.h40
-rw-r--r--searchlib/src/vespa/searchlib/aggregation/xoraggregationresult.h26
-rw-r--r--searchlib/src/vespa/searchlib/attribute/.gitignore6
-rw-r--r--searchlib/src/vespa/searchlib/attribute/CMakeLists.txt88
-rw-r--r--searchlib/src/vespa/searchlib/attribute/OWNERS3
-rw-r--r--searchlib/src/vespa/searchlib/attribute/address_space.cpp20
-rw-r--r--searchlib/src/vespa/searchlib/attribute/address_space.h36
-rw-r--r--searchlib/src/vespa/searchlib/attribute/address_space_usage.cpp32
-rw-r--r--searchlib/src/vespa/searchlib/attribute/address_space_usage.h29
-rw-r--r--searchlib/src/vespa/searchlib/attribute/attribute.cpp11
-rw-r--r--searchlib/src/vespa/searchlib/attribute/attribute.h8
-rw-r--r--searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp636
-rw-r--r--searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.h21
-rw-r--r--searchlib/src/vespa/searchlib/attribute/attribute_weighted_set_blueprint.cpp187
-rw-r--r--searchlib/src/vespa/searchlib/attribute/attribute_weighted_set_blueprint.h36
-rw-r--r--searchlib/src/vespa/searchlib/attribute/attributecontext.cpp72
-rw-r--r--searchlib/src/vespa/searchlib/attribute/attributecontext.h43
-rw-r--r--searchlib/src/vespa/searchlib/attribute/attributefactory.cpp58
-rw-r--r--searchlib/src/vespa/searchlib/attribute/attributefactory.h33
-rw-r--r--searchlib/src/vespa/searchlib/attribute/attributefile.cpp457
-rw-r--r--searchlib/src/vespa/searchlib/attribute/attributefile.h113
-rw-r--r--searchlib/src/vespa/searchlib/attribute/attributefilebufferwriter.cpp48
-rw-r--r--searchlib/src/vespa/searchlib/attribute/attributefilebufferwriter.h39
-rw-r--r--searchlib/src/vespa/searchlib/attribute/attributefilesavetarget.cpp105
-rw-r--r--searchlib/src/vespa/searchlib/attribute/attributefilesavetarget.h41
-rw-r--r--searchlib/src/vespa/searchlib/attribute/attributefilewriter.cpp213
-rw-r--r--searchlib/src/vespa/searchlib/attribute/attributefilewriter.h58
-rw-r--r--searchlib/src/vespa/searchlib/attribute/attributeguard.cpp41
-rw-r--r--searchlib/src/vespa/searchlib/attribute/attributeguard.h78
-rw-r--r--searchlib/src/vespa/searchlib/attribute/attributeiterators.cpp237
-rw-r--r--searchlib/src/vespa/searchlib/attribute/attributeiterators.h567
-rw-r--r--searchlib/src/vespa/searchlib/attribute/attributeiterators.hpp62
-rw-r--r--searchlib/src/vespa/searchlib/attribute/attributemanager.cpp279
-rw-r--r--searchlib/src/vespa/searchlib/attribute/attributemanager.h73
-rw-r--r--searchlib/src/vespa/searchlib/attribute/attributememoryfilebufferwriter.cpp31
-rw-r--r--searchlib/src/vespa/searchlib/attribute/attributememoryfilebufferwriter.h25
-rw-r--r--searchlib/src/vespa/searchlib/attribute/attributememoryfilewriter.cpp60
-rw-r--r--searchlib/src/vespa/searchlib/attribute/attributememoryfilewriter.h27
-rw-r--r--searchlib/src/vespa/searchlib/attribute/attributememorysavetarget.cpp78
-rw-r--r--searchlib/src/vespa/searchlib/attribute/attributememorysavetarget.h54
-rw-r--r--searchlib/src/vespa/searchlib/attribute/attributesaver.cpp40
-rw-r--r--searchlib/src/vespa/searchlib/attribute/attributesaver.h35
-rw-r--r--searchlib/src/vespa/searchlib/attribute/attributevector.cpp1110
-rw-r--r--searchlib/src/vespa/searchlib/attribute/attributevector.h845
-rw-r--r--searchlib/src/vespa/searchlib/attribute/attributevector.hpp169
-rw-r--r--searchlib/src/vespa/searchlib/attribute/attrvector.cpp188
-rw-r--r--searchlib/src/vespa/searchlib/attribute/attrvector.h235
-rw-r--r--searchlib/src/vespa/searchlib/attribute/attrvector.hpp185
-rw-r--r--searchlib/src/vespa/searchlib/attribute/changevector.cpp21
-rw-r--r--searchlib/src/vespa/searchlib/attribute/changevector.h230
-rw-r--r--searchlib/src/vespa/searchlib/attribute/configconverter.cpp84
-rw-r--r--searchlib/src/vespa/searchlib/attribute/configconverter.h21
-rw-r--r--searchlib/src/vespa/searchlib/attribute/createarrayfastsearch.cpp69
-rw-r--r--searchlib/src/vespa/searchlib/attribute/createarraystd.cpp63
-rw-r--r--searchlib/src/vespa/searchlib/attribute/createsetfastsearch.cpp71
-rw-r--r--searchlib/src/vespa/searchlib/attribute/createsetstd.cpp62
-rw-r--r--searchlib/src/vespa/searchlib/attribute/createsinglefastsearch.cpp65
-rw-r--r--searchlib/src/vespa/searchlib/attribute/createsinglestd.cpp68
-rw-r--r--searchlib/src/vespa/searchlib/attribute/defines.cpp11
-rw-r--r--searchlib/src/vespa/searchlib/attribute/defines.h11
-rw-r--r--searchlib/src/vespa/searchlib/attribute/diversity.h226
-rw-r--r--searchlib/src/vespa/searchlib/attribute/dociditerator.cpp11
-rw-r--r--searchlib/src/vespa/searchlib/attribute/dociditerator.h105
-rw-r--r--searchlib/src/vespa/searchlib/attribute/enumattribute.cpp12
-rw-r--r--searchlib/src/vespa/searchlib/attribute/enumattribute.h98
-rw-r--r--searchlib/src/vespa/searchlib/attribute/enumattribute.hpp147
-rw-r--r--searchlib/src/vespa/searchlib/attribute/enumattributesaver.cpp51
-rw-r--r--searchlib/src/vespa/searchlib/attribute/enumattributesaver.h34
-rw-r--r--searchlib/src/vespa/searchlib/attribute/enumcomparator.cpp83
-rw-r--r--searchlib/src/vespa/searchlib/attribute/enumcomparator.h195
-rw-r--r--searchlib/src/vespa/searchlib/attribute/enumhintsearchcontext.cpp79
-rw-r--r--searchlib/src/vespa/searchlib/attribute/enumhintsearchcontext.h49
-rw-r--r--searchlib/src/vespa/searchlib/attribute/enumstore.cpp361
-rw-r--r--searchlib/src/vespa/searchlib/attribute/enumstore.h501
-rw-r--r--searchlib/src/vespa/searchlib/attribute/enumstore.hpp502
-rw-r--r--searchlib/src/vespa/searchlib/attribute/enumstorebase.cpp657
-rw-r--r--searchlib/src/vespa/searchlib/attribute/enumstorebase.h622
-rw-r--r--searchlib/src/vespa/searchlib/attribute/extendableattributes.cpp162
-rw-r--r--searchlib/src/vespa/searchlib/attribute/extendableattributes.h245
-rw-r--r--searchlib/src/vespa/searchlib/attribute/fixedsourceselector.cpp90
-rw-r--r--searchlib/src/vespa/searchlib/attribute/fixedsourceselector.h48
-rw-r--r--searchlib/src/vespa/searchlib/attribute/flagattribute.cpp283
-rw-r--r--searchlib/src/vespa/searchlib/attribute/flagattribute.h73
-rw-r--r--searchlib/src/vespa/searchlib/attribute/floatbase.cpp91
-rw-r--r--searchlib/src/vespa/searchlib/attribute/floatbase.h123
-rw-r--r--searchlib/src/vespa/searchlib/attribute/i_document_weight_attribute.cpp4
-rw-r--r--searchlib/src/vespa/searchlib/attribute/i_document_weight_attribute.h32
-rw-r--r--searchlib/src/vespa/searchlib/attribute/iattributefilewriter.h38
-rw-r--r--searchlib/src/vespa/searchlib/attribute/iattributemanager.cpp11
-rw-r--r--searchlib/src/vespa/searchlib/attribute/iattributemanager.h57
-rw-r--r--searchlib/src/vespa/searchlib/attribute/iattributesavetarget.cpp11
-rw-r--r--searchlib/src/vespa/searchlib/attribute/iattributesavetarget.h161
-rw-r--r--searchlib/src/vespa/searchlib/attribute/integerbase.cpp90
-rw-r--r--searchlib/src/vespa/searchlib/attribute/integerbase.h136
-rw-r--r--searchlib/src/vespa/searchlib/attribute/interlock.h65
-rw-r--r--searchlib/src/vespa/searchlib/attribute/ipostinglistattributebase.h33
-rw-r--r--searchlib/src/vespa/searchlib/attribute/ipostinglistsearchcontext.cpp11
-rw-r--r--searchlib/src/vespa/searchlib/attribute/ipostinglistsearchcontext.h62
-rw-r--r--searchlib/src/vespa/searchlib/attribute/iterator_pack.cpp10
-rw-r--r--searchlib/src/vespa/searchlib/attribute/iterator_pack.h56
-rw-r--r--searchlib/src/vespa/searchlib/attribute/loadedenumvalue.cpp28
-rw-r--r--searchlib/src/vespa/searchlib/attribute/loadedenumvalue.h177
-rw-r--r--searchlib/src/vespa/searchlib/attribute/loadednumericvalue.cpp124
-rw-r--r--searchlib/src/vespa/searchlib/attribute/loadednumericvalue.h69
-rw-r--r--searchlib/src/vespa/searchlib/attribute/loadedstringvalue.cpp49
-rw-r--r--searchlib/src/vespa/searchlib/attribute/loadedstringvalue.h95
-rw-r--r--searchlib/src/vespa/searchlib/attribute/loadedvalue.cpp11
-rw-r--r--searchlib/src/vespa/searchlib/attribute/loadedvalue.h163
-rw-r--r--searchlib/src/vespa/searchlib/attribute/multienumattribute.cpp12
-rw-r--r--searchlib/src/vespa/searchlib/attribute/multienumattribute.h120
-rw-r--r--searchlib/src/vespa/searchlib/attribute/multienumattribute.hpp238
-rw-r--r--searchlib/src/vespa/searchlib/attribute/multienumattributesaver.cpp122
-rw-r--r--searchlib/src/vespa/searchlib/attribute/multienumattributesaver.h40
-rw-r--r--searchlib/src/vespa/searchlib/attribute/multinumericattribute.cpp12
-rw-r--r--searchlib/src/vespa/searchlib/attribute/multinumericattribute.h333
-rw-r--r--searchlib/src/vespa/searchlib/attribute/multinumericattribute.hpp197
-rw-r--r--searchlib/src/vespa/searchlib/attribute/multinumericattributesaver.cpp130
-rw-r--r--searchlib/src/vespa/searchlib/attribute/multinumericattributesaver.h38
-rw-r--r--searchlib/src/vespa/searchlib/attribute/multinumericenumattribute.cpp12
-rw-r--r--searchlib/src/vespa/searchlib/attribute/multinumericenumattribute.h289
-rw-r--r--searchlib/src/vespa/searchlib/attribute/multinumericenumattribute.hpp145
-rw-r--r--searchlib/src/vespa/searchlib/attribute/multinumericpostattribute.cpp12
-rw-r--r--searchlib/src/vespa/searchlib/attribute/multinumericpostattribute.h133
-rw-r--r--searchlib/src/vespa/searchlib/attribute/multinumericpostattribute.hpp143
-rw-r--r--searchlib/src/vespa/searchlib/attribute/multistringattribute.cpp17
-rw-r--r--searchlib/src/vespa/searchlib/attribute/multistringattribute.h173
-rw-r--r--searchlib/src/vespa/searchlib/attribute/multistringattribute.hpp146
-rw-r--r--searchlib/src/vespa/searchlib/attribute/multistringpostattribute.cpp18
-rw-r--r--searchlib/src/vespa/searchlib/attribute/multistringpostattribute.h123
-rw-r--r--searchlib/src/vespa/searchlib/attribute/multistringpostattribute.hpp152
-rw-r--r--searchlib/src/vespa/searchlib/attribute/multivalue.h63
-rw-r--r--searchlib/src/vespa/searchlib/attribute/multivalueattribute.cpp12
-rw-r--r--searchlib/src/vespa/searchlib/attribute/multivalueattribute.h78
-rw-r--r--searchlib/src/vespa/searchlib/attribute/multivalueattribute.hpp203
-rw-r--r--searchlib/src/vespa/searchlib/attribute/multivalueattributesaver.cpp32
-rw-r--r--searchlib/src/vespa/searchlib/attribute/multivalueattributesaver.h36
-rw-r--r--searchlib/src/vespa/searchlib/attribute/multivalueattributesaverutils.h97
-rw-r--r--searchlib/src/vespa/searchlib/attribute/multivaluemapping.cpp858
-rw-r--r--searchlib/src/vespa/searchlib/attribute/multivaluemapping.h1498
-rw-r--r--searchlib/src/vespa/searchlib/attribute/multivaluemapping.hpp50
-rw-r--r--searchlib/src/vespa/searchlib/attribute/not_implemented_attribute.h182
-rw-r--r--searchlib/src/vespa/searchlib/attribute/numericbase.cpp74
-rw-r--r--searchlib/src/vespa/searchlib/attribute/numericbase.h147
-rw-r--r--searchlib/src/vespa/searchlib/attribute/postingchange.cpp275
-rw-r--r--searchlib/src/vespa/searchlib/attribute/postingchange.h86
-rw-r--r--searchlib/src/vespa/searchlib/attribute/postinglistattribute.cpp451
-rw-r--r--searchlib/src/vespa/searchlib/attribute/postinglistattribute.h165
-rw-r--r--searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.cpp93
-rw-r--r--searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.h388
-rw-r--r--searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.hpp388
-rw-r--r--searchlib/src/vespa/searchlib/attribute/postinglisttraits.cpp11
-rw-r--r--searchlib/src/vespa/searchlib/attribute/postinglisttraits.h56
-rw-r--r--searchlib/src/vespa/searchlib/attribute/postingstore.cpp638
-rw-r--r--searchlib/src/vespa/searchlib/attribute/postingstore.h361
-rw-r--r--searchlib/src/vespa/searchlib/attribute/predicate_attribute.cpp277
-rw-r--r--searchlib/src/vespa/searchlib/attribute/predicate_attribute.h104
-rw-r--r--searchlib/src/vespa/searchlib/attribute/singleenumattribute.cpp43
-rw-r--r--searchlib/src/vespa/searchlib/attribute/singleenumattribute.h152
-rw-r--r--searchlib/src/vespa/searchlib/attribute/singleenumattribute.hpp310
-rw-r--r--searchlib/src/vespa/searchlib/attribute/singleenumattributesaver.cpp48
-rw-r--r--searchlib/src/vespa/searchlib/attribute/singleenumattributesaver.h33
-rw-r--r--searchlib/src/vespa/searchlib/attribute/singlenumericattribute.cpp12
-rw-r--r--searchlib/src/vespa/searchlib/attribute/singlenumericattribute.h235
-rw-r--r--searchlib/src/vespa/searchlib/attribute/singlenumericattribute.hpp188
-rw-r--r--searchlib/src/vespa/searchlib/attribute/singlenumericattributesaver.cpp48
-rw-r--r--searchlib/src/vespa/searchlib/attribute/singlenumericattributesaver.h31
-rw-r--r--searchlib/src/vespa/searchlib/attribute/singlenumericenumattribute.cpp12
-rw-r--r--searchlib/src/vespa/searchlib/attribute/singlenumericenumattribute.h191
-rw-r--r--searchlib/src/vespa/searchlib/attribute/singlenumericenumattribute.hpp172
-rw-r--r--searchlib/src/vespa/searchlib/attribute/singlenumericpostattribute.cpp12
-rw-r--r--searchlib/src/vespa/searchlib/attribute/singlenumericpostattribute.h121
-rw-r--r--searchlib/src/vespa/searchlib/attribute/singlenumericpostattribute.hpp153
-rw-r--r--searchlib/src/vespa/searchlib/attribute/singlesmallnumericattribute.cpp242
-rw-r--r--searchlib/src/vespa/searchlib/attribute/singlesmallnumericattribute.h313
-rw-r--r--searchlib/src/vespa/searchlib/attribute/singlestringattribute.cpp14
-rw-r--r--searchlib/src/vespa/searchlib/attribute/singlestringattribute.h115
-rw-r--r--searchlib/src/vespa/searchlib/attribute/singlestringattribute.hpp80
-rw-r--r--searchlib/src/vespa/searchlib/attribute/singlestringpostattribute.cpp12
-rw-r--r--searchlib/src/vespa/searchlib/attribute/singlestringpostattribute.h127
-rw-r--r--searchlib/src/vespa/searchlib/attribute/singlestringpostattribute.hpp150
-rw-r--r--searchlib/src/vespa/searchlib/attribute/sourceselector.cpp136
-rw-r--r--searchlib/src/vespa/searchlib/attribute/sourceselector.h85
-rw-r--r--searchlib/src/vespa/searchlib/attribute/stringattribute.cpp11
-rw-r--r--searchlib/src/vespa/searchlib/attribute/stringattribute.h12
-rw-r--r--searchlib/src/vespa/searchlib/attribute/stringbase.cpp542
-rw-r--r--searchlib/src/vespa/searchlib/attribute/stringbase.h201
-rw-r--r--searchlib/src/vespa/searchlib/attribute/tensorattribute.cpp270
-rw-r--r--searchlib/src/vespa/searchlib/attribute/tensorattribute.h55
-rw-r--r--searchlib/src/vespa/searchlib/attribute/tensorattributesaver.cpp51
-rw-r--r--searchlib/src/vespa/searchlib/attribute/tensorattributesaver.h37
-rw-r--r--searchlib/src/vespa/searchlib/attribute/tensorstore.cpp133
-rw-r--r--searchlib/src/vespa/searchlib/attribute/tensorstore.h93
-rw-r--r--searchlib/src/vespa/searchlib/bitcompression/.gitignore3
-rw-r--r--searchlib/src/vespa/searchlib/bitcompression/CMakeLists.txt9
-rw-r--r--searchlib/src/vespa/searchlib/bitcompression/OWNERS1
-rw-r--r--searchlib/src/vespa/searchlib/bitcompression/compression.cpp450
-rw-r--r--searchlib/src/vespa/searchlib/bitcompression/compression.h1933
-rw-r--r--searchlib/src/vespa/searchlib/bitcompression/countcompression.cpp241
-rw-r--r--searchlib/src/vespa/searchlib/bitcompression/countcompression.h110
-rw-r--r--searchlib/src/vespa/searchlib/bitcompression/pagedict4.cpp2586
-rw-r--r--searchlib/src/vespa/searchlib/bitcompression/pagedict4.h836
-rw-r--r--searchlib/src/vespa/searchlib/bitcompression/posocccompression.cpp1355
-rw-r--r--searchlib/src/vespa/searchlib/bitcompression/posocccompression.h616
-rw-r--r--searchlib/src/vespa/searchlib/btree/CMakeLists.txt19
-rw-r--r--searchlib/src/vespa/searchlib/btree/OWNERS2
-rw-r--r--searchlib/src/vespa/searchlib/btree/btree.h170
-rw-r--r--searchlib/src/vespa/searchlib/btree/btree.hpp30
-rw-r--r--searchlib/src/vespa/searchlib/btree/btreeaggregator.cpp25
-rw-r--r--searchlib/src/vespa/searchlib/btree/btreeaggregator.h65
-rw-r--r--searchlib/src/vespa/searchlib/btree/btreeaggregator.hpp84
-rw-r--r--searchlib/src/vespa/searchlib/btree/btreebuilder.cpp29
-rw-r--r--searchlib/src/vespa/searchlib/btree/btreebuilder.h100
-rw-r--r--searchlib/src/vespa/searchlib/btree/btreebuilder.hpp459
-rw-r--r--searchlib/src/vespa/searchlib/btree/btreeinserter.cpp24
-rw-r--r--searchlib/src/vespa/searchlib/btree/btreeinserter.h62
-rw-r--r--searchlib/src/vespa/searchlib/btree/btreeinserter.hpp113
-rw-r--r--searchlib/src/vespa/searchlib/btree/btreeiterator.cpp26
-rw-r--r--searchlib/src/vespa/searchlib/btree/btreeiterator.h885
-rw-r--r--searchlib/src/vespa/searchlib/btree/btreeiterator.hpp1330
-rw-r--r--searchlib/src/vespa/searchlib/btree/btreenode.cpp36
-rw-r--r--searchlib/src/vespa/searchlib/btree/btreenode.h784
-rw-r--r--searchlib/src/vespa/searchlib/btree/btreenode.hpp402
-rw-r--r--searchlib/src/vespa/searchlib/btree/btreenodeallocator.cpp27
-rw-r--r--searchlib/src/vespa/searchlib/btree/btreenodeallocator.h271
-rw-r--r--searchlib/src/vespa/searchlib/btree/btreenodeallocator.hpp437
-rw-r--r--searchlib/src/vespa/searchlib/btree/btreenodestore.cpp117
-rw-r--r--searchlib/src/vespa/searchlib/btree/btreenodestore.h399
-rw-r--r--searchlib/src/vespa/searchlib/btree/btreenodestore.hpp98
-rw-r--r--searchlib/src/vespa/searchlib/btree/btreeremover.cpp24
-rw-r--r--searchlib/src/vespa/searchlib/btree/btreeremover.h104
-rw-r--r--searchlib/src/vespa/searchlib/btree/btreeremover.hpp185
-rw-r--r--searchlib/src/vespa/searchlib/btree/btreeroot.cpp26
-rw-r--r--searchlib/src/vespa/searchlib/btree/btreeroot.h253
-rw-r--r--searchlib/src/vespa/searchlib/btree/btreeroot.hpp486
-rw-r--r--searchlib/src/vespa/searchlib/btree/btreerootbase.cpp26
-rw-r--r--searchlib/src/vespa/searchlib/btree/btreerootbase.h121
-rw-r--r--searchlib/src/vespa/searchlib/btree/btreerootbase.hpp90
-rw-r--r--searchlib/src/vespa/searchlib/btree/btreestore.cpp36
-rw-r--r--searchlib/src/vespa/searchlib/btree/btreestore.h511
-rw-r--r--searchlib/src/vespa/searchlib/btree/btreestore.hpp1005
-rw-r--r--searchlib/src/vespa/searchlib/btree/btreetraits.h25
-rw-r--r--searchlib/src/vespa/searchlib/btree/bufferstate.cpp351
-rw-r--r--searchlib/src/vespa/searchlib/btree/bufferstate.h389
-rw-r--r--searchlib/src/vespa/searchlib/btree/datastore.cpp16
-rw-r--r--searchlib/src/vespa/searchlib/btree/datastore.h139
-rw-r--r--searchlib/src/vespa/searchlib/btree/datastore.hpp248
-rw-r--r--searchlib/src/vespa/searchlib/btree/datastorebase.cpp426
-rw-r--r--searchlib/src/vespa/searchlib/btree/datastorebase.h404
-rw-r--r--searchlib/src/vespa/searchlib/btree/entryref.h64
-rw-r--r--searchlib/src/vespa/searchlib/btree/minmaxaggrcalc.h82
-rw-r--r--searchlib/src/vespa/searchlib/btree/minmaxaggregated.h113
-rw-r--r--searchlib/src/vespa/searchlib/btree/noaggrcalc.h98
-rw-r--r--searchlib/src/vespa/searchlib/btree/noaggregated.h21
-rw-r--r--searchlib/src/vespa/searchlib/common/.gitignore6
-rw-r--r--searchlib/src/vespa/searchlib/common/CMakeLists.txt30
-rw-r--r--searchlib/src/vespa/searchlib/common/allocatedbitvector.cpp156
-rw-r--r--searchlib/src/vespa/searchlib/common/allocatedbitvector.h92
-rw-r--r--searchlib/src/vespa/searchlib/common/base.h16
-rw-r--r--searchlib/src/vespa/searchlib/common/bitvector.cpp421
-rw-r--r--searchlib/src/vespa/searchlib/common/bitvector.h354
-rw-r--r--searchlib/src/vespa/searchlib/common/bitvectorcache.cpp218
-rw-r--r--searchlib/src/vespa/searchlib/common/bitvectorcache.h86
-rw-r--r--searchlib/src/vespa/searchlib/common/bitvectoriterator.cpp116
-rw-r--r--searchlib/src/vespa/searchlib/common/bitvectoriterator.h42
-rw-r--r--searchlib/src/vespa/searchlib/common/condensedbitvectors.cpp148
-rw-r--r--searchlib/src/vespa/searchlib/common/condensedbitvectors.h39
-rw-r--r--searchlib/src/vespa/searchlib/common/converters.h69
-rw-r--r--searchlib/src/vespa/searchlib/common/docstamp.h18
-rw-r--r--searchlib/src/vespa/searchlib/common/documentlocations.cpp14
-rw-r--r--searchlib/src/vespa/searchlib/common/documentlocations.h43
-rw-r--r--searchlib/src/vespa/searchlib/common/documentsummary.cpp63
-rw-r--r--searchlib/src/vespa/searchlib/common/documentsummary.h24
-rw-r--r--searchlib/src/vespa/searchlib/common/feature.h10
-rw-r--r--searchlib/src/vespa/searchlib/common/featureset.cpp90
-rw-r--r--searchlib/src/vespa/searchlib/common/featureset.h128
-rw-r--r--searchlib/src/vespa/searchlib/common/fileheadercontext.cpp51
-rw-r--r--searchlib/src/vespa/searchlib/common/fileheadercontext.h42
-rw-r--r--searchlib/src/vespa/searchlib/common/foregroundtaskexecutor.cpp47
-rw-r--r--searchlib/src/vespa/searchlib/common/foregroundtaskexecutor.h35
-rw-r--r--searchlib/src/vespa/searchlib/common/fslimits.h37
-rw-r--r--searchlib/src/vespa/searchlib/common/gid.h54
-rw-r--r--searchlib/src/vespa/searchlib/common/growablebitvector.cpp55
-rw-r--r--searchlib/src/vespa/searchlib/common/growablebitvector.h27
-rw-r--r--searchlib/src/vespa/searchlib/common/hitrank.h12
-rw-r--r--searchlib/src/vespa/searchlib/common/identifiable.h167
-rw-r--r--searchlib/src/vespa/searchlib/common/idestructorcallback.h20
-rw-r--r--searchlib/src/vespa/searchlib/common/idocumentmetastore.h152
-rw-r--r--searchlib/src/vespa/searchlib/common/indexmetainfo.cpp354
-rw-r--r--searchlib/src/vespa/searchlib/common/indexmetainfo.h63
-rw-r--r--searchlib/src/vespa/searchlib/common/isequencedtaskexecutor.h66
-rw-r--r--searchlib/src/vespa/searchlib/common/lambdatask.h28
-rw-r--r--searchlib/src/vespa/searchlib/common/lid_usage_stats.h66
-rw-r--r--searchlib/src/vespa/searchlib/common/location.cpp205
-rw-r--r--searchlib/src/vespa/searchlib/common/location.h56
-rw-r--r--searchlib/src/vespa/searchlib/common/locationiterators.cpp121
-rw-r--r--searchlib/src/vespa/searchlib/common/locationiterators.h12
-rw-r--r--searchlib/src/vespa/searchlib/common/mapnames.cpp14
-rw-r--r--searchlib/src/vespa/searchlib/common/mapnames.h33
-rw-r--r--searchlib/src/vespa/searchlib/common/packets.cpp2198
-rw-r--r--searchlib/src/vespa/searchlib/common/packets.h593
-rw-r--r--searchlib/src/vespa/searchlib/common/partialbitvector.cpp24
-rw-r--r--searchlib/src/vespa/searchlib/common/partialbitvector.h38
-rw-r--r--searchlib/src/vespa/searchlib/common/range.h30
-rw-r--r--searchlib/src/vespa/searchlib/common/rankedhit.h35
-rw-r--r--searchlib/src/vespa/searchlib/common/rcuvector.h354
-rw-r--r--searchlib/src/vespa/searchlib/common/reserved.h19
-rw-r--r--searchlib/src/vespa/searchlib/common/resultset.cpp149
-rw-r--r--searchlib/src/vespa/searchlib/common/resultset.h51
-rw-r--r--searchlib/src/vespa/searchlib/common/scheduletaskcallback.h32
-rw-r--r--searchlib/src/vespa/searchlib/common/sequencedtaskexecutor.cpp65
-rw-r--r--searchlib/src/vespa/searchlib/common/sequencedtaskexecutor.h36
-rw-r--r--searchlib/src/vespa/searchlib/common/sequencedtaskexecutorobserver.h44
-rw-r--r--searchlib/src/vespa/searchlib/common/serialnum.h13
-rw-r--r--searchlib/src/vespa/searchlib/common/serialnumfileheadercontext.cpp36
-rw-r--r--searchlib/src/vespa/searchlib/common/serialnumfileheadercontext.h31
-rw-r--r--searchlib/src/vespa/searchlib/common/sort.cpp21
-rw-r--r--searchlib/src/vespa/searchlib/common/sort.h537
-rw-r--r--searchlib/src/vespa/searchlib/common/sortdata.cpp65
-rw-r--r--searchlib/src/vespa/searchlib/common/sortdata.h99
-rw-r--r--searchlib/src/vespa/searchlib/common/sortresults.cpp507
-rw-r--r--searchlib/src/vespa/searchlib/common/sortresults.h157
-rw-r--r--searchlib/src/vespa/searchlib/common/sortspec.cpp180
-rw-r--r--searchlib/src/vespa/searchlib/common/sortspec.h35
-rw-r--r--searchlib/src/vespa/searchlib/common/transport.h401
-rw-r--r--searchlib/src/vespa/searchlib/common/tunefileinfo.cpp11
-rw-r--r--searchlib/src/vespa/searchlib/common/tunefileinfo.h431
-rw-r--r--searchlib/src/vespa/searchlib/config/.gitignore5
-rw-r--r--searchlib/src/vespa/searchlib/config/CMakeLists.txt7
-rw-r--r--searchlib/src/vespa/searchlib/config/translogserver.def24
-rw-r--r--searchlib/src/vespa/searchlib/diskindex/.gitignore3
-rw-r--r--searchlib/src/vespa/searchlib/diskindex/CMakeLists.txt28
-rw-r--r--searchlib/src/vespa/searchlib/diskindex/OWNERS1
-rw-r--r--searchlib/src/vespa/searchlib/diskindex/bitvectordictionary.cpp108
-rw-r--r--searchlib/src/vespa/searchlib/diskindex/bitvectordictionary.h81
-rw-r--r--searchlib/src/vespa/searchlib/diskindex/bitvectorfile.cpp238
-rw-r--r--searchlib/src/vespa/searchlib/diskindex/bitvectorfile.h204
-rw-r--r--searchlib/src/vespa/searchlib/diskindex/bitvectoridxfile.cpp233
-rw-r--r--searchlib/src/vespa/searchlib/diskindex/bitvectoridxfile.h122
-rw-r--r--searchlib/src/vespa/searchlib/diskindex/bitvectorkeyscope.cpp73
-rw-r--r--searchlib/src/vespa/searchlib/diskindex/bitvectorkeyscope.h43
-rw-r--r--searchlib/src/vespa/searchlib/diskindex/checkpointfile.cpp189
-rw-r--r--searchlib/src/vespa/searchlib/diskindex/checkpointfile.h73
-rw-r--r--searchlib/src/vespa/searchlib/diskindex/dictionarywordreader.cpp71
-rw-r--r--searchlib/src/vespa/searchlib/diskindex/dictionarywordreader.h135
-rw-r--r--searchlib/src/vespa/searchlib/diskindex/diskindex.cpp476
-rw-r--r--searchlib/src/vespa/searchlib/diskindex/diskindex.h193
-rw-r--r--searchlib/src/vespa/searchlib/diskindex/disktermblueprint.cpp124
-rw-r--r--searchlib/src/vespa/searchlib/diskindex/disktermblueprint.h53
-rw-r--r--searchlib/src/vespa/searchlib/diskindex/docidmapper.cpp73
-rw-r--r--searchlib/src/vespa/searchlib/diskindex/docidmapper.h91
-rw-r--r--searchlib/src/vespa/searchlib/diskindex/extposocc.cpp157
-rw-r--r--searchlib/src/vespa/searchlib/diskindex/extposocc.h56
-rw-r--r--searchlib/src/vespa/searchlib/diskindex/fieldreader.cpp385
-rw-r--r--searchlib/src/vespa/searchlib/diskindex/fieldreader.h216
-rw-r--r--searchlib/src/vespa/searchlib/diskindex/fieldwriter.cpp258
-rw-r--r--searchlib/src/vespa/searchlib/diskindex/fieldwriter.h138
-rw-r--r--searchlib/src/vespa/searchlib/diskindex/fileheader.cpp165
-rw-r--r--searchlib/src/vespa/searchlib/diskindex/fileheader.h91
-rw-r--r--searchlib/src/vespa/searchlib/diskindex/fusion.cpp606
-rw-r--r--searchlib/src/vespa/searchlib/diskindex/fusion.h265
-rw-r--r--searchlib/src/vespa/searchlib/diskindex/indexbuilder.cpp720
-rw-r--r--searchlib/src/vespa/searchlib/diskindex/indexbuilder.h124
-rw-r--r--searchlib/src/vespa/searchlib/diskindex/pagedict4file.cpp738
-rw-r--r--searchlib/src/vespa/searchlib/diskindex/pagedict4file.h239
-rw-r--r--searchlib/src/vespa/searchlib/diskindex/pagedict4randread.cpp300
-rw-r--r--searchlib/src/vespa/searchlib/diskindex/pagedict4randread.h85
-rw-r--r--searchlib/src/vespa/searchlib/diskindex/wordnummapper.cpp110
-rw-r--r--searchlib/src/vespa/searchlib/diskindex/wordnummapper.h137
-rw-r--r--searchlib/src/vespa/searchlib/diskindex/zcposocc.cpp137
-rw-r--r--searchlib/src/vespa/searchlib/diskindex/zcposocc.h83
-rw-r--r--searchlib/src/vespa/searchlib/diskindex/zcposocciterators.cpp89
-rw-r--r--searchlib/src/vespa/searchlib/diskindex/zcposocciterators.h93
-rw-r--r--searchlib/src/vespa/searchlib/diskindex/zcposoccrandread.cpp381
-rw-r--r--searchlib/src/vespa/searchlib/diskindex/zcposoccrandread.h112
-rw-r--r--searchlib/src/vespa/searchlib/diskindex/zcposting.cpp1470
-rw-r--r--searchlib/src/vespa/searchlib/diskindex/zcposting.h495
-rw-r--r--searchlib/src/vespa/searchlib/diskindex/zcpostingiterators.cpp700
-rw-r--r--searchlib/src/vespa/searchlib/diskindex/zcpostingiterators.h200
-rw-r--r--searchlib/src/vespa/searchlib/docstore/.gitignore6
-rw-r--r--searchlib/src/vespa/searchlib/docstore/CMakeLists.txt18
-rw-r--r--searchlib/src/vespa/searchlib/docstore/OWNERS2
-rw-r--r--searchlib/src/vespa/searchlib/docstore/bytecomplens.cpp260
-rw-r--r--searchlib/src/vespa/searchlib/docstore/bytecomplens.h110
-rw-r--r--searchlib/src/vespa/searchlib/docstore/cachestats.h41
-rw-r--r--searchlib/src/vespa/searchlib/docstore/chunk.cpp139
-rw-r--r--searchlib/src/vespa/searchlib/docstore/chunk.h108
-rw-r--r--searchlib/src/vespa/searchlib/docstore/chunkformat.cpp158
-rw-r--r--searchlib/src/vespa/searchlib/docstore/chunkformat.h106
-rw-r--r--searchlib/src/vespa/searchlib/docstore/chunkformats.cpp73
-rw-r--r--searchlib/src/vespa/searchlib/docstore/chunkformats.h48
-rw-r--r--searchlib/src/vespa/searchlib/docstore/data_store_file_chunk_id.cpp17
-rw-r--r--searchlib/src/vespa/searchlib/docstore/data_store_file_chunk_id.h28
-rw-r--r--searchlib/src/vespa/searchlib/docstore/data_store_file_chunk_stats.h30
-rw-r--r--searchlib/src/vespa/searchlib/docstore/data_store_storage_stats.h36
-rw-r--r--searchlib/src/vespa/searchlib/docstore/document_store_visitor_progress.cpp31
-rw-r--r--searchlib/src/vespa/searchlib/docstore/document_store_visitor_progress.h23
-rw-r--r--searchlib/src/vespa/searchlib/docstore/documentstore.cpp392
-rw-r--r--searchlib/src/vespa/searchlib/docstore/documentstore.h244
-rw-r--r--searchlib/src/vespa/searchlib/docstore/filechunk.cpp676
-rw-r--r--searchlib/src/vespa/searchlib/docstore/filechunk.h338
-rw-r--r--searchlib/src/vespa/searchlib/docstore/ibucketizer.h25
-rw-r--r--searchlib/src/vespa/searchlib/docstore/idatastore.cpp19
-rw-r--r--searchlib/src/vespa/searchlib/docstore/idatastore.h187
-rw-r--r--searchlib/src/vespa/searchlib/docstore/idocumentstore.cpp23
-rw-r--r--searchlib/src/vespa/searchlib/docstore/idocumentstore.h203
-rw-r--r--searchlib/src/vespa/searchlib/docstore/liddatastore.h58
-rw-r--r--searchlib/src/vespa/searchlib/docstore/logdatastore.cpp1240
-rw-r--r--searchlib/src/vespa/searchlib/docstore/logdatastore.h304
-rw-r--r--searchlib/src/vespa/searchlib/docstore/logdocumentstore.cpp31
-rw-r--r--searchlib/src/vespa/searchlib/docstore/logdocumentstore.h67
-rw-r--r--searchlib/src/vespa/searchlib/docstore/writeablefilechunk.cpp868
-rw-r--r--searchlib/src/vespa/searchlib/docstore/writeablefilechunk.h185
-rw-r--r--searchlib/src/vespa/searchlib/engine/.gitignore3
-rw-r--r--searchlib/src/vespa/searchlib/engine/CMakeLists.txt19
-rw-r--r--searchlib/src/vespa/searchlib/engine/OWNERS1
-rwxr-xr-xsearchlib/src/vespa/searchlib/engine/create-class-cpp.sh29
-rw-r--r--searchlib/src/vespa/searchlib/engine/create-class-h.sh27
-rw-r--r--searchlib/src/vespa/searchlib/engine/create-interface.sh23
-rw-r--r--searchlib/src/vespa/searchlib/engine/docsumapi.cpp17
-rw-r--r--searchlib/src/vespa/searchlib/engine/docsumapi.h74
-rw-r--r--searchlib/src/vespa/searchlib/engine/docsumreply.cpp21
-rw-r--r--searchlib/src/vespa/searchlib/engine/docsumreply.h47
-rw-r--r--searchlib/src/vespa/searchlib/engine/docsumrequest.cpp38
-rw-r--r--searchlib/src/vespa/searchlib/engine/docsumrequest.h99
-rw-r--r--searchlib/src/vespa/searchlib/engine/errorcodes.cpp38
-rw-r--r--searchlib/src/vespa/searchlib/engine/errorcodes.h38
-rw-r--r--searchlib/src/vespa/searchlib/engine/monitorapi.h66
-rw-r--r--searchlib/src/vespa/searchlib/engine/monitorreply.cpp26
-rw-r--r--searchlib/src/vespa/searchlib/engine/monitorreply.h31
-rw-r--r--searchlib/src/vespa/searchlib/engine/monitorrequest.cpp17
-rw-r--r--searchlib/src/vespa/searchlib/engine/monitorrequest.h21
-rw-r--r--searchlib/src/vespa/searchlib/engine/packetconverter.cpp261
-rw-r--r--searchlib/src/vespa/searchlib/engine/packetconverter.h177
-rw-r--r--searchlib/src/vespa/searchlib/engine/propertiesmap.cpp30
-rw-r--r--searchlib/src/vespa/searchlib/engine/propertiesmap.h129
-rw-r--r--searchlib/src/vespa/searchlib/engine/request.cpp41
-rw-r--r--searchlib/src/vespa/searchlib/engine/request.h43
-rw-r--r--searchlib/src/vespa/searchlib/engine/searchapi.h66
-rw-r--r--searchlib/src/vespa/searchlib/engine/searchreply.cpp56
-rw-r--r--searchlib/src/vespa/searchlib/engine/searchreply.h84
-rw-r--r--searchlib/src/vespa/searchlib/engine/searchrequest.cpp33
-rw-r--r--searchlib/src/vespa/searchlib/engine/searchrequest.h82
-rw-r--r--searchlib/src/vespa/searchlib/engine/source_description.cpp11
-rw-r--r--searchlib/src/vespa/searchlib/engine/source_description.h17
-rw-r--r--searchlib/src/vespa/searchlib/engine/tracereply.h16
-rw-r--r--searchlib/src/vespa/searchlib/engine/transport_metrics.cpp35
-rw-r--r--searchlib/src/vespa/searchlib/engine/transport_metrics.h37
-rw-r--r--searchlib/src/vespa/searchlib/engine/transportserver.cpp427
-rw-r--r--searchlib/src/vespa/searchlib/engine/transportserver.h334
-rw-r--r--searchlib/src/vespa/searchlib/expression/.gitignore6
-rw-r--r--searchlib/src/vespa/searchlib/expression/CMakeLists.txt32
-rw-r--r--searchlib/src/vespa/searchlib/expression/OWNERS1
-rw-r--r--searchlib/src/vespa/searchlib/expression/addfunctionnode.h22
-rw-r--r--searchlib/src/vespa/searchlib/expression/aggregationrefnode.cpp69
-rw-r--r--searchlib/src/vespa/searchlib/expression/aggregationrefnode.h50
-rw-r--r--searchlib/src/vespa/searchlib/expression/andfunctionnode.h24
-rw-r--r--searchlib/src/vespa/searchlib/expression/arrayatlookupfunctionnode.cpp163
-rw-r--r--searchlib/src/vespa/searchlib/expression/arrayatlookupfunctionnode.h46
-rw-r--r--searchlib/src/vespa/searchlib/expression/arrayoperationnode.cpp66
-rw-r--r--searchlib/src/vespa/searchlib/expression/arrayoperationnode.h46
-rw-r--r--searchlib/src/vespa/searchlib/expression/attributenode.cpp283
-rw-r--r--searchlib/src/vespa/searchlib/expression/attributenode.h158
-rw-r--r--searchlib/src/vespa/searchlib/expression/binaryfunctionnode.h24
-rw-r--r--searchlib/src/vespa/searchlib/expression/bitfunctionnode.h24
-rw-r--r--searchlib/src/vespa/searchlib/expression/bucketresultnode.cpp17
-rw-r--r--searchlib/src/vespa/searchlib/expression/bucketresultnode.h27
-rw-r--r--searchlib/src/vespa/searchlib/expression/catfunctionnode.h23
-rw-r--r--searchlib/src/vespa/searchlib/expression/catserializer.cpp79
-rw-r--r--searchlib/src/vespa/searchlib/expression/catserializer.h38
-rw-r--r--searchlib/src/vespa/searchlib/expression/constantnode.h27
-rw-r--r--searchlib/src/vespa/searchlib/expression/debugwaitfunctionnode.cpp78
-rw-r--r--searchlib/src/vespa/searchlib/expression/debugwaitfunctionnode.h30
-rw-r--r--searchlib/src/vespa/searchlib/expression/dividefunctionnode.h22
-rw-r--r--searchlib/src/vespa/searchlib/expression/documentaccessornode.h37
-rw-r--r--searchlib/src/vespa/searchlib/expression/documentfieldnode.cpp340
-rw-r--r--searchlib/src/vespa/searchlib/expression/documentfieldnode.h87
-rw-r--r--searchlib/src/vespa/searchlib/expression/enumresultnode.h30
-rw-r--r--searchlib/src/vespa/searchlib/expression/expressionnode.h58
-rw-r--r--searchlib/src/vespa/searchlib/expression/expressiontree.cpp202
-rw-r--r--searchlib/src/vespa/searchlib/expression/expressiontree.h75
-rw-r--r--searchlib/src/vespa/searchlib/expression/fixedwidthbucketfunctionnode.cpp134
-rw-r--r--searchlib/src/vespa/searchlib/expression/fixedwidthbucketfunctionnode.h71
-rw-r--r--searchlib/src/vespa/searchlib/expression/floatbucketresultnode.cpp85
-rw-r--r--searchlib/src/vespa/searchlib/expression/floatbucketresultnode.h53
-rw-r--r--searchlib/src/vespa/searchlib/expression/floatresultnode.h56
-rw-r--r--searchlib/src/vespa/searchlib/expression/forcelink.hpp49
-rw-r--r--searchlib/src/vespa/searchlib/expression/functionnode.h30
-rw-r--r--searchlib/src/vespa/searchlib/expression/functionnodes.cpp624
-rw-r--r--searchlib/src/vespa/searchlib/expression/getdocidnamespacespecificfunctionnode.h28
-rw-r--r--searchlib/src/vespa/searchlib/expression/getymumchecksumfunctionnode.h26
-rw-r--r--searchlib/src/vespa/searchlib/expression/integerbucketresultnode.cpp73
-rw-r--r--searchlib/src/vespa/searchlib/expression/integerbucketresultnode.h52
-rw-r--r--searchlib/src/vespa/searchlib/expression/integerresultnode.h138
-rw-r--r--searchlib/src/vespa/searchlib/expression/interpolatedlookupfunctionnode.cpp127
-rw-r--r--searchlib/src/vespa/searchlib/expression/interpolatedlookupfunctionnode.h40
-rw-r--r--searchlib/src/vespa/searchlib/expression/mathfunctionnode.cpp70
-rw-r--r--searchlib/src/vespa/searchlib/expression/mathfunctionnode.h26
-rw-r--r--searchlib/src/vespa/searchlib/expression/maxfunctionnode.h22
-rw-r--r--searchlib/src/vespa/searchlib/expression/md5bitfunctionnode.h21
-rw-r--r--searchlib/src/vespa/searchlib/expression/minfunctionnode.h22
-rw-r--r--searchlib/src/vespa/searchlib/expression/modulofunctionnode.h22
-rw-r--r--searchlib/src/vespa/searchlib/expression/multiargfunctionnode.h41
-rw-r--r--searchlib/src/vespa/searchlib/expression/multiplyfunctionnode.h22
-rw-r--r--searchlib/src/vespa/searchlib/expression/negatefunctionnode.h21
-rw-r--r--searchlib/src/vespa/searchlib/expression/normalizesubjectfunctionnode.h22
-rw-r--r--searchlib/src/vespa/searchlib/expression/nullresultnode.h36
-rw-r--r--searchlib/src/vespa/searchlib/expression/numelemfunctionnode.h22
-rw-r--r--searchlib/src/vespa/searchlib/expression/numericfunctionnode.cpp156
-rw-r--r--searchlib/src/vespa/searchlib/expression/numericfunctionnode.h178
-rw-r--r--searchlib/src/vespa/searchlib/expression/numericresultnode.h23
-rw-r--r--searchlib/src/vespa/searchlib/expression/orfunctionnode.h22
-rw-r--r--searchlib/src/vespa/searchlib/expression/perdocexpression.cpp36
-rw-r--r--searchlib/src/vespa/searchlib/expression/positiveinfinityresultnode.h30
-rw-r--r--searchlib/src/vespa/searchlib/expression/rangebucketpredef.cpp133
-rw-r--r--searchlib/src/vespa/searchlib/expression/rangebucketpredef.h75
-rw-r--r--searchlib/src/vespa/searchlib/expression/rawbucketresultnode.cpp93
-rw-r--r--searchlib/src/vespa/searchlib/expression/rawbucketresultnode.h37
-rw-r--r--searchlib/src/vespa/searchlib/expression/rawresultnode.h53
-rw-r--r--searchlib/src/vespa/searchlib/expression/relevancenode.h27
-rw-r--r--searchlib/src/vespa/searchlib/expression/resultnode.cpp73
-rw-r--r--searchlib/src/vespa/searchlib/expression/resultnode.h128
-rw-r--r--searchlib/src/vespa/searchlib/expression/resultnodes.cpp410
-rw-r--r--searchlib/src/vespa/searchlib/expression/resultvector.cpp61
-rw-r--r--searchlib/src/vespa/searchlib/expression/resultvector.h399
-rw-r--r--searchlib/src/vespa/searchlib/expression/reversefunctionnode.h21
-rw-r--r--searchlib/src/vespa/searchlib/expression/serializer.h34
-rw-r--r--searchlib/src/vespa/searchlib/expression/singleresultnode.h30
-rw-r--r--searchlib/src/vespa/searchlib/expression/sortfunctionnode.h21
-rw-r--r--searchlib/src/vespa/searchlib/expression/strcatfunctionnode.h22
-rw-r--r--searchlib/src/vespa/searchlib/expression/strcatserializer.cpp54
-rw-r--r--searchlib/src/vespa/searchlib/expression/strcatserializer.h25
-rw-r--r--searchlib/src/vespa/searchlib/expression/stringbucketresultnode.cpp93
-rw-r--r--searchlib/src/vespa/searchlib/expression/stringbucketresultnode.h44
-rw-r--r--searchlib/src/vespa/searchlib/expression/stringresultnode.h52
-rw-r--r--searchlib/src/vespa/searchlib/expression/strlenfunctionnode.h22
-rw-r--r--searchlib/src/vespa/searchlib/expression/timestamp.cpp108
-rw-r--r--searchlib/src/vespa/searchlib/expression/timestamp.h78
-rw-r--r--searchlib/src/vespa/searchlib/expression/tofloatfunctionnode.h22
-rw-r--r--searchlib/src/vespa/searchlib/expression/tointfunctionnode.h22
-rw-r--r--searchlib/src/vespa/searchlib/expression/torawfunctionnode.h22
-rw-r--r--searchlib/src/vespa/searchlib/expression/tostringfunctionnode.h22
-rw-r--r--searchlib/src/vespa/searchlib/expression/ucafunctionnode.cpp115
-rw-r--r--searchlib/src/vespa/searchlib/expression/ucafunctionnode.h60
-rw-r--r--searchlib/src/vespa/searchlib/expression/unarybitfunctionnode.h31
-rw-r--r--searchlib/src/vespa/searchlib/expression/unaryfunctionnode.h27
-rw-r--r--searchlib/src/vespa/searchlib/expression/xorbitfunctionnode.h23
-rw-r--r--searchlib/src/vespa/searchlib/expression/xorfunctionnode.h22
-rw-r--r--searchlib/src/vespa/searchlib/expression/zcurve.cpp91
-rw-r--r--searchlib/src/vespa/searchlib/expression/zcurve.h59
-rw-r--r--searchlib/src/vespa/searchlib/features/.gitignore3
-rw-r--r--searchlib/src/vespa/searchlib/features/CMakeLists.txt64
-rw-r--r--searchlib/src/vespa/searchlib/features/OWNERS1
-rw-r--r--searchlib/src/vespa/searchlib/features/agefeature.cpp79
-rw-r--r--searchlib/src/vespa/searchlib/features/agefeature.h64
-rw-r--r--searchlib/src/vespa/searchlib/features/array_parser.cpp19
-rw-r--r--searchlib/src/vespa/searchlib/features/array_parser.h49
-rw-r--r--searchlib/src/vespa/searchlib/features/array_parser.hpp96
-rw-r--r--searchlib/src/vespa/searchlib/features/attributefeature.cpp433
-rw-r--r--searchlib/src/vespa/searchlib/features/attributefeature.h57
-rw-r--r--searchlib/src/vespa/searchlib/features/attributematchfeature.cpp350
-rw-r--r--searchlib/src/vespa/searchlib/features/attributematchfeature.h124
-rw-r--r--searchlib/src/vespa/searchlib/features/closenessfeature.cpp110
-rw-r--r--searchlib/src/vespa/searchlib/features/closenessfeature.h67
-rw-r--r--searchlib/src/vespa/searchlib/features/constant_tensor_executor.h44
-rwxr-xr-xsearchlib/src/vespa/searchlib/features/create-class-cpp.sh29
-rw-r--r--searchlib/src/vespa/searchlib/features/create-class-h.sh27
-rw-r--r--searchlib/src/vespa/searchlib/features/debug_attribute_wait.cpp96
-rw-r--r--searchlib/src/vespa/searchlib/features/debug_attribute_wait.h71
-rw-r--r--searchlib/src/vespa/searchlib/features/debug_wait.cpp82
-rw-r--r--searchlib/src/vespa/searchlib/features/debug_wait.h66
-rw-r--r--searchlib/src/vespa/searchlib/features/distancefeature.cpp148
-rw-r--r--searchlib/src/vespa/searchlib/features/distancefeature.h75
-rw-r--r--searchlib/src/vespa/searchlib/features/distancetopathfeature.cpp177
-rw-r--r--searchlib/src/vespa/searchlib/features/distancetopathfeature.h82
-rw-r--r--searchlib/src/vespa/searchlib/features/dotproductfeature.cpp457
-rw-r--r--searchlib/src/vespa/searchlib/features/dotproductfeature.h217
-rw-r--r--searchlib/src/vespa/searchlib/features/element_completeness_feature.cpp143
-rw-r--r--searchlib/src/vespa/searchlib/features/element_completeness_feature.h131
-rw-r--r--searchlib/src/vespa/searchlib/features/element_similarity_feature.cpp417
-rw-r--r--searchlib/src/vespa/searchlib/features/element_similarity_feature.h44
-rw-r--r--searchlib/src/vespa/searchlib/features/euclidean_distance_feature.cpp123
-rw-r--r--searchlib/src/vespa/searchlib/features/euclidean_distance_feature.h76
-rw-r--r--searchlib/src/vespa/searchlib/features/fieldinfofeature.cpp235
-rw-r--r--searchlib/src/vespa/searchlib/features/fieldinfofeature.h70
-rw-r--r--searchlib/src/vespa/searchlib/features/fieldlengthfeature.cpp99
-rw-r--r--searchlib/src/vespa/searchlib/features/fieldlengthfeature.h65
-rw-r--r--searchlib/src/vespa/searchlib/features/fieldmatch/.gitignore3
-rw-r--r--searchlib/src/vespa/searchlib/features/fieldmatch/CMakeLists.txt10
-rw-r--r--searchlib/src/vespa/searchlib/features/fieldmatch/computer.cpp558
-rw-r--r--searchlib/src/vespa/searchlib/features/fieldmatch/computer.h382
-rw-r--r--searchlib/src/vespa/searchlib/features/fieldmatch/metrics.cpp344
-rw-r--r--searchlib/src/vespa/searchlib/features/fieldmatch/metrics.h563
-rw-r--r--searchlib/src/vespa/searchlib/features/fieldmatch/params.cpp45
-rw-r--r--searchlib/src/vespa/searchlib/features/fieldmatch/params.h261
-rw-r--r--searchlib/src/vespa/searchlib/features/fieldmatch/segmentstart.cpp103
-rw-r--r--searchlib/src/vespa/searchlib/features/fieldmatch/segmentstart.h186
-rw-r--r--searchlib/src/vespa/searchlib/features/fieldmatch/simplemetrics.cpp39
-rw-r--r--searchlib/src/vespa/searchlib/features/fieldmatch/simplemetrics.h186
-rw-r--r--searchlib/src/vespa/searchlib/features/fieldmatchfeature.cpp311
-rw-r--r--searchlib/src/vespa/searchlib/features/fieldmatchfeature.h70
-rw-r--r--searchlib/src/vespa/searchlib/features/fieldtermmatchfeature.cpp129
-rw-r--r--searchlib/src/vespa/searchlib/features/fieldtermmatchfeature.h67
-rw-r--r--searchlib/src/vespa/searchlib/features/firstphasefeature.cpp62
-rw-r--r--searchlib/src/vespa/searchlib/features/firstphasefeature.h53
-rw-r--r--searchlib/src/vespa/searchlib/features/flow_completeness_feature.cpp309
-rw-r--r--searchlib/src/vespa/searchlib/features/flow_completeness_feature.h111
-rw-r--r--searchlib/src/vespa/searchlib/features/foreachfeature.cpp186
-rw-r--r--searchlib/src/vespa/searchlib/features/foreachfeature.h185
-rw-r--r--searchlib/src/vespa/searchlib/features/freshnessfeature.cpp101
-rw-r--r--searchlib/src/vespa/searchlib/features/freshnessfeature.h67
-rw-r--r--searchlib/src/vespa/searchlib/features/item_raw_score_feature.cpp82
-rw-r--r--searchlib/src/vespa/searchlib/features/item_raw_score_feature.h63
-rw-r--r--searchlib/src/vespa/searchlib/features/jarowinklerdistancefeature.cpp184
-rw-r--r--searchlib/src/vespa/searchlib/features/jarowinklerdistancefeature.h84
-rw-r--r--searchlib/src/vespa/searchlib/features/logarithmcalculator.h61
-rw-r--r--searchlib/src/vespa/searchlib/features/matchesfeature.cpp90
-rw-r--r--searchlib/src/vespa/searchlib/features/matchesfeature.h71
-rw-r--r--searchlib/src/vespa/searchlib/features/matchfeature.cpp107
-rw-r--r--searchlib/src/vespa/searchlib/features/matchfeature.h68
-rw-r--r--searchlib/src/vespa/searchlib/features/native_dot_product_feature.cpp57
-rw-r--r--searchlib/src/vespa/searchlib/features/native_dot_product_feature.h47
-rw-r--r--searchlib/src/vespa/searchlib/features/nativeattributematchfeature.cpp150
-rw-r--r--searchlib/src/vespa/searchlib/features/nativeattributematchfeature.h119
-rw-r--r--searchlib/src/vespa/searchlib/features/nativefieldmatchfeature.cpp179
-rw-r--r--searchlib/src/vespa/searchlib/features/nativefieldmatchfeature.h133
-rw-r--r--searchlib/src/vespa/searchlib/features/nativeproximityfeature.cpp218
-rw-r--r--searchlib/src/vespa/searchlib/features/nativeproximityfeature.h119
-rw-r--r--searchlib/src/vespa/searchlib/features/nativerankfeature.cpp173
-rw-r--r--searchlib/src/vespa/searchlib/features/nativerankfeature.h133
-rw-r--r--searchlib/src/vespa/searchlib/features/nowfeature.cpp63
-rw-r--r--searchlib/src/vespa/searchlib/features/nowfeature.h60
-rw-r--r--searchlib/src/vespa/searchlib/features/proximityfeature.cpp149
-rw-r--r--searchlib/src/vespa/searchlib/features/proximityfeature.h82
-rw-r--r--searchlib/src/vespa/searchlib/features/querycompletenessfeature.cpp112
-rw-r--r--searchlib/src/vespa/searchlib/features/querycompletenessfeature.h80
-rw-r--r--searchlib/src/vespa/searchlib/features/queryfeature.cpp161
-rw-r--r--searchlib/src/vespa/searchlib/features/queryfeature.h52
-rw-r--r--searchlib/src/vespa/searchlib/features/queryterm.cpp52
-rw-r--r--searchlib/src/vespa/searchlib/features/queryterm.h64
-rw-r--r--searchlib/src/vespa/searchlib/features/querytermcountfeature.cpp58
-rw-r--r--searchlib/src/vespa/searchlib/features/querytermcountfeature.h48
-rw-r--r--searchlib/src/vespa/searchlib/features/randomfeature.cpp87
-rw-r--r--searchlib/src/vespa/searchlib/features/randomfeature.h71
-rw-r--r--searchlib/src/vespa/searchlib/features/rankingexpression/.gitignore6
-rw-r--r--searchlib/src/vespa/searchlib/features/rankingexpression/CMakeLists.txt6
-rw-r--r--searchlib/src/vespa/searchlib/features/rankingexpression/feature_name_extractor.cpp87
-rw-r--r--searchlib/src/vespa/searchlib/features/rankingexpression/feature_name_extractor.h23
-rw-r--r--searchlib/src/vespa/searchlib/features/rankingexpressionfeature.cpp170
-rw-r--r--searchlib/src/vespa/searchlib/features/rankingexpressionfeature.h88
-rw-r--r--searchlib/src/vespa/searchlib/features/raw_score_feature.cpp57
-rw-r--r--searchlib/src/vespa/searchlib/features/raw_score_feature.h44
-rw-r--r--searchlib/src/vespa/searchlib/features/reverseproximityfeature.cpp136
-rw-r--r--searchlib/src/vespa/searchlib/features/reverseproximityfeature.h78
-rw-r--r--searchlib/src/vespa/searchlib/features/setup.cpp115
-rw-r--r--searchlib/src/vespa/searchlib/features/setup.h19
-rw-r--r--searchlib/src/vespa/searchlib/features/subqueries_feature.cpp58
-rw-r--r--searchlib/src/vespa/searchlib/features/subqueries_feature.h43
-rw-r--r--searchlib/src/vespa/searchlib/features/tensor_factory_blueprint.cpp46
-rw-r--r--searchlib/src/vespa/searchlib/features/tensor_factory_blueprint.h33
-rw-r--r--searchlib/src/vespa/searchlib/features/tensor_from_attribute_executor.h56
-rw-r--r--searchlib/src/vespa/searchlib/features/tensor_from_labels_feature.cpp122
-rw-r--r--searchlib/src/vespa/searchlib/features/tensor_from_labels_feature.h36
-rw-r--r--searchlib/src/vespa/searchlib/features/tensor_from_tensor_attribute_executor.cpp35
-rw-r--r--searchlib/src/vespa/searchlib/features/tensor_from_tensor_attribute_executor.h30
-rw-r--r--searchlib/src/vespa/searchlib/features/tensor_from_weighted_set_feature.cpp137
-rw-r--r--searchlib/src/vespa/searchlib/features/tensor_from_weighted_set_feature.h33
-rw-r--r--searchlib/src/vespa/searchlib/features/term_field_md_feature.cpp115
-rw-r--r--searchlib/src/vespa/searchlib/features/term_field_md_feature.h60
-rw-r--r--searchlib/src/vespa/searchlib/features/termdistancecalculator.cpp81
-rw-r--r--searchlib/src/vespa/searchlib/features/termdistancecalculator.h81
-rw-r--r--searchlib/src/vespa/searchlib/features/termdistancefeature.cpp100
-rw-r--r--searchlib/src/vespa/searchlib/features/termdistancefeature.h74
-rw-r--r--searchlib/src/vespa/searchlib/features/termeditdistancefeature.cpp234
-rw-r--r--searchlib/src/vespa/searchlib/features/termeditdistancefeature.h153
-rw-r--r--searchlib/src/vespa/searchlib/features/termfeature.cpp91
-rw-r--r--searchlib/src/vespa/searchlib/features/termfeature.h68
-rw-r--r--searchlib/src/vespa/searchlib/features/terminfofeature.cpp55
-rw-r--r--searchlib/src/vespa/searchlib/features/terminfofeature.h33
-rw-r--r--searchlib/src/vespa/searchlib/features/text_similarity_feature.cpp220
-rw-r--r--searchlib/src/vespa/searchlib/features/text_similarity_feature.h75
-rw-r--r--searchlib/src/vespa/searchlib/features/utils.cpp155
-rw-r--r--searchlib/src/vespa/searchlib/features/utils.h234
-rw-r--r--searchlib/src/vespa/searchlib/features/valuefeature.cpp65
-rw-r--r--searchlib/src/vespa/searchlib/features/valuefeature.h59
-rw-r--r--searchlib/src/vespa/searchlib/features/weighted_set_parser.cpp19
-rw-r--r--searchlib/src/vespa/searchlib/features/weighted_set_parser.h28
-rw-r--r--searchlib/src/vespa/searchlib/features/weighted_set_parser.hpp48
-rw-r--r--searchlib/src/vespa/searchlib/fef/.gitignore4
-rw-r--r--searchlib/src/vespa/searchlib/fef/CMakeLists.txt44
-rw-r--r--searchlib/src/vespa/searchlib/fef/Doxyfile1162
-rw-r--r--searchlib/src/vespa/searchlib/fef/OWNERS1
-rw-r--r--searchlib/src/vespa/searchlib/fef/blueprint.cpp76
-rw-r--r--searchlib/src/vespa/searchlib/fef/blueprint.h252
-rw-r--r--searchlib/src/vespa/searchlib/fef/blueprintfactory.cpp49
-rw-r--r--searchlib/src/vespa/searchlib/fef/blueprintfactory.h62
-rw-r--r--searchlib/src/vespa/searchlib/fef/blueprintresolver.cpp227
-rw-r--r--searchlib/src/vespa/searchlib/fef/blueprintresolver.h150
-rw-r--r--searchlib/src/vespa/searchlib/fef/collection_type.cpp21
-rw-r--r--searchlib/src/vespa/searchlib/fef/collection_type.h51
-rwxr-xr-xsearchlib/src/vespa/searchlib/fef/create-class-cpp.sh29
-rw-r--r--searchlib/src/vespa/searchlib/fef/create-class-h.sh27
-rw-r--r--searchlib/src/vespa/searchlib/fef/create-fef-includes.sh26
-rw-r--r--searchlib/src/vespa/searchlib/fef/create-interface.sh23
-rwxr-xr-xsearchlib/src/vespa/searchlib/fef/dist_doc_hp.sh3
-rw-r--r--searchlib/src/vespa/searchlib/fef/feature_type.cpp26
-rw-r--r--searchlib/src/vespa/searchlib/fef/feature_type.h40
-rw-r--r--searchlib/src/vespa/searchlib/fef/featureexecutor.cpp22
-rw-r--r--searchlib/src/vespa/searchlib/fef/featureexecutor.h185
-rw-r--r--searchlib/src/vespa/searchlib/fef/featurenamebuilder.cpp159
-rw-r--r--searchlib/src/vespa/searchlib/fef/featurenamebuilder.h75
-rw-r--r--searchlib/src/vespa/searchlib/fef/featurenameparser.cpp499
-rw-r--r--searchlib/src/vespa/searchlib/fef/featurenameparser.h100
-rw-r--r--searchlib/src/vespa/searchlib/fef/featureoverrider.cpp54
-rw-r--r--searchlib/src/vespa/searchlib/fef/featureoverrider.h46
-rw-r--r--searchlib/src/vespa/searchlib/fef/fef.cpp12
-rw-r--r--searchlib/src/vespa/searchlib/fef/fef.h62
-rw-r--r--searchlib/src/vespa/searchlib/fef/fieldinfo.cpp23
-rw-r--r--searchlib/src/vespa/searchlib/fef/fieldinfo.h112
-rw-r--r--searchlib/src/vespa/searchlib/fef/fieldpositionsiterator.cpp14
-rw-r--r--searchlib/src/vespa/searchlib/fef/fieldpositionsiterator.h164
-rw-r--r--searchlib/src/vespa/searchlib/fef/fieldtype.cpp21
-rw-r--r--searchlib/src/vespa/searchlib/fef/fieldtype.h51
-rw-r--r--searchlib/src/vespa/searchlib/fef/filetablefactory.cpp41
-rw-r--r--searchlib/src/vespa/searchlib/fef/filetablefactory.h34
-rw-r--r--searchlib/src/vespa/searchlib/fef/functiontablefactory.cpp134
-rw-r--r--searchlib/src/vespa/searchlib/fef/functiontablefactory.h59
-rw-r--r--searchlib/src/vespa/searchlib/fef/handle.h17
-rw-r--r--searchlib/src/vespa/searchlib/fef/iblueprintregistry.h28
-rw-r--r--searchlib/src/vespa/searchlib/fef/idumpfeaturevisitor.h33
-rw-r--r--searchlib/src/vespa/searchlib/fef/iindexenvironment.h125
-rw-r--r--searchlib/src/vespa/searchlib/fef/indexproperties.cpp373
-rw-r--r--searchlib/src/vespa/searchlib/fef/indexproperties.h307
-rw-r--r--searchlib/src/vespa/searchlib/fef/iqueryenvironment.h94
-rw-r--r--searchlib/src/vespa/searchlib/fef/itablefactory.h36
-rw-r--r--searchlib/src/vespa/searchlib/fef/itablemanager.h29
-rw-r--r--searchlib/src/vespa/searchlib/fef/itermdata.h89
-rw-r--r--searchlib/src/vespa/searchlib/fef/itermfielddata.h48
-rw-r--r--searchlib/src/vespa/searchlib/fef/location.cpp19
-rw-r--r--searchlib/src/vespa/searchlib/fef/location.h111
-rw-r--r--searchlib/src/vespa/searchlib/fef/matchdata.cpp30
-rw-r--r--searchlib/src/vespa/searchlib/fef/matchdata.h181
-rw-r--r--searchlib/src/vespa/searchlib/fef/matchdatalayout.cpp35
-rw-r--r--searchlib/src/vespa/searchlib/fef/matchdatalayout.h64
-rw-r--r--searchlib/src/vespa/searchlib/fef/objectstore.cpp39
-rw-r--r--searchlib/src/vespa/searchlib/fef/objectstore.h37
-rw-r--r--searchlib/src/vespa/searchlib/fef/parameter.cpp19
-rw-r--r--searchlib/src/vespa/searchlib/fef/parameter.h41
-rw-r--r--searchlib/src/vespa/searchlib/fef/parameterdescriptions.cpp34
-rw-r--r--searchlib/src/vespa/searchlib/fef/parameterdescriptions.h197
-rw-r--r--searchlib/src/vespa/searchlib/fef/parametervalidator.cpp158
-rw-r--r--searchlib/src/vespa/searchlib/fef/parametervalidator.h83
-rw-r--r--searchlib/src/vespa/searchlib/fef/phrasesplitter.cpp110
-rw-r--r--searchlib/src/vespa/searchlib/fef/phrasesplitter.h146
-rw-r--r--searchlib/src/vespa/searchlib/fef/properties.cpp269
-rw-r--r--searchlib/src/vespa/searchlib/fef/properties.h324
-rw-r--r--searchlib/src/vespa/searchlib/fef/queryproperties.cpp16
-rw-r--r--searchlib/src/vespa/searchlib/fef/queryproperties.h42
-rw-r--r--searchlib/src/vespa/searchlib/fef/rank_program.cpp240
-rw-r--r--searchlib/src/vespa/searchlib/fef/rank_program.h135
-rw-r--r--searchlib/src/vespa/searchlib/fef/ranksetup.cpp186
-rw-r--r--searchlib/src/vespa/searchlib/fef/ranksetup.h393
-rw-r--r--searchlib/src/vespa/searchlib/fef/simpletermdata.cpp33
-rw-r--r--searchlib/src/vespa/searchlib/fef/simpletermdata.h195
-rw-r--r--searchlib/src/vespa/searchlib/fef/simpletermfielddata.cpp26
-rw-r--r--searchlib/src/vespa/searchlib/fef/simpletermfielddata.h84
-rw-r--r--searchlib/src/vespa/searchlib/fef/sumexecutor.cpp21
-rw-r--r--searchlib/src/vespa/searchlib/fef/sumexecutor.h32
-rw-r--r--searchlib/src/vespa/searchlib/fef/symmetrictable.cpp52
-rw-r--r--searchlib/src/vespa/searchlib/fef/symmetrictable.h58
-rw-r--r--searchlib/src/vespa/searchlib/fef/table.cpp22
-rw-r--r--searchlib/src/vespa/searchlib/fef/table.h65
-rw-r--r--searchlib/src/vespa/searchlib/fef/tablemanager.cpp36
-rw-r--r--searchlib/src/vespa/searchlib/fef/tablemanager.h50
-rw-r--r--searchlib/src/vespa/searchlib/fef/termfieldmatchdata.cpp121
-rw-r--r--searchlib/src/vespa/searchlib/fef/termfieldmatchdata.h267
-rw-r--r--searchlib/src/vespa/searchlib/fef/termfieldmatchdataarray.h69
-rw-r--r--searchlib/src/vespa/searchlib/fef/termfieldmatchdataposition.cpp12
-rw-r--r--searchlib/src/vespa/searchlib/fef/termfieldmatchdataposition.h113
-rw-r--r--searchlib/src/vespa/searchlib/fef/termmatchdatamerger.cpp77
-rw-r--r--searchlib/src/vespa/searchlib/fef/termmatchdatamerger.h46
-rw-r--r--searchlib/src/vespa/searchlib/fef/test/.gitignore3
-rw-r--r--searchlib/src/vespa/searchlib/fef/test/CMakeLists.txt14
-rw-r--r--searchlib/src/vespa/searchlib/fef/test/dummy_dependency_handler.cpp60
-rw-r--r--searchlib/src/vespa/searchlib/fef/test/dummy_dependency_handler.h39
-rw-r--r--searchlib/src/vespa/searchlib/fef/test/featuretest.cpp159
-rw-r--r--searchlib/src/vespa/searchlib/fef/test/featuretest.h137
-rw-r--r--searchlib/src/vespa/searchlib/fef/test/ftlib.cpp399
-rw-r--r--searchlib/src/vespa/searchlib/fef/test/ftlib.h238
-rw-r--r--searchlib/src/vespa/searchlib/fef/test/indexenvironment.cpp42
-rw-r--r--searchlib/src/vespa/searchlib/fef/test/indexenvironment.h83
-rw-r--r--searchlib/src/vespa/searchlib/fef/test/indexenvironmentbuilder.cpp28
-rw-r--r--searchlib/src/vespa/searchlib/fef/test/indexenvironmentbuilder.h50
-rw-r--r--searchlib/src/vespa/searchlib/fef/test/matchdatabuilder.cpp184
-rw-r--r--searchlib/src/vespa/searchlib/fef/test/matchdatabuilder.h150
-rw-r--r--searchlib/src/vespa/searchlib/fef/test/plugin/.gitignore3
-rw-r--r--searchlib/src/vespa/searchlib/fef/test/plugin/CMakeLists.txt12
-rw-r--r--searchlib/src/vespa/searchlib/fef/test/plugin/cfgvalue.cpp58
-rw-r--r--searchlib/src/vespa/searchlib/fef/test/plugin/cfgvalue.h34
-rw-r--r--searchlib/src/vespa/searchlib/fef/test/plugin/chain.cpp69
-rw-r--r--searchlib/src/vespa/searchlib/fef/test/plugin/chain.h38
-rw-r--r--searchlib/src/vespa/searchlib/fef/test/plugin/double.cpp59
-rw-r--r--searchlib/src/vespa/searchlib/fef/test/plugin/double.h42
-rw-r--r--searchlib/src/vespa/searchlib/fef/test/plugin/query.cpp46
-rw-r--r--searchlib/src/vespa/searchlib/fef/test/plugin/query.h30
-rw-r--r--searchlib/src/vespa/searchlib/fef/test/plugin/setup.cpp35
-rw-r--r--searchlib/src/vespa/searchlib/fef/test/plugin/setup.h16
-rw-r--r--searchlib/src/vespa/searchlib/fef/test/plugin/staticrank.cpp59
-rw-r--r--searchlib/src/vespa/searchlib/fef/test/plugin/staticrank.h41
-rw-r--r--searchlib/src/vespa/searchlib/fef/test/plugin/sum.cpp74
-rw-r--r--searchlib/src/vespa/searchlib/fef/test/plugin/sum.h38
-rw-r--r--searchlib/src/vespa/searchlib/fef/test/queryenvironment.cpp20
-rw-r--r--searchlib/src/vespa/searchlib/fef/test/queryenvironment.h94
-rw-r--r--searchlib/src/vespa/searchlib/fef/test/queryenvironmentbuilder.cpp66
-rw-r--r--searchlib/src/vespa/searchlib/fef/test/queryenvironmentbuilder.h71
-rw-r--r--searchlib/src/vespa/searchlib/fef/test/rankresult.cpp113
-rw-r--r--searchlib/src/vespa/searchlib/fef/test/rankresult.h113
-rw-r--r--searchlib/src/vespa/searchlib/fef/utils.cpp75
-rw-r--r--searchlib/src/vespa/searchlib/fef/utils.h37
-rw-r--r--searchlib/src/vespa/searchlib/fef/verify_feature.cpp29
-rw-r--r--searchlib/src/vespa/searchlib/fef/verify_feature.h30
-rw-r--r--searchlib/src/vespa/searchlib/grouping/CMakeLists.txt9
-rw-r--r--searchlib/src/vespa/searchlib/grouping/OWNERS1
-rw-r--r--searchlib/src/vespa/searchlib/grouping/collect.cpp113
-rw-r--r--searchlib/src/vespa/searchlib/grouping/collect.h105
-rw-r--r--searchlib/src/vespa/searchlib/grouping/forcelink.hpp13
-rw-r--r--searchlib/src/vespa/searchlib/grouping/groupandcollectengine.cpp50
-rw-r--r--searchlib/src/vespa/searchlib/grouping/groupandcollectengine.h21
-rw-r--r--searchlib/src/vespa/searchlib/grouping/groupengine.cpp227
-rw-r--r--searchlib/src/vespa/searchlib/grouping/groupengine.h139
-rw-r--r--searchlib/src/vespa/searchlib/grouping/groupingengine.cpp110
-rw-r--r--searchlib/src/vespa/searchlib/grouping/groupingengine.h33
-rw-r--r--searchlib/src/vespa/searchlib/grouping/groupref.h22
-rw-r--r--searchlib/src/vespa/searchlib/grouping/hyperloglog.h140
-rw-r--r--searchlib/src/vespa/searchlib/grouping/sketch.h260
-rw-r--r--searchlib/src/vespa/searchlib/index/.gitignore6
-rw-r--r--searchlib/src/vespa/searchlib/index/CMakeLists.txt18
-rw-r--r--searchlib/src/vespa/searchlib/index/OWNERS1
-rw-r--r--searchlib/src/vespa/searchlib/index/bitvectorkeys.h43
-rw-r--r--searchlib/src/vespa/searchlib/index/dictionaryfile.cpp45
-rw-r--r--searchlib/src/vespa/searchlib/index/dictionaryfile.h138
-rw-r--r--searchlib/src/vespa/searchlib/index/docbuilder.cpp930
-rw-r--r--searchlib/src/vespa/searchlib/index/docbuilder.h432
-rw-r--r--searchlib/src/vespa/searchlib/index/docidandfeatures.cpp105
-rw-r--r--searchlib/src/vespa/searchlib/index/docidandfeatures.h338
-rw-r--r--searchlib/src/vespa/searchlib/index/doctypebuilder.cpp356
-rw-r--r--searchlib/src/vespa/searchlib/index/doctypebuilder.h95
-rw-r--r--searchlib/src/vespa/searchlib/index/dummyfileheadercontext.cpp70
-rw-r--r--searchlib/src/vespa/searchlib/index/dummyfileheadercontext.h47
-rw-r--r--searchlib/src/vespa/searchlib/index/indexbuilder.cpp28
-rw-r--r--searchlib/src/vespa/searchlib/index/indexbuilder.h62
-rw-r--r--searchlib/src/vespa/searchlib/index/olddictionaryfile.cpp115
-rw-r--r--searchlib/src/vespa/searchlib/index/olddictionaryfile.h208
-rw-r--r--searchlib/src/vespa/searchlib/index/postinglistcountfile.cpp60
-rw-r--r--searchlib/src/vespa/searchlib/index/postinglistcountfile.h140
-rw-r--r--searchlib/src/vespa/searchlib/index/postinglistcounts.cpp90
-rw-r--r--searchlib/src/vespa/searchlib/index/postinglistcounts.h144
-rw-r--r--searchlib/src/vespa/searchlib/index/postinglistfile.cpp170
-rw-r--r--searchlib/src/vespa/searchlib/index/postinglistfile.h344
-rw-r--r--searchlib/src/vespa/searchlib/index/postinglisthandle.cpp27
-rw-r--r--searchlib/src/vespa/searchlib/index/postinglisthandle.h90
-rw-r--r--searchlib/src/vespa/searchlib/index/postinglistparams.cpp138
-rw-r--r--searchlib/src/vespa/searchlib/index/postinglistparams.h48
-rw-r--r--searchlib/src/vespa/searchlib/index/schemautil.cpp217
-rw-r--r--searchlib/src/vespa/searchlib/index/schemautil.h234
-rw-r--r--searchlib/src/vespa/searchlib/memoryindex/.gitignore3
-rw-r--r--searchlib/src/vespa/searchlib/memoryindex/CMakeLists.txt17
-rw-r--r--searchlib/src/vespa/searchlib/memoryindex/OWNERS2
-rw-r--r--searchlib/src/vespa/searchlib/memoryindex/compact_document_words_store.cpp176
-rw-r--r--searchlib/src/vespa/searchlib/memoryindex/compact_document_words_store.h102
-rw-r--r--searchlib/src/vespa/searchlib/memoryindex/dictionary.cpp68
-rw-r--r--searchlib/src/vespa/searchlib/memoryindex/dictionary.h64
-rw-r--r--searchlib/src/vespa/searchlib/memoryindex/document_remover.cpp67
-rw-r--r--searchlib/src/vespa/searchlib/memoryindex/document_remover.h63
-rw-r--r--searchlib/src/vespa/searchlib/memoryindex/documentinverter.cpp206
-rw-r--r--searchlib/src/vespa/searchlib/memoryindex/documentinverter.h128
-rw-r--r--searchlib/src/vespa/searchlib/memoryindex/featurestore.cpp167
-rw-r--r--searchlib/src/vespa/searchlib/memoryindex/featurestore.h274
-rw-r--r--searchlib/src/vespa/searchlib/memoryindex/fieldinverter.cpp577
-rw-r--r--searchlib/src/vespa/searchlib/memoryindex/fieldinverter.h449
-rw-r--r--searchlib/src/vespa/searchlib/memoryindex/i_document_insert_listener.h23
-rw-r--r--searchlib/src/vespa/searchlib/memoryindex/i_document_remove_listener.h28
-rw-r--r--searchlib/src/vespa/searchlib/memoryindex/iordereddocumentinserter.h52
-rw-r--r--searchlib/src/vespa/searchlib/memoryindex/memoryfieldindex.cpp342
-rw-r--r--searchlib/src/vespa/searchlib/memoryindex/memoryfieldindex.h283
-rw-r--r--searchlib/src/vespa/searchlib/memoryindex/memoryindex.cpp308
-rw-r--r--searchlib/src/vespa/searchlib/memoryindex/memoryindex.h184
-rw-r--r--searchlib/src/vespa/searchlib/memoryindex/ordereddocumentinserter.cpp158
-rw-r--r--searchlib/src/vespa/searchlib/memoryindex/ordereddocumentinserter.h80
-rw-r--r--searchlib/src/vespa/searchlib/memoryindex/postingiterator.cpp74
-rw-r--r--searchlib/src/vespa/searchlib/memoryindex/postingiterator.h43
-rw-r--r--searchlib/src/vespa/searchlib/memoryindex/urlfieldinverter.cpp384
-rw-r--r--searchlib/src/vespa/searchlib/memoryindex/urlfieldinverter.h79
-rw-r--r--searchlib/src/vespa/searchlib/memoryindex/wordstore.cpp59
-rw-r--r--searchlib/src/vespa/searchlib/memoryindex/wordstore.h41
-rw-r--r--searchlib/src/vespa/searchlib/parsequery/.gitignore6
-rw-r--r--searchlib/src/vespa/searchlib/parsequery/CMakeLists.txt8
-rw-r--r--searchlib/src/vespa/searchlib/parsequery/OWNERS1
-rw-r--r--searchlib/src/vespa/searchlib/parsequery/parse.cpp239
-rw-r--r--searchlib/src/vespa/searchlib/parsequery/parse.h232
-rw-r--r--searchlib/src/vespa/searchlib/parsequery/simplequerystack.cpp354
-rw-r--r--searchlib/src/vespa/searchlib/parsequery/simplequerystack.h108
-rw-r--r--searchlib/src/vespa/searchlib/parsequery/stackdumpiterator.cpp297
-rw-r--r--searchlib/src/vespa/searchlib/parsequery/stackdumpiterator.h165
-rw-r--r--searchlib/src/vespa/searchlib/predicate/CMakeLists.txt14
-rw-r--r--searchlib/src/vespa/searchlib/predicate/OWNERS1
-rw-r--r--searchlib/src/vespa/searchlib/predicate/document_features_store.cpp293
-rw-r--r--searchlib/src/vespa/searchlib/predicate/document_features_store.h89
-rw-r--r--searchlib/src/vespa/searchlib/predicate/predicate_bounds_posting_list.h96
-rw-r--r--searchlib/src/vespa/searchlib/predicate/predicate_hash.h125
-rw-r--r--searchlib/src/vespa/searchlib/predicate/predicate_index.cpp288
-rw-r--r--searchlib/src/vespa/searchlib/predicate/predicate_index.h131
-rw-r--r--searchlib/src/vespa/searchlib/predicate/predicate_interval.cpp24
-rw-r--r--searchlib/src/vespa/searchlib/predicate/predicate_interval.h64
-rw-r--r--searchlib/src/vespa/searchlib/predicate/predicate_interval_posting_list.h68
-rw-r--r--searchlib/src/vespa/searchlib/predicate/predicate_interval_store.cpp124
-rw-r--r--searchlib/src/vespa/searchlib/predicate/predicate_interval_store.h119
-rw-r--r--searchlib/src/vespa/searchlib/predicate/predicate_posting_list.h52
-rw-r--r--searchlib/src/vespa/searchlib/predicate/predicate_range_expander.cpp17
-rw-r--r--searchlib/src/vespa/searchlib/predicate/predicate_range_expander.h122
-rw-r--r--searchlib/src/vespa/searchlib/predicate/predicate_range_term_expander.h99
-rw-r--r--searchlib/src/vespa/searchlib/predicate/predicate_ref_cache.h160
-rw-r--r--searchlib/src/vespa/searchlib/predicate/predicate_tree_analyzer.cpp168
-rw-r--r--searchlib/src/vespa/searchlib/predicate/predicate_tree_analyzer.h43
-rw-r--r--searchlib/src/vespa/searchlib/predicate/predicate_tree_annotator.cpp256
-rw-r--r--searchlib/src/vespa/searchlib/predicate/predicate_tree_annotator.h51
-rw-r--r--searchlib/src/vespa/searchlib/predicate/predicate_zero_constraint_posting_list.cpp27
-rw-r--r--searchlib/src/vespa/searchlib/predicate/predicate_zero_constraint_posting_list.h28
-rw-r--r--searchlib/src/vespa/searchlib/predicate/predicate_zstar_compressed_posting_list.h89
-rw-r--r--searchlib/src/vespa/searchlib/predicate/simple_index.cpp25
-rw-r--r--searchlib/src/vespa/searchlib/predicate/simple_index.h261
-rw-r--r--searchlib/src/vespa/searchlib/predicate/simple_index.hpp315
-rw-r--r--searchlib/src/vespa/searchlib/predicate/tree_crumbs.h44
-rw-r--r--searchlib/src/vespa/searchlib/query/.gitignore6
-rw-r--r--searchlib/src/vespa/searchlib/query/CMakeLists.txt12
-rw-r--r--searchlib/src/vespa/searchlib/query/OWNERS1
-rw-r--r--searchlib/src/vespa/searchlib/query/base.cpp16
-rw-r--r--searchlib/src/vespa/searchlib/query/base.h141
-rw-r--r--searchlib/src/vespa/searchlib/query/posocc.h30
-rw-r--r--searchlib/src/vespa/searchlib/query/query.cpp348
-rw-r--r--searchlib/src/vespa/searchlib/query/query.h212
-rw-r--r--searchlib/src/vespa/searchlib/query/querynode.cpp199
-rw-r--r--searchlib/src/vespa/searchlib/query/querynode.h66
-rw-r--r--searchlib/src/vespa/searchlib/query/querynoderesultbase.cpp8
-rw-r--r--searchlib/src/vespa/searchlib/query/querynoderesultbase.h35
-rw-r--r--searchlib/src/vespa/searchlib/query/queryterm.cpp469
-rw-r--r--searchlib/src/vespa/searchlib/query/queryterm.h190
-rw-r--r--searchlib/src/vespa/searchlib/query/tree/.gitignore3
-rw-r--r--searchlib/src/vespa/searchlib/query/tree/CMakeLists.txt13
-rw-r--r--searchlib/src/vespa/searchlib/query/tree/OWNERS1
-rw-r--r--searchlib/src/vespa/searchlib/query/tree/customtypetermvisitor.h37
-rw-r--r--searchlib/src/vespa/searchlib/query/tree/customtypevisitor.h105
-rw-r--r--searchlib/src/vespa/searchlib/query/tree/intermediate.cpp22
-rw-r--r--searchlib/src/vespa/searchlib/query/tree/intermediate.h29
-rw-r--r--searchlib/src/vespa/searchlib/query/tree/intermediatenodes.cpp37
-rw-r--r--searchlib/src/vespa/searchlib/query/tree/intermediatenodes.h143
-rw-r--r--searchlib/src/vespa/searchlib/query/tree/location.cpp61
-rw-r--r--searchlib/src/vespa/searchlib/query/tree/location.h37
-rw-r--r--searchlib/src/vespa/searchlib/query/tree/node.h26
-rw-r--r--searchlib/src/vespa/searchlib/query/tree/point.h23
-rw-r--r--searchlib/src/vespa/searchlib/query/tree/predicate_query_term.h76
-rw-r--r--searchlib/src/vespa/searchlib/query/tree/querybuilder.cpp103
-rw-r--r--searchlib/src/vespa/searchlib/query/tree/querybuilder.h358
-rw-r--r--searchlib/src/vespa/searchlib/query/tree/querynodemixin.h28
-rw-r--r--searchlib/src/vespa/searchlib/query/tree/queryreplicator.h171
-rw-r--r--searchlib/src/vespa/searchlib/query/tree/querytreecreator.h32
-rw-r--r--searchlib/src/vespa/searchlib/query/tree/queryvisitor.h58
-rw-r--r--searchlib/src/vespa/searchlib/query/tree/range.cpp24
-rw-r--r--searchlib/src/vespa/searchlib/query/tree/range.h30
-rw-r--r--searchlib/src/vespa/searchlib/query/tree/rectangle.h26
-rw-r--r--searchlib/src/vespa/searchlib/query/tree/simplequery.h132
-rw-r--r--searchlib/src/vespa/searchlib/query/tree/stackdumpcreator.cpp301
-rw-r--r--searchlib/src/vespa/searchlib/query/tree/stackdumpcreator.h19
-rw-r--r--searchlib/src/vespa/searchlib/query/tree/stackdumpquerycreator.h175
-rw-r--r--searchlib/src/vespa/searchlib/query/tree/templatetermvisitor.h59
-rw-r--r--searchlib/src/vespa/searchlib/query/tree/term.cpp27
-rw-r--r--searchlib/src/vespa/searchlib/query/tree/term.h78
-rw-r--r--searchlib/src/vespa/searchlib/query/tree/termnodes.cpp29
-rw-r--r--searchlib/src/vespa/searchlib/query/tree/termnodes.h123
-rw-r--r--searchlib/src/vespa/searchlib/query/weight.h52
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/.gitignore3
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/CMakeLists.txt54
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/OWNERS1
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/andnotsearch.cpp163
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/andnotsearch.h101
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/andsearch.cpp123
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/andsearch.h37
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/andsearchnostrict.h61
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/andsearchstrict.h109
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/begin_and_end_id.h10
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/blueprint.cpp562
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/blueprint.h314
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/booleanmatchiteratorwrapper.cpp48
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/booleanmatchiteratorwrapper.h64
-rwxr-xr-xsearchlib/src/vespa/searchlib/queryeval/create-class-cpp.sh29
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/create-class-h.sh27
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/create-interface.sh23
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/create_blueprint_visitor_helper.cpp20
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/create_blueprint_visitor_helper.h143
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/document_weight_search_iterator.cpp4
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/document_weight_search_iterator.h62
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/dot_product_blueprint.cpp92
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/dot_product_blueprint.h47
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/dot_product_search.cpp154
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/dot_product_search.h46
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/emptysearch.cpp29
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/emptysearch.h27
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/equiv_blueprint.cpp74
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/equiv_blueprint.h36
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/equivsearch.cpp72
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/equivsearch.h32
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/fake_requestcontext.cpp15
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/fake_requestcontext.h34
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/fake_result.cpp39
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/fake_result.h108
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/fake_search.cpp56
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/fake_search.h45
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/fake_searchable.cpp108
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/fake_searchable.h68
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/field_spec.cpp19
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/field_spec.h119
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/get_weight_from_node.cpp48
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/get_weight_from_node.h15
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/hitcollector.cpp313
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/hitcollector.h214
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp584
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h181
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/irequestcontext.h33
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/isourceselector.cpp16
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/isourceselector.h103
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/iterator_pack.cpp9
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/iterator_pack.h84
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/iterators.cpp24
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/iterators.h38
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/leaf_blueprints.cpp91
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/leaf_blueprints.h82
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/monitoring_dump_iterator.cpp37
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/monitoring_dump_iterator.h38
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/monitoring_search_iterator.cpp239
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/monitoring_search_iterator.h131
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/multibitvectoriterator.cpp258
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/multibitvectoriterator.h39
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/multisearch.cpp95
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/multisearch.h59
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/nearsearch.cpp313
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/nearsearch.h157
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/orlikesearch.h73
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/orsearch.cpp119
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/orsearch.h33
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/posting_info.h45
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/predicate_blueprint.cpp345
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/predicate_blueprint.h94
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/predicate_search.cpp310
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/predicate_search.h71
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/ranksearch.cpp64
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/ranksearch.h33
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/scores.h21
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/searchable.cpp33
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/searchable.h60
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/searchiterator.cpp129
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/searchiterator.h345
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/simple_phrase_blueprint.cpp105
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/simple_phrase_blueprint.h49
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/simple_phrase_search.cpp201
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/simple_phrase_search.h59
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/simpleresult.cpp67
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/simpleresult.h87
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/simplesearch.cpp49
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/simplesearch.h41
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/sourceblendersearch.cpp187
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/sourceblendersearch.h92
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/split_float.cpp29
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/split_float.h24
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/termasstring.cpp120
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/termasstring.h30
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/termwise_blueprint_helper.cpp43
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/termwise_blueprint_helper.h31
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/termwise_search.cpp62
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/termwise_search.h27
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/test/CMakeLists.txt5
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/test/eagerchild.h23
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/test/leafspec.h61
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/test/searchhistory.h58
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/test/trackedsearch.h74
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/test/wandspec.h53
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/truesearch.cpp33
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/truesearch.h25
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/unpackinfo.cpp104
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/unpackinfo.h69
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/wand/CMakeLists.txt10
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_blueprint.cpp126
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_blueprint.h75
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_search.cpp263
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_search.h85
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/wand/wand_parts.cpp27
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/wand/wand_parts.h615
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/wand/weak_and_heap.cpp41
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/wand/weak_and_heap.h67
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/wand/weak_and_search.cpp143
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/wand/weak_and_search.h26
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/weighted_set_term_blueprint.cpp89
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/weighted_set_term_blueprint.h47
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.cpp161
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.h41
-rw-r--r--searchlib/src/vespa/searchlib/test/.gitignore6
-rw-r--r--searchlib/src/vespa/searchlib/test/CMakeLists.txt12
-rw-r--r--searchlib/src/vespa/searchlib/test/OWNERS1
-rw-r--r--searchlib/src/vespa/searchlib/test/diskindex/.gitignore2
-rw-r--r--searchlib/src/vespa/searchlib/test/diskindex/CMakeLists.txt7
-rw-r--r--searchlib/src/vespa/searchlib/test/diskindex/testdiskindex.cpp120
-rw-r--r--searchlib/src/vespa/searchlib/test/diskindex/testdiskindex.h26
-rw-r--r--searchlib/src/vespa/searchlib/test/diskindex/threelevelcountbuffers.cpp133
-rw-r--r--searchlib/src/vespa/searchlib/test/diskindex/threelevelcountbuffers.h82
-rw-r--r--searchlib/src/vespa/searchlib/test/document_weight_attribute_helper.cpp9
-rw-r--r--searchlib/src/vespa/searchlib/test/document_weight_attribute_helper.h55
-rw-r--r--searchlib/src/vespa/searchlib/test/fakedata/.gitignore2
-rw-r--r--searchlib/src/vespa/searchlib/test/fakedata/CMakeLists.txt16
-rw-r--r--searchlib/src/vespa/searchlib/test/fakedata/bitdecode64.cpp22
-rw-r--r--searchlib/src/vespa/searchlib/test/fakedata/bitdecode64.h91
-rw-r--r--searchlib/src/vespa/searchlib/test/fakedata/bitencode64.cpp37
-rw-r--r--searchlib/src/vespa/searchlib/test/fakedata/bitencode64.h59
-rw-r--r--searchlib/src/vespa/searchlib/test/fakedata/fakeegcompr64filterocc.cpp1521
-rw-r--r--searchlib/src/vespa/searchlib/test/fakedata/fakeegcompr64filterocc.h121
-rw-r--r--searchlib/src/vespa/searchlib/test/fakedata/fakefilterocc.cpp206
-rw-r--r--searchlib/src/vespa/searchlib/test/fakedata/fakefilterocc.h74
-rw-r--r--searchlib/src/vespa/searchlib/test/fakedata/fakememtreeocc.cpp430
-rw-r--r--searchlib/src/vespa/searchlib/test/fakedata/fakememtreeocc.h287
-rw-r--r--searchlib/src/vespa/searchlib/test/fakedata/fakeposting.cpp61
-rw-r--r--searchlib/src/vespa/searchlib/test/fakedata/fakeposting.h105
-rw-r--r--searchlib/src/vespa/searchlib/test/fakedata/fakeword.cpp796
-rw-r--r--searchlib/src/vespa/searchlib/test/fakedata/fakeword.h355
-rw-r--r--searchlib/src/vespa/searchlib/test/fakedata/fakewordset.cpp161
-rw-r--r--searchlib/src/vespa/searchlib/test/fakedata/fakewordset.h92
-rw-r--r--searchlib/src/vespa/searchlib/test/fakedata/fakezcbfilterocc.cpp268
-rw-r--r--searchlib/src/vespa/searchlib/test/fakedata/fakezcbfilterocc.h75
-rw-r--r--searchlib/src/vespa/searchlib/test/fakedata/fakezcfilterocc.cpp1823
-rw-r--r--searchlib/src/vespa/searchlib/test/fakedata/fakezcfilterocc.h119
-rw-r--r--searchlib/src/vespa/searchlib/test/fakedata/fpfactory.cpp120
-rw-r--r--searchlib/src/vespa/searchlib/test/fakedata/fpfactory.h84
-rw-r--r--searchlib/src/vespa/searchlib/test/initrange.cpp185
-rw-r--r--searchlib/src/vespa/searchlib/test/initrange.h38
-rw-r--r--searchlib/src/vespa/searchlib/test/memoryindex/CMakeLists.txt5
-rw-r--r--searchlib/src/vespa/searchlib/test/memoryindex/ordereddocumentinserter.h119
-rw-r--r--searchlib/src/vespa/searchlib/test/statefile.cpp48
-rw-r--r--searchlib/src/vespa/searchlib/test/statefile.h22
-rw-r--r--searchlib/src/vespa/searchlib/test/statestring.cpp98
-rw-r--r--searchlib/src/vespa/searchlib/test/statestring.h24
-rw-r--r--searchlib/src/vespa/searchlib/transactionlog/.gitignore6
-rw-r--r--searchlib/src/vespa/searchlib/transactionlog/CMakeLists.txt14
-rw-r--r--searchlib/src/vespa/searchlib/transactionlog/OWNERS1
-rw-r--r--searchlib/src/vespa/searchlib/transactionlog/common.cpp107
-rw-r--r--searchlib/src/vespa/searchlib/transactionlog/common.h100
-rw-r--r--searchlib/src/vespa/searchlib/transactionlog/domain.cpp405
-rw-r--r--searchlib/src/vespa/searchlib/transactionlog/domain.h125
-rw-r--r--searchlib/src/vespa/searchlib/transactionlog/domainpart.cpp681
-rw-r--r--searchlib/src/vespa/searchlib/transactionlog/domainpart.h123
-rw-r--r--searchlib/src/vespa/searchlib/transactionlog/nosyncproxy.cpp28
-rw-r--r--searchlib/src/vespa/searchlib/transactionlog/nosyncproxy.h27
-rw-r--r--searchlib/src/vespa/searchlib/transactionlog/session.cpp275
-rw-r--r--searchlib/src/vespa/searchlib/transactionlog/session.h94
-rw-r--r--searchlib/src/vespa/searchlib/transactionlog/syncproxy.h27
-rw-r--r--searchlib/src/vespa/searchlib/transactionlog/trans_log_server_explorer.cpp71
-rw-r--r--searchlib/src/vespa/searchlib/transactionlog/trans_log_server_explorer.h27
-rw-r--r--searchlib/src/vespa/searchlib/transactionlog/translogclient.cpp402
-rw-r--r--searchlib/src/vespa/searchlib/transactionlog/translogclient.h140
-rw-r--r--searchlib/src/vespa/searchlib/transactionlog/translogserver.cpp672
-rw-r--r--searchlib/src/vespa/searchlib/transactionlog/translogserver.h110
-rw-r--r--searchlib/src/vespa/searchlib/transactionlog/translogserverapp.cpp68
-rw-r--r--searchlib/src/vespa/searchlib/transactionlog/translogserverapp.h46
-rw-r--r--searchlib/src/vespa/searchlib/util/.gitignore6
-rw-r--r--searchlib/src/vespa/searchlib/util/CMakeLists.txt25
-rw-r--r--searchlib/src/vespa/searchlib/util/bufferwriter.cpp43
-rw-r--r--searchlib/src/vespa/searchlib/util/bufferwriter.h55
-rw-r--r--searchlib/src/vespa/searchlib/util/comprbuffer.cpp147
-rw-r--r--searchlib/src/vespa/searchlib/util/comprbuffer.h98
-rw-r--r--searchlib/src/vespa/searchlib/util/comprfile.cpp650
-rw-r--r--searchlib/src/vespa/searchlib/util/comprfile.h456
-rw-r--r--searchlib/src/vespa/searchlib/util/dirtraverse.cpp289
-rw-r--r--searchlib/src/vespa/searchlib/util/dirtraverse.h67
-rw-r--r--searchlib/src/vespa/searchlib/util/drainingbufferwriter.cpp41
-rw-r--r--searchlib/src/vespa/searchlib/util/drainingbufferwriter.h32
-rw-r--r--searchlib/src/vespa/searchlib/util/filealign.cpp145
-rw-r--r--searchlib/src/vespa/searchlib/util/filealign.h138
-rw-r--r--searchlib/src/vespa/searchlib/util/fileheadertk.cpp23
-rw-r--r--searchlib/src/vespa/searchlib/util/fileheadertk.h23
-rw-r--r--searchlib/src/vespa/searchlib/util/filekit.cpp108
-rw-r--r--searchlib/src/vespa/searchlib/util/filekit.h35
-rw-r--r--searchlib/src/vespa/searchlib/util/filesizecalculator.cpp59
-rw-r--r--searchlib/src/vespa/searchlib/util/filesizecalculator.h26
-rw-r--r--searchlib/src/vespa/searchlib/util/fileutil.cpp176
-rw-r--r--searchlib/src/vespa/searchlib/util/fileutil.h389
-rw-r--r--searchlib/src/vespa/searchlib/util/foldedstringcompare.cpp82
-rw-r--r--searchlib/src/vespa/searchlib/util/foldedstringcompare.h58
-rw-r--r--searchlib/src/vespa/searchlib/util/inline.h5
-rw-r--r--searchlib/src/vespa/searchlib/util/ioerrorhandler.cpp96
-rw-r--r--searchlib/src/vespa/searchlib/util/ioerrorhandler.h61
-rw-r--r--searchlib/src/vespa/searchlib/util/logutil.cpp54
-rw-r--r--searchlib/src/vespa/searchlib/util/logutil.h29
-rw-r--r--searchlib/src/vespa/searchlib/util/memorytub.h94
-rw-r--r--searchlib/src/vespa/searchlib/util/memorytub_impl.h202
-rw-r--r--searchlib/src/vespa/searchlib/util/memoryusage.h123
-rw-r--r--searchlib/src/vespa/searchlib/util/postingpriorityqueue.h258
-rw-r--r--searchlib/src/vespa/searchlib/util/rand48.h44
-rw-r--r--searchlib/src/vespa/searchlib/util/randomgenerator.h63
-rw-r--r--searchlib/src/vespa/searchlib/util/rawbuf.cpp360
-rw-r--r--searchlib/src/vespa/searchlib/util/rawbuf.h163
-rw-r--r--searchlib/src/vespa/searchlib/util/runnable.h42
-rw-r--r--searchlib/src/vespa/searchlib/util/searchable_stats.h44
-rw-r--r--searchlib/src/vespa/searchlib/util/sigbushandler.cpp168
-rw-r--r--searchlib/src/vespa/searchlib/util/sigbushandler.h60
-rw-r--r--searchlib/src/vespa/searchlib/util/slime_output_raw_buf_adapter.cpp8
-rw-r--r--searchlib/src/vespa/searchlib/util/slime_output_raw_buf_adapter.h24
-rw-r--r--searchlib/src/vespa/searchlib/util/sort.h143
-rw-r--r--searchlib/src/vespa/searchlib/util/statebuf.cpp215
-rw-r--r--searchlib/src/vespa/searchlib/util/statebuf.h92
-rw-r--r--searchlib/src/vespa/searchlib/util/statefile.cpp460
-rw-r--r--searchlib/src/vespa/searchlib/util/statefile.h106
-rw-r--r--searchlib/src/vespa/searchlib/util/stringenum.cpp131
-rw-r--r--searchlib/src/vespa/searchlib/util/stringenum.h147
-rw-r--r--searchlib/src/vespa/searchlib/util/url.cpp555
-rw-r--r--searchlib/src/vespa/searchlib/util/url.h277
-rw-r--r--searchlib/testrun/.gitignore9
2416 files changed, 320462 insertions, 0 deletions
diff --git a/searchlib/.gitignore b/searchlib/.gitignore
new file mode 100644
index 00000000000..ef6cddb9800
--- /dev/null
+++ b/searchlib/.gitignore
@@ -0,0 +1,9 @@
+bin
+lib
+target
+*.iml
+*.ipr
+*.iws
+/pom.xml.build
+Makefile
+Testing
diff --git a/searchlib/CMakeLists.txt b/searchlib/CMakeLists.txt
new file mode 100644
index 00000000000..dcd8c86e870
--- /dev/null
+++ b/searchlib/CMakeLists.txt
@@ -0,0 +1,203 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_define_module(
+ DEPENDS
+ fastos
+ vespalog
+ vespalib
+ staging_vespalib
+ fnet
+ configdefinitions
+ metrics
+ fastlib_fast
+ document
+ config_cloudconfig
+ persistencetypes
+ searchcommon
+
+ EXTERNAL_DEPENDS
+ icui18n
+
+ LIBS
+ src/vespa/searchlib
+ src/vespa/searchlib/aggregation
+ src/vespa/searchlib/attribute
+ src/vespa/searchlib/bitcompression
+ src/vespa/searchlib/btree
+ src/vespa/searchlib/common
+ src/vespa/searchlib/config
+ src/vespa/searchlib/diskindex
+ src/vespa/searchlib/docstore
+ src/vespa/searchlib/engine
+ src/vespa/searchlib/expression
+ src/vespa/searchlib/features
+ src/vespa/searchlib/features/fieldmatch
+ src/vespa/searchlib/features/rankingexpression
+ src/vespa/searchlib/fef
+ src/vespa/searchlib/fef/test
+ src/vespa/searchlib/fef/test/plugin
+ src/vespa/searchlib/grouping
+ src/vespa/searchlib/index
+ src/vespa/searchlib/memoryindex
+ src/vespa/searchlib/parsequery
+ src/vespa/searchlib/predicate
+ src/vespa/searchlib/query
+ src/vespa/searchlib/query/tree
+ src/vespa/searchlib/queryeval
+ src/vespa/searchlib/queryeval/test
+ src/vespa/searchlib/queryeval/wand
+ src/vespa/searchlib/test
+ src/vespa/searchlib/test/diskindex
+ src/vespa/searchlib/test/fakedata
+ src/vespa/searchlib/test/memoryindex
+ src/vespa/searchlib/transactionlog
+ src/vespa/searchlib/util
+
+ APPS
+ src/apps/docstore
+ src/apps/expgolomb
+ src/apps/fileheaderinspect
+ src/apps/loadattribute
+ src/apps/tests
+ src/apps/uniform
+ src/apps/vespa-index-inspect
+ src/apps/vespa-ranking-expression-analyzer
+
+ TESTS
+ src/tests/aggregator
+ src/tests/alignment
+ src/tests/attribute
+ src/tests/attribute/attributefilewriter
+ src/tests/attribute/attributemanager
+ src/tests/attribute/bitvector
+ src/tests/attribute/comparator
+ src/tests/attribute/document_weight_iterator
+ src/tests/attribute/enumeratedsave
+ src/tests/attribute/enumstore
+ src/tests/attribute/extendattributes
+ src/tests/attribute/multivaluemapping
+ src/tests/attribute/postinglist
+ src/tests/attribute/postinglistattribute
+ src/tests/attribute/searchable
+ src/tests/attribute/searchcontext
+ src/tests/attribute/sourceselector
+ src/tests/attribute/stringattribute
+ src/tests/attribute/tensorattribute
+ src/tests/bitcompression/expgolomb
+ src/tests/bitvector
+ src/tests/btree
+ src/tests/bytecomplens
+ src/tests/common/bitvector
+ src/tests/common/foregroundtaskexecutor
+ src/tests/common/location
+ src/tests/common/packets
+ src/tests/common/rcuvector
+ src/tests/common/resultset
+ src/tests/common/sequencedtaskexecutor
+ src/tests/common/summaryfeatures
+ src/tests/datastore
+ src/tests/diskindex/bitvector
+ src/tests/diskindex/diskindex
+ src/tests/diskindex/fieldwriter
+ src/tests/diskindex/fusion
+ src/tests/diskindex/pagedict4
+ src/tests/document_store
+ src/tests/document_store/visitor
+ src/tests/engine/docsumapi
+ src/tests/engine/monitorapi
+ src/tests/engine/searchapi
+ src/tests/engine/transportserver
+ src/tests/features
+ src/tests/features/beta
+ src/tests/features/element_completeness
+ src/tests/features/element_similarity_feature
+ src/tests/features/euclidean_distance
+ src/tests/features/item_raw_score
+ src/tests/features/native_dot_product
+ src/tests/features/ranking_expression
+ src/tests/features/raw_score
+ src/tests/features/subqueries
+ src/tests/features/tensor
+ src/tests/features/tensor_from_labels
+ src/tests/features/tensor_from_weighted_set
+ src/tests/features/text_similarity_feature
+ src/tests/features/util
+ src/tests/fef
+ src/tests/fef/attributecontent
+ src/tests/fef/featurenamebuilder
+ src/tests/fef/featurenameparser
+ src/tests/fef/featureoverride
+ src/tests/fef/object_passing
+ src/tests/fef/parameter
+ src/tests/fef/phrasesplitter
+ src/tests/fef/properties
+ src/tests/fef/rank_program
+ src/tests/fef/resolver
+ src/tests/fef/table
+ src/tests/fef/termfieldmodel
+ src/tests/fef/termmatchdatamerger
+ src/tests/fileheaderinspect
+ src/tests/fileheadertk
+ src/tests/forcelink
+ src/tests/grouping
+ src/tests/groupingengine
+ src/tests/hitcollector
+ src/tests/index/docbuilder
+ src/tests/index/doctypebuilder
+ src/tests/indexmetainfo
+ src/tests/ld-library-path
+ src/tests/memoryindex/btree
+ src/tests/memoryindex/compact_document_words_store
+ src/tests/memoryindex/datastore
+ src/tests/memoryindex/dictionary
+ src/tests/memoryindex/document_remover
+ src/tests/memoryindex/documentinverter
+ src/tests/memoryindex/fieldinverter
+ src/tests/memoryindex/memoryindex
+ src/tests/memoryindex/urlfieldinverter
+ src/tests/memorytub
+ src/tests/nativerank
+ src/tests/nearsearch
+ src/tests/postinglistbm
+ src/tests/predicate
+ src/tests/prettyfloat
+ src/tests/query
+ src/tests/queryeval
+ src/tests/queryeval/blueprint
+ src/tests/queryeval/booleanmatchiteratorwrapper
+ src/tests/queryeval/dot_product
+ src/tests/queryeval/equiv
+ src/tests/queryeval/fake_searchable
+ src/tests/queryeval/getnodeweight
+ src/tests/queryeval/monitoring_search_iterator
+ src/tests/queryeval/multibitvectoriterator
+ src/tests/queryeval/parallel_weak_and
+ src/tests/queryeval/predicate
+ src/tests/queryeval/simple_phrase
+ src/tests/queryeval/sourceblender
+ src/tests/queryeval/sparse_vector_benchmark
+ src/tests/queryeval/termwise_eval
+ src/tests/queryeval/weak_and
+ src/tests/queryeval/weak_and_heap
+ src/tests/queryeval/weak_and_scorers
+ src/tests/queryeval/weighted_set_term
+ src/tests/rankingexpression/feature_name_extractor
+ src/tests/ranksetup
+ src/tests/ranksetup/verify_feature
+ src/tests/sort
+ src/tests/sortresults
+ src/tests/sortspec
+ src/tests/stackdumpiterator
+ src/tests/stringenum
+ src/tests/transactionlog
+ src/tests/transactionlogstress
+ src/tests/true
+ src/tests/url
+ src/tests/util
+ src/tests/util/bufferwriter
+ src/tests/util/ioerrorhandler
+ src/tests/util/searchable_stats
+ src/tests/util/sigbushandler
+ src/tests/util/slime_output_raw_buf_adapter
+ src/tests/util/statebuf
+ src/tests/util/statefile
+)
diff --git a/searchlib/OWNERS b/searchlib/OWNERS
new file mode 100644
index 00000000000..9289349b7db
--- /dev/null
+++ b/searchlib/OWNERS
@@ -0,0 +1,4 @@
+havardpe
+balder
+tegge
+geirst
diff --git a/searchlib/README b/searchlib/README
new file mode 100644
index 00000000000..0de2fe40a4f
--- /dev/null
+++ b/searchlib/README
@@ -0,0 +1,3 @@
+Vespa Search library
+
+Containing some basic common search components and apis.
diff --git a/searchlib/README-gbdt b/searchlib/README-gbdt
new file mode 100644
index 00000000000..64cb89a5c18
--- /dev/null
+++ b/searchlib/README-gbdt
@@ -0,0 +1 @@
+This package provides the vespa-gbdt-converter tool to convert a gradient boost decision tree output from the GBDT tool into a Vespa ranking expression.
diff --git a/searchlib/README-treenet b/searchlib/README-treenet
new file mode 100644
index 00000000000..1253062470e
--- /dev/null
+++ b/searchlib/README-treenet
@@ -0,0 +1 @@
+This package provides the vespa-treenet-converter tool to convert a treenet model output from treenet expression into a Vespa ranking expression.
diff --git a/searchlib/pom.xml b/searchlib/pom.xml
new file mode 100644
index 00000000000..565370de634
--- /dev/null
+++ b/searchlib/pom.xml
@@ -0,0 +1,98 @@
+<?xml version="1.0"?>
+<!-- Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+ <parent>
+ <groupId>com.yahoo.vespa</groupId>
+ <artifactId>parent</artifactId>
+ <version>6-SNAPSHOT</version>
+ <relativePath>../parent/pom.xml</relativePath>
+ </parent>
+ <artifactId>searchlib</artifactId>
+ <packaging>container-plugin</packaging>
+ <version>6-SNAPSHOT</version>
+ <name>searchlib</name>
+ <description>Search library functions.</description>
+ <dependencies>
+ <dependency>
+ <groupId>com.google.guava</groupId>
+ <artifactId>guava</artifactId>
+ <version>13.0.1</version>
+ <scope>provided</scope>
+ </dependency>
+ <dependency>
+ <groupId>junit</groupId>
+ <artifactId>junit</artifactId>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>com.yahoo.vespa</groupId>
+ <artifactId>document</artifactId>
+ <version>${project.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>com.yahoo.vespa</groupId>
+ <artifactId>vespajlib</artifactId>
+ <version>${project.version}</version>
+ </dependency>
+ </dependencies>
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>com.yahoo.vespa</groupId>
+ <artifactId>bundle-plugin</artifactId>
+ <extensions>true</extensions>
+ </plugin>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-compiler-plugin</artifactId>
+ <configuration>
+ <compilerArgs>
+ <arg>-Xlint:rawtypes</arg>
+ <arg>-Xlint:unchecked</arg>
+ <arg>-Werror</arg>
+ </compilerArgs>
+ </configuration>
+ </plugin>
+ <plugin>
+ <groupId>org.codehaus.mojo</groupId>
+ <artifactId>javacc-maven-plugin</artifactId>
+ <executions>
+ <execution>
+ <id>javacc</id>
+ <goals>
+ <goal>javacc</goal>
+ </goals>
+ <configuration>
+ <lookAhead>1</lookAhead>
+ <isStatic>false</isStatic>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-install-plugin</artifactId>
+ <configuration>
+ <updateReleaseInfo>true</updateReleaseInfo>
+ </configuration>
+ </plugin>
+ <plugin>
+ <groupId>com.yahoo.vespa</groupId>
+ <artifactId>config-class-plugin</artifactId>
+ <version>${project.version}</version>
+ <configuration>
+ <defFilesDirectories>src/vespa/searchlib/config/</defFilesDirectories>
+ </configuration>
+ <executions>
+ <execution>
+ <id>config-gen</id>
+ <goals>
+ <goal>config-gen</goal>
+ </goals>
+ </execution>
+ </executions>
+ </plugin>
+ </plugins>
+ </build>
+</project>
diff --git a/searchlib/src/.gitignore b/searchlib/src/.gitignore
new file mode 100644
index 00000000000..3e2fb17989e
--- /dev/null
+++ b/searchlib/src/.gitignore
@@ -0,0 +1,7 @@
+*.dsp
+*.mak
+Makefile.ini
+config_command.sh
+html
+latex
+project.dsw
diff --git a/searchlib/src/Doxyfile b/searchlib/src/Doxyfile
new file mode 100644
index 00000000000..931ba9fba8e
--- /dev/null
+++ b/searchlib/src/Doxyfile
@@ -0,0 +1,1162 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+# Doxyfile 1.3.9.1
+
+# This file describes the settings to be used by the documentation system
+# doxygen (www.doxygen.org) for a project
+#
+# All text after a hash (#) is considered a comment and will be ignored
+# The format is:
+# TAG = value [value, ...]
+# For lists items can also be appended using:
+# TAG += value [value, ...]
+# Values that contain spaces should be placed between quotes (" ")
+
+#---------------------------------------------------------------------------
+# Project related configuration options
+#---------------------------------------------------------------------------
+
+# The PROJECT_NAME tag is a single word (or a sequence of words surrounded
+# by quotes) that should identify the project.
+
+PROJECT_NAME = SearchLib
+
+# The PROJECT_NUMBER tag can be used to enter a project or revision number.
+# This could be handy for archiving the generated documentation or
+# if some version control system is used.
+
+PROJECT_NUMBER =
+
+# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute)
+# base path where the generated documentation will be put.
+# If a relative path is entered, it will be relative to the location
+# where doxygen was started. If left blank the current directory will be used.
+
+OUTPUT_DIRECTORY =
+
+# If the CREATE_SUBDIRS tag is set to YES, then doxygen will create
+# 4096 sub-directories (in 2 levels) under the output directory of each output
+# format and will distribute the generated files over these directories.
+# Enabling this option can be useful when feeding doxygen a huge amount of source
+# files, where putting all generated files in the same directory would otherwise
+# cause performance problems for the file system.
+
+CREATE_SUBDIRS = NO
+
+# The OUTPUT_LANGUAGE tag is used to specify the language in which all
+# documentation generated by doxygen is written. Doxygen will use this
+# information to generate all constant output in the proper language.
+# The default language is English, other supported languages are:
+# Brazilian, Catalan, Chinese, Chinese-Traditional, Croatian, Czech, Danish,
+# Dutch, Finnish, French, German, Greek, Hungarian, Italian, Japanese,
+# Japanese-en (Japanese with English messages), Korean, Korean-en, Norwegian,
+# Polish, Portuguese, Romanian, Russian, Serbian, Slovak, Slovene, Spanish,
+# Swedish, and Ukrainian.
+
+OUTPUT_LANGUAGE = English
+
+# This tag can be used to specify the encoding used in the generated output.
+# The encoding is not always determined by the language that is chosen,
+# but also whether or not the output is meant for Windows or non-Windows users.
+# In case there is a difference, setting the USE_WINDOWS_ENCODING tag to YES
+# forces the Windows encoding (this is the default for the Windows binary),
+# whereas setting the tag to NO uses a Unix-style encoding (the default for
+# all platforms other than Windows).
+
+USE_WINDOWS_ENCODING = NO
+
+# If the BRIEF_MEMBER_DESC tag is set to YES (the default) Doxygen will
+# include brief member descriptions after the members that are listed in
+# the file and class documentation (similar to JavaDoc).
+# Set to NO to disable this.
+
+BRIEF_MEMBER_DESC = YES
+
+# If the REPEAT_BRIEF tag is set to YES (the default) Doxygen will prepend
+# the brief description of a member or function before the detailed description.
+# Note: if both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the
+# brief descriptions will be completely suppressed.
+
+REPEAT_BRIEF = YES
+
+# This tag implements a quasi-intelligent brief description abbreviator
+# that is used to form the text in various listings. Each string
+# in this list, if found as the leading text of the brief description, will be
+# stripped from the text and the result after processing the whole list, is used
+# as the annotated text. Otherwise, the brief description is used as-is. If left
+# blank, the following values are used ("$name" is automatically replaced with the
+# name of the entity): "The $name class" "The $name widget" "The $name file"
+# "is" "provides" "specifies" "contains" "represents" "a" "an" "the"
+
+ABBREVIATE_BRIEF =
+
+# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then
+# Doxygen will generate a detailed section even if there is only a brief
+# description.
+
+ALWAYS_DETAILED_SEC = NO
+
+# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all inherited
+# members of a class in the documentation of that class as if those members were
+# ordinary class members. Constructors, destructors and assignment operators of
+# the base classes will not be shown.
+
+INLINE_INHERITED_MEMB = NO
+
+# If the FULL_PATH_NAMES tag is set to YES then Doxygen will prepend the full
+# path before files name in the file list and in the header files. If set
+# to NO the shortest path that makes the file name unique will be used.
+
+FULL_PATH_NAMES = YES
+
+# If the FULL_PATH_NAMES tag is set to YES then the STRIP_FROM_PATH tag
+# can be used to strip a user-defined part of the path. Stripping is
+# only done if one of the specified strings matches the left-hand part of
+# the path. The tag can be used to show relative paths in the file list.
+# If left blank the directory from which doxygen is run is used as the
+# path to strip.
+
+STRIP_FROM_PATH =
+
+# The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of
+# the path mentioned in the documentation of a class, which tells
+# the reader which header file to include in order to use a class.
+# If left blank only the name of the header file containing the class
+# definition is used. Otherwise one should specify the include paths that
+# are normally passed to the compiler using the -I flag.
+
+STRIP_FROM_INC_PATH =
+
+# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter
+# (but less readable) file names. This can be useful is your file systems
+# doesn't support long names like on DOS, Mac, or CD-ROM.
+
+SHORT_NAMES = NO
+
+# If the JAVADOC_AUTOBRIEF tag is set to YES then Doxygen
+# will interpret the first line (until the first dot) of a JavaDoc-style
+# comment as the brief description. If set to NO, the JavaDoc
+# comments will behave just like the Qt-style comments (thus requiring an
+# explicit @brief command for a brief description.
+
+JAVADOC_AUTOBRIEF = NO
+
+# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make Doxygen
+# treat a multi-line C++ special comment block (i.e. a block of //! or ///
+# comments) as a brief description. This used to be the default behaviour.
+# The new default is to treat a multi-line C++ comment block as a detailed
+# description. Set this tag to YES if you prefer the old behaviour instead.
+
+MULTILINE_CPP_IS_BRIEF = NO
+
+# If the DETAILS_AT_TOP tag is set to YES then Doxygen
+# will output the detailed description near the top, like JavaDoc.
+# If set to NO, the detailed description appears after the member
+# documentation.
+
+DETAILS_AT_TOP = NO
+
+# If the INHERIT_DOCS tag is set to YES (the default) then an undocumented
+# member inherits the documentation from any documented member that it
+# re-implements.
+
+INHERIT_DOCS = YES
+
+# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC
+# tag is set to YES, then doxygen will reuse the documentation of the first
+# member in the group (if any) for the other members of the group. By default
+# all members of a group must be documented explicitly.
+
+DISTRIBUTE_GROUP_DOC = NO
+
+# The TAB_SIZE tag can be used to set the number of spaces in a tab.
+# Doxygen uses this value to replace tabs by spaces in code fragments.
+
+TAB_SIZE = 8
+
+# This tag can be used to specify a number of aliases that acts
+# as commands in the documentation. An alias has the form "name=value".
+# For example adding "sideeffect=\par Side Effects:\n" will allow you to
+# put the command \sideeffect (or @sideeffect) in the documentation, which
+# will result in a user-defined paragraph with heading "Side Effects:".
+# You can put \n's in the value part of an alias to insert newlines.
+
+ALIASES =
+
+# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C sources
+# only. Doxygen will then generate output that is more tailored for C.
+# For instance, some of the names that are used will be different. The list
+# of all members will be omitted, etc.
+
+OPTIMIZE_OUTPUT_FOR_C = NO
+
+# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java sources
+# only. Doxygen will then generate output that is more tailored for Java.
+# For instance, namespaces will be presented as packages, qualified scopes
+# will look different, etc.
+
+OPTIMIZE_OUTPUT_JAVA = NO
+
+# Set the SUBGROUPING tag to YES (the default) to allow class member groups of
+# the same type (for instance a group of public functions) to be put as a
+# subgroup of that type (e.g. under the Public Functions section). Set it to
+# NO to prevent subgrouping. Alternatively, this can be done per class using
+# the \nosubgrouping command.
+
+SUBGROUPING = YES
+
+#---------------------------------------------------------------------------
+# Build related configuration options
+#---------------------------------------------------------------------------
+
+# If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in
+# documentation are documented, even if no documentation was available.
+# Private class members and static file members will be hidden unless
+# the EXTRACT_PRIVATE and EXTRACT_STATIC tags are set to YES
+
+EXTRACT_ALL = NO
+
+# If the EXTRACT_PRIVATE tag is set to YES all private members of a class
+# will be included in the documentation.
+
+EXTRACT_PRIVATE = NO
+
+# If the EXTRACT_STATIC tag is set to YES all static members of a file
+# will be included in the documentation.
+
+EXTRACT_STATIC = NO
+
+# If the EXTRACT_LOCAL_CLASSES tag is set to YES classes (and structs)
+# defined locally in source files will be included in the documentation.
+# If set to NO only classes defined in header files are included.
+
+EXTRACT_LOCAL_CLASSES = YES
+
+# This flag is only useful for Objective-C code. When set to YES local
+# methods, which are defined in the implementation section but not in
+# the interface are included in the documentation.
+# If set to NO (the default) only methods in the interface are included.
+
+EXTRACT_LOCAL_METHODS = NO
+
+# If the HIDE_UNDOC_MEMBERS tag is set to YES, Doxygen will hide all
+# undocumented members of documented classes, files or namespaces.
+# If set to NO (the default) these members will be included in the
+# various overviews, but no documentation section is generated.
+# This option has no effect if EXTRACT_ALL is enabled.
+
+HIDE_UNDOC_MEMBERS = NO
+
+# If the HIDE_UNDOC_CLASSES tag is set to YES, Doxygen will hide all
+# undocumented classes that are normally visible in the class hierarchy.
+# If set to NO (the default) these classes will be included in the various
+# overviews. This option has no effect if EXTRACT_ALL is enabled.
+
+HIDE_UNDOC_CLASSES = NO
+
+# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, Doxygen will hide all
+# friend (class|struct|union) declarations.
+# If set to NO (the default) these declarations will be included in the
+# documentation.
+
+HIDE_FRIEND_COMPOUNDS = NO
+
+# If the HIDE_IN_BODY_DOCS tag is set to YES, Doxygen will hide any
+# documentation blocks found inside the body of a function.
+# If set to NO (the default) these blocks will be appended to the
+# function's detailed documentation block.
+
+HIDE_IN_BODY_DOCS = NO
+
+# The INTERNAL_DOCS tag determines if documentation
+# that is typed after a \internal command is included. If the tag is set
+# to NO (the default) then the documentation will be excluded.
+# Set it to YES to include the internal documentation.
+
+INTERNAL_DOCS = NO
+
+# If the CASE_SENSE_NAMES tag is set to NO then Doxygen will only generate
+# file names in lower-case letters. If set to YES upper-case letters are also
+# allowed. This is useful if you have classes or files whose names only differ
+# in case and if your file system supports case sensitive file names. Windows
+# and Mac users are advised to set this option to NO.
+
+CASE_SENSE_NAMES = YES
+
+# If the HIDE_SCOPE_NAMES tag is set to NO (the default) then Doxygen
+# will show members with their full class and namespace scopes in the
+# documentation. If set to YES the scope will be hidden.
+
+HIDE_SCOPE_NAMES = NO
+
+# If the SHOW_INCLUDE_FILES tag is set to YES (the default) then Doxygen
+# will put a list of the files that are included by a file in the documentation
+# of that file.
+
+SHOW_INCLUDE_FILES = YES
+
+# If the INLINE_INFO tag is set to YES (the default) then a tag [inline]
+# is inserted in the documentation for inline members.
+
+INLINE_INFO = YES
+
+# If the SORT_MEMBER_DOCS tag is set to YES (the default) then doxygen
+# will sort the (detailed) documentation of file and class members
+# alphabetically by member name. If set to NO the members will appear in
+# declaration order.
+
+SORT_MEMBER_DOCS = YES
+
+# If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the
+# brief documentation of file, namespace and class members alphabetically
+# by member name. If set to NO (the default) the members will appear in
+# declaration order.
+
+SORT_BRIEF_DOCS = NO
+
+# If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be
+# sorted by fully-qualified names, including namespaces. If set to
+# NO (the default), the class list will be sorted only by class name,
+# not including the namespace part.
+# Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES.
+# Note: This option applies only to the class list, not to the
+# alphabetical list.
+
+SORT_BY_SCOPE_NAME = NO
+
+# The GENERATE_TODOLIST tag can be used to enable (YES) or
+# disable (NO) the todo list. This list is created by putting \todo
+# commands in the documentation.
+
+GENERATE_TODOLIST = YES
+
+# The GENERATE_TESTLIST tag can be used to enable (YES) or
+# disable (NO) the test list. This list is created by putting \test
+# commands in the documentation.
+
+GENERATE_TESTLIST = YES
+
+# The GENERATE_BUGLIST tag can be used to enable (YES) or
+# disable (NO) the bug list. This list is created by putting \bug
+# commands in the documentation.
+
+GENERATE_BUGLIST = YES
+
+# The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or
+# disable (NO) the deprecated list. This list is created by putting
+# \deprecated commands in the documentation.
+
+GENERATE_DEPRECATEDLIST= YES
+
+# The ENABLED_SECTIONS tag can be used to enable conditional
+# documentation sections, marked by \if sectionname ... \endif.
+
+ENABLED_SECTIONS =
+
+# The MAX_INITIALIZER_LINES tag determines the maximum number of lines
+# the initial value of a variable or define consists of for it to appear in
+# the documentation. If the initializer consists of more lines than specified
+# here it will be hidden. Use a value of 0 to hide initializers completely.
+# The appearance of the initializer of individual variables and defines in the
+# documentation can be controlled using \showinitializer or \hideinitializer
+# command in the documentation regardless of this setting.
+
+MAX_INITIALIZER_LINES = 30
+
+# Set the SHOW_USED_FILES tag to NO to disable the list of files generated
+# at the bottom of the documentation of classes and structs. If set to YES the
+# list will mention the files that were used to generate the documentation.
+
+SHOW_USED_FILES = YES
+
+# If the sources in your project are distributed over multiple directories
+# then setting the SHOW_DIRECTORIES tag to YES will show the directory hierarchy
+# in the documentation.
+
+SHOW_DIRECTORIES = YES
+
+#---------------------------------------------------------------------------
+# configuration options related to warning and progress messages
+#---------------------------------------------------------------------------
+
+# The QUIET tag can be used to turn on/off the messages that are generated
+# by doxygen. Possible values are YES and NO. If left blank NO is used.
+
+QUIET = NO
+
+# The WARNINGS tag can be used to turn on/off the warning messages that are
+# generated by doxygen. Possible values are YES and NO. If left blank
+# NO is used.
+
+WARNINGS = YES
+
+# If WARN_IF_UNDOCUMENTED is set to YES, then doxygen will generate warnings
+# for undocumented members. If EXTRACT_ALL is set to YES then this flag will
+# automatically be disabled.
+
+WARN_IF_UNDOCUMENTED = YES
+
+# If WARN_IF_DOC_ERROR is set to YES, doxygen will generate warnings for
+# potential errors in the documentation, such as not documenting some
+# parameters in a documented function, or documenting parameters that
+# don't exist or using markup commands wrongly.
+
+WARN_IF_DOC_ERROR = YES
+
+# The WARN_FORMAT tag determines the format of the warning messages that
+# doxygen can produce. The string should contain the $file, $line, and $text
+# tags, which will be replaced by the file and line number from which the
+# warning originated and the warning text.
+
+WARN_FORMAT = "$file:$line: $text"
+
+# The WARN_LOGFILE tag can be used to specify a file to which warning
+# and error messages should be written. If left blank the output is written
+# to stderr.
+
+WARN_LOGFILE =
+
+#---------------------------------------------------------------------------
+# configuration options related to the input files
+#---------------------------------------------------------------------------
+
+# The INPUT tag can be used to specify the files and/or directories that contain
+# documented source files. You may enter file names like "myfile.cpp" or
+# directories like "/usr/src/myproject". Separate the files or directories
+# with spaces.
+
+INPUT = searchlib
+
+# If the value of the INPUT tag contains directories, you can use the
+# FILE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp
+# and *.h) to filter out the source-files in the directories. If left
+# blank the following patterns are tested:
+# *.c *.cc *.cxx *.cpp *.c++ *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh *.hxx *.hpp
+# *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm
+
+FILE_PATTERNS =
+
+# The RECURSIVE tag can be used to turn specify whether or not subdirectories
+# should be searched for input files as well. Possible values are YES and NO.
+# If left blank NO is used.
+
+RECURSIVE = YES
+
+# The EXCLUDE tag can be used to specify files and/or directories that should
+# excluded from the INPUT source files. This way you can easily exclude a
+# subdirectory from a directory tree whose root is specified with the INPUT tag.
+
+EXCLUDE =
+
+# The EXCLUDE_SYMLINKS tag can be used select whether or not files or directories
+# that are symbolic links (a Unix filesystem feature) are excluded from the input.
+
+EXCLUDE_SYMLINKS = NO
+
+# If the value of the INPUT tag contains directories, you can use the
+# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude
+# certain files from those directories.
+
+EXCLUDE_PATTERNS =
+
+# The EXAMPLE_PATH tag can be used to specify one or more files or
+# directories that contain example code fragments that are included (see
+# the \include command).
+
+EXAMPLE_PATH =
+
+# If the value of the EXAMPLE_PATH tag contains directories, you can use the
+# EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp
+# and *.h) to filter out the source-files in the directories. If left
+# blank all files are included.
+
+EXAMPLE_PATTERNS =
+
+# If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be
+# searched for input files to be used with the \include or \dontinclude
+# commands irrespective of the value of the RECURSIVE tag.
+# Possible values are YES and NO. If left blank NO is used.
+
+EXAMPLE_RECURSIVE = NO
+
+# The IMAGE_PATH tag can be used to specify one or more files or
+# directories that contain image that are included in the documentation (see
+# the \image command).
+
+IMAGE_PATH =
+
+# The INPUT_FILTER tag can be used to specify a program that doxygen should
+# invoke to filter for each input file. Doxygen will invoke the filter program
+# by executing (via popen()) the command <filter> <input-file>, where <filter>
+# is the value of the INPUT_FILTER tag, and <input-file> is the name of an
+# input file. Doxygen will then use the output that the filter program writes
+# to standard output. If FILTER_PATTERNS is specified, this tag will be
+# ignored.
+
+INPUT_FILTER =
+
+# The FILTER_PATTERNS tag can be used to specify filters on a per file pattern
+# basis. Doxygen will compare the file name with each pattern and apply the
+# filter if there is a match. The filters are a list of the form:
+# pattern=filter (like *.cpp=my_cpp_filter). See INPUT_FILTER for further
+# info on how filters are used. If FILTER_PATTERNS is empty, INPUT_FILTER
+# is applied to all files.
+
+FILTER_PATTERNS =
+
+# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using
+# INPUT_FILTER) will be used to filter the input files when producing source
+# files to browse (i.e. when SOURCE_BROWSER is set to YES).
+
+FILTER_SOURCE_FILES = NO
+
+#---------------------------------------------------------------------------
+# configuration options related to source browsing
+#---------------------------------------------------------------------------
+
+# If the SOURCE_BROWSER tag is set to YES then a list of source files will
+# be generated. Documented entities will be cross-referenced with these sources.
+# Note: To get rid of all source code in the generated output, make sure also
+# VERBATIM_HEADERS is set to NO.
+
+SOURCE_BROWSER = NO
+
+# Setting the INLINE_SOURCES tag to YES will include the body
+# of functions and classes directly in the documentation.
+
+INLINE_SOURCES = NO
+
+# Setting the STRIP_CODE_COMMENTS tag to YES (the default) will instruct
+# doxygen to hide any special comment blocks from generated source code
+# fragments. Normal C and C++ comments will always remain visible.
+
+STRIP_CODE_COMMENTS = YES
+
+# If the REFERENCED_BY_RELATION tag is set to YES (the default)
+# then for each documented function all documented
+# functions referencing it will be listed.
+
+REFERENCED_BY_RELATION = YES
+
+# If the REFERENCES_RELATION tag is set to YES (the default)
+# then for each documented function all documented entities
+# called/used by that function will be listed.
+
+REFERENCES_RELATION = YES
+
+# If the VERBATIM_HEADERS tag is set to YES (the default) then Doxygen
+# will generate a verbatim copy of the header file for each class for
+# which an include is specified. Set to NO to disable this.
+
+VERBATIM_HEADERS = YES
+
+#---------------------------------------------------------------------------
+# configuration options related to the alphabetical class index
+#---------------------------------------------------------------------------
+
+# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index
+# of all compounds will be generated. Enable this if the project
+# contains a lot of classes, structs, unions or interfaces.
+
+ALPHABETICAL_INDEX = NO
+
+# If the alphabetical index is enabled (see ALPHABETICAL_INDEX) then
+# the COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns
+# in which this list will be split (can be a number in the range [1..20])
+
+COLS_IN_ALPHA_INDEX = 5
+
+# In case all classes in a project start with a common prefix, all
+# classes will be put under the same header in the alphabetical index.
+# The IGNORE_PREFIX tag can be used to specify one or more prefixes that
+# should be ignored while generating the index headers.
+
+IGNORE_PREFIX =
+
+#---------------------------------------------------------------------------
+# configuration options related to the HTML output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_HTML tag is set to YES (the default) Doxygen will
+# generate HTML output.
+
+GENERATE_HTML = YES
+
+# The HTML_OUTPUT tag is used to specify where the HTML docs will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
+# put in front of it. If left blank `html' will be used as the default path.
+
+HTML_OUTPUT = html
+
+# The HTML_FILE_EXTENSION tag can be used to specify the file extension for
+# each generated HTML page (for example: .htm,.php,.asp). If it is left blank
+# doxygen will generate files with .html extension.
+
+HTML_FILE_EXTENSION = .html
+
+# The HTML_HEADER tag can be used to specify a personal HTML header for
+# each generated HTML page. If it is left blank doxygen will generate a
+# standard header.
+
+HTML_HEADER =
+
+# The HTML_FOOTER tag can be used to specify a personal HTML footer for
+# each generated HTML page. If it is left blank doxygen will generate a
+# standard footer.
+
+HTML_FOOTER =
+
+# The HTML_STYLESHEET tag can be used to specify a user-defined cascading
+# style sheet that is used by each HTML page. It can be used to
+# fine-tune the look of the HTML output. If the tag is left blank doxygen
+# will generate a default style sheet. Note that doxygen will try to copy
+# the style sheet file to the HTML output directory, so don't put your own
+# stylesheet in the HTML output directory as well, or it will be erased!
+
+HTML_STYLESHEET =
+
+# If the HTML_ALIGN_MEMBERS tag is set to YES, the members of classes,
+# files or namespaces will be aligned in HTML using tables. If set to
+# NO a bullet list will be used.
+
+HTML_ALIGN_MEMBERS = YES
+
+# If the GENERATE_HTMLHELP tag is set to YES, additional index files
+# will be generated that can be used as input for tools like the
+# Microsoft HTML help workshop to generate a compressed HTML help file (.chm)
+# of the generated HTML documentation.
+
+GENERATE_HTMLHELP = NO
+
+# If the GENERATE_HTMLHELP tag is set to YES, the CHM_FILE tag can
+# be used to specify the file name of the resulting .chm file. You
+# can add a path in front of the file if the result should not be
+# written to the html output directory.
+
+CHM_FILE =
+
+# If the GENERATE_HTMLHELP tag is set to YES, the HHC_LOCATION tag can
+# be used to specify the location (absolute path including file name) of
+# the HTML help compiler (hhc.exe). If non-empty doxygen will try to run
+# the HTML help compiler on the generated index.hhp.
+
+HHC_LOCATION =
+
+# If the GENERATE_HTMLHELP tag is set to YES, the GENERATE_CHI flag
+# controls if a separate .chi index file is generated (YES) or that
+# it should be included in the master .chm file (NO).
+
+GENERATE_CHI = NO
+
+# If the GENERATE_HTMLHELP tag is set to YES, the BINARY_TOC flag
+# controls whether a binary table of contents is generated (YES) or a
+# normal table of contents (NO) in the .chm file.
+
+BINARY_TOC = NO
+
+# The TOC_EXPAND flag can be set to YES to add extra items for group members
+# to the contents of the HTML help documentation and to the tree view.
+
+TOC_EXPAND = NO
+
+# The DISABLE_INDEX tag can be used to turn on/off the condensed index at
+# top of each HTML page. The value NO (the default) enables the index and
+# the value YES disables it.
+
+DISABLE_INDEX = NO
+
+# This tag can be used to set the number of enum values (range [1..20])
+# that doxygen will group on one line in the generated HTML documentation.
+
+ENUM_VALUES_PER_LINE = 4
+
+# If the GENERATE_TREEVIEW tag is set to YES, a side panel will be
+# generated containing a tree-like index structure (just like the one that
+# is generated for HTML Help). For this to work a browser that supports
+# JavaScript, DHTML, CSS and frames is required (for instance Mozilla 1.0+,
+# Netscape 6.0+, Internet explorer 5.0+, or Konqueror). Windows users are
+# probably better off using the HTML help feature.
+
+GENERATE_TREEVIEW = NO
+
+# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be
+# used to set the initial width (in pixels) of the frame in which the tree
+# is shown.
+
+TREEVIEW_WIDTH = 250
+
+#---------------------------------------------------------------------------
+# configuration options related to the LaTeX output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_LATEX tag is set to YES (the default) Doxygen will
+# generate Latex output.
+
+GENERATE_LATEX = YES
+
+# The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
+# put in front of it. If left blank `latex' will be used as the default path.
+
+LATEX_OUTPUT = latex
+
+# The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be
+# invoked. If left blank `latex' will be used as the default command name.
+
+LATEX_CMD_NAME = latex
+
+# The MAKEINDEX_CMD_NAME tag can be used to specify the command name to
+# generate index for LaTeX. If left blank `makeindex' will be used as the
+# default command name.
+
+MAKEINDEX_CMD_NAME = makeindex
+
+# If the COMPACT_LATEX tag is set to YES Doxygen generates more compact
+# LaTeX documents. This may be useful for small projects and may help to
+# save some trees in general.
+
+COMPACT_LATEX = NO
+
+# The PAPER_TYPE tag can be used to set the paper type that is used
+# by the printer. Possible values are: a4, a4wide, letter, legal and
+# executive. If left blank a4wide will be used.
+
+PAPER_TYPE = a4wide
+
+# The EXTRA_PACKAGES tag can be to specify one or more names of LaTeX
+# packages that should be included in the LaTeX output.
+
+EXTRA_PACKAGES =
+
+# The LATEX_HEADER tag can be used to specify a personal LaTeX header for
+# the generated latex document. The header should contain everything until
+# the first chapter. If it is left blank doxygen will generate a
+# standard header. Notice: only use this tag if you know what you are doing!
+
+LATEX_HEADER =
+
+# If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated
+# is prepared for conversion to pdf (using ps2pdf). The pdf file will
+# contain links (just like the HTML output) instead of page references
+# This makes the output suitable for online browsing using a pdf viewer.
+
+PDF_HYPERLINKS = NO
+
+# If the USE_PDFLATEX tag is set to YES, pdflatex will be used instead of
+# plain latex in the generated Makefile. Set this option to YES to get a
+# higher quality PDF documentation.
+
+USE_PDFLATEX = NO
+
+# If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \\batchmode.
+# command to the generated LaTeX files. This will instruct LaTeX to keep
+# running if errors occur, instead of asking the user for help.
+# This option is also used when generating formulas in HTML.
+
+LATEX_BATCHMODE = NO
+
+# If LATEX_HIDE_INDICES is set to YES then doxygen will not
+# include the index chapters (such as File Index, Compound Index, etc.)
+# in the output.
+
+LATEX_HIDE_INDICES = NO
+
+#---------------------------------------------------------------------------
+# configuration options related to the RTF output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_RTF tag is set to YES Doxygen will generate RTF output
+# The RTF output is optimized for Word 97 and may not look very pretty with
+# other RTF readers or editors.
+
+GENERATE_RTF = NO
+
+# The RTF_OUTPUT tag is used to specify where the RTF docs will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
+# put in front of it. If left blank `rtf' will be used as the default path.
+
+RTF_OUTPUT = rtf
+
+# If the COMPACT_RTF tag is set to YES Doxygen generates more compact
+# RTF documents. This may be useful for small projects and may help to
+# save some trees in general.
+
+COMPACT_RTF = NO
+
+# If the RTF_HYPERLINKS tag is set to YES, the RTF that is generated
+# will contain hyperlink fields. The RTF file will
+# contain links (just like the HTML output) instead of page references.
+# This makes the output suitable for online browsing using WORD or other
+# programs which support those fields.
+# Note: wordpad (write) and others do not support links.
+
+RTF_HYPERLINKS = NO
+
+# Load stylesheet definitions from file. Syntax is similar to doxygen's
+# config file, i.e. a series of assignments. You only have to provide
+# replacements, missing definitions are set to their default value.
+
+RTF_STYLESHEET_FILE =
+
+# Set optional variables used in the generation of an rtf document.
+# Syntax is similar to doxygen's config file.
+
+RTF_EXTENSIONS_FILE =
+
+#---------------------------------------------------------------------------
+# configuration options related to the man page output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_MAN tag is set to YES (the default) Doxygen will
+# generate man pages
+
+GENERATE_MAN = NO
+
+# The MAN_OUTPUT tag is used to specify where the man pages will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
+# put in front of it. If left blank `man' will be used as the default path.
+
+MAN_OUTPUT = man
+
+# The MAN_EXTENSION tag determines the extension that is added to
+# the generated man pages (default is the subroutine's section .3)
+
+MAN_EXTENSION = .3
+
+# If the MAN_LINKS tag is set to YES and Doxygen generates man output,
+# then it will generate one additional man file for each entity
+# documented in the real man page(s). These additional files
+# only source the real man page, but without them the man command
+# would be unable to find the correct page. The default is NO.
+
+MAN_LINKS = NO
+
+#---------------------------------------------------------------------------
+# configuration options related to the XML output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_XML tag is set to YES Doxygen will
+# generate an XML file that captures the structure of
+# the code including all documentation.
+
+GENERATE_XML = NO
+
+# The XML_OUTPUT tag is used to specify where the XML pages will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
+# put in front of it. If left blank `xml' will be used as the default path.
+
+XML_OUTPUT = xml
+
+# The XML_SCHEMA tag can be used to specify an XML schema,
+# which can be used by a validating XML parser to check the
+# syntax of the XML files.
+
+XML_SCHEMA =
+
+# The XML_DTD tag can be used to specify an XML DTD,
+# which can be used by a validating XML parser to check the
+# syntax of the XML files.
+
+XML_DTD =
+
+# If the XML_PROGRAMLISTING tag is set to YES Doxygen will
+# dump the program listings (including syntax highlighting
+# and cross-referencing information) to the XML output. Note that
+# enabling this will significantly increase the size of the XML output.
+
+XML_PROGRAMLISTING = YES
+
+#---------------------------------------------------------------------------
+# configuration options for the AutoGen Definitions output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_AUTOGEN_DEF tag is set to YES Doxygen will
+# generate an AutoGen Definitions (see autogen.sf.net) file
+# that captures the structure of the code including all
+# documentation. Note that this feature is still experimental
+# and incomplete at the moment.
+
+GENERATE_AUTOGEN_DEF = NO
+
+#---------------------------------------------------------------------------
+# configuration options related to the Perl module output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_PERLMOD tag is set to YES Doxygen will
+# generate a Perl module file that captures the structure of
+# the code including all documentation. Note that this
+# feature is still experimental and incomplete at the
+# moment.
+
+GENERATE_PERLMOD = NO
+
+# If the PERLMOD_LATEX tag is set to YES Doxygen will generate
+# the necessary Makefile rules, Perl scripts and LaTeX code to be able
+# to generate PDF and DVI output from the Perl module output.
+
+PERLMOD_LATEX = NO
+
+# If the PERLMOD_PRETTY tag is set to YES the Perl module output will be
+# nicely formatted so it can be parsed by a human reader. This is useful
+# if you want to understand what is going on. On the other hand, if this
+# tag is set to NO the size of the Perl module output will be much smaller
+# and Perl will parse it just the same.
+
+PERLMOD_PRETTY = YES
+
+# The names of the make variables in the generated doxyrules.make file
+# are prefixed with the string contained in PERLMOD_MAKEVAR_PREFIX.
+# This is useful so different doxyrules.make files included by the same
+# Makefile don't overwrite each other's variables.
+
+PERLMOD_MAKEVAR_PREFIX =
+
+#---------------------------------------------------------------------------
+# Configuration options related to the preprocessor
+#---------------------------------------------------------------------------
+
+# If the ENABLE_PREPROCESSING tag is set to YES (the default) Doxygen will
+# evaluate all C-preprocessor directives found in the sources and include
+# files.
+
+ENABLE_PREPROCESSING = YES
+
+# If the MACRO_EXPANSION tag is set to YES Doxygen will expand all macro
+# names in the source code. If set to NO (the default) only conditional
+# compilation will be performed. Macro expansion can be done in a controlled
+# way by setting EXPAND_ONLY_PREDEF to YES.
+
+MACRO_EXPANSION = NO
+
+# If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES
+# then the macro expansion is limited to the macros specified with the
+# PREDEFINED and EXPAND_AS_PREDEFINED tags.
+
+EXPAND_ONLY_PREDEF = NO
+
+# If the SEARCH_INCLUDES tag is set to YES (the default) the includes files
+# in the INCLUDE_PATH (see below) will be search if a #include is found.
+
+SEARCH_INCLUDES = YES
+
+# The INCLUDE_PATH tag can be used to specify one or more directories that
+# contain include files that are not input files but should be processed by
+# the preprocessor.
+
+INCLUDE_PATH =
+
+# You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard
+# patterns (like *.h and *.hpp) to filter out the header-files in the
+# directories. If left blank, the patterns specified with FILE_PATTERNS will
+# be used.
+
+INCLUDE_FILE_PATTERNS =
+
+# The PREDEFINED tag can be used to specify one or more macro names that
+# are defined before the preprocessor is started (similar to the -D option of
+# gcc). The argument of the tag is a list of macros of the form: name
+# or name=definition (no spaces). If the definition and the = are
+# omitted =1 is assumed. To prevent a macro definition from being
+# undefined via #undef or recursively expanded use the := operator
+# instead of the = operator.
+
+PREDEFINED = IAM_DOXYGEN
+
+# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then
+# this tag can be used to specify a list of macro names that should be expanded.
+# The macro definition that is found in the sources will be used.
+# Use the PREDEFINED tag if you want to use a different macro definition.
+
+EXPAND_AS_DEFINED =
+
+# If the SKIP_FUNCTION_MACROS tag is set to YES (the default) then
+# doxygen's preprocessor will remove all function-like macros that are alone
+# on a line, have an all uppercase name, and do not end with a semicolon. Such
+# function macros are typically used for boiler-plate code, and will confuse the
+# parser if not removed.
+
+SKIP_FUNCTION_MACROS = YES
+
+#---------------------------------------------------------------------------
+# Configuration::additions related to external references
+#---------------------------------------------------------------------------
+
+# The TAGFILES option can be used to specify one or more tagfiles.
+# Optionally an initial location of the external documentation
+# can be added for each tagfile. The format of a tag file without
+# this location is as follows:
+# TAGFILES = file1 file2 ...
+# Adding location for the tag files is done as follows:
+# TAGFILES = file1=loc1 "file2 = loc2" ...
+# where "loc1" and "loc2" can be relative or absolute paths or
+# URLs. If a location is present for each tag, the installdox tool
+# does not have to be run to correct the links.
+# Note that each tag file must have a unique name
+# (where the name does NOT include the path)
+# If a tag file is not located in the directory in which doxygen
+# is run, you must also specify the path to the tagfile here.
+
+TAGFILES =
+
+# When a file name is specified after GENERATE_TAGFILE, doxygen will create
+# a tag file that is based on the input files it reads.
+
+GENERATE_TAGFILE =
+
+# If the ALLEXTERNALS tag is set to YES all external classes will be listed
+# in the class index. If set to NO only the inherited external classes
+# will be listed.
+
+ALLEXTERNALS = NO
+
+# If the EXTERNAL_GROUPS tag is set to YES all external groups will be listed
+# in the modules index. If set to NO, only the current project's groups will
+# be listed.
+
+EXTERNAL_GROUPS = YES
+
+# The PERL_PATH should be the absolute path and name of the perl script
+# interpreter (i.e. the result of `which perl').
+
+PERL_PATH = /usr/bin/perl
+
+#---------------------------------------------------------------------------
+# Configuration options related to the dot tool
+#---------------------------------------------------------------------------
+
+# If the CLASS_DIAGRAMS tag is set to YES (the default) Doxygen will
+# generate a inheritance diagram (in HTML, RTF and LaTeX) for classes with base or
+# super classes. Setting the tag to NO turns the diagrams off. Note that this
+# option is superseded by the HAVE_DOT option below. This is only a fallback. It is
+# recommended to install and use dot, since it yields more powerful graphs.
+
+CLASS_DIAGRAMS = YES
+
+# If set to YES, the inheritance and collaboration graphs will hide
+# inheritance and usage relations if the target is undocumented
+# or is not a class.
+
+HIDE_UNDOC_RELATIONS = YES
+
+# If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is
+# available from the path. This tool is part of Graphviz, a graph visualization
+# toolkit from AT&T and Lucent Bell Labs. The other options in this section
+# have no effect if this option is set to NO (the default)
+
+HAVE_DOT = NO
+
+# If the CLASS_GRAPH and HAVE_DOT tags are set to YES then doxygen
+# will generate a graph for each documented class showing the direct and
+# indirect inheritance relations. Setting this tag to YES will force the
+# the CLASS_DIAGRAMS tag to NO.
+
+CLASS_GRAPH = YES
+
+# If the COLLABORATION_GRAPH and HAVE_DOT tags are set to YES then doxygen
+# will generate a graph for each documented class showing the direct and
+# indirect implementation dependencies (inheritance, containment, and
+# class references variables) of the class with other documented classes.
+
+COLLABORATION_GRAPH = YES
+
+# If the UML_LOOK tag is set to YES doxygen will generate inheritance and
+# collaboration diagrams in a style similar to the OMG's Unified Modeling
+# Language.
+
+UML_LOOK = NO
+
+# If set to YES, the inheritance and collaboration graphs will show the
+# relations between templates and their instances.
+
+TEMPLATE_RELATIONS = NO
+
+# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDE_GRAPH, and HAVE_DOT
+# tags are set to YES then doxygen will generate a graph for each documented
+# file showing the direct and indirect include dependencies of the file with
+# other documented files.
+
+INCLUDE_GRAPH = YES
+
+# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDED_BY_GRAPH, and
+# HAVE_DOT tags are set to YES then doxygen will generate a graph for each
+# documented header file showing the documented files that directly or
+# indirectly include this file.
+
+INCLUDED_BY_GRAPH = YES
+
+# If the CALL_GRAPH and HAVE_DOT tags are set to YES then doxygen will
+# generate a call dependency graph for every global function or class method.
+# Note that enabling this option will significantly increase the time of a run.
+# So in most cases it will be better to enable call graphs for selected
+# functions only using the \callgraph command.
+
+CALL_GRAPH = NO
+
+# If the GRAPHICAL_HIERARCHY and HAVE_DOT tags are set to YES then doxygen
+# will graphical hierarchy of all classes instead of a textual one.
+
+GRAPHICAL_HIERARCHY = YES
+
+# The DOT_IMAGE_FORMAT tag can be used to set the image format of the images
+# generated by dot. Possible values are png, jpg, or gif
+# If left blank png will be used.
+
+DOT_IMAGE_FORMAT = png
+
+# The tag DOT_PATH can be used to specify the path where the dot tool can be
+# found. If left blank, it is assumed the dot tool can be found on the path.
+
+DOT_PATH =
+
+# The DOTFILE_DIRS tag can be used to specify one or more directories that
+# contain dot files that are included in the documentation (see the
+# \dotfile command).
+
+DOTFILE_DIRS =
+
+# The MAX_DOT_GRAPH_WIDTH tag can be used to set the maximum allowed width
+# (in pixels) of the graphs generated by dot. If a graph becomes larger than
+# this value, doxygen will try to truncate the graph, so that it fits within
+# the specified constraint. Beware that most browsers cannot cope with very
+# large images.
+
+MAX_DOT_GRAPH_WIDTH = 1024
+
+# The MAX_DOT_GRAPH_HEIGHT tag can be used to set the maximum allows height
+# (in pixels) of the graphs generated by dot. If a graph becomes larger than
+# this value, doxygen will try to truncate the graph, so that it fits within
+# the specified constraint. Beware that most browsers cannot cope with very
+# large images.
+
+MAX_DOT_GRAPH_HEIGHT = 1024
+
+# The MAX_DOT_GRAPH_DEPTH tag can be used to set the maximum depth of the
+# graphs generated by dot. A depth value of 3 means that only nodes reachable
+# from the root by following a path via at most 3 edges will be shown. Nodes that
+# lay further from the root node will be omitted. Note that setting this option to
+# 1 or 2 may greatly reduce the computation time needed for large code bases. Also
+# note that a graph may be further truncated if the graph's image dimensions are
+# not sufficient to fit the graph (see MAX_DOT_GRAPH_WIDTH and MAX_DOT_GRAPH_HEIGHT).
+# If 0 is used for the depth value (the default), the graph is not depth-constrained.
+
+MAX_DOT_GRAPH_DEPTH = 0
+
+# If the GENERATE_LEGEND tag is set to YES (the default) Doxygen will
+# generate a legend page explaining the meaning of the various boxes and
+# arrows in the dot generated graphs.
+
+GENERATE_LEGEND = YES
+
+# If the DOT_CLEANUP tag is set to YES (the default) Doxygen will
+# remove the intermediate dot files that are used to generate
+# the various graphs.
+
+DOT_CLEANUP = YES
+
+#---------------------------------------------------------------------------
+# Configuration::additions related to the search engine
+#---------------------------------------------------------------------------
+
+# The SEARCHENGINE tag specifies whether or not a search engine should be
+# used. If set to NO the values of all tags below this one will be ignored.
+
+SEARCHENGINE = NO
diff --git a/searchlib/src/apps/docstore/.gitignore b/searchlib/src/apps/docstore/.gitignore
new file mode 100644
index 00000000000..395e6ce624b
--- /dev/null
+++ b/searchlib/src/apps/docstore/.gitignore
@@ -0,0 +1,6 @@
+/.depend
+/Makefile
+/vespa-verify-logdatastore
+/vespa-documentstore-inspect
+/vespa-documentstore-benchmark
+/vespa-create-idx-from-dat
diff --git a/searchlib/src/apps/docstore/CMakeLists.txt b/searchlib/src/apps/docstore/CMakeLists.txt
new file mode 100644
index 00000000000..971d11ea4cc
--- /dev/null
+++ b/searchlib/src/apps/docstore/CMakeLists.txt
@@ -0,0 +1,33 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_vespa-verify-logdatastore_app
+ SOURCES
+ verifylogdatastore.cpp
+ OUTPUT_NAME vespa-verify-logdatastore
+ INSTALL bin
+ DEPENDS
+ searchlib
+)
+vespa_add_executable(searchlib_vespa-documentstore-inspect_app
+ SOURCES
+ documentstoreinspect.cpp
+ OUTPUT_NAME vespa-documentstore-inspect
+ INSTALL bin
+ DEPENDS
+ searchlib
+)
+vespa_add_executable(searchlib_vespa-documentstore-benchmark_app
+ SOURCES
+ benchmarkdatastore.cpp
+ OUTPUT_NAME vespa-documentstore-benchmark
+ INSTALL bin
+ DEPENDS
+ searchlib
+)
+vespa_add_executable(searchlib_vespa-create-idx-from-dat_app
+ SOURCES
+ create-idx-from-dat.cpp
+ OUTPUT_NAME vespa-create-idx-from-dat
+ INSTALL bin
+ DEPENDS
+ searchlib
+)
diff --git a/searchlib/src/apps/docstore/benchmarkdatastore.cpp b/searchlib/src/apps/docstore/benchmarkdatastore.cpp
new file mode 100644
index 00000000000..1281e0d11b3
--- /dev/null
+++ b/searchlib/src/apps/docstore/benchmarkdatastore.cpp
@@ -0,0 +1,114 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/searchlib/docstore/logdatastore.h>
+#include <vespa/searchlib/index/dummyfileheadercontext.h>
+#include <vespa/vespalib/util/closure.h>
+#include <vespa/vespalib/util/closuretask.h>
+#include <vespa/searchlib/transactionlog/nosyncproxy.h>
+#include <vespa/log/log.h>
+
+LOG_SETUP("documentstore.benchmark");
+
+using namespace search;
+
+class BenchmarkDataStoreApp : public FastOS_Application
+{
+ void usage(void);
+ int benchmark(const vespalib::string & directory, size_t numReads, size_t numThreads, size_t perChunk, const vespalib::string & readType);
+ int Main(void);
+ void read(size_t numReads, size_t perChunk, const IDataStore * dataStore);
+};
+
+
+
+void
+BenchmarkDataStoreApp::usage(void)
+{
+ printf("Usage: %s <direcory> <numreads> <numthreads> <objects per read> <normal,directio,mmap,mlock>\n", _argv[0]);
+ fflush(stdout);
+}
+
+int
+BenchmarkDataStoreApp::Main(void)
+{
+ if (_argc >= 2) {
+ size_t numThreads(16);
+ size_t numReads(1000000);
+ size_t perChunk(1);
+ vespalib::string readType("directio");
+ vespalib::string directory(_argv[1]);
+ if (_argc >= 3) {
+ numReads = strtoul(_argv[2], NULL, 0);
+ if (_argc >= 4) {
+ numThreads = strtoul(_argv[3], NULL, 0);
+ if (_argc >= 5) {
+ perChunk = strtoul(_argv[4], NULL, 0);
+ if (_argc >= 5) {
+ readType = _argv[5];
+ }
+ }
+ }
+ }
+ return benchmark(directory, numReads, numThreads, perChunk, readType);
+ } else {
+ fprintf(stderr, "Too few arguments\n");
+ usage();
+ return 1;
+ }
+ return 0;
+}
+
+void BenchmarkDataStoreApp::read(size_t numReads, size_t perChunk, const IDataStore * dataStore)
+{
+ vespalib::DataBuffer buf;
+ struct random_data rstate;
+ char state[8];
+ memset(state, 0, sizeof(state));
+ memset(&rstate, 0, sizeof(rstate));
+ const size_t numDocs(dataStore->nextId());
+ assert(numDocs > 0);
+ initstate_r(getpid(), state, sizeof(state), &rstate);
+ assert(srandom_r(getpid(), &rstate) == 0);
+ int32_t rnd(0);
+ for ( size_t i(0); i < numReads; i++) {
+ random_r(&rstate, &rnd);
+ uint32_t lid(rnd%numDocs);
+ for (uint32_t j(lid); j < std::min(numDocs, lid+perChunk); j++) {
+ dataStore->read(j, buf);
+ buf.clear();
+ }
+ }
+}
+
+int
+BenchmarkDataStoreApp::benchmark(const vespalib::string & dir, size_t numReads, size_t numThreads, size_t perChunk, const vespalib::string & readType)
+{
+ int retval(0);
+ LogDataStore::Config config;
+ GrowStrategy growStrategy;
+ TuneFileSummary tuning;
+ if (readType == "directio") {
+ tuning._randRead.setWantDirectIO();
+ } else if (readType == "normal") {
+ tuning._randRead.setWantNormal();
+ } else if (readType == "mmap") {
+ tuning._randRead.setWantMemoryMap();
+ }
+ search::index::DummyFileHeaderContext fileHeaderContext;
+ vespalib::ThreadStackExecutor executor(config.getNumThreads(), 128*1024);
+ transactionlog::NoSyncProxy noTlSyncer;
+ LogDataStore store(executor, dir, config, growStrategy, tuning,
+ fileHeaderContext,
+ noTlSyncer, NULL, true);
+ vespalib::ThreadStackExecutor bmPool(numThreads, 128*1024);
+ LOG(info, "Start read benchmark with %lu threads doing %lu reads in chunks of %lu reads. Totally %lu objects", numThreads, numReads, perChunk, numThreads * numReads * perChunk);
+ for (size_t i(0); i < numThreads; i++) {
+ bmPool.execute(vespalib::makeTask(vespalib::makeClosure(this, &BenchmarkDataStoreApp::read, numReads, perChunk, static_cast<const IDataStore *>(&store))));
+ }
+ bmPool.sync();
+ LOG(info, "Benchmark done.");
+ return retval;
+}
+
+FASTOS_MAIN(BenchmarkDataStoreApp);
diff --git a/searchlib/src/apps/docstore/create-idx-from-dat.cpp b/searchlib/src/apps/docstore/create-idx-from-dat.cpp
new file mode 100644
index 00000000000..66661b6468d
--- /dev/null
+++ b/searchlib/src/apps/docstore/create-idx-from-dat.cpp
@@ -0,0 +1,157 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/searchlib/docstore/logdatastore.h>
+#include <vespa/searchlib/index/dummyfileheadercontext.h>
+#include <vespa/searchlib/transactionlog/nosyncproxy.h>
+#include <vespa/searchlib/index/dummyfileheadercontext.h>
+
+using namespace search;
+
+class CreateIdxFileFromDatApp : public FastOS_Application
+{
+ void usage(void);
+ int createIdxFile(const vespalib::string & datFileName, const vespalib::string & idxFileName);
+ int Main(void);
+};
+
+
+
+void
+CreateIdxFileFromDatApp::usage(void)
+{
+ printf("Usage: %s <datfile> <idxfile>\n", _argv[0]);
+ fflush(stdout);
+}
+
+bool tryDecode(size_t chunks, size_t offset, const char * p, size_t sz, size_t nextSync)
+{
+ bool success(false);
+ for (size_t lengthError(0); !success && (sz + lengthError <= nextSync); lengthError++) {
+ try {
+ Chunk chunk(chunks, p, sz + lengthError, false);
+ success = true;
+ } catch (const vespalib::Exception & e) {
+ fprintf(stdout, "Chunk %ld, with size=%ld failed with lengthError %ld due to '%s'\n", offset, sz, lengthError, e.what());
+ }
+ }
+ return success;
+}
+
+bool validUncompressed(const char * n, size_t offset) {
+ return (n[1] == document::CompressionConfig::NONE) &&
+ (n[2] == 0) &&
+ (n[3] == 0) &&
+ (n[4] == 0) &&
+ (n[5] != 0) &&
+ tryDecode(0, offset, n, 6ul + 4ul + uint8_t(n[5]), 6ul + 4ul + uint8_t(n[5]) + 4);
+}
+
+bool validHead(const char * n, size_t offset) {
+ return (n[0] == 0) && (validUncompressed(n, offset));
+}
+
+int CreateIdxFileFromDatApp::createIdxFile(const vespalib::string & datFileName, const vespalib::string & idxFileName)
+{
+ MMapRandRead datFile(datFileName, 0, 0);
+ int64_t fileSize = datFile.getSize();
+ uint64_t datHeaderLen = FileChunk::readDataHeader(datFile);
+ const char * start = static_cast<const char *>(datFile.getMapping());
+ const char * end = start + fileSize;
+ uint64_t chunks(0);
+ uint64_t entries(0);
+ uint64_t alignment(512);
+ FastOS_File idxFile(idxFileName.c_str());
+ assert(idxFile.OpenWriteOnly());
+ index::DummyFileHeaderContext fileHeaderContext;
+ idxFile.SetPosition(WriteableFileChunk::writeIdxHeader(fileHeaderContext, idxFile));
+ fprintf(stdout, "datHeaderLen=%ld\n", datHeaderLen);
+ uint64_t serialNum(0);
+ for (const char * current(start + datHeaderLen); current < end; ) {
+ if (validHead(current, current-start)) {
+ const char * tail(current);
+ const char * nextStart(current+alignment);
+ for (; nextStart < end; nextStart+=alignment) {
+ if (validHead(nextStart, nextStart-start)) {
+ tail = nextStart;
+ while(*(tail-1) == 0) {
+ tail--;
+ }
+ if (tryDecode(chunks, current-start, current, tail - current, nextStart-current)) {
+ break;
+ } else {
+ fprintf(stdout, "chunk %ld possibly starting at %ld ending at %ld false sync at pos=%ld\n",
+ chunks, current-start, tail-start, nextStart-start);
+ }
+ }
+ }
+ if (tail == current) {
+ nextStart = end;
+ tail = end;
+ while(*(tail-1) == 0) {
+ tail--;
+ }
+ }
+ uint64_t sz = tail - current;
+ fprintf(stdout, "Most likely found chunk at offset %ld with length %ld\n", current - start, sz);
+ vespalib::nbostream os;
+ for (size_t lengthError(0); int64_t(sz+lengthError) <= nextStart-start; lengthError++) {
+ try {
+ Chunk chunk(chunks, current, sz + lengthError, false);
+ fprintf(stdout, "id=%d lastSerial=%ld count=%ld\n", chunk.getId(), chunk.getLastSerial(), chunk.count());
+ const Chunk::LidList & lidlist = chunk.getLids();
+ if (chunk.getLastSerial() < serialNum) {
+ fprintf(stdout, "Serial num grows down prev=%ld, current=%ld\n", serialNum, chunk.getLastSerial());
+ }
+ serialNum = std::max(serialNum, chunk.getLastSerial());
+ ChunkMeta cmeta(current-start, sz + lengthError, serialNum, chunk.count());
+ cmeta.serialize(os);
+ for (auto it(lidlist.begin()); it != lidlist.end(); it++) {
+ LidMeta lm(it->getLid(), it->netSize());
+ lm.serialize(os);
+ }
+ break;
+ } catch (const vespalib::Exception & e) {
+ fprintf(stdout, "Failed with lengthError %ld due to '%s'\n", lengthError, e.what());
+ }
+ }
+ idxFile.Write2(os.c_str(), os.size());
+ chunks++;
+ for(current += alignment; current < tail; current += alignment);
+ } else {
+ current += alignment;
+ }
+ //fprintf(stdout, "Next is most likely at offset %ld tail(%p)\n", current - start, tail);
+/*
+ ChunkMeta cm;
+ cm.deserialize(is);
+ fprintf(stdout, "Chunk(%ld) : LastSerial(%ld), Entries(%d), Offset(%ld), Size(%d)\n",
+ chunk, cm.getLastSerial(), cm.getNumEntries(), cm.getOffset(), cm.getSize());
+ for (size_t i(0), m(cm.getNumEntries()); i < m; i++, entries++) {
+ LidMeta lm;
+ lm.deserialize(is);
+ fprintf(stdout, "Entry(%ld.%ld) : Lid(%d), Size(%d)\n", chunk, i, lm.getLid(), lm.size());
+ }
+*/
+ }
+ fprintf(stdout, "Processed %ld chunks with total entries = %ld\n", chunks, entries);
+ return 0;
+}
+
+int
+CreateIdxFileFromDatApp::Main(void)
+{
+ vespalib::string cmd;
+ if (_argc == 3) {
+ vespalib::string datFile(_argv[1]);
+ vespalib::string idxfile(_argv[2]);
+ createIdxFile(datFile, idxfile);
+ } else {
+ fprintf(stderr, "Too few arguments\n");
+ usage();
+ return 1;
+ }
+ return 0;
+}
+
+FASTOS_MAIN(CreateIdxFileFromDatApp);
diff --git a/searchlib/src/apps/docstore/documentstoreinspect.cpp b/searchlib/src/apps/docstore/documentstoreinspect.cpp
new file mode 100644
index 00000000000..587565672c0
--- /dev/null
+++ b/searchlib/src/apps/docstore/documentstoreinspect.cpp
@@ -0,0 +1,114 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/searchlib/docstore/logdatastore.h>
+#include <vespa/searchlib/index/dummyfileheadercontext.h>
+#include <vespa/searchlib/transactionlog/nosyncproxy.h>
+
+using namespace search;
+
+class DocumentStoreInspectApp : public FastOS_Application
+{
+ void usage(void);
+ int verify(const vespalib::string & directory);
+ int dumpIdxFile(const vespalib::string & file);
+ int Main(void);
+};
+
+
+
+void
+DocumentStoreInspectApp::usage(void)
+{
+ printf("Usage: %s dumpidxfile [--idxfile idxFile]\n", _argv[0]);
+ fflush(stdout);
+}
+
+int DocumentStoreInspectApp::dumpIdxFile(const vespalib::string & file)
+{
+ FastOS_File idxFile(file.c_str());
+ idxFile.enableMemoryMap(0);
+ if (idxFile.OpenReadOnly()) {
+ if (idxFile.IsMemoryMapped()) {
+ int64_t fileSize = idxFile.GetSize();
+ uint64_t idxHeaderLen = FileChunk::readIdxHeader(idxFile);
+ vespalib::nbostream is(static_cast<const char *>
+ (idxFile.MemoryMapPtr(0)) + idxHeaderLen,
+ fileSize - idxHeaderLen);
+ size_t chunk(0);
+ size_t entries(0);
+ for (; ! is.empty(); chunk++) {
+ ChunkMeta cm;
+ cm.deserialize(is);
+ fprintf(stdout, "Chunk(%ld) : LastSerial(%ld), Entries(%d), Offset(%ld), Size(%d)\n",
+ chunk, cm.getLastSerial(), cm.getNumEntries(), cm.getOffset(), cm.getSize());
+ for (size_t i(0), m(cm.getNumEntries()); i < m; i++, entries++) {
+ LidMeta lm;
+ lm.deserialize(is);
+ fprintf(stdout, "Entry(%ld.%ld) : Lid(%d), Size(%d)\n", chunk, i, lm.getLid(), lm.size());
+ }
+ }
+ fprintf(stdout, "Processed %ld chunks with total entries = %ld\n", chunk, entries);
+ } else {
+ fprintf(stderr, "Failed memorymapping file '%s' due to %s\n", idxFile.GetFileName(), idxFile.getLastErrorString().c_str());
+ }
+ } else {
+ fprintf(stderr, "Failed opening file '%s' readonly due to %s\n", idxFile.GetFileName(), idxFile.getLastErrorString().c_str());
+ }
+ return 0;
+}
+
+int
+DocumentStoreInspectApp::Main(void)
+{
+ vespalib::string cmd;
+ if (_argc >= 2) {
+ cmd = _argv[1];
+ if (cmd == "dumpidxfile") {
+ vespalib::string idxfile;
+ if (_argc >= 4) {
+ if (_argv[2] == vespalib::string("--idxfile")) {
+ idxfile = _argv[3];
+ dumpIdxFile(idxfile);
+ } else {
+ fprintf(stderr, "Unknown option '%s'.\n", _argv[2]);
+ usage();
+ return 1;
+ }
+ } else {
+ fprintf(stderr, "Too few arguments\n");
+ usage();
+ return 1;
+ }
+ } else {
+ fprintf(stderr, "Unknown command '%s'.\n", cmd.c_str());
+ usage();
+ return 1;
+ }
+ } else {
+ fprintf(stderr, "Too few arguments\n");
+ usage();
+ return 1;
+ }
+ return 0;
+}
+
+int
+DocumentStoreInspectApp::verify(const vespalib::string & dir)
+{
+ int retval(0);
+
+ LogDataStore::Config config;
+ GrowStrategy growStrategy;
+ TuneFileSummary tuning;
+ search::index::DummyFileHeaderContext fileHeaderContext;
+ vespalib::ThreadStackExecutor executor(config.getNumThreads(), 128*1024);
+ transactionlog::NoSyncProxy noTlSyncer;
+
+ LogDataStore store(executor, dir, config, growStrategy, tuning,
+ fileHeaderContext, noTlSyncer, NULL, true);
+ store.verify(false);
+ return retval;
+}
+
+FASTOS_MAIN(DocumentStoreInspectApp);
diff --git a/searchlib/src/apps/docstore/verifylogdatastore.cpp b/searchlib/src/apps/docstore/verifylogdatastore.cpp
new file mode 100644
index 00000000000..200d6051d8f
--- /dev/null
+++ b/searchlib/src/apps/docstore/verifylogdatastore.cpp
@@ -0,0 +1,59 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/searchlib/docstore/logdatastore.h>
+#include <vespa/searchlib/index/dummyfileheadercontext.h>
+#include <vespa/searchlib/transactionlog/nosyncproxy.h>
+
+using namespace search;
+
+class VerifyLogDataStoreApp : public FastOS_Application
+{
+ void usage(void);
+ int verify(const vespalib::string & directory);
+ int Main(void);
+};
+
+
+
+void
+VerifyLogDataStoreApp::usage(void)
+{
+ printf("Usage: %s <direcory>\n", _argv[0]);
+ fflush(stdout);
+}
+
+int
+VerifyLogDataStoreApp::Main(void)
+{
+ if (_argc >= 2) {
+ vespalib::string directory(_argv[1]);
+ return verify(directory);
+ } else {
+ fprintf(stderr, "Too few arguments\n");
+ usage();
+ return 1;
+ }
+ return 0;
+}
+
+int
+VerifyLogDataStoreApp::verify(const vespalib::string & dir)
+{
+ int retval(0);
+
+ LogDataStore::Config config;
+ GrowStrategy growStrategy;
+ TuneFileSummary tuning;
+ search::index::DummyFileHeaderContext fileHeaderContext;
+ vespalib::ThreadStackExecutor executor(config.getNumThreads(), 128*1024);
+ transactionlog::NoSyncProxy noTlSyncer;
+
+ LogDataStore store(executor, dir, config, growStrategy, tuning,
+ fileHeaderContext,
+ noTlSyncer, NULL, true);
+ store.verify(false);
+ return retval;
+}
+
+FASTOS_MAIN(VerifyLogDataStoreApp);
diff --git a/searchlib/src/apps/expgolomb/.gitignore b/searchlib/src/apps/expgolomb/.gitignore
new file mode 100644
index 00000000000..0886ab154a2
--- /dev/null
+++ b/searchlib/src/apps/expgolomb/.gitignore
@@ -0,0 +1,3 @@
+.depend
+Makefile
+expgolomb
diff --git a/searchlib/src/apps/expgolomb/CMakeLists.txt b/searchlib/src/apps/expgolomb/CMakeLists.txt
new file mode 100644
index 00000000000..230718907dd
--- /dev/null
+++ b/searchlib/src/apps/expgolomb/CMakeLists.txt
@@ -0,0 +1,9 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_expgolomb_app
+ SOURCES
+ expgolomb.cpp
+ OUTPUT_NAME expgolomb
+ INSTALL bin
+ DEPENDS
+ searchlib
+)
diff --git a/searchlib/src/apps/expgolomb/expgolomb.cpp b/searchlib/src/apps/expgolomb/expgolomb.cpp
new file mode 100644
index 00000000000..1070a9dab8f
--- /dev/null
+++ b/searchlib/src/apps/expgolomb/expgolomb.cpp
@@ -0,0 +1,175 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/searchlib/bitcompression/compression.h>
+#include <vector>
+
+class ExpGolombApp : public FastOS_Application
+{
+ void
+ usage(void);
+
+ int
+ testExpGolomb64(int kValue);
+
+ int
+ testExpGolomb64le(int kValue);
+
+ int
+ Main(void);
+};
+
+
+
+void
+ExpGolombApp::usage(void)
+{
+ printf("Usage: expgolomb testeg64 <kValue>]\n");
+ fflush(stdout);
+}
+
+
+int
+ExpGolombApp::testExpGolomb64(int kValue)
+{
+ std::vector<uint64_t> myrand;
+ for (int i = 0; i < 10000; ++i) {
+ uint64_t rval = rand();
+ rval <<= 30;
+ rval |= rand();
+ myrand.push_back(rval);
+ }
+ for (int i = 0; i < 10000; ++i) {
+ uint64_t rval = rand();
+ rval <<= 30;
+ rval |= rand();
+ uint32_t bits = (rand() & 63);
+ rval &= ((UINT64_C(1) << bits) - 1);
+ myrand.push_back(rval);
+ }
+ typedef search::bitcompression::EncodeContext64BE EC;
+
+ EC e;
+ search::ComprFileWriteContext wc(e);
+ wc.allocComprBuf(32768, 32768);
+ e.setupWrite(wc);
+
+ int rsize = myrand.size();
+ for (int i = 0; i < rsize; ++i) {
+ e.encodeExpGolomb(myrand[i], kValue);
+ if (e._valI >= e._valE)
+ wc.writeComprBuffer(false);
+ }
+ e.flush();
+
+ UC64_DECODECONTEXT(o);
+ unsigned int length;
+ uint64_t val64;
+ UC64BE_SETUPBITS_NS(o, static_cast<const uint64_t *>(wc._comprBuf), 0, EC);
+
+ bool failure = false;
+ for (int i = 0; i < rsize; ++i) {
+ UC64BE_DECODEEXPGOLOMB(oVal, oCompr, oPreRead, oCacheInt,
+ kValue, EC);
+ if (val64 != myrand[i]) {
+ printf("FAILURE: TestExpGolomb64, val64=%"
+ PRIu64 ", myrand[%d]=%" PRIu64 "\n",
+ val64, i, myrand[i]);
+ failure = true;
+ }
+ }
+ if (!failure)
+ printf("SUCCESS: TestExpGolomb64\n");
+ return failure ? 1 : 0;
+}
+
+int
+ExpGolombApp::testExpGolomb64le(int kValue)
+{
+ std::vector<uint64_t> myrand;
+ for (int i = 0; i < 10000; ++i) {
+ uint64_t rval = rand();
+ rval <<= 30;
+ rval |= rand();
+ myrand.push_back(rval);
+ }
+ for (int i = 0; i < 10000; ++i) {
+ uint64_t rval = rand();
+ rval <<= 30;
+ rval |= rand();
+ uint32_t bits = (rand() & 63);
+ rval &= ((UINT64_C(1) << bits) - 1);
+ myrand.push_back(rval);
+ }
+ typedef search::bitcompression::EncodeContext64LE EC;
+
+ EC e;
+ search::ComprFileWriteContext wc(e);
+ wc.allocComprBuf(32768, 32768);
+ e.setupWrite(wc);
+
+ int rsize = myrand.size();
+ for (int i = 0; i < rsize; ++i) {
+ e.encodeExpGolomb(myrand[i], kValue);
+ if (e._valI >= e._valE)
+ wc.writeComprBuffer(false);
+ }
+ e.flush();
+
+ UC64_DECODECONTEXT(o);
+ unsigned int length;
+ uint64_t val64;
+ UC64LE_SETUPBITS_NS(o, static_cast<const uint64_t *>(wc._comprBuf), 0, EC);
+
+ bool failure = false;
+ for (int i = 0; i < rsize; ++i) {
+ UC64LE_DECODEEXPGOLOMB(oVal, oCompr, oPreRead, oCacheInt,
+ kValue, EC);
+ if (val64 != myrand[i]) {
+ printf("FAILURE: TestExpGolomb64le, val64=%"
+ PRIu64 ", myrand[%d]=%" PRIu64 "\n",
+ val64, i, myrand[i]);
+ failure = true;
+ }
+ }
+ if (!failure)
+ printf("SUCCESS: TestExpGolomb64le\n");
+ return failure ? 1 : 0;
+}
+
+
+int
+ExpGolombApp::Main(void)
+{
+ printf("Hello world\n");
+ if (_argc >= 2) {
+ if (strcmp(_argv[1], "testeg64") == 0) {
+ if (_argc < 3) {
+ fprintf(stderr, "Too few arguments\n");
+ usage();
+ return 1;
+ }
+ return testExpGolomb64(atoi(_argv[2]));
+ } else if (strcmp(_argv[1], "testeg64le") == 0) {
+ if (_argc < 3) {
+ fprintf(stderr, "Too few arguments\n");
+ usage();
+ return 1;
+ }
+ return testExpGolomb64le(atoi(_argv[2]));
+ } else {
+ fprintf(stderr, "Wrong arguments\n");
+ usage();
+ return 1;
+ }
+ } else {
+ fprintf(stderr, "Too few arguments\n");
+ usage();
+ return 1;
+ }
+ return 0;
+}
+
+FASTOS_MAIN(ExpGolombApp);
+
+
diff --git a/searchlib/src/apps/fileheaderinspect/.gitignore b/searchlib/src/apps/fileheaderinspect/.gitignore
new file mode 100644
index 00000000000..5616f8e735c
--- /dev/null
+++ b/searchlib/src/apps/fileheaderinspect/.gitignore
@@ -0,0 +1,3 @@
+.depend
+Makefile
+vespa-header-inspect
diff --git a/searchlib/src/apps/fileheaderinspect/CMakeLists.txt b/searchlib/src/apps/fileheaderinspect/CMakeLists.txt
new file mode 100644
index 00000000000..322bf6fefcb
--- /dev/null
+++ b/searchlib/src/apps/fileheaderinspect/CMakeLists.txt
@@ -0,0 +1,9 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_vespa-header-inspect_app
+ SOURCES
+ fileheaderinspect.cpp
+ OUTPUT_NAME vespa-header-inspect
+ INSTALL bin
+ DEPENDS
+ searchlib
+)
diff --git a/searchlib/src/apps/fileheaderinspect/fileheaderinspect.cpp b/searchlib/src/apps/fileheaderinspect/fileheaderinspect.cpp
new file mode 100644
index 00000000000..1cd280830d9
--- /dev/null
+++ b/searchlib/src/apps/fileheaderinspect/fileheaderinspect.cpp
@@ -0,0 +1,223 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("fileheaderinspect");
+
+#include <vespa/fastos/app.h>
+#include <iomanip>
+#include <iostream>
+#include <vespa/vespalib/data/fileheader.h>
+
+using namespace vespalib;
+
+class Application : public FastOS_Application {
+private:
+ vespalib::string _fileName;
+ char _delimiter;
+ bool _quiet;
+
+ int parseOpts();
+ void usage();
+ void printQuiet(FileHeader &header);
+ void printVerbose(FileHeader &header);
+ vespalib::string escape(const vespalib::string &str, char quote = '\0');
+ vespalib::string getTypeString(const FileHeader::Tag &tag);
+ vespalib::string getValueString(const FileHeader::Tag &tag);
+
+public:
+ Application();
+ int Main();
+};
+
+Application::Application() :
+ _fileName(""),
+ _delimiter(';'),
+ _quiet(false)
+{
+ // empty
+}
+
+
+void
+Application::usage()
+{
+ printf("Tool for inspecting the headers of files used by Vespa.\n");
+ printf("Usage: %s [options] filename\n", _argv[0]);
+ printf("\n");
+ printf("The options are:\n");
+ printf("-d delimiter The delimiter to use to separate values in quiet output.\n");
+ printf("-f file The name of the file to inspect.\n");
+ printf("-q Enables machine readable output.\n");
+ printf("-h Shows this help page.\n");
+}
+
+
+int
+Application::parseOpts()
+{
+ char c = '?';
+ const char *optArg = NULL;
+ int optInd = 0;
+ while ((c = GetOpt("d:f:qh", optArg, optInd)) != -1) {
+ switch (c) {
+ case 'd':
+ _delimiter = optArg[0];
+ break;
+ case 'f':
+ _fileName = optArg;
+ break;
+ case 'q':
+ _quiet = true;
+ break;
+ case 'h':
+ usage();
+ return EXIT_SUCCESS;
+ default:
+ usage();
+ return EXIT_FAILURE;
+ }
+ }
+ if (_argc == optInd + 1) {
+ _fileName = _argv[optInd];
+ }
+ if (_fileName.empty()) {
+ std::cerr << "No filename given." << std::endl;
+ return EXIT_FAILURE;
+ }
+ return ~(EXIT_SUCCESS | EXIT_FAILURE);
+}
+
+int
+Application::Main()
+{
+ int ret = parseOpts();
+ if (ret == EXIT_FAILURE || ret == EXIT_SUCCESS) {
+ return ret;
+ }
+
+ FastOS_File file;
+ if (!file.OpenReadOnly(_fileName.c_str())) {
+ std::cerr << "Failed to open file '" << _fileName << "'." << std::endl;
+ return EXIT_FAILURE;
+ }
+
+ FileHeader header;
+ try {
+ header.readFile(file);
+ } catch (IllegalHeaderException &e) {
+ std::cerr << e.getMessage() << std::endl;
+ return EXIT_FAILURE;
+ }
+ file.Close();
+
+ if (_quiet) {
+ printQuiet(header);
+ } else {
+ printVerbose(header);
+ }
+ return EXIT_SUCCESS;
+}
+
+void
+Application::printQuiet(FileHeader &header)
+{
+ for (uint32_t i = 0, len = header.getNumTags(); i < len; ++i) {
+ const FileHeader::Tag &tag = header.getTag(i);
+ std::cout << escape(tag.getName(), _delimiter) << _delimiter
+ << escape(getTypeString(tag), _delimiter) << _delimiter
+ << escape(getValueString(tag), _delimiter) << std::endl;
+ }
+}
+
+void
+Application::printVerbose(FileHeader &header)
+{
+ uint32_t nameWidth = 3, typeWidth = 4, valueWidth = 5;
+ for (uint32_t i = 0, len = header.getNumTags(); i < len; ++i) {
+ const FileHeader::Tag &tag = header.getTag(i);
+ nameWidth = std::max(nameWidth, (uint32_t)tag.getName().size());
+ typeWidth = std::max(typeWidth, (uint32_t)getTypeString(tag).size());
+ valueWidth = std::max(valueWidth, (uint32_t)getValueString(tag).size());
+ }
+
+ vespalib::asciistream line;
+ line << "+" << std::string(nameWidth + 2, '-')
+ << "+" << std::string(typeWidth + 2, '-')
+ << "+" << std::string(valueWidth + 2, '-')
+ << "+";
+
+ std::cout << std::left << line.str() << std::endl;
+ std::cout << "| " << std::setw(nameWidth) << "Tag" << " "
+ << "| " << std::setw(typeWidth) << "Type" << " "
+ << "| " << std::setw(valueWidth)<< "Value" << " "
+ << "| " << std::endl;
+ std::cout << line.str() << std::endl;
+ for (uint32_t i = 0, len = header.getNumTags(); i < len; ++i) {
+ const FileHeader::Tag &tag = header.getTag(i);
+ std::cout << "| " << std::setw(nameWidth) << escape(tag.getName()) << " "
+ << "| " << std::setw(typeWidth) << getTypeString(tag) << " "
+ << "| " << std::setw(valueWidth) << escape(getValueString(tag)) << " "
+ << "| " << std::endl;
+ }
+ std::cout << line.str() << std::endl;
+}
+
+vespalib::string
+Application::escape(const vespalib::string &str, char quote)
+{
+ vespalib::string ret = "";
+ for (uint32_t i = 0, len = str.size(); i < len; ++i) {
+ char c = str[i];
+ switch (c) {
+ case '\f':
+ ret.append("\\f");
+ break;
+ case '\n':
+ ret.append("\\n");
+ break;
+ case '\r':
+ ret.append("\\r");
+ break;
+ case '\t':
+ ret.append("\\t");
+ break;
+ default:
+ if (c != '\0' && c == quote) {
+ ret.append("\\");
+ }
+ ret.push_back(c);
+ }
+ }
+ return ret;
+}
+
+vespalib::string
+Application::getTypeString(const FileHeader::Tag &tag)
+{
+ switch (tag.getType()) {
+ case FileHeader::Tag::TYPE_FLOAT:
+ return "float";
+ case FileHeader::Tag::TYPE_INTEGER:
+ return "integer";
+ case FileHeader::Tag::TYPE_STRING:
+ return "string";
+ default:
+ LOG_ASSERT(tag.getType() == FileHeader::Tag::TYPE_INTEGER);
+ abort();
+ }
+}
+
+vespalib::string
+Application::getValueString(const FileHeader::Tag &tag)
+{
+ vespalib::asciistream out;
+ out << tag;
+ return out.str();
+}
+
+int
+main(int argc, char** argv)
+{
+ Application app;
+ return app.Entry(argc, argv);
+}
diff --git a/searchlib/src/apps/loadattribute/.gitignore b/searchlib/src/apps/loadattribute/.gitignore
new file mode 100644
index 00000000000..4f008fbf84e
--- /dev/null
+++ b/searchlib/src/apps/loadattribute/.gitignore
@@ -0,0 +1,3 @@
+.depend
+Makefile
+loadattribute
diff --git a/searchlib/src/apps/loadattribute/CMakeLists.txt b/searchlib/src/apps/loadattribute/CMakeLists.txt
new file mode 100644
index 00000000000..6712519e59a
--- /dev/null
+++ b/searchlib/src/apps/loadattribute/CMakeLists.txt
@@ -0,0 +1,9 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_loadattribute_app
+ SOURCES
+ loadattribute.cpp
+ OUTPUT_NAME loadattribute
+ INSTALL bin
+ DEPENDS
+ searchlib
+)
diff --git a/searchlib/src/apps/loadattribute/loadattribute.cpp b/searchlib/src/apps/loadattribute/loadattribute.cpp
new file mode 100644
index 00000000000..b1d1f896af8
--- /dev/null
+++ b/searchlib/src/apps/loadattribute/loadattribute.cpp
@@ -0,0 +1,216 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+#include <iostream>
+#include <vespa/searchlib/attribute/attribute.h>
+#include <vespa/searchlib/attribute/attributeguard.h>
+#include <vespa/searchlib/attribute/attributefactory.h>
+#include <vespa/vespalib/data/fileheader.h>
+#include <fstream>
+
+#include <vespa/searchlib/attribute/attributevector.hpp>
+LOG_SETUP("loadattribute");
+
+namespace search {
+
+typedef AttributeVector::SP AttributePtr;
+
+class LoadAttribute : public FastOS_Application
+{
+private:
+ void load(const AttributePtr & ptr);
+ void applyUpdate(const AttributePtr & ptr);
+ void printContent(const AttributePtr & ptr, std::ostream & os);
+ void usage();
+
+public:
+ int Main();
+};
+
+void
+LoadAttribute::load(const AttributePtr & ptr)
+{
+ std::cout << "loading attribute: " << ptr->getBaseFileName() << std::endl;
+ ptr->load();
+ std::cout << "attribute successfully loaded" << std::endl;
+}
+
+void
+LoadAttribute::applyUpdate(const AttributePtr & ptr)
+{
+ std::cout << "applyUpdate" << std::endl;
+ if (ptr->getClass().inherits(IntegerAttribute::classId)) {
+ IntegerAttribute * a = static_cast<IntegerAttribute *>(ptr.get());
+ if (ptr->hasMultiValue()) {
+ a->append(0, 123456789, 1);
+ } else {
+ a->update(0, 123456789);
+ }
+ a->commit();
+ } else if (ptr->getClass().inherits(FloatingPointAttribute::classId)) {
+ FloatingPointAttribute * a = static_cast<FloatingPointAttribute *>(ptr.get());
+ if (ptr->hasMultiValue()) {
+ a->append(0, 123456789.5f, 1);
+ } else {
+ a->update(0, 123456789);
+ }
+ a->commit();
+ } else if (ptr->getClass().inherits(StringAttribute::classId)) {
+ StringAttribute * a = static_cast<StringAttribute *>(ptr.get());
+ if (ptr->hasMultiValue()) {
+ a->append(0, "non-existing string value", 1);
+ } else {
+ a->update(0, "non-existing string value");
+ }
+ a->commit();
+ }
+}
+
+void
+LoadAttribute::printContent(const AttributePtr & ptr, std::ostream & os)
+{
+ uint32_t sz = ptr->getMaxValueCount();
+ if (ptr->hasWeightedSetType()) {
+ AttributeVector::WeightedString * buf = new AttributeVector::WeightedString[sz];
+ for (uint32_t doc = 0; doc < ptr->getNumDocs(); ++doc) {
+ uint32_t valueCount = ptr->get(doc, buf, sz);
+ assert(valueCount <= sz);
+ os << "doc " << doc << ": valueCount(" << valueCount << ")" << std::endl;
+ for (uint32_t i = 0; i < valueCount; ++i) {
+ os << " " << i << ": " << "[" << buf[i].getValue() << ", " << buf[i].getWeight() << "]" << std::endl;
+ }
+ }
+ delete [] buf;
+ } else {
+ vespalib::string *buf = new vespalib::string[ptr->getMaxValueCount()];
+ for (uint32_t doc = 0; doc < ptr->getNumDocs(); ++doc) {
+ uint32_t valueCount = ptr->get(doc, buf, sz);
+ assert(valueCount <= sz);
+ os << "doc " << doc << ": valueCount(" << valueCount << ")" << std::endl;
+ for (uint32_t i = 0; i < valueCount; ++i) {
+ os << " " << i << ": " << "[" << buf[i] << "]" << std::endl;
+ }
+ }
+ delete [] buf;
+ }
+}
+
+void
+LoadAttribute::usage()
+{
+ std::cout << "usage: loadattribute [-p (print content to <attribute>.out)]" << std::endl;
+ std::cout << " [-a (apply a single update)]" << std::endl;
+ std::cout << " [-s (save attribute to <attribute>.save.dat)]" << std::endl;
+ std::cout << " <attribute>" << std::endl;
+}
+
+int
+LoadAttribute::Main()
+{
+ bool doPrintContent = false;
+ bool doApplyUpdate = false;
+ bool doSave = false;
+ bool doFastSearch = false;
+ bool doEnableEnumeratedSave = false;
+ bool doHuge = false;
+
+ int idx = 1;
+ char opt;
+ const char * arg;
+ bool optError = false;
+ while ((opt = GetOpt("pasf:eh", arg, idx)) != -1) {
+ switch (opt) {
+ case 'p':
+ doPrintContent = true;
+ break;
+ case 'a':
+ doApplyUpdate = true;
+ break;
+ case 'e':
+ doEnableEnumeratedSave = true;
+ break;
+ case 'h':
+ doHuge = true;
+ break;
+ case 'f':
+ if (strcmp(arg, "search") == 0) {
+ doFastSearch = true;
+ } else {
+ std::cerr << "Expected 'search' or 'aggregate', got '" <<
+ arg << "'" << std::endl;
+ optError = true;
+ }
+ break;
+ case 's':
+ doSave = true;
+ break;
+ default:
+ optError = true;
+ break;
+ }
+ }
+
+ if (_argc != (idx + 1) || optError) {
+ usage();
+ return -1;
+ }
+
+ vespalib::string fileName(_argv[idx]);
+ vespalib::FileHeader fh;
+ do {
+ vespalib::string datFileName(fileName + ".dat");
+ Fast_BufferedFile file;
+ file.ReadOpenExisting(datFileName.c_str());
+ (void) fh.readFile(file);
+ } while (0);
+ attribute::BasicType bt(fh.getTag("datatype").asString());
+ attribute::CollectionType ct(fh.getTag("collectiontype").asString());
+ attribute::Config c(bt, ct);
+ c.setFastSearch(doFastSearch);
+ c.setHuge(doHuge);
+ AttributePtr ptr = AttributeFactory::createAttribute(fileName, c);
+ if (doEnableEnumeratedSave)
+ ptr->enableEnumeratedSave();
+ AttributeVector::enableEnumeratedLoad();
+ FastOS_Time timer;
+ timer.SetNow();
+ load(ptr);
+ std::cout << "load time: " << timer.MilliSecsToNow() / 1000 << " seconds " << std::endl;
+
+ std::cout << "numDocs: " << ptr->getNumDocs() << std::endl;
+
+ if (doApplyUpdate) {
+ timer.SetNow();
+ applyUpdate(ptr);
+ std::cout << "update time: " << timer.MilliSecsToNow() / 1000 << " seconds " << std::endl;
+ }
+
+ if (doPrintContent) {
+ vespalib::string outFile(fileName + ".out");
+ std::ofstream of(outFile.c_str());
+ if (of.fail()) {
+ std::cout << "failed opening: " << fileName << ".out" << std::endl;
+ }
+ std::cout << "printContent" << std::endl;
+ printContent(ptr, of);
+ of.close();
+ }
+
+ if (doSave) {
+ vespalib::string saveFile = fileName + ".save";
+ std::cout << "saving attribute: " << saveFile << std::endl;
+ timer.SetNow();
+ ptr->saveAs(saveFile);
+ std::cout << "save time: " << timer.MilliSecsToNow() / 1000 << " seconds " << std::endl;
+ }
+
+ return 0;
+}
+
+}
+
+int main(int argc, char ** argv)
+{
+ search::LoadAttribute myApp;
+ return myApp.Entry(argc, argv);
+}
diff --git a/searchlib/src/apps/loadattribute/loadattribute.rb b/searchlib/src/apps/loadattribute/loadattribute.rb
new file mode 100644
index 00000000000..d1fb5a5632c
--- /dev/null
+++ b/searchlib/src/apps/loadattribute/loadattribute.rb
@@ -0,0 +1,43 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+attribute = ARGV[0]
+
+dat = File.open(attribute + ".dat", "r")
+puts "opened " + attribute + ".dat"
+dat_buffer = []
+dat.each_byte do |byte|
+ dat_buffer.push(byte)
+end
+
+string = []
+strings = []
+dat_buffer.each do |byte|
+ if byte == 0
+ strings.push(string.pack("c*"))
+ string.clear
+ else
+ string.push(byte)
+ end
+end
+puts "num strings: #{strings.size}"
+
+idx = File.open(attribute + ".idx", "r")
+puts "opened " + attribute + ".idx"
+idx_buffer = []
+while not idx.eof
+ idx_buffer.push((idx.read(4).unpack("I")).first)
+end
+puts "num docs: #{idx_buffer.size - 1}"
+puts "num values: #{idx_buffer.last}"
+
+out = File.open(attribute + ".out", "w")
+for i in 0...(idx_buffer.size - 1)
+ count = idx_buffer[i + 1]. - idx_buffer[i]
+ out.write("doc #{i}: count = #{count}\n")
+ for j in 0...count
+ if idx_buffer[i] + j >= strings.size
+ raise "ERROR: idx_buffer[i] + j (#{idx_buffer[i] + j}) >= strings.size (#{strings.size})"
+ end
+ out.write(" #{j}: #{strings[idx_buffer[i] + j]}\n")
+ end
+end
+
diff --git a/searchlib/src/apps/tests/.gitignore b/searchlib/src/apps/tests/.gitignore
new file mode 100644
index 00000000000..e05359d841e
--- /dev/null
+++ b/searchlib/src/apps/tests/.gitignore
@@ -0,0 +1,8 @@
+/.depend
+/Makefile
+/biglog_test
+/btreestress_test
+/memoryindexstress_test
+searchlib_biglog_test_app
+searchlib_btreestress_test_app
+searchlib_memoryindexstress_test_app
diff --git a/searchlib/src/apps/tests/CMakeLists.txt b/searchlib/src/apps/tests/CMakeLists.txt
new file mode 100644
index 00000000000..5c275e4cfb8
--- /dev/null
+++ b/searchlib/src/apps/tests/CMakeLists.txt
@@ -0,0 +1,22 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_biglog_test_app
+ SOURCES
+ biglogtest.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_biglog_test_app COMMAND searchlib_biglog_test_app BENCHMARK)
+vespa_add_executable(searchlib_btreestress_test_app
+ SOURCES
+ btreestress_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_btreestress_test_app COMMAND searchlib_btreestress_test_app BENCHMARK)
+vespa_add_executable(searchlib_memoryindexstress_test_app
+ SOURCES
+ memoryindexstress_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_memoryindexstress_test_app COMMAND searchlib_memoryindexstress_test_app BENCHMARK)
diff --git a/searchlib/src/apps/tests/biglogtest.cpp b/searchlib/src/apps/tests/biglogtest.cpp
new file mode 100644
index 00000000000..56b695b69c0
--- /dev/null
+++ b/searchlib/src/apps/tests/biglogtest.cpp
@@ -0,0 +1,243 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("big_logdatastore_test");
+
+#include <vespa/vespalib/testkit/testapp.h>
+#include <vespa/vespalib/util/random.h>
+#include <vespa/searchlib/docstore/logdatastore.h>
+#include <vespa/searchlib/index/dummyfileheadercontext.h>
+#include <vespa/searchlib/transactionlog/nosyncproxy.h>
+#include <iostream>
+
+#include <vespa/vespalib/util/exceptions.h>
+
+using namespace search;
+using search::index::DummyFileHeaderContext;
+
+class Test : public vespalib::TestApp {
+private:
+ struct Blob {
+ ssize_t sz;
+ char *buf;
+ Blob(size_t s) : sz(s), buf(s == 0 ? 0 : new char[s]) {}
+ };
+ typedef std::map<uint32_t, uint32_t> Map;
+
+ void makeBlobs();
+ void cleanBlobs();
+ void checkBlobs(const IDataStore &datastore, const Map &lidToBlobMap);
+
+ template <typename DS>
+ void testDIO();
+
+ std::string _dir;
+ std::vector<Blob> _blobs;
+ vespalib::RandomGen _randomgenerator;
+
+public:
+ int Main() {
+ TEST_INIT("big_logdatastore_test");
+
+ if (_argc > 0) {
+ DummyFileHeaderContext::setCreator(_argv[0]);
+ }
+ makeBlobs();
+
+ _dir = "logged";
+ TEST_DO(testDIO<LogDataStore>());
+
+ cleanBlobs();
+
+ TEST_DONE();
+ }
+
+ Test() : _dir(""), _blobs(), _randomgenerator(42) {}
+};
+
+TEST_APPHOOK(Test);
+
+
+void
+Test::makeBlobs()
+{
+ _randomgenerator.setSeed(42);
+ _blobs.push_back(Blob(0));
+ size_t usemem = 444222111;
+ while (usemem > 0) {
+ size_t sizeclass = 6 + _randomgenerator.nextUint32() % 20;
+ size_t blobsize = _randomgenerator.nextUint32() % (1<<sizeclass);
+ if (blobsize > usemem) blobsize = usemem;
+ _blobs.push_back(Blob(blobsize));
+ char *p = _blobs.back().buf;
+ for (size_t j=0; j < blobsize; ++j) {
+ *p++ = _randomgenerator.nextUint32();
+ }
+ usemem -= blobsize;
+ }
+}
+
+void
+Test::cleanBlobs()
+{
+ printf("count %lu blobs sizes:", _blobs.size());
+ while (_blobs.size() > 0) {
+ char *p = _blobs.back().buf;
+ printf(" %lu", _blobs.back().sz);
+ delete[] p;
+ _blobs.pop_back();
+ }
+ printf("\n");
+}
+
+
+void
+Test::checkBlobs(const IDataStore &datastore,
+ const Map &lidToBlobMap)
+{
+ for (Map::const_iterator it = lidToBlobMap.begin();
+ it != lidToBlobMap.end();
+ ++it)
+ {
+ uint32_t lid = it->first;
+ uint32_t bno = it->second;
+ vespalib::DataBuffer got;
+ EXPECT_EQUAL(datastore.read(lid, got), _blobs[bno].sz);
+ EXPECT_TRUE(memcmp(got.getData(), _blobs[bno].buf, _blobs[bno].sz) == 0);
+ }
+}
+
+struct DioTune
+{
+ TuneFileSummary tuning;
+ DioTune() {
+ tuning._seqRead.setWantDirectIO();
+ tuning._write.setWantDirectIO();
+ tuning._randRead.setWantDirectIO();
+ }
+};
+
+template <typename DS>
+struct factory {};
+
+template <>
+struct factory<LogDataStore> : DioTune
+{
+ DummyFileHeaderContext _fileHeaderContext;
+ LogDataStore::Config _config;
+ vespalib::ThreadStackExecutor _executor;
+ transactionlog::NoSyncProxy _noTlSyncer;
+ LogDataStore _datastore;
+ factory(std::string dir)
+ : DioTune(),
+ _fileHeaderContext(),
+ _config(),
+ _executor(_config.getNumThreads(), 128*1024),
+ _noTlSyncer(),
+ _datastore(_executor, dir, _config, GrowStrategy(), tuning,
+ _fileHeaderContext, _noTlSyncer, NULL)
+ {}
+ IDataStore & operator() () { return _datastore; }
+
+};
+
+template <typename DS>
+void
+Test::testDIO()
+{
+ uint64_t serial = 0;
+
+ FastOS_File::EmptyDirectory(_dir.c_str());
+ FastOS_File::RemoveDirectory(_dir.c_str());
+ EXPECT_TRUE(FastOS_File::MakeDirectory(_dir.c_str()));
+
+ Map lidToBlobMap;
+ vespalib::DataBuffer buf;
+ {
+ factory<DS> ds(_dir);
+ for (uint32_t lid=0; lid<15; ++lid) {
+ uint32_t blobno = _randomgenerator.nextUint32() % _blobs.size();
+ lidToBlobMap[lid] = blobno;
+ ds().write(++serial, lid, _blobs[blobno].buf, _blobs[blobno].sz);
+ }
+ uint64_t flushToken = ds().initFlush(serial);
+ ds().flush(flushToken);
+ for (uint32_t lid=10; lid<30; ++lid) {
+ uint32_t blobno = _randomgenerator.nextUint32() % _blobs.size();
+ lidToBlobMap[lid] = blobno;
+ ds().write(++serial, lid, _blobs[blobno].buf, _blobs[blobno].sz);
+ }
+ checkBlobs(ds(), lidToBlobMap);
+ flushToken = ds().initFlush(serial);
+ ds().flush(flushToken);
+ checkBlobs(ds(), lidToBlobMap);
+ }
+ {
+ factory<DS> ds(_dir);
+ checkBlobs(ds(), lidToBlobMap);
+
+ for (uint32_t lid=3; lid<8; ++lid) {
+ uint32_t blobno = _randomgenerator.nextUint32() % _blobs.size();
+ lidToBlobMap[lid] = blobno;
+ ds().write(++serial, lid, _blobs[blobno].buf, _blobs[blobno].sz);
+ }
+ for (uint32_t lid=23; lid<28; ++lid) {
+ uint32_t blobno = _randomgenerator.nextUint32() % _blobs.size();
+ lidToBlobMap[lid] = blobno;
+ ds().write(++serial, lid, _blobs[blobno].buf, _blobs[blobno].sz);
+ }
+ for (uint32_t lid=100033; lid<100088; ++lid) {
+ uint32_t blobno = _randomgenerator.nextUint32() % _blobs.size();
+ lidToBlobMap[lid] = blobno;
+ ds().write(++serial, lid, _blobs[blobno].buf, _blobs[blobno].sz);
+ }
+ checkBlobs(ds(), lidToBlobMap);
+
+ ds().remove(++serial, 1);
+ lidToBlobMap[1] = 0;
+ ds().remove(++serial, 11);
+ lidToBlobMap[11] = 0;
+ ds().remove(++serial, 21);
+ lidToBlobMap[21] = 0;
+ ds().remove(++serial, 31);
+ lidToBlobMap[31] = 0;
+
+ checkBlobs(ds(), lidToBlobMap);
+ uint64_t flushToken = ds().initFlush(serial);
+ ds().flush(flushToken);
+ checkBlobs(ds(), lidToBlobMap);
+ }
+ {
+ factory<DS> ds(_dir);
+
+ ASSERT_TRUE(ds().read(1, buf) <= 0);
+ ASSERT_TRUE(ds().read(11, buf) <= 0);
+ ASSERT_TRUE(ds().read(21, buf) <= 0);
+ ASSERT_TRUE(ds().read(31, buf) <= 0);
+
+ checkBlobs(ds(), lidToBlobMap);
+ uint64_t flushToken = ds().initFlush(serial);
+ ds().flush(flushToken);
+ }
+ {
+ factory<DS> ds(_dir);
+ checkBlobs(ds(), lidToBlobMap);
+
+ for (uint32_t lid=1234567; lid < 1234999; ++lid) {
+ uint32_t blobno = _randomgenerator.nextUint32() % _blobs.size();
+ lidToBlobMap[lid] = blobno;
+ ds().write(++serial, lid, _blobs[blobno].buf, _blobs[blobno].sz);
+ }
+ checkBlobs(ds(), lidToBlobMap);
+ uint64_t flushToken = ds().initFlush(22);
+ ds().flush(flushToken);
+ checkBlobs(ds(), lidToBlobMap);
+ }
+ {
+ factory<DS> ds(_dir);
+ checkBlobs(ds(), lidToBlobMap);
+ }
+ FastOS_File::EmptyDirectory(_dir.c_str());
+ FastOS_File::RemoveDirectory(_dir.c_str());
+ TEST_FLUSH();
+}
diff --git a/searchlib/src/apps/tests/btreestress_test.cpp b/searchlib/src/apps/tests/btreestress_test.cpp
new file mode 100644
index 00000000000..7e4cdf32fdd
--- /dev/null
+++ b/searchlib/src/apps/tests/btreestress_test.cpp
@@ -0,0 +1,224 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("btreestress_test");
+#include <vespa/vespalib/testkit/test_kit.h>
+#include <string>
+#include <set>
+#include <iostream>
+#include <vespa/searchlib/btree/btreeroot.h>
+#include <vespa/searchlib/btree/btreebuilder.h>
+#include <vespa/searchlib/btree/btreenodeallocator.h>
+#include <vespa/searchlib/btree/btree.h>
+#include <vespa/searchlib/btree/btreestore.h>
+#include <vespa/searchlib/util/rand48.h>
+
+#include <vespa/searchlib/btree/btreenodeallocator.hpp>
+#include <vespa/searchlib/btree/btreenode.hpp>
+#include <vespa/searchlib/btree/btreenodestore.hpp>
+#include <vespa/searchlib/btree/btreeiterator.hpp>
+#include <vespa/searchlib/btree/btreeroot.hpp>
+#include <vespa/searchlib/btree/btreebuilder.hpp>
+#include <vespa/searchlib/btree/btree.hpp>
+#include <vespa/searchlib/btree/btreestore.hpp>
+#include <vespa/searchlib/btree/btreeaggregator.hpp>
+
+
+#include <vespa/vespalib/util/threadstackexecutor.h>
+#include <vespa/searchlib/common/lambdatask.h>
+#include <vespa/searchlib/util/rand48.h>
+
+using MyTree = search::btree::BTree<uint32_t, uint32_t>;
+using MyTreeIterator = typename MyTree::Iterator;
+using MyTreeConstIterator = typename MyTree::ConstIterator;
+using GenerationHandler = vespalib::GenerationHandler;
+using search::makeLambdaTask;
+
+struct Fixture
+{
+ GenerationHandler _generationHandler;
+ MyTree _tree;
+ MyTreeIterator _writeItr;
+ vespalib::ThreadStackExecutor _writer; // 1 write thread
+ vespalib::ThreadStackExecutor _readers; // multiple reader threads
+ search::Rand48 _rnd;
+ uint32_t _keyLimit;
+ std::atomic<long> _readSeed;
+ std::atomic<long> _doneWriteWork;
+ std::atomic<long> _doneReadWork;
+ std::atomic<int> _stopRead;
+ bool _reportWork;
+
+ Fixture();
+ ~Fixture();
+ void commit();
+ void adjustWriteIterator(uint32_t key);
+ void insert(uint32_t key);
+ void remove(uint32_t key);
+
+ void readWork(uint32_t cnt);
+ void readWork();
+ void writeWork(uint32_t cnt);
+};
+
+
+Fixture::Fixture()
+ : _generationHandler(),
+ _tree(),
+ _writeItr(_tree.begin()),
+ _writer(1, 128 * 1024),
+ _readers(4, 128 * 1024),
+ _rnd(),
+ _keyLimit(1000000),
+ _readSeed(50),
+ _doneWriteWork(0),
+ _doneReadWork(0),
+ _stopRead(0),
+ _reportWork(false)
+{
+ _rnd.srand48(32);
+}
+
+
+Fixture::~Fixture()
+{
+ _readers.sync();
+ _readers.shutdown();
+ _writer.sync();
+ _writer.shutdown();
+ commit();
+ if (_reportWork) {
+ LOG(info,
+ "readWork=%ld, writeWork=%ld",
+ _doneReadWork.load(), _doneWriteWork.load());
+ }
+}
+
+
+void
+Fixture::commit()
+{
+ auto &allocator = _tree.getAllocator();
+ allocator.freeze();
+ allocator.transferHoldLists(_generationHandler.getCurrentGeneration());
+ _generationHandler.incGeneration();
+ allocator.trimHoldLists(_generationHandler.getFirstUsedGeneration());
+}
+
+void
+Fixture::adjustWriteIterator(uint32_t key)
+{
+ if (_writeItr.valid() && _writeItr.getKey() < key) {
+ _writeItr.binarySeek(key);
+ } else {
+ _writeItr.lower_bound(key);
+ }
+}
+
+void
+Fixture::insert(uint32_t key)
+{
+ adjustWriteIterator(key);
+ assert(!_writeItr.valid() || _writeItr.getKey() >= key);
+ if (!_writeItr.valid() || _writeItr.getKey() != key) {
+ _tree.insert(_writeItr, key, 0u);
+ }
+}
+
+void
+Fixture::remove(uint32_t key)
+{
+ adjustWriteIterator(key);
+ assert(!_writeItr.valid() || _writeItr.getKey() >= key);
+ if (_writeItr.valid() && _writeItr.getKey() == key) {
+ _tree.remove(_writeItr);
+ }
+}
+
+
+void
+Fixture::readWork(uint32_t cnt)
+{
+ search::Rand48 rnd;
+ rnd.srand48(++_readSeed);
+ uint32_t i;
+ for (i = 0; i < cnt && _stopRead.load() == 0; ++i) {
+ auto guard = _generationHandler.takeGuard();
+ uint32_t key = rnd.lrand48() % (_keyLimit + 1);
+ MyTreeConstIterator itr = _tree.getFrozenView().lowerBound(key);
+ assert(!itr.valid() || itr.getKey() >= key);
+ }
+ _doneReadWork += i;
+ LOG(info, "done %u read work", i);
+}
+
+
+void
+Fixture::readWork()
+{
+ readWork(std::numeric_limits<uint32_t>::max());
+}
+
+
+void
+Fixture::writeWork(uint32_t cnt)
+{
+ search::Rand48 &rnd(_rnd);
+ for (uint32_t i = 0; i < cnt; ++i) {
+ uint32_t key = rnd.lrand48() % _keyLimit;
+ if ((rnd.lrand48() & 1) == 0) {
+ insert(key);
+ } else {
+ remove(key);
+ }
+ commit();
+ }
+ _doneWriteWork += cnt;
+ _stopRead = 1;
+ LOG(info, "done %u write work", cnt);
+}
+
+
+TEST_F("Test manual lower bound call", Fixture)
+{
+ f.insert(1);
+ f.remove(2);
+ f.insert(1);
+ f.insert(5);
+ f.insert(4);
+ f.remove(3);
+ f.remove(5);
+ f.commit();
+ auto itr = f._tree.getFrozenView().lowerBound(3);
+ EXPECT_TRUE(itr.valid());
+ EXPECT_EQUAL(4u, itr.getKey());
+}
+
+TEST_F("Test single threaded lower_bound reader without updates", Fixture)
+{
+ f._reportWork = true;
+ f.writeWork(10);
+ f._stopRead = 0;
+ f.readWork(10);
+}
+
+TEST_F("Test single threaded lower_bound reader during updates", Fixture)
+{
+ uint32_t cnt = 1000000;
+ f._reportWork = true;
+ f._writer.execute(makeLambdaTask([=]() { f.writeWork(cnt); }));
+ f._readers.execute(makeLambdaTask([=]() { f.readWork(); }));
+}
+
+TEST_F("Test multithreaded lower_bound reader during updates", Fixture)
+{
+ uint32_t cnt = 1000000;
+ f._reportWork = true;
+ f._writer.execute(makeLambdaTask([=]() { f.writeWork(cnt); }));
+ f._readers.execute(makeLambdaTask([=]() { f.readWork(); }));
+ f._readers.execute(makeLambdaTask([=]() { f.readWork(); }));
+ f._readers.execute(makeLambdaTask([=]() { f.readWork(); }));
+ f._readers.execute(makeLambdaTask([=]() { f.readWork(); }));
+}
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/apps/tests/memoryindexstress_test.cpp b/searchlib/src/apps/tests/memoryindexstress_test.cpp
new file mode 100644
index 00000000000..88aaae374b3
--- /dev/null
+++ b/searchlib/src/apps/tests/memoryindexstress_test.cpp
@@ -0,0 +1,537 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("memoryindexstress_test");
+#include <vespa/vespalib/testkit/testapp.h>
+
+#include <vespa/searchlib/memoryindex/memoryindex.h>
+#include <vespa/searchlib/fef/matchdata.h>
+#include <vespa/searchlib/fef/matchdatalayout.h>
+#include <vespa/searchlib/fef/termfieldmatchdata.h>
+#include <vespa/document/repo/documenttyperepo.h>
+#include <vespa/searchlib/query/tree/simplequery.h>
+#include <vespa/searchlib/queryeval/booleanmatchiteratorwrapper.h>
+#include <vespa/searchlib/queryeval/fake_search.h>
+#include <vespa/searchlib/queryeval/fake_searchable.h>
+#include <vespa/searchlib/queryeval/fake_requestcontext.h>
+#include <vespa/searchlib/queryeval/searchiterator.h>
+#include <vespa/vespalib/util/stringfmt.h>
+#include <vespa/searchlib/common/sequencedtaskexecutor.h>
+#include <vespa/searchlib/common/scheduletaskcallback.h>
+#include <vespa/vespalib/util/threadstackexecutor.h>
+#include <vespa/document/repo/configbuilder.h>
+#include <vespa/document/datatype/annotationtype.h>
+#include <vespa/document/annotation/annotation.h>
+#include <vespa/document/annotation/span.h>
+#include <vespa/document/annotation/spanlist.h>
+#include <vespa/document/annotation/spantree.h>
+#include <vespa/searchlib/util/rand48.h>
+
+using document::AnnotationType;
+using document::DataType;
+using document::Document;
+using document::DocumentId;
+using document::DocumentType;
+using document::DocumentTypeRepo;
+using document::FieldValue;
+using document::Span;
+using document::SpanList;
+using document::StringFieldValue;
+using search::query::Node;
+using search::query::SimplePhrase;
+using search::query::SimpleStringTerm;
+using search::makeLambdaTask;
+using search::ScheduleTaskCallback;
+using namespace search::fef;
+using namespace search::index;
+using namespace search::memoryindex;
+using namespace search::queryeval;
+using vespalib::asciistream;
+
+namespace
+{
+
+const vespalib::string SPANTREE_NAME("linguistics");
+const vespalib::string title("title");
+const vespalib::string body("body");
+const vespalib::string foo("foo");
+const vespalib::string bar("bar");
+const vespalib::string doc_type_name = "test";
+const vespalib::string header_name = doc_type_name + ".header";
+const vespalib::string body_name = doc_type_name + ".body";
+
+
+Schema
+makeSchema()
+{
+ Schema schema;
+ schema.addIndexField(Schema::IndexField(title, Schema::STRING));
+ schema.addIndexField(Schema::IndexField(body, Schema::STRING));
+ return schema;
+}
+
+document::DocumenttypesConfig
+makeDocTypeRepoConfig(void)
+{
+ const int32_t doc_type_id = 787121340;
+ document::config_builder::DocumenttypesConfigBuilderHelper builder;
+ builder.document(doc_type_id,
+ doc_type_name,
+ document::config_builder::Struct(header_name),
+ document::config_builder::Struct(body_name).
+ addField(title, DataType::T_STRING).
+ addField(body, DataType::T_STRING));
+ return builder.config();
+}
+
+
+bool isWordChar(char c) {
+ return ((c >= '0' && c <= '9') ||
+ (c >= 'A' && c <= 'Z') ||
+ (c >= 'a' && c <= 'z'));
+}
+
+
+void
+tokenizeStringFieldValue(const document::FixedTypeRepo & repo, StringFieldValue &field)
+{
+ document::SpanTree::UP spanTree; // Note: Not thread safe, is linkedptr
+ SpanList::UP spanList(std::make_unique<SpanList>());
+ SpanList *spans = spanList.get();
+ spanTree.reset(new document::SpanTree(SPANTREE_NAME, std::move(spanList)));
+ const vespalib::string &text = field.getValue();
+ uint32_t cur = 0;
+ int32_t start = 0;
+ bool inWord = false;
+ for (cur = 0; cur < text.size(); ++cur) {
+ char c = text[cur];
+ bool isWc = isWordChar(c);
+ if (!inWord && isWc) {
+ inWord = true;
+ start = cur;
+ } else if (inWord && !isWc) {
+ int32_t len = cur - start;
+ spanTree->annotate(spans->add(std::make_unique<Span>(start, len)),
+ *AnnotationType::TERM);
+ inWord = false;
+ }
+ }
+ if (inWord) {
+ int32_t len = cur - start;
+ spanTree->annotate(spans->add(std::make_unique<Span>(start, len)),
+ *AnnotationType::TERM);
+ }
+ if (spanTree->numAnnotations() > 0u) {
+ StringFieldValue::SpanTrees trees;
+ trees.emplace_back(std::move(spanTree));
+ field.setSpanTrees(trees, repo);
+ }
+}
+
+
+void
+setFieldValue(Document &doc, const vespalib::string &fieldName,
+ const vespalib::string &fieldString)
+{
+ std::unique_ptr<StringFieldValue> fieldValue =
+ std::make_unique<StringFieldValue>(fieldString);
+ document::FixedTypeRepo repo(*doc.getRepo(), doc.getType());
+ tokenizeStringFieldValue(repo, *fieldValue);
+ doc.setFieldValue(doc.getField(fieldName), std::move(fieldValue));
+}
+
+Document::UP
+makeDoc(const DocumentTypeRepo &repo, uint32_t i,
+ const vespalib::string &titleString,
+ const vespalib::string &bodyString = "")
+{
+ asciistream idstr;
+ idstr << "id:test:test:: " << i;
+ DocumentId id(idstr.str());
+ const DocumentType *docType = repo.getDocumentType(doc_type_name);
+ Document::UP doc(new Document(*docType, id));
+ doc->setRepo(repo);
+ if (!titleString.empty()) {
+ setFieldValue(*doc, title, titleString);
+ }
+ if (!bodyString.empty()) {
+ setFieldValue(*doc, body, bodyString);
+ }
+ ASSERT_TRUE(doc.get());
+#if 0
+ doc->print(std::cout, true, "");
+ std::cout << std::endl;
+#endif
+ return doc;
+}
+
+Document::UP
+makeDoc(const DocumentTypeRepo &repo, uint32_t i)
+{
+ asciistream titleStr;
+ asciistream bodyStr;
+ titleStr << i;
+ bodyStr << (i * 3);
+ return makeDoc(repo, i, titleStr.str(), bodyStr.str());
+}
+
+
+SimpleStringTerm makeTerm(const std::string &term) {
+ return SimpleStringTerm(term, "field", 0, search::query::Weight(0));
+}
+
+Node::UP makePhrase(const std::string &term1, const std::string &term2) {
+ SimplePhrase * phrase = new SimplePhrase("field", 0, search::query::Weight(0));
+ Node::UP node(phrase);
+ phrase->append(Node::UP(new SimpleStringTerm(makeTerm(term1))));
+ phrase->append(Node::UP(new SimpleStringTerm(makeTerm(term2))));
+ return node;
+}
+
+} // namespace
+
+
+
+struct Fixture {
+ Schema schema;
+ DocumentTypeRepo repo;
+ vespalib::ThreadStackExecutor _executor;
+ search::SequencedTaskExecutor _invertThreads;
+ search::SequencedTaskExecutor _pushThreads;
+ MemoryIndex index;
+ uint32_t _readThreads;
+ vespalib::ThreadStackExecutor _writer; // 1 write thread
+ vespalib::ThreadStackExecutor _readers; // multiple reader threads
+ search::Rand48 _rnd;
+ uint32_t _keyLimit;
+ std::atomic<long> _readSeed;
+ std::atomic<long> _doneWriteWork;
+ std::atomic<long> _doneReadWork;
+ std::atomic<long> _emptyCount;
+ std::atomic<long> _nonEmptyCount;
+ std::atomic<int> _stopRead;
+ bool _reportWork;
+
+ Fixture(uint32_t readThreads = 1);
+
+ ~Fixture();
+
+ void internalSyncCommit() {
+ vespalib::Gate gate;
+ index.commit(std::make_shared<ScheduleTaskCallback>
+ (_executor,
+ makeLambdaTask([&]() { gate.countDown(); })));
+ gate.await();
+ }
+ void put(uint32_t id, Document::UP doc) {
+ index.insertDocument(id, *doc);
+ }
+ void remove(uint32_t id) {
+ index.removeDocument(id);
+ }
+
+ void readWork(uint32_t cnt);
+ void readWork();
+ void writeWork(uint32_t cnt);
+ uint32_t getReadThreads() const { return _readThreads; }
+ void stressTest(uint32_t writeCnt);
+
+private:
+ Fixture(const Fixture &index) = delete;
+ Fixture(Fixture &&index) = delete;
+ Fixture &operator=(const Fixture &index) = delete;
+ Fixture &operator=(Fixture &&index) = delete;
+};
+
+
+Fixture::Fixture(uint32_t readThreads)
+ : schema(makeSchema()),
+ repo(makeDocTypeRepoConfig()),
+ _executor(1, 128 * 1024),
+ _invertThreads(2),
+ _pushThreads(2),
+ index(schema, _invertThreads, _pushThreads),
+ _readThreads(readThreads),
+ _writer(1, 128 * 1024),
+ _readers(readThreads, 128 * 1024),
+ _rnd(),
+ _keyLimit(1000000),
+ _readSeed(50),
+ _doneWriteWork(0),
+ _doneReadWork(0),
+ _emptyCount(0),
+ _nonEmptyCount(0),
+ _stopRead(0),
+ _reportWork(false)
+{
+ _rnd.srand48(32);
+}
+
+
+Fixture::~Fixture()
+{
+ _readers.sync();
+ _readers.shutdown();
+ _writer.sync();
+ _writer.shutdown();
+ if (_reportWork) {
+ LOG(info,
+ "readWork=%ld, writeWork=%ld, emptyCount=%ld, nonemptyCount=%ld",
+ _doneReadWork.load(), _doneWriteWork.load(),
+ _emptyCount.load(), _nonEmptyCount.load());
+ }
+}
+
+
+void
+Fixture::readWork(uint32_t cnt)
+{
+ search::Rand48 rnd;
+ rnd.srand48(++_readSeed);
+ uint32_t i;
+ uint32_t emptyCount = 0;
+ uint32_t nonEmptyCount = 0;
+ std::string fieldName = "title";
+
+ for (i = 0; i < cnt && _stopRead.load() == 0; ++i) {
+ uint32_t key = (rnd.lrand48() % (_keyLimit + 1)) + 1;
+
+ asciistream keyStr;
+ keyStr << key;
+
+ SimpleStringTerm term = makeTerm(keyStr.str());
+
+ uint32_t fieldId = 0;
+ FakeRequestContext requestContext;
+
+ MatchDataLayout mdl;
+ TermFieldHandle handle = mdl.allocTermField(fieldId);
+ MatchData::UP match_data = mdl.createMatchData();
+
+ FieldSpec field(fieldName, fieldId, handle);
+ FieldSpecList fields;
+ fields.add(field);
+ Blueprint::UP result = index.createBlueprint(requestContext,
+ fields, term);
+ if (!EXPECT_TRUE(result.get() != 0)) {
+ LOG(error, "Did not get blueprint");
+ break;
+ }
+ if (result->getState().estimate().empty) {
+ ++emptyCount;
+ } else {
+ ++nonEmptyCount;
+ }
+ result->fetchPostings(true);
+ SearchIterator::UP search = result->createSearch(*match_data, true);
+ if (!EXPECT_TRUE(search.get() != 0)) {
+ LOG(error, "Did not get search iterator");
+ break;
+ }
+ }
+ _doneReadWork += i;
+ _emptyCount += emptyCount;
+ _nonEmptyCount += nonEmptyCount;
+ LOG(info, "done %u read work", i);
+}
+
+
+void
+Fixture::readWork()
+{
+ readWork(std::numeric_limits<uint32_t>::max());
+}
+
+
+void
+Fixture::writeWork(uint32_t cnt)
+{
+ search::Rand48 &rnd(_rnd);
+ for (uint32_t i = 0; i < cnt; ++i) {
+ uint32_t key = rnd.lrand48() % _keyLimit;
+ if ((rnd.lrand48() & 1) == 0) {
+ put(key + 1, makeDoc(repo, key + 1));
+ } else {
+ remove(key + 1);
+ }
+ internalSyncCommit();
+ }
+ _doneWriteWork += cnt;
+ _stopRead = 1;
+ LOG(info, "done %u write work", cnt);
+}
+
+
+void
+Fixture::stressTest(uint32_t writeCnt)
+{
+ _reportWork = true;
+ uint32_t readThreads = getReadThreads();
+ LOG(info,
+ "starting stress test, 1 write thread, %u read threads, %u writes",
+ readThreads, writeCnt);
+ _writer.execute(makeLambdaTask([=]() { writeWork(writeCnt); }));
+ for (uint32_t i = 0; i < readThreads; ++i) {
+ _readers.execute(makeLambdaTask([=]() { readWork(); }));
+ }
+}
+
+
+//-----------------------------------------------------------------------------
+
+std::string toString(SearchIterator & search)
+{
+ std::ostringstream oss;
+ bool first = true;
+ for (search.seek(1); ! search.isAtEnd(); search.seek(search.getDocId() + 1)) {
+ if (!first) oss << ",";
+ oss << search.getDocId();
+ first = false;
+ }
+ return oss.str();
+}
+
+//-----------------------------------------------------------------------------
+
+bool
+verifyResult(const FakeResult &expect,
+ Searchable &index,
+ std::string fieldName,
+ const Node &term)
+{
+ uint32_t fieldId = 0;
+ FakeRequestContext requestContext;
+
+ MatchDataLayout mdl;
+ TermFieldHandle handle = mdl.allocTermField(fieldId);
+ MatchData::UP match_data = mdl.createMatchData();
+
+ FieldSpec field(fieldName, fieldId, handle);
+ FieldSpecList fields;
+ fields.add(field);
+
+ Blueprint::UP result = index.createBlueprint(requestContext, fields, term);
+ if (!EXPECT_TRUE(result.get() != 0)) {
+ return false;
+ }
+ EXPECT_EQUAL(expect.inspect().size(), result->getState().estimate().estHits);
+ EXPECT_EQUAL(expect.inspect().empty(), result->getState().estimate().empty);
+
+ result->fetchPostings(true);
+ SearchIterator::UP search = result->createSearch(*match_data, true);
+ if (!EXPECT_TRUE(search.get() != 0)) {
+ return false;
+ }
+ TermFieldMatchData &tmd = *match_data->resolveTermField(handle);
+
+ FakeResult actual;
+ search->initFullRange();
+ for (search->seek(1); !search->isAtEnd(); search->seek(search->getDocId() + 1)) {
+ actual.doc(search->getDocId());
+ search->unpack(search->getDocId());
+ EXPECT_EQUAL(search->getDocId(), tmd.getDocId());
+ FieldPositionsIterator p = tmd.getIterator();
+ actual.len(p.getFieldLength());
+ for (; p.valid(); p.next()) {
+ actual.pos(p.getPosition());
+ }
+ }
+ return EXPECT_EQUAL(expect, actual);
+}
+
+// tests basic usage; index some documents in docid order and perform
+// some searches.
+TEST_F("testIndexAndSearch", Fixture)
+{
+ f.put(1, makeDoc(f.repo, 1, "foo bar foo", "foo foo foo"));
+ f.internalSyncCommit();
+ f.put(2, makeDoc(f.repo, 2, "bar foo", "bar bar bar bar"));
+ f.internalSyncCommit();
+
+ // search for "foo" in "title"
+ EXPECT_TRUE(verifyResult(FakeResult()
+ .doc(1).len(3).pos(0).pos(2)
+ .doc(2).len(2).pos(1),
+ f.index, title, makeTerm(foo)));
+
+ // search for "bar" in "title"
+ EXPECT_TRUE(verifyResult(FakeResult()
+ .doc(1).len(3).pos(1)
+ .doc(2).len(2).pos(0),
+ f.index, title, makeTerm(bar)));
+
+ // search for "foo" in "body"
+ EXPECT_TRUE(verifyResult(FakeResult()
+ .doc(1).len(3).pos(0).pos(1).pos(2),
+ f.index, body, makeTerm(foo)));
+
+ // search for "bar" in "body"
+ EXPECT_TRUE(verifyResult(FakeResult()
+ .doc(2).len(4).pos(0).pos(1).pos(2).pos(3),
+ f.index, body, makeTerm(bar)));
+
+ // search for "bogus" in "title"
+ EXPECT_TRUE(verifyResult(FakeResult(),
+ f.index, title, makeTerm("bogus")));
+
+ // search for "foo" in "bogus"
+ EXPECT_TRUE(verifyResult(FakeResult(),
+ f.index, "bogus", makeTerm(foo)));
+
+ // search for "bar foo" in "title"
+ EXPECT_TRUE(verifyResult(FakeResult()
+ .doc(1).len(3).pos(1)
+ .doc(2).len(2).pos(0),
+ f.index, title, *makePhrase(bar, foo)));
+
+}
+
+// tests index update behavior; remove/update and unordered docid
+// indexing.
+TEST_F("require that documents can be removed and updated", Fixture)
+{
+ // add unordered
+ f.put(3, makeDoc(f.repo, 3, "foo foo foo"));
+ f.internalSyncCommit();
+ f.put(1, makeDoc(f.repo, 1, "foo"));
+ f.internalSyncCommit();
+ f.put(2, makeDoc(f.repo, 2, "foo foo"));
+ f.internalSyncCommit();
+
+ EXPECT_TRUE(verifyResult(FakeResult()
+ .doc(1).len(1).pos(0)
+ .doc(2).len(2).pos(0).pos(1)
+ .doc(3).len(3).pos(0).pos(1).pos(2),
+ f.index, title, makeTerm(foo)));
+
+ // remove document
+ f.remove(2);
+ f.internalSyncCommit();
+
+ EXPECT_TRUE(verifyResult(FakeResult()
+ .doc(1).len(1).pos(0)
+ .doc(3).len(3).pos(0).pos(1).pos(2),
+ f.index, title, makeTerm(foo)));
+
+ // update document
+ f.put(1, makeDoc(f.repo, 1, "bar foo foo"));
+ f.internalSyncCommit();
+
+ EXPECT_TRUE(verifyResult(FakeResult()
+ .doc(1).len(3).pos(1).pos(2)
+ .doc(3).len(3).pos(0).pos(1).pos(2),
+ f.index, title, makeTerm(foo)));
+}
+
+
+TEST_F("stress test, 4 readers", Fixture(4))
+{
+ f.stressTest(1000000);
+}
+
+TEST_F("stress test, 128 readers", Fixture(128))
+{
+ f.stressTest(1000000);
+}
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/apps/uniform/.gitignore b/searchlib/src/apps/uniform/.gitignore
new file mode 100644
index 00000000000..ff18dbaa7fd
--- /dev/null
+++ b/searchlib/src/apps/uniform/.gitignore
@@ -0,0 +1,3 @@
+.depend
+Makefile
+uniform
diff --git a/searchlib/src/apps/uniform/CMakeLists.txt b/searchlib/src/apps/uniform/CMakeLists.txt
new file mode 100644
index 00000000000..9f9c2139f42
--- /dev/null
+++ b/searchlib/src/apps/uniform/CMakeLists.txt
@@ -0,0 +1,9 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_uniform_app
+ SOURCES
+ uniform.cpp
+ OUTPUT_NAME uniform
+ INSTALL bin
+ DEPENDS
+ searchlib
+)
diff --git a/searchlib/src/apps/uniform/uniform.cpp b/searchlib/src/apps/uniform/uniform.cpp
new file mode 100644
index 00000000000..18bdcadbc20
--- /dev/null
+++ b/searchlib/src/apps/uniform/uniform.cpp
@@ -0,0 +1,153 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/searchlib/bitcompression/compression.h>
+
+
+static uint64_t
+maxExpGolombVal(uint64_t kValue, uint64_t maxBits)
+{
+ return static_cast<uint64_t>
+ ((UINT64_C(1) << ((maxBits + kValue + 1) / 2)) -
+ (UINT64_C(1) << kValue));
+}
+
+class UniformApp : public FastOS_Application
+{
+ typedef search::bitcompression::EncodeContext64BE EC64;
+
+ enum {
+ MAXK = 30
+ };
+
+ uint64_t _bits[MAXK + 1];
+ uint64_t _next;
+
+ static uint32_t
+ encodeSpace(uint64_t x, uint32_t k)
+ {
+ return EC64::encodeExpGolombSpace(x, k);
+ }
+
+ void
+ clearBits(void);
+
+ void
+ reportBits(void);
+
+ int
+ Main(void);
+};
+
+
+void
+UniformApp::clearBits(void)
+{
+ for (unsigned int k = 0; k <= MAXK; ++k)
+ _bits[k] = 0;
+ _next = 0;
+}
+
+
+void
+UniformApp::reportBits(void)
+{
+ printf("next=%" PRIu64 " ", _next);
+ for (unsigned int k = 0; k <= MAXK; ++k)
+ printf("b[%u]=%" PRIu64 " ",
+ static_cast<unsigned int>(k),
+ _bits[k]);
+ printf("\n");
+
+}
+
+
+
+int
+UniformApp::Main(void)
+{
+ int k, l, m, bestmask, oldbestmask;
+ printf("Hello world\n");
+ clearBits();
+ reportBits();
+
+ m = 0;
+ oldbestmask = 0;
+ for (;;) {
+ uint64_t minnext = 0;
+ int minnextk = 0;
+ int bestk = 0;
+ printf("_next=%" PRIu64 "\n", _next);
+ for (k = 0; k <= MAXK; ++k) {
+ uint32_t bits = encodeSpace(_next, k); // Current bits
+ uint64_t next = maxExpGolombVal(k, bits);
+ assert(encodeSpace(next - 1, k) == bits);
+ assert(encodeSpace(next, k) > bits);
+ if (k == 0 || next < minnext) {
+ minnext = next;
+ minnextk = k;
+ }
+ if (_bits[k] < _bits[bestk])
+ bestk = k;
+ printf("k=%d, bits=%d, next=%" PRIu64 "\n", k, bits, next);
+ }
+ printf("minnext=%" PRIu64 ", minnextk=%d, bestk=%d\n",
+ minnext, minnextk, bestk);
+ for (k = 0; k <= MAXK; ++k) {
+ uint32_t kbits = encodeSpace(_next, k); // Current bits
+ l = bestk;
+ uint32_t lbits = encodeSpace(_next, l); // Current bits
+ if (_bits[k] > _bits[l] && kbits < lbits) {
+ uint32_t dbits = lbits - kbits;
+ uint64_t dsbits = _bits[k] - _bits[l];
+ uint64_t delt = (dsbits + dbits - 1) / dbits;
+ if (minnext >= _next + delt) {
+ minnext = _next + delt;
+ bestk = k;
+ }
+ } else if (_bits[k] == _bits[l] && kbits < lbits) {
+ minnext = _next + 1;
+ bestk = k;
+ }
+ }
+ printf("minnext=%" PRIu64 ", minnextk=%d, bestk=%d\n",
+ minnext, minnextk, bestk);
+ for (k = 0; k <= MAXK; ++k) {
+ assert(encodeSpace(_next, k) == encodeSpace(minnext - 1, k));
+ _bits[k] += (minnext - _next) * encodeSpace(_next, k);
+ }
+ _next = minnext;
+ bestmask = 0;
+ uint32_t smallk = 0;
+ for (k = 0; k <= MAXK; ++k) {
+ if (_bits[k] < _bits[smallk])
+ smallk = k;
+ }
+ for (k = 0; k <= MAXK; ++k)
+ if (_bits[k] <= _bits[smallk])
+ bestmask |= (1 << k);
+ if (bestmask == oldbestmask && _next < (UINT64_C(1) << 30))
+ continue;
+ reportBits();
+ printf("Best k for interval [0..%" PRIu64 ") is", _next);
+ for (k = 0; k <= MAXK; ++k)
+ if (_bits[k] <= _bits[smallk])
+ printf(" %d", k);
+ printf("\n");
+ oldbestmask = bestmask;
+ if (_next >= (UINT64_C(1) << 30))
+ break;
+ printf("m iter=%d\n", m);
+ ++m;
+ if (m >= 10000) {
+ printf("m breakout\n");
+ break;
+ }
+ }
+
+ return 0;
+}
+
+FASTOS_MAIN(UniformApp);
+
+
diff --git a/searchlib/src/apps/vespa-index-inspect/.gitignore b/searchlib/src/apps/vespa-index-inspect/.gitignore
new file mode 100644
index 00000000000..4d5ccbbcb89
--- /dev/null
+++ b/searchlib/src/apps/vespa-index-inspect/.gitignore
@@ -0,0 +1,3 @@
+/.depend
+/Makefile
+/vespa-index-inspect
diff --git a/searchlib/src/apps/vespa-index-inspect/CMakeLists.txt b/searchlib/src/apps/vespa-index-inspect/CMakeLists.txt
new file mode 100644
index 00000000000..c68aa6b1a6a
--- /dev/null
+++ b/searchlib/src/apps/vespa-index-inspect/CMakeLists.txt
@@ -0,0 +1,9 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_vespa-index-inspect_app
+ SOURCES
+ vespa-index-inspect.cpp
+ OUTPUT_NAME vespa-index-inspect
+ INSTALL bin
+ DEPENDS
+ searchlib
+)
diff --git a/searchlib/src/apps/vespa-index-inspect/vespa-index-inspect.cpp b/searchlib/src/apps/vespa-index-inspect/vespa-index-inspect.cpp
new file mode 100644
index 00000000000..b1cf96f81ef
--- /dev/null
+++ b/searchlib/src/apps/vespa-index-inspect/vespa-index-inspect.cpp
@@ -0,0 +1,965 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("vespa-index-inspect");
+#include <vespa/searchlib/index/dictionaryfile.h>
+#include <vespa/searchlib/index/postinglistfile.h>
+#include <vespa/searchlib/bitcompression/compression.h>
+#include <vespa/searchlib/diskindex/pagedict4randread.h>
+#include <vespa/searchlib/diskindex/pagedict4file.h>
+#include <vespa/searchlib/diskindex/zcposoccrandread.h>
+#include <vespa/searchlib/diskindex/docidmapper.h>
+#include <vespa/searchlib/diskindex/wordnummapper.h>
+#include <vespa/searchlib/diskindex/fieldreader.h>
+#include <vespa/searchlib/index/schemautil.h>
+#include <vespa/searchlib/index/dummyfileheadercontext.h>
+#include <vespa/searchlib/queryeval/searchiterator.h>
+#include <vespa/searchlib/fef/termfieldmatchdata.h>
+#include <vespa/searchlib/fef/termfieldmatchdataarray.h>
+#include <iostream>
+
+using search::index::Schema;
+using search::index::SchemaUtil;
+using search::index::DictionaryFileRandRead;
+using search::index::PostingListFileRandRead;
+using search::index::PostingListOffsetAndCounts;
+using search::index::PostingListCounts;
+using search::index::PostingListHandle;
+using search::diskindex::PageDict4RandRead;
+using search::diskindex::Zc4PosOccRandRead;
+using search::fef::TermFieldMatchData;
+using search::fef::TermFieldMatchDataArray;
+using search::fef::FieldPositionsIterator;
+using search::queryeval::SearchIterator;
+using search::index::DocIdAndFeatures;
+using search::diskindex::DocIdMapping;
+using search::diskindex::WordNumMapping;
+using search::diskindex::FieldReader;
+using search::diskindex::PageDict4FileSeqRead;
+using search::TuneFileSeqRead;
+
+namespace
+{
+
+/**
+ * Fine granularity, for small scale inversion within a single document.
+ */
+class PosEntry
+{
+public:
+ uint32_t _docId;
+ uint32_t _fieldId;
+ uint64_t _wordNum;
+ uint32_t _elementId;
+ uint32_t _wordPos;
+ uint32_t _elementLen;
+ int32_t _elementWeight;
+
+ PosEntry(uint32_t docId,
+ uint32_t fieldId,
+ uint32_t elementId, uint32_t wordPos,
+ uint64_t wordNum,
+ uint32_t elementLen, int32_t elementWeight)
+ : _docId(docId),
+ _fieldId(fieldId),
+ _wordNum(wordNum),
+ _elementId(elementId),
+ _wordPos(wordPos),
+ _elementLen(elementLen),
+ _elementWeight(elementWeight)
+ {
+ }
+
+ bool
+ operator<(const PosEntry &rhs) const
+ {
+ if (_docId != rhs._docId)
+ return _docId < rhs._docId;
+ if (_fieldId != rhs._fieldId)
+ return _fieldId < rhs._fieldId;
+ if (_elementId != rhs._elementId)
+ return _elementId < rhs._elementId;
+ if (_wordPos != rhs._wordPos)
+ return _wordPos < rhs._wordPos;
+ return _wordNum < rhs._wordNum;
+ }
+};
+
+
+void
+unpackFeatures(std::vector<PosEntry> &entries,
+ uint32_t fieldId,
+ uint64_t wordNum,
+ const DocIdAndFeatures &features)
+{
+ std::vector<search::index::WordDocElementFeatures>::const_iterator
+ element = features._elements.begin();
+ std::vector<search::index::WordDocElementWordPosFeatures>::
+ const_iterator position = features._wordPositions.begin();
+ uint32_t numElements = features._elements.size();
+ while (numElements--) {
+ uint32_t numOccs = element->getNumOccs();
+ while (numOccs--) {
+ entries.push_back(PosEntry(features._docId,
+ fieldId,
+ element->getElementId(),
+ position->getWordPos(),
+ wordNum,
+ element->getElementLen(),
+ element->getWeight()));
+ ++position;
+ }
+ ++element;
+ }
+}
+
+
+void
+usageHeader(void)
+{
+ using std::cerr;
+ cerr <<
+ "vespa-index-inspect version 0.0\n"
+ "\n"
+ "USAGE:\n";
+}
+
+
+class FieldOptions
+{
+public:
+ std::vector<vespalib::string> _fields;
+ std::vector<uint32_t> _ids;
+
+ FieldOptions()
+ : _fields(),
+ _ids()
+ {
+ }
+
+ void
+ addField(const vespalib::string &field)
+ {
+ _fields.push_back(field);
+ }
+
+ bool
+ empty(void) const
+ {
+ return _ids.empty();
+ }
+
+ void
+ validateFields(const Schema &schema);
+};
+
+
+void
+FieldOptions::validateFields(const Schema &schema)
+{
+ for (std::vector<vespalib::string>::const_iterator
+ i = _fields.begin(), ie = _fields.end();
+ i != ie; ++i) {
+ uint32_t fieldId = schema.getIndexFieldId(*i);
+ if (fieldId == Schema::UNKNOWN_FIELD_ID) {
+ LOG(error,
+ "No such field: %s",
+ i->c_str());
+ exit(1);
+ }
+ _ids.push_back(fieldId);
+ }
+}
+
+
+}
+
+class SubApp
+{
+protected:
+ FastOS_Application &_app;
+
+public:
+ SubApp(FastOS_Application &app)
+ : _app(app)
+ {
+ }
+
+ virtual
+ ~SubApp(void)
+ {
+ }
+
+ virtual void
+ usage(bool showHeader) = 0;
+
+ virtual bool
+ getOptions(void) = 0;
+
+ virtual int
+ run(void) = 0;
+};
+
+
+class ShowPostingListSubApp : public SubApp
+{
+ vespalib::string _indexDir;
+ FieldOptions _fieldOptions;
+ vespalib::string _word;
+ bool _verbose;
+ bool _readmmap;
+ bool _directio;
+ bool _transpose;
+ int _optIndex;
+ DocIdMapping _dm;
+ std::vector<WordNumMapping> _wmv;
+ std::vector<std::vector<std::string>> _wordsv;
+ uint32_t _docIdLimit;
+ uint32_t _minDocId;
+
+ static uint64_t
+ noWordNumHigh(void)
+ {
+ return std::numeric_limits<uint64_t>::max();
+ }
+
+ static uint64_t
+ noWordNum(void)
+ {
+ return 0u;
+ }
+public:
+
+ ShowPostingListSubApp(FastOS_Application &app);
+
+ virtual
+ ~ShowPostingListSubApp(void);
+
+ virtual void
+ usage(bool showHeader);
+
+ virtual bool
+ getOptions(void);
+
+ virtual int
+ run(void);
+
+ void
+ showPostingList(void);
+
+ bool
+ readDocIdLimit(const Schema &schema);
+
+ bool
+ readWordList(const SchemaUtil::IndexIterator &index);
+
+ bool
+ readWordList(const Schema &schema);
+
+ void
+ readPostings(const SchemaUtil::IndexIterator &index,
+ std::vector<PosEntry> &entries);
+
+ void
+ showTransposedPostingList();
+};
+
+
+ShowPostingListSubApp::ShowPostingListSubApp(FastOS_Application &app)
+ : SubApp(app),
+ _indexDir("."),
+ _fieldOptions(),
+ _word(),
+ _verbose(false),
+ _readmmap(false),
+ _directio(false),
+ _transpose(false),
+ _optIndex(1),
+ _dm(),
+ _wmv(),
+ _wordsv(),
+ _docIdLimit(std::numeric_limits<uint32_t>::max()),
+ _minDocId(0u)
+{
+}
+
+
+ShowPostingListSubApp::~ShowPostingListSubApp(void)
+{
+}
+
+
+void
+ShowPostingListSubApp::usage(bool showHeader)
+{
+ using std::cerr;
+ if (showHeader)
+ usageHeader();
+ cerr <<
+ "vespa-index-inspect showpostings [--indexdir indexDir]\n"
+ " --field field\n"
+ " word\n"
+ "\n"
+ "vespa-index-inspect showpostings [--indexdir indexDir]\n"
+ " [--field field]\n"
+ " --transpose\n"
+ " [--docidlimit docIdLimit] [--mindocid mindocid]\n"
+ "\n";
+}
+
+
+bool
+ShowPostingListSubApp::getOptions(void)
+{
+ int c;
+ const char *optArgument = NULL;
+ int longopt_index = 0;
+ static struct option longopts[] = {
+ { "indexdir", 1, NULL, 0 },
+ { "field", 1, NULL, 0 },
+ { "transpose", 0, NULL, 0 },
+ { "docidlimit", 1, NULL, 0 },
+ { "mindocid", 1, NULL, 0 },
+ { NULL, 0, NULL, 0 }
+ };
+ enum longopts_enum {
+ LONGOPT_INDEXDIR,
+ LONGOPT_FIELD,
+ LONGOPT_TRANSPOSE,
+ LONGOPT_DOCIDLIMIT,
+ LONGOPT_MINDOCID
+ };
+ int optIndex = 2;
+ while ((c = _app.GetOptLong("di:mv",
+ optArgument,
+ optIndex,
+ longopts,
+ &longopt_index)) != -1) {
+ switch (c) {
+ case 0:
+ switch (longopt_index) {
+ case LONGOPT_INDEXDIR:
+ _indexDir = optArgument;
+ break;
+ case LONGOPT_FIELD:
+ _fieldOptions.addField(optArgument);
+ break;
+ case LONGOPT_TRANSPOSE:
+ _transpose = true;
+ break;
+ case LONGOPT_DOCIDLIMIT:
+ _docIdLimit = atoi(optArgument);
+ break;
+ case LONGOPT_MINDOCID:
+ _minDocId = atoi(optArgument);
+ break;
+ default:
+ if (optArgument != NULL) {
+ LOG(error,
+ "longopt %s with arg %s",
+ longopts[longopt_index].name, optArgument);
+ } else {
+ LOG(error,
+ "longopt %s",
+ longopts[longopt_index].name);
+ }
+ }
+ break;
+ case 'd':
+ _directio = true;
+ break;
+ case 'i':
+ _indexDir = optArgument;
+ break;
+ case 'm':
+ _readmmap = true;
+ break;
+ case 'v':
+ _verbose = true;
+ break;
+ default:
+ return false;
+ }
+ }
+ if (_transpose) {
+ } else {
+ if (_fieldOptions._fields.empty())
+ return false;
+ if (_fieldOptions._fields.size() > 1)
+ return false;
+ }
+ _optIndex = optIndex;
+ if (_transpose) {
+ } else {
+ if (_optIndex >= _app._argc) {
+ return false;
+ }
+ _word = _app._argv[optIndex];
+ }
+ return true;
+}
+
+
+bool
+ShowPostingListSubApp::readDocIdLimit(const Schema &schema)
+{
+ TuneFileSeqRead tuneFileRead;
+ if (_dm.readDocIdLimit(_indexDir))
+ return true;
+ uint32_t numIndexFields = schema.getNumIndexFields();
+ for (uint32_t fieldId = 0; fieldId < numIndexFields; ++fieldId) {
+ const Schema::IndexField &field = schema.getIndexField(fieldId);
+ if (field.getDataType() == Schema::STRING) {
+ FieldReader fr;
+ if (!fr.open(_indexDir + "/" + field.getName() + "/",
+ tuneFileRead))
+ continue;
+ _dm.setup(fr.getDocIdLimit());
+ return true;
+ }
+ }
+ return false;
+}
+
+
+bool
+ShowPostingListSubApp::readWordList(const SchemaUtil::IndexIterator &index)
+{
+ std::vector<std::string> &words = _wordsv[index.getIndex()];
+ WordNumMapping &wm = _wmv[index.getIndex()];
+
+ search::TuneFileSeqRead tuneFileRead;
+ PageDict4FileSeqRead wr;
+ vespalib::string fieldDir = _indexDir + "/" + index.getName();
+ if (!wr.open(fieldDir + "/dictionary", tuneFileRead))
+ return false;
+ vespalib::string word;
+ PostingListCounts counts;
+ uint64_t wordNum = noWordNum();
+ wr.readWord(word, wordNum, counts);
+ words.push_back(""); // Word number 0 is special here.
+ while (wordNum != noWordNumHigh()) {
+ assert(wordNum == words.size());
+ words.push_back(word);
+ wr.readWord(word, wordNum, counts);
+ }
+ wm.setup(words.size() - 1);
+ if (!wr.close())
+ return false;
+ return true;
+}
+
+bool
+ShowPostingListSubApp::readWordList(const Schema &schema)
+{
+ _wordsv.clear();
+ _wmv.clear();
+ uint32_t numFields = schema.getNumIndexFields();
+ _wordsv.resize(numFields);
+ _wmv.resize(numFields);
+
+ if (!_fieldOptions.empty()) {
+ for (std::vector<uint32_t>::const_iterator
+ i = _fieldOptions._ids.begin(), ie = _fieldOptions._ids.end();
+ i != ie; ++i) {
+ SchemaUtil::IndexIterator index(schema, *i);
+ if (!readWordList(index))
+ return false;
+ }
+ } else {
+ SchemaUtil::IndexIterator index(schema);
+ while (index.isValid()) {
+ if (!readWordList(index))
+ return false;
+ ++index;
+ }
+ }
+ return true;
+}
+
+
+void
+ShowPostingListSubApp::readPostings(const SchemaUtil::IndexIterator &index,
+ std::vector<PosEntry> &entries)
+{
+ FieldReader r;
+ std::unique_ptr<PostingListFileRandRead> postingfile(new Zc4PosOccRandRead);
+ vespalib::string mangledName = _indexDir + "/" + index.getName() +
+ "/";
+ search::TuneFileSeqRead tuneFileRead;
+ r.setup(_wmv[index.getIndex()], _dm);
+ if (!r.open(mangledName, tuneFileRead))
+ return;
+ if (r.isValid())
+ r.read();
+ while (r.isValid()) {
+ uint32_t docId = r._docIdAndFeatures._docId;
+ if (docId >= _minDocId && docId < _docIdLimit) {
+ unpackFeatures(entries, index.getIndex(),
+ r._wordNum, r._docIdAndFeatures);
+ }
+ r.read();
+ }
+ if (!r.close())
+ abort();
+}
+
+
+void
+ShowPostingListSubApp::showTransposedPostingList(void)
+{
+ Schema schema;
+ std::string schemaName = _indexDir + "/schema.txt";
+ if (!schema.loadFromFile(schemaName)) {
+ LOG(error,
+ "Could not load schema from %s", schemaName.c_str());
+ exit(1);
+ }
+ _fieldOptions.validateFields(schema);
+ if (!readDocIdLimit(schema))
+ return;
+ if (!readWordList(schema))
+ return;
+ std::vector<PosEntry> entries;
+ if (!_fieldOptions.empty()) {
+ for (std::vector<uint32_t>::const_iterator
+ i = _fieldOptions._ids.begin(), ie = _fieldOptions._ids.end();
+ i != ie; ++i) {
+ SchemaUtil::IndexIterator index(schema, *i);
+ readPostings(index, entries);
+ }
+ } else {
+ SchemaUtil::IndexIterator index(schema);
+ while (index.isValid()) {
+ readPostings(index, entries);
+ ++index;
+ }
+ }
+ std::sort(entries.begin(), entries.end());
+ uint32_t prevDocId = static_cast<uint32_t>(-1);
+ uint32_t prevFieldId = static_cast<uint32_t>(-1);
+ uint32_t prevElemId = static_cast<uint32_t>(-1);
+ uint32_t prevElementLen = 0;
+ int32_t prevElementWeight = 0;
+ for (std::vector<PosEntry>::const_iterator
+ i = entries.begin(), ie = entries.end(); i != ie; ++i) {
+ if (i->_docId != prevDocId) {
+ std::cout << "docId = " << i->_docId << '\n';
+ prevDocId = i->_docId;
+ prevFieldId = static_cast<uint32_t>(-1);
+ }
+ if (i->_fieldId != prevFieldId) {
+ std::cout << " field = " << i->_fieldId <<
+ " \"" << schema.getIndexField(i->_fieldId).getName() <<
+ "\"\n";
+ prevFieldId = i->_fieldId;
+ prevElemId = static_cast<uint32_t>(-1);
+ }
+ if (i->_elementId != prevElemId ||
+ i->_elementLen != prevElementLen ||
+ i->_elementWeight != prevElementWeight) {
+ std::cout << " element = " << i->_elementId <<
+ ", elementLen = " << i->_elementLen <<
+ ", elementWeight = " << i->_elementWeight <<
+ '\n';
+ prevElemId = i->_elementId;
+ prevElementLen = i->_elementLen;
+ prevElementWeight = i->_elementWeight;
+ }
+ assert(i->_wordNum != 0);
+ assert(i->_wordNum < _wordsv[i->_fieldId].size());
+ std::cout << " pos = " << i->_wordPos <<
+ ", word = \"" << _wordsv[i->_fieldId][i->_wordNum] << "\"";
+ std::cout << '\n';
+ }
+}
+
+
+void
+ShowPostingListSubApp::showPostingList(void)
+{
+ Schema schema;
+ uint32_t numFields = 1;
+ std::string schemaName = _indexDir + "/schema.txt";
+ std::vector<vespalib::string> fieldNames;
+ vespalib::string shortName;
+ if (!schema.loadFromFile(schemaName)) {
+ LOG(error,
+ "Could not load schema from %s", schemaName.c_str());
+ exit(1);
+ }
+ _fieldOptions.validateFields(schema);
+ if (_fieldOptions._ids.size() != 1) {
+ LOG(error,
+ "Wrong number of field arguments: %d",
+ static_cast<int>(_fieldOptions._ids.size()));
+ exit(1);
+ }
+ SchemaUtil::IndexIterator it(schema, _fieldOptions._ids.front());
+
+ shortName = it.getName();
+ fieldNames.push_back(it.getName());
+ std::unique_ptr<DictionaryFileRandRead> dict(new PageDict4RandRead);
+ std::string dictName = _indexDir + "/" + shortName + "/dictionary";
+ search::TuneFileRandRead tuneFileRead;
+ if (_directio)
+ tuneFileRead.setWantDirectIO();
+ if (_readmmap)
+ tuneFileRead.setWantMemoryMap();
+ if (!dict->open(dictName, tuneFileRead)) {
+ LOG(error,
+ "Could not open dictionary %s",
+ dictName.c_str());
+ exit(1);
+ }
+ std::unique_ptr<PostingListFileRandRead> postingfile(new Zc4PosOccRandRead);
+ std::string mangledName = _indexDir + "/" + shortName +
+ "/posocc.dat.compressed";
+ if (!postingfile->open(mangledName, tuneFileRead)) {
+ LOG(error,
+ "Could not open posting list file %s",
+ mangledName.c_str());
+ exit(1);
+ }
+ PostingListOffsetAndCounts offsetAndCounts;
+ uint64_t wordNum = 0;
+ bool res = dict->lookup(_word, wordNum, offsetAndCounts);
+ if (!res) {
+ LOG(warning, "Unknown word %s", _word.c_str());
+ exit(1);
+ }
+ if (_verbose) {
+ LOG(info,
+ "bitOffset %" PRId64 ", bitLen=%" PRId64 ", numDocs=%" PRId64,
+ offsetAndCounts._offset,
+ offsetAndCounts._counts._bitLength,
+ offsetAndCounts._counts._numDocs);
+ }
+ typedef PostingListCounts Counts;
+ typedef PostingListHandle Handle;
+ typedef std::pair<Counts, Handle> CH;
+ typedef std::unique_ptr<CH> CHAP;
+ CHAP handle(new CH);
+ handle->first = offsetAndCounts._counts;
+ handle->second._bitOffset = offsetAndCounts._offset;
+ handle->second._bitLength = handle->first._bitLength;
+ const uint32_t first_segment = 0;
+ const uint32_t num_segments = 0; // means all segments
+ handle->second._file = postingfile.get();
+ handle->second._file->readPostingList(handle->first,
+ first_segment,
+ num_segments,
+ handle->second);
+ std::vector<TermFieldMatchData> tfmdv(numFields);
+ TermFieldMatchDataArray tfmda;
+ for (std::vector<TermFieldMatchData>::iterator
+ tfit = tfmdv.begin(), tfite = tfmdv.end();
+ tfit != tfite; ++tfit) {
+ tfmda.add(&*tfit);
+ }
+ std::unique_ptr<SearchIterator> sb(handle->second.createIterator(
+ handle->first, tfmda));
+ sb->initFullRange();
+ uint32_t docId = 0;
+ bool first = true;
+ for (;;) {
+ if (sb->seek(docId)) {
+ first = false;
+ std::cout << "docId = " << docId << '\n';
+ sb->unpack(docId);
+ for (uint32_t field = 0; field < numFields; ++field) {
+ const TermFieldMatchData &md = *tfmda[field];
+ if (md.getDocId() != docId)
+ continue;
+ std::cout << " field = " << fieldNames[field] << '\n';
+ FieldPositionsIterator fpi = md.getIterator();
+ uint32_t lastElement = static_cast<uint32_t>(-1);
+ while (fpi.valid()) {
+ if (fpi.getElementId() != lastElement) {
+ std::cout << " element = " << fpi.getElementId() <<
+ ", elementLen = " << fpi.getElementLen() <<
+ ", elementWeight = " << fpi.getElementWeight() <<
+ '\n';
+ lastElement = fpi.getElementId();
+ }
+ std::cout << " pos = " << fpi.getPosition() << '\n';
+ fpi.next();
+ }
+ }
+ ++docId;
+ } else {
+ docId = sb->getDocId();
+ if (sb->isAtEnd())
+ break;
+ }
+ }
+ if (first) {
+ std::cout << "No hits\n";
+ }
+
+ if (!postingfile->close()) {
+ LOG(error,
+ "Could not close posting list file %s",
+ mangledName.c_str());
+ exit(1);
+ }
+ if (!dict->close()) {
+ LOG(error,
+ "Could not close dictionary %s", dictName.c_str());
+ exit(1);
+ }
+}
+
+
+int
+ShowPostingListSubApp::run(void)
+{
+ if (_transpose)
+ showTransposedPostingList();
+ else
+ showPostingList();
+ return 0;
+}
+
+
+class DumpWordsSubApp : public SubApp
+{
+ std::string _indexDir;
+ FieldOptions _fieldOptions;
+ uint64_t _minNumDocs;
+ bool _verbose;
+ bool _all;
+ bool _showWordNum;
+
+public:
+ DumpWordsSubApp(FastOS_Application &app);
+
+ virtual
+ ~DumpWordsSubApp(void);
+
+ virtual void
+ usage(bool showHeader);
+
+ virtual bool
+ getOptions(void);
+
+ virtual int
+ run(void);
+
+ void
+ dumpWords(void);
+};
+
+
+DumpWordsSubApp::DumpWordsSubApp(FastOS_Application &app)
+ : SubApp(app),
+ _indexDir("."),
+ _fieldOptions(),
+ _minNumDocs(0u),
+ _verbose(false),
+ _showWordNum(false)
+{
+}
+
+
+DumpWordsSubApp::~DumpWordsSubApp(void)
+{
+}
+
+
+void
+DumpWordsSubApp::usage(bool showHeader)
+{
+ using std::cerr;
+ if (showHeader)
+ usageHeader();
+ cerr <<
+ "vespa-index-inspect dumpwords [--indexdir indexDir]\n"
+ " --field field\n"
+ " [--minnumdocs minnumdocs] [--verbose] [--wordnum]\n"
+ "\n";
+}
+
+
+bool
+DumpWordsSubApp::getOptions(void)
+{
+ int c;
+ const char *optArgument = NULL;
+ int longopt_index = 0;
+ static struct option longopts[] = {
+ { "indexdir", 1, NULL, 0 },
+ { "field", 1, NULL, 0 },
+ { "minnumdocs", 1, NULL, 0 },
+ { "verbose", 0, NULL, 0 },
+ { "wordnum", 0, NULL, 0 },
+ { NULL, 0, NULL, 0 }
+ };
+ enum longopts_enum {
+ LONGOPT_INDEXDIR,
+ LONGOPT_FIELD,
+ LONGOPT_MINNUMDOCS,
+ LONGOPT_VERBOSE,
+ LONGOPT_WORDNUM
+ };
+ int optIndex = 2;
+ while ((c = _app.GetOptLong("i:",
+ optArgument,
+ optIndex,
+ longopts,
+ &longopt_index)) != -1) {
+ switch (c) {
+ case 0:
+ switch (longopt_index) {
+ case LONGOPT_INDEXDIR:
+ _indexDir = optArgument;
+ break;
+ case LONGOPT_FIELD:
+ _fieldOptions.addField(optArgument);
+ break;
+ case LONGOPT_MINNUMDOCS:
+ _minNumDocs = atol(optArgument);
+ break;
+ case LONGOPT_VERBOSE:
+ _verbose = true;
+ break;
+ case LONGOPT_WORDNUM:
+ _showWordNum = true;
+ break;
+ default:
+ if (optArgument != NULL) {
+ LOG(error,
+ "longopt %s with arg %s",
+ longopts[longopt_index].name, optArgument);
+ } else {
+ LOG(error,
+ "longopt %s",
+ longopts[longopt_index].name);
+ }
+ }
+ break;
+ case 'i':
+ _indexDir = optArgument;
+ break;
+ default:
+ return false;
+ }
+ }
+ return true;
+}
+
+
+void
+DumpWordsSubApp::dumpWords(void)
+{
+ search::index::Schema schema;
+ std::string schemaName = _indexDir + "/schema.txt";
+ if (!schema.loadFromFile(schemaName)) {
+ LOG(error,
+ "Could not load schema from %s", schemaName.c_str());
+ exit(1);
+ }
+ _fieldOptions.validateFields(schema);
+ if (_fieldOptions._ids.size() != 1) {
+ LOG(error,
+ "Wrong number of field arguments: %d",
+ static_cast<int>(_fieldOptions._ids.size()));
+ exit(1);
+ }
+
+ SchemaUtil::IndexIterator index(schema, _fieldOptions._ids[0]);
+ vespalib::string fieldDir = _indexDir + "/" + index.getName();
+ PageDict4FileSeqRead wordList;
+ std::string wordListName = fieldDir + "/dictionary";
+ search::TuneFileSeqRead tuneFileRead;
+ if (!wordList.open(wordListName, tuneFileRead)) {
+ LOG(error,
+ "Could not open wordlist %s", wordListName.c_str());
+ exit(1);
+ }
+ uint64_t wordNum = 0;
+ vespalib::string word;
+ PostingListCounts counts;
+ for (;;) {
+ wordList.readWord(word, wordNum, counts);
+ if (wordNum == wordList.noWordNumHigh())
+ break;
+ if (counts._numDocs < _minNumDocs)
+ continue;
+ if (_showWordNum) {
+ std::cout << wordNum << '\t';
+ }
+ std::cout << word << '\t' << counts._numDocs;
+ if (_verbose) {
+ std::cout << '\t' << counts._bitLength;
+ }
+ std::cout << '\n';
+ }
+ if (!wordList.close()) {
+ LOG(error,
+ "Could not close wordlist %s", wordListName.c_str());
+ exit(1);
+ }
+}
+
+
+int
+DumpWordsSubApp::run(void)
+{
+ dumpWords();
+ return 0;
+}
+
+
+class VespaIndexInspectApp : public FastOS_Application
+{
+public:
+ VespaIndexInspectApp(void);
+
+ void
+ usage(void);
+
+ int
+ Main(void);
+};
+
+
+VespaIndexInspectApp::VespaIndexInspectApp(void)
+ : FastOS_Application()
+{
+}
+
+
+void
+VespaIndexInspectApp::usage(void)
+{
+ ShowPostingListSubApp(*this).usage(true);
+ DumpWordsSubApp(*this).usage(false);
+}
+
+
+int
+VespaIndexInspectApp::Main(void)
+{
+ if (_argc < 2) {
+ usage();
+ return 1;
+ }
+ std::unique_ptr<SubApp> subApp;
+ if (strcmp(_argv[1], "showpostings") == 0)
+ subApp.reset(new ShowPostingListSubApp(*this));
+ else if (strcmp(_argv[1], "dumpwords") == 0)
+ subApp.reset(new DumpWordsSubApp(*this));
+ if (subApp.get() != NULL) {
+ if (!subApp->getOptions()) {
+ subApp->usage(true);
+ return 1;
+ }
+ return subApp->run();
+ }
+ usage();
+ return 1;
+}
+
+FASTOS_MAIN(VespaIndexInspectApp);
diff --git a/searchlib/src/apps/vespa-ranking-expression-analyzer/.gitignore b/searchlib/src/apps/vespa-ranking-expression-analyzer/.gitignore
new file mode 100644
index 00000000000..5c3dba7e243
--- /dev/null
+++ b/searchlib/src/apps/vespa-ranking-expression-analyzer/.gitignore
@@ -0,0 +1,3 @@
+/.depend
+/Makefile
+/vespa-ranking-expression-analyzer
diff --git a/searchlib/src/apps/vespa-ranking-expression-analyzer/CMakeLists.txt b/searchlib/src/apps/vespa-ranking-expression-analyzer/CMakeLists.txt
new file mode 100644
index 00000000000..6d1b7f55980
--- /dev/null
+++ b/searchlib/src/apps/vespa-ranking-expression-analyzer/CMakeLists.txt
@@ -0,0 +1,9 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_vespa-ranking-expression-analyzer_app
+ SOURCES
+ vespa-ranking-expression-analyzer.cpp
+ OUTPUT_NAME vespa-ranking-expression-analyzer
+ INSTALL bin
+ DEPENDS
+ searchlib
+)
diff --git a/searchlib/src/apps/vespa-ranking-expression-analyzer/illegal.expression b/searchlib/src/apps/vespa-ranking-expression-analyzer/illegal.expression
new file mode 100644
index 00000000000..87c9e959d1f
--- /dev/null
+++ b/searchlib/src/apps/vespa-ranking-expression-analyzer/illegal.expression
@@ -0,0 +1 @@
+a # b \ No newline at end of file
diff --git a/searchlib/src/apps/vespa-ranking-expression-analyzer/vespa-ranking-expression-analyzer.cpp b/searchlib/src/apps/vespa-ranking-expression-analyzer/vespa-ranking-expression-analyzer.cpp
new file mode 100644
index 00000000000..e64fb406bb5
--- /dev/null
+++ b/searchlib/src/apps/vespa-ranking-expression-analyzer/vespa-ranking-expression-analyzer.cpp
@@ -0,0 +1,386 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+
+#include <map>
+#include <vespa/searchlib/features/rankingexpression/feature_name_extractor.h>
+#include <vector>
+#include <vespa/vespalib/eval/compiled_function.h>
+#include <vespa/vespalib/eval/function.h>
+#include <vespa/vespalib/eval/interpreted_function.h>
+#include <vespa/vespalib/eval/basic_nodes.h>
+#include <vespa/vespalib/eval/call_nodes.h>
+#include <vespa/vespalib/eval/operator_nodes.h>
+#include <vespa/vespalib/util/benchmark_timer.h>
+#include <vespa/vespalib/eval/gbdt.h>
+#include <vespa/vespalib/eval/vm_forest.h>
+#include <vespa/vespalib/eval/deinline_forest.h>
+#include <vespa/vespalib/tensor/default_tensor_engine.h>
+
+//-----------------------------------------------------------------------------
+
+using vespalib::BenchmarkTimer;
+using vespalib::tensor::DefaultTensorEngine;
+using namespace vespalib::eval;
+using namespace vespalib::eval::nodes;
+using namespace vespalib::eval::gbdt;
+using namespace search::features::rankingexpression;
+
+//-----------------------------------------------------------------------------
+
+struct File {
+ int file;
+ char *data;
+ size_t size;
+ File(const vespalib::string &file_name)
+ : file(open(file_name.c_str(), O_RDONLY)), data((char*)MAP_FAILED), size(0)
+ {
+ struct stat info;
+ if ((file != -1) && (fstat(file, &info) == 0)) {
+ data = (char*)mmap(0, info.st_size, PROT_READ, MAP_SHARED, file, 0);
+ if (data != MAP_FAILED) {
+ size = info.st_size;
+ }
+ }
+ }
+ ~File() {
+ if (valid()) {
+ munmap(data, size);
+ }
+ if (file != -1) {
+ close(file);
+ }
+ }
+ bool valid() const { return (data != MAP_FAILED); }
+};
+
+//-----------------------------------------------------------------------------
+
+vespalib::string strip_name(const vespalib::string &name) {
+ const char *expected_ending = ".expression";
+ vespalib::string tmp = name;
+ size_t pos = tmp.rfind("/");
+ if (pos != tmp.npos) {
+ tmp = tmp.substr(pos + 1);
+ }
+ pos = tmp.rfind(expected_ending);
+ if (pos == tmp.size() - strlen(expected_ending)) {
+ tmp = tmp.substr(0, pos);
+ }
+ return tmp;
+}
+
+size_t as_percent(double value) {
+ return size_t(round(value * 100.0));
+}
+
+const char *maybe_s(size_t n) { return (n == 1) ? "" : "s"; }
+
+//-----------------------------------------------------------------------------
+
+size_t count_nodes(const Node &node) {
+ size_t count = 1;
+ for (size_t i = 0; i < node.num_children(); ++i) {
+ count += count_nodes(node.get_child(i));
+ }
+ return count;
+}
+
+//-----------------------------------------------------------------------------
+
+struct InputInfo {
+ vespalib::string name;
+ std::vector<double> cmp_with;
+ explicit InputInfo(vespalib::stringref name_in)
+ : name(name_in), cmp_with() {}
+ double select_value() const {
+ return cmp_with.empty() ? 0.5 : cmp_with[(cmp_with.size()-1)/2];
+ return 0.5;
+ }
+};
+
+//-----------------------------------------------------------------------------
+
+struct FunctionInfo {
+ typedef std::vector<const Node *> TreeList;
+
+ size_t expression_size;
+ bool root_is_forest;
+ std::vector<TreeList> forests;
+ std::vector<InputInfo> inputs;
+ std::vector<double> params;
+
+ void find_forests(const Node &node) {
+ if (node.is_forest()) {
+ forests.push_back(extract_trees(node));
+ } else {
+ for (size_t i = 0; i < node.num_children(); ++i) {
+ find_forests(node.get_child(i));
+ }
+ }
+ }
+
+ template <typename T>
+ void check_cmp(const T *node) {
+ if (node) {
+ auto lhs_symbol = as<Symbol>(node->lhs());
+ auto rhs_symbol = as<Symbol>(node->rhs());
+ if (lhs_symbol && node->rhs().is_const()) {
+ inputs[lhs_symbol->id()].cmp_with.push_back(node->rhs().get_const_value());
+ }
+ if (node->lhs().is_const() && rhs_symbol) {
+ inputs[rhs_symbol->id()].cmp_with.push_back(node->lhs().get_const_value());
+ }
+ }
+ }
+
+ void check_in(const In *node) {
+ if (node) {
+ auto lhs_symbol = as<Symbol>(node->lhs());
+ auto rhs_symbol = as<Symbol>(node->rhs());
+ if (lhs_symbol && node->rhs().is_const()) {
+ auto array = as<Array>(node->rhs());
+ if (array) {
+ for (size_t i = 0; i < array->size(); ++i) {
+ inputs[lhs_symbol->id()].cmp_with.push_back(array->get(i).get_const_value());
+ }
+ } else {
+ inputs[lhs_symbol->id()].cmp_with.push_back(node->rhs().get_const_value());
+ }
+ }
+ if (node->lhs().is_const() && rhs_symbol) {
+ inputs[rhs_symbol->id()].cmp_with.push_back(node->lhs().get_const_value());
+ }
+ }
+ }
+
+ void analyze_inputs(const Node &node) {
+ for (size_t i = 0; i < node.num_children(); ++i) {
+ analyze_inputs(node.get_child(i));
+ }
+ check_cmp(as<Equal>(node));
+ check_cmp(as<NotEqual>(node));
+ check_cmp(as<Approx>(node));
+ check_cmp(as<Less>(node));
+ check_cmp(as<LessEqual>(node));
+ check_cmp(as<Greater>(node));
+ check_cmp(as<GreaterEqual>(node));
+ check_in(as<In>(node));
+ }
+
+ FunctionInfo(const Function &function)
+ : expression_size(count_nodes(function.root())),
+ root_is_forest(function.root().is_forest()),
+ forests(),
+ inputs(),
+ params()
+ {
+ for (size_t i = 0; i < function.num_params(); ++i) {
+ inputs.emplace_back(function.param_name(i));
+ }
+ find_forests(function.root());
+ analyze_inputs(function.root());
+ for (size_t i = 0; i < function.num_params(); ++i) {
+ std::sort(inputs[i].cmp_with.begin(), inputs[i].cmp_with.end());
+ }
+ for (size_t i = 0; i < function.num_params(); ++i) {
+ params.push_back(inputs[i].select_value());
+ }
+ }
+
+ size_t get_path_len(const TreeList &trees) const {
+ size_t path = 0;
+ for (const Node *tree: trees) {
+ InterpretedFunction ifun(DefaultTensorEngine::ref(), *tree, params.size());
+ InterpretedFunction::Context ctx;
+ for (double param: params) {
+ ctx.add_param(param);
+ }
+ ifun.eval(ctx);
+ path += ctx.if_cnt();
+ }
+ return path;
+ }
+
+ void report() const {
+ fprintf(stderr, " number of inputs: %zu\n", inputs.size());
+ fprintf(stderr, " expression size (AST node count): %zu\n", expression_size);
+ if (root_is_forest) {
+ fprintf(stderr, " expression root is a sum of GBD trees\n");
+ }
+ if (!forests.empty()) {
+ fprintf(stderr, " expression contains %zu GBD forest%s\n",
+ forests.size(), maybe_s(forests.size()));
+ }
+ for (size_t i = 0; i < forests.size(); ++i) {
+ ForestStats forest(forests[i]);
+ fprintf(stderr, " GBD forest %zu:\n", i);
+ fprintf(stderr, " average path length: %g\n", forest.total_average_path_length);
+ fprintf(stderr, " expected path length: %g\n", forest.total_expected_path_length);
+ fprintf(stderr, " actual path with sample input: %zu\n", get_path_len(forests[i]));
+ if (forest.total_tuned_checks == 0) {
+ fprintf(stderr, " WARNING: checks are not tuned (expected path length to be ignored)\n");
+ }
+ fprintf(stderr, " largest set membership check: %zu\n", forest.max_set_size);
+ for (const auto &item: forest.tree_sizes) {
+ fprintf(stderr, " forest contains %zu GBD tree%s of size %zu\n",
+ item.count, maybe_s(item.count), item.size);
+ }
+ if (forest.tree_sizes.size() > 1) {
+ fprintf(stderr, " forest contains %zu GBD trees in total\n", forest.num_trees);
+ }
+ }
+ }
+};
+
+//-----------------------------------------------------------------------------
+
+bool none_used(const std::vector<Forest::UP> &forests) {
+ return forests.empty();
+}
+
+bool deinline_used(const std::vector<Forest::UP> &forests) {
+ if (forests.empty()) {
+ return false;
+ }
+ for (const Forest::UP &forest: forests) {
+ if (dynamic_cast<DeinlineForest*>(forest.get()) == nullptr) {
+ return false;
+ }
+ }
+ return true;
+}
+
+bool vmforest_used(const std::vector<Forest::UP> &forests) {
+ if (forests.empty()) {
+ return false;
+ }
+ for (const Forest::UP &forest: forests) {
+ if (dynamic_cast<VMForest*>(forest.get()) == nullptr) {
+ return false;
+ }
+ }
+ return true;
+}
+
+//-----------------------------------------------------------------------------
+
+struct State {
+ vespalib::string name;
+ vespalib::string expression;
+ Function function;
+ FunctionInfo fun_info;
+ CompiledFunction::UP compiled_function;
+
+ double llvm_compile_s = 0.0;
+ double llvm_execute_us = 0.0;
+
+ std::vector<vespalib::string> options;
+ std::vector<double> options_us;
+
+ explicit State(const vespalib::string &file_name,
+ vespalib::stringref expression_in)
+ : name(strip_name(file_name)),
+ expression(expression_in),
+ function(Function::parse(expression, FeatureNameExtractor())),
+ fun_info(function),
+ compiled_function(),
+ llvm_compile_s(0.0),
+ llvm_execute_us(0.0),
+ options(),
+ options_us()
+ {
+ }
+
+ void benchmark_llvm_compile() {
+ BenchmarkTimer timer(1.0);
+ while (timer.has_budget()) {
+ timer.before();
+ CompiledFunction::UP new_cf(new CompiledFunction(function, PassParams::ARRAY));
+ timer.after();
+ compiled_function = std::move(new_cf);
+ }
+ llvm_compile_s = timer.min_time();
+ }
+
+ void benchmark_option(const vespalib::string &opt_name, Optimize::Chain optimizer_chain) {
+ options.push_back(opt_name);
+ options_us.push_back(CompiledFunction(function, PassParams::ARRAY, optimizer_chain).estimate_cost_us(fun_info.params));
+ fprintf(stderr, " LLVM(%s) execute time: %g us\n", opt_name.c_str(), options_us.back());
+ }
+
+ void report() {
+ fun_info.report();
+ benchmark_llvm_compile();
+ fprintf(stderr, " LLVM compile time: %g s\n", llvm_compile_s);
+ llvm_execute_us = compiled_function->estimate_cost_us(fun_info.params);
+ fprintf(stderr, " LLVM(default) execute time: %g us\n", llvm_execute_us);
+ if (!none_used(compiled_function->get_forests())) {
+ benchmark_option("none", Optimize::none);
+ }
+ if (!deinline_used(compiled_function->get_forests()) && !fun_info.forests.empty()) {
+ benchmark_option("deinline", DeinlineForest::optimize_chain);
+ }
+ if (!vmforest_used(compiled_function->get_forests()) && !fun_info.forests.empty()) {
+ benchmark_option("vmforest", VMForest::optimize_chain);
+ }
+ fprintf(stdout, "[compile: %.3fs][execute: %.3fus]", llvm_compile_s, llvm_execute_us);
+ for (size_t i = 0; i < options.size(); ++i) {
+ double rel_speed = (llvm_execute_us / options_us[i]);
+ fprintf(stdout, "[%s: %zu%%]", options[i].c_str(), as_percent(rel_speed));
+ if (rel_speed >= 1.1) {
+ fprintf(stderr, " WARNING: LLVM(%s) faster than default choice\n",
+ options[i].c_str());
+ }
+ }
+ fprintf(stdout, "[name: %s]\n", name.c_str());
+ fflush(stdout);
+ }
+};
+
+//-----------------------------------------------------------------------------
+
+struct MyApp : public FastOS_Application {
+ int Main();
+ int usage();
+ virtual bool useProcessStarter() const { return false; }
+};
+
+int
+MyApp::usage() {
+ fprintf(stderr, "usage: %s <expression-file>\n", _argv[0]);
+ fprintf(stderr, " analyze/benchmark vespa ranking expression\n");
+ return 1;
+}
+
+int
+MyApp::Main()
+{
+ if (_argc != 2) {
+ return usage();
+ }
+ vespalib::string file_name(_argv[1]);
+ File file(file_name);
+ if (!file.valid()) {
+ fprintf(stderr, "could not read input file: '%s'\n",
+ file_name.c_str());
+ return 1;
+ }
+ State state(file_name, vespalib::stringref(file.data, file.size));
+ if (state.function.has_error()) {
+ vespalib::string error_message = state.function.get_error();
+ fprintf(stderr, "input file (%s) contains an illegal expression:\n%s\n",
+ file_name.c_str(), error_message.c_str());
+ return 1;
+ }
+ fprintf(stderr, "analyzing expression file: '%s'\n",
+ file_name.c_str());
+ state.report();
+ return 0;
+}
+
+int main(int argc, char **argv) {
+ MyApp my_app;
+ return my_app.Entry(argc, argv);
+}
+
+//-----------------------------------------------------------------------------
diff --git a/searchlib/src/forcelink.sh b/searchlib/src/forcelink.sh
new file mode 100755
index 00000000000..b088b8363a5
--- /dev/null
+++ b/searchlib/src/forcelink.sh
@@ -0,0 +1,45 @@
+#!/bin/sh
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+project=searchlib
+
+if [ X$1 == "Xupdate" ]; then
+ # update forcelink.hpp
+ echo "generating forcelink.hpp..."
+ guard=`pwd | sed -e "s|.*/${project}/||" -e "s|/|_|g"`
+ prefix=forcelink_file_${project}_${guard}_
+ echo "#ifndef GUARD_${project}_${guard}_FORCELINK" > forcelink.hpp
+ echo "#define GUARD_${project}_${guard}_FORCELINK" >> forcelink.hpp
+ echo "" >> forcelink.hpp
+ find . -name "*.cpp" -maxdepth 1 | sed -e "s|.*/\(.*\)\.cpp|void ${prefix}\1();|" >> forcelink.hpp
+ echo "" >> forcelink.hpp
+ echo "void forcelink_${project}_${guard}() {" >> forcelink.hpp
+ find . -name "*.cpp" -maxdepth 1 | sed -e "s|.*/\(.*\)\.cpp| ${prefix}\1();|" >> forcelink.hpp
+ echo "}" >> forcelink.hpp
+ echo "" >> forcelink.hpp
+ echo "#endif" >> forcelink.hpp
+ echo "invoke 'forcelink_${project}_${guard}()' to force link this directory"
+
+ # update .cpp files
+ for file in *.cpp; do
+ name=`echo "${prefix}${file}" | sed 's|\(.*\)\.cpp|\1|'`
+ found=`grep ${name} ${file} | wc -l`
+ if [ $found == "0" ]; then
+ echo "updating ${file}..."
+ echo "" >> $file
+ echo "// this function was added by $0" >> $file
+ echo "void ${name}() {}" >> $file
+ fi
+ done
+else
+ echo "This is a small utility script that might help out when trying to"
+ echo "force the linkage of object files. When run in a subdirectory within"
+ echo "${project}, it will create a 'forcelink.hpp' file that contains the"
+ echo "force linkage wrapping code. It will also update any .cpp files in the"
+ echo "directory with appropriate dummy functions to allow consistent force"
+ echo "linkage. Note that this script will make a large"
+ echo "number of assumptions; USE AT YOUR OWN RISK!"
+ echo ""
+ echo "if you feel lucky, run:"
+ echo "$0 update"
+fi
diff --git a/searchlib/src/main/OWNERS b/searchlib/src/main/OWNERS
new file mode 100644
index 00000000000..31af040f698
--- /dev/null
+++ b/searchlib/src/main/OWNERS
@@ -0,0 +1 @@
+bratseth
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/aggregation/AggregationResult.java b/searchlib/src/main/java/com/yahoo/searchlib/aggregation/AggregationResult.java
new file mode 100644
index 00000000000..b877a88fc8d
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/aggregation/AggregationResult.java
@@ -0,0 +1,161 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.aggregation;
+
+import com.yahoo.searchlib.expression.ExpressionNode;
+import com.yahoo.searchlib.expression.ResultNode;
+import com.yahoo.vespa.objects.Deserializer;
+import com.yahoo.vespa.objects.ObjectVisitor;
+import com.yahoo.vespa.objects.Serializer;
+
+/**
+ * <p>This is the aggregation super-class from which all types of aggregation inherits.</p>
+ *
+ * @author <a href="mailto:balder@yahoo-inc.com">Henning Baldersheim</a>
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
+ */
+public abstract class AggregationResult extends ExpressionNode {
+
+ public static final int classId = registerClass(0x4000 + 80, AggregationResult.class);
+ private ExpressionNode expression = null;
+ private int tag = -1;
+
+ /**
+ * <p>Returns the tag of this aggregation result. This is useful for uniquely identifying a result.</p>
+ *
+ * @return The numerical tag.
+ */
+ public int getTag() {
+ return tag;
+ }
+
+ /**
+ * <p>Assigns a tag to this group.</p>
+ *
+ * @param tag The numerical tag to set.
+ * @return This, to allow chaining.
+ */
+ public AggregationResult setTag(int tag) {
+ this.tag = tag;
+ return this;
+ }
+
+ /**
+ * <p>This method is called when merging aggregation results. This method is simply a proxy for the abstract {@link
+ * #onMerge(AggregationResult)} method.</p>
+ *
+ * @param result The result to merge with.
+ */
+ public void merge(AggregationResult result) {
+ onMerge(result);
+ }
+
+ /**
+ * <p>This method is called when all aggregation results have been merged. This method can be overloaded by
+ * subclasses that need special behaviour to occur after merge.</p>
+ */
+ public void postMerge() {
+ // empty
+ }
+
+ /**
+ * <p>This method returns a value that can be used for ranking.</p>
+ *
+ * @return The rankable result.
+ */
+ public abstract ResultNode getRank();
+
+ /**
+ * <p>Sets the expression to aggregate on.</p>
+ *
+ * @param exp The expression.
+ * @return This, to allow chaining.
+ */
+ public AggregationResult setExpression(ExpressionNode exp) {
+ expression = exp;
+ return this;
+ }
+
+ /**
+ * <p>Returns the expression to aggregate on.</p>
+ *
+ * @return The expression.
+ */
+ public ExpressionNode getExpression() {
+ return expression;
+ }
+
+ /**
+ * <p>This method must be implemented by subclasses to support merge. It is called as the {@link
+ * #merge(AggregationResult)} method is invoked.</p>
+ *
+ * @param result The result to merge with.
+ */
+ protected abstract void onMerge(AggregationResult result);
+
+ @Override
+ public ResultNode getResult() {
+ return getRank();
+ }
+
+ @Override
+ public void onPrepare() {
+
+ }
+
+ @Override
+ public boolean onExecute() {
+ return true;
+ }
+
+ @Override
+ protected int onGetClassId() {
+ return classId;
+ }
+
+ @Override
+ protected void onSerialize(Serializer buf) {
+ super.onSerialize(buf);
+ serializeOptional(buf, expression);
+ buf.putInt(null, tag);
+ }
+
+ @Override
+ protected void onDeserialize(Deserializer buf) {
+ super.onDeserialize(buf);
+ expression = (ExpressionNode)deserializeOptional(buf);
+ tag = buf.getInt(null);
+ }
+
+ @Override
+ public AggregationResult clone() {
+ AggregationResult obj = (AggregationResult)super.clone();
+ if (expression != null) {
+ obj.expression = expression.clone();
+ }
+ return obj;
+ }
+
+ @Override
+ protected final boolean equalsExpression(ExpressionNode obj) {
+ AggregationResult rhs = (AggregationResult)obj;
+ if (!equals(expression, rhs.expression)) {
+ return false;
+ }
+ if (tag != rhs.tag) {
+ return false;
+ }
+ if (!equalsAggregation(rhs)) {
+ return false;
+ }
+ return true;
+ }
+
+ protected abstract boolean equalsAggregation(AggregationResult obj);
+
+ @Override
+ public void visitMembers(ObjectVisitor visitor) {
+ super.visitMembers(visitor);
+ visitor.visit("expression", expression);
+ visitor.visit("tag", tag);
+ }
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/aggregation/AverageAggregationResult.java b/searchlib/src/main/java/com/yahoo/searchlib/aggregation/AverageAggregationResult.java
new file mode 100644
index 00000000000..651ab192786
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/aggregation/AverageAggregationResult.java
@@ -0,0 +1,157 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.aggregation;
+
+import com.yahoo.searchlib.expression.IntegerResultNode;
+import com.yahoo.searchlib.expression.NumericResultNode;
+import com.yahoo.searchlib.expression.ResultNode;
+import com.yahoo.vespa.objects.Deserializer;
+import com.yahoo.vespa.objects.ObjectVisitor;
+import com.yahoo.vespa.objects.Serializer;
+
+/**
+ * This is an aggregated result holding the average of all results.
+ *
+ * @author <a href="mailto:balder@yahoo-inc.com">Henning Baldersheim</a>
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
+ */
+public class AverageAggregationResult extends AggregationResult {
+
+ public static final int classId = registerClass(0x4000 + 85, AverageAggregationResult.class);
+ private NumericResultNode sum;
+ private long count;
+
+ /**
+ * Constructs an empty result node. <b>NOTE:</b> This instance is broken until non-optional member data is set.
+ */
+ public AverageAggregationResult() {
+
+ }
+
+ /**
+ * Constructs an instance of this class with given sum and count values.
+ *
+ * @param sum The initial sum to set.
+ * @param count The initial number of results.
+ */
+ public AverageAggregationResult(NumericResultNode sum, long count) {
+ setSum(sum);
+ setCount(count);
+ }
+
+ /**
+ * Returns the sum of all results in this.
+ *
+ * @return The numeric sum.
+ */
+ public final NumericResultNode getSum() {
+ return sum;
+ }
+
+ /**
+ * Sets the sum of all results in this.
+ *
+ * @param sum The sum to set.
+ * @return This, to allow chaining.
+ */
+ public final AverageAggregationResult setSum(NumericResultNode sum) {
+ this.sum = sum;
+ return this;
+ }
+
+ /**
+ * Returns the number of results in this.
+ *
+ * @return The number of results.
+ */
+ public final long getCount() {
+ return count;
+ }
+
+ /**
+ * Sets the number of results in this.
+ *
+ * @param count The number of results.
+ * @return This, to allow chaining.
+ */
+ public final AverageAggregationResult setCount(long count) {
+ this.count = count;
+ return this;
+ }
+
+ /**
+ * Returns the average value of the results. Because the result can be any numeric type, this method returns a
+ * {@link NumericResultNode} object.
+ *
+ * @return The average result value.
+ */
+ public final NumericResultNode getAverage() {
+ NumericResultNode sum = (NumericResultNode)this.sum.clone();
+ if (count != 0) {
+ sum.divide(new IntegerResultNode(count));
+ }
+ return sum;
+ }
+
+ @Override
+ public ResultNode getRank() {
+ return getAverage();
+ }
+
+ @Override
+ protected int onGetClassId() {
+ return classId;
+ }
+
+ @Override
+ protected void onSerialize(Serializer buf) {
+ super.onSerialize(buf);
+ buf.putLong(null, count);
+ serializeOptional(buf, sum);
+ }
+
+ @Override
+ protected void onDeserialize(Deserializer buf) {
+ super.onDeserialize(buf);
+ count = buf.getLong(null);
+ sum = (NumericResultNode)deserializeOptional(buf);
+ }
+
+ @Override
+ protected void onMerge(AggregationResult result) {
+ sum.add(((AverageAggregationResult)result).sum);
+ count += ((AverageAggregationResult)result).count;
+ }
+
+ @Override
+ public AverageAggregationResult clone() {
+ AverageAggregationResult obj = (AverageAggregationResult)super.clone();
+ if (sum != null) {
+ obj.sum = (NumericResultNode)sum.clone();
+ }
+ return obj;
+ }
+
+ @Override
+ protected boolean equalsAggregation(AggregationResult obj) {
+ AverageAggregationResult rhs = (AverageAggregationResult)obj;
+ if (!equals(sum, rhs.sum)) {
+ return false;
+ }
+ if (count != rhs.count) {
+ return false;
+ }
+ return true;
+ }
+
+ @Override
+ public int hashCode() {
+ return super.hashCode() + (int)count;
+ }
+
+ @Override
+ public void visitMembers(ObjectVisitor visitor) {
+ super.visitMembers(visitor);
+ visitor.visit("sum", sum);
+ visitor.visit("count", count);
+ }
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/aggregation/CountAggregationResult.java b/searchlib/src/main/java/com/yahoo/searchlib/aggregation/CountAggregationResult.java
new file mode 100644
index 00000000000..5f90c126115
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/aggregation/CountAggregationResult.java
@@ -0,0 +1,99 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.aggregation;
+
+import com.yahoo.searchlib.expression.IntegerResultNode;
+import com.yahoo.searchlib.expression.ResultNode;
+import com.yahoo.vespa.objects.Deserializer;
+import com.yahoo.vespa.objects.ObjectVisitor;
+import com.yahoo.vespa.objects.Serializer;
+
+/**
+ * This is an aggregated result holding the number of aggregated hits.
+ *
+ * @author <a href="mailto:balder@yahoo-inc.com">Henning Baldersheim</a>
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
+ */
+public class CountAggregationResult extends AggregationResult {
+
+ public static final int classId = registerClass(0x4000 + 81, CountAggregationResult.class);
+ private long count = 0;
+
+ /**
+ * Constructs an empty result node. <b>NOTE:</b> This instance is broken until non-optional member data is set.
+ */
+ public CountAggregationResult() {
+
+ }
+
+ /**
+ * Constructs an instance of this class with given count value.
+ *
+ * @param count The initial number of hits.
+ */
+ public CountAggregationResult(long count) {
+ setCount(count);
+ }
+
+ /**
+ * Returns the number of aggregated hits.
+ *
+ * @return The count.
+ */
+ public final long getCount() {
+ return count;
+ }
+
+ /**
+ * Sets the number of aggregated hits.
+ *
+ * @param count The count.
+ * @return This, to allow chaining.
+ */
+ public final CountAggregationResult setCount(long count) {
+ this.count = count;
+ return this;
+ }
+
+ @Override
+ public ResultNode getRank() {
+ return new IntegerResultNode(count);
+ }
+
+ @Override
+ protected void onMerge(AggregationResult result) {
+ count += ((CountAggregationResult)result).count;
+ }
+
+ @Override
+ protected int onGetClassId() {
+ return classId;
+ }
+
+ @Override
+ protected void onSerialize(Serializer buf) {
+ super.onSerialize(buf);
+ buf.putLong(null, count);
+ }
+
+ @Override
+ protected void onDeserialize(Deserializer buf) {
+ super.onDeserialize(buf);
+ count = buf.getLong(null);
+ }
+
+ @Override
+ protected boolean equalsAggregation(AggregationResult obj) {
+ return count == ((CountAggregationResult)obj).count;
+ }
+
+ @Override
+ public int hashCode() {
+ return super.hashCode() + (int)count;
+ }
+
+ @Override
+ public void visitMembers(ObjectVisitor visitor) {
+ super.visitMembers(visitor);
+ visitor.visit("count", count);
+ }
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/aggregation/ExpressionCountAggregationResult.java b/searchlib/src/main/java/com/yahoo/searchlib/aggregation/ExpressionCountAggregationResult.java
new file mode 100644
index 00000000000..d6c76087e4e
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/aggregation/ExpressionCountAggregationResult.java
@@ -0,0 +1,116 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.aggregation;
+
+import com.yahoo.searchlib.aggregation.hll.*;
+import com.yahoo.searchlib.expression.IntegerResultNode;
+import com.yahoo.searchlib.expression.ResultNode;
+import com.yahoo.vespa.objects.Deserializer;
+import com.yahoo.vespa.objects.ObjectVisitor;
+import com.yahoo.vespa.objects.Serializer;
+
+/**
+ * This is an aggregated result holding the number of unique documents matching a given expression.
+ *
+ * @author bjorncs
+ */
+public class ExpressionCountAggregationResult extends AggregationResult {
+
+ public static final int classId = registerClass(0x4000 + 88, ExpressionCountAggregationResult.class);
+ private static final int UNDEFINED = -1;
+
+ // The unique count estimator
+ private final UniqueCountEstimator<Sketch<?>> estimator;
+ // Sketch merger
+ private final SketchMerger sketchMerger = new SketchMerger();
+ // The sketch used as basis for the unique count calculation. The sketch is populated with data by the search nodes.
+ private Sketch<?> sketch;
+ // The estimated unique count. This value will not be serialized / deserialized.
+ private long estimatedUniqueCount = UNDEFINED;
+
+
+ /**
+ * Constructor used for deserialization. Will be instantiated with a default sketch.
+ */
+ @SuppressWarnings("UnusedDeclaration")
+ public ExpressionCountAggregationResult() {
+ this(new SparseSketch(), new HyperLogLogEstimator());
+ }
+
+ /**
+ * Constructs an instance with a given sketch, sketch merger and unique count estimator. For test purposes.
+ *
+ * @param initialSketch The HLL sketch.
+ */
+ public ExpressionCountAggregationResult(Sketch<?> initialSketch, UniqueCountEstimator<Sketch<?>> estimator) {
+ this.sketch = initialSketch;
+ this.estimator = estimator;
+ }
+
+ /**
+ * @return The unique count estimated by the HyperLogLog algorithm.
+ */
+ public long getEstimatedUniqueCount() {
+ if (estimatedUniqueCount == UNDEFINED) {
+ updateEstimate();
+ }
+ return estimatedUniqueCount;
+ }
+
+ @Override
+ public ResultNode getRank() {
+ return new IntegerResultNode(getEstimatedUniqueCount());
+ }
+
+ @Override
+ protected void onMerge(AggregationResult result) {
+ ExpressionCountAggregationResult other = (ExpressionCountAggregationResult) result;
+ sketch = sketchMerger.merge(sketch, other.sketch);
+ // Any cached result should be invalidated.
+ estimatedUniqueCount = UNDEFINED;
+ }
+
+ public Sketch<?> getSketch() {
+ return sketch;
+ }
+
+ @Override
+ protected int onGetClassId() {
+ return classId;
+ }
+
+ @Override
+ protected void onSerialize(Serializer buf) {
+ super.onSerialize(buf);
+ sketch.serializeWithId(buf);
+ }
+
+ @Override
+ protected void onDeserialize(Deserializer buf) {
+ super.onDeserialize(buf);
+ sketch = (Sketch<?>) create(buf);
+ }
+
+ @Override
+ protected boolean equalsAggregation(AggregationResult obj) {
+ // obj is assumed to always be of correct type.
+ ExpressionCountAggregationResult other = (ExpressionCountAggregationResult) obj;
+ return sketch.equals(other.sketch);
+ }
+
+ private void updateEstimate() {
+ estimatedUniqueCount = estimator.estimateCount(sketch);
+ }
+
+ @Override
+ public void visitMembers(ObjectVisitor visitor) {
+ super.visitMembers(visitor);
+ visitor.visit("sketch", sketch);
+ }
+
+ @Override
+ public int hashCode() {
+ int result = super.hashCode();
+ result = 31 * result + sketch.hashCode();
+ return result;
+ }
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/aggregation/FS4Hit.java b/searchlib/src/main/java/com/yahoo/searchlib/aggregation/FS4Hit.java
new file mode 100644
index 00000000000..8b0704eea9b
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/aggregation/FS4Hit.java
@@ -0,0 +1,132 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.aggregation;
+
+import com.yahoo.document.GlobalId;
+import com.yahoo.vespa.objects.Deserializer;
+import com.yahoo.vespa.objects.ObjectVisitor;
+import com.yahoo.vespa.objects.Serializer;
+
+/**
+ * This class represents a single hit from the fastserver4 backend
+ *
+ * @author <a href="mailto:havardpe@yahoo-inc.com">Haavard Pettersen</a>
+ */
+public class FS4Hit extends Hit {
+
+ public static final int classId = registerClass(0x4000 + 95, FS4Hit.class); // shared with c++
+ private int path = 0;
+ private GlobalId globalId = new GlobalId(new byte[GlobalId.LENGTH]);
+ private int distributionKey = -1;
+
+ /**
+ * Constructs an empty result node.
+ */
+ public FS4Hit() {
+ }
+
+ /**
+ * Create a hit with the given path and document id.
+ *
+ * @param path The mangled search node path.
+ * @param globalId The local document id.
+ * @param rank The rank of this hit.
+ */
+ public FS4Hit(int path, GlobalId globalId, double rank) {
+ this(path, globalId, rank, -1);
+ }
+
+ /**
+ * Create a hit with the given path and document id.
+ *
+ * @param path The mangled search node path.
+ * @param globalId The local document id.
+ * @param rank The rank of this hit.
+ * @param distributionKey The doc stamp.
+ */
+ public FS4Hit(int path, GlobalId globalId, double rank, int distributionKey) {
+ super(rank);
+ this.path = path;
+ this.globalId = globalId;
+ this.distributionKey = distributionKey;
+ }
+
+ /**
+ * Obtain the (mangled) network path back to the search node returning this hit.
+ *
+ * @return The mangled search node path.
+ */
+ public int getPath() {
+ return path;
+ }
+
+ /**
+ * Obtain the global document id on the search node returning this hit.
+ *
+ * @return The global document id.
+ */
+ public GlobalId getGlobalId() {
+ return globalId;
+ }
+
+ /**
+ * Obtain the distribution key for the node producing this hit.
+ *
+ * @return distribution key
+ */
+ public int getDistributionKey() {
+ return distributionKey;
+ }
+
+ @Override
+ protected int onGetClassId() {
+ return classId;
+ }
+
+ @Override
+ protected void onSerialize(Serializer buf) {
+ super.onSerialize(buf);
+ buf.putInt(null, path);
+ buf.put(null, globalId.getRawId());
+ buf.putInt(null, distributionKey);
+ }
+
+ @Override
+ protected void onDeserialize(Deserializer buf) {
+ super.onDeserialize(buf);
+ path = buf.getInt(null);
+ globalId = new GlobalId(buf.getBytes(null, GlobalId.LENGTH));
+ distributionKey = buf.getInt(null);
+ }
+
+ @Override
+ public int hashCode() {
+ return super.hashCode() + path + globalId.hashCode() + distributionKey;
+ }
+
+ @SuppressWarnings({ "EqualsWhichDoesntCheckParameterClass", "RedundantIfStatement" })
+ @Override
+ public boolean equals(Object obj) {
+ if (!super.equals(obj)) {
+ return false;
+ }
+ FS4Hit rhs = (FS4Hit)obj;
+ if (path != rhs.path) {
+ return false;
+ }
+ if (!globalId.equals(rhs.globalId)) {
+ return false;
+ }
+ if (distributionKey != rhs.distributionKey) {
+ return false;
+ }
+ return true;
+ }
+
+ @Override
+ public void visitMembers(ObjectVisitor visitor) {
+ super.visitMembers(visitor);
+ visitor.visit("path", path);
+ visitor.visit("globalId", globalId.toString());
+ visitor.visit("distributionKey", distributionKey);
+ }
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/aggregation/ForceLoad.java b/searchlib/src/main/java/com/yahoo/searchlib/aggregation/ForceLoad.java
new file mode 100644
index 00000000000..ecbab688821
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/aggregation/ForceLoad.java
@@ -0,0 +1,39 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.aggregation;
+
+/**
+ * FIXME: Really ugly hack to force class loading for subclasses of Identifiable.
+ * This should be fixed by doing the all class registration in a single place (similar to how its done in C++).
+ */
+public class ForceLoad {
+
+ static {
+ String pkg = "com.yahoo.searchlib.aggregation";
+ String[] classes = {
+ "XorAggregationResult",
+ "SumAggregationResult",
+ "Group",
+ "HitsAggregationResult",
+ "AggregationResult",
+ "FS4Hit",
+ "VdsHit",
+ "Grouping",
+ "Hit",
+ "ForceLoad",
+ "MinAggregationResult",
+ "GroupingLevel",
+ "MaxAggregationResult",
+ "CountAggregationResult",
+ "AverageAggregationResult",
+ "ExpressionCountAggregationResult",
+ "hll.SparseSketch",
+ "hll.NormalSketch",
+ "ForceLoad"
+ };
+ com.yahoo.system.ForceLoad.forceLoad(pkg, classes);
+ }
+
+ public static boolean forceLoad() {
+ return true;
+ }
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/aggregation/Group.java b/searchlib/src/main/java/com/yahoo/searchlib/aggregation/Group.java
new file mode 100644
index 00000000000..03836d75efc
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/aggregation/Group.java
@@ -0,0 +1,518 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.aggregation;
+
+import com.yahoo.searchlib.expression.AggregationRefNode;
+import com.yahoo.searchlib.expression.ExpressionNode;
+import com.yahoo.searchlib.expression.ResultNode;
+import com.yahoo.vespa.objects.*;
+
+import java.util.*;
+
+public class Group extends Identifiable {
+
+ public static final int classId = registerClass(0x4000 + 90, Group.class);
+ private static final ObjectPredicate REF_LOCATOR = new RefLocator();
+ private List<Integer> orderByIdx = new ArrayList<Integer>();
+ private List<ExpressionNode> orderByExp = new ArrayList<ExpressionNode>();
+ private List<AggregationResult> aggregationResults = new ArrayList<AggregationResult>();
+ private List<Group> children = new ArrayList<Group>();
+ private ResultNode id = null;
+ private double rank;
+ private int tag = -1;
+ private SortType sortType = SortType.UNSORTED;
+
+ /**
+ * <p>This tells you if the children are ranked by the pure relevance or by a more complex expression. That
+ * indicates if the rank score from the child can be used for ordering.</p>
+ *
+ * @return True if it ranked by pure relevance.
+ */
+ public boolean isRankedByRelevance() {
+ return orderByIdx.isEmpty();
+ }
+
+ /**
+ * <p>Merges the content of the given group <b>into</b> this. When this function returns, make sure to call {@link
+ * #postMerge(java.util.List, int, int)}.</p>
+ *
+ * @param firstLevel The first level to merge.
+ * @param currentLevel The current level.
+ * @param rhs The group to merge with.
+ */
+ public void merge(int firstLevel, int currentLevel, Group rhs) {
+ if (rhs.rank > rank) {
+ rank = rhs.rank; // keep highest rank
+ }
+ if (currentLevel >= firstLevel) {
+ for (int i = 0, len = aggregationResults.size(); i < len; ++i) {
+ aggregationResults.get(i).merge(rhs.aggregationResults.get(i));
+ }
+ }
+
+ ArrayList<Group> merged = new ArrayList<Group>();
+ Iterator<Group> lhsChild = children.iterator(), rhsChild = rhs.children.iterator();
+ if (lhsChild.hasNext() && rhsChild.hasNext()) {
+ Group lhsGroup = lhsChild.next();
+ Group rhsGroup = rhsChild.next();
+ for (; (lhsGroup != null) && (rhsGroup != null); ) {
+ int cmp = lhsGroup.getId().compareTo(rhsGroup.getId());
+ if (cmp < 0) {
+ merged.add(lhsGroup);
+ lhsGroup = lhsChild.hasNext() ? lhsChild.next() : null;
+ } else if (cmp > 0) {
+ merged.add(rhsGroup);
+ rhsGroup = rhsChild.hasNext() ? rhsChild.next() : null;
+ } else {
+ lhsGroup.merge(firstLevel, currentLevel + 1, rhsGroup);
+ merged.add(lhsGroup);
+ lhsGroup = lhsChild.hasNext() ? lhsChild.next() : null;
+ rhsGroup = rhsChild.hasNext() ? rhsChild.next() : null;
+ }
+ }
+ if (lhsGroup != null) {
+ merged.add(lhsGroup);
+ }
+ if (rhsGroup != null) {
+ merged.add(rhsGroup);
+ }
+ }
+ while (lhsChild.hasNext()) {
+ merged.add(lhsChild.next());
+ }
+ while (rhsChild.hasNext()) {
+ merged.add(rhsChild.next());
+ }
+ children = merged;
+ }
+
+ private void executeOrderBy() {
+ for (ExpressionNode node : orderByExp) {
+ node.prepare();
+ node.execute();
+ }
+ }
+
+ /**
+ * <p>After merging, this method will prune all levels so that they do not exceed the configured maximum number of
+ * groups per level.</p>
+ *
+ * @param levels The specs of all grouping levels.
+ * @param firstLevel The first level to merge.
+ * @param currentLevel The current level.
+ */
+ public void postMerge(List<GroupingLevel> levels, int firstLevel, int currentLevel) {
+ if (currentLevel >= firstLevel) {
+ for (AggregationResult result : aggregationResults) {
+ result.postMerge();
+ }
+ for (ExpressionNode result : orderByExp) {
+ result.execute();
+ }
+ }
+ if (currentLevel < levels.size()) {
+ int maxGroups = (int)levels.get(currentLevel).getMaxGroups();
+ for (Group group : children) {
+ group.executeOrderBy();
+ }
+ if (maxGroups >= 0 && children.size() > maxGroups) {
+ // prune groups
+ sortChildrenByRank();
+ children = children.subList(0, maxGroups);
+ sortChildrenById();
+ }
+ for (Group group : children) {
+ group.postMerge(levels, firstLevel, currentLevel + 1);
+ }
+ }
+
+ }
+
+ /**
+ * <p>Will sort the children by their id, if they are not sorted already.</p>
+ */
+ public void sortChildrenById() {
+ if (sortType == SortType.BYID) {
+ return;
+ }
+ Collections.sort(children, new Comparator<Group>() {
+ public int compare(Group lhs, Group rhs) {
+ return lhs.compareId(rhs);
+ }
+ });
+ sortType = SortType.BYID;
+ }
+
+ /**
+ * <p>Will sort the children by their rank, if they are not sorted already.</p>
+ */
+ public void sortChildrenByRank() {
+ if (sortType == SortType.BYRANK) {
+ return;
+ }
+ Collections.sort(children, new Comparator<Group>() {
+ public int compare(Group lhs, Group rhs) {
+ return lhs.compareRank(rhs);
+ }
+ });
+ sortType = SortType.BYRANK;
+ }
+
+ /**
+ * <p>Returns the label to use for this group. See comment on {@link #setId(com.yahoo.searchlib.expression.ResultNode)}
+ * on the rationale of this being a {@link ResultNode}.</p>
+ *
+ * @return The label.
+ */
+ public ResultNode getId() {
+ return id;
+ }
+
+ /**
+ * <p>Sets the label to use for this group. This is a {@link ResultNode} so that a group can be labeled with
+ * whatever value the classifier expression returns.</p>
+ *
+ * @param id The label to set.
+ * @return This, to allow chaining.
+ */
+ public Group setId(ResultNode id) {
+ this.id = id;
+ return this;
+ }
+
+ /**
+ * <p>Sets the relevancy to use for this group.</p>
+ *
+ * @param rank The rank to set.
+ * @return This, to allow chaining.
+ */
+ public Group setRank(double rank) {
+ this.rank = rank;
+ return this;
+ }
+
+ /**
+ * <p>Return the relevancy of this group.</p>
+ *
+ * @return Relevance.
+ */
+ public double getRank() {
+ return rank;
+ }
+
+ /**
+ * <p>Adds a child group to this.</p>
+ *
+ * @param child The group to add.
+ * @return This, to allow chaining.
+ */
+ public Group addChild(Group child) {
+ if (child == null) {
+ throw new IllegalArgumentException("Child can not be null.");
+ }
+ children.add(child);
+ return this;
+ }
+
+ /**
+ * <p>Returns the list of child groups to this.</p>
+ *
+ * @return The children.
+ */
+ public List<Group> getChildren() {
+ return children;
+ }
+
+ /**
+ * <p>Returns the tag of this group. This value is set per-level in the grouping request, and then becomes assigned
+ * to each group of that level in the grouping result as they are copied from the prototype.</p>
+ *
+ * @return The numerical tag.
+ */
+ public int getTag() {
+ return tag;
+ }
+
+ /**
+ * <p>Assigns a tag to this group.</p>
+ *
+ * @param tag The numerical tag to set.
+ * @return This, to allow chaining.
+ */
+ public Group setTag(int tag) {
+ this.tag = tag;
+ return this;
+ }
+
+ /**
+ * <p>Returns this group's aggregation results.</p>
+ *
+ * @return The aggregation results.
+ */
+ public List<AggregationResult> getAggregationResults() {
+ return aggregationResults;
+ }
+
+ /**
+ * <p>Adds an aggregation result to this group.</p>
+ *
+ * @param result The result to add.
+ * @return This, to allow chaining.
+ */
+ public Group addAggregationResult(AggregationResult result) {
+ aggregationResults.add(result);
+ return this;
+ }
+
+ /**
+ * <p>Adds an order-by expression to this group. If the expression is an AggregationResult, it will be added to the
+ * list of this group's AggregationResults, and a reference to that expression is added instead. If the
+ * AggregationResult is already present, a reference to THAT result is created instead.</p>
+ *
+ * @param exp The result to add.
+ * @param asc True to sort ascending, false to sort descending.
+ * @return This, to allow chaining.
+ */
+ public Group addOrderBy(ExpressionNode exp, boolean asc) {
+ if (exp instanceof AggregationResult) {
+ exp = new AggregationRefNode((AggregationResult)exp);
+ }
+ exp.select(REF_LOCATOR, new RefResolver(this));
+ orderByExp.add(exp);
+ orderByIdx.add((asc ? 1 : -1) * orderByExp.size());
+ return this;
+ }
+
+ public List<Integer> getOrderByIndexes() {
+ return Collections.unmodifiableList(orderByIdx);
+ }
+
+ public List<ExpressionNode> getOrderByExpressions() {
+ return Collections.unmodifiableList(orderByExp);
+ }
+
+ private int compareId(Group rhs) {
+ return getId().compareTo(rhs.getId());
+ }
+
+ private int compareRank(Group rhs) {
+ long diff = 0;
+ for (int i = 0, m = orderByIdx.size(); (diff == 0) && (i < m); i++) {
+ int rawIndex = orderByIdx.get(i);
+ int index = ((rawIndex < 0) ? -rawIndex : rawIndex) - 1;
+ diff = orderByExp.get(index).getResult().compareTo(rhs.orderByExp.get(index).getResult());
+ diff = diff * rawIndex;
+ }
+ if (diff < 0) {
+ return -1;
+ }
+ if (diff > 0) {
+ return 1;
+ }
+ if (rank > rhs.rank) {
+ return -1;
+ }
+ if (rank < rhs.rank) {
+ return 1;
+ }
+ return 0;
+ }
+
+ @Override
+ protected int onGetClassId() {
+ return classId;
+ }
+
+ @Override
+ protected void onSerialize(Serializer buf) {
+ super.onSerialize(buf);
+ serializeOptional(buf, id);
+ buf.putDouble(null, rank);
+ int sz = orderByIdx.size();
+ buf.putInt(null, sz);
+ for (Integer index : orderByIdx) {
+ buf.putInt(null, index);
+ }
+ int numResults = aggregationResults.size();
+ buf.putInt(null, numResults);
+ for (AggregationResult a : aggregationResults) {
+ serializeOptional(buf, a);
+ }
+ int numExpressionResults = orderByExp.size();
+ buf.putInt(null, numExpressionResults);
+ for (ExpressionNode e : orderByExp) {
+ serializeOptional(buf, e);
+ }
+ int numGroups = children.size();
+ buf.putInt(null, numGroups);
+ for (Group g : children) {
+ g.serializeWithId(buf);
+ }
+ buf.putInt(null, tag);
+ }
+
+ @Override
+ protected void onDeserialize(Deserializer buf) {
+ super.onDeserialize(buf);
+ id = (ResultNode)deserializeOptional(buf);
+ rank = buf.getDouble(null);
+ orderByIdx.clear();
+ int orderByCount = buf.getInt(null);
+ for (int i = 0; i < orderByCount; i++) {
+ orderByIdx.add(buf.getInt(null));
+ }
+ int numResults = buf.getInt(null);
+ for (int i = 0; i < numResults; i++) {
+ AggregationResult e = (AggregationResult)deserializeOptional(buf);
+ aggregationResults.add(e);
+ }
+ int numExpressionResults = buf.getInt(null);
+ RefResolver resolver = new RefResolver(this);
+ for (int i = 0; i < numExpressionResults; i++) {
+ ExpressionNode exp = (ExpressionNode)deserializeOptional(buf);
+ exp.select(REF_LOCATOR, resolver);
+ orderByExp.add(exp);
+ }
+ int numGroups = buf.getInt(null);
+ for (int i = 0; i < numGroups; i++) {
+ Group g = new Group();
+ g.deserializeWithId(buf);
+ children.add(g);
+ }
+ tag = buf.getInt(null);
+ }
+
+ @Override
+ public int hashCode() {
+ return super.hashCode() + aggregationResults.hashCode() + children.hashCode();
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ if (!super.equals(obj)) {
+ return false;
+ }
+ Group rhs = (Group)obj;
+ if (!equals(id, rhs.id)) {
+ return false;
+ }
+ if (rank != rhs.rank) {
+ return false;
+ }
+ if (!aggregationResults.equals(rhs.aggregationResults)) {
+ return false;
+ }
+ if (!orderByIdx.equals(rhs.orderByIdx)) {
+ return false;
+ }
+ if (!orderByExp.equals(rhs.orderByExp)) {
+ return false;
+ }
+ if (!children.equals(rhs.children)) {
+ return false;
+ }
+ return true;
+ }
+
+ @Override
+ public Group clone() {
+ Group obj = (Group)super.clone();
+ if (id != null) {
+ obj.id = (ResultNode)id.clone();
+ }
+ obj.aggregationResults = new ArrayList<AggregationResult>();
+ for (AggregationResult result : aggregationResults) {
+ obj.aggregationResults.add(result.clone());
+ }
+ obj.orderByIdx = new ArrayList<Integer>();
+ for (Integer idx : orderByIdx) {
+ obj.orderByIdx.add(idx);
+ }
+ obj.orderByExp = new ArrayList<ExpressionNode>();
+ RefResolver resolver = new RefResolver(obj);
+ for (ExpressionNode exp : orderByExp) {
+ exp = exp.clone();
+ exp.select(REF_LOCATOR, resolver);
+ obj.orderByExp.add(exp);
+ }
+ obj.children = new ArrayList<Group>();
+ for (Group child : children) {
+ obj.children.add(child.clone());
+ }
+ return obj;
+ }
+
+ @Override
+ public void visitMembers(ObjectVisitor visitor) {
+ super.visitMembers(visitor);
+ visitor.visit("id", id);
+ visitor.visit("rank", rank);
+ visitor.visit("aggregationresults", aggregationResults);
+ visitor.visit("orderby-idx", orderByIdx);
+ visitor.visit("orderby-exp", orderByExp);
+ visitor.visit("children", children);
+ visitor.visit("tag", tag);
+ }
+
+ @Override
+ public void selectMembers(ObjectPredicate predicate, ObjectOperation operation) {
+ for (AggregationResult result : aggregationResults) {
+ result.select(predicate, operation);
+ }
+ for (ExpressionNode exp : orderByExp) {
+ exp.select(predicate, operation);
+ }
+ }
+
+ private static enum SortType {
+ UNSORTED,
+ BYRANK,
+ BYID
+ }
+
+ private static class RefLocator implements ObjectPredicate {
+
+ @Override
+ public boolean check(Object obj) {
+ return obj instanceof AggregationRefNode;
+ }
+ }
+
+ private static class RefResolver implements ObjectOperation {
+
+ final List<AggregationResult> results;
+
+ RefResolver(Group group) {
+ this.results = group.aggregationResults;
+ }
+
+ @Override
+ public void execute(Object obj) {
+ AggregationRefNode ref = (AggregationRefNode)obj;
+ int idx = ref.getIndex();
+ if (idx < 0) {
+ AggregationResult res = ref.getExpression();
+ idx = indexOf(res);
+ if (idx < 0) {
+ idx = results.size();
+ results.add(res);
+ }
+ ref.setIndex(idx);
+ } else {
+ ref.setExpression(results.get(idx));
+ }
+ }
+
+ int indexOf(AggregationResult lhs) {
+ int prevTag = lhs.getTag();
+ for (int i = 0, len = results.size(); i < len; ++i) {
+ AggregationResult rhs = results.get(i);
+ lhs.setTag(rhs.getTag());
+ if (lhs.equals(rhs)) {
+ return i;
+ }
+ }
+ lhs.setTag(prevTag);
+ return -1;
+ }
+ }
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/aggregation/Grouping.java b/searchlib/src/main/java/com/yahoo/searchlib/aggregation/Grouping.java
new file mode 100644
index 00000000000..6e384e6e0b5
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/aggregation/Grouping.java
@@ -0,0 +1,445 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.aggregation;
+
+import com.yahoo.searchlib.expression.BucketResultNode;
+import com.yahoo.searchlib.expression.NullResultNode;
+import com.yahoo.searchlib.expression.ResultNode;
+import com.yahoo.vespa.objects.*;
+
+import java.util.ArrayList;
+import java.util.List;
+
+public class Grouping extends Identifiable {
+
+ // Force load all of expression and aggregation when using this class.
+ static {
+ com.yahoo.searchlib.aggregation.ForceLoad.forceLoad();
+ com.yahoo.searchlib.expression.ForceLoad.forceLoad();
+ }
+
+ // The global class identifier shared with C++.
+ public static final int classId = registerClass(0x4000 + 91, Grouping.class);
+
+ // The client id for this grouping request.
+ private int id = 0;
+
+ // Whether or not this grouping is valid.
+ private boolean valid = true;
+
+ // Whether or not to group all hits or only those with hits. Only applicable for streaming search.
+ private boolean all = false;
+
+ // How many hits to group per backend node.
+ private long topN = -1;
+
+ // The level to start grouping in backend. This also instantiates the next level, if any.
+ private int firstLevel = 0;
+
+ // The last level to group in backend.
+ private int lastLevel = 0;
+
+ private boolean forceSinglePass = false;
+
+ // Details for each level except root.
+ private List<GroupingLevel> groupingLevels = new ArrayList<>();
+
+ // Actual root group, does not require level details.
+ private Group root = new Group();
+
+ /**
+ * <p>Constructs an empty result node. <b>NOTE:</b> This instance is broken until non-optional member data is
+ * set.</p>
+ */
+ public Grouping() {
+ super();
+ }
+
+ /**
+ * <p>Constructs an instance of this class with given client id.</p>
+ *
+ * @param id The client id for this grouping request.
+ */
+ public Grouping(int id) {
+ super();
+ setId(id);
+ }
+
+ /**
+ * <p>Merges the content of the given grouping <b>into</b> this.</p>
+ *
+ * @param rhs The grouping to merge with.
+ */
+ public void merge(Grouping rhs) {
+ root.merge(firstLevel, 0, rhs.root);
+ }
+
+ /**
+ * <p>This method is invoked after merging is done. It is intended used for resolving any dependencies or derivates
+ * that might have changes due to the merge.</p>
+ */
+ public void postMerge() {
+ root.postMerge(groupingLevels, firstLevel, 0);
+ }
+
+ /**
+ * <p>Returns the client id of this grouping request.</p>
+ *
+ * @return The identifier.
+ */
+ public int getId() {
+ return id;
+ }
+
+ /**
+ * <p>Sets the client id for this grouping request.</p>
+ *
+ * @param id The identifier to set.
+ * @return This, to allow chaining.
+ */
+ public Grouping setId(int id) {
+ this.id = id;
+ return this;
+ }
+
+ /**
+ * <p>Returns whether or not this grouping request is valid.</p>
+ *
+ * @return True if valid.
+ */
+ public boolean valid() {
+ return valid;
+ }
+
+ /**
+ * <p>Returns whether or not to perform grouping on the entire document corpus instead of only those matching the
+ * search criteria. Please see note on {@link #setAll(boolean)}.</p>
+ *
+ * @return True if grouping all documents.
+ */
+ public boolean getAll() {
+ return all;
+ }
+
+ /**
+ * <p>Sets whether or not to perform grouping on the entire document corpus instead of only those matching the
+ * search criteria. <b>NOTE:</b> This is only possible with streaming search.</p>
+ *
+ * @param all True to group all documents.
+ * @return This, to allow chaining.
+ */
+ public Grouping setAll(boolean all) {
+ this.all = all;
+ return this;
+ }
+
+ /**
+ * <p>Returns the number of candidate documents to group.</p>
+ *
+ * @return The number.
+ */
+ public long getTopN() {
+ return topN;
+ }
+
+ /**
+ * <p>Sets the number of candidate documents to group.</p>
+ *
+ * @param topN The number to set.
+ * @return This, to allow chaining.
+ */
+ public Grouping setTopN(long topN) {
+ this.topN = topN;
+ return this;
+ }
+
+ /**
+ * <p>Returns the first level to start grouping work. See note on {@link #setFirstLevel(int)}.</p>
+ *
+ * @return The first level.
+ */
+ public int getFirstLevel() {
+ return firstLevel;
+ }
+
+ /**
+ * <p>Sets the first level to start grouping work. All the necessary work above this group level is expected to be
+ * already done.</p>
+ *
+ * @param level The level to set.
+ * @return This, to allow chaining.
+ */
+ public Grouping setFirstLevel(int level) {
+ firstLevel = level;
+ return this;
+ }
+
+ /**
+ * <p>Returns the last level to do grouping work. See note on {@link #setLastLevel(int)}.</p>
+ *
+ * @return The last level.
+ */
+ public int getLastLevel() {
+ return lastLevel;
+ }
+
+ /**
+ * <p>Sets the last level to do grouping work. Executing a level will instantiate the {@link Group} objects for the
+ * next level, if there is any. This means that grouping work ends at this level, but also instantiates the groups
+ * for level (lastLevel + 1).</p>
+ *
+ * @param level The level to set.
+ * @return This, to allow chaining.
+ */
+ public Grouping setLastLevel(int level) {
+ lastLevel = level;
+ return this;
+ }
+
+ /**
+ * <p>Returns the list of grouping levels that make up this grouping request.</p>
+ *
+ * @return The list.
+ */
+ public List<GroupingLevel> getLevels() {
+ return groupingLevels;
+ }
+
+ /**
+ * <p>Appends the given grouping level specification to the list of levels.</p>
+ *
+ * @param level The level to add.
+ * @return This, to allow chaining.
+ * @throws NullPointerException If <tt>level</tt> argument is null.
+ */
+ public Grouping addLevel(GroupingLevel level) {
+ level.getClass(); // throws NullPointerException
+ groupingLevels.add(level);
+ return this;
+ }
+
+ /**
+ * <p>Returns the root group.</p>
+ *
+ * @return The root.
+ */
+ public Group getRoot() {
+ return root;
+ }
+
+ /**
+ * <p>Sets the root group.</p>
+ *
+ * @param root The group to set as root.
+ * @return This, to allow chaining.
+ * @throws NullPointerException If <tt>root</tt> argument is null.
+ */
+ public Grouping setRoot(Group root) {
+ root.getClass(); // throws NullPointerException
+ this.root = root;
+ return this;
+ }
+
+ /**
+ * <p>Returns whether or not single pass execution of grouping is forced.</p>
+ *
+ * @return True if single pass grouping is forced.
+ */
+ public boolean getForceSinglePass() {
+ return forceSinglePass;
+ }
+
+ /**
+ * <p>Sets whether or not grouping should be forced to execute in a single pass. If false, this <tt>Grouping</tt>
+ * might still execute in a single pass due to other constraints.</p>
+ *
+ * @param forceSinglePass True to force execution in single pass.
+ * @return This, to allow chaining.
+ */
+ public Grouping setForceSinglePass(boolean forceSinglePass) {
+ this.forceSinglePass = forceSinglePass;
+ return this;
+ }
+
+ /**
+ * <p>Returns whether or not grouping should be executed in a single pass.</p>
+ *
+ * @return True if grouping should be executed in a single pass.
+ */
+ public boolean useSinglePass() {
+ return needDeepResultCollection() || getForceSinglePass();
+ }
+
+ /**
+ * <p>Tell if ordering will need results collected in children. in that case we will probably just do a single
+ * pass.</p>
+ *
+ * @return If deeper resultcollection is needed.
+ */
+ public boolean needDeepResultCollection() {
+ if (forceSinglePass) {
+ return true;
+ }
+ for (GroupingLevel level : groupingLevels) {
+ if (level.needResultCollection()) {
+ return true;
+ }
+ }
+ return false;
+ }
+
+ @Override
+ protected int onGetClassId() {
+ return classId;
+ }
+
+ @Override
+ protected void onSerialize(Serializer buf) {
+ buf.putInt(null, id);
+ byte tmp = valid ? (byte)1 : (byte)0;
+ buf.putByte(null, tmp);
+ tmp = all ? (byte)1 : (byte)0;
+ buf.putByte(null, tmp);
+ buf.putLong(null, topN);
+ buf.putInt(null, firstLevel);
+ buf.putInt(null, lastLevel);
+ buf.putInt(null, groupingLevels.size());
+ for (GroupingLevel level : groupingLevels) {
+ level.serializeWithId(buf);
+ }
+ root.serializeWithId(buf);
+ }
+
+ @Override
+ protected void onDeserialize(Deserializer buf) {
+ id = buf.getInt(null);
+ byte tmp = buf.getByte(null);
+ valid = (tmp != 0);
+ tmp = buf.getByte(null);
+ all = (tmp != 0);
+ topN = buf.getLong(null);
+ firstLevel = buf.getInt(null);
+ lastLevel = buf.getInt(null);
+ int numLevels = buf.getInt(null);
+ for (int i = 0; i < numLevels; i++) {
+ GroupingLevel level = new GroupingLevel();
+ level.deserializeWithId(buf);
+ groupingLevels.add(level);
+ }
+ root.deserializeWithId(buf);
+ }
+
+ @Override
+ public int hashCode() {
+ return super.hashCode() + id + (valid ? 66 : 99) + (all ? 666 : 999) + (int)topN + groupingLevels.hashCode();
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ if (!super.equals(obj)) {
+ return false;
+ }
+ Grouping rhs = (Grouping)obj;
+ if (id != rhs.id) {
+ return false;
+ }
+ if (valid != rhs.valid) {
+ return false;
+ }
+ if (all != rhs.all) {
+ return false;
+ }
+ if (topN != rhs.topN) {
+ return false;
+ }
+ if (firstLevel != rhs.firstLevel) {
+ return false;
+ }
+ if (lastLevel != rhs.lastLevel) {
+ return false;
+ }
+ if (!groupingLevels.equals(rhs.groupingLevels)) {
+ return false;
+ }
+ if (!root.equals(rhs.root)) {
+ return false;
+ }
+ return true;
+ }
+
+ @Override
+ public Grouping clone() {
+ Grouping obj = (Grouping)super.clone();
+ obj.groupingLevels = new ArrayList<>();
+ for (GroupingLevel level : groupingLevels) {
+ obj.groupingLevels.add(level.clone());
+ }
+ obj.root = root.clone();
+ return obj;
+ }
+
+ @Override
+ public void visitMembers(ObjectVisitor visitor) {
+ super.visitMembers(visitor);
+ visitor.visit("id", id);
+ visitor.visit("valid", valid);
+ visitor.visit("all", all);
+ visitor.visit("topN", topN);
+ visitor.visit("firstLevel", firstLevel);
+ visitor.visit("lastLevel", lastLevel);
+ visitor.visit("groupingLevels", groupingLevels);
+ visitor.visit("root", root);
+ }
+
+ @Override
+ public void selectMembers(ObjectPredicate predicate, ObjectOperation operation) {
+ selectGroups(predicate, operation, root, firstLevel, lastLevel, 0);
+ }
+
+ public void unifyNull() {
+ class FindGroup implements ObjectPredicate {
+
+ @Override
+ public boolean check(Object obj) {
+ return obj instanceof Group;
+ }
+ }
+ class UnifyNullGroupId implements ObjectOperation {
+
+ @Override
+ public void execute(Object obj) {
+ Group group = (Group)obj;
+ ResultNode id = group.getId();
+ if (id instanceof BucketResultNode && ((BucketResultNode)id).empty()) {
+ group.setId(new NullResultNode());
+ }
+ }
+ }
+ selectMembers(new FindGroup(), new UnifyNullGroupId());
+ }
+
+ /**
+ * <p>This is a helper function to perform recursive traversal of all groups contained in this grouping object. It
+ * is invoked by the {@link #selectMembers(ObjectPredicate, ObjectOperation)} method and itself. This method will
+ * only evaluate the groups that belong to active levels.</p>
+ *
+ * @param predicate The object predicate to evaluate.
+ * @param operation The operation to execute when the predicate is true.
+ * @param group The group to evaluate.
+ * @param first The first active level.
+ * @param last The last active level.
+ * @param current The level being evaluated.
+ */
+ private static void selectGroups(ObjectPredicate predicate, ObjectOperation operation,
+ Group group, int first, int last, int current)
+ {
+ if (current > last) {
+ return;
+ }
+ if (current >= first) {
+ group.select(predicate, operation);
+ }
+ for (Group child : group.getChildren()) {
+ selectGroups(predicate, operation, child, first, last, current + 1);
+ }
+ }
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/aggregation/GroupingLevel.java b/searchlib/src/main/java/com/yahoo/searchlib/aggregation/GroupingLevel.java
new file mode 100644
index 00000000000..7e10507a57a
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/aggregation/GroupingLevel.java
@@ -0,0 +1,184 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.aggregation;
+
+import com.yahoo.searchlib.expression.ExpressionNode;
+import com.yahoo.vespa.objects.Deserializer;
+import com.yahoo.vespa.objects.Identifiable;
+import com.yahoo.vespa.objects.ObjectVisitor;
+import com.yahoo.vespa.objects.Serializer;
+
+public class GroupingLevel extends Identifiable {
+
+ // The global class identifier shared with C++.
+ public static final int classId = registerClass(0x4000 + 93, GroupingLevel.class);
+
+ // The maximum number of groups allowed at this level.
+ private long maxGroups = -1;
+
+ // The precsicion used for estimation. This is number of groups returned up when using orderby that need more info to get it correct.
+ private long precision = -1;
+
+ // The classifier expression; the result of this is the group key.
+ private ExpressionNode classify = null;
+
+ // The prototype of the groups to create for each class.
+ private Group collect = new Group();
+
+ /**
+ * <p>Returns the presicion (i.e number of groups) returned up from this level.</p>
+ *
+ * @return The precision.
+ */
+ public long getPrecision() {
+ return precision;
+ }
+
+ /**
+ * <p>Returns the maximum number of groups allowed at this level.</p>
+ *
+ * @return The maximum number.
+ */
+ public long getMaxGroups() {
+ return maxGroups;
+ }
+
+ /**
+ * <p>Sets the maximum number of groups allowed at this level.</p>
+ *
+ * @param max The maximum number to set.
+ * @return This, to allow chaining.
+ */
+ public GroupingLevel setMaxGroups(long max) {
+ maxGroups = max;
+ if (precision < maxGroups) {
+ precision = maxGroups;
+ }
+ return this;
+ }
+
+ /**
+ * <p>Sets the presicion (i.e number of groups) returned up from this level.</p>
+ *
+ * @param precision The precision to set.
+ * @return This, to allow chaining.
+ */
+ public GroupingLevel setPrecision(long precision) {
+ this.precision = precision;
+ return this;
+ }
+
+ /**
+ * <p>Returns the expression used to classify hits into groups.</p>
+ *
+ * @return The classifier expression.
+ */
+ public ExpressionNode getExpression() {
+ return classify;
+ }
+
+ /**
+ * <p>Sets the expression used to classify hits into groups.</p>
+ *
+ * @param exp The classifier expression to set.
+ * @return This, to allow chaining.
+ */
+ public GroupingLevel setExpression(ExpressionNode exp) {
+ classify = exp;
+ return this;
+ }
+
+ /**
+ * <p>Sets the prototype to use when creating groups at this level.</p>
+ *
+ * @param group The group prototype.
+ * @return This, to allow chaining.
+ */
+ public GroupingLevel setGroupPrototype(Group group) {
+ this.collect = group;
+ return this;
+ }
+
+ /**
+ * <p>Returns the prototype to use when creating groups at this level.</p>
+ *
+ * @return The group prototype.
+ */
+ public Group getGroupPrototype() {
+ return collect;
+ }
+
+ /**
+ * <p>Tell if ordering will need results collected in children.</p>
+ *
+ * @return If deeper resultcollection is needed.
+ */
+ public boolean needResultCollection() {
+ return !collect.isRankedByRelevance();
+ }
+
+ @Override
+ protected int onGetClassId() {
+ return classId;
+ }
+
+ @Override
+ protected void onSerialize(Serializer buf) {
+ buf.putLong(null, maxGroups);
+ buf.putLong(null, precision);
+ serializeOptional(buf, classify);
+ collect.serializeWithId(buf);
+ }
+
+ @Override
+ protected void onDeserialize(Deserializer buf) {
+ maxGroups = buf.getLong(null);
+ precision = buf.getLong(null);
+ classify = (ExpressionNode)deserializeOptional(buf);
+ collect.deserializeWithId(buf);
+ }
+
+ @Override
+ public int hashCode() {
+ return super.hashCode() + (int)maxGroups + (int)precision + collect.hashCode();
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ if (!super.equals(obj)) {
+ return false;
+ }
+ GroupingLevel rhs = (GroupingLevel)obj;
+ if (maxGroups != rhs.maxGroups) {
+ return false;
+ }
+ if (precision != rhs.precision) {
+ return false;
+ }
+ if (!equals(classify, rhs.classify)) {
+ return false;
+ }
+ if (!collect.equals(rhs.collect)) {
+ return false;
+ }
+ return true;
+ }
+
+ @Override
+ public GroupingLevel clone() {
+ GroupingLevel obj = (GroupingLevel)super.clone();
+ if (classify != null) {
+ obj.classify = classify.clone();
+ }
+ obj.collect = collect.clone();
+ return obj;
+ }
+
+ @Override
+ public void visitMembers(ObjectVisitor visitor) {
+ super.visitMembers(visitor);
+ visitor.visit("maxGroups", maxGroups);
+ visitor.visit("precision", precision);
+ visitor.visit("classify", classify);
+ visitor.visit("collect", collect);
+ }
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/aggregation/Hit.java b/searchlib/src/main/java/com/yahoo/searchlib/aggregation/Hit.java
new file mode 100644
index 00000000000..8c5db8a6ecc
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/aggregation/Hit.java
@@ -0,0 +1,104 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.aggregation;
+
+import com.yahoo.vespa.objects.Deserializer;
+import com.yahoo.vespa.objects.Identifiable;
+import com.yahoo.vespa.objects.ObjectVisitor;
+import com.yahoo.vespa.objects.Serializer;
+
+/**
+ * This class represents a generic hit with a rank value. Actual hits are represented using subclasses of this class.
+ *
+ * @author <a href="mailto:havardpe@yahoo-inc.com">Haavard Pettersen</a>
+ */
+public abstract class Hit extends Identifiable {
+
+ public static final int classId = registerClass(0x4000 + 94, Hit.class); // shared with c++
+ private Object context = null;
+ private double rank = 0.0;
+
+ /**
+ * Constructs an empty result node.
+ */
+ public Hit() {
+ // empty
+ }
+
+ /**
+ * Create a new hit with the given rank
+ *
+ * @param rank generic rank value
+ */
+ public Hit(double rank) {
+ this.rank = rank;
+ }
+
+ /**
+ * Obtain the rank of this hit. This is a comparable rank to allow multilevel sorting on arbitrary rank type.
+ *
+ * @return generic rank value
+ */
+ public double getRank() {
+ return rank;
+ }
+
+ /**
+ * Returns the context object of this hit.
+ *
+ * @return The context object.
+ */
+ public Object getContext() {
+ return context;
+ }
+
+ /**
+ * Sets the context object of this hit. This is not serialized, and is merely a tag used by the QRS.
+ *
+ * @param context The context to set.
+ * @return This, to allow chaining.
+ */
+ public Hit setContext(Object context) {
+ this.context = context;
+ return this;
+ }
+
+ @Override
+ protected void onSerialize(Serializer buf) {
+ super.onSerialize(buf);
+ buf.putDouble(null, rank);
+ }
+
+ @Override
+ protected void onDeserialize(Deserializer buf) {
+ super.onDeserialize(buf);
+ rank = buf.getDouble(null);
+ }
+
+ @Override
+ public int hashCode() {
+ return super.hashCode() + (int)rank;
+ }
+
+ @SuppressWarnings({ "RedundantIfStatement", "EqualsWhichDoesntCheckParameterClass" })
+ @Override
+ public boolean equals(Object obj) {
+ if (!super.equals(obj)) {
+ return false;
+ }
+ Hit rhs = (Hit)obj;
+ if (rank != rhs.rank) {
+ return false;
+ }
+ if (!equals(context, rhs.context)) {
+ return false;
+ }
+ return true;
+ }
+
+ @Override
+ public void visitMembers(ObjectVisitor visitor) {
+ super.visitMembers(visitor);
+ visitor.visit("rank", rank);
+ visitor.visit("context", context);
+ }
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/aggregation/HitsAggregationResult.java b/searchlib/src/main/java/com/yahoo/searchlib/aggregation/HitsAggregationResult.java
new file mode 100644
index 00000000000..6d5d95bbcc0
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/aggregation/HitsAggregationResult.java
@@ -0,0 +1,218 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.aggregation;
+
+import com.yahoo.searchlib.expression.FloatResultNode;
+import com.yahoo.searchlib.expression.ResultNode;
+import com.yahoo.text.Utf8;
+import com.yahoo.vespa.objects.*;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.List;
+
+/**
+ * This is an aggregated result holding the top n hits for a single group.
+ *
+ * @author <a href="mailto:havardpe@yahoo-inc.com">Haavard Pettersen</a>
+ * @author <a href="mailto:balder@yahoo-inc.com">Henning Baldersheim</a>
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
+ */
+public class HitsAggregationResult extends AggregationResult {
+
+ public static final int classId = registerClass(0x4000 + 87, HitsAggregationResult.class);
+ private String summaryClass = "default";
+ private int maxHits = -1;
+ private List<Hit> hits = new ArrayList<Hit>();
+
+ /**
+ * Constructs an empty result node.
+ */
+ public HitsAggregationResult() {
+ // empty
+ }
+
+ /**
+ * Create a hits aggregation result that will collect the given number of hits
+ *
+ * @param maxHits maximum number of hits to collect
+ */
+ public HitsAggregationResult(int maxHits) {
+ this.maxHits = maxHits;
+ }
+
+ /**
+ * Create a hits aggregation result that will collect the given number of hits of the summaryClass asked.
+ *
+ * @param maxHits maximum number of hits to collect
+ * @param summaryClass SummaryClass to use for hits to collect
+ */
+ public HitsAggregationResult(int maxHits, String summaryClass) {
+ this.summaryClass = summaryClass;
+ this.maxHits = maxHits;
+ }
+
+ /**
+ * Obtain the summary class used to collect the hits.
+ *
+ * @return The summary class id.
+ */
+ public String getSummaryClass() {
+ return summaryClass;
+ }
+
+ /**
+ * Obtain the maximum number of hits to collect.
+ *
+ * @return Max number of hits to collect.
+ */
+ public int getMaxHits() {
+ return maxHits;
+ }
+
+ /**
+ * Sets the summary class of hits to collect.
+ *
+ * @param summaryClass The summary class to collect.
+ * @return This, to allow chaining.
+ */
+ public HitsAggregationResult setSummaryClass(String summaryClass) {
+ this.summaryClass = summaryClass;
+ return this;
+ }
+
+ /**
+ * Sets the maximum number of hits to collect.
+ *
+ * @param maxHits The number of hits to collect.
+ * @return This, to allow chaining.
+ */
+ public HitsAggregationResult setMaxHits(int maxHits) {
+ this.maxHits = maxHits;
+ return this;
+ }
+
+ /**
+ * Obtain the hits collected by this aggregation result
+ *
+ * @return collected hits
+ */
+ public List<Hit> getHits() {
+ return hits;
+ }
+
+ /**
+ * Add a hit to this aggregation result
+ *
+ * @param h the hit
+ * @return this object
+ */
+ public HitsAggregationResult addHit(Hit h) {
+ hits.add(h);
+ return this;
+ }
+
+ @Override
+ public ResultNode getRank() {
+ if (hits.isEmpty()) {
+ return new FloatResultNode(0);
+ }
+ return new FloatResultNode(hits.get(0).getRank());
+ }
+
+ @Override
+ protected int onGetClassId() {
+ return classId;
+ }
+
+ @Override
+ protected void onSerialize(Serializer buf) {
+ super.onSerialize(buf);
+ byte[] raw = Utf8.toBytes(summaryClass);
+ buf.putInt(null, raw.length);
+ buf.put(null, raw);
+
+ buf.putInt(null, maxHits);
+ int numHits = hits.size();
+ buf.putInt(null, numHits);
+ for (Hit h : hits) {
+ serializeOptional(buf, h);
+ }
+ }
+
+ @Override
+ protected void onDeserialize(Deserializer buf) {
+ super.onDeserialize(buf);
+ summaryClass = getUtf8(buf);
+ maxHits = buf.getInt(null);
+ int numHits = buf.getInt(null);
+ for (int i = 0; i < numHits; i++) {
+ Hit h = (Hit)deserializeOptional(buf);
+ hits.add(h);
+ }
+ }
+
+ @Override
+ protected void onMerge(AggregationResult result) {
+ hits.addAll(((HitsAggregationResult)result).hits);
+ }
+
+ @Override
+ public void postMerge() {
+ Collections.sort(hits, new Comparator<Hit>() {
+ public int compare(Hit lhs, Hit rhs) {
+ return (lhs.getRank() > rhs.getRank()) ? -1 : (lhs.getRank() < rhs.getRank()) ? 1 : 0;
+ }
+ });
+ if ((maxHits >= 0) && (hits.size() > maxHits)) {
+ hits = hits.subList(0, maxHits);
+ }
+ }
+
+ @Override
+ protected boolean equalsAggregation(AggregationResult obj) {
+ HitsAggregationResult rhs = (HitsAggregationResult)obj;
+ if (!summaryClass.equals(rhs.summaryClass)) {
+ return false;
+ }
+ if (maxHits != rhs.maxHits) {
+ return false;
+ }
+ if (!hits.equals(rhs.hits)) {
+ return false;
+ }
+ return true;
+ }
+
+ @Override
+ public int hashCode() {
+ return super.hashCode() + summaryClass.hashCode() + maxHits + hits.hashCode();
+ }
+
+ @Override
+ public HitsAggregationResult clone() {
+ HitsAggregationResult obj = (HitsAggregationResult)super.clone();
+ obj.summaryClass = summaryClass;
+ obj.maxHits = maxHits;
+ obj.hits = new ArrayList<Hit>();
+ for (Hit hit : hits) {
+ obj.hits.add((Hit)hit.clone());
+ }
+ return obj;
+ }
+
+ @Override
+ public void visitMembers(ObjectVisitor visitor) {
+ super.visitMembers(visitor);
+ visitor.visit("summaryClass", summaryClass);
+ visitor.visit("maxHits", maxHits);
+ visitor.visit("hits", hits);
+ }
+
+ @Override
+ public void selectMembers(ObjectPredicate predicate, ObjectOperation operation) {
+ for (Hit hit : hits) {
+ hit.select(predicate, operation);
+ }
+ }
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/aggregation/MaxAggregationResult.java b/searchlib/src/main/java/com/yahoo/searchlib/aggregation/MaxAggregationResult.java
new file mode 100644
index 00000000000..dba44dcf023
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/aggregation/MaxAggregationResult.java
@@ -0,0 +1,103 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.aggregation;
+
+import com.yahoo.searchlib.expression.ResultNode;
+import com.yahoo.searchlib.expression.SingleResultNode;
+import com.yahoo.vespa.objects.Deserializer;
+import com.yahoo.vespa.objects.ObjectVisitor;
+import com.yahoo.vespa.objects.Serializer;
+
+/**
+ * This is an aggregated result holding the maximum result of the matching hits.
+ *
+ * @author <a href="mailto:balder@yahoo-inc.com">Henning Baldersheim</a>
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
+ */
+public class MaxAggregationResult extends AggregationResult {
+
+ public static final int classId = registerClass(0x4000 + 83, MaxAggregationResult.class);
+ private SingleResultNode max;
+
+ /**
+ * Constructs an empty result node. <b>NOTE:</b> This instance is broken until non-optional member data is set.
+ */
+ public MaxAggregationResult() {
+
+ }
+
+ /**
+ * Constructs an instance of this class with given max value.
+ *
+ * @param max The initial maximum to set.
+ */
+ public MaxAggregationResult(SingleResultNode max) {
+ setMax(max);
+ }
+
+ /**
+ * Returns the maximum value found in all matching hits.
+ *
+ * @return The value.
+ */
+ public final SingleResultNode getMax() {
+ return max;
+ }
+
+ /**
+ * Sets the maximum value found in all matching hits.
+ *
+ * @param max The value.
+ * @return This, to allow chaining.
+ */
+ public final MaxAggregationResult setMax(SingleResultNode max) {
+ this.max = max;
+ return this;
+ }
+
+ @Override
+ public ResultNode getRank() {
+ return max;
+ }
+
+ @Override
+ protected int onGetClassId() {
+ return classId;
+ }
+
+ @Override
+ protected void onSerialize(Serializer buf) {
+ super.onSerialize(buf);
+ serializeOptional(buf, max);
+ }
+
+ @Override
+ protected void onDeserialize(Deserializer buf) {
+ super.onDeserialize(buf);
+ max = (SingleResultNode)deserializeOptional(buf);
+ }
+
+ @Override
+ protected void onMerge(AggregationResult result) {
+ max.max(((MaxAggregationResult)result).max);
+ }
+
+ @Override
+ protected boolean equalsAggregation(AggregationResult obj) {
+ return equals(max, ((MaxAggregationResult)obj).max);
+ }
+
+ @Override
+ public MaxAggregationResult clone() {
+ MaxAggregationResult obj = (MaxAggregationResult)super.clone();
+ if (max != null) {
+ obj.max = (SingleResultNode)max.clone();
+ }
+ return obj;
+ }
+
+ @Override
+ public void visitMembers(ObjectVisitor visitor) {
+ super.visitMembers(visitor);
+ visitor.visit("max", max);
+ }
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/aggregation/MinAggregationResult.java b/searchlib/src/main/java/com/yahoo/searchlib/aggregation/MinAggregationResult.java
new file mode 100644
index 00000000000..ca8c71e6ede
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/aggregation/MinAggregationResult.java
@@ -0,0 +1,103 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.aggregation;
+
+import com.yahoo.searchlib.expression.ResultNode;
+import com.yahoo.searchlib.expression.SingleResultNode;
+import com.yahoo.vespa.objects.Deserializer;
+import com.yahoo.vespa.objects.ObjectVisitor;
+import com.yahoo.vespa.objects.Serializer;
+
+/**
+ * This is an aggregated result holding the minimum result of the matching hits.
+ *
+ * @author <a href="mailto:balder@yahoo-inc.com">Henning Baldersheim</a>
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
+ */
+public class MinAggregationResult extends AggregationResult {
+
+ public static final int classId = registerClass(0x4000 + 84, MinAggregationResult.class);
+ private SingleResultNode min;
+
+ /**
+ * Constructs an empty result node. <b>NOTE:</b> This instance is broken until non-optional member data is set.
+ */
+ public MinAggregationResult() {
+
+ }
+
+ /**
+ * Constructs an instance of this class with given min value.
+ *
+ * @param min The initial minimum to set.
+ */
+ public MinAggregationResult(SingleResultNode min) {
+ setMin(min);
+ }
+
+ /**
+ * Returns the minimum value found in all matching hits.
+ *
+ * @return The value.
+ */
+ public final SingleResultNode getMin() {
+ return min;
+ }
+
+ /**
+ * Sets the minimum value found in all matching hits.
+ *
+ * @param min The value.
+ * @return This, to allow chaining.
+ */
+ public final MinAggregationResult setMin(SingleResultNode min) {
+ this.min = min;
+ return this;
+ }
+
+ @Override
+ public ResultNode getRank() {
+ return min;
+ }
+
+ @Override
+ protected int onGetClassId() {
+ return classId;
+ }
+
+ @Override
+ protected void onSerialize(Serializer buf) {
+ super.onSerialize(buf);
+ serializeOptional(buf, min);
+ }
+
+ @Override
+ protected void onDeserialize(Deserializer buf) {
+ super.onDeserialize(buf);
+ min = (SingleResultNode)deserializeOptional(buf);
+ }
+
+ @Override
+ protected void onMerge(AggregationResult result) {
+ min.min(((MinAggregationResult)result).min);
+ }
+
+ @Override
+ protected boolean equalsAggregation(AggregationResult obj) {
+ return equals(min, ((MinAggregationResult)obj).min);
+ }
+
+ @Override
+ public MinAggregationResult clone() {
+ MinAggregationResult obj = (MinAggregationResult)super.clone();
+ if (min != null) {
+ obj.min = (SingleResultNode)min.clone();
+ }
+ return obj;
+ }
+
+ @Override
+ public void visitMembers(ObjectVisitor visitor) {
+ super.visitMembers(visitor);
+ visitor.visit("min", min);
+ }
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/aggregation/RawData.java b/searchlib/src/main/java/com/yahoo/searchlib/aggregation/RawData.java
new file mode 100755
index 00000000000..7c9dd33477b
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/aggregation/RawData.java
@@ -0,0 +1,130 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.aggregation;
+
+import com.yahoo.vespa.objects.Deserializer;
+import com.yahoo.vespa.objects.Serializer;
+
+import java.util.Arrays;
+
+/**
+ * <p>This class encapsulates a byte array into a cloneable and comparable object. It also implements a sane {@link
+ * #hashCode()} and {@link #toString()}.</p>
+ *
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
+ */
+public class RawData implements Cloneable, Comparable<RawData> {
+
+ private byte[] data;
+
+ /**
+ * <p>Constructs an empty data object.</p>
+ */
+ public RawData() {
+ data = new byte[0];
+ }
+
+ /**
+ * <p>Constructs a raw data object that holds the given byte array.</p>
+ *
+ * @param data The rank to set.
+ */
+ public RawData(byte[] data) {
+ setData(data);
+ }
+
+ /**
+ * <p>Serializes the content of this data into the given byte buffer.</p>
+ *
+ * @param buf The buffer to serialize to.
+ */
+ public void serialize(Serializer buf) {
+ buf.putInt(null, data.length);
+ buf.put(null, data);
+ }
+
+ /**
+ * <p>Deserializes the content for this data from the given byte buffer.</p>
+ *
+ * @param buf The buffer to deserialize from.
+ */
+ public void deserialize(Deserializer buf) {
+ int len = buf.getInt(null);
+ data = buf.getBytes(null, len);
+ }
+
+ /**
+ * <p>Returns the byte array that constitutes this data.</p>
+ *
+ * @return The byte array.
+ */
+ public byte[] getData() {
+ return data;
+ }
+
+ /**
+ * <p>Sets the byte array that constitutes this data. This does <b>not</b> copy the given array, it simply assigns
+ * it to this.</p>
+ *
+ * @param data The data to set.
+ * @return This, to allow chaining.
+ */
+ public RawData setData(byte[] data) {
+ if (data == null) {
+ throw new IllegalArgumentException("Data can not be null.");
+ }
+ this.data = data;
+ return this;
+ }
+
+ @Override
+ public int compareTo(RawData rhs) {
+ return compare(data, rhs.data);
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ if (!(obj instanceof RawData)) {
+ return false;
+ }
+ RawData rhs = (RawData)obj;
+ if (!Arrays.equals(data, rhs.data)) {
+ return false;
+ }
+ return true;
+ }
+
+ @Override
+ public int hashCode() {
+ return Arrays.hashCode(data);
+ }
+
+ @Override
+ public String toString() {
+ return "RawData(data = " + Arrays.toString(data) + ")";
+ }
+
+ @Override
+ public Object clone() {
+ return new RawData(Arrays.copyOf(data, data.length));
+ }
+
+ /**
+ * <p>Implements comparison of two byte arrays.</p>
+ *
+ * @param lhs The left-hand-side of the comparison.
+ * @param rhs The right-hand-side of the comparison.
+ * @return The result of comparing the two byte arrays.
+ */
+ public static int compare(byte[] lhs, byte[] rhs) {
+ int cmp = 0;
+ for (int i = 0, len = Math.min(lhs.length, rhs.length); (i < len) && (cmp == 0); i++) {
+ int a = lhs[i] & 0xFF;
+ int b = rhs[i] & 0xFF;
+ cmp = a - b;
+ }
+ if (cmp == 0) {
+ cmp = lhs.length - rhs.length;
+ }
+ return cmp;
+ }
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/aggregation/SumAggregationResult.java b/searchlib/src/main/java/com/yahoo/searchlib/aggregation/SumAggregationResult.java
new file mode 100644
index 00000000000..88e61d98ba0
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/aggregation/SumAggregationResult.java
@@ -0,0 +1,103 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.aggregation;
+
+import com.yahoo.searchlib.expression.ResultNode;
+import com.yahoo.searchlib.expression.SingleResultNode;
+import com.yahoo.vespa.objects.Deserializer;
+import com.yahoo.vespa.objects.ObjectVisitor;
+import com.yahoo.vespa.objects.Serializer;
+
+/**
+ * This is an aggregated result holding the sum of the aggregating expression for all matching hits.
+ *
+ * @author <a href="mailto:balder@yahoo-inc.com">Henning Baldersheim</a>
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
+ */
+public class SumAggregationResult extends AggregationResult {
+
+ public static final int classId = registerClass(0x4000 + 82, SumAggregationResult.class);
+ private SingleResultNode sum;
+
+ /**
+ * Constructs an empty result node. <b>NOTE:</b> This instance is broken until non-optional member data is set.
+ */
+ public SumAggregationResult() {
+
+ }
+
+ /**
+ * Constructs an instance of this class with given sum.
+ *
+ * @param sum The initial sum to set.
+ */
+ public SumAggregationResult(SingleResultNode sum) {
+ setSum(sum);
+ }
+
+ /**
+ * Returns the sum of all results in this.
+ *
+ * @return The numeric sum.
+ */
+ public final SingleResultNode getSum() {
+ return sum;
+ }
+
+ /**
+ * Sets the sum of all results in this.
+ *
+ * @param sum The sum to set.
+ * @return This, to allow chaining.
+ */
+ public final SumAggregationResult setSum(SingleResultNode sum) {
+ this.sum = sum;
+ return this;
+ }
+
+ @Override
+ public ResultNode getRank() {
+ return sum;
+ }
+
+ @Override
+ protected int onGetClassId() {
+ return classId;
+ }
+
+ @Override
+ protected void onSerialize(Serializer buf) {
+ super.onSerialize(buf);
+ serializeOptional(buf, sum);
+ }
+
+ @Override
+ protected void onDeserialize(Deserializer buf) {
+ super.onDeserialize(buf);
+ sum = (SingleResultNode)deserializeOptional(buf);
+ }
+
+ @Override
+ protected void onMerge(AggregationResult result) {
+ sum.add(((SumAggregationResult)result).sum);
+ }
+
+ @Override
+ public SumAggregationResult clone() {
+ SumAggregationResult obj = (SumAggregationResult)super.clone();
+ if (sum != null) {
+ obj.sum = (SingleResultNode)sum.clone();
+ }
+ return obj;
+ }
+
+ @Override
+ protected boolean equalsAggregation(AggregationResult obj) {
+ return equals(sum, ((SumAggregationResult)obj).sum);
+ }
+
+ @Override
+ public void visitMembers(ObjectVisitor visitor) {
+ super.visitMembers(visitor);
+ visitor.visit("sum", sum);
+ }
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/aggregation/VdsHit.java b/searchlib/src/main/java/com/yahoo/searchlib/aggregation/VdsHit.java
new file mode 100644
index 00000000000..adecdee8401
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/aggregation/VdsHit.java
@@ -0,0 +1,91 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.aggregation;
+
+import com.yahoo.text.Utf8;
+import com.yahoo.vespa.objects.Deserializer;
+import com.yahoo.vespa.objects.ObjectVisitor;
+import com.yahoo.vespa.objects.Serializer;
+
+public class VdsHit extends Hit {
+
+ public static final int classId = registerClass(0x4000 + 96, VdsHit.class);
+ private String docId = "";
+ private RawData summary = new RawData();
+
+ @SuppressWarnings("UnusedDeclaration")
+ public VdsHit() {
+ // user by deserializer
+ }
+
+ /**
+ * Create a hit with the given path and document id.
+ *
+ * @param summary The summary blob standard fs4 coding.
+ * @param docId The local document id.
+ * @param rank The rank of this hit.
+ */
+ public VdsHit(String docId, byte[] summary, double rank) {
+ super(rank);
+ this.docId = docId;
+ this.summary = new RawData(summary);
+ }
+
+ /**
+ * Obtain the summary blob for this hit.
+ *
+ * @return The summary blob.
+ */
+ public RawData getSummary() {
+ return summary;
+ }
+
+ /**
+ * Obtain the local document id of this hit.
+ *
+ * @return The local document id.
+ */
+ public String getDocId() {
+ return docId;
+ }
+
+ @Override
+ protected int onGetClassId() {
+ return classId;
+ }
+
+ @Override
+ protected void onSerialize(Serializer buf) {
+ super.onSerialize(buf);
+ byte[] utf8 = Utf8.toBytes(docId);
+ buf.putInt(null, utf8.length);
+ buf.put(null, utf8);
+ summary.serialize(buf);
+ }
+
+ @Override
+ protected void onDeserialize(Deserializer buf) {
+ super.onDeserialize(buf);
+ docId = getUtf8(buf);
+ summary.deserialize(buf);
+ }
+
+ @Override
+ public int hashCode() {
+ return super.hashCode() + docId.hashCode() + summary.hashCode();
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ VdsHit rhs = (VdsHit)obj;
+ return super.equals(obj) &&
+ docId.equals(rhs.docId) &&
+ summary.equals(rhs.summary);
+ }
+
+ @Override
+ public void visitMembers(ObjectVisitor visitor) {
+ super.visitMembers(visitor);
+ visitor.visit("docId", docId);
+ visitor.visit("summary", summary);
+ }
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/aggregation/XorAggregationResult.java b/searchlib/src/main/java/com/yahoo/searchlib/aggregation/XorAggregationResult.java
new file mode 100644
index 00000000000..ee171be0c4b
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/aggregation/XorAggregationResult.java
@@ -0,0 +1,99 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.aggregation;
+
+import com.yahoo.searchlib.expression.IntegerResultNode;
+import com.yahoo.searchlib.expression.ResultNode;
+import com.yahoo.vespa.objects.Deserializer;
+import com.yahoo.vespa.objects.ObjectVisitor;
+import com.yahoo.vespa.objects.Serializer;
+
+/**
+ * This is an aggregated result holding the xor of the aggregating expression for all matching hits.
+ *
+ * @author <a href="mailto:balder@yahoo-inc.com">Henning Baldersheim</a>
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
+ */
+public class XorAggregationResult extends AggregationResult {
+
+ public static final int classId = registerClass(0x4000 + 86, XorAggregationResult.class);
+ private long xor = 0;
+
+ /**
+ * Constructs an empty result node. <b>NOTE:</b> This instance is broken until non-optional member data is set.
+ */
+ public XorAggregationResult() {
+
+ }
+
+ /**
+ * Constructs an instance of this class with given xor value.
+ *
+ * @param xor The initial xor value to set.
+ */
+ public XorAggregationResult(long xor) {
+ setXor(xor);
+ }
+
+ /**
+ * Returns the current xor value.
+ *
+ * @return The value.
+ */
+ public long getXor() {
+ return xor;
+ }
+
+ /**
+ * Sets the current xor value.
+ *
+ * @param xor The value to set.
+ * @return This, to allow chaining.
+ */
+ public XorAggregationResult setXor(long xor) {
+ this.xor = xor;
+ return this;
+ }
+
+ @Override
+ public ResultNode getRank() {
+ return new IntegerResultNode(xor);
+ }
+
+ @Override
+ protected int onGetClassId() {
+ return classId;
+ }
+
+ @Override
+ protected void onSerialize(Serializer buf) {
+ super.onSerialize(buf);
+ buf.putLong(null, xor);
+ }
+
+ @Override
+ protected void onDeserialize(Deserializer buf) {
+ super.onDeserialize(buf);
+ xor = buf.getLong(null);
+ }
+
+ @Override
+ protected void onMerge(AggregationResult result) {
+ xor = xor ^ ((XorAggregationResult)result).xor;
+ }
+
+ @Override
+ protected boolean equalsAggregation(AggregationResult obj) {
+ return xor == ((XorAggregationResult)obj).xor;
+ }
+
+ @Override
+ public int hashCode() {
+ return super.hashCode() + (int)xor;
+ }
+
+ @Override
+ public void visitMembers(ObjectVisitor visitor) {
+ super.visitMembers(visitor);
+ visitor.visit("xor", xor);
+ }
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/aggregation/hll/BiasEstimator.java b/searchlib/src/main/java/com/yahoo/searchlib/aggregation/hll/BiasEstimator.java
new file mode 100644
index 00000000000..54651bdfae4
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/aggregation/hll/BiasEstimator.java
@@ -0,0 +1,131 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.aggregation.hll;
+
+import com.google.common.base.Preconditions;
+
+import java.util.Arrays;
+
+/**
+ * Performs bias correction for a given precision and raw estimate.
+ * The values are taken from Google's HLL++ paper:
+ * https://docs.google.com/document/d/1gyjfMHy43U9OWBXxfaeG-3MjGzejW1dlpyMwEYAAWEI/view?fullscreen#
+ *
+ * @author bjorncs
+ */
+public class BiasEstimator {
+ // Raw estimate data for given precision
+ private final double[] rawEstimateData;
+ // Raw bias data for a given precision
+ private final double[] biasData;
+
+ /**
+ * Constructs the BiasEstimator for a given HLL precision.
+ *
+ * @param precision HLL precision
+ */
+ public BiasEstimator(int precision) {
+ Preconditions.checkArgument(precision >= 4 && precision <= 18,
+ "Invalid precision: %s. Expected 4 <= precision <= 18.", precision);
+ this.rawEstimateData = rawEstimateDataAllPrecisions[precision - 4];
+ this.biasData = biasDataAllPrecisions[precision - 4];
+ }
+
+ /**
+ * Maps a given raw estimate to a bias correction value. The callee should subtract the bias from the raw estimate
+ * to get a bias corrected HLL estimate. Uses linear interpolation when no exact value exist.
+ *
+ * @param rawEstimate The raw HLL estimate
+ * @return The estimated bias for the given raw estimate.
+ */
+ public double estimateBias(double rawEstimate) {
+ int index = Arrays.binarySearch(rawEstimateData, rawEstimate);
+ // Check if the value is in rawEstimate or not.
+ if (index >= 0) {
+ return biasData[index];
+ } else {
+ int insertionIndex = -index - 1;
+ if (insertionIndex == 0) {
+ return biasData[0];
+ } else if (insertionIndex == biasData.length) {
+ return biasData[biasData.length - 1];
+ } else {
+ //Perform linear interpolation
+ double x0 = rawEstimateData[insertionIndex - 1];
+ double x1 = rawEstimateData[insertionIndex];
+ double f0 = biasData[insertionIndex - 1];
+ double f1 = biasData[insertionIndex];
+ return linearInterpolationOf(x0, x1, f0, f1, rawEstimate);
+ }
+ }
+ }
+
+ private static double linearInterpolationOf(double x0, double x1, double f0, double f1, double x) {
+ return f0 + (f1 - f0) / (x1 - x0) * (x - x0);
+ }
+
+ private static final double[][] rawEstimateDataAllPrecisions = {
+ // precision 4
+ { 11, 11.717, 12.207, 12.7896, 13.2882, 13.8204, 14.3772, 14.9342, 15.5202, 16.161, 16.7722, 17.4636, 18.0396, 18.6766, 19.3566, 20.0454, 20.7936, 21.4856, 22.2666, 22.9946, 23.766, 24.4692, 25.3638, 26.0764, 26.7864, 27.7602, 28.4814, 29.433, 30.2926, 31.0664, 31.9996, 32.7956, 33.5366, 34.5894, 35.5738, 36.2698, 37.3682, 38.0544, 39.2342, 40.0108, 40.7966, 41.9298, 42.8704, 43.6358, 44.5194, 45.773, 46.6772, 47.6174, 48.4888, 49.3304, 50.2506, 51.4996, 52.3824, 53.3078, 54.3984, 55.5838, 56.6618, 57.2174, 58.3514, 59.0802, 60.1482, 61.0376, 62.3598, 62.8078, 63.9744, 64.914, 65.781, 67.1806, 68.0594, 68.8446, 69.7928, 70.8248, 71.8324, 72.8598, 73.6246, 74.7014, 75.393, 76.6708, 77.2394, },
+ // precision 5
+ { 23, 23.1194, 23.8208, 24.2318, 24.77, 25.2436, 25.7774, 26.2848, 26.8224, 27.3742, 27.9336, 28.503, 29.0494, 29.6292, 30.2124, 30.798, 31.367, 31.9728, 32.5944, 33.217, 33.8438, 34.3696, 35.0956, 35.7044, 36.324, 37.0668, 37.6698, 38.3644, 39.049, 39.6918, 40.4146, 41.082, 41.687, 42.5398, 43.2462, 43.857, 44.6606, 45.4168, 46.1248, 46.9222, 47.6804, 48.447, 49.3454, 49.9594, 50.7636, 51.5776, 52.331, 53.19, 53.9676, 54.7564, 55.5314, 56.4442, 57.3708, 57.9774, 58.9624, 59.8796, 60.755, 61.472, 62.2076, 63.1024, 63.8908, 64.7338, 65.7728, 66.629, 67.413, 68.3266, 69.1524, 70.2642, 71.1806, 72.0566, 72.9192, 73.7598, 74.3516, 75.5802, 76.4386, 77.4916, 78.1524, 79.1892, 79.8414, 80.8798, 81.8376, 82.4698, 83.7656, 84.331, 85.5914, 86.6012, 87.7016, 88.5582, 89.3394, 90.3544, 91.4912, 92.308, 93.3552, 93.9746, 95.2052, 95.727, 97.1322, 98.3944, 98.7588, 100.242, 101.1914, 102.2538, 102.8776, 103.6292, 105.1932, 105.9152, 107.0868, 107.6728, 108.7144, 110.3114, 110.8716, 111.245, 112.7908, 113.7064, 114.636, 115.7464, 116.1788, 117.7464, 118.4896, 119.6166, 120.5082, 121.7798, 122.9028, 123.4426, 124.8854, 125.705, 126.4652, 128.3464, 128.3462, 130.0398, 131.0342, 131.0042, 132.4766, 133.511, 134.7252, 135.425, 136.5172, 138.0572, 138.6694, 139.3712, 140.8598, 141.4594, 142.554, 143.4006, 144.7374, 146.1634, 146.8994, 147.605, 147.9304, 149.1636, 150.2468, 151.5876, 152.2096, 153.7032, 154.7146, 155.807, 156.9228, 157.0372, 158.5852, },
+ // precision 6
+ { 46, 46.1902, 47.271, 47.8358, 48.8142, 49.2854, 50.317, 51.354, 51.8924, 52.9436, 53.4596, 54.5262, 55.6248, 56.1574, 57.2822, 57.837, 58.9636, 60.074, 60.7042, 61.7976, 62.4772, 63.6564, 64.7942, 65.5004, 66.686, 67.291, 68.5672, 69.8556, 70.4982, 71.8204, 72.4252, 73.7744, 75.0786, 75.8344, 77.0294, 77.8098, 79.0794, 80.5732, 81.1878, 82.5648, 83.2902, 84.6784, 85.3352, 86.8946, 88.3712, 89.0852, 90.499, 91.2686, 92.6844, 94.2234, 94.9732, 96.3356, 97.2286, 98.7262, 100.3284, 101.1048, 102.5962, 103.3562, 105.1272, 106.4184, 107.4974, 109.0822, 109.856, 111.48, 113.2834, 114.0208, 115.637, 116.5174, 118.0576, 119.7476, 120.427, 122.1326, 123.2372, 125.2788, 126.6776, 127.7926, 129.1952, 129.9564, 131.6454, 133.87, 134.5428, 136.2, 137.0294, 138.6278, 139.6782, 141.792, 143.3516, 144.2832, 146.0394, 147.0748, 148.4912, 150.849, 151.696, 153.5404, 154.073, 156.3714, 157.7216, 158.7328, 160.4208, 161.4184, 163.9424, 165.2772, 166.411, 168.1308, 168.769, 170.9258, 172.6828, 173.7502, 175.706, 176.3886, 179.0186, 180.4518, 181.927, 183.4172, 184.4114, 186.033, 188.5124, 189.5564, 191.6008, 192.4172, 193.8044, 194.997, 197.4548, 198.8948, 200.2346, 202.3086, 203.1548, 204.8842, 206.6508, 206.6772, 209.7254, 210.4752, 212.7228, 214.6614, 215.1676, 217.793, 218.0006, 219.9052, 221.66, 223.5588, 225.1636, 225.6882, 227.7126, 229.4502, 231.1978, 232.9756, 233.1654, 236.727, 238.1974, 237.7474, 241.1346, 242.3048, 244.1948, 245.3134, 246.879, 249.1204, 249.853, 252.6792, 253.857, 254.4486, 257.2362, 257.9534, 260.0286, 260.5632, 262.663, 264.723, 265.7566, 267.2566, 267.1624, 270.62, 272.8216, 273.2166, 275.2056, 276.2202, 278.3726, 280.3344, 281.9284, 283.9728, 284.1924, 286.4872, 287.587, 289.807, 291.1206, 292.769, 294.8708, 296.665, 297.1182, 299.4012, 300.6352, 302.1354, 304.1756, 306.1606, 307.3462, 308.5214, 309.4134, 310.8352, 313.9684, 315.837, 316.7796, 318.9858, },
+ // precision 7
+ { 92, 93.4934, 94.9758, 96.4574, 97.9718, 99.4954, 101.5302, 103.0756, 104.6374, 106.1782, 107.7888, 109.9522, 111.592, 113.2532, 114.9086, 116.5938, 118.9474, 120.6796, 122.4394, 124.2176, 125.9768, 128.4214, 130.2528, 132.0102, 133.8658, 135.7278, 138.3044, 140.1316, 142.093, 144.0032, 145.9092, 148.6306, 150.5294, 152.5756, 154.6508, 156.662, 159.552, 161.3724, 163.617, 165.5754, 167.7872, 169.8444, 172.7988, 174.8606, 177.2118, 179.3566, 181.4476, 184.5882, 186.6816, 189.0824, 191.0258, 193.6048, 196.4436, 198.7274, 200.957, 203.147, 205.4364, 208.7592, 211.3386, 213.781, 215.8028, 218.656, 221.6544, 223.996, 226.4718, 229.1544, 231.6098, 234.5956, 237.0616, 239.5758, 242.4878, 244.5244, 248.2146, 250.724, 252.8722, 255.5198, 258.0414, 261.941, 264.9048, 266.87, 269.4304, 272.028, 274.4708, 278.37, 281.0624, 283.4668, 286.5532, 289.4352, 293.2564, 295.2744, 298.2118, 300.7472, 304.1456, 307.2928, 309.7504, 312.5528, 315.979, 318.2102, 322.1834, 324.3494, 327.325, 330.6614, 332.903, 337.2544, 339.9042, 343.215, 345.2864, 348.0814, 352.6764, 355.301, 357.139, 360.658, 363.1732, 366.5902, 369.9538, 373.0828, 375.922, 378.9902, 382.7328, 386.4538, 388.1136, 391.2234, 394.0878, 396.708, 401.1556, 404.1852, 406.6372, 409.6822, 412.7796, 416.6078, 418.4916, 422.131, 424.5376, 428.1988, 432.211, 434.4502, 438.5282, 440.912, 444.0448, 447.7432, 450.8524, 453.7988, 456.7858, 458.8868, 463.9886, 466.5064, 468.9124, 472.6616, 475.4682, 478.582, 481.304, 485.2738, 488.6894, 490.329, 496.106, 497.6908, 501.1374, 504.5322, 506.8848, 510.3324, 513.4512, 516.179, 520.4412, 522.6066, 526.167, 528.7794, 533.379, 536.067, 538.46, 542.9116, 545.692, 547.9546, 552.493, 555.2722, 557.335, 562.449, 564.2014, 569.0738, 571.0974, 574.8564, 578.2996, 581.409, 583.9704, 585.8098, 589.6528, 594.5998, 595.958, 600.068, 603.3278, 608.2016, 609.9632, 612.864, 615.43, 620.7794, 621.272, 625.8644, 629.206, 633.219, 634.5154, 638.6102, },
+ // precision 8
+ { 184.2152, 187.2454, 190.2096, 193.6652, 196.6312, 199.6822, 203.249, 206.3296, 210.0038, 213.2074, 216.4612, 220.27, 223.5178, 227.4412, 230.8032, 234.1634, 238.1688, 241.6074, 245.6946, 249.2664, 252.8228, 257.0432, 260.6824, 264.9464, 268.6268, 272.2626, 276.8376, 280.4034, 284.8956, 288.8522, 292.7638, 297.3552, 301.3556, 305.7526, 309.9292, 313.8954, 318.8198, 322.7668, 327.298, 331.6688, 335.9466, 340.9746, 345.1672, 349.3474, 354.3028, 358.8912, 364.114, 368.4646, 372.9744, 378.4092, 382.6022, 387.843, 392.5684, 397.1652, 402.5426, 407.4152, 412.5388, 417.3592, 422.1366, 427.486, 432.3918, 437.5076, 442.509, 447.3834, 453.3498, 458.0668, 463.7346, 469.1228, 473.4528, 479.7, 484.644, 491.0518, 495.5774, 500.9068, 506.432, 512.1666, 517.434, 522.6644, 527.4894, 533.6312, 538.3804, 544.292, 550.5496, 556.0234, 562.8206, 566.6146, 572.4188, 579.117, 583.6762, 590.6576, 595.7864, 601.509, 607.5334, 612.9204, 619.772, 624.2924, 630.8654, 636.1836, 642.745, 649.1316, 655.0386, 660.0136, 666.6342, 671.6196, 678.1866, 684.4282, 689.3324, 695.4794, 702.5038, 708.129, 713.528, 720.3204, 726.463, 732.7928, 739.123, 744.7418, 751.2192, 756.5102, 762.6066, 769.0184, 775.2224, 781.4014, 787.7618, 794.1436, 798.6506, 805.6378, 811.766, 819.7514, 824.5776, 828.7322, 837.8048, 843.6302, 849.9336, 854.4798, 861.3388, 867.9894, 873.8196, 880.3136, 886.2308, 892.4588, 899.0816, 905.4076, 912.0064, 917.3878, 923.619, 929.998, 937.3482, 943.9506, 947.991, 955.1144, 962.203, 968.8222, 975.7324, 981.7826, 988.7666, 994.2648, 1000.3128, 1007.4082, 1013.7536, 1020.3376, 1026.7156, 1031.7478, 1037.4292, 1045.393, 1051.2278, 1058.3434, 1062.8726, 1071.884, 1076.806, 1082.9176, 1089.1678, 1095.5032, 1102.525, 1107.2264, 1115.315, 1120.93, 1127.252, 1134.1496, 1139.0408, 1147.5448, 1153.3296, 1158.1974, 1166.5262, 1174.3328, 1175.657, 1184.4222, 1190.9172, 1197.1292, 1204.4606, 1210.4578, 1218.8728, 1225.3336, 1226.6592, 1236.5768, 1241.363, 1249.4074, 1254.6566, 1260.8014, 1266.5454, 1274.5192, },
+ // precision 9
+ { 369, 374.8294, 381.2452, 387.6698, 394.1464, 400.2024, 406.8782, 413.6598, 420.462, 427.2826, 433.7102, 440.7416, 447.9366, 455.1046, 462.285, 469.0668, 476.306, 483.8448, 491.301, 498.9886, 506.2422, 513.8138, 521.7074, 529.7428, 537.8402, 545.1664, 553.3534, 561.594, 569.6886, 577.7876, 585.65, 594.228, 602.8036, 611.1666, 620.0818, 628.0824, 637.2574, 646.302, 655.1644, 664.0056, 672.3802, 681.7192, 690.5234, 700.2084, 708.831, 718.485, 728.1112, 737.4764, 746.76, 756.3368, 766.5538, 775.5058, 785.2646, 795.5902, 804.3818, 814.8998, 824.9532, 835.2062, 845.2798, 854.4728, 864.9582, 875.3292, 886.171, 896.781, 906.5716, 916.7048, 927.5322, 937.875, 949.3972, 958.3464, 969.7274, 980.2834, 992.1444, 1003.4264, 1013.0166, 1024.018, 1035.0438, 1046.34, 1057.6856, 1068.9836, 1079.0312, 1091.677, 1102.3188, 1113.4846, 1124.4424, 1135.739, 1147.1488, 1158.9202, 1169.406, 1181.5342, 1193.2834, 1203.8954, 1216.3286, 1226.2146, 1239.6684, 1251.9946, 1262.123, 1275.4338, 1285.7378, 1296.076, 1308.9692, 1320.4964, 1333.0998, 1343.9864, 1357.7754, 1368.3208, 1380.4838, 1392.7388, 1406.0758, 1416.9098, 1428.9728, 1440.9228, 1453.9292, 1462.617, 1476.05, 1490.2996, 1500.6128, 1513.7392, 1524.5174, 1536.6322, 1548.2584, 1562.3766, 1572.423, 1587.1232, 1596.5164, 1610.5938, 1622.5972, 1633.1222, 1647.7674, 1658.5044, 1671.57, 1683.7044, 1695.4142, 1708.7102, 1720.6094, 1732.6522, 1747.841, 1756.4072, 1769.9786, 1782.3276, 1797.5216, 1808.3186, 1819.0694, 1834.354, 1844.575, 1856.2808, 1871.1288, 1880.7852, 1893.9622, 1906.3418, 1920.6548, 1932.9302, 1945.8584, 1955.473, 1968.8248, 1980.6446, 1995.9598, 2008.349, 2019.8556, 2033.0334, 2044.0206, 2059.3956, 2069.9174, 2082.6084, 2093.7036, 2106.6108, 2118.9124, 2132.301, 2144.7628, 2159.8422, 2171.0212, 2183.101, 2193.5112, 2208.052, 2221.3194, 2233.3282, 2247.295, 2257.7222, 2273.342, 2286.5638, 2299.6786, 2310.8114, 2322.3312, 2335.516, 2349.874, 2363.5968, 2373.865, 2387.1918, 2401.8328, 2414.8496, 2424.544, 2436.7592, 2447.1682, 2464.1958, 2474.3438, 2489.0006, 2497.4526, 2513.6586, 2527.19, 2540.7028, 2553.768, },
+ // precision 10
+ { 738.1256, 750.4234, 763.1064, 775.4732, 788.4636, 801.0644, 814.488, 827.9654, 841.0832, 854.7864, 868.1992, 882.2176, 896.5228, 910.1716, 924.7752, 938.899, 953.6126, 968.6492, 982.9474, 998.5214, 1013.1064, 1028.6364, 1044.2468, 1059.4588, 1075.3832, 1091.0584, 1106.8606, 1123.3868, 1139.5062, 1156.1862, 1172.463, 1189.339, 1206.1936, 1223.1292, 1240.1854, 1257.2908, 1275.3324, 1292.8518, 1310.5204, 1328.4854, 1345.9318, 1364.552, 1381.4658, 1400.4256, 1419.849, 1438.152, 1456.8956, 1474.8792, 1494.118, 1513.62, 1532.5132, 1551.9322, 1570.7726, 1590.6086, 1610.5332, 1630.5918, 1650.4294, 1669.7662, 1690.4106, 1710.7338, 1730.9012, 1750.4486, 1770.1556, 1791.6338, 1812.7312, 1833.6264, 1853.9526, 1874.8742, 1896.8326, 1918.1966, 1939.5594, 1961.07, 1983.037, 2003.1804, 2026.071, 2047.4884, 2070.0848, 2091.2944, 2114.333, 2135.9626, 2158.2902, 2181.0814, 2202.0334, 2224.4832, 2246.39, 2269.7202, 2292.1714, 2314.2358, 2338.9346, 2360.891, 2384.0264, 2408.3834, 2430.1544, 2454.8684, 2476.9896, 2501.4368, 2522.8702, 2548.0408, 2570.6738, 2593.5208, 2617.0158, 2640.2302, 2664.0962, 2687.4986, 2714.2588, 2735.3914, 2759.6244, 2781.8378, 2808.0072, 2830.6516, 2856.2454, 2877.2136, 2903.4546, 2926.785, 2951.2294, 2976.468, 3000.867, 3023.6508, 3049.91, 3073.5984, 3098.162, 3121.5564, 3146.2328, 3170.9484, 3195.5902, 3221.3346, 3242.7032, 3271.6112, 3296.5546, 3317.7376, 3345.072, 3369.9518, 3394.326, 3418.1818, 3444.6926, 3469.086, 3494.2754, 3517.8698, 3544.248, 3565.3768, 3588.7234, 3616.979, 3643.7504, 3668.6812, 3695.72, 3719.7392, 3742.6224, 3770.4456, 3795.6602, 3819.9058, 3844.002, 3869.517, 3895.6824, 3920.8622, 3947.1364, 3973.985, 3995.4772, 4021.62, 4046.628, 4074.65, 4096.2256, 4121.831, 4146.6406, 4173.276, 4195.0744, 4223.9696, 4251.3708, 4272.9966, 4300.8046, 4326.302, 4353.1248, 4374.312, 4403.0322, 4426.819, 4450.0598, 4478.5206, 4504.8116, 4528.8928, 4553.9584, 4578.8712, 4603.8384, 4632.3872, 4655.5128, 4675.821, 4704.6222, 4731.9862, 4755.4174, 4781.2628, 4804.332, 4832.3048, 4862.8752, 4883.4148, 4906.9544, 4935.3516, 4954.3532, 4984.0248, 5011.217, 5035.3258, 5057.3672, 5084.1828, },
+ // precision 11
+ { 1477, 1501.6014, 1526.5802, 1551.7942, 1577.3042, 1603.2062, 1629.8402, 1656.2292, 1682.9462, 1709.9926, 1737.3026, 1765.4252, 1793.0578, 1821.6092, 1849.626, 1878.5568, 1908.527, 1937.5154, 1967.1874, 1997.3878, 2027.37, 2058.1972, 2089.5728, 2120.1012, 2151.9668, 2183.292, 2216.0772, 2247.8578, 2280.6562, 2313.041, 2345.714, 2380.3112, 2414.1806, 2447.9854, 2481.656, 2516.346, 2551.5154, 2586.8378, 2621.7448, 2656.6722, 2693.5722, 2729.1462, 2765.4124, 2802.8728, 2838.898, 2876.408, 2913.4926, 2951.4938, 2989.6776, 3026.282, 3065.7704, 3104.1012, 3143.7388, 3181.6876, 3221.1872, 3261.5048, 3300.0214, 3339.806, 3381.409, 3421.4144, 3461.4294, 3502.2286, 3544.651, 3586.6156, 3627.337, 3670.083, 3711.1538, 3753.5094, 3797.01, 3838.6686, 3882.1678, 3922.8116, 3967.9978, 4009.9204, 4054.3286, 4097.5706, 4140.6014, 4185.544, 4229.5976, 4274.583, 4316.9438, 4361.672, 4406.2786, 4451.8628, 4496.1834, 4543.505, 4589.1816, 4632.5188, 4678.2294, 4724.8908, 4769.0194, 4817.052, 4861.4588, 4910.1596, 4956.4344, 5002.5238, 5048.13, 5093.6374, 5142.8162, 5187.7894, 5237.3984, 5285.6078, 5331.0858, 5379.1036, 5428.6258, 5474.6018, 5522.7618, 5571.5822, 5618.59, 5667.9992, 5714.88, 5763.454, 5808.6982, 5860.3644, 5910.2914, 5953.571, 6005.9232, 6055.1914, 6104.5882, 6154.5702, 6199.7036, 6251.1764, 6298.7596, 6350.0302, 6398.061, 6448.4694, 6495.933, 6548.0474, 6597.7166, 6646.9416, 6695.9208, 6742.6328, 6793.5276, 6842.1934, 6894.2372, 6945.3864, 6996.9228, 7044.2372, 7094.1374, 7142.2272, 7192.2942, 7238.8338, 7288.9006, 7344.0908, 7394.8544, 7443.5176, 7490.4148, 7542.9314, 7595.6738, 7641.9878, 7694.3688, 7743.0448, 7797.522, 7845.53, 7899.594, 7950.3132, 7996.455, 8050.9442, 8092.9114, 8153.1374, 8197.4472, 8252.8278, 8301.8728, 8348.6776, 8401.4698, 8453.551, 8504.6598, 8553.8944, 8604.1276, 8657.6514, 8710.3062, 8758.908, 8807.8706, 8862.1702, 8910.4668, 8960.77, 9007.2766, 9063.164, 9121.0534, 9164.1354, 9218.1594, 9267.767, 9319.0594, 9372.155, 9419.7126, 9474.3722, 9520.1338, 9572.368, 9622.7702, 9675.8448, 9726.5396, 9778.7378, 9827.6554, 9878.1922, 9928.7782, 9978.3984, 10026.578, 10076.5626, 10137.1618, 10177.5244, 10229.9176, },
+ // precision 12
+ { 2954, 3003.4782, 3053.3568, 3104.3666, 3155.324, 3206.9598, 3259.648, 3312.539, 3366.1474, 3420.2576, 3474.8376, 3530.6076, 3586.451, 3643.38, 3700.4104, 3757.5638, 3815.9676, 3875.193, 3934.838, 3994.8548, 4055.018, 4117.1742, 4178.4482, 4241.1294, 4304.4776, 4367.4044, 4431.8724, 4496.3732, 4561.4304, 4627.5326, 4693.949, 4761.5532, 4828.7256, 4897.6182, 4965.5186, 5034.4528, 5104.865, 5174.7164, 5244.6828, 5316.6708, 5387.8312, 5459.9036, 5532.476, 5604.8652, 5679.6718, 5753.757, 5830.2072, 5905.2828, 5980.0434, 6056.6264, 6134.3192, 6211.5746, 6290.0816, 6367.1176, 6447.9796, 6526.5576, 6606.1858, 6686.9144, 6766.1142, 6847.0818, 6927.9664, 7010.9096, 7091.0816, 7175.3962, 7260.3454, 7344.018, 7426.4214, 7511.3106, 7596.0686, 7679.8094, 7765.818, 7852.4248, 7936.834, 8022.363, 8109.5066, 8200.4554, 8288.5832, 8373.366, 8463.4808, 8549.7682, 8642.0522, 8728.3288, 8820.9528, 8907.727, 9001.0794, 9091.2522, 9179.988, 9269.852, 9362.6394, 9453.642, 9546.9024, 9640.6616, 9732.6622, 9824.3254, 9917.7484, 10007.9392, 10106.7508, 10196.2152, 10289.8114, 10383.5494, 10482.3064, 10576.8734, 10668.7872, 10764.7156, 10862.0196, 10952.793, 11049.9748, 11146.0702, 11241.4492, 11339.2772, 11434.2336, 11530.741, 11627.6136, 11726.311, 11821.5964, 11918.837, 12015.3724, 12113.0162, 12213.0424, 12306.9804, 12408.4518, 12504.8968, 12604.586, 12700.9332, 12798.705, 12898.5142, 12997.0488, 13094.788, 13198.475, 13292.7764, 13392.9698, 13486.8574, 13590.1616, 13686.5838, 13783.6264, 13887.2638, 13992.0978, 14081.0844, 14189.9956, 14280.0912, 14382.4956, 14486.4384, 14588.1082, 14686.2392, 14782.276, 14888.0284, 14985.1864, 15088.8596, 15187.0998, 15285.027, 15383.6694, 15495.8266, 15591.3736, 15694.2008, 15790.3246, 15898.4116, 15997.4522, 16095.5014, 16198.8514, 16291.7492, 16402.6424, 16499.1266, 16606.2436, 16697.7186, 16796.3946, 16902.3376, 17005.7672, 17100.814, 17206.8282, 17305.8262, 17416.0744, 17508.4092, 17617.0178, 17715.4554, 17816.758, 17920.1748, 18012.9236, 18119.7984, 18223.2248, 18324.2482, 18426.6276, 18525.0932, 18629.8976, 18733.2588, 18831.0466, 18940.1366, 19032.2696, 19131.729, 19243.4864, 19349.6932, 19442.866, 19547.9448, 19653.2798, 19754.4034, 19854.0692, 19965.1224, 20065.1774, 20158.2212, 20253.353, 20366.3264, 20463.22, },
+ // precision 13
+ { 5908.5052, 6007.2672, 6107.347, 6208.5794, 6311.2622, 6414.5514, 6519.3376, 6625.6952, 6732.5988, 6841.3552, 6950.5972, 7061.3082, 7173.5646, 7287.109, 7401.8216, 7516.4344, 7633.3802, 7751.2962, 7870.3784, 7990.292, 8110.79, 8233.4574, 8356.6036, 8482.2712, 8607.7708, 8735.099, 8863.1858, 8993.4746, 9123.8496, 9255.6794, 9388.5448, 9522.7516, 9657.3106, 9792.6094, 9930.5642, 10068.794, 10206.7256, 10347.81, 10490.3196, 10632.0778, 10775.9916, 10920.4662, 11066.124, 11213.073, 11358.0362, 11508.1006, 11659.1716, 11808.7514, 11959.4884, 12112.1314, 12265.037, 12420.3756, 12578.933, 12734.311, 12890.0006, 13047.2144, 13207.3096, 13368.5144, 13528.024, 13689.847, 13852.7528, 14018.3168, 14180.5372, 14346.9668, 14513.5074, 14677.867, 14846.2186, 15017.4186, 15184.9716, 15356.339, 15529.2972, 15697.3578, 15871.8686, 16042.187, 16216.4094, 16389.4188, 16565.9126, 16742.3272, 16919.0042, 17094.7592, 17273.965, 17451.8342, 17634.4254, 17810.5984, 17988.9242, 18171.051, 18354.7938, 18539.466, 18721.0408, 18904.9972, 19081.867, 19271.9118, 19451.8694, 19637.9816, 19821.2922, 20013.1292, 20199.3858, 20387.8726, 20572.9514, 20770.7764, 20955.1714, 21144.751, 21329.9952, 21520.709, 21712.7016, 21906.3868, 22096.2626, 22286.0524, 22475.051, 22665.5098, 22862.8492, 23055.5294, 23249.6138, 23437.848, 23636.273, 23826.093, 24020.3296, 24213.3896, 24411.7392, 24602.9614, 24805.7952, 24998.1552, 25193.9588, 25389.0166, 25585.8392, 25780.6976, 25981.2728, 26175.977, 26376.5252, 26570.1964, 26773.387, 26962.9812, 27163.0586, 27368.164, 27565.0534, 27758.7428, 27961.1276, 28163.2324, 28362.3816, 28565.7668, 28758.644, 28956.9768, 29163.4722, 29354.7026, 29561.1186, 29767.9948, 29959.9986, 30164.0492, 30366.9818, 30562.5338, 30762.9928, 30976.1592, 31166.274, 31376.722, 31570.3734, 31770.809, 31974.8934, 32179.5286, 32387.5442, 32582.3504, 32794.076, 32989.9528, 33191.842, 33392.4684, 33595.659, 33801.8672, 34000.3414, 34200.0922, 34402.6792, 34610.0638, 34804.0084, 35011.13, 35218.669, 35418.6634, 35619.0792, 35830.6534, 36028.4966, 36229.7902, 36438.6422, 36630.7764, 36833.3102, 37048.6728, 37247.3916, 37453.5904, 37669.3614, 37854.5526, 38059.305, 38268.0936, 38470.2516, 38674.7064, 38876.167, 39068.3794, 39281.9144, 39492.8566, 39684.8628, 39898.4108, 40093.1836, 40297.6858, 40489.7086, 40717.2424, },
+ // precision 14
+ { 11817.475, 12015.0046, 12215.3792, 12417.7504, 12623.1814, 12830.0086, 13040.0072, 13252.503, 13466.178, 13683.2738, 13902.0344, 14123.9798, 14347.394, 14573.7784, 14802.6894, 15033.6824, 15266.9134, 15502.8624, 15741.4944, 15980.7956, 16223.8916, 16468.6316, 16715.733, 16965.5726, 17217.204, 17470.666, 17727.8516, 17986.7886, 18247.6902, 18510.9632, 18775.304, 19044.7486, 19314.4408, 19587.202, 19862.2576, 20135.924, 20417.0324, 20697.9788, 20979.6112, 21265.0274, 21550.723, 21841.6906, 22132.162, 22428.1406, 22722.127, 23020.5606, 23319.7394, 23620.4014, 23925.2728, 24226.9224, 24535.581, 24845.505, 25155.9618, 25470.3828, 25785.9702, 26103.7764, 26420.4132, 26742.0186, 27062.8852, 27388.415, 27714.6024, 28042.296, 28365.4494, 28701.1526, 29031.8008, 29364.2156, 29704.497, 30037.1458, 30380.111, 30723.8168, 31059.5114, 31404.9498, 31751.6752, 32095.2686, 32444.7792, 32794.767, 33145.204, 33498.4226, 33847.6502, 34209.006, 34560.849, 34919.4838, 35274.9778, 35635.1322, 35996.3266, 36359.1394, 36722.8266, 37082.8516, 37447.7354, 37815.9606, 38191.0692, 38559.4106, 38924.8112, 39294.6726, 39663.973, 40042.261, 40416.2036, 40779.2036, 41161.6436, 41540.9014, 41921.1998, 42294.7698, 42678.5264, 43061.3464, 43432.375, 43818.432, 44198.6598, 44583.0138, 44970.4794, 45353.924, 45729.858, 46118.2224, 46511.5724, 46900.7386, 47280.6964, 47668.1472, 48055.6796, 48446.9436, 48838.7146, 49217.7296, 49613.7796, 50010.7508, 50410.0208, 50793.7886, 51190.2456, 51583.1882, 51971.0796, 52376.5338, 52763.319, 53165.5534, 53556.5594, 53948.2702, 54346.352, 54748.7914, 55138.577, 55543.4824, 55941.1748, 56333.7746, 56745.1552, 57142.7944, 57545.2236, 57935.9956, 58348.5268, 58737.5474, 59158.5962, 59542.6896, 59958.8004, 60349.3788, 60755.0212, 61147.6144, 61548.194, 61946.0696, 62348.6042, 62763.603, 63162.781, 63560.635, 63974.3482, 64366.4908, 64771.5876, 65176.7346, 65597.3916, 65995.915, 66394.0384, 66822.9396, 67203.6336, 67612.2032, 68019.0078, 68420.0388, 68821.22, 69235.8388, 69640.0724, 70055.155, 70466.357, 70863.4266, 71276.2482, 71677.0306, 72080.2006, 72493.0214, 72893.5952, 73314.5856, 73714.9852, 74125.3022, 74521.2122, 74933.6814, 75341.5904, 75743.0244, 76166.0278, 76572.1322, 76973.1028, 77381.6284, 77800.6092, 78189.328, 78607.0962, 79012.2508, 79407.8358, 79825.725, 80238.701, 80646.891, 81035.6436, 81460.0448, 81876.3884, },
+ // precision 15
+ { 23635.0036, 24030.8034, 24431.4744, 24837.1524, 25246.7928, 25661.326, 26081.3532, 26505.2806, 26933.9892, 27367.7098, 27805.318, 28248.799, 28696.4382, 29148.8244, 29605.5138, 30066.8668, 30534.2344, 31006.32, 31480.778, 31962.2418, 32447.3324, 32938.0232, 33432.731, 33930.728, 34433.9896, 34944.1402, 35457.5588, 35974.5958, 36497.3296, 37021.9096, 37554.326, 38088.0826, 38628.8816, 39171.3192, 39723.2326, 40274.5554, 40832.3142, 41390.613, 41959.5908, 42532.5466, 43102.0344, 43683.5072, 44266.694, 44851.2822, 45440.7862, 46038.0586, 46640.3164, 47241.064, 47846.155, 48454.7396, 49076.9168, 49692.542, 50317.4778, 50939.65, 51572.5596, 52210.2906, 52843.7396, 53481.3996, 54127.236, 54770.406, 55422.6598, 56078.7958, 56736.7174, 57397.6784, 58064.5784, 58730.308, 59404.9784, 60077.0864, 60751.9158, 61444.1386, 62115.817, 62808.7742, 63501.4774, 64187.5454, 64883.6622, 65582.7468, 66274.5318, 66976.9276, 67688.7764, 68402.138, 69109.6274, 69822.9706, 70543.6108, 71265.5202, 71983.3848, 72708.4656, 73433.384, 74158.4664, 74896.4868, 75620.9564, 76362.1434, 77098.3204, 77835.7662, 78582.6114, 79323.9902, 80067.8658, 80814.9246, 81567.0136, 82310.8536, 83061.9952, 83821.4096, 84580.8608, 85335.547, 86092.5802, 86851.6506, 87612.311, 88381.2016, 89146.3296, 89907.8974, 90676.846, 91451.4152, 92224.5518, 92995.8686, 93763.5066, 94551.2796, 95315.1944, 96096.1806, 96881.0918, 97665.679, 98442.68, 99229.3002, 100011.0994, 100790.6386, 101580.1564, 102377.7484, 103152.1392, 103944.2712, 104730.216, 105528.6336, 106324.9398, 107117.6706, 107890.3988, 108695.2266, 109485.238, 110294.7876, 111075.0958, 111878.0496, 112695.2864, 113464.5486, 114270.0474, 115068.608, 115884.3626, 116673.2588, 117483.3716, 118275.097, 119085.4092, 119879.2808, 120687.5868, 121499.9944, 122284.916, 123095.9254, 123912.5038, 124709.0454, 125503.7182, 126323.259, 127138.9412, 127943.8294, 128755.646, 129556.5354, 130375.3298, 131161.4734, 131971.1962, 132787.5458, 133588.1056, 134431.351, 135220.2906, 136023.398, 136846.6558, 137667.0004, 138463.663, 139283.7154, 140074.6146, 140901.3072, 141721.8548, 142543.2322, 143356.1096, 144173.7412, 144973.0948, 145794.3162, 146609.5714, 147420.003, 148237.9784, 149050.5696, 149854.761, 150663.1966, 151494.0754, 152313.1416, 153112.6902, 153935.7206, 154746.9262, 155559.547, 156401.9746, 157228.7036, 158008.7254, 158820.75, 159646.9184, 160470.4458, 161279.5348, 162093.3114, 162918.542, 163729.2842, },
+ // precision 16
+ { 47271, 48062.3584, 48862.7074, 49673.152, 50492.8416, 51322.9514, 52161.03, 53009.407, 53867.6348, 54734.206, 55610.5144, 56496.2096, 57390.795, 58297.268, 59210.6448, 60134.665, 61068.0248, 62010.4472, 62962.5204, 63923.5742, 64895.0194, 65876.4182, 66862.6136, 67862.6968, 68868.8908, 69882.8544, 70911.271, 71944.0924, 72990.0326, 74040.692, 75100.6336, 76174.7826, 77252.5998, 78340.2974, 79438.2572, 80545.4976, 81657.2796, 82784.6336, 83915.515, 85059.7362, 86205.9368, 87364.4424, 88530.3358, 89707.3744, 90885.9638, 92080.197, 93275.5738, 94479.391, 95695.918, 96919.2236, 98148.4602, 99382.3474, 100625.6974, 101878.0284, 103141.6278, 104409.4588, 105686.2882, 106967.5402, 108261.6032, 109548.1578, 110852.0728, 112162.231, 113479.0072, 114806.2626, 116137.9072, 117469.5048, 118813.5186, 120165.4876, 121516.2556, 122875.766, 124250.5444, 125621.2222, 127003.2352, 128387.848, 129775.2644, 131181.7776, 132577.3086, 133979.9458, 135394.1132, 136800.9078, 138233.217, 139668.5308, 141085.212, 142535.2122, 143969.0684, 145420.2872, 146878.1542, 148332.7572, 149800.3202, 151269.66, 152743.6104, 154213.0948, 155690.288, 157169.4246, 158672.1756, 160160.059, 161650.6854, 163145.7772, 164645.6726, 166159.1952, 167682.1578, 169177.3328, 170700.0118, 172228.8964, 173732.6664, 175265.5556, 176787.799, 178317.111, 179856.6914, 181400.865, 182943.4612, 184486.742, 186033.4698, 187583.7886, 189148.1868, 190688.4526, 192250.1926, 193810.9042, 195354.2972, 196938.7682, 198493.5898, 200079.2824, 201618.912, 203205.5492, 204765.5798, 206356.1124, 207929.3064, 209498.7196, 211086.229, 212675.1324, 214256.7892, 215826.2392, 217412.8474, 218995.6724, 220618.6038, 222207.1166, 223781.0364, 225387.4332, 227005.7928, 228590.4336, 230217.8738, 231805.1054, 233408.9, 234995.3432, 236601.4956, 238190.7904, 239817.2548, 241411.2832, 243002.4066, 244640.1884, 246255.3128, 247849.3508, 249479.9734, 251106.8822, 252705.027, 254332.9242, 255935.129, 257526.9014, 259154.772, 260777.625, 262390.253, 264004.4906, 265643.59, 267255.4076, 268873.426, 270470.7252, 272106.4804, 273722.4456, 275337.794, 276945.7038, 278592.9154, 280204.3726, 281841.1606, 283489.171, 285130.1716, 286735.3362, 288364.7164, 289961.1814, 291595.5524, 293285.683, 294899.6668, 296499.3434, 298128.0462, 299761.8946, 301394.2424, 302997.6748, 304615.1478, 306269.7724, 307886.114, 309543.1028, 311153.2862, 312782.8546, 314421.2008, 316033.2438, 317692.9636, 319305.2648, 320948.7406, 322566.3364, 324228.4224, 325847.1542, },
+ // precision 17
+ { 94542, 96125.811, 97728.019, 99348.558, 100987.9705, 102646.7565, 104324.5125, 106021.7435, 107736.7865, 109469.272, 111223.9465, 112995.219, 114787.432, 116593.152, 118422.71, 120267.2345, 122134.6765, 124020.937, 125927.2705, 127851.255, 129788.9485, 131751.016, 133726.8225, 135722.592, 137736.789, 139770.568, 141821.518, 143891.343, 145982.1415, 148095.387, 150207.526, 152355.649, 154515.6415, 156696.05, 158887.7575, 161098.159, 163329.852, 165569.053, 167837.4005, 170121.6165, 172420.4595, 174732.6265, 177062.77, 179412.502, 181774.035, 184151.939, 186551.6895, 188965.691, 191402.8095, 193857.949, 196305.0775, 198774.6715, 201271.2585, 203764.78, 206299.3695, 208818.1365, 211373.115, 213946.7465, 216532.076, 219105.541, 221714.5375, 224337.5135, 226977.5125, 229613.0655, 232270.2685, 234952.2065, 237645.3555, 240331.1925, 243034.517, 245756.0725, 248517.6865, 251232.737, 254011.3955, 256785.995, 259556.44, 262368.335, 265156.911, 267965.266, 270785.583, 273616.0495, 276487.4835, 279346.639, 282202.509, 285074.3885, 287942.2855, 290856.018, 293774.0345, 296678.5145, 299603.6355, 302552.6575, 305492.9785, 308466.8605, 311392.581, 314347.538, 317319.4295, 320285.9785, 323301.7325, 326298.3235, 329301.3105, 332301.987, 335309.791, 338370.762, 341382.923, 344431.1265, 347464.1545, 350507.28, 353619.2345, 356631.2005, 359685.203, 362776.7845, 365886.488, 368958.2255, 372060.6825, 375165.4335, 378237.935, 381328.311, 384430.5225, 387576.425, 390683.242, 393839.648, 396977.8425, 400101.9805, 403271.296, 406409.8425, 409529.5485, 412678.7, 415847.423, 419020.8035, 422157.081, 425337.749, 428479.6165, 431700.902, 434893.1915, 438049.582, 441210.5415, 444379.2545, 447577.356, 450741.931, 453959.548, 457137.0935, 460329.846, 463537.4815, 466732.3345, 469960.5615, 473164.681, 476347.6345, 479496.173, 482813.1645, 486025.6995, 489249.4885, 492460.1945, 495675.8805, 498908.0075, 502131.802, 505374.3855, 508550.9915, 511806.7305, 515026.776, 518217.0005, 521523.9855, 524705.9855, 527950.997, 531210.0265, 534472.497, 537750.7315, 540926.922, 544207.094, 547429.4345, 550666.3745, 553975.3475, 557150.7185, 560399.6165, 563662.697, 566916.7395, 570146.1215, 573447.425, 576689.6245, 579874.5745, 583202.337, 586503.0255, 589715.635, 592910.161, 596214.3885, 599488.035, 602740.92, 605983.0685, 609248.67, 612491.3605, 615787.912, 619107.5245, 622307.9555, 625577.333, 628840.4385, 632085.2155, 635317.6135, 638691.7195, 641887.467, 645139.9405, 648441.546, 651666.252, 654941.845, },
+ // precision 18
+ { 189084, 192250.913, 195456.774, 198696.946, 201977.762, 205294.444, 208651.754, 212042.099, 215472.269, 218941.91, 222443.912, 225996.845, 229568.199, 233193.568, 236844.457, 240543.233, 244279.475, 248044.27, 251854.588, 255693.2, 259583.619, 263494.621, 267445.385, 271454.061, 275468.769, 279549.456, 283646.446, 287788.198, 291966.099, 296181.164, 300431.469, 304718.618, 309024.004, 313393.508, 317760.803, 322209.731, 326675.061, 331160.627, 335654.47, 340241.442, 344841.833, 349467.132, 354130.629, 358819.432, 363574.626, 368296.587, 373118.482, 377914.93, 382782.301, 387680.669, 392601.981, 397544.323, 402529.115, 407546.018, 412593.658, 417638.657, 422762.865, 427886.169, 433017.167, 438213.273, 443441.254, 448692.421, 453937.533, 459239.049, 464529.569, 469910.083, 475274.03, 480684.473, 486070.26, 491515.237, 496995.651, 502476.617, 507973.609, 513497.19, 519083.233, 524726.509, 530305.505, 535945.728, 541584.404, 547274.055, 552967.236, 558667.862, 564360.216, 570128.148, 575965.08, 581701.952, 587532.523, 593361.144, 599246.128, 605033.418, 610958.779, 616837.117, 622772.818, 628672.04, 634675.369, 640574.831, 646585.739, 652574.547, 658611.217, 664642.684, 670713.914, 676737.681, 682797.313, 688837.897, 694917.874, 701009.882, 707173.648, 713257.254, 719415.392, 725636.761, 731710.697, 737906.209, 744103.074, 750313.39, 756504.185, 762712.579, 768876.985, 775167.859, 781359, 787615.959, 793863.597, 800245.477, 806464.582, 812785.294, 819005.925, 825403.057, 831676.197, 837936.284, 844266.968, 850642.711, 856959.756, 863322.774, 869699.931, 876102.478, 882355.787, 888694.463, 895159.952, 901536.143, 907872.631, 914293.672, 920615.14, 927130.974, 933409.404, 939922.178, 946331.47, 952745.93, 959209.264, 965590.224, 972077.284, 978501.961, 984953.19, 991413.271, 997817.479, 1004222.658, 1010725.676, 1017177.138, 1023612.529, 1030098.236, 1036493.719, 1043112.207, 1049537.036, 1056008.096, 1062476.184, 1068942.337, 1075524.95, 1081932.864, 1088426.025, 1094776.005, 1101327.448, 1107901.673, 1114423.639, 1120884.602, 1127324.923, 1133794.24, 1140328.886, 1146849.376, 1153346.682, 1159836.502, 1166478.703, 1172953.304, 1179391.502, 1185950.982, 1192544.052, 1198913.41, 1205430.994, 1212015.525, 1218674.042, 1225121.683, 1231551.101, 1238126.379, 1244673.795, 1251260.649, 1257697.86, 1264320.983, 1270736.319, 1277274.694, 1283804.95, 1290211.514, 1296858.568, 1303455.691, }
+ };
+
+ private static final double[][] biasDataAllPrecisions = {
+ // precision 4
+ { 10, 9.717, 9.207, 8.7896, 8.2882, 7.8204, 7.3772, 6.9342, 6.5202, 6.161, 5.7722, 5.4636, 5.0396, 4.6766, 4.3566, 4.0454, 3.7936, 3.4856, 3.2666, 2.9946, 2.766, 2.4692, 2.3638, 2.0764, 1.7864, 1.7602, 1.4814, 1.433, 1.2926, 1.0664, 0.999600000000001, 0.7956, 0.5366, 0.589399999999998, 0.573799999999999, 0.269799999999996, 0.368200000000002, 0.0544000000000011, 0.234200000000001, 0.0108000000000033, -0.203400000000002, -0.0701999999999998, -0.129600000000003, -0.364199999999997, -0.480600000000003, -0.226999999999997, -0.322800000000001, -0.382599999999996, -0.511200000000002, -0.669600000000003, -0.749400000000001, -0.500399999999999, -0.617600000000003, -0.6922, -0.601599999999998, -0.416200000000003, -0.338200000000001, -0.782600000000002, -0.648600000000002, -0.919800000000002, -0.851799999999997, -0.962400000000002, -0.6402, -1.1922, -1.0256, -1.086, -1.21899999999999, -0.819400000000002, -0.940600000000003, -1.1554, -1.2072, -1.1752, -1.16759999999999, -1.14019999999999, -1.3754, -1.29859999999999, -1.607, -1.3292, -1.7606, },
+ // precision 5
+ { 22, 21.1194, 20.8208, 20.2318, 19.77, 19.2436, 18.7774, 18.2848, 17.8224, 17.3742, 16.9336, 16.503, 16.0494, 15.6292, 15.2124, 14.798, 14.367, 13.9728, 13.5944, 13.217, 12.8438, 12.3696, 12.0956, 11.7044, 11.324, 11.0668, 10.6698, 10.3644, 10.049, 9.6918, 9.4146, 9.082, 8.687, 8.5398, 8.2462, 7.857, 7.6606, 7.4168, 7.1248, 6.9222, 6.6804, 6.447, 6.3454, 5.9594, 5.7636, 5.5776, 5.331, 5.19, 4.9676, 4.7564, 4.5314, 4.4442, 4.3708, 3.9774, 3.9624, 3.8796, 3.755, 3.472, 3.2076, 3.1024, 2.8908, 2.7338, 2.7728, 2.629, 2.413, 2.3266, 2.1524, 2.2642, 2.1806, 2.0566, 1.9192, 1.7598, 1.3516, 1.5802, 1.43859999999999, 1.49160000000001, 1.1524, 1.1892, 0.841399999999993, 0.879800000000003, 0.837599999999995, 0.469800000000006, 0.765600000000006, 0.331000000000003, 0.591399999999993, 0.601200000000006, 0.701599999999999, 0.558199999999999, 0.339399999999998, 0.354399999999998, 0.491200000000006, 0.308000000000007, 0.355199999999996, -0.0254000000000048, 0.205200000000005, -0.272999999999996, 0.132199999999997, 0.394400000000005, -0.241200000000006, 0.242000000000004, 0.191400000000002, 0.253799999999998, -0.122399999999999, -0.370800000000003, 0.193200000000004, -0.0848000000000013, 0.0867999999999967, -0.327200000000005, -0.285600000000002, 0.311400000000006, -0.128399999999999, -0.754999999999995, -0.209199999999996, -0.293599999999998, -0.364000000000004, -0.253600000000006, -0.821200000000005, -0.253600000000006, -0.510400000000004, -0.383399999999995, -0.491799999999998, -0.220200000000006, -0.0972000000000008, -0.557400000000001, -0.114599999999996, -0.295000000000002, -0.534800000000004, 0.346399999999988, -0.65379999999999, 0.0398000000000138, 0.0341999999999985, -0.995800000000003, -0.523400000000009, -0.489000000000004, -0.274799999999999, -0.574999999999989, -0.482799999999997, 0.0571999999999946, -0.330600000000004, -0.628800000000012, -0.140199999999993, -0.540600000000012, -0.445999999999998, -0.599400000000003, -0.262599999999992, 0.163399999999996, -0.100599999999986, -0.39500000000001, -1.06960000000001, -0.836399999999998, -0.753199999999993, -0.412399999999991, -0.790400000000005, -0.29679999999999, -0.28540000000001, -0.193000000000012, -0.0772000000000048, -0.962799999999987, -0.414800000000014, },
+ // precision 6
+ { 45, 44.1902, 43.271, 42.8358, 41.8142, 41.2854, 40.317, 39.354, 38.8924, 37.9436, 37.4596, 36.5262, 35.6248, 35.1574, 34.2822, 33.837, 32.9636, 32.074, 31.7042, 30.7976, 30.4772, 29.6564, 28.7942, 28.5004, 27.686, 27.291, 26.5672, 25.8556, 25.4982, 24.8204, 24.4252, 23.7744, 23.0786, 22.8344, 22.0294, 21.8098, 21.0794, 20.5732, 20.1878, 19.5648, 19.2902, 18.6784, 18.3352, 17.8946, 17.3712, 17.0852, 16.499, 16.2686, 15.6844, 15.2234, 14.9732, 14.3356, 14.2286, 13.7262, 13.3284, 13.1048, 12.5962, 12.3562, 12.1272, 11.4184, 11.4974, 11.0822, 10.856, 10.48, 10.2834, 10.0208, 9.637, 9.51739999999999, 9.05759999999999, 8.74760000000001, 8.42700000000001, 8.1326, 8.2372, 8.2788, 7.6776, 7.79259999999999, 7.1952, 6.9564, 6.6454, 6.87, 6.5428, 6.19999999999999, 6.02940000000001, 5.62780000000001, 5.6782, 5.792, 5.35159999999999, 5.28319999999999, 5.0394, 5.07480000000001, 4.49119999999999, 4.84899999999999, 4.696, 4.54040000000001, 4.07300000000001, 4.37139999999999, 3.7216, 3.7328, 3.42080000000001, 3.41839999999999, 3.94239999999999, 3.27719999999999, 3.411, 3.13079999999999, 2.76900000000001, 2.92580000000001, 2.68279999999999, 2.75020000000001, 2.70599999999999, 2.3886, 3.01859999999999, 2.45179999999999, 2.92699999999999, 2.41720000000001, 2.41139999999999, 2.03299999999999, 2.51240000000001, 2.5564, 2.60079999999999, 2.41720000000001, 1.80439999999999, 1.99700000000001, 2.45480000000001, 1.8948, 2.2346, 2.30860000000001, 2.15479999999999, 1.88419999999999, 1.6508, 0.677199999999999, 1.72540000000001, 1.4752, 1.72280000000001, 1.66139999999999, 1.16759999999999, 1.79300000000001, 1.00059999999999, 0.905200000000008, 0.659999999999997, 1.55879999999999, 1.1636, 0.688199999999995, 0.712600000000009, 0.450199999999995, 1.1978, 0.975599999999986, 0.165400000000005, 1.727, 1.19739999999999, -0.252600000000001, 1.13460000000001, 1.3048, 1.19479999999999, 0.313400000000001, 0.878999999999991, 1.12039999999999, 0.853000000000009, 1.67920000000001, 0.856999999999999, 0.448599999999999, 1.2362, 0.953399999999988, 1.02859999999998, 0.563199999999995, 0.663000000000011, 0.723000000000013, 0.756599999999992, 0.256599999999992, -0.837600000000009, 0.620000000000005, 0.821599999999989, 0.216600000000028, 0.205600000000004, 0.220199999999977, 0.372599999999977, 0.334400000000016, 0.928400000000011, 0.972800000000007, 0.192400000000021, 0.487199999999973, -0.413000000000011, 0.807000000000016, 0.120600000000024, 0.769000000000005, 0.870799999999974, 0.66500000000002, 0.118200000000002, 0.401200000000017, 0.635199999999998, 0.135400000000004, 0.175599999999974, 1.16059999999999, 0.34620000000001, 0.521400000000028, -0.586599999999976, -1.16480000000001, 0.968399999999974, 0.836999999999989, 0.779600000000016, 0.985799999999983, },
+ // precision 7
+ { 91, 89.4934, 87.9758, 86.4574, 84.9718, 83.4954, 81.5302, 80.0756, 78.6374, 77.1782, 75.7888, 73.9522, 72.592, 71.2532, 69.9086, 68.5938, 66.9474, 65.6796, 64.4394, 63.2176, 61.9768, 60.4214, 59.2528, 58.0102, 56.8658, 55.7278, 54.3044, 53.1316, 52.093, 51.0032, 49.9092, 48.6306, 47.5294, 46.5756, 45.6508, 44.662, 43.552, 42.3724, 41.617, 40.5754, 39.7872, 38.8444, 37.7988, 36.8606, 36.2118, 35.3566, 34.4476, 33.5882, 32.6816, 32.0824, 31.0258, 30.6048, 29.4436, 28.7274, 27.957, 27.147, 26.4364, 25.7592, 25.3386, 24.781, 23.8028, 23.656, 22.6544, 21.996, 21.4718, 21.1544, 20.6098, 19.5956, 19.0616, 18.5758, 18.4878, 17.5244, 17.2146, 16.724, 15.8722, 15.5198, 15.0414, 14.941, 14.9048, 13.87, 13.4304, 13.028, 12.4708, 12.37, 12.0624, 11.4668, 11.5532, 11.4352, 11.2564, 10.2744, 10.2118, 9.74720000000002, 10.1456, 9.2928, 8.75040000000001, 8.55279999999999, 8.97899999999998, 8.21019999999999, 8.18340000000001, 7.3494, 7.32499999999999, 7.66140000000001, 6.90300000000002, 7.25439999999998, 6.9042, 7.21499999999997, 6.28640000000001, 6.08139999999997, 6.6764, 6.30099999999999, 5.13900000000001, 5.65800000000002, 5.17320000000001, 4.59019999999998, 4.9538, 5.08280000000002, 4.92200000000003, 4.99020000000002, 4.7328, 5.4538, 4.11360000000002, 4.22340000000003, 4.08780000000002, 3.70800000000003, 4.15559999999999, 4.18520000000001, 3.63720000000001, 3.68220000000002, 3.77960000000002, 3.6078, 2.49160000000001, 3.13099999999997, 2.5376, 3.19880000000001, 3.21100000000001, 2.4502, 3.52820000000003, 2.91199999999998, 3.04480000000001, 2.7432, 2.85239999999999, 2.79880000000003, 2.78579999999999, 1.88679999999999, 2.98860000000002, 2.50639999999999, 1.91239999999999, 2.66160000000002, 2.46820000000002, 1.58199999999999, 1.30399999999997, 2.27379999999999, 2.68939999999998, 1.32900000000001, 3.10599999999999, 1.69080000000002, 2.13740000000001, 2.53219999999999, 1.88479999999998, 1.33240000000001, 1.45119999999997, 1.17899999999997, 2.44119999999998, 1.60659999999996, 2.16700000000003, 0.77940000000001, 2.37900000000002, 2.06700000000001, 1.46000000000004, 2.91160000000002, 1.69200000000001, 0.954600000000028, 2.49300000000005, 2.2722, 1.33500000000004, 2.44899999999996, 1.20140000000004, 3.07380000000001, 2.09739999999999, 2.85640000000001, 2.29960000000005, 2.40899999999999, 1.97040000000004, 0.809799999999996, 1.65279999999996, 2.59979999999996, 0.95799999999997, 2.06799999999998, 2.32780000000002, 4.20159999999998, 1.96320000000003, 1.86400000000003, 1.42999999999995, 3.77940000000001, 1.27200000000005, 1.86440000000005, 2.20600000000002, 3.21900000000005, 1.5154, 2.61019999999996, },
+ // precision 8
+ { 183.2152, 180.2454, 177.2096, 173.6652, 170.6312, 167.6822, 164.249, 161.3296, 158.0038, 155.2074, 152.4612, 149.27, 146.5178, 143.4412, 140.8032, 138.1634, 135.1688, 132.6074, 129.6946, 127.2664, 124.8228, 122.0432, 119.6824, 116.9464, 114.6268, 112.2626, 109.8376, 107.4034, 104.8956, 102.8522, 100.7638, 98.3552, 96.3556, 93.7526, 91.9292, 89.8954, 87.8198, 85.7668, 83.298, 81.6688, 79.9466, 77.9746, 76.1672, 74.3474, 72.3028, 70.8912, 69.114, 67.4646, 65.9744, 64.4092, 62.6022, 60.843, 59.5684, 58.1652, 56.5426, 55.4152, 53.5388, 52.3592, 51.1366, 49.486, 48.3918, 46.5076, 45.509, 44.3834, 43.3498, 42.0668, 40.7346, 40.1228, 38.4528, 37.7, 36.644, 36.0518, 34.5774, 33.9068, 32.432, 32.1666, 30.434, 29.6644, 28.4894, 27.6312, 26.3804, 26.292, 25.5496000000001, 25.0234, 24.8206, 22.6146, 22.4188, 22.117, 20.6762, 20.6576, 19.7864, 19.509, 18.5334, 17.9204, 17.772, 16.2924, 16.8654, 15.1836, 15.745, 15.1316, 15.0386, 14.0136, 13.6342, 12.6196, 12.1866, 12.4281999999999, 11.3324, 10.4794000000001, 11.5038, 10.129, 9.52800000000002, 10.3203999999999, 9.46299999999997, 9.79280000000006, 9.12300000000005, 8.74180000000001, 9.2192, 7.51020000000005, 7.60659999999996, 7.01840000000004, 7.22239999999999, 7.40139999999997, 6.76179999999999, 7.14359999999999, 5.65060000000005, 5.63779999999997, 5.76599999999996, 6.75139999999999, 5.57759999999996, 3.73220000000003, 5.8048, 5.63019999999995, 4.93359999999996, 3.47979999999995, 4.33879999999999, 3.98940000000005, 3.81960000000004, 3.31359999999995, 3.23080000000004, 3.4588, 3.08159999999998, 3.4076, 3.00639999999999, 2.38779999999997, 2.61900000000003, 1.99800000000005, 3.34820000000002, 2.95060000000001, 0.990999999999985, 2.11440000000005, 2.20299999999997, 2.82219999999995, 2.73239999999998, 2.7826, 3.76660000000004, 2.26480000000004, 2.31280000000004, 2.40819999999997, 2.75360000000001, 3.33759999999995, 2.71559999999999, 1.7478000000001, 1.42920000000004, 2.39300000000003, 2.22779999999989, 2.34339999999997, 0.87259999999992, 3.88400000000001, 1.80600000000004, 1.91759999999999, 1.16779999999994, 1.50320000000011, 2.52500000000009, 0.226400000000012, 2.31500000000005, 0.930000000000064, 1.25199999999995, 2.14959999999996, 0.0407999999999902, 2.5447999999999, 1.32960000000003, 0.197400000000016, 2.52620000000002, 3.33279999999991, -1.34300000000007, 0.422199999999975, 0.917200000000093, 1.12920000000008, 1.46060000000011, 1.45779999999991, 2.8728000000001, 3.33359999999993, -1.34079999999994, 1.57680000000005, 0.363000000000056, 1.40740000000005, 0.656600000000026, 0.801400000000058, -0.454600000000028, 1.51919999999996, },
+ // precision 9
+ { 368, 361.8294, 355.2452, 348.6698, 342.1464, 336.2024, 329.8782, 323.6598, 317.462, 311.2826, 305.7102, 299.7416, 293.9366, 288.1046, 282.285, 277.0668, 271.306, 265.8448, 260.301, 254.9886, 250.2422, 244.8138, 239.7074, 234.7428, 229.8402, 225.1664, 220.3534, 215.594, 210.6886, 205.7876, 201.65, 197.228, 192.8036, 188.1666, 184.0818, 180.0824, 176.2574, 172.302, 168.1644, 164.0056, 160.3802, 156.7192, 152.5234, 149.2084, 145.831, 142.485, 139.1112, 135.4764, 131.76, 129.3368, 126.5538, 122.5058, 119.2646, 116.5902, 113.3818, 110.8998, 107.9532, 105.2062, 102.2798, 99.4728, 96.9582, 94.3292, 92.171, 89.7809999999999, 87.5716, 84.7048, 82.5322, 79.875, 78.3972, 75.3464, 73.7274, 71.2834, 70.1444, 68.4263999999999, 66.0166, 64.018, 62.0437999999999, 60.3399999999999, 58.6856, 57.9836, 55.0311999999999, 54.6769999999999, 52.3188, 51.4846, 49.4423999999999, 47.739, 46.1487999999999, 44.9202, 43.4059999999999, 42.5342000000001, 41.2834, 38.8954000000001, 38.3286000000001, 36.2146, 36.6684, 35.9946, 33.123, 33.4338, 31.7378000000001, 29.076, 28.9692, 27.4964, 27.0998, 25.9864, 26.7754, 24.3208, 23.4838, 22.7388000000001, 24.0758000000001, 21.9097999999999, 20.9728, 19.9228000000001, 19.9292, 16.617, 17.05, 18.2996000000001, 15.6128000000001, 15.7392, 14.5174, 13.6322, 12.2583999999999, 13.3766000000001, 11.423, 13.1232, 9.51639999999998, 10.5938000000001, 9.59719999999993, 8.12220000000002, 9.76739999999995, 7.50440000000003, 7.56999999999994, 6.70440000000008, 6.41419999999994, 6.71019999999999, 5.60940000000005, 4.65219999999999, 6.84099999999989, 3.4072000000001, 3.97859999999991, 3.32760000000007, 5.52160000000003, 3.31860000000006, 2.06940000000009, 4.35400000000004, 1.57500000000005, 0.280799999999999, 2.12879999999996, -0.214799999999968, -0.0378000000000611, -0.658200000000079, 0.654800000000023, -0.0697999999999865, 0.858400000000074, -2.52700000000004, -2.1751999999999, -3.35539999999992, -1.04019999999991, -0.651000000000067, -2.14439999999991, -1.96659999999997, -3.97939999999994, -0.604400000000169, -3.08260000000018, -3.39159999999993, -5.29640000000018, -5.38920000000007, -5.08759999999984, -4.69900000000007, -5.23720000000003, -3.15779999999995, -4.97879999999986, -4.89899999999989, -7.48880000000008, -5.94799999999987, -5.68060000000014, -6.67180000000008, -4.70499999999993, -7.27779999999984, -4.6579999999999, -4.4362000000001, -4.32139999999981, -5.18859999999995, -6.66879999999992, -6.48399999999992, -5.1260000000002, -4.4032000000002, -6.13500000000022, -5.80819999999994, -4.16719999999987, -4.15039999999999, -7.45600000000013, -7.24080000000004, -9.83179999999993, -5.80420000000004, -8.6561999999999, -6.99940000000015, -10.5473999999999, -7.34139999999979, -6.80999999999995, -6.29719999999998, -6.23199999999997, },
+ // precision 10
+ { 737.1256, 724.4234, 711.1064, 698.4732, 685.4636, 673.0644, 660.488, 647.9654, 636.0832, 623.7864, 612.1992, 600.2176, 588.5228, 577.1716, 565.7752, 554.899, 543.6126, 532.6492, 521.9474, 511.5214, 501.1064, 490.6364, 480.2468, 470.4588, 460.3832, 451.0584, 440.8606, 431.3868, 422.5062, 413.1862, 404.463, 395.339, 386.1936, 378.1292, 369.1854, 361.2908, 353.3324, 344.8518, 337.5204, 329.4854, 321.9318, 314.552, 306.4658, 299.4256, 292.849, 286.152, 278.8956, 271.8792, 265.118, 258.62, 252.5132, 245.9322, 239.7726, 233.6086, 227.5332, 222.5918, 216.4294, 210.7662, 205.4106, 199.7338, 194.9012, 188.4486, 183.1556, 178.6338, 173.7312, 169.6264, 163.9526, 159.8742, 155.8326, 151.1966, 147.5594, 143.07, 140.037, 134.1804, 131.071, 127.4884, 124.0848, 120.2944, 117.333, 112.9626, 110.2902, 107.0814, 103.0334, 99.4832000000001, 96.3899999999999, 93.7202000000002, 90.1714000000002, 87.2357999999999, 85.9346, 82.8910000000001, 80.0264000000002, 78.3834000000002, 75.1543999999999, 73.8683999999998, 70.9895999999999, 69.4367999999999, 64.8701999999998, 65.0408000000002, 61.6738, 59.5207999999998, 57.0158000000001, 54.2302, 53.0962, 50.4985999999999, 52.2588000000001, 47.3914, 45.6244000000002, 42.8377999999998, 43.0072, 40.6516000000001, 40.2453999999998, 35.2136, 36.4546, 33.7849999999999, 33.2294000000002, 32.4679999999998, 30.8670000000002, 28.6507999999999, 28.9099999999999, 27.5983999999999, 26.1619999999998, 24.5563999999999, 23.2328000000002, 21.9484000000002, 21.5902000000001, 21.3346000000001, 17.7031999999999, 20.6111999999998, 19.5545999999999, 15.7375999999999, 17.0720000000001, 16.9517999999998, 15.326, 13.1817999999998, 14.6925999999999, 13.0859999999998, 13.2754, 10.8697999999999, 11.248, 7.3768, 4.72339999999986, 7.97899999999981, 8.7503999999999, 7.68119999999999, 9.7199999999998, 7.73919999999998, 5.6224000000002, 7.44560000000001, 6.6601999999998, 5.9058, 4.00199999999995, 4.51699999999983, 4.68240000000014, 3.86220000000003, 5.13639999999987, 5.98500000000013, 2.47719999999981, 2.61999999999989, 1.62800000000016, 4.65000000000009, 0.225599999999758, 0.831000000000131, -0.359400000000278, 1.27599999999984, -2.92559999999958, -0.0303999999996449, 2.37079999999969, -2.0033999999996, 0.804600000000391, 0.30199999999968, 1.1247999999996, -2.6880000000001, 0.0321999999996478, -1.18099999999959, -3.9402, -1.47940000000017, -0.188400000000001, -2.10720000000038, -2.04159999999956, -3.12880000000041, -4.16160000000036, -0.612799999999879, -3.48719999999958, -8.17900000000009, -5.37780000000021, -4.01379999999972, -5.58259999999973, -5.73719999999958, -7.66799999999967, -5.69520000000011, -1.1247999999996, -5.58520000000044, -8.04560000000038, -4.64840000000004, -11.6468000000004, -7.97519999999986, -5.78300000000036, -7.67420000000038, -10.6328000000003, -9.81720000000041, },
+ // precision 11
+ { 1476, 1449.6014, 1423.5802, 1397.7942, 1372.3042, 1347.2062, 1321.8402, 1297.2292, 1272.9462, 1248.9926, 1225.3026, 1201.4252, 1178.0578, 1155.6092, 1132.626, 1110.5568, 1088.527, 1066.5154, 1045.1874, 1024.3878, 1003.37, 982.1972, 962.5728, 942.1012, 922.9668, 903.292, 884.0772, 864.8578, 846.6562, 828.041, 809.714, 792.3112, 775.1806, 757.9854, 740.656, 724.346, 707.5154, 691.8378, 675.7448, 659.6722, 645.5722, 630.1462, 614.4124, 600.8728, 585.898, 572.408, 558.4926, 544.4938, 531.6776, 517.282, 505.7704, 493.1012, 480.7388, 467.6876, 456.1872, 445.5048, 433.0214, 420.806, 411.409, 400.4144, 389.4294, 379.2286, 369.651, 360.6156, 350.337, 342.083, 332.1538, 322.5094, 315.01, 305.6686, 298.1678, 287.8116, 280.9978, 271.9204, 265.3286, 257.5706, 249.6014, 242.544, 235.5976, 229.583, 220.9438, 214.672, 208.2786, 201.8628, 195.1834, 191.505, 186.1816, 178.5188, 172.2294, 167.8908, 161.0194, 158.052, 151.4588, 148.1596, 143.4344, 138.5238, 133.13, 127.6374, 124.8162, 118.7894, 117.3984, 114.6078, 109.0858, 105.1036, 103.6258, 98.6018000000004, 95.7618000000002, 93.5821999999998, 88.5900000000001, 86.9992000000002, 82.8800000000001, 80.4539999999997, 74.6981999999998, 74.3644000000004, 73.2914000000001, 65.5709999999999, 66.9232000000002, 65.1913999999997, 62.5882000000001, 61.5702000000001, 55.7035999999998, 56.1764000000003, 52.7596000000003, 53.0302000000001, 49.0609999999997, 48.4694, 44.933, 46.0474000000004, 44.7165999999997, 41.9416000000001, 39.9207999999999, 35.6328000000003, 35.5276000000003, 33.1934000000001, 33.2371999999996, 33.3864000000003, 33.9228000000003, 30.2371999999996, 29.1373999999996, 25.2272000000003, 24.2942000000003, 19.8338000000003, 18.9005999999999, 23.0907999999999, 21.8544000000002, 19.5176000000001, 15.4147999999996, 16.9314000000004, 18.6737999999996, 12.9877999999999, 14.3688000000002, 12.0447999999997, 15.5219999999999, 12.5299999999997, 14.5940000000001, 14.3131999999996, 9.45499999999993, 12.9441999999999, 3.91139999999996, 13.1373999999996, 5.44720000000052, 9.82779999999912, 7.87279999999919, 3.67760000000089, 5.46980000000076, 5.55099999999948, 5.65979999999945, 3.89439999999922, 3.1275999999998, 5.65140000000065, 6.3062000000009, 3.90799999999945, 1.87060000000019, 5.17020000000048, 2.46680000000015, 0.770000000000437, -3.72340000000077, 1.16400000000067, 8.05340000000069, 0.135399999999208, 2.15940000000046, 0.766999999999825, 1.0594000000001, 3.15500000000065, -0.287399999999252, 2.37219999999979, -2.86620000000039, -1.63199999999961, -2.22979999999916, -0.15519999999924, -1.46039999999994, -0.262199999999211, -2.34460000000036, -2.8078000000005, -3.22179999999935, -5.60159999999996, -8.42200000000048, -9.43740000000071, 0.161799999999857, -10.4755999999998, -10.0823999999993, },
+ // precision 12
+ { 2953, 2900.4782, 2848.3568, 2796.3666, 2745.324, 2694.9598, 2644.648, 2595.539, 2546.1474, 2498.2576, 2450.8376, 2403.6076, 2357.451, 2311.38, 2266.4104, 2221.5638, 2176.9676, 2134.193, 2090.838, 2048.8548, 2007.018, 1966.1742, 1925.4482, 1885.1294, 1846.4776, 1807.4044, 1768.8724, 1731.3732, 1693.4304, 1657.5326, 1621.949, 1586.5532, 1551.7256, 1517.6182, 1483.5186, 1450.4528, 1417.865, 1385.7164, 1352.6828, 1322.6708, 1291.8312, 1260.9036, 1231.476, 1201.8652, 1173.6718, 1145.757, 1119.2072, 1092.2828, 1065.0434, 1038.6264, 1014.3192, 988.5746, 965.0816, 940.1176, 917.9796, 894.5576, 871.1858, 849.9144, 827.1142, 805.0818, 783.9664, 763.9096, 742.0816, 724.3962, 706.3454, 688.018, 667.4214, 650.3106, 633.0686, 613.8094, 597.818, 581.4248, 563.834, 547.363, 531.5066, 520.455400000001, 505.583199999999, 488.366, 476.480799999999, 459.7682, 450.0522, 434.328799999999, 423.952799999999, 408.727000000001, 399.079400000001, 387.252200000001, 373.987999999999, 360.852000000001, 351.6394, 339.642, 330.902400000001, 322.661599999999, 311.662200000001, 301.3254, 291.7484, 279.939200000001, 276.7508, 263.215200000001, 254.811400000001, 245.5494, 242.306399999999, 234.8734, 223.787200000001, 217.7156, 212.0196, 200.793, 195.9748, 189.0702, 182.449199999999, 177.2772, 170.2336, 164.741, 158.613600000001, 155.311, 147.5964, 142.837, 137.3724, 132.0162, 130.0424, 121.9804, 120.451800000001, 114.8968, 111.585999999999, 105.933199999999, 101.705, 98.5141999999996, 95.0488000000005, 89.7880000000005, 91.4750000000004, 83.7764000000006, 80.9698000000008, 72.8574000000008, 73.1615999999995, 67.5838000000003, 62.6263999999992, 63.2638000000006, 66.0977999999996, 52.0843999999997, 58.9956000000002, 47.0912000000008, 46.4956000000002, 48.4383999999991, 47.1082000000006, 43.2392, 37.2759999999998, 40.0283999999992, 35.1864000000005, 35.8595999999998, 32.0998, 28.027, 23.6694000000007, 33.8266000000003, 26.3736000000008, 27.2008000000005, 21.3245999999999, 26.4115999999995, 23.4521999999997, 19.5013999999992, 19.8513999999996, 10.7492000000002, 18.6424000000006, 13.1265999999996, 18.2436000000016, 6.71860000000015, 3.39459999999963, 6.33759999999893, 7.76719999999841, 0.813999999998487, 3.82819999999992, 0.826199999999517, 8.07440000000133, -1.59080000000176, 5.01780000000144, 0.455399999998917, -0.24199999999837, 0.174800000000687, -9.07640000000174, -4.20160000000033, -3.77520000000004, -4.75179999999818, -5.3724000000002, -8.90680000000066, -6.10239999999976, -5.74120000000039, -9.95339999999851, -3.86339999999836, -13.7304000000004, -16.2710000000006, -7.51359999999841, -3.30679999999847, -13.1339999999982, -10.0551999999989, -6.72019999999975, -8.59660000000076, -10.9307999999983, -1.8775999999998, -4.82259999999951, -13.7788, -21.6470000000008, -10.6735999999983, -15.7799999999988, },
+ // precision 13
+ { 5907.5052, 5802.2672, 5697.347, 5593.5794, 5491.2622, 5390.5514, 5290.3376, 5191.6952, 5093.5988, 4997.3552, 4902.5972, 4808.3082, 4715.5646, 4624.109, 4533.8216, 4444.4344, 4356.3802, 4269.2962, 4183.3784, 4098.292, 4014.79, 3932.4574, 3850.6036, 3771.2712, 3691.7708, 3615.099, 3538.1858, 3463.4746, 3388.8496, 3315.6794, 3244.5448, 3173.7516, 3103.3106, 3033.6094, 2966.5642, 2900.794, 2833.7256, 2769.81, 2707.3196, 2644.0778, 2583.9916, 2523.4662, 2464.124, 2406.073, 2347.0362, 2292.1006, 2238.1716, 2182.7514, 2128.4884, 2077.1314, 2025.037, 1975.3756, 1928.933, 1879.311, 1831.0006, 1783.2144, 1738.3096, 1694.5144, 1649.024, 1606.847, 1564.7528, 1525.3168, 1482.5372, 1443.9668, 1406.5074, 1365.867, 1329.2186, 1295.4186, 1257.9716, 1225.339, 1193.2972, 1156.3578, 1125.8686, 1091.187, 1061.4094, 1029.4188, 1000.9126, 972.3272, 944.004199999999, 915.7592, 889.965, 862.834200000001, 840.4254, 812.598399999999, 785.924200000001, 763.050999999999, 741.793799999999, 721.466, 699.040799999999, 677.997200000002, 649.866999999998, 634.911800000002, 609.8694, 591.981599999999, 570.2922, 557.129199999999, 538.3858, 521.872599999999, 502.951400000002, 495.776399999999, 475.171399999999, 459.751, 439.995200000001, 426.708999999999, 413.7016, 402.3868, 387.262599999998, 372.0524, 357.050999999999, 342.5098, 334.849200000001, 322.529399999999, 311.613799999999, 295.848000000002, 289.273000000001, 274.093000000001, 263.329600000001, 251.389599999999, 245.7392, 231.9614, 229.7952, 217.155200000001, 208.9588, 199.016599999999, 190.839199999999, 180.6976, 176.272799999999, 166.976999999999, 162.5252, 151.196400000001, 149.386999999999, 133.981199999998, 130.0586, 130.164000000001, 122.053400000001, 110.7428, 108.1276, 106.232400000001, 100.381600000001, 98.7668000000012, 86.6440000000002, 79.9768000000004, 82.4722000000002, 68.7026000000005, 70.1186000000016, 71.9948000000004, 58.998599999999, 59.0492000000013, 56.9818000000014, 47.5338000000011, 42.9928, 51.1591999999982, 37.2740000000013, 42.7220000000016, 31.3734000000004, 26.8090000000011, 25.8934000000008, 26.5286000000015, 29.5442000000003, 19.3503999999994, 26.0760000000009, 17.9527999999991, 14.8419999999969, 10.4683999999979, 8.65899999999965, 9.86720000000059, 4.34139999999752, -0.907800000000861, -3.32080000000133, -0.936199999996461, -11.9916000000012, -8.87000000000262, -6.33099999999831, -11.3366000000024, -15.9207999999999, -9.34659999999712, -15.5034000000014, -19.2097999999969, -15.357799999998, -28.2235999999975, -30.6898000000001, -19.3271999999997, -25.6083999999973, -24.409599999999, -13.6385999999984, -33.4473999999973, -32.6949999999997, -28.9063999999998, -31.7483999999968, -32.2935999999972, -35.8329999999987, -47.620600000002, -39.0855999999985, -33.1434000000008, -46.1371999999974, -37.5892000000022, -46.8164000000033, -47.3142000000007, -60.2914000000019, -37.7575999999972, },
+ // precision 14
+ { 11816.475, 11605.0046, 11395.3792, 11188.7504, 10984.1814, 10782.0086, 10582.0072, 10384.503, 10189.178, 9996.2738, 9806.0344, 9617.9798, 9431.394, 9248.7784, 9067.6894, 8889.6824, 8712.9134, 8538.8624, 8368.4944, 8197.7956, 8031.8916, 7866.6316, 7703.733, 7544.5726, 7386.204, 7230.666, 7077.8516, 6926.7886, 6778.6902, 6631.9632, 6487.304, 6346.7486, 6206.4408, 6070.202, 5935.2576, 5799.924, 5671.0324, 5541.9788, 5414.6112, 5290.0274, 5166.723, 5047.6906, 4929.162, 4815.1406, 4699.127, 4588.5606, 4477.7394, 4369.4014, 4264.2728, 4155.9224, 4055.581, 3955.505, 3856.9618, 3761.3828, 3666.9702, 3575.7764, 3482.4132, 3395.0186, 3305.8852, 3221.415, 3138.6024, 3056.296, 2970.4494, 2896.1526, 2816.8008, 2740.2156, 2670.497, 2594.1458, 2527.111, 2460.8168, 2387.5114, 2322.9498, 2260.6752, 2194.2686, 2133.7792, 2074.767, 2015.204, 1959.4226, 1898.6502, 1850.006, 1792.849, 1741.4838, 1687.9778, 1638.1322, 1589.3266, 1543.1394, 1496.8266, 1447.8516, 1402.7354, 1361.9606, 1327.0692, 1285.4106, 1241.8112, 1201.6726, 1161.973, 1130.261, 1094.2036, 1048.2036, 1020.6436, 990.901400000002, 961.199800000002, 924.769800000002, 899.526400000002, 872.346400000002, 834.375, 810.432000000001, 780.659800000001, 756.013800000001, 733.479399999997, 707.923999999999, 673.858, 652.222399999999, 636.572399999997, 615.738599999997, 586.696400000001, 564.147199999999, 541.679600000003, 523.943599999999, 505.714599999999, 475.729599999999, 461.779600000002, 449.750800000002, 439.020799999998, 412.7886, 400.245600000002, 383.188199999997, 362.079599999997, 357.533799999997, 334.319000000003, 327.553399999997, 308.559399999998, 291.270199999999, 279.351999999999, 271.791400000002, 252.576999999997, 247.482400000001, 236.174800000001, 218.774599999997, 220.155200000001, 208.794399999999, 201.223599999998, 182.995600000002, 185.5268, 164.547400000003, 176.5962, 150.689599999998, 157.8004, 138.378799999999, 134.021200000003, 117.614399999999, 108.194000000003, 97.0696000000025, 89.6042000000016, 95.6030000000028, 84.7810000000027, 72.635000000002, 77.3482000000004, 59.4907999999996, 55.5875999999989, 50.7346000000034, 61.3916000000027, 50.9149999999936, 39.0384000000049, 58.9395999999979, 29.633600000001, 28.2032000000036, 26.0078000000067, 17.0387999999948, 9.22000000000116, 13.8387999999977, 8.07240000000456, 14.1549999999988, 15.3570000000036, 3.42660000000615, 6.24820000000182, -2.96940000000177, -8.79940000000352, -5.97860000000219, -14.4048000000039, -3.4143999999942, -13.0148000000045, -11.6977999999945, -25.7878000000055, -22.3185999999987, -24.409599999999, -31.9756000000052, -18.9722000000038, -22.8678000000073, -30.8972000000067, -32.3715999999986, -22.3907999999938, -43.6720000000059, -35.9038, -39.7492000000057, -54.1641999999993, -45.2749999999942, -42.2989999999991, -44.1089999999967, -64.3564000000042, -49.9551999999967, -42.6116000000038, },
+ // precision 15
+ { 23634.0036, 23210.8034, 22792.4744, 22379.1524, 21969.7928, 21565.326, 21165.3532, 20770.2806, 20379.9892, 19994.7098, 19613.318, 19236.799, 18865.4382, 18498.8244, 18136.5138, 17778.8668, 17426.2344, 17079.32, 16734.778, 16397.2418, 16063.3324, 15734.0232, 15409.731, 15088.728, 14772.9896, 14464.1402, 14157.5588, 13855.5958, 13559.3296, 13264.9096, 12978.326, 12692.0826, 12413.8816, 12137.3192, 11870.2326, 11602.5554, 11340.3142, 11079.613, 10829.5908, 10583.5466, 10334.0344, 10095.5072, 9859.694, 9625.2822, 9395.7862, 9174.0586, 8957.3164, 8738.064, 8524.155, 8313.7396, 8116.9168, 7913.542, 7718.4778, 7521.65, 7335.5596, 7154.2906, 6968.7396, 6786.3996, 6613.236, 6437.406, 6270.6598, 6107.7958, 5945.7174, 5787.6784, 5635.5784, 5482.308, 5337.9784, 5190.0864, 5045.9158, 4919.1386, 4771.817, 4645.7742, 4518.4774, 4385.5454, 4262.6622, 4142.74679999999, 4015.5318, 3897.9276, 3790.7764, 3685.13800000001, 3573.6274, 3467.9706, 3368.61079999999, 3271.5202, 3170.3848, 3076.4656, 2982.38400000001, 2888.4664, 2806.4868, 2711.9564, 2634.1434, 2551.3204, 2469.7662, 2396.61139999999, 2318.9902, 2243.8658, 2171.9246, 2105.01360000001, 2028.8536, 1960.9952, 1901.4096, 1841.86079999999, 1777.54700000001, 1714.5802, 1654.65059999999, 1596.311, 1546.2016, 1492.3296, 1433.8974, 1383.84600000001, 1339.4152, 1293.5518, 1245.8686, 1193.50659999999, 1162.27959999999, 1107.19439999999, 1069.18060000001, 1035.09179999999, 999.679000000004, 957.679999999993, 925.300199999998, 888.099400000006, 848.638600000006, 818.156400000007, 796.748399999997, 752.139200000005, 725.271200000003, 692.216, 671.633600000001, 647.939799999993, 621.670599999998, 575.398799999995, 561.226599999995, 532.237999999998, 521.787599999996, 483.095799999996, 467.049599999998, 465.286399999997, 415.548599999995, 401.047399999996, 380.607999999993, 377.362599999993, 347.258799999996, 338.371599999999, 310.096999999994, 301.409199999995, 276.280799999993, 265.586800000005, 258.994399999996, 223.915999999997, 215.925399999993, 213.503800000006, 191.045400000003, 166.718200000003, 166.259000000005, 162.941200000001, 148.829400000002, 141.645999999993, 123.535399999993, 122.329800000007, 89.473399999988, 80.1962000000058, 77.5457999999926, 59.1056000000099, 83.3509999999951, 52.2906000000075, 36.3979999999865, 40.6558000000077, 42.0003999999899, 19.6630000000005, 19.7153999999864, -8.38539999999921, -0.692799999989802, 0.854800000000978, 3.23219999999856, -3.89040000000386, -5.25880000001052, -24.9052000000083, -22.6837999999989, -26.4286000000138, -34.997000000003, -37.0216000000073, -43.430400000012, -58.2390000000014, -68.8034000000043, -56.9245999999985, -57.8583999999973, -77.3097999999882, -73.2793999999994, -81.0738000000129, -87.4530000000086, -65.0254000000132, -57.296399999992, -96.2746000000043, -103.25, -96.081600000005, -91.5542000000132, -102.465200000006, -107.688599999994, -101.458000000013, -109.715800000005, },
+ // precision 16
+ { 47270, 46423.3584, 45585.7074, 44757.152, 43938.8416, 43130.9514, 42330.03, 41540.407, 40759.6348, 39988.206, 39226.5144, 38473.2096, 37729.795, 36997.268, 36272.6448, 35558.665, 34853.0248, 34157.4472, 33470.5204, 32793.5742, 32127.0194, 31469.4182, 30817.6136, 30178.6968, 29546.8908, 28922.8544, 28312.271, 27707.0924, 27114.0326, 26526.692, 25948.6336, 25383.7826, 24823.5998, 24272.2974, 23732.2572, 23201.4976, 22674.2796, 22163.6336, 21656.515, 21161.7362, 20669.9368, 20189.4424, 19717.3358, 19256.3744, 18795.9638, 18352.197, 17908.5738, 17474.391, 17052.918, 16637.2236, 16228.4602, 15823.3474, 15428.6974, 15043.0284, 14667.6278, 14297.4588, 13935.2882, 13578.5402, 13234.6032, 12882.1578, 12548.0728, 12219.231, 11898.0072, 11587.2626, 11279.9072, 10973.5048, 10678.5186, 10392.4876, 10105.2556, 9825.766, 9562.5444, 9294.2222, 9038.2352, 8784.848, 8533.2644, 8301.7776, 8058.30859999999, 7822.94579999999, 7599.11319999999, 7366.90779999999, 7161.217, 6957.53080000001, 6736.212, 6548.21220000001, 6343.06839999999, 6156.28719999999, 5975.15419999999, 5791.75719999999, 5621.32019999999, 5451.66, 5287.61040000001, 5118.09479999999, 4957.288, 4798.4246, 4662.17559999999, 4512.05900000001, 4364.68539999999, 4220.77720000001, 4082.67259999999, 3957.19519999999, 3842.15779999999, 3699.3328, 3583.01180000001, 3473.8964, 3338.66639999999, 3233.55559999999, 3117.799, 3008.111, 2909.69140000001, 2814.86499999999, 2719.46119999999, 2624.742, 2532.46979999999, 2444.7886, 2370.1868, 2272.45259999999, 2196.19260000001, 2117.90419999999, 2023.2972, 1969.76819999999, 1885.58979999999, 1833.2824, 1733.91200000001, 1682.54920000001, 1604.57980000001, 1556.11240000001, 1491.3064, 1421.71960000001, 1371.22899999999, 1322.1324, 1264.7892, 1196.23920000001, 1143.8474, 1088.67240000001, 1073.60380000001, 1023.11660000001, 959.036400000012, 927.433199999999, 906.792799999996, 853.433599999989, 841.873800000001, 791.1054, 756.899999999994, 704.343200000003, 672.495599999995, 622.790399999998, 611.254799999995, 567.283200000005, 519.406599999988, 519.188400000014, 495.312800000014, 451.350799999986, 443.973399999988, 431.882199999993, 392.027000000002, 380.924200000009, 345.128999999986, 298.901400000002, 287.771999999997, 272.625, 247.253000000026, 222.490600000019, 223.590000000026, 196.407599999977, 176.425999999978, 134.725199999986, 132.4804, 110.445599999977, 86.7939999999944, 56.7038000000175, 64.915399999998, 38.3726000000024, 37.1606000000029, 46.170999999973, 49.1716000000015, 15.3362000000197, 6.71639999997569, -34.8185999999987, -39.4476000000141, 12.6830000000191, -12.3331999999937, -50.6565999999875, -59.9538000000175, -65.1054000000004, -70.7576000000117, -106.325200000021, -126.852200000023, -110.227599999984, -132.885999999999, -113.897200000007, -142.713800000027, -151.145399999979, -150.799200000009, -177.756200000003, -156.036399999983, -182.735199999996, -177.259399999981, -198.663600000029, -174.577600000019, -193.84580000001, },
+ // precision 17
+ { 94541, 92848.811, 91174.019, 89517.558, 87879.9705, 86262.7565, 84663.5125, 83083.7435, 81521.7865, 79977.272, 78455.9465, 76950.219, 75465.432, 73994.152, 72546.71, 71115.2345, 69705.6765, 68314.937, 66944.2705, 65591.255, 64252.9485, 62938.016, 61636.8225, 60355.592, 59092.789, 57850.568, 56624.518, 55417.343, 54231.1415, 53067.387, 51903.526, 50774.649, 49657.6415, 48561.05, 47475.7575, 46410.159, 45364.852, 44327.053, 43318.4005, 42325.6165, 41348.4595, 40383.6265, 39436.77, 38509.502, 37594.035, 36695.939, 35818.6895, 34955.691, 34115.8095, 33293.949, 32465.0775, 31657.6715, 30877.2585, 30093.78, 29351.3695, 28594.1365, 27872.115, 27168.7465, 26477.076, 25774.541, 25106.5375, 24452.5135, 23815.5125, 23174.0655, 22555.2685, 21960.2065, 21376.3555, 20785.1925, 20211.517, 19657.0725, 19141.6865, 18579.737, 18081.3955, 17578.995, 17073.44, 16608.335, 16119.911, 15651.266, 15194.583, 14749.0495, 14343.4835, 13925.639, 13504.509, 13099.3885, 12691.2855, 12328.018, 11969.0345, 11596.5145, 11245.6355, 10917.6575, 10580.9785, 10277.8605, 9926.58100000001, 9605.538, 9300.42950000003, 8989.97850000003, 8728.73249999998, 8448.3235, 8175.31050000002, 7898.98700000002, 7629.79100000003, 7413.76199999999, 7149.92300000001, 6921.12650000001, 6677.1545, 6443.28000000003, 6278.23450000002, 6014.20049999998, 5791.20299999998, 5605.78450000001, 5438.48800000001, 5234.2255, 5059.6825, 4887.43349999998, 4682.935, 4496.31099999999, 4322.52250000002, 4191.42499999999, 4021.24200000003, 3900.64799999999, 3762.84250000003, 3609.98050000001, 3502.29599999997, 3363.84250000003, 3206.54849999998, 3079.70000000001, 2971.42300000001, 2867.80349999998, 2727.08100000001, 2630.74900000001, 2496.6165, 2440.902, 2356.19150000002, 2235.58199999999, 2120.54149999999, 2012.25449999998, 1933.35600000003, 1820.93099999998, 1761.54800000001, 1663.09350000002, 1578.84600000002, 1509.48149999999, 1427.3345, 1379.56150000001, 1306.68099999998, 1212.63449999999, 1084.17300000001, 1124.16450000001, 1060.69949999999, 1007.48849999998, 941.194499999983, 879.880500000028, 836.007500000007, 782.802000000025, 748.385499999975, 647.991500000004, 626.730500000005, 570.776000000013, 484.000500000024, 513.98550000001, 418.985499999952, 386.996999999974, 370.026500000036, 355.496999999974, 356.731499999994, 255.92200000002, 259.094000000041, 205.434499999974, 165.374500000034, 197.347500000033, 95.718499999959, 67.6165000000037, 54.6970000000438, 31.7395000000251, -15.8784999999916, 8.42500000004657, -26.3754999999655, -118.425500000012, -66.6629999999423, -42.9745000000112, -107.364999999991, -189.839000000036, -162.611499999999, -164.964999999967, -189.079999999958, -223.931499999948, -235.329999999958, -269.639500000048, -249.087999999989, -206.475499999942, -283.04449999996, -290.667000000016, -304.561499999953, -336.784499999951, -380.386500000022, -283.280499999993, -364.533000000054, -389.059499999974, -364.454000000027, -415.748000000021, -417.155000000028, },
+ // precision 18
+ { 189083, 185696.913, 182348.774, 179035.946, 175762.762, 172526.444, 169329.754, 166166.099, 163043.269, 159958.91, 156907.912, 153906.845, 150924.199, 147996.568, 145093.457, 142239.233, 139421.475, 136632.27, 133889.588, 131174.2, 128511.619, 125868.621, 123265.385, 120721.061, 118181.769, 115709.456, 113252.446, 110840.198, 108465.099, 106126.164, 103823.469, 101556.618, 99308.004, 97124.508, 94937.803, 92833.731, 90745.061, 88677.627, 86617.47, 84650.442, 82697.833, 80769.132, 78879.629, 77014.432, 75215.626, 73384.587, 71652.482, 69895.93, 68209.301, 66553.669, 64921.981, 63310.323, 61742.115, 60205.018, 58698.658, 57190.657, 55760.865, 54331.169, 52908.167, 51550.273, 50225.254, 48922.421, 47614.533, 46362.049, 45098.569, 43926.083, 42736.03, 41593.473, 40425.26, 39316.237, 38243.651, 37170.617, 36114.609, 35084.19, 34117.233, 33206.509, 32231.505, 31318.728, 30403.404, 29540.0550000001, 28679.236, 27825.862, 26965.216, 26179.148, 25462.08, 24645.952, 23922.523, 23198.144, 22529.128, 21762.4179999999, 21134.779, 20459.117, 19840.818, 19187.04, 18636.3689999999, 17982.831, 17439.7389999999, 16874.547, 16358.2169999999, 15835.684, 15352.914, 14823.681, 14329.313, 13816.897, 13342.874, 12880.882, 12491.648, 12021.254, 11625.392, 11293.7610000001, 10813.697, 10456.209, 10099.074, 9755.39000000001, 9393.18500000006, 9047.57900000003, 8657.98499999999, 8395.85900000005, 8033, 7736.95900000003, 7430.59699999995, 7258.47699999996, 6924.58200000005, 6691.29399999999, 6357.92500000005, 6202.05700000003, 5921.19700000004, 5628.28399999999, 5404.96799999999, 5226.71100000001, 4990.75600000005, 4799.77399999998, 4622.93099999998, 4472.478, 4171.78700000001, 3957.46299999999, 3868.95200000005, 3691.14300000004, 3474.63100000005, 3341.67200000002, 3109.14000000001, 3071.97400000005, 2796.40399999998, 2756.17799999996, 2611.46999999997, 2471.93000000005, 2382.26399999997, 2209.22400000005, 2142.28399999999, 2013.96100000001, 1911.18999999994, 1818.27099999995, 1668.47900000005, 1519.65800000005, 1469.67599999998, 1367.13800000004, 1248.52899999998, 1181.23600000003, 1022.71900000004, 1088.20700000005, 959.03600000008, 876.095999999903, 791.183999999892, 703.337000000058, 731.949999999953, 586.86400000006, 526.024999999907, 323.004999999888, 320.448000000091, 340.672999999952, 309.638999999966, 216.601999999955, 102.922999999952, 19.2399999999907, -0.114000000059605, -32.6240000000689, -89.3179999999702, -153.497999999905, -64.2970000000205, -143.695999999996, -259.497999999905, -253.017999999924, -213.948000000091, -397.590000000084, -434.006000000052, -403.475000000093, -297.958000000101, -404.317000000039, -528.898999999976, -506.621000000043, -513.205000000075, -479.351000000024, -596.139999999898, -527.016999999993, -664.681000000099, -680.306000000099, -704.050000000047, -850.486000000034, -757.43200000003, -713.308999999892, }
+ };
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/aggregation/hll/HyperLogLog.java b/searchlib/src/main/java/com/yahoo/searchlib/aggregation/hll/HyperLogLog.java
new file mode 100644
index 00000000000..20beb642fde
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/aggregation/hll/HyperLogLog.java
@@ -0,0 +1,18 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.aggregation.hll;
+
+/**
+ * Contain constants relevant for HyperLogLog classes.
+ *
+ * @author bjorncs
+ */
+public interface HyperLogLog {
+ /**
+ * Default HLL precision.
+ */
+ int DEFAULT_PRECISION = 10;
+ /**
+ * Threshold to convert sparse sketch to normal sketch.
+ */
+ int SPARSE_SKETCH_CONVERSION_THRESHOLD = 256;
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/aggregation/hll/HyperLogLogEstimator.java b/searchlib/src/main/java/com/yahoo/searchlib/aggregation/hll/HyperLogLogEstimator.java
new file mode 100644
index 00000000000..7055686a4c0
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/aggregation/hll/HyperLogLogEstimator.java
@@ -0,0 +1,172 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.aggregation.hll;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * Implementation of the result computation phase of the HyperLogLog algorithm.
+ * Based on the pseudo code from: http://www.dmtcs.org/dmtcs-ojs/index.php/proceedings/article/viewArticle/914
+ *
+ * @author bjorncs
+ */
+public class HyperLogLogEstimator implements UniqueCountEstimator<Sketch<?>> {
+
+ // Number of buckets in sketch.
+ private final int nBuckets;
+ // The bias estimator used to bias correct the raw estimate.
+ private final BiasEstimator biasEstimator;
+ // Linear counting threshold. Linear counting will only be used if raw estimate is equal or below this threshold.
+ private final int linearCountingThreshold;
+ // A bias correcting constant used in calculation of raw estimate.
+ private final double alphaCoefficient;
+
+ /**
+ * Creates the estimator for a given precision. The resulting memory consumption is the exponential to the precision.
+ *
+ * @param precision The precision parameter as defined in HLL algorithm.
+ */
+ public HyperLogLogEstimator(int precision) {
+ Preconditions.checkArgument(precision >= 4 && precision <= 18, "Invalid precision: %s.", precision);
+ this.nBuckets = 1 << precision;
+ this.biasEstimator = new BiasEstimator(precision);
+ this.linearCountingThreshold = getLinearCountingThreshold(precision);
+ this.alphaCoefficient = getAlphaCoefficient(nBuckets);
+ }
+
+
+ /**
+ * Creates the estimator with the default precision ({@link HyperLogLog#DEFAULT_PRECISION}.
+ */
+ public HyperLogLogEstimator() {
+ this(HyperLogLog.DEFAULT_PRECISION);
+ }
+
+ /**
+ * Estimates the number of unique elements.
+ *
+ * @param sketch A sketch populated with values from the aggregation phase of HLL.
+ * @return The estimated number of unique elements.
+ */
+ @Override
+ public long estimateCount(Sketch<?> sketch) {
+ if (sketch instanceof NormalSketch) {
+ return estimateCount((NormalSketch) sketch);
+ } else {
+ return estimateCount((SparseSketch) sketch);
+ }
+ }
+
+ // The sparse sketch contains a set of unique hash values. The size of this set is a good estimator as the
+ // probability for hash collision is very low.
+ private long estimateCount(SparseSketch sketch) {
+ return sketch.size();
+ }
+
+
+ // Performs the result calculation phase of HLL. Note that the {@link NormalSketch}
+ // precision must match the one supplied in the constructor.
+ private long estimateCount(NormalSketch sketch) {
+ Preconditions.checkArgument(sketch.size() == nBuckets,
+ "Sketch has invalid size. Expected %s, actual %s.", nBuckets, sketch.size());
+ double rawEstimate = calculateRawEstimate(sketch);
+ if (shouldPerformBiasCorrection(rawEstimate)) {
+ rawEstimate -= biasEstimator.estimateBias(rawEstimate);
+ }
+
+ // Use linear counting if sketch contains buckets with 0 value.
+ int nZeroBuckets = countZeroBuckets(sketch);
+ if (nZeroBuckets > 0) {
+ double linearCountingEstimate = calculateLinearCountingEstimate(nZeroBuckets);
+ if (linearCountingEstimate <= linearCountingThreshold) {
+ rawEstimate = linearCountingEstimate;
+ }
+ }
+
+ return Math.round(rawEstimate);
+ }
+
+ private double calculateLinearCountingEstimate(int nZeroBuckets) {
+ return nBuckets * Math.log(nBuckets / (double) nZeroBuckets);
+ }
+
+ private boolean shouldPerformBiasCorrection(double rawEstimate) {
+ return rawEstimate <= 5 * nBuckets;
+ }
+
+ private double calculateRawEstimate(NormalSketch sketch) {
+ double indicator = calculateIndicator(sketch);
+ return alphaCoefficient * nBuckets * nBuckets * indicator;
+ }
+
+ // Calculates the raw indicator, summing up the probabilities for each bucket.
+ // indicator == 1 / sum(2^(-S[i]) where i = 0 to n
+ private static double calculateIndicator(NormalSketch sketch) {
+ double sum = 0;
+ for (byte prefixLength : sketch.data()) {
+ sum += Math.pow(2, -prefixLength);
+ }
+ return 1 / sum;
+ }
+
+ private static int countZeroBuckets(NormalSketch sketch) {
+ int nZeroBuckets = 0;
+ for (byte prefixLength : sketch.data()) {
+ if (prefixLength == 0) {
+ ++nZeroBuckets;
+ }
+ }
+ return nZeroBuckets;
+ }
+
+ // Empirically determined values from Google HLL++ paper. Decides whether to use linear counting instead of raw HLL estimate.
+ private static int getLinearCountingThreshold(int precision) {
+ switch (precision) {
+ case 4:
+ return 10;
+ case 5:
+ return 20;
+ case 6:
+ return 40;
+ case 7:
+ return 80;
+ case 8:
+ return 220;
+ case 9:
+ return 400;
+ case 10:
+ return 900;
+ case 11:
+ return 1800;
+ case 12:
+ return 3100;
+ case 13:
+ return 6500;
+ case 14:
+ return 11500;
+ case 15:
+ return 22000;
+ case 16:
+ return 50000;
+ case 17:
+ return 120000;
+ case 18:
+ return 350000;
+ default:
+ // Unreachable code.
+ throw new RuntimeException();
+ }
+ }
+
+ private static double getAlphaCoefficient(int nBuckets) {
+ switch (nBuckets) {
+ case 16:
+ return 0.673;
+ case 32:
+ return 0.697;
+ case 64:
+ return 0.709;
+ default: /* nBuckets >= 128 */
+ return 0.7213 / (1 + 1.079 / nBuckets);
+ }
+ }
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/aggregation/hll/NormalSketch.java b/searchlib/src/main/java/com/yahoo/searchlib/aggregation/hll/NormalSketch.java
new file mode 100644
index 00000000000..c91f1e82a3b
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/aggregation/hll/NormalSketch.java
@@ -0,0 +1,190 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.aggregation.hll;
+
+import com.google.common.base.Preconditions;
+import com.yahoo.vespa.objects.Deserializer;
+import com.yahoo.vespa.objects.Serializer;
+import net.jpountz.lz4.LZ4Compressor;
+import net.jpountz.lz4.LZ4Exception;
+import net.jpountz.lz4.LZ4Factory;
+import net.jpountz.lz4.LZ4FastDecompressor;
+
+import java.util.Arrays;
+
+/**
+ * Sketch used by the HyperLogLog (HLL) algorithm.
+ *
+ * @author bjorncs
+ */
+public class NormalSketch extends Sketch<NormalSketch> {
+
+ public static final int classId = registerClass(0x4000 + 170, NormalSketch.class);
+
+ private final byte[] data;
+ private final int bucketMask;
+
+ /**
+ * Create a sketch with the default precision given by {@link HyperLogLog#DEFAULT_PRECISION}.
+ * */
+ public NormalSketch() {
+ this(HyperLogLog.DEFAULT_PRECISION);
+ }
+
+ /**
+ * Create a sketch with a given HLL precision parameter.
+ *
+ * @param precision The precision parameter used by HLL. Determines the size of the sketch.
+ */
+ public NormalSketch(int precision) {
+ this.data = new byte[1 << precision];
+ this.bucketMask = (1 << precision) - 1; // A mask where the lowest `precision` bits are 1.
+ }
+
+ /**
+ * Lossless merge of sketches. Performs a pairwise maximum on the underlying data array.
+ *
+ * @param other Other sketch
+ */
+ @Override
+ public void merge(NormalSketch other) {
+ Preconditions.checkArgument(data.length == other.data.length,
+ "Trying to merge sketch with one of different size. Expected %s, actual %s", data.length, other.data.length);
+ for (int i = 0; i < data.length; i++) {
+ data[i] = (byte) Math.max(data[i], other.data[i]);
+ }
+ }
+
+ /**
+ * Aggregates the hash values.
+ *
+ * @param hashValues Provides an iterator for the hash values
+ */
+ @Override
+ public void aggregate(Iterable<Integer> hashValues) {
+ for (int hash : hashValues) {
+ aggregate(hash);
+ }
+ }
+
+ /**
+ * Aggregates the hash value.
+ *
+ * @param hash Hash value.
+ */
+ @Override
+ public void aggregate(int hash) {
+ int existingValue = data[hash & bucketMask];
+ int newValue = Integer.numberOfLeadingZeros(hash | bucketMask) + 1;
+ data[hash & bucketMask] = (byte) Math.max(newValue, existingValue);
+ }
+
+ /**
+ * Serializes the Sketch.
+ *
+ * Serialization format
+ * ==================
+ * Original size: 4 bytes
+ * Compressed size: 4 bytes
+ * Compressed data: N * 1 bytes
+ *
+ * Invariant:
+ * compressed size &lt;= original size
+ *
+ * Special case:
+ * compressed size == original size =&gt; data is uncompressed
+ *
+ * @param buf Serializer
+ */
+ @Override
+ protected void onSerialize(Serializer buf) {
+ super.onSerialize(buf);
+ buf.putInt(null, data.length);
+ try {
+ LZ4Compressor c = LZ4Factory.safeInstance().highCompressor();
+ byte[] compressedData = new byte[data.length];
+ int compressedSize = c.compress(data, compressedData);
+ serializeDataArray(compressedData, compressedSize, buf);
+ } catch (LZ4Exception e) {
+ // LZ4Compressor.compress will throw this exception if it is unable to compress
+ // into compressedData (when compressed size >= original size)
+ serializeDataArray(data, data.length, buf);
+ }
+ }
+
+ private static void serializeDataArray(byte[] source, int length, Serializer buf) {
+ buf.putInt(null, length);
+ for (int i = 0; i < length; i++) {
+ buf.putByte(null, source[i]);
+ }
+ }
+
+ @Override
+ protected void onDeserialize(Deserializer buf) {
+ super.onDeserialize(buf);
+ int length = buf.getInt(null);
+ int compressedLength = buf.getInt(null);
+ Preconditions.checkState(length == data.length,
+ "Size of serialized sketch does not match expected value. Expected %s, actual %s.", data.length, length);
+
+ if (length == compressedLength) {
+ deserializeDataArray(data, length, buf);
+ } else {
+ LZ4FastDecompressor c = LZ4Factory.safeInstance().fastDecompressor();
+ byte[] compressedData = buf.getBytes(null, compressedLength);
+ c.decompress(compressedData, data);
+ }
+ }
+
+ private static void deserializeDataArray(byte[] destination, int length, Deserializer buf) {
+ for (int i = 0; i < length; i++) {
+ destination[i] = buf.getByte(null);
+ }
+ }
+
+ /**
+ * Returns the underlying byte array backing the sketch.
+ *
+ * @return The underlying sketch data
+ */
+ public byte[] data() {
+ return data;
+ }
+
+ /**
+ * Sketch size.
+ *
+ * @return Number of buckets in the sketch.
+ */
+ public int size() {
+ return data.length;
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) return true;
+ if (o == null || getClass() != o.getClass()) return false;
+
+ NormalSketch sketch = (NormalSketch) o;
+
+ if (!Arrays.equals(data, sketch.data)) return false;
+
+ return true;
+ }
+
+ @Override
+ public int hashCode() {
+ return Arrays.hashCode(data);
+ }
+
+ @Override
+ protected int onGetClassId() {
+ return classId;
+ }
+
+ @Override
+ public String toString() {
+ return "NormalSketch{" +
+ "data=" + Arrays.toString(data) +
+ '}';
+ }
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/aggregation/hll/Sketch.java b/searchlib/src/main/java/com/yahoo/searchlib/aggregation/hll/Sketch.java
new file mode 100644
index 00000000000..523942f1e3e
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/aggregation/hll/Sketch.java
@@ -0,0 +1,32 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.aggregation.hll;
+
+import com.yahoo.vespa.objects.Identifiable;
+
+/**
+ * Represents a sketch. All sketch types must provide a merge method.
+ *
+ * @param <T> The type of the sub-class.
+ */
+public abstract class Sketch<T extends Sketch<T>> extends Identifiable {
+ /**
+ * Merge content of other into 'this'.
+ *
+ * @param other Other sketch
+ */
+ public abstract void merge(T other);
+
+ /**
+ * Aggregates the hash values.
+ *
+ * @param hashValues Provides an iterator for the hash values
+ */
+ public abstract void aggregate(Iterable<Integer> hashValues);
+
+ /**
+ * Aggregates the hash value.
+ *
+ * @param hash Hash value.
+ */
+ public abstract void aggregate(int hash);
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/aggregation/hll/SketchMerger.java b/searchlib/src/main/java/com/yahoo/searchlib/aggregation/hll/SketchMerger.java
new file mode 100644
index 00000000000..9d9a67edafb
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/aggregation/hll/SketchMerger.java
@@ -0,0 +1,60 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.aggregation.hll;
+
+/**
+ * This class is responsible for merging any combinations of two {@link Sketch} instances.
+ */
+public class SketchMerger {
+
+ /**
+ * Merges one of the two sketches into the other. The merge operation is performed in-place is possible.
+ *
+ * @param left Either a {@link NormalSketch} or {@link SparseSketch}.
+ * @param right Either a {@link NormalSketch} or {@link SparseSketch}.
+ * @return The merged sketch. Is either first parameter, the other parameter or a new instance.
+ */
+ public Sketch<?> merge(Sketch<?> left, Sketch<?> right) {
+ if (left instanceof NormalSketch && right instanceof NormalSketch) {
+ return mergeNormalWithNormal(asNormal(left), asNormal(right));
+ } else if (left instanceof NormalSketch && right instanceof SparseSketch) {
+ return mergeNormalWithSparse(asNormal(left), asSparse(right));
+ } else if (left instanceof SparseSketch && right instanceof NormalSketch) {
+ return mergeNormalWithSparse(asNormal(right), asSparse(left));
+ } else if (left instanceof SparseSketch && right instanceof SparseSketch) {
+ return mergeSparseWithSparse(asSparse(left), asSparse(right));
+ } else {
+ throw new IllegalArgumentException(
+ String.format("Invalid sketch types: left=%s, right=%s", right.getClass(), left.getClass()));
+ }
+ }
+
+ private Sketch<?> mergeSparseWithSparse(SparseSketch dest, SparseSketch other) {
+ dest.merge(other);
+ if (dest.size() > HyperLogLog.SPARSE_SKETCH_CONVERSION_THRESHOLD) {
+ NormalSketch newSketch = new NormalSketch();
+ newSketch.aggregate(dest.data());
+ return newSketch;
+ }
+ return dest;
+ }
+
+ private NormalSketch mergeNormalWithNormal(NormalSketch dest, NormalSketch other) {
+ dest.merge(other);
+ return dest;
+ }
+
+ private NormalSketch mergeNormalWithSparse(NormalSketch dest, SparseSketch other) {
+ NormalSketch newSketch = new NormalSketch();
+ newSketch.aggregate(other.data());
+ dest.merge(newSketch);
+ return dest;
+ }
+
+ private static NormalSketch asNormal(Sketch<?> sketch) {
+ return (NormalSketch) sketch;
+ }
+
+ private static SparseSketch asSparse(Sketch<?> sketch) {
+ return (SparseSketch) sketch;
+ }
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/aggregation/hll/SparseSketch.java b/searchlib/src/main/java/com/yahoo/searchlib/aggregation/hll/SparseSketch.java
new file mode 100644
index 00000000000..fbfd08be6b0
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/aggregation/hll/SparseSketch.java
@@ -0,0 +1,105 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.aggregation.hll;
+
+import com.yahoo.vespa.objects.Deserializer;
+import com.yahoo.vespa.objects.Serializer;
+
+import java.util.HashSet;
+
+public class SparseSketch extends Sketch<SparseSketch> {
+
+ public static final int classId = registerClass(0x4000 + 171, SparseSketch.class);
+ private final HashSet<Integer> values = new HashSet<>();
+
+ @Override
+ public void merge(SparseSketch other) {
+ values.addAll(other.values);
+ }
+
+ /**
+ * Aggregates the hash values.
+ *
+ * @param hashValues Provides an iterator for the hash values
+ */
+ @Override
+ public void aggregate(Iterable<Integer> hashValues) {
+ for (int hash: hashValues) {
+ aggregate(hash);
+ }
+ }
+
+ /**
+ * Aggregates the hash value.
+ *
+ * @param hash Hash value.
+ */
+ @Override
+ public void aggregate(int hash) {
+ values.add(hash);
+ }
+
+ /**
+ * Serializes the Sketch.
+ *
+ * Serialization format
+ * ==================
+ * Number of elements: 4 bytes
+ * Elements: N * 4 bytes
+ * @param buf Serializer
+ */
+ @Override
+ protected void onSerialize(Serializer buf) {
+ super.onSerialize(buf);
+ buf.putInt(null, values.size());
+ for (int value : values) {
+ buf.putInt(null, value);
+ }
+ }
+
+ @Override
+ protected void onDeserialize(Deserializer buf) {
+ super.onDeserialize(buf);
+ values.clear();
+ int nElements = buf.getInt(null);
+ for (int i = 0; i < nElements; i++) {
+ values.add(buf.getInt(null));
+ }
+ }
+
+ @Override
+ protected int onGetClassId() {
+ return classId;
+ }
+
+ public HashSet<Integer> data() {
+ return values;
+ }
+
+ public int size() {
+ return values.size();
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) return true;
+ if (o == null || getClass() != o.getClass()) return false;
+
+ SparseSketch sketch = (SparseSketch) o;
+
+ if (!values.equals(sketch.values)) return false;
+
+ return true;
+ }
+
+ @Override
+ public int hashCode() {
+ return values.hashCode();
+ }
+
+ @Override
+ public String toString() {
+ return "SparseSketch{" +
+ "values=" + values +
+ '}';
+ }
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/aggregation/hll/UniqueCountEstimator.java b/searchlib/src/main/java/com/yahoo/searchlib/aggregation/hll/UniqueCountEstimator.java
new file mode 100644
index 00000000000..b6edd72c40c
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/aggregation/hll/UniqueCountEstimator.java
@@ -0,0 +1,12 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.aggregation.hll;
+
+/**
+ * A interface for unique count estimation algorithms. The goal of this interface is
+ * to aid unit testing of {@link HyperLogLogEstimator} users.
+ *
+ * @author bjorncs
+ */
+public interface UniqueCountEstimator<T> {
+ long estimateCount(T sketch);
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/aggregation/package-info.java b/searchlib/src/main/java/com/yahoo/searchlib/aggregation/package-info.java
new file mode 100644
index 00000000000..2a974a4a3da
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/aggregation/package-info.java
@@ -0,0 +1,4 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+@ExportPackage package com.yahoo.searchlib.aggregation;
+
+import com.yahoo.osgi.annotation.ExportPackage;
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/document/package-info.java b/searchlib/src/main/java/com/yahoo/searchlib/document/package-info.java
new file mode 100644
index 00000000000..adfc4da0b7e
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/document/package-info.java
@@ -0,0 +1,5 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+@ExportPackage
+package com.yahoo.searchlib.document;
+
+import com.yahoo.osgi.annotation.ExportPackage;
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/AddFunctionNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/AddFunctionNode.java
new file mode 100644
index 00000000000..a56215a6991
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/AddFunctionNode.java
@@ -0,0 +1,23 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.expression;
+
+/**
+ * This function is an instruction to add all arguments.
+ *
+ * @author <a href="mailto:balder@yahoo-inc.com">Henning Baldersheim</a>
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
+ */
+public class AddFunctionNode extends NumericFunctionNode {
+
+ public static final int classId = registerClass(0x4000 + 61, AddFunctionNode.class);
+
+ @Override
+ protected int onGetClassId() {
+ return classId;
+ }
+
+ @Override
+ protected void onArgument(final ResultNode arg, ResultNode result) {
+ ((NumericResultNode)result).add(arg);
+ }
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/AggregationRefNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/AggregationRefNode.java
new file mode 100644
index 00000000000..d16058afde2
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/AggregationRefNode.java
@@ -0,0 +1,115 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.expression;
+
+import com.yahoo.searchlib.aggregation.AggregationResult;
+import com.yahoo.vespa.objects.Deserializer;
+import com.yahoo.vespa.objects.ObjectVisitor;
+import com.yahoo.vespa.objects.Serializer;
+
+/**
+ * This node holds the index of an ExpressionNode in an external array, and is used as a proxy in the back-end to allow
+ * aggregators to be used in expressions.
+ *
+ * @author <a href="mailto:balder@yahoo-inc.com">Ulf Lilleengen</a>
+ */
+public class AggregationRefNode extends ExpressionNode {
+
+ public static final int classId = registerClass(0x4000 + 142, AggregationRefNode.class);
+ private AggregationResult result = null;
+ private int index = - 1;
+
+ @SuppressWarnings("UnusedDeclaration")
+ public AggregationRefNode() {
+ // Used by deserializer.
+ }
+
+ public AggregationRefNode(int index) {
+ this.index = index;
+ }
+
+ public AggregationRefNode(AggregationResult result) {
+ this.result = result;
+ }
+
+ public AggregationResult getExpression() {
+ return result;
+ }
+
+ public AggregationRefNode setExpression(AggregationResult result) {
+ this.result = result;
+ return this;
+ }
+
+ public AggregationRefNode setIndex(int index) {
+ this.index = index;
+ return this;
+ }
+
+ public int getIndex() {
+ return index;
+ }
+
+ @Override
+ public boolean onExecute() {
+ return result.execute();
+ }
+
+ @Override
+ public void onPrepare() {
+ result.prepare();
+ }
+
+ @Override
+ protected int onGetClassId() {
+ return classId;
+ }
+
+ @Override
+ protected void onSerialize(Serializer buf) {
+ super.onSerialize(buf);
+ buf.putInt(null, index);
+ }
+
+ @Override
+ protected void onDeserialize(Deserializer buf) {
+ super.onDeserialize(buf);
+ index = buf.getInt(null);
+ result = null;
+ }
+
+ @Override
+ public AggregationRefNode clone() {
+ AggregationRefNode obj = (AggregationRefNode)super.clone();
+ obj.index = this.index;
+ obj.result = this.result.clone();
+ return obj;
+ }
+
+ @Override
+ public void visitMembers(ObjectVisitor visitor) {
+ super.visitMembers(visitor);
+ visitor.visit("index", index);
+ }
+
+ @Override
+ public ResultNode getResult() {
+ return result.getResult();
+ }
+
+ @Override
+ public int hashCode() {
+ return super.hashCode() + index;
+ }
+
+ @Override
+ public boolean equalsExpression(ExpressionNode obj) {
+ AggregationRefNode rhs = (AggregationRefNode)obj;
+ if (index != rhs.index) {
+ return false;
+ }
+ if (!equals(result, rhs.result)) {
+ return false;
+ }
+ return true;
+ }
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/AndFunctionNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/AndFunctionNode.java
new file mode 100644
index 00000000000..f54b8fba9ea
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/AndFunctionNode.java
@@ -0,0 +1,22 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.expression;
+
+/**
+ * This function is an instruction to perform bitwise AND on the result of all arguments in order.
+ *
+ * @author <a href="mailto:balder@yahoo-inc.com">Henning Baldersheim</a>
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
+ */
+public class AndFunctionNode extends BitFunctionNode {
+
+ public static final int classId = registerClass(0x4000 + 67, AndFunctionNode.class);
+
+ @Override
+ protected int onGetClassId() {
+ return classId;
+ }
+
+ public void onArgument(final ResultNode arg, IntegerResultNode result) {
+ result.andOp(arg);
+ }
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/ArithmeticTypeConversion.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/ArithmeticTypeConversion.java
new file mode 100644
index 00000000000..a8484a1245e
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/ArithmeticTypeConversion.java
@@ -0,0 +1,66 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.expression;
+
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * This class implements a lookup table for result node type conversion.
+ *
+ * @author <a href="mailto:lulf@yahoo-inc.com">Ulf Lilleengen</a>
+ */
+public class ArithmeticTypeConversion {
+ private static final Map<Integer, Map<Integer, Integer>> types = new HashMap<Integer, Map<Integer, Integer>>();
+
+ static {
+ add(IntegerResultNode.classId, IntegerResultNode.classId, IntegerResultNode.classId);
+ add(IntegerResultNode.classId, FloatResultNode.classId, FloatResultNode.classId);
+ add(IntegerResultNode.classId, StringResultNode.classId, IntegerResultNode.classId);
+ add(IntegerResultNode.classId, RawResultNode.classId, IntegerResultNode.classId);
+ add(FloatResultNode.classId, IntegerResultNode.classId, FloatResultNode.classId);
+ add(FloatResultNode.classId, FloatResultNode.classId, FloatResultNode.classId);
+ add(FloatResultNode.classId, StringResultNode.classId, FloatResultNode.classId);
+ add(FloatResultNode.classId, RawResultNode.classId, FloatResultNode.classId);
+ add(StringResultNode.classId, IntegerResultNode.classId, IntegerResultNode.classId);
+ add(StringResultNode.classId, FloatResultNode.classId, FloatResultNode.classId);
+ add(StringResultNode.classId, StringResultNode.classId, StringResultNode.classId);
+ add(StringResultNode.classId, RawResultNode.classId, StringResultNode.classId);
+ add(RawResultNode.classId, IntegerResultNode.classId, IntegerResultNode.classId);
+ add(RawResultNode.classId, FloatResultNode.classId, FloatResultNode.classId);
+ add(RawResultNode.classId, StringResultNode.classId, StringResultNode.classId);
+ add(RawResultNode.classId, RawResultNode.classId, RawResultNode.classId);
+ }
+
+ private static void add(int a, int b, int c) {
+ Map<Integer, Integer> entry;
+ if (types.containsKey(a)) {
+ entry = types.get(a);
+ } else {
+ entry = new HashMap<Integer, Integer>();
+ }
+ entry.put(b, c);
+ types.put(a, entry);
+ }
+
+ public static ResultNode getType(ResultNode arg) {
+ return (ResultNode)ResultNode.createFromId(getBaseType(arg));
+ }
+
+ public static ResultNode getType(ResultNode arg1, ResultNode arg2) {
+ return (ResultNode)ResultNode.createFromId(types.get(getBaseType(arg1)).get(getBaseType(arg2)));
+ }
+
+ public static int getBaseType(ResultNode arg) {
+ if (arg instanceof IntegerResultNode) {
+ return IntegerResultNode.classId;
+ } else if (arg instanceof FloatResultNode) {
+ return FloatResultNode.classId;
+ } else if (arg instanceof StringResultNode) {
+ return StringResultNode.classId;
+ } else if (arg instanceof RawResultNode) {
+ return RawResultNode.classId;
+ } else {
+ return ResultNode.classId;
+ }
+ }
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/ArrayAtLookupNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/ArrayAtLookupNode.java
new file mode 100644
index 00000000000..0d005e06326
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/ArrayAtLookupNode.java
@@ -0,0 +1,94 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.expression;
+
+import com.yahoo.vespa.objects.Deserializer;
+import com.yahoo.vespa.objects.ObjectVisitor;
+import com.yahoo.vespa.objects.Serializer;
+
+/**
+ * This function is an instruction to retrieve an index a named array attribute.
+ *
+ * @author arnej27959
+ */
+public class ArrayAtLookupNode extends UnaryFunctionNode {
+
+ public static final int classId = registerClass(0x4000 + 38, ArrayAtLookupNode.class);
+ private String attribute;
+
+ /**
+ * Constructs an empty result node.
+ * <b>NOTE:</b> This instance is broken until non-optional member data is set.
+ */
+ public ArrayAtLookupNode() { }
+
+ /**
+ * Constructs an instance of this class with given attribute name
+ * and index argument.
+ *
+ * @param attribute The attribute to retrieve.
+ * @param arg Expression evaluating to the index argument.
+ */
+ public ArrayAtLookupNode(String attribute, ExpressionNode arg) {
+ setAttributeName(attribute);
+ addArg(arg);
+ }
+
+ /**
+ * Returns the name of the attribute whose value we do index in.
+ *
+ * @return The attribute name.
+ */
+ public String getAttributeName() {
+ return attribute;
+ }
+
+ /**
+ * Sets the name of the attribute whose value we do index in.
+ *
+ * @param attribute The attribute to retrieve.
+ * @return This, to allow chaining.
+ */
+ public ArrayAtLookupNode setAttributeName(String attribute) {
+ if (attribute == null) {
+ throw new IllegalArgumentException("Attribute name can not be null.");
+ }
+ this.attribute = attribute;
+ return this;
+ }
+
+ @Override
+ protected int onGetClassId() {
+ return classId;
+ }
+
+ @Override
+ protected void onSerialize(Serializer buf) {
+ super.onSerialize(buf);
+ putUtf8(buf, attribute);
+ }
+
+ @Override
+ protected void onDeserialize(Deserializer buf) {
+ super.onDeserialize(buf);
+ attribute = getUtf8(buf);
+ }
+
+ @Override
+ public int hashCode() {
+ return super.hashCode() + attribute.hashCode();
+ }
+
+ @Override
+ protected boolean equalsUnaryFunction(UnaryFunctionNode obj) {
+ // "arg" checked by superclass
+ String otherAttr = ((ArrayAtLookupNode)obj).getAttributeName();
+ return attribute.equals(otherAttr);
+ }
+
+ @Override
+ public void visitMembers(ObjectVisitor visitor) {
+ super.visitMembers(visitor);
+ visitor.visit("attribute", attribute);
+ }
+
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/AttributeNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/AttributeNode.java
new file mode 100644
index 00000000000..e58bf1e317d
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/AttributeNode.java
@@ -0,0 +1,90 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.expression;
+
+import com.yahoo.vespa.objects.Deserializer;
+import com.yahoo.vespa.objects.ObjectVisitor;
+import com.yahoo.vespa.objects.Serializer;
+
+/**
+ * This function is an instruction to retrieve the value of a named attribute.
+ *
+ * @author <a href="mailto:balder@yahoo-inc.com">Henning Baldersheim</a>
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
+ */
+public class AttributeNode extends FunctionNode {
+
+ public static final int classId = registerClass(0x4000 + 55, AttributeNode.class);
+ private String attribute;
+
+ /**
+ * Constructs an empty result node. <b>NOTE:</b> This instance is broken until non-optional member data is set.
+ */
+ public AttributeNode() {
+
+ }
+
+ /**
+ * Constructs an instance of this class with given attribute name.
+ *
+ * @param attribute The attribute to retrieve.
+ */
+ public AttributeNode(String attribute) {
+ setAttributeName(attribute);
+ }
+
+ /**
+ * Returns the name of the attribute whose value this function is to retrieve.
+ *
+ * @return The attribute name.
+ */
+ public String getAttributeName() {
+ return attribute;
+ }
+
+ /**
+ * Sets the name of the attribute whose value this function is to retrieve.
+ *
+ * @param attribute The attribute to retrieve.
+ * @return This, to allow chaining.
+ */
+ public AttributeNode setAttributeName(String attribute) {
+ if (attribute == null) {
+ throw new IllegalArgumentException("Attribute name can not be null.");
+ }
+ this.attribute = attribute;
+ return this;
+ }
+
+ @Override
+ protected int onGetClassId() {
+ return classId;
+ }
+
+ @Override
+ protected void onSerialize(Serializer buf) {
+ super.onSerialize(buf);
+ putUtf8(buf, attribute);
+ }
+
+ @Override
+ protected void onDeserialize(Deserializer buf) {
+ super.onDeserialize(buf);
+ attribute = getUtf8(buf);
+ }
+
+ @Override
+ public int hashCode() {
+ return super.hashCode() + attribute.hashCode();
+ }
+
+ @Override
+ protected boolean equalsFunction(FunctionNode obj) {
+ return attribute.equals(((AttributeNode)obj).attribute);
+ }
+
+ @Override
+ public void visitMembers(ObjectVisitor visitor) {
+ super.visitMembers(visitor);
+ visitor.visit("attribute", attribute);
+ }
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/BitFunctionNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/BitFunctionNode.java
new file mode 100644
index 00000000000..830b74bbb5f
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/BitFunctionNode.java
@@ -0,0 +1,36 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.expression;
+
+/**
+ * This is an abstract super-class for all non-unary functions that operator on bit values.
+ *
+ * @author <a href="mailto:balder@yahoo-inc.com">Henning Baldersheim</a>
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
+ */
+public abstract class BitFunctionNode extends NumericFunctionNode {
+
+ public static final int classId = registerClass(0x4000 + 47, BitFunctionNode.class);
+
+ @Override
+ protected int onGetClassId() {
+ return classId;
+ }
+
+ @Override
+ protected void onArgument(final ResultNode arg, ResultNode result) {
+ onArgument(arg, (IntegerResultNode)result);
+ }
+
+ @Override
+ protected void onPrepareResult() {
+ setResult(new IntegerResultNode(0));
+ }
+
+ /**
+ * Method for performing onArgument on integers, the only type supported for bit operations.
+ *
+ * @param arg Argument given to the bit function.
+ * @param result Place to store the result.
+ */
+ protected abstract void onArgument(final ResultNode arg, IntegerResultNode result);
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/BucketResultNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/BucketResultNode.java
new file mode 100644
index 00000000000..0dc7f49a826
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/BucketResultNode.java
@@ -0,0 +1,47 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.expression;
+
+/**
+ * This is the superclass of all bucket values
+ *
+ * @author <a href="mailto:havardpe@yahoo-inc.com">Haavard Pettersen</a>
+ * @author <a href="mailto:balder@yahoo-inc.com">Henning Baldersheim</a>
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
+ */
+abstract public class BucketResultNode extends ResultNode {
+
+ // The global class identifier shared with C++.
+ public static final int classId = registerClass(0x4000 + 100, BucketResultNode.class);
+
+ @Override
+ public long getInteger() {
+ return 0;
+ }
+
+ @Override
+ public double getFloat() {
+ return 0.0;
+ }
+
+ @Override
+ public String getString() {
+ return "";
+ }
+
+ @Override
+ public byte[] getRaw() {
+ return new byte[0];
+ }
+
+ @Override
+ public void set(ResultNode rhs) {
+ }
+
+ /**
+ * Tell if this bucket has zero width. Indicates that is has no value and can be considered a NULL range. An empty
+ * range is used by the backend to represent hits that end in no buckets.
+ *
+ * @return If this bucket has zero width.
+ */
+ public abstract boolean empty();
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/CatFunctionNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/CatFunctionNode.java
new file mode 100644
index 00000000000..98c3ba0580e
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/CatFunctionNode.java
@@ -0,0 +1,42 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.expression;
+
+/**
+ * This function is an instruction to concatenate the bits of all arguments in order.
+ *
+ * @author <a href="mailto:balder@yahoo-inc.com">Henning Baldersheim</a>
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
+ */
+public class CatFunctionNode extends MultiArgFunctionNode {
+
+ public static final int classId = registerClass(0x4000 + 72, CatFunctionNode.class);
+
+ @Override
+ protected int onGetClassId() {
+ return classId;
+ }
+
+ @Override
+ protected boolean equalsMultiArgFunction(MultiArgFunctionNode obj) {
+ return true;
+ }
+
+ @Override
+ protected void onPrepareResult() {
+ setResult(new RawResultNode());
+ }
+
+ @Override
+ protected void onPrepare() {
+ super.onPrepare();
+ }
+
+ @Override
+ protected boolean onExecute() {
+ for (int i = 0; i < getNumArgs(); i++) {
+ getArg(i).execute();
+ ((RawResultNode)getResult()).add(getArg(i).getResult());
+ }
+ return true;
+ }
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/ConstantNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/ConstantNode.java
new file mode 100644
index 00000000000..2ba6ee6e1c3
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/ConstantNode.java
@@ -0,0 +1,82 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.expression;
+
+import com.yahoo.vespa.objects.Deserializer;
+import com.yahoo.vespa.objects.ObjectVisitor;
+import com.yahoo.vespa.objects.Serializer;
+
+/**
+ * This abstract expression node represents a function to execute.
+ *
+ * @author <a href="mailto:balder@yahoo-inc.com">Henning Baldersheim</a>
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
+ */
+public class ConstantNode extends ExpressionNode {
+
+ public static final int classId = registerClass(0x4000 + 49, ConstantNode.class);
+ private ResultNode value = null;
+
+ public ConstantNode() {
+
+ }
+
+ public ConstantNode(ResultNode value) {
+ this.value = value;
+ }
+
+ public ResultNode getValue() {
+ return value;
+ }
+
+ @Override
+ protected int onGetClassId() {
+ return classId;
+ }
+
+ @Override
+ protected void onSerialize(Serializer buf) {
+ super.onSerialize(buf);
+ serializeOptional(buf, value);
+ }
+
+ @Override
+ protected void onDeserialize(Deserializer buf) {
+ super.onDeserialize(buf);
+ value = (ResultNode)deserializeOptional(buf);
+ }
+
+ @Override
+ public ConstantNode clone() {
+ ConstantNode obj = (ConstantNode)super.clone();
+ if (value != null) {
+ obj.value = (ResultNode)value.clone();
+ }
+ return obj;
+ }
+
+ @Override
+ public void visitMembers(ObjectVisitor visitor) {
+ super.visitMembers(visitor);
+ visitor.visit("value", value);
+ }
+
+ @Override
+ protected void onPrepare() {
+
+ }
+
+ @Override
+ protected boolean onExecute() {
+ return true;
+ }
+
+ @Override
+ public ResultNode getResult() {
+ return value;
+ }
+
+ @Override
+ protected boolean equalsExpression(ExpressionNode obj) {
+ return equals(value, ((ConstantNode)obj).value);
+ }
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/DebugWaitFunctionNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/DebugWaitFunctionNode.java
new file mode 100644
index 00000000000..c24e6fa1acd
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/DebugWaitFunctionNode.java
@@ -0,0 +1,104 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.expression;
+
+import com.yahoo.vespa.objects.Deserializer;
+import com.yahoo.vespa.objects.ObjectVisitor;
+import com.yahoo.vespa.objects.Serializer;
+
+/**
+ * This is a debug wait function node that waits for a specified amount of time before executing its expression.
+ *
+ * @author <a href="mailto:lulf@yahoo-inc.com">Ulf Lilleengen</a>
+ */
+public class DebugWaitFunctionNode extends UnaryFunctionNode {
+
+ public static final int classId = registerClass(0x4000 + 144, DebugWaitFunctionNode.class);
+ private double waitTime;
+ private boolean busyWait;
+
+ @SuppressWarnings("UnusedDeclaration")
+ public DebugWaitFunctionNode() {
+ // used by deserializer
+ }
+
+ /**
+ * Constructs an instance of this class with given argument and wait parameters.
+ *
+ * @param arg The argument for this function.
+ * @param waitTime The time to wait before executing expression.
+ * @param busyWait true if busy wait, false if not.
+ */
+ public DebugWaitFunctionNode(ExpressionNode arg, double waitTime, boolean busyWait) {
+ addArg(arg);
+ this.waitTime = waitTime;
+ this.busyWait = busyWait;
+ }
+
+ @Override
+ public void onPrepare() {
+ super.onPrepare();
+ }
+
+ @Override
+ public boolean onExecute() {
+ // TODO: Add wait code.
+ double millis = waitTime * 1000.0;
+ long start = System.currentTimeMillis();
+ try {
+ while ((System.currentTimeMillis() - start) < millis) {
+ if (busyWait) {
+ for (int i = 0; i < 1000; i++) {
+ ;
+ }
+ } else {
+ long rem = (long)(millis - (System.currentTimeMillis() - start));
+ Thread.sleep(rem);
+ }
+ }
+ } catch (InterruptedException ie) {
+ // Not critical
+ }
+ getArg().execute();
+ getResult().set(getArg().getResult());
+ return true;
+ }
+
+ @Override
+ public int hashCode() {
+ return super.hashCode() + (int)waitTime + (busyWait ? 1 : 0);
+ }
+
+ @Override
+ protected boolean equalsUnaryFunction(UnaryFunctionNode obj) {
+ DebugWaitFunctionNode rhs = (DebugWaitFunctionNode)obj;
+ return waitTime == rhs.waitTime && busyWait == rhs.busyWait;
+ }
+
+ @Override
+ protected void onSerialize(Serializer buf) {
+ super.onSerialize(buf);
+ buf.putDouble(null, waitTime);
+ byte tmp = busyWait ? (byte)1 : (byte)0;
+ buf.putByte(null, tmp);
+ }
+
+ @Override
+ protected void onDeserialize(Deserializer buf) {
+ super.onDeserialize(buf);
+ waitTime = buf.getDouble(null);
+ byte tmp = buf.getByte(null);
+ busyWait = (tmp != 0);
+ }
+
+ @Override
+ public void visitMembers(ObjectVisitor visitor) {
+ super.visitMembers(visitor);
+ visitor.visit("waitTime", waitTime);
+ visitor.visit("busyWait", busyWait);
+ }
+
+ @Override
+ protected int onGetClassId() {
+ return classId;
+ }
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/DivideFunctionNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/DivideFunctionNode.java
new file mode 100644
index 00000000000..2a99e9f1edb
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/DivideFunctionNode.java
@@ -0,0 +1,23 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.expression;
+
+/**
+ * This function is an instruction to divide the arguments in order.
+ *
+ * @author <a href="mailto:balder@yahoo-inc.com">Henning Baldersheim</a>
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
+ */
+public class DivideFunctionNode extends NumericFunctionNode {
+
+ public static final int classId = registerClass(0x4000 + 63, DivideFunctionNode.class);
+
+ @Override
+ protected int onGetClassId() {
+ return classId;
+ }
+
+ @Override
+ protected void onArgument(final ResultNode arg, ResultNode result) {
+ ((NumericResultNode)result).divide(arg);
+ }
+} \ No newline at end of file
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/DocumentAccessorNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/DocumentAccessorNode.java
new file mode 100644
index 00000000000..dabbf8d622b
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/DocumentAccessorNode.java
@@ -0,0 +1,19 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.expression;
+
+/**
+ * This abstract expression node represents a document whose content is accessed depending on the subclass
+ * implementation of this.
+ *
+ * @author <a href="mailto:balder@yahoo-inc.com">Henning Baldersheim</a>
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
+ */
+public abstract class DocumentAccessorNode extends ExpressionNode {
+
+ public static final int classId = registerClass(0x4000 + 48, FunctionNode.class);
+
+ @Override
+ protected int onGetClassId() {
+ return classId;
+ }
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/DocumentFieldNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/DocumentFieldNode.java
new file mode 100644
index 00000000000..c33ef6cd7fd
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/DocumentFieldNode.java
@@ -0,0 +1,116 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.expression;
+
+import com.yahoo.vespa.objects.Deserializer;
+import com.yahoo.vespa.objects.ObjectVisitor;
+import com.yahoo.vespa.objects.Serializer;
+
+/**
+ * The node is a request to retrieve the content of a document field.
+ *
+ * @author <a href="mailto:balder@yahoo-inc.com">Henning Baldersheim</a>
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
+ */
+public class DocumentFieldNode extends DocumentAccessorNode {
+
+ public static final int classId = registerClass(0x4000 + 56, DocumentFieldNode.class);
+ private String fieldName;
+ private ResultNode result;
+
+ /**
+ * Constructs an empty result node. <b>NOTE:</b> This instance is broken until non-optional member data is set.
+ */
+ public DocumentFieldNode() {
+ super();
+ }
+
+ /**
+ * Constructs an instance of this class with given field name.
+ *
+ * @param fieldName The field whose value to retrieve.
+ */
+ public DocumentFieldNode(String fieldName) {
+ super();
+ setDocumentFieldName(fieldName);
+ }
+
+ /**
+ * Returns the name of the field whose value to retrieve.
+ *
+ * @return The field name.
+ */
+ public String getDocumentFieldName() {
+ return fieldName;
+ }
+
+ /**
+ * Sets the name of the field whose value to retrieve.
+ *
+ * @param fieldName The field name to set.
+ * @return This, to allow chaining.
+ */
+ public DocumentFieldNode setDocumentFieldName(String fieldName) {
+ if (fieldName == null) {
+ throw new IllegalArgumentException("Field name can not be null.");
+ }
+ this.fieldName = fieldName;
+ return this;
+ }
+
+ @Override
+ public ResultNode getResult() {
+ return result;
+ }
+
+ @Override
+ protected int onGetClassId() {
+ return classId;
+ }
+
+ @Override
+ protected void onSerialize(Serializer buf) {
+ super.onSerialize(buf);
+ putUtf8(buf, fieldName);
+ serializeOptional(buf, result);
+ }
+
+ @Override
+ protected void onDeserialize(Deserializer buf) {
+ super.onDeserialize(buf);
+ fieldName = getUtf8(buf);
+ result = (ResultNode)deserializeOptional(buf);
+ }
+
+ @Override
+ public int hashCode() {
+ return super.hashCode() + fieldName.hashCode();
+ }
+
+ @Override
+ protected boolean equalsExpression(ExpressionNode obj) {
+ DocumentFieldNode rhs = (DocumentFieldNode)obj;
+ if (!fieldName.equals(rhs.fieldName)) {
+ return false;
+ }
+ if (!equals(result, rhs.result)) {
+ return false;
+ }
+ return true;
+ }
+
+ @Override
+ public DocumentFieldNode clone() {
+ DocumentFieldNode obj = (DocumentFieldNode)super.clone();
+ if (result != null) {
+ obj.result = (ResultNode)result.clone();
+ }
+ return obj;
+ }
+
+ @Override
+ public void visitMembers(ObjectVisitor visitor) {
+ super.visitMembers(visitor);
+ visitor.visit("fieldName", fieldName);
+ visitor.visit("result", result);
+ }
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/ExpressionNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/ExpressionNode.java
new file mode 100644
index 00000000000..07aa4c8d580
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/ExpressionNode.java
@@ -0,0 +1,104 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.expression;
+
+import com.yahoo.vespa.objects.Deserializer;
+import com.yahoo.vespa.objects.Identifiable;
+import com.yahoo.vespa.objects.ObjectVisitor;
+import com.yahoo.vespa.objects.Serializer;
+
+import java.io.Serializable;
+
+/**
+ * This is the base class for all expression node types. There is no execution logic implemented in Java, since that all
+ * happens in the C++ backend. This class hierarchy is for <b>building</b> the expression tree to pass to the backend.
+ *
+ * @author <a href="mailto:balder@yahoo-inc.com">Henning Baldersheim</a>
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
+ */
+public abstract class ExpressionNode extends Identifiable implements Serializable {
+
+ public static final int classId = registerClass(0x4000 + 40, ExpressionNode.class);
+
+ /**
+ * Prepare expression for execution.
+ */
+ public void prepare() {
+ onPrepare();
+ }
+
+ /**
+ * Execute expression.
+ *
+ * @return true if successful, false if not.
+ */
+ public boolean execute() {
+ return onExecute();
+ }
+
+ /**
+ * Give an argument to this expression and store the result.
+ *
+ * @param arg Argument to use for expression.
+ * @param result Node to contain the result.
+ */
+ protected void executeIterative(final ResultNode arg, ResultNode result) {
+ onArgument(arg, result);
+ }
+
+ protected boolean onExecute() {
+ throw new RuntimeException("Class " + this.getClass().getName() + " does not implement onExecute().");
+ }
+
+ protected void onPrepare() {
+ throw new RuntimeException("Class " + this.getClass().getName() + " does not implement onPrepare().");
+ }
+
+ protected void onArgument(final ResultNode arg, ResultNode result) {
+ throw new RuntimeException("Class " + this.getClass().getName() + " does not implement onArgument().");
+ }
+
+ @Override
+ protected int onGetClassId() {
+ return classId;
+ }
+
+ @Override
+ protected void onSerialize(Serializer buf) {
+ super.onSerialize(buf);
+ }
+
+ @Override
+ protected void onDeserialize(Deserializer buf) {
+ super.onDeserialize(buf);
+ }
+
+ @Override
+ public void visitMembers(ObjectVisitor visitor) {
+ super.visitMembers(visitor);
+ }
+
+ @Override
+ public ExpressionNode clone() {
+ return (ExpressionNode)super.clone();
+ }
+
+ @Override
+ public final boolean equals(Object obj) {
+ if (!super.equals(obj)) {
+ return false;
+ }
+ if (!equalsExpression((ExpressionNode)obj)) {
+ return false;
+ }
+ return true;
+ }
+
+ protected abstract boolean equalsExpression(ExpressionNode obj);
+
+ /**
+ * Get the result of this expression.
+ *
+ * @return the result as a ResultNode.
+ */
+ abstract public ResultNode getResult();
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/FixedWidthBucketFunctionNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/FixedWidthBucketFunctionNode.java
new file mode 100644
index 00000000000..5c8a526291d
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/FixedWidthBucketFunctionNode.java
@@ -0,0 +1,82 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.expression;
+
+import com.yahoo.vespa.objects.Deserializer;
+import com.yahoo.vespa.objects.ObjectVisitor;
+import com.yahoo.vespa.objects.Serializer;
+
+/**
+ * This function assign a fixed width bucket to each input value
+ *
+ * @author <a href="mailto:havardpe@yahoo-inc.com">Haavard Pettersen</a>
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
+ */
+public class FixedWidthBucketFunctionNode extends UnaryFunctionNode {
+
+ public static final int classId = registerClass(0x4000 + 77, FixedWidthBucketFunctionNode.class);
+ private NumericResultNode width = null;
+
+ /**
+ * Constructs an empty result node.
+ */
+ public FixedWidthBucketFunctionNode() {
+ // empty
+ }
+
+ /**
+ * Create a bucket expression with the given width and the given subexpression
+ *
+ * @param w bucket width
+ * @param arg The argument for this function.
+ */
+ public FixedWidthBucketFunctionNode(NumericResultNode w, ExpressionNode arg) {
+ addArg(arg);
+ width = w;
+ }
+
+ /**
+ * Obtain the width of this bucket expression
+ *
+ * @return bucket width for this expression
+ */
+ public NumericResultNode getWidth() {
+ return width;
+ }
+
+ @Override
+ protected int onGetClassId() {
+ return classId;
+ }
+
+ @Override
+ protected void onSerialize(Serializer buf) {
+ super.onSerialize(buf);
+ serializeOptional(buf, width);
+ }
+
+ @Override
+ protected void onDeserialize(Deserializer buf) {
+ super.onDeserialize(buf);
+ width = (NumericResultNode)deserializeOptional(buf);
+ }
+
+ @Override
+ protected boolean equalsUnaryFunction(UnaryFunctionNode obj) {
+ return equals(width, ((FixedWidthBucketFunctionNode)obj).width);
+ }
+
+ @Override
+ public FixedWidthBucketFunctionNode clone() {
+ FixedWidthBucketFunctionNode obj = (FixedWidthBucketFunctionNode)super.clone();
+ if (width != null) {
+ obj.width = (NumericResultNode)width.clone();
+ }
+ return obj;
+ }
+
+ @Override
+ public void visitMembers(ObjectVisitor visitor) {
+ super.visitMembers(visitor);
+ visitor.visit("width", width);
+ }
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/FloatBucketResultNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/FloatBucketResultNode.java
new file mode 100644
index 00000000000..e5088e27a2e
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/FloatBucketResultNode.java
@@ -0,0 +1,118 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.expression;
+
+import com.yahoo.vespa.objects.Deserializer;
+import com.yahoo.vespa.objects.ObjectVisitor;
+import com.yahoo.vespa.objects.Serializer;
+
+/**
+ * This result holds a float value.
+ *
+ * @author <a href="mailto:havardpe@yahoo-inc.com">Haavard Pettersen</a>
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
+ */
+public class FloatBucketResultNode extends BucketResultNode {
+
+ // The global class identifier shared with C++.
+ public static final int classId = registerClass(0x4000 + 102, FloatBucketResultNode.class);
+
+ // bucket start, inclusive
+ private double from = 0;
+
+ // bucket end, exclusive
+ private double to = 0;
+
+ @Override
+ public boolean empty() {
+ return to == from;
+ }
+
+ /**
+ * Constructs an empty result node.
+ */
+ public FloatBucketResultNode() {
+ // empty
+ }
+
+ /**
+ * Create a bucket with the given limits
+ *
+ * @param from bucket start
+ * @param to bucket end
+ */
+ public FloatBucketResultNode(double from, double to) {
+ this.from = from;
+ this.to = to;
+ }
+
+ /**
+ * Obtain the bucket start
+ *
+ * @return bucket start
+ */
+ public double getFrom() {
+ return from;
+ }
+
+ /**
+ * Obtain the bucket end
+ *
+ * @return bucket end
+ */
+ public double getTo() {
+ return to;
+ }
+
+ @Override
+ protected int onGetClassId() {
+ return classId;
+ }
+
+ @Override
+ protected void onSerialize(Serializer buf) {
+ buf.putDouble(null, from);
+ buf.putDouble(null, to);
+ }
+
+ @Override
+ protected void onDeserialize(Deserializer buf) {
+ from = buf.getDouble(null);
+ to = buf.getDouble(null);
+ }
+
+ @Override
+ protected int onCmp(ResultNode rhs) {
+ if (classId != rhs.getClassId()) {
+ return (classId - rhs.getClassId());
+ }
+ FloatBucketResultNode b = (FloatBucketResultNode)rhs;
+ double f1 = from;
+ double f2 = b.from;
+ if (f1 < f2) {
+ return -1;
+ } else if (f1 > f2) {
+ return 1;
+ } else {
+ double t1 = to;
+ double t2 = b.to;
+ if (t1 < t2) {
+ return -1;
+ } else if (t1 > t2) {
+ return 1;
+ }
+ }
+ return 0;
+ }
+
+ @Override
+ public int hashCode() {
+ return super.hashCode() + (int)from + (int)to;
+ }
+
+ @Override
+ public void visitMembers(ObjectVisitor visitor) {
+ super.visitMembers(visitor);
+ visitor.visit("from", from);
+ visitor.visit("to", to);
+ }
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/FloatBucketResultNodeVector.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/FloatBucketResultNodeVector.java
new file mode 100644
index 00000000000..9d6d83ccc5c
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/FloatBucketResultNodeVector.java
@@ -0,0 +1,80 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.expression;
+
+import com.yahoo.vespa.objects.Deserializer;
+import com.yahoo.vespa.objects.Serializer;
+
+import java.util.ArrayList;
+
+/**
+ * This result holds nothing.
+ *
+ * @author <a href="mailto:balder@yahoo-inc.com">Henning Baldersheim</a>
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
+ */
+public class FloatBucketResultNodeVector extends ResultNodeVector {
+
+ // The global class identifier shared with C++.
+ public static final int classId = registerClass(0x4000 + 113, FloatBucketResultNodeVector.class);
+ private ArrayList<FloatBucketResultNode> vector = new ArrayList<FloatBucketResultNode>();
+
+ @Override
+ protected int onGetClassId() {
+ return classId;
+ }
+
+ public FloatBucketResultNodeVector() {
+ }
+
+ public FloatBucketResultNodeVector add(FloatBucketResultNode v) {
+ vector.add(v);
+ return this;
+ }
+
+ public ResultNodeVector add(ResultNode r) {
+ return add((FloatBucketResultNode)r);
+ }
+
+ public ArrayList<FloatBucketResultNode> getVector() {
+ return vector;
+ }
+
+ @Override
+ protected void onSerialize(Serializer buf) {
+ super.onSerialize(buf);
+ buf.putInt(null, vector.size());
+ for (FloatBucketResultNode node : vector) {
+ node.serialize(buf);
+ }
+ }
+
+ @Override
+ protected void onDeserialize(Deserializer buf) {
+ super.onDeserialize(buf);
+ int sz = buf.getInt(null);
+ vector = new ArrayList<FloatBucketResultNode>();
+ for (int i = 0; i < sz; i++) {
+ FloatBucketResultNode node = new FloatBucketResultNode(0, 0);
+ node.deserialize(buf);
+ vector.add(node);
+ }
+ }
+
+ @Override
+ protected int onCmp(ResultNode rhs) {
+ if (classId != rhs.getClassId()) {
+ return (classId - rhs.getClassId());
+ }
+ FloatBucketResultNodeVector b = (FloatBucketResultNodeVector)rhs;
+ int minLength = vector.size();
+ if (b.vector.size() < minLength) {
+ minLength = b.vector.size();
+ }
+ int diff = 0;
+ for (int i = 0; (diff == 0) && (i < minLength); i++) {
+ diff = vector.get(i).compareTo(b.vector.get(i));
+ }
+ return (diff == 0) ? (vector.size() - b.vector.size()) : diff;
+ }
+
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/FloatResultNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/FloatResultNode.java
new file mode 100644
index 00000000000..6e44f113eed
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/FloatResultNode.java
@@ -0,0 +1,182 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.expression;
+
+import com.yahoo.vespa.objects.Deserializer;
+import com.yahoo.vespa.objects.ObjectVisitor;
+import com.yahoo.vespa.objects.Serializer;
+
+import java.nio.ByteBuffer;
+
+/**
+ * This result holds a float value.
+ *
+ * @author <a href="mailto:balder@yahoo-inc.com">Henning Baldersheim</a>
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
+ */
+public class FloatResultNode extends NumericResultNode {
+
+ // The global class identifier shared with C++.
+ public static final int classId = registerClass(0x4000 + 52, FloatResultNode.class);
+ private static FloatResultNode negativeInfinity = new FloatResultNode(Double.NEGATIVE_INFINITY);
+ private static FloatResultNode positiveInfinity = new FloatResultNode(Double.POSITIVE_INFINITY);
+ // The numeric value of this node.
+ private double value;
+
+ /**
+ * Constructs an empty result node.
+ */
+ public FloatResultNode() {
+ super();
+ }
+
+ /**
+ * Constructs an instance of this class with given value.
+ *
+ * @param value The value to assign to this.
+ */
+ public FloatResultNode(double value) {
+ super();
+ setValue(value);
+ }
+
+ /**
+ * Sets the value of this result.
+ *
+ * @param value The value to set.
+ * @return This, to allow chaining.
+ */
+ public FloatResultNode setValue(double value) {
+ this.value = value;
+ return this;
+ }
+
+ @Override
+ protected int onGetClassId() {
+ return classId;
+ }
+
+ @Override
+ protected void onSerialize(Serializer buf) {
+ buf.putDouble(null, value);
+ }
+
+ @Override
+ protected void onDeserialize(Deserializer buf) {
+ value = buf.getDouble(null);
+ }
+
+ @Override
+ public long getInteger() {
+ return Math.round(value);
+ }
+
+ @Override
+ public double getFloat() {
+ return value;
+ }
+
+ @Override
+ public String getString() {
+ return String.valueOf(value);
+ }
+
+ @Override
+ public byte[] getRaw() {
+ return ByteBuffer.allocate(8).putDouble(value).array();
+ }
+
+ @Override
+ public void add(ResultNode rhs) {
+ value += rhs.getFloat();
+ }
+
+ @Override
+ public void negate() {
+ value = -value;
+ }
+
+ @Override
+ public void multiply(ResultNode rhs) {
+ value *= rhs.getFloat();
+ }
+
+ @Override
+ public void divide(ResultNode rhs) {
+ double val = rhs.getFloat();
+ value = (val == 0.0) ? 0.0 : (value / val);
+ }
+
+ @Override
+ public void modulo(ResultNode rhs) {
+ value %= rhs.getInteger();
+ }
+
+ @Override
+ public void min(ResultNode rhs) {
+ double value = rhs.getFloat();
+ if (value < this.value) {
+ this.value = value;
+ }
+ }
+
+ @Override
+ public void max(ResultNode rhs) {
+ double value = rhs.getFloat();
+ if (value > this.value) {
+ this.value = value;
+ }
+ }
+
+ @Override
+ public Object getNumber() {
+ return new Double(value);
+ }
+
+ @Override
+ protected int onCmp(ResultNode rhs) {
+ double b = rhs.getFloat();
+ if (Double.isNaN(value)) {
+ return Double.isNaN(b) ? 0 : -1;
+ } else {
+ if (Double.isNaN(b)) {
+ return 1;
+ } else {
+ return (value < b) ? -1 : (value > b) ? 1 : 0;
+ }
+ }
+ }
+
+ @Override
+ public int hashCode() {
+ return super.hashCode() + (int)value;
+ }
+
+ @Override
+ public void visitMembers(ObjectVisitor visitor) {
+ super.visitMembers(visitor);
+ visitor.visit("value", value);
+ }
+
+ @Override
+ public void set(ResultNode rhs) {
+ value = rhs.getFloat();
+ }
+
+ /**
+ * Will provide the smallest possible value
+ *
+ * @return the smallest possible FloatResultNode
+ */
+ public static FloatResultNode getNegativeInfinity() {
+ return negativeInfinity;
+ }
+
+ /**
+ * Will provide the largest possible value
+ *
+ * @return the smallest largest FloatResultNode
+ */
+ public static FloatResultNode getPositiveInfinity() {
+ return positiveInfinity;
+ }
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/FloatResultNodeVector.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/FloatResultNodeVector.java
new file mode 100644
index 00000000000..ae57aeb6a7f
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/FloatResultNodeVector.java
@@ -0,0 +1,80 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.expression;
+
+import com.yahoo.vespa.objects.Deserializer;
+import com.yahoo.vespa.objects.Serializer;
+
+import java.util.ArrayList;
+
+/**
+ * This result holds nothing.
+ *
+ * @author <a href="mailto:balder@yahoo-inc.com">Henning Baldersheim</a>
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
+ */
+public class FloatResultNodeVector extends ResultNodeVector {
+
+ // The global class identifier shared with C++.
+ public static final int classId = registerClass(0x4000 + 110, FloatResultNodeVector.class);
+ private ArrayList<FloatResultNode> vector = new ArrayList<FloatResultNode>();
+
+ @Override
+ protected int onGetClassId() {
+ return classId;
+ }
+
+ public FloatResultNodeVector() {
+ }
+
+ public FloatResultNodeVector add(FloatResultNode v) {
+ vector.add(v);
+ return this;
+ }
+
+ public ResultNodeVector add(ResultNode r) {
+ return add((FloatResultNode)r);
+ }
+
+ public ArrayList<FloatResultNode> getVector() {
+ return vector;
+ }
+
+ @Override
+ protected void onSerialize(Serializer buf) {
+ super.onSerialize(buf);
+ buf.putInt(null, vector.size());
+ for (FloatResultNode node : vector) {
+ node.serialize(buf);
+ }
+ }
+
+ @Override
+ protected void onDeserialize(Deserializer buf) {
+ super.onDeserialize(buf);
+ int sz = buf.getInt(null);
+ vector = new ArrayList<FloatResultNode>();
+ for (int i = 0; i < sz; i++) {
+ FloatResultNode node = new FloatResultNode(0);
+ node.deserialize(buf);
+ vector.add(node);
+ }
+ }
+
+ @Override
+ protected int onCmp(ResultNode rhs) {
+ if (classId != rhs.getClassId()) {
+ return (classId - rhs.getClassId());
+ }
+ FloatResultNodeVector b = (FloatResultNodeVector)rhs;
+ int minLength = vector.size();
+ if (b.vector.size() < minLength) {
+ minLength = b.vector.size();
+ }
+ int diff = 0;
+ for (int i = 0; (diff == 0) && (i < minLength); i++) {
+ diff = vector.get(i).compareTo(b.vector.get(i));
+ }
+ return (diff == 0) ? (vector.size() - b.vector.size()) : diff;
+ }
+
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/ForceLoad.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/ForceLoad.java
new file mode 100644
index 00000000000..6ebb4c672c8
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/ForceLoad.java
@@ -0,0 +1,89 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.expression;
+
+/**
+ * This file was generated by ../../../../../forceload.sh
+ */
+public class ForceLoad {
+
+ static {
+ String pkg = "com.yahoo.searchlib.expression";
+ String[] classes = {
+ "StringResultNode",
+ "BucketResultNode",
+ "MaxFunctionNode",
+ "FloatResultNode",
+ "RawResultNode",
+ "NegateFunctionNode",
+ "SortFunctionNode",
+ "ReverseFunctionNode",
+ "ToIntFunctionNode",
+ "ToFloatFunctionNode",
+ "MathFunctionNode",
+ "StrLenFunctionNode",
+ "NormalizeSubjectFunctionNode",
+ "StrCatFunctionNode",
+ "ToStringFunctionNode",
+ "NumElemFunctionNode",
+ "CatFunctionNode",
+ "ResultNode",
+ "AddFunctionNode",
+ "DivideFunctionNode",
+ "XorFunctionNode",
+ "MD5BitFunctionNode",
+ "UnaryBitFunctionNode",
+ "AttributeNode",
+ "MinFunctionNode",
+ "BitFunctionNode",
+ "FixedWidthBucketFunctionNode",
+ "RangeBucketPreDefFunctionNode",
+ "GetYMUMChecksumFunctionNode",
+ "DocumentFieldNode",
+ "NullResultNode",
+ "FunctionNode",
+ "ConstantNode",
+ "RawResultNode",
+ "OrFunctionNode",
+ "ExpressionNode",
+ "AggregationRefNode",
+ "IntegerResultNode",
+ "Int32ResultNode",
+ "Int16ResultNode",
+ "Int8ResultNode",
+ "ModuloFunctionNode",
+ "IntegerResultNodeVector",
+ "Int32ResultNodeVector",
+ "Int16ResultNodeVector",
+ "Int8ResultNodeVector",
+ "FloatResultNodeVector",
+ "StringResultNodeVector",
+ "RawResultNodeVector",
+ "ForceLoad",
+ "MultiplyFunctionNode",
+ "IntegerBucketResultNode",
+ "FloatBucketResultNode",
+ "StringBucketResultNode",
+ "RawBucketResultNode",
+ "RawBucketResultNodeVector",
+ "IntegerBucketResultNodeVector",
+ "FloatBucketResultNodeVector",
+ "StringBucketResultNodeVector",
+ "AndFunctionNode",
+ "DocumentAccessorNode",
+ "GetDocIdNamespaceSpecificFunctionNode",
+ "NumericResultNode",
+ "UnaryFunctionNode",
+ "TimeStampFunctionNode",
+ "ZCurveFunctionNode",
+ "XorBitFunctionNode",
+ "MultiArgFunctionNode",
+ "DebugWaitFunctionNode",
+ "ForceLoad"
+ };
+ com.yahoo.system.ForceLoad.forceLoad(pkg, classes);
+ }
+
+ public static boolean forceLoad() {
+ return true;
+ }
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/FunctionNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/FunctionNode.java
new file mode 100644
index 00000000000..13f7f8e11a2
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/FunctionNode.java
@@ -0,0 +1,74 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.expression;
+
+import com.yahoo.vespa.objects.Deserializer;
+import com.yahoo.vespa.objects.ObjectVisitor;
+import com.yahoo.vespa.objects.Serializer;
+
+/**
+ * This abstract expression node represents a function to execute.
+ *
+ * @author <a href="mailto:balder@yahoo-inc.com">Henning Baldersheim</a>
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
+ */
+public abstract class FunctionNode extends ExpressionNode {
+
+ public static final int classId = registerClass(0x4000 + 42, FunctionNode.class);
+ private ResultNode result = null;
+
+ public FunctionNode setResult(ResultNode res) {
+ this.result = res;
+ return this;
+ }
+
+ @Override
+ public final ResultNode getResult() {
+ return result;
+ }
+
+ @Override
+ protected int onGetClassId() {
+ return classId;
+ }
+
+ @Override
+ protected void onSerialize(Serializer buf) {
+ super.onSerialize(buf);
+ serializeOptional(buf, result);
+ }
+
+ @Override
+ protected void onDeserialize(Deserializer buf) {
+ super.onDeserialize(buf);
+ result = (ResultNode)deserializeOptional(buf);
+ }
+
+ @Override
+ public FunctionNode clone() {
+ FunctionNode obj = (FunctionNode)super.clone();
+ if (result != null) {
+ obj.result = (ResultNode)result.clone();
+ }
+ return obj;
+ }
+
+ @Override
+ public void visitMembers(ObjectVisitor visitor) {
+ super.visitMembers(visitor);
+ visitor.visit("result", result);
+ }
+
+ @Override
+ protected final boolean equalsExpression(ExpressionNode obj) {
+ FunctionNode rhs = (FunctionNode)obj;
+ if (!equals(result, rhs.result)) {
+ return false;
+ }
+ if (!equalsFunction(rhs)) {
+ return false;
+ }
+ return true;
+ }
+
+ protected abstract boolean equalsFunction(FunctionNode obj);
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/GetDocIdNamespaceSpecificFunctionNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/GetDocIdNamespaceSpecificFunctionNode.java
new file mode 100644
index 00000000000..1308e668d3b
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/GetDocIdNamespaceSpecificFunctionNode.java
@@ -0,0 +1,88 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.expression;
+
+import com.yahoo.vespa.objects.Deserializer;
+import com.yahoo.vespa.objects.ObjectVisitor;
+import com.yahoo.vespa.objects.Serializer;
+
+/**
+ * The node is a request to retrieve the namespace-specific content of a document id.
+ *
+ * @author <a href="mailto:balder@yahoo-inc.com">Henning Baldersheim</a>
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
+ */
+public class GetDocIdNamespaceSpecificFunctionNode extends DocumentAccessorNode {
+
+ public static final int classId = registerClass(0x4000 + 73, GetDocIdNamespaceSpecificFunctionNode.class);
+ private ResultNode result = null;
+
+ /**
+ * Constructs an empty result node. <b>NOTE:</b> This instance is broken until non-optional member data is set.
+ */
+ public GetDocIdNamespaceSpecificFunctionNode() {
+ super();
+ }
+
+ /**
+ * Constructs an instance of this class with given result.
+ *
+ * @param result The result to assign to this.
+ */
+ public GetDocIdNamespaceSpecificFunctionNode(ResultNode result) {
+ super();
+ setResult(result);
+ }
+
+ /**
+ * Sets the result of this function.
+ *
+ * @param result The result to set.
+ * @return This, to allow chaining.
+ */
+ public GetDocIdNamespaceSpecificFunctionNode setResult(ResultNode result) {
+ this.result = result;
+ return this;
+ }
+
+ @Override
+ public ResultNode getResult() {
+ return result;
+ }
+
+ @Override
+ protected int onGetClassId() {
+ return classId;
+ }
+
+ @Override
+ protected void onSerialize(Serializer buf) {
+ super.onSerialize(buf);
+ serializeOptional(buf, result);
+ }
+
+ @Override
+ protected void onDeserialize(Deserializer buf) {
+ super.onDeserialize(buf);
+ result = (ResultNode)deserializeOptional(buf);
+ }
+
+ @Override
+ public GetDocIdNamespaceSpecificFunctionNode clone() {
+ GetDocIdNamespaceSpecificFunctionNode obj = (GetDocIdNamespaceSpecificFunctionNode)super.clone();
+ if (result != null) {
+ obj.result = (ResultNode)result.clone();
+ }
+ return obj;
+ }
+
+ @Override
+ protected boolean equalsExpression(ExpressionNode obj) {
+ return equals(result, ((GetDocIdNamespaceSpecificFunctionNode)obj).result);
+ }
+
+ @Override
+ public void visitMembers(ObjectVisitor visitor) {
+ super.visitMembers(visitor);
+ visitor.visit("result", result);
+ }
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/GetYMUMChecksumFunctionNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/GetYMUMChecksumFunctionNode.java
new file mode 100644
index 00000000000..89b1f477706
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/GetYMUMChecksumFunctionNode.java
@@ -0,0 +1,60 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.expression;
+
+import com.yahoo.vespa.objects.Deserializer;
+import com.yahoo.vespa.objects.ObjectVisitor;
+import com.yahoo.vespa.objects.Serializer;
+
+/**
+ * This node is a request to retrieve the YMUM checksum of a document.
+ *
+ * @author <a href="mailto:balder@yahoo-inc.com">Henning Baldersheim</a>
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
+ */
+public class GetYMUMChecksumFunctionNode extends DocumentAccessorNode {
+
+ public static final int classId = registerClass(0x4000 + 74, GetYMUMChecksumFunctionNode.class);
+ private IntegerResultNode result = new IntegerResultNode(0);
+
+ @Override
+ public ResultNode getResult() {
+ return result;
+ }
+
+ @Override
+ protected int onGetClassId() {
+ return classId;
+ }
+
+ @Override
+ protected void onSerialize(Serializer buf) {
+ super.onSerialize(buf);
+ result.serialize(buf);
+ }
+
+ @Override
+ protected void onDeserialize(Deserializer buf) {
+ super.onDeserialize(buf);
+ result.deserialize(buf);
+ }
+
+ @Override
+ public GetYMUMChecksumFunctionNode clone() {
+ GetYMUMChecksumFunctionNode obj = (GetYMUMChecksumFunctionNode)super.clone();
+ if (result != null) {
+ obj.result = (IntegerResultNode)result.clone();
+ }
+ return obj;
+ }
+
+ @Override
+ public void visitMembers(ObjectVisitor visitor) {
+ super.visitMembers(visitor);
+ visitor.visit("result", result);
+ }
+
+ @Override
+ protected boolean equalsExpression(ExpressionNode obj) {
+ return equals(result, ((GetYMUMChecksumFunctionNode)obj).result);
+ }
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/Int16ResultNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/Int16ResultNode.java
new file mode 100644
index 00000000000..53455fe26ec
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/Int16ResultNode.java
@@ -0,0 +1,149 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.expression;
+
+import com.yahoo.vespa.objects.Deserializer;
+import com.yahoo.vespa.objects.ObjectVisitor;
+import com.yahoo.vespa.objects.Serializer;
+
+import java.nio.ByteBuffer;
+
+/**
+ * This result holds an integer value.
+ *
+ * @author <a href="mailto:balder@yahoo-inc.com">Henning Baldersheim</a>
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
+ */
+public class Int16ResultNode extends NumericResultNode {
+
+ public static final int classId = registerClass(0x4000 + 105, Int16ResultNode.class);
+ private short value = 0;
+
+ @SuppressWarnings("UnusedDeclaration")
+ public Int16ResultNode() {
+ // used by deserializer
+ }
+
+ /**
+ * Constructs an instance of this class with given value.
+ *
+ * @param value The value to assign to this.
+ */
+ public Int16ResultNode(short value) {
+ this.value = value;
+ }
+
+ /**
+ * Sets the value of this result.
+ *
+ * @param value The value to set.
+ * @return This, to allow chaining.
+ */
+ public Int16ResultNode setValue(short value) {
+ this.value = value;
+ return this;
+ }
+
+ @Override
+ protected int onGetClassId() {
+ return classId;
+ }
+
+ @Override
+ protected void onSerialize(Serializer buf) {
+ buf.putShort(null, value);
+ }
+
+ @Override
+ protected void onDeserialize(Deserializer buf) {
+ value = buf.getShort(null);
+ }
+
+ @Override
+ public long getInteger() {
+ return value;
+ }
+
+ @Override
+ public double getFloat() {
+ return value;
+ }
+
+ @Override
+ public String getString() {
+ return String.valueOf(value);
+ }
+
+ @Override
+ public byte[] getRaw() {
+ return ByteBuffer.allocate(8).putLong(value).array();
+ }
+
+ @Override
+ public void add(ResultNode rhs) {
+ value += rhs.getInteger();
+ }
+
+ @Override
+ public void negate() {
+ value = (short)-value;
+ }
+
+ @Override
+ public void multiply(ResultNode rhs) {
+ value *= rhs.getInteger();
+ }
+
+ @Override
+ public void divide(ResultNode rhs) {
+ short val = (short)rhs.getInteger();
+ value = (short)((val == 0) ? 0 : (value / val));
+ }
+
+ @Override
+ public void modulo(ResultNode rhs) {
+ value %= rhs.getInteger();
+ }
+
+ @Override
+ public void min(ResultNode rhs) {
+ short value = (short)rhs.getInteger();
+ if (value < this.value) {
+ this.value = value;
+ }
+ }
+
+ @Override
+ public void max(ResultNode rhs) {
+ short value = (short)rhs.getInteger();
+ if (value > this.value) {
+ this.value = value;
+ }
+ }
+
+ @Override
+ public Object getNumber() {
+ return new Integer(value);
+ }
+
+ @Override
+ protected int onCmp(ResultNode rhs) {
+ long value = rhs.getInteger();
+ return (this.value < value) ? -1 : (this.value > value) ? 1 : 0;
+ }
+
+ @Override
+ public int hashCode() {
+ return super.hashCode() + (int)value;
+ }
+
+ @Override
+ public void visitMembers(ObjectVisitor visitor) {
+ super.visitMembers(visitor);
+ visitor.visit("value", value);
+ }
+
+ @Override
+ public void set(ResultNode rhs) {
+ value = (short)rhs.getInteger();
+ }
+} \ No newline at end of file
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/Int16ResultNodeVector.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/Int16ResultNodeVector.java
new file mode 100644
index 00000000000..7e67f80b5e0
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/Int16ResultNodeVector.java
@@ -0,0 +1,79 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.expression;
+
+import com.yahoo.vespa.objects.Deserializer;
+import com.yahoo.vespa.objects.Serializer;
+
+import java.util.ArrayList;
+
+/**
+ * This result holds nothing.
+ *
+ * @author <a href="mailto:balder@yahoo-inc.com">Henning Baldersheim</a>
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
+ */
+public class Int16ResultNodeVector extends ResultNodeVector {
+
+ public static final int classId = registerClass(0x4000 + 117, Int16ResultNodeVector.class);
+ private ArrayList<Int16ResultNode> vector = new ArrayList<Int16ResultNode>();
+
+ public Int16ResultNodeVector() {
+ }
+
+ public Int16ResultNodeVector add(Int16ResultNode v) {
+ vector.add(v);
+ return this;
+ }
+
+ public ArrayList<Int16ResultNode> getVector() {
+ return vector;
+ }
+
+ @Override
+ public ResultNodeVector add(ResultNode r) {
+ return add((Int16ResultNode)r);
+ }
+
+ @Override
+ protected int onGetClassId() {
+ return classId;
+ }
+
+ @Override
+ protected void onSerialize(Serializer buf) {
+ super.onSerialize(buf);
+ buf.putInt(null, vector.size());
+ for (Int16ResultNode node : vector) {
+ node.serialize(buf);
+ }
+ }
+
+ @Override
+ protected void onDeserialize(Deserializer buf) {
+ super.onDeserialize(buf);
+ int sz = buf.getInt(null);
+ vector = new ArrayList<Int16ResultNode>();
+ for (int i = 0; i < sz; i++) {
+ Int16ResultNode node = new Int16ResultNode((short)0);
+ node.deserialize(buf);
+ vector.add(node);
+ }
+ }
+
+ @Override
+ protected int onCmp(ResultNode rhs) {
+ if (classId != rhs.getClassId()) {
+ return (classId - rhs.getClassId());
+ }
+ Int16ResultNodeVector b = (Int16ResultNodeVector)rhs;
+ int minLength = vector.size();
+ if (b.vector.size() < minLength) {
+ minLength = b.vector.size();
+ }
+ int diff = 0;
+ for (int i = 0; (diff == 0) && (i < minLength); i++) {
+ diff = vector.get(i).compareTo(b.vector.get(i));
+ }
+ return (diff == 0) ? (vector.size() - b.vector.size()) : diff;
+ }
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/Int32ResultNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/Int32ResultNode.java
new file mode 100644
index 00000000000..e2acb243714
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/Int32ResultNode.java
@@ -0,0 +1,149 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.expression;
+
+import com.yahoo.vespa.objects.Deserializer;
+import com.yahoo.vespa.objects.ObjectVisitor;
+import com.yahoo.vespa.objects.Serializer;
+
+import java.nio.ByteBuffer;
+
+/**
+ * This result holds an integer value.
+ *
+ * @author <a href="mailto:balder@yahoo-inc.com">Henning Baldersheim</a>
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
+ */
+public class Int32ResultNode extends NumericResultNode {
+
+ public static final int classId = registerClass(0x4000 + 106, Int32ResultNode.class);
+ private int value = 0;
+
+ @SuppressWarnings("UnusedDeclaration")
+ public Int32ResultNode() {
+ // used by deserializer
+ }
+
+ /**
+ * Constructs an instance of this class with given value.
+ *
+ * @param value The value to assign to this.
+ */
+ public Int32ResultNode(int value) {
+ this.value = value;
+ }
+
+ /**
+ * Sets the value of this result.
+ *
+ * @param value The value to set.
+ * @return This, to allow chaining.
+ */
+ public Int32ResultNode setValue(int value) {
+ this.value = value;
+ return this;
+ }
+
+ @Override
+ protected int onGetClassId() {
+ return classId;
+ }
+
+ @Override
+ protected void onSerialize(Serializer buf) {
+ buf.putInt(null, value);
+ }
+
+ @Override
+ protected void onDeserialize(Deserializer buf) {
+ value = buf.getInt(null);
+ }
+
+ @Override
+ public long getInteger() {
+ return value;
+ }
+
+ @Override
+ public double getFloat() {
+ return value;
+ }
+
+ @Override
+ public String getString() {
+ return String.valueOf(value);
+ }
+
+ @Override
+ public byte[] getRaw() {
+ return ByteBuffer.allocate(8).putLong(value).array();
+ }
+
+ @Override
+ public void add(ResultNode rhs) {
+ value += rhs.getInteger();
+ }
+
+ @Override
+ public void negate() {
+ value = -value;
+ }
+
+ @Override
+ public void multiply(ResultNode rhs) {
+ value *= rhs.getInteger();
+ }
+
+ @Override
+ public void divide(ResultNode rhs) {
+ int val = (int)rhs.getInteger();
+ value = (val == 0) ? 0 : (value / val);
+ }
+
+ @Override
+ public void modulo(ResultNode rhs) {
+ value %= rhs.getInteger();
+ }
+
+ @Override
+ public void min(ResultNode rhs) {
+ int value = (int)rhs.getInteger();
+ if (value < this.value) {
+ this.value = value;
+ }
+ }
+
+ @Override
+ public void max(ResultNode rhs) {
+ int value = (int)rhs.getInteger();
+ if (value > this.value) {
+ this.value = value;
+ }
+ }
+
+ @Override
+ public Object getNumber() {
+ return new Integer(value);
+ }
+
+ @Override
+ protected int onCmp(ResultNode rhs) {
+ long value = rhs.getInteger();
+ return (this.value < value) ? -1 : (this.value > value) ? 1 : 0;
+ }
+
+ @Override
+ public int hashCode() {
+ return super.hashCode() + value;
+ }
+
+ @Override
+ public void visitMembers(ObjectVisitor visitor) {
+ super.visitMembers(visitor);
+ visitor.visit("value", value);
+ }
+
+ @Override
+ public void set(ResultNode rhs) {
+ value = (int)rhs.getInteger();
+ }
+} \ No newline at end of file
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/Int32ResultNodeVector.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/Int32ResultNodeVector.java
new file mode 100644
index 00000000000..f9166ac63da
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/Int32ResultNodeVector.java
@@ -0,0 +1,80 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.expression;
+
+import com.yahoo.vespa.objects.Deserializer;
+import com.yahoo.vespa.objects.Serializer;
+
+import java.util.ArrayList;
+
+/**
+ * This result holds nothing.
+ *
+ * @author <a href="mailto:balder@yahoo-inc.com">Henning Baldersheim</a>
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
+ */
+public class Int32ResultNodeVector extends ResultNodeVector {
+
+ public static final int classId = registerClass(0x4000 + 118, Int32ResultNodeVector.class);
+ private ArrayList<Int32ResultNode> vector = new ArrayList<Int32ResultNode>();
+
+ public Int32ResultNodeVector() {
+
+ }
+
+ public Int32ResultNodeVector add(Int32ResultNode v) {
+ vector.add(v);
+ return this;
+ }
+
+ public ArrayList<Int32ResultNode> getVector() {
+ return vector;
+ }
+
+ @Override
+ public ResultNodeVector add(ResultNode r) {
+ return add((Int32ResultNode)r);
+ }
+
+ @Override
+ protected int onGetClassId() {
+ return classId;
+ }
+
+ @Override
+ protected void onSerialize(Serializer buf) {
+ super.onSerialize(buf);
+ buf.putInt(null, vector.size());
+ for (Int32ResultNode node : vector) {
+ node.serialize(buf);
+ }
+ }
+
+ @Override
+ protected void onDeserialize(Deserializer buf) {
+ super.onDeserialize(buf);
+ int sz = buf.getInt(null);
+ vector = new ArrayList<Int32ResultNode>();
+ for (int i = 0; i < sz; i++) {
+ Int32ResultNode node = new Int32ResultNode(0);
+ node.deserialize(buf);
+ vector.add(node);
+ }
+ }
+
+ @Override
+ protected int onCmp(ResultNode rhs) {
+ if (classId != rhs.getClassId()) {
+ return (classId - rhs.getClassId());
+ }
+ Int32ResultNodeVector b = (Int32ResultNodeVector)rhs;
+ int minLength = vector.size();
+ if (b.vector.size() < minLength) {
+ minLength = b.vector.size();
+ }
+ int diff = 0;
+ for (int i = 0; (diff == 0) && (i < minLength); i++) {
+ diff = vector.get(i).compareTo(b.vector.get(i));
+ }
+ return (diff == 0) ? (vector.size() - b.vector.size()) : diff;
+ }
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/Int8ResultNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/Int8ResultNode.java
new file mode 100644
index 00000000000..dedb2f3ddbc
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/Int8ResultNode.java
@@ -0,0 +1,149 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.expression;
+
+import com.yahoo.vespa.objects.Deserializer;
+import com.yahoo.vespa.objects.ObjectVisitor;
+import com.yahoo.vespa.objects.Serializer;
+
+import java.nio.ByteBuffer;
+
+/**
+ * This result holds an integer value.
+ *
+ * @author <a href="mailto:balder@yahoo-inc.com">Henning Baldersheim</a>
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
+ */
+public class Int8ResultNode extends NumericResultNode {
+
+ public static final int classId = registerClass(0x4000 + 104, Int8ResultNode.class);
+ private byte value = 0;
+
+ @SuppressWarnings("UnusedDeclaration")
+ public Int8ResultNode() {
+ // used by deserializer
+ }
+
+ /**
+ * Constructs an instance of this class with given value.
+ *
+ * @param value The value to assign to this.
+ */
+ public Int8ResultNode(byte value) {
+ this.value = value;
+ }
+
+ /**
+ * Sets the value of this result.
+ *
+ * @param value The value to set.
+ * @return This, to allow chaining.
+ */
+ public Int8ResultNode setValue(byte value) {
+ this.value = value;
+ return this;
+ }
+
+ @Override
+ protected int onGetClassId() {
+ return classId;
+ }
+
+ @Override
+ protected void onSerialize(Serializer buf) {
+ buf.putByte(null, value);
+ }
+
+ @Override
+ protected void onDeserialize(Deserializer buf) {
+ value = buf.getByte(null);
+ }
+
+ @Override
+ public long getInteger() {
+ return value;
+ }
+
+ @Override
+ public double getFloat() {
+ return value;
+ }
+
+ @Override
+ public String getString() {
+ return String.valueOf(value);
+ }
+
+ @Override
+ public byte[] getRaw() {
+ return ByteBuffer.allocate(8).putLong(value).array();
+ }
+
+ @Override
+ public void add(ResultNode rhs) {
+ value += rhs.getInteger();
+ }
+
+ @Override
+ public void negate() {
+ value = (byte)-value;
+ }
+
+ @Override
+ public void multiply(ResultNode rhs) {
+ value *= rhs.getInteger();
+ }
+
+ @Override
+ public void divide(ResultNode rhs) {
+ int val = (int)rhs.getInteger();
+ value = (byte)((val == 0) ? 0 : (value / val));
+ }
+
+ @Override
+ public void modulo(ResultNode rhs) {
+ value %= rhs.getInteger();
+ }
+
+ @Override
+ public void min(ResultNode rhs) {
+ byte value = (byte)rhs.getInteger();
+ if (value < this.value) {
+ this.value = value;
+ }
+ }
+
+ @Override
+ public void max(ResultNode rhs) {
+ byte value = (byte)rhs.getInteger();
+ if (value > this.value) {
+ this.value = value;
+ }
+ }
+
+ @Override
+ public Object getNumber() {
+ return new Integer(value);
+ }
+
+ @Override
+ protected int onCmp(ResultNode rhs) {
+ long value = rhs.getInteger();
+ return (this.value < value) ? -1 : (this.value > value) ? 1 : 0;
+ }
+
+ @Override
+ public int hashCode() {
+ return super.hashCode() + (int)value;
+ }
+
+ @Override
+ public void visitMembers(ObjectVisitor visitor) {
+ super.visitMembers(visitor);
+ visitor.visit("value", value);
+ }
+
+ @Override
+ public void set(ResultNode rhs) {
+ value = (byte)rhs.getInteger();
+ }
+} \ No newline at end of file
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/Int8ResultNodeVector.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/Int8ResultNodeVector.java
new file mode 100644
index 00000000000..da1edfc5a3a
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/Int8ResultNodeVector.java
@@ -0,0 +1,80 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.expression;
+
+import com.yahoo.vespa.objects.Deserializer;
+import com.yahoo.vespa.objects.Serializer;
+
+import java.util.ArrayList;
+
+/**
+ * This result holds nothing.
+ *
+ * @author <a href="mailto:balder@yahoo-inc.com">Henning Baldersheim</a>
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
+ */
+public class Int8ResultNodeVector extends ResultNodeVector {
+
+ public static final int classId = registerClass(0x4000 + 116, Int8ResultNodeVector.class);
+ private ArrayList<Int8ResultNode> vector = new ArrayList<Int8ResultNode>();
+
+ public Int8ResultNodeVector() {
+
+ }
+
+ public Int8ResultNodeVector add(Int8ResultNode v) {
+ vector.add(v);
+ return this;
+ }
+
+ public ArrayList<Int8ResultNode> getVector() {
+ return vector;
+ }
+
+ @Override
+ public ResultNodeVector add(ResultNode r) {
+ return add((Int8ResultNode)r);
+ }
+
+ @Override
+ protected int onGetClassId() {
+ return classId;
+ }
+
+ @Override
+ protected void onSerialize(Serializer buf) {
+ super.onSerialize(buf);
+ buf.putInt(null, vector.size());
+ for (Int8ResultNode node : vector) {
+ node.serialize(buf);
+ }
+ }
+
+ @Override
+ protected void onDeserialize(Deserializer buf) {
+ super.onDeserialize(buf);
+ int sz = buf.getInt(null);
+ vector = new ArrayList<Int8ResultNode>();
+ for (int i = 0; i < sz; i++) {
+ Int8ResultNode node = new Int8ResultNode((byte)0);
+ node.deserialize(buf);
+ vector.add(node);
+ }
+ }
+
+ @Override
+ protected int onCmp(ResultNode rhs) {
+ if (classId != rhs.getClassId()) {
+ return (classId - rhs.getClassId());
+ }
+ Int8ResultNodeVector b = (Int8ResultNodeVector)rhs;
+ int minLength = vector.size();
+ if (b.vector.size() < minLength) {
+ minLength = b.vector.size();
+ }
+ int diff = 0;
+ for (int i = 0; (diff == 0) && (i < minLength); i++) {
+ diff = vector.get(i).compareTo(b.vector.get(i));
+ }
+ return (diff == 0) ? (vector.size() - b.vector.size()) : diff;
+ }
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/IntegerBucketResultNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/IntegerBucketResultNode.java
new file mode 100644
index 00000000000..08a85375e7c
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/IntegerBucketResultNode.java
@@ -0,0 +1,102 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.expression;
+
+import com.yahoo.vespa.objects.Deserializer;
+import com.yahoo.vespa.objects.ObjectVisitor;
+import com.yahoo.vespa.objects.Serializer;
+
+/**
+ * This is an integer bucket value
+ *
+ * @author <a href="mailto:havardpe@yahoo-inc.com">Haavard Pettersen</a>
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
+ */
+public class IntegerBucketResultNode extends BucketResultNode {
+
+ public static final int classId = registerClass(0x4000 + 101, IntegerBucketResultNode.class);
+ private long from = 0; // bucket start, inclusive
+ private long to = 0; // bucket end, exclusive
+
+ /**
+ * Constructs an empty result node.
+ */
+ public IntegerBucketResultNode() {
+ // empty
+ }
+
+ /**
+ * Create a bucket with the given limits
+ *
+ * @param from bucket start
+ * @param to bucket end
+ */
+ public IntegerBucketResultNode(long from, long to) {
+ this.from = from;
+ this.to = to;
+ }
+
+ /**
+ * Obtain the bucket start
+ *
+ * @return bucket start
+ */
+ public long getFrom() {
+ return from;
+ }
+
+ /**
+ * Obtain the bucket end
+ *
+ * @return bucket end
+ */
+ public long getTo() {
+ return to;
+ }
+
+ @Override
+ public boolean empty() {
+ return to == from;
+ }
+
+ @Override
+ protected int onGetClassId() {
+ return classId;
+ }
+
+ @Override
+ protected void onSerialize(Serializer buf) {
+ buf.putLong(null, from);
+ buf.putLong(null, to);
+ }
+
+ @Override
+ protected void onDeserialize(Deserializer buf) {
+ from = buf.getLong(null);
+ to = buf.getLong(null);
+ }
+
+ @Override
+ protected int onCmp(ResultNode rhs) {
+ if (classId != rhs.getClassId()) {
+ return (classId - rhs.getClassId());
+ }
+ IntegerBucketResultNode b = (IntegerBucketResultNode)rhs;
+ long diff = from - b.from;
+ if (diff == 0) {
+ diff = to - b.to;
+ }
+ return ((diff == 0) ? 0 : ((diff < 0) ? -1 : 1));
+ }
+
+ @Override
+ public int hashCode() {
+ return super.hashCode() + (int)from + (int)to;
+ }
+
+ @Override
+ public void visitMembers(ObjectVisitor visitor) {
+ super.visitMembers(visitor);
+ visitor.visit("from", from);
+ visitor.visit("to", to);
+ }
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/IntegerBucketResultNodeVector.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/IntegerBucketResultNodeVector.java
new file mode 100644
index 00000000000..1ea639bd67f
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/IntegerBucketResultNodeVector.java
@@ -0,0 +1,80 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.expression;
+
+import com.yahoo.vespa.objects.Deserializer;
+import com.yahoo.vespa.objects.Serializer;
+
+import java.util.ArrayList;
+
+/**
+ * This result holds nothing.
+ *
+ * @author <a href="mailto:balder@yahoo-inc.com">Henning Baldersheim</a>
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
+ */
+public class IntegerBucketResultNodeVector extends ResultNodeVector {
+
+ public static final int classId = registerClass(0x4000 + 112, IntegerBucketResultNodeVector.class);
+ private ArrayList<IntegerBucketResultNode> vector = new ArrayList<IntegerBucketResultNode>();
+
+ public IntegerBucketResultNodeVector() {
+
+ }
+
+ public IntegerBucketResultNodeVector add(IntegerBucketResultNode v) {
+ vector.add(v);
+ return this;
+ }
+
+ public ArrayList<IntegerBucketResultNode> getVector() {
+ return vector;
+ }
+
+ @Override
+ protected int onGetClassId() {
+ return classId;
+ }
+
+ @Override
+ public ResultNodeVector add(ResultNode r) {
+ return add((IntegerBucketResultNode)r);
+ }
+
+ @Override
+ protected void onSerialize(Serializer buf) {
+ super.onSerialize(buf);
+ buf.putInt(null, vector.size());
+ for (IntegerBucketResultNode node : vector) {
+ node.serialize(buf);
+ }
+ }
+
+ @Override
+ protected void onDeserialize(Deserializer buf) {
+ super.onDeserialize(buf);
+ int sz = buf.getInt(null);
+ vector = new ArrayList<IntegerBucketResultNode>();
+ for (int i = 0; i < sz; i++) {
+ IntegerBucketResultNode node = new IntegerBucketResultNode(0, 0);
+ node.deserialize(buf);
+ vector.add(node);
+ }
+ }
+
+ @Override
+ protected int onCmp(ResultNode rhs) {
+ if (classId != rhs.getClassId()) {
+ return (classId - rhs.getClassId());
+ }
+ IntegerBucketResultNodeVector b = (IntegerBucketResultNodeVector)rhs;
+ int minLength = vector.size();
+ if (b.vector.size() < minLength) {
+ minLength = b.vector.size();
+ }
+ int diff = 0;
+ for (int i = 0; (diff == 0) && (i < minLength); i++) {
+ diff = vector.get(i).compareTo(b.vector.get(i));
+ }
+ return (diff == 0) ? (vector.size() - b.vector.size()) : diff;
+ }
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/IntegerResultNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/IntegerResultNode.java
new file mode 100644
index 00000000000..4ca5dfc4139
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/IntegerResultNode.java
@@ -0,0 +1,183 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.expression;
+
+import com.yahoo.vespa.objects.Deserializer;
+import com.yahoo.vespa.objects.ObjectVisitor;
+import com.yahoo.vespa.objects.Serializer;
+
+import java.nio.ByteBuffer;
+
+/**
+ * This result holds an integer value.
+ *
+ * @author <a href="mailto:balder@yahoo-inc.com">Henning Baldersheim</a>
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
+ */
+public class IntegerResultNode extends NumericResultNode {
+
+ public static final int classId = registerClass(0x4000 + 107, IntegerResultNode.class);
+ private static IntegerResultNode negativeInfinity = new IntegerResultNode(Long.MIN_VALUE);
+ private static IntegerResultNode positiveInfinity = new IntegerResultNode(Long.MAX_VALUE);
+ private long value;
+
+ /**
+ * Constructs an empty result node.
+ */
+ public IntegerResultNode() {
+
+ }
+
+ /**
+ * Constructs an instance of this class with given value.
+ *
+ * @param value The value to assign to this.
+ */
+ public IntegerResultNode(long value) {
+ setValue(value);
+ }
+
+ /**
+ * Sets the value of this result.
+ *
+ * @param value The value to set.
+ * @return This, to allow chaining.
+ */
+ public IntegerResultNode setValue(long value) {
+ this.value = value;
+ return this;
+ }
+
+ void andOp(final ResultNode b) {
+ value &= b.getInteger();
+ }
+
+ void orOp(final ResultNode b) {
+ value |= b.getInteger();
+ }
+
+ void xorOp(final ResultNode b) {
+ value ^= b.getInteger();
+ }
+
+ @Override
+ protected int onGetClassId() {
+ return classId;
+ }
+
+ @Override
+ protected void onSerialize(Serializer buf) {
+ buf.putLong(null, value);
+ }
+
+ @Override
+ protected void onDeserialize(Deserializer buf) {
+ value = buf.getLong(null);
+ }
+
+ @Override
+ public long getInteger() {
+ return value;
+ }
+
+ @Override
+ public double getFloat() {
+ return value;
+ }
+
+ @Override
+ public String getString() {
+ return String.valueOf(value);
+ }
+
+ @Override
+ public byte[] getRaw() {
+ return ByteBuffer.allocate(8).putLong(value).array();
+ }
+
+ @Override
+ public void add(ResultNode rhs) {
+ value += rhs.getInteger();
+ }
+
+ @Override
+ public void negate() {
+ value = -value;
+ }
+
+ @Override
+ public void multiply(ResultNode rhs) {
+ value *= rhs.getInteger();
+ }
+
+ @Override
+ public void divide(ResultNode rhs) {
+ long val = rhs.getInteger();
+ value = (val == 0) ? 0 : (value / val);
+ }
+
+ @Override
+ public void modulo(ResultNode rhs) {
+ value %= rhs.getInteger();
+ }
+
+ @Override
+ public void min(ResultNode rhs) {
+ long value = rhs.getInteger();
+ if (value < this.value) {
+ this.value = value;
+ }
+ }
+
+ @Override
+ public void max(ResultNode rhs) {
+ long value = rhs.getInteger();
+ if (value > this.value) {
+ this.value = value;
+ }
+ }
+
+ @Override
+ public Object getNumber() {
+ return new Long(value);
+ }
+
+ @Override
+ protected int onCmp(ResultNode rhs) {
+ long value = rhs.getInteger();
+ return (this.value < value) ? -1 : (this.value > value) ? 1 : 0;
+ }
+
+ @Override
+ public int hashCode() {
+ return super.hashCode() + (int)value;
+ }
+
+ @Override
+ public void visitMembers(ObjectVisitor visitor) {
+ super.visitMembers(visitor);
+ visitor.visit("value", value);
+ }
+
+ @Override
+ public void set(ResultNode rhs) {
+ value = rhs.getInteger();
+ }
+
+ /**
+ * Will provide the smallest possible value
+ *
+ * @return the smallest possible IntegerResultNode
+ */
+ public static IntegerResultNode getNegativeInfinity() {
+ return negativeInfinity;
+ }
+
+ /**
+ * Will provide the largest possible value
+ *
+ * @return the smallest largest IntegerResultNode
+ */
+ public static IntegerResultNode getPositiveInfinity() {
+ return positiveInfinity;
+ }
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/IntegerResultNodeVector.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/IntegerResultNodeVector.java
new file mode 100644
index 00000000000..ac55a4e7d8b
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/IntegerResultNodeVector.java
@@ -0,0 +1,80 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.expression;
+
+import com.yahoo.vespa.objects.Deserializer;
+import com.yahoo.vespa.objects.Serializer;
+
+import java.util.ArrayList;
+
+/**
+ * This result holds nothing.
+ *
+ * @author <a href="mailto:balder@yahoo-inc.com">Henning Baldersheim</a>
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
+ */
+public class IntegerResultNodeVector extends ResultNodeVector {
+
+ public static final int classId = registerClass(0x4000 + 119, IntegerResultNodeVector.class);
+ private ArrayList<IntegerResultNode> vector = new ArrayList<IntegerResultNode>();
+
+ public IntegerResultNodeVector() {
+
+ }
+
+ public IntegerResultNodeVector add(IntegerResultNode v) {
+ vector.add(v);
+ return this;
+ }
+
+ public ArrayList<IntegerResultNode> getVector() {
+ return vector;
+ }
+
+ @Override
+ public ResultNodeVector add(ResultNode r) {
+ return add((IntegerResultNode)r);
+ }
+
+ @Override
+ protected int onGetClassId() {
+ return classId;
+ }
+
+ @Override
+ protected void onSerialize(Serializer buf) {
+ super.onSerialize(buf);
+ buf.putInt(null, vector.size());
+ for (IntegerResultNode node : vector) {
+ node.serialize(buf);
+ }
+ }
+
+ @Override
+ protected void onDeserialize(Deserializer buf) {
+ super.onDeserialize(buf);
+ int sz = buf.getInt(null);
+ vector = new ArrayList<IntegerResultNode>();
+ for (int i = 0; i < sz; i++) {
+ IntegerResultNode node = new IntegerResultNode(0);
+ node.deserialize(buf);
+ vector.add(node);
+ }
+ }
+
+ @Override
+ protected int onCmp(ResultNode rhs) {
+ if (classId != rhs.getClassId()) {
+ return (classId - rhs.getClassId());
+ }
+ IntegerResultNodeVector b = (IntegerResultNodeVector)rhs;
+ int minLength = vector.size();
+ if (b.vector.size() < minLength) {
+ minLength = b.vector.size();
+ }
+ int diff = 0;
+ for (int i = 0; (diff == 0) && (i < minLength); i++) {
+ diff = vector.get(i).compareTo(b.vector.get(i));
+ }
+ return (diff == 0) ? (vector.size() - b.vector.size()) : diff;
+ }
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/InterpolatedLookupNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/InterpolatedLookupNode.java
new file mode 100644
index 00000000000..6bd9e10a75a
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/InterpolatedLookupNode.java
@@ -0,0 +1,94 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.expression;
+
+import com.yahoo.vespa.objects.Deserializer;
+import com.yahoo.vespa.objects.ObjectVisitor;
+import com.yahoo.vespa.objects.Serializer;
+
+/**
+ * This function is an instruction to retrieve the value of a named attribute.
+ *
+ * @author arnej27959
+ */
+public class InterpolatedLookupNode extends UnaryFunctionNode {
+
+ public static final int classId = registerClass(0x4000 + 39, InterpolatedLookupNode.class);
+ private String attribute;
+
+ /**
+ * Constructs an empty result node.
+ * <b>NOTE:</b> This instance is broken until non-optional member data is set.
+ */
+ public InterpolatedLookupNode() { }
+
+ /**
+ * Constructs an instance of this class with given attribute name
+ * and lookup argument.
+ *
+ * @param attribute The attribute to retrieve.
+ * @param arg Expression evaluating to the lookup argument.
+ */
+ public InterpolatedLookupNode(String attribute, ExpressionNode arg) {
+ setAttributeName(attribute);
+ addArg(arg);
+ }
+
+ /**
+ * Returns the name of the attribute whose value we do lookup in.
+ *
+ * @return The attribute name.
+ */
+ public String getAttributeName() {
+ return attribute;
+ }
+
+ /**
+ * Sets the name of the attribute whose value we do lookup in.
+ *
+ * @param attribute The attribute to retrieve.
+ * @return This, to allow chaining.
+ */
+ public InterpolatedLookupNode setAttributeName(String attribute) {
+ if (attribute == null) {
+ throw new IllegalArgumentException("Attribute name can not be null.");
+ }
+ this.attribute = attribute;
+ return this;
+ }
+
+ @Override
+ protected int onGetClassId() {
+ return classId;
+ }
+
+ @Override
+ protected void onSerialize(Serializer buf) {
+ super.onSerialize(buf);
+ putUtf8(buf, attribute);
+ }
+
+ @Override
+ protected void onDeserialize(Deserializer buf) {
+ super.onDeserialize(buf);
+ attribute = getUtf8(buf);
+ }
+
+ @Override
+ public int hashCode() {
+ return super.hashCode() + attribute.hashCode();
+ }
+
+ @Override
+ protected boolean equalsUnaryFunction(UnaryFunctionNode obj) {
+ // "arg" checked by superclass
+ String otherAttr = ((InterpolatedLookupNode)obj).getAttributeName();
+ return attribute.equals(otherAttr);
+ }
+
+ @Override
+ public void visitMembers(ObjectVisitor visitor) {
+ super.visitMembers(visitor);
+ visitor.visit("attribute", attribute);
+ }
+
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/MD5BitFunctionNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/MD5BitFunctionNode.java
new file mode 100644
index 00000000000..64c81072714
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/MD5BitFunctionNode.java
@@ -0,0 +1,35 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.expression;
+
+/**
+ * This function is a request to calculate the MD5 of the result of its argument.
+ *
+ * @author <a href="mailto:balder@yahoo-inc.com">Henning Baldersheim</a>
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
+ */
+public class MD5BitFunctionNode extends UnaryBitFunctionNode {
+
+ public static final int classId = registerClass(0x4000 + 70, MD5BitFunctionNode.class);
+
+ /**
+ * Constructs an empty result node. <b>NOTE:</b> This instance is broken until non-optional member data is set.
+ */
+ public MD5BitFunctionNode() {
+
+ }
+
+ /**
+ * Constructs an instance of this class with given argument and number of bits.
+ *
+ * @param arg The argument for this function.
+ * @param numBits The number of bits to operate on.
+ */
+ public MD5BitFunctionNode(ExpressionNode arg, int numBits) {
+ super(arg, numBits);
+ }
+
+ @Override
+ protected int onGetClassId() {
+ return classId;
+ }
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/MathFunctionNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/MathFunctionNode.java
new file mode 100644
index 00000000000..0d82b6a260e
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/MathFunctionNode.java
@@ -0,0 +1,185 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.expression;
+
+import com.yahoo.vespa.objects.Deserializer;
+import com.yahoo.vespa.objects.ObjectVisitor;
+import com.yahoo.vespa.objects.Serializer;
+
+/**
+ * This function is an instruction to negate its argument.
+ *
+ * @author <a href="mailto:balder@yahoo-inc.com">Henning Baldersheim</a>
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
+ */
+public class MathFunctionNode extends MultiArgFunctionNode {
+
+ // Make sure these match the definition in c++ searchlib/src/searchlib/expression/mathfunctionnode.h.
+ public static enum Function {
+ EXP(0),
+ POW(1),
+ LOG(2),
+ LOG1P(3),
+ LOG10(4),
+ SIN(5),
+ ASIN(6),
+ COS(7),
+ ACOS(8),
+ TAN(9),
+ ATAN(10),
+ SQRT(11),
+ SINH(12),
+ ASINH(13),
+ COSH(14),
+ ACOSH(15),
+ TANH(16),
+ ATANH(17),
+ CBRT(18),
+ HYPOT(19),
+ FLOOR(20);
+
+ private final int id;
+
+ private Function(int id) {
+ this.id = id;
+ }
+
+ private static Function valueOf(int id) {
+ for (Function fnc : values()) {
+ if (id == fnc.id) {
+ return fnc;
+ }
+ }
+ return null;
+ }
+ }
+
+ public static final int classId = registerClass(0x4000 + 136, MathFunctionNode.class);
+ private Function fnc;
+
+ @SuppressWarnings("UnusedDeclaration")
+ public MathFunctionNode() {
+ this(Function.LOG);
+ }
+
+ public MathFunctionNode(Function fnc) {
+ this(null, fnc);
+ }
+
+ public MathFunctionNode(ExpressionNode exp, Function fnc) {
+ this.fnc = fnc;
+ if (exp != null) {
+ addArg(exp);
+ }
+ }
+
+ @Override
+ protected boolean onExecute() {
+ getArg(0).execute();
+ double result = 0.0;
+ switch (fnc) {
+ case EXP:
+ result = Math.exp(getArg(0).getResult().getFloat());
+ break;
+ case POW:
+ result = Math.pow(getArg(0).getResult().getFloat(), getArg(1).getResult().getFloat());
+ break;
+ case LOG:
+ result = Math.log(getArg(0).getResult().getFloat());
+ break;
+ case LOG1P:
+ result = Math.log1p(getArg(0).getResult().getFloat());
+ break;
+ case LOG10:
+ result = Math.log10(getArg(0).getResult().getFloat());
+ break;
+ case SIN:
+ result = Math.sin(getArg(0).getResult().getFloat());
+ break;
+ case ASIN:
+ result = Math.asin(getArg(0).getResult().getFloat());
+ break;
+ case COS:
+ result = Math.cos(getArg(0).getResult().getFloat());
+ break;
+ case ACOS:
+ result = Math.acos(getArg(0).getResult().getFloat());
+ break;
+ case TAN:
+ result = Math.tan(getArg(0).getResult().getFloat());
+ break;
+ case ATAN:
+ result = Math.atan(getArg(0).getResult().getFloat());
+ break;
+ case SQRT:
+ result = Math.sqrt(getArg(0).getResult().getFloat());
+ break;
+ case SINH:
+ result = Math.sinh(getArg(0).getResult().getFloat());
+ break;
+ case ASINH:
+ throw new IllegalArgumentException("Inverse hyperbolic sine(asinh) is not supported in java");
+ case COSH:
+ result = Math.cosh(getArg(0).getResult().getFloat());
+ break;
+ case ACOSH:
+ throw new IllegalArgumentException("Inverse hyperbolic cosine (acosh) is not supported in java");
+ case TANH:
+ result = Math.tanh(getArg(0).getResult().getFloat());
+ break;
+ case ATANH:
+ throw new IllegalArgumentException("Inverse hyperbolic tangents (atanh) is not supported in java");
+ case FLOOR:
+ result = Math.floor(getArg(0).getResult().getFloat());
+ break;
+ case CBRT:
+ result = Math.cbrt(getArg(0).getResult().getFloat());
+ break;
+ case HYPOT:
+ result = Math.hypot(getArg(0).getResult().getFloat(), getArg(1).getResult().getFloat());
+ break;
+ }
+ ((FloatResultNode)getResult()).setValue(result);
+ return true;
+ }
+
+ @Override
+ public void onPrepareResult() {
+ setResult(new FloatResultNode());
+ }
+
+ @Override
+ protected int onGetClassId() {
+ return classId;
+ }
+
+ @Override
+ protected void onSerialize(Serializer buf) {
+ super.onSerialize(buf);
+ buf.putByte(null, (byte)fnc.id);
+ }
+
+ @Override
+ protected void onDeserialize(Deserializer buf) {
+ super.onDeserialize(buf);
+ int b = buf.getByte(null);
+ fnc = Function.valueOf(b & 0xff);
+ }
+
+ @Override
+ protected boolean equalsMultiArgFunction(MultiArgFunctionNode obj) {
+ return fnc == ((MathFunctionNode)obj).fnc;
+ }
+
+ @Override
+ public MathFunctionNode clone() {
+ MathFunctionNode obj = (MathFunctionNode)super.clone();
+ obj.fnc = fnc;
+ return obj;
+ }
+
+ @Override
+ public void visitMembers(ObjectVisitor visitor) {
+ super.visitMembers(visitor);
+ visitor.visit("function", fnc);
+ }
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/MaxFunctionNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/MaxFunctionNode.java
new file mode 100644
index 00000000000..8496f88eb1c
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/MaxFunctionNode.java
@@ -0,0 +1,23 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.expression;
+
+/**
+ * This function is an instruction to return the maximum value of all its arguments.
+ *
+ * @author <a href="mailto:balder@yahoo-inc.com">Henning Baldersheim</a>
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
+ */
+public class MaxFunctionNode extends NumericFunctionNode {
+
+ public static final int classId = registerClass(0x4000 + 66, MaxFunctionNode.class);
+
+ @Override
+ protected int onGetClassId() {
+ return classId;
+ }
+
+ @Override
+ protected void onArgument(final ResultNode arg, ResultNode result) {
+ ((NumericResultNode)result).max(arg);
+ }
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/MinFunctionNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/MinFunctionNode.java
new file mode 100644
index 00000000000..f7c18077791
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/MinFunctionNode.java
@@ -0,0 +1,23 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.expression;
+
+/**
+ * This function is an instruction to return the minimum value of all its arguments.
+ *
+ * @author <a href="mailto:balder@yahoo-inc.com">Henning Baldersheim</a>
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
+ */
+public class MinFunctionNode extends NumericFunctionNode {
+
+ public static final int classId = registerClass(0x4000 + 65, MinFunctionNode.class);
+
+ @Override
+ protected int onGetClassId() {
+ return classId;
+ }
+
+ @Override
+ protected void onArgument(final ResultNode arg, ResultNode result) {
+ ((NumericResultNode)result).min(arg);
+ }
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/ModuloFunctionNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/ModuloFunctionNode.java
new file mode 100644
index 00000000000..a2c919b1d4d
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/ModuloFunctionNode.java
@@ -0,0 +1,23 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.expression;
+
+/**
+ * This function is an instruction to modulo the arguments in order.
+ *
+ * @author <a href="mailto:balder@yahoo-inc.com">Henning Baldersheim</a>
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
+ */
+public class ModuloFunctionNode extends NumericFunctionNode {
+
+ public static final int classId = registerClass(0x4000 + 64, ModuloFunctionNode.class);
+
+ @Override
+ protected int onGetClassId() {
+ return classId;
+ }
+
+ @Override
+ protected void onArgument(final ResultNode arg, ResultNode result) {
+ ((NumericResultNode)result).modulo(arg);
+ }
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/MultiArgFunctionNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/MultiArgFunctionNode.java
new file mode 100644
index 00000000000..4f201e98bfb
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/MultiArgFunctionNode.java
@@ -0,0 +1,176 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.expression;
+
+import com.yahoo.vespa.objects.*;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * <p>This is an abstract super-class for all functions that accepts multiple arguments. This node implements the
+ * necessary API for manipulating arguments.</p>
+ *
+ * @author <a href="mailto:balder@yahoo-inc.com">Henning Baldersheim</a>
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
+ */
+public abstract class MultiArgFunctionNode extends FunctionNode {
+
+ public static final int classId = registerClass(0x4000 + 45, MultiArgFunctionNode.class);
+ private List<ExpressionNode> args = new ArrayList<ExpressionNode>();
+
+ /**
+ * <p>Adds the given argument to this function.</p>
+ *
+ * @param arg The argument to add.
+ * @return This, to allow chaining.
+ */
+ public MultiArgFunctionNode addArg(ExpressionNode arg) {
+ arg.getClass(); // throws NullPointerException
+ args.add(arg);
+ return this;
+ }
+
+ /**
+ * <p>Returns the argument at the given index.</p>
+ *
+ * @param i The index of the argument to return.
+ * @return The argument.
+ */
+ public ExpressionNode getArg(int i) {
+ return args.get(i);
+ }
+
+ /**
+ * <p>Returns the number of arguments this function has.</p>
+ *
+ * @return The size of the argument list.
+ */
+ public int getNumArgs() {
+ return args.size();
+ }
+
+ @Override
+ protected boolean onExecute() {
+ for (int i = 0; i < args.size(); i++) {
+ args.get(i).execute();
+ }
+ return calculate(args, getResult());
+ }
+
+ @Override
+ protected void onPrepare() {
+ for (int i = 0; i < args.size(); i++) {
+ args.get(i).prepare();
+ }
+ prepareResult();
+ }
+
+ /**
+ * <p>Perform the appropriate calculation of the arguments into a result node.</p>
+ *
+ * @param args A list of operands.
+ * @param result Place to put the result.
+ * @return True if successful, false if not.
+ */
+ private boolean calculate(final List<ExpressionNode> args, ResultNode result) {
+ return onCalculate(args, result);
+ }
+
+ private void prepareResult() {
+ onPrepareResult();
+ }
+
+ protected boolean onCalculate(final List<ExpressionNode> args, ResultNode result) {
+ result.set(args.get(0).getResult());
+ for (int i = 1; i < args.size(); i++) {
+ executeIterative(args.get(i).getResult(), result);
+ }
+ return true;
+ }
+
+ protected void onPrepareResult() {
+ if (args.size() == 1) {
+ setResult(ArithmeticTypeConversion.getType(args.get(0).getResult()));
+ } else if (args.size() > 1) {
+ setResult((ResultNode)args.get(0).getResult().clone());
+ for (int i = 1; i < args.size(); i++) {
+ if (args.get(i).getResult() != null) {
+ setResult(ArithmeticTypeConversion.getType(getResult(), args.get(i).getResult()));
+ }
+ }
+ }
+ }
+
+ @Override
+ protected int onGetClassId() {
+ return classId;
+ }
+
+ @Override
+ protected void onSerialize(Serializer buf) {
+ super.onSerialize(buf);
+ int numArgs = args.size();
+ buf.putInt(null, numArgs);
+ for (ExpressionNode node : args) {
+ serializeOptional(buf, node); // TODO: Not optional.
+ }
+ }
+
+ @Override
+ protected void onDeserialize(Deserializer buf) {
+ super.onDeserialize(buf);
+ args.clear();
+ int numArgs = buf.getInt(null);
+ for (int i = 0; i < numArgs; i++) {
+ ExpressionNode node = (ExpressionNode)deserializeOptional(buf); // TODO: Not optional.
+ args.add(node);
+ }
+ }
+
+ @Override
+ public int hashCode() {
+ int ret = super.hashCode();
+ for (ExpressionNode node : args) {
+ ret += node.hashCode();
+ }
+ return ret;
+ }
+
+ @Override
+ protected final boolean equalsFunction(FunctionNode obj) {
+ MultiArgFunctionNode rhs = (MultiArgFunctionNode)obj;
+ if (!args.equals(rhs.args)) {
+ return false;
+ }
+ if (!equalsMultiArgFunction(rhs)) {
+ return false;
+ }
+ return true;
+ }
+
+ protected abstract boolean equalsMultiArgFunction(MultiArgFunctionNode obj);
+
+ @Override
+ public MultiArgFunctionNode clone() {
+ MultiArgFunctionNode obj = (MultiArgFunctionNode)super.clone();
+ obj.args = new ArrayList<ExpressionNode>();
+ for (ExpressionNode node : args) {
+ obj.args.add(node.clone());
+ }
+ return obj;
+ }
+
+ @Override
+ public void visitMembers(ObjectVisitor visitor) {
+ super.visitMembers(visitor);
+ visitor.visit("args", args);
+ }
+
+ @Override
+ public void selectMembers(ObjectPredicate predicate, ObjectOperation operation) {
+ super.selectMembers(predicate, operation);
+ for (ExpressionNode arg : args) {
+ arg.select(predicate, operation);
+ }
+ }
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/MultiplyFunctionNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/MultiplyFunctionNode.java
new file mode 100644
index 00000000000..b55e86ba5fe
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/MultiplyFunctionNode.java
@@ -0,0 +1,23 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.expression;
+
+/**
+ * This function is an instruction to multiply all arguments.
+ *
+ * @author <a href="mailto:balder@yahoo-inc.com">Henning Baldersheim</a>
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
+ */
+public class MultiplyFunctionNode extends NumericFunctionNode {
+
+ public static final int classId = registerClass(0x4000 + 62, MultiplyFunctionNode.class);
+
+ @Override
+ protected int onGetClassId() {
+ return classId;
+ }
+
+ @Override
+ protected void onArgument(final ResultNode arg, ResultNode result) {
+ ((NumericResultNode)result).multiply(arg);
+ }
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/NegateFunctionNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/NegateFunctionNode.java
new file mode 100644
index 00000000000..0fdf07d6291
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/NegateFunctionNode.java
@@ -0,0 +1,52 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.expression;
+
+/**
+ * This function is an instruction to negate its argument.
+ *
+ * @author <a href="mailto:balder@yahoo-inc.com">Henning Baldersheim</a>
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
+ */
+public class NegateFunctionNode extends UnaryFunctionNode {
+
+ public static final int classId = registerClass(0x4000 + 60, NegateFunctionNode.class);
+
+ /**
+ * Constructs an empty result node. <b>NOTE:</b> This instance is broken until non-optional member data is set.
+ */
+ public NegateFunctionNode() {
+
+ }
+
+ /**
+ * Constructs an instance of this class with given argument.
+ *
+ * @param arg The argument for this function.
+ */
+ public NegateFunctionNode(ExpressionNode arg) {
+ addArg(arg);
+ }
+
+ @Override
+ public void onPrepare() {
+ super.onPrepare();
+ }
+
+ @Override
+ public boolean onExecute() {
+ getArg().execute();
+ getResult().set(getArg().getResult());
+ getResult().negate();
+ return true;
+ }
+
+ @Override
+ protected int onGetClassId() {
+ return classId;
+ }
+
+ @Override
+ protected boolean equalsUnaryFunction(UnaryFunctionNode obj) {
+ return true;
+ }
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/NormalizeSubjectFunctionNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/NormalizeSubjectFunctionNode.java
new file mode 100644
index 00000000000..dd24c1f9efe
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/NormalizeSubjectFunctionNode.java
@@ -0,0 +1,65 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.expression;
+
+/**
+ * This function is an instruction to negate its argument.
+ *
+ * @author <a href="mailto:balder@yahoo-inc.com">Henning Baldersheim</a>
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
+ */
+public class NormalizeSubjectFunctionNode extends UnaryFunctionNode {
+
+ public static final int classId = registerClass(0x4000 + 143, NormalizeSubjectFunctionNode.class);
+
+ /**
+ * Constructs an empty result node. <b>NOTE:</b> This instance is broken until non-optional member data is set.
+ */
+ public NormalizeSubjectFunctionNode() {
+
+ }
+
+ /**
+ * Constructs an instance of this class with given argument.
+ *
+ * @param arg The argument for this function.
+ */
+ public NormalizeSubjectFunctionNode(ExpressionNode arg) {
+ addArg(arg);
+ }
+
+ @Override
+ public void onPrepareResult() {
+ setResult(new StringResultNode());
+ }
+
+ @Override
+ public void onPrepare() {
+ super.onPrepare();
+ }
+
+ @Override
+ public boolean onExecute() {
+ String result = getArg().getResult().getString();
+
+ if (result.startsWith("Re: ") || result.startsWith("RE: ") || result.startsWith("Fw: ") ||
+ result.startsWith("FW: "))
+ {
+ result = result.substring(4);
+ } else if (result.startsWith("Fwd: ")) {
+ result = result.substring(5);
+ }
+
+ ((StringResultNode)getResult()).setValue(result);
+ return true;
+ }
+
+ @Override
+ protected int onGetClassId() {
+ return classId;
+ }
+
+ @Override
+ protected boolean equalsUnaryFunction(UnaryFunctionNode obj) {
+ return true;
+ }
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/NullResultNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/NullResultNode.java
new file mode 100644
index 00000000000..bc66e0d1899
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/NullResultNode.java
@@ -0,0 +1,56 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.expression;
+
+import com.yahoo.vespa.objects.ObjectVisitor;
+
+/**
+ * This result holds nothing.
+ *
+ * @author <a href="mailto:balder@yahoo-inc.com">Henning Baldersheim</a>
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
+ */
+public class NullResultNode extends ResultNode {
+
+ // The global class identifier shared with C++.
+ public static final int classId = registerClass(0x4000 + 57, NullResultNode.class);
+
+ @Override
+ protected int onGetClassId() {
+ return classId;
+ }
+
+ @Override
+ public long getInteger() {
+ return 0;
+ }
+
+ @Override
+ public double getFloat() {
+ return 0.0;
+ }
+
+ @Override
+ public String getString() {
+ return "";
+ }
+
+ @Override
+ public byte[] getRaw() {
+ return new byte[0];
+ }
+
+ @Override
+ protected int onCmp(ResultNode rhs) {
+ return classId - rhs.getClassId();
+ }
+
+ @Override
+ public void visitMembers(ObjectVisitor visitor) {
+ super.visitMembers(visitor);
+ visitor.visit("result", null);
+ }
+
+ @Override
+ public void set(ResultNode rhs) {
+ }
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/NumElemFunctionNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/NumElemFunctionNode.java
new file mode 100644
index 00000000000..f949dc67936
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/NumElemFunctionNode.java
@@ -0,0 +1,50 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.expression;
+
+/**
+ * This function is an instruction to negate its argument.
+ *
+ * @author <a href="mailto:balder@yahoo-inc.com">Henning Baldersheim</a>
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
+ */
+public class NumElemFunctionNode extends UnaryFunctionNode {
+
+ public static final int classId = registerClass(0x4000 + 132, NumElemFunctionNode.class);
+
+ /**
+ * Constructs an empty result node. <b>NOTE:</b> This instance is broken until non-optional member data is set.
+ */
+ public NumElemFunctionNode() {
+
+ }
+
+ /**
+ * Constructs an instance of this class with given argument.
+ *
+ * @param arg The argument for this function.
+ */
+ public NumElemFunctionNode(ExpressionNode arg) {
+ addArg(arg);
+ }
+
+ @Override
+ protected int onGetClassId() {
+ return classId;
+ }
+
+ @Override
+ public void onPrepareResult() {
+ setResult(new IntegerResultNode(1));
+ }
+
+ @Override
+ public boolean onExecute() {
+ getArg().execute();
+ return true;
+ }
+
+ @Override
+ protected boolean equalsUnaryFunction(UnaryFunctionNode obj) {
+ return true;
+ }
+} \ No newline at end of file
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/NumericFunctionNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/NumericFunctionNode.java
new file mode 100644
index 00000000000..a3312313733
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/NumericFunctionNode.java
@@ -0,0 +1,31 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.expression;
+
+/**
+ * This is an abstract class for all functions that perform arithmetics. This node implements the necessary API for
+ * doing arithmetic operations.
+ *
+ * @author <a href="mailto:lulf@yahoo-inc.com">Ulf Lilleengen</a>
+ */
+public abstract class NumericFunctionNode extends MultiArgFunctionNode {
+
+ @Override
+ public void onPrepare() {
+ super.onPrepare();
+
+ ResultNode result = getResult();
+ if (!(result instanceof IntegerResultNode) &&
+ !(result instanceof FloatResultNode) &&
+ !(result instanceof StringResultNode) &&
+ !(result instanceof RawResultNode))
+ {
+ throw new RuntimeException("Can not perform numeric function on value of type '" +
+ getResult().getClass().getName() + "'.");
+ }
+ }
+
+ @Override
+ protected final boolean equalsMultiArgFunction(MultiArgFunctionNode obj) {
+ return true;
+ }
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/NumericResultNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/NumericResultNode.java
new file mode 100644
index 00000000000..70a5cdcaf98
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/NumericResultNode.java
@@ -0,0 +1,52 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.expression;
+
+/**
+ * This is a superclass for all numerical results.
+ *
+ * @author <a href="mailto:balder@yahoo-inc.com">Henning Baldersheim</a>
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
+ */
+abstract public class NumericResultNode extends SingleResultNode {
+
+ // The global class identifier shared with C++.
+ public static final int classId = registerClass(0x4000 + 50, NumericResultNode.class);
+
+ /**
+ * In-place multiplication of this result with another.
+ *
+ * @param rhs The result to multiply with this.
+ */
+ public abstract void multiply(ResultNode rhs);
+
+ /**
+ * In-place division of this result with another.
+ *
+ * @param rhs The result to divide this by.
+ */
+ public abstract void divide(ResultNode rhs);
+
+ /**
+ * In-place modulo of this result with another.
+ *
+ * @param rhs The result to modulo this with.
+ */
+ public abstract void modulo(ResultNode rhs);
+
+ /**
+ * Return a java numeric, either Double or Long, depending on the underlying container.
+ *
+ * @return The underlying numeric value.
+ */
+ public abstract Object getNumber();
+
+ @Override
+ public Object getValue() {
+ return getNumber();
+ }
+
+ @Override
+ protected int onGetClassId() {
+ return classId;
+ }
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/OrFunctionNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/OrFunctionNode.java
new file mode 100644
index 00000000000..6f34f261543
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/OrFunctionNode.java
@@ -0,0 +1,22 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.expression;
+
+/**
+ * This function is an instruction to perform bitwise OR on the result of all arguments.
+ *
+ * @author <a href="mailto:balder@yahoo-inc.com">Henning Baldersheim</a>
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
+ */
+public class OrFunctionNode extends BitFunctionNode {
+
+ public static final int classId = registerClass(0x4000 + 68, OrFunctionNode.class);
+
+ @Override
+ protected int onGetClassId() {
+ return classId;
+ }
+
+ public void onArgument(final ResultNode arg, IntegerResultNode result) {
+ result.orOp(arg);
+ }
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/PositiveInfinityResultNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/PositiveInfinityResultNode.java
new file mode 100644
index 00000000000..a72d9d41318
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/PositiveInfinityResultNode.java
@@ -0,0 +1,44 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.expression;
+
+/**
+ * @author <a href="mailto:balder@yahoo-inc.com">Henning Baldersheim</a>
+ */
+public class PositiveInfinityResultNode extends ResultNode {
+ // The global class identifier shared with C++.
+ public static final int classId = registerClass(0x4000 + 124, PositiveInfinityResultNode.class);
+
+ @Override
+ protected int onGetClassId() {
+ return classId;
+ }
+
+ @Override
+ public long getInteger() {
+ return Long.MAX_VALUE;
+ }
+
+ @Override
+ public double getFloat() {
+ return Double.MAX_VALUE;
+ }
+
+ @Override
+ public byte[] getRaw() {
+ return new byte[0];
+ }
+
+ @Override
+ public String getString() {
+ return "";
+ }
+
+ @Override
+ protected int onCmp(ResultNode rhs) {
+ return rhs instanceof PositiveInfinityResultNode ? 0 : 1;
+ }
+
+ @Override
+ public void set(ResultNode rhs) {
+ }
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/RangeBucketPreDefFunctionNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/RangeBucketPreDefFunctionNode.java
new file mode 100644
index 00000000000..dab0221fcb5
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/RangeBucketPreDefFunctionNode.java
@@ -0,0 +1,82 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.expression;
+
+import com.yahoo.vespa.objects.Deserializer;
+import com.yahoo.vespa.objects.ObjectVisitor;
+import com.yahoo.vespa.objects.Serializer;
+
+/**
+ * This function assign a fixed width bucket to each input value
+ *
+ * @author <a href="mailto:havardpe@yahoo-inc.com">Haavard Pettersen</a>
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
+ */
+public class RangeBucketPreDefFunctionNode extends UnaryFunctionNode {
+
+ public static final int classId = registerClass(0x4000 + 76, RangeBucketPreDefFunctionNode.class);
+ private ResultNodeVector predef = null;
+
+ /**
+ * Constructs an empty result node.
+ */
+ public RangeBucketPreDefFunctionNode() {
+ // empty
+ }
+
+ /**
+ * Create a bucket expression with the given width and the given subexpression
+ *
+ * @param v predefined bucket list
+ * @param arg The argument for this function.
+ */
+ public RangeBucketPreDefFunctionNode(ResultNodeVector v, ExpressionNode arg) {
+ addArg(arg);
+ predef = v;
+ }
+
+ /**
+ * Obtain the predefined bucket list of this bucket expression
+ *
+ * @return predefined bucket list for this expression
+ */
+ public ResultNodeVector getBucketList() {
+ return predef;
+ }
+
+ @Override
+ protected int onGetClassId() {
+ return classId;
+ }
+
+ @Override
+ protected void onSerialize(Serializer buf) {
+ super.onSerialize(buf);
+ serializeOptional(buf, predef);
+ }
+
+ @Override
+ protected void onDeserialize(Deserializer buf) {
+ super.onDeserialize(buf);
+ predef = (ResultNodeVector)deserializeOptional(buf);
+ }
+
+ @Override
+ protected boolean equalsUnaryFunction(UnaryFunctionNode obj) {
+ return equals(predef, ((RangeBucketPreDefFunctionNode)obj).predef);
+ }
+
+ @Override
+ public RangeBucketPreDefFunctionNode clone() {
+ RangeBucketPreDefFunctionNode obj = (RangeBucketPreDefFunctionNode)super.clone();
+ if (predef != null) {
+ obj.predef = (ResultNodeVector)predef.clone();
+ }
+ return obj;
+ }
+
+ @Override
+ public void visitMembers(ObjectVisitor visitor) {
+ super.visitMembers(visitor);
+ visitor.visit("predef", predef);
+ }
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/RawBucketResultNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/RawBucketResultNode.java
new file mode 100644
index 00000000000..eef386735a1
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/RawBucketResultNode.java
@@ -0,0 +1,101 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.expression;
+
+import com.yahoo.vespa.objects.Deserializer;
+import com.yahoo.vespa.objects.ObjectVisitor;
+import com.yahoo.vespa.objects.Serializer;
+
+/**
+ * @author <a href="mailto:balder@yahoo-inc.com">Henning Baldersheim</a>
+ */
+public class RawBucketResultNode extends BucketResultNode {
+
+ // The global class identifier shared with C++.
+ public static final int classId = registerClass(0x4000 + 125, RawBucketResultNode.class);
+
+ // bucket start, inclusive
+ private ResultNode from = RawResultNode.getNegativeInfinity();
+
+ // bucket end, exclusive
+ private ResultNode to = RawResultNode.getNegativeInfinity();
+
+ @Override
+ public boolean empty() {
+ return to.equals(from);
+ }
+
+ /**
+ * Constructs an empty result node.
+ */
+ public RawBucketResultNode() {
+ // empty
+ }
+
+ /**
+ * Create a bucket with the given limits
+ *
+ * @param from bucket start
+ * @param to bucket end
+ */
+ public RawBucketResultNode(ResultNode from, ResultNode to) {
+ this.from = from;
+ this.to = to;
+ }
+
+ /**
+ * Obtain the bucket start
+ *
+ * @return bucket start
+ */
+ public byte[] getFrom() {
+ return from.getRaw();
+ }
+
+ /**
+ * Obtain the bucket end
+ *
+ * @return bucket end
+ */
+ public byte[] getTo() {
+ return to.getRaw();
+ }
+
+ @Override
+ protected int onGetClassId() {
+ return classId;
+ }
+
+ @Override
+ protected void onSerialize(Serializer buf) {
+ serializeOptional(buf, from);
+ serializeOptional(buf, to);
+ }
+
+ @Override
+ protected void onDeserialize(Deserializer buf) {
+ from = (ResultNode)deserializeOptional(buf);
+ to = (ResultNode)deserializeOptional(buf);
+ }
+
+ @Override
+ protected int onCmp(ResultNode rhs) {
+ if (classId != rhs.getClassId()) {
+ return (classId - rhs.getClassId());
+ }
+ RawBucketResultNode b = (RawBucketResultNode)rhs;
+ int diff = from.compareTo(b.from);
+ return (diff == 0) ? to.compareTo(b.to) : diff;
+ }
+
+ @Override
+ public int hashCode() {
+ return super.hashCode() + from.hashCode() + to.hashCode();
+ }
+
+ @Override
+ public void visitMembers(ObjectVisitor visitor) {
+ super.visitMembers(visitor);
+ visitor.visit("from", from);
+ visitor.visit("to", to);
+ }
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/RawBucketResultNodeVector.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/RawBucketResultNodeVector.java
new file mode 100644
index 00000000000..caed1de4134
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/RawBucketResultNodeVector.java
@@ -0,0 +1,75 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.expression;
+
+import com.yahoo.vespa.objects.Deserializer;
+import com.yahoo.vespa.objects.Serializer;
+
+import java.util.ArrayList;
+
+/**
+ * @author <a href="mailto:balder@yahoo-inc.com">Henning Baldersheim</a>
+ */
+public class RawBucketResultNodeVector extends ResultNodeVector {
+ // The global class identifier shared with C++.
+ public static final int classId = registerClass(0x4000 + 126, RawBucketResultNodeVector.class);
+ private ArrayList<RawBucketResultNode> vector = new ArrayList<RawBucketResultNode>();
+
+ @Override
+ protected int onGetClassId() {
+ return classId;
+ }
+
+ public RawBucketResultNodeVector() {
+ }
+
+ public RawBucketResultNodeVector add(RawBucketResultNode v) {
+ vector.add(v);
+ return this;
+ }
+
+ public ResultNodeVector add(ResultNode r) {
+ return add((RawBucketResultNode)r);
+ }
+
+ public ArrayList<RawBucketResultNode> getVector() {
+ return vector;
+ }
+
+ @Override
+ protected void onSerialize(Serializer buf) {
+ super.onSerialize(buf);
+ buf.putInt(null, vector.size());
+ for (RawBucketResultNode node : vector) {
+ node.serialize(buf);
+ }
+ }
+
+ @Override
+ protected void onDeserialize(Deserializer buf) {
+ super.onDeserialize(buf);
+ int sz = buf.getInt(null);
+ vector = new ArrayList<RawBucketResultNode>();
+ for (int i = 0; i < sz; i++) {
+ RawBucketResultNode node = new RawBucketResultNode();
+ node.deserialize(buf);
+ vector.add(node);
+ }
+ }
+
+ @Override
+ protected int onCmp(ResultNode rhs) {
+ if (classId != rhs.getClassId()) {
+ return (classId - rhs.getClassId());
+ }
+ RawBucketResultNodeVector b = (RawBucketResultNodeVector)rhs;
+ int minLength = vector.size();
+ if (b.vector.size() < minLength) {
+ minLength = b.vector.size();
+ }
+ int diff = 0;
+ for (int i = 0; (diff == 0) && (i < minLength); i++) {
+ diff = vector.get(i).compareTo(b.vector.get(i));
+ }
+ return (diff == 0) ? (vector.size() - b.vector.size()) : diff;
+ }
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/RawResultNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/RawResultNode.java
new file mode 100644
index 00000000000..ad40fc5026f
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/RawResultNode.java
@@ -0,0 +1,184 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.expression;
+
+import com.yahoo.searchlib.aggregation.RawData;
+import com.yahoo.vespa.objects.Deserializer;
+import com.yahoo.vespa.objects.ObjectVisitor;
+import com.yahoo.vespa.objects.Serializer;
+
+import java.util.Arrays;
+
+/**
+ * This result holds a byte array value.
+ *
+ * @author <a href="mailto:balder@yahoo-inc.com">Henning Baldersheim</a>
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
+ */
+public class RawResultNode extends SingleResultNode {
+
+ // The global class identifier shared with C++.
+ public static final int classId = registerClass(0x4000 + 54, RawResultNode.class);
+ private static RawResultNode negativeInfinity = new RawResultNode();
+ private static PositiveInfinityResultNode positiveInfinity = new PositiveInfinityResultNode();
+
+ // The raw value of this node.
+ private RawData value = null;
+
+ /**
+ * Constructs an empty result node. <b>NOTE:</b> This instance is broken until non-optional member data is set.
+ */
+ public RawResultNode() {
+ super();
+ value = new RawData();
+ }
+
+ /**
+ * Constructs an instance of this class with given byte buffer.
+ *
+ * @param value The value to assign to this.
+ */
+ public RawResultNode(byte[] value) {
+ super();
+ setValue(value);
+ }
+
+ /**
+ * Sets the value of this result.
+ *
+ * @param value The value to set.
+ * @return This, to allow chaining.
+ */
+ public RawResultNode setValue(byte[] value) {
+ this.value = new RawData(value);
+ return this;
+ }
+
+ @Override
+ protected int onGetClassId() {
+ return classId;
+ }
+
+ @Override
+ protected void onSerialize(Serializer buf) {
+ value.serialize(buf);
+ }
+
+ @Override
+ protected void onDeserialize(Deserializer buf) {
+ value = new RawData();
+ value.deserialize(buf);
+ }
+
+ @Override
+ public long getInteger() {
+ return 0;
+ }
+
+ @Override
+ public double getFloat() {
+ return 0;
+ }
+
+ @Override
+ public String getString() {
+ return new String(value.getData());
+ }
+
+ @Override
+ public byte[] getRaw() {
+ return value.getData();
+ }
+
+ @Override
+ public String toString() {
+ if (value != null) {
+ return Arrays.toString(value.getData());
+ }
+ return "[]";
+ }
+
+ @Override
+ protected int onCmp(ResultNode rhs) {
+ return (rhs instanceof PositiveInfinityResultNode)
+ ? -1
+ : RawData.compare(value.getData(), rhs.getRaw());
+ }
+
+ @Override
+ public int hashCode() {
+ return super.hashCode() + value.hashCode();
+ }
+
+ @Override
+ public RawResultNode clone() {
+ RawResultNode obj = (RawResultNode)super.clone();
+ if (value != null) {
+ obj.value = (RawData)value.clone();
+ }
+ return obj;
+ }
+
+ @Override
+ public void visitMembers(ObjectVisitor visitor) {
+ super.visitMembers(visitor);
+ visitor.visit("value", value);
+ }
+
+ public void add(ResultNode rhs) {
+ byte[] nb = new byte[value.getData().length + rhs.getRaw().length];
+ System.arraycopy(value.getData(), 0, nb, 0, value.getData().length);
+ System.arraycopy(rhs.getRaw(), 0, nb, value.getData().length, rhs.getRaw().length);
+ value = new RawData(nb);
+ }
+
+ public void min(ResultNode rhs) {
+ RawData b = new RawData(rhs.getRaw());
+ if (value.compareTo(b) > 0) {
+ value = b;
+ }
+ }
+
+ public void max(ResultNode rhs) {
+ RawData b = new RawData(rhs.getRaw());
+ if (value.compareTo(b) < 0) {
+ value = b;
+ }
+ }
+
+ @Override
+ public Object getValue() {
+ return getString();
+ }
+
+ @Override
+ public void set(ResultNode rhs) {
+ value = new RawData(rhs.getRaw());
+ }
+
+ @Override
+ public void negate() {
+ byte[] data = value.getData();
+ for (int i = 0; i < data.length; i++) {
+ data[i] = (byte)-data[i];
+ }
+ }
+
+ /**
+ * Will provide the smallest possible value
+ *
+ * @return the smallest possible IntegerResultNode
+ */
+ public static RawResultNode getNegativeInfinity() {
+ return negativeInfinity;
+ }
+
+ /**
+ * Will provide the largest possible value
+ *
+ * @return the smallest largest IntegerResultNode
+ */
+ public static PositiveInfinityResultNode getPositiveInfinity() {
+ return positiveInfinity;
+ }
+
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/RawResultNodeVector.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/RawResultNodeVector.java
new file mode 100644
index 00000000000..dc791b7ce69
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/RawResultNodeVector.java
@@ -0,0 +1,80 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.expression;
+
+import com.yahoo.vespa.objects.Deserializer;
+import com.yahoo.vespa.objects.Serializer;
+
+import java.util.ArrayList;
+
+/**
+ * This result holds nothing.
+ *
+ * @author <a href="mailto:balder@yahoo-inc.com">Henning Baldersheim</a>
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
+ */
+public class RawResultNodeVector extends ResultNodeVector {
+
+ // The global class identifier shared with C++.
+ public static final int classId = registerClass(0x4000 + 115, RawResultNodeVector.class);
+ private ArrayList<RawResultNode> vector = new ArrayList<RawResultNode>();
+
+ @Override
+ protected int onGetClassId() {
+ return classId;
+ }
+
+ public RawResultNodeVector() {
+ }
+
+ public RawResultNodeVector add(RawResultNode v) {
+ vector.add(v);
+ return this;
+ }
+
+ public ResultNodeVector add(ResultNode r) {
+ return add((RawResultNode)r);
+ }
+
+ public ArrayList<RawResultNode> getVector() {
+ return vector;
+ }
+
+ @Override
+ protected void onSerialize(Serializer buf) {
+ super.onSerialize(buf);
+ buf.putInt(null, vector.size());
+ for (RawResultNode node : vector) {
+ node.serialize(buf);
+ }
+ }
+
+ @Override
+ protected void onDeserialize(Deserializer buf) {
+ super.onDeserialize(buf);
+ int sz = buf.getInt(null);
+ vector = new ArrayList<RawResultNode>();
+ for (int i = 0; i < sz; i++) {
+ RawResultNode node = new RawResultNode();
+ node.deserialize(buf);
+ vector.add(node);
+ }
+ }
+
+ @Override
+ protected int onCmp(ResultNode rhs) {
+ if (classId != rhs.getClassId()) {
+ return (classId - rhs.getClassId());
+ }
+ RawResultNodeVector b = (RawResultNodeVector)rhs;
+ int minLength = vector.size();
+ if (b.vector.size() < minLength) {
+ minLength = b.vector.size();
+ }
+ int diff = 0;
+ for (int i = 0; (diff == 0) && (i < minLength); i++) {
+ diff = vector.get(i).compareTo(b.vector.get(i));
+ }
+ return (diff == 0) ? (vector.size() - b.vector.size()) : diff;
+ }
+
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/RelevanceNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/RelevanceNode.java
new file mode 100644
index 00000000000..90077238925
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/RelevanceNode.java
@@ -0,0 +1,72 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.expression;
+
+import com.yahoo.vespa.objects.Deserializer;
+import com.yahoo.vespa.objects.ObjectVisitor;
+import com.yahoo.vespa.objects.Serializer;
+
+/**
+ * This abstract expression node represents a function to execute.
+ *
+ * @author <a href="mailto:balder@yahoo-inc.com">Henning Baldersheim</a>
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
+ */
+public class RelevanceNode extends ExpressionNode {
+
+ public static final int classId = registerClass(0x4000 + 59, RelevanceNode.class);
+ private FloatResultNode relevance = new FloatResultNode();
+
+ public RelevanceNode() {
+
+ }
+
+ @Override
+ public void onPrepare() {
+
+ }
+
+ @Override
+ public boolean onExecute() {
+ return true;
+ }
+
+ @Override
+ protected int onGetClassId() {
+ return classId;
+ }
+
+ @Override
+ protected void onSerialize(Serializer buf) {
+ super.onSerialize(buf);
+ relevance.serialize(buf);
+ }
+
+ @Override
+ protected void onDeserialize(Deserializer buf) {
+ super.onDeserialize(buf);
+ relevance.deserialize(buf);
+ }
+
+ @Override
+ public RelevanceNode clone() {
+ RelevanceNode obj = (RelevanceNode)super.clone();
+ obj.relevance = (FloatResultNode)relevance.clone();
+ return obj;
+ }
+
+ @Override
+ public void visitMembers(ObjectVisitor visitor) {
+ super.visitMembers(visitor);
+ visitor.visit("relevance", relevance);
+ }
+
+ @Override
+ public ResultNode getResult() {
+ return relevance;
+ }
+
+ @Override
+ protected boolean equalsExpression(ExpressionNode obj) {
+ return relevance.equals(((RelevanceNode)obj).relevance);
+ }
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/ResultNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/ResultNode.java
new file mode 100644
index 00000000000..7a31e1598f6
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/ResultNode.java
@@ -0,0 +1,82 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.expression;
+
+import com.yahoo.vespa.objects.Identifiable;
+
+/**
+ * This abstract expression node represents the result value of execution.
+ *
+ * @author <a href="mailto:balder@yahoo-inc.com">Henning Baldersheim</a>
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
+ */
+public abstract class ResultNode extends Identifiable implements Comparable<ResultNode> {
+
+ // The global class identifier shared with C++.
+ public static final int classId = registerClass(0x4000 + 41, ResultNode.class);
+
+ @Override
+ protected int onGetClassId() {
+ return classId;
+ }
+
+ @Override
+ public final int compareTo(ResultNode b) {
+ return onCmp(b);
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ return obj instanceof ResultNode && compareTo((ResultNode)obj) == 0;
+ }
+
+ /**
+ * This method must be implemented by all subclasses of this to allow new results to be calculated.
+ *
+ * @param rhs The node to get the result from.
+ */
+ protected abstract void set(ResultNode rhs);
+
+ /**
+ * This method must be implemented by all subclasses of this to allow ordering of results. This method is used by
+ * the {@link Cloneable} implementation.
+ *
+ * @param rhs The other node to compare with.
+ * @return Comparable result.
+ */
+ protected abstract int onCmp(ResultNode rhs);
+
+ /**
+ * Returns the integer representation of this result.
+ *
+ * @return The value of this.
+ */
+ public abstract long getInteger();
+
+ /**
+ * Returns the float representation of this result.
+ *
+ * @return The value of this.
+ */
+ public abstract double getFloat();
+
+ /**
+ * Returns the string representation of this result.
+ *
+ * @return The value of this.
+ */
+ public abstract String getString();
+
+ /**
+ * Returns the raw byte array representation of this result.
+ *
+ * @return The value of this.
+ */
+ public abstract byte[] getRaw();
+
+ /**
+ * Negate the value contained within the result node.
+ */
+ public void negate() {
+ throw new RuntimeException("Class " + getClass().getName() + " does not implement negate");
+ }
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/ResultNodeVector.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/ResultNodeVector.java
new file mode 100644
index 00000000000..e6d2818e39d
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/ResultNodeVector.java
@@ -0,0 +1,45 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.expression;
+
+/**
+ * This result holds nothing.
+ *
+ * @author <a href="mailto:balder@yahoo-inc.com">Henning Baldersheim</a>
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
+ */
+public abstract class ResultNodeVector extends ResultNode {
+
+ // The global class identifier shared with C++.
+ public static final int classId = registerClass(0x4000 + 108, ResultNodeVector.class);
+
+ @Override
+ protected int onGetClassId() {
+ return classId;
+ }
+
+ @Override
+ public long getInteger() {
+ return 0;
+ }
+
+ @Override
+ public double getFloat() {
+ return 0.0;
+ }
+
+ @Override
+ public String getString() {
+ return "";
+ }
+
+ @Override
+ public byte[] getRaw() {
+ return new byte[0];
+ }
+
+ @Override
+ public void set(ResultNode rhs) {
+ }
+
+ public abstract ResultNodeVector add(ResultNode r);
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/ReverseFunctionNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/ReverseFunctionNode.java
new file mode 100644
index 00000000000..7aa9cd92163
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/ReverseFunctionNode.java
@@ -0,0 +1,39 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.expression;
+
+/**
+ * This class will revert the order on any multivalues. Nothing is done to single value types such as integers, float,
+ * strings and Raw values.
+ *
+ * @author <a href="mailto:balder@yahoo-inc.com">Henning Baldersheim</a>
+ */
+public class ReverseFunctionNode extends UnaryFunctionNode {
+
+ public static final int classId = registerClass(0x4000 + 138, ReverseFunctionNode.class);
+
+ /**
+ * Constructs an empty result node. <b>NOTE:</b> This instance is broken until non-optional member data is set.
+ */
+ public ReverseFunctionNode() {
+
+ }
+
+ /**
+ * Constructs an instance of this class with given argument.
+ *
+ * @param arg The argument for this function.
+ */
+ public ReverseFunctionNode(ExpressionNode arg) {
+ addArg(arg);
+ }
+
+ @Override
+ protected int onGetClassId() {
+ return classId;
+ }
+
+ @Override
+ protected boolean equalsUnaryFunction(UnaryFunctionNode obj) {
+ return true;
+ }
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/SingleResultNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/SingleResultNode.java
new file mode 100644
index 00000000000..2c9b940cbf0
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/SingleResultNode.java
@@ -0,0 +1,38 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.expression;
+
+/**
+ * @author <a href="mailto:balder@yahoo-inc.com">Henning Baldersheim</a>
+ */
+public abstract class SingleResultNode extends ResultNode {
+ // The global class identifier shared with C++.
+ public static final int classId = registerClass(0x4000 + 121, NumericResultNode.class);
+
+ /**
+ * In-place addition of this result with another.
+ *
+ * @param rhs The result to add to this.
+ */
+ public abstract void add(ResultNode rhs);
+
+ /**
+ * Swaps the numerical value of this node with the smaller of this and the other.
+ *
+ * @param rhs The other result to evaluate.
+ */
+ public abstract void min(ResultNode rhs);
+
+ /**
+ * Swaps the numerical value of this node with the larger of this and the other.
+ *
+ * @param rhs The other result to evaluate.
+ */
+ public abstract void max(ResultNode rhs);
+
+ /**
+ * Return a java native, either String, Double or Long, depending on the underlying container.
+ *
+ * @return The underlying numeric value.
+ */
+ public abstract Object getValue();
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/SortFunctionNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/SortFunctionNode.java
new file mode 100644
index 00000000000..0b0f1e1ed5b
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/SortFunctionNode.java
@@ -0,0 +1,36 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.expression;
+
+/**
+ * @author <a href="mailto:balder@yahoo-inc.com">Henning Baldersheim</a>
+ */
+public class SortFunctionNode extends UnaryFunctionNode {
+
+ public static final int classId = registerClass(0x4000 + 137, SortFunctionNode.class);
+
+ /**
+ * Constructs an empty result node. <b>NOTE:</b> This instance is broken until non-optional member data is set.
+ */
+ public SortFunctionNode() {
+
+ }
+
+ /**
+ * Constructs an instance of this class with given argument.
+ *
+ * @param arg The argument for this function.
+ */
+ public SortFunctionNode(ExpressionNode arg) {
+ addArg(arg);
+ }
+
+ @Override
+ protected int onGetClassId() {
+ return classId;
+ }
+
+ @Override
+ protected boolean equalsUnaryFunction(UnaryFunctionNode obj) {
+ return true;
+ }
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/StrCatFunctionNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/StrCatFunctionNode.java
new file mode 100644
index 00000000000..de748394ca3
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/StrCatFunctionNode.java
@@ -0,0 +1,42 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.expression;
+
+/**
+ * This function is an instruction to concatenate the bits of all arguments in order.
+ *
+ * @author <a href="mailto:balder@yahoo-inc.com">Henning Baldersheim</a>
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
+ */
+public class StrCatFunctionNode extends MultiArgFunctionNode {
+
+ public static final int classId = registerClass(0x4000 + 133, StrCatFunctionNode.class);
+
+ @Override
+ protected int onGetClassId() {
+ return classId;
+ }
+
+ @Override
+ protected boolean equalsMultiArgFunction(MultiArgFunctionNode obj) {
+ return true;
+ }
+
+ @Override
+ protected void onPrepareResult() {
+ setResult(new StringResultNode());
+ }
+
+ @Override
+ protected void onPrepare() {
+ super.onPrepare();
+ }
+
+ @Override
+ protected boolean onExecute() {
+ for (int i = 0; i < getNumArgs(); i++) {
+ getArg(i).execute();
+ ((StringResultNode)getResult()).append(getArg(i).getResult());
+ }
+ return true;
+ }
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/StrLenFunctionNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/StrLenFunctionNode.java
new file mode 100644
index 00000000000..dbec8903177
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/StrLenFunctionNode.java
@@ -0,0 +1,55 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.expression;
+
+/**
+ * This function is an instruction to negate its argument.
+ *
+ * @author <a href="mailto:balder@yahoo-inc.com">Henning Baldersheim</a>
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
+ */
+public class StrLenFunctionNode extends UnaryFunctionNode {
+
+ public static final int classId = registerClass(0x4000 + 130, StrLenFunctionNode.class);
+
+ /**
+ * Constructs an empty result node. <b>NOTE:</b> This instance is broken until non-optional member data is set.
+ */
+ public StrLenFunctionNode() {
+
+ }
+
+ /**
+ * Constructs an instance of this class with given argument.
+ *
+ * @param arg The argument for this function.
+ */
+ public StrLenFunctionNode(ExpressionNode arg) {
+ addArg(arg);
+ }
+
+ @Override
+ public void onPrepareResult() {
+ setResult(new IntegerResultNode(0));
+ }
+
+ @Override
+ public void onPrepare() {
+ super.onPrepare();
+ }
+
+ @Override
+ public boolean onExecute() {
+ ((IntegerResultNode)getResult()).setValue(getArg().getResult().getString().length());
+ return true;
+ }
+
+ @Override
+ protected int onGetClassId() {
+ return classId;
+ }
+
+ @Override
+ protected boolean equalsUnaryFunction(UnaryFunctionNode obj) {
+ return true;
+ }
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/StringBucketResultNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/StringBucketResultNode.java
new file mode 100644
index 00000000000..d830cb0f2c4
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/StringBucketResultNode.java
@@ -0,0 +1,114 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.expression;
+
+import com.yahoo.vespa.objects.Deserializer;
+import com.yahoo.vespa.objects.ObjectVisitor;
+import com.yahoo.vespa.objects.Serializer;
+
+/**
+ * This is an integer bucket value
+ *
+ * @author <a href="mailto:havardpe@yahoo-inc.com">Haavard Pettersen</a>
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
+ */
+public class StringBucketResultNode extends BucketResultNode {
+
+ // The global class identifier shared with C++.
+ public static final int classId = registerClass(0x4000 + 103, StringBucketResultNode.class);
+
+ // bucket start, inclusive
+ private ResultNode from = StringResultNode.getNegativeInfinity();
+
+ // bucket end, exclusive
+ private ResultNode to = StringResultNode.getNegativeInfinity();
+
+ @Override
+ public boolean empty() {
+ return to.equals(from);
+ }
+
+ /**
+ * Constructs an empty result node.
+ */
+ public StringBucketResultNode() {
+ // empty
+ }
+
+ /**
+ * Create a bucket with the given limits
+ *
+ * @param from bucket start
+ * @param to bucket end
+ */
+ public StringBucketResultNode(ResultNode from, ResultNode to) {
+ this.from = from;
+ this.to = to;
+ }
+
+ /**
+ * Create a bucket with the given limits
+ *
+ * @param from bucket start
+ * @param to bucket end
+ */
+ public StringBucketResultNode(String from, String to) {
+ this(new StringResultNode(from), new StringResultNode(to));
+ }
+
+ /**
+ * Obtain the bucket start
+ *
+ * @return bucket start
+ */
+ public String getFrom() {
+ return from.getString();
+ }
+
+ /**
+ * Obtain the bucket end
+ *
+ * @return bucket end
+ */
+ public String getTo() {
+ return to.getString();
+ }
+
+ @Override
+ protected int onGetClassId() {
+ return classId;
+ }
+
+ @Override
+ protected void onSerialize(Serializer buf) {
+ serializeOptional(buf, from);
+ serializeOptional(buf, to);
+ }
+
+ @Override
+ protected void onDeserialize(Deserializer buf) {
+ from = (ResultNode)deserializeOptional(buf);
+ to = (ResultNode)deserializeOptional(buf);
+ }
+
+ @Override
+ protected int onCmp(ResultNode rhs) {
+ if (classId != rhs.getClassId()) {
+ return (classId - rhs.getClassId());
+ }
+ StringBucketResultNode b = (StringBucketResultNode)rhs;
+ int diff = from.compareTo(b.from);
+ return (diff == 0) ? to.compareTo(b.to) : diff;
+ }
+
+ @Override
+ public int hashCode() {
+ return super.hashCode() + from.hashCode() + to.hashCode();
+ }
+
+ @Override
+ public void visitMembers(ObjectVisitor visitor) {
+ super.visitMembers(visitor);
+ visitor.visit("from", from);
+ visitor.visit("to", to);
+ }
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/StringBucketResultNodeVector.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/StringBucketResultNodeVector.java
new file mode 100644
index 00000000000..89570c702ec
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/StringBucketResultNodeVector.java
@@ -0,0 +1,80 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.expression;
+
+import com.yahoo.vespa.objects.Deserializer;
+import com.yahoo.vespa.objects.Serializer;
+
+import java.util.ArrayList;
+
+/**
+ * This result holds nothing.
+ *
+ * @author <a href="mailto:balder@yahoo-inc.com">Henning Baldersheim</a>
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
+ */
+public class StringBucketResultNodeVector extends ResultNodeVector {
+
+ // The global class identifier shared with C++.
+ public static final int classId = registerClass(0x4000 + 114, StringBucketResultNodeVector.class);
+ private ArrayList<StringBucketResultNode> vector = new ArrayList<StringBucketResultNode>();
+
+ @Override
+ protected int onGetClassId() {
+ return classId;
+ }
+
+ public StringBucketResultNodeVector() {
+ }
+
+ public StringBucketResultNodeVector add(StringBucketResultNode v) {
+ vector.add(v);
+ return this;
+ }
+
+ public ResultNodeVector add(ResultNode r) {
+ return add((StringBucketResultNode)r);
+ }
+
+ public ArrayList<StringBucketResultNode> getVector() {
+ return vector;
+ }
+
+ @Override
+ protected void onSerialize(Serializer buf) {
+ super.onSerialize(buf);
+ buf.putInt(null, vector.size());
+ for (StringBucketResultNode node : vector) {
+ node.serialize(buf);
+ }
+ }
+
+ @Override
+ protected void onDeserialize(Deserializer buf) {
+ super.onDeserialize(buf);
+ int sz = buf.getInt(null);
+ vector = new ArrayList<StringBucketResultNode>();
+ for (int i = 0; i < sz; i++) {
+ StringBucketResultNode node = new StringBucketResultNode();
+ node.deserialize(buf);
+ vector.add(node);
+ }
+ }
+
+ @Override
+ protected int onCmp(ResultNode rhs) {
+ if (classId != rhs.getClassId()) {
+ return (classId - rhs.getClassId());
+ }
+ StringBucketResultNodeVector b = (StringBucketResultNodeVector)rhs;
+ int minLength = vector.size();
+ if (b.vector.size() < minLength) {
+ minLength = b.vector.size();
+ }
+ int diff = 0;
+ for (int i = 0; (diff == 0) && (i < minLength); i++) {
+ diff = vector.get(i).compareTo(b.vector.get(i));
+ }
+ return (diff == 0) ? (vector.size() - b.vector.size()) : diff;
+ }
+
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/StringResultNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/StringResultNode.java
new file mode 100644
index 00000000000..f428e2aef9f
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/StringResultNode.java
@@ -0,0 +1,177 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.expression;
+
+import com.yahoo.text.Utf8;
+import com.yahoo.vespa.objects.Deserializer;
+import com.yahoo.vespa.objects.ObjectVisitor;
+import com.yahoo.vespa.objects.Serializer;
+
+/**
+ * This result holds a string.
+ *
+ * @author <a href="mailto:balder@yahoo-inc.com">Henning Baldersheim</a>
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
+ */
+public class StringResultNode extends SingleResultNode {
+
+ // The global class identifier shared with C++.
+ public static final int classId = registerClass(0x4000 + 53, StringResultNode.class);
+ private static StringResultNode negativeInfinity = new StringResultNode("");
+ private static PositiveInfinityResultNode positiveInfinity = new PositiveInfinityResultNode();
+
+ // The string value of this node.
+ private String value;
+
+ /**
+ * Constructs an empty result node. <b>NOTE:</b> This instance is broken until non-optional member data is set.
+ */
+ public StringResultNode() {
+ super();
+ value = "";
+ }
+
+ /**
+ * Constructs an instance of this class with given value.
+ *
+ * @param value The value to assign to this.
+ */
+ public StringResultNode(String value) {
+ super();
+ setValue(value);
+ }
+
+ /**
+ * Sets the value of this result.
+ *
+ * @param value The value to set.
+ * @return This, to allow chaining.
+ */
+ public StringResultNode setValue(String value) {
+ if (value == null) {
+ throw new IllegalArgumentException("Value can not be null.");
+ }
+ this.value = value;
+ return this;
+ }
+
+ @Override
+ protected int onGetClassId() {
+ return classId;
+ }
+
+ @Override
+ protected void onSerialize(Serializer buf) {
+ byte[] raw = getRaw();
+ buf.putInt(null, raw.length);
+ buf.put(null, raw);
+ }
+
+ @Override
+ protected void onDeserialize(Deserializer buf) {
+ value = getUtf8(buf);
+ }
+
+ @Override
+ public long getInteger() {
+ try {
+ return Integer.valueOf(value);
+ } catch (java.lang.NumberFormatException e) {
+ return 0;
+ }
+ }
+
+ @Override
+ public double getFloat() {
+ try {
+ return Double.valueOf(value);
+ } catch (java.lang.NumberFormatException e) {
+ return 0;
+ }
+ }
+
+ @Override
+ public String getString() {
+ return value;
+ }
+
+ @Override
+ public byte[] getRaw() {
+ return Utf8.toBytes(value);
+ }
+
+ @Override
+ protected int onCmp(ResultNode rhs) {
+ return (rhs instanceof PositiveInfinityResultNode)
+ ? -1
+ : value.compareTo(rhs.getString());
+ }
+
+ @Override
+ public int hashCode() {
+ return super.hashCode() + value.hashCode();
+ }
+
+ @Override
+ public void visitMembers(ObjectVisitor visitor) {
+ super.visitMembers(visitor);
+ visitor.visit("value", value);
+ }
+
+ public void add(ResultNode rhs) {
+ value += rhs.getString();
+ }
+
+ public void min(ResultNode rhs) {
+ if (value.compareTo(rhs.getString()) > 0) {
+ value = rhs.getString();
+ }
+ }
+
+ public void max(ResultNode rhs) {
+ if (value.compareTo(rhs.getString()) < 0) {
+ value = rhs.getString();
+ }
+ }
+
+ public void append(ResultNode rhs) {
+ value += rhs.getString();
+ }
+
+ @Override
+ public Object getValue() {
+ return getString();
+ }
+
+ @Override
+ public void set(ResultNode rhs) {
+ value = rhs.getString();
+ }
+
+ @Override
+ public void negate() {
+ char a[] = value.toCharArray();
+ for (int i = 0; i < a.length; i++) {
+ a[i] = (char)-a[i];
+ }
+ value = new String(a);
+ }
+
+ /**
+ * Will provide the smallest possible value
+ *
+ * @return the smallest possible IntegerResultNode
+ */
+ public static StringResultNode getNegativeInfinity() {
+ return negativeInfinity;
+ }
+
+ /**
+ * Will provide the largest possible value
+ *
+ * @return the smallest largest IntegerResultNode
+ */
+ public static PositiveInfinityResultNode getPositiveInfinity() {
+ return positiveInfinity;
+ }
+}
+
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/StringResultNodeVector.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/StringResultNodeVector.java
new file mode 100644
index 00000000000..ba172f5db01
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/StringResultNodeVector.java
@@ -0,0 +1,80 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.expression;
+
+import com.yahoo.vespa.objects.Deserializer;
+import com.yahoo.vespa.objects.Serializer;
+
+import java.util.ArrayList;
+
+/**
+ * This result holds nothing.
+ *
+ * @author <a href="mailto:balder@yahoo-inc.com">Henning Baldersheim</a>
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
+ */
+public class StringResultNodeVector extends ResultNodeVector {
+
+ // The global class identifier shared with C++.
+ public static final int classId = registerClass(0x4000 + 111, StringResultNodeVector.class);
+ private ArrayList<StringResultNode> vector = new ArrayList<StringResultNode>();
+
+ @Override
+ protected int onGetClassId() {
+ return classId;
+ }
+
+ public StringResultNodeVector() {
+ }
+
+ public StringResultNodeVector add(StringResultNode v) {
+ vector.add(v);
+ return this;
+ }
+
+ public ResultNodeVector add(ResultNode r) {
+ return add((StringResultNode)r);
+ }
+
+ public ArrayList<StringResultNode> getVector() {
+ return vector;
+ }
+
+ @Override
+ protected void onSerialize(Serializer buf) {
+ super.onSerialize(buf);
+ buf.putInt(null, vector.size());
+ for (StringResultNode node : vector) {
+ node.serialize(buf);
+ }
+ }
+
+ @Override
+ protected void onDeserialize(Deserializer buf) {
+ super.onDeserialize(buf);
+ int sz = buf.getInt(null);
+ vector = new ArrayList<StringResultNode>();
+ for (int i = 0; i < sz; i++) {
+ StringResultNode node = new StringResultNode();
+ node.deserialize(buf);
+ vector.add(node);
+ }
+ }
+
+ @Override
+ protected int onCmp(ResultNode rhs) {
+ if (classId != rhs.getClassId()) {
+ return (classId - rhs.getClassId());
+ }
+ StringResultNodeVector b = (StringResultNodeVector)rhs;
+ int minLength = vector.size();
+ if (b.vector.size() < minLength) {
+ minLength = b.vector.size();
+ }
+ int diff = 0;
+ for (int i = 0; (diff == 0) && (i < minLength); i++) {
+ diff = vector.get(i).compareTo(b.vector.get(i));
+ }
+ return (diff == 0) ? (vector.size() - b.vector.size()) : diff;
+ }
+
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/TimeStampFunctionNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/TimeStampFunctionNode.java
new file mode 100644
index 00000000000..b84fa124841
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/TimeStampFunctionNode.java
@@ -0,0 +1,116 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.expression;
+
+import com.yahoo.vespa.objects.Deserializer;
+import com.yahoo.vespa.objects.ObjectVisitor;
+import com.yahoo.vespa.objects.Serializer;
+
+/**
+ * <p>This function assign a fixed width bucket to each input value.</p>
+ *
+ * @author <a href="mailto:havardpe@yahoo-inc.com">Haavard Pettersen</a>
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
+ */
+public class TimeStampFunctionNode extends UnaryFunctionNode {
+
+ public static enum TimePart {
+ Year(0),
+ Month(1),
+ MonthDay(2),
+ WeekDay(3),
+ Hour(4),
+ Minute(5),
+ Second(6),
+ YearDay(7),
+ IsDST(8);
+
+ private final int id;
+
+ private TimePart(int id) {
+ this.id = id;
+ }
+
+ private static TimePart valueOf(int id) {
+ for (TimePart part : values()) {
+ if (id == part.id) {
+ return part;
+ }
+ }
+ return null;
+ }
+ }
+
+ public static final int classId = registerClass(0x4000 + 75, TimeStampFunctionNode.class);
+ private TimePart timePart = TimePart.Year;
+ private boolean isGmt = false;
+
+ @SuppressWarnings("UnusedDeclaration")
+ public TimeStampFunctionNode() {
+ // used by deserializer
+ }
+
+ /**
+ * <p>Create a bucket expression with the given width and the given subexpression.</p>
+ *
+ * @param arg The argument for this function.
+ * @param part The part of time to retrieve.
+ * @param gmt Whether or not to treat time as GMT.
+ */
+ public TimeStampFunctionNode(ExpressionNode arg, TimePart part, boolean gmt) {
+ addArg(arg);
+ timePart = part;
+ isGmt = gmt;
+ }
+
+ public TimePart getTimePart() {
+ return timePart;
+ }
+
+ public boolean isGmt() {
+ return isGmt;
+ }
+
+ public boolean isLocal() {
+ return !isGmt;
+ }
+
+ @Override
+ protected int onGetClassId() {
+ return classId;
+ }
+
+ @Override
+ protected void onSerialize(Serializer buf) {
+ super.onSerialize(buf);
+ buf.putByte(null, (byte)(timePart.id | (isGmt ? 0x80 : 0)));
+ }
+
+ @Override
+ protected void onDeserialize(Deserializer buf) {
+ super.onDeserialize(buf);
+ int b = buf.getByte(null);
+ timePart = TimePart.valueOf(b & 0x7f);
+ isGmt = (b & 0x80) != 0;
+ }
+
+ @Override
+ protected boolean equalsUnaryFunction(UnaryFunctionNode obj) {
+ TimeStampFunctionNode rhs = (TimeStampFunctionNode)obj;
+ return timePart == rhs.timePart && isGmt == rhs.isGmt;
+ }
+
+ @Override
+ public TimeStampFunctionNode clone() {
+ TimeStampFunctionNode obj = (TimeStampFunctionNode)super.clone();
+ obj.timePart = timePart;
+ obj.isGmt = isGmt;
+ return obj;
+ }
+
+ @Override
+ public void visitMembers(ObjectVisitor visitor) {
+ super.visitMembers(visitor);
+ visitor.visit("timepart", timePart);
+ visitor.visit("islocal", isGmt);
+ }
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/ToFloatFunctionNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/ToFloatFunctionNode.java
new file mode 100644
index 00000000000..4511797d3dd
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/ToFloatFunctionNode.java
@@ -0,0 +1,39 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.expression;
+
+/**
+ * This function is an instruction to negate its argument.
+ *
+ * @author <a href="mailto:balder@yahoo-inc.com">Henning Baldersheim</a>
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
+ */
+public class ToFloatFunctionNode extends UnaryFunctionNode {
+
+ public static final int classId = registerClass(0x4000 + 134, ToFloatFunctionNode.class);
+
+ /**
+ * Constructs an empty result node. <b>NOTE:</b> This instance is broken until non-optional member data is set.
+ */
+ public ToFloatFunctionNode() {
+
+ }
+
+ /**
+ * Constructs an instance of this class with given argument.
+ *
+ * @param arg The argument for this function.
+ */
+ public ToFloatFunctionNode(ExpressionNode arg) {
+ addArg(arg);
+ }
+
+ @Override
+ protected int onGetClassId() {
+ return classId;
+ }
+
+ @Override
+ protected boolean equalsUnaryFunction(UnaryFunctionNode obj) {
+ return true;
+ }
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/ToIntFunctionNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/ToIntFunctionNode.java
new file mode 100644
index 00000000000..8ff20216374
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/ToIntFunctionNode.java
@@ -0,0 +1,44 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.expression;
+
+/**
+ * This function is an instruction to negate its argument.
+ *
+ * @author <a href="mailto:balder@yahoo-inc.com">Henning Baldersheim</a>
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
+ */
+public class ToIntFunctionNode extends UnaryFunctionNode {
+
+ public static final int classId = registerClass(0x4000 + 135, ToIntFunctionNode.class);
+
+ /**
+ * Constructs an empty result node. <b>NOTE:</b> This instance is broken until non-optional member data is set.
+ */
+ public ToIntFunctionNode() {
+
+ }
+
+ /**
+ * Constructs an instance of this class with given argument.
+ *
+ * @param arg The argument for this function.
+ */
+ public ToIntFunctionNode(ExpressionNode arg) {
+ addArg(arg);
+ }
+
+ @Override
+ protected int onGetClassId() {
+ return classId;
+ }
+
+ @Override
+ public void onPrepareResult() {
+ setResult(new IntegerResultNode());
+ }
+
+ @Override
+ protected boolean equalsUnaryFunction(UnaryFunctionNode obj) {
+ return true;
+ }
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/ToRawFunctionNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/ToRawFunctionNode.java
new file mode 100644
index 00000000000..0ee1fd1cb71
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/ToRawFunctionNode.java
@@ -0,0 +1,38 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.expression;
+
+/**
+ * This function converts its argument to a raw function node.
+ *
+ * @author <a href="mailto:lulf@yahoo-inc.com">Ulf Lilleengen</a>
+ */
+public class ToRawFunctionNode extends UnaryFunctionNode {
+
+ public static final int classId = registerClass(0x4000 + 141, ToRawFunctionNode.class);
+
+ /**
+ * Constructs an empty result node. <b>NOTE:</b> This instance is broken until non-optional member data is set.
+ */
+ public ToRawFunctionNode() {
+
+ }
+
+ /**
+ * Constructs an instance of this class with given argument.
+ *
+ * @param arg The argument for this function.
+ */
+ public ToRawFunctionNode(ExpressionNode arg) {
+ addArg(arg);
+ }
+
+ @Override
+ protected int onGetClassId() {
+ return classId;
+ }
+
+ @Override
+ protected boolean equalsUnaryFunction(UnaryFunctionNode obj) {
+ return true;
+ }
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/ToStringFunctionNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/ToStringFunctionNode.java
new file mode 100644
index 00000000000..490d19ad9a8
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/ToStringFunctionNode.java
@@ -0,0 +1,51 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.expression;
+
+/**
+ * This function is an instruction to negate its argument.
+ *
+ * @author <a href="mailto:balder@yahoo-inc.com">Henning Baldersheim</a>
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
+ */
+public class ToStringFunctionNode extends UnaryFunctionNode {
+
+ public static final int classId = registerClass(0x4000 + 131, ToStringFunctionNode.class);
+
+ /**
+ * Constructs an empty result node. <b>NOTE:</b> This instance is broken until non-optional member data is set.
+ */
+ public ToStringFunctionNode() {
+
+ }
+
+ /**
+ * Constructs an instance of this class with given argument.
+ *
+ * @param arg The argument for this function.
+ */
+ public ToStringFunctionNode(ExpressionNode arg) {
+ addArg(arg);
+ }
+
+ @Override
+ protected int onGetClassId() {
+ return classId;
+ }
+
+ @Override
+ public void onPrepareResult() {
+ setResult(new StringResultNode());
+ }
+
+ @Override
+ public boolean onExecute() {
+ getArg().execute();
+ ((StringResultNode)getResult()).setValue(getArg().getResult().getString());
+ return true;
+ }
+
+ @Override
+ protected boolean equalsUnaryFunction(UnaryFunctionNode obj) {
+ return true;
+ }
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/UcaFunctionNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/UcaFunctionNode.java
new file mode 100644
index 00000000000..233023d1a2e
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/UcaFunctionNode.java
@@ -0,0 +1,84 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.expression;
+
+import com.yahoo.vespa.objects.Deserializer;
+import com.yahoo.vespa.objects.ObjectVisitor;
+import com.yahoo.vespa.objects.Serializer;
+
+/**
+ * This function is a request to use the Unicode Collation Algorithm specification when sorting this field.
+ *
+ * @author <a href="mailto:balder@yahoo-inc.com">Henning Baldersheim</a>
+ */
+public class UcaFunctionNode extends UnaryFunctionNode {
+
+ public static final int classId = registerClass(0x4000 + 140, UcaFunctionNode.class);
+ private String locale = "en-US";
+ private String strength = "TERTIARY";
+
+ /**
+ * Constructs an empty result node.
+ */
+ public UcaFunctionNode() {
+ // empty
+ }
+
+ /**
+ * Create an UCA node with a specific locale.
+ *
+ * @param arg The argument for this function.
+ * @param locale The locale to use.
+ */
+ public UcaFunctionNode(ExpressionNode arg, String locale) {
+ this(arg, locale, "TERTIARY");
+ }
+
+ /**
+ * Create an UCA node with a specific locale and strength setting.
+ *
+ * @param arg The argument for this function.
+ * @param locale The locale to use.
+ * @param strength The strength setting to use.
+ */
+ public UcaFunctionNode(ExpressionNode arg, String locale, String strength) {
+ addArg(arg);
+ this.locale = locale;
+ this.strength = strength;
+ }
+
+ @Override
+ protected int onGetClassId() {
+ return classId;
+ }
+
+ @Override
+ protected void onSerialize(Serializer buf) {
+ super.onSerialize(buf);
+ putUtf8(buf, locale);
+ putUtf8(buf, strength);
+ }
+
+ @Override
+ protected void onDeserialize(Deserializer buf) {
+ super.onDeserialize(buf);
+ locale = getUtf8(buf);
+ strength = getUtf8(buf);
+ }
+
+ @Override
+ protected boolean equalsUnaryFunction(UnaryFunctionNode obj) {
+ return true;
+ }
+
+ @Override
+ public UcaFunctionNode clone() {
+ return (UcaFunctionNode)super.clone();
+ }
+
+ @Override
+ public void visitMembers(ObjectVisitor visitor) {
+ super.visitMembers(visitor);
+ visitor.visit("locale", locale);
+ visitor.visit("strength", strength);
+ }
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/UnaryBitFunctionNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/UnaryBitFunctionNode.java
new file mode 100644
index 00000000000..05afc5d99b9
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/UnaryBitFunctionNode.java
@@ -0,0 +1,89 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.expression;
+
+import com.yahoo.vespa.objects.Deserializer;
+import com.yahoo.vespa.objects.ObjectVisitor;
+import com.yahoo.vespa.objects.Serializer;
+
+/**
+ * This is an abstract super-class for all unary functions that operator on bit values.
+ *
+ * @author <a href="mailto:balder@yahoo-inc.com">Henning Baldersheim</a>
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
+ */
+public abstract class UnaryBitFunctionNode extends UnaryFunctionNode {
+
+ public static final int classId = registerClass(0x4000 + 46, UnaryBitFunctionNode.class);
+ private int numBits = 0;
+
+ /**
+ * Constructs an empty result node. <b>NOTE:</b> This instance is broken until non-optional member data is set.
+ */
+ public UnaryBitFunctionNode() {
+
+ }
+
+ /**
+ * Constructs an instance of this class with given argument and number of bits.
+ *
+ * @param arg The argument for this function.
+ * @param numBits The number of bits to operate on.
+ */
+ public UnaryBitFunctionNode(ExpressionNode arg, int numBits) {
+ addArg(arg);
+ setNumBits(numBits);
+ }
+
+ /**
+ * Returns the number of bits to operate on.
+ *
+ * @return The number of bits.
+ */
+ public final int getNumBits() {
+ return numBits;
+ }
+
+ /**
+ * Sets the number of bits to operate on.
+ *
+ * @param numBits The number of bits.
+ * @return This, to allow chaining.
+ */
+ public UnaryBitFunctionNode setNumBits(int numBits) {
+ this.numBits = numBits;
+ return this;
+ }
+
+ @Override
+ protected int onGetClassId() {
+ return classId;
+ }
+
+ @Override
+ protected void onSerialize(Serializer buf) {
+ super.onSerialize(buf);
+ buf.putInt(null, numBits);
+ }
+
+ @Override
+ protected void onDeserialize(Deserializer buf) {
+ super.onDeserialize(buf);
+ numBits = buf.getInt(null);
+ }
+
+ @Override
+ public int hashCode() {
+ return super.hashCode() + numBits;
+ }
+
+ @Override
+ protected final boolean equalsUnaryFunction(UnaryFunctionNode obj) {
+ return numBits == ((UnaryBitFunctionNode)obj).numBits;
+ }
+
+ @Override
+ public void visitMembers(ObjectVisitor visitor) {
+ super.visitMembers(visitor);
+ visitor.visit("numBits", numBits);
+ }
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/UnaryFunctionNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/UnaryFunctionNode.java
new file mode 100644
index 00000000000..84264f47ef4
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/UnaryFunctionNode.java
@@ -0,0 +1,44 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.expression;
+
+/**
+ * This is an abstract super-class for all functions that accept only a single argument.
+ *
+ * @author <a href="mailto:balder@yahoo-inc.com">Henning Baldersheim</a>
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
+ */
+public abstract class UnaryFunctionNode extends MultiArgFunctionNode {
+
+ public static final int classId = registerClass(0x4000 + 43, UnaryFunctionNode.class);
+
+ @Override
+ protected int onGetClassId() {
+ return classId;
+ }
+
+ /**
+ * Return the single argument given to this function.
+ *
+ * @return The argument to this function
+ */
+ public ExpressionNode getArg() {
+ return getArg(0);
+ }
+
+ @Override
+ public void onPrepareResult() {
+ setResult((ResultNode)getArg().getResult().clone());
+ }
+
+ @Override
+ public void onPrepare() {
+ super.onPrepare();
+ }
+
+ @Override
+ protected final boolean equalsMultiArgFunction(MultiArgFunctionNode obj) {
+ return equalsUnaryFunction((UnaryFunctionNode)obj);
+ }
+
+ protected abstract boolean equalsUnaryFunction(UnaryFunctionNode obj);
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/XorBitFunctionNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/XorBitFunctionNode.java
new file mode 100644
index 00000000000..57fa01c97de
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/XorBitFunctionNode.java
@@ -0,0 +1,38 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.expression;
+
+/**
+ * This function is a request to bitwise XOR the result of its first argument with itself in chunks of the second
+ * argument number of bits. If the result to XOR is a 24 bit value, and the second argument is 8, this function will XOR
+ * the first 8 bits of the result with the next 8 bits of the result, and then XOR that number with the next 8 bits of
+ * the result.
+ *
+ * @author <a href="mailto:balder@yahoo-inc.com">Henning Baldersheim</a>
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
+ */
+public class XorBitFunctionNode extends UnaryBitFunctionNode {
+
+ public static final int classId = registerClass(0x4000 + 71, XorBitFunctionNode.class);
+
+ /**
+ * Constructs an empty result node. <b>NOTE:</b> This instance is broken until non-optional member data is set.
+ */
+ public XorBitFunctionNode() {
+
+ }
+
+ /**
+ * Constructs an instance of this class with given argument and number of bits.
+ *
+ * @param arg The argument for this function.
+ * @param numBits The number of bits to operate on.
+ */
+ public XorBitFunctionNode(ExpressionNode arg, int numBits) {
+ super(arg, numBits);
+ }
+
+ @Override
+ protected int onGetClassId() {
+ return classId;
+ }
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/XorFunctionNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/XorFunctionNode.java
new file mode 100644
index 00000000000..036d7fc8f16
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/XorFunctionNode.java
@@ -0,0 +1,22 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.expression;
+
+/**
+ * This function is an instruction to perform bitwise XOR on the result of all arguments in order.
+ *
+ * @author <a href="mailto:balder@yahoo-inc.com">Henning Baldersheim</a>
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
+ */
+public class XorFunctionNode extends BitFunctionNode {
+
+ public static final int classId = registerClass(0x4000 + 69, XorFunctionNode.class);
+
+ @Override
+ protected int onGetClassId() {
+ return classId;
+ }
+
+ public void onArgument(final ResultNode arg, IntegerResultNode result) {
+ result.xorOp(arg);
+ }
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/ZCurveFunctionNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/ZCurveFunctionNode.java
new file mode 100644
index 00000000000..54e86f8353c
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/ZCurveFunctionNode.java
@@ -0,0 +1,92 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.expression;
+
+import com.yahoo.vespa.objects.Deserializer;
+import com.yahoo.vespa.objects.ObjectVisitor;
+import com.yahoo.vespa.objects.Serializer;
+
+/**
+ * This function decompose two-dimensonal zcurve values into x and y values.
+ *
+ * @author <a href="mailto:balder@yahoo-inc.com">Henning Baldersheim</a>
+ */
+public class ZCurveFunctionNode extends UnaryFunctionNode {
+
+ public static enum Dimension {
+ X(0),
+ Y(1);
+
+ private final int id;
+
+ private Dimension(int id) {
+ this.id = id;
+ }
+
+ private static Dimension valueOf(int id) {
+ for (Dimension dim : values()) {
+ if (id == dim.id) {
+ return dim;
+ }
+ }
+ return null;
+ }
+ }
+
+ public static final int classId = registerClass(0x4000 + 139, ZCurveFunctionNode.class);
+ private Dimension dim = Dimension.X;
+
+ @SuppressWarnings("UnusedDeclaration")
+ public ZCurveFunctionNode() {
+ // used by deserializer
+ }
+
+ public ZCurveFunctionNode(ExpressionNode arg, Dimension dimension) {
+ addArg(arg);
+ dim = dimension;
+ }
+
+ /**
+ * Obtain the predefined bucket list of this bucket expression
+ *
+ * @return what part of the time you have requested
+ */
+ public final Dimension getDimension() {
+ return dim;
+ }
+
+ @Override
+ protected int onGetClassId() {
+ return classId;
+ }
+
+ @Override
+ protected void onSerialize(Serializer buf) {
+ super.onSerialize(buf);
+ buf.putByte(null, (byte)dim.id);
+ }
+
+ @Override
+ protected void onDeserialize(Deserializer buf) {
+ super.onDeserialize(buf);
+ int b = buf.getByte(null);
+ dim = Dimension.valueOf(b);
+ }
+
+ @Override
+ protected boolean equalsUnaryFunction(UnaryFunctionNode obj) {
+ return dim == ((ZCurveFunctionNode)obj).dim;
+ }
+
+ @Override
+ public ZCurveFunctionNode clone() {
+ ZCurveFunctionNode obj = (ZCurveFunctionNode)super.clone();
+ obj.dim = dim;
+ return obj;
+ }
+
+ @Override
+ public void visitMembers(ObjectVisitor visitor) {
+ super.visitMembers(visitor);
+ visitor.visit("dimension", dim);
+ }
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/package-info.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/package-info.java
new file mode 100644
index 00000000000..ebe2448ebf0
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/package-info.java
@@ -0,0 +1,4 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+@ExportPackage package com.yahoo.searchlib.expression;
+
+import com.yahoo.osgi.annotation.ExportPackage;
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/gbdt/CategoryFeatureNode.java b/searchlib/src/main/java/com/yahoo/searchlib/gbdt/CategoryFeatureNode.java
new file mode 100644
index 00000000000..285b39cbfbb
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/gbdt/CategoryFeatureNode.java
@@ -0,0 +1,34 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.gbdt;
+
+import com.yahoo.searchlib.rankingexpression.evaluation.DoubleValue;
+import com.yahoo.searchlib.rankingexpression.evaluation.Value;
+
+import java.util.Arrays;
+import java.util.Optional;
+
+/**
+ * A GBDT node representing a set inclusion test: feature IN [value-list] where values can be strings or numbers.
+ *
+ * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a>
+ */
+public final class CategoryFeatureNode extends FeatureNode {
+
+ private final Value[] values;
+
+ public CategoryFeatureNode(String feature, Value[] values, Optional<Integer> samples, TreeNode left, TreeNode right) {
+ super(feature, samples, left, right);
+ this.values = Arrays.copyOf(values, values.length);
+ }
+
+ /** Returns a copy of the array of values in this */
+ public Value[] values() {
+ return Arrays.copyOf(values, values.length);
+ }
+
+ @Override
+ protected String rankingExpressionCondition() {
+ return " in " + Arrays.toString(values);
+ }
+
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/gbdt/FeatureNode.java b/searchlib/src/main/java/com/yahoo/searchlib/gbdt/FeatureNode.java
new file mode 100644
index 00000000000..2d69624726c
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/gbdt/FeatureNode.java
@@ -0,0 +1,95 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.gbdt;
+
+import com.yahoo.searchlib.rankingexpression.evaluation.StringValue;
+import com.yahoo.searchlib.rankingexpression.evaluation.Value;
+import org.w3c.dom.Element;
+import org.w3c.dom.Node;
+
+import java.util.List;
+import java.util.Optional;
+
+/**
+ * A node in a GBDT tree which references a feature value
+ *
+ * @author bratseth
+ */
+public abstract class FeatureNode extends TreeNode {
+
+ private final String feature;
+
+ private final TreeNode left;
+ private final TreeNode right;
+
+ public FeatureNode(String feature, Optional<Integer> samples, TreeNode left, TreeNode right) {
+ super(samples);
+ this.feature = feature;
+ this.left = left;
+ this.right = right;
+ }
+
+ public String feature() { return feature; }
+
+ public TreeNode left() { return left; }
+
+ public TreeNode right() { return right; }
+
+ // TODO: Integrate with programmatic API rather than strings
+ @Override
+ public String toRankingExpression() {
+ StringBuilder expression = new StringBuilder();
+ expression.append("if (").append(feature).append(rankingExpressionCondition());
+ expression.append(", ").append(left.toRankingExpression());
+ expression.append(", ").append(right.toRankingExpression());
+
+ Optional<Float> trueProbability = trueProbability();
+ if (trueProbability.isPresent())
+ expression.append(", ").append(trueProbability.get());
+
+ expression.append(")");
+ return expression.toString();
+ }
+
+ private Optional<Float> trueProbability() {
+ if (left.samples().isPresent() && right.samples().isPresent())
+ return Optional.of((float)left.samples().get() / (left.samples().get() + right.samples().get()));
+ return Optional.empty();
+ }
+
+ protected abstract String rankingExpressionCondition();
+
+ public static FeatureNode fromDom(Node node) {
+ List<Element> children = XmlHelper.getChildElements(node, null);
+ if (children.size() != 2) {
+ throw new IllegalArgumentException("Expected 2 children in element '" + node.getNodeName() + "', got " +
+ children.size() + ".");
+ }
+
+ String name = XmlHelper.getAttributeText(node, "feature");
+ Value[] values = toValues(XmlHelper.getAttributeText(node, "value"));
+ Optional<Integer> samples = toInteger(XmlHelper.getOptionalAttributeText(node, "nSamples"));
+ TreeNode left = TreeNode.fromDom(children.get(0));
+ TreeNode right = TreeNode.fromDom(children.get(1));
+
+ if (name.endsWith("$") || values.length>1 || values[0] instanceof StringValue)
+ return new CategoryFeatureNode(name, values, samples, left, right);
+ else
+ return new NumericFeatureNode(name, values[0], samples, left, right);
+ }
+
+ /** Converts one or more comma-separated values into an array of values */
+ private static Value[] toValues(String valueListString) {
+ String[] valueStrings = valueListString.split(",");
+ Value[] values = new Value[valueStrings.length];
+ for (int i=0; i<valueStrings.length; i++) {
+ try {
+ values[i] = Value.parse(valueStrings[i]);
+ }
+ catch (NumberFormatException e) { // allow un(double)quoted string values in Gbdt XML trees
+ values[i] = new StringValue(valueStrings[i]);
+ }
+ }
+ return values;
+ }
+
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/gbdt/GbdtConverter.java b/searchlib/src/main/java/com/yahoo/searchlib/gbdt/GbdtConverter.java
new file mode 100644
index 00000000000..3625ee4252b
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/gbdt/GbdtConverter.java
@@ -0,0 +1,34 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.gbdt;
+
+import com.yahoo.yolean.Exceptions;
+
+import java.io.FileNotFoundException;
+
+/**
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
+ */
+public class GbdtConverter {
+
+ /**
+ * Implements an application main function so that the converter can be used as a command-line tool.
+ *
+ * @param args List of arguments.
+ */
+ public static void main(String[] args) {
+ if (args.length != 1) {
+ System.err.println("Usage: GbdtConverter <filename>");
+ System.exit(1);
+ }
+ try {
+ System.out.println(GbdtModel.fromXmlFile(args[0]).toRankingExpression());
+ } catch (FileNotFoundException e) {
+ System.err.println("Could not find file '" + args[0] + "'.");
+ System.exit(1);
+ } catch (Exception e) {
+ System.err.println("An error occurred while parsing the content of file '" + args[0] + "': " +
+ Exceptions.toMessageString(e));
+ System.exit(1);
+ }
+ }
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/gbdt/GbdtModel.java b/searchlib/src/main/java/com/yahoo/searchlib/gbdt/GbdtModel.java
new file mode 100644
index 00000000000..0e40fe33b03
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/gbdt/GbdtModel.java
@@ -0,0 +1,92 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.gbdt;
+
+import com.yahoo.searchlib.rankingexpression.evaluation.DoubleValue;
+import org.w3c.dom.Element;
+import org.w3c.dom.Node;
+import org.xml.sax.SAXException;
+
+import javax.xml.parsers.ParserConfigurationException;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Optional;
+
+/**
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
+ */
+public class GbdtModel {
+
+ private final List<TreeNode> trees;
+
+ public GbdtModel(List<TreeNode> trees) {
+ this.trees = asForest(trees);
+ }
+
+ public List<TreeNode> trees() {
+ return trees;
+ }
+
+ public String toRankingExpression() {
+ if ( ! hasSampleInformation())
+ System.err.println("The model nodes does not have the 'nSamples' attribute. " +
+ "For optimal runtime performance use an 'ext' model which has this information.");
+ StringBuilder ret = new StringBuilder();
+ for (TreeNode tree : trees) {
+ if (ret.length() > 0) {
+ ret.append(" +\n");
+ }
+ ret.append(tree.toRankingExpression());
+ }
+ ret.append("\n");
+ return ret.toString();
+ }
+
+ /**
+ * Return whether this model has sample information.
+ * Don't bother to check every node as files either has this for all nodes or for none.
+ */
+ private boolean hasSampleInformation() {
+ if (trees.size() == 0) return true; // no matter
+ return trees.get(0).samples() !=null;
+ }
+
+ public static GbdtModel fromXml(String xml) throws ParserConfigurationException, IOException, SAXException {
+ return fromDom(XmlHelper.parseXml(xml));
+ }
+
+ public static GbdtModel fromXmlFile(String fileName) throws ParserConfigurationException, IOException, SAXException {
+ return fromDom(XmlHelper.parseXmlFile(fileName));
+ }
+
+ public static GbdtModel fromDom(Node doc) {
+ Element dtree = XmlHelper.getSingleElement(doc, "DecisionTree");
+ Element forest = XmlHelper.getSingleElement(dtree, "Forest");
+ List<Element> trees = XmlHelper.getChildElements(forest, "Tree");
+ if (trees.isEmpty()) {
+ throw new IllegalArgumentException("Forest has no trees.");
+ }
+ List<TreeNode> model = new ArrayList<>();
+ for (Node tree : trees) {
+ if (XmlHelper.getChildElements(tree, null).isEmpty()) continue; // ignore
+ model.add(TreeNode.fromDom(XmlHelper.getSingleElement(tree, null)));
+ }
+ return new GbdtModel(model);
+ }
+
+ private static List<TreeNode> asForest(List<TreeNode> in) {
+ List<TreeNode> out = new ArrayList<>(in.size());
+ for (TreeNode node : in) {
+ if (node instanceof FeatureNode) {
+ out.add(node);
+ } else if (node instanceof ResponseNode) { // TODO): We should stop this sillyness ...
+ out.add(new NumericFeatureNode("value(0)", new DoubleValue(1), node.samples(), node,
+ new ResponseNode(0, Optional.of(0))));
+ } else {
+ throw new UnsupportedOperationException(node.getClass().getName());
+ }
+ }
+ return Collections.unmodifiableList(out);
+ }
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/gbdt/NumericFeatureNode.java b/searchlib/src/main/java/com/yahoo/searchlib/gbdt/NumericFeatureNode.java
new file mode 100644
index 00000000000..b78b9ed4224
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/gbdt/NumericFeatureNode.java
@@ -0,0 +1,34 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.gbdt;
+
+import com.yahoo.searchlib.rankingexpression.evaluation.DoubleValue;
+import com.yahoo.searchlib.rankingexpression.evaluation.Value;
+
+import java.util.Arrays;
+import java.util.Optional;
+
+/**
+ * A GBDT node representing a numeric "less than" comparison: feature &lt; numeric-value
+ *
+ * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a>
+ */
+public final class NumericFeatureNode extends FeatureNode {
+
+ private final Value value;
+
+ public NumericFeatureNode(String feature, Value value, Optional<Integer> samples, TreeNode left, TreeNode right) {
+ super(feature, samples, left, right);
+ this.value = value;
+ }
+
+ /** Returns a copy of the array of values in this */
+ public Value value() {
+ return value;
+ }
+
+ @Override
+ public String rankingExpressionCondition() {
+ return " < " + value;
+ }
+
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/gbdt/ResponseNode.java b/searchlib/src/main/java/com/yahoo/searchlib/gbdt/ResponseNode.java
new file mode 100644
index 00000000000..fa4ef2b38e0
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/gbdt/ResponseNode.java
@@ -0,0 +1,33 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.gbdt;
+
+import org.w3c.dom.Node;
+
+import java.util.Optional;
+
+/**
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
+ */
+public class ResponseNode extends TreeNode {
+
+ private final double value;
+
+ public ResponseNode(double value, Optional<Integer> samples) {
+ super(samples);
+ this.value = value;
+ }
+
+ public double value() {
+ return value;
+ }
+
+ @Override
+ public String toRankingExpression() {
+ return String.valueOf(value);
+ }
+
+ public static ResponseNode fromDom(Node node) {
+ return new ResponseNode(Double.valueOf(XmlHelper.getAttributeText(node, "value")),
+ toInteger(XmlHelper.getOptionalAttributeText(node, "nSamples")));
+ }
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/gbdt/TreeNode.java b/searchlib/src/main/java/com/yahoo/searchlib/gbdt/TreeNode.java
new file mode 100644
index 00000000000..a8a6add87cd
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/gbdt/TreeNode.java
@@ -0,0 +1,43 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.gbdt;
+
+import org.w3c.dom.Node;
+
+import java.util.Optional;
+
+/**
+ * @author bratseth
+ */
+public abstract class TreeNode {
+
+ private final Optional<Integer> samples;
+
+ public TreeNode(Optional<Integer> samples) {
+ this.samples = samples;
+ }
+
+ public abstract String toRankingExpression();
+
+ /**
+ * Returns the number of samples in the training set that matches this node
+ * if this model does not contain this information (i.e if it is not an "ext" model).
+ */
+ public Optional<Integer> samples() { return samples; }
+
+ public static TreeNode fromDom(Node node) {
+ String nodeName = node.getNodeName();
+ if (nodeName.equalsIgnoreCase("node")) {
+ return FeatureNode.fromDom(node);
+ } else if (nodeName.equalsIgnoreCase("response")) {
+ return ResponseNode.fromDom(node);
+ } else {
+ throw new UnsupportedOperationException(nodeName);
+ }
+ }
+
+ static Optional<Integer> toInteger(Optional<String> integerText) {
+ if ( ! integerText.isPresent()) return Optional.empty();
+ return Optional.of(Integer.parseInt(integerText.get()));
+ }
+
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/gbdt/XmlHelper.java b/searchlib/src/main/java/com/yahoo/searchlib/gbdt/XmlHelper.java
new file mode 100644
index 00000000000..4ed0106e7ae
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/gbdt/XmlHelper.java
@@ -0,0 +1,110 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.gbdt;
+
+import org.w3c.dom.Document;
+import org.w3c.dom.Element;
+import org.w3c.dom.Node;
+import org.w3c.dom.NodeList;
+import org.xml.sax.SAXException;
+
+import javax.xml.parsers.DocumentBuilder;
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.parsers.ParserConfigurationException;
+import java.io.ByteArrayInputStream;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.charset.Charset;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Optional;
+
+/**
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
+ */
+abstract class XmlHelper {
+
+ private static final Charset UTF8 = Charset.forName("UTF-8");
+
+ public static Element parseXml(String xml)
+ throws ParserConfigurationException, IOException, SAXException
+ {
+ return parseXmlStream(new ByteArrayInputStream(xml.getBytes(UTF8)));
+ }
+
+ public static Element parseXmlFile(String fileName)
+ throws ParserConfigurationException, IOException, SAXException
+ {
+ return parseXmlStream(new FileInputStream(fileName));
+ }
+
+ public static Element parseXmlStream(InputStream in)
+ throws ParserConfigurationException, IOException, SAXException
+ {
+ DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
+ DocumentBuilder builder = factory.newDocumentBuilder();
+ Document doc = builder.parse(in);
+ return doc.getDocumentElement();
+ }
+
+ public static String getAttributeText(Node node, String name) {
+ Node valueNode = node.getAttributes().getNamedItem(name);
+ if (valueNode == null) {
+ throw new IllegalArgumentException("Missing '" + name + "' attribute in element '" +
+ node.getNodeName() + "'.");
+ }
+ String valueText = valueNode.getTextContent();
+ if (valueText == null || valueText.isEmpty()) {
+ throw new IllegalArgumentException("Attribute '" + name + "' in element '" +
+ node.getNodeName() + "' is empty.");
+ }
+ return valueText;
+ }
+
+ public static String getAttributeTextOrNull(Node node, String name) {
+ Node valueNode = node.getAttributes().getNamedItem(name);
+ if (valueNode == null) return null;
+ return valueNode.getTextContent();
+ }
+
+ public static Optional<String> getOptionalAttributeText(Node node, String name) {
+ Node valueNode = node.getAttributes().getNamedItem(name);
+ if (valueNode == null) return Optional.empty();
+ return Optional.of(valueNode.getTextContent());
+ }
+
+ public static Element getSingleElement(Node node, String name) {
+ List<Element> children = getChildElements(node, name);
+ if (children.isEmpty()) {
+ if (name != null) {
+ throw new IllegalArgumentException("Node '" + node.getNodeName() + "' has no '" + name + "' children.");
+ } else {
+ throw new IllegalArgumentException("Node '" + node.getNodeName() + "' has no children.");
+ }
+ }
+ if (children.size() != 1) {
+ if (name != null) {
+ throw new IllegalArgumentException("Expected 1 '" + name + "' child, got " + children.size() + ".");
+ } else {
+ throw new IllegalArgumentException("Expected 1 child, got " + children.size() + ".");
+ }
+ }
+ return children.get(0);
+ }
+
+ public static List<Element> getChildElements(Node node, String name) {
+ NodeList children = node.getChildNodes();
+ List<Element> lst = new LinkedList<>();
+ for (int i = 0, len = children.getLength(); i < len; ++i) {
+ Node child = children.item(i);
+ if (!(child instanceof Element)) {
+ continue;
+ }
+ if (name != null && !child.getNodeName().equalsIgnoreCase(name)) {
+ continue;
+ }
+ lst.add((Element)child);
+ }
+ return lst;
+ }
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/CaseList.java b/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/CaseList.java
new file mode 100644
index 00000000000..608a4b499ed
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/CaseList.java
@@ -0,0 +1,15 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.mlr.ga;
+
+import java.util.List;
+
+/**
+ * A producer of a list of cases for function training.
+ *
+ * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a>
+ */
+public interface CaseList {
+
+ public List<TrainingSet.Case> cases();
+
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/Evolvable.java b/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/Evolvable.java
new file mode 100644
index 00000000000..0ccce4ad2ad
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/Evolvable.java
@@ -0,0 +1,26 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.mlr.ga;
+
+import com.yahoo.searchlib.rankingexpression.RankingExpression;
+
+import java.util.List;
+
+/**
+ * An entity which may evolve over time
+ *
+ * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a>
+ */
+public abstract class Evolvable implements Comparable<Evolvable> {
+
+ public abstract Evolvable makeSuccessor(int memberNumber, List<RankingExpression> genepool, TrainingEnvironment environment);
+
+ public abstract RankingExpression getGenepool();
+
+ @Override
+ public int compareTo(Evolvable other) {
+ return -Double.compare(getFitness(), other.getFitness());
+ }
+
+ public abstract double getFitness();
+
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/Individual.java b/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/Individual.java
new file mode 100644
index 00000000000..416e2da4c82
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/Individual.java
@@ -0,0 +1,69 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.mlr.ga;
+
+import com.yahoo.searchlib.rankingexpression.RankingExpression;
+
+import java.util.Collections;
+import java.util.List;
+
+/**
+ * An individual in an evolving population - a genome with a fitness score.
+ * Individuals are comparable by decreasing fitness.
+ * <p>
+ * As we are training ranking expressions, the genome, here, is the ranking expression.
+ *
+ * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a>
+ */
+public class Individual extends Evolvable {
+
+ private final RankingExpression genome;
+ private final TrainingSet trainingSet;
+ private final double fitness;
+
+ public Individual(RankingExpression genome, TrainingSet trainingSet) {
+ this.genome = genome;
+ this.trainingSet = trainingSet;
+ this.fitness = trainingSet.evaluate(genome);
+ }
+
+ public RankingExpression getGenome() { return genome; }
+
+ public double calculateAverageError() {
+ return trainingSet.calculateAverageError(genome);
+ }
+
+ public double calculateAverageErrorPercentage() {
+ return trainingSet.calculateAverageErrorPercentage(genome);
+ }
+
+ @Override
+ public double getFitness() { return fitness; }
+
+ @Override
+ public Individual makeSuccessor(int memberNumber, List<RankingExpression> genepool, TrainingEnvironment environment) {
+ return new Individual(environment.recombiner().recombine(genome, genepool), trainingSet);
+ }
+
+ @Override
+ public RankingExpression getGenepool() {
+ return genome;
+ }
+
+ @Override
+ public String toString() {
+ return toSomewhatShortString() + ", expression: " + genome;
+ }
+
+ /** Returns a shorter string describing this (not including the expression */
+ public String toSomewhatShortString() {
+ return "Error % " + calculateAverageErrorPercentage() +
+ " average error " + calculateAverageError() +
+ " fitness " + getFitness();
+ }
+
+ /** Returns a shorter string describing this (not including the expression */
+ public String toShortString() {
+ return "Error: " + calculateAverageErrorPercentage() + " %";
+ }
+
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/KeyboardChecker.java b/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/KeyboardChecker.java
new file mode 100644
index 00000000000..7f2e3645076
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/KeyboardChecker.java
@@ -0,0 +1,50 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.mlr.ga;
+
+import java.awt.KeyEventDispatcher;
+import java.awt.KeyboardFocusManager;
+import java.awt.event.KeyEvent;
+
+/**
+ * TODO
+ *
+ * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a>
+ */
+public class KeyboardChecker {
+
+ private static boolean qPressed = false;
+
+ private final Object lock = new Object();
+
+ public KeyboardChecker() {
+ KeyboardFocusManager.getCurrentKeyboardFocusManager().addKeyEventDispatcher(new KeyEventDispatcher() {
+
+ @Override
+ public boolean dispatchKeyEvent(KeyEvent ke) {
+ synchronized (lock) {
+ switch (ke.getID()) {
+ case KeyEvent.KEY_PRESSED:
+ if (ke.getKeyCode() == KeyEvent.VK_Q) {
+ qPressed = true;
+ }
+ break;
+
+ case KeyEvent.KEY_RELEASED:
+ if (ke.getKeyCode() == KeyEvent.VK_Q) {
+ qPressed = false;
+ }
+ break;
+ }
+ return false;
+ }
+ }
+ });
+ }
+
+ public boolean isQPressed() {
+ synchronized (lock) {
+ return qPressed;
+ }
+ }
+
+} \ No newline at end of file
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/Main.java b/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/Main.java
new file mode 100644
index 00000000000..c62462d0c3d
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/Main.java
@@ -0,0 +1,73 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.mlr.ga;
+
+import com.yahoo.io.IOUtils;
+import com.yahoo.searchlib.mlr.ga.caselist.FileCaseList;
+import com.yahoo.searchlib.rankingexpression.RankingExpression;
+import com.yahoo.searchlib.rankingexpression.parser.ParseException;
+
+import java.io.BufferedReader;
+import java.io.FileReader;
+import java.io.IOException;
+
+/**
+ * Command line runner for training sessions
+ *
+ * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a>
+ */
+/*
+TODO: Switch order of generation and sequence in names
+TODO: Output fitness improvement on each step (esp useful for species evolution)
+TODO: Detect local optima (no improvement for n rounds) and stop early
+TODO: Split into training and validation sets
+ */
+public class Main {
+
+ public Main(String[] args, Tracker tracker) {
+ if (args.length < 1 || args[0].trim().equals("help")) {
+ System.out.println(
+ "Finds a ranking expression matching a training set given as a case file.\n" +
+ "Run until the expression seems good enough.\n" +
+ "Usage: ga <case-file> - \n" +
+ " where case-file is a file containing case lines on the form \n" +
+ " targetValue, argument1:value1, ...\n" +
+ " (comment lines starting by # are also permitted)\n");
+ return;
+ }
+
+ TrainingParameters parameters = new TrainingParameters();
+ //parameters.setAllowConditions(false);
+ parameters.setErrorIsRelative(false);
+ parameters.setInitialSpeciesSize(40);
+ parameters.setSpeciesLifespan(100);
+ parameters.setExcludeFeatures("F7,F9,F10,F11,F12,F13,F14,F15,F16,F17,F18,F19,F21,F23,F24,F25,F26,F27,F29,F30,F32,F33,F34,F35,F36,F37,F38,F39,F40,F41,F42,F44,F46,F47,F48,F49,F50,F52,F53,F55,F56,F57,F58,F59,F60,F61,F62,F63,F64,F65,F67,F69,F70,F71,F72,F73,F75,F76,F78,F79,F80,F81,F82,F83,F84,F85,F86,F87,F88,F90,F92,F93,F94,F95,F96,F98,F99,F100,F101,F102,F103,F104,F105,F106,F107,F108,F109,F66,F89,F110");
+ //parameters.setInitialSpeciesSize(20);
+
+ String caseFile = args[0];
+ TrainingSet trainingSet = new TrainingSet(FileCaseList.create(caseFile, parameters), parameters);
+ Trainer trainer = new Trainer(trainingSet);
+
+ if (args.length > 1) { // Evaluate given expression
+ try {
+ Individual given = new Individual(new RankingExpression(new BufferedReader(new FileReader(args[1]))), trainingSet);
+ System.out.println("Error in '" + args[1] + "': error % " + given.calculateAverageErrorPercentage() +
+ " average error " + given.calculateAverageError() +
+ " fitness " + given.getFitness());
+ }
+ catch (IOException | ParseException e) {
+ throw new IllegalArgumentException("Could not evaluate expression in argument 2", e);
+ }
+ }
+ else { // Train expression
+ // TODO: Move system outs to tracker
+ System.out.println("Learning ...");
+ RankingExpression learntExpression = trainer.train(parameters, tracker);
+ System.out.println("Learnt expression: " + learntExpression);
+ }
+ }
+
+ public static void main(String[] args) {
+ new Main(args, new PrintingTracker(10, 0, 1));
+ }
+
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/Population.java b/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/Population.java
new file mode 100644
index 00000000000..484a0747e24
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/Population.java
@@ -0,0 +1,60 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.mlr.ga;
+
+import com.yahoo.searchlib.rankingexpression.RankingExpression;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+/**
+ * A collection of evolvables
+ *
+ * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a>
+ */
+public class Population {
+
+ /** The current members of this population, always sorted by decreasing fitness */
+ private List<Evolvable> members;
+
+ public Population(List<Evolvable> initialMembers) {
+ members = new ArrayList<>(initialMembers);
+ Collections.sort(members);
+ }
+
+ /** Returns the most fit member of this population (never null) */
+ public Evolvable best() {
+ return members.get(0);
+ }
+
+ /** Returns the members of this population as an unmodifiable list sorted by decreasing fitness*/
+ public List<Evolvable> members() { return Collections.unmodifiableList(members); }
+
+ public void evolve(int generation, TrainingEnvironment environment) {
+ TrainingParameters p = environment.parameters();
+ int generationSize = p.getInitialSpeciesSize() -
+ (int)Math.round((p.getInitialSpeciesSize() - p.getFinalSpeciesSize()) * generation/p.getSpeciesLifespan());
+ members = breed(members, generationSize * p.getGenerationCandidatesFactor(), environment);
+ Collections.sort(members);
+ members = members.subList(0, Math.min(generationSize, members.size()));
+ }
+
+ private List<Evolvable> breed(List<Evolvable> members, int offspringCount, TrainingEnvironment environment) {
+ List<Evolvable> offspring = new ArrayList<>(offspringCount); // TODO: Can we do this inline and keep the list forever (and then also the immutable view)
+ offspring.add(members.get(0)); // keep the best as-is
+ List<RankingExpression> genePool = collectGenepool(members);
+ for (int i = 0; i < offspringCount - 1; i++) {
+ Evolvable child = members.get(i % members.size()).makeSuccessor(i, genePool, environment);
+ offspring.add(child);
+ }
+ return offspring;
+ }
+
+ private List<RankingExpression> collectGenepool(List<Evolvable> members) {
+ List<RankingExpression> genepool = new ArrayList<>();
+ for (Evolvable member : members)
+ genepool.add(member.getGenepool());
+ return genepool;
+ }
+
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/PrintingTracker.java b/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/PrintingTracker.java
new file mode 100644
index 00000000000..4a3edd35a8d
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/PrintingTracker.java
@@ -0,0 +1,91 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.mlr.ga;
+
+import com.yahoo.searchlib.rankingexpression.RankingExpression;
+import com.yahoo.yolean.Exceptions;
+
+import java.util.List;
+
+/**
+ * A tracker which prints a summary of training events to standard out
+ *
+ * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a>
+ */
+public class PrintingTracker implements Tracker {
+
+ private final int iterationEvery;
+ private final int survivorsEvery;
+ private final int printSpeciesCreationLevel;
+ private final int printSpeciesCompletionLevel;
+
+ public PrintingTracker() {
+ this(0, 1);
+ }
+
+ public PrintingTracker(int printSpeciesCreationLevel, int printSpeciesCompletionLevel) {
+ this(Integer.MAX_VALUE, Integer.MAX_VALUE, printSpeciesCreationLevel, printSpeciesCompletionLevel);
+ }
+
+ public PrintingTracker(int iterationEvery, int printSpeciesCreationLevel, int printSpeciesCompletionLevel) {
+ this(iterationEvery, Integer.MAX_VALUE, printSpeciesCreationLevel, printSpeciesCompletionLevel);
+ }
+
+ public PrintingTracker(int iterationEvery, int survivorsEvery, int printSpeciesCreationLevel, int printSpeciesCompletionLevel) {
+ this.iterationEvery = iterationEvery;
+ this.survivorsEvery = survivorsEvery;
+ this.printSpeciesCreationLevel = printSpeciesCreationLevel;
+ this.printSpeciesCompletionLevel = printSpeciesCompletionLevel;
+ }
+
+ @Override
+ public void newSpecies(Species predecessor, int initialSize, List<RankingExpression> genePool) {
+ if (predecessor.name().level() > printSpeciesCreationLevel) return;
+ System.out.println(spaces(predecessor.name().level()*2) + "Creating new species of size " + initialSize + " and a gene pool of size " + genePool.size() + " from predecessor " + predecessor);
+ }
+
+ @Override
+ public void newSpeciesCreated(Species species) {
+ if (species.name().level() > printSpeciesCreationLevel) return;
+ System.out.println(spaces(species.name().level()*2) + "Created and will now evolve " + species);
+ }
+
+ @Override
+ public void speciesCompleted(Species species) {
+ if (species.name().level() > printSpeciesCompletionLevel) return;
+ System.out.println(spaces(species.name().level()*2) + "--> Evolution completed for " + species);
+ }
+
+ /** Called each time a species (or super-species) have completed one generation */
+ @Override
+ public void iteration(Species species, int generation) {
+ try {
+ new RankingExpression(species.bestIndividual().getGenome().toString());
+ }
+ catch (Exception e) {
+ System.err.println("ERROR: " + Exceptions.toMessageString(e) + ": " + species.bestIndividual().getGenome());
+ }
+
+ if ( (generation % iterationEvery) == 0)
+ System.out.println(spaces(species.name().level()*2) + "Gen " + generation + " of " + species);
+
+ if ( (generation % survivorsEvery) == 0)
+ printPopulation(species.name().level(), species.population().members());
+ }
+
+ @Override
+ public void result(Evolvable winner) {
+ System.out.println("Learnt expression: " + winner);
+ }
+
+ private String spaces(int spaces) {
+ return " ".substring(0,spaces);
+ }
+
+ private void printPopulation(int level, List<Evolvable> survivors) {
+ if (survivors.size()<=1) return;
+ System.out.println(" Population:");
+ for (Evolvable individual : survivors)
+ System.out.println(spaces(level*2) + " " + individual);
+ }
+
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/RankingExpressionCaseList.java b/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/RankingExpressionCaseList.java
new file mode 100644
index 00000000000..a4421595917
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/RankingExpressionCaseList.java
@@ -0,0 +1,33 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.mlr.ga;
+
+import com.yahoo.searchlib.mlr.ga.CaseList;
+import com.yahoo.searchlib.rankingexpression.RankingExpression;
+import com.yahoo.searchlib.rankingexpression.evaluation.Context;
+import com.yahoo.searchlib.mlr.ga.TrainingSet;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+/**
+ * Produces a list of training cases (argument and target value pairs)
+ * from a Ranking Expression.
+ * Useful for testing.
+ *
+ * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a>
+ */
+public class RankingExpressionCaseList implements CaseList {
+
+ private final List<TrainingSet.Case> cases = new ArrayList<TrainingSet.Case>();
+
+ public RankingExpressionCaseList(List<Context> arguments, RankingExpression targetFunction) {
+ for (Context argument : arguments)
+ cases.add(new TrainingSet.Case(argument,targetFunction.evaluate(argument).asDouble()));
+ }
+
+ /** Returns the list of cases generated from the ranking expression */
+ @Override
+ public List<TrainingSet.Case> cases() { return Collections.unmodifiableList(cases); }
+
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/Recombiner.java b/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/Recombiner.java
new file mode 100644
index 00000000000..d67afddd3c5
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/Recombiner.java
@@ -0,0 +1,200 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.mlr.ga;
+
+import com.yahoo.searchlib.rankingexpression.RankingExpression;
+import com.yahoo.searchlib.rankingexpression.evaluation.DoubleValue;
+import com.yahoo.searchlib.rankingexpression.rule.*;
+
+import java.util.*;
+import java.util.logging.Logger;
+
+import static java.lang.Math.abs;
+import static java.lang.Math.max;
+import static java.lang.Math.min;
+
+/**
+ * A class which returns a mutated, recombined genome from a list of parent genomes.
+ *
+ * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a>
+ */
+public class Recombiner {
+
+ // TODO: Either make ranking expressions immutable and get rid of parent pointer, or do clone everywhere below
+
+ private static final Logger log = Logger.getLogger(Trainer.class.getName());
+
+ private final Random random = new Random();
+
+ private final List<String> features;
+
+ private final TrainingParameters parameters;
+
+ /**
+ * Creates a recombiner
+ *
+ * @param features the list of feature names which are possible within the space we are training,
+ * such that these may be spontaneously added to expressions.
+ */
+ public Recombiner(Collection<String> features, TrainingParameters trainingParameters) {
+ this.features = Collections.unmodifiableList(new ArrayList<>(features));
+ this.parameters = trainingParameters;
+ }
+
+ public RankingExpression recombine(RankingExpression genome, List<RankingExpression> genePool) {
+ List<ExpressionNode> genePoolRoots = new ArrayList<>();
+ for (RankingExpression genePoolGenome : genePool)
+ genePoolRoots.add(genePoolGenome.getRoot());
+ return new RankingExpression(mutate(genome.getRoot(), genePoolRoots, 0));
+ }
+
+ private ExpressionNode mutate(ExpressionNode gene, List<ExpressionNode> genePool, int depth) {
+ // TODO: Extract insert level
+ if (gene instanceof BooleanNode)
+ return simplifyCondition(mutateChildren((CompositeNode)gene,genePool,depth+1));
+ if (gene instanceof CompositeNode)
+ return insertNodeLevel(simplify(removeNodeLevel(mutateChildren((CompositeNode)gene,genePool,depth+1))), genePool, depth+1);
+ else
+ return insertNodeLevel(mutateLeaf(gene), genePool, depth+1);
+ }
+
+ private BooleanNode simplifyCondition(ExpressionNode node) {
+ // Nothing yet
+ return (BooleanNode)node;
+ }
+
+ /** Very basic algorithmic simplification */
+ private ExpressionNode simplify(ExpressionNode node) {
+ if (! (node instanceof CompositeNode)) return node;
+ CompositeNode composite = (CompositeNode)node;
+ if (maxDepth(composite)>2) return composite;
+ List<ExpressionNode> children = composite.children();
+ if (children.size()!=2) return composite;
+ if ( ! (children.get(0) instanceof ConstantNode)) return composite;
+ if ( ! (children.get(1) instanceof ConstantNode)) return composite;
+ return new ConstantNode(composite.evaluate(null));
+ }
+
+ private CompositeNode mutateChildren(CompositeNode gene, List<ExpressionNode> genePool, int depth) {
+ if (gene instanceof ReferenceNode) return gene; // TODO: Remove if we make this a non-composite
+
+ List<ExpressionNode> mutatedChildren = new ArrayList<>();
+ for (ExpressionNode child : gene.children())
+ mutatedChildren.add(mutate(child, genePool, depth));
+ return gene.setChildren(mutatedChildren);
+ }
+
+ private ExpressionNode insertNodeLevel(ExpressionNode gene, List<ExpressionNode> genePool, int depth) {
+ if (probability() < 0.9) return gene;
+ if (depth + maxDepth(gene) >= parameters.getMaxExpressionDepth()) return gene;
+ ExpressionNode newChild = generateChild(genePool, depth);
+ if (probability() < 0.5)
+ return generateComposite(gene, newChild, genePool, depth);
+ else
+ return generateComposite(newChild, gene, genePool, depth);
+ }
+
+ private ExpressionNode removeNodeLevel(CompositeNode gene) {
+ if (gene instanceof ReferenceNode) return gene; // TODO: Remove if we make featurenode a non-composite
+ if (probability() < 0.9) return gene;
+ return randomFrom(gene.children());
+ }
+
+ private ExpressionNode generateComposite(ExpressionNode left, ExpressionNode right, List<ExpressionNode> genePool, int depth) {
+ int type = random.nextInt(2 + ( parameters.getAllowConditions() ? 1:0 ) ); // pick equally between 2 or 3 types
+ if (type == 0) {
+ return new ArithmeticNode(left, pickArithmeticOperator(), right);
+ }
+ else if (type == 1) {
+ Function function = pickFunction();
+ if (function.arity() == 1)
+ return new FunctionNode(function, left);
+ else // arity==2
+ return new FunctionNode(function, left, right);
+ }
+ else {
+ return new IfNode(generateCondition(genePool, depth + 1), left, right);
+ }
+ }
+
+ private BooleanNode generateCondition(List<ExpressionNode> genePool, int depth) {
+ // TODO: Add set membership nodes
+ return new ComparisonNode(generateChild(genePool, depth), TruthOperator.SMALLER, generateChild(genePool, depth));
+ }
+
+ private ExpressionNode generateChild(List<ExpressionNode> genePool, int depth) {
+ if (genePool.isEmpty() || probability() < 0.1) { // entirely new child
+ return generateLeaf();
+ }
+ else { // pick from gene pool
+ ExpressionNode picked = randomFrom(genePool);
+ int pickedDepth = 0;
+ // descend until we are at at least the same depth as this depth
+ // to make sure branches spliced in are shallow enough that we avoid growing
+ // larger than maxDepth
+ while (picked instanceof CompositeNode && (pickedDepth++ < depth || probability() < 0.5)) {
+ if (picked instanceof ReferenceNode) continue; // TODO: Remove if we make referencenode a noncomposite
+ picked = randomFrom(((CompositeNode)picked).children());
+ }
+ return picked;
+ }
+ }
+
+ public ExpressionNode mutateLeaf(ExpressionNode leaf) {
+ if (probability() < 0.5) return leaf; // TODO: For performance. Drop?
+ // TODO: Other leaves
+ ConstantNode constant = (ConstantNode)leaf;
+ return new ConstantNode(DoubleValue.frozen(constant.getValue().asDouble()*aboutOne()));
+ }
+
+ public ExpressionNode generateLeaf() {
+ if (probability()<0.5 || features.size() == 0)
+ return new ConstantNode(DoubleValue.frozen(random.nextDouble() * 2000 - 1000)); // TODO: Use some non-uniform distribution
+ else
+ return new ReferenceNode(randomFrom(features));
+ }
+
+ private double aboutOne() {
+ return 1 + Math.pow(-0.1, random.nextInt(4) + 1);
+ }
+
+ private double probability() {
+ return random.nextDouble();
+ }
+
+ private <T> T randomFrom(List<T> expressionList) {
+ return expressionList.get(random.nextInt(expressionList.size()));
+ }
+
+ private ArithmeticOperator pickArithmeticOperator() {
+ switch (random.nextInt(4)) {
+ case 0: return ArithmeticOperator.PLUS;
+ case 1: return ArithmeticOperator.MINUS;
+ case 2: return ArithmeticOperator.MULTIPLY;
+ case 3: return ArithmeticOperator.DIVIDE;
+ }
+ throw new RuntimeException("This cannot happen");
+ }
+
+ /** Pick among the subset of functions which are probably useful */
+ private Function pickFunction() {
+ switch (random.nextInt(5)) {
+ case 0: return Function.tanh;
+ case 1: return Function.exp;
+ case 2: return Function.log;
+ case 3: return Function.pow;
+ case 4: return Function.sqrt;
+ }
+ throw new RuntimeException("This cannot happen");
+ }
+
+ // TODO: Make ranking expressions immutable and compute this on creation?
+ private int maxDepth(ExpressionNode node) {
+ if ( ! (node instanceof CompositeNode)) return 1;
+
+ int maxChildDepth = 0;
+ for (ExpressionNode child : ((CompositeNode)node).children())
+ maxChildDepth = Math.max(maxDepth(child), maxChildDepth);
+ return maxChildDepth + 1;
+ }
+
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/Species.java b/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/Species.java
new file mode 100644
index 00000000000..39694b6253f
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/Species.java
@@ -0,0 +1,93 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.mlr.ga;
+
+import com.yahoo.searchlib.rankingexpression.RankingExpression;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * A species is a population of evolvables.
+ * Contrary to a real species, a species population may contain (sub)species
+ * rather than individuals - at all levels but the lowest.
+ *
+ * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a>
+ */
+public class Species extends Evolvable {
+
+ private SpeciesName name;
+ private final Population population;
+
+ /** Create a species having a given initial population */
+ public Species(SpeciesName name, Population population) {
+ this.name = name;
+ this.population = population;
+ }
+
+ /** Create a species evolved from a predecessor species, using the given gene pool for mutating it */
+ private Species(SpeciesName name, Species predecessor, List<RankingExpression> genepool, TrainingEnvironment environment) {
+ this.name = name;
+ environment.tracker().newSpecies(predecessor, environment.parameters().getInitialSpeciesSize(), genepool);
+
+ // Initialize new species with members generated from the predecessor species
+ List<Evolvable> initialMembers = new ArrayList<>();
+ for (int i = 0; i < environment.parameters().getInitialSpeciesSize(); i++)
+ initialMembers.add(drawFrom(predecessor.population, i).makeSuccessor(i, genepool, environment));
+ population = new Population(initialMembers);
+
+ // Evolve the population of this species for the configured number of generations
+ environment.tracker().newSpeciesCreated(this);
+ for (int generation = 0; generation < environment.parameters().getSpeciesLifespan(); generation++) {
+ environment.tracker().iteration(this, generation+1);
+ population.evolve(generation, environment);
+ if (Double.isInfinite(bestIndividual().getFitness())) break; // jackpot
+ // if (keyboardChecker.isQPressed()) break; // user quit TODO: Make work
+ }
+ environment.tracker().speciesCompleted(this);
+ }
+
+ /**
+ * Draws a member from the given population, where the probability of being drawn is proportional to the
+ * fitness of the member
+ */
+ private Evolvable drawFrom(Population population, int succession) {
+ return population.members().get(Math.min(succession % 3, population.members().size() - 1)); // TODO: Probabilistic by fitness?
+ }
+
+ public SpeciesName name() { return name; }
+
+ /** The fitness of the fittest individual in the population */
+ @Override
+ public double getFitness() {
+ return population.best().getFitness();
+ }
+
+ /** Creates the successor of this, using its genes, mutated drawing from the given gene pool */
+ @Override
+ public Evolvable makeSuccessor(int memberNumber, List<RankingExpression> genepool, TrainingEnvironment environment) {
+ return new Species(name.successor(memberNumber), this, genepool, environment);
+ }
+
+ /** Returns the members of this species */
+ public Population population() { return population; }
+
+ /** The genes of the fittest individual in the population of this */
+ @Override
+ public RankingExpression getGenepool() { // TODO: Less sharp?
+ return population.best().getGenepool();
+ }
+
+ /** Returns the best individual below this in the species hierarchy (e.g recursively the best leaf) */
+ public Individual bestIndividual() {
+ Evolvable child = this;
+ while (child instanceof Species)
+ child = ((Species)child).population.best();
+ return (Individual)child; // it is when it is not instanceof Species
+ }
+
+ @Override
+ public String toString() {
+ return "species " + name + ", best member: " + population.best();
+ }
+
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/SpeciesName.java b/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/SpeciesName.java
new file mode 100644
index 00000000000..3bd8ae5e55f
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/SpeciesName.java
@@ -0,0 +1,54 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.mlr.ga;
+
+/**
+ * The name of a species. For tracking purposes.
+ * A name has the form superSpeciesName + "/" + serialNumber.generationNumber.
+ *
+ * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a>
+ */
+public class SpeciesName {
+
+ private final int level, serial, generation;
+
+ private final String name, prefixName;
+
+ private SpeciesName(int level, int serial, int generation, String prefixName) {
+ this.level = level;
+ this.serial = serial;
+ this.generation = generation;
+ this.prefixName = prefixName;
+ if (level == 0)
+ this.name = "";
+ else
+ this.name = prefixName + (prefixName.isEmpty() ? "" : "/") + serial + "." + generation;
+ }
+
+ /**
+ * The level in the species hierarchy of the species having this name.
+ * The root species has level 0.
+ */
+ public int level() { return level; }
+
+ /** Returns the name of the root species: The empty string at level 0 */
+ public static SpeciesName createRoot() {
+ return new SpeciesName(0 ,0 ,0, "");
+ }
+
+ @Override
+ public String toString() {
+ if (level == 0) return "(root)";
+ return name;
+ }
+
+ /** Returns the name of a new subspecies */
+ public SpeciesName subspecies(int serial) {
+ return new SpeciesName(level+1, serial, 0, name);
+ }
+
+ /** Returns the name of the successor of this species */
+ public SpeciesName successor(int serial) {
+ return new SpeciesName(level, serial, generation+1, prefixName);
+ }
+
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/Tracker.java b/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/Tracker.java
new file mode 100644
index 00000000000..d86af40b805
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/Tracker.java
@@ -0,0 +1,25 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.mlr.ga;
+
+import com.yahoo.searchlib.rankingexpression.RankingExpression;
+
+import java.util.List;
+
+/**
+ * A tracker receives callbacks about events happening during a training session.
+ *
+ * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a>
+ */
+public interface Tracker {
+
+ public void newSpecies(Species predecessor, int initialSize, List<RankingExpression> genePool);
+
+ public void newSpeciesCreated(Species species);
+
+ public void speciesCompleted(Species species);
+
+ public void iteration(Species species, int generation);
+
+ public void result(Evolvable winner);
+
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/Trainer.java b/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/Trainer.java
new file mode 100644
index 00000000000..7e2551eccb2
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/Trainer.java
@@ -0,0 +1,57 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.mlr.ga;
+
+import com.yahoo.searchlib.rankingexpression.RankingExpression;
+import com.yahoo.searchlib.rankingexpression.evaluation.DoubleValue;
+import com.yahoo.searchlib.rankingexpression.parser.ParseException;
+import com.yahoo.searchlib.rankingexpression.rule.ConstantNode;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+/**
+ * Learns a ranking expression from some seed expressions and a training set.
+ *
+ * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a>
+ */
+public class Trainer {
+
+ // TODO: Simplify this to constructor only ... or maybe remove ... or combine with TrainingEnvironment
+ // TODO: Also: Rename to Training?
+
+ private final TrainingSet trainingSet;
+ private final Set<String> argumentNames;
+
+ /**
+ * Creates a new trainer.
+ */
+ public Trainer(TrainingSet trainingSet) {
+ this(trainingSet, trainingSet.argumentNames());
+ }
+
+ /**
+ * Creates a new trainer which uses a specified list of expression argument names
+ * rather than the argument names given by the training set.
+ */
+ public Trainer(TrainingSet trainingSet, Set<String> argumentNames) {
+ this.trainingSet = trainingSet;
+ this.argumentNames = new HashSet<>(argumentNames);
+ }
+
+ public RankingExpression train(TrainingParameters parameters, Tracker tracker) {
+ TrainingEnvironment environment = new TrainingEnvironment(new Recombiner(argumentNames, parameters), tracker, trainingSet, parameters);
+ SpeciesName rootName = SpeciesName.createRoot();
+ Species genesisSubSpecies = new Species(rootName.subspecies(0), new Population(Collections.<Evolvable>singletonList(new Individual(new RankingExpression(new ConstantNode(new DoubleValue(1))), trainingSet))));
+ Species rootSpecies = (Species) new Species(rootName, new Population(Collections.<Evolvable>singletonList(genesisSubSpecies)))
+ .makeSuccessor(0, Collections.<RankingExpression>emptyList(), environment);
+ Individual winner = rootSpecies.bestIndividual();
+ tracker.result(winner);
+ return winner.getGenome();
+ }
+
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/TrainingEnvironment.java b/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/TrainingEnvironment.java
new file mode 100644
index 00000000000..757a2e4d3d2
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/TrainingEnvironment.java
@@ -0,0 +1,31 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.mlr.ga;
+
+/**
+ * The static environment of a training session
+ *
+ * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a>
+ */
+public class TrainingEnvironment {
+
+ // TODO: Not sure if this belongs ... or should even be an instance
+ // TODO: maybe collapse Trainer into this and call it TrainingSession
+ private final Recombiner recombiner;
+ private final Tracker tracker;
+ private final TrainingSet trainingSet;
+ private final TrainingParameters parameters;
+
+ public TrainingEnvironment(Recombiner recombiner, Tracker tracker,
+ TrainingSet trainingSet, TrainingParameters parameters) {
+ this.recombiner = recombiner;
+ this.tracker = tracker;
+ this.trainingSet = trainingSet;
+ this.parameters = parameters;
+ }
+
+ public Recombiner recombiner() { return recombiner; }
+ public Tracker tracker() { return tracker; }
+ public TrainingSet trainingSet() { return trainingSet; }
+ public TrainingParameters parameters() { return parameters; }
+
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/TrainingParameters.java b/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/TrainingParameters.java
new file mode 100644
index 00000000000..e18f560878e
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/TrainingParameters.java
@@ -0,0 +1,100 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.mlr.ga;
+
+import java.util.HashSet;
+import java.util.Set;
+
+/**
+ * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a>
+ */
+public class TrainingParameters {
+
+ // A note:
+ // The total number of species generated and evaluated is
+ // (generationCandidatesFactor * speciesLifespan * (initialSpeciesSize-finalSpeciesSize)/2 ) ^ speciesLevels
+ // (speciesLevel is hardcoded to 2 atm)
+
+ private int speciesLifespan = 1000;
+ private int initialSpeciesSize = 10;
+ private double finalSpeciesSize = 1;
+ private int generationCandidatesFactor = 3;
+ private int maxExpressionDepth = 6;
+ private boolean allowConditions = true;
+ private boolean errorIsRelative = true;
+ private Set<String> excludeFeatures = new HashSet<>();
+ private String trainingSetFormat = null;
+ private double validationFraction = 0.2;
+
+ /** The number of generation which a given species (or super-species at any level) lives. Default:1000 */
+ public int getSpeciesLifespan() { return speciesLifespan; }
+ public void setSpeciesLifespan(int generations) { this.speciesLifespan = generations; }
+
+ /** The number of members in a species (or super-species at any level) as it is created. Default: 10 */
+ public int getInitialSpeciesSize() { return initialSpeciesSize; }
+ public void setInitialSpeciesSize(int initialSpeciesSize) { this.initialSpeciesSize = initialSpeciesSize; }
+
+ /**
+ * The number of members in a species in its final generation.
+ * The size of the species will be reduced linearly in each generation to go from initial size to final size.
+ * Default: 1
+ */
+ public double getFinalSpeciesSize() { return finalSpeciesSize; }
+ public void setFinalSpeciesSize(int finalSpeciesSize) { this.finalSpeciesSize = finalSpeciesSize; }
+
+ /*
+ * The factor determining how many more members are generated than are allowed to survive in each generation of a species.
+ * Default: 3
+ */
+ public int getGenerationCandidatesFactor() { return generationCandidatesFactor; }
+ public void setGenerationCandidatesFactor(int generationCandidatesFactor) { this.generationCandidatesFactor = generationCandidatesFactor; }
+
+ /**
+ * The max depth of expressions this is allowed to generate.
+ * Default: 6
+ */
+ public int getMaxExpressionDepth() { return maxExpressionDepth; }
+ public void setMaxExpressionDepth(int maxExpressionDepth) { this.maxExpressionDepth = maxExpressionDepth; }
+
+ /**
+ * Whether mutation should allow creation of condition (if) expressions.
+ * Default: true
+ */
+ public boolean getAllowConditions() { return allowConditions; }
+ public void setAllowConditions(boolean allowConditions) { this.allowConditions = allowConditions; }
+
+ /**
+ * Whether errors are relative to the absolute value of the function at that point or not.
+ * If true, training will assign equal weight to the error of 1.1 for 1 and 110 for 100.
+ * If false, training will instead assign a 10x weight to the latter.
+ * Default: True.
+ */
+ public boolean getErrorIsRelative() { return errorIsRelative; }
+ public void setErrorIsRelative(boolean errorIsRelative) { this.errorIsRelative = errorIsRelative; }
+
+ /**
+ * Returns the set of features to exclude during training.
+ * Returned as an immutable set, never null.
+ */
+ public Set<String> getExcludeFeatures() { return excludeFeatures; }
+ /** Sets the features to exclude from a comma-separated string */
+ public void setExcludeFeatures(String excludeFeatureString) {
+ for (String featureName : excludeFeatureString.split(","))
+ excludeFeatures.add(featureName.trim());
+ }
+
+ /**
+ * Returns the format of the training set to read. "fv" or "cvs" is supported.
+ * If this is null the format name is taken from the last name of the file instead.
+ * Default: null.
+ */
+ public String getTrainingSetFormat() { return trainingSetFormat; }
+ public void setTrainingSetFormat(String trainingSetFormat) { this.trainingSetFormat = trainingSetFormat; }
+
+ /**
+ * Returns the fraction of the result set to hold out of training and use for validation.
+ * Default 0.2
+ */
+ public double getValidationFraction() { return validationFraction; }
+ public void setValidationFraction(double validationFraction) { this.validationFraction = validationFraction; }
+
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/TrainingSet.java b/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/TrainingSet.java
new file mode 100644
index 00000000000..507ab26806a
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/TrainingSet.java
@@ -0,0 +1,122 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.mlr.ga;
+
+import com.yahoo.searchlib.rankingexpression.RankingExpression;
+import com.yahoo.searchlib.rankingexpression.evaluation.Context;
+
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+/**
+ * A training set: a set of <i>cases</i>: Input data to output value pairs
+ *
+ * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a>
+ */
+public class TrainingSet {
+
+ private final TrainingParameters parameters;
+ private final List<Case> trainingCases;
+ private final List<Case> validationCases;
+ private final Set<String> argumentNames = new HashSet<>();
+
+ /**
+ * Creates a training set from a list of cases.
+ * The ownership of the argument list and all the cases are transferred to this by this call.
+ */
+ public TrainingSet(CaseList caseList, TrainingParameters parameters) {
+ List<Case> cases = caseList.cases();
+
+ this.parameters = parameters;
+ for (Case aCase : cases)
+ argumentNames.addAll(aCase.arguments().names());
+ argumentNames.removeAll(parameters.getExcludeFeatures());
+
+ int validationCaseCount = (int)Math.round((cases.size() * parameters.getValidationFraction()));
+ this.validationCases = cases.subList(0, validationCaseCount);
+ this.trainingCases = cases.subList(validationCaseCount, cases.size());
+ }
+
+ public Set<String> argumentNames() {
+ return Collections.unmodifiableSet(argumentNames);
+ }
+
+ /**
+ * Returns the fitness of a genome (ranking expression) according to this training set.
+ * The fitness to be returned by this is the inverse of the average squared difference between the
+ * target function result and the function result returned by the genome function.
+ */
+ // TODO: Take expression length into account.
+ public double evaluate(RankingExpression genome) {
+ boolean constantExpressionGenome = true;
+ double squaredErrorSum = 0;
+ Double previousValue = null;
+ for (Case trainingCase : trainingCases) {
+ double value = genome.evaluate(trainingCase.arguments()).asDouble();
+ double error = saneAbs(effectiveError(trainingCase.targetValue(), value));
+ squaredErrorSum += Math.pow(error, 2);
+
+ if (previousValue != null && previousValue != value)
+ constantExpressionGenome = false;
+ previousValue = value;
+ }
+ if (constantExpressionGenome) return 0; // Disqualify constant expressions as we know we're not looking for them
+ return 1 / (squaredErrorSum / trainingCases.size());
+ }
+
+ private double effectiveError(double a, double b) {
+ return parameters.getErrorIsRelative() ? errorFraction(a, b) : a - b;
+ }
+
+ /** Calculate error in a way which is easy to understand (but which behaves badly when the target is around 0 */
+ public double calculateAverageError(RankingExpression genome) {
+ double errorSum=0;
+ for (Case trainingCase : trainingCases)
+ errorSum += saneAbs(trainingCase.targetValue() - genome.evaluate(trainingCase.arguments()).asDouble());
+ return errorSum/(double) trainingCases.size();
+ }
+
+ /** Calculate error in a way which is easy to understand (but which behaves badly when the target is around 0 */
+ public double calculateAverageErrorPercentage(RankingExpression genome) {
+ double errorFractionSum = 0;
+ for (Case trainingCase : trainingCases) {
+ double errorFraction = saneAbs(errorFraction(trainingCase.targetValue(), genome.evaluate(trainingCase.arguments()).asDouble()));
+ // System.out.println("Error %: " + (100 * errorFraction + " Target: " + trainingCase.targetValue() + " Learned: " + genome.evaluate(trainingCase.arguments()).asDouble()));
+ errorFractionSum += errorFraction;
+ }
+ return ( errorFractionSum/(double) trainingCases.size() ) *100;
+ }
+
+ private double errorFraction(double a, double b) {
+ double error = a - b;
+ if (error == 0 ) return 0; // otherwise a or b is different from 0
+ if (a != 0)
+ return error / a;
+ else
+ return error / b;
+ }
+
+ private double saneAbs(double d) {
+ if (Double.isInfinite(d) || Double.isNaN(d)) return Double.MAX_VALUE;
+ return Math.abs(d);
+ }
+
+ public static class Case {
+
+ private Context arguments;
+
+ private double targetValue;
+
+ public Case(Context arguments, double targetValue) {
+ this.arguments = arguments;
+ this.targetValue = targetValue;
+ }
+
+ public double targetValue() { return targetValue; }
+
+ public Context arguments() { return arguments; }
+
+ }
+
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/caselist/CsvFileCaseList.java b/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/caselist/CsvFileCaseList.java
new file mode 100644
index 00000000000..78291768380
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/caselist/CsvFileCaseList.java
@@ -0,0 +1,56 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.mlr.ga.caselist;
+
+import com.yahoo.searchlib.mlr.ga.TrainingSet;
+import com.yahoo.searchlib.rankingexpression.evaluation.Context;
+import com.yahoo.searchlib.rankingexpression.evaluation.MapContext;
+
+import java.util.Optional;
+
+/**
+ * <p>A list of training set cases created by reading a file containing lines specifying a case
+ * per line using the following syntax
+ * <code>targetValue, argument1:value, argument2:value2, ...</code>
+ * where arguments are identifiers and values are doubles.</p>
+ *
+ * <p>Comment lines starting with "#" are ignored.</p>
+ *
+ * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a>
+ */
+public class CsvFileCaseList extends FileCaseList {
+
+ public CsvFileCaseList(String fileName) {
+ super(fileName);
+ }
+
+ protected Optional<TrainingSet.Case> lineToCase(String line, int lineNumber) {
+ String[] elements = line.split(",");
+ if (elements.length<2)
+ throw new IllegalArgumentException("At line " + lineNumber + ": Expected a comma-separated case on the " +
+ "form 'targetValue, argument1:value1, ...', but got '" + line );
+
+ double target;
+ try {
+ target = Double.parseDouble(elements[0].trim());
+ }
+ catch (NumberFormatException e) {
+ throw new IllegalArgumentException("At line " + lineNumber + ": Expected a target value double " +
+ "at the start of the line, got '" + elements[0] + "'");
+ }
+
+ Context context = new MapContext();
+ for (int i=1; i<elements.length; i++) {
+ String[] argumentPair = elements[i].split(":");
+ try {
+ if (argumentPair.length != 2) throw new IllegalArgumentException();
+ context.put(argumentPair[0].trim(),Double.parseDouble(argumentPair[1].trim()));
+ }
+ catch (IllegalArgumentException e) {
+ throw new IllegalArgumentException("At line " + lineNumber + ", element " + (i+1) +
+ ": Expected argument on the form 'identifier:double', got '" + elements[i] + "'");
+ }
+ }
+ return Optional.of(new TrainingSet.Case(context, target));
+ }
+
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/caselist/FileCaseList.java b/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/caselist/FileCaseList.java
new file mode 100644
index 00000000000..264f8f33075
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/caselist/FileCaseList.java
@@ -0,0 +1,73 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.mlr.ga.caselist;
+
+import com.yahoo.searchlib.mlr.ga.CaseList;
+import com.yahoo.searchlib.mlr.ga.TrainingParameters;
+import com.yahoo.searchlib.mlr.ga.TrainingSet;
+
+import java.io.BufferedReader;
+import java.io.FileReader;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Optional;
+
+/**
+ * @author bratseth
+ */
+public abstract class FileCaseList implements CaseList {
+
+ private List<TrainingSet.Case> cases = new ArrayList<>();
+
+ /**
+ * Reads a case list from file.
+ *
+ * @throws IllegalArgumentException if the file could not be found or opened
+ */
+ public FileCaseList(String fileName) {
+ try (BufferedReader reader = new BufferedReader(new FileReader(fileName))) {
+ String line;
+ int lineNumber=0;
+ while (null != (line=reader.readLine())) {
+ lineNumber++;
+ line = line.trim();
+ if (line.startsWith("#")) continue;
+ if (line.isEmpty()) continue;
+ Optional<TrainingSet.Case> newCase = lineToCase(line, lineNumber);
+ if (newCase.isPresent())
+ cases.add(newCase.get());
+
+ }
+ }
+ catch (IOException | IllegalArgumentException e) {
+ throw new IllegalArgumentException("Could not create a case list from file '" + fileName + "'", e);
+ }
+ }
+
+ /** Returns the case constructed from reading a line, if any */
+ protected abstract Optional<TrainingSet.Case> lineToCase(String line, int lineNumber);
+
+ @Override
+ public List<TrainingSet.Case> cases() { return Collections.unmodifiableList(cases); }
+
+ /** Creates a file case list of the type specified in the parameters */
+ public static FileCaseList create(String fileName, TrainingParameters parameters) {
+ String format = parameters.getTrainingSetFormat();
+ if (format == null)
+ format = ending(fileName);
+
+ switch (format) {
+ case "csv" : return new CsvFileCaseList(fileName);
+ case "fv" : return new FvFileCaseList(fileName);
+ default : throw new IllegalArgumentException("Unknown file format '" + format + "'");
+ }
+ }
+
+ private static String ending(String fileName) {
+ int lastDot = fileName.lastIndexOf(".");
+ if (lastDot <= 0) return null;
+ return fileName.substring(lastDot + 1, fileName.length());
+ }
+
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/caselist/FvFileCaseList.java b/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/caselist/FvFileCaseList.java
new file mode 100644
index 00000000000..ec07a939932
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/caselist/FvFileCaseList.java
@@ -0,0 +1,59 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.mlr.ga.caselist;
+
+import com.yahoo.searchlib.mlr.ga.TrainingSet;
+import com.yahoo.searchlib.rankingexpression.evaluation.Context;
+import com.yahoo.searchlib.rankingexpression.evaluation.MapContext;
+
+import java.util.Optional;
+
+/**
+ * A list of training set cases created by reading a file containing lines specifying a case
+ * per line using the following syntax
+ * <code>feature1\tfeature2\tfeature3\t...\ttarget1</code>
+ * <p>
+ * The first line contains the name of each feature in the same order.
+ *
+ * <p>Comment lines starting with "#" are ignored.</p>
+ *
+ * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a>
+ */
+// NOTE: If we get another type of case list it is time to abstract into a common CaseList base class
+public class FvFileCaseList extends FileCaseList {
+
+ private String[] argumentNames;
+
+ public FvFileCaseList(String fileName) {
+ super(fileName);
+ }
+
+ protected Optional<TrainingSet.Case> lineToCase(String line, int lineNumber) {
+ String[] values = line.split("\t");
+
+ if (argumentNames == null) { // first line
+ argumentNames = values;
+ return Optional.empty();
+ }
+
+ if (argumentNames.length != values.length)
+ throw new IllegalArgumentException("Wrong number of values at line " + lineNumber);
+
+
+ Context context = new MapContext();
+ for (int i = 0; i < values.length-1; i++)
+ context.put(argumentNames[i], toDouble(values[i], lineNumber));
+
+ double target = toDouble(values[values.length-1], lineNumber);
+ return Optional.of(new TrainingSet.Case(context, target));
+ }
+
+ private double toDouble(String s, int lineNumber) {
+ try {
+ return Double.parseDouble(s.trim());
+ } catch (NumberFormatException e) {
+ throw new IllegalArgumentException("At line " + lineNumber + ": Expected only double values, " +
+ "got '" + s + "'");
+ }
+ }
+
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/mlr/gbdt/ExpressionAnalysis.java b/searchlib/src/main/java/com/yahoo/searchlib/mlr/gbdt/ExpressionAnalysis.java
new file mode 100644
index 00000000000..874f8e8666b
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/mlr/gbdt/ExpressionAnalysis.java
@@ -0,0 +1,425 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.mlr.gbdt;
+
+import com.yahoo.searchlib.rankingexpression.rule.SetMembershipNode;
+import com.yahoo.yolean.Exceptions;
+import com.yahoo.searchlib.mlr.ga.Individual;
+import com.yahoo.searchlib.mlr.ga.PrintingTracker;
+import com.yahoo.searchlib.mlr.ga.RankingExpressionCaseList;
+import com.yahoo.searchlib.mlr.ga.Trainer;
+import com.yahoo.searchlib.mlr.ga.TrainingParameters;
+import com.yahoo.searchlib.mlr.ga.TrainingSet;
+import com.yahoo.searchlib.rankingexpression.RankingExpression;
+import com.yahoo.searchlib.rankingexpression.evaluation.Context;
+import com.yahoo.searchlib.rankingexpression.evaluation.MapContext;
+import com.yahoo.searchlib.rankingexpression.evaluation.Value;
+import com.yahoo.searchlib.rankingexpression.parser.ParseException;
+import com.yahoo.searchlib.rankingexpression.rule.Arguments;
+import com.yahoo.searchlib.rankingexpression.rule.ArithmeticNode;
+import com.yahoo.searchlib.rankingexpression.rule.ComparisonNode;
+import com.yahoo.searchlib.rankingexpression.rule.CompositeNode;
+import com.yahoo.searchlib.rankingexpression.rule.ConstantNode;
+import com.yahoo.searchlib.rankingexpression.rule.ExpressionNode;
+import com.yahoo.searchlib.rankingexpression.rule.ReferenceNode;
+import com.yahoo.searchlib.rankingexpression.rule.IfNode;
+import com.yahoo.searchlib.rankingexpression.rule.NegativeNode;
+import com.yahoo.searchlib.rankingexpression.rule.TruthOperator;
+
+import java.io.BufferedReader;
+import java.io.FileNotFoundException;
+import java.io.FileReader;
+import java.io.IOException;
+import java.io.Reader;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Random;
+
+/**
+ * A standalone tool which analyzes a GBDT form ranking expression
+ *
+ * @author bratseth
+ */
+public class ExpressionAnalysis {
+
+ private final Map<String, Feature> features = new HashMap<>();
+
+ private int currentTree;
+
+ private final RankingExpression expression;
+
+ public ExpressionAnalysis(RankingExpression expression) {
+ this.expression = expression;
+ if ( ! instanceOf(expression.getRoot(), ArithmeticNode.class)) return;
+ analyzeSum((ArithmeticNode)expression.getRoot());
+ }
+
+ /** Returns the expression analyzed by this */
+ public RankingExpression expression() { return expression; }
+
+ /** Returns the analysis of each feature in this expression as a read-only map indexed by feature name */
+ private Map<String, Feature> featureMap() {
+ return Collections.unmodifiableMap(features);
+ }
+
+ /** Returns list containing the analysis of each feature, sorted by decreasing usage */
+ private List<Feature> features() {
+ List<Feature> featureList = new ArrayList<>(features.values());
+ Collections.sort(featureList);
+ return featureList;
+ }
+
+ /** Returns the name of each feature, sorted by decreasing usage */
+ private List<String> featureNames() {
+ List<String> featureNameList = new ArrayList<>(features.values().size());
+ for (Feature feature : features())
+ featureNameList.add(feature.name());
+ return featureNameList;
+ }
+
+ private void analyzeSum(ArithmeticNode node) {
+ for (ExpressionNode child : node.children()) {
+ currentTree++;
+ analyze(child);
+ }
+ }
+
+ private void analyze(ExpressionNode node) {
+ if (node instanceof IfNode) {
+ analyzeIf((IfNode)node);
+ }
+
+ if (node instanceof CompositeNode) {
+ for (ExpressionNode child : ((CompositeNode)node).children())
+ analyze(child);
+ }
+ }
+
+ private void analyzeIf(IfNode node) {
+ if (node.getCondition() instanceof ComparisonNode)
+ analyzeComparisonIf(node);
+ else if (node.getCondition() instanceof SetMembershipNode)
+ analyzeSetMembershipIf(node);
+ else
+ System.err.println("Warning: Expected a comparison or set membership test, got " + node.getCondition().getClass());
+ }
+
+ private void analyzeComparisonIf(IfNode node) {
+ ComparisonNode comparison = (ComparisonNode)node.getCondition();
+
+ if (comparison.getOperator() != TruthOperator.SMALLER) {
+ System.err.println("Warning: This expression has " + comparison.getOperator() + " where we expect < :" +
+ comparison);
+ return;
+ }
+
+ if ( ! instanceOf(comparison.getLeftCondition(), ReferenceNode.class)) return;
+ String featureName = ((ReferenceNode)comparison.getLeftCondition()).getName();
+
+ Double value = nodeValue(comparison.getRightCondition());
+ if (value == null) return;
+
+ ComparisonFeature feature = (ComparisonFeature)features.get(featureName);
+ if (feature == null) {
+ feature = new ComparisonFeature(featureName);
+ features.put(featureName, feature);
+ }
+ feature.isComparedTo(value, currentTree, average(node.getTrueExpression()), average(node.getFalseExpression()));
+ }
+
+ private void analyzeSetMembershipIf(IfNode node) {
+ SetMembershipNode membershipTest = (SetMembershipNode)node.getCondition();
+
+ if ( ! instanceOf(membershipTest.getTestValue(), ReferenceNode.class)) return;
+ String featureName = ((ReferenceNode)membershipTest.getTestValue()).getName();
+
+ SetMembershipFeature feature = (SetMembershipFeature)features.get(featureName);
+ if (feature == null) {
+ feature = new SetMembershipFeature(featureName);
+ features.put(featureName, feature);
+ }
+ }
+
+ /**
+ * Returns the value of a constant node, or a negative wrapping a constant.
+ * Warns and returns null if it is neither.
+ */
+ private Double nodeValue(ExpressionNode node) {
+ if (node instanceof NegativeNode) {
+ NegativeNode negativeNode = (NegativeNode)node;
+ if ( ! instanceOf(negativeNode.getValue(), ConstantNode.class)) return null;
+ return - ((ConstantNode)negativeNode.getValue()).getValue().asDouble();
+ }
+ else {
+ if ( ! instanceOf(node, ConstantNode.class)) return null;
+ return ((ConstantNode)node).getValue().asDouble();
+ }
+ }
+
+
+ /** Returns the average value of all the leaf constants below this */
+ private double average(ExpressionNode node) {
+ Sum sum = new Sum();
+ average(node, sum);
+ return sum.average();
+ }
+
+ private void average(ExpressionNode node, Sum sum) {
+ if (node instanceof CompositeNode) {
+ for (ExpressionNode child : ((CompositeNode)node).children())
+ average(child, sum);
+ }
+ else {
+ Double value = nodeValue(node);
+ if (value == null) return;
+ sum.add(value);
+ }
+ }
+
+ private boolean instanceOf(Object object, Class<?> clazz) {
+ if (clazz.isAssignableFrom(object.getClass())) return true;
+ System.err.println("Warning: This expression has " + object.getClass() + " where we expect " + clazz +
+ ": Instance " + object);
+ return false;
+ }
+
+ private List<Context> generateArgumentSets(int count) {
+ List<Context> argumentSets = new ArrayList<>(count);
+ for (int i=0; i<count; i++) {
+ ArgumentIgnoringMapContext context = new ArgumentIgnoringMapContext();
+ for (Feature feature : features()) {
+ if (feature instanceof ComparisonFeature) {
+ ComparisonFeature comparison = (ComparisonFeature)feature;
+ context.put(comparison.name(),randomBetween(comparison.lowerBound(), comparison.upperBound()));
+ }
+ // TODO: else if (feature instanceof SetMembershipFeature)
+ }
+ argumentSets.add(context);
+ }
+ return argumentSets;
+ }
+
+ private Random random = new Random();
+ /** Returns a random value in [lowerBound, upperBound&gt; */
+ private double randomBetween(double lowerBound, double upperBound) {
+ return random.nextDouble()*(upperBound-lowerBound)+lowerBound;
+ }
+
+ private static class ArgumentIgnoringMapContext extends MapContext {
+
+ @Override
+ public Value get(String name, Arguments arguments,String output) {
+ return super.get(name, null, output);
+ }
+
+ }
+
+ /** Generates a textual report from analyzing this expression */
+ public String report() {
+ StringBuilder b = new StringBuilder();
+ b.append("Trees: " + currentTree).append("\n");
+ b.append("Features:\n");
+ for (Feature feature : features())
+ b.append(" " + feature).append("\n");
+ return b.toString();
+ }
+
+ private static final String usage = "\nUsage: ExpressionAnalysis [myExpressionFile.expression]";
+
+ public static void main(String[] args) {
+ if (args.length == 0) error("No arguments." + usage);
+
+ ExpressionAnalysis analysis = analysisFromFile(args[0]);
+
+ if (1==1) return; // Turn off ga training
+ if (args.length == 1) {
+ new GATraining(analysis);
+ }
+ else if (args.length == 2) {
+ try {
+ new LearntExpressionAnalysis(analysis, new RankingExpression(args[1]));
+ }
+ catch (ParseException e) {
+ error("Syntax error in argument expression: " + Exceptions.toMessageString(e));
+ }
+ }
+ else {
+ error("Unexpectedly got more than 2 arguments." + usage);
+ }
+
+ }
+
+ private static ExpressionAnalysis analysisFromFile(String fileName) {
+ try (Reader fileReader = new BufferedReader(new FileReader(fileName))) {
+ System.out.println("Analyzing " + fileName + "...");
+ ExpressionAnalysis analysis = new ExpressionAnalysis(new RankingExpression(fileReader));
+ System.out.println(analysis.report());
+ return analysis;
+ }
+ catch (FileNotFoundException e) {
+ error("Could not find '" + fileName + "'");
+ }
+ catch (IOException e) {
+ error("Failed reading '" + fileName + "': " + Exceptions.toMessageString(e));
+ }
+ catch (ParseException e) {
+ error("Syntax error in '" + fileName + "': " + Exceptions.toMessageString(e));
+ }
+ return null;
+ }
+
+ private static class LearntExpressionAnalysis {
+
+ public LearntExpressionAnalysis(ExpressionAnalysis analysis, RankingExpression learntExpression) {
+ int cases = 1000;
+ TrainingSet newTrainingSet = new TrainingSet(new RankingExpressionCaseList(analysis.generateArgumentSets(cases),
+ analysis.expression()), new TrainingParameters());
+ Individual winner = new Individual(learntExpression, newTrainingSet);
+ System.out.println("With separate training set: " + winner.toShortString() + " (" + winner.calculateAverageError() + ")");
+ }
+
+ }
+
+ private static class GATraining {
+
+ public GATraining(ExpressionAnalysis analysis) {
+ int skipFeatures = 0;
+ int featureCount = analysis.featureNames().size();
+ int cases = 1000;
+ TrainingParameters parameters = new TrainingParameters();
+ parameters.setInitialSpeciesSize(50);
+ parameters.setSpeciesLifespan(50);
+ //parameters.setAllowConditions(false); // disallow non-smooth functions
+ parameters.setMaxExpressionDepth(8);
+ TrainingSet trainingSet = new TrainingSet(new RankingExpressionCaseList(analysis.generateArgumentSets(cases),
+ analysis.expression()), parameters);
+ Trainer trainer = new Trainer(trainingSet, new HashSet<>(analysis.featureNames().subList(skipFeatures, featureCount)));
+
+ System.out.println("Learning ...");
+ RankingExpression learntExpression = trainer.train(parameters, new PrintingTracker(100, 0, 1));
+ System.out.println("Learnt expression: " + learntExpression);
+
+ // Check for overtraining
+ new LearntExpressionAnalysis(analysis, learntExpression);
+ }
+
+ }
+
+ private static void error(String message) {
+ System.err.println(message);
+ System.exit(1);
+ }
+
+ public abstract static class Feature implements Comparable<Feature> {
+
+ private final String name;
+
+ protected Feature(String name) {
+ this.name = name;
+ }
+
+ public String name() { return name; }
+
+ /** Primary sort by type, secondary by name */
+ @Override
+ public int compareTo(Feature other) {
+ int typeComparison = this.getClass().getName().compareTo(other.getClass().getName());
+ if (typeComparison != 0) return typeComparison;
+ return this.name.compareTo(other.name);
+ }
+
+ }
+
+ /** A feature used in comparisons. These are the ones on which our serious analysis is focused */
+ public static class ComparisonFeature extends Feature {
+
+ private double lowerBound = Double.MAX_VALUE;
+ private double upperBound = Double.MIN_VALUE;
+
+ /** The number of usages of this feature */
+ private int usages = 0;
+
+ /** The sum of the tree numbers where this is accessed */
+ private int treeNumberSum = 0;
+
+ /**
+ * The net times where the left values are smaller than the right values for this
+ * (which is a measure of correlation between input and output because the comparison is &lt;)
+ */
+ private int correlationCount = 0;
+
+ /**
+ * The sum difference in returned value between choosing the right and left branch due to this feature
+ */
+ private double netSum = 0;
+
+ public ComparisonFeature(String name) {
+ super(name);
+ }
+
+ public double lowerBound() { return lowerBound; }
+ public double upperBound() { return upperBound; }
+
+ public void isComparedTo(double value, int inTreeNumber, double leftAverage, double rightAverage) {
+ lowerBound = Math.min(lowerBound, value);
+ upperBound = Math.max(upperBound, value);
+ usages++;
+ treeNumberSum += inTreeNumber;
+ correlationCount += leftAverage < rightAverage ? 1 : -1;
+ netSum += rightAverage - leftAverage;
+ }
+
+ /** Override to do secondary sort by usages */
+ public int compareTo(Feature o) {
+ if ( ! (o instanceof ComparisonFeature)) return super.compareTo(o);
+ ComparisonFeature other = (ComparisonFeature)o;
+ return - Integer.compare(this.usages, other.usages);
+ }
+
+ @Override
+ public String toString() {
+ return "Numeric feature: " + name() +
+ ": range [" + lowerBound + ", " + upperBound + "]" +
+ ", usages " + usages +
+ ", average tree occurrence " + (treeNumberSum / usages) +
+ ", correlation: " + (correlationCount / (double)usages) +
+ ", net contribution: " + netSum;
+ }
+
+ }
+
+ /** A feature used in set membership tests */
+ public static class SetMembershipFeature extends Feature {
+
+ public SetMembershipFeature(String name) {
+ super(name);
+ }
+
+ @Override
+ public String toString() {
+ return "Categorical feature: " + name();
+ }
+
+ }
+
+ /** A sum which can returns its average */
+ private static class Sum {
+
+ private double sum;
+ private int count;
+
+ public void add(double value) {
+ sum+=value;
+ count++;
+ }
+
+ public double average() {
+ return sum / count;
+ }
+
+ }
+
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/package-info.java b/searchlib/src/main/java/com/yahoo/searchlib/package-info.java
new file mode 100644
index 00000000000..63343d425b6
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/package-info.java
@@ -0,0 +1,5 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+@ExportPackage
+package com.yahoo.searchlib;
+
+import com.yahoo.osgi.annotation.ExportPackage;
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/ranking/features/ElementCompleteness.java b/searchlib/src/main/java/com/yahoo/searchlib/ranking/features/ElementCompleteness.java
new file mode 100644
index 00000000000..fb74fb4de6b
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/ranking/features/ElementCompleteness.java
@@ -0,0 +1,96 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.ranking.features;
+
+import com.google.common.annotations.Beta;
+import com.yahoo.searchlib.rankingexpression.evaluation.DoubleValue;
+import com.yahoo.searchlib.rankingexpression.evaluation.Value;
+
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Calculates the elementCompleteness features
+ *
+ * @author bratseth
+ */
+public class ElementCompleteness {
+
+ /** Hardcoded to default for now */
+ private static final double fieldCompletenessImportance = 0.05;
+
+ /**
+ * Computes the following elementCompleteness features:
+ * <ul>
+ * <li><code>completeness</code>
+ * <li><code>fieldCompleteness</code>
+ * <li><code>queryCompleteness</code>
+ * <li><code>elementWeight</code>
+ * </ul>
+ *
+ * @param queryTerms the query terms with associated weights to compute over
+ * @param field a set of weighted field values, where each is taken to be a space-separated string of tokens
+ * @return a features object containing the values listed above
+ */
+ public static Features compute(Map<String, Integer> queryTerms, Item[] field) {
+ double completeness = 0;
+ double fieldCompleteness = 0;
+ double queryCompleteness = 0;
+ double elementWeight = 0;
+
+ double queryTermWeightSum = sum(queryTerms.values());
+
+ for (Item item : field) {
+ String[] itemTokens =item.value().split(" ");
+ int matchCount = 0;
+ int matchWeightSum = 0;
+ for (String token : itemTokens) {
+ Integer weight = queryTerms.get(token);
+ if (weight == null) continue;
+ matchCount++;
+ matchWeightSum += weight;
+ }
+ double itemFieldCompleteness = (double)matchCount / itemTokens.length;
+ double itemQueryCompleteness = matchWeightSum / queryTermWeightSum;
+ double itemCompleteness =
+ fieldCompletenessImportance * itemFieldCompleteness +
+ (1 - fieldCompletenessImportance) * itemQueryCompleteness;
+ if (itemCompleteness > completeness) {
+ completeness = itemCompleteness;
+ fieldCompleteness = itemFieldCompleteness;
+ queryCompleteness = itemQueryCompleteness;
+ elementWeight = item.weight();
+ }
+ }
+
+ Map<String, Value> features = new HashMap<>();
+ features.put("completeness", new DoubleValue(completeness));
+ features.put("fieldCompleteness", new DoubleValue(fieldCompleteness));
+ features.put("queryCompleteness", new DoubleValue(queryCompleteness));
+ features.put("elementWeight", new DoubleValue(elementWeight));
+ return new Features(features);
+ }
+
+ private static int sum(Collection<Integer> integers) {
+ int sum = 0;
+ for (int integer : integers)
+ sum += integer;
+ return sum;
+ }
+
+ public static class Item {
+
+ private final String value;
+ private final double weight;
+
+ public Item(String value, double weight) {
+ this.value = value;
+ this.weight = weight;
+ }
+
+ public String value() { return value; }
+ public double weight() { return weight; }
+
+ }
+
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/ranking/features/Features.java b/searchlib/src/main/java/com/yahoo/searchlib/ranking/features/Features.java
new file mode 100644
index 00000000000..9dac3db11c8
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/ranking/features/Features.java
@@ -0,0 +1,30 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.ranking.features;
+
+import com.google.common.annotations.Beta;
+import com.yahoo.searchlib.rankingexpression.evaluation.Value;
+
+import java.util.Collections;
+import java.util.Map;
+
+/**
+ * A set of (immutable) computed features
+ *
+ * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a>
+ */
+@Beta
+public class Features {
+
+ private Map<String, Value> features;
+
+ /** Creates a set of features by assigning ownership of map of features to this */
+ Features(Map<String, Value> features) {
+ this.features = Collections.unmodifiableMap(features);
+ }
+
+ /** Returns the Value of a feature, or null if it is not present in this */
+ public Value get(String featureName) {
+ return features.get(featureName);
+ }
+
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/ranking/features/FieldTermMatch.java b/searchlib/src/main/java/com/yahoo/searchlib/ranking/features/FieldTermMatch.java
new file mode 100644
index 00000000000..e5b4a899844
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/ranking/features/FieldTermMatch.java
@@ -0,0 +1,48 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.ranking.features;
+
+import com.google.common.annotations.Beta;
+import com.yahoo.searchlib.rankingexpression.evaluation.DoubleValue;
+import com.yahoo.searchlib.rankingexpression.evaluation.Value;
+
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Calculates the fieldTermMatch features
+ *
+ * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a>
+ */
+@Beta
+public class FieldTermMatch {
+
+ /**
+ * Computes the fieldTermMatch features:
+ * <ul>
+ * <li><code>firstPosition</code> - the position of the first occurrence of this query term in this index field</li>
+ * <li><code>occurrences</code> - the position of the first occurrence of this query term in this index field</li>
+ * </ul>
+ * @param queryTerm the term to return these features for
+ * @param field the field value to compute over, assumed to be a space-separated string of tokens
+ * @return a features object containing the two values described above
+ */
+ public static Features compute(String queryTerm, String field) {
+ Map<String, Value> features = new HashMap<>();
+
+ String[] tokens = field.split(" ");
+
+ int occurrences = 0;
+ int firstPosition = 1000000;
+ for (int i = 0; i < tokens.length; i++) {
+ if (tokens[i].equals(queryTerm)) {
+ if (occurrences == 0)
+ firstPosition = i;
+ occurrences++;
+ }
+ }
+ features.put("firstPosition", new DoubleValue(firstPosition));
+ features.put("occurrences", new DoubleValue(occurrences));
+ return new Features(features);
+ }
+
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/ranking/features/fieldmatch/Field.java b/searchlib/src/main/java/com/yahoo/searchlib/ranking/features/fieldmatch/Field.java
new file mode 100644
index 00000000000..b71eff8ffde
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/ranking/features/fieldmatch/Field.java
@@ -0,0 +1,60 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.ranking.features.fieldmatch;
+
+import com.google.common.collect.ImmutableList;
+
+import java.util.List;
+
+/**
+ * Represents a document field which can be matched and ranked against.
+ *
+ * @author bratseth
+ */
+public class Field {
+
+ private final ImmutableList<Term> terms;
+
+ /** Creates a field from a space-separated string */
+ public Field(String fieldString) {
+ ImmutableList.Builder<Term> list = new ImmutableList.Builder<>();
+ for (String term : fieldString.split(" "))
+ list.add(new Term(term));
+ this.terms = list.build();
+ }
+
+ /** Creates a field from a list of terms */
+ public Field(List<Term> terms) {
+ this.terms = ImmutableList.copyOf(terms);
+ }
+
+ /** Returns an immutable list of the terms in this */
+ public List<Term> terms() { return terms; }
+
+ /** A term in a field */
+ public static class Term {
+
+ private final String value;
+ private final float exactness;
+
+ /** Creates a term with the given value and full exactness (1.0) */
+ public Term(String value) {
+ this(value, 1.0f);
+ }
+
+ public Term(String value, float exactness) {
+ this.value = value;
+ this.exactness = exactness;
+ }
+
+ /** Returns the string value of this term */
+ public String value() { return value; }
+
+ /**
+ * Returns the degree to which this term is exactly what was in the document (1.0),
+ * or some stemmed form (closer to 0)
+ */
+ public float exactness() { return exactness; }
+
+ }
+
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/ranking/features/fieldmatch/FieldMatchMetrics.java b/searchlib/src/main/java/com/yahoo/searchlib/ranking/features/fieldmatch/FieldMatchMetrics.java
new file mode 100644
index 00000000000..77083d4edb4
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/ranking/features/fieldmatch/FieldMatchMetrics.java
@@ -0,0 +1,536 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.ranking.features.fieldmatch;
+
+import java.lang.reflect.Method;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+import static java.lang.Math.*;
+
+/**
+ * The collection of metrics calculated by the string match metric calculator.
+ *
+ * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a>
+ */
+public final class FieldMatchMetrics implements Cloneable {
+
+ /** The calculator creating this - given on initialization */
+ private FieldMatchMetricsComputer source;
+
+ /** The trace accumulated during execution - empty if no tracing */
+ private final Trace trace=new Trace();
+
+ private boolean complete=false;
+
+ // Metrics
+ private int outOfOrder;
+ private int segments;
+ private int gaps;
+ private int gapLength;
+ private int longestSequence;
+ private int head;
+ private int tail;
+ private int matches;
+ private float proximity;
+ private float unweightedProximity;
+ private float segmentDistance;
+ private int pairs;
+ private float weight;
+ private float significance;
+ private float occurrence;
+ private float weightedOccurrence;
+ private float absoluteOccurrence;
+ private float weightedAbsoluteOccurrence;
+ private float significantOccurrence;
+ private float weightedExactnessSum;
+ private int weightSum;
+
+ // Temporary variables
+ private int currentSequence;
+ private List<Integer> segmentStarts=new ArrayList<>();
+ private int queryLength;
+
+ public FieldMatchMetrics(FieldMatchMetricsComputer source) {
+ this.source=source;
+
+ complete=false;
+
+ outOfOrder = 0;
+ segments = 0;
+ gaps = 0;
+ gapLength = 0;
+ longestSequence = 1;
+ head = -1;
+ tail = -1;
+ proximity = 0;
+ unweightedProximity = 0;
+ segmentDistance = 0;
+ matches = 0;
+ pairs = 0;
+ weight = 0;
+ significance = 0;
+ weightedExactnessSum = 0;
+ weightSum = 0;
+
+ currentSequence=0;
+ segmentStarts.clear();
+ queryLength=source.getQuery().getTerms().length;
+ }
+
+ /** Are these metrics representing a complete match */
+ public boolean isComplete() { return complete; }
+
+ public void setComplete(boolean complete) { this.complete=complete; }
+
+ /** Returns the segment start points */
+ public List<Integer> getSegmentStarts() { return segmentStarts; }
+
+ /**
+ * Returns a metric by name
+ *
+ * @throws IllegalArgumentException if the metric name (case sensitive) is not present
+ */
+ public float get(String name) {
+ try {
+ Method getter=getClass().getMethod("get" + name.substring(0,1).toUpperCase() + name.substring(1));
+ return ((Number)getter.invoke(this)).floatValue();
+ }
+ catch (NoSuchMethodException e) {
+ throw new IllegalArgumentException("No metric named '" + name + "' is known");
+ }
+ catch (Exception e) {
+ throw new RuntimeException("Error getting metric '" + name + "'",e);
+ }
+ }
+
+ // Base metrics ----------------------------------------------------------------------------------------------
+
+ /** Returns the total number of out of order token sequences within field segments */
+ public int getOutOfOrder() { return outOfOrder; }
+
+ /** Returns the number of field text segments which are needed to match the query as completely as possible */
+ public int getSegments() { return segments; }
+
+ /** Returns the total number of position jumps (backward or forward) within document segments */
+ public int getGaps() { return gaps; }
+
+ /** Returns the summed size of all gaps within segments */
+ public int getGapLength() { return gapLength; }
+
+ /** Returns the size of the longest matched continuous, in-order sequence in the document */
+ public int getLongestSequence() { return longestSequence; }
+
+ /** Returns the number of tokens in the field preceding the start of the first matched segment */
+ public int getHead() { return head; }
+
+ /** Returns the number of tokens in the field following the end of the last matched segment */
+ public int getTail() { return tail; }
+
+ /** Returns the number of query terms which was matched in this field */
+ public int getMatches() { return matches; }
+
+ /** Returns the number of in-segment token pairs */
+ public int getPairs() { return pairs; }
+
+ /**
+ * Returns the normalized proximity of the matched terms, weighted by the connectedness of the query terms.
+ * This number is 0.1 if all the matched terms are and have default or lower connectedness, close to 1 if they
+ * are following in sequence and have a high connectedness, and close to 0 if they are far from each other in the
+ * segment or out of order
+ */
+ public float getAbsoluteProximity() {
+ if (pairs <1) return 0.1f;
+
+ return proximity/pairs;
+ }
+
+ /**
+ * Returns the normalized proximity of the matched terms, not taking term connectedness into account.
+ * This number is close to 1 if all the matched terms are
+ * following each other in sequence, and close to 0 if they are far from each other or out of order
+ */
+ public float getUnweightedProximity() {
+ if (pairs <1) return 1f;
+ return unweightedProximity/pairs;
+ }
+
+ /**
+ * Returns the sum of the distance between all segments making up a match to the query, measured
+ * as the sum of the number of token positions separating the <i>start</i> of each field adjacent segment.
+ */
+ public float getSegmentDistance() { return segmentDistance; }
+
+ /**
+ * <p>Returns the normalized weight of this match relative to the whole query:
+ * The sum of the weights of all <i>matched</i> terms/the sum of the weights of all <i>query</i> terms
+ * If all the query terms were matched, this is 1. If no terms were matched, or these matches has weight zero,
+ * this is 0.</p>
+ *
+ * <p>As the sum of this number over all the terms of the query is always 1, sums over all fields of
+ * normalized rank features for each field multiplied by this number for the same field will produce a
+ * normalized number.</p>
+ *
+ * <p>Note that this scales with the number of matched query terms in the field. If you want a component which does
+ * not, divide by matches.</p>
+ */
+ public float getWeight() { return weight; }
+
+ /**
+ * <p>Returns the normalized term significance (1-frequency) of the terms of this match relative to the whole query:
+ * The sum of the significance of all <i>matched</i> terms/the sum of the significance of all <i>query</i> terms
+ * If all the query terms were matched, this is 1. If no terms were matched, or if the significance of all the matched terms
+ * is zero (they are present in all (possible) documents), this number is zero.</p>
+ *
+ * <p>As the sum of this number over all the terms of the query is always 1, sums over all fields of
+ * normalized rank features for each field multiplied by this number for the same field will produce a
+ * normalized number.</p>
+ *
+ * <p>Note that this scales with the number of matched query terms in the field. If you want a component which does
+ * not, divide by matches.</p>
+ */
+ public float getSignificance() { return significance; }
+
+ /**
+ * <p>Returns a normalized measure of the number of occurrence of the terms of the query.
+ * This number is 1 if there are many occurrences of the query terms <i>in absolute terms,
+ * or relative to the total content of the field</i>, and 0 if there are none.</p>
+ *
+ * <p>This is suitable for occurrence in fields containing regular text.</p>
+ */
+ public float getOccurrence() { return occurrence; }
+
+ /**
+ * <p>Returns a normalized measure of the number of occurrence of the terms of the query:
+ *
+ * <code>sum over all query terms(min(number of occurrences of the term,maxOccurrences))/(query term count*100)</code>
+ *
+ * <p>This number is 1 if there are many occurrences of the query terms, and 0 if there are none.
+ * This number does not take the actual length of the field into account, so it is suitable for uses of occurrence
+ * to denote importance across multiple terms.</p>
+ */
+ public float getAbsoluteOccurrence() { return absoluteOccurrence; }
+
+ /**
+ * <p>Returns a normalized measure of the number of occurrence of the terms of the query, weighted by term weight.
+ * This number is close to 1 if there are many occurrences of highly weighted query terms,
+ * in absolute terms, or relative to the total content of the field, and 0 if there are none.</p>
+ */
+ public float getWeightedOccurrence() { return weightedOccurrence; }
+
+ /**
+ * <p>Returns a normalized measure of the number of occurrence of the terms of the query, taking weights
+ * into account so that occurrences of higher weighted query terms has more impact than lower weighted terms.</p>
+ *
+ * <p>This number is 1 if there are many occurrences of the highly weighted terms, and 0 if there are none.
+ * This number does not take the actual length of the field into account, so it is suitable for uses of occurrence
+ * to denote importance across multiple terms.</p>
+ */
+ public float getWeightedAbsoluteOccurrence() { return weightedAbsoluteOccurrence; }
+
+ /**
+ * <p>Returns a normalized measure of the number of occurrence of the terms of the query
+ * <i>in absolute terms,
+ * or relative to the total content of the field</i>, weighted by term significance.
+ *
+ * <p>This number is 1 if there are many occurrences of the highly significant terms, and 0 if there are none.</p>
+ */
+ public float getSignificantOccurrence() { return significantOccurrence; }
+
+ /**
+ * <p>Returns the degree to which the query terms submitted matched exactly terms contained in the document.
+ * This is 1 if all the terms matched exactly, and closer to 0 as more of the terms was matched only as stem forms.
+ * </p>
+ *
+ * <p>This is the query term weighted average of the exactness of each match, where the exactness of a match is
+ * the product of the exactness of the matching query term and the matching field term:
+ * <code>
+ * sum over matching query terms(query term weight * query term exactness * field term exactness) /
+ * sum over matching query terms(query term weight)
+ * </code>
+ */
+ public float getExactness() {
+ if (matches == 0) return 0;
+ return weightedExactnessSum / weightSum;
+ }
+
+ // Derived metrics ----------------------------------------------------------------------------------------------
+
+ /** The ratio of query tokens which was matched in the field: <code>matches/queryLength</code> */
+ public float getQueryCompleteness() {
+ return (float)matches/source.getQuery().getTerms().length;
+ }
+
+ /** The ratio of query tokens which was matched in the field: <code>matches/fieldLength</code> */
+ public float getFieldCompleteness() {
+ return (float)matches/source.getField().terms().size();
+ }
+
+ /**
+ * Total completeness, where field completeness is more important:
+ * <code>queryCompleteness * ( 1 - fieldCompletenessImportance) + fieldCompletenessImportance * fieldCompleteness</code>
+ */
+ public float getCompleteness() {
+ float fieldCompletenessImportance=source.getParameters().getFieldCompletenessImportance();
+ return getQueryCompleteness() * ( 1 - fieldCompletenessImportance) + fieldCompletenessImportance*getFieldCompleteness();
+ }
+
+ /** Returns how well the order of the terms agreed in segments: <code>1-outOfOrder/pairs</code> */
+ public float getOrderness() {
+ if (pairs ==0) return 1f;
+ return 1-(float)outOfOrder/pairs;
+ }
+
+ /** Returns the degree to which different terms are related (occurring in the same segment): <code>1-segments/(matches-1)</code> */
+ public float getRelatedness() {
+ if (matches==0) return 0;
+ if (matches==1) return 1;
+ return 1-(float)(segments-1)/(matches-1);
+ }
+
+ /** Returns <code>longestSequence/matches</code> */
+ public float getLongestSequenceRatio() {
+ if (matches==0) return 0;
+ return (float)longestSequence/matches;
+ }
+
+ /** Returns the closeness of the segments in the field: <code>1-segmentDistance/fieldLength</code> */
+ public float getSegmentProximity() {
+ if (matches==0) return 0;
+ return 1-(float)segmentDistance/source.getField().terms().size();
+ }
+
+ /**
+ * Returns a value which is close to 1 when matched terms are close and close to zero when they are far apart
+ * in the segment. Relatively more connected terms influence this value more.
+ * This is absoluteProximity/average connectedness.
+ */
+ public float getProximity() {
+ float totalConnectedness=0;
+ for (int i=1; i<queryLength; i++) {
+ totalConnectedness+=Math.max(0.1,source.getQuery().getTerms()[i].getConnectedness());
+ }
+ float averageConnectedness=0.1f;
+ if (queryLength>1)
+ averageConnectedness=totalConnectedness/(queryLength-1);
+ return getAbsoluteProximity()/averageConnectedness;
+ }
+
+ /**
+ * <p>Returns the average of significance and weight.</p>
+ *
+ * <p>As the sum of this number over all the terms of the query is always 1, sums over all fields of
+ * normalized rank features for each field multiplied by this number for the same field will produce a
+ * normalized number.</p>
+ *
+ * <p>Note that this scales with the number of matched query terms in the field. If you want a component which does
+ * not, divide by matches.</p>
+ */
+ public float getImportance() {
+ return (getSignificance() + getWeight()) / 2;
+ }
+
+ /** A normalized measure of how early the first segment occurs in this field: <code>1-head/(max(6,field.length)-1)</code> */
+ public float getEarliness() {
+ if (matches == 0) return 0; // Covers field.length==0 too
+ if (source.getField().terms().size() == 1) return 1;
+ return 1 - (float)head/(max(6, source.getField().terms().size()) - 1);
+ }
+
+ /**
+ * <p>A ready-to-use aggregate match score. Use this if you don't have time to find a better application specific
+ * aggregate score of the fine grained match metrics.</p>
+ *
+ * <p>The current formula is
+ *
+ * <code>
+ * ( proximityCompletenessImportance * (1-relatednessImportance + relatednessImportance*relatedness)
+ * proximity * exactness * completeness^2 + earlinessImportance * earliness + segmentProximityImportance * segmentProximity )
+ * / (proximityCompletenessImportance + earlinessImportance + relatednessImportance)</code>
+ *
+ * but this is subject to change (i.e improvement) at any time.
+ * </p>
+ *
+ *
+ * <p>Weight and significance are not taken into account because this is mean to capture tha quality of the
+ * match in this field, while those measures relate this match to matches in other fields. This number
+ * can be multiplied with those values when combining with other field match scores.</p>
+ */
+ public float getMatch() {
+ float proximityCompletenessImportance = source.getParameters().getProximityCompletenessImportance();
+ float earlinessImportance = source.getParameters().getEarlinessImportance();
+ float relatednessImportance = source.getParameters().getRelatednessImportance();
+ float segmentProximityImportance = source.getParameters().getSegmentProximityImportance();
+ float occurrenceImportance = source.getParameters().getOccurrenceImportance();
+ float scaledRelatedness = 1 - relatednessImportance + relatednessImportance*getRelatedness();
+
+ return ( proximityCompletenessImportance * scaledRelatedness * getProximity() * getExactness() * getCompleteness() * getCompleteness()
+ + earlinessImportance * getEarliness()
+ + segmentProximityImportance * getSegmentProximity()
+ + occurrenceImportance * getOccurrence())
+ / (proximityCompletenessImportance + earlinessImportance + segmentProximityImportance + occurrenceImportance);
+ }
+
+ /**
+ * <p>The metric use to select the best segments during execution of the string match metric algorithm.</p>
+ *
+ * <p>This metric, and any metric it depends on, must be correct each time a segment is completed,
+ * not only when the metrics are complete, because this metric is used to choose segments during calculation.</p>
+ */
+ float getSegmentationScore() {
+ if (segments==0) return 0;
+ return getAbsoluteProximity() * getExactness() / (segments * segments);
+ }
+
+ // Events emitted from the computer while matching strings ----------------------------------------------------
+ // Note that one move in the computer may cause multiple events
+
+ // Events on single positions ----------
+
+ /** Called once for every match */
+ void onMatch(int i, int j) {
+ if (matches>=source.getField().terms().size()) return;
+ matches++;
+ weight += (float)source.getQuery().getTerms()[i].getWeight() / source.getQuery().getTotalTermWeight();
+ significance += source.getQuery().getTerms()[i].getSignificance() / source.getQuery().getTotalSignificance();
+ int queryTermWeight = source.getQuery().getTerms()[i].getWeight();
+ weightedExactnessSum += queryTermWeight * source.getQuery().getTerms()[i].getExactness() * source.getField().terms().get(j).exactness();
+ weightSum += queryTermWeight;
+ }
+
+ /** Called once per sequence, when the sequence starts */
+ void onSequenceStart(int j) {
+ if (head==-1 || j<head)
+ head=j;
+
+ currentSequence=1;
+ }
+
+ /** Called once per sequence when the sequence ends */
+ void onSequenceEnd(int j) {
+ int sequenceTail = source.getField().terms().size() - j - 1;
+ if (tail ==-1 || sequenceTail < tail)
+ tail = sequenceTail;
+
+ if (currentSequence > longestSequence)
+ longestSequence = currentSequence;
+ currentSequence = 0;
+ }
+
+ /** Called once when this value is calculated, before onComplete */
+ void setOccurrence(float occurrence) { this.occurrence=occurrence; }
+
+ /** Called once when this value is calculated, before onComplete */
+ void setWeightedOccurrence(float weightedOccurrence) { this.weightedOccurrence=weightedOccurrence; }
+
+ /** Called once when this value is calculated, before onComplete */
+ void setAbsoluteOccurrence(float absoluteOccurrence) { this.absoluteOccurrence=absoluteOccurrence; }
+
+ /** Called once when this value is calculated, before onComplete */
+ void setWeightedAbsoluteOccurrence(float weightedAbsoluteOccurrence) { this.weightedAbsoluteOccurrence=weightedAbsoluteOccurrence; }
+
+ /** Called once when this value is calculated, before onComplete */
+ void setSignificantOccurrence(float significantOccurrence) { this.significantOccurrence =significantOccurrence; }
+
+ /** Called once when matching is complete */
+ void onComplete() {
+ // segment distance - calculated from sorted segment starts
+ if (segmentStarts.size()<=1) {
+ segmentDistance=0;
+ }
+ else {
+ Collections.sort(segmentStarts);
+ for (int i=1; i<segmentStarts.size(); i++) {
+ segmentDistance+=segmentStarts.get(i)-segmentStarts.get(i-1)+1;
+ }
+ }
+
+ if (head==-1) head=0;
+ if (tail==-1) tail=0;
+ }
+
+ // Events on pairs ----------
+
+ /** Called when <i>any</i> pair is encountered */
+ void onPair(int i, int j, int previousJ) {
+ int distance = j-previousJ-1;
+ if (distance < 0) distance++; // Discontinuity where the two terms are in the same position
+ if (abs(distance) > source.getParameters().getProximityLimit()) return; // Contribution=0
+
+ // We have an in-segment pair
+ float pairProximity = source.getParameters().getProximity(distance + source.getParameters().getProximityLimit());
+
+ unweightedProximity += pairProximity;
+
+ float connectedness = source.getQuery().getTerms()[i].getConnectedness();
+ proximity += pow(pairProximity, connectedness/0.1) * max(0.1, connectedness);
+
+ pairs++;
+ }
+
+ /** Called when an in-sequence pair is encountered */
+ void onInSequence(int i, int j, int previousJ) {
+ currentSequence++;
+ }
+
+ /** Called when a gap (within a sequence) is encountered */
+ void onInSegmentGap(int i, int j, int previousJ) {
+ gaps++;
+ if (j>previousJ) {
+ gapLength+=abs(j-previousJ)-1; // gap length may be 0 if the gap was in the query
+ }
+ else {
+ outOfOrder++;
+ gapLength+=abs(j-previousJ);
+ }
+ }
+
+ /**
+ * Called when a new segment is started
+ *
+ * @param previousJ the end of the previous segment, or -1 if this is the first segment
+ * */
+ void onNewSegment(int i, int j, int previousJ) {
+ segments++;
+ segmentStarts.add(j);
+ }
+
+ @Override
+ public FieldMatchMetrics clone() {
+ try {
+ FieldMatchMetrics clone=(FieldMatchMetrics)super.clone();
+ clone.segmentStarts=new ArrayList<>(segmentStarts);
+ return clone;
+ }
+ catch (CloneNotSupportedException e) {
+ throw new RuntimeException("Programming error",e);
+ }
+ }
+
+ @Override
+ public String toString() {
+ return "Metrics: [match: " + getMatch() + "]";
+ }
+
+ public String toStringDump() {
+ try {
+ StringBuilder b=new StringBuilder();
+ for (Method m : this.getClass().getDeclaredMethods()) {
+ if ( ! m.getName().startsWith("get")) continue;
+ if (m.getReturnType()!=Integer.TYPE && m.getReturnType()!=Float.TYPE) continue;
+ if ( m.getParameterTypes().length!=0 ) continue;
+
+ Object value=m.invoke(this,new Object[0]);
+ b.append(m.getName().substring(3,4).toLowerCase() + m.getName().substring(4) + ": " + value + "\n");
+ }
+ return b.toString();
+ }
+ catch (Exception e) {
+ throw new RuntimeException("Programming error",e);
+ }
+ }
+
+ /** Returns the trace of this computation. This is empty (never null) if tracing is off */
+ public Trace trace() { return trace; }
+
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/ranking/features/fieldmatch/FieldMatchMetricsComputer.java b/searchlib/src/main/java/com/yahoo/searchlib/ranking/features/fieldmatch/FieldMatchMetricsComputer.java
new file mode 100644
index 00000000000..3fc3780151a
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/ranking/features/fieldmatch/FieldMatchMetricsComputer.java
@@ -0,0 +1,433 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.ranking.features.fieldmatch;
+
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+/**
+ * <p>Calculates a set of metrics capturing information about the degree of agreement between a query
+ * and a field string. This algorithm attempts to capture the property of text that very close tokens
+ * are usually part of the same semantic structure, while tokens farther apart are much more loosely related.
+ * The algorithm will locate alternative such regions containing multiple query tokens (segments), do a more
+ * detailed analysis of these segments and choose the ones producing the best overall set of match metrics
+ * (subject to certain resource constraints).</p>
+ *
+ * <p>Such segments are found by looking at query terms in sequence from
+ * left to right and finding matches in the field. All alternative segment start points are explored, and the
+ * segmentation achieving the best overall string match metric score is preferred. Dynamic programming
+ * is used to avoid redoing work on segmentations.</p>
+ *
+ * <p>When a segment start point is found, subsequent tokens from the query are searched in the field
+ * from this starting point in "semantic order". This search order can be defined independently of the
+ * algorithm. The current order searches <i>proximityLimit</i> tokens ahead first, then the same distance backwards
+ * (so if you need to go two steps backwards in the field from the segment starting point, the real distance is -2,
+ * but the "semantic distance" is proximityLimit+2).</p>
+ *
+ * <p>The actual metrics are calculated during execution of this algorithm by the {@link FieldMatchMetrics} class,
+ * by receiving events emitted from the algorithm. Any set of metrics derivable from these events are computable using
+ * this algorithm.</p>
+ *
+ * <p>Terminology:
+ * <ul>
+ * <li><b>Sequence</b> - A set of adjacent matched tokens in the field
+ * <li><b>Segment</b> - A field area containing matches to a continuous section of the query
+ * <li><b>Gap</b> - A chunk of adjacent tokens <i>inside a segment</i> separating two matched characters
+ * <li><b>Semantic distance</b> - A non-continuous distance between tokens in j, where the non-continuousness is
+ * mean to capture the semantic similarity between the query and those tokens.
+ * </ul>
+ *
+ * <p>Notation: A position index in the query is denoted <code>i</code>. A position index in the field is
+ * denoted <code>j</code>.</p>
+ *
+ * <p>This class is not multithread safe, but is reusable across queries for a single thread.</p>
+ *
+ * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a>
+ */
+public final class FieldMatchMetricsComputer {
+
+ private Query query;
+
+ private Field field;
+
+ private final FieldMatchMetricsParameters parameters;
+
+ /** The metrics of the currently explored segmentation */
+ private FieldMatchMetrics metrics;
+
+ /**
+ * Known segment starting points. The array is 0..i, one element per starting point query item i,
+ * and a last element representing the entire query.
+ */
+ private List<SegmentStartPoint> segmentStartPoints=new ArrayList<>();
+
+ /** True to collect trace */
+ private boolean collectTrace;
+
+ private int alternativeSegmentationsTried=0;
+
+ /** Creates a feature computer using default settings */
+ public FieldMatchMetricsComputer() {
+ this(FieldMatchMetricsParameters.defaultParameters());
+ }
+
+ /**
+ * Creates a feature computer with the given parameters.
+ * The parameters are frozen if they were not already, this may cause
+ * validation exceptions to be thrown from this.
+ */
+ public FieldMatchMetricsComputer(FieldMatchMetricsParameters parameters) {
+ this.parameters = parameters;
+ }
+
+ /** Computes the string match metrics from a query and field string. */
+ public FieldMatchMetrics compute(String queryString,String fieldString) {
+ return compute(new Query(queryString), fieldString);
+ }
+
+ /** Computes the string match metrics from a query and field string. */
+ public FieldMatchMetrics compute(Query query, String fieldString) {
+ return compute(query,fieldString,false);
+ }
+
+ /**
+ * Computes the string match metrics from a query and field string.
+ *
+ * @param query the query to compute over
+ * @param fieldString the field value to compute over - tokenized by splitting on space
+ * @param collectTrace true to accumulate trace information in the trace returned with the metrics
+ */
+ public FieldMatchMetrics compute(Query query, String fieldString, boolean collectTrace) {
+ return compute(query, new Field(fieldString), collectTrace);
+ }
+
+ /**
+ * Computes the string match metrics from a query and field.
+ *
+ * @param query the query to compute over
+ * @param field the field value to compute over
+ * @param collectTrace true to accumulate trace information in the trace returned with the metrics
+ */
+ public FieldMatchMetrics compute(Query query, Field field, boolean collectTrace) {
+ // 1. Reset state
+ this.collectTrace = collectTrace;
+ this.query = query;
+ this.field = field;
+ segmentStartPoints.clear();
+ for (int i = 0; i <= query.getTerms().length; i++)
+ segmentStartPoints.add(null);
+ alternativeSegmentationsTried = 0;
+ metrics = new FieldMatchMetrics(this);
+
+ // 2. Compute
+ exploreSegments();
+ return metrics;
+ }
+
+ /** Finds segment candidates and explores them until we have the best segmentation history of the entire query */
+ private void exploreSegments() {
+ if (collectTrace)
+ metrics.trace().add("Calculating matches for\n " + query + "\n " + field + "\n");
+
+ // Create an initial start point
+ SegmentStartPoint segmentStartPoint=new SegmentStartPoint(metrics,this);
+ segmentStartPoints.set(0,segmentStartPoint);
+
+ // Explore segmentations
+ while (segmentStartPoint!=null) {
+ metrics =segmentStartPoint.getMetrics().clone();
+ if (collectTrace)
+ metrics.trace().add("\nLooking for segment from " + segmentStartPoint + "..." + "\n");
+ boolean found=findAlternativeSegmentFrom(segmentStartPoint);
+ if (collectTrace)
+ metrics.trace().add(found ? "...found segment: " + metrics.getSegmentStarts() + " score: " +
+ metrics.getSegmentationScore() : "...no complete and improved segment existed" + "\n");
+ if (!found)
+ segmentStartPoint.setOpen(false);
+ segmentStartPoint=findOpenSegment(segmentStartPoint.getI());
+ }
+
+ metrics=findLastStartPoint().getMetrics(); // these metrics are the final set
+ setOccurrenceCounts(metrics);
+ metrics.onComplete();
+ metrics.setComplete(true);
+ }
+
+ /**
+ * Find correspondences from a segment starting point
+ *
+ * @return true if a segment was found, false if none could be found
+ */
+ private boolean findAlternativeSegmentFrom(SegmentStartPoint segmentStartPoint) {
+ // i: index into the query
+ // j: index into the field
+ int semanticDistanceExplored=segmentStartPoint.getSemanticDistanceExplored();
+ int previousI=-1;
+ int previousJ=segmentStartPoint.getPreviousJ();
+ boolean hasOpenSequence=false;
+ boolean isFirst=true;
+
+ for (int i=segmentStartPoint.getStartI(); i<query.getTerms().length; i++) {
+ int semanticDistance=findClosestInFieldBySemanticDistance(i,previousJ,semanticDistanceExplored);
+ int j=semanticDistanceToFieldIndex(semanticDistance,previousJ);
+
+ if (j==-1 && semanticDistanceExplored>0 && isFirst) {
+ return false; // Segment explored before, and no more matches found
+ }
+
+ if ( hasOpenSequence && ( j==-1 || j!=previousJ+1 ) ) {
+ metrics.onSequenceEnd(previousJ);
+ hasOpenSequence=false;
+ }
+
+ if (isFirst) {
+ if (j!=-1) {
+ segmentStart(i,j,isFirst ? -1 : previousJ);
+ segmentStartPoint.exploredTo(j);
+ isFirst=false;
+ }
+ else {
+ segmentStartPoint.incrementStartI(); // Remember that there are no matches for this i
+ }
+ }
+ else {
+ if (Math.abs(j-previousJ) >= parameters.getProximityLimit()) {
+ segmentEnd(i-1,previousJ);
+ return true;
+ }
+ else if (j!=-1) {
+ inSegment(i,j,previousJ,previousI);
+ }
+ }
+
+ if (j!=-1)
+ metrics.onMatch(i,j);
+
+ if (j!=-1 && !hasOpenSequence) {
+ metrics.onSequenceStart(j);
+ hasOpenSequence=true;
+ }
+
+ if (j!=-1)
+ semanticDistanceExplored=1; // Skip the current match when looking for the next
+ else
+ semanticDistanceExplored=0;
+
+ if (j>=0) {
+ previousI=i;
+ previousJ=j;
+ }
+ }
+
+ if (hasOpenSequence)
+ metrics.onSequenceEnd(previousJ);
+
+ if (!isFirst) {
+ segmentEnd(query.getTerms().length-1,previousJ);
+ return true;
+ }
+ else {
+ return false;
+ }
+ }
+
+ /**
+ * Implements the preferred search order for finding a match to a query item - first
+ * looking close in the right order, then close in the reverse order, then far in the right order
+ * and lastly far in the reverse order.
+ *
+ * @param startSemanticDistance is the semantic distance we must be larger than or equal to
+ * @return the semantic distance of the next mathing j larger than startSemanticDistance, or -1 if
+ * there are no matches larger than startSemanticDistance
+ */
+ private int findClosestInFieldBySemanticDistance(int i,int previousJ,int startSemanticDistance) {
+ String term=query.getTerms()[i].getTerm();
+ for (int distance=startSemanticDistance; distance<field.terms().size(); distance++) {
+ int j=semanticDistanceToFieldIndex(distance,previousJ);
+ if (term.equals(field.terms().get(j).value()))
+ return distance;
+ }
+ return -1;
+ }
+
+ /**
+ * Returns the field index (j) from a starting point zeroJ and the distance form zeroJ in the
+ * semantic distance space
+ *
+ * @return the field index, or -1 (undefined) if the semanticDistance is -1
+ */
+ int semanticDistanceToFieldIndex(int semanticDistance,int zeroJ) {
+ if (semanticDistance == -1) return -1;
+ int firstSegmentLength = Math.min(parameters.getProximityLimit(),field.terms().size() - zeroJ);
+ int secondSegmentLength = Math.min(parameters.getProximityLimit(), zeroJ);
+ if (semanticDistance < firstSegmentLength)
+ return zeroJ + semanticDistance;
+ else if (semanticDistance < firstSegmentLength+secondSegmentLength)
+ return zeroJ - semanticDistance - 1 + firstSegmentLength;
+ else if (semanticDistance < field.terms().size() - zeroJ+secondSegmentLength)
+ return zeroJ + semanticDistance - secondSegmentLength;
+ else
+ return field.terms().size() - semanticDistance - 1;
+ }
+
+ /**
+ * Returns the semantic distance from a starting point zeroJ to a field index j
+ *
+ * @return the semantic distance, or -1 (undefined) if j is -1
+ */
+ int fieldIndexToSemanticDistance(int j,int zeroJ) {
+ if (j == -1) return -1;
+ int firstSegmentLength = Math.min(parameters.getProximityLimit(), field.terms().size() - zeroJ);
+ int secondSegmentLength = Math.min(parameters.getProximityLimit(), zeroJ);
+ if (j >= zeroJ) {
+ if ( (j - zeroJ) < firstSegmentLength )
+ return j - zeroJ; // 0..limit
+ else
+ return j - zeroJ+secondSegmentLength; // limit*2..field.length-zeroJ
+ }
+ else {
+ if ( (zeroJ - j - 1) < secondSegmentLength )
+ return zeroJ - j + firstSegmentLength-1; // limit..limit*2
+ else
+ return (zeroJ - j - 1) + field.terms().size() - zeroJ; // field.length-zeroJ..
+ }
+
+ }
+
+ private void inSegment(int i, int j, int previousJ, int previousI) {
+ metrics.onPair(i, j, previousJ);
+ if (j==previousJ+1 && i==previousI+1) {
+ metrics.onInSequence(i, j, previousJ);
+ }
+ else {
+ metrics.onInSegmentGap(i, j, previousJ);
+ if (collectTrace)
+ metrics.trace().add(" in segment gap: " + i + "->" + j + " (" + query.getTerms()[i] + ")" + "\n");
+ }
+ }
+
+ /** Returns whether this segment was accepted as a starting point */
+ private boolean segmentStart(int i,int j,int previousJ) {
+ metrics.onNewSegment(i, j, previousJ);
+
+ if (previousJ>=0)
+ metrics.onPair(i,j,previousJ);
+
+ if (collectTrace)
+ metrics.trace().add(" new segment at: " + i + "->" + j + " (" + query.getTerms()[i] + ")" + "\n");
+ return true;
+ }
+
+ /**
+ * Registers an end of a segment
+ *
+ * @param i the i at which this segment ends
+ * @param j the j at which this segment ends
+ */
+ private void segmentEnd(int i,int j) {
+ if (collectTrace)
+ metrics.trace().add(" segment ended at: " + i + "->" + j + " (" + query.getTerms()[i] + ")" + "\n");
+ SegmentStartPoint startOfNext=segmentStartPoints.get(i + 1);
+ if (startOfNext==null)
+ segmentStartPoints.set(i+1,new SegmentStartPoint(i+1,j, metrics,this));
+ else
+ startOfNext.offerHistory(j, metrics, collectTrace);
+ }
+
+ /** Returns the next open segment to explore, or null if no more segments exists or should be explored */
+ private SegmentStartPoint findOpenSegment(int startI) {
+ for (int i=startI; i<segmentStartPoints.size(); i++) {
+ SegmentStartPoint startPoint=segmentStartPoints.get(i);
+ if (startPoint==null || !startPoint.isOpen()) continue;
+
+ if (startPoint.getSemanticDistanceExplored()==0) return startPoint; // First attempt
+
+ if (alternativeSegmentationsTried>=parameters.getMaxAlternativeSegmentations()) continue;
+ alternativeSegmentationsTried++;
+ return startPoint;
+ }
+
+ return null;
+ }
+
+ private SegmentStartPoint findLastStartPoint() {
+ for (int i=segmentStartPoints.size()-1; i>=0; i--) {
+ SegmentStartPoint startPoint=segmentStartPoints.get(i);
+ if (startPoint!=null)
+ return startPoint;
+ }
+ return null; // Impossible
+ }
+
+ /** Counts all occurrences of terms of the query in the field and set those metrics */
+ private void setOccurrenceCounts(FieldMatchMetrics metrics) {
+ Set<QueryTerm> uniqueQueryTerms=new HashSet<>();
+ for (QueryTerm queryTerm : query.getTerms())
+ uniqueQueryTerms.add(queryTerm);
+
+ List<Float> weightedOccurrences=new ArrayList<Float>();
+ List<Float> significantOccurrences=new ArrayList<Float>();
+
+ int divider = Math.min(field.terms().size(),parameters.getMaxOccurrences()*uniqueQueryTerms.size());
+ int maxOccurence = Math.min(field.terms().size(),parameters.getMaxOccurrences());
+
+ float occurrence=0;
+ float absoluteOccurrence=0;
+ float weightedAbsoluteOccurrence=0;
+ int totalWeight=0;
+ float totalWeightedOccurrences=0;
+ float totalSignificantOccurrences=0;
+
+ for (QueryTerm queryTerm : uniqueQueryTerms) {
+ int termOccurrences=0;
+ for (Field.Term fieldTerm : field.terms()) {
+ if (fieldTerm.value().equals(queryTerm.getTerm()))
+ termOccurrences++;
+ if (termOccurrences == parameters.getMaxOccurrences()) break;
+ }
+ occurrence+=(float)termOccurrences/divider;
+
+ absoluteOccurrence+=(float)termOccurrences/(parameters.getMaxOccurrences()*uniqueQueryTerms.size());
+
+ weightedAbsoluteOccurrence+=(float)termOccurrences*queryTerm.getWeight()/parameters.getMaxOccurrences();
+ totalWeight+=queryTerm.getWeight();
+
+ totalWeightedOccurrences+=(float)maxOccurence*queryTerm.getWeight()/divider;
+ weightedOccurrences.add((float)termOccurrences*queryTerm.getWeight()/divider);
+
+ totalSignificantOccurrences+=(float)maxOccurence*queryTerm.getSignificance()/divider;
+ significantOccurrences.add((float)termOccurrences*queryTerm.getSignificance()/divider);
+ }
+
+ float weightedOccurrenceSum=0;
+ for (float weightedOccurence : weightedOccurrences)
+ weightedOccurrenceSum+=weightedOccurence/totalWeightedOccurrences;
+
+ float significantOccurrenceSum=0;
+ for (float significantOccurence : significantOccurrences)
+ significantOccurrenceSum+=significantOccurence/totalSignificantOccurrences;
+
+ if (totalWeight>0)
+ weightedAbsoluteOccurrence=weightedAbsoluteOccurrence/totalWeight;
+
+ metrics.setOccurrence(occurrence);
+ metrics.setAbsoluteOccurrence(absoluteOccurrence);
+ metrics.setWeightedOccurrence(weightedOccurrenceSum);
+ metrics.setWeightedAbsoluteOccurrence(weightedAbsoluteOccurrence);
+ metrics.setSignificantOccurrence(significantOccurrenceSum);
+ }
+
+ /** Returns the parameter settings of this */
+ public FieldMatchMetricsParameters getParameters() { return parameters; }
+
+ Query getQuery() { return query; }
+
+ Field getField() { return field; }
+
+ @Override
+ public String toString() {
+ return query + "\n" + field + "\n" + metrics + "\n";
+ }
+
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/ranking/features/fieldmatch/FieldMatchMetricsParameters.java b/searchlib/src/main/java/com/yahoo/searchlib/ranking/features/fieldmatch/FieldMatchMetricsParameters.java
new file mode 100644
index 00000000000..4ab8565a285
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/ranking/features/fieldmatch/FieldMatchMetricsParameters.java
@@ -0,0 +1,198 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.ranking.features.fieldmatch;
+
+/**
+ * The parameters to a string match metric calculator.
+ * Mutable until frozen.
+ *
+ * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a>
+ */
+public final class FieldMatchMetricsParameters {
+
+ private boolean frozen=false;
+
+ private int proximityLimit=10;
+
+ private int maxAlternativeSegmentations = 10000;
+
+ private int maxOccurrences=100;
+
+ private float proximityCompletenessImportance =0.9f;
+
+ private float relatednessImportance =0.9f;
+
+ private float earlinessImportance =0.05f;
+
+ private float segmentProximityImportance =0.05f;
+
+ private float occurrenceImportance =0.05f;
+
+ private float fieldCompletenessImportance =0.05f;
+
+ private float[] proximityTable= new float[] { 0.01f, 0.02f, 0.03f, 0.04f, 0.06f, 0.08f, 0.12f, 0.17f, 0.24f, 0.33f, 1,
+ 0.71f, 0.50f, 0.35f, 0.25f, 0.18f, 0.13f, 0.09f, 0.06f, 0.04f, 0.03f };
+
+ /* Calculation of the table above:
+ static {
+ System.out.println("Right order");
+ for (float i=0; i<=10; i++)
+ System.out.println(1/Math.pow(2,i/2));
+
+ System.out.println("Reverse order");
+ for (float i=0; i<=10; i++)
+ System.out.println(1/Math.pow(2,i/2)/3);
+ }
+ */
+
+ private static FieldMatchMetricsParameters defaultParameters;
+
+ static {
+ defaultParameters=new FieldMatchMetricsParameters();
+ defaultParameters.freeze();
+ }
+
+ /** Returns the frozen default parameters */
+ public static FieldMatchMetricsParameters defaultParameters() {
+ return defaultParameters;
+ }
+
+ /** Creates an unfrozen marcg metrics object initialized to the default values */
+ public FieldMatchMetricsParameters() { }
+
+ /** Sets the maximum allowed gap within a segment. Default: 10 */
+ public void setProximityLimit(int proximityLimit) {
+ ensureNotFrozen();
+ this.proximityLimit=proximityLimit;
+ }
+
+ /** Returns the maximum allowed gap within a segment. Default: 10 */
+ public int getProximityLimit() { return proximityLimit; }
+
+ /**
+ * Sets the proximity table deciding the importance of separations of various distances,
+ * The table must have size proximityLimit*2+1, where the first half is for reverse direction
+ * distances. The table must only contain values between 0 and 1, where 1 is "perfect" and 0 is "worst".
+ */
+ public void setProximityTable(float[] proximityTable) {
+ ensureNotFrozen();
+ this.proximityTable=proximityTable;
+ }
+
+ /**
+ * Returns the current proxmity table.
+ * The default table is calculated by
+ * <code>1/2^(n/2)</code> on the right order side, and
+ * <code>1/2^(n/2) /3</code> on the reverse order side
+ * where n is the distance between the tokens.
+ */
+ public float[] getProximityTable() { return proximityTable; }
+
+ /** Returns the proximity table value at an index */
+ public float getProximity(int index) { return proximityTable[index]; }
+
+ /**
+ * Returns the maximal number of <i>alternative</i> segmentations allowed in addition to the first one found.
+ * Default is 10000. This will prefer to not consider iterations on segments that are far out in the field,
+ * and which starts late in the query.
+ */
+ public int getMaxAlternativeSegmentations() { return maxAlternativeSegmentations; }
+
+ public void setMaxAlternativeSegmentations(int maxAlternativeSegmentations) {
+ ensureNotFrozen();
+ this.maxAlternativeSegmentations = maxAlternativeSegmentations;
+ }
+
+ /**
+ * Returns the number of occurrences the number of occurrences of each word is normalized against.
+ * This should be set as the number above which additional occurrences of the term has no real significance.
+ * The default is 100.
+ */
+ public int getMaxOccurrences() { return maxOccurrences; }
+
+ public void setMaxOccurrences(int maxOccurrences) { this.maxOccurrences=maxOccurrences; }
+
+ /**
+ * Returns a number between 0 and 1 which determines the importancy of field completeness in relation to
+ * query completeness in the <code>match</code> and <code>completeness</code> metrics. Default is 0.05
+ */
+ public float getFieldCompletenessImportance() { return fieldCompletenessImportance; }
+
+ public void setFieldCompletenessImportance(float fieldCompletenessImportance) {
+ ensureNotFrozen();
+ this.fieldCompletenessImportance = fieldCompletenessImportance;
+ }
+
+ /**
+ * Returns the importance of the match having high proximity and being complete, relative to segmentProximityImportance,
+ * occurrenceImportance and earlinessImportance in the <code>match</code> metric. Default: 0.9
+ */
+ public float getProximityCompletenessImportance() { return proximityCompletenessImportance; }
+
+ public void setProximityCompletenessImportance(float proximityCompletenessImportance) {
+ ensureNotFrozen();
+ this.proximityCompletenessImportance = proximityCompletenessImportance;
+ }
+
+ /**
+ * Returns the importance of the match occuring early in the query, relative to segmentProximityImportance,
+ * occurrenceImportance and proximityCompletenessImportance in the <code>match</code> metric. Default: 0.05
+ */
+ public float getEarlinessImportance() { return earlinessImportance; }
+
+ public void setEarlinessImportance(float earlinessImportance) {
+ ensureNotFrozen();
+ this.earlinessImportance = earlinessImportance;
+ }
+
+ /**
+ * Returns the importance of multiple segments being close to each other, relative to earlinessImportance,
+ * occurrenceImportance and proximityCompletenessImportance in the <code>match</code> metric. Default: 0.05
+ */
+ public float getSegmentProximityImportance() { return segmentProximityImportance; }
+
+ public void setSegmentProximityImportance(float segmentProximityImportance) {
+ ensureNotFrozen();
+ this.segmentProximityImportance = segmentProximityImportance;
+ }
+
+ /**
+ * Returns the importance of having many occurrences of the query terms, relative to earlinessImportance,
+ * segmentProximityImportance and proximityCompletenessImportance in the <code>match</code> metric. Default: 0.05
+ */
+ public float getOccurrenceImportance() { return occurrenceImportance; }
+
+ public void setOccurrenceImportance(float occurrenceImportance) {
+ ensureNotFrozen();
+ this.occurrenceImportance = occurrenceImportance;
+ }
+
+ /** Returns the normalized importance of relatedness used in the <code>match</code> metric. Default: 0.9 */
+ public float getRelatednessImportance() { return relatednessImportance; }
+
+ public void setRelatednessImportance(float relatednessImportance) {
+ ensureNotFrozen();
+ this.relatednessImportance = relatednessImportance;
+ }
+
+
+ /** Throws IllegalStateException if this is frozen. Does nothing otherwise */
+ private void ensureNotFrozen() {
+ if (frozen)
+ throw new IllegalStateException(this + " is frozen");
+ }
+
+ /**
+ * Freezes this object. All changes after this point will cause an IllegalStateException.
+ * This must be frozen before being handed to a calculator.
+ *
+ * @throws IllegalStateException if this parameter object is inconsistent. In this case, this is not frozen.
+ */
+ public void freeze() {
+ if (proximityTable.length!=proximityLimit*2+1)
+ throw new IllegalStateException("Proximity table length is " + proximityTable.length + ". Must be " +
+ (proximityLimit*2+1) +
+ " (proximityLimit*2+1), because the proximity limit is " + proximityLimit);
+ frozen=true;
+ }
+
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/ranking/features/fieldmatch/Main.java b/searchlib/src/main/java/com/yahoo/searchlib/ranking/features/fieldmatch/Main.java
new file mode 100644
index 00000000000..f101448a3dd
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/ranking/features/fieldmatch/Main.java
@@ -0,0 +1,39 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.ranking.features.fieldmatch;
+
+/**
+ * Helper for computing metrics from the command line.
+ */
+public class Main {
+
+ public static void main(String[] args) {
+ FieldMatchMetricsComputer c=new FieldMatchMetricsComputer();
+ String query=getQuery(args);
+ String field=getField(args);
+ if (query==null || field==null) {
+ printUsage();
+ return;
+ }
+
+ FieldMatchMetrics metrics = c.compute(query,field);
+ System.out.println(metrics.toStringDump());
+ }
+
+ private static String getQuery(String[] args) {
+ if (args.length<1) return null;
+ if (args[0].equals("-h") || args[0].equals("-help")) return null;
+ return args[0];
+ }
+
+ private static String getField(String[] args) {
+ if (args.length<2) return null;
+ return args[1];
+ }
+
+ private static void printUsage() {
+ System.out.println("Computes the string segment match metrics of a query and field.");
+ System.out.println("Usage: java -jar searchlib.jar query field");
+ System.out.println("By: Jon Bratseth (bratseth@yahoo-inc.com)");
+ }
+
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/ranking/features/fieldmatch/Query.java b/searchlib/src/main/java/com/yahoo/searchlib/ranking/features/fieldmatch/Query.java
new file mode 100644
index 00000000000..6cd9d651a09
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/ranking/features/fieldmatch/Query.java
@@ -0,0 +1,72 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.ranking.features.fieldmatch;
+
+import com.yahoo.searchlib.ranking.features.fieldmatch.QueryTerm;
+
+import java.util.Arrays;
+
+/**
+ * A query: An array of the QueryTerms which searches the field we are calculating for,
+ * <p>
+ * In addition the sum of the term weights of <i>all</i> the query terms can be set
+ * explicitly. This allows us to model the matchWeight rank feature of a field as dependent of
+ * the weights of all the terms in the query.
+ *
+ * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a>
+ */
+public class Query {
+
+ private QueryTerm[] terms;
+
+ private int totalTermWeight=0;
+
+ private float totalSignificance=0;
+
+ public Query(String query) {
+ this(splitQuery(query));
+ }
+
+ /** Creates a query with a list of query terms. The query terms are not, and must not be subsequently modified */
+ public Query(QueryTerm[] terms) {
+ this.terms=terms;
+
+ for (QueryTerm term : terms) {
+ totalTermWeight+=term.getWeight();
+ totalSignificance+=term.getSignificance();
+ }
+ }
+
+ private static QueryTerm[] splitQuery(String queryString) {
+ String[] queryTerms=queryString.split(" ");
+ QueryTerm[] query=new QueryTerm[queryTerms.length];
+ for (int i=0; i<query.length; i++)
+ query[i]=new QueryTerm(queryTerms[i]);
+ return query;
+ }
+
+ /** Returns the query terms we are calculating features of */
+ public QueryTerm[] getTerms() { return terms; }
+
+ /**
+ * Returns the total term weight for this query.
+ * This is the sum of the weights of the terms if not set explicitly, or if set explicitly a higher
+ * number which also models a query which also has terms going to other indexes.
+ */
+ public int getTotalTermWeight() { return totalTermWeight; }
+
+ public void setTotalTermWeight(int totalTermWeight) { this.totalTermWeight=totalTermWeight; }
+
+ /**
+ * Returns the total term significance for this query.
+ * This is the sum of the significance of the terms if not set explicitly, or if set explicitly a higher
+ * number which also models a query which also has terms going to other indexes.
+ */
+ public float getTotalSignificance() { return totalSignificance; }
+
+ public void setTotalSignificance(float totalSignificance) { this.totalSignificance=totalSignificance; }
+
+ public String toString() {
+ return "query: " + Arrays.toString(terms);
+ }
+
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/ranking/features/fieldmatch/QueryTerm.java b/searchlib/src/main/java/com/yahoo/searchlib/ranking/features/fieldmatch/QueryTerm.java
new file mode 100644
index 00000000000..803aaf52964
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/ranking/features/fieldmatch/QueryTerm.java
@@ -0,0 +1,67 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.ranking.features.fieldmatch;
+
+/**
+ * A query term. Query terms are equal if they have the same term string.
+ *
+ * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a>
+ */
+public final class QueryTerm {
+
+ private String term;
+
+ private float connectedness = 0.1f;
+
+ private int weight = 100;
+
+ private float significance = 0.1f;
+
+ private float exactness = 1.0f;
+
+ public QueryTerm(String term) {
+ this.term=term;
+ }
+
+ public QueryTerm(String term,float connectedness) {
+ this.term=term;
+ this.connectedness=connectedness;
+ }
+
+ public void setTerm(String term) { this.term=term; }
+
+ public String getTerm() { return term; }
+
+ /**
+ * Returns how connected this term is to the previous term in the query.
+ * Default: 0.1. This is always a number between 0 (not connected at all) and 1 (virtually inseparable)
+ */
+ public float getConnectedness() { return connectedness; }
+
+ public void setConnectedness(float connectedness) { this.connectedness=connectedness; }
+
+ public void setWeight(int weight) { this.weight=weight; }
+
+ public int getWeight() { return weight; }
+
+ /** The significance of this term: 1-term frequency */
+ public void setSignificance(float significance) { this.significance=significance; }
+
+ public float getSignificance() { return significance; }
+
+ /** The degree to which this is exactly the term the user specified (1), or a stemmed form (closer to 0) */
+ public float getExactness() { return exactness; }
+
+ public @Override int hashCode() { return term.hashCode(); }
+
+ public @Override boolean equals(Object object) {
+ if (! (object instanceof QueryTerm)) return false;
+
+ return this.term.equals(((QueryTerm)object).term);
+ }
+
+ public @Override String toString() {
+ if (connectedness==0.1f) return term;
+ return connectedness + ":" + term;
+ }
+
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/ranking/features/fieldmatch/SegmentStartPoint.java b/searchlib/src/main/java/com/yahoo/searchlib/ranking/features/fieldmatch/SegmentStartPoint.java
new file mode 100644
index 00000000000..9f6e81a04bc
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/ranking/features/fieldmatch/SegmentStartPoint.java
@@ -0,0 +1,145 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.ranking.features.fieldmatch;
+
+/**
+ * <p>Information on segment start points stored temporarily during string match metric calculation.</p>
+ *
+ * <p>Given that we want to start a segment at i, this holdes the best known metrics up to i
+ * and the end of the previous segment. In addition it holds information on how far we have tried
+ * to look for alternative segments from this starting point (skipI and previousJ).</p>
+ *
+ * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a>
+ */
+final class SegmentStartPoint {
+
+ private FieldMatchMetricsComputer owner;
+
+ /** The i for which this is the possible segment starting points */
+ private int i;
+
+ private int skipI;
+
+ /** The best known metrics up to this starting point */
+ private FieldMatchMetrics metrics;
+
+ /** The j ending the previous segmentation producing those best metrics */
+ private int previousJ;
+
+ /** The semantic distance from the current previousJ which is already explored */
+ private int semanticDistanceExplored=0;
+
+ /** There are possibly more j's to try at this starting point */
+ boolean open=true;
+
+ /** Creates a segment start point for the first segment */
+ public SegmentStartPoint(FieldMatchMetrics metrics, FieldMatchMetricsComputer owner) {
+ this.i=0;
+ this.previousJ=0;
+ this.metrics=metrics;
+ this.owner=owner;
+ this.semanticDistanceExplored=0;
+ }
+
+ /** Creates a segment start point for any i position where the j is not known */
+ public SegmentStartPoint(int i,int previousJ,FieldMatchMetrics metrics, FieldMatchMetricsComputer owner) {
+ this.i=i;
+ this.previousJ=previousJ;
+ this.metrics=metrics;
+ this.owner=owner;
+ this.semanticDistanceExplored=0;
+ }
+
+ /** Creates a segment start point for any position, where the j of the start point is known */
+ public SegmentStartPoint(int i,int j,int previousJ,FieldMatchMetrics metrics, FieldMatchMetricsComputer owner) {
+ this.i=i;
+ this.previousJ=previousJ;
+ this.metrics=metrics;
+ this.owner=owner;
+ this.semanticDistanceExplored=owner.fieldIndexToSemanticDistance(j,previousJ)+1;
+ }
+
+ /** Returns the current best metrics for this starting point */
+ public FieldMatchMetrics getMetrics() { return metrics; }
+
+ /**
+ * Stores that we have explored to a certain j from the current previousJ.
+ */
+ public void exploredTo(int j) {
+ semanticDistanceExplored=owner.fieldIndexToSemanticDistance(j,previousJ)+1;
+ }
+
+ /**
+ * Offers an alternative history leading up to this point, which is accepted and stored if it is
+ * better than the current history
+ */
+ public void offerHistory(int offeredPreviousJ,FieldMatchMetrics offeredMetrics,boolean collectTrace) {
+ if (offeredMetrics.getSegmentationScore()<=metrics.getSegmentationScore()) {
+ if (collectTrace)
+ offeredMetrics.trace().add(" rejected offered history [match: " + offeredMetrics.getSegmentationScore() +
+ " ending at:" + previousJ + "] at " + this + "\n");
+ return; // Reject
+ }
+
+ /*
+ if (previousJ!=offeredPreviousJ) { // Starting over like this achieves higher correctness if
+ semanticDistanceExplored=0; // the match metric is dependent on relative distance between segments
+ open=true; // but is more expensive
+ }
+ */
+
+ if (collectTrace)
+ offeredMetrics.trace().add(" accepted offered history [match: " + offeredMetrics.getSegmentationScore() +
+ " ending at:" + previousJ + "] at " + this + "\n");
+
+ previousJ=offeredPreviousJ;
+ metrics=offeredMetrics;
+ }
+
+ /**
+ * Returns whether there are possibly still unexplored j's for this i
+ */
+ public boolean isOpen() { return open; }
+
+ public void setOpen(boolean open) { this.open=open; }
+
+ /** Returns the i for which this is the possible segment starting points */
+ public int getI() { return i; }
+
+ /**
+ * Returns the j ending the previous segmentation producing those best metrics,
+ */
+ public int getPreviousJ() { return previousJ; }
+
+ /**
+ * Returns the semantic distance from the previous j which is explored so far, exclusive
+ * (meaning, if the value is 0, 0 is <i>not</i> explored yet)
+ */
+ public int getSemanticDistanceExplored() { return semanticDistanceExplored; }
+
+ public void setSemanticDistanceExplored(int distance) { this.semanticDistanceExplored=distance; }
+
+ /**
+ * Returns the position startI we should start at from this start point i.
+ * startI==i except when there are i's from this starting point which are not found anywhere in
+ * the field. In that case, startI==i+the number of terms following i which are known not to be present
+ */
+ public int getStartI() {
+ return i+skipI;
+ }
+
+ /**
+ * Increments the startI by one because we have discovered that the term at the current startI is not
+ * present in the field
+ */
+ public void incrementStartI() { skipI++; }
+
+ public String toString() {
+ if (i==owner.getQuery().getTerms().length)
+ return "last segment: Complete match: " + metrics.getMatch() + " previous j: " + previousJ +
+ " (" + (open ? "open" : "closed") + ")";
+ return "segment at " + i + " (" + owner.getQuery().getTerms()[i] + "): Match up to here: " + metrics.getMatch() + " previous j: " +
+ previousJ + " explored to: " + semanticDistanceExplored +
+ " (" + (open ? "open" : "closed") + ")";
+ }
+
+} \ No newline at end of file
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/ranking/features/fieldmatch/Trace.java b/searchlib/src/main/java/com/yahoo/searchlib/ranking/features/fieldmatch/Trace.java
new file mode 100644
index 00000000000..775c7d1d687
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/ranking/features/fieldmatch/Trace.java
@@ -0,0 +1,22 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.ranking.features.fieldmatch;
+
+/**
+ * A computation trace
+ *
+ * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a>
+ */
+public class Trace {
+
+ private StringBuilder b = new StringBuilder();
+
+ public void add(String s) {
+ b.append(b);
+ }
+
+ @Override
+ public String toString() {
+ return b.toString();
+ }
+
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/ranking/features/fieldmatch/package-info.java b/searchlib/src/main/java/com/yahoo/searchlib/ranking/features/fieldmatch/package-info.java
new file mode 100644
index 00000000000..c16fbb4521e
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/ranking/features/fieldmatch/package-info.java
@@ -0,0 +1,12 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/**
+ * Reference implementation of the
+ * string segment match algorithm
+ * which creates the fieldMatch feature set.
+ */
+@ExportPackage
+@PublicApi
+package com.yahoo.searchlib.ranking.features.fieldmatch;
+
+import com.yahoo.api.annotations.PublicApi;
+import com.yahoo.osgi.annotation.ExportPackage;
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/ranking/features/package-info.java b/searchlib/src/main/java/com/yahoo/searchlib/ranking/features/package-info.java
new file mode 100644
index 00000000000..028bf3337f0
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/ranking/features/package-info.java
@@ -0,0 +1,10 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/**
+ * Java implementations for various Vespa rank features
+ */
+@ExportPackage
+@PublicApi
+package com.yahoo.searchlib.ranking.features;
+
+import com.yahoo.api.annotations.PublicApi;
+import com.yahoo.osgi.annotation.ExportPackage;
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/ExpressionFunction.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/ExpressionFunction.java
new file mode 100755
index 00000000000..86ac53a1e44
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/ExpressionFunction.java
@@ -0,0 +1,139 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.rankingexpression;
+
+import com.google.common.collect.ImmutableList;
+import com.yahoo.searchlib.rankingexpression.rule.ExpressionNode;
+import com.yahoo.searchlib.rankingexpression.rule.SerializationContext;
+import com.yahoo.text.Utf8;
+
+import java.security.MessageDigest;
+import java.security.NoSuchAlgorithmException;
+import java.util.*;
+
+/**
+ * <p>A function defined by a ranking expression</p>
+ *
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
+ * @author bratseth
+ */
+public class ExpressionFunction {
+
+ private final String name;
+ private final ImmutableList<String> arguments;
+ private final RankingExpression body;
+
+ /**
+ * <p>Constructs a new function</p>
+ *
+ * @param name the name of this function
+ * @param arguments its argument names
+ * @param body the ranking expression that defines this function
+ */
+ public ExpressionFunction(String name, List<String> arguments, RankingExpression body) {
+ this.name = name;
+ this.arguments = arguments==null ? ImmutableList.of() : ImmutableList.copyOf(arguments);
+ this.body = body;
+ }
+
+ public String getName() { return name; }
+
+ /** Returns an immutable list of the arguments of this */
+ public List<String> arguments() { return arguments; }
+
+ public RankingExpression getBody() { return body; }
+
+ /**
+ * <p>Create and return an instance of this function based on the given
+ * arguments. If function calls are nested, this call might produce
+ * additional scripts.</p>
+ *
+ * @param context the context used to expand this
+ * @param arguments the arguments to instantiate on.
+ * @param path the expansion path leading to this.
+ * @return the script function instance created.
+ */
+ public Instance expand(SerializationContext context, List<ExpressionNode> arguments, Deque<String> path) {
+ Map<String, String> argumentBindings = new HashMap<>();
+ for (int i = 0; i < this.arguments.size() && i < arguments.size(); ++i) {
+ argumentBindings.put(this.arguments.get(i), arguments.get(i).toString(context, path, null));
+ }
+ return new Instance(toSymbol(argumentBindings), body.getRoot().toString(context.createBinding(argumentBindings), path, null));
+ }
+
+ /**
+ * Returns a symbolic string that represents this function with a given
+ * list of arguments. The arguments are mangled by hashing the string
+ * representation of the argument expressions, so we might need to revisit
+ * this if we start seeing collisions.
+ *
+ * @param argumentBindings the bound arguments to include in the symbolic name.
+ * @return the symbolic name for an instance of this function
+ */
+ private String toSymbol(Map<String, String> argumentBindings) {
+ if (argumentBindings.isEmpty()) return name;
+
+ StringBuilder ret = new StringBuilder();
+ ret.append(name).append("@");
+ for (Map.Entry<String,String> argumentBinding : argumentBindings.entrySet()) {
+ ret.append(Long.toHexString(symbolCode(argumentBinding.getKey() + "=" + argumentBinding.getValue())));
+ ret.append(".");
+ }
+ if (ret.toString().endsWith("."))
+ ret.setLength(ret.length()-1);
+ return ret.toString();
+ }
+
+
+ /**
+ * <p>Returns a more unique hash code than what Java's own {@link
+ * String#hashCode()} method would produce.</p>
+ *
+ * @param str The string to hash.
+ * @return A 64 bit long hash code.
+ */
+ private static long symbolCode(String str) {
+ try {
+ MessageDigest md = java.security.MessageDigest.getInstance("SHA-1");
+ byte[] buf = md.digest(Utf8.toBytes(str));
+ if (buf.length >= 8) {
+ long ret = 0;
+ for (int i = 0; i < 8; ++i) {
+ ret = (ret << 8) + (buf[i] & 0xff);
+ }
+ return ret;
+ }
+ } catch (NoSuchAlgorithmException e) {
+ throw new Error("java must always support SHA-1 message digest format", e);
+ }
+ return str.hashCode();
+ }
+
+ @Override
+ public String toString() {
+ return name;
+ }
+
+ /**
+ * An instance of a serialization of this function, using a particular serialization context (by {@link
+ * ExpressionFunction#expand})
+ */
+ public class Instance {
+
+ private final String name;
+ private final String expressionString;
+
+ public Instance(String name, String expressionString) {
+ this.name = name;
+ this.expressionString = expressionString;
+ }
+
+ public String getName() {
+ return name;
+ }
+
+ public String getExpressionString() {
+ return expressionString;
+ }
+
+ }
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/FeatureList.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/FeatureList.java
new file mode 100755
index 00000000000..527a908da73
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/FeatureList.java
@@ -0,0 +1,140 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.rankingexpression;
+
+import com.google.common.annotations.Beta;
+import com.yahoo.searchlib.rankingexpression.parser.ParseException;
+import com.yahoo.searchlib.rankingexpression.parser.RankingExpressionParser;
+import com.yahoo.searchlib.rankingexpression.parser.TokenMgrError;
+import com.yahoo.searchlib.rankingexpression.rule.ReferenceNode;
+
+import java.io.*;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+
+/**
+ * Encapsulates the production rule 'featureList()' int the RankingExpressionParser.
+ *
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
+ */
+@Beta
+public class FeatureList implements Iterable<ReferenceNode> {
+
+ private final List<ReferenceNode> features = new ArrayList<>();
+
+ /**
+ * Creates a new feature list by consuming from a reader object.
+ *
+ * @param reader The reader that contains the string to parse.
+ * @throws ParseException Thrown if the string could not be parsed.
+ */
+ public FeatureList(Reader reader) throws ParseException {
+ features.addAll(parse(reader));
+ }
+
+ /**
+ * Creates a new feature list by parsing a string.
+ *
+ * @param list The string to parse.
+ * @throws ParseException Thrown if the string could not be parsed.
+ */
+ public FeatureList(String list) throws ParseException {
+ features.addAll(parse(new StringReader(list)));
+ }
+
+ /**
+ * Creates a new feature list by reading the content of a file.
+ *
+ * @param file The file whose content to parse.
+ * @throws ParseException Thrown if the string could not be parsed.
+ * @throws FileNotFoundException Thrown if the file specified could not be found.
+ */
+ public FeatureList(File file) throws ParseException, FileNotFoundException {
+ features.addAll(parse(new FileReader(file)));
+ }
+
+ /**
+ * Parses the content of a reader object as a list of feature nodes.
+ *
+ * @param reader A reader object that contains an feature list.
+ * @return A list of those features named in the string.
+ * @throws ParseException if the string could not be parsed.
+ */
+ private static List<ReferenceNode> parse(Reader reader) throws ParseException {
+ List<ReferenceNode> lst;
+ try {
+ lst = new RankingExpressionParser(reader).featureList();
+ }
+ catch (TokenMgrError e) {
+ ParseException t = new ParseException();
+ throw (ParseException)t.initCause(e);
+ }
+ List<ReferenceNode> ret = new ArrayList<ReferenceNode>(lst.size());
+ for (Object obj : lst) {
+ if (!(obj instanceof ReferenceNode)) {
+ throw new IllegalStateException("Feature list contains a " + obj.getClass().getName() + ".");
+ }
+ ret.add((ReferenceNode)obj);
+ }
+ return ret;
+ }
+
+ /**
+ * Returns the number of features in this list.
+ *
+ * @return The size.
+ */
+ public int size() {
+ return features.size();
+ }
+
+ /**
+ * Returns the feature at the given index.
+ *
+ * @param i The index of the feature to return.
+ * @return The featuer at the given index.
+ */
+ public ReferenceNode get(int i) {
+ return features.get(i);
+ }
+
+ @Override
+ public int hashCode() {
+ int ret = 0;
+ for (ReferenceNode node : features) {
+ ret += node.hashCode() * 17;
+ }
+ return ret;
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ if (!(obj instanceof FeatureList)) {
+ return false;
+ }
+ FeatureList lst = (FeatureList)obj;
+ if (features.size() != lst.features.size()) {
+ return false;
+ }
+ for (int i = 0; i < features.size(); ++i) {
+ if (!features.get(i).equals(lst.features.get(i))) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ @Override
+ public String toString() {
+ StringBuilder ret = new StringBuilder();
+ for (ReferenceNode node : this) {
+ ret.append(node).append(" ");
+ }
+ return ret.toString();
+ }
+
+ @Override
+ public Iterator<ReferenceNode> iterator() {
+ return features.iterator();
+ }
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/RankingExpression.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/RankingExpression.java
new file mode 100755
index 00000000000..e17d524e906
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/RankingExpression.java
@@ -0,0 +1,250 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.rankingexpression;
+
+import com.yahoo.searchlib.rankingexpression.evaluation.Context;
+import com.yahoo.searchlib.rankingexpression.evaluation.Value;
+import com.yahoo.searchlib.rankingexpression.parser.ParseException;
+import com.yahoo.searchlib.rankingexpression.parser.RankingExpressionParser;
+import com.yahoo.searchlib.rankingexpression.parser.TokenMgrError;
+import com.yahoo.searchlib.rankingexpression.rule.ExpressionNode;
+import com.yahoo.searchlib.rankingexpression.rule.SerializationContext;
+
+import java.io.*;
+import java.util.*;
+
+/**
+ * <p>A ranking expression. Ranking expressions are used to calculate a rank score for a searched instance from a set of
+ * <i>rank features</i>.</p>
+ *
+ * <p>A ranking expression wraps a expression node tree and may also optionally have a name.</p>
+ *
+ * <p>The identity of a ranking expression is decided by both its name and expression tree. Two expressions which
+ * looks the same in string form are the same.</p>
+ *
+ * <h3>Simple usage</h3>
+<pre><code>
+try {
+ MapContext context=new MapContext();
+ context.put("one",1d);
+ RankingExpression expression=new RankingExpression("10*if(i&gt;35,if(i&gt;one,if(i&gt;=670,4,8),if(i&gt;8000,5,3)),if(i==478,90,91))");
+ double result=expression.evaluate(context);
+ }
+catch (ParseException e) {
+ throw new RuntimeException(e);
+}
+</code></pre>
+ *
+ * <h3>Or, usage optimized for repeated evaluation of the same expression</h3>
+<pre><code>
+// Members in a class living across multiple evaluations
+RankingExpression expression;
+ArrayContext contextPrototype;
+
+...
+
+// Initialization of the above members (once)
+// Create reusable, gbdt optimized expression and context.
+// The expression is multithread-safe while the context created is not
+try {
+ RankingExpression expression=new RankingExpression("10*if(i&gt;35,if(i&gt;one,if(i&gt;=670,4,8),if(i&gt;8000,5,3)),if(i==478,90,91))");
+ ArrayContext contextPrototype=new ArrayContext(expression);
+ ExpressionOptimizer optimizer=new ExpressionOptimizer(); // Increases evaluation speed of gbdt form expressions by 3-4x
+ OptimizationReport triviaAboutTheOptimization=optimizer.optimize(expression,contextPrototype);
+}
+catch (ParseException e) {
+ throw new RuntimeException(e);
+}
+
+...
+
+// Execution (many)
+context=contextPrototype.clone(); // If evaluation is multithreaded - skip this if execution is single-threaded
+context.put("one",1d);
+double result=expression.evaluate(context);
+</code></pre>
+ *
+ * @author Simon Thoresen
+ * @author bratseth
+ */
+public class RankingExpression implements Serializable {
+
+ private String name = "";
+ private ExpressionNode root;
+
+ /**
+ * Creates a new ranking expression by consuming from the reader
+ *
+ * @param reader the reader that contains the string to parse.
+ * @throws ParseException if the string could not be parsed.
+ */
+ public RankingExpression(Reader reader) throws ParseException {
+ root = parse(reader);
+ }
+
+ /**
+ * Creates a ranking expression from a string
+ *
+ * @param expression The reader that contains the string to parse.
+ * @throws ParseException if the string could not be parsed.
+ */
+ public RankingExpression(String expression) throws ParseException {
+ try {
+ if (expression == null || expression.length() == 0) {
+ throw new IllegalArgumentException("Empty ranking expressions are not allowed");
+ }
+ root = parse(new StringReader(expression));
+ }
+ catch (ParseException e) {
+ ParseException p = new ParseException("Could not parse '" + expression + "'");
+ p.initCause(e);
+ throw p;
+ }
+ }
+
+ /**
+ * Creates a ranking expression from a file. For convenience, the file.getName() up to any dot becomes the name of
+ * this expression.
+ *
+ * @param file the name of the file whose content to parse.
+ * @throws ParseException if the string could not be parsed.
+ * @throws IllegalArgumentException if the file could not be found
+ */
+ public RankingExpression(File file) throws ParseException {
+ try {
+ name = file.getName().split("\\.")[0];
+ root = parse(new FileReader(file));
+ }
+ catch (FileNotFoundException e) {
+ throw new IllegalArgumentException("Could not create a ranking expression", e);
+ }
+ }
+
+ /**
+ * Creates a named ranking expression from an expression root node.
+ */
+ public RankingExpression(String name, ExpressionNode root) {
+ this.name = name;
+ this.root = root;
+ }
+
+ /**
+ * Creates a ranking expression from an expression root node.
+ *
+ * @param root The root node.
+ */
+ public RankingExpression(ExpressionNode root) {
+ this.root = root;
+ }
+
+ /**
+ * Parses the content of the reader object as an expression string.
+ *
+ * @param reader A reader object that contains an expression string.
+ * @return An expression node that corresponds to the given string.
+ * @throws ParseException if the string could not be parsed.
+ */
+ private static ExpressionNode parse(Reader reader) throws ParseException {
+ try {
+ return new RankingExpressionParser(reader).rankingExpression();
+ }
+ catch (TokenMgrError e) {
+ throw new ParseException(e.getMessage());
+ }
+ }
+
+ /**
+ * Returns the name of this ranking expression, or "" if no name is set.
+ *
+ * @return The name of this expression.
+ */
+ public String getName() {
+ return name;
+ }
+
+ /**
+ * Sets the name of this ranking expression.
+ *
+ * @param name The name to set.
+ */
+ public void setName(String name) {
+ this.name = name;
+ }
+
+ /**
+ * Returns the root of the expression tree of this expression.
+ *
+ * @return The root node.
+ */
+ public ExpressionNode getRoot() {
+ return root;
+ }
+
+ /**
+ * Sets the root of the expression tree of this expression.
+ *
+ * @param root The root node to set.
+ */
+ public void setRoot(ExpressionNode root) {
+ this.root = root;
+ }
+
+ @Override
+ public int hashCode() {
+ return toString().hashCode();
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ return obj instanceof RankingExpression && toString().equals(obj.toString());
+ }
+
+ @Override
+ public String toString() {
+ if ("".equals(name)) {
+ return root.toString();
+ } else {
+ return name + ": " + root.toString();
+ }
+ }
+
+ /**
+ * Creates the necessary rank properties required to implement this expression.
+ *
+ * @param macros the expression macros to expand.
+ * @return a list of named rank properties required to implement this expression.
+ */
+ public Map<String, String> getRankProperties(List<ExpressionFunction> macros) {
+ Map<String, ExpressionFunction> arg = new HashMap<>();
+ for (ExpressionFunction function : macros) {
+ arg.put(function.getName(), function);
+ }
+ Deque<String> path = new LinkedList<>();
+ SerializationContext context = new SerializationContext(macros);
+ String serializedRoot = root.toString(context, path, null);
+ Map<String, String> serializedExpressions = context.serializedFunctions();
+ serializedExpressions.put(propertyName(name), serializedRoot);
+ return serializedExpressions;
+ }
+
+ /**
+ * Returns the rank-property name for a given expression name.
+ *
+ * @param expressionName The expression name to mangle.
+ * @return The property name.
+ */
+ public static String propertyName(String expressionName) {
+ return "rankingExpression(" + expressionName + ").rankingScript";
+ }
+
+ /**
+ * Returns the value of evaluating this expression over the given context.
+ *
+ * @param context The variable bindings to use for this evaluation.
+ * @return The evaluation result.
+ * @throws IllegalArgumentException if there are variables which are not bound in the given map
+ */
+ public Value evaluate(Context context) {
+ return root.evaluate(context);
+ }
+
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/AbstractArrayContext.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/AbstractArrayContext.java
new file mode 100644
index 00000000000..f4d21fd634b
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/AbstractArrayContext.java
@@ -0,0 +1,131 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.rankingexpression.evaluation;
+
+import com.google.common.collect.ImmutableMap;
+import com.yahoo.searchlib.rankingexpression.RankingExpression;
+import com.yahoo.searchlib.rankingexpression.rule.CompositeNode;
+import com.yahoo.searchlib.rankingexpression.rule.ExpressionNode;
+import com.yahoo.searchlib.rankingexpression.rule.ReferenceNode;
+
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.LinkedHashSet;
+import java.util.Map;
+import java.util.Set;
+
+/**
+ * Superclass of contexts which supports array index based lookup.
+ * Instances may be reused indefinitely for evaluations of a single
+ * ranking expression, in a single thread at the time.
+ *
+ * @author bratseth
+ */
+public abstract class AbstractArrayContext extends Context implements Cloneable {
+
+ private final boolean ignoreUnknownValues;
+
+ /** The mapping from variable name to index */
+ private final ImmutableMap<String, Integer> nameToIndex;
+
+ /** The current values set, pre-converted to doubles */
+ private double[] doubleValues;
+
+ /** The name of the ranking expression this was created for */
+ private final String rankingExpressionName;
+
+ /**
+ * Create a fast lookup context for an expression.
+ * This instance should be reused indefinitely by a single thread.
+ * This will fail if unknown values are attempted added.
+ */
+ protected AbstractArrayContext(RankingExpression expression) {
+ this(expression, false);
+ }
+
+ /**
+ * Create a fast lookup context for an expression.
+ * This instance should be reused indefinitely by a single thread.
+ *
+ * @param expression the expression to create a context for
+ * @param ignoreUnknownValues whether attempts to put values not present in this expression
+ * should fail (false - the default), or be ignored (true)
+ */
+ protected AbstractArrayContext(RankingExpression expression, boolean ignoreUnknownValues) {
+ this.ignoreUnknownValues = ignoreUnknownValues;
+ this.rankingExpressionName = expression.getName();
+ Set<String> variables = new LinkedHashSet<>();
+ extractVariables(expression.getRoot(),variables);
+
+ doubleValues = new double[variables.size()];
+
+ int i = 0;
+ ImmutableMap.Builder<String, Integer> nameToIndexBuilder = new ImmutableMap.Builder<>();
+ for (String variable : variables)
+ nameToIndexBuilder.put(variable,i++);
+ nameToIndex = nameToIndexBuilder.build();
+ }
+
+ private void extractVariables(ExpressionNode node,Set<String> variables) {
+ if (node instanceof ReferenceNode) {
+ ReferenceNode fNode=(ReferenceNode)node;
+ if (fNode.getArguments().expressions().size()>0)
+ throw new UnsupportedOperationException("Array lookup is not supported with features having arguments)");
+ variables.add(fNode.toString());
+ }
+ else if (node instanceof CompositeNode) {
+ CompositeNode cNode=(CompositeNode)node;
+ for (ExpressionNode child : cNode.children())
+ extractVariables(child,variables);
+ }
+ }
+
+ protected final Map<String, Integer> nameToIndex() { return nameToIndex; }
+ protected final double[] doubleValues() { return doubleValues; }
+ protected final boolean ignoreUnknownValues() { return ignoreUnknownValues; }
+
+ /**
+ * Creates a clone of this context suitable for evaluating against the same ranking expression
+ * in a different thread (i.e, name name to index map, different value set.
+ */
+ public AbstractArrayContext clone() {
+ try {
+ AbstractArrayContext clone=(AbstractArrayContext)super.clone();
+ clone.doubleValues=new double[nameToIndex.size()];
+ return clone;
+ }
+ catch (CloneNotSupportedException e) {
+ throw new RuntimeException("Programming error");
+ }
+ }
+
+ public Set<String> names() {
+ return nameToIndex.keySet();
+ }
+
+ /**
+ * Returns the index from a name.
+ *
+ * @throws NullPointerException is this name is not known to this context
+ */
+ public final int getIndex(String name) {
+ return nameToIndex.get(name);
+ }
+
+ /** Returns the max number of variables which may be set in this */
+ public int size() {
+ return doubleValues.length;
+ }
+
+ /** Perform a fast lookup directly of the value as a double. This is faster than get(index).asDouble() */
+ @Override
+ public double getDouble(int index) {
+ return doubleValues[index];
+ }
+
+ @Override
+ public String toString() {
+ return "fast lookup context for ranking expression '" + rankingExpressionName +
+ "' [" + doubleValues.length + " variables]";
+ }
+
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/ArrayContext.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/ArrayContext.java
new file mode 100644
index 00000000000..b9ff630198e
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/ArrayContext.java
@@ -0,0 +1,120 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.rankingexpression.evaluation;
+
+import com.yahoo.searchlib.rankingexpression.RankingExpression;
+
+import java.util.Arrays;
+
+/**
+ * Creates a context which supports array index based lookup.
+ * This instance may be reused indefinitely for evaluations of a single
+ * ranking expression, in a single thread at the time.
+ *
+ * @author bratseth
+ */
+public class ArrayContext extends AbstractArrayContext implements Cloneable {
+
+ /** The current values set */
+ private Value[] values;
+
+ private static DoubleValue constantZero = DoubleValue.frozen(0);
+
+ /**
+ * Create a fast lookup context for an expression.
+ * This instance should be reused indefinitely by a single thread.
+ * This will fail if unknown values are attempted added.
+ */
+ public ArrayContext(RankingExpression expression) {
+ this(expression, false);
+ }
+
+ /**
+ * Create a fast lookup context for an expression.
+ * This instance should be reused indefinitely by a single thread.
+ *
+ * @param expression the expression to create a context for
+ * @param ignoreUnknownValues whether attempts to put values not present in this expression
+ * should fail (false - the default), or be ignored (true)
+ */
+ public ArrayContext(RankingExpression expression, boolean ignoreUnknownValues) {
+ super(expression, ignoreUnknownValues);
+ values = new Value[doubleValues().length];
+ Arrays.fill(values, DoubleValue.zero);
+ }
+
+ /**
+ * Puts a value by name.
+ * The value will be frozen if it isn't already.
+ *
+ * @throws IllegalArgumentException if the name is not present in the ranking expression this was created with, and
+ * ignoredUnknownValues is false
+ * @since 5.1.5
+ */
+ @Override
+ public final void put(String name, Value value) {
+ Integer index = nameToIndex().get(name);
+ if (index==null) {
+ if (ignoreUnknownValues())
+ return;
+ else
+ throw new IllegalArgumentException("Value '" + name + "' is not known to " + this);
+ }
+ put(index, value);
+ }
+
+ /** Same as put(index,DoubleValue.frozen(value)) */
+ public final void put(int index, double value) {
+ put(index, DoubleValue.frozen(value));
+ }
+
+ /**
+ * Puts a value by index.
+ * The value will be frozen if it isn't already.
+ *
+ * @since 5.1.5
+ */
+ public final void put(int index, Value value) {
+ values[index]=value.freeze();
+ try {
+ doubleValues()[index]=value.asDouble();
+ }
+ catch (UnsupportedOperationException e) {
+ doubleValues()[index]=Double.NaN; // see getDouble below
+ }
+ }
+
+ /** Perform a slow lookup by name */
+ @Override
+ public Value get(String name) {
+ Integer index=nameToIndex().get(name);
+ if (index==null) return DoubleValue.zero;
+ return values[index];
+ }
+
+ /** Perform a fast lookup by index */
+ @Override
+ public final Value get(int index) {
+ return values[index];
+ }
+
+ /** Perform a fast lookup directly of the value as a double. This is faster than get(index).asDouble() */
+ @Override
+ public final double getDouble(int index) {
+ double value=doubleValues()[index];
+ if (value==Double.NaN)
+ throw new UnsupportedOperationException("Value at " + index + " has no double representation");
+ return value;
+ }
+
+ /**
+ * Creates a clone of this context suitable for evaluating against the same ranking expression
+ * in a different thread (i.e, name name to index map, different value set.
+ */
+ public ArrayContext clone() {
+ ArrayContext clone=(ArrayContext)super.clone();
+ clone.values = new Value[nameToIndex().size()];
+ Arrays.fill(values,constantZero);
+ return clone;
+ }
+
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/BooleanValue.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/BooleanValue.java
new file mode 100644
index 00000000000..8b456b9236b
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/BooleanValue.java
@@ -0,0 +1,61 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.rankingexpression.evaluation;
+
+import com.yahoo.searchlib.rankingexpression.rule.Function;
+import com.yahoo.searchlib.rankingexpression.rule.TruthOperator;
+
+/**
+ * A value which is either true or false.
+ * In numerical context true is interpreted as 1 and false as 0.
+ *
+ * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a>
+ * @since 5.1.21
+ */
+public class BooleanValue extends DoubleCompatibleValue {
+
+ private boolean value;
+
+ /**
+ * Create a boolean value which is frozen at the outset.
+ */
+ public static BooleanValue frozen(boolean value) {
+ BooleanValue booleanValue=new BooleanValue(value);
+ booleanValue.freeze();
+ return booleanValue;
+ }
+
+ public BooleanValue(boolean value) {
+ this.value = value;
+ }
+
+ public boolean asBoolean() { return value; };
+
+ @Override
+ public double asDouble() {
+ return value ? 1 : 0;
+ }
+
+ @Override
+ public Value asMutable() {
+ if ( ! isFrozen()) return this;
+ return new BooleanValue(value);
+ }
+
+ @Override
+ public String toString() {
+ return String.valueOf(value);
+ }
+
+ @Override
+ public boolean equals(Object other) {
+ if (this==other) return true;
+ if ( ! (other instanceof BooleanValue)) return false;
+ return ((BooleanValue)other).value==this.value;
+ }
+
+ @Override
+ public int hashCode() {
+ return value ? 1 : 3;
+ }
+
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/Context.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/Context.java
new file mode 100644
index 00000000000..0dff0414ac2
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/Context.java
@@ -0,0 +1,107 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.rankingexpression.evaluation;
+
+import com.yahoo.searchlib.rankingexpression.rule.Arguments;
+
+import java.util.Set;
+
+/**
+ * <p>The context providing value bindings for an expression evaluation.</p>
+ *
+ * @author bratseth
+ */
+public abstract class Context {
+
+ /**
+ * <p>Returns the value of a simple variable name.</p>
+ *
+ * @param name The name of the variable whose value to return.
+ * @return The value of the named variable.
+ */
+ public abstract Value get(String name);
+
+ /**
+ * <p>Returns the value of a <i>structured variable</i> on the form
+ * <code>name(argument*)(.output)?</code>, where <i>argument</i> is any
+ * string. This may be used to implement more advanced variables whose
+ * values are calculated at runtime from arguments. Supporting this in a
+ * context is optional. Implementations may choose to throw
+ * UnsupportedOperationException or always return null, or to handle outputs
+ * but not arguments.</p>
+ *
+ * <p>This default implementation does the latter - if arguments is non-null
+ * and non-empty an UnsupportedOperationException is thrown, otherwise
+ * get(name + "." + output) is called (or just get(name)) if output is also
+ * null.</p>
+ *
+ * @param name The name of this variable.
+ * @param arguments The parsed arguments as given in the textual expression.
+ * @param output The name of the value to output (to enable one named
+ * calculation to output several), or null to output the
+ * "main" (or only) value.
+ */
+ public Value get(String name, Arguments arguments,String output) {
+ if (arguments!=null && arguments.expressions().size()>0)
+ throw new UnsupportedOperationException(this + " does not support structured ranking expression variables, attempted to reference '" +
+ name + arguments + "'");
+ if (output==null)
+ return get(name);
+ return get(name + "." + output);
+ }
+
+ /**
+ * <p>Lookup by index rather than name. This is supported by some optimized
+ * context subclasses. This default implementation throws
+ * UnsupportedOperationException.</p>
+ *
+ * @param index The index of the variable whose value to return.
+ * @return The value of the indexed variable.
+ */
+ public Value get(int index) {
+ throw new UnsupportedOperationException(this + " does not support variable lookup by index");
+ }
+
+ /**
+ * <p>Lookup by index rather than name directly to a double. This is supported by some optimized
+ * context subclasses. This default implementation throws
+ * UnsupportedOperationException.</p>
+ *
+ * @param index The index of the variable whose value to return.
+ * @return The value of the indexed variable.
+ */
+ public double getDouble(int index) {
+ throw new UnsupportedOperationException(this + " does not support variable lookup by index");
+ }
+
+ /**
+ * Same as put(name,DoubleValue.frozen(value))
+ */
+ public final void put(String name, double value) {
+ put(name, DoubleValue.frozen(value));
+ }
+
+ /**
+ * <p>Sets a value to this, or throws an UnsupportedOperationException if
+ * this is not supported. This default implementation does the latter.</p> *
+ *
+ * @param name The name of the variable to set.
+ * @param value the value to set. Ownership of this value is transferred to this - if it is mutable
+ * (not frozen) it may be modified during execution
+ * @since 5.1.5
+ */
+ public void put(String name, Value value) {
+ throw new UnsupportedOperationException(this + " does not support variable assignment");
+ }
+
+ /**
+ * <p>Returns all the names available in this, or throws an
+ * UnsupportedOperationException if this operation is not supported. This
+ * default implementation does the latter.</p>
+ *
+ * @return The set of all variable names.
+ */
+ public Set<String> names() {
+ throw new UnsupportedOperationException(this + " does not support return a list of its names");
+ }
+
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/DoubleCompatibleValue.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/DoubleCompatibleValue.java
new file mode 100644
index 00000000000..3129bfa05a3
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/DoubleCompatibleValue.java
@@ -0,0 +1,51 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.rankingexpression.evaluation;
+
+import com.yahoo.searchlib.rankingexpression.rule.Function;
+import com.yahoo.searchlib.rankingexpression.rule.TruthOperator;
+
+/**
+ * A value which acts as a double in numerical context.
+ *
+ * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a>
+ * @since 5.1.21
+ */
+public abstract class DoubleCompatibleValue extends Value {
+
+ @Override
+ public boolean hasDouble() { return true; }
+
+ @Override
+ public Value negate() { return new DoubleValue(-asDouble()); }
+
+ @Override
+ public Value add(Value value) {
+ return new DoubleValue(asDouble() + value.asDouble());
+ }
+
+ @Override
+ public Value subtract(Value value) {
+ return new DoubleValue(asDouble() - value.asDouble());
+ }
+
+ @Override
+ public Value multiply(Value value) {
+ return new DoubleValue(asDouble() * value.asDouble());
+ }
+
+ @Override
+ public Value divide(Value value) {
+ return new DoubleValue(asDouble() / value.asDouble());
+ }
+
+ @Override
+ public boolean compare(TruthOperator operator, Value value) {
+ return operator.evaluate(asDouble(), value.asDouble());
+ }
+
+ @Override
+ public Value function(Function function, Value value) {
+ return new DoubleValue(function.evaluate(asDouble(),value.asDouble()));
+ }
+
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/DoubleOnlyArrayContext.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/DoubleOnlyArrayContext.java
new file mode 100644
index 00000000000..2a9a6173125
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/DoubleOnlyArrayContext.java
@@ -0,0 +1,96 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.rankingexpression.evaluation;
+
+import com.yahoo.searchlib.rankingexpression.RankingExpression;
+
+/**
+ * A variant of an array context variant which supports faster binding of variables but slower lookup
+ * from non-gbdt-optimized ranking expressions.
+ *
+ * @author bratseth
+ */
+public class DoubleOnlyArrayContext extends AbstractArrayContext {
+
+ /**
+ * Create a fast lookup context for an expression.
+ * This instance should be reused indefinitely by a single thread.
+ * This will fail if unknown values are attempted added.
+ */
+ public DoubleOnlyArrayContext(RankingExpression expression) {
+ this(expression, false);
+ }
+
+ /**
+ * Create a fast lookup context for an expression.
+ * This instance should be reused indefinitely by a single thread.
+ *
+ * @param expression the expression to create a context for
+ * @param ignoreUnknownValues whether attempts to put values not present in this expression
+ * should fail (false - the default), or be ignored (true)
+ */
+ public DoubleOnlyArrayContext(RankingExpression expression, boolean ignoreUnknownValues) {
+ super(expression, ignoreUnknownValues);
+ }
+
+ /**
+ * Puts a value by name.
+ * The value will be frozen if it isn't already.
+ *
+ * @throws IllegalArgumentException if the name is not present in the ranking expression this was created with, and
+ * ignoredUnknownValues is false
+ * @since 5.1.5
+ */
+ @Override
+ public final void put(String name, Value value) {
+ Integer index = nameToIndex().get(name);
+ if (index == null) {
+ if (ignoreUnknownValues())
+ return;
+ else
+ throw new IllegalArgumentException("Value '" + name + "' is not known to " + this);
+ }
+ put(index, value);
+ }
+
+ /** Same as put(index,DoubleValue.frozen(value)) */
+ public final void put(int index, double value) {
+ doubleValues()[index] = value;
+ }
+
+ /**
+ * Puts a value by index.
+ *
+ * @since 5.1.5
+ */
+ public final void put(int index, Value value) {
+ try {
+ put(index, value.asDouble());
+ }
+ catch (UnsupportedOperationException e) {
+ throw new IllegalArgumentException("This context only supports doubles, not " + value);
+ }
+ }
+
+ /** Perform a slow lookup by name */
+ @Override
+ public Value get(String name) {
+ Integer index = nameToIndex().get(name);
+ if (index==null) return DoubleValue.zero;
+ return new DoubleValue(getDouble(index));
+ }
+
+ /** Perform a faster lookup by index */
+ @Override
+ public final Value get(int index) {
+ return new DoubleValue(getDouble(index));
+ }
+
+ /**
+ * Creates a clone of this context suitable for evaluating against the same ranking expression
+ * in a different thread (i.e, name name to index map, different value set.
+ */
+ public DoubleOnlyArrayContext clone() {
+ return (DoubleOnlyArrayContext)super.clone();
+ }
+
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/DoubleValue.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/DoubleValue.java
new file mode 100644
index 00000000000..1cd65c3133a
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/DoubleValue.java
@@ -0,0 +1,158 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.rankingexpression.evaluation;
+
+import com.yahoo.searchlib.rankingexpression.rule.Function;
+import com.yahoo.searchlib.rankingexpression.rule.TruthOperator;
+
+/**
+ * A double value result of a ranking expression evaluation.
+ * In a boolean context doubles are true if they are different from 0.0
+ *
+ * @author bratseth
+ * @since 5.1.5
+ */
+public final class DoubleValue extends DoubleCompatibleValue {
+
+ // A note on performance: Reusing double values like below is actually slightly slower per evaluation,
+ // but the reduced garbage cost seems to regain this plus some additional percentages
+
+ private double value;
+
+ /** The double value instance for 0 */
+ public final static DoubleValue zero=DoubleValue.frozen(0);
+
+ public DoubleValue(double value) {
+ this.value=value;
+ }
+
+ /**
+ * Create a double which is frozen at the outset.
+ */
+ public static DoubleValue frozen(double value) {
+ DoubleValue doubleValue=new DoubleValue(value);
+ doubleValue.freeze();
+ return doubleValue;
+ }
+
+ @Override
+ public double asDouble() { return value; }
+
+ @Override
+ public DoubleValue asDoubleValue() { return this; }
+
+ @Override
+ public boolean asBoolean() { return value != 0.0; }
+
+ @Override
+ public DoubleValue negate() {
+ return mutable(-value);
+ }
+
+ @Override
+ public Value add(Value value) {
+ if (value instanceof TensorValue)
+ return value.add(this);
+
+ try {
+ return mutable(this.value + value.asDouble());
+ }
+ catch (UnsupportedOperationException e) {
+ throw unsupported("add",value);
+ }
+ }
+
+ @Override
+ public Value subtract(Value value) {
+ if (value instanceof TensorValue)
+ return value.negate().add(this);
+
+ try {
+ return mutable(this.value - value.asDouble());
+ }
+ catch (UnsupportedOperationException e) {
+ throw unsupported("subtract",value);
+ }
+ }
+
+ @Override
+ public Value multiply(Value value) {
+ if (value instanceof TensorValue)
+ return value.multiply(this);
+
+ try {
+ return mutable(this.value * value.asDouble());
+ }
+ catch (UnsupportedOperationException e) {
+ throw unsupported("multiply", value);
+ }
+ }
+
+ @Override
+ public Value divide(Value value) {
+ try {
+ return mutable(this.value / value.asDouble());
+ }
+ catch (UnsupportedOperationException e) {
+ throw unsupported("divide",value);
+ }
+ }
+
+ @Override
+ public boolean compare(TruthOperator operator, Value value) {
+ try {
+ return operator.evaluate(this.value, value.asDouble());
+ }
+ catch (UnsupportedOperationException e) {
+ throw unsupported("comparison",value);
+ }
+ }
+
+ @Override
+ public Value function(Function function, Value value) {
+ // use the tensor implementation of max and min if the argument is a tensor
+ if ( (function.equals(Function.min) || function.equals(Function.max)) && value instanceof TensorValue)
+ return value.function(function, this);
+
+ try {
+ return mutable(function.evaluate(this.value, value.asDouble()));
+ }
+ catch (UnsupportedOperationException e) {
+ throw unsupported("function " + function.toString(), value);
+ }
+ }
+
+ private UnsupportedOperationException unsupported(String operation, Value value) {
+ return new UnsupportedOperationException("Cannot perform " + operation + " on " + value + " and " + this);
+ }
+
+ /** Returns this or a mutable copy assigned the given value */
+ private DoubleValue mutable(double value) {
+ DoubleValue mutable=this.asMutable();
+ mutable.value=value;
+ return mutable;
+ }
+
+ @Override
+ public DoubleValue asMutable() {
+ if ( ! isFrozen()) return this;
+ return new DoubleValue(value);
+ }
+
+ @Override
+ public String toString() {
+ return String.valueOf(value);
+ }
+
+ @Override
+ public boolean equals(Object other) {
+ if (this==other) return true;
+ if ( ! (other instanceof DoubleValue)) return false;
+ return ((DoubleValue)other).value==this.value;
+ }
+
+ @Override
+ public int hashCode() {
+ return toString().hashCode();
+ }
+
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/ExpressionOptimizer.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/ExpressionOptimizer.java
new file mode 100644
index 00000000000..6730053e9fe
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/ExpressionOptimizer.java
@@ -0,0 +1,55 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.rankingexpression.evaluation;
+
+import com.yahoo.searchlib.rankingexpression.RankingExpression;
+import com.yahoo.searchlib.rankingexpression.evaluation.gbdtoptimization.GBDTForestOptimizer;
+import com.yahoo.searchlib.rankingexpression.evaluation.gbdtoptimization.GBDTOptimizer;
+
+/**
+ * This class will perform various optimizations on the ranking expressions. Clients using optimized expressions
+ * will do
+ *
+ * <code>
+ * // Set up once
+ * RankingExpression expression = new RankingExpression(myExpressionString);
+ * ArrayContext context = new ArrayContext(expression);
+ * new ExpressionOptimizer().optimize(expression, context);
+ *
+ * // Execute repeatedly
+ * context.put("featureName1", value1);
+ * ...
+ * expression.evaluate(context);
+ *
+ * // Note that the expression may be used by multiple threads at the same time, while the
+ * // context is single-threaded. To create a context for another tread, use the above context as a prototype,
+ * // contextForOtherThread = context.clone();
+ * </code>
+ * <p>
+ * Instances of this class are not multithread safe.
+ *
+ * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a>
+ */
+public class ExpressionOptimizer {
+
+ private GBDTOptimizer gbdtOptimizer = new GBDTOptimizer();
+
+ private GBDTForestOptimizer gbdtForestOptimizer = new GBDTForestOptimizer();
+
+ /** Gets an optimizer instance used by this by class name, or null if the optimizer is not known */
+ public Optimizer getOptimizer(Class<?> clazz) {
+ if (clazz == gbdtOptimizer.getClass())
+ return gbdtOptimizer;
+ if (clazz == gbdtForestOptimizer.getClass())
+ return gbdtForestOptimizer;
+ return null;
+ }
+
+ public OptimizationReport optimize(RankingExpression expression, AbstractArrayContext arrayContext) {
+ OptimizationReport report = new OptimizationReport();
+ // Note: Order of optimizations matter
+ gbdtOptimizer.optimize(expression, arrayContext, report);
+ gbdtForestOptimizer.optimize(expression, arrayContext, report);
+ return report;
+ }
+
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/MapContext.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/MapContext.java
new file mode 100644
index 00000000000..9ee9a1f7a71
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/MapContext.java
@@ -0,0 +1,95 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.rankingexpression.evaluation;
+
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Set;
+
+/**
+ * A context backed by a Map
+ *
+ * @author bratseth
+ */
+public class MapContext extends Context {
+
+ private Map<String,Value> bindings=new HashMap<>();
+
+ private boolean frozen = false;
+
+ public MapContext() {
+ }
+
+ /**
+ * Freezes this.
+ * Returns this for convenience.
+ */
+ public MapContext freeze() {
+ if ( ! frozen)
+ bindings = Collections.unmodifiableMap(bindings);
+ return this;
+ }
+
+ /**
+ * Creates a map context from a map.
+ * The ownership of the map is transferred to this - it cannot be further modified by the caller.
+ * All the Values of the map will be frozen.
+ *
+ * @since 5.1.5
+ */
+ public MapContext(Map<String,Value> bindings) {
+ this.bindings=bindings;
+ for (Value boundValue : bindings.values())
+ boundValue.freeze();
+ }
+
+ /**
+ * Returns the value of a key. 0 is returned if the given key is not bound in this.
+ */
+ public @Override Value get(String key) {
+ Value value=bindings.get(key);
+ if (value==null) return DoubleValue.zero;
+ return value;
+ }
+
+ /**
+ * Sets the value of a key.
+ * The value is frozen by this.
+ *
+ * @since 5.1.5
+ */
+ public @Override void put(String key,Value value) {
+ bindings.put(key,value.freeze());
+ }
+
+ /** Returns an immutable view of the bindings of this. */
+ public Map<String,Value> bindings() {
+ if (frozen) return bindings;
+ return Collections.unmodifiableMap(bindings);
+ }
+
+ /** Returns an unmodifiable map of the names of this */
+ public @Override Set<String> names() {
+ if (frozen) return bindings.keySet();
+ return Collections.unmodifiableMap(bindings).keySet();
+ }
+
+ public @Override String toString() {
+ return "a map context [" + bindings.size() + " bindings]";
+ }
+
+ /**
+ * A convenience constructor which returns a map context from a string on the form
+ * <code>name1:value1, name2:value2 ...</code>.
+ * Extra spaces are allowed anywhere. Any other deviation from the syntax causes an exception to be thrown.
+ */
+ public static MapContext fromString(String contextString) {
+ MapContext mapContext = new MapContext();
+ for (String keyValueString : contextString.split(",")) {
+ String[] strings = keyValueString.trim().split(":");
+ mapContext.put(strings[0].trim(), Value.parse(strings[1].trim()));
+ }
+ return mapContext;
+ }
+
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/OptimizationReport.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/OptimizationReport.java
new file mode 100644
index 00000000000..340a074f179
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/OptimizationReport.java
@@ -0,0 +1,63 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.rankingexpression.evaluation;
+
+import java.util.ArrayList;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Reports the result of optimizations of a ranking expression.
+ *
+ * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a>
+ */
+public class OptimizationReport {
+
+ private Map<String,Integer> metrics=new LinkedHashMap<String,Integer>();
+
+ private List<String> notes=new ArrayList<String>();
+
+ public void setMetric(String name,int value) {
+ metrics.put(name,value);
+ }
+
+ /** Returns the value of a metric, or null if it is not set */
+ public int getMetric(String name) {
+ return metrics.get(name);
+ }
+
+ /**
+ * Increases the metric by the given name by increment, if the metric is not previously set,
+ * this will assign it the value increment as expected
+ */
+ public void incMetric(String name,int increment) {
+ Integer currentValue=metrics.get(name);
+ if (currentValue==null)
+ currentValue=0;
+ metrics.put(name,currentValue+increment);
+ }
+
+ public void note(String note) {
+ notes.add(note);
+ }
+
+ /** Returns all the content of this report as a multiline string */
+ public String toString() {
+ StringBuilder b=new StringBuilder();
+
+ if (notes.size()>0) {
+ b.append("Optimization notes:\n");
+ List<String> displayedNotes=notes.subList(0,Math.min(5,notes.size()));
+ for (String note : displayedNotes)
+ b.append(" ").append(note).append("\n");
+ if (notes.size()>displayedNotes.size())
+ b.append(" ...\n");
+ }
+
+ b.append("Optimization metrics:\n");
+ for (Map.Entry<String,Integer> metric : metrics.entrySet())
+ b.append(" " + metric.getKey() + ": " + metric.getValue() + "\n");
+ return b.toString();
+ }
+
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/Optimizer.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/Optimizer.java
new file mode 100644
index 00000000000..337e2f84774
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/Optimizer.java
@@ -0,0 +1,23 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.rankingexpression.evaluation;
+
+import com.yahoo.searchlib.rankingexpression.RankingExpression;
+
+/**
+ * Superclass of ranking expression optimizers
+ *
+ * @author bratseth
+ */
+public abstract class Optimizer {
+
+ private boolean enabled=true;
+
+ /** Sets whether this optimizer is enabled. Default true */
+ public void setEnabled(boolean enabled) { this.enabled=enabled; }
+
+ /** Returns whether this is enabled */
+ public boolean isEnabled() { return enabled; }
+
+ public abstract void optimize(RankingExpression expression, AbstractArrayContext context, OptimizationReport report);
+
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/StringValue.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/StringValue.java
new file mode 100644
index 00000000000..ff935031149
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/StringValue.java
@@ -0,0 +1,108 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.rankingexpression.evaluation;
+
+import com.yahoo.javacc.UnicodeUtilities;
+import com.yahoo.searchlib.rankingexpression.rule.Function;
+import com.yahoo.searchlib.rankingexpression.rule.TruthOperator;
+
+/**
+ * A string value.
+ *
+ * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a>
+ * @since 5.1.21
+ */
+public class StringValue extends Value {
+
+ private final String value;
+
+ /**
+ * Create a string value which is frozen at the outset.
+ */
+ public static StringValue frozen(String value) {
+ StringValue stringValue=new StringValue(value);
+ stringValue.freeze();
+ return stringValue;
+ }
+
+ public StringValue(String value) {
+ this.value = value;
+ }
+
+ /** Returns the hashcode of this, to enable strings to be encoded (with reasonable safely) as doubles for optimization */
+ @Override
+ public double asDouble() {
+ return UnicodeUtilities.unquote(value.toString()).hashCode();
+ }
+
+ @Override
+ public boolean hasDouble() { return true; }
+
+ @Override
+ public boolean asBoolean() {
+ throw new UnsupportedOperationException("A string value ('" + value + "') does not have a boolean value");
+ }
+
+ @Override
+ public Value negate() {
+ throw new UnsupportedOperationException("A string value ('" + value + "') cannot be negated");
+ }
+
+ @Override
+ public Value add(Value value) {
+ return new StringValue(value + value.toString());
+ }
+
+ @Override
+ public Value subtract(Value value) {
+ throw new UnsupportedOperationException("String values ('" + value + "') does not support subtraction");
+ }
+
+ @Override
+ public Value multiply(Value value) {
+ throw new UnsupportedOperationException("String values ('" + value + "') does not support multiplication");
+ }
+
+ @Override
+ public Value divide(Value value) {
+ throw new UnsupportedOperationException("String values ('" + value + "') does not support division");
+ }
+
+ @Override
+ public boolean compare(TruthOperator operator, Value value) {
+ if (operator.equals(TruthOperator.EQUAL))
+ return this.equals(value);
+ throw new UnsupportedOperationException("String values ('" + value + "') cannot be compared except with '='");
+ }
+
+ @Override
+ public Value function(Function function, Value value) {
+ throw new UnsupportedOperationException("Mathematical functions cannot be applied on strings ('" + value + "')");
+ }
+
+ @Override
+ public Value asMutable() {
+ if ( ! isFrozen()) return this;
+ return new StringValue(value);
+ }
+
+ @Override
+ public String toString() {
+ return "\"" + value + "\"";
+ }
+
+ @Override
+ public boolean equals(Object other) {
+ if (this==other) return true;
+ if ( ! (other instanceof StringValue)) return false;
+ return ((StringValue)other).value.equals(this.value);
+ }
+
+ @Override
+ public int hashCode() {
+ return value.hashCode();
+ }
+
+ /** Returns the value of this as a string */
+ public String asString() { return value; }
+
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/TensorValue.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/TensorValue.java
new file mode 100644
index 00000000000..12bede95aae
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/TensorValue.java
@@ -0,0 +1,168 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.rankingexpression.evaluation;
+
+import com.google.common.annotations.Beta;
+import com.yahoo.tensor.Tensor;
+import com.yahoo.tensor.TensorAddress;
+import com.yahoo.searchlib.rankingexpression.rule.Function;
+import com.yahoo.searchlib.rankingexpression.rule.TruthOperator;
+import com.yahoo.tensor.TensorType;
+
+import java.util.Optional;
+
+/**
+ * A Value containing a tensor.
+ * See {@link com.yahoo.tensor.Tensor} for definition of a tensor
+ * and the operations supported.
+ *
+ * @author bratseth
+ */
+ @Beta
+public class TensorValue extends Value {
+
+ /** The tensor value of this */
+ private final Tensor value;
+ private final Optional<TensorType> type;
+
+ public TensorValue(Tensor value) {
+ this.value = value;
+ this.type = Optional.empty();
+ }
+
+ public TensorValue(Tensor value, TensorType type) {
+ this.value = value;
+ this.type = Optional.of(type);
+ }
+
+ @Override
+ public double asDouble() {
+ if (value.dimensions().size() == 0)
+ return value.get(TensorAddress.empty);
+ throw new UnsupportedOperationException("Requires a double value from a tensor with dimensions " +
+ value.dimensions() + ", but a tensor of order > 0 does " +
+ "not have a double value. Input tensor: " + this);
+ }
+
+ @Override
+ public boolean hasDouble() { return value.dimensions().size() == 0; }
+
+ @Override
+ public boolean asBoolean() {
+ throw new UnsupportedOperationException("A tensor does not have a boolean value");
+ }
+
+ @Override
+ public Value negate() {
+ return new TensorValue(value.apply((Double value) -> -value));
+ }
+
+ @Override
+ public Value add(Value argument) {
+ if (argument instanceof TensorValue)
+ return new TensorValue(value.add(((TensorValue)argument).value));
+ else
+ return new TensorValue(value.apply((Double value) -> value + argument.asDouble()));
+ }
+
+ @Override
+ public Value subtract(Value argument) {
+ if (argument instanceof TensorValue)
+ return new TensorValue(value.subtract(((TensorValue) argument).value));
+ else
+ return new TensorValue(value.apply((Double value) -> value - argument.asDouble()));
+ }
+
+ @Override
+ public Value multiply(Value argument) {
+ if (argument instanceof TensorValue)
+ return new TensorValue(value.multiply(((TensorValue) argument).value));
+ else
+ return new TensorValue(value.apply((Double value) -> value * argument.asDouble()));
+ }
+
+ @Override
+ public Value divide(Value argument) {
+ if (argument instanceof TensorValue)
+ throw new UnsupportedOperationException("Two tensors cannot be divided");
+ else
+ return new TensorValue(value.apply((Double value) -> value / argument.asDouble()));
+ }
+
+ public Value match(Value argument) {
+ return new TensorValue(value.match(asTensor(argument, "match")));
+ }
+
+ public Value min(Value argument) {
+ return new TensorValue(value.min(asTensor(argument, "min")));
+ }
+
+ public Value max(Value argument) {
+ return new TensorValue(value.max(asTensor(argument, "max")));
+ }
+
+ public Value sum(String dimension) {
+ return new TensorValue(value.sum(dimension));
+ }
+
+ public Value sum() {
+ return new DoubleValue(value.sum());
+ }
+
+ private Tensor asTensor(Value value, String operationName) {
+ if ( ! (value instanceof TensorValue))
+ throw new UnsupportedOperationException("Could not perform " + operationName +
+ ": The second argument must be a tensor but was " + value);
+ return ((TensorValue)value).value;
+ }
+
+ public Tensor asTensor() { return value; }
+
+ public Optional<TensorType> getType() {
+ return type;
+ }
+
+ @Override
+ public boolean compare(TruthOperator operator, Value value) {
+ throw new UnsupportedOperationException("A tensor cannot be compared with any value");
+ }
+
+ @Override
+ public Value function(Function function, Value argument) {
+ if (function.equals(Function.min) && argument instanceof TensorValue)
+ return min(argument);
+ else if (function.equals(Function.max) && argument instanceof TensorValue)
+ return max(argument);
+ else
+ return new TensorValue(value.apply((Double value) -> function.evaluate(value, argument.asDouble())));
+ }
+
+ @Override
+ public Value asMutable() {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public String toString() {
+ return value.toString();
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) return true;
+ if (o == null || getClass() != o.getClass()) return false;
+
+ TensorValue that = (TensorValue) o;
+
+ if (!type.equals(that.type)) return false;
+ if (!value.equals(that.value)) return false;
+
+ return true;
+ }
+
+ @Override
+ public int hashCode() {
+ int result = value.hashCode();
+ result = 31 * result + type.hashCode();
+ return result;
+ }
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/Value.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/Value.java
new file mode 100644
index 00000000000..e56c005cdf7
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/Value.java
@@ -0,0 +1,96 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.rankingexpression.evaluation;
+
+import com.yahoo.javacc.UnicodeUtilities;
+import com.yahoo.searchlib.rankingexpression.rule.Function;
+import com.yahoo.searchlib.rankingexpression.rule.TruthOperator;
+import com.yahoo.tensor.MapTensor;
+
+/**
+ * The result of a ranking expression evaluation.
+ * Concrete subclasses of this provides implementations of these methods or throws
+ * UnsupportedOperationException if the operation is not supported.
+ *
+ * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a>
+ * @since 5.1.5
+ */
+public abstract class Value {
+
+ private boolean frozen=false;
+
+ /** Returns this value as a double, or throws UnsupportedOperationException if it cannot be represented as a double */
+ public abstract double asDouble();
+
+ /** Returns this value as a double value, or throws UnsupportedOperationException if it cannot be represented as a double */
+ public DoubleValue asDoubleValue() {
+ return new DoubleValue(asDouble());
+ }
+
+ /** Returns true if this value can return itself as a double, i.e asDoubleValue will return a value and not throw */
+ public abstract boolean hasDouble();
+
+ /** Returns this value as a boolean. */
+ public abstract boolean asBoolean();
+
+ public abstract Value negate();
+
+ public abstract Value add(Value value);
+
+ public abstract Value subtract(Value value);
+
+ public abstract Value multiply(Value value);
+
+ public abstract Value divide(Value value);
+
+ /** Perform the comparison specified by the operator between this value and the given value */
+ public abstract boolean compare(TruthOperator operator,Value value);
+
+ /** Perform the given binary function on this value and the given value */
+ public abstract Value function(Function function,Value value);
+
+ /**
+ * Irreversibly makes this immutable. Overriders must always call super.freeze() and return this
+ *
+ * @return this for convenience
+ */
+ public Value freeze() {
+ frozen=true;
+ return this;
+ }
+
+ /** Returns true if this is immutable, false otherwise */
+ public final boolean isFrozen() { return frozen; }
+
+ /** Returns this is mutable, or a mutable copy otherwise */
+ public abstract Value asMutable();
+
+ @Override
+ public abstract String toString();
+
+ @Override
+ public abstract boolean equals(Object other);
+
+ @Override
+ public abstract int hashCode();
+
+ /**
+ * Parses the given string to a value and returns it.
+ * Different subtypes of Value will be returned depending on the string.
+ *
+ * @return a mutable Value
+ * @throws IllegalArgumentException if the given string is not parseable as a value
+ */
+ public static Value parse(String value) {
+ if (value.equals("true"))
+ return new BooleanValue(true);
+ else if (value.equals("false"))
+ return new BooleanValue(false);
+ else if (value.startsWith("\"") || value.startsWith("'"))
+ return new StringValue(UnicodeUtilities.unquote(value));
+ else if (value.startsWith("{"))
+ return new TensorValue(MapTensor.from(value));
+ else
+ return new DoubleValue(Double.parseDouble(value));
+ }
+
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/gbdtoptimization/.gitignore b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/gbdtoptimization/.gitignore
new file mode 100644
index 00000000000..e69de29bb2d
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/gbdtoptimization/.gitignore
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/gbdtoptimization/GBDTForestNode.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/gbdtoptimization/GBDTForestNode.java
new file mode 100644
index 00000000000..3e138aa7d72
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/gbdtoptimization/GBDTForestNode.java
@@ -0,0 +1,43 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.rankingexpression.evaluation.gbdtoptimization;
+
+import com.yahoo.searchlib.rankingexpression.evaluation.Context;
+import com.yahoo.searchlib.rankingexpression.evaluation.DoubleValue;
+import com.yahoo.searchlib.rankingexpression.evaluation.Value;
+import com.yahoo.searchlib.rankingexpression.rule.CompositeNode;
+import com.yahoo.searchlib.rankingexpression.rule.ExpressionNode;
+import com.yahoo.searchlib.rankingexpression.rule.SerializationContext;
+
+import java.util.Deque;
+
+/**
+ * An optimized version of a sum of consecutive decision trees.
+ *
+ * @author bratseth
+ */
+public class GBDTForestNode extends ExpressionNode {
+
+ private final double[] values;
+
+ public GBDTForestNode(double[] values) {
+ this.values=values;
+ }
+
+ @Override
+ public final Value evaluate(Context context) {
+ int pc = 0;
+ double treeSum = 0;
+ while (pc < values.length) {
+ int nextTree = (int)values[pc++];
+ treeSum += GBDTNode.evaluate(values, pc, context);
+ pc += nextTree;
+ }
+ return new DoubleValue(treeSum);
+ }
+
+ /** Returns (optimized sum of condition trees) */
+ public String toString(SerializationContext context, Deque<String> path, CompositeNode parent) {
+ return "(optimized sum of condition trees of size " + (values.length*8) + " bytes)";
+ }
+
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/gbdtoptimization/GBDTForestOptimizer.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/gbdtoptimization/GBDTForestOptimizer.java
new file mode 100644
index 00000000000..7d84124f2af
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/gbdtoptimization/GBDTForestOptimizer.java
@@ -0,0 +1,124 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.rankingexpression.evaluation.gbdtoptimization;
+
+import com.yahoo.searchlib.rankingexpression.RankingExpression;
+import com.yahoo.searchlib.rankingexpression.evaluation.AbstractArrayContext;
+import com.yahoo.searchlib.rankingexpression.evaluation.ArrayContext;
+import com.yahoo.searchlib.rankingexpression.evaluation.OptimizationReport;
+import com.yahoo.searchlib.rankingexpression.evaluation.Optimizer;
+import com.yahoo.searchlib.rankingexpression.rule.ArithmeticNode;
+import com.yahoo.searchlib.rankingexpression.rule.ArithmeticOperator;
+import com.yahoo.searchlib.rankingexpression.rule.CompositeNode;
+import com.yahoo.searchlib.rankingexpression.rule.ExpressionNode;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * @author bratseth
+ */
+public class GBDTForestOptimizer extends Optimizer {
+
+ private OptimizationReport report;
+
+ /**
+ * A temporary value used within the algorithm
+ */
+ private int currentTreesOptimized = 0;
+
+ /**
+ * Optimizes sums of GBDTNodes by replacing them by a single GBDTForestNode
+ *
+ * @param expression the expression to destructively optimize
+ * @param context a fast lookup context created from the given expression
+ * @param report the optimization report to which actions of this is logged
+ */
+ @Override
+ public void optimize(RankingExpression expression, AbstractArrayContext context, OptimizationReport report) {
+ if ( ! isEnabled()) return;
+
+ this.report = report;
+ expression.setRoot(findAndOptimize(expression.getRoot()));
+ report.note("GBDT forest optimization done");
+ }
+
+ /**
+ * Recursively descend and optimize gbdt forest nodes.
+ *
+ * @return the resulting node, which may be the input node if no optimizations were found
+ */
+ private ExpressionNode findAndOptimize(ExpressionNode node) {
+ ExpressionNode newNode = optimize(node);
+ if ( ! (newNode instanceof CompositeNode)) return newNode; //
+
+ CompositeNode newComposite = (CompositeNode)newNode;
+ List<ExpressionNode> newChildren = new ArrayList<>();
+ for (ExpressionNode child : newComposite.children()) {
+ newChildren.add(findAndOptimize(child));
+ }
+ return newComposite.setChildren(newChildren);
+ }
+
+ /**
+ * Optimize the given node (only)
+ *
+ * @return the resulting node, which may be the input node if it could not be optimized
+ */
+ private ExpressionNode optimize(ExpressionNode node) {
+ currentTreesOptimized = 0;
+ List<Double> forest = new ArrayList<>();
+ boolean optimized = optimize(node, forest);
+ if ( ! optimized ) return node;
+
+ GBDTForestNode forestNode = new GBDTForestNode(toArray(forest));
+ report.incMetric("Number of forests", 1);
+ report.incMetric("GBDT trees optimized to forests", currentTreesOptimized);
+ return forestNode;
+ }
+
+ /**
+ * Optimize the given node, if it is the root of a gdbt forest. Otherwise do nothing and return false
+ */
+ private boolean optimize(ExpressionNode node, List<Double> forest) {
+ if (node instanceof GBDTNode) {
+ addTo(forest, (GBDTNode)node);
+ currentTreesOptimized++;
+ return true;
+ }
+ if (!(node instanceof ArithmeticNode)) {
+ return false;
+ }
+ ArithmeticNode aNode = (ArithmeticNode)node;
+ for (ArithmeticOperator op : aNode.operators()) {
+ if (op != ArithmeticOperator.PLUS) {
+ return false;
+ }
+ }
+ for (ExpressionNode child : aNode.children()) {
+ if (!optimize(child, forest)) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ private void addTo(List<Double> forest, GBDTNode tree) {
+ forest.add((double)tree.values().length);
+ addAll(tree.values(), forest);
+ }
+
+ private void addAll(double[] values, List<Double> forest) {
+ for (double value : values) {
+ forest.add(value);
+ }
+ }
+
+ private double[] toArray(List<Double> valueList) {
+ double[] valueArray = new double[valueList.size()];
+ for (int i = 0; i < valueList.size(); i++) {
+ valueArray[i] = valueList.get(i);
+ }
+ return valueArray;
+ }
+
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/gbdtoptimization/GBDTNode.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/gbdtoptimization/GBDTNode.java
new file mode 100644
index 00000000000..607b4dc55cb
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/gbdtoptimization/GBDTNode.java
@@ -0,0 +1,98 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.rankingexpression.evaluation.gbdtoptimization;
+
+import com.yahoo.searchlib.rankingexpression.evaluation.Context;
+import com.yahoo.searchlib.rankingexpression.evaluation.DoubleValue;
+import com.yahoo.searchlib.rankingexpression.evaluation.Value;
+import com.yahoo.searchlib.rankingexpression.rule.CompositeNode;
+import com.yahoo.searchlib.rankingexpression.rule.ExpressionNode;
+import com.yahoo.searchlib.rankingexpression.rule.SerializationContext;
+
+import java.util.Deque;
+
+/**
+ * An optimized version of a decision tree.
+ *
+ * @author bratseth
+ */
+public final class GBDTNode extends ExpressionNode {
+
+ // The GBDT node vm works by reading doubles one at a time and interpreting them
+ // as either constant data or a mangling of opcode and variable reference:
+ // The value space is as follows:
+ // n=[0,MAX_LEAF_VALUE> : n is data (tree leaf constant value)
+ // n=[MAX_LEAF_VALUE+MAX_VARIABLES*0,MAX_LEAF_VALUE+MAX_VARIABLES*1>: < than var at index n
+ // n=[MAX_LEAF_VALUE+MAX_VARIABLES*1,MAX_LEAF_VALUE+MAX_VARIABLES*2>: = to var at index n-MAX_VARIABLES
+ // n=[MAX_LEAF_VALUE+MAX_VARIABLES*2,MAX_LEAF_VALUE+MAX_VARIABLES*3]: n-MAX_VARIABLES*2 is IN the following set
+
+ // The full layout of an IF instruction is
+ // COMPARISON,TRUE_BRANCH_LENGTH,TRUE_BRANCH,FALSE_BRANCH
+ // where COMPARISON is VARIABLE_AND_OPCODE,COMPARE_CONSTANT if the opcode is < or =,
+ // and VARIABLE_AND_OPCODE,COMPARE_CONSTANTS_LENGTH,COMPARE_CONSTANTS if the opcode is IN
+
+
+ // If any change is made to this encoding, this change must also be reflected in GBDTNodeOptimizer
+
+ /** The max (absolute) supported value an optimized leaf may have */
+ public final static int MAX_LEAF_VALUE=2*1000*1000*1000;
+
+ /** The max number of variables (features) supported in the context */
+ public final static int MAX_VARIABLES=1*1000*1000;
+
+ private final double[] values;
+
+ public GBDTNode(double[] values) {
+ this.values=values;
+ }
+
+ /** Returns a direct reference to the values of this. The returned array must not be modified. */
+ public final double[] values() { return values; }
+
+ @Override
+ public final Value evaluate(Context context) {
+ return new DoubleValue(evaluate(values,0,context));
+ }
+
+ public static double evaluate(double[] values, int startOffset, Context context) {
+ int pc = startOffset;
+ while (true) {
+ double nextValue = values[pc++];
+ if (nextValue >= MAX_LEAF_VALUE) { // a condition node
+ int offset = (int)nextValue - MAX_LEAF_VALUE;
+ boolean comparisonIsTrue = false;
+ if (offset < MAX_VARIABLES) {
+ comparisonIsTrue = context.getDouble(offset)<values[pc++];
+ }
+ else if (offset < MAX_VARIABLES*2) {
+ comparisonIsTrue = context.getDouble(offset-MAX_VARIABLES)==values[pc++];
+ }
+ else { // offset<MAX_VARIABLES*3
+ double testValue = context.getDouble(offset-MAX_VARIABLES*2);
+ int setValuesLeft = (int)values[pc++];
+ while (setValuesLeft > 0) { // test each value in the set
+ setValuesLeft--;
+ if (testValue == values[pc++]) {
+ comparisonIsTrue=true;
+ break;
+ }
+ }
+ pc += setValuesLeft; // jump to after the set
+ }
+
+ if (comparisonIsTrue)
+ pc++; // true branch - skip the jump value
+ else
+ pc += values[pc]; // false branch - jump
+ }
+ else { // a leaf
+ return nextValue;
+ }
+ }
+ }
+
+ /** Returns "(optimized condition tree)" */
+ @Override
+ public String toString(SerializationContext context, Deque<String> path, CompositeNode parent) {
+ return "(optimized condition tree)";
+ }
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/gbdtoptimization/GBDTOptimizer.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/gbdtoptimization/GBDTOptimizer.java
new file mode 100644
index 00000000000..7e74bdce9e6
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/gbdtoptimization/GBDTOptimizer.java
@@ -0,0 +1,184 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.rankingexpression.evaluation.gbdtoptimization;
+
+import com.yahoo.yolean.Exceptions;
+import com.yahoo.searchlib.rankingexpression.RankingExpression;
+import com.yahoo.searchlib.rankingexpression.evaluation.*;
+import com.yahoo.searchlib.rankingexpression.rule.*;
+
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+
+/**
+ * <p>This ranking expression processor recognizes and optimizes GBDT expressions. Note that this optimization is
+ * destructive - inspection is not possible into optimized subtrees.</p>
+ *
+ * <p>This class is not multithread safe.</p>
+ *
+ * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a>
+ */
+public class GBDTOptimizer extends Optimizer {
+
+ private OptimizationReport report;
+
+ /**
+ * Optimizes this by replacing GBDT sub-expressions by GBDTNodes. These optimized expressions <b>must</b> be
+ * executed using an instance of {@link com.yahoo.searchlib.rankingexpression.evaluation.ArrayContext} as context.
+ * These thread locally reusable contexts must be created from the ranking expression <i>before</i> the ranking
+ * expression is optimized.
+ *
+ * @param expression the expression to destructively optimize
+ * @param context a fast lookup context created from the given expression
+ * @param report the optimization report to which actions of this is logged
+ */
+ @Override
+ public void optimize(RankingExpression expression, AbstractArrayContext context, OptimizationReport report) {
+ if (!isEnabled()) return;
+
+ this.report = report;
+
+ if (context.size() > GBDTNode.MAX_VARIABLES) {
+ report.note("Can not optimize expressions referencing more than " + GBDTNode.MAX_VARIABLES + " features: " +
+ expression + " has " + context.size());
+ return;
+ }
+
+ expression.setRoot(optimize(expression.getRoot(), context));
+ report.note("GBDT tree optimization done");
+ }
+
+ /**
+ * <p>Recursively optimize nodes of the form ArithmeticNode(IfNode,ArithmeticNode(IfNode)) etc., ignore
+ * anything else.</p>
+ *
+ * <p>Each condition node is converted to the double sequence [(OperatorIsEquals ? GBDTNode.MAX_VARIABLES : 0) +
+ * IndexOfLeftComparisonFeature+GBDTNode.MAX_LEAFT_VALUE, ValueOfRightComparisonValue,#OfValuesInTrueBranch,true
+ * branch values,false branch values]</p>
+ *
+ * <p>Each value node is converted to the double value of the value node itself.</p>
+ *
+ * @return the optimized expression
+ */
+ private ExpressionNode optimize(ExpressionNode node, AbstractArrayContext context) {
+ if (node instanceof ArithmeticNode) {
+ Iterator<ExpressionNode> childIt = ((ArithmeticNode)node).children().iterator();
+ ExpressionNode ret = optimize(childIt.next(), context);
+
+ Iterator<ArithmeticOperator> operIt = ((ArithmeticNode)node).operators().iterator();
+ while (childIt.hasNext() && operIt.hasNext()) {
+ ret = ArithmeticNode.resolve(ret, operIt.next(), optimize(childIt.next(), context));
+ }
+ return ret;
+ }
+ if (node instanceof IfNode) {
+ return createGBDTNode((IfNode)node, context);
+ }
+ return node;
+ }
+
+ private ExpressionNode createGBDTNode(IfNode cNode, AbstractArrayContext context) {
+ List<Double> values = new ArrayList<>();
+ try {
+ consumeNode(cNode, values, context);
+ }
+ catch (IllegalArgumentException e) { // Conversion was impossible
+ report.note("Skipped optimization: " + Exceptions.toMessageString(e) + ". Expression: " + cNode);
+ return cNode;
+ }
+ report.incMetric("Optimized GDBT trees",1);
+ return new GBDTNode(toArray(values));
+ }
+
+ /**
+ * Recursively consume nodes into the value list Returns the number of values produced by this.
+ */
+ private int consumeNode(ExpressionNode node, List<Double> values, AbstractArrayContext context) {
+ int beforeIndex = values.size();
+ if ( node instanceof IfNode) {
+ IfNode ifNode = (IfNode)node;
+ int jumpValueIndex = consumeIfCondition(ifNode.getCondition(), values, context);
+ values.add(0d); // jumpValue goes here after the next line
+ int jumpValue = consumeNode(ifNode.getTrueExpression(), values, context) + 1;
+ values.set(jumpValueIndex, (double) jumpValue);
+ consumeNode(ifNode.getFalseExpression(), values, context);
+ } else {
+ double value = toValue(node);
+ if (Math.abs(value) > GBDTNode.MAX_LEAF_VALUE) {
+ throw new IllegalArgumentException("Leaf value is too large for optimization: " + value);
+ }
+ values.add(toValue(node));
+ }
+ return values.size() - beforeIndex;
+ }
+
+ /** Consumes the if condition and return the size of the values resulting, for convenience */
+ private int consumeIfCondition(ExpressionNode condition, List<Double> values, AbstractArrayContext context) {
+ if (condition instanceof ComparisonNode) {
+ ComparisonNode comparison = (ComparisonNode)condition;
+ if (comparison.getOperator() == TruthOperator.SMALLER)
+ values.add(GBDTNode.MAX_LEAF_VALUE + GBDTNode.MAX_VARIABLES*0 + getVariableIndex(comparison.getLeftCondition(), context));
+ else if (comparison.getOperator() == TruthOperator.EQUAL)
+ values.add(GBDTNode.MAX_LEAF_VALUE + GBDTNode.MAX_VARIABLES*1 + getVariableIndex(comparison.getLeftCondition(), context));
+ else
+ throw new IllegalArgumentException("Cannot optimize other conditions than < and ==, encountered: " + comparison.getOperator());
+ values.add(toValue(comparison.getRightCondition()));
+ }
+ else if (condition instanceof SetMembershipNode) {
+ SetMembershipNode setMembership = (SetMembershipNode)condition;
+ values.add(GBDTNode.MAX_LEAF_VALUE + GBDTNode.MAX_VARIABLES*2 + getVariableIndex(setMembership.getTestValue(),context));
+ values.add((double)setMembership.getSetValues().size());
+ for (ExpressionNode setElementNode : setMembership.getSetValues())
+ values.add(toValue(setElementNode));
+ }
+ else {
+ throw new IllegalArgumentException("Node condition could not be optimized: " + condition);
+ }
+
+ return values.size();
+ }
+
+ private double getVariableIndex(ExpressionNode node, AbstractArrayContext context) {
+ if (!(node instanceof ReferenceNode)) {
+ throw new IllegalArgumentException("Contained a left-hand comparison expression " +
+ "which was not a feature value but was: " + node);
+ }
+ ReferenceNode fNode = (ReferenceNode)node;
+ Integer index = context.getIndex(fNode.toString());
+ if (index == null) {
+ throw new IllegalStateException("The ranking expression contained feature '" + fNode.getName() +
+ "', which is not known to " + context + ": The context must be created" +
+ "from the same ranking expression which is to be optimized");
+ }
+ return index;
+ }
+
+ private double toValue(ExpressionNode node) {
+ if (node instanceof ConstantNode) {
+ Value value = ((ConstantNode)node).getValue();
+ if (value instanceof DoubleCompatibleValue || value instanceof StringValue)
+ return value.asDouble();
+ else
+ throw new IllegalArgumentException("Cannot optimize a node containing a value of type " +
+ value.getClass().getSimpleName() + " (" + value + ") in a set test: " + node);
+ }
+
+ if (node instanceof NegativeNode) {
+ NegativeNode nNode = (NegativeNode)node;
+ if (!(nNode.getValue() instanceof ConstantNode)) {
+ throw new IllegalArgumentException("Contained a negation of a non-number: " + nNode.getValue());
+ }
+ return -((ConstantNode)nNode.getValue()).getValue().asDouble();
+ }
+ throw new IllegalArgumentException("Node could not be optimized: " + node);
+ }
+
+ private double[] toArray(List<Double> valueList) {
+ double[] valueArray = new double[valueList.size()];
+ for (int i = 0; i < valueList.size(); i++) {
+ valueArray[i] = valueList.get(i);
+ }
+ return valueArray;
+ }
+
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/gbdtoptimization/test/.gitignore b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/gbdtoptimization/test/.gitignore
new file mode 100644
index 00000000000..e69de29bb2d
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/gbdtoptimization/test/.gitignore
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/package-info.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/package-info.java
new file mode 100644
index 00000000000..b744b884e0f
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/package-info.java
@@ -0,0 +1,10 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/**
+ * Execution engine for ranking expressions
+ */
+@ExportPackage
+@PublicApi
+package com.yahoo.searchlib.rankingexpression.evaluation;
+
+import com.yahoo.api.annotations.PublicApi;
+import com.yahoo.osgi.annotation.ExportPackage;
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/mlr/.gitignore b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/mlr/.gitignore
new file mode 100644
index 00000000000..e69de29bb2d
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/mlr/.gitignore
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/package-info.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/package-info.java
new file mode 100644
index 00000000000..95099876eb4
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/package-info.java
@@ -0,0 +1,10 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/**
+ * Ranking expression execution library, see {@link com.yahoo.searchlib.rankingexpression.RankingExpression}.
+ */
+@ExportPackage
+@PublicApi
+package com.yahoo.searchlib.rankingexpression;
+
+import com.yahoo.api.annotations.PublicApi;
+import com.yahoo.osgi.annotation.ExportPackage;
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/parser/package-info.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/parser/package-info.java
new file mode 100644
index 00000000000..01af7c12ae4
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/parser/package-info.java
@@ -0,0 +1,10 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/**
+ * Ranking expression parser
+ */
+@ExportPackage
+@PublicApi
+package com.yahoo.searchlib.rankingexpression.parser;
+
+import com.yahoo.api.annotations.PublicApi;
+import com.yahoo.osgi.annotation.ExportPackage;
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/Arguments.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/Arguments.java
new file mode 100644
index 00000000000..a5d04c0f3b9
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/Arguments.java
@@ -0,0 +1,81 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.rankingexpression.rule;
+
+import com.google.common.collect.ImmutableList;
+import com.yahoo.searchlib.rankingexpression.evaluation.Context;
+import com.yahoo.searchlib.rankingexpression.evaluation.Value;
+
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+
+/**
+ * A set of argument expressions to a function or feature.
+ * This is immutable.
+ *
+ * @author bratseth
+ */
+public final class Arguments implements Serializable {
+
+ private final ImmutableList<ExpressionNode> expressions;
+
+ public Arguments() {
+ this(null);
+ }
+
+ public Arguments(List<? extends ExpressionNode> expressions) {
+ if (expressions == null) {
+ this.expressions = ImmutableList.of();
+ return;
+ }
+
+ // Build in a roundabout way because java generics and lists
+ ImmutableList.Builder<ExpressionNode> b = ImmutableList.builder();
+ for (ExpressionNode node : expressions)
+ b.add(node);
+ this.expressions = b.build();
+ }
+
+ /** Returns an unmodifiable list of the expressions in this */
+ public List<ExpressionNode> expressions() { return expressions; }
+
+ /** Evaluate all arguments in this */
+ public Value[] evaluate(Context context) {
+ Value[] values=new Value[expressions.size()];
+ for (int i=0; i<expressions.size(); i++)
+ values[i]=expressions.get(i).evaluate(context);
+ return values;
+ }
+
+ /** Evaluate the i'th argument */
+ public Value evaluate(int i,Context context) {
+ return expressions.get(i).evaluate(context);
+ }
+
+ public boolean isEmpty() { return expressions.isEmpty(); }
+
+ @Override
+ public int hashCode() {
+ return expressions.hashCode();
+ }
+
+ @Override
+ public boolean equals(Object rhs) {
+ return rhs instanceof Arguments && expressions.equals(((Arguments)rhs).expressions);
+ }
+
+ @Override
+ public String toString() {
+ StringBuilder b = new StringBuilder();
+ b.append("(");
+ for (ExpressionNode argument : expressions)
+ b.append(argument).append(",");
+ b.setLength(b.length()-1);
+ if (b.length() > 0)
+ b.append(")");
+ return b.toString();
+ }
+
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/ArithmeticNode.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/ArithmeticNode.java
new file mode 100755
index 00000000000..c6669d87d1b
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/ArithmeticNode.java
@@ -0,0 +1,129 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.rankingexpression.rule;
+
+import com.google.common.collect.ImmutableList;
+import com.yahoo.searchlib.rankingexpression.evaluation.Context;
+import com.yahoo.searchlib.rankingexpression.evaluation.Value;
+
+import java.util.*;
+
+/**
+ * A binary mathematical operation
+ *
+ * @author bratseth
+ */
+public final class ArithmeticNode extends CompositeNode {
+
+ private final ImmutableList<ExpressionNode> children;
+ private final ImmutableList<ArithmeticOperator> operators;
+
+ public ArithmeticNode(List<ExpressionNode> children, List<ArithmeticOperator> operators) {
+ this.children = ImmutableList.copyOf(children);
+ this.operators = ImmutableList.copyOf(operators);
+ }
+
+ public ArithmeticNode(ExpressionNode leftExpression, ArithmeticOperator operator, ExpressionNode rightExpression) {
+ this.children = ImmutableList.of(leftExpression, rightExpression);
+ this.operators = ImmutableList.of(operator);
+ }
+
+ public List<ArithmeticOperator> operators() { return operators; }
+
+ @Override
+ public List<ExpressionNode> children() { return children; }
+
+ @Override
+ public String toString(SerializationContext context, Deque<String> path, CompositeNode parent) {
+ StringBuilder string = new StringBuilder();
+
+ boolean nonDefaultPrecedence = nonDefaultPrecedence(parent);
+ if (nonDefaultPrecedence)
+ string.append("(");
+
+ Iterator<ExpressionNode> child = children.iterator();
+ string.append(child.next().toString(context, path, this)).append(" ");
+ for (Iterator<ArithmeticOperator> op = operators.iterator(); op.hasNext() && child.hasNext();) {
+ string.append(op.next().toString()).append(" ");
+ string.append(child.next().toString(context, path, this));
+ if (op.hasNext())
+ string.append(" ");
+ }
+ if (nonDefaultPrecedence)
+ string.append(")");
+ string.append(" ");
+
+ return string.toString().trim();
+ }
+
+ /**
+ * Returns true if this node has lower precedence than the parent
+ * (even though by virtue of being a node it will be calculated before the parent).
+ */
+ private boolean nonDefaultPrecedence(CompositeNode parent) {
+ if ( parent==null) return false;
+ if ( ! (parent instanceof ArithmeticNode)) return false;
+
+ return ((ArithmeticNode)parent).operators.get(0).hasPrecedenceOver(this.operators.get(0));
+ }
+
+ @Override
+ public Value evaluate(Context context) {
+ Iterator<ExpressionNode> child = children.iterator();
+
+ Deque<ValueItem> stack = new ArrayDeque<>();
+ stack.push(new ValueItem(ArithmeticOperator.PLUS, child.next().evaluate(context)));
+ for (Iterator<ArithmeticOperator> it = operators.iterator(); it.hasNext() && child.hasNext();) {
+ ArithmeticOperator op = it.next();
+ if (!stack.isEmpty()) {
+ while (stack.peek().op.hasPrecedenceOver(op)) {
+ popStack(stack);
+ }
+ }
+ stack.push(new ValueItem(op, child.next().evaluate(context)));
+ }
+ while (stack.size() > 1) {
+ popStack(stack);
+ }
+ return stack.getFirst().value;
+ }
+
+ private void popStack(Deque<ValueItem> stack) {
+ ValueItem rhs = stack.pop();
+ ValueItem lhs = stack.peek();
+ lhs.value = rhs.op.evaluate(lhs.value, rhs.value);
+ }
+
+ public static ArithmeticNode resolve(ExpressionNode left, ArithmeticOperator op, ExpressionNode right) {
+ if ( ! (left instanceof ArithmeticNode)) return new ArithmeticNode(left, op, right);
+
+ ArithmeticNode leftArithmetic = (ArithmeticNode)left;
+
+ List<ExpressionNode> newChildren = new ArrayList<>(leftArithmetic.children());
+ newChildren.add(right);
+
+ List<ArithmeticOperator> newOperators = new ArrayList<>(leftArithmetic.operators());
+ newOperators.add(op);
+
+ return new ArithmeticNode(newChildren, newOperators);
+ }
+
+ private static class ValueItem {
+
+ final ArithmeticOperator op;
+ Value value;
+
+ public ValueItem(ArithmeticOperator op, Value value) {
+ this.op = op;
+ this.value = value;
+ }
+ }
+
+ @Override
+ public CompositeNode setChildren(List<ExpressionNode> newChildren) {
+ if (children.size() != newChildren.size())
+ throw new IllegalArgumentException("Expected " + children.size() + " children but got " + newChildren.size());
+ return new ArithmeticNode(newChildren, operators);
+ }
+
+}
+
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/ArithmeticOperator.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/ArithmeticOperator.java
new file mode 100644
index 00000000000..e5a794ab53e
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/ArithmeticOperator.java
@@ -0,0 +1,62 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.rankingexpression.rule;
+
+import com.yahoo.searchlib.rankingexpression.evaluation.Value;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+/**
+ * A mathematical operator
+ *
+ * @author bratseth
+ */
+public enum ArithmeticOperator {
+
+ PLUS(0, "+") { public Value evaluate(Value x, Value y) {
+ return x.add(y);
+ }},
+ MINUS(1, "-") { public Value evaluate(Value x, Value y) {
+ return x.subtract(y);
+ }},
+ MULTIPLY(2, "*") { public Value evaluate(Value x, Value y) {
+ return x.multiply(y);
+ }},
+ DIVIDE(3, "/") { public Value evaluate(Value x, Value y) {
+ return x.divide(y);
+ }};
+
+ /** A list of all the operators in this in order of decreasing precedence */
+ public static final List<ArithmeticOperator> operatorsByPrecedence = operatorsByPrecedence();
+
+ private final int precedence;
+ private final String image;
+
+ private ArithmeticOperator(int precedence, String image) {
+ this.precedence = precedence;
+ this.image = image;
+ }
+
+ /** Returns true if this operator has precedence over the given operator */
+ public boolean hasPrecedenceOver(ArithmeticOperator op) {
+ return precedence > op.precedence;
+ }
+
+ public abstract Value evaluate(Value x, Value y);
+
+ @Override
+ public String toString() {
+ return image;
+ }
+
+ private static List<ArithmeticOperator> operatorsByPrecedence() {
+ List<ArithmeticOperator> operators = new ArrayList<>();
+ operators.add(DIVIDE);
+ operators.add(MULTIPLY);
+ operators.add(MINUS);
+ operators.add(PLUS);
+ return Collections.unmodifiableList(operators);
+ }
+
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/BooleanNode.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/BooleanNode.java
new file mode 100755
index 00000000000..22b777d4b9d
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/BooleanNode.java
@@ -0,0 +1,11 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.rankingexpression.rule;
+
+/**
+ * A node which produces a boolean value when evaluated.
+ *
+ * @author bratseth
+ * @since 5.1.21
+ */
+public abstract class BooleanNode extends CompositeNode {
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/ComparisonNode.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/ComparisonNode.java
new file mode 100644
index 00000000000..882d16ebc1c
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/ComparisonNode.java
@@ -0,0 +1,62 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.rankingexpression.rule;
+
+import com.yahoo.searchlib.rankingexpression.evaluation.BooleanValue;
+import com.yahoo.searchlib.rankingexpression.evaluation.Context;
+import com.yahoo.searchlib.rankingexpression.evaluation.Value;
+
+import java.util.*;
+
+/**
+ * A node which returns true or false depending on the outcome of a comparison.
+ *
+ * @author bratseth
+ * @since 5.1.21
+ */
+public class ComparisonNode extends BooleanNode {
+
+ /** The operator string of this condition. */
+ private final TruthOperator operator;
+
+ private final ExpressionNode leftCondition, rightCondition;
+
+ public ComparisonNode(ExpressionNode leftCondition, TruthOperator operator, ExpressionNode rightCondition) {
+ this.leftCondition = leftCondition;
+ this.operator = operator;
+ this.rightCondition = rightCondition;
+ }
+
+ @Override
+ public List<ExpressionNode> children() {
+ List<ExpressionNode> children = new ArrayList<>(2);
+ children.add(leftCondition);
+ children.add(rightCondition);
+ return children;
+ }
+
+ public TruthOperator getOperator() { return operator; }
+
+ public ExpressionNode getLeftCondition() { return leftCondition; }
+
+ public ExpressionNode getRightCondition() { return rightCondition; }
+
+ @Override
+ public String toString(SerializationContext context, Deque<String> path, CompositeNode parent) {
+ return leftCondition.toString(context, path, this) + " " + operator + " " +
+ rightCondition.toString(context, path, this);
+ }
+
+ @Override
+ public Value evaluate(Context context) {
+ Value leftValue=leftCondition.evaluate(context);
+ Value rightValue=rightCondition.evaluate(context);
+ return new BooleanValue(leftValue.compare(operator,rightValue));
+ }
+
+ @Override
+ public ComparisonNode setChildren(List<ExpressionNode> children) {
+ if (children.size() != 2) throw new IllegalArgumentException("A comparison test must have 2 children");
+ return new ComparisonNode(children.get(0), operator, children.get(1));
+ }
+
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/CompositeNode.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/CompositeNode.java
new file mode 100644
index 00000000000..d181c29b516
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/CompositeNode.java
@@ -0,0 +1,27 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.rankingexpression.rule;
+
+import java.util.List;
+
+/**
+ * <p>The parent of all node types which contains child nodes.</p>
+ *
+ * @author bratseth
+ */
+public abstract class CompositeNode extends ExpressionNode {
+
+ /**
+ * <p>Returns a read-only list containing the immediate children of this composite</p>
+ *
+ * @return The children of this.
+ */
+ public abstract List<ExpressionNode> children();
+
+ /**
+ * Returns a copy of this where the children is replaced by the given children.
+ *
+ * @throws IllegalArgumentException if the given list of children has different size than children()
+ */
+ public abstract CompositeNode setChildren(List<ExpressionNode> children);
+
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/ConstantNode.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/ConstantNode.java
new file mode 100755
index 00000000000..e51519059ed
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/ConstantNode.java
@@ -0,0 +1,54 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.rankingexpression.rule;
+
+import com.yahoo.searchlib.rankingexpression.evaluation.Context;
+import com.yahoo.searchlib.rankingexpression.evaluation.Value;
+
+import java.util.Deque;
+
+/**
+ * A node which holds a constant (frozen) value.
+ *
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
+ */
+public final class ConstantNode extends ExpressionNode {
+
+ private final String sourceImage;
+
+ private final Value value;
+
+ public ConstantNode(Value value) {
+ this(value,null);
+ }
+
+ /**
+ * Creates a constant value
+ *
+ * @param value the value. Ownership of this value is transferred to this.
+ * @param sourceImage the source string image producing this value
+ */
+ public ConstantNode(Value value, String sourceImage) {
+ value.freeze();
+ this.value=value;
+ this.sourceImage=sourceImage;
+ }
+
+ public Value getValue() { return value; }
+
+ @Override
+ public String toString(SerializationContext context, Deque<String> path, CompositeNode parent) {
+ return sourceString();
+ }
+
+ /** Returns the string which created this, or the value.toString() if not known */
+ public String sourceString() {
+ if (sourceImage != null) return sourceImage;
+ return value.toString();
+ }
+
+ @Override
+ public Value evaluate(Context context) {
+ return value;
+ }
+
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/EmbracedNode.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/EmbracedNode.java
new file mode 100755
index 00000000000..7e9e1cb2825
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/EmbracedNode.java
@@ -0,0 +1,57 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.rankingexpression.rule;
+
+import com.yahoo.searchlib.rankingexpression.evaluation.Context;
+import com.yahoo.searchlib.rankingexpression.evaluation.Value;
+
+import java.util.Collections;
+import java.util.Deque;
+import java.util.List;
+
+/**
+ * This class represents another expression enclosed in braces.
+ *
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
+ */
+public final class EmbracedNode extends CompositeNode {
+
+ // The node to embrace.
+ private final ExpressionNode value;
+
+ /**
+ * Creates a new expression node that embraces another.
+ *
+ * @param value The node to embrace.
+ */
+ public EmbracedNode(ExpressionNode value) {
+ this.value=value;
+ }
+
+ /** Returns the node enclosed by this */
+ public ExpressionNode getValue() { return value; }
+
+ @Override
+ public List<ExpressionNode> children() {
+ return Collections.singletonList(value);
+ }
+
+ @Override
+ public String toString(SerializationContext context, Deque<String> path, CompositeNode parent) {
+ String expression = value.toString(context, path, this);
+ if (value instanceof ReferenceNode) return expression;
+ return "(" + expression + ")";
+ }
+
+ @Override
+ public Value evaluate(Context context) {
+ return value.evaluate(context);
+ }
+
+ @Override
+ public CompositeNode setChildren(List<ExpressionNode> newChildren) {
+ if (newChildren.size() != 1)
+ throw new IllegalArgumentException("Expected 1 child but got " + newChildren.size());
+ return new EmbracedNode(newChildren.get(0));
+ }
+
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/ExpressionNode.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/ExpressionNode.java
new file mode 100755
index 00000000000..05d998afd35
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/ExpressionNode.java
@@ -0,0 +1,51 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.rankingexpression.rule;
+
+import com.yahoo.searchlib.rankingexpression.evaluation.Context;
+import com.yahoo.searchlib.rankingexpression.evaluation.Value;
+
+import java.io.Serializable;
+import java.util.Deque;
+
+/**
+ * Superclass of all expression nodes. Expression nodes have their identity determined by their content.
+ * All expression nodes are immutable.
+ *
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
+ */
+public abstract class ExpressionNode implements Serializable {
+
+ @Override
+ public final int hashCode() {
+ return toString().hashCode();
+ }
+
+ @Override
+ public final boolean equals(Object obj) {
+ return obj instanceof ExpressionNode && toString().equals(obj.toString());
+ }
+
+ @Override
+ public final String toString() {
+ return toString(new SerializationContext(), null, null);
+ }
+
+ /**
+ * Returns a script instance of this based on the supplied script functions.
+ *
+ * @param context the serialization context
+ * @param path the call path to this, used for cycle detection, or null if this is a root
+ * @param parent the parent node of this, or null if it a root
+ * @return the main script, referring to script instances.
+ */
+ public abstract String toString(SerializationContext context, Deque<String> path, CompositeNode parent);
+
+ /**
+ * Returns the value of evaluating this expression over the given context.
+ *
+ * @param context the variable bindings to use for this evaluation
+ * @throws IllegalArgumentException if there are variables which are not bound in the given map
+ */
+ public abstract Value evaluate(Context context);
+
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/Function.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/Function.java
new file mode 100644
index 00000000000..ecd8182a108
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/Function.java
@@ -0,0 +1,55 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.rankingexpression.rule;
+
+import java.io.Serializable;
+
+import static java.lang.Math.*;
+
+/**
+ * A scalar function
+ *
+ * @author bratseth
+ */
+public enum Function implements Serializable {
+
+ cosh { public double evaluate(double x, double y) { return cosh(x); } },
+ sinh { public double evaluate(double x, double y) { return sinh(x); } },
+ tanh { public double evaluate(double x, double y) { return tanh(x); } },
+ cos { public double evaluate(double x, double y) { return cos(x); } },
+ sin { public double evaluate(double x, double y) { return sin(x); } },
+ tan { public double evaluate(double x, double y) { return tan(x); } },
+ acos { public double evaluate(double x, double y) { return acos(x); } },
+ asin { public double evaluate(double x, double y) { return asin(x); } },
+ atan { public double evaluate(double x, double y) { return atan(x); } },
+ exp { public double evaluate(double x, double y) { return exp(x); } },
+ log10 { public double evaluate(double x, double y) { return log10(x); } },
+ log { public double evaluate(double x, double y) { return log(x); } },
+ sqrt { public double evaluate(double x, double y) { return sqrt(x); } },
+ ceil { public double evaluate(double x, double y) { return ceil(x); } },
+ fabs { public double evaluate(double x, double y) { return abs(x); } },
+ floor { public double evaluate(double x, double y) { return floor(x); } },
+ isNan { public double evaluate(double x, double y) { return Double.isNaN(x) ? 1.0 : 0.0; } },
+ atan2(2) { public double evaluate(double x, double y) { return atan2(x,y); } },
+ pow(2) { public double evaluate(double x, double y) { return pow(x,y); } },
+ ldexp(2) { public double evaluate(double x, double y) { return x*pow(2,y); } },
+ fmod(2) { public double evaluate(double x, double y) { return IEEEremainder(x,y); } },
+ min(2) { public double evaluate(double x, double y) { return min(x,y); } },
+ max(2) { public double evaluate(double x, double y) { return max(x,y); } };
+
+ private final int arity;
+
+ private Function() {
+ this(1);
+ }
+
+ private Function(int arity) {
+ this.arity = arity;
+ }
+
+ /** Perform the function on the input */
+ public abstract double evaluate(double x, double y);
+
+ /** Returns the number of arguments this function takes */
+ public int arity() { return arity; }
+
+} \ No newline at end of file
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/FunctionNode.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/FunctionNode.java
new file mode 100755
index 00000000000..8ab403bff7a
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/FunctionNode.java
@@ -0,0 +1,90 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.rankingexpression.rule;
+
+import com.yahoo.searchlib.rankingexpression.evaluation.Context;
+import com.yahoo.searchlib.rankingexpression.evaluation.DoubleValue;
+import com.yahoo.searchlib.rankingexpression.evaluation.Value;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Deque;
+import java.util.List;
+
+/**
+ * Invocation of a native function.
+ *
+ * @author simon
+ * @author bratseth
+ */
+public final class FunctionNode extends CompositeNode {
+
+ /** The type of function. */
+ private final Function function;
+
+ /** The arguments to this function. */
+ private final Arguments arguments;
+
+ /* Creates an unary function node */
+ public FunctionNode(Function function, ExpressionNode argument) {
+ if (function.arity() != 1) throw new IllegalArgumentException(function + " is not unary");
+ this.function = function;
+ this.arguments = new Arguments(Collections.singletonList(argument));
+ }
+
+ /** Creates a binary function node */
+ public FunctionNode(Function function, ExpressionNode argument1, ExpressionNode argument2) {
+ if (function.arity() != 2) throw new IllegalArgumentException(function + " is not binary");
+ this.function = function;
+ List<ExpressionNode> argumentList = new ArrayList<>();
+ argumentList.add(argument1);
+ argumentList.add(argument2);
+ arguments=new Arguments(argumentList);
+ }
+
+ public Function getFunction() { return function; }
+
+ /** Returns the arguments of this */
+ @Override
+ public List<ExpressionNode> children() {
+ return arguments.expressions();
+ }
+
+ @Override
+ public String toString(SerializationContext context, Deque<String> path, CompositeNode parent) {
+ StringBuilder b=new StringBuilder(function.toString());
+ b.append("(");
+ for (int i = 0; i < this.arguments.expressions().size(); ++i) {
+ b.append(this.arguments.expressions().get(i).toString(context, path, this));
+ if (i < this.arguments.expressions().size() - 1) {
+ b.append(",");
+ }
+ }
+ b.append(")");
+ return b.toString();
+ }
+
+ @Override
+ public Value evaluate(Context context) {
+ if (arguments.expressions().size() == 0)
+ return DoubleValue.zero.function(function,DoubleValue.zero);
+
+ Value argument1 = arguments.expressions().get(0).evaluate(context);
+ if (arguments.expressions().size() == 1)
+ return argument1.function(function, DoubleValue.zero);
+
+ Value argument2 = arguments.expressions().get(1).evaluate(context);
+ return argument1.function(function,argument2);
+ }
+
+ /** Returns a new function node with the children replaced by the given children */
+ @Override
+ public FunctionNode setChildren(List<ExpressionNode> children) {
+ if (arguments.expressions().size() != children.size())
+ throw new IllegalArgumentException("Expected " + arguments.expressions().size() + " children but got " + children.size());
+ if (children.size() == 1)
+ return new FunctionNode(function, children.get(0));
+ else // binary
+ return new FunctionNode(function, children.get(0), children.get(1));
+ }
+
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/IfNode.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/IfNode.java
new file mode 100755
index 00000000000..994c3db9bac
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/IfNode.java
@@ -0,0 +1,86 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.rankingexpression.rule;
+
+import com.yahoo.searchlib.rankingexpression.evaluation.Context;
+import com.yahoo.searchlib.rankingexpression.evaluation.Value;
+
+import java.util.*;
+
+/**
+ * A conditional branch of a ranking expression.
+ *
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
+ * @author bratseth
+ */
+public final class IfNode extends CompositeNode {
+
+ /** The expression nodes that make up this condition. */
+ private final ExpressionNode condition, trueExpression, falseExpression;
+
+ private final Double trueProbability;
+
+ public IfNode(ExpressionNode condition, ExpressionNode trueExpression, ExpressionNode falseExpression) {
+ this(condition, trueExpression, falseExpression, null);
+ }
+
+ /**
+ * Creates a new condition node.
+ *
+ * @param condition the condition of this
+ * @param trueExpression the expression to evaluate if the comparison is true
+ * @param falseExpression the expression to evaluate if the comparison is false
+ * @param trueProbability the probability that the condition will evaluate to true, or null if not known.
+ * @throws IllegalArgumentException if trueProbability is non-null and not between 0.0 and 1.0
+ */
+ public IfNode(ExpressionNode condition, ExpressionNode trueExpression, ExpressionNode falseExpression,
+ Double trueProbability) {
+ if (trueProbability != null && ( trueProbability < 0.0 || trueProbability > 1.0) )
+ throw new IllegalArgumentException("trueProbability must be a between 0.0 and 1.0, not " + trueProbability);
+ this.condition = condition;
+ this.trueProbability = trueProbability;
+ this.trueExpression = trueExpression;
+ this.falseExpression = falseExpression;
+ }
+
+ @Override
+ public List<ExpressionNode> children() {
+ List<ExpressionNode> children = new ArrayList<ExpressionNode>(4);
+ children.add(condition);
+ children.add(trueExpression);
+ children.add(falseExpression);
+ return Collections.unmodifiableList(children);
+ }
+
+ public ExpressionNode getCondition() { return condition; }
+
+ public ExpressionNode getTrueExpression() { return trueExpression; }
+
+ public ExpressionNode getFalseExpression() { return falseExpression; }
+
+ /** The average probability that the condition of this node will evaluate to true, or null if not known */
+ public Double getTrueProbability() { return trueProbability; }
+
+ @Override
+ public String toString(SerializationContext context, Deque<String> path, CompositeNode parent) {
+ return "if (" +
+ condition.toString(context, path, this) + ", " +
+ trueExpression.toString(context, path, this) + ", " +
+ falseExpression.toString(context, path, this) +
+ (trueProbability != null ? ", " + trueProbability : "") + ")";
+ }
+
+ @Override
+ public Value evaluate(Context context) {
+ if (condition.evaluate(context).asBoolean())
+ return trueExpression.evaluate(context);
+ else
+ return falseExpression.evaluate(context);
+ }
+
+ @Override
+ public IfNode setChildren(List<ExpressionNode> children) {
+ if (children.size() != 3) throw new IllegalArgumentException("Expected 3 children but got " + children.size());
+ return new IfNode(children.get(0), children.get(1), children.get(2));
+ }
+
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/NameNode.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/NameNode.java
new file mode 100755
index 00000000000..eee729fa3a8
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/NameNode.java
@@ -0,0 +1,37 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.rankingexpression.rule;
+
+import com.yahoo.searchlib.rankingexpression.evaluation.Context;
+import com.yahoo.searchlib.rankingexpression.evaluation.Value;
+
+import java.util.Deque;
+
+/**
+ * An opaque name in a ranking expression. This is used to represent names passed to the context
+ * and interpreted by the given context in a way which is opaque to the ranking expressions.
+ *
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
+ */
+public final class NameNode extends ExpressionNode {
+
+ private final String name;
+
+ public NameNode(String name) {
+ this.name = name;
+ }
+
+ public String getValue() {
+ return name;
+ }
+
+ @Override
+ public String toString(SerializationContext context, Deque<String> path, CompositeNode parent) {
+ return name;
+ }
+
+ @Override
+ public Value evaluate(Context context) {
+ throw new RuntimeException("Name nodes should never be evaluated");
+ }
+
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/NegativeNode.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/NegativeNode.java
new file mode 100644
index 00000000000..11feddb919e
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/NegativeNode.java
@@ -0,0 +1,49 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.rankingexpression.rule;
+
+import com.yahoo.searchlib.rankingexpression.evaluation.Context;
+import com.yahoo.searchlib.rankingexpression.evaluation.Value;
+
+import java.util.Collections;
+import java.util.Deque;
+import java.util.List;
+
+/**
+ * A node which flips the sign of the value produced from the nested expression
+ *
+ * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a>
+ */
+public class NegativeNode extends CompositeNode {
+
+ private final ExpressionNode value;
+
+ /** Constructs a new negative node */
+ public NegativeNode(ExpressionNode value) {
+ this.value = value;
+ }
+
+ /** Returns the node creating the value negated by this */
+ public ExpressionNode getValue() { return value; }
+
+ @Override
+ public List<ExpressionNode> children() {
+ return Collections.singletonList(value);
+ }
+
+ @Override
+ public String toString(SerializationContext context, Deque<String> path, CompositeNode parent) {
+ return "-" + value.toString(context, path, parent);
+ }
+
+ @Override
+ public Value evaluate(Context context) {
+ return value.evaluate(context).negate();
+ }
+
+ @Override
+ public NegativeNode setChildren(List<ExpressionNode> children) {
+ if (children.size() != 1) throw new IllegalArgumentException("Expected 1 children but got " + children.size());
+ return new NegativeNode(children.get(0));
+ }
+
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/ReferenceNode.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/ReferenceNode.java
new file mode 100755
index 00000000000..2968b414cb8
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/ReferenceNode.java
@@ -0,0 +1,119 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.rankingexpression.rule;
+
+import com.yahoo.searchlib.rankingexpression.ExpressionFunction;
+import com.yahoo.searchlib.rankingexpression.RankingExpression;
+import com.yahoo.searchlib.rankingexpression.evaluation.Context;
+import com.yahoo.searchlib.rankingexpression.evaluation.Value;
+
+import java.util.ArrayDeque;
+import java.util.Deque;
+import java.util.List;
+
+/**
+ * A node referring either to a value in the context or to another named ranking expression.
+ *
+ * @author simon
+ * @author bratseth
+ */
+public final class ReferenceNode extends CompositeNode {
+
+ private final String name, output;
+
+ private final Arguments arguments;
+
+ public ReferenceNode(String name) {
+ this(name, null, null);
+ }
+
+ public ReferenceNode(String name, List<? extends ExpressionNode> arguments, String output) {
+ this.name = name;
+ this.arguments = arguments != null ? new Arguments(arguments) : new Arguments();
+ this.output = output;
+ }
+
+ public String getName() {
+ return name;
+ }
+
+ /** Returns the arguments, never null */
+ public Arguments getArguments() { return arguments; }
+
+ /** Returns a copy of this where the arguments are replaced by the given arguments */
+ public ReferenceNode setArguments(List<ExpressionNode> arguments) {
+ return new ReferenceNode(name, arguments, output);
+ }
+
+ public String getOutput() {
+ return output;
+ }
+
+ /** Returns a copy of this node with a modified output */
+ public ReferenceNode setOutput(String output) {
+ return new ReferenceNode(name, arguments.expressions(), output);
+ }
+
+ /** Returns an empty list as this has no children */
+ @Override
+ public List<ExpressionNode> children() { return arguments.expressions(); }
+
+ @Override
+ public String toString(SerializationContext context, Deque<String> path, CompositeNode parent) {
+ if (path == null)
+ path = new ArrayDeque<>();
+ String myName = this.name;
+ String myOutput = this.output;
+ List<ExpressionNode> myArguments = this.arguments.expressions();
+
+ String resolvedArgument = context.getBinding(myName);
+ if (resolvedArgument != null && this.arguments.expressions().size() == 0 && myOutput == null) {
+ // Replace this whole node with the value of the argument value that it maps to
+ myName = resolvedArgument;
+ myArguments = null;
+ myOutput = null;
+ } else if (context.getFunction(myName) != null) {
+ // Replace this whole node with a reference to another script.
+ ExpressionFunction function = context.getFunction(myName);
+ if (function != null && myArguments != null && function.arguments().size() == myArguments.size() && myOutput == null) {
+ String myPath = name + this.arguments.expressions();
+ if (path.contains(myPath)) {
+ throw new IllegalStateException("Cycle in ranking expression function: " + path);
+ }
+ path.addLast(myPath);
+ ExpressionFunction.Instance instance = function.expand(context, myArguments, path);
+ path.removeLast();
+ context.addFunctionSerialization(RankingExpression.propertyName(instance.getName()), instance.getExpressionString());
+ myName = "rankingExpression(" + instance.getName() + ")";
+ myArguments = null;
+ myOutput = null;
+ }
+ }
+ // Always print the same way, the magic is already done.
+ StringBuilder ret = new StringBuilder(myName);
+ if (myArguments != null && myArguments.size() > 0) {
+ ret.append("(");
+ for (int i = 0; i < myArguments.size(); ++i) {
+ ret.append(myArguments.get(i).toString(context, path, this));
+ if (i < myArguments.size() - 1) {
+ ret.append(",");
+ }
+ }
+ ret.append(")");
+ }
+ ret.append(myOutput != null ? "." + myOutput : "");
+ return ret.toString();
+ }
+
+ @Override
+ public Value evaluate(Context context) {
+ if (arguments.expressions().size()==0 && output==null)
+ return context.get(name);
+ return context.get(name, arguments, output);
+ }
+
+ @Override
+ public CompositeNode setChildren(List<ExpressionNode> newChildren) {
+ return new ReferenceNode(name, newChildren, output);
+ }
+
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/SerializationContext.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/SerializationContext.java
new file mode 100644
index 00000000000..8ea0a886b65
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/SerializationContext.java
@@ -0,0 +1,116 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.rankingexpression.rule;
+
+import com.google.common.collect.ImmutableMap;
+import com.yahoo.searchlib.rankingexpression.ExpressionFunction;
+
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Context needed to serialize an expression to a string. This has the lifetime of a single serialization
+ *
+ * @author bratseth
+ */
+public class SerializationContext {
+
+ /** Expression functions indexed by name */
+ private final ImmutableMap<String, ExpressionFunction> functions;
+
+ /** A cache of already serialized expressions indexed by name */
+ private final Map<String, String> serializedFunctions;
+
+ /** Mapping from argument names to the expressions they resolve to */
+ public final Map<String, String> bindings = new HashMap<>();
+
+ /** Create a context for a single serialization task */
+ public SerializationContext() {
+ this(Collections.emptyList());
+ }
+
+ /** Create a context for a single serialization task */
+ public SerializationContext(Collection<ExpressionFunction> functions) {
+ this(functions, Collections.emptyMap(), new LinkedHashMap<>());
+ }
+
+ /** Create a context for a single serialization task */
+ public SerializationContext(Map<String, ExpressionFunction> functions) {
+ this(functions.values());
+ }
+
+ /** Create a context for a single serialization task */
+ public SerializationContext(List<ExpressionFunction> functions, Map<String, String> bindings) {
+ this(functions, bindings, new LinkedHashMap<>());
+ }
+
+ /**
+ * Create a context for a single serialization task
+ *
+ * @param functions the functions of this
+ * @param bindings the arguments of this
+ * @param serializedFunctions a cache of serializedFunctions - the ownership of this map
+ * is <b>transferred</b> to this and will be modified in it
+ */
+ public SerializationContext(Collection<ExpressionFunction> functions, Map<String, String> bindings,
+ Map<String, String> serializedFunctions) {
+ this(toMap(functions), bindings, serializedFunctions);
+ }
+
+ private static ImmutableMap<String, ExpressionFunction> toMap(Collection<ExpressionFunction> list) {
+ ImmutableMap.Builder<String,ExpressionFunction> mapBuilder = new ImmutableMap.Builder<>();
+ for (ExpressionFunction function : list)
+ mapBuilder.put(function.getName(), function);
+ return mapBuilder.build();
+ }
+
+ /**
+ * Create a context for a single serialization task
+ *
+ * @param functions the functions of this
+ * @param bindings the arguments of this
+ * @param serializedFunctions a cache of serializedFunctions - the ownership of this map
+ * is <b>transferred</b> to this and will be modified in it
+ */
+ public SerializationContext(ImmutableMap<String,ExpressionFunction> functions, Map<String, String> bindings,
+ Map<String, String> serializedFunctions) {
+ this.functions = functions;
+ this.serializedFunctions = serializedFunctions;
+ if (bindings != null)
+ this.bindings.putAll(bindings);
+ }
+
+ /**
+ * Returns a function or null if it isn't defined in this context
+ */
+ public ExpressionFunction getFunction(String name) { return functions.get(name); }
+
+ /** Adds the serialization of a function */
+ public void addFunctionSerialization(String name, String expressionString) {
+ serializedFunctions.put(name, expressionString);
+ }
+
+ /** Returns the existing serialization of a function, or null if none */
+ public String getFunctionSerialization(String name) {
+ return serializedFunctions.get(name);
+ }
+
+ /**
+ * Returns the resolution of an argument, or null if it isn't defined in this context
+ */
+ public String getBinding(String name) { return bindings.get(name); }
+
+ /**
+ * Returns a new context which shares the functions and serialized function map with this but has different
+ * arguments.
+ */
+ public SerializationContext createBinding(Map<String, String> arguments) {
+ return new SerializationContext(this.functions, arguments, this.serializedFunctions);
+ }
+
+ public Map<String, String> serializedFunctions() { return serializedFunctions; }
+
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/SetMembershipNode.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/SetMembershipNode.java
new file mode 100644
index 00000000000..bb3b028f696
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/SetMembershipNode.java
@@ -0,0 +1,72 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.rankingexpression.rule;
+
+import com.google.common.collect.ImmutableList;
+import com.yahoo.searchlib.rankingexpression.evaluation.BooleanValue;
+import com.yahoo.searchlib.rankingexpression.evaluation.Context;
+import com.yahoo.searchlib.rankingexpression.evaluation.Value;
+
+import java.util.*;
+
+/**
+ * A node which returns true or false depending on a set membership test
+ *
+ * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a>
+ * @since 5.1.21
+ */
+public class SetMembershipNode extends BooleanNode {
+
+ private final ExpressionNode testValue;
+
+ private final ImmutableList<ExpressionNode> setValues;
+
+ public SetMembershipNode(ExpressionNode testValue, List<ExpressionNode> setValues) {
+ this.testValue = testValue;
+ this.setValues = ImmutableList.copyOf(setValues);
+ }
+
+ /** The value to check for membership in the set */
+ public ExpressionNode getTestValue() { return testValue; }
+
+ /** Returns an immutable list of the values of the set */
+ public List<ExpressionNode> getSetValues() { return setValues; }
+
+ @Override
+ public List<ExpressionNode> children() {
+ ArrayList<ExpressionNode> children = new ArrayList<>();
+ children.add(testValue);
+ children.addAll(setValues);
+ return children;
+ }
+
+ @Override
+ public String toString(SerializationContext context, Deque<String> path, CompositeNode parent) {
+ StringBuilder b = new StringBuilder(testValue.toString(context, path, this));
+ b.append(" in [");
+ for (int i = 0, len = setValues.size(); i < len; ++i) {
+ b.append(setValues.get(i).toString(context, path, this));
+ if (i < len - 1) {
+ b.append(", ");
+ }
+ }
+ b.append("]");
+ return b.toString();
+ }
+
+ @Override
+ public Value evaluate(Context context) {
+ Value value = testValue.evaluate(context);
+ for (ExpressionNode setValue : setValues) {
+ if (setValue.evaluate(context).equals(value))
+ return new BooleanValue(true);
+ }
+ return new BooleanValue(false);
+ }
+
+ @Override
+ public SetMembershipNode setChildren(List<ExpressionNode> children) {
+ if (children.size()<1) throw new IllegalArgumentException("A set membership test must have at least 1 child");
+ return new SetMembershipNode(children.get(0), children.subList(1, children.size()));
+ }
+
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/TensorMatchNode.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/TensorMatchNode.java
new file mode 100644
index 00000000000..af309b3e8d8
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/TensorMatchNode.java
@@ -0,0 +1,59 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.rankingexpression.rule;
+
+import com.google.common.annotations.Beta;
+import com.yahoo.searchlib.rankingexpression.evaluation.Context;
+import com.yahoo.searchlib.rankingexpression.evaluation.TensorValue;
+import com.yahoo.searchlib.rankingexpression.evaluation.Value;
+
+import java.util.ArrayList;
+import java.util.Deque;
+import java.util.List;
+
+/**
+ * @author bratseth
+ */
+ @Beta
+public class TensorMatchNode extends CompositeNode {
+
+ private final ExpressionNode left, right;
+
+ public TensorMatchNode(ExpressionNode left, ExpressionNode right) {
+ this.left = left;
+ this.right = right;
+ }
+
+ @Override
+ public List<ExpressionNode> children() {
+ List<ExpressionNode> children = new ArrayList<>(2);
+ children.add(left);
+ children.add(right);
+ return children;
+ }
+
+ @Override
+ public CompositeNode setChildren(List<ExpressionNode> children) {
+ if ( children.size() != 2)
+ throw new IllegalArgumentException("A match product must have two children");
+ return new TensorMatchNode(children.get(0), children.get(1));
+
+ }
+
+ @Override
+ public String toString(SerializationContext context, Deque<String> path, CompositeNode parent) {
+ return "match(" + left.toString(context, path, parent) + ", " + right.toString(context, path, parent) + ")";
+ }
+
+ @Override
+ public Value evaluate(Context context) {
+ return asTensor(left.evaluate(context)).match(asTensor(right.evaluate(context)));
+ }
+
+ private TensorValue asTensor(Value value) {
+ if ( ! (value instanceof TensorValue))
+ throw new IllegalArgumentException("Attempted to take the tensor product with an argument which is " +
+ "not a tensor: " + value);
+ return (TensorValue)value;
+ }
+
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/TensorSumNode.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/TensorSumNode.java
new file mode 100644
index 00000000000..a1f83157e20
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/TensorSumNode.java
@@ -0,0 +1,65 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.rankingexpression.rule;
+
+import com.google.common.annotations.Beta;
+import com.yahoo.searchlib.rankingexpression.evaluation.Context;
+import com.yahoo.searchlib.rankingexpression.evaluation.TensorValue;
+import com.yahoo.searchlib.rankingexpression.evaluation.Value;
+
+import java.util.Collections;
+import java.util.Deque;
+import java.util.List;
+import java.util.Optional;
+
+/**
+ * A node which sums over all cells in the argument tensor
+ *
+ * @author bratseth
+ */
+ @Beta
+public class TensorSumNode extends CompositeNode {
+
+ /** The tensor to sum */
+ private final ExpressionNode argument;
+
+ /** The dimension to sum over, or empty to sum all cells to a scalar */
+ private final Optional<String> dimension;
+
+ public TensorSumNode(ExpressionNode argument, Optional<String> dimension) {
+ this.argument = argument;
+ this.dimension = dimension;
+ }
+
+ @Override
+ public List<ExpressionNode> children() {
+ return Collections.singletonList(argument);
+ }
+
+ @Override
+ public CompositeNode setChildren(List<ExpressionNode> children) {
+ if (children.size() != 1) throw new IllegalArgumentException("A tensor sum node must have one tensor argument");
+ return new TensorSumNode(children.get(0), dimension);
+ }
+
+ @Override
+ public String toString(SerializationContext context, Deque<String> path, CompositeNode parent) {
+ return "sum(" +
+ argument.toString(context, path, parent) +
+ ( dimension.isPresent() ? ", " + dimension.get() : "" ) +
+ ")";
+ }
+
+ @Override
+ public Value evaluate(Context context) {
+ Value argumentValue = argument.evaluate(context);
+ if ( ! ( argumentValue instanceof TensorValue))
+ throw new IllegalArgumentException("Attempted to take the tensor sum of argument '" + argument + "', " +
+ "but this returns " + argumentValue + ", not a tensor");
+ TensorValue tensorArgument = (TensorValue)argumentValue;
+ if (dimension.isPresent())
+ return tensorArgument.sum(dimension.get());
+ else
+ return tensorArgument.sum();
+ }
+
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/TruthOperator.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/TruthOperator.java
new file mode 100644
index 00000000000..26e8b183c21
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/TruthOperator.java
@@ -0,0 +1,48 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.rankingexpression.rule;
+
+import java.io.Serializable;
+
+/**
+ * A mathematical operator
+ *
+ * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a>
+ */
+public enum TruthOperator implements Serializable {
+
+ SMALLER("<") { public boolean evaluate(double x, double y) { return x<y; } },
+ SMALLEREQUAL("<=") { public boolean evaluate(double x, double y) { return x<=y; } },
+ EQUAL("==") { public boolean evaluate(double x, double y) { return x==y; } },
+ APPROX_EQUAL("~=") { public boolean evaluate(double x, double y) { return approxEqual(x,y); } },
+ LARGER(">") { public boolean evaluate(double x, double y) { return x>y; } },
+ LARGEREQUAL(">=") { public boolean evaluate(double x, double y) { return x>=y; } };
+
+ private final String operatorString;
+
+ TruthOperator(String operatorString) {
+ this.operatorString=operatorString;
+ }
+
+ /** Perform the truth operation on the input */
+ public abstract boolean evaluate(double x, double y);
+
+ public @Override String toString() { return operatorString; }
+
+ public static TruthOperator fromString(String string) {
+ for (TruthOperator operator : values())
+ if (operator.toString().equals(string))
+ return operator;
+ throw new IllegalArgumentException("Illegal truth operator '" + string + "'");
+ }
+
+ private static boolean approxEqual(double x,double y) {
+ if (y < -1.0 || y > 1.0) {
+ x = Math.nextAfter(x/y, 1.0);
+ y = 1.0;
+ } else {
+ x = Math.nextAfter(x, y);
+ }
+ return x==y;
+ }
+
+} \ No newline at end of file
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/package-info.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/package-info.java
new file mode 100644
index 00000000000..d6a27aae0f8
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/package-info.java
@@ -0,0 +1,7 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+@PublicApi
+@ExportPackage
+package com.yahoo.searchlib.rankingexpression.rule;
+
+import com.yahoo.api.annotations.PublicApi;
+import com.yahoo.osgi.annotation.ExportPackage;
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/transform/ConstantDereferencer.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/transform/ConstantDereferencer.java
new file mode 100644
index 00000000000..bd9ad43f155
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/transform/ConstantDereferencer.java
@@ -0,0 +1,62 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.rankingexpression.transform;
+
+import com.yahoo.searchlib.rankingexpression.evaluation.TensorValue;
+import com.yahoo.searchlib.rankingexpression.evaluation.Value;
+import com.yahoo.searchlib.rankingexpression.rule.CompositeNode;
+import com.yahoo.searchlib.rankingexpression.rule.ConstantNode;
+import com.yahoo.searchlib.rankingexpression.rule.ExpressionNode;
+import com.yahoo.searchlib.rankingexpression.rule.ReferenceNode;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Replaces "features" which found in the given constants by their constant value
+ *
+ * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a>
+ */
+public class ConstantDereferencer extends ExpressionTransformer {
+
+ /** The map of constants to dereference */
+ private final Map<String, Value> constants;
+
+ public ConstantDereferencer(Map<String, Value> constants) {
+ this.constants = constants;
+ }
+
+ @Override
+ public ExpressionNode transform(ExpressionNode node) {
+ if (node instanceof ReferenceNode)
+ return transformFeature((ReferenceNode) node);
+ else if (node instanceof CompositeNode)
+ return transformChildren((CompositeNode)node);
+ else
+ return node;
+ }
+
+ private ExpressionNode transformFeature(ReferenceNode node) {
+ if (!node.getArguments().isEmpty())
+ return transformArguments(node);
+ else
+ return transformConstantReference(node);
+ }
+
+ private ExpressionNode transformArguments(ReferenceNode node) {
+ List<ExpressionNode> arguments = node.getArguments().expressions();
+ List<ExpressionNode> transformedArguments = new ArrayList<>(arguments.size());
+ for (ExpressionNode argument : arguments)
+ transformedArguments.add(transform(argument));
+ return node.setArguments(transformedArguments);
+ }
+
+ private ExpressionNode transformConstantReference(ReferenceNode node) {
+ Value value = constants.get(node.getName());
+ if (value == null || (value instanceof TensorValue)) {
+ return node; // not a value constant reference
+ }
+ return new ConstantNode(value.freeze());
+ }
+
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/transform/ExpressionTransformer.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/transform/ExpressionTransformer.java
new file mode 100644
index 00000000000..d8995bd8752
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/transform/ExpressionTransformer.java
@@ -0,0 +1,38 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.rankingexpression.transform;
+
+import com.yahoo.searchlib.rankingexpression.RankingExpression;
+import com.yahoo.searchlib.rankingexpression.rule.CompositeNode;
+import com.yahoo.searchlib.rankingexpression.rule.ExpressionNode;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Superclass of expression transformers
+ *
+ * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a>
+ */
+public abstract class ExpressionTransformer {
+
+ public RankingExpression transform(RankingExpression expression) {
+ return new RankingExpression(expression.getName(), transform(expression.getRoot()));
+ }
+
+ /** Transforms an expression node and returns the transformed node */
+ public abstract ExpressionNode transform(ExpressionNode node);
+
+ /**
+ * Utility method which calls transform on each child of the given node and return the resulting transformed
+ * composite
+ */
+ protected CompositeNode transformChildren(CompositeNode node) {
+ List<ExpressionNode> children = node.children();
+ List<ExpressionNode> transformedChildren = new ArrayList<>(children.size());
+ for (ExpressionNode child : children)
+ transformedChildren.add(transform(child));
+ return node.setChildren(transformedChildren);
+ }
+
+
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/transform/Simplifier.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/transform/Simplifier.java
new file mode 100644
index 00000000000..5b5a06c99bf
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/transform/Simplifier.java
@@ -0,0 +1,131 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.rankingexpression.transform;
+
+import com.yahoo.searchlib.rankingexpression.evaluation.BooleanValue;
+import com.yahoo.searchlib.rankingexpression.evaluation.DoubleValue;
+import com.yahoo.searchlib.rankingexpression.evaluation.Value;
+import com.yahoo.searchlib.rankingexpression.rule.ArithmeticNode;
+import com.yahoo.searchlib.rankingexpression.rule.ArithmeticOperator;
+import com.yahoo.searchlib.rankingexpression.rule.CompositeNode;
+import com.yahoo.searchlib.rankingexpression.rule.ConstantNode;
+import com.yahoo.searchlib.rankingexpression.rule.EmbracedNode;
+import com.yahoo.searchlib.rankingexpression.rule.ExpressionNode;
+import com.yahoo.searchlib.rankingexpression.rule.ReferenceNode;
+import com.yahoo.searchlib.rankingexpression.rule.IfNode;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Performs simple algebraic simplification of expressions
+ *
+ * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a>
+ */
+public class Simplifier extends ExpressionTransformer {
+
+ @Override
+ public ExpressionNode transform(ExpressionNode node) {
+ if (node instanceof CompositeNode)
+ node = transformChildren((CompositeNode) node); // depth first
+ if (node instanceof IfNode)
+ node = transformIf((IfNode) node);
+ if (node instanceof EmbracedNode && hasSingleUndividableChild((EmbracedNode)node))
+ node = ((EmbracedNode)node).children().get(0);
+ if (node instanceof ArithmeticNode)
+ node = transformArithmetic((ArithmeticNode) node);
+ return node;
+ }
+
+ private boolean hasSingleUndividableChild(EmbracedNode node) {
+ if (node.children().size() > 1) return false;
+ if (node.children().get(0) instanceof ArithmeticNode) return false;
+ return true;
+ }
+
+ private ExpressionNode transformArithmetic(ArithmeticNode node) {
+ if (node.children().size() > 1) {
+ List<ExpressionNode> children = new ArrayList<>(node.children());
+ List<ArithmeticOperator> operators = new ArrayList<>(node.operators());
+ for (ArithmeticOperator operator : ArithmeticOperator.operatorsByPrecedence)
+ transform(operator, children, operators);
+ node = new ArithmeticNode(children, operators);
+ }
+
+ if (isConstant(node))
+ return new ConstantNode(node.evaluate(null));
+ else if (allMultiplicationOrDivision(node) && hasZero(node)) // disregarding the /0 case
+ return new ConstantNode(new DoubleValue(0));
+ else
+ return node;
+ }
+
+ private void transform(ArithmeticOperator operator, List<ExpressionNode> children, List<ArithmeticOperator> operators) {
+ int i = 0;
+ while (i < children.size()-1) {
+ if ( ! operators.get(i).equals(operator)) {
+ i++;
+ continue;
+ }
+
+ ExpressionNode child1 = children.get(i);
+ ExpressionNode child2 = children.get(i + 1);
+ if (isConstant(child1) && isConstant(child2) && hasPrecedence(operators, i)) {
+ Value evaluated = new ArithmeticNode(child1, operators.remove(i), child2).evaluate(null);
+ children.set(i, new ConstantNode(evaluated.freeze()));
+ children.remove(i+1);
+ }
+ else { // try the next index
+ i++;
+ }
+ }
+ }
+
+ /**
+ * Returns true if the operator at i binds at least as strongly as the neighbouring operators on each side (if any).
+ * This check works because we simplify by decreasing precedence, so neighbours will either be single constant values
+ * or a more complex expression that can't be simplified and hence also prevents the simplification in question here.
+ */
+ private boolean hasPrecedence(List<ArithmeticOperator> operators, int i) {
+ if (i > 0 && operators.get(i-1).hasPrecedenceOver(operators.get(i))) return false;
+ if (i < operators.size()-1 && operators.get(i+1).hasPrecedenceOver(operators.get(i))) return false;
+ return true;
+ }
+
+ private ExpressionNode transformIf(IfNode node) {
+ if ( ! isConstant(node.getCondition())) return node;
+
+ if (((BooleanValue)node.getCondition().evaluate(null)).asBoolean())
+ return node.getTrueExpression();
+ else
+ return node.getFalseExpression();
+ }
+
+ private boolean allMultiplicationOrDivision(ArithmeticNode node) {
+ for (ArithmeticOperator o : node.operators())
+ if (o == ArithmeticOperator.PLUS || o == ArithmeticOperator.MINUS)
+ return false;
+ return true;
+ }
+
+ private boolean hasZero(ArithmeticNode node) {
+ for (ExpressionNode child : node.children()) {
+ if ( ! (child instanceof ConstantNode)) continue;
+ ConstantNode constant = (ConstantNode)child;
+ if ( ! constant.getValue().hasDouble()) return false;
+ if (constant.getValue().asDouble() == 0.0)
+ return true;
+ }
+ return false;
+ }
+
+ private boolean isConstant(ExpressionNode node) {
+ if (node instanceof ConstantNode) return true;
+ if (node instanceof ReferenceNode) return false;
+ if ( ! (node instanceof CompositeNode)) return false;
+ for (ExpressionNode child : ((CompositeNode)node).children()) {
+ if ( ! isConstant(child)) return false;
+ }
+ return true;
+ }
+
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/transform/package-info.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/transform/package-info.java
new file mode 100644
index 00000000000..da4e4f64615
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/transform/package-info.java
@@ -0,0 +1,6 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+@ExportPackage
+package com.yahoo.searchlib.rankingexpression.transform;
+
+import com.yahoo.api.annotations.PublicApi;
+import com.yahoo.osgi.annotation.ExportPackage;
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/treenet/TreeNetConverter.java b/searchlib/src/main/java/com/yahoo/searchlib/treenet/TreeNetConverter.java
new file mode 100755
index 00000000000..c147c3a33b8
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/treenet/TreeNetConverter.java
@@ -0,0 +1,35 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.treenet;
+
+import com.yahoo.searchlib.treenet.parser.TreeNetParser;
+
+import java.io.FileNotFoundException;
+import java.io.FileReader;
+
+/**
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
+ */
+public class TreeNetConverter {
+
+ /**
+ * Implements an application main function so that the converter can be used as a command-line tool.
+ *
+ * @param args List of arguments.
+ */
+ public static void main(String[] args) {
+ if (args.length != 1) {
+ System.err.println("Usage: TreeNetConverter <filename>");
+ System.exit(1);
+ }
+ try {
+ TreeNetParser parser = new TreeNetParser(new FileReader(args[0]));
+ System.out.println(parser.treeNet().toRankingExpression());
+ } catch (FileNotFoundException e) {
+ System.err.println("Could not find file '" + args[0] + "'.");
+ System.exit(1);
+ } catch (Exception e) {
+ System.err.println("An error occured while parsing the content of file '" + args[0] + "': " + e);
+ System.exit(1);
+ }
+ }
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/treenet/package-info.java b/searchlib/src/main/java/com/yahoo/searchlib/treenet/package-info.java
new file mode 100644
index 00000000000..debffbdcf5b
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/treenet/package-info.java
@@ -0,0 +1,5 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+@ExportPackage
+package com.yahoo.searchlib.treenet;
+
+import com.yahoo.osgi.annotation.ExportPackage;
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/treenet/parser/package-info.java b/searchlib/src/main/java/com/yahoo/searchlib/treenet/parser/package-info.java
new file mode 100644
index 00000000000..f3244457c66
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/treenet/parser/package-info.java
@@ -0,0 +1,5 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+@ExportPackage
+package com.yahoo.searchlib.treenet.parser;
+
+import com.yahoo.osgi.annotation.ExportPackage;
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/treenet/rule/ComparisonCondition.java b/searchlib/src/main/java/com/yahoo/searchlib/treenet/rule/ComparisonCondition.java
new file mode 100755
index 00000000000..1855a8a5674
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/treenet/rule/ComparisonCondition.java
@@ -0,0 +1,39 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.treenet.rule;
+
+import com.yahoo.java7compat.Util;
+
+/**
+ * Represents a condition which comparing two values
+ *
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
+ */
+public class ComparisonCondition extends Condition {
+
+ private final double rhs;
+
+ /**
+ * Constructs a new instance of this class.
+ *
+ * @param lhs The name of the feature to compare to a constant.
+ * @param rhs The constant to compare the feature with.
+ * @param ift The label to jump to if left &lt; right.
+ * @param iff The label to jump to if left &gt;= right;
+ */
+ public ComparisonCondition(String lhs, double rhs, String ift, String iff) {
+ super(lhs, ift, iff);
+ this.rhs = rhs;
+ }
+
+ /**
+ * Returns the constant to compare the feature with.
+ *
+ * @return The constant.
+ */
+ public double getConstant() { return rhs; }
+
+ @Override
+ public String conditionToRankingExpression() {
+ return "< " + Util.toJava7String(rhs);
+ }
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/treenet/rule/Condition.java b/searchlib/src/main/java/com/yahoo/searchlib/treenet/rule/Condition.java
new file mode 100644
index 00000000000..4506f4970b0
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/treenet/rule/Condition.java
@@ -0,0 +1,54 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.treenet.rule;
+
+import java.util.Iterator;
+
+/**
+ * Represents a condition
+ *
+ * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a>
+ */
+public abstract class Condition extends TreeNode {
+
+ private final String leftValue;
+ private final String trueLabel;
+ private final String falseLabel;
+
+ public Condition(String leftValue, String trueLabel, String falseLabel) {
+ this.leftValue = leftValue;
+ this.trueLabel = trueLabel;
+ this.falseLabel = falseLabel;
+ }
+
+ /** Returns the name of the feature to compare to a constant. */
+ public String getLeftValue() { return leftValue; }
+
+ /** Return the label to jump to if this condition is true. */
+ public String getTrueLabel() { return trueLabel; }
+
+ /** Return the label to jump to if this condition is false. */
+ public String getFalseLabel() { return falseLabel; }
+
+ @Override
+ public final String toRankingExpression() {
+ StringBuilder b = new StringBuilder("if (");
+ b.append(getLeftValue());
+ b.append(" ");
+ b.append(conditionToRankingExpression());
+ b.append(", ");
+ b.append(getParent().getNodes().get(getTrueLabel()).toRankingExpression());
+ b.append(", ");
+ b.append(getParent().getNodes().get(getFalseLabel()).toRankingExpression());
+ b.append(")");
+ return b.toString();
+ }
+
+ /**
+ * Returns the ranking expression string for the condition part of this condition, i.e the ... part of
+ * <pre>
+ * if(leftValue ..., trueExpression, falseExpression)
+ * </pre>
+ */
+ protected abstract String conditionToRankingExpression();
+
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/treenet/rule/Response.java b/searchlib/src/main/java/com/yahoo/searchlib/treenet/rule/Response.java
new file mode 100755
index 00000000000..347dd84f419
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/treenet/rule/Response.java
@@ -0,0 +1,45 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.treenet.rule;
+
+/**
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
+ */
+public class Response extends TreeNode {
+
+ // The id of the next tree to run after this.
+ private final Double value;
+
+ // The value of this response.
+ private final String next;
+
+ /**
+ * Constructs a new response.
+ *
+ * @param next The id of the next tree to run after this.
+ * @param value The value of this response.
+ */
+ public Response(Double value, String next) {
+ super();
+ this.value = value;
+ this.next = next;
+ }
+
+ /**
+ * Returns the value of this response.
+ */
+ public Double getValue() {
+ return value;
+ }
+
+ /**
+ * Returns the id of the next tree to run after this.
+ */
+ public String getNext() {
+ return next;
+ }
+
+ @Override
+ public String toRankingExpression() {
+ return value.toString();
+ }
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/treenet/rule/SetMembershipCondition.java b/searchlib/src/main/java/com/yahoo/searchlib/treenet/rule/SetMembershipCondition.java
new file mode 100755
index 00000000000..95841bf829f
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/treenet/rule/SetMembershipCondition.java
@@ -0,0 +1,57 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.treenet.rule;
+
+import com.yahoo.java7compat.Util;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.List;
+
+/**
+ * Represents a set membership test on the form <code>feature IN (integer1, integer2 ...)</code>
+ *
+ * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a>
+ * @since 5.1.21
+ */
+public class SetMembershipCondition extends Condition {
+
+ private final List<Object> setValues;
+
+ /**
+ * Constructs a new instance of this class.
+ *
+ * @param testValue the name of the feature to test
+ * @param setValues the set of values to compare to
+ * @param trueLabel the label to jump to if the value is in the set
+ * @param falseLabel the label to jumt to if the value is not in the set
+ */
+ public SetMembershipCondition(String testValue, List<Object> setValues, String trueLabel, String falseLabel) {
+ super(testValue, trueLabel, falseLabel);
+ this.setValues = Collections.unmodifiableList(new ArrayList<>(setValues));
+ }
+
+ /** Returns the unmodifiable set of values to check */
+ public List<Object> getSetValues() { return setValues; }
+
+ @Override
+ protected String conditionToRankingExpression() {
+ StringBuilder b = new StringBuilder("in [");
+ for (Iterator<Object> i = setValues.iterator(); i.hasNext(); ) {
+ Object value = i.next();
+ if (value instanceof String)
+ b.append("\"").append(value).append("\"");
+ else if (value instanceof Integer)
+ b.append(value);
+ else
+ throw new RuntimeException("Excepted a string or integer in a set membership test, not a " +
+ value.getClass() + ": " + value);
+
+ if (i.hasNext())
+ b.append(",");
+ }
+ b.append("]");
+ return b.toString();
+ }
+
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/treenet/rule/Tree.java b/searchlib/src/main/java/com/yahoo/searchlib/treenet/rule/Tree.java
new file mode 100755
index 00000000000..2a7191baeba
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/treenet/rule/Tree.java
@@ -0,0 +1,110 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.treenet.rule;
+
+import java.util.Map;
+
+/**
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
+ */
+public class Tree {
+
+ private final String name;
+
+ // The parent tree net of this.
+ private TreeNet parent;
+
+ // Returns the id of the next tree to run after this.
+ private String next;
+
+ // The initial response value of this tree, may be null.
+ private final Double value;
+
+ // The id of the first condition or response to run in this tree.
+ private final String begin;
+
+ // All named nodes of this tree.
+ private final Map<String, TreeNode> nodes;
+
+ /**
+ * Constructs a new tree.
+ *
+ * @param name The name of this tree, used for error outputs.
+ * @param value The initial response value of this tree, may be null.
+ * @param begin The id of the first condition or response to run in this tree.
+ * @param nodes All named nodes of this tree.
+ */
+ public Tree(String name, Double value, String begin, Map<String, TreeNode> nodes) {
+ this.name = name;
+ this.value = value;
+ this.begin = begin;
+ this.nodes = nodes;
+
+ this.next = null;
+ for (TreeNode node : this.nodes.values()) {
+ node.setParent(this);
+ if (node instanceof Response) {
+ String next = ((Response)node).getNext();
+ if (this.next == null) {
+ this.next = next;
+ } else if (!this.next.equals(next)) {
+ throw new IllegalStateException("Not all child nodes of tree '" + name + "' agree on the next " +
+ "tree to run. Initial name was '" + this.next + "', conflicting " +
+ "name is '" + next + "'.");
+ }
+ }
+ }
+ }
+
+ public String getName() { return name; }
+
+ /**
+ * Returns the parent tree net of this.
+ */
+ public TreeNet getParent() { return parent; }
+
+ /**
+ * Sets the parent tree net of this.
+ *
+ * @param parent The parent tree net.
+ * @return This, to allow chaining.
+ */
+ public Tree setParent(TreeNet parent) {
+ this.parent = parent;
+ return this;
+ }
+
+ /**
+ * Returns the id of the next tree to run after this.
+ */
+ public String getNext() {
+ return next;
+ }
+
+ /**
+ * Returns the initial response value of this tree, may be null.
+ */
+ public Double getValue() {
+ return value;
+ }
+
+ /**
+ * Returns the id of the first condition or response to run in this tree.
+ */
+ public String getBegin() {
+ return begin;
+ }
+
+ /**
+ * Returns all named nodes of this tree.
+ */
+ public Map<String, TreeNode> getNodes() {
+ return nodes;
+ }
+
+ /**
+ * Returns a ranking expression equivalent of this tree.
+ */
+ public String toRankingExpression() {
+ return nodes.get(begin).toRankingExpression();
+ }
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/treenet/rule/TreeNet.java b/searchlib/src/main/java/com/yahoo/searchlib/treenet/rule/TreeNet.java
new file mode 100755
index 00000000000..1db13b6c12e
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/treenet/rule/TreeNet.java
@@ -0,0 +1,63 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.treenet.rule;
+
+import java.util.Map;
+
+/**
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
+ */
+public class TreeNet {
+
+ // The id of the first tree to run in this net.
+ private String begin;
+
+ // All named trees of this net.
+ private final Map<String, Tree> trees;
+
+ /**
+ * Constructs a new tree net.
+ *
+ * @param begin The id of the first tree to run in this net.
+ * @param trees All named trees of this net.
+ */
+ public TreeNet(String begin, Map<String, Tree> trees) {
+ this.begin = begin;
+ this.trees = trees;
+ for (Tree tree : this.trees.values()) {
+ tree.setParent(this);
+ }
+ }
+
+ /**
+ * Returns the id of the first tree to run in this net.
+ */
+ public String getBegin() {
+ return begin;
+ }
+
+ /**
+ * Returns all named trees of this net.
+ */
+ public Map<String, Tree> getTrees() {
+ return trees;
+ }
+
+ /**
+ * Returns a ranking expression equivalent of this net.
+ */
+ public String toRankingExpression() {
+ StringBuilder ret = new StringBuilder();
+ String next = begin;
+ while (next != null) {
+ Tree tree = trees.get(next);
+ if (tree.getBegin() != null) {
+ if (ret.length() > 0) {
+ ret.append(" + \n");
+ }
+ ret.append(tree.toRankingExpression());
+ }
+ next = tree.getNext();
+ }
+ return ret.toString();
+ }
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/treenet/rule/TreeNode.java b/searchlib/src/main/java/com/yahoo/searchlib/treenet/rule/TreeNode.java
new file mode 100755
index 00000000000..a637adafc73
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/treenet/rule/TreeNode.java
@@ -0,0 +1,34 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.treenet.rule;
+
+/**
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
+ */
+public abstract class TreeNode {
+
+ // The parent tree of this.
+ private Tree parent = null;
+
+ /**
+ * Returns the parent tree of this.
+ */
+ public Tree getParent() {
+ return parent;
+ }
+
+ /**
+ * Sets the parent tree net of this.
+ *
+ * @param parent The parent tree net.
+ * @return This, to allow chaining.
+ */
+ public TreeNode setParent(Tree parent) {
+ this.parent = parent;
+ return this;
+ }
+
+ /**
+ * Returns a ranking expression equivalent of this net.
+ */
+ public abstract String toRankingExpression();
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/treenet/rule/package-info.java b/searchlib/src/main/java/com/yahoo/searchlib/treenet/rule/package-info.java
new file mode 100644
index 00000000000..aae05b07627
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/treenet/rule/package-info.java
@@ -0,0 +1,5 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+@ExportPackage
+package com.yahoo.searchlib.treenet.rule;
+
+import com.yahoo.osgi.annotation.ExportPackage;
diff --git a/searchlib/src/main/javacc/RankingExpressionParser.jj b/searchlib/src/main/javacc/RankingExpressionParser.jj
new file mode 100755
index 00000000000..40dc31f13ae
--- /dev/null
+++ b/searchlib/src/main/javacc/RankingExpressionParser.jj
@@ -0,0 +1,479 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/**
+ * When this file is changed, do "mvn generate-sources" to rebuild the parser.
+ *
+ * @author bratseth
+ */
+options {
+ CACHE_TOKENS = true;
+ STATIC = false;
+ DEBUG_PARSER = false;
+ USER_TOKEN_MANAGER = false;
+ ERROR_REPORTING = true;
+ USER_CHAR_STREAM = false;
+}
+
+PARSER_BEGIN(RankingExpressionParser)
+
+package com.yahoo.searchlib.rankingexpression.parser;
+
+import com.yahoo.searchlib.rankingexpression.rule.*;
+import com.yahoo.searchlib.rankingexpression.evaluation.Value;
+import com.yahoo.searchlib.rankingexpression.evaluation.StringValue;
+import com.yahoo.searchlib.rankingexpression.evaluation.TensorValue;
+import com.yahoo.tensor.MapTensor;
+import com.yahoo.tensor.TensorAddress;
+import java.util.Collections;
+import java.util.Map;
+import java.util.LinkedHashMap;
+import java.util.Arrays;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Optional;
+
+public class RankingExpressionParser {
+
+}
+
+PARSER_END(RankingExpressionParser)
+
+SKIP :
+{
+ <[" ","\n","\r","\t"]>
+}
+
+TOKEN :
+{
+ <INTEGER: <DECIMAL> (["l","L"])? | <HEX> (["l","L"])? | <OCTAL> (["l","L"])?> |
+ <#DECIMAL: ["1"-"9"] (["0"-"9"])*> |
+ <#HEX: "0" ["x","X"] (["0"-"9","a"-"f","A"-"F"])+> |
+ <#OCTAL: "0" (["0"-"7"])*> |
+ <FLOAT: (["0"-"9"])+ ("." (["0"-"9"])*)? (<EXPONENT>)? (["f","F","d","D"])?> |
+ <#EXPONENT: ["e","E"] (["+","-"])? (["0"-"9"])+>
+}
+
+TOKEN :
+{
+ <LBRACE: "("> |
+ <RBRACE: ")"> |
+ <LSQUARE: "["> |
+ <RSQUARE: "]"> |
+ <LCURLY: "{"> |
+ <RCURLY: "}"> |
+ <ADD: "+"> |
+ <SUB: "-"> |
+ <DIV: "/"> |
+ <MUL: "*"> |
+ <DOT: "."> |
+ <DOLLAR: "$"> |
+ <COMMA: ","> |
+ <COLON: ":"> |
+ <LE: "<="> |
+ <LT: "<"> |
+ <EQ: "=="> |
+ <AQ: "~="> |
+ <GE: ">="> |
+ <GT: ">"> |
+ <STRING: ("\"" (~["\""] | "\\\"")* "\"") |
+ ("'" (~["'"] | "\\'")* "'")> |
+ <IF: "if"> |
+ <COSH: "cosh"> |
+ <SINH: "sinh"> |
+ <TANH: "tanh"> |
+ <COS: "cos"> |
+ <SIN: "sin"> |
+ <TAN: "tan"> |
+ <ACOS: "acos"> |
+ <ASIN: "asin"> |
+ <ATAN2: "atan2"> |
+ <ATAN: "atan"> |
+ <EXP: "exp"> |
+ <LDEXP: "ldexp"> |
+ <LOG10: "log10"> |
+ <LOG: "log"> |
+ <POW: "pow"> |
+ <SQRT: "sqrt"> |
+ <CEIL: "ceil"> |
+ <FABS: "fabs"> |
+ <FLOOR: "floor"> |
+ <FMOD: "fmod"> |
+ <MIN: "min"> |
+ <MAX: "max"> |
+ <ISNAN: "isNan"> |
+ <IN: "in"> |
+ <SUM: "sum"> |
+ <MATCH: "match"> |
+ <IDENTIFIER: (["A"-"Z","a"-"z","0"-"9","_","@"](["A"-"Z","a"-"z","0"-"9","_","@","$"])*)>
+}
+
+// Declare a special skip token for comments.
+SPECIAL_TOKEN :
+{
+ <SINGLE_LINE_COMMENT: "#" (~["\n","\r"])* >
+}
+
+List<ReferenceNode> featureList() :
+{
+ List<ReferenceNode> ret = new ArrayList<ReferenceNode>();
+ ReferenceNode exp;
+}
+{
+ ( ( exp = feature() { ret.add(exp); } )+ <EOF> )
+ { return ret; }
+}
+
+ExpressionNode rankingExpression() :
+{
+ ExpressionNode ret;
+}
+{
+ ( ret = expression() <EOF> )
+ { return ret; }
+}
+
+ExpressionNode expression() :
+{
+ ExpressionNode left, right;
+ List<ExpressionNode> rightList;
+ TruthOperator comparatorOp;
+}
+{
+ ( left = arithmeticExpression()
+ (
+ ( comparatorOp = comparator() right = arithmeticExpression() { left = new ComparisonNode(left, comparatorOp, right); } ) |
+ ( <IN> rightList = expressionList() { left = new SetMembershipNode(left, rightList); } )
+ ) *
+ )
+ { return left; }
+}
+
+ExpressionNode arithmeticExpression() :
+{
+ ExpressionNode left, right = null;
+ ArithmeticOperator arithmeticOp;
+}
+{
+ ( left = value()
+ ( arithmeticOp = arithmetic() right = value() { left = ArithmeticNode.resolve(left, arithmeticOp, right); } ) *
+ )
+ { return left; }
+}
+
+ArithmeticOperator arithmetic() : { }
+{
+ ( <ADD> { return ArithmeticOperator.PLUS; } |
+ <SUB> { return ArithmeticOperator.MINUS; } |
+ <DIV> { return ArithmeticOperator.DIVIDE; } |
+ <MUL> { return ArithmeticOperator.MULTIPLY; } )
+ { return null; }
+}
+
+TruthOperator comparator() : { }
+{
+ ( <LE> { return TruthOperator.SMALLEREQUAL; } |
+ <LT> { return TruthOperator.SMALLER; } |
+ <EQ> { return TruthOperator.EQUAL; } |
+ <AQ> { return TruthOperator.APPROX_EQUAL; } |
+ <GE> { return TruthOperator.LARGEREQUAL; } |
+ <GT> { return TruthOperator.LARGER; } )
+ { return null; }
+}
+
+ExpressionNode value() :
+{
+ ExpressionNode ret;
+ boolean neg = false;
+}
+{
+ ( [ LOOKAHEAD(2) <SUB> { neg = true; } ]
+ ( ret = constantPrimitive() |
+ ret = constantTensor() |
+ LOOKAHEAD(2) ret = ifExpression() |
+ LOOKAHEAD(2) ret = function() |
+ ret = feature() |
+ ret = queryFeature() |
+ ( <LBRACE> ret = expression() <RBRACE> { ret = new EmbracedNode(ret); } ) ) )
+ { return neg ? new NegativeNode(ret) : ret; }
+}
+
+IfNode ifExpression() :
+{
+ ExpressionNode condition, ifTrue, ifFalse;
+ Double trueProbability = null;
+}
+{
+ ( <IF> <LBRACE> ( condition = expression() )
+ <COMMA> ifTrue = expression() <COMMA> ifFalse = expression() ( <COMMA> trueProbability = number() )? <RBRACE> )
+ {
+ return new IfNode(condition, ifTrue, ifFalse, trueProbability);
+ }
+}
+
+ReferenceNode queryFeature() :
+{
+ String name;
+}
+{
+ ( <DOLLAR> name = identifier() )
+ { return new ReferenceNode("query", Arrays.asList((ExpressionNode)new NameNode(name)), null); }
+}
+
+ReferenceNode feature() :
+{
+ List<ExpressionNode> args = null;
+ String name, out = null;
+}
+{
+ ( name = identifier() [ <LBRACE> args = args() <RBRACE> ] [ <DOT> out = outs() ] )
+ { return new ReferenceNode(name, args, out); }
+}
+
+String outs() :
+{
+ StringBuilder ret = new StringBuilder();
+ String str;
+}
+{
+ ( str = out() { ret.append(str); }
+ ( <DOT> { ret.append(token.image); }
+ str = out() { ret.append(str); } )* )
+ { return ret.toString(); }
+}
+
+String out() :
+{
+ Function fnc;
+ String name;
+}
+{
+ ( <INTEGER> { return token.image; } |
+ <FLOAT> { return token.image; } |
+ name = identifier() { return name; } )
+ { return null; }
+}
+
+List<ExpressionNode> args() :
+{
+ List<ExpressionNode> arguments = new ArrayList<ExpressionNode>();
+ ExpressionNode argument;
+}
+{
+ ( argument = arg() { arguments.add(argument); } ( <COMMA> argument = arg() { arguments.add(argument); } )* )
+ { return arguments; }
+}
+
+// TODO: Replace use of this for macro arguments with value()
+// For that to work with the current search execution framework
+// we need to generate another macro for the argument such that we can replace
+// instances of the argument with the reference to that macro in the same way
+// as we replace by constants/names today (this can make for some fun combinatorial explosion).
+// Simon also points out that we should stop doing macro expansion in the toString of a macro.
+// - Jon 2014-05-02
+ExpressionNode arg() :
+{
+ ExpressionNode ret;
+ String name;
+ Function fnc;
+}
+{
+ ( ret = constantPrimitive() |
+ ret = constantTensor() |
+ LOOKAHEAD(2) ret = feature() |
+ name = identifier() { ret = new NameNode(name); } )
+ { return ret; }
+}
+
+ExpressionNode function() :
+{
+ ExpressionNode function;
+}
+{
+ ( function = scalarFunction() | function = tensorFunction() )
+ { return function; }
+}
+
+FunctionNode scalarFunction() :
+{
+ Function function;
+ ExpressionNode arg1, arg2;
+}
+{
+ (
+ ( function = unaryFunctionName() <LBRACE> arg1 = expression() <RBRACE> )
+ { return new FunctionNode(function, arg1); }
+ ) |
+ (
+ ( function = binaryFunctionName() <LBRACE> arg1 = expression() <COMMA> arg2 = expression() <RBRACE> )
+ { return new FunctionNode(function, arg1, arg2); }
+ )
+}
+
+ExpressionNode tensorFunction() :
+{
+ ExpressionNode tensor1, tensor2;
+ String dimension = null;
+ TensorAddress address = null;
+}
+{
+ (
+ <SUM> <LBRACE> tensor1 = expression() ( <COMMA> dimension = identifier() )? <RBRACE>
+ { return new TensorSumNode(tensor1, Optional.ofNullable(dimension)); }
+ ) |
+ (
+ <MATCH> <LBRACE> tensor1 = expression() <COMMA> tensor2 = expression() <RBRACE>
+ { return new TensorMatchNode(tensor1, tensor2); }
+ )
+}
+
+// This is needed not to parse tensor functions but for the "reserved names as literals" workaround cludge
+String tensorFunctionName() :
+{
+}
+{
+ ( <SUM> | <MATCH> )
+ { return token.image; }
+}
+
+Function unaryFunctionName() : { }
+{
+ <COS> { return Function.cos; } |
+ <SIN> { return Function.sin; } |
+ <TAN> { return Function.tan; } |
+ <COSH> { return Function.cosh; } |
+ <SINH> { return Function.sinh; } |
+ <TANH> { return Function.tanh; } |
+ <ACOS> { return Function.acos; } |
+ <ASIN> { return Function.asin; } |
+ <ATAN> { return Function.atan; } |
+ <EXP> { return Function.exp; } |
+ <LOG10> { return Function.log10; } |
+ <LOG> { return Function.log; } |
+ <SQRT> { return Function.sqrt; } |
+ <CEIL> { return Function.ceil; } |
+ <FABS> { return Function.fabs; } |
+ <FLOOR> { return Function.floor; } |
+ <ISNAN> { return Function.isNan; }
+}
+
+Function binaryFunctionName() : { }
+{
+ <ATAN2> { return Function.atan2; } |
+ <LDEXP> { return Function.ldexp; } |
+ <POW> { return Function.pow; } |
+ <FMOD> { return Function.fmod; } |
+ <MIN> { return Function.min; } |
+ <MAX> { return Function.max; }
+}
+
+List<ExpressionNode> expressionList() :
+{
+ List<ExpressionNode> list = new ArrayList<ExpressionNode>();
+ ExpressionNode expression;
+}
+{
+ <LSQUARE>
+ expression=expression() { list.add(expression); }
+ ( LOOKAHEAD(2) <COMMA> expression=expression() { list.add(expression); } ) *
+ <RSQUARE>
+ { return list; }
+}
+
+double number() :
+{
+ String sign = "";
+}
+{
+ ( <SUB> { sign = "-";} )? ( <FLOAT> | <INTEGER> )
+ { return Double.parseDouble(sign + token.image); }
+}
+
+String identifier() :
+{
+ String name;
+ Function func;
+}
+{
+ name = tensorFunctionName() { return name; } |
+ func = unaryFunctionName() { return func.toString(); } |
+ func = binaryFunctionName() { return func.toString(); } |
+ <IF> { return token.image; } |
+ <IN> { return token.image; } |
+ <IDENTIFIER> { return token.image; }
+}
+
+// An identifier or integer
+String tag() :
+{
+ String name;
+}
+{
+ name = identifier() { return name; } |
+ <INTEGER> { return token.image; }
+}
+
+ConstantNode constantPrimitive() :
+{
+ String sign = "";
+}
+{
+ ( <SUB> { sign = "-";} ) ?
+ ( <INTEGER> | <FLOAT> | <STRING> )
+ { return new ConstantNode(Value.parse(sign + token.image),sign + token.image); }
+}
+
+Value primitiveValue() :
+{
+ String sign = "";
+}
+{
+ ( <SUB> { sign = "-";} ) ?
+ ( <INTEGER> | <FLOAT> | <STRING> )
+ { return Value.parse(sign + token.image); }
+}
+
+ConstantNode constantTensor() :
+{
+ Value constantValue;
+}
+{
+ <LCURLY> constantValue = tensorContent() <RCURLY>
+ { return new ConstantNode(constantValue); }
+}
+
+TensorValue tensorContent() :
+{
+ Map<TensorAddress, Double> cells = new LinkedHashMap<TensorAddress, Double>();
+ TensorAddress address;
+ Double value;
+}
+{
+ ( address = tensorAddress() <COLON> value = number() { cells.put(address, value); } ) ?
+ ( <COMMA> address = tensorAddress() <COLON> value = number() { cells.put(address, value); } ) *
+ { return new TensorValue(new MapTensor(cells)); }
+}
+
+TensorAddress tensorAddress() :
+{
+ List<TensorAddress.Element> elements = new ArrayList<TensorAddress.Element>();
+ String dimension;
+ String label;
+}
+{
+ <LCURLY>
+ ( dimension = tag() <COLON> label = label() { elements.add(new TensorAddress.Element(dimension, label)); } ) ?
+ ( <COMMA> dimension = tag() <COLON> label = label() { elements.add(new TensorAddress.Element(dimension, label)); } ) *
+ <RCURLY>
+ { return TensorAddress.fromUnsorted(elements); }
+}
+
+String label() :
+{
+ String label;
+
+}
+{
+ ( label = tag() |
+ ( "-" { label = "-"; } ) )
+ { return label; }
+}
+
diff --git a/searchlib/src/main/javacc/TreeNetParser.jj b/searchlib/src/main/javacc/TreeNetParser.jj
new file mode 100755
index 00000000000..db160c094ca
--- /dev/null
+++ b/searchlib/src/main/javacc/TreeNetParser.jj
@@ -0,0 +1,362 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/**
+ * A best-effort treenet parser.
+ *
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
+ * @version $Id: TreeNetParser.jj,v 1.1 2009-02-24 10:06:32 arnej Exp $
+ */
+options {
+ CACHE_TOKENS = true;
+ STATIC = false;
+ DEBUG_PARSER = false;
+ IGNORE_CASE = true;
+
+ // Flip for higher performance
+ ERROR_REPORTING = true;
+}
+
+PARSER_BEGIN(TreeNetParser)
+
+package com.yahoo.searchlib.treenet.parser;
+
+import com.yahoo.searchlib.rankingexpression.parser.RankingExpressionParser;
+import com.yahoo.searchlib.treenet.rule.*;
+import java.io.StringReader;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+public class TreeNetParser {
+
+ void verifyCategoricalVar(String expected, String actual) throws ParseException {
+ if (!expected.equals(actual)) {
+ throw new ParseException("Expected variable '" + expected + "', got '" + actual + "'.");
+ }
+ }
+
+ ComparisonCondition resolveCategoricalCondition(String var, Integer valA, Integer valB, String lblA, String lblB) {
+ if (valA < valB)
+ return new ComparisonCondition(var, valA + (valB - valA) / 2.0, lblA, lblB);
+ else
+ return new ComparisonCondition(var, valB + (valA - valB) / 2.0, lblB, lblA);
+ }
+
+}
+
+PARSER_END(TreeNetParser)
+
+SKIP :
+{
+ <[" ","\r","\t"]> |
+ <"//" (~["\n","\r"])* ("\n" | "\r" | "\r\n")> |
+ <"#" (~["\n","\r"])* ("\n" | "\r" | "\r\n")> |
+ <"/*" (~["*"])* "*" ("*" | ~["*","/"] (~["*"])* "*")* "/">
+}
+
+TOKEN :
+{
+ <INTEGER: (["+","-"])? <DECIMAL> (["l","L"])? | <HEX> (["l","L"])? | <OCTAL> (["l","L"])?> |
+ <#DECIMAL: ["1"-"9"] (["0"-"9"])*> |
+ <#HEX: "0" ["x","X"] (["0"-"9","a"-"f","A"-"F"])+> |
+ <#OCTAL: "0" (["0"-"7"])*> |
+ <FLOAT: (["+","-"])? (["0"-"9"])+ "." (["0"-"9"])* (<EXPONENT>)? (["f","F","d","D"])? | "." (["0"-"9"])+
+ (<EXPONENT>)? (["f","F","d","D"])? | (["0"-"9"])+ <EXPONENT> (["f","F","d","D"])? | (["0"-"9"])+
+ (<EXPONENT>)? ["f","F","d","D"]> |
+ <#EXPONENT: ["e","E"] (["+","-"])? (["0"-"9"])+> |
+ <STRING: ("\"" (~["\""] | "\\\"")* "\"") |
+ ("'" (~["'"] | "\\'")* "'")>
+}
+
+TOKEN :
+{
+ <ADD: "+"> |
+ <BEGIN: "modelbegin"> |
+ <COLON: ":"> |
+ <ELSE: "else"> |
+ <COMMA: ","> |
+ <DOT: "."> |
+ <SEMICOLON: ";"> |
+ <EQ: "="> |
+ <GOTO: "goto"> |
+ <IF: "if"> |
+ <IN: "in"> |
+ <LINK: "link"> |
+ <LT: "<"> |
+ <NL: "\n"> |
+ <PRED: "pred"> |
+ <LBRACE: "("> |
+ <RBRACE: ")"> |
+ <LCURLY: "{"> |
+ <RCURLY: "}"> |
+ <RESPONSE: "response"> |
+ <RETURN: "return"> |
+ <THEN: "then"> |
+ <TNSCORE: "tnscore"> |
+ <IDENTIFIER: ["A"-"Z","a"-"z","_"](["A"-"Z","a"-"z","0"-"9","_","$"])*>
+}
+
+TreeNet treeNet() :
+{
+ String begin, label;
+ Tree tree;
+ Map<String,Tree> trees = new HashMap<String,Tree>();
+}
+{
+ ( ( ignoredCpp() )*
+ <BEGIN> <COLON> nl()
+ <LINK> <IDENTIFIER> { begin = token.image; } eol()
+ <PRED> <EQ> <TNSCORE> eol()
+ <RETURN> eol()
+
+ ( tree = tree() { trees.put(tree.getName(), tree); } )*
+
+ <RETURN> <SEMICOLON>
+ ( ignoredCpp() )*
+ <EOF>
+ )
+ { return new TreeNet(begin, trees); }
+}
+
+/** C++ code outside the model which we can ignore */
+void ignoredCpp() :
+{
+}
+{
+ <RETURN> | <IDENTIFIER> | <FLOAT> | <INTEGER> | <STRING> | <EQ> | <COMMA> | <LBRACE> | <RBRACE> | <LCURLY> | <RCURLY> | <SEMICOLON> | <NL>
+}
+
+Tree tree() :
+{
+ String name;
+ String begin = null, label;
+ Double value = null;
+
+ TreeNode node;
+ Map<String,TreeNode> nodes = new HashMap<String,TreeNode>();
+}
+{
+ (
+ (
+ ( value = tnScore() name = label() ) |
+ ( name = label() value = tnScore() )
+ )
+ (
+ LOOKAHEAD(label() (condition() | response()))
+ label = label() { if (begin == null) { begin = label; } }
+ ( node = condition() { nodes.put(label, node); } |
+ node = response() { nodes.put(label, node); } ) )* )
+ { return new Tree(name, value, begin, nodes); }
+}
+
+Double tnScore() :
+{
+ Double value = null;
+}
+{
+ <TNSCORE> <EQ> ( value = floatVal() | ( <TNSCORE> <ADD> <RESPONSE> ) ) eol()
+ { return value; }
+}
+
+Condition condition() :
+{
+ String var;
+ Condition ret;
+}
+{
+ ( <IF> var = feature() ( ret = continuousCondition(var) |
+ LOOKAHEAD(8) ret = singleValueCategoricalCondition(var) |
+ ret = setMembershipCondition(var) ) )
+ { return ret; }
+}
+
+ComparisonCondition continuousCondition(String left) :
+{
+ Double right;
+ String ift, iff;
+}
+{
+ ( <LT> right = floatVal() <THEN> ift = jump() eol()
+ <ELSE> iff = jump() eol())
+ { return new ComparisonCondition(left, right, ift, iff); }
+}
+
+// Handle single-value IN expression as a regular comparison.
+// This special case may be removed when IN support is implemented in ranking expressions in both C++ and Java
+ComparisonCondition singleValueCategoricalCondition(String varA) :
+{
+ Integer valA, valB;
+ String lblA, lblB, varB;
+}
+{
+ ( <IN> <LBRACE> valA = intVal() nl() <RBRACE> <THEN> lblA = jump() eol()
+ <ELSE> <IF> varB = feature() { verifyCategoricalVar(varA, varB); }
+ <IN> <LBRACE> valB = intVal() nl() <RBRACE> <THEN> lblB = jump() eol() )
+ { return resolveCategoricalCondition(varA, valA, valB, lblA, lblB); }
+}
+
+SetMembershipCondition setMembershipCondition(String testValue) :
+{
+ List<Object> setValues;
+ String trueLabel, falseLabel;
+}
+{
+ ( <IN> <LBRACE> setValues = valueList() <RBRACE> <THEN> trueLabel = jump() eol()
+ <ELSE> falseLabel = jump() eol() )
+ { return new SetMembershipCondition(testValue, setValues, trueLabel, falseLabel); }
+}
+
+Response response() :
+{
+ Double val;
+ String lbl;
+}
+{
+ ( <RESPONSE> <EQ> val = floatVal() eol()
+ lbl = jump() eol() )
+ { return new Response(val, lbl); }
+}
+
+String feature() :
+{
+ String name;
+ String arguments = null;
+ String output = null;
+}
+{
+ ( name = identifier() [ <LBRACE> arguments = featureArguments() <RBRACE> ] [ <DOT> output = featureOutputs() ] )
+ { return name + (arguments != null ? "(" + arguments + ")" : "") + (output !=null ? "." + output : ""); }
+}
+
+String featureArguments() :
+{
+ String argument;
+ StringBuilder arguments = new StringBuilder();
+}
+{
+ ( argument = featureArgument() { arguments.append(argument); }
+ ( <COMMA> argument = featureArgument() { arguments.append(",").append(argument); } )* )
+ { return arguments.toString(); }
+}
+
+String featureArgument() :
+{
+ String argument;
+}
+{
+ ( argument = string() | argument = floatImage() | argument = feature() )
+ { return argument; }
+}
+
+String featureOutputs() :
+{
+ StringBuilder outputs = new StringBuilder();
+ String output;
+}
+{
+ output = featureOutput() { outputs.append(output); }
+ ( <DOT> output = featureOutput() { outputs.append(output); } ) *
+ { return outputs.toString(); }
+}
+
+String featureOutput() :
+{
+ String name;
+}
+{
+ <INTEGER> { return token.image; } |
+ <FLOAT> { return token.image; } |
+ name = identifier() { return name; }
+}
+
+String label() :
+{
+ String ret;
+}
+{
+ ( ret = identifier() <COLON> nl() )
+ { return ret; }
+}
+
+void eol() : { }
+{
+ <SEMICOLON> nl()
+}
+
+void nl() : { }
+{
+ ( <NL> )+
+}
+
+String jump() : { }
+{
+ <GOTO> <IDENTIFIER> { return token.image; }
+}
+
+String identifier() : { }
+{
+ ( /*<BEGIN> |
+ <ELSE> |
+ <GOTO> |
+ <IF> |
+ <IN> |
+ <LINK> |
+ <PRED> |
+ <RESPONSE> |
+ <RETURN> |
+ <THEN> |
+ <TNSCORE> |*/
+ <IDENTIFIER> )
+ { return token.image; }
+}
+
+String spaceSeparatedIdentifiers() :
+{
+ StringBuilder identifiers = new StringBuilder();
+ String identifier;
+}
+{
+ identifier = identifier() { identifiers.append(identifier); }
+ ( identifier = identifier() { identifiers.append(identifier); } ) *
+ { return identifiers.toString(); }
+}
+
+List<Object> valueList() :
+{
+ List<Object> values = new ArrayList<Object>();
+ Object value;
+}
+{
+ value = value() { values.add(value); }
+ ( <COMMA> value = value() { values.add(value); } ) *
+ { return values; }
+}
+
+Object value() :
+{
+ Object value;
+}
+{
+ ( value = spaceSeparatedIdentifiers() | value = intVal() | value = string() )
+ { return value; }
+}
+
+String string() : { }
+{
+ <STRING> { return token.image; }
+}
+
+Integer intVal() : { }
+{
+ <INTEGER> { return Integer.valueOf(token.image); }
+}
+
+Double floatVal() : { }
+{
+ ( <INTEGER> | <FLOAT> ) { return Double.valueOf(token.image); }
+}
+
+String floatImage() : { }
+{
+ ( <INTEGER> | <FLOAT> ) { return token.image; }
+}
diff --git a/searchlib/src/main/sh/evaluation-benchmark b/searchlib/src/main/sh/evaluation-benchmark
new file mode 100755
index 00000000000..0c9afd83fbd
--- /dev/null
+++ b/searchlib/src/main/sh/evaluation-benchmark
@@ -0,0 +1 @@
+java -cp "target/test-classes:target/searchlib.jar" com.yahoo.searchlib.rankingexpression.evaluation.EvaluationBenchmark $@
diff --git a/searchlib/src/main/sh/ga b/searchlib/src/main/sh/ga
new file mode 100644
index 00000000000..f1e5b0981e9
--- /dev/null
+++ b/searchlib/src/main/sh/ga
@@ -0,0 +1,69 @@
+#! /bin/sh
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+# BEGIN environment bootstrap section
+# Do not edit between here and END as this section should stay identical in all scripts
+
+findpath () {
+ myname=${0}
+ mypath=${myname%/*}
+ myname=${myname##*/}
+ if [ "$mypath" ] && [ -d "$mypath" ]; then
+ return
+ fi
+ mypath=$(pwd)
+ if [ -f "${mypath}/${myname}" ]; then
+ return
+ fi
+ echo "FATAL: Could not figure out the path where $myname lives from $0"
+ exit 1
+}
+
+COMMON_ENV=libexec/vespa/common-env.sh
+
+source_common_env () {
+ if [ "$VESPA_HOME" ] && [ -d "$VESPA_HOME" ]; then
+ # ensure it ends with "/" :
+ VESPA_HOME=${VESPA_HOME%/}/
+ export VESPA_HOME
+ common_env=$VESPA_HOME/$COMMON_ENV
+ if [ -f "$common_env" ]; then
+ . $common_env
+ return
+ fi
+ fi
+ return 1
+}
+
+findroot () {
+ source_common_env && return
+ if [ "$VESPA_HOME" ]; then
+ echo "FATAL: bad VESPA_HOME value '$VESPA_HOME'"
+ exit 1
+ fi
+ if [ "$ROOT" ] && [ -d "$ROOT" ]; then
+ VESPA_HOME="$ROOT"
+ source_common_env && return
+ fi
+ findpath
+ while [ "$mypath" ]; do
+ VESPA_HOME=${mypath}
+ source_common_env && return
+ mypath=${mypath%/*}
+ done
+ echo "FATAL: missing VESPA_HOME environment variable"
+ echo "Could not locate $COMMON_ENV anywhere"
+ exit 1
+}
+
+findroot
+
+# END environment bootstrap section
+
+JAR=$VESPA_HOME/lib/jars/searchlib-deploy.jar
+if [[ "$1" == *.jar ]]; then
+ JAR=$1
+fi
+shift
+
+exec java -cp $JAR com.yahoo.searchlib.mlr.ga.Main "$@"
diff --git a/searchlib/src/main/sh/gbdt-analysis b/searchlib/src/main/sh/gbdt-analysis
new file mode 100755
index 00000000000..6ff9c98ef1f
--- /dev/null
+++ b/searchlib/src/main/sh/gbdt-analysis
@@ -0,0 +1 @@
+java -cp target/searchlib.jar com.yahoo.searchlib.mlr.gbdt.ExpressionAnalysis $@
diff --git a/searchlib/src/main/sh/vespa-gbdt-converter b/searchlib/src/main/sh/vespa-gbdt-converter
new file mode 100755
index 00000000000..aa1f79b1dc2
--- /dev/null
+++ b/searchlib/src/main/sh/vespa-gbdt-converter
@@ -0,0 +1,63 @@
+#!/bin/sh
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+# BEGIN environment bootstrap section
+# Do not edit between here and END as this section should stay identical in all scripts
+
+findpath () {
+ myname=${0}
+ mypath=${myname%/*}
+ myname=${myname##*/}
+ if [ "$mypath" ] && [ -d "$mypath" ]; then
+ return
+ fi
+ mypath=$(pwd)
+ if [ -f "${mypath}/${myname}" ]; then
+ return
+ fi
+ echo "FATAL: Could not figure out the path where $myname lives from $0"
+ exit 1
+}
+
+COMMON_ENV=libexec/vespa/common-env.sh
+
+source_common_env () {
+ if [ "$VESPA_HOME" ] && [ -d "$VESPA_HOME" ]; then
+ # ensure it ends with "/" :
+ VESPA_HOME=${VESPA_HOME%/}/
+ export VESPA_HOME
+ common_env=$VESPA_HOME/$COMMON_ENV
+ if [ -f "$common_env" ]; then
+ . $common_env
+ return
+ fi
+ fi
+ return 1
+}
+
+findroot () {
+ source_common_env && return
+ if [ "$VESPA_HOME" ]; then
+ echo "FATAL: bad VESPA_HOME value '$VESPA_HOME'"
+ exit 1
+ fi
+ if [ "$ROOT" ] && [ -d "$ROOT" ]; then
+ VESPA_HOME="$ROOT"
+ source_common_env && return
+ fi
+ findpath
+ while [ "$mypath" ]; do
+ VESPA_HOME=${mypath}
+ source_common_env && return
+ mypath=${mypath%/*}
+ done
+ echo "FATAL: missing VESPA_HOME environment variable"
+ echo "Could not locate $COMMON_ENV anywhere"
+ exit 1
+}
+
+findroot
+
+# END environment bootstrap section
+
+exec java -cp $VESPA_HOME/lib/jars/searchlib.jar:$VESPA_HOME/lib/jars/document.jar:$VESPA_HOME/lib/jars/vespajlib.jar com.yahoo.searchlib.gbdt.GbdtConverter "$@"
diff --git a/searchlib/src/main/sh/vespa-treenet-converter b/searchlib/src/main/sh/vespa-treenet-converter
new file mode 100755
index 00000000000..a95d910c4b4
--- /dev/null
+++ b/searchlib/src/main/sh/vespa-treenet-converter
@@ -0,0 +1,63 @@
+#!/bin/sh
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+# BEGIN environment bootstrap section
+# Do not edit between here and END as this section should stay identical in all scripts
+
+findpath () {
+ myname=${0}
+ mypath=${myname%/*}
+ myname=${myname##*/}
+ if [ "$mypath" ] && [ -d "$mypath" ]; then
+ return
+ fi
+ mypath=$(pwd)
+ if [ -f "${mypath}/${myname}" ]; then
+ return
+ fi
+ echo "FATAL: Could not figure out the path where $myname lives from $0"
+ exit 1
+}
+
+COMMON_ENV=libexec/vespa/common-env.sh
+
+source_common_env () {
+ if [ "$VESPA_HOME" ] && [ -d "$VESPA_HOME" ]; then
+ # ensure it ends with "/" :
+ VESPA_HOME=${VESPA_HOME%/}/
+ export VESPA_HOME
+ common_env=$VESPA_HOME/$COMMON_ENV
+ if [ -f "$common_env" ]; then
+ . $common_env
+ return
+ fi
+ fi
+ return 1
+}
+
+findroot () {
+ source_common_env && return
+ if [ "$VESPA_HOME" ]; then
+ echo "FATAL: bad VESPA_HOME value '$VESPA_HOME'"
+ exit 1
+ fi
+ if [ "$ROOT" ] && [ -d "$ROOT" ]; then
+ VESPA_HOME="$ROOT"
+ source_common_env && return
+ fi
+ findpath
+ while [ "$mypath" ]; do
+ VESPA_HOME=${mypath}
+ source_common_env && return
+ mypath=${mypath%/*}
+ done
+ echo "FATAL: missing VESPA_HOME environment variable"
+ echo "Could not locate $COMMON_ENV anywhere"
+ exit 1
+}
+
+findroot
+
+# END environment bootstrap section
+
+exec java -cp $VESPA_HOME/lib/jars/searchlib.jar:$VESPA_HOME/lib/jars/document.jar:$VESPA_HOME/lib/jars/vespajlib.jar com.yahoo.searchlib.treenet.TreeNetConverter "$@"
diff --git a/searchlib/src/test/OWNERS b/searchlib/src/test/OWNERS
new file mode 100644
index 00000000000..31af040f698
--- /dev/null
+++ b/searchlib/src/test/OWNERS
@@ -0,0 +1 @@
+bratseth
diff --git a/searchlib/src/test/files/features01.expression b/searchlib/src/test/files/features01.expression
new file mode 100644
index 00000000000..fbb43a77696
--- /dev/null
+++ b/searchlib/src/test/files/features01.expression
@@ -0,0 +1 @@
+attribute(foo).out \ No newline at end of file
diff --git a/searchlib/src/test/files/features02.expression b/searchlib/src/test/files/features02.expression
new file mode 100644
index 00000000000..0a58b4b10f2
--- /dev/null
+++ b/searchlib/src/test/files/features02.expression
@@ -0,0 +1 @@
+attribute(foo).out attribute ( bar ) . out \ No newline at end of file
diff --git a/searchlib/src/test/files/features03.expression b/searchlib/src/test/files/features03.expression
new file mode 100644
index 00000000000..12760619b04
--- /dev/null
+++ b/searchlib/src/test/files/features03.expression
@@ -0,0 +1,4 @@
+foo
+ bar
+
+ baz
diff --git a/searchlib/src/test/files/features04.expression b/searchlib/src/test/files/features04.expression
new file mode 100644
index 00000000000..b8dea2e902c
--- /dev/null
+++ b/searchlib/src/test/files/features04.expression
@@ -0,0 +1 @@
+attribute attribute(foo) attribute(foo).out attribute(bar).out.out \ No newline at end of file
diff --git a/searchlib/src/test/files/gbdt.expression b/searchlib/src/test/files/gbdt.expression
new file mode 100644
index 00000000000..b59d6052f5d
--- /dev/null
+++ b/searchlib/src/test/files/gbdt.expression
@@ -0,0 +1,10 @@
+if (F55 < 2.0932798, if (F42 < 1.7252731, if (F33 < 0.5, if (F38 < 1.5367546, 1.7333333, 1.3255814), if (F37 < 0.675922, 1.9014085, 1.0)), if (F109 < 0.5, if (F116 < 5.25, if (F111 < 0.0521445, 1.0, 1.9090909), if (F38 < 4.0740733, 0.8, if (F38 < 6.6152048, 1.7142857, 0.625))), 1.5945946)), if (F109 < 0.5, if (F113 < 0.7835808, if (F110 < 491.0, if (F56 < 2.5423126, if (F108 < 243.5, 1.375, 0.78), 0.5), 2.0), if (F103 < 0.9918365, 1.6, 0.3333333)), if (F59 < 0.9207, if (F30 < 0.86, 1.5890411, 0.625), if (F100 < 5.9548216, 1.0, 0.0)))) +
+if (F55 < 59.5480576, if (F42 < 1.8308522, if (F100 < 5.9549484, if (F107 < 0.5, -0.3406279, if (F56 < 1.7057916, if (F36 < 3.778285, if (F103 < 0.5600199, 0.047108, if (F36 < 1.2203553, if (F102 < 1.5, 0.0460316, -0.473794), -0.9825869)), -0.8848045), if (F47 < 15.5, 0.348047, -1.0890411))), 1.75), if (F113 < 0.8389627, if (F110 < 7.5, -0.5778378, if (F111 < 0.8596972, if (F114 < 831.5, if (F113 < 0.3807178, 0.0497646, if (F110 < 63.0, 0.6549377, 0.2486999)), if (F39 < 8.9685574, 0.3222195, -0.1690968)), 1.0381818)), if (F58 < 0.889763, -0.0702703, -1.6))), if (F102 < 3.5, -0.3059684, -1.5890411)) +
+if (F55 < 119.6311035, if (F55 < 90.895813, if (F39 < 12.162282, if (F35 < 1.1213787, if (F55 < 34.9389648, if (F45 < 3.5, if (F51 < 0.0502058, if (F103 < 0.8550526, if (F55 < 4.96804, 0.048519, 0.6596588), if (F38 < 1.3808891, -0.7416763, 0.0176633)), 0.4502234), -0.6811898), 0.5572351), if (F100 < 3.3971992, if (F39 < 7.0869236, if (F43 < 5.5100875, if (F46 < 4.5, -0.1702421, -0.9797453), -1.5426025), 0.0774408), if (F52 < 22.3562355, if (F35 < 4.4263992, 0.4011598, -0.3898472), -1.75))), if (F39 < 14.5762558, if (F109 < 0.5, 1.6616928, 0.4001626), if (F100 < 3.0519419, 0.616491, -0.1808479))), -1.2135522), 0.5535716) +
+if (F43 < 9.272151, if (F36 < 9.0613861, if (F115 < 36.5, if (F34 < 1.4407213, if (F41 < 10.4713802, if (F34 < 1.2610778, if (F105 < 8.2159586, if (F46 < 88.5, 0.0075843, -0.6358738), if (F105 < 9.5308332, 1.4464284, -0.0895592)), 0.3532708), -1.8289603), if (F45 < 24.5, if (F111 < 0.9095335, if (F113 < 0.0529755, -0.6272416, if (F50 < 34.2163391, if (F113 < 0.0813664, 0.3683843, if (F34 < 1.6283135, -0.6334628, -0.1610307)), 1.5559684)), -1.7492068), 1.5060212)), if (F49 < 23.5787125, if (F100 < 6.5115452, if (F37 < 0.8601408, if (F57 < 6.5, 0.0547747, 1.193346), 0.6402962), 1.7395205), 2.5559684)), -3.1016318), 1.8657542) +
+if (F55 < 764.9404297, if (F34 < 23.2379246, if (F36 < 9.2296076, if (F114 < 116.0, if (F108 < 13.5, if (F108 < 12.5, -0.2736142, -1.7384173), if (F110 < 10.5, 0.0794336, -0.2171646)), if (F114 < 129.0, if (F109 < 0.5, 1.4407836, -0.1458547), if (F111 < 0.9703438, if (F47 < 18.5, if (F32 < 3.5, 0.0708936, if (F118 < 0.6794872, if (F119 < 3.8533711, if (F34 < 0.1213822, -2.0046196, -8.566E-4), -0.9490828), 0.0790339)), if (F113 < 0.3637481, 0.1161088, -0.9997786)), 1.3003114))), if (F111 < 0.2438112, -2.0582902, 0.6918949)), if (F115 < 95.0, -2.8602383, -0.0063699)), if (F101 < 0.9411763, -2.0253283, -0.6417007)) +
+if (F114 < 516.0, if (F49 < 8.9197922, if (F48 < 3.5, if (F36 < 1.3889931, if (F43 < 0.9699799, if (F34 < 9.6113167, if (F106 < 8.5, if (F108 < 153.5, if (F110 < 130.5, 0.180242, 2.545163), if (F108 < 161.5, -2.2253985, if (F55 < 31.4965668, -0.0122572, 0.7364454))), -0.2596613), 0.7247348), if (F111 < 0.2817393, -0.6409092, 0.2100071)), if (F116 < 18.75, 0.511352, -0.1093323)), 0.9379161), 0.3603908), if (F46 < 32.5, if (F46 < 5.5, if (F39 < 11.7440758, if (F115 < 774.0, -0.0433343, -1.7439904), -0.3662575), 0.5413771), if (F110 < 67.0, if (F46 < 34.5, -2.6581287, -0.9399502), 0.075664))) +
+if (F42 < 24.3080139, if (F118 < 0.8452381, if (F119 < 6.2847767, if (F100 < 3.2778931, if (F46 < 30.0, if (F43 < 1.2712233, if (F104 < 3.5, 0.1365837, 0.5592712), if (F39 < 0.6294491, -0.8729556, -0.0123421)), 3.7677864), if (F111 < 0.6580936, if (F103 < 0.9319581, -0.2822538, if (F107 < 1.5, -0.3983539, if (F104 < 5.5, 0.0792465, 0.7273864))), if (F104 < 3.5, -1.1550477, 0.0490706))), 1.4735778), if (F111 < 0.3724709, if (F51 < 16.0989189, if (F114 < 154.0, if (F108 < 57.5, -0.0675733, -0.3994327), -0.0250285), -1.4871782), if (F34 < 2.1943491, 0.0229469, if (F108 < 1527.0, 1.4706301, 0.0285333)))), 3.489949) +
+if (F34 < 30.3465347, if (F103 < 0.9996098, if (F38 < 0.558669, if (F105 < 3.6287756, if (F104 < 3.5, if (F31 < 0.86, 0.1121421, 1.8153648), -0.8281607), if (F55 < 37.6819153, 0.9656266, 0.1585065)), if (F113 < 0.840385, if (F38 < 9.6623116, if (F46 < 136.0, if (F53 < 0.5548913, if (F38 < 8.4469957, if (F34 < 3.1969421, if (F114 < 20.0, -0.2944335, 0.03499), if (F34 < 3.4671984, -1.3154796, -0.1742507)), 0.4071658), if (F105 < 2.315434, if (F110 < 59.5, -0.1713032, -1.420465), -0.1456236)), 0.5520287), if (F108 < 12156.5, if (F111 < 0.3892631, -0.16285, -0.9015614), -2.6391831)), 0.2011691)), -3.073049), -3.2461861) +
+if (F55 < 28.4668102, if (F34 < 0.4929269, if (F30 < 0.86, if (F37 < 0.8360082, -0.0815482, -0.7898247), -0.5144471), if (F108 < 20498.0, if (F44 < 1.1856511, if (F56 < 1.0706565, if (F39 < 8.377079, if (F59 < 0.5604, 0.0429508, if (F34 < 0.7287493, -1.0264078, 0.6052195)), -0.4814408), if (F119 < 3.7530813, if (F115 < 8.5, 0.4916013, 0.0457533), if (F114 < 1093.5, 1.1673864, 0.3411176))), -0.6176305), if (F100 < 3.151973, 2.6908011, 0.3835885))), if (F116 < 62.0, if (F114 < 562.0, -0.415543, if (F103 < 0.9826763, -0.1169933, if (F104 < 0.5, -0.0665763, 1.0238317))), if (F100 < 5.8046961, -3.2954836, 0.2781039))) +
+if (F34 < 26.9548168, if (F35 < 18.4714928, if (F115 < 698.0, if (F116 < 41.5, if (F38 < 1.1138718, if (F46 < 9.0, if (F31 < 0.86, 0.1059075, -0.2995292), if (F46 < 25.5, if (F46 < 13.0, 0.6297316, 1.8451736), 0.2079161)), if (F38 < 19.3839836, if (F49 < 29.9797497, if (F46 < 235.5, if (F38 < 1.2626771, -0.5165347, if (F35 < 10.3027954, if (F50 < 0.2823648, -0.0424489, if (F113 < 0.0776736, 0.7495954, -0.2948665)), 0.3229146)), -1.0711968), 0.3153474), if (F116 < 5.2182379, 2.8017734, 0.3444192))), if (F113 < 0.5691726, 1.7530511, 0.3534861)), -2.4915219), if (F103 < 0.9680555, -2.1724317, 0.2143739)), 3.1712332) \ No newline at end of file
diff --git a/searchlib/src/test/files/gbdt.ext.xml b/searchlib/src/test/files/gbdt.ext.xml
new file mode 100644
index 00000000000..f466751eb35
--- /dev/null
+++ b/searchlib/src/test/files/gbdt.ext.xml
@@ -0,0 +1,284 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!-- Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -->
+<MlrFunction name="CHANGE THIS NUMBER" featuredef="mlrfeaturedefs.xml" version="1.0">
+
+ <!-- ADD SCORE STANDARDIZATION OR CALIBRATION HERE -->
+
+ <DecisionTree loss="regression">
+
+ <!-- ADD EARLY EXIT HERE -->
+
+ <Forest>
+ <Tree id="0" shrinkage="1">
+ <Node feature="F4" value="0.6972222" id="N0_1" nSamples="16934" response="0.325971" gain="3400.38" reg="8.11091">
+ <Node feature="F1" value="0.7928572" id="N0_2" nSamples="15850" response="0.208644" gain="794.718" reg="5.04115">
+ <Node feature="F54" value="0.9166666" id="N0_3" nSamples="15380" response="0.169376" gain="380.772" reg="1.21184">
+ <Response value="0.1145211" id="T0_1" nSamples="13718" gain="196.603" reg="1.44895"/>
+ <Node feature="F111" value="1105.0000000" id="N0_4" nSamples="1662" response="0.622142" gain="540.33" reg="4.36127">
+ <Response value="0.3115265" id="T0_2" nSamples="1284" gain="89.0326" reg="1.40341"/>
+ <Response value="1.6772487" id="T0_3" nSamples="378" gain="366.668" reg="7.19705"/>
+ </Node>
+ </Node>
+ <Response value="1.4936170" id="T0_4" nSamples="470" gain="245.945" reg="4.73472"/>
+ </Node>
+ <Node feature="F111" value="85.5000000" id="N0_5" nSamples="1084" response="2.04151" gain="464.844" reg="4.1662">
+ <Response value="1.1202186" id="T0_5" nSamples="366" gain="46.188" reg="2.94919"/>
+ <Response value="2.5111421" id="T0_6" nSamples="718" gain="303.427" reg="4.00159"/>
+ </Node>
+ </Node>
+ </Tree>
+ <Tree id="1" shrinkage="1">
+ <Node feature="F1" value="0.8875000" id="N1_1" nSamples="16934" response="1.66526e-14" gain="290.877" reg="2.1699">
+ <Node feature="F1" value="0.0634921" id="N1_2" nSamples="16398" response="-0.0237835" gain="191.337" reg="1.30842">
+ <Response value="0.4755052" id="T1_1" nSamples="738" gain="50.8552" reg="1.41831"/>
+ <Node feature="F111" value="8765.0000000" id="N1_3" nSamples="15660" response="-0.0473132" gain="89.9259" reg="1.60382">
+ <Response value="-0.0572274" id="T1_2" nSamples="15401" gain="104.393" reg="1.01145"/>
+ <Response value="0.5422220" id="T1_3" nSamples="259" gain="44.8483" reg="2.05604"/>
+ </Node>
+ </Node>
+ <Node feature="F114" value="55.0000000" id="N1_4" nSamples="536" response="0.727615" gain="132.057" reg="3.78796">
+ <Response value="-0.2409815" id="T1_4" nSamples="114" gain="18.8198" reg="2.07042"/>
+ <Node feature="F54" value="0.5500000" id="N1_5" nSamples="422" response="0.989273" gain="101.767" reg="3.02428">
+ <Response value="0.2211539" id="T1_5" nSamples="125" gain="27.8663" reg="4.24205"/>
+ <Response value="1.3125561" id="T1_6" nSamples="297" gain="51.2473" reg="7.68835"/>
+ </Node>
+ </Node>
+ </Node>
+ </Tree>
+ <Tree id="2" shrinkage="1">
+ <Node feature="F4" value="0.6972222" id="N2_1" nSamples="16934" response="1.54893e-15" gain="108.114" reg="0.714474">
+ <Node feature="F3" value="0.9285715" id="N2_2" nSamples="15850" response="0.0209649" gain="119.238" reg="1.32652">
+ <Node feature="F8" value="0.0540936" id="N2_3" nSamples="15398" response="0.00602205" gain="65.8719" reg="0.729996">
+ <Response value="-0.0076290" id="T2_1" nSamples="14762" gain="40.047" reg="0.938362"/>
+ <Response value="0.3228730" id="T2_2" nSamples="636" gain="29.0566" reg="1.61032"/>
+ </Node>
+ <Node feature="F1" value="0.8166667" id="N2_4" nSamples="452" response="0.530013" gain="58.4604" reg="1.72187">
+ <Response value="0.8435790" id="T2_3" nSamples="260" gain="19.8011" reg="1.54592"/>
+ <Response value="0.1053924" id="T2_4" nSamples="192" gain="34.1021" reg="2.12077"/>
+ </Node>
+ </Node>
+ <Node feature="F4" value="0.7619048" id="N2_5" nSamples="1084" response="-0.306544" gain="86.884" reg="1.26721">
+ <Response value="-0.5500016" id="T2_5" nSamples="627" gain="36.2072" reg="1.06408"/>
+ <Response value="0.0274784" id="T2_6" nSamples="457" gain="33.0332" reg="1.18481"/>
+ </Node>
+ </Node>
+ </Tree>
+ <Tree id="3" shrinkage="1">
+ <Node feature="F74" value="0.8750000" id="N3_1" nSamples="16934" response="5.72141e-15" gain="61.8637" reg="0.467865">
+ <Node feature="F54" value="0.8452381" id="N3_2" nSamples="15666" response="0.0172606" gain="78.5182" reg="0.556383">
+ <Response value="-0.0031926" id="T3_1" nSamples="14467" gain="19.9349" reg="0.881707"/>
+ <Node feature="F111" value="141.5000000" id="N3_3" nSamples="1199" response="0.264046" gain="173.465" reg="1.79504">
+ <Response value="-0.1402742" id="T3_2" nSamples="566" gain="25.2057" reg="1.55658"/>
+ <Node feature="F4" value="0.5871212" id="N3_4" nSamples="633" response="0.62557" gain="142.99" reg="2.61286">
+ <Response value="1.2691849" id="T3_3" nSamples="226" gain="14.003" reg="6.26018"/>
+ <Response value="0.2681826" id="T3_4" nSamples="407" gain="36.74" reg="1.35318"/>
+ </Node>
+ </Node>
+ </Node>
+ <Node feature="F111" value="1105.0000000" id="N3_5" nSamples="1268" response="-0.213252" gain="99.4561" reg="1.62725">
+ <Response value="-0.0588169" id="T3_5" nSamples="976" gain="31.0582" reg="0.806895"/>
+ <Response value="-0.7294473" id="T3_6" nSamples="292" gain="23.201" reg="2.37482"/>
+ </Node>
+ </Node>
+ </Tree>
+ <Tree id="4" shrinkage="1">
+ <Node feature="F1" value="0.7619048" id="N4_1" nSamples="16934" response="-5.46253e-15" gain="21.0627" reg="0.287539">
+ <Response value="0.0089472" id="T4_1" nSamples="15923" gain="19.015" reg="0.147787"/>
+ <Node feature="F3" value="0.9285715" id="N4_2" nSamples="1011" response="-0.140916" gain="39.0598" reg="0.83888">
+ <Node feature="F114" value="36.5000000" id="N4_3" nSamples="391" response="-0.391072" gain="57.2061" reg="2.63101">
+ <Response value="-1.1389426" id="T4_2" nSamples="84" gain="0.787752" reg="0.464762"/>
+ <Node feature="F97" value="0.0468557" id="N4_4" nSamples="307" response="-0.186443" gain="41.6641" reg="2.24799">
+ <Node feature="F6" value="0.5357143" id="N4_5" nSamples="228" response="0.0361799" gain="28.332" reg="1.89184">
+ <Response value="0.5614127" id="T4_3" nSamples="74" gain="8.47267" reg="4.30523"/>
+ <Response value="-0.2162048" id="T4_4" nSamples="154" gain="15.6327" reg="2.44277"/>
+ </Node>
+ <Response value="-0.8289478" id="T4_5" nSamples="79" gain="10.0783" reg="4.35689"/>
+ </Node>
+ </Node>
+ <Response value="0.0168442" id="T4_6" nSamples="620" gain="27.2567" reg="1.42134"/>
+ </Node>
+ </Node>
+ </Tree>
+ <Tree id="5" shrinkage="1">
+ <Node feature="F1" value="0.6583333" id="N5_1" nSamples="16934" response="-1.18724e-14" gain="15.8033" reg="0.121385">
+ <Response value="-0.0187975" id="T5_1" nSamples="12309" gain="8.07967" reg="0.419129"/>
+ <Node feature="F74" value="0.2104235" id="N5_2" nSamples="4625" response="0.0500277" gain="14.0121" reg="0.318388">
+ <Response value="0.1951745" id="T5_2" nSamples="593" gain="19.6837" reg="1.436"/>
+ <Node feature="F68" value="0.8158333" id="N5_3" nSamples="4032" response="0.0286805" gain="20.4058" reg="0.799386">
+ <Node feature="F68" value="0.7616667" id="N5_4" nSamples="175" response="-0.31178" gain="59.9989" reg="7.53081">
+ <Response value="-0.0701389" id="T5_3" nSamples="152" gain="9.57811" reg="1.19009"/>
+ <Response value="-1.9087110" id="T5_4" nSamples="23" gain="8.7892" reg="5.88041"/>
+ </Node>
+ <Node feature="F91" value="0.9516667" id="N5_5" nSamples="3857" response="0.0441279" gain="21.9203" reg="0.555284">
+ <Response value="0.2880719" id="T5_5" nSamples="344" gain="13.4963" reg="1.14124"/>
+ <Response value="0.0202404" id="T5_6" nSamples="3513" gain="6.85785" reg="0.207486"/>
+ </Node>
+ </Node>
+ </Node>
+ </Node>
+ </Tree>
+ <Tree id="6" shrinkage="1">
+ <Node feature="F97" value="0.0104738" id="N6_1" nSamples="16934" response="-1.18431e-14" gain="15.1028" reg="0.507954">
+ <Node feature="F4" value="0.6833333" id="N6_2" nSamples="269" response="-0.238978" gain="15.3919" reg="1.49284">
+ <Response value="-0.1119661" id="T6_1" nSamples="214" gain="6.1687" reg="0.93026"/>
+ <Response value="-0.7331711" id="T6_2" nSamples="55" gain="2.18312" reg="1.22751"/>
+ </Node>
+ <Node feature="F111" value="1.5000000" id="N6_3" nSamples="16665" response="0.00385749" gain="12.0175" reg="0.117568">
+ <Response value="-0.0487729" id="T6_3" nSamples="3469" gain="4.92313" reg="0.182471"/>
+ <Node feature="F54" value="0.0294118" id="N6_4" nSamples="13196" response="0.0176931" gain="19.9028" reg="0.279545">
+ <Node feature="F6" value="0.2250000" id="N6_5" nSamples="1017" response="0.153028" gain="20.5404" reg="0.562946">
+ <Response value="0.3140816" id="T6_4" nSamples="452" gain="19.4676" reg="2.06914"/>
+ <Response value="0.0241852" id="T6_5" nSamples="565" gain="13.4265" reg="3.16515"/>
+ </Node>
+ <Response value="0.0063921" id="T6_6" nSamples="12179" gain="13.2817" reg="0.260959"/>
+ </Node>
+ </Node>
+ </Node>
+ </Tree>
+ <Tree id="7" shrinkage="1">
+ <Node feature="F8" value="0.0488095" id="N7_1" nSamples="16934" response="-2.39427e-15" gain="10.855" reg="0.215065">
+ <Node feature="F97" value="0.0196587" id="N7_2" nSamples="16052" response="0.00599328" gain="9.7203" reg="0.100778">
+ <Response value="-0.0373170" id="T7_1" nSamples="3948" gain="9.68833" reg="0.846037"/>
+ <Node feature="F4" value="0.5527778" id="N7_3" nSamples="12104" response="0.0201199" gain="10.2801" reg="0.154998">
+ <Response value="0.0085123" id="T7_2" nSamples="10468" gain="23.4724" reg="1.54228"/>
+ <Node feature="F111" value="4064.5000000" id="N7_4" nSamples="1636" response="0.0943918" gain="25.4176" reg="0.859279">
+ <Node feature="F111" value="109.5000000" id="N7_5" nSamples="1438" response="0.0473647" gain="50.7234" reg="0.785363">
+ <Response value="0.2020749" id="T7_3" nSamples="862" gain="33.8131" reg="0.904055"/>
+ <Response value="-0.1841633" id="T7_4" nSamples="576" gain="30.0832" reg="1.48082"/>
+ </Node>
+ <Response value="0.4359319" id="T7_5" nSamples="198" gain="7.05975" reg="0.81405"/>
+ </Node>
+ </Node>
+ </Node>
+ <Response value="-0.1090751" id="T7_6" nSamples="882" gain="9.46835" reg="0.412835"/>
+ </Node>
+ </Tree>
+ <Tree id="8" shrinkage="1">
+ <Node feature="F111" value="7801.5000000" id="N8_1" nSamples="16934" response="2.53029e-16" gain="20.228" reg="0.496475">
+ <Response value="0.0052430" id="T8_1" nSamples="16562" gain="8.78442" reg="0.509431"/>
+ <Node feature="F4" value="0.5444444" id="N8_2" nSamples="372" response="-0.233425" gain="14.0777" reg="0.814657">
+ <Response value="-0.4434354" id="T8_2" nSamples="177" gain="5.5993" reg="0.886644"/>
+ <Node feature="F4" value="0.7250000" id="N8_3" nSamples="195" response="-0.0427999" gain="11.9581" reg="1.1046">
+ <Node feature="F111" value="86382.5000000" id="N8_4" nSamples="94" response="0.225484" gain="11.99" reg="8.76417">
+ <Node feature="F77" value="0.0250039" id="N8_5" nSamples="88" response="0.348178" gain="10.4609" reg="2.12667">
+ <Response value="0.9485625" id="T8_3" nSamples="25" gain="3.76835" reg="5.05517"/>
+ <Response value="0.1099304" id="T8_4" nSamples="63" gain="7.71866" reg="4.5633"/>
+ </Node>
+ <Response value="-1.5740248" id="T8_5" nSamples="6" gain="0" reg="0"/>
+ </Node>
+ <Response value="-0.2924902" id="T8_6" nSamples="101" gain="5.49883" reg="1.23572"/>
+ </Node>
+ </Node>
+ </Node>
+ </Tree>
+ <Tree id="9" shrinkage="1">
+ <Node feature="F4" value="0.9270834" id="N9_1" nSamples="16934" response="-2.33411e-14" gain="8.63368" reg="0.442739">
+ <Node feature="F1" value="0.8166667" id="N9_2" nSamples="16737" response="-0.00251172" gain="11.5881" reg="0.238416">
+ <Response value="0.0033574" id="T9_1" nSamples="15959" gain="8.64667" reg="0.243896"/>
+ <Node feature="F4" value="0.7071428" id="N9_3" nSamples="778" response="-0.122905" gain="15.9527" reg="0.575741">
+ <Response value="-0.2470163" id="T9_2" nSamples="451" gain="12.678" reg="0.973375"/>
+ <Response value="0.0482702" id="T9_3" nSamples="327" gain="31.7349" reg="2.28499"/>
+ </Node>
+ </Node>
+ <Node feature="F54" value="0.5833334" id="N9_4" nSamples="197" response="0.213395" gain="16.1909" reg="1.92764">
+ <Response value="0.8142192" id="T9_4" nSamples="40" gain="7.39547" reg="3.12529"/>
+ <Node feature="F1" value="0.9500000" id="N9_5" nSamples="157" response="0.0603181" gain="13.0832" reg="4.4261">
+ <Response value="1.2211719" id="T9_5" nSamples="12" gain="0" reg="0"/>
+ <Response value="-0.0357525" id="T9_6" nSamples="145" gain="9.9023" reg="1.48019"/>
+ </Node>
+ </Node>
+ </Node>
+ </Tree>
+ <Tree id="10" shrinkage="1">
+ <Node feature="F113" value="37.5050011" id="N10_1" nSamples="16934" response="2.09292e-14" gain="9.56962" reg="1.59045">
+ <Node feature="F111" value="252.5000000" id="N10_2" nSamples="16902" response="0.00111702" gain="8.25835" reg="0.0924086">
+ <Response value="-0.0110506" id="T10_1" nSamples="13005" gain="10.0378" reg="0.228437"/>
+ <Node feature="F4" value="0.6937500" id="N10_3" nSamples="3897" response="0.0417224" gain="19.189" reg="0.389404">
+ <Node feature="F5" value="0.9000000" id="N10_4" nSamples="3323" response="0.0711812" gain="23.5288" reg="0.77517">
+ <Response value="0.0488562" id="T10_2" nSamples="3111" gain="20.1009" reg="0.430781"/>
+ <Response value="0.3987899" id="T10_3" nSamples="212" gain="19.0266" reg="1.75955"/>
+ </Node>
+ <Node feature="F74" value="0.7500000" id="N10_5" nSamples="574" response="-0.12882" gain="22.6731" reg="1.45348">
+ <Response value="-0.2113237" id="T10_4" nSamples="494" gain="17.8518" reg="0.946744"/>
+ <Response value="0.3806402" id="T10_5" nSamples="80" gain="8.97513" reg="1.8265"/>
+ </Node>
+ </Node>
+ </Node>
+ <Response value="-0.5899943" id="T10_6" nSamples="32" gain="3.25699" reg="1.90709"/>
+ </Node>
+ </Tree>
+ <Tree id="11" shrinkage="1">
+ <Node feature="F3" value="0.4365079" id="N11_1" nSamples="16934" response="2.1823e-15" gain="10.3536" reg="0.0892519">
+ <Response value="-0.0192181" id="T11_1" nSamples="10591" gain="5.39159" reg="0.134103"/>
+ <Node feature="F77" value="0.1715686" id="N11_2" nSamples="6343" response="0.0320887" gain="10.6477" reg="2.13695">
+ <Node feature="F111" value="1187.5000000" id="N11_3" nSamples="6319" response="0.0293219" gain="8.87468" reg="0.227924">
+ <Response value="0.0161420" id="T11_2" nSamples="5639" gain="8.60436" reg="0.206594"/>
+ <Node feature="F112" value="467.5000000" id="N11_4" nSamples="680" response="0.138618" gain="12.1983" reg="0.554692">
+ <Node feature="F68" value="0.8550000" id="N11_5" nSamples="249" response="0.318791" gain="13.0127" reg="2.04041">
+ <Response value="0.9831077" id="T11_3" nSamples="30" gain="5.2409" reg="2.63663"/>
+ <Response value="0.2277890" id="T11_4" nSamples="219" gain="7.42359" reg="0.873513"/>
+ </Node>
+ <Response value="0.0345274" id="T11_5" nSamples="431" gain="4.9666" reg="2.03851"/>
+ </Node>
+ </Node>
+ <Response value="0.7605657" id="T11_6" nSamples="24" gain="8.10877" reg="4.57014"/>
+ </Node>
+ </Node>
+ </Tree>
+ <Tree id="12" shrinkage="1">
+ <Node feature="F5" value="0.6125000" id="N12_1" nSamples="16934" response="1.30276e-14" gain="6.79558" reg="0.109105">
+ <Node feature="F4" value="0.7928572" id="N12_2" nSamples="14850" response="0.00756446" gain="23.3851" reg="7.61205">
+ <Response value="0.0063205" id="T12_1" nSamples="14839" gain="5.62744" reg="0.7077"/>
+ <Response value="1.6856100" id="T12_2" nSamples="11" gain="5.61032" reg="8.07894"/>
+ </Node>
+ <Node feature="F113" value="1.6900001" id="N12_3" nSamples="2084" response="-0.0539022" gain="9.88859" reg="0.345626">
+ <Node feature="F113" value="1.6350000" id="N12_4" nSamples="1702" response="-0.0207028" gain="9.3609" reg="4.28011">
+ <Response value="-0.0275853" id="T12_3" nSamples="1692" gain="5.95979" reg="0.487675"/>
+ <Response value="1.1438084" id="T12_4" nSamples="10" gain="1.70629" reg="5.68187"/>
+ </Node>
+ <Node feature="F97" value="0.0363399" id="N12_5" nSamples="382" response="-0.201822" gain="6.00279" reg="0.503405">
+ <Response value="-0.0843354" id="T12_5" nSamples="211" gain="7.1092" reg="3.50362"/>
+ <Response value="-0.3467910" id="T12_6" nSamples="171" gain="6.76865" reg="0.887804"/>
+ </Node>
+ </Node>
+ </Node>
+ </Tree>
+ <Tree id="13" shrinkage="1">
+ <Node feature="F8" value="0.1396104" id="N13_1" nSamples="16934" response="3.23757e-14" gain="3.72465" reg="0.481389">
+ <Response value="-0.0010790" id="T13_1" nSamples="16855" gain="2.96631" reg="0.0462446"/>
+ <Node feature="F54" value="0.5500000" id="N13_2" nSamples="79" response="0.230201" gain="7.54681" reg="1.54017">
+ <Node feature="F111" value="513.5000000" id="N13_3" nSamples="38" response="0.582488" gain="9.03315" reg="4.50984">
+ <Node feature="F77" value="0.0380987" id="N13_4" nSamples="29" response="0.249914" gain="4.40913" reg="2.7969">
+ <Response value="-0.1117221" id="T13_2" nSamples="19" gain="0.0309543" reg="2.26759"/>
+ <Response value="0.9370234" id="T13_3" nSamples="10" gain="3.44156" reg="7.77767"/>
+ </Node>
+ <Response value="1.6541140" id="T13_4" nSamples="9" gain="0.911121" reg="5.10972"/>
+ </Node>
+ <Node feature="F113" value="1.0700001" id="N13_5" nSamples="41" response="-0.0963084" gain="4.49083" reg="3.73645">
+ <Response value="0.1069487" id="T13_5" nSamples="34" gain="1.03851" reg="1.04634"/>
+ <Response value="-1.0835573" id="T13_6" nSamples="7" gain="0" reg="0"/>
+ </Node>
+ </Node>
+ </Node>
+ </Tree>
+ <Tree id="14" shrinkage="1">
+ <Node feature="F6" value="0.7321429" id="N14_1" nSamples="16934" response="8.23453e-15" gain="3.2952" reg="0.113837">
+ <Response value="0.0033418" id="T14_1" nSamples="16044" gain="3.59967" reg="0.101123"/>
+ <Node feature="F111" value="74.5000000" id="N14_2" nSamples="890" response="-0.0602418" gain="11.1643" reg="0.431465">
+ <Node feature="F4" value="0.6708333" id="N14_3" nSamples="422" response="0.0599632" gain="31.2494" reg="1.33569">
+ <Node feature="F1" value="0.5435606" id="N14_4" nSamples="138" response="-0.33867" gain="10.5062" reg="2.9291">
+ <Response value="0.5229282" id="T14_2" nSamples="16" gain="1.32068" reg="2.50445"/>
+ <Response value="-0.4516660" id="T14_3" nSamples="122" gain="4.40994" reg="1.48527"/>
+ </Node>
+ <Response value="0.2536650" id="T14_4" nSamples="284" gain="9.82795" reg="1.11729"/>
+ </Node>
+ <Node feature="F113" value="2.4700000" id="N14_5" nSamples="468" response="-0.168632" gain="10.4781" reg="1.13687">
+ <Response value="-0.2267124" id="T14_5" nSamples="412" gain="8.73493" reg="0.640413"/>
+ <Response value="0.2586769" id="T14_6" nSamples="56" gain="7.37561" reg="2.42814"/>
+ </Node>
+ </Node>
+ </Node>
+ </Tree>
+ </Forest>
+ </DecisionTree>
+</MlrFunction>
diff --git a/searchlib/src/test/files/gbdt.xml b/searchlib/src/test/files/gbdt.xml
new file mode 100644
index 00000000000..76e64b129f3
--- /dev/null
+++ b/searchlib/src/test/files/gbdt.xml
@@ -0,0 +1,614 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!-- Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -->
+<MlrFunction name="CHANGE THIS NUMBER" featuredef="mlrfeaturedefs.xml" version="1.0">
+
+ <!-- ADD SCORE STANDARDIZATION OR CALIBRATION HERE -->
+
+ <DecisionTree loss="regression">
+
+ <!-- ADD EARLY EXIT HERE -->
+
+ <Forest>
+ <Tree id="0">
+ <Node feature="F55" value="2.0932798" id="N0_1">
+ <Node feature="F42" value="1.7252731" id="N0_2">
+ <Node feature="F33" value="0.5000000" id="N0_3">
+ <Node feature="F38" value="1.5367546" id="N0_4">
+ <Response value="1.7333333" id="T0_1"/>
+ <Response value="1.3255814" id="T0_2"/>
+ </Node>
+ <Node feature="F37" value="0.6759220" id="N0_5">
+ <Response value="1.9014085" id="T0_3"/>
+ <Response value="1.0000000" id="T0_4"/>
+ </Node>
+ </Node>
+ <Node feature="F109" value="0.5000000" id="N0_6">
+ <Node feature="F116" value="5.2500000" id="N0_7">
+ <Node feature="F111" value="0.0521445" id="N0_8">
+ <Response value="1.0000000" id="T0_5"/>
+ <Response value="1.9090909" id="T0_6"/>
+ </Node>
+ <Node feature="F38" value="4.0740733" id="N0_9">
+ <Response value="0.8000000" id="T0_7"/>
+ <Node feature="F38" value="6.6152048" id="N0_10">
+ <Response value="1.7142857" id="T0_8"/>
+ <Response value="0.6250000" id="T0_9"/>
+ </Node>
+ </Node>
+ </Node>
+ <Response value="1.5945946" id="T0_10"/>
+ </Node>
+ </Node>
+ <Node feature="F109" value="0.5000000" id="N0_11">
+ <Node feature="F113" value="0.7835808" id="N0_12">
+ <Node feature="F110" value="491.0000000" id="N0_13">
+ <Node feature="F56" value="2.5423126" id="N0_14">
+ <Node feature="F108" value="243.5000000" id="N0_15">
+ <Response value="1.3750000" id="T0_11"/>
+ <Response value="0.7800000" id="T0_12"/>
+ </Node>
+ <Response value="0.5000000" id="T0_13"/>
+ </Node>
+ <Response value="2.0000000" id="T0_14"/>
+ </Node>
+ <Node feature="F103" value="0.9918365" id="N0_16">
+ <Response value="1.6000000" id="T0_15"/>
+ <Response value="0.3333333" id="T0_16"/>
+ </Node>
+ </Node>
+ <Node feature="F59" value="0.9207000" id="N0_17">
+ <Node feature="F30" value="0.8600000" id="N0_18">
+ <Response value="1.5890411" id="T0_17"/>
+ <Response value="0.6250000" id="T0_18"/>
+ </Node>
+ <Node feature="F100" value="5.9548216" id="N0_19">
+ <Response value="1.0000000" id="T0_19"/>
+ <Response value="0.0000000" id="T0_20"/>
+ </Node>
+ </Node>
+ </Node>
+ </Node>
+ </Tree>
+ <Tree id="1">
+ <Node feature="F55" value="59.5480576" id="N1_1">
+ <Node feature="F42" value="1.8308522" id="N1_2">
+ <Node feature="F100" value="5.9549484" id="N1_3">
+ <Node feature="F107" value="0.5000000" id="N1_4">
+ <Response value="-0.3406279" id="T1_1"/>
+ <Node feature="F56" value="1.7057916" id="N1_5">
+ <Node feature="F36" value="3.7782850" id="N1_6">
+ <Node feature="F103" value="0.5600199" id="N1_7">
+ <Response value="0.0471080" id="T1_2"/>
+ <Node feature="F36" value="1.2203553" id="N1_8">
+ <Node feature="F102" value="1.5000000" id="N1_9">
+ <Response value="0.0460316" id="T1_3"/>
+ <Response value="-0.4737940" id="T1_4"/>
+ </Node>
+ <Response value="-0.9825869" id="T1_5"/>
+ </Node>
+ </Node>
+ <Response value="-0.8848045" id="T1_6"/>
+ </Node>
+ <Node feature="F47" value="15.5000000" id="N1_10">
+ <Response value="0.3480470" id="T1_7"/>
+ <Response value="-1.0890411" id="T1_8"/>
+ </Node>
+ </Node>
+ </Node>
+ <Response value="1.7500000" id="T1_9"/>
+ </Node>
+ <Node feature="F113" value="0.8389627" id="N1_11">
+ <Node feature="F110" value="7.5000000" id="N1_12">
+ <Response value="-0.5778378" id="T1_10"/>
+ <Node feature="F111" value="0.8596972" id="N1_13">
+ <Node feature="F114" value="831.5000000" id="N1_14">
+ <Node feature="F113" value="0.3807178" id="N1_15">
+ <Response value="0.0497646" id="T1_11"/>
+ <Node feature="F110" value="63.0000000" id="N1_16">
+ <Response value="0.6549377" id="T1_12"/>
+ <Response value="0.2486999" id="T1_13"/>
+ </Node>
+ </Node>
+ <Node feature="F39" value="8.9685574" id="N1_17">
+ <Response value="0.3222195" id="T1_14"/>
+ <Response value="-0.1690968" id="T1_15"/>
+ </Node>
+ </Node>
+ <Response value="1.0381818" id="T1_16"/>
+ </Node>
+ </Node>
+ <Node feature="F58" value="0.8897630" id="N1_18">
+ <Response value="-0.0702703" id="T1_17"/>
+ <Response value="-1.6000000" id="T1_18"/>
+ </Node>
+ </Node>
+ </Node>
+ <Node feature="F102" value="3.5000000" id="N1_19">
+ <Response value="-0.3059684" id="T1_19"/>
+ <Response value="-1.5890411" id="T1_20"/>
+ </Node>
+ </Node>
+ </Tree>
+ <Tree id="2">
+ <Node feature="F55" value="119.6311035" id="N2_1">
+ <Node feature="F55" value="90.8958130" id="N2_2">
+ <Node feature="F39" value="12.1622820" id="N2_3">
+ <Node feature="F35" value="1.1213787" id="N2_4">
+ <Node feature="F55" value="34.9389648" id="N2_5">
+ <Node feature="F45" value="3.5000000" id="N2_6">
+ <Node feature="F51" value="0.0502058" id="N2_7">
+ <Node feature="F103" value="0.8550526" id="N2_8">
+ <Node feature="F55" value="4.9680400" id="N2_9">
+ <Response value="0.0485190" id="T2_1"/>
+ <Response value="0.6596588" id="T2_2"/>
+ </Node>
+ <Node feature="F38" value="1.3808891" id="N2_10">
+ <Response value="-0.7416763" id="T2_3"/>
+ <Response value="0.0176633" id="T2_4"/>
+ </Node>
+ </Node>
+ <Response value="0.4502234" id="T2_5"/>
+ </Node>
+ <Response value="-0.6811898" id="T2_6"/>
+ </Node>
+ <Response value="0.5572351" id="T2_7"/>
+ </Node>
+ <Node feature="F100" value="3.3971992" id="N2_11">
+ <Node feature="F39" value="7.0869236" id="N2_12">
+ <Node feature="F43" value="5.5100875" id="N2_13">
+ <Node feature="F46" value="4.5000000" id="N2_14">
+ <Response value="-0.1702421" id="T2_8"/>
+ <Response value="-0.9797453" id="T2_9"/>
+ </Node>
+ <Response value="-1.5426025" id="T2_10"/>
+ </Node>
+ <Response value="0.0774408" id="T2_11"/>
+ </Node>
+ <Node feature="F52" value="22.3562355" id="N2_15">
+ <Node feature="F35" value="4.4263992" id="N2_16">
+ <Response value="0.4011598" id="T2_12"/>
+ <Response value="-0.3898472" id="T2_13"/>
+ </Node>
+ <Response value="-1.7500000" id="T2_14"/>
+ </Node>
+ </Node>
+ </Node>
+ <Node feature="F39" value="14.5762558" id="N2_17">
+ <Node feature="F109" value="0.5000000" id="N2_18">
+ <Response value="1.6616928" id="T2_15"/>
+ <Response value="0.4001626" id="T2_16"/>
+ </Node>
+ <Node feature="F100" value="3.0519419" id="N2_19">
+ <Response value="0.6164910" id="T2_17"/>
+ <Response value="-0.1808479" id="T2_18"/>
+ </Node>
+ </Node>
+ </Node>
+ <Response value="-1.2135522" id="T2_19"/>
+ </Node>
+ <Response value="0.5535716" id="T2_20"/>
+ </Node>
+ </Tree>
+ <Tree id="3">
+ <Node feature="F43" value="9.2721510" id="N3_1">
+ <Node feature="F36" value="9.0613861" id="N3_2">
+ <Node feature="F115" value="36.5000000" id="N3_3">
+ <Node feature="F34" value="1.4407213" id="N3_4">
+ <Node feature="F41" value="10.4713802" id="N3_5">
+ <Node feature="F34" value="1.2610778" id="N3_6">
+ <Node feature="F105" value="8.2159586" id="N3_7">
+ <Node feature="F46" value="88.5000000" id="N3_8">
+ <Response value="0.0075843" id="T3_1"/>
+ <Response value="-0.6358738" id="T3_2"/>
+ </Node>
+ <Node feature="F105" value="9.5308332" id="N3_9">
+ <Response value="1.4464284" id="T3_3"/>
+ <Response value="-0.0895592" id="T3_4"/>
+ </Node>
+ </Node>
+ <Response value="0.3532708" id="T3_5"/>
+ </Node>
+ <Response value="-1.8289603" id="T3_6"/>
+ </Node>
+ <Node feature="F45" value="24.5000000" id="N3_10">
+ <Node feature="F111" value="0.9095335" id="N3_11">
+ <Node feature="F113" value="0.0529755" id="N3_12">
+ <Response value="-0.6272416" id="T3_7"/>
+ <Node feature="F50" value="34.2163391" id="N3_13">
+ <Node feature="F113" value="0.0813664" id="N3_14">
+ <Response value="0.3683843" id="T3_8"/>
+ <Node feature="F34" value="1.6283135" id="N3_15">
+ <Response value="-0.6334628" id="T3_9"/>
+ <Response value="-0.1610307" id="T3_10"/>
+ </Node>
+ </Node>
+ <Response value="1.5559684" id="T3_11"/>
+ </Node>
+ </Node>
+ <Response value="-1.7492068" id="T3_12"/>
+ </Node>
+ <Response value="1.5060212" id="T3_13"/>
+ </Node>
+ </Node>
+ <Node feature="F49" value="23.5787125" id="N3_16">
+ <Node feature="F100" value="6.5115452" id="N3_17">
+ <Node feature="F37" value="0.8601408" id="N3_18">
+ <Node feature="F57" value="6.5000000" id="N3_19">
+ <Response value="0.0547747" id="T3_14"/>
+ <Response value="1.1933460" id="T3_15"/>
+ </Node>
+ <Response value="0.6402962" id="T3_16"/>
+ </Node>
+ <Response value="1.7395205" id="T3_17"/>
+ </Node>
+ <Response value="2.5559684" id="T3_18"/>
+ </Node>
+ </Node>
+ <Response value="-3.1016318" id="T3_19"/>
+ </Node>
+ <Response value="1.8657542" id="T3_20"/>
+ </Node>
+ </Tree>
+ <Tree id="4">
+ <Node feature="F55" value="764.9404297" id="N4_1">
+ <Node feature="F34" value="23.2379246" id="N4_2">
+ <Node feature="F36" value="9.2296076" id="N4_3">
+ <Node feature="F114" value="116.0000000" id="N4_4">
+ <Node feature="F108" value="13.5000000" id="N4_5">
+ <Node feature="F108" value="12.5000000" id="N4_6">
+ <Response value="-0.2736142" id="T4_1"/>
+ <Response value="-1.7384173" id="T4_2"/>
+ </Node>
+ <Node feature="F110" value="10.5000000" id="N4_7">
+ <Response value="0.0794336" id="T4_3"/>
+ <Response value="-0.2171646" id="T4_4"/>
+ </Node>
+ </Node>
+ <Node feature="F114" value="129.0000000" id="N4_8">
+ <Node feature="F109" value="0.5000000" id="N4_9">
+ <Response value="1.4407836" id="T4_5"/>
+ <Response value="-0.1458547" id="T4_6"/>
+ </Node>
+ <Node feature="F111" value="0.9703438" id="N4_10">
+ <Node feature="F47" value="18.5000000" id="N4_11">
+ <Node feature="F32" value="3.5000000" id="N4_12">
+ <Response value="0.0708936" id="T4_7"/>
+ <Node feature="F118" value="0.6794872" id="N4_13">
+ <Node feature="F119" value="3.8533711" id="N4_14">
+ <Node feature="F34" value="0.1213822" id="N4_15">
+ <Response value="-2.0046196" id="T4_8"/>
+ <Response value="-0.0008566" id="T4_9"/>
+ </Node>
+ <Response value="-0.9490828" id="T4_10"/>
+ </Node>
+ <Response value="0.0790339" id="T4_11"/>
+ </Node>
+ </Node>
+ <Node feature="F113" value="0.3637481" id="N4_16">
+ <Response value="0.1161088" id="T4_12"/>
+ <Response value="-0.9997786" id="T4_13"/>
+ </Node>
+ </Node>
+ <Response value="1.3003114" id="T4_14"/>
+ </Node>
+ </Node>
+ </Node>
+ <Node feature="F111" value="0.2438112" id="N4_17">
+ <Response value="-2.0582902" id="T4_15"/>
+ <Response value="0.6918949" id="T4_16"/>
+ </Node>
+ </Node>
+ <Node feature="F115" value="95.0000000" id="N4_18">
+ <Response value="-2.8602383" id="T4_17"/>
+ <Response value="-0.0063699" id="T4_18"/>
+ </Node>
+ </Node>
+ <Node feature="F101" value="0.9411763" id="N4_19">
+ <Response value="-2.0253283" id="T4_19"/>
+ <Response value="-0.6417007" id="T4_20"/>
+ </Node>
+ </Node>
+ </Tree>
+ <Tree id="5">
+ <Node feature="F114" value="516.0000000" id="N5_1">
+ <Node feature="F49" value="8.9197922" id="N5_2">
+ <Node feature="F48" value="3.5000000" id="N5_3">
+ <Node feature="F36" value="1.3889931" id="N5_4">
+ <Node feature="F43" value="0.9699799" id="N5_5">
+ <Node feature="F34" value="9.6113167" id="N5_6">
+ <Node feature="F106" value="8.5000000" id="N5_7">
+ <Node feature="F108" value="153.5000000" id="N5_8">
+ <Node feature="F110" value="130.5000000" id="N5_9">
+ <Response value="0.1802420" id="T5_1"/>
+ <Response value="2.5451630" id="T5_2"/>
+ </Node>
+ <Node feature="F108" value="161.5000000" id="N5_10">
+ <Response value="-2.2253985" id="T5_3"/>
+ <Node feature="F55" value="31.4965668" id="N5_11">
+ <Response value="-0.0122572" id="T5_4"/>
+ <Response value="0.7364454" id="T5_5"/>
+ </Node>
+ </Node>
+ </Node>
+ <Response value="-0.2596613" id="T5_6"/>
+ </Node>
+ <Response value="0.7247348" id="T5_7"/>
+ </Node>
+ <Node feature="F111" value="0.2817393" id="N5_12">
+ <Response value="-0.6409092" id="T5_8"/>
+ <Response value="0.2100071" id="T5_9"/>
+ </Node>
+ </Node>
+ <Node feature="F116" value="18.7500000" id="N5_13">
+ <Response value="0.5113520" id="T5_10"/>
+ <Response value="-0.1093323" id="T5_11"/>
+ </Node>
+ </Node>
+ <Response value="0.9379161" id="T5_12"/>
+ </Node>
+ <Response value="0.3603908" id="T5_13"/>
+ </Node>
+ <Node feature="F46" value="32.5000000" id="N5_14">
+ <Node feature="F46" value="5.5000000" id="N5_15">
+ <Node feature="F39" value="11.7440758" id="N5_16">
+ <Node feature="F115" value="774.0000000" id="N5_17">
+ <Response value="-0.0433343" id="T5_14"/>
+ <Response value="-1.7439904" id="T5_15"/>
+ </Node>
+ <Response value="-0.3662575" id="T5_16"/>
+ </Node>
+ <Response value="0.5413771" id="T5_17"/>
+ </Node>
+ <Node feature="F110" value="67.0000000" id="N5_18">
+ <Node feature="F46" value="34.5000000" id="N5_19">
+ <Response value="-2.6581287" id="T5_18"/>
+ <Response value="-0.9399502" id="T5_19"/>
+ </Node>
+ <Response value="0.0756640" id="T5_20"/>
+ </Node>
+ </Node>
+ </Node>
+ </Tree>
+ <Tree id="6">
+ <Node feature="F42" value="24.3080139" id="N6_1">
+ <Node feature="F118" value="0.8452381" id="N6_2">
+ <Node feature="F119" value="6.2847767" id="N6_3">
+ <Node feature="F100" value="3.2778931" id="N6_4">
+ <Node feature="F46" value="30.0000000" id="N6_5">
+ <Node feature="F43" value="1.2712233" id="N6_6">
+ <Node feature="F104" value="3.5000000" id="N6_7">
+ <Response value="0.1365837" id="T6_1"/>
+ <Response value="0.5592712" id="T6_2"/>
+ </Node>
+ <Node feature="F39" value="0.6294491" id="N6_8">
+ <Response value="-0.8729556" id="T6_3"/>
+ <Response value="-0.0123421" id="T6_4"/>
+ </Node>
+ </Node>
+ <Response value="3.7677864" id="T6_5"/>
+ </Node>
+ <Node feature="F111" value="0.6580936" id="N6_9">
+ <Node feature="F103" value="0.9319581" id="N6_10">
+ <Response value="-0.2822538" id="T6_6"/>
+ <Node feature="F107" value="1.5000000" id="N6_11">
+ <Response value="-0.3983539" id="T6_7"/>
+ <Node feature="F104" value="5.5000000" id="N6_12">
+ <Response value="0.0792465" id="T6_8"/>
+ <Response value="0.7273864" id="T6_9"/>
+ </Node>
+ </Node>
+ </Node>
+ <Node feature="F104" value="3.5000000" id="N6_13">
+ <Response value="-1.1550477" id="T6_10"/>
+ <Response value="0.0490706" id="T6_11"/>
+ </Node>
+ </Node>
+ </Node>
+ <Response value="1.4735778" id="T6_12"/>
+ </Node>
+ <Node feature="F111" value="0.3724709" id="N6_14">
+ <Node feature="F51" value="16.0989189" id="N6_15">
+ <Node feature="F114" value="154.0000000" id="N6_16">
+ <Node feature="F108" value="57.5000000" id="N6_17">
+ <Response value="-0.0675733" id="T6_13"/>
+ <Response value="-0.3994327" id="T6_14"/>
+ </Node>
+ <Response value="-0.0250285" id="T6_15"/>
+ </Node>
+ <Response value="-1.4871782" id="T6_16"/>
+ </Node>
+ <Node feature="F34" value="2.1943491" id="N6_18">
+ <Response value="0.0229469" id="T6_17"/>
+ <Node feature="F108" value="1527.0000000" id="N6_19">
+ <Response value="1.4706301" id="T6_18"/>
+ <Response value="0.0285333" id="T6_19"/>
+ </Node>
+ </Node>
+ </Node>
+ </Node>
+ <Response value="3.4899490" id="T6_20"/>
+ </Node>
+ </Tree>
+ <Tree id="7">
+ <Node feature="F34" value="30.3465347" id="N7_1">
+ <Node feature="F103" value="0.9996098" id="N7_2">
+ <Node feature="F38" value="0.5586690" id="N7_3">
+ <Node feature="F105" value="3.6287756" id="N7_4">
+ <Node feature="F104" value="3.5000000" id="N7_5">
+ <Node feature="F31" value="0.8600000" id="N7_6">
+ <Response value="0.1121421" id="T7_1"/>
+ <Response value="1.8153648" id="T7_2"/>
+ </Node>
+ <Response value="-0.8281607" id="T7_3"/>
+ </Node>
+ <Node feature="F55" value="37.6819153" id="N7_7">
+ <Response value="0.9656266" id="T7_4"/>
+ <Response value="0.1585065" id="T7_5"/>
+ </Node>
+ </Node>
+ <Node feature="F113" value="0.8403850" id="N7_8">
+ <Node feature="F38" value="9.6623116" id="N7_9">
+ <Node feature="F46" value="136.0000000" id="N7_10">
+ <Node feature="F53" value="0.5548913" id="N7_11">
+ <Node feature="F38" value="8.4469957" id="N7_12">
+ <Node feature="F34" value="3.1969421" id="N7_13">
+ <Node feature="F114" value="20.0000000" id="N7_14">
+ <Response value="-0.2944335" id="T7_6"/>
+ <Response value="0.0349900" id="T7_7"/>
+ </Node>
+ <Node feature="F34" value="3.4671984" id="N7_15">
+ <Response value="-1.3154796" id="T7_8"/>
+ <Response value="-0.1742507" id="T7_9"/>
+ </Node>
+ </Node>
+ <Response value="0.4071658" id="T7_10"/>
+ </Node>
+ <Node feature="F105" value="2.3154340" id="N7_16">
+ <Node feature="F110" value="59.5000000" id="N7_17">
+ <Response value="-0.1713032" id="T7_11"/>
+ <Response value="-1.4204650" id="T7_12"/>
+ </Node>
+ <Response value="-0.1456236" id="T7_13"/>
+ </Node>
+ </Node>
+ <Response value="0.5520287" id="T7_14"/>
+ </Node>
+ <Node feature="F108" value="12156.5000000" id="N7_18">
+ <Node feature="F111" value="0.3892631" id="N7_19">
+ <Response value="-0.1628500" id="T7_15"/>
+ <Response value="-0.9015614" id="T7_16"/>
+ </Node>
+ <Response value="-2.6391831" id="T7_17"/>
+ </Node>
+ </Node>
+ <Response value="0.2011691" id="T7_18"/>
+ </Node>
+ </Node>
+ <Response value="-3.0730490" id="T7_19"/>
+ </Node>
+ <Response value="-3.2461861" id="T7_20"/>
+ </Node>
+ </Tree>
+ <Tree id="8">
+ <Node feature="F55" value="28.4668102" id="N8_1">
+ <Node feature="F34" value="0.4929269" id="N8_2">
+ <Node feature="F30" value="0.8600000" id="N8_3">
+ <Node feature="F37" value="0.8360082" id="N8_4">
+ <Response value="-0.0815482" id="T8_1"/>
+ <Response value="-0.7898247" id="T8_2"/>
+ </Node>
+ <Response value="-0.5144471" id="T8_3"/>
+ </Node>
+ <Node feature="F108" value="20498.0000000" id="N8_5">
+ <Node feature="F44" value="1.1856511" id="N8_6">
+ <Node feature="F56" value="1.0706565" id="N8_7">
+ <Node feature="F39" value="8.3770790" id="N8_8">
+ <Node feature="F59" value="0.5604000" id="N8_9">
+ <Response value="0.0429508" id="T8_4"/>
+ <Node feature="F34" value="0.7287493" id="N8_10">
+ <Response value="-1.0264078" id="T8_5"/>
+ <Response value="0.6052195" id="T8_6"/>
+ </Node>
+ </Node>
+ <Response value="-0.4814408" id="T8_7"/>
+ </Node>
+ <Node feature="F119" value="3.7530813" id="N8_11">
+ <Node feature="F115" value="8.5000000" id="N8_12">
+ <Response value="0.4916013" id="T8_8"/>
+ <Response value="0.0457533" id="T8_9"/>
+ </Node>
+ <Node feature="F114" value="1093.5000000" id="N8_13">
+ <Response value="1.1673864" id="T8_10"/>
+ <Response value="0.3411176" id="T8_11"/>
+ </Node>
+ </Node>
+ </Node>
+ <Response value="-0.6176305" id="T8_12"/>
+ </Node>
+ <Node feature="F100" value="3.1519730" id="N8_14">
+ <Response value="2.6908011" id="T8_13"/>
+ <Response value="0.3835885" id="T8_14"/>
+ </Node>
+ </Node>
+ </Node>
+ <Node feature="F116" value="62.0000000" id="N8_15">
+ <Node feature="F114" value="562.0000000" id="N8_16">
+ <Response value="-0.4155430" id="T8_15"/>
+ <Node feature="F103" value="0.9826763" id="N8_17">
+ <Response value="-0.1169933" id="T8_16"/>
+ <Node feature="F104" value="0.5000000" id="N8_18">
+ <Response value="-0.0665763" id="T8_17"/>
+ <Response value="1.0238317" id="T8_18"/>
+ </Node>
+ </Node>
+ </Node>
+ <Node feature="F100" value="5.8046961" id="N8_19">
+ <Response value="-3.2954836" id="T8_19"/>
+ <Response value="0.2781039" id="T8_20"/>
+ </Node>
+ </Node>
+ </Node>
+ </Tree>
+ <Tree id="9">
+ <Node feature="F34" value="26.9548168" id="N9_1">
+ <Node feature="F35" value="18.4714928" id="N9_2">
+ <Node feature="F115" value="698.0000000" id="N9_3">
+ <Node feature="F116" value="41.5000000" id="N9_4">
+ <Node feature="F38" value="1.1138718" id="N9_5">
+ <Node feature="F46" value="9.0000000" id="N9_6">
+ <Node feature="F31" value="0.8600000" id="N9_7">
+ <Response value="0.1059075" id="T9_1"/>
+ <Response value="-0.2995292" id="T9_2"/>
+ </Node>
+ <Node feature="F46" value="25.5000000" id="N9_8">
+ <Node feature="F46" value="13.0000000" id="N9_9">
+ <Response value="0.6297316" id="T9_3"/>
+ <Response value="1.8451736" id="T9_4"/>
+ </Node>
+ <Response value="0.2079161" id="T9_5"/>
+ </Node>
+ </Node>
+ <Node feature="F38" value="19.3839836" id="N9_10">
+ <Node feature="F49" value="29.9797497" id="N9_11">
+ <Node feature="F46" value="235.5000000" id="N9_12">
+ <Node feature="F38" value="1.2626771" id="N9_13">
+ <Response value="-0.5165347" id="T9_6"/>
+ <Node feature="F35" value="10.3027954" id="N9_14">
+ <Node feature="F50" value="0.2823648" id="N9_15">
+ <Response value="-0.0424489" id="T9_7"/>
+ <Node feature="F113" value="0.0776736" id="N9_16">
+ <Response value="0.7495954" id="T9_8"/>
+ <Response value="-0.2948665" id="T9_9"/>
+ </Node>
+ </Node>
+ <Response value="0.3229146" id="T9_10"/>
+ </Node>
+ </Node>
+ <Response value="-1.0711968" id="T9_11"/>
+ </Node>
+ <Response value="0.3153474" id="T9_12"/>
+ </Node>
+ <Node feature="F116" value="5.2182379" id="N9_17">
+ <Response value="2.8017734" id="T9_13"/>
+ <Response value="0.3444192" id="T9_14"/>
+ </Node>
+ </Node>
+ </Node>
+ <Node feature="F113" value="0.5691726" id="N9_18">
+ <Response value="1.7530511" id="T9_15"/>
+ <Response value="0.3534861" id="T9_16"/>
+ </Node>
+ </Node>
+ <Response value="-2.4915219" id="T9_17"/>
+ </Node>
+ <Node feature="F103" value="0.9680555" id="N9_19">
+ <Response value="-2.1724317" id="T9_18"/>
+ <Response value="0.2143739" id="T9_19"/>
+ </Node>
+ </Node>
+ <Response value="3.1712332" id="T9_20"/>
+ </Node>
+ </Tree>
+ </Forest>
+ </DecisionTree>
+</MlrFunction>
diff --git a/searchlib/src/test/files/gbdt_empty_tree.xml b/searchlib/src/test/files/gbdt_empty_tree.xml
new file mode 100644
index 00000000000..15bf46471b6
--- /dev/null
+++ b/searchlib/src/test/files/gbdt_empty_tree.xml
@@ -0,0 +1,46 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!-- Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -->
+<MlrFunction name="CHANGE THIS NUMBER" featuredef="mlrfeaturedefs.xml" version="1.0">
+
+ <!-- ADD SCORE STANDARDIZATION OR CALIBRATION HERE -->
+
+ <DecisionTree loss="regression">
+
+ <!-- ADD EARLY EXIT HERE -->
+
+ <Forest>
+ <Tree id="0">
+ </Tree>
+ <Tree id="1">
+ <Node feature="INFD_SCORE" value="3.2105989" id="N1_1">
+ <Node feature="GMP_SCORE" value="0.0138730" id="N1_2">
+ <Node feature="INFD_SCORE" value="1.8138845" id="N1_3">
+ <Response value="0.0018257" id="T1_1"/>
+ <Node feature="GMP_SCORE" value="0.0061840" id="N1_4">
+ <Response value="0.0034753" id="T1_2"/>
+ <Response value="0.0062119" id="T1_3"/>
+ </Node>
+ </Node>
+ <Node feature="INFD_SCORE" value="1.5684295" id="N1_5">
+ <Node feature="GMP_SCORE" value="0.0217475" id="N1_6">
+ <Response value="0.0043064" id="T1_4"/>
+ <Response value="0.0082065" id="T1_5"/>
+ </Node>
+ <Response value="0.0110743" id="T1_6"/>
+ </Node>
+ </Node>
+ <Node feature="GMP_SCORE" value="0.0100120" id="N1_7">
+ <Node feature="INFD_SCORE" value="5.5982456" id="N1_8">
+ <Node feature="GMP_SCORE" value="0.0052305" id="N1_9">
+ <Response value="0.0060169" id="T1_7"/>
+ <Response value="0.0094888" id="T1_8"/>
+ </Node>
+ <Response value="0.0119292" id="T1_9"/>
+ </Node>
+ <Response value="0.0174150" id="T1_10"/>
+ </Node>
+ </Node>
+ </Tree>
+ </Forest>
+ </DecisionTree>
+</MlrFunction>
diff --git a/searchlib/src/test/files/gbdt_err.xml b/searchlib/src/test/files/gbdt_err.xml
new file mode 100644
index 00000000000..aa6103c8604
--- /dev/null
+++ b/searchlib/src/test/files/gbdt_err.xml
@@ -0,0 +1,3 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!-- Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -->
+<Unknown />
diff --git a/searchlib/src/test/files/gbdt_set_inclusion_test.xml b/searchlib/src/test/files/gbdt_set_inclusion_test.xml
new file mode 100644
index 00000000000..ad62c556c87
--- /dev/null
+++ b/searchlib/src/test/files/gbdt_set_inclusion_test.xml
@@ -0,0 +1,119 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!-- Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -->
+<MlrFunction name="CHANGE THIS NUMBER" featuredef="mlrfeaturedefs.xml" version="1.0">
+
+ <!-- ADD SCORE STANDARDIZATION OR CALIBRATION HERE -->
+
+ <DecisionTree loss="logistic">
+
+ <!-- ADD EARLY EXIT HERE -->
+
+ <Forest>
+ <Tree id="0">
+ <Node feature="AGE_GROUP$" value="2" id="N0_1">
+ <Node feature="EDUCATION_LEVEL$" value="0" id="N0_2">
+ <Response value="-0.2500000" id="T0_1"/>
+ <Response value="0.1250000" id="T0_2"/>
+ </Node>
+ <Node feature="AGE_GROUP$" value="1" id="N0_3">
+ <Response value="0.1250000" id="T0_3"/>
+ <Response value="0.2500000" id="T0_4"/>
+ </Node>
+ </Node>
+ </Tree>
+ <Tree id="1">
+ <Node feature="AGE_GROUP$" value="2" id="N1_1">
+ <Node feature="EDUCATION_LEVEL$" value="0" id="N1_2">
+ <Response value="-0.2189117" id="T1_1"/>
+ <Response value="-0.0000000" id="T1_2"/>
+ </Node>
+ <Node feature="EDUCATION_LEVEL$" value="0" id="N1_3">
+ <Response value="0.1094559" id="T1_3"/>
+ <Response value="0.2343953" id="T1_4"/>
+ </Node>
+ </Node>
+ </Tree>
+ <Tree id="2">
+ <Node feature="AGE_GROUP$" value="2" id="N2_1">
+ <Response value="-0.0962185" id="T2_1"/>
+ <Node feature="EDUCATION_LEVEL$" value="0" id="N2_2">
+ <Node feature="AGE_GROUP$" value="1" id="N2_3">
+ <Response value="0.0000000" id="T2_2"/>
+ <Response value="0.2055456" id="T2_3"/>
+ </Node>
+ <Response value="0.2055530" id="T2_4"/>
+ </Node>
+ </Node>
+ </Tree>
+ <Tree id="3">
+ <Node feature="EDUCATION_LEVEL$" value="0" id="N3_1">
+ <Response value="0.0905977" id="T3_1"/>
+ <Response value="0.1812016" id="T3_2"/>
+ </Node>
+ </Tree>
+ <Tree id="4">
+ <Node feature="EDUCATION_LEVEL$" value="0,1" id="N4_1">
+ <Node feature="AGE_GROUP$" value="2" id="N4_2">
+ <Node feature="EDUCATION_LEVEL$" value="0" id="N4_3">
+ <Response value="-0.1917720" id="T4_1"/>
+ <Response value="-0.0000000" id="T4_2"/>
+ </Node>
+ <Node feature="AGE_GROUP$" value="1" id="N4_4">
+ <Node feature="EDUCATION_LEVEL$" value="0" id="N4_5">
+ <Response value="0.0000000" id="T4_3"/>
+ <Response value="0.1608304" id="T4_4"/>
+ </Node>
+ <Response value="0.1708644" id="T4_5"/>
+ </Node>
+ </Node>
+ <Response value="0.1923393" id="T4_6"/>
+ </Node>
+ </Tree>
+ <Tree id="5">
+ <Node feature="EDUCATION_LEVEL$" value="foo,bar" id="N5_1">
+ <Node feature="AGE_GROUP$" value="2" id="N5_2">
+ <Node feature="EDUCATION_LEVEL$" value="baz" id="N5_3">
+ <Response value="-0.1696624" id="T5_1"/>
+ <Response value="-0.0000000" id="T5_2"/>
+ </Node>
+ <Node feature="AGE_GROUP$" value="1" id="N5_4">
+ <Node feature="EDUCATION_LEVEL$" value="0" id="N5_5">
+ <Response value="0.0000000" id="T5_3"/>
+ <Response value="0.1438091" id="T5_4"/>
+ </Node>
+ <Response value="0.1521967" id="T5_5"/>
+ </Node>
+ </Node>
+ <Response value="0.2003772" id="T5_6"/>
+ </Node>
+ </Tree>
+ <Tree id="6">
+ <Response value="-0.0108278" id="T6_1"/>
+ </Tree>
+ <Tree id="7">
+ <Node feature="EDUCATION_LEVEL$" value="0" id="N7_1">
+ <Response value="-0.1500528" id="T7_1"/>
+ <Node feature="GENDER$" value="1" id="N7_2">
+ <Response value="0.0652894" id="T7_2"/>
+ <Response value="0.1543407" id="T7_3"/>
+ </Node>
+ </Node>
+ </Tree>
+ <Tree id="8">
+ <Node feature="AGE_GROUP$" value="1" id="N8_1">
+ <Response value="0.0000000" id="T8_1"/>
+ <Response value="0.1569706" id="T8_2"/>
+ </Node>
+ </Tree>
+ <Tree id="9">
+ <Node feature="AGE_GROUP$" value="1" id="N9_1">
+ <Response value="0.0000000" id="T9_1"/>
+ <Node feature="EDUCATION_LEVEL$" value="1" id="N9_2">
+ <Response value="0.0000000" id="T9_2"/>
+ <Response value="0.1405829" id="T9_3"/>
+ </Node>
+ </Node>
+ </Tree>
+ </Forest>
+ </DecisionTree>
+</MlrFunction>
diff --git a/searchlib/src/test/files/gbdt_tree_response.xml b/searchlib/src/test/files/gbdt_tree_response.xml
new file mode 100644
index 00000000000..8bed53957d2
--- /dev/null
+++ b/searchlib/src/test/files/gbdt_tree_response.xml
@@ -0,0 +1,35 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!-- Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -->
+<MlrFunction name="CHANGE THIS NUMBER" featuredef="mlrfeaturedefs.xml" version="1.0">
+
+ <!-- ADD SCORE STANDARDIZATION OR CALIBRATION HERE -->
+
+ <DecisionTree loss="regression">
+
+ <!-- ADD EARLY EXIT HERE -->
+
+ <Forest>
+ <Tree id="130">
+ <Node feature="INFD_SCORE" value="2.1280360" id="N130_1">
+ <Response value="-0.0000112" id="T130_1"/>
+ <Response value="0.0000871" id="T130_2"/>
+ </Node>
+ </Tree>
+ <Tree id="135">
+ <Response value="0.0000028" id="T135_1"/>
+ </Tree>
+ <Tree id="136">
+ <Node feature="GMP_SCORE" value="0.0167980" id="N136_1">
+ <Node feature="INFD_SCORE" value="3.9760852" id="N136_2">
+ <Node feature="INFD_SCORE" value="0.1266405" id="N136_3">
+ <Response value="-0.0000598" id="T136_1"/>
+ <Response value="0.0000225" id="T136_2"/>
+ </Node>
+ <Response value="-0.0001383" id="T136_3"/>
+ </Node>
+ <Response value="0.0001529" id="T136_4"/>
+ </Node>
+ </Tree>
+ </Forest>
+ </DecisionTree>
+</MlrFunction>
diff --git a/searchlib/src/test/files/mlr/cases-illegal1.csv b/searchlib/src/test/files/mlr/cases-illegal1.csv
new file mode 100644
index 00000000000..fe8dcf1ac8a
--- /dev/null
+++ b/searchlib/src/test/files/mlr/cases-illegal1.csv
@@ -0,0 +1,5 @@
+# Argument missing value
+
+
+
+23, arg1:3, arg2:
diff --git a/searchlib/src/test/files/mlr/cases-illegal2.csv b/searchlib/src/test/files/mlr/cases-illegal2.csv
new file mode 100644
index 00000000000..0b755035b36
--- /dev/null
+++ b/searchlib/src/test/files/mlr/cases-illegal2.csv
@@ -0,0 +1,2 @@
+# Target isn't a number
+5db,7
diff --git a/searchlib/src/test/files/mlr/cases-linear.csv b/searchlib/src/test/files/mlr/cases-linear.csv
new file mode 100644
index 00000000000..dadc626ce18
--- /dev/null
+++ b/searchlib/src/test/files/mlr/cases-linear.csv
@@ -0,0 +1,7 @@
+# f(x)=x
+0, x:0
+1, x:1
+2, x:2
+3, x:3
+4, x:4
+5, x:5
diff --git a/searchlib/src/test/files/mlr/cases.csv b/searchlib/src/test/files/mlr/cases.csv
new file mode 100644
index 00000000000..2b9ea8bf7ff
--- /dev/null
+++ b/searchlib/src/test/files/mlr/cases.csv
@@ -0,0 +1,6 @@
+# Comments are legal
+
+1, arg1:2, arg2:-1.3
+-1.003,arg1:500007
+
+0, arg2:1.00
diff --git a/searchlib/src/test/files/ranking01.expression b/searchlib/src/test/files/ranking01.expression
new file mode 100644
index 00000000000..4df9b580069
--- /dev/null
+++ b/searchlib/src/test/files/ranking01.expression
@@ -0,0 +1,10 @@
+if (attribute(b) < 0.65, if (attribute(c) < 0.55, if (attribute(a) < 0.55, 0.369863, -0.6578947), if (attribute(a) < 0.65, -0.775, -1.0)), if (attribute(c) < 0.45, -0.9090909, -1.0)) +
+if (attribute(a) < 0.55, if (attribute(b) < 0.35, if (attribute(c) < 0.75, 0.4327977, 0.025), if (attribute(c) < 0.75, -0.1090028, -0.07682927)), if (attribute(c) < 0.55, -0.04031544, -0.01875)) +
+if (attribute(b) < 0.35, 0.06336273, if (attribute(c) < 0.85, if (attribute(a) < 0.35, if (attribute(c) < 0.35, 0.6091127, 0.02845135), -0.07638131), if (attribute(a) < 0.75, -0.018862, 0.01875))) +
+if (attribute(c) < 0.15, if (attribute(b) < 0.55, if (attribute(a) < 0.35, if (attribute(b) < 0.3, -1.866023, 0.1300271), 0.6299557), 0.1788445), if (attribute(b) < 0.65, -0.1586424, 0.06778581)) +
+if (attribute(c) < 0.45, if (attribute(a) < 0.35, if (attribute(b) < 0.75, 0.1426054, -0.2282), if (attribute(b) < 0.85, -0.09571452, -0.04941978)), if (attribute(a) < 0.25, 0.2759441, 0.0172878)) +
+if (attribute(a) < 0.15, if (attribute(b) < 0.75, 0.3165435, -0.04458321), if (attribute(a) < 0.55, -0.1137117, if (attribute(b) < 0.75, 0.04622166, if (attribute(c) < 0.65, 0.004746275, -0.03648972)))) +
+if (attribute(a) < 0.95, if (attribute(b) < 0.25, if (attribute(c) < 0.25, 0.7623822, if (attribute(a) < 0.65, 0.2338952, if (attribute(c) < 0.85, -0.06132011, 0.05052024))), -0.04188744), -0.03245768) +
+if (attribute(c) < 0.55, if (attribute(b) < 0.65, -0.2042442, 0.03887484), if (attribute(b) < 0.25, -0.0474437, if (attribute(a) < 0.15, -0.3700475, if (attribute(a) < 0.65, 0.07656199, 0.1085871)))) +
+if (attribute(a) < 0.75, 0.0189638, if (attribute(b) < 0.85, 1.942833E-4, if (attribute(c) < 0.85, if (attribute(c) < 0.45, -0.009795157, if (attribute(a) < 0.85, -0.01795083, -0.01329222)), -0.1179778))) +
+if (attribute(c) < 0.75, if (attribute(b) < 0.45, if (attribute(c) < 0.15, -0.4551494, if (attribute(c) < 0.65, 0.1471968, -0.06380587)), 0.03410008), if (attribute(b) < 0.65, -0.06397114, -0.01491517)) \ No newline at end of file
diff --git a/searchlib/src/test/files/ranking02.expression b/searchlib/src/test/files/ranking02.expression
new file mode 100644
index 00000000000..4e80576c7e7
--- /dev/null
+++ b/searchlib/src/test/files/ranking02.expression
@@ -0,0 +1,90 @@
+if (attribute(ythl) < 0.5, if (age(created_at) < 1830.0, if (term(0).significance < 0.9964904785, if (match < 0.6113165021, -0.0284270267, 3.592783E-4), if (fieldMatch(text).significantOccurrence < 0.0488094985, if (attribute(user_friends_count) < 184.5, -0.0124428511, 0.0077143433), if (term(0).significance < 0.997767508, -0.0390395696, if (term(1).significance < 0.9895755053, 0.02259176, if (fieldMatch(text).significantOccurrence < 0.1335410029, -0.01671786, -0.0425634221))))), if (age(created_at) < 5400.0, -0.046690069, if (age(created_at) < 45000.0, -0.025709541, -0.012803042))), if (age(created_at) < 1830.0, if (fieldMatch(text).importance < 0.7488905191, if (fieldMatch(text).absoluteOccurrence < 0.0116665, if (fieldMatch(text) < 0.8492144942, 0.0288744693, 0.0439309311), 0.0566558463), 0.0594293259), if (age(created_at) < 5400.0, -0.0184100055, if (age(created_at) < 27000.0, -3.458478E-4, 0.0087464789)))) +
+if (attribute(ythl) < 0.5, if (age(created_at) < 1830.0, if (term(0).significance < 0.9964904785, if (term(0).significance < 0.9914690256, -0.0118607453, 0.0031225791), if (term(0).significance < 0.997767508, if (term(1).significance < 0.9883980155, -0.0803907557, if (attribute(yst_link_array_size) < 0.0250600018, -0.0303931857, 0.0259097321)), if (term(1).significance < 0.9972054958, 0.0065438125, if (term(1).significance < 0.9975290298, -0.0913176725, -0.0123125115)))), if (age(created_at) < 5400.0, -0.0448246506, if (age(created_at) < 23400.0, -0.0262210797, -0.0146461827))), if (age(created_at) < 1830.0, if (fieldMatch(text).importance < 0.7488645315, if (fieldMatch(text) < 0.856990993, 0.0307982478, 0.0469020946), 0.0558564997), if (age(created_at) < 5400.0, -0.0166881751, if (fieldMatch(text) < 0.3820354939, -0.0035580609, if (age(created_at) < 30600.0, 0.0028319521, 0.0104819912))))) +
+if (attribute(ythl) < 0.5, if (age(created_at) < 1830.0, if (fieldMatch(text).tail < 7.5, if (match < 0.6697604656, -0.0323866906, if (term(0).significance < 0.9955350161, -0.0025720554, -0.0170321274)), if (attribute(user_followers_count) < 489.5, if (term(0).significance < 0.9964904785, 0.0035465045, if (term(0).significance < 0.999284029, -0.0205069971, 0.0010003389)), 0.0149904595)), if (age(created_at) < 9000.0, if (age(created_at) < 3570.0, -0.0501614448, -0.0347695722), if (age(created_at) < 45000.0, -0.0217186612, -0.0115826893))), if (age(created_at) < 1830.0, if (fieldMatch(text).occurrence < 0.127717495, if (fieldMatch(text) < 0.8662694693, 0.0270881826, 0.0422977189), 0.0471192106), if (age(created_at) < 5400.0, if (fieldMatch(text).importance < 0.666426003, -0.023211464, -0.0105863112), if (age(created_at) < 19800.0, -0.0011091805, 0.0079984015)))) +
+if (attribute(ythl) < 0.5, if (age(created_at) < 1830.0, if (term(1).significance < 0.8159549832, 0.0342704034, if (attribute(user_statuses_count) < 574.0, -0.0031829638, if (fieldMatch(text).weightedOccurrence < 0.109127, if (fieldMatch(text) < 0.8472499847, -0.0193605912, -0.0038142662), -0.0308342022))), if (age(created_at) < 5400.0, -0.0418216807, if (age(created_at) < 48600.0, -0.0237625386, -0.0115288531))), if (age(created_at) < 1830.0, if (fieldMatch(text).importance < 0.7488905191, if (term(0).significance < 0.991820991, if (term(2).significance < 0.983879447, if (fieldLength(text) < 23.5, 0.0091275797, -0.0681415824), 0.0280728758), if (fieldTermMatch(text,0).firstPosition < 9.5, if (attribute(user_followers_count) < 2165.5, 0.0378854321, 0.055539461), 0.0261930857)), 0.049689868), if (age(created_at) < 12600.0, if (age(created_at) < 5400.0, -0.0136135545, -0.0029542657), 0.0066915734))) +
+if (attribute(ythl) < 0.5, if (age(created_at) < 1830.0, if (term(1).significance < 0.7788045406, 0.0678231236, if (fieldMatch(text).significantOccurrence < 0.1249914989, if (attribute(user_statuses_count) < 103.5, 0.0047730322, if (attribute(user_followers_count) < 3070.5, if (fieldMatch(text).earliness < 0.8834840059, -0.0130691877, 0.0030931972), 0.0212955094)), -0.0250041155)), if (age(created_at) < 5400.0, -0.0386563137, if (age(created_at) < 48600.0, -0.0213844929, -0.0116543752))), if (age(created_at) < 1830.0, if (fieldMatch(text).longestSequence < 1.5, if (fieldMatch(text).earliness < 0.5683230162, if (fieldMatch(text).gapLength < 5.5, 0.0208840083, -0.0392353393), if (term(0).significance < 0.9139549732, -0.059916078, 0.0359567192)), 0.0417870117), if (age(created_at) < 5400.0, -0.0124473711, if (age(created_at) < 27000.0, -3.908889E-4, if (fieldMatch(text) < 0.5566675067, 9.45327E-4, 0.0097812185))))) +
+if (attribute(ythl) < 0.5, if (age(created_at) < 1830.0, if (term(1).significance < 0.8159549832, if (term(0).significance < 0.9873124957, 0.062526781, -0.0025881996), if (fieldMatch(text).tail < 7.5, if (attribute(user_statuses_count) < 504.0, -0.0072144471, -0.0184304751), -0.0041050691)), if (age(created_at) < 5400.0, -0.0342922301, if (age(created_at) < 52200.0, -0.0213685384, -0.0114302758))), if (age(created_at) < 1830.0, if (fieldMatch(text).longestSequence < 1.5, if (fieldMatch(text).tail < 7.5, if (fieldMatch(text).significantOccurrence < 0.057490997, if (term(0).significance < 0.9980279803, -0.0131328933, 0.0192113014), if (fieldMatch(text) < 0.8584204912, 0.0314073419, -0.0026767115)), if (fieldMatch(text).fieldCompleteness < 0.0392310023, -0.0016304919, 0.034703474)), 0.0373450153), if (age(created_at) < 12600.0, if (age(created_at) < 5400.0, -0.0106738218, -0.0029072167), 0.0056105069))) +
+if (attribute(ythl) < 0.5, if (age(created_at) < 1830.0, if (fieldMatch(text).tail < 12.5, if (attribute(user_statuses_count) < 826.0, -0.0058871349, if (fieldMatch(text).earliness < 0.8774999976, -0.0128456148, -0.0362508217)), 0.0039172531), if (age(created_at) < 5400.0, -0.033274366, if (age(created_at) < 48600.0, if (fieldMatch(text) < 0.5479695201, -0.0292307762, -0.0167816152), -0.0103426077))), if (age(created_at) < 1830.0, if (fieldMatch(text).earliness < 0.6510869861, if (attribute(yst_tweet_language) < 3243.5, if (fieldMatch(text) < 0.2023105025, -0.0093665406, 0.0265294786), -0.016323195), if (term(2).significance < 0.9791975021, if (attribute(yst_reply_auth) < 2.5, 0.0195383609, 0.0376308584), 0.038683455)), if (age(created_at) < 12600.0, if (term(4).significance < 0.9926320314, if (age(created_at) < 5400.0, -0.0119040624, -0.0033941367), 0.003778577), 0.0059147929))) +
+if (attribute(ythl) < 0.5, if (age(created_at) < 1830.0, if (fieldMatch(text).tail < 10.5, if (fieldMatch(text).importance < 0.7443764806, if (match < 0.9134370089, -0.0252698647, -0.0099637807), if (term(2).significance < 0.7909680009, -0.0344071695, if (term(0).significance < 0.9947484732, 0.0145760432, -0.0105169825))), -2.56762E-4), if (age(created_at) < 12600.0, if (age(created_at) < 5400.0, -0.0322518692, -0.0221817109), -0.0127554041)), if (age(created_at) < 1830.0, if (fieldMatch(text).earliness < 0.724747479, if (fieldTermMatch(text,1).firstPosition < 14.5, 0.0292128233, if (fieldMatch(text) < 0.3254045248, -0.0359002315, 0.0194921959)), if (attribute(user_followers_count) < 609.5, 0.0299861583, 0.04219304)), if (age(created_at) < 5400.0, if (fieldMatch(text).occurrence < 0.1731635034, -0.0134935559, -0.0027367126), if (term(1).significance < 0.9878399968, 0.0121433273, 0.0020006783)))) +
+if (attribute(ythl) < 0.5, if (age(created_at) < 1830.0, if (fieldMatch(text).longestSequence < 1.5, if (fieldTermMatch(text,0).firstPosition < 14.5, if (fieldMatch(text).importance < 0.7413114905, -0.0091155042, 0.0233289393), -0.0239608468), if (attribute(user_followers_count) < 2995.0, -0.0016973828, 0.0438873528)), if (age(created_at) < 12600.0, if (age(created_at) < 3570.0, -0.034580545, -0.0236031788), -0.0119280014)), if (age(created_at) < 1830.0, if (fieldTermMatch(text,1).firstPosition < 14.5, 0.0323605063, if (attribute(yst_tweet_language) < 3243.5, if (fieldTermMatch(text,0).firstPosition < 3.5, if (attribute(user_followers_count) < 114.5, 0.0149219697, 0.0383892131), if (fieldMatch(text) < 0.3404299915, -0.0214082868, 0.0183146341)), -0.0199916697)), if (age(created_at) < 9000.0, -0.0076472907, if (fieldMatch(text) < 0.5607429743, -1.69083E-5, if (term(1).significance < 0.8870275021, 0.0275141633, 0.0058735097))))) +
+if (attribute(ythl) < 0.5, if (age(created_at) < 1830.0, if (term(0).significance < 0.9964904785, if (fieldMatch(text) < 0.1415009946, -0.0331371143, 0.0016819061), if (term(0).significance < 0.9974014759, if (term(1).significance < 0.9943025112, -0.0561295193, -0.0143235877), if (term(0).significance < 0.9975079894, 0.0456376595, if (term(0).significance < 0.9976614714, -0.0617225433, -0.0089081592)))), if (age(created_at) < 12600.0, -0.0250708949, -0.0120490174)), if (age(created_at) < 1830.0, if (attribute(yst_tweet_language) < 3243.5, if (fieldMatch(text).significantOccurrence < 0.0422540009, 0.0132403332, if (fieldMatch(text).importance < 0.7485179901, if (fieldMatch(text).tail < 10.5, 0.0224059642, 0.0317363105), 0.0363809447)), -0.0059409077), if (age(created_at) < 12600.0, -0.005953322, if (match < 0.7504960299, if (fieldMatch(text).occurrence < 0.1318840086, -0.0092412181, 0.0036779089), 0.0067221979)))) +
+if (attribute(ythl) < 0.5, if (age(created_at) < 1770.0, if (term(0).significance < 0.9964904785, if (fieldMatch(text) < 0.2731105089, -0.0169751683, if (attribute(yst_reply_auth) < 16.5, 0.0106972872, if (term(1).significance < 0.8159549832, 0.0454901055, -0.0067703435))), if (term(0).significance < 0.9966599941, -0.0415369371, if (match.totalWeight < 250.0, -0.0080140966, 0.0488608858))), if (age(created_at) < 12600.0, -0.0229133495, if (fieldMatch(text).absoluteOccurrence < 0.0122500006, -0.0125676511, 2.328845E-4))), if (age(created_at) < 1830.0, if (fieldMatch(text).longestSequence < 1.5, if (term(0).significance < 0.9184160233, -0.0549048781, if (fieldLength(text) < 23.5, 0.0226244877, 0.0077881056)), 0.0280730521), if (age(created_at) < 9000.0, if (fieldMatch(text) < 0.3730605245, -0.0167181189, -0.0044234172), if (fieldMatch(text) < 0.5543889999, -8.4709E-4, 0.0055458527)))) +
+if (attribute(ythl) < 0.5, if (age(created_at) < 1770.0, if (match.totalWeight < 250.0, if (fieldTermMatch(text,1).firstPosition < 13.5, if (term(0).significance < 0.9701889753, -0.0195353072, if (term(0).significance < 0.9965775013, 0.0073931107, -0.0074860039)), if (fieldMatch(text) < 0.6285369992, -0.0322505986, -0.0073317181)), 0.0451330307), if (age(created_at) < 45000.0, -0.0206455453, -0.0085888986)), if (age(created_at) < 1830.0, if (fieldTermMatch(text,1).firstPosition < 14.5, 0.0265081733, if (fieldMatch(text) < 0.3104079962, -0.0291219391, if (attribute(yst_tweet_language) < 3271.5, if (attribute(user_friends_count) < 146.5, 0.0133927786, 0.0247206105), -0.0249098053))), if (age(created_at) < 12600.0, if (fieldTermMatch(text,0).firstPosition < 1.5, 0.0013211001, if (fieldMatch(text).importance < 0.6664245129, -0.0124234916, -0.0044820648)), if (fieldMatch(text).significantOccurrence < 0.0555050001, 6.152863E-4, 0.0069791274)))) +
+if (attribute(ythl) < 0.5, if (age(created_at) < 1830.0, if (term(1).significance < 0.7788045406, 0.0442178195, if (term(0).significance < 0.9492504597, -0.0249224413, if (fieldMatch(text).tail < 11.5, if (fieldMatch(text).importance < 0.7480239868, -0.009906585, if (term(0).significance < 0.9947484732, 0.0121048215, -0.0132930884)), 6.079666E-4))), if (age(created_at) < 9000.0, -0.0219397199, -0.0106952111)), if (age(created_at) < 1830.0, if (fieldMatch(text).absoluteOccurrence < 0.0136665003, if (fieldMatch(text).importance < 0.7488800287, if (fieldTermMatch(text,0).firstPosition < 6.5, if (attribute(user_followers_count) < 866.5, 0.0181298105, 0.0303594396), 0.0126963345), 0.028578828), 0.0333028419), if (age(created_at) < 27000.0, if (fieldTermMatch(text,3).firstPosition < 7.5, 0.0067345611, if (fieldTermMatch(text,0).firstPosition < 4.5, -0.0013179334, -0.0081428248)), if (fieldMatch(text) < 0.5568180084, 4.597678E-4, 0.0093837881)))) +
+if (attribute(ythl) < 0.5, if (age(created_at) < 1830.0, if (fieldMatch(text).longestSequence < 1.5, if (fieldMatch(text).importance < 0.4998250008, if (term(0).significance < 0.9983664751, -0.0058107338, 0.0245069566), if (fieldMatch(text).importance < 0.4998745024, -0.0308383904, -0.0106009672)), 0.00109712), if (age(created_at) < 45000.0, if (fieldMatch(text).weightedOccurrence < 0.0912880003, if (age(created_at) < 3570.0, -0.0267460073, -0.0152835256), -0.0298858389), -0.0088562145)), if (age(created_at) < 1830.0, if (fieldMatch(text).importance < 0.4989734888, if (fieldMatch(text).importance < 0.4988874793, 0.0045636472, -0.1210997623), if (fieldMatch(text) < 0.3135755062, if (term(1).significance < 0.9852235317, if (term(0).significance < 0.9929184914, -0.0444011152, 0.0156709024), 0.0178486139), if (attribute(yst_tweet_language) < 3243.5, 0.0236557227, -0.0055893686))), if (age(created_at) < 12600.0, -0.0039404999, 0.0040376803))) +
+if (attribute(ythl) < 0.5, if (age(created_at) < 1770.0, if (term(0).significance < 0.9964904785, if (fieldMatch(text).gapLength < 3.5, 0.003496796, -0.0136111988), if (term(0).significance < 0.9975960255, if (term(1).significance < 0.9943025112, -0.0474034255, -0.0139986631), if (fieldMatch(text).importance < 0.6665844917, -0.0083848009, if (term(0).significance < 0.9992945194, if (term(1).significance < 0.9993695021, 0.0095761689, 0.0714217668), -0.014142042)))), if (age(created_at) < 5400.0, -0.0204021576, if (fieldMatch(text).longestSequence < 1.5, -0.0152195185, -0.0074091603))), if (age(created_at) < 1830.0, if (fieldMatch(text).occurrence < 0.127717495, if (fieldMatch(text) < 0.845182538, 0.0072816766, 0.0185451686), 0.0243676179), if (fieldMatch(text).importance < 0.666454494, if (age(created_at) < 5400.0, -0.0113001116, -0.0020866841), if (age(created_at) < 30600.0, -2.226823E-4, 0.0054407552)))) +
+if (attribute(ythl) < 0.5, if (age(created_at) < 1770.0, if (attribute(user_friends_count) < 1202.5, if (attribute(yst_tweet_language) < 3243.5, if (attribute(user_statuses_count) < 491.5, if (attribute(user_followers_count) < 39.5, -0.0053604202, 0.0112837612), -0.0076658014), -0.0344819911), 0.015286062), if (age(created_at) < 52200.0, if (fieldMatch(text).importance < 0.6658334732, -0.0236404883, -0.0155495401), -0.0063627489)), if (age(created_at) < 1830.0, if (fieldMatch(text).longestSequence < 1.5, if (fieldMatch(text).significantOccurrence < 0.0377494991, -0.0247545653, if (term(0).significance < 0.9914690256, if (term(0).significance < 0.9911389947, 0.0074545408, -0.0712173039), 0.0174505123)), 0.0227466857), if (fieldMatch(text).importance < 0.6664484739, if (fieldMatch(text) < 0.3570200205, -0.0164480209, -0.0029063778), if (age(created_at) < 30600.0, -8.955043E-4, if (fieldLength(text) < 22.5, 0.0066513594, -0.0017231871))))) +
+if (attribute(ythl) < 0.5, if (age(created_at) < 1830.0, if (term(1).significance < 0.8159549832, 0.0266703268, if (attribute(yst_tweet_language) < 3243.5, if (match < 0.6756634712, -0.0178612015, if (fieldMatch(text).weightedOccurrence < 0.1012820005, if (fieldMatch(text).earliness < 0.8834840059, -0.0031504958, 0.0108290236), -0.0132400721)), -0.0294468679)), if (fieldMatch(text).longestSequence < 1.5, -0.0183944645, if (age(created_at) < 45000.0, -0.0132570328, -0.0044394895))), if (age(created_at) < 1830.0, if (fieldMatch(text).occurrence < 0.1225000024, if (fieldMatch(text).head < 9.5, if (term(0).significance < 0.973123014, -0.0399055768, 0.0171512303), 0.0047044679), if (attribute(user_statuses_count) < 6.5, -0.0303006102, 0.0230432421)), if (fieldMatch(text).importance < 0.6664534807, -0.0054238462, if (term(2).significance < 0.9981180429, if (fieldMatch(text).completeness < 0.959010005, -0.0041171766, 0.0032716696), 0.0055362698)))) +
+if (attribute(ythl) < 0.5, if (age(created_at) < 1830.0, if (fieldMatch(text).tail < 16.5, if (term(2).significance < 0.9960604906, -0.008761479, if (fieldMatch(text).occurrence < 0.1225000024, -0.0067609181, if (term(1).significance < 0.9832755327, 0.0282354539, 0.002590827))), 0.0073141014), if (age(created_at) < 45000.0, -0.0156556128, -0.0071654687)), if (age(created_at) < 1830.0, if (fieldMatch(text).occurrence < 0.075498499, 0.007334316, if (fieldMatch(text) < 0.3130764961, if (term(1).significance < 0.9978075027, -0.001460364, 0.0291628398), if (attribute(user_statuses_count) < 29.5, -0.005500918, if (attribute(yst_reply_auth) < 476.0, 0.0209690045, -0.0105504498)))), if (age(created_at) < 30600.0, if (fieldTermMatch(text,1).occurrences < 1.5, if (fieldMatch(text) < 0.4531754851, -0.0106616269, if (fieldMatch(text).importance < 0.4999470115, -0.0131472535, -8.043613E-4)), 0.0065678273), 0.0043163871))) +
+if (attribute(ythl) < 0.5, if (age(created_at) < 1830.0, if (fieldMatch(text).longestSequence < 1.5, if (fieldMatch(text).tail < 12.5, -0.0083194933, 0.0035102742), if (attribute(user_friends_count) < 263.5, if (fieldMatch(text).importance < 0.6659150124, 0.0142259075, -0.0029315835), 0.0146667338)), if (age(created_at) < 37800.0, if (fieldMatch(text).significantOccurrence < 0.0833195001, -0.0135039055, -0.0220540111), if (fieldMatch(text).longestSequenceRatio < 0.5357145071, -0.013039774, -0.0039578022))), if (age(created_at) < 1830.0, if (term(0).significance < 0.9184160233, if (fieldMatch(text).longestSequence < 1.5, -0.0589194117, 0.0129273078), if (fieldTermMatch(text,1).firstPosition < 7.5, 0.0206642588, if (attribute(user_statuses_count) < 63762.0, 0.0138866614, -0.0354735543))), if (fieldTermMatch(text,0).firstPosition < 1.5, if (age(created_at) < 52200.0, 0.0028235989, 0.0121270804), if (fieldMatch(text).importance < 0.666454494, -0.0071218235, 5.514519E-4)))) +
+if (attribute(ythl) < 0.5, if (age(created_at) < 1830.0, if (fieldTermMatch(text,1).firstPosition < 4.5, if (match < 0.938462019, 0.0165893189, -0.0049824111), -0.0046803205), if (age(created_at) < 41400.0, -0.0148008888, -0.0071343073)), if (age(created_at) < 1830.0, if (fieldTermMatch(text,0).firstPosition < 6.5, if (attribute(user_followers_count) < 945.5, 0.0158379194, 0.0252891613), if (fieldMatch(text) < 0.2744970024, if (term(0).significance < 0.9929184914, if (fieldMatch(text).tail < 6.5, -0.0518040838, 0.0076190376), 0.0091624226), if (attribute(yst_tweet_language) < 3243.5, if (attribute(yst_reply_auth) < 469.5, if (fieldMatch(text).significantOccurrence < 0.0392310023, -0.0078021755, 0.0161894548), -0.0230367514), -0.0302108693))), if (age(created_at) < 30600.0, if (fieldMatch(text) < 0.370840013, -0.0093177671, -8.716804E-4), if (fieldMatch(text) < 0.5607429743, -4.994075E-4, 0.0075202897)))) +
+if (attribute(ythl) < 0.5, if (age(created_at) < 1830.0, if (match < 0.6055585146, if (fieldTermMatch(text,1).firstPosition < 9.5, -0.0038025793, -0.036586404), if (attribute(yst_tweet_language) < 3243.5, if (fieldMatch(text).significantOccurrence < 0.1483514905, if (term(1).significance < 0.7788045406, 0.0569638816, -6.508355E-4), -0.0215954499), -0.028835301)), if (age(created_at) < 5400.0, -0.0155259431, -0.0084487818)), if (age(created_at) < 1830.0, if (attribute(user_followers_count) < 2333.0, if (fieldMatch(text).importance < 0.4989485145, if (fieldMatch(text).importance < 0.4988874793, 2.952785E-4, -0.1374273254), if (fieldMatch(text).absoluteOccurrence < 0.0122500006, if (attribute(yst_reply_auth) < 22.5, 0.0126010812, 0.0015712189), if (fieldTermMatch(text,1).firstPosition < 8.5, 0.0140537649, 0.0345167434))), 0.0247552557), if (fieldMatch(text).importance < 0.6664245129, if (match < 0.9277470112, -0.0104111915, -6.36678E-4), 0.0023009658))) +
+if (attribute(ythl) < 0.5, if (fieldMatch(text).tail < 11.5, if (age(created_at) < 1830.0, if (attribute(yst_tweet_language) < 3554.0, if (fieldMatch(text).importance < 0.665607512, if (term(0).significance < 0.9996379614, -0.0108858514, 0.0223953057), -7.23685E-4), -0.0527538471), if (age(created_at) < 5400.0, -0.0167835591, -0.0101222507)), if (attribute(user_friends_count) < 103.5, -0.0058634359, if (age(created_at) < 1770.0, 0.0106468506, -0.0022715192))), if (age(created_at) < 1830.0, if (attribute(yst_tweet_language) < 3243.5, if (fieldMatch(text).longestSequence < 1.5, if (term(0).significance < 0.9184160233, -0.0441538866, 0.0103401752), 0.0169759088), if (term(0).significance < 0.9990385175, 0.0064769128, -0.0484309871)), if (fieldMatch(text).longestSequence < 1.5, -0.0038310021, if (term(0).significance < 0.9686380029, 0.0135820391, if (attribute(user_followers_count) < 719.5, -2.012513E-4, 0.0056425249))))) +
+if (attribute(ythl) < 0.5, if (age(created_at) < 630.0, if (fieldMatch(text).importance < 0.499478519, 0.0221233715, if (attribute(user_followers_count) < 926.5, -0.0044878516, 0.0128654737)), if (fieldMatch(text).tail < 11.5, if (fieldMatch(text).importance < 0.6665325165, -0.0138860556, -0.0083897223), -0.004508875)), if (age(created_at) < 1830.0, if (attribute(yst_tweet_language) < 3587.5, if (fieldMatch(text).importance < 0.4989734888, -0.008760469, if (attribute(user_followers_count) < 1733.5, if (fieldMatch(text).occurrence < 0.1455025077, if (fieldMatch(text) < 0.5567239523, if (term(2).significance < 0.9795899987, -0.0543641627, 0.0024748648), 0.0124403853), 0.0174741297), 0.0222181645)), -0.0288913368), if (fieldMatch(text).longestSequence < 1.5, if (fieldMatch(text).importance < 0.6664659977, -0.0065555429, 5.791831E-4), if (fieldMatch(text).earliness < 0.93541646, if (fieldMatch(text).significantOccurrence < 0.0339080021, -0.0694353726, 0.0012739636), 0.0075882453)))) +
+if (attribute(ythl) < 0.5, if (fieldMatch(text).tail < 12.5, if (fieldMatch(text).longestSequence < 1.5, if (age(created_at) < 210.0, 5.10467E-4, if (term(0).significance < 0.991086483, -0.0213498049, if (term(0).significance < 0.9921205044, 0.0187676178, if (fieldMatch(text).tail < 5.5, -0.0155349434, -0.0073599141)))), if (fieldMatch(text).importance < 0.6662604809, if (attribute(user_followers_count) < 1875.0, 9.507365E-4, 0.0527948179), -0.0073533813)), if (age(created_at) < 810.0, 0.0076378446, -0.0027198247)), if (age(created_at) < 1830.0, if (fieldMatch(text).significantOccurrence < 0.0424195006, 0.001535613, if (fieldMatch(text).tail < 7.5, if (fieldMatch(text).importance < 0.7466344833, if (fieldMatch(text).importance < 0.6666129827, 0.0067592681, -0.0205924309), 0.0160937308), 0.0167252945)), if (fieldTermMatch(text,0).firstPosition < 2.5, 0.0049627365, if (fieldMatch(text).importance < 0.6664254665, -0.0065001791, 0.0010821803)))) +
+if (attribute(ythl) < 0.5, if (age(created_at) < 630.0, if (age(created_at) < 510.0, -0.0026815916, if (fieldMatch(text).importance < 0.4997144938, 0.0414511969, 0.004406815)), if (fieldMatch(text).tail < 11.5, if (fieldMatch(text).longestSequence < 1.5, if (fieldMatch(text).importance < 0.4994869828, 0.0066561273, -0.0136850009), -0.0069580048), if (attribute(yst_reply_auth) < 16.5, 0.0023887625, -0.0083630492))), if (age(created_at) < 1830.0, if (fieldMatch(text).fieldCompleteness < 0.0425724983, if (attribute(user_friends_count) < 252.5, -0.020119899, 0.0154324464), if (fieldMatch(text).tail < 5.5, 0.005278601, 0.0135236791)), if (fieldMatch(text).longestSequence < 1.5, -0.0049136258, if (age(created_at) < 27000.0, -4.128224E-4, if (term(2).significance < 0.9998239875, if (fieldMatch(text).completeness < 0.9559409618, -0.0209409304, if (term(1).significance < 0.9128689766, 0.0195740015, 0.0035250792)), 0.0133058164))))) +
+if (attribute(ythl) < 0.5, if (age(created_at) < 1770.0, if (fieldMatch(text).tail < 7.5, -0.004792789, if (fieldMatch(text).importance < 0.6652389765, -0.0020739127, 0.0102078569)), if (fieldMatch(text).importance < 0.6657874584, if (attribute(user_friends_count) < 16.5, 4.515464E-4, -0.0178576762), if (term(0).significance < 0.9976029992, -0.0101410825, -0.004854538))), if (age(created_at) < 1770.0, if (attribute(yst_tweet_language) < 3243.5, if (attribute(user_followers_count) < 606.0, if (fieldMatch(text).importance < 0.7488585114, 0.0066962893, 0.0158854368), if (fieldMatch(text).significantOccurrence < 0.0555564985, if (attribute(user_statuses_count) < 13511.5, 0.0163436735, -0.0031528673), 0.0222661817)), -0.0172180317), if (fieldMatch(text).importance < 0.6664534807, if (fieldMatch(text) < 0.3528665006, -0.0116436179, -0.0025309342), if (term(2).significance < 0.9981445074, if (fieldTermMatch(text,3).firstPosition < 4.5, 0.0069706912, -0.0021646003), 0.0046032512)))) +
+if (attribute(ythl) < 0.5, if (age(created_at) < 1830.0, if (fieldMatch(user_name).fieldCompleteness < 0.2916665077, if (attribute(user_statuses_count) < 497.5, 0.0022748148, if (attribute(user_followers_count) < 960.5, if (term(0).significance < 0.9725670218, -0.0246347116, if (term(0).significance < 0.9963495135, if (fieldMatch(text).tail < 9.5, if (fieldMatch(text).head < 11.5, 0.0013827683, -0.0212024376), 0.016150842), -0.0104714457)), 0.0070006551)), 0.0447412235), if (age(created_at) < 16200.0, -0.012466698, -0.0057219106)), if (age(created_at) < 1830.0, if (fieldMatch(text).importance < 0.4999520183, 0.0038677446, if (fieldMatch(text) < 0.4182469845, 0.0026964712, 0.0132060784)), if (fieldMatch(text).weightedOccurrence < 0.0513554998, if (match < 0.6867794991, if (fieldMatch(text).importance < 0.6665154696, -0.02010221, if (term(2).significance < 0.9950574636, -0.0169792919, 0.0050699268)), -0.0013697969), 0.0020313056))) +
+if (attribute(ythl) < 0.5, if (age(created_at) < 1830.0, if (fieldMatch(text).importance < 0.665607512, if (fieldMatch(text) < 0.2109414935, -0.0320963356, -0.0049411304), if (attribute(yst_tweet_language) < 3243.5, if (term(2).significance < 0.9769929647, if (fieldMatch(text).importance < 0.6665915251, 0.0282759231, if (fieldMatch(text).importance < 0.7399419546, -0.063959372, -0.0116583984)), if (term(0).significance < 0.9964904785, 0.0113601762, if (term(0).significance < 0.9984384775, -0.0096497985, 0.0065807303))), -0.0430967785)), if (fieldMatch(text).longestSequence < 1.5, -0.012735201, -0.0062520929)), if (age(created_at) < 1770.0, if (fieldMatch(text).earliness < 0.7211109996, 0.0056563829, if (attribute(user_followers_count) < 812.5, 0.0100250822, 0.0209608983)), if (fieldTermMatch(text,0).firstPosition < 4.5, 0.0027791958, if (fieldMatch(text).importance < 0.666424036, if (match < 0.9192979932, -0.0152598227, -0.0032270961), -5.422229E-4)))) +
+if (attribute(ythl) < 0.5, if (age(created_at) < 1830.0, if (attribute(user_followers_count) < 459.5, if (attribute(user_statuses_count) < 496.5, 8.480388E-4, -0.0090870631), if (attribute(yst_reply_auth) < 244.5, 0.0126503896, -0.0054197846)), if (fieldMatch(text).longestSequence < 1.5, if (fieldMatch(text).absoluteOccurrence < 0.0122500006, -0.0148159779, 0.0029148481), -0.0058224247)), if (age(created_at) < 1830.0, if (fieldMatch(text).importance < 0.7490880489, if (fieldMatch(text) < 0.5652275085, if (term(2).significance < 0.9916304946, -0.0285282409, 0.0051566337), if (attribute(user_followers_count) < 104.5, if (fieldMatch(text) < 0.8065220118, 0.0383292168, 0.0010266011), if (attribute(yst_reply_auth) < 391.0, 0.0133363207, -0.0143777685))), 0.0164241107), if (fieldTermMatch(text,0).firstPosition < 4.5, 0.0032745831, if (fieldMatch(text).importance < 0.7496404648, if (term(1).significance < 0.9979525208, -0.0096924346, -9.781494E-4), 0.0015180251)))) +
+if (attribute(ythl) < 0.5, if (age(created_at) < 1830.0, if (term(0).significance < 0.9995554686, if (attribute(yst_reply_auth) < 26.5, if (attribute(user_followers_count) < 92.5, if (fieldLength(text) < 27.5, 0.0012866951, -0.0265027781), 0.0104770861), if (attribute(yst_link_array_size) < 0.0041509997, -0.0099713041, 0.0011954032)), if (term(0).significance < 0.9996379614, -0.0297536383, -2.317059E-4)), if (fieldMatch(text).longestSequence < 1.5, -0.0121670225, -0.0054595694)), if (age(created_at) < 1830.0, if (fieldMatch(text).importance < 0.4989734888, if (fieldMatch(text).importance < 0.4986799955, 0.007117559, if (fieldLength(text) < 18.5, 0.0043567972, -0.0954988221)), if (attribute(user_statuses_count) < 5.5, -0.0298547936, 0.0103403639)), if (fieldMatch(text).occurrence < 0.1348485053, if (match < 0.9276950359, if (term(0).significance < 0.9981074929, -0.0125565952, -0.0030946195), 0.0023600605), 0.0024001179))) +
+if (attribute(ythl) < 0.5, if (age(created_at) < 1830.0, if (fieldMatch(text).importance < 0.7413114905, if (fieldMatch(text).importance < 0.6666384935, if (fieldMatch(text).earliness < 0.8681160212, if (match < 0.6799730062, -0.0205917268, -0.0018660452), if (fieldMatch(text) < 0.8819584846, if (fieldMatch(text).completeness < 0.9544465542, 0.0070580213, 0.0359145), -0.0174774107)), -0.0421236424), if (fieldMatch(text).completeness < 0.957596004, if (fieldMatch(text).earliness < 0.4128789902, -0.0155841429, 0.0271271066), 0.0016623712)), if (age(created_at) < 5400.0, -0.0114561422, -0.0053122836)), if (age(created_at) < 1830.0, if (term(0).significance < 0.9184160233, if (fieldMatch(text).longestSequence < 1.5, -0.070303917, 0.0039828312), if (attribute(yst_tweet_language) < 3243.5, if (fieldMatch(text).occurrence < 0.1455025077, 0.0064206057, 0.0136203784), -0.0155537108)), if (fieldMatch(text).importance < 0.6656370163, -0.0057846012, 0.0015276435))) +
+if (attribute(ythl) < 0.5, if (fieldMatch(text).tail < 12.5, if (fieldMatch(text) < 0.4900699854, if (term(0).significance < 0.9883320332, -0.0195571527, -0.0078552672), if (fieldMatch(text).weightedOccurrence < 0.0929629952, if (age(created_at) < 1530.0, if (fieldTermMatch(text,1).firstPosition < 6.5, 0.0158849428, -0.0010859682), -0.0055859102), -0.0109538484)), if (fieldMatch(user_name) < 0.0710614994, 2.014444E-4, 0.0605228154)), if (age(created_at) < 1830.0, if (fieldMatch(text) < 0.2891100049, -0.0017665209, if (attribute(yst_reply_auth) < 471.5, if (attribute(user_followers_count) < 2200.0, 0.0095812144, 0.0192088364), -0.0097908152)), if (fieldMatch(text).occurrence < 0.1348485053, if (match < 0.9285860062, if (attribute(user_statuses_count) < 2957.5, -0.003199469, -0.0120737981), 5.44995E-4), if (term(2).significance < 0.9939094782, if (fieldMatch(text).absoluteProximity < 0.0212500002, -0.0227140008, -3.328979E-4), 0.0055961138)))) +
+if (attribute(ythl) < 0.5, if (age(created_at) < 1830.0, if (attribute(yst_reply_auth) < 176.0, if (attribute(yst_tweet_language) < 3243.5, if (term(1).significance < 0.7788045406, 0.0578500341, 0.0011485747), -0.0253253039), if (attribute(yst_link_array_size) < 0.0223225001, -0.0156277732, 0.0023478823)), if (fieldMatch(text).longestSequence < 1.5, -0.0107069928, if (attribute(user_followers_count) < 1710.5, -0.0053639058, 0.0090303888))), if (fieldMatch(text) < 0.5406639576, if (fieldMatch(text).occurrence < 0.1165160015, if (term(1).significance < 0.9973840117, -0.0174263062, -0.0012512051), -4.493622E-4), if (age(created_at) < 1830.0, if (fieldMatch(text).importance < 0.4997544885, if (fieldMatch(text).importance < 0.4997400045, 0.0011570612, if (attribute(user_statuses_count) < 2389.0, -0.075250743, 0.0078353389)), if (attribute(yst_tweet_language) < 3243.5, 0.0095661855, -0.0134482465)), if (fieldMatch(text).importance < 0.4999470115, -0.0103740403, 0.0025010891)))) +
+if (attribute(ythl) < 0.5, if (fieldMatch(text) < 0.3427360058, if (fieldMatch(text).earliness < 0.6939799786, -0.0183828125, -0.0073742585), if (age(created_at) < 1830.0, if (term(0).significance < 0.9958745241, if (term(1).significance < 0.8159549832, 0.0358430149, if (term(1).significance < 0.9927034974, if (fieldMatch(text).importance < 0.6640119553, 0.0448918743, -0.0109749723), if (term(1).significance < 0.996638, 0.021964601, 0.0055406966))), -0.0040966912), if (fieldLength(text) < 14.5, -0.0121807234, -0.0043039012))), if (age(created_at) < 1830.0, if (fieldMatch(text).importance < 0.7479754686, if (term(0).significance < 0.9139549732, -0.0340629156, if (fieldMatch(text).tail < 7.5, 8.350066E-4, if (fieldMatch(text) < 0.8667535186, 0.0042479503, if (fieldTermMatch(text,0).firstPosition < 2.5, 0.009079718, 0.0217882168)))), 0.0152961627), if (fieldTermMatch(text,1).firstPosition < 6.5, 0.0019770381, -0.0024650391))) +
+if (attribute(ythl) < 0.5, if (age(created_at) < 1650.0, -5.35497E-4, if (fieldMatch(text).absoluteOccurrence < 0.0126785003, if (fieldMatch(text).longestSequence < 1.5, -0.0102881411, -0.0050682353), 0.0015992266)), if (age(created_at) < 1830.0, if (fieldMatch(text).occurrence < 0.0425724983, -0.0103390097, if (fieldTermMatch(text,0).firstPosition < 4.5, if (attribute(user_followers_count) < 807.5, 0.009014829, 0.0188702216), if (fieldMatch(text).importance < 0.7488585114, if (term(1).significance < 0.9833209515, if (fieldMatch(text).absoluteOccurrence < 0.0116665, -0.0062725138, -0.0573762051), 0.0061628636), 0.0150568527))), if (fieldMatch(text) < 0.4552929997, if (fieldMatch(user_name) < 0.3179910183, if (fieldMatch(text).importance < 0.6665154696, if (fieldMatch(text).occurrence < 0.1188234985, -0.017059865, -9.6562E-6), if (term(1).significance < 0.9965360165, -0.0071076426, 0.0044895619)), 0.0279607247), if (term(2).significance < 0.8411514759, -0.0033532749, 0.0026365471)))) +
+if (attribute(ythl) < 0.5, if (age(created_at) < 1830.0, if (term(1).significance < 0.8159549832, 0.0276561682, if (term(1).significance < 0.9822615385, if (match < 0.7864329815, -0.0313860274, if (fieldMatch(text).longestSequenceRatio < 0.7749999762, 0.026549556, -0.0146271425)), if (term(0).significance < 0.9987125397, if (term(0).significance < 0.9821995497, -0.0131166881, if (age(created_at) < 1770.0, 0.002490936, 0.0242417466)), if (term(0).significance < 0.9987905025, -0.0474544221, -0.003321698)))), if (fieldMatch(text).significance < 0.6663454771, -0.0112682274, -0.0050036035)), if (age(created_at) < 1770.0, if (attribute(user_followers_count) < 812.5, 0.0053496824, 0.0125479103), if (fieldTermMatch(text,0).firstPosition < 1.5, 0.0037986168, if (fieldMatch(text).importance < 0.7498390079, if (term(0).significance < 0.9974490404, -0.0066723085, -6.70732E-4), if (attribute(user_friends_count) < 20.5, -0.005179231, 0.0048035663))))) +
+if (attribute(ythl) < 0.5, if (age(created_at) < 1830.0, if (attribute(yst_reply_auth) < 26.5, if (attribute(user_followers_count) < 80.5, -0.0011002979, 0.0120623048), if (fieldMatch(text).importance < 0.4994869828, if (term(0).significance < 0.991086483, -0.015038199, 0.0335272034), if (attribute(yst_link_array_size) < 0.0041354997, -0.0126645698, if (fieldTermMatch(text,0).firstPosition < 2.5, 0.0132560119, -0.0052124596)))), if (fieldMatch(text).importance < 0.6665325165, if (fieldMatch(text).tail < 7.5, -0.0136798779, -0.0055728098), if (term(0).significance < 0.9954190254, -0.0075794485, -8.289554E-4))), if (age(created_at) < 1770.0, if (fieldMatch(text).earliness < 0.3779760003, -0.0012520588, 0.0078110682), if (fieldMatch(text).significantOccurrence < 0.0547899976, if (match < 0.7503944635, if (fieldMatch(text).importance < 0.6665270329, if (fieldMatch(text).earliness < 0.6339714527, -0.0338858554, -0.009096585), -0.0037668704), 3.149666E-4), 0.0028579202))) +
+if (attribute(ythl) < 0.5, if (age(created_at) < 1830.0, if (fieldLength(text) < 8.5, -0.0119762189, if (attribute(user_friends_count) < 15.5, 0.0150834311, if (fieldMatch(text) < 0.4306970239, if (term(1).significance < 0.9976525307, -0.018499759, 0.0133471677), if (attribute(user_friends_count) < 1375.5, 5.505579E-4, 0.0194310919)))), -0.0058711817), if (age(created_at) < 1830.0, if (attribute(yst_tweet_language) < 3243.5, if (fieldMatch(text).tail < 3.5, -0.0081336884, if (fieldMatch(text).significantOccurrence < 0.0392310023, -0.0074424529, if (fieldMatch(text).importance < 0.7463564873, 0.0063782103, 0.0133409187))), -0.0205400734), if (fieldMatch(text).importance < 0.6664534807, if (fieldMatch(text).significantOccurrence < 0.1249409989, if (fieldTermMatch(text,0).firstPosition < 2.5, 0.0031349276, -0.0052935732), -0.0275582309), if (age(created_at) < 63000.0, if (fieldMatch(text) < 0.3207110167, -0.0114880439, 9.364683E-4), 0.0048748147)))) +
+if (attribute(ythl) < 0.5, if (age(created_at) < 1830.0, if (term(0).significance < 0.9995759726, if (fieldMatch(text).significantOccurrence < 0.0363755003, -0.01574373, if (term(1).significance < 0.7788045406, 0.0476961371, 0.001213897)), -0.0123574496), if (fieldMatch(text).importance < 0.6664404869, -0.0101736132, if (term(0).significance < 0.9954395294, if (fieldMatch(text).orderness < 0.5357145071, -0.0179059498, -0.0054873409), -0.0020453926))), if (fieldMatch(text) < 0.3861989975, if (term(1).significance < 0.9980044961, if (age(created_at) < 330.0, 0.0172467014, -0.0090164842), 0.0018725599), if (age(created_at) < 1830.0, if (attribute(yst_reply_auth) < 557.0, if (attribute(user_statuses_count) < 7.5, if (attribute(user_followers_count) < 147.0, -7.864416E-4, -0.1136937768), if (fieldMatch(text).importance < 0.4989485145, -0.017996364, 0.0077754184)), -0.0216960094), if (fieldMatch(text).importance < 0.6657680273, -0.0047764491, 0.002661354)))) +
+if (attribute(ythl) < 0.5, if (fieldMatch(text).absoluteProximity < 0.0247499999, -0.0106958848, if (fieldLength(text) < 14.5, if (fieldMatch(text).importance < 0.4994429946, 0.0219589017, -0.0076174869), if (fieldMatch(text).occurrence < 0.0816664994, -0.0053254707, if (fieldMatch(text).longestSequenceRatio < 0.8166664839, -0.0034099679, if (attribute(user_followers_count) < 3130.0, 0.0023795826, 0.0305866803))))), if (age(created_at) < 1830.0, if (attribute(user_followers_count) < 579.5, if (fieldMatch(text).importance < 0.7473194599, if (term(1).significance < 0.9967114925, if (fieldMatch(text).orderness < 0.25, 0.0199100212, if (age(created_at) < 630.0, 1.241074E-4, -0.0217275952)), 0.0052103051), 0.0100794994), if (fieldMatch(text).weightedOccurrence < 0.057417497, 0.0046668785, 0.0152380854)), if (fieldMatch(text) < 0.279281497, if (term(1).significance < 0.996701479, -0.0134490906, 5.562205E-4), if (fieldTermMatch(text,1).firstPosition < 9.5, 0.0022888891, -0.0019659597)))) +
+if (attribute(ythl) < 0.5, if (fieldMatch(text).tail < 7.5, if (fieldMatch(text).importance < 0.6656044722, if (match < 0.7144390345, -0.0310311214, -0.0088877493), if (fieldMatch(text).earliness < 0.0727514997, 0.0149462312, -0.0044403174)), if (attribute(user_friends_count) < 14.5, 0.0092716632, -0.0015500378)), if (age(created_at) < 1830.0, if (fieldMatch(text) < 0.2904269993, if (attribute(user_friends_count) < 127.0, 0.0056714395, if (fieldTermMatch(text,1).firstPosition < 10.5, -0.0061125596, -0.0400728335)), if (attribute(yst_tweet_language) < 3243.5, if (fieldLength(text) < 9.5, -0.014240152, if (attribute(yst_reply_auth) < 209.5, 0.0090823293, if (attribute(user_followers_count) < 1677.5, -0.0133225099, 0.0160376802))), if (fieldTermMatch(text,0).firstPosition < 6.5, 0.002067266, -0.0469488746))), if (fieldMatch(text).significantOccurrence < 0.0543674976, -0.0022326468, if (age(created_at) < 63000.0, 5.594003E-4, 0.0062407904)))) +
+if (attribute(ythl) < 0.5, if (age(created_at) < 1830.0, if (term(1).significance < 0.8159549832, 0.0232298047, if (attribute(yst_reply_auth) < 176.0, 5.588745E-4, -0.0071946844)), if (fieldLength(text) < 14.5, -0.0100784734, if (fieldMatch(text).occurrence < 0.1043554991, -0.0078973837, -0.0021612919))), if (fieldMatch(text) < 0.4846429825, if (term(1).significance < 0.9980959892, if (fieldMatch(text).significantOccurrence < 0.0594874993, if (fieldMatch(text).importance < 0.7486180067, if (fieldMatch(text).head < 1.5, -0.0040433128, -0.0206367481), if (attribute(user_followers_count) < 592.0, 0.003159187, -0.014456241)), 0.0020151861), 0.0050398144), if (age(created_at) < 1830.0, if (fieldMatch(text).importance < 0.4997544885, if (fieldMatch(text).importance < 0.4997400045, if (fieldLength(text) < 10.5, -0.0498201605, 0.0052238898), -0.0305327007), 0.0074862309), if (fieldTermMatch(text,1).firstPosition < 12.5, 0.0024835558, -0.0030184713)))) +
+if (fieldMatch(text) < 0.2844820023, if (fieldMatch(text).importance < 0.6664454937, if (fieldMatch(text).earliness < 0.6909815073, -0.0202083466, -0.007230346), if (term(0).significance < 0.9991005063, -0.0078735247, 0.0050183478)), if (age(created_at) < 1830.0, if (fieldMatch(text).importance < 0.4999545217, if (fieldMatch(text).importance < 0.4999495149, 0.0010054634, if (attribute(yst_reply_auth) < 24.5, -0.0091545768, -0.0585794793)), if (attribute(user_followers_count) < 496.5, if (attribute(yst_reply_auth) < 64.5, if (attribute(yst_tweet_language) < 3583.5, 0.0060891079, -0.0311686318), -0.0029778507), 0.0105208941)), if (fieldTermMatch(text,2).firstPosition < 9.5, if (term(2).significance < 0.9943845272, -0.0014959657, 0.0041636931), if (fieldMatch(text).earliness < 0.8651515245, if (fieldMatch(user_name).completeness < 0.9791665077, if (attribute(user_followers_count) < 680.5, -0.005581803, if (term(1).significance < 0.9998655319, 0.002594313, -0.0096650963)), 0.0535512397), 0.0020234411)))) +
+if (attribute(ythl) < 0.5, if (fieldMatch(text).absoluteProximity < 0.0491665006, -0.0097379318, if (age(created_at) < 1710.0, if (term(0).significance < 0.9986090064, if (term(0).significance < 0.9982025027, if (term(2).significance < 0.9959775209, -0.0019926524, if (fieldMatch(text).importance < 0.665396452, -0.0116503978, 0.0109466166)), 0.0189828366), if (fieldMatch(text).importance < 0.6666469574, if (fieldMatch(text).importance < 0.4998664856, -0.0224440709, if (attribute(yst_reply_auth) < 18.5, 0.0032764517, -0.0102488229)), 0.0230535914)), -0.00426531)), if (age(created_at) < 1650.0, if (attribute(user_followers_count) < 105.5, -6.226235E-4, if (fieldMatch(text).importance < 0.4989485145, -0.0224164552, 0.0086177649)), if (fieldMatch(text).significantOccurrence < 0.0392310023, -0.0097048559, if (fieldMatch(text) < 0.4509834945, if (term(1).significance < 0.9981694818, -0.0078122033, 0.003806844), if (term(0).significance < 0.9841674566, 0.0097951581, 9.24353E-4))))) +
+if (attribute(ythl) < 0.5, if (fieldMatch(text) < 0.2836354971, if (term(1).significance < 0.9996379614, -0.0108495877, 0.0148675984), if (fieldMatch(text).tail < 17.5, if (term(2).significance < 0.9099119902, if (fieldMatch(text).importance < 0.4998250008, if (fieldMatch(text).significance < 0.4995914996, -0.0043957101, 0.0231344492), -0.008840382), if (attribute(user_followers_count) < 506.5, if (fieldMatch(text).importance < 0.6659464836, if (fieldMatch(text).importance < 0.6658334732, -3.420491E-4, 0.0373151929), -0.0037227166), 0.0035857585)), 0.0053145038)), if (fieldMatch(text) < 0.2904269993, if (term(1).significance < 0.998260498, if (term(2).significance < 0.978690505, -0.0302698107, if (fieldMatch(text).occurrence < 0.1188234985, -0.0139381667, 3.038E-7)), 0.0019017619), if (age(created_at) < 1830.0, if (fieldMatch(text).occurrence < 0.075498499, -4.698689E-4, 0.0075287937), if (term(2).significance < 0.9939094782, -0.0018063524, 0.0022813626)))) +
+if (attribute(ythl) < 0.5, if (fieldMatch(text).absoluteProximity < 0.0537500009, -0.0076941292, if (attribute(user_friends_count) < 13.5, if (attribute(user_followers_count) < 180.5, 7.683782E-4, 0.0254407298), if (term(1).significance < 0.9965690374, if (attribute(user_friends_count) < 1596.0, -9.906495E-4, 0.017963509), if (term(1).significance < 0.9967479706, -0.030275409, if (term(0).significance < 0.9986245036, if (term(0).significance < 0.9983664751, -0.0033463225, if (fieldMatch(text).importance < 0.4998250008, 0.0252647259, 0.0031275837)), if (fieldMatch(text).importance < 0.6665315032, -0.0095874064, -0.002315628)))))), if (age(created_at) < 1830.0, if (attribute(yst_tweet_language) < 3243.5, if (fieldMatch(text).absoluteOccurrence < 0.0122500006, 0.0044201553, 0.0117690347), -0.0145395863), if (fieldTermMatch(text,0).firstPosition < 10.5, if (fieldMatch(user_name).significantOccurrence < 0.4166665077, if (match < 0.7534494996, -0.0026555272, 0.0017879837), 0.014526025), -0.0044547476))) +
+if (attribute(ythl) < 0.5, if (fieldMatch(text).tail < 5.5, if (fieldMatch(text).longestSequence < 1.5, if (fieldMatch(text).completeness < 0.9522235394, 0.0042831561, -0.0095824673), if (fieldMatch(text).weightedOccurrence < 0.084523499, -6.026845E-4, -0.0078613786)), if (fieldMatch(user_name).importance < 0.1997880042, -0.0010588456, 0.0257470432)), if (fieldMatch(text) < 0.2779855132, if (fieldMatch(text).significantOccurrence < 0.0599530004, if (fieldMatch(text).importance < 0.6665154696, -0.0140299808, if (term(2).significance < 0.967427969, -0.0277817247, 0.0028355135)), 0.0034763323), if (age(created_at) < 2370.0, if (attribute(user_statuses_count) < 10.5, if (attribute(yst_link_array_size) < 1.2E-5, -0.0032323662, -0.1013679738), if (fieldTermMatch(text,1).firstPosition < 5.5, 0.0092460814, if (attribute(user_statuses_count) < 72785.0, 0.0043196848, -0.0308937796))), if (fieldMatch(text).significantOccurrence < 0.0339080021, -0.0524175559, if (fieldTermMatch(text,1).firstPosition < 17.5, 0.0020057038, -0.0052555353))))) +
+if (age(created_at) < 1830.0, if (fieldMatch(text).significantOccurrence < 0.0382340029, if (fieldMatch(text) < 0.8533049822, if (term(0).significance < 0.9981650114, -0.0368343915, -0.0056610638), 0.0049994224), if (fieldMatch(text).tail < 5.5, if (fieldMatch(text).importance < 0.4989485145, -0.0226356769, 8.778837E-4), if (term(0).significance < 0.9986659884, 0.0085648682, if (fieldMatch(text).earliness < 0.7071075439, if (fieldMatch(text).importance < 0.6665714979, -0.0079427382, 0.0095678431), if (term(0).significance < 0.9988600016, -0.0090514905, if (attribute(yst_tweet_language) < 3243.5, 0.011654868, -0.0250363073)))))), if (fieldMatch(text).longestSequence < 1.5, if (fieldMatch(text).absoluteOccurrence < 0.0122500006, -0.0063330281, 0.0017606811), if (attribute(user_followers_count) < 521.5, if (attribute(yst_reply_auth) < 6.5, if (fieldLength(text) < 26.5, 8.975568E-4, -0.0115152224), if (age(created_at) < 12600.0, -0.0079829768, -7.770708E-4)), 0.0029601612))) +
+if (attribute(ythl) < 0.5, if (age(created_at) < 1830.0, if (term(1).significance < 0.8159549832, 0.0344152691, if (attribute(yst_tweet_language) < 3243.5, -9.31972E-5, -0.0184607413)), if (fieldMatch(text) < 0.8700245023, if (age(created_at) < 81000.0, -0.0081256943, -0.002474476), if (fieldMatch(text).completeness < 0.9577934742, 0.006010286, -0.0049642463))), if (age(created_at) < 1830.0, if (attribute(yst_tweet_language) < 3243.5, if (fieldMatch(text).completeness < 0.9521285295, -0.0086391614, 0.0063207862), if (term(0).significance < 0.998996973, 0.0055342844, if (term(0).significance < 0.999627471, -0.053409278, 0.0110272216))), if (fieldMatch(text).head < 1.5, 0.0038230846, if (fieldMatch(text).importance < 0.6663914919, -0.0050094296, if (term(2).significance < 0.9939705133, if (term(2).significance < 0.9904664755, -6.273878E-4, if (term(1).significance < 0.9985420108, -0.0060480992, -0.0305338408)), 0.0023578375))))) +
+if (attribute(ythl) < 0.5, if (age(created_at) < 1830.0, if (term(1).significance < 0.7788045406, 0.0500283244, if (fieldMatch(text).importance < 0.4989485145, if (fieldMatch(text).importance < 0.4985739887, -0.0040890196, -0.0504476618), if (fieldMatch(text).importance < 0.4994869828, 0.0226347107, if (fieldMatch(text).importance < 0.7494400144, -0.0013898685, if (term(1).significance < 0.9945595264, -0.0032601315, 0.0245669695))))), -0.0050325999), if (age(created_at) < 1830.0, if (attribute(yst_reply_auth) < 218.5, if (attribute(user_followers_count) < 516.5, if (attribute(yst_reply_auth) < 5.5, if (fieldMatch(text).importance < 0.6662045121, if (fieldMatch(text).tail < 15.5, -9.372615E-4, 0.0137497531), 0.0092949266), -0.0038400009), 0.0088974242), -0.0063462945), if (fieldMatch(text).fieldCompleteness < 0.1863425076, if (attribute(user_followers_count) < 82.5, -0.004996894, if (term(1).significance < 0.9968400002, -0.0035744289, 0.0022489806)), 0.0031548406))) +
+if (attribute(user_followers_count) < 1739.0, if (attribute(yst_reply_auth) < 28.5, if (attribute(user_followers_count) < 86.5, if (fieldMatch(text).importance < 0.6655265093, -0.0070659027, if (age(created_at) < 1890.0, if (term(1).significance < 0.9980455041, 0.0084857763, if (attribute(user_friends_count) < 38.5, 0.008263962, -0.013053989)), -0.0029246429)), if (fieldMatch(user_name) < 0.3153960109, if (fieldMatch(text) < 0.5473589897, if (match < 0.54053545, -0.0265778832, -0.0016129946), if (fieldMatch(text).occurrence < 0.0816664994, if (attribute(yst_link_array_size) < 0.001784, -0.0042828731, if (age(created_at) < 1830.0, 0.019961191, -0.0028795459)), if (age(created_at) < 1710.0, 0.0085534102, 0.0023027773))), if (fieldMatch(text).significantOccurrence < 0.0327955, -0.0415331084, 0.0263336717))), -0.004577551), if (fieldMatch(text).tail < 7.5, if (term(2).significance < 0.8023320436, -0.009778886, 0.0038323247), 0.0081719743)) +
+if (attribute(user_followers_count) < 437.5, if (fieldMatch(text).significantOccurrence < 0.1246850044, if (attribute(yst_reply_auth) < 22.5, if (fieldMatch(text) < 0.3409180045, if (fieldMatch(text).importance < 0.6665065289, -0.0102582795, if (term(1).significance < 0.9962199926, if (term(1).significance < 0.994343996, -0.0021503448, -0.0306146076), 0.0068595469)), if (fieldMatch(text).earliness < 0.93541646, if (fieldMatch(user_name) < 0.5095770359, if (attribute(yst_tweet_language) < 3243.5, if (age(created_at) < 1830.0, 0.0020429611, -0.0019541993), -0.0132421664), 0.027938898), 0.0043877081)), -0.0053086844), -0.0109310616), if (age(created_at) < 1530.0, if (term(0).significance < 0.9986474514, if (term(0).significance < 0.9980379939, if (fieldMatch(text).earliness < 0.2440474927, -0.0189069835, 0.0085132629), 0.0221469666), 2.679538E-4), if (attribute(user_statuses_count) < 2928.5, 0.003703727, if (match < 0.5710045099, -0.0125698441, -3.892576E-4)))) +
+if (fieldMatch(text).tail < 3.5, if (attribute(yst_reply_auth) < 278.5, if (fieldLength(text) < 24.5, if (attribute(yst_link_array_size) < 0.0885144994, if (term(1).significance < 0.7788045406, 0.0265531093, -0.0020354187), -0.0289274408), -0.0107078153), -0.0129870071), if (age(created_at) < 1830.0, if (attribute(yst_tweet_language) < 3243.5, if (fieldLength(text) < 9.5, -0.0103056223, if (fieldMatch(user_name) < 0.2000829875, if (fieldMatch(text).importance < 0.7970539927, 0.0035454291, 0.0126182815), 0.0233580846)), if (fieldLength(text) < 22.5, if (term(0).significance < 0.9991210103, 0.0080261443, -0.0294237431), -0.0673310696)), if (fieldMatch(text).importance < 0.6664265394, if (attribute(yst_reply_auth) < 13.5, -0.0018649103, -0.0077154393), if (term(0).significance < 0.9997465014, if (attribute(user_followers_count) < 717.5, -0.0012717951, 0.002903754), if (fieldTermMatch(text,1).firstPosition < 15.5, 0.0107252476, -0.0062640981))))) +
+if (attribute(ythl) < 0.5, if (attribute(user_followers_count) < 483.5, if (attribute(user_statuses_count) < 491.5, if (fieldMatch(text).tail < 3.5, -0.0057958616, if (age(created_at) < 1410.0, 0.0070562486, -7.664522E-4)), if (attribute(user_friends_count) < 8.5, 0.0087335556, -0.0058603167)), if (age(created_at) < 210.0, 0.0246066286, 3.480739E-4)), if (fieldMatch(text) < 0.5547109842, if (fieldMatch(text).occurrence < 0.1348485053, if (attribute(user_statuses_count) < 2933.0, -0.0023188146, if (attribute(yst_reply_auth) < 1.5, -0.0279839136, if (fieldTermMatch(text,1).firstPosition < 12.5, if (age(created_at) < 2730.0, 0.0153842703, -0.0081351611), -0.0240346583))), 8.863957E-4), if (fieldLength(text) < 9.5, -0.0106073655, if (fieldMatch(text).earliness < 0.9393379688, if (fieldMatch(text).occurrence < 0.0655914992, -0.0023447985, if (age(created_at) < 1950.0, 0.0063181854, 0.0015014161)), 0.0074385233)))) +
+if (attribute(ythl) < 0.5, if (fieldMatch(text) < 0.8502080441, if (fieldMatch(text).importance < 0.7468224764, -0.0067962178, -0.0019381191), if (attribute(user_statuses_count) < 16627.5, if (attribute(user_followers_count) < 515.5, if (fieldMatch(text).weightedOccurrence < 0.0944940001, if (attribute(user_statuses_count) < 109.5, 0.0079116741, -4.709728E-4), -0.0057247378), if (term(0).significance < 0.9991005063, if (age(created_at) < 1350.0, 0.0233500539, 0.0061626722), -0.0047207579)), -0.0108453748)), if (fieldTermMatch(text,2).firstPosition < 8.5, if (attribute(user_followers_count) < 16.5, -0.0054426486, if (age(created_at) < 2430.0, 0.0098679265, 0.0032263599)), if (match < 0.9289889932, if (fieldMatch(text).tail < 3.5, if (fieldMatch(text).occurrence < 0.0976189971, -0.0211220829, -0.0037859558), if (attribute(user_statuses_count) < 12392.5, 8.0234E-5, if (attribute(user_followers_count) < 317.5, -0.0361792845, -0.0049548586))), 0.0025926241))) +
+if (attribute(ythl) < 0.5, if (age(created_at) < 1830.0, if (fieldMatch(text).longestSequence < 1.5, -0.003165344, if (fieldMatch(text).importance < 0.74989748, if (term(1).significance < 0.998134017, 0.0094378769, if (term(1).significance < 0.9994934797, if (term(1).significance < 0.9991415143, 3.721852E-4, -0.0259010774), 0.0124789418)), if (term(2).significance < 0.9737149477, -0.0202297481, 0.0040219128))), -0.0045478246), if (age(created_at) < 1590.0, if (fieldTermMatch(text,1).firstPosition < 7.5, 0.0081043971, if (fieldMatch(text) < 0.8496830463, -0.0035718865, 0.0048047847)), if (fieldMatch(text).head < 1.5, if (fieldLength(text) < 8.5, -0.0224047487, 0.0036563528), if (fieldMatch(text) < 0.2774904966, if (term(0).significance < 0.997859478, -0.0142881937, -0.0014981238), if (attribute(yst_reply_auth) < 70.5, 1.609764E-4, if (attribute(user_followers_count) < 397.5, -0.01809199, -0.0021113953)))))) +
+if (fieldMatch(text).longestSequence < 1.5, if (fieldTermMatch(text,0).firstPosition < 14.5, if (fieldMatch(text).absoluteOccurrence < 0.0136665003, if (fieldMatch(text).significance < 0.7493325472, if (age(created_at) < 1830.0, if (attribute(user_statuses_count) < 5.5, -0.0282346598, if (term(0).significance < 0.9725670218, if (term(1).significance < 0.9792364836, -0.0784237022, -0.0059997941), if (fieldTermMatch(text,1).firstPosition < 10.5, 0.0078147345, if (match < 0.9353330135, -0.0019768224, if (fieldMatch(text).earliness < 0.9198719859, if (attribute(yst_link_array_size) < 2.88E-4, -0.0041019966, 0.0264356088), 5.631411E-4))))), -0.0050485104), 0.0082039036), 0.0051839504), if (term(0).significance < 0.9982025027, -0.0142960741, -0.0033997299)), if (attribute(yst_tweet_language) < 3587.5, if (age(created_at) < 1770.0, 0.0049084121, if (attribute(ythl) < 0.5, -0.0020172224, if (attribute(yst_link_array_size) < 0.0056419997, 0.0029607752, -0.0028856329))), -0.0261107048)) +
+if (fieldMatch(text) < 0.2898915112, if (fieldMatch(text).head < 1.5, 0.0014847907, if (fieldMatch(text).importance < 0.6663334966, -0.0119151319, -0.0040477723)), if (attribute(user_followers_count) < 519.5, if (term(2).significance < 0.9943574667, if (age(created_at) < 1830.0, if (attribute(user_statuses_count) < 1256.0, 0.0034757752, if (attribute(user_statuses_count) < 1397.0, -0.0279955298, if (fieldMatch(text).tail < 15.5, -0.0046990807, if (match < 0.935085535, -0.004028571, 0.0289488138)))), if (term(0).significance < 0.9971770048, if (term(1).significance < 0.9985420108, -0.0058304095, -0.017290911), if (fieldTermMatch(text,1).firstPosition < 17.5, 0.0053683293, -0.0057448395))), if (attribute(yst_reply_auth) < 236.5, 0.0018130071, -0.0097022524)), if (fieldMatch(text).importance < 0.4989485145, if (fieldMatch(text).importance < 0.498603493, -0.0030858773, -0.0594499645), if (fieldTermMatch(text,0).firstPosition < 13.5, 0.0046047026, -0.0041554082)))) +
+if (fieldMatch(text).longestSequence < 1.5, if (fieldMatch(text).earliness < 0.6381819844, if (term(0).significance < 0.9928579926, if (attribute(yst_reply_auth) < 3.5, if (attribute(user_followers_count) < 18.5, -0.0266701398, 0.0020537489), -0.0226862335), if (fieldMatch(text).importance < 0.4997234941, if (attribute(yst_link_array_size) < 5.03E-4, 0.0166948856, -0.0034104232), if (age(created_at) < 270.0, 0.0057923479, if (fieldMatch(text).importance < 0.499758482, -0.0291936745, -0.0059528701)))), if (term(0).significance < 0.8512874842, -0.0436401448, if (age(created_at) < 1830.0, 0.0039537575, -0.0025333564))), if (age(created_at) < 1830.0, if (attribute(yst_reply_auth) < 158.5, if (attribute(yst_reply_auth) < 149.5, if (term(2).significance < 0.9389865398, -0.0024084291, if (term(2).significance < 0.9853805304, 0.0179593679, 0.0053348502)), 0.0511133688), -0.0040868819), if (attribute(user_followers_count) < 172.5, -0.0015268821, 0.001811508))) +
+if (fieldMatch(text) < 0.4593589902, if (term(0).significance < 0.9975925088, if (fieldMatch(text).occurrence < 0.1188234985, if (fieldMatch(text).earliness < 0.9486839771, if (term(0).significance < 0.9817185402, -0.033583138, -0.0111131767), 0.0027474033), -0.0042505836), -3.239219E-4), if (fieldLength(text) < 9.5, if (term(1).significance < 0.996538043, if (fieldMatch(text).importance < 0.6660010219, 0.0283916092, -0.0014717607), -0.0092449117), if (age(created_at) < 1770.0, if (attribute(user_statuses_count) < 5.5, if (attribute(yst_link_array_size) < 1.2E-5, -0.0060483091, -0.0952850231), if (term(2).significance < 0.9389865398, if (attribute(user_statuses_count) < 27302.5, if (term(0).significance < 0.9943234921, -0.0055188147, 0.0033202683), if (attribute(yst_reply_auth) < 19.5, 0.0086451663, -0.0362288139)), 0.0064556248)), if (fieldMatch(text).occurrence < 0.078461498, -0.0044655704, if (fieldTermMatch(text,0).firstPosition < 0.5, 0.0052075545, 7.92364E-5))))) +
+if (fieldMatch(text).longestSequence < 1.5, if (term(0).significance < 0.8512874842, if (term(2).significance < 0.9936410189, -0.090488338, -0.0089960419), if (fieldMatch(text).importance < 0.4999005198, if (attribute(user_friends_count) < 7.5, 0.0105675969, -2.271753E-4), if (fieldMatch(text).importance < 0.4999030232, -0.0345873832, if (fieldMatch(text).earliness < 0.7165180445, if (term(1).significance < 0.9967604876, -0.0115456455, -0.0042372928), -0.0012917255)))), if (age(created_at) < 1830.0, if (attribute(yst_reply_auth) < 26.5, 0.0072943729, if (attribute(user_followers_count) < 3340.0, if (fieldMatch(text).importance < 0.6665514708, if (fieldMatch(text).significance < 0.6661305428, -0.0013967805, -0.0212892006), if (attribute(yst_reply_auth) < 48.5, -0.0095375798, if (fieldMatch(text).importance < 0.7498970032, 0.015502273, -0.0026741211))), 0.0188293335)), if (attribute(user_followers_count) < 72.5, -0.0024566452, if (attribute(yst_reply_auth) < 50.5, 0.0027297795, -0.0018218561)))) +
+if (fieldMatch(text).earliness < 0.929802537, if (fieldMatch(text).longestSequence < 1.5, if (fieldMatch(user_name).importance < 0.3325359821, if (term(1).significance < 0.997274518, -0.0074074056, if (attribute(user_statuses_count) < 5.5, if (fieldMatch(text) < 0.8386005163, 0.0032054405, -0.0402836718), if (fieldMatch(text).earliness < 0.6381819844, -0.0037883, 8.924796E-4))), if (attribute(user_statuses_count) < 3055.5, 0.0281949081, -0.0048301755)), if (age(created_at) < 1770.0, 0.0037513988, if (attribute(yst_reply_auth) < 71.5, if (attribute(user_followers_count) < 68.5, -0.0032099164, if (fieldMatch(text).weightedOccurrence < 0.0510035008, if (term(2).significance < 0.9982124567, -0.0067952208, 0.0019990379), if (fieldTermMatch(text,1).firstPosition < 3.5, -0.0014223313, 0.0056390354))), -0.0052017914))), if (fieldMatch(text).tail < 8.5, if (fieldTermMatch(text,1).firstPosition < 21.5, 7.76589E-5, -0.0251022513), if (age(created_at) < 1770.0, 0.0107898472, 0.0035281034))) +
+if (fieldMatch(text) < 0.4136639833, if (fieldMatch(text).earliness < 0.6228449941, -0.0108022756, if (term(0).significance < 0.98062253, if (fieldMatch(text).importance < 0.739367485, if (term(1).significance < 0.9957709908, -0.0755310801, -0.0197330906), -0.0036348641), -0.0013106391)), if (fieldLength(text) < 9.5, if (term(1).significance < 0.9965360165, 0.0028564297, -0.0093397644), if (fieldMatch(text).earliness < 0.9321835041, if (attribute(yst_tweet_language) < 3243.5, if (age(created_at) < 1830.0, if (attribute(user_followers_count) < 40.5, if (fieldMatch(text).importance < 0.7464824915, if (fieldMatch(text).importance < 0.7373905182, -0.0053736193, -0.0642881769), 0.0068053454), if (term(0).significance < 0.9995554686, if (fieldMatch(text).tail < 21.5, 0.0051189016, -0.0221854397), -0.005375067)), if (term(2).significance < 0.9946069717, -0.003229661, if (attribute(yst_link_array_size) < 0.0122835003, 0.0019118484, -0.0049101186))), -0.010971348), 0.0042358084))) +
+if (attribute(ythl) < 0.5, if (age(created_at) < 1830.0, if (term(1).significance < 0.7788045406, 0.0501539771, if (attribute(user_statuses_count) < 8152.0, if (attribute(user_followers_count) < 443.5, -7.68504E-4, if (attribute(yst_link_array_size) < 0.0250005014, if (fieldMatch(text).importance < 0.6649650335, -0.0040469549, 0.0165434132), if (attribute(user_followers_count) < 1371.0, 0.0529050928, 0.0097057892))), -0.0074806913)), if (fieldLength(text) < 14.5, -0.0075857569, if (fieldMatch(text).significantOccurrence < 0.0476144999, -0.0052872985, if (attribute(yst_reply_auth) < 33.5, 0.0021247688, -0.004326499)))), if (fieldTermMatch(text,1).firstPosition < 12.5, if (fieldMatch(text) < 0.5566675067, -0.0012160193, 0.0034883449), if (attribute(yst_reply_auth) < 20.5, if (attribute(user_followers_count) < 213.5, -0.0026673084, 0.0029249608), if (attribute(user_statuses_count) < 7554.0, -0.0021650101, if (term(0).significance < 0.9956585169, -0.0326924993, -0.0081739014))))) +
+if (fieldMatch(text).longestSequence < 1.5, if (term(0).significance < 0.8512874842, if (term(2).significance < 0.9981595278, -0.0839195878, -0.0112331884), if (fieldMatch(text).importance < 0.4998250008, if (term(0).significance < 0.9983350039, if (fieldMatch(text).importance < 0.4989485145, if (fieldMatch(text).importance < 0.4988874793, -0.0037133544, -0.1248149534), if (fieldMatch(text).importance < 0.4997234941, 0.0058967543, if (attribute(yst_reply_auth) < 0.5, -0.0271620138, 2.819878E-4))), 0.0163052773), if (fieldTermMatch(text,0).firstPosition < 5.5, -9.410187E-4, if (fieldMatch(text).importance < 0.666454494, -0.0070269578, if (term(2).significance < 0.9931030273, -0.0096096659, 0.0015413452))))), if (attribute(user_followers_count) < 520.5, if (attribute(yst_reply_auth) < 8.5, if (fieldMatch(text).significantOccurrence < 0.0363755003, -0.0183942755, if (age(created_at) < 1710.0, 0.0052904688, 3.556613E-4)), -0.0034358951), if (attribute(user_followers_count) < 534.5, 0.0246325003, 0.0032783956))) +
+if (attribute(ythl) < 0.5, if (fieldMatch(text) < 0.4860935211, if (term(1).significance < 0.9882720113, -0.0113150549, -0.003513259), if (attribute(user_statuses_count) < 14534.0, if (attribute(user_followers_count) < 457.5, -0.0013098462, if (term(4).significance < 0.9912315011, 0.0071257515, -0.0107882556)), -0.0080098717)), if (fieldMatch(text).occurrence < 0.1348485053, if (term(1).significance < 0.9926555157, if (attribute(yst_link_array_size) < 6.15E-5, if (fieldTermMatch(text,0).firstPosition < 7.5, -0.0054509513, -0.0267164116), -2.251203E-4), if (fieldMatch(text).completeness < 0.9520415068, -0.0121998182, 6.584783E-4)), if (age(created_at) < 1770.0, 0.0078526654, if (fieldTermMatch(text,3).firstPosition < 2.5, 0.010815374, if (term(2).significance < 0.992915988, if (term(2).significance < 0.9923814535, -0.0011187617, if (fieldMatch(text).completeness < 0.9579474926, -0.0550616595, -0.0034789409)), if (attribute(yst_tweet_adult_score) < 0.5, 0.0029410626, -0.0116671785)))))) +
+if (attribute(yst_tweet_language) < 3243.5, if (age(created_at) < 1830.0, if (term(0).significance < 0.9725670218, if (fieldMatch(text) < 0.6660829782, -0.0173866153, -0.001017438), if (attribute(yst_reply_auth) < 355.5, if (term(0).significance < 0.9816665053, 0.0179367183, if (fieldMatch(text).importance < 0.749382019, if (fieldMatch(text).tail < 6.5, if (attribute(user_friends_count) < 560.5, if (fieldMatch(text) < 0.8736619949, 0.0023509846, -0.0157312448), if (fieldMatch(text) < 0.1418584883, -0.0659559738, -0.0072510736)), 0.004454443), 0.008319561)), if (attribute(yst_link_array_size) < 0.0586175025, -0.0091289813, 0.0114658081))), if (fieldMatch(text).importance < 0.666454494, if (fieldMatch(user_name).fieldCompleteness < 0.5833334923, -0.0041143634, 0.0401025109), if (term(2).significance < 0.9991624951, if (fieldMatch(text) < 0.5540195107, -0.0037827224, 1.685363E-4), if (fieldLength(text) < 8.5, -0.0122620665, 0.0037138353)))), -0.0101985628) +
+if (age(created_at) < 1830.0, if (fieldMatch(text).importance < 0.4989485145, if (fieldMatch(text).importance < 0.4988874793, if (age(created_at) < 690.0, -0.0256320594, 0.0079996205), -0.1224294269), if (attribute(yst_tweet_language) < 3243.5, if (fieldMatch(text).importance < 0.7491755486, if (term(0).significance < 0.9184160233, if (term(2).significance < 0.9980159998, -0.0525545375, 0.0023785461), if (fieldMatch(text).importance < 0.7490389943, if (attribute(user_followers_count) < 787.5, 0.0010999135, if (fieldMatch(text).importance < 0.499848485, 0.0162069505, 0.0035170311)), -0.0287539558)), 0.0072807979), -0.0106468395)), if (fieldMatch(text).importance < 0.6664534807, -0.0041764714, if (term(0).significance < 0.9954395294, if (fieldMatch(user_name).significantOccurrence < 0.2916665077, if (fieldMatch(text).importance < 0.8318179846, -0.0042118878, 0.0043649147), 0.0158696258), if (term(1).significance < 0.9986659884, if (fieldMatch(text).tail < 1.5, -0.0046863462, 0.0042419546), -8.82831E-4)))) +
+if (age(created_at) < 1830.0, if (attribute(user_statuses_count) < 3.5, -0.0248705295, if (attribute(yst_reply_auth) < 247.5, if (attribute(user_followers_count) < 97.5, if (attribute(yst_link_array_size) < 7.835E-4, if (fieldLength(text) < 27.5, if (fieldMatch(text).significantOccurrence < 0.0591179989, 0.007083232, if (fieldTermMatch(text,1).firstPosition < 1.5, 0.0146283297, -0.0028201578)), if (attribute(user_followers_count) < 23.5, -0.0472201281, 8.078028E-4)), if (fieldTermMatch(text,1).firstPosition < 4.5, 0.0067052369, if (fieldMatch(text).earliness < 0.8221344948, -0.007636276, -0.0324826734))), if (fieldLength(text) < 10.5, -0.0052857206, 0.0059849079)), if (attribute(user_followers_count) < 1692.5, -0.0091360049, 0.0091123239))), if (fieldMatch(text).longestSequence < 1.5, if (fieldMatch(text).earliness < 0.6825754642, if (term(0).significance < 0.9944700003, -0.014802095, -0.0053042509), if (fieldMatch(text).significantOccurrence < 0.1249970049, -0.0014558335, -0.0200174998)), -8.72652E-5)) +
+if (attribute(yst_reply_auth) < 26.5, if (attribute(user_followers_count) < 86.5, if (fieldMatch(text).significantOccurrence < 0.1216785014, -6.414838E-4, -0.0099978879), if (fieldMatch(text) < 0.843991518, if (fieldMatch(text).absoluteOccurrence < 0.0126785003, if (fieldMatch(text).importance < 0.6664454937, if (age(created_at) < 5400.0, -9.786234E-4, -0.010880796), 4.683724E-4), if (fieldTermMatch(text,1).firstPosition < 3.5, -0.0013491196, 0.0100488776)), if (fieldMatch(text).importance < 0.4989485145, -0.018038959, if (age(created_at) < 1770.0, if (term(0).significance < 0.9986659884, if (fieldMatch(text).earliness < 0.6099034548, 0.018822136, 0.0069414922), 0.0010695341), if (attribute(user_statuses_count) < 728.0, 0.0078644585, 0.0013385568))))), if (attribute(user_followers_count) < 1995.0, -0.0037600829, if (term(0).significance < 0.998976469, if (age(created_at) < 2310.0, if (attribute(yst_reply_auth) < 38.5, -0.0117528259, 0.0193329084), 0.0028282077), -0.002709802))) +
+if (fieldMatch(text).longestSequence < 1.5, if (term(0).significance < 0.8512874842, if (term(2).significance < 0.9981595278, -0.0893712611, 0.0139821391), if (fieldMatch(text).absoluteOccurrence < 0.0129164997, if (fieldMatch(user_name).fieldCompleteness < 0.5833334923, if (fieldMatch(text).importance < 0.4999005198, if (fieldMatch(text).importance < 0.4992579818, -0.0088299338, if (fieldLength(text) < 24.5, 0.0031375211, -0.0078301854)), if (fieldMatch(text).importance < 0.4999030232, -0.0514023475, if (fieldMatch(text).earliness < 0.6079194546, -0.0064381419, if (fieldMatch(text) < 0.8824554682, -0.001304103, -0.0302990737)))), 0.0378075574), 0.0037149737)), if (attribute(yst_tweet_language) < 3587.5, if (fieldMatch(text).absoluteProximity < 0.0573749989, -0.0037219953, if (attribute(user_followers_count) < 719.5, if (fieldMatch(text).importance < 0.6660234928, if (term(1).significance < 0.9864724874, -0.0057017615, 0.0142744959), if (fieldMatch(text).importance < 0.6664794683, -0.0033669884, 0.0011748423)), 0.0048833724)), -0.0235368129)) +
+if (fieldMatch(text).tail < 3.5, if (term(0).significance < 0.9849029779, -0.0096533539, if (age(created_at) < 210.0, 0.0086447306, if (term(3).significance < 0.9972594976, -0.0049080669, 0.0019732467))), if (attribute(user_followers_count) < 682.5, if (attribute(yst_reply_auth) < 91.5, if (attribute(yst_tweet_language) < 3243.5, if (term(4).significance < 0.9962199926, if (fieldMatch(text).tail < 11.5, if (term(1).significance < 0.9847429991, if (fieldMatch(text).importance < 0.7463389635, if (term(0).significance < 0.9929184914, -0.0337962464, -0.0042731663), -0.0018179748), -5.816172E-4), if (age(created_at) < 1710.0, 0.0058304594, -1.954122E-4)), if (term(1).significance < 0.8054080009, 0.020695941, 0.0029922212)), -0.010391704), if (attribute(yst_link_array_size) < 0.009443, -0.0102022704, if (attribute(user_friends_count) < 89.0, 0.0214038413, -0.003022702))), if (fieldMatch(text).significantOccurrence < 0.0556650013, 9.656227E-4, 0.0055771237))) +
+if (attribute(ythl) < 0.5, if (fieldMatch(text).importance < 0.4997234941, if (fieldMatch(text).importance < 0.4988809824, -0.0156475694, if (fieldMatch(text).importance < 0.4988874793, 0.0871791947, if (fieldMatch(text).importance < 0.4997065067, 0.005454559, if (term(0).significance < 0.9976885319, 0.1581759963, 0.0057478578)))), if (fieldMatch(text).importance < 0.4997634888, -0.0264810886, if (fieldMatch(text).importance < 0.4998250008, 0.0106354371, if (fieldMatch(text).longestSequence < 1.5, if (fieldMatch(text).earliness < 0.701851964, -0.0083100057, -0.0018931553), if (attribute(user_followers_count) < 701.0, -0.0020476113, 0.0053824373))))), if (fieldMatch(text) < 0.4141010046, if (fieldMatch(user_name) < 0.3179910183, if (term(0).significance < 0.9793410301, if (term(2).significance < 0.9970530272, -0.0054897109, if (fieldMatch(text).importance < 0.6620055437, 0.0060602187, -0.0517823718)), -0.0021590151), 0.0298259094), if (fieldMatch(text).significantOccurrence < 0.0392310023, -0.0063084285, 0.0020013986))) +
+if (attribute(yst_reply_auth) < 236.5, if (age(created_at) < 1830.0, if (term(1).significance < 0.7788045406, 0.0479141837, if (term(0).significance < 0.9139549732, if (fieldMatch(text).longestSequenceRatio < 0.4166665077, -0.0732771007, -0.0090569203), if (fieldMatch(text).importance < 0.4989485145, if (fieldMatch(text).importance < 0.4988874793, -0.0042825791, -0.1098205261), if (attribute(user_followers_count) < 98.5, if (attribute(user_statuses_count) < 1260.5, if (match < 0.6491410136, -0.011081957, if (term(0).significance < 0.9986954927, 0.0051994695, -0.0019674695)), -0.0076605248), if (attribute(user_statuses_count) < 5.5, -0.1055265232, if (fieldMatch(text).earliness < 0.3726850152, -7.649567E-4, if (fieldMatch(text).occurrence < 0.1519230008, 0.0078900808, if (attribute(user_followers_count) < 583.5, -0.0022611347, 0.0078521247)))))))), if (fieldMatch(text).importance < 0.6664505005, -0.0037578539, if (fieldMatch(text).significantOccurrence < 0.0386575013, -0.0059854357, 0.0010571345))), -0.0046469325) +
+if (age(created_at) < 1830.0, if (fieldMatch(text).importance < 0.7467460036, if (fieldMatch(text).tail < 7.5, if (term(0).significance < 0.8547105193, if (term(2).significance < 0.9980159998, -0.0945680172, 0.0133542), if (term(0).significance < 0.9995139837, -9.1115E-6, if (term(0).significance < 0.9996379614, if (fieldMatch(text).tail < 6.5, if (attribute(yst_link_array_size) < 0.0028985001, -0.0033284425, -0.0414096377), -0.069049086), 0.0066873893))), if (attribute(user_friends_count) < 179.5, 0.0013706096, 0.0065517887)), if (attribute(yst_tweet_language) < 3243.5, 0.0068726824, -0.03635308)), if (fieldMatch(text).longestSequence < 1.5, if (fieldMatch(text).absoluteOccurrence < 0.0122500006, if (fieldMatch(user_name).importance < 0.4999729991, -0.0053854995, 0.0353914791), if (attribute(user_friends_count) < 4181.0, 0.0041747763, -0.0313684077)), if (fieldMatch(text) < 0.5549424887, -0.0034986093, if (match < 0.7940984964, 0.0068157141, 5.80831E-4)))) +
+if (fieldMatch(text).importance < 0.4989485145, if (fieldMatch(text).importance < 0.4988874793, if (term(0).significance < 0.991086483, if (fieldMatch(text).importance < 0.4985739887, -0.0080603652, -0.0525354118), 0.030992111), -0.1167737387), if (age(created_at) < 1830.0, if (term(0).significance < 0.973123014, if (term(1).significance < 0.9992040396, if (term(1).significance < 0.9977560043, if (fieldMatch(text) < 0.5470744967, if (fieldMatch(text).importance < 0.7365344763, -0.075573942, -0.0140588177), -0.0021536645), 0.0179729341), -0.0499824746), if (term(0).significance < 0.9751809835, 0.0490509939, if (attribute(yst_reply_auth) < 20.5, if (attribute(user_followers_count) < 97.5, 0.0015097125, if (attribute(user_statuses_count) < 5.5, -0.09483518, 0.0071205807)), if (attribute(yst_link_array_size) < 0.0028940002, -0.0084126298, 0.0022449562)))), if (fieldMatch(text).importance < 0.6664534807, -0.0042838031, if (term(0).significance < 0.9982124567, -0.0016253125, 0.0026475634)))) +
+if (fieldMatch(text).longestSequence < 1.5, if (fieldTermMatch(text,0).firstPosition < 2.5, if (fieldMatch(text).importance < 0.498980999, -0.0234510876, 0.0020809399), if (attribute(yst_reply_auth) < 302.5, if (fieldMatch(text).importance < 0.666454494, if (fieldMatch(text).importance < 0.4998220205, if (attribute(yst_reply_auth) < 42.5, 0.0050751752, -0.0083413352), if (fieldMatch(text).absoluteOccurrence < 0.0124999993, if (age(created_at) < 30600.0, if (fieldMatch(text).absoluteProximity < 0.0125000002, 0.0047520251, if (term(1).significance < 0.988929987, -0.0182384871, -0.0050169041)), -0.0135494985), 0.0043484195)), if (term(0).significance < 0.9941140413, -0.005205476, 0.0033577205)), -0.0100142174)), if (age(created_at) < 1770.0, if (fieldMatch(text) < 0.8529180288, 5.433753E-4, 0.0060936539), if (fieldMatch(text).earliness < 0.929802537, if (fieldLength(text) < 29.5, -5.482093E-4, -0.0264980545), if (fieldMatch(text).occurrence < 0.2290209979, 0.0068625219, -5.992389E-4)))) +
+if (fieldTermMatch(text,2).firstPosition < 13.5, if (term(2).significance < 0.9519284964, -0.0041815526, if (age(created_at) < 5400.0, if (fieldMatch(text).significance < 0.7492735386, if (fieldMatch(text).tail < 20.5, 0.0019880057, 0.0345569075), 0.008782237), if (term(2).significance < 0.9934439659, if (term(2).significance < 0.99218297, 4.470985E-4, -0.0109745339), 0.0025543252))), if (attribute(yst_reply_auth) < 22.5, if (term(1).significance < 0.9951915145, if (fieldMatch(text).fieldCompleteness < 0.0816664994, -0.0140486512, -0.0020108004), if (fieldMatch(user_name).head < 0.5, if (attribute(yst_tweet_language) < 3243.5, if (fieldLength(text) < 8.5, -0.0088342224, 0.0012278464), -0.0093805182), 0.0236651466)), if (attribute(user_followers_count) < 125.5, if (age(created_at) < 270.0, if (term(0).significance < 0.9985035062, 0.0238630955, -0.018749544), -0.0107232535), if (attribute(yst_reply_auth) < 813.0, -0.0012233981, -0.0114833082)))) +
+if (fieldMatch(text).absoluteOccurrence < 0.0122500006, if (fieldMatch(text).earliness < 0.8596060276, if (attribute(yst_reply_auth) < 302.5, if (fieldMatch(user_name).completeness < 0.9791665077, if (fieldMatch(text).longestSequence < 1.5, if (age(created_at) < 1830.0, -8.288048E-4, if (fieldMatch(text).importance < 0.6664404869, if (fieldMatch(text).fieldCompleteness < 0.0976189971, -0.0053357392, -0.013793688), if (fieldMatch(text).occurrence < 0.1558704972, 0.002492224, -0.0089043788))), if (fieldMatch(text).weightedOccurrence < 0.0385860018, -0.0071451431, 5.167991E-4)), 0.0407753689), if (attribute(user_followers_count) < 1721.5, -0.0105255162, 0.0015551667)), if (fieldLength(text) < 8.5, -0.0088276495, if (fieldTermMatch(text,0).firstPosition < 15.5, 0.0024878063, -0.0103987822))), if (attribute(user_followers_count) < 27.5, if (fieldTermMatch(text,1).firstPosition < 17.5, if (term(0).significance < 0.9853284955, -0.0219723254, -0.0024166886), 0.018923986), if (attribute(user_statuses_count) < 93.5, 0.0212835416, 0.0045698024))) +
+if (fieldMatch(text).longestSequence < 1.5, if (term(0).significance < 0.8509274721, if (term(2).significance < 0.9981595278, -0.0629348018, -0.0067077117), if (fieldMatch(text).significance < 0.7493325472, if (fieldMatch(text).importance < 0.4999189973, if (fieldMatch(text).importance < 0.4999135137, if (fieldMatch(text).importance < 0.4999005198, if (fieldMatch(text).importance < 0.4997529984, -0.0025494044, if (fieldMatch(text).importance < 0.4998250008, 0.010539188, if (fieldMatch(text).occurrence < 0.0425724983, -0.0160413841, 0.0020384205))), -0.0118667123), 0.019974214), if (fieldMatch(text) < 0.8826240301, if (fieldMatch(text).absoluteOccurrence < 0.0116665, if (fieldMatch(text).earliness < 0.6554945111, -0.0060372501, -0.0016907417), 0.0022069234), -0.0320846717)), 0.0077067193)), if (term(1).significance < 0.8159549832, 0.0094136687, if (attribute(yst_tweet_language) < 3587.5, if (fieldMatch(text).earliness < 0.929802537, 3.53609E-4, if (fieldMatch(text).fieldCompleteness < 0.1519230008, 0.0079910001, 9.053355E-4)), -0.0202098115))) +
+if (age(created_at) < 60.0, 0.05, 0.0) +
+if (age(created_at) < 120.0, 0.0125, 0.0) +
+if (age(created_at) < 240.0, 0.0125, 0.0) +
+if (age(created_at) < 360.0, 0.0125, 0.0) +
+if (age(created_at) < 480.0, 0.0125, 0.0) +
+if (age(created_at) < 600.0, 0.017, 0.0) +
+if (age(created_at) < 1200.0, 0.017, 0.0) +
+if (age(created_at) < 2400.0, 0.017, 0.0) +
+if (age(created_at) < 3600.0, 0.025, 0.0) +
+if (age(created_at) < 7200.0, 0.025, 0.0) \ No newline at end of file
diff --git a/searchlib/src/test/files/ranking03.expression b/searchlib/src/test/files/ranking03.expression
new file mode 100644
index 00000000000..d6837511a28
--- /dev/null
+++ b/searchlib/src/test/files/ranking03.expression
@@ -0,0 +1,97 @@
+if (MAX_SCORE < 270055.0, if (MAX_SCORE < 241174.0, 0.234534, if (ISABSTRACT_AVG < 0.105, 0.248214, 0.239032)), if (ISABSTRACT_AVG < 0.13, if (DAY_HITS_FRAC < 0.765, if (MAX_SCORE < 347793.0, 0.258244, 0.268225), 0.271744), 0.247728)) +
+if (MAX_SCORE < 270055.0, if (MAX_SCORE < 252585.0, -0.0118809, -0.00253128), if (ISABSTRACT_AVG < 0.21, if (DAY_LW_DAY_HITS_RATIO < 4.345, if (MAX_SCORE < 354461.0, 0.00546628, 0.0164708), 0.0188771), if (DAY_PREV_DAY_HITS_FRAC < 0.805, -0.0092059, 0.00324753))) +
+if (MAX_SCORE < 270290.0, if (MAX_SCORE < 236242.0, -0.0121516, if (DAY_LW_DAY_HITS_RATIO < 3.45, -0.00767489, if (ISABSTRACT_AVG < 0.12, 0.00622939, -0.00488712))), if (ISABSTRACT_AVG < 0.105, if (WEEKAVG < 0.5, 0.00766953, 0.0156887), -7.73677E-4)) +
+if (MAX_SCORE < 270061.0, if (MAX_SCORE < 238942.0, -0.0111281, if (DAY_LW_DAY_HITS_RATIO < 3.9, -0.00750282, 0.00220298)), if (ISABSTRACT_AVG < 0.105, if (DAY_HITS_FRAC < 0.795, if (MAX_SCORE < 348364.0, 0.00374845, 0.0131108), 0.0161683), -0.00111039)) +
+if (MAX_SCORE < 270289.0, if (DAY_PREV_DAY_HITS_FRAC < 0.715, -0.0106179, if (ISTITLE_AVG < 0.705, 0.00251111, -0.0060076)), if (ISABSTRACT_AVG < 0.21, if (PUB_TODAY_AVG < 0.13, 0.00368181, if (ISTITLE_AVG < 0.845, 0.0126785, 0.00345445)), -0.00136004)) +
+if (MAX_SCORE < 271459.0, if (MAX_SCORE < 238606.0, -0.00948395, if (ISABSTRACT_AVG < 0.105, if (DAY_LW_DAY_HITS_RATIO < 2.805, -0.00477034, 0.00664777), -0.00676399)), if (ISABSTRACT_AVG < 0.13, if (DAY_PREV_DAY_HITS_FRAC < 0.675, 0.00489261, 0.0122925), -9.20098E-4)) +
+if (MAX_SCORE < 271407.0, if (DAY_LW_DAY_HITS_RATIO < 3.485, -0.00827195, if (NATIONALNEWS < 0.185, -0.00376713, if (MAX_SCORE < 245976.0, 3.52932E-4, 0.0160415))), if (ISTITLE_AVG < 0.705, if (DAY_PREV_DAY_HITS_FRAC < 0.675, 0.00314056, 0.0112222), 9.24328E-4)) +
+if (MAX_SCORE < 253367.0, if (MAX_SCORE < 177746.0, -0.0118279, -0.00453188), if (ISABSTRACT_AVG < 0.105, if (DAY_LW_DAY_HITS_RATIO < 4.25, if (MAX_SCORE < 354461.0, -7.20492E-4, 0.00994136), 0.0104822), if (DAY_PREV_DAY_HITS_FRAC < 0.915, -0.00613264, 0.00119411))) +
+if (MAX_SCORE < 270055.0, if (DAY_LW_DAY_HITS_RATIO < 4.635, if (MAX_SCORE < 221962.0, -0.00963481, -0.00428119), -3.41413E-4), if (ISTITLE_AVG < 0.565, if (MAX_SCORE < 354542.0, if (DAY_HITS < 1.5, -0.00205135, 0.00751225), 0.0111239), 2.2935E-4)) +
+if (MAX_SCORE < 263726.0, if (DAY_PREV_DAY_HITS_FRAC < 0.715, -0.00728388, if (ISTITLE_AVG < 0.73, 0.00311214, -0.00320301)), if (ISABSTRACT_AVG < 0.105, if (WEEKAVG < 0.215, -0.00472856, if (DAY_LW_DAY_HITS_RATIO < 47.0, 0.00641873, 0.0215092)), -0.00106176)) +
+if (MAX_SCORE < 263734.0, if (DAY_LW_DAY_HITS_RATIO < 3.635, -0.0061738, if (ISTITLE_AVG < 0.05, 0.00678624, -0.0034547)), if (ISABSTRACT_AVG < 0.105, if (LOCALNEWS < 0.105, 0.00405055, 0.00975544), if (DAY_PREV_DAY_HITS_FRAC < 0.905, -0.00538249, 0.00274471))) +
+if (MAX_SCORE < 252459.0, -0.00466436, if (ISABSTRACT_AVG < 0.21, if (SUPERDUPER_AVG < 0.115, if (MAX_SCORE < 254916.0, 0.0300376, 0.00749701), if (PUB_TODAY_AVG < 0.105, if (DAY_PD_HITS_RATIO < 0.085, 0.0121226, -0.00799009), 0.00453286)), -0.00140668)) +
+if (MAX_SCORE < 264515.0, if (DAY_PREV_DAY_HITS_FRAC < 0.725, -0.00554549, if (DAY_HITS < 30.5, -0.0016496, 0.0125357)), if (ISTITLE_AVG < 0.585, if (WEEKAVG < 0.5, 0.00111467, 0.0073944), if (MAX_SCORE < 356177.0, -0.00212114, 0.00636485))) +
+if (DAY_PREV_DAY_HITS_FRAC < 0.825, if (PREV_DAY_HITS < 17.5, -0.00398853, 0.00429611), if (ISTITLE_AVG < 0.73, if (MIN_RANK < 9.0, if (DAY_HITS < 46.0, 0.00610951, 0.0271326), -0.00242626), if (MAX_SCORE < 374204.0, -0.00141569, 0.00890749))) +
+if (MAX_SCORE < 249898.0, -0.00399302, if (BUSINESS < 0.315, if (ISTITLE_AVG < 0.73, if (SUPERDUPER_AVG < 0.105, 0.00904674, if (WEEKAVG < 5.5, if (PUB_TODAY_AVG < 0.13, -0.00769757, 0.00217607), 0.0110208)), -5.93102E-4), -0.00318209)) +
+if (MAX_SCORE < 276408.0, if (DAY_LW_DAY_HITS_RATIO < 4.535, -0.00344589, if (LOCALNEWS < 0.53, -6.70599E-5, 0.0175562)), if (ISABSTRACT_AVG < 0.685, if (DAY_LW_DAY_HITS_RATIO < 33.5, if (LOCALNEWS < 0.115, 0.00202221, 0.00726641), 0.0142841), -0.00307504)) +
+if (MAX_SCORE < 348857.0, if (DAY_PREV_DAY_HITS_FRAC < 0.725, -0.00419409, if (PUB_TODAY_AVG < 0.185, -0.00386261, if (ISTITLE_AVG < 0.705, if (BUSINESS < 0.21, 0.0063503, if (DAY_HITS_FRAC < 0.555, -0.0102176, 0.00356215)), -4.78923E-4))), 0.00498293) +
+if (MAX_SCORE < 286123.0, if (DAY_PD_HITS_RATIO < 48.0, if (ISTITLE_AVG < 0.61, if (MAX_RANK < 9.0, if (MAX_SCORE < 226208.0, -0.00741311, 0.0138247), if (AVG_RANK < 9.635, 3.60768E-4, -0.00785446)), -0.00329639), 0.0225017), 0.00337188) +
+if (DAY_LW_DAY_HITS_RATIO < 7.25, if (BUSINESS < 0.05, if (ISTITLE_AVG < 0.895, if (MIN_RANK < 1.0, 0.0175483, 0.00215143), -0.00158754), if (DAY_WEEK_AVG_RATIO < 0.325, 0.0165492, if (MAX_SCORE < 448185.0, -0.00386364, 0.0131047))), 0.00394983) +
+if (MAX_SCORE < 271407.0, if (MAX_SCORE < 177474.0, -0.00525936, if (SPORTS < 0.645, -0.00170921, 0.00426429)), if (DAY_PD_HITS_RATIO < 0.085, 0.0151019, if (BUSINESS < 0.645, if (LW_DAY_HITS < 2.5, 0.00244345, -0.0099429), -0.00501617))) +
+if (DAY_PREV_DAY_HITS_FRAC < 0.945, if (ISTITLE_AVG < 0.95, if (BUSINESS < 0.235, 0.0017119, -0.00331729), -0.00374611), if (WEEKAVG < 0.215, -0.004784, if (DAY_PD_HITS_RATIO < 0.145, -0.00631232, if (BUSINESS < 0.685, 0.00413018, -0.00259307)))) +
+if (PUB_TODAY_AVG < 0.87, if (BUSINESS < 0.235, if (ISTITLE_AVG < 0.39, if (WEEKAVG < 7.855, if (SUPERDUPER_AVG < 0.115, 0.00505563, -3.98588E-4), 0.017327), -0.00158729), -0.00349104), if (WEEKAVG < 0.36, -0.00122032, 0.00412986)) +
+if (MAX_SCORE < 235342.0, -0.00255699, if (ISABSTRACT_AVG < 0.13, if (WEEKAVG < 0.215, -0.00736871, if (LOCALNEWS < 0.775, if (SUPERDUPER_AVG < 0.315, 0.00356548, if (WEEKAVG < 7.07, -0.00254331, 0.00782112)), 0.0187086)), -0.0015245)) +
+if (DAY_PREV_DAY_HITS_FRAC < 0.825, -0.0017775, if (WEEKAVG < 0.36, -0.00236106, if (MAX_MIN_RANK < 3.0, if (PUB_TODAY_AVG < 0.27, -0.00108329, if (DAY_WEEK_AVG_RATIO < 2.615, if (MAX_SCORE < 248412.0, 0.00662755, 0.0252786), 0.00570542)), 0.00183161))) +
+if (DAY_PREV_DAY_HITS_FRAC < 0.725, if (MAX_SCORE < 453346.0, if (PREV_DAY_HITS < 15.5, -0.00263045, 0.00362606), 0.0113911), if (DAY_LW_DAY_HITS_RATIO < 47.0, if (MAX_SCORE < 214610.0, -0.00305392, if (PREV_DAY_HITS < 46.5, 0.00171595, 0.0157708)), 0.0123294)) +
+if (PUB_TODAY_AVG < 0.815, if (BUSINESS < 0.05, if (PUB_TODAY_AVG < 0.155, -0.00239543, if (PREV_DAY_HITS < 0.5, -0.00219916, 0.00267906)), -0.00274426), if (ISTITLE_AVG < 0.95, if (DAY_PD_HITS_RATIO < 1.445, -0.012251, 0.00474059), -6.50252E-4)) +
+if (ISABSTRACT_AVG < 0.105, if (MAX_SCORE < 235080.0, -0.00337944, if (SUPERDUPER_AVG < 0.105, if (BUSINESS < 0.435, if (MAX_SCORE < 293262.0, 0.00942708, 0.00296784), -0.00165307), if (MAX_SCORE < 262829.0, -0.00745914, 0.0011197))), -0.0017808) +
+if (MAX_SCORE < 347080.0, if (DAY_LW_DAY_HITS_RATIO < 4.31, if (NATIONALNEWS < 0.295, -0.00181733, 0.00242649), if (MAX_SCORE < 313528.0, if (LOCALNEWS < 0.53, if (TOPSTORY < 0.355, 0.00109569, 0.00947164), 0.0165664), -0.00846682)), 0.00293581) +
+if (MAX_SCORE < 177806.0, -0.00360187, if (TOPSTORY < 0.295, if (LOCALNEWS < 0.765, 4.80638E-6, if (ISTITLE_AVG < 0.29, 0.0164568, 0.00112041)), if (INTLNEWS < 0.355, 0.00153933, if (WEEKAVG < 0.36, -0.00129083, 0.0150131)))) +
+if (ISTITLE_AVG < 0.73, if (BUSINESS < 0.27, if (MAX_MIN_RANK < 9.0, if (SUPERDUPER_AVG < 0.315, if (PREV_DAY_HITS < 17.5, 0.00272769, 0.0136338), -3.41266E-4), 0.0154743), if (NATIONALNEWS < 0.21, -0.0029607, 0.0128593)), -0.00131249) +
+if (DAY_LW_DAY_HITS_RATIO < 7.585, if (MAX_SCORE < 424137.0, if (DAY_WEEK_AVG_RATIO < 4.78, -8.74675E-4, -0.0111332), if (ENTERTAINMENT < 0.12, if (DAY_LW_DAY_HITS_RATIO < 2.5, if (DAY_WEEK_AVG_RATIO < 0.74, 0.00976536, 0.0306272), 0.00520021), 8.67293E-5)), 0.00333736) +
+if (DAY_PD_HITS_RATIO < 0.085, 0.00972107, if (SPORTS < 0.845, if (PUB_TODAY_AVG < 0.95, if (PREV_DAY_HITS < 0.5, -0.00391231, -2.54135E-4), if (MAX_MIN_RANK < 7.0, if (DAY_LW_DAY_HITS_RATIO < 19.5, -1.75771E-4, 0.00523989), 0.0109531)), 0.00548354)) +
+if (MAX_SCORE < 466894.0, if (NATIONALNEWS < 0.21, if (DAY_PD_HITS_RATIO < 0.055, 0.0159556, if (REGIONALNEWS < 0.05, -0.00112302, if (DAY_PD_HITS_RATIO < 1.105, 0.0140125, -7.24566E-4))), if (AVG_RANK < 8.1, -2.73744E-4, 0.00546871)), 0.00555251) +
+if (MAX_SCORE < 286123.0, -8.23047E-4, if (DAY_PREV_DAY_HITS_FRAC < 0.435, if (DAY_WEEK_AVG_RATIO < 1.555, 0.0222361, 1.89447E-4), if (MAX_MIN_RANK < 7.0, if (DAY_PREV_DAY_HITS_FRAC < 0.755, -0.00264164, 0.00200226), if (WEEKAVG < 1.07, 0.0151659, 0.00171852)))) +
+if (BUSINESS < 0.05, if (SUPERDUPER_AVG < 0.115, if (ISTITLE_AVG < 0.895, 0.00536839, 7.53571E-5), if (AVG_RANK < 8.21, if (PUB_TODAY_AVG < 0.13, -0.00865216, if (DAY_HITS_FRAC < 0.115, 0.0146316, -0.00249)), 0.00159523)), -0.00131884) +
+if (DAY_LW_DAY_HITS_RATIO < 33.5, if (LIFESTYLE < 0.05, if (DAY_PD_HITS_RATIO < 0.065, 0.0130228, if (SPORTS < 0.39, if (NATIONALNEWS < 0.05, -0.00128992, 0.00127302), if (DAY_PD_HITS_RATIO < 13.5, 0.00304904, -0.0168329))), -0.00545277), 0.00512552) +
+if (DAY_HITS_FRAC < 0.765, -5.27346E-4, if (PUB_TODAY_AVG < 0.355, -0.0153305, if (DAY_HITS < 46.5, if (DAY_PD_HITS_RATIO < 29.5, if (NATIONALNEWS < 0.105, 7.3747E-4, if (DAY_WEEK_AVG_RATIO < 8.47, 0.00769293, -0.0125825)), -0.0108761), 0.00977691))) +
+if (MAX_SCORE < 177732.0, -0.00260643, if (BUSINESS < 0.05, if (WEEKAVG < 0.215, -0.00327106, if (AVG_RANK < 8.635, if (SUPERDUPER_AVG < 0.235, if (ISABSTRACT_AVG < 0.415, 0.00414333, -0.00152725), -0.00286672), 0.00429432)), -4.07557E-4)) +
+if (WEEKAVG < 0.64, if (SUPERDUPER_AVG < 0.29, -1.3784E-4, -0.00368109), if (MAX_SCORE < 271407.0, if (MAX_MIN_RANK < 5.0, 9.85637E-4, if (SUPERDUPER_AVG < 0.115, if (DAY_LW_DAY_HITS_RATIO < 4.415, -0.00258674, 0.00694569), -0.00593057)), 0.00237623)) +
+if (MAX_SCORE < 177732.0, -0.00248172, if (LIFESTYLE < 0.13, if (PUB_TODAY_AVG < 0.105, if (DAY_HITS < 3.5, -7.2429E-4, -0.0150678), if (DAY_HITS_FRAC < 0.075, 0.0156611, if (BUSINESS < 0.05, 0.00219968, -3.65826E-4))), -0.00592673)) +
+if (WEEKAVG < 0.215, -0.0047613, if (SPORTS < 0.355, if (MIN_RANK < 5.0, if (MAX_SCORE < 467877.0, -0.0025312, if (MAX_SCORE < 576366.0, 0.0134173, -0.00903108)), 3.0441E-4), if (WEEKAVG < 5.07, 0.00170865, 0.0116233))) +
+if (WEEKAVG < 0.215, -0.00445856, if (DAY_PREV_DAY_HITS_FRAC < 0.725, if (MAX_SCORE < 459781.0, -0.00110273, 0.01224), if (SPORTS < 0.87, if (NATIONALNEWS < 0.05, 1.76374E-4, if (BUSINESS < 0.185, 0.00101462, 0.0103262)), 0.00758848))) +
+if (MAX_SCORE < 588664.0, if (MAX_SCORE < 453568.0, if (PREV_DAY_HITS < 26.5, -7.786E-5, if (WEEKAVG < 9.215, -0.012221, -0.00126183)), if (DAY_PREV_DAY_HITS_FRAC < 0.555, 0.0175351, if (AVG_RANK < 9.7, 7.47189E-5, 0.0152525))), -0.0113374) +
+if (TOPSTORY < 0.295, -3.12071E-4, if (MAX_MIN_RANK < 7.0, if (ISTITLE_AVG < 0.185, if (MAX_SCORE < 378124.0, 0.00111897, if (MAX_SCORE < 408027.0, -0.0203516, 0.0012991)), if (INTLNEWS < 0.13, 7.74937E-4, 0.00732047)), 0.0117253)) +
+if (MAX_SCORE < 178085.0, -0.00220705, if (WEEKAVG < 6.64, if (SUPERDUPER_AVG < 0.105, if (DAY_PD_HITS_RATIO < 0.115, 0.0149532, 0.00106296), if (AVG_RANK < 7.73, if (PUB_TODAY_AVG < 0.13, -0.0104993, -0.00177497), 2.5899E-7)), 0.00418893)) +
+if (ISTITLE_AVG < 0.585, if (AVG_RANK < 8.47, 0.00253086, if (DAY_HITS_FRAC < 0.885, if (LOCALNEWS < 0.13, -8.94801E-4, -0.00988189), if (NATIONALNEWS < 0.11, if (MAX_SCORE < 282066.0, 0.00689219, -0.00300841), 0.0150141))), -0.00102603) +
+if (DAY_PREV_DAY_HITS_FRAC < 0.985, -8.43826E-4, if (MIN_RANK < 9.0, if (AVG_RANK < 8.71, 4.51436E-4, if (SUPERDUPER_AVG < 0.27, if (DAY_WEEK_AVG_RATIO < 5.05, 0.00639888, 0.020614), if (NATIONALNEWS < 0.185, -0.00149465, 0.0118779))), -0.00241922)) +
+if (HEALTH < 0.105, if (DAY_PREV_DAY_HITS_FRAC < 0.725, if (DAY_PREV_DAY_HITS_FRAC < 0.405, if (INTLNEWS < 0.315, 0.00140618, 0.0145332), -0.00130877), if (WEEKAVG < 0.5, -6.96011E-4, if (MAX_MIN_RANK < 5.0, 0.0028215, -9.0855E-4))), -0.0059993) +
+if (SPORTS < 0.585, if (TOPSTORY < 0.295, -3.94764E-4, if (ENTERTAINMENT < 0.05, 0.00143724, 0.00930005)), if (AVG_RANK < 5.55, if (DAY_WEEK_AVG_RATIO < 1.955, 0.00377635, 0.0210534), if (MAX_SCORE < 389202.0, 0.00246072, -0.012129))) +
+if (MAX_SCORE < 406793.0, if (DAY_PD_HITS_RATIO < 0.075, 0.0102381, if (PUB_TODAY_AVG < 0.295, -0.00209613, if (MAX_SCORE < 305867.0, 9.38554E-4, if (MAX_SCORE < 347812.0, -0.00625349, -3.2361E-5)))), if (PREV_DAY_HITS < 17.5, 0.00426042, -0.0139803)) +
+if (MAX_SCORE < 187757.0, -0.00190196, if (NATIONALNEWS < 0.185, 1.08423E-5, if (PREV_DAY_HITS < 4.5, if (ISTITLE_AVG < 0.585, 0.00687766, 0.00142303), if (SUPERDUPER_AVG < 0.275, -0.00581088, if (INTLNEWS < 0.315, 0.0130163, -0.00562813))))) +
+if (MAX_SCORE < 423724.0, if (MAX_SCORE < 408911.0, -3.03869E-4, -0.00754368), if (MAX_SCORE < 435668.0, 0.0194021, if (DAY_HITS < 5.5, if (AVG_RANK < 9.265, 0.00209562, 0.0171146), if (MAX_SCORE < 466889.0, -0.0147582, 0.00191369)))) +
+if (PREV_DAY_HITS < 26.5, if (PREV_DAY_HITS < 19.5, 4.07731E-6, if (ISTITLE_AVG < 0.7, 0.0180989, 0.0014322)), if (MAX_SCORE < 378124.0, if (INTLNEWS < 0.25, -0.00926901, if (ISTITLE_AVG < 0.15, 0.00951019, -0.00389496)), -0.0168153)) +
+if (ISABSTRACT_AVG < 0.815, if (PUB_TODAY_AVG < 0.05, if (PREV_DAY_HITS < 16.5, -0.00256108, 0.010687), if (BUSINESS < 0.05, 0.00107951, -0.00114831)), if (AVG_RANK < 8.31, -4.90289E-4, if (SPORTS < 0.315, 0.00273855, 0.0123011))) +
+if (SUPERDUPER_AVG < 0.115, if (DAY_PD_HITS_RATIO < 0.115, 0.0119548, 4.25021E-4), if (INTLNEWS < 0.155, if (ISTITLE_AVG < 0.185, if (INTLNEWS < 0.05, -0.00395117, -0.0145832), -0.00135759), if (TOPSTORY < 0.295, -0.00119962, 0.00380053))) +
+if (MAX_SCORE < 187608.0, -0.00129909, if (DAY_WEEK_AVG_RATIO < 10.06, if (DAY_WEEK_AVG_RATIO < 9.235, if (DAY_PD_HITS_RATIO < 0.055, 0.0114518, if (PREV_DAY_HITS < 26.5, 9.65212E-4, if (LOCALNEWS < 0.05, -0.00805593, 0.00585007))), -0.0101744), 0.010206)) +
+if (DAY_LW_DAY_HITS_RATIO < 4.71, if (SUPERDUPER_AVG < 0.315, -2.36511E-4, -0.00312389), if (DAY_PD_HITS_RATIO < 13.5, if (MAX_SCORE < 253372.0, -0.00118965, 0.00291415), if (ENTERTAINMENT < 0.05, if (SPORTS < 0.315, -0.00292663, -0.0194296), 0.00658386))) +
+if (DAY_PREV_DAY_HITS_FRAC < 0.405, if (WEEKAVG < 0.5, 0.0176531, 9.11096E-4), if (SPORTS < 0.39, -6.82618E-4, if (DAY_PD_HITS_RATIO < 11.5, if (DAY_LW_DAY_HITS_RATIO < 4.75, 5.16855E-4, if (DAY_HITS_FRAC < 0.41, 0.0159972, 0.00324363)), -0.0108331))) +
+if (WEEKAVG < 0.36, -0.00257521, if (TOPSTORY < 0.635, if (PUB_TODAY_AVG < 0.79, if (DAY_PD_HITS_RATIO < 2.185, if (DAY_PREV_DAY_HITS_FRAC < 0.415, 0.00607155, -1.26015E-4), if (ISTITLE_AVG < 0.13, -0.00728662, -9.28754E-4)), 0.00147343), 0.014873)) +
+if (TOPSTORY < 0.185, -2.97667E-4, if (DAY_LW_DAY_HITS_RATIO < 6.3, if (PREV_DAY_HITS < 19.5, if (DAY_PD_HITS_RATIO < 0.13, -0.00702476, if (LOCALNEWS < 0.05, 0.00592136, -7.83801E-4)), if (ISABSTRACT_AVG < 0.15, 0.023326, 8.03551E-4)), -0.00103664)) +
+if (WEEKAVG < 0.215, -0.00379646, if (MAX_MIN_RANK < 3.0, if (DAY_HITS < 1.5, -0.00199037, if (DAY_PD_HITS_RATIO < 1.125, if (DAY_LW_DAY_HITS_RATIO < 4.375, 0.00535447, if (DAY_PD_HITS_RATIO < 0.825, 0.00562457, 0.0330072)), 0.00138881)), -7.58841E-4)) +
+if (DAY_HITS_FRAC < 0.435, if (NATIONALNEWS < 0.685, if (INTLNEWS < 0.47, -2.35511E-4, if (MAX_SCORE < 290762.0, 6.19978E-4, if (SUPERDUPER_AVG < 0.155, 0.0182407, 0.00521312))), 0.0140779), if (MAX_SCORE < 484643.0, -5.18234E-4, -0.00804112)) +
+if (DAY_PD_HITS_RATIO < 0.055, 0.0111333, if (DAY_LW_DAY_HITS_RATIO < 0.355, -0.00829529, if (PUB_TODAY_AVG < 0.95, if (DAY_PD_HITS_RATIO < 1.74, 5.29497E-4, if (NATIONALNEWS < 0.415, -0.00200727, 0.0081622)), if (INTLNEWS < 0.47, 0.00260098, -0.001284)))) +
+if (MAX_MIN_RANK < 7.0, if (MAX_MIN_RANK < 5.0, 3.81058E-4, if (SUPERDUPER_AVG < 0.13, 3.29065E-4, -0.00386397)), if (MAX_SCORE < 266105.0, -5.80382E-4, if (MAX_SCORE < 322321.0, if (DAY_WEEK_AVG_RATIO < 5.0, 0.0158417, 2.53264E-4), 0.00216101))) +
+if (WEEKAVG < 0.215, if (SUPERDUPER_AVG < 0.5, if (LOCALNEWS < 0.5, -0.00995113, 0.00292683), 0.00105182), if (DAY_PREV_DAY_HITS_FRAC < 0.725, -5.75584E-4, if (TOPSTORY < 0.585, if (SPORTS < 0.87, 7.83846E-4, 0.00745576), 0.0129932))) +
+if (HEALTH < 0.115, if (DAY_PD_HITS_RATIO < 25.165, if (DAY_WEEK_AVG_RATIO < 10.115, if (DAY_PREV_DAY_HITS_FRAC < 0.405, 0.00322116, -1.4541E-4), 0.00878821), if (DAY_PREV_DAY_HITS_FRAC < 0.975, -0.0149181, -0.00209673)), if (MAX_SCORE < 286434.0, -0.00861656, 0.00142851)) +
+if (LIFESTYLE < 0.185, if (MISC < 0.105, if (DAY_LW_DAY_HITS_RATIO < 0.925, if (MAX_SCORE < 273352.0, if (ISTITLE_AVG < 0.39, 0.00606893, -0.00394074), -0.012762), -8.2932E-5), 0.00878689), if (MAX_SCORE < 250603.0, -0.00131893, -0.0107682)) +
+if (DAY_HITS < 13.5, 1.03863E-4, if (ENTERTAINMENT < 0.415, if (MIN_RANK < 7.0, if (DAY_PREV_DAY_HITS_FRAC < 0.875, if (MAX_SCORE < 261175.0, -0.00601924, 0.00517774), -0.00731704), if (ISABSTRACT_AVG < 0.685, -0.00243371, 0.0102497)), 0.0109447)) +
+if (DAY_WEEK_AVG_RATIO < 4.855, if (DAY_WEEK_AVG_RATIO < 4.625, 3.35357E-4, 0.00823829), if (NATIONALNEWS < 0.39, if (ISABSTRACT_AVG < 0.295, if (INTLNEWS < 0.95, if (NATIONALNEWS < 0.155, -0.00576638, 0.00314375), 0.0086362), 0.00191928), -0.0129199)) +
+if (WEEKAVG < 1.07, -6.88613E-4, if (INTLNEWS < 0.355, if (POLITICS < 0.05, if (PREV_DAY_HITS < 33.5, 8.33826E-4, -0.0106428), -0.0100621), if (DAY_HITS < 5.5, if (DAY_PD_HITS_RATIO < 0.105, -0.00557824, 0.0173808), 0.00181211))) +
+if (PUB_TODAY_AVG < 0.815, -6.47154E-4, if (DAY_PD_HITS_RATIO < 1.53, -0.00676558, if (SCIENCE < 0.05, if (PREV_DAY_HITS < 5.5, if (SUPERDUPER_AVG < 0.27, if (ISTITLE_AVG < 0.95, 0.00474205, -0.00112826), -0.00194945), 0.00633626), 0.0126675))) +
+if (MAX_SCORE < 347896.0, if (NUM_WORDS < 2.5, -8.59477E-5, -0.00464466), if (LOCALNEWS < 0.105, if (PREV_DAY_HITS < 17.5, 8.61947E-4, -0.00908692), if (SUPERDUPER_AVG < 0.415, if (SPORTS < 0.125, 0.00451276, 0.0182081), -0.0128104))) +
+if (SCIENCE < 0.365, if (MAX_SCORE < 588664.0, if (SUPERDUPER_AVG < 0.115, 4.73474E-4, if (MAX_SCORE < 282998.0, -0.00203992, if (SCIENCE < 0.105, if (SPORTS < 0.465, 0.00173095, -0.00632811), -0.013829))), -0.0095913), -0.00990551) +
+if (NATIONALNEWS < 0.105, -6.2577E-4, if (SPORTS < 0.13, if (DAY_WEEK_AVG_RATIO < 9.235, if (DAY_WEEK_AVG_RATIO < 0.505, 0.00990844, 2.4663E-4), -0.0117063), if (MAX_SCORE < 277259.0, if (DAY_WEEK_AVG_RATIO < 1.955, -0.00262119, 0.0102735), 0.0198781))) +
+if (MAX_SCORE < 382346.0, if (MAX_SCORE < 378950.0, -2.01382E-4, -0.0116932), if (MAX_SCORE < 385719.0, 0.0202474, if (AVG_RANK < 8.27, if (PREV_DAY_HITS < 9.5, 3.03439E-4, -0.0119779), if (ENTERTAINMENT < 0.315, 0.00225595, 0.0201995)))) +
+if (DAY_PD_HITS_RATIO < 47.0, if (DAY_PD_HITS_RATIO < 27.25, if (DAY_LW_DAY_HITS_RATIO < 31.5, if (LOCALNEWS < 0.765, -3.99432E-4, 0.00362509), if (DAY_WEEK_AVG_RATIO < 3.98, -0.00769823, if (DAY_WEEK_AVG_RATIO < 5.4, 0.018687, 0.00240302))), -0.00932172), 0.00902439) +
+if (LW_DAY_HITS < 2.5, if (MIN_RANK < 7.0, -1.57235E-4, if (DAY_PREV_DAY_HITS_FRAC < 0.555, if (INTLNEWS < 0.225, 0.00402381, 0.0196079), if (DAY_WEEK_AVG_RATIO < 6.01, if (DAY_WEEK_AVG_RATIO < 3.805, 2.36934E-4, 0.00609122), -0.0040371))), -0.00301446) +
+if (DAY_PD_HITS_RATIO < 0.085, if (AVG_RANK < 8.9, 0.0124264, 1.8696E-4), if (PREV_DAY_HITS < 26.5, if (PREV_DAY_HITS < 19.5, -3.01051E-5, if (MAX_SCORE < 294152.0, 0.010709, -0.00331498)), if (WEEKAVG < 10.785, -0.0102542, -1.3417E-4))) +
+if (DAY_WEEK_AVG_RATIO < 10.06, if (DAY_WEEK_AVG_RATIO < 4.835, -7.25075E-5, if (ENTERTAINMENT < 0.05, if (ISABSTRACT_AVG < 0.83, if (INTLNEWS < 0.635, -0.00614185, if (MAX_MIN_RANK < 1.0, -0.00349054, 0.0109974)), 0.0043811), 0.00439437)), 0.00656011) +
+if (INTLNEWS < 0.185, if (INTLNEWS < 0.115, if (DAY_PD_HITS_RATIO < 0.105, 0.016428, if (AVG_RANK < 8.365, if (ISTITLE_AVG < 0.435, if (DAY_PD_HITS_RATIO < 11.25, 0.00358374, -0.0162655), -7.99475E-4), -0.00198299)), -0.00662532), 8.7188E-4) +
+if (WEEKAVG < 0.64, -4.7051E-4, if (DAY_PREV_DAY_HITS_FRAC < 0.965, 2.24471E-4, if (BUSINESS < 0.53, if (LOCALNEWS < 0.27, 0.00103714, if (NATIONALNEWS < 0.155, 0.0140465, -0.00245531)), if (BUSINESS < 0.645, 0.0237968, 0.00736313)))) +
+if (LOCALNEWS < 0.05, -5.73509E-4, if (MAX_SCORE < 253515.0, -0.00114612, if (SUPERDUPER_AVG < 0.315, if (MAX_SCORE < 255248.0, 0.0174812, if (PUB_TODAY_AVG < 0.05, -0.00327708, if (DAY_PD_HITS_RATIO < 0.425, 0.0118621, 0.0033546))), -0.00154643))) +
+if (MISC < 0.105, if (DAY_WEEK_AVG_RATIO < 10.06, if (DAY_WEEK_AVG_RATIO < 9.235, if (MISC < 0.05, if (WEEKAVG < 0.215, -0.00242466, if (DAY_PREV_DAY_HITS_FRAC < 0.985, -9.73666E-5, 0.00142133)), -0.00753159), -0.00863543), 0.0086817), 0.0102311) +
+if (DAY_PD_HITS_RATIO < 47.5, if (DAY_PD_HITS_RATIO < 32.5, if (DAY_PREV_DAY_HITS_FRAC < 0.405, if (PUB_TODAY_AVG < 0.155, if (DAY_WEEK_AVG_RATIO < 0.67, -0.00300426, 0.0155993), -1.60259E-4), -3.24256E-4), -0.0118989), if (MAX_SCORE < 286380.0, 0.0153486, 6.7452E-4)) +
+if (SUPERDUPER_AVG < 0.105, 9.4466E-4, if (MAX_SCORE < 277301.0, if (ISABSTRACT_AVG < 0.39, if (DAY_PREV_DAY_HITS_FRAC < 0.795, -9.24515E-4, if (SPORTS < 0.275, if (ENTERTAINMENT < 0.05, -0.00695203, 0.00896934), -0.0197272)), 1.67123E-4), 5.7116E-4)) +
+if (DAY_WEEK_AVG_RATIO < 3.635, if (DAY_LW_DAY_HITS_RATIO < 21.25, if (PREV_DAY_HITS < 19.5, -1.99036E-4, if (INTLNEWS < 0.25, -0.00106649, 0.0081603)), -0.0104178), if (DAY_WEEK_AVG_RATIO < 3.845, if (DAY_HITS_FRAC < 0.94, 0.00340684, 0.0209321), 0.00113853)) +
+if (REGIONALNEWS < 0.275, if (LAW < 0.105, if (DAY_WEEK_AVG_RATIO < 6.01, 1.36175E-4, if (NATIONALNEWS < 0.31, if (DAY_HITS_FRAC < 0.895, 0.00575266, if (AVG_RANK < 7.53, -0.0119194, -0.00112094)), -0.0166441)), 0.00794833), -0.0103064) +
+if (TOPSTORY < 0.315, -7.18271E-4, if (DAY_PD_HITS_RATIO < 0.13, -0.0105571, if (DAY_WEEK_AVG_RATIO < 5.82, if (DAY_WEEK_AVG_RATIO < 4.82, if (DAY_LW_DAY_HITS_RATIO < 11.5, 0.00414548, -0.010294), 0.0157636), if (ISABSTRACT_AVG < 0.135, -0.0110257, 0.00663564)))) +
+if (MAX_SCORE < 362776.0, if (MAX_SCORE < 361504.0, 1.23708E-4, 0.0215766), if (INTLNEWS < 0.155, if (WEEKAVG < 2.36, -0.00603082, 0.00760337), if (TOPSTORY < 0.275, -7.04669E-4, if (PUB_TODAY_AVG < 0.86, 0.0139844, -0.00288551)))) +
+if (REGIONALNEWS < 0.115, if (DAY_PREV_DAY_HITS_FRAC < 0.725, if (DAY_LW_DAY_HITS_RATIO < 21.25, if (MAX_SCORE < 242944.0, 4.82612E-4, -0.00179648), -0.0123423), 6.23923E-4), if (DAY_HITS < 4.5, if (DAY_PREV_DAY_HITS_FRAC < 0.87, 0.00798437, -0.0109132), 0.0140617)) +
+if (MAX_SCORE < 322221.0, 4.09287E-4, if (MAX_SCORE < 334601.0, -0.00880555, if (AVG_RANK < 7.58, if (SUPERDUPER_AVG < 0.295, -0.00215568, -0.0124233), if (NATIONALNEWS < 0.21, 0.00160963, if (ISABSTRACT_AVG < 0.185, -0.00703363, 0.00777402))))) +
+if (ENTERTAINMENT < 0.21, -1.9687E-4, if (DAY_PD_HITS_RATIO < 15.75, if (SUPERDUPER_AVG < 0.415, if (MAX_MIN_RANK < 3.0, -0.00268736, if (MAX_MIN_RANK < 5.0, 0.0078927, if (DAY_PD_HITS_RATIO < 2.835, 0.00387939, -0.00410318))), -0.00538175), 0.0142121)) +
+if (SPORTS < 0.415, -3.44051E-5, if (MAX_MIN_RANK < 1.0, -0.00501256, if (SUPERDUPER_AVG < 0.05, if (MAX_SCORE < 229196.0, -1.5078E-4, if (MAX_SCORE < 258856.0, if (ISTITLE_AVG < 0.355, 0.0314869, 0.00734956), 0.00421683)), 8.4287E-4))) +
+if (MAX_MIN_RANK < 9.0, if (MAX_SCORE < 382719.0, if (MAX_SCORE < 362503.0, 3.58027E-5, if (MAX_SCORE < 364403.0, -0.0154942, -0.00276027)), 0.00210644), if (WEEKAVG < 1.36, if (MAX_SCORE < 269970.0, -0.00118638, 0.0203373), -0.00169747)) +
+if (PREV_DAY_HITS < 26.5, if (MAX_SCORE < 187757.0, -0.00124276, 4.03197E-4), if (DAY_PREV_DAY_HITS_FRAC < 0.795, if (DAY_PREV_DAY_HITS_FRAC < 0.675, -0.00288805, 0.00935152), if (WEEKAVG < 11.93, if (DAY_PD_HITS_RATIO < 0.27, -0.00585669, -0.0173104), 0.003142))) +
+if (MAX_MIN_RANK < 7.0, -5.68858E-4, if (PUB_TODAY_AVG < 0.885, if (TOPSTORY < 0.27, if (INTLNEWS < 0.365, -0.0025302, if (AVG_RANK < 6.9, -0.00341549, 0.00979915)), 0.0137197), if (DAY_HITS < 10.5, 0.0179211, -2.30543E-4))) +
+if (MIN_RANK < 7.0, -6.2463E-4, if (DAY_PREV_DAY_HITS_FRAC < 0.475, if (AVG_RANK < 9.745, 0.0142049, -3.73764E-4), if (PUB_TODAY_AVG < 0.27, if (MAX_SCORE < 276860.0, -0.00522975, 0.00307189), if (PREV_DAY_HITS < 1.5, 2.5413E-4, 0.00490254)))) \ No newline at end of file
diff --git a/searchlib/src/test/files/ranking04.expression b/searchlib/src/test/files/ranking04.expression
new file mode 100644
index 00000000000..087d305cd95
--- /dev/null
+++ b/searchlib/src/test/files/ranking04.expression
@@ -0,0 +1,103 @@
+if (AVG_SCORE < 236210.0, if (AVG_SCORE < 151678.0, 0.205803, 0.214904), if (ISABSTRACT_AVG < 0.155, if (WEEKAVG < 0.93, if (TOPSTORY < 0.365, 0.234927, 0.262252), if (MAX_SCORE < 271454.0, 0.236303, 0.251477)), 0.224603)) +
+if (AVG_SCORE < 240282.0, if (AVG_SCORE < 153656.0, -0.0157043, -0.00743688), if (ISTITLE_AVG < 0.705, if (WEEKAVG < 0.93, 0.0116703, if (MAX_SCORE < 266499.0, 0.00213746, if (TWO_DAY_WF < 0.826656, 0.0214705, 0.0350738))), 0.00284844)) +
+if (AVG_SCORE < 239849.0, if (AVG_SCORE < 230612.0, -0.0105243, -2.07603E-4), if (ISTITLE_AVG < 0.95, if (MAX_MIN_SCORE < 36505.8, if (WEEKAVG < 10.925, 0.0103073, 0.0445006), if (MAX_SCORE < 267687.0, 0.00115576, 0.023751)), 0.00109943)) +
+if (AVG_SCORE < 242149.0, if (AVG_SCORE < 153383.0, -0.0131014, if (WEEKAVG < 1.5, -0.00720755, -3.77073E-4)), if (ISTITLE_AVG < 0.705, if (MAX_MIN_SCORE < 36505.0, if (BUSINESS < 0.13, 0.0164936, 0.00560036), 0.0218971), 0.00392608)) +
+if (MAX_SCORE < 264139.0, if (MIN_SCORE < 222136.0, -0.0079708, 0.00140823), if (ISABSTRACT_AVG < 0.315, if (DAY_WEEK_AVG_DERIV < 10.5, if (BUSINESS < 0.105, 0.016512, 0.00726199), 0.0306897), if (SPORTS < 0.42, -9.76569E-4, 0.0183973))) +
+if (AVG_SCORE < 231394.0, -0.00698348, if (ISTITLE_AVG < 0.645, if (MAX_SCORE < 271880.0, 0.00669893, if (AVG_SCORE < 281369.0, 0.0209096, 0.0117951)), if (MIN_SCORE < 318875.0, if (WEEKAVG < 1.5, -0.00428011, 0.00596324), 0.0116652))) +
+if (MIN_SCORE < 222028.0, if (SUPERDUPER_AVG < 0.27, -0.00761706, if (INTLNEWS < 0.535, -0.00274344, 0.0225782)), if (WEEKAVG < 0.93, if (ISTITLE_AVG < 0.71, 0.00689051, -0.00500438), if (MIN_SCORE < 319119.0, 0.00977814, 0.0200288))) +
+if (MIN_SCORE < 222028.0, if (AVG_SCORE < 158974.0, -0.00918892, if (NUM_WORDS < 1.5, if (WEEKAVG < 2.93, -0.00158808, 0.0119896), -0.00568155)), if (ISTITLE_AVG < 0.95, if (ISABSTRACT_AVG < 0.155, 0.0116413, 0.00150493), -0.00110515)) +
+if (AVG_SCORE < 241264.0, if (MIN_SCORE < 132718.0, -0.00978209, if (WEEKAVG < 0.93, -0.00610293, -2.95273E-4)), if (ISABSTRACT_AVG < 0.185, if (DAY_LW_DAY_HITS_RATIO < 11.835, 0.00900634, 0.0221056), if (WEEKAVG < 1.07, -0.00392509, 0.00615921))) +
+if (AVG_SCORE < 233949.0, -0.00503156, if (ISTITLE_AVG < 0.645, if (TOPSTORY < 0.05, if (PUB_TODAY_AVG < 0.105, -0.00286006, if (DAY_PD_HITS_RATIO < 0.65, 0.0275142, 0.00616295)), 0.0129407), if (DAY_LW_DAY_HITS_DERIV < 7.5, -0.00186065, 0.00771893))) +
+if (AVG_SCORE < 241955.0, if (MAX_SCORE < 170767.0, -0.00748858, -0.00266952), if (MIN_SCORE < 321219.0, if (TOPSTORY < 0.05, -0.00143781, if (AVG_RANK < 9.735, 0.00571239, 0.0197833)), if (WEEKAVG < 0.93, 0.00690536, 0.017048))) +
+if (AVG_SCORE < 245333.0, if (TOPSTORY < 0.355, -0.00376047, 0.00963479), if (WEEKAVG < 0.93, 7.35298E-4, if (ISABSTRACT_AVG < 0.705, if (TWO_DAY_WF < 0.872534, if (MAX_MIN_SCORE < 52145.2, 0.00973324, -0.00849394), 0.0177153), 3.37073E-4))) +
+if (MIN_SCORE < 219800.0, -0.00352861, if (ISTITLE_AVG < 0.73, if (BUSINESS < 0.05, if (PREV_DAY_HITS < 7.5, if (PUB_TODAY_AVG < 0.11, 0.00174614, 0.0120552), 0.0184693), 0.00396387), if (DAY_LW_DAY_HITS_DERIV < 7.5, -0.00165207, 0.00838864))) +
+if (AVG_SCORE < 332830.0, if (AVG_SCORE < 221523.0, if (ISABSTRACT_AVG < 0.845, -0.00518819, if (MAX_MIN_SCORE < 41116.5, -0.0022638, 0.010561)), if (MIN_SCORE < 275020.0, if (ISABSTRACT_AVG < 0.13, 0.00532774, -0.00140785), -0.00673306)), 0.0089962) +
+if (AVG_SCORE < 230217.0, -0.00305127, if (WEEKAVG < 0.93, 7.79225E-4, if (BUSINESS < 0.05, if (LOCALNEWS < 0.47, if (MAX_MIN_SCORE < 28240.8, 0.0179967, 0.00806848), -0.00831202), if (AVG_SCORE < 340223.0, 2.31883E-4, 0.0140119)))) +
+if (AVG_SCORE < 245333.0, -0.00229594, if (ISABSTRACT_AVG < 0.315, if (LOCALNEWS < 0.05, if (DAY_LW_DAY_HITS_RATIO < 1.75, -0.0039683, 0.00569577), if (PREV_DAY_HITS < 8.5, 0.007769, if (DAY_HITS < 2.5, 0.0344185, 0.0117709))), -6.76423E-4)) +
+if (MAX_SCORE < 249988.0, if (INTLNEWS < 0.105, -0.00430418, if (AVG_SCORE < 158414.0, -0.00416318, if (MAX_SCORE < 242790.0, 0.00543383, -0.00915253))), if (ISABSTRACT_AVG < 0.155, 0.00464962, if (SPORTS < 0.365, -0.00278462, 0.0111898))) +
+if (AVG_SCORE < 249330.0, -0.00159136, if (WEEKAVG < 1.07, if (TOPSTORY < 0.07, -0.00243507, 0.00585214), if (TWO_DAY_WF < 0.9518, if (EIGHT_HOUR_WF < 0.108586, if (INTLNEWS < 0.42, 0.00435459, 0.0191599), -0.00770634), 0.013571))) +
+if (AVG_SCORE < 332253.0, if (TOPSTORY < 0.355, if (BUSINESS < 0.05, 6.41958E-4, -0.00274201), 0.00886024), if (DAY_PD_HITS_DERIV < 1.5, if (AVG_SCORE < 336554.0, 0.0191918, if (SUPERDUPER_AVG < 0.415, -0.00116436, 0.0183934)), 0.0116471)) +
+if (MAX_SCORE < 249072.0, if (INTLNEWS < 0.185, -0.00383726, if (ISABSTRACT_AVG < 0.61, -0.00202529, if (WEEKAVG < 0.785, -0.0038571, if (AVG_SCORE < 169471.0, 0.00474293, 0.0278332)))), if (BUSINESS < 0.05, 0.00491784, -7.28088E-4)) +
+if (AVG_SCORE < 223608.0, -0.00242896, if (PREV_DAY_HITS < 7.5, if (ISABSTRACT_AVG < 0.05, if (MAX_MIN_RANK < 7.0, 0.00151785, 0.0118374), -0.00165444), if (SPORTS < 0.34, if (DAY_WEEK_AVG_DERIV < -1.93, -0.00307953, 0.00717407), 0.0154963))) +
+if (MIN_SCORE < 319241.0, if (INTLNEWS < 0.73, if (TOPSTORY < 0.355, if (NUM_WORDS < 2.5, if (PREV_DAY_HITS < 3.5, -0.00228523, 0.00146239), -0.00850081), 0.00776825), 0.0160753), if (DAY_LW_DAY_HITS_DERIV < 12.5, 0.00439757, 0.0197836)) +
+if (INTLNEWS < 0.705, if (TOPSTORY < 0.355, if (MIN_SCORE < 323992.0, if (LIFESTYLE < 0.13, if (LOCALNEWS < 0.315, -2.91455E-4, -0.00459663), -0.00868291), if (MIN_SCORE < 325835.0, 0.0255955, 0.00222024)), 0.00689548), 0.0147049) +
+if (DAY_WEEK_AVG_DERIV < 41.5, if (AVG_SCORE < 222620.0, -0.00230434, if (ISTITLE_AVG < 0.95, if (DAY_LW_DAY_HITS_RATIO < 2.9, if (ISABSTRACT_AVG < 0.685, -9.8145E-4, 0.0175646), if (FOUR_HOUR_WF < 0.0415469, 0.00693887, 3.52143E-4)), -0.00149738)), 0.0156711) +
+if (BUSINESS < 0.105, if (DAY_WEEK_AVG_RATIO < 5.705, if (AVG_SCORE < 155902.0, -0.0033031, if (WEEKAVG < 0.64, if (MAX_SCORE < 363895.0, -0.00281287, if (MAX_MIN_SCORE < 19200.5, -0.00201482, 0.0209412)), 0.00313704)), 0.0198315), -0.0020926) +
+if (DAY_PD_HITS_DERIV < -8.5, if (SPORTS < 0.42, if (TOPSTORY < 0.05, -0.00256178, 0.0069554), 0.0189865), if (MAX_SCORE < 455757.0, if (LIFESTYLE < 0.13, if (DAY_WEEK_AVG_RATIO < 4.535, -0.00125806, 0.00573954), -0.00869664), 0.00982766)) +
+if (AVG_SCORE < 158740.0, -0.00306382, if (WEEKAVG < 0.93, if (TOPSTORY < 0.365, -0.00140654, 0.00834836), if (BUSINESS < 0.05, if (MAX_MIN_SCORE < 52064.2, if (MAX_MIN_RANK < 7.0, 0.00487329, 0.0143334), -0.00637212), -1.62153E-4))) +
+if (DAY_PD_HITS_DERIV < -4.5, if (LOCALNEWS < 0.355, if (HEALTH < 0.05, if (MAX_MIN_SCORE < 42320.2, 1.65828E-4, if (TWELVE_HOUR_WF < 0.0923295, 0.00978237, -0.00925785)), 0.0176032), -0.00980315), if (DAY_WEEK_AVG_DERIV < 65.215, -9.40015E-4, 0.0153051)) +
+if (INTLNEWS < 0.53, if (DAY_PD_HITS_RATIO < 0.305, if (SPORTS < 0.115, 2.04707E-4, if (MAX_SCORE < 258205.0, 0.00170055, if (AVG_SCORE < 263393.0, 0.0247726, 0.00690842))), -0.00116708), if (DAY_LW_DAY_HITS_DERIV < 6.5, -5.66203E-5, 0.0136829)) +
+if (TOPSTORY < 0.355, if (MAX_SCORE < 455757.0, if (PREV_DAY_HITS < 59.5, if (MIN_SCORE < 132399.0, -0.00370024, -2.34946E-5), 0.0131047), if (SUPERDUPER_AVG < 0.105, -0.00138025, 0.0159936)), if (AVG_RANK < 9.55, 0.00325951, 0.0248619)) +
+if (TOPSTORY < 0.21, if (PREV_DAY_HITS < 40.5, if (DAY_WEEK_AVG_RATIO < 2.665, -0.00132885, if (AVG_SCORE < 321396.0, 3.75419E-4, 0.0087578)), 0.0103933), if (MAX_SCORE < 258688.0, -0.00128842, if (DAY_LW_DAY_HITS_RATIO < 10.5, 0.00789361, -0.00472212))) +
+if (LIFESTYLE < 0.13, if (MAX_SCORE < 170767.0, -0.00265193, if (REGIONALNEWS < 0.225, if (INTLNEWS < 0.73, if (AVG_SCORE < 446461.0, if (DAY_LW_DAY_HITS_RATIO < 11.835, 3.02165E-4, 0.00420729), 0.0104384), 0.0112014), -0.0150576)), -0.00724807) +
+if (TOPSTORY < 0.21, if (LW_DAY_HITS < 0.5, -2.72826E-4, -0.0037519), if (MAX_SCORE < 249540.0, -0.00257574, if (DAY_WEEK_AVG_DERIV < 3.285, 0.00890149, if (BUSINESS < 0.05, if (EIGHT_HOUR_WF < 0.108586, -0.00485603, 0.0137625), -0.0117843)))) +
+if (BUSINESS < 0.05, if (PREV_DAY_HITS < 2.5, if (MAX_MIN_RANK < 9.0, -1.15002E-4, 0.013627), 0.00426589), if (SPORTS < 0.05, if (WEEKAVG < 1.07, -0.00209775, 0.00207151), if (MAX_SCORE < 282458.0, -0.00363773, -0.0170095))) +
+if (PREV_DAY_HITS < 6.5, if (INTLNEWS < 0.73, -8.40229E-4, 0.0123079), if (TWO_DAY_WF < 0.647854, -0.00158583, if (DAY_WEEK_AVG_RATIO < 0.525, -0.00426295, if (TWELVE_HOUR_WF < 0.0863095, 0.010427, if (WEEKAVG < 2.5, -0.00797465, 0.00511912))))) +
+if (DAY_WEEK_AVG_DERIV < 43.215, if (MAX_SCORE < 171575.0, -0.00279218, if (INTLNEWS < 0.73, if (LIFESTYLE < 0.13, if (AVG_RANK < 5.29, if (AVG_RANK < 4.145, -0.00292507, 0.0109271), 2.53288E-4), -0.00584756), 0.0132182)), 0.00991648) +
+if (DAY_WEEK_AVG_RATIO < 0.305, -0.00885189, if (NATIONALNEWS < 0.105, -5.31735E-4, if (TWELVE_HOUR_WF < 0.685185, if (SPORTS < 0.465, if (ISTITLE_AVG < 0.105, if (DAY_PD_HITS_RATIO < 0.365, -0.00269593, 0.0112221), 8.19631E-4), 0.0143634), -0.00725209))) +
+if (AVG_SCORE < 340384.0, if (AVG_SCORE < 336375.0, -5.08552E-4, -0.0113797), if (MIN_SCORE < 326287.0, 0.0209452, if (ONE_DAY_WF < 0.567629, -0.00150548, if (ENTERTAINMENT < 0.05, if (AVG_SCORE < 375038.0, 0.0210937, 0.00330692), -0.00420613)))) +
+if (AVG_SCORE < 245150.0, -7.56654E-4, if (FOUR_HOUR_WF < 0.436508, if (TWO_DAY_WF < 0.876894, if (DAY_HITS < 4.5, 0.00184962, -0.00446764), if (AVG_SCORE < 247846.0, 0.0195391, if (MAX_SCORE < 264008.0, -0.0119329, 0.0045953))), -0.0101769)) +
+if (TOPSTORY < 0.39, if (SPORTS < 0.73, -1.67518E-4, if (WEEKAVG < 0.785, 9.19437E-5, 0.00941928)), if (AVG_RANK < 9.55, if (AVG_RANK < 8.9, if (MAX_SCORE < 270912.0, 0.0142439, -7.62818E-4), -0.00563315), 0.019371)) +
+if (MAX_MIN_SCORE < 16050.8, -0.00187676, if (LW_DAY_HITS < 3.5, if (MAX_SCORE < 178349.0, -0.00168833, if (WEEKAVG < 0.93, -9.59413E-5, if (FOUR_HOUR_WF < 0.0415469, 0.00437212, if (MAX_MIN_SCORE < 26173.2, 0.013711, -0.00373247)))), -0.00746144)) +
+if (SPORTS < 0.73, if (INTLNEWS < 0.53, -6.62401E-4, if (TWO_DAY_WF < 0.564784, 0.0155579, if (DAY_WEEK_AVG_RATIO < 4.08, -0.00298146, 0.015513))), if (DAY_PD_HITS_RATIO < 0.31, 0.0153445, if (SUPERDUPER_AVG < 0.155, 0.00486013, -0.00508073))) +
+if (MAX_MIN_RANK < 7.0, -2.14923E-4, if (ISTITLE_AVG < 0.55, if (MAX_MIN_SCORE < 41838.0, if (MAX_SCORE < 235701.0, 0.00139705, 0.0257242), 0.00119704), if (NATIONALNEWS < 0.115, -0.00295678, if (MIN_SCORE < 211652.0, 4.9411E-4, 0.0246476)))) +
+if (SPORTS < 0.47, if (SPORTS < 0.05, 2.69559E-4, -0.00314174), if (MAX_SCORE < 187840.0, -0.00191667, if (ISABSTRACT_AVG < 0.415, if (MAX_MIN_RANK < 5.0, -0.00316349, 0.00674809), if (PREV_DAY_HITS < 2.5, 0.00653246, 0.0230973)))) +
+if (NATIONALNEWS < 0.105, -8.06543E-4, if (DAY_PD_HITS_RATIO < 6.75, if (DAY_WEEK_AVG_RATIO < 3.505, if (MIN_RANK < 1.0, 0.0183563, if (NATIONALNEWS < 0.13, 0.0095701, 0.00111755)), 0.0178329), if (MAX_MIN_SCORE < 42048.8, 0.00161585, -0.0115306))) +
+if (DAY_LW_DAY_HITS_RATIO < 2.225, if (ONE_DAY_WF < 0.370833, if (ENTERTAINMENT < 0.415, -0.00110057, 0.00599021), -0.0100266), if (LOCALNEWS < 0.645, if (FOUR_HOUR_WF < 0.0402559, 0.00156752, -0.00187101), if (TWELVE_HOUR_WF < 0.183333, -7.56081E-4, 0.0221542))) +
+if (DAY_WEEK_AVG_DERIV < -0.5, if (TOPSTORY < 0.185, if (SPORTS < 0.775, if (INTLNEWS < 0.435, -8.58072E-4, if (TWO_DAY_WF < 0.607692, 0.0133903, -7.14716E-4)), 0.00949831), 0.00678994), if (LIFESTYLE < 0.13, -2.0835E-4, -0.00685168)) +
+if (MAX_MIN_RANK < 3.0, if (TOPSTORY < 0.365, if (INTLNEWS < 0.585, if (DAY_WEEK_AVG_DERIV < 3.36, 0.00152356, if (MAX_MIN_SCORE < 41565.0, -0.00224511, -0.0154069)), 0.00771362), 0.0141704), if (POLITICS < 0.27, -7.41127E-4, 0.0123356)) +
+if (REGIONALNEWS < 0.21, if (INTLNEWS < 0.415, if (NATIONALNEWS < 0.105, -7.72108E-4, if (TWO_DAY_WF < 0.587963, -0.00126111, 0.00367856)), if (MAX_MIN_SCORE < 41771.0, 6.62317E-4, if (MAX_MIN_SCORE < 45378.8, 0.0229089, 0.00361364))), -0.0100665) +
+if (MAX_MIN_SCORE < 46045.5, if (ISTITLE_AVG < 0.415, if (ONE_DAY_WF < 0.0658009, -0.00148948, if (MIN_SCORE < 226178.0, -2.91172E-4, 0.00500722)), if (MAX_MIN_SCORE < 45527.5, -9.53746E-4, 0.00916764)), if (EIGHT_HOUR_WF < 0.0267094, -5.42017E-4, -0.00644438)) +
+if (DAY_PD_HITS_DERIV < -4.5, if (DAY_PD_HITS_RATIO < 0.16, -0.00116809, if (MAX_SCORE < 178149.0, -0.00327374, if (MIN_SCORE < 172046.0, 0.017586, if (MIN_SCORE < 221890.0, -0.00260826, if (MIN_SCORE < 227242.0, 0.0209327, 0.00475773))))), -4.92544E-4) +
+if (DAY_LW_DAY_HITS_RATIO < 0.83, if (AVG_SCORE < 237778.0, -0.00220195, -0.0171686), if (SPORTS < 0.79, if (SPORTS < 0.05, 2.36122E-4, if (MAX_MIN_SCORE < 46031.8, if (MAX_MIN_SCORE < 7978.0, -0.011323, -6.09338E-4), -0.0077154)), 0.00365925)) +
+if (DAY_WEEK_AVG_DERIV < -3.93, 0.00918467, if (NATIONALNEWS < 0.105, -5.8362E-4, if (EIGHT_HOUR_WF < 0.480769, if (ISTITLE_AVG < 0.155, if (DAY_PD_HITS_RATIO < 0.39, -0.00366457, if (DAY_HITS < 5.5, 0.0162937, -0.00117921)), 0.001006), -0.00659191))) +
+if (SPORTS < 0.705, -5.56203E-4, if (MAX_SCORE < 165481.0, -0.00498224, if (SUPERDUPER_AVG < 0.315, if (DAY_WEEK_AVG_DERIV < 0.36, if (AVG_RANK < 5.73, 0.00571545, 0.0237979), if (DAY_PD_HITS_DERIV < 3.5, -5.91932E-4, 0.011012)), -0.00679759))) +
+if (REGIONALNEWS < 0.21, if (MISC < 0.105, if (TWO_DAY_WF < 0.492284, if (FOUR_HOUR_WF < 0.00462963, if (MAX_MIN_SCORE < 37344.8, -0.00252831, 0.00347229), if (MIN_SCORE < 216377.0, -0.00279429, -0.0169557)), 4.0157E-4), 0.00880965), -0.010143) +
+if (DAY_LW_DAY_HITS_RATIO < 0.645, -0.00704886, if (AVG_SCORE < 291527.0, if (MAX_SCORE < 287802.0, 1.91564E-4, if (ISTITLE_AVG < 0.95, 0.0133402, 5.16175E-4)), if (AVG_SCORE < 317516.0, -0.0121501, if (SPORTS < 0.315, -0.00154239, 0.00704715)))) +
+if (INTLNEWS < 0.705, if (DAY_PD_HITS_RATIO < 5.045, if (WEEKAVG < 0.785, -9.4492E-4, if (ONE_DAY_WF < 0.644009, 9.62924E-4, 0.00910092)), if (LOCALNEWS < 0.295, if (ISTITLE_AVG < 0.185, -0.0103214, -0.0014478), 0.0144627)), 0.00968091) +
+if (ONE_DAY_WF < 0.328096, if (ONE_DAY_WF < 0.246773, if (TOPSTORY < 0.47, 1.96656E-4, 0.0166177), if (MIN_SCORE < 224862.0, 0.00164784, if (AVG_SCORE < 264251.0, 0.0251889, if (AVG_SCORE < 339362.0, -0.00430853, 0.0156826)))), -8.83446E-4) +
+if (MISC < 0.105, if (MIN_SCORE < 445730.0, if (MIN_SCORE < 371741.0, if (REGIONALNEWS < 0.21, 5.14634E-4, -0.0078218), if (INTLNEWS < 0.145, if (ISTITLE_AVG < 0.105, 0.00884708, -0.00706471), -0.0155934)), 0.00871224), 0.0109724) +
+if (AVG_RANK < 5.07, if (MIN_SCORE < 237760.0, -8.82014E-5, 0.0155208), if (ENTERTAINMENT < 0.05, if (MAX_MIN_SCORE < 14449.2, if (TOPSTORY < 0.225, -0.00152853, -0.0123906), if (TOPSTORY < 0.365, 3.30374E-4, 0.00622094)), -0.0026428)) +
+if (LW_DAY_HITS < 0.5, if (MISC < 0.105, 2.26343E-4, 0.00938518), if (MAX_SCORE < 254898.0, if (MAX_SCORE < 249948.0, -0.0010896, 0.0106866), if (ONE_DAY_WF < 0.537727, if (MAX_MIN_RANK < 5.0, -6.68387E-4, -0.0113993), -0.0159024))) +
+if (WEEKAVG < 0.93, if (MIN_RANK < 1.0, 0.0105569, -0.00122424), if (HEALTH < 0.105, if (SPORTS < 0.47, if (NATIONALNEWS < 0.105, -0.00127666, 0.00259145), 0.00361046), if (PREV_DAY_HITS < 4.5, -0.00160398, 0.0210051))) +
+if (MAX_MIN_SCORE < 62647.2, if (ISTITLE_AVG < 0.05, if (MAX_MIN_SCORE < 45894.2, if (DAY_PD_HITS_RATIO < 0.675, if (MAX_MIN_SCORE < 41917.0, if (MIN_SCORE < 227128.0, 0.00157123, -0.0091657), 0.00650689), 7.34575E-4), -0.00919073), -2.54308E-4), 0.0114536) +
+if (BUSINESS < 0.05, if (DAY_WEEK_AVG_RATIO < 0.505, if (LOCALNEWS < 0.115, if (DAY_WEEK_AVG_RATIO < 0.36, if (DAY_WEEK_AVG_RATIO < 0.275, 0.0111617, -0.00883723), if (DAY_PD_HITS_RATIO < 0.185, 0.0229969, -9.84798E-4)), -0.00222586), 2.55018E-4), -0.00124642) +
+if (DAY_LW_DAY_HITS_RATIO < 0.645, -0.0067218, if (WEEKAVG < 26.715, if (AVG_RANK < 9.225, 1.58773E-4, if (TWO_DAY_WF < 0.976136, 0.00457414, if (DAY_WEEK_AVG_DERIV < 5.785, -0.00480445, if (DAY_LW_DAY_HITS_DERIV < 27.5, 0.016426, -0.00308634)))), -0.00901968)) +
+if (LOCALNEWS < 0.295, if (LOCALNEWS < 0.155, -3.03017E-4, if (MIN_SCORE < 222112.0, 2.10471E-4, if (ENTERTAINMENT < 0.05, if (ISTITLE_AVG < 0.185, 0.0201204, 0.00543219), -0.00443157))), if (DAY_PD_HITS_DERIV < 6.5, -0.00312853, 0.0100428)) +
+if (DAY_PD_HITS_RATIO < 32.5, if (DAY_PD_HITS_DERIV < 20.5, if (MAX_MIN_RANK < 7.0, -1.09244E-4, if (DAY_LW_DAY_HITS_DERIV < 9.5, if (MIN_SCORE < 215422.0, 4.54762E-4, if (ISABSTRACT_AVG < 0.225, 0.0146831, -7.90241E-4)), -0.00837207)), -0.00476979), 0.0102445) +
+if (MAX_MIN_SCORE < 45353.5, if (MAX_MIN_SCORE < 44594.8, if (MISC < 0.105, -9.71934E-5, 0.00961124), 0.00683718), if (TWELVE_HOUR_WF < 0.0451153, if (TOPSTORY < 0.05, if (ISTITLE_AVG < 0.79, -0.0112723, 0.00207341), 0.00460994), -0.00460433)) +
+if (SPORTS < 0.47, if (DAY_PD_HITS_RATIO < 4.725, if (DAY_WEEK_AVG_RATIO < 2.74, -3.01001E-4, if (INTLNEWS < 0.415, if (MIN_SCORE < 337522.0, 0.0052542, 0.0240026), -0.00664368)), if (DAY_WEEK_AVG_RATIO < 4.15, -0.00601665, 0.00254873)), 0.00244646) +
+if (TOPSTORY < 0.47, if (LW_DAY_HITS < 0.5, 5.1525E-4, if (ONE_DAY_WF < 0.398413, if (DAY_PD_HITS_RATIO < 0.61, -0.00298415, 0.00362271), if (AVG_SCORE < 242552.0, -0.004241, -0.0152224))), if (MAX_SCORE < 264598.0, 0.0026109, -0.013849)) +
+if (PREV_DAY_HITS < 59.5, if (MAX_SCORE < 455608.0, if (DAY_PD_HITS_DERIV < 64.0, if (SUPERDUPER_AVG < 0.725, if (MIN_SCORE < 132886.0, -0.0021355, 2.53917E-4), -0.0118492), 0.00977612), if (INTLNEWS < 0.145, 0.0130923, -0.00429049)), 0.0115797) +
+if (DAY_PD_HITS_RATIO < 0.115, -0.00460369, if (DAY_WEEK_AVG_RATIO < 0.455, 0.00654726, if (ISTITLE_AVG < 0.565, if (DAY_PD_HITS_DERIV < 38.0, if (DAY_WEEK_AVG_DERIV < -0.785, -0.00572704, 0.00211848), -0.0128642), if (MAX_SCORE < 261066.0, 1.64546E-4, -0.00330215)))) +
+if (DAY_WEEK_AVG_DERIV < 8.36, if (AVG_SCORE < 266020.0, 5.44955E-4, if (TOPSTORY < 0.21, if (MAX_SCORE < 343351.0, -0.00739666, -0.00159552), 0.00316353)), if (EIGHT_HOUR_WF < 0.117802, if (AVG_SCORE < 264897.0, 0.00293963, 0.016815), -4.81606E-4)) +
+if (AVG_SCORE < 446571.0, if (DAY_WEEK_AVG_RATIO < 4.59, if (MAX_SCORE < 390560.0, -8.80057E-6, if (PREV_DAY_HITS < 2.5, if (INTLNEWS < 0.135, -0.00440415, -0.0221297), 8.10032E-4)), if (ENTERTAINMENT < 0.15, 0.00319307, 0.0160496)), 0.00840475) +
+if (MAX_SCORE < 390244.0, if (AVG_SCORE < 360833.0, if (AVG_SCORE < 352194.0, 1.75857E-4, if (DAY_WEEK_AVG_DERIV < 0.855, -0.0187021, 0.00356778)), 0.00780008), if (INTLNEWS < 0.145, if (BUSINESS < 0.135, 0.00421198, -0.0102414), -0.00922112)) +
+if (ONE_DAY_WF < 0.605556, -4.61057E-4, if (MIN_SCORE < 332098.0, if (MAX_SCORE < 355711.0, if (WEEKAVG < 1.5, 2.20435E-5, 0.00562666), -0.0153817), if (INTLNEWS < 0.145, 0.018971, if (SUPERDUPER_AVG < 0.275, -0.00766663, 0.0121696)))) +
+if (ENTERTAINMENT < 0.05, 2.07331E-5, if (ENTERTAINMENT < 0.415, if (AVG_SCORE < 237084.0, -0.00148349, -0.00781033), if (AVG_SCORE < 340606.0, if (ONE_DAY_WF < 0.015625, 0.0143289, 0.00105025), if (WEEKAVG < 0.785, 0.00610972, -0.020138)))) +
+if (DAY_LW_DAY_HITS_DERIV < 17.5, if (DAY_LW_DAY_HITS_RATIO < 16.5, 1.88349E-4, 0.0147086), if (TWO_DAY_WF < 0.743223, if (MIN_SCORE < 212511.0, -0.00155285, -0.0125926), if (TWO_DAY_WF < 0.980566, 0.00635603, if (ISTITLE_AVG < 0.15, -0.0163438, -0.00197531)))) +
+if (AVG_SCORE < 281850.0, if (MAX_SCORE < 288032.0, 3.35293E-4, if (TWELVE_HOUR_WF < 0.358289, 0.00952171, -0.00693432)), if (EIGHT_HOUR_WF < 0.584928, if (MAX_MIN_SCORE < 2471.25, -0.0141419, if (MAX_MIN_SCORE < 6867.75, 0.010146, -0.00291703)), 0.00765541)) +
+if (PREV_DAY_HITS < 3.5, -7.47677E-4, if (EIGHT_HOUR_WF < 0.147108, if (AVG_RANK < 7.69, -8.12926E-4, if (ISABSTRACT_AVG < 0.95, 0.00246382, if (SUPERDUPER_AVG < 0.21, 0.00464639, 0.0245523))), if (ISTITLE_AVG < 0.05, -0.0184693, -8.90194E-4))) +
+if (HEALTH < 0.27, if (DAY_PD_HITS_RATIO < 5.045, 3.07572E-5, if (ISTITLE_AVG < 0.125, if (PUB_TODAY_AVG < 0.95, -0.0154824, -0.00294081), if (TWELVE_HOUR_WF < 0.226496, -0.00622506, 0.00166554))), if (ISABSTRACT_AVG < 0.17, 0.0165858, -0.00330948)) +
+if (DAY_WEEK_AVG_DERIV < 44.86, if (SUPERDUPER_AVG < 0.39, 3.33898E-4, if (TWO_DAY_WF < 0.825226, if (MAX_MIN_SCORE < 42418.5, -0.00881739, if (EIGHT_HOUR_WF < 0.0825189, if (BUSINESS < 0.05, 0.0117745, -0.00306987), -0.0111028)), 0.00218894)), 0.00834421) +
+if (MIN_SCORE < 230160.0, if (MIN_SCORE < 229092.0, -8.07888E-4, -0.0133966), if (TWO_DAY_WF < 0.518064, if (WEEKAVG < 1.5, if (AVG_SCORE < 359388.0, 0.00892968, -0.00763611), -0.00138809), if (MIN_SCORE < 232622.0, 0.0102781, -6.34821E-4))) +
+if (MAX_MIN_RANK < 7.0, -2.44784E-5, if (ISTITLE_AVG < 0.55, if (DAY_WEEK_AVG_DERIV < 4.57, if (DAY_WEEK_AVG_RATIO < 0.76, -8.94147E-4, if (MIN_SCORE < 215272.0, 0.00546979, 0.0273153)), -0.00734683), if (NATIONALNEWS < 0.21, -0.00138435, 0.00911761))) +
+if (DAY_WEEK_AVG_RATIO < 3.83, if (ENTERTAINMENT < 0.53, -2.83843E-4, if (ISABSTRACT_AVG < 0.21, 0.00786177, -7.5151E-4)), if (AVG_RANK < 9.465, if (INTLNEWS < 0.21, 0.00394069, -0.00383803), if (AVG_SCORE < 258669.0, 0.0034867, 0.0179637))) +
+if (SUPERDUPER_AVG < 0.725, if (SPORTS < 0.685, -2.3907E-4, if (AVG_SCORE < 264275.0, if (MIN_SCORE < 219502.0, -4.38684E-4, if (MAX_RANK < 9.0, 0.00337648, 0.0167784)), -0.00334013)), if (WEEKAVG < 12.785, -0.0162992, 2.52385E-5)) +
+if (ISABSTRACT_AVG < 0.895, -2.10253E-4, if (AVG_SCORE < 247839.0, if (PREV_DAY_HITS < 1.5, -3.98583E-4, if (TWO_DAY_WF < 0.551797, -6.35903E-4, if (MIN_SCORE < 141715.0, 0.00351871, if (INTLNEWS < 0.105, 0.00858437, 0.0254582)))), -0.00273198)) +
+if (ONE_DAY_WF < 0.605556, if (EIGHT_HOUR_WF < 0.0411953, 4.98775E-4, -0.00236496), if (AVG_SCORE < 342691.0, if (MAX_MIN_SCORE < 6080.75, if (MIN_SCORE < 236879.0, 0.00101981, -0.0190995), 0.00142291), if (INTLNEWS < 0.275, 0.0147214, -4.75944E-4))) +
+if (DAY_PD_HITS_DERIV < -13.5, if (WEEKAVG < 3.785, -0.00749366, if (PREV_DAY_HITS < 22.5, 0.0146922, if (TWO_DAY_WF < 0.822683, if (PUB_TODAY_AVG < 0.45, -0.00577822, 0.00590076), 0.00846642))), if (MISC < 0.105, -1.96119E-4, 0.0069636)) +
+if (DAY_WEEK_AVG_DERIV < -3.93, 0.00797481, if (DAY_WEEK_AVG_RATIO < 3.865, if (DAY_WEEK_AVG_RATIO < 3.61, if (DAY_WEEK_AVG_RATIO < 3.245, -4.72322E-4, 0.00505862), -0.00837491), if (TWELVE_HOUR_WF < 0.202675, -0.00493061, if (TWELVE_HOUR_WF < 0.36039, 0.0124758, 0.00250066)))) +
+if (MAX_MIN_SCORE < 60845.5, if (MAX_MIN_SCORE < 52128.0, if (MAX_MIN_SCORE < 51264.2, -1.32387E-4, 0.0106899), if (MIN_SCORE < 218318.0, -0.00277432, -0.0140369)), if (ISABSTRACT_AVG < 0.05, 0.0138962, if (MAX_MIN_SCORE < 61542.5, 0.00445669, -0.00606652))) +
+if (BUSINESS < 0.315, -6.66016E-4, if (AVG_RANK < 8.535, if (MAX_MIN_SCORE < 7884.5, 0.0134214, if (ISTITLE_AVG < 0.465, if (MIN_SCORE < 217640.0, -9.67523E-4, 0.0135374), 1.47833E-4)), if (DAY_PD_HITS_RATIO < 0.235, 0.00921797, -0.00182698))) +
+if (AVG_RANK < 5.29, if (AVG_SCORE < 318378.0, 0.00224509, 0.0161861), if (MAX_MIN_SCORE < 51537.0, if (MAX_MIN_SCORE < 50910.2, if (DAY_LW_DAY_HITS_DERIV < 71.0, -2.73537E-4, -0.00811121), 0.0109085), if (SPORTS < 0.415, -0.00253066, -0.0129268))) +
+if (LOCALNEWS < 0.61, if (WEEKAVG < 0.5, -0.00262523, if (NATIONALNEWS < 0.105, -6.35021E-4, if (ISTITLE_AVG < 0.155, 0.005402, 6.7829E-5))), if (TWELVE_HOUR_WF < 0.133929, -0.00168908, if (PUB_TODAY_AVG < 0.535, 0.00693807, 0.0227961))) +
+if (DAY_WEEK_AVG_DERIV < -0.785, -0.0021874, if (ENTERTAINMENT < 0.05, if (MAX_SCORE < 363930.0, 4.68954E-4, if (MAX_SCORE < 384272.0, if (DAY_PD_HITS_RATIO < 0.495, 0.00125669, 0.0177669), if (INTLNEWS < 0.21, 0.0060623, -0.00814847))), -0.00164111)) +
+if (MIN_SCORE < 334353.0, if (NUM_WORDS < 2.5, 5.19901E-5, if (AVG_SCORE < 316903.0, if (ISTITLE_AVG < 0.05, -0.0220661, -0.00683671), -0.00100144)), if (INTLNEWS < 0.13, 0.00532815, if (MIN_SCORE < 357183.0, 0.0067984, -0.00805372))) +
+if (INTLNEWS < 0.53, -4.58708E-5, if (TWELVE_HOUR_WF < 0.21385, if (LOCALNEWS < 0.05, if (TWO_DAY_WF < 0.585356, 0.00547473, if (MAX_SCORE < 244158.0, 0.00333793, -0.00880659)), 0.00966211), if (FOUR_HOUR_WF < 0.0240968, 0.019307, 0.00126046))) +
+if (BUSINESS < 0.05, if (DAY_PD_HITS_DERIV < 18.5, if (DAY_WEEK_AVG_DERIV < 12.93, if (INTLNEWS < 0.73, if (INTLNEWS < 0.315, 0.00145589, -0.00205678), 0.0114136), 0.0135475), if (MIN_RANK < 7.0, -0.0106123, 0.00101067)), -9.65519E-4) +
+if (PREV_DAY_HITS < 6.5, -5.0304E-4, if (TWO_DAY_WF < 0.825345, if (ONE_DAY_WF < 0.275028, -0.00134104, 0.00516485), if (TWO_DAY_WF < 0.861643, 0.0172774, if (LOCALNEWS < 0.05, -0.00143161, if (INTLNEWS < 0.21, 0.0022491, 0.0202453))))) +
+if (TWO_DAY_WF < 0.477226, if (BUSINESS < 0.685, if (MAX_MIN_SCORE < 41352.5, if (SUPERDUPER_AVG < 0.315, -0.00389642, -0.0130707), if (AVG_SCORE < 253118.0, -0.00411848, 0.0126594)), if (DAY_LW_DAY_HITS_RATIO < 1.71, 0.0118462, -0.00195941)), 6.56261E-4) +
+if (POLITICS < 0.235, if (MAX_MIN_SCORE < 8349.75, if (DAY_LW_DAY_HITS_RATIO < 5.335, 0.00316005, -0.00643477), if (MAX_MIN_SCORE < 16062.8, -0.00319606, if (MAX_MIN_SCORE < 16303.8, 0.0107361, -3.21466E-5))), if (ISABSTRACT_AVG < 0.05, 0.0148344, -0.00402193)) +
+if (ENTERTAINMENT < 0.585, if (MAX_MIN_SCORE < 36987.5, -0.00141265, 3.38741E-4), if (PUB_TODAY_AVG < 0.235, if (MAX_MIN_SCORE < 20990.0, 0.00745281, -0.0127174), if (AVG_SCORE < 239671.0, 3.61332E-4, if (ISTITLE_AVG < 0.735, 0.0208577, 0.00530017)))) +
+if (DAY_PD_HITS_DERIV < -3.5, if (HEALTH < 0.105, if (DAY_PD_HITS_RATIO < 0.165, -0.00212795, if (LW_DAY_HITS < 3.5, if (AVG_SCORE < 258650.0, if (ISABSTRACT_AVG < 0.225, -0.00378895, 0.00511293), 0.0070848), -0.0101628)), 0.0144615), -5.10098E-4) \ No newline at end of file
diff --git a/searchlib/src/test/files/ranking05.expression b/searchlib/src/test/files/ranking05.expression
new file mode 100644
index 00000000000..028979c9d12
--- /dev/null
+++ b/searchlib/src/test/files/ranking05.expression
@@ -0,0 +1,77 @@
+if (AVG_SCORE < 240274.0, if (AVG_SCORE < 152115.0, 0.222147, 0.231999), if (ISABSTRACT_AVG < 0.13, if (WEEKAVG < 1.785, 0.254209, if (TWO_DAY_WF < 0.849242, 0.260625, 0.274218)), if (MIN_SCORE < 328158.0, 0.240699, 0.25683))) +
+if (MIN_SCORE < 224388.0, if (AVG_SCORE < 229835.0, if (MAX_SCORE < 171144.0, -0.0167726, -0.0105451), -0.00242754), if (ISABSTRACT_AVG < 0.13, if (WEEKAVG < 1.36, 0.0097257, 0.0198661), if (MIN_SCORE < 330678.0, -3.0123E-4, 0.0145117))) +
+if (AVG_SCORE < 240820.0, if (AVG_SCORE < 159292.0, -0.0146681, -0.00755839), if (ISABSTRACT_AVG < 0.21, if (WEEKAVG < 1.64, 0.0094277, if (AVG_RANK < 7.1, 0.00789525, 0.020449)), if (TOPSTORY < 0.05, -0.00410248, 0.00584918))) +
+if (MIN_SCORE < 226846.0, if (AVG_SCORE < 221526.0, -0.0110986, if (TOPSTORY < 0.365, -0.00479654, 0.020019)), if (WEEKAVG < 1.07, if (ISTITLE_AVG < 0.93, 0.0050835, -0.00497491), if (ISABSTRACT_AVG < 0.05, 0.0175386, 0.00636247))) +
+if (MIN_SCORE < 226373.0, if (AVG_SCORE < 151768.0, -0.0132135, if (WEEKAVG < 2.5, -0.00784849, if (ISABSTRACT_AVG < 0.95, -0.00100834, 0.0234278))), if (ISABSTRACT_AVG < 0.235, if (MAX_MIN_SCORE < 20325.0, 0.00102421, 0.0145312), 8.03179E-4)) +
+if (AVG_SCORE < 234937.0, -0.00791142, if (ISABSTRACT_AVG < 0.13, if (WEEKAVG < 1.5, 0.00645581, if (TWO_DAY_WF < 0.925548, 0.0121753, if (LOCALNEWS < 0.05, 0.013493, 0.0339803))), if (AVG_SCORE < 492653.0, -0.00104991, 0.0192918))) +
+if (MIN_SCORE < 231118.0, if (AVG_SCORE < 223663.0, -0.00824326, if (ISABSTRACT_AVG < 0.05, 0.00253809, -0.00637355)), if (ISTITLE_AVG < 0.885, if (DAY_LW_DAY_HITS_RATIO < 4.045, if (LW_DAY_HITS < 0.5, 0.00769331, -0.00570545), 0.0136625), -5.66847E-4)) +
+if (MIN_SCORE < 222204.0, -0.00655529, if (ISTITLE_AVG < 0.95, if (DAY_LW_DAY_HITS_RATIO < 15.5, if (TOPSTORY < 0.185, if (INTLNEWS < 0.39, 0.00204263, if (FOUR_HOUR_WF < 0.004, 0.0157981, -0.00641486)), 0.0124789), 0.0166668), -0.00299979)) +
+if (MIN_SCORE < 224388.0, -0.00568771, if (WEEKAVG < 1.07, if (ISTITLE_AVG < 0.845, 0.00360536, -0.00514951), if (BUSINESS < 0.05, if (MAX_MIN_RANK < 3.0, 0.0231505, 0.0105904), if (MIN_SCORE < 400082.0, 0.00196019, 0.0152236)))) +
+if (MAX_SCORE < 264920.0, if (AVG_SCORE < 159289.0, -0.00812678, -0.003174), if (DAY_WEEK_AVG_DERIV < 30.715, if (ISABSTRACT_AVG < 0.115, if (MAX_MIN_SCORE < 163787.0, 0.0097262, if (AVG_SCORE < 400330.0, -0.00390127, 0.00706031)), 5.49425E-5), 0.0204424)) +
+if (AVG_SCORE < 241590.0, -0.00459592, if (TOPSTORY < 0.05, if (DAY_WEEK_AVG_RATIO < 4.205, -1.26418E-4, 0.0204507), if (TWO_DAY_WF < 0.86039, if (ISABSTRACT_AVG < 0.585, if (TOPSTORY < 0.365, 0.00566686, 0.0196157), -0.00511988), 0.0124928))) +
+if (MIN_SCORE < 222204.0, if (PREV_DAY_HITS < 4.5, -0.00589519, -6.31753E-4), if (ISABSTRACT_AVG < 0.235, if (MAX_MIN_SCORE < 171496.0, if (MAX_SCORE < 558130.0, if (AVG_RANK < 7.125, 8.6138E-4, 0.00952768), 0.0206013), 0.00133279), -0.00112547)) +
+if (MIN_SCORE < 222204.0, if (DAY_PD_HITS_DERIV < -12.5, 0.00847214, if (DAY_WEEK_AVG_DERIV < 36.785, -0.00450293, 0.0134303)), if (ISABSTRACT_AVG < 0.05, if (DAY_WEEK_AVG_RATIO < 4.83, if (NATIONALNEWS < 0.355, 0.00488766, 0.0169425), 0.0204287), -1.32037E-4)) +
+if (MAX_SCORE < 250058.0, if (INTLNEWS < 0.105, -0.0059595, 2.21029E-4), if (TOPSTORY < 0.355, if (MIN_SCORE < 385241.0, if (BUSINESS < 0.05, 0.00367059, -0.00223683), if (INTLNEWS < 0.365, 0.00575046, 0.0237395)), 0.0174135)) +
+if (MAX_SCORE < 265638.0, -0.00287962, if (ISABSTRACT_AVG < 0.235, if (DAY_PD_HITS_RATIO < 18.75, if (MAX_MIN_SCORE < 67687.2, if (PREV_DAY_HITS < 6.5, 0.00469885, 0.0145573), if (AVG_SCORE < 399037.0, -0.00312833, 0.00680591)), 0.0238803), -0.00158812)) +
+if (MIN_SCORE < 222204.0, if (TOPSTORY < 0.355, -0.00377546, 0.0099145), if (ISTITLE_AVG < 0.885, if (MAX_MIN_SCORE < 57965.2, if (MIN_SCORE < 223217.0, 0.0291906, 0.00802385), if (AVG_SCORE < 402324.0, -0.00259188, 0.00560142)), -0.0015883)) +
+if (MAX_SCORE < 252015.0, -0.00312417, if (DAY_WEEK_AVG_RATIO < 5.91, if (TOPSTORY < 0.185, if (MAX_MIN_SCORE < 123158.0, if (ISABSTRACT_AVG < 0.13, 0.0054303, if (PREV_DAY_HITS < 7.5, -0.00350664, 0.0115054)), -0.00200056), 0.00612929), 0.0248479)) +
+if (MIN_SCORE < 232158.0, if (DAY_PD_HITS_DERIV < -13.5, 0.012118, if (SPORTS < 0.685, -0.00337721, if (MAX_SCORE < 165958.0, -0.00648055, 0.00734207))), if (ISABSTRACT_AVG < 0.635, if (EIGHT_HOUR_WF < 0.493902, 0.00519362, -0.00533505), -0.00220591)) +
+if (AVG_SCORE < 387415.0, if (PREV_DAY_HITS < 2.5, -0.00321038, if (BUSINESS < 0.05, 0.00353532, -0.0020425)), if (TWO_DAY_WF < 0.979149, 0.00271552, if (TWELVE_HOUR_WF < 0.104418, 0.00180155, if (MIN_SCORE < 350308.0, 0.0370742, 0.0145313)))) +
+if (MAX_SCORE < 248824.0, if (INTLNEWS < 0.185, -0.00381799, 0.00109643), if (TOPSTORY < 0.185, if (TWO_DAY_WF < 0.779514, -0.0015664, if (WEEKAVG < 4.07, 0.00171319, 0.0126131)), if (MAX_MIN_RANK < 7.0, 0.00411675, 0.0149353))) +
+if (MIN_SCORE < 233311.0, -0.00183471, if (LW_DAY_HITS < 0.5, if (SUPERDUPER_AVG < 0.21, if (MIN_RANK < 1.0, 0.0173917, 6.43665E-4), if (LOCALNEWS < 0.185, if (DAY_PD_HITS_RATIO < 8.795, 0.00308276, 0.0169982), 0.0159792)), -0.00499866)) +
+if (TOPSTORY < 0.39, if (MAX_SCORE < 176763.0, -0.00448387, if (INTLNEWS < 0.415, if (BUSINESS < 0.05, if (MAX_MIN_SCORE < 20408.8, -0.00328596, if (TWO_DAY_WF < 0.512854, -0.00211998, 0.00522867)), -0.00226038), 0.00574748)), 0.00900215) +
+if (TWO_DAY_WF < 0.75074, if (BUSINESS < 0.05, if (FOUR_HOUR_WF < 0.0149554, if (WEEKAVG < 0.785, -0.00184131, if (AVG_SCORE < 167616.0, -0.00305123, 0.00685803)), -0.00470139), -0.003457), if (MAX_SCORE < 504246.0, 8.78955E-4, 0.00850264)) +
+if (BUSINESS < 0.105, if (AVG_SCORE < 160899.0, -0.00270644, if (AVG_SCORE < 194764.0, if (ISABSTRACT_AVG < 0.315, -0.00800918, 0.012943), if (NATIONALNEWS < 0.355, 9.01868E-4, 0.0112161))), if (INTLNEWS < 0.39, -0.00269415, 0.00725021)) +
+if (BUSINESS < 0.105, if (MAX_SCORE < 188088.0, -0.00298371, if (AVG_SCORE < 190784.0, if (ISABSTRACT_AVG < 0.55, -0.00171064, 0.0237327), if (AVG_RANK < 9.755, 0.00131049, if (WEEKAVG < 0.93, -0.00199335, 0.020099)))), -0.00222399) +
+if (NATIONALNEWS < 0.115, if (PREV_DAY_HITS < 27.5, if (INTLNEWS < 0.725, if (AVG_SCORE < 629440.0, -0.00184197, 0.0166573), 0.0148512), if (TWO_DAY_WF < 0.773805, if (TWELVE_HOUR_WF < 0.114144, 0.00583361, -0.012718), 0.0149618)), 0.00280466) +
+if (TOPSTORY < 0.355, if (DAY_PD_HITS_DERIV < -4.5, 0.00287102, if (SPORTS < 0.73, -0.00185575, 0.00272133)), if (AVG_RANK < 9.55, if (DAY_PD_HITS_RATIO < 0.405, -0.00518413, if (MAX_MIN_SCORE < 115612.0, 0.00438781, 0.0211867)), 0.0209324)) +
+if (AVG_SCORE < 147623.0, -0.00405691, if (DAY_PD_HITS_DERIV < -23.5, 0.00920672, if (INTLNEWS < 0.725, if (TOPSTORY < 0.39, 7.21159E-5, if (INTLNEWS < 0.05, if (SUPERDUPER_AVG < 0.155, 0.00462984, 0.0231233), 5.62082E-4)), 0.0141075))) +
+if (AVG_SCORE < 159075.0, -0.00305707, if (TOPSTORY < 0.05, if (SPORTS < 0.73, -9.35589E-4, if (AVG_RANK < 5.635, -0.00405106, 0.0119584)), if (LW_DAY_HITS < 0.5, 0.0045483, if (PREV_DAY_HITS < 30.0, -0.00540909, 0.00895866)))) +
+if (MAX_SCORE < 507014.0, if (AVG_RANK < 6.775, -0.00328147, if (MAX_MIN_SCORE < 150474.0, 3.93348E-4, -0.00536951)), if (DAY_PD_HITS_RATIO < 7.885, if (ENTERTAINMENT < 0.05, if (MAX_SCORE < 516938.0, 0.0171772, 0.00382646), -0.00447429), 0.0153178)) +
+if (DAY_WEEK_AVG_RATIO < 5.905, if (NATIONALNEWS < 0.105, -8.32529E-4, if (MAX_MIN_RANK < 7.0, if (NATIONALNEWS < 0.13, 0.0108634, 3.13874E-4), if (AVG_SCORE < 231880.0, -0.00104106, if (PREV_DAY_HITS < 5.5, 0.0263191, 0.00601508)))), 0.0149012) +
+if (PREV_DAY_HITS < 59.5, if (NATIONALNEWS < 0.05, if (DAY_PD_HITS_DERIV < -6.5, if (DAY_WEEK_AVG_RATIO < 1.285, if (MAX_MIN_SCORE < 160894.0, 0.00449479, -0.00886993), 0.0153285), -0.00178263), if (POLITICS < 0.05, 0.00198329, -0.00586162)), 0.00935161) +
+if (MIN_SCORE < 132626.0, -0.00387076, if (WEEKAVG < 0.93, -0.00135437, if (MAX_MIN_SCORE < 46712.0, 0.00347721, if (AVG_SCORE < 404994.0, if (MIN_SCORE < 241776.0, if (SPORTS < 0.79, -0.00180685, 0.0168028), -0.00853053), 0.00228774)))) +
+if (LW_DAY_HITS < 0.5, if (AVG_SCORE < 159292.0, -0.00244777, if (LIFESTYLE < 0.155, if (DAY_PD_HITS_DERIV < -3.5, 0.00412328, if (TOPSTORY < 0.39, 4.16163E-4, 0.0104883)), -0.00615481)), if (FOUR_HOUR_WF < 0.158004, -0.00212154, -0.0150848)) +
+if (DAY_PD_HITS_RATIO < 43.0, if (LW_DAY_HITS < 0.5, if (DAY_PD_HITS_DERIV < -5.5, if (AVG_RANK < 9.265, if (TOPSTORY < 0.05, -0.00313951, 0.00432897), 0.0117073), -6.92E-4), if (MAX_MIN_SCORE < 120702.0, -0.00138028, -0.00945152)), 0.0160989) +
+if (LW_DAY_HITS < 0.5, if (MAX_SCORE < 507008.0, if (MAX_SCORE < 339502.0, if (ENTERTAINMENT < 0.415, 3.0327E-4, 0.00803638), -0.0034615), 0.00344157), if (FOUR_HOUR_WF < 0.101282, if (DAY_WEEK_AVG_DERIV < 17.5, -0.00192815, 0.00914257), -0.0127954)) +
+if (DAY_WEEK_AVG_RATIO < 0.255, 0.0131801, if (MAX_MIN_SCORE < 312687.0, if (MAX_MIN_SCORE < 296243.0, if (NATIONALNEWS < 0.105, -6.09993E-4, if (MAX_MIN_RANK < 7.0, 8.98274E-4, if (MAX_SCORE < 234190.0, 9.67677E-4, 0.0163215))), 0.0161496), -0.0112906)) +
+if (TOPSTORY < 0.39, if (MIN_SCORE < 220684.0, -0.00143961, if (ISABSTRACT_AVG < 0.05, if (AVG_SCORE < 277398.0, if (DAY_LW_DAY_HITS_RATIO < 1.75, -9.51177E-4, 0.00924989), if (BUSINESS < 0.39, -0.00128495, 0.0103605)), -0.00106493)), 0.00610485) +
+if (AVG_SCORE < 500951.0, if (DAY_HITS < 42.5, -6.76917E-4, if (INTLNEWS < 0.45, if (ISTITLE_AVG < 0.05, -0.0122069, 0.00752268), 0.0144731)), if (MIN_SCORE < 362007.0, 0.0202143, if (PUB_TODAY_AVG < 0.05, -0.0107444, 0.00512166))) +
+if (WEEKAVG < 1.07, -0.00167316, if (DAY_WEEK_AVG_RATIO < 6.14, if (BUSINESS < 0.05, if (PREV_DAY_HITS < 59.5, if (AVG_RANK < 9.225, if (DAY_HITS < 30.5, 0.00155636, -0.0130867), 0.0105919), 0.0218533), -8.02313E-4), 0.0145597)) +
+if (LIFESTYLE < 0.05, if (MAX_MIN_RANK < 7.0, 2.20029E-4, if (ISABSTRACT_AVG < 0.115, if (TWO_DAY_WF < 0.580973, -1.79904E-4, if (EIGHT_HOUR_WF < 0.0125776, 0.0222343, 0.00659678)), if (DAY_WEEK_AVG_DERIV < -0.93, 0.0146586, -0.0018679))), -0.0043182) +
+if (AVG_SCORE < 500853.0, if (MIN_SCORE < 435034.0, if (DAY_WEEK_AVG_RATIO < 4.15, -6.00797E-4, 0.00413062), -0.0152667), if (MAX_SCORE < 660352.0, if (TWO_DAY_WF < 0.744565, 0.0172406, if (MAX_SCORE < 596568.0, -0.0069398, 0.0163258)), -0.00228486)) +
+if (SPORTS < 0.685, -3.28185E-4, if (AVG_SCORE < 446734.0, if (MAX_SCORE < 500264.0, if (MAX_SCORE < 450904.0, if (MIN_SCORE < 254311.0, if (WEEKAVG < 0.785, -0.00158584, 0.0075942), -0.0103296), 0.0212781), -0.0121229), 0.0182724)) +
+if (TOPSTORY < 0.39, if (PREV_DAY_HITS < 59.5, -5.72966E-5, if (NATIONALNEWS < 0.05, 0.0144398, -0.00316385)), if (FOUR_HOUR_WF < 0.0201025, if (TWELVE_HOUR_WF < 0.163978, 0.00366064, 0.0227011), if (ONE_DAY_WF < 0.658333, -0.0114776, 0.00740238))) +
+if (TOPSTORY < 0.585, if (ENTERTAINMENT < 0.05, if (DAY_WEEK_AVG_DERIV < 43.145, 4.86446E-4, if (DAY_HITS < 78.5, 0.0210513, if (SUPERDUPER_AVG < 0.65, -0.00387695, 0.013128))), if (AVG_RANK < 5.465, 0.00674178, -0.00228932)), -0.0121137) +
+if (TWO_DAY_WF < 0.488162, -0.00237763, if (WEEKAVG < 1.215, -7.73205E-4, if (EIGHT_HOUR_WF < 0.0444065, if (DAY_HITS < 19.5, 0.00278939, 0.0115461), if (NATIONALNEWS < 0.155, -0.00189416, if (SPORTS < 0.105, 0.0072781, -0.00903706))))) +
+if (SPORTS < 0.47, if (SPORTS < 0.105, -4.47312E-5, -0.00348966), if (MAX_RANK < 9.0, -0.0016478, if (EIGHT_HOUR_WF < 0.0459777, if (TWO_DAY_WF < 0.539394, if (WEEKAVG < 1.07, -0.00892999, 0.00865732), 0.0121605), 0.00131641))) +
+if (INTLNEWS < 0.725, if (DAY_LW_DAY_HITS_RATIO < 124.5, if (AVG_SCORE < 628258.0, if (DAY_HITS < 55.5, -1.93067E-4, if (TWELVE_HOUR_WF < 0.117879, 0.0187097, if (TWELVE_HOUR_WF < 0.350814, -0.00734127, 0.0131678))), 0.00987754), -0.0156063), 0.00929408) +
+if (DAY_WEEK_AVG_DERIV < -3.36, -0.00956624, if (TOPSTORY < 0.39, -2.16336E-4, if (AVG_RANK < 9.55, if (AVG_RANK < 8.735, if (TWO_DAY_WF < 0.531551, -0.00490451, if (FOUR_HOUR_WF < 0.0142857, 0.0229256, 3.12813E-4)), -0.00418916), 0.0189348))) +
+if (AVG_SCORE < 625182.0, if (DAY_LW_DAY_HITS_DERIV < 55.5, if (DAY_WEEK_AVG_DERIV < 19.36, if (DAY_WEEK_AVG_DERIV < 13.5, -1.95177E-4, 0.00629794), if (ISTITLE_AVG < 0.05, -0.0149349, if (TWELVE_HOUR_WF < 0.383204, -0.00516327, 0.00921651))), 0.00647785), 0.0102664) +
+if (DAY_WEEK_AVG_RATIO < 0.385, -0.00586045, if (LIFESTYLE < 0.155, if (MAX_MIN_SCORE < 16288.0, -0.0016458, if (MAX_MIN_SCORE < 45875.2, if (MAX_MIN_SCORE < 45537.5, if (AVG_SCORE < 229848.0, -2.97351E-4, 0.00475294), 0.0181171), -5.74173E-4)), -0.00499598)) +
+if (SPORTS < 0.815, if (TWO_DAY_WF < 0.460499, if (INTLNEWS < 0.365, -0.00422695, if (DAY_HITS < 4.5, 0.011483, -0.00407438)), -4.78506E-6), if (DAY_HITS < 1.5, 0.00980267, if (EIGHT_HOUR_WF < 0.301948, -0.00316423, 0.0125528))) +
+if (PREV_DAY_HITS < 0.5, -0.00190281, if (LAW < 0.05, if (PUB_TODAY_AVG < 0.05, -0.00103893, if (EIGHT_HOUR_WF < 0.0492709, if (ISABSTRACT_AVG < 0.05, 0.0053372, 9.62476E-4), if (NATIONALNEWS < 0.13, -0.00161984, 0.005538))), -0.00741284)) +
+if (DAY_LW_DAY_HITS_RATIO < 0.69, if (AVG_SCORE < 229191.0, -0.00217119, -0.0135186), if (DAY_LW_DAY_HITS_RATIO < 125.5, if (ISABSTRACT_AVG < 0.05, if (AVG_SCORE < 218595.0, -0.00358076, if (BUSINESS < 0.685, 0.00139556, 0.0140572)), -5.71652E-4), -0.0128262)) +
+if (WEEKAVG < 1.5, if (DAY_WEEK_AVG_DERIV < 3.5, if (SUPERDUPER_AVG < 0.355, if (LW_DAY_HITS < 0.5, 5.21639E-4, -0.00451687), -0.00637359), -0.00562351), if (TWO_DAY_WF < 0.829824, 2.82632E-5, if (TWELVE_HOUR_WF < 0.940588, 0.00527366, -0.011917))) +
+if (TWELVE_HOUR_WF < 0.742581, if (TOPSTORY < 0.355, -1.2321E-4, if (FOUR_HOUR_WF < 0.026084, if (MIN_SCORE < 356232.0, 0.0111342, -0.00292376), -0.00479873)), if (ISABSTRACT_AVG < 0.185, if (MIN_RANK < 5.0, -1.25896E-4, -0.0115332), -7.63903E-4)) +
+if (NATIONALNEWS < 0.27, -3.28182E-4, if (ISABSTRACT_AVG < 0.05, if (AVG_SCORE < 225658.0, -0.00305383, if (MIN_SCORE < 231962.0, 0.0181265, if (MAX_MIN_SCORE < 33119.5, -0.00486977, if (WEEKAVG < 1.785, 0.0229851, 0.00588037)))), -6.67257E-4)) +
+if (MAX_MIN_SCORE < 312575.0, if (TWO_DAY_WF < 0.531754, -0.00112552, if (MAX_SCORE < 669432.0, if (BUSINESS < 0.05, if (PREV_DAY_HITS < 41.5, 0.00191169, 0.0126963), if (DAY_LW_DAY_HITS_RATIO < 37.5, -0.00101754, 0.00846513)), -0.00792694)), -0.00834756) +
+if (PUB_TODAY_AVG < 0.05, if (SPORTS < 0.645, if (AVG_SCORE < 395268.0, if (MAX_SCORE < 460268.0, -0.00227942, 0.00899341), -0.00823655), if (MIN_RANK < 5.0, -0.00122777, 0.0163908)), if (TWO_DAY_WF < 0.95119, -1.8789E-4, 0.0021492)) +
+if (DAY_WEEK_AVG_RATIO < 3.985, -2.26985E-4, if (DAY_WEEK_AVG_RATIO < 5.525, if (DAY_WEEK_AVG_RATIO < 4.95, if (AVG_SCORE < 373867.0, if (ENTERTAINMENT < 0.05, 0.00254281, -0.0107653), if (PREV_DAY_HITS < 3.0, 0.0220568, 0.00220059)), 0.0155791), -0.00294274)) +
+if (EIGHT_HOUR_WF < 0.349537, if (LOCALNEWS < 0.315, 0.00100629, if (TWELVE_HOUR_WF < 0.324561, if (MAX_SCORE < 547636.0, -0.00365503, 0.00844103), if (MAX_SCORE < 249971.0, -0.00217393, 0.020902))), if (ISTITLE_AVG < 0.05, -0.00679711, -2.87657E-6)) +
+if (DAY_WEEK_AVG_DERIV < 60.285, if (DAY_WEEK_AVG_DERIV < -4.07, if (DAY_WEEK_AVG_DERIV < -5.785, 8.89976E-4, -0.016703), if (DAY_WEEK_AVG_RATIO < 0.285, 0.0108868, if (DAY_WEEK_AVG_RATIO < 0.34, -0.0115452, if (DAY_PD_HITS_DERIV < -24.5, 0.00709642, 5.85454E-5)))), 0.00724335) +
+if (WEEKAVG < 31.07, if (DAY_PD_HITS_DERIV < -8.5, if (ONE_DAY_WF < 0.209914, if (TWO_DAY_WF < 0.537088, 0.00770858, -0.00166542), if (FOUR_HOUR_WF < 0.00547982, 0.0185133, 3.03571E-4)), -3.1074E-4), if (MIN_SCORE < 398722.0, -0.0135078, 0.00113129)) +
+if (AVG_RANK < 9.53, if (INTLNEWS < 0.73, if (SUPERDUPER_AVG < 0.61, -4.09752E-4, -0.00974984), 0.0133732), if (SPORTS < 0.05, if (TOPSTORY < 0.315, -0.00110238, 0.0155814), if (AVG_SCORE < 258098.0, 0.0025561, 0.0218633))) +
+if (WEEKAVG < 4.215, 6.4852E-4, if (MAX_MIN_SCORE < 163619.0, if (TWO_DAY_WF < 0.463325, -0.00769416, 5.3643E-4), if (FOUR_HOUR_WF < 0.060024, if (TWELVE_HOUR_WF < 0.0127518, -0.0161574, if (LOCALNEWS < 0.05, 0.00613049, -0.00855688)), -0.0167968))) +
+if (DAY_WEEK_AVG_RATIO < 0.255, 0.00985333, if (DAY_WEEK_AVG_RATIO < 0.335, -0.00951258, if (MAX_MIN_SCORE < 307460.0, if (MAX_SCORE < 517912.0, if (MAX_MIN_SCORE < 61870.2, if (DAY_PD_HITS_RATIO < 2.865, 0.00152167, -0.0017641), -0.00258099), 0.00333836), -0.00739588))) +
+if (WEEKAVG < 12.785, if (AVG_SCORE < 629228.0, 3.70823E-6, 0.0113262), if (INTLNEWS < 0.25, -0.00996717, if (EIGHT_HOUR_WF < 0.205476, if (MAX_MIN_SCORE < 105786.0, if (FOUR_HOUR_WF < 0.00848006, 0.0151832, 8.57643E-4), -0.00763046), -0.015098))) +
+if (SPORTS < 0.47, -2.09978E-4, if (DAY_PD_HITS_RATIO < 9.5, if (MAX_RANK < 9.0, -0.00197712, if (MAX_SCORE < 188360.0, -4.64352E-4, if (MAX_MIN_SCORE < 45863.5, if (DAY_PD_HITS_RATIO < 0.31, 0.029664, 0.00977088), 0.00333723))), -0.012958)) +
+if (EIGHT_HOUR_WF < 0.397041, if (TWELVE_HOUR_WF < 0.327106, 6.40846E-5, if (MAX_MIN_SCORE < 129263.0, 0.00114373, if (SUPERDUPER_AVG < 0.105, if (AVG_SCORE < 390145.0, 0.00590447, 0.0270599), 0.00116164))), if (DAY_LW_DAY_HITS_RATIO < 27.5, -0.00395192, 0.00937395)) +
+if (LW_DAY_HITS < 3.5, if (ENTERTAINMENT < 0.845, if (LIFESTYLE < 0.115, if (AVG_RANK < 4.45, -0.00817106, if (AVG_RANK < 5.225, if (ISABSTRACT_AVG < 0.55, 0.00926281, -0.00505226), 1.81535E-4)), -0.00449952), 0.00814113), -0.00517456) +
+if (TWO_DAY_WF < 0.439697, if (AVG_RANK < 8.7, -0.00429929, if (MAX_MIN_SCORE < 47973.5, -9.03138E-4, 0.0125022)), if (AVG_RANK < 8.635, if (DAY_WEEK_AVG_DERIV < 29.5, 8.00681E-4, 0.0125881), if (ISTITLE_AVG < 0.05, -0.00468934, 3.1406E-4))) +
+if (LW_DAY_HITS < 0.5, if (WEEKAVG < 39.855, 2.77242E-4, -0.0132349), if (MIN_SCORE < 234431.0, if (MIN_SCORE < 225952.0, -0.00160465, 0.01256), if (PREV_DAY_HITS < 19.5, -0.0107505, if (WEEKAVG < 12.575, 0.0119228, -0.00600679)))) +
+if (WEEKAVG < 26.93, if (TOPSTORY < 0.39, -1.40614E-4, if (AVG_RANK < 9.55, if (AVG_RANK < 7.755, 0.0151495, if (TOPSTORY < 0.45, 0.0043054, -0.00734039)), 0.0204375)), if (SUPERDUPER_AVG < 0.55, -0.0146963, -1.2832E-4)) +
+if (TOPSTORY < 0.55, if (WEEKAVG < 0.5, if (MAX_MIN_SCORE < 165073.0, if (MIN_SCORE < 215208.0, 1.70833E-4, -0.0051217), 0.0104792), if (WEEKAVG < 10.93, 7.81356E-4, if (EIGHT_HOUR_WF < 0.00663439, 0.00783355, -0.00393311))), -0.0097211) +
+if (DAY_WEEK_AVG_RATIO < 0.225, 0.0105222, if (SPORTS < 0.73, -6.88094E-4, if (MAX_MIN_RANK < 5.0, -0.00146174, if (PREV_DAY_HITS < 6.5, if (MIN_SCORE < 144075.0, -0.00415946, if (PREV_DAY_HITS < 2.5, 0.010148, 0.0262199)), -0.00602654)))) +
+if (DAY_WEEK_AVG_DERIV < -3.5, if (TWO_DAY_WF < 0.635642, if (DAY_WEEK_AVG_DERIV < -5.93, 0.00406292, -0.0105257), -0.0144987), if (DAY_PD_HITS_RATIO < 43.0, if (DAY_WEEK_AVG_RATIO < 5.55, -6.55387E-5, if (ISTITLE_AVG < 0.05, -0.0189073, 0.00177271)), 0.0120068)) +
+if (DAY_WEEK_AVG_RATIO < 0.235, 0.0112125, if (DAY_WEEK_AVG_RATIO < 0.345, -0.00610693, if (WEEKAVG < 0.93, -0.00104389, if (WEEKAVG < 4.215, if (TOPSTORY < 0.13, 5.13026E-4, 0.00509033), if (MAX_MIN_SCORE < 206736.0, -4.1877E-4, -0.0108199))))) \ No newline at end of file
diff --git a/searchlib/src/test/files/ranking06.expression b/searchlib/src/test/files/ranking06.expression
new file mode 100644
index 00000000000..017f5ed49b6
--- /dev/null
+++ b/searchlib/src/test/files/ranking06.expression
@@ -0,0 +1,85 @@
+if (MAX_SCORE < 364352.0, if (NUM_WORDS < 1.5, 0.106529, if (WEEKAVG < 0.665, 0.113339, 0.129744)), if (WEEKAVG < 0.35, 0.125401, 0.148456)) +
+if (MAX_SCORE < 386454.0, if (NUM_WORDS < 2.5, if (MAX_SCORE < 266558.0, -0.00435683, 0.00232626), 0.00527105), if (DAY_LW_DAY_HITS_RATIO < 3.75, 0.0125759, 0.0415964)) +
+if (MAX_SCORE < 285564.0, if (NUM_WORDS < 3.5, -0.00312935, 0.0139702), if (DAY_LW_DAY_HITS_RATIO < 4.645, 0.00510366, if (ISABSTRACT_AVG < 0.225, 0.0376987, 0.00704226))) +
+if (MAX_SCORE < 354060.0, if (NUM_WORDS < 1.5, -0.00557684, if (DAY_LW_DAY_HITS_RATIO < 7.25, -4.11611E-4, 0.0176971)), if (ISTITLE_AVG < 0.845, 0.0209172, 0.00437892)) +
+if (MAX_SCORE < 357048.0, if (NUM_WORDS < 3.5, if (DAY_LW_DAY_HITS_RATIO < 6.75, -0.00214246, 0.00909381), 0.0132498), if (DAY_WEEK_AVG_DERIV < 2.785, 0.00781954, 0.0325808)) +
+if (MAX_SCORE < 391984.0, if (NUM_WORDS < 1.5, -0.00479641, if (WEEKAVG < 0.805, 3.14606E-4, 0.0174789)), if (DAY_WEEK_AVG_DERIV < 2.5, 0.0100076, 0.0303617)) +
+if (NUM_WORDS < 2.5, if (MAX_SCORE < 273725.0, -0.0031024, if (ISTITLE_AVG < 0.955, 0.0093897, -0.00177918)), if (WEEKAVG < 0.35, 0.00506228, 0.0238941)) +
+if (NUM_WORDS < 3.5, if (DAY_LW_DAY_HITS_RATIO < 7.835, if (NUM_WORDS < 2.5, -0.00222361, 0.00275911), if (ISABSTRACT_AVG < 0.185, 0.0286851, 0.0025611)), 0.0150946) +
+if (MAX_SCORE < 463634.0, if (DAY_LW_DAY_HITS_RATIO < 14.36, if (NUM_WORDS < 2.5, -0.00168161, 0.00306928), if (ISABSTRACT_AVG < 0.05, 0.03626, 0.00702238)), 0.018646) +
+if (MAX_SCORE < 291384.0, -0.00120841, if (ISTITLE_AVG < 0.845, if (WEEKAVG < 0.325, 0.0070091, if (DAY_WEEK_AVG_DERIV < 27.5, 0.021833, 0.0670236)), -2.39127E-4)) +
+if (MAX_SCORE < 392716.0, if (WEEKAVG < 0.915, if (NUM_WORDS < 1.5, -0.00408665, 1.2681E-5), if (ISABSTRACT_AVG < 0.05, 0.0315009, 0.00309315)), 0.0101865) +
+if (NUM_WORDS < 3.5, if (DAY_LW_DAY_HITS_RATIO < 4.9, if (NUM_WORDS < 1.5, -0.00378383, -1.55068E-4), if (ISTITLE_AVG < 0.915, 0.0164901, 3.5458E-4)), 0.0111533) +
+if (NUM_WORDS < 2.5, if (DAY_LW_DAY_HITS_RATIO < 12.165, -0.00137589, if (ISTITLE_AVG < 0.73, 0.0299723, 0.00442332)), if (DAY_HITS < 2.125, 0.00279729, 0.0157199)) +
+if (MAX_SCORE < 391997.0, if (NUM_WORDS < 1.5, -0.00289017, if (PREV_DAY_HITS < 6.33333, 1.53177E-4, 0.0114408)), if (PREV_DAY_HITS < 9.5, 0.00746655, 0.040233)) +
+if (NUM_WORDS < 3.5, if (DAY_LW_DAY_HITS_RATIO < 4.9, -0.00103084, if (ISTITLE_AVG < 0.915, if (DAY_HITS_FRAC < 0.645, 0.0230528, 0.00568694), 6.15028E-4)), 0.00901386) +
+if (MAX_SCORE < 291385.0, -9.44169E-4, if (ISTITLE_AVG < 0.72, if (DAY_WEEK_AVG_DERIV < 28.855, if (NATIONALNEWS < 0.355, 0.00617921, 0.0271174), 0.0534392), -9.21153E-4)) +
+if (NUM_WORDS < 3.5, if (PREV_DAY_HITS < 6.16667, -6.82897E-4, if (MIN_SCORE < 254342.0, 0.00193942, if (NATIONALNEWS < 0.21, 0.0131534, 0.0605109))), 0.00697463) +
+if (NUM_WORDS < 2.5, -9.31934E-4, if (DAY_WEEK_AVG_RATIO < 3.17, if (ISTITLE_AVG < 0.685, if (NATIONALNEWS < 0.225, 0.00281994, 0.0214747), -0.00300096), 0.0138056)) +
+if (NUM_WORDS < 3.5, if (DAY_LW_DAY_HITS_RATIO < 4.9, -8.81061E-4, if (AVG_SCORE < 268471.0, -8.77741E-5, if (ISABSTRACT_AVG < 0.105, 0.0164307, 9.85136E-4))), 0.00654057) +
+if (TOPSTORY < 0.03, if (ISTITLE_AVG < 0.62, if (AVG_SCORE < 268819.0, -3.15955E-4, 0.00518778), -0.00177677), if (ISTITLE_AVG < 0.72, 0.0116413, 0.00139452)) +
+if (MAX_SCORE < 472738.0, if (ISTITLE_AVG < 0.63, if (DAY_WEEK_AVG_DERIV < 1.825, 4.94339E-4, if (MIN_SCORE < 250779.0, -1.72329E-5, 0.012004)), -0.00134588), 0.0100001) +
+if (NUM_WORDS < 3.5, if (DAY_LW_DAY_HITS_RATIO < 24.9, if (PREV_DAY_HITS < 14.8333, -5.01675E-4, 0.00773172), if (LOCALNEWS < 0.11, 0.00984187, 0.0382478)), 0.00673426) +
+if (PREV_DAY_HITS < 4.35, if (NUM_WORDS < 1.5, -0.00227953, 8.50381E-5), if (ISTITLE_AVG < 0.905, if (DAY_LW_DAY_HITS_RATIO < 1.6, -0.00121777, 0.0155982), 0.00116876)) +
+if (DAY_LW_DAY_HITS_RATIO < 4.73, -2.72614E-4, if (NATIONALNEWS < 0.58, if (DAY_PD_HITS_RATIO < 0.63, if (DAY_HITS_FRAC < 0.265, 0.0099063, 0.0510568), 0.00250323), 0.0470183)) +
+if (DAY_LW_DAY_HITS_RATIO < 5.915, -3.36118E-4, if (DAY_PD_HITS_DERIV < -11.5, if (DAY_WEEK_AVG_RATIO < 1.735, 9.24298E-4, if (DAY_HITS_FRAC < 0.165, 0.0161363, 0.0802279)), 0.00327439)) +
+if (NUM_WORDS < 2.5, -6.56912E-4, if (BUSINESS < 0.275, if (MAX_MIN_SCORE < 50.25, 0.00224658, if (ISABSTRACT_AVG < 0.415, 0.013094, -0.0054932)), -0.00808819)) +
+if (NATIONALNEWS < 0.135, -1.40405E-4, if (AVG_SCORE < 263507.0, -1.19297E-4, if (ISTITLE_AVG < 0.73, if (ENTERTAINMENT < 0.05, 0.0220643, -0.00416695), 0.00371154))) +
+if (DAY_WEEK_AVG_RATIO < 14.28, if (NUM_WORDS < 4.5, if (MIN_SCORE < 245866.0, -9.84768E-4, if (ISTITLE_AVG < 0.72, 0.00341093, -9.73418E-4)), 0.0106439), 0.0360619) +
+if (MIN_SCORE < 472207.0, -2.00627E-4, if (WEEKAVG < 0.325, 0.00409488, if (AVG_SCORE < 531893.0, 0.0518209, if (MAX_SCORE < 602809.0, -0.0080393, 0.0383655)))) +
+if (ISTITLE_AVG < 0.72, if (AVG_SCORE < 268824.0, -1.31907E-4, if (MAX_MIN_SCORE < 7909.75, 0.00173958, if (NATIONALNEWS < 0.39, 0.010916, 0.0343348))), -0.00113192) +
+if (NUM_WORDS < 2.5, -6.26265E-4, if (BUSINESS < 0.115, if (MAX_MIN_SCORE < 15489.8, 0.00192349, if (MAX_MIN_SCORE < 35950.8, 0.0188263, 0.00372838)), -0.00528885)) +
+if (MAX_SCORE < 468155.0, -1.13066E-4, if (ENTERTAINMENT < 0.235, if (TOPSTORY < 0.22, -3.22423E-4, 0.0188811), if (AVG_RANK < 8.365, 0.00856273, 0.064677))) +
+if (DAY_LW_DAY_HITS_RATIO < 38.5, if (AVG_SCORE < 259970.0, -6.11764E-4, if (INTLNEWS < 0.045, 1.28558E-4, if (LOCALNEWS < 0.28, 0.00350635, 0.0165708))), 0.018775) +
+if (DAY_LW_DAY_HITS_RATIO < 28.5, if (ISTITLE_AVG < 0.585, if (MAX_RANK < 9.0, 1.72066E-4, if (AVG_SCORE < 269329.0, -1.02726E-4, 0.00688386)), -0.00101067), 0.0149278) +
+if (DAY_LW_DAY_HITS_DERIV < 14.5, -1.72386E-4, if (DAY_PD_HITS_DERIV < -11.5, if (DAY_WEEK_AVG_RATIO < 1.82, 0.00314713, 0.044771), if (LOCALNEWS < 0.115, 0.00112935, 0.0165557))) +
+if (DAY_PD_HITS_RATIO < 33.75, if (AVG_SCORE < 597646.0, if (DAY_PD_HITS_DERIV < -4.9, if (DAY_PREV_DAY_HITS_FRAC < 0.845, -0.00115559, 0.0101514), -1.88064E-4), 0.0159463), 0.024791) +
+if (MIN_SCORE < 481598.0, -4.83069E-5, if (MIN_SCORE < 512815.0, if (MAX_SCORE < 507654.0, 0.00563943, 0.0345982), if (MAX_SCORE < 584112.0, -0.00935941, 0.0104819))) +
+if (TOPSTORY < 0.105, -3.22897E-4, if (PREV_DAY_HITS < 1.45833, -6.16703E-4, if (MIN_SCORE < 253414.0, 0.00194629, if (MIN_SCORE < 255748.0, 0.0480784, 0.00955667)))) +
+if (NUM_WORDS < 3.5, if (NATIONALNEWS < 0.39, -2.85208E-4, if (PREV_DAY_HITS < 6.83333, 0.00277459, 0.0334432)), if (BUSINESS < 0.77, 0.00558387, -0.0192348)) +
+if (BUSINESS < 0.19, if (INTLNEWS < 0.095, 1.07539E-4, if (TOPSTORY < 0.03, 0.00158516, if (NUM_WORDS < 3.5, 0.00653366, 0.0388007))), -0.00186321) +
+if (DAY_LW_DAY_HITS_RATIO < 3.635, -3.80753E-4, if (LOCALNEWS < 0.185, 5.67701E-4, if (ISTITLE_AVG < 0.585, if (MAX_RANK < 9.0, -0.00212514, 0.0247626), 0.00163409))) +
+if (DAY_WEEK_AVG_RATIO < 14.28, if (TOPSTORY < 0.115, -3.69615E-4, if (DAY_LW_DAY_HITS_RATIO < 4.875, -4.56219E-4, if (DAY_PD_HITS_RATIO < 2.275, 0.0169104, 0.0021639))), 0.0238394) +
+if (NUM_WORDS < 1.5, -0.00139356, if (BUSINESS < 0.13, if (BUSINESS < 0.05, 0.00103638, if (DAY_LW_DAY_HITS_RATIO < 9.5, 0.0441388, -0.00313189)), -0.00195899)) +
+if (NATIONALNEWS < 0.13, -2.4886E-5, if (HEALTH < 0.105, if (WEEKAVG < 0.93, 0.00135398, 0.0138431), if (MIN_RANK < 3.0, 0.0513615, -0.00585742))) +
+if (NATIONALNEWS < 0.225, -8.48873E-5, if (MIN_SCORE < 259062.0, -3.6897E-4, if (ISTITLE_AVG < 0.71, if (ISTITLE_AVG < 0.45, 0.0126383, 0.0410443), 0.00420061))) +
+if (DAY_HITS < 15.25, -3.22532E-4, if (LOCALNEWS < 0.13, 0.00109495, if (WEEKAVG < 6.715, if (MAX_MIN_SCORE < 42695.8, 0.0489508, 0.00942793), -0.00595868))) +
+if (DAY_LW_DAY_HITS_RATIO < 38.5, if (NUM_WORDS < 2.5, -2.53621E-4, 0.00124598), if (DAY_WEEK_AVG_DERIV < 49.715, if (ISTITLE_AVG < 0.74, 0.0495711, 0.00323737), -0.00771975)) +
+if (NUM_WORDS < 1.5, -0.001351, if (DAY_WEEK_AVG_RATIO < 13.99, if (BUSINESS < 0.105, if (DAY_PD_HITS_DERIV < -10.5, 0.0106695, 5.27774E-4), -0.00157676), 0.0239454)) +
+if (DAY_LW_DAY_HITS_RATIO < 24.9, -6.42273E-5, if (MIN_SCORE < 247776.0, if (TOPSTORY < 0.05, -0.0160384, 0.00605178), if (DAY_PREV_DAY_HITS_FRAC < 0.715, -0.00225007, 0.0290955))) +
+if (PREV_DAY_HITS < 1.46429, -3.57361E-4, if (AVG_SCORE < 242369.0, -8.84977E-4, if (WEEKAVG < 5.975, if (DAY_HITS_FRAC < 0.135, 0.00160961, 0.00805956), -0.0114471))) +
+if (PREV_DAY_HITS < 26.5, if (ISABSTRACT_AVG < 0.155, 1.96561E-4, -0.00121068), if (DAY_HITS_FRAC < 0.265, if (DAY_WEEK_AVG_RATIO < 2.12, -0.00523257, 0.0187339), 0.0345852)) +
+if (NUM_WORDS < 4.5, if (MAX_MIN_SCORE < 55839.5, -1.41945E-4, -0.00394864), if (DAY_WEEK_AVG_RATIO < 3.505, if (AVG_RANK < 8.395, 0.0095287, -0.0143254), 0.0273452)) +
+if (PREV_DAY_HITS < 16.5, 1.80791E-4, if (AVG_SCORE < 312154.0, if (DAY_HITS_FRAC < 0.325, if (MIN_SCORE < 254301.0, -0.00274788, 0.0174896), -0.0248862), -0.023664)) +
+if (MIN_SCORE < 245164.0, -5.61547E-4, if (DAY_PREV_DAY_HITS_FRAC < 0.395, -0.00410237, if (MAX_RANK < 9.0, -7.35691E-5, if (MIN_SCORE < 560324.0, 0.00230962, 0.0217268)))) +
+if (DAY_WEEK_AVG_RATIO < 0.925, if (MAX_SCORE < 405533.0, -5.83987E-4, -0.00549206), if (MIN_SCORE < 479344.0, 3.03644E-4, if (MIN_SCORE < 489462.0, 0.038018, 0.00297502))) +
+if (MIN_SCORE < 475038.0, if (MAX_SCORE < 498633.0, -4.41489E-5, -0.0267606), if (ISABSTRACT_AVG < 0.1, if (ENTERTAINMENT < 0.31, 0.00455624, 0.0204099), -0.00647491)) +
+if (DAY_PD_HITS_RATIO < 0.115, -0.00668954, if (DAY_PD_HITS_DERIV < -11.5, if (DAY_PREV_DAY_HITS_FRAC < 0.975, if (DAY_PD_HITS_RATIO < 0.515, -0.00146208, 0.0238106), 0.0377246), -9.33641E-5)) +
+if (DAY_WEEK_AVG_RATIO < 10.84, if (DAY_WEEK_AVG_RATIO < 6.845, 1.28893E-4, if (MIN_SCORE < 367047.0, -0.00823593, 0.0102315)), if (AVG_SCORE < 279315.0, -0.00477584, 0.0265572)) +
+if (PREV_DAY_HITS < 9.75, -2.66304E-4, if (DAY_PREV_DAY_HITS_FRAC < 0.925, -2.69406E-4, if (AVG_RANK < 8.45, if (INTLNEWS < 0.295, 0.0146136, -0.0235187), 0.0313855))) +
+if (MIN_SCORE < 483511.0, -1.81558E-4, if (MIN_SCORE < 498030.0, if (DAY_WEEK_AVG_RATIO < 1.68, 0.00293744, 0.0371557), if (SUPERDUPER_AVG < 0.53, 0.00413503, -0.0112815))) +
+if (DAY_PD_HITS_RATIO < 26.5, if (DAY_PD_HITS_DERIV < -11.5, if (DAY_WEEK_AVG_RATIO < 1.735, 5.1364E-4, 0.0186441), -7.75501E-5), if (WEEKAVG < 5.57, 0.0278366, -0.00263107)) +
+if (NUM_WORDS < 1.5, -8.926E-4, if (DAY_WEEK_AVG_DERIV < 47.86, if (DAY_LW_DAY_HITS_RATIO < 38.5, if (ISABSTRACT_AVG < 0.235, 9.72798E-4, -0.00127979), 0.0250611), -0.0239326)) +
+if (MAX_SCORE < 407652.0, if (MAX_SCORE < 395501.0, 1.49872E-5, if (DAY_PD_HITS_RATIO < 0.285, 0.0434173, if (BUSINESS < 0.05, 0.0102549, -0.010691))), -0.00277705) +
+if (DAY_WEEK_AVG_RATIO < 6.355, 1.3418E-4, if (MIN_SCORE < 405020.0, if (AVG_SCORE < 356693.0, if (MAX_SCORE < 327611.0, -0.00546237, 0.0146496), -0.0196891), 0.0208141)) +
+if (AVG_SCORE < 526352.0, if (MAX_SCORE < 521635.0, -6.26311E-5, 0.0210967), if (MAX_SCORE < 550983.0, -0.0232122, if (AVG_RANK < 5.5, -0.0243343, -0.00151995))) +
+if (DAY_PD_HITS_RATIO < 26.5, if (DAY_PD_HITS_RATIO < 20.625, -5.89198E-5, -0.0216644), if (MAX_SCORE < 200640.0, -0.011139, if (AVG_RANK < 8.55, 0.0390014, 0.00966164))) +
+if (NATIONALNEWS < 0.27, -1.74062E-4, if (HEALTH < 0.05, if (AVG_SCORE < 342310.0, 8.35476E-4, if (MIN_SCORE < 347780.0, 0.0334442, 0.00624751)), 0.025545)) +
+if (ISTITLE_AVG < 0.72, if (MAX_MIN_SCORE < 43995.2, 3.96726E-4, if (DAY_PREV_DAY_HITS_FRAC < 0.935, 0.00123996, if (MAX_MIN_SCORE < 56002.2, 0.0235285, -0.00154573))), -4.78464E-4) +
+if (BUSINESS < 0.21, if (DAY_LW_DAY_HITS_DERIV < 14.5, 2.41495E-5, if (NATIONALNEWS < 0.225, if (LOCALNEWS < 0.035, -0.00142155, 0.0129645), 0.0297085)), -0.0014897) +
+if (SPORTS < 0.585, -3.04907E-4, if (MAX_SCORE < 285618.0, 2.21636E-4, if (ISTITLE_AVG < 0.7, if (MIN_SCORE < 269093.0, 0.0417159, 0.00987586), 0.00129559))) +
+if (DAY_PD_HITS_RATIO < 12.28, -5.73419E-5, if (LOCALNEWS < 0.03, -0.00224701, if (WEEKAVG < 5.57, if (AVG_RANK < 8.1, 0.0150017, 0.0490061), 8.99967E-4))) +
+if (NATIONALNEWS < 0.28, -2.02096E-4, if (MIN_SCORE < 259050.0, -4.61524E-4, if (PREV_DAY_HITS < 5.5, if (ISTITLE_AVG < 0.085, 0.0107478, 6.58206E-4), 0.039025))) +
+if (DAY_WEEK_AVG_RATIO < 0.885, -8.98287E-4, if (MIN_SCORE < 482615.0, 1.37426E-4, if (AVG_SCORE < 506793.0, if (AVG_SCORE < 493340.0, 0.011503, 0.0451903), 1.36945E-5))) +
+if (DAY_PD_HITS_DERIV < -4.5, if (HEALTH < 0.13, if (BUSINESS < 0.96, 0.00144328, if (MAX_SCORE < 239157.0, -4.31323E-5, 0.0302083)), 0.0263586), 3.93517E-5) +
+if (DAY_LW_DAY_HITS_DERIV < 1.91, -3.30312E-4, if (MIN_SCORE < 254252.0, -2.87448E-4, if (WEEKAVG < 4.5, if (WEEKAVG < 0.93, 0.00341942, 0.0180965), -0.0144877))) +
+if (NUM_WORDS < 4.5, if (MAX_MIN_SCORE < 56141.5, 3.55635E-5, if (ISTITLE_AVG < 0.69, -0.0120653, -0.00193295)), if (DAY_HITS_FRAC < 0.585, 0.0109657, -0.00562292)) +
+if (MAX_SCORE < 597411.0, if (AVG_SCORE < 525986.0, if (AVG_SCORE < 504944.0, 6.12611E-5, if (AVG_SCORE < 512650.0, 0.0310299, 6.64858E-4)), -0.010433), 0.00965011) +
+if (DAY_PD_HITS_RATIO < 5.945, if (MAX_SCORE < 629654.0, 2.34339E-4, -0.01439), if (MAX_MIN_SCORE < 63226.5, if (LOCALNEWS < 0.28, -0.00423293, 0.00606695), 0.0188983)) +
+if (MIN_SCORE < 670535.0, if (DAY_PD_HITS_RATIO < 5.845, if (DAY_LW_DAY_HITS_RATIO < 5.47, -8.49912E-5, if (TOPSTORY < 0.105, -3.4055E-4, 0.0101604)), -0.00330677), 0.0174593) +
+if (DAY_PD_HITS_RATIO < 33.75, if (WEEKAVG < 4.46, 3.99921E-5, if (DAY_PREV_DAY_HITS_FRAC < 0.945, if (DAY_LW_DAY_HITS_RATIO < 48.5, -0.00420023, 0.0191669), -0.0241434)), 0.0157146) +
+if (DAY_WEEK_AVG_RATIO < 6.3, 2.4645E-4, if (DAY_WEEK_AVG_DERIV < 13.785, if (MIN_SCORE < 397526.0, -0.00908083, 0.00977666), if (AVG_SCORE < 289007.0, -0.00132101, 0.0196639))) +
+if (MIN_SCORE < 672810.0, if (MIN_SCORE < 631089.0, if (MAX_SCORE < 611207.0, if (MIN_SCORE < 512782.0, -3.64401E-5, -0.00741622), 0.0190309), -0.0227335), 0.0167703) +
+if (MAX_SCORE < 439769.0, 4.38016E-5, if (TOPSTORY < 0.22, -0.00593521, if (SUPERDUPER_AVG < 0.45, if (DAY_PD_HITS_DERIV < -1.5, 0.0429213, 0.0049244), -0.0152763))) +
+if (REGIONALNEWS < 0.105, if (POLITICS < 0.29, -8.83284E-5, -0.00708574), if (MAX_SCORE < 291999.0, 3.87947E-4, if (DAY_WEEK_AVG_DERIV < 2.145, -0.00777391, -0.0310452))) +
+if (MAX_MIN_SCORE < 61554.2, -2.35487E-5, if (NUM_WORDS < 2.5, if (MAX_MIN_SCORE < 88657.5, -0.00672369, if (DAY_PREV_DAY_HITS_FRAC < 0.295, -0.0026578, 0.0151957)), -0.0135855)) \ No newline at end of file
diff --git a/searchlib/src/test/files/ranking07.expression b/searchlib/src/test/files/ranking07.expression
new file mode 100644
index 00000000000..97b6528aa33
--- /dev/null
+++ b/searchlib/src/test/files/ranking07.expression
@@ -0,0 +1,200 @@
+if (attribute(catid) in [0,100200171,100300011,100200130,100300014,100300077,100200034,100200186,100400141,100300165,100200052,100300005,100200172,100300008,100200068,100300027,100300116,100300121,100200053,100300019,100400142,100200054,100300073,100200192,100300212,100300209,100400079,100200170,100300169,100400080,100200176,100300200,100200028,100300076,100200232], if (attribute(catid) in [100200186,100200068,100300121,100300019,100200176,100300200,100200028,100300076], if (attribute(catid) in [100200068,100300019,100200176,100300200], -0.0249999798, 0.0022099815), if (attribute(catid) in [0,100300011,100300014,100300077,100200034,100400141,100300165,100300005,100200172,100300008,100300027,100200053,100200192,100300209,100400079,100200170,100300169,100400080], if (attribute(catid) in [100300011,100300165,100300005,100300027,100200192,100300209,100400079,100400080], 0.013160154, if (attribute(catid) in [100300014,100200034,100400141,100200172,100300008,100200053,100200170,100300169], 0.0191030525, 0.021725414)), if (attribute(catid) in [100200130,100400142,100200054,100300073], 0.0270836867, 0.0305748922))), if (attribute(catid) in [100300058,100300166,100300102,100400037,100400038,100300065,100300127,100200087,100300066,100300006], 0.0410066553, if (attribute(catid) in [100300093,100200234,100300126,100200193,100300122,100300074], 0.0557829172, 0.0704327304))) +
+if (attribute(catid) in [0,100200171,100300011,100300014,100300058,100300077,100300143,100200034,100200186,100400141,100200052,100300102,100300005,100200172,100300008,100200068,100300027,100300116,100300121,100200234,100300019,100300073,100400038,100200192,100300065,100300209,100300127,100400079,100200170,100300169,100400080,100200087,100200176,100300200,100300076,100200055,100200232,100300214], if (attribute(catid) in [100300011,100300014,100300058,100300005,100200068,100300019,100300209,100400079,100200170,100400080,100200176,100300076,100200055,100200232,100300214], if (attribute(catid) in [100200068,100300019,100200055,100200232,100300214], -0.03599083, -0.0027644159), if (attribute(catid) in [100200171,100200034,100200186,100300008,100300116,100300073,100400038,100200192,100300127,100300169,100200087,100300200], if (attribute(catid) in [100200171,100200034,100200186,100300008,100300116,100200192], 0.0113307, 0.0164266261), if (attribute(catid) in [0,100400141,100200052,100300102,100300027,100300121,100300065], 0.021255028, 0.0272380704))), if (attribute(catid) in [100200130,100300013,100300166,100300004,100200054,100200193,100300212,100300074,100300066], if (attribute(catid) in [100200130,100300166], 0.0328865429, 0.0399735491), if (attribute(catid) in [100300165,100300093,100400142,100300122,100300006,100300146], 0.0477513417, 0.0587510469))) +
+if (attribute(catid) in [0,100200171,100300014,100300058,100300077,100200034,100200186,100400141,100300165,100300005,100200172,100300008,100200068,100300027,100300121,100200053,100300019,100300004,100300073,100400038,100200192,100300065,100300212,100400079,100200170,100300169,100400080,100200087,100300200,100300076,100300006,100200232,100300146], if (attribute(catid) in [100300014,100300058,100200034,100200186,100300008,100200068,100300019,100300212,100200232], if (attribute(catid) in [100300008,100200068,100300019,100200232], -0.0260716807, -8.004775E-4), if (attribute(catid) in [100300165,100300005,100300073,100400079,100200170,100400080,100200087,100300146], 0.0126841581, if (attribute(catid) in [0,100200171,100300077,100200172,100300065,100300006], 0.022881461, 0.0298499891))), if (attribute(catid) in [100300011,100200130,100300013,100300166,100200052,100300102,100300116,100200234,100400142,100200054,100300209,100300127,100300074,100300066,100200176,100200028], if (attribute(catid) in [100200130,100200052,100300102,100300116,100200234,100200054,100300209], 0.0393021257, 0.0475085975), if (attribute(catid) in [100400037,100300122,100200067], 0.0575085503, 0.0751742626))) +
+if (attribute(catid) in [0,100300011,100300014,100300077,100200186,100400141,100300165,100300005,100300008,100200068,100300032,100300027,100300121,100300019,100300126,100300073,100200192,100300065,100300212,100300209,100400079,100200170,100300169,100400080,100200087,100300074,100200176,100300200,100200028,100300076,100200067,100200055,100300006], if (attribute(catid) in [100300005,100300008,100200068,100300032,100300209,100400080,100200028,100200067,100200055], if (attribute(catid) in [100300005,100300008,100300032,100300209,100200067,100200055], -0.0365460976, -0.0109180769), if (attribute(catid) in [100300014,100300073,100200192,100300212,100400079,100300074,100200176], 0.0093762436, if (attribute(catid) in [100200186,100300165,100300126,100200170,100300169,100300200,100300076], 0.0193739138, if (attribute(catid) in [0,100300077,100200087], 0.0231180054, 0.0274056462)))), if (attribute(catid) in [100200171,100200130,100300058,100200034,100200052,100200172,100300116,100200053,100400142,100200054,100300066], if (attribute(catid) in [100200171,100200130,100300058,100300116,100200054,100300066], 0.0339904435, 0.0402629873), if (attribute(catid) in [100300013,100300166,100300102,100200234,100300004,100400038,100300122,100300127,100200185], 0.0471640537, 0.0679501752))) +
+if (attribute(catid) in [0,100200171,100200130,100300014,100300058,100300013,100200034,100200186,100400141,100300165,100300005,100200172,100300008,100200068,100300032,100300027,100300116,100300121,100300019,100300004,100300073,100200192,100300065,100300212,100300209,100400079,100200170,100300169,100400080,100200087,100300074,100200176,100300200,100200028,100300076,100200067,100300146], if (attribute(catid) in [100300014,100200034,100200186,100400141,100300005,100300008,100200068,100300032,100300121,100300019,100300004,100200192,100300212,100300209,100400079,100200170,100400080,100200176,100300200,100200028,100300076,100200067,100300146], if (attribute(catid) in [100300014,100200034,100200068,100300032,100300019,100300212,100300209,100200170,100300200,100200028,100200067], if (attribute(catid) in [100300032,100300019,100300212,100300209,100300200,100200028,100200067], -0.0252149649, 5.982331E-4), 0.0109551118), if (attribute(catid) in [0,100200171,100200172,100300027,100300073,100300065,100300169,100200087,100300074], if (attribute(catid) in [100200171,100300073,100300169,100200087,100300074], 0.0192764204, 0.023932401), 0.0295724103)), if (attribute(catid) in [100300011,100300077,100300166,100200052,100200234,100200053,100400142,100400038,100300122,100300127,100300066], if (attribute(catid) in [100300011,100300077,100300166,100200052,100200234,100400038], 0.0362646736, 0.045898507), if (attribute(catid) in [100300143,100300093,100300102,100300126,100200193,100300006], 0.0576959337, 0.0940124464))) +
+if (attribute(catid) in [0,100200171,100300014,100300013,100300077,100200034,100200186,100300165,100200052,100300102,100300005,100200172,100300008,100200068,100300032,100300027,100300073,100400038,100200192,100300065,100300212,100300209,100400079,100200170,100300169,100400080,100300074,100300066,100200176,100300200,100300076,100200067,100300006,100300214,100300146], if (attribute(catid) in [100300014,100300102,100300005,100300008,100200068,100300032,100300027,100300212,100400079,100300076,100300214,100300146], if (attribute(catid) in [100300008,100200068,100300032,100300212,100300214], if (attribute(catid) in [100300008,100300032,100300214], -0.0524432898, -0.0132279367), 0.003480139), if (attribute(catid) in [100200171,100300013,100200034,100200186,100200052,100300073,100400038,100200192,100300169,100400080,100300074,100300066,100300200,100200067,100300006], if (attribute(catid) in [100300013,100200186,100200052,100300073,100400038,100300169,100400080,100300074,100200067,100300006], 0.017975983, 0.0212068067), if (attribute(catid) in [100300077,100300165,100300209,100200170,100200176], 0.0229665861, 0.0258231076))), if (attribute(catid) in [100300011,100200130,100300058,100300166,100300143,100400141,100300093,100300116,100300121,100200053,100300004,100400142,100200054,100300122,100300127,100200087,100200232], if (attribute(catid) in [100300011,100200130,100300058,100300166,100300143,100300093,100300116,100200053,100200054,100300122,100200087], 0.0353581654, 0.0430524781), if (attribute(catid) in [100200234,100300019,100400037,100200028], 0.0542526213, 0.0961212144))) +
+if (attribute(catid) in [0,100200171,100300011,100200130,100300014,100300058,100300077,100200034,100200186,100400141,100200052,100300102,100300005,100300008,100200068,100300032,100300027,100300116,100300121,100200053,100300019,100200054,100300073,100200192,100300209,100400079,100200170,100300169,100400080,100200087,100300200,100200028,100300076,100200067,100300006,100200232,100300214,100300146], if (attribute(catid) in [100200171,100300011,100300014,100300102,100300005,100300008,100200068,100300032,100300027,100200192,100300209,100400079,100400080,100200087,100300076,100200067,100300006,100300214,100300146], if (attribute(catid) in [100300008,100200068,100300032,100300209,100300214], -0.0256804569, 0.0046816048), if (attribute(catid) in [100300058,100200186,100400141,100300121,100300019,100200170,100300169,100300200], 0.0160713107, if (attribute(catid) in [0,100200034,100200052,100200232], 0.022153881, 0.0250017744))), if (attribute(catid) in [100300166,100300143,100300165,100300093,100200172,100200234,100300004,100300126,100400142,100400038,100300065,100300127,100300074,100300066,100200185], if (attribute(catid) in [100300143,100300165,100300093,100200172,100200234,100300126,100400142,100400038,100300065,100300066,100200185], if (attribute(catid) in [100300093,100200234,100400038,100300065,100300066], 0.0315719603, 0.0353792385), 0.0430233685), if (attribute(catid) in [100200193,100300122], 0.0518243263, 0.0744220771))) +
+if (attribute(catid) in [0,100200171,100300011,100300014,100300058,100300077,100300166,100200034,100200186,100400141,100300165,100200052,100300093,100300102,100300005,100300008,100200068,100300032,100300027,100300116,100300121,100300019,100300073,100400037,100200192,100300065,100300122,100300127,100400079,100200170,100300169,100400080,100200087,100200176,100300200,100300076,100200067,100300006,100300214,100300146], if (attribute(catid) in [100300011,100300014,100200034,100200186,100400141,100300102,100300005,100300008,100200068,100300032,100300027,100300116,100200192,100400079,100400080,100200176,100300200,100300214,100300146], if (attribute(catid) in [100300011,100300102,100300008,100200068,100300032,100300200,100300214], -0.0241441823, if (attribute(catid) in [100300014,100400141,100300005,100200192,100400080,100200176], 0.0020142953, 0.0081257199)), if (attribute(catid) in [100300058,100300077,100200052,100300093,100300073,100400037,100300065,100200170,100200087,100200067], 0.014123946, if (attribute(catid) in [0,100200171,100300165,100300121,100300019,100300169], if (attribute(catid) in [0], 0.0217711535, 0.023534876), 0.0294010162))), if (attribute(catid) in [100200130,100300143,100200172,100200234,100200053,100400142,100400038,100300212,100300209,100300066], if (attribute(catid) in [100200130,100200172,100200053,100400142,100400038,100300212,100300209], 0.03583431, 0.0447717702), if (attribute(catid) in [100300004,100300126,100300074,100300007,100300045,100200028,100200185,100200232], 0.0622909986, 0.0942393297))) +
+if (attribute(catid) in [100300014,100200034,100300102,100300005,100200068,100300019,100200054,100300209,100400079,100200170,100400080,100300200,100200028,100300076,100200067,100200185,100200232,100300214,100300146], if (attribute(catid) in [100300102,100300005,100200068,100300209,100200028,100200067,100200232,100300214], -0.0222756779, -0.0032979771), if (attribute(catid) in [0,100200171,100200130,100300058,100300077,100200186,100400141,100300165,100200052,100300093,100200172,100300008,100300032,100300027,100300116,100300121,100200053,100400142,100300073,100200192,100300065,100300212,100300122,100300127,100300169,100200087,100300074,100300006], if (attribute(catid) in [100300058,100300077,100200186,100400141,100200052,100300008,100300032,100300073,100200192,100300212,100300169,100200087], if (attribute(catid) in [100300077,100200186,100400141,100300032,100200192,100300212], 0.011447905, 0.0165377861), if (attribute(catid) in [100200171,100300165,100300093,100200172,100300121], 0.0209845722, if (attribute(catid) in [0,100200130], 0.0242667474, 0.0268049425))), if (attribute(catid) in [100300011,100300166,100200234,100300004,100400037,100400038,100300066,100200176], 0.0447283469, 0.0603545392))) +
+if (attribute(catid) in [0,100200171,100300014,100300058,100300077,100200034,100200186,100400141,100300165,100200052,100300093,100300005,100200172,100300008,100200068,100300032,100300027,100300121,100200053,100200054,100300073,100400038,100200192,100300065,100300212,100300209,100400079,100200170,100300169,100400080,100200087,100300074,100300200,100300076,100200067,100200055,100200232,100300214], if (attribute(catid) in [100300005,100300008,100200068,100300032,100300121,100300212,100400079,100400080,100300200,100300076,100200055,100300214], if (attribute(catid) in [100200068,100300076,100200055,100300214], -0.0265329011, -0.003851894), if (attribute(catid) in [100200171,100300014,100400141,100300027,100200054,100300073,100200192,100200087,100300074,100200067], 0.0107802387, if (attribute(catid) in [0,100300058,100300077,100200186,100300093,100200053,100200170,100300169,100200232], 0.0212053257, 0.0251822224))), if (attribute(catid) in [100300011,100200130,100300013,100300166,100300143,100300102,100300116,100200234,100300004,100400142,100200193,100300122,100300127,100300066,100200176,100200028,100300006], if (attribute(catid) in [100200130,100300116,100200234,100300006], 0.031445719, if (attribute(catid) in [100300166,100400142,100300122,100300127,100300066,100200028], 0.0391757711, 0.0454843261)), 0.0873814277)) +
+if (attribute(catid) in [0,100300014,100300058,100300013,100200186,100400141,100300165,100200052,100300102,100300005,100200172,100300008,100200068,100300032,100300027,100300116,100300121,100200234,100300073,100400038,100200192,100300212,100400079,100200170,100300169,100400080,100200087,100300074,100300066,100300200,100200028,100300076,100300006,100200232], if (attribute(catid) in [100300013,100200186,100300165,100200052,100300102,100300005,100200068,100300116,100200234,100300073,100400079,100300169,100400080,100200087,100300200,100300076], if (attribute(catid) in [100300102,100200068,100200234,100400080,100200087,100300200], if (attribute(catid) in [100200068], -0.0151909005, -0.0021225032), if (attribute(catid) in [100300005,100300073,100400079,100300169,100300076], 0.0086835438, 0.0120329553)), if (attribute(catid) in [100300058,100400141,100300008,100300032,100300027,100300121,100400038,100200192,100300212,100200170,100200028,100300006], 0.0171461073, 0.0218015413)), if (attribute(catid) in [100200171,100300011,100200130,100300077,100300166,100200034,100200053,100300019,100400142,100200054,100400037,100300065,100300122,100300127,100200176], if (attribute(catid) in [100200171,100300011,100200034,100200053,100300019,100200054,100300065], 0.0280408356, 0.0355357753), if (attribute(catid) in [100300093,100300004,100300126,100200185], 0.0584272687, 0.0854108429))) +
+if (attribute(catid) in [100300011,100300143,100200034,100300093,100300005,100300008,100200068,100300019,100300073,100400079,100200170,100400080,100200087,100200176,100300200,100300076,100200055,100200185,100300006], if (attribute(catid) in [100300008,100200068,100400080,100200176,100200055], -0.0169257508, 0.0096089202), if (attribute(catid) in [0,100200171,100200130,100300014,100300013,100300077,100300166,100200186,100400141,100300165,100200052,100300102,100200172,100300032,100300027,100300121,100200053,100300004,100400142,100200054,100400037,100200192,100300065,100300209,100300122,100300127,100300169,100300074], if (attribute(catid) in [0,100300014,100300077,100300166,100200186,100400141,100200052,100200172,100300032,100300121,100200053,100400037,100200192,100300065,100300169], if (attribute(catid) in [100300077,100200186,100200052,100300032,100400037,100200192], 0.0188605145, if (attribute(catid) in [0,100300014,100200053], 0.0230703185, 0.0248762385)), 0.0312398602), if (attribute(catid) in [100300116,100400038,100300212,100300066,100200232], 0.0379114379, 0.0540119608))) +
+if (attribute(catid) in [100300011,100300014,100300058,100200186,100400141,100300102,100300005,100200068,100300032,100300121,100300073,100200192,100300209,100400079,100300169,100400080,100200087,100200176,100200028,100200067,100200055,100200232], if (attribute(catid) in [100300011,100300014,100300058,100300005,100200068,100300209,100400080,100200087,100200028,100200067,100200055,100200232], if (attribute(catid) in [100300209,100200087,100200028,100200067,100200055,100200232], -0.0225817796, -8.082327E-4), if (attribute(catid) in [100300102,100300121,100300073,100200192,100400079], 0.0088591799, 0.0138162711)), if (attribute(catid) in [0,100200171,100300013,100200034,100300165,100200172,100300027,100300116,100200234,100300004,100400142,100200054,100300065,100300122,100300127,100200170,100300006], if (attribute(catid) in [0,100200034,100200172,100300027,100300116,100200054,100300065,100300127], if (attribute(catid) in [100200034,100200172,100300027,100300116,100200054,100300065], 0.0185182017, 0.0221653757), 0.0258671547), if (attribute(catid) in [100300166,100200052,100300093,100200053,100400037,100300076], 0.0319314298, if (attribute(catid) in [100200130,100300143,100300008,100400038,100300074,100300066], 0.0399544136, 0.0491124971)))) +
+if (attribute(catid) in [0,100300011,100300013,100300077,100200034,100200186,100400141,100300165,100200052,100300005,100300008,100200068,100300032,100300027,100300116,100300121,100300073,100400038,100300065,100300209,100400079,100200170,100300169,100400080,100200087,100200176,100300200,100200028,100300076,100200067,100300006,100200232,100300214], if (attribute(catid) in [100300011,100300005,100300008,100300032,100400080,100200087,100300076,100300214], if (attribute(catid) in [100300008,100300032,100200087,100300076,100300214], -0.0316835796, -0.0075185917), if (attribute(catid) in [100300077,100200034,100400141,100300165,100200068,100300027,100300121,100300209,100200176,100300200,100200067,100300006], 0.0093589722, if (attribute(catid) in [0,100200052,100300073,100400079,100200232], 0.0185920468, 0.0231228547))), if (attribute(catid) in [100200171,100200130,100300014,100300058,100300166,100300093,100300102,100200172,100200234,100200193,100200192,100300122,100300127,100300074,100300066], if (attribute(catid) in [100200171,100200130,100300014,100300058,100300102,100200172,100200234,100200192,100300127], if (attribute(catid) in [100200130,100200234,100200192], 0.0282114001, 0.0319414987), 0.0377741997), if (attribute(catid) in [100200053,100300004,100400142,100200054,100400037,100200185], 0.0450431326, 0.0654935018))) +
+if (attribute(catid) in [100300011,100300013,100200034,100200186,100300102,100300005,100300008,100200068,100300032,100300027,100300116,100300019,100300126,100300073,100400037,100200192,100300065,100300209,100400079,100200170,100400080,100200087,100300074,100300200,100200067,100300006], if (attribute(catid) in [100300005,100300008,100300032,100300019,100300209,100200067], -0.0352996105, if (attribute(catid) in [100300011,100200034,100200186,100200068,100200192,100200170,100200087,100300074], 0.0029355359, 0.0110257031)), if (attribute(catid) in [0,100200171,100300014,100300165,100200052,100200172,100300121,100200234,100200053,100400142,100200054,100300122,100300127,100300169,100300066], if (attribute(catid) in [100300165,100300121,100200053,100400142,100300127,100300169], 0.0200666023, if (attribute(catid) in [0,100200171,100200052,100200172,100200054], 0.0258497457, 0.0331869782)), if (attribute(catid) in [100200130,100300077,100300166,100400141,100300093,100300004,100300007,100200028], 0.0426763778, if (attribute(catid) in [100300143,100400038,100200176], 0.0551482574, 0.0805987774)))) +
+if (attribute(catid) in [0,100200171,100300014,100300058,100300077,100300166,100200034,100200186,100200052,100300102,100300005,100200172,100200068,100300032,100300116,100300121,100200053,100300073,100400037,100400038,100200192,100300065,100300212,100300209,100300122,100300127,100400079,100200170,100300169,100400080,100200087,100200176,100300200,100200028,100300076,100200067,100300146], if (attribute(catid) in [100300014,100200034,100300005,100200068,100300032,100200192,100300212,100300209,100200176,100300200,100200028,100200067,100300146], if (attribute(catid) in [100200068,100300032,100200176,100200028,100200067,100300146], -0.0188052149, -0.0014384095), if (attribute(catid) in [100300058,100300102,100200053,100300073,100300065,100400079,100300169,100400080,100200087,100300076], 0.0138476724, if (attribute(catid) in [0,100200171,100200186,100200052,100400038,100200170], 0.0196068633, 0.0248333768))), if (attribute(catid) in [100300011,100200130,100300143,100400141,100300165,100300093,100300027,100200234,100300019,100300004,100400142,100200193,100300074,100300066,100200232], if (attribute(catid) in [100200130,100400141,100300165,100300027,100300019,100300004,100300074,100300066], 0.0337546327, 0.0412000578), 0.0666143289)) +
+if (attribute(catid) in [100200171,100300011,100300077,100200034,100200186,100300102,100300005,100300008,100200068,100300032,100300027,100300116,100300121,100300019,100300073,100400038,100300209,100400079,100400080,100300074,100300200,100200067,100200055,100300006], if (attribute(catid) in [100200034,100200186,100300005,100300008,100200068,100300019,100300200,100200067,100200055], if (attribute(catid) in [100300008,100200068,100200067,100200055], -0.0193944486, -0.0039850146), if (attribute(catid) in [100300011,100300102,100300027,100300116,100300121,100400038,100300209,100400080,100300074], 0.0038699264, 0.0111071757)), if (attribute(catid) in [0,100200130,100300014,100300058,100300013,100300166,100300143,100200052,100200172,100200054,100200192,100300065,100300127,100200170,100300169,100200087,100200176,100200028,100300076,100300146], if (attribute(catid) in [0,100200130,100300058,100300143,100200172,100200054,100200192,100200170,100300169,100200087,100200176,100200028,100300076], if (attribute(catid) in [100200130,100300143,100200192,100300169,100200176,100200028], 0.0184644801, 0.021360636), 0.0266245188), if (attribute(catid) in [100400141,100300165,100200053,100300004,100300126,100300212,100300122,100300066], if (attribute(catid) in [100200053,100300004,100300126,100300122], 0.0334635662, 0.0386077462), 0.0470519595))) +
+if (attribute(catid) in [0,100300011,100300014,100300058,100300077,100300143,100200034,100200186,100400141,100300093,100300102,100300005,100200172,100300008,100200068,100300027,100300121,100200053,100400142,100300073,100200192,100400079,100200170,100300169,100400080,100200087,100300074,100300200,100200028,100200055,100200232,100300146], if (attribute(catid) in [100300011,100300014,100200034,100300008,100200068,100300121,100200192,100200170,100300074,100300200,100200055,100200232,100300146], if (attribute(catid) in [100200034,100300008,100300121,100300200,100200055,100200232,100300146], -0.0069202095, 0.0039000323), if (attribute(catid) in [100300058,100400141,100300093,100300102,100300005,100200172,100400142,100300073,100400080,100200087], 0.0156946965, if (attribute(catid) in [0], 0.0175514273, 0.0195153127))), if (attribute(catid) in [100200171,100200130,100300166,100300165,100200052,100300032,100300116,100200234,100300004,100300126,100400038,100300065,100300209,100300066,100300007,100200176,100300076], if (attribute(catid) in [100300166,100300165,100300004,100300126,100300065,100300209], 0.0260422255, if (attribute(catid) in [100200171,100200130,100300032,100400038,100300066,100300076], 0.0288416138, 0.0331073272)), if (attribute(catid) in [100200054,100200193,100300122,100300127,100300045,100200067], 0.0443969439, 0.0673805882))) +
+if (attribute(catid) in [0,100200171,100300011,100300014,100300058,100300013,100300077,100200034,100200186,100400141,100300165,100200052,100300093,100300005,100200172,100200068,100300032,100300027,100300116,100400142,100300073,100200192,100300209,100400079,100200170,100300169,100400080,100200087,100300200,100200028,100300076,100200055,100200232], if (attribute(catid) in [100200034,100200068,100300209,100200170,100300200,100200028,100200055,100200232], if (attribute(catid) in [100200034,100300209,100300200,100200028,100200055,100200232], -0.0248522225, -0.0018897827), if (attribute(catid) in [100200171,100300013,100300077,100200186,100400141,100300093,100300027,100300116,100400142,100400079,100400080], if (attribute(catid) in [100200171,100300077,100200186,100300116,100400079], 0.009897739, 0.0135323202), if (attribute(catid) in [100300058,100300165,100200052,100200172,100300169,100200087,100300076], 0.0178483129, 0.0206390742))), if (attribute(catid) in [100200130,100300166,100300008,100200234,100300004,100300126,100400037,100400038,100300065,100300122,100300074,100300066,100300006,100300146], if (attribute(catid) in [100200130,100300166,100200234,100300065,100300146], 0.0310277032, 0.0370699377), if (attribute(catid) in [100300121,100200053,100300212,100300127,100200176,100200185], 0.0485097295, 0.0645157682))) +
+if (attribute(catid) in [0,100200171,100300011,100200130,100300058,100300077,100300143,100200034,100200186,100400141,100200052,100300005,100200172,100300008,100200068,100300027,100300116,100300121,100200053,100300019,100300004,100300073,100400038,100200192,100300065,100300209,100300127,100400079,100300169,100400080,100200087,100300074,100300066,100300200,100200055,100300006,100200232,100300214], if (attribute(catid) in [100200171,100300011,100300077,100200034,100200186,100300005,100200068,100200053,100300019,100300004,100300073,100400038,100200192,100300209,100400079,100200087,100300074,100200055,100300006,100300214], if (attribute(catid) in [100300005,100200068,100300019,100300209,100200087,100200055,100300006,100300214], -0.0244019521, if (attribute(catid) in [100200186,100400038,100400079,100300074], 0.003439916, 0.0084132649)), if (attribute(catid) in [100200130,100400141,100200052,100200172,100300008,100300027,100300116,100300065,100300169,100400080,100300200], if (attribute(catid) in [100400141,100200052,100200172,100300116,100300065,100400080,100300200], 0.0146253305, 0.0185737842), 0.0224432378)), if (attribute(catid) in [100300166,100300165,100300032,100400142,100300122,100200170,100300007,100200028], if (attribute(catid) in [100300166,100300032,100400142,100200170], 0.0312540362, 0.0367389808), if (attribute(catid) in [100300014,100200234,100400037,100200193,100200176,100200067,100200185], 0.0515240946, 0.0623565161))) +
+if (attribute(catid) in [0,100200171,100300011,100300014,100300058,100300013,100300077,100200186,100400141,100300165,100200052,100300093,100300102,100300005,100200172,100300008,100200068,100300032,100300027,100300121,100200234,100300019,100300004,100200054,100300073,100400037,100400038,100300212,100300209,100400079,100300169,100400080,100200087,100300066,100300200,100200028,100300076,100200067,100300006,100300214], if (attribute(catid) in [100300013,100300093,100300008,100200068,100300019,100300073,100300212,100300209,100400080,100200087,100300200,100200028,100300076,100200067,100300214], if (attribute(catid) in [100300008,100200068,100300019,100300212,100400080,100200067,100300214], -0.0143906523, 0.0034452824), if (attribute(catid) in [100300058,100200186,100400141,100300165,100200052,100300005,100300032,100300027,100200234,100200054,100400038,100400079,100300169,100300006], if (attribute(catid) in [100300058,100300005,100300027,100200234,100200054,100300169,100300006], 0.0099743393, 0.0144610757), if (attribute(catid) in [0,100300011,100300014,100300102,100200172,100300004,100400037], 0.019416211, 0.0220846421))), if (attribute(catid) in [100200130,100300166,100200034,100200053,100200192,100300065,100200170,100200176], 0.0303840891, if (attribute(catid) in [100300116,100400142,100300122,100300127,100300074,100300045], 0.0417668157, 0.0552431545))) +
+if (attribute(catid) in [0,100200171,100300011,100200130,100300058,100300077,100200186,100400141,100300165,100300102,100300005,100200172,100200068,100300032,100300027,100300116,100300121,100200234,100200053,100300126,100200054,100300073,100400038,100200192,100300209,100400079,100300169,100400080,100200087,100200176,100300200,100200028,100300076,100200055,100300006,100200232], if (attribute(catid) in [100300011,100300058,100200186,100300165,100300005,100200068,100300032,100300116,100300121,100200234,100300126,100200054,100400038,100200192,100300209,100400079,100200176,100200028,100200055,100200232], if (attribute(catid) in [100300058,100300005,100200068,100300032,100300209,100200028,100200055,100200232], if (attribute(catid) in [100300058,100300005,100300032,100300209,100200055], -0.0199572721, -1.34782E-5), 0.00950394), if (attribute(catid) in [100200171,100300077,100400141,100300073,100400080,100200087,100300200], 0.0170204672, if (attribute(catid) in [0,100200130,100200053,100300006], 0.0199906818, 0.0230038494))), if (attribute(catid) in [100300014,100300013,100300166,100200034,100200052,100300093,100300008,100400142,100200193,100300065,100300122,100300127,100200170,100300074,100300066,100300045], if (attribute(catid) in [100300013,100300166,100200034,100300093,100300008,100400142,100300127], 0.0272410205, 0.035385042), 0.0546059415)) +
+if (attribute(catid) in [100300014,100300013,100200186,100300165,100300093,100300102,100300005,100300008,100200068,100300027,100300116,100200234,100300073,100400038,100200192,100300212,100400079,100300169,100400080,100300074,100300200,100300076,100200067,100300006,100200232,100300214], if (attribute(catid) in [100300008,100200068,100300027,100300212,100400080,100300074,100300200,100200067,100200232,100300214], if (attribute(catid) in [100300008,100200068,100300212,100300200,100200067,100200232,100300214], -0.0257347618, -0.0087401374), if (attribute(catid) in [100300014,100300165,100300102,100300073,100200192,100400079,100300076], 0.0079479453, 0.0122270306)), if (attribute(catid) in [0,100200171,100300011,100200130,100300058,100300077,100300166,100200052,100300121,100300004,100300126,100400142,100200054,100400037,100300065,100300122,100300127,100200170,100200176], if (attribute(catid) in [0,100200130,100200052,100300121,100300004,100300126,100200054,100300065,100300122,100200176], if (attribute(catid) in [100200130,100200052,100300121,100300004,100300065,100300122], 0.019048709, 0.0215394009), 0.0276338957), if (attribute(catid) in [100200034,100400141,100200172,100300032,100200053,100300209,100300066,100200185], 0.0397536732, 0.0689753704))) +
+if (attribute(catid) in [0,100200171,100300011,100300014,100300058,100300077,100200034,100200186,100400141,100200052,100300093,100300005,100200172,100200068,100300027,100300121,100200234,100300019,100300073,100200193,100400038,100200192,100300065,100300212,100300127,100400079,100200170,100300169,100400080,100200087,100300074,100300066,100200176,100300200,100200067,100200055,100300006,100300214], if (attribute(catid) in [100200171,100300011,100300077,100200186,100200052,100300093,100300005,100200068,100300027,100300121,100200234,100200192,100300065,100300212,100300127,100400079,100200170,100400080,100200087,100300200,100200067,100200055,100300214], if (attribute(catid) in [100200186,100300005,100200068,100300212,100200170,100200087,100300200,100200055,100300214], -0.0052715451, if (attribute(catid) in [100200171,100300011,100300093,100300027,100200234,100300127,100400079,100200067], 0.0075908988, 0.0114788963)), if (attribute(catid) in [100300019,100300073,100200193,100300169,100300074,100300066,100300006], 0.0146049077, 0.0198627318)), if (attribute(catid) in [100300165,100300008,100300032,100300116,100200053,100400142,100400037,100300122], 0.0286281196, if (attribute(catid) in [100200130,100300013,100300166,100300004,100300126,100200054,100300007,100200028], 0.0377626212, 0.0646214069))) +
+if (attribute(catid) in [0,100200171,100300011,100300014,100300058,100300077,100200186,100400141,100300165,100200052,100300093,100300005,100200172,100200068,100300032,100300027,100300121,100200234,100300019,100400142,100300073,100200193,100200192,100300209,100300122,100300127,100400079,100200170,100300169,100400080,100200087,100300074,100300066,100300200,100200028,100300006,100200232], if (attribute(catid) in [100300014,100300058,100200186,100400141,100300093,100300005,100300032,100200234,100300019,100300073,100200192,100300169,100200087,100300074,100300200,100200232], if (attribute(catid) in [100300014,100300058,100200186,100300032,100300019,100300074,100300200,100200232], -0.007621457, 0.0077673481), if (attribute(catid) in [0,100200171,100300011,100300077,100300165,100200068,100300121,100200193,100300209,100300122,100400079,100400080,100300066,100200028,100300006], if (attribute(catid) in [100200171,100300011,100300165,100400079,100300006], 0.0143096613, 0.0162758268), 0.02375285)), if (attribute(catid) in [100200130,100300166,100200034,100300116,100200053,100300004,100200054,100400038,100300065,100300212,100300007,100200176,100300045,100300076], if (attribute(catid) in [100200130,100300166,100200034,100300116,100200053], 0.0297411208, 0.0387614885), if (attribute(catid) in [100300143,100300126,100400037,100200185], 0.0600165302, 0.096147213))) +
+if (attribute(catid) in [0,100200171,100200130,100300058,100300077,100200034,100200186,100400141,100200052,100300005,100200172,100300008,100200068,100300032,100300027,100300116,100300121,100300004,100300073,100400038,100200192,100300212,100300209,100400079,100200170,100400080,100200087,100300074,100300066,100300007,100300200,100200028,100200067,100200055,100200232,100300214,100300146], if (attribute(catid) in [100200034,100300005,100300008,100200068,100300116,100300212,100300209,100400079,100400080,100300200,100200067,100200055,100200232,100300214,100300146], if (attribute(catid) in [100200034,100300008,100300212,100300209,100200055,100200232,100300214,100300146], -0.0143064261, 0.0061747257), if (attribute(catid) in [100200171,100300058,100200186,100400141,100200172,100300032,100300027,100300121,100300004,100400038,100200170,100200087,100300074,100200028], 0.0140796593, 0.0185037483)), if (attribute(catid) in [100300011,100300014,100300013,100300166,100300143,100300165,100300093,100300102,100400142,100400037,100300065,100300122,100300127,100300169,100300006], if (attribute(catid) in [100300011,100300014,100300013,100300166,100300165,100300102,100400142,100400037,100300169], 0.0279089674, 0.0344726516), 0.0515666225)) +
+if (attribute(catid) in [0,100200171,100300011,100300014,100300077,100300166,100300143,100200034,100200186,100400141,100200052,100300093,100300005,100200172,100300008,100200068,100300032,100300027,100300121,100200053,100400142,100200054,100300073,100400037,100200192,100300209,100300122,100400079,100200170,100300169,100400080,100200087,100300066,100200176,100300200,100200028,100300076,100200185], if (attribute(catid) in [100300011,100300014,100200068,100300032,100200053,100200192,100300209,100300122,100200170,100400080,100200176,100200028,100300076,100200185], if (attribute(catid) in [100300014,100200068,100300032,100300209,100400080,100200176,100200028,100300076,100200185], -0.0100026799, 0.0069768979), if (attribute(catid) in [100300077,100300143,100400141,100200052,100300005,100300008,100300121,100400079,100300169,100200087], 0.0149447853, if (attribute(catid) in [0,100300166,100200034,100200186,100200172,100400037,100300066,100300200], 0.0207339117, 0.0250275322))), if (attribute(catid) in [100200130,100300058,100300165,100300102,100300116,100200234,100300004,100300126,100200193,100400038,100300065,100300127,100300074,100300045,100300006], if (attribute(catid) in [100300058,100300102,100300116,100200234,100300126,100200193,100400038,100300127,100300006], 0.0305216411, 0.0392374586), 0.07351205)) +
+if (attribute(catid) in [0,100200171,100300011,100200130,100300014,100300058,100300166,100200186,100400141,100300165,100200052,100300102,100300005,100200172,100300008,100200068,100300027,100300121,100200234,100200053,100300019,100300004,100400142,100300073,100400037,100200193,100400038,100200192,100300065,100300209,100300122,100400079,100200170,100300169,100400080,100300066,100200176,100300076,100200067,100200055,100300006,100300214,100300146], if (attribute(catid) in [100200171,100300011,100300014,100400141,100300102,100300005,100300008,100300121,100300019,100200193,100200192,100300209,100200170,100400080,100200067,100200055,100300214], if (attribute(catid) in [100300102,100300008,100300019,100200193,100300209,100200067,100200055,100300214], -0.0452597976, if (attribute(catid) in [100400141,100300005,100200192,100400080], 0.002579873, 0.0085058714)), if (attribute(catid) in [100200130,100200186,100300165,100200172,100200068,100300027,100200234,100400142,100300066,100200176,100300076,100300146], if (attribute(catid) in [100200130,100200186,100200068,100300066,100200176,100300076,100300146], 0.0134972332, 0.0161598104), if (attribute(catid) in [0,100200053,100300073], 0.0201733337, 0.0242718101))), if (attribute(catid) in [100300013,100300077,100300143,100200034,100300093,100300116,100300127,100200087,100300074,100300007,100300200,100300045], 0.0376364024, 0.0722294524)) +
+if (attribute(catid) in [100300014,100400141,100300102,100300008,100200068,100300032,100200234,100300019,100400038,100300212,100400080,100200176,100300200,100200028,100200055,100200185,100300006,100300214], if (attribute(catid) in [100300014,100300102,100300008,100200068,100300032,100300019,100300212,100300200,100200055,100300214], if (attribute(catid) in [100300102,100300008,100300032,100300019,100300212,100300200,100200055,100300214], -0.0330162432, -0.0099054066), 0.0037184723), if (attribute(catid) in [0,100200171,100200130,100300058,100300166,100300143,100200034,100200186,100300165,100200052,100300093,100300005,100200172,100300027,100200053,100200192,100300065,100300127,100400079,100200170,100300169,100300074,100300076], if (attribute(catid) in [100200171,100300166,100200034,100200186,100300165,100300093,100300005,100300027,100200053,100400079,100300169,100300074], if (attribute(catid) in [100200171,100300166,100200186,100300005,100300027,100200053,100300169], 0.0128773968, 0.0170605503), if (attribute(catid) in [0,100300143,100200192,100300065,100200170,100300076], 0.0199592353, 0.0237865531)), if (attribute(catid) in [100300077,100300121,100400142,100300073,100200193,100200087,100300066,100300007], 0.0288156047, 0.0451598089))) +
+if (attribute(catid) in [100200171,100300011,100300013,100200034,100200186,100200052,100300102,100300008,100200068,100300027,100300121,100200053,100300019,100300004,100300073,100200193,100400038,100200192,100300212,100300127,100400079,100200170,100400080,100200087,100300074,100300007,100300214,100300146], if (attribute(catid) in [100300013,100200034,100200068,100300121,100300019,100200170,100200087,100300214,100300146], if (attribute(catid) in [100300013,100200034,100300121,100300019,100300214,100300146], -0.0200374966, -0.0056497245), if (attribute(catid) in [100300011,100200186,100300102,100300008,100300004,100200192,100300212,100400079,100400080,100300074], 0.0036157343, 0.0117177746)), if (attribute(catid) in [0,100200130,100300014,100300058,100300166,100300143,100400141,100300165,100300093,100300005,100200172,100300032,100300116,100300126,100400142,100300065,100300122,100300169,100300066,100300200,100200028,100300006], if (attribute(catid) in [0,100200130,100300166,100400141,100300165,100200172,100300032,100300066,100300006], if (attribute(catid) in [100200130,100400141,100300165,100200172,100300032,100300006], 0.0173296173, 0.0209361475), 0.0268947656), if (attribute(catid) in [100300077,100400037,100300209,100200176,100300045,100300076,100200185], 0.0431779718, 0.0596202146))) +
+if (attribute(catid) in [100300011,100300014,100200034,100300102,100300005,100200068,100300032,100300027,100300116,100200234,100300073,100400038,100200192,100400079,100200170,100400080,100300200,100200028,100200067,100200055,100300006,100200232,100300146], if (attribute(catid) in [100300011,100300102,100200068,100300032,100300027,100400038,100300200,100200067,100200055], -0.0105115826, if (attribute(catid) in [100300014,100200234,100200028,100300006,100200232], 7.402621E-4, 0.0078629039)), if (attribute(catid) in [0,100200171,100200130,100300077,100300143,100200186,100400141,100300165,100200052,100200172,100300121,100200053,100300019,100400142,100300122,100300127,100300169,100200087,100200176], if (attribute(catid) in [100200171,100200130,100200186,100200052,100200172,100200053,100300122,100200176], 0.0148540934, if (attribute(catid) in [0,100300077,100300143,100400141,100300019,100400142,100300127], 0.0193734454, 0.021732037)), if (attribute(catid) in [100300166,100300126,100200054,100200193,100300065,100300212,100300066], 0.0305394508, 0.0503395698))) +
+if (attribute(catid) in [0,100200171,100200130,100300014,100300013,100300077,100200034,100200186,100400141,100300165,100200052,100300102,100300005,100200172,100300008,100200068,100300032,100300027,100300116,100300121,100200053,100300019,100300126,100200054,100300073,100400038,100200192,100300065,100300212,100300209,100300122,100400079,100200170,100300169,100400080,100200087,100300074,100200176,100300200,100200028,100300076,100200055,100300006,100200232], if (attribute(catid) in [100300014,100300013,100300077,100400141,100300165,100300005,100300008,100300116,100200053,100300126,100400038,100300212,100300209,100400079,100200170,100200087,100200176,100300200,100200028,100300076,100200055,100300006,100200232], if (attribute(catid) in [100300005,100300126,100300209,100200087,100300200,100200055], -0.0155099848, if (attribute(catid) in [100300014,100300013,100400141,100400038,100300212,100400079,100200170,100200028,100300076,100200232], 0.0052367005, 0.0111703118)), if (attribute(catid) in [0,100200034,100300102,100200068,100300027,100300121,100300073,100200192,100300169,100400080,100300074], if (attribute(catid) in [100200034,100200068,100300027,100300121,100300073,100200192,100300169,100400080,100300074], 0.0150887568, 0.0184571681), 0.0221098064)), if (attribute(catid) in [100300011,100300166,100300093,100300004,100400142,100300127,100300066,100300045], 0.0294845237, if (attribute(catid) in [100300058,100300143,100200193,100300007], 0.0412156817, 0.0581097149))) +
+if (attribute(catid) in [0,100200171,100300011,100200130,100300014,100300058,100300077,100300166,100200034,100200186,100400141,100200052,100300093,100300102,100300005,100300008,100200068,100300032,100300027,100300116,100300121,100200234,100200053,100300019,100300126,100200054,100300073,100400037,100200193,100400038,100200192,100300065,100300212,100300209,100300127,100400079,100200170,100300169,100400080,100200087,100300066,100300007,100200176,100300200,100300076,100200067,100200055,100200185,100300214], if (attribute(catid) in [100300011,100300014,100300008,100200068,100300032,100300027,100300116,100200234,100300019,100400037,100300209,100200176,100200055,100200185,100300214], if (attribute(catid) in [100300008,100200068,100300032,100200055,100200185,100300214], -0.0168814696, -7.128433E-4), if (attribute(catid) in [100200171,100300058,100300077,100200034,100400141,100300102,100300005,100300126,100200054,100200193,100400038,100200192,100300212,100200170,100300169,100400080,100200087,100300076], if (attribute(catid) in [100300058,100200034,100200193,100400038,100200170,100300169,100400080,100300076], 0.0104471779, 0.0151303026), if (attribute(catid) in [0,100200186,100300093,100300121,100200053,100300073,100300200], 0.0192366025, 0.0235706074))), if (attribute(catid) in [100300143,100300165,100200172,100300004,100400142,100300122,100300074,100200232], if (attribute(catid) in [100200172,100400142,100300122,100200232], 0.0313391652, 0.0410697301), 0.0691824633)) +
+if (attribute(catid) in [100300013,100200186,100400141,100300005,100200068,100300032,100300116,100200234,100300004,100200192,100300212,100300209,100200170,100400080,100300074,100300200,100200028,100200055,100300146], if (attribute(catid) in [100300032,100300212,100300209,100200028,100200055,100300146], -0.0313637219, if (attribute(catid) in [100300013,100200186,100400141,100200068,100200234,100300004,100400080,100300200], 0.0050115322, 0.0108163539)), if (attribute(catid) in [0,100200171,100200130,100300077,100200034,100300165,100300102,100200172,100300008,100300027,100300121,100200053,100300126,100400142,100300073,100400038,100300065,100300127,100400079,100300169,100200087,100300066,100300007,100300076,100300006], if (attribute(catid) in [0,100300077,100200034,100300165,100200172,100300008,100300121,100200053,100300126,100400142,100300073,100400038,100300065,100400079,100200087], if (attribute(catid) in [100300077,100300165,100200172,100300008,100300121,100300073,100400038,100200087], 0.0169532751, 0.019777196), 0.0229019262), if (attribute(catid) in [100300014,100300058,100300166,100300143,100200052,100300093,100200054,100300122], 0.0275740075, 0.0402576409))) +
+if (attribute(catid) in [0,100200171,100300011,100300058,100300013,100300077,100200186,100300165,100200052,100300102,100300005,100200068,100300032,100300027,100300116,100200234,100300019,100300126,100400142,100300073,100200192,100400079,100300169,100400080,100300074,100300200,100300045,100200028,100300076,100200185,100200232,100300214], if (attribute(catid) in [100300011,100300013,100200052,100300102,100200068,100300032,100300116,100300019,100300126,100300076,100200185,100200232,100300214], if (attribute(catid) in [100300013,100200068,100300032,100300019,100300076,100200185,100200232,100300214], -0.0132508399, 2.145632E-4), if (attribute(catid) in [0,100300058,100300077,100200186,100300165,100300005,100300027,100300073,100200192,100300169,100400080], 0.0144506818, 0.0181232118)), if (attribute(catid) in [100200130,100300014,100300166,100300143,100200034,100400141,100300093,100200172,100300008,100300121,100200053,100300004,100300065,100300212,100300209,100300122,100300127,100200170,100200087,100300006], if (attribute(catid) in [100200130,100200034,100400141,100300093,100300121,100300004,100300065,100300212,100300127,100200170,100200087,100300006], 0.023751453, 0.0281193568), 0.0394520537)) +
+if (attribute(catid) in [100300011,100300013,100300077,100200034,100200186,100300102,100300005,100200068,100300116,100300121,100400079,100200170,100400080,100300074,100300200,100300076,100200055,100300214,100300146], if (attribute(catid) in [100300011,100300013,100200034,100200186,100300005,100200170,100300076,100200055,100300214], -0.011327312, 0.0081802635), if (attribute(catid) in [0,100200171,100200130,100300058,100300166,100400141,100300165,100200172,100300008,100300027,100200234,100200053,100300126,100400142,100200054,100300073,100400037,100400038,100200192,100300065,100300122,100300169,100300066,100300045,100200028,100200067,100300006], if (attribute(catid) in [100200130,100300058,100300165,100200172,100300027,100200234,100300073], 0.0154377299, if (attribute(catid) in [0,100400141,100300008,100400037,100400038,100200192,100300169,100300045,100200067], 0.0188466465, 0.0219373268)), if (attribute(catid) in [100300014,100200052,100300093,100300032,100300004,100300127,100200087,100200176,100200185], 0.0332492867, 0.0538118306))) +
+if (attribute(catid) in [0,100200171,100300011,100200130,100300014,100300013,100300166,100300143,100200186,100300165,100200052,100300093,100300008,100200068,100300027,100300116,100300121,100200053,100300019,100300004,100300126,100400142,100200054,100300073,100400038,100200192,100300212,100300122,100200170,100300169,100400080,100200087,100300074,100300007,100300200,100200067,100200055,100300214,100300146], if (attribute(catid) in [100200171,100300014,100300013,100300008,100200068,100300116,100300121,100200054,100300073,100200192,100300212,100400080,100200087,100300200,100200055,100300214,100300146], if (attribute(catid) in [100300014,100300008,100200068,100200054,100300212,100300200,100200055,100300214], -0.0068335973, 0.0078647534), if (attribute(catid) in [100300165,100200052,100200053,100300019,100300004,100300126,100300122,100200170,100300169,100300074], 0.0135025323, if (attribute(catid) in [0,100300143,100300027,100400142,100400038], 0.0176344289, 0.019623595))), if (attribute(catid) in [100300058,100200034,100400141,100200193,100300065,100300209,100300127,100400079,100300066,100300045,100200028,100300076,100300006], 0.0257873841, if (attribute(catid) in [100200172,100300032,100200176], 0.03381447, 0.0425972117))) +
+if (attribute(catid) in [0,100300011,100300014,100300013,100300077,100300166,100200034,100200186,100400141,100300165,100200052,100300102,100200172,100300008,100200068,100300027,100300116,100200234,100200053,100300126,100200054,100300073,100400037,100200192,100300209,100300127,100400079,100300169,100400080,100200087,100300074,100300066,100300007,100200176,100200028,100300076], if (attribute(catid) in [100300011,100300014,100200034,100200186,100300102,100300008,100200068,100300027,100300116,100300073,100400037,100200192,100300209,100400079,100200087,100300074,100200176,100200028,100300076], if (attribute(catid) in [100300102,100300008,100400037,100200087,100200176,100300076], -0.0165719048, 0.0081977488), if (attribute(catid) in [100300013,100300165,100200052,100200172,100200053,100200054,100300169], 0.0136130091, 0.0180110506)), if (attribute(catid) in [100200171,100200130,100300058,100300093,100300032,100300121,100400142,100200170,100300200,100300006,100200232], 0.0236852926, if (attribute(catid) in [100300143,100300005,100200193,100300065,100300122,100200067], 0.0327973275, 0.0538361793))) +
+if (attribute(catid) in [100200171,100300011,100300014,100300013,100300143,100200034,100200186,100300005,100300008,100200068,100300116,100200053,100300019,100400037,100200192,100200170,100300074,100300007,100300200,100200028,100200055,100300214,100300146], if (attribute(catid) in [100300011,100300143,100200186,100300005,100300008,100200068,100300019,100200055,100300214,100300146], if (attribute(catid) in [100300011,100300143,100300005,100300019,100200055,100300214,100300146], -0.0264171514, -0.0055670691), if (attribute(catid) in [100300014,100300013,100300116,100400037,100200192,100200170,100300007], 0.0026101595, 0.0084034666)), if (attribute(catid) in [0,100200130,100300058,100300077,100300166,100400141,100200052,100300093,100300027,100300121,100200234,100300004,100300126,100200054,100300073,100300122,100300127,100400079,100300169,100400080,100200087,100200176,100300045,100300076,100200067,100300006,100200232], if (attribute(catid) in [0,100300058,100300077,100200052,100300093,100300027,100200234,100200054,100300073,100300169,100300045,100200067,100200232], if (attribute(catid) in [100300077,100300027,100200234,100200054,100300073,100300169], 0.013631975, 0.0160752676), if (attribute(catid) in [100200130,100400141,100300121,100300127,100400079,100200176,100300076], 0.0182663338, 0.0220047542)), if (attribute(catid) in [100300102,100200172,100400142,100300065,100300066,100200185], 0.026533065, if (attribute(catid) in [100300165,100400038], 0.0364634573, 0.0690252268)))) +
+if (attribute(catid) in [0,100200171,100300077,100300166,100200034,100400141,100300165,100200052,100300093,100200172,100200068,100300032,100300027,100200234,100200053,100300004,100300126,100300073,100200193,100400038,100200192,100300065,100300212,100300209,100300122,100300127,100400079,100200170,100300169,100400080,100200087,100300074,100300066,100300007,100200176,100300076,100200067,100200055,100300006,100300214], if (attribute(catid) in [100400141,100300165,100300032,100300004,100300126,100200193,100300007,100200176,100200067,100200055,100300006,100300214], if (attribute(catid) in [100300032,100300126,100300007,100200176,100200067,100200055,100300006,100300214], -0.0053479534, 0.0054148332), if (attribute(catid) in [0,100300077,100200068,100300027,100200234,100200053,100200192,100300065,100300212,100300209,100400079,100200170,100300169,100400080,100300076], if (attribute(catid) in [100200068,100200053,100300065,100300209,100200170,100300169,100300076], 0.0124266534, 0.0155941575), if (attribute(catid) in [100300166,100200034,100300093,100200172,100300073], 0.0190612693, 0.0217608552))), if (attribute(catid) in [100200130,100300014,100300058,100300013,100200186,100300005,100300116,100300121,100300019,100400142,100300045,100200028,100200185], if (attribute(catid) in [100300058,100300013,100200186,100300005,100300116,100300121,100300019,100300045], 0.0276398014, 0.0339388499), 0.0448102783)) +
+if (attribute(catid) in [100300011,100300014,100300058,100300143,100200052,100300005,100300008,100200068,100300032,100200234,100200054,100400037,100300209,100300127,100300169,100300074,100300007,100300076,100200067,100300006,100200232,100300214], if (attribute(catid) in [100300014,100300058,100300005,100300008,100200068,100300032,100200234,100300209,100200067,100200232,100300214], if (attribute(catid) in [100300014,100300005,100300032,100300209,100200067,100200232,100300214], -0.0271870843, -0.0066979774), if (attribute(catid) in [100300011,100300143,100400037,100300127,100300007,100300076,100300006], 3.261718E-4, 0.0072958932)), if (attribute(catid) in [0,100200171,100200130,100300077,100300166,100200186,100200172,100300121,100200053,100400142,100300073,100200192,100400079,100400080,100300066], if (attribute(catid) in [0,100200130,100300077,100200186,100200172,100300121,100300073,100400079,100400080], if (attribute(catid) in [100200130,100200172,100300121,100300073,100400079,100400080], 0.0130456694, 0.0171286061), 0.0214322678), if (attribute(catid) in [100400141,100300165,100300093,100300102,100300027,100300116,100300126,100400038,100300065,100300122,100200087,100300045,100200028], if (attribute(catid) in [100400141,100300165,100300102,100300027,100200087,100300045], 0.0264983702, 0.0310587203), 0.0435590971))) +
+if (attribute(catid) in [100300013,100200034,100300093,100300102,100300008,100200068,100300032,100200234,100300019,100300004,100300212,100300209,100400079,100200170,100300169,100300200,100200028,100300076,100200067,100200055,100200185,100300006], if (attribute(catid) in [100300013,100300102,100300032,100300019,100300212,100300209,100300200,100200067,100200055], -0.0292043593, if (attribute(catid) in [100300093,100300008,100200234,100300004,100300076,100200185,100300006], -9.351701E-4, 0.0074356232)), if (attribute(catid) in [0,100200171,100300011,100200130,100300058,100300143,100200186,100300027,100300116,100200053,100400142,100300073,100200193,100400038,100200192,100300122,100400080,100200087,100300074,100300066,100200176,100300045], if (attribute(catid) in [0,100300058,100200186,100300027,100300116,100200087,100200176], 0.0139006166, 0.0181039982), if (attribute(catid) in [100300014,100300077,100400141,100300165,100200052,100300005,100200172,100300121,100300126,100300065,100300007], 0.0233872084, 0.0318278949))) +
+if (attribute(catid) in [100300013,100300008,100200068,100300121,100300019,100300004,100200054,100400038,100300212,100200028,100200185,100200232], if (attribute(catid) in [100300013,100300008,100300019,100300212,100200185], -0.0407457887, -0.0116755527), if (attribute(catid) in [0,100200171,100300058,100300166,100200034,100200186,100200052,100300005,100300027,100300116,100200053,100300126,100400142,100300073,100400037,100200192,100300065,100300209,100400079,100300169,100400080,100200087,100300074,100300066,100200176,100300006], if (attribute(catid) in [100200171,100200034,100200052,100300005,100200053,100300073,100400037,100300209,100400079,100400080,100300074], if (attribute(catid) in [100200171,100200034,100300005,100200053,100300209,100400079], 0.0049667166, 0.0103313635), if (attribute(catid) in [100200186,100400142,100300065,100300169,100200087,100300066,100200176], 0.0145292773, 0.0169648891)), if (attribute(catid) in [100200130,100300014,100300077,100300143,100400141,100300165,100200172,100200193,100300122,100300127,100300200], if (attribute(catid) in [100200130,100300143,100400141,100300122,100300127], 0.0211036464, 0.0257964434), 0.0412799006))) +
+if (attribute(catid) in [0,100200171,100200130,100300166,100300143,100200034,100200186,100400141,100300093,100300005,100200172,100300008,100300027,100300121,100300019,100300004,100300126,100200054,100300073,100400037,100200192,100300065,100300212,100400079,100200170,100300169,100400080,100200087,100300074,100300007,100300200,100200028,100200185,100300006], if (attribute(catid) in [100300143,100300093,100300005,100300008,100300019,100300212,100200028,100200185,100300006], -0.0120071553, if (attribute(catid) in [100200171,100300166,100200186,100400141,100200172,100300027,100300121,100300004,100200054,100300073,100400079,100200170,100200087,100300074], if (attribute(catid) in [100200186,100300004,100200054,100300073,100200087], 0.0078585148, 0.0109817855), if (attribute(catid) in [0,100200130,100300065,100400080], 0.0142642384, 0.0175222293))), if (attribute(catid) in [100300011,100300014,100300013,100200052,100200068,100200234,100200053,100400038,100300122,100300127,100300066,100200176,100300045,100300076], if (attribute(catid) in [100300014,100300013,100200068,100200234,100200053,100300127,100200176,100300076], 0.0221804998, 0.026563767), if (attribute(catid) in [100300077,100300165,100400142,100200232], 0.0309690505, 0.0459150714))) +
+if (attribute(catid) in [0,100200171,100200130,100300014,100300166,100300143,100200186,100300165,100200052,100300102,100300005,100200172,100300008,100200068,100300032,100300027,100300116,100300121,100200234,100200053,100300019,100300004,100300126,100400142,100400037,100200193,100400038,100200192,100300065,100300212,100300127,100400079,100200170,100300169,100400080,100300074,100300066,100300007,100200176,100300200,100300076,100200232,100300146], if (attribute(catid) in [100300014,100300102,100200068,100300032,100300121,100300019,100300004,100400142,100200193,100400038,100300212,100300127,100400079,100200170,100300074,100200176,100300200,100300076,100200232,100300146], if (attribute(catid) in [100300102,100200068,100300032,100300019,100300212,100200176,100200232], -0.0148052713, 0.0048766529), if (attribute(catid) in [100200171,100300165,100200052,100300005,100200172,100300116,100200234,100200192,100300169], if (attribute(catid) in [100200052,100300116,100200234,100200192,100300169], 0.0099622919, 0.0138380378), if (attribute(catid) in [0,100300027,100300126,100300066], 0.0153805374, 0.0183919749))), if (attribute(catid) in [100300011,100300077,100400141,100300093,100300073,100200087,100200028,100200185], 0.0247957566, 0.0395124104)) +
+if (attribute(catid) in [100300011,100200130,100300014,100300058,100200034,100200186,100300008,100200068,100300073,100400038,100300065,100300127,100400079,100400080,100200087,100200176,100200028,100200067,100200055], if (attribute(catid) in [100300014,100300058,100200034,100300008,100200068,100200087,100200028,100200067,100200055], -0.0102104476, if (attribute(catid) in [100300011,100200186,100400038,100300127], 6.233907E-4, 0.0069244113)), if (attribute(catid) in [0,100200171,100300077,100300166,100300143,100400141,100300165,100300093,100300005,100200172,100300032,100300027,100300116,100300121,100200053,100400142,100200193,100200192,100200170,100300169,100300074,100300066,100300007,100300200,100300045,100300076], if (attribute(catid) in [0,100300077,100300166,100300143,100300165,100300005,100200172,100300032,100300027,100200053,100200192,100300045,100300076], if (attribute(catid) in [100300077,100300143,100300165,100200053,100200192,100300045], 0.0146529601, 0.0167435205), if (attribute(catid) in [100200171,100400141,100300116,100200170,100300074,100300007,100300200], 0.0216207477, 0.025299515)), if (attribute(catid) in [100200052,100300102,100200234,100300004,100300126,100300122,100200185,100300006,100200232], 0.0342788593, 0.0559275992))) +
+if (attribute(catid) in [100200171,100300011,100200130,100300013,100400141,100200052,100300102,100300008,100200068,100300027,100300116,100200234,100200054,100300073,100400037,100300065,100300209,100300122,100300127,100400079,100200170,100300169,100300074,100200176,100200185,100300214], if (attribute(catid) in [100300008,100300027,100400037,100300074,100200176,100200185,100300214], -0.0102961911, if (attribute(catid) in [100200171,100300011,100200052,100300102,100200068,100300116,100200234,100300065,100300209,100400079,100300169], 0.0058614005, 0.0117994941)), if (attribute(catid) in [0,100300014,100300166,100200186,100300165,100300005,100300032,100300004,100300126,100400142,100200192,100400080,100200087,100300200,100200028,100200067,100300006], if (attribute(catid) in [0,100300166,100200186,100300005,100300032,100300126,100400142,100200067], 0.0176102969, 0.0199567396), if (attribute(catid) in [100300058,100300077,100300143,100200034,100300093,100200172,100300121,100300019,100400038,100300212,100300066,100300076,100200232], if (attribute(catid) in [100300077,100300093,100200172,100300019,100300066,100300076,100200232], 0.028069884, 0.0349016561), 0.0778635274))) +
+if (attribute(catid) in [0,100200171,100300011,100200130,100300013,100300077,100300166,100300143,100200034,100400141,100200052,100300102,100300005,100200172,100300008,100200068,100300116,100300121,100200234,100200053,100300019,100300004,100300126,100300073,100200193,100400038,100200192,100300212,100300209,100300122,100300127,100400079,100200170,100300169,100400080,100300074,100300066,100200176,100300200,100200028,100300076,100200055,100200185,100200232,100300146], if (attribute(catid) in [100200171,100300011,100400141,100300102,100300005,100300008,100200068,100300116,100200053,100300019,100300004,100400038,100300212,100300209,100400079,100200176,100300200,100200028,100300076,100200055,100200185,100300146], if (attribute(catid) in [100300011,100300019,100300209,100300200,100200028,100300076,100200055,100200185], -0.0255215536, 0.0049014532), if (attribute(catid) in [0,100300013,100300166,100300143,100200034,100300121,100300126,100200192,100300122,100300127,100300169,100300074,100300066], if (attribute(catid) in [100300013,100300143,100200192,100300127,100300074,100300066], 0.0096441943, 0.0132971959), 0.0172127947)), if (attribute(catid) in [100300014,100200186,100300093,100300027,100400142,100400037,100200087,100300006], 0.0253813497, if (attribute(catid) in [100300165,100300007,100300045], 0.0318318618, 0.0425817751))) +
+if (attribute(catid) in [100300058,100300013,100300077,100200186,100400141,100300165,100200052,100300005,100200068,100300116,100200234,100300019,100300126,100200054,100400038,100300212,100300169,100300074,100300066,100300007,100300200,100300006,100200232,100300214,100300146], if (attribute(catid) in [100300013,100400141,100300005,100300116,100200234,100300019,100300212,100300200,100200232,100300214,100300146], if (attribute(catid) in [100300013,100300005,100300019,100300200,100200232,100300214,100300146], -0.0378281153, -0.0106433322), if (attribute(catid) in [100300165,100200068,100300126,100400038,100300074,100300007], 0.0013709167, 0.0079886834)), if (attribute(catid) in [0,100200171,100200130,100300014,100200034,100300102,100200172,100300008,100300027,100300121,100200053,100300004,100400142,100300073,100200192,100300065,100300122,100300127,100400079,100200170,100400080,100200087,100200028,100300076], if (attribute(catid) in [100200130,100300014,100200034,100300008,100300027,100300121,100200053,100400142,100300073,100200192,100300122,100300127,100200170,100400080,100200087], 0.0145425948, if (attribute(catid) in [0,100300004], 0.0195574674, 0.0232919623)), if (attribute(catid) in [100300011,100300166,100200176,100300045,100200185], 0.0300271939, 0.049494999))) +
+if (attribute(catid) in [0,100200171,100200130,100300014,100300058,100300077,100300166,100200034,100200186,100400141,100300165,100200052,100300093,100300102,100300005,100200172,100200068,100300032,100300116,100300121,100200053,100300019,100300004,100400142,100300073,100400037,100200193,100400038,100200192,100300212,100300122,100300127,100400079,100200170,100300169,100400080,100200087,100300074,100300066,100300007,100300200,100300045,100200028,100300076,100200067,100200055,100300214,100300146], if (attribute(catid) in [100300014,100300058,100300077,100400141,100300165,100300102,100200068,100300032,100300116,100300019,100300004,100400142,100400037,100300212,100300127,100200170,100400080,100300007,100300045,100200028,100200055,100300214,100300146], if (attribute(catid) in [100300102,100300032,100300019,100300004,100400037,100300212,100200055,100300214,100300146], -0.023734737, if (attribute(catid) in [100300014,100300058,100300165,100200068,100300116,100300127,100400080,100300007,100200028], 0.001360616, 0.0065228229)), if (attribute(catid) in [0,100200171,100200130,100300166,100200034,100300005,100200172,100300121,100200193,100200192,100400079,100300169,100300200], if (attribute(catid) in [0,100200034,100200172,100200193,100200192,100300169], 0.0131275051, 0.0156511717), 0.0216393464)), if (attribute(catid) in [100300011,100300143,100300008,100300027,100300126,100200054,100300065,100300209,100200176,100200232], 0.0329759178, 0.0612562214)) +
+if (attribute(catid) in [100300014,100300058,100300077,100300166,100400141,100300165,100200052,100300093,100200172,100300027,100300116,100300121,100200053,100300126,100200054,100300073,100200193,100200192,100300212,100300209,100300127,100400079,100200170,100300169,100400080,100300074,100300066,100300007,100300200,100300076,100200185,100300214,100300146], if (attribute(catid) in [100300014,100300166,100200052,100300093,100300116,100300121,100200193,100300212,100300209,100400080,100300007,100200185,100300214], if (attribute(catid) in [100300014,100300121,100300212,100300209,100200185,100300214], -0.0165119187, 1.429856E-4), if (attribute(catid) in [100300058,100300077,100300165,100200172,100200053,100300126,100200054,100200170,100300074,100300200,100300146], 0.0055114285, 0.0104192793)), if (attribute(catid) in [0,100200171,100200130,100200034,100200186,100300005,100300008,100200068,100200234,100400142,100400038,100300065,100300045,100300006], if (attribute(catid) in [0,100200130,100200034,100300005,100400142,100300065,100300045,100300006], 0.0161701482, 0.0225059966), if (attribute(catid) in [100300013,100300143,100300004,100300122,100200087], 0.0319347909, 0.0503173002))) +
+if (attribute(catid) in [0,100200171,100300011,100200130,100300014,100300058,100300013,100300077,100300166,100300143,100200034,100200186,100400141,100200052,100300102,100300005,100200172,100300008,100200068,100300032,100300027,100300116,100200053,100300019,100300004,100200054,100300073,100400038,100200192,100300212,100300122,100300127,100400079,100200170,100300169,100200087,100300074,100300066,100300007,100200176,100300045,100200028,100200067,100200232], if (attribute(catid) in [100200171,100300013,100300143,100200034,100300102,100300005,100300008,100200068,100300116,100300019,100200054,100400038,100300212,100300122,100300169,100200087,100300074,100300045,100200067,100200232], if (attribute(catid) in [100300143,100200034,100300102,100300008,100300019,100300212,100200067], -0.0135611192, if (attribute(catid) in [100300013,100200068,100300116,100200054,100300122], 0.0015450909, 0.0074784769)), if (attribute(catid) in [100200130,100300014,100300058,100200172,100300027,100200053,100300004,100300073,100200192,100200028], 0.0109788633, 0.01660535)), if (attribute(catid) in [100300165,100200234,100400142,100300065,100400080,100300076,100200185,100300006], if (attribute(catid) in [100200234,100300065,100400080,100300076], 0.0245937925, 0.0299316682), 0.0434718302)) +
+if (attribute(catid) in [0,100200171,100300011,100200130,100300014,100300077,100300166,100200186,100200052,100300093,100300005,100200172,100200068,100300027,100300116,100300121,100200234,100300019,100300004,100200054,100300073,100400037,100200192,100300065,100300212,100300209,100400079,100200170,100300074,100300066,100300200,100300045,100200067,100200055,100300006,100200232,100300214,100300146], if (attribute(catid) in [100300011,100300014,100300077,100200186,100200172,100200068,100300027,100300116,100300121,100200234,100300019,100300073,100400037,100300212,100400079,100300200,100200067,100200055,100300006,100200232,100300214,100300146], if (attribute(catid) in [100200234,100300019,100300212,100300200,100200067,100200055,100200232,100300214], -0.0132138062, if (attribute(catid) in [100300011,100300014,100200186,100200068,100300027,100300116,100300121,100400037,100300006,100300146], 0.0019211021, 0.0071648202)), if (attribute(catid) in [0,100200171,100200130,100200052,100300093,100300005,100200054,100200192,100300065,100300209,100200170,100300066,100300045], if (attribute(catid) in [100200171,100200130,100200052,100300093,100300005,100300209,100300066,100300045], 0.0114878654, 0.0141002634), 0.0189217722)), if (attribute(catid) in [100300143,100300165,100300102,100300008,100200053,100400142,100200193,100400038,100300122,100300127,100300169,100400080,100200087,100200176,100200185], if (attribute(catid) in [100300143,100300165,100300102,100200053,100400142,100300127,100200176], 0.0249929297, 0.0287505728), if (attribute(catid) in [100300058,100200034,100400141,100200028], 0.0405244074, 0.0625787358))) +
+if (attribute(catid) in [0,100200171,100200130,100300014,100200034,100200052,100300093,100300102,100300005,100300008,100300019,100300004,100300126,100200054,100300073,100200193,100400038,100200192,100300065,100300212,100300209,100200170,100300169,100300066,100300200,100200028,100200067,100200232,100300214], if (attribute(catid) in [100300014,100200034,100300093,100300102,100300008,100300019,100300004,100300126,100200054,100300212,100300209,100300200,100200028,100200067,100200232,100300214], if (attribute(catid) in [100300014,100300008,100300019,100200054,100300209,100200067,100200232,100300214], -0.0251474123, -3.413433E-4), if (attribute(catid) in [100200130,100200052,100300073,100200192,100200170,100300169,100300066], 0.0080094607, 0.0126070285)), if (attribute(catid) in [100300011,100300077,100300166,100300143,100200186,100400141,100200068,100300116,100300121,100200053,100400142,100400037,100400079,100400080,100200087,100300074,100300045], if (attribute(catid) in [100300011,100300077,100300143,100200186,100400141,100200053,100400142,100400079,100400080,100200087,100300045], 0.0176471308, 0.0208465659), if (attribute(catid) in [100300165,100200172,100200234,100300122,100200176], 0.0268188222, 0.0367255273))) +
+if (attribute(catid) in [0,100200171,100300011,100200130,100300014,100300058,100300077,100300166,100300143,100200186,100400141,100300093,100300102,100200172,100300008,100300027,100300121,100200234,100200053,100300019,100300004,100300073,100400037,100200193,100300065,100300212,100300127,100400079,100200170,100300169,100400080,100200087,100300074,100300200,100300045,100200028,100300076,100200067,100200055,100200185,100300006,100200232,100300214,100300146], if (attribute(catid) in [100300014,100300093,100300102,100300008,100200053,100300019,100300004,100300073,100300212,100300127,100300200,100200028,100300076,100200067,100200055,100300006,100200232,100300214,100300146], if (attribute(catid) in [100300102,100300008,100300019,100200067,100200055,100200232,100300214], -0.0254633193, 0.0030878168), if (attribute(catid) in [0,100200171,100200130,100300058,100300027,100300121,100200234,100200193,100200170,100300045], if (attribute(catid) in [100200171,100200130,100300058,100300027,100300121,100200234,100200193,100300045], 0.0097072082, 0.0114229146), 0.0156613592)), if (attribute(catid) in [100200034,100200052,100300005,100200068,100300032,100300116,100400142,100200054,100200192,100300122], 0.0227531664, 0.0305920398)) +
+if (attribute(catid) in [100300011,100200034,100300102,100300005,100200068,100300032,100300116,100300019,100300004,100200193,100300212,100300007,100300200,100200028,100200067,100200055,100300006,100300214,100300146], if (attribute(catid) in [100300011,100300102,100300005,100300032,100300019,100300004,100200193,100300200,100200067,100200055,100300214,100300146], -0.0181634396, -0.0014727477), if (attribute(catid) in [0,100200171,100200130,100300014,100300058,100300013,100300077,100200186,100400141,100200052,100200172,100300008,100200234,100200053,100400142,100300073,100400038,100200192,100300065,100300122,100200170,100300169,100200087,100300074,100200176,100300045,100300076], if (attribute(catid) in [100300014,100200186,100400141,100200052,100200234,100200053,100400038,100200192,100300065,100300169,100300074,100200176,100300045], 0.0098907776, if (attribute(catid) in [0,100200130,100300013,100300077,100200172,100300008,100400142,100300122,100200170], 0.0138164577, 0.017092541)), if (attribute(catid) in [100300166,100300143,100300165,100300093,100300027,100300127,100400079,100400080,100300066], 0.0219244924, 0.04056989))) +
+if (attribute(catid) in [0,100200171,100200130,100300058,100300143,100200034,100200186,100400141,100300165,100200052,100300093,100300102,100300005,100200172,100200068,100300027,100300116,100200053,100300019,100300004,100300126,100400142,100300073,100400037,100200193,100400038,100200192,100300212,100300122,100300127,100400079,100200170,100300169,100400080,100200087,100200176,100300200,100200028,100300076,100200055,100300006,100200232,100300146], if (attribute(catid) in [100300143,100300093,100300102,100300116,100300019,100300004,100200193,100400038,100300212,100400080,100200028,100200055,100300006,100200232,100300146], if (attribute(catid) in [100300019,100400038,100200028,100200055,100200232,100300146], -0.0376899039, -0.0084354615), if (attribute(catid) in [100200034,100200186,100400141,100300027,100300073,100300122,100300127,100400079,100200170,100200087,100300200], 0.0049554661, if (attribute(catid) in [0,100200171,100300058,100300165,100300005,100200172,100200053,100300126,100400142,100200192,100300169,100200176], if (attribute(catid) in [0,100300058,100200172,100200053,100300126,100400142,100200192], 0.0134283205, 0.0149300488), 0.0182669992))), if (attribute(catid) in [100300014,100300077,100300121,100200234,100200054,100300074,100300066,100200185], 0.0260635269, if (attribute(catid) in [100300011,100300166], 0.0344146236, 0.0489908315))) +
+if (attribute(catid) in [0,100200171,100300011,100300014,100300077,100300166,100200034,100200186,100400141,100300165,100300008,100300116,100200234,100300126,100400142,100300073,100200192,100300065,100300209,100400079,100200170,100300169,100400080,100200087,100300074,100300066,100300200,100300045,100200028,100300076,100200055,100200185,100300146], if (attribute(catid) in [100300011,100300014,100200034,100400141,100300165,100300008,100300116,100200234,100200192,100300065,100400079,100300169,100200087,100300200,100200028,100300076,100200055,100300146], if (attribute(catid) in [100300014,100300008,100400079,100300200,100300076,100200055], -0.0047600269, 0.0060145343), if (attribute(catid) in [0,100300073,100300209,100400080,100300066], 0.0120012047, 0.0143999679)), if (attribute(catid) in [100200130,100300058,100300143,100200052,100300093,100300102,100200172,100200068,100300027,100300121,100300004,100200054,100400037,100400038,100300212,100300122,100300127,100300007,100200176], if (attribute(catid) in [100300093,100300102,100200172,100200068,100300027,100300121,100400037,100300122,100300127,100200176], 0.0205502481, 0.0247979152), 0.0392012352)) +
+if (attribute(catid) in [100300011,100200130,100300077,100400141,100300165,100300093,100300005,100300008,100300116,100200053,100200193,100400038,100200192,100300209,100300122,100200087,100300074,100300066,100200176,100300045,100200028,100200067], if (attribute(catid) in [100300011,100400141,100300005,100300008,100200193,100300209,100300122,100200087,100300074,100200176,100200067], if (attribute(catid) in [100300011,100200193,100300209,100200087,100200176,100200067], -0.0143613312, -0.0022635925), 0.0062470659), if (attribute(catid) in [0,100300058,100300166,100200034,100200186,100200052,100200172,100200068,100300027,100300121,100200234,100400142,100300073,100400037,100300127,100400079,100200170,100300169,100300076,100300006,100200232], if (attribute(catid) in [0,100300166,100200034,100200186,100200052,100200234,100400142,100200170,100300169,100300076,100300006], if (attribute(catid) in [100200034,100200186,100200234,100200170,100300169,100300076,100300006], 0.012173824, 0.0142409856), 0.0181125356), if (attribute(catid) in [100200171,100300014,100300013,100300019,100300004,100300126,100300065,100300212,100400080,100300007,100300200], 0.0273051157, 0.0468409464))) +
+if (attribute(catid) in [0,100200171,100200130,100300014,100300058,100300077,100300143,100200034,100200186,100400141,100300093,100300005,100200172,100200068,100300032,100300027,100300116,100300121,100200053,100300019,100300004,100200054,100300073,100400037,100200192,100300065,100300212,100300209,100400079,100200170,100300169,100400080,100300074,100300066,100200176,100200028,100200185,100300006,100200232,100300214,100300146], if (attribute(catid) in [100300077,100300143,100200034,100300116,100300019,100300004,100400037,100300212,100300209,100300074,100200028,100200185,100300006,100300214,100300146], if (attribute(catid) in [100300143,100300019,100300004,100400037,100300212,100300209,100300214,100300146], -0.0293561273, -0.0014538622), if (attribute(catid) in [100200186,100400141,100300093,100300005,100200172,100300121,100200054,100200192,100300065,100200232], 0.0075212689, if (attribute(catid) in [0,100200171,100300058,100300032,100200053,100400079,100300169], 0.0113283464, 0.0146230806))), if (attribute(catid) in [100300013,100300166,100300165,100300008,100200234,100400142,100200193,100400038,100300122,100200087,100300007,100300076], 0.0245202493, 0.0398629845)) +
+if (attribute(catid) in [0,100200171,100300011,100200130,100300014,100300013,100200034,100200186,100300165,100200052,100300093,100300102,100200172,100300008,100300032,100300116,100200234,100200053,100300019,100300004,100400142,100300073,100400037,100200192,100300212,100300209,100300127,100400079,100300169,100200087,100300074,100300007,100300045,100200028,100200067,100300006], if (attribute(catid) in [100300011,100300008,100200234,100300019,100300004,100400142,100400037,100300209,100200087,100200028,100200067,100300006], if (attribute(catid) in [100300011,100300008,100300019,100300209,100200087,100200028,100200067,100300006], -0.012550418, 5.157971E-4), if (attribute(catid) in [0,100200171,100200130,100300014,100300165,100300102,100300116,100200053,100400079,100300169,100300074,100300007,100300045], if (attribute(catid) in [100200171,100200130,100300014,100300102,100300116,100400079,100300169,100300074,100300007,100300045], 0.0073987391, 0.0089320166), 0.0143250256)), if (attribute(catid) in [100300166,100400141,100300005,100200068,100300027,100300121,100300126,100200054,100400038,100300065,100300122,100200170,100400080,100300066,100200176,100300076,100200185], if (attribute(catid) in [100400141,100300027,100300121,100300126,100300065,100200170,100400080,100300066], 0.0203552723, 0.0282273054), 0.0491434915)) +
+if (attribute(catid) in [0,100200171,100200130,100300058,100300143,100200034,100200186,100400141,100200052,100300005,100300008,100300032,100300027,100300116,100300121,100200234,100200053,100300126,100400142,100200054,100300073,100200193,100400038,100200192,100300212,100300209,100300127,100400079,100200170,100300169,100400080,100200087,100200176,100200067,100300214,100300146], if (attribute(catid) in [100200130,100300058,100300143,100400141,100200052,100300005,100300008,100300032,100300027,100300121,100200234,100300126,100200054,100300073,100400038,100300212,100300209,100300127,100200170,100400080,100200087,100200176,100200067,100300214,100300146], if (attribute(catid) in [100300058,100300143,100300005,100300008,100300032,100300121,100200054,100300212,100300209,100200087,100200176,100200067,100300214], -0.0099342652, 0.0039806749), if (attribute(catid) in [0,100400142,100200193,100400079,100300169], 0.0115553152, 0.013542768)), if (attribute(catid) in [100300014,100300077,100300166,100200172,100200068,100300065,100300122,100300074,100300066,100300007,100300200,100300006], if (attribute(catid) in [100300077,100200172,100300065,100300066,100300200], 0.0169304303, 0.0205751961), if (attribute(catid) in [100300013,100300165,100300004,100300076], 0.0276389874, 0.0441753863))) +
+if (attribute(catid) in [100300011,100300014,100300166,100200034,100200186,100400141,100200052,100300102,100300005,100200172,100300008,100200068,100300032,100300027,100200053,100300019,100400142,100400037,100200193,100200170,100300074,100300007,100300200,100200028,100200067,100300006], if (attribute(catid) in [100200034,100300102,100300005,100300008,100200068,100300032,100300019,100400037,100300007,100300200,100200067,100300006], if (attribute(catid) in [100300005,100300008,100300032,100300019,100400037,100200067], -0.0322662364, -0.0081793105), if (attribute(catid) in [100300011,100200186,100200052,100400142,100200193,100300074,100200028], 0.0017199453, 0.0060178344)), if (attribute(catid) in [0,100300013,100300077,100300143,100300165,100300116,100300121,100200234,100300004,100300073,100400038,100200192,100300065,100300127,100400079,100300169,100400080,100300066,100200176], if (attribute(catid) in [100300013,100300077,100300143,100300165,100300116,100300121,100200234,100300073,100300065,100300169,100400080,100300066], if (attribute(catid) in [100300013,100300143,100300165,100300116,100300169], 0.0103393155, 0.0134250404), 0.0166558979), if (attribute(catid) in [100200171,100200130,100200087,100300076,100200232], 0.0241920527, 0.0369765147))) +
+if (attribute(catid) in [0,100200171,100200130,100300014,100300058,100300013,100300077,100300143,100200034,100400141,100300093,100200172,100300008,100200068,100300027,100300116,100200234,100200053,100300019,100300004,100400142,100200054,100300073,100400037,100400038,100300065,100300212,100300209,100300122,100400079,100200170,100300169,100200087,100300074,100300007,100200176,100300200,100200028,100200185,100300214,100300146], if (attribute(catid) in [100300143,100300008,100300019,100400037,100300212,100300209,100300007,100300200,100200185,100300214], -0.0243180335, if (attribute(catid) in [100300014,100300013,100200034,100400141,100200172,100200068,100400142,100200054,100400079,100200170,100300169,100200087,100300146], if (attribute(catid) in [100300014,100300013,100200034,100200068,100400142,100200054,100300146], 0.0025036422, 0.0060784676), if (attribute(catid) in [0,100200171,100200130,100200234,100200053,100300004,100300073,100400038,100200028], 0.010955047, 0.0136122663))), if (attribute(catid) in [100300011,100300166,100200186,100300165,100200052,100300102,100200192,100300127,100400080,100300066,100300045,100300076], if (attribute(catid) in [100300011,100200186,100200052,100300102,100200192,100400080,100300066,100300045,100300076], 0.0188007232, 0.0246438709), 0.0423545435)) +
+if (attribute(catid) in [0,100200171,100300011,100300014,100300058,100300077,100200186,100400141,100300093,100300102,100300032,100300121,100200234,100300126,100400142,100300073,100400037,100200193,100200192,100300122,100200087,100300074,100300066,100200176,100300200,100200028,100200067,100200055,100200185,100300006,100200232,100300214,100300146], if (attribute(catid) in [100300093,100300032,100200234,100300126,100400037,100200176,100300200,100200067,100200055,100200185,100300006,100200232,100300214,100300146], -0.0157352941, if (attribute(catid) in [100200171,100300011,100300058,100300102,100200193,100200192,100300122,100300074,100200028], 0.0030409464, if (attribute(catid) in [0,100200186,100400141,100300121,100400142], 0.008063718, 0.0107191663))), if (attribute(catid) in [100200130,100300013,100300166,100200034,100300165,100200052,100300005,100200172,100300008,100200068,100300027,100200054,100400038,100300127,100400079,100200170,100300169,100400080,100300007,100300076], if (attribute(catid) in [100200130,100300013,100300165,100200172,100300027,100200054,100400079,100200170,100300169,100400080], if (attribute(catid) in [100300013,100200172,100300027,100400079,100200170,100400080], 0.0154977719, 0.0195741488), 0.0236445967), if (attribute(catid) in [100300116,100200053,100300065], 0.0324449764, 0.0541648949))) +
+if (attribute(catid) in [100300011,100300058,100300143,100200052,100300093,100200068,100300032,100300027,100200054,100300073,100400037,100200193,100200192,100300209,100300122,100300127,100400079,100200170,100400080,100300200,100300045,100200055,100200185,100300214,100300146], if (attribute(catid) in [100300143,100200068,100300032,100300209,100200170,100200055,100300214,100300146], -0.0241523508, if (attribute(catid) in [100300011,100300058,100200054,100200192,100300127,100400080,100300200,100300045,100200185], -9.657677E-4, 0.005124115)), if (attribute(catid) in [0,100300014,100300013,100300077,100200034,100200186,100400141,100300165,100300102,100300116,100300121,100200234,100300126,100400142,100400038,100300065,100300169,100300066,100300006], if (attribute(catid) in [0,100300014,100300013,100300077,100200034,100200186,100400141,100300102,100300116,100300126,100300006], if (attribute(catid) in [100300013,100300077,100200034,100200186,100400141], 0.0090067376, 0.0127908297), 0.0164902475), if (attribute(catid) in [100200171,100200130,100300166,100200172,100200053,100200087,100200176,100300076], 0.0228127126, 0.0418200655))) +
+if (attribute(catid) in [100200171,100200130,100300014,100300013,100300077,100300166,100200186,100400141,100300165,100200052,100200172,100300008,100200068,100300032,100300027,100300116,100200234,100200053,100300019,100300004,100400142,100300073,100400038,100300212,100300209,100400079,100300169,100400080,100300074,100300007,100200176,100300200,100200028,100200067,100300006,100300146], if (attribute(catid) in [100300013,100400141,100300032,100200053,100300019,100300004,100300212,100300209,100400080,100300007,100200176,100200028,100200067], if (attribute(catid) in [100300032,100300019,100300212,100300209,100200176,100200028,100200067], -0.038608259, -0.0063382264), if (attribute(catid) in [100300014,100300077,100200186,100300165,100300008,100200234,100400142,100300073,100400038,100300169,100300074,100300200,100300146], if (attribute(catid) in [100300077,100300008,100300073,100400038,100300074,100300146], 0.0014081125, 0.0048469355), 0.0085143275)), if (attribute(catid) in [0,100300058,100200034,100300121,100300126,100200054,100200193,100200192,100300065,100300122,100300127,100200170,100200087,100300066,100300045], if (attribute(catid) in [0,100300058,100200193,100200192,100300122,100300127,100200170,100200087], 0.0154377122, 0.0222690511), 0.0445329146)) +
+if (attribute(catid) in [100300011,100300058,100300013,100200186,100300165,100300102,100300008,100200068,100300027,100300116,100300121,100200234,100300019,100200054,100300212,100300127,100400079,100200087,100300074,100300007,100200176,100300076,100200055,100300006,100200232,100300214], if (attribute(catid) in [100300011,100300165,100300102,100300027,100300121,100300019,100200087,100300074,100200176,100200055,100300214], -0.0084086451, if (attribute(catid) in [100300013,100200186,100300008,100200234,100200054,100300212,100300127,100300006], 5.019617E-4, 0.0061914097)), if (attribute(catid) in [0,100200171,100200130,100300014,100300077,100300166,100200052,100300093,100300005,100200172,100200053,100300004,100300126,100400142,100300073,100400038,100200192,100300122,100200170,100300169,100300066,100300200,100300045], if (attribute(catid) in [100200130,100300014,100300093,100300005,100300004,100300126,100400038,100200192,100300169,100300200], 0.0114280621, if (attribute(catid) in [0,100200171,100300077,100200052,100300073,100300066], 0.0141959004, 0.0160019821)), if (attribute(catid) in [100300143,100400141,100300032,100300065,100300209,100200067], 0.0245106044, 0.0334093506))) +
+if (attribute(catid) in [100300011,100200130,100300014,100300143,100400141,100300165,100200052,100300005,100300027,100300116,100200053,100300019,100300004,100300073,100400037,100300209,100400079,100200170,100300169,100300074,100300007,100200176,100300045,100300076,100200067,100200055,100200185,100300214], if (attribute(catid) in [100300011,100300005,100300019,100400037,100300209,100200176,100200055,100200185,100300214], -0.0208490757, if (attribute(catid) in [100300143,100400141,100300116,100200053,100300004,100300073,100400079,100300074], 2.97283E-5, 0.0064274847)), if (attribute(catid) in [0,100200171,100300058,100300077,100300166,100200034,100200186,100200172,100200068,100200234,100300126,100200192,100300065,100300122,100300127,100400080,100200087,100300066,100200028,100300006], if (attribute(catid) in [0,100300077,100300166,100200186,100200172,100300122,100200087,100200028,100300006], if (attribute(catid) in [0,100300166,100300122,100200028], 0.0124031076, 0.0150987823), 0.0188364733), if (attribute(catid) in [100300093,100300102,100300008,100300121,100400142,100200054,100300200,100200232], 0.0293407993, 0.0513888162))) +
+if (attribute(catid) in [0,100200171,100200130,100300014,100300077,100400141,100300165,100200052,100300008,100200068,100300116,100300121,100200234,100200053,100300019,100300004,100300126,100200054,100300073,100400038,100200192,100300065,100300212,100300127,100400079,100200170,100300169,100400080,100200176,100300200,100300076,100200067,100200185,100200232], if (attribute(catid) in [100300014,100300008,100300121,100200234,100200053,100300019,100300212,100400080,100200176,100300200,100200067,100200185,100200232], if (attribute(catid) in [100300008,100300019,100300212,100200176,100300200,100200067,100200185,100200232], -0.0190877492, -7.557548E-4), if (attribute(catid) in [100400141,100200052,100300126,100200054,100300065,100400079,100200170,100300169], 0.0071693422, if (attribute(catid) in [0,100200130,100300073,100400038,100200192,100300127,100300076], 0.0119746374, 0.0136797362))), if (attribute(catid) in [100300011,100300058,100300166,100200034,100200186,100300093,100300005,100200172,100300027,100400142,100300122,100200087,100300066,100300045,100300006], if (attribute(catid) in [100300058,100200186,100300093,100300005,100200172,100300027,100400142,100300122,100200087,100300045,100300006], 0.0192765099, 0.0227594602), 0.0348341149)) +
+if (attribute(catid) in [100300011,100300013,100300143,100300008,100200068,100300019,100200054,100400079,100200170,100400080,100300066,100200176,100300200,100200028,100300006,100300146], if (attribute(catid) in [100300011,100300013,100300008,100300019,100200176,100200028], -0.0154031193, -7.651129E-4), if (attribute(catid) in [0,100200171,100200130,100300014,100300077,100300166,100200034,100200186,100400141,100200052,100300102,100200172,100300027,100300121,100300004,100400142,100300073,100400037,100200193,100200192,100300065,100300209,100300122,100300127,100300169,100200087,100300074,100300007,100300045,100300076,100200067], if (attribute(catid) in [100200171,100200130,100300077,100200034,100400141,100300102,100200172,100300027,100300004,100400142,100400037,100300127,100200087,100300074,100300007,100300045], if (attribute(catid) in [100200130,100400141,100300102,100200172,100300027,100300004,100400037,100300127,100300074,100300007], 0.0066677335, 0.0095565475), if (attribute(catid) in [0,100200052,100300065,100300169,100300076,100200067], 0.0113431678, 0.015684483)), if (attribute(catid) in [100300165,100300116,100200234,100200053,100300126,100400038,100200185,100200232], 0.023118108, 0.0499183157))) +
+if (attribute(catid) in [0,100200171,100300011,100200130,100300058,100300013,100300077,100300166,100200034,100200186,100400141,100200052,100300093,100300102,100300005,100200172,100300032,100300027,100300121,100200234,100300004,100300126,100400142,100300073,100400037,100200193,100300065,100300209,100200170,100300169,100400080,100300066,100300200,100300045,100300076,100200055,100300006,100200232,100300146], if (attribute(catid) in [100200171,100200130,100300058,100200034,100400141,100300093,100300102,100300005,100300032,100300027,100300073,100400037,100300209,100400080,100300200,100200055,100200232,100300146], if (attribute(catid) in [100300102,100300032,100300027,100400037,100300209,100400080,100200055,100300146], -0.0210007071, if (attribute(catid) in [100200171,100300058,100200034], -0.0023338437, 0.0034194175)), if (attribute(catid) in [0,100300077,100200186,100200234,100300126,100400142,100200193,100300065,100200170,100300169,100300066,100300045], 0.0096608445, 0.0144728932)), if (attribute(catid) in [100300014,100300165,100300008,100200068,100200192,100300122,100300127,100400079,100200087,100300074,100200028,100200185], if (attribute(catid) in [100300014,100300165,100200068,100200192,100400079,100200087], 0.0193246792, 0.0247702235), 0.0360951958)) +
+if (attribute(catid) in [100300011,100300143,100200186,100400141,100300165,100300008,100300032,100300116,100300019,100300004,100300126,100400142,100300073,100400037,100400038,100200192,100300212,100300122,100300169,100400080,100300066,100300007,100300076,100200067,100200055,100200185,100300146], if (attribute(catid) in [100300143,100300032,100300019,100300126,100400038,100200067,100200055,100200185], -0.0215257824, if (attribute(catid) in [100300011,100400141,100300008,100300004,100400142,100300073,100400037,100200192,100300066,100300076,100300146], 0.0024762462, 0.0073887199)), if (attribute(catid) in [0,100200171,100300014,100300058,100300013,100300077,100300166,100200034,100300093,100200172,100200068,100300027,100200053,100300065,100300209,100300127,100400079,100200170,100200087,100300074,100300200], if (attribute(catid) in [0,100300014,100300013,100300077,100200034,100300093,100200172,100300027,100200053,100300209,100400079,100300074,100300200], 0.0122813047, 0.0177730971), if (attribute(catid) in [100200130,100300121,100200234,100200193,100200176,100200028], 0.0248372595, 0.0378712543))) +
+if (attribute(catid) in [100200171,100200186,100300165,100300102,100300005,100200068,100300032,100300121,100200234,100200053,100300019,100300004,100300126,100400142,100400038,100300212,100300122,100400079,100400080,100200087,100200067,100200055,100200232], if (attribute(catid) in [100300102,100200068,100300032,100300019,100300126,100400038,100300212,100200067,100200055,100200232], if (attribute(catid) in [100300102,100200068,100300032,100300019,100200067,100200055,100200232], -0.0209289749, -0.0056438478), if (attribute(catid) in [100200186,100300165,100300121,100200053,100300004,100400079,100400080,100200087], 0.0024863738, 0.0054172149)), if (attribute(catid) in [0,100300011,100200130,100300014,100300058,100300166,100400141,100200052,100200172,100300027,100300073,100200192,100300127,100200170,100300169,100300066,100300200,100200028], if (attribute(catid) in [0,100300014,100300058,100300166,100400141,100200052,100200192,100300066], if (attribute(catid) in [100300014,100300058,100300166,100400141,100200192], 0.0098208012, 0.0120469551), 0.0162551324), if (attribute(catid) in [100300077,100200034,100300093,100300008,100300116,100200054,100300065,100300074,100300007,100200176], 0.0250861627, 0.0414462132))) +
+if (attribute(catid) in [0,100200171,100300011,100200130,100300058,100300077,100200034,100200186,100200052,100300093,100300102,100200068,100300027,100300121,100200234,100200053,100300019,100300004,100200054,100300073,100400037,100200192,100300065,100300212,100300209,100300122,100300127,100400079,100200170,100300169,100400080,100300074,100300066,100200176,100300076,100200067,100300006,100300214], if (attribute(catid) in [100200052,100200068,100200234,100300019,100300004,100300212,100400079,100300169,100300074,100200176,100300076,100200067,100300214], if (attribute(catid) in [100200234,100300019,100300004,100300212,100300214], -0.0247332845, -0.0060255621), if (attribute(catid) in [0,100300058,100300077,100200034,100200186,100300093,100300102,100300027,100300121,100200054,100300073,100400037,100200192,100300122,100300127,100300006], if (attribute(catid) in [100300058,100200186,100300093,100300121,100300073,100400037,100200192,100300127], 0.0043627132, 0.0088967157), 0.0143359261)), if (attribute(catid) in [100300143,100200172,100400038,100200087,100300200,100200028,100200185], 0.020278194, if (attribute(catid) in [100300014,100300166,100400141,100300165,100300005,100300116,100300126,100400142], 0.0288593151, 0.045065206))) +
+if (attribute(catid) in [100200171,100300011,100300058,100300013,100300077,100300166,100200186,100300093,100300005,100300008,100300027,100300126,100400142,100200054,100300073,100400037,100400038,100200192,100300209,100300127,100400079,100300169,100400080,100200087,100300074,100200176,100200028,100200185], if (attribute(catid) in [100300011,100300013,100300008,100300027,100300126,100200054,100400038,100300209,100300127,100400080,100200185], if (attribute(catid) in [100300011,100300013,100300008,100300126,100200054,100200185], -0.0134142246, -0.002161146), if (attribute(catid) in [100200171,100200186,100300093,100400037,100400079,100200087,100300074,100200176,100200028], 0.0042397431, 0.0082573117)), if (attribute(catid) in [0,100200130,100300143,100400141,100300165,100200052,100200172,100200068,100300032,100300116,100200234,100200053,100300004,100300065,100300122,100200170,100300066,100300076,100300006], if (attribute(catid) in [0,100200130,100400141,100300116,100300004,100300076,100300006], 0.0125577031, if (attribute(catid) in [100300143,100300165,100200052,100200172,100300032,100300065], 0.015170776, 0.0177925563)), 0.0282720969)) +
+if (attribute(catid) in [0,100200171,100200130,100300014,100300013,100300077,100300166,100200034,100200186,100400141,100300165,100200052,100300093,100300005,100200172,100200068,100300032,100300027,100300116,100300121,100200234,100200053,100300019,100300004,100300126,100400142,100300073,100400038,100200192,100300065,100300212,100300209,100300122,100300127,100400079,100200170,100400080,100200087,100300074,100300066,100300007,100200176,100200028,100300076,100200067,100200055,100300006,100200232,100300146], if (attribute(catid) in [100200171,100300013,100300093,100300005,100200068,100300121,100200234,100300019,100300212,100300209,100200170,100200028,100200067,100200055,100300006,100300146], if (attribute(catid) in [100300013,100300005,100300121,100300212,100200067,100200055], -0.0177817471, -0.0032656602), if (attribute(catid) in [0,100200130,100300014,100300077,100200034,100200186,100300165,100200052,100200172,100300032,100300027,100300004,100200192,100300065,100300122,100300127,100400079,100300074,100300066,100300076], if (attribute(catid) in [100300014,100200186,100200052,100300122,100300066,100300076], 0.0049749252, if (attribute(catid) in [100300077,100200034,100300165,100200172,100300027,100300004,100200192,100400079,100300074], 0.0100440563, 0.0117011752)), 0.0178896771)), if (attribute(catid) in [100300058,100200054,100400037,100200193,100300169,100300200], 0.0319705253, 0.0522115674)) +
+if (attribute(catid) in [100300058,100200034,100400141,100300093,100300102,100300005,100300008,100300004,100200054,100400038,100300212,100300122,100200087,100300074,100200176,100200055,100300214], if (attribute(catid) in [100300005,100300008,100300212,100200055,100300214], -0.0403440609, if (attribute(catid) in [100300058,100200034,100300102,100300004,100200054,100400038,100200176], -0.011425447, -0.0039460534)), if (attribute(catid) in [0,100200171,100300011,100200130,100300077,100200052,100200172,100400142,100300073,100200193,100200192,100300065,100300127,100400079,100200170,100300045,100200028,100300006,100200232], if (attribute(catid) in [0,100300077,100400142,100300073,100300065,100300127,100200170,100300045,100200028,100300006], if (attribute(catid) in [100300077,100300065,100300127,100200170,100300006], 0.0038995654, 0.0096106726), 0.0136466804), if (attribute(catid) in [100300014,100300013,100300166,100300165,100200068,100300032,100300027,100300121,100200234,100200053,100300126,100400080,100300066,100300007,100300200], if (attribute(catid) in [100300013,100300166,100200068,100300027,100300121,100200234,100200053,100300126,100400080,100300007,100300200], 0.0192859199, 0.0245003908), 0.0334815162))) +
+if (attribute(catid) in [100200171,100200130,100300058,100300013,100300077,100300166,100300143,100200034,100200186,100300165,100200052,100300093,100300008,100200068,100300032,100300027,100300116,100200053,100300019,100300126,100300073,100200193,100300209,100400079,100200170,100300066,100200176,100300076,100200067,100200055,100300214], if (attribute(catid) in [100200052,100300008,100300032,100300019,100300126,100300209,100200176,100300076,100200067,100200055,100300214], if (attribute(catid) in [100300008,100300032,100300209,100200176,100300076,100200067,100200055,100300214], -0.0379621177, -0.0077279547), if (attribute(catid) in [100200171,100300058,100300013,100300077,100200034,100300027,100200053], 0.0040069447, 0.0080600184)), if (attribute(catid) in [0,100300011,100300014,100400141,100300102,100300005,100200172,100300121,100200234,100400037,100400038,100200192,100300065,100400080,100300074,100300006,100200232], if (attribute(catid) in [0,100300011,100300014,100400141,100300102,100300005,100200172,100400038], 0.0135824088, 0.0165574836), if (attribute(catid) in [100300004,100400142,100300127,100300169,100200087,100300007,100200185], 0.0214892901, 0.0327877321))) +
+if (attribute(catid) in [0,100200130,100300014,100300058,100300143,100200186,100400141,100300165,100200052,100300093,100300102,100300005,100200068,100300032,100300121,100200234,100200053,100300004,100300126,100400142,100300073,100200193,100200192,100300065,100300212,100400079,100300169,100200087,100300074,100300066,100300007,100200176,100300200,100300045,100300076,100200067,100200055,100300214], if (attribute(catid) in [100200130,100300058,100300143,100200186,100200052,100300093,100300102,100300005,100300121,100200234,100300004,100300126,100200193,100300212,100200087,100300007,100200176,100300045,100300076,100200067,100200055,100300214], if (attribute(catid) in [100300058,100300143,100300102,100300005,100200234,100200193,100200087,100200176,100300045,100300076,100200067,100200055,100300214], -0.010705309, 3.635835E-4), if (attribute(catid) in [100300014,100300165,100200068,100200053,100400142,100300073,100300065], 0.0065823776, 0.0100610854)), if (attribute(catid) in [100300011,100300077,100300166,100200172,100400037,100300122,100300127], 0.017594626, if (attribute(catid) in [100200171,100200034,100300027,100300116,100300209,100200170,100400080,100200028,100200185], 0.0254523278, 0.0397162435))) +
+if (attribute(catid) in [100300058,100300013,100300077,100200186,100300165,100300093,100200172,100300008,100300032,100300027,100300116,100300121,100200234,100300019,100400142,100300073,100200193,100300212,100400079,100200087,100300066,100200176,100300200,100300045,100200028,100300076,100200055,100300006], if (attribute(catid) in [100300058,100300013,100200186,100300008,100300032,100300027,100300019,100400142,100300212,100200087,100200176,100300200,100300045,100200055], if (attribute(catid) in [100300008,100300032,100300019,100200176,100300200,100200055], -0.0294108915, -0.0054927303), if (attribute(catid) in [100300093,100300116,100300121,100200234,100300073,100200193,100400079,100300066,100300076,100300006], 0.0042404411, 0.0073750844)), if (attribute(catid) in [0,100200171,100200130,100300014,100300166,100200034,100400141,100300102,100200068,100200053,100300004,100200054,100200192,100300065,100300122,100300127,100300169,100400080], if (attribute(catid) in [0,100200171,100300166,100200034,100300102,100200053,100300004,100200192,100300065,100300122,100300127,100300169,100400080], if (attribute(catid) in [100200171,100300166,100300004,100300065,100300127,100300169], 0.0124777156, 0.0143158121), 0.0177029723), if (attribute(catid) in [100300011,100300143,100200052,100300126,100300074,100200067], 0.0257271811, 0.0375978662))) +
+if (attribute(catid) in [100300011,100300058,100300013,100300143,100200034,100400141,100300008,100400038,100300169,100300200,100200028,100200067,100200055,100300006,100200232,100300146], if (attribute(catid) in [100300011,100300013,100300143,100200034,100300008,100300200,100200028,100200067,100200055,100200232,100300146], -0.0179030459, -0.0025885619), if (attribute(catid) in [0,100300077,100200186,100300165,100300102,100300005,100200068,100300027,100300116,100300126,100400142,100300073,100200192,100300122,100400079,100200170,100300074,100300066], if (attribute(catid) in [0,100200186,100300165,100300005,100300027,100300116,100300126,100400079], 0.0078435164, 0.0118007064), if (attribute(catid) in [100200171,100200130,100300014,100300166,100200172,100300121,100300004,100200054,100300065,100300127,100400080,100200087,100200185], 0.0172799995, if (attribute(catid) in [100200052,100200234,100200053,100400037,100200193,100200176], 0.0276688136, 0.0491582153)))) +
+if (attribute(catid) in [100200130,100300014,100300058,100300077,100200034,100200186,100400141,100300165,100300005,100200172,100300032,100300116,100200234,100300004,100400142,100300073,100200193,100400038,100200192,100300212,100300127,100400079,100200170,100400080,100200087,100300007,100200176,100200028,100300076,100200055,100200232,100300214], if (attribute(catid) in [100200186,100300032,100300116,100300073,100400038,100300212,100400079,100200087,100300007,100300076,100200055,100200232,100300214], if (attribute(catid) in [100300032,100300212,100300076,100200055,100200232,100300214], -0.0300814303, -0.0044598873), if (attribute(catid) in [100300058,100300077,100200034,100400141,100300005,100300004,100400142,100200193,100200192,100300127,100200170,100400080,100200176], 0.0033831149, 0.0061763311)), if (attribute(catid) in [0,100200171,100300166,100300143,100200052,100300102,100200068,100300027,100200053,100400037,100300065,100300209,100300122], if (attribute(catid) in [0,100300102,100200053,100400037,100300209,100300122], 0.0108688948, 0.0140670577), if (attribute(catid) in [100300121,100200054,100300169,100300074,100300066,100300200,100200185], 0.0219282043, 0.0332033624))) +
+if (attribute(catid) in [0,100300011,100300058,100300013,100300077,100300166,100400141,100200052,100300093,100300005,100200068,100300032,100300027,100200053,100200054,100400037,100200193,100400038,100300209,100300122,100300127,100400079,100200170,100300066,100300045,100200185,100300214,100300146], if (attribute(catid) in [100300011,100300013,100300005,100300032,100300027,100200193,100300209,100300122,100200170,100200185,100300214,100300146], if (attribute(catid) in [100300011,100300005,100300032,100200193,100300209,100200185,100300214,100300146], -0.0217026454, -0.0034858812), if (attribute(catid) in [100300058,100300077,100400141,100200068,100200053,100400079,100300066], 0.00306927, if (attribute(catid) in [0,100400037], 0.0062304681, 0.0091439421))), if (attribute(catid) in [100200171,100200130,100200034,100200186,100300165,100200172,100300116,100200234,100300073,100200192,100300065,100300169,100400080,100200087,100200028,100300076,100200232], if (attribute(catid) in [100200130,100300165,100200172,100300116,100200234,100300073,100200192,100300169,100200232], 0.0117914211, 0.0156586974), if (attribute(catid) in [100300121,100300126,100400142,100300074,100300006], 0.0236611361, 0.0326251935))) +
+if (attribute(catid) in [100300013,100300008,100300032,100300121,100200234,100300004,100300126,100200054,100200193,100300212,100300122,100300127,100400079,100200087,100200176,100300045,100200028,100300076,100200067,100300006], if (attribute(catid) in [100300121,100200234,100300004,100200054,100300212,100200087,100300076,100200067], -0.0242668043, -0.0057454024), if (attribute(catid) in [0,100200171,100200130,100300058,100300166,100300143,100200186,100400141,100300165,100200052,100300093,100300005,100200172,100200068,100300027,100300116,100400142,100300073,100400037,100400038,100200192,100300065,100200170,100300169,100400080,100300074,100300007], if (attribute(catid) in [100200130,100300166,100300143,100400141,100300165,100300093,100200172,100300027,100300116,100300073,100400038,100200170,100300169,100400080,100300074], if (attribute(catid) in [100300143,100400141,100300093,100300027,100300116,100300073,100400080], 0.0044087377, 0.0084781159), if (attribute(catid) in [0,100200068,100400142,100300065], 0.0107211296, 0.0151177012)), if (attribute(catid) in [100300011,100300014,100300077,100300102,100200053,100300209], 0.022720962, 0.0333058662))) +
+if (attribute(catid) in [100300011,100300014,100300143,100200052,100300102,100300005,100300116,100200053,100300126,100400038,100200192,100300074,100300066,100300007,100300200,100300045,100200028,100300214], if (attribute(catid) in [100300143,100300102,100300005,100300116,100200053,100300126,100400038,100300007,100200028,100300214], -0.0112761132, 5.180964E-4), if (attribute(catid) in [0,100200130,100300166,100300093,100200172,100300121,100200234,100400142,100200054,100300073,100400037,100300122,100300127,100400079,100300169,100400080,100300076,100200067], if (attribute(catid) in [0,100300093,100200172,100300121,100200054,100300073,100300122,100300127,100400079,100300169,100300076], if (attribute(catid) in [100300093,100200172,100200054,100300073,100300122,100300127,100400079,100300169,100300076], 0.0062969186, 0.0079944471), 0.0107843133), if (attribute(catid) in [100200171,100300058,100300013,100300077,100200034,100200186,100400141,100300165,100200068,100300027,100300065,100300212,100200170,100200087,100300006], 0.0156933768, 0.0301329171))) +
+if (attribute(catid) in [100200171,100300011,100300014,100300058,100300013,100300077,100200034,100400141,100200068,100300116,100200234,100300019,100400037,100200193,100400038,100200192,100300212,100300209,100300127,100300169,100400080,100200087,100300074,100300066,100300007,100200176,100200067,100200185,100300006,100300214], if (attribute(catid) in [100300011,100300013,100300116,100400038,100300212,100300209,100200087,100200176,100200067,100200185,100300214], -0.0153646097, if (attribute(catid) in [100200171,100300014,100300058,100200034,100200068,100200234,100300019,100200193,100400080,100300066,100300007], -0.0021431391, 0.0027978033)), if (attribute(catid) in [0,100200130,100300166,100200186,100300165,100200052,100300093,100300102,100300005,100200172,100300027,100300121,100200053,100300004,100300126,100400142,100200054,100300073,100300065,100300122,100400079,100200170,100300045,100200028,100300076], if (attribute(catid) in [0,100200130,100200186,100300165,100300102,100300005,100200172,100300027,100300121,100200053,100300073,100300045,100200028], if (attribute(catid) in [100200130,100300165,100300102,100300027,100300121,100200053,100300073,100300045,100200028], 0.0092257166, 0.0114031662), 0.0178845925), 0.0516414799)) +
+if (attribute(catid) in [100300032,100200234,100300212,100300209,100300045,100200055,100300146], -0.0281888364, if (attribute(catid) in [0,100200130,100300058,100300013,100300077,100300166,100300165,100200052,100200172,100200068,100300027,100300019,100300004,100400142,100400037,100200192,100300122,100400079,100200170,100300169,100400080,100300074,100200176,100200028,100300076,100300006], if (attribute(catid) in [100300058,100300077,100300165,100200068,100300019,100400037,100200192,100300122,100400079,100200170,100300169,100400080,100200028,100300006], if (attribute(catid) in [100300058,100200068,100300019,100400037,100200192,100400079,100200170,100200028,100300006], -5.203242E-4, 0.0053481381), if (attribute(catid) in [100200130,100200052,100200172,100300074], 0.0070360348, 0.0095044104)), if (attribute(catid) in [100200171,100400141,100300093,100300008,100300121,100300126,100300073,100300127,100200087,100300066], 0.0150338406, if (attribute(catid) in [100300011,100300014,100200186,100300116,100200053,100400038,100300065,100300200,100200185,100200232], 0.0219656474, 0.0303343362)))) +
+if (attribute(catid) in [100300011,100300058,100200034,100300165,100300093,100300102,100300032,100300027,100300121,100300019,100300073,100400037,100200193,100300127,100300076,100200232,100300214], if (attribute(catid) in [100300011,100300058,100200034,100300093,100300102,100300032,100200193,100300076,100200232,100300214], -0.0146956932, -0.0021492979), if (attribute(catid) in [0,100200171,100200130,100300014,100300077,100300166,100200186,100400141,100200052,100200172,100200068,100300116,100300004,100400142,100200054,100400038,100200192,100300065,100300122,100400079,100200170,100300169,100400080,100200087,100200185,100300006], if (attribute(catid) in [100200130,100300166,100200052,100200172,100200068,100300004,100400142,100400038,100300065,100400079,100400080,100200087], if (attribute(catid) in [100200130,100200172,100200068,100400142,100400079,100400080], 0.0060613479, 0.0086309145), if (attribute(catid) in [0,100200186,100400141,100300116,100200054,100300122,100300169], 0.0111211317, 0.0167951946)), if (attribute(catid) in [100300143,100300005,100200234,100200053,100300126,100300212,100300074,100300066,100300200,100200028,100200067], if (attribute(catid) in [100200053,100300126,100300074,100300066,100200028], 0.0204993878, 0.0274572562), 0.0495878122))) +
+if (attribute(catid) in [100300011,100300014,100300058,100300166,100300143,100200034,100300165,100200052,100300093,100200172,100300032,100300116,100200053,100300019,100400142,100400037,100400038,100200192,100300209,100300122,100300127,100400079,100300074,100200176,100300200,100200028,100200232], if (attribute(catid) in [100300011,100200034,100300032,100300019,100400037,100300127,100400079,100300074,100200176,100300200,100200028,100200232], if (attribute(catid) in [100300032,100300019,100400037,100300200,100200028,100200232], -0.0422634866, -0.0088083561), if (attribute(catid) in [100300014,100300143,100300165,100300093,100200172,100200192,100300209,100300122], -4.084485E-4, 0.0040562959)), if (attribute(catid) in [0,100200130,100300077,100400141,100300005,100300027,100200234,100300004,100300126,100300073,100200193,100300065,100300212,100200170,100300169,100200087,100300066,100300045,100300076], if (attribute(catid) in [100200130,100400141,100300005,100300065,100300212,100200170,100300169,100300045], 0.0081251015, if (attribute(catid) in [0,100200234], 0.0119875946, 0.0148183346)), if (attribute(catid) in [100200171,100200186,100200068,100300121,100300006], 0.0204518722, 0.0367144755))) +
+if (attribute(catid) in [100300058,100300013,100200034,100300165,100300093,100300005,100200172,100300027,100200053,100300019,100300126,100300073,100300212,100300209,100300169,100400080,100200087,100300007,100300045,100200067,100200055,100200232,100300214], if (attribute(catid) in [100300013,100200034,100300005,100300027,100300019,100300212,100200067,100200055,100200232,100300214], if (attribute(catid) in [100300013,100300005,100300019,100300212,100200055,100200232,100300214], -0.041013169, -0.0176924609), if (attribute(catid) in [100300058,100300165,100300093,100200053,100300126,100300073,100300209,100300007,100300045], -0.0030372433, 0.0030417758)), if (attribute(catid) in [0,100200171,100300077,100300166,100200186,100400141,100200052,100200068,100300116,100300121,100200054,100200192,100300065,100300122,100300127,100200170,100300066,100200028,100300076,100300006], if (attribute(catid) in [0,100300077,100300166,100200186,100200052,100300121,100300065,100300122,100300066,100300076,100300006], if (attribute(catid) in [100300077,100300166,100300121,100300122,100300066,100300076], 0.0066574572, 0.0093092556), 0.0138817683), if (attribute(catid) in [100300011,100300102,100300032,100200234,100300004,100400142,100400079,100300074,100200176,100300200], 0.021173827, if (attribute(catid) in [100200130,100300143,100200193,100400038], 0.027045199, 0.0447659217)))) +
+if (attribute(catid) in [100300014,100300013,100300008,100200068,100200234,100300004,100200193,100300212,100300209,100400080,100200087,100200176,100200028,100200067,100200055,100200185,100300006,100200232], if (attribute(catid) in [100300013,100300008,100200068,100300212,100300209,100200028,100200067,100200055,100200185], if (attribute(catid) in [100300013,100300008,100300209,100200028,100200067,100200055], -0.0410751689, -0.0211139959), -0.0060525832), if (attribute(catid) in [100200171,100200130,100300077,100300143,100400141,100200052,100300102,100200172,100300073,100400037,100300122,100300074,100300200,100300045], if (attribute(catid) in [100200171,100300077,100300143,100200052,100300102,100200172,100400037,100300045], 0.0022476773, 0.0072496833), if (attribute(catid) in [0,100300011,100300166,100200186,100300165,100300005,100300027,100300116,100200053,100300126,100300127,100400079,100200170,100300169,100300066,100300076], if (attribute(catid) in [0,100300166,100300165,100300005,100200053,100400079,100300169], if (attribute(catid) in [100300166,100300165,100300005,100200053,100400079,100300169], 0.0108065489, 0.0121894583), 0.0150333423), if (attribute(catid) in [100200034,100300032,100400142,100400038,100200192,100300065], 0.0204411972, 0.0355402269)))) +
+if (attribute(catid) in [100300058,100300143,100200034,100200186,100400141,100300093,100300102,100300027,100300004,100200054,100200193,100400038,100300065,100300212,100300209,100300007,100200028,100200067,100200185,100300146], if (attribute(catid) in [100300058,100400038,100300065,100300209,100200028,100200067,100200185], -0.0271979436, -0.0038611614), if (attribute(catid) in [0,100300014,100200052,100200172,100400142,100200192,100300122,100300127,100400079,100200170,100300074,100300076], if (attribute(catid) in [0,100200172,100400142,100200192,100300122,100200170,100300076], 0.0073159372, 0.0115841741), if (attribute(catid) in [100200171,100200130,100300013,100300166,100300008,100200068,100300116,100300121,100200053,100300126,100300073,100300169,100400080,100200087,100300066,100200176], if (attribute(catid) in [100200130,100300013,100200068,100300121,100200053,100300073,100300169], 0.013576241, 0.0164515309), 0.0222390758))) +
+if (attribute(catid) in [0,100200171,100300011,100200130,100300166,100300143,100400141,100200052,100300102,100300005,100200172,100300008,100200068,100300027,100300116,100200234,100200053,100400142,100200054,100300073,100200193,100400038,100200192,100300209,100300122,100200170,100200087,100300074,100300007,100200176,100300200,100300045,100200028,100300076,100200067,100300006,100300214], if (attribute(catid) in [100300011,100300166,100300143,100300008,100400142,100300073,100200193,100300209,100300007,100200176,100300214], if (attribute(catid) in [100300143,100200193,100300007,100200176,100300214], -0.0297063352, -0.0033584809), if (attribute(catid) in [0,100200130,100400141,100200052,100200172,100300027,100300116,100200234,100200053,100200054,100400038,100200192,100300122,100200170,100300076,100300006], if (attribute(catid) in [100200130,100200052,100200172,100300027,100300116,100200234,100200053,100200054,100400038,100200192,100200170,100300006], 0.0038378464, 0.0065133022), 0.0116394129)), if (attribute(catid) in [100300014,100300077,100200034,100200186,100300165,100300121,100300004,100300126,100300065,100300127,100300169,100400080,100300066,100200185,100200232], if (attribute(catid) in [100300014,100300077,100300004,100300127,100300169,100300066,100200185,100200232], 0.0160520754, 0.0218263304), 0.0403414109)) +
+if (attribute(catid) in [100300077,100300143,100200034,100300102,100300005,100300008,100300116,100300121,100300004,100300126,100300073,100400038,100300212,100300209,100300007,100200067,100200055,100300214,100300146], if (attribute(catid) in [100300143,100200034,100300005,100300209,100200067,100200055,100300214], -0.0407918257, if (attribute(catid) in [100300102,100300008,100300116,100300004,100400038,100300007,100300146], -0.0072483912, 8.912521E-4)), if (attribute(catid) in [0,100200171,100200130,100300058,100300166,100200186,100400141,100200052,100300093,100200172,100200068,100300032,100300027,100200053,100400142,100200192,100300065,100300122,100300127,100400079,100200170,100300169,100400080,100300074,100300066,100200176,100200028,100300076], if (attribute(catid) in [0,100200171,100200130,100300058,100300166,100200186,100300093,100200172,100200053,100300122,100300127,100400079,100300169,100200176,100200028], if (attribute(catid) in [100200171,100300166,100200186,100300093,100200053,100300127,100300169,100200176,100200028], 0.0062732454, 0.0100875564), if (attribute(catid) in [100200068,100400142,100300065,100400080,100300074], 0.0120096679, 0.0160208786)), if (attribute(catid) in [100300014,100300165,100200234,100200054,100200193,100200087,100300200,100200185], 0.0234816349, 0.0499780329))) +
+if (attribute(catid) in [100300014,100400141,100300093,100300102,100300008,100200068,100300116,100200234,100200053,100300019,100300004,100400142,100400038,100300127,100200087,100300074,100200176,100300200,100300045,100200028,100200067,100300006,100200232,100300214], if (attribute(catid) in [100300102,100300008,100200068,100300019,100300004,100400038,100300127,100200176,100300200,100300045,100200067,100300006,100200232,100300214], -0.0139227136, if (attribute(catid) in [100400142,100300074], -0.0037315997, 0.0024425812)), if (attribute(catid) in [0,100200171,100200130,100300077,100300166,100300165,100200052,100300073,100300065,100300212,100400079,100300169,100300007], if (attribute(catid) in [100200171,100200130,100300077,100300166,100300165,100200052,100300212,100400079], 0.0062742131, 0.010721818), if (attribute(catid) in [100300058,100300013,100300143,100200034,100200186,100300005,100200172,100300027,100300121,100200192,100300209,100300122,100200170,100400080,100300066], 0.0153086152, 0.0249190643))) +
+if (attribute(catid) in [100300143,100200186,100200052,100300093,100300102,100300008,100300121,100200234,100200053,100300019,100300004,100400038,100300065,100300212,100200087,100300200,100200028,100200067,100200055,100200185,100300006,100300214,100300146], if (attribute(catid) in [100300143,100300102,100300008,100300121,100300019,100300004,100400038,100200067,100200055,100300006,100300214,100300146], -0.0160264772, -0.0023817409), if (attribute(catid) in [0,100300013,100300166,100200034,100300032,100400142,100300073,100200193,100200192,100400079,100200170,100300066], if (attribute(catid) in [0,100300013,100300073,100200192,100400079,100300066], 0.0078781947, 0.0111361463), if (attribute(catid) in [100200171,100300011,100200130,100300014,100300077,100300165,100200172,100200068,100300027,100300126,100200054,100300209,100300122,100300169,100400080,100300074,100200176], if (attribute(catid) in [100300011,100200130,100300014,100300077,100300165,100200054,100300209,100300122,100300169,100200176], 0.0143761703, 0.018000531), 0.0280056529))) +
+if (attribute(catid) in [100300011,100300013,100300166,100200034,100200052,100300008,100200068,100300116,100300121,100300019,100200193,100200192,100300065,100300209,100300127,100300074,100200176,100300200,100300045,100300076,100200067,100200055,100300214,100300146], if (attribute(catid) in [100300011,100200034,100200068,100300019,100300209,100200176,100300200,100300045,100300076,100200067,100200055,100300214,100300146], -0.0143446006, -5.59109E-4), if (attribute(catid) in [0,100200171,100300014,100300077,100300143,100200186,100400141,100300165,100300102,100300005,100200172,100300027,100300004,100300126,100400142,100200054,100300073,100400038,100400079,100200170,100300169,100400080,100300066,100200028,100200185,100300006], if (attribute(catid) in [0,100200171,100200186,100400141,100300165,100300102,100300005,100200172,100300027,100200054,100300073,100400038,100300169,100400080,100300066,100200028], if (attribute(catid) in [100200186,100400141,100300005,100200172,100200054,100300073,100300169,100300066], 0.0063901469, 0.0098655154), 0.0150886566), if (attribute(catid) in [100200130,100300058,100300093,100300032,100300122], 0.0218922437, 0.033132652))) +
+if (attribute(catid) in [100200171,100300011,100300013,100200034,100200186,100400141,100200052,100300093,100300102,100200172,100300008,100300032,100300027,100300116,100200054,100300073,100400037,100200193,100200170,100300066,100300007,100300200,100300045,100300076,100200055], if (attribute(catid) in [100300011,100200186,100300102,100300008,100400037,100200193,100200170,100300007,100300200,100200055], -0.0100694371, -0.0013265371), if (attribute(catid) in [0,100200130,100300014,100300058,100300166,100300165,100300005,100300121,100200234,100300004,100300126,100400142,100200192,100300065,100300212,100300122,100300127,100400079,100300169,100300074,100200028,100200067,100300006], if (attribute(catid) in [0,100300058,100300005,100300121,100200234,100300065,100300212,100300127], 0.0083645817, 0.0130866864), if (attribute(catid) in [100300077,100200068,100200053,100400080,100200087,100200176], 0.0197130039, 0.0369209199))) +
+if (attribute(catid) in [0,100200171,100300011,100300014,100300077,100300166,100300143,100200186,100400141,100200052,100300093,100300102,100300005,100300027,100200234,100200053,100300004,100200054,100300073,100400037,100200193,100200192,100300212,100300209,100300122,100300127,100400080,100200087,100300074,100300007,100300045,100300076,100200067,100200055,100200185,100300006,100300214], if (attribute(catid) in [100300011,100300143,100200052,100300093,100200054,100200193,100300209,100300074,100200067,100200055,100200185,100300006,100300214], if (attribute(catid) in [100300143,100300093,100200193,100300209,100300074,100200055,100200185,100300214], -0.0161446372, -0.0058395053), if (attribute(catid) in [0,100300014,100200186,100400141,100300102,100200234,100300004,100300073,100400037,100300212,100300122,100300127,100300045,100300076], if (attribute(catid) in [100300014,100200186,100400141,100300102,100200234,100300004,100300073,100400037,100300212,100300045], 0.001863977, 0.0041634423), 0.0073459177)), if (attribute(catid) in [100200130,100300058,100200068,100300032,100300116,100300121,100300126,100400142,100300065,100400079,100200170,100300169,100300066], 0.0148443276, if (attribute(catid) in [100200034,100300165,100200172], 0.0223682677, 0.0294343337))) +
+if (attribute(catid) in [100300011,100300143,100300102,100200234,100300019,100400037,100300074,100300066,100300045,100200067,100200055,100200185,100300006,100300214], if (attribute(catid) in [100300143,100200234,100300019,100200055,100300214], -0.024786788, -0.0075505833), if (attribute(catid) in [0,100200130,100300014,100300077,100300166,100200034,100200186,100400141,100300165,100200052,100300093,100200172,100200068,100300027,100300116,100200053,100400142,100200054,100300073,100400038,100200192,100300065,100300122,100400079,100300169,100400080,100200087,100300007,100300076], if (attribute(catid) in [0,100200130,100300077,100300166,100200034,100300165,100200052,100300093,100300027,100300116,100200053,100300073,100400079,100400080,100200087,100300076], if (attribute(catid) in [100200130,100300077,100300093,100300027,100200053,100400079,100200087,100300076], 0.0019910708, 0.0076733873), if (attribute(catid) in [100300014,100400141,100200172,100400142,100300065,100300122,100300007], 0.0110065874, 0.0140312744)), if (attribute(catid) in [100200171,100300121,100300004,100300126,100200193,100300209,100300127,100200170,100200176,100200028], 0.0200792046, 0.0371851273))) +
+if (attribute(catid) in [100300011,100300014,100300077,100300165,100300093,100300005,100200068,100300032,100300121,100200234,100300126,100200054,100400037,100200193,100400038,100300122,100200087,100200176,100300200,100300076,100200067,100200232,100300214], if (attribute(catid) in [100300014,100300165,100300093,100200068,100300032,100200193,100200087,100200176,100200067,100200232,100300214], if (attribute(catid) in [100300014,100300093,100300032,100200176,100200067,100200232,100300214], -0.0238697728, -0.0068920318), -8.192848E-4), if (attribute(catid) in [0,100200171,100200130,100300166,100200034,100200186,100400141,100200052,100200172,100300027,100300116,100200053,100400142,100300073,100200192,100300212,100300127,100200170,100300074,100300066,100300007,100200028], if (attribute(catid) in [100200130,100400141,100200052,100300027,100400142,100200192,100300127,100200170,100300074,100300066], 0.0063630817, if (attribute(catid) in [100200171,100300166,100200034,100200186,100200053,100300073,100300007], 0.0102002983, 0.011707377)), if (attribute(catid) in [100300008,100300004,100300065,100400079,100300169,100400080,100200185,100300006], 0.0199785583, 0.0332135569))) +
+if (attribute(catid) in [0,100300011,100200130,100300014,100300077,100300166,100400141,100300165,100300093,100300005,100200172,100200068,100200053,100300004,100400142,100200192,100300209,100300122,100300127,100400079,100300169,100400080,100200087,100300074,100300066,100300007,100300200,100300045,100200028,100200067,100200055,100300006,100200232], if (attribute(catid) in [100300011,100300014,100300004,100400142,100300209,100400080,100200087,100300007,100300200,100200067,100200055,100300006,100200232], if (attribute(catid) in [100300011,100300014,100300004,100300209,100400080,100300007,100300200,100200067,100200055,100200232], -0.0226068659, -0.0049292005), if (attribute(catid) in [0,100200130,100300077,100300165,100200053,100300122,100400079,100300169,100300074,100300066,100300045,100200028], if (attribute(catid) in [100200130,100300077,100300165,100400079,100300066,100300045,100200028], 0.0024587834, 0.0048477347), 0.0100107901)), if (attribute(catid) in [100200171,100300058,100300013,100200034,100200186,100200052,100300027,100300116,100200234,100300073,100400037,100200193,100400038,100300065,100200170,100300076], if (attribute(catid) in [100300013,100200186,100300116,100200234,100300073,100200193,100400038,100300065], 0.0134952529, 0.0171978597), if (attribute(catid) in [100300143,100300121,100200176], 0.0268710783, 0.0389950015))) +
+if (attribute(catid) in [0,100200171,100300011,100200130,100300014,100300058,100300013,100300077,100300166,100200186,100300165,100200052,100300093,100300102,100300005,100200172,100300008,100200068,100300121,100200053,100300004,100300126,100200054,100400037,100200193,100400038,100200192,100300122,100300127,100400079,100200170,100300169,100200087,100300074,100300007,100300045,100200028,100300076,100300006,100200232,100300214,100300146], if (attribute(catid) in [100200171,100300011,100200130,100300014,100300058,100300013,100300166,100200186,100300165,100300093,100300102,100300005,100200172,100300008,100200068,100200053,100300004,100200054,100400037,100400038,100200192,100300122,100400079,100200170,100200028,100300076,100300006,100300214,100300146], if (attribute(catid) in [100300011,100300058,100300013,100300008,100300004,100200054,100400038,100200170,100200028,100300076,100300214,100300146], if (attribute(catid) in [100300058,100300013,100200054,100400038,100300214,100300146], -0.0127825599, -0.0040621004), if (attribute(catid) in [100300165,100300102,100300005,100200068,100200053,100400037,100300122,100300006], 3.831181E-4, 0.0035617568)), if (attribute(catid) in [100300077,100300169,100300074,100300045], 0.0064465003, 0.0097160619)), if (attribute(catid) in [100300143,100400141,100300032,100300027,100300116,100200234,100400142,100300073,100300065,100400080,100300066,100300200,100200185], if (attribute(catid) in [100300143,100300032,100300116,100400142,100400080,100300066,100200185], 0.0176560057, 0.0221678704), 0.0407063066)) +
+if (attribute(catid) in [100300011,100300143,100300102,100300008,100200068,100300116,100300121,100200234,100200054,100300073,100400037,100200193,100300212,100300209,100300066,100300200,100200028,100300076,100200067,100300006,100200232], if (attribute(catid) in [100300011,100300143,100300102,100300116,100200234,100200054,100400037,100300212,100300209,100300066,100300200,100200067,100300006,100200232], if (attribute(catid) in [100300143,100300102,100200054,100300212,100300209,100300200,100200067,100200232], -0.031389503, -0.0136037814), -0.0032608717), if (attribute(catid) in [0,100200171,100200130,100300014,100300058,100300077,100300166,100200186,100400141,100200052,100200172,100300032,100300027,100200053,100300004,100300126,100400038,100200192,100300065,100300122,100300127,100400079,100200170,100300169,100400080,100200176,100300045,100200185], if (attribute(catid) in [0,100200171,100200130,100300014,100300058,100300166,100200052,100300032,100200053,100300126,100200192,100300065,100300122,100300127,100400079,100200176,100300045], if (attribute(catid) in [100200130,100300014,100300166,100200052,100200192,100300065,100200176], 0.0042937725, 0.0066960091), 0.011520582), if (attribute(catid) in [100200034,100300165,100300093,100300005,100200087], 0.0194805523, 0.0346934783))) +
+if (attribute(catid) in [100300013,100300077,100300143,100200186,100300008,100300027,100300116,100300019,100300004,100300126,100300073,100400037,100200192,100300209,100300122,100200170,100300169,100300066,100300007,100300076,100200055,100300146], if (attribute(catid) in [100300143,100300008,100300019,100400037,100300122,100200055,100300146], -0.0346820188, if (attribute(catid) in [100300013,100300077,100300027,100300126,100300169,100300007,100300076], -0.0068202013, -6.852405E-4)), if (attribute(catid) in [0,100200171,100300011,100300058,100300166,100400141,100300165,100200172,100200068,100300032,100300121,100200234,100200053,100200054,100300127,100400079,100400080,100200087,100300074,100300045,100200185], if (attribute(catid) in [0,100200171,100300058,100300166,100400141,100300121,100200234,100200054,100400079,100200087,100300045], if (attribute(catid) in [100300058,100300166,100300121,100200234,100400079,100200087], 0.0054906177, 0.0089382511), 0.0137638882), if (attribute(catid) in [100200130,100200052,100300093,100400142,100200193,100400038,100300065,100200028], 0.0231575087, 0.0350257823))) +
+if (attribute(catid) in [0,100200171,100200130,100300058,100300013,100300166,100300143,100200186,100400141,100300102,100200172,100300008,100300032,100300027,100300121,100200234,100300019,100300004,100400142,100300073,100400037,100200193,100200192,100300212,100300209,100300122,100400079,100300169,100400080,100200087,100300074,100300066,100300007,100300045,100300076,100200055,100300006,100300146], if (attribute(catid) in [100200186,100300102,100300008,100300032,100200234,100200193,100300212,100300209,100300122,100200087,100300076,100200055,100300146], if (attribute(catid) in [100300102,100200234,100300212,100300209,100300076,100200055], -0.0272486986, -0.0121921962), if (attribute(catid) in [100200171,100200130,100300058,100300143,100400141,100300121,100400142,100400037,100200192,100400080,100300074,100300045,100300006], if (attribute(catid) in [100200171,100200192,100400080,100300074,100300045,100300006], -3.106606E-4, 0.0031181748), if (attribute(catid) in [0,100300013,100200172,100300027,100400079,100300066], 0.0075863782, 0.0110589536))), if (attribute(catid) in [100300011,100300077,100200034,100300165,100200052,100300093,100300005,100200068,100300126,100200054,100300065,100300127,100200170,100200185,100200232], if (attribute(catid) in [100300165,100200052,100300093,100300126,100200170,100200185], 0.0158814592, 0.021324049), 0.0395357198)) +
+if (attribute(catid) in [100300058,100300013,100300077,100300166,100200034,100300165,100200052,100300093,100300005,100200172,100300008,100200068,100300019,100300004,100200054,100400038,100300212,100400079,100200170,100300074,100300066,100200176,100300200,100200028,100300076,100200055,100200232,100300146], if (attribute(catid) in [100300013,100300008,100200068,100300019,100400038,100200055,100200232,100300146], -0.0281903745, if (attribute(catid) in [100300058,100200034,100300165,100200052,100300005,100300004,100300212,100400079,100200170,100300074,100200176,100300200,100200028], -0.0038124936, 0.0031637671)), if (attribute(catid) in [0,100200171,100200130,100300143,100200186,100400141,100300027,100300116,100200234,100200053,100300126,100400142,100300073,100200193,100200192,100300065,100300122,100300127,100300169,100400080,100300045,100300006], if (attribute(catid) in [0,100200171,100200130,100300143,100200186,100400141,100300027,100300116,100200234,100300126,100400142,100200193,100200192,100300122,100300045,100300006], if (attribute(catid) in [100200171,100200130,100400141,100300027,100300116,100200234,100400142,100200193,100300122,100300045,100300006], 0.0097327734, 0.0117883652), 0.0163276141), 0.037807689)) +
+if (attribute(catid) in [0,100200171,100300011,100200130,100300014,100300013,100300166,100300143,100200186,100400141,100300165,100300093,100300102,100300008,100200068,100300032,100300116,100300121,100200234,100200053,100300126,100400142,100300073,100200193,100200192,100300212,100300127,100400079,100400080,100200087,100300066,100300007,100200176,100300200,100300045,100200028,100200055,100200185,100300006], if (attribute(catid) in [100200171,100300011,100300013,100300102,100300008,100200068,100300032,100300116,100300121,100200234,100300126,100300127,100400079,100400080,100200176,100300045,100200028,100200055,100200185], if (attribute(catid) in [100300011,100300013,100300102,100300008,100300032,100300116,100300121,100300126,100300127,100200176,100200055,100200185], -0.0183851919, -0.0026357282), if (attribute(catid) in [100200130,100300166,100200186,100400141,100400142], 0.0029870563, if (attribute(catid) in [100300143,100200053,100300073,100200193,100200192,100200087,100300066,100300200], 0.0070772264, 0.0091929568))), if (attribute(catid) in [100300058,100300077,100200052,100300005,100200172,100300027,100300019,100300004,100400037,100400038,100300065,100200170,100300169,100300076,100200067], 0.0192905696, 0.0373801828)) +
+if (attribute(catid) in [100200171,100200130,100300014,100300013,100200186,100300165,100200052,100300102,100300008,100200068,100300027,100300116,100300004,100200192,100300065,100300212,100300122,100300127,100200170,100300074,100200176,100300045,100300076,100200055,100200185,100300214], if (attribute(catid) in [100300102,100300008,100300212,100200170,100300074,100200055,100300214], -0.0234366968, if (attribute(catid) in [100300014,100300013,100200186,100300165,100200068,100300116,100300127,100300076,100200185], -0.0041768475, 0.0012159251)), if (attribute(catid) in [0,100300011,100300058,100300077,100200034,100400141,100200172,100300032,100300121,100200234,100300126,100400142,100300073,100400037,100400079,100300169,100200087,100300200,100200028,100300006], if (attribute(catid) in [0,100300077,100200034,100200172,100200234,100300073,100400037,100300169,100300200,100200028], if (attribute(catid) in [100300077,100200172,100200234,100400037,100300169,100300200], 0.0073379486, 0.0101628542), 0.0145361756), if (attribute(catid) in [100300166,100300093,100300005,100400080,100300066,100200232], 0.0211367281, 0.0344431588))) +
+if (attribute(catid) in [0,100300011,100200130,100300014,100300077,100300143,100200034,100400141,100300165,100300093,100200172,100300032,100300027,100300116,100200053,100300004,100300126,100200054,100400037,100200193,100400079,100200170,100400080,100200087,100300074,100300066,100300007,100200176,100300045,100200067,100200055,100200185,100300006,100300214,100300146], if (attribute(catid) in [100300011,100300014,100300143,100300093,100200053,100200054,100400037,100300007,100200176,100200067,100200055,100200185,100300214,100300146], if (attribute(catid) in [100300011,100300093,100400037,100300007,100200176,100200067,100200055,100300214,100300146], -0.0290230654, -0.0106383395), if (attribute(catid) in [100200034,100300165,100200172,100300116,100300126,100400079,100200170,100400080,100300074,100300045,100300006], -0.0013386003, if (attribute(catid) in [0,100300027,100200087,100300066], 0.0034896642, 0.0060074974))), if (attribute(catid) in [100200171,100300058,100300166,100200186,100200052,100300005,100200068,100300121,100400142,100300073,100200192,100300065,100300122,100300127,100300169,100300200], if (attribute(catid) in [100200186,100200052,100300005,100400142,100300073,100300122,100300169], 0.0118090198, 0.018597715), if (attribute(catid) in [100300008,100200234,100400038], 0.0272065563, 0.0476478756))) +
+if (attribute(catid) in [100300011,100300013,100200034,100400141,100200052,100300005,100300008,100200068,100300027,100300116,100200234,100300019,100300004,100400142,100300073,100400038,100300209,100300127,100400079,100200170,100300066,100300007,100200176,100300045,100300076,100200055,100300214,100300146], if (attribute(catid) in [100300013,100300008,100300019,100300209,100200176,100200055,100300214,100300146], -0.0323268404, if (attribute(catid) in [100300011,100200052,100300005,100200068,100300004,100300066,100300007], -0.0057248097, if (attribute(catid) in [100200234,100400142,100300073,100400079,100300076], -0.0010770901, 0.0026132947))), if (attribute(catid) in [0,100200130,100300077,100300166,100200186,100300165,100200172,100300121,100200053,100300126,100200193,100200192,100300065,100300122,100300169,100400080,100300074], if (attribute(catid) in [100200130,100300166,100200186,100300165,100200172,100200053,100300126,100300169,100300074], 0.0070056177, 0.0111605097), if (attribute(catid) in [100200171,100300014,100300143,100200054,100200087,100300200,100200028], 0.0189654625, 0.0388492541))) +
+if (attribute(catid) in [0,100200171,100300011,100300014,100300058,100300077,100400141,100300165,100200052,100300093,100300102,100300005,100200172,100300027,100300116,100300121,100200053,100300019,100400142,100300073,100400037,100200192,100300065,100300127,100400079,100200170,100300169,100400080,100300074,100300066,100300007,100300045,100200028,100200067,100200185,100200232,100300146], if (attribute(catid) in [100300011,100300058,100300102,100300005,100300027,100200053,100300019,100300066,100300007,100200028,100200067,100200232,100300146], if (attribute(catid) in [100300011,100300102,100300005,100300019,100300007,100200028,100200067,100200232,100300146], -0.0351424027, -0.0101917869), if (attribute(catid) in [0,100300014,100300165,100300093,100300116,100300073,100300127,100200170,100300169,100400080,100300045,100200185], if (attribute(catid) in [0,100300093,100300116,100300073,100200170,100400080,100300045,100200185], 0.0023576953, 0.0043646024), if (attribute(catid) in [100200171,100200052,100200172,100400142], 0.0064763216, 0.0093043399))), if (attribute(catid) in [100200130,100300013,100300166,100300143,100200034,100200186,100300008,100300032,100200234,100300004,100300126,100200054,100400038,100300122,100200176,100300200,100300006], if (attribute(catid) in [100200130,100300166,100300143,100300032,100300004,100300126,100200054,100400038,100300122,100200176,100300200], 0.015361415, 0.0203450573), 0.0370146837)) +
+if (attribute(catid) in [100200171,100300011,100200034,100200186,100300093,100300102,100300008,100300032,100300121,100300126,100300122,100200087,100300200,100300076,100200067,100200055,100200185,100300006,100300214,100300146], if (attribute(catid) in [100300093,100300008,100300032,100300076,100200067,100200055,100200185,100300214,100300146], -0.0283721448, if (attribute(catid) in [100200171,100300011,100200034,100200186,100300102,100300200,100300006], -0.0097302345, -0.0016235117)), if (attribute(catid) in [0,100200130,100300058,100300013,100300143,100400141,100300165,100200052,100300005,100200068,100300027,100200053,100300004,100400142,100300073,100400037,100200193,100200192,100300212,100300127,100200170,100300169,100300074,100300066,100300045,100200028,100200232], if (attribute(catid) in [0,100300058,100300013,100300143,100400141,100200052,100200068,100200053,100300073,100400037,100200193,100200192,100300127,100200170,100300074,100300045], if (attribute(catid) in [100300058,100200052,100200068,100200053,100300073,100400037,100200193,100200192,100300127,100200170,100300074,100300045], 0.0055504107, 0.0071774828), 0.0090674985), if (attribute(catid) in [100300014,100300077,100300166,100200054,100400038,100300065], 0.0138239288, if (attribute(catid) in [100200172,100200234,100400079,100400080], 0.0189710965, 0.0320267193)))) +
+if (attribute(catid) in [100200171,100300014,100300058,100300013,100300166,100400141,100200172,100300008,100200068,100300121,100200234,100200053,100300019,100300004,100300126,100400037,100200192,100300212,100300209,100300127,100400080,100200087,100300074,100200176,100300076,100200055,100300214], if (attribute(catid) in [100300014,100300058,100300013,100300008,100200068,100200053,100300019,100300212,100300209,100300127,100400080,100200087,100200055,100300214], if (attribute(catid) in [100300013,100300008,100300019,100300212,100200087,100200055,100300214], -0.0279752223, -0.0097745433), if (attribute(catid) in [100200171,100400141,100200234,100300004,100400037,100300074,100200176,100300076], -0.0015465151, 0.0023515763)), if (attribute(catid) in [0,100200130,100300143,100200034,100200052,100300027,100400142,100200054,100300073,100200193,100400038,100300065,100400079,100300169,100300045,100200028], if (attribute(catid) in [100200130,100300143,100200034,100300027,100400142,100200054,100300073,100400038,100300065,100400079,100300169], 0.0070041173, 0.0113800549), if (attribute(catid) in [100300077,100200186,100300165,100300102,100300116,100300122,100200170,100300066,100200185,100200232], 0.0215968499, 0.0448418659))) +
+if (attribute(catid) in [0,100200171,100300011,100200130,100300014,100300058,100300013,100300077,100300166,100200186,100300165,100300093,100300102,100300008,100300032,100300027,100300116,100200234,100300004,100400142,100300073,100400037,100200193,100200192,100300065,100300122,100300127,100400080,100300007,100300200,100300045,100200028,100300076,100200185,100300006,100200232], if (attribute(catid) in [100300011,100200130,100300014,100300058,100300013,100300032,100300065,100300007,100300200,100300045,100200028,100300076,100200185,100300006], -0.007403056, if (attribute(catid) in [0,100200186,100300165,100300102,100300008,100300027,100300116,100200234,100400037,100200192,100300122,100400080], if (attribute(catid) in [100300165,100300008,100300027,100200234,100400037,100200192,100300122,100400080], 0.0018774622, 0.0037801837), 0.0085665512)), if (attribute(catid) in [100200034,100400141,100200052,100200172,100200054,100400038,100400079,100200170,100300169,100200087,100200067], if (attribute(catid) in [100200034,100400141,100400038,100400079,100200087,100200067], 0.0118341371, 0.0154556164), if (attribute(catid) in [100200068,100200053,100300126,100300212,100300074,100300066], 0.0234779795, 0.0334980927))) +
+if (attribute(catid) in [100200171,100300058,100300013,100300143,100200186,100200052,100300102,100300008,100300116,100300121,100200234,100300019,100300004,100200054,100200193,100400038,100300212,100300209,100300127,100200170,100200176,100200067,100200055], if (attribute(catid) in [100300013,100300102,100200234,100400038,100300212,100300209,100200176,100200067,100200055], if (attribute(catid) in [100300013,100300102,100400038,100200176,100200067,100200055], -0.0390121048, -0.0143867192), if (attribute(catid) in [100300143,100200186,100300008,100300116,100300121,100300004,100200054,100200193], -0.0044006932, 0.0015511826)), if (attribute(catid) in [0,100200130,100300077,100300166,100200034,100400141,100300165,100300093,100300005,100200172,100200068,100300027,100200053,100400142,100300073,100200192,100300065,100300122,100400079,100300169,100400080,100200087,100300074,100300066,100300007,100300200,100200028,100300076,100200185,100200232], if (attribute(catid) in [100200130,100300077,100300166,100200034,100400141,100300165,100300005,100200172,100200068,100200053,100400142,100300073,100300065,100400079,100300169,100400080,100200087,100300074,100200185,100200232], if (attribute(catid) in [100200130,100400141,100200068,100400142,100300073,100300065,100400079,100300169,100400080,100300074,100200185], 0.0060636247, 0.0088886465), if (attribute(catid) in [0,100300093,100300007], 0.0120697556, 0.0177577497)), 0.0377473017)) +
+if (attribute(catid) in [100300014,100300077,100200034,100300165,100300093,100300005,100300008,100200053,100300004,100300126,100400142,100300212,100300209,100300169,100300200,100300076,100200067,100200185,100300146], if (attribute(catid) in [100200034,100300093,100300005,100300004,100300126,100400142,100300209,100300076,100200067,100200185,100300146], -0.0137318062, -0.001492581), if (attribute(catid) in [0,100300011,100200130,100300058,100300166,100200186,100400141,100200068,100300027,100200234,100200054,100300073,100200193,100200192,100300065,100300122,100400079,100200170,100400080,100300074,100200028], if (attribute(catid) in [0,100300058,100200186,100400141,100200193,100200192,100400080,100200028], 0.0060199161, 0.0102196174), if (attribute(catid) in [100200171,100200052,100200172,100300032,100300116,100400038,100200087,100300066], 0.0151193734, 0.0263768697))) +
+if (attribute(catid) in [0,100300058,100200034,100200052,100300093,100300102,100300005,100300008,100300027,100300004,100300126,100400142,100300073,100300212,100400079,100400080,100300007,100200176,100300200,100300045,100300076,100200067,100200185,100300146], if (attribute(catid) in [100200034,100200052,100300102,100300008,100300027,100300126,100300007,100200176,100200067,100200185,100300146], if (attribute(catid) in [100300102,100300008,100300027,100300126,100200067,100200185,100300146], -0.0197844476, -0.0061589619), if (attribute(catid) in [100300058,100400142,100300212,100400079,100400080,100300200,100300045,100300076], -9.327577E-4, 0.0041966425)), if (attribute(catid) in [100200171,100200130,100300077,100300166,100300143,100200186,100400141,100300165,100200172,100200068,100300032,100300116,100300121,100200053,100400038,100200192,100300065,100200170,100300169,100300074,100300066,100200028,100300006], if (attribute(catid) in [100200171,100200130,100300077,100400141,100300165,100200172,100300032,100300116,100300121,100300065,100200170], 0.0090658634, 0.0131790639), if (attribute(catid) in [100300011,100300013,100400037,100300122,100300127,100200087], 0.0204665481, 0.0306124846))) +
+if (attribute(catid) in [0,100200171,100200130,100300058,100300013,100300077,100300166,100200052,100300005,100200172,100300008,100300116,100200234,100400142,100200054,100300073,100400037,100200193,100200192,100300212,100300209,100300122,100300127,100200170,100300169,100300074,100300007,100200176,100300200,100300045,100200055,100200185,100300006,100200232,100300214,100300146], if (attribute(catid) in [100200130,100300013,100300077,100300005,100300008,100300116,100200234,100200054,100300073,100200193,100300212,100300209,100300122,100200170,100300007,100300045,100200055,100200185,100300006,100200232,100300214,100300146], if (attribute(catid) in [100300013,100300116,100200234,100200054,100200193,100300212,100300209,100300007,100200055,100200185,100300006,100200232,100300214,100300146], -0.0108827313, -5.017785E-4), if (attribute(catid) in [0,100300058,100300166,100200172,100400037,100200176], if (attribute(catid) in [100300058,100300166,100200172,100400037], 0.0023128525, 0.0053288841), 0.0080470055)), if (attribute(catid) in [100300014,100300143,100200034,100200186,100400141,100300165,100200068,100300032,100300027,100300121,100200053,100300004,100300065,100400079,100200087,100300066], if (attribute(catid) in [100300014,100300143,100200034,100400141,100300165,100300032,100200053,100300004,100300065,100400079,100300066], 0.0118541533, 0.0171167195), if (attribute(catid) in [100300093,100300126,100400038,100400080], 0.0252859922, 0.0427399285))) +
+if (attribute(catid) in [100200171,100300011,100200130,100300058,100300166,100200034,100200186,100400141,100300093,100300005,100300008,100200068,100300116,100200053,100300126,100400142,100200054,100300073,100200193,100300065,100400079,100300169,100400080,100300074,100300007,100200176,100300045,100200028,100200067,100300006,100300214,100300146], if (attribute(catid) in [100300011,100300058,100200034,100200186,100300005,100300008,100200054,100400080,100300074,100300007,100200176,100200067,100300214,100300146], if (attribute(catid) in [100200034,100200054,100400080,100300074,100300007,100200067,100300214,100300146], -0.0139396834, -0.0053561842), if (attribute(catid) in [100200171,100200130,100300093,100200068,100200053,100300126,100400142,100400079,100300045,100300006], -0.0012208885, 0.0034171063)), if (attribute(catid) in [0,100300014,100300013,100300143,100300165,100200052,100300102,100200172,100300032,100300121,100400037,100400038,100200192,100300122,100300127,100200170,100200087,100300066,100300200], if (attribute(catid) in [0,100300143,100300165,100200052,100300102,100300032,100300121,100400037,100400038,100300122,100300127,100200170], if (attribute(catid) in [100300143,100200052,100300102,100300032,100400037,100400038,100200170], 0.0071786267, 0.0105454236), 0.0151332724), 0.0271687303)) +
+if (attribute(catid) in [0,100300011,100200130,100300058,100300013,100300077,100300166,100300143,100200034,100200052,100200172,100300032,100300121,100300019,100400142,100300073,100200193,100400038,100300065,100300209,100300122,100300127,100400079,100300169,100400080,100200087,100300007,100200176,100200028,100300076,100200067,100300006], if (attribute(catid) in [100300011,100200034,100300019,100200193,100400038,100300209,100300122,100300007,100200176,100200028,100200067,100300006], -0.0107750803, if (attribute(catid) in [100400142,100300073,100400079,100400080,100300076], 4.14432E-5, if (attribute(catid) in [0,100200130,100300077,100300166,100300143,100200172], 0.0037799336, 0.0062373044))), if (attribute(catid) in [100200171,100200186,100300165,100300102,100300005,100300027,100300116,100200234,100300004,100300126,100200054,100400037,100200170,100300074,100300066], if (attribute(catid) in [100200171,100200186,100300165,100200234,100300074], 0.010552703, 0.0153207486), if (attribute(catid) in [100300014,100300093,100200068,100200053,100200192,100300200,100200232], 0.0240361458, 0.0348297568))) +
+if (attribute(catid) in [0,100200171,100300011,100200130,100300014,100300058,100300013,100300166,100200034,100200186,100400141,100300165,100200052,100200172,100200068,100300032,100300027,100300116,100300121,100200053,100300004,100300126,100400142,100200054,100300073,100400037,100200193,100200192,100300065,100300212,100300209,100300122,100300127,100400079,100200170,100300169,100400080,100300074,100300066,100300007,100200176,100300200,100300045,100200028,100300076,100200067,100200055,100300006,100200232,100300146], if (attribute(catid) in [100200130,100300014,100300058,100200034,100300165,100200068,100200054,100300209,100200170,100400080,100300074,100300007,100200176,100200055,100300006,100200232,100300146], if (attribute(catid) in [100300058,100200068,100200054,100300209,100200176,100200055,100200232,100300146], -0.0154543001, if (attribute(catid) in [100200034,100300165,100200170,100300007,100300006], -0.0053315859, -8.526404E-4)), if (attribute(catid) in [0,100300011,100300013,100400141,100200052,100200172,100300121,100300004,100400142,100300073,100400037,100200193,100200192,100300065,100300122,100300127,100400079,100300066,100300200,100300045,100200028,100300076], if (attribute(catid) in [100300011,100300013,100400141,100300121,100400142,100400037,100200193,100200192,100300065,100300122,100300127], 0.002970819, 0.0066974843), if (attribute(catid) in [100200171,100300027,100200053,100300169], 0.0109293572, 0.0151670383))), if (attribute(catid) in [100300077,100300093,100300102,100300005,100300008,100200087], 0.0226972124, 0.0401505411)) +
+if (attribute(catid) in [100200130,100300014,100300077,100200034,100300165,100300102,100300032,100300116,100300121,100200234,100200053,100400038,100300212,100300127,100400080,100200087,100300074,100300007,100200028,100200067,100200055,100200185,100300006], if (attribute(catid) in [100300014,100300032,100400038,100300212,100200087,100300007,100200055,100200185], -0.0294546465, if (attribute(catid) in [100300165,100300121,100200234,100300127,100400080,100200028,100200067,100300006], -0.0041076181, -3.32312E-4)), if (attribute(catid) in [0,100300011,100300058,100300166,100200186,100300093,100300005,100200172,100300008,100200068,100300073,100400037,100200192,100300122,100400079,100300169,100300066,100200176,100300076], if (attribute(catid) in [0,100300011,100300058,100300093,100300005,100200172,100200068,100300073,100200192,100400079,100300169,100300076], if (attribute(catid) in [100300011,100300058,100300093,100300005,100200172,100200068,100300073,100200192,100400079,100300169], 0.006046367, 0.0075872041), 0.0099717066), if (attribute(catid) in [100200171,100400141,100200052,100300004,100300126,100400142,100200054,100300065,100200170], 0.0147808083, 0.0271833064))) +
+if (attribute(catid) in [0,100200171,100300011,100200130,100300014,100300013,100300077,100300166,100300143,100200034,100200186,100400141,100300165,100200052,100300093,100300102,100200172,100300008,100300032,100300027,100300121,100200234,100200053,100300019,100300004,100300073,100400037,100400038,100200192,100300065,100300212,100300122,100300127,100200170,100300169,100400080,100200087,100300074,100300066,100300007,100200176,100300200,100200028,100200067], if (attribute(catid) in [100300014,100300013,100300077,100300143,100200186,100300093,100300102,100300008,100300032,100200053,100300019,100400037,100400038,100300212,100300007,100200028,100200067], if (attribute(catid) in [100200186,100300032,100300019,100300212,100200067], -0.0346775988, -0.0084286391), if (attribute(catid) in [100200034,100400141,100200052,100200172,100200234,100300073,100300065,100200170,100200087,100300074,100200176], 0.0021428485, if (attribute(catid) in [0,100200171,100200130,100300166,100300165,100300027,100200192,100300122,100300169,100300066,100300200], 0.006946403, 0.0110205277))), if (attribute(catid) in [100300058,100200068,100300116,100300126,100400142], 0.0228118568, 0.0343825826)) +
+if (attribute(catid) in [0,100200171,100300011,100200130,100300014,100300077,100300143,100200186,100400141,100200052,100300093,100300102,100300005,100200068,100300032,100300027,100300121,100200234,100200053,100300126,100400142,100300073,100400037,100200192,100300065,100300209,100300127,100400079,100200170,100400080,100200087,100300074,100300066,100300007,100200176,100300045,100200028,100300076,100200055,100200185,100300006,100200232], if (attribute(catid) in [100300011,100200186,100300102,100300005,100200068,100300032,100300121,100200053,100300126,100400037,100300209,100300007,100300045,100200028,100300076,100200055,100200185,100300006], if (attribute(catid) in [100300011,100300102,100300005,100300121,100300126,100400037,100300209,100300076,100200055,100200185,100300006], -0.0217391763, -0.0085505926), if (attribute(catid) in [100200171,100200130,100300014,100300143,100400141,100200052,100300027,100200234,100300073,100200192,100300127,100200170,100200087,100300074,100300066,100200176], if (attribute(catid) in [100200171,100200130,100300143,100200052,100200170,100300074,100200176], 1.581819E-4, 0.0028254989), 0.0069841416)), if (attribute(catid) in [100300013,100300166,100300165,100200172,100300008,100300116,100300004,100200054,100400038,100300212,100300122,100300169,100300200,100200067], if (attribute(catid) in [100200172,100300116,100300004,100300122,100300169,100300200], 0.0144252893, 0.0197908458), 0.04284419)) +
+if (attribute(catid) in [0,100200171,100300011,100200130,100300014,100300058,100300077,100300166,100300143,100200186,100400141,100200052,100300005,100200172,100300008,100200068,100300027,100300116,100200234,100300019,100300126,100300073,100400037,100200193,100200192,100300209,100300127,100400079,100200170,100300169,100300066,100300007,100200176,100300200,100200028,100300076,100200185,100300006,100300146], if (attribute(catid) in [100300011,100300014,100300143,100300005,100300027,100200234,100300126,100300209,100300066,100300007,100200176,100300200,100200185,100300146], if (attribute(catid) in [100300011,100300005,100300126,100300209,100300007,100200176,100200185], -0.0278752616, -0.0114872053), if (attribute(catid) in [100200171,100300058,100300077,100300166,100200172,100300008,100200068,100300116,100200193,100400079], if (attribute(catid) in [100300058,100200172,100200068,100300116,100400079], -0.0025940117, 5.790855E-4), if (attribute(catid) in [0,100400141,100300019,100300073,100200192,100300076], 0.005097188, 0.0074013229))), if (attribute(catid) in [100200034,100300093,100300121,100200053,100300004,100400142,100200054,100400038,100300065,100300122,100400080,100300074], if (attribute(catid) in [100300093,100300121,100200053,100300004,100400142,100300065,100400080], 0.0112491051, 0.0165554655), if (attribute(catid) in [100300165,100200087,100200232], 0.025263808, 0.039519728))) +
+if (attribute(catid) in [0,100200130,100300058,100300013,100200034,100400141,100300165,100300093,100300102,100200172,100300008,100200068,100300032,100300116,100300121,100200234,100200053,100300004,100300126,100400142,100300073,100200193,100200192,100300212,100300209,100300127,100400079,100200170,100300169,100200087,100300066,100300200,100300045,100200028,100300076,100200067,100200055], if (attribute(catid) in [100300093,100300008,100300032,100300121,100200234,100300004,100300212,100300169,100200087,100300045,100200067,100200055], if (attribute(catid) in [100300032,100200234,100300212,100200087,100200055], -0.0204195189, -0.0097651462), if (attribute(catid) in [0,100200130,100300058,100300165,100300126,100400142,100200192,100400079,100300066,100300200,100200028,100300076], 0.0029821664, 0.007455649)), if (attribute(catid) in [100200171,100300077,100300166,100200186,100300027,100200054,100400038,100300065,100300122,100300007,100200232], 0.0123063395, if (attribute(catid) in [100300014,100200052,100300005,100400037,100400080], 0.021957365, 0.0390618799))) +
+if (attribute(catid) in [0,100200171,100300011,100200130,100300014,100300058,100300013,100300077,100300166,100300143,100200034,100200186,100400141,100200052,100300093,100300102,100300005,100200172,100200068,100300027,100300116,100300121,100200053,100300019,100300126,100200054,100300073,100400037,100200193,100200192,100300122,100400079,100200170,100300169,100200087,100300074,100300066,100300007,100300200,100300045,100200028,100200067,100300006,100200232,100300214], if (attribute(catid) in [100300011,100300166,100300143,100400141,100300093,100300005,100200172,100200068,100300116,100300121,100300019,100300126,100200054,100300073,100200193,100200087,100300074,100300007,100300200,100300045,100200028,100200067,100200232,100300214], if (attribute(catid) in [100300143,100400141,100300116,100300019,100300126,100200054,100300074,100200067,100200232,100300214], -0.0084514959, -0.0026252948), if (attribute(catid) in [0,100200130,100300013,100200034,100300027,100200053,100400037,100200192,100200170,100300169,100300006], 0.0047022442, 0.0075241336)), if (attribute(catid) in [100300032,100200234,100300004,100400142,100400038,100300065,100300212,100300127,100400080,100300076], 0.0167624654, if (attribute(catid) in [100300165], 0.0213460841, 0.0385220165))) +
+if (attribute(catid) in [0,100200171,100300011,100200130,100300058,100300077,100300166,100300143,100200034,100200186,100400141,100300165,100200052,100300093,100300005,100200172,100200068,100300032,100300027,100300116,100200234,100200053,100300126,100200054,100300073,100200193,100400038,100200192,100300065,100300209,100300122,100300127,100400079,100300169,100400080,100200087,100300066,100300007,100200176,100300045,100200028,100200055,100300006], if (attribute(catid) in [100300011,100300166,100300143,100300093,100200068,100300032,100200054,100400038,100300209,100300127,100400079,100200087,100200176,100300045,100200028,100200055,100300006], if (attribute(catid) in [100200068,100300032,100300209,100200176,100200028,100200055], -0.0269726448, -0.0065892436), if (attribute(catid) in [0,100200171,100200130,100300058,100300005,100200172,100300027,100300116,100200053,100300073,100200193,100200192,100300065,100300122,100300169,100400080,100300066,100300007], if (attribute(catid) in [100200171,100200130,100300005,100200172,100300027,100300073,100200193,100200192,100300122,100300169,100300007], 0.0012062164, 0.0058063938), 0.011591703)), if (attribute(catid) in [100300014,100300121,100300004,100400142,100200170,100300074,100300076,100200067,100300146], 0.0227008484, 0.0502957296)) +
+if (attribute(catid) in [0,100200171,100300013,100300166,100200186,100300093,100200172,100300008,100300032,100300027,100300116,100200234,100300004,100400142,100200193,100400038,100300212,100300122,100300127,100400079,100200170,100300169,100400080,100200087,100300066,100300007,100300200,100300045,100200028,100300006,100200232], if (attribute(catid) in [100300013,100300032,100200193,100300212,100300127,100300007], -0.031900249, if (attribute(catid) in [100200171,100300166,100300093,100200172,100300008,100200234,100300004,100400142,100400038,100300169,100200087,100300066,100300200,100300045,100300006,100200232], if (attribute(catid) in [100300166,100300093,100300008,100200234,100300004,100400142,100200087,100300066,100300200,100300045,100300006], -0.0040077306, 5.434632E-4), 0.004429589)), if (attribute(catid) in [100200130,100300058,100300077,100300143,100400141,100300165,100300102,100300005,100300121,100200054,100300073,100200192,100300065,100300209,100200176,100300076,100200185], if (attribute(catid) in [100300058,100300077,100300102,100300005,100300073,100300065,100300209], 0.0088697865, 0.0120553652), if (attribute(catid) in [100200034,100200052,100200068,100200053,100300126], 0.0200329832, 0.0315067216))) +
+if (attribute(catid) in [100300011,100200130,100300058,100300013,100300165,100200052,100300005,100200172,100300008,100200068,100300027,100300116,100200234,100200053,100300126,100400142,100200054,100300073,100400037,100300122,100300127,100200170,100300169,100300074,100300007,100300045,100200028,100200185,100300146], if (attribute(catid) in [100300011,100300058,100300013,100300165,100200052,100300008,100200054,100300074,100300007,100200185,100300146], if (attribute(catid) in [100300011,100300013,100300008,100200054,100300007,100300146], -0.0282159404, -0.0102832725), if (attribute(catid) in [100200068,100300116,100200234,100300126,100300073,100400037,100300122,100300127,100300045,100200028], -0.0021058619, 0.0025428121)), if (attribute(catid) in [0,100200171,100300014,100300077,100300166,100300143,100200186,100400141,100300032,100300121,100300004,100400038,100300065,100400079,100300066,100200067,100300006], if (attribute(catid) in [0,100300014,100300077,100200186,100300032,100300121,100300004,100400038,100300066,100300006], 0.009136363, 0.0165605827), if (attribute(catid) in [100200034,100200192,100200087,100200176,100300200], 0.0258553552, 0.0438879554))) +
+if (attribute(catid) in [0,100300011,100200130,100300014,100300077,100300166,100300143,100200034,100400141,100200052,100300093,100300102,100300005,100300008,100300121,100200053,100300004,100300126,100400142,100400037,100200193,100400038,100200192,100300065,100300212,100300122,100300127,100400079,100200170,100400080,100300066,100200176,100200028,100200067,100200055,100300146], if (attribute(catid) in [100300011,100200130,100300014,100300143,100200034,100300093,100300008,100300121,100200053,100300004,100200193,100400038,100200192,100300212,100400079,100200170,100200176,100200067,100200055,100300146], if (attribute(catid) in [100300011,100300143,100300121,100300212,100200067,100200055,100300146], -0.0232281903, -0.0044814242), if (attribute(catid) in [100300166,100200052,100300126,100400142,100400037,100300122,100400080,100200028], 0.0018401795, 0.0038857825)), if (attribute(catid) in [100200171,100200186,100300165,100200172,100200068,100300027,100300116,100200234,100300019,100300073,100300169,100200087,100300074,100300200,100300076,100200232], if (attribute(catid) in [100200186,100300165,100200172,100200068,100300027,100200234,100300073,100200087,100300200], 0.0097089357, 0.0155015911), 0.0416491361)) +
+if (attribute(catid) in [100300011,100200186,100400141,100300165,100200052,100300093,100300005,100200172,100300032,100300027,100300121,100200053,100300019,100300004,100200054,100200193,100400038,100300127,100300169,100300074,100300007,100200176,100200028,100200232,100300214,100300146], if (attribute(catid) in [100300011,100200186,100300093,100300005,100300027,100300019,100200054,100400038,100300007,100200028,100200232,100300214,100300146], -0.0086481469, if (attribute(catid) in [100300165,100200172,100300121,100200193,100300074], -0.0026659712, 1.319612E-4)), if (attribute(catid) in [0,100200171,100200130,100300058,100300077,100300166,100300143,100200034,100200068,100300116,100200234,100300126,100400142,100300073,100400037,100200192,100300065,100300212,100300122,100400079,100200170,100400080,100300066,100300200,100300006], if (attribute(catid) in [0,100300058,100300077,100200034,100200234,100300073,100400037,100200192,100300065,100300122,100400079,100400080,100300066], 0.0058612042, 0.0117793447), 0.0334049996)) +
+if (attribute(catid) in [0,100200171,100300011,100200130,100300014,100300166,100300143,100200034,100200186,100400141,100300165,100200052,100300093,100300102,100200172,100200068,100300032,100300027,100300121,100200234,100200053,100300019,100300004,100400142,100200054,100300073,100400037,100200193,100400038,100200192,100300065,100300212,100300209,100300122,100300127,100400079,100200170,100300169,100400080,100200087,100300074,100300066,100300007,100200067,100200055,100200185,100300006,100200232,100300214], if (attribute(catid) in [100300014,100300166,100300143,100400141,100200052,100300102,100200068,100300019,100300004,100200054,100400037,100400038,100300212,100300122,100200170,100200087,100200067,100200055,100200185,100300006,100200232,100300214], if (attribute(catid) in [100300143,100300102,100300019,100200054,100400038,100300212,100200067,100200055,100200185,100300214], -0.025649727, -0.0043110057), if (attribute(catid) in [0,100300011,100200130,100200034,100300165,100200172,100300032,100300121,100200053,100300073,100200193,100200192,100300209,100400079,100300169,100300074,100300066,100300007], if (attribute(catid) in [100300011,100200130,100200034,100200053,100300073,100300169,100300066,100300007], 0.0027424259, 0.0069640136), 0.0123173309)), if (attribute(catid) in [100300058,100300077,100300005,100300116,100300126,100300200,100200028,100300076], 0.0267885216, 0.0575708735)) +
+if (attribute(catid) in [0,100200171,100300011,100200130,100300058,100300077,100300166,100300143,100200186,100400141,100200052,100300102,100300005,100200172,100200068,100300032,100300027,100300116,100300121,100200234,100200053,100400142,100300073,100400038,100300065,100300209,100300122,100300127,100400079,100200170,100200087,100300074,100300066,100300007,100200176,100300200,100300045,100200028,100200055,100200185,100300006,100200232], if (attribute(catid) in [100300011,100300143,100200186,100200068,100200234,100200053,100300209,100200087,100300066,100300045,100200055,100200185,100300006], if (attribute(catid) in [100300011,100200234,100300209,100300066,100200055,100200185], -0.0209712583, -0.0093038797), if (attribute(catid) in [100300077,100200052,100300102,100200172,100300121,100400142,100300122,100200170,100300074,100300007,100200176,100200232], 1.606661E-4, if (attribute(catid) in [0,100200130,100300166,100300005,100300073,100300065,100400079,100300200], 0.0052908982, 0.0074959812))), if (attribute(catid) in [100300165,100300093,100400037,100200192,100300169,100400080,100300076], 0.0146939559, 0.0295044733)) +
+if (attribute(catid) in [0,100200171,100300011,100200130,100300013,100300077,100200034,100200186,100400141,100300093,100300005,100300008,100300027,100300116,100300121,100200234,100300004,100300126,100400142,100200054,100300073,100400037,100200193,100400038,100300065,100300212,100300122,100300127,100400079,100300169,100400080,100200087,100300074,100300066,100200176,100300200,100300045,100300076,100200055,100300214], if (attribute(catid) in [100300013,100300093,100300005,100200234,100300004,100200193,100400038,100300212,100300122,100300169,100200087,100300074,100300200,100300045,100300076,100200055,100300214], if (attribute(catid) in [100200234,100300004,100300212,100300200,100200055,100300214], -0.0225234294, -0.0073428723), if (attribute(catid) in [100200130,100300077,100400141,100300027,100300126,100200054,100300073,100400037,100300065,100400080,100200176], -7.043255E-4, 0.0046211972)), if (attribute(catid) in [100300058,100300166,100300143,100200052,100200172,100300032,100200192,100300209,100200170,100200232,100300146], 0.0127794955, if (attribute(catid) in [100300165,100200053,100300019], 0.0212698998, 0.0283454496))) +
+if (attribute(catid) in [0,100300014,100300013,100300077,100200186,100400141,100200172,100300116,100300004,100300126,100400142,100200054,100200192,100300212,100300209,100400079,100200170,100300169,100400080,100300045,100200028,100300076,100200067,100300006,100300146], if (attribute(catid) in [100300126,100400142,100200054,100300212,100300045,100200067,100300146], if (attribute(catid) in [100300126,100300212,100200067,100300146], -0.0423375801, -0.0094881961), if (attribute(catid) in [0,100300014,100300013,100200186,100400141,100300116,100300004,100300209,100400079,100200028,100300076,100300006], -0.0010068479, 0.0043349463)), if (attribute(catid) in [100200171,100200130,100300058,100300166,100200034,100300165,100300093,100300005,100200068,100300032,100300027,100300121,100200234,100200053,100300073,100300065,100300122,100300127,100200176], if (attribute(catid) in [100200130,100300166,100300165,100300093,100300121,100200053,100300073,100300065,100300122,100300127], if (attribute(catid) in [100300166,100300121,100300073,100300065,100300122,100300127], 0.0078139636, 0.0103953208), 0.0132055533), if (attribute(catid) in [100300011,100200052,100300102,100300008,100400037,100200193,100400038,100200087,100300074], 0.0206601511, 0.0355590743))) +
+if (attribute(catid) in [100200171,100200130,100300166,100300143,100200034,100400141,100300093,100300102,100300005,100300008,100200068,100300116,100300073,100200193,100200192,100300209,100300122,100300169,100400080,100200087,100300074,100300066,100300045,100200028,100200067], if (attribute(catid) in [100400141,100300102,100300008,100200193,100300209,100400080,100300066,100300045,100200028,100200067], -0.0141770941, if (attribute(catid) in [100200171,100200130,100300166,100200034,100300005,100200068,100200192,100200087,100300074], -0.0034615278, 0.0015215338)), if (attribute(catid) in [0,100300014,100300058,100300013,100300077,100300165,100200052,100200172,100300032,100300121,100200053,100300004,100300126,100400142,100200054,100400038,100300065,100400079,100200176,100300076,100200185,100300006], if (attribute(catid) in [0,100300014,100300058,100300013,100300165,100300126,100200054,100400079,100300076,100200185,100300006], if (attribute(catid) in [100300014,100300013,100300165,100300126,100200054,100400079,100300076,100200185,100300006], 0.0055004027, 0.0079799037), 0.0113299928), 0.0216285145)) +
+if (attribute(catid) in [0,100200171,100300011,100300014,100300058,100300013,100300077,100300143,100200034,100400141,100200052,100300093,100300102,100200172,100200068,100300027,100300121,100200234,100200053,100300019,100300126,100400142,100200054,100300073,100400037,100200193,100200192,100300065,100300212,100300122,100300127,100400079,100300169,100400080,100300007,100200176,100300200,100300045,100200055,100300006,100200232,100300214,100300146], if (attribute(catid) in [100300011,100300014,100300058,100300013,100300143,100200034,100200052,100300093,100300102,100200053,100300019,100200054,100200193,100400080,100300007,100300200,100300045,100200055,100300006,100200232,100300214,100300146], if (attribute(catid) in [100300011,100300143,100300093,100300102,100300019,100200193,100300007,100200055,100300006,100200232,100300214,100300146], -0.021450023, -0.0079398906), if (attribute(catid) in [0,100300077,100400141,100200068,100300027,100300121,100300126,100400142,100300073,100400037,100300065,100300212,100300127,100400079,100300169], if (attribute(catid) in [100300077,100200068,100300027,100300121,100300073,100300127,100400079], 0.0017634542, 0.0044214643), 0.0078134161)), if (attribute(catid) in [100200130,100300166,100200186,100300032,100400038,100300209,100200170,100200087], 0.013392468, 0.0239045512)) +
+if (attribute(catid) in [100300013,100300165,100300102,100300008,100200068,100300116,100300121,100200053,100200054,100200192,100300209,100300122,100200170,100400080,100200087,100300066,100200176,100300200,100300045,100300076,100200067,100300214], if (attribute(catid) in [100300013,100300102,100300008,100300116,100300121,100200176,100300200,100300076,100200067,100300214], -0.0229996772, -0.0059777362), if (attribute(catid) in [0,100200171,100200130,100300014,100300058,100300077,100300166,100200052,100300027,100300004,100300126,100300073,100200193,100300065,100300212,100300127,100400079,100300169,100300074,100200028], if (attribute(catid) in [100200130,100300014,100300166,100300004,100300065,100300212,100400079,100300169,100300074], 0.003077496, if (attribute(catid) in [0,100300027,100300126,100300127], 0.0063783302, 0.0094067973)), if (attribute(catid) in [100200186,100400141,100200172,100300032,100200234,100300019,100400142,100400037,100300006,100300146], 0.0137033492, 0.0253895093))) +
+if (attribute(catid) in [0,100300014,100300058,100300013,100300166,100300143,100300165,100200172,100200068,100300116,100300121,100200053,100300004,100400038,100300065,100300212,100200170,100300169,100400080,100200087,100300074,100300066,100300007,100200176,100300045,100300214], if (attribute(catid) in [100300013,100300121,100400038,100300212,100200087,100300045,100300214], if (attribute(catid) in [100300013,100300212,100300214], -0.0545034219, -0.0112857727), if (attribute(catid) in [100300014,100300143,100300165,100200068,100200053,100300065,100200170,100300074,100300007], -0.0011777575, if (attribute(catid) in [0,100200172,100300116,100200176], 0.0034598533, 0.0071870285))), if (attribute(catid) in [100200171,100300011,100200130,100300077,100200034,100200186,100400141,100200052,100300093,100300102,100300027,100200234,100300126,100400142,100200054,100300073,100400037,100200192,100300122,100300127,100400079,100300200,100200028], if (attribute(catid) in [100200130,100300077,100200186,100400141,100300102,100200234,100300126,100400142,100200054,100300073,100200192,100300122,100400079,100300200,100200028], 0.0113718649, if (attribute(catid) in [100200171,100300011,100300027,100400037], 0.0157280324, 0.0222069557)), 0.0489348021)) +
+if (attribute(catid) in [100300011,100300014,100300058,100300013,100300166,100200034,100400141,100300165,100200052,100300093,100300102,100300008,100300027,100200053,100300019,100300126,100400142,100300073,100400038,100300209,100200170,100400080,100200087,100300007,100300200,100300076,100200067,100200055,100300146], if (attribute(catid) in [100300008,100300019,100300126,100300007,100300200,100200055,100300146], -0.0393976544, if (attribute(catid) in [100300011,100300014,100300058,100300013,100300166,100200034,100300093,100300102,100400038,100300209,100200170,100400080,100300076,100200067], -0.0070993241, -7.351884E-4)), if (attribute(catid) in [0,100200171,100200130,100300077,100200186,100200172,100200234,100200054,100400037,100300122,100300127,100400079,100300074,100300066,100200028,100200232], if (attribute(catid) in [100200171,100300077,100200234,100400079,100300066,100200028], 0.0040677589, 0.0073363935), if (attribute(catid) in [100300143,100300005,100200068,100300116,100200192,100300065,100300169,100200176], 0.0152680762, 0.0386360428))) +
+if (attribute(catid) in [100200171,100300143,100400141,100300093,100300005,100300008,100200068,100300027,100300121,100300126,100200054,100400038,100200192,100300212,100300122,100300007,100200176,100300045,100200028,100300076,100200067,100300006,100200232,100300214], if (attribute(catid) in [100300093,100300027,100300212,100200176,100200067,100200232,100300214], -0.0439825609, if (attribute(catid) in [100300143,100400141,100300005,100300008,100300121,100300126,100200192,100300007,100300045,100300076,100300006], -0.0078490055, -0.0029548934)), if (attribute(catid) in [0,100300011,100300014,100300013,100300077,100300166,100200034,100200186,100200052,100200172,100300032,100300116,100200234,100200053,100300004,100400142,100300073,100200193,100300065,100300127,100400079,100300169,100400080,100300066,100300200], if (attribute(catid) in [0,100300011,100300014,100300013,100300077,100200034,100200172,100300116,100200053,100400142,100200193,100300065,100300127,100400079,100300169,100400080,100300066], if (attribute(catid) in [100300014,100200172,100300116,100400142,100200193,100300065,100400079], 0.0037085174, 0.0067213111), 0.0103041294), if (attribute(catid) in [100200130,100300165,100300209,100200087], 0.0193058409, 0.0282268747))) +
+if (attribute(catid) in [100300011,100200130,100300058,100300143,100300165,100200052,100300093,100300102,100300005,100200068,100300027,100300116,100200053,100300019,100300004,100400037,100200193,100300209,100200170,100300074,100300007,100200176,100300045,100200055,100200185,100200232,100300214], if (attribute(catid) in [100300102,100300005,100300116,100300019,100300209,100200176,100200055,100200185,100200232,100300214], -0.0293243609, if (attribute(catid) in [100200052,100300093,100200068,100300004,100200193,100300074,100300045], -0.0070078206, -9.152377E-4)), if (attribute(catid) in [0,100200171,100300013,100300077,100200186,100400141,100300126,100200054,100300073,100300065,100300122,100400079,100300169,100400080,100200087,100300066], if (attribute(catid) in [100200171,100300013,100200186,100300073,100300065,100300122,100300169], 0.0041690469, 0.0070675916), if (attribute(catid) in [100300166,100200034,100300032,100400142,100200192,100300076,100200067], 0.0123326309, 0.0192701631))) +
+if (attribute(catid) in [100400141,100300093,100300102,100200068,100300032,100300116,100200234,100300004,100200193,100400038,100300212,100300209,100400079,100300066,100200176,100300200,100300045,100300076,100300214,100300146], if (attribute(catid) in [100300093,100300102,100200068,100300032,100200234,100400038,100300212,100300214,100300146], -0.0199825802, -0.0078667369), if (attribute(catid) in [0,100200171,100200130,100300014,100300058,100300077,100300166,100200186,100300165,100200052,100300005,100200172,100300027,100200053,100400142,100300073,100200192,100300122,100200170,100300169,100400080,100200087,100300074,100300007,100300006], if (attribute(catid) in [0,100200171,100300014,100300058,100300077,100300166,100300005,100200172,100300027,100200053,100300073,100200170,100400080,100200087,100300007], if (attribute(catid) in [100200171,100300014,100300077,100300166,100300005,100200172,100300027,100200170,100400080,100300007], 0.0025693654, 0.0050450725), 0.0104631035), if (attribute(catid) in [100300011,100300143,100300121,100300126,100400037,100300065,100300127,100200028], 0.0209253237, 0.0358125311))) +
+if (attribute(catid) in [100200171,100300058,100300077,100200034,100200186,100300005,100300008,100200068,100300121,100200053,100300019,100300126,100200193,100200192,100300065,100300212,100300209,100400079,100200087,100300045,100200028,100300076,100200067,100200055,100300006,100300146], if (attribute(catid) in [100300008,100300121,100300019,100300126,100300212,100300209,100200028,100200055,100300146], -0.0326978382, if (attribute(catid) in [100300077,100200034,100200186,100300005,100200068,100200053,100300045,100300076,100200067], -0.0102205963, -0.0022839975)), if (attribute(catid) in [0,100300011,100200130,100300014,100300013,100400141,100300165,100200052,100300027,100300116,100300004,100400142,100300073,100400037,100400038,100300122,100200170,100300169,100400080,100300066,100200176,100300200,100200185], if (attribute(catid) in [100200130,100300014,100300165,100200052,100300116,100300004,100400142,100300073,100400038,100300122,100200170,100300169,100300200], 0.0024615075, if (attribute(catid) in [0,100300013,100300027], 0.0070973911, 0.0098426968)), if (attribute(catid) in [100300166,100300093,100200172,100200234,100200054,100300127,100300074,100300007,100200232], 0.0164836278, 0.0507175593))) +
+if (attribute(catid) in [0,100300011,100200130,100300014,100300077,100300166,100200034,100200186,100400141,100200052,100300093,100300005,100300008,100200068,100300032,100300121,100200234,100200053,100300004,100300126,100400037,100300065,100300122,100300127,100400080,100200087,100300066,100300007,100200176,100200067,100300006,100300214], if (attribute(catid) in [100300008,100300032,100300004,100300126,100400037,100200176,100300214], -0.0321717738, if (attribute(catid) in [100300011,100200130,100300014,100300077,100200052,100200234,100200053,100300122,100300127,100400080,100200067,100300006], -0.0016525958, 0.0046522992)), if (attribute(catid) in [100200171,100300143,100300165,100200172,100300027,100300116,100300019,100400142,100200054,100300073,100200193,100400038,100200192,100400079,100200170,100300169,100300200,100300076], if (attribute(catid) in [100200171,100200172,100200054,100300073,100200192,100400079,100200170,100300169], 0.011142025, 0.0154339977), 0.0284170367)) +
+if (attribute(catid) in [100200130,100300058,100300077,100200034,100400141,100300102,100300005,100300008,100200053,100300004,100400037,100300212,100300122,100400079,100400080,100300074,100300007,100200176,100200185,100300006,100200232], if (attribute(catid) in [100200034,100300102,100300005,100300008,100400037,100300074,100300007,100200185,100300006], -0.0179586062, -0.0035242201), if (attribute(catid) in [0,100300014,100300013,100300166,100200186,100300165,100200052,100300093,100200172,100200068,100300027,100200234,100400142,100200054,100300073,100400038,100200192,100300065,100300127,100300169,100300200,100300076], if (attribute(catid) in [100300013,100200052,100400142,100300073,100400038,100300065,100300076], 0.002685538, if (attribute(catid) in [0,100300165,100300093,100200172,100200234,100200054,100200192], 0.00646939, 0.0086837084)), if (attribute(catid) in [100200171,100300143,100300032,100300121,100300126,100200170,100200087,100300066,100200028], 0.014146174, 0.0293888308))) +
+if (attribute(catid) in [100200171,100300011,100200130,100300014,100300058,100300166,100300143,100200186,100300165,100300008,100200068,100300032,100300027,100300116,100200234,100300126,100200054,100400037,100200193,100400038,100300209,100300127,100200170,100400080,100300074,100300007,100300045,100200028,100300076,100200067,100200185,100300006,100200232,100300146], if (attribute(catid) in [100300011,100300014,100300058,100300143,100200186,100300165,100300032,100300116,100200234,100300126,100200193,100400038,100300209,100200170,100300007,100300045,100200028,100200067,100200232,100300146], if (attribute(catid) in [100300014,100300032,100300116,100200234,100300209,100300007,100300045,100200028,100200067,100200232,100300146], -0.0140634241, -0.0065657437), if (attribute(catid) in [100200171,100200068,100200054,100400080,100300074,100300076,100300006], -0.0013766512, 0.0012665197)), if (attribute(catid) in [0,100300077,100200052,100300005,100300121,100200053,100300004,100300073,100200192,100300122,100300169,100200087,100200176,100300200], if (attribute(catid) in [100200052,100300005,100300073,100200192], 0.0044075628, 0.0068551736), if (attribute(catid) in [100200172,100300065,100400079,100300066], 0.014273406, 0.0204258682))) +
+if (attribute(catid) in [100300014,100300166,100300143,100200068,100300032,100300116,100200234,100300126,100400037,100200193,100400038,100300212,100300127,100200087,100300200,100200055,100300214,100300146], if (attribute(catid) in [100300143,100200234,100400037,100200193,100300212,100300200,100200055,100300214,100300146], -0.0253090838, -0.0066897329), if (attribute(catid) in [0,100200171,100200130,100300058,100300077,100200034,100200186,100400141,100200052,100300093,100300005,100200172,100300121,100200053,100300004,100400142,100300073,100300065,100300122,100200170,100300169,100400080,100300074,100300066,100300007,100200176,100300076,100200067,100200185], if (attribute(catid) in [100200171,100200130,100300077,100200186,100200053,100300004,100300073,100300169,100300074,100300066,100200176,100300076], if (attribute(catid) in [100200171,100300004,100300073,100300169,100300066,100300076], 1.455877E-4, 0.0029189153), if (attribute(catid) in [0,100200034,100400141,100300005,100400142,100300065,100300007,100200185], 0.0054511445, 0.0085988325)), if (attribute(catid) in [100300013,100300165,100300102,100300027,100200192,100400079,100200028,100300006,100200232], 0.018000307, 0.0331227663))) +
+if (attribute(catid) in [0,100200130,100300014,100300058,100300077,100200034,100400141,100300165,100300093,100200172,100300008,100300027,100300116,100300121,100200234,100300004,100400142,100300073,100400037,100200193,100300212,100300127,100400080,100300074,100300066,100300007,100300200,100300045,100200028,100300076,100300006,100300146], if (attribute(catid) in [100300014,100300058,100300077,100300093,100300008,100200234,100400142,100400037,100300074,100300200,100300045,100200028,100300076,100300006,100300146], if (attribute(catid) in [100300014,100300058,100300093,100400142,100300200,100300045,100300076,100300006,100300146], -0.012997967, -0.0044598258), if (attribute(catid) in [0,100400141,100200172,100300116,100300004,100200193,100300212,100300127,100400080,100300066], if (attribute(catid) in [100400141,100200172,100300116,100300212,100300127,100400080], -3.302269E-4, 0.0020272308), 0.0056490673)), if (attribute(catid) in [100200171,100300011,100300166,100200186,100200052,100300102,100300005,100200068,100300032,100200053,100400038,100200192,100300065,100300122,100400079,100200170,100300169,100200176,100200232], if (attribute(catid) in [100200171,100300011,100200052,100300005,100200068,100200053,100400038,100300065,100300122,100400079,100200170,100300169,100200232], if (attribute(catid) in [100200052,100300005,100200053,100400038,100400079,100200170,100300169], 0.0089176032, 0.0121994068), 0.0173516652), 0.0325017104)) +
+if (attribute(catid) in [0,100200171,100300014,100300013,100300077,100300166,100200186,100400141,100300102,100200172,100300008,100300116,100200234,100200053,100300019,100300004,100400142,100400037,100200193,100300212,100300209,100300122,100300127,100200170,100200087,100200176,100300200,100300076,100200055,100300006,100200232,100300214,100300146], if (attribute(catid) in [100300013,100300166,100300102,100300008,100300116,100200234,100300019,100300004,100400142,100300212,100300209,100200176,100300200,100200055,100300006,100200232,100300214,100300146], if (attribute(catid) in [100300013,100300166,100300019,100300212,100300200,100200055,100300214,100300146], -0.0132054608, -0.0064396815), if (attribute(catid) in [0,100300014,100300077,100200172,100400037,100200193], 0.0018800662, 0.0043219093)), if (attribute(catid) in [100200130,100300058,100200034,100300165,100200052,100200068,100300032,100300121,100300126,100300073,100200192,100300065,100400079,100300169,100400080,100300074,100300066,100200028], if (attribute(catid) in [100300058,100300165,100300121,100300126,100300073,100400079,100300066,100200028], 0.0086112093, 0.0133895272), 0.0254777637)) +
+if (attribute(catid) in [0,100200171,100300013,100300077,100300143,100200186,100400141,100200052,100300102,100200172,100300027,100300116,100200053,100300004,100400142,100200054,100200192,100300065,100300212,100300209,100300169,100400080,100200087,100300066,100200176,100300200,100300045,100300076,100200067,100200055,100200185,100300006,100300214,100300146], if (attribute(catid) in [100300013,100300077,100300102,100200053,100300004,100400142,100200054,100300065,100300209,100300066,100300200,100300045,100200067,100200055,100200185,100300006,100300214,100300146], if (attribute(catid) in [100300077,100300102,100300066,100300200,100300045,100200067,100200055,100200185,100300214,100300146], -0.01999683, -0.0066258033), if (attribute(catid) in [100200171,100400141,100200052,100200172,100300027,100200192,100400080,100200087,100200176,100300076], 2.616813E-4, 0.0029401657)), if (attribute(catid) in [100200130,100300014,100300166,100200034,100300165,100200068,100200234,100300073,100400037,100300122,100400079,100200170,100300074,100200028], if (attribute(catid) in [100300166,100200034,100300165,100200068,100300122,100400079,100200170], 0.010002913, 0.0150210552), 0.0282741486)) +
+if (attribute(catid) in [0,100200171,100200130,100300014,100300058,100300013,100300166,100200034,100400141,100300165,100200052,100300102,100200172,100300032,100300027,100200234,100200053,100300019,100300004,100400142,100300073,100400037,100200193,100200192,100300065,100300212,100300209,100300122,100300127,100400079,100200170,100300169,100400080,100200176,100300045,100200028,100200067,100200055,100200185,100300006,100200232], if (attribute(catid) in [100300014,100300058,100300166,100200034,100300165,100300102,100300032,100200053,100300019,100300004,100300212,100300209,100300127,100200170,100300169,100400080,100200176,100300045,100200067,100200055,100200185,100300006,100200232], if (attribute(catid) in [100300032,100300019,100300212,100300209,100200176,100200067,100200055,100200185,100200232], -0.036557541, if (attribute(catid) in [100300166,100300102,100200053,100300004,100300169,100400080,100300006], -0.0063417149, -0.0028552545)), if (attribute(catid) in [0,100200130,100400141,100200234,100400142,100300073,100200193,100300065,100300122,100400079,100200028], if (attribute(catid) in [100200234,100400142,100300065,100300122,100400079,100200028], 0.0013434898, 0.0041326482), 0.0076971051)), if (attribute(catid) in [100300011,100300143,100200186,100300005,100200068,100400038,100200087,100300066,100300007], 0.0168824118, if (attribute(catid) in [100300077,100300116,100300121,100200054,100300074], 0.0265247041, 0.0418405954))) +
+if (attribute(catid) in [0,100200171,100300011,100300014,100300058,100300013,100300077,100300166,100300143,100200034,100200186,100400141,100300165,100300093,100300005,100200068,100300032,100300027,100300116,100300004,100300126,100400142,100200054,100300073,100400037,100400038,100200192,100300212,100300209,100300122,100300127,100400079,100200170,100400080,100300074,100300066,100300007,100300045,100300076,100300006,100200232,100300214], if (attribute(catid) in [100200171,100200034,100300093,100300005,100200068,100300027,100300116,100300004,100300126,100200054,100400037,100400038,100300212,100300209,100400079,100300074,100300214], if (attribute(catid) in [100300005,100300027,100300116,100200054,100400037,100300209,100300074,100300214], -0.0185907409, -0.0037649772), if (attribute(catid) in [100300014,100300058,100300013,100300077,100300166,100300066,100300007,100300076,100300006], 7.932193E-4, 0.0048541117)), if (attribute(catid) in [100200130,100200052,100300102,100200172,100200193,100300169,100200176,100200028,100300146], 0.0114452275, if (attribute(catid) in [100300008,100200234,100300065,100200087], 0.0212529341, 0.0435817724))) +
+if (attribute(catid) in [0,100200171,100200130,100300014,100300058,100300077,100300166,100300143,100200034,100200186,100400141,100200052,100300093,100300102,100300005,100200172,100300027,100300116,100300121,100200053,100300004,100400142,100300073,100400037,100400038,100200192,100300212,100300122,100300127,100300169,100400080,100300007,100200028,100300076,100200055,100200185,100300006,100200232,100300146], if (attribute(catid) in [100300014,100300058,100300143,100400141,100300093,100300102,100300116,100400142,100400037,100400038,100300122,100300007,100200028,100300076,100200055,100200185,100300006,100200232,100300146], if (attribute(catid) in [100300143,100300093,100300102,100400037,100300007,100200028,100200055,100200185,100300006,100200232,100300146], -0.024040692, -0.0039451648), if (attribute(catid) in [0,100300077,100300166,100200172,100300027,100300004,100200192,100300212,100300127], if (attribute(catid) in [100300077,100300166,100200172,100300027,100200192,100300212], 7.340608E-4, 0.003096223), 0.0055831787)), if (attribute(catid) in [100300011,100300165,100300032,100200234,100300126,100200193,100300065,100400079,100200170,100200087,100300074,100300066,100300200], if (attribute(catid) in [100300165,100300032,100200234,100300126,100200193,100400079,100200170,100300074,100300066,100300200], 0.0106987137, 0.0159048063), 0.0286231943)) +
+if (attribute(catid) in [0,100300011,100200130,100300014,100300143,100200034,100200186,100400141,100200052,100300093,100300102,100300005,100200172,100200068,100300027,100300116,100300121,100200234,100200053,100300004,100400142,100200054,100400037,100200193,100400038,100200192,100300122,100300169,100400080,100200087,100300074,100300066,100300200,100300045,100300076,100200055,100200232], if (attribute(catid) in [100300011,100300014,100200034,100400141,100300102,100300005,100200172,100200234,100200193,100400080,100300066,100300200,100300045,100300076,100200055,100200232], if (attribute(catid) in [100300011,100200034,100300102,100200193,100400080,100200055,100200232], -0.0213247733, -0.00351169), if (attribute(catid) in [100200130,100200052,100200068,100300121,100200053,100300004,100200054,100300169], 6.568155E-4, 0.0043360634)), if (attribute(catid) in [100200171,100300077,100300166,100300165,100300032,100300073,100300065,100300209,100300127,100400079,100200170,100200028,100300146], if (attribute(catid) in [100200171,100300077,100300165,100300032,100300073,100300209,100300127,100200170], 0.0086409598, 0.0123374521), 0.02758533)) +
+if (attribute(catid) in [100300013,100200034,100400141,100300165,100300093,100300102,100300008,100300032,100300116,100300121,100200234,100300019,100300004,100300126,100200054,100300073,100400037,100200192,100300212,100300209,100300127,100400079,100200170,100300169,100200087,100300066,100300007,100200176,100300200,100300045,100300076,100200067,100200185,100200232], if (attribute(catid) in [100300013,100200034,100400141,100300102,100300008,100300032,100300116,100200234,100300019,100300004,100300126,100300212,100300209,100300200,100200067,100200185,100200232], if (attribute(catid) in [100300102,100300032,100300019,100300212,100300209,100200067,100200185,100200232], -0.05249066, -0.0117164184), if (attribute(catid) in [100300093,100200054,100200192,100400079,100200170,100300066,100300045,100300076], -0.0050797102, -1.8254E-5)), if (attribute(catid) in [0,100200171,100200130,100300058,100300166,100200186,100300005,100200172,100200068,100300027,100200053,100400142,100300065,100300122,100400080,100200028,100300006,100300146], if (attribute(catid) in [0,100200171,100300166,100200186,100300005,100400142,100300065,100300122,100200028], if (attribute(catid) in [100200171,100300166,100300005,100300065,100300122,100200028], 0.0045192638, 0.0060994023), 0.0117841342), if (attribute(catid) in [100300077,100300143,100200052,100300074], 0.0191236363, 0.037609952))) +
+if (attribute(catid) in [0,100200171,100200130,100300058,100300013,100300166,100200034,100200186,100400141,100300165,100200052,100300093,100300005,100200172,100300027,100300121,100300019,100300004,100400142,100300073,100400037,100300065,100300212,100300122,100400079,100300169,100400080,100200087,100300074,100200176,100300045,100200067,100200185], if (attribute(catid) in [100200171,100300013,100300019,100400037,100300212,100200087,100200067,100200185], if (attribute(catid) in [100300013,100300019,100300212,100200185], -0.0408187739, -0.0118328301), if (attribute(catid) in [0,100200130,100300166,100200034,100200186,100400141,100200052,100300093,100300005,100200172,100300027,100300121,100300004,100400142,100300073,100300065,100400079,100300169,100300074,100200176,100300045], if (attribute(catid) in [100200034,100200186,100300093,100300005,100200172,100300027,100300121,100300004,100400142,100300073,100400079,100300169,100300074,100200176], if (attribute(catid) in [100200034,100300093,100300005,100300027,100400079,100300169,100300074,100200176], -0.0023114463, 6.956308E-4), 0.0033093887), 0.0090068346)), if (attribute(catid) in [100300011,100300014,100300077,100300116,100200053,100300126,100200193,100400038,100200192,100300127,100200170,100300066,100200028,100300076,100200232,100300146], 0.015754669, 0.0276821855)) +
+if (attribute(catid) in [0,100200171,100200130,100300014,100300058,100300166,100300143,100200186,100300165,100200052,100300093,100300005,100200068,100300032,100300116,100300121,100200234,100200053,100300019,100300004,100300126,100400142,100200054,100300073,100200193,100400038,100200192,100300065,100300212,100300209,100300127,100400079,100200170,100300169,100400080,100300074,100300007,100200176,100300200,100300045,100200028,100200067,100200055,100300006,100200232,100300146], if (attribute(catid) in [100200052,100200068,100300121,100200053,100400142,100400038,100200192,100300212,100300209,100200170,100300200,100300045,100200028,100200067,100200055,100300006,100200232,100300146], if (attribute(catid) in [100300121,100300212,100300209,100200028,100200067,100200055,100200232,100300146], -0.0318529122, -0.0076204316), if (attribute(catid) in [0,100200171,100300014,100300058,100200186,100300165,100300004,100300073,100200193,100300065,100300127,100400079,100300074,100300007,100200176], if (attribute(catid) in [100300014,100300058,100200186,100300165,100300004,100200193,100300065,100300127,100400079,100300074,100300007], 0.0024278683, 0.0048565045), 0.0078949518)), if (attribute(catid) in [100300077,100200034,100400141,100200172,100300027,100300122,100200087], 0.014410917, 0.0330128168)) +
+if (attribute(catid) in [100300077,100300166,100300143,100200034,100200186,100300032,100300121,100300019,100300004,100300126,100400142,100300073,100400037,100400038,100300065,100300212,100300209,100300127,100400079,100200087,100200176,100300200,100300076,100200067,100200055,100300214], if (attribute(catid) in [100300143,100300032,100300126,100300212,100300209,100200176,100200055,100300214], -0.0309719186, if (attribute(catid) in [100300019,100300004,100400142,100400037,100400038,100300065,100200087,100300200,100200067], -0.0082013563, -0.0020664794)), if (attribute(catid) in [0,100200171,100300014,100300058,100300013,100400141,100300165,100200172,100300008,100300027,100300116,100200053,100200192,100300122,100200170,100300169,100400080,100300074,100300045,100200028], if (attribute(catid) in [0,100200171,100300014,100300058,100300008,100300116,100200053,100200192,100300169,100300074,100300045,100200028], if (attribute(catid) in [100200171,100300014,100300058,100300008,100300116,100200053,100200192,100300074,100200028], 0.0024438226, 0.0050777724), 0.0078352283), if (attribute(catid) in [100300011,100200130,100200052,100200054,100200193,100300066,100200232], 0.0123902667, 0.0178145861))) +
+if (attribute(catid) in [0,100200171,100300011,100200130,100300058,100300077,100300166,100300143,100200052,100300093,100200172,100300008,100300032,100300027,100300116,100200234,100400142,100200054,100200193,100400038,100200192,100300065,100300209,100300122,100300127,100400079,100200170,100200087,100300074,100300066,100300007,100300200,100300076,100200055,100300006], if (attribute(catid) in [100300058,100300143,100300032,100200054,100300074,100300007,100300076,100200055,100300006], if (attribute(catid) in [100300058,100300143,100300032,100300076,100200055,100300006], -0.0392140951, -0.0178716478), if (attribute(catid) in [100300011,100200130,100200052,100300093,100300008,100300027,100300116,100200234,100400142,100200193,100400038,100300209,100300127,100400079,100200170,100200087,100300066,100300200], if (attribute(catid) in [100300011,100200052,100300008,100300027,100200234,100400038,100300209,100200170], -0.0066755608, -0.0018428992), if (attribute(catid) in [0,100200171,100300077,100300166], 0.0036371154, 0.0064922752))), if (attribute(catid) in [100300013,100200034,100200186,100400141,100300165,100300005,100200068,100200053,100300073,100300169,100400080,100200067,100200185,100300146], 0.0121567368, if (attribute(catid) in [100300014,100300121,100300004,100400037,100200176,100200028], 0.0238269627, 0.0454004741))) +
+if (attribute(catid) in [0,100300011,100300013,100200034,100400141,100200052,100300093,100300102,100200172,100300008,100200068,100300116,100300004,100300126,100400142,100400038,100300209,100300127,100400079,100300169,100300200,100200055,100200185,100300006,100200232,100300146], if (attribute(catid) in [100300011,100300013,100300008,100300126,100300200,100200055,100200185,100300006,100200232,100300146], -0.0236401207, if (attribute(catid) in [100200034,100200052,100300093,100300102,100200068,100300127], -0.0040925434, if (attribute(catid) in [0,100400142,100400079], 0.0014214596, 0.0034589578))), if (attribute(catid) in [100200171,100200130,100300058,100300077,100300166,100200186,100300165,100300027,100300121,100200234,100200053,100300073,100200192,100300065,100300122,100400080,100300074], if (attribute(catid) in [100300077,100300166,100300165,100300027,100200192,100300065,100300122,100400080], 0.006993185, 0.0098596774), if (attribute(catid) in [100300005,100300032,100400037,100200193,100200170,100300066,100200176,100200028], 0.0175470785, 0.0289653859))) +
+if (attribute(catid) in [0,100200171,100200130,100300014,100300077,100300166,100200034,100200186,100400141,100200052,100300093,100200172,100300116,100200234,100200053,100400142,100200054,100200193,100400038,100300065,100300212,100300127,100400079,100200170,100300169,100400080,100200087,100300074,100300066,100200176,100300200,100300045,100300076,100200055,100300006,100300214], if (attribute(catid) in [100300014,100200034,100200052,100300093,100300116,100200053,100200193,100300212,100300127,100200176,100300200,100200055,100300214], if (attribute(catid) in [100300014,100200053,100300212,100300200,100200055,100300214], -0.0378968222, -0.0078372609), if (attribute(catid) in [0,100200171,100300077,100200186,100400142,100200054,100400038,100300065,100400079,100200170,100300169,100400080,100200087,100300074,100300066,100300045], if (attribute(catid) in [100200171,100400142,100200054,100400038,100300065,100200170,100300169,100400080,100300066], -1.641778E-4, 0.0015184867), 0.0061033364)), if (attribute(catid) in [100300058,100300143,100200068,100300032,100300121,100300004,100300126,100300073,100400037,100200192], 0.0125327069, if (attribute(catid) in [100300011,100300013,100300165,100300102,100300005,100300027,100300122], 0.0195931033, 0.0422132813))) +
+if (attribute(catid) in [0,100200171,100300014,100300077,100200186,100300165,100300102,100300005,100200172,100300008,100200068,100300116,100200234,100300004,100300126,100400142,100200054,100300073,100200193,100200192,100300065,100300212,100200170,100200087,100300074,100300066,100300200,100300045,100200028,100300076,100200055,100200185,100300214,100300146], if (attribute(catid) in [100300014,100300102,100300116,100200234,100400142,100200054,100200087,100300200,100300045,100200055,100200185,100300214,100300146], -0.0134771, if (attribute(catid) in [0,100300165,100300005,100300008,100200068,100200193,100300212,100300066,100200028,100300076], if (attribute(catid) in [100300165,100300005,100300008,100300066,100300076], -4.632359E-4, 0.0017931695), 0.0047361213)), if (attribute(catid) in [100300011,100200130,100300166,100400141,100200052,100300032,100300027,100300121,100200053,100400038,100300209,100300122,100400079,100300169,100400080,100300007,100200176], if (attribute(catid) in [100200130,100400141,100200052,100300032,100300027,100200053,100300209,100300122,100400079], 0.0106878879, 0.0162088762), 0.0270716952)) +
+if (attribute(catid) in [100300011,100200130,100300058,100300013,100400141,100300165,100300093,100300005,100300027,100300116,100200234,100300004,100300126,100300073,100200193,100300209,100300122,100300007,100200176,100200028,100200055,100200185,100300006], if (attribute(catid) in [100300011,100300058,100400141,100300005,100200234,100300004,100200193,100300209,100300122,100200028,100200055,100200185], if (attribute(catid) in [100300011,100200193,100300209,100200028,100200055], -0.033539512, -0.0118989268), if (attribute(catid) in [100300013,100300165,100300126,100200176], -0.0067126195, -0.0029610222)), if (attribute(catid) in [0,100300014,100300166,100200034,100200186,100200052,100200172,100200068,100200053,100300019,100200054,100400038,100200192,100300065,100300127,100400079,100200170,100400080,100300074,100300066,100300076,100300146], if (attribute(catid) in [0,100300166,100200186,100200172,100200053,100300019,100400038,100200192,100300065,100200170,100400080,100300074,100300076], if (attribute(catid) in [100200186,100200172,100200053,100300019,100400038,100200170,100400080], 0.0031153692, 0.00551757), 0.0106121829), if (attribute(catid) in [100200171,100300077,100300008,100400142,100300169,100200087,100300200], 0.0168125614, 0.0398137842))) +
+if (attribute(catid) in [100200130,100300013,100300077,100300166,100400141,100300165,100300102,100200068,100300032,100300121,100200234,100300019,100300004,100300126,100300073,100400037,100200193,100300212,100300127,100400079,100200170,100300066,100300045,100300076,100200067,100200185,100200232,100300214,100300146], if (attribute(catid) in [100300102,100300032,100300121,100300019,100400037,100200193,100300212,100200170,100300045,100300076,100200067,100200185,100300214,100300146], -0.015876685, if (attribute(catid) in [100300166,100300165,100200068,100300004,100300126,100300073,100300127,100300066,100200232], -0.0044887193, -3.188875E-4)), if (attribute(catid) in [0,100300014,100300143,100200186,100200053,100400142,100200054,100200192,100300065,100300122,100300169,100300074,100300200], if (attribute(catid) in [0,100300014,100300143,100200186,100200053,100200192,100300065,100300074], 0.0053430945, 0.0090007568), if (attribute(catid) in [100200171,100200052,100300093,100300005,100200172,100300027,100300116,100400080,100200087,100300007], 0.0122908466, 0.0204822127))) +
+if (attribute(catid) in [100200130,100300077,100300143,100200034,100300005,100300032,100300116,100300121,100300019,100200054,100200192,100300212,100300209,100300127,100400079,100200170,100400080,100300074,100300007,100300200,100300045,100200028,100300076,100200055,100200232], if (attribute(catid) in [100300143,100300032,100300121,100300019,100300212,100300200,100200028,100200055], -0.0260564211, if (attribute(catid) in [100200034,100300005,100200054,100300209,100400080,100300074,100300045], -0.0098627619, -0.0040846106)), if (attribute(catid) in [0,100200171,100300011,100300014,100300013,100300166,100200186,100400141,100300165,100200052,100200172,100300027,100200053,100300004,100400142,100300073,100400037,100200193,100300065,100300122,100300169,100300066], if (attribute(catid) in [100200171,100300011,100300013,100300166,100300165,100200172,100400142,100300073,100400037,100300122,100300169,100300066], if (attribute(catid) in [100300011,100300013,100300166,100300165,100300073,100400037,100300066], 0.0011147417, 0.004298363), if (attribute(catid) in [0,100300014,100200053,100300004,100300065], 0.0064436133, 0.0100503455)), if (attribute(catid) in [100300058,100300093,100300008,100200234,100200087,100200176], 0.0146658078, 0.0247793114))) +
+if (attribute(catid) in [100300013,100300165,100300102,100300005,100200068,100300027,100300121,100400037,100200193,100300212,100300169,100200087,100300074,100300066,100300007,100300200,100300045,100200028,100200067], if (attribute(catid) in [100300013,100300165,100300102,100300005,100200068,100400037,100200193,100300007,100300200,100200067], -0.01649051, -0.0065097756), if (attribute(catid) in [0,100300011,100200130,100300058,100300166,100200186,100400141,100200052,100300093,100200172,100300116,100200053,100300019,100300004,100300126,100300073,100300065,100300209,100300122,100400079,100200170,100300076], if (attribute(catid) in [0,100200130,100300058,100200186,100300093,100200172,100300004,100300073,100300209,100300122,100200170,100300076], if (attribute(catid) in [100200130,100200186,100200172,100300073,100200170,100300076], 0.002507844, 0.0047210186), 0.0076570114), if (attribute(catid) in [100200171,100300014,100300077,100200034,100300008,100300032,100200234,100200054,100400038,100200192,100300127,100400080,100300006,100300146], 0.0139365828, 0.0234469942))) +
+if (attribute(catid) in [100300013,100300077,100300165,100200052,100300102,100200068,100300032,100300116,100300121,100200234,100300004,100300126,100400142,100200054,100300073,100200192,100300065,100300212,100200170,100400080,100300074,100300007,100200176,100300045,100200067,100200055,100200185,100200232,100300214,100300146], if (attribute(catid) in [100300032,100300121,100300212,100300007,100200176,100200067,100200055,100200185,100300214,100300146], -0.0389470287, if (attribute(catid) in [100200052,100300102,100200068,100300116,100200234,100300126,100200054,100300065,100200170,100300074], -0.0087219876, -0.0029990733)), if (attribute(catid) in [0,100200130,100300014,100300143,100200034,100200186,100400141,100300005,100200172,100200053,100400038,100300122,100300127,100400079,100300169,100300066,100300076,100300006], if (attribute(catid) in [0,100200130,100300014,100300143,100400141,100400038,100300122,100300127,100400079,100300066,100300076,100300006], 0.0052739356, 0.0099085929), if (attribute(catid) in [100200171,100300011,100300166,100300093,100300027,100200193,100300209,100200087,100300200], if (attribute(catid) in [100200171,100200193,100200087], 0.0138275479, 0.0190545276), 0.0389964998))) +
+if (attribute(catid) in [0,100200171,100300011,100200130,100300014,100300058,100300013,100300077,100300166,100300143,100200034,100200186,100400141,100300165,100200052,100300093,100300102,100300005,100200172,100300008,100200068,100300027,100300116,100300121,100200234,100200053,100300019,100300004,100400142,100300073,100400037,100200193,100200192,100300065,100300209,100300122,100300127,100400079,100200170,100300169,100400080,100200087,100300074,100300066,100200176,100300045,100200055,100200185,100300006,100200232,100300214,100300146], if (attribute(catid) in [100300011,100300058,100300077,100300005,100300008,100200068,100400142,100300065,100300122,100400079,100400080,100200087,100300045,100200055,100200185,100200232,100300214], if (attribute(catid) in [100300058,100300005,100400142,100200087,100300045,100200055,100200185,100200232,100300214], -0.019364671, -0.0065149978), if (attribute(catid) in [0,100200171,100200130,100300014,100200034,100200186,100300093,100300102,100300116,100300121,100200234,100200053,100300019,100300073,100200193,100200192,100300127,100300169,100300074,100300006], if (attribute(catid) in [0,100200171,100300014,100200186,100300102,100200193,100300127,100300074,100300006], 0.0016997077, 0.0042301416), if (attribute(catid) in [100300166,100400141,100200172,100300027,100300004,100300209,100200170,100300066], 0.008322508, 0.0106242146))), if (attribute(catid) in [100300126,100200054,100300212,100300200,100300076], 0.0274475014, 0.0436714177)) +
+if (attribute(catid) in [0,100200130,100300014,100300058,100200034,100200186,100300165,100200052,100300093,100300102,100300027,100300121,100200234,100300019,100300004,100300126,100400142,100200054,100300073,100400037,100200193,100400038,100300065,100300212,100300209,100300122,100300127,100400079,100300169,100200087,100300074,100300066,100300007,100300200,100300045,100300076,100200067,100200185], if (attribute(catid) in [100300014,100200034,100200186,100300121,100300004,100400142,100300073,100400037,100300065,100300212,100300209,100300122,100300127,100400079,100200087,100300074,100200067,100200185], if (attribute(catid) in [100300014,100300121,100400037,100300212,100300209,100200087,100200067,100200185], -0.0261969274, -0.0052677773), if (attribute(catid) in [100200130,100300058,100300165,100300019,100300126,100200193,100300169,100300066,100300007,100300200,100300076], 6.535877E-4, 0.0044410765)), if (attribute(catid) in [100200171,100300011,100300013,100300166,100400141,100300005,100200172,100200068,100300032,100300116,100200192,100200170,100300006,100200232,100300146], if (attribute(catid) in [100200171,100300005,100200172,100300006,100200232,100300146], 0.0098796141, 0.0147635144), if (attribute(catid) in [100300077,100200053,100400080,100200028], 0.022608657, 0.0384965531))) +
+if (attribute(catid) in [100200130,100200052,100300093,100300102,100300005,100200172,100300008,100200068,100300032,100200234,100200053,100300004,100300126,100400142,100200054,100200193,100300209,100300074,100300066,100300007,100300045,100200028,100200055,100200232], if (attribute(catid) in [100300093,100300102,100300008,100400142,100200054,100300209,100300074,100200028,100200055], -0.0206111829, if (attribute(catid) in [100200052,100300005,100200172,100200234,100300004,100300126,100300066], -0.00770466, -0.0024157474)), if (attribute(catid) in [0,100200171,100300011,100300014,100300058,100300013,100300077,100300166,100200034,100400141,100300027,100300121,100300073,100400038,100200192,100300065,100300127,100400079,100200170,100300169,100400080,100200087,100200067], if (attribute(catid) in [0,100300013,100400141,100300027,100300121,100400038,100200192,100300127,100400079,100300169,100200087,100200067], if (attribute(catid) in [100300013,100400141,100200192,100300127,100400079,100300169,100200087], 0.002893737, 0.005681885), 0.0099814265), if (attribute(catid) in [100200186,100300165,100300212,100300122,100300076], 0.0162181832, 0.0314914649))) +
+if (attribute(catid) in [0,100200171,100300011,100200130,100300014,100300058,100300077,100300166,100200034,100200186,100400141,100300165,100200052,100300093,100300102,100300005,100300027,100300116,100200053,100300019,100300126,100200054,100300073,100200193,100200192,100300212,100300209,100300122,100300127,100300169,100400080,100200087,100300074,100200176,100300200,100300045,100200067,100300006,100200232,100300214,100300146], if (attribute(catid) in [100200171,100300058,100200034,100400141,100300093,100300005,100300027,100300116,100200053,100300126,100200054,100200193,100200192,100300212,100300209,100300122,100300074,100200176,100200067,100200232,100300214,100300146], if (attribute(catid) in [100200034,100300126,100200193,100200192,100300209,100300122,100200176,100200067,100200232,100300214,100300146], -0.0183821122, -0.007384387), if (attribute(catid) in [100300011,100300014,100300077,100300166,100200052,100300102,100300019,100300073,100300127,100400080,100200087,100300200,100300006], -6.604841E-4, 0.0030934288)), if (attribute(catid) in [100300013,100200172,100300008,100200068,100200234,100300004,100400142,100300065,100400079,100200170,100200185], if (attribute(catid) in [100300013,100200172,100300008,100300004,100400142,100300065,100400079,100200185], 0.0088954013, 0.0132240377), 0.0239626156)) +
+if (attribute(catid) in [100200171,100300014,100300143,100300165,100300102,100300008,100300032,100300027,100300121,100200053,100400037,100300122,100300127,100300007,100200176,100200028,100200067,100200232,100300146], if (attribute(catid) in [100300143,100300008,100400037,100300122,100300007,100200176,100200028,100200067,100200232,100300146], -0.0238688087, -0.0050071269), if (attribute(catid) in [0,100300011,100200130,100300058,100300077,100300166,100200186,100400141,100200052,100300116,100200234,100300019,100300004,100400142,100200054,100300073,100200193,100200192,100300212,100400079,100200170,100300169,100200087,100300074,100300066,100300200,100300006], if (attribute(catid) in [100300058,100300077,100300166,100200052,100300116,100200234,100400142,100200054,100300073,100200192,100300212,100400079,100300074,100300066,100300200,100300006], 0.0013911393, 0.0050101023), if (attribute(catid) in [100200034,100300005,100200172,100200068,100300126,100400038,100300065], 0.0131280626, 0.026491322))) +
+if (attribute(catid) in [100300011,100300166,100300165,100300102,100300008,100300032,100200234,100400037,100300209,100300169,100300200,100300045,100200028,100300076,100200055,100200185,100300006,100200232,100300146], if (attribute(catid) in [100300011,100300102,100300008,100200234,100400037,100300209,100300076,100200055,100300006,100200232,100300146], -0.0278018549, if (attribute(catid) in [100300165,100200185], -0.0124509833, -0.0065030338)), if (attribute(catid) in [0,100200171,100300013,100300077,100200186,100400141,100200052,100300093,100200068,100300027,100200053,100300004,100400142,100200054,100300073,100200193,100300122,100300127,100400079,100200170,100400080,100200087,100300074,100300066], if (attribute(catid) in [100300077,100400141,100300027,100300004,100400142,100200054,100200193,100300122,100300127,100200087,100300074], -6.424011E-4, if (attribute(catid) in [0,100200171,100200186,100200068,100200053,100400079,100400080], 0.0042262873, 0.0083942658)), if (attribute(catid) in [100200130,100300058,100200034,100300005,100200172,100300121,100200192,100300065,100300212,100300007], if (attribute(catid) in [100300058,100300005,100200172,100200192], 0.0108729295, 0.0157560823), 0.0252591937))) +
+if (attribute(catid) in [100200171,100200130,100300014,100300058,100300013,100300166,100300143,100300093,100300102,100300008,100300027,100300019,100300004,100200054,100300073,100400037,100200193,100300212,100300209,100300127,100200170,100400080,100300066,100200176,100300200,100200067,100300146], if (attribute(catid) in [100300143,100300093,100300102,100300008,100300019,100200054,100400037,100300212,100300209,100200176,100300146], -0.0224536708, if (attribute(catid) in [100200171,100300014,100300013,100300027,100300004,100300066,100200067], -0.0094554624, -0.0036901881)), if (attribute(catid) in [0,100300077,100200034,100200186,100400141,100200052,100300005,100200172,100300116,100300121,100200053,100400038,100200192,100300065,100300122,100400079,100300169,100200087,100200028], if (attribute(catid) in [0,100200186,100200052,100200172,100300121,100200192], 0.0020254456, 0.0072521361), if (attribute(catid) in [100300165,100300126,100400142,100300074,100300007,100300006], 0.0116654091, 0.0258802787))) +
+if (attribute(catid) in [0,100200171,100200130,100300014,100300077,100300166,100200186,100400141,100300165,100200052,100300093,100300102,100300005,100200172,100200068,100300032,100300116,100300121,100300019,100300004,100300126,100200054,100300073,100400037,100200193,100200192,100300065,100300212,100300209,100300122,100300127,100400079,100200170,100200087,100300066,100300007,100200176,100300200,100300045,100200028,100300076,100200067,100200232], if (attribute(catid) in [100200171,100200130,100300166,100200186,100300093,100300102,100300005,100200172,100200068,100300116,100300019,100300126,100200054,100200193,100200192,100300212,100300209,100300127,100200170,100200087,100300066,100300007,100200176,100300200,100300045,100300076,100200067,100200232], if (attribute(catid) in [100200130,100200186,100300102,100300005,100300116,100300019,100200054,100200193,100200192,100300212,100300209,100300127,100200087,100300066,100200176,100300200,100300045,100200067], -0.0063942493, -0.0019096403), if (attribute(catid) in [100300014,100300077,100300165,100300004,100300073,100400037,100300122,100400079,100200028], 0.0026034959, if (attribute(catid) in [0,100300032], 0.0043048063, 0.0068527373))), if (attribute(catid) in [100300011,100300058,100200034,100300027,100200234,100200053,100400142,100300169,100400080,100300146], 0.0134919535, 0.0274589028)) +
+if (attribute(catid) in [0,100200130,100300014,100300058,100300013,100300077,100300166,100200034,100200186,100300165,100200052,100300093,100300005,100200172,100200068,100300027,100300116,100300004,100300126,100400142,100200054,100300073,100400037,100200193,100400038,100200192,100300065,100300212,100300209,100400079,100200170,100300169,100400080,100300074,100300066,100300200,100300045,100300076,100200185,100300006,100200232,100300214,100300146], if (attribute(catid) in [100300013,100300165,100200052,100300093,100200068,100300126,100400142,100200054,100200193,100300209,100300200,100300045,100200185,100300214,100300146], if (attribute(catid) in [100300013,100400142,100200054,100300209,100300200,100200185,100300214,100300146], -0.0220987103, -0.0066619739), if (attribute(catid) in [100200130,100300014,100300058,100300166,100200034,100200186,100300005,100200172,100300027,100400038,100200170,100300074,100300076,100200232], -9.424528E-4, if (attribute(catid) in [0,100300004,100300212,100400079], 0.0034943052, 0.0064497198))), if (attribute(catid) in [100200171,100300011,100300143,100400141,100300102,100300032,100300121,100200053,100300019,100300127,100200067], 0.0127972052, 0.0287759999)) +
+if (attribute(catid) in [100200171,100300011,100300014,100300058,100300143,100200034,100300093,100300102,100300005,100300008,100200068,100300032,100300027,100300116,100300019,100300126,100200054,100400038,100200192,100300209,100300122,100300169,100400080,100300007,100200176,100300045,100200185,100200232,100300146], if (attribute(catid) in [100300102,100300008,100300032,100300116,100300019,100300126,100300209,100400080,100200176,100200185,100200232,100300146], -0.0303496242, if (attribute(catid) in [100300011,100300143,100200034,100300093,100200068,100300027,100200054,100300045], -0.0083664582, -0.0038598802)), if (attribute(catid) in [0,100200130,100300077,100300166,100200186,100400141,100200052,100200234,100200053,100400142,100300073,100300065,100300127,100400079,100200170,100300066,100300076,100300006], if (attribute(catid) in [100200130,100300077,100300166,100200186,100200053,100300073,100300065,100300127,100400079,100200170,100300066], 0.001261608, 0.0050477397), if (attribute(catid) in [100300165,100200172,100200193,100300212], 0.0108120161, 0.023552562))) +
+if (attribute(catid) in [100300011,100300058,100300013,100300077,100200186,100300102,100300005,100200172,100300008,100300116,100300121,100300126,100200054,100400038,100200170,100300200,100200055,100200185,100200232], if (attribute(catid) in [100300011,100300008,100200054,100400038,100300200,100200055,100200185,100200232], -0.0268832718, -0.0068696426), if (attribute(catid) in [0,100200171,100200130,100300166,100300143,100200034,100400141,100300165,100200052,100200053,100300004,100400142,100400037,100200193,100200192,100300065,100300122,100300127,100300169,100400080,100300074,100300066,100200176,100300045,100200028,100300006], if (attribute(catid) in [0,100200034,100400141,100300165,100400142,100200193,100200192,100300122,100300127,100400080,100300045,100200028,100300006], if (attribute(catid) in [100200034,100400141,100300165,100400142,100200193,100300122,100200028], -6.90536E-5, 0.003180608), if (attribute(catid) in [100200171,100200130,100300143,100200053,100400037,100300074,100200176], 0.0046312438, 0.0072930454)), if (attribute(catid) in [100300014,100300093,100200068,100300032,100300027,100300019,100300073,100400079,100300007,100300146], 0.0137098872, 0.0263591456))) +
+if (attribute(catid) in [0,100300011,100200130,100300014,100300058,100300166,100200034,100400141,100300165,100200052,100300005,100200172,100300008,100300032,100300027,100200053,100300019,100400142,100300073,100400038,100200192,100300065,100300209,100300122,100400079,100300169,100400080,100200087,100300074,100300007,100300200,100300045,100200055,100200185,100300006,100200232,100300214], if (attribute(catid) in [100300011,100300014,100300058,100300165,100200052,100200172,100300008,100300032,100200053,100300019,100400142,100300073,100400038,100300209,100300122,100400080,100300074,100300007,100300200,100200055,100200185,100300006,100300214], if (attribute(catid) in [100300008,100300032,100300019,100400038,100300209,100300122,100200055,100200185,100300006,100300214], -0.0161057659, if (attribute(catid) in [100300014,100300058,100300165,100200053,100400142,100300074], -0.0064541439, -0.0026860316)), if (attribute(catid) in [0,100200034,100300005,100300065,100400079,100300169], 0.001936498, 0.0039988711)), if (attribute(catid) in [100200171,100300077,100300143,100200186,100200068,100300116,100200234,100300126,100200193,100200170,100300066,100200176,100300076], 0.0130783191, if (attribute(catid) in [100300093,100300121,100200054,100400037,100300127,100200028,100200067], 0.0218038927, 0.0414721313))) +
+if (attribute(catid) in [100200171,100300014,100300058,100300077,100200186,100300093,100300008,100200068,100300032,100300027,100300019,100300004,100300126,100200054,100400037,100400038,100300209,100400079,100200170,100300169,100200087,100300007,100300200,100200055,100200185,100300214,100300146], if (attribute(catid) in [100300032,100300027,100300019,100300004,100400038,100200055,100200185,100300214,100300146], -0.0294531155, if (attribute(catid) in [100300058,100300008,100200068,100300126,100200054,100400037,100300007,100300200], -0.0064558393, -0.0017589508)), if (attribute(catid) in [0,100200130,100300013,100300166,100200052,100300102,100300121,100200234,100400142,100300073,100300065,100300122,100300066,100200028], if (attribute(catid) in [100200130,100300166,100200052,100300102,100400142,100300065], 0.0028476082, 0.0049351263), if (attribute(catid) in [100300143,100200034,100400141,100300165,100200172,100200193,100200192,100300074,100300006], 0.0122604781, 0.0164705118))) +
+if (attribute(catid) in [0,100200171,100300011,100300077,100300166,100200034,100400141,100300165,100300102,100200172,100300032,100300027,100300116,100200053,100300019,100400038,100200192,100300212,100300122,100400079,100300169,100200087,100300200,100300045,100200028,100300076], if (attribute(catid) in [100300077,100300102,100300032,100200053,100300019,100400038,100300122,100300169,100200087,100200028,100300076], if (attribute(catid) in [100300032,100300019,100200087,100200028,100300076], -0.0267943136, -0.00735905), if (attribute(catid) in [0,100200171,100300011,100200034,100400141,100200172,100300027,100300116,100200192,100300212,100300045], if (attribute(catid) in [100200171,100200034,100400141,100200172,100300027,100300212,100300045], -0.0027994712, -9.574051E-4), 0.0035088242)), if (attribute(catid) in [100200130,100300014,100300058,100300013,100300143,100200186,100200052,100200068,100300121,100200234,100300126,100400142,100300073,100200193,100300065,100300209,100300127,100200170,100400080,100300066,100200176,100300006,100200232,100300146], if (attribute(catid) in [100200130,100300143,100200186,100200234,100300073,100300065,100300127,100400080,100300066], 0.0073203788, 0.0143066526), 0.0363911505)) +
+if (attribute(catid) in [100300014,100300058,100300166,100200034,100300093,100300102,100300005,100200172,100200068,100300032,100300027,100300019,100300004,100400038,100300212,100300209,100200170,100300169,100200087,100300074,100300066,100200028,100300006,100200232,100300214,100300146], if (attribute(catid) in [100300102,100300005,100300032,100300019,100300004,100300209,100300074,100300066,100200232,100300214,100300146], if (attribute(catid) in [100300005,100300032,100300019,100300209,100300066,100200232,100300214], -0.0402234424, -0.0155707935), if (attribute(catid) in [100300014,100300166,100200068,100300027,100300212,100200028,100300006], -0.0067321936, -0.0015157833)), if (attribute(catid) in [0,100200171,100300013,100300077,100300143,100200186,100200052,100300121,100200054,100300073,100200192,100300065,100400079,100300045,100200185], if (attribute(catid) in [0,100200171,100300077,100200186,100300121,100200054,100400079,100300045,100200185], if (attribute(catid) in [100200171,100300077,100200186,100300121,100200054,100400079,100300045], 0.0027009397, 0.004912775), 0.0074818877), if (attribute(catid) in [100300011,100200130,100400141,100200234,100200053,100400142,100400037,100200193,100300122,100300127,100400080], 0.0114687451, 0.0184788462))) +
+if (attribute(catid) in [100300011,100300014,100300058,100300143,100300165,100300093,100300102,100300008,100300116,100200234,100300019,100400142,100200193,100400038,100300065,100300127,100300169,100400080,100300074,100300007,100200176,100300200,100300076,100200067,100200185,100200232,100300214,100300146], if (attribute(catid) in [100300011,100300014,100300165,100300008,100300116,100300019,100400038,100300007,100200176,100200067,100200185,100200232,100300214,100300146], if (attribute(catid) in [100300014,100300019,100400038,100300007,100200067,100200185,100200232,100300214], -0.0315173226, -0.0166807619), if (attribute(catid) in [100300058,100300143,100300102,100200234,100200193,100400080,100300076], -0.0097877493, -0.003192232)), if (attribute(catid) in [0,100300013,100400141,100200052,100200172,100200053,100300004,100300073,100400037,100200192,100300122,100400079,100300066,100200028,100300006], if (attribute(catid) in [100200172,100300004,100300073,100400037,100300122,100200028,100300006], 0.0016110349, 0.0058680637), if (attribute(catid) in [100200171,100200130,100300077,100300166,100200186,100300005,100200068,100300126,100200170], 0.012220867, 0.0260145679))) +
+if (attribute(catid) in [0,100300011,100300014,100300058,100300013,100300077,100300143,100200186,100300165,100300005,100300121,100200053,100300004,100300126,100400038,100200192,100300122,100300169,100200087,100300074,100300200,100300045,100200028,100300006], if (attribute(catid) in [100300011,100300013,100300143,100300005,100300126,100400038,100200087,100300200,100200028], if (attribute(catid) in [100300013,100300143,100300005,100300126], -0.0362895954, -0.0192422418), if (attribute(catid) in [100300058,100300121,100200053,100300169,100300074,100300006], -0.0047104781, 2.148509E-4)), if (attribute(catid) in [100200171,100200130,100300166,100200034,100400141,100200052,100300093,100200172,100300027,100300116,100400142,100300073,100400037,100200193,100300065,100300209,100300127,100400079,100400080,100300066,100300007], if (attribute(catid) in [100200171,100200130,100200052,100300093,100300073,100400037,100200193,100300209,100300066], 0.0059481372, if (attribute(catid) in [100300166,100200034,100300027,100300116,100300065], 0.0086482206, 0.0113173904)), if (attribute(catid) in [100300102,100200068,100300212,100200170,100300076,100200067,100200232,100300146], 0.0189016022, 0.0294237004))) +
+if (attribute(catid) in [100300011,100300014,100200034,100200186,100300165,100200172,100300008,100200068,100300121,100200234,100300126,100200054,100300073,100400037,100200193,100200192,100300065,100300209,100300122,100200170,100400080,100300007,100200176,100300045,100200028,100300076,100200185,100300146], if (attribute(catid) in [100300011,100200034,100300165,100300008,100200234,100300126,100200054,100400037,100300209,100400080,100300007,100200176,100200028,100200185,100300146], if (attribute(catid) in [100300011,100200034,100200234,100300126,100400037,100300209,100300007,100200176,100200185,100300146], -0.0183846087, -0.0112640996), if (attribute(catid) in [100200068,100300121,100300073,100300065,100200170,100300045,100300076], -0.0047781445, -0.0011633168)), if (attribute(catid) in [0,100200130,100300058,100300077,100300093,100300005,100300027,100300127,100400079,100200087,100300074,100300066], if (attribute(catid) in [0,100200130,100300027,100300127,100400079,100200087], 0.0039708336, 0.0079025406), if (attribute(catid) in [100200171,100300166,100400141,100200052,100300032,100300116,100200053,100300004,100400142,100300169,100300200,100300006], if (attribute(catid) in [100300166,100300032,100200053,100400142,100300169,100300006], 0.0121783231, 0.0159340797), 0.0363585815))) +
+if (attribute(catid) in [100300011,100300013,100300077,100300143,100200034,100200186,100300165,100300102,100300008,100200053,100300019,100400142,100200054,100400037,100300127,100400079,100400080,100200087,100300007,100300200,100300045,100200067,100200055,100200185,100300006,100300146], if (attribute(catid) in [100300013,100300143,100300102,100200053,100300019,100300200,100200067,100200055,100200185,100300006,100300146], -0.0269176309, if (attribute(catid) in [100300011,100200186,100300008,100200054,100400037,100400079,100400080,100300045], -0.0082753604, -0.0027168619)), if (attribute(catid) in [0,100200171,100200130,100300058,100300166,100400141,100200172,100200068,100300121,100300073,100200193,100200192,100300065,100200170,100300169,100300066,100200028], if (attribute(catid) in [0,100200130,100200172,100300121,100300073,100200192,100300065,100300169,100200028], 0.0030863813, 0.0082511598), if (attribute(catid) in [100300014,100200052,100300027,100300116,100200234,100300004,100400038,100300122,100300074], 0.0142405946, 0.0282071621))) +
+if (attribute(catid) in [100300058,100300143,100300093,100300005,100300116,100300121,100300019,100300004,100200193,100400038,100300200,100300045,100300076,100200055], if (attribute(catid) in [100300143,100300121,100300019,100200193,100300076,100200055], -0.031432122, -0.0099657936), if (attribute(catid) in [0,100200171,100200130,100300166,100200186,100400141,100300165,100200052,100200053,100400142,100300073,100200192,100300065,100300122,100300127,100400079,100200170,100300169,100400080,100200087,100300074,100200176,100200028,100200067,100300006], if (attribute(catid) in [100400141,100200052,100200192,100300122,100300127,100200087,100300074,100300006], -0.0018808186, if (attribute(catid) in [0,100200171,100200186,100300165,100200053,100400142,100300065,100300169,100200028,100200067], 0.0034558143, 0.0071443084)), if (attribute(catid) in [100300011,100300014,100300013,100300077,100200034,100200172,100200068,100300027,100200234,100200054,100300212,100300066,100300007,100300146], 0.011374724, 0.0365678279))) +
+if (attribute(catid) in [100200130,100300013,100300077,100300166,100200034,100400141,100300165,100200172,100200068,100300004,100400142,100200054,100400037,100300212,100300127,100400079,100400080,100300074,100300066,100200028,100200067,100200055,100300006], if (attribute(catid) in [100200034,100300165,100300004,100200054,100300127,100200028,100200067,100200055,100300006], if (attribute(catid) in [100200034,100300004,100300127,100200028,100200067,100200055], -0.0176698057, -0.0096668582), if (attribute(catid) in [100200130,100300013,100300077,100300166,100400142,100400037,100300212,100400079], -0.0070606716, -0.0037781209)), if (attribute(catid) in [0,100200171,100300058,100200186,100200052,100300102,100300027,100200053,100300073,100200193,100200192,100300065,100300122,100300169,100200087,100200176,100300076,100200232], if (attribute(catid) in [0,100300058,100200052,100300102,100300027,100200192,100300065,100300122,100200232], 0.0055156165, 0.0073414677), if (attribute(catid) in [100300005,100300008,100300032,100300121,100200234,100300126,100400038,100200170,100300200,100200185], 0.0138952295, 0.0286522384))) +
+if (attribute(catid) in [0,100200171,100300011,100200130,100300014,100300077,100300166,100300143,100200186,100300165,100300093,100300102,100300005,100200172,100300008,100300027,100300116,100300121,100200234,100200053,100300004,100200054,100400038,100200192,100400079,100300169,100400080,100200087,100300007,100200176,100300200,100300045,100200055,100300006,100300214], if (attribute(catid) in [100300011,100300014,100300143,100300093,100300102,100200172,100200054,100400079,100200087,100300007,100300200,100300045,100200055,100300006,100300214], if (attribute(catid) in [100300014,100300093,100300102,100200054,100300007,100300200,100200055,100300006,100300214], -0.0266674639, -0.0102733938), if (attribute(catid) in [0,100200171,100300077,100300027,100200053,100300004,100200192,100300169,100400080,100200176], -8.805496E-4, 0.0030248341)), if (attribute(catid) in [100300058,100200034,100400141,100200052,100300126,100300073,100300065,100300212,100300122,100300127,100200170,100300076,100200185,100200232,100300146], if (attribute(catid) in [100300058,100400141,100300126,100300073,100300065,100300212,100300122,100200185], 0.0088319892, 0.0145181522), if (attribute(catid) in [100200068,100400142,100200193,100300074,100300066], 0.02038035, 0.0412794221))) +
+if (attribute(catid) in [100300011,100300014,100300013,100300077,100300143,100200186,100300093,100300005,100300008,100300032,100300121,100200054,100200192,100300122,100300007,100300200,100200067,100200055,100200185,100200232], if (attribute(catid) in [100300143,100300005,100300008,100300032,100200054,100300007,100200067,100200055,100200232], -0.0313843116, -0.0090786448), if (attribute(catid) in [0,100200171,100200130,100300058,100400141,100300165,100200052,100300102,100200172,100200068,100300116,100300004,100400142,100300073,100200193,100400038,100300065,100300127,100400079,100300169,100300066,100200176,100300045,100200028,100300006], if (attribute(catid) in [0,100200171,100200130,100200052,100300116,100400038,100400079,100300169,100300045,100200028], 0.0019399364, 0.0057587234), if (attribute(catid) in [100300166,100300027,100200234,100200053,100200170,100200087,100300074,100300076], 0.011806527, 0.0240753548))) +
+if (attribute(catid) in [100300011,100300014,100200034,100300165,100300093,100300032,100300121,100300019,100300004,100400142,100400038,100300122,100300127,100200170,100300074,100200176,100300200,100200067,100200055,100300006,100200232,100300146], if (attribute(catid) in [100300011,100200034,100300093,100300032,100300019,100400038,100200176,100200067,100200055], -0.029749012, if (attribute(catid) in [100300165,100300004,100300200,100300006,100200232,100300146], -0.0129734582, -0.0048159123)), if (attribute(catid) in [0,100200171,100200130,100300058,100300077,100300166,100200186,100400141,100200052,100300102,100300005,100200172,100200068,100300027,100300116,100200234,100200053,100200054,100300073,100400037,100200193,100200192,100300065,100300169,100400080,100300066,100300045,100200028,100300076,100200185], if (attribute(catid) in [0,100200130,100300058,100300077,100300166,100200186,100300102,100300116,100200053,100200192,100300065,100300045,100200028], if (attribute(catid) in [100300077,100300166,100200186,100300116,100200192], -4.494225E-4, 0.0030538822), if (attribute(catid) in [100200171,100400141,100200234,100300073,100400037,100300169,100300066], 0.0068726028, 0.0116359714)), if (attribute(catid) in [100300143,100300126,100400079,100200087], 0.0222596119, 0.0442934684))) +
+if (attribute(catid) in [100300011,100300077,100200186,100400141,100300165,100200052,100300005,100200172,100300008,100200068,100300032,100300116,100300019,100400142,100200054,100200193,100400038,100300209,100300127,100200170,100300169,100300074,100200176,100300045,100200067,100200055,100200232,100300146], if (attribute(catid) in [100300011,100300008,100200068,100300032,100300019,100200193,100300074,100200176,100200067,100200055,100200232,100300146], if (attribute(catid) in [100300032,100300019,100200193,100300074,100200176,100200067,100200055,100200232,100300146], -0.0304543117, -0.0136959974), if (attribute(catid) in [100400141,100400142,100300127,100200170], -0.0090390793, -0.0027839113)), if (attribute(catid) in [0,100200171,100200130,100300014,100300058,100300166,100200034,100300027,100300121,100200234,100200053,100300004,100300073,100200192,100300122,100400079,100400080,100200087,100300066,100300200,100200028,100300076], if (attribute(catid) in [0,100300058,100300027,100200234,100200053,100300004,100300073,100200192,100400080,100300066,100300200], 0.0035175195, 0.0082798864), if (attribute(catid) in [100300093,100300126,100300065], 0.0167800289, 0.0355577197))) +
+if (attribute(catid) in [0,100200130,100300013,100300077,100300166,100300143,100200186,100200052,100300005,100200172,100200234,100200053,100300019,100300004,100400142,100200054,100400037,100200193,100300212,100300127,100400079,100300169,100400080,100200087,100300074,100300066,100300007,100200176,100300200,100300045,100300076,100200067,100200185,100300006,100200232,100300146], if (attribute(catid) in [100300013,100200186,100200234,100300019,100300004,100200054,100200193,100300212,100400080,100200087,100200176,100300200,100200067,100200185,100200232,100300146], if (attribute(catid) in [100300013,100300019,100200193,100300212,100200176,100200067,100200185,100200232], -0.0351013956, -0.0146983415), if (attribute(catid) in [0,100300077,100300143,100300005,100200053,100300169,100300074,100300066,100300007,100300045,100300076,100300006], if (attribute(catid) in [100300077,100300143,100300169,100300074,100300007,100300045], -0.0036169246, 6.508121E-4), 0.0039348871)), if (attribute(catid) in [100200171,100300011,100300058,100400141,100300165,100300093,100200068,100300032,100300027,100300121,100300126,100300073,100400038,100200192,100300065,100200170], if (attribute(catid) in [100300058,100400141,100300165,100300027,100300121,100300065,100200170], 0.0092359739, 0.0139160873), if (attribute(catid) in [100300014,100200034,100300122,100200028], 0.0220286224, 0.0419934945))) +
+if (attribute(catid) in [100300013,100300143,100300005,100300116,100300019,100300004,100200054,100300065,100300212,100300209,100200087,100200232,100300214], if (attribute(catid) in [100300143,100300019,100300065,100300209,100200232,100300214], -0.0342436123, -0.0130043453), if (attribute(catid) in [0,100200171,100200130,100300014,100200186,100400141,100200052,100200172,100200068,100200053,100400142,100300073,100400037,100200193,100400038,100300122,100400079,100200170,100300007,100300045,100200185,100300006], if (attribute(catid) in [100200130,100300014,100400141,100200172,100200068,100400142,100400037,100200193,100300007,100300006], -0.0025589925, if (attribute(catid) in [100200171,100300073,100400038], 0.0013996109, 0.0034393713)), if (attribute(catid) in [100300011,100300077,100300166,100200034,100300165,100300093,100300027,100200192,100300169,100300066,100300200,100200028,100300076], if (attribute(catid) in [100300077,100300166,100200192,100300169,100300066,100200028], 0.0071225815, 0.0104333907), 0.0168963783))) +
+if (attribute(catid) in [100300011,100300014,100300058,100300013,100300143,100300027,100200053,100300019,100300126,100400037,100300169,100400080,100200176,100300076,100200067,100200055,100200185,100300214,100300146], if (attribute(catid) in [100300011,100300058,100300013,100300143,100300019,100400037,100200176,100200067,100200055,100200185,100300214], -0.0290173523, -0.0086034947), if (attribute(catid) in [0,100300077,100300166,100200034,100200186,100400141,100300165,100200052,100300093,100200172,100200068,100300116,100200234,100300004,100400142,100300073,100400038,100200192,100300065,100300209,100300127,100400079,100200170,100200087,100300074,100300045,100200232], if (attribute(catid) in [100300077,100200186,100400141,100300093,100200172,100200068,100300004,100300065,100300074], -7.979247E-4, if (attribute(catid) in [0,100300166,100300165,100200052,100300045], 0.0031637733, 0.0058373245)), if (attribute(catid) in [100200171,100200130,100300005,100300008,100300121,100300066,100300007,100200028,100300006], 0.0142240118, if (attribute(catid) in [100200054,100200193,100300122], 0.0228053439, 0.0584841669)))) +
+if (attribute(catid) in [100300143,100300165,100300093,100300008,100300116,100200234,100300004,100300126,100200054,100400037,100200193,100400038,100300212,100300122,100200170,100300066,100300007,100200055,100200232,100300146], if (attribute(catid) in [100300008,100300116,100300126,100300212,100300007,100200055,100200232,100300146], -0.0246224156, if (attribute(catid) in [100300143,100300165,100200234,100300004,100400037,100300122], -0.0104770173, -0.0059185929)), if (attribute(catid) in [0,100200171,100300011,100200130,100300014,100300077,100300166,100200172,100300121,100200053,100300073,100300127,100400079,100200028], if (attribute(catid) in [0,100200171,100200130,100300014,100300077,100300121,100200053,100300073], if (attribute(catid) in [100200171,100200130,100300077,100200053,100300073], 0.0010797418, 0.0024531718), 0.0064407369), if (attribute(catid) in [100200186,100400141,100200052,100300102,100200068,100300027,100300019,100400142,100200192,100300065,100300169,100400080,100300200,100300076,100300006], 0.0120768393, if (attribute(catid) in [100300058,100200034,100300209,100200087,100300074], 0.018583513, 0.0301892716)))) \ No newline at end of file
diff --git a/searchlib/src/test/files/ranking08.expression b/searchlib/src/test/files/ranking08.expression
new file mode 100644
index 00000000000..e2bab6c082c
--- /dev/null
+++ b/searchlib/src/test/files/ranking08.expression
@@ -0,0 +1,5 @@
+if (CT$ in ["Wiki","Web","Image","Video","Finance"], if (SDSF_LOCAL < 0.6359952986, if (CT$ in ["Image","Video"], -0.1846455351, -0.0057844764), -0.4039473684), 0.2900655347) +
+if (CT$ in ["Wiki","Web","Image","Video","KGMovie","Finance","Timezone"], -0.079079733, if (CT$ in ["Local","Q2A"], if (SDSF_LOCAL < 0.5348491371, -0.0304336373, 0.2401947405), 0.373999153)) +
+if (CT$ in ["Web","Image","Video","Timezone"], -0.0572267897, if (CT$ in ["Wiki","Local","KGMovie"], if (SDSF_LOCAL < 0.4078139514, -0.0295648159, 0.1601345785), 0.2612064355)) +
+if (CT$ in ["Image","Video","Timezone"], -0.1103244788, if (CT$ in ["Wiki","Web","Local","KGMovie"], if (QPSCOREFOR_KG_PEOPLE < 0.9930000007, 0.0194079789, -0.2056829336), 0.1987635246)) +
+if (CT$ in ["Image","Video","Event","Timezone"], if (SDSF_WEB < 0.3725785315, -0.0680975953, -0.2264832978), if (CT$ in ["Wiki","Web","Local","Q2A"], 0.010592822, 0.1366891795)) \ No newline at end of file
diff --git a/searchlib/src/test/files/s-expression.vre b/searchlib/src/test/files/s-expression.vre
new file mode 100644
index 00000000000..13719ad411a
--- /dev/null
+++ b/searchlib/src/test/files/s-expression.vre
@@ -0,0 +1 @@
+if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (POS_19 < 0.5, if (POS_18 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0238095, if (POS_20 < 0.5, if (TERM_CASE_3 < 0.5, 0.0320624, 0.0456067), -0.0294118)), -0.0011905), -0.0138889), if (STOP_WORD_2 < 0.5, if (POS_10 < 0.5, if (POS_18 < 0.5, if (POS_19 < 0.5, 0.0167606, -0.0306452), -0.0182927), -0.0271429), -0.022549)), 0.0455737), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 12.5, -0.0399584, -0.0283827), if (CHUNKTYPE < 0.5, if (LENGTH < 11.5, -0.027549, -0.0189895), -0.0031792)), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.0441547, -0.0346561), -0.0471542))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (POS_19 < 0.5, if (POS_18 < 0.5, if (GOOD_SYNTAX < 0.5, -0.026149, if (POS_20 < 0.5, if (TERM_CASE_4 < 0.5, 0.0416028, 0.030199), -0.0316177)), -0.0029114), -0.0143587), if (STOP_WORD_2 < 0.5, if (POS_10 < 0.5, if (POS_18 < 0.5, if (POS_19 < 0.5, 0.0178874, -0.0223683), -0.0192148), -0.0298584), -0.0213989)), 0.0439902), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 8.5, -0.0421426, -0.0308258), if (CHUNKTYPE < 0.5, if (CONCEPTTYPE < 0.5, -0.0277896, -0.0182189), -0.0040314)), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.0426529, -0.0330161), -0.0457574))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (POS_19 < 0.5, if (POS_18 < 0.5, if (POS_20 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0225608, if (TERM_CASE_4 < 0.5, 0.0401168, 0.0290235)), -0.0415782), -0.0031138), -0.0153018), if (STOP_WORD_2 < 0.5, if (POS_10 < 0.5, if (POS_18 < 0.5, if (POS_19 < 0.5, 0.0154691, -0.0262869), -0.0244862), -0.0298703), -0.0205396)), 0.0433266), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 12.5, -0.0380008, -0.0275714), if (CHUNKTYPE < 0.5, if (CONCEPTTYPE < 0.5, -0.0269285, -0.0167566), -0.0042621)), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.0419571, -0.0321239), -0.0449586))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (POS_19 < 0.5, if (POS_18 < 0.5, if (GOOD_SYNTAX < 0.5, -0.027675, if (TERM_CASE_4 < 0.5, 0.0388147, 0.0278569)), -0.0018196), -0.0152118), if (STOP_WORD_2 < 0.5, if (POS_10 < 0.5, if (POS_18 < 0.5, if (POS_19 < 0.5, 0.0161232, -0.0204323), -0.0198792), -0.0240534), -0.0196779)), 0.0421041), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 8.5, -0.0407405, -0.0293697), if (CHUNKTYPE < 0.5, if (CONCEPTTYPE < 0.5, -0.0261933, -0.0163427), -0.0035442)), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.041222, if (CONCEPTTYPE < 0.5, -0.0369189, -0.026805)), -0.0438784))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (POS_19 < 0.5, if (POS_18 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0293154, if (POS_20 < 0.5, if (TERM_CASE_3 < 0.5, 0.0288701, 0.0416719), -0.0310674)), -3.423E-4), -0.0141918), if (STOP_WORD_2 < 0.5, if (POS_10 < 0.5, if (POS_18 < 0.5, if (POS_19 < 0.5, 0.0147251, -0.0260802), -0.0173092), -0.0249324), -0.0197797)), 0.0409582), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 12.5, -0.0370935, -0.0269369), if (CHUNKTYPE < 0.5, if (CONCEPTTYPE < 0.5, -0.0254922, -0.0158792), -0.002589)), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.0397203, -0.0311777), -0.0426463))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (STOP_WORD_2 < 0.5, if (POS_19 < 0.5, if (STOP_WORD_1 < 0.5, if (POS_18 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0196502, if (ENTITYPLACETYPE < 0.5, if (POS_20 < 0.5, if (STOP_WORD_3 < 0.5, 0.028683, 0.0168366), -0.0244408), 0.0384706)), -0.0067946), if (POS_10 < 0.5, 0.0107138, -0.0269118)), -0.0152566), -0.0177037), if (STOP_WORD_3 < 0.5, 0.0422887, 0.0287074)), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 12.5, -0.0356967, -0.0254432), if (CHUNKTYPE < 0.5, if (CONCEPTTYPE < 0.5, -0.0244727, -0.0159428), -0.0052851)), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.0387765, -0.0297345), -0.0414916))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (POS_19 < 0.5, if (POS_18 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0278568, if (TERM_CASE_4 < 0.5, 0.0359876, 0.0261228)), -0.0010129), -0.0123932), if (STOP_WORD_2 < 0.5, if (POS_10 < 0.5, if (POS_18 < 0.5, 0.0127637, -0.0176055), -0.0244544), if (POS_11 < 0.5, -0.0122589, -0.0454474))), 0.0392691), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 8.5, -0.0377529, -0.0273269), if (CHUNKTYPE < 0.5, if (CONCEPTTYPE < 0.5, -0.0235671, -0.0154739), -0.0030262)), if (ENTITYPLACETYPE < 0.5, -0.0392548, if (ORDER_IN_CLUSTER < 3.5, if (CONCEPTTYPE < 0.5, -0.0335334, -0.0234854), -0.039021)))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (STOP_WORD_2 < 0.5, if (POS_19 < 0.5, if (POS_18 < 0.5, if (POS_10 < 0.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (POS_20 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0255861, 0.0264454), -0.0328458), 0.0118034), 0.0351067), -0.0249263), -0.0107626), -0.0138085), -0.015843), 0.0387274), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 12.5, -0.0338899, -0.0247335), if (CHUNKTYPE < 0.5, if (LENGTH < 17.5, if (CONCEPTTYPE < 0.5, -0.0249901, -0.0160191), -0.0082827), -0.0036917)), if (ENTITYPLACETYPE < 0.5, -0.0382492, if (ORDER_IN_CLUSTER < 3.5, if (CONCEPTTYPE < 0.5, -0.0320091, -0.0216203), -0.038139)))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (STOP_WORD_2 < 0.5, if (POS_19 < 0.5, if (POS_18 < 0.5, if (POS_10 < 0.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0222249, if (POS_20 < 0.5, 0.0261828, -0.0267141)), 0.01303), 0.0349923), -0.0257607), -0.0103532), if (EXTENDEDTYPE < 0.5, 0.0302186, -0.0189039)), if (POS_11 < 0.5, -0.0104305, -0.0438767)), 0.0380382), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 8.5, -0.0364953, -0.0259777), if (CHUNKTYPE < 0.5, if (CONCEPTTYPE < 0.5, -0.0229979, -0.0142767), -0.0021224)), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.0359463, -0.0278147), -0.0387552))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (POS_19 < 0.5, if (POS_18 < 0.5, if (TERM_CASE_4 < 0.5, 0.0340066, if (GOOD_SYNTAX < 0.5, -0.0200785, 0.0240284)), 0.0014722), -0.0142752), if (STOP_WORD_2 < 0.5, if (POS_10 < 0.5, if (POS_18 < 0.5, if (POS_19 < 0.5, 0.0135552, -0.0203988), -0.0159875), -0.0252406), -0.0198481)), if (GOOD_SYNTAX < 0.5, -0.035675, 0.0371821)), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 8.5, -0.0347253, -0.0245341), if (CHUNKTYPE < 0.5, -0.0194266, -0.0039127)), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.035227, if (CONCEPTTYPE < 0.5, -0.0308237, -0.0206793)), -0.0379667))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (POS_19 < 0.5, if (POS_18 < 0.5, if (GOOD_SYNTAX < 0.5, -0.027432, if (POS_20 < 0.5, 0.0273692, -0.0296341)), -0.0014755), -0.0104811), if (STOP_WORD_2 < 0.5, if (POS_10 < 0.5, if (POS_18 < 0.5, if (CHUNKTYPE < 0.5, 0.0109256, 0.0306595), -0.0185903), -0.023881), -0.0152296)), 0.0358929), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 8.5, -0.0338809, -0.0243573), if (CHUNKTYPE < 0.5, if (CONCEPTTYPE < 0.5, -0.0215008, -0.0132609), -0.0030742)), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.0341674, if (CONCEPTTYPE < 0.5, -0.0309224, -0.0209575)), -0.0369798))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (POS_19 < 0.5, if (POS_18 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0189961, if (TERM_CASE_4 < 0.5, 0.0317904, 0.02261)), 0.00149), if (LENGTH < 13.5, 0.0202299, -0.0227074)), if (STOP_WORD_2 < 0.5, if (POS_10 < 0.5, if (POS_18 < 0.5, if (POS_19 < 0.5, 0.0132167, -0.0277941), -0.0139034), -0.0239364), -0.0178965)), 0.0348111), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 8.5, -0.0327393, -0.0243069), if (CHUNKTYPE < 0.5, if (CONCEPTTYPE < 0.5, -0.021828, -0.0132156), -0.0040227)), if (ENTITYPLACETYPE < 0.5, -0.0346582, if (ORDER_IN_CLUSTER < 3.5, -0.026238, -0.0340471)))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (POS_19 < 0.5, if (POS_18 < 0.5, if (TERM_CASE_3 < 0.5, if (POS_20 < 0.5, 0.0229223, -0.0247442), 0.0341614), 2.957E-4), -0.013085), if (STOP_WORD_2 < 0.5, if (POS_10 < 0.5, if (CHUNKTYPE < 0.5, if (POS_18 < 0.5, if (POS_19 < 0.5, 0.0103332, -0.0263093), -0.0125637), 0.0295777), -0.0192543), -0.015281)), 0.033784), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 8.5, -0.032373, -0.0232716), if (CHUNKTYPE < 0.5, if (CONCEPTTYPE < 0.5, -0.0211888, -0.0139731), -0.0030902)), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.0322893, -0.0256094), -0.0352917))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (POS_19 < 0.5, if (POS_18 < 0.5, if (POS_20 < 0.5, if (TERM_CASE_4 < 0.5, 0.0314522, 0.0210548), -0.027565), -0.0032209), -0.0104833), if (STOP_WORD_2 < 0.5, if (POS_10 < 0.5, if (POS_18 < 0.5, if (POS_19 < 0.5, 0.0119084, -0.0213878), -0.0171946), -0.0239646), -0.0162698)), 0.0334209), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 8.5, -0.0313708, -0.0224933), if (CHUNKTYPE < 0.5, if (CONCEPTTYPE < 0.5, -0.0209707, -0.0127074), -0.0035211)), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.0318281, if (CONCEPTTYPE < 0.5, -0.0285518, -0.0189534)), -0.0344255))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (STOP_WORD_2 < 0.5, if (POS_19 < 0.5, if (POS_10 < 0.5, if (POS_18 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0247167, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (POS_20 < 0.5, 0.0220308, -0.0338461), 0.0113261), 0.030921)), -0.0077814), -0.0227584), if (EXTENDEDTYPE < 0.5, 0.0290777, -0.0172571)), -0.0148965), if (STOP_WORD_3 < 0.5, 0.0347528, 0.0225359)), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 7.5, -0.0323354, -0.022918), if (CHUNKTYPE < 0.5, if (CONCEPTTYPE < 0.5, -0.0201202, -0.0114946), -0.0023924)), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.0312292, -0.0242211), -0.033578))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (POS_19 < 0.5, if (POS_18 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0172385, if (POS_20 < 0.5, 0.0240968, -0.023822)), -9.647E-4), -0.010356), if (STOP_WORD_2 < 0.5, if (POS_10 < 0.5, if (POS_18 < 0.5, if (CHUNKTYPE < 0.5, 0.0071835, 0.0266903), -0.0148186), -0.0227919), -0.012414)), 0.0315926), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 12.5, -0.0286387, if (TERM_CASE_3 < 0.5, -0.0163821, -0.0270757)), if (CHUNKTYPE < 0.5, -0.0165336, -0.003919)), if (ENTITYPLACETYPE < 0.5, -0.0317233, if (ORDER_IN_CLUSTER < 3.5, if (CONCEPTTYPE < 0.5, -0.0270231, -0.0185894), -0.0309025)))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (STOP_WORD_2 < 0.5, if (POS_19 < 0.5, if (POS_10 < 0.5, if (POS_18 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0229224, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, 0.0204102, 0.0106909), 0.0289595)), -0.0084357), -0.0200925), -0.0088235), if (POS_11 < 0.5, -0.0102852, -0.0386286)), 0.0315906), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 12.5, -0.0278104, if (TERM_CASE_3 < 0.5, -0.0167703, -0.0260821)), if (CHUNKTYPE < 0.5, if (CONCEPTTYPE < 0.5, if (LENGTH < 11.5, -0.0221284, -0.0134741), -0.0120638), -0.0017045)), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.0296597, -0.0229542), -0.0319876))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (POS_19 < 0.5, if (POS_18 < 0.5, if (GOOD_SYNTAX < 0.5, -0.020537, if (POS_20 < 0.5, if (TERM_CASE_4 < 0.5, 0.0277037, 0.0200641), -0.023268)), 7.965E-4), -0.0085846), if (STOP_WORD_2 < 0.5, if (POS_10 < 0.5, if (POS_18 < 0.5, 0.0103221, -0.0168052), -0.0204982), -0.0164096)), 0.0302062), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 12.5, -0.0269195, if (TERM_CASE_3 < 0.5, -0.0160655, -0.0265024)), if (CHUNKTYPE < 0.5, if (CONCEPTTYPE < 0.5, -0.0189328, -0.0107779), -0.0023674)), if (ENTITYPLACETYPE < 0.5, -0.030323, if (ORDER_IN_CLUSTER < 3.5, -0.0219148, -0.0299687)))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (STOP_WORD_2 < 0.5, if (POS_19 < 0.5, if (POS_18 < 0.5, if (STOP_WORD_1 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0214866, if (POS_20 < 0.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 1.5, 0.0198002, -0.0017229), 0.0288705), -0.0298617)), if (POS_10 < 0.5, 0.009464, -0.0155948)), -0.0093611), -0.0123187), -0.0132916), 0.0301288), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 12.5, -0.0264592, -0.0187969), if (CHUNKTYPE < 0.5, if (LENGTH < 18.5, if (CONCEPTTYPE < 0.5, -0.0190504, -0.0118572), -0.0047683), -0.001059)), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.0282433, -0.0221125), -0.0308641))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (STOP_WORD_2 < 0.5, if (POS_19 < 0.5, if (POS_10 < 0.5, if (POS_18 < 0.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (POS_20 < 0.5, 0.0194071, -0.0236163), 0.0099873), 0.0270746), -0.0059088), -0.0168318), if (EXTENDEDTYPE < 0.5, 0.0305566, -0.0143625)), if (POS_11 < 0.5, -0.0082314, -0.0370651)), 0.0294435), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 8.5, -0.0272551, -0.0191542), if (CHUNKTYPE < 0.5, if (CONCEPTTYPE < 0.5, -0.0176287, -0.0105903), -0.0017609)), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.0273562, if (CONCEPTTYPE < 0.5, -0.0247411, -0.0169687)), -0.0301205))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (STOP_WORD_2 < 0.5, if (POS_19 < 0.5, if (STOP_WORD_1 < 0.5, if (POS_18 < 0.5, if (GOOD_SYNTAX < 0.5, -0.019051, if (ENTITYPLACETYPE < 0.5, if (POS_20 < 0.5, 0.0181601, -0.0227417), 0.0267265)), -0.0024569), if (POS_10 < 0.5, 0.0069209, -0.0141317)), if (EXTENDEDTYPE < 0.5, 0.0319879, -0.0149185)), -0.0128912), 0.0289185), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 12.5, -0.02486, if (TERM_CASE_3 < 0.5, -0.0141723, -0.0247693)), if (CHUNKTYPE < 0.5, if (LENGTH < 12.5, -0.0172354, if (TERM_CASE_3 < 0.5, -0.0015111, -0.0152732)), -0.0030835)), if (ORDER_IN_CLUSTER < 3.5, -0.0245878, -0.0292687))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (STOP_WORD_2 < 0.5, if (POS_19 < 0.5, if (STOP_WORD_1 < 0.5, if (POS_18 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0210502, if (POS_20 < 0.5, 0.019687, -0.0249092)), -0.0043769), if (POS_10 < 0.5, 0.0069284, -0.017452)), if (EXTENDEDTYPE < 0.5, 0.029387, -0.0148346)), -0.0119228), 0.0280001), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 12.5, -0.0246005, -0.0179612), if (CHUNKTYPE < 0.5, if (LENGTH < 12.5, -0.0166346, if (TERM_CASE_4 < 0.5, -0.0149392, 7.37E-5)), -9.508E-4)), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.0265787, if (CONCEPTTYPE < 0.5, -0.0246949, -0.0130917)), -0.0288808))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (STOP_WORD_2 < 0.5, if (POS_19 < 0.5, if (STOP_WORD_1 < 0.5, if (POS_18 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0213783, if (POS_20 < 0.5, if (ENTITYPLACETYPE < 0.5, 0.0175062, 0.0255312), -0.0300307)), -0.0052822), if (POS_10 < 0.5, 0.0069152, -0.0155832)), -0.0103763), if (POS_11 < 0.5, -0.0065774, -0.0370802)), 0.0274144), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 12.5, -0.0239849, if (TERM_CASE_3 < 0.5, -0.0140748, -0.0242957)), if (CHUNKTYPE < 0.5, if (CONCEPTTYPE < 0.5, -0.0166813, -0.0106118), -0.002076)), if (ENTITYPLACETYPE < 0.5, -0.0272019, if (CONCEPTTYPE < 0.5, -0.0253823, -0.0176928)))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (POS_19 < 0.5, if (TERM_CASE_3 < 0.5, if (POS_18 < 0.5, if (POS_20 < 0.5, 0.0175067, -0.0259902), 6.34E-5), 0.027027), -0.0106422), if (STOP_WORD_2 < 0.5, if (CHUNKTYPE < 0.5, if (REGEXTYPE < 0.5, if (POS_10 < 0.5, 0.0040347, -0.0239956), 0.0338428), 0.0258125), -0.013453)), 0.0265121), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 12.5, -0.0233444, if (TERM_CASE_3 < 0.5, -0.0124575, -0.0232757)), if (CHUNKTYPE < 0.5, if (LENGTH < 18.5, -0.0146049, -0.0033815), -0.0026193)), if (ENTITYPLACETYPE < 0.5, -0.0267216, if (ORDER_IN_CLUSTER < 3.5, -0.0192017, -0.0261826)))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (STOP_WORD_2 < 0.5, if (POS_19 < 0.5, if (STOP_WORD_1 < 0.5, if (POS_18 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0246778, if (ENTITYPLACETYPE < 0.5, if (POS_20 < 0.5, 0.0165603, -0.0254062), 0.0252378)), -0.0035612), if (POS_10 < 0.5, 0.005575, -0.0140081)), if (EXTENDEDTYPE < 0.5, 0.027077, -0.015631)), -0.0120579), 0.0264607), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 8.5, -0.025323, -0.0173393), if (CHUNKTYPE < 0.5, -0.0135205, -0.0010786)), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.024778, if (CONCEPTTYPE < 0.5, -0.0229531, if (LENGTH < 9.5, -0.0060947, -0.0195776))), -0.0272583))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (STOP_WORD_2 < 0.5, if (EXTENDEDTYPE < 0.5, if (POS_10 < 0.5, if (ENTITYPLACETYPE < 0.5, 0.01557, 0.0232402), -0.0156369), if (POS_19 < 0.5, if (STOP_WORD_3 < 0.5, 0.0144031, 0.0019319), -0.0107302)), -0.0120357), if (STOP_WORD_3 < 0.5, 0.0274022, 0.0170157)), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 7.5, -0.0258868, if (TERM_CASE_4 < 0.5, -0.0196026, -0.013761)), if (CHUNKTYPE < 0.5, if (CONCEPTTYPE < 0.5, -0.01608, -0.0091723), 1.449E-4)), if (ENTITYPLACETYPE < 0.5, -0.0255402, if (ORDER_IN_CLUSTER < 3.5, if (CONCEPTTYPE < 0.5, -0.0215057, if (LENGTH < 9.5, -0.0060495, -0.0204416)), -0.0250903)))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (POS_19 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0161394, 0.0178085), -0.0054002), if (STOP_WORD_2 < 0.5, if (CHUNKTYPE < 0.5, if (REGEXTYPE < 0.5, if (POS_10 < 0.5, if (POS_18 < 0.5, if (POS_19 < 0.5, 0.00706, -0.0218277), -0.0126976), -0.024607), 0.0347621), 0.0247592), -0.0129957)), 0.0251434), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 9.5, -0.0231928, -0.0162238), if (CHUNKTYPE < 0.5, -0.0129107, -0.0021479)), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.0236224, if (CONCEPTTYPE < 0.5, -0.0217007, if (LENGTH < 8.5, -0.005477, -0.0192289))), -0.0259407))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (POS_19 < 0.5, if (POS_18 < 0.5, if (POS_20 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0162787, 0.0189037), -0.0244341), 2.038E-4), -0.0050615), if (STOP_WORD_2 < 0.5, if (POS_18 < 0.5, if (POS_10 < 0.5, if (EXTENDEDTYPE < 0.5, 0.0135523, 0.0046004), -0.0122361), -0.0158453), -0.0097432)), 0.0242278), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 7.5, -0.0250713, -0.0167955), if (CHUNKTYPE < 0.5, if (CONCEPTTYPE < 0.5, -0.0148256, if (TERM_CASE_1 < 0.5, -0.0099836, 0.0248802)), -0.0024053)), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.0227493, -0.0179295), -0.0252643))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (STOP_WORD_3 < 0.5, if (POS_19 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0193283, 0.01592), -0.0067908), if (ENTITYPLACETYPE < 0.5, if (POS_11 < 0.5, if (REGEXTYPE < 0.5, if (POS_13 < 0.5, if (POS_10 < 0.5, 0.0022836, -0.0184071), -0.0398454), 0.0300278), -0.0334954), 0.0209461)), 0.02435), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 7.5, -0.0244106, -0.0163105), if (CHUNKTYPE < 0.5, if (CONCEPTTYPE < 0.5, if (LENGTH < 11.5, -0.0172558, -0.0089579), -0.0084621), -0.0026096)), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.0226243, if (CONCEPTTYPE < 0.5, -0.020762, -0.0122463)), -0.0249212))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (POS_19 < 0.5, if (STOP_WORD_3 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0211361, 0.0157079), if (ENTITYPLACETYPE < 0.5, if (POS_11 < 0.5, if (REGEXTYPE < 0.5, if (POS_10 < 0.5, if (POS_13 < 0.5, if (POS_18 < 0.5, 0.0056327, if (LENGTH < 20.5, 0.0114886, -0.027261)), -0.0388943), -0.0230606), 0.031081), -0.0328684), 0.0216318)), -0.0122586), 0.0238667), if (ORDER_IN_CLUSTER < 2.5, if (CHUNKTYPE < 0.5, if (ENTITYPLACETYPE < 0.5, -0.0189804, if (LENGTH < 11.5, -0.0146883, -0.0086626)), if (ENTITYPLACETYPE < 0.5, -0.0106364, -5.685E-4)), if (ENTITYPLACETYPE < 0.5, -0.0238854, if (CONCEPTTYPE < 0.5, -0.0216013, -0.0150148)))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (POS_19 < 0.5, if (TERM_CASE_4 < 0.5, 0.0200161, if (POS_18 < 0.5, 0.0138241, -0.0047691)), -0.0071074), if (STOP_WORD_2 < 0.5, if (POS_10 < 0.5, if (POS_18 < 0.5, if (EXTENDEDTYPE < 0.5, 0.0127718, 0.003632), -0.0133302), -0.013666), -0.0118496)), if (STOP_WORD_1 < 0.5, 0.0241087, 0.0099508)), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 7.5, -0.0236828, -0.0154686), if (LENGTH < 17.5, if (CONCEPTTYPE < 0.5, -0.013553, -0.0073624), -6.143E-4)), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.0217912, if (CONCEPTTYPE < 0.5, -0.020586, -0.012412)), -0.0238811))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (POS_19 < 0.5, if (TERM_CASE_3 < 0.5, if (POS_18 < 0.5, 0.0140879, -4.32E-4), 0.0230054), -0.0089385), if (STOP_WORD_2 < 0.5, if (CHUNKTYPE < 0.5, if (POS_10 < 0.5, if (POS_18 < 0.5, if (POS_19 < 0.5, 0.0057379, -0.0224734), -0.0131509), -0.0152052), 0.0214004), -0.013384)), 0.0225425), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 8.5, -0.0224854, -0.0152947), if (LENGTH < 17.5, if (CONCEPTTYPE < 0.5, -0.0136659, -0.007325), -5.115E-4)), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.0214652, if (LENGTH < 9.5, -0.013739, -0.0208746)), -0.0237957))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (POS_19 < 0.5, if (TERM_CASE_3 < 0.5, if (POS_18 < 0.5, if (POS_20 < 0.5, 0.0148701, -0.0229642), 0.00104), 0.0219624), -0.0082162), if (STOP_WORD_2 < 0.5, if (CHUNKTYPE < 0.5, if (REGEXTYPE < 0.5, if (POS_10 < 0.5, if (POS_18 < 0.5, 0.0045602, -0.0144154), -0.0193818), 0.029159), 0.0194212), -0.0109387)), 0.0223692), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 12.5, -0.0197204, if (TERM_CASE_3 < 0.5, -0.0095159, -0.019108)), if (CHUNKTYPE < 0.5, -0.0109369, -0.0018483)), if (ENTITYPLACETYPE < 0.5, -0.0220807, if (CONCEPTTYPE < 0.5, -0.0204406, -0.0145144)))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (POS_19 < 0.5, if (POS_18 < 0.5, if (TERM_CASE_4 < 0.5, 0.0204859, 0.0130288), -0.0014037), -0.0077292), if (STOP_WORD_2 < 0.5, if (POS_10 < 0.5, if (POS_18 < 0.5, if (POS_19 < 0.5, 0.0072626, -0.0204056), -0.0110636), -0.0136554), if (POS_11 < 0.5, -0.0062418, -0.0338774))), if (STOP_WORD_1 < 0.5, 0.0227449, 0.0109107)), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 8.5, -0.0213143, -0.0143339), if (LENGTH < 17.5, if (CONCEPTTYPE < 0.5, -0.0128657, -0.0063313), -6.136E-4)), if (ENTITYPLACETYPE < 0.5, -0.0216506, if (CONCEPTTYPE < 0.5, -0.0199993, -0.0140245)))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (POS_19 < 0.5, if (POS_18 < 0.5, 0.0146672, -0.0018224), -0.0067699), if (POS_11 < 0.5, if (CHUNKTYPE < 0.5, if (REGEXTYPE < 0.5, if (POS_10 < 0.5, if (STOP_WORD_2 < 0.5, 0.001774, -0.0100986), -0.0194597), 0.028292), 0.0153051), -0.0335927)), 0.0216562), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 8.5, -0.0202377, if (POS_18 < 0.5, -0.0127352, -0.0299783)), if (CHUNKTYPE < 0.5, if (CONCEPTTYPE < 0.5, if (LENGTH < 11.5, -0.0157168, -0.0069759), -0.0070018), -7.154E-4)), if (ENTITYPLACETYPE < 0.5, -0.021247, if (ORDER_IN_CLUSTER < 3.5, -0.0145454, -0.0207958)))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (STOP_WORD_2 < 0.5, if (ENTITYPLACETYPE < 0.5, if (POS_19 < 0.5, if (STOP_WORD_3 < 0.5, if (POS_18 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0205616, 0.0129939), -0.0044695), if (POS_10 < 0.5, 0.0044668, -0.0129957)), if (EXTENDEDTYPE < 0.5, 0.0293093, -0.0107582)), 0.0198518), -0.0095002), 0.0215686), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (CHUNKTYPE < 0.5, -0.0173628, if (TERM_CASE_4 < 0.5, -0.0166649, -0.0029049)), if (CHUNKTYPE < 0.5, -0.010394, -0.0015807)), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.0195013, if (CONCEPTTYPE < 0.5, -0.0181298, if (LENGTH < 9.5, -0.0041341, -0.0161438))), -0.0219923))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (STOP_WORD_2 < 0.5, if (EXTENDEDTYPE < 0.5, if (POS_10 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0263639, if (ENTITYPLACETYPE < 0.5, 0.0126362, 0.0201228)), -0.0101454), if (POS_19 < 0.5, if (STOP_WORD_3 < 0.5, 0.0105334, 0.0015241), -0.010678)), -0.0083597), 0.0209341), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 21.5, if (LENGTH < 7.5, -0.0204873, -0.0145965), -0.0063365), if (LENGTH < 17.5, if (CONCEPTTYPE < 0.5, -0.0125304, -0.0063154), -4.819E-4)), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.0194133, if (LENGTH < 9.5, if (CONCEPTTYPE < 0.5, -0.0151592, -0.003694), -0.0183897)), -0.0215651))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (POS_19 < 0.5, if (POS_18 < 0.5, 0.0137721, -0.0018103), if (LENGTH < 13.5, 0.0275377, -0.0116633)), if (POS_11 < 0.5, if (CHUNKTYPE < 0.5, if (REGEXTYPE < 0.5, if (POS_10 < 0.5, 9.752E-4, -0.0177054), 0.0258804), 0.0148485), -0.028177)), 0.0198978), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 12.5, -0.0175399, -0.0113568), if (CONCEPTTYPE < 0.5, if (LENGTH < 17.5, -0.0121294, 5.33E-4), -0.0048164)), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.0187967, if (CONCEPTTYPE < 0.5, -0.0169728, if (LENGTH < 9.5, -0.001531, -0.0160204))), -0.0210201))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (STOP_WORD_2 < 0.5, if (POS_19 < 0.5, if (POS_18 < 0.5, if (STOP_WORD_1 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0203391, if (POS_20 < 0.5, if (ENTITYPLACETYPE < 0.5, 0.0130113, 0.0203999), -0.0250044)), if (POS_10 < 0.5, 0.0061418, -0.0119255)), -0.007534), if (EXTENDEDTYPE < 0.5, 0.0257184, -0.0117189)), -0.0096893), 0.0206417), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 12.5, -0.0174476, if (TERM_CASE_3 < 0.5, -0.009257, -0.0183869)), if (CONCEPTTYPE < 0.5, if (LENGTH < 11.5, -0.012927, -0.0063419), -0.0050945)), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.0186469, -0.0141184), -0.0207074))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (STOP_WORD_3 < 0.5, if (POS_19 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0161598, if (POS_18 < 0.5, 0.0131595, -5.738E-4)), -0.0065529), if (ENTITYPLACETYPE < 0.5, if (REGEXTYPE < 0.5, if (POS_11 < 0.5, if (POS_10 < 0.5, if (POS_19 < 0.5, if (POS_13 < 0.5, 0.0020625, -0.0361297), -0.017138), -0.0188942), -0.0316437), 0.0294602), 0.0170694)), 0.0198958), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 24.5, -0.014876, -0.0052357), if (CONCEPTTYPE < 0.5, if (LENGTH < 11.5, -0.0129979, -0.0066152), -0.0041822)), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.017963, -0.0129878), -0.0203123))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (STOP_WORD_3 < 0.5, if (POS_19 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0206834, if (POS_18 < 0.5, 0.0134348, -0.0020072)), if (LENGTH < 13.5, 0.025581, -0.0114808)), if (ENTITYPLACETYPE < 0.5, if (REGEXTYPE < 0.5, if (STOP_WORD_2 < 0.5, if (POS_10 < 0.5, if (POS_18 < 0.5, 0.0045001, -0.0123061), -0.0198902), -0.0116103), 0.0278925), 0.0156693)), if (STOP_WORD_1 < 0.5, 0.0206524, 0.0087084)), if (ORDER_IN_CLUSTER < 2.5, if (LENGTH < 12.5, if (ENTITYPLACETYPE < 0.5, -0.0161757, if (CONCEPTTYPE < 0.5, -0.0128887, -0.0050251)), if (TERM_CASE_4 < 0.5, -0.0099339, -0.0035159)), if (ENTITYPLACETYPE < 0.5, -0.019166, -0.0154052))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (POS_19 < 0.5, if (POS_18 < 0.5, if (TERM_CASE_4 < 0.5, 0.0177521, 0.0106682), -0.0053279), -0.0047629), if (CHUNKTYPE < 0.5, if (REGEXTYPE < 0.5, if (STOP_WORD_2 < 0.5, if (POS_18 < 0.5, if (POS_10 < 0.5, if (POS_19 < 0.5, 0.0046018, -0.0227924), -0.0169681), -0.0177201), -0.010902), 0.0262335), 0.0149068)), if (STOP_WORD_1 < 0.5, 0.0201364, 0.0073277)), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (CHUNKTYPE < 0.5, -0.0154863, -0.0078134), if (LENGTH < 17.5, -0.0089874, 0.0010552)), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.0176944, -0.0126937), -0.0196004))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (STOP_WORD_3 < 0.5, if (POS_19 < 0.5, if (POS_18 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0156388, 0.0130898), -0.0013247), -0.0067409), if (ENTITYPLACETYPE < 0.5, if (REGEXTYPE < 0.5, if (POS_11 < 0.5, if (POS_10 < 0.5, 8.397E-4, -0.0195198), -0.0259057), 0.0249483), 0.0153473)), 0.0190098), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (CHUNKTYPE < 0.5, -0.015095, if (TERM_CASE_4 < 0.5, -0.0150926, -0.0019354)), if (CHUNKTYPE < 0.5, -0.0087801, -6.083E-4)), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.0175396, if (LENGTH < 9.5, if (CONCEPTTYPE < 0.5, -0.0148435, -0.0020005), -0.0167526)), -0.0194128))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (POS_19 < 0.5, if (POS_20 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0196083, 0.0120079), -0.0259077), if (LENGTH < 13.5, 0.0212285, -0.0137183)), if (STOP_WORD_2 < 0.5, if (CHUNKTYPE < 0.5, 0.0010741, 0.0180207), -0.0094372)), if (GOOD_SYNTAX < 0.5, -0.0415369, 0.0185524)), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 12.5, -0.0158674, if (TERM_CASE_3 < 0.5, -0.0070903, -0.0162977)), if (CONCEPTTYPE < 0.5, if (LENGTH < 11.5, -0.0128093, if (TERM_CASE_4 < 0.5, -0.0094791, 0.00345)), if (STOP_WORD_2 < 0.5, -0.0023198, -0.0292396))), if (ENTITYPLACETYPE < 0.5, -0.0181954, -0.0145461))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (STOP_WORD_3 < 0.5, if (POS_19 < 0.5, if (POS_18 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0170163, 0.0121323), -0.0042232), if (LENGTH < 13.5, 0.0220881, -0.0102208)), if (ENTITYPLACETYPE < 0.5, if (REGEXTYPE < 0.5, if (POS_11 < 0.5, if (POS_13 < 0.5, 7.481E-4, -0.034593), -0.0247906), 0.026535), 0.0140904)), if (STOP_WORD_3 < 0.5, 0.019341, 0.0096431)), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 8.5, -0.017048, -0.0114585), if (CHUNKTYPE < 0.5, -0.008748, -7.903E-4)), if (ENTITYPLACETYPE < 0.5, -0.0176339, if (ORDER_IN_CLUSTER < 3.5, if (CONCEPTTYPE < 0.5, -0.0141374, -0.0070836), -0.0170066)))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (STOP_WORD_2 < 0.5, if (POS_19 < 0.5, if (POS_18 < 0.5, if (POS_10 < 0.5, if (POS_20 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0155011, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, 0.0116785, 0.0055628), 0.0164792)), -0.035647), -0.0121403), -0.007518), if (EXTENDEDTYPE < 0.5, 0.0221764, -0.0114313)), -0.0088787), 0.0182672), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 7.5, -0.0175676, if (POS_10 < 0.5, if (STOP_WORD_1 < 0.5, -0.0115627, if (STOP_WORD_3 < 1.5, 0.0040489, -0.018267)), -0.0284101)), if (CHUNKTYPE < 0.5, -0.0084511, 8.382E-4)), if (ORDER_IN_CLUSTER < 3.5, -0.0145335, -0.0182791))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_2 < 0.5, if (TERM_CASE_3 < 0.5, if (POS_19 < 0.5, if (POS_18 < 0.5, if (POS_10 < 0.5, if (STOP_WORD_1 < 0.5, if (POS_20 < 0.5, 0.0104568, -0.0222549), 0.0044133), -0.0119989), -0.0064236), if (LENGTH < 12.5, 0.0257311, -0.010041)), 0.0163182), -0.0104437), if (GOOD_SYNTAX < 0.5, -0.0423132, 0.0178122)), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 7.5, -0.0178496, if (POS_18 < 0.5, -0.0097905, -0.0243957)), if (LENGTH < 17.5, if (CONCEPTTYPE < 0.5, -0.0099414, if (TERM_CASE_1 < 0.5, -0.0047538, 0.0254415)), 0.0010132)), if (ORDER_IN_CLUSTER < 3.5, -0.0145401, -0.0182917))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (STOP_WORD_3 < 0.5, if (POS_19 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0171785, if (POS_18 < 0.5, 0.0111746, -0.0020704)), if (LENGTH < 13.5, 0.0239552, -0.0082275)), if (ENTITYPLACETYPE < 0.5, if (REGEXTYPE < 0.5, if (CHUNKTYPE < 0.5, if (STOP_WORD_2 < 0.5, if (POS_10 < 0.5, 2.53E-4, -0.0181176), -0.0111345), 0.0144392), 0.0277661), 0.0140805)), 0.0177412), if (ORDER_IN_CLUSTER < 2.5, if (CHUNKTYPE < 0.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 8.5, -0.0164346, -0.0111249), -0.0081683), -0.0030415), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.015696, if (LENGTH < 8.5, -0.0076235, -0.0142175)), -0.0179409))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (STOP_WORD_2 < 0.5, if (POS_19 < 0.5, if (POS_18 < 0.5, if (POS_10 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0173813, if (ENTITYPLACETYPE < 0.5, 0.0090219, 0.0165077)), -0.0086629), -0.0049417), if (POS_17 < 0.5, -0.0128903, 0.0094969)), -0.0088222), if (STOP_WORD_3 < 0.5, 0.0184481, 0.0098499)), if (ORDER_IN_CLUSTER < 2.5, if (CHUNKTYPE < 0.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 8.5, -0.0162087, -0.0111325), if (STOP_WORD_2 < 0.5, -0.0075486, -0.0226605)), -0.0035147), if (ENTITYPLACETYPE < 0.5, -0.0164968, if (CONCEPTTYPE < 0.5, -0.0152376, if (LENGTH < 8.5, if (ORDER_IN_CLUSTER < 3.5, 6.5E-6, -0.0159595), -0.0131528))))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_2 < 0.5, if (POS_19 < 0.5, if (POS_18 < 0.5, if (TERM_CASE_4 < 0.5, 0.0140484, if (POS_10 < 0.5, 0.0074606, -0.0102378)), -0.0062586), if (EXTENDEDTYPE < 0.5, 0.0228139, -0.0112505)), -0.007237), if (STOP_WORD_1 < 0.5, 0.0173949, 0.0056298)), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (CHUNKTYPE < 0.5, if (LENGTH < 14.5, -0.0143091, -0.0088984), -0.0052595), if (CHUNKTYPE < 0.5, if (CONCEPTTYPE < 0.5, -0.0088795, if (STOP_WORD_1 < 0.5, -0.0020573, -0.0225052)), 0.0010976)), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.0151987, if (CONCEPTTYPE < 0.5, -0.0138226, -0.0065649)), -0.0174896))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (POS_19 < 0.5, 0.0097569, -0.0031364), if (POS_11 < 0.5, if (TERM_CASE_4 < 0.5, 0.0086995, if (REGEXTYPE < 0.5, if (CHUNKTYPE < 0.5, -0.0030369, 0.0101077), 0.0240326)), -0.0268141)), 0.0161601), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 7.5, -0.0166546, if (POS_18 < 0.5, -0.0096908, -0.024367)), if (LENGTH < 10.5, if (CONCEPTTYPE < 0.5, -0.0114941, if (TERM_CASE_4 < 0.5, 0.0066344, -0.0084878)), if (TERM_CASE_4 < 0.5, if (CHUNKTYPE < 0.5, -0.0096005, -0.0010291), 0.0025618))), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.0151401, -0.0108582), -0.0172704))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (STOP_WORD_2 < 0.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (POS_19 < 0.5, if (POS_20 < 0.5, 0.0095209, -0.0232525), -0.0022657), if (POS_10 < 0.5, if (POS_18 < 0.5, 0.0042226, -0.0093108), -0.0106807)), 0.0140721), -0.0069394), if (STOP_WORD_1 < 0.5, 0.0174307, 0.0080683)), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 12.5, -0.0137755, if (TERM_CASE_3 < 0.5, -0.0059039, -0.0149249)), if (LENGTH < 10.5, -0.0093623, -0.0032503)), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.0149707, if (CONCEPTTYPE < 0.5, -0.0136787, if (LENGTH < 9.5, -2.229E-4, -0.0136523))), -0.0167219))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (POS_19 < 0.5, if (POS_20 < 0.5, if (TERM_CASE_4 < 0.5, 0.0138732, 0.008398), -0.0259663), -0.0016621), if (REGEXTYPE < 0.5, if (POS_10 < 0.5, if (STOP_WORD_2 < 0.5, if (STOP_WORD_3 < 1.5, 0.0043232, -0.0044986), -0.0084717), -0.0180725), 0.0233834)), if (STOP_WORD_1 < 0.5, 0.0166125, 0.0053323)), if (ORDER_IN_CLUSTER < 2.5, if (LENGTH < 7.5, if (ENTITYPLACETYPE < 0.5, -0.0163843, -0.0095754), if (ENTITYPLACETYPE < 0.5, if (LENGTH < 24.5, if (STOP_WORD_3 < 1.5, if (STOP_WORD_1 < 0.5, -0.0104948, 0.0012228), -0.0214262), -0.0017735), -0.0045475)), if (ORDER_IN_CLUSTER < 3.5, -0.0129966, -0.0164255))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0228565, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (POS_19 < 0.5, if (POS_20 < 0.5, 0.0092807, -0.0232311), -0.0047011), if (REGEXTYPE < 0.5, if (POS_11 < 0.5, if (POS_10 < 0.5, if (POS_15 < 0.5, if (POS_19 < 0.5, if (POS_13 < 0.5, 0.0044601, -0.0323762), -0.0190737), -0.0183194), -0.0156696), -0.0241681), 0.0252131)), 0.0145758)), if (STOP_WORD_3 < 0.5, 0.0171706, 0.007469)), if (ORDER_IN_CLUSTER < 2.5, if (CHUNKTYPE < 0.5, if (ENTITYPLACETYPE < 0.5, -0.0120993, -0.0074616), -0.0031379), if (ENTITYPLACETYPE < 0.5, -0.015522, if (CONCEPTTYPE < 0.5, -0.0139359, -0.0086416)))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (POS_19 < 0.5, if (TERM_CASE_4 < 0.5, 0.0136454, 0.0070859), if (LENGTH < 12.5, 0.0293225, -0.0072539)), if (CHUNKTYPE < 0.5, if (REGEXTYPE < 0.5, if (STOP_WORD_2 < 0.5, if (POS_10 < 0.5, 7.079E-4, -0.0155087), -0.0118841), 0.0221595), 0.0138053)), if (STOP_WORD_1 < 0.5, 0.0159814, 0.0045131)), if (ORDER_IN_CLUSTER < 2.5, if (LENGTH < 21.5, if (ENTITYPLACETYPE < 0.5, if (CHUNKTYPE < 0.5, -0.0131261, if (STOP_WORD_1 < 0.5, -0.0080341, 0.0163991)), if (STOP_WORD_2 < 0.5, -0.0064649, -0.0217399)), -4.319E-4), if (ENTITYPLACETYPE < 0.5, -0.0151802, if (CONCEPTTYPE < 0.5, -0.0136413, -0.0088831)))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (STOP_WORD_2 < 0.5, if (POS_18 < 0.5, if (STOP_WORD_1 < 0.5, if (POS_19 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0164443, 0.0103647), -0.0032517), 0.0012502), -0.0065228), -0.0072638), if (STOP_WORD_3 < 0.5, 0.016558, 0.0080528)), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 24.5, if (STOP_WORD_3 < 1.5, if (STOP_WORD_1 < 0.5, if (LENGTH < 9.5, -0.0141238, -0.0094132), -0.0016775), -0.0220858), -4.456E-4), if (CHUNKTYPE < 0.5, -0.0065359, 0.0026063)), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.0135472, if (LENGTH < 9.5, if (CONCEPTTYPE < 0.5, -0.0104012, -0.0010905), -0.0128092)), -0.0155384))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (TERM_CASE_4 < 0.5, 0.0126237, if (POS_19 < 0.5, 0.0068935, -0.0053442)), if (POS_11 < 0.5, if (REGEXTYPE < 0.5, if (POS_10 < 0.5, if (STOP_WORD_3 < 1.5, if (CHUNKTYPE < 0.5, 0.0018569, 0.0136748), -0.0041457), -0.0172089), 0.0214328), -0.0237328)), 0.0149798), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 8.5, -0.0138775, -0.0084083), if (CHUNKTYPE < 0.5, if (CONCEPTTYPE < 0.5, -0.008515, -0.0033346), 0.0016883)), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.0138494, if (LENGTH < 9.5, if (CONCEPTTYPE < 0.5, -0.0105677, -1.645E-4), -0.0126896)), -0.0158156))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (POS_19 < 0.5, if (TERM_CASE_3 < 0.5, 0.0074348, 0.0146558), -0.0052259), if (POS_11 < 0.5, if (REGEXTYPE < 0.5, if (CHUNKTYPE < 0.5, if (POS_10 < 0.5, -3.521E-4, -0.0134149), 0.0103603), 0.0224311), -0.0222263)), 0.0147576), if (ORDER_IN_CLUSTER < 2.5, if (CHUNKTYPE < 0.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 8.5, -0.0140817, -0.00951), -0.0071937), if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_1 < 0.5, -0.00762, 0.0124743), 0.001789)), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.0134273, if (CONCEPTTYPE < 0.5, -0.0113881, if (LENGTH < 8.5, -3.79E-5, -0.0102505))), -0.0152726))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (STOP_WORD_3 < 0.5, if (CONCEPTTYPE < 0.5, if (POS_19 < 0.5, if (POS_18 < 0.5, 0.007474, -0.0063134), -0.0071365), 0.0136863), if (ENTITYPLACETYPE < 0.5, if (REGEXTYPE < 0.5, if (POS_10 < 0.5, if (STOP_WORD_2 < 0.5, 6.559E-4, -0.0085589), -0.0160333), 0.0219506), 0.0130889)), 0.0150582), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 8.5, -0.0146211, if (TERM_CASE_3 < 0.5, if (CHUNKTYPE < 0.5, -0.0094537, 7.042E-4), -0.0138315)), if (CHUNKTYPE < 0.5, -0.00638, 0.0011708)), if (ORDER_IN_CLUSTER < 3.5, if (STOP_WORD_2 < 0.5, if (POS_1 < 0.5, -0.010724, -0.0188838), -0.0189123), -0.015))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0229233, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (POS_19 < 0.5, 0.0074292, -0.0028084), if (REGEXTYPE < 0.5, if (POS_11 < 0.5, if (POS_13 < 0.5, if (POS_10 < 0.5, 9.274E-4, -0.0137277), -0.0310847), -0.0228395), 0.0241976)), 0.013276)), 0.0146802), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 12.5, -0.0122109, if (TERM_CASE_3 < 0.5, -0.0058119, -0.0129394)), if (CHUNKTYPE < 0.5, if (STOP_WORD_2 < 0.5, if (LENGTH < 26.5, -0.0069115, 0.0097361), -0.0245469), 9.084E-4)), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.0130609, -0.0081145), -0.0148501))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (POS_19 < 0.5, if (TERM_CASE_4 < 0.5, 0.011759, 0.006059), -0.0061099), if (REGEXTYPE < 0.5, if (POS_11 < 0.5, if (TERM_CASE_4 < 0.5, 0.0065599, -0.0010844), -0.0220849), 0.0224768)), 0.0140095), if (ORDER_IN_CLUSTER < 2.5, if (LENGTH < 12.5, if (ENTITYPLACETYPE < 0.5, -0.0115323, if (CONCEPTTYPE < 0.5, -0.0091078, if (STOP_WORD_3 < 0.5, 7.263E-4, -0.0201621))), if (TERM_CASE_4 < 0.5, -0.0071175, if (LENGTH < 15.5, 0.0081473, -0.0018688))), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.0128936, if (CONCEPTTYPE < 0.5, -0.0116723, if (LENGTH < 9.5, -2.605E-4, -0.0106371))), -0.014597))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0219888, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (POS_18 < 0.5, if (CONCEPTTYPE < 0.5, if (POS_19 < 0.5, 0.0071866, -0.0033938), 0.0131146), -0.0053445), if (REGEXTYPE < 0.5, -4.858E-4, 0.0216005)), 0.0128922)), if (STOP_WORD_1 < 0.5, 0.0149918, 0.0049231)), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 8.5, -0.0136976, if (POS_18 < 0.5, if (POS_10 < 0.5, if (STOP_WORD_1 < 0.5, -0.0082875, 7.551E-4), -0.0248618), -0.0219885)), -0.0051402), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.0125589, if (LENGTH < 9.5, -0.0059241, -0.0123873)), -0.014485))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (POS_19 < 0.5, if (TERM_CASE_3 < 0.5, 0.0071765, 0.0139718), -0.0063484), if (STOP_WORD_2 < 0.5, if (POS_10 < 0.5, 0.002737, -0.0131905), -0.0078928)), if (STOP_WORD_1 < 0.5, 0.0145544, 0.0032937)), if (ORDER_IN_CLUSTER < 2.5, if (LENGTH < 12.5, if (ENTITYPLACETYPE < 0.5, -0.0113455, if (STOP_WORD_1 < 0.5, if (CONCEPTTYPE < 0.5, -0.0077787, -2.138E-4), -0.0191808)), if (TERM_CASE_3 < 0.5, if (ENTITYPLACETYPE < 0.5, -0.0035509, 0.0077407), if (ENTITYPLACETYPE < 0.5, -0.0111466, -0.0041847))), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.0126288, if (LENGTH < 9.5, -0.0062011, -0.0122915)), -0.0142805))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (STOP_WORD_3 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0193475, if (POS_19 < 0.5, if (POS_20 < 0.5, if (POS_18 < 0.5, 0.0088922, -0.0014413), -0.0243768), if (LENGTH < 13.5, 0.0192753, -0.0066051))), if (CHUNKTYPE < 0.5, if (LENGTH < 14.5, -0.0074286, if (EXTENDEDTYPE < 0.5, if (LENGTH < 24.5, 0.0017664, 0.0167767), -0.001159)), 0.0130916)), if (STOP_WORD_3 < 0.5, 0.0147469, 0.0072769)), if (ORDER_IN_CLUSTER < 2.5, if (LENGTH < 18.5, if (ENTITYPLACETYPE < 0.5, -0.0103194, if (CONCEPTTYPE < 0.5, -0.0078001, -0.0022905)), -0.0020796), if (ORDER_IN_CLUSTER < 3.5, if (POS_1 < 0.5, -0.0104979, -0.0178771), -0.0140859))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_4 < 0.5, if (STOP_WORD_3 < 0.5, 0.0137203, 0.0064025), if (GOOD_SYNTAX < 0.5, -0.0224563, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, 0.005608, if (POS_11 < 0.5, if (REGEXTYPE < 0.5, if (STOP_WORD_3 < 1.5, 0.0019291, -0.0051123), 0.0182142), -0.0213983)), 0.012703))), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 12.5, -0.0113072, if (TERM_CASE_3 < 0.5, -0.004186, -0.0117898)), if (CONCEPTTYPE < 0.5, if (LENGTH < 11.5, -0.0089815, -0.0020853), -0.0016058)), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.0120968, if (CONCEPTTYPE < 0.5, -0.0098191, if (LENGTH < 9.5, 0.0018021, -0.0097175))), -0.0139154))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (TERM_CASE_4 < 0.5, 0.0103513, 0.0045689), if (POS_11 < 0.5, if (REGEXTYPE < 0.5, if (TERM_CASE_4 < 0.5, 0.0056874, -0.0016569), 0.0192383), -0.0228582)), 0.0133348), if (ORDER_IN_CLUSTER < 2.5, if (CHUNKTYPE < 0.5, if (STOP_WORD_2 < 0.5, if (ENTITYPLACETYPE < 0.5, if (TERM_CASE_2 < 0.5, -0.0117483, -0.0062281), if (TERM_CASE_1 < 0.5, if (LENGTH < 18.5, -0.0068632, 0.001308), 0.0063511)), -0.0173735), if (CONCEPTTYPE < 0.5, -0.0035826, 0.0050191)), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.011691, if (CONCEPTTYPE < 0.5, -0.0103893, if (LENGTH < 8.5, 0.0018196, -0.0088273))), -0.0136453))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (POS_19 < 0.5, if (POS_18 < 0.5, if (POS_20 < 0.5, 0.0087542, -0.0270729), -0.0030851), if (LENGTH < 15.5, 0.0129051, -0.0116668)), if (STOP_WORD_2 < 0.5, if (POS_18 < 0.5, 0.0030751, -0.0139397), -0.0062987)), if (STOP_WORD_1 < 0.5, 0.0137908, 0.0039076)), if (ORDER_IN_CLUSTER < 2.5, if (CHUNKTYPE < 0.5, if (LENGTH < 18.5, if (ENTITYPLACETYPE < 0.5, -0.0106848, -0.006511), -0.0025831), if (STOP_WORD_1 < 0.5, -0.0021885, 0.0128339)), if (ENTITYPLACETYPE < 0.5, -0.0130512, if (ORDER_IN_CLUSTER < 3.5, if (CONCEPTTYPE < 0.5, -0.0102829, if (LENGTH < 11.5, -0.00111, -0.0108079)), -0.0125744)))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (TERM_CASE_4 < 0.5, 0.0111577, if (POS_19 < 0.5, 0.0055582, -0.0044153)), if (CHUNKTYPE < 0.5, if (REGEXTYPE < 0.5, if (POS_10 < 0.5, -0.0023607, -0.0173584), 0.0166121), 0.0095357)), 0.0129467), if (ORDER_IN_CLUSTER < 2.5, if (CHUNKTYPE < 0.5, if (STOP_WORD_2 < 0.5, if (ENTITYPLACETYPE < 0.5, if (TERM_CASE_2 < 0.5, if (CONCEPTTYPE < 0.5, -0.003286, -0.0125259), -0.0056692), if (CONCEPTTYPE < 0.5, if (LENGTH < 11.5, -0.0084425, if (TERM_CASE_4 < 0.5, -0.0065046, 0.004556)), -0.0022756)), -0.0159046), -0.0023817), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.0113171, -0.0079431), -0.0133184))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (TERM_CASE_3 < 0.5, 0.0050979, 0.0129595), if (CHUNKTYPE < 0.5, if (REGEXTYPE < 0.5, if (TERM_CASE_4 < 0.5, 0.0049818, if (STOP_WORD_2 < 0.5, if (POS_10 < 0.5, -5.948E-4, -0.0160451), -0.0109574)), 0.0184255), 0.0110931)), 0.0127109), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 8.5, -0.0116395, -0.0070247), if (LENGTH < 11.5, if (CONCEPTTYPE < 0.5, -0.0079596, -0.0017647), if (TERM_CASE_4 < 0.5, -0.0045248, 0.006299))), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.0113466, if (LENGTH < 7.5, if (CONCEPTTYPE < 0.5, -0.007685, 0.0026876), -0.0099917)), -0.0131104))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (EXTENDEDTYPE < 0.5, if (GOOD_SYNTAX < 0.5, -0.0228671, if (LENGTH < 24.5, if (POS_10 < 0.5, 0.0071132, -0.0090998), 0.0134041)), if (STOP_WORD_3 < 1.5, if (POS_19 < 0.5, if (POS_20 < 0.5, if (POS_18 < 0.5, 0.0063903, -0.0050451), -0.0238561), -0.0090155), -0.0075331)), 0.0127866), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 8.5, -0.0118255, if (TERM_CASE_3 < 0.5, if (STOP_WORD_2 < 0.5, -0.0042627, -0.0158184), -0.011879)), if (CHUNKTYPE < 0.5, -0.0052357, 0.0027169)), if (ORDER_IN_CLUSTER < 3.5, if (STOP_WORD_2 < 0.5, if (POS_1 < 0.5, -0.0085676, -0.0166909), -0.0164321), -0.0128791))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0195794, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (POS_19 < 0.5, if (POS_18 < 0.5, 0.0066964, -0.0035265), -0.0038897), if (REGEXTYPE < 0.5, if (POS_11 < 0.5, -8.263E-4, -0.0221394), 0.0196917)), 0.0116728)), 0.0126207), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 21.5, -0.0091239, -0.0030069), if (LENGTH < 12.5, if (CONCEPTTYPE < 0.5, -0.0075137, if (STOP_WORD_3 < 0.5, 4.698E-4, -0.018769)), if (TERM_CASE_3 < 0.5, 0.0088638, -0.0050337))), if (ENTITYPLACETYPE < 0.5, -0.0120097, if (CONCEPTTYPE < 0.5, -0.0105747, if (LENGTH < 11.5, -0.0033156, -0.0107989))))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (GOOD_SYNTAX < 0.5, -0.024075, if (EXTENDEDTYPE < 0.5, if (LENGTH < 25.5, if (POS_10 < 0.5, if (POS_13 < 0.5, 0.0071824, -0.0298384), -0.0101318), 0.0140101), if (POS_19 < 0.5, if (STOP_WORD_3 < 1.5, if (POS_20 < 0.5, if (POS_18 < 0.5, 0.006749, -0.0042325), -0.0278094), -0.0064296), -0.0098671))), 0.012667), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 7.5, -0.0117698, -0.0065941), if (CHUNKTYPE < 0.5, -0.0045716, 0.0020334)), if (ENTITYPLACETYPE < 0.5, -0.0121529, if (ORDER_IN_CLUSTER < 3.5, if (LENGTH < 9.5, if (CONCEPTTYPE < 0.5, -0.0070855, 0.0020866), -0.0094693), -0.0111803)))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (TERM_CASE_4 < 0.5, 0.0077284, if (STOP_WORD_2 < 0.5, if (POS_19 < 0.5, if (POS_18 < 0.5, if (STOP_WORD_1 < 0.5, if (LENGTH < 27.5, 0.0037692, 0.0092086), -4.152E-4), -0.0091993), -0.0074325), -0.0068167)), 0.011709), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 17.5, if (CHUNKTYPE < 0.5, -0.0101472, if (STOP_WORD_1 < 0.5, -0.0075701, 0.0192057)), -0.0038737), if (CONCEPTTYPE < 0.5, -0.0054107, if (LENGTH < 25.5, -0.0014838, 0.0156292))), if (ENTITYPLACETYPE < 0.5, -0.0118794, if (ORDER_IN_CLUSTER < 3.5, if (CONCEPTTYPE < 0.5, -0.0090476, if (LENGTH < 8.5, 0.0021917, -0.0068113)), -0.0109229)))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0227448, if (EXTENDEDTYPE < 0.5, if (POS_10 < 0.5, if (LENGTH < 24.5, 0.0060057, 0.0125722), -0.0101504), if (POS_19 < 0.5, if (POS_20 < 0.5, if (POS_11 < 0.5, 0.0032162, -0.0199493), -0.0217609), -0.006034))), if (STOP_WORD_3 < 0.5, 0.0132183, 0.0057335)), if (ORDER_IN_CLUSTER < 2.5, if (LENGTH < 11.5, if (CHUNKTYPE < 0.5, if (ENTITYPLACETYPE < 0.5, -0.0102233, -0.0063653), 0.002999), if (TERM_CASE_4 < 0.5, -0.0063009, 1.69E-5)), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.0106072, if (LENGTH < 9.5, if (CONCEPTTYPE < 0.5, -0.0069765, 0.0023083), -0.009208)), -0.0123613))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0173462, if (ENTITYPLACETYPE < 0.5, if (POS_11 < 0.5, if (POS_19 < 0.5, if (POS_18 < 0.5, if (POS_20 < 0.5, if (POS_10 < 0.5, if (POS_13 < 0.5, 0.0057288, -0.0292491), -0.0095924), -0.0243283), -0.0049234), -0.0051067), -0.0204223), 0.0104389)), 0.0118102), if (ORDER_IN_CLUSTER < 2.5, if (LENGTH < 12.5, if (CHUNKTYPE < 0.5, if (STOP_WORD_3 < 1.5, -0.0076967, -0.0207207), 0.0012527), if (TERM_CASE_3 < 0.5, if (ENTITYPLACETYPE < 0.5, -0.0026614, 0.0062031), -0.0072729)), if (ENTITYPLACETYPE < 0.5, -0.0117006, if (CONCEPTTYPE < 0.5, -0.0099539, if (ORDER_IN_CLUSTER < 3.5, -0.0020873, -0.0096474))))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (TERM_CASE_4 < 0.5, 0.0097428, 0.0040314), if (REGEXTYPE < 0.5, if (POS_13 < 0.5, if (POS_11 < 0.5, 5.285E-4, -0.0183482), -0.0286601), 0.0195782)), 0.0113925), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 7.5, -0.0114242, if (POS_18 < 0.5, if (TERM_CASE_3 < 0.5, if (POS_10 < 0.5, if (STOP_WORD_1 < 0.5, if (LENGTH < 9.5, -0.0111308, -0.0033015), 0.0042831), -0.0211832), -0.0097492), -0.0186218)), if (LENGTH < 11.5, if (CONCEPTTYPE < 0.5, -0.0065837, -4.198E-4), if (TERM_CASE_3 < 0.5, 0.0057224, -0.0039242))), if (ORDER_IN_CLUSTER < 3.5, -0.0087263, -0.0120898))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_2 < 0.5, if (TERM_CASE_4 < 0.5, 0.0078785, if (POS_18 < 0.5, if (POS_10 < 0.5, if (POS_19 < 0.5, if (LENGTH < 25.5, if (STOP_WORD_3 < 1.5, 0.00325, -0.0101417), 0.0078278), -0.0054123), -0.0103609), -0.0125887)), -0.0079703), if (STOP_WORD_1 < 0.5, 0.012286, 0.0023518)), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 20.5, -0.0085329, -0.0029867), if (CHUNKTYPE < 0.5, if (STOP_WORD_2 < 0.5, -0.0041063, -0.0192657), 0.0027515)), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.0099399, if (LENGTH < 7.5, -0.0024334, if (CHUNKTYPE < 0.5, -0.0104878, 0.0020285))), -0.0118966))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (POS_20 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0184519, if (ENTITYPLACETYPE < 0.5, if (POS_19 < 0.5, if (POS_11 < 0.5, if (STOP_WORD_3 < 2.5, 0.0038235, 0.0171261), -0.0193481), -0.0053696), 0.0105468)), -0.0252286), if (STOP_WORD_1 < 0.5, 0.0124447, 0.0024796)), if (ORDER_IN_CLUSTER < 2.5, if (LENGTH < 11.5, if (ENTITYPLACETYPE < 0.5, -0.0095294, if (CONCEPTTYPE < 0.5, -0.0076215, -5.182E-4)), if (TERM_CASE_3 < 0.5, if (STOP_WORD_3 < 0.5, 0.001379, if (LENGTH < 22.5, -0.0108893, 0.0020565)), if (ENTITYPLACETYPE < 0.5, -0.0103963, -0.0037001))), if (ENTITYPLACETYPE < 0.5, -0.010986, if (ORDER_IN_CLUSTER < 3.5, -0.006132, -0.0105592)))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (STOP_WORD_3 < 0.5, if (CONCEPTTYPE < 0.5, if (POS_19 < 0.5, 0.0047532, -0.0064666), 0.0102174), if (CHUNKTYPE < 0.5, if (ENTITYPLACETYPE < 0.5, if (REGEXTYPE < 0.5, -0.0024277, 0.0183349), 0.0070475), 0.0115305)), if (STOP_WORD_1 < 0.5, 0.0121889, 0.003729)), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 8.5, -0.0105445, if (POS_10 < 0.5, -0.0053669, -0.0207266)), if (LENGTH < 24.5, -0.0040706, 0.007517)), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.0097368, if (CHUNKTYPE < 0.5, if (LENGTH < 8.5, if (CONCEPTTYPE < 0.5, -0.0074429, 0.00157), -0.0102013), 0.0045523)), -0.0115658))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0199225, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (CONCEPTTYPE < 0.5, if (POS_19 < 0.5, 0.0045463, -0.0067636), 0.010348), if (REGEXTYPE < 0.5, if (POS_19 < 0.5, -2.734E-4, -0.0166088), 0.0187737)), 0.0105994)), if (STOP_WORD_1 < 0.5, 0.0121884, 0.0016164)), if (ENTITYPLACETYPE < 0.5, if (ORDER_IN_CLUSTER < 3.5, if (STOP_WORD_2 < 0.5, if (LENGTH < 9.5, -0.0100509, if (TERM_CASE_3 < 0.5, -0.004709, -0.0101587)), -0.0144494), -0.0128056), if (ORDER_IN_CLUSTER < 3.5, if (CONCEPTTYPE < 0.5, if (LENGTH < 17.5, if (CHUNKTYPE < 0.5, -0.0071529, 3.019E-4), 0.0029028), -9.705E-4), -0.0101829))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (POS_19 < 0.5, if (STOP_WORD_3 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0133766, 0.0064295), if (POS_11 < 0.5, if (POS_13 < 0.5, if (EXTENDEDTYPE < 0.5, if (POS_10 < 0.5, 0.0064927, -0.0067917), -4.751E-4), -0.0279461), -0.0186938)), -0.0068121), if (STOP_WORD_3 < 0.5, 0.0122787, 0.0050219)), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 12.5, -0.0092979, if (TERM_CASE_3 < 0.5, -0.0018497, -0.0102131)), if (LENGTH < 18.5, if (CONCEPTTYPE < 0.5, -0.0055809, -9.664E-4), 0.0024321)), if (ORDER_IN_CLUSTER < 3.5, if (STOP_WORD_2 < 0.5, if (POS_1 < 0.5, -0.0069548, -0.0150524), -0.0148514), -0.0111524))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0199523, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (POS_19 < 0.5, 0.0055288, -0.0027995), if (STOP_WORD_3 < 2.5, -9.661E-4, 0.0140189)), 0.0100237)), if (STOP_WORD_3 < 0.5, 0.0120304, 0.0049867)), if (ORDER_IN_CLUSTER < 2.5, if (LENGTH < 7.5, -0.0084903, if (POS_10 < 0.5, if (ENTITYPLACETYPE < 0.5, -0.0055562, if (CHUNKTYPE < 0.5, if (TERM_CASE_3 < 0.5, if (LENGTH < 12.5, -0.0033318, 0.0045948), -0.0053979), 0.0026091)), -0.0213714)), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.0095493, if (LENGTH < 7.5, -0.0014674, if (CHUNKTYPE < 0.5, -0.0095668, 0.0028625))), -0.0111755))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0196303, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (CONCEPTTYPE < 0.5, 0.0030396, 0.0103852), if (POS_11 < 0.5, 8.51E-4, -0.0168136)), 0.0099631)), if (STOP_WORD_1 < 0.5, 0.0115791, 0.0028478)), if (ORDER_IN_CLUSTER < 2.5, if (STOP_WORD_2 < 0.5, if (LENGTH < 8.5, if (ENTITYPLACETYPE < 0.5, -0.0090833, -0.0043938), if (TERM_CASE_3 < 0.5, if (LENGTH < 12.5, -0.0039915, 2.299E-4), if (ENTITYPLACETYPE < 0.5, -0.0095076, -0.0038278))), -0.0123815), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.009674, if (CONCEPTTYPE < 0.5, -0.0079674, if (LENGTH < 8.5, 0.0032913, -0.0061521))), -0.0112111))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (CONCEPTTYPE < 0.5, if (TERM_CASE_3 < 0.5, if (POS_19 < 0.5, 0.003502, -0.004849), 0.0095946), 0.0096953), if (POS_11 < 0.5, if (POS_13 < 0.5, 8.188E-4, -0.0274095), -0.0183945)), if (STOP_WORD_1 < 0.5, 0.0112363, 0.0011995)), if (ORDER_IN_CLUSTER < 2.5, if (LENGTH < 26.5, if (ENTITYPLACETYPE < 0.5, -0.006842, if (CHUNKTYPE < 0.5, if (STOP_WORD_2 < 0.5, if (TERM_CASE_1 < 0.5, -0.0037862, 0.0072035), -0.0185828), 0.0016982)), 0.0042357), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.008899, if (CONCEPTTYPE < 0.5, -0.0079921, if (LENGTH < 8.5, 0.0034053, -0.0055219))), -0.010813))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (STOP_WORD_3 < 0.5, if (CONCEPTTYPE < 0.5, if (POS_19 < 0.5, if (POS_20 < 0.5, 0.0044217, -0.0254605), -0.0059356), 0.0092415), if (ENTITYPLACETYPE < 0.5, if (REGEXTYPE < 0.5, if (STOP_WORD_2 < 0.5, if (POS_19 < 0.5, if (POS_10 < 0.5, 5.27E-4, -0.0127815), -0.0189175), -0.0079803), 0.0194397), 0.0077178)), if (STOP_WORD_3 < 0.5, 0.0113611, 0.0030626)), if (ORDER_IN_CLUSTER < 2.5, if (LENGTH < 17.5, if (STOP_WORD_2 < 0.5, if (LENGTH < 7.5, if (ENTITYPLACETYPE < 0.5, -0.0099975, -0.0047484), -0.0039338), -0.0129044), -0.0011493), if (ENTITYPLACETYPE < 0.5, -0.0100912, if (CONCEPTTYPE < 0.5, -0.0088244, -0.0051965)))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0172671, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (CONCEPTTYPE < 0.5, if (POS_19 < 0.5, 0.0038607, -0.0070186), 0.0092221), if (POS_11 < 0.5, if (STOP_WORD_3 < 2.5, if (POS_10 < 0.5, -3.41E-5, -0.0125225), 0.0120708), -0.0207583)), 0.0092476)), if (STOP_WORD_3 < 0.5, 0.0114261, 0.0037361)), if (ORDER_IN_CLUSTER < 2.5, if (CHUNKTYPE < 0.5, if (STOP_WORD_2 < 0.5, if (ENTITYPLACETYPE < 0.5, if (TERM_CASE_2 < 0.5, -0.0089222, -0.0049235), -0.0035604), -0.0144894), -4.387E-4), if (ORDER_IN_CLUSTER < 3.5, if (POS_1 < 0.5, if (STOP_WORD_2 < 0.5, -0.0069498, -0.0138425), -0.0146887), -0.0110439))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (GOOD_SYNTAX < 0.5, -0.017575, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (POS_19 < 0.5, 0.0046367, -0.0028153), if (REGEXTYPE < 0.5, if (CHUNKTYPE < 0.5, if (LENGTH < 16.5, -0.0063287, if (STOP_WORD_3 < 1.5, 0.0019162, -0.0049296)), 0.0088493), 0.0153082)), 0.0093202)), if (STOP_WORD_3 < 0.5, 0.0111777, 0.0040758)), if (ORDER_IN_CLUSTER < 2.5, if (CHUNKTYPE < 0.5, if (LENGTH < 26.5, if (ENTITYPLACETYPE < 0.5, -0.007783, -0.0044667), 0.0032427), if (LENGTH < 11.5, 0.0058521, -0.0021999)), if (ORDER_IN_CLUSTER < 3.5, if (STOP_WORD_2 < 0.5, if (POS_1 < 0.5, -0.0069747, -0.0142179), -0.0139126), -0.0104687))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (CONCEPTTYPE < 0.5, if (POS_19 < 0.5, 0.0043383, -0.0045935), 0.0101261), if (POS_19 < 0.5, if (STOP_WORD_3 < 2.5, if (STOP_WORD_3 < 1.5, if (CHUNKTYPE < 0.5, if (LENGTH < 20.5, -0.0027714, 0.0041583), 0.0115687), -0.0077298), 0.0130931), -0.020191)), if (STOP_WORD_1 < 0.5, 0.0105555, 4.583E-4)), if (ORDER_IN_CLUSTER < 2.5, if (LENGTH < 21.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 7.5, -0.0102799, if (TERM_CASE_3 < 0.5, if (STOP_WORD_2 < 0.5, -0.0040687, -0.0127478), -0.0107389)), -0.0037055), 6.93E-4), if (ORDER_IN_CLUSTER < 3.5, if (STOP_WORD_2 < 0.5, -0.0074529, -0.0140807), -0.0105013))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_4 < 0.5, if (STOP_WORD_3 < 0.5, 0.0105702, 0.0034017), if (GOOD_SYNTAX < 0.5, -0.0190164, if (ENTITYPLACETYPE < 0.5, if (POS_20 < 0.5, if (POS_19 < 0.5, if (POS_18 < 0.5, if (LENGTH < 25.5, 0.0016483, 0.0062856), -0.0067828), if (LENGTH < 13.5, 0.0131435, -0.0107525)), -0.0266499), 0.0084024))), if (ORDER_IN_CLUSTER < 2.5, if (CHUNKTYPE < 0.5, if (STOP_WORD_2 < 0.5, if (TERM_CASE_1 < 0.5, if (LENGTH < 6.5, -0.009195, if (POS_1 < 0.5, -0.0039017, -0.0152131)), 0.0045369), -0.0128674), -9.539E-4), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.0085395, if (CONCEPTTYPE < 0.5, -0.007369, -0.002642)), -0.0102687))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_2 < 0.5, if (POS_18 < 0.5, if (POS_19 < 0.5, if (POS_10 < 0.5, if (TERM_CASE_4 < 0.5, if (LENGTH < 22.5, 0.0099611, 0.0030696), if (LENGTH < 25.5, 0.0022581, 0.0075257)), if (STOP_WORD_3 < 1.5, -0.0183479, 0.0086277)), -0.0054524), -0.0061213), -0.0057235), 0.0095436), if (ORDER_IN_CLUSTER < 2.5, if (STOP_WORD_2 < 0.5, if (LENGTH < 7.5, if (TERM_CASE_1 < 0.5, -0.0072819, 0.0037503), if (ENTITYPLACETYPE < 0.5, -0.0040748, if (TERM_CASE_3 < 0.5, if (LENGTH < 11.5, -0.0025632, 0.0058961), -0.0031178))), -0.0117283), if (ENTITYPLACETYPE < 0.5, -0.0097562, if (CONCEPTTYPE < 0.5, -0.0083735, -0.0045189)))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (STOP_WORD_3 < 0.5, if (CONCEPTTYPE < 0.5, 0.0031857, 0.0086866), if (LENGTH < 24.5, if (CHUNKTYPE < 0.5, if (STOP_WORD_3 < 2.5, -0.0038614, 0.0175429), 0.0108061), if (EXTENDEDTYPE < 0.5, 0.0137676, 3.506E-4))), if (STOP_WORD_3 < 0.5, 0.0108081, 0.0040597)), if (ORDER_IN_CLUSTER < 2.5, if (STOP_WORD_2 < 0.5, if (LENGTH < 10.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_1 < 0.5, -0.0086597, 0.0060508), if (CONCEPTTYPE < 0.5, -0.0061168, 0.0017479)), if (TERM_CASE_3 < 0.5, if (ENTITYPLACETYPE < 0.5, -0.0016474, 0.0050863), -0.0050703)), -0.0129486), if (ORDER_IN_CLUSTER < 3.5, if (POS_1 < 0.5, -0.006801, -0.0136904), -0.0103789))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0154696, if (ENTITYPLACETYPE < 0.5, if (POS_19 < 0.5, if (POS_18 < 0.5, if (STOP_WORD_3 < 0.5, 0.004912, if (STOP_WORD_3 < 2.5, if (POS_10 < 0.5, if (STOP_WORD_2 < 0.5, 0.0023838, -0.0065138), -0.0112473), 0.0126552)), -0.0049311), if (LENGTH < 15.5, 0.0066143, -0.0100076)), 0.0083943)), if (STOP_WORD_3 < 0.5, 0.0107557, 0.003532)), if (ORDER_IN_CLUSTER < 2.5, if (LENGTH < 24.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 7.5, if (TERM_CASE_4 < 0.5, -0.00415, -0.0120295), -0.0054989), -0.0026859), 0.0025229), if (ENTITYPLACETYPE < 0.5, -0.0095499, if (CONCEPTTYPE < 0.5, -0.0083022, -0.0036627)))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (STOP_WORD_3 < 0.5, if (CONCEPTTYPE < 0.5, if (POS_19 < 0.5, 0.0038506, -0.0044521), 0.0082738), if (ENTITYPLACETYPE < 0.5, -0.001014, 0.0072563)), 0.0096636), if (CHUNKTYPE < 0.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 7.5, -0.0102008, if (POS_10 < 0.5, if (ORDER_IN_CLUSTER < 2.5, -0.0047438, -0.0080274), -0.0166333)), if (ORDER_IN_CLUSTER < 3.5, if (STOP_WORD_2 < 0.5, if (CONCEPTTYPE < 0.5, -0.0050233, if (TERM_CASE_3 < 0.5, 0.0016495, -0.0057258)), -0.0161277), -0.0086894)), if (STOP_WORD_1 < 0.5, if (ENTITYPLACETYPE < 0.5, if (ORDER_IN_CLUSTER < 2.5, if (TERM_CASE_4 < 0.5, -0.0065386, 0.0025704), -0.0096212), 0.0029606), 0.0107795))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (TERM_CASE_4 < 0.5, 0.0062343, if (POS_18 < 0.5, if (STOP_WORD_2 < 0.5, if (LENGTH < 25.5, if (STOP_WORD_3 < 0.5, 0.0027147, -0.0024574), 0.0058186), -0.0058367), -0.0094021)), if (STOP_WORD_1 < 0.5, 0.0101689, 0.0017153)), if (ORDER_IN_CLUSTER < 2.5, if (CHUNKTYPE < 0.5, if (STOP_WORD_2 < 0.5, if (TERM_CASE_1 < 0.5, if (LENGTH < 6.5, if (LENGTH < 5.5, -0.005605, -0.0123399), -0.0041545), 0.0029762), -0.0117731), if (ENTITYPLACETYPE < 0.5, if (TERM_CASE_4 < 0.5, -0.0073463, if (CONCEPTTYPE < 0.5, -0.0016419, 0.0165596)), 0.002869)), if (ENTITYPLACETYPE < 0.5, -0.0091448, if (CONCEPTTYPE < 0.5, -0.0079953, -0.0035768)))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, 0.0047094, if (POS_11 < 0.5, if (REGEXTYPE < 0.5, if (POS_10 < 0.5, if (STOP_WORD_3 < 1.5, if (LENGTH < 20.5, if (CHUNKTYPE < 0.5, -0.0025457, 0.0097957), 0.0053202), if (TERM_CASE_4 < 0.5, 0.0085641, -0.0065398)), -0.0122317), 0.0164329), -0.0173095)), 0.0091212), if (ORDER_IN_CLUSTER < 2.5, if (CHUNKTYPE < 0.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 7.5, -0.0094751, if (POS_18 < 0.5, -0.0050056, -0.0167212)), -0.0030324), if (CONCEPTTYPE < 0.5, -0.00178, if (TERM_CASE_3 < 0.5, 0.0193394, 0.0029243))), if (ENTITYPLACETYPE < 0.5, -0.0091874, if (CONCEPTTYPE < 0.5, -0.0072158, -0.0030999)))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (CONCEPTTYPE < 0.5, if (TERM_CASE_3 < 0.5, 0.0027316, 0.008725), 0.0088773), if (REGEXTYPE < 0.5, if (STOP_WORD_3 < 1.5, if (CONCEPTTYPE < 0.5, 0.0032587, -0.0036558), -0.0051207), 0.0179723)), if (STOP_WORD_1 < 0.5, 0.0096336, 7.218E-4)), if (ORDER_IN_CLUSTER < 2.5, if (LENGTH < 12.5, if (STOP_WORD_3 < 1.5, if (CHUNKTYPE < 0.5, -0.0055952, 0.0012386), -0.0179516), if (TERM_CASE_4 < 0.5, -0.0038354, if (ENTITYPLACETYPE < 0.5, -0.0010097, if (STOP_WORD_3 < 0.5, 0.0134052, -6.04E-5)))), if (ORDER_IN_CLUSTER < 3.5, if (STOP_WORD_2 < 0.5, if (POS_1 < 0.5, -0.0054568, -0.0130658), -0.013045), -0.0096278))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (EXTENDEDTYPE < 0.5, if (GOOD_SYNTAX < 0.5, -0.0227151, if (LENGTH < 25.5, if (POS_10 < 0.5, 0.0042345, -0.0100974), 0.0118472)), if (POS_19 < 0.5, if (STOP_WORD_3 < 1.5, if (POS_18 < 0.5, if (POS_20 < 0.5, 0.0043759, -0.0210411), -0.0071613), -0.0055258), -0.0088723)), if (STOP_WORD_3 < 0.5, 0.0101478, 0.003266)), if (ORDER_IN_CLUSTER < 2.5, if (STOP_WORD_2 < 0.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 7.5, -0.0087731, -0.0037382), if (CONCEPTTYPE < 0.5, if (LENGTH < 11.5, -0.0053575, -7.922E-4), 5.438E-4)), -0.0108268), if (ENTITYPLACETYPE < 0.5, -0.008961, if (CONCEPTTYPE < 0.5, -0.0076917, -0.003026)))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (TERM_CASE_4 < 0.5, 0.0069296, 0.0031202), if (REGEXTYPE < 0.5, if (CHUNKTYPE < 0.5, if (LENGTH < 14.5, -0.0064692, if (STOP_WORD_3 < 1.5, 0.0019488, if (TERM_CASE_4 < 0.5, 0.0085374, -0.0067782))), 0.00637), 0.0180096)), 0.0088053), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 21.5, -0.0061255, -5.129E-4), if (LENGTH < 11.5, if (CONCEPTTYPE < 0.5, -0.0048713, 8.81E-5), if (TERM_CASE_3 < 0.5, 0.005674, -0.0027696))), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.0078337, if (LENGTH < 10.5, if (CONCEPTTYPE < 0.5, -0.0050692, 0.0022481), -0.0075866)), -0.0096561))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (POS_20 < 0.5, if (STOP_WORD_2 < 0.5, if (POS_18 < 0.5, if (POS_19 < 0.5, if (POS_10 < 0.5, 0.0048174, -0.0065418), -0.0039589), -0.0069102), -0.0044218), -0.0249516), if (STOP_WORD_1 < 0.5, 0.0098977, 0.0017658)), if (ORDER_IN_CLUSTER < 3.5, if (CHUNKTYPE < 0.5, if (LENGTH < 25.5, if (STOP_WORD_2 < 0.5, if (ENTITYPLACETYPE < 0.5, if (POS_1 < 0.5, if (TERM_CASE_2 < 0.5, if (STOP_WORD_3 < 0.5, -0.0086001, -0.0023197), -0.0045721), -0.0133756), if (CONCEPTTYPE < 0.5, -0.0057091, -0.0021427)), -0.012263), 0.0037549), if (STOP_WORD_1 < 0.5, if (ENTITYPLACETYPE < 0.5, -0.0039936, 0.0016112), 0.0156535)), -0.0095359)) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (STOP_WORD_3 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0179962, 0.0047526), if (POS_11 < 0.5, if (STOP_WORD_3 < 2.5, if (ENTITYPLACETYPE < 0.5, -0.0012421, 0.0066496), 0.0111163), -0.0183546)), if (STOP_WORD_1 < 0.5, 0.0096607, 0.0026903)), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 13.5, -0.0069672, if (TERM_CASE_3 < 0.5, -3.931E-4, -0.0085633)), if (TERM_CASE_1 < 0.5, -0.0030407, if (LENGTH < 2.5, -0.0186193, 0.0154944))), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.0075719, if (CHUNKTYPE < 0.5, if (LENGTH < 8.5, if (CONCEPTTYPE < 0.5, -0.0053055, 0.0020692), -0.0077103), 0.0036415)), -0.0091236))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (POS_20 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0161771, if (ENTITYPLACETYPE < 0.5, if (POS_19 < 0.5, if (POS_13 < 0.5, if (POS_18 < 0.5, if (POS_11 < 0.5, if (POS_10 < 0.5, 0.0034826, -0.007235), -0.0148214), -0.0046901), -0.0259102), if (LENGTH < 12.5, 0.0222894, -0.0085975)), 0.0080958)), -0.0232361), if (STOP_WORD_3 < 0.5, 0.009658, 0.0023804)), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 7.5, -0.0081212, -0.0041938), if (CHUNKTYPE < 0.5, -0.0026353, 0.0030653)), if (ORDER_IN_CLUSTER < 3.5, if (POS_1 < 0.5, if (STOP_WORD_2 < 0.5, -0.0054007, -0.0117554), -0.0125025), -0.0091279))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (STOP_WORD_2 < 0.5, if (ENTITYPLACETYPE < 0.5, 0.0020329, 0.0065302), -0.0049206), 0.0082494), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 8.5, -0.0079899, if (POS_18 < 0.5, if (TERM_CASE_3 < 0.5, if (POS_10 < 0.5, if (POS_1 < 0.5, -9.112E-4, -0.0175155), -0.018083), -0.0075432), -0.0158243)), if (CHUNKTYPE < 0.5, if (TERM_CASE_3 < 0.5, if (LENGTH < 12.5, if (STOP_WORD_1 < 0.5, -0.0017277, -0.0189726), if (STOP_WORD_3 < 0.5, 0.0125924, -0.0011788)), -0.0053552), 0.004024)), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.0077747, if (LENGTH < 7.5, -0.0016187, -0.0067493)), -0.0091496))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (CONCEPTTYPE < 0.5, if (POS_19 < 0.5, 0.0039294, -0.0030351), 0.0077582), if (STOP_WORD_2 < 0.5, if (POS_10 < 0.5, if (POS_18 < 0.5, 0.002138, -0.0103903), -0.0100254), -0.0051292)), 0.0085061), if (ORDER_IN_CLUSTER < 2.5, if (LENGTH < 21.5, if (STOP_WORD_3 < 1.5, if (STOP_WORD_1 < 0.5, if (ENTITYPLACETYPE < 0.5, if (TERM_CASE_2 < 0.5, -0.0077625, -0.0039689), if (TERM_CASE_1 < 0.5, if (LENGTH < 6.5, if (LENGTH < 5.5, -0.0033501, -0.0113932), -0.002125), 0.008145)), 0.0011904), -0.0137035), 6.225E-4), if (ENTITYPLACETYPE < 0.5, -0.0083126, if (CONCEPTTYPE < 0.5, -0.0073919, -0.0037914)))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (POS_19 < 0.5, if (POS_18 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0142326, if (POS_10 < 0.5, if (ENTITYPLACETYPE < 0.5, if (POS_11 < 0.5, if (POS_20 < 0.5, 0.0036802, -0.018494), -0.0147649), 0.0078761), -0.0071625)), -0.0061183), -0.0051327), if (STOP_WORD_3 < 0.5, 0.0095996, 0.0031133)), if (ORDER_IN_CLUSTER < 2.5, if (CHUNKTYPE < 0.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 12.5, -0.0073096, -0.003005), if (TERM_CASE_1 < 0.5, -0.0027642, if (CONCEPTTYPE < 0.5, -7.958E-4, 0.029568))), 8.063E-4), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.0071546, if (CONCEPTTYPE < 0.5, -0.0064773, -0.0012095)), -0.0086887))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0138905, if (EXTENDEDTYPE < 0.5, if (LENGTH < 25.5, 0.0030357, 0.0095136), if (POS_19 < 0.5, if (POS_20 < 0.5, 0.0016078, -0.0186607), -0.0064311))), if (STOP_WORD_1 < 0.5, 0.0090583, 0.0010345)), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, if (POS_1 < 0.5, if (STOP_WORD_2 < 0.5, if (STOP_WORD_1 < 0.5, if (TERM_CASE_2 < 0.5, if (LENGTH < 8.5, -0.0095116, if (TERM_CASE_3 < 0.5, -0.0026424, -0.0079867)), -0.002998), if (POS_10 < 0.5, if (ORDER_IN_CLUSTER < 2.5, 0.0094607, -0.0032605), -0.0164541)), -0.0102694), -0.0124184), if (CONCEPTTYPE < 0.5, -0.0039713, -9.36E-5)), -0.0088542)) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (TERM_CASE_4 < 0.5, 0.0054645, if (POS_18 < 0.5, if (POS_19 < 0.5, if (LENGTH < 25.5, if (STOP_WORD_3 < 1.5, if (POS_10 < 0.5, 0.0015531, -0.0146532), if (EXTENDEDTYPE < 0.5, 0.0022157, -0.014745)), 0.0057213), if (LENGTH < 13.5, 0.0115641, -0.0118718)), -0.0097261)), if (STOP_WORD_1 < 0.5, 0.0089889, -0.0019033)), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 8.5, -0.0083219, if (TERM_CASE_4 < 0.5, -0.0049572, -6.174E-4)), -0.0014242), if (ORDER_IN_CLUSTER < 3.5, if (POS_1 < 0.5, if (STOP_WORD_2 < 0.5, if (LENGTH < 14.5, -0.004242, -0.0079644), -0.011519), -0.0120036), -0.0086982))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_4 < 0.5, 0.0074906, if (POS_18 < 0.5, if (STOP_WORD_3 < 1.5, if (POS_10 < 0.5, if (POS_19 < 0.5, if (LENGTH < 21.5, 0.0020388, 0.0062093), -0.0047986), -0.0173869), -0.0034556), -0.0104258)), if (ORDER_IN_CLUSTER < 2.5, if (STOP_WORD_2 < 0.5, if (LENGTH < 11.5, if (CHUNKTYPE < 0.5, if (ENTITYPLACETYPE < 0.5, -0.007155, if (CONCEPTTYPE < 0.5, -0.0049511, 0.001242)), 0.0069858), if (TERM_CASE_3 < 0.5, 0.0010511, if (ENTITYPLACETYPE < 0.5, -0.0077005, -0.0017536))), -0.0110892), if (ENTITYPLACETYPE < 0.5, -0.0079187, if (ORDER_IN_CLUSTER < 3.5, if (LENGTH < 10.5, if (CHUNKTYPE < 0.5, -0.0020188, 0.0204519), -0.0076923), -0.0073604)))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (STOP_WORD_2 < 0.5, if (ENTITYPLACETYPE < 0.5, if (POS_18 < 0.5, if (POS_19 < 0.5, if (POS_10 < 0.5, 0.0029762, -0.006914), if (POS_17 < 0.5, -0.0083768, 0.008933)), -0.0043244), 0.0066431), -0.0053725), if (STOP_WORD_1 < 0.5, 0.0088945, if (LENGTH < 13.5, 0.0104462, -0.0058015))), if (ORDER_IN_CLUSTER < 2.5, if (LENGTH < 25.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 7.5, -0.0074769, if (STOP_WORD_3 < 1.5, -0.0035436, -0.0121068)), if (CHUNKTYPE < 0.5, -0.0027859, 0.0026616)), 0.0037022), if (ORDER_IN_CLUSTER < 3.5, if (STOP_WORD_2 < 0.5, if (POS_1 < 0.5, -0.0046449, -0.0118423), -0.011505), -0.0086359))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (POS_20 < 0.5, if (TERM_CASE_4 < 0.5, if (LENGTH < 22.5, 0.0074625, 0.0020221), if (STOP_WORD_3 < 1.5, if (POS_10 < 0.5, 0.002583, -0.021274), if (STOP_WORD_1 < 0.5, -0.0102947, 0.0010559))), -0.0230981), 0.0079978), if (ORDER_IN_CLUSTER < 2.5, if (CHUNKTYPE < 0.5, if (STOP_WORD_2 < 0.5, if (TERM_CASE_1 < 0.5, if (LENGTH < 12.5, -0.0053582, if (TERM_CASE_3 < 0.5, if (ENTITYPLACETYPE < 0.5, -0.001733, 0.0057119), -0.004411)), 0.0087413), -0.011294), 4.195E-4), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.006941, if (CONCEPTTYPE < 0.5, -0.0058254, if (LENGTH < 9.5, 0.00278, -0.0057821))), -0.0086608))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (TERM_CASE_4 < 0.5, if (LENGTH < 22.5, 0.0074243, 0.0015777), if (POS_18 < 0.5, if (STOP_WORD_3 < 1.5, if (POS_10 < 0.5, 0.0026429, -0.0142038), if (STOP_WORD_1 < 0.5, -0.0102012, 0.001646)), -0.0075344)), if (STOP_WORD_1 < 0.5, 0.0083933, 1.793E-4)), if (ORDER_IN_CLUSTER < 2.5, if (CHUNKTYPE < 0.5, if (STOP_WORD_2 < 0.5, if (LENGTH < 11.5, -0.0047917, if (TERM_CASE_3 < 0.5, if (ENTITYPLACETYPE < 0.5, -0.0011668, 0.0046828), -0.005016)), -0.0105396), if (CONCEPTTYPE < 0.5, -0.0014444, 0.0048186)), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.0068452, if (LENGTH < 10.5, -0.0012332, -0.0074714)), -0.0083915))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (CONCEPTTYPE < 0.5, 0.0027744, 0.0078097), if (REGEXTYPE < 0.5, if (STOP_WORD_2 < 0.5, -7.1E-6, if (LENGTH < 22.5, -0.0014678, -0.0144231)), 0.0167356)), if (STOP_WORD_1 < 0.5, 0.0085575, 7.479E-4)), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 7.5, if (TERM_CASE_2 < 0.5, -0.0096899, -0.0055229), if (STOP_WORD_2 < 0.5, if (POS_1 < 0.5, if (STOP_WORD_3 < 0.5, -0.004084, -6.323E-4), -0.0120285), -0.0097655)), if (LENGTH < 25.5, if (CONCEPTTYPE < 0.5, if (LENGTH < 17.5, -0.0044169, 0.0034553), if (LENGTH < 9.5, 0.002177, -0.0037432)), 0.0084802)), -0.0082518)) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (GOOD_SYNTAX < 0.5, -0.018082, if (ENTITYPLACETYPE < 0.5, if (POS_20 < 0.5, if (STOP_WORD_3 < 0.5, 0.0031176, if (REGEXTYPE < 0.5, -0.0012278, 0.0168942)), -0.0207908), 0.0068542)), if (STOP_WORD_1 < 0.5, 0.0088343, if (LENGTH < 17.5, 0.0075565, -0.0119716))), if (ORDER_IN_CLUSTER < 2.5, if (LENGTH < 12.5, if (ENTITYPLACETYPE < 0.5, -0.0059198, if (STOP_WORD_1 < 0.5, -0.0018112, -0.0145256)), if (TERM_CASE_3 < 0.5, if (ENTITYPLACETYPE < 0.5, 4.66E-5, if (STOP_WORD_3 < 0.5, 0.0125508, -7.222E-4)), -0.0040445)), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.0067296, if (CONCEPTTYPE < 0.5, -0.0059403, -3.154E-4)), -0.0082287))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0130698, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, 0.0029368, if (REGEXTYPE < 0.5, -0.0010747, 0.0131924)), 0.0066645)), if (LENGTH < 17.5, 0.0095125, if (STOP_WORD_3 < 0.5, 0.0072248, -0.0043409))), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 8.5, -0.0069589, if (POS_18 < 0.5, if (POS_10 < 0.5, if (STOP_WORD_1 < 0.5, if (TERM_CASE_3 < 0.5, -0.0021523, -0.0066839), if (TERM_CASE_2 < 0.5, 0.0130775, -0.00491)), -0.0165904), -0.0149927)), -0.0016661), if (ORDER_IN_CLUSTER < 3.5, if (POS_1 < 0.5, if (STOP_WORD_2 < 0.5, -0.0045425, -0.0106861), -0.0117066), -0.0080814))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0149992, if (EXTENDEDTYPE < 0.5, if (LENGTH < 25.5, 0.0021723, 0.0094263), if (POS_19 < 0.5, if (POS_20 < 0.5, if (STOP_WORD_3 < 1.5, 0.0024393, if (STOP_WORD_1 < 0.5, -0.0093972, 0.0027647)), -0.0196084), -0.0062675))), 0.0079505), if (ORDER_IN_CLUSTER < 2.5, if (LENGTH < 24.5, if (STOP_WORD_3 < 1.5, if (STOP_WORD_1 < 0.5, if (ENTITYPLACETYPE < 0.5, if (TERM_CASE_2 < 0.5, -0.0069596, -0.0027919), -0.0021029), if (TERM_CASE_4 < 0.5, -0.002159, 0.0099606)), -0.0125355), 0.0025012), if (ENTITYPLACETYPE < 0.5, -0.0074841, if (ORDER_IN_CLUSTER < 3.5, if (LENGTH < 7.5, -5.58E-5, -0.005147), -0.0066608)))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0167351, if (POS_20 < 0.5, if (POS_13 < 0.5, if (POS_11 < 0.5, if (POS_19 < 0.5, if (POS_18 < 0.5, 0.0040521, -0.0028967), -0.0025444), -0.0156449), -0.0252961), -0.0167637)), if (STOP_WORD_1 < 0.5, 0.0082947, 0.0012067)), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (CHUNKTYPE < 0.5, if (EXTENDEDTYPE < 0.5, -0.0055777, 0.0013311), if (TERM_CASE_4 < 0.5, -0.0049957, 0.0023179)), if (LENGTH < 12.5, -0.0021983, if (TERM_CASE_4 < 0.5, 1.283E-4, if (STOP_WORD_3 < 0.5, 0.0135179, 0.0010582)))), if (ENTITYPLACETYPE < 0.5, -0.0075836, if (CONCEPTTYPE < 0.5, -0.0068125, -0.0029257)))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_4 < 0.5, if (STOP_WORD_3 < 0.5, if (LENGTH < 21.5, 0.009203, 0.0049422), 0.0012454), if (ENTITYPLACETYPE < 0.5, if (REGEXTYPE < 0.5, if (STOP_WORD_3 < 1.5, if (POS_10 < 0.5, if (POS_19 < 0.5, if (LENGTH < 21.5, -8.88E-5, 0.0034595), if (LENGTH < 13.5, 0.013485, -0.0097365)), -0.0162894), -0.0053687), 0.0134028), 0.0057942)), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 22.5, -0.0050367, 4.752E-4), if (CONCEPTTYPE < 0.5, -0.0027557, if (TERM_CASE_1 < 0.5, -1.648E-4, 0.0231566))), if (ENTITYPLACETYPE < 0.5, -0.0073333, if (CONCEPTTYPE < 0.5, -0.0065181, if (LENGTH < 11.5, 5.864E-4, -0.0053126))))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (STOP_WORD_3 < 0.5, 0.0030356, if (EXTENDEDTYPE < 0.5, if (CONCEPTTYPE < 0.5, 0.0067235, if (LENGTH < 24.5, -0.0023484, 0.0128696)), -0.0023746)), if (STOP_WORD_3 < 0.5, 0.0084655, 0.0019471)), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 8.5, if (TERM_CASE_2 < 0.5, -0.0089123, -0.0044822), if (POS_18 < 0.5, if (POS_10 < 0.5, if (STOP_WORD_1 < 0.5, if (TERM_CASE_3 < 0.5, if (LENGTH < 11.5, -0.0057812, -0.0013194), -0.0070555), if (TERM_CASE_2 < 0.5, 0.0065103, -0.0043464)), -0.0150195), -0.0130033)), if (CONCEPTTYPE < 0.5, -0.0037904, if (LENGTH < 9.5, 0.0036481, -0.0019782))), -0.0079288)) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (POS_19 < 0.5, if (POS_13 < 0.5, if (POS_20 < 0.5, if (POS_11 < 0.5, if (GOOD_SYNTAX < 0.5, -0.010997, 0.0028358), -0.0130965), -0.0178932), -0.0247088), -0.0050676), if (STOP_WORD_3 < 0.5, 0.0082516, 0.0016223)), if (ORDER_IN_CLUSTER < 3.5, if (STOP_WORD_2 < 0.5, if (POS_1 < 0.5, if (CHUNKTYPE < 0.5, if (TERM_CASE_3 < 0.5, if (ENTITYPLACETYPE < 0.5, if (TERM_CASE_4 < 0.5, if (POS_7 < 0.5, -0.0010904, -0.0114916), if (LENGTH < 7.5, -0.009007, -0.0035801)), -0.0012884), -0.0053262), if (CONCEPTTYPE < 0.5, -0.0024507, if (LENGTH < 16.5, 0.0090724, -0.0037266))), -0.0103176), -0.0095315), -0.0079757)) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (STOP_WORD_3 < 0.5, if (CONCEPTTYPE < 0.5, 0.0024261, 0.0064191), -7.473E-4), if (LENGTH < 14.5, 0.0096266, if (STOP_WORD_3 < 0.5, 0.0069862, -0.0024304))), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 7.5, -0.0073758, if (STOP_WORD_2 < 0.5, if (STOP_WORD_1 < 0.5, -0.004182, if (CHUNKTYPE < 0.5, if (POS_10 < 0.5, 6.003E-4, -0.0154268), 0.0192921)), -0.0093216)), if (LENGTH < 25.5, if (CHUNKTYPE < 0.5, if (STOP_WORD_2 < 0.5, if (CONCEPTTYPE < 0.5, -0.0037909, if (LENGTH < 9.5, 0.0023692, -0.0038749)), -0.0123673), if (LENGTH < 14.5, 0.0065791, -0.0031951)), 0.0097134)), -0.0075453)) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (POS_11 < 0.5, if (POS_19 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0127214, if (POS_18 < 0.5, if (POS_10 < 0.5, if (POS_20 < 0.5, 0.0042794, -0.0149223), -0.0059644), -0.00434)), if (POS_17 < 0.5, -0.00853, 0.0083197)), -0.0154023), 0.0072666), if (ORDER_IN_CLUSTER < 3.5, if (CHUNKTYPE < 0.5, if (LENGTH < 26.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 7.5, -0.0073252, -0.0046927), if (TERM_CASE_3 < 0.5, if (CONCEPTTYPE < 0.5, if (LENGTH < 12.5, -0.0042392, 0.00568), if (STOP_WORD_1 < 0.5, 0.0016813, -0.0151251)), -0.0054961)), 0.0036687), if (LENGTH < 10.5, 0.0095857, -0.0017664)), -0.0075218)) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (TERM_CASE_4 < 0.5, 0.0040851, if (POS_18 < 0.5, if (LENGTH < 25.5, if (STOP_WORD_3 < 0.5, 5.489E-4, -0.0032057), if (EXTENDEDTYPE < 0.5, 0.0097395, 0.00173)), -0.0095617)), 0.0071657), if (ORDER_IN_CLUSTER < 2.5, if (POS_10 < 0.5, if (LENGTH < 7.5, if (LENGTH < 5.5, -0.002139, -0.0067802), if (TERM_CASE_3 < 0.5, if (LENGTH < 12.5, -0.0029803, if (ENTITYPLACETYPE < 0.5, 1.964E-4, if (LENGTH < 15.5, 0.0169786, 0.0010571))), -0.0036293)), -0.0158293), if (ORDER_IN_CLUSTER < 3.5, if (STOP_WORD_2 < 0.5, if (STOP_WORD_1 < 0.5, if (TERM_CASE_3 < 0.5, -0.0041655, -0.0076776), 4.427E-4), -0.0101598), -0.0074614))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, 0.0030204, if (STOP_WORD_3 < 2.5, if (CHUNKTYPE < 0.5, -0.0020253, 0.0061075), 0.0099431)), if (CHUNKTYPE < 0.5, 0.0052019, 0.0090719)), if (ORDER_IN_CLUSTER < 2.5, if (CHUNKTYPE < 0.5, if (LENGTH < 12.5, if (STOP_WORD_3 < 1.5, -0.0038004, -0.0136729), if (TERM_CASE_3 < 0.5, if (ENTITYPLACETYPE < 0.5, -5.417E-4, 0.0076114), -0.0052943)), if (CONCEPTTYPE < 0.5, -8.815E-4, if (LENGTH < 15.5, 0.0130198, -3.208E-4))), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.0058622, if (LENGTH < 9.5, if (LENGTH < 4.5, -0.0084541, if (CONCEPTTYPE < 0.5, -0.0014463, 0.0069624)), -0.005469)), -0.0077466))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (TERM_CASE_4 < 0.5, if (LENGTH < 22.5, 0.0073904, 8.639E-4), if (POS_18 < 0.5, if (LENGTH < 21.5, if (LENGTH < 16.5, 0.0015875, -0.0035187), 0.0028162), -0.0077068)), if (STOP_WORD_1 < 0.5, 0.0079288, 3.816E-4)), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (CHUNKTYPE < 0.5, -0.0051656, -8.779E-4), if (CHUNKTYPE < 0.5, if (STOP_WORD_2 < 0.5, if (CONCEPTTYPE < 0.5, -0.0024381, if (TERM_CASE_1 < 0.5, 6.55E-5, 0.0229835)), -0.01508), 0.0037615)), if (ORDER_IN_CLUSTER < 3.5, if (STOP_WORD_2 < 0.5, if (POS_1 < 0.5, if (STOP_WORD_1 < 0.5, -0.0044291, 1.517E-4), -0.0106843), -0.0103762), -0.007465))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_4 < 0.5, if (LENGTH < 19.5, 0.0078365, 0.0033459), if (GOOD_SYNTAX < 0.5, -0.0166694, if (ENTITYPLACETYPE < 0.5, if (POS_18 < 0.5, if (POS_13 < 0.5, if (LENGTH < 12.5, 0.0044238, if (LENGTH < 25.5, -8.241E-4, 0.0029538)), -0.024156), -0.0088242), 0.0064533))), if (ORDER_IN_CLUSTER < 2.5, if (POS_10 < 0.5, if (LENGTH < 7.5, if (LENGTH < 3.5, if (LENGTH < 2.5, -0.0099841, 0.0077372), -0.0051692), if (TERM_CASE_4 < 0.5, -0.0026828, -7.36E-5)), -0.0169365), if (ENTITYPLACETYPE < 0.5, if (ORDER_IN_CLUSTER < 3.5, -0.0059379, -0.0085801), if (CHUNKTYPE < 0.5, if (CONCEPTTYPE < 0.5, -0.0057262, -0.0027351), 0.0057282)))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (POS_19 < 0.5, if (POS_11 < 0.5, if (GOOD_SYNTAX < 0.5, -0.012704, if (POS_20 < 0.5, if (POS_10 < 0.5, if (CONCEPTTYPE < 0.5, 0.001787, 0.0048038), -0.0084645), -0.0167773)), -0.0154151), if (LENGTH < 16.5, 0.0038769, -0.0108723)), if (STOP_WORD_3 < 0.5, 0.0076338, 7.876E-4)), if (ORDER_IN_CLUSTER < 2.5, if (LENGTH < 8.5, if (ENTITYPLACETYPE < 0.5, -0.0062637, -0.002461), if (TERM_CASE_4 < 0.5, -0.0028453, if (STOP_WORD_3 < 1.5, if (STOP_WORD_1 < 0.5, 6.745E-4, 0.011796), -0.0077363))), if (ENTITYPLACETYPE < 0.5, -0.0069103, if (CHUNKTYPE < 0.5, if (CONCEPTTYPE < 0.5, -0.0061307, -0.0028226), 0.004202)))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (TERM_CASE_4 < 0.5, if (LENGTH < 22.5, 0.0065966, 7.641E-4), if (POS_18 < 0.5, if (POS_19 < 0.5, if (LENGTH < 21.5, -3.54E-5, 0.0030288), -0.0058081), -0.0089103)), if (STOP_WORD_1 < 0.5, 0.0075961, -0.0015237)), if (ORDER_IN_CLUSTER < 2.5, if (LENGTH < 21.5, if (STOP_WORD_2 < 0.5, if (LENGTH < 7.5, if (LENGTH < 5.5, -0.0014982, -0.0064615), if (TERM_CASE_3 < 0.5, if (LENGTH < 11.5, -0.0023943, 0.0017694), if (LENGTH < 14.5, -0.0011234, -0.0061581))), -0.0092837), 0.0021767), if (ORDER_IN_CLUSTER < 3.5, if (POS_1 < 0.5, if (STOP_WORD_2 < 0.5, -0.0034896, -0.0093722), -0.0108028), -0.0071782))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (TERM_CASE_4 < 0.5, if (LENGTH < 20.5, 0.0093178, 0.0024853), 0.0023846), if (REGEXTYPE < 0.5, if (CHUNKTYPE < 0.5, -0.0016443, 0.0073395), 0.015653)), if (STOP_WORD_1 < 0.5, 0.0074044, -0.0028423)), if (ORDER_IN_CLUSTER < 2.5, if (CHUNKTYPE < 0.5, if (STOP_WORD_2 < 0.5, if (TERM_CASE_1 < 0.5, if (LENGTH < 12.5, -0.0038378, if (TERM_CASE_3 < 0.5, 0.0011006, -0.0035059)), if (CONCEPTTYPE < 0.5, -1.936E-4, 0.0206681)), -0.0087056), if (TERM_CASE_4 < 0.5, -0.0012193, 0.0046032)), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.005725, if (CONCEPTTYPE < 0.5, -0.0046372, 2.139E-4)), -0.007143))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_4 < 0.5, if (LENGTH < 17.5, 0.0079157, if (STOP_WORD_3 < 0.5, 0.0048937, -7.541E-4)), if (POS_18 < 0.5, if (LENGTH < 25.5, if (STOP_WORD_3 < 1.5, if (POS_10 < 0.5, 0.0012577, -0.0125994), -0.0050607), if (POS_19 < 0.5, 0.0056225, -0.012138)), -0.0083839)), if (ORDER_IN_CLUSTER < 2.5, if (LENGTH < 7.5, if (LENGTH < 5.5, -0.0027176, -0.0065451), if (POS_10 < 0.5, if (POS_18 < 0.5, if (POS_1 < 0.5, -6.922E-4, -0.0115436), -0.0110793), -0.0154559)), if (ORDER_IN_CLUSTER < 3.5, if (POS_1 < 0.5, if (STOP_WORD_2 < 0.5, if (STOP_WORD_1 < 0.5, -0.004107, 7.045E-4), -0.0096248), -0.0103169), -0.0071264))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (POS_20 < 0.5, if (STOP_WORD_3 < 0.5, 0.0030903, if (POS_11 < 0.5, if (LENGTH < 17.5, -0.0024268, if (EXTENDEDTYPE < 0.5, 0.0062833, 2.239E-4)), -0.0151462)), -0.0228616), if (STOP_WORD_1 < 0.5, 0.0076768, if (LENGTH < 15.5, 0.0080515, -0.0094696))), if (ENTITYPLACETYPE < 0.5, if (LENGTH < 7.5, -0.0074728, if (ORDER_IN_CLUSTER < 3.5, if (POS_10 < 0.5, if (STOP_WORD_1 < 0.5, if (POS_1 < 0.5, -0.0035588, -0.0110619), if (CHUNKTYPE < 0.5, 4.08E-4, 0.0166155)), -0.0135994), -0.0077268)), if (CHUNKTYPE < 0.5, if (ORDER_IN_CLUSTER < 2.5, -0.0012327, -0.0043065), if (LENGTH < 11.5, 0.0112089, 2.74E-4)))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (POS_19 < 0.5, if (POS_20 < 0.5, if (POS_18 < 0.5, if (STOP_WORD_2 < 0.5, 0.0028562, -0.0028131), -0.0038413), -0.0176516), -0.0047792), 0.0066566), if (ORDER_IN_CLUSTER < 2.5, if (LENGTH < 25.5, if (STOP_WORD_3 < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_1 < 0.5, if (TERM_CASE_2 < 0.5, -0.005756, -0.0017927), 0.0054392), if (CONCEPTTYPE < 0.5, -0.0023315, if (TERM_CASE_1 < 0.5, 6.15E-5, 0.0245159))), -0.0107661), 0.0029584), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.0055612, if (LENGTH < 7.5, if (LENGTH < 4.5, -0.0053042, 0.0035985), if (CHUNKTYPE < 0.5, -0.006498, 0.0055994))), -0.0069048))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (TERM_CASE_4 < 0.5, 0.0048521, 0.0016563), if (REGEXTYPE < 0.5, if (CHUNKTYPE < 0.5, if (LENGTH < 16.5, -0.0050001, -8.071E-4), 0.0052943), 0.0146182)), if (STOP_WORD_1 < 0.5, 0.0072708, -0.0014882)), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, if (POS_1 < 0.5, if (STOP_WORD_2 < 0.5, if (LENGTH < 8.5, if (TERM_CASE_4 < 0.5, -0.0021294, -0.0078412), if (TERM_CASE_3 < 0.5, if (POS_10 < 0.5, -0.0011999, -0.0151139), -0.0051213)), -0.008518), -0.0103242), if (CONCEPTTYPE < 0.5, if (CHUNKTYPE < 0.5, -0.0035994, 0.0012388), if (STOP_WORD_3 < 1.5, 8.365E-4, -0.014637))), -0.0069901)) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0181604, if (ENTITYPLACETYPE < 0.5, if (REGEXTYPE < 0.5, if (STOP_WORD_3 < 0.5, if (CONCEPTTYPE < 0.5, if (POS_19 < 0.5, 0.0017905, -0.0051128), 0.0056444), -0.0015534), 0.0180978), 0.0056899)), 0.0068409), if (ORDER_IN_CLUSTER < 2.5, if (LENGTH < 12.5, if (ENTITYPLACETYPE < 0.5, -0.0050839, if (STOP_WORD_1 < 0.5, if (CONCEPTTYPE < 0.5, -0.0023704, 0.0013993), -0.0104769)), if (TERM_CASE_3 < 0.5, if (ENTITYPLACETYPE < 0.5, -2.8E-6, 0.004893), -0.0030825)), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.0055769, if (LENGTH < 9.5, if (CONCEPTTYPE < 0.5, -0.0030359, 0.0039209), -0.0053592)), -0.0070033))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (TERM_CASE_4 < 0.5, 0.0042003, if (POS_18 < 0.5, 0.0011643, -0.008305)), if (STOP_WORD_1 < 0.5, 0.0071109, -8.943E-4)), if (ORDER_IN_CLUSTER < 2.5, if (LENGTH < 7.5, if (ENTITYPLACETYPE < 0.5, -0.0064374, -0.0022824), if (TERM_CASE_4 < 0.5, -0.0023902, if (LENGTH < 12.5, -0.0015923, if (LENGTH < 15.5, if (ENTITYPLACETYPE < 0.5, 0.0025889, 0.0166472), 8.685E-4)))), if (ORDER_IN_CLUSTER < 3.5, if (POS_1 < 0.5, if (STOP_WORD_2 < 0.5, if (LENGTH < 9.5, if (ENTITYPLACETYPE < 0.5, if (TERM_CASE_4 < 0.5, -0.0015311, -0.0069653), -1.135E-4), if (LENGTH < 25.5, -0.0052679, 0.0054241)), -0.0090196), -0.0098286), -0.0067572))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0147789, if (ENTITYPLACETYPE < 0.5, if (POS_11 < 0.5, if (POS_19 < 0.5, if (POS_18 < 0.5, if (POS_20 < 0.5, 0.0024208, -0.0175133), -0.0047134), if (EXTENDEDTYPE < 0.5, 0.0183169, -0.0075819)), -0.0160729), 0.0057837)), if (STOP_WORD_3 < 0.5, 0.0076398, if (LENGTH < 14.5, 0.0077263, -0.0059873))), if (ORDER_IN_CLUSTER < 2.5, if (POS_10 < 0.5, if (LENGTH < 25.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 8.5, -0.0056282, if (TERM_CASE_3 < 0.5, if (POS_18 < 0.5, 4.266E-4, -0.0118752), -0.0073642)), -6.525E-4), 0.0032924), -0.0153252), if (ORDER_IN_CLUSTER < 3.5, -0.0045245, -0.006825))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_4 < 0.5, if (STOP_WORD_3 < 0.5, if (LENGTH < 17.5, 0.0086494, 0.0046036), 4.573E-4), if (POS_18 < 0.5, if (STOP_WORD_2 < 0.5, if (LENGTH < 23.5, 6.8E-4, 0.004053), -0.0043166), -0.0099531)), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_1 < 0.5, if (TERM_CASE_2 < 0.5, -0.0055884, -0.0018455), if (STOP_WORD_3 < 1.5, 0.005074, -0.0084121)), if (LENGTH < 17.5, if (LENGTH < 5.5, 0.002617, -0.0017765), 0.0040191)), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, if (EXTENDEDTYPE < 0.5, -0.0055967, 0.0035883), if (LENGTH < 10.5, if (CHUNKTYPE < 0.5, -0.0018811, 0.0220776), -0.0055489)), -0.0067699))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, 0.0022766, if (STOP_WORD_3 < 2.5, if (STOP_WORD_2 < 0.5, if (POS_10 < 0.5, if (CHUNKTYPE < 0.5, -2.481E-4, 0.0085536), -0.0109575), -0.0063742), 0.0090221)), if (STOP_WORD_1 < 0.5, 0.0068809, -0.0013795)), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 7.5, -0.0060405, if (TERM_CASE_3 < 0.5, if (STOP_WORD_2 < 0.5, if (LENGTH < 12.5, -0.003122, 9.681E-4), -0.0081619), -0.0056294)), if (LENGTH < 5.5, 0.0033079, if (LENGTH < 6.5, -0.0088247, if (CHUNKTYPE < 0.5, -0.0011805, 0.0029874)))), if (ENTITYPLACETYPE < 0.5, -0.0060608, if (ORDER_IN_CLUSTER < 3.5, -0.0026822, -0.0055386)))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (POS_19 < 0.5, if (POS_18 < 0.5, if (POS_20 < 0.5, if (STOP_WORD_3 < 1.5, 0.0022037, if (STOP_WORD_3 < 2.5, -0.0056497, 0.0110546)), -0.0189511), -0.0058411), -0.0044991), 0.0062943), if (ORDER_IN_CLUSTER < 3.5, if (STOP_WORD_2 < 0.5, if (ENTITYPLACETYPE < 0.5, if (POS_1 < 0.5, if (TERM_CASE_2 < 0.5, if (STOP_WORD_1 < 0.5, if (LENGTH < 8.5, -0.0074056, if (TERM_CASE_3 < 0.5, -0.0016832, -0.0071036)), if (CHUNKTYPE < 0.5, -0.0012072, 0.0130635)), -0.0015317), -0.0094688), if (LENGTH < 25.5, if (TERM_CASE_3 < 0.5, -0.0010929, if (CHUNKTYPE < 0.5, -0.0049078, 1.558E-4)), 0.0064741)), -0.0087076), -0.0066274)) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_4 < 0.5, if (LENGTH < 18.5, 0.0074476, 0.003019), if (ENTITYPLACETYPE < 0.5, if (LENGTH < 25.5, if (STOP_WORD_3 < 0.5, if (LENGTH < 11.5, 0.0084097, 4.69E-5), -0.0027729), if (LENGTH < 31.5, 0.0054712, -2.334E-4)), 0.0048071)), if (ORDER_IN_CLUSTER < 2.5, if (CHUNKTYPE < 0.5, if (STOP_WORD_2 < 0.5, if (LENGTH < 10.5, if (ENTITYPLACETYPE < 0.5, -0.005338, if (LENGTH < 9.5, -8.877E-4, -0.0081681)), if (TERM_CASE_3 < 0.5, if (POS_10 < 0.5, 0.0010318, -0.0164364), -0.0034169)), -0.0082132), if (TERM_CASE_4 < 0.5, -8.338E-4, 0.0039692)), if (ENTITYPLACETYPE < 0.5, -0.0062033, if (CONCEPTTYPE < 0.5, -0.0048085, -0.0017919)))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0128902, if (EXTENDEDTYPE < 0.5, if (LENGTH < 25.5, if (POS_10 < 0.5, 0.0019635, -0.0096927), 0.0093435), 5.47E-5)), if (STOP_WORD_1 < 0.5, 0.0070635, -8.492E-4)), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 7.5, -0.0062557, if (POS_10 < 0.5, if (STOP_WORD_1 < 0.5, if (TERM_CASE_3 < 0.5, if (POS_1 < 0.5, if (STOP_WORD_3 < 0.5, -0.0011822, -0.0067641), -0.0099856), if (LENGTH < 21.5, -0.0085105, -4.627E-4)), if (LENGTH < 9.5, 0.0084725, -0.0016164)), -0.0126015)), if (CHUNKTYPE < 0.5, -0.0017249, if (LENGTH < 11.5, 0.0112513, 7.338E-4))), -0.0065256)) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (POS_20 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0117586, if (ENTITYPLACETYPE < 0.5, if (POS_11 < 0.5, 8.047E-4, -0.0137932), 0.0050686)), -0.0193129), if (STOP_WORD_1 < 0.5, 0.0066615, if (LENGTH < 13.5, 0.0087308, -0.007798))), if (ORDER_IN_CLUSTER < 3.5, if (CHUNKTYPE < 0.5, if (STOP_WORD_2 < 0.5, if (POS_1 < 0.5, if (LENGTH < 6.5, if (ENTITYPLACETYPE < 0.5, if (TERM_CASE_4 < 0.5, -0.0019189, -0.0082834), -0.0025075), if (ORDER_IN_CLUSTER < 2.5, -0.0010267, -0.0033121)), -0.0083573), -0.0084762), if (LENGTH < 10.5, 0.0094157, if (CONCEPTTYPE < 0.5, if (ENTITYPLACETYPE < 0.5, -0.0044572, 0.002104), 0.0042305))), -0.0064498)) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (STOP_WORD_3 < 1.5, if (POS_19 < 0.5, if (POS_18 < 0.5, if (POS_10 < 0.5, 0.0027241, -0.011797), -0.0041164), -0.0042097), if (EXTENDEDTYPE < 0.5, 0.002821, if (LENGTH < 20.5, 0.0025468, -0.0096094))), if (STOP_WORD_3 < 0.5, 0.0068954, 9.877E-4)), if (ORDER_IN_CLUSTER < 2.5, if (LENGTH < 8.5, if (ENTITYPLACETYPE < 0.5, if (TERM_CASE_4 < 0.5, -0.0018671, -0.0075502), -0.0020919), if (POS_10 < 0.5, if (POS_18 < 0.5, -4.131E-4, -0.0108001), -0.0145574)), if (ORDER_IN_CLUSTER < 3.5, if (POS_1 < 0.5, if (STOP_WORD_2 < 0.5, if (STOP_WORD_1 < 0.5, -0.0037169, 0.0016967), -0.0089877), -0.009429), -0.0063806))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (POS_11 < 0.5, if (LENGTH < 26.5, if (TERM_CASE_4 < 0.5, 0.003292, -4.955E-4), 0.0035947), -0.0150524), if (STOP_WORD_1 < 0.5, 0.0065514, -0.002053)), if (ENTITYPLACETYPE < 0.5, if (ORDER_IN_CLUSTER < 3.5, if (LENGTH < 7.5, if (TERM_CASE_2 < 0.5, -0.0078049, -0.003239), if (POS_18 < 0.5, if (POS_10 < 0.5, if (STOP_WORD_1 < 0.5, if (TERM_CASE_3 < 0.5, -0.0024005, -0.0061275), 0.0025085), -0.0116573), -0.0106497)), -0.0078103), if (LENGTH < 25.5, if (ORDER_IN_CLUSTER < 3.5, if (CHUNKTYPE < 0.5, if (STOP_WORD_2 < 0.5, -0.0015786, -0.011678), if (LENGTH < 11.5, 0.0093368, -3.592E-4)), -0.0052594), 0.0084376))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (POS_18 < 0.5, 0.0028103, -0.0049653), if (STOP_WORD_3 < 2.5, if (STOP_WORD_3 < 1.5, if (CONCEPTTYPE < 0.5, 0.0018444, -0.0051396), if (LENGTH < 20.5, 1.64E-4, -0.0104176)), 0.0069841)), 0.0061372), if (ORDER_IN_CLUSTER < 3.5, if (STOP_WORD_2 < 0.5, if (POS_1 < 0.5, if (CHUNKTYPE < 0.5, if (TERM_CASE_3 < 0.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 9.5, if (TERM_CASE_4 < 0.5, -0.0022878, -0.0069758), -6.493E-4), if (CONCEPTTYPE < 0.5, -0.0021318, 0.0040021)), if (STOP_WORD_3 < 0.5, -0.0054074, 0.0013722)), if (LENGTH < 9.5, 0.0115688, -4.597E-4)), -0.0087464), -0.007742), -0.006447)) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_2 < 0.5, if (POS_18 < 0.5, if (POS_10 < 0.5, if (CONCEPTTYPE < 0.5, 0.0014661, 0.0044539), -0.0077322), -0.0057684), -0.0043545), 0.0056773), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, if (POS_1 < 0.5, if (STOP_WORD_2 < 0.5, if (STOP_WORD_1 < 0.5, if (TERM_CASE_2 < 0.5, if (LENGTH < 9.5, -0.0065427, if (TERM_CASE_3 < 0.5, -6.373E-4, -0.0065177)), if (POS_7 < 0.5, -1.41E-4, -0.0108137)), if (POS_10 < 0.5, 0.0037478, -0.0116254)), -0.0074044), -0.0094093), if (CHUNKTYPE < 0.5, if (TERM_CASE_3 < 0.5, if (CONCEPTTYPE < 0.5, -0.0014513, 0.0023516), -0.0034451), 0.0042849)), -0.0061508)) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (TERM_CASE_4 < 0.5, if (LENGTH < 22.5, 0.0061685, 5.15E-5), if (POS_19 < 0.5, 4.874E-4, if (LENGTH < 13.5, 0.0130888, -0.0097763))), 0.0060914), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 8.5, -0.0054016, if (POS_18 < 0.5, if (TERM_CASE_3 < 0.5, if (POS_10 < 0.5, 1.586E-4, -0.0133983), -0.0043648), -0.0116817)), if (CONCEPTTYPE < 0.5, if (LENGTH < 11.5, -0.0030641, 0.0011094), if (TERM_CASE_1 < 0.5, if (LENGTH < 7.5, -0.0037408, 0.0033386), 0.0264346))), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.0049021, if (LENGTH < 10.5, -6.125E-4, -0.0049327)), -0.0062874))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 1.5, if (POS_10 < 0.5, if (POS_19 < 0.5, if (POS_20 < 0.5, if (LENGTH < 28.5, 0.0024083, 0.006158), -0.0186422), if (LENGTH < 12.5, 0.0214161, -0.0063568)), -0.0137909), if (STOP_WORD_3 < 2.5, if (LENGTH < 20.5, 4.861E-4, if (STOP_WORD_2 < 0.5, -0.0057808, -0.0221523)), 0.0075378)), 0.0058266), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 7.5, -0.0057981, -0.0024142), if (LENGTH < 25.5, if (LENGTH < 5.5, 0.002449, if (LENGTH < 6.5, -0.0079902, -7.445E-4)), 0.0083487)), if (ENTITYPLACETYPE < 0.5, -0.0056205, if (CONCEPTTYPE < 0.5, -0.0047229, -0.0016518)))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (STOP_WORD_2 < 0.5, if (ENTITYPLACETYPE < 0.5, if (POS_18 < 0.5, if (POS_19 < 0.5, if (POS_10 < 0.5, 0.0021222, -0.0059273), -0.0042369), -0.0039936), 0.005264), -0.0050727), if (STOP_WORD_3 < 0.5, 0.0065396, 0.0013351)), if (ORDER_IN_CLUSTER < 2.5, if (CHUNKTYPE < 0.5, if (STOP_WORD_2 < 0.5, if (LENGTH < 7.5, if (TERM_CASE_4 < 0.5, 3.97E-5, -0.0046735), if (TERM_CASE_3 < 0.5, if (ENTITYPLACETYPE < 0.5, -0.0022059, if (CONCEPTTYPE < 0.5, -5.327E-4, 0.0060757)), -0.0034931)), -0.0076612), if (CONCEPTTYPE < 0.5, -4.815E-4, 0.0054866)), if (ENTITYPLACETYPE < 0.5, -0.0056475, if (CONCEPTTYPE < 0.5, -0.0045053, -0.0017705)))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (POS_19 < 0.5, if (TERM_CASE_4 < 0.5, if (LENGTH < 22.5, 0.0055781, if (TERM_CASE_2 < 0.5, 0.0034575, -0.0054404)), if (LENGTH < 21.5, -9.944E-4, if (STOP_WORD_3 < 1.5, if (LENGTH < 32.5, 0.0056431, -0.0010274), -0.0050648))), -0.0047221), if (STOP_WORD_1 < 0.5, 0.0061927, -0.0023092)), if (ORDER_IN_CLUSTER < 2.5, if (CHUNKTYPE < 0.5, if (ENTITYPLACETYPE < 0.5, -0.0039235, if (CONCEPTTYPE < 0.5, if (LENGTH < 11.5, -0.0038252, if (LENGTH < 13.5, 0.0059502, -0.0021774)), 9.908E-4)), if (LENGTH < 11.5, 0.0076676, -3.522E-4)), if (ENTITYPLACETYPE < 0.5, -0.0058481, if (CONCEPTTYPE < 0.5, -0.0050177, -7.061E-4)))) + if (ORDER_IN_CLUSTER < 1.5, if (STOP_WORD_3 < 0.5, if (TERM_CASE_3 < 0.5, if (CONCEPTTYPE < 0.5, 0.0010284, 0.0066736), 0.0065223), if (STOP_WORD_3 < 2.5, if (POS_10 < 0.5, if (LENGTH < 12.5, 0.0050687, if (STOP_WORD_2 < 0.5, -1.839E-4, -0.0065509)), -0.0117254), 0.0077351)), if (ORDER_IN_CLUSTER < 3.5, if (STOP_WORD_2 < 0.5, if (POS_1 < 0.5, if (STOP_WORD_1 < 0.5, if (ENTITYPLACETYPE < 0.5, if (TERM_CASE_2 < 0.5, if (LENGTH < 7.5, -0.0070956, if (TERM_CASE_3 < 0.5, -0.002042, -0.0070271)), if (POS_7 < 0.5, 2.952E-4, -0.0105202)), if (CONCEPTTYPE < 0.5, -0.0023623, -3.16E-5)), if (CHUNKTYPE < 0.5, -9.757E-4, 0.0108694)), -0.00811), -0.0074097), -0.0057322)) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0109055, if (ENTITYPLACETYPE < 0.5, if (POS_11 < 0.5, 7.382E-4, -0.0130298), 0.0048386)), 0.0058612), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, if (CHUNKTYPE < 0.5, if (POS_1 < 0.5, if (TERM_CASE_2 < 0.5, if (LENGTH < 7.5, -0.0073629, -0.0041134), if (STOP_WORD_2 < 0.5, -0.0010755, -0.0093889)), -0.0092831), if (STOP_WORD_1 < 0.5, -0.0021651, 0.0174822)), if (LENGTH < 25.5, if (CONCEPTTYPE < 0.5, if (LENGTH < 11.5, if (STOP_WORD_3 < 0.5, -0.002658, -0.0112811), if (ORDER_IN_CLUSTER < 2.5, 0.0021644, -0.0053512)), if (LENGTH < 9.5, 0.002741, -0.0016955)), 0.0075261)), -0.0060907)) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (POS_10 < 0.5, if (ENTITYPLACETYPE < 0.5, if (POS_11 < 0.5, if (POS_13 < 0.5, if (CONCEPTTYPE < 0.5, if (POS_19 < 0.5, 0.0011045, -0.0049646), if (STOP_WORD_1 < 0.5, 0.0056989, -0.0010321)), -0.0234215), -0.0139392), 0.0049608), -0.0096252), if (STOP_WORD_1 < 0.5, 0.0066785, -3.264E-4)), if (ORDER_IN_CLUSTER < 2.5, if (LENGTH < 8.5, -0.0033941, if (TERM_CASE_3 < 0.5, if (POS_10 < 0.5, if (LENGTH < 12.5, -0.0012222, if (STOP_WORD_3 < 0.5, if (ENTITYPLACETYPE < 0.5, 0.0023237, 0.0108239), -8.082E-4)), -0.0136204), if (STOP_WORD_3 < 0.5, -0.0032519, 0.0039417))), if (ORDER_IN_CLUSTER < 3.5, -0.0039092, -0.0059505))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_2 < 0.5, if (POS_18 < 0.5, if (POS_19 < 0.5, 0.0020916, -0.0038108), -0.0049058), if (LENGTH < 22.5, 6.65E-4, -0.0140022)), if (STOP_WORD_1 < 0.5, 0.0063283, -0.0013054)), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 11.5, if (STOP_WORD_1 < 0.5, -0.0055597, if (STOP_WORD_3 < 1.5, 0.0025758, -0.0082833)), if (LENGTH < 25.5, if (TERM_CASE_3 < 0.5, if (STOP_WORD_2 < 0.5, -2.99E-4, -0.0089974), -0.0052529), 0.0029078)), if (CONCEPTTYPE < 0.5, -0.002184, if (LENGTH < 11.5, if (STOP_WORD_2 < 0.5, 0.0036504, -0.0149534), if (LENGTH < 25.5, -0.0028139, 0.0095076)))), -0.0059727)) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (EXTENDEDTYPE < 0.5, if (LENGTH < 25.5, if (GOOD_SYNTAX < 0.5, -0.0187617, 0.0012127), 0.0078167), if (STOP_WORD_3 < 0.5, 8.129E-4, -0.0025046)), if (STOP_WORD_1 < 0.5, 0.0061633, -0.0021292)), if (ORDER_IN_CLUSTER < 3.5, if (LENGTH < 25.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 7.5, -0.0054452, if (STOP_WORD_2 < 0.5, if (POS_1 < 0.5, if (TERM_CASE_3 < 0.5, -0.0010183, -0.0047755), -0.0092639), -0.0073327)), if (CHUNKTYPE < 0.5, if (TERM_CASE_3 < 0.5, if (STOP_WORD_3 < 0.5, if (LENGTH < 12.5, -8.616E-4, 0.0093598), -0.0073599), -0.0032817), if (LENGTH < 14.5, 0.0059709, -0.0035062))), 0.0037668), -0.0056394)) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_4 < 0.5, if (STOP_WORD_3 < 0.5, 0.0058724, 0.0022579), if (POS_18 < 0.5, if (STOP_WORD_2 < 0.5, if (ENTITYPLACETYPE < 0.5, 0.0010854, 0.0045481), -0.0035701), -0.0060972)), if (ORDER_IN_CLUSTER < 2.5, if (CHUNKTYPE < 0.5, if (POS_10 < 0.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 24.5, if (TERM_CASE_2 < 0.5, if (STOP_WORD_3 < 0.5, -0.0057599, 1.844E-4), -0.0016271), 0.0032631), -7.803E-4), -0.014377), if (CONCEPTTYPE < 0.5, 5.08E-5, if (LENGTH < 16.5, if (ENTITYPLACETYPE < 0.5, 0.023955, 0.0050936), -0.0025106))), if (ENTITYPLACETYPE < 0.5, if (ORDER_IN_CLUSTER < 3.5, -0.0042898, -0.0067438), if (CONCEPTTYPE < 0.5, -0.0041399, -1.466E-4)))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (REGEXTYPE < 0.5, if (POS_19 < 0.5, if (POS_13 < 0.5, if (STOP_WORD_3 < 1.5, if (POS_10 < 0.5, 0.0023084, -0.01229), -0.0022428), -0.0229772), -0.0044172), 0.0146678), 0.005348), if (ORDER_IN_CLUSTER < 3.5, if (LENGTH < 25.5, if (STOP_WORD_3 < 1.5, if (ENTITYPLACETYPE < 0.5, if (POS_1 < 0.5, if (TERM_CASE_2 < 0.5, if (STOP_WORD_1 < 0.5, -0.0047668, 8.196E-4), if (POS_7 < 0.5, -5.52E-4, -0.0108814)), -0.0088699), if (CHUNKTYPE < 0.5, if (CONCEPTTYPE < 0.5, -0.0031281, if (TERM_CASE_3 < 0.5, 0.0027361, -0.003712)), if (LENGTH < 11.5, 0.0102952, -6.221E-4))), -0.009679), 0.003716), -0.0057841)) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_4 < 0.5, if (STOP_WORD_3 < 0.5, if (LENGTH < 22.5, 0.0069161, 0.0026916), 2.773E-4), if (POS_18 < 0.5, if (LENGTH < 25.5, if (GOOD_SYNTAX < 0.5, -0.0152795, 2.679E-4), 0.0034702), -0.0067681)), if (ENTITYPLACETYPE < 0.5, if (ORDER_IN_CLUSTER < 3.5, if (POS_1 < 0.5, if (STOP_WORD_2 < 0.5, if (STOP_WORD_1 < 0.5, if (TERM_CASE_3 < 0.5, if (LENGTH < 11.5, if (TERM_CASE_4 < 0.5, -0.0018963, -0.0060569), 5.77E-5), -0.0069136), if (POS_10 < 0.5, 0.0037451, -0.010911)), -0.006892), -0.0086871), -0.006907), if (CONCEPTTYPE < 0.5, if (LENGTH < 11.5, if (LENGTH < 7.5, -0.0023858, -0.005575), -0.0012976), 3.127E-4))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (GOOD_SYNTAX < 0.5, -0.013416, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, 0.0017878, if (REGEXTYPE < 0.5, -0.001748, 0.0137358)), 0.0050504)), if (STOP_WORD_3 < 0.5, 0.0062387, 3.375E-4)), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, if (POS_1 < 0.5, if (STOP_WORD_2 < 0.5, if (STOP_WORD_1 < 0.5, if (TERM_CASE_2 < 0.5, if (LENGTH < 9.5, -0.0061554, if (TERM_CASE_3 < 0.5, -9.229E-4, -0.0054569)), if (POS_7 < 0.5, 4.028E-4, -0.0099663)), if (POS_10 < 0.5, 0.0041051, -0.011965)), -0.0067372), -0.0089207), if (ORDER_IN_CLUSTER < 2.5, -3.53E-5, if (LENGTH < 10.5, -4.665E-4, -0.0050129))), -0.0060444)) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (POS_19 < 0.5, if (POS_11 < 0.5, if (LENGTH < 27.5, 5.492E-4, 0.0033153), -0.0136214), -0.0050055), if (STOP_WORD_3 < 0.5, 0.0061088, 4.413E-4)), if (ENTITYPLACETYPE < 0.5, if (LENGTH < 7.5, -0.0058789, if (STOP_WORD_1 < 0.5, -0.0038891, if (CHUNKTYPE < 0.5, if (STOP_WORD_3 < 1.5, -5.614E-4, -0.0076478), 0.0158819))), if (ORDER_IN_CLUSTER < 3.5, if (CHUNKTYPE < 0.5, if (TERM_CASE_3 < 0.5, if (STOP_WORD_1 < 0.5, if (CONCEPTTYPE < 0.5, if (LENGTH < 12.5, -0.0024318, 0.0043387), 0.0028647), -0.0112887), -0.0034375), if (LENGTH < 11.5, 0.0110138, 3.4E-6)), if (LENGTH < 13.5, -0.005825, 0.0018065)))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_2 < 0.5, if (POS_18 < 0.5, if (POS_19 < 0.5, 0.0020399, if (POS_17 < 0.5, -0.0079739, 0.0077704)), -0.0045273), -0.0042705), if (GOOD_SYNTAX < 0.5, -0.044834, 0.005528)), if (ORDER_IN_CLUSTER < 3.5, if (STOP_WORD_2 < 0.5, if (POS_1 < 0.5, if (STOP_WORD_1 < 0.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 7.5, if (TERM_CASE_4 < 0.5, -0.00137, -0.0072287), -0.0019477), if (LENGTH < 9.5, if (CONCEPTTYPE < 0.5, -8.339E-4, if (LENGTH < 6.5, 2.63E-4, 0.0072602)), if (ORDER_IN_CLUSTER < 2.5, -0.0010374, -0.0052903))), if (POS_10 < 0.5, 0.0027254, -0.0107426)), -0.0075967), -0.0070029), -0.0058943)) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (TERM_CASE_3 < 0.5, if (CONCEPTTYPE < 0.5, 7.304E-4, 0.0047063), 0.0053245), if (STOP_WORD_3 < 2.5, -0.0013843, 0.0071936)), if (CHUNKTYPE < 0.5, 0.0039065, 0.0069589)), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 8.5, if (TERM_CASE_4 < 0.5, -8.39E-4, -0.0061029), if (TERM_CASE_3 < 0.5, if (POS_18 < 0.5, if (POS_10 < 0.5, 7.354E-4, -0.0132291), -0.0108053), -0.0041196)), 8.29E-5), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.0044098, if (LENGTH < 9.5, if (CONCEPTTYPE < 0.5, -0.0021542, if (LENGTH < 5.5, -0.0047024, 0.0085462)), -0.0039158)), -0.0056911))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (ENTITYPLACETYPE < 0.5, if (POS_11 < 0.5, if (TERM_CASE_4 < 0.5, if (LENGTH < 23.5, 0.0054057, -0.0050707), if (LENGTH < 21.5, if (LENGTH < 16.5, 6.25E-5, -0.0037585), if (STOP_WORD_3 < 1.5, 0.0025946, -0.004957))), -0.0147982), 0.0037873), if (STOP_WORD_1 < 0.5, 0.0060617, -6.732E-4)), if (ORDER_IN_CLUSTER < 2.5, if (LENGTH < 21.5, if (ENTITYPLACETYPE < 0.5, -0.0034023, if (CONCEPTTYPE < 0.5, if (LENGTH < 17.5, -0.0023813, 0.0122696), if (TERM_CASE_1 < 0.5, 0.0010462, 0.0245056))), 0.0026042), if (ORDER_IN_CLUSTER < 3.5, if (POS_1 < 0.5, if (STOP_WORD_2 < 0.5, -0.0024896, -0.0075246), -0.0082288), -0.0053715))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0140922, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, 0.0012705, if (CHUNKTYPE < 0.5, if (REGEXTYPE < 0.5, -0.0030832, 0.0106479), 0.0069127)), 0.0043758)), if (STOP_WORD_3 < 0.5, 0.0057028, 6.454E-4)), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 7.5, -0.0048533, if (STOP_WORD_1 < 0.5, -0.00315, if (STOP_WORD_3 < 1.5, if (CHUNKTYPE < 0.5, if (LENGTH < 9.5, 0.0077806, -0.0014497), 0.0176083), -0.0072975))), if (CONCEPTTYPE < 0.5, if (CHUNKTYPE < 0.5, -0.0024389, 0.0038358), if (LENGTH < 9.5, 0.0032891, if (LENGTH < 25.5, -0.0017344, 0.0116658)))), -0.0057079)) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (POS_10 < 0.5, if (POS_18 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0099419, 0.0018142), if (TERM_CASE_2 < 0.5, -0.0090956, 0.0048625)), -0.0084599), if (STOP_WORD_1 < 0.5, 0.0060414, -0.0010435)), if (ORDER_IN_CLUSTER < 3.5, if (STOP_WORD_2 < 0.5, if (EXTENDEDTYPE < 0.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_1 < 0.5, if (TERM_CASE_2 < 0.5, if (CHUNKTYPE < 0.5, -0.0056868, if (CONCEPTTYPE < 0.5, -0.003887, 0.0064235)), if (POS_1 < 0.5, if (POS_7 < 0.5, 2.073E-4, -0.0106819), -0.0075795)), if (CHUNKTYPE < 0.5, -0.0012641, 0.0167626)), if (CHUNKTYPE < 0.5, -0.0011626, 0.0025646)), 0.0046914), -0.0069173), -0.0056397)) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (POS_20 < 0.5, if (POS_11 < 0.5, if (POS_13 < 0.5, if (TERM_CASE_4 < 0.5, 0.003112, if (LENGTH < 21.5, if (LENGTH < 9.5, 0.0075376, -0.0010629), if (POS_19 < 0.5, 0.0026077, -0.0099125))), -0.0224772), -0.0146677), -0.0187589), 0.0051532), if (ENTITYPLACETYPE < 0.5, if (ORDER_IN_CLUSTER < 2.5, if (STOP_WORD_1 < 0.5, if (TERM_CASE_2 < 0.5, -0.0042885, -7.172E-4), if (STOP_WORD_3 < 1.5, 0.0055999, -0.0083559)), -0.005241), if (ORDER_IN_CLUSTER < 3.5, if (CHUNKTYPE < 0.5, if (LENGTH < 9.5, if (CONCEPTTYPE < 0.5, -0.0014591, 0.0034826), if (LENGTH < 11.5, -0.0047794, -8.561E-4)), 0.0034623), -0.0040649))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_4 < 0.5, if (STOP_WORD_3 < 0.5, if (LENGTH < 22.5, 0.0064964, 0.0017795), 3.438E-4), if (LENGTH < 27.5, if (GOOD_SYNTAX < 0.5, -0.0140793, 2.06E-4), if (EXTENDEDTYPE < 0.5, 0.007975, 0.0010111))), if (ORDER_IN_CLUSTER < 2.5, if (LENGTH < 7.5, if (LENGTH < 5.5, if (LENGTH < 2.5, -0.0119005, if (TERM_CASE_4 < 0.5, 0.0066, if (ENTITYPLACETYPE < 0.5, -0.0059626, 0.0012436))), -0.004783), if (POS_10 < 0.5, if (STOP_WORD_1 < 0.5, if (ENTITYPLACETYPE < 0.5, -0.0020343, 5.341E-4), 0.0031655), -0.012697)), if (ORDER_IN_CLUSTER < 3.5, if (STOP_WORD_2 < 0.5, if (POS_1 < 0.5, -0.0023857, -0.0080862), -0.0079322), -0.0054873))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_4 < 0.5, if (LENGTH < 19.5, 0.0060748, 0.0021505), if (ENTITYPLACETYPE < 0.5, -4.995E-4, 0.0029317)), if (ORDER_IN_CLUSTER < 3.5, if (POS_10 < 0.5, if (LENGTH < 25.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_1 < 0.5, if (LENGTH < 9.5, if (TERM_CASE_4 < 0.5, if (POS_1 < 0.5, -0.0015543, -0.0082744), -0.0066599), if (TERM_CASE_3 < 0.5, if (STOP_WORD_3 < 0.5, if (ORDER_IN_CLUSTER < 2.5, 0.0013058, -0.0033382), -0.0077107), -0.0062193)), if (STOP_WORD_3 < 1.5, if (CHUNKTYPE < 0.5, 0.0028224, 0.0161777), -0.007837)), if (CONCEPTTYPE < 0.5, -0.0021692, if (LENGTH < 11.5, 0.0035302, -0.002672))), 0.0030665), -0.0121954), -0.0054196)) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (POS_20 < 0.5, if (POS_11 < 0.5, if (POS_19 < 0.5, 0.0016966, -0.0032049), -0.0130873), -0.0175838), if (STOP_WORD_1 < 0.5, 0.0057807, -0.0019965)), if (ORDER_IN_CLUSTER < 3.5, if (STOP_WORD_2 < 0.5, if (LENGTH < 25.5, if (POS_1 < 0.5, if (CHUNKTYPE < 0.5, if (TERM_CASE_2 < 0.5, if (ENTITYPLACETYPE < 0.5, if (EXTENDEDTYPE < 0.5, if (STOP_WORD_3 < 0.5, -0.0057523, -3.969E-4), 0.0038301), -0.0016762), if (POS_7 < 0.5, 1.834E-4, -0.0095058)), if (LENGTH < 14.5, if (LENGTH < 9.5, 0.0135111, 0.0017529), if (LENGTH < 17.5, -0.0068988, -9.56E-5))), -0.0066534), 0.0024084), -0.0062302), -0.0051074)) + if (ORDER_IN_CLUSTER < 2.5, if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (POS_20 < 0.5, if (POS_19 < 0.5, if (POS_18 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0105326, if (POS_11 < 0.5, 0.0018235, -0.0123765)), -0.0047604), if (LENGTH < 15.5, 0.0066237, -0.0072323)), -0.0156029), if (STOP_WORD_1 < 0.5, 0.0058986, -0.0018743)), if (ENTITYPLACETYPE < 0.5, if (LENGTH < 7.5, -0.0042957, if (POS_18 < 0.5, if (STOP_WORD_1 < 0.5, -0.0014716, if (POS_10 < 0.5, if (TERM_CASE_4 < 0.5, 0.0020924, 0.0152342), -0.0103134)), -0.0112)), 2.418E-4)), if (ENTITYPLACETYPE < 0.5, -0.0050267, if (ORDER_IN_CLUSTER < 3.5, if (LENGTH < 7.5, 0.0016477, -0.0029925), -0.0043674))) + if (ORDER_IN_CLUSTER < 2.5, if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0128497, if (POS_20 < 0.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (LENGTH < 32.5, if (CHUNKTYPE < 0.5, 0.003191, -0.0010209), -0.004492), if (REGEXTYPE < 0.5, -0.0015492, 0.0137018)), 0.0037597), -0.0168394)), 0.0051081), if (CHUNKTYPE < 0.5, if (LENGTH < 7.5, -0.0030254, if (POS_10 < 0.5, -5.306E-4, -0.0122115)), if (LENGTH < 15.5, if (CONCEPTTYPE < 0.5, 0.0015788, if (LENGTH < 13.5, 0.0047772, 0.0255504)), if (LENGTH < 22.5, -0.0033459, 0.0067153)))), if (ENTITYPLACETYPE < 0.5, -0.0047279, if (CONCEPTTYPE < 0.5, -0.003954, -1.26E-5))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (POS_13 < 0.5, 0.0014096, -0.0221129), if (LENGTH < 15.5, 0.0065941, if (STOP_WORD_3 < 0.5, 0.0042286, -0.0046691))), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, if (EXTENDEDTYPE < 0.5, if (POS_1 < 0.5, if (TERM_CASE_2 < 0.5, if (STOP_WORD_1 < 0.5, -0.0047686, if (STOP_WORD_3 < 1.5, 0.0031787, -0.0074552)), if (STOP_WORD_2 < 0.5, if (POS_7 < 0.5, 1.46E-4, -0.0088521), -0.0083276)), -0.0078294), 0.002584), if (CHUNKTYPE < 0.5, if (TERM_CASE_3 < 0.5, if (STOP_WORD_1 < 0.5, if (CONCEPTTYPE < 0.5, if (LENGTH < 12.5, -0.00239, 0.0035981), 0.0015887), -0.0097062), -0.0033672), 0.0028288)), -0.0054668)) + if (ORDER_IN_CLUSTER < 2.5, if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (POS_20 < 0.5, if (CONCEPTTYPE < 0.5, if (POS_19 < 0.5, 3.872E-4, -0.0048465), if (LENGTH < 25.5, 0.0015733, 0.0129061)), -0.0186174), if (STOP_WORD_3 < 0.5, 0.0054222, if (LENGTH < 13.5, 0.0074576, -0.0044036))), if (LENGTH < 8.5, -0.0025464, if (POS_10 < 0.5, if (TERM_CASE_3 < 0.5, if (POS_18 < 0.5, if (LENGTH < 11.5, -4.7E-4, 0.0026154), -0.0094897), if (ENTITYPLACETYPE < 0.5, -0.0052109, 6.52E-5)), -0.0127982))), if (ORDER_IN_CLUSTER < 3.5, if (POS_1 < 0.5, if (STOP_WORD_2 < 0.5, if (STOP_WORD_1 < 0.5, -0.0027834, 0.0020855), -0.0069906), -0.0081609), -0.0052273)) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (ENTITYPLACETYPE < 0.5, if (POS_20 < 0.5, if (STOP_WORD_3 < 0.5, if (CONCEPTTYPE < 0.5, -2.659E-4, 0.0040309), if (REGEXTYPE < 0.5, -0.0018569, 0.0107891)), -0.0151089), 0.0037686), if (LENGTH < 15.5, 0.0066249, 0.0035574)), if (CHUNKTYPE < 0.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 11.5, -0.004748, if (ORDER_IN_CLUSTER < 2.5, -0.0014364, -0.0044737)), if (ORDER_IN_CLUSTER < 3.5, if (TERM_CASE_3 < 0.5, if (STOP_WORD_1 < 0.5, if (CONCEPTTYPE < 0.5, -9.599E-4, 0.0020235), -0.0108906), -0.0036368), if (LENGTH < 13.5, -0.0053509, 0.0014776))), if (STOP_WORD_1 < 0.5, if (ENTITYPLACETYPE < 0.5, -0.0023834, 0.0031195), 0.0111238))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_4 < 0.5, if (STOP_WORD_3 < 0.5, if (LENGTH < 16.5, 0.0064385, 0.0031692), if (LENGTH < 13.5, 0.005946, -0.002465)), if (GOOD_SYNTAX < 0.5, -0.0127792, if (ENTITYPLACETYPE < 0.5, if (REGEXTYPE < 0.5, if (STOP_WORD_3 < 0.5, if (CHUNKTYPE < 0.5, 0.0022705, if (LENGTH < 22.5, -0.0042644, 0.0042572)), -0.0017904), 0.0117519), 0.0041327))), if (ORDER_IN_CLUSTER < 2.5, if (LENGTH < 7.5, if (LENGTH < 5.5, if (ENTITYPLACETYPE < 0.5, -0.0043873, 0.0032623), -0.0047969), if (POS_10 < 0.5, if (POS_18 < 0.5, -3.178E-4, -0.0094845), -0.0118449)), if (ENTITYPLACETYPE < 0.5, if (ORDER_IN_CLUSTER < 3.5, -0.0038449, -0.006342), -0.0026466))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 11.5, if (CHUNKTYPE < 0.5, 0.0017729, 0.0119581), if (STOP_WORD_3 < 0.5, if (TERM_CASE_3 < 0.5, if (CHUNKTYPE < 0.5, if (EXTENDEDTYPE < 0.5, 0.0053641, 6.043E-4), if (LENGTH < 24.5, -0.0042042, 0.0050383)), 0.0042807), -0.0012371)), if (CHUNKTYPE < 0.5, 0.0034585, 0.0064023)), if (ENTITYPLACETYPE < 0.5, if (LENGTH < 11.5, -0.0045099, if (TERM_CASE_3 < 0.5, if (STOP_WORD_3 < 0.5, 2.905E-4, -0.0040922), -0.0044686)), if (CHUNKTYPE < 0.5, if (CONCEPTTYPE < 0.5, -0.0027731, if (TERM_CASE_3 < 0.5, if (STOP_WORD_1 < 0.5, 0.0021029, -0.0118245), if (LENGTH < 10.5, 0.0033777, -0.0043585))), 0.0021061))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (CONCEPTTYPE < 0.5, 9.693E-4, 0.0055008), if (STOP_WORD_3 < 2.5, if (POS_10 < 0.5, if (STOP_WORD_3 < 1.5, 7.057E-4, -0.0044153), -0.0096905), 0.0071207)), 0.0044157), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, if (POS_1 < 0.5, if (LENGTH < 7.5, if (TERM_CASE_2 < 0.5, -0.0055987, -9.953E-4), if (POS_18 < 0.5, if (POS_7 < 0.5, if (TERM_CASE_3 < 0.5, -4.215E-4, if (STOP_WORD_3 < 0.5, -0.0045512, 0.0072483)), -0.0107516), -0.0095758)), -0.00816), if (TERM_CASE_4 < 0.5, -0.0022175, if (CONCEPTTYPE < 0.5, -0.0010487, if (STOP_WORD_1 < 0.5, 0.0042406, -0.0102225)))), -0.0049476)) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (POS_19 < 0.5, if (POS_18 < 0.5, if (TERM_CASE_4 < 0.5, 0.0037161, if (REGEXTYPE < 0.5, if (STOP_WORD_1 < 0.5, 0.0016857, if (LENGTH < 24.5, -0.0040707, 0.0027269)), 0.0133696)), if (LENGTH < 24.5, -2.64E-4, -0.0132598)), -0.004131), if (STOP_WORD_1 < 0.5, 0.0052308, -0.0012387)), if (ORDER_IN_CLUSTER < 2.5, if (CHUNKTYPE < 0.5, if (STOP_WORD_2 < 0.5, if (LENGTH < 11.5, if (ENTITYPLACETYPE < 0.5, -0.0039487, if (CONCEPTTYPE < 0.5, -0.0024748, 0.0038684)), 1.023E-4), -0.0062713), 0.0014277), if (STOP_WORD_2 < 0.5, if (ORDER_IN_CLUSTER < 3.5, if (STOP_WORD_1 < 0.5, -0.0032143, 0.0020178), -0.0047324), -0.0076371))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (POS_19 < 0.5, 0.0016398, -0.0034922), if (LENGTH < 34.5, if (LENGTH < 13.5, 0.0014083, if (LENGTH < 14.5, -0.0142889, if (TERM_CASE_2 < 0.5, if (STOP_WORD_3 < 1.5, -0.0015192, -0.006168), 0.0040361))), 0.0040342)), if (STOP_WORD_1 < 0.5, 0.0050596, -0.0022307)), if (ORDER_IN_CLUSTER < 2.5, if (LENGTH < 18.5, if (ENTITYPLACETYPE < 0.5, if (CHUNKTYPE < 0.5, -0.0032075, 1.902E-4), if (CONCEPTTYPE < 0.5, -0.0018731, if (TERM_CASE_1 < 0.5, if (LENGTH < 7.5, -0.005371, 0.0013392), 0.0230781))), 0.0012825), if (ENTITYPLACETYPE < 0.5, -0.0047609, if (CONCEPTTYPE < 0.5, -0.0033201, -9.118E-4)))) + if (ORDER_IN_CLUSTER < 2.5, if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (POS_10 < 0.5, if (EXTENDEDTYPE < 0.5, if (LENGTH < 25.5, 0.0010085, 0.0069518), if (LENGTH < 22.5, 0.001243, if (POS_18 < 0.5, -8.336E-4, -0.0122694))), -0.0070642), if (STOP_WORD_3 < 0.5, 0.0053898, -1.218E-4)), if (ENTITYPLACETYPE < 0.5, if (EXTENDEDTYPE < 0.5, if (TERM_CASE_3 < 0.5, if (LENGTH < 8.5, -0.0039162, -4.959E-4), -0.0052243), 0.0046194), if (CONCEPTTYPE < 0.5, -6.389E-4, 0.0022515))), if (ORDER_IN_CLUSTER < 3.5, if (CHUNKTYPE < 0.5, if (STOP_WORD_2 < 0.5, if (POS_1 < 0.5, -0.0023654, -0.0074924), -0.007877), if (LENGTH < 14.5, 0.0061774, -0.0073295)), -0.0049754)) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, 0.0017629, if (REGEXTYPE < 0.5, if (STOP_WORD_2 < 0.5, -6.818E-4, if (LENGTH < 22.5, -0.0015196, -0.0125621)), 0.011708)), if (CHUNKTYPE < 0.5, if (STOP_WORD_1 < 0.5, 0.003987, -0.0033875), 0.0062574)), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_1 < 0.5, if (TERM_CASE_2 < 0.5, -0.0035205, -7.65E-4), if (STOP_WORD_3 < 1.5, if (TERM_CASE_4 < 0.5, -3.039E-4, 0.0116126), -0.0071126)), if (TERM_CASE_3 < 0.5, 9.175E-4, -0.0011282)), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.0036746, if (CHUNKTYPE < 0.5, if (TERM_CASE_4 < 0.5, -0.0050641, -3.915E-4), 0.0063316)), -0.0051913))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 2.5, -3.26E-4, 0.0083876), 0.0029369), if (STOP_WORD_3 < 0.5, 0.0053767, -1.916E-4)), if (ORDER_IN_CLUSTER < 3.5, if (STOP_WORD_2 < 0.5, if (LENGTH < 26.5, if (POS_1 < 0.5, if (STOP_WORD_1 < 0.5, if (ORDER_IN_CLUSTER < 2.5, if (TERM_CASE_1 < 0.5, if (LENGTH < 6.5, -0.0036536, if (TERM_CASE_3 < 0.5, if (LENGTH < 11.5, -0.0015087, if (LENGTH < 15.5, 0.0062319, -9.102E-4)), if (ENTITYPLACETYPE < 0.5, -0.0053479, -0.0010366))), 0.0049837), if (LENGTH < 14.5, -0.0021675, -0.0061561)), if (POS_10 < 0.5, 0.0022445, -0.0092867)), -0.0062426), 0.0029227), -0.0058051), -0.005008)) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (CONCEPTTYPE < 0.5, -1.613E-4, if (STOP_WORD_3 < 0.5, 0.0054156, if (LENGTH < 20.5, -0.0033028, 0.0093069))), if (STOP_WORD_3 < 0.5, 0.0053995, 1.395E-4)), if (ORDER_IN_CLUSTER < 3.5, if (POS_1 < 0.5, if (POS_10 < 0.5, if (CHUNKTYPE < 0.5, if (LENGTH < 11.5, if (CONCEPTTYPE < 0.5, if (LENGTH < 9.5, -0.0028307, -0.0075842), if (ENTITYPLACETYPE < 0.5, if (TERM_CASE_4 < 0.5, -0.0014079, if (LENGTH < 8.5, -0.0054515, -4.853E-4)), 0.0021386)), if (TERM_CASE_3 < 0.5, if (STOP_WORD_3 < 0.5, 0.0028417, -0.0015154), -0.002849)), if (STOP_WORD_1 < 0.5, -1.69E-4, 0.0092206)), -0.0100777), -0.0072433), -0.0046655)) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (TERM_CASE_4 < 0.5, 0.0028838, if (LENGTH < 20.5, if (LENGTH < 15.5, 1.312E-4, -0.0031039), if (STOP_WORD_3 < 1.5, 0.0026176, if (LENGTH < 25.5, -0.0151727, -0.0015702)))), if (STOP_WORD_1 < 0.5, 0.0050691, -0.0028786)), if (ORDER_IN_CLUSTER < 2.5, if (LENGTH < 21.5, -0.0011859, 0.0020553), if (ENTITYPLACETYPE < 0.5, if (ORDER_IN_CLUSTER < 3.5, if (EXTENDEDTYPE < 0.5, -0.004109, 0.0049013), -0.0062146), if (LENGTH < 23.5, if (CONCEPTTYPE < 0.5, -0.0033266, if (LENGTH < 11.5, if (LENGTH < 4.5, -0.0058635, if (ORDER_IN_CLUSTER < 3.5, if (LENGTH < 7.5, 0.011381, 3.091E-4), -0.0022451)), -0.0040578)), 0.0121527)))) + if (ORDER_IN_CLUSTER < 2.5, if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (POS_19 < 0.5, if (STOP_WORD_3 < 2.5, if (STOP_WORD_3 < 1.5, if (POS_18 < 0.5, 0.0017708, if (LENGTH < 19.5, -0.0134732, if (LENGTH < 24.5, 0.0160392, -0.0107184))), if (STOP_WORD_2 < 0.5, -7.538E-4, -0.0112972)), 0.0098581), -0.0063261), 0.004338), if (LENGTH < 24.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 8.5, -0.003957, if (TERM_CASE_3 < 0.5, if (STOP_WORD_3 < 1.5, if (STOP_WORD_1 < 0.5, -6.735E-4, 0.0054652), -0.0078441), -0.0049023)), 1.8E-6), 0.0035565)), if (ENTITYPLACETYPE < 0.5, if (ORDER_IN_CLUSTER < 3.5, -0.0037295, -0.0060866), if (CHUNKTYPE < 0.5, -0.0028149, 0.0071955))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (POS_18 < 0.5, if (POS_19 < 0.5, if (POS_10 < 0.5, if (POS_20 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0084904, if (LENGTH < 9.5, 0.0073514, 0.0019155)), -0.0147088), -0.0070136), -0.0044873), if (TERM_CASE_2 < 0.5, -0.010124, 0.0029707)), if (STOP_WORD_3 < 0.5, 0.0052352, if (LENGTH < 15.5, 0.0054464, -0.0068859))), if (ORDER_IN_CLUSTER < 2.5, if (CHUNKTYPE < 0.5, if (TERM_CASE_1 < 0.5, -0.001685, if (CONCEPTTYPE < 0.5, -0.0033985, 0.0170294)), 0.0011249), if (ENTITYPLACETYPE < 0.5, if (ORDER_IN_CLUSTER < 3.5, -0.0037196, -0.0059769), if (CONCEPTTYPE < 0.5, -0.003476, if (LENGTH < 11.5, 0.0010272, -0.0037164))))) + if (ORDER_IN_CLUSTER < 2.5, if (STOP_WORD_2 < 0.5, if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (ENTITYPLACETYPE < 0.5, -8.59E-5, 0.0032628), 0.0049911), if (LENGTH < 22.5, if (ENTITYPLACETYPE < 0.5, if (TERM_CASE_3 < 0.5, if (LENGTH < 9.5, -0.0032104, 7.004E-4), -0.00509), if (CONCEPTTYPE < 0.5, -6.639E-4, 0.0018133)), 0.002738)), if (STOP_WORD_3 < 1.5, if (TERM_CASE_4 < 0.5, -0.0100488, if (POS_13 < 0.5, 0.0084514, -0.0191446)), -0.0075867)), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.0034391, if (CHUNKTYPE < 0.5, if (LENGTH < 8.5, if (LENGTH < 5.5, -0.00378, if (CONCEPTTYPE < 0.5, -0.0012524, 0.008936)), -0.0035542), 0.0063396)), -0.004791)) + if (ORDER_IN_CLUSTER < 1.5, if (POS_10 < 0.5, if (ENTITYPLACETYPE < 0.5, if (POS_11 < 0.5, 0.0014145, -0.0117675), if (CHUNKTYPE < 0.5, if (STOP_WORD_1 < 0.5, 0.0035616, -0.0045089), 0.0060398)), -0.0087104), if (ORDER_IN_CLUSTER < 2.5, if (LENGTH < 10.5, if (LENGTH < 5.5, if (TERM_CASE_4 < 0.5, if (CONCEPTTYPE < 0.5, -0.0026657, 0.0125798), if (CONCEPTTYPE < 0.5, 0.0016637, -0.0046097)), -0.0026974), if (TERM_CASE_4 < 0.5, -0.0012374, if (LENGTH < 19.5, 0.0031377, -0.0015069))), if (ENTITYPLACETYPE < 0.5, if (EXTENDEDTYPE < 0.5, -0.0047287, 0.0015157), if (CONCEPTTYPE < 0.5, -0.0034802, if (LENGTH < 4.5, -0.0067615, if (LENGTH < 7.5, 0.0044384, -0.0012433)))))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (POS_18 < 0.5, if (STOP_WORD_2 < 0.5, 0.0013871, -0.0037106), if (LENGTH < 22.5, if (STOP_WORD_3 < 0.5, -0.0100153, 0.0191959), -0.0108371)), if (STOP_WORD_1 < 0.5, 0.0050918, -0.0016045)), if (ENTITYPLACETYPE < 0.5, if (ORDER_IN_CLUSTER < 3.5, if (LENGTH < 7.5, if (TERM_CASE_2 < 0.5, -0.00572, -0.0020626), if (STOP_WORD_1 < 0.5, if (LENGTH < 11.5, -0.0041242, if (TERM_CASE_3 < 0.5, if (STOP_WORD_3 < 0.5, 3.635E-4, -0.0055527), -0.0039023)), if (STOP_WORD_3 < 1.5, if (LENGTH < 9.5, 0.0108656, 8.607E-4), -0.0059998))), -0.0057206), if (CHUNKTYPE < 0.5, if (ORDER_IN_CLUSTER < 2.5, -2.344E-4, -0.0026647), 0.0031043))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (TERM_CASE_4 < 0.5, 0.0034454, if (CHUNKTYPE < 0.5, if (LENGTH < 32.5, 0.002382, -0.0055036), if (LENGTH < 22.5, -0.0045405, 0.0047748))), -0.0013219), 0.0044944), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 12.5, -0.0027877, if (TERM_CASE_3 < 0.5, 6.657E-4, -0.0040928)), if (STOP_WORD_3 < 0.5, 2.023E-4, -0.0037834)), if (ORDER_IN_CLUSTER < 3.5, if (LENGTH < 4.5, -0.0058118, if (POS_1 < 0.5, if (LENGTH < 9.5, if (ENTITYPLACETYPE < 0.5, if (TERM_CASE_4 < 0.5, -3.175E-4, -0.0048892), 0.0014306), if (LENGTH < 26.5, -0.0036394, 0.0072831)), -0.0070347)), -0.0044785))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (TERM_CASE_4 < 0.5, if (LENGTH < 18.5, 0.0049755, 5.047E-4), if (REGEXTYPE < 0.5, -1.256E-4, 0.0095468)), if (CHUNKTYPE < 0.5, 0.0027465, 0.0059089)), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 8.5, -0.003476, if (TERM_CASE_3 < 0.5, if (POS_18 < 0.5, if (POS_10 < 0.5, 7.516E-4, -0.0103132), -0.0095231), if (STOP_WORD_3 < 0.5, -0.0052481, 0.0088106))), if (LENGTH < 17.5, -2.658E-4, 0.0034851)), if (ORDER_IN_CLUSTER < 3.5, if (POS_1 < 0.5, if (STOP_WORD_2 < 0.5, if (STOP_WORD_1 < 0.5, -0.0020572, 0.0019232), -0.0064383), -0.0071097), if (LENGTH < 12.5, -0.004996, -0.0020705)))) + if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (POS_11 < 0.5, if (POS_13 < 0.5, if (ORDER_IN_CLUSTER < 1.5, if (LENGTH < 12.5, 0.0038949, if (POS_19 < 0.5, if (TERM_CASE_4 < 0.5, if (LENGTH < 22.5, 0.0044613, 8.62E-5), -1.14E-5), -0.0059595)), if (LENGTH < 8.5, if (TERM_CASE_4 < 0.5, 2.32E-5, -0.0045638), if (TERM_CASE_3 < 0.5, 4.242E-4, -0.002741))), -0.0186513), -0.0123259), if (ORDER_IN_CLUSTER < 1.5, if (CHUNKTYPE < 0.5, 0.002944, 0.0062291), 3.61E-4)), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.0034353, if (CHUNKTYPE < 0.5, if (LENGTH < 7.5, if (LENGTH < 5.5, -0.0028677, 0.0051801), -0.0031814), 0.0058335)), -0.004572)) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_2 < 0.5, 0.0010304, if (LENGTH < 23.5, 3.895E-4, -0.015312)), if (CHUNKTYPE < 0.5, if (STOP_WORD_1 < 0.5, 0.003584, -0.003437), 0.0060657)), if (ORDER_IN_CLUSTER < 2.5, if (LENGTH < 25.5, if (POS_10 < 0.5, if (STOP_WORD_1 < 0.5, if (TERM_CASE_1 < 0.5, if (ENTITYPLACETYPE < 0.5, if (TERM_CASE_2 < 0.5, -0.0036443, 1.727E-4), if (TERM_CASE_4 < 0.5, -0.0022025, 2.902E-4)), 0.00482), if (STOP_WORD_3 < 1.5, if (TERM_CASE_4 < 0.5, 6.078E-4, if (LENGTH < 9.5, 0.0267202, 0.0028202)), -0.0059933)), -0.0117848), 0.0045257), if (ENTITYPLACETYPE < 0.5, -0.0040138, if (CONCEPTTYPE < 0.5, -0.0030944, -3.88E-5)))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (GOOD_SYNTAX < 0.5, -0.012145, if (ENTITYPLACETYPE < 0.5, if (POS_11 < 0.5, 2.346E-4, -0.0136071), 0.003872)), if (STOP_WORD_3 < 0.5, 0.0053479, -8.352E-4)), if (ORDER_IN_CLUSTER < 2.5, if (POS_10 < 0.5, if (LENGTH < 12.5, if (ENTITYPLACETYPE < 0.5, -0.0024083, -5.153E-4), if (TERM_CASE_3 < 0.5, if (LENGTH < 15.5, if (ENTITYPLACETYPE < 0.5, 0.0020101, 0.0153386), 6.866E-4), if (STOP_WORD_3 < 0.5, -0.0026273, 0.0078831))), -0.0111062), if (ENTITYPLACETYPE < 0.5, -0.0040042, if (CONCEPTTYPE < 0.5, -0.0025863, if (LENGTH < 8.5, if (LENGTH < 5.5, -0.0022329, if (ORDER_IN_CLUSTER < 3.5, 0.0114385, -0.0040637)), -0.0022211))))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (STOP_WORD_3 < 0.5, if (CONCEPTTYPE < 0.5, 6.395E-4, 0.0039129), -4.731E-4), if (STOP_WORD_1 < 0.5, 0.0048425, if (CHUNKTYPE < 0.5, -0.0050241, 0.0113075))), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 7.5, if (TERM_CASE_4 < 0.5, -0.0026423, -0.0058182), if (EXTENDEDTYPE < 0.5, if (POS_1 < 0.5, if (POS_11 < 0.5, -0.0016721, -0.0090084), -0.0068847), 0.0029151)), if (LENGTH < 25.5, if (TERM_CASE_3 < 0.5, if (STOP_WORD_3 < 0.5, if (LENGTH < 12.5, if (CONCEPTTYPE < 0.5, -6.516E-4, if (LENGTH < 4.5, -0.0032959, 0.003969)), 0.00811), -0.0063281), -0.0013025), 0.0071419)), -0.0047354)) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, 4.683E-4, if (STOP_WORD_1 < 0.5, 0.0050506, -0.0015297)), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 26.5, if (POS_1 < 0.5, if (LENGTH < 6.5, if (TERM_CASE_2 < 0.5, -0.0058316, 1.419E-4), if (STOP_WORD_2 < 0.5, if (STOP_WORD_1 < 0.5, -0.0018701, if (CHUNKTYPE < 0.5, -3.427E-4, 0.0145911)), -0.0054624)), -0.0071397), 0.0030205), if (CHUNKTYPE < 0.5, if (TERM_CASE_3 < 0.5, if (CONCEPTTYPE < 0.5, if (LENGTH < 12.5, -0.0014839, if (LENGTH < 15.5, 0.0155978, -6.027E-4)), if (STOP_WORD_1 < 0.5, 0.0037224, -0.0097389)), -0.0029211), if (LENGTH < 11.5, 0.0091976, 9.027E-4))), -0.0044954)) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (TERM_CASE_4 < 0.5, if (LENGTH < 20.5, 0.0046733, 8.251E-4), if (LENGTH < 21.5, -7.639E-4, if (LENGTH < 32.5, if (STOP_WORD_3 < 1.5, 0.0040487, -0.0026534), -0.0017195))), if (STOP_WORD_1 < 0.5, 0.0047493, -0.002194)), if (ENTITYPLACETYPE < 0.5, if (ORDER_IN_CLUSTER < 2.5, if (STOP_WORD_1 < 0.5, if (TERM_CASE_2 < 0.5, -0.0032711, -3.03E-5), if (STOP_WORD_3 < 1.5, 0.0053601, -0.004987)), -0.0039912), if (ORDER_IN_CLUSTER < 3.5, if (CHUNKTYPE < 0.5, if (TERM_CASE_3 < 0.5, if (STOP_WORD_1 < 0.5, 7.016E-4, -0.009457), -0.0016048), if (LENGTH < 14.5, 0.0065525, -3.78E-4)), if (LENGTH < 13.5, -0.0043881, 0.0025122)))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (POS_10 < 0.5, if (LENGTH < 27.5, if (LENGTH < 23.5, 8.637E-4, -0.0021732), if (EXTENDEDTYPE < 0.5, 0.0082735, 1.661E-4)), -0.0074062), if (STOP_WORD_3 < 0.5, 0.0048262, 6.08E-4)), if (ORDER_IN_CLUSTER < 2.5, if (CHUNKTYPE < 0.5, if (TERM_CASE_1 < 0.5, if (POS_10 < 0.5, -0.001591, -0.0107858), if (LENGTH < 2.5, -0.0070412, 0.0090356)), if (CONCEPTTYPE < 0.5, -2.129E-4, 0.0049083)), if (STOP_WORD_2 < 0.5, if (ORDER_IN_CLUSTER < 3.5, if (STOP_WORD_1 < 0.5, if (POS_1 < 0.5, -0.0025013, -0.0069603), 0.0013009), if (LENGTH < 12.5, -0.0049737, if (ENTITYPLACETYPE < 0.5, -0.0046367, 0.0020614))), -0.0065442))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_4 < 0.5, if (STOP_WORD_3 < 0.5, if (LENGTH < 21.5, 0.0055498, 0.0020362), 4.82E-5), if (GOOD_SYNTAX < 0.5, -0.0113037, if (ENTITYPLACETYPE < 0.5, -2.67E-5, 0.0034735))), if (ORDER_IN_CLUSTER < 2.5, if (CHUNKTYPE < 0.5, if (STOP_WORD_2 < 0.5, if (TERM_CASE_1 < 0.5, if (LENGTH < 11.5, -0.002038, if (TERM_CASE_3 < 0.5, if (STOP_WORD_3 < 0.5, if (ENTITYPLACETYPE < 0.5, if (TERM_CASE_2 < 0.5, -0.003187, 0.0039767), 0.0076656), -0.0018373), -0.0023729)), 0.0064775), -0.0051693), if (TERM_CASE_4 < 0.5, -0.0010081, 0.004785)), if (ORDER_IN_CLUSTER < 3.5, if (CHUNKTYPE < 0.5, -0.0029171, if (LENGTH < 14.5, 0.005164, -0.0067269)), -0.0044377))) + if (ORDER_IN_CLUSTER < 2.5, if (LENGTH < 8.5, if (ORDER_IN_CLUSTER < 1.5, 0.0038333, if (LENGTH < 5.5, -3.037E-4, if (LENGTH < 6.5, -0.0058466, -0.0018246))), if (ENTITYPLACETYPE < 0.5, if (POS_11 < 0.5, if (POS_13 < 0.5, if (POS_10 < 0.5, if (POS_19 < 0.5, if (POS_20 < 0.5, 9.971E-4, -0.0118452), -0.0031073), -0.0056757), -0.0181566), -0.0106292), if (STOP_WORD_1 < 0.5, if (ORDER_IN_CLUSTER < 1.5, 0.0045608, if (CONCEPTTYPE < 0.5, 3.27E-4, if (TERM_CASE_3 < 0.5, 0.0080705, 0.002204))), -0.0036974))), if (ENTITYPLACETYPE < 0.5, if (ORDER_IN_CLUSTER < 3.5, -0.0032085, -0.0053955), if (CHUNKTYPE < 0.5, if (CONCEPTTYPE < 0.5, -0.0027009, -5.635E-4), 0.0046874))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (POS_11 < 0.5, if (STOP_WORD_3 < 2.5, if (POS_10 < 0.5, if (STOP_WORD_3 < 1.5, 0.0016465, if (LENGTH < 20.5, 0.0061307, -0.0080488)), -0.0083587), 0.0088823), -0.0118533), 0.0038776), if (ORDER_IN_CLUSTER < 3.5, if (POS_10 < 0.5, if (POS_7 < 0.5, if (POS_1 < 0.5, if (TERM_CASE_3 < 0.5, if (LENGTH < 11.5, if (ENTITYPLACETYPE < 0.5, if (TERM_CASE_2 < 0.5, -0.0038623, 7.132E-4), if (CONCEPTTYPE < 0.5, -0.0014892, 0.0026902)), if (POS_18 < 0.5, 0.0020488, -0.0074362)), if (CHUNKTYPE < 0.5, -0.0032111, if (LENGTH < 14.5, 0.0039343, -0.0033507))), -0.0056254), -0.0087829), -0.0095263), -0.0042301)) + if (ORDER_IN_CLUSTER < 2.5, if (STOP_WORD_3 < 1.5, if (POS_18 < 0.5, if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_4 < 0.5, if (STOP_WORD_3 < 0.5, if (LENGTH < 21.5, 0.0058127, 0.001988), -0.001657), if (LENGTH < 22.5, if (LENGTH < 14.5, 0.0010777, -0.0020483), 0.0026629)), if (LENGTH < 7.5, if (LENGTH < 5.5, 8.059E-4, if (LENGTH < 6.5, -0.0069968, -0.0014324)), if (TERM_CASE_4 < 0.5, -7.077E-4, if (STOP_WORD_1 < 0.5, 7.588E-4, 0.0073991)))), -0.0073361), -0.0042277), if (ORDER_IN_CLUSTER < 3.5, if (CHUNKTYPE < 0.5, if (LENGTH < 25.5, if (LENGTH < 9.5, -0.0023911, -0.0044198), 0.007123), if (LENGTH < 14.5, 0.0051462, -0.0075633)), -0.0045164))
diff --git a/searchlib/src/test/files/simple.expression b/searchlib/src/test/files/simple.expression
new file mode 100644
index 00000000000..745e8d376f7
--- /dev/null
+++ b/searchlib/src/test/files/simple.expression
@@ -0,0 +1 @@
+a + b
diff --git a/searchlib/src/test/files/testAggregatorResults b/searchlib/src/test/files/testAggregatorResults
new file mode 100644
index 00000000000..060b8b86bda
--- /dev/null
+++ b/searchlib/src/test/files/testAggregatorResults
Binary files differ
diff --git a/searchlib/src/test/files/testFunctionNodes b/searchlib/src/test/files/testFunctionNodes
new file mode 100644
index 00000000000..646a445e857
--- /dev/null
+++ b/searchlib/src/test/files/testFunctionNodes
Binary files differ
diff --git a/searchlib/src/test/files/testGroup b/searchlib/src/test/files/testGroup
new file mode 100644
index 00000000000..6ecf091a865
--- /dev/null
+++ b/searchlib/src/test/files/testGroup
Binary files differ
diff --git a/searchlib/src/test/files/testGrouping b/searchlib/src/test/files/testGrouping
new file mode 100644
index 00000000000..5dbe3fd9c44
--- /dev/null
+++ b/searchlib/src/test/files/testGrouping
Binary files differ
diff --git a/searchlib/src/test/files/testGroupingLevel b/searchlib/src/test/files/testGroupingLevel
new file mode 100644
index 00000000000..fd212c02341
--- /dev/null
+++ b/searchlib/src/test/files/testGroupingLevel
Binary files differ
diff --git a/searchlib/src/test/files/testHitCollection b/searchlib/src/test/files/testHitCollection
new file mode 100644
index 00000000000..f75b8147529
--- /dev/null
+++ b/searchlib/src/test/files/testHitCollection
Binary files differ
diff --git a/searchlib/src/test/files/testResultTypes b/searchlib/src/test/files/testResultTypes
new file mode 100644
index 00000000000..c2055a34455
--- /dev/null
+++ b/searchlib/src/test/files/testResultTypes
Binary files differ
diff --git a/searchlib/src/test/files/testSpecialNodes b/searchlib/src/test/files/testSpecialNodes
new file mode 100644
index 00000000000..149cc17f99b
--- /dev/null
+++ b/searchlib/src/test/files/testSpecialNodes
Binary files differ
diff --git a/searchlib/src/test/files/treenet01.model b/searchlib/src/test/files/treenet01.model
new file mode 100644
index 00000000000..6e9fda96f57
--- /dev/null
+++ b/searchlib/src/test/files/treenet01.model
@@ -0,0 +1,531 @@
+MODELBEGIN:
+
+/* Generated by DCP*/
+/* N trees: 10 */
+
+link TN0;
+pred = tnscore; /* predicted value for LABEL */
+
+
+/*********************/
+/* Model is complete */
+/*********************/
+
+return;
+
+
+TN0:
+
+tnscore = 0.0;
+
+/* Tree 1 of 10 */
+
+
+N0_1:
+ if attribute(b) < 0.65 then goto N0_2;
+ else goto N0_3;
+
+N0_2:
+ if attribute(c) < 0.55 then goto N0_4;
+ else goto N0_5;
+
+N0_4:
+ if attribute(a) < 0.55 then goto T0_8;
+ else goto T0_9;
+
+T0_8:
+ response = 0.369863;
+ goto D0;
+
+T0_9:
+ response = -0.6578947;
+ goto D0;
+
+N0_5:
+ if attribute(a) < 0.65 then goto T0_6;
+ else goto T0_7;
+
+T0_6:
+ response = -0.775;
+ goto D0;
+
+T0_7:
+ response = -1;
+ goto D0;
+
+N0_3:
+ if attribute(c) < 0.45 then goto T0_10;
+ else goto T0_11;
+
+T0_10:
+ response = -0.9090909;
+ goto D0;
+
+T0_11:
+ response = -1;
+ goto D0;
+
+D0:
+
+tnscore = tnscore + response;
+
+/* Tree 2 of 10 */
+
+
+N1_1:
+ if attribute(a) < 0.55 then goto N1_2;
+ else goto N1_3;
+
+N1_2:
+ if attribute(b) < 0.35 then goto N1_4;
+ else goto N1_5;
+
+N1_4:
+ if attribute(c) < 0.75 then goto T1_8;
+ else goto T1_9;
+
+T1_8:
+ response = 0.4327977;
+ goto D1;
+
+T1_9:
+ response = 0.025;
+ goto D1;
+
+N1_5:
+ if attribute(c) < 0.75 then goto T1_6;
+ else goto T1_7;
+
+T1_6:
+ response = -0.1090028;
+ goto D1;
+
+T1_7:
+ response = -0.07682927;
+ goto D1;
+
+N1_3:
+ if attribute(c) < 0.55 then goto T1_10;
+ else goto T1_11;
+
+T1_10:
+ response = -0.04031544;
+ goto D1;
+
+T1_11:
+ response = -0.01875;
+ goto D1;
+
+D1:
+
+tnscore = tnscore + response;
+
+/* Tree 3 of 10 */
+
+
+N2_1:
+ if attribute(b) < 0.35 then goto T2_2;
+ else goto N2_3;
+
+T2_2:
+ response = 0.06336273;
+ goto D2;
+
+N2_3:
+ if attribute(c) < 0.85 then goto N2_4;
+ else goto N2_5;
+
+N2_4:
+ if attribute(a) < 0.35 then goto N2_8;
+ else goto T2_9;
+
+N2_8:
+ if attribute(c) < 0.35 then goto T2_10;
+ else goto T2_11;
+
+T2_10:
+ response = 0.6091127;
+ goto D2;
+
+T2_11:
+ response = 0.02845135;
+ goto D2;
+
+T2_9:
+ response = -0.07638131;
+ goto D2;
+
+N2_5:
+ if attribute(a) < 0.75 then goto T2_6;
+ else goto T2_7;
+
+T2_6:
+ response = -0.018862;
+ goto D2;
+
+T2_7:
+ response = 0.01875;
+ goto D2;
+
+D2:
+
+tnscore = tnscore + response;
+
+/* Tree 4 of 10 */
+
+
+N3_1:
+ if attribute(c) < 0.15 then goto N3_2;
+ else goto N3_3;
+
+N3_2:
+ if attribute(b) < 0.55 then goto N3_4;
+ else goto T3_5;
+
+N3_4:
+ if attribute(a) < 0.35 then goto N3_6;
+ else goto T3_7;
+
+N3_6:
+ if attribute(b) < 0.3 then goto T3_8;
+ else goto T3_9;
+
+T3_8:
+ response = -1.866023;
+ goto D3;
+
+T3_9:
+ response = 0.1300271;
+ goto D3;
+
+T3_7:
+ response = 0.6299557;
+ goto D3;
+
+T3_5:
+ response = 0.1788445;
+ goto D3;
+
+N3_3:
+ if attribute(b) < 0.65 then goto T3_10;
+ else goto T3_11;
+
+T3_10:
+ response = -0.1586424;
+ goto D3;
+
+T3_11:
+ response = 0.06778581;
+ goto D3;
+
+D3:
+
+tnscore = tnscore + response;
+
+/* Tree 5 of 10 */
+
+
+N4_1:
+ if attribute(c) < 0.45 then goto N4_2;
+ else goto N4_3;
+
+N4_2:
+ if attribute(a) < 0.35 then goto N4_4;
+ else goto N4_5;
+
+N4_4:
+ if attribute(b) < 0.75 then goto T4_6;
+ else goto T4_7;
+
+T4_6:
+ response = 0.1426054;
+ goto D4;
+
+T4_7:
+ response = -0.2282;
+ goto D4;
+
+N4_5:
+ if attribute(b) < 0.85 then goto T4_8;
+ else goto T4_9;
+
+T4_8:
+ response = -0.09571452;
+ goto D4;
+
+T4_9:
+ response = -0.04941978;
+ goto D4;
+
+N4_3:
+ if attribute(a) < 0.25 then goto T4_10;
+ else goto T4_11;
+
+T4_10:
+ response = 0.2759441;
+ goto D4;
+
+T4_11:
+ response = 0.0172878;
+ goto D4;
+
+D4:
+
+tnscore = tnscore + response;
+
+/* Tree 6 of 10 */
+
+
+N5_1:
+ if attribute(a) < 0.15 then goto N5_2;
+ else goto N5_3;
+
+N5_2:
+ if attribute(b) < 0.75 then goto T5_4;
+ else goto T5_5;
+
+T5_4:
+ response = 0.3165435;
+ goto D5;
+
+T5_5:
+ response = -0.04458321;
+ goto D5;
+
+N5_3:
+ if attribute(a) < 0.55 then goto T5_6;
+ else goto N5_7;
+
+T5_6:
+ response = -0.1137117;
+ goto D5;
+
+N5_7:
+ if attribute(b) < 0.75 then goto T5_8;
+ else goto N5_9;
+
+T5_8:
+ response = 0.04622166;
+ goto D5;
+
+N5_9:
+ if attribute(c) < 0.65 then goto T5_10;
+ else goto T5_11;
+
+T5_10:
+ response = 0.004746275;
+ goto D5;
+
+T5_11:
+ response = -0.03648972;
+ goto D5;
+
+D5:
+
+tnscore = tnscore + response;
+
+/* Tree 7 of 10 */
+
+
+N6_1:
+ if attribute(a) < 0.95 then goto N6_2;
+ else goto T6_3;
+
+N6_2:
+ if attribute(b) < 0.25 then goto N6_4;
+ else goto T6_5;
+
+N6_4:
+ if attribute(c) < 0.25 then goto T6_6;
+ else goto N6_7;
+
+T6_6:
+ response = 0.7623822;
+ goto D6;
+
+N6_7:
+ if attribute(a) < 0.65 then goto T6_8;
+ else goto N6_9;
+
+T6_8:
+ response = 0.2338952;
+ goto D6;
+
+N6_9:
+ if attribute(c) < 0.85 then goto T6_10;
+ else goto T6_11;
+
+T6_10:
+ response = -0.06132011;
+ goto D6;
+
+T6_11:
+ response = 0.05052024;
+ goto D6;
+
+T6_5:
+ response = -0.04188744;
+ goto D6;
+
+T6_3:
+ response = -0.03245768;
+ goto D6;
+
+D6:
+
+tnscore = tnscore + response;
+
+/* Tree 8 of 10 */
+
+
+N7_1:
+ if attribute(c) < 0.55 then goto N7_2;
+ else goto N7_3;
+
+N7_2:
+ if attribute(b) < 0.65 then goto T7_4;
+ else goto T7_5;
+
+T7_4:
+ response = -0.2042442;
+ goto D7;
+
+T7_5:
+ response = 0.03887484;
+ goto D7;
+
+N7_3:
+ if attribute(b) < 0.25 then goto T7_6;
+ else goto N7_7;
+
+T7_6:
+ response = -0.0474437;
+ goto D7;
+
+N7_7:
+ if attribute(a) < 0.15 then goto T7_8;
+ else goto N7_9;
+
+T7_8:
+ response = -0.3700475;
+ goto D7;
+
+N7_9:
+ if attribute(a) < 0.65 then goto T7_10;
+ else goto T7_11;
+
+T7_10:
+ response = 0.07656199;
+ goto D7;
+
+T7_11:
+ response = 0.1085871;
+ goto D7;
+
+D7:
+
+tnscore = tnscore + response;
+
+/* Tree 9 of 10 */
+
+
+N8_1:
+ if attribute(a) < 0.75 then goto T8_2;
+ else goto N8_3;
+
+T8_2:
+ response = 0.0189638;
+ goto D8;
+
+N8_3:
+ if attribute(b) < 0.85 then goto T8_4;
+ else goto N8_5;
+
+T8_4:
+ response = 0.0001942833;
+ goto D8;
+
+N8_5:
+ if attribute(c) < 0.85 then goto N8_6;
+ else goto T8_7;
+
+N8_6:
+ if attribute(c) < 0.45 then goto T8_8;
+ else goto N8_9;
+
+T8_8:
+ response = -0.009795157;
+ goto D8;
+
+N8_9:
+ if attribute(a) < 0.85 then goto T8_10;
+ else goto T8_11;
+
+T8_10:
+ response = -0.01795083;
+ goto D8;
+
+T8_11:
+ response = -0.01329222;
+ goto D8;
+
+T8_7:
+ response = -0.1179778;
+ goto D8;
+
+D8:
+
+tnscore = tnscore + response;
+
+/* Tree 10 of 10 */
+
+
+N9_1:
+ if attribute(c) < 0.75 then goto N9_2;
+ else goto N9_3;
+
+N9_2:
+ if attribute(b) < 0.45 then goto N9_4;
+ else goto T9_5;
+
+N9_4:
+ if attribute(c) < 0.15 then goto T9_6;
+ else goto N9_7;
+
+T9_6:
+ response = -0.4551494;
+ goto D9;
+
+N9_7:
+ if attribute(c) < 0.65 then goto T9_8;
+ else goto T9_9;
+
+T9_8:
+ response = 0.1471968;
+ goto D9;
+
+T9_9:
+ response = -0.06380587;
+ goto D9;
+
+T9_5:
+ response = 0.03410008;
+ goto D9;
+
+N9_3:
+ if attribute(b) < 0.65 then goto T9_10;
+ else goto T9_11;
+
+T9_10:
+ response = -0.06397114;
+ goto D9;
+
+T9_11:
+ response = -0.01491517;
+ goto D9;
+
+D9:
+
+tnscore = tnscore + response;
+
+return;
diff --git a/searchlib/src/test/files/treenet02.model b/searchlib/src/test/files/treenet02.model
new file mode 100644
index 00000000000..3c7522a76d6
--- /dev/null
+++ b/searchlib/src/test/files/treenet02.model
@@ -0,0 +1,11784 @@
+MODELBEGIN:
+
+ /* N trees: 500 */
+
+link TN0;
+pred = tnscore; /* predicted value for GRADE */
+
+/*********************/
+/* Model is complete */
+/*********************/
+
+return;
+
+
+
+TN0:
+
+tnscore = 0.0;
+
+/* Tree 1 of 80 */
+N0_1:
+ if attribute(ythl) < 0.5000000000 then goto N0_2;
+ else goto N0_12;
+
+N0_2:
+ if age(created_at) < 1830.0000000000 then goto N0_3;
+ else goto N0_10;
+
+N0_3:
+ if term(0).significance < 0.9964904785 then goto N0_4;
+ else goto N0_5;
+
+N0_4:
+ if match < 0.6113165021 then goto T0_1;
+ else goto T0_2;
+
+T0_1:
+ response = -0.0284270267;
+ goto D0;
+
+T0_2:
+ response = 0.0003592783;
+ goto D0;
+
+N0_5:
+ if fieldMatch(text).significantOccurrence < 0.0488094985 then goto N0_6;
+ else goto N0_7;
+
+N0_6:
+ if attribute(user_friends_count) < 184.5000000000 then goto T0_3;
+ else goto T0_4;
+
+T0_3:
+ response = -0.0124428511;
+ goto D0;
+
+T0_4:
+ response = 0.0077143433;
+ goto D0;
+
+N0_7:
+ if term(0).significance < 0.9977675080 then goto T0_5;
+ else goto N0_8;
+
+T0_5:
+ response = -0.0390395696;
+ goto D0;
+
+N0_8:
+ if term(1).significance < 0.9895755053 then goto T0_6;
+ else goto N0_9;
+
+T0_6:
+ response = 0.0225917600;
+ goto D0;
+
+N0_9:
+ if fieldMatch(text).significantOccurrence < 0.1335410029 then goto T0_7;
+ else goto T0_8;
+
+T0_7:
+ response = -0.0167178600;
+ goto D0;
+
+T0_8:
+ response = -0.0425634221;
+ goto D0;
+
+N0_10:
+ if age(created_at) < 5400.0000000000 then goto T0_9;
+ else goto N0_11;
+
+T0_9:
+ response = -0.0466900690;
+ goto D0;
+
+N0_11:
+ if age(created_at) < 45000.0000000000 then goto T0_10;
+ else goto T0_11;
+
+T0_10:
+ response = -0.0257095410;
+ goto D0;
+
+T0_11:
+ response = -0.0128030420;
+ goto D0;
+
+N0_12:
+ if age(created_at) < 1830.0000000000 then goto N0_13;
+ else goto N0_16;
+
+N0_13:
+ if fieldMatch(text).importance < 0.7488905191 then goto N0_14;
+ else goto T0_15;
+
+N0_14:
+ if fieldMatch(text).absoluteOccurrence < 0.0116665000 then goto N0_15;
+ else goto T0_14;
+
+N0_15:
+ if fieldMatch(text) < 0.8492144942 then goto T0_12;
+ else goto T0_13;
+
+T0_12:
+ response = 0.0288744693;
+ goto D0;
+
+T0_13:
+ response = 0.0439309311;
+ goto D0;
+
+T0_14:
+ response = 0.0566558463;
+ goto D0;
+
+T0_15:
+ response = 0.0594293259;
+ goto D0;
+
+N0_16:
+ if age(created_at) < 5400.0000000000 then goto T0_16;
+ else goto N0_17;
+
+T0_16:
+ response = -0.0184100055;
+ goto D0;
+
+N0_17:
+ if age(created_at) < 27000.0000000000 then goto T0_17;
+ else goto T0_18;
+
+T0_17:
+ response = -0.0003458478;
+ goto D0;
+
+T0_18:
+ response = 0.0087464789;
+ goto D0;
+
+D0:
+
+tnscore = tnscore + response;
+
+/* Tree 2 of 80 */
+N1_1:
+ if attribute(ythl) < 0.5000000000 then goto N1_2;
+ else goto N1_12;
+
+N1_2:
+ if age(created_at) < 1830.0000000000 then goto N1_3;
+ else goto N1_10;
+
+N1_3:
+ if term(0).significance < 0.9964904785 then goto N1_4;
+ else goto N1_5;
+
+N1_4:
+ if term(0).significance < 0.9914690256 then goto T1_1;
+ else goto T1_2;
+
+T1_1:
+ response = -0.0118607453;
+ goto D1;
+
+T1_2:
+ response = 0.0031225791;
+ goto D1;
+
+N1_5:
+ if term(0).significance < 0.9977675080 then goto N1_6;
+ else goto N1_8;
+
+N1_6:
+ if term(1).significance < 0.9883980155 then goto T1_3;
+ else goto N1_7;
+
+T1_3:
+ response = -0.0803907557;
+ goto D1;
+
+N1_7:
+ if attribute(yst_link_array_size) < 0.0250600018 then goto T1_4;
+ else goto T1_5;
+
+T1_4:
+ response = -0.0303931857;
+ goto D1;
+
+T1_5:
+ response = 0.0259097321;
+ goto D1;
+
+N1_8:
+ if term(1).significance < 0.9972054958 then goto T1_6;
+ else goto N1_9;
+
+T1_6:
+ response = 0.0065438125;
+ goto D1;
+
+N1_9:
+ if term(1).significance < 0.9975290298 then goto T1_7;
+ else goto T1_8;
+
+T1_7:
+ response = -0.0913176725;
+ goto D1;
+
+T1_8:
+ response = -0.0123125115;
+ goto D1;
+
+N1_10:
+ if age(created_at) < 5400.0000000000 then goto T1_9;
+ else goto N1_11;
+
+T1_9:
+ response = -0.0448246506;
+ goto D1;
+
+N1_11:
+ if age(created_at) < 23400.0000000000 then goto T1_10;
+ else goto T1_11;
+
+T1_10:
+ response = -0.0262210797;
+ goto D1;
+
+T1_11:
+ response = -0.0146461827;
+ goto D1;
+
+N1_12:
+ if age(created_at) < 1830.0000000000 then goto N1_13;
+ else goto N1_15;
+
+N1_13:
+ if fieldMatch(text).importance < 0.7488645315 then goto N1_14;
+ else goto T1_14;
+
+N1_14:
+ if fieldMatch(text) < 0.8569909930 then goto T1_12;
+ else goto T1_13;
+
+T1_12:
+ response = 0.0307982478;
+ goto D1;
+
+T1_13:
+ response = 0.0469020946;
+ goto D1;
+
+T1_14:
+ response = 0.0558564997;
+ goto D1;
+
+N1_15:
+ if age(created_at) < 5400.0000000000 then goto T1_15;
+ else goto N1_16;
+
+T1_15:
+ response = -0.0166881751;
+ goto D1;
+
+N1_16:
+ if fieldMatch(text) < 0.3820354939 then goto T1_16;
+ else goto N1_17;
+
+T1_16:
+ response = -0.0035580609;
+ goto D1;
+
+N1_17:
+ if age(created_at) < 30600.0000000000 then goto T1_17;
+ else goto T1_18;
+
+T1_17:
+ response = 0.0028319521;
+ goto D1;
+
+T1_18:
+ response = 0.0104819912;
+ goto D1;
+
+D1:
+
+tnscore = tnscore + response;
+
+/* Tree 3 of 80 */
+N2_1:
+ if attribute(ythl) < 0.5000000000 then goto N2_2;
+ else goto N2_12;
+
+N2_2:
+ if age(created_at) < 1830.0000000000 then goto N2_3;
+ else goto N2_9;
+
+N2_3:
+ if fieldMatch(text).tail < 7.5000000000 then goto N2_4;
+ else goto N2_6;
+
+N2_4:
+ if match < 0.6697604656 then goto T2_1;
+ else goto N2_5;
+
+T2_1:
+ response = -0.0323866906;
+ goto D2;
+
+N2_5:
+ if term(0).significance < 0.9955350161 then goto T2_2;
+ else goto T2_3;
+
+T2_2:
+ response = -0.0025720554;
+ goto D2;
+
+T2_3:
+ response = -0.0170321274;
+ goto D2;
+
+N2_6:
+ if attribute(user_followers_count) < 489.5000000000 then goto N2_7;
+ else goto T2_7;
+
+N2_7:
+ if term(0).significance < 0.9964904785 then goto T2_4;
+ else goto N2_8;
+
+T2_4:
+ response = 0.0035465045;
+ goto D2;
+
+N2_8:
+ if term(0).significance < 0.9992840290 then goto T2_5;
+ else goto T2_6;
+
+T2_5:
+ response = -0.0205069971;
+ goto D2;
+
+T2_6:
+ response = 0.0010003389;
+ goto D2;
+
+T2_7:
+ response = 0.0149904595;
+ goto D2;
+
+N2_9:
+ if age(created_at) < 9000.0000000000 then goto N2_10;
+ else goto N2_11;
+
+N2_10:
+ if age(created_at) < 3570.0000000000 then goto T2_8;
+ else goto T2_9;
+
+T2_8:
+ response = -0.0501614448;
+ goto D2;
+
+T2_9:
+ response = -0.0347695722;
+ goto D2;
+
+N2_11:
+ if age(created_at) < 45000.0000000000 then goto T2_10;
+ else goto T2_11;
+
+T2_10:
+ response = -0.0217186612;
+ goto D2;
+
+T2_11:
+ response = -0.0115826893;
+ goto D2;
+
+N2_12:
+ if age(created_at) < 1830.0000000000 then goto N2_13;
+ else goto N2_15;
+
+N2_13:
+ if fieldMatch(text).occurrence < 0.1277174950 then goto N2_14;
+ else goto T2_14;
+
+N2_14:
+ if fieldMatch(text) < 0.8662694693 then goto T2_12;
+ else goto T2_13;
+
+T2_12:
+ response = 0.0270881826;
+ goto D2;
+
+T2_13:
+ response = 0.0422977189;
+ goto D2;
+
+T2_14:
+ response = 0.0471192106;
+ goto D2;
+
+N2_15:
+ if age(created_at) < 5400.0000000000 then goto N2_16;
+ else goto N2_17;
+
+N2_16:
+ if fieldMatch(text).importance < 0.6664260030 then goto T2_15;
+ else goto T2_16;
+
+T2_15:
+ response = -0.0232114640;
+ goto D2;
+
+T2_16:
+ response = -0.0105863112;
+ goto D2;
+
+N2_17:
+ if age(created_at) < 19800.0000000000 then goto T2_17;
+ else goto T2_18;
+
+T2_17:
+ response = -0.0011091805;
+ goto D2;
+
+T2_18:
+ response = 0.0079984015;
+ goto D2;
+
+D2:
+
+tnscore = tnscore + response;
+
+/* Tree 4 of 80 */
+N3_1:
+ if attribute(ythl) < 0.5000000000 then goto N3_2;
+ else goto N3_9;
+
+N3_2:
+ if age(created_at) < 1830.0000000000 then goto N3_3;
+ else goto N3_7;
+
+N3_3:
+ if term(1).significance < 0.8159549832 then goto T3_1;
+ else goto N3_4;
+
+T3_1:
+ response = 0.0342704034;
+ goto D3;
+
+N3_4:
+ if attribute(user_statuses_count) < 574.0000000000 then goto T3_2;
+ else goto N3_5;
+
+T3_2:
+ response = -0.0031829638;
+ goto D3;
+
+N3_5:
+ if fieldMatch(text).weightedOccurrence < 0.1091270000 then goto N3_6;
+ else goto T3_5;
+
+N3_6:
+ if fieldMatch(text) < 0.8472499847 then goto T3_3;
+ else goto T3_4;
+
+T3_3:
+ response = -0.0193605912;
+ goto D3;
+
+T3_4:
+ response = -0.0038142662;
+ goto D3;
+
+T3_5:
+ response = -0.0308342022;
+ goto D3;
+
+N3_7:
+ if age(created_at) < 5400.0000000000 then goto T3_6;
+ else goto N3_8;
+
+T3_6:
+ response = -0.0418216807;
+ goto D3;
+
+N3_8:
+ if age(created_at) < 48600.0000000000 then goto T3_7;
+ else goto T3_8;
+
+T3_7:
+ response = -0.0237625386;
+ goto D3;
+
+T3_8:
+ response = -0.0115288531;
+ goto D3;
+
+N3_9:
+ if age(created_at) < 1830.0000000000 then goto N3_10;
+ else goto N3_16;
+
+N3_10:
+ if fieldMatch(text).importance < 0.7488905191 then goto N3_11;
+ else goto T3_15;
+
+N3_11:
+ if term(0).significance < 0.9918209910 then goto N3_12;
+ else goto N3_14;
+
+N3_12:
+ if term(2).significance < 0.9838794470 then goto N3_13;
+ else goto T3_11;
+
+N3_13:
+ if fieldLength(text) < 23.5000000000 then goto T3_9;
+ else goto T3_10;
+
+T3_9:
+ response = 0.0091275797;
+ goto D3;
+
+T3_10:
+ response = -0.0681415824;
+ goto D3;
+
+T3_11:
+ response = 0.0280728758;
+ goto D3;
+
+N3_14:
+ if fieldTermMatch(text,0).firstPosition < 9.5000000000 then goto N3_15;
+ else goto T3_14;
+
+N3_15:
+ if attribute(user_followers_count) < 2165.5000000000 then goto T3_12;
+ else goto T3_13;
+
+T3_12:
+ response = 0.0378854321;
+ goto D3;
+
+T3_13:
+ response = 0.0555394610;
+ goto D3;
+
+T3_14:
+ response = 0.0261930857;
+ goto D3;
+
+T3_15:
+ response = 0.0496898680;
+ goto D3;
+
+N3_16:
+ if age(created_at) < 12600.0000000000 then goto N3_17;
+ else goto T3_18;
+
+N3_17:
+ if age(created_at) < 5400.0000000000 then goto T3_16;
+ else goto T3_17;
+
+T3_16:
+ response = -0.0136135545;
+ goto D3;
+
+T3_17:
+ response = -0.0029542657;
+ goto D3;
+
+T3_18:
+ response = 0.0066915734;
+ goto D3;
+
+D3:
+
+tnscore = tnscore + response;
+
+/* Tree 5 of 80 */
+N4_1:
+ if attribute(ythl) < 0.5000000000 then goto N4_2;
+ else goto N4_10;
+
+N4_2:
+ if age(created_at) < 1830.0000000000 then goto N4_3;
+ else goto N4_8;
+
+N4_3:
+ if term(1).significance < 0.7788045406 then goto T4_1;
+ else goto N4_4;
+
+T4_1:
+ response = 0.0678231236;
+ goto D4;
+
+N4_4:
+ if fieldMatch(text).significantOccurrence < 0.1249914989 then goto N4_5;
+ else goto T4_6;
+
+N4_5:
+ if attribute(user_statuses_count) < 103.5000000000 then goto T4_2;
+ else goto N4_6;
+
+T4_2:
+ response = 0.0047730322;
+ goto D4;
+
+N4_6:
+ if attribute(user_followers_count) < 3070.5000000000 then goto N4_7;
+ else goto T4_5;
+
+N4_7:
+ if fieldMatch(text).earliness < 0.8834840059 then goto T4_3;
+ else goto T4_4;
+
+T4_3:
+ response = -0.0130691877;
+ goto D4;
+
+T4_4:
+ response = 0.0030931972;
+ goto D4;
+
+T4_5:
+ response = 0.0212955094;
+ goto D4;
+
+T4_6:
+ response = -0.0250041155;
+ goto D4;
+
+N4_8:
+ if age(created_at) < 5400.0000000000 then goto T4_7;
+ else goto N4_9;
+
+T4_7:
+ response = -0.0386563137;
+ goto D4;
+
+N4_9:
+ if age(created_at) < 48600.0000000000 then goto T4_8;
+ else goto T4_9;
+
+T4_8:
+ response = -0.0213844929;
+ goto D4;
+
+T4_9:
+ response = -0.0116543752;
+ goto D4;
+
+N4_10:
+ if age(created_at) < 1830.0000000000 then goto N4_11;
+ else goto N4_15;
+
+N4_11:
+ if fieldMatch(text).longestSequence < 1.5000000000 then goto N4_12;
+ else goto T4_14;
+
+N4_12:
+ if fieldMatch(text).earliness < 0.5683230162 then goto N4_13;
+ else goto N4_14;
+
+N4_13:
+ if fieldMatch(text).gapLength < 5.5000000000 then goto T4_10;
+ else goto T4_11;
+
+T4_10:
+ response = 0.0208840083;
+ goto D4;
+
+T4_11:
+ response = -0.0392353393;
+ goto D4;
+
+N4_14:
+ if term(0).significance < 0.9139549732 then goto T4_12;
+ else goto T4_13;
+
+T4_12:
+ response = -0.0599160780;
+ goto D4;
+
+T4_13:
+ response = 0.0359567192;
+ goto D4;
+
+T4_14:
+ response = 0.0417870117;
+ goto D4;
+
+N4_15:
+ if age(created_at) < 5400.0000000000 then goto T4_15;
+ else goto N4_16;
+
+T4_15:
+ response = -0.0124473711;
+ goto D4;
+
+N4_16:
+ if age(created_at) < 27000.0000000000 then goto T4_16;
+ else goto N4_17;
+
+T4_16:
+ response = -0.0003908889;
+ goto D4;
+
+N4_17:
+ if fieldMatch(text) < 0.5566675067 then goto T4_17;
+ else goto T4_18;
+
+T4_17:
+ response = 0.0009453270;
+ goto D4;
+
+T4_18:
+ response = 0.0097812185;
+ goto D4;
+
+D4:
+
+tnscore = tnscore + response;
+
+/* Tree 6 of 80 */
+N5_1:
+ if attribute(ythl) < 0.5000000000 then goto N5_2;
+ else goto N5_9;
+
+N5_2:
+ if age(created_at) < 1830.0000000000 then goto N5_3;
+ else goto N5_7;
+
+N5_3:
+ if term(1).significance < 0.8159549832 then goto N5_4;
+ else goto N5_5;
+
+N5_4:
+ if term(0).significance < 0.9873124957 then goto T5_1;
+ else goto T5_2;
+
+T5_1:
+ response = 0.0625267810;
+ goto D5;
+
+T5_2:
+ response = -0.0025881996;
+ goto D5;
+
+N5_5:
+ if fieldMatch(text).tail < 7.5000000000 then goto N5_6;
+ else goto T5_5;
+
+N5_6:
+ if attribute(user_statuses_count) < 504.0000000000 then goto T5_3;
+ else goto T5_4;
+
+T5_3:
+ response = -0.0072144471;
+ goto D5;
+
+T5_4:
+ response = -0.0184304751;
+ goto D5;
+
+T5_5:
+ response = -0.0041050691;
+ goto D5;
+
+N5_7:
+ if age(created_at) < 5400.0000000000 then goto T5_6;
+ else goto N5_8;
+
+T5_6:
+ response = -0.0342922301;
+ goto D5;
+
+N5_8:
+ if age(created_at) < 52200.0000000000 then goto T5_7;
+ else goto T5_8;
+
+T5_7:
+ response = -0.0213685384;
+ goto D5;
+
+T5_8:
+ response = -0.0114302758;
+ goto D5;
+
+N5_9:
+ if age(created_at) < 1830.0000000000 then goto N5_10;
+ else goto N5_16;
+
+N5_10:
+ if fieldMatch(text).longestSequence < 1.5000000000 then goto N5_11;
+ else goto T5_15;
+
+N5_11:
+ if fieldMatch(text).tail < 7.5000000000 then goto N5_12;
+ else goto N5_15;
+
+N5_12:
+ if fieldMatch(text).significantOccurrence < 0.0574909970 then goto N5_13;
+ else goto N5_14;
+
+N5_13:
+ if term(0).significance < 0.9980279803 then goto T5_9;
+ else goto T5_10;
+
+T5_9:
+ response = -0.0131328933;
+ goto D5;
+
+T5_10:
+ response = 0.0192113014;
+ goto D5;
+
+N5_14:
+ if fieldMatch(text) < 0.8584204912 then goto T5_11;
+ else goto T5_12;
+
+T5_11:
+ response = 0.0314073419;
+ goto D5;
+
+T5_12:
+ response = -0.0026767115;
+ goto D5;
+
+N5_15:
+ if fieldMatch(text).fieldCompleteness < 0.0392310023 then goto T5_13;
+ else goto T5_14;
+
+T5_13:
+ response = -0.0016304919;
+ goto D5;
+
+T5_14:
+ response = 0.0347034740;
+ goto D5;
+
+T5_15:
+ response = 0.0373450153;
+ goto D5;
+
+N5_16:
+ if age(created_at) < 12600.0000000000 then goto N5_17;
+ else goto T5_18;
+
+N5_17:
+ if age(created_at) < 5400.0000000000 then goto T5_16;
+ else goto T5_17;
+
+T5_16:
+ response = -0.0106738218;
+ goto D5;
+
+T5_17:
+ response = -0.0029072167;
+ goto D5;
+
+T5_18:
+ response = 0.0056105069;
+ goto D5;
+
+D5:
+
+tnscore = tnscore + response;
+
+/* Tree 7 of 80 */
+N6_1:
+ if attribute(ythl) < 0.5000000000 then goto N6_2;
+ else goto N6_9;
+
+N6_2:
+ if age(created_at) < 1830.0000000000 then goto N6_3;
+ else goto N6_6;
+
+N6_3:
+ if fieldMatch(text).tail < 12.5000000000 then goto N6_4;
+ else goto T6_4;
+
+N6_4:
+ if attribute(user_statuses_count) < 826.0000000000 then goto T6_1;
+ else goto N6_5;
+
+T6_1:
+ response = -0.0058871349;
+ goto D6;
+
+N6_5:
+ if fieldMatch(text).earliness < 0.8774999976 then goto T6_2;
+ else goto T6_3;
+
+T6_2:
+ response = -0.0128456148;
+ goto D6;
+
+T6_3:
+ response = -0.0362508217;
+ goto D6;
+
+T6_4:
+ response = 0.0039172531;
+ goto D6;
+
+N6_6:
+ if age(created_at) < 5400.0000000000 then goto T6_5;
+ else goto N6_7;
+
+T6_5:
+ response = -0.0332743660;
+ goto D6;
+
+N6_7:
+ if age(created_at) < 48600.0000000000 then goto N6_8;
+ else goto T6_8;
+
+N6_8:
+ if fieldMatch(text) < 0.5479695201 then goto T6_6;
+ else goto T6_7;
+
+T6_6:
+ response = -0.0292307762;
+ goto D6;
+
+T6_7:
+ response = -0.0167816152;
+ goto D6;
+
+T6_8:
+ response = -0.0103426077;
+ goto D6;
+
+N6_9:
+ if age(created_at) < 1830.0000000000 then goto N6_10;
+ else goto N6_15;
+
+N6_10:
+ if fieldMatch(text).earliness < 0.6510869861 then goto N6_11;
+ else goto N6_13;
+
+N6_11:
+ if attribute(yst_tweet_language) < 3243.5000000000 then goto N6_12;
+ else goto T6_11;
+
+N6_12:
+ if fieldMatch(text) < 0.2023105025 then goto T6_9;
+ else goto T6_10;
+
+T6_9:
+ response = -0.0093665406;
+ goto D6;
+
+T6_10:
+ response = 0.0265294786;
+ goto D6;
+
+T6_11:
+ response = -0.0163231950;
+ goto D6;
+
+N6_13:
+ if term(2).significance < 0.9791975021 then goto N6_14;
+ else goto T6_14;
+
+N6_14:
+ if attribute(yst_reply_auth) < 2.5000000000 then goto T6_12;
+ else goto T6_13;
+
+T6_12:
+ response = 0.0195383609;
+ goto D6;
+
+T6_13:
+ response = 0.0376308584;
+ goto D6;
+
+T6_14:
+ response = 0.0386834550;
+ goto D6;
+
+N6_15:
+ if age(created_at) < 12600.0000000000 then goto N6_16;
+ else goto T6_18;
+
+N6_16:
+ if term(4).significance < 0.9926320314 then goto N6_17;
+ else goto T6_17;
+
+N6_17:
+ if age(created_at) < 5400.0000000000 then goto T6_15;
+ else goto T6_16;
+
+T6_15:
+ response = -0.0119040624;
+ goto D6;
+
+T6_16:
+ response = -0.0033941367;
+ goto D6;
+
+T6_17:
+ response = 0.0037785770;
+ goto D6;
+
+T6_18:
+ response = 0.0059147929;
+ goto D6;
+
+D6:
+
+tnscore = tnscore + response;
+
+/* Tree 8 of 80 */
+N7_1:
+ if attribute(ythl) < 0.5000000000 then goto N7_2;
+ else goto N7_10;
+
+N7_2:
+ if age(created_at) < 1830.0000000000 then goto N7_3;
+ else goto N7_8;
+
+N7_3:
+ if fieldMatch(text).tail < 10.5000000000 then goto N7_4;
+ else goto T7_6;
+
+N7_4:
+ if fieldMatch(text).importance < 0.7443764806 then goto N7_5;
+ else goto N7_6;
+
+N7_5:
+ if match < 0.9134370089 then goto T7_1;
+ else goto T7_2;
+
+T7_1:
+ response = -0.0252698647;
+ goto D7;
+
+T7_2:
+ response = -0.0099637807;
+ goto D7;
+
+N7_6:
+ if term(2).significance < 0.7909680009 then goto T7_3;
+ else goto N7_7;
+
+T7_3:
+ response = -0.0344071695;
+ goto D7;
+
+N7_7:
+ if term(0).significance < 0.9947484732 then goto T7_4;
+ else goto T7_5;
+
+T7_4:
+ response = 0.0145760432;
+ goto D7;
+
+T7_5:
+ response = -0.0105169825;
+ goto D7;
+
+T7_6:
+ response = -0.0002567620;
+ goto D7;
+
+N7_8:
+ if age(created_at) < 12600.0000000000 then goto N7_9;
+ else goto T7_9;
+
+N7_9:
+ if age(created_at) < 5400.0000000000 then goto T7_7;
+ else goto T7_8;
+
+T7_7:
+ response = -0.0322518692;
+ goto D7;
+
+T7_8:
+ response = -0.0221817109;
+ goto D7;
+
+T7_9:
+ response = -0.0127554041;
+ goto D7;
+
+N7_10:
+ if age(created_at) < 1830.0000000000 then goto N7_11;
+ else goto N7_15;
+
+N7_11:
+ if fieldMatch(text).earliness < 0.7247474790 then goto N7_12;
+ else goto N7_14;
+
+N7_12:
+ if fieldTermMatch(text,1).firstPosition < 14.5000000000 then goto T7_10;
+ else goto N7_13;
+
+T7_10:
+ response = 0.0292128233;
+ goto D7;
+
+N7_13:
+ if fieldMatch(text) < 0.3254045248 then goto T7_11;
+ else goto T7_12;
+
+T7_11:
+ response = -0.0359002315;
+ goto D7;
+
+T7_12:
+ response = 0.0194921959;
+ goto D7;
+
+N7_14:
+ if attribute(user_followers_count) < 609.5000000000 then goto T7_13;
+ else goto T7_14;
+
+T7_13:
+ response = 0.0299861583;
+ goto D7;
+
+T7_14:
+ response = 0.0421930400;
+ goto D7;
+
+N7_15:
+ if age(created_at) < 5400.0000000000 then goto N7_16;
+ else goto N7_17;
+
+N7_16:
+ if fieldMatch(text).occurrence < 0.1731635034 then goto T7_15;
+ else goto T7_16;
+
+T7_15:
+ response = -0.0134935559;
+ goto D7;
+
+T7_16:
+ response = -0.0027367126;
+ goto D7;
+
+N7_17:
+ if term(1).significance < 0.9878399968 then goto T7_17;
+ else goto T7_18;
+
+T7_17:
+ response = 0.0121433273;
+ goto D7;
+
+T7_18:
+ response = 0.0020006783;
+ goto D7;
+
+D7:
+
+tnscore = tnscore + response;
+
+/* Tree 9 of 80 */
+N8_1:
+ if attribute(ythl) < 0.5000000000 then goto N8_2;
+ else goto N8_9;
+
+N8_2:
+ if age(created_at) < 1830.0000000000 then goto N8_3;
+ else goto N8_7;
+
+N8_3:
+ if fieldMatch(text).longestSequence < 1.5000000000 then goto N8_4;
+ else goto N8_6;
+
+N8_4:
+ if fieldTermMatch(text,0).firstPosition < 14.5000000000 then goto N8_5;
+ else goto T8_3;
+
+N8_5:
+ if fieldMatch(text).importance < 0.7413114905 then goto T8_1;
+ else goto T8_2;
+
+T8_1:
+ response = -0.0091155042;
+ goto D8;
+
+T8_2:
+ response = 0.0233289393;
+ goto D8;
+
+T8_3:
+ response = -0.0239608468;
+ goto D8;
+
+N8_6:
+ if attribute(user_followers_count) < 2995.0000000000 then goto T8_4;
+ else goto T8_5;
+
+T8_4:
+ response = -0.0016973828;
+ goto D8;
+
+T8_5:
+ response = 0.0438873528;
+ goto D8;
+
+N8_7:
+ if age(created_at) < 12600.0000000000 then goto N8_8;
+ else goto T8_8;
+
+N8_8:
+ if age(created_at) < 3570.0000000000 then goto T8_6;
+ else goto T8_7;
+
+T8_6:
+ response = -0.0345805450;
+ goto D8;
+
+T8_7:
+ response = -0.0236031788;
+ goto D8;
+
+T8_8:
+ response = -0.0119280014;
+ goto D8;
+
+N8_9:
+ if age(created_at) < 1830.0000000000 then goto N8_10;
+ else goto N8_15;
+
+N8_10:
+ if fieldTermMatch(text,1).firstPosition < 14.5000000000 then goto T8_9;
+ else goto N8_11;
+
+T8_9:
+ response = 0.0323605063;
+ goto D8;
+
+N8_11:
+ if attribute(yst_tweet_language) < 3243.5000000000 then goto N8_12;
+ else goto T8_14;
+
+N8_12:
+ if fieldTermMatch(text,0).firstPosition < 3.5000000000 then goto N8_13;
+ else goto N8_14;
+
+N8_13:
+ if attribute(user_followers_count) < 114.5000000000 then goto T8_10;
+ else goto T8_11;
+
+T8_10:
+ response = 0.0149219697;
+ goto D8;
+
+T8_11:
+ response = 0.0383892131;
+ goto D8;
+
+N8_14:
+ if fieldMatch(text) < 0.3404299915 then goto T8_12;
+ else goto T8_13;
+
+T8_12:
+ response = -0.0214082868;
+ goto D8;
+
+T8_13:
+ response = 0.0183146341;
+ goto D8;
+
+T8_14:
+ response = -0.0199916697;
+ goto D8;
+
+N8_15:
+ if age(created_at) < 9000.0000000000 then goto T8_15;
+ else goto N8_16;
+
+T8_15:
+ response = -0.0076472907;
+ goto D8;
+
+N8_16:
+ if fieldMatch(text) < 0.5607429743 then goto T8_16;
+ else goto N8_17;
+
+T8_16:
+ response = -0.0000169083;
+ goto D8;
+
+N8_17:
+ if term(1).significance < 0.8870275021 then goto T8_17;
+ else goto T8_18;
+
+T8_17:
+ response = 0.0275141633;
+ goto D8;
+
+T8_18:
+ response = 0.0058735097;
+ goto D8;
+
+D8:
+
+tnscore = tnscore + response;
+
+/* Tree 10 of 80 */
+N9_1:
+ if attribute(ythl) < 0.5000000000 then goto N9_2;
+ else goto N9_10;
+
+N9_2:
+ if age(created_at) < 1830.0000000000 then goto N9_3;
+ else goto N9_9;
+
+N9_3:
+ if term(0).significance < 0.9964904785 then goto N9_4;
+ else goto N9_5;
+
+N9_4:
+ if fieldMatch(text) < 0.1415009946 then goto T9_1;
+ else goto T9_2;
+
+T9_1:
+ response = -0.0331371143;
+ goto D9;
+
+T9_2:
+ response = 0.0016819061;
+ goto D9;
+
+N9_5:
+ if term(0).significance < 0.9974014759 then goto N9_6;
+ else goto N9_7;
+
+N9_6:
+ if term(1).significance < 0.9943025112 then goto T9_3;
+ else goto T9_4;
+
+T9_3:
+ response = -0.0561295193;
+ goto D9;
+
+T9_4:
+ response = -0.0143235877;
+ goto D9;
+
+N9_7:
+ if term(0).significance < 0.9975079894 then goto T9_5;
+ else goto N9_8;
+
+T9_5:
+ response = 0.0456376595;
+ goto D9;
+
+N9_8:
+ if term(0).significance < 0.9976614714 then goto T9_6;
+ else goto T9_7;
+
+T9_6:
+ response = -0.0617225433;
+ goto D9;
+
+T9_7:
+ response = -0.0089081592;
+ goto D9;
+
+N9_9:
+ if age(created_at) < 12600.0000000000 then goto T9_8;
+ else goto T9_9;
+
+T9_8:
+ response = -0.0250708949;
+ goto D9;
+
+T9_9:
+ response = -0.0120490174;
+ goto D9;
+
+N9_10:
+ if age(created_at) < 1830.0000000000 then goto N9_11;
+ else goto N9_15;
+
+N9_11:
+ if attribute(yst_tweet_language) < 3243.5000000000 then goto N9_12;
+ else goto T9_14;
+
+N9_12:
+ if fieldMatch(text).significantOccurrence < 0.0422540009 then goto T9_10;
+ else goto N9_13;
+
+T9_10:
+ response = 0.0132403332;
+ goto D9;
+
+N9_13:
+ if fieldMatch(text).importance < 0.7485179901 then goto N9_14;
+ else goto T9_13;
+
+N9_14:
+ if fieldMatch(text).tail < 10.5000000000 then goto T9_11;
+ else goto T9_12;
+
+T9_11:
+ response = 0.0224059642;
+ goto D9;
+
+T9_12:
+ response = 0.0317363105;
+ goto D9;
+
+T9_13:
+ response = 0.0363809447;
+ goto D9;
+
+T9_14:
+ response = -0.0059409077;
+ goto D9;
+
+N9_15:
+ if age(created_at) < 12600.0000000000 then goto T9_15;
+ else goto N9_16;
+
+T9_15:
+ response = -0.0059533220;
+ goto D9;
+
+N9_16:
+ if match < 0.7504960299 then goto N9_17;
+ else goto T9_18;
+
+N9_17:
+ if fieldMatch(text).occurrence < 0.1318840086 then goto T9_16;
+ else goto T9_17;
+
+T9_16:
+ response = -0.0092412181;
+ goto D9;
+
+T9_17:
+ response = 0.0036779089;
+ goto D9;
+
+T9_18:
+ response = 0.0067221979;
+ goto D9;
+
+D9:
+
+tnscore = tnscore + response;
+
+/* Tree 11 of 80 */
+N10_1:
+ if attribute(ythl) < 0.5000000000 then goto N10_2;
+ else goto N10_11;
+
+N10_2:
+ if age(created_at) < 1770.0000000000 then goto N10_3;
+ else goto N10_9;
+
+N10_3:
+ if term(0).significance < 0.9964904785 then goto N10_4;
+ else goto N10_7;
+
+N10_4:
+ if fieldMatch(text) < 0.2731105089 then goto T10_1;
+ else goto N10_5;
+
+T10_1:
+ response = -0.0169751683;
+ goto D10;
+
+N10_5:
+ if attribute(yst_reply_auth) < 16.5000000000 then goto T10_2;
+ else goto N10_6;
+
+T10_2:
+ response = 0.0106972872;
+ goto D10;
+
+N10_6:
+ if term(1).significance < 0.8159549832 then goto T10_3;
+ else goto T10_4;
+
+T10_3:
+ response = 0.0454901055;
+ goto D10;
+
+T10_4:
+ response = -0.0067703435;
+ goto D10;
+
+N10_7:
+ if term(0).significance < 0.9966599941 then goto T10_5;
+ else goto N10_8;
+
+T10_5:
+ response = -0.0415369371;
+ goto D10;
+
+N10_8:
+ if match.totalWeight < 250.0000000000 then goto T10_6;
+ else goto T10_7;
+
+T10_6:
+ response = -0.0080140966;
+ goto D10;
+
+T10_7:
+ response = 0.0488608858;
+ goto D10;
+
+N10_9:
+ if age(created_at) < 12600.0000000000 then goto T10_8;
+ else goto N10_10;
+
+T10_8:
+ response = -0.0229133495;
+ goto D10;
+
+N10_10:
+ if fieldMatch(text).absoluteOccurrence < 0.0122500006 then goto T10_9;
+ else goto T10_10;
+
+T10_9:
+ response = -0.0125676511;
+ goto D10;
+
+T10_10:
+ response = 0.0002328845;
+ goto D10;
+
+N10_11:
+ if age(created_at) < 1830.0000000000 then goto N10_12;
+ else goto N10_15;
+
+N10_12:
+ if fieldMatch(text).longestSequence < 1.5000000000 then goto N10_13;
+ else goto T10_14;
+
+N10_13:
+ if term(0).significance < 0.9184160233 then goto T10_11;
+ else goto N10_14;
+
+T10_11:
+ response = -0.0549048781;
+ goto D10;
+
+N10_14:
+ if fieldLength(text) < 23.5000000000 then goto T10_12;
+ else goto T10_13;
+
+T10_12:
+ response = 0.0226244877;
+ goto D10;
+
+T10_13:
+ response = 0.0077881056;
+ goto D10;
+
+T10_14:
+ response = 0.0280730521;
+ goto D10;
+
+N10_15:
+ if age(created_at) < 9000.0000000000 then goto N10_16;
+ else goto N10_17;
+
+N10_16:
+ if fieldMatch(text) < 0.3730605245 then goto T10_15;
+ else goto T10_16;
+
+T10_15:
+ response = -0.0167181189;
+ goto D10;
+
+T10_16:
+ response = -0.0044234172;
+ goto D10;
+
+N10_17:
+ if fieldMatch(text) < 0.5543889999 then goto T10_17;
+ else goto T10_18;
+
+T10_17:
+ response = -0.0008470900;
+ goto D10;
+
+T10_18:
+ response = 0.0055458527;
+ goto D10;
+
+D10:
+
+tnscore = tnscore + response;
+
+/* Tree 12 of 80 */
+N11_1:
+ if attribute(ythl) < 0.5000000000 then goto N11_2;
+ else goto N11_9;
+
+N11_2:
+ if age(created_at) < 1770.0000000000 then goto N11_3;
+ else goto N11_8;
+
+N11_3:
+ if match.totalWeight < 250.0000000000 then goto N11_4;
+ else goto T11_6;
+
+N11_4:
+ if fieldTermMatch(text,1).firstPosition < 13.5000000000 then goto N11_5;
+ else goto N11_7;
+
+N11_5:
+ if term(0).significance < 0.9701889753 then goto T11_1;
+ else goto N11_6;
+
+T11_1:
+ response = -0.0195353072;
+ goto D11;
+
+N11_6:
+ if term(0).significance < 0.9965775013 then goto T11_2;
+ else goto T11_3;
+
+T11_2:
+ response = 0.0073931107;
+ goto D11;
+
+T11_3:
+ response = -0.0074860039;
+ goto D11;
+
+N11_7:
+ if fieldMatch(text) < 0.6285369992 then goto T11_4;
+ else goto T11_5;
+
+T11_4:
+ response = -0.0322505986;
+ goto D11;
+
+T11_5:
+ response = -0.0073317181;
+ goto D11;
+
+T11_6:
+ response = 0.0451330307;
+ goto D11;
+
+N11_8:
+ if age(created_at) < 45000.0000000000 then goto T11_7;
+ else goto T11_8;
+
+T11_7:
+ response = -0.0206455453;
+ goto D11;
+
+T11_8:
+ response = -0.0085888986;
+ goto D11;
+
+N11_9:
+ if age(created_at) < 1830.0000000000 then goto N11_10;
+ else goto N11_14;
+
+N11_10:
+ if fieldTermMatch(text,1).firstPosition < 14.5000000000 then goto T11_9;
+ else goto N11_11;
+
+T11_9:
+ response = 0.0265081733;
+ goto D11;
+
+N11_11:
+ if fieldMatch(text) < 0.3104079962 then goto T11_10;
+ else goto N11_12;
+
+T11_10:
+ response = -0.0291219391;
+ goto D11;
+
+N11_12:
+ if attribute(yst_tweet_language) < 3271.5000000000 then goto N11_13;
+ else goto T11_13;
+
+N11_13:
+ if attribute(user_friends_count) < 146.5000000000 then goto T11_11;
+ else goto T11_12;
+
+T11_11:
+ response = 0.0133927786;
+ goto D11;
+
+T11_12:
+ response = 0.0247206105;
+ goto D11;
+
+T11_13:
+ response = -0.0249098053;
+ goto D11;
+
+N11_14:
+ if age(created_at) < 12600.0000000000 then goto N11_15;
+ else goto N11_17;
+
+N11_15:
+ if fieldTermMatch(text,0).firstPosition < 1.5000000000 then goto T11_14;
+ else goto N11_16;
+
+T11_14:
+ response = 0.0013211001;
+ goto D11;
+
+N11_16:
+ if fieldMatch(text).importance < 0.6664245129 then goto T11_15;
+ else goto T11_16;
+
+T11_15:
+ response = -0.0124234916;
+ goto D11;
+
+T11_16:
+ response = -0.0044820648;
+ goto D11;
+
+N11_17:
+ if fieldMatch(text).significantOccurrence < 0.0555050001 then goto T11_17;
+ else goto T11_18;
+
+T11_17:
+ response = 0.0006152863;
+ goto D11;
+
+T11_18:
+ response = 0.0069791274;
+ goto D11;
+
+D11:
+
+tnscore = tnscore + response;
+
+/* Tree 13 of 80 */
+N12_1:
+ if attribute(ythl) < 0.5000000000 then goto N12_2;
+ else goto N12_9;
+
+N12_2:
+ if age(created_at) < 1830.0000000000 then goto N12_3;
+ else goto N12_8;
+
+N12_3:
+ if term(1).significance < 0.7788045406 then goto T12_1;
+ else goto N12_4;
+
+T12_1:
+ response = 0.0442178195;
+ goto D12;
+
+N12_4:
+ if term(0).significance < 0.9492504597 then goto T12_2;
+ else goto N12_5;
+
+T12_2:
+ response = -0.0249224413;
+ goto D12;
+
+N12_5:
+ if fieldMatch(text).tail < 11.5000000000 then goto N12_6;
+ else goto T12_6;
+
+N12_6:
+ if fieldMatch(text).importance < 0.7480239868 then goto T12_3;
+ else goto N12_7;
+
+T12_3:
+ response = -0.0099065850;
+ goto D12;
+
+N12_7:
+ if term(0).significance < 0.9947484732 then goto T12_4;
+ else goto T12_5;
+
+T12_4:
+ response = 0.0121048215;
+ goto D12;
+
+T12_5:
+ response = -0.0132930884;
+ goto D12;
+
+T12_6:
+ response = 0.0006079666;
+ goto D12;
+
+N12_8:
+ if age(created_at) < 9000.0000000000 then goto T12_7;
+ else goto T12_8;
+
+T12_7:
+ response = -0.0219397199;
+ goto D12;
+
+T12_8:
+ response = -0.0106952111;
+ goto D12;
+
+N12_9:
+ if age(created_at) < 1830.0000000000 then goto N12_10;
+ else goto N12_14;
+
+N12_10:
+ if fieldMatch(text).absoluteOccurrence < 0.0136665003 then goto N12_11;
+ else goto T12_13;
+
+N12_11:
+ if fieldMatch(text).importance < 0.7488800287 then goto N12_12;
+ else goto T12_12;
+
+N12_12:
+ if fieldTermMatch(text,0).firstPosition < 6.5000000000 then goto N12_13;
+ else goto T12_11;
+
+N12_13:
+ if attribute(user_followers_count) < 866.5000000000 then goto T12_9;
+ else goto T12_10;
+
+T12_9:
+ response = 0.0181298105;
+ goto D12;
+
+T12_10:
+ response = 0.0303594396;
+ goto D12;
+
+T12_11:
+ response = 0.0126963345;
+ goto D12;
+
+T12_12:
+ response = 0.0285788280;
+ goto D12;
+
+T12_13:
+ response = 0.0333028419;
+ goto D12;
+
+N12_14:
+ if age(created_at) < 27000.0000000000 then goto N12_15;
+ else goto N12_17;
+
+N12_15:
+ if fieldTermMatch(text,3).firstPosition < 7.5000000000 then goto T12_14;
+ else goto N12_16;
+
+T12_14:
+ response = 0.0067345611;
+ goto D12;
+
+N12_16:
+ if fieldTermMatch(text,0).firstPosition < 4.5000000000 then goto T12_15;
+ else goto T12_16;
+
+T12_15:
+ response = -0.0013179334;
+ goto D12;
+
+T12_16:
+ response = -0.0081428248;
+ goto D12;
+
+N12_17:
+ if fieldMatch(text) < 0.5568180084 then goto T12_17;
+ else goto T12_18;
+
+T12_17:
+ response = 0.0004597678;
+ goto D12;
+
+T12_18:
+ response = 0.0093837881;
+ goto D12;
+
+D12:
+
+tnscore = tnscore + response;
+
+/* Tree 14 of 80 */
+N13_1:
+ if attribute(ythl) < 0.5000000000 then goto N13_2;
+ else goto N13_10;
+
+N13_2:
+ if age(created_at) < 1830.0000000000 then goto N13_3;
+ else goto N13_7;
+
+N13_3:
+ if fieldMatch(text).longestSequence < 1.5000000000 then goto N13_4;
+ else goto T13_5;
+
+N13_4:
+ if fieldMatch(text).importance < 0.4998250008 then goto N13_5;
+ else goto N13_6;
+
+N13_5:
+ if term(0).significance < 0.9983664751 then goto T13_1;
+ else goto T13_2;
+
+T13_1:
+ response = -0.0058107338;
+ goto D13;
+
+T13_2:
+ response = 0.0245069566;
+ goto D13;
+
+N13_6:
+ if fieldMatch(text).importance < 0.4998745024 then goto T13_3;
+ else goto T13_4;
+
+T13_3:
+ response = -0.0308383904;
+ goto D13;
+
+T13_4:
+ response = -0.0106009672;
+ goto D13;
+
+T13_5:
+ response = 0.0010971200;
+ goto D13;
+
+N13_7:
+ if age(created_at) < 45000.0000000000 then goto N13_8;
+ else goto T13_9;
+
+N13_8:
+ if fieldMatch(text).weightedOccurrence < 0.0912880003 then goto N13_9;
+ else goto T13_8;
+
+N13_9:
+ if age(created_at) < 3570.0000000000 then goto T13_6;
+ else goto T13_7;
+
+T13_6:
+ response = -0.0267460073;
+ goto D13;
+
+T13_7:
+ response = -0.0152835256;
+ goto D13;
+
+T13_8:
+ response = -0.0298858389;
+ goto D13;
+
+T13_9:
+ response = -0.0088562145;
+ goto D13;
+
+N13_10:
+ if age(created_at) < 1830.0000000000 then goto N13_11;
+ else goto N13_17;
+
+N13_11:
+ if fieldMatch(text).importance < 0.4989734888 then goto N13_12;
+ else goto N13_13;
+
+N13_12:
+ if fieldMatch(text).importance < 0.4988874793 then goto T13_10;
+ else goto T13_11;
+
+T13_10:
+ response = 0.0045636472;
+ goto D13;
+
+T13_11:
+ response = -0.1210997623;
+ goto D13;
+
+N13_13:
+ if fieldMatch(text) < 0.3135755062 then goto N13_14;
+ else goto N13_16;
+
+N13_14:
+ if term(1).significance < 0.9852235317 then goto N13_15;
+ else goto T13_14;
+
+N13_15:
+ if term(0).significance < 0.9929184914 then goto T13_12;
+ else goto T13_13;
+
+T13_12:
+ response = -0.0444011152;
+ goto D13;
+
+T13_13:
+ response = 0.0156709024;
+ goto D13;
+
+T13_14:
+ response = 0.0178486139;
+ goto D13;
+
+N13_16:
+ if attribute(yst_tweet_language) < 3243.5000000000 then goto T13_15;
+ else goto T13_16;
+
+T13_15:
+ response = 0.0236557227;
+ goto D13;
+
+T13_16:
+ response = -0.0055893686;
+ goto D13;
+
+N13_17:
+ if age(created_at) < 12600.0000000000 then goto T13_17;
+ else goto T13_18;
+
+T13_17:
+ response = -0.0039404999;
+ goto D13;
+
+T13_18:
+ response = 0.0040376803;
+ goto D13;
+
+D13:
+
+tnscore = tnscore + response;
+
+/* Tree 15 of 80 */
+N14_1:
+ if attribute(ythl) < 0.5000000000 then goto N14_2;
+ else goto N14_12;
+
+N14_2:
+ if age(created_at) < 1770.0000000000 then goto N14_3;
+ else goto N14_10;
+
+N14_3:
+ if term(0).significance < 0.9964904785 then goto N14_4;
+ else goto N14_5;
+
+N14_4:
+ if fieldMatch(text).gapLength < 3.5000000000 then goto T14_1;
+ else goto T14_2;
+
+T14_1:
+ response = 0.0034967960;
+ goto D14;
+
+T14_2:
+ response = -0.0136111988;
+ goto D14;
+
+N14_5:
+ if term(0).significance < 0.9975960255 then goto N14_6;
+ else goto N14_7;
+
+N14_6:
+ if term(1).significance < 0.9943025112 then goto T14_3;
+ else goto T14_4;
+
+T14_3:
+ response = -0.0474034255;
+ goto D14;
+
+T14_4:
+ response = -0.0139986631;
+ goto D14;
+
+N14_7:
+ if fieldMatch(text).importance < 0.6665844917 then goto T14_5;
+ else goto N14_8;
+
+T14_5:
+ response = -0.0083848009;
+ goto D14;
+
+N14_8:
+ if term(0).significance < 0.9992945194 then goto N14_9;
+ else goto T14_8;
+
+N14_9:
+ if term(1).significance < 0.9993695021 then goto T14_6;
+ else goto T14_7;
+
+T14_6:
+ response = 0.0095761689;
+ goto D14;
+
+T14_7:
+ response = 0.0714217668;
+ goto D14;
+
+T14_8:
+ response = -0.0141420420;
+ goto D14;
+
+N14_10:
+ if age(created_at) < 5400.0000000000 then goto T14_9;
+ else goto N14_11;
+
+T14_9:
+ response = -0.0204021576;
+ goto D14;
+
+N14_11:
+ if fieldMatch(text).longestSequence < 1.5000000000 then goto T14_10;
+ else goto T14_11;
+
+T14_10:
+ response = -0.0152195185;
+ goto D14;
+
+T14_11:
+ response = -0.0074091603;
+ goto D14;
+
+N14_12:
+ if age(created_at) < 1830.0000000000 then goto N14_13;
+ else goto N14_15;
+
+N14_13:
+ if fieldMatch(text).occurrence < 0.1277174950 then goto N14_14;
+ else goto T14_14;
+
+N14_14:
+ if fieldMatch(text) < 0.8451825380 then goto T14_12;
+ else goto T14_13;
+
+T14_12:
+ response = 0.0072816766;
+ goto D14;
+
+T14_13:
+ response = 0.0185451686;
+ goto D14;
+
+T14_14:
+ response = 0.0243676179;
+ goto D14;
+
+N14_15:
+ if fieldMatch(text).importance < 0.6664544940 then goto N14_16;
+ else goto N14_17;
+
+N14_16:
+ if age(created_at) < 5400.0000000000 then goto T14_15;
+ else goto T14_16;
+
+T14_15:
+ response = -0.0113001116;
+ goto D14;
+
+T14_16:
+ response = -0.0020866841;
+ goto D14;
+
+N14_17:
+ if age(created_at) < 30600.0000000000 then goto T14_17;
+ else goto T14_18;
+
+T14_17:
+ response = -0.0002226823;
+ goto D14;
+
+T14_18:
+ response = 0.0054407552;
+ goto D14;
+
+D14:
+
+tnscore = tnscore + response;
+
+/* Tree 16 of 80 */
+N15_1:
+ if attribute(ythl) < 0.5000000000 then goto N15_2;
+ else goto N15_9;
+
+N15_2:
+ if age(created_at) < 1770.0000000000 then goto N15_3;
+ else goto N15_7;
+
+N15_3:
+ if attribute(user_friends_count) < 1202.5000000000 then goto N15_4;
+ else goto T15_5;
+
+N15_4:
+ if attribute(yst_tweet_language) < 3243.5000000000 then goto N15_5;
+ else goto T15_4;
+
+N15_5:
+ if attribute(user_statuses_count) < 491.5000000000 then goto N15_6;
+ else goto T15_3;
+
+N15_6:
+ if attribute(user_followers_count) < 39.5000000000 then goto T15_1;
+ else goto T15_2;
+
+T15_1:
+ response = -0.0053604202;
+ goto D15;
+
+T15_2:
+ response = 0.0112837612;
+ goto D15;
+
+T15_3:
+ response = -0.0076658014;
+ goto D15;
+
+T15_4:
+ response = -0.0344819911;
+ goto D15;
+
+T15_5:
+ response = 0.0152860620;
+ goto D15;
+
+N15_7:
+ if age(created_at) < 52200.0000000000 then goto N15_8;
+ else goto T15_8;
+
+N15_8:
+ if fieldMatch(text).importance < 0.6658334732 then goto T15_6;
+ else goto T15_7;
+
+T15_6:
+ response = -0.0236404883;
+ goto D15;
+
+T15_7:
+ response = -0.0155495401;
+ goto D15;
+
+T15_8:
+ response = -0.0063627489;
+ goto D15;
+
+N15_9:
+ if age(created_at) < 1830.0000000000 then goto N15_10;
+ else goto N15_14;
+
+N15_10:
+ if fieldMatch(text).longestSequence < 1.5000000000 then goto N15_11;
+ else goto T15_13;
+
+N15_11:
+ if fieldMatch(text).significantOccurrence < 0.0377494991 then goto T15_9;
+ else goto N15_12;
+
+T15_9:
+ response = -0.0247545653;
+ goto D15;
+
+N15_12:
+ if term(0).significance < 0.9914690256 then goto N15_13;
+ else goto T15_12;
+
+N15_13:
+ if term(0).significance < 0.9911389947 then goto T15_10;
+ else goto T15_11;
+
+T15_10:
+ response = 0.0074545408;
+ goto D15;
+
+T15_11:
+ response = -0.0712173039;
+ goto D15;
+
+T15_12:
+ response = 0.0174505123;
+ goto D15;
+
+T15_13:
+ response = 0.0227466857;
+ goto D15;
+
+N15_14:
+ if fieldMatch(text).importance < 0.6664484739 then goto N15_15;
+ else goto N15_16;
+
+N15_15:
+ if fieldMatch(text) < 0.3570200205 then goto T15_14;
+ else goto T15_15;
+
+T15_14:
+ response = -0.0164480209;
+ goto D15;
+
+T15_15:
+ response = -0.0029063778;
+ goto D15;
+
+N15_16:
+ if age(created_at) < 30600.0000000000 then goto T15_16;
+ else goto N15_17;
+
+T15_16:
+ response = -0.0008955043;
+ goto D15;
+
+N15_17:
+ if fieldLength(text) < 22.5000000000 then goto T15_17;
+ else goto T15_18;
+
+T15_17:
+ response = 0.0066513594;
+ goto D15;
+
+T15_18:
+ response = -0.0017231871;
+ goto D15;
+
+D15:
+
+tnscore = tnscore + response;
+
+/* Tree 17 of 80 */
+N16_1:
+ if attribute(ythl) < 0.5000000000 then goto N16_2;
+ else goto N16_10;
+
+N16_2:
+ if age(created_at) < 1830.0000000000 then goto N16_3;
+ else goto N16_8;
+
+N16_3:
+ if term(1).significance < 0.8159549832 then goto T16_1;
+ else goto N16_4;
+
+T16_1:
+ response = 0.0266703268;
+ goto D16;
+
+N16_4:
+ if attribute(yst_tweet_language) < 3243.5000000000 then goto N16_5;
+ else goto T16_6;
+
+N16_5:
+ if match < 0.6756634712 then goto T16_2;
+ else goto N16_6;
+
+T16_2:
+ response = -0.0178612015;
+ goto D16;
+
+N16_6:
+ if fieldMatch(text).weightedOccurrence < 0.1012820005 then goto N16_7;
+ else goto T16_5;
+
+N16_7:
+ if fieldMatch(text).earliness < 0.8834840059 then goto T16_3;
+ else goto T16_4;
+
+T16_3:
+ response = -0.0031504958;
+ goto D16;
+
+T16_4:
+ response = 0.0108290236;
+ goto D16;
+
+T16_5:
+ response = -0.0132400721;
+ goto D16;
+
+T16_6:
+ response = -0.0294468679;
+ goto D16;
+
+N16_8:
+ if fieldMatch(text).longestSequence < 1.5000000000 then goto T16_7;
+ else goto N16_9;
+
+T16_7:
+ response = -0.0183944645;
+ goto D16;
+
+N16_9:
+ if age(created_at) < 45000.0000000000 then goto T16_8;
+ else goto T16_9;
+
+T16_8:
+ response = -0.0132570328;
+ goto D16;
+
+T16_9:
+ response = -0.0044394895;
+ goto D16;
+
+N16_10:
+ if age(created_at) < 1830.0000000000 then goto N16_11;
+ else goto N16_15;
+
+N16_11:
+ if fieldMatch(text).occurrence < 0.1225000024 then goto N16_12;
+ else goto N16_14;
+
+N16_12:
+ if fieldMatch(text).head < 9.5000000000 then goto N16_13;
+ else goto T16_12;
+
+N16_13:
+ if term(0).significance < 0.9731230140 then goto T16_10;
+ else goto T16_11;
+
+T16_10:
+ response = -0.0399055768;
+ goto D16;
+
+T16_11:
+ response = 0.0171512303;
+ goto D16;
+
+T16_12:
+ response = 0.0047044679;
+ goto D16;
+
+N16_14:
+ if attribute(user_statuses_count) < 6.5000000000 then goto T16_13;
+ else goto T16_14;
+
+T16_13:
+ response = -0.0303006102;
+ goto D16;
+
+T16_14:
+ response = 0.0230432421;
+ goto D16;
+
+N16_15:
+ if fieldMatch(text).importance < 0.6664534807 then goto T16_15;
+ else goto N16_16;
+
+T16_15:
+ response = -0.0054238462;
+ goto D16;
+
+N16_16:
+ if term(2).significance < 0.9981180429 then goto N16_17;
+ else goto T16_18;
+
+N16_17:
+ if fieldMatch(text).completeness < 0.9590100050 then goto T16_16;
+ else goto T16_17;
+
+T16_16:
+ response = -0.0041171766;
+ goto D16;
+
+T16_17:
+ response = 0.0032716696;
+ goto D16;
+
+T16_18:
+ response = 0.0055362698;
+ goto D16;
+
+D16:
+
+tnscore = tnscore + response;
+
+/* Tree 18 of 80 */
+N17_1:
+ if attribute(ythl) < 0.5000000000 then goto N17_2;
+ else goto N17_8;
+
+N17_2:
+ if age(created_at) < 1830.0000000000 then goto N17_3;
+ else goto N17_7;
+
+N17_3:
+ if fieldMatch(text).tail < 16.5000000000 then goto N17_4;
+ else goto T17_5;
+
+N17_4:
+ if term(2).significance < 0.9960604906 then goto T17_1;
+ else goto N17_5;
+
+T17_1:
+ response = -0.0087614790;
+ goto D17;
+
+N17_5:
+ if fieldMatch(text).occurrence < 0.1225000024 then goto T17_2;
+ else goto N17_6;
+
+T17_2:
+ response = -0.0067609181;
+ goto D17;
+
+N17_6:
+ if term(1).significance < 0.9832755327 then goto T17_3;
+ else goto T17_4;
+
+T17_3:
+ response = 0.0282354539;
+ goto D17;
+
+T17_4:
+ response = 0.0025908270;
+ goto D17;
+
+T17_5:
+ response = 0.0073141014;
+ goto D17;
+
+N17_7:
+ if age(created_at) < 45000.0000000000 then goto T17_6;
+ else goto T17_7;
+
+T17_6:
+ response = -0.0156556128;
+ goto D17;
+
+T17_7:
+ response = -0.0071654687;
+ goto D17;
+
+N17_8:
+ if age(created_at) < 1830.0000000000 then goto N17_9;
+ else goto N17_14;
+
+N17_9:
+ if fieldMatch(text).occurrence < 0.0754984990 then goto T17_8;
+ else goto N17_10;
+
+T17_8:
+ response = 0.0073343160;
+ goto D17;
+
+N17_10:
+ if fieldMatch(text) < 0.3130764961 then goto N17_11;
+ else goto N17_12;
+
+N17_11:
+ if term(1).significance < 0.9978075027 then goto T17_9;
+ else goto T17_10;
+
+T17_9:
+ response = -0.0014603640;
+ goto D17;
+
+T17_10:
+ response = 0.0291628398;
+ goto D17;
+
+N17_12:
+ if attribute(user_statuses_count) < 29.5000000000 then goto T17_11;
+ else goto N17_13;
+
+T17_11:
+ response = -0.0055009180;
+ goto D17;
+
+N17_13:
+ if attribute(yst_reply_auth) < 476.0000000000 then goto T17_12;
+ else goto T17_13;
+
+T17_12:
+ response = 0.0209690045;
+ goto D17;
+
+T17_13:
+ response = -0.0105504498;
+ goto D17;
+
+N17_14:
+ if age(created_at) < 30600.0000000000 then goto N17_15;
+ else goto T17_18;
+
+N17_15:
+ if fieldTermMatch(text,1).occurrences < 1.5000000000 then goto N17_16;
+ else goto T17_17;
+
+N17_16:
+ if fieldMatch(text) < 0.4531754851 then goto T17_14;
+ else goto N17_17;
+
+T17_14:
+ response = -0.0106616269;
+ goto D17;
+
+N17_17:
+ if fieldMatch(text).importance < 0.4999470115 then goto T17_15;
+ else goto T17_16;
+
+T17_15:
+ response = -0.0131472535;
+ goto D17;
+
+T17_16:
+ response = -0.0008043613;
+ goto D17;
+
+T17_17:
+ response = 0.0065678273;
+ goto D17;
+
+T17_18:
+ response = 0.0043163871;
+ goto D17;
+
+D17:
+
+tnscore = tnscore + response;
+
+/* Tree 19 of 80 */
+N18_1:
+ if attribute(ythl) < 0.5000000000 then goto N18_2;
+ else goto N18_10;
+
+N18_2:
+ if age(created_at) < 1830.0000000000 then goto N18_3;
+ else goto N18_7;
+
+N18_3:
+ if fieldMatch(text).longestSequence < 1.5000000000 then goto N18_4;
+ else goto N18_5;
+
+N18_4:
+ if fieldMatch(text).tail < 12.5000000000 then goto T18_1;
+ else goto T18_2;
+
+T18_1:
+ response = -0.0083194933;
+ goto D18;
+
+T18_2:
+ response = 0.0035102742;
+ goto D18;
+
+N18_5:
+ if attribute(user_friends_count) < 263.5000000000 then goto N18_6;
+ else goto T18_5;
+
+N18_6:
+ if fieldMatch(text).importance < 0.6659150124 then goto T18_3;
+ else goto T18_4;
+
+T18_3:
+ response = 0.0142259075;
+ goto D18;
+
+T18_4:
+ response = -0.0029315835;
+ goto D18;
+
+T18_5:
+ response = 0.0146667338;
+ goto D18;
+
+N18_7:
+ if age(created_at) < 37800.0000000000 then goto N18_8;
+ else goto N18_9;
+
+N18_8:
+ if fieldMatch(text).significantOccurrence < 0.0833195001 then goto T18_6;
+ else goto T18_7;
+
+T18_6:
+ response = -0.0135039055;
+ goto D18;
+
+T18_7:
+ response = -0.0220540111;
+ goto D18;
+
+N18_9:
+ if fieldMatch(text).longestSequenceRatio < 0.5357145071 then goto T18_8;
+ else goto T18_9;
+
+T18_8:
+ response = -0.0130397740;
+ goto D18;
+
+T18_9:
+ response = -0.0039578022;
+ goto D18;
+
+N18_10:
+ if age(created_at) < 1830.0000000000 then goto N18_11;
+ else goto N18_15;
+
+N18_11:
+ if term(0).significance < 0.9184160233 then goto N18_12;
+ else goto N18_13;
+
+N18_12:
+ if fieldMatch(text).longestSequence < 1.5000000000 then goto T18_10;
+ else goto T18_11;
+
+T18_10:
+ response = -0.0589194117;
+ goto D18;
+
+T18_11:
+ response = 0.0129273078;
+ goto D18;
+
+N18_13:
+ if fieldTermMatch(text,1).firstPosition < 7.5000000000 then goto T18_12;
+ else goto N18_14;
+
+T18_12:
+ response = 0.0206642588;
+ goto D18;
+
+N18_14:
+ if attribute(user_statuses_count) < 63762.0000000000 then goto T18_13;
+ else goto T18_14;
+
+T18_13:
+ response = 0.0138866614;
+ goto D18;
+
+T18_14:
+ response = -0.0354735543;
+ goto D18;
+
+N18_15:
+ if fieldTermMatch(text,0).firstPosition < 1.5000000000 then goto N18_16;
+ else goto N18_17;
+
+N18_16:
+ if age(created_at) < 52200.0000000000 then goto T18_15;
+ else goto T18_16;
+
+T18_15:
+ response = 0.0028235989;
+ goto D18;
+
+T18_16:
+ response = 0.0121270804;
+ goto D18;
+
+N18_17:
+ if fieldMatch(text).importance < 0.6664544940 then goto T18_17;
+ else goto T18_18;
+
+T18_17:
+ response = -0.0071218235;
+ goto D18;
+
+T18_18:
+ response = 0.0005514519;
+ goto D18;
+
+D18:
+
+tnscore = tnscore + response;
+
+/* Tree 20 of 80 */
+N19_1:
+ if attribute(ythl) < 0.5000000000 then goto N19_2;
+ else goto N19_6;
+
+N19_2:
+ if age(created_at) < 1830.0000000000 then goto N19_3;
+ else goto N19_5;
+
+N19_3:
+ if fieldTermMatch(text,1).firstPosition < 4.5000000000 then goto N19_4;
+ else goto T19_3;
+
+N19_4:
+ if match < 0.9384620190 then goto T19_1;
+ else goto T19_2;
+
+T19_1:
+ response = 0.0165893189;
+ goto D19;
+
+T19_2:
+ response = -0.0049824111;
+ goto D19;
+
+T19_3:
+ response = -0.0046803205;
+ goto D19;
+
+N19_5:
+ if age(created_at) < 41400.0000000000 then goto T19_4;
+ else goto T19_5;
+
+T19_4:
+ response = -0.0148008888;
+ goto D19;
+
+T19_5:
+ response = -0.0071343073;
+ goto D19;
+
+N19_6:
+ if age(created_at) < 1830.0000000000 then goto N19_7;
+ else goto N19_15;
+
+N19_7:
+ if fieldTermMatch(text,0).firstPosition < 6.5000000000 then goto N19_8;
+ else goto N19_9;
+
+N19_8:
+ if attribute(user_followers_count) < 945.5000000000 then goto T19_6;
+ else goto T19_7;
+
+T19_6:
+ response = 0.0158379194;
+ goto D19;
+
+T19_7:
+ response = 0.0252891613;
+ goto D19;
+
+N19_9:
+ if fieldMatch(text) < 0.2744970024 then goto N19_10;
+ else goto N19_12;
+
+N19_10:
+ if term(0).significance < 0.9929184914 then goto N19_11;
+ else goto T19_10;
+
+N19_11:
+ if fieldMatch(text).tail < 6.5000000000 then goto T19_8;
+ else goto T19_9;
+
+T19_8:
+ response = -0.0518040838;
+ goto D19;
+
+T19_9:
+ response = 0.0076190376;
+ goto D19;
+
+T19_10:
+ response = 0.0091624226;
+ goto D19;
+
+N19_12:
+ if attribute(yst_tweet_language) < 3243.5000000000 then goto N19_13;
+ else goto T19_14;
+
+N19_13:
+ if attribute(yst_reply_auth) < 469.5000000000 then goto N19_14;
+ else goto T19_13;
+
+N19_14:
+ if fieldMatch(text).significantOccurrence < 0.0392310023 then goto T19_11;
+ else goto T19_12;
+
+T19_11:
+ response = -0.0078021755;
+ goto D19;
+
+T19_12:
+ response = 0.0161894548;
+ goto D19;
+
+T19_13:
+ response = -0.0230367514;
+ goto D19;
+
+T19_14:
+ response = -0.0302108693;
+ goto D19;
+
+N19_15:
+ if age(created_at) < 30600.0000000000 then goto N19_16;
+ else goto N19_17;
+
+N19_16:
+ if fieldMatch(text) < 0.3708400130 then goto T19_15;
+ else goto T19_16;
+
+T19_15:
+ response = -0.0093177671;
+ goto D19;
+
+T19_16:
+ response = -0.0008716804;
+ goto D19;
+
+N19_17:
+ if fieldMatch(text) < 0.5607429743 then goto T19_17;
+ else goto T19_18;
+
+T19_17:
+ response = -0.0004994075;
+ goto D19;
+
+T19_18:
+ response = 0.0075202897;
+ goto D19;
+
+D19:
+
+tnscore = tnscore + response;
+
+/* Tree 21 of 80 */
+N20_1:
+ if attribute(ythl) < 0.5000000000 then goto N20_2;
+ else goto N20_9;
+
+N20_2:
+ if age(created_at) < 1830.0000000000 then goto N20_3;
+ else goto N20_8;
+
+N20_3:
+ if match < 0.6055585146 then goto N20_4;
+ else goto N20_5;
+
+N20_4:
+ if fieldTermMatch(text,1).firstPosition < 9.5000000000 then goto T20_1;
+ else goto T20_2;
+
+T20_1:
+ response = -0.0038025793;
+ goto D20;
+
+T20_2:
+ response = -0.0365864040;
+ goto D20;
+
+N20_5:
+ if attribute(yst_tweet_language) < 3243.5000000000 then goto N20_6;
+ else goto T20_6;
+
+N20_6:
+ if fieldMatch(text).significantOccurrence < 0.1483514905 then goto N20_7;
+ else goto T20_5;
+
+N20_7:
+ if term(1).significance < 0.7788045406 then goto T20_3;
+ else goto T20_4;
+
+T20_3:
+ response = 0.0569638816;
+ goto D20;
+
+T20_4:
+ response = -0.0006508355;
+ goto D20;
+
+T20_5:
+ response = -0.0215954499;
+ goto D20;
+
+T20_6:
+ response = -0.0288353010;
+ goto D20;
+
+N20_8:
+ if age(created_at) < 5400.0000000000 then goto T20_7;
+ else goto T20_8;
+
+T20_7:
+ response = -0.0155259431;
+ goto D20;
+
+T20_8:
+ response = -0.0084487818;
+ goto D20;
+
+N20_9:
+ if age(created_at) < 1830.0000000000 then goto N20_10;
+ else goto N20_16;
+
+N20_10:
+ if attribute(user_followers_count) < 2333.0000000000 then goto N20_11;
+ else goto T20_15;
+
+N20_11:
+ if fieldMatch(text).importance < 0.4989485145 then goto N20_12;
+ else goto N20_13;
+
+N20_12:
+ if fieldMatch(text).importance < 0.4988874793 then goto T20_9;
+ else goto T20_10;
+
+T20_9:
+ response = 0.0002952785;
+ goto D20;
+
+T20_10:
+ response = -0.1374273254;
+ goto D20;
+
+N20_13:
+ if fieldMatch(text).absoluteOccurrence < 0.0122500006 then goto N20_14;
+ else goto N20_15;
+
+N20_14:
+ if attribute(yst_reply_auth) < 22.5000000000 then goto T20_11;
+ else goto T20_12;
+
+T20_11:
+ response = 0.0126010812;
+ goto D20;
+
+T20_12:
+ response = 0.0015712189;
+ goto D20;
+
+N20_15:
+ if fieldTermMatch(text,1).firstPosition < 8.5000000000 then goto T20_13;
+ else goto T20_14;
+
+T20_13:
+ response = 0.0140537649;
+ goto D20;
+
+T20_14:
+ response = 0.0345167434;
+ goto D20;
+
+T20_15:
+ response = 0.0247552557;
+ goto D20;
+
+N20_16:
+ if fieldMatch(text).importance < 0.6664245129 then goto N20_17;
+ else goto T20_18;
+
+N20_17:
+ if match < 0.9277470112 then goto T20_16;
+ else goto T20_17;
+
+T20_16:
+ response = -0.0104111915;
+ goto D20;
+
+T20_17:
+ response = -0.0006366780;
+ goto D20;
+
+T20_18:
+ response = 0.0023009658;
+ goto D20;
+
+D20:
+
+tnscore = tnscore + response;
+
+/* Tree 22 of 80 */
+N21_1:
+ if attribute(ythl) < 0.5000000000 then goto N21_2;
+ else goto N21_10;
+
+N21_2:
+ if fieldMatch(text).tail < 11.5000000000 then goto N21_3;
+ else goto N21_8;
+
+N21_3:
+ if age(created_at) < 1830.0000000000 then goto N21_4;
+ else goto N21_7;
+
+N21_4:
+ if attribute(yst_tweet_language) < 3554.0000000000 then goto N21_5;
+ else goto T21_4;
+
+N21_5:
+ if fieldMatch(text).importance < 0.6656075120 then goto N21_6;
+ else goto T21_3;
+
+N21_6:
+ if term(0).significance < 0.9996379614 then goto T21_1;
+ else goto T21_2;
+
+T21_1:
+ response = -0.0108858514;
+ goto D21;
+
+T21_2:
+ response = 0.0223953057;
+ goto D21;
+
+T21_3:
+ response = -0.0007236850;
+ goto D21;
+
+T21_4:
+ response = -0.0527538471;
+ goto D21;
+
+N21_7:
+ if age(created_at) < 5400.0000000000 then goto T21_5;
+ else goto T21_6;
+
+T21_5:
+ response = -0.0167835591;
+ goto D21;
+
+T21_6:
+ response = -0.0101222507;
+ goto D21;
+
+N21_8:
+ if attribute(user_friends_count) < 103.5000000000 then goto T21_7;
+ else goto N21_9;
+
+T21_7:
+ response = -0.0058634359;
+ goto D21;
+
+N21_9:
+ if age(created_at) < 1770.0000000000 then goto T21_8;
+ else goto T21_9;
+
+T21_8:
+ response = 0.0106468506;
+ goto D21;
+
+T21_9:
+ response = -0.0022715192;
+ goto D21;
+
+N21_10:
+ if age(created_at) < 1830.0000000000 then goto N21_11;
+ else goto N21_15;
+
+N21_11:
+ if attribute(yst_tweet_language) < 3243.5000000000 then goto N21_12;
+ else goto N21_14;
+
+N21_12:
+ if fieldMatch(text).longestSequence < 1.5000000000 then goto N21_13;
+ else goto T21_12;
+
+N21_13:
+ if term(0).significance < 0.9184160233 then goto T21_10;
+ else goto T21_11;
+
+T21_10:
+ response = -0.0441538866;
+ goto D21;
+
+T21_11:
+ response = 0.0103401752;
+ goto D21;
+
+T21_12:
+ response = 0.0169759088;
+ goto D21;
+
+N21_14:
+ if term(0).significance < 0.9990385175 then goto T21_13;
+ else goto T21_14;
+
+T21_13:
+ response = 0.0064769128;
+ goto D21;
+
+T21_14:
+ response = -0.0484309871;
+ goto D21;
+
+N21_15:
+ if fieldMatch(text).longestSequence < 1.5000000000 then goto T21_15;
+ else goto N21_16;
+
+T21_15:
+ response = -0.0038310021;
+ goto D21;
+
+N21_16:
+ if term(0).significance < 0.9686380029 then goto T21_16;
+ else goto N21_17;
+
+T21_16:
+ response = 0.0135820391;
+ goto D21;
+
+N21_17:
+ if attribute(user_followers_count) < 719.5000000000 then goto T21_17;
+ else goto T21_18;
+
+T21_17:
+ response = -0.0002012513;
+ goto D21;
+
+T21_18:
+ response = 0.0056425249;
+ goto D21;
+
+D21:
+
+tnscore = tnscore + response;
+
+/* Tree 23 of 80 */
+N22_1:
+ if attribute(ythl) < 0.5000000000 then goto N22_2;
+ else goto N22_7;
+
+N22_2:
+ if age(created_at) < 630.0000000000 then goto N22_3;
+ else goto N22_5;
+
+N22_3:
+ if fieldMatch(text).importance < 0.4994785190 then goto T22_1;
+ else goto N22_4;
+
+T22_1:
+ response = 0.0221233715;
+ goto D22;
+
+N22_4:
+ if attribute(user_followers_count) < 926.5000000000 then goto T22_2;
+ else goto T22_3;
+
+T22_2:
+ response = -0.0044878516;
+ goto D22;
+
+T22_3:
+ response = 0.0128654737;
+ goto D22;
+
+N22_5:
+ if fieldMatch(text).tail < 11.5000000000 then goto N22_6;
+ else goto T22_6;
+
+N22_6:
+ if fieldMatch(text).importance < 0.6665325165 then goto T22_4;
+ else goto T22_5;
+
+T22_4:
+ response = -0.0138860556;
+ goto D22;
+
+T22_5:
+ response = -0.0083897223;
+ goto D22;
+
+T22_6:
+ response = -0.0045088750;
+ goto D22;
+
+N22_7:
+ if age(created_at) < 1830.0000000000 then goto N22_8;
+ else goto N22_14;
+
+N22_8:
+ if attribute(yst_tweet_language) < 3587.5000000000 then goto N22_9;
+ else goto T22_13;
+
+N22_9:
+ if fieldMatch(text).importance < 0.4989734888 then goto T22_7;
+ else goto N22_10;
+
+T22_7:
+ response = -0.0087604690;
+ goto D22;
+
+N22_10:
+ if attribute(user_followers_count) < 1733.5000000000 then goto N22_11;
+ else goto T22_12;
+
+N22_11:
+ if fieldMatch(text).occurrence < 0.1455025077 then goto N22_12;
+ else goto T22_11;
+
+N22_12:
+ if fieldMatch(text) < 0.5567239523 then goto N22_13;
+ else goto T22_10;
+
+N22_13:
+ if term(2).significance < 0.9795899987 then goto T22_8;
+ else goto T22_9;
+
+T22_8:
+ response = -0.0543641627;
+ goto D22;
+
+T22_9:
+ response = 0.0024748648;
+ goto D22;
+
+T22_10:
+ response = 0.0124403853;
+ goto D22;
+
+T22_11:
+ response = 0.0174741297;
+ goto D22;
+
+T22_12:
+ response = 0.0222181645;
+ goto D22;
+
+T22_13:
+ response = -0.0288913368;
+ goto D22;
+
+N22_14:
+ if fieldMatch(text).longestSequence < 1.5000000000 then goto N22_15;
+ else goto N22_16;
+
+N22_15:
+ if fieldMatch(text).importance < 0.6664659977 then goto T22_14;
+ else goto T22_15;
+
+T22_14:
+ response = -0.0065555429;
+ goto D22;
+
+T22_15:
+ response = 0.0005791831;
+ goto D22;
+
+N22_16:
+ if fieldMatch(text).earliness < 0.9354164600 then goto N22_17;
+ else goto T22_18;
+
+N22_17:
+ if fieldMatch(text).significantOccurrence < 0.0339080021 then goto T22_16;
+ else goto T22_17;
+
+T22_16:
+ response = -0.0694353726;
+ goto D22;
+
+T22_17:
+ response = 0.0012739636;
+ goto D22;
+
+T22_18:
+ response = 0.0075882453;
+ goto D22;
+
+D22:
+
+tnscore = tnscore + response;
+
+/* Tree 24 of 80 */
+N23_1:
+ if attribute(ythl) < 0.5000000000 then goto N23_2;
+ else goto N23_11;
+
+N23_2:
+ if fieldMatch(text).tail < 12.5000000000 then goto N23_3;
+ else goto N23_10;
+
+N23_3:
+ if fieldMatch(text).longestSequence < 1.5000000000 then goto N23_4;
+ else goto N23_8;
+
+N23_4:
+ if age(created_at) < 210.0000000000 then goto T23_1;
+ else goto N23_5;
+
+T23_1:
+ response = 0.0005104670;
+ goto D23;
+
+N23_5:
+ if term(0).significance < 0.9910864830 then goto T23_2;
+ else goto N23_6;
+
+T23_2:
+ response = -0.0213498049;
+ goto D23;
+
+N23_6:
+ if term(0).significance < 0.9921205044 then goto T23_3;
+ else goto N23_7;
+
+T23_3:
+ response = 0.0187676178;
+ goto D23;
+
+N23_7:
+ if fieldMatch(text).tail < 5.5000000000 then goto T23_4;
+ else goto T23_5;
+
+T23_4:
+ response = -0.0155349434;
+ goto D23;
+
+T23_5:
+ response = -0.0073599141;
+ goto D23;
+
+N23_8:
+ if fieldMatch(text).importance < 0.6662604809 then goto N23_9;
+ else goto T23_8;
+
+N23_9:
+ if attribute(user_followers_count) < 1875.0000000000 then goto T23_6;
+ else goto T23_7;
+
+T23_6:
+ response = 0.0009507365;
+ goto D23;
+
+T23_7:
+ response = 0.0527948179;
+ goto D23;
+
+T23_8:
+ response = -0.0073533813;
+ goto D23;
+
+N23_10:
+ if age(created_at) < 810.0000000000 then goto T23_9;
+ else goto T23_10;
+
+T23_9:
+ response = 0.0076378446;
+ goto D23;
+
+T23_10:
+ response = -0.0027198247;
+ goto D23;
+
+N23_11:
+ if age(created_at) < 1830.0000000000 then goto N23_12;
+ else goto N23_16;
+
+N23_12:
+ if fieldMatch(text).significantOccurrence < 0.0424195006 then goto T23_11;
+ else goto N23_13;
+
+T23_11:
+ response = 0.0015356130;
+ goto D23;
+
+N23_13:
+ if fieldMatch(text).tail < 7.5000000000 then goto N23_14;
+ else goto T23_15;
+
+N23_14:
+ if fieldMatch(text).importance < 0.7466344833 then goto N23_15;
+ else goto T23_14;
+
+N23_15:
+ if fieldMatch(text).importance < 0.6666129827 then goto T23_12;
+ else goto T23_13;
+
+T23_12:
+ response = 0.0067592681;
+ goto D23;
+
+T23_13:
+ response = -0.0205924309;
+ goto D23;
+
+T23_14:
+ response = 0.0160937308;
+ goto D23;
+
+T23_15:
+ response = 0.0167252945;
+ goto D23;
+
+N23_16:
+ if fieldTermMatch(text,0).firstPosition < 2.5000000000 then goto T23_16;
+ else goto N23_17;
+
+T23_16:
+ response = 0.0049627365;
+ goto D23;
+
+N23_17:
+ if fieldMatch(text).importance < 0.6664254665 then goto T23_17;
+ else goto T23_18;
+
+T23_17:
+ response = -0.0065001791;
+ goto D23;
+
+T23_18:
+ response = 0.0010821803;
+ goto D23;
+
+D23:
+
+tnscore = tnscore + response;
+
+/* Tree 25 of 80 */
+N24_1:
+ if attribute(ythl) < 0.5000000000 then goto N24_2;
+ else goto N24_9;
+
+N24_2:
+ if age(created_at) < 630.0000000000 then goto N24_3;
+ else goto N24_5;
+
+N24_3:
+ if age(created_at) < 510.0000000000 then goto T24_1;
+ else goto N24_4;
+
+T24_1:
+ response = -0.0026815916;
+ goto D24;
+
+N24_4:
+ if fieldMatch(text).importance < 0.4997144938 then goto T24_2;
+ else goto T24_3;
+
+T24_2:
+ response = 0.0414511969;
+ goto D24;
+
+T24_3:
+ response = 0.0044068150;
+ goto D24;
+
+N24_5:
+ if fieldMatch(text).tail < 11.5000000000 then goto N24_6;
+ else goto N24_8;
+
+N24_6:
+ if fieldMatch(text).longestSequence < 1.5000000000 then goto N24_7;
+ else goto T24_6;
+
+N24_7:
+ if fieldMatch(text).importance < 0.4994869828 then goto T24_4;
+ else goto T24_5;
+
+T24_4:
+ response = 0.0066561273;
+ goto D24;
+
+T24_5:
+ response = -0.0136850009;
+ goto D24;
+
+T24_6:
+ response = -0.0069580048;
+ goto D24;
+
+N24_8:
+ if attribute(yst_reply_auth) < 16.5000000000 then goto T24_7;
+ else goto T24_8;
+
+T24_7:
+ response = 0.0023887625;
+ goto D24;
+
+T24_8:
+ response = -0.0083630492;
+ goto D24;
+
+N24_9:
+ if age(created_at) < 1830.0000000000 then goto N24_10;
+ else goto N24_13;
+
+N24_10:
+ if fieldMatch(text).fieldCompleteness < 0.0425724983 then goto N24_11;
+ else goto N24_12;
+
+N24_11:
+ if attribute(user_friends_count) < 252.5000000000 then goto T24_9;
+ else goto T24_10;
+
+T24_9:
+ response = -0.0201198990;
+ goto D24;
+
+T24_10:
+ response = 0.0154324464;
+ goto D24;
+
+N24_12:
+ if fieldMatch(text).tail < 5.5000000000 then goto T24_11;
+ else goto T24_12;
+
+T24_11:
+ response = 0.0052786010;
+ goto D24;
+
+T24_12:
+ response = 0.0135236791;
+ goto D24;
+
+N24_13:
+ if fieldMatch(text).longestSequence < 1.5000000000 then goto T24_13;
+ else goto N24_14;
+
+T24_13:
+ response = -0.0049136258;
+ goto D24;
+
+N24_14:
+ if age(created_at) < 27000.0000000000 then goto T24_14;
+ else goto N24_15;
+
+T24_14:
+ response = -0.0004128224;
+ goto D24;
+
+N24_15:
+ if term(2).significance < 0.9998239875 then goto N24_16;
+ else goto T24_18;
+
+N24_16:
+ if fieldMatch(text).completeness < 0.9559409618 then goto T24_15;
+ else goto N24_17;
+
+T24_15:
+ response = -0.0209409304;
+ goto D24;
+
+N24_17:
+ if term(1).significance < 0.9128689766 then goto T24_16;
+ else goto T24_17;
+
+T24_16:
+ response = 0.0195740015;
+ goto D24;
+
+T24_17:
+ response = 0.0035250792;
+ goto D24;
+
+T24_18:
+ response = 0.0133058164;
+ goto D24;
+
+D24:
+
+tnscore = tnscore + response;
+
+/* Tree 26 of 80 */
+N25_1:
+ if attribute(ythl) < 0.5000000000 then goto N25_2;
+ else goto N25_8;
+
+N25_2:
+ if age(created_at) < 1770.0000000000 then goto N25_3;
+ else goto N25_5;
+
+N25_3:
+ if fieldMatch(text).tail < 7.5000000000 then goto T25_1;
+ else goto N25_4;
+
+T25_1:
+ response = -0.0047927890;
+ goto D25;
+
+N25_4:
+ if fieldMatch(text).importance < 0.6652389765 then goto T25_2;
+ else goto T25_3;
+
+T25_2:
+ response = -0.0020739127;
+ goto D25;
+
+T25_3:
+ response = 0.0102078569;
+ goto D25;
+
+N25_5:
+ if fieldMatch(text).importance < 0.6657874584 then goto N25_6;
+ else goto N25_7;
+
+N25_6:
+ if attribute(user_friends_count) < 16.5000000000 then goto T25_4;
+ else goto T25_5;
+
+T25_4:
+ response = 0.0004515464;
+ goto D25;
+
+T25_5:
+ response = -0.0178576762;
+ goto D25;
+
+N25_7:
+ if term(0).significance < 0.9976029992 then goto T25_6;
+ else goto T25_7;
+
+T25_6:
+ response = -0.0101410825;
+ goto D25;
+
+T25_7:
+ response = -0.0048545380;
+ goto D25;
+
+N25_8:
+ if age(created_at) < 1770.0000000000 then goto N25_9;
+ else goto N25_14;
+
+N25_9:
+ if attribute(yst_tweet_language) < 3243.5000000000 then goto N25_10;
+ else goto T25_13;
+
+N25_10:
+ if attribute(user_followers_count) < 606.0000000000 then goto N25_11;
+ else goto N25_12;
+
+N25_11:
+ if fieldMatch(text).importance < 0.7488585114 then goto T25_8;
+ else goto T25_9;
+
+T25_8:
+ response = 0.0066962893;
+ goto D25;
+
+T25_9:
+ response = 0.0158854368;
+ goto D25;
+
+N25_12:
+ if fieldMatch(text).significantOccurrence < 0.0555564985 then goto N25_13;
+ else goto T25_12;
+
+N25_13:
+ if attribute(user_statuses_count) < 13511.5000000000 then goto T25_10;
+ else goto T25_11;
+
+T25_10:
+ response = 0.0163436735;
+ goto D25;
+
+T25_11:
+ response = -0.0031528673;
+ goto D25;
+
+T25_12:
+ response = 0.0222661817;
+ goto D25;
+
+T25_13:
+ response = -0.0172180317;
+ goto D25;
+
+N25_14:
+ if fieldMatch(text).importance < 0.6664534807 then goto N25_15;
+ else goto N25_16;
+
+N25_15:
+ if fieldMatch(text) < 0.3528665006 then goto T25_14;
+ else goto T25_15;
+
+T25_14:
+ response = -0.0116436179;
+ goto D25;
+
+T25_15:
+ response = -0.0025309342;
+ goto D25;
+
+N25_16:
+ if term(2).significance < 0.9981445074 then goto N25_17;
+ else goto T25_18;
+
+N25_17:
+ if fieldTermMatch(text,3).firstPosition < 4.5000000000 then goto T25_16;
+ else goto T25_17;
+
+T25_16:
+ response = 0.0069706912;
+ goto D25;
+
+T25_17:
+ response = -0.0021646003;
+ goto D25;
+
+T25_18:
+ response = 0.0046032512;
+ goto D25;
+
+D25:
+
+tnscore = tnscore + response;
+
+/* Tree 27 of 80 */
+N26_1:
+ if attribute(ythl) < 0.5000000000 then goto N26_2;
+ else goto N26_11;
+
+N26_2:
+ if age(created_at) < 1830.0000000000 then goto N26_3;
+ else goto N26_10;
+
+N26_3:
+ if fieldMatch(user_name).fieldCompleteness < 0.2916665077 then goto N26_4;
+ else goto T26_8;
+
+N26_4:
+ if attribute(user_statuses_count) < 497.5000000000 then goto T26_1;
+ else goto N26_5;
+
+T26_1:
+ response = 0.0022748148;
+ goto D26;
+
+N26_5:
+ if attribute(user_followers_count) < 960.5000000000 then goto N26_6;
+ else goto T26_7;
+
+N26_6:
+ if term(0).significance < 0.9725670218 then goto T26_2;
+ else goto N26_7;
+
+T26_2:
+ response = -0.0246347116;
+ goto D26;
+
+N26_7:
+ if term(0).significance < 0.9963495135 then goto N26_8;
+ else goto T26_6;
+
+N26_8:
+ if fieldMatch(text).tail < 9.5000000000 then goto N26_9;
+ else goto T26_5;
+
+N26_9:
+ if fieldMatch(text).head < 11.5000000000 then goto T26_3;
+ else goto T26_4;
+
+T26_3:
+ response = 0.0013827683;
+ goto D26;
+
+T26_4:
+ response = -0.0212024376;
+ goto D26;
+
+T26_5:
+ response = 0.0161508420;
+ goto D26;
+
+T26_6:
+ response = -0.0104714457;
+ goto D26;
+
+T26_7:
+ response = 0.0070006551;
+ goto D26;
+
+T26_8:
+ response = 0.0447412235;
+ goto D26;
+
+N26_10:
+ if age(created_at) < 16200.0000000000 then goto T26_9;
+ else goto T26_10;
+
+T26_9:
+ response = -0.0124666980;
+ goto D26;
+
+T26_10:
+ response = -0.0057219106;
+ goto D26;
+
+N26_11:
+ if age(created_at) < 1830.0000000000 then goto N26_12;
+ else goto N26_14;
+
+N26_12:
+ if fieldMatch(text).importance < 0.4999520183 then goto T26_11;
+ else goto N26_13;
+
+T26_11:
+ response = 0.0038677446;
+ goto D26;
+
+N26_13:
+ if fieldMatch(text) < 0.4182469845 then goto T26_12;
+ else goto T26_13;
+
+T26_12:
+ response = 0.0026964712;
+ goto D26;
+
+T26_13:
+ response = 0.0132060784;
+ goto D26;
+
+N26_14:
+ if fieldMatch(text).weightedOccurrence < 0.0513554998 then goto N26_15;
+ else goto T26_18;
+
+N26_15:
+ if match < 0.6867794991 then goto N26_16;
+ else goto T26_17;
+
+N26_16:
+ if fieldMatch(text).importance < 0.6665154696 then goto T26_14;
+ else goto N26_17;
+
+T26_14:
+ response = -0.0201022100;
+ goto D26;
+
+N26_17:
+ if term(2).significance < 0.9950574636 then goto T26_15;
+ else goto T26_16;
+
+T26_15:
+ response = -0.0169792919;
+ goto D26;
+
+T26_16:
+ response = 0.0050699268;
+ goto D26;
+
+T26_17:
+ response = -0.0013697969;
+ goto D26;
+
+T26_18:
+ response = 0.0020313056;
+ goto D26;
+
+D26:
+
+tnscore = tnscore + response;
+
+/* Tree 28 of 80 */
+N27_1:
+ if attribute(ythl) < 0.5000000000 then goto N27_2;
+ else goto N27_12;
+
+N27_2:
+ if age(created_at) < 1830.0000000000 then goto N27_3;
+ else goto N27_11;
+
+N27_3:
+ if fieldMatch(text).importance < 0.6656075120 then goto N27_4;
+ else goto N27_5;
+
+N27_4:
+ if fieldMatch(text) < 0.2109414935 then goto T27_1;
+ else goto T27_2;
+
+T27_1:
+ response = -0.0320963356;
+ goto D27;
+
+T27_2:
+ response = -0.0049411304;
+ goto D27;
+
+N27_5:
+ if attribute(yst_tweet_language) < 3243.5000000000 then goto N27_6;
+ else goto T27_9;
+
+N27_6:
+ if term(2).significance < 0.9769929647 then goto N27_7;
+ else goto N27_9;
+
+N27_7:
+ if fieldMatch(text).importance < 0.6665915251 then goto T27_3;
+ else goto N27_8;
+
+T27_3:
+ response = 0.0282759231;
+ goto D27;
+
+N27_8:
+ if fieldMatch(text).importance < 0.7399419546 then goto T27_4;
+ else goto T27_5;
+
+T27_4:
+ response = -0.0639593720;
+ goto D27;
+
+T27_5:
+ response = -0.0116583984;
+ goto D27;
+
+N27_9:
+ if term(0).significance < 0.9964904785 then goto T27_6;
+ else goto N27_10;
+
+T27_6:
+ response = 0.0113601762;
+ goto D27;
+
+N27_10:
+ if term(0).significance < 0.9984384775 then goto T27_7;
+ else goto T27_8;
+
+T27_7:
+ response = -0.0096497985;
+ goto D27;
+
+T27_8:
+ response = 0.0065807303;
+ goto D27;
+
+T27_9:
+ response = -0.0430967785;
+ goto D27;
+
+N27_11:
+ if fieldMatch(text).longestSequence < 1.5000000000 then goto T27_10;
+ else goto T27_11;
+
+T27_10:
+ response = -0.0127352010;
+ goto D27;
+
+T27_11:
+ response = -0.0062520929;
+ goto D27;
+
+N27_12:
+ if age(created_at) < 1770.0000000000 then goto N27_13;
+ else goto N27_15;
+
+N27_13:
+ if fieldMatch(text).earliness < 0.7211109996 then goto T27_12;
+ else goto N27_14;
+
+T27_12:
+ response = 0.0056563829;
+ goto D27;
+
+N27_14:
+ if attribute(user_followers_count) < 812.5000000000 then goto T27_13;
+ else goto T27_14;
+
+T27_13:
+ response = 0.0100250822;
+ goto D27;
+
+T27_14:
+ response = 0.0209608983;
+ goto D27;
+
+N27_15:
+ if fieldTermMatch(text,0).firstPosition < 4.5000000000 then goto T27_15;
+ else goto N27_16;
+
+T27_15:
+ response = 0.0027791958;
+ goto D27;
+
+N27_16:
+ if fieldMatch(text).importance < 0.6664240360 then goto N27_17;
+ else goto T27_18;
+
+N27_17:
+ if match < 0.9192979932 then goto T27_16;
+ else goto T27_17;
+
+T27_16:
+ response = -0.0152598227;
+ goto D27;
+
+T27_17:
+ response = -0.0032270961;
+ goto D27;
+
+T27_18:
+ response = -0.0005422229;
+ goto D27;
+
+D27:
+
+tnscore = tnscore + response;
+
+/* Tree 29 of 80 */
+N28_1:
+ if attribute(ythl) < 0.5000000000 then goto N28_2;
+ else goto N28_8;
+
+N28_2:
+ if age(created_at) < 1830.0000000000 then goto N28_3;
+ else goto N28_6;
+
+N28_3:
+ if attribute(user_followers_count) < 459.5000000000 then goto N28_4;
+ else goto N28_5;
+
+N28_4:
+ if attribute(user_statuses_count) < 496.5000000000 then goto T28_1;
+ else goto T28_2;
+
+T28_1:
+ response = 0.0008480388;
+ goto D28;
+
+T28_2:
+ response = -0.0090870631;
+ goto D28;
+
+N28_5:
+ if attribute(yst_reply_auth) < 244.5000000000 then goto T28_3;
+ else goto T28_4;
+
+T28_3:
+ response = 0.0126503896;
+ goto D28;
+
+T28_4:
+ response = -0.0054197846;
+ goto D28;
+
+N28_6:
+ if fieldMatch(text).longestSequence < 1.5000000000 then goto N28_7;
+ else goto T28_7;
+
+N28_7:
+ if fieldMatch(text).absoluteOccurrence < 0.0122500006 then goto T28_5;
+ else goto T28_6;
+
+T28_5:
+ response = -0.0148159779;
+ goto D28;
+
+T28_6:
+ response = 0.0029148481;
+ goto D28;
+
+T28_7:
+ response = -0.0058224247;
+ goto D28;
+
+N28_8:
+ if age(created_at) < 1830.0000000000 then goto N28_9;
+ else goto N28_15;
+
+N28_9:
+ if fieldMatch(text).importance < 0.7490880489 then goto N28_10;
+ else goto T28_14;
+
+N28_10:
+ if fieldMatch(text) < 0.5652275085 then goto N28_11;
+ else goto N28_12;
+
+N28_11:
+ if term(2).significance < 0.9916304946 then goto T28_8;
+ else goto T28_9;
+
+T28_8:
+ response = -0.0285282409;
+ goto D28;
+
+T28_9:
+ response = 0.0051566337;
+ goto D28;
+
+N28_12:
+ if attribute(user_followers_count) < 104.5000000000 then goto N28_13;
+ else goto N28_14;
+
+N28_13:
+ if fieldMatch(text) < 0.8065220118 then goto T28_10;
+ else goto T28_11;
+
+T28_10:
+ response = 0.0383292168;
+ goto D28;
+
+T28_11:
+ response = 0.0010266011;
+ goto D28;
+
+N28_14:
+ if attribute(yst_reply_auth) < 391.0000000000 then goto T28_12;
+ else goto T28_13;
+
+T28_12:
+ response = 0.0133363207;
+ goto D28;
+
+T28_13:
+ response = -0.0143777685;
+ goto D28;
+
+T28_14:
+ response = 0.0164241107;
+ goto D28;
+
+N28_15:
+ if fieldTermMatch(text,0).firstPosition < 4.5000000000 then goto T28_15;
+ else goto N28_16;
+
+T28_15:
+ response = 0.0032745831;
+ goto D28;
+
+N28_16:
+ if fieldMatch(text).importance < 0.7496404648 then goto N28_17;
+ else goto T28_18;
+
+N28_17:
+ if term(1).significance < 0.9979525208 then goto T28_16;
+ else goto T28_17;
+
+T28_16:
+ response = -0.0096924346;
+ goto D28;
+
+T28_17:
+ response = -0.0009781494;
+ goto D28;
+
+T28_18:
+ response = 0.0015180251;
+ goto D28;
+
+D28:
+
+tnscore = tnscore + response;
+
+/* Tree 30 of 80 */
+N29_1:
+ if attribute(ythl) < 0.5000000000 then goto N29_2;
+ else goto N29_10;
+
+N29_2:
+ if age(created_at) < 1830.0000000000 then goto N29_3;
+ else goto N29_9;
+
+N29_3:
+ if term(0).significance < 0.9995554686 then goto N29_4;
+ else goto N29_8;
+
+N29_4:
+ if attribute(yst_reply_auth) < 26.5000000000 then goto N29_5;
+ else goto N29_7;
+
+N29_5:
+ if attribute(user_followers_count) < 92.5000000000 then goto N29_6;
+ else goto T29_3;
+
+N29_6:
+ if fieldLength(text) < 27.5000000000 then goto T29_1;
+ else goto T29_2;
+
+T29_1:
+ response = 0.0012866951;
+ goto D29;
+
+T29_2:
+ response = -0.0265027781;
+ goto D29;
+
+T29_3:
+ response = 0.0104770861;
+ goto D29;
+
+N29_7:
+ if attribute(yst_link_array_size) < 0.0041509997 then goto T29_4;
+ else goto T29_5;
+
+T29_4:
+ response = -0.0099713041;
+ goto D29;
+
+T29_5:
+ response = 0.0011954032;
+ goto D29;
+
+N29_8:
+ if term(0).significance < 0.9996379614 then goto T29_6;
+ else goto T29_7;
+
+T29_6:
+ response = -0.0297536383;
+ goto D29;
+
+T29_7:
+ response = -0.0002317059;
+ goto D29;
+
+N29_9:
+ if fieldMatch(text).longestSequence < 1.5000000000 then goto T29_8;
+ else goto T29_9;
+
+T29_8:
+ response = -0.0121670225;
+ goto D29;
+
+T29_9:
+ response = -0.0054595694;
+ goto D29;
+
+N29_10:
+ if age(created_at) < 1830.0000000000 then goto N29_11;
+ else goto N29_15;
+
+N29_11:
+ if fieldMatch(text).importance < 0.4989734888 then goto N29_12;
+ else goto N29_14;
+
+N29_12:
+ if fieldMatch(text).importance < 0.4986799955 then goto T29_10;
+ else goto N29_13;
+
+T29_10:
+ response = 0.0071175590;
+ goto D29;
+
+N29_13:
+ if fieldLength(text) < 18.5000000000 then goto T29_11;
+ else goto T29_12;
+
+T29_11:
+ response = 0.0043567972;
+ goto D29;
+
+T29_12:
+ response = -0.0954988221;
+ goto D29;
+
+N29_14:
+ if attribute(user_statuses_count) < 5.5000000000 then goto T29_13;
+ else goto T29_14;
+
+T29_13:
+ response = -0.0298547936;
+ goto D29;
+
+T29_14:
+ response = 0.0103403639;
+ goto D29;
+
+N29_15:
+ if fieldMatch(text).occurrence < 0.1348485053 then goto N29_16;
+ else goto T29_18;
+
+N29_16:
+ if match < 0.9276950359 then goto N29_17;
+ else goto T29_17;
+
+N29_17:
+ if term(0).significance < 0.9981074929 then goto T29_15;
+ else goto T29_16;
+
+T29_15:
+ response = -0.0125565952;
+ goto D29;
+
+T29_16:
+ response = -0.0030946195;
+ goto D29;
+
+T29_17:
+ response = 0.0023600605;
+ goto D29;
+
+T29_18:
+ response = 0.0024001179;
+ goto D29;
+
+D29:
+
+tnscore = tnscore + response;
+
+/* Tree 31 of 80 */
+N30_1:
+ if attribute(ythl) < 0.5000000000 then goto N30_2;
+ else goto N30_12;
+
+N30_2:
+ if age(created_at) < 1830.0000000000 then goto N30_3;
+ else goto N30_11;
+
+N30_3:
+ if fieldMatch(text).importance < 0.7413114905 then goto N30_4;
+ else goto N30_9;
+
+N30_4:
+ if fieldMatch(text).importance < 0.6666384935 then goto N30_5;
+ else goto T30_6;
+
+N30_5:
+ if fieldMatch(text).earliness < 0.8681160212 then goto N30_6;
+ else goto N30_7;
+
+N30_6:
+ if match < 0.6799730062 then goto T30_1;
+ else goto T30_2;
+
+T30_1:
+ response = -0.0205917268;
+ goto D30;
+
+T30_2:
+ response = -0.0018660452;
+ goto D30;
+
+N30_7:
+ if fieldMatch(text) < 0.8819584846 then goto N30_8;
+ else goto T30_5;
+
+N30_8:
+ if fieldMatch(text).completeness < 0.9544465542 then goto T30_3;
+ else goto T30_4;
+
+T30_3:
+ response = 0.0070580213;
+ goto D30;
+
+T30_4:
+ response = 0.0359145000;
+ goto D30;
+
+T30_5:
+ response = -0.0174774107;
+ goto D30;
+
+T30_6:
+ response = -0.0421236424;
+ goto D30;
+
+N30_9:
+ if fieldMatch(text).completeness < 0.9575960040 then goto N30_10;
+ else goto T30_9;
+
+N30_10:
+ if fieldMatch(text).earliness < 0.4128789902 then goto T30_7;
+ else goto T30_8;
+
+T30_7:
+ response = -0.0155841429;
+ goto D30;
+
+T30_8:
+ response = 0.0271271066;
+ goto D30;
+
+T30_9:
+ response = 0.0016623712;
+ goto D30;
+
+N30_11:
+ if age(created_at) < 5400.0000000000 then goto T30_10;
+ else goto T30_11;
+
+T30_10:
+ response = -0.0114561422;
+ goto D30;
+
+T30_11:
+ response = -0.0053122836;
+ goto D30;
+
+N30_12:
+ if age(created_at) < 1830.0000000000 then goto N30_13;
+ else goto N30_17;
+
+N30_13:
+ if term(0).significance < 0.9184160233 then goto N30_14;
+ else goto N30_15;
+
+N30_14:
+ if fieldMatch(text).longestSequence < 1.5000000000 then goto T30_12;
+ else goto T30_13;
+
+T30_12:
+ response = -0.0703039170;
+ goto D30;
+
+T30_13:
+ response = 0.0039828312;
+ goto D30;
+
+N30_15:
+ if attribute(yst_tweet_language) < 3243.5000000000 then goto N30_16;
+ else goto T30_16;
+
+N30_16:
+ if fieldMatch(text).occurrence < 0.1455025077 then goto T30_14;
+ else goto T30_15;
+
+T30_14:
+ response = 0.0064206057;
+ goto D30;
+
+T30_15:
+ response = 0.0136203784;
+ goto D30;
+
+T30_16:
+ response = -0.0155537108;
+ goto D30;
+
+N30_17:
+ if fieldMatch(text).importance < 0.6656370163 then goto T30_17;
+ else goto T30_18;
+
+T30_17:
+ response = -0.0057846012;
+ goto D30;
+
+T30_18:
+ response = 0.0015276435;
+ goto D30;
+
+D30:
+
+tnscore = tnscore + response;
+
+/* Tree 32 of 80 */
+N31_1:
+ if attribute(ythl) < 0.5000000000 then goto N31_2;
+ else goto N31_9;
+
+N31_2:
+ if fieldMatch(text).tail < 12.5000000000 then goto N31_3;
+ else goto N31_8;
+
+N31_3:
+ if fieldMatch(text) < 0.4900699854 then goto N31_4;
+ else goto N31_5;
+
+N31_4:
+ if term(0).significance < 0.9883320332 then goto T31_1;
+ else goto T31_2;
+
+T31_1:
+ response = -0.0195571527;
+ goto D31;
+
+T31_2:
+ response = -0.0078552672;
+ goto D31;
+
+N31_5:
+ if fieldMatch(text).weightedOccurrence < 0.0929629952 then goto N31_6;
+ else goto T31_6;
+
+N31_6:
+ if age(created_at) < 1530.0000000000 then goto N31_7;
+ else goto T31_5;
+
+N31_7:
+ if fieldTermMatch(text,1).firstPosition < 6.5000000000 then goto T31_3;
+ else goto T31_4;
+
+T31_3:
+ response = 0.0158849428;
+ goto D31;
+
+T31_4:
+ response = -0.0010859682;
+ goto D31;
+
+T31_5:
+ response = -0.0055859102;
+ goto D31;
+
+T31_6:
+ response = -0.0109538484;
+ goto D31;
+
+N31_8:
+ if fieldMatch(user_name) < 0.0710614994 then goto T31_7;
+ else goto T31_8;
+
+T31_7:
+ response = 0.0002014444;
+ goto D31;
+
+T31_8:
+ response = 0.0605228154;
+ goto D31;
+
+N31_9:
+ if age(created_at) < 1830.0000000000 then goto N31_10;
+ else goto N31_13;
+
+N31_10:
+ if fieldMatch(text) < 0.2891100049 then goto T31_9;
+ else goto N31_11;
+
+T31_9:
+ response = -0.0017665209;
+ goto D31;
+
+N31_11:
+ if attribute(yst_reply_auth) < 471.5000000000 then goto N31_12;
+ else goto T31_12;
+
+N31_12:
+ if attribute(user_followers_count) < 2200.0000000000 then goto T31_10;
+ else goto T31_11;
+
+T31_10:
+ response = 0.0095812144;
+ goto D31;
+
+T31_11:
+ response = 0.0192088364;
+ goto D31;
+
+T31_12:
+ response = -0.0097908152;
+ goto D31;
+
+N31_13:
+ if fieldMatch(text).occurrence < 0.1348485053 then goto N31_14;
+ else goto N31_16;
+
+N31_14:
+ if match < 0.9285860062 then goto N31_15;
+ else goto T31_15;
+
+N31_15:
+ if attribute(user_statuses_count) < 2957.5000000000 then goto T31_13;
+ else goto T31_14;
+
+T31_13:
+ response = -0.0031994690;
+ goto D31;
+
+T31_14:
+ response = -0.0120737981;
+ goto D31;
+
+T31_15:
+ response = 0.0005449950;
+ goto D31;
+
+N31_16:
+ if term(2).significance < 0.9939094782 then goto N31_17;
+ else goto T31_18;
+
+N31_17:
+ if fieldMatch(text).absoluteProximity < 0.0212500002 then goto T31_16;
+ else goto T31_17;
+
+T31_16:
+ response = -0.0227140008;
+ goto D31;
+
+T31_17:
+ response = -0.0003328979;
+ goto D31;
+
+T31_18:
+ response = 0.0055961138;
+ goto D31;
+
+D31:
+
+tnscore = tnscore + response;
+
+/* Tree 33 of 80 */
+N32_1:
+ if attribute(ythl) < 0.5000000000 then goto N32_2;
+ else goto N32_9;
+
+N32_2:
+ if age(created_at) < 1830.0000000000 then goto N32_3;
+ else goto N32_7;
+
+N32_3:
+ if attribute(yst_reply_auth) < 176.0000000000 then goto N32_4;
+ else goto N32_6;
+
+N32_4:
+ if attribute(yst_tweet_language) < 3243.5000000000 then goto N32_5;
+ else goto T32_3;
+
+N32_5:
+ if term(1).significance < 0.7788045406 then goto T32_1;
+ else goto T32_2;
+
+T32_1:
+ response = 0.0578500341;
+ goto D32;
+
+T32_2:
+ response = 0.0011485747;
+ goto D32;
+
+T32_3:
+ response = -0.0253253039;
+ goto D32;
+
+N32_6:
+ if attribute(yst_link_array_size) < 0.0223225001 then goto T32_4;
+ else goto T32_5;
+
+T32_4:
+ response = -0.0156277732;
+ goto D32;
+
+T32_5:
+ response = 0.0023478823;
+ goto D32;
+
+N32_7:
+ if fieldMatch(text).longestSequence < 1.5000000000 then goto T32_6;
+ else goto N32_8;
+
+T32_6:
+ response = -0.0107069928;
+ goto D32;
+
+N32_8:
+ if attribute(user_followers_count) < 1710.5000000000 then goto T32_7;
+ else goto T32_8;
+
+T32_7:
+ response = -0.0053639058;
+ goto D32;
+
+T32_8:
+ response = 0.0090303888;
+ goto D32;
+
+N32_9:
+ if fieldMatch(text) < 0.5406639576 then goto N32_10;
+ else goto N32_12;
+
+N32_10:
+ if fieldMatch(text).occurrence < 0.1165160015 then goto N32_11;
+ else goto T32_11;
+
+N32_11:
+ if term(1).significance < 0.9973840117 then goto T32_9;
+ else goto T32_10;
+
+T32_9:
+ response = -0.0174263062;
+ goto D32;
+
+T32_10:
+ response = -0.0012512051;
+ goto D32;
+
+T32_11:
+ response = -0.0004493622;
+ goto D32;
+
+N32_12:
+ if age(created_at) < 1830.0000000000 then goto N32_13;
+ else goto N32_17;
+
+N32_13:
+ if fieldMatch(text).importance < 0.4997544885 then goto N32_14;
+ else goto N32_16;
+
+N32_14:
+ if fieldMatch(text).importance < 0.4997400045 then goto T32_12;
+ else goto N32_15;
+
+T32_12:
+ response = 0.0011570612;
+ goto D32;
+
+N32_15:
+ if attribute(user_statuses_count) < 2389.0000000000 then goto T32_13;
+ else goto T32_14;
+
+T32_13:
+ response = -0.0752507430;
+ goto D32;
+
+T32_14:
+ response = 0.0078353389;
+ goto D32;
+
+N32_16:
+ if attribute(yst_tweet_language) < 3243.5000000000 then goto T32_15;
+ else goto T32_16;
+
+T32_15:
+ response = 0.0095661855;
+ goto D32;
+
+T32_16:
+ response = -0.0134482465;
+ goto D32;
+
+N32_17:
+ if fieldMatch(text).importance < 0.4999470115 then goto T32_17;
+ else goto T32_18;
+
+T32_17:
+ response = -0.0103740403;
+ goto D32;
+
+T32_18:
+ response = 0.0025010891;
+ goto D32;
+
+D32:
+
+tnscore = tnscore + response;
+
+/* Tree 34 of 80 */
+N33_1:
+ if attribute(ythl) < 0.5000000000 then goto N33_2;
+ else goto N33_11;
+
+N33_2:
+ if fieldMatch(text) < 0.3427360058 then goto N33_3;
+ else goto N33_4;
+
+N33_3:
+ if fieldMatch(text).earliness < 0.6939799786 then goto T33_1;
+ else goto T33_2;
+
+T33_1:
+ response = -0.0183828125;
+ goto D33;
+
+T33_2:
+ response = -0.0073742585;
+ goto D33;
+
+N33_4:
+ if age(created_at) < 1830.0000000000 then goto N33_5;
+ else goto N33_10;
+
+N33_5:
+ if term(0).significance < 0.9958745241 then goto N33_6;
+ else goto T33_8;
+
+N33_6:
+ if term(1).significance < 0.8159549832 then goto T33_3;
+ else goto N33_7;
+
+T33_3:
+ response = 0.0358430149;
+ goto D33;
+
+N33_7:
+ if term(1).significance < 0.9927034974 then goto N33_8;
+ else goto N33_9;
+
+N33_8:
+ if fieldMatch(text).importance < 0.6640119553 then goto T33_4;
+ else goto T33_5;
+
+T33_4:
+ response = 0.0448918743;
+ goto D33;
+
+T33_5:
+ response = -0.0109749723;
+ goto D33;
+
+N33_9:
+ if term(1).significance < 0.9966380000 then goto T33_6;
+ else goto T33_7;
+
+T33_6:
+ response = 0.0219646010;
+ goto D33;
+
+T33_7:
+ response = 0.0055406966;
+ goto D33;
+
+T33_8:
+ response = -0.0040966912;
+ goto D33;
+
+N33_10:
+ if fieldLength(text) < 14.5000000000 then goto T33_9;
+ else goto T33_10;
+
+T33_9:
+ response = -0.0121807234;
+ goto D33;
+
+T33_10:
+ response = -0.0043039012;
+ goto D33;
+
+N33_11:
+ if age(created_at) < 1830.0000000000 then goto N33_12;
+ else goto N33_17;
+
+N33_12:
+ if fieldMatch(text).importance < 0.7479754686 then goto N33_13;
+ else goto T33_16;
+
+N33_13:
+ if term(0).significance < 0.9139549732 then goto T33_11;
+ else goto N33_14;
+
+T33_11:
+ response = -0.0340629156;
+ goto D33;
+
+N33_14:
+ if fieldMatch(text).tail < 7.5000000000 then goto T33_12;
+ else goto N33_15;
+
+T33_12:
+ response = 0.0008350066;
+ goto D33;
+
+N33_15:
+ if fieldMatch(text) < 0.8667535186 then goto T33_13;
+ else goto N33_16;
+
+T33_13:
+ response = 0.0042479503;
+ goto D33;
+
+N33_16:
+ if fieldTermMatch(text,0).firstPosition < 2.5000000000 then goto T33_14;
+ else goto T33_15;
+
+T33_14:
+ response = 0.0090797180;
+ goto D33;
+
+T33_15:
+ response = 0.0217882168;
+ goto D33;
+
+T33_16:
+ response = 0.0152961627;
+ goto D33;
+
+N33_17:
+ if fieldTermMatch(text,1).firstPosition < 6.5000000000 then goto T33_17;
+ else goto T33_18;
+
+T33_17:
+ response = 0.0019770381;
+ goto D33;
+
+T33_18:
+ response = -0.0024650391;
+ goto D33;
+
+D33:
+
+tnscore = tnscore + response;
+
+/* Tree 35 of 80 */
+N34_1:
+ if attribute(ythl) < 0.5000000000 then goto N34_2;
+ else goto N34_5;
+
+N34_2:
+ if age(created_at) < 1650.0000000000 then goto T34_1;
+ else goto N34_3;
+
+T34_1:
+ response = -0.0005354970;
+ goto D34;
+
+N34_3:
+ if fieldMatch(text).absoluteOccurrence < 0.0126785003 then goto N34_4;
+ else goto T34_4;
+
+N34_4:
+ if fieldMatch(text).longestSequence < 1.5000000000 then goto T34_2;
+ else goto T34_3;
+
+T34_2:
+ response = -0.0102881411;
+ goto D34;
+
+T34_3:
+ response = -0.0050682353;
+ goto D34;
+
+T34_4:
+ response = 0.0015992266;
+ goto D34;
+
+N34_5:
+ if age(created_at) < 1830.0000000000 then goto N34_6;
+ else goto N34_12;
+
+N34_6:
+ if fieldMatch(text).occurrence < 0.0425724983 then goto T34_5;
+ else goto N34_7;
+
+T34_5:
+ response = -0.0103390097;
+ goto D34;
+
+N34_7:
+ if fieldTermMatch(text,0).firstPosition < 4.5000000000 then goto N34_8;
+ else goto N34_9;
+
+N34_8:
+ if attribute(user_followers_count) < 807.5000000000 then goto T34_6;
+ else goto T34_7;
+
+T34_6:
+ response = 0.0090148290;
+ goto D34;
+
+T34_7:
+ response = 0.0188702216;
+ goto D34;
+
+N34_9:
+ if fieldMatch(text).importance < 0.7488585114 then goto N34_10;
+ else goto T34_11;
+
+N34_10:
+ if term(1).significance < 0.9833209515 then goto N34_11;
+ else goto T34_10;
+
+N34_11:
+ if fieldMatch(text).absoluteOccurrence < 0.0116665000 then goto T34_8;
+ else goto T34_9;
+
+T34_8:
+ response = -0.0062725138;
+ goto D34;
+
+T34_9:
+ response = -0.0573762051;
+ goto D34;
+
+T34_10:
+ response = 0.0061628636;
+ goto D34;
+
+T34_11:
+ response = 0.0150568527;
+ goto D34;
+
+N34_12:
+ if fieldMatch(text) < 0.4552929997 then goto N34_13;
+ else goto N34_17;
+
+N34_13:
+ if fieldMatch(user_name) < 0.3179910183 then goto N34_14;
+ else goto T34_16;
+
+N34_14:
+ if fieldMatch(text).importance < 0.6665154696 then goto N34_15;
+ else goto N34_16;
+
+N34_15:
+ if fieldMatch(text).occurrence < 0.1188234985 then goto T34_12;
+ else goto T34_13;
+
+T34_12:
+ response = -0.0170598650;
+ goto D34;
+
+T34_13:
+ response = -0.0000096562;
+ goto D34;
+
+N34_16:
+ if term(1).significance < 0.9965360165 then goto T34_14;
+ else goto T34_15;
+
+T34_14:
+ response = -0.0071076426;
+ goto D34;
+
+T34_15:
+ response = 0.0044895619;
+ goto D34;
+
+T34_16:
+ response = 0.0279607247;
+ goto D34;
+
+N34_17:
+ if term(2).significance < 0.8411514759 then goto T34_17;
+ else goto T34_18;
+
+T34_17:
+ response = -0.0033532749;
+ goto D34;
+
+T34_18:
+ response = 0.0026365471;
+ goto D34;
+
+D34:
+
+tnscore = tnscore + response;
+
+/* Tree 36 of 80 */
+N35_1:
+ if attribute(ythl) < 0.5000000000 then goto N35_2;
+ else goto N35_12;
+
+N35_2:
+ if age(created_at) < 1830.0000000000 then goto N35_3;
+ else goto N35_11;
+
+N35_3:
+ if term(1).significance < 0.8159549832 then goto T35_1;
+ else goto N35_4;
+
+T35_1:
+ response = 0.0276561682;
+ goto D35;
+
+N35_4:
+ if term(1).significance < 0.9822615385 then goto N35_5;
+ else goto N35_7;
+
+N35_5:
+ if match < 0.7864329815 then goto T35_2;
+ else goto N35_6;
+
+T35_2:
+ response = -0.0313860274;
+ goto D35;
+
+N35_6:
+ if fieldMatch(text).longestSequenceRatio < 0.7749999762 then goto T35_3;
+ else goto T35_4;
+
+T35_3:
+ response = 0.0265495560;
+ goto D35;
+
+T35_4:
+ response = -0.0146271425;
+ goto D35;
+
+N35_7:
+ if term(0).significance < 0.9987125397 then goto N35_8;
+ else goto N35_10;
+
+N35_8:
+ if term(0).significance < 0.9821995497 then goto T35_5;
+ else goto N35_9;
+
+T35_5:
+ response = -0.0131166881;
+ goto D35;
+
+N35_9:
+ if age(created_at) < 1770.0000000000 then goto T35_6;
+ else goto T35_7;
+
+T35_6:
+ response = 0.0024909360;
+ goto D35;
+
+T35_7:
+ response = 0.0242417466;
+ goto D35;
+
+N35_10:
+ if term(0).significance < 0.9987905025 then goto T35_8;
+ else goto T35_9;
+
+T35_8:
+ response = -0.0474544221;
+ goto D35;
+
+T35_9:
+ response = -0.0033216980;
+ goto D35;
+
+N35_11:
+ if fieldMatch(text).significance < 0.6663454771 then goto T35_10;
+ else goto T35_11;
+
+T35_10:
+ response = -0.0112682274;
+ goto D35;
+
+T35_11:
+ response = -0.0050036035;
+ goto D35;
+
+N35_12:
+ if age(created_at) < 1770.0000000000 then goto N35_13;
+ else goto N35_14;
+
+N35_13:
+ if attribute(user_followers_count) < 812.5000000000 then goto T35_12;
+ else goto T35_13;
+
+T35_12:
+ response = 0.0053496824;
+ goto D35;
+
+T35_13:
+ response = 0.0125479103;
+ goto D35;
+
+N35_14:
+ if fieldTermMatch(text,0).firstPosition < 1.5000000000 then goto T35_14;
+ else goto N35_15;
+
+T35_14:
+ response = 0.0037986168;
+ goto D35;
+
+N35_15:
+ if fieldMatch(text).importance < 0.7498390079 then goto N35_16;
+ else goto N35_17;
+
+N35_16:
+ if term(0).significance < 0.9974490404 then goto T35_15;
+ else goto T35_16;
+
+T35_15:
+ response = -0.0066723085;
+ goto D35;
+
+T35_16:
+ response = -0.0006707320;
+ goto D35;
+
+N35_17:
+ if attribute(user_friends_count) < 20.5000000000 then goto T35_17;
+ else goto T35_18;
+
+T35_17:
+ response = -0.0051792310;
+ goto D35;
+
+T35_18:
+ response = 0.0048035663;
+ goto D35;
+
+D35:
+
+tnscore = tnscore + response;
+
+/* Tree 37 of 80 */
+N36_1:
+ if attribute(ythl) < 0.5000000000 then goto N36_2;
+ else goto N36_12;
+
+N36_2:
+ if age(created_at) < 1830.0000000000 then goto N36_3;
+ else goto N36_9;
+
+N36_3:
+ if attribute(yst_reply_auth) < 26.5000000000 then goto N36_4;
+ else goto N36_5;
+
+N36_4:
+ if attribute(user_followers_count) < 80.5000000000 then goto T36_1;
+ else goto T36_2;
+
+T36_1:
+ response = -0.0011002979;
+ goto D36;
+
+T36_2:
+ response = 0.0120623048;
+ goto D36;
+
+N36_5:
+ if fieldMatch(text).importance < 0.4994869828 then goto N36_6;
+ else goto N36_7;
+
+N36_6:
+ if term(0).significance < 0.9910864830 then goto T36_3;
+ else goto T36_4;
+
+T36_3:
+ response = -0.0150381990;
+ goto D36;
+
+T36_4:
+ response = 0.0335272034;
+ goto D36;
+
+N36_7:
+ if attribute(yst_link_array_size) < 0.0041354997 then goto T36_5;
+ else goto N36_8;
+
+T36_5:
+ response = -0.0126645698;
+ goto D36;
+
+N36_8:
+ if fieldTermMatch(text,0).firstPosition < 2.5000000000 then goto T36_6;
+ else goto T36_7;
+
+T36_6:
+ response = 0.0132560119;
+ goto D36;
+
+T36_7:
+ response = -0.0052124596;
+ goto D36;
+
+N36_9:
+ if fieldMatch(text).importance < 0.6665325165 then goto N36_10;
+ else goto N36_11;
+
+N36_10:
+ if fieldMatch(text).tail < 7.5000000000 then goto T36_8;
+ else goto T36_9;
+
+T36_8:
+ response = -0.0136798779;
+ goto D36;
+
+T36_9:
+ response = -0.0055728098;
+ goto D36;
+
+N36_11:
+ if term(0).significance < 0.9954190254 then goto T36_10;
+ else goto T36_11;
+
+T36_10:
+ response = -0.0075794485;
+ goto D36;
+
+T36_11:
+ response = -0.0008289554;
+ goto D36;
+
+N36_12:
+ if age(created_at) < 1770.0000000000 then goto N36_13;
+ else goto N36_14;
+
+N36_13:
+ if fieldMatch(text).earliness < 0.3779760003 then goto T36_12;
+ else goto T36_13;
+
+T36_12:
+ response = -0.0012520588;
+ goto D36;
+
+T36_13:
+ response = 0.0078110682;
+ goto D36;
+
+N36_14:
+ if fieldMatch(text).significantOccurrence < 0.0547899976 then goto N36_15;
+ else goto T36_18;
+
+N36_15:
+ if match < 0.7503944635 then goto N36_16;
+ else goto T36_17;
+
+N36_16:
+ if fieldMatch(text).importance < 0.6665270329 then goto N36_17;
+ else goto T36_16;
+
+N36_17:
+ if fieldMatch(text).earliness < 0.6339714527 then goto T36_14;
+ else goto T36_15;
+
+T36_14:
+ response = -0.0338858554;
+ goto D36;
+
+T36_15:
+ response = -0.0090965850;
+ goto D36;
+
+T36_16:
+ response = -0.0037668704;
+ goto D36;
+
+T36_17:
+ response = 0.0003149666;
+ goto D36;
+
+T36_18:
+ response = 0.0028579202;
+ goto D36;
+
+D36:
+
+tnscore = tnscore + response;
+
+/* Tree 38 of 80 */
+N37_1:
+ if attribute(ythl) < 0.5000000000 then goto N37_2;
+ else goto N37_8;
+
+N37_2:
+ if age(created_at) < 1830.0000000000 then goto N37_3;
+ else goto T37_7;
+
+N37_3:
+ if fieldLength(text) < 8.5000000000 then goto T37_1;
+ else goto N37_4;
+
+T37_1:
+ response = -0.0119762189;
+ goto D37;
+
+N37_4:
+ if attribute(user_friends_count) < 15.5000000000 then goto T37_2;
+ else goto N37_5;
+
+T37_2:
+ response = 0.0150834311;
+ goto D37;
+
+N37_5:
+ if fieldMatch(text) < 0.4306970239 then goto N37_6;
+ else goto N37_7;
+
+N37_6:
+ if term(1).significance < 0.9976525307 then goto T37_3;
+ else goto T37_4;
+
+T37_3:
+ response = -0.0184997590;
+ goto D37;
+
+T37_4:
+ response = 0.0133471677;
+ goto D37;
+
+N37_7:
+ if attribute(user_friends_count) < 1375.5000000000 then goto T37_5;
+ else goto T37_6;
+
+T37_5:
+ response = 0.0005505579;
+ goto D37;
+
+T37_6:
+ response = 0.0194310919;
+ goto D37;
+
+T37_7:
+ response = -0.0058711817;
+ goto D37;
+
+N37_8:
+ if age(created_at) < 1830.0000000000 then goto N37_9;
+ else goto N37_13;
+
+N37_9:
+ if attribute(yst_tweet_language) < 3243.5000000000 then goto N37_10;
+ else goto T37_12;
+
+N37_10:
+ if fieldMatch(text).tail < 3.5000000000 then goto T37_8;
+ else goto N37_11;
+
+T37_8:
+ response = -0.0081336884;
+ goto D37;
+
+N37_11:
+ if fieldMatch(text).significantOccurrence < 0.0392310023 then goto T37_9;
+ else goto N37_12;
+
+T37_9:
+ response = -0.0074424529;
+ goto D37;
+
+N37_12:
+ if fieldMatch(text).importance < 0.7463564873 then goto T37_10;
+ else goto T37_11;
+
+T37_10:
+ response = 0.0063782103;
+ goto D37;
+
+T37_11:
+ response = 0.0133409187;
+ goto D37;
+
+T37_12:
+ response = -0.0205400734;
+ goto D37;
+
+N37_13:
+ if fieldMatch(text).importance < 0.6664534807 then goto N37_14;
+ else goto N37_16;
+
+N37_14:
+ if fieldMatch(text).significantOccurrence < 0.1249409989 then goto N37_15;
+ else goto T37_15;
+
+N37_15:
+ if fieldTermMatch(text,0).firstPosition < 2.5000000000 then goto T37_13;
+ else goto T37_14;
+
+T37_13:
+ response = 0.0031349276;
+ goto D37;
+
+T37_14:
+ response = -0.0052935732;
+ goto D37;
+
+T37_15:
+ response = -0.0275582309;
+ goto D37;
+
+N37_16:
+ if age(created_at) < 63000.0000000000 then goto N37_17;
+ else goto T37_18;
+
+N37_17:
+ if fieldMatch(text) < 0.3207110167 then goto T37_16;
+ else goto T37_17;
+
+T37_16:
+ response = -0.0114880439;
+ goto D37;
+
+T37_17:
+ response = 0.0009364683;
+ goto D37;
+
+T37_18:
+ response = 0.0048748147;
+ goto D37;
+
+D37:
+
+tnscore = tnscore + response;
+
+/* Tree 39 of 80 */
+N38_1:
+ if attribute(ythl) < 0.5000000000 then goto N38_2;
+ else goto N38_9;
+
+N38_2:
+ if age(created_at) < 1830.0000000000 then goto N38_3;
+ else goto N38_6;
+
+N38_3:
+ if term(0).significance < 0.9995759726 then goto N38_4;
+ else goto T38_4;
+
+N38_4:
+ if fieldMatch(text).significantOccurrence < 0.0363755003 then goto T38_1;
+ else goto N38_5;
+
+T38_1:
+ response = -0.0157437300;
+ goto D38;
+
+N38_5:
+ if term(1).significance < 0.7788045406 then goto T38_2;
+ else goto T38_3;
+
+T38_2:
+ response = 0.0476961371;
+ goto D38;
+
+T38_3:
+ response = 0.0012138970;
+ goto D38;
+
+T38_4:
+ response = -0.0123574496;
+ goto D38;
+
+N38_6:
+ if fieldMatch(text).importance < 0.6664404869 then goto T38_5;
+ else goto N38_7;
+
+T38_5:
+ response = -0.0101736132;
+ goto D38;
+
+N38_7:
+ if term(0).significance < 0.9954395294 then goto N38_8;
+ else goto T38_8;
+
+N38_8:
+ if fieldMatch(text).orderness < 0.5357145071 then goto T38_6;
+ else goto T38_7;
+
+T38_6:
+ response = -0.0179059498;
+ goto D38;
+
+T38_7:
+ response = -0.0054873409;
+ goto D38;
+
+T38_8:
+ response = -0.0020453926;
+ goto D38;
+
+N38_9:
+ if fieldMatch(text) < 0.3861989975 then goto N38_10;
+ else goto N38_12;
+
+N38_10:
+ if term(1).significance < 0.9980044961 then goto N38_11;
+ else goto T38_11;
+
+N38_11:
+ if age(created_at) < 330.0000000000 then goto T38_9;
+ else goto T38_10;
+
+T38_9:
+ response = 0.0172467014;
+ goto D38;
+
+T38_10:
+ response = -0.0090164842;
+ goto D38;
+
+T38_11:
+ response = 0.0018725599;
+ goto D38;
+
+N38_12:
+ if age(created_at) < 1830.0000000000 then goto N38_13;
+ else goto N38_17;
+
+N38_13:
+ if attribute(yst_reply_auth) < 557.0000000000 then goto N38_14;
+ else goto T38_16;
+
+N38_14:
+ if attribute(user_statuses_count) < 7.5000000000 then goto N38_15;
+ else goto N38_16;
+
+N38_15:
+ if attribute(user_followers_count) < 147.0000000000 then goto T38_12;
+ else goto T38_13;
+
+T38_12:
+ response = -0.0007864416;
+ goto D38;
+
+T38_13:
+ response = -0.1136937768;
+ goto D38;
+
+N38_16:
+ if fieldMatch(text).importance < 0.4989485145 then goto T38_14;
+ else goto T38_15;
+
+T38_14:
+ response = -0.0179963640;
+ goto D38;
+
+T38_15:
+ response = 0.0077754184;
+ goto D38;
+
+T38_16:
+ response = -0.0216960094;
+ goto D38;
+
+N38_17:
+ if fieldMatch(text).importance < 0.6657680273 then goto T38_17;
+ else goto T38_18;
+
+T38_17:
+ response = -0.0047764491;
+ goto D38;
+
+T38_18:
+ response = 0.0026613540;
+ goto D38;
+
+D38:
+
+tnscore = tnscore + response;
+
+/* Tree 40 of 80 */
+N39_1:
+ if attribute(ythl) < 0.5000000000 then goto N39_2;
+ else goto N39_8;
+
+N39_2:
+ if fieldMatch(text).absoluteProximity < 0.0247499999 then goto T39_1;
+ else goto N39_3;
+
+T39_1:
+ response = -0.0106958848;
+ goto D39;
+
+N39_3:
+ if fieldLength(text) < 14.5000000000 then goto N39_4;
+ else goto N39_5;
+
+N39_4:
+ if fieldMatch(text).importance < 0.4994429946 then goto T39_2;
+ else goto T39_3;
+
+T39_2:
+ response = 0.0219589017;
+ goto D39;
+
+T39_3:
+ response = -0.0076174869;
+ goto D39;
+
+N39_5:
+ if fieldMatch(text).occurrence < 0.0816664994 then goto T39_4;
+ else goto N39_6;
+
+T39_4:
+ response = -0.0053254707;
+ goto D39;
+
+N39_6:
+ if fieldMatch(text).longestSequenceRatio < 0.8166664839 then goto T39_5;
+ else goto N39_7;
+
+T39_5:
+ response = -0.0034099679;
+ goto D39;
+
+N39_7:
+ if attribute(user_followers_count) < 3130.0000000000 then goto T39_6;
+ else goto T39_7;
+
+T39_6:
+ response = 0.0023795826;
+ goto D39;
+
+T39_7:
+ response = 0.0305866803;
+ goto D39;
+
+N39_8:
+ if age(created_at) < 1830.0000000000 then goto N39_9;
+ else goto N39_15;
+
+N39_9:
+ if attribute(user_followers_count) < 579.5000000000 then goto N39_10;
+ else goto N39_14;
+
+N39_10:
+ if fieldMatch(text).importance < 0.7473194599 then goto N39_11;
+ else goto T39_12;
+
+N39_11:
+ if term(1).significance < 0.9967114925 then goto N39_12;
+ else goto T39_11;
+
+N39_12:
+ if fieldMatch(text).orderness < 0.2500000000 then goto T39_8;
+ else goto N39_13;
+
+T39_8:
+ response = 0.0199100212;
+ goto D39;
+
+N39_13:
+ if age(created_at) < 630.0000000000 then goto T39_9;
+ else goto T39_10;
+
+T39_9:
+ response = 0.0001241074;
+ goto D39;
+
+T39_10:
+ response = -0.0217275952;
+ goto D39;
+
+T39_11:
+ response = 0.0052103051;
+ goto D39;
+
+T39_12:
+ response = 0.0100794994;
+ goto D39;
+
+N39_14:
+ if fieldMatch(text).weightedOccurrence < 0.0574174970 then goto T39_13;
+ else goto T39_14;
+
+T39_13:
+ response = 0.0046668785;
+ goto D39;
+
+T39_14:
+ response = 0.0152380854;
+ goto D39;
+
+N39_15:
+ if fieldMatch(text) < 0.2792814970 then goto N39_16;
+ else goto N39_17;
+
+N39_16:
+ if term(1).significance < 0.9967014790 then goto T39_15;
+ else goto T39_16;
+
+T39_15:
+ response = -0.0134490906;
+ goto D39;
+
+T39_16:
+ response = 0.0005562205;
+ goto D39;
+
+N39_17:
+ if fieldTermMatch(text,1).firstPosition < 9.5000000000 then goto T39_17;
+ else goto T39_18;
+
+T39_17:
+ response = 0.0022888891;
+ goto D39;
+
+T39_18:
+ response = -0.0019659597;
+ goto D39;
+
+D39:
+
+tnscore = tnscore + response;
+
+/* Tree 41 of 80 */
+N40_1:
+ if attribute(ythl) < 0.5000000000 then goto N40_2;
+ else goto N40_7;
+
+N40_2:
+ if fieldMatch(text).tail < 7.5000000000 then goto N40_3;
+ else goto N40_6;
+
+N40_3:
+ if fieldMatch(text).importance < 0.6656044722 then goto N40_4;
+ else goto N40_5;
+
+N40_4:
+ if match < 0.7144390345 then goto T40_1;
+ else goto T40_2;
+
+T40_1:
+ response = -0.0310311214;
+ goto D40;
+
+T40_2:
+ response = -0.0088877493;
+ goto D40;
+
+N40_5:
+ if fieldMatch(text).earliness < 0.0727514997 then goto T40_3;
+ else goto T40_4;
+
+T40_3:
+ response = 0.0149462312;
+ goto D40;
+
+T40_4:
+ response = -0.0044403174;
+ goto D40;
+
+N40_6:
+ if attribute(user_friends_count) < 14.5000000000 then goto T40_5;
+ else goto T40_6;
+
+T40_5:
+ response = 0.0092716632;
+ goto D40;
+
+T40_6:
+ response = -0.0015500378;
+ goto D40;
+
+N40_7:
+ if age(created_at) < 1830.0000000000 then goto N40_8;
+ else goto N40_16;
+
+N40_8:
+ if fieldMatch(text) < 0.2904269993 then goto N40_9;
+ else goto N40_11;
+
+N40_9:
+ if attribute(user_friends_count) < 127.0000000000 then goto T40_7;
+ else goto N40_10;
+
+T40_7:
+ response = 0.0056714395;
+ goto D40;
+
+N40_10:
+ if fieldTermMatch(text,1).firstPosition < 10.5000000000 then goto T40_8;
+ else goto T40_9;
+
+T40_8:
+ response = -0.0061125596;
+ goto D40;
+
+T40_9:
+ response = -0.0400728335;
+ goto D40;
+
+N40_11:
+ if attribute(yst_tweet_language) < 3243.5000000000 then goto N40_12;
+ else goto N40_15;
+
+N40_12:
+ if fieldLength(text) < 9.5000000000 then goto T40_10;
+ else goto N40_13;
+
+T40_10:
+ response = -0.0142401520;
+ goto D40;
+
+N40_13:
+ if attribute(yst_reply_auth) < 209.5000000000 then goto T40_11;
+ else goto N40_14;
+
+T40_11:
+ response = 0.0090823293;
+ goto D40;
+
+N40_14:
+ if attribute(user_followers_count) < 1677.5000000000 then goto T40_12;
+ else goto T40_13;
+
+T40_12:
+ response = -0.0133225099;
+ goto D40;
+
+T40_13:
+ response = 0.0160376802;
+ goto D40;
+
+N40_15:
+ if fieldTermMatch(text,0).firstPosition < 6.5000000000 then goto T40_14;
+ else goto T40_15;
+
+T40_14:
+ response = 0.0020672660;
+ goto D40;
+
+T40_15:
+ response = -0.0469488746;
+ goto D40;
+
+N40_16:
+ if fieldMatch(text).significantOccurrence < 0.0543674976 then goto T40_16;
+ else goto N40_17;
+
+T40_16:
+ response = -0.0022326468;
+ goto D40;
+
+N40_17:
+ if age(created_at) < 63000.0000000000 then goto T40_17;
+ else goto T40_18;
+
+T40_17:
+ response = 0.0005594003;
+ goto D40;
+
+T40_18:
+ response = 0.0062407904;
+ goto D40;
+
+D40:
+
+tnscore = tnscore + response;
+
+/* Tree 42 of 80 */
+N41_1:
+ if attribute(ythl) < 0.5000000000 then goto N41_2;
+ else goto N41_7;
+
+N41_2:
+ if age(created_at) < 1830.0000000000 then goto N41_3;
+ else goto N41_5;
+
+N41_3:
+ if term(1).significance < 0.8159549832 then goto T41_1;
+ else goto N41_4;
+
+T41_1:
+ response = 0.0232298047;
+ goto D41;
+
+N41_4:
+ if attribute(yst_reply_auth) < 176.0000000000 then goto T41_2;
+ else goto T41_3;
+
+T41_2:
+ response = 0.0005588745;
+ goto D41;
+
+T41_3:
+ response = -0.0071946844;
+ goto D41;
+
+N41_5:
+ if fieldLength(text) < 14.5000000000 then goto T41_4;
+ else goto N41_6;
+
+T41_4:
+ response = -0.0100784734;
+ goto D41;
+
+N41_6:
+ if fieldMatch(text).occurrence < 0.1043554991 then goto T41_5;
+ else goto T41_6;
+
+T41_5:
+ response = -0.0078973837;
+ goto D41;
+
+T41_6:
+ response = -0.0021612919;
+ goto D41;
+
+N41_7:
+ if fieldMatch(text) < 0.4846429825 then goto N41_8;
+ else goto N41_13;
+
+N41_8:
+ if term(1).significance < 0.9980959892 then goto N41_9;
+ else goto T41_12;
+
+N41_9:
+ if fieldMatch(text).significantOccurrence < 0.0594874993 then goto N41_10;
+ else goto T41_11;
+
+N41_10:
+ if fieldMatch(text).importance < 0.7486180067 then goto N41_11;
+ else goto N41_12;
+
+N41_11:
+ if fieldMatch(text).head < 1.5000000000 then goto T41_7;
+ else goto T41_8;
+
+T41_7:
+ response = -0.0040433128;
+ goto D41;
+
+T41_8:
+ response = -0.0206367481;
+ goto D41;
+
+N41_12:
+ if attribute(user_followers_count) < 592.0000000000 then goto T41_9;
+ else goto T41_10;
+
+T41_9:
+ response = 0.0031591870;
+ goto D41;
+
+T41_10:
+ response = -0.0144562410;
+ goto D41;
+
+T41_11:
+ response = 0.0020151861;
+ goto D41;
+
+T41_12:
+ response = 0.0050398144;
+ goto D41;
+
+N41_13:
+ if age(created_at) < 1830.0000000000 then goto N41_14;
+ else goto N41_17;
+
+N41_14:
+ if fieldMatch(text).importance < 0.4997544885 then goto N41_15;
+ else goto T41_16;
+
+N41_15:
+ if fieldMatch(text).importance < 0.4997400045 then goto N41_16;
+ else goto T41_15;
+
+N41_16:
+ if fieldLength(text) < 10.5000000000 then goto T41_13;
+ else goto T41_14;
+
+T41_13:
+ response = -0.0498201605;
+ goto D41;
+
+T41_14:
+ response = 0.0052238898;
+ goto D41;
+
+T41_15:
+ response = -0.0305327007;
+ goto D41;
+
+T41_16:
+ response = 0.0074862309;
+ goto D41;
+
+N41_17:
+ if fieldTermMatch(text,1).firstPosition < 12.5000000000 then goto T41_17;
+ else goto T41_18;
+
+T41_17:
+ response = 0.0024835558;
+ goto D41;
+
+T41_18:
+ response = -0.0030184713;
+ goto D41;
+
+D41:
+
+tnscore = tnscore + response;
+
+/* Tree 43 of 80 */
+N42_1:
+ if fieldMatch(text) < 0.2844820023 then goto N42_2;
+ else goto N42_5;
+
+N42_2:
+ if fieldMatch(text).importance < 0.6664454937 then goto N42_3;
+ else goto N42_4;
+
+N42_3:
+ if fieldMatch(text).earliness < 0.6909815073 then goto T42_1;
+ else goto T42_2;
+
+T42_1:
+ response = -0.0202083466;
+ goto D42;
+
+T42_2:
+ response = -0.0072303460;
+ goto D42;
+
+N42_4:
+ if term(0).significance < 0.9991005063 then goto T42_3;
+ else goto T42_4;
+
+T42_3:
+ response = -0.0078735247;
+ goto D42;
+
+T42_4:
+ response = 0.0050183478;
+ goto D42;
+
+N42_5:
+ if age(created_at) < 1830.0000000000 then goto N42_6;
+ else goto N42_12;
+
+N42_6:
+ if fieldMatch(text).importance < 0.4999545217 then goto N42_7;
+ else goto N42_9;
+
+N42_7:
+ if fieldMatch(text).importance < 0.4999495149 then goto T42_5;
+ else goto N42_8;
+
+T42_5:
+ response = 0.0010054634;
+ goto D42;
+
+N42_8:
+ if attribute(yst_reply_auth) < 24.5000000000 then goto T42_6;
+ else goto T42_7;
+
+T42_6:
+ response = -0.0091545768;
+ goto D42;
+
+T42_7:
+ response = -0.0585794793;
+ goto D42;
+
+N42_9:
+ if attribute(user_followers_count) < 496.5000000000 then goto N42_10;
+ else goto T42_11;
+
+N42_10:
+ if attribute(yst_reply_auth) < 64.5000000000 then goto N42_11;
+ else goto T42_10;
+
+N42_11:
+ if attribute(yst_tweet_language) < 3583.5000000000 then goto T42_8;
+ else goto T42_9;
+
+T42_8:
+ response = 0.0060891079;
+ goto D42;
+
+T42_9:
+ response = -0.0311686318;
+ goto D42;
+
+T42_10:
+ response = -0.0029778507;
+ goto D42;
+
+T42_11:
+ response = 0.0105208941;
+ goto D42;
+
+N42_12:
+ if fieldTermMatch(text,2).firstPosition < 9.5000000000 then goto N42_13;
+ else goto N42_14;
+
+N42_13:
+ if term(2).significance < 0.9943845272 then goto T42_12;
+ else goto T42_13;
+
+T42_12:
+ response = -0.0014959657;
+ goto D42;
+
+T42_13:
+ response = 0.0041636931;
+ goto D42;
+
+N42_14:
+ if fieldMatch(text).earliness < 0.8651515245 then goto N42_15;
+ else goto T42_18;
+
+N42_15:
+ if fieldMatch(user_name).completeness < 0.9791665077 then goto N42_16;
+ else goto T42_17;
+
+N42_16:
+ if attribute(user_followers_count) < 680.5000000000 then goto T42_14;
+ else goto N42_17;
+
+T42_14:
+ response = -0.0055818030;
+ goto D42;
+
+N42_17:
+ if term(1).significance < 0.9998655319 then goto T42_15;
+ else goto T42_16;
+
+T42_15:
+ response = 0.0025943130;
+ goto D42;
+
+T42_16:
+ response = -0.0096650963;
+ goto D42;
+
+T42_17:
+ response = 0.0535512397;
+ goto D42;
+
+T42_18:
+ response = 0.0020234411;
+ goto D42;
+
+D42:
+
+tnscore = tnscore + response;
+
+/* Tree 44 of 80 */
+N43_1:
+ if attribute(ythl) < 0.5000000000 then goto N43_2;
+ else goto N43_11;
+
+N43_2:
+ if fieldMatch(text).absoluteProximity < 0.0491665006 then goto T43_1;
+ else goto N43_3;
+
+T43_1:
+ response = -0.0097379318;
+ goto D43;
+
+N43_3:
+ if age(created_at) < 1710.0000000000 then goto N43_4;
+ else goto T43_10;
+
+N43_4:
+ if term(0).significance < 0.9986090064 then goto N43_5;
+ else goto N43_8;
+
+N43_5:
+ if term(0).significance < 0.9982025027 then goto N43_6;
+ else goto T43_5;
+
+N43_6:
+ if term(2).significance < 0.9959775209 then goto T43_2;
+ else goto N43_7;
+
+T43_2:
+ response = -0.0019926524;
+ goto D43;
+
+N43_7:
+ if fieldMatch(text).importance < 0.6653964520 then goto T43_3;
+ else goto T43_4;
+
+T43_3:
+ response = -0.0116503978;
+ goto D43;
+
+T43_4:
+ response = 0.0109466166;
+ goto D43;
+
+T43_5:
+ response = 0.0189828366;
+ goto D43;
+
+N43_8:
+ if fieldMatch(text).importance < 0.6666469574 then goto N43_9;
+ else goto T43_9;
+
+N43_9:
+ if fieldMatch(text).importance < 0.4998664856 then goto T43_6;
+ else goto N43_10;
+
+T43_6:
+ response = -0.0224440709;
+ goto D43;
+
+N43_10:
+ if attribute(yst_reply_auth) < 18.5000000000 then goto T43_7;
+ else goto T43_8;
+
+T43_7:
+ response = 0.0032764517;
+ goto D43;
+
+T43_8:
+ response = -0.0102488229;
+ goto D43;
+
+T43_9:
+ response = 0.0230535914;
+ goto D43;
+
+T43_10:
+ response = -0.0042653100;
+ goto D43;
+
+N43_11:
+ if age(created_at) < 1650.0000000000 then goto N43_12;
+ else goto N43_14;
+
+N43_12:
+ if attribute(user_followers_count) < 105.5000000000 then goto T43_11;
+ else goto N43_13;
+
+T43_11:
+ response = -0.0006226235;
+ goto D43;
+
+N43_13:
+ if fieldMatch(text).importance < 0.4989485145 then goto T43_12;
+ else goto T43_13;
+
+T43_12:
+ response = -0.0224164552;
+ goto D43;
+
+T43_13:
+ response = 0.0086177649;
+ goto D43;
+
+N43_14:
+ if fieldMatch(text).significantOccurrence < 0.0392310023 then goto T43_14;
+ else goto N43_15;
+
+T43_14:
+ response = -0.0097048559;
+ goto D43;
+
+N43_15:
+ if fieldMatch(text) < 0.4509834945 then goto N43_16;
+ else goto N43_17;
+
+N43_16:
+ if term(1).significance < 0.9981694818 then goto T43_15;
+ else goto T43_16;
+
+T43_15:
+ response = -0.0078122033;
+ goto D43;
+
+T43_16:
+ response = 0.0038068440;
+ goto D43;
+
+N43_17:
+ if term(0).significance < 0.9841674566 then goto T43_17;
+ else goto T43_18;
+
+T43_17:
+ response = 0.0097951581;
+ goto D43;
+
+T43_18:
+ response = 0.0009243530;
+ goto D43;
+
+D43:
+
+tnscore = tnscore + response;
+
+/* Tree 45 of 80 */
+N44_1:
+ if attribute(ythl) < 0.5000000000 then goto N44_2;
+ else goto N44_11;
+
+N44_2:
+ if fieldMatch(text) < 0.2836354971 then goto N44_3;
+ else goto N44_4;
+
+N44_3:
+ if term(1).significance < 0.9996379614 then goto T44_1;
+ else goto T44_2;
+
+T44_1:
+ response = -0.0108495877;
+ goto D44;
+
+T44_2:
+ response = 0.0148675984;
+ goto D44;
+
+N44_4:
+ if fieldMatch(text).tail < 17.5000000000 then goto N44_5;
+ else goto T44_10;
+
+N44_5:
+ if term(2).significance < 0.9099119902 then goto N44_6;
+ else goto N44_8;
+
+N44_6:
+ if fieldMatch(text).importance < 0.4998250008 then goto N44_7;
+ else goto T44_5;
+
+N44_7:
+ if fieldMatch(text).significance < 0.4995914996 then goto T44_3;
+ else goto T44_4;
+
+T44_3:
+ response = -0.0043957101;
+ goto D44;
+
+T44_4:
+ response = 0.0231344492;
+ goto D44;
+
+T44_5:
+ response = -0.0088403820;
+ goto D44;
+
+N44_8:
+ if attribute(user_followers_count) < 506.5000000000 then goto N44_9;
+ else goto T44_9;
+
+N44_9:
+ if fieldMatch(text).importance < 0.6659464836 then goto N44_10;
+ else goto T44_8;
+
+N44_10:
+ if fieldMatch(text).importance < 0.6658334732 then goto T44_6;
+ else goto T44_7;
+
+T44_6:
+ response = -0.0003420491;
+ goto D44;
+
+T44_7:
+ response = 0.0373151929;
+ goto D44;
+
+T44_8:
+ response = -0.0037227166;
+ goto D44;
+
+T44_9:
+ response = 0.0035857585;
+ goto D44;
+
+T44_10:
+ response = 0.0053145038;
+ goto D44;
+
+N44_11:
+ if fieldMatch(text) < 0.2904269993 then goto N44_12;
+ else goto N44_15;
+
+N44_12:
+ if term(1).significance < 0.9982604980 then goto N44_13;
+ else goto T44_14;
+
+N44_13:
+ if term(2).significance < 0.9786905050 then goto T44_11;
+ else goto N44_14;
+
+T44_11:
+ response = -0.0302698107;
+ goto D44;
+
+N44_14:
+ if fieldMatch(text).occurrence < 0.1188234985 then goto T44_12;
+ else goto T44_13;
+
+T44_12:
+ response = -0.0139381667;
+ goto D44;
+
+T44_13:
+ response = 0.0000003038;
+ goto D44;
+
+T44_14:
+ response = 0.0019017619;
+ goto D44;
+
+N44_15:
+ if age(created_at) < 1830.0000000000 then goto N44_16;
+ else goto N44_17;
+
+N44_16:
+ if fieldMatch(text).occurrence < 0.0754984990 then goto T44_15;
+ else goto T44_16;
+
+T44_15:
+ response = -0.0004698689;
+ goto D44;
+
+T44_16:
+ response = 0.0075287937;
+ goto D44;
+
+N44_17:
+ if term(2).significance < 0.9939094782 then goto T44_17;
+ else goto T44_18;
+
+T44_17:
+ response = -0.0018063524;
+ goto D44;
+
+T44_18:
+ response = 0.0022813626;
+ goto D44;
+
+D44:
+
+tnscore = tnscore + response;
+
+/* Tree 46 of 80 */
+N45_1:
+ if attribute(ythl) < 0.5000000000 then goto N45_2;
+ else goto N45_12;
+
+N45_2:
+ if fieldMatch(text).absoluteProximity < 0.0537500009 then goto T45_1;
+ else goto N45_3;
+
+T45_1:
+ response = -0.0076941292;
+ goto D45;
+
+N45_3:
+ if attribute(user_friends_count) < 13.5000000000 then goto N45_4;
+ else goto N45_5;
+
+N45_4:
+ if attribute(user_followers_count) < 180.5000000000 then goto T45_2;
+ else goto T45_3;
+
+T45_2:
+ response = 0.0007683782;
+ goto D45;
+
+T45_3:
+ response = 0.0254407298;
+ goto D45;
+
+N45_5:
+ if term(1).significance < 0.9965690374 then goto N45_6;
+ else goto N45_7;
+
+N45_6:
+ if attribute(user_friends_count) < 1596.0000000000 then goto T45_4;
+ else goto T45_5;
+
+T45_4:
+ response = -0.0009906495;
+ goto D45;
+
+T45_5:
+ response = 0.0179635090;
+ goto D45;
+
+N45_7:
+ if term(1).significance < 0.9967479706 then goto T45_6;
+ else goto N45_8;
+
+T45_6:
+ response = -0.0302754090;
+ goto D45;
+
+N45_8:
+ if term(0).significance < 0.9986245036 then goto N45_9;
+ else goto N45_11;
+
+N45_9:
+ if term(0).significance < 0.9983664751 then goto T45_7;
+ else goto N45_10;
+
+T45_7:
+ response = -0.0033463225;
+ goto D45;
+
+N45_10:
+ if fieldMatch(text).importance < 0.4998250008 then goto T45_8;
+ else goto T45_9;
+
+T45_8:
+ response = 0.0252647259;
+ goto D45;
+
+T45_9:
+ response = 0.0031275837;
+ goto D45;
+
+N45_11:
+ if fieldMatch(text).importance < 0.6665315032 then goto T45_10;
+ else goto T45_11;
+
+T45_10:
+ response = -0.0095874064;
+ goto D45;
+
+T45_11:
+ response = -0.0023156280;
+ goto D45;
+
+N45_12:
+ if age(created_at) < 1830.0000000000 then goto N45_13;
+ else goto N45_15;
+
+N45_13:
+ if attribute(yst_tweet_language) < 3243.5000000000 then goto N45_14;
+ else goto T45_14;
+
+N45_14:
+ if fieldMatch(text).absoluteOccurrence < 0.0122500006 then goto T45_12;
+ else goto T45_13;
+
+T45_12:
+ response = 0.0044201553;
+ goto D45;
+
+T45_13:
+ response = 0.0117690347;
+ goto D45;
+
+T45_14:
+ response = -0.0145395863;
+ goto D45;
+
+N45_15:
+ if fieldTermMatch(text,0).firstPosition < 10.5000000000 then goto N45_16;
+ else goto T45_18;
+
+N45_16:
+ if fieldMatch(user_name).significantOccurrence < 0.4166665077 then goto N45_17;
+ else goto T45_17;
+
+N45_17:
+ if match < 0.7534494996 then goto T45_15;
+ else goto T45_16;
+
+T45_15:
+ response = -0.0026555272;
+ goto D45;
+
+T45_16:
+ response = 0.0017879837;
+ goto D45;
+
+T45_17:
+ response = 0.0145260250;
+ goto D45;
+
+T45_18:
+ response = -0.0044547476;
+ goto D45;
+
+D45:
+
+tnscore = tnscore + response;
+
+/* Tree 47 of 80 */
+N46_1:
+ if attribute(ythl) < 0.5000000000 then goto N46_2;
+ else goto N46_7;
+
+N46_2:
+ if fieldMatch(text).tail < 5.5000000000 then goto N46_3;
+ else goto N46_6;
+
+N46_3:
+ if fieldMatch(text).longestSequence < 1.5000000000 then goto N46_4;
+ else goto N46_5;
+
+N46_4:
+ if fieldMatch(text).completeness < 0.9522235394 then goto T46_1;
+ else goto T46_2;
+
+T46_1:
+ response = 0.0042831561;
+ goto D46;
+
+T46_2:
+ response = -0.0095824673;
+ goto D46;
+
+N46_5:
+ if fieldMatch(text).weightedOccurrence < 0.0845234990 then goto T46_3;
+ else goto T46_4;
+
+T46_3:
+ response = -0.0006026845;
+ goto D46;
+
+T46_4:
+ response = -0.0078613786;
+ goto D46;
+
+N46_6:
+ if fieldMatch(user_name).importance < 0.1997880042 then goto T46_5;
+ else goto T46_6;
+
+T46_5:
+ response = -0.0010588456;
+ goto D46;
+
+T46_6:
+ response = 0.0257470432;
+ goto D46;
+
+N46_7:
+ if fieldMatch(text) < 0.2779855132 then goto N46_8;
+ else goto N46_11;
+
+N46_8:
+ if fieldMatch(text).significantOccurrence < 0.0599530004 then goto N46_9;
+ else goto T46_10;
+
+N46_9:
+ if fieldMatch(text).importance < 0.6665154696 then goto T46_7;
+ else goto N46_10;
+
+T46_7:
+ response = -0.0140299808;
+ goto D46;
+
+N46_10:
+ if term(2).significance < 0.9674279690 then goto T46_8;
+ else goto T46_9;
+
+T46_8:
+ response = -0.0277817247;
+ goto D46;
+
+T46_9:
+ response = 0.0028355135;
+ goto D46;
+
+T46_10:
+ response = 0.0034763323;
+ goto D46;
+
+N46_11:
+ if age(created_at) < 2370.0000000000 then goto N46_12;
+ else goto N46_16;
+
+N46_12:
+ if attribute(user_statuses_count) < 10.5000000000 then goto N46_13;
+ else goto N46_14;
+
+N46_13:
+ if attribute(yst_link_array_size) < 0.0000120000 then goto T46_11;
+ else goto T46_12;
+
+T46_11:
+ response = -0.0032323662;
+ goto D46;
+
+T46_12:
+ response = -0.1013679738;
+ goto D46;
+
+N46_14:
+ if fieldTermMatch(text,1).firstPosition < 5.5000000000 then goto T46_13;
+ else goto N46_15;
+
+T46_13:
+ response = 0.0092460814;
+ goto D46;
+
+N46_15:
+ if attribute(user_statuses_count) < 72785.0000000000 then goto T46_14;
+ else goto T46_15;
+
+T46_14:
+ response = 0.0043196848;
+ goto D46;
+
+T46_15:
+ response = -0.0308937796;
+ goto D46;
+
+N46_16:
+ if fieldMatch(text).significantOccurrence < 0.0339080021 then goto T46_16;
+ else goto N46_17;
+
+T46_16:
+ response = -0.0524175559;
+ goto D46;
+
+N46_17:
+ if fieldTermMatch(text,1).firstPosition < 17.5000000000 then goto T46_17;
+ else goto T46_18;
+
+T46_17:
+ response = 0.0020057038;
+ goto D46;
+
+T46_18:
+ response = -0.0052555353;
+ goto D46;
+
+D46:
+
+tnscore = tnscore + response;
+
+/* Tree 48 of 80 */
+N47_1:
+ if age(created_at) < 1830.0000000000 then goto N47_2;
+ else goto N47_12;
+
+N47_2:
+ if fieldMatch(text).significantOccurrence < 0.0382340029 then goto N47_3;
+ else goto N47_5;
+
+N47_3:
+ if fieldMatch(text) < 0.8533049822 then goto N47_4;
+ else goto T47_3;
+
+N47_4:
+ if term(0).significance < 0.9981650114 then goto T47_1;
+ else goto T47_2;
+
+T47_1:
+ response = -0.0368343915;
+ goto D47;
+
+T47_2:
+ response = -0.0056610638;
+ goto D47;
+
+T47_3:
+ response = 0.0049994224;
+ goto D47;
+
+N47_5:
+ if fieldMatch(text).tail < 5.5000000000 then goto N47_6;
+ else goto N47_7;
+
+N47_6:
+ if fieldMatch(text).importance < 0.4989485145 then goto T47_4;
+ else goto T47_5;
+
+T47_4:
+ response = -0.0226356769;
+ goto D47;
+
+T47_5:
+ response = 0.0008778837;
+ goto D47;
+
+N47_7:
+ if term(0).significance < 0.9986659884 then goto T47_6;
+ else goto N47_8;
+
+T47_6:
+ response = 0.0085648682;
+ goto D47;
+
+N47_8:
+ if fieldMatch(text).earliness < 0.7071075439 then goto N47_9;
+ else goto N47_10;
+
+N47_9:
+ if fieldMatch(text).importance < 0.6665714979 then goto T47_7;
+ else goto T47_8;
+
+T47_7:
+ response = -0.0079427382;
+ goto D47;
+
+T47_8:
+ response = 0.0095678431;
+ goto D47;
+
+N47_10:
+ if term(0).significance < 0.9988600016 then goto T47_9;
+ else goto N47_11;
+
+T47_9:
+ response = -0.0090514905;
+ goto D47;
+
+N47_11:
+ if attribute(yst_tweet_language) < 3243.5000000000 then goto T47_10;
+ else goto T47_11;
+
+T47_10:
+ response = 0.0116548680;
+ goto D47;
+
+T47_11:
+ response = -0.0250363073;
+ goto D47;
+
+N47_12:
+ if fieldMatch(text).longestSequence < 1.5000000000 then goto N47_13;
+ else goto N47_14;
+
+N47_13:
+ if fieldMatch(text).absoluteOccurrence < 0.0122500006 then goto T47_12;
+ else goto T47_13;
+
+T47_12:
+ response = -0.0063330281;
+ goto D47;
+
+T47_13:
+ response = 0.0017606811;
+ goto D47;
+
+N47_14:
+ if attribute(user_followers_count) < 521.5000000000 then goto N47_15;
+ else goto T47_18;
+
+N47_15:
+ if attribute(yst_reply_auth) < 6.5000000000 then goto N47_16;
+ else goto N47_17;
+
+N47_16:
+ if fieldLength(text) < 26.5000000000 then goto T47_14;
+ else goto T47_15;
+
+T47_14:
+ response = 0.0008975568;
+ goto D47;
+
+T47_15:
+ response = -0.0115152224;
+ goto D47;
+
+N47_17:
+ if age(created_at) < 12600.0000000000 then goto T47_16;
+ else goto T47_17;
+
+T47_16:
+ response = -0.0079829768;
+ goto D47;
+
+T47_17:
+ response = -0.0007770708;
+ goto D47;
+
+T47_18:
+ response = 0.0029601612;
+ goto D47;
+
+D47:
+
+tnscore = tnscore + response;
+
+/* Tree 49 of 80 */
+N48_1:
+ if attribute(ythl) < 0.5000000000 then goto N48_2;
+ else goto N48_8;
+
+N48_2:
+ if age(created_at) < 1830.0000000000 then goto N48_3;
+ else goto N48_5;
+
+N48_3:
+ if term(1).significance < 0.8159549832 then goto T48_1;
+ else goto N48_4;
+
+T48_1:
+ response = 0.0344152691;
+ goto D48;
+
+N48_4:
+ if attribute(yst_tweet_language) < 3243.5000000000 then goto T48_2;
+ else goto T48_3;
+
+T48_2:
+ response = -0.0000931972;
+ goto D48;
+
+T48_3:
+ response = -0.0184607413;
+ goto D48;
+
+N48_5:
+ if fieldMatch(text) < 0.8700245023 then goto N48_6;
+ else goto N48_7;
+
+N48_6:
+ if age(created_at) < 81000.0000000000 then goto T48_4;
+ else goto T48_5;
+
+T48_4:
+ response = -0.0081256943;
+ goto D48;
+
+T48_5:
+ response = -0.0024744760;
+ goto D48;
+
+N48_7:
+ if fieldMatch(text).completeness < 0.9577934742 then goto T48_6;
+ else goto T48_7;
+
+T48_6:
+ response = 0.0060102860;
+ goto D48;
+
+T48_7:
+ response = -0.0049642463;
+ goto D48;
+
+N48_8:
+ if age(created_at) < 1830.0000000000 then goto N48_9;
+ else goto N48_13;
+
+N48_9:
+ if attribute(yst_tweet_language) < 3243.5000000000 then goto N48_10;
+ else goto N48_11;
+
+N48_10:
+ if fieldMatch(text).completeness < 0.9521285295 then goto T48_8;
+ else goto T48_9;
+
+T48_8:
+ response = -0.0086391614;
+ goto D48;
+
+T48_9:
+ response = 0.0063207862;
+ goto D48;
+
+N48_11:
+ if term(0).significance < 0.9989969730 then goto T48_10;
+ else goto N48_12;
+
+T48_10:
+ response = 0.0055342844;
+ goto D48;
+
+N48_12:
+ if term(0).significance < 0.9996274710 then goto T48_11;
+ else goto T48_12;
+
+T48_11:
+ response = -0.0534092780;
+ goto D48;
+
+T48_12:
+ response = 0.0110272216;
+ goto D48;
+
+N48_13:
+ if fieldMatch(text).head < 1.5000000000 then goto T48_13;
+ else goto N48_14;
+
+T48_13:
+ response = 0.0038230846;
+ goto D48;
+
+N48_14:
+ if fieldMatch(text).importance < 0.6663914919 then goto T48_14;
+ else goto N48_15;
+
+T48_14:
+ response = -0.0050094296;
+ goto D48;
+
+N48_15:
+ if term(2).significance < 0.9939705133 then goto N48_16;
+ else goto T48_18;
+
+N48_16:
+ if term(2).significance < 0.9904664755 then goto T48_15;
+ else goto N48_17;
+
+T48_15:
+ response = -0.0006273878;
+ goto D48;
+
+N48_17:
+ if term(1).significance < 0.9985420108 then goto T48_16;
+ else goto T48_17;
+
+T48_16:
+ response = -0.0060480992;
+ goto D48;
+
+T48_17:
+ response = -0.0305338408;
+ goto D48;
+
+T48_18:
+ response = 0.0023578375;
+ goto D48;
+
+D48:
+
+tnscore = tnscore + response;
+
+/* Tree 50 of 80 */
+N49_1:
+ if attribute(ythl) < 0.5000000000 then goto N49_2;
+ else goto N49_9;
+
+N49_2:
+ if age(created_at) < 1830.0000000000 then goto N49_3;
+ else goto T49_8;
+
+N49_3:
+ if term(1).significance < 0.7788045406 then goto T49_1;
+ else goto N49_4;
+
+T49_1:
+ response = 0.0500283244;
+ goto D49;
+
+N49_4:
+ if fieldMatch(text).importance < 0.4989485145 then goto N49_5;
+ else goto N49_6;
+
+N49_5:
+ if fieldMatch(text).importance < 0.4985739887 then goto T49_2;
+ else goto T49_3;
+
+T49_2:
+ response = -0.0040890196;
+ goto D49;
+
+T49_3:
+ response = -0.0504476618;
+ goto D49;
+
+N49_6:
+ if fieldMatch(text).importance < 0.4994869828 then goto T49_4;
+ else goto N49_7;
+
+T49_4:
+ response = 0.0226347107;
+ goto D49;
+
+N49_7:
+ if fieldMatch(text).importance < 0.7494400144 then goto T49_5;
+ else goto N49_8;
+
+T49_5:
+ response = -0.0013898685;
+ goto D49;
+
+N49_8:
+ if term(1).significance < 0.9945595264 then goto T49_6;
+ else goto T49_7;
+
+T49_6:
+ response = -0.0032601315;
+ goto D49;
+
+T49_7:
+ response = 0.0245669695;
+ goto D49;
+
+T49_8:
+ response = -0.0050325999;
+ goto D49;
+
+N49_9:
+ if age(created_at) < 1830.0000000000 then goto N49_10;
+ else goto N49_15;
+
+N49_10:
+ if attribute(yst_reply_auth) < 218.5000000000 then goto N49_11;
+ else goto T49_14;
+
+N49_11:
+ if attribute(user_followers_count) < 516.5000000000 then goto N49_12;
+ else goto T49_13;
+
+N49_12:
+ if attribute(yst_reply_auth) < 5.5000000000 then goto N49_13;
+ else goto T49_12;
+
+N49_13:
+ if fieldMatch(text).importance < 0.6662045121 then goto N49_14;
+ else goto T49_11;
+
+N49_14:
+ if fieldMatch(text).tail < 15.5000000000 then goto T49_9;
+ else goto T49_10;
+
+T49_9:
+ response = -0.0009372615;
+ goto D49;
+
+T49_10:
+ response = 0.0137497531;
+ goto D49;
+
+T49_11:
+ response = 0.0092949266;
+ goto D49;
+
+T49_12:
+ response = -0.0038400009;
+ goto D49;
+
+T49_13:
+ response = 0.0088974242;
+ goto D49;
+
+T49_14:
+ response = -0.0063462945;
+ goto D49;
+
+N49_15:
+ if fieldMatch(text).fieldCompleteness < 0.1863425076 then goto N49_16;
+ else goto T49_18;
+
+N49_16:
+ if attribute(user_followers_count) < 82.5000000000 then goto T49_15;
+ else goto N49_17;
+
+T49_15:
+ response = -0.0049968940;
+ goto D49;
+
+N49_17:
+ if term(1).significance < 0.9968400002 then goto T49_16;
+ else goto T49_17;
+
+T49_16:
+ response = -0.0035744289;
+ goto D49;
+
+T49_17:
+ response = 0.0022489806;
+ goto D49;
+
+T49_18:
+ response = 0.0031548406;
+ goto D49;
+
+D49:
+
+tnscore = tnscore + response;
+
+/* Tree 51 of 80 */
+N50_1:
+ if attribute(user_followers_count) < 1739.0000000000 then goto N50_2;
+ else goto N50_16;
+
+N50_2:
+ if attribute(yst_reply_auth) < 28.5000000000 then goto N50_3;
+ else goto T50_15;
+
+N50_3:
+ if attribute(user_followers_count) < 86.5000000000 then goto N50_4;
+ else goto N50_8;
+
+N50_4:
+ if fieldMatch(text).importance < 0.6655265093 then goto T50_1;
+ else goto N50_5;
+
+T50_1:
+ response = -0.0070659027;
+ goto D50;
+
+N50_5:
+ if age(created_at) < 1890.0000000000 then goto N50_6;
+ else goto T50_5;
+
+N50_6:
+ if term(1).significance < 0.9980455041 then goto T50_2;
+ else goto N50_7;
+
+T50_2:
+ response = 0.0084857763;
+ goto D50;
+
+N50_7:
+ if attribute(user_friends_count) < 38.5000000000 then goto T50_3;
+ else goto T50_4;
+
+T50_3:
+ response = 0.0082639620;
+ goto D50;
+
+T50_4:
+ response = -0.0130539890;
+ goto D50;
+
+T50_5:
+ response = -0.0029246429;
+ goto D50;
+
+N50_8:
+ if fieldMatch(user_name) < 0.3153960109 then goto N50_9;
+ else goto N50_15;
+
+N50_9:
+ if fieldMatch(text) < 0.5473589897 then goto N50_10;
+ else goto N50_11;
+
+N50_10:
+ if match < 0.5405354500 then goto T50_6;
+ else goto T50_7;
+
+T50_6:
+ response = -0.0265778832;
+ goto D50;
+
+T50_7:
+ response = -0.0016129946;
+ goto D50;
+
+N50_11:
+ if fieldMatch(text).occurrence < 0.0816664994 then goto N50_12;
+ else goto N50_14;
+
+N50_12:
+ if attribute(yst_link_array_size) < 0.0017840000 then goto T50_8;
+ else goto N50_13;
+
+T50_8:
+ response = -0.0042828731;
+ goto D50;
+
+N50_13:
+ if age(created_at) < 1830.0000000000 then goto T50_9;
+ else goto T50_10;
+
+T50_9:
+ response = 0.0199611910;
+ goto D50;
+
+T50_10:
+ response = -0.0028795459;
+ goto D50;
+
+N50_14:
+ if age(created_at) < 1710.0000000000 then goto T50_11;
+ else goto T50_12;
+
+T50_11:
+ response = 0.0085534102;
+ goto D50;
+
+T50_12:
+ response = 0.0023027773;
+ goto D50;
+
+N50_15:
+ if fieldMatch(text).significantOccurrence < 0.0327955000 then goto T50_13;
+ else goto T50_14;
+
+T50_13:
+ response = -0.0415331084;
+ goto D50;
+
+T50_14:
+ response = 0.0263336717;
+ goto D50;
+
+T50_15:
+ response = -0.0045775510;
+ goto D50;
+
+N50_16:
+ if fieldMatch(text).tail < 7.5000000000 then goto N50_17;
+ else goto T50_18;
+
+N50_17:
+ if term(2).significance < 0.8023320436 then goto T50_16;
+ else goto T50_17;
+
+T50_16:
+ response = -0.0097788860;
+ goto D50;
+
+T50_17:
+ response = 0.0038323247;
+ goto D50;
+
+T50_18:
+ response = 0.0081719743;
+ goto D50;
+
+D50:
+
+tnscore = tnscore + response;
+
+/* Tree 52 of 80 */
+N51_1:
+ if attribute(user_followers_count) < 437.5000000000 then goto N51_2;
+ else goto N51_12;
+
+N51_2:
+ if fieldMatch(text).significantOccurrence < 0.1246850044 then goto N51_3;
+ else goto T51_11;
+
+N51_3:
+ if attribute(yst_reply_auth) < 22.5000000000 then goto N51_4;
+ else goto T51_10;
+
+N51_4:
+ if fieldMatch(text) < 0.3409180045 then goto N51_5;
+ else goto N51_8;
+
+N51_5:
+ if fieldMatch(text).importance < 0.6665065289 then goto T51_1;
+ else goto N51_6;
+
+T51_1:
+ response = -0.0102582795;
+ goto D51;
+
+N51_6:
+ if term(1).significance < 0.9962199926 then goto N51_7;
+ else goto T51_4;
+
+N51_7:
+ if term(1).significance < 0.9943439960 then goto T51_2;
+ else goto T51_3;
+
+T51_2:
+ response = -0.0021503448;
+ goto D51;
+
+T51_3:
+ response = -0.0306146076;
+ goto D51;
+
+T51_4:
+ response = 0.0068595469;
+ goto D51;
+
+N51_8:
+ if fieldMatch(text).earliness < 0.9354164600 then goto N51_9;
+ else goto T51_9;
+
+N51_9:
+ if fieldMatch(user_name) < 0.5095770359 then goto N51_10;
+ else goto T51_8;
+
+N51_10:
+ if attribute(yst_tweet_language) < 3243.5000000000 then goto N51_11;
+ else goto T51_7;
+
+N51_11:
+ if age(created_at) < 1830.0000000000 then goto T51_5;
+ else goto T51_6;
+
+T51_5:
+ response = 0.0020429611;
+ goto D51;
+
+T51_6:
+ response = -0.0019541993;
+ goto D51;
+
+T51_7:
+ response = -0.0132421664;
+ goto D51;
+
+T51_8:
+ response = 0.0279388980;
+ goto D51;
+
+T51_9:
+ response = 0.0043877081;
+ goto D51;
+
+T51_10:
+ response = -0.0053086844;
+ goto D51;
+
+T51_11:
+ response = -0.0109310616;
+ goto D51;
+
+N51_12:
+ if age(created_at) < 1530.0000000000 then goto N51_13;
+ else goto N51_16;
+
+N51_13:
+ if term(0).significance < 0.9986474514 then goto N51_14;
+ else goto T51_15;
+
+N51_14:
+ if term(0).significance < 0.9980379939 then goto N51_15;
+ else goto T51_14;
+
+N51_15:
+ if fieldMatch(text).earliness < 0.2440474927 then goto T51_12;
+ else goto T51_13;
+
+T51_12:
+ response = -0.0189069835;
+ goto D51;
+
+T51_13:
+ response = 0.0085132629;
+ goto D51;
+
+T51_14:
+ response = 0.0221469666;
+ goto D51;
+
+T51_15:
+ response = 0.0002679538;
+ goto D51;
+
+N51_16:
+ if attribute(user_statuses_count) < 2928.5000000000 then goto T51_16;
+ else goto N51_17;
+
+T51_16:
+ response = 0.0037037270;
+ goto D51;
+
+N51_17:
+ if match < 0.5710045099 then goto T51_17;
+ else goto T51_18;
+
+T51_17:
+ response = -0.0125698441;
+ goto D51;
+
+T51_18:
+ response = -0.0003892576;
+ goto D51;
+
+D51:
+
+tnscore = tnscore + response;
+
+/* Tree 53 of 80 */
+N52_1:
+ if fieldMatch(text).tail < 3.5000000000 then goto N52_2;
+ else goto N52_6;
+
+N52_2:
+ if attribute(yst_reply_auth) < 278.5000000000 then goto N52_3;
+ else goto T52_5;
+
+N52_3:
+ if fieldLength(text) < 24.5000000000 then goto N52_4;
+ else goto T52_4;
+
+N52_4:
+ if attribute(yst_link_array_size) < 0.0885144994 then goto N52_5;
+ else goto T52_3;
+
+N52_5:
+ if term(1).significance < 0.7788045406 then goto T52_1;
+ else goto T52_2;
+
+T52_1:
+ response = 0.0265531093;
+ goto D52;
+
+T52_2:
+ response = -0.0020354187;
+ goto D52;
+
+T52_3:
+ response = -0.0289274408;
+ goto D52;
+
+T52_4:
+ response = -0.0107078153;
+ goto D52;
+
+T52_5:
+ response = -0.0129870071;
+ goto D52;
+
+N52_6:
+ if age(created_at) < 1830.0000000000 then goto N52_7;
+ else goto N52_13;
+
+N52_7:
+ if attribute(yst_tweet_language) < 3243.5000000000 then goto N52_8;
+ else goto N52_11;
+
+N52_8:
+ if fieldLength(text) < 9.5000000000 then goto T52_6;
+ else goto N52_9;
+
+T52_6:
+ response = -0.0103056223;
+ goto D52;
+
+N52_9:
+ if fieldMatch(user_name) < 0.2000829875 then goto N52_10;
+ else goto T52_9;
+
+N52_10:
+ if fieldMatch(text).importance < 0.7970539927 then goto T52_7;
+ else goto T52_8;
+
+T52_7:
+ response = 0.0035454291;
+ goto D52;
+
+T52_8:
+ response = 0.0126182815;
+ goto D52;
+
+T52_9:
+ response = 0.0233580846;
+ goto D52;
+
+N52_11:
+ if fieldLength(text) < 22.5000000000 then goto N52_12;
+ else goto T52_12;
+
+N52_12:
+ if term(0).significance < 0.9991210103 then goto T52_10;
+ else goto T52_11;
+
+T52_10:
+ response = 0.0080261443;
+ goto D52;
+
+T52_11:
+ response = -0.0294237431;
+ goto D52;
+
+T52_12:
+ response = -0.0673310696;
+ goto D52;
+
+N52_13:
+ if fieldMatch(text).importance < 0.6664265394 then goto N52_14;
+ else goto N52_15;
+
+N52_14:
+ if attribute(yst_reply_auth) < 13.5000000000 then goto T52_13;
+ else goto T52_14;
+
+T52_13:
+ response = -0.0018649103;
+ goto D52;
+
+T52_14:
+ response = -0.0077154393;
+ goto D52;
+
+N52_15:
+ if term(0).significance < 0.9997465014 then goto N52_16;
+ else goto N52_17;
+
+N52_16:
+ if attribute(user_followers_count) < 717.5000000000 then goto T52_15;
+ else goto T52_16;
+
+T52_15:
+ response = -0.0012717951;
+ goto D52;
+
+T52_16:
+ response = 0.0029037540;
+ goto D52;
+
+N52_17:
+ if fieldTermMatch(text,1).firstPosition < 15.5000000000 then goto T52_17;
+ else goto T52_18;
+
+T52_17:
+ response = 0.0107252476;
+ goto D52;
+
+T52_18:
+ response = -0.0062640981;
+ goto D52;
+
+D52:
+
+tnscore = tnscore + response;
+
+/* Tree 54 of 80 */
+N53_1:
+ if attribute(ythl) < 0.5000000000 then goto N53_2;
+ else goto N53_8;
+
+N53_2:
+ if attribute(user_followers_count) < 483.5000000000 then goto N53_3;
+ else goto N53_7;
+
+N53_3:
+ if attribute(user_statuses_count) < 491.5000000000 then goto N53_4;
+ else goto N53_6;
+
+N53_4:
+ if fieldMatch(text).tail < 3.5000000000 then goto T53_1;
+ else goto N53_5;
+
+T53_1:
+ response = -0.0057958616;
+ goto D53;
+
+N53_5:
+ if age(created_at) < 1410.0000000000 then goto T53_2;
+ else goto T53_3;
+
+T53_2:
+ response = 0.0070562486;
+ goto D53;
+
+T53_3:
+ response = -0.0007664522;
+ goto D53;
+
+N53_6:
+ if attribute(user_friends_count) < 8.5000000000 then goto T53_4;
+ else goto T53_5;
+
+T53_4:
+ response = 0.0087335556;
+ goto D53;
+
+T53_5:
+ response = -0.0058603167;
+ goto D53;
+
+N53_7:
+ if age(created_at) < 210.0000000000 then goto T53_6;
+ else goto T53_7;
+
+T53_6:
+ response = 0.0246066286;
+ goto D53;
+
+T53_7:
+ response = 0.0003480739;
+ goto D53;
+
+N53_8:
+ if fieldMatch(text) < 0.5547109842 then goto N53_9;
+ else goto N53_14;
+
+N53_9:
+ if fieldMatch(text).occurrence < 0.1348485053 then goto N53_10;
+ else goto T53_13;
+
+N53_10:
+ if attribute(user_statuses_count) < 2933.0000000000 then goto T53_8;
+ else goto N53_11;
+
+T53_8:
+ response = -0.0023188146;
+ goto D53;
+
+N53_11:
+ if attribute(yst_reply_auth) < 1.5000000000 then goto T53_9;
+ else goto N53_12;
+
+T53_9:
+ response = -0.0279839136;
+ goto D53;
+
+N53_12:
+ if fieldTermMatch(text,1).firstPosition < 12.5000000000 then goto N53_13;
+ else goto T53_12;
+
+N53_13:
+ if age(created_at) < 2730.0000000000 then goto T53_10;
+ else goto T53_11;
+
+T53_10:
+ response = 0.0153842703;
+ goto D53;
+
+T53_11:
+ response = -0.0081351611;
+ goto D53;
+
+T53_12:
+ response = -0.0240346583;
+ goto D53;
+
+T53_13:
+ response = 0.0008863957;
+ goto D53;
+
+N53_14:
+ if fieldLength(text) < 9.5000000000 then goto T53_14;
+ else goto N53_15;
+
+T53_14:
+ response = -0.0106073655;
+ goto D53;
+
+N53_15:
+ if fieldMatch(text).earliness < 0.9393379688 then goto N53_16;
+ else goto T53_18;
+
+N53_16:
+ if fieldMatch(text).occurrence < 0.0655914992 then goto T53_15;
+ else goto N53_17;
+
+T53_15:
+ response = -0.0023447985;
+ goto D53;
+
+N53_17:
+ if age(created_at) < 1950.0000000000 then goto T53_16;
+ else goto T53_17;
+
+T53_16:
+ response = 0.0063181854;
+ goto D53;
+
+T53_17:
+ response = 0.0015014161;
+ goto D53;
+
+T53_18:
+ response = 0.0074385233;
+ goto D53;
+
+D53:
+
+tnscore = tnscore + response;
+
+/* Tree 55 of 80 */
+N54_1:
+ if attribute(ythl) < 0.5000000000 then goto N54_2;
+ else goto N54_10;
+
+N54_2:
+ if fieldMatch(text) < 0.8502080441 then goto N54_3;
+ else goto N54_4;
+
+N54_3:
+ if fieldMatch(text).importance < 0.7468224764 then goto T54_1;
+ else goto T54_2;
+
+T54_1:
+ response = -0.0067962178;
+ goto D54;
+
+T54_2:
+ response = -0.0019381191;
+ goto D54;
+
+N54_4:
+ if attribute(user_statuses_count) < 16627.5000000000 then goto N54_5;
+ else goto T54_9;
+
+N54_5:
+ if attribute(user_followers_count) < 515.5000000000 then goto N54_6;
+ else goto N54_8;
+
+N54_6:
+ if fieldMatch(text).weightedOccurrence < 0.0944940001 then goto N54_7;
+ else goto T54_5;
+
+N54_7:
+ if attribute(user_statuses_count) < 109.5000000000 then goto T54_3;
+ else goto T54_4;
+
+T54_3:
+ response = 0.0079116741;
+ goto D54;
+
+T54_4:
+ response = -0.0004709728;
+ goto D54;
+
+T54_5:
+ response = -0.0057247378;
+ goto D54;
+
+N54_8:
+ if term(0).significance < 0.9991005063 then goto N54_9;
+ else goto T54_8;
+
+N54_9:
+ if age(created_at) < 1350.0000000000 then goto T54_6;
+ else goto T54_7;
+
+T54_6:
+ response = 0.0233500539;
+ goto D54;
+
+T54_7:
+ response = 0.0061626722;
+ goto D54;
+
+T54_8:
+ response = -0.0047207579;
+ goto D54;
+
+T54_9:
+ response = -0.0108453748;
+ goto D54;
+
+N54_10:
+ if fieldTermMatch(text,2).firstPosition < 8.5000000000 then goto N54_11;
+ else goto N54_13;
+
+N54_11:
+ if attribute(user_followers_count) < 16.5000000000 then goto T54_10;
+ else goto N54_12;
+
+T54_10:
+ response = -0.0054426486;
+ goto D54;
+
+N54_12:
+ if age(created_at) < 2430.0000000000 then goto T54_11;
+ else goto T54_12;
+
+T54_11:
+ response = 0.0098679265;
+ goto D54;
+
+T54_12:
+ response = 0.0032263599;
+ goto D54;
+
+N54_13:
+ if match < 0.9289889932 then goto N54_14;
+ else goto T54_18;
+
+N54_14:
+ if fieldMatch(text).tail < 3.5000000000 then goto N54_15;
+ else goto N54_16;
+
+N54_15:
+ if fieldMatch(text).occurrence < 0.0976189971 then goto T54_13;
+ else goto T54_14;
+
+T54_13:
+ response = -0.0211220829;
+ goto D54;
+
+T54_14:
+ response = -0.0037859558;
+ goto D54;
+
+N54_16:
+ if attribute(user_statuses_count) < 12392.5000000000 then goto T54_15;
+ else goto N54_17;
+
+T54_15:
+ response = 0.0000802340;
+ goto D54;
+
+N54_17:
+ if attribute(user_followers_count) < 317.5000000000 then goto T54_16;
+ else goto T54_17;
+
+T54_16:
+ response = -0.0361792845;
+ goto D54;
+
+T54_17:
+ response = -0.0049548586;
+ goto D54;
+
+T54_18:
+ response = 0.0025926241;
+ goto D54;
+
+D54:
+
+tnscore = tnscore + response;
+
+/* Tree 56 of 80 */
+N55_1:
+ if attribute(ythl) < 0.5000000000 then goto N55_2;
+ else goto N55_9;
+
+N55_2:
+ if age(created_at) < 1830.0000000000 then goto N55_3;
+ else goto T55_8;
+
+N55_3:
+ if fieldMatch(text).longestSequence < 1.5000000000 then goto T55_1;
+ else goto N55_4;
+
+T55_1:
+ response = -0.0031653440;
+ goto D55;
+
+N55_4:
+ if fieldMatch(text).importance < 0.7498974800 then goto N55_5;
+ else goto N55_8;
+
+N55_5:
+ if term(1).significance < 0.9981340170 then goto T55_2;
+ else goto N55_6;
+
+T55_2:
+ response = 0.0094378769;
+ goto D55;
+
+N55_6:
+ if term(1).significance < 0.9994934797 then goto N55_7;
+ else goto T55_5;
+
+N55_7:
+ if term(1).significance < 0.9991415143 then goto T55_3;
+ else goto T55_4;
+
+T55_3:
+ response = 0.0003721852;
+ goto D55;
+
+T55_4:
+ response = -0.0259010774;
+ goto D55;
+
+T55_5:
+ response = 0.0124789418;
+ goto D55;
+
+N55_8:
+ if term(2).significance < 0.9737149477 then goto T55_6;
+ else goto T55_7;
+
+T55_6:
+ response = -0.0202297481;
+ goto D55;
+
+T55_7:
+ response = 0.0040219128;
+ goto D55;
+
+T55_8:
+ response = -0.0045478246;
+ goto D55;
+
+N55_9:
+ if age(created_at) < 1590.0000000000 then goto N55_10;
+ else goto N55_12;
+
+N55_10:
+ if fieldTermMatch(text,1).firstPosition < 7.5000000000 then goto T55_9;
+ else goto N55_11;
+
+T55_9:
+ response = 0.0081043971;
+ goto D55;
+
+N55_11:
+ if fieldMatch(text) < 0.8496830463 then goto T55_10;
+ else goto T55_11;
+
+T55_10:
+ response = -0.0035718865;
+ goto D55;
+
+T55_11:
+ response = 0.0048047847;
+ goto D55;
+
+N55_12:
+ if fieldMatch(text).head < 1.5000000000 then goto N55_13;
+ else goto N55_14;
+
+N55_13:
+ if fieldLength(text) < 8.5000000000 then goto T55_12;
+ else goto T55_13;
+
+T55_12:
+ response = -0.0224047487;
+ goto D55;
+
+T55_13:
+ response = 0.0036563528;
+ goto D55;
+
+N55_14:
+ if fieldMatch(text) < 0.2774904966 then goto N55_15;
+ else goto N55_16;
+
+N55_15:
+ if term(0).significance < 0.9978594780 then goto T55_14;
+ else goto T55_15;
+
+T55_14:
+ response = -0.0142881937;
+ goto D55;
+
+T55_15:
+ response = -0.0014981238;
+ goto D55;
+
+N55_16:
+ if attribute(yst_reply_auth) < 70.5000000000 then goto T55_16;
+ else goto N55_17;
+
+T55_16:
+ response = 0.0001609764;
+ goto D55;
+
+N55_17:
+ if attribute(user_followers_count) < 397.5000000000 then goto T55_17;
+ else goto T55_18;
+
+T55_17:
+ response = -0.0180919900;
+ goto D55;
+
+T55_18:
+ response = -0.0021113953;
+ goto D55;
+
+D55:
+
+tnscore = tnscore + response;
+
+/* Tree 57 of 80 */
+N56_1:
+ if fieldMatch(text).longestSequence < 1.5000000000 then goto N56_2;
+ else goto N56_14;
+
+N56_2:
+ if fieldTermMatch(text,0).firstPosition < 14.5000000000 then goto N56_3;
+ else goto N56_13;
+
+N56_3:
+ if fieldMatch(text).absoluteOccurrence < 0.0136665003 then goto N56_4;
+ else goto T56_11;
+
+N56_4:
+ if fieldMatch(text).significance < 0.7493325472 then goto N56_5;
+ else goto T56_10;
+
+N56_5:
+ if age(created_at) < 1830.0000000000 then goto N56_6;
+ else goto T56_9;
+
+N56_6:
+ if attribute(user_statuses_count) < 5.5000000000 then goto T56_1;
+ else goto N56_7;
+
+T56_1:
+ response = -0.0282346598;
+ goto D56;
+
+N56_7:
+ if term(0).significance < 0.9725670218 then goto N56_8;
+ else goto N56_9;
+
+N56_8:
+ if term(1).significance < 0.9792364836 then goto T56_2;
+ else goto T56_3;
+
+T56_2:
+ response = -0.0784237022;
+ goto D56;
+
+T56_3:
+ response = -0.0059997941;
+ goto D56;
+
+N56_9:
+ if fieldTermMatch(text,1).firstPosition < 10.5000000000 then goto T56_4;
+ else goto N56_10;
+
+T56_4:
+ response = 0.0078147345;
+ goto D56;
+
+N56_10:
+ if match < 0.9353330135 then goto T56_5;
+ else goto N56_11;
+
+T56_5:
+ response = -0.0019768224;
+ goto D56;
+
+N56_11:
+ if fieldMatch(text).earliness < 0.9198719859 then goto N56_12;
+ else goto T56_8;
+
+N56_12:
+ if attribute(yst_link_array_size) < 0.0002880000 then goto T56_6;
+ else goto T56_7;
+
+T56_6:
+ response = -0.0041019966;
+ goto D56;
+
+T56_7:
+ response = 0.0264356088;
+ goto D56;
+
+T56_8:
+ response = 0.0005631411;
+ goto D56;
+
+T56_9:
+ response = -0.0050485104;
+ goto D56;
+
+T56_10:
+ response = 0.0082039036;
+ goto D56;
+
+T56_11:
+ response = 0.0051839504;
+ goto D56;
+
+N56_13:
+ if term(0).significance < 0.9982025027 then goto T56_12;
+ else goto T56_13;
+
+T56_12:
+ response = -0.0142960741;
+ goto D56;
+
+T56_13:
+ response = -0.0033997299;
+ goto D56;
+
+N56_14:
+ if attribute(yst_tweet_language) < 3587.5000000000 then goto N56_15;
+ else goto T56_18;
+
+N56_15:
+ if age(created_at) < 1770.0000000000 then goto T56_14;
+ else goto N56_16;
+
+T56_14:
+ response = 0.0049084121;
+ goto D56;
+
+N56_16:
+ if attribute(ythl) < 0.5000000000 then goto T56_15;
+ else goto N56_17;
+
+T56_15:
+ response = -0.0020172224;
+ goto D56;
+
+N56_17:
+ if attribute(yst_link_array_size) < 0.0056419997 then goto T56_16;
+ else goto T56_17;
+
+T56_16:
+ response = 0.0029607752;
+ goto D56;
+
+T56_17:
+ response = -0.0028856329;
+ goto D56;
+
+T56_18:
+ response = -0.0261107048;
+ goto D56;
+
+D56:
+
+tnscore = tnscore + response;
+
+/* Tree 58 of 80 */
+N57_1:
+ if fieldMatch(text) < 0.2898915112 then goto N57_2;
+ else goto N57_4;
+
+N57_2:
+ if fieldMatch(text).head < 1.5000000000 then goto T57_1;
+ else goto N57_3;
+
+T57_1:
+ response = 0.0014847907;
+ goto D57;
+
+N57_3:
+ if fieldMatch(text).importance < 0.6663334966 then goto T57_2;
+ else goto T57_3;
+
+T57_2:
+ response = -0.0119151319;
+ goto D57;
+
+T57_3:
+ response = -0.0040477723;
+ goto D57;
+
+N57_4:
+ if attribute(user_followers_count) < 519.5000000000 then goto N57_5;
+ else goto N57_15;
+
+N57_5:
+ if term(2).significance < 0.9943574667 then goto N57_6;
+ else goto N57_14;
+
+N57_6:
+ if age(created_at) < 1830.0000000000 then goto N57_7;
+ else goto N57_11;
+
+N57_7:
+ if attribute(user_statuses_count) < 1256.0000000000 then goto T57_4;
+ else goto N57_8;
+
+T57_4:
+ response = 0.0034757752;
+ goto D57;
+
+N57_8:
+ if attribute(user_statuses_count) < 1397.0000000000 then goto T57_5;
+ else goto N57_9;
+
+T57_5:
+ response = -0.0279955298;
+ goto D57;
+
+N57_9:
+ if fieldMatch(text).tail < 15.5000000000 then goto T57_6;
+ else goto N57_10;
+
+T57_6:
+ response = -0.0046990807;
+ goto D57;
+
+N57_10:
+ if match < 0.9350855350 then goto T57_7;
+ else goto T57_8;
+
+T57_7:
+ response = -0.0040285710;
+ goto D57;
+
+T57_8:
+ response = 0.0289488138;
+ goto D57;
+
+N57_11:
+ if term(0).significance < 0.9971770048 then goto N57_12;
+ else goto N57_13;
+
+N57_12:
+ if term(1).significance < 0.9985420108 then goto T57_9;
+ else goto T57_10;
+
+T57_9:
+ response = -0.0058304095;
+ goto D57;
+
+T57_10:
+ response = -0.0172909110;
+ goto D57;
+
+N57_13:
+ if fieldTermMatch(text,1).firstPosition < 17.5000000000 then goto T57_11;
+ else goto T57_12;
+
+T57_11:
+ response = 0.0053683293;
+ goto D57;
+
+T57_12:
+ response = -0.0057448395;
+ goto D57;
+
+N57_14:
+ if attribute(yst_reply_auth) < 236.5000000000 then goto T57_13;
+ else goto T57_14;
+
+T57_13:
+ response = 0.0018130071;
+ goto D57;
+
+T57_14:
+ response = -0.0097022524;
+ goto D57;
+
+N57_15:
+ if fieldMatch(text).importance < 0.4989485145 then goto N57_16;
+ else goto N57_17;
+
+N57_16:
+ if fieldMatch(text).importance < 0.4986034930 then goto T57_15;
+ else goto T57_16;
+
+T57_15:
+ response = -0.0030858773;
+ goto D57;
+
+T57_16:
+ response = -0.0594499645;
+ goto D57;
+
+N57_17:
+ if fieldTermMatch(text,0).firstPosition < 13.5000000000 then goto T57_17;
+ else goto T57_18;
+
+T57_17:
+ response = 0.0046047026;
+ goto D57;
+
+T57_18:
+ response = -0.0041554082;
+ goto D57;
+
+D57:
+
+tnscore = tnscore + response;
+
+/* Tree 59 of 80 */
+N58_1:
+ if fieldMatch(text).longestSequence < 1.5000000000 then goto N58_2;
+ else goto N58_12;
+
+N58_2:
+ if fieldMatch(text).earliness < 0.6381819844 then goto N58_3;
+ else goto N58_10;
+
+N58_3:
+ if term(0).significance < 0.9928579926 then goto N58_4;
+ else goto N58_6;
+
+N58_4:
+ if attribute(yst_reply_auth) < 3.5000000000 then goto N58_5;
+ else goto T58_3;
+
+N58_5:
+ if attribute(user_followers_count) < 18.5000000000 then goto T58_1;
+ else goto T58_2;
+
+T58_1:
+ response = -0.0266701398;
+ goto D58;
+
+T58_2:
+ response = 0.0020537489;
+ goto D58;
+
+T58_3:
+ response = -0.0226862335;
+ goto D58;
+
+N58_6:
+ if fieldMatch(text).importance < 0.4997234941 then goto N58_7;
+ else goto N58_8;
+
+N58_7:
+ if attribute(yst_link_array_size) < 0.0005030000 then goto T58_4;
+ else goto T58_5;
+
+T58_4:
+ response = 0.0166948856;
+ goto D58;
+
+T58_5:
+ response = -0.0034104232;
+ goto D58;
+
+N58_8:
+ if age(created_at) < 270.0000000000 then goto T58_6;
+ else goto N58_9;
+
+T58_6:
+ response = 0.0057923479;
+ goto D58;
+
+N58_9:
+ if fieldMatch(text).importance < 0.4997584820 then goto T58_7;
+ else goto T58_8;
+
+T58_7:
+ response = -0.0291936745;
+ goto D58;
+
+T58_8:
+ response = -0.0059528701;
+ goto D58;
+
+N58_10:
+ if term(0).significance < 0.8512874842 then goto T58_9;
+ else goto N58_11;
+
+T58_9:
+ response = -0.0436401448;
+ goto D58;
+
+N58_11:
+ if age(created_at) < 1830.0000000000 then goto T58_10;
+ else goto T58_11;
+
+T58_10:
+ response = 0.0039537575;
+ goto D58;
+
+T58_11:
+ response = -0.0025333564;
+ goto D58;
+
+N58_12:
+ if age(created_at) < 1830.0000000000 then goto N58_13;
+ else goto N58_17;
+
+N58_13:
+ if attribute(yst_reply_auth) < 158.5000000000 then goto N58_14;
+ else goto T58_16;
+
+N58_14:
+ if attribute(yst_reply_auth) < 149.5000000000 then goto N58_15;
+ else goto T58_15;
+
+N58_15:
+ if term(2).significance < 0.9389865398 then goto T58_12;
+ else goto N58_16;
+
+T58_12:
+ response = -0.0024084291;
+ goto D58;
+
+N58_16:
+ if term(2).significance < 0.9853805304 then goto T58_13;
+ else goto T58_14;
+
+T58_13:
+ response = 0.0179593679;
+ goto D58;
+
+T58_14:
+ response = 0.0053348502;
+ goto D58;
+
+T58_15:
+ response = 0.0511133688;
+ goto D58;
+
+T58_16:
+ response = -0.0040868819;
+ goto D58;
+
+N58_17:
+ if attribute(user_followers_count) < 172.5000000000 then goto T58_17;
+ else goto T58_18;
+
+T58_17:
+ response = -0.0015268821;
+ goto D58;
+
+T58_18:
+ response = 0.0018115080;
+ goto D58;
+
+D58:
+
+tnscore = tnscore + response;
+
+/* Tree 60 of 80 */
+N59_1:
+ if fieldMatch(text) < 0.4593589902 then goto N59_2;
+ else goto N59_6;
+
+N59_2:
+ if term(0).significance < 0.9975925088 then goto N59_3;
+ else goto T59_5;
+
+N59_3:
+ if fieldMatch(text).occurrence < 0.1188234985 then goto N59_4;
+ else goto T59_4;
+
+N59_4:
+ if fieldMatch(text).earliness < 0.9486839771 then goto N59_5;
+ else goto T59_3;
+
+N59_5:
+ if term(0).significance < 0.9817185402 then goto T59_1;
+ else goto T59_2;
+
+T59_1:
+ response = -0.0335831380;
+ goto D59;
+
+T59_2:
+ response = -0.0111131767;
+ goto D59;
+
+T59_3:
+ response = 0.0027474033;
+ goto D59;
+
+T59_4:
+ response = -0.0042505836;
+ goto D59;
+
+T59_5:
+ response = -0.0003239219;
+ goto D59;
+
+N59_6:
+ if fieldLength(text) < 9.5000000000 then goto N59_7;
+ else goto N59_9;
+
+N59_7:
+ if term(1).significance < 0.9965380430 then goto N59_8;
+ else goto T59_8;
+
+N59_8:
+ if fieldMatch(text).importance < 0.6660010219 then goto T59_6;
+ else goto T59_7;
+
+T59_6:
+ response = 0.0283916092;
+ goto D59;
+
+T59_7:
+ response = -0.0014717607;
+ goto D59;
+
+T59_8:
+ response = -0.0092449117;
+ goto D59;
+
+N59_9:
+ if age(created_at) < 1770.0000000000 then goto N59_10;
+ else goto N59_16;
+
+N59_10:
+ if attribute(user_statuses_count) < 5.5000000000 then goto N59_11;
+ else goto N59_12;
+
+N59_11:
+ if attribute(yst_link_array_size) < 0.0000120000 then goto T59_9;
+ else goto T59_10;
+
+T59_9:
+ response = -0.0060483091;
+ goto D59;
+
+T59_10:
+ response = -0.0952850231;
+ goto D59;
+
+N59_12:
+ if term(2).significance < 0.9389865398 then goto N59_13;
+ else goto T59_15;
+
+N59_13:
+ if attribute(user_statuses_count) < 27302.5000000000 then goto N59_14;
+ else goto N59_15;
+
+N59_14:
+ if term(0).significance < 0.9943234921 then goto T59_11;
+ else goto T59_12;
+
+T59_11:
+ response = -0.0055188147;
+ goto D59;
+
+T59_12:
+ response = 0.0033202683;
+ goto D59;
+
+N59_15:
+ if attribute(yst_reply_auth) < 19.5000000000 then goto T59_13;
+ else goto T59_14;
+
+T59_13:
+ response = 0.0086451663;
+ goto D59;
+
+T59_14:
+ response = -0.0362288139;
+ goto D59;
+
+T59_15:
+ response = 0.0064556248;
+ goto D59;
+
+N59_16:
+ if fieldMatch(text).occurrence < 0.0784614980 then goto T59_16;
+ else goto N59_17;
+
+T59_16:
+ response = -0.0044655704;
+ goto D59;
+
+N59_17:
+ if fieldTermMatch(text,0).firstPosition < 0.5000000000 then goto T59_17;
+ else goto T59_18;
+
+T59_17:
+ response = 0.0052075545;
+ goto D59;
+
+T59_18:
+ response = 0.0000792364;
+ goto D59;
+
+D59:
+
+tnscore = tnscore + response;
+
+/* Tree 61 of 80 */
+N60_1:
+ if fieldMatch(text).longestSequence < 1.5000000000 then goto N60_2;
+ else goto N60_9;
+
+N60_2:
+ if term(0).significance < 0.8512874842 then goto N60_3;
+ else goto N60_4;
+
+N60_3:
+ if term(2).significance < 0.9936410189 then goto T60_1;
+ else goto T60_2;
+
+T60_1:
+ response = -0.0904883380;
+ goto D60;
+
+T60_2:
+ response = -0.0089960419;
+ goto D60;
+
+N60_4:
+ if fieldMatch(text).importance < 0.4999005198 then goto N60_5;
+ else goto N60_6;
+
+N60_5:
+ if attribute(user_friends_count) < 7.5000000000 then goto T60_3;
+ else goto T60_4;
+
+T60_3:
+ response = 0.0105675969;
+ goto D60;
+
+T60_4:
+ response = -0.0002271753;
+ goto D60;
+
+N60_6:
+ if fieldMatch(text).importance < 0.4999030232 then goto T60_5;
+ else goto N60_7;
+
+T60_5:
+ response = -0.0345873832;
+ goto D60;
+
+N60_7:
+ if fieldMatch(text).earliness < 0.7165180445 then goto N60_8;
+ else goto T60_8;
+
+N60_8:
+ if term(1).significance < 0.9967604876 then goto T60_6;
+ else goto T60_7;
+
+T60_6:
+ response = -0.0115456455;
+ goto D60;
+
+T60_7:
+ response = -0.0042372928;
+ goto D60;
+
+T60_8:
+ response = -0.0012917255;
+ goto D60;
+
+N60_9:
+ if age(created_at) < 1830.0000000000 then goto N60_10;
+ else goto N60_16;
+
+N60_10:
+ if attribute(yst_reply_auth) < 26.5000000000 then goto T60_9;
+ else goto N60_11;
+
+T60_9:
+ response = 0.0072943729;
+ goto D60;
+
+N60_11:
+ if attribute(user_followers_count) < 3340.0000000000 then goto N60_12;
+ else goto T60_15;
+
+N60_12:
+ if fieldMatch(text).importance < 0.6665514708 then goto N60_13;
+ else goto N60_14;
+
+N60_13:
+ if fieldMatch(text).significance < 0.6661305428 then goto T60_10;
+ else goto T60_11;
+
+T60_10:
+ response = -0.0013967805;
+ goto D60;
+
+T60_11:
+ response = -0.0212892006;
+ goto D60;
+
+N60_14:
+ if attribute(yst_reply_auth) < 48.5000000000 then goto T60_12;
+ else goto N60_15;
+
+T60_12:
+ response = -0.0095375798;
+ goto D60;
+
+N60_15:
+ if fieldMatch(text).importance < 0.7498970032 then goto T60_13;
+ else goto T60_14;
+
+T60_13:
+ response = 0.0155022730;
+ goto D60;
+
+T60_14:
+ response = -0.0026741211;
+ goto D60;
+
+T60_15:
+ response = 0.0188293335;
+ goto D60;
+
+N60_16:
+ if attribute(user_followers_count) < 72.5000000000 then goto T60_16;
+ else goto N60_17;
+
+T60_16:
+ response = -0.0024566452;
+ goto D60;
+
+N60_17:
+ if attribute(yst_reply_auth) < 50.5000000000 then goto T60_17;
+ else goto T60_18;
+
+T60_17:
+ response = 0.0027297795;
+ goto D60;
+
+T60_18:
+ response = -0.0018218561;
+ goto D60;
+
+D60:
+
+tnscore = tnscore + response;
+
+/* Tree 62 of 80 */
+N61_1:
+ if fieldMatch(text).earliness < 0.9298025370 then goto N61_2;
+ else goto N61_15;
+
+N61_2:
+ if fieldMatch(text).longestSequence < 1.5000000000 then goto N61_3;
+ else goto N61_9;
+
+N61_3:
+ if fieldMatch(user_name).importance < 0.3325359821 then goto N61_4;
+ else goto N61_8;
+
+N61_4:
+ if term(1).significance < 0.9972745180 then goto T61_1;
+ else goto N61_5;
+
+T61_1:
+ response = -0.0074074056;
+ goto D61;
+
+N61_5:
+ if attribute(user_statuses_count) < 5.5000000000 then goto N61_6;
+ else goto N61_7;
+
+N61_6:
+ if fieldMatch(text) < 0.8386005163 then goto T61_2;
+ else goto T61_3;
+
+T61_2:
+ response = 0.0032054405;
+ goto D61;
+
+T61_3:
+ response = -0.0402836718;
+ goto D61;
+
+N61_7:
+ if fieldMatch(text).earliness < 0.6381819844 then goto T61_4;
+ else goto T61_5;
+
+T61_4:
+ response = -0.0037883000;
+ goto D61;
+
+T61_5:
+ response = 0.0008924796;
+ goto D61;
+
+N61_8:
+ if attribute(user_statuses_count) < 3055.5000000000 then goto T61_6;
+ else goto T61_7;
+
+T61_6:
+ response = 0.0281949081;
+ goto D61;
+
+T61_7:
+ response = -0.0048301755;
+ goto D61;
+
+N61_9:
+ if age(created_at) < 1770.0000000000 then goto T61_8;
+ else goto N61_10;
+
+T61_8:
+ response = 0.0037513988;
+ goto D61;
+
+N61_10:
+ if attribute(yst_reply_auth) < 71.5000000000 then goto N61_11;
+ else goto T61_14;
+
+N61_11:
+ if attribute(user_followers_count) < 68.5000000000 then goto T61_9;
+ else goto N61_12;
+
+T61_9:
+ response = -0.0032099164;
+ goto D61;
+
+N61_12:
+ if fieldMatch(text).weightedOccurrence < 0.0510035008 then goto N61_13;
+ else goto N61_14;
+
+N61_13:
+ if term(2).significance < 0.9982124567 then goto T61_10;
+ else goto T61_11;
+
+T61_10:
+ response = -0.0067952208;
+ goto D61;
+
+T61_11:
+ response = 0.0019990379;
+ goto D61;
+
+N61_14:
+ if fieldTermMatch(text,1).firstPosition < 3.5000000000 then goto T61_12;
+ else goto T61_13;
+
+T61_12:
+ response = -0.0014223313;
+ goto D61;
+
+T61_13:
+ response = 0.0056390354;
+ goto D61;
+
+T61_14:
+ response = -0.0052017914;
+ goto D61;
+
+N61_15:
+ if fieldMatch(text).tail < 8.5000000000 then goto N61_16;
+ else goto N61_17;
+
+N61_16:
+ if fieldTermMatch(text,1).firstPosition < 21.5000000000 then goto T61_15;
+ else goto T61_16;
+
+T61_15:
+ response = 0.0000776589;
+ goto D61;
+
+T61_16:
+ response = -0.0251022513;
+ goto D61;
+
+N61_17:
+ if age(created_at) < 1770.0000000000 then goto T61_17;
+ else goto T61_18;
+
+T61_17:
+ response = 0.0107898472;
+ goto D61;
+
+T61_18:
+ response = 0.0035281034;
+ goto D61;
+
+D61:
+
+tnscore = tnscore + response;
+
+/* Tree 63 of 80 */
+N62_1:
+ if fieldMatch(text) < 0.4136639833 then goto N62_2;
+ else goto N62_6;
+
+N62_2:
+ if fieldMatch(text).earliness < 0.6228449941 then goto T62_1;
+ else goto N62_3;
+
+T62_1:
+ response = -0.0108022756;
+ goto D62;
+
+N62_3:
+ if term(0).significance < 0.9806225300 then goto N62_4;
+ else goto T62_5;
+
+N62_4:
+ if fieldMatch(text).importance < 0.7393674850 then goto N62_5;
+ else goto T62_4;
+
+N62_5:
+ if term(1).significance < 0.9957709908 then goto T62_2;
+ else goto T62_3;
+
+T62_2:
+ response = -0.0755310801;
+ goto D62;
+
+T62_3:
+ response = -0.0197330906;
+ goto D62;
+
+T62_4:
+ response = -0.0036348641;
+ goto D62;
+
+T62_5:
+ response = -0.0013106391;
+ goto D62;
+
+N62_6:
+ if fieldLength(text) < 9.5000000000 then goto N62_7;
+ else goto N62_8;
+
+N62_7:
+ if term(1).significance < 0.9965360165 then goto T62_6;
+ else goto T62_7;
+
+T62_6:
+ response = 0.0028564297;
+ goto D62;
+
+T62_7:
+ response = -0.0093397644;
+ goto D62;
+
+N62_8:
+ if fieldMatch(text).earliness < 0.9321835041 then goto N62_9;
+ else goto T62_18;
+
+N62_9:
+ if attribute(yst_tweet_language) < 3243.5000000000 then goto N62_10;
+ else goto T62_17;
+
+N62_10:
+ if age(created_at) < 1830.0000000000 then goto N62_11;
+ else goto N62_16;
+
+N62_11:
+ if attribute(user_followers_count) < 40.5000000000 then goto N62_12;
+ else goto N62_14;
+
+N62_12:
+ if fieldMatch(text).importance < 0.7464824915 then goto N62_13;
+ else goto T62_10;
+
+N62_13:
+ if fieldMatch(text).importance < 0.7373905182 then goto T62_8;
+ else goto T62_9;
+
+T62_8:
+ response = -0.0053736193;
+ goto D62;
+
+T62_9:
+ response = -0.0642881769;
+ goto D62;
+
+T62_10:
+ response = 0.0068053454;
+ goto D62;
+
+N62_14:
+ if term(0).significance < 0.9995554686 then goto N62_15;
+ else goto T62_13;
+
+N62_15:
+ if fieldMatch(text).tail < 21.5000000000 then goto T62_11;
+ else goto T62_12;
+
+T62_11:
+ response = 0.0051189016;
+ goto D62;
+
+T62_12:
+ response = -0.0221854397;
+ goto D62;
+
+T62_13:
+ response = -0.0053750670;
+ goto D62;
+
+N62_16:
+ if term(2).significance < 0.9946069717 then goto T62_14;
+ else goto N62_17;
+
+T62_14:
+ response = -0.0032296610;
+ goto D62;
+
+N62_17:
+ if attribute(yst_link_array_size) < 0.0122835003 then goto T62_15;
+ else goto T62_16;
+
+T62_15:
+ response = 0.0019118484;
+ goto D62;
+
+T62_16:
+ response = -0.0049101186;
+ goto D62;
+
+T62_17:
+ response = -0.0109713480;
+ goto D62;
+
+T62_18:
+ response = 0.0042358084;
+ goto D62;
+
+D62:
+
+tnscore = tnscore + response;
+
+/* Tree 64 of 80 */
+N63_1:
+ if attribute(ythl) < 0.5000000000 then goto N63_2;
+ else goto N63_12;
+
+N63_2:
+ if age(created_at) < 1830.0000000000 then goto N63_3;
+ else goto N63_9;
+
+N63_3:
+ if term(1).significance < 0.7788045406 then goto T63_1;
+ else goto N63_4;
+
+T63_1:
+ response = 0.0501539771;
+ goto D63;
+
+N63_4:
+ if attribute(user_statuses_count) < 8152.0000000000 then goto N63_5;
+ else goto T63_7;
+
+N63_5:
+ if attribute(user_followers_count) < 443.5000000000 then goto T63_2;
+ else goto N63_6;
+
+T63_2:
+ response = -0.0007685040;
+ goto D63;
+
+N63_6:
+ if attribute(yst_link_array_size) < 0.0250005014 then goto N63_7;
+ else goto N63_8;
+
+N63_7:
+ if fieldMatch(text).importance < 0.6649650335 then goto T63_3;
+ else goto T63_4;
+
+T63_3:
+ response = -0.0040469549;
+ goto D63;
+
+T63_4:
+ response = 0.0165434132;
+ goto D63;
+
+N63_8:
+ if attribute(user_followers_count) < 1371.0000000000 then goto T63_5;
+ else goto T63_6;
+
+T63_5:
+ response = 0.0529050928;
+ goto D63;
+
+T63_6:
+ response = 0.0097057892;
+ goto D63;
+
+T63_7:
+ response = -0.0074806913;
+ goto D63;
+
+N63_9:
+ if fieldLength(text) < 14.5000000000 then goto T63_8;
+ else goto N63_10;
+
+T63_8:
+ response = -0.0075857569;
+ goto D63;
+
+N63_10:
+ if fieldMatch(text).significantOccurrence < 0.0476144999 then goto T63_9;
+ else goto N63_11;
+
+T63_9:
+ response = -0.0052872985;
+ goto D63;
+
+N63_11:
+ if attribute(yst_reply_auth) < 33.5000000000 then goto T63_10;
+ else goto T63_11;
+
+T63_10:
+ response = 0.0021247688;
+ goto D63;
+
+T63_11:
+ response = -0.0043264990;
+ goto D63;
+
+N63_12:
+ if fieldTermMatch(text,1).firstPosition < 12.5000000000 then goto N63_13;
+ else goto N63_14;
+
+N63_13:
+ if fieldMatch(text) < 0.5566675067 then goto T63_12;
+ else goto T63_13;
+
+T63_12:
+ response = -0.0012160193;
+ goto D63;
+
+T63_13:
+ response = 0.0034883449;
+ goto D63;
+
+N63_14:
+ if attribute(yst_reply_auth) < 20.5000000000 then goto N63_15;
+ else goto N63_16;
+
+N63_15:
+ if attribute(user_followers_count) < 213.5000000000 then goto T63_14;
+ else goto T63_15;
+
+T63_14:
+ response = -0.0026673084;
+ goto D63;
+
+T63_15:
+ response = 0.0029249608;
+ goto D63;
+
+N63_16:
+ if attribute(user_statuses_count) < 7554.0000000000 then goto T63_16;
+ else goto N63_17;
+
+T63_16:
+ response = -0.0021650101;
+ goto D63;
+
+N63_17:
+ if term(0).significance < 0.9956585169 then goto T63_17;
+ else goto T63_18;
+
+T63_17:
+ response = -0.0326924993;
+ goto D63;
+
+T63_18:
+ response = -0.0081739014;
+ goto D63;
+
+D63:
+
+tnscore = tnscore + response;
+
+/* Tree 65 of 80 */
+N64_1:
+ if fieldMatch(text).longestSequence < 1.5000000000 then goto N64_2;
+ else goto N64_13;
+
+N64_2:
+ if term(0).significance < 0.8512874842 then goto N64_3;
+ else goto N64_4;
+
+N64_3:
+ if term(2).significance < 0.9981595278 then goto T64_1;
+ else goto T64_2;
+
+T64_1:
+ response = -0.0839195878;
+ goto D64;
+
+T64_2:
+ response = -0.0112331884;
+ goto D64;
+
+N64_4:
+ if fieldMatch(text).importance < 0.4998250008 then goto N64_5;
+ else goto N64_10;
+
+N64_5:
+ if term(0).significance < 0.9983350039 then goto N64_6;
+ else goto T64_8;
+
+N64_6:
+ if fieldMatch(text).importance < 0.4989485145 then goto N64_7;
+ else goto N64_8;
+
+N64_7:
+ if fieldMatch(text).importance < 0.4988874793 then goto T64_3;
+ else goto T64_4;
+
+T64_3:
+ response = -0.0037133544;
+ goto D64;
+
+T64_4:
+ response = -0.1248149534;
+ goto D64;
+
+N64_8:
+ if fieldMatch(text).importance < 0.4997234941 then goto T64_5;
+ else goto N64_9;
+
+T64_5:
+ response = 0.0058967543;
+ goto D64;
+
+N64_9:
+ if attribute(yst_reply_auth) < 0.5000000000 then goto T64_6;
+ else goto T64_7;
+
+T64_6:
+ response = -0.0271620138;
+ goto D64;
+
+T64_7:
+ response = 0.0002819878;
+ goto D64;
+
+T64_8:
+ response = 0.0163052773;
+ goto D64;
+
+N64_10:
+ if fieldTermMatch(text,0).firstPosition < 5.5000000000 then goto T64_9;
+ else goto N64_11;
+
+T64_9:
+ response = -0.0009410187;
+ goto D64;
+
+N64_11:
+ if fieldMatch(text).importance < 0.6664544940 then goto T64_10;
+ else goto N64_12;
+
+T64_10:
+ response = -0.0070269578;
+ goto D64;
+
+N64_12:
+ if term(2).significance < 0.9931030273 then goto T64_11;
+ else goto T64_12;
+
+T64_11:
+ response = -0.0096096659;
+ goto D64;
+
+T64_12:
+ response = 0.0015413452;
+ goto D64;
+
+N64_13:
+ if attribute(user_followers_count) < 520.5000000000 then goto N64_14;
+ else goto N64_17;
+
+N64_14:
+ if attribute(yst_reply_auth) < 8.5000000000 then goto N64_15;
+ else goto T64_16;
+
+N64_15:
+ if fieldMatch(text).significantOccurrence < 0.0363755003 then goto T64_13;
+ else goto N64_16;
+
+T64_13:
+ response = -0.0183942755;
+ goto D64;
+
+N64_16:
+ if age(created_at) < 1710.0000000000 then goto T64_14;
+ else goto T64_15;
+
+T64_14:
+ response = 0.0052904688;
+ goto D64;
+
+T64_15:
+ response = 0.0003556613;
+ goto D64;
+
+T64_16:
+ response = -0.0034358951;
+ goto D64;
+
+N64_17:
+ if attribute(user_followers_count) < 534.5000000000 then goto T64_17;
+ else goto T64_18;
+
+T64_17:
+ response = 0.0246325003;
+ goto D64;
+
+T64_18:
+ response = 0.0032783956;
+ goto D64;
+
+D64:
+
+tnscore = tnscore + response;
+
+/* Tree 66 of 80 */
+N65_1:
+ if attribute(ythl) < 0.5000000000 then goto N65_2;
+ else goto N65_7;
+
+N65_2:
+ if fieldMatch(text) < 0.4860935211 then goto N65_3;
+ else goto N65_4;
+
+N65_3:
+ if term(1).significance < 0.9882720113 then goto T65_1;
+ else goto T65_2;
+
+T65_1:
+ response = -0.0113150549;
+ goto D65;
+
+T65_2:
+ response = -0.0035132590;
+ goto D65;
+
+N65_4:
+ if attribute(user_statuses_count) < 14534.0000000000 then goto N65_5;
+ else goto T65_6;
+
+N65_5:
+ if attribute(user_followers_count) < 457.5000000000 then goto T65_3;
+ else goto N65_6;
+
+T65_3:
+ response = -0.0013098462;
+ goto D65;
+
+N65_6:
+ if term(4).significance < 0.9912315011 then goto T65_4;
+ else goto T65_5;
+
+T65_4:
+ response = 0.0071257515;
+ goto D65;
+
+T65_5:
+ response = -0.0107882556;
+ goto D65;
+
+T65_6:
+ response = -0.0080098717;
+ goto D65;
+
+N65_7:
+ if fieldMatch(text).occurrence < 0.1348485053 then goto N65_8;
+ else goto N65_12;
+
+N65_8:
+ if term(1).significance < 0.9926555157 then goto N65_9;
+ else goto N65_11;
+
+N65_9:
+ if attribute(yst_link_array_size) < 0.0000615000 then goto N65_10;
+ else goto T65_9;
+
+N65_10:
+ if fieldTermMatch(text,0).firstPosition < 7.5000000000 then goto T65_7;
+ else goto T65_8;
+
+T65_7:
+ response = -0.0054509513;
+ goto D65;
+
+T65_8:
+ response = -0.0267164116;
+ goto D65;
+
+T65_9:
+ response = -0.0002251203;
+ goto D65;
+
+N65_11:
+ if fieldMatch(text).completeness < 0.9520415068 then goto T65_10;
+ else goto T65_11;
+
+T65_10:
+ response = -0.0121998182;
+ goto D65;
+
+T65_11:
+ response = 0.0006584783;
+ goto D65;
+
+N65_12:
+ if age(created_at) < 1770.0000000000 then goto T65_12;
+ else goto N65_13;
+
+T65_12:
+ response = 0.0078526654;
+ goto D65;
+
+N65_13:
+ if fieldTermMatch(text,3).firstPosition < 2.5000000000 then goto T65_13;
+ else goto N65_14;
+
+T65_13:
+ response = 0.0108153740;
+ goto D65;
+
+N65_14:
+ if term(2).significance < 0.9929159880 then goto N65_15;
+ else goto N65_17;
+
+N65_15:
+ if term(2).significance < 0.9923814535 then goto T65_14;
+ else goto N65_16;
+
+T65_14:
+ response = -0.0011187617;
+ goto D65;
+
+N65_16:
+ if fieldMatch(text).completeness < 0.9579474926 then goto T65_15;
+ else goto T65_16;
+
+T65_15:
+ response = -0.0550616595;
+ goto D65;
+
+T65_16:
+ response = -0.0034789409;
+ goto D65;
+
+N65_17:
+ if attribute(yst_tweet_adult_score) < 0.5000000000 then goto T65_17;
+ else goto T65_18;
+
+T65_17:
+ response = 0.0029410626;
+ goto D65;
+
+T65_18:
+ response = -0.0116671785;
+ goto D65;
+
+D65:
+
+tnscore = tnscore + response;
+
+/* Tree 67 of 80 */
+N66_1:
+ if attribute(yst_tweet_language) < 3243.5000000000 then goto N66_2;
+ else goto T66_18;
+
+N66_2:
+ if age(created_at) < 1830.0000000000 then goto N66_3;
+ else goto N66_13;
+
+N66_3:
+ if term(0).significance < 0.9725670218 then goto N66_4;
+ else goto N66_5;
+
+N66_4:
+ if fieldMatch(text) < 0.6660829782 then goto T66_1;
+ else goto T66_2;
+
+T66_1:
+ response = -0.0173866153;
+ goto D66;
+
+T66_2:
+ response = -0.0010174380;
+ goto D66;
+
+N66_5:
+ if attribute(yst_reply_auth) < 355.5000000000 then goto N66_6;
+ else goto N66_12;
+
+N66_6:
+ if term(0).significance < 0.9816665053 then goto T66_3;
+ else goto N66_7;
+
+T66_3:
+ response = 0.0179367183;
+ goto D66;
+
+N66_7:
+ if fieldMatch(text).importance < 0.7493820190 then goto N66_8;
+ else goto T66_9;
+
+N66_8:
+ if fieldMatch(text).tail < 6.5000000000 then goto N66_9;
+ else goto T66_8;
+
+N66_9:
+ if attribute(user_friends_count) < 560.5000000000 then goto N66_10;
+ else goto N66_11;
+
+N66_10:
+ if fieldMatch(text) < 0.8736619949 then goto T66_4;
+ else goto T66_5;
+
+T66_4:
+ response = 0.0023509846;
+ goto D66;
+
+T66_5:
+ response = -0.0157312448;
+ goto D66;
+
+N66_11:
+ if fieldMatch(text) < 0.1418584883 then goto T66_6;
+ else goto T66_7;
+
+T66_6:
+ response = -0.0659559738;
+ goto D66;
+
+T66_7:
+ response = -0.0072510736;
+ goto D66;
+
+T66_8:
+ response = 0.0044544430;
+ goto D66;
+
+T66_9:
+ response = 0.0083195610;
+ goto D66;
+
+N66_12:
+ if attribute(yst_link_array_size) < 0.0586175025 then goto T66_10;
+ else goto T66_11;
+
+T66_10:
+ response = -0.0091289813;
+ goto D66;
+
+T66_11:
+ response = 0.0114658081;
+ goto D66;
+
+N66_13:
+ if fieldMatch(text).importance < 0.6664544940 then goto N66_14;
+ else goto N66_15;
+
+N66_14:
+ if fieldMatch(user_name).fieldCompleteness < 0.5833334923 then goto T66_12;
+ else goto T66_13;
+
+T66_12:
+ response = -0.0041143634;
+ goto D66;
+
+T66_13:
+ response = 0.0401025109;
+ goto D66;
+
+N66_15:
+ if term(2).significance < 0.9991624951 then goto N66_16;
+ else goto N66_17;
+
+N66_16:
+ if fieldMatch(text) < 0.5540195107 then goto T66_14;
+ else goto T66_15;
+
+T66_14:
+ response = -0.0037827224;
+ goto D66;
+
+T66_15:
+ response = 0.0001685363;
+ goto D66;
+
+N66_17:
+ if fieldLength(text) < 8.5000000000 then goto T66_16;
+ else goto T66_17;
+
+T66_16:
+ response = -0.0122620665;
+ goto D66;
+
+T66_17:
+ response = 0.0037138353;
+ goto D66;
+
+T66_18:
+ response = -0.0101985628;
+ goto D66;
+
+D66:
+
+tnscore = tnscore + response;
+
+/* Tree 68 of 80 */
+N67_1:
+ if age(created_at) < 1830.0000000000 then goto N67_2;
+ else goto N67_12;
+
+N67_2:
+ if fieldMatch(text).importance < 0.4989485145 then goto N67_3;
+ else goto N67_5;
+
+N67_3:
+ if fieldMatch(text).importance < 0.4988874793 then goto N67_4;
+ else goto T67_3;
+
+N67_4:
+ if age(created_at) < 690.0000000000 then goto T67_1;
+ else goto T67_2;
+
+T67_1:
+ response = -0.0256320594;
+ goto D67;
+
+T67_2:
+ response = 0.0079996205;
+ goto D67;
+
+T67_3:
+ response = -0.1224294269;
+ goto D67;
+
+N67_5:
+ if attribute(yst_tweet_language) < 3243.5000000000 then goto N67_6;
+ else goto T67_11;
+
+N67_6:
+ if fieldMatch(text).importance < 0.7491755486 then goto N67_7;
+ else goto T67_10;
+
+N67_7:
+ if term(0).significance < 0.9184160233 then goto N67_8;
+ else goto N67_9;
+
+N67_8:
+ if term(2).significance < 0.9980159998 then goto T67_4;
+ else goto T67_5;
+
+T67_4:
+ response = -0.0525545375;
+ goto D67;
+
+T67_5:
+ response = 0.0023785461;
+ goto D67;
+
+N67_9:
+ if fieldMatch(text).importance < 0.7490389943 then goto N67_10;
+ else goto T67_9;
+
+N67_10:
+ if attribute(user_followers_count) < 787.5000000000 then goto T67_6;
+ else goto N67_11;
+
+T67_6:
+ response = 0.0010999135;
+ goto D67;
+
+N67_11:
+ if fieldMatch(text).importance < 0.4998484850 then goto T67_7;
+ else goto T67_8;
+
+T67_7:
+ response = 0.0162069505;
+ goto D67;
+
+T67_8:
+ response = 0.0035170311;
+ goto D67;
+
+T67_9:
+ response = -0.0287539558;
+ goto D67;
+
+T67_10:
+ response = 0.0072807979;
+ goto D67;
+
+T67_11:
+ response = -0.0106468395;
+ goto D67;
+
+N67_12:
+ if fieldMatch(text).importance < 0.6664534807 then goto T67_12;
+ else goto N67_13;
+
+T67_12:
+ response = -0.0041764714;
+ goto D67;
+
+N67_13:
+ if term(0).significance < 0.9954395294 then goto N67_14;
+ else goto N67_16;
+
+N67_14:
+ if fieldMatch(user_name).significantOccurrence < 0.2916665077 then goto N67_15;
+ else goto T67_15;
+
+N67_15:
+ if fieldMatch(text).importance < 0.8318179846 then goto T67_13;
+ else goto T67_14;
+
+T67_13:
+ response = -0.0042118878;
+ goto D67;
+
+T67_14:
+ response = 0.0043649147;
+ goto D67;
+
+T67_15:
+ response = 0.0158696258;
+ goto D67;
+
+N67_16:
+ if term(1).significance < 0.9986659884 then goto N67_17;
+ else goto T67_18;
+
+N67_17:
+ if fieldMatch(text).tail < 1.5000000000 then goto T67_16;
+ else goto T67_17;
+
+T67_16:
+ response = -0.0046863462;
+ goto D67;
+
+T67_17:
+ response = 0.0042419546;
+ goto D67;
+
+T67_18:
+ response = -0.0008828310;
+ goto D67;
+
+D67:
+
+tnscore = tnscore + response;
+
+/* Tree 69 of 80 */
+N68_1:
+ if age(created_at) < 1830.0000000000 then goto N68_2;
+ else goto N68_14;
+
+N68_2:
+ if attribute(user_statuses_count) < 3.5000000000 then goto T68_1;
+ else goto N68_3;
+
+T68_1:
+ response = -0.0248705295;
+ goto D68;
+
+N68_3:
+ if attribute(yst_reply_auth) < 247.5000000000 then goto N68_4;
+ else goto N68_13;
+
+N68_4:
+ if attribute(user_followers_count) < 97.5000000000 then goto N68_5;
+ else goto N68_12;
+
+N68_5:
+ if attribute(yst_link_array_size) < 0.0007835000 then goto N68_6;
+ else goto N68_10;
+
+N68_6:
+ if fieldLength(text) < 27.5000000000 then goto N68_7;
+ else goto N68_9;
+
+N68_7:
+ if fieldMatch(text).significantOccurrence < 0.0591179989 then goto T68_2;
+ else goto N68_8;
+
+T68_2:
+ response = 0.0070832320;
+ goto D68;
+
+N68_8:
+ if fieldTermMatch(text,1).firstPosition < 1.5000000000 then goto T68_3;
+ else goto T68_4;
+
+T68_3:
+ response = 0.0146283297;
+ goto D68;
+
+T68_4:
+ response = -0.0028201578;
+ goto D68;
+
+N68_9:
+ if attribute(user_followers_count) < 23.5000000000 then goto T68_5;
+ else goto T68_6;
+
+T68_5:
+ response = -0.0472201281;
+ goto D68;
+
+T68_6:
+ response = 0.0008078028;
+ goto D68;
+
+N68_10:
+ if fieldTermMatch(text,1).firstPosition < 4.5000000000 then goto T68_7;
+ else goto N68_11;
+
+T68_7:
+ response = 0.0067052369;
+ goto D68;
+
+N68_11:
+ if fieldMatch(text).earliness < 0.8221344948 then goto T68_8;
+ else goto T68_9;
+
+T68_8:
+ response = -0.0076362760;
+ goto D68;
+
+T68_9:
+ response = -0.0324826734;
+ goto D68;
+
+N68_12:
+ if fieldLength(text) < 10.5000000000 then goto T68_10;
+ else goto T68_11;
+
+T68_10:
+ response = -0.0052857206;
+ goto D68;
+
+T68_11:
+ response = 0.0059849079;
+ goto D68;
+
+N68_13:
+ if attribute(user_followers_count) < 1692.5000000000 then goto T68_12;
+ else goto T68_13;
+
+T68_12:
+ response = -0.0091360049;
+ goto D68;
+
+T68_13:
+ response = 0.0091123239;
+ goto D68;
+
+N68_14:
+ if fieldMatch(text).longestSequence < 1.5000000000 then goto N68_15;
+ else goto T68_18;
+
+N68_15:
+ if fieldMatch(text).earliness < 0.6825754642 then goto N68_16;
+ else goto N68_17;
+
+N68_16:
+ if term(0).significance < 0.9944700003 then goto T68_14;
+ else goto T68_15;
+
+T68_14:
+ response = -0.0148020950;
+ goto D68;
+
+T68_15:
+ response = -0.0053042509;
+ goto D68;
+
+N68_17:
+ if fieldMatch(text).significantOccurrence < 0.1249970049 then goto T68_16;
+ else goto T68_17;
+
+T68_16:
+ response = -0.0014558335;
+ goto D68;
+
+T68_17:
+ response = -0.0200174998;
+ goto D68;
+
+T68_18:
+ response = -0.0000872652;
+ goto D68;
+
+D68:
+
+tnscore = tnscore + response;
+
+/* Tree 70 of 80 */
+N69_1:
+ if attribute(yst_reply_auth) < 26.5000000000 then goto N69_2;
+ else goto N69_14;
+
+N69_2:
+ if attribute(user_followers_count) < 86.5000000000 then goto N69_3;
+ else goto N69_4;
+
+N69_3:
+ if fieldMatch(text).significantOccurrence < 0.1216785014 then goto T69_1;
+ else goto T69_2;
+
+T69_1:
+ response = -0.0006414838;
+ goto D69;
+
+T69_2:
+ response = -0.0099978879;
+ goto D69;
+
+N69_4:
+ if fieldMatch(text) < 0.8439915180 then goto N69_5;
+ else goto N69_9;
+
+N69_5:
+ if fieldMatch(text).absoluteOccurrence < 0.0126785003 then goto N69_6;
+ else goto N69_8;
+
+N69_6:
+ if fieldMatch(text).importance < 0.6664454937 then goto N69_7;
+ else goto T69_5;
+
+N69_7:
+ if age(created_at) < 5400.0000000000 then goto T69_3;
+ else goto T69_4;
+
+T69_3:
+ response = -0.0009786234;
+ goto D69;
+
+T69_4:
+ response = -0.0108807960;
+ goto D69;
+
+T69_5:
+ response = 0.0004683724;
+ goto D69;
+
+N69_8:
+ if fieldTermMatch(text,1).firstPosition < 3.5000000000 then goto T69_6;
+ else goto T69_7;
+
+T69_6:
+ response = -0.0013491196;
+ goto D69;
+
+T69_7:
+ response = 0.0100488776;
+ goto D69;
+
+N69_9:
+ if fieldMatch(text).importance < 0.4989485145 then goto T69_8;
+ else goto N69_10;
+
+T69_8:
+ response = -0.0180389590;
+ goto D69;
+
+N69_10:
+ if age(created_at) < 1770.0000000000 then goto N69_11;
+ else goto N69_13;
+
+N69_11:
+ if term(0).significance < 0.9986659884 then goto N69_12;
+ else goto T69_11;
+
+N69_12:
+ if fieldMatch(text).earliness < 0.6099034548 then goto T69_9;
+ else goto T69_10;
+
+T69_9:
+ response = 0.0188221360;
+ goto D69;
+
+T69_10:
+ response = 0.0069414922;
+ goto D69;
+
+T69_11:
+ response = 0.0010695341;
+ goto D69;
+
+N69_13:
+ if attribute(user_statuses_count) < 728.0000000000 then goto T69_12;
+ else goto T69_13;
+
+T69_12:
+ response = 0.0078644585;
+ goto D69;
+
+T69_13:
+ response = 0.0013385568;
+ goto D69;
+
+N69_14:
+ if attribute(user_followers_count) < 1995.0000000000 then goto T69_14;
+ else goto N69_15;
+
+T69_14:
+ response = -0.0037600829;
+ goto D69;
+
+N69_15:
+ if term(0).significance < 0.9989764690 then goto N69_16;
+ else goto T69_18;
+
+N69_16:
+ if age(created_at) < 2310.0000000000 then goto N69_17;
+ else goto T69_17;
+
+N69_17:
+ if attribute(yst_reply_auth) < 38.5000000000 then goto T69_15;
+ else goto T69_16;
+
+T69_15:
+ response = -0.0117528259;
+ goto D69;
+
+T69_16:
+ response = 0.0193329084;
+ goto D69;
+
+T69_17:
+ response = 0.0028282077;
+ goto D69;
+
+T69_18:
+ response = -0.0027098020;
+ goto D69;
+
+D69:
+
+tnscore = tnscore + response;
+
+/* Tree 71 of 80 */
+N70_1:
+ if fieldMatch(text).longestSequence < 1.5000000000 then goto N70_2;
+ else goto N70_12;
+
+N70_2:
+ if term(0).significance < 0.8512874842 then goto N70_3;
+ else goto N70_4;
+
+N70_3:
+ if term(2).significance < 0.9981595278 then goto T70_1;
+ else goto T70_2;
+
+T70_1:
+ response = -0.0893712611;
+ goto D70;
+
+T70_2:
+ response = 0.0139821391;
+ goto D70;
+
+N70_4:
+ if fieldMatch(text).absoluteOccurrence < 0.0129164997 then goto N70_5;
+ else goto T70_11;
+
+N70_5:
+ if fieldMatch(user_name).fieldCompleteness < 0.5833334923 then goto N70_6;
+ else goto T70_10;
+
+N70_6:
+ if fieldMatch(text).importance < 0.4999005198 then goto N70_7;
+ else goto N70_9;
+
+N70_7:
+ if fieldMatch(text).importance < 0.4992579818 then goto T70_3;
+ else goto N70_8;
+
+T70_3:
+ response = -0.0088299338;
+ goto D70;
+
+N70_8:
+ if fieldLength(text) < 24.5000000000 then goto T70_4;
+ else goto T70_5;
+
+T70_4:
+ response = 0.0031375211;
+ goto D70;
+
+T70_5:
+ response = -0.0078301854;
+ goto D70;
+
+N70_9:
+ if fieldMatch(text).importance < 0.4999030232 then goto T70_6;
+ else goto N70_10;
+
+T70_6:
+ response = -0.0514023475;
+ goto D70;
+
+N70_10:
+ if fieldMatch(text).earliness < 0.6079194546 then goto T70_7;
+ else goto N70_11;
+
+T70_7:
+ response = -0.0064381419;
+ goto D70;
+
+N70_11:
+ if fieldMatch(text) < 0.8824554682 then goto T70_8;
+ else goto T70_9;
+
+T70_8:
+ response = -0.0013041030;
+ goto D70;
+
+T70_9:
+ response = -0.0302990737;
+ goto D70;
+
+T70_10:
+ response = 0.0378075574;
+ goto D70;
+
+T70_11:
+ response = 0.0037149737;
+ goto D70;
+
+N70_12:
+ if attribute(yst_tweet_language) < 3587.5000000000 then goto N70_13;
+ else goto T70_18;
+
+N70_13:
+ if fieldMatch(text).absoluteProximity < 0.0573749989 then goto T70_12;
+ else goto N70_14;
+
+T70_12:
+ response = -0.0037219953;
+ goto D70;
+
+N70_14:
+ if attribute(user_followers_count) < 719.5000000000 then goto N70_15;
+ else goto T70_17;
+
+N70_15:
+ if fieldMatch(text).importance < 0.6660234928 then goto N70_16;
+ else goto N70_17;
+
+N70_16:
+ if term(1).significance < 0.9864724874 then goto T70_13;
+ else goto T70_14;
+
+T70_13:
+ response = -0.0057017615;
+ goto D70;
+
+T70_14:
+ response = 0.0142744959;
+ goto D70;
+
+N70_17:
+ if fieldMatch(text).importance < 0.6664794683 then goto T70_15;
+ else goto T70_16;
+
+T70_15:
+ response = -0.0033669884;
+ goto D70;
+
+T70_16:
+ response = 0.0011748423;
+ goto D70;
+
+T70_17:
+ response = 0.0048833724;
+ goto D70;
+
+T70_18:
+ response = -0.0235368129;
+ goto D70;
+
+D70:
+
+tnscore = tnscore + response;
+
+/* Tree 72 of 80 */
+N71_1:
+ if fieldMatch(text).tail < 3.5000000000 then goto N71_2;
+ else goto N71_5;
+
+N71_2:
+ if term(0).significance < 0.9849029779 then goto T71_1;
+ else goto N71_3;
+
+T71_1:
+ response = -0.0096533539;
+ goto D71;
+
+N71_3:
+ if age(created_at) < 210.0000000000 then goto T71_2;
+ else goto N71_4;
+
+T71_2:
+ response = 0.0086447306;
+ goto D71;
+
+N71_4:
+ if term(3).significance < 0.9972594976 then goto T71_3;
+ else goto T71_4;
+
+T71_3:
+ response = -0.0049080669;
+ goto D71;
+
+T71_4:
+ response = 0.0019732467;
+ goto D71;
+
+N71_5:
+ if attribute(user_followers_count) < 682.5000000000 then goto N71_6;
+ else goto N71_17;
+
+N71_6:
+ if attribute(yst_reply_auth) < 91.5000000000 then goto N71_7;
+ else goto N71_15;
+
+N71_7:
+ if attribute(yst_tweet_language) < 3243.5000000000 then goto N71_8;
+ else goto T71_13;
+
+N71_8:
+ if term(4).significance < 0.9962199926 then goto N71_9;
+ else goto N71_14;
+
+N71_9:
+ if fieldMatch(text).tail < 11.5000000000 then goto N71_10;
+ else goto N71_13;
+
+N71_10:
+ if term(1).significance < 0.9847429991 then goto N71_11;
+ else goto T71_8;
+
+N71_11:
+ if fieldMatch(text).importance < 0.7463389635 then goto N71_12;
+ else goto T71_7;
+
+N71_12:
+ if term(0).significance < 0.9929184914 then goto T71_5;
+ else goto T71_6;
+
+T71_5:
+ response = -0.0337962464;
+ goto D71;
+
+T71_6:
+ response = -0.0042731663;
+ goto D71;
+
+T71_7:
+ response = -0.0018179748;
+ goto D71;
+
+T71_8:
+ response = -0.0005816172;
+ goto D71;
+
+N71_13:
+ if age(created_at) < 1710.0000000000 then goto T71_9;
+ else goto T71_10;
+
+T71_9:
+ response = 0.0058304594;
+ goto D71;
+
+T71_10:
+ response = -0.0001954122;
+ goto D71;
+
+N71_14:
+ if term(1).significance < 0.8054080009 then goto T71_11;
+ else goto T71_12;
+
+T71_11:
+ response = 0.0206959410;
+ goto D71;
+
+T71_12:
+ response = 0.0029922212;
+ goto D71;
+
+T71_13:
+ response = -0.0103917040;
+ goto D71;
+
+N71_15:
+ if attribute(yst_link_array_size) < 0.0094430000 then goto T71_14;
+ else goto N71_16;
+
+T71_14:
+ response = -0.0102022704;
+ goto D71;
+
+N71_16:
+ if attribute(user_friends_count) < 89.0000000000 then goto T71_15;
+ else goto T71_16;
+
+T71_15:
+ response = 0.0214038413;
+ goto D71;
+
+T71_16:
+ response = -0.0030227020;
+ goto D71;
+
+N71_17:
+ if fieldMatch(text).significantOccurrence < 0.0556650013 then goto T71_17;
+ else goto T71_18;
+
+T71_17:
+ response = 0.0009656227;
+ goto D71;
+
+T71_18:
+ response = 0.0055771237;
+ goto D71;
+
+D71:
+
+tnscore = tnscore + response;
+
+/* Tree 73 of 80 */
+N72_1:
+ if attribute(ythl) < 0.5000000000 then goto N72_2;
+ else goto N72_12;
+
+N72_2:
+ if fieldMatch(text).importance < 0.4997234941 then goto N72_3;
+ else goto N72_7;
+
+N72_3:
+ if fieldMatch(text).importance < 0.4988809824 then goto T72_1;
+ else goto N72_4;
+
+T72_1:
+ response = -0.0156475694;
+ goto D72;
+
+N72_4:
+ if fieldMatch(text).importance < 0.4988874793 then goto T72_2;
+ else goto N72_5;
+
+T72_2:
+ response = 0.0871791947;
+ goto D72;
+
+N72_5:
+ if fieldMatch(text).importance < 0.4997065067 then goto T72_3;
+ else goto N72_6;
+
+T72_3:
+ response = 0.0054545590;
+ goto D72;
+
+N72_6:
+ if term(0).significance < 0.9976885319 then goto T72_4;
+ else goto T72_5;
+
+T72_4:
+ response = 0.1581759963;
+ goto D72;
+
+T72_5:
+ response = 0.0057478578;
+ goto D72;
+
+N72_7:
+ if fieldMatch(text).importance < 0.4997634888 then goto T72_6;
+ else goto N72_8;
+
+T72_6:
+ response = -0.0264810886;
+ goto D72;
+
+N72_8:
+ if fieldMatch(text).importance < 0.4998250008 then goto T72_7;
+ else goto N72_9;
+
+T72_7:
+ response = 0.0106354371;
+ goto D72;
+
+N72_9:
+ if fieldMatch(text).longestSequence < 1.5000000000 then goto N72_10;
+ else goto N72_11;
+
+N72_10:
+ if fieldMatch(text).earliness < 0.7018519640 then goto T72_8;
+ else goto T72_9;
+
+T72_8:
+ response = -0.0083100057;
+ goto D72;
+
+T72_9:
+ response = -0.0018931553;
+ goto D72;
+
+N72_11:
+ if attribute(user_followers_count) < 701.0000000000 then goto T72_10;
+ else goto T72_11;
+
+T72_10:
+ response = -0.0020476113;
+ goto D72;
+
+T72_11:
+ response = 0.0053824373;
+ goto D72;
+
+N72_12:
+ if fieldMatch(text) < 0.4141010046 then goto N72_13;
+ else goto N72_17;
+
+N72_13:
+ if fieldMatch(user_name) < 0.3179910183 then goto N72_14;
+ else goto T72_16;
+
+N72_14:
+ if term(0).significance < 0.9793410301 then goto N72_15;
+ else goto T72_15;
+
+N72_15:
+ if term(2).significance < 0.9970530272 then goto T72_12;
+ else goto N72_16;
+
+T72_12:
+ response = -0.0054897109;
+ goto D72;
+
+N72_16:
+ if fieldMatch(text).importance < 0.6620055437 then goto T72_13;
+ else goto T72_14;
+
+T72_13:
+ response = 0.0060602187;
+ goto D72;
+
+T72_14:
+ response = -0.0517823718;
+ goto D72;
+
+T72_15:
+ response = -0.0021590151;
+ goto D72;
+
+T72_16:
+ response = 0.0298259094;
+ goto D72;
+
+N72_17:
+ if fieldMatch(text).significantOccurrence < 0.0392310023 then goto T72_17;
+ else goto T72_18;
+
+T72_17:
+ response = -0.0063084285;
+ goto D72;
+
+T72_18:
+ response = 0.0020013986;
+ goto D72;
+
+D72:
+
+tnscore = tnscore + response;
+
+/* Tree 74 of 80 */
+N73_1:
+ if attribute(yst_reply_auth) < 236.5000000000 then goto N73_2;
+ else goto T73_18;
+
+N73_2:
+ if age(created_at) < 1830.0000000000 then goto N73_3;
+ else goto N73_16;
+
+N73_3:
+ if term(1).significance < 0.7788045406 then goto T73_1;
+ else goto N73_4;
+
+T73_1:
+ response = 0.0479141837;
+ goto D73;
+
+N73_4:
+ if term(0).significance < 0.9139549732 then goto N73_5;
+ else goto N73_6;
+
+N73_5:
+ if fieldMatch(text).longestSequenceRatio < 0.4166665077 then goto T73_2;
+ else goto T73_3;
+
+T73_2:
+ response = -0.0732771007;
+ goto D73;
+
+T73_3:
+ response = -0.0090569203;
+ goto D73;
+
+N73_6:
+ if fieldMatch(text).importance < 0.4989485145 then goto N73_7;
+ else goto N73_8;
+
+N73_7:
+ if fieldMatch(text).importance < 0.4988874793 then goto T73_4;
+ else goto T73_5;
+
+T73_4:
+ response = -0.0042825791;
+ goto D73;
+
+T73_5:
+ response = -0.1098205261;
+ goto D73;
+
+N73_8:
+ if attribute(user_followers_count) < 98.5000000000 then goto N73_9;
+ else goto N73_12;
+
+N73_9:
+ if attribute(user_statuses_count) < 1260.5000000000 then goto N73_10;
+ else goto T73_9;
+
+N73_10:
+ if match < 0.6491410136 then goto T73_6;
+ else goto N73_11;
+
+T73_6:
+ response = -0.0110819570;
+ goto D73;
+
+N73_11:
+ if term(0).significance < 0.9986954927 then goto T73_7;
+ else goto T73_8;
+
+T73_7:
+ response = 0.0051994695;
+ goto D73;
+
+T73_8:
+ response = -0.0019674695;
+ goto D73;
+
+T73_9:
+ response = -0.0076605248;
+ goto D73;
+
+N73_12:
+ if attribute(user_statuses_count) < 5.5000000000 then goto T73_10;
+ else goto N73_13;
+
+T73_10:
+ response = -0.1055265232;
+ goto D73;
+
+N73_13:
+ if fieldMatch(text).earliness < 0.3726850152 then goto T73_11;
+ else goto N73_14;
+
+T73_11:
+ response = -0.0007649567;
+ goto D73;
+
+N73_14:
+ if fieldMatch(text).occurrence < 0.1519230008 then goto T73_12;
+ else goto N73_15;
+
+T73_12:
+ response = 0.0078900808;
+ goto D73;
+
+N73_15:
+ if attribute(user_followers_count) < 583.5000000000 then goto T73_13;
+ else goto T73_14;
+
+T73_13:
+ response = -0.0022611347;
+ goto D73;
+
+T73_14:
+ response = 0.0078521247;
+ goto D73;
+
+N73_16:
+ if fieldMatch(text).importance < 0.6664505005 then goto T73_15;
+ else goto N73_17;
+
+T73_15:
+ response = -0.0037578539;
+ goto D73;
+
+N73_17:
+ if fieldMatch(text).significantOccurrence < 0.0386575013 then goto T73_16;
+ else goto T73_17;
+
+T73_16:
+ response = -0.0059854357;
+ goto D73;
+
+T73_17:
+ response = 0.0010571345;
+ goto D73;
+
+T73_18:
+ response = -0.0046469325;
+ goto D73;
+
+D73:
+
+tnscore = tnscore + response;
+
+/* Tree 75 of 80 */
+N74_1:
+ if age(created_at) < 1830.0000000000 then goto N74_2;
+ else goto N74_12;
+
+N74_2:
+ if fieldMatch(text).importance < 0.7467460036 then goto N74_3;
+ else goto N74_11;
+
+N74_3:
+ if fieldMatch(text).tail < 7.5000000000 then goto N74_4;
+ else goto N74_10;
+
+N74_4:
+ if term(0).significance < 0.8547105193 then goto N74_5;
+ else goto N74_6;
+
+N74_5:
+ if term(2).significance < 0.9980159998 then goto T74_1;
+ else goto T74_2;
+
+T74_1:
+ response = -0.0945680172;
+ goto D74;
+
+T74_2:
+ response = 0.0133542000;
+ goto D74;
+
+N74_6:
+ if term(0).significance < 0.9995139837 then goto T74_3;
+ else goto N74_7;
+
+T74_3:
+ response = -0.0000091115;
+ goto D74;
+
+N74_7:
+ if term(0).significance < 0.9996379614 then goto N74_8;
+ else goto T74_7;
+
+N74_8:
+ if fieldMatch(text).tail < 6.5000000000 then goto N74_9;
+ else goto T74_6;
+
+N74_9:
+ if attribute(yst_link_array_size) < 0.0028985001 then goto T74_4;
+ else goto T74_5;
+
+T74_4:
+ response = -0.0033284425;
+ goto D74;
+
+T74_5:
+ response = -0.0414096377;
+ goto D74;
+
+T74_6:
+ response = -0.0690490860;
+ goto D74;
+
+T74_7:
+ response = 0.0066873893;
+ goto D74;
+
+N74_10:
+ if attribute(user_friends_count) < 179.5000000000 then goto T74_8;
+ else goto T74_9;
+
+T74_8:
+ response = 0.0013706096;
+ goto D74;
+
+T74_9:
+ response = 0.0065517887;
+ goto D74;
+
+N74_11:
+ if attribute(yst_tweet_language) < 3243.5000000000 then goto T74_10;
+ else goto T74_11;
+
+T74_10:
+ response = 0.0068726824;
+ goto D74;
+
+T74_11:
+ response = -0.0363530800;
+ goto D74;
+
+N74_12:
+ if fieldMatch(text).longestSequence < 1.5000000000 then goto N74_13;
+ else goto N74_16;
+
+N74_13:
+ if fieldMatch(text).absoluteOccurrence < 0.0122500006 then goto N74_14;
+ else goto N74_15;
+
+N74_14:
+ if fieldMatch(user_name).importance < 0.4999729991 then goto T74_12;
+ else goto T74_13;
+
+T74_12:
+ response = -0.0053854995;
+ goto D74;
+
+T74_13:
+ response = 0.0353914791;
+ goto D74;
+
+N74_15:
+ if attribute(user_friends_count) < 4181.0000000000 then goto T74_14;
+ else goto T74_15;
+
+T74_14:
+ response = 0.0041747763;
+ goto D74;
+
+T74_15:
+ response = -0.0313684077;
+ goto D74;
+
+N74_16:
+ if fieldMatch(text) < 0.5549424887 then goto T74_16;
+ else goto N74_17;
+
+T74_16:
+ response = -0.0034986093;
+ goto D74;
+
+N74_17:
+ if match < 0.7940984964 then goto T74_17;
+ else goto T74_18;
+
+T74_17:
+ response = 0.0068157141;
+ goto D74;
+
+T74_18:
+ response = 0.0005808310;
+ goto D74;
+
+D74:
+
+tnscore = tnscore + response;
+
+/* Tree 76 of 80 */
+N75_1:
+ if fieldMatch(text).importance < 0.4989485145 then goto N75_2;
+ else goto N75_5;
+
+N75_2:
+ if fieldMatch(text).importance < 0.4988874793 then goto N75_3;
+ else goto T75_4;
+
+N75_3:
+ if term(0).significance < 0.9910864830 then goto N75_4;
+ else goto T75_3;
+
+N75_4:
+ if fieldMatch(text).importance < 0.4985739887 then goto T75_1;
+ else goto T75_2;
+
+T75_1:
+ response = -0.0080603652;
+ goto D75;
+
+T75_2:
+ response = -0.0525354118;
+ goto D75;
+
+T75_3:
+ response = 0.0309921110;
+ goto D75;
+
+T75_4:
+ response = -0.1167737387;
+ goto D75;
+
+N75_5:
+ if age(created_at) < 1830.0000000000 then goto N75_6;
+ else goto N75_16;
+
+N75_6:
+ if term(0).significance < 0.9731230140 then goto N75_7;
+ else goto N75_11;
+
+N75_7:
+ if term(1).significance < 0.9992040396 then goto N75_8;
+ else goto T75_9;
+
+N75_8:
+ if term(1).significance < 0.9977560043 then goto N75_9;
+ else goto T75_8;
+
+N75_9:
+ if fieldMatch(text) < 0.5470744967 then goto N75_10;
+ else goto T75_7;
+
+N75_10:
+ if fieldMatch(text).importance < 0.7365344763 then goto T75_5;
+ else goto T75_6;
+
+T75_5:
+ response = -0.0755739420;
+ goto D75;
+
+T75_6:
+ response = -0.0140588177;
+ goto D75;
+
+T75_7:
+ response = -0.0021536645;
+ goto D75;
+
+T75_8:
+ response = 0.0179729341;
+ goto D75;
+
+T75_9:
+ response = -0.0499824746;
+ goto D75;
+
+N75_11:
+ if term(0).significance < 0.9751809835 then goto T75_10;
+ else goto N75_12;
+
+T75_10:
+ response = 0.0490509939;
+ goto D75;
+
+N75_12:
+ if attribute(yst_reply_auth) < 20.5000000000 then goto N75_13;
+ else goto N75_15;
+
+N75_13:
+ if attribute(user_followers_count) < 97.5000000000 then goto T75_11;
+ else goto N75_14;
+
+T75_11:
+ response = 0.0015097125;
+ goto D75;
+
+N75_14:
+ if attribute(user_statuses_count) < 5.5000000000 then goto T75_12;
+ else goto T75_13;
+
+T75_12:
+ response = -0.0948351800;
+ goto D75;
+
+T75_13:
+ response = 0.0071205807;
+ goto D75;
+
+N75_15:
+ if attribute(yst_link_array_size) < 0.0028940002 then goto T75_14;
+ else goto T75_15;
+
+T75_14:
+ response = -0.0084126298;
+ goto D75;
+
+T75_15:
+ response = 0.0022449562;
+ goto D75;
+
+N75_16:
+ if fieldMatch(text).importance < 0.6664534807 then goto T75_16;
+ else goto N75_17;
+
+T75_16:
+ response = -0.0042838031;
+ goto D75;
+
+N75_17:
+ if term(0).significance < 0.9982124567 then goto T75_17;
+ else goto T75_18;
+
+T75_17:
+ response = -0.0016253125;
+ goto D75;
+
+T75_18:
+ response = 0.0026475634;
+ goto D75;
+
+D75:
+
+tnscore = tnscore + response;
+
+/* Tree 77 of 80 */
+N76_1:
+ if fieldMatch(text).longestSequence < 1.5000000000 then goto N76_2;
+ else goto N76_13;
+
+N76_2:
+ if fieldTermMatch(text,0).firstPosition < 2.5000000000 then goto N76_3;
+ else goto N76_4;
+
+N76_3:
+ if fieldMatch(text).importance < 0.4989809990 then goto T76_1;
+ else goto T76_2;
+
+T76_1:
+ response = -0.0234510876;
+ goto D76;
+
+T76_2:
+ response = 0.0020809399;
+ goto D76;
+
+N76_4:
+ if attribute(yst_reply_auth) < 302.5000000000 then goto N76_5;
+ else goto T76_12;
+
+N76_5:
+ if fieldMatch(text).importance < 0.6664544940 then goto N76_6;
+ else goto N76_12;
+
+N76_6:
+ if fieldMatch(text).importance < 0.4998220205 then goto N76_7;
+ else goto N76_8;
+
+N76_7:
+ if attribute(yst_reply_auth) < 42.5000000000 then goto T76_3;
+ else goto T76_4;
+
+T76_3:
+ response = 0.0050751752;
+ goto D76;
+
+T76_4:
+ response = -0.0083413352;
+ goto D76;
+
+N76_8:
+ if fieldMatch(text).absoluteOccurrence < 0.0124999993 then goto N76_9;
+ else goto T76_9;
+
+N76_9:
+ if age(created_at) < 30600.0000000000 then goto N76_10;
+ else goto T76_8;
+
+N76_10:
+ if fieldMatch(text).absoluteProximity < 0.0125000002 then goto T76_5;
+ else goto N76_11;
+
+T76_5:
+ response = 0.0047520251;
+ goto D76;
+
+N76_11:
+ if term(1).significance < 0.9889299870 then goto T76_6;
+ else goto T76_7;
+
+T76_6:
+ response = -0.0182384871;
+ goto D76;
+
+T76_7:
+ response = -0.0050169041;
+ goto D76;
+
+T76_8:
+ response = -0.0135494985;
+ goto D76;
+
+T76_9:
+ response = 0.0043484195;
+ goto D76;
+
+N76_12:
+ if term(0).significance < 0.9941140413 then goto T76_10;
+ else goto T76_11;
+
+T76_10:
+ response = -0.0052054760;
+ goto D76;
+
+T76_11:
+ response = 0.0033577205;
+ goto D76;
+
+T76_12:
+ response = -0.0100142174;
+ goto D76;
+
+N76_13:
+ if age(created_at) < 1770.0000000000 then goto N76_14;
+ else goto N76_15;
+
+N76_14:
+ if fieldMatch(text) < 0.8529180288 then goto T76_13;
+ else goto T76_14;
+
+T76_13:
+ response = 0.0005433753;
+ goto D76;
+
+T76_14:
+ response = 0.0060936539;
+ goto D76;
+
+N76_15:
+ if fieldMatch(text).earliness < 0.9298025370 then goto N76_16;
+ else goto N76_17;
+
+N76_16:
+ if fieldLength(text) < 29.5000000000 then goto T76_15;
+ else goto T76_16;
+
+T76_15:
+ response = -0.0005482093;
+ goto D76;
+
+T76_16:
+ response = -0.0264980545;
+ goto D76;
+
+N76_17:
+ if fieldMatch(text).occurrence < 0.2290209979 then goto T76_17;
+ else goto T76_18;
+
+T76_17:
+ response = 0.0068625219;
+ goto D76;
+
+T76_18:
+ response = -0.0005992389;
+ goto D76;
+
+D76:
+
+tnscore = tnscore + response;
+
+/* Tree 78 of 80 */
+N77_1:
+ if fieldTermMatch(text,2).firstPosition < 13.5000000000 then goto N77_2;
+ else goto N77_8;
+
+N77_2:
+ if term(2).significance < 0.9519284964 then goto T77_1;
+ else goto N77_3;
+
+T77_1:
+ response = -0.0041815526;
+ goto D77;
+
+N77_3:
+ if age(created_at) < 5400.0000000000 then goto N77_4;
+ else goto N77_6;
+
+N77_4:
+ if fieldMatch(text).significance < 0.7492735386 then goto N77_5;
+ else goto T77_4;
+
+N77_5:
+ if fieldMatch(text).tail < 20.5000000000 then goto T77_2;
+ else goto T77_3;
+
+T77_2:
+ response = 0.0019880057;
+ goto D77;
+
+T77_3:
+ response = 0.0345569075;
+ goto D77;
+
+T77_4:
+ response = 0.0087822370;
+ goto D77;
+
+N77_6:
+ if term(2).significance < 0.9934439659 then goto N77_7;
+ else goto T77_7;
+
+N77_7:
+ if term(2).significance < 0.9921829700 then goto T77_5;
+ else goto T77_6;
+
+T77_5:
+ response = 0.0004470985;
+ goto D77;
+
+T77_6:
+ response = -0.0109745339;
+ goto D77;
+
+T77_7:
+ response = 0.0025543252;
+ goto D77;
+
+N77_8:
+ if attribute(yst_reply_auth) < 22.5000000000 then goto N77_9;
+ else goto N77_14;
+
+N77_9:
+ if term(1).significance < 0.9951915145 then goto N77_10;
+ else goto N77_11;
+
+N77_10:
+ if fieldMatch(text).fieldCompleteness < 0.0816664994 then goto T77_8;
+ else goto T77_9;
+
+T77_8:
+ response = -0.0140486512;
+ goto D77;
+
+T77_9:
+ response = -0.0020108004;
+ goto D77;
+
+N77_11:
+ if fieldMatch(user_name).head < 0.5000000000 then goto N77_12;
+ else goto T77_13;
+
+N77_12:
+ if attribute(yst_tweet_language) < 3243.5000000000 then goto N77_13;
+ else goto T77_12;
+
+N77_13:
+ if fieldLength(text) < 8.5000000000 then goto T77_10;
+ else goto T77_11;
+
+T77_10:
+ response = -0.0088342224;
+ goto D77;
+
+T77_11:
+ response = 0.0012278464;
+ goto D77;
+
+T77_12:
+ response = -0.0093805182;
+ goto D77;
+
+T77_13:
+ response = 0.0236651466;
+ goto D77;
+
+N77_14:
+ if attribute(user_followers_count) < 125.5000000000 then goto N77_15;
+ else goto N77_17;
+
+N77_15:
+ if age(created_at) < 270.0000000000 then goto N77_16;
+ else goto T77_16;
+
+N77_16:
+ if term(0).significance < 0.9985035062 then goto T77_14;
+ else goto T77_15;
+
+T77_14:
+ response = 0.0238630955;
+ goto D77;
+
+T77_15:
+ response = -0.0187495440;
+ goto D77;
+
+T77_16:
+ response = -0.0107232535;
+ goto D77;
+
+N77_17:
+ if attribute(yst_reply_auth) < 813.0000000000 then goto T77_17;
+ else goto T77_18;
+
+T77_17:
+ response = -0.0012233981;
+ goto D77;
+
+T77_18:
+ response = -0.0114833082;
+ goto D77;
+
+D77:
+
+tnscore = tnscore + response;
+
+/* Tree 79 of 80 */
+N78_1:
+ if fieldMatch(text).absoluteOccurrence < 0.0122500006 then goto N78_2;
+ else goto N78_14;
+
+N78_2:
+ if fieldMatch(text).earliness < 0.8596060276 then goto N78_3;
+ else goto N78_12;
+
+N78_3:
+ if attribute(yst_reply_auth) < 302.5000000000 then goto N78_4;
+ else goto N78_11;
+
+N78_4:
+ if fieldMatch(user_name).completeness < 0.9791665077 then goto N78_5;
+ else goto T78_8;
+
+N78_5:
+ if fieldMatch(text).longestSequence < 1.5000000000 then goto N78_6;
+ else goto N78_10;
+
+N78_6:
+ if age(created_at) < 1830.0000000000 then goto T78_1;
+ else goto N78_7;
+
+T78_1:
+ response = -0.0008288048;
+ goto D78;
+
+N78_7:
+ if fieldMatch(text).importance < 0.6664404869 then goto N78_8;
+ else goto N78_9;
+
+N78_8:
+ if fieldMatch(text).fieldCompleteness < 0.0976189971 then goto T78_2;
+ else goto T78_3;
+
+T78_2:
+ response = -0.0053357392;
+ goto D78;
+
+T78_3:
+ response = -0.0137936880;
+ goto D78;
+
+N78_9:
+ if fieldMatch(text).occurrence < 0.1558704972 then goto T78_4;
+ else goto T78_5;
+
+T78_4:
+ response = 0.0024922240;
+ goto D78;
+
+T78_5:
+ response = -0.0089043788;
+ goto D78;
+
+N78_10:
+ if fieldMatch(text).weightedOccurrence < 0.0385860018 then goto T78_6;
+ else goto T78_7;
+
+T78_6:
+ response = -0.0071451431;
+ goto D78;
+
+T78_7:
+ response = 0.0005167991;
+ goto D78;
+
+T78_8:
+ response = 0.0407753689;
+ goto D78;
+
+N78_11:
+ if attribute(user_followers_count) < 1721.5000000000 then goto T78_9;
+ else goto T78_10;
+
+T78_9:
+ response = -0.0105255162;
+ goto D78;
+
+T78_10:
+ response = 0.0015551667;
+ goto D78;
+
+N78_12:
+ if fieldLength(text) < 8.5000000000 then goto T78_11;
+ else goto N78_13;
+
+T78_11:
+ response = -0.0088276495;
+ goto D78;
+
+N78_13:
+ if fieldTermMatch(text,0).firstPosition < 15.5000000000 then goto T78_12;
+ else goto T78_13;
+
+T78_12:
+ response = 0.0024878063;
+ goto D78;
+
+T78_13:
+ response = -0.0103987822;
+ goto D78;
+
+N78_14:
+ if attribute(user_followers_count) < 27.5000000000 then goto N78_15;
+ else goto N78_17;
+
+N78_15:
+ if fieldTermMatch(text,1).firstPosition < 17.5000000000 then goto N78_16;
+ else goto T78_16;
+
+N78_16:
+ if term(0).significance < 0.9853284955 then goto T78_14;
+ else goto T78_15;
+
+T78_14:
+ response = -0.0219723254;
+ goto D78;
+
+T78_15:
+ response = -0.0024166886;
+ goto D78;
+
+T78_16:
+ response = 0.0189239860;
+ goto D78;
+
+N78_17:
+ if attribute(user_statuses_count) < 93.5000000000 then goto T78_17;
+ else goto T78_18;
+
+T78_17:
+ response = 0.0212835416;
+ goto D78;
+
+T78_18:
+ response = 0.0045698024;
+ goto D78;
+
+D78:
+
+tnscore = tnscore + response;
+
+/* Tree 80 of 80 */
+N79_1:
+ if fieldMatch(text).longestSequence < 1.5000000000 then goto N79_2;
+ else goto N79_14;
+
+N79_2:
+ if term(0).significance < 0.8509274721 then goto N79_3;
+ else goto N79_4;
+
+N79_3:
+ if term(2).significance < 0.9981595278 then goto T79_1;
+ else goto T79_2;
+
+T79_1:
+ response = -0.0629348018;
+ goto D79;
+
+T79_2:
+ response = -0.0067077117;
+ goto D79;
+
+N79_4:
+ if fieldMatch(text).significance < 0.7493325472 then goto N79_5;
+ else goto T79_13;
+
+N79_5:
+ if fieldMatch(text).importance < 0.4999189973 then goto N79_6;
+ else goto N79_11;
+
+N79_6:
+ if fieldMatch(text).importance < 0.4999135137 then goto N79_7;
+ else goto T79_8;
+
+N79_7:
+ if fieldMatch(text).importance < 0.4999005198 then goto N79_8;
+ else goto T79_7;
+
+N79_8:
+ if fieldMatch(text).importance < 0.4997529984 then goto T79_3;
+ else goto N79_9;
+
+T79_3:
+ response = -0.0025494044;
+ goto D79;
+
+N79_9:
+ if fieldMatch(text).importance < 0.4998250008 then goto T79_4;
+ else goto N79_10;
+
+T79_4:
+ response = 0.0105391880;
+ goto D79;
+
+N79_10:
+ if fieldMatch(text).occurrence < 0.0425724983 then goto T79_5;
+ else goto T79_6;
+
+T79_5:
+ response = -0.0160413841;
+ goto D79;
+
+T79_6:
+ response = 0.0020384205;
+ goto D79;
+
+T79_7:
+ response = -0.0118667123;
+ goto D79;
+
+T79_8:
+ response = 0.0199742140;
+ goto D79;
+
+N79_11:
+ if fieldMatch(text) < 0.8826240301 then goto N79_12;
+ else goto T79_12;
+
+N79_12:
+ if fieldMatch(text).absoluteOccurrence < 0.0116665000 then goto N79_13;
+ else goto T79_11;
+
+N79_13:
+ if fieldMatch(text).earliness < 0.6554945111 then goto T79_9;
+ else goto T79_10;
+
+T79_9:
+ response = -0.0060372501;
+ goto D79;
+
+T79_10:
+ response = -0.0016907417;
+ goto D79;
+
+T79_11:
+ response = 0.0022069234;
+ goto D79;
+
+T79_12:
+ response = -0.0320846717;
+ goto D79;
+
+T79_13:
+ response = 0.0077067193;
+ goto D79;
+
+N79_14:
+ if term(1).significance < 0.8159549832 then goto T79_14;
+ else goto N79_15;
+
+T79_14:
+ response = 0.0094136687;
+ goto D79;
+
+N79_15:
+ if attribute(yst_tweet_language) < 3587.5000000000 then goto N79_16;
+ else goto T79_18;
+
+N79_16:
+ if fieldMatch(text).earliness < 0.9298025370 then goto T79_15;
+ else goto N79_17;
+
+T79_15:
+ response = 0.0003536090;
+ goto D79;
+
+N79_17:
+ if fieldMatch(text).fieldCompleteness < 0.1519230008 then goto T79_16;
+ else goto T79_17;
+
+T79_16:
+ response = 0.0079910001;
+ goto D79;
+
+T79_17:
+ response = 0.0009053355;
+ goto D79;
+
+T79_18:
+ response = -0.0202098115;
+ goto D79;
+
+D79:
+
+tnscore = tnscore + response;
+
+N80:
+ if age(created_at) < 60 then goto T80_1;
+ else goto T80_2 ;
+
+T80_1:
+ response = 0.05 ;
+ goto D80;
+
+T80_2:
+ response = 0 ;
+ goto D80;
+
+D80:
+
+tnscore = tnscore + response;
+
+N81:
+ if age(created_at) < 120 then goto T81_1;
+ else goto T81_2 ;
+
+T81_1:
+ response = 0.0125 ;
+ goto D81;
+
+T81_2:
+ response = 0 ;
+ goto D81;
+
+D81:
+
+tnscore = tnscore + response;
+
+N82:
+ if age(created_at) < 240 then goto T82_1;
+ else goto T82_2 ;
+
+T82_1:
+ response = 0.0125 ;
+ goto D82;
+
+T82_2:
+ response = 0 ;
+ goto D82;
+
+D82:
+
+tnscore = tnscore + response;
+
+N83:
+ if age(created_at) < 360 then goto T83_1;
+ else goto T83_2 ;
+
+T83_1:
+ response = 0.0125 ;
+ goto D83;
+
+T83_2:
+ response = 0 ;
+ goto D83;
+
+D83:
+
+tnscore = tnscore + response;
+
+
+N84:
+ if age(created_at) < 480 then goto T84_1;
+ else goto T84_2 ;
+
+T84_1:
+ response = 0.0125 ;
+ goto D84;
+
+T84_2:
+ response = 0 ;
+ goto D84;
+
+D84:
+
+tnscore = tnscore + response;
+
+N85:
+ if age(created_at) < 600 then goto T85_1;
+ else goto T85_2 ;
+
+T85_1:
+ response = 0.017 ;
+ goto D85;
+
+T85_2:
+ response = 0 ;
+ goto D85;
+
+D85:
+
+tnscore = tnscore + response;
+
+
+N86:
+ if age(created_at) < 1200 then goto T86_1;
+ else goto T86_2 ;
+
+T86_1:
+ response = 0.017 ;
+ goto D86;
+
+T86_2:
+ response = 0 ;
+ goto D86;
+
+D86:
+
+tnscore = tnscore + response;
+
+N87:
+ if age(created_at) < 2400 then goto T87_1;
+ else goto T87_2 ;
+
+T87_1:
+ response = 0.017 ;
+ goto D87;
+
+T87_2:
+ response = 0 ;
+ goto D87;
+
+D87:
+
+tnscore = tnscore + response;
+
+N88:
+ if age(created_at) < 3600 then goto T88_1;
+ else goto T88_2 ;
+
+T88_1:
+ response = 0.025 ;
+ goto D88;
+
+T88_2:
+ response = 0 ;
+ goto D88;
+
+D88:
+
+tnscore = tnscore + response;
+
+
+N89:
+ if age(created_at) < 7200 then goto T89_1;
+ else goto T89_2 ;
+
+T89_1:
+ response = 0.025 ;
+ goto D89;
+
+T89_2:
+ response = 0 ;
+ goto D89;
+
+D89:
+
+tnscore = tnscore + response;
+
+
+return;
diff --git a/searchlib/src/test/files/treenet03.model b/searchlib/src/test/files/treenet03.model
new file mode 100644
index 00000000000..dd84c120685
--- /dev/null
+++ b/searchlib/src/test/files/treenet03.model
@@ -0,0 +1,5880 @@
+
+/* Data Dictionary, Number Of Variables = 33 */
+/* Name = NUM_WORDS, Type = continuous. */
+/* Name = DAY_HITS, Type = continuous. */
+/* Name = DAY_HITS_FRAC, Type = continuous. */
+/* Name = PREV_DAY_HITS, Type = continuous. */
+/* Name = DAY_PD_HITS_RATIO, Type = continuous. */
+/* Name = DAY_PREV_DAY_HITS_FRAC, Type = continuous. */
+/* Name = LW_DAY_HITS, Type = continuous. */
+/* Name = DAY_LW_DAY_HITS_RATIO, Type = continuous. */
+/* Name = WEEKAVG, Type = continuous. */
+/* Name = DAY_WEEK_AVG_RATIO, Type = continuous. */
+/* Name = ISTITLE_AVG, Type = continuous. */
+/* Name = ISABSTRACT_AVG, Type = continuous. */
+/* Name = SUPERDUPER_AVG, Type = continuous. */
+/* Name = PUB_TODAY_AVG, Type = continuous. */
+/* Name = BUSINESS, Type = continuous. */
+/* Name = ENTERTAINMENT, Type = continuous. */
+/* Name = HEALTH, Type = continuous. */
+/* Name = INTLNEWS, Type = continuous. */
+/* Name = LAW, Type = continuous. */
+/* Name = LIFESTYLE, Type = continuous. */
+/* Name = LOCALNEWS, Type = continuous. */
+/* Name = MISC, Type = continuous. */
+/* Name = NATIONALNEWS, Type = continuous. */
+/* Name = POLITICS, Type = continuous. */
+/* Name = REGIONALNEWS, Type = continuous. */
+/* Name = SCIENCE, Type = continuous. */
+/* Name = SPORTS, Type = continuous. */
+/* Name = TOPSTORY, Type = continuous. */
+/* Name = AVG_RANK, Type = continuous. */
+/* Name = MAX_RANK, Type = continuous. */
+/* Name = MIN_RANK, Type = continuous. */
+/* Name = MAX_MIN_RANK, Type = continuous. */
+/* Name = MAX_SCORE, Type = continuous. */
+
+MODELBEGIN:
+
+/* CART version: 5.0.9.156 */
+/* TreeNet: TreeNet20071008155525 */
+/* Grove: /home/rparekh/lb/lb_features_all_days.grv */
+/* N trees: 97 */
+
+link TN0;
+pred = tnscore; /* predicted value for IY_CTR */
+
+
+/*********************/
+/* Model is complete */
+/*********************/
+
+return;
+
+
+
+TN0:
+
+/* Tree 1 of 97 */
+/* N terminal nodes = 7, Depth = 5 */
+
+
+tnscore = 0.0;
+
+N0_1:
+ if MAX_SCORE < 270055 then goto N0_2;
+ else goto N0_4;
+
+N0_2:
+ if MAX_SCORE < 241174 then goto T0_1;
+ else goto N0_3;
+
+T0_1:
+ response = 0.234534;
+ goto D0;
+
+N0_3:
+ if ISABSTRACT_AVG < 0.105 then goto T0_2;
+ else goto T0_3;
+
+T0_2:
+ response = 0.248214;
+ goto D0;
+
+T0_3:
+ response = 0.239032;
+ goto D0;
+
+N0_4:
+ if ISABSTRACT_AVG < 0.13 then goto N0_5;
+ else goto T0_7;
+
+N0_5:
+ if DAY_HITS_FRAC < 0.765 then goto N0_6;
+ else goto T0_6;
+
+N0_6:
+ if MAX_SCORE < 347793 then goto T0_4;
+ else goto T0_5;
+
+T0_4:
+ response = 0.258244;
+ goto D0;
+
+T0_5:
+ response = 0.268225;
+ goto D0;
+
+T0_6:
+ response = 0.271744;
+ goto D0;
+
+T0_7:
+ response = 0.247728;
+ goto D0;
+
+D0:
+
+tnscore = tnscore + response;
+
+/* Tree 2 of 97 */
+/* N terminal nodes = 7, Depth = 5 */
+
+
+N1_1:
+ if MAX_SCORE < 270055 then goto N1_2;
+ else goto N1_3;
+
+N1_2:
+ if MAX_SCORE < 252585 then goto T1_1;
+ else goto T1_2;
+
+T1_1:
+ response = -0.0118809;
+ goto D1;
+
+T1_2:
+ response = -0.00253128;
+ goto D1;
+
+N1_3:
+ if ISABSTRACT_AVG < 0.21 then goto N1_4;
+ else goto N1_6;
+
+N1_4:
+ if DAY_LW_DAY_HITS_RATIO < 4.345 then goto N1_5;
+ else goto T1_5;
+
+N1_5:
+ if MAX_SCORE < 354461 then goto T1_3;
+ else goto T1_4;
+
+T1_3:
+ response = 0.00546628;
+ goto D1;
+
+T1_4:
+ response = 0.0164708;
+ goto D1;
+
+T1_5:
+ response = 0.0188771;
+ goto D1;
+
+N1_6:
+ if DAY_PREV_DAY_HITS_FRAC < 0.805 then goto T1_6;
+ else goto T1_7;
+
+T1_6:
+ response = -0.0092059;
+ goto D1;
+
+T1_7:
+ response = 0.00324753;
+ goto D1;
+
+D1:
+
+tnscore = tnscore + response;
+
+/* Tree 3 of 97 */
+/* N terminal nodes = 7, Depth = 5 */
+
+
+N2_1:
+ if MAX_SCORE < 270290 then goto N2_2;
+ else goto N2_5;
+
+N2_2:
+ if MAX_SCORE < 236242 then goto T2_1;
+ else goto N2_3;
+
+T2_1:
+ response = -0.0121516;
+ goto D2;
+
+N2_3:
+ if DAY_LW_DAY_HITS_RATIO < 3.45 then goto T2_2;
+ else goto N2_4;
+
+T2_2:
+ response = -0.00767489;
+ goto D2;
+
+N2_4:
+ if ISABSTRACT_AVG < 0.12 then goto T2_3;
+ else goto T2_4;
+
+T2_3:
+ response = 0.00622939;
+ goto D2;
+
+T2_4:
+ response = -0.00488712;
+ goto D2;
+
+N2_5:
+ if ISABSTRACT_AVG < 0.105 then goto N2_6;
+ else goto T2_7;
+
+N2_6:
+ if WEEKAVG < 0.5 then goto T2_5;
+ else goto T2_6;
+
+T2_5:
+ response = 0.00766953;
+ goto D2;
+
+T2_6:
+ response = 0.0156887;
+ goto D2;
+
+T2_7:
+ response = -0.000773677;
+ goto D2;
+
+D2:
+
+tnscore = tnscore + response;
+
+/* Tree 4 of 97 */
+/* N terminal nodes = 7, Depth = 5 */
+
+
+N3_1:
+ if MAX_SCORE < 270061 then goto N3_2;
+ else goto N3_4;
+
+N3_2:
+ if MAX_SCORE < 238942 then goto T3_1;
+ else goto N3_3;
+
+T3_1:
+ response = -0.0111281;
+ goto D3;
+
+N3_3:
+ if DAY_LW_DAY_HITS_RATIO < 3.9 then goto T3_2;
+ else goto T3_3;
+
+T3_2:
+ response = -0.00750282;
+ goto D3;
+
+T3_3:
+ response = 0.00220298;
+ goto D3;
+
+N3_4:
+ if ISABSTRACT_AVG < 0.105 then goto N3_5;
+ else goto T3_7;
+
+N3_5:
+ if DAY_HITS_FRAC < 0.795 then goto N3_6;
+ else goto T3_6;
+
+N3_6:
+ if MAX_SCORE < 348364 then goto T3_4;
+ else goto T3_5;
+
+T3_4:
+ response = 0.00374845;
+ goto D3;
+
+T3_5:
+ response = 0.0131108;
+ goto D3;
+
+T3_6:
+ response = 0.0161683;
+ goto D3;
+
+T3_7:
+ response = -0.00111039;
+ goto D3;
+
+D3:
+
+tnscore = tnscore + response;
+
+/* Tree 5 of 97 */
+/* N terminal nodes = 7, Depth = 5 */
+
+
+N4_1:
+ if MAX_SCORE < 270289 then goto N4_2;
+ else goto N4_4;
+
+N4_2:
+ if DAY_PREV_DAY_HITS_FRAC < 0.715 then goto T4_1;
+ else goto N4_3;
+
+T4_1:
+ response = -0.0106179;
+ goto D4;
+
+N4_3:
+ if ISTITLE_AVG < 0.705 then goto T4_2;
+ else goto T4_3;
+
+T4_2:
+ response = 0.00251111;
+ goto D4;
+
+T4_3:
+ response = -0.0060076;
+ goto D4;
+
+N4_4:
+ if ISABSTRACT_AVG < 0.21 then goto N4_5;
+ else goto T4_7;
+
+N4_5:
+ if PUB_TODAY_AVG < 0.13 then goto T4_4;
+ else goto N4_6;
+
+T4_4:
+ response = 0.00368181;
+ goto D4;
+
+N4_6:
+ if ISTITLE_AVG < 0.845 then goto T4_5;
+ else goto T4_6;
+
+T4_5:
+ response = 0.0126785;
+ goto D4;
+
+T4_6:
+ response = 0.00345445;
+ goto D4;
+
+T4_7:
+ response = -0.00136004;
+ goto D4;
+
+D4:
+
+tnscore = tnscore + response;
+
+/* Tree 6 of 97 */
+/* N terminal nodes = 7, Depth = 5 */
+
+
+N5_1:
+ if MAX_SCORE < 271459 then goto N5_2;
+ else goto N5_5;
+
+N5_2:
+ if MAX_SCORE < 238606 then goto T5_1;
+ else goto N5_3;
+
+T5_1:
+ response = -0.00948395;
+ goto D5;
+
+N5_3:
+ if ISABSTRACT_AVG < 0.105 then goto N5_4;
+ else goto T5_4;
+
+N5_4:
+ if DAY_LW_DAY_HITS_RATIO < 2.805 then goto T5_2;
+ else goto T5_3;
+
+T5_2:
+ response = -0.00477034;
+ goto D5;
+
+T5_3:
+ response = 0.00664777;
+ goto D5;
+
+T5_4:
+ response = -0.00676399;
+ goto D5;
+
+N5_5:
+ if ISABSTRACT_AVG < 0.13 then goto N5_6;
+ else goto T5_7;
+
+N5_6:
+ if DAY_PREV_DAY_HITS_FRAC < 0.675 then goto T5_5;
+ else goto T5_6;
+
+T5_5:
+ response = 0.00489261;
+ goto D5;
+
+T5_6:
+ response = 0.0122925;
+ goto D5;
+
+T5_7:
+ response = -0.000920098;
+ goto D5;
+
+D5:
+
+tnscore = tnscore + response;
+
+/* Tree 7 of 97 */
+/* N terminal nodes = 7, Depth = 5 */
+
+
+N6_1:
+ if MAX_SCORE < 271407 then goto N6_2;
+ else goto N6_5;
+
+N6_2:
+ if DAY_LW_DAY_HITS_RATIO < 3.485 then goto T6_1;
+ else goto N6_3;
+
+T6_1:
+ response = -0.00827195;
+ goto D6;
+
+N6_3:
+ if NATIONALNEWS < 0.185 then goto T6_2;
+ else goto N6_4;
+
+T6_2:
+ response = -0.00376713;
+ goto D6;
+
+N6_4:
+ if MAX_SCORE < 245976 then goto T6_3;
+ else goto T6_4;
+
+T6_3:
+ response = 0.000352932;
+ goto D6;
+
+T6_4:
+ response = 0.0160415;
+ goto D6;
+
+N6_5:
+ if ISTITLE_AVG < 0.705 then goto N6_6;
+ else goto T6_7;
+
+N6_6:
+ if DAY_PREV_DAY_HITS_FRAC < 0.675 then goto T6_5;
+ else goto T6_6;
+
+T6_5:
+ response = 0.00314056;
+ goto D6;
+
+T6_6:
+ response = 0.0112222;
+ goto D6;
+
+T6_7:
+ response = 0.000924328;
+ goto D6;
+
+D6:
+
+tnscore = tnscore + response;
+
+/* Tree 8 of 97 */
+/* N terminal nodes = 7, Depth = 5 */
+
+
+N7_1:
+ if MAX_SCORE < 253367 then goto N7_2;
+ else goto N7_3;
+
+N7_2:
+ if MAX_SCORE < 177746 then goto T7_1;
+ else goto T7_2;
+
+T7_1:
+ response = -0.0118279;
+ goto D7;
+
+T7_2:
+ response = -0.00453188;
+ goto D7;
+
+N7_3:
+ if ISABSTRACT_AVG < 0.105 then goto N7_4;
+ else goto N7_6;
+
+N7_4:
+ if DAY_LW_DAY_HITS_RATIO < 4.25 then goto N7_5;
+ else goto T7_5;
+
+N7_5:
+ if MAX_SCORE < 354461 then goto T7_3;
+ else goto T7_4;
+
+T7_3:
+ response = -0.000720492;
+ goto D7;
+
+T7_4:
+ response = 0.00994136;
+ goto D7;
+
+T7_5:
+ response = 0.0104822;
+ goto D7;
+
+N7_6:
+ if DAY_PREV_DAY_HITS_FRAC < 0.915 then goto T7_6;
+ else goto T7_7;
+
+T7_6:
+ response = -0.00613264;
+ goto D7;
+
+T7_7:
+ response = 0.00119411;
+ goto D7;
+
+D7:
+
+tnscore = tnscore + response;
+
+/* Tree 9 of 97 */
+/* N terminal nodes = 7, Depth = 5 */
+
+
+N8_1:
+ if MAX_SCORE < 270055 then goto N8_2;
+ else goto N8_4;
+
+N8_2:
+ if DAY_LW_DAY_HITS_RATIO < 4.635 then goto N8_3;
+ else goto T8_3;
+
+N8_3:
+ if MAX_SCORE < 221962 then goto T8_1;
+ else goto T8_2;
+
+T8_1:
+ response = -0.00963481;
+ goto D8;
+
+T8_2:
+ response = -0.00428119;
+ goto D8;
+
+T8_3:
+ response = -0.000341413;
+ goto D8;
+
+N8_4:
+ if ISTITLE_AVG < 0.565 then goto N8_5;
+ else goto T8_7;
+
+N8_5:
+ if MAX_SCORE < 354542 then goto N8_6;
+ else goto T8_6;
+
+N8_6:
+ if DAY_HITS < 1.5 then goto T8_4;
+ else goto T8_5;
+
+T8_4:
+ response = -0.00205135;
+ goto D8;
+
+T8_5:
+ response = 0.00751225;
+ goto D8;
+
+T8_6:
+ response = 0.0111239;
+ goto D8;
+
+T8_7:
+ response = 0.00022935;
+ goto D8;
+
+D8:
+
+tnscore = tnscore + response;
+
+/* Tree 10 of 97 */
+/* N terminal nodes = 7, Depth = 5 */
+
+
+N9_1:
+ if MAX_SCORE < 263726 then goto N9_2;
+ else goto N9_4;
+
+N9_2:
+ if DAY_PREV_DAY_HITS_FRAC < 0.715 then goto T9_1;
+ else goto N9_3;
+
+T9_1:
+ response = -0.00728388;
+ goto D9;
+
+N9_3:
+ if ISTITLE_AVG < 0.73 then goto T9_2;
+ else goto T9_3;
+
+T9_2:
+ response = 0.00311214;
+ goto D9;
+
+T9_3:
+ response = -0.00320301;
+ goto D9;
+
+N9_4:
+ if ISABSTRACT_AVG < 0.105 then goto N9_5;
+ else goto T9_7;
+
+N9_5:
+ if WEEKAVG < 0.215 then goto T9_4;
+ else goto N9_6;
+
+T9_4:
+ response = -0.00472856;
+ goto D9;
+
+N9_6:
+ if DAY_LW_DAY_HITS_RATIO < 47 then goto T9_5;
+ else goto T9_6;
+
+T9_5:
+ response = 0.00641873;
+ goto D9;
+
+T9_6:
+ response = 0.0215092;
+ goto D9;
+
+T9_7:
+ response = -0.00106176;
+ goto D9;
+
+D9:
+
+tnscore = tnscore + response;
+
+/* Tree 11 of 97 */
+/* N terminal nodes = 7, Depth = 4 */
+
+
+N10_1:
+ if MAX_SCORE < 263734 then goto N10_2;
+ else goto N10_4;
+
+N10_2:
+ if DAY_LW_DAY_HITS_RATIO < 3.635 then goto T10_1;
+ else goto N10_3;
+
+T10_1:
+ response = -0.0061738;
+ goto D10;
+
+N10_3:
+ if ISTITLE_AVG < 0.05 then goto T10_2;
+ else goto T10_3;
+
+T10_2:
+ response = 0.00678624;
+ goto D10;
+
+T10_3:
+ response = -0.0034547;
+ goto D10;
+
+N10_4:
+ if ISABSTRACT_AVG < 0.105 then goto N10_5;
+ else goto N10_6;
+
+N10_5:
+ if LOCALNEWS < 0.105 then goto T10_4;
+ else goto T10_5;
+
+T10_4:
+ response = 0.00405055;
+ goto D10;
+
+T10_5:
+ response = 0.00975544;
+ goto D10;
+
+N10_6:
+ if DAY_PREV_DAY_HITS_FRAC < 0.905 then goto T10_6;
+ else goto T10_7;
+
+T10_6:
+ response = -0.00538249;
+ goto D10;
+
+T10_7:
+ response = 0.00274471;
+ goto D10;
+
+D10:
+
+tnscore = tnscore + response;
+
+/* Tree 12 of 97 */
+/* N terminal nodes = 7, Depth = 6 */
+
+
+N11_1:
+ if MAX_SCORE < 252459 then goto T11_1;
+ else goto N11_2;
+
+T11_1:
+ response = -0.00466436;
+ goto D11;
+
+N11_2:
+ if ISABSTRACT_AVG < 0.21 then goto N11_3;
+ else goto T11_7;
+
+N11_3:
+ if SUPERDUPER_AVG < 0.115 then goto N11_4;
+ else goto N11_5;
+
+N11_4:
+ if MAX_SCORE < 254916 then goto T11_2;
+ else goto T11_3;
+
+T11_2:
+ response = 0.0300376;
+ goto D11;
+
+T11_3:
+ response = 0.00749701;
+ goto D11;
+
+N11_5:
+ if PUB_TODAY_AVG < 0.105 then goto N11_6;
+ else goto T11_6;
+
+N11_6:
+ if DAY_PD_HITS_RATIO < 0.085 then goto T11_4;
+ else goto T11_5;
+
+T11_4:
+ response = 0.0121226;
+ goto D11;
+
+T11_5:
+ response = -0.00799009;
+ goto D11;
+
+T11_6:
+ response = 0.00453286;
+ goto D11;
+
+T11_7:
+ response = -0.00140668;
+ goto D11;
+
+D11:
+
+tnscore = tnscore + response;
+
+/* Tree 13 of 97 */
+/* N terminal nodes = 7, Depth = 4 */
+
+
+N12_1:
+ if MAX_SCORE < 264515 then goto N12_2;
+ else goto N12_4;
+
+N12_2:
+ if DAY_PREV_DAY_HITS_FRAC < 0.725 then goto T12_1;
+ else goto N12_3;
+
+T12_1:
+ response = -0.00554549;
+ goto D12;
+
+N12_3:
+ if DAY_HITS < 30.5 then goto T12_2;
+ else goto T12_3;
+
+T12_2:
+ response = -0.0016496;
+ goto D12;
+
+T12_3:
+ response = 0.0125357;
+ goto D12;
+
+N12_4:
+ if ISTITLE_AVG < 0.585 then goto N12_5;
+ else goto N12_6;
+
+N12_5:
+ if WEEKAVG < 0.5 then goto T12_4;
+ else goto T12_5;
+
+T12_4:
+ response = 0.00111467;
+ goto D12;
+
+T12_5:
+ response = 0.0073944;
+ goto D12;
+
+N12_6:
+ if MAX_SCORE < 356177 then goto T12_6;
+ else goto T12_7;
+
+T12_6:
+ response = -0.00212114;
+ goto D12;
+
+T12_7:
+ response = 0.00636485;
+ goto D12;
+
+D12:
+
+tnscore = tnscore + response;
+
+/* Tree 14 of 97 */
+/* N terminal nodes = 7, Depth = 5 */
+
+
+N13_1:
+ if DAY_PREV_DAY_HITS_FRAC < 0.825 then goto N13_2;
+ else goto N13_3;
+
+N13_2:
+ if PREV_DAY_HITS < 17.5 then goto T13_1;
+ else goto T13_2;
+
+T13_1:
+ response = -0.00398853;
+ goto D13;
+
+T13_2:
+ response = 0.00429611;
+ goto D13;
+
+N13_3:
+ if ISTITLE_AVG < 0.73 then goto N13_4;
+ else goto N13_6;
+
+N13_4:
+ if MIN_RANK < 9 then goto N13_5;
+ else goto T13_5;
+
+N13_5:
+ if DAY_HITS < 46 then goto T13_3;
+ else goto T13_4;
+
+T13_3:
+ response = 0.00610951;
+ goto D13;
+
+T13_4:
+ response = 0.0271326;
+ goto D13;
+
+T13_5:
+ response = -0.00242626;
+ goto D13;
+
+N13_6:
+ if MAX_SCORE < 374204 then goto T13_6;
+ else goto T13_7;
+
+T13_6:
+ response = -0.00141569;
+ goto D13;
+
+T13_7:
+ response = 0.00890749;
+ goto D13;
+
+D13:
+
+tnscore = tnscore + response;
+
+/* Tree 15 of 97 */
+/* N terminal nodes = 7, Depth = 7 */
+
+
+N14_1:
+ if MAX_SCORE < 249898 then goto T14_1;
+ else goto N14_2;
+
+T14_1:
+ response = -0.00399302;
+ goto D14;
+
+N14_2:
+ if BUSINESS < 0.315 then goto N14_3;
+ else goto T14_7;
+
+N14_3:
+ if ISTITLE_AVG < 0.73 then goto N14_4;
+ else goto T14_6;
+
+N14_4:
+ if SUPERDUPER_AVG < 0.105 then goto T14_2;
+ else goto N14_5;
+
+T14_2:
+ response = 0.00904674;
+ goto D14;
+
+N14_5:
+ if WEEKAVG < 5.5 then goto N14_6;
+ else goto T14_5;
+
+N14_6:
+ if PUB_TODAY_AVG < 0.13 then goto T14_3;
+ else goto T14_4;
+
+T14_3:
+ response = -0.00769757;
+ goto D14;
+
+T14_4:
+ response = 0.00217607;
+ goto D14;
+
+T14_5:
+ response = 0.0110208;
+ goto D14;
+
+T14_6:
+ response = -0.000593102;
+ goto D14;
+
+T14_7:
+ response = -0.00318209;
+ goto D14;
+
+D14:
+
+tnscore = tnscore + response;
+
+/* Tree 16 of 97 */
+/* N terminal nodes = 7, Depth = 5 */
+
+
+N15_1:
+ if MAX_SCORE < 276408 then goto N15_2;
+ else goto N15_4;
+
+N15_2:
+ if DAY_LW_DAY_HITS_RATIO < 4.535 then goto T15_1;
+ else goto N15_3;
+
+T15_1:
+ response = -0.00344589;
+ goto D15;
+
+N15_3:
+ if LOCALNEWS < 0.53 then goto T15_2;
+ else goto T15_3;
+
+T15_2:
+ response = -6.70599e-05;
+ goto D15;
+
+T15_3:
+ response = 0.0175562;
+ goto D15;
+
+N15_4:
+ if ISABSTRACT_AVG < 0.685 then goto N15_5;
+ else goto T15_7;
+
+N15_5:
+ if DAY_LW_DAY_HITS_RATIO < 33.5 then goto N15_6;
+ else goto T15_6;
+
+N15_6:
+ if LOCALNEWS < 0.115 then goto T15_4;
+ else goto T15_5;
+
+T15_4:
+ response = 0.00202221;
+ goto D15;
+
+T15_5:
+ response = 0.00726641;
+ goto D15;
+
+T15_6:
+ response = 0.0142841;
+ goto D15;
+
+T15_7:
+ response = -0.00307504;
+ goto D15;
+
+D15:
+
+tnscore = tnscore + response;
+
+/* Tree 17 of 97 */
+/* N terminal nodes = 7, Depth = 7 */
+
+
+N16_1:
+ if MAX_SCORE < 348857 then goto N16_2;
+ else goto T16_7;
+
+N16_2:
+ if DAY_PREV_DAY_HITS_FRAC < 0.725 then goto T16_1;
+ else goto N16_3;
+
+T16_1:
+ response = -0.00419409;
+ goto D16;
+
+N16_3:
+ if PUB_TODAY_AVG < 0.185 then goto T16_2;
+ else goto N16_4;
+
+T16_2:
+ response = -0.00386261;
+ goto D16;
+
+N16_4:
+ if ISTITLE_AVG < 0.705 then goto N16_5;
+ else goto T16_6;
+
+N16_5:
+ if BUSINESS < 0.21 then goto T16_3;
+ else goto N16_6;
+
+T16_3:
+ response = 0.0063503;
+ goto D16;
+
+N16_6:
+ if DAY_HITS_FRAC < 0.555 then goto T16_4;
+ else goto T16_5;
+
+T16_4:
+ response = -0.0102176;
+ goto D16;
+
+T16_5:
+ response = 0.00356215;
+ goto D16;
+
+T16_6:
+ response = -0.000478923;
+ goto D16;
+
+T16_7:
+ response = 0.00498293;
+ goto D16;
+
+D16:
+
+tnscore = tnscore + response;
+
+/* Tree 18 of 97 */
+/* N terminal nodes = 7, Depth = 6 */
+
+
+N17_1:
+ if MAX_SCORE < 286123 then goto N17_2;
+ else goto T17_7;
+
+N17_2:
+ if DAY_PD_HITS_RATIO < 48 then goto N17_3;
+ else goto T17_6;
+
+N17_3:
+ if ISTITLE_AVG < 0.61 then goto N17_4;
+ else goto T17_5;
+
+N17_4:
+ if MAX_RANK < 9 then goto N17_5;
+ else goto N17_6;
+
+N17_5:
+ if MAX_SCORE < 226208 then goto T17_1;
+ else goto T17_2;
+
+T17_1:
+ response = -0.00741311;
+ goto D17;
+
+T17_2:
+ response = 0.0138247;
+ goto D17;
+
+N17_6:
+ if AVG_RANK < 9.635 then goto T17_3;
+ else goto T17_4;
+
+T17_3:
+ response = 0.000360768;
+ goto D17;
+
+T17_4:
+ response = -0.00785446;
+ goto D17;
+
+T17_5:
+ response = -0.00329639;
+ goto D17;
+
+T17_6:
+ response = 0.0225017;
+ goto D17;
+
+T17_7:
+ response = 0.00337188;
+ goto D17;
+
+D17:
+
+tnscore = tnscore + response;
+
+/* Tree 19 of 97 */
+/* N terminal nodes = 7, Depth = 5 */
+
+
+N18_1:
+ if DAY_LW_DAY_HITS_RATIO < 7.25 then goto N18_2;
+ else goto T18_7;
+
+N18_2:
+ if BUSINESS < 0.05 then goto N18_3;
+ else goto N18_5;
+
+N18_3:
+ if ISTITLE_AVG < 0.895 then goto N18_4;
+ else goto T18_3;
+
+N18_4:
+ if MIN_RANK < 1 then goto T18_1;
+ else goto T18_2;
+
+T18_1:
+ response = 0.0175483;
+ goto D18;
+
+T18_2:
+ response = 0.00215143;
+ goto D18;
+
+T18_3:
+ response = -0.00158754;
+ goto D18;
+
+N18_5:
+ if DAY_WEEK_AVG_RATIO < 0.325 then goto T18_4;
+ else goto N18_6;
+
+T18_4:
+ response = 0.0165492;
+ goto D18;
+
+N18_6:
+ if MAX_SCORE < 448185 then goto T18_5;
+ else goto T18_6;
+
+T18_5:
+ response = -0.00386364;
+ goto D18;
+
+T18_6:
+ response = 0.0131047;
+ goto D18;
+
+T18_7:
+ response = 0.00394983;
+ goto D18;
+
+D18:
+
+tnscore = tnscore + response;
+
+/* Tree 20 of 97 */
+/* N terminal nodes = 7, Depth = 5 */
+
+
+N19_1:
+ if MAX_SCORE < 271407 then goto N19_2;
+ else goto N19_4;
+
+N19_2:
+ if MAX_SCORE < 177474 then goto T19_1;
+ else goto N19_3;
+
+T19_1:
+ response = -0.00525936;
+ goto D19;
+
+N19_3:
+ if SPORTS < 0.645 then goto T19_2;
+ else goto T19_3;
+
+T19_2:
+ response = -0.00170921;
+ goto D19;
+
+T19_3:
+ response = 0.00426429;
+ goto D19;
+
+N19_4:
+ if DAY_PD_HITS_RATIO < 0.085 then goto T19_4;
+ else goto N19_5;
+
+T19_4:
+ response = 0.0151019;
+ goto D19;
+
+N19_5:
+ if BUSINESS < 0.645 then goto N19_6;
+ else goto T19_7;
+
+N19_6:
+ if LW_DAY_HITS < 2.5 then goto T19_5;
+ else goto T19_6;
+
+T19_5:
+ response = 0.00244345;
+ goto D19;
+
+T19_6:
+ response = -0.0099429;
+ goto D19;
+
+T19_7:
+ response = -0.00501617;
+ goto D19;
+
+D19:
+
+tnscore = tnscore + response;
+
+/* Tree 21 of 97 */
+/* N terminal nodes = 7, Depth = 5 */
+
+
+N20_1:
+ if DAY_PREV_DAY_HITS_FRAC < 0.945 then goto N20_2;
+ else goto N20_4;
+
+N20_2:
+ if ISTITLE_AVG < 0.95 then goto N20_3;
+ else goto T20_3;
+
+N20_3:
+ if BUSINESS < 0.235 then goto T20_1;
+ else goto T20_2;
+
+T20_1:
+ response = 0.0017119;
+ goto D20;
+
+T20_2:
+ response = -0.00331729;
+ goto D20;
+
+T20_3:
+ response = -0.00374611;
+ goto D20;
+
+N20_4:
+ if WEEKAVG < 0.215 then goto T20_4;
+ else goto N20_5;
+
+T20_4:
+ response = -0.004784;
+ goto D20;
+
+N20_5:
+ if DAY_PD_HITS_RATIO < 0.145 then goto T20_5;
+ else goto N20_6;
+
+T20_5:
+ response = -0.00631232;
+ goto D20;
+
+N20_6:
+ if BUSINESS < 0.685 then goto T20_6;
+ else goto T20_7;
+
+T20_6:
+ response = 0.00413018;
+ goto D20;
+
+T20_7:
+ response = -0.00259307;
+ goto D20;
+
+D20:
+
+tnscore = tnscore + response;
+
+/* Tree 22 of 97 */
+/* N terminal nodes = 7, Depth = 6 */
+
+
+N21_1:
+ if PUB_TODAY_AVG < 0.87 then goto N21_2;
+ else goto N21_6;
+
+N21_2:
+ if BUSINESS < 0.235 then goto N21_3;
+ else goto T21_5;
+
+N21_3:
+ if ISTITLE_AVG < 0.39 then goto N21_4;
+ else goto T21_4;
+
+N21_4:
+ if WEEKAVG < 7.855 then goto N21_5;
+ else goto T21_3;
+
+N21_5:
+ if SUPERDUPER_AVG < 0.115 then goto T21_1;
+ else goto T21_2;
+
+T21_1:
+ response = 0.00505563;
+ goto D21;
+
+T21_2:
+ response = -0.000398588;
+ goto D21;
+
+T21_3:
+ response = 0.017327;
+ goto D21;
+
+T21_4:
+ response = -0.00158729;
+ goto D21;
+
+T21_5:
+ response = -0.00349104;
+ goto D21;
+
+N21_6:
+ if WEEKAVG < 0.36 then goto T21_6;
+ else goto T21_7;
+
+T21_6:
+ response = -0.00122032;
+ goto D21;
+
+T21_7:
+ response = 0.00412986;
+ goto D21;
+
+D21:
+
+tnscore = tnscore + response;
+
+/* Tree 23 of 97 */
+/* N terminal nodes = 7, Depth = 7 */
+
+
+N22_1:
+ if MAX_SCORE < 235342 then goto T22_1;
+ else goto N22_2;
+
+T22_1:
+ response = -0.00255699;
+ goto D22;
+
+N22_2:
+ if ISABSTRACT_AVG < 0.13 then goto N22_3;
+ else goto T22_7;
+
+N22_3:
+ if WEEKAVG < 0.215 then goto T22_2;
+ else goto N22_4;
+
+T22_2:
+ response = -0.00736871;
+ goto D22;
+
+N22_4:
+ if LOCALNEWS < 0.775 then goto N22_5;
+ else goto T22_6;
+
+N22_5:
+ if SUPERDUPER_AVG < 0.315 then goto T22_3;
+ else goto N22_6;
+
+T22_3:
+ response = 0.00356548;
+ goto D22;
+
+N22_6:
+ if WEEKAVG < 7.07 then goto T22_4;
+ else goto T22_5;
+
+T22_4:
+ response = -0.00254331;
+ goto D22;
+
+T22_5:
+ response = 0.00782112;
+ goto D22;
+
+T22_6:
+ response = 0.0187086;
+ goto D22;
+
+T22_7:
+ response = -0.0015245;
+ goto D22;
+
+D22:
+
+tnscore = tnscore + response;
+
+/* Tree 24 of 97 */
+/* N terminal nodes = 7, Depth = 7 */
+
+
+N23_1:
+ if DAY_PREV_DAY_HITS_FRAC < 0.825 then goto T23_1;
+ else goto N23_2;
+
+T23_1:
+ response = -0.0017775;
+ goto D23;
+
+N23_2:
+ if WEEKAVG < 0.36 then goto T23_2;
+ else goto N23_3;
+
+T23_2:
+ response = -0.00236106;
+ goto D23;
+
+N23_3:
+ if MAX_MIN_RANK < 3 then goto N23_4;
+ else goto T23_7;
+
+N23_4:
+ if PUB_TODAY_AVG < 0.27 then goto T23_3;
+ else goto N23_5;
+
+T23_3:
+ response = -0.00108329;
+ goto D23;
+
+N23_5:
+ if DAY_WEEK_AVG_RATIO < 2.615 then goto N23_6;
+ else goto T23_6;
+
+N23_6:
+ if MAX_SCORE < 248412 then goto T23_4;
+ else goto T23_5;
+
+T23_4:
+ response = 0.00662755;
+ goto D23;
+
+T23_5:
+ response = 0.0252786;
+ goto D23;
+
+T23_6:
+ response = 0.00570542;
+ goto D23;
+
+T23_7:
+ response = 0.00183161;
+ goto D23;
+
+D23:
+
+tnscore = tnscore + response;
+
+/* Tree 25 of 97 */
+/* N terminal nodes = 7, Depth = 5 */
+
+
+N24_1:
+ if DAY_PREV_DAY_HITS_FRAC < 0.725 then goto N24_2;
+ else goto N24_4;
+
+N24_2:
+ if MAX_SCORE < 453346 then goto N24_3;
+ else goto T24_3;
+
+N24_3:
+ if PREV_DAY_HITS < 15.5 then goto T24_1;
+ else goto T24_2;
+
+T24_1:
+ response = -0.00263045;
+ goto D24;
+
+T24_2:
+ response = 0.00362606;
+ goto D24;
+
+T24_3:
+ response = 0.0113911;
+ goto D24;
+
+N24_4:
+ if DAY_LW_DAY_HITS_RATIO < 47 then goto N24_5;
+ else goto T24_7;
+
+N24_5:
+ if MAX_SCORE < 214610 then goto T24_4;
+ else goto N24_6;
+
+T24_4:
+ response = -0.00305392;
+ goto D24;
+
+N24_6:
+ if PREV_DAY_HITS < 46.5 then goto T24_5;
+ else goto T24_6;
+
+T24_5:
+ response = 0.00171595;
+ goto D24;
+
+T24_6:
+ response = 0.0157708;
+ goto D24;
+
+T24_7:
+ response = 0.0123294;
+ goto D24;
+
+D24:
+
+tnscore = tnscore + response;
+
+/* Tree 26 of 97 */
+/* N terminal nodes = 7, Depth = 5 */
+
+
+N25_1:
+ if PUB_TODAY_AVG < 0.815 then goto N25_2;
+ else goto N25_5;
+
+N25_2:
+ if BUSINESS < 0.05 then goto N25_3;
+ else goto T25_4;
+
+N25_3:
+ if PUB_TODAY_AVG < 0.155 then goto T25_1;
+ else goto N25_4;
+
+T25_1:
+ response = -0.00239543;
+ goto D25;
+
+N25_4:
+ if PREV_DAY_HITS < 0.5 then goto T25_2;
+ else goto T25_3;
+
+T25_2:
+ response = -0.00219916;
+ goto D25;
+
+T25_3:
+ response = 0.00267906;
+ goto D25;
+
+T25_4:
+ response = -0.00274426;
+ goto D25;
+
+N25_5:
+ if ISTITLE_AVG < 0.95 then goto N25_6;
+ else goto T25_7;
+
+N25_6:
+ if DAY_PD_HITS_RATIO < 1.445 then goto T25_5;
+ else goto T25_6;
+
+T25_5:
+ response = -0.012251;
+ goto D25;
+
+T25_6:
+ response = 0.00474059;
+ goto D25;
+
+T25_7:
+ response = -0.000650252;
+ goto D25;
+
+D25:
+
+tnscore = tnscore + response;
+
+/* Tree 27 of 97 */
+/* N terminal nodes = 7, Depth = 6 */
+
+
+N26_1:
+ if ISABSTRACT_AVG < 0.105 then goto N26_2;
+ else goto T26_7;
+
+N26_2:
+ if MAX_SCORE < 235080 then goto T26_1;
+ else goto N26_3;
+
+T26_1:
+ response = -0.00337944;
+ goto D26;
+
+N26_3:
+ if SUPERDUPER_AVG < 0.105 then goto N26_4;
+ else goto N26_6;
+
+N26_4:
+ if BUSINESS < 0.435 then goto N26_5;
+ else goto T26_4;
+
+N26_5:
+ if MAX_SCORE < 293262 then goto T26_2;
+ else goto T26_3;
+
+T26_2:
+ response = 0.00942708;
+ goto D26;
+
+T26_3:
+ response = 0.00296784;
+ goto D26;
+
+T26_4:
+ response = -0.00165307;
+ goto D26;
+
+N26_6:
+ if MAX_SCORE < 262829 then goto T26_5;
+ else goto T26_6;
+
+T26_5:
+ response = -0.00745914;
+ goto D26;
+
+T26_6:
+ response = 0.0011197;
+ goto D26;
+
+T26_7:
+ response = -0.0017808;
+ goto D26;
+
+D26:
+
+tnscore = tnscore + response;
+
+/* Tree 28 of 97 */
+/* N terminal nodes = 7, Depth = 6 */
+
+
+N27_1:
+ if MAX_SCORE < 347080 then goto N27_2;
+ else goto T27_7;
+
+N27_2:
+ if DAY_LW_DAY_HITS_RATIO < 4.31 then goto N27_3;
+ else goto N27_4;
+
+N27_3:
+ if NATIONALNEWS < 0.295 then goto T27_1;
+ else goto T27_2;
+
+T27_1:
+ response = -0.00181733;
+ goto D27;
+
+T27_2:
+ response = 0.00242649;
+ goto D27;
+
+N27_4:
+ if MAX_SCORE < 313528 then goto N27_5;
+ else goto T27_6;
+
+N27_5:
+ if LOCALNEWS < 0.53 then goto N27_6;
+ else goto T27_5;
+
+N27_6:
+ if TOPSTORY < 0.355 then goto T27_3;
+ else goto T27_4;
+
+T27_3:
+ response = 0.00109569;
+ goto D27;
+
+T27_4:
+ response = 0.00947164;
+ goto D27;
+
+T27_5:
+ response = 0.0165664;
+ goto D27;
+
+T27_6:
+ response = -0.00846682;
+ goto D27;
+
+T27_7:
+ response = 0.00293581;
+ goto D27;
+
+D27:
+
+tnscore = tnscore + response;
+
+/* Tree 29 of 97 */
+/* N terminal nodes = 7, Depth = 5 */
+
+
+N28_1:
+ if MAX_SCORE < 177806 then goto T28_1;
+ else goto N28_2;
+
+T28_1:
+ response = -0.00360187;
+ goto D28;
+
+N28_2:
+ if TOPSTORY < 0.295 then goto N28_3;
+ else goto N28_5;
+
+N28_3:
+ if LOCALNEWS < 0.765 then goto T28_2;
+ else goto N28_4;
+
+T28_2:
+ response = 4.80638e-06;
+ goto D28;
+
+N28_4:
+ if ISTITLE_AVG < 0.29 then goto T28_3;
+ else goto T28_4;
+
+T28_3:
+ response = 0.0164568;
+ goto D28;
+
+T28_4:
+ response = 0.00112041;
+ goto D28;
+
+N28_5:
+ if INTLNEWS < 0.355 then goto T28_5;
+ else goto N28_6;
+
+T28_5:
+ response = 0.00153933;
+ goto D28;
+
+N28_6:
+ if WEEKAVG < 0.36 then goto T28_6;
+ else goto T28_7;
+
+T28_6:
+ response = -0.00129083;
+ goto D28;
+
+T28_7:
+ response = 0.0150131;
+ goto D28;
+
+D28:
+
+tnscore = tnscore + response;
+
+/* Tree 30 of 97 */
+/* N terminal nodes = 7, Depth = 6 */
+
+
+N29_1:
+ if ISTITLE_AVG < 0.73 then goto N29_2;
+ else goto T29_7;
+
+N29_2:
+ if BUSINESS < 0.27 then goto N29_3;
+ else goto N29_6;
+
+N29_3:
+ if MAX_MIN_RANK < 9 then goto N29_4;
+ else goto T29_4;
+
+N29_4:
+ if SUPERDUPER_AVG < 0.315 then goto N29_5;
+ else goto T29_3;
+
+N29_5:
+ if PREV_DAY_HITS < 17.5 then goto T29_1;
+ else goto T29_2;
+
+T29_1:
+ response = 0.00272769;
+ goto D29;
+
+T29_2:
+ response = 0.0136338;
+ goto D29;
+
+T29_3:
+ response = -0.000341266;
+ goto D29;
+
+T29_4:
+ response = 0.0154743;
+ goto D29;
+
+N29_6:
+ if NATIONALNEWS < 0.21 then goto T29_5;
+ else goto T29_6;
+
+T29_5:
+ response = -0.0029607;
+ goto D29;
+
+T29_6:
+ response = 0.0128593;
+ goto D29;
+
+T29_7:
+ response = -0.00131249;
+ goto D29;
+
+D29:
+
+tnscore = tnscore + response;
+
+/* Tree 31 of 97 */
+/* N terminal nodes = 7, Depth = 6 */
+
+
+N30_1:
+ if DAY_LW_DAY_HITS_RATIO < 7.585 then goto N30_2;
+ else goto T30_7;
+
+N30_2:
+ if MAX_SCORE < 424137 then goto N30_3;
+ else goto N30_4;
+
+N30_3:
+ if DAY_WEEK_AVG_RATIO < 4.78 then goto T30_1;
+ else goto T30_2;
+
+T30_1:
+ response = -0.000874675;
+ goto D30;
+
+T30_2:
+ response = -0.0111332;
+ goto D30;
+
+N30_4:
+ if ENTERTAINMENT < 0.12 then goto N30_5;
+ else goto T30_6;
+
+N30_5:
+ if DAY_LW_DAY_HITS_RATIO < 2.5 then goto N30_6;
+ else goto T30_5;
+
+N30_6:
+ if DAY_WEEK_AVG_RATIO < 0.74 then goto T30_3;
+ else goto T30_4;
+
+T30_3:
+ response = 0.00976536;
+ goto D30;
+
+T30_4:
+ response = 0.0306272;
+ goto D30;
+
+T30_5:
+ response = 0.00520021;
+ goto D30;
+
+T30_6:
+ response = 8.67293e-05;
+ goto D30;
+
+T30_7:
+ response = 0.00333736;
+ goto D30;
+
+D30:
+
+tnscore = tnscore + response;
+
+/* Tree 32 of 97 */
+/* N terminal nodes = 7, Depth = 6 */
+
+
+N31_1:
+ if DAY_PD_HITS_RATIO < 0.085 then goto T31_1;
+ else goto N31_2;
+
+T31_1:
+ response = 0.00972107;
+ goto D31;
+
+N31_2:
+ if SPORTS < 0.845 then goto N31_3;
+ else goto T31_7;
+
+N31_3:
+ if PUB_TODAY_AVG < 0.95 then goto N31_4;
+ else goto N31_5;
+
+N31_4:
+ if PREV_DAY_HITS < 0.5 then goto T31_2;
+ else goto T31_3;
+
+T31_2:
+ response = -0.00391231;
+ goto D31;
+
+T31_3:
+ response = -0.000254135;
+ goto D31;
+
+N31_5:
+ if MAX_MIN_RANK < 7 then goto N31_6;
+ else goto T31_6;
+
+N31_6:
+ if DAY_LW_DAY_HITS_RATIO < 19.5 then goto T31_4;
+ else goto T31_5;
+
+T31_4:
+ response = -0.000175771;
+ goto D31;
+
+T31_5:
+ response = 0.00523989;
+ goto D31;
+
+T31_6:
+ response = 0.0109531;
+ goto D31;
+
+T31_7:
+ response = 0.00548354;
+ goto D31;
+
+D31:
+
+tnscore = tnscore + response;
+
+/* Tree 33 of 97 */
+/* N terminal nodes = 7, Depth = 6 */
+
+
+N32_1:
+ if MAX_SCORE < 466894 then goto N32_2;
+ else goto T32_7;
+
+N32_2:
+ if NATIONALNEWS < 0.21 then goto N32_3;
+ else goto N32_6;
+
+N32_3:
+ if DAY_PD_HITS_RATIO < 0.055 then goto T32_1;
+ else goto N32_4;
+
+T32_1:
+ response = 0.0159556;
+ goto D32;
+
+N32_4:
+ if REGIONALNEWS < 0.05 then goto T32_2;
+ else goto N32_5;
+
+T32_2:
+ response = -0.00112302;
+ goto D32;
+
+N32_5:
+ if DAY_PD_HITS_RATIO < 1.105 then goto T32_3;
+ else goto T32_4;
+
+T32_3:
+ response = 0.0140125;
+ goto D32;
+
+T32_4:
+ response = -0.000724566;
+ goto D32;
+
+N32_6:
+ if AVG_RANK < 8.1 then goto T32_5;
+ else goto T32_6;
+
+T32_5:
+ response = -0.000273744;
+ goto D32;
+
+T32_6:
+ response = 0.00546871;
+ goto D32;
+
+T32_7:
+ response = 0.00555251;
+ goto D32;
+
+D32:
+
+tnscore = tnscore + response;
+
+/* Tree 34 of 97 */
+/* N terminal nodes = 7, Depth = 5 */
+
+
+N33_1:
+ if MAX_SCORE < 286123 then goto T33_1;
+ else goto N33_2;
+
+T33_1:
+ response = -0.000823047;
+ goto D33;
+
+N33_2:
+ if DAY_PREV_DAY_HITS_FRAC < 0.435 then goto N33_3;
+ else goto N33_4;
+
+N33_3:
+ if DAY_WEEK_AVG_RATIO < 1.555 then goto T33_2;
+ else goto T33_3;
+
+T33_2:
+ response = 0.0222361;
+ goto D33;
+
+T33_3:
+ response = 0.000189447;
+ goto D33;
+
+N33_4:
+ if MAX_MIN_RANK < 7 then goto N33_5;
+ else goto N33_6;
+
+N33_5:
+ if DAY_PREV_DAY_HITS_FRAC < 0.755 then goto T33_4;
+ else goto T33_5;
+
+T33_4:
+ response = -0.00264164;
+ goto D33;
+
+T33_5:
+ response = 0.00200226;
+ goto D33;
+
+N33_6:
+ if WEEKAVG < 1.07 then goto T33_6;
+ else goto T33_7;
+
+T33_6:
+ response = 0.0151659;
+ goto D33;
+
+T33_7:
+ response = 0.00171852;
+ goto D33;
+
+D33:
+
+tnscore = tnscore + response;
+
+/* Tree 35 of 97 */
+/* N terminal nodes = 7, Depth = 6 */
+
+
+N34_1:
+ if BUSINESS < 0.05 then goto N34_2;
+ else goto T34_7;
+
+N34_2:
+ if SUPERDUPER_AVG < 0.115 then goto N34_3;
+ else goto N34_4;
+
+N34_3:
+ if ISTITLE_AVG < 0.895 then goto T34_1;
+ else goto T34_2;
+
+T34_1:
+ response = 0.00536839;
+ goto D34;
+
+T34_2:
+ response = 7.53571e-05;
+ goto D34;
+
+N34_4:
+ if AVG_RANK < 8.21 then goto N34_5;
+ else goto T34_6;
+
+N34_5:
+ if PUB_TODAY_AVG < 0.13 then goto T34_3;
+ else goto N34_6;
+
+T34_3:
+ response = -0.00865216;
+ goto D34;
+
+N34_6:
+ if DAY_HITS_FRAC < 0.115 then goto T34_4;
+ else goto T34_5;
+
+T34_4:
+ response = 0.0146316;
+ goto D34;
+
+T34_5:
+ response = -0.00249;
+ goto D34;
+
+T34_6:
+ response = 0.00159523;
+ goto D34;
+
+T34_7:
+ response = -0.00131884;
+ goto D34;
+
+D34:
+
+tnscore = tnscore + response;
+
+/* Tree 36 of 97 */
+/* N terminal nodes = 7, Depth = 6 */
+
+
+N35_1:
+ if DAY_LW_DAY_HITS_RATIO < 33.5 then goto N35_2;
+ else goto T35_7;
+
+N35_2:
+ if LIFESTYLE < 0.05 then goto N35_3;
+ else goto T35_6;
+
+N35_3:
+ if DAY_PD_HITS_RATIO < 0.065 then goto T35_1;
+ else goto N35_4;
+
+T35_1:
+ response = 0.0130228;
+ goto D35;
+
+N35_4:
+ if SPORTS < 0.39 then goto N35_5;
+ else goto N35_6;
+
+N35_5:
+ if NATIONALNEWS < 0.05 then goto T35_2;
+ else goto T35_3;
+
+T35_2:
+ response = -0.00128992;
+ goto D35;
+
+T35_3:
+ response = 0.00127302;
+ goto D35;
+
+N35_6:
+ if DAY_PD_HITS_RATIO < 13.5 then goto T35_4;
+ else goto T35_5;
+
+T35_4:
+ response = 0.00304904;
+ goto D35;
+
+T35_5:
+ response = -0.0168329;
+ goto D35;
+
+T35_6:
+ response = -0.00545277;
+ goto D35;
+
+T35_7:
+ response = 0.00512552;
+ goto D35;
+
+D35:
+
+tnscore = tnscore + response;
+
+/* Tree 37 of 97 */
+/* N terminal nodes = 7, Depth = 7 */
+
+
+N36_1:
+ if DAY_HITS_FRAC < 0.765 then goto T36_1;
+ else goto N36_2;
+
+T36_1:
+ response = -0.000527346;
+ goto D36;
+
+N36_2:
+ if PUB_TODAY_AVG < 0.355 then goto T36_2;
+ else goto N36_3;
+
+T36_2:
+ response = -0.0153305;
+ goto D36;
+
+N36_3:
+ if DAY_HITS < 46.5 then goto N36_4;
+ else goto T36_7;
+
+N36_4:
+ if DAY_PD_HITS_RATIO < 29.5 then goto N36_5;
+ else goto T36_6;
+
+N36_5:
+ if NATIONALNEWS < 0.105 then goto T36_3;
+ else goto N36_6;
+
+T36_3:
+ response = 0.00073747;
+ goto D36;
+
+N36_6:
+ if DAY_WEEK_AVG_RATIO < 8.47 then goto T36_4;
+ else goto T36_5;
+
+T36_4:
+ response = 0.00769293;
+ goto D36;
+
+T36_5:
+ response = -0.0125825;
+ goto D36;
+
+T36_6:
+ response = -0.0108761;
+ goto D36;
+
+T36_7:
+ response = 0.00977691;
+ goto D36;
+
+D36:
+
+tnscore = tnscore + response;
+
+/* Tree 38 of 97 */
+/* N terminal nodes = 7, Depth = 7 */
+
+
+N37_1:
+ if MAX_SCORE < 177732 then goto T37_1;
+ else goto N37_2;
+
+T37_1:
+ response = -0.00260643;
+ goto D37;
+
+N37_2:
+ if BUSINESS < 0.05 then goto N37_3;
+ else goto T37_7;
+
+N37_3:
+ if WEEKAVG < 0.215 then goto T37_2;
+ else goto N37_4;
+
+T37_2:
+ response = -0.00327106;
+ goto D37;
+
+N37_4:
+ if AVG_RANK < 8.635 then goto N37_5;
+ else goto T37_6;
+
+N37_5:
+ if SUPERDUPER_AVG < 0.235 then goto N37_6;
+ else goto T37_5;
+
+N37_6:
+ if ISABSTRACT_AVG < 0.415 then goto T37_3;
+ else goto T37_4;
+
+T37_3:
+ response = 0.00414333;
+ goto D37;
+
+T37_4:
+ response = -0.00152725;
+ goto D37;
+
+T37_5:
+ response = -0.00286672;
+ goto D37;
+
+T37_6:
+ response = 0.00429432;
+ goto D37;
+
+T37_7:
+ response = -0.000407557;
+ goto D37;
+
+D37:
+
+tnscore = tnscore + response;
+
+/* Tree 39 of 97 */
+/* N terminal nodes = 7, Depth = 6 */
+
+
+N38_1:
+ if WEEKAVG < 0.64 then goto N38_2;
+ else goto N38_3;
+
+N38_2:
+ if SUPERDUPER_AVG < 0.29 then goto T38_1;
+ else goto T38_2;
+
+T38_1:
+ response = -0.00013784;
+ goto D38;
+
+T38_2:
+ response = -0.00368109;
+ goto D38;
+
+N38_3:
+ if MAX_SCORE < 271407 then goto N38_4;
+ else goto T38_7;
+
+N38_4:
+ if MAX_MIN_RANK < 5 then goto T38_3;
+ else goto N38_5;
+
+T38_3:
+ response = 0.000985637;
+ goto D38;
+
+N38_5:
+ if SUPERDUPER_AVG < 0.115 then goto N38_6;
+ else goto T38_6;
+
+N38_6:
+ if DAY_LW_DAY_HITS_RATIO < 4.415 then goto T38_4;
+ else goto T38_5;
+
+T38_4:
+ response = -0.00258674;
+ goto D38;
+
+T38_5:
+ response = 0.00694569;
+ goto D38;
+
+T38_6:
+ response = -0.00593057;
+ goto D38;
+
+T38_7:
+ response = 0.00237623;
+ goto D38;
+
+D38:
+
+tnscore = tnscore + response;
+
+/* Tree 40 of 97 */
+/* N terminal nodes = 7, Depth = 6 */
+
+
+N39_1:
+ if MAX_SCORE < 177732 then goto T39_1;
+ else goto N39_2;
+
+T39_1:
+ response = -0.00248172;
+ goto D39;
+
+N39_2:
+ if LIFESTYLE < 0.13 then goto N39_3;
+ else goto T39_7;
+
+N39_3:
+ if PUB_TODAY_AVG < 0.105 then goto N39_4;
+ else goto N39_5;
+
+N39_4:
+ if DAY_HITS < 3.5 then goto T39_2;
+ else goto T39_3;
+
+T39_2:
+ response = -0.00072429;
+ goto D39;
+
+T39_3:
+ response = -0.0150678;
+ goto D39;
+
+N39_5:
+ if DAY_HITS_FRAC < 0.075 then goto T39_4;
+ else goto N39_6;
+
+T39_4:
+ response = 0.0156611;
+ goto D39;
+
+N39_6:
+ if BUSINESS < 0.05 then goto T39_5;
+ else goto T39_6;
+
+T39_5:
+ response = 0.00219968;
+ goto D39;
+
+T39_6:
+ response = -0.000365826;
+ goto D39;
+
+T39_7:
+ response = -0.00592673;
+ goto D39;
+
+D39:
+
+tnscore = tnscore + response;
+
+/* Tree 41 of 97 */
+/* N terminal nodes = 7, Depth = 6 */
+
+
+N40_1:
+ if WEEKAVG < 0.215 then goto T40_1;
+ else goto N40_2;
+
+T40_1:
+ response = -0.0047613;
+ goto D40;
+
+N40_2:
+ if SPORTS < 0.355 then goto N40_3;
+ else goto N40_6;
+
+N40_3:
+ if MIN_RANK < 5 then goto N40_4;
+ else goto T40_5;
+
+N40_4:
+ if MAX_SCORE < 467877 then goto T40_2;
+ else goto N40_5;
+
+T40_2:
+ response = -0.0025312;
+ goto D40;
+
+N40_5:
+ if MAX_SCORE < 576366 then goto T40_3;
+ else goto T40_4;
+
+T40_3:
+ response = 0.0134173;
+ goto D40;
+
+T40_4:
+ response = -0.00903108;
+ goto D40;
+
+T40_5:
+ response = 0.00030441;
+ goto D40;
+
+N40_6:
+ if WEEKAVG < 5.07 then goto T40_6;
+ else goto T40_7;
+
+T40_6:
+ response = 0.00170865;
+ goto D40;
+
+T40_7:
+ response = 0.0116233;
+ goto D40;
+
+D40:
+
+tnscore = tnscore + response;
+
+/* Tree 42 of 97 */
+/* N terminal nodes = 7, Depth = 6 */
+
+
+N41_1:
+ if WEEKAVG < 0.215 then goto T41_1;
+ else goto N41_2;
+
+T41_1:
+ response = -0.00445856;
+ goto D41;
+
+N41_2:
+ if DAY_PREV_DAY_HITS_FRAC < 0.725 then goto N41_3;
+ else goto N41_4;
+
+N41_3:
+ if MAX_SCORE < 459781 then goto T41_2;
+ else goto T41_3;
+
+T41_2:
+ response = -0.00110273;
+ goto D41;
+
+T41_3:
+ response = 0.01224;
+ goto D41;
+
+N41_4:
+ if SPORTS < 0.87 then goto N41_5;
+ else goto T41_7;
+
+N41_5:
+ if NATIONALNEWS < 0.05 then goto T41_4;
+ else goto N41_6;
+
+T41_4:
+ response = 0.000176374;
+ goto D41;
+
+N41_6:
+ if BUSINESS < 0.185 then goto T41_5;
+ else goto T41_6;
+
+T41_5:
+ response = 0.00101462;
+ goto D41;
+
+T41_6:
+ response = 0.0103262;
+ goto D41;
+
+T41_7:
+ response = 0.00758848;
+ goto D41;
+
+D41:
+
+tnscore = tnscore + response;
+
+/* Tree 43 of 97 */
+/* N terminal nodes = 7, Depth = 5 */
+
+
+N42_1:
+ if MAX_SCORE < 588664 then goto N42_2;
+ else goto T42_7;
+
+N42_2:
+ if MAX_SCORE < 453568 then goto N42_3;
+ else goto N42_5;
+
+N42_3:
+ if PREV_DAY_HITS < 26.5 then goto T42_1;
+ else goto N42_4;
+
+T42_1:
+ response = -7.786e-05;
+ goto D42;
+
+N42_4:
+ if WEEKAVG < 9.215 then goto T42_2;
+ else goto T42_3;
+
+T42_2:
+ response = -0.012221;
+ goto D42;
+
+T42_3:
+ response = -0.00126183;
+ goto D42;
+
+N42_5:
+ if DAY_PREV_DAY_HITS_FRAC < 0.555 then goto T42_4;
+ else goto N42_6;
+
+T42_4:
+ response = 0.0175351;
+ goto D42;
+
+N42_6:
+ if AVG_RANK < 9.7 then goto T42_5;
+ else goto T42_6;
+
+T42_5:
+ response = 7.47189e-05;
+ goto D42;
+
+T42_6:
+ response = 0.0152525;
+ goto D42;
+
+T42_7:
+ response = -0.0113374;
+ goto D42;
+
+D42:
+
+tnscore = tnscore + response;
+
+/* Tree 44 of 97 */
+/* N terminal nodes = 7, Depth = 6 */
+
+
+N43_1:
+ if TOPSTORY < 0.295 then goto T43_1;
+ else goto N43_2;
+
+T43_1:
+ response = -0.000312071;
+ goto D43;
+
+N43_2:
+ if MAX_MIN_RANK < 7 then goto N43_3;
+ else goto T43_7;
+
+N43_3:
+ if ISTITLE_AVG < 0.185 then goto N43_4;
+ else goto N43_6;
+
+N43_4:
+ if MAX_SCORE < 378124 then goto T43_2;
+ else goto N43_5;
+
+T43_2:
+ response = 0.00111897;
+ goto D43;
+
+N43_5:
+ if MAX_SCORE < 408027 then goto T43_3;
+ else goto T43_4;
+
+T43_3:
+ response = -0.0203516;
+ goto D43;
+
+T43_4:
+ response = 0.0012991;
+ goto D43;
+
+N43_6:
+ if INTLNEWS < 0.13 then goto T43_5;
+ else goto T43_6;
+
+T43_5:
+ response = 0.000774937;
+ goto D43;
+
+T43_6:
+ response = 0.00732047;
+ goto D43;
+
+T43_7:
+ response = 0.0117253;
+ goto D43;
+
+D43:
+
+tnscore = tnscore + response;
+
+/* Tree 45 of 97 */
+/* N terminal nodes = 7, Depth = 6 */
+
+
+N44_1:
+ if MAX_SCORE < 178085 then goto T44_1;
+ else goto N44_2;
+
+T44_1:
+ response = -0.00220705;
+ goto D44;
+
+N44_2:
+ if WEEKAVG < 6.64 then goto N44_3;
+ else goto T44_7;
+
+N44_3:
+ if SUPERDUPER_AVG < 0.105 then goto N44_4;
+ else goto N44_5;
+
+N44_4:
+ if DAY_PD_HITS_RATIO < 0.115 then goto T44_2;
+ else goto T44_3;
+
+T44_2:
+ response = 0.0149532;
+ goto D44;
+
+T44_3:
+ response = 0.00106296;
+ goto D44;
+
+N44_5:
+ if AVG_RANK < 7.73 then goto N44_6;
+ else goto T44_6;
+
+N44_6:
+ if PUB_TODAY_AVG < 0.13 then goto T44_4;
+ else goto T44_5;
+
+T44_4:
+ response = -0.0104993;
+ goto D44;
+
+T44_5:
+ response = -0.00177497;
+ goto D44;
+
+T44_6:
+ response = 2.5899e-07;
+ goto D44;
+
+T44_7:
+ response = 0.00418893;
+ goto D44;
+
+D44:
+
+tnscore = tnscore + response;
+
+/* Tree 46 of 97 */
+/* N terminal nodes = 7, Depth = 6 */
+
+
+N45_1:
+ if ISTITLE_AVG < 0.585 then goto N45_2;
+ else goto T45_7;
+
+N45_2:
+ if AVG_RANK < 8.47 then goto T45_1;
+ else goto N45_3;
+
+T45_1:
+ response = 0.00253086;
+ goto D45;
+
+N45_3:
+ if DAY_HITS_FRAC < 0.885 then goto N45_4;
+ else goto N45_5;
+
+N45_4:
+ if LOCALNEWS < 0.13 then goto T45_2;
+ else goto T45_3;
+
+T45_2:
+ response = -0.000894801;
+ goto D45;
+
+T45_3:
+ response = -0.00988189;
+ goto D45;
+
+N45_5:
+ if NATIONALNEWS < 0.11 then goto N45_6;
+ else goto T45_6;
+
+N45_6:
+ if MAX_SCORE < 282066 then goto T45_4;
+ else goto T45_5;
+
+T45_4:
+ response = 0.00689219;
+ goto D45;
+
+T45_5:
+ response = -0.00300841;
+ goto D45;
+
+T45_6:
+ response = 0.0150141;
+ goto D45;
+
+T45_7:
+ response = -0.00102603;
+ goto D45;
+
+D45:
+
+tnscore = tnscore + response;
+
+/* Tree 47 of 97 */
+/* N terminal nodes = 7, Depth = 6 */
+
+
+N46_1:
+ if DAY_PREV_DAY_HITS_FRAC < 0.985 then goto T46_1;
+ else goto N46_2;
+
+T46_1:
+ response = -0.000843826;
+ goto D46;
+
+N46_2:
+ if MIN_RANK < 9 then goto N46_3;
+ else goto T46_7;
+
+N46_3:
+ if AVG_RANK < 8.71 then goto T46_2;
+ else goto N46_4;
+
+T46_2:
+ response = 0.000451436;
+ goto D46;
+
+N46_4:
+ if SUPERDUPER_AVG < 0.27 then goto N46_5;
+ else goto N46_6;
+
+N46_5:
+ if DAY_WEEK_AVG_RATIO < 5.05 then goto T46_3;
+ else goto T46_4;
+
+T46_3:
+ response = 0.00639888;
+ goto D46;
+
+T46_4:
+ response = 0.020614;
+ goto D46;
+
+N46_6:
+ if NATIONALNEWS < 0.185 then goto T46_5;
+ else goto T46_6;
+
+T46_5:
+ response = -0.00149465;
+ goto D46;
+
+T46_6:
+ response = 0.0118779;
+ goto D46;
+
+T46_7:
+ response = -0.00241922;
+ goto D46;
+
+D46:
+
+tnscore = tnscore + response;
+
+/* Tree 48 of 97 */
+/* N terminal nodes = 7, Depth = 5 */
+
+
+N47_1:
+ if HEALTH < 0.105 then goto N47_2;
+ else goto T47_7;
+
+N47_2:
+ if DAY_PREV_DAY_HITS_FRAC < 0.725 then goto N47_3;
+ else goto N47_5;
+
+N47_3:
+ if DAY_PREV_DAY_HITS_FRAC < 0.405 then goto N47_4;
+ else goto T47_3;
+
+N47_4:
+ if INTLNEWS < 0.315 then goto T47_1;
+ else goto T47_2;
+
+T47_1:
+ response = 0.00140618;
+ goto D47;
+
+T47_2:
+ response = 0.0145332;
+ goto D47;
+
+T47_3:
+ response = -0.00130877;
+ goto D47;
+
+N47_5:
+ if WEEKAVG < 0.5 then goto T47_4;
+ else goto N47_6;
+
+T47_4:
+ response = -0.000696011;
+ goto D47;
+
+N47_6:
+ if MAX_MIN_RANK < 5 then goto T47_5;
+ else goto T47_6;
+
+T47_5:
+ response = 0.0028215;
+ goto D47;
+
+T47_6:
+ response = -0.00090855;
+ goto D47;
+
+T47_7:
+ response = -0.0059993;
+ goto D47;
+
+D47:
+
+tnscore = tnscore + response;
+
+/* Tree 49 of 97 */
+/* N terminal nodes = 7, Depth = 4 */
+
+
+N48_1:
+ if SPORTS < 0.585 then goto N48_2;
+ else goto N48_4;
+
+N48_2:
+ if TOPSTORY < 0.295 then goto T48_1;
+ else goto N48_3;
+
+T48_1:
+ response = -0.000394764;
+ goto D48;
+
+N48_3:
+ if ENTERTAINMENT < 0.05 then goto T48_2;
+ else goto T48_3;
+
+T48_2:
+ response = 0.00143724;
+ goto D48;
+
+T48_3:
+ response = 0.00930005;
+ goto D48;
+
+N48_4:
+ if AVG_RANK < 5.55 then goto N48_5;
+ else goto N48_6;
+
+N48_5:
+ if DAY_WEEK_AVG_RATIO < 1.955 then goto T48_4;
+ else goto T48_5;
+
+T48_4:
+ response = 0.00377635;
+ goto D48;
+
+T48_5:
+ response = 0.0210534;
+ goto D48;
+
+N48_6:
+ if MAX_SCORE < 389202 then goto T48_6;
+ else goto T48_7;
+
+T48_6:
+ response = 0.00246072;
+ goto D48;
+
+T48_7:
+ response = -0.012129;
+ goto D48;
+
+D48:
+
+tnscore = tnscore + response;
+
+/* Tree 50 of 97 */
+/* N terminal nodes = 7, Depth = 6 */
+
+
+N49_1:
+ if MAX_SCORE < 406793 then goto N49_2;
+ else goto N49_6;
+
+N49_2:
+ if DAY_PD_HITS_RATIO < 0.075 then goto T49_1;
+ else goto N49_3;
+
+T49_1:
+ response = 0.0102381;
+ goto D49;
+
+N49_3:
+ if PUB_TODAY_AVG < 0.295 then goto T49_2;
+ else goto N49_4;
+
+T49_2:
+ response = -0.00209613;
+ goto D49;
+
+N49_4:
+ if MAX_SCORE < 305867 then goto T49_3;
+ else goto N49_5;
+
+T49_3:
+ response = 0.000938554;
+ goto D49;
+
+N49_5:
+ if MAX_SCORE < 347812 then goto T49_4;
+ else goto T49_5;
+
+T49_4:
+ response = -0.00625349;
+ goto D49;
+
+T49_5:
+ response = -3.2361e-05;
+ goto D49;
+
+N49_6:
+ if PREV_DAY_HITS < 17.5 then goto T49_6;
+ else goto T49_7;
+
+T49_6:
+ response = 0.00426042;
+ goto D49;
+
+T49_7:
+ response = -0.0139803;
+ goto D49;
+
+D49:
+
+tnscore = tnscore + response;
+
+/* Tree 51 of 97 */
+/* N terminal nodes = 7, Depth = 6 */
+
+
+N50_1:
+ if MAX_SCORE < 187757 then goto T50_1;
+ else goto N50_2;
+
+T50_1:
+ response = -0.00190196;
+ goto D50;
+
+N50_2:
+ if NATIONALNEWS < 0.185 then goto T50_2;
+ else goto N50_3;
+
+T50_2:
+ response = 1.08423e-05;
+ goto D50;
+
+N50_3:
+ if PREV_DAY_HITS < 4.5 then goto N50_4;
+ else goto N50_5;
+
+N50_4:
+ if ISTITLE_AVG < 0.585 then goto T50_3;
+ else goto T50_4;
+
+T50_3:
+ response = 0.00687766;
+ goto D50;
+
+T50_4:
+ response = 0.00142303;
+ goto D50;
+
+N50_5:
+ if SUPERDUPER_AVG < 0.275 then goto T50_5;
+ else goto N50_6;
+
+T50_5:
+ response = -0.00581088;
+ goto D50;
+
+N50_6:
+ if INTLNEWS < 0.315 then goto T50_6;
+ else goto T50_7;
+
+T50_6:
+ response = 0.0130163;
+ goto D50;
+
+T50_7:
+ response = -0.00562813;
+ goto D50;
+
+D50:
+
+tnscore = tnscore + response;
+
+/* Tree 52 of 97 */
+/* N terminal nodes = 7, Depth = 5 */
+
+
+N51_1:
+ if MAX_SCORE < 423724 then goto N51_2;
+ else goto N51_3;
+
+N51_2:
+ if MAX_SCORE < 408911 then goto T51_1;
+ else goto T51_2;
+
+T51_1:
+ response = -0.000303869;
+ goto D51;
+
+T51_2:
+ response = -0.00754368;
+ goto D51;
+
+N51_3:
+ if MAX_SCORE < 435668 then goto T51_3;
+ else goto N51_4;
+
+T51_3:
+ response = 0.0194021;
+ goto D51;
+
+N51_4:
+ if DAY_HITS < 5.5 then goto N51_5;
+ else goto N51_6;
+
+N51_5:
+ if AVG_RANK < 9.265 then goto T51_4;
+ else goto T51_5;
+
+T51_4:
+ response = 0.00209562;
+ goto D51;
+
+T51_5:
+ response = 0.0171146;
+ goto D51;
+
+N51_6:
+ if MAX_SCORE < 466889 then goto T51_6;
+ else goto T51_7;
+
+T51_6:
+ response = -0.0147582;
+ goto D51;
+
+T51_7:
+ response = 0.00191369;
+ goto D51;
+
+D51:
+
+tnscore = tnscore + response;
+
+/* Tree 53 of 97 */
+/* N terminal nodes = 7, Depth = 5 */
+
+
+N52_1:
+ if PREV_DAY_HITS < 26.5 then goto N52_2;
+ else goto N52_4;
+
+N52_2:
+ if PREV_DAY_HITS < 19.5 then goto T52_1;
+ else goto N52_3;
+
+T52_1:
+ response = 4.07731e-06;
+ goto D52;
+
+N52_3:
+ if ISTITLE_AVG < 0.7 then goto T52_2;
+ else goto T52_3;
+
+T52_2:
+ response = 0.0180989;
+ goto D52;
+
+T52_3:
+ response = 0.0014322;
+ goto D52;
+
+N52_4:
+ if MAX_SCORE < 378124 then goto N52_5;
+ else goto T52_7;
+
+N52_5:
+ if INTLNEWS < 0.25 then goto T52_4;
+ else goto N52_6;
+
+T52_4:
+ response = -0.00926901;
+ goto D52;
+
+N52_6:
+ if ISTITLE_AVG < 0.15 then goto T52_5;
+ else goto T52_6;
+
+T52_5:
+ response = 0.00951019;
+ goto D52;
+
+T52_6:
+ response = -0.00389496;
+ goto D52;
+
+T52_7:
+ response = -0.0168153;
+ goto D52;
+
+D52:
+
+tnscore = tnscore + response;
+
+/* Tree 54 of 97 */
+/* N terminal nodes = 7, Depth = 4 */
+
+
+N53_1:
+ if ISABSTRACT_AVG < 0.815 then goto N53_2;
+ else goto N53_5;
+
+N53_2:
+ if PUB_TODAY_AVG < 0.05 then goto N53_3;
+ else goto N53_4;
+
+N53_3:
+ if PREV_DAY_HITS < 16.5 then goto T53_1;
+ else goto T53_2;
+
+T53_1:
+ response = -0.00256108;
+ goto D53;
+
+T53_2:
+ response = 0.010687;
+ goto D53;
+
+N53_4:
+ if BUSINESS < 0.05 then goto T53_3;
+ else goto T53_4;
+
+T53_3:
+ response = 0.00107951;
+ goto D53;
+
+T53_4:
+ response = -0.00114831;
+ goto D53;
+
+N53_5:
+ if AVG_RANK < 8.31 then goto T53_5;
+ else goto N53_6;
+
+T53_5:
+ response = -0.000490289;
+ goto D53;
+
+N53_6:
+ if SPORTS < 0.315 then goto T53_6;
+ else goto T53_7;
+
+T53_6:
+ response = 0.00273855;
+ goto D53;
+
+T53_7:
+ response = 0.0123011;
+ goto D53;
+
+D53:
+
+tnscore = tnscore + response;
+
+/* Tree 55 of 97 */
+/* N terminal nodes = 7, Depth = 5 */
+
+
+N54_1:
+ if SUPERDUPER_AVG < 0.115 then goto N54_2;
+ else goto N54_3;
+
+N54_2:
+ if DAY_PD_HITS_RATIO < 0.115 then goto T54_1;
+ else goto T54_2;
+
+T54_1:
+ response = 0.0119548;
+ goto D54;
+
+T54_2:
+ response = 0.000425021;
+ goto D54;
+
+N54_3:
+ if INTLNEWS < 0.155 then goto N54_4;
+ else goto N54_6;
+
+N54_4:
+ if ISTITLE_AVG < 0.185 then goto N54_5;
+ else goto T54_5;
+
+N54_5:
+ if INTLNEWS < 0.05 then goto T54_3;
+ else goto T54_4;
+
+T54_3:
+ response = -0.00395117;
+ goto D54;
+
+T54_4:
+ response = -0.0145832;
+ goto D54;
+
+T54_5:
+ response = -0.00135759;
+ goto D54;
+
+N54_6:
+ if TOPSTORY < 0.295 then goto T54_6;
+ else goto T54_7;
+
+T54_6:
+ response = -0.00119962;
+ goto D54;
+
+T54_7:
+ response = 0.00380053;
+ goto D54;
+
+D54:
+
+tnscore = tnscore + response;
+
+/* Tree 56 of 97 */
+/* N terminal nodes = 7, Depth = 7 */
+
+
+N55_1:
+ if MAX_SCORE < 187608 then goto T55_1;
+ else goto N55_2;
+
+T55_1:
+ response = -0.00129909;
+ goto D55;
+
+N55_2:
+ if DAY_WEEK_AVG_RATIO < 10.06 then goto N55_3;
+ else goto T55_7;
+
+N55_3:
+ if DAY_WEEK_AVG_RATIO < 9.235 then goto N55_4;
+ else goto T55_6;
+
+N55_4:
+ if DAY_PD_HITS_RATIO < 0.055 then goto T55_2;
+ else goto N55_5;
+
+T55_2:
+ response = 0.0114518;
+ goto D55;
+
+N55_5:
+ if PREV_DAY_HITS < 26.5 then goto T55_3;
+ else goto N55_6;
+
+T55_3:
+ response = 0.000965212;
+ goto D55;
+
+N55_6:
+ if LOCALNEWS < 0.05 then goto T55_4;
+ else goto T55_5;
+
+T55_4:
+ response = -0.00805593;
+ goto D55;
+
+T55_5:
+ response = 0.00585007;
+ goto D55;
+
+T55_6:
+ response = -0.0101744;
+ goto D55;
+
+T55_7:
+ response = 0.010206;
+ goto D55;
+
+D55:
+
+tnscore = tnscore + response;
+
+/* Tree 57 of 97 */
+/* N terminal nodes = 7, Depth = 5 */
+
+
+N56_1:
+ if DAY_LW_DAY_HITS_RATIO < 4.71 then goto N56_2;
+ else goto N56_3;
+
+N56_2:
+ if SUPERDUPER_AVG < 0.315 then goto T56_1;
+ else goto T56_2;
+
+T56_1:
+ response = -0.000236511;
+ goto D56;
+
+T56_2:
+ response = -0.00312389;
+ goto D56;
+
+N56_3:
+ if DAY_PD_HITS_RATIO < 13.5 then goto N56_4;
+ else goto N56_5;
+
+N56_4:
+ if MAX_SCORE < 253372 then goto T56_3;
+ else goto T56_4;
+
+T56_3:
+ response = -0.00118965;
+ goto D56;
+
+T56_4:
+ response = 0.00291415;
+ goto D56;
+
+N56_5:
+ if ENTERTAINMENT < 0.05 then goto N56_6;
+ else goto T56_7;
+
+N56_6:
+ if SPORTS < 0.315 then goto T56_5;
+ else goto T56_6;
+
+T56_5:
+ response = -0.00292663;
+ goto D56;
+
+T56_6:
+ response = -0.0194296;
+ goto D56;
+
+T56_7:
+ response = 0.00658386;
+ goto D56;
+
+D56:
+
+tnscore = tnscore + response;
+
+/* Tree 58 of 97 */
+/* N terminal nodes = 7, Depth = 6 */
+
+
+N57_1:
+ if DAY_PREV_DAY_HITS_FRAC < 0.405 then goto N57_2;
+ else goto N57_3;
+
+N57_2:
+ if WEEKAVG < 0.5 then goto T57_1;
+ else goto T57_2;
+
+T57_1:
+ response = 0.0176531;
+ goto D57;
+
+T57_2:
+ response = 0.000911096;
+ goto D57;
+
+N57_3:
+ if SPORTS < 0.39 then goto T57_3;
+ else goto N57_4;
+
+T57_3:
+ response = -0.000682618;
+ goto D57;
+
+N57_4:
+ if DAY_PD_HITS_RATIO < 11.5 then goto N57_5;
+ else goto T57_7;
+
+N57_5:
+ if DAY_LW_DAY_HITS_RATIO < 4.75 then goto T57_4;
+ else goto N57_6;
+
+T57_4:
+ response = 0.000516855;
+ goto D57;
+
+N57_6:
+ if DAY_HITS_FRAC < 0.41 then goto T57_5;
+ else goto T57_6;
+
+T57_5:
+ response = 0.0159972;
+ goto D57;
+
+T57_6:
+ response = 0.00324363;
+ goto D57;
+
+T57_7:
+ response = -0.0108331;
+ goto D57;
+
+D57:
+
+tnscore = tnscore + response;
+
+/* Tree 59 of 97 */
+/* N terminal nodes = 7, Depth = 6 */
+
+
+N58_1:
+ if WEEKAVG < 0.36 then goto T58_1;
+ else goto N58_2;
+
+T58_1:
+ response = -0.00257521;
+ goto D58;
+
+N58_2:
+ if TOPSTORY < 0.635 then goto N58_3;
+ else goto T58_7;
+
+N58_3:
+ if PUB_TODAY_AVG < 0.79 then goto N58_4;
+ else goto T58_6;
+
+N58_4:
+ if DAY_PD_HITS_RATIO < 2.185 then goto N58_5;
+ else goto N58_6;
+
+N58_5:
+ if DAY_PREV_DAY_HITS_FRAC < 0.415 then goto T58_2;
+ else goto T58_3;
+
+T58_2:
+ response = 0.00607155;
+ goto D58;
+
+T58_3:
+ response = -0.000126015;
+ goto D58;
+
+N58_6:
+ if ISTITLE_AVG < 0.13 then goto T58_4;
+ else goto T58_5;
+
+T58_4:
+ response = -0.00728662;
+ goto D58;
+
+T58_5:
+ response = -0.000928754;
+ goto D58;
+
+T58_6:
+ response = 0.00147343;
+ goto D58;
+
+T58_7:
+ response = 0.014873;
+ goto D58;
+
+D58:
+
+tnscore = tnscore + response;
+
+/* Tree 60 of 97 */
+/* N terminal nodes = 7, Depth = 6 */
+
+
+N59_1:
+ if TOPSTORY < 0.185 then goto T59_1;
+ else goto N59_2;
+
+T59_1:
+ response = -0.000297667;
+ goto D59;
+
+N59_2:
+ if DAY_LW_DAY_HITS_RATIO < 6.3 then goto N59_3;
+ else goto T59_7;
+
+N59_3:
+ if PREV_DAY_HITS < 19.5 then goto N59_4;
+ else goto N59_6;
+
+N59_4:
+ if DAY_PD_HITS_RATIO < 0.13 then goto T59_2;
+ else goto N59_5;
+
+T59_2:
+ response = -0.00702476;
+ goto D59;
+
+N59_5:
+ if LOCALNEWS < 0.05 then goto T59_3;
+ else goto T59_4;
+
+T59_3:
+ response = 0.00592136;
+ goto D59;
+
+T59_4:
+ response = -0.000783801;
+ goto D59;
+
+N59_6:
+ if ISABSTRACT_AVG < 0.15 then goto T59_5;
+ else goto T59_6;
+
+T59_5:
+ response = 0.023326;
+ goto D59;
+
+T59_6:
+ response = 0.000803551;
+ goto D59;
+
+T59_7:
+ response = -0.00103664;
+ goto D59;
+
+D59:
+
+tnscore = tnscore + response;
+
+/* Tree 61 of 97 */
+/* N terminal nodes = 7, Depth = 7 */
+
+
+N60_1:
+ if WEEKAVG < 0.215 then goto T60_1;
+ else goto N60_2;
+
+T60_1:
+ response = -0.00379646;
+ goto D60;
+
+N60_2:
+ if MAX_MIN_RANK < 3 then goto N60_3;
+ else goto T60_7;
+
+N60_3:
+ if DAY_HITS < 1.5 then goto T60_2;
+ else goto N60_4;
+
+T60_2:
+ response = -0.00199037;
+ goto D60;
+
+N60_4:
+ if DAY_PD_HITS_RATIO < 1.125 then goto N60_5;
+ else goto T60_6;
+
+N60_5:
+ if DAY_LW_DAY_HITS_RATIO < 4.375 then goto T60_3;
+ else goto N60_6;
+
+T60_3:
+ response = 0.00535447;
+ goto D60;
+
+N60_6:
+ if DAY_PD_HITS_RATIO < 0.825 then goto T60_4;
+ else goto T60_5;
+
+T60_4:
+ response = 0.00562457;
+ goto D60;
+
+T60_5:
+ response = 0.0330072;
+ goto D60;
+
+T60_6:
+ response = 0.00138881;
+ goto D60;
+
+T60_7:
+ response = -0.000758841;
+ goto D60;
+
+D60:
+
+tnscore = tnscore + response;
+
+/* Tree 62 of 97 */
+/* N terminal nodes = 7, Depth = 6 */
+
+
+N61_1:
+ if DAY_HITS_FRAC < 0.435 then goto N61_2;
+ else goto N61_6;
+
+N61_2:
+ if NATIONALNEWS < 0.685 then goto N61_3;
+ else goto T61_5;
+
+N61_3:
+ if INTLNEWS < 0.47 then goto T61_1;
+ else goto N61_4;
+
+T61_1:
+ response = -0.000235511;
+ goto D61;
+
+N61_4:
+ if MAX_SCORE < 290762 then goto T61_2;
+ else goto N61_5;
+
+T61_2:
+ response = 0.000619978;
+ goto D61;
+
+N61_5:
+ if SUPERDUPER_AVG < 0.155 then goto T61_3;
+ else goto T61_4;
+
+T61_3:
+ response = 0.0182407;
+ goto D61;
+
+T61_4:
+ response = 0.00521312;
+ goto D61;
+
+T61_5:
+ response = 0.0140779;
+ goto D61;
+
+N61_6:
+ if MAX_SCORE < 484643 then goto T61_6;
+ else goto T61_7;
+
+T61_6:
+ response = -0.000518234;
+ goto D61;
+
+T61_7:
+ response = -0.00804112;
+ goto D61;
+
+D61:
+
+tnscore = tnscore + response;
+
+/* Tree 63 of 97 */
+/* N terminal nodes = 7, Depth = 6 */
+
+
+N62_1:
+ if DAY_PD_HITS_RATIO < 0.055 then goto T62_1;
+ else goto N62_2;
+
+T62_1:
+ response = 0.0111333;
+ goto D62;
+
+N62_2:
+ if DAY_LW_DAY_HITS_RATIO < 0.355 then goto T62_2;
+ else goto N62_3;
+
+T62_2:
+ response = -0.00829529;
+ goto D62;
+
+N62_3:
+ if PUB_TODAY_AVG < 0.95 then goto N62_4;
+ else goto N62_6;
+
+N62_4:
+ if DAY_PD_HITS_RATIO < 1.74 then goto T62_3;
+ else goto N62_5;
+
+T62_3:
+ response = 0.000529497;
+ goto D62;
+
+N62_5:
+ if NATIONALNEWS < 0.415 then goto T62_4;
+ else goto T62_5;
+
+T62_4:
+ response = -0.00200727;
+ goto D62;
+
+T62_5:
+ response = 0.0081622;
+ goto D62;
+
+N62_6:
+ if INTLNEWS < 0.47 then goto T62_6;
+ else goto T62_7;
+
+T62_6:
+ response = 0.00260098;
+ goto D62;
+
+T62_7:
+ response = -0.001284;
+ goto D62;
+
+D62:
+
+tnscore = tnscore + response;
+
+/* Tree 64 of 97 */
+/* N terminal nodes = 7, Depth = 5 */
+
+
+N63_1:
+ if MAX_MIN_RANK < 7 then goto N63_2;
+ else goto N63_4;
+
+N63_2:
+ if MAX_MIN_RANK < 5 then goto T63_1;
+ else goto N63_3;
+
+T63_1:
+ response = 0.000381058;
+ goto D63;
+
+N63_3:
+ if SUPERDUPER_AVG < 0.13 then goto T63_2;
+ else goto T63_3;
+
+T63_2:
+ response = 0.000329065;
+ goto D63;
+
+T63_3:
+ response = -0.00386397;
+ goto D63;
+
+N63_4:
+ if MAX_SCORE < 266105 then goto T63_4;
+ else goto N63_5;
+
+T63_4:
+ response = -0.000580382;
+ goto D63;
+
+N63_5:
+ if MAX_SCORE < 322321 then goto N63_6;
+ else goto T63_7;
+
+N63_6:
+ if DAY_WEEK_AVG_RATIO < 5 then goto T63_5;
+ else goto T63_6;
+
+T63_5:
+ response = 0.0158417;
+ goto D63;
+
+T63_6:
+ response = 0.000253264;
+ goto D63;
+
+T63_7:
+ response = 0.00216101;
+ goto D63;
+
+D63:
+
+tnscore = tnscore + response;
+
+/* Tree 65 of 97 */
+/* N terminal nodes = 7, Depth = 5 */
+
+
+N64_1:
+ if WEEKAVG < 0.215 then goto N64_2;
+ else goto N64_4;
+
+N64_2:
+ if SUPERDUPER_AVG < 0.5 then goto N64_3;
+ else goto T64_3;
+
+N64_3:
+ if LOCALNEWS < 0.5 then goto T64_1;
+ else goto T64_2;
+
+T64_1:
+ response = -0.00995113;
+ goto D64;
+
+T64_2:
+ response = 0.00292683;
+ goto D64;
+
+T64_3:
+ response = 0.00105182;
+ goto D64;
+
+N64_4:
+ if DAY_PREV_DAY_HITS_FRAC < 0.725 then goto T64_4;
+ else goto N64_5;
+
+T64_4:
+ response = -0.000575584;
+ goto D64;
+
+N64_5:
+ if TOPSTORY < 0.585 then goto N64_6;
+ else goto T64_7;
+
+N64_6:
+ if SPORTS < 0.87 then goto T64_5;
+ else goto T64_6;
+
+T64_5:
+ response = 0.000783846;
+ goto D64;
+
+T64_6:
+ response = 0.00745576;
+ goto D64;
+
+T64_7:
+ response = 0.0129932;
+ goto D64;
+
+D64:
+
+tnscore = tnscore + response;
+
+/* Tree 66 of 97 */
+/* N terminal nodes = 7, Depth = 5 */
+
+
+N65_1:
+ if HEALTH < 0.115 then goto N65_2;
+ else goto N65_6;
+
+N65_2:
+ if DAY_PD_HITS_RATIO < 25.165 then goto N65_3;
+ else goto N65_5;
+
+N65_3:
+ if DAY_WEEK_AVG_RATIO < 10.115 then goto N65_4;
+ else goto T65_3;
+
+N65_4:
+ if DAY_PREV_DAY_HITS_FRAC < 0.405 then goto T65_1;
+ else goto T65_2;
+
+T65_1:
+ response = 0.00322116;
+ goto D65;
+
+T65_2:
+ response = -0.00014541;
+ goto D65;
+
+T65_3:
+ response = 0.00878821;
+ goto D65;
+
+N65_5:
+ if DAY_PREV_DAY_HITS_FRAC < 0.975 then goto T65_4;
+ else goto T65_5;
+
+T65_4:
+ response = -0.0149181;
+ goto D65;
+
+T65_5:
+ response = -0.00209673;
+ goto D65;
+
+N65_6:
+ if MAX_SCORE < 286434 then goto T65_6;
+ else goto T65_7;
+
+T65_6:
+ response = -0.00861656;
+ goto D65;
+
+T65_7:
+ response = 0.00142851;
+ goto D65;
+
+D65:
+
+tnscore = tnscore + response;
+
+/* Tree 67 of 97 */
+/* N terminal nodes = 7, Depth = 6 */
+
+
+N66_1:
+ if LIFESTYLE < 0.185 then goto N66_2;
+ else goto N66_6;
+
+N66_2:
+ if MISC < 0.105 then goto N66_3;
+ else goto T66_5;
+
+N66_3:
+ if DAY_LW_DAY_HITS_RATIO < 0.925 then goto N66_4;
+ else goto T66_4;
+
+N66_4:
+ if MAX_SCORE < 273352 then goto N66_5;
+ else goto T66_3;
+
+N66_5:
+ if ISTITLE_AVG < 0.39 then goto T66_1;
+ else goto T66_2;
+
+T66_1:
+ response = 0.00606893;
+ goto D66;
+
+T66_2:
+ response = -0.00394074;
+ goto D66;
+
+T66_3:
+ response = -0.012762;
+ goto D66;
+
+T66_4:
+ response = -8.2932e-05;
+ goto D66;
+
+T66_5:
+ response = 0.00878689;
+ goto D66;
+
+N66_6:
+ if MAX_SCORE < 250603 then goto T66_6;
+ else goto T66_7;
+
+T66_6:
+ response = -0.00131893;
+ goto D66;
+
+T66_7:
+ response = -0.0107682;
+ goto D66;
+
+D66:
+
+tnscore = tnscore + response;
+
+/* Tree 68 of 97 */
+/* N terminal nodes = 7, Depth = 6 */
+
+
+N67_1:
+ if DAY_HITS < 13.5 then goto T67_1;
+ else goto N67_2;
+
+T67_1:
+ response = 0.000103863;
+ goto D67;
+
+N67_2:
+ if ENTERTAINMENT < 0.415 then goto N67_3;
+ else goto T67_7;
+
+N67_3:
+ if MIN_RANK < 7 then goto N67_4;
+ else goto N67_6;
+
+N67_4:
+ if DAY_PREV_DAY_HITS_FRAC < 0.875 then goto N67_5;
+ else goto T67_4;
+
+N67_5:
+ if MAX_SCORE < 261175 then goto T67_2;
+ else goto T67_3;
+
+T67_2:
+ response = -0.00601924;
+ goto D67;
+
+T67_3:
+ response = 0.00517774;
+ goto D67;
+
+T67_4:
+ response = -0.00731704;
+ goto D67;
+
+N67_6:
+ if ISABSTRACT_AVG < 0.685 then goto T67_5;
+ else goto T67_6;
+
+T67_5:
+ response = -0.00243371;
+ goto D67;
+
+T67_6:
+ response = 0.0102497;
+ goto D67;
+
+T67_7:
+ response = 0.0109447;
+ goto D67;
+
+D67:
+
+tnscore = tnscore + response;
+
+/* Tree 69 of 97 */
+/* N terminal nodes = 7, Depth = 6 */
+
+
+N68_1:
+ if DAY_WEEK_AVG_RATIO < 4.855 then goto N68_2;
+ else goto N68_3;
+
+N68_2:
+ if DAY_WEEK_AVG_RATIO < 4.625 then goto T68_1;
+ else goto T68_2;
+
+T68_1:
+ response = 0.000335357;
+ goto D68;
+
+T68_2:
+ response = 0.00823829;
+ goto D68;
+
+N68_3:
+ if NATIONALNEWS < 0.39 then goto N68_4;
+ else goto T68_7;
+
+N68_4:
+ if ISABSTRACT_AVG < 0.295 then goto N68_5;
+ else goto T68_6;
+
+N68_5:
+ if INTLNEWS < 0.95 then goto N68_6;
+ else goto T68_5;
+
+N68_6:
+ if NATIONALNEWS < 0.155 then goto T68_3;
+ else goto T68_4;
+
+T68_3:
+ response = -0.00576638;
+ goto D68;
+
+T68_4:
+ response = 0.00314375;
+ goto D68;
+
+T68_5:
+ response = 0.0086362;
+ goto D68;
+
+T68_6:
+ response = 0.00191928;
+ goto D68;
+
+T68_7:
+ response = -0.0129199;
+ goto D68;
+
+D68:
+
+tnscore = tnscore + response;
+
+/* Tree 70 of 97 */
+/* N terminal nodes = 7, Depth = 5 */
+
+
+N69_1:
+ if WEEKAVG < 1.07 then goto T69_1;
+ else goto N69_2;
+
+T69_1:
+ response = -0.000688613;
+ goto D69;
+
+N69_2:
+ if INTLNEWS < 0.355 then goto N69_3;
+ else goto N69_5;
+
+N69_3:
+ if POLITICS < 0.05 then goto N69_4;
+ else goto T69_4;
+
+N69_4:
+ if PREV_DAY_HITS < 33.5 then goto T69_2;
+ else goto T69_3;
+
+T69_2:
+ response = 0.000833826;
+ goto D69;
+
+T69_3:
+ response = -0.0106428;
+ goto D69;
+
+T69_4:
+ response = -0.0100621;
+ goto D69;
+
+N69_5:
+ if DAY_HITS < 5.5 then goto N69_6;
+ else goto T69_7;
+
+N69_6:
+ if DAY_PD_HITS_RATIO < 0.105 then goto T69_5;
+ else goto T69_6;
+
+T69_5:
+ response = -0.00557824;
+ goto D69;
+
+T69_6:
+ response = 0.0173808;
+ goto D69;
+
+T69_7:
+ response = 0.00181211;
+ goto D69;
+
+D69:
+
+tnscore = tnscore + response;
+
+/* Tree 71 of 97 */
+/* N terminal nodes = 7, Depth = 7 */
+
+
+N70_1:
+ if PUB_TODAY_AVG < 0.815 then goto T70_1;
+ else goto N70_2;
+
+T70_1:
+ response = -0.000647154;
+ goto D70;
+
+N70_2:
+ if DAY_PD_HITS_RATIO < 1.53 then goto T70_2;
+ else goto N70_3;
+
+T70_2:
+ response = -0.00676558;
+ goto D70;
+
+N70_3:
+ if SCIENCE < 0.05 then goto N70_4;
+ else goto T70_7;
+
+N70_4:
+ if PREV_DAY_HITS < 5.5 then goto N70_5;
+ else goto T70_6;
+
+N70_5:
+ if SUPERDUPER_AVG < 0.27 then goto N70_6;
+ else goto T70_5;
+
+N70_6:
+ if ISTITLE_AVG < 0.95 then goto T70_3;
+ else goto T70_4;
+
+T70_3:
+ response = 0.00474205;
+ goto D70;
+
+T70_4:
+ response = -0.00112826;
+ goto D70;
+
+T70_5:
+ response = -0.00194945;
+ goto D70;
+
+T70_6:
+ response = 0.00633626;
+ goto D70;
+
+T70_7:
+ response = 0.0126675;
+ goto D70;
+
+D70:
+
+tnscore = tnscore + response;
+
+/* Tree 72 of 97 */
+/* N terminal nodes = 7, Depth = 5 */
+
+
+N71_1:
+ if MAX_SCORE < 347896 then goto N71_2;
+ else goto N71_3;
+
+N71_2:
+ if NUM_WORDS < 2.5 then goto T71_1;
+ else goto T71_2;
+
+T71_1:
+ response = -8.59477e-05;
+ goto D71;
+
+T71_2:
+ response = -0.00464466;
+ goto D71;
+
+N71_3:
+ if LOCALNEWS < 0.105 then goto N71_4;
+ else goto N71_5;
+
+N71_4:
+ if PREV_DAY_HITS < 17.5 then goto T71_3;
+ else goto T71_4;
+
+T71_3:
+ response = 0.000861947;
+ goto D71;
+
+T71_4:
+ response = -0.00908692;
+ goto D71;
+
+N71_5:
+ if SUPERDUPER_AVG < 0.415 then goto N71_6;
+ else goto T71_7;
+
+N71_6:
+ if SPORTS < 0.125 then goto T71_5;
+ else goto T71_6;
+
+T71_5:
+ response = 0.00451276;
+ goto D71;
+
+T71_6:
+ response = 0.0182081;
+ goto D71;
+
+T71_7:
+ response = -0.0128104;
+ goto D71;
+
+D71:
+
+tnscore = tnscore + response;
+
+/* Tree 73 of 97 */
+/* N terminal nodes = 7, Depth = 7 */
+
+
+N72_1:
+ if SCIENCE < 0.365 then goto N72_2;
+ else goto T72_7;
+
+N72_2:
+ if MAX_SCORE < 588664 then goto N72_3;
+ else goto T72_6;
+
+N72_3:
+ if SUPERDUPER_AVG < 0.115 then goto T72_1;
+ else goto N72_4;
+
+T72_1:
+ response = 0.000473474;
+ goto D72;
+
+N72_4:
+ if MAX_SCORE < 282998 then goto T72_2;
+ else goto N72_5;
+
+T72_2:
+ response = -0.00203992;
+ goto D72;
+
+N72_5:
+ if SCIENCE < 0.105 then goto N72_6;
+ else goto T72_5;
+
+N72_6:
+ if SPORTS < 0.465 then goto T72_3;
+ else goto T72_4;
+
+T72_3:
+ response = 0.00173095;
+ goto D72;
+
+T72_4:
+ response = -0.00632811;
+ goto D72;
+
+T72_5:
+ response = -0.013829;
+ goto D72;
+
+T72_6:
+ response = -0.0095913;
+ goto D72;
+
+T72_7:
+ response = -0.00990551;
+ goto D72;
+
+D72:
+
+tnscore = tnscore + response;
+
+/* Tree 74 of 97 */
+/* N terminal nodes = 7, Depth = 5 */
+
+
+N73_1:
+ if NATIONALNEWS < 0.105 then goto T73_1;
+ else goto N73_2;
+
+T73_1:
+ response = -0.00062577;
+ goto D73;
+
+N73_2:
+ if SPORTS < 0.13 then goto N73_3;
+ else goto N73_5;
+
+N73_3:
+ if DAY_WEEK_AVG_RATIO < 9.235 then goto N73_4;
+ else goto T73_4;
+
+N73_4:
+ if DAY_WEEK_AVG_RATIO < 0.505 then goto T73_2;
+ else goto T73_3;
+
+T73_2:
+ response = 0.00990844;
+ goto D73;
+
+T73_3:
+ response = 0.00024663;
+ goto D73;
+
+T73_4:
+ response = -0.0117063;
+ goto D73;
+
+N73_5:
+ if MAX_SCORE < 277259 then goto N73_6;
+ else goto T73_7;
+
+N73_6:
+ if DAY_WEEK_AVG_RATIO < 1.955 then goto T73_5;
+ else goto T73_6;
+
+T73_5:
+ response = -0.00262119;
+ goto D73;
+
+T73_6:
+ response = 0.0102735;
+ goto D73;
+
+T73_7:
+ response = 0.0198781;
+ goto D73;
+
+D73:
+
+tnscore = tnscore + response;
+
+/* Tree 75 of 97 */
+/* N terminal nodes = 7, Depth = 5 */
+
+
+N74_1:
+ if MAX_SCORE < 382346 then goto N74_2;
+ else goto N74_3;
+
+N74_2:
+ if MAX_SCORE < 378950 then goto T74_1;
+ else goto T74_2;
+
+T74_1:
+ response = -0.000201382;
+ goto D74;
+
+T74_2:
+ response = -0.0116932;
+ goto D74;
+
+N74_3:
+ if MAX_SCORE < 385719 then goto T74_3;
+ else goto N74_4;
+
+T74_3:
+ response = 0.0202474;
+ goto D74;
+
+N74_4:
+ if AVG_RANK < 8.27 then goto N74_5;
+ else goto N74_6;
+
+N74_5:
+ if PREV_DAY_HITS < 9.5 then goto T74_4;
+ else goto T74_5;
+
+T74_4:
+ response = 0.000303439;
+ goto D74;
+
+T74_5:
+ response = -0.0119779;
+ goto D74;
+
+N74_6:
+ if ENTERTAINMENT < 0.315 then goto T74_6;
+ else goto T74_7;
+
+T74_6:
+ response = 0.00225595;
+ goto D74;
+
+T74_7:
+ response = 0.0201995;
+ goto D74;
+
+D74:
+
+tnscore = tnscore + response;
+
+/* Tree 76 of 97 */
+/* N terminal nodes = 7, Depth = 6 */
+
+
+N75_1:
+ if DAY_PD_HITS_RATIO < 47 then goto N75_2;
+ else goto T75_7;
+
+N75_2:
+ if DAY_PD_HITS_RATIO < 27.25 then goto N75_3;
+ else goto T75_6;
+
+N75_3:
+ if DAY_LW_DAY_HITS_RATIO < 31.5 then goto N75_4;
+ else goto N75_5;
+
+N75_4:
+ if LOCALNEWS < 0.765 then goto T75_1;
+ else goto T75_2;
+
+T75_1:
+ response = -0.000399432;
+ goto D75;
+
+T75_2:
+ response = 0.00362509;
+ goto D75;
+
+N75_5:
+ if DAY_WEEK_AVG_RATIO < 3.98 then goto T75_3;
+ else goto N75_6;
+
+T75_3:
+ response = -0.00769823;
+ goto D75;
+
+N75_6:
+ if DAY_WEEK_AVG_RATIO < 5.4 then goto T75_4;
+ else goto T75_5;
+
+T75_4:
+ response = 0.018687;
+ goto D75;
+
+T75_5:
+ response = 0.00240302;
+ goto D75;
+
+T75_6:
+ response = -0.00932172;
+ goto D75;
+
+T75_7:
+ response = 0.00902439;
+ goto D75;
+
+D75:
+
+tnscore = tnscore + response;
+
+/* Tree 77 of 97 */
+/* N terminal nodes = 7, Depth = 6 */
+
+
+N76_1:
+ if LW_DAY_HITS < 2.5 then goto N76_2;
+ else goto T76_7;
+
+N76_2:
+ if MIN_RANK < 7 then goto T76_1;
+ else goto N76_3;
+
+T76_1:
+ response = -0.000157235;
+ goto D76;
+
+N76_3:
+ if DAY_PREV_DAY_HITS_FRAC < 0.555 then goto N76_4;
+ else goto N76_5;
+
+N76_4:
+ if INTLNEWS < 0.225 then goto T76_2;
+ else goto T76_3;
+
+T76_2:
+ response = 0.00402381;
+ goto D76;
+
+T76_3:
+ response = 0.0196079;
+ goto D76;
+
+N76_5:
+ if DAY_WEEK_AVG_RATIO < 6.01 then goto N76_6;
+ else goto T76_6;
+
+N76_6:
+ if DAY_WEEK_AVG_RATIO < 3.805 then goto T76_4;
+ else goto T76_5;
+
+T76_4:
+ response = 0.000236934;
+ goto D76;
+
+T76_5:
+ response = 0.00609122;
+ goto D76;
+
+T76_6:
+ response = -0.0040371;
+ goto D76;
+
+T76_7:
+ response = -0.00301446;
+ goto D76;
+
+D76:
+
+tnscore = tnscore + response;
+
+/* Tree 78 of 97 */
+/* N terminal nodes = 7, Depth = 5 */
+
+
+N77_1:
+ if DAY_PD_HITS_RATIO < 0.085 then goto N77_2;
+ else goto N77_3;
+
+N77_2:
+ if AVG_RANK < 8.9 then goto T77_1;
+ else goto T77_2;
+
+T77_1:
+ response = 0.0124264;
+ goto D77;
+
+T77_2:
+ response = 0.00018696;
+ goto D77;
+
+N77_3:
+ if PREV_DAY_HITS < 26.5 then goto N77_4;
+ else goto N77_6;
+
+N77_4:
+ if PREV_DAY_HITS < 19.5 then goto T77_3;
+ else goto N77_5;
+
+T77_3:
+ response = -3.01051e-05;
+ goto D77;
+
+N77_5:
+ if MAX_SCORE < 294152 then goto T77_4;
+ else goto T77_5;
+
+T77_4:
+ response = 0.010709;
+ goto D77;
+
+T77_5:
+ response = -0.00331498;
+ goto D77;
+
+N77_6:
+ if WEEKAVG < 10.785 then goto T77_6;
+ else goto T77_7;
+
+T77_6:
+ response = -0.0102542;
+ goto D77;
+
+T77_7:
+ response = -0.00013417;
+ goto D77;
+
+D77:
+
+tnscore = tnscore + response;
+
+/* Tree 79 of 97 */
+/* N terminal nodes = 7, Depth = 7 */
+
+
+N78_1:
+ if DAY_WEEK_AVG_RATIO < 10.06 then goto N78_2;
+ else goto T78_7;
+
+N78_2:
+ if DAY_WEEK_AVG_RATIO < 4.835 then goto T78_1;
+ else goto N78_3;
+
+T78_1:
+ response = -7.25075e-05;
+ goto D78;
+
+N78_3:
+ if ENTERTAINMENT < 0.05 then goto N78_4;
+ else goto T78_6;
+
+N78_4:
+ if ISABSTRACT_AVG < 0.83 then goto N78_5;
+ else goto T78_5;
+
+N78_5:
+ if INTLNEWS < 0.635 then goto T78_2;
+ else goto N78_6;
+
+T78_2:
+ response = -0.00614185;
+ goto D78;
+
+N78_6:
+ if MAX_MIN_RANK < 1 then goto T78_3;
+ else goto T78_4;
+
+T78_3:
+ response = -0.00349054;
+ goto D78;
+
+T78_4:
+ response = 0.0109974;
+ goto D78;
+
+T78_5:
+ response = 0.0043811;
+ goto D78;
+
+T78_6:
+ response = 0.00439437;
+ goto D78;
+
+T78_7:
+ response = 0.00656011;
+ goto D78;
+
+D78:
+
+tnscore = tnscore + response;
+
+/* Tree 80 of 97 */
+/* N terminal nodes = 7, Depth = 7 */
+
+
+N79_1:
+ if INTLNEWS < 0.185 then goto N79_2;
+ else goto T79_7;
+
+N79_2:
+ if INTLNEWS < 0.115 then goto N79_3;
+ else goto T79_6;
+
+N79_3:
+ if DAY_PD_HITS_RATIO < 0.105 then goto T79_1;
+ else goto N79_4;
+
+T79_1:
+ response = 0.016428;
+ goto D79;
+
+N79_4:
+ if AVG_RANK < 8.365 then goto N79_5;
+ else goto T79_5;
+
+N79_5:
+ if ISTITLE_AVG < 0.435 then goto N79_6;
+ else goto T79_4;
+
+N79_6:
+ if DAY_PD_HITS_RATIO < 11.25 then goto T79_2;
+ else goto T79_3;
+
+T79_2:
+ response = 0.00358374;
+ goto D79;
+
+T79_3:
+ response = -0.0162655;
+ goto D79;
+
+T79_4:
+ response = -0.000799475;
+ goto D79;
+
+T79_5:
+ response = -0.00198299;
+ goto D79;
+
+T79_6:
+ response = -0.00662532;
+ goto D79;
+
+T79_7:
+ response = 0.00087188;
+ goto D79;
+
+D79:
+
+tnscore = tnscore + response;
+
+/* Tree 81 of 97 */
+/* N terminal nodes = 7, Depth = 6 */
+
+
+N80_1:
+ if WEEKAVG < 0.64 then goto T80_1;
+ else goto N80_2;
+
+T80_1:
+ response = -0.00047051;
+ goto D80;
+
+N80_2:
+ if DAY_PREV_DAY_HITS_FRAC < 0.965 then goto T80_2;
+ else goto N80_3;
+
+T80_2:
+ response = 0.000224471;
+ goto D80;
+
+N80_3:
+ if BUSINESS < 0.53 then goto N80_4;
+ else goto N80_6;
+
+N80_4:
+ if LOCALNEWS < 0.27 then goto T80_3;
+ else goto N80_5;
+
+T80_3:
+ response = 0.00103714;
+ goto D80;
+
+N80_5:
+ if NATIONALNEWS < 0.155 then goto T80_4;
+ else goto T80_5;
+
+T80_4:
+ response = 0.0140465;
+ goto D80;
+
+T80_5:
+ response = -0.00245531;
+ goto D80;
+
+N80_6:
+ if BUSINESS < 0.645 then goto T80_6;
+ else goto T80_7;
+
+T80_6:
+ response = 0.0237968;
+ goto D80;
+
+T80_7:
+ response = 0.00736313;
+ goto D80;
+
+D80:
+
+tnscore = tnscore + response;
+
+/* Tree 82 of 97 */
+/* N terminal nodes = 7, Depth = 7 */
+
+
+N81_1:
+ if LOCALNEWS < 0.05 then goto T81_1;
+ else goto N81_2;
+
+T81_1:
+ response = -0.000573509;
+ goto D81;
+
+N81_2:
+ if MAX_SCORE < 253515 then goto T81_2;
+ else goto N81_3;
+
+T81_2:
+ response = -0.00114612;
+ goto D81;
+
+N81_3:
+ if SUPERDUPER_AVG < 0.315 then goto N81_4;
+ else goto T81_7;
+
+N81_4:
+ if MAX_SCORE < 255248 then goto T81_3;
+ else goto N81_5;
+
+T81_3:
+ response = 0.0174812;
+ goto D81;
+
+N81_5:
+ if PUB_TODAY_AVG < 0.05 then goto T81_4;
+ else goto N81_6;
+
+T81_4:
+ response = -0.00327708;
+ goto D81;
+
+N81_6:
+ if DAY_PD_HITS_RATIO < 0.425 then goto T81_5;
+ else goto T81_6;
+
+T81_5:
+ response = 0.0118621;
+ goto D81;
+
+T81_6:
+ response = 0.0033546;
+ goto D81;
+
+T81_7:
+ response = -0.00154643;
+ goto D81;
+
+D81:
+
+tnscore = tnscore + response;
+
+/* Tree 83 of 97 */
+/* N terminal nodes = 7, Depth = 7 */
+
+
+N82_1:
+ if MISC < 0.105 then goto N82_2;
+ else goto T82_7;
+
+N82_2:
+ if DAY_WEEK_AVG_RATIO < 10.06 then goto N82_3;
+ else goto T82_6;
+
+N82_3:
+ if DAY_WEEK_AVG_RATIO < 9.235 then goto N82_4;
+ else goto T82_5;
+
+N82_4:
+ if MISC < 0.05 then goto N82_5;
+ else goto T82_4;
+
+N82_5:
+ if WEEKAVG < 0.215 then goto T82_1;
+ else goto N82_6;
+
+T82_1:
+ response = -0.00242466;
+ goto D82;
+
+N82_6:
+ if DAY_PREV_DAY_HITS_FRAC < 0.985 then goto T82_2;
+ else goto T82_3;
+
+T82_2:
+ response = -9.73666e-05;
+ goto D82;
+
+T82_3:
+ response = 0.00142133;
+ goto D82;
+
+T82_4:
+ response = -0.00753159;
+ goto D82;
+
+T82_5:
+ response = -0.00863543;
+ goto D82;
+
+T82_6:
+ response = 0.0086817;
+ goto D82;
+
+T82_7:
+ response = 0.0102311;
+ goto D82;
+
+D82:
+
+tnscore = tnscore + response;
+
+/* Tree 84 of 97 */
+/* N terminal nodes = 7, Depth = 6 */
+
+
+N83_1:
+ if DAY_PD_HITS_RATIO < 47.5 then goto N83_2;
+ else goto N83_6;
+
+N83_2:
+ if DAY_PD_HITS_RATIO < 32.5 then goto N83_3;
+ else goto T83_5;
+
+N83_3:
+ if DAY_PREV_DAY_HITS_FRAC < 0.405 then goto N83_4;
+ else goto T83_4;
+
+N83_4:
+ if PUB_TODAY_AVG < 0.155 then goto N83_5;
+ else goto T83_3;
+
+N83_5:
+ if DAY_WEEK_AVG_RATIO < 0.67 then goto T83_1;
+ else goto T83_2;
+
+T83_1:
+ response = -0.00300426;
+ goto D83;
+
+T83_2:
+ response = 0.0155993;
+ goto D83;
+
+T83_3:
+ response = -0.000160259;
+ goto D83;
+
+T83_4:
+ response = -0.000324256;
+ goto D83;
+
+T83_5:
+ response = -0.0118989;
+ goto D83;
+
+N83_6:
+ if MAX_SCORE < 286380 then goto T83_6;
+ else goto T83_7;
+
+T83_6:
+ response = 0.0153486;
+ goto D83;
+
+T83_7:
+ response = 0.00067452;
+ goto D83;
+
+D83:
+
+tnscore = tnscore + response;
+
+/* Tree 85 of 97 */
+/* N terminal nodes = 7, Depth = 7 */
+
+
+N84_1:
+ if SUPERDUPER_AVG < 0.105 then goto T84_1;
+ else goto N84_2;
+
+T84_1:
+ response = 0.00094466;
+ goto D84;
+
+N84_2:
+ if MAX_SCORE < 277301 then goto N84_3;
+ else goto T84_7;
+
+N84_3:
+ if ISABSTRACT_AVG < 0.39 then goto N84_4;
+ else goto T84_6;
+
+N84_4:
+ if DAY_PREV_DAY_HITS_FRAC < 0.795 then goto T84_2;
+ else goto N84_5;
+
+T84_2:
+ response = -0.000924515;
+ goto D84;
+
+N84_5:
+ if SPORTS < 0.275 then goto N84_6;
+ else goto T84_5;
+
+N84_6:
+ if ENTERTAINMENT < 0.05 then goto T84_3;
+ else goto T84_4;
+
+T84_3:
+ response = -0.00695203;
+ goto D84;
+
+T84_4:
+ response = 0.00896934;
+ goto D84;
+
+T84_5:
+ response = -0.0197272;
+ goto D84;
+
+T84_6:
+ response = 0.000167123;
+ goto D84;
+
+T84_7:
+ response = 0.00057116;
+ goto D84;
+
+D84:
+
+tnscore = tnscore + response;
+
+/* Tree 86 of 97 */
+/* N terminal nodes = 7, Depth = 5 */
+
+
+N85_1:
+ if DAY_WEEK_AVG_RATIO < 3.635 then goto N85_2;
+ else goto N85_5;
+
+N85_2:
+ if DAY_LW_DAY_HITS_RATIO < 21.25 then goto N85_3;
+ else goto T85_4;
+
+N85_3:
+ if PREV_DAY_HITS < 19.5 then goto T85_1;
+ else goto N85_4;
+
+T85_1:
+ response = -0.000199036;
+ goto D85;
+
+N85_4:
+ if INTLNEWS < 0.25 then goto T85_2;
+ else goto T85_3;
+
+T85_2:
+ response = -0.00106649;
+ goto D85;
+
+T85_3:
+ response = 0.0081603;
+ goto D85;
+
+T85_4:
+ response = -0.0104178;
+ goto D85;
+
+N85_5:
+ if DAY_WEEK_AVG_RATIO < 3.845 then goto N85_6;
+ else goto T85_7;
+
+N85_6:
+ if DAY_HITS_FRAC < 0.94 then goto T85_5;
+ else goto T85_6;
+
+T85_5:
+ response = 0.00340684;
+ goto D85;
+
+T85_6:
+ response = 0.0209321;
+ goto D85;
+
+T85_7:
+ response = 0.00113853;
+ goto D85;
+
+D85:
+
+tnscore = tnscore + response;
+
+/* Tree 87 of 97 */
+/* N terminal nodes = 7, Depth = 7 */
+
+
+N86_1:
+ if REGIONALNEWS < 0.275 then goto N86_2;
+ else goto T86_7;
+
+N86_2:
+ if LAW < 0.105 then goto N86_3;
+ else goto T86_6;
+
+N86_3:
+ if DAY_WEEK_AVG_RATIO < 6.01 then goto T86_1;
+ else goto N86_4;
+
+T86_1:
+ response = 0.000136175;
+ goto D86;
+
+N86_4:
+ if NATIONALNEWS < 0.31 then goto N86_5;
+ else goto T86_5;
+
+N86_5:
+ if DAY_HITS_FRAC < 0.895 then goto T86_2;
+ else goto N86_6;
+
+T86_2:
+ response = 0.00575266;
+ goto D86;
+
+N86_6:
+ if AVG_RANK < 7.53 then goto T86_3;
+ else goto T86_4;
+
+T86_3:
+ response = -0.0119194;
+ goto D86;
+
+T86_4:
+ response = -0.00112094;
+ goto D86;
+
+T86_5:
+ response = -0.0166441;
+ goto D86;
+
+T86_6:
+ response = 0.00794833;
+ goto D86;
+
+T86_7:
+ response = -0.0103064;
+ goto D86;
+
+D86:
+
+tnscore = tnscore + response;
+
+/* Tree 88 of 97 */
+/* N terminal nodes = 7, Depth = 6 */
+
+
+N87_1:
+ if TOPSTORY < 0.315 then goto T87_1;
+ else goto N87_2;
+
+T87_1:
+ response = -0.000718271;
+ goto D87;
+
+N87_2:
+ if DAY_PD_HITS_RATIO < 0.13 then goto T87_2;
+ else goto N87_3;
+
+T87_2:
+ response = -0.0105571;
+ goto D87;
+
+N87_3:
+ if DAY_WEEK_AVG_RATIO < 5.82 then goto N87_4;
+ else goto N87_6;
+
+N87_4:
+ if DAY_WEEK_AVG_RATIO < 4.82 then goto N87_5;
+ else goto T87_5;
+
+N87_5:
+ if DAY_LW_DAY_HITS_RATIO < 11.5 then goto T87_3;
+ else goto T87_4;
+
+T87_3:
+ response = 0.00414548;
+ goto D87;
+
+T87_4:
+ response = -0.010294;
+ goto D87;
+
+T87_5:
+ response = 0.0157636;
+ goto D87;
+
+N87_6:
+ if ISABSTRACT_AVG < 0.135 then goto T87_6;
+ else goto T87_7;
+
+T87_6:
+ response = -0.0110257;
+ goto D87;
+
+T87_7:
+ response = 0.00663564;
+ goto D87;
+
+D87:
+
+tnscore = tnscore + response;
+
+/* Tree 89 of 97 */
+/* N terminal nodes = 7, Depth = 5 */
+
+
+N88_1:
+ if MAX_SCORE < 362776 then goto N88_2;
+ else goto N88_3;
+
+N88_2:
+ if MAX_SCORE < 361504 then goto T88_1;
+ else goto T88_2;
+
+T88_1:
+ response = 0.000123708;
+ goto D88;
+
+T88_2:
+ response = 0.0215766;
+ goto D88;
+
+N88_3:
+ if INTLNEWS < 0.155 then goto N88_4;
+ else goto N88_5;
+
+N88_4:
+ if WEEKAVG < 2.36 then goto T88_3;
+ else goto T88_4;
+
+T88_3:
+ response = -0.00603082;
+ goto D88;
+
+T88_4:
+ response = 0.00760337;
+ goto D88;
+
+N88_5:
+ if TOPSTORY < 0.275 then goto T88_5;
+ else goto N88_6;
+
+T88_5:
+ response = -0.000704669;
+ goto D88;
+
+N88_6:
+ if PUB_TODAY_AVG < 0.86 then goto T88_6;
+ else goto T88_7;
+
+T88_6:
+ response = 0.0139844;
+ goto D88;
+
+T88_7:
+ response = -0.00288551;
+ goto D88;
+
+D88:
+
+tnscore = tnscore + response;
+
+/* Tree 90 of 97 */
+/* N terminal nodes = 7, Depth = 5 */
+
+
+N89_1:
+ if REGIONALNEWS < 0.115 then goto N89_2;
+ else goto N89_5;
+
+N89_2:
+ if DAY_PREV_DAY_HITS_FRAC < 0.725 then goto N89_3;
+ else goto T89_4;
+
+N89_3:
+ if DAY_LW_DAY_HITS_RATIO < 21.25 then goto N89_4;
+ else goto T89_3;
+
+N89_4:
+ if MAX_SCORE < 242944 then goto T89_1;
+ else goto T89_2;
+
+T89_1:
+ response = 0.000482612;
+ goto D89;
+
+T89_2:
+ response = -0.00179648;
+ goto D89;
+
+T89_3:
+ response = -0.0123423;
+ goto D89;
+
+T89_4:
+ response = 0.000623923;
+ goto D89;
+
+N89_5:
+ if DAY_HITS < 4.5 then goto N89_6;
+ else goto T89_7;
+
+N89_6:
+ if DAY_PREV_DAY_HITS_FRAC < 0.87 then goto T89_5;
+ else goto T89_6;
+
+T89_5:
+ response = 0.00798437;
+ goto D89;
+
+T89_6:
+ response = -0.0109132;
+ goto D89;
+
+T89_7:
+ response = 0.0140617;
+ goto D89;
+
+D89:
+
+tnscore = tnscore + response;
+
+/* Tree 91 of 97 */
+/* N terminal nodes = 7, Depth = 6 */
+
+
+N90_1:
+ if MAX_SCORE < 322221 then goto T90_1;
+ else goto N90_2;
+
+T90_1:
+ response = 0.000409287;
+ goto D90;
+
+N90_2:
+ if MAX_SCORE < 334601 then goto T90_2;
+ else goto N90_3;
+
+T90_2:
+ response = -0.00880555;
+ goto D90;
+
+N90_3:
+ if AVG_RANK < 7.58 then goto N90_4;
+ else goto N90_5;
+
+N90_4:
+ if SUPERDUPER_AVG < 0.295 then goto T90_3;
+ else goto T90_4;
+
+T90_3:
+ response = -0.00215568;
+ goto D90;
+
+T90_4:
+ response = -0.0124233;
+ goto D90;
+
+N90_5:
+ if NATIONALNEWS < 0.21 then goto T90_5;
+ else goto N90_6;
+
+T90_5:
+ response = 0.00160963;
+ goto D90;
+
+N90_6:
+ if ISABSTRACT_AVG < 0.185 then goto T90_6;
+ else goto T90_7;
+
+T90_6:
+ response = -0.00703363;
+ goto D90;
+
+T90_7:
+ response = 0.00777402;
+ goto D90;
+
+D90:
+
+tnscore = tnscore + response;
+
+/* Tree 92 of 97 */
+/* N terminal nodes = 7, Depth = 7 */
+
+
+N91_1:
+ if ENTERTAINMENT < 0.21 then goto T91_1;
+ else goto N91_2;
+
+T91_1:
+ response = -0.00019687;
+ goto D91;
+
+N91_2:
+ if DAY_PD_HITS_RATIO < 15.75 then goto N91_3;
+ else goto T91_7;
+
+N91_3:
+ if SUPERDUPER_AVG < 0.415 then goto N91_4;
+ else goto T91_6;
+
+N91_4:
+ if MAX_MIN_RANK < 3 then goto T91_2;
+ else goto N91_5;
+
+T91_2:
+ response = -0.00268736;
+ goto D91;
+
+N91_5:
+ if MAX_MIN_RANK < 5 then goto T91_3;
+ else goto N91_6;
+
+T91_3:
+ response = 0.0078927;
+ goto D91;
+
+N91_6:
+ if DAY_PD_HITS_RATIO < 2.835 then goto T91_4;
+ else goto T91_5;
+
+T91_4:
+ response = 0.00387939;
+ goto D91;
+
+T91_5:
+ response = -0.00410318;
+ goto D91;
+
+T91_6:
+ response = -0.00538175;
+ goto D91;
+
+T91_7:
+ response = 0.0142121;
+ goto D91;
+
+D91:
+
+tnscore = tnscore + response;
+
+/* Tree 93 of 97 */
+/* N terminal nodes = 7, Depth = 7 */
+
+
+N92_1:
+ if SPORTS < 0.415 then goto T92_1;
+ else goto N92_2;
+
+T92_1:
+ response = -3.44051e-05;
+ goto D92;
+
+N92_2:
+ if MAX_MIN_RANK < 1 then goto T92_2;
+ else goto N92_3;
+
+T92_2:
+ response = -0.00501256;
+ goto D92;
+
+N92_3:
+ if SUPERDUPER_AVG < 0.05 then goto N92_4;
+ else goto T92_7;
+
+N92_4:
+ if MAX_SCORE < 229196 then goto T92_3;
+ else goto N92_5;
+
+T92_3:
+ response = -0.00015078;
+ goto D92;
+
+N92_5:
+ if MAX_SCORE < 258856 then goto N92_6;
+ else goto T92_6;
+
+N92_6:
+ if ISTITLE_AVG < 0.355 then goto T92_4;
+ else goto T92_5;
+
+T92_4:
+ response = 0.0314869;
+ goto D92;
+
+T92_5:
+ response = 0.00734956;
+ goto D92;
+
+T92_6:
+ response = 0.00421683;
+ goto D92;
+
+T92_7:
+ response = 0.00084287;
+ goto D92;
+
+D92:
+
+tnscore = tnscore + response;
+
+/* Tree 94 of 97 */
+/* N terminal nodes = 7, Depth = 5 */
+
+
+N93_1:
+ if MAX_MIN_RANK < 9 then goto N93_2;
+ else goto N93_5;
+
+N93_2:
+ if MAX_SCORE < 382719 then goto N93_3;
+ else goto T93_4;
+
+N93_3:
+ if MAX_SCORE < 362503 then goto T93_1;
+ else goto N93_4;
+
+T93_1:
+ response = 3.58027e-05;
+ goto D93;
+
+N93_4:
+ if MAX_SCORE < 364403 then goto T93_2;
+ else goto T93_3;
+
+T93_2:
+ response = -0.0154942;
+ goto D93;
+
+T93_3:
+ response = -0.00276027;
+ goto D93;
+
+T93_4:
+ response = 0.00210644;
+ goto D93;
+
+N93_5:
+ if WEEKAVG < 1.36 then goto N93_6;
+ else goto T93_7;
+
+N93_6:
+ if MAX_SCORE < 269970 then goto T93_5;
+ else goto T93_6;
+
+T93_5:
+ response = -0.00118638;
+ goto D93;
+
+T93_6:
+ response = 0.0203373;
+ goto D93;
+
+T93_7:
+ response = -0.00169747;
+ goto D93;
+
+D93:
+
+tnscore = tnscore + response;
+
+/* Tree 95 of 97 */
+/* N terminal nodes = 7, Depth = 5 */
+
+
+N94_1:
+ if PREV_DAY_HITS < 26.5 then goto N94_2;
+ else goto N94_3;
+
+N94_2:
+ if MAX_SCORE < 187757 then goto T94_1;
+ else goto T94_2;
+
+T94_1:
+ response = -0.00124276;
+ goto D94;
+
+T94_2:
+ response = 0.000403197;
+ goto D94;
+
+N94_3:
+ if DAY_PREV_DAY_HITS_FRAC < 0.795 then goto N94_4;
+ else goto N94_5;
+
+N94_4:
+ if DAY_PREV_DAY_HITS_FRAC < 0.675 then goto T94_3;
+ else goto T94_4;
+
+T94_3:
+ response = -0.00288805;
+ goto D94;
+
+T94_4:
+ response = 0.00935152;
+ goto D94;
+
+N94_5:
+ if WEEKAVG < 11.93 then goto N94_6;
+ else goto T94_7;
+
+N94_6:
+ if DAY_PD_HITS_RATIO < 0.27 then goto T94_5;
+ else goto T94_6;
+
+T94_5:
+ response = -0.00585669;
+ goto D94;
+
+T94_6:
+ response = -0.0173104;
+ goto D94;
+
+T94_7:
+ response = 0.003142;
+ goto D94;
+
+D94:
+
+tnscore = tnscore + response;
+
+/* Tree 96 of 97 */
+/* N terminal nodes = 7, Depth = 6 */
+
+
+N95_1:
+ if MAX_MIN_RANK < 7 then goto T95_1;
+ else goto N95_2;
+
+T95_1:
+ response = -0.000568858;
+ goto D95;
+
+N95_2:
+ if PUB_TODAY_AVG < 0.885 then goto N95_3;
+ else goto N95_6;
+
+N95_3:
+ if TOPSTORY < 0.27 then goto N95_4;
+ else goto T95_5;
+
+N95_4:
+ if INTLNEWS < 0.365 then goto T95_2;
+ else goto N95_5;
+
+T95_2:
+ response = -0.0025302;
+ goto D95;
+
+N95_5:
+ if AVG_RANK < 6.9 then goto T95_3;
+ else goto T95_4;
+
+T95_3:
+ response = -0.00341549;
+ goto D95;
+
+T95_4:
+ response = 0.00979915;
+ goto D95;
+
+T95_5:
+ response = 0.0137197;
+ goto D95;
+
+N95_6:
+ if DAY_HITS < 10.5 then goto T95_6;
+ else goto T95_7;
+
+T95_6:
+ response = 0.0179211;
+ goto D95;
+
+T95_7:
+ response = -0.000230543;
+ goto D95;
+
+D95:
+
+tnscore = tnscore + response;
+
+/* Tree 97 of 97 */
+/* N terminal nodes = 7, Depth = 5 */
+
+
+N96_1:
+ if MIN_RANK < 7 then goto T96_1;
+ else goto N96_2;
+
+T96_1:
+ response = -0.00062463;
+ goto D96;
+
+N96_2:
+ if DAY_PREV_DAY_HITS_FRAC < 0.475 then goto N96_3;
+ else goto N96_4;
+
+N96_3:
+ if AVG_RANK < 9.745 then goto T96_2;
+ else goto T96_3;
+
+T96_2:
+ response = 0.0142049;
+ goto D96;
+
+T96_3:
+ response = -0.000373764;
+ goto D96;
+
+N96_4:
+ if PUB_TODAY_AVG < 0.27 then goto N96_5;
+ else goto N96_6;
+
+N96_5:
+ if MAX_SCORE < 276860 then goto T96_4;
+ else goto T96_5;
+
+T96_4:
+ response = -0.00522975;
+ goto D96;
+
+T96_5:
+ response = 0.00307189;
+ goto D96;
+
+N96_6:
+ if PREV_DAY_HITS < 1.5 then goto T96_6;
+ else goto T96_7;
+
+T96_6:
+ response = 0.00025413;
+ goto D96;
+
+T96_7:
+ response = 0.00490254;
+ goto D96;
+
+D96:
+
+tnscore = tnscore + response;
+
+return;
diff --git a/searchlib/src/test/files/treenet04.model b/searchlib/src/test/files/treenet04.model
new file mode 100644
index 00000000000..64082ca5ca7
--- /dev/null
+++ b/searchlib/src/test/files/treenet04.model
@@ -0,0 +1,6247 @@
+
+/* Data Dictionary, Number Of Variables = 40 */
+/* Name = NUM_WORDS, Type = continuous. */
+/* Name = DAY_HITS, Type = continuous. */
+/* Name = PREV_DAY_HITS, Type = continuous. */
+/* Name = DAY_PD_HITS_RATIO, Type = continuous. */
+/* Name = DAY_PD_HITS_DERIV, Type = continuous. */
+/* Name = LW_DAY_HITS, Type = continuous. */
+/* Name = DAY_LW_DAY_HITS_RATIO, Type = continuous. */
+/* Name = DAY_LW_DAY_HITS_DERIV, Type = continuous. */
+/* Name = WEEKAVG, Type = continuous. */
+/* Name = DAY_WEEK_AVG_RATIO, Type = continuous. */
+/* Name = DAY_WEEK_AVG_DERIV, Type = continuous. */
+/* Name = ISTITLE_AVG, Type = continuous. */
+/* Name = ISABSTRACT_AVG, Type = continuous. */
+/* Name = SUPERDUPER_AVG, Type = continuous. */
+/* Name = PUB_TODAY_AVG, Type = continuous. */
+/* Name = BUSINESS, Type = continuous. */
+/* Name = ENTERTAINMENT, Type = continuous. */
+/* Name = HEALTH, Type = continuous. */
+/* Name = INTLNEWS, Type = continuous. */
+/* Name = LIFESTYLE, Type = continuous. */
+/* Name = LOCALNEWS, Type = continuous. */
+/* Name = MISC, Type = continuous. */
+/* Name = NATIONALNEWS, Type = continuous. */
+/* Name = POLITICS, Type = continuous. */
+/* Name = REGIONALNEWS, Type = continuous. */
+/* Name = SPORTS, Type = continuous. */
+/* Name = TOPSTORY, Type = continuous. */
+/* Name = AVG_RANK, Type = continuous. */
+/* Name = MAX_RANK, Type = continuous. */
+/* Name = MIN_RANK, Type = continuous. */
+/* Name = MAX_MIN_RANK, Type = continuous. */
+/* Name = AVG_SCORE, Type = continuous. */
+/* Name = MAX_SCORE, Type = continuous. */
+/* Name = MIN_SCORE, Type = continuous. */
+/* Name = MAX_MIN_SCORE, Type = continuous. */
+/* Name = FOUR_HOUR_WF, Type = continuous. */
+/* Name = EIGHT_HOUR_WF, Type = continuous. */
+/* Name = TWELVE_HOUR_WF, Type = continuous. */
+/* Name = ONE_DAY_WF, Type = continuous. */
+/* Name = TWO_DAY_WF, Type = continuous. */
+
+MODELBEGIN:
+
+/* CART version: 5.0.9.156 */
+/* TreeNet: TreeNet20071016172855 */
+/* Grove: /home/rparekh/lb/lb_title_hourly/lb_title_hourly.grv */
+/* N trees: 103 */
+
+link TN0;
+pred = tnscore; /* predicted value for IY_CTR */
+
+
+/*********************/
+/* Model is complete */
+/*********************/
+
+return;
+
+
+
+TN0:
+
+/* Tree 1 of 103 */
+/* N terminal nodes = 7, Depth = 5 */
+
+
+tnscore = 0.0;
+
+N0_1:
+ if AVG_SCORE < 236210 then goto N0_2;
+ else goto N0_3;
+
+N0_2:
+ if AVG_SCORE < 151678 then goto T0_1;
+ else goto T0_2;
+
+T0_1:
+ response = 0.205803;
+ goto D0;
+
+T0_2:
+ response = 0.214904;
+ goto D0;
+
+N0_3:
+ if ISABSTRACT_AVG < 0.155 then goto N0_4;
+ else goto T0_7;
+
+N0_4:
+ if WEEKAVG < 0.93 then goto N0_5;
+ else goto N0_6;
+
+N0_5:
+ if TOPSTORY < 0.365 then goto T0_3;
+ else goto T0_4;
+
+T0_3:
+ response = 0.234927;
+ goto D0;
+
+T0_4:
+ response = 0.262252;
+ goto D0;
+
+N0_6:
+ if MAX_SCORE < 271454 then goto T0_5;
+ else goto T0_6;
+
+T0_5:
+ response = 0.236303;
+ goto D0;
+
+T0_6:
+ response = 0.251477;
+ goto D0;
+
+T0_7:
+ response = 0.224603;
+ goto D0;
+
+D0:
+
+tnscore = tnscore + response;
+
+/* Tree 2 of 103 */
+/* N terminal nodes = 7, Depth = 6 */
+
+
+N1_1:
+ if AVG_SCORE < 240282 then goto N1_2;
+ else goto N1_3;
+
+N1_2:
+ if AVG_SCORE < 153656 then goto T1_1;
+ else goto T1_2;
+
+T1_1:
+ response = -0.0157043;
+ goto D1;
+
+T1_2:
+ response = -0.00743688;
+ goto D1;
+
+N1_3:
+ if ISTITLE_AVG < 0.705 then goto N1_4;
+ else goto T1_7;
+
+N1_4:
+ if WEEKAVG < 0.93 then goto T1_3;
+ else goto N1_5;
+
+T1_3:
+ response = 0.0116703;
+ goto D1;
+
+N1_5:
+ if MAX_SCORE < 266499 then goto T1_4;
+ else goto N1_6;
+
+T1_4:
+ response = 0.00213746;
+ goto D1;
+
+N1_6:
+ if TWO_DAY_WF < 0.826656 then goto T1_5;
+ else goto T1_6;
+
+T1_5:
+ response = 0.0214705;
+ goto D1;
+
+T1_6:
+ response = 0.0350738;
+ goto D1;
+
+T1_7:
+ response = 0.00284844;
+ goto D1;
+
+D1:
+
+tnscore = tnscore + response;
+
+/* Tree 3 of 103 */
+/* N terminal nodes = 7, Depth = 5 */
+
+
+N2_1:
+ if AVG_SCORE < 239849 then goto N2_2;
+ else goto N2_3;
+
+N2_2:
+ if AVG_SCORE < 230612 then goto T2_1;
+ else goto T2_2;
+
+T2_1:
+ response = -0.0105243;
+ goto D2;
+
+T2_2:
+ response = -0.000207603;
+ goto D2;
+
+N2_3:
+ if ISTITLE_AVG < 0.95 then goto N2_4;
+ else goto T2_7;
+
+N2_4:
+ if MAX_MIN_SCORE < 36505.8 then goto N2_5;
+ else goto N2_6;
+
+N2_5:
+ if WEEKAVG < 10.925 then goto T2_3;
+ else goto T2_4;
+
+T2_3:
+ response = 0.0103073;
+ goto D2;
+
+T2_4:
+ response = 0.0445006;
+ goto D2;
+
+N2_6:
+ if MAX_SCORE < 267687 then goto T2_5;
+ else goto T2_6;
+
+T2_5:
+ response = 0.00115576;
+ goto D2;
+
+T2_6:
+ response = 0.023751;
+ goto D2;
+
+T2_7:
+ response = 0.00109943;
+ goto D2;
+
+D2:
+
+tnscore = tnscore + response;
+
+/* Tree 4 of 103 */
+/* N terminal nodes = 7, Depth = 5 */
+
+
+N3_1:
+ if AVG_SCORE < 242149 then goto N3_2;
+ else goto N3_4;
+
+N3_2:
+ if AVG_SCORE < 153383 then goto T3_1;
+ else goto N3_3;
+
+T3_1:
+ response = -0.0131014;
+ goto D3;
+
+N3_3:
+ if WEEKAVG < 1.5 then goto T3_2;
+ else goto T3_3;
+
+T3_2:
+ response = -0.00720755;
+ goto D3;
+
+T3_3:
+ response = -0.000377073;
+ goto D3;
+
+N3_4:
+ if ISTITLE_AVG < 0.705 then goto N3_5;
+ else goto T3_7;
+
+N3_5:
+ if MAX_MIN_SCORE < 36505 then goto N3_6;
+ else goto T3_6;
+
+N3_6:
+ if BUSINESS < 0.13 then goto T3_4;
+ else goto T3_5;
+
+T3_4:
+ response = 0.0164936;
+ goto D3;
+
+T3_5:
+ response = 0.00560036;
+ goto D3;
+
+T3_6:
+ response = 0.0218971;
+ goto D3;
+
+T3_7:
+ response = 0.00392608;
+ goto D3;
+
+D3:
+
+tnscore = tnscore + response;
+
+/* Tree 5 of 103 */
+/* N terminal nodes = 7, Depth = 5 */
+
+
+N4_1:
+ if MAX_SCORE < 264139 then goto N4_2;
+ else goto N4_3;
+
+N4_2:
+ if MIN_SCORE < 222136 then goto T4_1;
+ else goto T4_2;
+
+T4_1:
+ response = -0.0079708;
+ goto D4;
+
+T4_2:
+ response = 0.00140823;
+ goto D4;
+
+N4_3:
+ if ISABSTRACT_AVG < 0.315 then goto N4_4;
+ else goto N4_6;
+
+N4_4:
+ if DAY_WEEK_AVG_DERIV < 10.5 then goto N4_5;
+ else goto T4_5;
+
+N4_5:
+ if BUSINESS < 0.105 then goto T4_3;
+ else goto T4_4;
+
+T4_3:
+ response = 0.016512;
+ goto D4;
+
+T4_4:
+ response = 0.00726199;
+ goto D4;
+
+T4_5:
+ response = 0.0306897;
+ goto D4;
+
+N4_6:
+ if SPORTS < 0.42 then goto T4_6;
+ else goto T4_7;
+
+T4_6:
+ response = -0.000976569;
+ goto D4;
+
+T4_7:
+ response = 0.0183973;
+ goto D4;
+
+D4:
+
+tnscore = tnscore + response;
+
+/* Tree 6 of 103 */
+/* N terminal nodes = 7, Depth = 5 */
+
+
+N5_1:
+ if AVG_SCORE < 231394 then goto T5_1;
+ else goto N5_2;
+
+T5_1:
+ response = -0.00698348;
+ goto D5;
+
+N5_2:
+ if ISTITLE_AVG < 0.645 then goto N5_3;
+ else goto N5_5;
+
+N5_3:
+ if MAX_SCORE < 271880 then goto T5_2;
+ else goto N5_4;
+
+T5_2:
+ response = 0.00669893;
+ goto D5;
+
+N5_4:
+ if AVG_SCORE < 281369 then goto T5_3;
+ else goto T5_4;
+
+T5_3:
+ response = 0.0209096;
+ goto D5;
+
+T5_4:
+ response = 0.0117951;
+ goto D5;
+
+N5_5:
+ if MIN_SCORE < 318875 then goto N5_6;
+ else goto T5_7;
+
+N5_6:
+ if WEEKAVG < 1.5 then goto T5_5;
+ else goto T5_6;
+
+T5_5:
+ response = -0.00428011;
+ goto D5;
+
+T5_6:
+ response = 0.00596324;
+ goto D5;
+
+T5_7:
+ response = 0.0116652;
+ goto D5;
+
+D5:
+
+tnscore = tnscore + response;
+
+/* Tree 7 of 103 */
+/* N terminal nodes = 7, Depth = 4 */
+
+
+N6_1:
+ if MIN_SCORE < 222028 then goto N6_2;
+ else goto N6_4;
+
+N6_2:
+ if SUPERDUPER_AVG < 0.27 then goto T6_1;
+ else goto N6_3;
+
+T6_1:
+ response = -0.00761706;
+ goto D6;
+
+N6_3:
+ if INTLNEWS < 0.535 then goto T6_2;
+ else goto T6_3;
+
+T6_2:
+ response = -0.00274344;
+ goto D6;
+
+T6_3:
+ response = 0.0225782;
+ goto D6;
+
+N6_4:
+ if WEEKAVG < 0.93 then goto N6_5;
+ else goto N6_6;
+
+N6_5:
+ if ISTITLE_AVG < 0.71 then goto T6_4;
+ else goto T6_5;
+
+T6_4:
+ response = 0.00689051;
+ goto D6;
+
+T6_5:
+ response = -0.00500438;
+ goto D6;
+
+N6_6:
+ if MIN_SCORE < 319119 then goto T6_6;
+ else goto T6_7;
+
+T6_6:
+ response = 0.00977814;
+ goto D6;
+
+T6_7:
+ response = 0.0200288;
+ goto D6;
+
+D6:
+
+tnscore = tnscore + response;
+
+/* Tree 8 of 103 */
+/* N terminal nodes = 7, Depth = 5 */
+
+
+N7_1:
+ if MIN_SCORE < 222028 then goto N7_2;
+ else goto N7_5;
+
+N7_2:
+ if AVG_SCORE < 158974 then goto T7_1;
+ else goto N7_3;
+
+T7_1:
+ response = -0.00918892;
+ goto D7;
+
+N7_3:
+ if NUM_WORDS < 1.5 then goto N7_4;
+ else goto T7_4;
+
+N7_4:
+ if WEEKAVG < 2.93 then goto T7_2;
+ else goto T7_3;
+
+T7_2:
+ response = -0.00158808;
+ goto D7;
+
+T7_3:
+ response = 0.0119896;
+ goto D7;
+
+T7_4:
+ response = -0.00568155;
+ goto D7;
+
+N7_5:
+ if ISTITLE_AVG < 0.95 then goto N7_6;
+ else goto T7_7;
+
+N7_6:
+ if ISABSTRACT_AVG < 0.155 then goto T7_5;
+ else goto T7_6;
+
+T7_5:
+ response = 0.0116413;
+ goto D7;
+
+T7_6:
+ response = 0.00150493;
+ goto D7;
+
+T7_7:
+ response = -0.00110515;
+ goto D7;
+
+D7:
+
+tnscore = tnscore + response;
+
+/* Tree 9 of 103 */
+/* N terminal nodes = 7, Depth = 4 */
+
+
+N8_1:
+ if AVG_SCORE < 241264 then goto N8_2;
+ else goto N8_4;
+
+N8_2:
+ if MIN_SCORE < 132718 then goto T8_1;
+ else goto N8_3;
+
+T8_1:
+ response = -0.00978209;
+ goto D8;
+
+N8_3:
+ if WEEKAVG < 0.93 then goto T8_2;
+ else goto T8_3;
+
+T8_2:
+ response = -0.00610293;
+ goto D8;
+
+T8_3:
+ response = -0.000295273;
+ goto D8;
+
+N8_4:
+ if ISABSTRACT_AVG < 0.185 then goto N8_5;
+ else goto N8_6;
+
+N8_5:
+ if DAY_LW_DAY_HITS_RATIO < 11.835 then goto T8_4;
+ else goto T8_5;
+
+T8_4:
+ response = 0.00900634;
+ goto D8;
+
+T8_5:
+ response = 0.0221056;
+ goto D8;
+
+N8_6:
+ if WEEKAVG < 1.07 then goto T8_6;
+ else goto T8_7;
+
+T8_6:
+ response = -0.00392509;
+ goto D8;
+
+T8_7:
+ response = 0.00615921;
+ goto D8;
+
+D8:
+
+tnscore = tnscore + response;
+
+/* Tree 10 of 103 */
+/* N terminal nodes = 7, Depth = 6 */
+
+
+N9_1:
+ if AVG_SCORE < 233949 then goto T9_1;
+ else goto N9_2;
+
+T9_1:
+ response = -0.00503156;
+ goto D9;
+
+N9_2:
+ if ISTITLE_AVG < 0.645 then goto N9_3;
+ else goto N9_6;
+
+N9_3:
+ if TOPSTORY < 0.05 then goto N9_4;
+ else goto T9_5;
+
+N9_4:
+ if PUB_TODAY_AVG < 0.105 then goto T9_2;
+ else goto N9_5;
+
+T9_2:
+ response = -0.00286006;
+ goto D9;
+
+N9_5:
+ if DAY_PD_HITS_RATIO < 0.65 then goto T9_3;
+ else goto T9_4;
+
+T9_3:
+ response = 0.0275142;
+ goto D9;
+
+T9_4:
+ response = 0.00616295;
+ goto D9;
+
+T9_5:
+ response = 0.0129407;
+ goto D9;
+
+N9_6:
+ if DAY_LW_DAY_HITS_DERIV < 7.5 then goto T9_6;
+ else goto T9_7;
+
+T9_6:
+ response = -0.00186065;
+ goto D9;
+
+T9_7:
+ response = 0.00771893;
+ goto D9;
+
+D9:
+
+tnscore = tnscore + response;
+
+/* Tree 11 of 103 */
+/* N terminal nodes = 7, Depth = 5 */
+
+
+N10_1:
+ if AVG_SCORE < 241955 then goto N10_2;
+ else goto N10_3;
+
+N10_2:
+ if MAX_SCORE < 170767 then goto T10_1;
+ else goto T10_2;
+
+T10_1:
+ response = -0.00748858;
+ goto D10;
+
+T10_2:
+ response = -0.00266952;
+ goto D10;
+
+N10_3:
+ if MIN_SCORE < 321219 then goto N10_4;
+ else goto N10_6;
+
+N10_4:
+ if TOPSTORY < 0.05 then goto T10_3;
+ else goto N10_5;
+
+T10_3:
+ response = -0.00143781;
+ goto D10;
+
+N10_5:
+ if AVG_RANK < 9.735 then goto T10_4;
+ else goto T10_5;
+
+T10_4:
+ response = 0.00571239;
+ goto D10;
+
+T10_5:
+ response = 0.0197833;
+ goto D10;
+
+N10_6:
+ if WEEKAVG < 0.93 then goto T10_6;
+ else goto T10_7;
+
+T10_6:
+ response = 0.00690536;
+ goto D10;
+
+T10_7:
+ response = 0.017048;
+ goto D10;
+
+D10:
+
+tnscore = tnscore + response;
+
+/* Tree 12 of 103 */
+/* N terminal nodes = 7, Depth = 6 */
+
+
+N11_1:
+ if AVG_SCORE < 245333 then goto N11_2;
+ else goto N11_3;
+
+N11_2:
+ if TOPSTORY < 0.355 then goto T11_1;
+ else goto T11_2;
+
+T11_1:
+ response = -0.00376047;
+ goto D11;
+
+T11_2:
+ response = 0.00963479;
+ goto D11;
+
+N11_3:
+ if WEEKAVG < 0.93 then goto T11_3;
+ else goto N11_4;
+
+T11_3:
+ response = 0.000735298;
+ goto D11;
+
+N11_4:
+ if ISABSTRACT_AVG < 0.705 then goto N11_5;
+ else goto T11_7;
+
+N11_5:
+ if TWO_DAY_WF < 0.872534 then goto N11_6;
+ else goto T11_6;
+
+N11_6:
+ if MAX_MIN_SCORE < 52145.2 then goto T11_4;
+ else goto T11_5;
+
+T11_4:
+ response = 0.00973324;
+ goto D11;
+
+T11_5:
+ response = -0.00849394;
+ goto D11;
+
+T11_6:
+ response = 0.0177153;
+ goto D11;
+
+T11_7:
+ response = 0.000337073;
+ goto D11;
+
+D11:
+
+tnscore = tnscore + response;
+
+/* Tree 13 of 103 */
+/* N terminal nodes = 7, Depth = 6 */
+
+
+N12_1:
+ if MIN_SCORE < 219800 then goto T12_1;
+ else goto N12_2;
+
+T12_1:
+ response = -0.00352861;
+ goto D12;
+
+N12_2:
+ if ISTITLE_AVG < 0.73 then goto N12_3;
+ else goto N12_6;
+
+N12_3:
+ if BUSINESS < 0.05 then goto N12_4;
+ else goto T12_5;
+
+N12_4:
+ if PREV_DAY_HITS < 7.5 then goto N12_5;
+ else goto T12_4;
+
+N12_5:
+ if PUB_TODAY_AVG < 0.11 then goto T12_2;
+ else goto T12_3;
+
+T12_2:
+ response = 0.00174614;
+ goto D12;
+
+T12_3:
+ response = 0.0120552;
+ goto D12;
+
+T12_4:
+ response = 0.0184693;
+ goto D12;
+
+T12_5:
+ response = 0.00396387;
+ goto D12;
+
+N12_6:
+ if DAY_LW_DAY_HITS_DERIV < 7.5 then goto T12_6;
+ else goto T12_7;
+
+T12_6:
+ response = -0.00165207;
+ goto D12;
+
+T12_7:
+ response = 0.00838864;
+ goto D12;
+
+D12:
+
+tnscore = tnscore + response;
+
+/* Tree 14 of 103 */
+/* N terminal nodes = 7, Depth = 5 */
+
+
+N13_1:
+ if AVG_SCORE < 332830 then goto N13_2;
+ else goto T13_7;
+
+N13_2:
+ if AVG_SCORE < 221523 then goto N13_3;
+ else goto N13_5;
+
+N13_3:
+ if ISABSTRACT_AVG < 0.845 then goto T13_1;
+ else goto N13_4;
+
+T13_1:
+ response = -0.00518819;
+ goto D13;
+
+N13_4:
+ if MAX_MIN_SCORE < 41116.5 then goto T13_2;
+ else goto T13_3;
+
+T13_2:
+ response = -0.0022638;
+ goto D13;
+
+T13_3:
+ response = 0.010561;
+ goto D13;
+
+N13_5:
+ if MIN_SCORE < 275020 then goto N13_6;
+ else goto T13_6;
+
+N13_6:
+ if ISABSTRACT_AVG < 0.13 then goto T13_4;
+ else goto T13_5;
+
+T13_4:
+ response = 0.00532774;
+ goto D13;
+
+T13_5:
+ response = -0.00140785;
+ goto D13;
+
+T13_6:
+ response = -0.00673306;
+ goto D13;
+
+T13_7:
+ response = 0.0089962;
+ goto D13;
+
+D13:
+
+tnscore = tnscore + response;
+
+/* Tree 15 of 103 */
+/* N terminal nodes = 7, Depth = 6 */
+
+
+N14_1:
+ if AVG_SCORE < 230217 then goto T14_1;
+ else goto N14_2;
+
+T14_1:
+ response = -0.00305127;
+ goto D14;
+
+N14_2:
+ if WEEKAVG < 0.93 then goto T14_2;
+ else goto N14_3;
+
+T14_2:
+ response = 0.000779225;
+ goto D14;
+
+N14_3:
+ if BUSINESS < 0.05 then goto N14_4;
+ else goto N14_6;
+
+N14_4:
+ if LOCALNEWS < 0.47 then goto N14_5;
+ else goto T14_5;
+
+N14_5:
+ if MAX_MIN_SCORE < 28240.8 then goto T14_3;
+ else goto T14_4;
+
+T14_3:
+ response = 0.0179967;
+ goto D14;
+
+T14_4:
+ response = 0.00806848;
+ goto D14;
+
+T14_5:
+ response = -0.00831202;
+ goto D14;
+
+N14_6:
+ if AVG_SCORE < 340223 then goto T14_6;
+ else goto T14_7;
+
+T14_6:
+ response = 0.000231883;
+ goto D14;
+
+T14_7:
+ response = 0.0140119;
+ goto D14;
+
+D14:
+
+tnscore = tnscore + response;
+
+/* Tree 16 of 103 */
+/* N terminal nodes = 7, Depth = 6 */
+
+
+N15_1:
+ if AVG_SCORE < 245333 then goto T15_1;
+ else goto N15_2;
+
+T15_1:
+ response = -0.00229594;
+ goto D15;
+
+N15_2:
+ if ISABSTRACT_AVG < 0.315 then goto N15_3;
+ else goto T15_7;
+
+N15_3:
+ if LOCALNEWS < 0.05 then goto N15_4;
+ else goto N15_5;
+
+N15_4:
+ if DAY_LW_DAY_HITS_RATIO < 1.75 then goto T15_2;
+ else goto T15_3;
+
+T15_2:
+ response = -0.0039683;
+ goto D15;
+
+T15_3:
+ response = 0.00569577;
+ goto D15;
+
+N15_5:
+ if PREV_DAY_HITS < 8.5 then goto T15_4;
+ else goto N15_6;
+
+T15_4:
+ response = 0.007769;
+ goto D15;
+
+N15_6:
+ if DAY_HITS < 2.5 then goto T15_5;
+ else goto T15_6;
+
+T15_5:
+ response = 0.0344185;
+ goto D15;
+
+T15_6:
+ response = 0.0117709;
+ goto D15;
+
+T15_7:
+ response = -0.000676423;
+ goto D15;
+
+D15:
+
+tnscore = tnscore + response;
+
+/* Tree 17 of 103 */
+/* N terminal nodes = 7, Depth = 5 */
+
+
+N16_1:
+ if MAX_SCORE < 249988 then goto N16_2;
+ else goto N16_5;
+
+N16_2:
+ if INTLNEWS < 0.105 then goto T16_1;
+ else goto N16_3;
+
+T16_1:
+ response = -0.00430418;
+ goto D16;
+
+N16_3:
+ if AVG_SCORE < 158414 then goto T16_2;
+ else goto N16_4;
+
+T16_2:
+ response = -0.00416318;
+ goto D16;
+
+N16_4:
+ if MAX_SCORE < 242790 then goto T16_3;
+ else goto T16_4;
+
+T16_3:
+ response = 0.00543383;
+ goto D16;
+
+T16_4:
+ response = -0.00915253;
+ goto D16;
+
+N16_5:
+ if ISABSTRACT_AVG < 0.155 then goto T16_5;
+ else goto N16_6;
+
+T16_5:
+ response = 0.00464962;
+ goto D16;
+
+N16_6:
+ if SPORTS < 0.365 then goto T16_6;
+ else goto T16_7;
+
+T16_6:
+ response = -0.00278462;
+ goto D16;
+
+T16_7:
+ response = 0.0111898;
+ goto D16;
+
+D16:
+
+tnscore = tnscore + response;
+
+/* Tree 18 of 103 */
+/* N terminal nodes = 7, Depth = 6 */
+
+
+N17_1:
+ if AVG_SCORE < 249330 then goto T17_1;
+ else goto N17_2;
+
+T17_1:
+ response = -0.00159136;
+ goto D17;
+
+N17_2:
+ if WEEKAVG < 1.07 then goto N17_3;
+ else goto N17_4;
+
+N17_3:
+ if TOPSTORY < 0.07 then goto T17_2;
+ else goto T17_3;
+
+T17_2:
+ response = -0.00243507;
+ goto D17;
+
+T17_3:
+ response = 0.00585214;
+ goto D17;
+
+N17_4:
+ if TWO_DAY_WF < 0.9518 then goto N17_5;
+ else goto T17_7;
+
+N17_5:
+ if EIGHT_HOUR_WF < 0.108586 then goto N17_6;
+ else goto T17_6;
+
+N17_6:
+ if INTLNEWS < 0.42 then goto T17_4;
+ else goto T17_5;
+
+T17_4:
+ response = 0.00435459;
+ goto D17;
+
+T17_5:
+ response = 0.0191599;
+ goto D17;
+
+T17_6:
+ response = -0.00770634;
+ goto D17;
+
+T17_7:
+ response = 0.013571;
+ goto D17;
+
+D17:
+
+tnscore = tnscore + response;
+
+/* Tree 19 of 103 */
+/* N terminal nodes = 7, Depth = 5 */
+
+
+N18_1:
+ if AVG_SCORE < 332253 then goto N18_2;
+ else goto N18_4;
+
+N18_2:
+ if TOPSTORY < 0.355 then goto N18_3;
+ else goto T18_3;
+
+N18_3:
+ if BUSINESS < 0.05 then goto T18_1;
+ else goto T18_2;
+
+T18_1:
+ response = 0.000641958;
+ goto D18;
+
+T18_2:
+ response = -0.00274201;
+ goto D18;
+
+T18_3:
+ response = 0.00886024;
+ goto D18;
+
+N18_4:
+ if DAY_PD_HITS_DERIV < 1.5 then goto N18_5;
+ else goto T18_7;
+
+N18_5:
+ if AVG_SCORE < 336554 then goto T18_4;
+ else goto N18_6;
+
+T18_4:
+ response = 0.0191918;
+ goto D18;
+
+N18_6:
+ if SUPERDUPER_AVG < 0.415 then goto T18_5;
+ else goto T18_6;
+
+T18_5:
+ response = -0.00116436;
+ goto D18;
+
+T18_6:
+ response = 0.0183934;
+ goto D18;
+
+T18_7:
+ response = 0.0116471;
+ goto D18;
+
+D18:
+
+tnscore = tnscore + response;
+
+/* Tree 20 of 103 */
+/* N terminal nodes = 7, Depth = 6 */
+
+
+N19_1:
+ if MAX_SCORE < 249072 then goto N19_2;
+ else goto N19_6;
+
+N19_2:
+ if INTLNEWS < 0.185 then goto T19_1;
+ else goto N19_3;
+
+T19_1:
+ response = -0.00383726;
+ goto D19;
+
+N19_3:
+ if ISABSTRACT_AVG < 0.61 then goto T19_2;
+ else goto N19_4;
+
+T19_2:
+ response = -0.00202529;
+ goto D19;
+
+N19_4:
+ if WEEKAVG < 0.785 then goto T19_3;
+ else goto N19_5;
+
+T19_3:
+ response = -0.0038571;
+ goto D19;
+
+N19_5:
+ if AVG_SCORE < 169471 then goto T19_4;
+ else goto T19_5;
+
+T19_4:
+ response = 0.00474293;
+ goto D19;
+
+T19_5:
+ response = 0.0278332;
+ goto D19;
+
+N19_6:
+ if BUSINESS < 0.05 then goto T19_6;
+ else goto T19_7;
+
+T19_6:
+ response = 0.00491784;
+ goto D19;
+
+T19_7:
+ response = -0.000728088;
+ goto D19;
+
+D19:
+
+tnscore = tnscore + response;
+
+/* Tree 21 of 103 */
+/* N terminal nodes = 7, Depth = 5 */
+
+
+N20_1:
+ if AVG_SCORE < 223608 then goto T20_1;
+ else goto N20_2;
+
+T20_1:
+ response = -0.00242896;
+ goto D20;
+
+N20_2:
+ if PREV_DAY_HITS < 7.5 then goto N20_3;
+ else goto N20_5;
+
+N20_3:
+ if ISABSTRACT_AVG < 0.05 then goto N20_4;
+ else goto T20_4;
+
+N20_4:
+ if MAX_MIN_RANK < 7 then goto T20_2;
+ else goto T20_3;
+
+T20_2:
+ response = 0.00151785;
+ goto D20;
+
+T20_3:
+ response = 0.0118374;
+ goto D20;
+
+T20_4:
+ response = -0.00165444;
+ goto D20;
+
+N20_5:
+ if SPORTS < 0.34 then goto N20_6;
+ else goto T20_7;
+
+N20_6:
+ if DAY_WEEK_AVG_DERIV < -1.93 then goto T20_5;
+ else goto T20_6;
+
+T20_5:
+ response = -0.00307953;
+ goto D20;
+
+T20_6:
+ response = 0.00717407;
+ goto D20;
+
+T20_7:
+ response = 0.0154963;
+ goto D20;
+
+D20:
+
+tnscore = tnscore + response;
+
+/* Tree 22 of 103 */
+/* N terminal nodes = 7, Depth = 6 */
+
+
+N21_1:
+ if MIN_SCORE < 319241 then goto N21_2;
+ else goto N21_6;
+
+N21_2:
+ if INTLNEWS < 0.73 then goto N21_3;
+ else goto T21_5;
+
+N21_3:
+ if TOPSTORY < 0.355 then goto N21_4;
+ else goto T21_4;
+
+N21_4:
+ if NUM_WORDS < 2.5 then goto N21_5;
+ else goto T21_3;
+
+N21_5:
+ if PREV_DAY_HITS < 3.5 then goto T21_1;
+ else goto T21_2;
+
+T21_1:
+ response = -0.00228523;
+ goto D21;
+
+T21_2:
+ response = 0.00146239;
+ goto D21;
+
+T21_3:
+ response = -0.00850081;
+ goto D21;
+
+T21_4:
+ response = 0.00776825;
+ goto D21;
+
+T21_5:
+ response = 0.0160753;
+ goto D21;
+
+N21_6:
+ if DAY_LW_DAY_HITS_DERIV < 12.5 then goto T21_6;
+ else goto T21_7;
+
+T21_6:
+ response = 0.00439757;
+ goto D21;
+
+T21_7:
+ response = 0.0197836;
+ goto D21;
+
+D21:
+
+tnscore = tnscore + response;
+
+/* Tree 23 of 103 */
+/* N terminal nodes = 7, Depth = 6 */
+
+
+N22_1:
+ if INTLNEWS < 0.705 then goto N22_2;
+ else goto T22_7;
+
+N22_2:
+ if TOPSTORY < 0.355 then goto N22_3;
+ else goto T22_6;
+
+N22_3:
+ if MIN_SCORE < 323992 then goto N22_4;
+ else goto N22_6;
+
+N22_4:
+ if LIFESTYLE < 0.13 then goto N22_5;
+ else goto T22_3;
+
+N22_5:
+ if LOCALNEWS < 0.315 then goto T22_1;
+ else goto T22_2;
+
+T22_1:
+ response = -0.000291455;
+ goto D22;
+
+T22_2:
+ response = -0.00459663;
+ goto D22;
+
+T22_3:
+ response = -0.00868291;
+ goto D22;
+
+N22_6:
+ if MIN_SCORE < 325835 then goto T22_4;
+ else goto T22_5;
+
+T22_4:
+ response = 0.0255955;
+ goto D22;
+
+T22_5:
+ response = 0.00222024;
+ goto D22;
+
+T22_6:
+ response = 0.00689548;
+ goto D22;
+
+T22_7:
+ response = 0.0147049;
+ goto D22;
+
+D22:
+
+tnscore = tnscore + response;
+
+/* Tree 24 of 103 */
+/* N terminal nodes = 7, Depth = 6 */
+
+
+N23_1:
+ if DAY_WEEK_AVG_DERIV < 41.5 then goto N23_2;
+ else goto T23_7;
+
+N23_2:
+ if AVG_SCORE < 222620 then goto T23_1;
+ else goto N23_3;
+
+T23_1:
+ response = -0.00230434;
+ goto D23;
+
+N23_3:
+ if ISTITLE_AVG < 0.95 then goto N23_4;
+ else goto T23_6;
+
+N23_4:
+ if DAY_LW_DAY_HITS_RATIO < 2.9 then goto N23_5;
+ else goto N23_6;
+
+N23_5:
+ if ISABSTRACT_AVG < 0.685 then goto T23_2;
+ else goto T23_3;
+
+T23_2:
+ response = -0.00098145;
+ goto D23;
+
+T23_3:
+ response = 0.0175646;
+ goto D23;
+
+N23_6:
+ if FOUR_HOUR_WF < 0.0415469 then goto T23_4;
+ else goto T23_5;
+
+T23_4:
+ response = 0.00693887;
+ goto D23;
+
+T23_5:
+ response = 0.000352143;
+ goto D23;
+
+T23_6:
+ response = -0.00149738;
+ goto D23;
+
+T23_7:
+ response = 0.0156711;
+ goto D23;
+
+D23:
+
+tnscore = tnscore + response;
+
+/* Tree 25 of 103 */
+/* N terminal nodes = 7, Depth = 7 */
+
+
+N24_1:
+ if BUSINESS < 0.105 then goto N24_2;
+ else goto T24_7;
+
+N24_2:
+ if DAY_WEEK_AVG_RATIO < 5.705 then goto N24_3;
+ else goto T24_6;
+
+N24_3:
+ if AVG_SCORE < 155902 then goto T24_1;
+ else goto N24_4;
+
+T24_1:
+ response = -0.0033031;
+ goto D24;
+
+N24_4:
+ if WEEKAVG < 0.64 then goto N24_5;
+ else goto T24_5;
+
+N24_5:
+ if MAX_SCORE < 363895 then goto T24_2;
+ else goto N24_6;
+
+T24_2:
+ response = -0.00281287;
+ goto D24;
+
+N24_6:
+ if MAX_MIN_SCORE < 19200.5 then goto T24_3;
+ else goto T24_4;
+
+T24_3:
+ response = -0.00201482;
+ goto D24;
+
+T24_4:
+ response = 0.0209412;
+ goto D24;
+
+T24_5:
+ response = 0.00313704;
+ goto D24;
+
+T24_6:
+ response = 0.0198315;
+ goto D24;
+
+T24_7:
+ response = -0.0020926;
+ goto D24;
+
+D24:
+
+tnscore = tnscore + response;
+
+/* Tree 26 of 103 */
+/* N terminal nodes = 7, Depth = 5 */
+
+
+N25_1:
+ if DAY_PD_HITS_DERIV < -8.5 then goto N25_2;
+ else goto N25_4;
+
+N25_2:
+ if SPORTS < 0.42 then goto N25_3;
+ else goto T25_3;
+
+N25_3:
+ if TOPSTORY < 0.05 then goto T25_1;
+ else goto T25_2;
+
+T25_1:
+ response = -0.00256178;
+ goto D25;
+
+T25_2:
+ response = 0.0069554;
+ goto D25;
+
+T25_3:
+ response = 0.0189865;
+ goto D25;
+
+N25_4:
+ if MAX_SCORE < 455757 then goto N25_5;
+ else goto T25_7;
+
+N25_5:
+ if LIFESTYLE < 0.13 then goto N25_6;
+ else goto T25_6;
+
+N25_6:
+ if DAY_WEEK_AVG_RATIO < 4.535 then goto T25_4;
+ else goto T25_5;
+
+T25_4:
+ response = -0.00125806;
+ goto D25;
+
+T25_5:
+ response = 0.00573954;
+ goto D25;
+
+T25_6:
+ response = -0.00869664;
+ goto D25;
+
+T25_7:
+ response = 0.00982766;
+ goto D25;
+
+D25:
+
+tnscore = tnscore + response;
+
+/* Tree 27 of 103 */
+/* N terminal nodes = 7, Depth = 6 */
+
+
+N26_1:
+ if AVG_SCORE < 158740 then goto T26_1;
+ else goto N26_2;
+
+T26_1:
+ response = -0.00306382;
+ goto D26;
+
+N26_2:
+ if WEEKAVG < 0.93 then goto N26_3;
+ else goto N26_4;
+
+N26_3:
+ if TOPSTORY < 0.365 then goto T26_2;
+ else goto T26_3;
+
+T26_2:
+ response = -0.00140654;
+ goto D26;
+
+T26_3:
+ response = 0.00834836;
+ goto D26;
+
+N26_4:
+ if BUSINESS < 0.05 then goto N26_5;
+ else goto T26_7;
+
+N26_5:
+ if MAX_MIN_SCORE < 52064.2 then goto N26_6;
+ else goto T26_6;
+
+N26_6:
+ if MAX_MIN_RANK < 7 then goto T26_4;
+ else goto T26_5;
+
+T26_4:
+ response = 0.00487329;
+ goto D26;
+
+T26_5:
+ response = 0.0143334;
+ goto D26;
+
+T26_6:
+ response = -0.00637212;
+ goto D26;
+
+T26_7:
+ response = -0.000162153;
+ goto D26;
+
+D26:
+
+tnscore = tnscore + response;
+
+/* Tree 28 of 103 */
+/* N terminal nodes = 7, Depth = 6 */
+
+
+N27_1:
+ if DAY_PD_HITS_DERIV < -4.5 then goto N27_2;
+ else goto N27_6;
+
+N27_2:
+ if LOCALNEWS < 0.355 then goto N27_3;
+ else goto T27_5;
+
+N27_3:
+ if HEALTH < 0.05 then goto N27_4;
+ else goto T27_4;
+
+N27_4:
+ if MAX_MIN_SCORE < 42320.2 then goto T27_1;
+ else goto N27_5;
+
+T27_1:
+ response = 0.000165828;
+ goto D27;
+
+N27_5:
+ if TWELVE_HOUR_WF < 0.0923295 then goto T27_2;
+ else goto T27_3;
+
+T27_2:
+ response = 0.00978237;
+ goto D27;
+
+T27_3:
+ response = -0.00925785;
+ goto D27;
+
+T27_4:
+ response = 0.0176032;
+ goto D27;
+
+T27_5:
+ response = -0.00980315;
+ goto D27;
+
+N27_6:
+ if DAY_WEEK_AVG_DERIV < 65.215 then goto T27_6;
+ else goto T27_7;
+
+T27_6:
+ response = -0.000940015;
+ goto D27;
+
+T27_7:
+ response = 0.0153051;
+ goto D27;
+
+D27:
+
+tnscore = tnscore + response;
+
+/* Tree 29 of 103 */
+/* N terminal nodes = 7, Depth = 6 */
+
+
+N28_1:
+ if INTLNEWS < 0.53 then goto N28_2;
+ else goto N28_6;
+
+N28_2:
+ if DAY_PD_HITS_RATIO < 0.305 then goto N28_3;
+ else goto T28_5;
+
+N28_3:
+ if SPORTS < 0.115 then goto T28_1;
+ else goto N28_4;
+
+T28_1:
+ response = 0.000204707;
+ goto D28;
+
+N28_4:
+ if MAX_SCORE < 258205 then goto T28_2;
+ else goto N28_5;
+
+T28_2:
+ response = 0.00170055;
+ goto D28;
+
+N28_5:
+ if AVG_SCORE < 263393 then goto T28_3;
+ else goto T28_4;
+
+T28_3:
+ response = 0.0247726;
+ goto D28;
+
+T28_4:
+ response = 0.00690842;
+ goto D28;
+
+T28_5:
+ response = -0.00116708;
+ goto D28;
+
+N28_6:
+ if DAY_LW_DAY_HITS_DERIV < 6.5 then goto T28_6;
+ else goto T28_7;
+
+T28_6:
+ response = -5.66203e-05;
+ goto D28;
+
+T28_7:
+ response = 0.0136829;
+ goto D28;
+
+D28:
+
+tnscore = tnscore + response;
+
+/* Tree 30 of 103 */
+/* N terminal nodes = 7, Depth = 5 */
+
+
+N29_1:
+ if TOPSTORY < 0.355 then goto N29_2;
+ else goto N29_6;
+
+N29_2:
+ if MAX_SCORE < 455757 then goto N29_3;
+ else goto N29_5;
+
+N29_3:
+ if PREV_DAY_HITS < 59.5 then goto N29_4;
+ else goto T29_3;
+
+N29_4:
+ if MIN_SCORE < 132399 then goto T29_1;
+ else goto T29_2;
+
+T29_1:
+ response = -0.00370024;
+ goto D29;
+
+T29_2:
+ response = -2.34946e-05;
+ goto D29;
+
+T29_3:
+ response = 0.0131047;
+ goto D29;
+
+N29_5:
+ if SUPERDUPER_AVG < 0.105 then goto T29_4;
+ else goto T29_5;
+
+T29_4:
+ response = -0.00138025;
+ goto D29;
+
+T29_5:
+ response = 0.0159936;
+ goto D29;
+
+N29_6:
+ if AVG_RANK < 9.55 then goto T29_6;
+ else goto T29_7;
+
+T29_6:
+ response = 0.00325951;
+ goto D29;
+
+T29_7:
+ response = 0.0248619;
+ goto D29;
+
+D29:
+
+tnscore = tnscore + response;
+
+/* Tree 31 of 103 */
+/* N terminal nodes = 7, Depth = 5 */
+
+
+N30_1:
+ if TOPSTORY < 0.21 then goto N30_2;
+ else goto N30_5;
+
+N30_2:
+ if PREV_DAY_HITS < 40.5 then goto N30_3;
+ else goto T30_4;
+
+N30_3:
+ if DAY_WEEK_AVG_RATIO < 2.665 then goto T30_1;
+ else goto N30_4;
+
+T30_1:
+ response = -0.00132885;
+ goto D30;
+
+N30_4:
+ if AVG_SCORE < 321396 then goto T30_2;
+ else goto T30_3;
+
+T30_2:
+ response = 0.000375419;
+ goto D30;
+
+T30_3:
+ response = 0.0087578;
+ goto D30;
+
+T30_4:
+ response = 0.0103933;
+ goto D30;
+
+N30_5:
+ if MAX_SCORE < 258688 then goto T30_5;
+ else goto N30_6;
+
+T30_5:
+ response = -0.00128842;
+ goto D30;
+
+N30_6:
+ if DAY_LW_DAY_HITS_RATIO < 10.5 then goto T30_6;
+ else goto T30_7;
+
+T30_6:
+ response = 0.00789361;
+ goto D30;
+
+T30_7:
+ response = -0.00472212;
+ goto D30;
+
+D30:
+
+tnscore = tnscore + response;
+
+/* Tree 32 of 103 */
+/* N terminal nodes = 7, Depth = 7 */
+
+
+N31_1:
+ if LIFESTYLE < 0.13 then goto N31_2;
+ else goto T31_7;
+
+N31_2:
+ if MAX_SCORE < 170767 then goto T31_1;
+ else goto N31_3;
+
+T31_1:
+ response = -0.00265193;
+ goto D31;
+
+N31_3:
+ if REGIONALNEWS < 0.225 then goto N31_4;
+ else goto T31_6;
+
+N31_4:
+ if INTLNEWS < 0.73 then goto N31_5;
+ else goto T31_5;
+
+N31_5:
+ if AVG_SCORE < 446461 then goto N31_6;
+ else goto T31_4;
+
+N31_6:
+ if DAY_LW_DAY_HITS_RATIO < 11.835 then goto T31_2;
+ else goto T31_3;
+
+T31_2:
+ response = 0.000302165;
+ goto D31;
+
+T31_3:
+ response = 0.00420729;
+ goto D31;
+
+T31_4:
+ response = 0.0104384;
+ goto D31;
+
+T31_5:
+ response = 0.0112014;
+ goto D31;
+
+T31_6:
+ response = -0.0150576;
+ goto D31;
+
+T31_7:
+ response = -0.00724807;
+ goto D31;
+
+D31:
+
+tnscore = tnscore + response;
+
+/* Tree 33 of 103 */
+/* N terminal nodes = 7, Depth = 6 */
+
+
+N32_1:
+ if TOPSTORY < 0.21 then goto N32_2;
+ else goto N32_3;
+
+N32_2:
+ if LW_DAY_HITS < 0.5 then goto T32_1;
+ else goto T32_2;
+
+T32_1:
+ response = -0.000272826;
+ goto D32;
+
+T32_2:
+ response = -0.0037519;
+ goto D32;
+
+N32_3:
+ if MAX_SCORE < 249540 then goto T32_3;
+ else goto N32_4;
+
+T32_3:
+ response = -0.00257574;
+ goto D32;
+
+N32_4:
+ if DAY_WEEK_AVG_DERIV < 3.285 then goto T32_4;
+ else goto N32_5;
+
+T32_4:
+ response = 0.00890149;
+ goto D32;
+
+N32_5:
+ if BUSINESS < 0.05 then goto N32_6;
+ else goto T32_7;
+
+N32_6:
+ if EIGHT_HOUR_WF < 0.108586 then goto T32_5;
+ else goto T32_6;
+
+T32_5:
+ response = -0.00485603;
+ goto D32;
+
+T32_6:
+ response = 0.0137625;
+ goto D32;
+
+T32_7:
+ response = -0.0117843;
+ goto D32;
+
+D32:
+
+tnscore = tnscore + response;
+
+/* Tree 34 of 103 */
+/* N terminal nodes = 7, Depth = 4 */
+
+
+N33_1:
+ if BUSINESS < 0.05 then goto N33_2;
+ else goto N33_4;
+
+N33_2:
+ if PREV_DAY_HITS < 2.5 then goto N33_3;
+ else goto T33_3;
+
+N33_3:
+ if MAX_MIN_RANK < 9 then goto T33_1;
+ else goto T33_2;
+
+T33_1:
+ response = -0.000115002;
+ goto D33;
+
+T33_2:
+ response = 0.013627;
+ goto D33;
+
+T33_3:
+ response = 0.00426589;
+ goto D33;
+
+N33_4:
+ if SPORTS < 0.05 then goto N33_5;
+ else goto N33_6;
+
+N33_5:
+ if WEEKAVG < 1.07 then goto T33_4;
+ else goto T33_5;
+
+T33_4:
+ response = -0.00209775;
+ goto D33;
+
+T33_5:
+ response = 0.00207151;
+ goto D33;
+
+N33_6:
+ if MAX_SCORE < 282458 then goto T33_6;
+ else goto T33_7;
+
+T33_6:
+ response = -0.00363773;
+ goto D33;
+
+T33_7:
+ response = -0.0170095;
+ goto D33;
+
+D33:
+
+tnscore = tnscore + response;
+
+/* Tree 35 of 103 */
+/* N terminal nodes = 7, Depth = 6 */
+
+
+N34_1:
+ if PREV_DAY_HITS < 6.5 then goto N34_2;
+ else goto N34_3;
+
+N34_2:
+ if INTLNEWS < 0.73 then goto T34_1;
+ else goto T34_2;
+
+T34_1:
+ response = -0.000840229;
+ goto D34;
+
+T34_2:
+ response = 0.0123079;
+ goto D34;
+
+N34_3:
+ if TWO_DAY_WF < 0.647854 then goto T34_3;
+ else goto N34_4;
+
+T34_3:
+ response = -0.00158583;
+ goto D34;
+
+N34_4:
+ if DAY_WEEK_AVG_RATIO < 0.525 then goto T34_4;
+ else goto N34_5;
+
+T34_4:
+ response = -0.00426295;
+ goto D34;
+
+N34_5:
+ if TWELVE_HOUR_WF < 0.0863095 then goto T34_5;
+ else goto N34_6;
+
+T34_5:
+ response = 0.010427;
+ goto D34;
+
+N34_6:
+ if WEEKAVG < 2.5 then goto T34_6;
+ else goto T34_7;
+
+T34_6:
+ response = -0.00797465;
+ goto D34;
+
+T34_7:
+ response = 0.00511912;
+ goto D34;
+
+D34:
+
+tnscore = tnscore + response;
+
+/* Tree 36 of 103 */
+/* N terminal nodes = 7, Depth = 7 */
+
+
+N35_1:
+ if DAY_WEEK_AVG_DERIV < 43.215 then goto N35_2;
+ else goto T35_7;
+
+N35_2:
+ if MAX_SCORE < 171575 then goto T35_1;
+ else goto N35_3;
+
+T35_1:
+ response = -0.00279218;
+ goto D35;
+
+N35_3:
+ if INTLNEWS < 0.73 then goto N35_4;
+ else goto T35_6;
+
+N35_4:
+ if LIFESTYLE < 0.13 then goto N35_5;
+ else goto T35_5;
+
+N35_5:
+ if AVG_RANK < 5.29 then goto N35_6;
+ else goto T35_4;
+
+N35_6:
+ if AVG_RANK < 4.145 then goto T35_2;
+ else goto T35_3;
+
+T35_2:
+ response = -0.00292507;
+ goto D35;
+
+T35_3:
+ response = 0.0109271;
+ goto D35;
+
+T35_4:
+ response = 0.000253288;
+ goto D35;
+
+T35_5:
+ response = -0.00584756;
+ goto D35;
+
+T35_6:
+ response = 0.0132182;
+ goto D35;
+
+T35_7:
+ response = 0.00991648;
+ goto D35;
+
+D35:
+
+tnscore = tnscore + response;
+
+/* Tree 37 of 103 */
+/* N terminal nodes = 7, Depth = 7 */
+
+
+N36_1:
+ if DAY_WEEK_AVG_RATIO < 0.305 then goto T36_1;
+ else goto N36_2;
+
+T36_1:
+ response = -0.00885189;
+ goto D36;
+
+N36_2:
+ if NATIONALNEWS < 0.105 then goto T36_2;
+ else goto N36_3;
+
+T36_2:
+ response = -0.000531735;
+ goto D36;
+
+N36_3:
+ if TWELVE_HOUR_WF < 0.685185 then goto N36_4;
+ else goto T36_7;
+
+N36_4:
+ if SPORTS < 0.465 then goto N36_5;
+ else goto T36_6;
+
+N36_5:
+ if ISTITLE_AVG < 0.105 then goto N36_6;
+ else goto T36_5;
+
+N36_6:
+ if DAY_PD_HITS_RATIO < 0.365 then goto T36_3;
+ else goto T36_4;
+
+T36_3:
+ response = -0.00269593;
+ goto D36;
+
+T36_4:
+ response = 0.0112221;
+ goto D36;
+
+T36_5:
+ response = 0.000819631;
+ goto D36;
+
+T36_6:
+ response = 0.0143634;
+ goto D36;
+
+T36_7:
+ response = -0.00725209;
+ goto D36;
+
+D36:
+
+tnscore = tnscore + response;
+
+/* Tree 38 of 103 */
+/* N terminal nodes = 7, Depth = 6 */
+
+
+N37_1:
+ if AVG_SCORE < 340384 then goto N37_2;
+ else goto N37_3;
+
+N37_2:
+ if AVG_SCORE < 336375 then goto T37_1;
+ else goto T37_2;
+
+T37_1:
+ response = -0.000508552;
+ goto D37;
+
+T37_2:
+ response = -0.0113797;
+ goto D37;
+
+N37_3:
+ if MIN_SCORE < 326287 then goto T37_3;
+ else goto N37_4;
+
+T37_3:
+ response = 0.0209452;
+ goto D37;
+
+N37_4:
+ if ONE_DAY_WF < 0.567629 then goto T37_4;
+ else goto N37_5;
+
+T37_4:
+ response = -0.00150548;
+ goto D37;
+
+N37_5:
+ if ENTERTAINMENT < 0.05 then goto N37_6;
+ else goto T37_7;
+
+N37_6:
+ if AVG_SCORE < 375038 then goto T37_5;
+ else goto T37_6;
+
+T37_5:
+ response = 0.0210937;
+ goto D37;
+
+T37_6:
+ response = 0.00330692;
+ goto D37;
+
+T37_7:
+ response = -0.00420613;
+ goto D37;
+
+D37:
+
+tnscore = tnscore + response;
+
+/* Tree 39 of 103 */
+/* N terminal nodes = 7, Depth = 6 */
+
+
+N38_1:
+ if AVG_SCORE < 245150 then goto T38_1;
+ else goto N38_2;
+
+T38_1:
+ response = -0.000756654;
+ goto D38;
+
+N38_2:
+ if FOUR_HOUR_WF < 0.436508 then goto N38_3;
+ else goto T38_7;
+
+N38_3:
+ if TWO_DAY_WF < 0.876894 then goto N38_4;
+ else goto N38_5;
+
+N38_4:
+ if DAY_HITS < 4.5 then goto T38_2;
+ else goto T38_3;
+
+T38_2:
+ response = 0.00184962;
+ goto D38;
+
+T38_3:
+ response = -0.00446764;
+ goto D38;
+
+N38_5:
+ if AVG_SCORE < 247846 then goto T38_4;
+ else goto N38_6;
+
+T38_4:
+ response = 0.0195391;
+ goto D38;
+
+N38_6:
+ if MAX_SCORE < 264008 then goto T38_5;
+ else goto T38_6;
+
+T38_5:
+ response = -0.0119329;
+ goto D38;
+
+T38_6:
+ response = 0.0045953;
+ goto D38;
+
+T38_7:
+ response = -0.0101769;
+ goto D38;
+
+D38:
+
+tnscore = tnscore + response;
+
+/* Tree 40 of 103 */
+/* N terminal nodes = 7, Depth = 5 */
+
+
+N39_1:
+ if TOPSTORY < 0.39 then goto N39_2;
+ else goto N39_4;
+
+N39_2:
+ if SPORTS < 0.73 then goto T39_1;
+ else goto N39_3;
+
+T39_1:
+ response = -0.000167518;
+ goto D39;
+
+N39_3:
+ if WEEKAVG < 0.785 then goto T39_2;
+ else goto T39_3;
+
+T39_2:
+ response = 9.19437e-05;
+ goto D39;
+
+T39_3:
+ response = 0.00941928;
+ goto D39;
+
+N39_4:
+ if AVG_RANK < 9.55 then goto N39_5;
+ else goto T39_7;
+
+N39_5:
+ if AVG_RANK < 8.9 then goto N39_6;
+ else goto T39_6;
+
+N39_6:
+ if MAX_SCORE < 270912 then goto T39_4;
+ else goto T39_5;
+
+T39_4:
+ response = 0.0142439;
+ goto D39;
+
+T39_5:
+ response = -0.000762818;
+ goto D39;
+
+T39_6:
+ response = -0.00563315;
+ goto D39;
+
+T39_7:
+ response = 0.019371;
+ goto D39;
+
+D39:
+
+tnscore = tnscore + response;
+
+/* Tree 41 of 103 */
+/* N terminal nodes = 7, Depth = 7 */
+
+
+N40_1:
+ if MAX_MIN_SCORE < 16050.8 then goto T40_1;
+ else goto N40_2;
+
+T40_1:
+ response = -0.00187676;
+ goto D40;
+
+N40_2:
+ if LW_DAY_HITS < 3.5 then goto N40_3;
+ else goto T40_7;
+
+N40_3:
+ if MAX_SCORE < 178349 then goto T40_2;
+ else goto N40_4;
+
+T40_2:
+ response = -0.00168833;
+ goto D40;
+
+N40_4:
+ if WEEKAVG < 0.93 then goto T40_3;
+ else goto N40_5;
+
+T40_3:
+ response = -9.59413e-05;
+ goto D40;
+
+N40_5:
+ if FOUR_HOUR_WF < 0.0415469 then goto T40_4;
+ else goto N40_6;
+
+T40_4:
+ response = 0.00437212;
+ goto D40;
+
+N40_6:
+ if MAX_MIN_SCORE < 26173.2 then goto T40_5;
+ else goto T40_6;
+
+T40_5:
+ response = 0.013711;
+ goto D40;
+
+T40_6:
+ response = -0.00373247;
+ goto D40;
+
+T40_7:
+ response = -0.00746144;
+ goto D40;
+
+D40:
+
+tnscore = tnscore + response;
+
+/* Tree 42 of 103 */
+/* N terminal nodes = 7, Depth = 5 */
+
+
+N41_1:
+ if SPORTS < 0.73 then goto N41_2;
+ else goto N41_5;
+
+N41_2:
+ if INTLNEWS < 0.53 then goto T41_1;
+ else goto N41_3;
+
+T41_1:
+ response = -0.000662401;
+ goto D41;
+
+N41_3:
+ if TWO_DAY_WF < 0.564784 then goto T41_2;
+ else goto N41_4;
+
+T41_2:
+ response = 0.0155579;
+ goto D41;
+
+N41_4:
+ if DAY_WEEK_AVG_RATIO < 4.08 then goto T41_3;
+ else goto T41_4;
+
+T41_3:
+ response = -0.00298146;
+ goto D41;
+
+T41_4:
+ response = 0.015513;
+ goto D41;
+
+N41_5:
+ if DAY_PD_HITS_RATIO < 0.31 then goto T41_5;
+ else goto N41_6;
+
+T41_5:
+ response = 0.0153445;
+ goto D41;
+
+N41_6:
+ if SUPERDUPER_AVG < 0.155 then goto T41_6;
+ else goto T41_7;
+
+T41_6:
+ response = 0.00486013;
+ goto D41;
+
+T41_7:
+ response = -0.00508073;
+ goto D41;
+
+D41:
+
+tnscore = tnscore + response;
+
+/* Tree 43 of 103 */
+/* N terminal nodes = 7, Depth = 5 */
+
+
+N42_1:
+ if MAX_MIN_RANK < 7 then goto T42_1;
+ else goto N42_2;
+
+T42_1:
+ response = -0.000214923;
+ goto D42;
+
+N42_2:
+ if ISTITLE_AVG < 0.55 then goto N42_3;
+ else goto N42_5;
+
+N42_3:
+ if MAX_MIN_SCORE < 41838 then goto N42_4;
+ else goto T42_4;
+
+N42_4:
+ if MAX_SCORE < 235701 then goto T42_2;
+ else goto T42_3;
+
+T42_2:
+ response = 0.00139705;
+ goto D42;
+
+T42_3:
+ response = 0.0257242;
+ goto D42;
+
+T42_4:
+ response = 0.00119704;
+ goto D42;
+
+N42_5:
+ if NATIONALNEWS < 0.115 then goto T42_5;
+ else goto N42_6;
+
+T42_5:
+ response = -0.00295678;
+ goto D42;
+
+N42_6:
+ if MIN_SCORE < 211652 then goto T42_6;
+ else goto T42_7;
+
+T42_6:
+ response = 0.00049411;
+ goto D42;
+
+T42_7:
+ response = 0.0246476;
+ goto D42;
+
+D42:
+
+tnscore = tnscore + response;
+
+/* Tree 44 of 103 */
+/* N terminal nodes = 7, Depth = 5 */
+
+
+N43_1:
+ if SPORTS < 0.47 then goto N43_2;
+ else goto N43_3;
+
+N43_2:
+ if SPORTS < 0.05 then goto T43_1;
+ else goto T43_2;
+
+T43_1:
+ response = 0.000269559;
+ goto D43;
+
+T43_2:
+ response = -0.00314174;
+ goto D43;
+
+N43_3:
+ if MAX_SCORE < 187840 then goto T43_3;
+ else goto N43_4;
+
+T43_3:
+ response = -0.00191667;
+ goto D43;
+
+N43_4:
+ if ISABSTRACT_AVG < 0.415 then goto N43_5;
+ else goto N43_6;
+
+N43_5:
+ if MAX_MIN_RANK < 5 then goto T43_4;
+ else goto T43_5;
+
+T43_4:
+ response = -0.00316349;
+ goto D43;
+
+T43_5:
+ response = 0.00674809;
+ goto D43;
+
+N43_6:
+ if PREV_DAY_HITS < 2.5 then goto T43_6;
+ else goto T43_7;
+
+T43_6:
+ response = 0.00653246;
+ goto D43;
+
+T43_7:
+ response = 0.0230973;
+ goto D43;
+
+D43:
+
+tnscore = tnscore + response;
+
+/* Tree 45 of 103 */
+/* N terminal nodes = 7, Depth = 6 */
+
+
+N44_1:
+ if NATIONALNEWS < 0.105 then goto T44_1;
+ else goto N44_2;
+
+T44_1:
+ response = -0.000806543;
+ goto D44;
+
+N44_2:
+ if DAY_PD_HITS_RATIO < 6.75 then goto N44_3;
+ else goto N44_6;
+
+N44_3:
+ if DAY_WEEK_AVG_RATIO < 3.505 then goto N44_4;
+ else goto T44_5;
+
+N44_4:
+ if MIN_RANK < 1 then goto T44_2;
+ else goto N44_5;
+
+T44_2:
+ response = 0.0183563;
+ goto D44;
+
+N44_5:
+ if NATIONALNEWS < 0.13 then goto T44_3;
+ else goto T44_4;
+
+T44_3:
+ response = 0.0095701;
+ goto D44;
+
+T44_4:
+ response = 0.00111755;
+ goto D44;
+
+T44_5:
+ response = 0.0178329;
+ goto D44;
+
+N44_6:
+ if MAX_MIN_SCORE < 42048.8 then goto T44_6;
+ else goto T44_7;
+
+T44_6:
+ response = 0.00161585;
+ goto D44;
+
+T44_7:
+ response = -0.0115306;
+ goto D44;
+
+D44:
+
+tnscore = tnscore + response;
+
+/* Tree 46 of 103 */
+/* N terminal nodes = 7, Depth = 4 */
+
+
+N45_1:
+ if DAY_LW_DAY_HITS_RATIO < 2.225 then goto N45_2;
+ else goto N45_4;
+
+N45_2:
+ if ONE_DAY_WF < 0.370833 then goto N45_3;
+ else goto T45_3;
+
+N45_3:
+ if ENTERTAINMENT < 0.415 then goto T45_1;
+ else goto T45_2;
+
+T45_1:
+ response = -0.00110057;
+ goto D45;
+
+T45_2:
+ response = 0.00599021;
+ goto D45;
+
+T45_3:
+ response = -0.0100266;
+ goto D45;
+
+N45_4:
+ if LOCALNEWS < 0.645 then goto N45_5;
+ else goto N45_6;
+
+N45_5:
+ if FOUR_HOUR_WF < 0.0402559 then goto T45_4;
+ else goto T45_5;
+
+T45_4:
+ response = 0.00156752;
+ goto D45;
+
+T45_5:
+ response = -0.00187101;
+ goto D45;
+
+N45_6:
+ if TWELVE_HOUR_WF < 0.183333 then goto T45_6;
+ else goto T45_7;
+
+T45_6:
+ response = -0.000756081;
+ goto D45;
+
+T45_7:
+ response = 0.0221542;
+ goto D45;
+
+D45:
+
+tnscore = tnscore + response;
+
+/* Tree 47 of 103 */
+/* N terminal nodes = 7, Depth = 6 */
+
+
+N46_1:
+ if DAY_WEEK_AVG_DERIV < -0.5 then goto N46_2;
+ else goto N46_6;
+
+N46_2:
+ if TOPSTORY < 0.185 then goto N46_3;
+ else goto T46_5;
+
+N46_3:
+ if SPORTS < 0.775 then goto N46_4;
+ else goto T46_4;
+
+N46_4:
+ if INTLNEWS < 0.435 then goto T46_1;
+ else goto N46_5;
+
+T46_1:
+ response = -0.000858072;
+ goto D46;
+
+N46_5:
+ if TWO_DAY_WF < 0.607692 then goto T46_2;
+ else goto T46_3;
+
+T46_2:
+ response = 0.0133903;
+ goto D46;
+
+T46_3:
+ response = -0.000714716;
+ goto D46;
+
+T46_4:
+ response = 0.00949831;
+ goto D46;
+
+T46_5:
+ response = 0.00678994;
+ goto D46;
+
+N46_6:
+ if LIFESTYLE < 0.13 then goto T46_6;
+ else goto T46_7;
+
+T46_6:
+ response = -0.00020835;
+ goto D46;
+
+T46_7:
+ response = -0.00685168;
+ goto D46;
+
+D46:
+
+tnscore = tnscore + response;
+
+/* Tree 48 of 103 */
+/* N terminal nodes = 7, Depth = 6 */
+
+
+N47_1:
+ if MAX_MIN_RANK < 3 then goto N47_2;
+ else goto N47_6;
+
+N47_2:
+ if TOPSTORY < 0.365 then goto N47_3;
+ else goto T47_5;
+
+N47_3:
+ if INTLNEWS < 0.585 then goto N47_4;
+ else goto T47_4;
+
+N47_4:
+ if DAY_WEEK_AVG_DERIV < 3.36 then goto T47_1;
+ else goto N47_5;
+
+T47_1:
+ response = 0.00152356;
+ goto D47;
+
+N47_5:
+ if MAX_MIN_SCORE < 41565 then goto T47_2;
+ else goto T47_3;
+
+T47_2:
+ response = -0.00224511;
+ goto D47;
+
+T47_3:
+ response = -0.0154069;
+ goto D47;
+
+T47_4:
+ response = 0.00771362;
+ goto D47;
+
+T47_5:
+ response = 0.0141704;
+ goto D47;
+
+N47_6:
+ if POLITICS < 0.27 then goto T47_6;
+ else goto T47_7;
+
+T47_6:
+ response = -0.000741127;
+ goto D47;
+
+T47_7:
+ response = 0.0123356;
+ goto D47;
+
+D47:
+
+tnscore = tnscore + response;
+
+/* Tree 49 of 103 */
+/* N terminal nodes = 7, Depth = 5 */
+
+
+N48_1:
+ if REGIONALNEWS < 0.21 then goto N48_2;
+ else goto T48_7;
+
+N48_2:
+ if INTLNEWS < 0.415 then goto N48_3;
+ else goto N48_5;
+
+N48_3:
+ if NATIONALNEWS < 0.105 then goto T48_1;
+ else goto N48_4;
+
+T48_1:
+ response = -0.000772108;
+ goto D48;
+
+N48_4:
+ if TWO_DAY_WF < 0.587963 then goto T48_2;
+ else goto T48_3;
+
+T48_2:
+ response = -0.00126111;
+ goto D48;
+
+T48_3:
+ response = 0.00367856;
+ goto D48;
+
+N48_5:
+ if MAX_MIN_SCORE < 41771 then goto T48_4;
+ else goto N48_6;
+
+T48_4:
+ response = 0.000662317;
+ goto D48;
+
+N48_6:
+ if MAX_MIN_SCORE < 45378.8 then goto T48_5;
+ else goto T48_6;
+
+T48_5:
+ response = 0.0229089;
+ goto D48;
+
+T48_6:
+ response = 0.00361364;
+ goto D48;
+
+T48_7:
+ response = -0.0100665;
+ goto D48;
+
+D48:
+
+tnscore = tnscore + response;
+
+/* Tree 50 of 103 */
+/* N terminal nodes = 7, Depth = 5 */
+
+
+N49_1:
+ if MAX_MIN_SCORE < 46045.5 then goto N49_2;
+ else goto N49_6;
+
+N49_2:
+ if ISTITLE_AVG < 0.415 then goto N49_3;
+ else goto N49_5;
+
+N49_3:
+ if ONE_DAY_WF < 0.0658009 then goto T49_1;
+ else goto N49_4;
+
+T49_1:
+ response = -0.00148948;
+ goto D49;
+
+N49_4:
+ if MIN_SCORE < 226178 then goto T49_2;
+ else goto T49_3;
+
+T49_2:
+ response = -0.000291172;
+ goto D49;
+
+T49_3:
+ response = 0.00500722;
+ goto D49;
+
+N49_5:
+ if MAX_MIN_SCORE < 45527.5 then goto T49_4;
+ else goto T49_5;
+
+T49_4:
+ response = -0.000953746;
+ goto D49;
+
+T49_5:
+ response = 0.00916764;
+ goto D49;
+
+N49_6:
+ if EIGHT_HOUR_WF < 0.0267094 then goto T49_6;
+ else goto T49_7;
+
+T49_6:
+ response = -0.000542017;
+ goto D49;
+
+T49_7:
+ response = -0.00644438;
+ goto D49;
+
+D49:
+
+tnscore = tnscore + response;
+
+/* Tree 51 of 103 */
+/* N terminal nodes = 7, Depth = 7 */
+
+
+N50_1:
+ if DAY_PD_HITS_DERIV < -4.5 then goto N50_2;
+ else goto T50_7;
+
+N50_2:
+ if DAY_PD_HITS_RATIO < 0.16 then goto T50_1;
+ else goto N50_3;
+
+T50_1:
+ response = -0.00116809;
+ goto D50;
+
+N50_3:
+ if MAX_SCORE < 178149 then goto T50_2;
+ else goto N50_4;
+
+T50_2:
+ response = -0.00327374;
+ goto D50;
+
+N50_4:
+ if MIN_SCORE < 172046 then goto T50_3;
+ else goto N50_5;
+
+T50_3:
+ response = 0.017586;
+ goto D50;
+
+N50_5:
+ if MIN_SCORE < 221890 then goto T50_4;
+ else goto N50_6;
+
+T50_4:
+ response = -0.00260826;
+ goto D50;
+
+N50_6:
+ if MIN_SCORE < 227242 then goto T50_5;
+ else goto T50_6;
+
+T50_5:
+ response = 0.0209327;
+ goto D50;
+
+T50_6:
+ response = 0.00475773;
+ goto D50;
+
+T50_7:
+ response = -0.000492544;
+ goto D50;
+
+D50:
+
+tnscore = tnscore + response;
+
+/* Tree 52 of 103 */
+/* N terminal nodes = 7, Depth = 6 */
+
+
+N51_1:
+ if DAY_LW_DAY_HITS_RATIO < 0.83 then goto N51_2;
+ else goto N51_3;
+
+N51_2:
+ if AVG_SCORE < 237778 then goto T51_1;
+ else goto T51_2;
+
+T51_1:
+ response = -0.00220195;
+ goto D51;
+
+T51_2:
+ response = -0.0171686;
+ goto D51;
+
+N51_3:
+ if SPORTS < 0.79 then goto N51_4;
+ else goto T51_7;
+
+N51_4:
+ if SPORTS < 0.05 then goto T51_3;
+ else goto N51_5;
+
+T51_3:
+ response = 0.000236122;
+ goto D51;
+
+N51_5:
+ if MAX_MIN_SCORE < 46031.8 then goto N51_6;
+ else goto T51_6;
+
+N51_6:
+ if MAX_MIN_SCORE < 7978 then goto T51_4;
+ else goto T51_5;
+
+T51_4:
+ response = -0.011323;
+ goto D51;
+
+T51_5:
+ response = -0.000609338;
+ goto D51;
+
+T51_6:
+ response = -0.0077154;
+ goto D51;
+
+T51_7:
+ response = 0.00365925;
+ goto D51;
+
+D51:
+
+tnscore = tnscore + response;
+
+/* Tree 53 of 103 */
+/* N terminal nodes = 7, Depth = 7 */
+
+
+N52_1:
+ if DAY_WEEK_AVG_DERIV < -3.93 then goto T52_1;
+ else goto N52_2;
+
+T52_1:
+ response = 0.00918467;
+ goto D52;
+
+N52_2:
+ if NATIONALNEWS < 0.105 then goto T52_2;
+ else goto N52_3;
+
+T52_2:
+ response = -0.00058362;
+ goto D52;
+
+N52_3:
+ if EIGHT_HOUR_WF < 0.480769 then goto N52_4;
+ else goto T52_7;
+
+N52_4:
+ if ISTITLE_AVG < 0.155 then goto N52_5;
+ else goto T52_6;
+
+N52_5:
+ if DAY_PD_HITS_RATIO < 0.39 then goto T52_3;
+ else goto N52_6;
+
+T52_3:
+ response = -0.00366457;
+ goto D52;
+
+N52_6:
+ if DAY_HITS < 5.5 then goto T52_4;
+ else goto T52_5;
+
+T52_4:
+ response = 0.0162937;
+ goto D52;
+
+T52_5:
+ response = -0.00117921;
+ goto D52;
+
+T52_6:
+ response = 0.001006;
+ goto D52;
+
+T52_7:
+ response = -0.00659191;
+ goto D52;
+
+D52:
+
+tnscore = tnscore + response;
+
+/* Tree 54 of 103 */
+/* N terminal nodes = 7, Depth = 6 */
+
+
+N53_1:
+ if SPORTS < 0.705 then goto T53_1;
+ else goto N53_2;
+
+T53_1:
+ response = -0.000556203;
+ goto D53;
+
+N53_2:
+ if MAX_SCORE < 165481 then goto T53_2;
+ else goto N53_3;
+
+T53_2:
+ response = -0.00498224;
+ goto D53;
+
+N53_3:
+ if SUPERDUPER_AVG < 0.315 then goto N53_4;
+ else goto T53_7;
+
+N53_4:
+ if DAY_WEEK_AVG_DERIV < 0.36 then goto N53_5;
+ else goto N53_6;
+
+N53_5:
+ if AVG_RANK < 5.73 then goto T53_3;
+ else goto T53_4;
+
+T53_3:
+ response = 0.00571545;
+ goto D53;
+
+T53_4:
+ response = 0.0237979;
+ goto D53;
+
+N53_6:
+ if DAY_PD_HITS_DERIV < 3.5 then goto T53_5;
+ else goto T53_6;
+
+T53_5:
+ response = -0.000591932;
+ goto D53;
+
+T53_6:
+ response = 0.011012;
+ goto D53;
+
+T53_7:
+ response = -0.00679759;
+ goto D53;
+
+D53:
+
+tnscore = tnscore + response;
+
+/* Tree 55 of 103 */
+/* N terminal nodes = 7, Depth = 6 */
+
+
+N54_1:
+ if REGIONALNEWS < 0.21 then goto N54_2;
+ else goto T54_7;
+
+N54_2:
+ if MISC < 0.105 then goto N54_3;
+ else goto T54_6;
+
+N54_3:
+ if TWO_DAY_WF < 0.492284 then goto N54_4;
+ else goto T54_5;
+
+N54_4:
+ if FOUR_HOUR_WF < 0.00462963 then goto N54_5;
+ else goto N54_6;
+
+N54_5:
+ if MAX_MIN_SCORE < 37344.8 then goto T54_1;
+ else goto T54_2;
+
+T54_1:
+ response = -0.00252831;
+ goto D54;
+
+T54_2:
+ response = 0.00347229;
+ goto D54;
+
+N54_6:
+ if MIN_SCORE < 216377 then goto T54_3;
+ else goto T54_4;
+
+T54_3:
+ response = -0.00279429;
+ goto D54;
+
+T54_4:
+ response = -0.0169557;
+ goto D54;
+
+T54_5:
+ response = 0.00040157;
+ goto D54;
+
+T54_6:
+ response = 0.00880965;
+ goto D54;
+
+T54_7:
+ response = -0.010143;
+ goto D54;
+
+D54:
+
+tnscore = tnscore + response;
+
+/* Tree 56 of 103 */
+/* N terminal nodes = 7, Depth = 5 */
+
+
+N55_1:
+ if DAY_LW_DAY_HITS_RATIO < 0.645 then goto T55_1;
+ else goto N55_2;
+
+T55_1:
+ response = -0.00704886;
+ goto D55;
+
+N55_2:
+ if AVG_SCORE < 291527 then goto N55_3;
+ else goto N55_5;
+
+N55_3:
+ if MAX_SCORE < 287802 then goto T55_2;
+ else goto N55_4;
+
+T55_2:
+ response = 0.000191564;
+ goto D55;
+
+N55_4:
+ if ISTITLE_AVG < 0.95 then goto T55_3;
+ else goto T55_4;
+
+T55_3:
+ response = 0.0133402;
+ goto D55;
+
+T55_4:
+ response = 0.000516175;
+ goto D55;
+
+N55_5:
+ if AVG_SCORE < 317516 then goto T55_5;
+ else goto N55_6;
+
+T55_5:
+ response = -0.0121501;
+ goto D55;
+
+N55_6:
+ if SPORTS < 0.315 then goto T55_6;
+ else goto T55_7;
+
+T55_6:
+ response = -0.00154239;
+ goto D55;
+
+T55_7:
+ response = 0.00704715;
+ goto D55;
+
+D55:
+
+tnscore = tnscore + response;
+
+/* Tree 57 of 103 */
+/* N terminal nodes = 7, Depth = 5 */
+
+
+N56_1:
+ if INTLNEWS < 0.705 then goto N56_2;
+ else goto T56_7;
+
+N56_2:
+ if DAY_PD_HITS_RATIO < 5.045 then goto N56_3;
+ else goto N56_5;
+
+N56_3:
+ if WEEKAVG < 0.785 then goto T56_1;
+ else goto N56_4;
+
+T56_1:
+ response = -0.00094492;
+ goto D56;
+
+N56_4:
+ if ONE_DAY_WF < 0.644009 then goto T56_2;
+ else goto T56_3;
+
+T56_2:
+ response = 0.000962924;
+ goto D56;
+
+T56_3:
+ response = 0.00910092;
+ goto D56;
+
+N56_5:
+ if LOCALNEWS < 0.295 then goto N56_6;
+ else goto T56_6;
+
+N56_6:
+ if ISTITLE_AVG < 0.185 then goto T56_4;
+ else goto T56_5;
+
+T56_4:
+ response = -0.0103214;
+ goto D56;
+
+T56_5:
+ response = -0.0014478;
+ goto D56;
+
+T56_6:
+ response = 0.0144627;
+ goto D56;
+
+T56_7:
+ response = 0.00968091;
+ goto D56;
+
+D56:
+
+tnscore = tnscore + response;
+
+/* Tree 58 of 103 */
+/* N terminal nodes = 7, Depth = 6 */
+
+
+N57_1:
+ if ONE_DAY_WF < 0.328096 then goto N57_2;
+ else goto T57_7;
+
+N57_2:
+ if ONE_DAY_WF < 0.246773 then goto N57_3;
+ else goto N57_4;
+
+N57_3:
+ if TOPSTORY < 0.47 then goto T57_1;
+ else goto T57_2;
+
+T57_1:
+ response = 0.000196656;
+ goto D57;
+
+T57_2:
+ response = 0.0166177;
+ goto D57;
+
+N57_4:
+ if MIN_SCORE < 224862 then goto T57_3;
+ else goto N57_5;
+
+T57_3:
+ response = 0.00164784;
+ goto D57;
+
+N57_5:
+ if AVG_SCORE < 264251 then goto T57_4;
+ else goto N57_6;
+
+T57_4:
+ response = 0.0251889;
+ goto D57;
+
+N57_6:
+ if AVG_SCORE < 339362 then goto T57_5;
+ else goto T57_6;
+
+T57_5:
+ response = -0.00430853;
+ goto D57;
+
+T57_6:
+ response = 0.0156826;
+ goto D57;
+
+T57_7:
+ response = -0.000883446;
+ goto D57;
+
+D57:
+
+tnscore = tnscore + response;
+
+/* Tree 59 of 103 */
+/* N terminal nodes = 7, Depth = 6 */
+
+
+N58_1:
+ if MISC < 0.105 then goto N58_2;
+ else goto T58_7;
+
+N58_2:
+ if MIN_SCORE < 445730 then goto N58_3;
+ else goto T58_6;
+
+N58_3:
+ if MIN_SCORE < 371741 then goto N58_4;
+ else goto N58_5;
+
+N58_4:
+ if REGIONALNEWS < 0.21 then goto T58_1;
+ else goto T58_2;
+
+T58_1:
+ response = 0.000514634;
+ goto D58;
+
+T58_2:
+ response = -0.0078218;
+ goto D58;
+
+N58_5:
+ if INTLNEWS < 0.145 then goto N58_6;
+ else goto T58_5;
+
+N58_6:
+ if ISTITLE_AVG < 0.105 then goto T58_3;
+ else goto T58_4;
+
+T58_3:
+ response = 0.00884708;
+ goto D58;
+
+T58_4:
+ response = -0.00706471;
+ goto D58;
+
+T58_5:
+ response = -0.0155934;
+ goto D58;
+
+T58_6:
+ response = 0.00871224;
+ goto D58;
+
+T58_7:
+ response = 0.0109724;
+ goto D58;
+
+D58:
+
+tnscore = tnscore + response;
+
+/* Tree 60 of 103 */
+/* N terminal nodes = 7, Depth = 5 */
+
+
+N59_1:
+ if AVG_RANK < 5.07 then goto N59_2;
+ else goto N59_3;
+
+N59_2:
+ if MIN_SCORE < 237760 then goto T59_1;
+ else goto T59_2;
+
+T59_1:
+ response = -8.82014e-05;
+ goto D59;
+
+T59_2:
+ response = 0.0155208;
+ goto D59;
+
+N59_3:
+ if ENTERTAINMENT < 0.05 then goto N59_4;
+ else goto T59_7;
+
+N59_4:
+ if MAX_MIN_SCORE < 14449.2 then goto N59_5;
+ else goto N59_6;
+
+N59_5:
+ if TOPSTORY < 0.225 then goto T59_3;
+ else goto T59_4;
+
+T59_3:
+ response = -0.00152853;
+ goto D59;
+
+T59_4:
+ response = -0.0123906;
+ goto D59;
+
+N59_6:
+ if TOPSTORY < 0.365 then goto T59_5;
+ else goto T59_6;
+
+T59_5:
+ response = 0.000330374;
+ goto D59;
+
+T59_6:
+ response = 0.00622094;
+ goto D59;
+
+T59_7:
+ response = -0.0026428;
+ goto D59;
+
+D59:
+
+tnscore = tnscore + response;
+
+/* Tree 61 of 103 */
+/* N terminal nodes = 7, Depth = 5 */
+
+
+N60_1:
+ if LW_DAY_HITS < 0.5 then goto N60_2;
+ else goto N60_3;
+
+N60_2:
+ if MISC < 0.105 then goto T60_1;
+ else goto T60_2;
+
+T60_1:
+ response = 0.000226343;
+ goto D60;
+
+T60_2:
+ response = 0.00938518;
+ goto D60;
+
+N60_3:
+ if MAX_SCORE < 254898 then goto N60_4;
+ else goto N60_5;
+
+N60_4:
+ if MAX_SCORE < 249948 then goto T60_3;
+ else goto T60_4;
+
+T60_3:
+ response = -0.0010896;
+ goto D60;
+
+T60_4:
+ response = 0.0106866;
+ goto D60;
+
+N60_5:
+ if ONE_DAY_WF < 0.537727 then goto N60_6;
+ else goto T60_7;
+
+N60_6:
+ if MAX_MIN_RANK < 5 then goto T60_5;
+ else goto T60_6;
+
+T60_5:
+ response = -0.000668387;
+ goto D60;
+
+T60_6:
+ response = -0.0113993;
+ goto D60;
+
+T60_7:
+ response = -0.0159024;
+ goto D60;
+
+D60:
+
+tnscore = tnscore + response;
+
+/* Tree 62 of 103 */
+/* N terminal nodes = 7, Depth = 5 */
+
+
+N61_1:
+ if WEEKAVG < 0.93 then goto N61_2;
+ else goto N61_3;
+
+N61_2:
+ if MIN_RANK < 1 then goto T61_1;
+ else goto T61_2;
+
+T61_1:
+ response = 0.0105569;
+ goto D61;
+
+T61_2:
+ response = -0.00122424;
+ goto D61;
+
+N61_3:
+ if HEALTH < 0.105 then goto N61_4;
+ else goto N61_6;
+
+N61_4:
+ if SPORTS < 0.47 then goto N61_5;
+ else goto T61_5;
+
+N61_5:
+ if NATIONALNEWS < 0.105 then goto T61_3;
+ else goto T61_4;
+
+T61_3:
+ response = -0.00127666;
+ goto D61;
+
+T61_4:
+ response = 0.00259145;
+ goto D61;
+
+T61_5:
+ response = 0.00361046;
+ goto D61;
+
+N61_6:
+ if PREV_DAY_HITS < 4.5 then goto T61_6;
+ else goto T61_7;
+
+T61_6:
+ response = -0.00160398;
+ goto D61;
+
+T61_7:
+ response = 0.0210051;
+ goto D61;
+
+D61:
+
+tnscore = tnscore + response;
+
+/* Tree 63 of 103 */
+/* N terminal nodes = 7, Depth = 7 */
+
+
+N62_1:
+ if MAX_MIN_SCORE < 62647.2 then goto N62_2;
+ else goto T62_7;
+
+N62_2:
+ if ISTITLE_AVG < 0.05 then goto N62_3;
+ else goto T62_6;
+
+N62_3:
+ if MAX_MIN_SCORE < 45894.2 then goto N62_4;
+ else goto T62_5;
+
+N62_4:
+ if DAY_PD_HITS_RATIO < 0.675 then goto N62_5;
+ else goto T62_4;
+
+N62_5:
+ if MAX_MIN_SCORE < 41917 then goto N62_6;
+ else goto T62_3;
+
+N62_6:
+ if MIN_SCORE < 227128 then goto T62_1;
+ else goto T62_2;
+
+T62_1:
+ response = 0.00157123;
+ goto D62;
+
+T62_2:
+ response = -0.0091657;
+ goto D62;
+
+T62_3:
+ response = 0.00650689;
+ goto D62;
+
+T62_4:
+ response = 0.000734575;
+ goto D62;
+
+T62_5:
+ response = -0.00919073;
+ goto D62;
+
+T62_6:
+ response = -0.000254308;
+ goto D62;
+
+T62_7:
+ response = 0.0114536;
+ goto D62;
+
+D62:
+
+tnscore = tnscore + response;
+
+/* Tree 64 of 103 */
+/* N terminal nodes = 7, Depth = 6 */
+
+
+N63_1:
+ if BUSINESS < 0.05 then goto N63_2;
+ else goto T63_7;
+
+N63_2:
+ if DAY_WEEK_AVG_RATIO < 0.505 then goto N63_3;
+ else goto T63_6;
+
+N63_3:
+ if LOCALNEWS < 0.115 then goto N63_4;
+ else goto T63_5;
+
+N63_4:
+ if DAY_WEEK_AVG_RATIO < 0.36 then goto N63_5;
+ else goto N63_6;
+
+N63_5:
+ if DAY_WEEK_AVG_RATIO < 0.275 then goto T63_1;
+ else goto T63_2;
+
+T63_1:
+ response = 0.0111617;
+ goto D63;
+
+T63_2:
+ response = -0.00883723;
+ goto D63;
+
+N63_6:
+ if DAY_PD_HITS_RATIO < 0.185 then goto T63_3;
+ else goto T63_4;
+
+T63_3:
+ response = 0.0229969;
+ goto D63;
+
+T63_4:
+ response = -0.000984798;
+ goto D63;
+
+T63_5:
+ response = -0.00222586;
+ goto D63;
+
+T63_6:
+ response = 0.000255018;
+ goto D63;
+
+T63_7:
+ response = -0.00124642;
+ goto D63;
+
+D63:
+
+tnscore = tnscore + response;
+
+/* Tree 65 of 103 */
+/* N terminal nodes = 7, Depth = 7 */
+
+
+N64_1:
+ if DAY_LW_DAY_HITS_RATIO < 0.645 then goto T64_1;
+ else goto N64_2;
+
+T64_1:
+ response = -0.0067218;
+ goto D64;
+
+N64_2:
+ if WEEKAVG < 26.715 then goto N64_3;
+ else goto T64_7;
+
+N64_3:
+ if AVG_RANK < 9.225 then goto T64_2;
+ else goto N64_4;
+
+T64_2:
+ response = 0.000158773;
+ goto D64;
+
+N64_4:
+ if TWO_DAY_WF < 0.976136 then goto T64_3;
+ else goto N64_5;
+
+T64_3:
+ response = 0.00457414;
+ goto D64;
+
+N64_5:
+ if DAY_WEEK_AVG_DERIV < 5.785 then goto T64_4;
+ else goto N64_6;
+
+T64_4:
+ response = -0.00480445;
+ goto D64;
+
+N64_6:
+ if DAY_LW_DAY_HITS_DERIV < 27.5 then goto T64_5;
+ else goto T64_6;
+
+T64_5:
+ response = 0.016426;
+ goto D64;
+
+T64_6:
+ response = -0.00308634;
+ goto D64;
+
+T64_7:
+ response = -0.00901968;
+ goto D64;
+
+D64:
+
+tnscore = tnscore + response;
+
+/* Tree 66 of 103 */
+/* N terminal nodes = 7, Depth = 6 */
+
+
+N65_1:
+ if LOCALNEWS < 0.295 then goto N65_2;
+ else goto N65_6;
+
+N65_2:
+ if LOCALNEWS < 0.155 then goto T65_1;
+ else goto N65_3;
+
+T65_1:
+ response = -0.000303017;
+ goto D65;
+
+N65_3:
+ if MIN_SCORE < 222112 then goto T65_2;
+ else goto N65_4;
+
+T65_2:
+ response = 0.000210471;
+ goto D65;
+
+N65_4:
+ if ENTERTAINMENT < 0.05 then goto N65_5;
+ else goto T65_5;
+
+N65_5:
+ if ISTITLE_AVG < 0.185 then goto T65_3;
+ else goto T65_4;
+
+T65_3:
+ response = 0.0201204;
+ goto D65;
+
+T65_4:
+ response = 0.00543219;
+ goto D65;
+
+T65_5:
+ response = -0.00443157;
+ goto D65;
+
+N65_6:
+ if DAY_PD_HITS_DERIV < 6.5 then goto T65_6;
+ else goto T65_7;
+
+T65_6:
+ response = -0.00312853;
+ goto D65;
+
+T65_7:
+ response = 0.0100428;
+ goto D65;
+
+D65:
+
+tnscore = tnscore + response;
+
+/* Tree 67 of 103 */
+/* N terminal nodes = 7, Depth = 7 */
+
+
+N66_1:
+ if DAY_PD_HITS_RATIO < 32.5 then goto N66_2;
+ else goto T66_7;
+
+N66_2:
+ if DAY_PD_HITS_DERIV < 20.5 then goto N66_3;
+ else goto T66_6;
+
+N66_3:
+ if MAX_MIN_RANK < 7 then goto T66_1;
+ else goto N66_4;
+
+T66_1:
+ response = -0.000109244;
+ goto D66;
+
+N66_4:
+ if DAY_LW_DAY_HITS_DERIV < 9.5 then goto N66_5;
+ else goto T66_5;
+
+N66_5:
+ if MIN_SCORE < 215422 then goto T66_2;
+ else goto N66_6;
+
+T66_2:
+ response = 0.000454762;
+ goto D66;
+
+N66_6:
+ if ISABSTRACT_AVG < 0.225 then goto T66_3;
+ else goto T66_4;
+
+T66_3:
+ response = 0.0146831;
+ goto D66;
+
+T66_4:
+ response = -0.000790241;
+ goto D66;
+
+T66_5:
+ response = -0.00837207;
+ goto D66;
+
+T66_6:
+ response = -0.00476979;
+ goto D66;
+
+T66_7:
+ response = 0.0102445;
+ goto D66;
+
+D66:
+
+tnscore = tnscore + response;
+
+/* Tree 68 of 103 */
+/* N terminal nodes = 7, Depth = 5 */
+
+
+N67_1:
+ if MAX_MIN_SCORE < 45353.5 then goto N67_2;
+ else goto N67_4;
+
+N67_2:
+ if MAX_MIN_SCORE < 44594.8 then goto N67_3;
+ else goto T67_3;
+
+N67_3:
+ if MISC < 0.105 then goto T67_1;
+ else goto T67_2;
+
+T67_1:
+ response = -9.71934e-05;
+ goto D67;
+
+T67_2:
+ response = 0.00961124;
+ goto D67;
+
+T67_3:
+ response = 0.00683718;
+ goto D67;
+
+N67_4:
+ if TWELVE_HOUR_WF < 0.0451153 then goto N67_5;
+ else goto T67_7;
+
+N67_5:
+ if TOPSTORY < 0.05 then goto N67_6;
+ else goto T67_6;
+
+N67_6:
+ if ISTITLE_AVG < 0.79 then goto T67_4;
+ else goto T67_5;
+
+T67_4:
+ response = -0.0112723;
+ goto D67;
+
+T67_5:
+ response = 0.00207341;
+ goto D67;
+
+T67_6:
+ response = 0.00460994;
+ goto D67;
+
+T67_7:
+ response = -0.00460433;
+ goto D67;
+
+D67:
+
+tnscore = tnscore + response;
+
+/* Tree 69 of 103 */
+/* N terminal nodes = 7, Depth = 6 */
+
+
+N68_1:
+ if SPORTS < 0.47 then goto N68_2;
+ else goto T68_7;
+
+N68_2:
+ if DAY_PD_HITS_RATIO < 4.725 then goto N68_3;
+ else goto N68_6;
+
+N68_3:
+ if DAY_WEEK_AVG_RATIO < 2.74 then goto T68_1;
+ else goto N68_4;
+
+T68_1:
+ response = -0.000301001;
+ goto D68;
+
+N68_4:
+ if INTLNEWS < 0.415 then goto N68_5;
+ else goto T68_4;
+
+N68_5:
+ if MIN_SCORE < 337522 then goto T68_2;
+ else goto T68_3;
+
+T68_2:
+ response = 0.0052542;
+ goto D68;
+
+T68_3:
+ response = 0.0240026;
+ goto D68;
+
+T68_4:
+ response = -0.00664368;
+ goto D68;
+
+N68_6:
+ if DAY_WEEK_AVG_RATIO < 4.15 then goto T68_5;
+ else goto T68_6;
+
+T68_5:
+ response = -0.00601665;
+ goto D68;
+
+T68_6:
+ response = 0.00254873;
+ goto D68;
+
+T68_7:
+ response = 0.00244646;
+ goto D68;
+
+D68:
+
+tnscore = tnscore + response;
+
+/* Tree 70 of 103 */
+/* N terminal nodes = 7, Depth = 5 */
+
+
+N69_1:
+ if TOPSTORY < 0.47 then goto N69_2;
+ else goto N69_6;
+
+N69_2:
+ if LW_DAY_HITS < 0.5 then goto T69_1;
+ else goto N69_3;
+
+T69_1:
+ response = 0.00051525;
+ goto D69;
+
+N69_3:
+ if ONE_DAY_WF < 0.398413 then goto N69_4;
+ else goto N69_5;
+
+N69_4:
+ if DAY_PD_HITS_RATIO < 0.61 then goto T69_2;
+ else goto T69_3;
+
+T69_2:
+ response = -0.00298415;
+ goto D69;
+
+T69_3:
+ response = 0.00362271;
+ goto D69;
+
+N69_5:
+ if AVG_SCORE < 242552 then goto T69_4;
+ else goto T69_5;
+
+T69_4:
+ response = -0.004241;
+ goto D69;
+
+T69_5:
+ response = -0.0152224;
+ goto D69;
+
+N69_6:
+ if MAX_SCORE < 264598 then goto T69_6;
+ else goto T69_7;
+
+T69_6:
+ response = 0.0026109;
+ goto D69;
+
+T69_7:
+ response = -0.013849;
+ goto D69;
+
+D69:
+
+tnscore = tnscore + response;
+
+/* Tree 71 of 103 */
+/* N terminal nodes = 7, Depth = 6 */
+
+
+N70_1:
+ if PREV_DAY_HITS < 59.5 then goto N70_2;
+ else goto T70_7;
+
+N70_2:
+ if MAX_SCORE < 455608 then goto N70_3;
+ else goto N70_6;
+
+N70_3:
+ if DAY_PD_HITS_DERIV < 64 then goto N70_4;
+ else goto T70_4;
+
+N70_4:
+ if SUPERDUPER_AVG < 0.725 then goto N70_5;
+ else goto T70_3;
+
+N70_5:
+ if MIN_SCORE < 132886 then goto T70_1;
+ else goto T70_2;
+
+T70_1:
+ response = -0.0021355;
+ goto D70;
+
+T70_2:
+ response = 0.000253917;
+ goto D70;
+
+T70_3:
+ response = -0.0118492;
+ goto D70;
+
+T70_4:
+ response = 0.00977612;
+ goto D70;
+
+N70_6:
+ if INTLNEWS < 0.145 then goto T70_5;
+ else goto T70_6;
+
+T70_5:
+ response = 0.0130923;
+ goto D70;
+
+T70_6:
+ response = -0.00429049;
+ goto D70;
+
+T70_7:
+ response = 0.0115797;
+ goto D70;
+
+D70:
+
+tnscore = tnscore + response;
+
+/* Tree 72 of 103 */
+/* N terminal nodes = 7, Depth = 6 */
+
+
+N71_1:
+ if DAY_PD_HITS_RATIO < 0.115 then goto T71_1;
+ else goto N71_2;
+
+T71_1:
+ response = -0.00460369;
+ goto D71;
+
+N71_2:
+ if DAY_WEEK_AVG_RATIO < 0.455 then goto T71_2;
+ else goto N71_3;
+
+T71_2:
+ response = 0.00654726;
+ goto D71;
+
+N71_3:
+ if ISTITLE_AVG < 0.565 then goto N71_4;
+ else goto N71_6;
+
+N71_4:
+ if DAY_PD_HITS_DERIV < 38 then goto N71_5;
+ else goto T71_5;
+
+N71_5:
+ if DAY_WEEK_AVG_DERIV < -0.785 then goto T71_3;
+ else goto T71_4;
+
+T71_3:
+ response = -0.00572704;
+ goto D71;
+
+T71_4:
+ response = 0.00211848;
+ goto D71;
+
+T71_5:
+ response = -0.0128642;
+ goto D71;
+
+N71_6:
+ if MAX_SCORE < 261066 then goto T71_6;
+ else goto T71_7;
+
+T71_6:
+ response = 0.000164546;
+ goto D71;
+
+T71_7:
+ response = -0.00330215;
+ goto D71;
+
+D71:
+
+tnscore = tnscore + response;
+
+/* Tree 73 of 103 */
+/* N terminal nodes = 7, Depth = 5 */
+
+
+N72_1:
+ if DAY_WEEK_AVG_DERIV < 8.36 then goto N72_2;
+ else goto N72_5;
+
+N72_2:
+ if AVG_SCORE < 266020 then goto T72_1;
+ else goto N72_3;
+
+T72_1:
+ response = 0.000544955;
+ goto D72;
+
+N72_3:
+ if TOPSTORY < 0.21 then goto N72_4;
+ else goto T72_4;
+
+N72_4:
+ if MAX_SCORE < 343351 then goto T72_2;
+ else goto T72_3;
+
+T72_2:
+ response = -0.00739666;
+ goto D72;
+
+T72_3:
+ response = -0.00159552;
+ goto D72;
+
+T72_4:
+ response = 0.00316353;
+ goto D72;
+
+N72_5:
+ if EIGHT_HOUR_WF < 0.117802 then goto N72_6;
+ else goto T72_7;
+
+N72_6:
+ if AVG_SCORE < 264897 then goto T72_5;
+ else goto T72_6;
+
+T72_5:
+ response = 0.00293963;
+ goto D72;
+
+T72_6:
+ response = 0.016815;
+ goto D72;
+
+T72_7:
+ response = -0.000481606;
+ goto D72;
+
+D72:
+
+tnscore = tnscore + response;
+
+/* Tree 74 of 103 */
+/* N terminal nodes = 7, Depth = 6 */
+
+
+N73_1:
+ if AVG_SCORE < 446571 then goto N73_2;
+ else goto T73_7;
+
+N73_2:
+ if DAY_WEEK_AVG_RATIO < 4.59 then goto N73_3;
+ else goto N73_6;
+
+N73_3:
+ if MAX_SCORE < 390560 then goto T73_1;
+ else goto N73_4;
+
+T73_1:
+ response = -8.80057e-06;
+ goto D73;
+
+N73_4:
+ if PREV_DAY_HITS < 2.5 then goto N73_5;
+ else goto T73_4;
+
+N73_5:
+ if INTLNEWS < 0.135 then goto T73_2;
+ else goto T73_3;
+
+T73_2:
+ response = -0.00440415;
+ goto D73;
+
+T73_3:
+ response = -0.0221297;
+ goto D73;
+
+T73_4:
+ response = 0.000810032;
+ goto D73;
+
+N73_6:
+ if ENTERTAINMENT < 0.15 then goto T73_5;
+ else goto T73_6;
+
+T73_5:
+ response = 0.00319307;
+ goto D73;
+
+T73_6:
+ response = 0.0160496;
+ goto D73;
+
+T73_7:
+ response = 0.00840475;
+ goto D73;
+
+D73:
+
+tnscore = tnscore + response;
+
+/* Tree 75 of 103 */
+/* N terminal nodes = 7, Depth = 5 */
+
+
+N74_1:
+ if MAX_SCORE < 390244 then goto N74_2;
+ else goto N74_5;
+
+N74_2:
+ if AVG_SCORE < 360833 then goto N74_3;
+ else goto T74_4;
+
+N74_3:
+ if AVG_SCORE < 352194 then goto T74_1;
+ else goto N74_4;
+
+T74_1:
+ response = 0.000175857;
+ goto D74;
+
+N74_4:
+ if DAY_WEEK_AVG_DERIV < 0.855 then goto T74_2;
+ else goto T74_3;
+
+T74_2:
+ response = -0.0187021;
+ goto D74;
+
+T74_3:
+ response = 0.00356778;
+ goto D74;
+
+T74_4:
+ response = 0.00780008;
+ goto D74;
+
+N74_5:
+ if INTLNEWS < 0.145 then goto N74_6;
+ else goto T74_7;
+
+N74_6:
+ if BUSINESS < 0.135 then goto T74_5;
+ else goto T74_6;
+
+T74_5:
+ response = 0.00421198;
+ goto D74;
+
+T74_6:
+ response = -0.0102414;
+ goto D74;
+
+T74_7:
+ response = -0.00922112;
+ goto D74;
+
+D74:
+
+tnscore = tnscore + response;
+
+/* Tree 76 of 103 */
+/* N terminal nodes = 7, Depth = 5 */
+
+
+N75_1:
+ if ONE_DAY_WF < 0.605556 then goto T75_1;
+ else goto N75_2;
+
+T75_1:
+ response = -0.000461057;
+ goto D75;
+
+N75_2:
+ if MIN_SCORE < 332098 then goto N75_3;
+ else goto N75_5;
+
+N75_3:
+ if MAX_SCORE < 355711 then goto N75_4;
+ else goto T75_4;
+
+N75_4:
+ if WEEKAVG < 1.5 then goto T75_2;
+ else goto T75_3;
+
+T75_2:
+ response = 2.20435e-05;
+ goto D75;
+
+T75_3:
+ response = 0.00562666;
+ goto D75;
+
+T75_4:
+ response = -0.0153817;
+ goto D75;
+
+N75_5:
+ if INTLNEWS < 0.145 then goto T75_5;
+ else goto N75_6;
+
+T75_5:
+ response = 0.018971;
+ goto D75;
+
+N75_6:
+ if SUPERDUPER_AVG < 0.275 then goto T75_6;
+ else goto T75_7;
+
+T75_6:
+ response = -0.00766663;
+ goto D75;
+
+T75_7:
+ response = 0.0121696;
+ goto D75;
+
+D75:
+
+tnscore = tnscore + response;
+
+/* Tree 77 of 103 */
+/* N terminal nodes = 7, Depth = 5 */
+
+
+N76_1:
+ if ENTERTAINMENT < 0.05 then goto T76_1;
+ else goto N76_2;
+
+T76_1:
+ response = 2.07331e-05;
+ goto D76;
+
+N76_2:
+ if ENTERTAINMENT < 0.415 then goto N76_3;
+ else goto N76_4;
+
+N76_3:
+ if AVG_SCORE < 237084 then goto T76_2;
+ else goto T76_3;
+
+T76_2:
+ response = -0.00148349;
+ goto D76;
+
+T76_3:
+ response = -0.00781033;
+ goto D76;
+
+N76_4:
+ if AVG_SCORE < 340606 then goto N76_5;
+ else goto N76_6;
+
+N76_5:
+ if ONE_DAY_WF < 0.015625 then goto T76_4;
+ else goto T76_5;
+
+T76_4:
+ response = 0.0143289;
+ goto D76;
+
+T76_5:
+ response = 0.00105025;
+ goto D76;
+
+N76_6:
+ if WEEKAVG < 0.785 then goto T76_6;
+ else goto T76_7;
+
+T76_6:
+ response = 0.00610972;
+ goto D76;
+
+T76_7:
+ response = -0.020138;
+ goto D76;
+
+D76:
+
+tnscore = tnscore + response;
+
+/* Tree 78 of 103 */
+/* N terminal nodes = 7, Depth = 5 */
+
+
+N77_1:
+ if DAY_LW_DAY_HITS_DERIV < 17.5 then goto N77_2;
+ else goto N77_3;
+
+N77_2:
+ if DAY_LW_DAY_HITS_RATIO < 16.5 then goto T77_1;
+ else goto T77_2;
+
+T77_1:
+ response = 0.000188349;
+ goto D77;
+
+T77_2:
+ response = 0.0147086;
+ goto D77;
+
+N77_3:
+ if TWO_DAY_WF < 0.743223 then goto N77_4;
+ else goto N77_5;
+
+N77_4:
+ if MIN_SCORE < 212511 then goto T77_3;
+ else goto T77_4;
+
+T77_3:
+ response = -0.00155285;
+ goto D77;
+
+T77_4:
+ response = -0.0125926;
+ goto D77;
+
+N77_5:
+ if TWO_DAY_WF < 0.980566 then goto T77_5;
+ else goto N77_6;
+
+T77_5:
+ response = 0.00635603;
+ goto D77;
+
+N77_6:
+ if ISTITLE_AVG < 0.15 then goto T77_6;
+ else goto T77_7;
+
+T77_6:
+ response = -0.0163438;
+ goto D77;
+
+T77_7:
+ response = -0.00197531;
+ goto D77;
+
+D77:
+
+tnscore = tnscore + response;
+
+/* Tree 79 of 103 */
+/* N terminal nodes = 7, Depth = 5 */
+
+
+N78_1:
+ if AVG_SCORE < 281850 then goto N78_2;
+ else goto N78_4;
+
+N78_2:
+ if MAX_SCORE < 288032 then goto T78_1;
+ else goto N78_3;
+
+T78_1:
+ response = 0.000335293;
+ goto D78;
+
+N78_3:
+ if TWELVE_HOUR_WF < 0.358289 then goto T78_2;
+ else goto T78_3;
+
+T78_2:
+ response = 0.00952171;
+ goto D78;
+
+T78_3:
+ response = -0.00693432;
+ goto D78;
+
+N78_4:
+ if EIGHT_HOUR_WF < 0.584928 then goto N78_5;
+ else goto T78_7;
+
+N78_5:
+ if MAX_MIN_SCORE < 2471.25 then goto T78_4;
+ else goto N78_6;
+
+T78_4:
+ response = -0.0141419;
+ goto D78;
+
+N78_6:
+ if MAX_MIN_SCORE < 6867.75 then goto T78_5;
+ else goto T78_6;
+
+T78_5:
+ response = 0.010146;
+ goto D78;
+
+T78_6:
+ response = -0.00291703;
+ goto D78;
+
+T78_7:
+ response = 0.00765541;
+ goto D78;
+
+D78:
+
+tnscore = tnscore + response;
+
+/* Tree 80 of 103 */
+/* N terminal nodes = 7, Depth = 6 */
+
+
+N79_1:
+ if PREV_DAY_HITS < 3.5 then goto T79_1;
+ else goto N79_2;
+
+T79_1:
+ response = -0.000747677;
+ goto D79;
+
+N79_2:
+ if EIGHT_HOUR_WF < 0.147108 then goto N79_3;
+ else goto N79_6;
+
+N79_3:
+ if AVG_RANK < 7.69 then goto T79_2;
+ else goto N79_4;
+
+T79_2:
+ response = -0.000812926;
+ goto D79;
+
+N79_4:
+ if ISABSTRACT_AVG < 0.95 then goto T79_3;
+ else goto N79_5;
+
+T79_3:
+ response = 0.00246382;
+ goto D79;
+
+N79_5:
+ if SUPERDUPER_AVG < 0.21 then goto T79_4;
+ else goto T79_5;
+
+T79_4:
+ response = 0.00464639;
+ goto D79;
+
+T79_5:
+ response = 0.0245523;
+ goto D79;
+
+N79_6:
+ if ISTITLE_AVG < 0.05 then goto T79_6;
+ else goto T79_7;
+
+T79_6:
+ response = -0.0184693;
+ goto D79;
+
+T79_7:
+ response = -0.000890194;
+ goto D79;
+
+D79:
+
+tnscore = tnscore + response;
+
+/* Tree 81 of 103 */
+/* N terminal nodes = 7, Depth = 5 */
+
+
+N80_1:
+ if HEALTH < 0.27 then goto N80_2;
+ else goto N80_6;
+
+N80_2:
+ if DAY_PD_HITS_RATIO < 5.045 then goto T80_1;
+ else goto N80_3;
+
+T80_1:
+ response = 3.07572e-05;
+ goto D80;
+
+N80_3:
+ if ISTITLE_AVG < 0.125 then goto N80_4;
+ else goto N80_5;
+
+N80_4:
+ if PUB_TODAY_AVG < 0.95 then goto T80_2;
+ else goto T80_3;
+
+T80_2:
+ response = -0.0154824;
+ goto D80;
+
+T80_3:
+ response = -0.00294081;
+ goto D80;
+
+N80_5:
+ if TWELVE_HOUR_WF < 0.226496 then goto T80_4;
+ else goto T80_5;
+
+T80_4:
+ response = -0.00622506;
+ goto D80;
+
+T80_5:
+ response = 0.00166554;
+ goto D80;
+
+N80_6:
+ if ISABSTRACT_AVG < 0.17 then goto T80_6;
+ else goto T80_7;
+
+T80_6:
+ response = 0.0165858;
+ goto D80;
+
+T80_7:
+ response = -0.00330948;
+ goto D80;
+
+D80:
+
+tnscore = tnscore + response;
+
+/* Tree 82 of 103 */
+/* N terminal nodes = 7, Depth = 7 */
+
+
+N81_1:
+ if DAY_WEEK_AVG_DERIV < 44.86 then goto N81_2;
+ else goto T81_7;
+
+N81_2:
+ if SUPERDUPER_AVG < 0.39 then goto T81_1;
+ else goto N81_3;
+
+T81_1:
+ response = 0.000333898;
+ goto D81;
+
+N81_3:
+ if TWO_DAY_WF < 0.825226 then goto N81_4;
+ else goto T81_6;
+
+N81_4:
+ if MAX_MIN_SCORE < 42418.5 then goto T81_2;
+ else goto N81_5;
+
+T81_2:
+ response = -0.00881739;
+ goto D81;
+
+N81_5:
+ if EIGHT_HOUR_WF < 0.0825189 then goto N81_6;
+ else goto T81_5;
+
+N81_6:
+ if BUSINESS < 0.05 then goto T81_3;
+ else goto T81_4;
+
+T81_3:
+ response = 0.0117745;
+ goto D81;
+
+T81_4:
+ response = -0.00306987;
+ goto D81;
+
+T81_5:
+ response = -0.0111028;
+ goto D81;
+
+T81_6:
+ response = 0.00218894;
+ goto D81;
+
+T81_7:
+ response = 0.00834421;
+ goto D81;
+
+D81:
+
+tnscore = tnscore + response;
+
+/* Tree 83 of 103 */
+/* N terminal nodes = 7, Depth = 5 */
+
+
+N82_1:
+ if MIN_SCORE < 230160 then goto N82_2;
+ else goto N82_3;
+
+N82_2:
+ if MIN_SCORE < 229092 then goto T82_1;
+ else goto T82_2;
+
+T82_1:
+ response = -0.000807888;
+ goto D82;
+
+T82_2:
+ response = -0.0133966;
+ goto D82;
+
+N82_3:
+ if TWO_DAY_WF < 0.518064 then goto N82_4;
+ else goto N82_6;
+
+N82_4:
+ if WEEKAVG < 1.5 then goto N82_5;
+ else goto T82_5;
+
+N82_5:
+ if AVG_SCORE < 359388 then goto T82_3;
+ else goto T82_4;
+
+T82_3:
+ response = 0.00892968;
+ goto D82;
+
+T82_4:
+ response = -0.00763611;
+ goto D82;
+
+T82_5:
+ response = -0.00138809;
+ goto D82;
+
+N82_6:
+ if MIN_SCORE < 232622 then goto T82_6;
+ else goto T82_7;
+
+T82_6:
+ response = 0.0102781;
+ goto D82;
+
+T82_7:
+ response = -0.000634821;
+ goto D82;
+
+D82:
+
+tnscore = tnscore + response;
+
+/* Tree 84 of 103 */
+/* N terminal nodes = 7, Depth = 6 */
+
+
+N83_1:
+ if MAX_MIN_RANK < 7 then goto T83_1;
+ else goto N83_2;
+
+T83_1:
+ response = -2.44784e-05;
+ goto D83;
+
+N83_2:
+ if ISTITLE_AVG < 0.55 then goto N83_3;
+ else goto N83_6;
+
+N83_3:
+ if DAY_WEEK_AVG_DERIV < 4.57 then goto N83_4;
+ else goto T83_5;
+
+N83_4:
+ if DAY_WEEK_AVG_RATIO < 0.76 then goto T83_2;
+ else goto N83_5;
+
+T83_2:
+ response = -0.000894147;
+ goto D83;
+
+N83_5:
+ if MIN_SCORE < 215272 then goto T83_3;
+ else goto T83_4;
+
+T83_3:
+ response = 0.00546979;
+ goto D83;
+
+T83_4:
+ response = 0.0273153;
+ goto D83;
+
+T83_5:
+ response = -0.00734683;
+ goto D83;
+
+N83_6:
+ if NATIONALNEWS < 0.21 then goto T83_6;
+ else goto T83_7;
+
+T83_6:
+ response = -0.00138435;
+ goto D83;
+
+T83_7:
+ response = 0.00911761;
+ goto D83;
+
+D83:
+
+tnscore = tnscore + response;
+
+/* Tree 85 of 103 */
+/* N terminal nodes = 7, Depth = 4 */
+
+
+N84_1:
+ if DAY_WEEK_AVG_RATIO < 3.83 then goto N84_2;
+ else goto N84_4;
+
+N84_2:
+ if ENTERTAINMENT < 0.53 then goto T84_1;
+ else goto N84_3;
+
+T84_1:
+ response = -0.000283843;
+ goto D84;
+
+N84_3:
+ if ISABSTRACT_AVG < 0.21 then goto T84_2;
+ else goto T84_3;
+
+T84_2:
+ response = 0.00786177;
+ goto D84;
+
+T84_3:
+ response = -0.00075151;
+ goto D84;
+
+N84_4:
+ if AVG_RANK < 9.465 then goto N84_5;
+ else goto N84_6;
+
+N84_5:
+ if INTLNEWS < 0.21 then goto T84_4;
+ else goto T84_5;
+
+T84_4:
+ response = 0.00394069;
+ goto D84;
+
+T84_5:
+ response = -0.00383803;
+ goto D84;
+
+N84_6:
+ if AVG_SCORE < 258669 then goto T84_6;
+ else goto T84_7;
+
+T84_6:
+ response = 0.0034867;
+ goto D84;
+
+T84_7:
+ response = 0.0179637;
+ goto D84;
+
+D84:
+
+tnscore = tnscore + response;
+
+/* Tree 86 of 103 */
+/* N terminal nodes = 7, Depth = 6 */
+
+
+N85_1:
+ if SUPERDUPER_AVG < 0.725 then goto N85_2;
+ else goto N85_6;
+
+N85_2:
+ if SPORTS < 0.685 then goto T85_1;
+ else goto N85_3;
+
+T85_1:
+ response = -0.00023907;
+ goto D85;
+
+N85_3:
+ if AVG_SCORE < 264275 then goto N85_4;
+ else goto T85_5;
+
+N85_4:
+ if MIN_SCORE < 219502 then goto T85_2;
+ else goto N85_5;
+
+T85_2:
+ response = -0.000438684;
+ goto D85;
+
+N85_5:
+ if MAX_RANK < 9 then goto T85_3;
+ else goto T85_4;
+
+T85_3:
+ response = 0.00337648;
+ goto D85;
+
+T85_4:
+ response = 0.0167784;
+ goto D85;
+
+T85_5:
+ response = -0.00334013;
+ goto D85;
+
+N85_6:
+ if WEEKAVG < 12.785 then goto T85_6;
+ else goto T85_7;
+
+T85_6:
+ response = -0.0162992;
+ goto D85;
+
+T85_7:
+ response = 2.52385e-05;
+ goto D85;
+
+D85:
+
+tnscore = tnscore + response;
+
+/* Tree 87 of 103 */
+/* N terminal nodes = 7, Depth = 7 */
+
+
+N86_1:
+ if ISABSTRACT_AVG < 0.895 then goto T86_1;
+ else goto N86_2;
+
+T86_1:
+ response = -0.000210253;
+ goto D86;
+
+N86_2:
+ if AVG_SCORE < 247839 then goto N86_3;
+ else goto T86_7;
+
+N86_3:
+ if PREV_DAY_HITS < 1.5 then goto T86_2;
+ else goto N86_4;
+
+T86_2:
+ response = -0.000398583;
+ goto D86;
+
+N86_4:
+ if TWO_DAY_WF < 0.551797 then goto T86_3;
+ else goto N86_5;
+
+T86_3:
+ response = -0.000635903;
+ goto D86;
+
+N86_5:
+ if MIN_SCORE < 141715 then goto T86_4;
+ else goto N86_6;
+
+T86_4:
+ response = 0.00351871;
+ goto D86;
+
+N86_6:
+ if INTLNEWS < 0.105 then goto T86_5;
+ else goto T86_6;
+
+T86_5:
+ response = 0.00858437;
+ goto D86;
+
+T86_6:
+ response = 0.0254582;
+ goto D86;
+
+T86_7:
+ response = -0.00273198;
+ goto D86;
+
+D86:
+
+tnscore = tnscore + response;
+
+/* Tree 88 of 103 */
+/* N terminal nodes = 7, Depth = 5 */
+
+
+N87_1:
+ if ONE_DAY_WF < 0.605556 then goto N87_2;
+ else goto N87_3;
+
+N87_2:
+ if EIGHT_HOUR_WF < 0.0411953 then goto T87_1;
+ else goto T87_2;
+
+T87_1:
+ response = 0.000498775;
+ goto D87;
+
+T87_2:
+ response = -0.00236496;
+ goto D87;
+
+N87_3:
+ if AVG_SCORE < 342691 then goto N87_4;
+ else goto N87_6;
+
+N87_4:
+ if MAX_MIN_SCORE < 6080.75 then goto N87_5;
+ else goto T87_5;
+
+N87_5:
+ if MIN_SCORE < 236879 then goto T87_3;
+ else goto T87_4;
+
+T87_3:
+ response = 0.00101981;
+ goto D87;
+
+T87_4:
+ response = -0.0190995;
+ goto D87;
+
+T87_5:
+ response = 0.00142291;
+ goto D87;
+
+N87_6:
+ if INTLNEWS < 0.275 then goto T87_6;
+ else goto T87_7;
+
+T87_6:
+ response = 0.0147214;
+ goto D87;
+
+T87_7:
+ response = -0.000475944;
+ goto D87;
+
+D87:
+
+tnscore = tnscore + response;
+
+/* Tree 89 of 103 */
+/* N terminal nodes = 7, Depth = 6 */
+
+
+N88_1:
+ if DAY_PD_HITS_DERIV < -13.5 then goto N88_2;
+ else goto N88_6;
+
+N88_2:
+ if WEEKAVG < 3.785 then goto T88_1;
+ else goto N88_3;
+
+T88_1:
+ response = -0.00749366;
+ goto D88;
+
+N88_3:
+ if PREV_DAY_HITS < 22.5 then goto T88_2;
+ else goto N88_4;
+
+T88_2:
+ response = 0.0146922;
+ goto D88;
+
+N88_4:
+ if TWO_DAY_WF < 0.822683 then goto N88_5;
+ else goto T88_5;
+
+N88_5:
+ if PUB_TODAY_AVG < 0.45 then goto T88_3;
+ else goto T88_4;
+
+T88_3:
+ response = -0.00577822;
+ goto D88;
+
+T88_4:
+ response = 0.00590076;
+ goto D88;
+
+T88_5:
+ response = 0.00846642;
+ goto D88;
+
+N88_6:
+ if MISC < 0.105 then goto T88_6;
+ else goto T88_7;
+
+T88_6:
+ response = -0.000196119;
+ goto D88;
+
+T88_7:
+ response = 0.0069636;
+ goto D88;
+
+D88:
+
+tnscore = tnscore + response;
+
+/* Tree 90 of 103 */
+/* N terminal nodes = 7, Depth = 5 */
+
+
+N89_1:
+ if DAY_WEEK_AVG_DERIV < -3.93 then goto T89_1;
+ else goto N89_2;
+
+T89_1:
+ response = 0.00797481;
+ goto D89;
+
+N89_2:
+ if DAY_WEEK_AVG_RATIO < 3.865 then goto N89_3;
+ else goto N89_5;
+
+N89_3:
+ if DAY_WEEK_AVG_RATIO < 3.61 then goto N89_4;
+ else goto T89_4;
+
+N89_4:
+ if DAY_WEEK_AVG_RATIO < 3.245 then goto T89_2;
+ else goto T89_3;
+
+T89_2:
+ response = -0.000472322;
+ goto D89;
+
+T89_3:
+ response = 0.00505862;
+ goto D89;
+
+T89_4:
+ response = -0.00837491;
+ goto D89;
+
+N89_5:
+ if TWELVE_HOUR_WF < 0.202675 then goto T89_5;
+ else goto N89_6;
+
+T89_5:
+ response = -0.00493061;
+ goto D89;
+
+N89_6:
+ if TWELVE_HOUR_WF < 0.36039 then goto T89_6;
+ else goto T89_7;
+
+T89_6:
+ response = 0.0124758;
+ goto D89;
+
+T89_7:
+ response = 0.00250066;
+ goto D89;
+
+D89:
+
+tnscore = tnscore + response;
+
+/* Tree 91 of 103 */
+/* N terminal nodes = 7, Depth = 4 */
+
+
+N90_1:
+ if MAX_MIN_SCORE < 60845.5 then goto N90_2;
+ else goto N90_5;
+
+N90_2:
+ if MAX_MIN_SCORE < 52128 then goto N90_3;
+ else goto N90_4;
+
+N90_3:
+ if MAX_MIN_SCORE < 51264.2 then goto T90_1;
+ else goto T90_2;
+
+T90_1:
+ response = -0.000132387;
+ goto D90;
+
+T90_2:
+ response = 0.0106899;
+ goto D90;
+
+N90_4:
+ if MIN_SCORE < 218318 then goto T90_3;
+ else goto T90_4;
+
+T90_3:
+ response = -0.00277432;
+ goto D90;
+
+T90_4:
+ response = -0.0140369;
+ goto D90;
+
+N90_5:
+ if ISABSTRACT_AVG < 0.05 then goto T90_5;
+ else goto N90_6;
+
+T90_5:
+ response = 0.0138962;
+ goto D90;
+
+N90_6:
+ if MAX_MIN_SCORE < 61542.5 then goto T90_6;
+ else goto T90_7;
+
+T90_6:
+ response = 0.00445669;
+ goto D90;
+
+T90_7:
+ response = -0.00606652;
+ goto D90;
+
+D90:
+
+tnscore = tnscore + response;
+
+/* Tree 92 of 103 */
+/* N terminal nodes = 7, Depth = 6 */
+
+
+N91_1:
+ if BUSINESS < 0.315 then goto T91_1;
+ else goto N91_2;
+
+T91_1:
+ response = -0.000666016;
+ goto D91;
+
+N91_2:
+ if AVG_RANK < 8.535 then goto N91_3;
+ else goto N91_6;
+
+N91_3:
+ if MAX_MIN_SCORE < 7884.5 then goto T91_2;
+ else goto N91_4;
+
+T91_2:
+ response = 0.0134214;
+ goto D91;
+
+N91_4:
+ if ISTITLE_AVG < 0.465 then goto N91_5;
+ else goto T91_5;
+
+N91_5:
+ if MIN_SCORE < 217640 then goto T91_3;
+ else goto T91_4;
+
+T91_3:
+ response = -0.000967523;
+ goto D91;
+
+T91_4:
+ response = 0.0135374;
+ goto D91;
+
+T91_5:
+ response = 0.000147833;
+ goto D91;
+
+N91_6:
+ if DAY_PD_HITS_RATIO < 0.235 then goto T91_6;
+ else goto T91_7;
+
+T91_6:
+ response = 0.00921797;
+ goto D91;
+
+T91_7:
+ response = -0.00182698;
+ goto D91;
+
+D91:
+
+tnscore = tnscore + response;
+
+/* Tree 93 of 103 */
+/* N terminal nodes = 7, Depth = 5 */
+
+
+N92_1:
+ if AVG_RANK < 5.29 then goto N92_2;
+ else goto N92_3;
+
+N92_2:
+ if AVG_SCORE < 318378 then goto T92_1;
+ else goto T92_2;
+
+T92_1:
+ response = 0.00224509;
+ goto D92;
+
+T92_2:
+ response = 0.0161861;
+ goto D92;
+
+N92_3:
+ if MAX_MIN_SCORE < 51537 then goto N92_4;
+ else goto N92_6;
+
+N92_4:
+ if MAX_MIN_SCORE < 50910.2 then goto N92_5;
+ else goto T92_5;
+
+N92_5:
+ if DAY_LW_DAY_HITS_DERIV < 71 then goto T92_3;
+ else goto T92_4;
+
+T92_3:
+ response = -0.000273537;
+ goto D92;
+
+T92_4:
+ response = -0.00811121;
+ goto D92;
+
+T92_5:
+ response = 0.0109085;
+ goto D92;
+
+N92_6:
+ if SPORTS < 0.415 then goto T92_6;
+ else goto T92_7;
+
+T92_6:
+ response = -0.00253066;
+ goto D92;
+
+T92_7:
+ response = -0.0129268;
+ goto D92;
+
+D92:
+
+tnscore = tnscore + response;
+
+/* Tree 94 of 103 */
+/* N terminal nodes = 7, Depth = 5 */
+
+
+N93_1:
+ if LOCALNEWS < 0.61 then goto N93_2;
+ else goto N93_5;
+
+N93_2:
+ if WEEKAVG < 0.5 then goto T93_1;
+ else goto N93_3;
+
+T93_1:
+ response = -0.00262523;
+ goto D93;
+
+N93_3:
+ if NATIONALNEWS < 0.105 then goto T93_2;
+ else goto N93_4;
+
+T93_2:
+ response = -0.000635021;
+ goto D93;
+
+N93_4:
+ if ISTITLE_AVG < 0.155 then goto T93_3;
+ else goto T93_4;
+
+T93_3:
+ response = 0.005402;
+ goto D93;
+
+T93_4:
+ response = 6.7829e-05;
+ goto D93;
+
+N93_5:
+ if TWELVE_HOUR_WF < 0.133929 then goto T93_5;
+ else goto N93_6;
+
+T93_5:
+ response = -0.00168908;
+ goto D93;
+
+N93_6:
+ if PUB_TODAY_AVG < 0.535 then goto T93_6;
+ else goto T93_7;
+
+T93_6:
+ response = 0.00693807;
+ goto D93;
+
+T93_7:
+ response = 0.0227961;
+ goto D93;
+
+D93:
+
+tnscore = tnscore + response;
+
+/* Tree 95 of 103 */
+/* N terminal nodes = 7, Depth = 6 */
+
+
+N94_1:
+ if DAY_WEEK_AVG_DERIV < -0.785 then goto T94_1;
+ else goto N94_2;
+
+T94_1:
+ response = -0.0021874;
+ goto D94;
+
+N94_2:
+ if ENTERTAINMENT < 0.05 then goto N94_3;
+ else goto T94_7;
+
+N94_3:
+ if MAX_SCORE < 363930 then goto T94_2;
+ else goto N94_4;
+
+T94_2:
+ response = 0.000468954;
+ goto D94;
+
+N94_4:
+ if MAX_SCORE < 384272 then goto N94_5;
+ else goto N94_6;
+
+N94_5:
+ if DAY_PD_HITS_RATIO < 0.495 then goto T94_3;
+ else goto T94_4;
+
+T94_3:
+ response = 0.00125669;
+ goto D94;
+
+T94_4:
+ response = 0.0177669;
+ goto D94;
+
+N94_6:
+ if INTLNEWS < 0.21 then goto T94_5;
+ else goto T94_6;
+
+T94_5:
+ response = 0.0060623;
+ goto D94;
+
+T94_6:
+ response = -0.00814847;
+ goto D94;
+
+T94_7:
+ response = -0.00164111;
+ goto D94;
+
+D94:
+
+tnscore = tnscore + response;
+
+/* Tree 96 of 103 */
+/* N terminal nodes = 7, Depth = 5 */
+
+
+N95_1:
+ if MIN_SCORE < 334353 then goto N95_2;
+ else goto N95_5;
+
+N95_2:
+ if NUM_WORDS < 2.5 then goto T95_1;
+ else goto N95_3;
+
+T95_1:
+ response = 5.19901e-05;
+ goto D95;
+
+N95_3:
+ if AVG_SCORE < 316903 then goto N95_4;
+ else goto T95_4;
+
+N95_4:
+ if ISTITLE_AVG < 0.05 then goto T95_2;
+ else goto T95_3;
+
+T95_2:
+ response = -0.0220661;
+ goto D95;
+
+T95_3:
+ response = -0.00683671;
+ goto D95;
+
+T95_4:
+ response = -0.00100144;
+ goto D95;
+
+N95_5:
+ if INTLNEWS < 0.13 then goto T95_5;
+ else goto N95_6;
+
+T95_5:
+ response = 0.00532815;
+ goto D95;
+
+N95_6:
+ if MIN_SCORE < 357183 then goto T95_6;
+ else goto T95_7;
+
+T95_6:
+ response = 0.0067984;
+ goto D95;
+
+T95_7:
+ response = -0.00805372;
+ goto D95;
+
+D95:
+
+tnscore = tnscore + response;
+
+/* Tree 97 of 103 */
+/* N terminal nodes = 7, Depth = 6 */
+
+
+N96_1:
+ if INTLNEWS < 0.53 then goto T96_1;
+ else goto N96_2;
+
+T96_1:
+ response = -4.58708e-05;
+ goto D96;
+
+N96_2:
+ if TWELVE_HOUR_WF < 0.21385 then goto N96_3;
+ else goto N96_6;
+
+N96_3:
+ if LOCALNEWS < 0.05 then goto N96_4;
+ else goto T96_5;
+
+N96_4:
+ if TWO_DAY_WF < 0.585356 then goto T96_2;
+ else goto N96_5;
+
+T96_2:
+ response = 0.00547473;
+ goto D96;
+
+N96_5:
+ if MAX_SCORE < 244158 then goto T96_3;
+ else goto T96_4;
+
+T96_3:
+ response = 0.00333793;
+ goto D96;
+
+T96_4:
+ response = -0.00880659;
+ goto D96;
+
+T96_5:
+ response = 0.00966211;
+ goto D96;
+
+N96_6:
+ if FOUR_HOUR_WF < 0.0240968 then goto T96_6;
+ else goto T96_7;
+
+T96_6:
+ response = 0.019307;
+ goto D96;
+
+T96_7:
+ response = 0.00126046;
+ goto D96;
+
+D96:
+
+tnscore = tnscore + response;
+
+/* Tree 98 of 103 */
+/* N terminal nodes = 7, Depth = 6 */
+
+
+N97_1:
+ if BUSINESS < 0.05 then goto N97_2;
+ else goto T97_7;
+
+N97_2:
+ if DAY_PD_HITS_DERIV < 18.5 then goto N97_3;
+ else goto N97_6;
+
+N97_3:
+ if DAY_WEEK_AVG_DERIV < 12.93 then goto N97_4;
+ else goto T97_4;
+
+N97_4:
+ if INTLNEWS < 0.73 then goto N97_5;
+ else goto T97_3;
+
+N97_5:
+ if INTLNEWS < 0.315 then goto T97_1;
+ else goto T97_2;
+
+T97_1:
+ response = 0.00145589;
+ goto D97;
+
+T97_2:
+ response = -0.00205678;
+ goto D97;
+
+T97_3:
+ response = 0.0114136;
+ goto D97;
+
+T97_4:
+ response = 0.0135475;
+ goto D97;
+
+N97_6:
+ if MIN_RANK < 7 then goto T97_5;
+ else goto T97_6;
+
+T97_5:
+ response = -0.0106123;
+ goto D97;
+
+T97_6:
+ response = 0.00101067;
+ goto D97;
+
+T97_7:
+ response = -0.000965519;
+ goto D97;
+
+D97:
+
+tnscore = tnscore + response;
+
+/* Tree 99 of 103 */
+/* N terminal nodes = 7, Depth = 6 */
+
+
+N98_1:
+ if PREV_DAY_HITS < 6.5 then goto T98_1;
+ else goto N98_2;
+
+T98_1:
+ response = -0.00050304;
+ goto D98;
+
+N98_2:
+ if TWO_DAY_WF < 0.825345 then goto N98_3;
+ else goto N98_4;
+
+N98_3:
+ if ONE_DAY_WF < 0.275028 then goto T98_2;
+ else goto T98_3;
+
+T98_2:
+ response = -0.00134104;
+ goto D98;
+
+T98_3:
+ response = 0.00516485;
+ goto D98;
+
+N98_4:
+ if TWO_DAY_WF < 0.861643 then goto T98_4;
+ else goto N98_5;
+
+T98_4:
+ response = 0.0172774;
+ goto D98;
+
+N98_5:
+ if LOCALNEWS < 0.05 then goto T98_5;
+ else goto N98_6;
+
+T98_5:
+ response = -0.00143161;
+ goto D98;
+
+N98_6:
+ if INTLNEWS < 0.21 then goto T98_6;
+ else goto T98_7;
+
+T98_6:
+ response = 0.0022491;
+ goto D98;
+
+T98_7:
+ response = 0.0202453;
+ goto D98;
+
+D98:
+
+tnscore = tnscore + response;
+
+/* Tree 100 of 103 */
+/* N terminal nodes = 7, Depth = 5 */
+
+
+N99_1:
+ if TWO_DAY_WF < 0.477226 then goto N99_2;
+ else goto T99_7;
+
+N99_2:
+ if BUSINESS < 0.685 then goto N99_3;
+ else goto N99_6;
+
+N99_3:
+ if MAX_MIN_SCORE < 41352.5 then goto N99_4;
+ else goto N99_5;
+
+N99_4:
+ if SUPERDUPER_AVG < 0.315 then goto T99_1;
+ else goto T99_2;
+
+T99_1:
+ response = -0.00389642;
+ goto D99;
+
+T99_2:
+ response = -0.0130707;
+ goto D99;
+
+N99_5:
+ if AVG_SCORE < 253118 then goto T99_3;
+ else goto T99_4;
+
+T99_3:
+ response = -0.00411848;
+ goto D99;
+
+T99_4:
+ response = 0.0126594;
+ goto D99;
+
+N99_6:
+ if DAY_LW_DAY_HITS_RATIO < 1.71 then goto T99_5;
+ else goto T99_6;
+
+T99_5:
+ response = 0.0118462;
+ goto D99;
+
+T99_6:
+ response = -0.00195941;
+ goto D99;
+
+T99_7:
+ response = 0.000656261;
+ goto D99;
+
+D99:
+
+tnscore = tnscore + response;
+
+/* Tree 101 of 103 */
+/* N terminal nodes = 7, Depth = 5 */
+
+
+N100_1:
+ if POLITICS < 0.235 then goto N100_2;
+ else goto N100_6;
+
+N100_2:
+ if MAX_MIN_SCORE < 8349.75 then goto N100_3;
+ else goto N100_4;
+
+N100_3:
+ if DAY_LW_DAY_HITS_RATIO < 5.335 then goto T100_1;
+ else goto T100_2;
+
+T100_1:
+ response = 0.00316005;
+ goto D100;
+
+T100_2:
+ response = -0.00643477;
+ goto D100;
+
+N100_4:
+ if MAX_MIN_SCORE < 16062.8 then goto T100_3;
+ else goto N100_5;
+
+T100_3:
+ response = -0.00319606;
+ goto D100;
+
+N100_5:
+ if MAX_MIN_SCORE < 16303.8 then goto T100_4;
+ else goto T100_5;
+
+T100_4:
+ response = 0.0107361;
+ goto D100;
+
+T100_5:
+ response = -3.21466e-05;
+ goto D100;
+
+N100_6:
+ if ISABSTRACT_AVG < 0.05 then goto T100_6;
+ else goto T100_7;
+
+T100_6:
+ response = 0.0148344;
+ goto D100;
+
+T100_7:
+ response = -0.00402193;
+ goto D100;
+
+D100:
+
+tnscore = tnscore + response;
+
+/* Tree 102 of 103 */
+/* N terminal nodes = 7, Depth = 5 */
+
+
+N101_1:
+ if ENTERTAINMENT < 0.585 then goto N101_2;
+ else goto N101_3;
+
+N101_2:
+ if MAX_MIN_SCORE < 36987.5 then goto T101_1;
+ else goto T101_2;
+
+T101_1:
+ response = -0.00141265;
+ goto D101;
+
+T101_2:
+ response = 0.000338741;
+ goto D101;
+
+N101_3:
+ if PUB_TODAY_AVG < 0.235 then goto N101_4;
+ else goto N101_5;
+
+N101_4:
+ if MAX_MIN_SCORE < 20990 then goto T101_3;
+ else goto T101_4;
+
+T101_3:
+ response = 0.00745281;
+ goto D101;
+
+T101_4:
+ response = -0.0127174;
+ goto D101;
+
+N101_5:
+ if AVG_SCORE < 239671 then goto T101_5;
+ else goto N101_6;
+
+T101_5:
+ response = 0.000361332;
+ goto D101;
+
+N101_6:
+ if ISTITLE_AVG < 0.735 then goto T101_6;
+ else goto T101_7;
+
+T101_6:
+ response = 0.0208577;
+ goto D101;
+
+T101_7:
+ response = 0.00530017;
+ goto D101;
+
+D101:
+
+tnscore = tnscore + response;
+
+/* Tree 103 of 103 */
+/* N terminal nodes = 7, Depth = 7 */
+
+
+N102_1:
+ if DAY_PD_HITS_DERIV < -3.5 then goto N102_2;
+ else goto T102_7;
+
+N102_2:
+ if HEALTH < 0.105 then goto N102_3;
+ else goto T102_6;
+
+N102_3:
+ if DAY_PD_HITS_RATIO < 0.165 then goto T102_1;
+ else goto N102_4;
+
+T102_1:
+ response = -0.00212795;
+ goto D102;
+
+N102_4:
+ if LW_DAY_HITS < 3.5 then goto N102_5;
+ else goto T102_5;
+
+N102_5:
+ if AVG_SCORE < 258650 then goto N102_6;
+ else goto T102_4;
+
+N102_6:
+ if ISABSTRACT_AVG < 0.225 then goto T102_2;
+ else goto T102_3;
+
+T102_2:
+ response = -0.00378895;
+ goto D102;
+
+T102_3:
+ response = 0.00511293;
+ goto D102;
+
+T102_4:
+ response = 0.0070848;
+ goto D102;
+
+T102_5:
+ response = -0.0101628;
+ goto D102;
+
+T102_6:
+ response = 0.0144615;
+ goto D102;
+
+T102_7:
+ response = -0.000510098;
+ goto D102;
+
+D102:
+
+tnscore = tnscore + response;
+
+return;
diff --git a/searchlib/src/test/files/treenet05.model b/searchlib/src/test/files/treenet05.model
new file mode 100644
index 00000000000..523b9de42f9
--- /dev/null
+++ b/searchlib/src/test/files/treenet05.model
@@ -0,0 +1,4684 @@
+
+/* Data Dictionary, Number Of Variables = 37 */
+/* Name = DAY_HITS, Type = continuous. */
+/* Name = PREV_DAY_HITS, Type = continuous. */
+/* Name = DAY_PD_HITS_RATIO, Type = continuous. */
+/* Name = DAY_PD_HITS_DERIV, Type = continuous. */
+/* Name = LW_DAY_HITS, Type = continuous. */
+/* Name = DAY_LW_DAY_HITS_RATIO, Type = continuous. */
+/* Name = DAY_LW_DAY_HITS_DERIV, Type = continuous. */
+/* Name = WEEKAVG, Type = continuous. */
+/* Name = DAY_WEEK_AVG_RATIO, Type = continuous. */
+/* Name = DAY_WEEK_AVG_DERIV, Type = continuous. */
+/* Name = ISTITLE_AVG, Type = continuous. */
+/* Name = ISABSTRACT_AVG, Type = continuous. */
+/* Name = SUPERDUPER_AVG, Type = continuous. */
+/* Name = PUB_TODAY_AVG, Type = continuous. */
+/* Name = BUSINESS, Type = continuous. */
+/* Name = ENTERTAINMENT, Type = continuous. */
+/* Name = INTLNEWS, Type = continuous. */
+/* Name = LAW, Type = continuous. */
+/* Name = LIFESTYLE, Type = continuous. */
+/* Name = LOCALNEWS, Type = continuous. */
+/* Name = NATIONALNEWS, Type = continuous. */
+/* Name = POLITICS, Type = continuous. */
+/* Name = SPORTS, Type = continuous. */
+/* Name = TOPSTORY, Type = continuous. */
+/* Name = AVG_RANK, Type = continuous. */
+/* Name = MAX_RANK, Type = continuous. */
+/* Name = MIN_RANK, Type = continuous. */
+/* Name = MAX_MIN_RANK, Type = continuous. */
+/* Name = AVG_SCORE, Type = continuous. */
+/* Name = MAX_SCORE, Type = continuous. */
+/* Name = MIN_SCORE, Type = continuous. */
+/* Name = MAX_MIN_SCORE, Type = continuous. */
+/* Name = FOUR_HOUR_WF, Type = continuous. */
+/* Name = EIGHT_HOUR_WF, Type = continuous. */
+/* Name = TWELVE_HOUR_WF, Type = continuous. */
+/* Name = ONE_DAY_WF, Type = continuous. */
+/* Name = TWO_DAY_WF, Type = continuous. */
+
+MODELBEGIN:
+
+/* CART version: 5.0.9.156 */
+/* TreeNet: TreeNet20071016174833 */
+/* Grove: /home/rparekh/lb/lb_titleabstract_hourly/lb_titleabstract_hourly.grv */
+/* N trees: 77 */
+
+link TN0;
+pred = tnscore; /* predicted value for IY_CTR */
+
+
+/*********************/
+/* Model is complete */
+/*********************/
+
+return;
+
+
+
+TN0:
+
+/* Tree 1 of 77 */
+/* N terminal nodes = 7, Depth = 5 */
+
+
+tnscore = 0.0;
+
+N0_1:
+ if AVG_SCORE < 240274 then goto N0_2;
+ else goto N0_3;
+
+N0_2:
+ if AVG_SCORE < 152115 then goto T0_1;
+ else goto T0_2;
+
+T0_1:
+ response = 0.222147;
+ goto D0;
+
+T0_2:
+ response = 0.231999;
+ goto D0;
+
+N0_3:
+ if ISABSTRACT_AVG < 0.13 then goto N0_4;
+ else goto N0_6;
+
+N0_4:
+ if WEEKAVG < 1.785 then goto T0_3;
+ else goto N0_5;
+
+T0_3:
+ response = 0.254209;
+ goto D0;
+
+N0_5:
+ if TWO_DAY_WF < 0.849242 then goto T0_4;
+ else goto T0_5;
+
+T0_4:
+ response = 0.260625;
+ goto D0;
+
+T0_5:
+ response = 0.274218;
+ goto D0;
+
+N0_6:
+ if MIN_SCORE < 328158 then goto T0_6;
+ else goto T0_7;
+
+T0_6:
+ response = 0.240699;
+ goto D0;
+
+T0_7:
+ response = 0.25683;
+ goto D0;
+
+D0:
+
+tnscore = tnscore + response;
+
+/* Tree 2 of 77 */
+/* N terminal nodes = 7, Depth = 4 */
+
+
+N1_1:
+ if MIN_SCORE < 224388 then goto N1_2;
+ else goto N1_4;
+
+N1_2:
+ if AVG_SCORE < 229835 then goto N1_3;
+ else goto T1_3;
+
+N1_3:
+ if MAX_SCORE < 171144 then goto T1_1;
+ else goto T1_2;
+
+T1_1:
+ response = -0.0167726;
+ goto D1;
+
+T1_2:
+ response = -0.0105451;
+ goto D1;
+
+T1_3:
+ response = -0.00242754;
+ goto D1;
+
+N1_4:
+ if ISABSTRACT_AVG < 0.13 then goto N1_5;
+ else goto N1_6;
+
+N1_5:
+ if WEEKAVG < 1.36 then goto T1_4;
+ else goto T1_5;
+
+T1_4:
+ response = 0.0097257;
+ goto D1;
+
+T1_5:
+ response = 0.0198661;
+ goto D1;
+
+N1_6:
+ if MIN_SCORE < 330678 then goto T1_6;
+ else goto T1_7;
+
+T1_6:
+ response = -0.00030123;
+ goto D1;
+
+T1_7:
+ response = 0.0145117;
+ goto D1;
+
+D1:
+
+tnscore = tnscore + response;
+
+/* Tree 3 of 77 */
+/* N terminal nodes = 7, Depth = 5 */
+
+
+N2_1:
+ if AVG_SCORE < 240820 then goto N2_2;
+ else goto N2_3;
+
+N2_2:
+ if AVG_SCORE < 159292 then goto T2_1;
+ else goto T2_2;
+
+T2_1:
+ response = -0.0146681;
+ goto D2;
+
+T2_2:
+ response = -0.00755839;
+ goto D2;
+
+N2_3:
+ if ISABSTRACT_AVG < 0.21 then goto N2_4;
+ else goto N2_6;
+
+N2_4:
+ if WEEKAVG < 1.64 then goto T2_3;
+ else goto N2_5;
+
+T2_3:
+ response = 0.0094277;
+ goto D2;
+
+N2_5:
+ if AVG_RANK < 7.1 then goto T2_4;
+ else goto T2_5;
+
+T2_4:
+ response = 0.00789525;
+ goto D2;
+
+T2_5:
+ response = 0.020449;
+ goto D2;
+
+N2_6:
+ if TOPSTORY < 0.05 then goto T2_6;
+ else goto T2_7;
+
+T2_6:
+ response = -0.00410248;
+ goto D2;
+
+T2_7:
+ response = 0.00584918;
+ goto D2;
+
+D2:
+
+tnscore = tnscore + response;
+
+/* Tree 4 of 77 */
+/* N terminal nodes = 7, Depth = 4 */
+
+
+N3_1:
+ if MIN_SCORE < 226846 then goto N3_2;
+ else goto N3_4;
+
+N3_2:
+ if AVG_SCORE < 221526 then goto T3_1;
+ else goto N3_3;
+
+T3_1:
+ response = -0.0110986;
+ goto D3;
+
+N3_3:
+ if TOPSTORY < 0.365 then goto T3_2;
+ else goto T3_3;
+
+T3_2:
+ response = -0.00479654;
+ goto D3;
+
+T3_3:
+ response = 0.020019;
+ goto D3;
+
+N3_4:
+ if WEEKAVG < 1.07 then goto N3_5;
+ else goto N3_6;
+
+N3_5:
+ if ISTITLE_AVG < 0.93 then goto T3_4;
+ else goto T3_5;
+
+T3_4:
+ response = 0.0050835;
+ goto D3;
+
+T3_5:
+ response = -0.00497491;
+ goto D3;
+
+N3_6:
+ if ISABSTRACT_AVG < 0.05 then goto T3_6;
+ else goto T3_7;
+
+T3_6:
+ response = 0.0175386;
+ goto D3;
+
+T3_7:
+ response = 0.00636247;
+ goto D3;
+
+D3:
+
+tnscore = tnscore + response;
+
+/* Tree 5 of 77 */
+/* N terminal nodes = 7, Depth = 5 */
+
+
+N4_1:
+ if MIN_SCORE < 226373 then goto N4_2;
+ else goto N4_5;
+
+N4_2:
+ if AVG_SCORE < 151768 then goto T4_1;
+ else goto N4_3;
+
+T4_1:
+ response = -0.0132135;
+ goto D4;
+
+N4_3:
+ if WEEKAVG < 2.5 then goto T4_2;
+ else goto N4_4;
+
+T4_2:
+ response = -0.00784849;
+ goto D4;
+
+N4_4:
+ if ISABSTRACT_AVG < 0.95 then goto T4_3;
+ else goto T4_4;
+
+T4_3:
+ response = -0.00100834;
+ goto D4;
+
+T4_4:
+ response = 0.0234278;
+ goto D4;
+
+N4_5:
+ if ISABSTRACT_AVG < 0.235 then goto N4_6;
+ else goto T4_7;
+
+N4_6:
+ if MAX_MIN_SCORE < 20325 then goto T4_5;
+ else goto T4_6;
+
+T4_5:
+ response = 0.00102421;
+ goto D4;
+
+T4_6:
+ response = 0.0145312;
+ goto D4;
+
+T4_7:
+ response = 0.000803179;
+ goto D4;
+
+D4:
+
+tnscore = tnscore + response;
+
+/* Tree 6 of 77 */
+/* N terminal nodes = 7, Depth = 6 */
+
+
+N5_1:
+ if AVG_SCORE < 234937 then goto T5_1;
+ else goto N5_2;
+
+T5_1:
+ response = -0.00791142;
+ goto D5;
+
+N5_2:
+ if ISABSTRACT_AVG < 0.13 then goto N5_3;
+ else goto N5_6;
+
+N5_3:
+ if WEEKAVG < 1.5 then goto T5_2;
+ else goto N5_4;
+
+T5_2:
+ response = 0.00645581;
+ goto D5;
+
+N5_4:
+ if TWO_DAY_WF < 0.925548 then goto T5_3;
+ else goto N5_5;
+
+T5_3:
+ response = 0.0121753;
+ goto D5;
+
+N5_5:
+ if LOCALNEWS < 0.05 then goto T5_4;
+ else goto T5_5;
+
+T5_4:
+ response = 0.013493;
+ goto D5;
+
+T5_5:
+ response = 0.0339803;
+ goto D5;
+
+N5_6:
+ if AVG_SCORE < 492653 then goto T5_6;
+ else goto T5_7;
+
+T5_6:
+ response = -0.00104991;
+ goto D5;
+
+T5_7:
+ response = 0.0192918;
+ goto D5;
+
+D5:
+
+tnscore = tnscore + response;
+
+/* Tree 7 of 77 */
+/* N terminal nodes = 7, Depth = 5 */
+
+
+N6_1:
+ if MIN_SCORE < 231118 then goto N6_2;
+ else goto N6_4;
+
+N6_2:
+ if AVG_SCORE < 223663 then goto T6_1;
+ else goto N6_3;
+
+T6_1:
+ response = -0.00824326;
+ goto D6;
+
+N6_3:
+ if ISABSTRACT_AVG < 0.05 then goto T6_2;
+ else goto T6_3;
+
+T6_2:
+ response = 0.00253809;
+ goto D6;
+
+T6_3:
+ response = -0.00637355;
+ goto D6;
+
+N6_4:
+ if ISTITLE_AVG < 0.885 then goto N6_5;
+ else goto T6_7;
+
+N6_5:
+ if DAY_LW_DAY_HITS_RATIO < 4.045 then goto N6_6;
+ else goto T6_6;
+
+N6_6:
+ if LW_DAY_HITS < 0.5 then goto T6_4;
+ else goto T6_5;
+
+T6_4:
+ response = 0.00769331;
+ goto D6;
+
+T6_5:
+ response = -0.00570545;
+ goto D6;
+
+T6_6:
+ response = 0.0136625;
+ goto D6;
+
+T6_7:
+ response = -0.000566847;
+ goto D6;
+
+D6:
+
+tnscore = tnscore + response;
+
+/* Tree 8 of 77 */
+/* N terminal nodes = 7, Depth = 7 */
+
+
+N7_1:
+ if MIN_SCORE < 222204 then goto T7_1;
+ else goto N7_2;
+
+T7_1:
+ response = -0.00655529;
+ goto D7;
+
+N7_2:
+ if ISTITLE_AVG < 0.95 then goto N7_3;
+ else goto T7_7;
+
+N7_3:
+ if DAY_LW_DAY_HITS_RATIO < 15.5 then goto N7_4;
+ else goto T7_6;
+
+N7_4:
+ if TOPSTORY < 0.185 then goto N7_5;
+ else goto T7_5;
+
+N7_5:
+ if INTLNEWS < 0.39 then goto T7_2;
+ else goto N7_6;
+
+T7_2:
+ response = 0.00204263;
+ goto D7;
+
+N7_6:
+ if FOUR_HOUR_WF < 0.004 then goto T7_3;
+ else goto T7_4;
+
+T7_3:
+ response = 0.0157981;
+ goto D7;
+
+T7_4:
+ response = -0.00641486;
+ goto D7;
+
+T7_5:
+ response = 0.0124789;
+ goto D7;
+
+T7_6:
+ response = 0.0166668;
+ goto D7;
+
+T7_7:
+ response = -0.00299979;
+ goto D7;
+
+D7:
+
+tnscore = tnscore + response;
+
+/* Tree 9 of 77 */
+/* N terminal nodes = 7, Depth = 5 */
+
+
+N8_1:
+ if MIN_SCORE < 224388 then goto T8_1;
+ else goto N8_2;
+
+T8_1:
+ response = -0.00568771;
+ goto D8;
+
+N8_2:
+ if WEEKAVG < 1.07 then goto N8_3;
+ else goto N8_4;
+
+N8_3:
+ if ISTITLE_AVG < 0.845 then goto T8_2;
+ else goto T8_3;
+
+T8_2:
+ response = 0.00360536;
+ goto D8;
+
+T8_3:
+ response = -0.00514951;
+ goto D8;
+
+N8_4:
+ if BUSINESS < 0.05 then goto N8_5;
+ else goto N8_6;
+
+N8_5:
+ if MAX_MIN_RANK < 3 then goto T8_4;
+ else goto T8_5;
+
+T8_4:
+ response = 0.0231505;
+ goto D8;
+
+T8_5:
+ response = 0.0105904;
+ goto D8;
+
+N8_6:
+ if MIN_SCORE < 400082 then goto T8_6;
+ else goto T8_7;
+
+T8_6:
+ response = 0.00196019;
+ goto D8;
+
+T8_7:
+ response = 0.0152236;
+ goto D8;
+
+D8:
+
+tnscore = tnscore + response;
+
+/* Tree 10 of 77 */
+/* N terminal nodes = 7, Depth = 6 */
+
+
+N9_1:
+ if MAX_SCORE < 264920 then goto N9_2;
+ else goto N9_3;
+
+N9_2:
+ if AVG_SCORE < 159289 then goto T9_1;
+ else goto T9_2;
+
+T9_1:
+ response = -0.00812678;
+ goto D9;
+
+T9_2:
+ response = -0.003174;
+ goto D9;
+
+N9_3:
+ if DAY_WEEK_AVG_DERIV < 30.715 then goto N9_4;
+ else goto T9_7;
+
+N9_4:
+ if ISABSTRACT_AVG < 0.115 then goto N9_5;
+ else goto T9_6;
+
+N9_5:
+ if MAX_MIN_SCORE < 163787 then goto T9_3;
+ else goto N9_6;
+
+T9_3:
+ response = 0.0097262;
+ goto D9;
+
+N9_6:
+ if AVG_SCORE < 400330 then goto T9_4;
+ else goto T9_5;
+
+T9_4:
+ response = -0.00390127;
+ goto D9;
+
+T9_5:
+ response = 0.00706031;
+ goto D9;
+
+T9_6:
+ response = 5.49425e-05;
+ goto D9;
+
+T9_7:
+ response = 0.0204424;
+ goto D9;
+
+D9:
+
+tnscore = tnscore + response;
+
+/* Tree 11 of 77 */
+/* N terminal nodes = 7, Depth = 6 */
+
+
+N10_1:
+ if AVG_SCORE < 241590 then goto T10_1;
+ else goto N10_2;
+
+T10_1:
+ response = -0.00459592;
+ goto D10;
+
+N10_2:
+ if TOPSTORY < 0.05 then goto N10_3;
+ else goto N10_4;
+
+N10_3:
+ if DAY_WEEK_AVG_RATIO < 4.205 then goto T10_2;
+ else goto T10_3;
+
+T10_2:
+ response = -0.000126418;
+ goto D10;
+
+T10_3:
+ response = 0.0204507;
+ goto D10;
+
+N10_4:
+ if TWO_DAY_WF < 0.86039 then goto N10_5;
+ else goto T10_7;
+
+N10_5:
+ if ISABSTRACT_AVG < 0.585 then goto N10_6;
+ else goto T10_6;
+
+N10_6:
+ if TOPSTORY < 0.365 then goto T10_4;
+ else goto T10_5;
+
+T10_4:
+ response = 0.00566686;
+ goto D10;
+
+T10_5:
+ response = 0.0196157;
+ goto D10;
+
+T10_6:
+ response = -0.00511988;
+ goto D10;
+
+T10_7:
+ response = 0.0124928;
+ goto D10;
+
+D10:
+
+tnscore = tnscore + response;
+
+/* Tree 12 of 77 */
+/* N terminal nodes = 7, Depth = 6 */
+
+
+N11_1:
+ if MIN_SCORE < 222204 then goto N11_2;
+ else goto N11_3;
+
+N11_2:
+ if PREV_DAY_HITS < 4.5 then goto T11_1;
+ else goto T11_2;
+
+T11_1:
+ response = -0.00589519;
+ goto D11;
+
+T11_2:
+ response = -0.000631753;
+ goto D11;
+
+N11_3:
+ if ISABSTRACT_AVG < 0.235 then goto N11_4;
+ else goto T11_7;
+
+N11_4:
+ if MAX_MIN_SCORE < 171496 then goto N11_5;
+ else goto T11_6;
+
+N11_5:
+ if MAX_SCORE < 558130 then goto N11_6;
+ else goto T11_5;
+
+N11_6:
+ if AVG_RANK < 7.125 then goto T11_3;
+ else goto T11_4;
+
+T11_3:
+ response = 0.00086138;
+ goto D11;
+
+T11_4:
+ response = 0.00952768;
+ goto D11;
+
+T11_5:
+ response = 0.0206013;
+ goto D11;
+
+T11_6:
+ response = 0.00133279;
+ goto D11;
+
+T11_7:
+ response = -0.00112547;
+ goto D11;
+
+D11:
+
+tnscore = tnscore + response;
+
+/* Tree 13 of 77 */
+/* N terminal nodes = 7, Depth = 5 */
+
+
+N12_1:
+ if MIN_SCORE < 222204 then goto N12_2;
+ else goto N12_4;
+
+N12_2:
+ if DAY_PD_HITS_DERIV < -12.5 then goto T12_1;
+ else goto N12_3;
+
+T12_1:
+ response = 0.00847214;
+ goto D12;
+
+N12_3:
+ if DAY_WEEK_AVG_DERIV < 36.785 then goto T12_2;
+ else goto T12_3;
+
+T12_2:
+ response = -0.00450293;
+ goto D12;
+
+T12_3:
+ response = 0.0134303;
+ goto D12;
+
+N12_4:
+ if ISABSTRACT_AVG < 0.05 then goto N12_5;
+ else goto T12_7;
+
+N12_5:
+ if DAY_WEEK_AVG_RATIO < 4.83 then goto N12_6;
+ else goto T12_6;
+
+N12_6:
+ if NATIONALNEWS < 0.355 then goto T12_4;
+ else goto T12_5;
+
+T12_4:
+ response = 0.00488766;
+ goto D12;
+
+T12_5:
+ response = 0.0169425;
+ goto D12;
+
+T12_6:
+ response = 0.0204287;
+ goto D12;
+
+T12_7:
+ response = -0.000132037;
+ goto D12;
+
+D12:
+
+tnscore = tnscore + response;
+
+/* Tree 14 of 77 */
+/* N terminal nodes = 7, Depth = 5 */
+
+
+N13_1:
+ if MAX_SCORE < 250058 then goto N13_2;
+ else goto N13_3;
+
+N13_2:
+ if INTLNEWS < 0.105 then goto T13_1;
+ else goto T13_2;
+
+T13_1:
+ response = -0.0059595;
+ goto D13;
+
+T13_2:
+ response = 0.000221029;
+ goto D13;
+
+N13_3:
+ if TOPSTORY < 0.355 then goto N13_4;
+ else goto T13_7;
+
+N13_4:
+ if MIN_SCORE < 385241 then goto N13_5;
+ else goto N13_6;
+
+N13_5:
+ if BUSINESS < 0.05 then goto T13_3;
+ else goto T13_4;
+
+T13_3:
+ response = 0.00367059;
+ goto D13;
+
+T13_4:
+ response = -0.00223683;
+ goto D13;
+
+N13_6:
+ if INTLNEWS < 0.365 then goto T13_5;
+ else goto T13_6;
+
+T13_5:
+ response = 0.00575046;
+ goto D13;
+
+T13_6:
+ response = 0.0237395;
+ goto D13;
+
+T13_7:
+ response = 0.0174135;
+ goto D13;
+
+D13:
+
+tnscore = tnscore + response;
+
+/* Tree 15 of 77 */
+/* N terminal nodes = 7, Depth = 6 */
+
+
+N14_1:
+ if MAX_SCORE < 265638 then goto T14_1;
+ else goto N14_2;
+
+T14_1:
+ response = -0.00287962;
+ goto D14;
+
+N14_2:
+ if ISABSTRACT_AVG < 0.235 then goto N14_3;
+ else goto T14_7;
+
+N14_3:
+ if DAY_PD_HITS_RATIO < 18.75 then goto N14_4;
+ else goto T14_6;
+
+N14_4:
+ if MAX_MIN_SCORE < 67687.2 then goto N14_5;
+ else goto N14_6;
+
+N14_5:
+ if PREV_DAY_HITS < 6.5 then goto T14_2;
+ else goto T14_3;
+
+T14_2:
+ response = 0.00469885;
+ goto D14;
+
+T14_3:
+ response = 0.0145573;
+ goto D14;
+
+N14_6:
+ if AVG_SCORE < 399037 then goto T14_4;
+ else goto T14_5;
+
+T14_4:
+ response = -0.00312833;
+ goto D14;
+
+T14_5:
+ response = 0.00680591;
+ goto D14;
+
+T14_6:
+ response = 0.0238803;
+ goto D14;
+
+T14_7:
+ response = -0.00158812;
+ goto D14;
+
+D14:
+
+tnscore = tnscore + response;
+
+/* Tree 16 of 77 */
+/* N terminal nodes = 7, Depth = 5 */
+
+
+N15_1:
+ if MIN_SCORE < 222204 then goto N15_2;
+ else goto N15_3;
+
+N15_2:
+ if TOPSTORY < 0.355 then goto T15_1;
+ else goto T15_2;
+
+T15_1:
+ response = -0.00377546;
+ goto D15;
+
+T15_2:
+ response = 0.0099145;
+ goto D15;
+
+N15_3:
+ if ISTITLE_AVG < 0.885 then goto N15_4;
+ else goto T15_7;
+
+N15_4:
+ if MAX_MIN_SCORE < 57965.2 then goto N15_5;
+ else goto N15_6;
+
+N15_5:
+ if MIN_SCORE < 223217 then goto T15_3;
+ else goto T15_4;
+
+T15_3:
+ response = 0.0291906;
+ goto D15;
+
+T15_4:
+ response = 0.00802385;
+ goto D15;
+
+N15_6:
+ if AVG_SCORE < 402324 then goto T15_5;
+ else goto T15_6;
+
+T15_5:
+ response = -0.00259188;
+ goto D15;
+
+T15_6:
+ response = 0.00560142;
+ goto D15;
+
+T15_7:
+ response = -0.0015883;
+ goto D15;
+
+D15:
+
+tnscore = tnscore + response;
+
+/* Tree 17 of 77 */
+/* N terminal nodes = 7, Depth = 7 */
+
+
+N16_1:
+ if MAX_SCORE < 252015 then goto T16_1;
+ else goto N16_2;
+
+T16_1:
+ response = -0.00312417;
+ goto D16;
+
+N16_2:
+ if DAY_WEEK_AVG_RATIO < 5.91 then goto N16_3;
+ else goto T16_7;
+
+N16_3:
+ if TOPSTORY < 0.185 then goto N16_4;
+ else goto T16_6;
+
+N16_4:
+ if MAX_MIN_SCORE < 123158 then goto N16_5;
+ else goto T16_5;
+
+N16_5:
+ if ISABSTRACT_AVG < 0.13 then goto T16_2;
+ else goto N16_6;
+
+T16_2:
+ response = 0.0054303;
+ goto D16;
+
+N16_6:
+ if PREV_DAY_HITS < 7.5 then goto T16_3;
+ else goto T16_4;
+
+T16_3:
+ response = -0.00350664;
+ goto D16;
+
+T16_4:
+ response = 0.0115054;
+ goto D16;
+
+T16_5:
+ response = -0.00200056;
+ goto D16;
+
+T16_6:
+ response = 0.00612929;
+ goto D16;
+
+T16_7:
+ response = 0.0248479;
+ goto D16;
+
+D16:
+
+tnscore = tnscore + response;
+
+/* Tree 18 of 77 */
+/* N terminal nodes = 7, Depth = 5 */
+
+
+N17_1:
+ if MIN_SCORE < 232158 then goto N17_2;
+ else goto N17_5;
+
+N17_2:
+ if DAY_PD_HITS_DERIV < -13.5 then goto T17_1;
+ else goto N17_3;
+
+T17_1:
+ response = 0.012118;
+ goto D17;
+
+N17_3:
+ if SPORTS < 0.685 then goto T17_2;
+ else goto N17_4;
+
+T17_2:
+ response = -0.00337721;
+ goto D17;
+
+N17_4:
+ if MAX_SCORE < 165958 then goto T17_3;
+ else goto T17_4;
+
+T17_3:
+ response = -0.00648055;
+ goto D17;
+
+T17_4:
+ response = 0.00734207;
+ goto D17;
+
+N17_5:
+ if ISABSTRACT_AVG < 0.635 then goto N17_6;
+ else goto T17_7;
+
+N17_6:
+ if EIGHT_HOUR_WF < 0.493902 then goto T17_5;
+ else goto T17_6;
+
+T17_5:
+ response = 0.00519362;
+ goto D17;
+
+T17_6:
+ response = -0.00533505;
+ goto D17;
+
+T17_7:
+ response = -0.00220591;
+ goto D17;
+
+D17:
+
+tnscore = tnscore + response;
+
+/* Tree 19 of 77 */
+/* N terminal nodes = 7, Depth = 5 */
+
+
+N18_1:
+ if AVG_SCORE < 387415 then goto N18_2;
+ else goto N18_4;
+
+N18_2:
+ if PREV_DAY_HITS < 2.5 then goto T18_1;
+ else goto N18_3;
+
+T18_1:
+ response = -0.00321038;
+ goto D18;
+
+N18_3:
+ if BUSINESS < 0.05 then goto T18_2;
+ else goto T18_3;
+
+T18_2:
+ response = 0.00353532;
+ goto D18;
+
+T18_3:
+ response = -0.0020425;
+ goto D18;
+
+N18_4:
+ if TWO_DAY_WF < 0.979149 then goto T18_4;
+ else goto N18_5;
+
+T18_4:
+ response = 0.00271552;
+ goto D18;
+
+N18_5:
+ if TWELVE_HOUR_WF < 0.104418 then goto T18_5;
+ else goto N18_6;
+
+T18_5:
+ response = 0.00180155;
+ goto D18;
+
+N18_6:
+ if MIN_SCORE < 350308 then goto T18_6;
+ else goto T18_7;
+
+T18_6:
+ response = 0.0370742;
+ goto D18;
+
+T18_7:
+ response = 0.0145313;
+ goto D18;
+
+D18:
+
+tnscore = tnscore + response;
+
+/* Tree 20 of 77 */
+/* N terminal nodes = 7, Depth = 5 */
+
+
+N19_1:
+ if MAX_SCORE < 248824 then goto N19_2;
+ else goto N19_3;
+
+N19_2:
+ if INTLNEWS < 0.185 then goto T19_1;
+ else goto T19_2;
+
+T19_1:
+ response = -0.00381799;
+ goto D19;
+
+T19_2:
+ response = 0.00109643;
+ goto D19;
+
+N19_3:
+ if TOPSTORY < 0.185 then goto N19_4;
+ else goto N19_6;
+
+N19_4:
+ if TWO_DAY_WF < 0.779514 then goto T19_3;
+ else goto N19_5;
+
+T19_3:
+ response = -0.0015664;
+ goto D19;
+
+N19_5:
+ if WEEKAVG < 4.07 then goto T19_4;
+ else goto T19_5;
+
+T19_4:
+ response = 0.00171319;
+ goto D19;
+
+T19_5:
+ response = 0.0126131;
+ goto D19;
+
+N19_6:
+ if MAX_MIN_RANK < 7 then goto T19_6;
+ else goto T19_7;
+
+T19_6:
+ response = 0.00411675;
+ goto D19;
+
+T19_7:
+ response = 0.0149353;
+ goto D19;
+
+D19:
+
+tnscore = tnscore + response;
+
+/* Tree 21 of 77 */
+/* N terminal nodes = 7, Depth = 6 */
+
+
+N20_1:
+ if MIN_SCORE < 233311 then goto T20_1;
+ else goto N20_2;
+
+T20_1:
+ response = -0.00183471;
+ goto D20;
+
+N20_2:
+ if LW_DAY_HITS < 0.5 then goto N20_3;
+ else goto T20_7;
+
+N20_3:
+ if SUPERDUPER_AVG < 0.21 then goto N20_4;
+ else goto N20_5;
+
+N20_4:
+ if MIN_RANK < 1 then goto T20_2;
+ else goto T20_3;
+
+T20_2:
+ response = 0.0173917;
+ goto D20;
+
+T20_3:
+ response = 0.000643665;
+ goto D20;
+
+N20_5:
+ if LOCALNEWS < 0.185 then goto N20_6;
+ else goto T20_6;
+
+N20_6:
+ if DAY_PD_HITS_RATIO < 8.795 then goto T20_4;
+ else goto T20_5;
+
+T20_4:
+ response = 0.00308276;
+ goto D20;
+
+T20_5:
+ response = 0.0169982;
+ goto D20;
+
+T20_6:
+ response = 0.0159792;
+ goto D20;
+
+T20_7:
+ response = -0.00499866;
+ goto D20;
+
+D20:
+
+tnscore = tnscore + response;
+
+/* Tree 22 of 77 */
+/* N terminal nodes = 7, Depth = 7 */
+
+
+N21_1:
+ if TOPSTORY < 0.39 then goto N21_2;
+ else goto T21_7;
+
+N21_2:
+ if MAX_SCORE < 176763 then goto T21_1;
+ else goto N21_3;
+
+T21_1:
+ response = -0.00448387;
+ goto D21;
+
+N21_3:
+ if INTLNEWS < 0.415 then goto N21_4;
+ else goto T21_6;
+
+N21_4:
+ if BUSINESS < 0.05 then goto N21_5;
+ else goto T21_5;
+
+N21_5:
+ if MAX_MIN_SCORE < 20408.8 then goto T21_2;
+ else goto N21_6;
+
+T21_2:
+ response = -0.00328596;
+ goto D21;
+
+N21_6:
+ if TWO_DAY_WF < 0.512854 then goto T21_3;
+ else goto T21_4;
+
+T21_3:
+ response = -0.00211998;
+ goto D21;
+
+T21_4:
+ response = 0.00522867;
+ goto D21;
+
+T21_5:
+ response = -0.00226038;
+ goto D21;
+
+T21_6:
+ response = 0.00574748;
+ goto D21;
+
+T21_7:
+ response = 0.00900215;
+ goto D21;
+
+D21:
+
+tnscore = tnscore + response;
+
+/* Tree 23 of 77 */
+/* N terminal nodes = 7, Depth = 6 */
+
+
+N22_1:
+ if TWO_DAY_WF < 0.75074 then goto N22_2;
+ else goto N22_6;
+
+N22_2:
+ if BUSINESS < 0.05 then goto N22_3;
+ else goto T22_5;
+
+N22_3:
+ if FOUR_HOUR_WF < 0.0149554 then goto N22_4;
+ else goto T22_4;
+
+N22_4:
+ if WEEKAVG < 0.785 then goto T22_1;
+ else goto N22_5;
+
+T22_1:
+ response = -0.00184131;
+ goto D22;
+
+N22_5:
+ if AVG_SCORE < 167616 then goto T22_2;
+ else goto T22_3;
+
+T22_2:
+ response = -0.00305123;
+ goto D22;
+
+T22_3:
+ response = 0.00685803;
+ goto D22;
+
+T22_4:
+ response = -0.00470139;
+ goto D22;
+
+T22_5:
+ response = -0.003457;
+ goto D22;
+
+N22_6:
+ if MAX_SCORE < 504246 then goto T22_6;
+ else goto T22_7;
+
+T22_6:
+ response = 0.000878955;
+ goto D22;
+
+T22_7:
+ response = 0.00850264;
+ goto D22;
+
+D22:
+
+tnscore = tnscore + response;
+
+/* Tree 24 of 77 */
+/* N terminal nodes = 7, Depth = 5 */
+
+
+N23_1:
+ if BUSINESS < 0.105 then goto N23_2;
+ else goto N23_6;
+
+N23_2:
+ if AVG_SCORE < 160899 then goto T23_1;
+ else goto N23_3;
+
+T23_1:
+ response = -0.00270644;
+ goto D23;
+
+N23_3:
+ if AVG_SCORE < 194764 then goto N23_4;
+ else goto N23_5;
+
+N23_4:
+ if ISABSTRACT_AVG < 0.315 then goto T23_2;
+ else goto T23_3;
+
+T23_2:
+ response = -0.00800918;
+ goto D23;
+
+T23_3:
+ response = 0.012943;
+ goto D23;
+
+N23_5:
+ if NATIONALNEWS < 0.355 then goto T23_4;
+ else goto T23_5;
+
+T23_4:
+ response = 0.000901868;
+ goto D23;
+
+T23_5:
+ response = 0.0112161;
+ goto D23;
+
+N23_6:
+ if INTLNEWS < 0.39 then goto T23_6;
+ else goto T23_7;
+
+T23_6:
+ response = -0.00269415;
+ goto D23;
+
+T23_7:
+ response = 0.00725021;
+ goto D23;
+
+D23:
+
+tnscore = tnscore + response;
+
+/* Tree 25 of 77 */
+/* N terminal nodes = 7, Depth = 6 */
+
+
+N24_1:
+ if BUSINESS < 0.105 then goto N24_2;
+ else goto T24_7;
+
+N24_2:
+ if MAX_SCORE < 188088 then goto T24_1;
+ else goto N24_3;
+
+T24_1:
+ response = -0.00298371;
+ goto D24;
+
+N24_3:
+ if AVG_SCORE < 190784 then goto N24_4;
+ else goto N24_5;
+
+N24_4:
+ if ISABSTRACT_AVG < 0.55 then goto T24_2;
+ else goto T24_3;
+
+T24_2:
+ response = -0.00171064;
+ goto D24;
+
+T24_3:
+ response = 0.0237327;
+ goto D24;
+
+N24_5:
+ if AVG_RANK < 9.755 then goto T24_4;
+ else goto N24_6;
+
+T24_4:
+ response = 0.00131049;
+ goto D24;
+
+N24_6:
+ if WEEKAVG < 0.93 then goto T24_5;
+ else goto T24_6;
+
+T24_5:
+ response = -0.00199335;
+ goto D24;
+
+T24_6:
+ response = 0.020099;
+ goto D24;
+
+T24_7:
+ response = -0.00222399;
+ goto D24;
+
+D24:
+
+tnscore = tnscore + response;
+
+/* Tree 26 of 77 */
+/* N terminal nodes = 7, Depth = 5 */
+
+
+N25_1:
+ if NATIONALNEWS < 0.115 then goto N25_2;
+ else goto T25_7;
+
+N25_2:
+ if PREV_DAY_HITS < 27.5 then goto N25_3;
+ else goto N25_5;
+
+N25_3:
+ if INTLNEWS < 0.725 then goto N25_4;
+ else goto T25_3;
+
+N25_4:
+ if AVG_SCORE < 629440 then goto T25_1;
+ else goto T25_2;
+
+T25_1:
+ response = -0.00184197;
+ goto D25;
+
+T25_2:
+ response = 0.0166573;
+ goto D25;
+
+T25_3:
+ response = 0.0148512;
+ goto D25;
+
+N25_5:
+ if TWO_DAY_WF < 0.773805 then goto N25_6;
+ else goto T25_6;
+
+N25_6:
+ if TWELVE_HOUR_WF < 0.114144 then goto T25_4;
+ else goto T25_5;
+
+T25_4:
+ response = 0.00583361;
+ goto D25;
+
+T25_5:
+ response = -0.012718;
+ goto D25;
+
+T25_6:
+ response = 0.0149618;
+ goto D25;
+
+T25_7:
+ response = 0.00280466;
+ goto D25;
+
+D25:
+
+tnscore = tnscore + response;
+
+/* Tree 27 of 77 */
+/* N terminal nodes = 7, Depth = 5 */
+
+
+N26_1:
+ if TOPSTORY < 0.355 then goto N26_2;
+ else goto N26_4;
+
+N26_2:
+ if DAY_PD_HITS_DERIV < -4.5 then goto T26_1;
+ else goto N26_3;
+
+T26_1:
+ response = 0.00287102;
+ goto D26;
+
+N26_3:
+ if SPORTS < 0.73 then goto T26_2;
+ else goto T26_3;
+
+T26_2:
+ response = -0.00185575;
+ goto D26;
+
+T26_3:
+ response = 0.00272133;
+ goto D26;
+
+N26_4:
+ if AVG_RANK < 9.55 then goto N26_5;
+ else goto T26_7;
+
+N26_5:
+ if DAY_PD_HITS_RATIO < 0.405 then goto T26_4;
+ else goto N26_6;
+
+T26_4:
+ response = -0.00518413;
+ goto D26;
+
+N26_6:
+ if MAX_MIN_SCORE < 115612 then goto T26_5;
+ else goto T26_6;
+
+T26_5:
+ response = 0.00438781;
+ goto D26;
+
+T26_6:
+ response = 0.0211867;
+ goto D26;
+
+T26_7:
+ response = 0.0209324;
+ goto D26;
+
+D26:
+
+tnscore = tnscore + response;
+
+/* Tree 28 of 77 */
+/* N terminal nodes = 7, Depth = 7 */
+
+
+N27_1:
+ if AVG_SCORE < 147623 then goto T27_1;
+ else goto N27_2;
+
+T27_1:
+ response = -0.00405691;
+ goto D27;
+
+N27_2:
+ if DAY_PD_HITS_DERIV < -23.5 then goto T27_2;
+ else goto N27_3;
+
+T27_2:
+ response = 0.00920672;
+ goto D27;
+
+N27_3:
+ if INTLNEWS < 0.725 then goto N27_4;
+ else goto T27_7;
+
+N27_4:
+ if TOPSTORY < 0.39 then goto T27_3;
+ else goto N27_5;
+
+T27_3:
+ response = 7.21159e-05;
+ goto D27;
+
+N27_5:
+ if INTLNEWS < 0.05 then goto N27_6;
+ else goto T27_6;
+
+N27_6:
+ if SUPERDUPER_AVG < 0.155 then goto T27_4;
+ else goto T27_5;
+
+T27_4:
+ response = 0.00462984;
+ goto D27;
+
+T27_5:
+ response = 0.0231233;
+ goto D27;
+
+T27_6:
+ response = 0.000562082;
+ goto D27;
+
+T27_7:
+ response = 0.0141075;
+ goto D27;
+
+D27:
+
+tnscore = tnscore + response;
+
+/* Tree 29 of 77 */
+/* N terminal nodes = 7, Depth = 5 */
+
+
+N28_1:
+ if AVG_SCORE < 159075 then goto T28_1;
+ else goto N28_2;
+
+T28_1:
+ response = -0.00305707;
+ goto D28;
+
+N28_2:
+ if TOPSTORY < 0.05 then goto N28_3;
+ else goto N28_5;
+
+N28_3:
+ if SPORTS < 0.73 then goto T28_2;
+ else goto N28_4;
+
+T28_2:
+ response = -0.000935589;
+ goto D28;
+
+N28_4:
+ if AVG_RANK < 5.635 then goto T28_3;
+ else goto T28_4;
+
+T28_3:
+ response = -0.00405106;
+ goto D28;
+
+T28_4:
+ response = 0.0119584;
+ goto D28;
+
+N28_5:
+ if LW_DAY_HITS < 0.5 then goto T28_5;
+ else goto N28_6;
+
+T28_5:
+ response = 0.0045483;
+ goto D28;
+
+N28_6:
+ if PREV_DAY_HITS < 30 then goto T28_6;
+ else goto T28_7;
+
+T28_6:
+ response = -0.00540909;
+ goto D28;
+
+T28_7:
+ response = 0.00895866;
+ goto D28;
+
+D28:
+
+tnscore = tnscore + response;
+
+/* Tree 30 of 77 */
+/* N terminal nodes = 7, Depth = 5 */
+
+
+N29_1:
+ if MAX_SCORE < 507014 then goto N29_2;
+ else goto N29_4;
+
+N29_2:
+ if AVG_RANK < 6.775 then goto T29_1;
+ else goto N29_3;
+
+T29_1:
+ response = -0.00328147;
+ goto D29;
+
+N29_3:
+ if MAX_MIN_SCORE < 150474 then goto T29_2;
+ else goto T29_3;
+
+T29_2:
+ response = 0.000393348;
+ goto D29;
+
+T29_3:
+ response = -0.00536951;
+ goto D29;
+
+N29_4:
+ if DAY_PD_HITS_RATIO < 7.885 then goto N29_5;
+ else goto T29_7;
+
+N29_5:
+ if ENTERTAINMENT < 0.05 then goto N29_6;
+ else goto T29_6;
+
+N29_6:
+ if MAX_SCORE < 516938 then goto T29_4;
+ else goto T29_5;
+
+T29_4:
+ response = 0.0171772;
+ goto D29;
+
+T29_5:
+ response = 0.00382646;
+ goto D29;
+
+T29_6:
+ response = -0.00447429;
+ goto D29;
+
+T29_7:
+ response = 0.0153178;
+ goto D29;
+
+D29:
+
+tnscore = tnscore + response;
+
+/* Tree 31 of 77 */
+/* N terminal nodes = 7, Depth = 6 */
+
+
+N30_1:
+ if DAY_WEEK_AVG_RATIO < 5.905 then goto N30_2;
+ else goto T30_7;
+
+N30_2:
+ if NATIONALNEWS < 0.105 then goto T30_1;
+ else goto N30_3;
+
+T30_1:
+ response = -0.000832529;
+ goto D30;
+
+N30_3:
+ if MAX_MIN_RANK < 7 then goto N30_4;
+ else goto N30_5;
+
+N30_4:
+ if NATIONALNEWS < 0.13 then goto T30_2;
+ else goto T30_3;
+
+T30_2:
+ response = 0.0108634;
+ goto D30;
+
+T30_3:
+ response = 0.000313874;
+ goto D30;
+
+N30_5:
+ if AVG_SCORE < 231880 then goto T30_4;
+ else goto N30_6;
+
+T30_4:
+ response = -0.00104106;
+ goto D30;
+
+N30_6:
+ if PREV_DAY_HITS < 5.5 then goto T30_5;
+ else goto T30_6;
+
+T30_5:
+ response = 0.0263191;
+ goto D30;
+
+T30_6:
+ response = 0.00601508;
+ goto D30;
+
+T30_7:
+ response = 0.0149012;
+ goto D30;
+
+D30:
+
+tnscore = tnscore + response;
+
+/* Tree 32 of 77 */
+/* N terminal nodes = 7, Depth = 6 */
+
+
+N31_1:
+ if PREV_DAY_HITS < 59.5 then goto N31_2;
+ else goto T31_7;
+
+N31_2:
+ if NATIONALNEWS < 0.05 then goto N31_3;
+ else goto N31_6;
+
+N31_3:
+ if DAY_PD_HITS_DERIV < -6.5 then goto N31_4;
+ else goto T31_4;
+
+N31_4:
+ if DAY_WEEK_AVG_RATIO < 1.285 then goto N31_5;
+ else goto T31_3;
+
+N31_5:
+ if MAX_MIN_SCORE < 160894 then goto T31_1;
+ else goto T31_2;
+
+T31_1:
+ response = 0.00449479;
+ goto D31;
+
+T31_2:
+ response = -0.00886993;
+ goto D31;
+
+T31_3:
+ response = 0.0153285;
+ goto D31;
+
+T31_4:
+ response = -0.00178263;
+ goto D31;
+
+N31_6:
+ if POLITICS < 0.05 then goto T31_5;
+ else goto T31_6;
+
+T31_5:
+ response = 0.00198329;
+ goto D31;
+
+T31_6:
+ response = -0.00586162;
+ goto D31;
+
+T31_7:
+ response = 0.00935161;
+ goto D31;
+
+D31:
+
+tnscore = tnscore + response;
+
+/* Tree 33 of 77 */
+/* N terminal nodes = 7, Depth = 7 */
+
+
+N32_1:
+ if MIN_SCORE < 132626 then goto T32_1;
+ else goto N32_2;
+
+T32_1:
+ response = -0.00387076;
+ goto D32;
+
+N32_2:
+ if WEEKAVG < 0.93 then goto T32_2;
+ else goto N32_3;
+
+T32_2:
+ response = -0.00135437;
+ goto D32;
+
+N32_3:
+ if MAX_MIN_SCORE < 46712 then goto T32_3;
+ else goto N32_4;
+
+T32_3:
+ response = 0.00347721;
+ goto D32;
+
+N32_4:
+ if AVG_SCORE < 404994 then goto N32_5;
+ else goto T32_7;
+
+N32_5:
+ if MIN_SCORE < 241776 then goto N32_6;
+ else goto T32_6;
+
+N32_6:
+ if SPORTS < 0.79 then goto T32_4;
+ else goto T32_5;
+
+T32_4:
+ response = -0.00180685;
+ goto D32;
+
+T32_5:
+ response = 0.0168028;
+ goto D32;
+
+T32_6:
+ response = -0.00853053;
+ goto D32;
+
+T32_7:
+ response = 0.00228774;
+ goto D32;
+
+D32:
+
+tnscore = tnscore + response;
+
+/* Tree 34 of 77 */
+/* N terminal nodes = 7, Depth = 6 */
+
+
+N33_1:
+ if LW_DAY_HITS < 0.5 then goto N33_2;
+ else goto N33_6;
+
+N33_2:
+ if AVG_SCORE < 159292 then goto T33_1;
+ else goto N33_3;
+
+T33_1:
+ response = -0.00244777;
+ goto D33;
+
+N33_3:
+ if LIFESTYLE < 0.155 then goto N33_4;
+ else goto T33_5;
+
+N33_4:
+ if DAY_PD_HITS_DERIV < -3.5 then goto T33_2;
+ else goto N33_5;
+
+T33_2:
+ response = 0.00412328;
+ goto D33;
+
+N33_5:
+ if TOPSTORY < 0.39 then goto T33_3;
+ else goto T33_4;
+
+T33_3:
+ response = 0.000416163;
+ goto D33;
+
+T33_4:
+ response = 0.0104883;
+ goto D33;
+
+T33_5:
+ response = -0.00615481;
+ goto D33;
+
+N33_6:
+ if FOUR_HOUR_WF < 0.158004 then goto T33_6;
+ else goto T33_7;
+
+T33_6:
+ response = -0.00212154;
+ goto D33;
+
+T33_7:
+ response = -0.0150848;
+ goto D33;
+
+D33:
+
+tnscore = tnscore + response;
+
+/* Tree 35 of 77 */
+/* N terminal nodes = 7, Depth = 6 */
+
+
+N34_1:
+ if DAY_PD_HITS_RATIO < 43 then goto N34_2;
+ else goto T34_7;
+
+N34_2:
+ if LW_DAY_HITS < 0.5 then goto N34_3;
+ else goto N34_6;
+
+N34_3:
+ if DAY_PD_HITS_DERIV < -5.5 then goto N34_4;
+ else goto T34_4;
+
+N34_4:
+ if AVG_RANK < 9.265 then goto N34_5;
+ else goto T34_3;
+
+N34_5:
+ if TOPSTORY < 0.05 then goto T34_1;
+ else goto T34_2;
+
+T34_1:
+ response = -0.00313951;
+ goto D34;
+
+T34_2:
+ response = 0.00432897;
+ goto D34;
+
+T34_3:
+ response = 0.0117073;
+ goto D34;
+
+T34_4:
+ response = -0.000692;
+ goto D34;
+
+N34_6:
+ if MAX_MIN_SCORE < 120702 then goto T34_5;
+ else goto T34_6;
+
+T34_5:
+ response = -0.00138028;
+ goto D34;
+
+T34_6:
+ response = -0.00945152;
+ goto D34;
+
+T34_7:
+ response = 0.0160989;
+ goto D34;
+
+D34:
+
+tnscore = tnscore + response;
+
+/* Tree 36 of 77 */
+/* N terminal nodes = 7, Depth = 5 */
+
+
+N35_1:
+ if LW_DAY_HITS < 0.5 then goto N35_2;
+ else goto N35_5;
+
+N35_2:
+ if MAX_SCORE < 507008 then goto N35_3;
+ else goto T35_4;
+
+N35_3:
+ if MAX_SCORE < 339502 then goto N35_4;
+ else goto T35_3;
+
+N35_4:
+ if ENTERTAINMENT < 0.415 then goto T35_1;
+ else goto T35_2;
+
+T35_1:
+ response = 0.00030327;
+ goto D35;
+
+T35_2:
+ response = 0.00803638;
+ goto D35;
+
+T35_3:
+ response = -0.0034615;
+ goto D35;
+
+T35_4:
+ response = 0.00344157;
+ goto D35;
+
+N35_5:
+ if FOUR_HOUR_WF < 0.101282 then goto N35_6;
+ else goto T35_7;
+
+N35_6:
+ if DAY_WEEK_AVG_DERIV < 17.5 then goto T35_5;
+ else goto T35_6;
+
+T35_5:
+ response = -0.00192815;
+ goto D35;
+
+T35_6:
+ response = 0.00914257;
+ goto D35;
+
+T35_7:
+ response = -0.0127954;
+ goto D35;
+
+D35:
+
+tnscore = tnscore + response;
+
+/* Tree 37 of 77 */
+/* N terminal nodes = 7, Depth = 7 */
+
+
+N36_1:
+ if DAY_WEEK_AVG_RATIO < 0.255 then goto T36_1;
+ else goto N36_2;
+
+T36_1:
+ response = 0.0131801;
+ goto D36;
+
+N36_2:
+ if MAX_MIN_SCORE < 312687 then goto N36_3;
+ else goto T36_7;
+
+N36_3:
+ if MAX_MIN_SCORE < 296243 then goto N36_4;
+ else goto T36_6;
+
+N36_4:
+ if NATIONALNEWS < 0.105 then goto T36_2;
+ else goto N36_5;
+
+T36_2:
+ response = -0.000609993;
+ goto D36;
+
+N36_5:
+ if MAX_MIN_RANK < 7 then goto T36_3;
+ else goto N36_6;
+
+T36_3:
+ response = 0.000898274;
+ goto D36;
+
+N36_6:
+ if MAX_SCORE < 234190 then goto T36_4;
+ else goto T36_5;
+
+T36_4:
+ response = 0.000967677;
+ goto D36;
+
+T36_5:
+ response = 0.0163215;
+ goto D36;
+
+T36_6:
+ response = 0.0161496;
+ goto D36;
+
+T36_7:
+ response = -0.0112906;
+ goto D36;
+
+D36:
+
+tnscore = tnscore + response;
+
+/* Tree 38 of 77 */
+/* N terminal nodes = 7, Depth = 6 */
+
+
+N37_1:
+ if TOPSTORY < 0.39 then goto N37_2;
+ else goto T37_7;
+
+N37_2:
+ if MIN_SCORE < 220684 then goto T37_1;
+ else goto N37_3;
+
+T37_1:
+ response = -0.00143961;
+ goto D37;
+
+N37_3:
+ if ISABSTRACT_AVG < 0.05 then goto N37_4;
+ else goto T37_6;
+
+N37_4:
+ if AVG_SCORE < 277398 then goto N37_5;
+ else goto N37_6;
+
+N37_5:
+ if DAY_LW_DAY_HITS_RATIO < 1.75 then goto T37_2;
+ else goto T37_3;
+
+T37_2:
+ response = -0.000951177;
+ goto D37;
+
+T37_3:
+ response = 0.00924989;
+ goto D37;
+
+N37_6:
+ if BUSINESS < 0.39 then goto T37_4;
+ else goto T37_5;
+
+T37_4:
+ response = -0.00128495;
+ goto D37;
+
+T37_5:
+ response = 0.0103605;
+ goto D37;
+
+T37_6:
+ response = -0.00106493;
+ goto D37;
+
+T37_7:
+ response = 0.00610485;
+ goto D37;
+
+D37:
+
+tnscore = tnscore + response;
+
+/* Tree 39 of 77 */
+/* N terminal nodes = 7, Depth = 5 */
+
+
+N38_1:
+ if AVG_SCORE < 500951 then goto N38_2;
+ else goto N38_5;
+
+N38_2:
+ if DAY_HITS < 42.5 then goto T38_1;
+ else goto N38_3;
+
+T38_1:
+ response = -0.000676917;
+ goto D38;
+
+N38_3:
+ if INTLNEWS < 0.45 then goto N38_4;
+ else goto T38_4;
+
+N38_4:
+ if ISTITLE_AVG < 0.05 then goto T38_2;
+ else goto T38_3;
+
+T38_2:
+ response = -0.0122069;
+ goto D38;
+
+T38_3:
+ response = 0.00752268;
+ goto D38;
+
+T38_4:
+ response = 0.0144731;
+ goto D38;
+
+N38_5:
+ if MIN_SCORE < 362007 then goto T38_5;
+ else goto N38_6;
+
+T38_5:
+ response = 0.0202143;
+ goto D38;
+
+N38_6:
+ if PUB_TODAY_AVG < 0.05 then goto T38_6;
+ else goto T38_7;
+
+T38_6:
+ response = -0.0107444;
+ goto D38;
+
+T38_7:
+ response = 0.00512166;
+ goto D38;
+
+D38:
+
+tnscore = tnscore + response;
+
+/* Tree 40 of 77 */
+/* N terminal nodes = 7, Depth = 7 */
+
+
+N39_1:
+ if WEEKAVG < 1.07 then goto T39_1;
+ else goto N39_2;
+
+T39_1:
+ response = -0.00167316;
+ goto D39;
+
+N39_2:
+ if DAY_WEEK_AVG_RATIO < 6.14 then goto N39_3;
+ else goto T39_7;
+
+N39_3:
+ if BUSINESS < 0.05 then goto N39_4;
+ else goto T39_6;
+
+N39_4:
+ if PREV_DAY_HITS < 59.5 then goto N39_5;
+ else goto T39_5;
+
+N39_5:
+ if AVG_RANK < 9.225 then goto N39_6;
+ else goto T39_4;
+
+N39_6:
+ if DAY_HITS < 30.5 then goto T39_2;
+ else goto T39_3;
+
+T39_2:
+ response = 0.00155636;
+ goto D39;
+
+T39_3:
+ response = -0.0130867;
+ goto D39;
+
+T39_4:
+ response = 0.0105919;
+ goto D39;
+
+T39_5:
+ response = 0.0218533;
+ goto D39;
+
+T39_6:
+ response = -0.000802313;
+ goto D39;
+
+T39_7:
+ response = 0.0145597;
+ goto D39;
+
+D39:
+
+tnscore = tnscore + response;
+
+/* Tree 41 of 77 */
+/* N terminal nodes = 7, Depth = 6 */
+
+
+N40_1:
+ if LIFESTYLE < 0.05 then goto N40_2;
+ else goto T40_7;
+
+N40_2:
+ if MAX_MIN_RANK < 7 then goto T40_1;
+ else goto N40_3;
+
+T40_1:
+ response = 0.000220029;
+ goto D40;
+
+N40_3:
+ if ISABSTRACT_AVG < 0.115 then goto N40_4;
+ else goto N40_6;
+
+N40_4:
+ if TWO_DAY_WF < 0.580973 then goto T40_2;
+ else goto N40_5;
+
+T40_2:
+ response = -0.000179904;
+ goto D40;
+
+N40_5:
+ if EIGHT_HOUR_WF < 0.0125776 then goto T40_3;
+ else goto T40_4;
+
+T40_3:
+ response = 0.0222343;
+ goto D40;
+
+T40_4:
+ response = 0.00659678;
+ goto D40;
+
+N40_6:
+ if DAY_WEEK_AVG_DERIV < -0.93 then goto T40_5;
+ else goto T40_6;
+
+T40_5:
+ response = 0.0146586;
+ goto D40;
+
+T40_6:
+ response = -0.0018679;
+ goto D40;
+
+T40_7:
+ response = -0.0043182;
+ goto D40;
+
+D40:
+
+tnscore = tnscore + response;
+
+/* Tree 42 of 77 */
+/* N terminal nodes = 7, Depth = 5 */
+
+
+N41_1:
+ if AVG_SCORE < 500853 then goto N41_2;
+ else goto N41_4;
+
+N41_2:
+ if MIN_SCORE < 435034 then goto N41_3;
+ else goto T41_3;
+
+N41_3:
+ if DAY_WEEK_AVG_RATIO < 4.15 then goto T41_1;
+ else goto T41_2;
+
+T41_1:
+ response = -0.000600797;
+ goto D41;
+
+T41_2:
+ response = 0.00413062;
+ goto D41;
+
+T41_3:
+ response = -0.0152667;
+ goto D41;
+
+N41_4:
+ if MAX_SCORE < 660352 then goto N41_5;
+ else goto T41_7;
+
+N41_5:
+ if TWO_DAY_WF < 0.744565 then goto T41_4;
+ else goto N41_6;
+
+T41_4:
+ response = 0.0172406;
+ goto D41;
+
+N41_6:
+ if MAX_SCORE < 596568 then goto T41_5;
+ else goto T41_6;
+
+T41_5:
+ response = -0.0069398;
+ goto D41;
+
+T41_6:
+ response = 0.0163258;
+ goto D41;
+
+T41_7:
+ response = -0.00228486;
+ goto D41;
+
+D41:
+
+tnscore = tnscore + response;
+
+/* Tree 43 of 77 */
+/* N terminal nodes = 7, Depth = 7 */
+
+
+N42_1:
+ if SPORTS < 0.685 then goto T42_1;
+ else goto N42_2;
+
+T42_1:
+ response = -0.000328185;
+ goto D42;
+
+N42_2:
+ if AVG_SCORE < 446734 then goto N42_3;
+ else goto T42_7;
+
+N42_3:
+ if MAX_SCORE < 500264 then goto N42_4;
+ else goto T42_6;
+
+N42_4:
+ if MAX_SCORE < 450904 then goto N42_5;
+ else goto T42_5;
+
+N42_5:
+ if MIN_SCORE < 254311 then goto N42_6;
+ else goto T42_4;
+
+N42_6:
+ if WEEKAVG < 0.785 then goto T42_2;
+ else goto T42_3;
+
+T42_2:
+ response = -0.00158584;
+ goto D42;
+
+T42_3:
+ response = 0.0075942;
+ goto D42;
+
+T42_4:
+ response = -0.0103296;
+ goto D42;
+
+T42_5:
+ response = 0.0212781;
+ goto D42;
+
+T42_6:
+ response = -0.0121229;
+ goto D42;
+
+T42_7:
+ response = 0.0182724;
+ goto D42;
+
+D42:
+
+tnscore = tnscore + response;
+
+/* Tree 44 of 77 */
+/* N terminal nodes = 7, Depth = 4 */
+
+
+N43_1:
+ if TOPSTORY < 0.39 then goto N43_2;
+ else goto N43_4;
+
+N43_2:
+ if PREV_DAY_HITS < 59.5 then goto T43_1;
+ else goto N43_3;
+
+T43_1:
+ response = -5.72966e-05;
+ goto D43;
+
+N43_3:
+ if NATIONALNEWS < 0.05 then goto T43_2;
+ else goto T43_3;
+
+T43_2:
+ response = 0.0144398;
+ goto D43;
+
+T43_3:
+ response = -0.00316385;
+ goto D43;
+
+N43_4:
+ if FOUR_HOUR_WF < 0.0201025 then goto N43_5;
+ else goto N43_6;
+
+N43_5:
+ if TWELVE_HOUR_WF < 0.163978 then goto T43_4;
+ else goto T43_5;
+
+T43_4:
+ response = 0.00366064;
+ goto D43;
+
+T43_5:
+ response = 0.0227011;
+ goto D43;
+
+N43_6:
+ if ONE_DAY_WF < 0.658333 then goto T43_6;
+ else goto T43_7;
+
+T43_6:
+ response = -0.0114776;
+ goto D43;
+
+T43_7:
+ response = 0.00740238;
+ goto D43;
+
+D43:
+
+tnscore = tnscore + response;
+
+/* Tree 45 of 77 */
+/* N terminal nodes = 7, Depth = 6 */
+
+
+N44_1:
+ if TOPSTORY < 0.585 then goto N44_2;
+ else goto T44_7;
+
+N44_2:
+ if ENTERTAINMENT < 0.05 then goto N44_3;
+ else goto N44_6;
+
+N44_3:
+ if DAY_WEEK_AVG_DERIV < 43.145 then goto T44_1;
+ else goto N44_4;
+
+T44_1:
+ response = 0.000486446;
+ goto D44;
+
+N44_4:
+ if DAY_HITS < 78.5 then goto T44_2;
+ else goto N44_5;
+
+T44_2:
+ response = 0.0210513;
+ goto D44;
+
+N44_5:
+ if SUPERDUPER_AVG < 0.65 then goto T44_3;
+ else goto T44_4;
+
+T44_3:
+ response = -0.00387695;
+ goto D44;
+
+T44_4:
+ response = 0.013128;
+ goto D44;
+
+N44_6:
+ if AVG_RANK < 5.465 then goto T44_5;
+ else goto T44_6;
+
+T44_5:
+ response = 0.00674178;
+ goto D44;
+
+T44_6:
+ response = -0.00228932;
+ goto D44;
+
+T44_7:
+ response = -0.0121137;
+ goto D44;
+
+D44:
+
+tnscore = tnscore + response;
+
+/* Tree 46 of 77 */
+/* N terminal nodes = 7, Depth = 6 */
+
+
+N45_1:
+ if TWO_DAY_WF < 0.488162 then goto T45_1;
+ else goto N45_2;
+
+T45_1:
+ response = -0.00237763;
+ goto D45;
+
+N45_2:
+ if WEEKAVG < 1.215 then goto T45_2;
+ else goto N45_3;
+
+T45_2:
+ response = -0.000773205;
+ goto D45;
+
+N45_3:
+ if EIGHT_HOUR_WF < 0.0444065 then goto N45_4;
+ else goto N45_5;
+
+N45_4:
+ if DAY_HITS < 19.5 then goto T45_3;
+ else goto T45_4;
+
+T45_3:
+ response = 0.00278939;
+ goto D45;
+
+T45_4:
+ response = 0.0115461;
+ goto D45;
+
+N45_5:
+ if NATIONALNEWS < 0.155 then goto T45_5;
+ else goto N45_6;
+
+T45_5:
+ response = -0.00189416;
+ goto D45;
+
+N45_6:
+ if SPORTS < 0.105 then goto T45_6;
+ else goto T45_7;
+
+T45_6:
+ response = 0.0072781;
+ goto D45;
+
+T45_7:
+ response = -0.00903706;
+ goto D45;
+
+D45:
+
+tnscore = tnscore + response;
+
+/* Tree 47 of 77 */
+/* N terminal nodes = 7, Depth = 6 */
+
+
+N46_1:
+ if SPORTS < 0.47 then goto N46_2;
+ else goto N46_3;
+
+N46_2:
+ if SPORTS < 0.105 then goto T46_1;
+ else goto T46_2;
+
+T46_1:
+ response = -4.47312e-05;
+ goto D46;
+
+T46_2:
+ response = -0.00348966;
+ goto D46;
+
+N46_3:
+ if MAX_RANK < 9 then goto T46_3;
+ else goto N46_4;
+
+T46_3:
+ response = -0.0016478;
+ goto D46;
+
+N46_4:
+ if EIGHT_HOUR_WF < 0.0459777 then goto N46_5;
+ else goto T46_7;
+
+N46_5:
+ if TWO_DAY_WF < 0.539394 then goto N46_6;
+ else goto T46_6;
+
+N46_6:
+ if WEEKAVG < 1.07 then goto T46_4;
+ else goto T46_5;
+
+T46_4:
+ response = -0.00892999;
+ goto D46;
+
+T46_5:
+ response = 0.00865732;
+ goto D46;
+
+T46_6:
+ response = 0.0121605;
+ goto D46;
+
+T46_7:
+ response = 0.00131641;
+ goto D46;
+
+D46:
+
+tnscore = tnscore + response;
+
+/* Tree 48 of 77 */
+/* N terminal nodes = 7, Depth = 7 */
+
+
+N47_1:
+ if INTLNEWS < 0.725 then goto N47_2;
+ else goto T47_7;
+
+N47_2:
+ if DAY_LW_DAY_HITS_RATIO < 124.5 then goto N47_3;
+ else goto T47_6;
+
+N47_3:
+ if AVG_SCORE < 628258 then goto N47_4;
+ else goto T47_5;
+
+N47_4:
+ if DAY_HITS < 55.5 then goto T47_1;
+ else goto N47_5;
+
+T47_1:
+ response = -0.000193067;
+ goto D47;
+
+N47_5:
+ if TWELVE_HOUR_WF < 0.117879 then goto T47_2;
+ else goto N47_6;
+
+T47_2:
+ response = 0.0187097;
+ goto D47;
+
+N47_6:
+ if TWELVE_HOUR_WF < 0.350814 then goto T47_3;
+ else goto T47_4;
+
+T47_3:
+ response = -0.00734127;
+ goto D47;
+
+T47_4:
+ response = 0.0131678;
+ goto D47;
+
+T47_5:
+ response = 0.00987754;
+ goto D47;
+
+T47_6:
+ response = -0.0156063;
+ goto D47;
+
+T47_7:
+ response = 0.00929408;
+ goto D47;
+
+D47:
+
+tnscore = tnscore + response;
+
+/* Tree 49 of 77 */
+/* N terminal nodes = 7, Depth = 7 */
+
+
+N48_1:
+ if DAY_WEEK_AVG_DERIV < -3.36 then goto T48_1;
+ else goto N48_2;
+
+T48_1:
+ response = -0.00956624;
+ goto D48;
+
+N48_2:
+ if TOPSTORY < 0.39 then goto T48_2;
+ else goto N48_3;
+
+T48_2:
+ response = -0.000216336;
+ goto D48;
+
+N48_3:
+ if AVG_RANK < 9.55 then goto N48_4;
+ else goto T48_7;
+
+N48_4:
+ if AVG_RANK < 8.735 then goto N48_5;
+ else goto T48_6;
+
+N48_5:
+ if TWO_DAY_WF < 0.531551 then goto T48_3;
+ else goto N48_6;
+
+T48_3:
+ response = -0.00490451;
+ goto D48;
+
+N48_6:
+ if FOUR_HOUR_WF < 0.0142857 then goto T48_4;
+ else goto T48_5;
+
+T48_4:
+ response = 0.0229256;
+ goto D48;
+
+T48_5:
+ response = 0.000312813;
+ goto D48;
+
+T48_6:
+ response = -0.00418916;
+ goto D48;
+
+T48_7:
+ response = 0.0189348;
+ goto D48;
+
+D48:
+
+tnscore = tnscore + response;
+
+/* Tree 50 of 77 */
+/* N terminal nodes = 7, Depth = 6 */
+
+
+N49_1:
+ if AVG_SCORE < 625182 then goto N49_2;
+ else goto T49_7;
+
+N49_2:
+ if DAY_LW_DAY_HITS_DERIV < 55.5 then goto N49_3;
+ else goto T49_6;
+
+N49_3:
+ if DAY_WEEK_AVG_DERIV < 19.36 then goto N49_4;
+ else goto N49_5;
+
+N49_4:
+ if DAY_WEEK_AVG_DERIV < 13.5 then goto T49_1;
+ else goto T49_2;
+
+T49_1:
+ response = -0.000195177;
+ goto D49;
+
+T49_2:
+ response = 0.00629794;
+ goto D49;
+
+N49_5:
+ if ISTITLE_AVG < 0.05 then goto T49_3;
+ else goto N49_6;
+
+T49_3:
+ response = -0.0149349;
+ goto D49;
+
+N49_6:
+ if TWELVE_HOUR_WF < 0.383204 then goto T49_4;
+ else goto T49_5;
+
+T49_4:
+ response = -0.00516327;
+ goto D49;
+
+T49_5:
+ response = 0.00921651;
+ goto D49;
+
+T49_6:
+ response = 0.00647785;
+ goto D49;
+
+T49_7:
+ response = 0.0102664;
+ goto D49;
+
+D49:
+
+tnscore = tnscore + response;
+
+/* Tree 51 of 77 */
+/* N terminal nodes = 7, Depth = 7 */
+
+
+N50_1:
+ if DAY_WEEK_AVG_RATIO < 0.385 then goto T50_1;
+ else goto N50_2;
+
+T50_1:
+ response = -0.00586045;
+ goto D50;
+
+N50_2:
+ if LIFESTYLE < 0.155 then goto N50_3;
+ else goto T50_7;
+
+N50_3:
+ if MAX_MIN_SCORE < 16288 then goto T50_2;
+ else goto N50_4;
+
+T50_2:
+ response = -0.0016458;
+ goto D50;
+
+N50_4:
+ if MAX_MIN_SCORE < 45875.2 then goto N50_5;
+ else goto T50_6;
+
+N50_5:
+ if MAX_MIN_SCORE < 45537.5 then goto N50_6;
+ else goto T50_5;
+
+N50_6:
+ if AVG_SCORE < 229848 then goto T50_3;
+ else goto T50_4;
+
+T50_3:
+ response = -0.000297351;
+ goto D50;
+
+T50_4:
+ response = 0.00475294;
+ goto D50;
+
+T50_5:
+ response = 0.0181171;
+ goto D50;
+
+T50_6:
+ response = -0.000574173;
+ goto D50;
+
+T50_7:
+ response = -0.00499598;
+ goto D50;
+
+D50:
+
+tnscore = tnscore + response;
+
+/* Tree 52 of 77 */
+/* N terminal nodes = 7, Depth = 5 */
+
+
+N51_1:
+ if SPORTS < 0.815 then goto N51_2;
+ else goto N51_5;
+
+N51_2:
+ if TWO_DAY_WF < 0.460499 then goto N51_3;
+ else goto T51_4;
+
+N51_3:
+ if INTLNEWS < 0.365 then goto T51_1;
+ else goto N51_4;
+
+T51_1:
+ response = -0.00422695;
+ goto D51;
+
+N51_4:
+ if DAY_HITS < 4.5 then goto T51_2;
+ else goto T51_3;
+
+T51_2:
+ response = 0.011483;
+ goto D51;
+
+T51_3:
+ response = -0.00407438;
+ goto D51;
+
+T51_4:
+ response = -4.78506e-06;
+ goto D51;
+
+N51_5:
+ if DAY_HITS < 1.5 then goto T51_5;
+ else goto N51_6;
+
+T51_5:
+ response = 0.00980267;
+ goto D51;
+
+N51_6:
+ if EIGHT_HOUR_WF < 0.301948 then goto T51_6;
+ else goto T51_7;
+
+T51_6:
+ response = -0.00316423;
+ goto D51;
+
+T51_7:
+ response = 0.0125528;
+ goto D51;
+
+D51:
+
+tnscore = tnscore + response;
+
+/* Tree 53 of 77 */
+/* N terminal nodes = 7, Depth = 6 */
+
+
+N52_1:
+ if PREV_DAY_HITS < 0.5 then goto T52_1;
+ else goto N52_2;
+
+T52_1:
+ response = -0.00190281;
+ goto D52;
+
+N52_2:
+ if LAW < 0.05 then goto N52_3;
+ else goto T52_7;
+
+N52_3:
+ if PUB_TODAY_AVG < 0.05 then goto T52_2;
+ else goto N52_4;
+
+T52_2:
+ response = -0.00103893;
+ goto D52;
+
+N52_4:
+ if EIGHT_HOUR_WF < 0.0492709 then goto N52_5;
+ else goto N52_6;
+
+N52_5:
+ if ISABSTRACT_AVG < 0.05 then goto T52_3;
+ else goto T52_4;
+
+T52_3:
+ response = 0.0053372;
+ goto D52;
+
+T52_4:
+ response = 0.000962476;
+ goto D52;
+
+N52_6:
+ if NATIONALNEWS < 0.13 then goto T52_5;
+ else goto T52_6;
+
+T52_5:
+ response = -0.00161984;
+ goto D52;
+
+T52_6:
+ response = 0.005538;
+ goto D52;
+
+T52_7:
+ response = -0.00741284;
+ goto D52;
+
+D52:
+
+tnscore = tnscore + response;
+
+/* Tree 54 of 77 */
+/* N terminal nodes = 7, Depth = 6 */
+
+
+N53_1:
+ if DAY_LW_DAY_HITS_RATIO < 0.69 then goto N53_2;
+ else goto N53_3;
+
+N53_2:
+ if AVG_SCORE < 229191 then goto T53_1;
+ else goto T53_2;
+
+T53_1:
+ response = -0.00217119;
+ goto D53;
+
+T53_2:
+ response = -0.0135186;
+ goto D53;
+
+N53_3:
+ if DAY_LW_DAY_HITS_RATIO < 125.5 then goto N53_4;
+ else goto T53_7;
+
+N53_4:
+ if ISABSTRACT_AVG < 0.05 then goto N53_5;
+ else goto T53_6;
+
+N53_5:
+ if AVG_SCORE < 218595 then goto T53_3;
+ else goto N53_6;
+
+T53_3:
+ response = -0.00358076;
+ goto D53;
+
+N53_6:
+ if BUSINESS < 0.685 then goto T53_4;
+ else goto T53_5;
+
+T53_4:
+ response = 0.00139556;
+ goto D53;
+
+T53_5:
+ response = 0.0140572;
+ goto D53;
+
+T53_6:
+ response = -0.000571652;
+ goto D53;
+
+T53_7:
+ response = -0.0128262;
+ goto D53;
+
+D53:
+
+tnscore = tnscore + response;
+
+/* Tree 55 of 77 */
+/* N terminal nodes = 7, Depth = 5 */
+
+
+N54_1:
+ if WEEKAVG < 1.5 then goto N54_2;
+ else goto N54_5;
+
+N54_2:
+ if DAY_WEEK_AVG_DERIV < 3.5 then goto N54_3;
+ else goto T54_4;
+
+N54_3:
+ if SUPERDUPER_AVG < 0.355 then goto N54_4;
+ else goto T54_3;
+
+N54_4:
+ if LW_DAY_HITS < 0.5 then goto T54_1;
+ else goto T54_2;
+
+T54_1:
+ response = 0.000521639;
+ goto D54;
+
+T54_2:
+ response = -0.00451687;
+ goto D54;
+
+T54_3:
+ response = -0.00637359;
+ goto D54;
+
+T54_4:
+ response = -0.00562351;
+ goto D54;
+
+N54_5:
+ if TWO_DAY_WF < 0.829824 then goto T54_5;
+ else goto N54_6;
+
+T54_5:
+ response = 2.82632e-05;
+ goto D54;
+
+N54_6:
+ if TWELVE_HOUR_WF < 0.940588 then goto T54_6;
+ else goto T54_7;
+
+T54_6:
+ response = 0.00527366;
+ goto D54;
+
+T54_7:
+ response = -0.011917;
+ goto D54;
+
+D54:
+
+tnscore = tnscore + response;
+
+/* Tree 56 of 77 */
+/* N terminal nodes = 7, Depth = 5 */
+
+
+N55_1:
+ if TWELVE_HOUR_WF < 0.742581 then goto N55_2;
+ else goto N55_5;
+
+N55_2:
+ if TOPSTORY < 0.355 then goto T55_1;
+ else goto N55_3;
+
+T55_1:
+ response = -0.00012321;
+ goto D55;
+
+N55_3:
+ if FOUR_HOUR_WF < 0.026084 then goto N55_4;
+ else goto T55_4;
+
+N55_4:
+ if MIN_SCORE < 356232 then goto T55_2;
+ else goto T55_3;
+
+T55_2:
+ response = 0.0111342;
+ goto D55;
+
+T55_3:
+ response = -0.00292376;
+ goto D55;
+
+T55_4:
+ response = -0.00479873;
+ goto D55;
+
+N55_5:
+ if ISABSTRACT_AVG < 0.185 then goto N55_6;
+ else goto T55_7;
+
+N55_6:
+ if MIN_RANK < 5 then goto T55_5;
+ else goto T55_6;
+
+T55_5:
+ response = -0.000125896;
+ goto D55;
+
+T55_6:
+ response = -0.0115332;
+ goto D55;
+
+T55_7:
+ response = -0.000763903;
+ goto D55;
+
+D55:
+
+tnscore = tnscore + response;
+
+/* Tree 57 of 77 */
+/* N terminal nodes = 7, Depth = 7 */
+
+
+N56_1:
+ if NATIONALNEWS < 0.27 then goto T56_1;
+ else goto N56_2;
+
+T56_1:
+ response = -0.000328182;
+ goto D56;
+
+N56_2:
+ if ISABSTRACT_AVG < 0.05 then goto N56_3;
+ else goto T56_7;
+
+N56_3:
+ if AVG_SCORE < 225658 then goto T56_2;
+ else goto N56_4;
+
+T56_2:
+ response = -0.00305383;
+ goto D56;
+
+N56_4:
+ if MIN_SCORE < 231962 then goto T56_3;
+ else goto N56_5;
+
+T56_3:
+ response = 0.0181265;
+ goto D56;
+
+N56_5:
+ if MAX_MIN_SCORE < 33119.5 then goto T56_4;
+ else goto N56_6;
+
+T56_4:
+ response = -0.00486977;
+ goto D56;
+
+N56_6:
+ if WEEKAVG < 1.785 then goto T56_5;
+ else goto T56_6;
+
+T56_5:
+ response = 0.0229851;
+ goto D56;
+
+T56_6:
+ response = 0.00588037;
+ goto D56;
+
+T56_7:
+ response = -0.000667257;
+ goto D56;
+
+D56:
+
+tnscore = tnscore + response;
+
+/* Tree 58 of 77 */
+/* N terminal nodes = 7, Depth = 6 */
+
+
+N57_1:
+ if MAX_MIN_SCORE < 312575 then goto N57_2;
+ else goto T57_7;
+
+N57_2:
+ if TWO_DAY_WF < 0.531754 then goto T57_1;
+ else goto N57_3;
+
+T57_1:
+ response = -0.00112552;
+ goto D57;
+
+N57_3:
+ if MAX_SCORE < 669432 then goto N57_4;
+ else goto T57_6;
+
+N57_4:
+ if BUSINESS < 0.05 then goto N57_5;
+ else goto N57_6;
+
+N57_5:
+ if PREV_DAY_HITS < 41.5 then goto T57_2;
+ else goto T57_3;
+
+T57_2:
+ response = 0.00191169;
+ goto D57;
+
+T57_3:
+ response = 0.0126963;
+ goto D57;
+
+N57_6:
+ if DAY_LW_DAY_HITS_RATIO < 37.5 then goto T57_4;
+ else goto T57_5;
+
+T57_4:
+ response = -0.00101754;
+ goto D57;
+
+T57_5:
+ response = 0.00846513;
+ goto D57;
+
+T57_6:
+ response = -0.00792694;
+ goto D57;
+
+T57_7:
+ response = -0.00834756;
+ goto D57;
+
+D57:
+
+tnscore = tnscore + response;
+
+/* Tree 59 of 77 */
+/* N terminal nodes = 7, Depth = 5 */
+
+
+N58_1:
+ if PUB_TODAY_AVG < 0.05 then goto N58_2;
+ else goto N58_6;
+
+N58_2:
+ if SPORTS < 0.645 then goto N58_3;
+ else goto N58_5;
+
+N58_3:
+ if AVG_SCORE < 395268 then goto N58_4;
+ else goto T58_3;
+
+N58_4:
+ if MAX_SCORE < 460268 then goto T58_1;
+ else goto T58_2;
+
+T58_1:
+ response = -0.00227942;
+ goto D58;
+
+T58_2:
+ response = 0.00899341;
+ goto D58;
+
+T58_3:
+ response = -0.00823655;
+ goto D58;
+
+N58_5:
+ if MIN_RANK < 5 then goto T58_4;
+ else goto T58_5;
+
+T58_4:
+ response = -0.00122777;
+ goto D58;
+
+T58_5:
+ response = 0.0163908;
+ goto D58;
+
+N58_6:
+ if TWO_DAY_WF < 0.95119 then goto T58_6;
+ else goto T58_7;
+
+T58_6:
+ response = -0.00018789;
+ goto D58;
+
+T58_7:
+ response = 0.0021492;
+ goto D58;
+
+D58:
+
+tnscore = tnscore + response;
+
+/* Tree 60 of 77 */
+/* N terminal nodes = 7, Depth = 6 */
+
+
+N59_1:
+ if DAY_WEEK_AVG_RATIO < 3.985 then goto T59_1;
+ else goto N59_2;
+
+T59_1:
+ response = -0.000226985;
+ goto D59;
+
+N59_2:
+ if DAY_WEEK_AVG_RATIO < 5.525 then goto N59_3;
+ else goto T59_7;
+
+N59_3:
+ if DAY_WEEK_AVG_RATIO < 4.95 then goto N59_4;
+ else goto T59_6;
+
+N59_4:
+ if AVG_SCORE < 373867 then goto N59_5;
+ else goto N59_6;
+
+N59_5:
+ if ENTERTAINMENT < 0.05 then goto T59_2;
+ else goto T59_3;
+
+T59_2:
+ response = 0.00254281;
+ goto D59;
+
+T59_3:
+ response = -0.0107653;
+ goto D59;
+
+N59_6:
+ if PREV_DAY_HITS < 3 then goto T59_4;
+ else goto T59_5;
+
+T59_4:
+ response = 0.0220568;
+ goto D59;
+
+T59_5:
+ response = 0.00220059;
+ goto D59;
+
+T59_6:
+ response = 0.0155791;
+ goto D59;
+
+T59_7:
+ response = -0.00294274;
+ goto D59;
+
+D59:
+
+tnscore = tnscore + response;
+
+/* Tree 61 of 77 */
+/* N terminal nodes = 7, Depth = 5 */
+
+
+N60_1:
+ if EIGHT_HOUR_WF < 0.349537 then goto N60_2;
+ else goto N60_6;
+
+N60_2:
+ if LOCALNEWS < 0.315 then goto T60_1;
+ else goto N60_3;
+
+T60_1:
+ response = 0.00100629;
+ goto D60;
+
+N60_3:
+ if TWELVE_HOUR_WF < 0.324561 then goto N60_4;
+ else goto N60_5;
+
+N60_4:
+ if MAX_SCORE < 547636 then goto T60_2;
+ else goto T60_3;
+
+T60_2:
+ response = -0.00365503;
+ goto D60;
+
+T60_3:
+ response = 0.00844103;
+ goto D60;
+
+N60_5:
+ if MAX_SCORE < 249971 then goto T60_4;
+ else goto T60_5;
+
+T60_4:
+ response = -0.00217393;
+ goto D60;
+
+T60_5:
+ response = 0.020902;
+ goto D60;
+
+N60_6:
+ if ISTITLE_AVG < 0.05 then goto T60_6;
+ else goto T60_7;
+
+T60_6:
+ response = -0.00679711;
+ goto D60;
+
+T60_7:
+ response = -2.87657e-06;
+ goto D60;
+
+D60:
+
+tnscore = tnscore + response;
+
+/* Tree 62 of 77 */
+/* N terminal nodes = 7, Depth = 6 */
+
+
+N61_1:
+ if DAY_WEEK_AVG_DERIV < 60.285 then goto N61_2;
+ else goto T61_7;
+
+N61_2:
+ if DAY_WEEK_AVG_DERIV < -4.07 then goto N61_3;
+ else goto N61_4;
+
+N61_3:
+ if DAY_WEEK_AVG_DERIV < -5.785 then goto T61_1;
+ else goto T61_2;
+
+T61_1:
+ response = 0.000889976;
+ goto D61;
+
+T61_2:
+ response = -0.016703;
+ goto D61;
+
+N61_4:
+ if DAY_WEEK_AVG_RATIO < 0.285 then goto T61_3;
+ else goto N61_5;
+
+T61_3:
+ response = 0.0108868;
+ goto D61;
+
+N61_5:
+ if DAY_WEEK_AVG_RATIO < 0.34 then goto T61_4;
+ else goto N61_6;
+
+T61_4:
+ response = -0.0115452;
+ goto D61;
+
+N61_6:
+ if DAY_PD_HITS_DERIV < -24.5 then goto T61_5;
+ else goto T61_6;
+
+T61_5:
+ response = 0.00709642;
+ goto D61;
+
+T61_6:
+ response = 5.85454e-05;
+ goto D61;
+
+T61_7:
+ response = 0.00724335;
+ goto D61;
+
+D61:
+
+tnscore = tnscore + response;
+
+/* Tree 63 of 77 */
+/* N terminal nodes = 7, Depth = 5 */
+
+
+N62_1:
+ if WEEKAVG < 31.07 then goto N62_2;
+ else goto N62_6;
+
+N62_2:
+ if DAY_PD_HITS_DERIV < -8.5 then goto N62_3;
+ else goto T62_5;
+
+N62_3:
+ if ONE_DAY_WF < 0.209914 then goto N62_4;
+ else goto N62_5;
+
+N62_4:
+ if TWO_DAY_WF < 0.537088 then goto T62_1;
+ else goto T62_2;
+
+T62_1:
+ response = 0.00770858;
+ goto D62;
+
+T62_2:
+ response = -0.00166542;
+ goto D62;
+
+N62_5:
+ if FOUR_HOUR_WF < 0.00547982 then goto T62_3;
+ else goto T62_4;
+
+T62_3:
+ response = 0.0185133;
+ goto D62;
+
+T62_4:
+ response = 0.000303571;
+ goto D62;
+
+T62_5:
+ response = -0.00031074;
+ goto D62;
+
+N62_6:
+ if MIN_SCORE < 398722 then goto T62_6;
+ else goto T62_7;
+
+T62_6:
+ response = -0.0135078;
+ goto D62;
+
+T62_7:
+ response = 0.00113129;
+ goto D62;
+
+D62:
+
+tnscore = tnscore + response;
+
+/* Tree 64 of 77 */
+/* N terminal nodes = 7, Depth = 4 */
+
+
+N63_1:
+ if AVG_RANK < 9.53 then goto N63_2;
+ else goto N63_4;
+
+N63_2:
+ if INTLNEWS < 0.73 then goto N63_3;
+ else goto T63_3;
+
+N63_3:
+ if SUPERDUPER_AVG < 0.61 then goto T63_1;
+ else goto T63_2;
+
+T63_1:
+ response = -0.000409752;
+ goto D63;
+
+T63_2:
+ response = -0.00974984;
+ goto D63;
+
+T63_3:
+ response = 0.0133732;
+ goto D63;
+
+N63_4:
+ if SPORTS < 0.05 then goto N63_5;
+ else goto N63_6;
+
+N63_5:
+ if TOPSTORY < 0.315 then goto T63_4;
+ else goto T63_5;
+
+T63_4:
+ response = -0.00110238;
+ goto D63;
+
+T63_5:
+ response = 0.0155814;
+ goto D63;
+
+N63_6:
+ if AVG_SCORE < 258098 then goto T63_6;
+ else goto T63_7;
+
+T63_6:
+ response = 0.0025561;
+ goto D63;
+
+T63_7:
+ response = 0.0218633;
+ goto D63;
+
+D63:
+
+tnscore = tnscore + response;
+
+/* Tree 65 of 77 */
+/* N terminal nodes = 7, Depth = 6 */
+
+
+N64_1:
+ if WEEKAVG < 4.215 then goto T64_1;
+ else goto N64_2;
+
+T64_1:
+ response = 0.00064852;
+ goto D64;
+
+N64_2:
+ if MAX_MIN_SCORE < 163619 then goto N64_3;
+ else goto N64_4;
+
+N64_3:
+ if TWO_DAY_WF < 0.463325 then goto T64_2;
+ else goto T64_3;
+
+T64_2:
+ response = -0.00769416;
+ goto D64;
+
+T64_3:
+ response = 0.00053643;
+ goto D64;
+
+N64_4:
+ if FOUR_HOUR_WF < 0.060024 then goto N64_5;
+ else goto T64_7;
+
+N64_5:
+ if TWELVE_HOUR_WF < 0.0127518 then goto T64_4;
+ else goto N64_6;
+
+T64_4:
+ response = -0.0161574;
+ goto D64;
+
+N64_6:
+ if LOCALNEWS < 0.05 then goto T64_5;
+ else goto T64_6;
+
+T64_5:
+ response = 0.00613049;
+ goto D64;
+
+T64_6:
+ response = -0.00855688;
+ goto D64;
+
+T64_7:
+ response = -0.0167968;
+ goto D64;
+
+D64:
+
+tnscore = tnscore + response;
+
+/* Tree 66 of 77 */
+/* N terminal nodes = 7, Depth = 7 */
+
+
+N65_1:
+ if DAY_WEEK_AVG_RATIO < 0.255 then goto T65_1;
+ else goto N65_2;
+
+T65_1:
+ response = 0.00985333;
+ goto D65;
+
+N65_2:
+ if DAY_WEEK_AVG_RATIO < 0.335 then goto T65_2;
+ else goto N65_3;
+
+T65_2:
+ response = -0.00951258;
+ goto D65;
+
+N65_3:
+ if MAX_MIN_SCORE < 307460 then goto N65_4;
+ else goto T65_7;
+
+N65_4:
+ if MAX_SCORE < 517912 then goto N65_5;
+ else goto T65_6;
+
+N65_5:
+ if MAX_MIN_SCORE < 61870.2 then goto N65_6;
+ else goto T65_5;
+
+N65_6:
+ if DAY_PD_HITS_RATIO < 2.865 then goto T65_3;
+ else goto T65_4;
+
+T65_3:
+ response = 0.00152167;
+ goto D65;
+
+T65_4:
+ response = -0.0017641;
+ goto D65;
+
+T65_5:
+ response = -0.00258099;
+ goto D65;
+
+T65_6:
+ response = 0.00333836;
+ goto D65;
+
+T65_7:
+ response = -0.00739588;
+ goto D65;
+
+D65:
+
+tnscore = tnscore + response;
+
+/* Tree 67 of 77 */
+/* N terminal nodes = 7, Depth = 6 */
+
+
+N66_1:
+ if WEEKAVG < 12.785 then goto N66_2;
+ else goto N66_3;
+
+N66_2:
+ if AVG_SCORE < 629228 then goto T66_1;
+ else goto T66_2;
+
+T66_1:
+ response = 3.70823e-06;
+ goto D66;
+
+T66_2:
+ response = 0.0113262;
+ goto D66;
+
+N66_3:
+ if INTLNEWS < 0.25 then goto T66_3;
+ else goto N66_4;
+
+T66_3:
+ response = -0.00996717;
+ goto D66;
+
+N66_4:
+ if EIGHT_HOUR_WF < 0.205476 then goto N66_5;
+ else goto T66_7;
+
+N66_5:
+ if MAX_MIN_SCORE < 105786 then goto N66_6;
+ else goto T66_6;
+
+N66_6:
+ if FOUR_HOUR_WF < 0.00848006 then goto T66_4;
+ else goto T66_5;
+
+T66_4:
+ response = 0.0151832;
+ goto D66;
+
+T66_5:
+ response = 0.000857643;
+ goto D66;
+
+T66_6:
+ response = -0.00763046;
+ goto D66;
+
+T66_7:
+ response = -0.015098;
+ goto D66;
+
+D66:
+
+tnscore = tnscore + response;
+
+/* Tree 68 of 77 */
+/* N terminal nodes = 7, Depth = 7 */
+
+
+N67_1:
+ if SPORTS < 0.47 then goto T67_1;
+ else goto N67_2;
+
+T67_1:
+ response = -0.000209978;
+ goto D67;
+
+N67_2:
+ if DAY_PD_HITS_RATIO < 9.5 then goto N67_3;
+ else goto T67_7;
+
+N67_3:
+ if MAX_RANK < 9 then goto T67_2;
+ else goto N67_4;
+
+T67_2:
+ response = -0.00197712;
+ goto D67;
+
+N67_4:
+ if MAX_SCORE < 188360 then goto T67_3;
+ else goto N67_5;
+
+T67_3:
+ response = -0.000464352;
+ goto D67;
+
+N67_5:
+ if MAX_MIN_SCORE < 45863.5 then goto N67_6;
+ else goto T67_6;
+
+N67_6:
+ if DAY_PD_HITS_RATIO < 0.31 then goto T67_4;
+ else goto T67_5;
+
+T67_4:
+ response = 0.029664;
+ goto D67;
+
+T67_5:
+ response = 0.00977088;
+ goto D67;
+
+T67_6:
+ response = 0.00333723;
+ goto D67;
+
+T67_7:
+ response = -0.012958;
+ goto D67;
+
+D67:
+
+tnscore = tnscore + response;
+
+/* Tree 69 of 77 */
+/* N terminal nodes = 7, Depth = 6 */
+
+
+N68_1:
+ if EIGHT_HOUR_WF < 0.397041 then goto N68_2;
+ else goto N68_6;
+
+N68_2:
+ if TWELVE_HOUR_WF < 0.327106 then goto T68_1;
+ else goto N68_3;
+
+T68_1:
+ response = 6.40846e-05;
+ goto D68;
+
+N68_3:
+ if MAX_MIN_SCORE < 129263 then goto T68_2;
+ else goto N68_4;
+
+T68_2:
+ response = 0.00114373;
+ goto D68;
+
+N68_4:
+ if SUPERDUPER_AVG < 0.105 then goto N68_5;
+ else goto T68_5;
+
+N68_5:
+ if AVG_SCORE < 390145 then goto T68_3;
+ else goto T68_4;
+
+T68_3:
+ response = 0.00590447;
+ goto D68;
+
+T68_4:
+ response = 0.0270599;
+ goto D68;
+
+T68_5:
+ response = 0.00116164;
+ goto D68;
+
+N68_6:
+ if DAY_LW_DAY_HITS_RATIO < 27.5 then goto T68_6;
+ else goto T68_7;
+
+T68_6:
+ response = -0.00395192;
+ goto D68;
+
+T68_7:
+ response = 0.00937395;
+ goto D68;
+
+D68:
+
+tnscore = tnscore + response;
+
+/* Tree 70 of 77 */
+/* N terminal nodes = 7, Depth = 7 */
+
+
+N69_1:
+ if LW_DAY_HITS < 3.5 then goto N69_2;
+ else goto T69_7;
+
+N69_2:
+ if ENTERTAINMENT < 0.845 then goto N69_3;
+ else goto T69_6;
+
+N69_3:
+ if LIFESTYLE < 0.115 then goto N69_4;
+ else goto T69_5;
+
+N69_4:
+ if AVG_RANK < 4.45 then goto T69_1;
+ else goto N69_5;
+
+T69_1:
+ response = -0.00817106;
+ goto D69;
+
+N69_5:
+ if AVG_RANK < 5.225 then goto N69_6;
+ else goto T69_4;
+
+N69_6:
+ if ISABSTRACT_AVG < 0.55 then goto T69_2;
+ else goto T69_3;
+
+T69_2:
+ response = 0.00926281;
+ goto D69;
+
+T69_3:
+ response = -0.00505226;
+ goto D69;
+
+T69_4:
+ response = 0.000181535;
+ goto D69;
+
+T69_5:
+ response = -0.00449952;
+ goto D69;
+
+T69_6:
+ response = 0.00814113;
+ goto D69;
+
+T69_7:
+ response = -0.00517456;
+ goto D69;
+
+D69:
+
+tnscore = tnscore + response;
+
+/* Tree 71 of 77 */
+/* N terminal nodes = 7, Depth = 4 */
+
+
+N70_1:
+ if TWO_DAY_WF < 0.439697 then goto N70_2;
+ else goto N70_4;
+
+N70_2:
+ if AVG_RANK < 8.7 then goto T70_1;
+ else goto N70_3;
+
+T70_1:
+ response = -0.00429929;
+ goto D70;
+
+N70_3:
+ if MAX_MIN_SCORE < 47973.5 then goto T70_2;
+ else goto T70_3;
+
+T70_2:
+ response = -0.000903138;
+ goto D70;
+
+T70_3:
+ response = 0.0125022;
+ goto D70;
+
+N70_4:
+ if AVG_RANK < 8.635 then goto N70_5;
+ else goto N70_6;
+
+N70_5:
+ if DAY_WEEK_AVG_DERIV < 29.5 then goto T70_4;
+ else goto T70_5;
+
+T70_4:
+ response = 0.000800681;
+ goto D70;
+
+T70_5:
+ response = 0.0125881;
+ goto D70;
+
+N70_6:
+ if ISTITLE_AVG < 0.05 then goto T70_6;
+ else goto T70_7;
+
+T70_6:
+ response = -0.00468934;
+ goto D70;
+
+T70_7:
+ response = 0.00031406;
+ goto D70;
+
+D70:
+
+tnscore = tnscore + response;
+
+/* Tree 72 of 77 */
+/* N terminal nodes = 7, Depth = 5 */
+
+
+N71_1:
+ if LW_DAY_HITS < 0.5 then goto N71_2;
+ else goto N71_3;
+
+N71_2:
+ if WEEKAVG < 39.855 then goto T71_1;
+ else goto T71_2;
+
+T71_1:
+ response = 0.000277242;
+ goto D71;
+
+T71_2:
+ response = -0.0132349;
+ goto D71;
+
+N71_3:
+ if MIN_SCORE < 234431 then goto N71_4;
+ else goto N71_5;
+
+N71_4:
+ if MIN_SCORE < 225952 then goto T71_3;
+ else goto T71_4;
+
+T71_3:
+ response = -0.00160465;
+ goto D71;
+
+T71_4:
+ response = 0.01256;
+ goto D71;
+
+N71_5:
+ if PREV_DAY_HITS < 19.5 then goto T71_5;
+ else goto N71_6;
+
+T71_5:
+ response = -0.0107505;
+ goto D71;
+
+N71_6:
+ if WEEKAVG < 12.575 then goto T71_6;
+ else goto T71_7;
+
+T71_6:
+ response = 0.0119228;
+ goto D71;
+
+T71_7:
+ response = -0.00600679;
+ goto D71;
+
+D71:
+
+tnscore = tnscore + response;
+
+/* Tree 73 of 77 */
+/* N terminal nodes = 7, Depth = 6 */
+
+
+N72_1:
+ if WEEKAVG < 26.93 then goto N72_2;
+ else goto N72_6;
+
+N72_2:
+ if TOPSTORY < 0.39 then goto T72_1;
+ else goto N72_3;
+
+T72_1:
+ response = -0.000140614;
+ goto D72;
+
+N72_3:
+ if AVG_RANK < 9.55 then goto N72_4;
+ else goto T72_5;
+
+N72_4:
+ if AVG_RANK < 7.755 then goto T72_2;
+ else goto N72_5;
+
+T72_2:
+ response = 0.0151495;
+ goto D72;
+
+N72_5:
+ if TOPSTORY < 0.45 then goto T72_3;
+ else goto T72_4;
+
+T72_3:
+ response = 0.0043054;
+ goto D72;
+
+T72_4:
+ response = -0.00734039;
+ goto D72;
+
+T72_5:
+ response = 0.0204375;
+ goto D72;
+
+N72_6:
+ if SUPERDUPER_AVG < 0.55 then goto T72_6;
+ else goto T72_7;
+
+T72_6:
+ response = -0.0146963;
+ goto D72;
+
+T72_7:
+ response = -0.00012832;
+ goto D72;
+
+D72:
+
+tnscore = tnscore + response;
+
+/* Tree 74 of 77 */
+/* N terminal nodes = 7, Depth = 5 */
+
+
+N73_1:
+ if TOPSTORY < 0.55 then goto N73_2;
+ else goto T73_7;
+
+N73_2:
+ if WEEKAVG < 0.5 then goto N73_3;
+ else goto N73_5;
+
+N73_3:
+ if MAX_MIN_SCORE < 165073 then goto N73_4;
+ else goto T73_3;
+
+N73_4:
+ if MIN_SCORE < 215208 then goto T73_1;
+ else goto T73_2;
+
+T73_1:
+ response = 0.000170833;
+ goto D73;
+
+T73_2:
+ response = -0.0051217;
+ goto D73;
+
+T73_3:
+ response = 0.0104792;
+ goto D73;
+
+N73_5:
+ if WEEKAVG < 10.93 then goto T73_4;
+ else goto N73_6;
+
+T73_4:
+ response = 0.000781356;
+ goto D73;
+
+N73_6:
+ if EIGHT_HOUR_WF < 0.00663439 then goto T73_5;
+ else goto T73_6;
+
+T73_5:
+ response = 0.00783355;
+ goto D73;
+
+T73_6:
+ response = -0.00393311;
+ goto D73;
+
+T73_7:
+ response = -0.0097211;
+ goto D73;
+
+D73:
+
+tnscore = tnscore + response;
+
+/* Tree 75 of 77 */
+/* N terminal nodes = 7, Depth = 7 */
+
+
+N74_1:
+ if DAY_WEEK_AVG_RATIO < 0.225 then goto T74_1;
+ else goto N74_2;
+
+T74_1:
+ response = 0.0105222;
+ goto D74;
+
+N74_2:
+ if SPORTS < 0.73 then goto T74_2;
+ else goto N74_3;
+
+T74_2:
+ response = -0.000688094;
+ goto D74;
+
+N74_3:
+ if MAX_MIN_RANK < 5 then goto T74_3;
+ else goto N74_4;
+
+T74_3:
+ response = -0.00146174;
+ goto D74;
+
+N74_4:
+ if PREV_DAY_HITS < 6.5 then goto N74_5;
+ else goto T74_7;
+
+N74_5:
+ if MIN_SCORE < 144075 then goto T74_4;
+ else goto N74_6;
+
+T74_4:
+ response = -0.00415946;
+ goto D74;
+
+N74_6:
+ if PREV_DAY_HITS < 2.5 then goto T74_5;
+ else goto T74_6;
+
+T74_5:
+ response = 0.010148;
+ goto D74;
+
+T74_6:
+ response = 0.0262199;
+ goto D74;
+
+T74_7:
+ response = -0.00602654;
+ goto D74;
+
+D74:
+
+tnscore = tnscore + response;
+
+/* Tree 76 of 77 */
+/* N terminal nodes = 7, Depth = 5 */
+
+
+N75_1:
+ if DAY_WEEK_AVG_DERIV < -3.5 then goto N75_2;
+ else goto N75_4;
+
+N75_2:
+ if TWO_DAY_WF < 0.635642 then goto N75_3;
+ else goto T75_3;
+
+N75_3:
+ if DAY_WEEK_AVG_DERIV < -5.93 then goto T75_1;
+ else goto T75_2;
+
+T75_1:
+ response = 0.00406292;
+ goto D75;
+
+T75_2:
+ response = -0.0105257;
+ goto D75;
+
+T75_3:
+ response = -0.0144987;
+ goto D75;
+
+N75_4:
+ if DAY_PD_HITS_RATIO < 43 then goto N75_5;
+ else goto T75_7;
+
+N75_5:
+ if DAY_WEEK_AVG_RATIO < 5.55 then goto T75_4;
+ else goto N75_6;
+
+T75_4:
+ response = -6.55387e-05;
+ goto D75;
+
+N75_6:
+ if ISTITLE_AVG < 0.05 then goto T75_5;
+ else goto T75_6;
+
+T75_5:
+ response = -0.0189073;
+ goto D75;
+
+T75_6:
+ response = 0.00177271;
+ goto D75;
+
+T75_7:
+ response = 0.0120068;
+ goto D75;
+
+D75:
+
+tnscore = tnscore + response;
+
+/* Tree 77 of 77 */
+/* N terminal nodes = 7, Depth = 6 */
+
+
+N76_1:
+ if DAY_WEEK_AVG_RATIO < 0.235 then goto T76_1;
+ else goto N76_2;
+
+T76_1:
+ response = 0.0112125;
+ goto D76;
+
+N76_2:
+ if DAY_WEEK_AVG_RATIO < 0.345 then goto T76_2;
+ else goto N76_3;
+
+T76_2:
+ response = -0.00610693;
+ goto D76;
+
+N76_3:
+ if WEEKAVG < 0.93 then goto T76_3;
+ else goto N76_4;
+
+T76_3:
+ response = -0.00104389;
+ goto D76;
+
+N76_4:
+ if WEEKAVG < 4.215 then goto N76_5;
+ else goto N76_6;
+
+N76_5:
+ if TOPSTORY < 0.13 then goto T76_4;
+ else goto T76_5;
+
+T76_4:
+ response = 0.000513026;
+ goto D76;
+
+T76_5:
+ response = 0.00509033;
+ goto D76;
+
+N76_6:
+ if MAX_MIN_SCORE < 206736 then goto T76_6;
+ else goto T76_7;
+
+T76_6:
+ response = -0.00041877;
+ goto D76;
+
+T76_7:
+ response = -0.0108199;
+ goto D76;
+
+D76:
+
+tnscore = tnscore + response;
+
+return;
diff --git a/searchlib/src/test/files/treenet06.model b/searchlib/src/test/files/treenet06.model
new file mode 100644
index 00000000000..43555fb698a
--- /dev/null
+++ b/searchlib/src/test/files/treenet06.model
@@ -0,0 +1,3799 @@
+
+/* Data Dictionary, Number Of Variables = 32 */
+/* Name = NUM_WORDS, Type = continuous. */
+/* Name = DAY_HITS, Type = continuous. */
+/* Name = DAY_HITS_FRAC, Type = continuous. */
+/* Name = PREV_DAY_HITS, Type = continuous. */
+/* Name = DAY_PD_HITS_RATIO, Type = continuous. */
+/* Name = DAY_PD_HITS_DERIV, Type = continuous. */
+/* Name = DAY_PREV_DAY_HITS_FRAC, Type = continuous. */
+/* Name = DAY_LW_DAY_HITS_RATIO, Type = continuous. */
+/* Name = DAY_LW_DAY_HITS_DERIV, Type = continuous. */
+/* Name = WEEKAVG, Type = continuous. */
+/* Name = DAY_WEEK_AVG_RATIO, Type = continuous. */
+/* Name = DAY_WEEK_AVG_DERIV, Type = continuous. */
+/* Name = ISTITLE_AVG, Type = continuous. */
+/* Name = ISABSTRACT_AVG, Type = continuous. */
+/* Name = SUPERDUPER_AVG, Type = continuous. */
+/* Name = BUSINESS, Type = continuous. */
+/* Name = ENTERTAINMENT, Type = continuous. */
+/* Name = HEALTH, Type = continuous. */
+/* Name = INTLNEWS, Type = continuous. */
+/* Name = LOCALNEWS, Type = continuous. */
+/* Name = NATIONALNEWS, Type = continuous. */
+/* Name = POLITICS, Type = continuous. */
+/* Name = REGIONALNEWS, Type = continuous. */
+/* Name = SPORTS, Type = continuous. */
+/* Name = TOPSTORY, Type = continuous. */
+/* Name = AVG_RANK, Type = continuous. */
+/* Name = MAX_RANK, Type = continuous. */
+/* Name = MIN_RANK, Type = continuous. */
+/* Name = AVG_SCORE, Type = continuous. */
+/* Name = MAX_SCORE, Type = continuous. */
+/* Name = MIN_SCORE, Type = continuous. */
+/* Name = MAX_MIN_SCORE, Type = continuous. */
+
+MODELBEGIN:
+
+/* CART version: 5.0.9.156 */
+/* TreeNet: TreeNet20070830184428 */
+/* Grove: /home/rparekh/lb/lb_treenet.grv */
+/* N trees: 85 */
+
+link TN0;
+pred = tnscore; /* predicted value for IY_CTR */
+
+
+/*********************/
+/* Model is complete */
+/*********************/
+
+return;
+
+
+
+TN0:
+
+/* Tree 1 of 85 */
+/* N terminal nodes = 5, Depth = 4 */
+
+
+tnscore = 0.0;
+
+N0_1:
+ if MAX_SCORE < 364352 then goto N0_2;
+ else goto N0_4;
+
+N0_2:
+ if NUM_WORDS < 1.5 then goto T0_1;
+ else goto N0_3;
+
+T0_1:
+ response = 0.106529;
+ goto D0;
+
+N0_3:
+ if WEEKAVG < 0.665 then goto T0_2;
+ else goto T0_3;
+
+T0_2:
+ response = 0.113339;
+ goto D0;
+
+T0_3:
+ response = 0.129744;
+ goto D0;
+
+N0_4:
+ if WEEKAVG < 0.35 then goto T0_4;
+ else goto T0_5;
+
+T0_4:
+ response = 0.125401;
+ goto D0;
+
+T0_5:
+ response = 0.148456;
+ goto D0;
+
+D0:
+
+tnscore = tnscore + response;
+
+/* Tree 2 of 85 */
+/* N terminal nodes = 5, Depth = 4 */
+
+
+N1_1:
+ if MAX_SCORE < 386454 then goto N1_2;
+ else goto N1_4;
+
+N1_2:
+ if NUM_WORDS < 2.5 then goto N1_3;
+ else goto T1_3;
+
+N1_3:
+ if MAX_SCORE < 266558 then goto T1_1;
+ else goto T1_2;
+
+T1_1:
+ response = -0.00435683;
+ goto D1;
+
+T1_2:
+ response = 0.00232626;
+ goto D1;
+
+T1_3:
+ response = 0.00527105;
+ goto D1;
+
+N1_4:
+ if DAY_LW_DAY_HITS_RATIO < 3.75 then goto T1_4;
+ else goto T1_5;
+
+T1_4:
+ response = 0.0125759;
+ goto D1;
+
+T1_5:
+ response = 0.0415964;
+ goto D1;
+
+D1:
+
+tnscore = tnscore + response;
+
+/* Tree 3 of 85 */
+/* N terminal nodes = 5, Depth = 4 */
+
+
+N2_1:
+ if MAX_SCORE < 285564 then goto N2_2;
+ else goto N2_3;
+
+N2_2:
+ if NUM_WORDS < 3.5 then goto T2_1;
+ else goto T2_2;
+
+T2_1:
+ response = -0.00312935;
+ goto D2;
+
+T2_2:
+ response = 0.0139702;
+ goto D2;
+
+N2_3:
+ if DAY_LW_DAY_HITS_RATIO < 4.645 then goto T2_3;
+ else goto N2_4;
+
+T2_3:
+ response = 0.00510366;
+ goto D2;
+
+N2_4:
+ if ISABSTRACT_AVG < 0.225 then goto T2_4;
+ else goto T2_5;
+
+T2_4:
+ response = 0.0376987;
+ goto D2;
+
+T2_5:
+ response = 0.00704226;
+ goto D2;
+
+D2:
+
+tnscore = tnscore + response;
+
+/* Tree 4 of 85 */
+/* N terminal nodes = 5, Depth = 4 */
+
+
+N3_1:
+ if MAX_SCORE < 354060 then goto N3_2;
+ else goto N3_4;
+
+N3_2:
+ if NUM_WORDS < 1.5 then goto T3_1;
+ else goto N3_3;
+
+T3_1:
+ response = -0.00557684;
+ goto D3;
+
+N3_3:
+ if DAY_LW_DAY_HITS_RATIO < 7.25 then goto T3_2;
+ else goto T3_3;
+
+T3_2:
+ response = -0.000411611;
+ goto D3;
+
+T3_3:
+ response = 0.0176971;
+ goto D3;
+
+N3_4:
+ if ISTITLE_AVG < 0.845 then goto T3_4;
+ else goto T3_5;
+
+T3_4:
+ response = 0.0209172;
+ goto D3;
+
+T3_5:
+ response = 0.00437892;
+ goto D3;
+
+D3:
+
+tnscore = tnscore + response;
+
+/* Tree 5 of 85 */
+/* N terminal nodes = 5, Depth = 4 */
+
+
+N4_1:
+ if MAX_SCORE < 357048 then goto N4_2;
+ else goto N4_4;
+
+N4_2:
+ if NUM_WORDS < 3.5 then goto N4_3;
+ else goto T4_3;
+
+N4_3:
+ if DAY_LW_DAY_HITS_RATIO < 6.75 then goto T4_1;
+ else goto T4_2;
+
+T4_1:
+ response = -0.00214246;
+ goto D4;
+
+T4_2:
+ response = 0.00909381;
+ goto D4;
+
+T4_3:
+ response = 0.0132498;
+ goto D4;
+
+N4_4:
+ if DAY_WEEK_AVG_DERIV < 2.785 then goto T4_4;
+ else goto T4_5;
+
+T4_4:
+ response = 0.00781954;
+ goto D4;
+
+T4_5:
+ response = 0.0325808;
+ goto D4;
+
+D4:
+
+tnscore = tnscore + response;
+
+/* Tree 6 of 85 */
+/* N terminal nodes = 5, Depth = 4 */
+
+
+N5_1:
+ if MAX_SCORE < 391984 then goto N5_2;
+ else goto N5_4;
+
+N5_2:
+ if NUM_WORDS < 1.5 then goto T5_1;
+ else goto N5_3;
+
+T5_1:
+ response = -0.00479641;
+ goto D5;
+
+N5_3:
+ if WEEKAVG < 0.805 then goto T5_2;
+ else goto T5_3;
+
+T5_2:
+ response = 0.000314606;
+ goto D5;
+
+T5_3:
+ response = 0.0174789;
+ goto D5;
+
+N5_4:
+ if DAY_WEEK_AVG_DERIV < 2.5 then goto T5_4;
+ else goto T5_5;
+
+T5_4:
+ response = 0.0100076;
+ goto D5;
+
+T5_5:
+ response = 0.0303617;
+ goto D5;
+
+D5:
+
+tnscore = tnscore + response;
+
+/* Tree 7 of 85 */
+/* N terminal nodes = 5, Depth = 4 */
+
+
+N6_1:
+ if NUM_WORDS < 2.5 then goto N6_2;
+ else goto N6_4;
+
+N6_2:
+ if MAX_SCORE < 273725 then goto T6_1;
+ else goto N6_3;
+
+T6_1:
+ response = -0.0031024;
+ goto D6;
+
+N6_3:
+ if ISTITLE_AVG < 0.955 then goto T6_2;
+ else goto T6_3;
+
+T6_2:
+ response = 0.0093897;
+ goto D6;
+
+T6_3:
+ response = -0.00177918;
+ goto D6;
+
+N6_4:
+ if WEEKAVG < 0.35 then goto T6_4;
+ else goto T6_5;
+
+T6_4:
+ response = 0.00506228;
+ goto D6;
+
+T6_5:
+ response = 0.0238941;
+ goto D6;
+
+D6:
+
+tnscore = tnscore + response;
+
+/* Tree 8 of 85 */
+/* N terminal nodes = 5, Depth = 4 */
+
+
+N7_1:
+ if NUM_WORDS < 3.5 then goto N7_2;
+ else goto T7_5;
+
+N7_2:
+ if DAY_LW_DAY_HITS_RATIO < 7.835 then goto N7_3;
+ else goto N7_4;
+
+N7_3:
+ if NUM_WORDS < 2.5 then goto T7_1;
+ else goto T7_2;
+
+T7_1:
+ response = -0.00222361;
+ goto D7;
+
+T7_2:
+ response = 0.00275911;
+ goto D7;
+
+N7_4:
+ if ISABSTRACT_AVG < 0.185 then goto T7_3;
+ else goto T7_4;
+
+T7_3:
+ response = 0.0286851;
+ goto D7;
+
+T7_4:
+ response = 0.0025611;
+ goto D7;
+
+T7_5:
+ response = 0.0150946;
+ goto D7;
+
+D7:
+
+tnscore = tnscore + response;
+
+/* Tree 9 of 85 */
+/* N terminal nodes = 5, Depth = 4 */
+
+
+N8_1:
+ if MAX_SCORE < 463634 then goto N8_2;
+ else goto T8_5;
+
+N8_2:
+ if DAY_LW_DAY_HITS_RATIO < 14.36 then goto N8_3;
+ else goto N8_4;
+
+N8_3:
+ if NUM_WORDS < 2.5 then goto T8_1;
+ else goto T8_2;
+
+T8_1:
+ response = -0.00168161;
+ goto D8;
+
+T8_2:
+ response = 0.00306928;
+ goto D8;
+
+N8_4:
+ if ISABSTRACT_AVG < 0.05 then goto T8_3;
+ else goto T8_4;
+
+T8_3:
+ response = 0.03626;
+ goto D8;
+
+T8_4:
+ response = 0.00702238;
+ goto D8;
+
+T8_5:
+ response = 0.018646;
+ goto D8;
+
+D8:
+
+tnscore = tnscore + response;
+
+/* Tree 10 of 85 */
+/* N terminal nodes = 5, Depth = 5 */
+
+
+N9_1:
+ if MAX_SCORE < 291384 then goto T9_1;
+ else goto N9_2;
+
+T9_1:
+ response = -0.00120841;
+ goto D9;
+
+N9_2:
+ if ISTITLE_AVG < 0.845 then goto N9_3;
+ else goto T9_5;
+
+N9_3:
+ if WEEKAVG < 0.325 then goto T9_2;
+ else goto N9_4;
+
+T9_2:
+ response = 0.0070091;
+ goto D9;
+
+N9_4:
+ if DAY_WEEK_AVG_DERIV < 27.5 then goto T9_3;
+ else goto T9_4;
+
+T9_3:
+ response = 0.021833;
+ goto D9;
+
+T9_4:
+ response = 0.0670236;
+ goto D9;
+
+T9_5:
+ response = -0.000239127;
+ goto D9;
+
+D9:
+
+tnscore = tnscore + response;
+
+/* Tree 11 of 85 */
+/* N terminal nodes = 5, Depth = 4 */
+
+
+N10_1:
+ if MAX_SCORE < 392716 then goto N10_2;
+ else goto T10_5;
+
+N10_2:
+ if WEEKAVG < 0.915 then goto N10_3;
+ else goto N10_4;
+
+N10_3:
+ if NUM_WORDS < 1.5 then goto T10_1;
+ else goto T10_2;
+
+T10_1:
+ response = -0.00408665;
+ goto D10;
+
+T10_2:
+ response = 1.2681e-05;
+ goto D10;
+
+N10_4:
+ if ISABSTRACT_AVG < 0.05 then goto T10_3;
+ else goto T10_4;
+
+T10_3:
+ response = 0.0315009;
+ goto D10;
+
+T10_4:
+ response = 0.00309315;
+ goto D10;
+
+T10_5:
+ response = 0.0101865;
+ goto D10;
+
+D10:
+
+tnscore = tnscore + response;
+
+/* Tree 12 of 85 */
+/* N terminal nodes = 5, Depth = 4 */
+
+
+N11_1:
+ if NUM_WORDS < 3.5 then goto N11_2;
+ else goto T11_5;
+
+N11_2:
+ if DAY_LW_DAY_HITS_RATIO < 4.9 then goto N11_3;
+ else goto N11_4;
+
+N11_3:
+ if NUM_WORDS < 1.5 then goto T11_1;
+ else goto T11_2;
+
+T11_1:
+ response = -0.00378383;
+ goto D11;
+
+T11_2:
+ response = -0.000155068;
+ goto D11;
+
+N11_4:
+ if ISTITLE_AVG < 0.915 then goto T11_3;
+ else goto T11_4;
+
+T11_3:
+ response = 0.0164901;
+ goto D11;
+
+T11_4:
+ response = 0.00035458;
+ goto D11;
+
+T11_5:
+ response = 0.0111533;
+ goto D11;
+
+D11:
+
+tnscore = tnscore + response;
+
+/* Tree 13 of 85 */
+/* N terminal nodes = 5, Depth = 4 */
+
+
+N12_1:
+ if NUM_WORDS < 2.5 then goto N12_2;
+ else goto N12_4;
+
+N12_2:
+ if DAY_LW_DAY_HITS_RATIO < 12.165 then goto T12_1;
+ else goto N12_3;
+
+T12_1:
+ response = -0.00137589;
+ goto D12;
+
+N12_3:
+ if ISTITLE_AVG < 0.73 then goto T12_2;
+ else goto T12_3;
+
+T12_2:
+ response = 0.0299723;
+ goto D12;
+
+T12_3:
+ response = 0.00442332;
+ goto D12;
+
+N12_4:
+ if DAY_HITS < 2.125 then goto T12_4;
+ else goto T12_5;
+
+T12_4:
+ response = 0.00279729;
+ goto D12;
+
+T12_5:
+ response = 0.0157199;
+ goto D12;
+
+D12:
+
+tnscore = tnscore + response;
+
+/* Tree 14 of 85 */
+/* N terminal nodes = 5, Depth = 4 */
+
+
+N13_1:
+ if MAX_SCORE < 391997 then goto N13_2;
+ else goto N13_4;
+
+N13_2:
+ if NUM_WORDS < 1.5 then goto T13_1;
+ else goto N13_3;
+
+T13_1:
+ response = -0.00289017;
+ goto D13;
+
+N13_3:
+ if PREV_DAY_HITS < 6.33333 then goto T13_2;
+ else goto T13_3;
+
+T13_2:
+ response = 0.000153177;
+ goto D13;
+
+T13_3:
+ response = 0.0114408;
+ goto D13;
+
+N13_4:
+ if PREV_DAY_HITS < 9.5 then goto T13_4;
+ else goto T13_5;
+
+T13_4:
+ response = 0.00746655;
+ goto D13;
+
+T13_5:
+ response = 0.040233;
+ goto D13;
+
+D13:
+
+tnscore = tnscore + response;
+
+/* Tree 15 of 85 */
+/* N terminal nodes = 5, Depth = 5 */
+
+
+N14_1:
+ if NUM_WORDS < 3.5 then goto N14_2;
+ else goto T14_5;
+
+N14_2:
+ if DAY_LW_DAY_HITS_RATIO < 4.9 then goto T14_1;
+ else goto N14_3;
+
+T14_1:
+ response = -0.00103084;
+ goto D14;
+
+N14_3:
+ if ISTITLE_AVG < 0.915 then goto N14_4;
+ else goto T14_4;
+
+N14_4:
+ if DAY_HITS_FRAC < 0.645 then goto T14_2;
+ else goto T14_3;
+
+T14_2:
+ response = 0.0230528;
+ goto D14;
+
+T14_3:
+ response = 0.00568694;
+ goto D14;
+
+T14_4:
+ response = 0.000615028;
+ goto D14;
+
+T14_5:
+ response = 0.00901386;
+ goto D14;
+
+D14:
+
+tnscore = tnscore + response;
+
+/* Tree 16 of 85 */
+/* N terminal nodes = 5, Depth = 5 */
+
+
+N15_1:
+ if MAX_SCORE < 291385 then goto T15_1;
+ else goto N15_2;
+
+T15_1:
+ response = -0.000944169;
+ goto D15;
+
+N15_2:
+ if ISTITLE_AVG < 0.72 then goto N15_3;
+ else goto T15_5;
+
+N15_3:
+ if DAY_WEEK_AVG_DERIV < 28.855 then goto N15_4;
+ else goto T15_4;
+
+N15_4:
+ if NATIONALNEWS < 0.355 then goto T15_2;
+ else goto T15_3;
+
+T15_2:
+ response = 0.00617921;
+ goto D15;
+
+T15_3:
+ response = 0.0271174;
+ goto D15;
+
+T15_4:
+ response = 0.0534392;
+ goto D15;
+
+T15_5:
+ response = -0.000921153;
+ goto D15;
+
+D15:
+
+tnscore = tnscore + response;
+
+/* Tree 17 of 85 */
+/* N terminal nodes = 5, Depth = 5 */
+
+
+N16_1:
+ if NUM_WORDS < 3.5 then goto N16_2;
+ else goto T16_5;
+
+N16_2:
+ if PREV_DAY_HITS < 6.16667 then goto T16_1;
+ else goto N16_3;
+
+T16_1:
+ response = -0.000682897;
+ goto D16;
+
+N16_3:
+ if MIN_SCORE < 254342 then goto T16_2;
+ else goto N16_4;
+
+T16_2:
+ response = 0.00193942;
+ goto D16;
+
+N16_4:
+ if NATIONALNEWS < 0.21 then goto T16_3;
+ else goto T16_4;
+
+T16_3:
+ response = 0.0131534;
+ goto D16;
+
+T16_4:
+ response = 0.0605109;
+ goto D16;
+
+T16_5:
+ response = 0.00697463;
+ goto D16;
+
+D16:
+
+tnscore = tnscore + response;
+
+/* Tree 18 of 85 */
+/* N terminal nodes = 5, Depth = 5 */
+
+
+N17_1:
+ if NUM_WORDS < 2.5 then goto T17_1;
+ else goto N17_2;
+
+T17_1:
+ response = -0.000931934;
+ goto D17;
+
+N17_2:
+ if DAY_WEEK_AVG_RATIO < 3.17 then goto N17_3;
+ else goto T17_5;
+
+N17_3:
+ if ISTITLE_AVG < 0.685 then goto N17_4;
+ else goto T17_4;
+
+N17_4:
+ if NATIONALNEWS < 0.225 then goto T17_2;
+ else goto T17_3;
+
+T17_2:
+ response = 0.00281994;
+ goto D17;
+
+T17_3:
+ response = 0.0214747;
+ goto D17;
+
+T17_4:
+ response = -0.00300096;
+ goto D17;
+
+T17_5:
+ response = 0.0138056;
+ goto D17;
+
+D17:
+
+tnscore = tnscore + response;
+
+/* Tree 19 of 85 */
+/* N terminal nodes = 5, Depth = 5 */
+
+
+N18_1:
+ if NUM_WORDS < 3.5 then goto N18_2;
+ else goto T18_5;
+
+N18_2:
+ if DAY_LW_DAY_HITS_RATIO < 4.9 then goto T18_1;
+ else goto N18_3;
+
+T18_1:
+ response = -0.000881061;
+ goto D18;
+
+N18_3:
+ if AVG_SCORE < 268471 then goto T18_2;
+ else goto N18_4;
+
+T18_2:
+ response = -8.77741e-05;
+ goto D18;
+
+N18_4:
+ if ISABSTRACT_AVG < 0.105 then goto T18_3;
+ else goto T18_4;
+
+T18_3:
+ response = 0.0164307;
+ goto D18;
+
+T18_4:
+ response = 0.000985136;
+ goto D18;
+
+T18_5:
+ response = 0.00654057;
+ goto D18;
+
+D18:
+
+tnscore = tnscore + response;
+
+/* Tree 20 of 85 */
+/* N terminal nodes = 5, Depth = 4 */
+
+
+N19_1:
+ if TOPSTORY < 0.03 then goto N19_2;
+ else goto N19_4;
+
+N19_2:
+ if ISTITLE_AVG < 0.62 then goto N19_3;
+ else goto T19_3;
+
+N19_3:
+ if AVG_SCORE < 268819 then goto T19_1;
+ else goto T19_2;
+
+T19_1:
+ response = -0.000315955;
+ goto D19;
+
+T19_2:
+ response = 0.00518778;
+ goto D19;
+
+T19_3:
+ response = -0.00177677;
+ goto D19;
+
+N19_4:
+ if ISTITLE_AVG < 0.72 then goto T19_4;
+ else goto T19_5;
+
+T19_4:
+ response = 0.0116413;
+ goto D19;
+
+T19_5:
+ response = 0.00139452;
+ goto D19;
+
+D19:
+
+tnscore = tnscore + response;
+
+/* Tree 21 of 85 */
+/* N terminal nodes = 5, Depth = 5 */
+
+
+N20_1:
+ if MAX_SCORE < 472738 then goto N20_2;
+ else goto T20_5;
+
+N20_2:
+ if ISTITLE_AVG < 0.63 then goto N20_3;
+ else goto T20_4;
+
+N20_3:
+ if DAY_WEEK_AVG_DERIV < 1.825 then goto T20_1;
+ else goto N20_4;
+
+T20_1:
+ response = 0.000494339;
+ goto D20;
+
+N20_4:
+ if MIN_SCORE < 250779 then goto T20_2;
+ else goto T20_3;
+
+T20_2:
+ response = -1.72329e-05;
+ goto D20;
+
+T20_3:
+ response = 0.012004;
+ goto D20;
+
+T20_4:
+ response = -0.00134588;
+ goto D20;
+
+T20_5:
+ response = 0.0100001;
+ goto D20;
+
+D20:
+
+tnscore = tnscore + response;
+
+/* Tree 22 of 85 */
+/* N terminal nodes = 5, Depth = 4 */
+
+
+N21_1:
+ if NUM_WORDS < 3.5 then goto N21_2;
+ else goto T21_5;
+
+N21_2:
+ if DAY_LW_DAY_HITS_RATIO < 24.9 then goto N21_3;
+ else goto N21_4;
+
+N21_3:
+ if PREV_DAY_HITS < 14.8333 then goto T21_1;
+ else goto T21_2;
+
+T21_1:
+ response = -0.000501675;
+ goto D21;
+
+T21_2:
+ response = 0.00773172;
+ goto D21;
+
+N21_4:
+ if LOCALNEWS < 0.11 then goto T21_3;
+ else goto T21_4;
+
+T21_3:
+ response = 0.00984187;
+ goto D21;
+
+T21_4:
+ response = 0.0382478;
+ goto D21;
+
+T21_5:
+ response = 0.00673426;
+ goto D21;
+
+D21:
+
+tnscore = tnscore + response;
+
+/* Tree 23 of 85 */
+/* N terminal nodes = 5, Depth = 4 */
+
+
+N22_1:
+ if PREV_DAY_HITS < 4.35 then goto N22_2;
+ else goto N22_3;
+
+N22_2:
+ if NUM_WORDS < 1.5 then goto T22_1;
+ else goto T22_2;
+
+T22_1:
+ response = -0.00227953;
+ goto D22;
+
+T22_2:
+ response = 8.50381e-05;
+ goto D22;
+
+N22_3:
+ if ISTITLE_AVG < 0.905 then goto N22_4;
+ else goto T22_5;
+
+N22_4:
+ if DAY_LW_DAY_HITS_RATIO < 1.6 then goto T22_3;
+ else goto T22_4;
+
+T22_3:
+ response = -0.00121777;
+ goto D22;
+
+T22_4:
+ response = 0.0155982;
+ goto D22;
+
+T22_5:
+ response = 0.00116876;
+ goto D22;
+
+D22:
+
+tnscore = tnscore + response;
+
+/* Tree 24 of 85 */
+/* N terminal nodes = 5, Depth = 5 */
+
+
+N23_1:
+ if DAY_LW_DAY_HITS_RATIO < 4.73 then goto T23_1;
+ else goto N23_2;
+
+T23_1:
+ response = -0.000272614;
+ goto D23;
+
+N23_2:
+ if NATIONALNEWS < 0.58 then goto N23_3;
+ else goto T23_5;
+
+N23_3:
+ if DAY_PD_HITS_RATIO < 0.63 then goto N23_4;
+ else goto T23_4;
+
+N23_4:
+ if DAY_HITS_FRAC < 0.265 then goto T23_2;
+ else goto T23_3;
+
+T23_2:
+ response = 0.0099063;
+ goto D23;
+
+T23_3:
+ response = 0.0510568;
+ goto D23;
+
+T23_4:
+ response = 0.00250323;
+ goto D23;
+
+T23_5:
+ response = 0.0470183;
+ goto D23;
+
+D23:
+
+tnscore = tnscore + response;
+
+/* Tree 25 of 85 */
+/* N terminal nodes = 5, Depth = 5 */
+
+
+N24_1:
+ if DAY_LW_DAY_HITS_RATIO < 5.915 then goto T24_1;
+ else goto N24_2;
+
+T24_1:
+ response = -0.000336118;
+ goto D24;
+
+N24_2:
+ if DAY_PD_HITS_DERIV < -11.5 then goto N24_3;
+ else goto T24_5;
+
+N24_3:
+ if DAY_WEEK_AVG_RATIO < 1.735 then goto T24_2;
+ else goto N24_4;
+
+T24_2:
+ response = 0.000924298;
+ goto D24;
+
+N24_4:
+ if DAY_HITS_FRAC < 0.165 then goto T24_3;
+ else goto T24_4;
+
+T24_3:
+ response = 0.0161363;
+ goto D24;
+
+T24_4:
+ response = 0.0802279;
+ goto D24;
+
+T24_5:
+ response = 0.00327439;
+ goto D24;
+
+D24:
+
+tnscore = tnscore + response;
+
+/* Tree 26 of 85 */
+/* N terminal nodes = 5, Depth = 5 */
+
+
+N25_1:
+ if NUM_WORDS < 2.5 then goto T25_1;
+ else goto N25_2;
+
+T25_1:
+ response = -0.000656912;
+ goto D25;
+
+N25_2:
+ if BUSINESS < 0.275 then goto N25_3;
+ else goto T25_5;
+
+N25_3:
+ if MAX_MIN_SCORE < 50.25 then goto T25_2;
+ else goto N25_4;
+
+T25_2:
+ response = 0.00224658;
+ goto D25;
+
+N25_4:
+ if ISABSTRACT_AVG < 0.415 then goto T25_3;
+ else goto T25_4;
+
+T25_3:
+ response = 0.013094;
+ goto D25;
+
+T25_4:
+ response = -0.0054932;
+ goto D25;
+
+T25_5:
+ response = -0.00808819;
+ goto D25;
+
+D25:
+
+tnscore = tnscore + response;
+
+/* Tree 27 of 85 */
+/* N terminal nodes = 5, Depth = 5 */
+
+
+N26_1:
+ if NATIONALNEWS < 0.135 then goto T26_1;
+ else goto N26_2;
+
+T26_1:
+ response = -0.000140405;
+ goto D26;
+
+N26_2:
+ if AVG_SCORE < 263507 then goto T26_2;
+ else goto N26_3;
+
+T26_2:
+ response = -0.000119297;
+ goto D26;
+
+N26_3:
+ if ISTITLE_AVG < 0.73 then goto N26_4;
+ else goto T26_5;
+
+N26_4:
+ if ENTERTAINMENT < 0.05 then goto T26_3;
+ else goto T26_4;
+
+T26_3:
+ response = 0.0220643;
+ goto D26;
+
+T26_4:
+ response = -0.00416695;
+ goto D26;
+
+T26_5:
+ response = 0.00371154;
+ goto D26;
+
+D26:
+
+tnscore = tnscore + response;
+
+/* Tree 28 of 85 */
+/* N terminal nodes = 5, Depth = 5 */
+
+
+N27_1:
+ if DAY_WEEK_AVG_RATIO < 14.28 then goto N27_2;
+ else goto T27_5;
+
+N27_2:
+ if NUM_WORDS < 4.5 then goto N27_3;
+ else goto T27_4;
+
+N27_3:
+ if MIN_SCORE < 245866 then goto T27_1;
+ else goto N27_4;
+
+T27_1:
+ response = -0.000984768;
+ goto D27;
+
+N27_4:
+ if ISTITLE_AVG < 0.72 then goto T27_2;
+ else goto T27_3;
+
+T27_2:
+ response = 0.00341093;
+ goto D27;
+
+T27_3:
+ response = -0.000973418;
+ goto D27;
+
+T27_4:
+ response = 0.0106439;
+ goto D27;
+
+T27_5:
+ response = 0.0360619;
+ goto D27;
+
+D27:
+
+tnscore = tnscore + response;
+
+/* Tree 29 of 85 */
+/* N terminal nodes = 5, Depth = 5 */
+
+
+N28_1:
+ if MIN_SCORE < 472207 then goto T28_1;
+ else goto N28_2;
+
+T28_1:
+ response = -0.000200627;
+ goto D28;
+
+N28_2:
+ if WEEKAVG < 0.325 then goto T28_2;
+ else goto N28_3;
+
+T28_2:
+ response = 0.00409488;
+ goto D28;
+
+N28_3:
+ if AVG_SCORE < 531893 then goto T28_3;
+ else goto N28_4;
+
+T28_3:
+ response = 0.0518209;
+ goto D28;
+
+N28_4:
+ if MAX_SCORE < 602809 then goto T28_4;
+ else goto T28_5;
+
+T28_4:
+ response = -0.0080393;
+ goto D28;
+
+T28_5:
+ response = 0.0383655;
+ goto D28;
+
+D28:
+
+tnscore = tnscore + response;
+
+/* Tree 30 of 85 */
+/* N terminal nodes = 5, Depth = 5 */
+
+
+N29_1:
+ if ISTITLE_AVG < 0.72 then goto N29_2;
+ else goto T29_5;
+
+N29_2:
+ if AVG_SCORE < 268824 then goto T29_1;
+ else goto N29_3;
+
+T29_1:
+ response = -0.000131907;
+ goto D29;
+
+N29_3:
+ if MAX_MIN_SCORE < 7909.75 then goto T29_2;
+ else goto N29_4;
+
+T29_2:
+ response = 0.00173958;
+ goto D29;
+
+N29_4:
+ if NATIONALNEWS < 0.39 then goto T29_3;
+ else goto T29_4;
+
+T29_3:
+ response = 0.010916;
+ goto D29;
+
+T29_4:
+ response = 0.0343348;
+ goto D29;
+
+T29_5:
+ response = -0.00113192;
+ goto D29;
+
+D29:
+
+tnscore = tnscore + response;
+
+/* Tree 31 of 85 */
+/* N terminal nodes = 5, Depth = 5 */
+
+
+N30_1:
+ if NUM_WORDS < 2.5 then goto T30_1;
+ else goto N30_2;
+
+T30_1:
+ response = -0.000626265;
+ goto D30;
+
+N30_2:
+ if BUSINESS < 0.115 then goto N30_3;
+ else goto T30_5;
+
+N30_3:
+ if MAX_MIN_SCORE < 15489.8 then goto T30_2;
+ else goto N30_4;
+
+T30_2:
+ response = 0.00192349;
+ goto D30;
+
+N30_4:
+ if MAX_MIN_SCORE < 35950.8 then goto T30_3;
+ else goto T30_4;
+
+T30_3:
+ response = 0.0188263;
+ goto D30;
+
+T30_4:
+ response = 0.00372838;
+ goto D30;
+
+T30_5:
+ response = -0.00528885;
+ goto D30;
+
+D30:
+
+tnscore = tnscore + response;
+
+/* Tree 32 of 85 */
+/* N terminal nodes = 5, Depth = 4 */
+
+
+N31_1:
+ if MAX_SCORE < 468155 then goto T31_1;
+ else goto N31_2;
+
+T31_1:
+ response = -0.000113066;
+ goto D31;
+
+N31_2:
+ if ENTERTAINMENT < 0.235 then goto N31_3;
+ else goto N31_4;
+
+N31_3:
+ if TOPSTORY < 0.22 then goto T31_2;
+ else goto T31_3;
+
+T31_2:
+ response = -0.000322423;
+ goto D31;
+
+T31_3:
+ response = 0.0188811;
+ goto D31;
+
+N31_4:
+ if AVG_RANK < 8.365 then goto T31_4;
+ else goto T31_5;
+
+T31_4:
+ response = 0.00856273;
+ goto D31;
+
+T31_5:
+ response = 0.064677;
+ goto D31;
+
+D31:
+
+tnscore = tnscore + response;
+
+/* Tree 33 of 85 */
+/* N terminal nodes = 5, Depth = 5 */
+
+
+N32_1:
+ if DAY_LW_DAY_HITS_RATIO < 38.5 then goto N32_2;
+ else goto T32_5;
+
+N32_2:
+ if AVG_SCORE < 259970 then goto T32_1;
+ else goto N32_3;
+
+T32_1:
+ response = -0.000611764;
+ goto D32;
+
+N32_3:
+ if INTLNEWS < 0.045 then goto T32_2;
+ else goto N32_4;
+
+T32_2:
+ response = 0.000128558;
+ goto D32;
+
+N32_4:
+ if LOCALNEWS < 0.28 then goto T32_3;
+ else goto T32_4;
+
+T32_3:
+ response = 0.00350635;
+ goto D32;
+
+T32_4:
+ response = 0.0165708;
+ goto D32;
+
+T32_5:
+ response = 0.018775;
+ goto D32;
+
+D32:
+
+tnscore = tnscore + response;
+
+/* Tree 34 of 85 */
+/* N terminal nodes = 5, Depth = 5 */
+
+
+N33_1:
+ if DAY_LW_DAY_HITS_RATIO < 28.5 then goto N33_2;
+ else goto T33_5;
+
+N33_2:
+ if ISTITLE_AVG < 0.585 then goto N33_3;
+ else goto T33_4;
+
+N33_3:
+ if MAX_RANK < 9 then goto T33_1;
+ else goto N33_4;
+
+T33_1:
+ response = 0.000172066;
+ goto D33;
+
+N33_4:
+ if AVG_SCORE < 269329 then goto T33_2;
+ else goto T33_3;
+
+T33_2:
+ response = -0.000102726;
+ goto D33;
+
+T33_3:
+ response = 0.00688386;
+ goto D33;
+
+T33_4:
+ response = -0.00101067;
+ goto D33;
+
+T33_5:
+ response = 0.0149278;
+ goto D33;
+
+D33:
+
+tnscore = tnscore + response;
+
+/* Tree 35 of 85 */
+/* N terminal nodes = 5, Depth = 4 */
+
+
+N34_1:
+ if DAY_LW_DAY_HITS_DERIV < 14.5 then goto T34_1;
+ else goto N34_2;
+
+T34_1:
+ response = -0.000172386;
+ goto D34;
+
+N34_2:
+ if DAY_PD_HITS_DERIV < -11.5 then goto N34_3;
+ else goto N34_4;
+
+N34_3:
+ if DAY_WEEK_AVG_RATIO < 1.82 then goto T34_2;
+ else goto T34_3;
+
+T34_2:
+ response = 0.00314713;
+ goto D34;
+
+T34_3:
+ response = 0.044771;
+ goto D34;
+
+N34_4:
+ if LOCALNEWS < 0.115 then goto T34_4;
+ else goto T34_5;
+
+T34_4:
+ response = 0.00112935;
+ goto D34;
+
+T34_5:
+ response = 0.0165557;
+ goto D34;
+
+D34:
+
+tnscore = tnscore + response;
+
+/* Tree 36 of 85 */
+/* N terminal nodes = 5, Depth = 5 */
+
+
+N35_1:
+ if DAY_PD_HITS_RATIO < 33.75 then goto N35_2;
+ else goto T35_5;
+
+N35_2:
+ if AVG_SCORE < 597646 then goto N35_3;
+ else goto T35_4;
+
+N35_3:
+ if DAY_PD_HITS_DERIV < -4.9 then goto N35_4;
+ else goto T35_3;
+
+N35_4:
+ if DAY_PREV_DAY_HITS_FRAC < 0.845 then goto T35_1;
+ else goto T35_2;
+
+T35_1:
+ response = -0.00115559;
+ goto D35;
+
+T35_2:
+ response = 0.0101514;
+ goto D35;
+
+T35_3:
+ response = -0.000188064;
+ goto D35;
+
+T35_4:
+ response = 0.0159463;
+ goto D35;
+
+T35_5:
+ response = 0.024791;
+ goto D35;
+
+D35:
+
+tnscore = tnscore + response;
+
+/* Tree 37 of 85 */
+/* N terminal nodes = 5, Depth = 4 */
+
+
+N36_1:
+ if MIN_SCORE < 481598 then goto T36_1;
+ else goto N36_2;
+
+T36_1:
+ response = -4.83069e-05;
+ goto D36;
+
+N36_2:
+ if MIN_SCORE < 512815 then goto N36_3;
+ else goto N36_4;
+
+N36_3:
+ if MAX_SCORE < 507654 then goto T36_2;
+ else goto T36_3;
+
+T36_2:
+ response = 0.00563943;
+ goto D36;
+
+T36_3:
+ response = 0.0345982;
+ goto D36;
+
+N36_4:
+ if MAX_SCORE < 584112 then goto T36_4;
+ else goto T36_5;
+
+T36_4:
+ response = -0.00935941;
+ goto D36;
+
+T36_5:
+ response = 0.0104819;
+ goto D36;
+
+D36:
+
+tnscore = tnscore + response;
+
+/* Tree 38 of 85 */
+/* N terminal nodes = 5, Depth = 5 */
+
+
+N37_1:
+ if TOPSTORY < 0.105 then goto T37_1;
+ else goto N37_2;
+
+T37_1:
+ response = -0.000322897;
+ goto D37;
+
+N37_2:
+ if PREV_DAY_HITS < 1.45833 then goto T37_2;
+ else goto N37_3;
+
+T37_2:
+ response = -0.000616703;
+ goto D37;
+
+N37_3:
+ if MIN_SCORE < 253414 then goto T37_3;
+ else goto N37_4;
+
+T37_3:
+ response = 0.00194629;
+ goto D37;
+
+N37_4:
+ if MIN_SCORE < 255748 then goto T37_4;
+ else goto T37_5;
+
+T37_4:
+ response = 0.0480784;
+ goto D37;
+
+T37_5:
+ response = 0.00955667;
+ goto D37;
+
+D37:
+
+tnscore = tnscore + response;
+
+/* Tree 39 of 85 */
+/* N terminal nodes = 5, Depth = 4 */
+
+
+N38_1:
+ if NUM_WORDS < 3.5 then goto N38_2;
+ else goto N38_4;
+
+N38_2:
+ if NATIONALNEWS < 0.39 then goto T38_1;
+ else goto N38_3;
+
+T38_1:
+ response = -0.000285208;
+ goto D38;
+
+N38_3:
+ if PREV_DAY_HITS < 6.83333 then goto T38_2;
+ else goto T38_3;
+
+T38_2:
+ response = 0.00277459;
+ goto D38;
+
+T38_3:
+ response = 0.0334432;
+ goto D38;
+
+N38_4:
+ if BUSINESS < 0.77 then goto T38_4;
+ else goto T38_5;
+
+T38_4:
+ response = 0.00558387;
+ goto D38;
+
+T38_5:
+ response = -0.0192348;
+ goto D38;
+
+D38:
+
+tnscore = tnscore + response;
+
+/* Tree 40 of 85 */
+/* N terminal nodes = 5, Depth = 5 */
+
+
+N39_1:
+ if BUSINESS < 0.19 then goto N39_2;
+ else goto T39_5;
+
+N39_2:
+ if INTLNEWS < 0.095 then goto T39_1;
+ else goto N39_3;
+
+T39_1:
+ response = 0.000107539;
+ goto D39;
+
+N39_3:
+ if TOPSTORY < 0.03 then goto T39_2;
+ else goto N39_4;
+
+T39_2:
+ response = 0.00158516;
+ goto D39;
+
+N39_4:
+ if NUM_WORDS < 3.5 then goto T39_3;
+ else goto T39_4;
+
+T39_3:
+ response = 0.00653366;
+ goto D39;
+
+T39_4:
+ response = 0.0388007;
+ goto D39;
+
+T39_5:
+ response = -0.00186321;
+ goto D39;
+
+D39:
+
+tnscore = tnscore + response;
+
+/* Tree 41 of 85 */
+/* N terminal nodes = 5, Depth = 5 */
+
+
+N40_1:
+ if DAY_LW_DAY_HITS_RATIO < 3.635 then goto T40_1;
+ else goto N40_2;
+
+T40_1:
+ response = -0.000380753;
+ goto D40;
+
+N40_2:
+ if LOCALNEWS < 0.185 then goto T40_2;
+ else goto N40_3;
+
+T40_2:
+ response = 0.000567701;
+ goto D40;
+
+N40_3:
+ if ISTITLE_AVG < 0.585 then goto N40_4;
+ else goto T40_5;
+
+N40_4:
+ if MAX_RANK < 9 then goto T40_3;
+ else goto T40_4;
+
+T40_3:
+ response = -0.00212514;
+ goto D40;
+
+T40_4:
+ response = 0.0247626;
+ goto D40;
+
+T40_5:
+ response = 0.00163409;
+ goto D40;
+
+D40:
+
+tnscore = tnscore + response;
+
+/* Tree 42 of 85 */
+/* N terminal nodes = 5, Depth = 5 */
+
+
+N41_1:
+ if DAY_WEEK_AVG_RATIO < 14.28 then goto N41_2;
+ else goto T41_5;
+
+N41_2:
+ if TOPSTORY < 0.115 then goto T41_1;
+ else goto N41_3;
+
+T41_1:
+ response = -0.000369615;
+ goto D41;
+
+N41_3:
+ if DAY_LW_DAY_HITS_RATIO < 4.875 then goto T41_2;
+ else goto N41_4;
+
+T41_2:
+ response = -0.000456219;
+ goto D41;
+
+N41_4:
+ if DAY_PD_HITS_RATIO < 2.275 then goto T41_3;
+ else goto T41_4;
+
+T41_3:
+ response = 0.0169104;
+ goto D41;
+
+T41_4:
+ response = 0.0021639;
+ goto D41;
+
+T41_5:
+ response = 0.0238394;
+ goto D41;
+
+D41:
+
+tnscore = tnscore + response;
+
+/* Tree 43 of 85 */
+/* N terminal nodes = 5, Depth = 5 */
+
+
+N42_1:
+ if NUM_WORDS < 1.5 then goto T42_1;
+ else goto N42_2;
+
+T42_1:
+ response = -0.00139356;
+ goto D42;
+
+N42_2:
+ if BUSINESS < 0.13 then goto N42_3;
+ else goto T42_5;
+
+N42_3:
+ if BUSINESS < 0.05 then goto T42_2;
+ else goto N42_4;
+
+T42_2:
+ response = 0.00103638;
+ goto D42;
+
+N42_4:
+ if DAY_LW_DAY_HITS_RATIO < 9.5 then goto T42_3;
+ else goto T42_4;
+
+T42_3:
+ response = 0.0441388;
+ goto D42;
+
+T42_4:
+ response = -0.00313189;
+ goto D42;
+
+T42_5:
+ response = -0.00195899;
+ goto D42;
+
+D42:
+
+tnscore = tnscore + response;
+
+/* Tree 44 of 85 */
+/* N terminal nodes = 5, Depth = 4 */
+
+
+N43_1:
+ if NATIONALNEWS < 0.13 then goto T43_1;
+ else goto N43_2;
+
+T43_1:
+ response = -2.4886e-05;
+ goto D43;
+
+N43_2:
+ if HEALTH < 0.105 then goto N43_3;
+ else goto N43_4;
+
+N43_3:
+ if WEEKAVG < 0.93 then goto T43_2;
+ else goto T43_3;
+
+T43_2:
+ response = 0.00135398;
+ goto D43;
+
+T43_3:
+ response = 0.0138431;
+ goto D43;
+
+N43_4:
+ if MIN_RANK < 3 then goto T43_4;
+ else goto T43_5;
+
+T43_4:
+ response = 0.0513615;
+ goto D43;
+
+T43_5:
+ response = -0.00585742;
+ goto D43;
+
+D43:
+
+tnscore = tnscore + response;
+
+/* Tree 45 of 85 */
+/* N terminal nodes = 5, Depth = 5 */
+
+
+N44_1:
+ if NATIONALNEWS < 0.225 then goto T44_1;
+ else goto N44_2;
+
+T44_1:
+ response = -8.48873e-05;
+ goto D44;
+
+N44_2:
+ if MIN_SCORE < 259062 then goto T44_2;
+ else goto N44_3;
+
+T44_2:
+ response = -0.00036897;
+ goto D44;
+
+N44_3:
+ if ISTITLE_AVG < 0.71 then goto N44_4;
+ else goto T44_5;
+
+N44_4:
+ if ISTITLE_AVG < 0.45 then goto T44_3;
+ else goto T44_4;
+
+T44_3:
+ response = 0.0126383;
+ goto D44;
+
+T44_4:
+ response = 0.0410443;
+ goto D44;
+
+T44_5:
+ response = 0.00420061;
+ goto D44;
+
+D44:
+
+tnscore = tnscore + response;
+
+/* Tree 46 of 85 */
+/* N terminal nodes = 5, Depth = 5 */
+
+
+N45_1:
+ if DAY_HITS < 15.25 then goto T45_1;
+ else goto N45_2;
+
+T45_1:
+ response = -0.000322532;
+ goto D45;
+
+N45_2:
+ if LOCALNEWS < 0.13 then goto T45_2;
+ else goto N45_3;
+
+T45_2:
+ response = 0.00109495;
+ goto D45;
+
+N45_3:
+ if WEEKAVG < 6.715 then goto N45_4;
+ else goto T45_5;
+
+N45_4:
+ if MAX_MIN_SCORE < 42695.8 then goto T45_3;
+ else goto T45_4;
+
+T45_3:
+ response = 0.0489508;
+ goto D45;
+
+T45_4:
+ response = 0.00942793;
+ goto D45;
+
+T45_5:
+ response = -0.00595868;
+ goto D45;
+
+D45:
+
+tnscore = tnscore + response;
+
+/* Tree 47 of 85 */
+/* N terminal nodes = 5, Depth = 4 */
+
+
+N46_1:
+ if DAY_LW_DAY_HITS_RATIO < 38.5 then goto N46_2;
+ else goto N46_3;
+
+N46_2:
+ if NUM_WORDS < 2.5 then goto T46_1;
+ else goto T46_2;
+
+T46_1:
+ response = -0.000253621;
+ goto D46;
+
+T46_2:
+ response = 0.00124598;
+ goto D46;
+
+N46_3:
+ if DAY_WEEK_AVG_DERIV < 49.715 then goto N46_4;
+ else goto T46_5;
+
+N46_4:
+ if ISTITLE_AVG < 0.74 then goto T46_3;
+ else goto T46_4;
+
+T46_3:
+ response = 0.0495711;
+ goto D46;
+
+T46_4:
+ response = 0.00323737;
+ goto D46;
+
+T46_5:
+ response = -0.00771975;
+ goto D46;
+
+D46:
+
+tnscore = tnscore + response;
+
+/* Tree 48 of 85 */
+/* N terminal nodes = 5, Depth = 5 */
+
+
+N47_1:
+ if NUM_WORDS < 1.5 then goto T47_1;
+ else goto N47_2;
+
+T47_1:
+ response = -0.001351;
+ goto D47;
+
+N47_2:
+ if DAY_WEEK_AVG_RATIO < 13.99 then goto N47_3;
+ else goto T47_5;
+
+N47_3:
+ if BUSINESS < 0.105 then goto N47_4;
+ else goto T47_4;
+
+N47_4:
+ if DAY_PD_HITS_DERIV < -10.5 then goto T47_2;
+ else goto T47_3;
+
+T47_2:
+ response = 0.0106695;
+ goto D47;
+
+T47_3:
+ response = 0.000527774;
+ goto D47;
+
+T47_4:
+ response = -0.00157676;
+ goto D47;
+
+T47_5:
+ response = 0.0239454;
+ goto D47;
+
+D47:
+
+tnscore = tnscore + response;
+
+/* Tree 49 of 85 */
+/* N terminal nodes = 5, Depth = 4 */
+
+
+N48_1:
+ if DAY_LW_DAY_HITS_RATIO < 24.9 then goto T48_1;
+ else goto N48_2;
+
+T48_1:
+ response = -6.42273e-05;
+ goto D48;
+
+N48_2:
+ if MIN_SCORE < 247776 then goto N48_3;
+ else goto N48_4;
+
+N48_3:
+ if TOPSTORY < 0.05 then goto T48_2;
+ else goto T48_3;
+
+T48_2:
+ response = -0.0160384;
+ goto D48;
+
+T48_3:
+ response = 0.00605178;
+ goto D48;
+
+N48_4:
+ if DAY_PREV_DAY_HITS_FRAC < 0.715 then goto T48_4;
+ else goto T48_5;
+
+T48_4:
+ response = -0.00225007;
+ goto D48;
+
+T48_5:
+ response = 0.0290955;
+ goto D48;
+
+D48:
+
+tnscore = tnscore + response;
+
+/* Tree 50 of 85 */
+/* N terminal nodes = 5, Depth = 5 */
+
+
+N49_1:
+ if PREV_DAY_HITS < 1.46429 then goto T49_1;
+ else goto N49_2;
+
+T49_1:
+ response = -0.000357361;
+ goto D49;
+
+N49_2:
+ if AVG_SCORE < 242369 then goto T49_2;
+ else goto N49_3;
+
+T49_2:
+ response = -0.000884977;
+ goto D49;
+
+N49_3:
+ if WEEKAVG < 5.975 then goto N49_4;
+ else goto T49_5;
+
+N49_4:
+ if DAY_HITS_FRAC < 0.135 then goto T49_3;
+ else goto T49_4;
+
+T49_3:
+ response = 0.00160961;
+ goto D49;
+
+T49_4:
+ response = 0.00805956;
+ goto D49;
+
+T49_5:
+ response = -0.0114471;
+ goto D49;
+
+D49:
+
+tnscore = tnscore + response;
+
+/* Tree 51 of 85 */
+/* N terminal nodes = 5, Depth = 4 */
+
+
+N50_1:
+ if PREV_DAY_HITS < 26.5 then goto N50_2;
+ else goto N50_3;
+
+N50_2:
+ if ISABSTRACT_AVG < 0.155 then goto T50_1;
+ else goto T50_2;
+
+T50_1:
+ response = 0.000196561;
+ goto D50;
+
+T50_2:
+ response = -0.00121068;
+ goto D50;
+
+N50_3:
+ if DAY_HITS_FRAC < 0.265 then goto N50_4;
+ else goto T50_5;
+
+N50_4:
+ if DAY_WEEK_AVG_RATIO < 2.12 then goto T50_3;
+ else goto T50_4;
+
+T50_3:
+ response = -0.00523257;
+ goto D50;
+
+T50_4:
+ response = 0.0187339;
+ goto D50;
+
+T50_5:
+ response = 0.0345852;
+ goto D50;
+
+D50:
+
+tnscore = tnscore + response;
+
+/* Tree 52 of 85 */
+/* N terminal nodes = 5, Depth = 4 */
+
+
+N51_1:
+ if NUM_WORDS < 4.5 then goto N51_2;
+ else goto N51_3;
+
+N51_2:
+ if MAX_MIN_SCORE < 55839.5 then goto T51_1;
+ else goto T51_2;
+
+T51_1:
+ response = -0.000141945;
+ goto D51;
+
+T51_2:
+ response = -0.00394864;
+ goto D51;
+
+N51_3:
+ if DAY_WEEK_AVG_RATIO < 3.505 then goto N51_4;
+ else goto T51_5;
+
+N51_4:
+ if AVG_RANK < 8.395 then goto T51_3;
+ else goto T51_4;
+
+T51_3:
+ response = 0.0095287;
+ goto D51;
+
+T51_4:
+ response = -0.0143254;
+ goto D51;
+
+T51_5:
+ response = 0.0273452;
+ goto D51;
+
+D51:
+
+tnscore = tnscore + response;
+
+/* Tree 53 of 85 */
+/* N terminal nodes = 5, Depth = 5 */
+
+
+N52_1:
+ if PREV_DAY_HITS < 16.5 then goto T52_1;
+ else goto N52_2;
+
+T52_1:
+ response = 0.000180791;
+ goto D52;
+
+N52_2:
+ if AVG_SCORE < 312154 then goto N52_3;
+ else goto T52_5;
+
+N52_3:
+ if DAY_HITS_FRAC < 0.325 then goto N52_4;
+ else goto T52_4;
+
+N52_4:
+ if MIN_SCORE < 254301 then goto T52_2;
+ else goto T52_3;
+
+T52_2:
+ response = -0.00274788;
+ goto D52;
+
+T52_3:
+ response = 0.0174896;
+ goto D52;
+
+T52_4:
+ response = -0.0248862;
+ goto D52;
+
+T52_5:
+ response = -0.023664;
+ goto D52;
+
+D52:
+
+tnscore = tnscore + response;
+
+/* Tree 54 of 85 */
+/* N terminal nodes = 5, Depth = 5 */
+
+
+N53_1:
+ if MIN_SCORE < 245164 then goto T53_1;
+ else goto N53_2;
+
+T53_1:
+ response = -0.000561547;
+ goto D53;
+
+N53_2:
+ if DAY_PREV_DAY_HITS_FRAC < 0.395 then goto T53_2;
+ else goto N53_3;
+
+T53_2:
+ response = -0.00410237;
+ goto D53;
+
+N53_3:
+ if MAX_RANK < 9 then goto T53_3;
+ else goto N53_4;
+
+T53_3:
+ response = -7.35691e-05;
+ goto D53;
+
+N53_4:
+ if MIN_SCORE < 560324 then goto T53_4;
+ else goto T53_5;
+
+T53_4:
+ response = 0.00230962;
+ goto D53;
+
+T53_5:
+ response = 0.0217268;
+ goto D53;
+
+D53:
+
+tnscore = tnscore + response;
+
+/* Tree 55 of 85 */
+/* N terminal nodes = 5, Depth = 4 */
+
+
+N54_1:
+ if DAY_WEEK_AVG_RATIO < 0.925 then goto N54_2;
+ else goto N54_3;
+
+N54_2:
+ if MAX_SCORE < 405533 then goto T54_1;
+ else goto T54_2;
+
+T54_1:
+ response = -0.000583987;
+ goto D54;
+
+T54_2:
+ response = -0.00549206;
+ goto D54;
+
+N54_3:
+ if MIN_SCORE < 479344 then goto T54_3;
+ else goto N54_4;
+
+T54_3:
+ response = 0.000303644;
+ goto D54;
+
+N54_4:
+ if MIN_SCORE < 489462 then goto T54_4;
+ else goto T54_5;
+
+T54_4:
+ response = 0.038018;
+ goto D54;
+
+T54_5:
+ response = 0.00297502;
+ goto D54;
+
+D54:
+
+tnscore = tnscore + response;
+
+/* Tree 56 of 85 */
+/* N terminal nodes = 5, Depth = 4 */
+
+
+N55_1:
+ if MIN_SCORE < 475038 then goto N55_2;
+ else goto N55_3;
+
+N55_2:
+ if MAX_SCORE < 498633 then goto T55_1;
+ else goto T55_2;
+
+T55_1:
+ response = -4.41489e-05;
+ goto D55;
+
+T55_2:
+ response = -0.0267606;
+ goto D55;
+
+N55_3:
+ if ISABSTRACT_AVG < 0.1 then goto N55_4;
+ else goto T55_5;
+
+N55_4:
+ if ENTERTAINMENT < 0.31 then goto T55_3;
+ else goto T55_4;
+
+T55_3:
+ response = 0.00455624;
+ goto D55;
+
+T55_4:
+ response = 0.0204099;
+ goto D55;
+
+T55_5:
+ response = -0.00647491;
+ goto D55;
+
+D55:
+
+tnscore = tnscore + response;
+
+/* Tree 57 of 85 */
+/* N terminal nodes = 5, Depth = 5 */
+
+
+N56_1:
+ if DAY_PD_HITS_RATIO < 0.115 then goto T56_1;
+ else goto N56_2;
+
+T56_1:
+ response = -0.00668954;
+ goto D56;
+
+N56_2:
+ if DAY_PD_HITS_DERIV < -11.5 then goto N56_3;
+ else goto T56_5;
+
+N56_3:
+ if DAY_PREV_DAY_HITS_FRAC < 0.975 then goto N56_4;
+ else goto T56_4;
+
+N56_4:
+ if DAY_PD_HITS_RATIO < 0.515 then goto T56_2;
+ else goto T56_3;
+
+T56_2:
+ response = -0.00146208;
+ goto D56;
+
+T56_3:
+ response = 0.0238106;
+ goto D56;
+
+T56_4:
+ response = 0.0377246;
+ goto D56;
+
+T56_5:
+ response = -9.33641e-05;
+ goto D56;
+
+D56:
+
+tnscore = tnscore + response;
+
+/* Tree 58 of 85 */
+/* N terminal nodes = 5, Depth = 4 */
+
+
+N57_1:
+ if DAY_WEEK_AVG_RATIO < 10.84 then goto N57_2;
+ else goto N57_4;
+
+N57_2:
+ if DAY_WEEK_AVG_RATIO < 6.845 then goto T57_1;
+ else goto N57_3;
+
+T57_1:
+ response = 0.000128893;
+ goto D57;
+
+N57_3:
+ if MIN_SCORE < 367047 then goto T57_2;
+ else goto T57_3;
+
+T57_2:
+ response = -0.00823593;
+ goto D57;
+
+T57_3:
+ response = 0.0102315;
+ goto D57;
+
+N57_4:
+ if AVG_SCORE < 279315 then goto T57_4;
+ else goto T57_5;
+
+T57_4:
+ response = -0.00477584;
+ goto D57;
+
+T57_5:
+ response = 0.0265572;
+ goto D57;
+
+D57:
+
+tnscore = tnscore + response;
+
+/* Tree 59 of 85 */
+/* N terminal nodes = 5, Depth = 5 */
+
+
+N58_1:
+ if PREV_DAY_HITS < 9.75 then goto T58_1;
+ else goto N58_2;
+
+T58_1:
+ response = -0.000266304;
+ goto D58;
+
+N58_2:
+ if DAY_PREV_DAY_HITS_FRAC < 0.925 then goto T58_2;
+ else goto N58_3;
+
+T58_2:
+ response = -0.000269406;
+ goto D58;
+
+N58_3:
+ if AVG_RANK < 8.45 then goto N58_4;
+ else goto T58_5;
+
+N58_4:
+ if INTLNEWS < 0.295 then goto T58_3;
+ else goto T58_4;
+
+T58_3:
+ response = 0.0146136;
+ goto D58;
+
+T58_4:
+ response = -0.0235187;
+ goto D58;
+
+T58_5:
+ response = 0.0313855;
+ goto D58;
+
+D58:
+
+tnscore = tnscore + response;
+
+/* Tree 60 of 85 */
+/* N terminal nodes = 5, Depth = 4 */
+
+
+N59_1:
+ if MIN_SCORE < 483511 then goto T59_1;
+ else goto N59_2;
+
+T59_1:
+ response = -0.000181558;
+ goto D59;
+
+N59_2:
+ if MIN_SCORE < 498030 then goto N59_3;
+ else goto N59_4;
+
+N59_3:
+ if DAY_WEEK_AVG_RATIO < 1.68 then goto T59_2;
+ else goto T59_3;
+
+T59_2:
+ response = 0.00293744;
+ goto D59;
+
+T59_3:
+ response = 0.0371557;
+ goto D59;
+
+N59_4:
+ if SUPERDUPER_AVG < 0.53 then goto T59_4;
+ else goto T59_5;
+
+T59_4:
+ response = 0.00413503;
+ goto D59;
+
+T59_5:
+ response = -0.0112815;
+ goto D59;
+
+D59:
+
+tnscore = tnscore + response;
+
+/* Tree 61 of 85 */
+/* N terminal nodes = 5, Depth = 4 */
+
+
+N60_1:
+ if DAY_PD_HITS_RATIO < 26.5 then goto N60_2;
+ else goto N60_4;
+
+N60_2:
+ if DAY_PD_HITS_DERIV < -11.5 then goto N60_3;
+ else goto T60_3;
+
+N60_3:
+ if DAY_WEEK_AVG_RATIO < 1.735 then goto T60_1;
+ else goto T60_2;
+
+T60_1:
+ response = 0.00051364;
+ goto D60;
+
+T60_2:
+ response = 0.0186441;
+ goto D60;
+
+T60_3:
+ response = -7.75501e-05;
+ goto D60;
+
+N60_4:
+ if WEEKAVG < 5.57 then goto T60_4;
+ else goto T60_5;
+
+T60_4:
+ response = 0.0278366;
+ goto D60;
+
+T60_5:
+ response = -0.00263107;
+ goto D60;
+
+D60:
+
+tnscore = tnscore + response;
+
+/* Tree 62 of 85 */
+/* N terminal nodes = 5, Depth = 5 */
+
+
+N61_1:
+ if NUM_WORDS < 1.5 then goto T61_1;
+ else goto N61_2;
+
+T61_1:
+ response = -0.0008926;
+ goto D61;
+
+N61_2:
+ if DAY_WEEK_AVG_DERIV < 47.86 then goto N61_3;
+ else goto T61_5;
+
+N61_3:
+ if DAY_LW_DAY_HITS_RATIO < 38.5 then goto N61_4;
+ else goto T61_4;
+
+N61_4:
+ if ISABSTRACT_AVG < 0.235 then goto T61_2;
+ else goto T61_3;
+
+T61_2:
+ response = 0.000972798;
+ goto D61;
+
+T61_3:
+ response = -0.00127979;
+ goto D61;
+
+T61_4:
+ response = 0.0250611;
+ goto D61;
+
+T61_5:
+ response = -0.0239326;
+ goto D61;
+
+D61:
+
+tnscore = tnscore + response;
+
+/* Tree 63 of 85 */
+/* N terminal nodes = 5, Depth = 5 */
+
+
+N62_1:
+ if MAX_SCORE < 407652 then goto N62_2;
+ else goto T62_5;
+
+N62_2:
+ if MAX_SCORE < 395501 then goto T62_1;
+ else goto N62_3;
+
+T62_1:
+ response = 1.49872e-05;
+ goto D62;
+
+N62_3:
+ if DAY_PD_HITS_RATIO < 0.285 then goto T62_2;
+ else goto N62_4;
+
+T62_2:
+ response = 0.0434173;
+ goto D62;
+
+N62_4:
+ if BUSINESS < 0.05 then goto T62_3;
+ else goto T62_4;
+
+T62_3:
+ response = 0.0102549;
+ goto D62;
+
+T62_4:
+ response = -0.010691;
+ goto D62;
+
+T62_5:
+ response = -0.00277705;
+ goto D62;
+
+D62:
+
+tnscore = tnscore + response;
+
+/* Tree 64 of 85 */
+/* N terminal nodes = 5, Depth = 5 */
+
+
+N63_1:
+ if DAY_WEEK_AVG_RATIO < 6.355 then goto T63_1;
+ else goto N63_2;
+
+T63_1:
+ response = 0.00013418;
+ goto D63;
+
+N63_2:
+ if MIN_SCORE < 405020 then goto N63_3;
+ else goto T63_5;
+
+N63_3:
+ if AVG_SCORE < 356693 then goto N63_4;
+ else goto T63_4;
+
+N63_4:
+ if MAX_SCORE < 327611 then goto T63_2;
+ else goto T63_3;
+
+T63_2:
+ response = -0.00546237;
+ goto D63;
+
+T63_3:
+ response = 0.0146496;
+ goto D63;
+
+T63_4:
+ response = -0.0196891;
+ goto D63;
+
+T63_5:
+ response = 0.0208141;
+ goto D63;
+
+D63:
+
+tnscore = tnscore + response;
+
+/* Tree 65 of 85 */
+/* N terminal nodes = 5, Depth = 4 */
+
+
+N64_1:
+ if AVG_SCORE < 526352 then goto N64_2;
+ else goto N64_3;
+
+N64_2:
+ if MAX_SCORE < 521635 then goto T64_1;
+ else goto T64_2;
+
+T64_1:
+ response = -6.26311e-05;
+ goto D64;
+
+T64_2:
+ response = 0.0210967;
+ goto D64;
+
+N64_3:
+ if MAX_SCORE < 550983 then goto T64_3;
+ else goto N64_4;
+
+T64_3:
+ response = -0.0232122;
+ goto D64;
+
+N64_4:
+ if AVG_RANK < 5.5 then goto T64_4;
+ else goto T64_5;
+
+T64_4:
+ response = -0.0243343;
+ goto D64;
+
+T64_5:
+ response = -0.00151995;
+ goto D64;
+
+D64:
+
+tnscore = tnscore + response;
+
+/* Tree 66 of 85 */
+/* N terminal nodes = 5, Depth = 4 */
+
+
+N65_1:
+ if DAY_PD_HITS_RATIO < 26.5 then goto N65_2;
+ else goto N65_3;
+
+N65_2:
+ if DAY_PD_HITS_RATIO < 20.625 then goto T65_1;
+ else goto T65_2;
+
+T65_1:
+ response = -5.89198e-05;
+ goto D65;
+
+T65_2:
+ response = -0.0216644;
+ goto D65;
+
+N65_3:
+ if MAX_SCORE < 200640 then goto T65_3;
+ else goto N65_4;
+
+T65_3:
+ response = -0.011139;
+ goto D65;
+
+N65_4:
+ if AVG_RANK < 8.55 then goto T65_4;
+ else goto T65_5;
+
+T65_4:
+ response = 0.0390014;
+ goto D65;
+
+T65_5:
+ response = 0.00966164;
+ goto D65;
+
+D65:
+
+tnscore = tnscore + response;
+
+/* Tree 67 of 85 */
+/* N terminal nodes = 5, Depth = 5 */
+
+
+N66_1:
+ if NATIONALNEWS < 0.27 then goto T66_1;
+ else goto N66_2;
+
+T66_1:
+ response = -0.000174062;
+ goto D66;
+
+N66_2:
+ if HEALTH < 0.05 then goto N66_3;
+ else goto T66_5;
+
+N66_3:
+ if AVG_SCORE < 342310 then goto T66_2;
+ else goto N66_4;
+
+T66_2:
+ response = 0.000835476;
+ goto D66;
+
+N66_4:
+ if MIN_SCORE < 347780 then goto T66_3;
+ else goto T66_4;
+
+T66_3:
+ response = 0.0334442;
+ goto D66;
+
+T66_4:
+ response = 0.00624751;
+ goto D66;
+
+T66_5:
+ response = 0.025545;
+ goto D66;
+
+D66:
+
+tnscore = tnscore + response;
+
+/* Tree 68 of 85 */
+/* N terminal nodes = 5, Depth = 5 */
+
+
+N67_1:
+ if ISTITLE_AVG < 0.72 then goto N67_2;
+ else goto T67_5;
+
+N67_2:
+ if MAX_MIN_SCORE < 43995.2 then goto T67_1;
+ else goto N67_3;
+
+T67_1:
+ response = 0.000396726;
+ goto D67;
+
+N67_3:
+ if DAY_PREV_DAY_HITS_FRAC < 0.935 then goto T67_2;
+ else goto N67_4;
+
+T67_2:
+ response = 0.00123996;
+ goto D67;
+
+N67_4:
+ if MAX_MIN_SCORE < 56002.2 then goto T67_3;
+ else goto T67_4;
+
+T67_3:
+ response = 0.0235285;
+ goto D67;
+
+T67_4:
+ response = -0.00154573;
+ goto D67;
+
+T67_5:
+ response = -0.000478464;
+ goto D67;
+
+D67:
+
+tnscore = tnscore + response;
+
+/* Tree 69 of 85 */
+/* N terminal nodes = 5, Depth = 5 */
+
+
+N68_1:
+ if BUSINESS < 0.21 then goto N68_2;
+ else goto T68_5;
+
+N68_2:
+ if DAY_LW_DAY_HITS_DERIV < 14.5 then goto T68_1;
+ else goto N68_3;
+
+T68_1:
+ response = 2.41495e-05;
+ goto D68;
+
+N68_3:
+ if NATIONALNEWS < 0.225 then goto N68_4;
+ else goto T68_4;
+
+N68_4:
+ if LOCALNEWS < 0.035 then goto T68_2;
+ else goto T68_3;
+
+T68_2:
+ response = -0.00142155;
+ goto D68;
+
+T68_3:
+ response = 0.0129645;
+ goto D68;
+
+T68_4:
+ response = 0.0297085;
+ goto D68;
+
+T68_5:
+ response = -0.0014897;
+ goto D68;
+
+D68:
+
+tnscore = tnscore + response;
+
+/* Tree 70 of 85 */
+/* N terminal nodes = 5, Depth = 5 */
+
+
+N69_1:
+ if SPORTS < 0.585 then goto T69_1;
+ else goto N69_2;
+
+T69_1:
+ response = -0.000304907;
+ goto D69;
+
+N69_2:
+ if MAX_SCORE < 285618 then goto T69_2;
+ else goto N69_3;
+
+T69_2:
+ response = 0.000221636;
+ goto D69;
+
+N69_3:
+ if ISTITLE_AVG < 0.7 then goto N69_4;
+ else goto T69_5;
+
+N69_4:
+ if MIN_SCORE < 269093 then goto T69_3;
+ else goto T69_4;
+
+T69_3:
+ response = 0.0417159;
+ goto D69;
+
+T69_4:
+ response = 0.00987586;
+ goto D69;
+
+T69_5:
+ response = 0.00129559;
+ goto D69;
+
+D69:
+
+tnscore = tnscore + response;
+
+/* Tree 71 of 85 */
+/* N terminal nodes = 5, Depth = 5 */
+
+
+N70_1:
+ if DAY_PD_HITS_RATIO < 12.28 then goto T70_1;
+ else goto N70_2;
+
+T70_1:
+ response = -5.73419e-05;
+ goto D70;
+
+N70_2:
+ if LOCALNEWS < 0.03 then goto T70_2;
+ else goto N70_3;
+
+T70_2:
+ response = -0.00224701;
+ goto D70;
+
+N70_3:
+ if WEEKAVG < 5.57 then goto N70_4;
+ else goto T70_5;
+
+N70_4:
+ if AVG_RANK < 8.1 then goto T70_3;
+ else goto T70_4;
+
+T70_3:
+ response = 0.0150017;
+ goto D70;
+
+T70_4:
+ response = 0.0490061;
+ goto D70;
+
+T70_5:
+ response = 0.000899967;
+ goto D70;
+
+D70:
+
+tnscore = tnscore + response;
+
+/* Tree 72 of 85 */
+/* N terminal nodes = 5, Depth = 5 */
+
+
+N71_1:
+ if NATIONALNEWS < 0.28 then goto T71_1;
+ else goto N71_2;
+
+T71_1:
+ response = -0.000202096;
+ goto D71;
+
+N71_2:
+ if MIN_SCORE < 259050 then goto T71_2;
+ else goto N71_3;
+
+T71_2:
+ response = -0.000461524;
+ goto D71;
+
+N71_3:
+ if PREV_DAY_HITS < 5.5 then goto N71_4;
+ else goto T71_5;
+
+N71_4:
+ if ISTITLE_AVG < 0.085 then goto T71_3;
+ else goto T71_4;
+
+T71_3:
+ response = 0.0107478;
+ goto D71;
+
+T71_4:
+ response = 0.000658206;
+ goto D71;
+
+T71_5:
+ response = 0.039025;
+ goto D71;
+
+D71:
+
+tnscore = tnscore + response;
+
+/* Tree 73 of 85 */
+/* N terminal nodes = 5, Depth = 5 */
+
+
+N72_1:
+ if DAY_WEEK_AVG_RATIO < 0.885 then goto T72_1;
+ else goto N72_2;
+
+T72_1:
+ response = -0.000898287;
+ goto D72;
+
+N72_2:
+ if MIN_SCORE < 482615 then goto T72_2;
+ else goto N72_3;
+
+T72_2:
+ response = 0.000137426;
+ goto D72;
+
+N72_3:
+ if AVG_SCORE < 506793 then goto N72_4;
+ else goto T72_5;
+
+N72_4:
+ if AVG_SCORE < 493340 then goto T72_3;
+ else goto T72_4;
+
+T72_3:
+ response = 0.011503;
+ goto D72;
+
+T72_4:
+ response = 0.0451903;
+ goto D72;
+
+T72_5:
+ response = 1.36945e-05;
+ goto D72;
+
+D72:
+
+tnscore = tnscore + response;
+
+/* Tree 74 of 85 */
+/* N terminal nodes = 5, Depth = 5 */
+
+
+N73_1:
+ if DAY_PD_HITS_DERIV < -4.5 then goto N73_2;
+ else goto T73_5;
+
+N73_2:
+ if HEALTH < 0.13 then goto N73_3;
+ else goto T73_4;
+
+N73_3:
+ if BUSINESS < 0.96 then goto T73_1;
+ else goto N73_4;
+
+T73_1:
+ response = 0.00144328;
+ goto D73;
+
+N73_4:
+ if MAX_SCORE < 239157 then goto T73_2;
+ else goto T73_3;
+
+T73_2:
+ response = -4.31323e-05;
+ goto D73;
+
+T73_3:
+ response = 0.0302083;
+ goto D73;
+
+T73_4:
+ response = 0.0263586;
+ goto D73;
+
+T73_5:
+ response = 3.93517e-05;
+ goto D73;
+
+D73:
+
+tnscore = tnscore + response;
+
+/* Tree 75 of 85 */
+/* N terminal nodes = 5, Depth = 5 */
+
+
+N74_1:
+ if DAY_LW_DAY_HITS_DERIV < 1.91 then goto T74_1;
+ else goto N74_2;
+
+T74_1:
+ response = -0.000330312;
+ goto D74;
+
+N74_2:
+ if MIN_SCORE < 254252 then goto T74_2;
+ else goto N74_3;
+
+T74_2:
+ response = -0.000287448;
+ goto D74;
+
+N74_3:
+ if WEEKAVG < 4.5 then goto N74_4;
+ else goto T74_5;
+
+N74_4:
+ if WEEKAVG < 0.93 then goto T74_3;
+ else goto T74_4;
+
+T74_3:
+ response = 0.00341942;
+ goto D74;
+
+T74_4:
+ response = 0.0180965;
+ goto D74;
+
+T74_5:
+ response = -0.0144877;
+ goto D74;
+
+D74:
+
+tnscore = tnscore + response;
+
+/* Tree 76 of 85 */
+/* N terminal nodes = 5, Depth = 4 */
+
+
+N75_1:
+ if NUM_WORDS < 4.5 then goto N75_2;
+ else goto N75_4;
+
+N75_2:
+ if MAX_MIN_SCORE < 56141.5 then goto T75_1;
+ else goto N75_3;
+
+T75_1:
+ response = 3.55635e-05;
+ goto D75;
+
+N75_3:
+ if ISTITLE_AVG < 0.69 then goto T75_2;
+ else goto T75_3;
+
+T75_2:
+ response = -0.0120653;
+ goto D75;
+
+T75_3:
+ response = -0.00193295;
+ goto D75;
+
+N75_4:
+ if DAY_HITS_FRAC < 0.585 then goto T75_4;
+ else goto T75_5;
+
+T75_4:
+ response = 0.0109657;
+ goto D75;
+
+T75_5:
+ response = -0.00562292;
+ goto D75;
+
+D75:
+
+tnscore = tnscore + response;
+
+/* Tree 77 of 85 */
+/* N terminal nodes = 5, Depth = 5 */
+
+
+N76_1:
+ if MAX_SCORE < 597411 then goto N76_2;
+ else goto T76_5;
+
+N76_2:
+ if AVG_SCORE < 525986 then goto N76_3;
+ else goto T76_4;
+
+N76_3:
+ if AVG_SCORE < 504944 then goto T76_1;
+ else goto N76_4;
+
+T76_1:
+ response = 6.12611e-05;
+ goto D76;
+
+N76_4:
+ if AVG_SCORE < 512650 then goto T76_2;
+ else goto T76_3;
+
+T76_2:
+ response = 0.0310299;
+ goto D76;
+
+T76_3:
+ response = 0.000664858;
+ goto D76;
+
+T76_4:
+ response = -0.010433;
+ goto D76;
+
+T76_5:
+ response = 0.00965011;
+ goto D76;
+
+D76:
+
+tnscore = tnscore + response;
+
+/* Tree 78 of 85 */
+/* N terminal nodes = 5, Depth = 4 */
+
+
+N77_1:
+ if DAY_PD_HITS_RATIO < 5.945 then goto N77_2;
+ else goto N77_3;
+
+N77_2:
+ if MAX_SCORE < 629654 then goto T77_1;
+ else goto T77_2;
+
+T77_1:
+ response = 0.000234339;
+ goto D77;
+
+T77_2:
+ response = -0.01439;
+ goto D77;
+
+N77_3:
+ if MAX_MIN_SCORE < 63226.5 then goto N77_4;
+ else goto T77_5;
+
+N77_4:
+ if LOCALNEWS < 0.28 then goto T77_3;
+ else goto T77_4;
+
+T77_3:
+ response = -0.00423293;
+ goto D77;
+
+T77_4:
+ response = 0.00606695;
+ goto D77;
+
+T77_5:
+ response = 0.0188983;
+ goto D77;
+
+D77:
+
+tnscore = tnscore + response;
+
+/* Tree 79 of 85 */
+/* N terminal nodes = 5, Depth = 5 */
+
+
+N78_1:
+ if MIN_SCORE < 670535 then goto N78_2;
+ else goto T78_5;
+
+N78_2:
+ if DAY_PD_HITS_RATIO < 5.845 then goto N78_3;
+ else goto T78_4;
+
+N78_3:
+ if DAY_LW_DAY_HITS_RATIO < 5.47 then goto T78_1;
+ else goto N78_4;
+
+T78_1:
+ response = -8.49912e-05;
+ goto D78;
+
+N78_4:
+ if TOPSTORY < 0.105 then goto T78_2;
+ else goto T78_3;
+
+T78_2:
+ response = -0.00034055;
+ goto D78;
+
+T78_3:
+ response = 0.0101604;
+ goto D78;
+
+T78_4:
+ response = -0.00330677;
+ goto D78;
+
+T78_5:
+ response = 0.0174593;
+ goto D78;
+
+D78:
+
+tnscore = tnscore + response;
+
+/* Tree 80 of 85 */
+/* N terminal nodes = 5, Depth = 5 */
+
+
+N79_1:
+ if DAY_PD_HITS_RATIO < 33.75 then goto N79_2;
+ else goto T79_5;
+
+N79_2:
+ if WEEKAVG < 4.46 then goto T79_1;
+ else goto N79_3;
+
+T79_1:
+ response = 3.99921e-05;
+ goto D79;
+
+N79_3:
+ if DAY_PREV_DAY_HITS_FRAC < 0.945 then goto N79_4;
+ else goto T79_4;
+
+N79_4:
+ if DAY_LW_DAY_HITS_RATIO < 48.5 then goto T79_2;
+ else goto T79_3;
+
+T79_2:
+ response = -0.00420023;
+ goto D79;
+
+T79_3:
+ response = 0.0191669;
+ goto D79;
+
+T79_4:
+ response = -0.0241434;
+ goto D79;
+
+T79_5:
+ response = 0.0157146;
+ goto D79;
+
+D79:
+
+tnscore = tnscore + response;
+
+/* Tree 81 of 85 */
+/* N terminal nodes = 5, Depth = 4 */
+
+
+N80_1:
+ if DAY_WEEK_AVG_RATIO < 6.3 then goto T80_1;
+ else goto N80_2;
+
+T80_1:
+ response = 0.00024645;
+ goto D80;
+
+N80_2:
+ if DAY_WEEK_AVG_DERIV < 13.785 then goto N80_3;
+ else goto N80_4;
+
+N80_3:
+ if MIN_SCORE < 397526 then goto T80_2;
+ else goto T80_3;
+
+T80_2:
+ response = -0.00908083;
+ goto D80;
+
+T80_3:
+ response = 0.00977666;
+ goto D80;
+
+N80_4:
+ if AVG_SCORE < 289007 then goto T80_4;
+ else goto T80_5;
+
+T80_4:
+ response = -0.00132101;
+ goto D80;
+
+T80_5:
+ response = 0.0196639;
+ goto D80;
+
+D80:
+
+tnscore = tnscore + response;
+
+/* Tree 82 of 85 */
+/* N terminal nodes = 5, Depth = 5 */
+
+
+N81_1:
+ if MIN_SCORE < 672810 then goto N81_2;
+ else goto T81_5;
+
+N81_2:
+ if MIN_SCORE < 631089 then goto N81_3;
+ else goto T81_4;
+
+N81_3:
+ if MAX_SCORE < 611207 then goto N81_4;
+ else goto T81_3;
+
+N81_4:
+ if MIN_SCORE < 512782 then goto T81_1;
+ else goto T81_2;
+
+T81_1:
+ response = -3.64401e-05;
+ goto D81;
+
+T81_2:
+ response = -0.00741622;
+ goto D81;
+
+T81_3:
+ response = 0.0190309;
+ goto D81;
+
+T81_4:
+ response = -0.0227335;
+ goto D81;
+
+T81_5:
+ response = 0.0167703;
+ goto D81;
+
+D81:
+
+tnscore = tnscore + response;
+
+/* Tree 83 of 85 */
+/* N terminal nodes = 5, Depth = 5 */
+
+
+N82_1:
+ if MAX_SCORE < 439769 then goto T82_1;
+ else goto N82_2;
+
+T82_1:
+ response = 4.38016e-05;
+ goto D82;
+
+N82_2:
+ if TOPSTORY < 0.22 then goto T82_2;
+ else goto N82_3;
+
+T82_2:
+ response = -0.00593521;
+ goto D82;
+
+N82_3:
+ if SUPERDUPER_AVG < 0.45 then goto N82_4;
+ else goto T82_5;
+
+N82_4:
+ if DAY_PD_HITS_DERIV < -1.5 then goto T82_3;
+ else goto T82_4;
+
+T82_3:
+ response = 0.0429213;
+ goto D82;
+
+T82_4:
+ response = 0.0049244;
+ goto D82;
+
+T82_5:
+ response = -0.0152763;
+ goto D82;
+
+D82:
+
+tnscore = tnscore + response;
+
+/* Tree 84 of 85 */
+/* N terminal nodes = 5, Depth = 4 */
+
+
+N83_1:
+ if REGIONALNEWS < 0.105 then goto N83_2;
+ else goto N83_3;
+
+N83_2:
+ if POLITICS < 0.29 then goto T83_1;
+ else goto T83_2;
+
+T83_1:
+ response = -8.83284e-05;
+ goto D83;
+
+T83_2:
+ response = -0.00708574;
+ goto D83;
+
+N83_3:
+ if MAX_SCORE < 291999 then goto T83_3;
+ else goto N83_4;
+
+T83_3:
+ response = 0.000387947;
+ goto D83;
+
+N83_4:
+ if DAY_WEEK_AVG_DERIV < 2.145 then goto T83_4;
+ else goto T83_5;
+
+T83_4:
+ response = -0.00777391;
+ goto D83;
+
+T83_5:
+ response = -0.0310452;
+ goto D83;
+
+D83:
+
+tnscore = tnscore + response;
+
+/* Tree 85 of 85 */
+/* N terminal nodes = 5, Depth = 5 */
+
+
+N84_1:
+ if MAX_MIN_SCORE < 61554.2 then goto T84_1;
+ else goto N84_2;
+
+T84_1:
+ response = -2.35487e-05;
+ goto D84;
+
+N84_2:
+ if NUM_WORDS < 2.5 then goto N84_3;
+ else goto T84_5;
+
+N84_3:
+ if MAX_MIN_SCORE < 88657.5 then goto T84_2;
+ else goto N84_4;
+
+T84_2:
+ response = -0.00672369;
+ goto D84;
+
+N84_4:
+ if DAY_PREV_DAY_HITS_FRAC < 0.295 then goto T84_3;
+ else goto T84_4;
+
+T84_3:
+ response = -0.0026578;
+ goto D84;
+
+T84_4:
+ response = 0.0151957;
+ goto D84;
+
+T84_5:
+ response = -0.0135855;
+ goto D84;
+
+D84:
+
+tnscore = tnscore + response;
+
+return;
diff --git a/searchlib/src/test/files/treenet07.model b/searchlib/src/test/files/treenet07.model
new file mode 100644
index 00000000000..98059bec273
--- /dev/null
+++ b/searchlib/src/test/files/treenet07.model
@@ -0,0 +1,13275 @@
+MODELBEGIN:
+
+ /* N trees: 200 */
+
+link TN0;
+pred = tnscore; /* predicted value for GRADE */
+
+/*********************/
+/* Model is complete */
+/*********************/
+
+return;
+
+
+
+TN0:
+
+tnscore = 0.0;
+
+N0_1:
+ if attribute(catid) in (0, 100200171, 100300011, 100200130, 100300014, 100300077, 100200034, 100200186, 100400141, 100300165, 100200052, 100300005, 100200172, 100300008, 100200068, 100300027, 100300116, 100300121, 100200053, 100300019, 100400142, 100200054, 100300073, 100200192, 100300212, 100300209, 100400079, 100200170, 100300169, 100400080, 100200176, 100300200, 100200028, 100300076, 100200232) then goto N0_2;
+ else goto N0_8;
+
+N0_2:
+ if attribute(catid) in (100200186, 100200068, 100300121, 100300019, 100200176, 100300200, 100200028, 100300076) then goto N0_3;
+ else goto N0_4;
+
+N0_3:
+ if attribute(catid) in (100200068, 100300019, 100200176, 100300200) then goto T0_1;
+ else goto T0_2;
+
+T0_1:
+ response = -0.0249999798;
+ goto D0;
+
+T0_2:
+ response = 0.0022099815;
+ goto D0;
+
+N0_4:
+ if attribute(catid) in (0, 100300011, 100300014, 100300077, 100200034, 100400141, 100300165, 100300005, 100200172, 100300008, 100300027, 100200053, 100200192, 100300209, 100400079, 100200170, 100300169, 100400080) then goto N0_5;
+ else goto N0_7;
+
+N0_5:
+ if attribute(catid) in (100300011, 100300165, 100300005, 100300027, 100200192, 100300209, 100400079, 100400080) then goto T0_3;
+ else goto N0_6;
+
+T0_3:
+ response = 0.0131601540;
+ goto D0;
+
+N0_6:
+ if attribute(catid) in (100300014, 100200034, 100400141, 100200172, 100300008, 100200053, 100200170, 100300169) then goto T0_4;
+ else goto T0_5;
+
+T0_4:
+ response = 0.0191030525;
+ goto D0;
+
+T0_5:
+ response = 0.0217254140;
+ goto D0;
+
+N0_7:
+ if attribute(catid) in (100200130, 100400142, 100200054, 100300073) then goto T0_6;
+ else goto T0_7;
+
+T0_6:
+ response = 0.0270836867;
+ goto D0;
+
+T0_7:
+ response = 0.0305748922;
+ goto D0;
+
+N0_8:
+ if attribute(catid) in (100300058, 100300166, 100300102, 100400037, 100400038, 100300065, 100300127, 100200087, 100300066, 100300006) then goto T0_8;
+ else goto N0_9;
+
+T0_8:
+ response = 0.0410066553;
+ goto D0;
+
+N0_9:
+ if attribute(catid) in (100300093, 100200234, 100300126, 100200193, 100300122, 100300074) then goto T0_9;
+ else goto T0_10;
+
+T0_9:
+ response = 0.0557829172;
+ goto D0;
+
+T0_10:
+ response = 0.0704327304;
+ goto D0;
+
+D0:
+
+tnscore = tnscore + response;
+
+ /* Tree 2 of 200 */
+N1_1:
+ if attribute(catid) in (0, 100200171, 100300011, 100300014, 100300058, 100300077, 100300143, 100200034, 100200186, 100400141, 100200052, 100300102, 100300005, 100200172, 100300008, 100200068, 100300027, 100300116, 100300121, 100200234, 100300019, 100300073, 100400038, 100200192, 100300065, 100300209, 100300127, 100400079, 100200170, 100300169, 100400080, 100200087, 100200176, 100300200, 100300076, 100200055, 100200232, 100300214) then goto N1_2;
+ else goto N1_7;
+
+N1_2:
+ if attribute(catid) in (100300011, 100300014, 100300058, 100300005, 100200068, 100300019, 100300209, 100400079, 100200170, 100400080, 100200176, 100300076, 100200055, 100200232, 100300214) then goto N1_3;
+ else goto N1_4;
+
+N1_3:
+ if attribute(catid) in (100200068, 100300019, 100200055, 100200232, 100300214) then goto T1_1;
+ else goto T1_2;
+
+T1_1:
+ response = -0.0359908300;
+ goto D1;
+
+T1_2:
+ response = -0.0027644159;
+ goto D1;
+
+N1_4:
+ if attribute(catid) in (100200171, 100200034, 100200186, 100300008, 100300116, 100300073, 100400038, 100200192, 100300127, 100300169, 100200087, 100300200) then goto N1_5;
+ else goto N1_6;
+
+N1_5:
+ if attribute(catid) in (100200171, 100200034, 100200186, 100300008, 100300116, 100200192) then goto T1_3;
+ else goto T1_4;
+
+T1_3:
+ response = 0.0113307000;
+ goto D1;
+
+T1_4:
+ response = 0.0164266261;
+ goto D1;
+
+N1_6:
+ if attribute(catid) in (0, 100400141, 100200052, 100300102, 100300027, 100300121, 100300065) then goto T1_5;
+ else goto T1_6;
+
+T1_5:
+ response = 0.0212550280;
+ goto D1;
+
+T1_6:
+ response = 0.0272380704;
+ goto D1;
+
+N1_7:
+ if attribute(catid) in (100200130, 100300013, 100300166, 100300004, 100200054, 100200193, 100300212, 100300074, 100300066) then goto N1_8;
+ else goto N1_9;
+
+N1_8:
+ if attribute(catid) in (100200130, 100300166) then goto T1_7;
+ else goto T1_8;
+
+T1_7:
+ response = 0.0328865429;
+ goto D1;
+
+T1_8:
+ response = 0.0399735491;
+ goto D1;
+
+N1_9:
+ if attribute(catid) in (100300165, 100300093, 100400142, 100300122, 100300006, 100300146) then goto T1_9;
+ else goto T1_10;
+
+T1_9:
+ response = 0.0477513417;
+ goto D1;
+
+T1_10:
+ response = 0.0587510469;
+ goto D1;
+
+D1:
+
+tnscore = tnscore + response;
+
+ /* Tree 3 of 200 */
+N2_1:
+ if attribute(catid) in (0, 100200171, 100300014, 100300058, 100300077, 100200034, 100200186, 100400141, 100300165, 100300005, 100200172, 100300008, 100200068, 100300027, 100300121, 100200053, 100300019, 100300004, 100300073, 100400038, 100200192, 100300065, 100300212, 100400079, 100200170, 100300169, 100400080, 100200087, 100300200, 100300076, 100300006, 100200232, 100300146) then goto N2_2;
+ else goto N2_6;
+
+N2_2:
+ if attribute(catid) in (100300014, 100300058, 100200034, 100200186, 100300008, 100200068, 100300019, 100300212, 100200232) then goto N2_3;
+ else goto N2_4;
+
+N2_3:
+ if attribute(catid) in (100300008, 100200068, 100300019, 100200232) then goto T2_1;
+ else goto T2_2;
+
+T2_1:
+ response = -0.0260716807;
+ goto D2;
+
+T2_2:
+ response = -0.0008004775;
+ goto D2;
+
+N2_4:
+ if attribute(catid) in (100300165, 100300005, 100300073, 100400079, 100200170, 100400080, 100200087, 100300146) then goto T2_3;
+ else goto N2_5;
+
+T2_3:
+ response = 0.0126841581;
+ goto D2;
+
+N2_5:
+ if attribute(catid) in (0, 100200171, 100300077, 100200172, 100300065, 100300006) then goto T2_4;
+ else goto T2_5;
+
+T2_4:
+ response = 0.0228814610;
+ goto D2;
+
+T2_5:
+ response = 0.0298499891;
+ goto D2;
+
+N2_6:
+ if attribute(catid) in (100300011, 100200130, 100300013, 100300166, 100200052, 100300102, 100300116, 100200234, 100400142, 100200054, 100300209, 100300127, 100300074, 100300066, 100200176, 100200028) then goto N2_7;
+ else goto N2_8;
+
+N2_7:
+ if attribute(catid) in (100200130, 100200052, 100300102, 100300116, 100200234, 100200054, 100300209) then goto T2_6;
+ else goto T2_7;
+
+T2_6:
+ response = 0.0393021257;
+ goto D2;
+
+T2_7:
+ response = 0.0475085975;
+ goto D2;
+
+N2_8:
+ if attribute(catid) in (100400037, 100300122, 100200067) then goto T2_8;
+ else goto T2_9;
+
+T2_8:
+ response = 0.0575085503;
+ goto D2;
+
+T2_9:
+ response = 0.0751742626;
+ goto D2;
+
+D2:
+
+tnscore = tnscore + response;
+
+ /* Tree 4 of 200 */
+N3_1:
+ if attribute(catid) in (0, 100300011, 100300014, 100300077, 100200186, 100400141, 100300165, 100300005, 100300008, 100200068, 100300032, 100300027, 100300121, 100300019, 100300126, 100300073, 100200192, 100300065, 100300212, 100300209, 100400079, 100200170, 100300169, 100400080, 100200087, 100300074, 100200176, 100300200, 100200028, 100300076, 100200067, 100200055, 100300006) then goto N3_2;
+ else goto N3_7;
+
+N3_2:
+ if attribute(catid) in (100300005, 100300008, 100200068, 100300032, 100300209, 100400080, 100200028, 100200067, 100200055) then goto N3_3;
+ else goto N3_4;
+
+N3_3:
+ if attribute(catid) in (100300005, 100300008, 100300032, 100300209, 100200067, 100200055) then goto T3_1;
+ else goto T3_2;
+
+T3_1:
+ response = -0.0365460976;
+ goto D3;
+
+T3_2:
+ response = -0.0109180769;
+ goto D3;
+
+N3_4:
+ if attribute(catid) in (100300014, 100300073, 100200192, 100300212, 100400079, 100300074, 100200176) then goto T3_3;
+ else goto N3_5;
+
+T3_3:
+ response = 0.0093762436;
+ goto D3;
+
+N3_5:
+ if attribute(catid) in (100200186, 100300165, 100300126, 100200170, 100300169, 100300200, 100300076) then goto T3_4;
+ else goto N3_6;
+
+T3_4:
+ response = 0.0193739138;
+ goto D3;
+
+N3_6:
+ if attribute(catid) in (0, 100300077, 100200087) then goto T3_5;
+ else goto T3_6;
+
+T3_5:
+ response = 0.0231180054;
+ goto D3;
+
+T3_6:
+ response = 0.0274056462;
+ goto D3;
+
+N3_7:
+ if attribute(catid) in (100200171, 100200130, 100300058, 100200034, 100200052, 100200172, 100300116, 100200053, 100400142, 100200054, 100300066) then goto N3_8;
+ else goto N3_9;
+
+N3_8:
+ if attribute(catid) in (100200171, 100200130, 100300058, 100300116, 100200054, 100300066) then goto T3_7;
+ else goto T3_8;
+
+T3_7:
+ response = 0.0339904435;
+ goto D3;
+
+T3_8:
+ response = 0.0402629873;
+ goto D3;
+
+N3_9:
+ if attribute(catid) in (100300013, 100300166, 100300102, 100200234, 100300004, 100400038, 100300122, 100300127, 100200185) then goto T3_9;
+ else goto T3_10;
+
+T3_9:
+ response = 0.0471640537;
+ goto D3;
+
+T3_10:
+ response = 0.0679501752;
+ goto D3;
+
+D3:
+
+tnscore = tnscore + response;
+
+ /* Tree 5 of 200 */
+N4_1:
+ if attribute(catid) in (0, 100200171, 100200130, 100300014, 100300058, 100300013, 100200034, 100200186, 100400141, 100300165, 100300005, 100200172, 100300008, 100200068, 100300032, 100300027, 100300116, 100300121, 100300019, 100300004, 100300073, 100200192, 100300065, 100300212, 100300209, 100400079, 100200170, 100300169, 100400080, 100200087, 100300074, 100200176, 100300200, 100200028, 100300076, 100200067, 100300146) then goto N4_2;
+ else goto N4_7;
+
+N4_2:
+ if attribute(catid) in (100300014, 100200034, 100200186, 100400141, 100300005, 100300008, 100200068, 100300032, 100300121, 100300019, 100300004, 100200192, 100300212, 100300209, 100400079, 100200170, 100400080, 100200176, 100300200, 100200028, 100300076, 100200067, 100300146) then goto N4_3;
+ else goto N4_5;
+
+N4_3:
+ if attribute(catid) in (100300014, 100200034, 100200068, 100300032, 100300019, 100300212, 100300209, 100200170, 100300200, 100200028, 100200067) then goto N4_4;
+ else goto T4_3;
+
+N4_4:
+ if attribute(catid) in (100300032, 100300019, 100300212, 100300209, 100300200, 100200028, 100200067) then goto T4_1;
+ else goto T4_2;
+
+T4_1:
+ response = -0.0252149649;
+ goto D4;
+
+T4_2:
+ response = 0.0005982331;
+ goto D4;
+
+T4_3:
+ response = 0.0109551118;
+ goto D4;
+
+N4_5:
+ if attribute(catid) in (0, 100200171, 100200172, 100300027, 100300073, 100300065, 100300169, 100200087, 100300074) then goto N4_6;
+ else goto T4_6;
+
+N4_6:
+ if attribute(catid) in (100200171, 100300073, 100300169, 100200087, 100300074) then goto T4_4;
+ else goto T4_5;
+
+T4_4:
+ response = 0.0192764204;
+ goto D4;
+
+T4_5:
+ response = 0.0239324010;
+ goto D4;
+
+T4_6:
+ response = 0.0295724103;
+ goto D4;
+
+N4_7:
+ if attribute(catid) in (100300011, 100300077, 100300166, 100200052, 100200234, 100200053, 100400142, 100400038, 100300122, 100300127, 100300066) then goto N4_8;
+ else goto N4_9;
+
+N4_8:
+ if attribute(catid) in (100300011, 100300077, 100300166, 100200052, 100200234, 100400038) then goto T4_7;
+ else goto T4_8;
+
+T4_7:
+ response = 0.0362646736;
+ goto D4;
+
+T4_8:
+ response = 0.0458985070;
+ goto D4;
+
+N4_9:
+ if attribute(catid) in (100300143, 100300093, 100300102, 100300126, 100200193, 100300006) then goto T4_9;
+ else goto T4_10;
+
+T4_9:
+ response = 0.0576959337;
+ goto D4;
+
+T4_10:
+ response = 0.0940124464;
+ goto D4;
+
+D4:
+
+tnscore = tnscore + response;
+
+ /* Tree 6 of 200 */
+N5_1:
+ if attribute(catid) in (0, 100200171, 100300014, 100300013, 100300077, 100200034, 100200186, 100300165, 100200052, 100300102, 100300005, 100200172, 100300008, 100200068, 100300032, 100300027, 100300073, 100400038, 100200192, 100300065, 100300212, 100300209, 100400079, 100200170, 100300169, 100400080, 100300074, 100300066, 100200176, 100300200, 100300076, 100200067, 100300006, 100300214, 100300146) then goto N5_2;
+ else goto N5_8;
+
+N5_2:
+ if attribute(catid) in (100300014, 100300102, 100300005, 100300008, 100200068, 100300032, 100300027, 100300212, 100400079, 100300076, 100300214, 100300146) then goto N5_3;
+ else goto N5_5;
+
+N5_3:
+ if attribute(catid) in (100300008, 100200068, 100300032, 100300212, 100300214) then goto N5_4;
+ else goto T5_3;
+
+N5_4:
+ if attribute(catid) in (100300008, 100300032, 100300214) then goto T5_1;
+ else goto T5_2;
+
+T5_1:
+ response = -0.0524432898;
+ goto D5;
+
+T5_2:
+ response = -0.0132279367;
+ goto D5;
+
+T5_3:
+ response = 0.0034801390;
+ goto D5;
+
+N5_5:
+ if attribute(catid) in (100200171, 100300013, 100200034, 100200186, 100200052, 100300073, 100400038, 100200192, 100300169, 100400080, 100300074, 100300066, 100300200, 100200067, 100300006) then goto N5_6;
+ else goto N5_7;
+
+N5_6:
+ if attribute(catid) in (100300013, 100200186, 100200052, 100300073, 100400038, 100300169, 100400080, 100300074, 100200067, 100300006) then goto T5_4;
+ else goto T5_5;
+
+T5_4:
+ response = 0.0179759830;
+ goto D5;
+
+T5_5:
+ response = 0.0212068067;
+ goto D5;
+
+N5_7:
+ if attribute(catid) in (100300077, 100300165, 100300209, 100200170, 100200176) then goto T5_6;
+ else goto T5_7;
+
+T5_6:
+ response = 0.0229665861;
+ goto D5;
+
+T5_7:
+ response = 0.0258231076;
+ goto D5;
+
+N5_8:
+ if attribute(catid) in (100300011, 100200130, 100300058, 100300166, 100300143, 100400141, 100300093, 100300116, 100300121, 100200053, 100300004, 100400142, 100200054, 100300122, 100300127, 100200087, 100200232) then goto N5_9;
+ else goto N5_10;
+
+N5_9:
+ if attribute(catid) in (100300011, 100200130, 100300058, 100300166, 100300143, 100300093, 100300116, 100200053, 100200054, 100300122, 100200087) then goto T5_8;
+ else goto T5_9;
+
+T5_8:
+ response = 0.0353581654;
+ goto D5;
+
+T5_9:
+ response = 0.0430524781;
+ goto D5;
+
+N5_10:
+ if attribute(catid) in (100200234, 100300019, 100400037, 100200028) then goto T5_10;
+ else goto T5_11;
+
+T5_10:
+ response = 0.0542526213;
+ goto D5;
+
+T5_11:
+ response = 0.0961212144;
+ goto D5;
+
+D5:
+
+tnscore = tnscore + response;
+
+ /* Tree 7 of 200 */
+N6_1:
+ if attribute(catid) in (0, 100200171, 100300011, 100200130, 100300014, 100300058, 100300077, 100200034, 100200186, 100400141, 100200052, 100300102, 100300005, 100300008, 100200068, 100300032, 100300027, 100300116, 100300121, 100200053, 100300019, 100200054, 100300073, 100200192, 100300209, 100400079, 100200170, 100300169, 100400080, 100200087, 100300200, 100200028, 100300076, 100200067, 100300006, 100200232, 100300214, 100300146) then goto N6_2;
+ else goto N6_6;
+
+N6_2:
+ if attribute(catid) in (100200171, 100300011, 100300014, 100300102, 100300005, 100300008, 100200068, 100300032, 100300027, 100200192, 100300209, 100400079, 100400080, 100200087, 100300076, 100200067, 100300006, 100300214, 100300146) then goto N6_3;
+ else goto N6_4;
+
+N6_3:
+ if attribute(catid) in (100300008, 100200068, 100300032, 100300209, 100300214) then goto T6_1;
+ else goto T6_2;
+
+T6_1:
+ response = -0.0256804569;
+ goto D6;
+
+T6_2:
+ response = 0.0046816048;
+ goto D6;
+
+N6_4:
+ if attribute(catid) in (100300058, 100200186, 100400141, 100300121, 100300019, 100200170, 100300169, 100300200) then goto T6_3;
+ else goto N6_5;
+
+T6_3:
+ response = 0.0160713107;
+ goto D6;
+
+N6_5:
+ if attribute(catid) in (0, 100200034, 100200052, 100200232) then goto T6_4;
+ else goto T6_5;
+
+T6_4:
+ response = 0.0221538810;
+ goto D6;
+
+T6_5:
+ response = 0.0250017744;
+ goto D6;
+
+N6_6:
+ if attribute(catid) in (100300166, 100300143, 100300165, 100300093, 100200172, 100200234, 100300004, 100300126, 100400142, 100400038, 100300065, 100300127, 100300074, 100300066, 100200185) then goto N6_7;
+ else goto N6_9;
+
+N6_7:
+ if attribute(catid) in (100300143, 100300165, 100300093, 100200172, 100200234, 100300126, 100400142, 100400038, 100300065, 100300066, 100200185) then goto N6_8;
+ else goto T6_8;
+
+N6_8:
+ if attribute(catid) in (100300093, 100200234, 100400038, 100300065, 100300066) then goto T6_6;
+ else goto T6_7;
+
+T6_6:
+ response = 0.0315719603;
+ goto D6;
+
+T6_7:
+ response = 0.0353792385;
+ goto D6;
+
+T6_8:
+ response = 0.0430233685;
+ goto D6;
+
+N6_9:
+ if attribute(catid) in (100200193, 100300122) then goto T6_9;
+ else goto T6_10;
+
+T6_9:
+ response = 0.0518243263;
+ goto D6;
+
+T6_10:
+ response = 0.0744220771;
+ goto D6;
+
+D6:
+
+tnscore = tnscore + response;
+
+ /* Tree 8 of 200 */
+N7_1:
+ if attribute(catid) in (0, 100200171, 100300011, 100300014, 100300058, 100300077, 100300166, 100200034, 100200186, 100400141, 100300165, 100200052, 100300093, 100300102, 100300005, 100300008, 100200068, 100300032, 100300027, 100300116, 100300121, 100300019, 100300073, 100400037, 100200192, 100300065, 100300122, 100300127, 100400079, 100200170, 100300169, 100400080, 100200087, 100200176, 100300200, 100300076, 100200067, 100300006, 100300214, 100300146) then goto N7_2;
+ else goto N7_8;
+
+N7_2:
+ if attribute(catid) in (100300011, 100300014, 100200034, 100200186, 100400141, 100300102, 100300005, 100300008, 100200068, 100300032, 100300027, 100300116, 100200192, 100400079, 100400080, 100200176, 100300200, 100300214, 100300146) then goto N7_3;
+ else goto N7_5;
+
+N7_3:
+ if attribute(catid) in (100300011, 100300102, 100300008, 100200068, 100300032, 100300200, 100300214) then goto T7_1;
+ else goto N7_4;
+
+T7_1:
+ response = -0.0241441823;
+ goto D7;
+
+N7_4:
+ if attribute(catid) in (100300014, 100400141, 100300005, 100200192, 100400080, 100200176) then goto T7_2;
+ else goto T7_3;
+
+T7_2:
+ response = 0.0020142953;
+ goto D7;
+
+T7_3:
+ response = 0.0081257199;
+ goto D7;
+
+N7_5:
+ if attribute(catid) in (100300058, 100300077, 100200052, 100300093, 100300073, 100400037, 100300065, 100200170, 100200087, 100200067) then goto T7_4;
+ else goto N7_6;
+
+T7_4:
+ response = 0.0141239460;
+ goto D7;
+
+N7_6:
+ if attribute(catid) in (0, 100200171, 100300165, 100300121, 100300019, 100300169) then goto N7_7;
+ else goto T7_7;
+
+N7_7:
+ if attribute(catid) in (0) then goto T7_5;
+ else goto T7_6;
+
+T7_5:
+ response = 0.0217711535;
+ goto D7;
+
+T7_6:
+ response = 0.0235348760;
+ goto D7;
+
+T7_7:
+ response = 0.0294010162;
+ goto D7;
+
+N7_8:
+ if attribute(catid) in (100200130, 100300143, 100200172, 100200234, 100200053, 100400142, 100400038, 100300212, 100300209, 100300066) then goto N7_9;
+ else goto N7_10;
+
+N7_9:
+ if attribute(catid) in (100200130, 100200172, 100200053, 100400142, 100400038, 100300212, 100300209) then goto T7_8;
+ else goto T7_9;
+
+T7_8:
+ response = 0.0358343100;
+ goto D7;
+
+T7_9:
+ response = 0.0447717702;
+ goto D7;
+
+N7_10:
+ if attribute(catid) in (100300004, 100300126, 100300074, 100300007, 100300045, 100200028, 100200185, 100200232) then goto T7_10;
+ else goto T7_11;
+
+T7_10:
+ response = 0.0622909986;
+ goto D7;
+
+T7_11:
+ response = 0.0942393297;
+ goto D7;
+
+D7:
+
+tnscore = tnscore + response;
+
+ /* Tree 9 of 200 */
+N8_1:
+ if attribute(catid) in (100300014, 100200034, 100300102, 100300005, 100200068, 100300019, 100200054, 100300209, 100400079, 100200170, 100400080, 100300200, 100200028, 100300076, 100200067, 100200185, 100200232, 100300214, 100300146) then goto N8_2;
+ else goto N8_3;
+
+N8_2:
+ if attribute(catid) in (100300102, 100300005, 100200068, 100300209, 100200028, 100200067, 100200232, 100300214) then goto T8_1;
+ else goto T8_2;
+
+T8_1:
+ response = -0.0222756779;
+ goto D8;
+
+T8_2:
+ response = -0.0032979771;
+ goto D8;
+
+N8_3:
+ if attribute(catid) in (0, 100200171, 100200130, 100300058, 100300077, 100200186, 100400141, 100300165, 100200052, 100300093, 100200172, 100300008, 100300032, 100300027, 100300116, 100300121, 100200053, 100400142, 100300073, 100200192, 100300065, 100300212, 100300122, 100300127, 100300169, 100200087, 100300074, 100300006) then goto N8_4;
+ else goto N8_8;
+
+N8_4:
+ if attribute(catid) in (100300058, 100300077, 100200186, 100400141, 100200052, 100300008, 100300032, 100300073, 100200192, 100300212, 100300169, 100200087) then goto N8_5;
+ else goto N8_6;
+
+N8_5:
+ if attribute(catid) in (100300077, 100200186, 100400141, 100300032, 100200192, 100300212) then goto T8_3;
+ else goto T8_4;
+
+T8_3:
+ response = 0.0114479050;
+ goto D8;
+
+T8_4:
+ response = 0.0165377861;
+ goto D8;
+
+N8_6:
+ if attribute(catid) in (100200171, 100300165, 100300093, 100200172, 100300121) then goto T8_5;
+ else goto N8_7;
+
+T8_5:
+ response = 0.0209845722;
+ goto D8;
+
+N8_7:
+ if attribute(catid) in (0, 100200130) then goto T8_6;
+ else goto T8_7;
+
+T8_6:
+ response = 0.0242667474;
+ goto D8;
+
+T8_7:
+ response = 0.0268049425;
+ goto D8;
+
+N8_8:
+ if attribute(catid) in (100300011, 100300166, 100200234, 100300004, 100400037, 100400038, 100300066, 100200176) then goto T8_8;
+ else goto T8_9;
+
+T8_8:
+ response = 0.0447283469;
+ goto D8;
+
+T8_9:
+ response = 0.0603545392;
+ goto D8;
+
+D8:
+
+tnscore = tnscore + response;
+
+ /* Tree 10 of 200 */
+N9_1:
+ if attribute(catid) in (0, 100200171, 100300014, 100300058, 100300077, 100200034, 100200186, 100400141, 100300165, 100200052, 100300093, 100300005, 100200172, 100300008, 100200068, 100300032, 100300027, 100300121, 100200053, 100200054, 100300073, 100400038, 100200192, 100300065, 100300212, 100300209, 100400079, 100200170, 100300169, 100400080, 100200087, 100300074, 100300200, 100300076, 100200067, 100200055, 100200232, 100300214) then goto N9_2;
+ else goto N9_6;
+
+N9_2:
+ if attribute(catid) in (100300005, 100300008, 100200068, 100300032, 100300121, 100300212, 100400079, 100400080, 100300200, 100300076, 100200055, 100300214) then goto N9_3;
+ else goto N9_4;
+
+N9_3:
+ if attribute(catid) in (100200068, 100300076, 100200055, 100300214) then goto T9_1;
+ else goto T9_2;
+
+T9_1:
+ response = -0.0265329011;
+ goto D9;
+
+T9_2:
+ response = -0.0038518940;
+ goto D9;
+
+N9_4:
+ if attribute(catid) in (100200171, 100300014, 100400141, 100300027, 100200054, 100300073, 100200192, 100200087, 100300074, 100200067) then goto T9_3;
+ else goto N9_5;
+
+T9_3:
+ response = 0.0107802387;
+ goto D9;
+
+N9_5:
+ if attribute(catid) in (0, 100300058, 100300077, 100200186, 100300093, 100200053, 100200170, 100300169, 100200232) then goto T9_4;
+ else goto T9_5;
+
+T9_4:
+ response = 0.0212053257;
+ goto D9;
+
+T9_5:
+ response = 0.0251822224;
+ goto D9;
+
+N9_6:
+ if attribute(catid) in (100300011, 100200130, 100300013, 100300166, 100300143, 100300102, 100300116, 100200234, 100300004, 100400142, 100200193, 100300122, 100300127, 100300066, 100200176, 100200028, 100300006) then goto N9_7;
+ else goto T9_9;
+
+N9_7:
+ if attribute(catid) in (100200130, 100300116, 100200234, 100300006) then goto T9_6;
+ else goto N9_8;
+
+T9_6:
+ response = 0.0314457190;
+ goto D9;
+
+N9_8:
+ if attribute(catid) in (100300166, 100400142, 100300122, 100300127, 100300066, 100200028) then goto T9_7;
+ else goto T9_8;
+
+T9_7:
+ response = 0.0391757711;
+ goto D9;
+
+T9_8:
+ response = 0.0454843261;
+ goto D9;
+
+T9_9:
+ response = 0.0873814277;
+ goto D9;
+
+D9:
+
+tnscore = tnscore + response;
+
+ /* Tree 11 of 200 */
+N10_1:
+ if attribute(catid) in (0, 100300014, 100300058, 100300013, 100200186, 100400141, 100300165, 100200052, 100300102, 100300005, 100200172, 100300008, 100200068, 100300032, 100300027, 100300116, 100300121, 100200234, 100300073, 100400038, 100200192, 100300212, 100400079, 100200170, 100300169, 100400080, 100200087, 100300074, 100300066, 100300200, 100200028, 100300076, 100300006, 100200232) then goto N10_2;
+ else goto N10_7;
+
+N10_2:
+ if attribute(catid) in (100300013, 100200186, 100300165, 100200052, 100300102, 100300005, 100200068, 100300116, 100200234, 100300073, 100400079, 100300169, 100400080, 100200087, 100300200, 100300076) then goto N10_3;
+ else goto N10_6;
+
+N10_3:
+ if attribute(catid) in (100300102, 100200068, 100200234, 100400080, 100200087, 100300200) then goto N10_4;
+ else goto N10_5;
+
+N10_4:
+ if attribute(catid) in (100200068) then goto T10_1;
+ else goto T10_2;
+
+T10_1:
+ response = -0.0151909005;
+ goto D10;
+
+T10_2:
+ response = -0.0021225032;
+ goto D10;
+
+N10_5:
+ if attribute(catid) in (100300005, 100300073, 100400079, 100300169, 100300076) then goto T10_3;
+ else goto T10_4;
+
+T10_3:
+ response = 0.0086835438;
+ goto D10;
+
+T10_4:
+ response = 0.0120329553;
+ goto D10;
+
+N10_6:
+ if attribute(catid) in (100300058, 100400141, 100300008, 100300032, 100300027, 100300121, 100400038, 100200192, 100300212, 100200170, 100200028, 100300006) then goto T10_5;
+ else goto T10_6;
+
+T10_5:
+ response = 0.0171461073;
+ goto D10;
+
+T10_6:
+ response = 0.0218015413;
+ goto D10;
+
+N10_7:
+ if attribute(catid) in (100200171, 100300011, 100200130, 100300077, 100300166, 100200034, 100200053, 100300019, 100400142, 100200054, 100400037, 100300065, 100300122, 100300127, 100200176) then goto N10_8;
+ else goto N10_9;
+
+N10_8:
+ if attribute(catid) in (100200171, 100300011, 100200034, 100200053, 100300019, 100200054, 100300065) then goto T10_7;
+ else goto T10_8;
+
+T10_7:
+ response = 0.0280408356;
+ goto D10;
+
+T10_8:
+ response = 0.0355357753;
+ goto D10;
+
+N10_9:
+ if attribute(catid) in (100300093, 100300004, 100300126, 100200185) then goto T10_9;
+ else goto T10_10;
+
+T10_9:
+ response = 0.0584272687;
+ goto D10;
+
+T10_10:
+ response = 0.0854108429;
+ goto D10;
+
+D10:
+
+tnscore = tnscore + response;
+
+ /* Tree 12 of 200 */
+N11_1:
+ if attribute(catid) in (100300011, 100300143, 100200034, 100300093, 100300005, 100300008, 100200068, 100300019, 100300073, 100400079, 100200170, 100400080, 100200087, 100200176, 100300200, 100300076, 100200055, 100200185, 100300006) then goto N11_2;
+ else goto N11_3;
+
+N11_2:
+ if attribute(catid) in (100300008, 100200068, 100400080, 100200176, 100200055) then goto T11_1;
+ else goto T11_2;
+
+T11_1:
+ response = -0.0169257508;
+ goto D11;
+
+T11_2:
+ response = 0.0096089202;
+ goto D11;
+
+N11_3:
+ if attribute(catid) in (0, 100200171, 100200130, 100300014, 100300013, 100300077, 100300166, 100200186, 100400141, 100300165, 100200052, 100300102, 100200172, 100300032, 100300027, 100300121, 100200053, 100300004, 100400142, 100200054, 100400037, 100200192, 100300065, 100300209, 100300122, 100300127, 100300169, 100300074) then goto N11_4;
+ else goto N11_7;
+
+N11_4:
+ if attribute(catid) in (0, 100300014, 100300077, 100300166, 100200186, 100400141, 100200052, 100200172, 100300032, 100300121, 100200053, 100400037, 100200192, 100300065, 100300169) then goto N11_5;
+ else goto T11_6;
+
+N11_5:
+ if attribute(catid) in (100300077, 100200186, 100200052, 100300032, 100400037, 100200192) then goto T11_3;
+ else goto N11_6;
+
+T11_3:
+ response = 0.0188605145;
+ goto D11;
+
+N11_6:
+ if attribute(catid) in (0, 100300014, 100200053) then goto T11_4;
+ else goto T11_5;
+
+T11_4:
+ response = 0.0230703185;
+ goto D11;
+
+T11_5:
+ response = 0.0248762385;
+ goto D11;
+
+T11_6:
+ response = 0.0312398602;
+ goto D11;
+
+N11_7:
+ if attribute(catid) in (100300116, 100400038, 100300212, 100300066, 100200232) then goto T11_7;
+ else goto T11_8;
+
+T11_7:
+ response = 0.0379114379;
+ goto D11;
+
+T11_8:
+ response = 0.0540119608;
+ goto D11;
+
+D11:
+
+tnscore = tnscore + response;
+
+ /* Tree 13 of 200 */
+N12_1:
+ if attribute(catid) in (100300011, 100300014, 100300058, 100200186, 100400141, 100300102, 100300005, 100200068, 100300032, 100300121, 100300073, 100200192, 100300209, 100400079, 100300169, 100400080, 100200087, 100200176, 100200028, 100200067, 100200055, 100200232) then goto N12_2;
+ else goto N12_5;
+
+N12_2:
+ if attribute(catid) in (100300011, 100300014, 100300058, 100300005, 100200068, 100300209, 100400080, 100200087, 100200028, 100200067, 100200055, 100200232) then goto N12_3;
+ else goto N12_4;
+
+N12_3:
+ if attribute(catid) in (100300209, 100200087, 100200028, 100200067, 100200055, 100200232) then goto T12_1;
+ else goto T12_2;
+
+T12_1:
+ response = -0.0225817796;
+ goto D12;
+
+T12_2:
+ response = -0.0008082327;
+ goto D12;
+
+N12_4:
+ if attribute(catid) in (100300102, 100300121, 100300073, 100200192, 100400079) then goto T12_3;
+ else goto T12_4;
+
+T12_3:
+ response = 0.0088591799;
+ goto D12;
+
+T12_4:
+ response = 0.0138162711;
+ goto D12;
+
+N12_5:
+ if attribute(catid) in (0, 100200171, 100300013, 100200034, 100300165, 100200172, 100300027, 100300116, 100200234, 100300004, 100400142, 100200054, 100300065, 100300122, 100300127, 100200170, 100300006) then goto N12_6;
+ else goto N12_8;
+
+N12_6:
+ if attribute(catid) in (0, 100200034, 100200172, 100300027, 100300116, 100200054, 100300065, 100300127) then goto N12_7;
+ else goto T12_7;
+
+N12_7:
+ if attribute(catid) in (100200034, 100200172, 100300027, 100300116, 100200054, 100300065) then goto T12_5;
+ else goto T12_6;
+
+T12_5:
+ response = 0.0185182017;
+ goto D12;
+
+T12_6:
+ response = 0.0221653757;
+ goto D12;
+
+T12_7:
+ response = 0.0258671547;
+ goto D12;
+
+N12_8:
+ if attribute(catid) in (100300166, 100200052, 100300093, 100200053, 100400037, 100300076) then goto T12_8;
+ else goto N12_9;
+
+T12_8:
+ response = 0.0319314298;
+ goto D12;
+
+N12_9:
+ if attribute(catid) in (100200130, 100300143, 100300008, 100400038, 100300074, 100300066) then goto T12_9;
+ else goto T12_10;
+
+T12_9:
+ response = 0.0399544136;
+ goto D12;
+
+T12_10:
+ response = 0.0491124971;
+ goto D12;
+
+D12:
+
+tnscore = tnscore + response;
+
+ /* Tree 14 of 200 */
+N13_1:
+ if attribute(catid) in (0, 100300011, 100300013, 100300077, 100200034, 100200186, 100400141, 100300165, 100200052, 100300005, 100300008, 100200068, 100300032, 100300027, 100300116, 100300121, 100300073, 100400038, 100300065, 100300209, 100400079, 100200170, 100300169, 100400080, 100200087, 100200176, 100300200, 100200028, 100300076, 100200067, 100300006, 100200232, 100300214) then goto N13_2;
+ else goto N13_6;
+
+N13_2:
+ if attribute(catid) in (100300011, 100300005, 100300008, 100300032, 100400080, 100200087, 100300076, 100300214) then goto N13_3;
+ else goto N13_4;
+
+N13_3:
+ if attribute(catid) in (100300008, 100300032, 100200087, 100300076, 100300214) then goto T13_1;
+ else goto T13_2;
+
+T13_1:
+ response = -0.0316835796;
+ goto D13;
+
+T13_2:
+ response = -0.0075185917;
+ goto D13;
+
+N13_4:
+ if attribute(catid) in (100300077, 100200034, 100400141, 100300165, 100200068, 100300027, 100300121, 100300209, 100200176, 100300200, 100200067, 100300006) then goto T13_3;
+ else goto N13_5;
+
+T13_3:
+ response = 0.0093589722;
+ goto D13;
+
+N13_5:
+ if attribute(catid) in (0, 100200052, 100300073, 100400079, 100200232) then goto T13_4;
+ else goto T13_5;
+
+T13_4:
+ response = 0.0185920468;
+ goto D13;
+
+T13_5:
+ response = 0.0231228547;
+ goto D13;
+
+N13_6:
+ if attribute(catid) in (100200171, 100200130, 100300014, 100300058, 100300166, 100300093, 100300102, 100200172, 100200234, 100200193, 100200192, 100300122, 100300127, 100300074, 100300066) then goto N13_7;
+ else goto N13_9;
+
+N13_7:
+ if attribute(catid) in (100200171, 100200130, 100300014, 100300058, 100300102, 100200172, 100200234, 100200192, 100300127) then goto N13_8;
+ else goto T13_8;
+
+N13_8:
+ if attribute(catid) in (100200130, 100200234, 100200192) then goto T13_6;
+ else goto T13_7;
+
+T13_6:
+ response = 0.0282114001;
+ goto D13;
+
+T13_7:
+ response = 0.0319414987;
+ goto D13;
+
+T13_8:
+ response = 0.0377741997;
+ goto D13;
+
+N13_9:
+ if attribute(catid) in (100200053, 100300004, 100400142, 100200054, 100400037, 100200185) then goto T13_9;
+ else goto T13_10;
+
+T13_9:
+ response = 0.0450431326;
+ goto D13;
+
+T13_10:
+ response = 0.0654935018;
+ goto D13;
+
+D13:
+
+tnscore = tnscore + response;
+
+ /* Tree 15 of 200 */
+N14_1:
+ if attribute(catid) in (100300011, 100300013, 100200034, 100200186, 100300102, 100300005, 100300008, 100200068, 100300032, 100300027, 100300116, 100300019, 100300126, 100300073, 100400037, 100200192, 100300065, 100300209, 100400079, 100200170, 100400080, 100200087, 100300074, 100300200, 100200067, 100300006) then goto N14_2;
+ else goto N14_4;
+
+N14_2:
+ if attribute(catid) in (100300005, 100300008, 100300032, 100300019, 100300209, 100200067) then goto T14_1;
+ else goto N14_3;
+
+T14_1:
+ response = -0.0352996105;
+ goto D14;
+
+N14_3:
+ if attribute(catid) in (100300011, 100200034, 100200186, 100200068, 100200192, 100200170, 100200087, 100300074) then goto T14_2;
+ else goto T14_3;
+
+T14_2:
+ response = 0.0029355359;
+ goto D14;
+
+T14_3:
+ response = 0.0110257031;
+ goto D14;
+
+N14_4:
+ if attribute(catid) in (0, 100200171, 100300014, 100300165, 100200052, 100200172, 100300121, 100200234, 100200053, 100400142, 100200054, 100300122, 100300127, 100300169, 100300066) then goto N14_5;
+ else goto N14_7;
+
+N14_5:
+ if attribute(catid) in (100300165, 100300121, 100200053, 100400142, 100300127, 100300169) then goto T14_4;
+ else goto N14_6;
+
+T14_4:
+ response = 0.0200666023;
+ goto D14;
+
+N14_6:
+ if attribute(catid) in (0, 100200171, 100200052, 100200172, 100200054) then goto T14_5;
+ else goto T14_6;
+
+T14_5:
+ response = 0.0258497457;
+ goto D14;
+
+T14_6:
+ response = 0.0331869782;
+ goto D14;
+
+N14_7:
+ if attribute(catid) in (100200130, 100300077, 100300166, 100400141, 100300093, 100300004, 100300007, 100200028) then goto T14_7;
+ else goto N14_8;
+
+T14_7:
+ response = 0.0426763778;
+ goto D14;
+
+N14_8:
+ if attribute(catid) in (100300143, 100400038, 100200176) then goto T14_8;
+ else goto T14_9;
+
+T14_8:
+ response = 0.0551482574;
+ goto D14;
+
+T14_9:
+ response = 0.0805987774;
+ goto D14;
+
+D14:
+
+tnscore = tnscore + response;
+
+ /* Tree 16 of 200 */
+N15_1:
+ if attribute(catid) in (0, 100200171, 100300014, 100300058, 100300077, 100300166, 100200034, 100200186, 100200052, 100300102, 100300005, 100200172, 100200068, 100300032, 100300116, 100300121, 100200053, 100300073, 100400037, 100400038, 100200192, 100300065, 100300212, 100300209, 100300122, 100300127, 100400079, 100200170, 100300169, 100400080, 100200087, 100200176, 100300200, 100200028, 100300076, 100200067, 100300146) then goto N15_2;
+ else goto N15_6;
+
+N15_2:
+ if attribute(catid) in (100300014, 100200034, 100300005, 100200068, 100300032, 100200192, 100300212, 100300209, 100200176, 100300200, 100200028, 100200067, 100300146) then goto N15_3;
+ else goto N15_4;
+
+N15_3:
+ if attribute(catid) in (100200068, 100300032, 100200176, 100200028, 100200067, 100300146) then goto T15_1;
+ else goto T15_2;
+
+T15_1:
+ response = -0.0188052149;
+ goto D15;
+
+T15_2:
+ response = -0.0014384095;
+ goto D15;
+
+N15_4:
+ if attribute(catid) in (100300058, 100300102, 100200053, 100300073, 100300065, 100400079, 100300169, 100400080, 100200087, 100300076) then goto T15_3;
+ else goto N15_5;
+
+T15_3:
+ response = 0.0138476724;
+ goto D15;
+
+N15_5:
+ if attribute(catid) in (0, 100200171, 100200186, 100200052, 100400038, 100200170) then goto T15_4;
+ else goto T15_5;
+
+T15_4:
+ response = 0.0196068633;
+ goto D15;
+
+T15_5:
+ response = 0.0248333768;
+ goto D15;
+
+N15_6:
+ if attribute(catid) in (100300011, 100200130, 100300143, 100400141, 100300165, 100300093, 100300027, 100200234, 100300019, 100300004, 100400142, 100200193, 100300074, 100300066, 100200232) then goto N15_7;
+ else goto T15_8;
+
+N15_7:
+ if attribute(catid) in (100200130, 100400141, 100300165, 100300027, 100300019, 100300004, 100300074, 100300066) then goto T15_6;
+ else goto T15_7;
+
+T15_6:
+ response = 0.0337546327;
+ goto D15;
+
+T15_7:
+ response = 0.0412000578;
+ goto D15;
+
+T15_8:
+ response = 0.0666143289;
+ goto D15;
+
+D15:
+
+tnscore = tnscore + response;
+
+ /* Tree 17 of 200 */
+N16_1:
+ if attribute(catid) in (100200171, 100300011, 100300077, 100200034, 100200186, 100300102, 100300005, 100300008, 100200068, 100300032, 100300027, 100300116, 100300121, 100300019, 100300073, 100400038, 100300209, 100400079, 100400080, 100300074, 100300200, 100200067, 100200055, 100300006) then goto N16_2;
+ else goto N16_5;
+
+N16_2:
+ if attribute(catid) in (100200034, 100200186, 100300005, 100300008, 100200068, 100300019, 100300200, 100200067, 100200055) then goto N16_3;
+ else goto N16_4;
+
+N16_3:
+ if attribute(catid) in (100300008, 100200068, 100200067, 100200055) then goto T16_1;
+ else goto T16_2;
+
+T16_1:
+ response = -0.0193944486;
+ goto D16;
+
+T16_2:
+ response = -0.0039850146;
+ goto D16;
+
+N16_4:
+ if attribute(catid) in (100300011, 100300102, 100300027, 100300116, 100300121, 100400038, 100300209, 100400080, 100300074) then goto T16_3;
+ else goto T16_4;
+
+T16_3:
+ response = 0.0038699264;
+ goto D16;
+
+T16_4:
+ response = 0.0111071757;
+ goto D16;
+
+N16_5:
+ if attribute(catid) in (0, 100200130, 100300014, 100300058, 100300013, 100300166, 100300143, 100200052, 100200172, 100200054, 100200192, 100300065, 100300127, 100200170, 100300169, 100200087, 100200176, 100200028, 100300076, 100300146) then goto N16_6;
+ else goto N16_8;
+
+N16_6:
+ if attribute(catid) in (0, 100200130, 100300058, 100300143, 100200172, 100200054, 100200192, 100200170, 100300169, 100200087, 100200176, 100200028, 100300076) then goto N16_7;
+ else goto T16_7;
+
+N16_7:
+ if attribute(catid) in (100200130, 100300143, 100200192, 100300169, 100200176, 100200028) then goto T16_5;
+ else goto T16_6;
+
+T16_5:
+ response = 0.0184644801;
+ goto D16;
+
+T16_6:
+ response = 0.0213606360;
+ goto D16;
+
+T16_7:
+ response = 0.0266245188;
+ goto D16;
+
+N16_8:
+ if attribute(catid) in (100400141, 100300165, 100200053, 100300004, 100300126, 100300212, 100300122, 100300066) then goto N16_9;
+ else goto T16_10;
+
+N16_9:
+ if attribute(catid) in (100200053, 100300004, 100300126, 100300122) then goto T16_8;
+ else goto T16_9;
+
+T16_8:
+ response = 0.0334635662;
+ goto D16;
+
+T16_9:
+ response = 0.0386077462;
+ goto D16;
+
+T16_10:
+ response = 0.0470519595;
+ goto D16;
+
+D16:
+
+tnscore = tnscore + response;
+
+ /* Tree 18 of 200 */
+N17_1:
+ if attribute(catid) in (0, 100300011, 100300014, 100300058, 100300077, 100300143, 100200034, 100200186, 100400141, 100300093, 100300102, 100300005, 100200172, 100300008, 100200068, 100300027, 100300121, 100200053, 100400142, 100300073, 100200192, 100400079, 100200170, 100300169, 100400080, 100200087, 100300074, 100300200, 100200028, 100200055, 100200232, 100300146) then goto N17_2;
+ else goto N17_6;
+
+N17_2:
+ if attribute(catid) in (100300011, 100300014, 100200034, 100300008, 100200068, 100300121, 100200192, 100200170, 100300074, 100300200, 100200055, 100200232, 100300146) then goto N17_3;
+ else goto N17_4;
+
+N17_3:
+ if attribute(catid) in (100200034, 100300008, 100300121, 100300200, 100200055, 100200232, 100300146) then goto T17_1;
+ else goto T17_2;
+
+T17_1:
+ response = -0.0069202095;
+ goto D17;
+
+T17_2:
+ response = 0.0039000323;
+ goto D17;
+
+N17_4:
+ if attribute(catid) in (100300058, 100400141, 100300093, 100300102, 100300005, 100200172, 100400142, 100300073, 100400080, 100200087) then goto T17_3;
+ else goto N17_5;
+
+T17_3:
+ response = 0.0156946965;
+ goto D17;
+
+N17_5:
+ if attribute(catid) in (0) then goto T17_4;
+ else goto T17_5;
+
+T17_4:
+ response = 0.0175514273;
+ goto D17;
+
+T17_5:
+ response = 0.0195153127;
+ goto D17;
+
+N17_6:
+ if attribute(catid) in (100200171, 100200130, 100300166, 100300165, 100200052, 100300032, 100300116, 100200234, 100300004, 100300126, 100400038, 100300065, 100300209, 100300066, 100300007, 100200176, 100300076) then goto N17_7;
+ else goto N17_9;
+
+N17_7:
+ if attribute(catid) in (100300166, 100300165, 100300004, 100300126, 100300065, 100300209) then goto T17_6;
+ else goto N17_8;
+
+T17_6:
+ response = 0.0260422255;
+ goto D17;
+
+N17_8:
+ if attribute(catid) in (100200171, 100200130, 100300032, 100400038, 100300066, 100300076) then goto T17_7;
+ else goto T17_8;
+
+T17_7:
+ response = 0.0288416138;
+ goto D17;
+
+T17_8:
+ response = 0.0331073272;
+ goto D17;
+
+N17_9:
+ if attribute(catid) in (100200054, 100200193, 100300122, 100300127, 100300045, 100200067) then goto T17_9;
+ else goto T17_10;
+
+T17_9:
+ response = 0.0443969439;
+ goto D17;
+
+T17_10:
+ response = 0.0673805882;
+ goto D17;
+
+D17:
+
+tnscore = tnscore + response;
+
+ /* Tree 19 of 200 */
+N18_1:
+ if attribute(catid) in (0, 100200171, 100300011, 100300014, 100300058, 100300013, 100300077, 100200034, 100200186, 100400141, 100300165, 100200052, 100300093, 100300005, 100200172, 100200068, 100300032, 100300027, 100300116, 100400142, 100300073, 100200192, 100300209, 100400079, 100200170, 100300169, 100400080, 100200087, 100300200, 100200028, 100300076, 100200055, 100200232) then goto N18_2;
+ else goto N18_7;
+
+N18_2:
+ if attribute(catid) in (100200034, 100200068, 100300209, 100200170, 100300200, 100200028, 100200055, 100200232) then goto N18_3;
+ else goto N18_4;
+
+N18_3:
+ if attribute(catid) in (100200034, 100300209, 100300200, 100200028, 100200055, 100200232) then goto T18_1;
+ else goto T18_2;
+
+T18_1:
+ response = -0.0248522225;
+ goto D18;
+
+T18_2:
+ response = -0.0018897827;
+ goto D18;
+
+N18_4:
+ if attribute(catid) in (100200171, 100300013, 100300077, 100200186, 100400141, 100300093, 100300027, 100300116, 100400142, 100400079, 100400080) then goto N18_5;
+ else goto N18_6;
+
+N18_5:
+ if attribute(catid) in (100200171, 100300077, 100200186, 100300116, 100400079) then goto T18_3;
+ else goto T18_4;
+
+T18_3:
+ response = 0.0098977390;
+ goto D18;
+
+T18_4:
+ response = 0.0135323202;
+ goto D18;
+
+N18_6:
+ if attribute(catid) in (100300058, 100300165, 100200052, 100200172, 100300169, 100200087, 100300076) then goto T18_5;
+ else goto T18_6;
+
+T18_5:
+ response = 0.0178483129;
+ goto D18;
+
+T18_6:
+ response = 0.0206390742;
+ goto D18;
+
+N18_7:
+ if attribute(catid) in (100200130, 100300166, 100300008, 100200234, 100300004, 100300126, 100400037, 100400038, 100300065, 100300122, 100300074, 100300066, 100300006, 100300146) then goto N18_8;
+ else goto N18_9;
+
+N18_8:
+ if attribute(catid) in (100200130, 100300166, 100200234, 100300065, 100300146) then goto T18_7;
+ else goto T18_8;
+
+T18_7:
+ response = 0.0310277032;
+ goto D18;
+
+T18_8:
+ response = 0.0370699377;
+ goto D18;
+
+N18_9:
+ if attribute(catid) in (100300121, 100200053, 100300212, 100300127, 100200176, 100200185) then goto T18_9;
+ else goto T18_10;
+
+T18_9:
+ response = 0.0485097295;
+ goto D18;
+
+T18_10:
+ response = 0.0645157682;
+ goto D18;
+
+D18:
+
+tnscore = tnscore + response;
+
+ /* Tree 20 of 200 */
+N19_1:
+ if attribute(catid) in (0, 100200171, 100300011, 100200130, 100300058, 100300077, 100300143, 100200034, 100200186, 100400141, 100200052, 100300005, 100200172, 100300008, 100200068, 100300027, 100300116, 100300121, 100200053, 100300019, 100300004, 100300073, 100400038, 100200192, 100300065, 100300209, 100300127, 100400079, 100300169, 100400080, 100200087, 100300074, 100300066, 100300200, 100200055, 100300006, 100200232, 100300214) then goto N19_2;
+ else goto N19_7;
+
+N19_2:
+ if attribute(catid) in (100200171, 100300011, 100300077, 100200034, 100200186, 100300005, 100200068, 100200053, 100300019, 100300004, 100300073, 100400038, 100200192, 100300209, 100400079, 100200087, 100300074, 100200055, 100300006, 100300214) then goto N19_3;
+ else goto N19_5;
+
+N19_3:
+ if attribute(catid) in (100300005, 100200068, 100300019, 100300209, 100200087, 100200055, 100300006, 100300214) then goto T19_1;
+ else goto N19_4;
+
+T19_1:
+ response = -0.0244019521;
+ goto D19;
+
+N19_4:
+ if attribute(catid) in (100200186, 100400038, 100400079, 100300074) then goto T19_2;
+ else goto T19_3;
+
+T19_2:
+ response = 0.0034399160;
+ goto D19;
+
+T19_3:
+ response = 0.0084132649;
+ goto D19;
+
+N19_5:
+ if attribute(catid) in (100200130, 100400141, 100200052, 100200172, 100300008, 100300027, 100300116, 100300065, 100300169, 100400080, 100300200) then goto N19_6;
+ else goto T19_6;
+
+N19_6:
+ if attribute(catid) in (100400141, 100200052, 100200172, 100300116, 100300065, 100400080, 100300200) then goto T19_4;
+ else goto T19_5;
+
+T19_4:
+ response = 0.0146253305;
+ goto D19;
+
+T19_5:
+ response = 0.0185737842;
+ goto D19;
+
+T19_6:
+ response = 0.0224432378;
+ goto D19;
+
+N19_7:
+ if attribute(catid) in (100300166, 100300165, 100300032, 100400142, 100300122, 100200170, 100300007, 100200028) then goto N19_8;
+ else goto N19_9;
+
+N19_8:
+ if attribute(catid) in (100300166, 100300032, 100400142, 100200170) then goto T19_7;
+ else goto T19_8;
+
+T19_7:
+ response = 0.0312540362;
+ goto D19;
+
+T19_8:
+ response = 0.0367389808;
+ goto D19;
+
+N19_9:
+ if attribute(catid) in (100300014, 100200234, 100400037, 100200193, 100200176, 100200067, 100200185) then goto T19_9;
+ else goto T19_10;
+
+T19_9:
+ response = 0.0515240946;
+ goto D19;
+
+T19_10:
+ response = 0.0623565161;
+ goto D19;
+
+D19:
+
+tnscore = tnscore + response;
+
+ /* Tree 21 of 200 */
+N20_1:
+ if attribute(catid) in (0, 100200171, 100300011, 100300014, 100300058, 100300013, 100300077, 100200186, 100400141, 100300165, 100200052, 100300093, 100300102, 100300005, 100200172, 100300008, 100200068, 100300032, 100300027, 100300121, 100200234, 100300019, 100300004, 100200054, 100300073, 100400037, 100400038, 100300212, 100300209, 100400079, 100300169, 100400080, 100200087, 100300066, 100300200, 100200028, 100300076, 100200067, 100300006, 100300214) then goto N20_2;
+ else goto N20_7;
+
+N20_2:
+ if attribute(catid) in (100300013, 100300093, 100300008, 100200068, 100300019, 100300073, 100300212, 100300209, 100400080, 100200087, 100300200, 100200028, 100300076, 100200067, 100300214) then goto N20_3;
+ else goto N20_4;
+
+N20_3:
+ if attribute(catid) in (100300008, 100200068, 100300019, 100300212, 100400080, 100200067, 100300214) then goto T20_1;
+ else goto T20_2;
+
+T20_1:
+ response = -0.0143906523;
+ goto D20;
+
+T20_2:
+ response = 0.0034452824;
+ goto D20;
+
+N20_4:
+ if attribute(catid) in (100300058, 100200186, 100400141, 100300165, 100200052, 100300005, 100300032, 100300027, 100200234, 100200054, 100400038, 100400079, 100300169, 100300006) then goto N20_5;
+ else goto N20_6;
+
+N20_5:
+ if attribute(catid) in (100300058, 100300005, 100300027, 100200234, 100200054, 100300169, 100300006) then goto T20_3;
+ else goto T20_4;
+
+T20_3:
+ response = 0.0099743393;
+ goto D20;
+
+T20_4:
+ response = 0.0144610757;
+ goto D20;
+
+N20_6:
+ if attribute(catid) in (0, 100300011, 100300014, 100300102, 100200172, 100300004, 100400037) then goto T20_5;
+ else goto T20_6;
+
+T20_5:
+ response = 0.0194162110;
+ goto D20;
+
+T20_6:
+ response = 0.0220846421;
+ goto D20;
+
+N20_7:
+ if attribute(catid) in (100200130, 100300166, 100200034, 100200053, 100200192, 100300065, 100200170, 100200176) then goto T20_7;
+ else goto N20_8;
+
+T20_7:
+ response = 0.0303840891;
+ goto D20;
+
+N20_8:
+ if attribute(catid) in (100300116, 100400142, 100300122, 100300127, 100300074, 100300045) then goto T20_8;
+ else goto T20_9;
+
+T20_8:
+ response = 0.0417668157;
+ goto D20;
+
+T20_9:
+ response = 0.0552431545;
+ goto D20;
+
+D20:
+
+tnscore = tnscore + response;
+
+ /* Tree 22 of 200 */
+N21_1:
+ if attribute(catid) in (0, 100200171, 100300011, 100200130, 100300058, 100300077, 100200186, 100400141, 100300165, 100300102, 100300005, 100200172, 100200068, 100300032, 100300027, 100300116, 100300121, 100200234, 100200053, 100300126, 100200054, 100300073, 100400038, 100200192, 100300209, 100400079, 100300169, 100400080, 100200087, 100200176, 100300200, 100200028, 100300076, 100200055, 100300006, 100200232) then goto N21_2;
+ else goto N21_7;
+
+N21_2:
+ if attribute(catid) in (100300011, 100300058, 100200186, 100300165, 100300005, 100200068, 100300032, 100300116, 100300121, 100200234, 100300126, 100200054, 100400038, 100200192, 100300209, 100400079, 100200176, 100200028, 100200055, 100200232) then goto N21_3;
+ else goto N21_5;
+
+N21_3:
+ if attribute(catid) in (100300058, 100300005, 100200068, 100300032, 100300209, 100200028, 100200055, 100200232) then goto N21_4;
+ else goto T21_3;
+
+N21_4:
+ if attribute(catid) in (100300058, 100300005, 100300032, 100300209, 100200055) then goto T21_1;
+ else goto T21_2;
+
+T21_1:
+ response = -0.0199572721;
+ goto D21;
+
+T21_2:
+ response = -0.0000134782;
+ goto D21;
+
+T21_3:
+ response = 0.0095039400;
+ goto D21;
+
+N21_5:
+ if attribute(catid) in (100200171, 100300077, 100400141, 100300073, 100400080, 100200087, 100300200) then goto T21_4;
+ else goto N21_6;
+
+T21_4:
+ response = 0.0170204672;
+ goto D21;
+
+N21_6:
+ if attribute(catid) in (0, 100200130, 100200053, 100300006) then goto T21_5;
+ else goto T21_6;
+
+T21_5:
+ response = 0.0199906818;
+ goto D21;
+
+T21_6:
+ response = 0.0230038494;
+ goto D21;
+
+N21_7:
+ if attribute(catid) in (100300014, 100300013, 100300166, 100200034, 100200052, 100300093, 100300008, 100400142, 100200193, 100300065, 100300122, 100300127, 100200170, 100300074, 100300066, 100300045) then goto N21_8;
+ else goto T21_9;
+
+N21_8:
+ if attribute(catid) in (100300013, 100300166, 100200034, 100300093, 100300008, 100400142, 100300127) then goto T21_7;
+ else goto T21_8;
+
+T21_7:
+ response = 0.0272410205;
+ goto D21;
+
+T21_8:
+ response = 0.0353850420;
+ goto D21;
+
+T21_9:
+ response = 0.0546059415;
+ goto D21;
+
+D21:
+
+tnscore = tnscore + response;
+
+ /* Tree 23 of 200 */
+N22_1:
+ if attribute(catid) in (100300014, 100300013, 100200186, 100300165, 100300093, 100300102, 100300005, 100300008, 100200068, 100300027, 100300116, 100200234, 100300073, 100400038, 100200192, 100300212, 100400079, 100300169, 100400080, 100300074, 100300200, 100300076, 100200067, 100300006, 100200232, 100300214) then goto N22_2;
+ else goto N22_5;
+
+N22_2:
+ if attribute(catid) in (100300008, 100200068, 100300027, 100300212, 100400080, 100300074, 100300200, 100200067, 100200232, 100300214) then goto N22_3;
+ else goto N22_4;
+
+N22_3:
+ if attribute(catid) in (100300008, 100200068, 100300212, 100300200, 100200067, 100200232, 100300214) then goto T22_1;
+ else goto T22_2;
+
+T22_1:
+ response = -0.0257347618;
+ goto D22;
+
+T22_2:
+ response = -0.0087401374;
+ goto D22;
+
+N22_4:
+ if attribute(catid) in (100300014, 100300165, 100300102, 100300073, 100200192, 100400079, 100300076) then goto T22_3;
+ else goto T22_4;
+
+T22_3:
+ response = 0.0079479453;
+ goto D22;
+
+T22_4:
+ response = 0.0122270306;
+ goto D22;
+
+N22_5:
+ if attribute(catid) in (0, 100200171, 100300011, 100200130, 100300058, 100300077, 100300166, 100200052, 100300121, 100300004, 100300126, 100400142, 100200054, 100400037, 100300065, 100300122, 100300127, 100200170, 100200176) then goto N22_6;
+ else goto N22_8;
+
+N22_6:
+ if attribute(catid) in (0, 100200130, 100200052, 100300121, 100300004, 100300126, 100200054, 100300065, 100300122, 100200176) then goto N22_7;
+ else goto T22_7;
+
+N22_7:
+ if attribute(catid) in (100200130, 100200052, 100300121, 100300004, 100300065, 100300122) then goto T22_5;
+ else goto T22_6;
+
+T22_5:
+ response = 0.0190487090;
+ goto D22;
+
+T22_6:
+ response = 0.0215394009;
+ goto D22;
+
+T22_7:
+ response = 0.0276338957;
+ goto D22;
+
+N22_8:
+ if attribute(catid) in (100200034, 100400141, 100200172, 100300032, 100200053, 100300209, 100300066, 100200185) then goto T22_8;
+ else goto T22_9;
+
+T22_8:
+ response = 0.0397536732;
+ goto D22;
+
+T22_9:
+ response = 0.0689753704;
+ goto D22;
+
+D22:
+
+tnscore = tnscore + response;
+
+ /* Tree 24 of 200 */
+N23_1:
+ if attribute(catid) in (0, 100200171, 100300011, 100300014, 100300058, 100300077, 100200034, 100200186, 100400141, 100200052, 100300093, 100300005, 100200172, 100200068, 100300027, 100300121, 100200234, 100300019, 100300073, 100200193, 100400038, 100200192, 100300065, 100300212, 100300127, 100400079, 100200170, 100300169, 100400080, 100200087, 100300074, 100300066, 100200176, 100300200, 100200067, 100200055, 100300006, 100300214) then goto N23_2;
+ else goto N23_6;
+
+N23_2:
+ if attribute(catid) in (100200171, 100300011, 100300077, 100200186, 100200052, 100300093, 100300005, 100200068, 100300027, 100300121, 100200234, 100200192, 100300065, 100300212, 100300127, 100400079, 100200170, 100400080, 100200087, 100300200, 100200067, 100200055, 100300214) then goto N23_3;
+ else goto N23_5;
+
+N23_3:
+ if attribute(catid) in (100200186, 100300005, 100200068, 100300212, 100200170, 100200087, 100300200, 100200055, 100300214) then goto T23_1;
+ else goto N23_4;
+
+T23_1:
+ response = -0.0052715451;
+ goto D23;
+
+N23_4:
+ if attribute(catid) in (100200171, 100300011, 100300093, 100300027, 100200234, 100300127, 100400079, 100200067) then goto T23_2;
+ else goto T23_3;
+
+T23_2:
+ response = 0.0075908988;
+ goto D23;
+
+T23_3:
+ response = 0.0114788963;
+ goto D23;
+
+N23_5:
+ if attribute(catid) in (100300019, 100300073, 100200193, 100300169, 100300074, 100300066, 100300006) then goto T23_4;
+ else goto T23_5;
+
+T23_4:
+ response = 0.0146049077;
+ goto D23;
+
+T23_5:
+ response = 0.0198627318;
+ goto D23;
+
+N23_6:
+ if attribute(catid) in (100300165, 100300008, 100300032, 100300116, 100200053, 100400142, 100400037, 100300122) then goto T23_6;
+ else goto N23_7;
+
+T23_6:
+ response = 0.0286281196;
+ goto D23;
+
+N23_7:
+ if attribute(catid) in (100200130, 100300013, 100300166, 100300004, 100300126, 100200054, 100300007, 100200028) then goto T23_7;
+ else goto T23_8;
+
+T23_7:
+ response = 0.0377626212;
+ goto D23;
+
+T23_8:
+ response = 0.0646214069;
+ goto D23;
+
+D23:
+
+tnscore = tnscore + response;
+
+ /* Tree 25 of 200 */
+N24_1:
+ if attribute(catid) in (0, 100200171, 100300011, 100300014, 100300058, 100300077, 100200186, 100400141, 100300165, 100200052, 100300093, 100300005, 100200172, 100200068, 100300032, 100300027, 100300121, 100200234, 100300019, 100400142, 100300073, 100200193, 100200192, 100300209, 100300122, 100300127, 100400079, 100200170, 100300169, 100400080, 100200087, 100300074, 100300066, 100300200, 100200028, 100300006, 100200232) then goto N24_2;
+ else goto N24_6;
+
+N24_2:
+ if attribute(catid) in (100300014, 100300058, 100200186, 100400141, 100300093, 100300005, 100300032, 100200234, 100300019, 100300073, 100200192, 100300169, 100200087, 100300074, 100300200, 100200232) then goto N24_3;
+ else goto N24_4;
+
+N24_3:
+ if attribute(catid) in (100300014, 100300058, 100200186, 100300032, 100300019, 100300074, 100300200, 100200232) then goto T24_1;
+ else goto T24_2;
+
+T24_1:
+ response = -0.0076214570;
+ goto D24;
+
+T24_2:
+ response = 0.0077673481;
+ goto D24;
+
+N24_4:
+ if attribute(catid) in (0, 100200171, 100300011, 100300077, 100300165, 100200068, 100300121, 100200193, 100300209, 100300122, 100400079, 100400080, 100300066, 100200028, 100300006) then goto N24_5;
+ else goto T24_5;
+
+N24_5:
+ if attribute(catid) in (100200171, 100300011, 100300165, 100400079, 100300006) then goto T24_3;
+ else goto T24_4;
+
+T24_3:
+ response = 0.0143096613;
+ goto D24;
+
+T24_4:
+ response = 0.0162758268;
+ goto D24;
+
+T24_5:
+ response = 0.0237528500;
+ goto D24;
+
+N24_6:
+ if attribute(catid) in (100200130, 100300166, 100200034, 100300116, 100200053, 100300004, 100200054, 100400038, 100300065, 100300212, 100300007, 100200176, 100300045, 100300076) then goto N24_7;
+ else goto N24_8;
+
+N24_7:
+ if attribute(catid) in (100200130, 100300166, 100200034, 100300116, 100200053) then goto T24_6;
+ else goto T24_7;
+
+T24_6:
+ response = 0.0297411208;
+ goto D24;
+
+T24_7:
+ response = 0.0387614885;
+ goto D24;
+
+N24_8:
+ if attribute(catid) in (100300143, 100300126, 100400037, 100200185) then goto T24_8;
+ else goto T24_9;
+
+T24_8:
+ response = 0.0600165302;
+ goto D24;
+
+T24_9:
+ response = 0.0961472130;
+ goto D24;
+
+D24:
+
+tnscore = tnscore + response;
+
+ /* Tree 26 of 200 */
+N25_1:
+ if attribute(catid) in (0, 100200171, 100200130, 100300058, 100300077, 100200034, 100200186, 100400141, 100200052, 100300005, 100200172, 100300008, 100200068, 100300032, 100300027, 100300116, 100300121, 100300004, 100300073, 100400038, 100200192, 100300212, 100300209, 100400079, 100200170, 100400080, 100200087, 100300074, 100300066, 100300007, 100300200, 100200028, 100200067, 100200055, 100200232, 100300214, 100300146) then goto N25_2;
+ else goto N25_5;
+
+N25_2:
+ if attribute(catid) in (100200034, 100300005, 100300008, 100200068, 100300116, 100300212, 100300209, 100400079, 100400080, 100300200, 100200067, 100200055, 100200232, 100300214, 100300146) then goto N25_3;
+ else goto N25_4;
+
+N25_3:
+ if attribute(catid) in (100200034, 100300008, 100300212, 100300209, 100200055, 100200232, 100300214, 100300146) then goto T25_1;
+ else goto T25_2;
+
+T25_1:
+ response = -0.0143064261;
+ goto D25;
+
+T25_2:
+ response = 0.0061747257;
+ goto D25;
+
+N25_4:
+ if attribute(catid) in (100200171, 100300058, 100200186, 100400141, 100200172, 100300032, 100300027, 100300121, 100300004, 100400038, 100200170, 100200087, 100300074, 100200028) then goto T25_3;
+ else goto T25_4;
+
+T25_3:
+ response = 0.0140796593;
+ goto D25;
+
+T25_4:
+ response = 0.0185037483;
+ goto D25;
+
+N25_5:
+ if attribute(catid) in (100300011, 100300014, 100300013, 100300166, 100300143, 100300165, 100300093, 100300102, 100400142, 100400037, 100300065, 100300122, 100300127, 100300169, 100300006) then goto N25_6;
+ else goto T25_7;
+
+N25_6:
+ if attribute(catid) in (100300011, 100300014, 100300013, 100300166, 100300165, 100300102, 100400142, 100400037, 100300169) then goto T25_5;
+ else goto T25_6;
+
+T25_5:
+ response = 0.0279089674;
+ goto D25;
+
+T25_6:
+ response = 0.0344726516;
+ goto D25;
+
+T25_7:
+ response = 0.0515666225;
+ goto D25;
+
+D25:
+
+tnscore = tnscore + response;
+
+ /* Tree 27 of 200 */
+N26_1:
+ if attribute(catid) in (0, 100200171, 100300011, 100300014, 100300077, 100300166, 100300143, 100200034, 100200186, 100400141, 100200052, 100300093, 100300005, 100200172, 100300008, 100200068, 100300032, 100300027, 100300121, 100200053, 100400142, 100200054, 100300073, 100400037, 100200192, 100300209, 100300122, 100400079, 100200170, 100300169, 100400080, 100200087, 100300066, 100200176, 100300200, 100200028, 100300076, 100200185) then goto N26_2;
+ else goto N26_6;
+
+N26_2:
+ if attribute(catid) in (100300011, 100300014, 100200068, 100300032, 100200053, 100200192, 100300209, 100300122, 100200170, 100400080, 100200176, 100200028, 100300076, 100200185) then goto N26_3;
+ else goto N26_4;
+
+N26_3:
+ if attribute(catid) in (100300014, 100200068, 100300032, 100300209, 100400080, 100200176, 100200028, 100300076, 100200185) then goto T26_1;
+ else goto T26_2;
+
+T26_1:
+ response = -0.0100026799;
+ goto D26;
+
+T26_2:
+ response = 0.0069768979;
+ goto D26;
+
+N26_4:
+ if attribute(catid) in (100300077, 100300143, 100400141, 100200052, 100300005, 100300008, 100300121, 100400079, 100300169, 100200087) then goto T26_3;
+ else goto N26_5;
+
+T26_3:
+ response = 0.0149447853;
+ goto D26;
+
+N26_5:
+ if attribute(catid) in (0, 100300166, 100200034, 100200186, 100200172, 100400037, 100300066, 100300200) then goto T26_4;
+ else goto T26_5;
+
+T26_4:
+ response = 0.0207339117;
+ goto D26;
+
+T26_5:
+ response = 0.0250275322;
+ goto D26;
+
+N26_6:
+ if attribute(catid) in (100200130, 100300058, 100300165, 100300102, 100300116, 100200234, 100300004, 100300126, 100200193, 100400038, 100300065, 100300127, 100300074, 100300045, 100300006) then goto N26_7;
+ else goto T26_8;
+
+N26_7:
+ if attribute(catid) in (100300058, 100300102, 100300116, 100200234, 100300126, 100200193, 100400038, 100300127, 100300006) then goto T26_6;
+ else goto T26_7;
+
+T26_6:
+ response = 0.0305216411;
+ goto D26;
+
+T26_7:
+ response = 0.0392374586;
+ goto D26;
+
+T26_8:
+ response = 0.0735120500;
+ goto D26;
+
+D26:
+
+tnscore = tnscore + response;
+
+ /* Tree 28 of 200 */
+N27_1:
+ if attribute(catid) in (0, 100200171, 100300011, 100200130, 100300014, 100300058, 100300166, 100200186, 100400141, 100300165, 100200052, 100300102, 100300005, 100200172, 100300008, 100200068, 100300027, 100300121, 100200234, 100200053, 100300019, 100300004, 100400142, 100300073, 100400037, 100200193, 100400038, 100200192, 100300065, 100300209, 100300122, 100400079, 100200170, 100300169, 100400080, 100300066, 100200176, 100300076, 100200067, 100200055, 100300006, 100300214, 100300146) then goto N27_2;
+ else goto N27_8;
+
+N27_2:
+ if attribute(catid) in (100200171, 100300011, 100300014, 100400141, 100300102, 100300005, 100300008, 100300121, 100300019, 100200193, 100200192, 100300209, 100200170, 100400080, 100200067, 100200055, 100300214) then goto N27_3;
+ else goto N27_5;
+
+N27_3:
+ if attribute(catid) in (100300102, 100300008, 100300019, 100200193, 100300209, 100200067, 100200055, 100300214) then goto T27_1;
+ else goto N27_4;
+
+T27_1:
+ response = -0.0452597976;
+ goto D27;
+
+N27_4:
+ if attribute(catid) in (100400141, 100300005, 100200192, 100400080) then goto T27_2;
+ else goto T27_3;
+
+T27_2:
+ response = 0.0025798730;
+ goto D27;
+
+T27_3:
+ response = 0.0085058714;
+ goto D27;
+
+N27_5:
+ if attribute(catid) in (100200130, 100200186, 100300165, 100200172, 100200068, 100300027, 100200234, 100400142, 100300066, 100200176, 100300076, 100300146) then goto N27_6;
+ else goto N27_7;
+
+N27_6:
+ if attribute(catid) in (100200130, 100200186, 100200068, 100300066, 100200176, 100300076, 100300146) then goto T27_4;
+ else goto T27_5;
+
+T27_4:
+ response = 0.0134972332;
+ goto D27;
+
+T27_5:
+ response = 0.0161598104;
+ goto D27;
+
+N27_7:
+ if attribute(catid) in (0, 100200053, 100300073) then goto T27_6;
+ else goto T27_7;
+
+T27_6:
+ response = 0.0201733337;
+ goto D27;
+
+T27_7:
+ response = 0.0242718101;
+ goto D27;
+
+N27_8:
+ if attribute(catid) in (100300013, 100300077, 100300143, 100200034, 100300093, 100300116, 100300127, 100200087, 100300074, 100300007, 100300200, 100300045) then goto T27_8;
+ else goto T27_9;
+
+T27_8:
+ response = 0.0376364024;
+ goto D27;
+
+T27_9:
+ response = 0.0722294524;
+ goto D27;
+
+D27:
+
+tnscore = tnscore + response;
+
+ /* Tree 29 of 200 */
+N28_1:
+ if attribute(catid) in (100300014, 100400141, 100300102, 100300008, 100200068, 100300032, 100200234, 100300019, 100400038, 100300212, 100400080, 100200176, 100300200, 100200028, 100200055, 100200185, 100300006, 100300214) then goto N28_2;
+ else goto N28_4;
+
+N28_2:
+ if attribute(catid) in (100300014, 100300102, 100300008, 100200068, 100300032, 100300019, 100300212, 100300200, 100200055, 100300214) then goto N28_3;
+ else goto T28_3;
+
+N28_3:
+ if attribute(catid) in (100300102, 100300008, 100300032, 100300019, 100300212, 100300200, 100200055, 100300214) then goto T28_1;
+ else goto T28_2;
+
+T28_1:
+ response = -0.0330162432;
+ goto D28;
+
+T28_2:
+ response = -0.0099054066;
+ goto D28;
+
+T28_3:
+ response = 0.0037184723;
+ goto D28;
+
+N28_4:
+ if attribute(catid) in (0, 100200171, 100200130, 100300058, 100300166, 100300143, 100200034, 100200186, 100300165, 100200052, 100300093, 100300005, 100200172, 100300027, 100200053, 100200192, 100300065, 100300127, 100400079, 100200170, 100300169, 100300074, 100300076) then goto N28_5;
+ else goto N28_8;
+
+N28_5:
+ if attribute(catid) in (100200171, 100300166, 100200034, 100200186, 100300165, 100300093, 100300005, 100300027, 100200053, 100400079, 100300169, 100300074) then goto N28_6;
+ else goto N28_7;
+
+N28_6:
+ if attribute(catid) in (100200171, 100300166, 100200186, 100300005, 100300027, 100200053, 100300169) then goto T28_4;
+ else goto T28_5;
+
+T28_4:
+ response = 0.0128773968;
+ goto D28;
+
+T28_5:
+ response = 0.0170605503;
+ goto D28;
+
+N28_7:
+ if attribute(catid) in (0, 100300143, 100200192, 100300065, 100200170, 100300076) then goto T28_6;
+ else goto T28_7;
+
+T28_6:
+ response = 0.0199592353;
+ goto D28;
+
+T28_7:
+ response = 0.0237865531;
+ goto D28;
+
+N28_8:
+ if attribute(catid) in (100300077, 100300121, 100400142, 100300073, 100200193, 100200087, 100300066, 100300007) then goto T28_8;
+ else goto T28_9;
+
+T28_8:
+ response = 0.0288156047;
+ goto D28;
+
+T28_9:
+ response = 0.0451598089;
+ goto D28;
+
+D28:
+
+tnscore = tnscore + response;
+
+ /* Tree 30 of 200 */
+N29_1:
+ if attribute(catid) in (100200171, 100300011, 100300013, 100200034, 100200186, 100200052, 100300102, 100300008, 100200068, 100300027, 100300121, 100200053, 100300019, 100300004, 100300073, 100200193, 100400038, 100200192, 100300212, 100300127, 100400079, 100200170, 100400080, 100200087, 100300074, 100300007, 100300214, 100300146) then goto N29_2;
+ else goto N29_5;
+
+N29_2:
+ if attribute(catid) in (100300013, 100200034, 100200068, 100300121, 100300019, 100200170, 100200087, 100300214, 100300146) then goto N29_3;
+ else goto N29_4;
+
+N29_3:
+ if attribute(catid) in (100300013, 100200034, 100300121, 100300019, 100300214, 100300146) then goto T29_1;
+ else goto T29_2;
+
+T29_1:
+ response = -0.0200374966;
+ goto D29;
+
+T29_2:
+ response = -0.0056497245;
+ goto D29;
+
+N29_4:
+ if attribute(catid) in (100300011, 100200186, 100300102, 100300008, 100300004, 100200192, 100300212, 100400079, 100400080, 100300074) then goto T29_3;
+ else goto T29_4;
+
+T29_3:
+ response = 0.0036157343;
+ goto D29;
+
+T29_4:
+ response = 0.0117177746;
+ goto D29;
+
+N29_5:
+ if attribute(catid) in (0, 100200130, 100300014, 100300058, 100300166, 100300143, 100400141, 100300165, 100300093, 100300005, 100200172, 100300032, 100300116, 100300126, 100400142, 100300065, 100300122, 100300169, 100300066, 100300200, 100200028, 100300006) then goto N29_6;
+ else goto N29_8;
+
+N29_6:
+ if attribute(catid) in (0, 100200130, 100300166, 100400141, 100300165, 100200172, 100300032, 100300066, 100300006) then goto N29_7;
+ else goto T29_7;
+
+N29_7:
+ if attribute(catid) in (100200130, 100400141, 100300165, 100200172, 100300032, 100300006) then goto T29_5;
+ else goto T29_6;
+
+T29_5:
+ response = 0.0173296173;
+ goto D29;
+
+T29_6:
+ response = 0.0209361475;
+ goto D29;
+
+T29_7:
+ response = 0.0268947656;
+ goto D29;
+
+N29_8:
+ if attribute(catid) in (100300077, 100400037, 100300209, 100200176, 100300045, 100300076, 100200185) then goto T29_8;
+ else goto T29_9;
+
+T29_8:
+ response = 0.0431779718;
+ goto D29;
+
+T29_9:
+ response = 0.0596202146;
+ goto D29;
+
+D29:
+
+tnscore = tnscore + response;
+
+ /* Tree 31 of 200 */
+N30_1:
+ if attribute(catid) in (100300011, 100300014, 100200034, 100300102, 100300005, 100200068, 100300032, 100300027, 100300116, 100200234, 100300073, 100400038, 100200192, 100400079, 100200170, 100400080, 100300200, 100200028, 100200067, 100200055, 100300006, 100200232, 100300146) then goto N30_2;
+ else goto N30_4;
+
+N30_2:
+ if attribute(catid) in (100300011, 100300102, 100200068, 100300032, 100300027, 100400038, 100300200, 100200067, 100200055) then goto T30_1;
+ else goto N30_3;
+
+T30_1:
+ response = -0.0105115826;
+ goto D30;
+
+N30_3:
+ if attribute(catid) in (100300014, 100200234, 100200028, 100300006, 100200232) then goto T30_2;
+ else goto T30_3;
+
+T30_2:
+ response = 0.0007402621;
+ goto D30;
+
+T30_3:
+ response = 0.0078629039;
+ goto D30;
+
+N30_4:
+ if attribute(catid) in (0, 100200171, 100200130, 100300077, 100300143, 100200186, 100400141, 100300165, 100200052, 100200172, 100300121, 100200053, 100300019, 100400142, 100300122, 100300127, 100300169, 100200087, 100200176) then goto N30_5;
+ else goto N30_7;
+
+N30_5:
+ if attribute(catid) in (100200171, 100200130, 100200186, 100200052, 100200172, 100200053, 100300122, 100200176) then goto T30_4;
+ else goto N30_6;
+
+T30_4:
+ response = 0.0148540934;
+ goto D30;
+
+N30_6:
+ if attribute(catid) in (0, 100300077, 100300143, 100400141, 100300019, 100400142, 100300127) then goto T30_5;
+ else goto T30_6;
+
+T30_5:
+ response = 0.0193734454;
+ goto D30;
+
+T30_6:
+ response = 0.0217320370;
+ goto D30;
+
+N30_7:
+ if attribute(catid) in (100300166, 100300126, 100200054, 100200193, 100300065, 100300212, 100300066) then goto T30_7;
+ else goto T30_8;
+
+T30_7:
+ response = 0.0305394508;
+ goto D30;
+
+T30_8:
+ response = 0.0503395698;
+ goto D30;
+
+D30:
+
+tnscore = tnscore + response;
+
+ /* Tree 32 of 200 */
+N31_1:
+ if attribute(catid) in (0, 100200171, 100200130, 100300014, 100300013, 100300077, 100200034, 100200186, 100400141, 100300165, 100200052, 100300102, 100300005, 100200172, 100300008, 100200068, 100300032, 100300027, 100300116, 100300121, 100200053, 100300019, 100300126, 100200054, 100300073, 100400038, 100200192, 100300065, 100300212, 100300209, 100300122, 100400079, 100200170, 100300169, 100400080, 100200087, 100300074, 100200176, 100300200, 100200028, 100300076, 100200055, 100300006, 100200232) then goto N31_2;
+ else goto N31_7;
+
+N31_2:
+ if attribute(catid) in (100300014, 100300013, 100300077, 100400141, 100300165, 100300005, 100300008, 100300116, 100200053, 100300126, 100400038, 100300212, 100300209, 100400079, 100200170, 100200087, 100200176, 100300200, 100200028, 100300076, 100200055, 100300006, 100200232) then goto N31_3;
+ else goto N31_5;
+
+N31_3:
+ if attribute(catid) in (100300005, 100300126, 100300209, 100200087, 100300200, 100200055) then goto T31_1;
+ else goto N31_4;
+
+T31_1:
+ response = -0.0155099848;
+ goto D31;
+
+N31_4:
+ if attribute(catid) in (100300014, 100300013, 100400141, 100400038, 100300212, 100400079, 100200170, 100200028, 100300076, 100200232) then goto T31_2;
+ else goto T31_3;
+
+T31_2:
+ response = 0.0052367005;
+ goto D31;
+
+T31_3:
+ response = 0.0111703118;
+ goto D31;
+
+N31_5:
+ if attribute(catid) in (0, 100200034, 100300102, 100200068, 100300027, 100300121, 100300073, 100200192, 100300169, 100400080, 100300074) then goto N31_6;
+ else goto T31_6;
+
+N31_6:
+ if attribute(catid) in (100200034, 100200068, 100300027, 100300121, 100300073, 100200192, 100300169, 100400080, 100300074) then goto T31_4;
+ else goto T31_5;
+
+T31_4:
+ response = 0.0150887568;
+ goto D31;
+
+T31_5:
+ response = 0.0184571681;
+ goto D31;
+
+T31_6:
+ response = 0.0221098064;
+ goto D31;
+
+N31_7:
+ if attribute(catid) in (100300011, 100300166, 100300093, 100300004, 100400142, 100300127, 100300066, 100300045) then goto T31_7;
+ else goto N31_8;
+
+T31_7:
+ response = 0.0294845237;
+ goto D31;
+
+N31_8:
+ if attribute(catid) in (100300058, 100300143, 100200193, 100300007) then goto T31_8;
+ else goto T31_9;
+
+T31_8:
+ response = 0.0412156817;
+ goto D31;
+
+T31_9:
+ response = 0.0581097149;
+ goto D31;
+
+D31:
+
+tnscore = tnscore + response;
+
+ /* Tree 33 of 200 */
+N32_1:
+ if attribute(catid) in (0, 100200171, 100300011, 100200130, 100300014, 100300058, 100300077, 100300166, 100200034, 100200186, 100400141, 100200052, 100300093, 100300102, 100300005, 100300008, 100200068, 100300032, 100300027, 100300116, 100300121, 100200234, 100200053, 100300019, 100300126, 100200054, 100300073, 100400037, 100200193, 100400038, 100200192, 100300065, 100300212, 100300209, 100300127, 100400079, 100200170, 100300169, 100400080, 100200087, 100300066, 100300007, 100200176, 100300200, 100300076, 100200067, 100200055, 100200185, 100300214) then goto N32_2;
+ else goto N32_7;
+
+N32_2:
+ if attribute(catid) in (100300011, 100300014, 100300008, 100200068, 100300032, 100300027, 100300116, 100200234, 100300019, 100400037, 100300209, 100200176, 100200055, 100200185, 100300214) then goto N32_3;
+ else goto N32_4;
+
+N32_3:
+ if attribute(catid) in (100300008, 100200068, 100300032, 100200055, 100200185, 100300214) then goto T32_1;
+ else goto T32_2;
+
+T32_1:
+ response = -0.0168814696;
+ goto D32;
+
+T32_2:
+ response = -0.0007128433;
+ goto D32;
+
+N32_4:
+ if attribute(catid) in (100200171, 100300058, 100300077, 100200034, 100400141, 100300102, 100300005, 100300126, 100200054, 100200193, 100400038, 100200192, 100300212, 100200170, 100300169, 100400080, 100200087, 100300076) then goto N32_5;
+ else goto N32_6;
+
+N32_5:
+ if attribute(catid) in (100300058, 100200034, 100200193, 100400038, 100200170, 100300169, 100400080, 100300076) then goto T32_3;
+ else goto T32_4;
+
+T32_3:
+ response = 0.0104471779;
+ goto D32;
+
+T32_4:
+ response = 0.0151303026;
+ goto D32;
+
+N32_6:
+ if attribute(catid) in (0, 100200186, 100300093, 100300121, 100200053, 100300073, 100300200) then goto T32_5;
+ else goto T32_6;
+
+T32_5:
+ response = 0.0192366025;
+ goto D32;
+
+T32_6:
+ response = 0.0235706074;
+ goto D32;
+
+N32_7:
+ if attribute(catid) in (100300143, 100300165, 100200172, 100300004, 100400142, 100300122, 100300074, 100200232) then goto N32_8;
+ else goto T32_9;
+
+N32_8:
+ if attribute(catid) in (100200172, 100400142, 100300122, 100200232) then goto T32_7;
+ else goto T32_8;
+
+T32_7:
+ response = 0.0313391652;
+ goto D32;
+
+T32_8:
+ response = 0.0410697301;
+ goto D32;
+
+T32_9:
+ response = 0.0691824633;
+ goto D32;
+
+D32:
+
+tnscore = tnscore + response;
+
+ /* Tree 34 of 200 */
+N33_1:
+ if attribute(catid) in (100300013, 100200186, 100400141, 100300005, 100200068, 100300032, 100300116, 100200234, 100300004, 100200192, 100300212, 100300209, 100200170, 100400080, 100300074, 100300200, 100200028, 100200055, 100300146) then goto N33_2;
+ else goto N33_4;
+
+N33_2:
+ if attribute(catid) in (100300032, 100300212, 100300209, 100200028, 100200055, 100300146) then goto T33_1;
+ else goto N33_3;
+
+T33_1:
+ response = -0.0313637219;
+ goto D33;
+
+N33_3:
+ if attribute(catid) in (100300013, 100200186, 100400141, 100200068, 100200234, 100300004, 100400080, 100300200) then goto T33_2;
+ else goto T33_3;
+
+T33_2:
+ response = 0.0050115322;
+ goto D33;
+
+T33_3:
+ response = 0.0108163539;
+ goto D33;
+
+N33_4:
+ if attribute(catid) in (0, 100200171, 100200130, 100300077, 100200034, 100300165, 100300102, 100200172, 100300008, 100300027, 100300121, 100200053, 100300126, 100400142, 100300073, 100400038, 100300065, 100300127, 100400079, 100300169, 100200087, 100300066, 100300007, 100300076, 100300006) then goto N33_5;
+ else goto N33_7;
+
+N33_5:
+ if attribute(catid) in (0, 100300077, 100200034, 100300165, 100200172, 100300008, 100300121, 100200053, 100300126, 100400142, 100300073, 100400038, 100300065, 100400079, 100200087) then goto N33_6;
+ else goto T33_6;
+
+N33_6:
+ if attribute(catid) in (100300077, 100300165, 100200172, 100300008, 100300121, 100300073, 100400038, 100200087) then goto T33_4;
+ else goto T33_5;
+
+T33_4:
+ response = 0.0169532751;
+ goto D33;
+
+T33_5:
+ response = 0.0197771960;
+ goto D33;
+
+T33_6:
+ response = 0.0229019262;
+ goto D33;
+
+N33_7:
+ if attribute(catid) in (100300014, 100300058, 100300166, 100300143, 100200052, 100300093, 100200054, 100300122) then goto T33_7;
+ else goto T33_8;
+
+T33_7:
+ response = 0.0275740075;
+ goto D33;
+
+T33_8:
+ response = 0.0402576409;
+ goto D33;
+
+D33:
+
+tnscore = tnscore + response;
+
+ /* Tree 35 of 200 */
+N34_1:
+ if attribute(catid) in (0, 100200171, 100300011, 100300058, 100300013, 100300077, 100200186, 100300165, 100200052, 100300102, 100300005, 100200068, 100300032, 100300027, 100300116, 100200234, 100300019, 100300126, 100400142, 100300073, 100200192, 100400079, 100300169, 100400080, 100300074, 100300200, 100300045, 100200028, 100300076, 100200185, 100200232, 100300214) then goto N34_2;
+ else goto N34_5;
+
+N34_2:
+ if attribute(catid) in (100300011, 100300013, 100200052, 100300102, 100200068, 100300032, 100300116, 100300019, 100300126, 100300076, 100200185, 100200232, 100300214) then goto N34_3;
+ else goto N34_4;
+
+N34_3:
+ if attribute(catid) in (100300013, 100200068, 100300032, 100300019, 100300076, 100200185, 100200232, 100300214) then goto T34_1;
+ else goto T34_2;
+
+T34_1:
+ response = -0.0132508399;
+ goto D34;
+
+T34_2:
+ response = 0.0002145632;
+ goto D34;
+
+N34_4:
+ if attribute(catid) in (0, 100300058, 100300077, 100200186, 100300165, 100300005, 100300027, 100300073, 100200192, 100300169, 100400080) then goto T34_3;
+ else goto T34_4;
+
+T34_3:
+ response = 0.0144506818;
+ goto D34;
+
+T34_4:
+ response = 0.0181232118;
+ goto D34;
+
+N34_5:
+ if attribute(catid) in (100200130, 100300014, 100300166, 100300143, 100200034, 100400141, 100300093, 100200172, 100300008, 100300121, 100200053, 100300004, 100300065, 100300212, 100300209, 100300122, 100300127, 100200170, 100200087, 100300006) then goto N34_6;
+ else goto T34_7;
+
+N34_6:
+ if attribute(catid) in (100200130, 100200034, 100400141, 100300093, 100300121, 100300004, 100300065, 100300212, 100300127, 100200170, 100200087, 100300006) then goto T34_5;
+ else goto T34_6;
+
+T34_5:
+ response = 0.0237514530;
+ goto D34;
+
+T34_6:
+ response = 0.0281193568;
+ goto D34;
+
+T34_7:
+ response = 0.0394520537;
+ goto D34;
+
+D34:
+
+tnscore = tnscore + response;
+
+ /* Tree 36 of 200 */
+N35_1:
+ if attribute(catid) in (100300011, 100300013, 100300077, 100200034, 100200186, 100300102, 100300005, 100200068, 100300116, 100300121, 100400079, 100200170, 100400080, 100300074, 100300200, 100300076, 100200055, 100300214, 100300146) then goto N35_2;
+ else goto N35_3;
+
+N35_2:
+ if attribute(catid) in (100300011, 100300013, 100200034, 100200186, 100300005, 100200170, 100300076, 100200055, 100300214) then goto T35_1;
+ else goto T35_2;
+
+T35_1:
+ response = -0.0113273120;
+ goto D35;
+
+T35_2:
+ response = 0.0081802635;
+ goto D35;
+
+N35_3:
+ if attribute(catid) in (0, 100200171, 100200130, 100300058, 100300166, 100400141, 100300165, 100200172, 100300008, 100300027, 100200234, 100200053, 100300126, 100400142, 100200054, 100300073, 100400037, 100400038, 100200192, 100300065, 100300122, 100300169, 100300066, 100300045, 100200028, 100200067, 100300006) then goto N35_4;
+ else goto N35_6;
+
+N35_4:
+ if attribute(catid) in (100200130, 100300058, 100300165, 100200172, 100300027, 100200234, 100300073) then goto T35_3;
+ else goto N35_5;
+
+T35_3:
+ response = 0.0154377299;
+ goto D35;
+
+N35_5:
+ if attribute(catid) in (0, 100400141, 100300008, 100400037, 100400038, 100200192, 100300169, 100300045, 100200067) then goto T35_4;
+ else goto T35_5;
+
+T35_4:
+ response = 0.0188466465;
+ goto D35;
+
+T35_5:
+ response = 0.0219373268;
+ goto D35;
+
+N35_6:
+ if attribute(catid) in (100300014, 100200052, 100300093, 100300032, 100300004, 100300127, 100200087, 100200176, 100200185) then goto T35_6;
+ else goto T35_7;
+
+T35_6:
+ response = 0.0332492867;
+ goto D35;
+
+T35_7:
+ response = 0.0538118306;
+ goto D35;
+
+D35:
+
+tnscore = tnscore + response;
+
+ /* Tree 37 of 200 */
+N36_1:
+ if attribute(catid) in (0, 100200171, 100300011, 100200130, 100300014, 100300013, 100300166, 100300143, 100200186, 100300165, 100200052, 100300093, 100300008, 100200068, 100300027, 100300116, 100300121, 100200053, 100300019, 100300004, 100300126, 100400142, 100200054, 100300073, 100400038, 100200192, 100300212, 100300122, 100200170, 100300169, 100400080, 100200087, 100300074, 100300007, 100300200, 100200067, 100200055, 100300214, 100300146) then goto N36_2;
+ else goto N36_6;
+
+N36_2:
+ if attribute(catid) in (100200171, 100300014, 100300013, 100300008, 100200068, 100300116, 100300121, 100200054, 100300073, 100200192, 100300212, 100400080, 100200087, 100300200, 100200055, 100300214, 100300146) then goto N36_3;
+ else goto N36_4;
+
+N36_3:
+ if attribute(catid) in (100300014, 100300008, 100200068, 100200054, 100300212, 100300200, 100200055, 100300214) then goto T36_1;
+ else goto T36_2;
+
+T36_1:
+ response = -0.0068335973;
+ goto D36;
+
+T36_2:
+ response = 0.0078647534;
+ goto D36;
+
+N36_4:
+ if attribute(catid) in (100300165, 100200052, 100200053, 100300019, 100300004, 100300126, 100300122, 100200170, 100300169, 100300074) then goto T36_3;
+ else goto N36_5;
+
+T36_3:
+ response = 0.0135025323;
+ goto D36;
+
+N36_5:
+ if attribute(catid) in (0, 100300143, 100300027, 100400142, 100400038) then goto T36_4;
+ else goto T36_5;
+
+T36_4:
+ response = 0.0176344289;
+ goto D36;
+
+T36_5:
+ response = 0.0196235950;
+ goto D36;
+
+N36_6:
+ if attribute(catid) in (100300058, 100200034, 100400141, 100200193, 100300065, 100300209, 100300127, 100400079, 100300066, 100300045, 100200028, 100300076, 100300006) then goto T36_6;
+ else goto N36_7;
+
+T36_6:
+ response = 0.0257873841;
+ goto D36;
+
+N36_7:
+ if attribute(catid) in (100200172, 100300032, 100200176) then goto T36_7;
+ else goto T36_8;
+
+T36_7:
+ response = 0.0338144700;
+ goto D36;
+
+T36_8:
+ response = 0.0425972117;
+ goto D36;
+
+D36:
+
+tnscore = tnscore + response;
+
+ /* Tree 38 of 200 */
+N37_1:
+ if attribute(catid) in (0, 100300011, 100300014, 100300013, 100300077, 100300166, 100200034, 100200186, 100400141, 100300165, 100200052, 100300102, 100200172, 100300008, 100200068, 100300027, 100300116, 100200234, 100200053, 100300126, 100200054, 100300073, 100400037, 100200192, 100300209, 100300127, 100400079, 100300169, 100400080, 100200087, 100300074, 100300066, 100300007, 100200176, 100200028, 100300076) then goto N37_2;
+ else goto N37_5;
+
+N37_2:
+ if attribute(catid) in (100300011, 100300014, 100200034, 100200186, 100300102, 100300008, 100200068, 100300027, 100300116, 100300073, 100400037, 100200192, 100300209, 100400079, 100200087, 100300074, 100200176, 100200028, 100300076) then goto N37_3;
+ else goto N37_4;
+
+N37_3:
+ if attribute(catid) in (100300102, 100300008, 100400037, 100200087, 100200176, 100300076) then goto T37_1;
+ else goto T37_2;
+
+T37_1:
+ response = -0.0165719048;
+ goto D37;
+
+T37_2:
+ response = 0.0081977488;
+ goto D37;
+
+N37_4:
+ if attribute(catid) in (100300013, 100300165, 100200052, 100200172, 100200053, 100200054, 100300169) then goto T37_3;
+ else goto T37_4;
+
+T37_3:
+ response = 0.0136130091;
+ goto D37;
+
+T37_4:
+ response = 0.0180110506;
+ goto D37;
+
+N37_5:
+ if attribute(catid) in (100200171, 100200130, 100300058, 100300093, 100300032, 100300121, 100400142, 100200170, 100300200, 100300006, 100200232) then goto T37_5;
+ else goto N37_6;
+
+T37_5:
+ response = 0.0236852926;
+ goto D37;
+
+N37_6:
+ if attribute(catid) in (100300143, 100300005, 100200193, 100300065, 100300122, 100200067) then goto T37_6;
+ else goto T37_7;
+
+T37_6:
+ response = 0.0327973275;
+ goto D37;
+
+T37_7:
+ response = 0.0538361793;
+ goto D37;
+
+D37:
+
+tnscore = tnscore + response;
+
+ /* Tree 39 of 200 */
+N38_1:
+ if attribute(catid) in (100200171, 100300011, 100300014, 100300013, 100300143, 100200034, 100200186, 100300005, 100300008, 100200068, 100300116, 100200053, 100300019, 100400037, 100200192, 100200170, 100300074, 100300007, 100300200, 100200028, 100200055, 100300214, 100300146) then goto N38_2;
+ else goto N38_5;
+
+N38_2:
+ if attribute(catid) in (100300011, 100300143, 100200186, 100300005, 100300008, 100200068, 100300019, 100200055, 100300214, 100300146) then goto N38_3;
+ else goto N38_4;
+
+N38_3:
+ if attribute(catid) in (100300011, 100300143, 100300005, 100300019, 100200055, 100300214, 100300146) then goto T38_1;
+ else goto T38_2;
+
+T38_1:
+ response = -0.0264171514;
+ goto D38;
+
+T38_2:
+ response = -0.0055670691;
+ goto D38;
+
+N38_4:
+ if attribute(catid) in (100300014, 100300013, 100300116, 100400037, 100200192, 100200170, 100300007) then goto T38_3;
+ else goto T38_4;
+
+T38_3:
+ response = 0.0026101595;
+ goto D38;
+
+T38_4:
+ response = 0.0084034666;
+ goto D38;
+
+N38_5:
+ if attribute(catid) in (0, 100200130, 100300058, 100300077, 100300166, 100400141, 100200052, 100300093, 100300027, 100300121, 100200234, 100300004, 100300126, 100200054, 100300073, 100300122, 100300127, 100400079, 100300169, 100400080, 100200087, 100200176, 100300045, 100300076, 100200067, 100300006, 100200232) then goto N38_6;
+ else goto N38_9;
+
+N38_6:
+ if attribute(catid) in (0, 100300058, 100300077, 100200052, 100300093, 100300027, 100200234, 100200054, 100300073, 100300169, 100300045, 100200067, 100200232) then goto N38_7;
+ else goto N38_8;
+
+N38_7:
+ if attribute(catid) in (100300077, 100300027, 100200234, 100200054, 100300073, 100300169) then goto T38_5;
+ else goto T38_6;
+
+T38_5:
+ response = 0.0136319750;
+ goto D38;
+
+T38_6:
+ response = 0.0160752676;
+ goto D38;
+
+N38_8:
+ if attribute(catid) in (100200130, 100400141, 100300121, 100300127, 100400079, 100200176, 100300076) then goto T38_7;
+ else goto T38_8;
+
+T38_7:
+ response = 0.0182663338;
+ goto D38;
+
+T38_8:
+ response = 0.0220047542;
+ goto D38;
+
+N38_9:
+ if attribute(catid) in (100300102, 100200172, 100400142, 100300065, 100300066, 100200185) then goto T38_9;
+ else goto N38_10;
+
+T38_9:
+ response = 0.0265330650;
+ goto D38;
+
+N38_10:
+ if attribute(catid) in (100300165, 100400038) then goto T38_10;
+ else goto T38_11;
+
+T38_10:
+ response = 0.0364634573;
+ goto D38;
+
+T38_11:
+ response = 0.0690252268;
+ goto D38;
+
+D38:
+
+tnscore = tnscore + response;
+
+ /* Tree 40 of 200 */
+N39_1:
+ if attribute(catid) in (0, 100200171, 100300077, 100300166, 100200034, 100400141, 100300165, 100200052, 100300093, 100200172, 100200068, 100300032, 100300027, 100200234, 100200053, 100300004, 100300126, 100300073, 100200193, 100400038, 100200192, 100300065, 100300212, 100300209, 100300122, 100300127, 100400079, 100200170, 100300169, 100400080, 100200087, 100300074, 100300066, 100300007, 100200176, 100300076, 100200067, 100200055, 100300006, 100300214) then goto N39_2;
+ else goto N39_7;
+
+N39_2:
+ if attribute(catid) in (100400141, 100300165, 100300032, 100300004, 100300126, 100200193, 100300007, 100200176, 100200067, 100200055, 100300006, 100300214) then goto N39_3;
+ else goto N39_4;
+
+N39_3:
+ if attribute(catid) in (100300032, 100300126, 100300007, 100200176, 100200067, 100200055, 100300006, 100300214) then goto T39_1;
+ else goto T39_2;
+
+T39_1:
+ response = -0.0053479534;
+ goto D39;
+
+T39_2:
+ response = 0.0054148332;
+ goto D39;
+
+N39_4:
+ if attribute(catid) in (0, 100300077, 100200068, 100300027, 100200234, 100200053, 100200192, 100300065, 100300212, 100300209, 100400079, 100200170, 100300169, 100400080, 100300076) then goto N39_5;
+ else goto N39_6;
+
+N39_5:
+ if attribute(catid) in (100200068, 100200053, 100300065, 100300209, 100200170, 100300169, 100300076) then goto T39_3;
+ else goto T39_4;
+
+T39_3:
+ response = 0.0124266534;
+ goto D39;
+
+T39_4:
+ response = 0.0155941575;
+ goto D39;
+
+N39_6:
+ if attribute(catid) in (100300166, 100200034, 100300093, 100200172, 100300073) then goto T39_5;
+ else goto T39_6;
+
+T39_5:
+ response = 0.0190612693;
+ goto D39;
+
+T39_6:
+ response = 0.0217608552;
+ goto D39;
+
+N39_7:
+ if attribute(catid) in (100200130, 100300014, 100300058, 100300013, 100200186, 100300005, 100300116, 100300121, 100300019, 100400142, 100300045, 100200028, 100200185) then goto N39_8;
+ else goto T39_9;
+
+N39_8:
+ if attribute(catid) in (100300058, 100300013, 100200186, 100300005, 100300116, 100300121, 100300019, 100300045) then goto T39_7;
+ else goto T39_8;
+
+T39_7:
+ response = 0.0276398014;
+ goto D39;
+
+T39_8:
+ response = 0.0339388499;
+ goto D39;
+
+T39_9:
+ response = 0.0448102783;
+ goto D39;
+
+D39:
+
+tnscore = tnscore + response;
+
+ /* Tree 41 of 200 */
+N40_1:
+ if attribute(catid) in (100300011, 100300014, 100300058, 100300143, 100200052, 100300005, 100300008, 100200068, 100300032, 100200234, 100200054, 100400037, 100300209, 100300127, 100300169, 100300074, 100300007, 100300076, 100200067, 100300006, 100200232, 100300214) then goto N40_2;
+ else goto N40_5;
+
+N40_2:
+ if attribute(catid) in (100300014, 100300058, 100300005, 100300008, 100200068, 100300032, 100200234, 100300209, 100200067, 100200232, 100300214) then goto N40_3;
+ else goto N40_4;
+
+N40_3:
+ if attribute(catid) in (100300014, 100300005, 100300032, 100300209, 100200067, 100200232, 100300214) then goto T40_1;
+ else goto T40_2;
+
+T40_1:
+ response = -0.0271870843;
+ goto D40;
+
+T40_2:
+ response = -0.0066979774;
+ goto D40;
+
+N40_4:
+ if attribute(catid) in (100300011, 100300143, 100400037, 100300127, 100300007, 100300076, 100300006) then goto T40_3;
+ else goto T40_4;
+
+T40_3:
+ response = 0.0003261718;
+ goto D40;
+
+T40_4:
+ response = 0.0072958932;
+ goto D40;
+
+N40_5:
+ if attribute(catid) in (0, 100200171, 100200130, 100300077, 100300166, 100200186, 100200172, 100300121, 100200053, 100400142, 100300073, 100200192, 100400079, 100400080, 100300066) then goto N40_6;
+ else goto N40_8;
+
+N40_6:
+ if attribute(catid) in (0, 100200130, 100300077, 100200186, 100200172, 100300121, 100300073, 100400079, 100400080) then goto N40_7;
+ else goto T40_7;
+
+N40_7:
+ if attribute(catid) in (100200130, 100200172, 100300121, 100300073, 100400079, 100400080) then goto T40_5;
+ else goto T40_6;
+
+T40_5:
+ response = 0.0130456694;
+ goto D40;
+
+T40_6:
+ response = 0.0171286061;
+ goto D40;
+
+T40_7:
+ response = 0.0214322678;
+ goto D40;
+
+N40_8:
+ if attribute(catid) in (100400141, 100300165, 100300093, 100300102, 100300027, 100300116, 100300126, 100400038, 100300065, 100300122, 100200087, 100300045, 100200028) then goto N40_9;
+ else goto T40_10;
+
+N40_9:
+ if attribute(catid) in (100400141, 100300165, 100300102, 100300027, 100200087, 100300045) then goto T40_8;
+ else goto T40_9;
+
+T40_8:
+ response = 0.0264983702;
+ goto D40;
+
+T40_9:
+ response = 0.0310587203;
+ goto D40;
+
+T40_10:
+ response = 0.0435590971;
+ goto D40;
+
+D40:
+
+tnscore = tnscore + response;
+
+ /* Tree 42 of 200 */
+N41_1:
+ if attribute(catid) in (100300013, 100200034, 100300093, 100300102, 100300008, 100200068, 100300032, 100200234, 100300019, 100300004, 100300212, 100300209, 100400079, 100200170, 100300169, 100300200, 100200028, 100300076, 100200067, 100200055, 100200185, 100300006) then goto N41_2;
+ else goto N41_4;
+
+N41_2:
+ if attribute(catid) in (100300013, 100300102, 100300032, 100300019, 100300212, 100300209, 100300200, 100200067, 100200055) then goto T41_1;
+ else goto N41_3;
+
+T41_1:
+ response = -0.0292043593;
+ goto D41;
+
+N41_3:
+ if attribute(catid) in (100300093, 100300008, 100200234, 100300004, 100300076, 100200185, 100300006) then goto T41_2;
+ else goto T41_3;
+
+T41_2:
+ response = -0.0009351701;
+ goto D41;
+
+T41_3:
+ response = 0.0074356232;
+ goto D41;
+
+N41_4:
+ if attribute(catid) in (0, 100200171, 100300011, 100200130, 100300058, 100300143, 100200186, 100300027, 100300116, 100200053, 100400142, 100300073, 100200193, 100400038, 100200192, 100300122, 100400080, 100200087, 100300074, 100300066, 100200176, 100300045) then goto N41_5;
+ else goto N41_6;
+
+N41_5:
+ if attribute(catid) in (0, 100300058, 100200186, 100300027, 100300116, 100200087, 100200176) then goto T41_4;
+ else goto T41_5;
+
+T41_4:
+ response = 0.0139006166;
+ goto D41;
+
+T41_5:
+ response = 0.0181039982;
+ goto D41;
+
+N41_6:
+ if attribute(catid) in (100300014, 100300077, 100400141, 100300165, 100200052, 100300005, 100200172, 100300121, 100300126, 100300065, 100300007) then goto T41_6;
+ else goto T41_7;
+
+T41_6:
+ response = 0.0233872084;
+ goto D41;
+
+T41_7:
+ response = 0.0318278949;
+ goto D41;
+
+D41:
+
+tnscore = tnscore + response;
+
+ /* Tree 43 of 200 */
+N42_1:
+ if attribute(catid) in (100300013, 100300008, 100200068, 100300121, 100300019, 100300004, 100200054, 100400038, 100300212, 100200028, 100200185, 100200232) then goto N42_2;
+ else goto N42_3;
+
+N42_2:
+ if attribute(catid) in (100300013, 100300008, 100300019, 100300212, 100200185) then goto T42_1;
+ else goto T42_2;
+
+T42_1:
+ response = -0.0407457887;
+ goto D42;
+
+T42_2:
+ response = -0.0116755527;
+ goto D42;
+
+N42_3:
+ if attribute(catid) in (0, 100200171, 100300058, 100300166, 100200034, 100200186, 100200052, 100300005, 100300027, 100300116, 100200053, 100300126, 100400142, 100300073, 100400037, 100200192, 100300065, 100300209, 100400079, 100300169, 100400080, 100200087, 100300074, 100300066, 100200176, 100300006) then goto N42_4;
+ else goto N42_7;
+
+N42_4:
+ if attribute(catid) in (100200171, 100200034, 100200052, 100300005, 100200053, 100300073, 100400037, 100300209, 100400079, 100400080, 100300074) then goto N42_5;
+ else goto N42_6;
+
+N42_5:
+ if attribute(catid) in (100200171, 100200034, 100300005, 100200053, 100300209, 100400079) then goto T42_3;
+ else goto T42_4;
+
+T42_3:
+ response = 0.0049667166;
+ goto D42;
+
+T42_4:
+ response = 0.0103313635;
+ goto D42;
+
+N42_6:
+ if attribute(catid) in (100200186, 100400142, 100300065, 100300169, 100200087, 100300066, 100200176) then goto T42_5;
+ else goto T42_6;
+
+T42_5:
+ response = 0.0145292773;
+ goto D42;
+
+T42_6:
+ response = 0.0169648891;
+ goto D42;
+
+N42_7:
+ if attribute(catid) in (100200130, 100300014, 100300077, 100300143, 100400141, 100300165, 100200172, 100200193, 100300122, 100300127, 100300200) then goto N42_8;
+ else goto T42_9;
+
+N42_8:
+ if attribute(catid) in (100200130, 100300143, 100400141, 100300122, 100300127) then goto T42_7;
+ else goto T42_8;
+
+T42_7:
+ response = 0.0211036464;
+ goto D42;
+
+T42_8:
+ response = 0.0257964434;
+ goto D42;
+
+T42_9:
+ response = 0.0412799006;
+ goto D42;
+
+D42:
+
+tnscore = tnscore + response;
+
+ /* Tree 44 of 200 */
+N43_1:
+ if attribute(catid) in (0, 100200171, 100200130, 100300166, 100300143, 100200034, 100200186, 100400141, 100300093, 100300005, 100200172, 100300008, 100300027, 100300121, 100300019, 100300004, 100300126, 100200054, 100300073, 100400037, 100200192, 100300065, 100300212, 100400079, 100200170, 100300169, 100400080, 100200087, 100300074, 100300007, 100300200, 100200028, 100200185, 100300006) then goto N43_2;
+ else goto N43_6;
+
+N43_2:
+ if attribute(catid) in (100300143, 100300093, 100300005, 100300008, 100300019, 100300212, 100200028, 100200185, 100300006) then goto T43_1;
+ else goto N43_3;
+
+T43_1:
+ response = -0.0120071553;
+ goto D43;
+
+N43_3:
+ if attribute(catid) in (100200171, 100300166, 100200186, 100400141, 100200172, 100300027, 100300121, 100300004, 100200054, 100300073, 100400079, 100200170, 100200087, 100300074) then goto N43_4;
+ else goto N43_5;
+
+N43_4:
+ if attribute(catid) in (100200186, 100300004, 100200054, 100300073, 100200087) then goto T43_2;
+ else goto T43_3;
+
+T43_2:
+ response = 0.0078585148;
+ goto D43;
+
+T43_3:
+ response = 0.0109817855;
+ goto D43;
+
+N43_5:
+ if attribute(catid) in (0, 100200130, 100300065, 100400080) then goto T43_4;
+ else goto T43_5;
+
+T43_4:
+ response = 0.0142642384;
+ goto D43;
+
+T43_5:
+ response = 0.0175222293;
+ goto D43;
+
+N43_6:
+ if attribute(catid) in (100300011, 100300014, 100300013, 100200052, 100200068, 100200234, 100200053, 100400038, 100300122, 100300127, 100300066, 100200176, 100300045, 100300076) then goto N43_7;
+ else goto N43_8;
+
+N43_7:
+ if attribute(catid) in (100300014, 100300013, 100200068, 100200234, 100200053, 100300127, 100200176, 100300076) then goto T43_6;
+ else goto T43_7;
+
+T43_6:
+ response = 0.0221804998;
+ goto D43;
+
+T43_7:
+ response = 0.0265637670;
+ goto D43;
+
+N43_8:
+ if attribute(catid) in (100300077, 100300165, 100400142, 100200232) then goto T43_8;
+ else goto T43_9;
+
+T43_8:
+ response = 0.0309690505;
+ goto D43;
+
+T43_9:
+ response = 0.0459150714;
+ goto D43;
+
+D43:
+
+tnscore = tnscore + response;
+
+ /* Tree 45 of 200 */
+N44_1:
+ if attribute(catid) in (0, 100200171, 100200130, 100300014, 100300166, 100300143, 100200186, 100300165, 100200052, 100300102, 100300005, 100200172, 100300008, 100200068, 100300032, 100300027, 100300116, 100300121, 100200234, 100200053, 100300019, 100300004, 100300126, 100400142, 100400037, 100200193, 100400038, 100200192, 100300065, 100300212, 100300127, 100400079, 100200170, 100300169, 100400080, 100300074, 100300066, 100300007, 100200176, 100300200, 100300076, 100200232, 100300146) then goto N44_2;
+ else goto N44_7;
+
+N44_2:
+ if attribute(catid) in (100300014, 100300102, 100200068, 100300032, 100300121, 100300019, 100300004, 100400142, 100200193, 100400038, 100300212, 100300127, 100400079, 100200170, 100300074, 100200176, 100300200, 100300076, 100200232, 100300146) then goto N44_3;
+ else goto N44_4;
+
+N44_3:
+ if attribute(catid) in (100300102, 100200068, 100300032, 100300019, 100300212, 100200176, 100200232) then goto T44_1;
+ else goto T44_2;
+
+T44_1:
+ response = -0.0148052713;
+ goto D44;
+
+T44_2:
+ response = 0.0048766529;
+ goto D44;
+
+N44_4:
+ if attribute(catid) in (100200171, 100300165, 100200052, 100300005, 100200172, 100300116, 100200234, 100200192, 100300169) then goto N44_5;
+ else goto N44_6;
+
+N44_5:
+ if attribute(catid) in (100200052, 100300116, 100200234, 100200192, 100300169) then goto T44_3;
+ else goto T44_4;
+
+T44_3:
+ response = 0.0099622919;
+ goto D44;
+
+T44_4:
+ response = 0.0138380378;
+ goto D44;
+
+N44_6:
+ if attribute(catid) in (0, 100300027, 100300126, 100300066) then goto T44_5;
+ else goto T44_6;
+
+T44_5:
+ response = 0.0153805374;
+ goto D44;
+
+T44_6:
+ response = 0.0183919749;
+ goto D44;
+
+N44_7:
+ if attribute(catid) in (100300011, 100300077, 100400141, 100300093, 100300073, 100200087, 100200028, 100200185) then goto T44_7;
+ else goto T44_8;
+
+T44_7:
+ response = 0.0247957566;
+ goto D44;
+
+T44_8:
+ response = 0.0395124104;
+ goto D44;
+
+D44:
+
+tnscore = tnscore + response;
+
+ /* Tree 46 of 200 */
+N45_1:
+ if attribute(catid) in (100300011, 100200130, 100300014, 100300058, 100200034, 100200186, 100300008, 100200068, 100300073, 100400038, 100300065, 100300127, 100400079, 100400080, 100200087, 100200176, 100200028, 100200067, 100200055) then goto N45_2;
+ else goto N45_4;
+
+N45_2:
+ if attribute(catid) in (100300014, 100300058, 100200034, 100300008, 100200068, 100200087, 100200028, 100200067, 100200055) then goto T45_1;
+ else goto N45_3;
+
+T45_1:
+ response = -0.0102104476;
+ goto D45;
+
+N45_3:
+ if attribute(catid) in (100300011, 100200186, 100400038, 100300127) then goto T45_2;
+ else goto T45_3;
+
+T45_2:
+ response = 0.0006233907;
+ goto D45;
+
+T45_3:
+ response = 0.0069244113;
+ goto D45;
+
+N45_4:
+ if attribute(catid) in (0, 100200171, 100300077, 100300166, 100300143, 100400141, 100300165, 100300093, 100300005, 100200172, 100300032, 100300027, 100300116, 100300121, 100200053, 100400142, 100200193, 100200192, 100200170, 100300169, 100300074, 100300066, 100300007, 100300200, 100300045, 100300076) then goto N45_5;
+ else goto N45_8;
+
+N45_5:
+ if attribute(catid) in (0, 100300077, 100300166, 100300143, 100300165, 100300005, 100200172, 100300032, 100300027, 100200053, 100200192, 100300045, 100300076) then goto N45_6;
+ else goto N45_7;
+
+N45_6:
+ if attribute(catid) in (100300077, 100300143, 100300165, 100200053, 100200192, 100300045) then goto T45_4;
+ else goto T45_5;
+
+T45_4:
+ response = 0.0146529601;
+ goto D45;
+
+T45_5:
+ response = 0.0167435205;
+ goto D45;
+
+N45_7:
+ if attribute(catid) in (100200171, 100400141, 100300116, 100200170, 100300074, 100300007, 100300200) then goto T45_6;
+ else goto T45_7;
+
+T45_6:
+ response = 0.0216207477;
+ goto D45;
+
+T45_7:
+ response = 0.0252995150;
+ goto D45;
+
+N45_8:
+ if attribute(catid) in (100200052, 100300102, 100200234, 100300004, 100300126, 100300122, 100200185, 100300006, 100200232) then goto T45_8;
+ else goto T45_9;
+
+T45_8:
+ response = 0.0342788593;
+ goto D45;
+
+T45_9:
+ response = 0.0559275992;
+ goto D45;
+
+D45:
+
+tnscore = tnscore + response;
+
+ /* Tree 47 of 200 */
+N46_1:
+ if attribute(catid) in (100200171, 100300011, 100200130, 100300013, 100400141, 100200052, 100300102, 100300008, 100200068, 100300027, 100300116, 100200234, 100200054, 100300073, 100400037, 100300065, 100300209, 100300122, 100300127, 100400079, 100200170, 100300169, 100300074, 100200176, 100200185, 100300214) then goto N46_2;
+ else goto N46_4;
+
+N46_2:
+ if attribute(catid) in (100300008, 100300027, 100400037, 100300074, 100200176, 100200185, 100300214) then goto T46_1;
+ else goto N46_3;
+
+T46_1:
+ response = -0.0102961911;
+ goto D46;
+
+N46_3:
+ if attribute(catid) in (100200171, 100300011, 100200052, 100300102, 100200068, 100300116, 100200234, 100300065, 100300209, 100400079, 100300169) then goto T46_2;
+ else goto T46_3;
+
+T46_2:
+ response = 0.0058614005;
+ goto D46;
+
+T46_3:
+ response = 0.0117994941;
+ goto D46;
+
+N46_4:
+ if attribute(catid) in (0, 100300014, 100300166, 100200186, 100300165, 100300005, 100300032, 100300004, 100300126, 100400142, 100200192, 100400080, 100200087, 100300200, 100200028, 100200067, 100300006) then goto N46_5;
+ else goto N46_6;
+
+N46_5:
+ if attribute(catid) in (0, 100300166, 100200186, 100300005, 100300032, 100300126, 100400142, 100200067) then goto T46_4;
+ else goto T46_5;
+
+T46_4:
+ response = 0.0176102969;
+ goto D46;
+
+T46_5:
+ response = 0.0199567396;
+ goto D46;
+
+N46_6:
+ if attribute(catid) in (100300058, 100300077, 100300143, 100200034, 100300093, 100200172, 100300121, 100300019, 100400038, 100300212, 100300066, 100300076, 100200232) then goto N46_7;
+ else goto T46_8;
+
+N46_7:
+ if attribute(catid) in (100300077, 100300093, 100200172, 100300019, 100300066, 100300076, 100200232) then goto T46_6;
+ else goto T46_7;
+
+T46_6:
+ response = 0.0280698840;
+ goto D46;
+
+T46_7:
+ response = 0.0349016561;
+ goto D46;
+
+T46_8:
+ response = 0.0778635274;
+ goto D46;
+
+D46:
+
+tnscore = tnscore + response;
+
+ /* Tree 48 of 200 */
+N47_1:
+ if attribute(catid) in (0, 100200171, 100300011, 100200130, 100300013, 100300077, 100300166, 100300143, 100200034, 100400141, 100200052, 100300102, 100300005, 100200172, 100300008, 100200068, 100300116, 100300121, 100200234, 100200053, 100300019, 100300004, 100300126, 100300073, 100200193, 100400038, 100200192, 100300212, 100300209, 100300122, 100300127, 100400079, 100200170, 100300169, 100400080, 100300074, 100300066, 100200176, 100300200, 100200028, 100300076, 100200055, 100200185, 100200232, 100300146) then goto N47_2;
+ else goto N47_6;
+
+N47_2:
+ if attribute(catid) in (100200171, 100300011, 100400141, 100300102, 100300005, 100300008, 100200068, 100300116, 100200053, 100300019, 100300004, 100400038, 100300212, 100300209, 100400079, 100200176, 100300200, 100200028, 100300076, 100200055, 100200185, 100300146) then goto N47_3;
+ else goto N47_4;
+
+N47_3:
+ if attribute(catid) in (100300011, 100300019, 100300209, 100300200, 100200028, 100300076, 100200055, 100200185) then goto T47_1;
+ else goto T47_2;
+
+T47_1:
+ response = -0.0255215536;
+ goto D47;
+
+T47_2:
+ response = 0.0049014532;
+ goto D47;
+
+N47_4:
+ if attribute(catid) in (0, 100300013, 100300166, 100300143, 100200034, 100300121, 100300126, 100200192, 100300122, 100300127, 100300169, 100300074, 100300066) then goto N47_5;
+ else goto T47_5;
+
+N47_5:
+ if attribute(catid) in (100300013, 100300143, 100200192, 100300127, 100300074, 100300066) then goto T47_3;
+ else goto T47_4;
+
+T47_3:
+ response = 0.0096441943;
+ goto D47;
+
+T47_4:
+ response = 0.0132971959;
+ goto D47;
+
+T47_5:
+ response = 0.0172127947;
+ goto D47;
+
+N47_6:
+ if attribute(catid) in (100300014, 100200186, 100300093, 100300027, 100400142, 100400037, 100200087, 100300006) then goto T47_6;
+ else goto N47_7;
+
+T47_6:
+ response = 0.0253813497;
+ goto D47;
+
+N47_7:
+ if attribute(catid) in (100300165, 100300007, 100300045) then goto T47_7;
+ else goto T47_8;
+
+T47_7:
+ response = 0.0318318618;
+ goto D47;
+
+T47_8:
+ response = 0.0425817751;
+ goto D47;
+
+D47:
+
+tnscore = tnscore + response;
+
+ /* Tree 49 of 200 */
+N48_1:
+ if attribute(catid) in (100300058, 100300013, 100300077, 100200186, 100400141, 100300165, 100200052, 100300005, 100200068, 100300116, 100200234, 100300019, 100300126, 100200054, 100400038, 100300212, 100300169, 100300074, 100300066, 100300007, 100300200, 100300006, 100200232, 100300214, 100300146) then goto N48_2;
+ else goto N48_5;
+
+N48_2:
+ if attribute(catid) in (100300013, 100400141, 100300005, 100300116, 100200234, 100300019, 100300212, 100300200, 100200232, 100300214, 100300146) then goto N48_3;
+ else goto N48_4;
+
+N48_3:
+ if attribute(catid) in (100300013, 100300005, 100300019, 100300200, 100200232, 100300214, 100300146) then goto T48_1;
+ else goto T48_2;
+
+T48_1:
+ response = -0.0378281153;
+ goto D48;
+
+T48_2:
+ response = -0.0106433322;
+ goto D48;
+
+N48_4:
+ if attribute(catid) in (100300165, 100200068, 100300126, 100400038, 100300074, 100300007) then goto T48_3;
+ else goto T48_4;
+
+T48_3:
+ response = 0.0013709167;
+ goto D48;
+
+T48_4:
+ response = 0.0079886834;
+ goto D48;
+
+N48_5:
+ if attribute(catid) in (0, 100200171, 100200130, 100300014, 100200034, 100300102, 100200172, 100300008, 100300027, 100300121, 100200053, 100300004, 100400142, 100300073, 100200192, 100300065, 100300122, 100300127, 100400079, 100200170, 100400080, 100200087, 100200028, 100300076) then goto N48_6;
+ else goto N48_8;
+
+N48_6:
+ if attribute(catid) in (100200130, 100300014, 100200034, 100300008, 100300027, 100300121, 100200053, 100400142, 100300073, 100200192, 100300122, 100300127, 100200170, 100400080, 100200087) then goto T48_5;
+ else goto N48_7;
+
+T48_5:
+ response = 0.0145425948;
+ goto D48;
+
+N48_7:
+ if attribute(catid) in (0, 100300004) then goto T48_6;
+ else goto T48_7;
+
+T48_6:
+ response = 0.0195574674;
+ goto D48;
+
+T48_7:
+ response = 0.0232919623;
+ goto D48;
+
+N48_8:
+ if attribute(catid) in (100300011, 100300166, 100200176, 100300045, 100200185) then goto T48_8;
+ else goto T48_9;
+
+T48_8:
+ response = 0.0300271939;
+ goto D48;
+
+T48_9:
+ response = 0.0494949990;
+ goto D48;
+
+D48:
+
+tnscore = tnscore + response;
+
+ /* Tree 50 of 200 */
+N49_1:
+ if attribute(catid) in (0, 100200171, 100200130, 100300014, 100300058, 100300077, 100300166, 100200034, 100200186, 100400141, 100300165, 100200052, 100300093, 100300102, 100300005, 100200172, 100200068, 100300032, 100300116, 100300121, 100200053, 100300019, 100300004, 100400142, 100300073, 100400037, 100200193, 100400038, 100200192, 100300212, 100300122, 100300127, 100400079, 100200170, 100300169, 100400080, 100200087, 100300074, 100300066, 100300007, 100300200, 100300045, 100200028, 100300076, 100200067, 100200055, 100300214, 100300146) then goto N49_2;
+ else goto N49_7;
+
+N49_2:
+ if attribute(catid) in (100300014, 100300058, 100300077, 100400141, 100300165, 100300102, 100200068, 100300032, 100300116, 100300019, 100300004, 100400142, 100400037, 100300212, 100300127, 100200170, 100400080, 100300007, 100300045, 100200028, 100200055, 100300214, 100300146) then goto N49_3;
+ else goto N49_5;
+
+N49_3:
+ if attribute(catid) in (100300102, 100300032, 100300019, 100300004, 100400037, 100300212, 100200055, 100300214, 100300146) then goto T49_1;
+ else goto N49_4;
+
+T49_1:
+ response = -0.0237347370;
+ goto D49;
+
+N49_4:
+ if attribute(catid) in (100300014, 100300058, 100300165, 100200068, 100300116, 100300127, 100400080, 100300007, 100200028) then goto T49_2;
+ else goto T49_3;
+
+T49_2:
+ response = 0.0013606160;
+ goto D49;
+
+T49_3:
+ response = 0.0065228229;
+ goto D49;
+
+N49_5:
+ if attribute(catid) in (0, 100200171, 100200130, 100300166, 100200034, 100300005, 100200172, 100300121, 100200193, 100200192, 100400079, 100300169, 100300200) then goto N49_6;
+ else goto T49_6;
+
+N49_6:
+ if attribute(catid) in (0, 100200034, 100200172, 100200193, 100200192, 100300169) then goto T49_4;
+ else goto T49_5;
+
+T49_4:
+ response = 0.0131275051;
+ goto D49;
+
+T49_5:
+ response = 0.0156511717;
+ goto D49;
+
+T49_6:
+ response = 0.0216393464;
+ goto D49;
+
+N49_7:
+ if attribute(catid) in (100300011, 100300143, 100300008, 100300027, 100300126, 100200054, 100300065, 100300209, 100200176, 100200232) then goto T49_7;
+ else goto T49_8;
+
+T49_7:
+ response = 0.0329759178;
+ goto D49;
+
+T49_8:
+ response = 0.0612562214;
+ goto D49;
+
+D49:
+
+tnscore = tnscore + response;
+
+ /* Tree 51 of 200 */
+N50_1:
+ if attribute(catid) in (100300014, 100300058, 100300077, 100300166, 100400141, 100300165, 100200052, 100300093, 100200172, 100300027, 100300116, 100300121, 100200053, 100300126, 100200054, 100300073, 100200193, 100200192, 100300212, 100300209, 100300127, 100400079, 100200170, 100300169, 100400080, 100300074, 100300066, 100300007, 100300200, 100300076, 100200185, 100300214, 100300146) then goto N50_2;
+ else goto N50_5;
+
+N50_2:
+ if attribute(catid) in (100300014, 100300166, 100200052, 100300093, 100300116, 100300121, 100200193, 100300212, 100300209, 100400080, 100300007, 100200185, 100300214) then goto N50_3;
+ else goto N50_4;
+
+N50_3:
+ if attribute(catid) in (100300014, 100300121, 100300212, 100300209, 100200185, 100300214) then goto T50_1;
+ else goto T50_2;
+
+T50_1:
+ response = -0.0165119187;
+ goto D50;
+
+T50_2:
+ response = 0.0001429856;
+ goto D50;
+
+N50_4:
+ if attribute(catid) in (100300058, 100300077, 100300165, 100200172, 100200053, 100300126, 100200054, 100200170, 100300074, 100300200, 100300146) then goto T50_3;
+ else goto T50_4;
+
+T50_3:
+ response = 0.0055114285;
+ goto D50;
+
+T50_4:
+ response = 0.0104192793;
+ goto D50;
+
+N50_5:
+ if attribute(catid) in (0, 100200171, 100200130, 100200034, 100200186, 100300005, 100300008, 100200068, 100200234, 100400142, 100400038, 100300065, 100300045, 100300006) then goto N50_6;
+ else goto N50_7;
+
+N50_6:
+ if attribute(catid) in (0, 100200130, 100200034, 100300005, 100400142, 100300065, 100300045, 100300006) then goto T50_5;
+ else goto T50_6;
+
+T50_5:
+ response = 0.0161701482;
+ goto D50;
+
+T50_6:
+ response = 0.0225059966;
+ goto D50;
+
+N50_7:
+ if attribute(catid) in (100300013, 100300143, 100300004, 100300122, 100200087) then goto T50_7;
+ else goto T50_8;
+
+T50_7:
+ response = 0.0319347909;
+ goto D50;
+
+T50_8:
+ response = 0.0503173002;
+ goto D50;
+
+D50:
+
+tnscore = tnscore + response;
+
+ /* Tree 52 of 200 */
+N51_1:
+ if attribute(catid) in (0, 100200171, 100300011, 100200130, 100300014, 100300058, 100300013, 100300077, 100300166, 100300143, 100200034, 100200186, 100400141, 100200052, 100300102, 100300005, 100200172, 100300008, 100200068, 100300032, 100300027, 100300116, 100200053, 100300019, 100300004, 100200054, 100300073, 100400038, 100200192, 100300212, 100300122, 100300127, 100400079, 100200170, 100300169, 100200087, 100300074, 100300066, 100300007, 100200176, 100300045, 100200028, 100200067, 100200232) then goto N51_2;
+ else goto N51_6;
+
+N51_2:
+ if attribute(catid) in (100200171, 100300013, 100300143, 100200034, 100300102, 100300005, 100300008, 100200068, 100300116, 100300019, 100200054, 100400038, 100300212, 100300122, 100300169, 100200087, 100300074, 100300045, 100200067, 100200232) then goto N51_3;
+ else goto N51_5;
+
+N51_3:
+ if attribute(catid) in (100300143, 100200034, 100300102, 100300008, 100300019, 100300212, 100200067) then goto T51_1;
+ else goto N51_4;
+
+T51_1:
+ response = -0.0135611192;
+ goto D51;
+
+N51_4:
+ if attribute(catid) in (100300013, 100200068, 100300116, 100200054, 100300122) then goto T51_2;
+ else goto T51_3;
+
+T51_2:
+ response = 0.0015450909;
+ goto D51;
+
+T51_3:
+ response = 0.0074784769;
+ goto D51;
+
+N51_5:
+ if attribute(catid) in (100200130, 100300014, 100300058, 100200172, 100300027, 100200053, 100300004, 100300073, 100200192, 100200028) then goto T51_4;
+ else goto T51_5;
+
+T51_4:
+ response = 0.0109788633;
+ goto D51;
+
+T51_5:
+ response = 0.0166053500;
+ goto D51;
+
+N51_6:
+ if attribute(catid) in (100300165, 100200234, 100400142, 100300065, 100400080, 100300076, 100200185, 100300006) then goto N51_7;
+ else goto T51_8;
+
+N51_7:
+ if attribute(catid) in (100200234, 100300065, 100400080, 100300076) then goto T51_6;
+ else goto T51_7;
+
+T51_6:
+ response = 0.0245937925;
+ goto D51;
+
+T51_7:
+ response = 0.0299316682;
+ goto D51;
+
+T51_8:
+ response = 0.0434718302;
+ goto D51;
+
+D51:
+
+tnscore = tnscore + response;
+
+ /* Tree 53 of 200 */
+N52_1:
+ if attribute(catid) in (0, 100200171, 100300011, 100200130, 100300014, 100300077, 100300166, 100200186, 100200052, 100300093, 100300005, 100200172, 100200068, 100300027, 100300116, 100300121, 100200234, 100300019, 100300004, 100200054, 100300073, 100400037, 100200192, 100300065, 100300212, 100300209, 100400079, 100200170, 100300074, 100300066, 100300200, 100300045, 100200067, 100200055, 100300006, 100200232, 100300214, 100300146) then goto N52_2;
+ else goto N52_7;
+
+N52_2:
+ if attribute(catid) in (100300011, 100300014, 100300077, 100200186, 100200172, 100200068, 100300027, 100300116, 100300121, 100200234, 100300019, 100300073, 100400037, 100300212, 100400079, 100300200, 100200067, 100200055, 100300006, 100200232, 100300214, 100300146) then goto N52_3;
+ else goto N52_5;
+
+N52_3:
+ if attribute(catid) in (100200234, 100300019, 100300212, 100300200, 100200067, 100200055, 100200232, 100300214) then goto T52_1;
+ else goto N52_4;
+
+T52_1:
+ response = -0.0132138062;
+ goto D52;
+
+N52_4:
+ if attribute(catid) in (100300011, 100300014, 100200186, 100200068, 100300027, 100300116, 100300121, 100400037, 100300006, 100300146) then goto T52_2;
+ else goto T52_3;
+
+T52_2:
+ response = 0.0019211021;
+ goto D52;
+
+T52_3:
+ response = 0.0071648202;
+ goto D52;
+
+N52_5:
+ if attribute(catid) in (0, 100200171, 100200130, 100200052, 100300093, 100300005, 100200054, 100200192, 100300065, 100300209, 100200170, 100300066, 100300045) then goto N52_6;
+ else goto T52_6;
+
+N52_6:
+ if attribute(catid) in (100200171, 100200130, 100200052, 100300093, 100300005, 100300209, 100300066, 100300045) then goto T52_4;
+ else goto T52_5;
+
+T52_4:
+ response = 0.0114878654;
+ goto D52;
+
+T52_5:
+ response = 0.0141002634;
+ goto D52;
+
+T52_6:
+ response = 0.0189217722;
+ goto D52;
+
+N52_7:
+ if attribute(catid) in (100300143, 100300165, 100300102, 100300008, 100200053, 100400142, 100200193, 100400038, 100300122, 100300127, 100300169, 100400080, 100200087, 100200176, 100200185) then goto N52_8;
+ else goto N52_9;
+
+N52_8:
+ if attribute(catid) in (100300143, 100300165, 100300102, 100200053, 100400142, 100300127, 100200176) then goto T52_7;
+ else goto T52_8;
+
+T52_7:
+ response = 0.0249929297;
+ goto D52;
+
+T52_8:
+ response = 0.0287505728;
+ goto D52;
+
+N52_9:
+ if attribute(catid) in (100300058, 100200034, 100400141, 100200028) then goto T52_9;
+ else goto T52_10;
+
+T52_9:
+ response = 0.0405244074;
+ goto D52;
+
+T52_10:
+ response = 0.0625787358;
+ goto D52;
+
+D52:
+
+tnscore = tnscore + response;
+
+ /* Tree 54 of 200 */
+N53_1:
+ if attribute(catid) in (0, 100200171, 100200130, 100300014, 100200034, 100200052, 100300093, 100300102, 100300005, 100300008, 100300019, 100300004, 100300126, 100200054, 100300073, 100200193, 100400038, 100200192, 100300065, 100300212, 100300209, 100200170, 100300169, 100300066, 100300200, 100200028, 100200067, 100200232, 100300214) then goto N53_2;
+ else goto N53_5;
+
+N53_2:
+ if attribute(catid) in (100300014, 100200034, 100300093, 100300102, 100300008, 100300019, 100300004, 100300126, 100200054, 100300212, 100300209, 100300200, 100200028, 100200067, 100200232, 100300214) then goto N53_3;
+ else goto N53_4;
+
+N53_3:
+ if attribute(catid) in (100300014, 100300008, 100300019, 100200054, 100300209, 100200067, 100200232, 100300214) then goto T53_1;
+ else goto T53_2;
+
+T53_1:
+ response = -0.0251474123;
+ goto D53;
+
+T53_2:
+ response = -0.0003413433;
+ goto D53;
+
+N53_4:
+ if attribute(catid) in (100200130, 100200052, 100300073, 100200192, 100200170, 100300169, 100300066) then goto T53_3;
+ else goto T53_4;
+
+T53_3:
+ response = 0.0080094607;
+ goto D53;
+
+T53_4:
+ response = 0.0126070285;
+ goto D53;
+
+N53_5:
+ if attribute(catid) in (100300011, 100300077, 100300166, 100300143, 100200186, 100400141, 100200068, 100300116, 100300121, 100200053, 100400142, 100400037, 100400079, 100400080, 100200087, 100300074, 100300045) then goto N53_6;
+ else goto N53_7;
+
+N53_6:
+ if attribute(catid) in (100300011, 100300077, 100300143, 100200186, 100400141, 100200053, 100400142, 100400079, 100400080, 100200087, 100300045) then goto T53_5;
+ else goto T53_6;
+
+T53_5:
+ response = 0.0176471308;
+ goto D53;
+
+T53_6:
+ response = 0.0208465659;
+ goto D53;
+
+N53_7:
+ if attribute(catid) in (100300165, 100200172, 100200234, 100300122, 100200176) then goto T53_7;
+ else goto T53_8;
+
+T53_7:
+ response = 0.0268188222;
+ goto D53;
+
+T53_8:
+ response = 0.0367255273;
+ goto D53;
+
+D53:
+
+tnscore = tnscore + response;
+
+ /* Tree 55 of 200 */
+N54_1:
+ if attribute(catid) in (0, 100200171, 100300011, 100200130, 100300014, 100300058, 100300077, 100300166, 100300143, 100200186, 100400141, 100300093, 100300102, 100200172, 100300008, 100300027, 100300121, 100200234, 100200053, 100300019, 100300004, 100300073, 100400037, 100200193, 100300065, 100300212, 100300127, 100400079, 100200170, 100300169, 100400080, 100200087, 100300074, 100300200, 100300045, 100200028, 100300076, 100200067, 100200055, 100200185, 100300006, 100200232, 100300214, 100300146) then goto N54_2;
+ else goto N54_6;
+
+N54_2:
+ if attribute(catid) in (100300014, 100300093, 100300102, 100300008, 100200053, 100300019, 100300004, 100300073, 100300212, 100300127, 100300200, 100200028, 100300076, 100200067, 100200055, 100300006, 100200232, 100300214, 100300146) then goto N54_3;
+ else goto N54_4;
+
+N54_3:
+ if attribute(catid) in (100300102, 100300008, 100300019, 100200067, 100200055, 100200232, 100300214) then goto T54_1;
+ else goto T54_2;
+
+T54_1:
+ response = -0.0254633193;
+ goto D54;
+
+T54_2:
+ response = 0.0030878168;
+ goto D54;
+
+N54_4:
+ if attribute(catid) in (0, 100200171, 100200130, 100300058, 100300027, 100300121, 100200234, 100200193, 100200170, 100300045) then goto N54_5;
+ else goto T54_5;
+
+N54_5:
+ if attribute(catid) in (100200171, 100200130, 100300058, 100300027, 100300121, 100200234, 100200193, 100300045) then goto T54_3;
+ else goto T54_4;
+
+T54_3:
+ response = 0.0097072082;
+ goto D54;
+
+T54_4:
+ response = 0.0114229146;
+ goto D54;
+
+T54_5:
+ response = 0.0156613592;
+ goto D54;
+
+N54_6:
+ if attribute(catid) in (100200034, 100200052, 100300005, 100200068, 100300032, 100300116, 100400142, 100200054, 100200192, 100300122) then goto T54_6;
+ else goto T54_7;
+
+T54_6:
+ response = 0.0227531664;
+ goto D54;
+
+T54_7:
+ response = 0.0305920398;
+ goto D54;
+
+D54:
+
+tnscore = tnscore + response;
+
+ /* Tree 56 of 200 */
+N55_1:
+ if attribute(catid) in (100300011, 100200034, 100300102, 100300005, 100200068, 100300032, 100300116, 100300019, 100300004, 100200193, 100300212, 100300007, 100300200, 100200028, 100200067, 100200055, 100300006, 100300214, 100300146) then goto N55_2;
+ else goto N55_3;
+
+N55_2:
+ if attribute(catid) in (100300011, 100300102, 100300005, 100300032, 100300019, 100300004, 100200193, 100300200, 100200067, 100200055, 100300214, 100300146) then goto T55_1;
+ else goto T55_2;
+
+T55_1:
+ response = -0.0181634396;
+ goto D55;
+
+T55_2:
+ response = -0.0014727477;
+ goto D55;
+
+N55_3:
+ if attribute(catid) in (0, 100200171, 100200130, 100300014, 100300058, 100300013, 100300077, 100200186, 100400141, 100200052, 100200172, 100300008, 100200234, 100200053, 100400142, 100300073, 100400038, 100200192, 100300065, 100300122, 100200170, 100300169, 100200087, 100300074, 100200176, 100300045, 100300076) then goto N55_4;
+ else goto N55_6;
+
+N55_4:
+ if attribute(catid) in (100300014, 100200186, 100400141, 100200052, 100200234, 100200053, 100400038, 100200192, 100300065, 100300169, 100300074, 100200176, 100300045) then goto T55_3;
+ else goto N55_5;
+
+T55_3:
+ response = 0.0098907776;
+ goto D55;
+
+N55_5:
+ if attribute(catid) in (0, 100200130, 100300013, 100300077, 100200172, 100300008, 100400142, 100300122, 100200170) then goto T55_4;
+ else goto T55_5;
+
+T55_4:
+ response = 0.0138164577;
+ goto D55;
+
+T55_5:
+ response = 0.0170925410;
+ goto D55;
+
+N55_6:
+ if attribute(catid) in (100300166, 100300143, 100300165, 100300093, 100300027, 100300127, 100400079, 100400080, 100300066) then goto T55_6;
+ else goto T55_7;
+
+T55_6:
+ response = 0.0219244924;
+ goto D55;
+
+T55_7:
+ response = 0.0405698900;
+ goto D55;
+
+D55:
+
+tnscore = tnscore + response;
+
+ /* Tree 57 of 200 */
+N56_1:
+ if attribute(catid) in (0, 100200171, 100200130, 100300058, 100300143, 100200034, 100200186, 100400141, 100300165, 100200052, 100300093, 100300102, 100300005, 100200172, 100200068, 100300027, 100300116, 100200053, 100300019, 100300004, 100300126, 100400142, 100300073, 100400037, 100200193, 100400038, 100200192, 100300212, 100300122, 100300127, 100400079, 100200170, 100300169, 100400080, 100200087, 100200176, 100300200, 100200028, 100300076, 100200055, 100300006, 100200232, 100300146) then goto N56_2;
+ else goto N56_7;
+
+N56_2:
+ if attribute(catid) in (100300143, 100300093, 100300102, 100300116, 100300019, 100300004, 100200193, 100400038, 100300212, 100400080, 100200028, 100200055, 100300006, 100200232, 100300146) then goto N56_3;
+ else goto N56_4;
+
+N56_3:
+ if attribute(catid) in (100300019, 100400038, 100200028, 100200055, 100200232, 100300146) then goto T56_1;
+ else goto T56_2;
+
+T56_1:
+ response = -0.0376899039;
+ goto D56;
+
+T56_2:
+ response = -0.0084354615;
+ goto D56;
+
+N56_4:
+ if attribute(catid) in (100200034, 100200186, 100400141, 100300027, 100300073, 100300122, 100300127, 100400079, 100200170, 100200087, 100300200) then goto T56_3;
+ else goto N56_5;
+
+T56_3:
+ response = 0.0049554661;
+ goto D56;
+
+N56_5:
+ if attribute(catid) in (0, 100200171, 100300058, 100300165, 100300005, 100200172, 100200053, 100300126, 100400142, 100200192, 100300169, 100200176) then goto N56_6;
+ else goto T56_6;
+
+N56_6:
+ if attribute(catid) in (0, 100300058, 100200172, 100200053, 100300126, 100400142, 100200192) then goto T56_4;
+ else goto T56_5;
+
+T56_4:
+ response = 0.0134283205;
+ goto D56;
+
+T56_5:
+ response = 0.0149300488;
+ goto D56;
+
+T56_6:
+ response = 0.0182669992;
+ goto D56;
+
+N56_7:
+ if attribute(catid) in (100300014, 100300077, 100300121, 100200234, 100200054, 100300074, 100300066, 100200185) then goto T56_7;
+ else goto N56_8;
+
+T56_7:
+ response = 0.0260635269;
+ goto D56;
+
+N56_8:
+ if attribute(catid) in (100300011, 100300166) then goto T56_8;
+ else goto T56_9;
+
+T56_8:
+ response = 0.0344146236;
+ goto D56;
+
+T56_9:
+ response = 0.0489908315;
+ goto D56;
+
+D56:
+
+tnscore = tnscore + response;
+
+ /* Tree 58 of 200 */
+N57_1:
+ if attribute(catid) in (0, 100200171, 100300011, 100300014, 100300077, 100300166, 100200034, 100200186, 100400141, 100300165, 100300008, 100300116, 100200234, 100300126, 100400142, 100300073, 100200192, 100300065, 100300209, 100400079, 100200170, 100300169, 100400080, 100200087, 100300074, 100300066, 100300200, 100300045, 100200028, 100300076, 100200055, 100200185, 100300146) then goto N57_2;
+ else goto N57_5;
+
+N57_2:
+ if attribute(catid) in (100300011, 100300014, 100200034, 100400141, 100300165, 100300008, 100300116, 100200234, 100200192, 100300065, 100400079, 100300169, 100200087, 100300200, 100200028, 100300076, 100200055, 100300146) then goto N57_3;
+ else goto N57_4;
+
+N57_3:
+ if attribute(catid) in (100300014, 100300008, 100400079, 100300200, 100300076, 100200055) then goto T57_1;
+ else goto T57_2;
+
+T57_1:
+ response = -0.0047600269;
+ goto D57;
+
+T57_2:
+ response = 0.0060145343;
+ goto D57;
+
+N57_4:
+ if attribute(catid) in (0, 100300073, 100300209, 100400080, 100300066) then goto T57_3;
+ else goto T57_4;
+
+T57_3:
+ response = 0.0120012047;
+ goto D57;
+
+T57_4:
+ response = 0.0143999679;
+ goto D57;
+
+N57_5:
+ if attribute(catid) in (100200130, 100300058, 100300143, 100200052, 100300093, 100300102, 100200172, 100200068, 100300027, 100300121, 100300004, 100200054, 100400037, 100400038, 100300212, 100300122, 100300127, 100300007, 100200176) then goto N57_6;
+ else goto T57_7;
+
+N57_6:
+ if attribute(catid) in (100300093, 100300102, 100200172, 100200068, 100300027, 100300121, 100400037, 100300122, 100300127, 100200176) then goto T57_5;
+ else goto T57_6;
+
+T57_5:
+ response = 0.0205502481;
+ goto D57;
+
+T57_6:
+ response = 0.0247979152;
+ goto D57;
+
+T57_7:
+ response = 0.0392012352;
+ goto D57;
+
+D57:
+
+tnscore = tnscore + response;
+
+ /* Tree 59 of 200 */
+N58_1:
+ if attribute(catid) in (100300011, 100200130, 100300077, 100400141, 100300165, 100300093, 100300005, 100300008, 100300116, 100200053, 100200193, 100400038, 100200192, 100300209, 100300122, 100200087, 100300074, 100300066, 100200176, 100300045, 100200028, 100200067) then goto N58_2;
+ else goto N58_4;
+
+N58_2:
+ if attribute(catid) in (100300011, 100400141, 100300005, 100300008, 100200193, 100300209, 100300122, 100200087, 100300074, 100200176, 100200067) then goto N58_3;
+ else goto T58_3;
+
+N58_3:
+ if attribute(catid) in (100300011, 100200193, 100300209, 100200087, 100200176, 100200067) then goto T58_1;
+ else goto T58_2;
+
+T58_1:
+ response = -0.0143613312;
+ goto D58;
+
+T58_2:
+ response = -0.0022635925;
+ goto D58;
+
+T58_3:
+ response = 0.0062470659;
+ goto D58;
+
+N58_4:
+ if attribute(catid) in (0, 100300058, 100300166, 100200034, 100200186, 100200052, 100200172, 100200068, 100300027, 100300121, 100200234, 100400142, 100300073, 100400037, 100300127, 100400079, 100200170, 100300169, 100300076, 100300006, 100200232) then goto N58_5;
+ else goto N58_7;
+
+N58_5:
+ if attribute(catid) in (0, 100300166, 100200034, 100200186, 100200052, 100200234, 100400142, 100200170, 100300169, 100300076, 100300006) then goto N58_6;
+ else goto T58_6;
+
+N58_6:
+ if attribute(catid) in (100200034, 100200186, 100200234, 100200170, 100300169, 100300076, 100300006) then goto T58_4;
+ else goto T58_5;
+
+T58_4:
+ response = 0.0121738240;
+ goto D58;
+
+T58_5:
+ response = 0.0142409856;
+ goto D58;
+
+T58_6:
+ response = 0.0181125356;
+ goto D58;
+
+N58_7:
+ if attribute(catid) in (100200171, 100300014, 100300013, 100300019, 100300004, 100300126, 100300065, 100300212, 100400080, 100300007, 100300200) then goto T58_7;
+ else goto T58_8;
+
+T58_7:
+ response = 0.0273051157;
+ goto D58;
+
+T58_8:
+ response = 0.0468409464;
+ goto D58;
+
+D58:
+
+tnscore = tnscore + response;
+
+ /* Tree 60 of 200 */
+N59_1:
+ if attribute(catid) in (0, 100200171, 100200130, 100300014, 100300058, 100300077, 100300143, 100200034, 100200186, 100400141, 100300093, 100300005, 100200172, 100200068, 100300032, 100300027, 100300116, 100300121, 100200053, 100300019, 100300004, 100200054, 100300073, 100400037, 100200192, 100300065, 100300212, 100300209, 100400079, 100200170, 100300169, 100400080, 100300074, 100300066, 100200176, 100200028, 100200185, 100300006, 100200232, 100300214, 100300146) then goto N59_2;
+ else goto N59_6;
+
+N59_2:
+ if attribute(catid) in (100300077, 100300143, 100200034, 100300116, 100300019, 100300004, 100400037, 100300212, 100300209, 100300074, 100200028, 100200185, 100300006, 100300214, 100300146) then goto N59_3;
+ else goto N59_4;
+
+N59_3:
+ if attribute(catid) in (100300143, 100300019, 100300004, 100400037, 100300212, 100300209, 100300214, 100300146) then goto T59_1;
+ else goto T59_2;
+
+T59_1:
+ response = -0.0293561273;
+ goto D59;
+
+T59_2:
+ response = -0.0014538622;
+ goto D59;
+
+N59_4:
+ if attribute(catid) in (100200186, 100400141, 100300093, 100300005, 100200172, 100300121, 100200054, 100200192, 100300065, 100200232) then goto T59_3;
+ else goto N59_5;
+
+T59_3:
+ response = 0.0075212689;
+ goto D59;
+
+N59_5:
+ if attribute(catid) in (0, 100200171, 100300058, 100300032, 100200053, 100400079, 100300169) then goto T59_4;
+ else goto T59_5;
+
+T59_4:
+ response = 0.0113283464;
+ goto D59;
+
+T59_5:
+ response = 0.0146230806;
+ goto D59;
+
+N59_6:
+ if attribute(catid) in (100300013, 100300166, 100300165, 100300008, 100200234, 100400142, 100200193, 100400038, 100300122, 100200087, 100300007, 100300076) then goto T59_6;
+ else goto T59_7;
+
+T59_6:
+ response = 0.0245202493;
+ goto D59;
+
+T59_7:
+ response = 0.0398629845;
+ goto D59;
+
+D59:
+
+tnscore = tnscore + response;
+
+ /* Tree 61 of 200 */
+N60_1:
+ if attribute(catid) in (0, 100200171, 100300011, 100200130, 100300014, 100300013, 100200034, 100200186, 100300165, 100200052, 100300093, 100300102, 100200172, 100300008, 100300032, 100300116, 100200234, 100200053, 100300019, 100300004, 100400142, 100300073, 100400037, 100200192, 100300212, 100300209, 100300127, 100400079, 100300169, 100200087, 100300074, 100300007, 100300045, 100200028, 100200067, 100300006) then goto N60_2;
+ else goto N60_6;
+
+N60_2:
+ if attribute(catid) in (100300011, 100300008, 100200234, 100300019, 100300004, 100400142, 100400037, 100300209, 100200087, 100200028, 100200067, 100300006) then goto N60_3;
+ else goto N60_4;
+
+N60_3:
+ if attribute(catid) in (100300011, 100300008, 100300019, 100300209, 100200087, 100200028, 100200067, 100300006) then goto T60_1;
+ else goto T60_2;
+
+T60_1:
+ response = -0.0125504180;
+ goto D60;
+
+T60_2:
+ response = 0.0005157971;
+ goto D60;
+
+N60_4:
+ if attribute(catid) in (0, 100200171, 100200130, 100300014, 100300165, 100300102, 100300116, 100200053, 100400079, 100300169, 100300074, 100300007, 100300045) then goto N60_5;
+ else goto T60_5;
+
+N60_5:
+ if attribute(catid) in (100200171, 100200130, 100300014, 100300102, 100300116, 100400079, 100300169, 100300074, 100300007, 100300045) then goto T60_3;
+ else goto T60_4;
+
+T60_3:
+ response = 0.0073987391;
+ goto D60;
+
+T60_4:
+ response = 0.0089320166;
+ goto D60;
+
+T60_5:
+ response = 0.0143250256;
+ goto D60;
+
+N60_6:
+ if attribute(catid) in (100300166, 100400141, 100300005, 100200068, 100300027, 100300121, 100300126, 100200054, 100400038, 100300065, 100300122, 100200170, 100400080, 100300066, 100200176, 100300076, 100200185) then goto N60_7;
+ else goto T60_8;
+
+N60_7:
+ if attribute(catid) in (100400141, 100300027, 100300121, 100300126, 100300065, 100200170, 100400080, 100300066) then goto T60_6;
+ else goto T60_7;
+
+T60_6:
+ response = 0.0203552723;
+ goto D60;
+
+T60_7:
+ response = 0.0282273054;
+ goto D60;
+
+T60_8:
+ response = 0.0491434915;
+ goto D60;
+
+D60:
+
+tnscore = tnscore + response;
+
+ /* Tree 62 of 200 */
+N61_1:
+ if attribute(catid) in (0, 100200171, 100200130, 100300058, 100300143, 100200034, 100200186, 100400141, 100200052, 100300005, 100300008, 100300032, 100300027, 100300116, 100300121, 100200234, 100200053, 100300126, 100400142, 100200054, 100300073, 100200193, 100400038, 100200192, 100300212, 100300209, 100300127, 100400079, 100200170, 100300169, 100400080, 100200087, 100200176, 100200067, 100300214, 100300146) then goto N61_2;
+ else goto N61_5;
+
+N61_2:
+ if attribute(catid) in (100200130, 100300058, 100300143, 100400141, 100200052, 100300005, 100300008, 100300032, 100300027, 100300121, 100200234, 100300126, 100200054, 100300073, 100400038, 100300212, 100300209, 100300127, 100200170, 100400080, 100200087, 100200176, 100200067, 100300214, 100300146) then goto N61_3;
+ else goto N61_4;
+
+N61_3:
+ if attribute(catid) in (100300058, 100300143, 100300005, 100300008, 100300032, 100300121, 100200054, 100300212, 100300209, 100200087, 100200176, 100200067, 100300214) then goto T61_1;
+ else goto T61_2;
+
+T61_1:
+ response = -0.0099342652;
+ goto D61;
+
+T61_2:
+ response = 0.0039806749;
+ goto D61;
+
+N61_4:
+ if attribute(catid) in (0, 100400142, 100200193, 100400079, 100300169) then goto T61_3;
+ else goto T61_4;
+
+T61_3:
+ response = 0.0115553152;
+ goto D61;
+
+T61_4:
+ response = 0.0135427680;
+ goto D61;
+
+N61_5:
+ if attribute(catid) in (100300014, 100300077, 100300166, 100200172, 100200068, 100300065, 100300122, 100300074, 100300066, 100300007, 100300200, 100300006) then goto N61_6;
+ else goto N61_7;
+
+N61_6:
+ if attribute(catid) in (100300077, 100200172, 100300065, 100300066, 100300200) then goto T61_5;
+ else goto T61_6;
+
+T61_5:
+ response = 0.0169304303;
+ goto D61;
+
+T61_6:
+ response = 0.0205751961;
+ goto D61;
+
+N61_7:
+ if attribute(catid) in (100300013, 100300165, 100300004, 100300076) then goto T61_7;
+ else goto T61_8;
+
+T61_7:
+ response = 0.0276389874;
+ goto D61;
+
+T61_8:
+ response = 0.0441753863;
+ goto D61;
+
+D61:
+
+tnscore = tnscore + response;
+
+ /* Tree 63 of 200 */
+N62_1:
+ if attribute(catid) in (100300011, 100300014, 100300166, 100200034, 100200186, 100400141, 100200052, 100300102, 100300005, 100200172, 100300008, 100200068, 100300032, 100300027, 100200053, 100300019, 100400142, 100400037, 100200193, 100200170, 100300074, 100300007, 100300200, 100200028, 100200067, 100300006) then goto N62_2;
+ else goto N62_5;
+
+N62_2:
+ if attribute(catid) in (100200034, 100300102, 100300005, 100300008, 100200068, 100300032, 100300019, 100400037, 100300007, 100300200, 100200067, 100300006) then goto N62_3;
+ else goto N62_4;
+
+N62_3:
+ if attribute(catid) in (100300005, 100300008, 100300032, 100300019, 100400037, 100200067) then goto T62_1;
+ else goto T62_2;
+
+T62_1:
+ response = -0.0322662364;
+ goto D62;
+
+T62_2:
+ response = -0.0081793105;
+ goto D62;
+
+N62_4:
+ if attribute(catid) in (100300011, 100200186, 100200052, 100400142, 100200193, 100300074, 100200028) then goto T62_3;
+ else goto T62_4;
+
+T62_3:
+ response = 0.0017199453;
+ goto D62;
+
+T62_4:
+ response = 0.0060178344;
+ goto D62;
+
+N62_5:
+ if attribute(catid) in (0, 100300013, 100300077, 100300143, 100300165, 100300116, 100300121, 100200234, 100300004, 100300073, 100400038, 100200192, 100300065, 100300127, 100400079, 100300169, 100400080, 100300066, 100200176) then goto N62_6;
+ else goto N62_8;
+
+N62_6:
+ if attribute(catid) in (100300013, 100300077, 100300143, 100300165, 100300116, 100300121, 100200234, 100300073, 100300065, 100300169, 100400080, 100300066) then goto N62_7;
+ else goto T62_7;
+
+N62_7:
+ if attribute(catid) in (100300013, 100300143, 100300165, 100300116, 100300169) then goto T62_5;
+ else goto T62_6;
+
+T62_5:
+ response = 0.0103393155;
+ goto D62;
+
+T62_6:
+ response = 0.0134250404;
+ goto D62;
+
+T62_7:
+ response = 0.0166558979;
+ goto D62;
+
+N62_8:
+ if attribute(catid) in (100200171, 100200130, 100200087, 100300076, 100200232) then goto T62_8;
+ else goto T62_9;
+
+T62_8:
+ response = 0.0241920527;
+ goto D62;
+
+T62_9:
+ response = 0.0369765147;
+ goto D62;
+
+D62:
+
+tnscore = tnscore + response;
+
+ /* Tree 64 of 200 */
+N63_1:
+ if attribute(catid) in (0, 100200171, 100200130, 100300014, 100300058, 100300013, 100300077, 100300143, 100200034, 100400141, 100300093, 100200172, 100300008, 100200068, 100300027, 100300116, 100200234, 100200053, 100300019, 100300004, 100400142, 100200054, 100300073, 100400037, 100400038, 100300065, 100300212, 100300209, 100300122, 100400079, 100200170, 100300169, 100200087, 100300074, 100300007, 100200176, 100300200, 100200028, 100200185, 100300214, 100300146) then goto N63_2;
+ else goto N63_6;
+
+N63_2:
+ if attribute(catid) in (100300143, 100300008, 100300019, 100400037, 100300212, 100300209, 100300007, 100300200, 100200185, 100300214) then goto T63_1;
+ else goto N63_3;
+
+T63_1:
+ response = -0.0243180335;
+ goto D63;
+
+N63_3:
+ if attribute(catid) in (100300014, 100300013, 100200034, 100400141, 100200172, 100200068, 100400142, 100200054, 100400079, 100200170, 100300169, 100200087, 100300146) then goto N63_4;
+ else goto N63_5;
+
+N63_4:
+ if attribute(catid) in (100300014, 100300013, 100200034, 100200068, 100400142, 100200054, 100300146) then goto T63_2;
+ else goto T63_3;
+
+T63_2:
+ response = 0.0025036422;
+ goto D63;
+
+T63_3:
+ response = 0.0060784676;
+ goto D63;
+
+N63_5:
+ if attribute(catid) in (0, 100200171, 100200130, 100200234, 100200053, 100300004, 100300073, 100400038, 100200028) then goto T63_4;
+ else goto T63_5;
+
+T63_4:
+ response = 0.0109550470;
+ goto D63;
+
+T63_5:
+ response = 0.0136122663;
+ goto D63;
+
+N63_6:
+ if attribute(catid) in (100300011, 100300166, 100200186, 100300165, 100200052, 100300102, 100200192, 100300127, 100400080, 100300066, 100300045, 100300076) then goto N63_7;
+ else goto T63_8;
+
+N63_7:
+ if attribute(catid) in (100300011, 100200186, 100200052, 100300102, 100200192, 100400080, 100300066, 100300045, 100300076) then goto T63_6;
+ else goto T63_7;
+
+T63_6:
+ response = 0.0188007232;
+ goto D63;
+
+T63_7:
+ response = 0.0246438709;
+ goto D63;
+
+T63_8:
+ response = 0.0423545435;
+ goto D63;
+
+D63:
+
+tnscore = tnscore + response;
+
+ /* Tree 65 of 200 */
+N64_1:
+ if attribute(catid) in (0, 100200171, 100300011, 100300014, 100300058, 100300077, 100200186, 100400141, 100300093, 100300102, 100300032, 100300121, 100200234, 100300126, 100400142, 100300073, 100400037, 100200193, 100200192, 100300122, 100200087, 100300074, 100300066, 100200176, 100300200, 100200028, 100200067, 100200055, 100200185, 100300006, 100200232, 100300214, 100300146) then goto N64_2;
+ else goto N64_5;
+
+N64_2:
+ if attribute(catid) in (100300093, 100300032, 100200234, 100300126, 100400037, 100200176, 100300200, 100200067, 100200055, 100200185, 100300006, 100200232, 100300214, 100300146) then goto T64_1;
+ else goto N64_3;
+
+T64_1:
+ response = -0.0157352941;
+ goto D64;
+
+N64_3:
+ if attribute(catid) in (100200171, 100300011, 100300058, 100300102, 100200193, 100200192, 100300122, 100300074, 100200028) then goto T64_2;
+ else goto N64_4;
+
+T64_2:
+ response = 0.0030409464;
+ goto D64;
+
+N64_4:
+ if attribute(catid) in (0, 100200186, 100400141, 100300121, 100400142) then goto T64_3;
+ else goto T64_4;
+
+T64_3:
+ response = 0.0080637180;
+ goto D64;
+
+T64_4:
+ response = 0.0107191663;
+ goto D64;
+
+N64_5:
+ if attribute(catid) in (100200130, 100300013, 100300166, 100200034, 100300165, 100200052, 100300005, 100200172, 100300008, 100200068, 100300027, 100200054, 100400038, 100300127, 100400079, 100200170, 100300169, 100400080, 100300007, 100300076) then goto N64_6;
+ else goto N64_8;
+
+N64_6:
+ if attribute(catid) in (100200130, 100300013, 100300165, 100200172, 100300027, 100200054, 100400079, 100200170, 100300169, 100400080) then goto N64_7;
+ else goto T64_7;
+
+N64_7:
+ if attribute(catid) in (100300013, 100200172, 100300027, 100400079, 100200170, 100400080) then goto T64_5;
+ else goto T64_6;
+
+T64_5:
+ response = 0.0154977719;
+ goto D64;
+
+T64_6:
+ response = 0.0195741488;
+ goto D64;
+
+T64_7:
+ response = 0.0236445967;
+ goto D64;
+
+N64_8:
+ if attribute(catid) in (100300116, 100200053, 100300065) then goto T64_8;
+ else goto T64_9;
+
+T64_8:
+ response = 0.0324449764;
+ goto D64;
+
+T64_9:
+ response = 0.0541648949;
+ goto D64;
+
+D64:
+
+tnscore = tnscore + response;
+
+ /* Tree 66 of 200 */
+N65_1:
+ if attribute(catid) in (100300011, 100300058, 100300143, 100200052, 100300093, 100200068, 100300032, 100300027, 100200054, 100300073, 100400037, 100200193, 100200192, 100300209, 100300122, 100300127, 100400079, 100200170, 100400080, 100300200, 100300045, 100200055, 100200185, 100300214, 100300146) then goto N65_2;
+ else goto N65_4;
+
+N65_2:
+ if attribute(catid) in (100300143, 100200068, 100300032, 100300209, 100200170, 100200055, 100300214, 100300146) then goto T65_1;
+ else goto N65_3;
+
+T65_1:
+ response = -0.0241523508;
+ goto D65;
+
+N65_3:
+ if attribute(catid) in (100300011, 100300058, 100200054, 100200192, 100300127, 100400080, 100300200, 100300045, 100200185) then goto T65_2;
+ else goto T65_3;
+
+T65_2:
+ response = -0.0009657677;
+ goto D65;
+
+T65_3:
+ response = 0.0051241150;
+ goto D65;
+
+N65_4:
+ if attribute(catid) in (0, 100300014, 100300013, 100300077, 100200034, 100200186, 100400141, 100300165, 100300102, 100300116, 100300121, 100200234, 100300126, 100400142, 100400038, 100300065, 100300169, 100300066, 100300006) then goto N65_5;
+ else goto N65_7;
+
+N65_5:
+ if attribute(catid) in (0, 100300014, 100300013, 100300077, 100200034, 100200186, 100400141, 100300102, 100300116, 100300126, 100300006) then goto N65_6;
+ else goto T65_6;
+
+N65_6:
+ if attribute(catid) in (100300013, 100300077, 100200034, 100200186, 100400141) then goto T65_4;
+ else goto T65_5;
+
+T65_4:
+ response = 0.0090067376;
+ goto D65;
+
+T65_5:
+ response = 0.0127908297;
+ goto D65;
+
+T65_6:
+ response = 0.0164902475;
+ goto D65;
+
+N65_7:
+ if attribute(catid) in (100200171, 100200130, 100300166, 100200172, 100200053, 100200087, 100200176, 100300076) then goto T65_7;
+ else goto T65_8;
+
+T65_7:
+ response = 0.0228127126;
+ goto D65;
+
+T65_8:
+ response = 0.0418200655;
+ goto D65;
+
+D65:
+
+tnscore = tnscore + response;
+
+ /* Tree 67 of 200 */
+N66_1:
+ if attribute(catid) in (100200171, 100200130, 100300014, 100300013, 100300077, 100300166, 100200186, 100400141, 100300165, 100200052, 100200172, 100300008, 100200068, 100300032, 100300027, 100300116, 100200234, 100200053, 100300019, 100300004, 100400142, 100300073, 100400038, 100300212, 100300209, 100400079, 100300169, 100400080, 100300074, 100300007, 100200176, 100300200, 100200028, 100200067, 100300006, 100300146) then goto N66_2;
+ else goto N66_6;
+
+N66_2:
+ if attribute(catid) in (100300013, 100400141, 100300032, 100200053, 100300019, 100300004, 100300212, 100300209, 100400080, 100300007, 100200176, 100200028, 100200067) then goto N66_3;
+ else goto N66_4;
+
+N66_3:
+ if attribute(catid) in (100300032, 100300019, 100300212, 100300209, 100200176, 100200028, 100200067) then goto T66_1;
+ else goto T66_2;
+
+T66_1:
+ response = -0.0386082590;
+ goto D66;
+
+T66_2:
+ response = -0.0063382264;
+ goto D66;
+
+N66_4:
+ if attribute(catid) in (100300014, 100300077, 100200186, 100300165, 100300008, 100200234, 100400142, 100300073, 100400038, 100300169, 100300074, 100300200, 100300146) then goto N66_5;
+ else goto T66_5;
+
+N66_5:
+ if attribute(catid) in (100300077, 100300008, 100300073, 100400038, 100300074, 100300146) then goto T66_3;
+ else goto T66_4;
+
+T66_3:
+ response = 0.0014081125;
+ goto D66;
+
+T66_4:
+ response = 0.0048469355;
+ goto D66;
+
+T66_5:
+ response = 0.0085143275;
+ goto D66;
+
+N66_6:
+ if attribute(catid) in (0, 100300058, 100200034, 100300121, 100300126, 100200054, 100200193, 100200192, 100300065, 100300122, 100300127, 100200170, 100200087, 100300066, 100300045) then goto N66_7;
+ else goto T66_8;
+
+N66_7:
+ if attribute(catid) in (0, 100300058, 100200193, 100200192, 100300122, 100300127, 100200170, 100200087) then goto T66_6;
+ else goto T66_7;
+
+T66_6:
+ response = 0.0154377122;
+ goto D66;
+
+T66_7:
+ response = 0.0222690511;
+ goto D66;
+
+T66_8:
+ response = 0.0445329146;
+ goto D66;
+
+D66:
+
+tnscore = tnscore + response;
+
+ /* Tree 68 of 200 */
+N67_1:
+ if attribute(catid) in (100300011, 100300058, 100300013, 100200186, 100300165, 100300102, 100300008, 100200068, 100300027, 100300116, 100300121, 100200234, 100300019, 100200054, 100300212, 100300127, 100400079, 100200087, 100300074, 100300007, 100200176, 100300076, 100200055, 100300006, 100200232, 100300214) then goto N67_2;
+ else goto N67_4;
+
+N67_2:
+ if attribute(catid) in (100300011, 100300165, 100300102, 100300027, 100300121, 100300019, 100200087, 100300074, 100200176, 100200055, 100300214) then goto T67_1;
+ else goto N67_3;
+
+T67_1:
+ response = -0.0084086451;
+ goto D67;
+
+N67_3:
+ if attribute(catid) in (100300013, 100200186, 100300008, 100200234, 100200054, 100300212, 100300127, 100300006) then goto T67_2;
+ else goto T67_3;
+
+T67_2:
+ response = 0.0005019617;
+ goto D67;
+
+T67_3:
+ response = 0.0061914097;
+ goto D67;
+
+N67_4:
+ if attribute(catid) in (0, 100200171, 100200130, 100300014, 100300077, 100300166, 100200052, 100300093, 100300005, 100200172, 100200053, 100300004, 100300126, 100400142, 100300073, 100400038, 100200192, 100300122, 100200170, 100300169, 100300066, 100300200, 100300045) then goto N67_5;
+ else goto N67_7;
+
+N67_5:
+ if attribute(catid) in (100200130, 100300014, 100300093, 100300005, 100300004, 100300126, 100400038, 100200192, 100300169, 100300200) then goto T67_4;
+ else goto N67_6;
+
+T67_4:
+ response = 0.0114280621;
+ goto D67;
+
+N67_6:
+ if attribute(catid) in (0, 100200171, 100300077, 100200052, 100300073, 100300066) then goto T67_5;
+ else goto T67_6;
+
+T67_5:
+ response = 0.0141959004;
+ goto D67;
+
+T67_6:
+ response = 0.0160019821;
+ goto D67;
+
+N67_7:
+ if attribute(catid) in (100300143, 100400141, 100300032, 100300065, 100300209, 100200067) then goto T67_7;
+ else goto T67_8;
+
+T67_7:
+ response = 0.0245106044;
+ goto D67;
+
+T67_8:
+ response = 0.0334093506;
+ goto D67;
+
+D67:
+
+tnscore = tnscore + response;
+
+ /* Tree 69 of 200 */
+N68_1:
+ if attribute(catid) in (100300011, 100200130, 100300014, 100300143, 100400141, 100300165, 100200052, 100300005, 100300027, 100300116, 100200053, 100300019, 100300004, 100300073, 100400037, 100300209, 100400079, 100200170, 100300169, 100300074, 100300007, 100200176, 100300045, 100300076, 100200067, 100200055, 100200185, 100300214) then goto N68_2;
+ else goto N68_4;
+
+N68_2:
+ if attribute(catid) in (100300011, 100300005, 100300019, 100400037, 100300209, 100200176, 100200055, 100200185, 100300214) then goto T68_1;
+ else goto N68_3;
+
+T68_1:
+ response = -0.0208490757;
+ goto D68;
+
+N68_3:
+ if attribute(catid) in (100300143, 100400141, 100300116, 100200053, 100300004, 100300073, 100400079, 100300074) then goto T68_2;
+ else goto T68_3;
+
+T68_2:
+ response = 0.0000297283;
+ goto D68;
+
+T68_3:
+ response = 0.0064274847;
+ goto D68;
+
+N68_4:
+ if attribute(catid) in (0, 100200171, 100300058, 100300077, 100300166, 100200034, 100200186, 100200172, 100200068, 100200234, 100300126, 100200192, 100300065, 100300122, 100300127, 100400080, 100200087, 100300066, 100200028, 100300006) then goto N68_5;
+ else goto N68_7;
+
+N68_5:
+ if attribute(catid) in (0, 100300077, 100300166, 100200186, 100200172, 100300122, 100200087, 100200028, 100300006) then goto N68_6;
+ else goto T68_6;
+
+N68_6:
+ if attribute(catid) in (0, 100300166, 100300122, 100200028) then goto T68_4;
+ else goto T68_5;
+
+T68_4:
+ response = 0.0124031076;
+ goto D68;
+
+T68_5:
+ response = 0.0150987823;
+ goto D68;
+
+T68_6:
+ response = 0.0188364733;
+ goto D68;
+
+N68_7:
+ if attribute(catid) in (100300093, 100300102, 100300008, 100300121, 100400142, 100200054, 100300200, 100200232) then goto T68_7;
+ else goto T68_8;
+
+T68_7:
+ response = 0.0293407993;
+ goto D68;
+
+T68_8:
+ response = 0.0513888162;
+ goto D68;
+
+D68:
+
+tnscore = tnscore + response;
+
+ /* Tree 70 of 200 */
+N69_1:
+ if attribute(catid) in (0, 100200171, 100200130, 100300014, 100300077, 100400141, 100300165, 100200052, 100300008, 100200068, 100300116, 100300121, 100200234, 100200053, 100300019, 100300004, 100300126, 100200054, 100300073, 100400038, 100200192, 100300065, 100300212, 100300127, 100400079, 100200170, 100300169, 100400080, 100200176, 100300200, 100300076, 100200067, 100200185, 100200232) then goto N69_2;
+ else goto N69_6;
+
+N69_2:
+ if attribute(catid) in (100300014, 100300008, 100300121, 100200234, 100200053, 100300019, 100300212, 100400080, 100200176, 100300200, 100200067, 100200185, 100200232) then goto N69_3;
+ else goto N69_4;
+
+N69_3:
+ if attribute(catid) in (100300008, 100300019, 100300212, 100200176, 100300200, 100200067, 100200185, 100200232) then goto T69_1;
+ else goto T69_2;
+
+T69_1:
+ response = -0.0190877492;
+ goto D69;
+
+T69_2:
+ response = -0.0007557548;
+ goto D69;
+
+N69_4:
+ if attribute(catid) in (100400141, 100200052, 100300126, 100200054, 100300065, 100400079, 100200170, 100300169) then goto T69_3;
+ else goto N69_5;
+
+T69_3:
+ response = 0.0071693422;
+ goto D69;
+
+N69_5:
+ if attribute(catid) in (0, 100200130, 100300073, 100400038, 100200192, 100300127, 100300076) then goto T69_4;
+ else goto T69_5;
+
+T69_4:
+ response = 0.0119746374;
+ goto D69;
+
+T69_5:
+ response = 0.0136797362;
+ goto D69;
+
+N69_6:
+ if attribute(catid) in (100300011, 100300058, 100300166, 100200034, 100200186, 100300093, 100300005, 100200172, 100300027, 100400142, 100300122, 100200087, 100300066, 100300045, 100300006) then goto N69_7;
+ else goto T69_8;
+
+N69_7:
+ if attribute(catid) in (100300058, 100200186, 100300093, 100300005, 100200172, 100300027, 100400142, 100300122, 100200087, 100300045, 100300006) then goto T69_6;
+ else goto T69_7;
+
+T69_6:
+ response = 0.0192765099;
+ goto D69;
+
+T69_7:
+ response = 0.0227594602;
+ goto D69;
+
+T69_8:
+ response = 0.0348341149;
+ goto D69;
+
+D69:
+
+tnscore = tnscore + response;
+
+ /* Tree 71 of 200 */
+N70_1:
+ if attribute(catid) in (100300011, 100300013, 100300143, 100300008, 100200068, 100300019, 100200054, 100400079, 100200170, 100400080, 100300066, 100200176, 100300200, 100200028, 100300006, 100300146) then goto N70_2;
+ else goto N70_3;
+
+N70_2:
+ if attribute(catid) in (100300011, 100300013, 100300008, 100300019, 100200176, 100200028) then goto T70_1;
+ else goto T70_2;
+
+T70_1:
+ response = -0.0154031193;
+ goto D70;
+
+T70_2:
+ response = -0.0007651129;
+ goto D70;
+
+N70_3:
+ if attribute(catid) in (0, 100200171, 100200130, 100300014, 100300077, 100300166, 100200034, 100200186, 100400141, 100200052, 100300102, 100200172, 100300027, 100300121, 100300004, 100400142, 100300073, 100400037, 100200193, 100200192, 100300065, 100300209, 100300122, 100300127, 100300169, 100200087, 100300074, 100300007, 100300045, 100300076, 100200067) then goto N70_4;
+ else goto N70_7;
+
+N70_4:
+ if attribute(catid) in (100200171, 100200130, 100300077, 100200034, 100400141, 100300102, 100200172, 100300027, 100300004, 100400142, 100400037, 100300127, 100200087, 100300074, 100300007, 100300045) then goto N70_5;
+ else goto N70_6;
+
+N70_5:
+ if attribute(catid) in (100200130, 100400141, 100300102, 100200172, 100300027, 100300004, 100400037, 100300127, 100300074, 100300007) then goto T70_3;
+ else goto T70_4;
+
+T70_3:
+ response = 0.0066677335;
+ goto D70;
+
+T70_4:
+ response = 0.0095565475;
+ goto D70;
+
+N70_6:
+ if attribute(catid) in (0, 100200052, 100300065, 100300169, 100300076, 100200067) then goto T70_5;
+ else goto T70_6;
+
+T70_5:
+ response = 0.0113431678;
+ goto D70;
+
+T70_6:
+ response = 0.0156844830;
+ goto D70;
+
+N70_7:
+ if attribute(catid) in (100300165, 100300116, 100200234, 100200053, 100300126, 100400038, 100200185, 100200232) then goto T70_7;
+ else goto T70_8;
+
+T70_7:
+ response = 0.0231181080;
+ goto D70;
+
+T70_8:
+ response = 0.0499183157;
+ goto D70;
+
+D70:
+
+tnscore = tnscore + response;
+
+ /* Tree 72 of 200 */
+N71_1:
+ if attribute(catid) in (0, 100200171, 100300011, 100200130, 100300058, 100300013, 100300077, 100300166, 100200034, 100200186, 100400141, 100200052, 100300093, 100300102, 100300005, 100200172, 100300032, 100300027, 100300121, 100200234, 100300004, 100300126, 100400142, 100300073, 100400037, 100200193, 100300065, 100300209, 100200170, 100300169, 100400080, 100300066, 100300200, 100300045, 100300076, 100200055, 100300006, 100200232, 100300146) then goto N71_2;
+ else goto N71_6;
+
+N71_2:
+ if attribute(catid) in (100200171, 100200130, 100300058, 100200034, 100400141, 100300093, 100300102, 100300005, 100300032, 100300027, 100300073, 100400037, 100300209, 100400080, 100300200, 100200055, 100200232, 100300146) then goto N71_3;
+ else goto N71_5;
+
+N71_3:
+ if attribute(catid) in (100300102, 100300032, 100300027, 100400037, 100300209, 100400080, 100200055, 100300146) then goto T71_1;
+ else goto N71_4;
+
+T71_1:
+ response = -0.0210007071;
+ goto D71;
+
+N71_4:
+ if attribute(catid) in (100200171, 100300058, 100200034) then goto T71_2;
+ else goto T71_3;
+
+T71_2:
+ response = -0.0023338437;
+ goto D71;
+
+T71_3:
+ response = 0.0034194175;
+ goto D71;
+
+N71_5:
+ if attribute(catid) in (0, 100300077, 100200186, 100200234, 100300126, 100400142, 100200193, 100300065, 100200170, 100300169, 100300066, 100300045) then goto T71_4;
+ else goto T71_5;
+
+T71_4:
+ response = 0.0096608445;
+ goto D71;
+
+T71_5:
+ response = 0.0144728932;
+ goto D71;
+
+N71_6:
+ if attribute(catid) in (100300014, 100300165, 100300008, 100200068, 100200192, 100300122, 100300127, 100400079, 100200087, 100300074, 100200028, 100200185) then goto N71_7;
+ else goto T71_8;
+
+N71_7:
+ if attribute(catid) in (100300014, 100300165, 100200068, 100200192, 100400079, 100200087) then goto T71_6;
+ else goto T71_7;
+
+T71_6:
+ response = 0.0193246792;
+ goto D71;
+
+T71_7:
+ response = 0.0247702235;
+ goto D71;
+
+T71_8:
+ response = 0.0360951958;
+ goto D71;
+
+D71:
+
+tnscore = tnscore + response;
+
+ /* Tree 73 of 200 */
+N72_1:
+ if attribute(catid) in (100300011, 100300143, 100200186, 100400141, 100300165, 100300008, 100300032, 100300116, 100300019, 100300004, 100300126, 100400142, 100300073, 100400037, 100400038, 100200192, 100300212, 100300122, 100300169, 100400080, 100300066, 100300007, 100300076, 100200067, 100200055, 100200185, 100300146) then goto N72_2;
+ else goto N72_4;
+
+N72_2:
+ if attribute(catid) in (100300143, 100300032, 100300019, 100300126, 100400038, 100200067, 100200055, 100200185) then goto T72_1;
+ else goto N72_3;
+
+T72_1:
+ response = -0.0215257824;
+ goto D72;
+
+N72_3:
+ if attribute(catid) in (100300011, 100400141, 100300008, 100300004, 100400142, 100300073, 100400037, 100200192, 100300066, 100300076, 100300146) then goto T72_2;
+ else goto T72_3;
+
+T72_2:
+ response = 0.0024762462;
+ goto D72;
+
+T72_3:
+ response = 0.0073887199;
+ goto D72;
+
+N72_4:
+ if attribute(catid) in (0, 100200171, 100300014, 100300058, 100300013, 100300077, 100300166, 100200034, 100300093, 100200172, 100200068, 100300027, 100200053, 100300065, 100300209, 100300127, 100400079, 100200170, 100200087, 100300074, 100300200) then goto N72_5;
+ else goto N72_6;
+
+N72_5:
+ if attribute(catid) in (0, 100300014, 100300013, 100300077, 100200034, 100300093, 100200172, 100300027, 100200053, 100300209, 100400079, 100300074, 100300200) then goto T72_4;
+ else goto T72_5;
+
+T72_4:
+ response = 0.0122813047;
+ goto D72;
+
+T72_5:
+ response = 0.0177730971;
+ goto D72;
+
+N72_6:
+ if attribute(catid) in (100200130, 100300121, 100200234, 100200193, 100200176, 100200028) then goto T72_6;
+ else goto T72_7;
+
+T72_6:
+ response = 0.0248372595;
+ goto D72;
+
+T72_7:
+ response = 0.0378712543;
+ goto D72;
+
+D72:
+
+tnscore = tnscore + response;
+
+ /* Tree 74 of 200 */
+N73_1:
+ if attribute(catid) in (100200171, 100200186, 100300165, 100300102, 100300005, 100200068, 100300032, 100300121, 100200234, 100200053, 100300019, 100300004, 100300126, 100400142, 100400038, 100300212, 100300122, 100400079, 100400080, 100200087, 100200067, 100200055, 100200232) then goto N73_2;
+ else goto N73_5;
+
+N73_2:
+ if attribute(catid) in (100300102, 100200068, 100300032, 100300019, 100300126, 100400038, 100300212, 100200067, 100200055, 100200232) then goto N73_3;
+ else goto N73_4;
+
+N73_3:
+ if attribute(catid) in (100300102, 100200068, 100300032, 100300019, 100200067, 100200055, 100200232) then goto T73_1;
+ else goto T73_2;
+
+T73_1:
+ response = -0.0209289749;
+ goto D73;
+
+T73_2:
+ response = -0.0056438478;
+ goto D73;
+
+N73_4:
+ if attribute(catid) in (100200186, 100300165, 100300121, 100200053, 100300004, 100400079, 100400080, 100200087) then goto T73_3;
+ else goto T73_4;
+
+T73_3:
+ response = 0.0024863738;
+ goto D73;
+
+T73_4:
+ response = 0.0054172149;
+ goto D73;
+
+N73_5:
+ if attribute(catid) in (0, 100300011, 100200130, 100300014, 100300058, 100300166, 100400141, 100200052, 100200172, 100300027, 100300073, 100200192, 100300127, 100200170, 100300169, 100300066, 100300200, 100200028) then goto N73_6;
+ else goto N73_8;
+
+N73_6:
+ if attribute(catid) in (0, 100300014, 100300058, 100300166, 100400141, 100200052, 100200192, 100300066) then goto N73_7;
+ else goto T73_7;
+
+N73_7:
+ if attribute(catid) in (100300014, 100300058, 100300166, 100400141, 100200192) then goto T73_5;
+ else goto T73_6;
+
+T73_5:
+ response = 0.0098208012;
+ goto D73;
+
+T73_6:
+ response = 0.0120469551;
+ goto D73;
+
+T73_7:
+ response = 0.0162551324;
+ goto D73;
+
+N73_8:
+ if attribute(catid) in (100300077, 100200034, 100300093, 100300008, 100300116, 100200054, 100300065, 100300074, 100300007, 100200176) then goto T73_8;
+ else goto T73_9;
+
+T73_8:
+ response = 0.0250861627;
+ goto D73;
+
+T73_9:
+ response = 0.0414462132;
+ goto D73;
+
+D73:
+
+tnscore = tnscore + response;
+
+ /* Tree 75 of 200 */
+N74_1:
+ if attribute(catid) in (0, 100200171, 100300011, 100200130, 100300058, 100300077, 100200034, 100200186, 100200052, 100300093, 100300102, 100200068, 100300027, 100300121, 100200234, 100200053, 100300019, 100300004, 100200054, 100300073, 100400037, 100200192, 100300065, 100300212, 100300209, 100300122, 100300127, 100400079, 100200170, 100300169, 100400080, 100300074, 100300066, 100200176, 100300076, 100200067, 100300006, 100300214) then goto N74_2;
+ else goto N74_6;
+
+N74_2:
+ if attribute(catid) in (100200052, 100200068, 100200234, 100300019, 100300004, 100300212, 100400079, 100300169, 100300074, 100200176, 100300076, 100200067, 100300214) then goto N74_3;
+ else goto N74_4;
+
+N74_3:
+ if attribute(catid) in (100200234, 100300019, 100300004, 100300212, 100300214) then goto T74_1;
+ else goto T74_2;
+
+T74_1:
+ response = -0.0247332845;
+ goto D74;
+
+T74_2:
+ response = -0.0060255621;
+ goto D74;
+
+N74_4:
+ if attribute(catid) in (0, 100300058, 100300077, 100200034, 100200186, 100300093, 100300102, 100300027, 100300121, 100200054, 100300073, 100400037, 100200192, 100300122, 100300127, 100300006) then goto N74_5;
+ else goto T74_5;
+
+N74_5:
+ if attribute(catid) in (100300058, 100200186, 100300093, 100300121, 100300073, 100400037, 100200192, 100300127) then goto T74_3;
+ else goto T74_4;
+
+T74_3:
+ response = 0.0043627132;
+ goto D74;
+
+T74_4:
+ response = 0.0088967157;
+ goto D74;
+
+T74_5:
+ response = 0.0143359261;
+ goto D74;
+
+N74_6:
+ if attribute(catid) in (100300143, 100200172, 100400038, 100200087, 100300200, 100200028, 100200185) then goto T74_6;
+ else goto N74_7;
+
+T74_6:
+ response = 0.0202781940;
+ goto D74;
+
+N74_7:
+ if attribute(catid) in (100300014, 100300166, 100400141, 100300165, 100300005, 100300116, 100300126, 100400142) then goto T74_7;
+ else goto T74_8;
+
+T74_7:
+ response = 0.0288593151;
+ goto D74;
+
+T74_8:
+ response = 0.0450652060;
+ goto D74;
+
+D74:
+
+tnscore = tnscore + response;
+
+ /* Tree 76 of 200 */
+N75_1:
+ if attribute(catid) in (100200171, 100300011, 100300058, 100300013, 100300077, 100300166, 100200186, 100300093, 100300005, 100300008, 100300027, 100300126, 100400142, 100200054, 100300073, 100400037, 100400038, 100200192, 100300209, 100300127, 100400079, 100300169, 100400080, 100200087, 100300074, 100200176, 100200028, 100200185) then goto N75_2;
+ else goto N75_5;
+
+N75_2:
+ if attribute(catid) in (100300011, 100300013, 100300008, 100300027, 100300126, 100200054, 100400038, 100300209, 100300127, 100400080, 100200185) then goto N75_3;
+ else goto N75_4;
+
+N75_3:
+ if attribute(catid) in (100300011, 100300013, 100300008, 100300126, 100200054, 100200185) then goto T75_1;
+ else goto T75_2;
+
+T75_1:
+ response = -0.0134142246;
+ goto D75;
+
+T75_2:
+ response = -0.0021611460;
+ goto D75;
+
+N75_4:
+ if attribute(catid) in (100200171, 100200186, 100300093, 100400037, 100400079, 100200087, 100300074, 100200176, 100200028) then goto T75_3;
+ else goto T75_4;
+
+T75_3:
+ response = 0.0042397431;
+ goto D75;
+
+T75_4:
+ response = 0.0082573117;
+ goto D75;
+
+N75_5:
+ if attribute(catid) in (0, 100200130, 100300143, 100400141, 100300165, 100200052, 100200172, 100200068, 100300032, 100300116, 100200234, 100200053, 100300004, 100300065, 100300122, 100200170, 100300066, 100300076, 100300006) then goto N75_6;
+ else goto T75_8;
+
+N75_6:
+ if attribute(catid) in (0, 100200130, 100400141, 100300116, 100300004, 100300076, 100300006) then goto T75_5;
+ else goto N75_7;
+
+T75_5:
+ response = 0.0125577031;
+ goto D75;
+
+N75_7:
+ if attribute(catid) in (100300143, 100300165, 100200052, 100200172, 100300032, 100300065) then goto T75_6;
+ else goto T75_7;
+
+T75_6:
+ response = 0.0151707760;
+ goto D75;
+
+T75_7:
+ response = 0.0177925563;
+ goto D75;
+
+T75_8:
+ response = 0.0282720969;
+ goto D75;
+
+D75:
+
+tnscore = tnscore + response;
+
+ /* Tree 77 of 200 */
+N76_1:
+ if attribute(catid) in (0, 100200171, 100200130, 100300014, 100300013, 100300077, 100300166, 100200034, 100200186, 100400141, 100300165, 100200052, 100300093, 100300005, 100200172, 100200068, 100300032, 100300027, 100300116, 100300121, 100200234, 100200053, 100300019, 100300004, 100300126, 100400142, 100300073, 100400038, 100200192, 100300065, 100300212, 100300209, 100300122, 100300127, 100400079, 100200170, 100400080, 100200087, 100300074, 100300066, 100300007, 100200176, 100200028, 100300076, 100200067, 100200055, 100300006, 100200232, 100300146) then goto N76_2;
+ else goto N76_7;
+
+N76_2:
+ if attribute(catid) in (100200171, 100300013, 100300093, 100300005, 100200068, 100300121, 100200234, 100300019, 100300212, 100300209, 100200170, 100200028, 100200067, 100200055, 100300006, 100300146) then goto N76_3;
+ else goto N76_4;
+
+N76_3:
+ if attribute(catid) in (100300013, 100300005, 100300121, 100300212, 100200067, 100200055) then goto T76_1;
+ else goto T76_2;
+
+T76_1:
+ response = -0.0177817471;
+ goto D76;
+
+T76_2:
+ response = -0.0032656602;
+ goto D76;
+
+N76_4:
+ if attribute(catid) in (0, 100200130, 100300014, 100300077, 100200034, 100200186, 100300165, 100200052, 100200172, 100300032, 100300027, 100300004, 100200192, 100300065, 100300122, 100300127, 100400079, 100300074, 100300066, 100300076) then goto N76_5;
+ else goto T76_6;
+
+N76_5:
+ if attribute(catid) in (100300014, 100200186, 100200052, 100300122, 100300066, 100300076) then goto T76_3;
+ else goto N76_6;
+
+T76_3:
+ response = 0.0049749252;
+ goto D76;
+
+N76_6:
+ if attribute(catid) in (100300077, 100200034, 100300165, 100200172, 100300027, 100300004, 100200192, 100400079, 100300074) then goto T76_4;
+ else goto T76_5;
+
+T76_4:
+ response = 0.0100440563;
+ goto D76;
+
+T76_5:
+ response = 0.0117011752;
+ goto D76;
+
+T76_6:
+ response = 0.0178896771;
+ goto D76;
+
+N76_7:
+ if attribute(catid) in (100300058, 100200054, 100400037, 100200193, 100300169, 100300200) then goto T76_7;
+ else goto T76_8;
+
+T76_7:
+ response = 0.0319705253;
+ goto D76;
+
+T76_8:
+ response = 0.0522115674;
+ goto D76;
+
+D76:
+
+tnscore = tnscore + response;
+
+ /* Tree 78 of 200 */
+N77_1:
+ if attribute(catid) in (100300058, 100200034, 100400141, 100300093, 100300102, 100300005, 100300008, 100300004, 100200054, 100400038, 100300212, 100300122, 100200087, 100300074, 100200176, 100200055, 100300214) then goto N77_2;
+ else goto N77_4;
+
+N77_2:
+ if attribute(catid) in (100300005, 100300008, 100300212, 100200055, 100300214) then goto T77_1;
+ else goto N77_3;
+
+T77_1:
+ response = -0.0403440609;
+ goto D77;
+
+N77_3:
+ if attribute(catid) in (100300058, 100200034, 100300102, 100300004, 100200054, 100400038, 100200176) then goto T77_2;
+ else goto T77_3;
+
+T77_2:
+ response = -0.0114254470;
+ goto D77;
+
+T77_3:
+ response = -0.0039460534;
+ goto D77;
+
+N77_4:
+ if attribute(catid) in (0, 100200171, 100300011, 100200130, 100300077, 100200052, 100200172, 100400142, 100300073, 100200193, 100200192, 100300065, 100300127, 100400079, 100200170, 100300045, 100200028, 100300006, 100200232) then goto N77_5;
+ else goto N77_7;
+
+N77_5:
+ if attribute(catid) in (0, 100300077, 100400142, 100300073, 100300065, 100300127, 100200170, 100300045, 100200028, 100300006) then goto N77_6;
+ else goto T77_6;
+
+N77_6:
+ if attribute(catid) in (100300077, 100300065, 100300127, 100200170, 100300006) then goto T77_4;
+ else goto T77_5;
+
+T77_4:
+ response = 0.0038995654;
+ goto D77;
+
+T77_5:
+ response = 0.0096106726;
+ goto D77;
+
+T77_6:
+ response = 0.0136466804;
+ goto D77;
+
+N77_7:
+ if attribute(catid) in (100300014, 100300013, 100300166, 100300165, 100200068, 100300032, 100300027, 100300121, 100200234, 100200053, 100300126, 100400080, 100300066, 100300007, 100300200) then goto N77_8;
+ else goto T77_9;
+
+N77_8:
+ if attribute(catid) in (100300013, 100300166, 100200068, 100300027, 100300121, 100200234, 100200053, 100300126, 100400080, 100300007, 100300200) then goto T77_7;
+ else goto T77_8;
+
+T77_7:
+ response = 0.0192859199;
+ goto D77;
+
+T77_8:
+ response = 0.0245003908;
+ goto D77;
+
+T77_9:
+ response = 0.0334815162;
+ goto D77;
+
+D77:
+
+tnscore = tnscore + response;
+
+ /* Tree 79 of 200 */
+N78_1:
+ if attribute(catid) in (100200171, 100200130, 100300058, 100300013, 100300077, 100300166, 100300143, 100200034, 100200186, 100300165, 100200052, 100300093, 100300008, 100200068, 100300032, 100300027, 100300116, 100200053, 100300019, 100300126, 100300073, 100200193, 100300209, 100400079, 100200170, 100300066, 100200176, 100300076, 100200067, 100200055, 100300214) then goto N78_2;
+ else goto N78_5;
+
+N78_2:
+ if attribute(catid) in (100200052, 100300008, 100300032, 100300019, 100300126, 100300209, 100200176, 100300076, 100200067, 100200055, 100300214) then goto N78_3;
+ else goto N78_4;
+
+N78_3:
+ if attribute(catid) in (100300008, 100300032, 100300209, 100200176, 100300076, 100200067, 100200055, 100300214) then goto T78_1;
+ else goto T78_2;
+
+T78_1:
+ response = -0.0379621177;
+ goto D78;
+
+T78_2:
+ response = -0.0077279547;
+ goto D78;
+
+N78_4:
+ if attribute(catid) in (100200171, 100300058, 100300013, 100300077, 100200034, 100300027, 100200053) then goto T78_3;
+ else goto T78_4;
+
+T78_3:
+ response = 0.0040069447;
+ goto D78;
+
+T78_4:
+ response = 0.0080600184;
+ goto D78;
+
+N78_5:
+ if attribute(catid) in (0, 100300011, 100300014, 100400141, 100300102, 100300005, 100200172, 100300121, 100200234, 100400037, 100400038, 100200192, 100300065, 100400080, 100300074, 100300006, 100200232) then goto N78_6;
+ else goto N78_7;
+
+N78_6:
+ if attribute(catid) in (0, 100300011, 100300014, 100400141, 100300102, 100300005, 100200172, 100400038) then goto T78_5;
+ else goto T78_6;
+
+T78_5:
+ response = 0.0135824088;
+ goto D78;
+
+T78_6:
+ response = 0.0165574836;
+ goto D78;
+
+N78_7:
+ if attribute(catid) in (100300004, 100400142, 100300127, 100300169, 100200087, 100300007, 100200185) then goto T78_7;
+ else goto T78_8;
+
+T78_7:
+ response = 0.0214892901;
+ goto D78;
+
+T78_8:
+ response = 0.0327877321;
+ goto D78;
+
+D78:
+
+tnscore = tnscore + response;
+
+ /* Tree 80 of 200 */
+N79_1:
+ if attribute(catid) in (0, 100200130, 100300014, 100300058, 100300143, 100200186, 100400141, 100300165, 100200052, 100300093, 100300102, 100300005, 100200068, 100300032, 100300121, 100200234, 100200053, 100300004, 100300126, 100400142, 100300073, 100200193, 100200192, 100300065, 100300212, 100400079, 100300169, 100200087, 100300074, 100300066, 100300007, 100200176, 100300200, 100300045, 100300076, 100200067, 100200055, 100300214) then goto N79_2;
+ else goto N79_5;
+
+N79_2:
+ if attribute(catid) in (100200130, 100300058, 100300143, 100200186, 100200052, 100300093, 100300102, 100300005, 100300121, 100200234, 100300004, 100300126, 100200193, 100300212, 100200087, 100300007, 100200176, 100300045, 100300076, 100200067, 100200055, 100300214) then goto N79_3;
+ else goto N79_4;
+
+N79_3:
+ if attribute(catid) in (100300058, 100300143, 100300102, 100300005, 100200234, 100200193, 100200087, 100200176, 100300045, 100300076, 100200067, 100200055, 100300214) then goto T79_1;
+ else goto T79_2;
+
+T79_1:
+ response = -0.0107053090;
+ goto D79;
+
+T79_2:
+ response = 0.0003635835;
+ goto D79;
+
+N79_4:
+ if attribute(catid) in (100300014, 100300165, 100200068, 100200053, 100400142, 100300073, 100300065) then goto T79_3;
+ else goto T79_4;
+
+T79_3:
+ response = 0.0065823776;
+ goto D79;
+
+T79_4:
+ response = 0.0100610854;
+ goto D79;
+
+N79_5:
+ if attribute(catid) in (100300011, 100300077, 100300166, 100200172, 100400037, 100300122, 100300127) then goto T79_5;
+ else goto N79_6;
+
+T79_5:
+ response = 0.0175946260;
+ goto D79;
+
+N79_6:
+ if attribute(catid) in (100200171, 100200034, 100300027, 100300116, 100300209, 100200170, 100400080, 100200028, 100200185) then goto T79_6;
+ else goto T79_7;
+
+T79_6:
+ response = 0.0254523278;
+ goto D79;
+
+T79_7:
+ response = 0.0397162435;
+ goto D79;
+
+D79:
+
+tnscore = tnscore + response;
+
+ /* Tree 81 of 200 */
+N80_1:
+ if attribute(catid) in (100300058, 100300013, 100300077, 100200186, 100300165, 100300093, 100200172, 100300008, 100300032, 100300027, 100300116, 100300121, 100200234, 100300019, 100400142, 100300073, 100200193, 100300212, 100400079, 100200087, 100300066, 100200176, 100300200, 100300045, 100200028, 100300076, 100200055, 100300006) then goto N80_2;
+ else goto N80_5;
+
+N80_2:
+ if attribute(catid) in (100300058, 100300013, 100200186, 100300008, 100300032, 100300027, 100300019, 100400142, 100300212, 100200087, 100200176, 100300200, 100300045, 100200055) then goto N80_3;
+ else goto N80_4;
+
+N80_3:
+ if attribute(catid) in (100300008, 100300032, 100300019, 100200176, 100300200, 100200055) then goto T80_1;
+ else goto T80_2;
+
+T80_1:
+ response = -0.0294108915;
+ goto D80;
+
+T80_2:
+ response = -0.0054927303;
+ goto D80;
+
+N80_4:
+ if attribute(catid) in (100300093, 100300116, 100300121, 100200234, 100300073, 100200193, 100400079, 100300066, 100300076, 100300006) then goto T80_3;
+ else goto T80_4;
+
+T80_3:
+ response = 0.0042404411;
+ goto D80;
+
+T80_4:
+ response = 0.0073750844;
+ goto D80;
+
+N80_5:
+ if attribute(catid) in (0, 100200171, 100200130, 100300014, 100300166, 100200034, 100400141, 100300102, 100200068, 100200053, 100300004, 100200054, 100200192, 100300065, 100300122, 100300127, 100300169, 100400080) then goto N80_6;
+ else goto N80_8;
+
+N80_6:
+ if attribute(catid) in (0, 100200171, 100300166, 100200034, 100300102, 100200053, 100300004, 100200192, 100300065, 100300122, 100300127, 100300169, 100400080) then goto N80_7;
+ else goto T80_7;
+
+N80_7:
+ if attribute(catid) in (100200171, 100300166, 100300004, 100300065, 100300127, 100300169) then goto T80_5;
+ else goto T80_6;
+
+T80_5:
+ response = 0.0124777156;
+ goto D80;
+
+T80_6:
+ response = 0.0143158121;
+ goto D80;
+
+T80_7:
+ response = 0.0177029723;
+ goto D80;
+
+N80_8:
+ if attribute(catid) in (100300011, 100300143, 100200052, 100300126, 100300074, 100200067) then goto T80_8;
+ else goto T80_9;
+
+T80_8:
+ response = 0.0257271811;
+ goto D80;
+
+T80_9:
+ response = 0.0375978662;
+ goto D80;
+
+D80:
+
+tnscore = tnscore + response;
+
+ /* Tree 82 of 200 */
+N81_1:
+ if attribute(catid) in (100300011, 100300058, 100300013, 100300143, 100200034, 100400141, 100300008, 100400038, 100300169, 100300200, 100200028, 100200067, 100200055, 100300006, 100200232, 100300146) then goto N81_2;
+ else goto N81_3;
+
+N81_2:
+ if attribute(catid) in (100300011, 100300013, 100300143, 100200034, 100300008, 100300200, 100200028, 100200067, 100200055, 100200232, 100300146) then goto T81_1;
+ else goto T81_2;
+
+T81_1:
+ response = -0.0179030459;
+ goto D81;
+
+T81_2:
+ response = -0.0025885619;
+ goto D81;
+
+N81_3:
+ if attribute(catid) in (0, 100300077, 100200186, 100300165, 100300102, 100300005, 100200068, 100300027, 100300116, 100300126, 100400142, 100300073, 100200192, 100300122, 100400079, 100200170, 100300074, 100300066) then goto N81_4;
+ else goto N81_5;
+
+N81_4:
+ if attribute(catid) in (0, 100200186, 100300165, 100300005, 100300027, 100300116, 100300126, 100400079) then goto T81_3;
+ else goto T81_4;
+
+T81_3:
+ response = 0.0078435164;
+ goto D81;
+
+T81_4:
+ response = 0.0118007064;
+ goto D81;
+
+N81_5:
+ if attribute(catid) in (100200171, 100200130, 100300014, 100300166, 100200172, 100300121, 100300004, 100200054, 100300065, 100300127, 100400080, 100200087, 100200185) then goto T81_5;
+ else goto N81_6;
+
+T81_5:
+ response = 0.0172799995;
+ goto D81;
+
+N81_6:
+ if attribute(catid) in (100200052, 100200234, 100200053, 100400037, 100200193, 100200176) then goto T81_6;
+ else goto T81_7;
+
+T81_6:
+ response = 0.0276688136;
+ goto D81;
+
+T81_7:
+ response = 0.0491582153;
+ goto D81;
+
+D81:
+
+tnscore = tnscore + response;
+
+ /* Tree 83 of 200 */
+N82_1:
+ if attribute(catid) in (100200130, 100300014, 100300058, 100300077, 100200034, 100200186, 100400141, 100300165, 100300005, 100200172, 100300032, 100300116, 100200234, 100300004, 100400142, 100300073, 100200193, 100400038, 100200192, 100300212, 100300127, 100400079, 100200170, 100400080, 100200087, 100300007, 100200176, 100200028, 100300076, 100200055, 100200232, 100300214) then goto N82_2;
+ else goto N82_5;
+
+N82_2:
+ if attribute(catid) in (100200186, 100300032, 100300116, 100300073, 100400038, 100300212, 100400079, 100200087, 100300007, 100300076, 100200055, 100200232, 100300214) then goto N82_3;
+ else goto N82_4;
+
+N82_3:
+ if attribute(catid) in (100300032, 100300212, 100300076, 100200055, 100200232, 100300214) then goto T82_1;
+ else goto T82_2;
+
+T82_1:
+ response = -0.0300814303;
+ goto D82;
+
+T82_2:
+ response = -0.0044598873;
+ goto D82;
+
+N82_4:
+ if attribute(catid) in (100300058, 100300077, 100200034, 100400141, 100300005, 100300004, 100400142, 100200193, 100200192, 100300127, 100200170, 100400080, 100200176) then goto T82_3;
+ else goto T82_4;
+
+T82_3:
+ response = 0.0033831149;
+ goto D82;
+
+T82_4:
+ response = 0.0061763311;
+ goto D82;
+
+N82_5:
+ if attribute(catid) in (0, 100200171, 100300166, 100300143, 100200052, 100300102, 100200068, 100300027, 100200053, 100400037, 100300065, 100300209, 100300122) then goto N82_6;
+ else goto N82_7;
+
+N82_6:
+ if attribute(catid) in (0, 100300102, 100200053, 100400037, 100300209, 100300122) then goto T82_5;
+ else goto T82_6;
+
+T82_5:
+ response = 0.0108688948;
+ goto D82;
+
+T82_6:
+ response = 0.0140670577;
+ goto D82;
+
+N82_7:
+ if attribute(catid) in (100300121, 100200054, 100300169, 100300074, 100300066, 100300200, 100200185) then goto T82_7;
+ else goto T82_8;
+
+T82_7:
+ response = 0.0219282043;
+ goto D82;
+
+T82_8:
+ response = 0.0332033624;
+ goto D82;
+
+D82:
+
+tnscore = tnscore + response;
+
+ /* Tree 84 of 200 */
+N83_1:
+ if attribute(catid) in (0, 100300011, 100300058, 100300013, 100300077, 100300166, 100400141, 100200052, 100300093, 100300005, 100200068, 100300032, 100300027, 100200053, 100200054, 100400037, 100200193, 100400038, 100300209, 100300122, 100300127, 100400079, 100200170, 100300066, 100300045, 100200185, 100300214, 100300146) then goto N83_2;
+ else goto N83_6;
+
+N83_2:
+ if attribute(catid) in (100300011, 100300013, 100300005, 100300032, 100300027, 100200193, 100300209, 100300122, 100200170, 100200185, 100300214, 100300146) then goto N83_3;
+ else goto N83_4;
+
+N83_3:
+ if attribute(catid) in (100300011, 100300005, 100300032, 100200193, 100300209, 100200185, 100300214, 100300146) then goto T83_1;
+ else goto T83_2;
+
+T83_1:
+ response = -0.0217026454;
+ goto D83;
+
+T83_2:
+ response = -0.0034858812;
+ goto D83;
+
+N83_4:
+ if attribute(catid) in (100300058, 100300077, 100400141, 100200068, 100200053, 100400079, 100300066) then goto T83_3;
+ else goto N83_5;
+
+T83_3:
+ response = 0.0030692700;
+ goto D83;
+
+N83_5:
+ if attribute(catid) in (0, 100400037) then goto T83_4;
+ else goto T83_5;
+
+T83_4:
+ response = 0.0062304681;
+ goto D83;
+
+T83_5:
+ response = 0.0091439421;
+ goto D83;
+
+N83_6:
+ if attribute(catid) in (100200171, 100200130, 100200034, 100200186, 100300165, 100200172, 100300116, 100200234, 100300073, 100200192, 100300065, 100300169, 100400080, 100200087, 100200028, 100300076, 100200232) then goto N83_7;
+ else goto N83_8;
+
+N83_7:
+ if attribute(catid) in (100200130, 100300165, 100200172, 100300116, 100200234, 100300073, 100200192, 100300169, 100200232) then goto T83_6;
+ else goto T83_7;
+
+T83_6:
+ response = 0.0117914211;
+ goto D83;
+
+T83_7:
+ response = 0.0156586974;
+ goto D83;
+
+N83_8:
+ if attribute(catid) in (100300121, 100300126, 100400142, 100300074, 100300006) then goto T83_8;
+ else goto T83_9;
+
+T83_8:
+ response = 0.0236611361;
+ goto D83;
+
+T83_9:
+ response = 0.0326251935;
+ goto D83;
+
+D83:
+
+tnscore = tnscore + response;
+
+ /* Tree 85 of 200 */
+N84_1:
+ if attribute(catid) in (100300013, 100300008, 100300032, 100300121, 100200234, 100300004, 100300126, 100200054, 100200193, 100300212, 100300122, 100300127, 100400079, 100200087, 100200176, 100300045, 100200028, 100300076, 100200067, 100300006) then goto N84_2;
+ else goto N84_3;
+
+N84_2:
+ if attribute(catid) in (100300121, 100200234, 100300004, 100200054, 100300212, 100200087, 100300076, 100200067) then goto T84_1;
+ else goto T84_2;
+
+T84_1:
+ response = -0.0242668043;
+ goto D84;
+
+T84_2:
+ response = -0.0057454024;
+ goto D84;
+
+N84_3:
+ if attribute(catid) in (0, 100200171, 100200130, 100300058, 100300166, 100300143, 100200186, 100400141, 100300165, 100200052, 100300093, 100300005, 100200172, 100200068, 100300027, 100300116, 100400142, 100300073, 100400037, 100400038, 100200192, 100300065, 100200170, 100300169, 100400080, 100300074, 100300007) then goto N84_4;
+ else goto N84_7;
+
+N84_4:
+ if attribute(catid) in (100200130, 100300166, 100300143, 100400141, 100300165, 100300093, 100200172, 100300027, 100300116, 100300073, 100400038, 100200170, 100300169, 100400080, 100300074) then goto N84_5;
+ else goto N84_6;
+
+N84_5:
+ if attribute(catid) in (100300143, 100400141, 100300093, 100300027, 100300116, 100300073, 100400080) then goto T84_3;
+ else goto T84_4;
+
+T84_3:
+ response = 0.0044087377;
+ goto D84;
+
+T84_4:
+ response = 0.0084781159;
+ goto D84;
+
+N84_6:
+ if attribute(catid) in (0, 100200068, 100400142, 100300065) then goto T84_5;
+ else goto T84_6;
+
+T84_5:
+ response = 0.0107211296;
+ goto D84;
+
+T84_6:
+ response = 0.0151177012;
+ goto D84;
+
+N84_7:
+ if attribute(catid) in (100300011, 100300014, 100300077, 100300102, 100200053, 100300209) then goto T84_7;
+ else goto T84_8;
+
+T84_7:
+ response = 0.0227209620;
+ goto D84;
+
+T84_8:
+ response = 0.0333058662;
+ goto D84;
+
+D84:
+
+tnscore = tnscore + response;
+
+ /* Tree 86 of 200 */
+N85_1:
+ if attribute(catid) in (100300011, 100300014, 100300143, 100200052, 100300102, 100300005, 100300116, 100200053, 100300126, 100400038, 100200192, 100300074, 100300066, 100300007, 100300200, 100300045, 100200028, 100300214) then goto N85_2;
+ else goto N85_3;
+
+N85_2:
+ if attribute(catid) in (100300143, 100300102, 100300005, 100300116, 100200053, 100300126, 100400038, 100300007, 100200028, 100300214) then goto T85_1;
+ else goto T85_2;
+
+T85_1:
+ response = -0.0112761132;
+ goto D85;
+
+T85_2:
+ response = 0.0005180964;
+ goto D85;
+
+N85_3:
+ if attribute(catid) in (0, 100200130, 100300166, 100300093, 100200172, 100300121, 100200234, 100400142, 100200054, 100300073, 100400037, 100300122, 100300127, 100400079, 100300169, 100400080, 100300076, 100200067) then goto N85_4;
+ else goto N85_6;
+
+N85_4:
+ if attribute(catid) in (0, 100300093, 100200172, 100300121, 100200054, 100300073, 100300122, 100300127, 100400079, 100300169, 100300076) then goto N85_5;
+ else goto T85_5;
+
+N85_5:
+ if attribute(catid) in (100300093, 100200172, 100200054, 100300073, 100300122, 100300127, 100400079, 100300169, 100300076) then goto T85_3;
+ else goto T85_4;
+
+T85_3:
+ response = 0.0062969186;
+ goto D85;
+
+T85_4:
+ response = 0.0079944471;
+ goto D85;
+
+T85_5:
+ response = 0.0107843133;
+ goto D85;
+
+N85_6:
+ if attribute(catid) in (100200171, 100300058, 100300013, 100300077, 100200034, 100200186, 100400141, 100300165, 100200068, 100300027, 100300065, 100300212, 100200170, 100200087, 100300006) then goto T85_6;
+ else goto T85_7;
+
+T85_6:
+ response = 0.0156933768;
+ goto D85;
+
+T85_7:
+ response = 0.0301329171;
+ goto D85;
+
+D85:
+
+tnscore = tnscore + response;
+
+ /* Tree 87 of 200 */
+N86_1:
+ if attribute(catid) in (100200171, 100300011, 100300014, 100300058, 100300013, 100300077, 100200034, 100400141, 100200068, 100300116, 100200234, 100300019, 100400037, 100200193, 100400038, 100200192, 100300212, 100300209, 100300127, 100300169, 100400080, 100200087, 100300074, 100300066, 100300007, 100200176, 100200067, 100200185, 100300006, 100300214) then goto N86_2;
+ else goto N86_4;
+
+N86_2:
+ if attribute(catid) in (100300011, 100300013, 100300116, 100400038, 100300212, 100300209, 100200087, 100200176, 100200067, 100200185, 100300214) then goto T86_1;
+ else goto N86_3;
+
+T86_1:
+ response = -0.0153646097;
+ goto D86;
+
+N86_3:
+ if attribute(catid) in (100200171, 100300014, 100300058, 100200034, 100200068, 100200234, 100300019, 100200193, 100400080, 100300066, 100300007) then goto T86_2;
+ else goto T86_3;
+
+T86_2:
+ response = -0.0021431391;
+ goto D86;
+
+T86_3:
+ response = 0.0027978033;
+ goto D86;
+
+N86_4:
+ if attribute(catid) in (0, 100200130, 100300166, 100200186, 100300165, 100200052, 100300093, 100300102, 100300005, 100200172, 100300027, 100300121, 100200053, 100300004, 100300126, 100400142, 100200054, 100300073, 100300065, 100300122, 100400079, 100200170, 100300045, 100200028, 100300076) then goto N86_5;
+ else goto T86_7;
+
+N86_5:
+ if attribute(catid) in (0, 100200130, 100200186, 100300165, 100300102, 100300005, 100200172, 100300027, 100300121, 100200053, 100300073, 100300045, 100200028) then goto N86_6;
+ else goto T86_6;
+
+N86_6:
+ if attribute(catid) in (100200130, 100300165, 100300102, 100300027, 100300121, 100200053, 100300073, 100300045, 100200028) then goto T86_4;
+ else goto T86_5;
+
+T86_4:
+ response = 0.0092257166;
+ goto D86;
+
+T86_5:
+ response = 0.0114031662;
+ goto D86;
+
+T86_6:
+ response = 0.0178845925;
+ goto D86;
+
+T86_7:
+ response = 0.0516414799;
+ goto D86;
+
+D86:
+
+tnscore = tnscore + response;
+
+ /* Tree 88 of 200 */
+N87_1:
+ if attribute(catid) in (100300032, 100200234, 100300212, 100300209, 100300045, 100200055, 100300146) then goto T87_1;
+ else goto N87_2;
+
+T87_1:
+ response = -0.0281888364;
+ goto D87;
+
+N87_2:
+ if attribute(catid) in (0, 100200130, 100300058, 100300013, 100300077, 100300166, 100300165, 100200052, 100200172, 100200068, 100300027, 100300019, 100300004, 100400142, 100400037, 100200192, 100300122, 100400079, 100200170, 100300169, 100400080, 100300074, 100200176, 100200028, 100300076, 100300006) then goto N87_3;
+ else goto N87_6;
+
+N87_3:
+ if attribute(catid) in (100300058, 100300077, 100300165, 100200068, 100300019, 100400037, 100200192, 100300122, 100400079, 100200170, 100300169, 100400080, 100200028, 100300006) then goto N87_4;
+ else goto N87_5;
+
+N87_4:
+ if attribute(catid) in (100300058, 100200068, 100300019, 100400037, 100200192, 100400079, 100200170, 100200028, 100300006) then goto T87_2;
+ else goto T87_3;
+
+T87_2:
+ response = -0.0005203242;
+ goto D87;
+
+T87_3:
+ response = 0.0053481381;
+ goto D87;
+
+N87_5:
+ if attribute(catid) in (100200130, 100200052, 100200172, 100300074) then goto T87_4;
+ else goto T87_5;
+
+T87_4:
+ response = 0.0070360348;
+ goto D87;
+
+T87_5:
+ response = 0.0095044104;
+ goto D87;
+
+N87_6:
+ if attribute(catid) in (100200171, 100400141, 100300093, 100300008, 100300121, 100300126, 100300073, 100300127, 100200087, 100300066) then goto T87_6;
+ else goto N87_7;
+
+T87_6:
+ response = 0.0150338406;
+ goto D87;
+
+N87_7:
+ if attribute(catid) in (100300011, 100300014, 100200186, 100300116, 100200053, 100400038, 100300065, 100300200, 100200185, 100200232) then goto T87_7;
+ else goto T87_8;
+
+T87_7:
+ response = 0.0219656474;
+ goto D87;
+
+T87_8:
+ response = 0.0303343362;
+ goto D87;
+
+D87:
+
+tnscore = tnscore + response;
+
+ /* Tree 89 of 200 */
+N88_1:
+ if attribute(catid) in (100300011, 100300058, 100200034, 100300165, 100300093, 100300102, 100300032, 100300027, 100300121, 100300019, 100300073, 100400037, 100200193, 100300127, 100300076, 100200232, 100300214) then goto N88_2;
+ else goto N88_3;
+
+N88_2:
+ if attribute(catid) in (100300011, 100300058, 100200034, 100300093, 100300102, 100300032, 100200193, 100300076, 100200232, 100300214) then goto T88_1;
+ else goto T88_2;
+
+T88_1:
+ response = -0.0146956932;
+ goto D88;
+
+T88_2:
+ response = -0.0021492979;
+ goto D88;
+
+N88_3:
+ if attribute(catid) in (0, 100200171, 100200130, 100300014, 100300077, 100300166, 100200186, 100400141, 100200052, 100200172, 100200068, 100300116, 100300004, 100400142, 100200054, 100400038, 100200192, 100300065, 100300122, 100400079, 100200170, 100300169, 100400080, 100200087, 100200185, 100300006) then goto N88_4;
+ else goto N88_7;
+
+N88_4:
+ if attribute(catid) in (100200130, 100300166, 100200052, 100200172, 100200068, 100300004, 100400142, 100400038, 100300065, 100400079, 100400080, 100200087) then goto N88_5;
+ else goto N88_6;
+
+N88_5:
+ if attribute(catid) in (100200130, 100200172, 100200068, 100400142, 100400079, 100400080) then goto T88_3;
+ else goto T88_4;
+
+T88_3:
+ response = 0.0060613479;
+ goto D88;
+
+T88_4:
+ response = 0.0086309145;
+ goto D88;
+
+N88_6:
+ if attribute(catid) in (0, 100200186, 100400141, 100300116, 100200054, 100300122, 100300169) then goto T88_5;
+ else goto T88_6;
+
+T88_5:
+ response = 0.0111211317;
+ goto D88;
+
+T88_6:
+ response = 0.0167951946;
+ goto D88;
+
+N88_7:
+ if attribute(catid) in (100300143, 100300005, 100200234, 100200053, 100300126, 100300212, 100300074, 100300066, 100300200, 100200028, 100200067) then goto N88_8;
+ else goto T88_9;
+
+N88_8:
+ if attribute(catid) in (100200053, 100300126, 100300074, 100300066, 100200028) then goto T88_7;
+ else goto T88_8;
+
+T88_7:
+ response = 0.0204993878;
+ goto D88;
+
+T88_8:
+ response = 0.0274572562;
+ goto D88;
+
+T88_9:
+ response = 0.0495878122;
+ goto D88;
+
+D88:
+
+tnscore = tnscore + response;
+
+ /* Tree 90 of 200 */
+N89_1:
+ if attribute(catid) in (100300011, 100300014, 100300058, 100300166, 100300143, 100200034, 100300165, 100200052, 100300093, 100200172, 100300032, 100300116, 100200053, 100300019, 100400142, 100400037, 100400038, 100200192, 100300209, 100300122, 100300127, 100400079, 100300074, 100200176, 100300200, 100200028, 100200232) then goto N89_2;
+ else goto N89_5;
+
+N89_2:
+ if attribute(catid) in (100300011, 100200034, 100300032, 100300019, 100400037, 100300127, 100400079, 100300074, 100200176, 100300200, 100200028, 100200232) then goto N89_3;
+ else goto N89_4;
+
+N89_3:
+ if attribute(catid) in (100300032, 100300019, 100400037, 100300200, 100200028, 100200232) then goto T89_1;
+ else goto T89_2;
+
+T89_1:
+ response = -0.0422634866;
+ goto D89;
+
+T89_2:
+ response = -0.0088083561;
+ goto D89;
+
+N89_4:
+ if attribute(catid) in (100300014, 100300143, 100300165, 100300093, 100200172, 100200192, 100300209, 100300122) then goto T89_3;
+ else goto T89_4;
+
+T89_3:
+ response = -0.0004084485;
+ goto D89;
+
+T89_4:
+ response = 0.0040562959;
+ goto D89;
+
+N89_5:
+ if attribute(catid) in (0, 100200130, 100300077, 100400141, 100300005, 100300027, 100200234, 100300004, 100300126, 100300073, 100200193, 100300065, 100300212, 100200170, 100300169, 100200087, 100300066, 100300045, 100300076) then goto N89_6;
+ else goto N89_8;
+
+N89_6:
+ if attribute(catid) in (100200130, 100400141, 100300005, 100300065, 100300212, 100200170, 100300169, 100300045) then goto T89_5;
+ else goto N89_7;
+
+T89_5:
+ response = 0.0081251015;
+ goto D89;
+
+N89_7:
+ if attribute(catid) in (0, 100200234) then goto T89_6;
+ else goto T89_7;
+
+T89_6:
+ response = 0.0119875946;
+ goto D89;
+
+T89_7:
+ response = 0.0148183346;
+ goto D89;
+
+N89_8:
+ if attribute(catid) in (100200171, 100200186, 100200068, 100300121, 100300006) then goto T89_8;
+ else goto T89_9;
+
+T89_8:
+ response = 0.0204518722;
+ goto D89;
+
+T89_9:
+ response = 0.0367144755;
+ goto D89;
+
+D89:
+
+tnscore = tnscore + response;
+
+ /* Tree 91 of 200 */
+N90_1:
+ if attribute(catid) in (100300058, 100300013, 100200034, 100300165, 100300093, 100300005, 100200172, 100300027, 100200053, 100300019, 100300126, 100300073, 100300212, 100300209, 100300169, 100400080, 100200087, 100300007, 100300045, 100200067, 100200055, 100200232, 100300214) then goto N90_2;
+ else goto N90_5;
+
+N90_2:
+ if attribute(catid) in (100300013, 100200034, 100300005, 100300027, 100300019, 100300212, 100200067, 100200055, 100200232, 100300214) then goto N90_3;
+ else goto N90_4;
+
+N90_3:
+ if attribute(catid) in (100300013, 100300005, 100300019, 100300212, 100200055, 100200232, 100300214) then goto T90_1;
+ else goto T90_2;
+
+T90_1:
+ response = -0.0410131690;
+ goto D90;
+
+T90_2:
+ response = -0.0176924609;
+ goto D90;
+
+N90_4:
+ if attribute(catid) in (100300058, 100300165, 100300093, 100200053, 100300126, 100300073, 100300209, 100300007, 100300045) then goto T90_3;
+ else goto T90_4;
+
+T90_3:
+ response = -0.0030372433;
+ goto D90;
+
+T90_4:
+ response = 0.0030417758;
+ goto D90;
+
+N90_5:
+ if attribute(catid) in (0, 100200171, 100300077, 100300166, 100200186, 100400141, 100200052, 100200068, 100300116, 100300121, 100200054, 100200192, 100300065, 100300122, 100300127, 100200170, 100300066, 100200028, 100300076, 100300006) then goto N90_6;
+ else goto N90_8;
+
+N90_6:
+ if attribute(catid) in (0, 100300077, 100300166, 100200186, 100200052, 100300121, 100300065, 100300122, 100300066, 100300076, 100300006) then goto N90_7;
+ else goto T90_7;
+
+N90_7:
+ if attribute(catid) in (100300077, 100300166, 100300121, 100300122, 100300066, 100300076) then goto T90_5;
+ else goto T90_6;
+
+T90_5:
+ response = 0.0066574572;
+ goto D90;
+
+T90_6:
+ response = 0.0093092556;
+ goto D90;
+
+T90_7:
+ response = 0.0138817683;
+ goto D90;
+
+N90_8:
+ if attribute(catid) in (100300011, 100300102, 100300032, 100200234, 100300004, 100400142, 100400079, 100300074, 100200176, 100300200) then goto T90_8;
+ else goto N90_9;
+
+T90_8:
+ response = 0.0211738270;
+ goto D90;
+
+N90_9:
+ if attribute(catid) in (100200130, 100300143, 100200193, 100400038) then goto T90_9;
+ else goto T90_10;
+
+T90_9:
+ response = 0.0270451990;
+ goto D90;
+
+T90_10:
+ response = 0.0447659217;
+ goto D90;
+
+D90:
+
+tnscore = tnscore + response;
+
+ /* Tree 92 of 200 */
+N91_1:
+ if attribute(catid) in (100300014, 100300013, 100300008, 100200068, 100200234, 100300004, 100200193, 100300212, 100300209, 100400080, 100200087, 100200176, 100200028, 100200067, 100200055, 100200185, 100300006, 100200232) then goto N91_2;
+ else goto N91_4;
+
+N91_2:
+ if attribute(catid) in (100300013, 100300008, 100200068, 100300212, 100300209, 100200028, 100200067, 100200055, 100200185) then goto N91_3;
+ else goto T91_3;
+
+N91_3:
+ if attribute(catid) in (100300013, 100300008, 100300209, 100200028, 100200067, 100200055) then goto T91_1;
+ else goto T91_2;
+
+T91_1:
+ response = -0.0410751689;
+ goto D91;
+
+T91_2:
+ response = -0.0211139959;
+ goto D91;
+
+T91_3:
+ response = -0.0060525832;
+ goto D91;
+
+N91_4:
+ if attribute(catid) in (100200171, 100200130, 100300077, 100300143, 100400141, 100200052, 100300102, 100200172, 100300073, 100400037, 100300122, 100300074, 100300200, 100300045) then goto N91_5;
+ else goto N91_6;
+
+N91_5:
+ if attribute(catid) in (100200171, 100300077, 100300143, 100200052, 100300102, 100200172, 100400037, 100300045) then goto T91_4;
+ else goto T91_5;
+
+T91_4:
+ response = 0.0022476773;
+ goto D91;
+
+T91_5:
+ response = 0.0072496833;
+ goto D91;
+
+N91_6:
+ if attribute(catid) in (0, 100300011, 100300166, 100200186, 100300165, 100300005, 100300027, 100300116, 100200053, 100300126, 100300127, 100400079, 100200170, 100300169, 100300066, 100300076) then goto N91_7;
+ else goto N91_9;
+
+N91_7:
+ if attribute(catid) in (0, 100300166, 100300165, 100300005, 100200053, 100400079, 100300169) then goto N91_8;
+ else goto T91_8;
+
+N91_8:
+ if attribute(catid) in (100300166, 100300165, 100300005, 100200053, 100400079, 100300169) then goto T91_6;
+ else goto T91_7;
+
+T91_6:
+ response = 0.0108065489;
+ goto D91;
+
+T91_7:
+ response = 0.0121894583;
+ goto D91;
+
+T91_8:
+ response = 0.0150333423;
+ goto D91;
+
+N91_9:
+ if attribute(catid) in (100200034, 100300032, 100400142, 100400038, 100200192, 100300065) then goto T91_9;
+ else goto T91_10;
+
+T91_9:
+ response = 0.0204411972;
+ goto D91;
+
+T91_10:
+ response = 0.0355402269;
+ goto D91;
+
+D91:
+
+tnscore = tnscore + response;
+
+ /* Tree 93 of 200 */
+N92_1:
+ if attribute(catid) in (100300058, 100300143, 100200034, 100200186, 100400141, 100300093, 100300102, 100300027, 100300004, 100200054, 100200193, 100400038, 100300065, 100300212, 100300209, 100300007, 100200028, 100200067, 100200185, 100300146) then goto N92_2;
+ else goto N92_3;
+
+N92_2:
+ if attribute(catid) in (100300058, 100400038, 100300065, 100300209, 100200028, 100200067, 100200185) then goto T92_1;
+ else goto T92_2;
+
+T92_1:
+ response = -0.0271979436;
+ goto D92;
+
+T92_2:
+ response = -0.0038611614;
+ goto D92;
+
+N92_3:
+ if attribute(catid) in (0, 100300014, 100200052, 100200172, 100400142, 100200192, 100300122, 100300127, 100400079, 100200170, 100300074, 100300076) then goto N92_4;
+ else goto N92_5;
+
+N92_4:
+ if attribute(catid) in (0, 100200172, 100400142, 100200192, 100300122, 100200170, 100300076) then goto T92_3;
+ else goto T92_4;
+
+T92_3:
+ response = 0.0073159372;
+ goto D92;
+
+T92_4:
+ response = 0.0115841741;
+ goto D92;
+
+N92_5:
+ if attribute(catid) in (100200171, 100200130, 100300013, 100300166, 100300008, 100200068, 100300116, 100300121, 100200053, 100300126, 100300073, 100300169, 100400080, 100200087, 100300066, 100200176) then goto N92_6;
+ else goto T92_7;
+
+N92_6:
+ if attribute(catid) in (100200130, 100300013, 100200068, 100300121, 100200053, 100300073, 100300169) then goto T92_5;
+ else goto T92_6;
+
+T92_5:
+ response = 0.0135762410;
+ goto D92;
+
+T92_6:
+ response = 0.0164515309;
+ goto D92;
+
+T92_7:
+ response = 0.0222390758;
+ goto D92;
+
+D92:
+
+tnscore = tnscore + response;
+
+ /* Tree 94 of 200 */
+N93_1:
+ if attribute(catid) in (0, 100200171, 100300011, 100200130, 100300166, 100300143, 100400141, 100200052, 100300102, 100300005, 100200172, 100300008, 100200068, 100300027, 100300116, 100200234, 100200053, 100400142, 100200054, 100300073, 100200193, 100400038, 100200192, 100300209, 100300122, 100200170, 100200087, 100300074, 100300007, 100200176, 100300200, 100300045, 100200028, 100300076, 100200067, 100300006, 100300214) then goto N93_2;
+ else goto N93_6;
+
+N93_2:
+ if attribute(catid) in (100300011, 100300166, 100300143, 100300008, 100400142, 100300073, 100200193, 100300209, 100300007, 100200176, 100300214) then goto N93_3;
+ else goto N93_4;
+
+N93_3:
+ if attribute(catid) in (100300143, 100200193, 100300007, 100200176, 100300214) then goto T93_1;
+ else goto T93_2;
+
+T93_1:
+ response = -0.0297063352;
+ goto D93;
+
+T93_2:
+ response = -0.0033584809;
+ goto D93;
+
+N93_4:
+ if attribute(catid) in (0, 100200130, 100400141, 100200052, 100200172, 100300027, 100300116, 100200234, 100200053, 100200054, 100400038, 100200192, 100300122, 100200170, 100300076, 100300006) then goto N93_5;
+ else goto T93_5;
+
+N93_5:
+ if attribute(catid) in (100200130, 100200052, 100200172, 100300027, 100300116, 100200234, 100200053, 100200054, 100400038, 100200192, 100200170, 100300006) then goto T93_3;
+ else goto T93_4;
+
+T93_3:
+ response = 0.0038378464;
+ goto D93;
+
+T93_4:
+ response = 0.0065133022;
+ goto D93;
+
+T93_5:
+ response = 0.0116394129;
+ goto D93;
+
+N93_6:
+ if attribute(catid) in (100300014, 100300077, 100200034, 100200186, 100300165, 100300121, 100300004, 100300126, 100300065, 100300127, 100300169, 100400080, 100300066, 100200185, 100200232) then goto N93_7;
+ else goto T93_8;
+
+N93_7:
+ if attribute(catid) in (100300014, 100300077, 100300004, 100300127, 100300169, 100300066, 100200185, 100200232) then goto T93_6;
+ else goto T93_7;
+
+T93_6:
+ response = 0.0160520754;
+ goto D93;
+
+T93_7:
+ response = 0.0218263304;
+ goto D93;
+
+T93_8:
+ response = 0.0403414109;
+ goto D93;
+
+D93:
+
+tnscore = tnscore + response;
+
+ /* Tree 95 of 200 */
+N94_1:
+ if attribute(catid) in (100300077, 100300143, 100200034, 100300102, 100300005, 100300008, 100300116, 100300121, 100300004, 100300126, 100300073, 100400038, 100300212, 100300209, 100300007, 100200067, 100200055, 100300214, 100300146) then goto N94_2;
+ else goto N94_4;
+
+N94_2:
+ if attribute(catid) in (100300143, 100200034, 100300005, 100300209, 100200067, 100200055, 100300214) then goto T94_1;
+ else goto N94_3;
+
+T94_1:
+ response = -0.0407918257;
+ goto D94;
+
+N94_3:
+ if attribute(catid) in (100300102, 100300008, 100300116, 100300004, 100400038, 100300007, 100300146) then goto T94_2;
+ else goto T94_3;
+
+T94_2:
+ response = -0.0072483912;
+ goto D94;
+
+T94_3:
+ response = 0.0008912521;
+ goto D94;
+
+N94_4:
+ if attribute(catid) in (0, 100200171, 100200130, 100300058, 100300166, 100200186, 100400141, 100200052, 100300093, 100200172, 100200068, 100300032, 100300027, 100200053, 100400142, 100200192, 100300065, 100300122, 100300127, 100400079, 100200170, 100300169, 100400080, 100300074, 100300066, 100200176, 100200028, 100300076) then goto N94_5;
+ else goto N94_8;
+
+N94_5:
+ if attribute(catid) in (0, 100200171, 100200130, 100300058, 100300166, 100200186, 100300093, 100200172, 100200053, 100300122, 100300127, 100400079, 100300169, 100200176, 100200028) then goto N94_6;
+ else goto N94_7;
+
+N94_6:
+ if attribute(catid) in (100200171, 100300166, 100200186, 100300093, 100200053, 100300127, 100300169, 100200176, 100200028) then goto T94_4;
+ else goto T94_5;
+
+T94_4:
+ response = 0.0062732454;
+ goto D94;
+
+T94_5:
+ response = 0.0100875564;
+ goto D94;
+
+N94_7:
+ if attribute(catid) in (100200068, 100400142, 100300065, 100400080, 100300074) then goto T94_6;
+ else goto T94_7;
+
+T94_6:
+ response = 0.0120096679;
+ goto D94;
+
+T94_7:
+ response = 0.0160208786;
+ goto D94;
+
+N94_8:
+ if attribute(catid) in (100300014, 100300165, 100200234, 100200054, 100200193, 100200087, 100300200, 100200185) then goto T94_8;
+ else goto T94_9;
+
+T94_8:
+ response = 0.0234816349;
+ goto D94;
+
+T94_9:
+ response = 0.0499780329;
+ goto D94;
+
+D94:
+
+tnscore = tnscore + response;
+
+ /* Tree 96 of 200 */
+N95_1:
+ if attribute(catid) in (100300014, 100400141, 100300093, 100300102, 100300008, 100200068, 100300116, 100200234, 100200053, 100300019, 100300004, 100400142, 100400038, 100300127, 100200087, 100300074, 100200176, 100300200, 100300045, 100200028, 100200067, 100300006, 100200232, 100300214) then goto N95_2;
+ else goto N95_4;
+
+N95_2:
+ if attribute(catid) in (100300102, 100300008, 100200068, 100300019, 100300004, 100400038, 100300127, 100200176, 100300200, 100300045, 100200067, 100300006, 100200232, 100300214) then goto T95_1;
+ else goto N95_3;
+
+T95_1:
+ response = -0.0139227136;
+ goto D95;
+
+N95_3:
+ if attribute(catid) in (100400142, 100300074) then goto T95_2;
+ else goto T95_3;
+
+T95_2:
+ response = -0.0037315997;
+ goto D95;
+
+T95_3:
+ response = 0.0024425812;
+ goto D95;
+
+N95_4:
+ if attribute(catid) in (0, 100200171, 100200130, 100300077, 100300166, 100300165, 100200052, 100300073, 100300065, 100300212, 100400079, 100300169, 100300007) then goto N95_5;
+ else goto N95_6;
+
+N95_5:
+ if attribute(catid) in (100200171, 100200130, 100300077, 100300166, 100300165, 100200052, 100300212, 100400079) then goto T95_4;
+ else goto T95_5;
+
+T95_4:
+ response = 0.0062742131;
+ goto D95;
+
+T95_5:
+ response = 0.0107218180;
+ goto D95;
+
+N95_6:
+ if attribute(catid) in (100300058, 100300013, 100300143, 100200034, 100200186, 100300005, 100200172, 100300027, 100300121, 100200192, 100300209, 100300122, 100200170, 100400080, 100300066) then goto T95_6;
+ else goto T95_7;
+
+T95_6:
+ response = 0.0153086152;
+ goto D95;
+
+T95_7:
+ response = 0.0249190643;
+ goto D95;
+
+D95:
+
+tnscore = tnscore + response;
+
+ /* Tree 97 of 200 */
+N96_1:
+ if attribute(catid) in (100300143, 100200186, 100200052, 100300093, 100300102, 100300008, 100300121, 100200234, 100200053, 100300019, 100300004, 100400038, 100300065, 100300212, 100200087, 100300200, 100200028, 100200067, 100200055, 100200185, 100300006, 100300214, 100300146) then goto N96_2;
+ else goto N96_3;
+
+N96_2:
+ if attribute(catid) in (100300143, 100300102, 100300008, 100300121, 100300019, 100300004, 100400038, 100200067, 100200055, 100300006, 100300214, 100300146) then goto T96_1;
+ else goto T96_2;
+
+T96_1:
+ response = -0.0160264772;
+ goto D96;
+
+T96_2:
+ response = -0.0023817409;
+ goto D96;
+
+N96_3:
+ if attribute(catid) in (0, 100300013, 100300166, 100200034, 100300032, 100400142, 100300073, 100200193, 100200192, 100400079, 100200170, 100300066) then goto N96_4;
+ else goto N96_5;
+
+N96_4:
+ if attribute(catid) in (0, 100300013, 100300073, 100200192, 100400079, 100300066) then goto T96_3;
+ else goto T96_4;
+
+T96_3:
+ response = 0.0078781947;
+ goto D96;
+
+T96_4:
+ response = 0.0111361463;
+ goto D96;
+
+N96_5:
+ if attribute(catid) in (100200171, 100300011, 100200130, 100300014, 100300077, 100300165, 100200172, 100200068, 100300027, 100300126, 100200054, 100300209, 100300122, 100300169, 100400080, 100300074, 100200176) then goto N96_6;
+ else goto T96_7;
+
+N96_6:
+ if attribute(catid) in (100300011, 100200130, 100300014, 100300077, 100300165, 100200054, 100300209, 100300122, 100300169, 100200176) then goto T96_5;
+ else goto T96_6;
+
+T96_5:
+ response = 0.0143761703;
+ goto D96;
+
+T96_6:
+ response = 0.0180005310;
+ goto D96;
+
+T96_7:
+ response = 0.0280056529;
+ goto D96;
+
+D96:
+
+tnscore = tnscore + response;
+
+ /* Tree 98 of 200 */
+N97_1:
+ if attribute(catid) in (100300011, 100300013, 100300166, 100200034, 100200052, 100300008, 100200068, 100300116, 100300121, 100300019, 100200193, 100200192, 100300065, 100300209, 100300127, 100300074, 100200176, 100300200, 100300045, 100300076, 100200067, 100200055, 100300214, 100300146) then goto N97_2;
+ else goto N97_3;
+
+N97_2:
+ if attribute(catid) in (100300011, 100200034, 100200068, 100300019, 100300209, 100200176, 100300200, 100300045, 100300076, 100200067, 100200055, 100300214, 100300146) then goto T97_1;
+ else goto T97_2;
+
+T97_1:
+ response = -0.0143446006;
+ goto D97;
+
+T97_2:
+ response = -0.0005591090;
+ goto D97;
+
+N97_3:
+ if attribute(catid) in (0, 100200171, 100300014, 100300077, 100300143, 100200186, 100400141, 100300165, 100300102, 100300005, 100200172, 100300027, 100300004, 100300126, 100400142, 100200054, 100300073, 100400038, 100400079, 100200170, 100300169, 100400080, 100300066, 100200028, 100200185, 100300006) then goto N97_4;
+ else goto N97_6;
+
+N97_4:
+ if attribute(catid) in (0, 100200171, 100200186, 100400141, 100300165, 100300102, 100300005, 100200172, 100300027, 100200054, 100300073, 100400038, 100300169, 100400080, 100300066, 100200028) then goto N97_5;
+ else goto T97_5;
+
+N97_5:
+ if attribute(catid) in (100200186, 100400141, 100300005, 100200172, 100200054, 100300073, 100300169, 100300066) then goto T97_3;
+ else goto T97_4;
+
+T97_3:
+ response = 0.0063901469;
+ goto D97;
+
+T97_4:
+ response = 0.0098655154;
+ goto D97;
+
+T97_5:
+ response = 0.0150886566;
+ goto D97;
+
+N97_6:
+ if attribute(catid) in (100200130, 100300058, 100300093, 100300032, 100300122) then goto T97_6;
+ else goto T97_7;
+
+T97_6:
+ response = 0.0218922437;
+ goto D97;
+
+T97_7:
+ response = 0.0331326520;
+ goto D97;
+
+D97:
+
+tnscore = tnscore + response;
+
+ /* Tree 99 of 200 */
+N98_1:
+ if attribute(catid) in (100200171, 100300011, 100300013, 100200034, 100200186, 100400141, 100200052, 100300093, 100300102, 100200172, 100300008, 100300032, 100300027, 100300116, 100200054, 100300073, 100400037, 100200193, 100200170, 100300066, 100300007, 100300200, 100300045, 100300076, 100200055) then goto N98_2;
+ else goto N98_3;
+
+N98_2:
+ if attribute(catid) in (100300011, 100200186, 100300102, 100300008, 100400037, 100200193, 100200170, 100300007, 100300200, 100200055) then goto T98_1;
+ else goto T98_2;
+
+T98_1:
+ response = -0.0100694371;
+ goto D98;
+
+T98_2:
+ response = -0.0013265371;
+ goto D98;
+
+N98_3:
+ if attribute(catid) in (0, 100200130, 100300014, 100300058, 100300166, 100300165, 100300005, 100300121, 100200234, 100300004, 100300126, 100400142, 100200192, 100300065, 100300212, 100300122, 100300127, 100400079, 100300169, 100300074, 100200028, 100200067, 100300006) then goto N98_4;
+ else goto N98_5;
+
+N98_4:
+ if attribute(catid) in (0, 100300058, 100300005, 100300121, 100200234, 100300065, 100300212, 100300127) then goto T98_3;
+ else goto T98_4;
+
+T98_3:
+ response = 0.0083645817;
+ goto D98;
+
+T98_4:
+ response = 0.0130866864;
+ goto D98;
+
+N98_5:
+ if attribute(catid) in (100300077, 100200068, 100200053, 100400080, 100200087, 100200176) then goto T98_5;
+ else goto T98_6;
+
+T98_5:
+ response = 0.0197130039;
+ goto D98;
+
+T98_6:
+ response = 0.0369209199;
+ goto D98;
+
+D98:
+
+tnscore = tnscore + response;
+
+ /* Tree 100 of 200 */
+N99_1:
+ if attribute(catid) in (0, 100200171, 100300011, 100300014, 100300077, 100300166, 100300143, 100200186, 100400141, 100200052, 100300093, 100300102, 100300005, 100300027, 100200234, 100200053, 100300004, 100200054, 100300073, 100400037, 100200193, 100200192, 100300212, 100300209, 100300122, 100300127, 100400080, 100200087, 100300074, 100300007, 100300045, 100300076, 100200067, 100200055, 100200185, 100300006, 100300214) then goto N99_2;
+ else goto N99_6;
+
+N99_2:
+ if attribute(catid) in (100300011, 100300143, 100200052, 100300093, 100200054, 100200193, 100300209, 100300074, 100200067, 100200055, 100200185, 100300006, 100300214) then goto N99_3;
+ else goto N99_4;
+
+N99_3:
+ if attribute(catid) in (100300143, 100300093, 100200193, 100300209, 100300074, 100200055, 100200185, 100300214) then goto T99_1;
+ else goto T99_2;
+
+T99_1:
+ response = -0.0161446372;
+ goto D99;
+
+T99_2:
+ response = -0.0058395053;
+ goto D99;
+
+N99_4:
+ if attribute(catid) in (0, 100300014, 100200186, 100400141, 100300102, 100200234, 100300004, 100300073, 100400037, 100300212, 100300122, 100300127, 100300045, 100300076) then goto N99_5;
+ else goto T99_5;
+
+N99_5:
+ if attribute(catid) in (100300014, 100200186, 100400141, 100300102, 100200234, 100300004, 100300073, 100400037, 100300212, 100300045) then goto T99_3;
+ else goto T99_4;
+
+T99_3:
+ response = 0.0018639770;
+ goto D99;
+
+T99_4:
+ response = 0.0041634423;
+ goto D99;
+
+T99_5:
+ response = 0.0073459177;
+ goto D99;
+
+N99_6:
+ if attribute(catid) in (100200130, 100300058, 100200068, 100300032, 100300116, 100300121, 100300126, 100400142, 100300065, 100400079, 100200170, 100300169, 100300066) then goto T99_6;
+ else goto N99_7;
+
+T99_6:
+ response = 0.0148443276;
+ goto D99;
+
+N99_7:
+ if attribute(catid) in (100200034, 100300165, 100200172) then goto T99_7;
+ else goto T99_8;
+
+T99_7:
+ response = 0.0223682677;
+ goto D99;
+
+T99_8:
+ response = 0.0294343337;
+ goto D99;
+
+D99:
+
+tnscore = tnscore + response;
+
+ /* Tree 101 of 200 */
+N100_1:
+ if attribute(catid) in (100300011, 100300143, 100300102, 100200234, 100300019, 100400037, 100300074, 100300066, 100300045, 100200067, 100200055, 100200185, 100300006, 100300214) then goto N100_2;
+ else goto N100_3;
+
+N100_2:
+ if attribute(catid) in (100300143, 100200234, 100300019, 100200055, 100300214) then goto T100_1;
+ else goto T100_2;
+
+T100_1:
+ response = -0.0247867880;
+ goto D100;
+
+T100_2:
+ response = -0.0075505833;
+ goto D100;
+
+N100_3:
+ if attribute(catid) in (0, 100200130, 100300014, 100300077, 100300166, 100200034, 100200186, 100400141, 100300165, 100200052, 100300093, 100200172, 100200068, 100300027, 100300116, 100200053, 100400142, 100200054, 100300073, 100400038, 100200192, 100300065, 100300122, 100400079, 100300169, 100400080, 100200087, 100300007, 100300076) then goto N100_4;
+ else goto N100_7;
+
+N100_4:
+ if attribute(catid) in (0, 100200130, 100300077, 100300166, 100200034, 100300165, 100200052, 100300093, 100300027, 100300116, 100200053, 100300073, 100400079, 100400080, 100200087, 100300076) then goto N100_5;
+ else goto N100_6;
+
+N100_5:
+ if attribute(catid) in (100200130, 100300077, 100300093, 100300027, 100200053, 100400079, 100200087, 100300076) then goto T100_3;
+ else goto T100_4;
+
+T100_3:
+ response = 0.0019910708;
+ goto D100;
+
+T100_4:
+ response = 0.0076733873;
+ goto D100;
+
+N100_6:
+ if attribute(catid) in (100300014, 100400141, 100200172, 100400142, 100300065, 100300122, 100300007) then goto T100_5;
+ else goto T100_6;
+
+T100_5:
+ response = 0.0110065874;
+ goto D100;
+
+T100_6:
+ response = 0.0140312744;
+ goto D100;
+
+N100_7:
+ if attribute(catid) in (100200171, 100300121, 100300004, 100300126, 100200193, 100300209, 100300127, 100200170, 100200176, 100200028) then goto T100_7;
+ else goto T100_8;
+
+T100_7:
+ response = 0.0200792046;
+ goto D100;
+
+T100_8:
+ response = 0.0371851273;
+ goto D100;
+
+D100:
+
+tnscore = tnscore + response;
+
+ /* Tree 102 of 200 */
+N101_1:
+ if attribute(catid) in (100300011, 100300014, 100300077, 100300165, 100300093, 100300005, 100200068, 100300032, 100300121, 100200234, 100300126, 100200054, 100400037, 100200193, 100400038, 100300122, 100200087, 100200176, 100300200, 100300076, 100200067, 100200232, 100300214) then goto N101_2;
+ else goto N101_4;
+
+N101_2:
+ if attribute(catid) in (100300014, 100300165, 100300093, 100200068, 100300032, 100200193, 100200087, 100200176, 100200067, 100200232, 100300214) then goto N101_3;
+ else goto T101_3;
+
+N101_3:
+ if attribute(catid) in (100300014, 100300093, 100300032, 100200176, 100200067, 100200232, 100300214) then goto T101_1;
+ else goto T101_2;
+
+T101_1:
+ response = -0.0238697728;
+ goto D101;
+
+T101_2:
+ response = -0.0068920318;
+ goto D101;
+
+T101_3:
+ response = -0.0008192848;
+ goto D101;
+
+N101_4:
+ if attribute(catid) in (0, 100200171, 100200130, 100300166, 100200034, 100200186, 100400141, 100200052, 100200172, 100300027, 100300116, 100200053, 100400142, 100300073, 100200192, 100300212, 100300127, 100200170, 100300074, 100300066, 100300007, 100200028) then goto N101_5;
+ else goto N101_7;
+
+N101_5:
+ if attribute(catid) in (100200130, 100400141, 100200052, 100300027, 100400142, 100200192, 100300127, 100200170, 100300074, 100300066) then goto T101_4;
+ else goto N101_6;
+
+T101_4:
+ response = 0.0063630817;
+ goto D101;
+
+N101_6:
+ if attribute(catid) in (100200171, 100300166, 100200034, 100200186, 100200053, 100300073, 100300007) then goto T101_5;
+ else goto T101_6;
+
+T101_5:
+ response = 0.0102002983;
+ goto D101;
+
+T101_6:
+ response = 0.0117073770;
+ goto D101;
+
+N101_7:
+ if attribute(catid) in (100300008, 100300004, 100300065, 100400079, 100300169, 100400080, 100200185, 100300006) then goto T101_7;
+ else goto T101_8;
+
+T101_7:
+ response = 0.0199785583;
+ goto D101;
+
+T101_8:
+ response = 0.0332135569;
+ goto D101;
+
+D101:
+
+tnscore = tnscore + response;
+
+ /* Tree 103 of 200 */
+N102_1:
+ if attribute(catid) in (0, 100300011, 100200130, 100300014, 100300077, 100300166, 100400141, 100300165, 100300093, 100300005, 100200172, 100200068, 100200053, 100300004, 100400142, 100200192, 100300209, 100300122, 100300127, 100400079, 100300169, 100400080, 100200087, 100300074, 100300066, 100300007, 100300200, 100300045, 100200028, 100200067, 100200055, 100300006, 100200232) then goto N102_2;
+ else goto N102_6;
+
+N102_2:
+ if attribute(catid) in (100300011, 100300014, 100300004, 100400142, 100300209, 100400080, 100200087, 100300007, 100300200, 100200067, 100200055, 100300006, 100200232) then goto N102_3;
+ else goto N102_4;
+
+N102_3:
+ if attribute(catid) in (100300011, 100300014, 100300004, 100300209, 100400080, 100300007, 100300200, 100200067, 100200055, 100200232) then goto T102_1;
+ else goto T102_2;
+
+T102_1:
+ response = -0.0226068659;
+ goto D102;
+
+T102_2:
+ response = -0.0049292005;
+ goto D102;
+
+N102_4:
+ if attribute(catid) in (0, 100200130, 100300077, 100300165, 100200053, 100300122, 100400079, 100300169, 100300074, 100300066, 100300045, 100200028) then goto N102_5;
+ else goto T102_5;
+
+N102_5:
+ if attribute(catid) in (100200130, 100300077, 100300165, 100400079, 100300066, 100300045, 100200028) then goto T102_3;
+ else goto T102_4;
+
+T102_3:
+ response = 0.0024587834;
+ goto D102;
+
+T102_4:
+ response = 0.0048477347;
+ goto D102;
+
+T102_5:
+ response = 0.0100107901;
+ goto D102;
+
+N102_6:
+ if attribute(catid) in (100200171, 100300058, 100300013, 100200034, 100200186, 100200052, 100300027, 100300116, 100200234, 100300073, 100400037, 100200193, 100400038, 100300065, 100200170, 100300076) then goto N102_7;
+ else goto N102_8;
+
+N102_7:
+ if attribute(catid) in (100300013, 100200186, 100300116, 100200234, 100300073, 100200193, 100400038, 100300065) then goto T102_6;
+ else goto T102_7;
+
+T102_6:
+ response = 0.0134952529;
+ goto D102;
+
+T102_7:
+ response = 0.0171978597;
+ goto D102;
+
+N102_8:
+ if attribute(catid) in (100300143, 100300121, 100200176) then goto T102_8;
+ else goto T102_9;
+
+T102_8:
+ response = 0.0268710783;
+ goto D102;
+
+T102_9:
+ response = 0.0389950015;
+ goto D102;
+
+D102:
+
+tnscore = tnscore + response;
+
+ /* Tree 104 of 200 */
+N103_1:
+ if attribute(catid) in (0, 100200171, 100300011, 100200130, 100300014, 100300058, 100300013, 100300077, 100300166, 100200186, 100300165, 100200052, 100300093, 100300102, 100300005, 100200172, 100300008, 100200068, 100300121, 100200053, 100300004, 100300126, 100200054, 100400037, 100200193, 100400038, 100200192, 100300122, 100300127, 100400079, 100200170, 100300169, 100200087, 100300074, 100300007, 100300045, 100200028, 100300076, 100300006, 100200232, 100300214, 100300146) then goto N103_2;
+ else goto N103_7;
+
+N103_2:
+ if attribute(catid) in (100200171, 100300011, 100200130, 100300014, 100300058, 100300013, 100300166, 100200186, 100300165, 100300093, 100300102, 100300005, 100200172, 100300008, 100200068, 100200053, 100300004, 100200054, 100400037, 100400038, 100200192, 100300122, 100400079, 100200170, 100200028, 100300076, 100300006, 100300214, 100300146) then goto N103_3;
+ else goto N103_6;
+
+N103_3:
+ if attribute(catid) in (100300011, 100300058, 100300013, 100300008, 100300004, 100200054, 100400038, 100200170, 100200028, 100300076, 100300214, 100300146) then goto N103_4;
+ else goto N103_5;
+
+N103_4:
+ if attribute(catid) in (100300058, 100300013, 100200054, 100400038, 100300214, 100300146) then goto T103_1;
+ else goto T103_2;
+
+T103_1:
+ response = -0.0127825599;
+ goto D103;
+
+T103_2:
+ response = -0.0040621004;
+ goto D103;
+
+N103_5:
+ if attribute(catid) in (100300165, 100300102, 100300005, 100200068, 100200053, 100400037, 100300122, 100300006) then goto T103_3;
+ else goto T103_4;
+
+T103_3:
+ response = 0.0003831181;
+ goto D103;
+
+T103_4:
+ response = 0.0035617568;
+ goto D103;
+
+N103_6:
+ if attribute(catid) in (100300077, 100300169, 100300074, 100300045) then goto T103_5;
+ else goto T103_6;
+
+T103_5:
+ response = 0.0064465003;
+ goto D103;
+
+T103_6:
+ response = 0.0097160619;
+ goto D103;
+
+N103_7:
+ if attribute(catid) in (100300143, 100400141, 100300032, 100300027, 100300116, 100200234, 100400142, 100300073, 100300065, 100400080, 100300066, 100300200, 100200185) then goto N103_8;
+ else goto T103_9;
+
+N103_8:
+ if attribute(catid) in (100300143, 100300032, 100300116, 100400142, 100400080, 100300066, 100200185) then goto T103_7;
+ else goto T103_8;
+
+T103_7:
+ response = 0.0176560057;
+ goto D103;
+
+T103_8:
+ response = 0.0221678704;
+ goto D103;
+
+T103_9:
+ response = 0.0407063066;
+ goto D103;
+
+D103:
+
+tnscore = tnscore + response;
+
+ /* Tree 105 of 200 */
+N104_1:
+ if attribute(catid) in (100300011, 100300143, 100300102, 100300008, 100200068, 100300116, 100300121, 100200234, 100200054, 100300073, 100400037, 100200193, 100300212, 100300209, 100300066, 100300200, 100200028, 100300076, 100200067, 100300006, 100200232) then goto N104_2;
+ else goto N104_4;
+
+N104_2:
+ if attribute(catid) in (100300011, 100300143, 100300102, 100300116, 100200234, 100200054, 100400037, 100300212, 100300209, 100300066, 100300200, 100200067, 100300006, 100200232) then goto N104_3;
+ else goto T104_3;
+
+N104_3:
+ if attribute(catid) in (100300143, 100300102, 100200054, 100300212, 100300209, 100300200, 100200067, 100200232) then goto T104_1;
+ else goto T104_2;
+
+T104_1:
+ response = -0.0313895030;
+ goto D104;
+
+T104_2:
+ response = -0.0136037814;
+ goto D104;
+
+T104_3:
+ response = -0.0032608717;
+ goto D104;
+
+N104_4:
+ if attribute(catid) in (0, 100200171, 100200130, 100300014, 100300058, 100300077, 100300166, 100200186, 100400141, 100200052, 100200172, 100300032, 100300027, 100200053, 100300004, 100300126, 100400038, 100200192, 100300065, 100300122, 100300127, 100400079, 100200170, 100300169, 100400080, 100200176, 100300045, 100200185) then goto N104_5;
+ else goto N104_7;
+
+N104_5:
+ if attribute(catid) in (0, 100200171, 100200130, 100300014, 100300058, 100300166, 100200052, 100300032, 100200053, 100300126, 100200192, 100300065, 100300122, 100300127, 100400079, 100200176, 100300045) then goto N104_6;
+ else goto T104_6;
+
+N104_6:
+ if attribute(catid) in (100200130, 100300014, 100300166, 100200052, 100200192, 100300065, 100200176) then goto T104_4;
+ else goto T104_5;
+
+T104_4:
+ response = 0.0042937725;
+ goto D104;
+
+T104_5:
+ response = 0.0066960091;
+ goto D104;
+
+T104_6:
+ response = 0.0115205820;
+ goto D104;
+
+N104_7:
+ if attribute(catid) in (100200034, 100300165, 100300093, 100300005, 100200087) then goto T104_7;
+ else goto T104_8;
+
+T104_7:
+ response = 0.0194805523;
+ goto D104;
+
+T104_8:
+ response = 0.0346934783;
+ goto D104;
+
+D104:
+
+tnscore = tnscore + response;
+
+ /* Tree 106 of 200 */
+N105_1:
+ if attribute(catid) in (100300013, 100300077, 100300143, 100200186, 100300008, 100300027, 100300116, 100300019, 100300004, 100300126, 100300073, 100400037, 100200192, 100300209, 100300122, 100200170, 100300169, 100300066, 100300007, 100300076, 100200055, 100300146) then goto N105_2;
+ else goto N105_4;
+
+N105_2:
+ if attribute(catid) in (100300143, 100300008, 100300019, 100400037, 100300122, 100200055, 100300146) then goto T105_1;
+ else goto N105_3;
+
+T105_1:
+ response = -0.0346820188;
+ goto D105;
+
+N105_3:
+ if attribute(catid) in (100300013, 100300077, 100300027, 100300126, 100300169, 100300007, 100300076) then goto T105_2;
+ else goto T105_3;
+
+T105_2:
+ response = -0.0068202013;
+ goto D105;
+
+T105_3:
+ response = -0.0006852405;
+ goto D105;
+
+N105_4:
+ if attribute(catid) in (0, 100200171, 100300011, 100300058, 100300166, 100400141, 100300165, 100200172, 100200068, 100300032, 100300121, 100200234, 100200053, 100200054, 100300127, 100400079, 100400080, 100200087, 100300074, 100300045, 100200185) then goto N105_5;
+ else goto N105_7;
+
+N105_5:
+ if attribute(catid) in (0, 100200171, 100300058, 100300166, 100400141, 100300121, 100200234, 100200054, 100400079, 100200087, 100300045) then goto N105_6;
+ else goto T105_6;
+
+N105_6:
+ if attribute(catid) in (100300058, 100300166, 100300121, 100200234, 100400079, 100200087) then goto T105_4;
+ else goto T105_5;
+
+T105_4:
+ response = 0.0054906177;
+ goto D105;
+
+T105_5:
+ response = 0.0089382511;
+ goto D105;
+
+T105_6:
+ response = 0.0137638882;
+ goto D105;
+
+N105_7:
+ if attribute(catid) in (100200130, 100200052, 100300093, 100400142, 100200193, 100400038, 100300065, 100200028) then goto T105_7;
+ else goto T105_8;
+
+T105_7:
+ response = 0.0231575087;
+ goto D105;
+
+T105_8:
+ response = 0.0350257823;
+ goto D105;
+
+D105:
+
+tnscore = tnscore + response;
+
+ /* Tree 107 of 200 */
+N106_1:
+ if attribute(catid) in (0, 100200171, 100200130, 100300058, 100300013, 100300166, 100300143, 100200186, 100400141, 100300102, 100200172, 100300008, 100300032, 100300027, 100300121, 100200234, 100300019, 100300004, 100400142, 100300073, 100400037, 100200193, 100200192, 100300212, 100300209, 100300122, 100400079, 100300169, 100400080, 100200087, 100300074, 100300066, 100300007, 100300045, 100300076, 100200055, 100300006, 100300146) then goto N106_2;
+ else goto N106_7;
+
+N106_2:
+ if attribute(catid) in (100200186, 100300102, 100300008, 100300032, 100200234, 100200193, 100300212, 100300209, 100300122, 100200087, 100300076, 100200055, 100300146) then goto N106_3;
+ else goto N106_4;
+
+N106_3:
+ if attribute(catid) in (100300102, 100200234, 100300212, 100300209, 100300076, 100200055) then goto T106_1;
+ else goto T106_2;
+
+T106_1:
+ response = -0.0272486986;
+ goto D106;
+
+T106_2:
+ response = -0.0121921962;
+ goto D106;
+
+N106_4:
+ if attribute(catid) in (100200171, 100200130, 100300058, 100300143, 100400141, 100300121, 100400142, 100400037, 100200192, 100400080, 100300074, 100300045, 100300006) then goto N106_5;
+ else goto N106_6;
+
+N106_5:
+ if attribute(catid) in (100200171, 100200192, 100400080, 100300074, 100300045, 100300006) then goto T106_3;
+ else goto T106_4;
+
+T106_3:
+ response = -0.0003106606;
+ goto D106;
+
+T106_4:
+ response = 0.0031181748;
+ goto D106;
+
+N106_6:
+ if attribute(catid) in (0, 100300013, 100200172, 100300027, 100400079, 100300066) then goto T106_5;
+ else goto T106_6;
+
+T106_5:
+ response = 0.0075863782;
+ goto D106;
+
+T106_6:
+ response = 0.0110589536;
+ goto D106;
+
+N106_7:
+ if attribute(catid) in (100300011, 100300077, 100200034, 100300165, 100200052, 100300093, 100300005, 100200068, 100300126, 100200054, 100300065, 100300127, 100200170, 100200185, 100200232) then goto N106_8;
+ else goto T106_9;
+
+N106_8:
+ if attribute(catid) in (100300165, 100200052, 100300093, 100300126, 100200170, 100200185) then goto T106_7;
+ else goto T106_8;
+
+T106_7:
+ response = 0.0158814592;
+ goto D106;
+
+T106_8:
+ response = 0.0213240490;
+ goto D106;
+
+T106_9:
+ response = 0.0395357198;
+ goto D106;
+
+D106:
+
+tnscore = tnscore + response;
+
+ /* Tree 108 of 200 */
+N107_1:
+ if attribute(catid) in (100300058, 100300013, 100300077, 100300166, 100200034, 100300165, 100200052, 100300093, 100300005, 100200172, 100300008, 100200068, 100300019, 100300004, 100200054, 100400038, 100300212, 100400079, 100200170, 100300074, 100300066, 100200176, 100300200, 100200028, 100300076, 100200055, 100200232, 100300146) then goto N107_2;
+ else goto N107_4;
+
+N107_2:
+ if attribute(catid) in (100300013, 100300008, 100200068, 100300019, 100400038, 100200055, 100200232, 100300146) then goto T107_1;
+ else goto N107_3;
+
+T107_1:
+ response = -0.0281903745;
+ goto D107;
+
+N107_3:
+ if attribute(catid) in (100300058, 100200034, 100300165, 100200052, 100300005, 100300004, 100300212, 100400079, 100200170, 100300074, 100200176, 100300200, 100200028) then goto T107_2;
+ else goto T107_3;
+
+T107_2:
+ response = -0.0038124936;
+ goto D107;
+
+T107_3:
+ response = 0.0031637671;
+ goto D107;
+
+N107_4:
+ if attribute(catid) in (0, 100200171, 100200130, 100300143, 100200186, 100400141, 100300027, 100300116, 100200234, 100200053, 100300126, 100400142, 100300073, 100200193, 100200192, 100300065, 100300122, 100300127, 100300169, 100400080, 100300045, 100300006) then goto N107_5;
+ else goto T107_7;
+
+N107_5:
+ if attribute(catid) in (0, 100200171, 100200130, 100300143, 100200186, 100400141, 100300027, 100300116, 100200234, 100300126, 100400142, 100200193, 100200192, 100300122, 100300045, 100300006) then goto N107_6;
+ else goto T107_6;
+
+N107_6:
+ if attribute(catid) in (100200171, 100200130, 100400141, 100300027, 100300116, 100200234, 100400142, 100200193, 100300122, 100300045, 100300006) then goto T107_4;
+ else goto T107_5;
+
+T107_4:
+ response = 0.0097327734;
+ goto D107;
+
+T107_5:
+ response = 0.0117883652;
+ goto D107;
+
+T107_6:
+ response = 0.0163276141;
+ goto D107;
+
+T107_7:
+ response = 0.0378076890;
+ goto D107;
+
+D107:
+
+tnscore = tnscore + response;
+
+ /* Tree 109 of 200 */
+N108_1:
+ if attribute(catid) in (0, 100200171, 100300011, 100200130, 100300014, 100300013, 100300166, 100300143, 100200186, 100400141, 100300165, 100300093, 100300102, 100300008, 100200068, 100300032, 100300116, 100300121, 100200234, 100200053, 100300126, 100400142, 100300073, 100200193, 100200192, 100300212, 100300127, 100400079, 100400080, 100200087, 100300066, 100300007, 100200176, 100300200, 100300045, 100200028, 100200055, 100200185, 100300006) then goto N108_2;
+ else goto N108_6;
+
+N108_2:
+ if attribute(catid) in (100200171, 100300011, 100300013, 100300102, 100300008, 100200068, 100300032, 100300116, 100300121, 100200234, 100300126, 100300127, 100400079, 100400080, 100200176, 100300045, 100200028, 100200055, 100200185) then goto N108_3;
+ else goto N108_4;
+
+N108_3:
+ if attribute(catid) in (100300011, 100300013, 100300102, 100300008, 100300032, 100300116, 100300121, 100300126, 100300127, 100200176, 100200055, 100200185) then goto T108_1;
+ else goto T108_2;
+
+T108_1:
+ response = -0.0183851919;
+ goto D108;
+
+T108_2:
+ response = -0.0026357282;
+ goto D108;
+
+N108_4:
+ if attribute(catid) in (100200130, 100300166, 100200186, 100400141, 100400142) then goto T108_3;
+ else goto N108_5;
+
+T108_3:
+ response = 0.0029870563;
+ goto D108;
+
+N108_5:
+ if attribute(catid) in (100300143, 100200053, 100300073, 100200193, 100200192, 100200087, 100300066, 100300200) then goto T108_4;
+ else goto T108_5;
+
+T108_4:
+ response = 0.0070772264;
+ goto D108;
+
+T108_5:
+ response = 0.0091929568;
+ goto D108;
+
+N108_6:
+ if attribute(catid) in (100300058, 100300077, 100200052, 100300005, 100200172, 100300027, 100300019, 100300004, 100400037, 100400038, 100300065, 100200170, 100300169, 100300076, 100200067) then goto T108_6;
+ else goto T108_7;
+
+T108_6:
+ response = 0.0192905696;
+ goto D108;
+
+T108_7:
+ response = 0.0373801828;
+ goto D108;
+
+D108:
+
+tnscore = tnscore + response;
+
+ /* Tree 110 of 200 */
+N109_1:
+ if attribute(catid) in (100200171, 100200130, 100300014, 100300013, 100200186, 100300165, 100200052, 100300102, 100300008, 100200068, 100300027, 100300116, 100300004, 100200192, 100300065, 100300212, 100300122, 100300127, 100200170, 100300074, 100200176, 100300045, 100300076, 100200055, 100200185, 100300214) then goto N109_2;
+ else goto N109_4;
+
+N109_2:
+ if attribute(catid) in (100300102, 100300008, 100300212, 100200170, 100300074, 100200055, 100300214) then goto T109_1;
+ else goto N109_3;
+
+T109_1:
+ response = -0.0234366968;
+ goto D109;
+
+N109_3:
+ if attribute(catid) in (100300014, 100300013, 100200186, 100300165, 100200068, 100300116, 100300127, 100300076, 100200185) then goto T109_2;
+ else goto T109_3;
+
+T109_2:
+ response = -0.0041768475;
+ goto D109;
+
+T109_3:
+ response = 0.0012159251;
+ goto D109;
+
+N109_4:
+ if attribute(catid) in (0, 100300011, 100300058, 100300077, 100200034, 100400141, 100200172, 100300032, 100300121, 100200234, 100300126, 100400142, 100300073, 100400037, 100400079, 100300169, 100200087, 100300200, 100200028, 100300006) then goto N109_5;
+ else goto N109_7;
+
+N109_5:
+ if attribute(catid) in (0, 100300077, 100200034, 100200172, 100200234, 100300073, 100400037, 100300169, 100300200, 100200028) then goto N109_6;
+ else goto T109_6;
+
+N109_6:
+ if attribute(catid) in (100300077, 100200172, 100200234, 100400037, 100300169, 100300200) then goto T109_4;
+ else goto T109_5;
+
+T109_4:
+ response = 0.0073379486;
+ goto D109;
+
+T109_5:
+ response = 0.0101628542;
+ goto D109;
+
+T109_6:
+ response = 0.0145361756;
+ goto D109;
+
+N109_7:
+ if attribute(catid) in (100300166, 100300093, 100300005, 100400080, 100300066, 100200232) then goto T109_7;
+ else goto T109_8;
+
+T109_7:
+ response = 0.0211367281;
+ goto D109;
+
+T109_8:
+ response = 0.0344431588;
+ goto D109;
+
+D109:
+
+tnscore = tnscore + response;
+
+ /* Tree 111 of 200 */
+N110_1:
+ if attribute(catid) in (0, 100300011, 100200130, 100300014, 100300077, 100300143, 100200034, 100400141, 100300165, 100300093, 100200172, 100300032, 100300027, 100300116, 100200053, 100300004, 100300126, 100200054, 100400037, 100200193, 100400079, 100200170, 100400080, 100200087, 100300074, 100300066, 100300007, 100200176, 100300045, 100200067, 100200055, 100200185, 100300006, 100300214, 100300146) then goto N110_2;
+ else goto N110_6;
+
+N110_2:
+ if attribute(catid) in (100300011, 100300014, 100300143, 100300093, 100200053, 100200054, 100400037, 100300007, 100200176, 100200067, 100200055, 100200185, 100300214, 100300146) then goto N110_3;
+ else goto N110_4;
+
+N110_3:
+ if attribute(catid) in (100300011, 100300093, 100400037, 100300007, 100200176, 100200067, 100200055, 100300214, 100300146) then goto T110_1;
+ else goto T110_2;
+
+T110_1:
+ response = -0.0290230654;
+ goto D110;
+
+T110_2:
+ response = -0.0106383395;
+ goto D110;
+
+N110_4:
+ if attribute(catid) in (100200034, 100300165, 100200172, 100300116, 100300126, 100400079, 100200170, 100400080, 100300074, 100300045, 100300006) then goto T110_3;
+ else goto N110_5;
+
+T110_3:
+ response = -0.0013386003;
+ goto D110;
+
+N110_5:
+ if attribute(catid) in (0, 100300027, 100200087, 100300066) then goto T110_4;
+ else goto T110_5;
+
+T110_4:
+ response = 0.0034896642;
+ goto D110;
+
+T110_5:
+ response = 0.0060074974;
+ goto D110;
+
+N110_6:
+ if attribute(catid) in (100200171, 100300058, 100300166, 100200186, 100200052, 100300005, 100200068, 100300121, 100400142, 100300073, 100200192, 100300065, 100300122, 100300127, 100300169, 100300200) then goto N110_7;
+ else goto N110_8;
+
+N110_7:
+ if attribute(catid) in (100200186, 100200052, 100300005, 100400142, 100300073, 100300122, 100300169) then goto T110_6;
+ else goto T110_7;
+
+T110_6:
+ response = 0.0118090198;
+ goto D110;
+
+T110_7:
+ response = 0.0185977150;
+ goto D110;
+
+N110_8:
+ if attribute(catid) in (100300008, 100200234, 100400038) then goto T110_8;
+ else goto T110_9;
+
+T110_8:
+ response = 0.0272065563;
+ goto D110;
+
+T110_9:
+ response = 0.0476478756;
+ goto D110;
+
+D110:
+
+tnscore = tnscore + response;
+
+ /* Tree 112 of 200 */
+N111_1:
+ if attribute(catid) in (100300011, 100300013, 100200034, 100400141, 100200052, 100300005, 100300008, 100200068, 100300027, 100300116, 100200234, 100300019, 100300004, 100400142, 100300073, 100400038, 100300209, 100300127, 100400079, 100200170, 100300066, 100300007, 100200176, 100300045, 100300076, 100200055, 100300214, 100300146) then goto N111_2;
+ else goto N111_5;
+
+N111_2:
+ if attribute(catid) in (100300013, 100300008, 100300019, 100300209, 100200176, 100200055, 100300214, 100300146) then goto T111_1;
+ else goto N111_3;
+
+T111_1:
+ response = -0.0323268404;
+ goto D111;
+
+N111_3:
+ if attribute(catid) in (100300011, 100200052, 100300005, 100200068, 100300004, 100300066, 100300007) then goto T111_2;
+ else goto N111_4;
+
+T111_2:
+ response = -0.0057248097;
+ goto D111;
+
+N111_4:
+ if attribute(catid) in (100200234, 100400142, 100300073, 100400079, 100300076) then goto T111_3;
+ else goto T111_4;
+
+T111_3:
+ response = -0.0010770901;
+ goto D111;
+
+T111_4:
+ response = 0.0026132947;
+ goto D111;
+
+N111_5:
+ if attribute(catid) in (0, 100200130, 100300077, 100300166, 100200186, 100300165, 100200172, 100300121, 100200053, 100300126, 100200193, 100200192, 100300065, 100300122, 100300169, 100400080, 100300074) then goto N111_6;
+ else goto N111_7;
+
+N111_6:
+ if attribute(catid) in (100200130, 100300166, 100200186, 100300165, 100200172, 100200053, 100300126, 100300169, 100300074) then goto T111_5;
+ else goto T111_6;
+
+T111_5:
+ response = 0.0070056177;
+ goto D111;
+
+T111_6:
+ response = 0.0111605097;
+ goto D111;
+
+N111_7:
+ if attribute(catid) in (100200171, 100300014, 100300143, 100200054, 100200087, 100300200, 100200028) then goto T111_7;
+ else goto T111_8;
+
+T111_7:
+ response = 0.0189654625;
+ goto D111;
+
+T111_8:
+ response = 0.0388492541;
+ goto D111;
+
+D111:
+
+tnscore = tnscore + response;
+
+ /* Tree 113 of 200 */
+N112_1:
+ if attribute(catid) in (0, 100200171, 100300011, 100300014, 100300058, 100300077, 100400141, 100300165, 100200052, 100300093, 100300102, 100300005, 100200172, 100300027, 100300116, 100300121, 100200053, 100300019, 100400142, 100300073, 100400037, 100200192, 100300065, 100300127, 100400079, 100200170, 100300169, 100400080, 100300074, 100300066, 100300007, 100300045, 100200028, 100200067, 100200185, 100200232, 100300146) then goto N112_2;
+ else goto N112_7;
+
+N112_2:
+ if attribute(catid) in (100300011, 100300058, 100300102, 100300005, 100300027, 100200053, 100300019, 100300066, 100300007, 100200028, 100200067, 100200232, 100300146) then goto N112_3;
+ else goto N112_4;
+
+N112_3:
+ if attribute(catid) in (100300011, 100300102, 100300005, 100300019, 100300007, 100200028, 100200067, 100200232, 100300146) then goto T112_1;
+ else goto T112_2;
+
+T112_1:
+ response = -0.0351424027;
+ goto D112;
+
+T112_2:
+ response = -0.0101917869;
+ goto D112;
+
+N112_4:
+ if attribute(catid) in (0, 100300014, 100300165, 100300093, 100300116, 100300073, 100300127, 100200170, 100300169, 100400080, 100300045, 100200185) then goto N112_5;
+ else goto N112_6;
+
+N112_5:
+ if attribute(catid) in (0, 100300093, 100300116, 100300073, 100200170, 100400080, 100300045, 100200185) then goto T112_3;
+ else goto T112_4;
+
+T112_3:
+ response = 0.0023576953;
+ goto D112;
+
+T112_4:
+ response = 0.0043646024;
+ goto D112;
+
+N112_6:
+ if attribute(catid) in (100200171, 100200052, 100200172, 100400142) then goto T112_5;
+ else goto T112_6;
+
+T112_5:
+ response = 0.0064763216;
+ goto D112;
+
+T112_6:
+ response = 0.0093043399;
+ goto D112;
+
+N112_7:
+ if attribute(catid) in (100200130, 100300013, 100300166, 100300143, 100200034, 100200186, 100300008, 100300032, 100200234, 100300004, 100300126, 100200054, 100400038, 100300122, 100200176, 100300200, 100300006) then goto N112_8;
+ else goto T112_9;
+
+N112_8:
+ if attribute(catid) in (100200130, 100300166, 100300143, 100300032, 100300004, 100300126, 100200054, 100400038, 100300122, 100200176, 100300200) then goto T112_7;
+ else goto T112_8;
+
+T112_7:
+ response = 0.0153614150;
+ goto D112;
+
+T112_8:
+ response = 0.0203450573;
+ goto D112;
+
+T112_9:
+ response = 0.0370146837;
+ goto D112;
+
+D112:
+
+tnscore = tnscore + response;
+
+ /* Tree 114 of 200 */
+N113_1:
+ if attribute(catid) in (100200171, 100300011, 100200034, 100200186, 100300093, 100300102, 100300008, 100300032, 100300121, 100300126, 100300122, 100200087, 100300200, 100300076, 100200067, 100200055, 100200185, 100300006, 100300214, 100300146) then goto N113_2;
+ else goto N113_4;
+
+N113_2:
+ if attribute(catid) in (100300093, 100300008, 100300032, 100300076, 100200067, 100200055, 100200185, 100300214, 100300146) then goto T113_1;
+ else goto N113_3;
+
+T113_1:
+ response = -0.0283721448;
+ goto D113;
+
+N113_3:
+ if attribute(catid) in (100200171, 100300011, 100200034, 100200186, 100300102, 100300200, 100300006) then goto T113_2;
+ else goto T113_3;
+
+T113_2:
+ response = -0.0097302345;
+ goto D113;
+
+T113_3:
+ response = -0.0016235117;
+ goto D113;
+
+N113_4:
+ if attribute(catid) in (0, 100200130, 100300058, 100300013, 100300143, 100400141, 100300165, 100200052, 100300005, 100200068, 100300027, 100200053, 100300004, 100400142, 100300073, 100400037, 100200193, 100200192, 100300212, 100300127, 100200170, 100300169, 100300074, 100300066, 100300045, 100200028, 100200232) then goto N113_5;
+ else goto N113_7;
+
+N113_5:
+ if attribute(catid) in (0, 100300058, 100300013, 100300143, 100400141, 100200052, 100200068, 100200053, 100300073, 100400037, 100200193, 100200192, 100300127, 100200170, 100300074, 100300045) then goto N113_6;
+ else goto T113_6;
+
+N113_6:
+ if attribute(catid) in (100300058, 100200052, 100200068, 100200053, 100300073, 100400037, 100200193, 100200192, 100300127, 100200170, 100300074, 100300045) then goto T113_4;
+ else goto T113_5;
+
+T113_4:
+ response = 0.0055504107;
+ goto D113;
+
+T113_5:
+ response = 0.0071774828;
+ goto D113;
+
+T113_6:
+ response = 0.0090674985;
+ goto D113;
+
+N113_7:
+ if attribute(catid) in (100300014, 100300077, 100300166, 100200054, 100400038, 100300065) then goto T113_7;
+ else goto N113_8;
+
+T113_7:
+ response = 0.0138239288;
+ goto D113;
+
+N113_8:
+ if attribute(catid) in (100200172, 100200234, 100400079, 100400080) then goto T113_8;
+ else goto T113_9;
+
+T113_8:
+ response = 0.0189710965;
+ goto D113;
+
+T113_9:
+ response = 0.0320267193;
+ goto D113;
+
+D113:
+
+tnscore = tnscore + response;
+
+ /* Tree 115 of 200 */
+N114_1:
+ if attribute(catid) in (100200171, 100300014, 100300058, 100300013, 100300166, 100400141, 100200172, 100300008, 100200068, 100300121, 100200234, 100200053, 100300019, 100300004, 100300126, 100400037, 100200192, 100300212, 100300209, 100300127, 100400080, 100200087, 100300074, 100200176, 100300076, 100200055, 100300214) then goto N114_2;
+ else goto N114_5;
+
+N114_2:
+ if attribute(catid) in (100300014, 100300058, 100300013, 100300008, 100200068, 100200053, 100300019, 100300212, 100300209, 100300127, 100400080, 100200087, 100200055, 100300214) then goto N114_3;
+ else goto N114_4;
+
+N114_3:
+ if attribute(catid) in (100300013, 100300008, 100300019, 100300212, 100200087, 100200055, 100300214) then goto T114_1;
+ else goto T114_2;
+
+T114_1:
+ response = -0.0279752223;
+ goto D114;
+
+T114_2:
+ response = -0.0097745433;
+ goto D114;
+
+N114_4:
+ if attribute(catid) in (100200171, 100400141, 100200234, 100300004, 100400037, 100300074, 100200176, 100300076) then goto T114_3;
+ else goto T114_4;
+
+T114_3:
+ response = -0.0015465151;
+ goto D114;
+
+T114_4:
+ response = 0.0023515763;
+ goto D114;
+
+N114_5:
+ if attribute(catid) in (0, 100200130, 100300143, 100200034, 100200052, 100300027, 100400142, 100200054, 100300073, 100200193, 100400038, 100300065, 100400079, 100300169, 100300045, 100200028) then goto N114_6;
+ else goto N114_7;
+
+N114_6:
+ if attribute(catid) in (100200130, 100300143, 100200034, 100300027, 100400142, 100200054, 100300073, 100400038, 100300065, 100400079, 100300169) then goto T114_5;
+ else goto T114_6;
+
+T114_5:
+ response = 0.0070041173;
+ goto D114;
+
+T114_6:
+ response = 0.0113800549;
+ goto D114;
+
+N114_7:
+ if attribute(catid) in (100300077, 100200186, 100300165, 100300102, 100300116, 100300122, 100200170, 100300066, 100200185, 100200232) then goto T114_7;
+ else goto T114_8;
+
+T114_7:
+ response = 0.0215968499;
+ goto D114;
+
+T114_8:
+ response = 0.0448418659;
+ goto D114;
+
+D114:
+
+tnscore = tnscore + response;
+
+ /* Tree 116 of 200 */
+N115_1:
+ if attribute(catid) in (0, 100200171, 100300011, 100200130, 100300014, 100300058, 100300013, 100300077, 100300166, 100200186, 100300165, 100300093, 100300102, 100300008, 100300032, 100300027, 100300116, 100200234, 100300004, 100400142, 100300073, 100400037, 100200193, 100200192, 100300065, 100300122, 100300127, 100400080, 100300007, 100300200, 100300045, 100200028, 100300076, 100200185, 100300006, 100200232) then goto N115_2;
+ else goto N115_5;
+
+N115_2:
+ if attribute(catid) in (100300011, 100200130, 100300014, 100300058, 100300013, 100300032, 100300065, 100300007, 100300200, 100300045, 100200028, 100300076, 100200185, 100300006) then goto T115_1;
+ else goto N115_3;
+
+T115_1:
+ response = -0.0074030560;
+ goto D115;
+
+N115_3:
+ if attribute(catid) in (0, 100200186, 100300165, 100300102, 100300008, 100300027, 100300116, 100200234, 100400037, 100200192, 100300122, 100400080) then goto N115_4;
+ else goto T115_4;
+
+N115_4:
+ if attribute(catid) in (100300165, 100300008, 100300027, 100200234, 100400037, 100200192, 100300122, 100400080) then goto T115_2;
+ else goto T115_3;
+
+T115_2:
+ response = 0.0018774622;
+ goto D115;
+
+T115_3:
+ response = 0.0037801837;
+ goto D115;
+
+T115_4:
+ response = 0.0085665512;
+ goto D115;
+
+N115_5:
+ if attribute(catid) in (100200034, 100400141, 100200052, 100200172, 100200054, 100400038, 100400079, 100200170, 100300169, 100200087, 100200067) then goto N115_6;
+ else goto N115_7;
+
+N115_6:
+ if attribute(catid) in (100200034, 100400141, 100400038, 100400079, 100200087, 100200067) then goto T115_5;
+ else goto T115_6;
+
+T115_5:
+ response = 0.0118341371;
+ goto D115;
+
+T115_6:
+ response = 0.0154556164;
+ goto D115;
+
+N115_7:
+ if attribute(catid) in (100200068, 100200053, 100300126, 100300212, 100300074, 100300066) then goto T115_7;
+ else goto T115_8;
+
+T115_7:
+ response = 0.0234779795;
+ goto D115;
+
+T115_8:
+ response = 0.0334980927;
+ goto D115;
+
+D115:
+
+tnscore = tnscore + response;
+
+ /* Tree 117 of 200 */
+N116_1:
+ if attribute(catid) in (100200171, 100300058, 100300013, 100300143, 100200186, 100200052, 100300102, 100300008, 100300116, 100300121, 100200234, 100300019, 100300004, 100200054, 100200193, 100400038, 100300212, 100300209, 100300127, 100200170, 100200176, 100200067, 100200055) then goto N116_2;
+ else goto N116_5;
+
+N116_2:
+ if attribute(catid) in (100300013, 100300102, 100200234, 100400038, 100300212, 100300209, 100200176, 100200067, 100200055) then goto N116_3;
+ else goto N116_4;
+
+N116_3:
+ if attribute(catid) in (100300013, 100300102, 100400038, 100200176, 100200067, 100200055) then goto T116_1;
+ else goto T116_2;
+
+T116_1:
+ response = -0.0390121048;
+ goto D116;
+
+T116_2:
+ response = -0.0143867192;
+ goto D116;
+
+N116_4:
+ if attribute(catid) in (100300143, 100200186, 100300008, 100300116, 100300121, 100300004, 100200054, 100200193) then goto T116_3;
+ else goto T116_4;
+
+T116_3:
+ response = -0.0044006932;
+ goto D116;
+
+T116_4:
+ response = 0.0015511826;
+ goto D116;
+
+N116_5:
+ if attribute(catid) in (0, 100200130, 100300077, 100300166, 100200034, 100400141, 100300165, 100300093, 100300005, 100200172, 100200068, 100300027, 100200053, 100400142, 100300073, 100200192, 100300065, 100300122, 100400079, 100300169, 100400080, 100200087, 100300074, 100300066, 100300007, 100300200, 100200028, 100300076, 100200185, 100200232) then goto N116_6;
+ else goto T116_9;
+
+N116_6:
+ if attribute(catid) in (100200130, 100300077, 100300166, 100200034, 100400141, 100300165, 100300005, 100200172, 100200068, 100200053, 100400142, 100300073, 100300065, 100400079, 100300169, 100400080, 100200087, 100300074, 100200185, 100200232) then goto N116_7;
+ else goto N116_8;
+
+N116_7:
+ if attribute(catid) in (100200130, 100400141, 100200068, 100400142, 100300073, 100300065, 100400079, 100300169, 100400080, 100300074, 100200185) then goto T116_5;
+ else goto T116_6;
+
+T116_5:
+ response = 0.0060636247;
+ goto D116;
+
+T116_6:
+ response = 0.0088886465;
+ goto D116;
+
+N116_8:
+ if attribute(catid) in (0, 100300093, 100300007) then goto T116_7;
+ else goto T116_8;
+
+T116_7:
+ response = 0.0120697556;
+ goto D116;
+
+T116_8:
+ response = 0.0177577497;
+ goto D116;
+
+T116_9:
+ response = 0.0377473017;
+ goto D116;
+
+D116:
+
+tnscore = tnscore + response;
+
+ /* Tree 118 of 200 */
+N117_1:
+ if attribute(catid) in (100300014, 100300077, 100200034, 100300165, 100300093, 100300005, 100300008, 100200053, 100300004, 100300126, 100400142, 100300212, 100300209, 100300169, 100300200, 100300076, 100200067, 100200185, 100300146) then goto N117_2;
+ else goto N117_3;
+
+N117_2:
+ if attribute(catid) in (100200034, 100300093, 100300005, 100300004, 100300126, 100400142, 100300209, 100300076, 100200067, 100200185, 100300146) then goto T117_1;
+ else goto T117_2;
+
+T117_1:
+ response = -0.0137318062;
+ goto D117;
+
+T117_2:
+ response = -0.0014925810;
+ goto D117;
+
+N117_3:
+ if attribute(catid) in (0, 100300011, 100200130, 100300058, 100300166, 100200186, 100400141, 100200068, 100300027, 100200234, 100200054, 100300073, 100200193, 100200192, 100300065, 100300122, 100400079, 100200170, 100400080, 100300074, 100200028) then goto N117_4;
+ else goto N117_5;
+
+N117_4:
+ if attribute(catid) in (0, 100300058, 100200186, 100400141, 100200193, 100200192, 100400080, 100200028) then goto T117_3;
+ else goto T117_4;
+
+T117_3:
+ response = 0.0060199161;
+ goto D117;
+
+T117_4:
+ response = 0.0102196174;
+ goto D117;
+
+N117_5:
+ if attribute(catid) in (100200171, 100200052, 100200172, 100300032, 100300116, 100400038, 100200087, 100300066) then goto T117_5;
+ else goto T117_6;
+
+T117_5:
+ response = 0.0151193734;
+ goto D117;
+
+T117_6:
+ response = 0.0263768697;
+ goto D117;
+
+D117:
+
+tnscore = tnscore + response;
+
+ /* Tree 119 of 200 */
+N118_1:
+ if attribute(catid) in (0, 100300058, 100200034, 100200052, 100300093, 100300102, 100300005, 100300008, 100300027, 100300004, 100300126, 100400142, 100300073, 100300212, 100400079, 100400080, 100300007, 100200176, 100300200, 100300045, 100300076, 100200067, 100200185, 100300146) then goto N118_2;
+ else goto N118_5;
+
+N118_2:
+ if attribute(catid) in (100200034, 100200052, 100300102, 100300008, 100300027, 100300126, 100300007, 100200176, 100200067, 100200185, 100300146) then goto N118_3;
+ else goto N118_4;
+
+N118_3:
+ if attribute(catid) in (100300102, 100300008, 100300027, 100300126, 100200067, 100200185, 100300146) then goto T118_1;
+ else goto T118_2;
+
+T118_1:
+ response = -0.0197844476;
+ goto D118;
+
+T118_2:
+ response = -0.0061589619;
+ goto D118;
+
+N118_4:
+ if attribute(catid) in (100300058, 100400142, 100300212, 100400079, 100400080, 100300200, 100300045, 100300076) then goto T118_3;
+ else goto T118_4;
+
+T118_3:
+ response = -0.0009327577;
+ goto D118;
+
+T118_4:
+ response = 0.0041966425;
+ goto D118;
+
+N118_5:
+ if attribute(catid) in (100200171, 100200130, 100300077, 100300166, 100300143, 100200186, 100400141, 100300165, 100200172, 100200068, 100300032, 100300116, 100300121, 100200053, 100400038, 100200192, 100300065, 100200170, 100300169, 100300074, 100300066, 100200028, 100300006) then goto N118_6;
+ else goto N118_7;
+
+N118_6:
+ if attribute(catid) in (100200171, 100200130, 100300077, 100400141, 100300165, 100200172, 100300032, 100300116, 100300121, 100300065, 100200170) then goto T118_5;
+ else goto T118_6;
+
+T118_5:
+ response = 0.0090658634;
+ goto D118;
+
+T118_6:
+ response = 0.0131790639;
+ goto D118;
+
+N118_7:
+ if attribute(catid) in (100300011, 100300013, 100400037, 100300122, 100300127, 100200087) then goto T118_7;
+ else goto T118_8;
+
+T118_7:
+ response = 0.0204665481;
+ goto D118;
+
+T118_8:
+ response = 0.0306124846;
+ goto D118;
+
+D118:
+
+tnscore = tnscore + response;
+
+ /* Tree 120 of 200 */
+N119_1:
+ if attribute(catid) in (0, 100200171, 100200130, 100300058, 100300013, 100300077, 100300166, 100200052, 100300005, 100200172, 100300008, 100300116, 100200234, 100400142, 100200054, 100300073, 100400037, 100200193, 100200192, 100300212, 100300209, 100300122, 100300127, 100200170, 100300169, 100300074, 100300007, 100200176, 100300200, 100300045, 100200055, 100200185, 100300006, 100200232, 100300214, 100300146) then goto N119_2;
+ else goto N119_6;
+
+N119_2:
+ if attribute(catid) in (100200130, 100300013, 100300077, 100300005, 100300008, 100300116, 100200234, 100200054, 100300073, 100200193, 100300212, 100300209, 100300122, 100200170, 100300007, 100300045, 100200055, 100200185, 100300006, 100200232, 100300214, 100300146) then goto N119_3;
+ else goto N119_4;
+
+N119_3:
+ if attribute(catid) in (100300013, 100300116, 100200234, 100200054, 100200193, 100300212, 100300209, 100300007, 100200055, 100200185, 100300006, 100200232, 100300214, 100300146) then goto T119_1;
+ else goto T119_2;
+
+T119_1:
+ response = -0.0108827313;
+ goto D119;
+
+T119_2:
+ response = -0.0005017785;
+ goto D119;
+
+N119_4:
+ if attribute(catid) in (0, 100300058, 100300166, 100200172, 100400037, 100200176) then goto N119_5;
+ else goto T119_5;
+
+N119_5:
+ if attribute(catid) in (100300058, 100300166, 100200172, 100400037) then goto T119_3;
+ else goto T119_4;
+
+T119_3:
+ response = 0.0023128525;
+ goto D119;
+
+T119_4:
+ response = 0.0053288841;
+ goto D119;
+
+T119_5:
+ response = 0.0080470055;
+ goto D119;
+
+N119_6:
+ if attribute(catid) in (100300014, 100300143, 100200034, 100200186, 100400141, 100300165, 100200068, 100300032, 100300027, 100300121, 100200053, 100300004, 100300065, 100400079, 100200087, 100300066) then goto N119_7;
+ else goto N119_8;
+
+N119_7:
+ if attribute(catid) in (100300014, 100300143, 100200034, 100400141, 100300165, 100300032, 100200053, 100300004, 100300065, 100400079, 100300066) then goto T119_6;
+ else goto T119_7;
+
+T119_6:
+ response = 0.0118541533;
+ goto D119;
+
+T119_7:
+ response = 0.0171167195;
+ goto D119;
+
+N119_8:
+ if attribute(catid) in (100300093, 100300126, 100400038, 100400080) then goto T119_8;
+ else goto T119_9;
+
+T119_8:
+ response = 0.0252859922;
+ goto D119;
+
+T119_9:
+ response = 0.0427399285;
+ goto D119;
+
+D119:
+
+tnscore = tnscore + response;
+
+ /* Tree 121 of 200 */
+N120_1:
+ if attribute(catid) in (100200171, 100300011, 100200130, 100300058, 100300166, 100200034, 100200186, 100400141, 100300093, 100300005, 100300008, 100200068, 100300116, 100200053, 100300126, 100400142, 100200054, 100300073, 100200193, 100300065, 100400079, 100300169, 100400080, 100300074, 100300007, 100200176, 100300045, 100200028, 100200067, 100300006, 100300214, 100300146) then goto N120_2;
+ else goto N120_5;
+
+N120_2:
+ if attribute(catid) in (100300011, 100300058, 100200034, 100200186, 100300005, 100300008, 100200054, 100400080, 100300074, 100300007, 100200176, 100200067, 100300214, 100300146) then goto N120_3;
+ else goto N120_4;
+
+N120_3:
+ if attribute(catid) in (100200034, 100200054, 100400080, 100300074, 100300007, 100200067, 100300214, 100300146) then goto T120_1;
+ else goto T120_2;
+
+T120_1:
+ response = -0.0139396834;
+ goto D120;
+
+T120_2:
+ response = -0.0053561842;
+ goto D120;
+
+N120_4:
+ if attribute(catid) in (100200171, 100200130, 100300093, 100200068, 100200053, 100300126, 100400142, 100400079, 100300045, 100300006) then goto T120_3;
+ else goto T120_4;
+
+T120_3:
+ response = -0.0012208885;
+ goto D120;
+
+T120_4:
+ response = 0.0034171063;
+ goto D120;
+
+N120_5:
+ if attribute(catid) in (0, 100300014, 100300013, 100300143, 100300165, 100200052, 100300102, 100200172, 100300032, 100300121, 100400037, 100400038, 100200192, 100300122, 100300127, 100200170, 100200087, 100300066, 100300200) then goto N120_6;
+ else goto T120_8;
+
+N120_6:
+ if attribute(catid) in (0, 100300143, 100300165, 100200052, 100300102, 100300032, 100300121, 100400037, 100400038, 100300122, 100300127, 100200170) then goto N120_7;
+ else goto T120_7;
+
+N120_7:
+ if attribute(catid) in (100300143, 100200052, 100300102, 100300032, 100400037, 100400038, 100200170) then goto T120_5;
+ else goto T120_6;
+
+T120_5:
+ response = 0.0071786267;
+ goto D120;
+
+T120_6:
+ response = 0.0105454236;
+ goto D120;
+
+T120_7:
+ response = 0.0151332724;
+ goto D120;
+
+T120_8:
+ response = 0.0271687303;
+ goto D120;
+
+D120:
+
+tnscore = tnscore + response;
+
+ /* Tree 122 of 200 */
+N121_1:
+ if attribute(catid) in (0, 100300011, 100200130, 100300058, 100300013, 100300077, 100300166, 100300143, 100200034, 100200052, 100200172, 100300032, 100300121, 100300019, 100400142, 100300073, 100200193, 100400038, 100300065, 100300209, 100300122, 100300127, 100400079, 100300169, 100400080, 100200087, 100300007, 100200176, 100200028, 100300076, 100200067, 100300006) then goto N121_2;
+ else goto N121_5;
+
+N121_2:
+ if attribute(catid) in (100300011, 100200034, 100300019, 100200193, 100400038, 100300209, 100300122, 100300007, 100200176, 100200028, 100200067, 100300006) then goto T121_1;
+ else goto N121_3;
+
+T121_1:
+ response = -0.0107750803;
+ goto D121;
+
+N121_3:
+ if attribute(catid) in (100400142, 100300073, 100400079, 100400080, 100300076) then goto T121_2;
+ else goto N121_4;
+
+T121_2:
+ response = 0.0000414432;
+ goto D121;
+
+N121_4:
+ if attribute(catid) in (0, 100200130, 100300077, 100300166, 100300143, 100200172) then goto T121_3;
+ else goto T121_4;
+
+T121_3:
+ response = 0.0037799336;
+ goto D121;
+
+T121_4:
+ response = 0.0062373044;
+ goto D121;
+
+N121_5:
+ if attribute(catid) in (100200171, 100200186, 100300165, 100300102, 100300005, 100300027, 100300116, 100200234, 100300004, 100300126, 100200054, 100400037, 100200170, 100300074, 100300066) then goto N121_6;
+ else goto N121_7;
+
+N121_6:
+ if attribute(catid) in (100200171, 100200186, 100300165, 100200234, 100300074) then goto T121_5;
+ else goto T121_6;
+
+T121_5:
+ response = 0.0105527030;
+ goto D121;
+
+T121_6:
+ response = 0.0153207486;
+ goto D121;
+
+N121_7:
+ if attribute(catid) in (100300014, 100300093, 100200068, 100200053, 100200192, 100300200, 100200232) then goto T121_7;
+ else goto T121_8;
+
+T121_7:
+ response = 0.0240361458;
+ goto D121;
+
+T121_8:
+ response = 0.0348297568;
+ goto D121;
+
+D121:
+
+tnscore = tnscore + response;
+
+ /* Tree 123 of 200 */
+N122_1:
+ if attribute(catid) in (0, 100200171, 100300011, 100200130, 100300014, 100300058, 100300013, 100300166, 100200034, 100200186, 100400141, 100300165, 100200052, 100200172, 100200068, 100300032, 100300027, 100300116, 100300121, 100200053, 100300004, 100300126, 100400142, 100200054, 100300073, 100400037, 100200193, 100200192, 100300065, 100300212, 100300209, 100300122, 100300127, 100400079, 100200170, 100300169, 100400080, 100300074, 100300066, 100300007, 100200176, 100300200, 100300045, 100200028, 100300076, 100200067, 100200055, 100300006, 100200232, 100300146) then goto N122_2;
+ else goto N122_8;
+
+N122_2:
+ if attribute(catid) in (100200130, 100300014, 100300058, 100200034, 100300165, 100200068, 100200054, 100300209, 100200170, 100400080, 100300074, 100300007, 100200176, 100200055, 100300006, 100200232, 100300146) then goto N122_3;
+ else goto N122_5;
+
+N122_3:
+ if attribute(catid) in (100300058, 100200068, 100200054, 100300209, 100200176, 100200055, 100200232, 100300146) then goto T122_1;
+ else goto N122_4;
+
+T122_1:
+ response = -0.0154543001;
+ goto D122;
+
+N122_4:
+ if attribute(catid) in (100200034, 100300165, 100200170, 100300007, 100300006) then goto T122_2;
+ else goto T122_3;
+
+T122_2:
+ response = -0.0053315859;
+ goto D122;
+
+T122_3:
+ response = -0.0008526404;
+ goto D122;
+
+N122_5:
+ if attribute(catid) in (0, 100300011, 100300013, 100400141, 100200052, 100200172, 100300121, 100300004, 100400142, 100300073, 100400037, 100200193, 100200192, 100300065, 100300122, 100300127, 100400079, 100300066, 100300200, 100300045, 100200028, 100300076) then goto N122_6;
+ else goto N122_7;
+
+N122_6:
+ if attribute(catid) in (100300011, 100300013, 100400141, 100300121, 100400142, 100400037, 100200193, 100200192, 100300065, 100300122, 100300127) then goto T122_4;
+ else goto T122_5;
+
+T122_4:
+ response = 0.0029708190;
+ goto D122;
+
+T122_5:
+ response = 0.0066974843;
+ goto D122;
+
+N122_7:
+ if attribute(catid) in (100200171, 100300027, 100200053, 100300169) then goto T122_6;
+ else goto T122_7;
+
+T122_6:
+ response = 0.0109293572;
+ goto D122;
+
+T122_7:
+ response = 0.0151670383;
+ goto D122;
+
+N122_8:
+ if attribute(catid) in (100300077, 100300093, 100300102, 100300005, 100300008, 100200087) then goto T122_8;
+ else goto T122_9;
+
+T122_8:
+ response = 0.0226972124;
+ goto D122;
+
+T122_9:
+ response = 0.0401505411;
+ goto D122;
+
+D122:
+
+tnscore = tnscore + response;
+
+ /* Tree 124 of 200 */
+N123_1:
+ if attribute(catid) in (100200130, 100300014, 100300077, 100200034, 100300165, 100300102, 100300032, 100300116, 100300121, 100200234, 100200053, 100400038, 100300212, 100300127, 100400080, 100200087, 100300074, 100300007, 100200028, 100200067, 100200055, 100200185, 100300006) then goto N123_2;
+ else goto N123_4;
+
+N123_2:
+ if attribute(catid) in (100300014, 100300032, 100400038, 100300212, 100200087, 100300007, 100200055, 100200185) then goto T123_1;
+ else goto N123_3;
+
+T123_1:
+ response = -0.0294546465;
+ goto D123;
+
+N123_3:
+ if attribute(catid) in (100300165, 100300121, 100200234, 100300127, 100400080, 100200028, 100200067, 100300006) then goto T123_2;
+ else goto T123_3;
+
+T123_2:
+ response = -0.0041076181;
+ goto D123;
+
+T123_3:
+ response = -0.0003323120;
+ goto D123;
+
+N123_4:
+ if attribute(catid) in (0, 100300011, 100300058, 100300166, 100200186, 100300093, 100300005, 100200172, 100300008, 100200068, 100300073, 100400037, 100200192, 100300122, 100400079, 100300169, 100300066, 100200176, 100300076) then goto N123_5;
+ else goto N123_7;
+
+N123_5:
+ if attribute(catid) in (0, 100300011, 100300058, 100300093, 100300005, 100200172, 100200068, 100300073, 100200192, 100400079, 100300169, 100300076) then goto N123_6;
+ else goto T123_6;
+
+N123_6:
+ if attribute(catid) in (100300011, 100300058, 100300093, 100300005, 100200172, 100200068, 100300073, 100200192, 100400079, 100300169) then goto T123_4;
+ else goto T123_5;
+
+T123_4:
+ response = 0.0060463670;
+ goto D123;
+
+T123_5:
+ response = 0.0075872041;
+ goto D123;
+
+T123_6:
+ response = 0.0099717066;
+ goto D123;
+
+N123_7:
+ if attribute(catid) in (100200171, 100400141, 100200052, 100300004, 100300126, 100400142, 100200054, 100300065, 100200170) then goto T123_7;
+ else goto T123_8;
+
+T123_7:
+ response = 0.0147808083;
+ goto D123;
+
+T123_8:
+ response = 0.0271833064;
+ goto D123;
+
+D123:
+
+tnscore = tnscore + response;
+
+ /* Tree 125 of 200 */
+N124_1:
+ if attribute(catid) in (0, 100200171, 100300011, 100200130, 100300014, 100300013, 100300077, 100300166, 100300143, 100200034, 100200186, 100400141, 100300165, 100200052, 100300093, 100300102, 100200172, 100300008, 100300032, 100300027, 100300121, 100200234, 100200053, 100300019, 100300004, 100300073, 100400037, 100400038, 100200192, 100300065, 100300212, 100300122, 100300127, 100200170, 100300169, 100400080, 100200087, 100300074, 100300066, 100300007, 100200176, 100300200, 100200028, 100200067) then goto N124_2;
+ else goto N124_6;
+
+N124_2:
+ if attribute(catid) in (100300014, 100300013, 100300077, 100300143, 100200186, 100300093, 100300102, 100300008, 100300032, 100200053, 100300019, 100400037, 100400038, 100300212, 100300007, 100200028, 100200067) then goto N124_3;
+ else goto N124_4;
+
+N124_3:
+ if attribute(catid) in (100200186, 100300032, 100300019, 100300212, 100200067) then goto T124_1;
+ else goto T124_2;
+
+T124_1:
+ response = -0.0346775988;
+ goto D124;
+
+T124_2:
+ response = -0.0084286391;
+ goto D124;
+
+N124_4:
+ if attribute(catid) in (100200034, 100400141, 100200052, 100200172, 100200234, 100300073, 100300065, 100200170, 100200087, 100300074, 100200176) then goto T124_3;
+ else goto N124_5;
+
+T124_3:
+ response = 0.0021428485;
+ goto D124;
+
+N124_5:
+ if attribute(catid) in (0, 100200171, 100200130, 100300166, 100300165, 100300027, 100200192, 100300122, 100300169, 100300066, 100300200) then goto T124_4;
+ else goto T124_5;
+
+T124_4:
+ response = 0.0069464030;
+ goto D124;
+
+T124_5:
+ response = 0.0110205277;
+ goto D124;
+
+N124_6:
+ if attribute(catid) in (100300058, 100200068, 100300116, 100300126, 100400142) then goto T124_6;
+ else goto T124_7;
+
+T124_6:
+ response = 0.0228118568;
+ goto D124;
+
+T124_7:
+ response = 0.0343825826;
+ goto D124;
+
+D124:
+
+tnscore = tnscore + response;
+
+ /* Tree 126 of 200 */
+N125_1:
+ if attribute(catid) in (0, 100200171, 100300011, 100200130, 100300014, 100300077, 100300143, 100200186, 100400141, 100200052, 100300093, 100300102, 100300005, 100200068, 100300032, 100300027, 100300121, 100200234, 100200053, 100300126, 100400142, 100300073, 100400037, 100200192, 100300065, 100300209, 100300127, 100400079, 100200170, 100400080, 100200087, 100300074, 100300066, 100300007, 100200176, 100300045, 100200028, 100300076, 100200055, 100200185, 100300006, 100200232) then goto N125_2;
+ else goto N125_6;
+
+N125_2:
+ if attribute(catid) in (100300011, 100200186, 100300102, 100300005, 100200068, 100300032, 100300121, 100200053, 100300126, 100400037, 100300209, 100300007, 100300045, 100200028, 100300076, 100200055, 100200185, 100300006) then goto N125_3;
+ else goto N125_4;
+
+N125_3:
+ if attribute(catid) in (100300011, 100300102, 100300005, 100300121, 100300126, 100400037, 100300209, 100300076, 100200055, 100200185, 100300006) then goto T125_1;
+ else goto T125_2;
+
+T125_1:
+ response = -0.0217391763;
+ goto D125;
+
+T125_2:
+ response = -0.0085505926;
+ goto D125;
+
+N125_4:
+ if attribute(catid) in (100200171, 100200130, 100300014, 100300143, 100400141, 100200052, 100300027, 100200234, 100300073, 100200192, 100300127, 100200170, 100200087, 100300074, 100300066, 100200176) then goto N125_5;
+ else goto T125_5;
+
+N125_5:
+ if attribute(catid) in (100200171, 100200130, 100300143, 100200052, 100200170, 100300074, 100200176) then goto T125_3;
+ else goto T125_4;
+
+T125_3:
+ response = 0.0001581819;
+ goto D125;
+
+T125_4:
+ response = 0.0028254989;
+ goto D125;
+
+T125_5:
+ response = 0.0069841416;
+ goto D125;
+
+N125_6:
+ if attribute(catid) in (100300013, 100300166, 100300165, 100200172, 100300008, 100300116, 100300004, 100200054, 100400038, 100300212, 100300122, 100300169, 100300200, 100200067) then goto N125_7;
+ else goto T125_8;
+
+N125_7:
+ if attribute(catid) in (100200172, 100300116, 100300004, 100300122, 100300169, 100300200) then goto T125_6;
+ else goto T125_7;
+
+T125_6:
+ response = 0.0144252893;
+ goto D125;
+
+T125_7:
+ response = 0.0197908458;
+ goto D125;
+
+T125_8:
+ response = 0.0428441900;
+ goto D125;
+
+D125:
+
+tnscore = tnscore + response;
+
+ /* Tree 127 of 200 */
+N126_1:
+ if attribute(catid) in (0, 100200171, 100300011, 100200130, 100300014, 100300058, 100300077, 100300166, 100300143, 100200186, 100400141, 100200052, 100300005, 100200172, 100300008, 100200068, 100300027, 100300116, 100200234, 100300019, 100300126, 100300073, 100400037, 100200193, 100200192, 100300209, 100300127, 100400079, 100200170, 100300169, 100300066, 100300007, 100200176, 100300200, 100200028, 100300076, 100200185, 100300006, 100300146) then goto N126_2;
+ else goto N126_7;
+
+N126_2:
+ if attribute(catid) in (100300011, 100300014, 100300143, 100300005, 100300027, 100200234, 100300126, 100300209, 100300066, 100300007, 100200176, 100300200, 100200185, 100300146) then goto N126_3;
+ else goto N126_4;
+
+N126_3:
+ if attribute(catid) in (100300011, 100300005, 100300126, 100300209, 100300007, 100200176, 100200185) then goto T126_1;
+ else goto T126_2;
+
+T126_1:
+ response = -0.0278752616;
+ goto D126;
+
+T126_2:
+ response = -0.0114872053;
+ goto D126;
+
+N126_4:
+ if attribute(catid) in (100200171, 100300058, 100300077, 100300166, 100200172, 100300008, 100200068, 100300116, 100200193, 100400079) then goto N126_5;
+ else goto N126_6;
+
+N126_5:
+ if attribute(catid) in (100300058, 100200172, 100200068, 100300116, 100400079) then goto T126_3;
+ else goto T126_4;
+
+T126_3:
+ response = -0.0025940117;
+ goto D126;
+
+T126_4:
+ response = 0.0005790855;
+ goto D126;
+
+N126_6:
+ if attribute(catid) in (0, 100400141, 100300019, 100300073, 100200192, 100300076) then goto T126_5;
+ else goto T126_6;
+
+T126_5:
+ response = 0.0050971880;
+ goto D126;
+
+T126_6:
+ response = 0.0074013229;
+ goto D126;
+
+N126_7:
+ if attribute(catid) in (100200034, 100300093, 100300121, 100200053, 100300004, 100400142, 100200054, 100400038, 100300065, 100300122, 100400080, 100300074) then goto N126_8;
+ else goto N126_9;
+
+N126_8:
+ if attribute(catid) in (100300093, 100300121, 100200053, 100300004, 100400142, 100300065, 100400080) then goto T126_7;
+ else goto T126_8;
+
+T126_7:
+ response = 0.0112491051;
+ goto D126;
+
+T126_8:
+ response = 0.0165554655;
+ goto D126;
+
+N126_9:
+ if attribute(catid) in (100300165, 100200087, 100200232) then goto T126_9;
+ else goto T126_10;
+
+T126_9:
+ response = 0.0252638080;
+ goto D126;
+
+T126_10:
+ response = 0.0395197280;
+ goto D126;
+
+D126:
+
+tnscore = tnscore + response;
+
+ /* Tree 128 of 200 */
+N127_1:
+ if attribute(catid) in (0, 100200130, 100300058, 100300013, 100200034, 100400141, 100300165, 100300093, 100300102, 100200172, 100300008, 100200068, 100300032, 100300116, 100300121, 100200234, 100200053, 100300004, 100300126, 100400142, 100300073, 100200193, 100200192, 100300212, 100300209, 100300127, 100400079, 100200170, 100300169, 100200087, 100300066, 100300200, 100300045, 100200028, 100300076, 100200067, 100200055) then goto N127_2;
+ else goto N127_5;
+
+N127_2:
+ if attribute(catid) in (100300093, 100300008, 100300032, 100300121, 100200234, 100300004, 100300212, 100300169, 100200087, 100300045, 100200067, 100200055) then goto N127_3;
+ else goto N127_4;
+
+N127_3:
+ if attribute(catid) in (100300032, 100200234, 100300212, 100200087, 100200055) then goto T127_1;
+ else goto T127_2;
+
+T127_1:
+ response = -0.0204195189;
+ goto D127;
+
+T127_2:
+ response = -0.0097651462;
+ goto D127;
+
+N127_4:
+ if attribute(catid) in (0, 100200130, 100300058, 100300165, 100300126, 100400142, 100200192, 100400079, 100300066, 100300200, 100200028, 100300076) then goto T127_3;
+ else goto T127_4;
+
+T127_3:
+ response = 0.0029821664;
+ goto D127;
+
+T127_4:
+ response = 0.0074556490;
+ goto D127;
+
+N127_5:
+ if attribute(catid) in (100200171, 100300077, 100300166, 100200186, 100300027, 100200054, 100400038, 100300065, 100300122, 100300007, 100200232) then goto T127_5;
+ else goto N127_6;
+
+T127_5:
+ response = 0.0123063395;
+ goto D127;
+
+N127_6:
+ if attribute(catid) in (100300014, 100200052, 100300005, 100400037, 100400080) then goto T127_6;
+ else goto T127_7;
+
+T127_6:
+ response = 0.0219573650;
+ goto D127;
+
+T127_7:
+ response = 0.0390618799;
+ goto D127;
+
+D127:
+
+tnscore = tnscore + response;
+
+ /* Tree 129 of 200 */
+N128_1:
+ if attribute(catid) in (0, 100200171, 100300011, 100200130, 100300014, 100300058, 100300013, 100300077, 100300166, 100300143, 100200034, 100200186, 100400141, 100200052, 100300093, 100300102, 100300005, 100200172, 100200068, 100300027, 100300116, 100300121, 100200053, 100300019, 100300126, 100200054, 100300073, 100400037, 100200193, 100200192, 100300122, 100400079, 100200170, 100300169, 100200087, 100300074, 100300066, 100300007, 100300200, 100300045, 100200028, 100200067, 100300006, 100200232, 100300214) then goto N128_2;
+ else goto N128_5;
+
+N128_2:
+ if attribute(catid) in (100300011, 100300166, 100300143, 100400141, 100300093, 100300005, 100200172, 100200068, 100300116, 100300121, 100300019, 100300126, 100200054, 100300073, 100200193, 100200087, 100300074, 100300007, 100300200, 100300045, 100200028, 100200067, 100200232, 100300214) then goto N128_3;
+ else goto N128_4;
+
+N128_3:
+ if attribute(catid) in (100300143, 100400141, 100300116, 100300019, 100300126, 100200054, 100300074, 100200067, 100200232, 100300214) then goto T128_1;
+ else goto T128_2;
+
+T128_1:
+ response = -0.0084514959;
+ goto D128;
+
+T128_2:
+ response = -0.0026252948;
+ goto D128;
+
+N128_4:
+ if attribute(catid) in (0, 100200130, 100300013, 100200034, 100300027, 100200053, 100400037, 100200192, 100200170, 100300169, 100300006) then goto T128_3;
+ else goto T128_4;
+
+T128_3:
+ response = 0.0047022442;
+ goto D128;
+
+T128_4:
+ response = 0.0075241336;
+ goto D128;
+
+N128_5:
+ if attribute(catid) in (100300032, 100200234, 100300004, 100400142, 100400038, 100300065, 100300212, 100300127, 100400080, 100300076) then goto T128_5;
+ else goto N128_6;
+
+T128_5:
+ response = 0.0167624654;
+ goto D128;
+
+N128_6:
+ if attribute(catid) in (100300165) then goto T128_6;
+ else goto T128_7;
+
+T128_6:
+ response = 0.0213460841;
+ goto D128;
+
+T128_7:
+ response = 0.0385220165;
+ goto D128;
+
+D128:
+
+tnscore = tnscore + response;
+
+ /* Tree 130 of 200 */
+N129_1:
+ if attribute(catid) in (0, 100200171, 100300011, 100200130, 100300058, 100300077, 100300166, 100300143, 100200034, 100200186, 100400141, 100300165, 100200052, 100300093, 100300005, 100200172, 100200068, 100300032, 100300027, 100300116, 100200234, 100200053, 100300126, 100200054, 100300073, 100200193, 100400038, 100200192, 100300065, 100300209, 100300122, 100300127, 100400079, 100300169, 100400080, 100200087, 100300066, 100300007, 100200176, 100300045, 100200028, 100200055, 100300006) then goto N129_2;
+ else goto N129_6;
+
+N129_2:
+ if attribute(catid) in (100300011, 100300166, 100300143, 100300093, 100200068, 100300032, 100200054, 100400038, 100300209, 100300127, 100400079, 100200087, 100200176, 100300045, 100200028, 100200055, 100300006) then goto N129_3;
+ else goto N129_4;
+
+N129_3:
+ if attribute(catid) in (100200068, 100300032, 100300209, 100200176, 100200028, 100200055) then goto T129_1;
+ else goto T129_2;
+
+T129_1:
+ response = -0.0269726448;
+ goto D129;
+
+T129_2:
+ response = -0.0065892436;
+ goto D129;
+
+N129_4:
+ if attribute(catid) in (0, 100200171, 100200130, 100300058, 100300005, 100200172, 100300027, 100300116, 100200053, 100300073, 100200193, 100200192, 100300065, 100300122, 100300169, 100400080, 100300066, 100300007) then goto N129_5;
+ else goto T129_5;
+
+N129_5:
+ if attribute(catid) in (100200171, 100200130, 100300005, 100200172, 100300027, 100300073, 100200193, 100200192, 100300122, 100300169, 100300007) then goto T129_3;
+ else goto T129_4;
+
+T129_3:
+ response = 0.0012062164;
+ goto D129;
+
+T129_4:
+ response = 0.0058063938;
+ goto D129;
+
+T129_5:
+ response = 0.0115917030;
+ goto D129;
+
+N129_6:
+ if attribute(catid) in (100300014, 100300121, 100300004, 100400142, 100200170, 100300074, 100300076, 100200067, 100300146) then goto T129_6;
+ else goto T129_7;
+
+T129_6:
+ response = 0.0227008484;
+ goto D129;
+
+T129_7:
+ response = 0.0502957296;
+ goto D129;
+
+D129:
+
+tnscore = tnscore + response;
+
+ /* Tree 131 of 200 */
+N130_1:
+ if attribute(catid) in (0, 100200171, 100300013, 100300166, 100200186, 100300093, 100200172, 100300008, 100300032, 100300027, 100300116, 100200234, 100300004, 100400142, 100200193, 100400038, 100300212, 100300122, 100300127, 100400079, 100200170, 100300169, 100400080, 100200087, 100300066, 100300007, 100300200, 100300045, 100200028, 100300006, 100200232) then goto N130_2;
+ else goto N130_5;
+
+N130_2:
+ if attribute(catid) in (100300013, 100300032, 100200193, 100300212, 100300127, 100300007) then goto T130_1;
+ else goto N130_3;
+
+T130_1:
+ response = -0.0319002490;
+ goto D130;
+
+N130_3:
+ if attribute(catid) in (100200171, 100300166, 100300093, 100200172, 100300008, 100200234, 100300004, 100400142, 100400038, 100300169, 100200087, 100300066, 100300200, 100300045, 100300006, 100200232) then goto N130_4;
+ else goto T130_4;
+
+N130_4:
+ if attribute(catid) in (100300166, 100300093, 100300008, 100200234, 100300004, 100400142, 100200087, 100300066, 100300200, 100300045, 100300006) then goto T130_2;
+ else goto T130_3;
+
+T130_2:
+ response = -0.0040077306;
+ goto D130;
+
+T130_3:
+ response = 0.0005434632;
+ goto D130;
+
+T130_4:
+ response = 0.0044295890;
+ goto D130;
+
+N130_5:
+ if attribute(catid) in (100200130, 100300058, 100300077, 100300143, 100400141, 100300165, 100300102, 100300005, 100300121, 100200054, 100300073, 100200192, 100300065, 100300209, 100200176, 100300076, 100200185) then goto N130_6;
+ else goto N130_7;
+
+N130_6:
+ if attribute(catid) in (100300058, 100300077, 100300102, 100300005, 100300073, 100300065, 100300209) then goto T130_5;
+ else goto T130_6;
+
+T130_5:
+ response = 0.0088697865;
+ goto D130;
+
+T130_6:
+ response = 0.0120553652;
+ goto D130;
+
+N130_7:
+ if attribute(catid) in (100200034, 100200052, 100200068, 100200053, 100300126) then goto T130_7;
+ else goto T130_8;
+
+T130_7:
+ response = 0.0200329832;
+ goto D130;
+
+T130_8:
+ response = 0.0315067216;
+ goto D130;
+
+D130:
+
+tnscore = tnscore + response;
+
+ /* Tree 132 of 200 */
+N131_1:
+ if attribute(catid) in (100300011, 100200130, 100300058, 100300013, 100300165, 100200052, 100300005, 100200172, 100300008, 100200068, 100300027, 100300116, 100200234, 100200053, 100300126, 100400142, 100200054, 100300073, 100400037, 100300122, 100300127, 100200170, 100300169, 100300074, 100300007, 100300045, 100200028, 100200185, 100300146) then goto N131_2;
+ else goto N131_5;
+
+N131_2:
+ if attribute(catid) in (100300011, 100300058, 100300013, 100300165, 100200052, 100300008, 100200054, 100300074, 100300007, 100200185, 100300146) then goto N131_3;
+ else goto N131_4;
+
+N131_3:
+ if attribute(catid) in (100300011, 100300013, 100300008, 100200054, 100300007, 100300146) then goto T131_1;
+ else goto T131_2;
+
+T131_1:
+ response = -0.0282159404;
+ goto D131;
+
+T131_2:
+ response = -0.0102832725;
+ goto D131;
+
+N131_4:
+ if attribute(catid) in (100200068, 100300116, 100200234, 100300126, 100300073, 100400037, 100300122, 100300127, 100300045, 100200028) then goto T131_3;
+ else goto T131_4;
+
+T131_3:
+ response = -0.0021058619;
+ goto D131;
+
+T131_4:
+ response = 0.0025428121;
+ goto D131;
+
+N131_5:
+ if attribute(catid) in (0, 100200171, 100300014, 100300077, 100300166, 100300143, 100200186, 100400141, 100300032, 100300121, 100300004, 100400038, 100300065, 100400079, 100300066, 100200067, 100300006) then goto N131_6;
+ else goto N131_7;
+
+N131_6:
+ if attribute(catid) in (0, 100300014, 100300077, 100200186, 100300032, 100300121, 100300004, 100400038, 100300066, 100300006) then goto T131_5;
+ else goto T131_6;
+
+T131_5:
+ response = 0.0091363630;
+ goto D131;
+
+T131_6:
+ response = 0.0165605827;
+ goto D131;
+
+N131_7:
+ if attribute(catid) in (100200034, 100200192, 100200087, 100200176, 100300200) then goto T131_7;
+ else goto T131_8;
+
+T131_7:
+ response = 0.0258553552;
+ goto D131;
+
+T131_8:
+ response = 0.0438879554;
+ goto D131;
+
+D131:
+
+tnscore = tnscore + response;
+
+ /* Tree 133 of 200 */
+N132_1:
+ if attribute(catid) in (0, 100300011, 100200130, 100300014, 100300077, 100300166, 100300143, 100200034, 100400141, 100200052, 100300093, 100300102, 100300005, 100300008, 100300121, 100200053, 100300004, 100300126, 100400142, 100400037, 100200193, 100400038, 100200192, 100300065, 100300212, 100300122, 100300127, 100400079, 100200170, 100400080, 100300066, 100200176, 100200028, 100200067, 100200055, 100300146) then goto N132_2;
+ else goto N132_5;
+
+N132_2:
+ if attribute(catid) in (100300011, 100200130, 100300014, 100300143, 100200034, 100300093, 100300008, 100300121, 100200053, 100300004, 100200193, 100400038, 100200192, 100300212, 100400079, 100200170, 100200176, 100200067, 100200055, 100300146) then goto N132_3;
+ else goto N132_4;
+
+N132_3:
+ if attribute(catid) in (100300011, 100300143, 100300121, 100300212, 100200067, 100200055, 100300146) then goto T132_1;
+ else goto T132_2;
+
+T132_1:
+ response = -0.0232281903;
+ goto D132;
+
+T132_2:
+ response = -0.0044814242;
+ goto D132;
+
+N132_4:
+ if attribute(catid) in (100300166, 100200052, 100300126, 100400142, 100400037, 100300122, 100400080, 100200028) then goto T132_3;
+ else goto T132_4;
+
+T132_3:
+ response = 0.0018401795;
+ goto D132;
+
+T132_4:
+ response = 0.0038857825;
+ goto D132;
+
+N132_5:
+ if attribute(catid) in (100200171, 100200186, 100300165, 100200172, 100200068, 100300027, 100300116, 100200234, 100300019, 100300073, 100300169, 100200087, 100300074, 100300200, 100300076, 100200232) then goto N132_6;
+ else goto T132_7;
+
+N132_6:
+ if attribute(catid) in (100200186, 100300165, 100200172, 100200068, 100300027, 100200234, 100300073, 100200087, 100300200) then goto T132_5;
+ else goto T132_6;
+
+T132_5:
+ response = 0.0097089357;
+ goto D132;
+
+T132_6:
+ response = 0.0155015911;
+ goto D132;
+
+T132_7:
+ response = 0.0416491361;
+ goto D132;
+
+D132:
+
+tnscore = tnscore + response;
+
+ /* Tree 134 of 200 */
+N133_1:
+ if attribute(catid) in (100300011, 100200186, 100400141, 100300165, 100200052, 100300093, 100300005, 100200172, 100300032, 100300027, 100300121, 100200053, 100300019, 100300004, 100200054, 100200193, 100400038, 100300127, 100300169, 100300074, 100300007, 100200176, 100200028, 100200232, 100300214, 100300146) then goto N133_2;
+ else goto N133_4;
+
+N133_2:
+ if attribute(catid) in (100300011, 100200186, 100300093, 100300005, 100300027, 100300019, 100200054, 100400038, 100300007, 100200028, 100200232, 100300214, 100300146) then goto T133_1;
+ else goto N133_3;
+
+T133_1:
+ response = -0.0086481469;
+ goto D133;
+
+N133_3:
+ if attribute(catid) in (100300165, 100200172, 100300121, 100200193, 100300074) then goto T133_2;
+ else goto T133_3;
+
+T133_2:
+ response = -0.0026659712;
+ goto D133;
+
+T133_3:
+ response = 0.0001319612;
+ goto D133;
+
+N133_4:
+ if attribute(catid) in (0, 100200171, 100200130, 100300058, 100300077, 100300166, 100300143, 100200034, 100200068, 100300116, 100200234, 100300126, 100400142, 100300073, 100400037, 100200192, 100300065, 100300212, 100300122, 100400079, 100200170, 100400080, 100300066, 100300200, 100300006) then goto N133_5;
+ else goto T133_6;
+
+N133_5:
+ if attribute(catid) in (0, 100300058, 100300077, 100200034, 100200234, 100300073, 100400037, 100200192, 100300065, 100300122, 100400079, 100400080, 100300066) then goto T133_4;
+ else goto T133_5;
+
+T133_4:
+ response = 0.0058612042;
+ goto D133;
+
+T133_5:
+ response = 0.0117793447;
+ goto D133;
+
+T133_6:
+ response = 0.0334049996;
+ goto D133;
+
+D133:
+
+tnscore = tnscore + response;
+
+ /* Tree 135 of 200 */
+N134_1:
+ if attribute(catid) in (0, 100200171, 100300011, 100200130, 100300014, 100300166, 100300143, 100200034, 100200186, 100400141, 100300165, 100200052, 100300093, 100300102, 100200172, 100200068, 100300032, 100300027, 100300121, 100200234, 100200053, 100300019, 100300004, 100400142, 100200054, 100300073, 100400037, 100200193, 100400038, 100200192, 100300065, 100300212, 100300209, 100300122, 100300127, 100400079, 100200170, 100300169, 100400080, 100200087, 100300074, 100300066, 100300007, 100200067, 100200055, 100200185, 100300006, 100200232, 100300214) then goto N134_2;
+ else goto N134_6;
+
+N134_2:
+ if attribute(catid) in (100300014, 100300166, 100300143, 100400141, 100200052, 100300102, 100200068, 100300019, 100300004, 100200054, 100400037, 100400038, 100300212, 100300122, 100200170, 100200087, 100200067, 100200055, 100200185, 100300006, 100200232, 100300214) then goto N134_3;
+ else goto N134_4;
+
+N134_3:
+ if attribute(catid) in (100300143, 100300102, 100300019, 100200054, 100400038, 100300212, 100200067, 100200055, 100200185, 100300214) then goto T134_1;
+ else goto T134_2;
+
+T134_1:
+ response = -0.0256497270;
+ goto D134;
+
+T134_2:
+ response = -0.0043110057;
+ goto D134;
+
+N134_4:
+ if attribute(catid) in (0, 100300011, 100200130, 100200034, 100300165, 100200172, 100300032, 100300121, 100200053, 100300073, 100200193, 100200192, 100300209, 100400079, 100300169, 100300074, 100300066, 100300007) then goto N134_5;
+ else goto T134_5;
+
+N134_5:
+ if attribute(catid) in (100300011, 100200130, 100200034, 100200053, 100300073, 100300169, 100300066, 100300007) then goto T134_3;
+ else goto T134_4;
+
+T134_3:
+ response = 0.0027424259;
+ goto D134;
+
+T134_4:
+ response = 0.0069640136;
+ goto D134;
+
+T134_5:
+ response = 0.0123173309;
+ goto D134;
+
+N134_6:
+ if attribute(catid) in (100300058, 100300077, 100300005, 100300116, 100300126, 100300200, 100200028, 100300076) then goto T134_6;
+ else goto T134_7;
+
+T134_6:
+ response = 0.0267885216;
+ goto D134;
+
+T134_7:
+ response = 0.0575708735;
+ goto D134;
+
+D134:
+
+tnscore = tnscore + response;
+
+ /* Tree 136 of 200 */
+N135_1:
+ if attribute(catid) in (0, 100200171, 100300011, 100200130, 100300058, 100300077, 100300166, 100300143, 100200186, 100400141, 100200052, 100300102, 100300005, 100200172, 100200068, 100300032, 100300027, 100300116, 100300121, 100200234, 100200053, 100400142, 100300073, 100400038, 100300065, 100300209, 100300122, 100300127, 100400079, 100200170, 100200087, 100300074, 100300066, 100300007, 100200176, 100300200, 100300045, 100200028, 100200055, 100200185, 100300006, 100200232) then goto N135_2;
+ else goto N135_6;
+
+N135_2:
+ if attribute(catid) in (100300011, 100300143, 100200186, 100200068, 100200234, 100200053, 100300209, 100200087, 100300066, 100300045, 100200055, 100200185, 100300006) then goto N135_3;
+ else goto N135_4;
+
+N135_3:
+ if attribute(catid) in (100300011, 100200234, 100300209, 100300066, 100200055, 100200185) then goto T135_1;
+ else goto T135_2;
+
+T135_1:
+ response = -0.0209712583;
+ goto D135;
+
+T135_2:
+ response = -0.0093038797;
+ goto D135;
+
+N135_4:
+ if attribute(catid) in (100300077, 100200052, 100300102, 100200172, 100300121, 100400142, 100300122, 100200170, 100300074, 100300007, 100200176, 100200232) then goto T135_3;
+ else goto N135_5;
+
+T135_3:
+ response = 0.0001606661;
+ goto D135;
+
+N135_5:
+ if attribute(catid) in (0, 100200130, 100300166, 100300005, 100300073, 100300065, 100400079, 100300200) then goto T135_4;
+ else goto T135_5;
+
+T135_4:
+ response = 0.0052908982;
+ goto D135;
+
+T135_5:
+ response = 0.0074959812;
+ goto D135;
+
+N135_6:
+ if attribute(catid) in (100300165, 100300093, 100400037, 100200192, 100300169, 100400080, 100300076) then goto T135_6;
+ else goto T135_7;
+
+T135_6:
+ response = 0.0146939559;
+ goto D135;
+
+T135_7:
+ response = 0.0295044733;
+ goto D135;
+
+D135:
+
+tnscore = tnscore + response;
+
+ /* Tree 137 of 200 */
+N136_1:
+ if attribute(catid) in (0, 100200171, 100300011, 100200130, 100300013, 100300077, 100200034, 100200186, 100400141, 100300093, 100300005, 100300008, 100300027, 100300116, 100300121, 100200234, 100300004, 100300126, 100400142, 100200054, 100300073, 100400037, 100200193, 100400038, 100300065, 100300212, 100300122, 100300127, 100400079, 100300169, 100400080, 100200087, 100300074, 100300066, 100200176, 100300200, 100300045, 100300076, 100200055, 100300214) then goto N136_2;
+ else goto N136_5;
+
+N136_2:
+ if attribute(catid) in (100300013, 100300093, 100300005, 100200234, 100300004, 100200193, 100400038, 100300212, 100300122, 100300169, 100200087, 100300074, 100300200, 100300045, 100300076, 100200055, 100300214) then goto N136_3;
+ else goto N136_4;
+
+N136_3:
+ if attribute(catid) in (100200234, 100300004, 100300212, 100300200, 100200055, 100300214) then goto T136_1;
+ else goto T136_2;
+
+T136_1:
+ response = -0.0225234294;
+ goto D136;
+
+T136_2:
+ response = -0.0073428723;
+ goto D136;
+
+N136_4:
+ if attribute(catid) in (100200130, 100300077, 100400141, 100300027, 100300126, 100200054, 100300073, 100400037, 100300065, 100400080, 100200176) then goto T136_3;
+ else goto T136_4;
+
+T136_3:
+ response = -0.0007043255;
+ goto D136;
+
+T136_4:
+ response = 0.0046211972;
+ goto D136;
+
+N136_5:
+ if attribute(catid) in (100300058, 100300166, 100300143, 100200052, 100200172, 100300032, 100200192, 100300209, 100200170, 100200232, 100300146) then goto T136_5;
+ else goto N136_6;
+
+T136_5:
+ response = 0.0127794955;
+ goto D136;
+
+N136_6:
+ if attribute(catid) in (100300165, 100200053, 100300019) then goto T136_6;
+ else goto T136_7;
+
+T136_6:
+ response = 0.0212698998;
+ goto D136;
+
+T136_7:
+ response = 0.0283454496;
+ goto D136;
+
+D136:
+
+tnscore = tnscore + response;
+
+ /* Tree 138 of 200 */
+N137_1:
+ if attribute(catid) in (0, 100300014, 100300013, 100300077, 100200186, 100400141, 100200172, 100300116, 100300004, 100300126, 100400142, 100200054, 100200192, 100300212, 100300209, 100400079, 100200170, 100300169, 100400080, 100300045, 100200028, 100300076, 100200067, 100300006, 100300146) then goto N137_2;
+ else goto N137_5;
+
+N137_2:
+ if attribute(catid) in (100300126, 100400142, 100200054, 100300212, 100300045, 100200067, 100300146) then goto N137_3;
+ else goto N137_4;
+
+N137_3:
+ if attribute(catid) in (100300126, 100300212, 100200067, 100300146) then goto T137_1;
+ else goto T137_2;
+
+T137_1:
+ response = -0.0423375801;
+ goto D137;
+
+T137_2:
+ response = -0.0094881961;
+ goto D137;
+
+N137_4:
+ if attribute(catid) in (0, 100300014, 100300013, 100200186, 100400141, 100300116, 100300004, 100300209, 100400079, 100200028, 100300076, 100300006) then goto T137_3;
+ else goto T137_4;
+
+T137_3:
+ response = -0.0010068479;
+ goto D137;
+
+T137_4:
+ response = 0.0043349463;
+ goto D137;
+
+N137_5:
+ if attribute(catid) in (100200171, 100200130, 100300058, 100300166, 100200034, 100300165, 100300093, 100300005, 100200068, 100300032, 100300027, 100300121, 100200234, 100200053, 100300073, 100300065, 100300122, 100300127, 100200176) then goto N137_6;
+ else goto N137_8;
+
+N137_6:
+ if attribute(catid) in (100200130, 100300166, 100300165, 100300093, 100300121, 100200053, 100300073, 100300065, 100300122, 100300127) then goto N137_7;
+ else goto T137_7;
+
+N137_7:
+ if attribute(catid) in (100300166, 100300121, 100300073, 100300065, 100300122, 100300127) then goto T137_5;
+ else goto T137_6;
+
+T137_5:
+ response = 0.0078139636;
+ goto D137;
+
+T137_6:
+ response = 0.0103953208;
+ goto D137;
+
+T137_7:
+ response = 0.0132055533;
+ goto D137;
+
+N137_8:
+ if attribute(catid) in (100300011, 100200052, 100300102, 100300008, 100400037, 100200193, 100400038, 100200087, 100300074) then goto T137_8;
+ else goto T137_9;
+
+T137_8:
+ response = 0.0206601511;
+ goto D137;
+
+T137_9:
+ response = 0.0355590743;
+ goto D137;
+
+D137:
+
+tnscore = tnscore + response;
+
+ /* Tree 139 of 200 */
+N138_1:
+ if attribute(catid) in (100200171, 100200130, 100300166, 100300143, 100200034, 100400141, 100300093, 100300102, 100300005, 100300008, 100200068, 100300116, 100300073, 100200193, 100200192, 100300209, 100300122, 100300169, 100400080, 100200087, 100300074, 100300066, 100300045, 100200028, 100200067) then goto N138_2;
+ else goto N138_4;
+
+N138_2:
+ if attribute(catid) in (100400141, 100300102, 100300008, 100200193, 100300209, 100400080, 100300066, 100300045, 100200028, 100200067) then goto T138_1;
+ else goto N138_3;
+
+T138_1:
+ response = -0.0141770941;
+ goto D138;
+
+N138_3:
+ if attribute(catid) in (100200171, 100200130, 100300166, 100200034, 100300005, 100200068, 100200192, 100200087, 100300074) then goto T138_2;
+ else goto T138_3;
+
+T138_2:
+ response = -0.0034615278;
+ goto D138;
+
+T138_3:
+ response = 0.0015215338;
+ goto D138;
+
+N138_4:
+ if attribute(catid) in (0, 100300014, 100300058, 100300013, 100300077, 100300165, 100200052, 100200172, 100300032, 100300121, 100200053, 100300004, 100300126, 100400142, 100200054, 100400038, 100300065, 100400079, 100200176, 100300076, 100200185, 100300006) then goto N138_5;
+ else goto T138_7;
+
+N138_5:
+ if attribute(catid) in (0, 100300014, 100300058, 100300013, 100300165, 100300126, 100200054, 100400079, 100300076, 100200185, 100300006) then goto N138_6;
+ else goto T138_6;
+
+N138_6:
+ if attribute(catid) in (100300014, 100300013, 100300165, 100300126, 100200054, 100400079, 100300076, 100200185, 100300006) then goto T138_4;
+ else goto T138_5;
+
+T138_4:
+ response = 0.0055004027;
+ goto D138;
+
+T138_5:
+ response = 0.0079799037;
+ goto D138;
+
+T138_6:
+ response = 0.0113299928;
+ goto D138;
+
+T138_7:
+ response = 0.0216285145;
+ goto D138;
+
+D138:
+
+tnscore = tnscore + response;
+
+ /* Tree 140 of 200 */
+N139_1:
+ if attribute(catid) in (0, 100200171, 100300011, 100300014, 100300058, 100300013, 100300077, 100300143, 100200034, 100400141, 100200052, 100300093, 100300102, 100200172, 100200068, 100300027, 100300121, 100200234, 100200053, 100300019, 100300126, 100400142, 100200054, 100300073, 100400037, 100200193, 100200192, 100300065, 100300212, 100300122, 100300127, 100400079, 100300169, 100400080, 100300007, 100200176, 100300200, 100300045, 100200055, 100300006, 100200232, 100300214, 100300146) then goto N139_2;
+ else goto N139_6;
+
+N139_2:
+ if attribute(catid) in (100300011, 100300014, 100300058, 100300013, 100300143, 100200034, 100200052, 100300093, 100300102, 100200053, 100300019, 100200054, 100200193, 100400080, 100300007, 100300200, 100300045, 100200055, 100300006, 100200232, 100300214, 100300146) then goto N139_3;
+ else goto N139_4;
+
+N139_3:
+ if attribute(catid) in (100300011, 100300143, 100300093, 100300102, 100300019, 100200193, 100300007, 100200055, 100300006, 100200232, 100300214, 100300146) then goto T139_1;
+ else goto T139_2;
+
+T139_1:
+ response = -0.0214500230;
+ goto D139;
+
+T139_2:
+ response = -0.0079398906;
+ goto D139;
+
+N139_4:
+ if attribute(catid) in (0, 100300077, 100400141, 100200068, 100300027, 100300121, 100300126, 100400142, 100300073, 100400037, 100300065, 100300212, 100300127, 100400079, 100300169) then goto N139_5;
+ else goto T139_5;
+
+N139_5:
+ if attribute(catid) in (100300077, 100200068, 100300027, 100300121, 100300073, 100300127, 100400079) then goto T139_3;
+ else goto T139_4;
+
+T139_3:
+ response = 0.0017634542;
+ goto D139;
+
+T139_4:
+ response = 0.0044214643;
+ goto D139;
+
+T139_5:
+ response = 0.0078134161;
+ goto D139;
+
+N139_6:
+ if attribute(catid) in (100200130, 100300166, 100200186, 100300032, 100400038, 100300209, 100200170, 100200087) then goto T139_6;
+ else goto T139_7;
+
+T139_6:
+ response = 0.0133924680;
+ goto D139;
+
+T139_7:
+ response = 0.0239045512;
+ goto D139;
+
+D139:
+
+tnscore = tnscore + response;
+
+ /* Tree 141 of 200 */
+N140_1:
+ if attribute(catid) in (100300013, 100300165, 100300102, 100300008, 100200068, 100300116, 100300121, 100200053, 100200054, 100200192, 100300209, 100300122, 100200170, 100400080, 100200087, 100300066, 100200176, 100300200, 100300045, 100300076, 100200067, 100300214) then goto N140_2;
+ else goto N140_3;
+
+N140_2:
+ if attribute(catid) in (100300013, 100300102, 100300008, 100300116, 100300121, 100200176, 100300200, 100300076, 100200067, 100300214) then goto T140_1;
+ else goto T140_2;
+
+T140_1:
+ response = -0.0229996772;
+ goto D140;
+
+T140_2:
+ response = -0.0059777362;
+ goto D140;
+
+N140_3:
+ if attribute(catid) in (0, 100200171, 100200130, 100300014, 100300058, 100300077, 100300166, 100200052, 100300027, 100300004, 100300126, 100300073, 100200193, 100300065, 100300212, 100300127, 100400079, 100300169, 100300074, 100200028) then goto N140_4;
+ else goto N140_6;
+
+N140_4:
+ if attribute(catid) in (100200130, 100300014, 100300166, 100300004, 100300065, 100300212, 100400079, 100300169, 100300074) then goto T140_3;
+ else goto N140_5;
+
+T140_3:
+ response = 0.0030774960;
+ goto D140;
+
+N140_5:
+ if attribute(catid) in (0, 100300027, 100300126, 100300127) then goto T140_4;
+ else goto T140_5;
+
+T140_4:
+ response = 0.0063783302;
+ goto D140;
+
+T140_5:
+ response = 0.0094067973;
+ goto D140;
+
+N140_6:
+ if attribute(catid) in (100200186, 100400141, 100200172, 100300032, 100200234, 100300019, 100400142, 100400037, 100300006, 100300146) then goto T140_6;
+ else goto T140_7;
+
+T140_6:
+ response = 0.0137033492;
+ goto D140;
+
+T140_7:
+ response = 0.0253895093;
+ goto D140;
+
+D140:
+
+tnscore = tnscore + response;
+
+ /* Tree 142 of 200 */
+N141_1:
+ if attribute(catid) in (0, 100300014, 100300058, 100300013, 100300166, 100300143, 100300165, 100200172, 100200068, 100300116, 100300121, 100200053, 100300004, 100400038, 100300065, 100300212, 100200170, 100300169, 100400080, 100200087, 100300074, 100300066, 100300007, 100200176, 100300045, 100300214) then goto N141_2;
+ else goto N141_6;
+
+N141_2:
+ if attribute(catid) in (100300013, 100300121, 100400038, 100300212, 100200087, 100300045, 100300214) then goto N141_3;
+ else goto N141_4;
+
+N141_3:
+ if attribute(catid) in (100300013, 100300212, 100300214) then goto T141_1;
+ else goto T141_2;
+
+T141_1:
+ response = -0.0545034219;
+ goto D141;
+
+T141_2:
+ response = -0.0112857727;
+ goto D141;
+
+N141_4:
+ if attribute(catid) in (100300014, 100300143, 100300165, 100200068, 100200053, 100300065, 100200170, 100300074, 100300007) then goto T141_3;
+ else goto N141_5;
+
+T141_3:
+ response = -0.0011777575;
+ goto D141;
+
+N141_5:
+ if attribute(catid) in (0, 100200172, 100300116, 100200176) then goto T141_4;
+ else goto T141_5;
+
+T141_4:
+ response = 0.0034598533;
+ goto D141;
+
+T141_5:
+ response = 0.0071870285;
+ goto D141;
+
+N141_6:
+ if attribute(catid) in (100200171, 100300011, 100200130, 100300077, 100200034, 100200186, 100400141, 100200052, 100300093, 100300102, 100300027, 100200234, 100300126, 100400142, 100200054, 100300073, 100400037, 100200192, 100300122, 100300127, 100400079, 100300200, 100200028) then goto N141_7;
+ else goto T141_9;
+
+N141_7:
+ if attribute(catid) in (100200130, 100300077, 100200186, 100400141, 100300102, 100200234, 100300126, 100400142, 100200054, 100300073, 100200192, 100300122, 100400079, 100300200, 100200028) then goto T141_6;
+ else goto N141_8;
+
+T141_6:
+ response = 0.0113718649;
+ goto D141;
+
+N141_8:
+ if attribute(catid) in (100200171, 100300011, 100300027, 100400037) then goto T141_7;
+ else goto T141_8;
+
+T141_7:
+ response = 0.0157280324;
+ goto D141;
+
+T141_8:
+ response = 0.0222069557;
+ goto D141;
+
+T141_9:
+ response = 0.0489348021;
+ goto D141;
+
+D141:
+
+tnscore = tnscore + response;
+
+ /* Tree 143 of 200 */
+N142_1:
+ if attribute(catid) in (100300011, 100300014, 100300058, 100300013, 100300166, 100200034, 100400141, 100300165, 100200052, 100300093, 100300102, 100300008, 100300027, 100200053, 100300019, 100300126, 100400142, 100300073, 100400038, 100300209, 100200170, 100400080, 100200087, 100300007, 100300200, 100300076, 100200067, 100200055, 100300146) then goto N142_2;
+ else goto N142_4;
+
+N142_2:
+ if attribute(catid) in (100300008, 100300019, 100300126, 100300007, 100300200, 100200055, 100300146) then goto T142_1;
+ else goto N142_3;
+
+T142_1:
+ response = -0.0393976544;
+ goto D142;
+
+N142_3:
+ if attribute(catid) in (100300011, 100300014, 100300058, 100300013, 100300166, 100200034, 100300093, 100300102, 100400038, 100300209, 100200170, 100400080, 100300076, 100200067) then goto T142_2;
+ else goto T142_3;
+
+T142_2:
+ response = -0.0070993241;
+ goto D142;
+
+T142_3:
+ response = -0.0007351884;
+ goto D142;
+
+N142_4:
+ if attribute(catid) in (0, 100200171, 100200130, 100300077, 100200186, 100200172, 100200234, 100200054, 100400037, 100300122, 100300127, 100400079, 100300074, 100300066, 100200028, 100200232) then goto N142_5;
+ else goto N142_6;
+
+N142_5:
+ if attribute(catid) in (100200171, 100300077, 100200234, 100400079, 100300066, 100200028) then goto T142_4;
+ else goto T142_5;
+
+T142_4:
+ response = 0.0040677589;
+ goto D142;
+
+T142_5:
+ response = 0.0073363935;
+ goto D142;
+
+N142_6:
+ if attribute(catid) in (100300143, 100300005, 100200068, 100300116, 100200192, 100300065, 100300169, 100200176) then goto T142_6;
+ else goto T142_7;
+
+T142_6:
+ response = 0.0152680762;
+ goto D142;
+
+T142_7:
+ response = 0.0386360428;
+ goto D142;
+
+D142:
+
+tnscore = tnscore + response;
+
+ /* Tree 144 of 200 */
+N143_1:
+ if attribute(catid) in (100200171, 100300143, 100400141, 100300093, 100300005, 100300008, 100200068, 100300027, 100300121, 100300126, 100200054, 100400038, 100200192, 100300212, 100300122, 100300007, 100200176, 100300045, 100200028, 100300076, 100200067, 100300006, 100200232, 100300214) then goto N143_2;
+ else goto N143_4;
+
+N143_2:
+ if attribute(catid) in (100300093, 100300027, 100300212, 100200176, 100200067, 100200232, 100300214) then goto T143_1;
+ else goto N143_3;
+
+T143_1:
+ response = -0.0439825609;
+ goto D143;
+
+N143_3:
+ if attribute(catid) in (100300143, 100400141, 100300005, 100300008, 100300121, 100300126, 100200192, 100300007, 100300045, 100300076, 100300006) then goto T143_2;
+ else goto T143_3;
+
+T143_2:
+ response = -0.0078490055;
+ goto D143;
+
+T143_3:
+ response = -0.0029548934;
+ goto D143;
+
+N143_4:
+ if attribute(catid) in (0, 100300011, 100300014, 100300013, 100300077, 100300166, 100200034, 100200186, 100200052, 100200172, 100300032, 100300116, 100200234, 100200053, 100300004, 100400142, 100300073, 100200193, 100300065, 100300127, 100400079, 100300169, 100400080, 100300066, 100300200) then goto N143_5;
+ else goto N143_7;
+
+N143_5:
+ if attribute(catid) in (0, 100300011, 100300014, 100300013, 100300077, 100200034, 100200172, 100300116, 100200053, 100400142, 100200193, 100300065, 100300127, 100400079, 100300169, 100400080, 100300066) then goto N143_6;
+ else goto T143_6;
+
+N143_6:
+ if attribute(catid) in (100300014, 100200172, 100300116, 100400142, 100200193, 100300065, 100400079) then goto T143_4;
+ else goto T143_5;
+
+T143_4:
+ response = 0.0037085174;
+ goto D143;
+
+T143_5:
+ response = 0.0067213111;
+ goto D143;
+
+T143_6:
+ response = 0.0103041294;
+ goto D143;
+
+N143_7:
+ if attribute(catid) in (100200130, 100300165, 100300209, 100200087) then goto T143_7;
+ else goto T143_8;
+
+T143_7:
+ response = 0.0193058409;
+ goto D143;
+
+T143_8:
+ response = 0.0282268747;
+ goto D143;
+
+D143:
+
+tnscore = tnscore + response;
+
+ /* Tree 145 of 200 */
+N144_1:
+ if attribute(catid) in (100300011, 100200130, 100300058, 100300143, 100300165, 100200052, 100300093, 100300102, 100300005, 100200068, 100300027, 100300116, 100200053, 100300019, 100300004, 100400037, 100200193, 100300209, 100200170, 100300074, 100300007, 100200176, 100300045, 100200055, 100200185, 100200232, 100300214) then goto N144_2;
+ else goto N144_4;
+
+N144_2:
+ if attribute(catid) in (100300102, 100300005, 100300116, 100300019, 100300209, 100200176, 100200055, 100200185, 100200232, 100300214) then goto T144_1;
+ else goto N144_3;
+
+T144_1:
+ response = -0.0293243609;
+ goto D144;
+
+N144_3:
+ if attribute(catid) in (100200052, 100300093, 100200068, 100300004, 100200193, 100300074, 100300045) then goto T144_2;
+ else goto T144_3;
+
+T144_2:
+ response = -0.0070078206;
+ goto D144;
+
+T144_3:
+ response = -0.0009152377;
+ goto D144;
+
+N144_4:
+ if attribute(catid) in (0, 100200171, 100300013, 100300077, 100200186, 100400141, 100300126, 100200054, 100300073, 100300065, 100300122, 100400079, 100300169, 100400080, 100200087, 100300066) then goto N144_5;
+ else goto N144_6;
+
+N144_5:
+ if attribute(catid) in (100200171, 100300013, 100200186, 100300073, 100300065, 100300122, 100300169) then goto T144_4;
+ else goto T144_5;
+
+T144_4:
+ response = 0.0041690469;
+ goto D144;
+
+T144_5:
+ response = 0.0070675916;
+ goto D144;
+
+N144_6:
+ if attribute(catid) in (100300166, 100200034, 100300032, 100400142, 100200192, 100300076, 100200067) then goto T144_6;
+ else goto T144_7;
+
+T144_6:
+ response = 0.0123326309;
+ goto D144;
+
+T144_7:
+ response = 0.0192701631;
+ goto D144;
+
+D144:
+
+tnscore = tnscore + response;
+
+ /* Tree 146 of 200 */
+N145_1:
+ if attribute(catid) in (100400141, 100300093, 100300102, 100200068, 100300032, 100300116, 100200234, 100300004, 100200193, 100400038, 100300212, 100300209, 100400079, 100300066, 100200176, 100300200, 100300045, 100300076, 100300214, 100300146) then goto N145_2;
+ else goto N145_3;
+
+N145_2:
+ if attribute(catid) in (100300093, 100300102, 100200068, 100300032, 100200234, 100400038, 100300212, 100300214, 100300146) then goto T145_1;
+ else goto T145_2;
+
+T145_1:
+ response = -0.0199825802;
+ goto D145;
+
+T145_2:
+ response = -0.0078667369;
+ goto D145;
+
+N145_3:
+ if attribute(catid) in (0, 100200171, 100200130, 100300014, 100300058, 100300077, 100300166, 100200186, 100300165, 100200052, 100300005, 100200172, 100300027, 100200053, 100400142, 100300073, 100200192, 100300122, 100200170, 100300169, 100400080, 100200087, 100300074, 100300007, 100300006) then goto N145_4;
+ else goto N145_6;
+
+N145_4:
+ if attribute(catid) in (0, 100200171, 100300014, 100300058, 100300077, 100300166, 100300005, 100200172, 100300027, 100200053, 100300073, 100200170, 100400080, 100200087, 100300007) then goto N145_5;
+ else goto T145_5;
+
+N145_5:
+ if attribute(catid) in (100200171, 100300014, 100300077, 100300166, 100300005, 100200172, 100300027, 100200170, 100400080, 100300007) then goto T145_3;
+ else goto T145_4;
+
+T145_3:
+ response = 0.0025693654;
+ goto D145;
+
+T145_4:
+ response = 0.0050450725;
+ goto D145;
+
+T145_5:
+ response = 0.0104631035;
+ goto D145;
+
+N145_6:
+ if attribute(catid) in (100300011, 100300143, 100300121, 100300126, 100400037, 100300065, 100300127, 100200028) then goto T145_6;
+ else goto T145_7;
+
+T145_6:
+ response = 0.0209253237;
+ goto D145;
+
+T145_7:
+ response = 0.0358125311;
+ goto D145;
+
+D145:
+
+tnscore = tnscore + response;
+
+ /* Tree 147 of 200 */
+N146_1:
+ if attribute(catid) in (100200171, 100300058, 100300077, 100200034, 100200186, 100300005, 100300008, 100200068, 100300121, 100200053, 100300019, 100300126, 100200193, 100200192, 100300065, 100300212, 100300209, 100400079, 100200087, 100300045, 100200028, 100300076, 100200067, 100200055, 100300006, 100300146) then goto N146_2;
+ else goto N146_4;
+
+N146_2:
+ if attribute(catid) in (100300008, 100300121, 100300019, 100300126, 100300212, 100300209, 100200028, 100200055, 100300146) then goto T146_1;
+ else goto N146_3;
+
+T146_1:
+ response = -0.0326978382;
+ goto D146;
+
+N146_3:
+ if attribute(catid) in (100300077, 100200034, 100200186, 100300005, 100200068, 100200053, 100300045, 100300076, 100200067) then goto T146_2;
+ else goto T146_3;
+
+T146_2:
+ response = -0.0102205963;
+ goto D146;
+
+T146_3:
+ response = -0.0022839975;
+ goto D146;
+
+N146_4:
+ if attribute(catid) in (0, 100300011, 100200130, 100300014, 100300013, 100400141, 100300165, 100200052, 100300027, 100300116, 100300004, 100400142, 100300073, 100400037, 100400038, 100300122, 100200170, 100300169, 100400080, 100300066, 100200176, 100300200, 100200185) then goto N146_5;
+ else goto N146_7;
+
+N146_5:
+ if attribute(catid) in (100200130, 100300014, 100300165, 100200052, 100300116, 100300004, 100400142, 100300073, 100400038, 100300122, 100200170, 100300169, 100300200) then goto T146_4;
+ else goto N146_6;
+
+T146_4:
+ response = 0.0024615075;
+ goto D146;
+
+N146_6:
+ if attribute(catid) in (0, 100300013, 100300027) then goto T146_5;
+ else goto T146_6;
+
+T146_5:
+ response = 0.0070973911;
+ goto D146;
+
+T146_6:
+ response = 0.0098426968;
+ goto D146;
+
+N146_7:
+ if attribute(catid) in (100300166, 100300093, 100200172, 100200234, 100200054, 100300127, 100300074, 100300007, 100200232) then goto T146_7;
+ else goto T146_8;
+
+T146_7:
+ response = 0.0164836278;
+ goto D146;
+
+T146_8:
+ response = 0.0507175593;
+ goto D146;
+
+D146:
+
+tnscore = tnscore + response;
+
+ /* Tree 148 of 200 */
+N147_1:
+ if attribute(catid) in (0, 100300011, 100200130, 100300014, 100300077, 100300166, 100200034, 100200186, 100400141, 100200052, 100300093, 100300005, 100300008, 100200068, 100300032, 100300121, 100200234, 100200053, 100300004, 100300126, 100400037, 100300065, 100300122, 100300127, 100400080, 100200087, 100300066, 100300007, 100200176, 100200067, 100300006, 100300214) then goto N147_2;
+ else goto N147_4;
+
+N147_2:
+ if attribute(catid) in (100300008, 100300032, 100300004, 100300126, 100400037, 100200176, 100300214) then goto T147_1;
+ else goto N147_3;
+
+T147_1:
+ response = -0.0321717738;
+ goto D147;
+
+N147_3:
+ if attribute(catid) in (100300011, 100200130, 100300014, 100300077, 100200052, 100200234, 100200053, 100300122, 100300127, 100400080, 100200067, 100300006) then goto T147_2;
+ else goto T147_3;
+
+T147_2:
+ response = -0.0016525958;
+ goto D147;
+
+T147_3:
+ response = 0.0046522992;
+ goto D147;
+
+N147_4:
+ if attribute(catid) in (100200171, 100300143, 100300165, 100200172, 100300027, 100300116, 100300019, 100400142, 100200054, 100300073, 100200193, 100400038, 100200192, 100400079, 100200170, 100300169, 100300200, 100300076) then goto N147_5;
+ else goto T147_6;
+
+N147_5:
+ if attribute(catid) in (100200171, 100200172, 100200054, 100300073, 100200192, 100400079, 100200170, 100300169) then goto T147_4;
+ else goto T147_5;
+
+T147_4:
+ response = 0.0111420250;
+ goto D147;
+
+T147_5:
+ response = 0.0154339977;
+ goto D147;
+
+T147_6:
+ response = 0.0284170367;
+ goto D147;
+
+D147:
+
+tnscore = tnscore + response;
+
+ /* Tree 149 of 200 */
+N148_1:
+ if attribute(catid) in (100200130, 100300058, 100300077, 100200034, 100400141, 100300102, 100300005, 100300008, 100200053, 100300004, 100400037, 100300212, 100300122, 100400079, 100400080, 100300074, 100300007, 100200176, 100200185, 100300006, 100200232) then goto N148_2;
+ else goto N148_3;
+
+N148_2:
+ if attribute(catid) in (100200034, 100300102, 100300005, 100300008, 100400037, 100300074, 100300007, 100200185, 100300006) then goto T148_1;
+ else goto T148_2;
+
+T148_1:
+ response = -0.0179586062;
+ goto D148;
+
+T148_2:
+ response = -0.0035242201;
+ goto D148;
+
+N148_3:
+ if attribute(catid) in (0, 100300014, 100300013, 100300166, 100200186, 100300165, 100200052, 100300093, 100200172, 100200068, 100300027, 100200234, 100400142, 100200054, 100300073, 100400038, 100200192, 100300065, 100300127, 100300169, 100300200, 100300076) then goto N148_4;
+ else goto N148_6;
+
+N148_4:
+ if attribute(catid) in (100300013, 100200052, 100400142, 100300073, 100400038, 100300065, 100300076) then goto T148_3;
+ else goto N148_5;
+
+T148_3:
+ response = 0.0026855380;
+ goto D148;
+
+N148_5:
+ if attribute(catid) in (0, 100300165, 100300093, 100200172, 100200234, 100200054, 100200192) then goto T148_4;
+ else goto T148_5;
+
+T148_4:
+ response = 0.0064693900;
+ goto D148;
+
+T148_5:
+ response = 0.0086837084;
+ goto D148;
+
+N148_6:
+ if attribute(catid) in (100200171, 100300143, 100300032, 100300121, 100300126, 100200170, 100200087, 100300066, 100200028) then goto T148_6;
+ else goto T148_7;
+
+T148_6:
+ response = 0.0141461740;
+ goto D148;
+
+T148_7:
+ response = 0.0293888308;
+ goto D148;
+
+D148:
+
+tnscore = tnscore + response;
+
+ /* Tree 150 of 200 */
+N149_1:
+ if attribute(catid) in (100200171, 100300011, 100200130, 100300014, 100300058, 100300166, 100300143, 100200186, 100300165, 100300008, 100200068, 100300032, 100300027, 100300116, 100200234, 100300126, 100200054, 100400037, 100200193, 100400038, 100300209, 100300127, 100200170, 100400080, 100300074, 100300007, 100300045, 100200028, 100300076, 100200067, 100200185, 100300006, 100200232, 100300146) then goto N149_2;
+ else goto N149_5;
+
+N149_2:
+ if attribute(catid) in (100300011, 100300014, 100300058, 100300143, 100200186, 100300165, 100300032, 100300116, 100200234, 100300126, 100200193, 100400038, 100300209, 100200170, 100300007, 100300045, 100200028, 100200067, 100200232, 100300146) then goto N149_3;
+ else goto N149_4;
+
+N149_3:
+ if attribute(catid) in (100300014, 100300032, 100300116, 100200234, 100300209, 100300007, 100300045, 100200028, 100200067, 100200232, 100300146) then goto T149_1;
+ else goto T149_2;
+
+T149_1:
+ response = -0.0140634241;
+ goto D149;
+
+T149_2:
+ response = -0.0065657437;
+ goto D149;
+
+N149_4:
+ if attribute(catid) in (100200171, 100200068, 100200054, 100400080, 100300074, 100300076, 100300006) then goto T149_3;
+ else goto T149_4;
+
+T149_3:
+ response = -0.0013766512;
+ goto D149;
+
+T149_4:
+ response = 0.0012665197;
+ goto D149;
+
+N149_5:
+ if attribute(catid) in (0, 100300077, 100200052, 100300005, 100300121, 100200053, 100300004, 100300073, 100200192, 100300122, 100300169, 100200087, 100200176, 100300200) then goto N149_6;
+ else goto N149_7;
+
+N149_6:
+ if attribute(catid) in (100200052, 100300005, 100300073, 100200192) then goto T149_5;
+ else goto T149_6;
+
+T149_5:
+ response = 0.0044075628;
+ goto D149;
+
+T149_6:
+ response = 0.0068551736;
+ goto D149;
+
+N149_7:
+ if attribute(catid) in (100200172, 100300065, 100400079, 100300066) then goto T149_7;
+ else goto T149_8;
+
+T149_7:
+ response = 0.0142734060;
+ goto D149;
+
+T149_8:
+ response = 0.0204258682;
+ goto D149;
+
+D149:
+
+tnscore = tnscore + response;
+
+ /* Tree 151 of 200 */
+N150_1:
+ if attribute(catid) in (100300014, 100300166, 100300143, 100200068, 100300032, 100300116, 100200234, 100300126, 100400037, 100200193, 100400038, 100300212, 100300127, 100200087, 100300200, 100200055, 100300214, 100300146) then goto N150_2;
+ else goto N150_3;
+
+N150_2:
+ if attribute(catid) in (100300143, 100200234, 100400037, 100200193, 100300212, 100300200, 100200055, 100300214, 100300146) then goto T150_1;
+ else goto T150_2;
+
+T150_1:
+ response = -0.0253090838;
+ goto D150;
+
+T150_2:
+ response = -0.0066897329;
+ goto D150;
+
+N150_3:
+ if attribute(catid) in (0, 100200171, 100200130, 100300058, 100300077, 100200034, 100200186, 100400141, 100200052, 100300093, 100300005, 100200172, 100300121, 100200053, 100300004, 100400142, 100300073, 100300065, 100300122, 100200170, 100300169, 100400080, 100300074, 100300066, 100300007, 100200176, 100300076, 100200067, 100200185) then goto N150_4;
+ else goto N150_7;
+
+N150_4:
+ if attribute(catid) in (100200171, 100200130, 100300077, 100200186, 100200053, 100300004, 100300073, 100300169, 100300074, 100300066, 100200176, 100300076) then goto N150_5;
+ else goto N150_6;
+
+N150_5:
+ if attribute(catid) in (100200171, 100300004, 100300073, 100300169, 100300066, 100300076) then goto T150_3;
+ else goto T150_4;
+
+T150_3:
+ response = 0.0001455877;
+ goto D150;
+
+T150_4:
+ response = 0.0029189153;
+ goto D150;
+
+N150_6:
+ if attribute(catid) in (0, 100200034, 100400141, 100300005, 100400142, 100300065, 100300007, 100200185) then goto T150_5;
+ else goto T150_6;
+
+T150_5:
+ response = 0.0054511445;
+ goto D150;
+
+T150_6:
+ response = 0.0085988325;
+ goto D150;
+
+N150_7:
+ if attribute(catid) in (100300013, 100300165, 100300102, 100300027, 100200192, 100400079, 100200028, 100300006, 100200232) then goto T150_7;
+ else goto T150_8;
+
+T150_7:
+ response = 0.0180003070;
+ goto D150;
+
+T150_8:
+ response = 0.0331227663;
+ goto D150;
+
+D150:
+
+tnscore = tnscore + response;
+
+ /* Tree 152 of 200 */
+N151_1:
+ if attribute(catid) in (0, 100200130, 100300014, 100300058, 100300077, 100200034, 100400141, 100300165, 100300093, 100200172, 100300008, 100300027, 100300116, 100300121, 100200234, 100300004, 100400142, 100300073, 100400037, 100200193, 100300212, 100300127, 100400080, 100300074, 100300066, 100300007, 100300200, 100300045, 100200028, 100300076, 100300006, 100300146) then goto N151_2;
+ else goto N151_6;
+
+N151_2:
+ if attribute(catid) in (100300014, 100300058, 100300077, 100300093, 100300008, 100200234, 100400142, 100400037, 100300074, 100300200, 100300045, 100200028, 100300076, 100300006, 100300146) then goto N151_3;
+ else goto N151_4;
+
+N151_3:
+ if attribute(catid) in (100300014, 100300058, 100300093, 100400142, 100300200, 100300045, 100300076, 100300006, 100300146) then goto T151_1;
+ else goto T151_2;
+
+T151_1:
+ response = -0.0129979670;
+ goto D151;
+
+T151_2:
+ response = -0.0044598258;
+ goto D151;
+
+N151_4:
+ if attribute(catid) in (0, 100400141, 100200172, 100300116, 100300004, 100200193, 100300212, 100300127, 100400080, 100300066) then goto N151_5;
+ else goto T151_5;
+
+N151_5:
+ if attribute(catid) in (100400141, 100200172, 100300116, 100300212, 100300127, 100400080) then goto T151_3;
+ else goto T151_4;
+
+T151_3:
+ response = -0.0003302269;
+ goto D151;
+
+T151_4:
+ response = 0.0020272308;
+ goto D151;
+
+T151_5:
+ response = 0.0056490673;
+ goto D151;
+
+N151_6:
+ if attribute(catid) in (100200171, 100300011, 100300166, 100200186, 100200052, 100300102, 100300005, 100200068, 100300032, 100200053, 100400038, 100200192, 100300065, 100300122, 100400079, 100200170, 100300169, 100200176, 100200232) then goto N151_7;
+ else goto T151_9;
+
+N151_7:
+ if attribute(catid) in (100200171, 100300011, 100200052, 100300005, 100200068, 100200053, 100400038, 100300065, 100300122, 100400079, 100200170, 100300169, 100200232) then goto N151_8;
+ else goto T151_8;
+
+N151_8:
+ if attribute(catid) in (100200052, 100300005, 100200053, 100400038, 100400079, 100200170, 100300169) then goto T151_6;
+ else goto T151_7;
+
+T151_6:
+ response = 0.0089176032;
+ goto D151;
+
+T151_7:
+ response = 0.0121994068;
+ goto D151;
+
+T151_8:
+ response = 0.0173516652;
+ goto D151;
+
+T151_9:
+ response = 0.0325017104;
+ goto D151;
+
+D151:
+
+tnscore = tnscore + response;
+
+ /* Tree 153 of 200 */
+N152_1:
+ if attribute(catid) in (0, 100200171, 100300014, 100300013, 100300077, 100300166, 100200186, 100400141, 100300102, 100200172, 100300008, 100300116, 100200234, 100200053, 100300019, 100300004, 100400142, 100400037, 100200193, 100300212, 100300209, 100300122, 100300127, 100200170, 100200087, 100200176, 100300200, 100300076, 100200055, 100300006, 100200232, 100300214, 100300146) then goto N152_2;
+ else goto N152_5;
+
+N152_2:
+ if attribute(catid) in (100300013, 100300166, 100300102, 100300008, 100300116, 100200234, 100300019, 100300004, 100400142, 100300212, 100300209, 100200176, 100300200, 100200055, 100300006, 100200232, 100300214, 100300146) then goto N152_3;
+ else goto N152_4;
+
+N152_3:
+ if attribute(catid) in (100300013, 100300166, 100300019, 100300212, 100300200, 100200055, 100300214, 100300146) then goto T152_1;
+ else goto T152_2;
+
+T152_1:
+ response = -0.0132054608;
+ goto D152;
+
+T152_2:
+ response = -0.0064396815;
+ goto D152;
+
+N152_4:
+ if attribute(catid) in (0, 100300014, 100300077, 100200172, 100400037, 100200193) then goto T152_3;
+ else goto T152_4;
+
+T152_3:
+ response = 0.0018800662;
+ goto D152;
+
+T152_4:
+ response = 0.0043219093;
+ goto D152;
+
+N152_5:
+ if attribute(catid) in (100200130, 100300058, 100200034, 100300165, 100200052, 100200068, 100300032, 100300121, 100300126, 100300073, 100200192, 100300065, 100400079, 100300169, 100400080, 100300074, 100300066, 100200028) then goto N152_6;
+ else goto T152_7;
+
+N152_6:
+ if attribute(catid) in (100300058, 100300165, 100300121, 100300126, 100300073, 100400079, 100300066, 100200028) then goto T152_5;
+ else goto T152_6;
+
+T152_5:
+ response = 0.0086112093;
+ goto D152;
+
+T152_6:
+ response = 0.0133895272;
+ goto D152;
+
+T152_7:
+ response = 0.0254777637;
+ goto D152;
+
+D152:
+
+tnscore = tnscore + response;
+
+ /* Tree 154 of 200 */
+N153_1:
+ if attribute(catid) in (0, 100200171, 100300013, 100300077, 100300143, 100200186, 100400141, 100200052, 100300102, 100200172, 100300027, 100300116, 100200053, 100300004, 100400142, 100200054, 100200192, 100300065, 100300212, 100300209, 100300169, 100400080, 100200087, 100300066, 100200176, 100300200, 100300045, 100300076, 100200067, 100200055, 100200185, 100300006, 100300214, 100300146) then goto N153_2;
+ else goto N153_5;
+
+N153_2:
+ if attribute(catid) in (100300013, 100300077, 100300102, 100200053, 100300004, 100400142, 100200054, 100300065, 100300209, 100300066, 100300200, 100300045, 100200067, 100200055, 100200185, 100300006, 100300214, 100300146) then goto N153_3;
+ else goto N153_4;
+
+N153_3:
+ if attribute(catid) in (100300077, 100300102, 100300066, 100300200, 100300045, 100200067, 100200055, 100200185, 100300214, 100300146) then goto T153_1;
+ else goto T153_2;
+
+T153_1:
+ response = -0.0199968300;
+ goto D153;
+
+T153_2:
+ response = -0.0066258033;
+ goto D153;
+
+N153_4:
+ if attribute(catid) in (100200171, 100400141, 100200052, 100200172, 100300027, 100200192, 100400080, 100200087, 100200176, 100300076) then goto T153_3;
+ else goto T153_4;
+
+T153_3:
+ response = 0.0002616813;
+ goto D153;
+
+T153_4:
+ response = 0.0029401657;
+ goto D153;
+
+N153_5:
+ if attribute(catid) in (100200130, 100300014, 100300166, 100200034, 100300165, 100200068, 100200234, 100300073, 100400037, 100300122, 100400079, 100200170, 100300074, 100200028) then goto N153_6;
+ else goto T153_7;
+
+N153_6:
+ if attribute(catid) in (100300166, 100200034, 100300165, 100200068, 100300122, 100400079, 100200170) then goto T153_5;
+ else goto T153_6;
+
+T153_5:
+ response = 0.0100029130;
+ goto D153;
+
+T153_6:
+ response = 0.0150210552;
+ goto D153;
+
+T153_7:
+ response = 0.0282741486;
+ goto D153;
+
+D153:
+
+tnscore = tnscore + response;
+
+ /* Tree 155 of 200 */
+N154_1:
+ if attribute(catid) in (0, 100200171, 100200130, 100300014, 100300058, 100300013, 100300166, 100200034, 100400141, 100300165, 100200052, 100300102, 100200172, 100300032, 100300027, 100200234, 100200053, 100300019, 100300004, 100400142, 100300073, 100400037, 100200193, 100200192, 100300065, 100300212, 100300209, 100300122, 100300127, 100400079, 100200170, 100300169, 100400080, 100200176, 100300045, 100200028, 100200067, 100200055, 100200185, 100300006, 100200232) then goto N154_2;
+ else goto N154_7;
+
+N154_2:
+ if attribute(catid) in (100300014, 100300058, 100300166, 100200034, 100300165, 100300102, 100300032, 100200053, 100300019, 100300004, 100300212, 100300209, 100300127, 100200170, 100300169, 100400080, 100200176, 100300045, 100200067, 100200055, 100200185, 100300006, 100200232) then goto N154_3;
+ else goto N154_5;
+
+N154_3:
+ if attribute(catid) in (100300032, 100300019, 100300212, 100300209, 100200176, 100200067, 100200055, 100200185, 100200232) then goto T154_1;
+ else goto N154_4;
+
+T154_1:
+ response = -0.0365575410;
+ goto D154;
+
+N154_4:
+ if attribute(catid) in (100300166, 100300102, 100200053, 100300004, 100300169, 100400080, 100300006) then goto T154_2;
+ else goto T154_3;
+
+T154_2:
+ response = -0.0063417149;
+ goto D154;
+
+T154_3:
+ response = -0.0028552545;
+ goto D154;
+
+N154_5:
+ if attribute(catid) in (0, 100200130, 100400141, 100200234, 100400142, 100300073, 100200193, 100300065, 100300122, 100400079, 100200028) then goto N154_6;
+ else goto T154_6;
+
+N154_6:
+ if attribute(catid) in (100200234, 100400142, 100300065, 100300122, 100400079, 100200028) then goto T154_4;
+ else goto T154_5;
+
+T154_4:
+ response = 0.0013434898;
+ goto D154;
+
+T154_5:
+ response = 0.0041326482;
+ goto D154;
+
+T154_6:
+ response = 0.0076971051;
+ goto D154;
+
+N154_7:
+ if attribute(catid) in (100300011, 100300143, 100200186, 100300005, 100200068, 100400038, 100200087, 100300066, 100300007) then goto T154_7;
+ else goto N154_8;
+
+T154_7:
+ response = 0.0168824118;
+ goto D154;
+
+N154_8:
+ if attribute(catid) in (100300077, 100300116, 100300121, 100200054, 100300074) then goto T154_8;
+ else goto T154_9;
+
+T154_8:
+ response = 0.0265247041;
+ goto D154;
+
+T154_9:
+ response = 0.0418405954;
+ goto D154;
+
+D154:
+
+tnscore = tnscore + response;
+
+ /* Tree 156 of 200 */
+N155_1:
+ if attribute(catid) in (0, 100200171, 100300011, 100300014, 100300058, 100300013, 100300077, 100300166, 100300143, 100200034, 100200186, 100400141, 100300165, 100300093, 100300005, 100200068, 100300032, 100300027, 100300116, 100300004, 100300126, 100400142, 100200054, 100300073, 100400037, 100400038, 100200192, 100300212, 100300209, 100300122, 100300127, 100400079, 100200170, 100400080, 100300074, 100300066, 100300007, 100300045, 100300076, 100300006, 100200232, 100300214) then goto N155_2;
+ else goto N155_5;
+
+N155_2:
+ if attribute(catid) in (100200171, 100200034, 100300093, 100300005, 100200068, 100300027, 100300116, 100300004, 100300126, 100200054, 100400037, 100400038, 100300212, 100300209, 100400079, 100300074, 100300214) then goto N155_3;
+ else goto N155_4;
+
+N155_3:
+ if attribute(catid) in (100300005, 100300027, 100300116, 100200054, 100400037, 100300209, 100300074, 100300214) then goto T155_1;
+ else goto T155_2;
+
+T155_1:
+ response = -0.0185907409;
+ goto D155;
+
+T155_2:
+ response = -0.0037649772;
+ goto D155;
+
+N155_4:
+ if attribute(catid) in (100300014, 100300058, 100300013, 100300077, 100300166, 100300066, 100300007, 100300076, 100300006) then goto T155_3;
+ else goto T155_4;
+
+T155_3:
+ response = 0.0007932193;
+ goto D155;
+
+T155_4:
+ response = 0.0048541117;
+ goto D155;
+
+N155_5:
+ if attribute(catid) in (100200130, 100200052, 100300102, 100200172, 100200193, 100300169, 100200176, 100200028, 100300146) then goto T155_5;
+ else goto N155_6;
+
+T155_5:
+ response = 0.0114452275;
+ goto D155;
+
+N155_6:
+ if attribute(catid) in (100300008, 100200234, 100300065, 100200087) then goto T155_6;
+ else goto T155_7;
+
+T155_6:
+ response = 0.0212529341;
+ goto D155;
+
+T155_7:
+ response = 0.0435817724;
+ goto D155;
+
+D155:
+
+tnscore = tnscore + response;
+
+ /* Tree 157 of 200 */
+N156_1:
+ if attribute(catid) in (0, 100200171, 100200130, 100300014, 100300058, 100300077, 100300166, 100300143, 100200034, 100200186, 100400141, 100200052, 100300093, 100300102, 100300005, 100200172, 100300027, 100300116, 100300121, 100200053, 100300004, 100400142, 100300073, 100400037, 100400038, 100200192, 100300212, 100300122, 100300127, 100300169, 100400080, 100300007, 100200028, 100300076, 100200055, 100200185, 100300006, 100200232, 100300146) then goto N156_2;
+ else goto N156_6;
+
+N156_2:
+ if attribute(catid) in (100300014, 100300058, 100300143, 100400141, 100300093, 100300102, 100300116, 100400142, 100400037, 100400038, 100300122, 100300007, 100200028, 100300076, 100200055, 100200185, 100300006, 100200232, 100300146) then goto N156_3;
+ else goto N156_4;
+
+N156_3:
+ if attribute(catid) in (100300143, 100300093, 100300102, 100400037, 100300007, 100200028, 100200055, 100200185, 100300006, 100200232, 100300146) then goto T156_1;
+ else goto T156_2;
+
+T156_1:
+ response = -0.0240406920;
+ goto D156;
+
+T156_2:
+ response = -0.0039451648;
+ goto D156;
+
+N156_4:
+ if attribute(catid) in (0, 100300077, 100300166, 100200172, 100300027, 100300004, 100200192, 100300212, 100300127) then goto N156_5;
+ else goto T156_5;
+
+N156_5:
+ if attribute(catid) in (100300077, 100300166, 100200172, 100300027, 100200192, 100300212) then goto T156_3;
+ else goto T156_4;
+
+T156_3:
+ response = 0.0007340608;
+ goto D156;
+
+T156_4:
+ response = 0.0030962230;
+ goto D156;
+
+T156_5:
+ response = 0.0055831787;
+ goto D156;
+
+N156_6:
+ if attribute(catid) in (100300011, 100300165, 100300032, 100200234, 100300126, 100200193, 100300065, 100400079, 100200170, 100200087, 100300074, 100300066, 100300200) then goto N156_7;
+ else goto T156_8;
+
+N156_7:
+ if attribute(catid) in (100300165, 100300032, 100200234, 100300126, 100200193, 100400079, 100200170, 100300074, 100300066, 100300200) then goto T156_6;
+ else goto T156_7;
+
+T156_6:
+ response = 0.0106987137;
+ goto D156;
+
+T156_7:
+ response = 0.0159048063;
+ goto D156;
+
+T156_8:
+ response = 0.0286231943;
+ goto D156;
+
+D156:
+
+tnscore = tnscore + response;
+
+ /* Tree 158 of 200 */
+N157_1:
+ if attribute(catid) in (0, 100300011, 100200130, 100300014, 100300143, 100200034, 100200186, 100400141, 100200052, 100300093, 100300102, 100300005, 100200172, 100200068, 100300027, 100300116, 100300121, 100200234, 100200053, 100300004, 100400142, 100200054, 100400037, 100200193, 100400038, 100200192, 100300122, 100300169, 100400080, 100200087, 100300074, 100300066, 100300200, 100300045, 100300076, 100200055, 100200232) then goto N157_2;
+ else goto N157_5;
+
+N157_2:
+ if attribute(catid) in (100300011, 100300014, 100200034, 100400141, 100300102, 100300005, 100200172, 100200234, 100200193, 100400080, 100300066, 100300200, 100300045, 100300076, 100200055, 100200232) then goto N157_3;
+ else goto N157_4;
+
+N157_3:
+ if attribute(catid) in (100300011, 100200034, 100300102, 100200193, 100400080, 100200055, 100200232) then goto T157_1;
+ else goto T157_2;
+
+T157_1:
+ response = -0.0213247733;
+ goto D157;
+
+T157_2:
+ response = -0.0035116900;
+ goto D157;
+
+N157_4:
+ if attribute(catid) in (100200130, 100200052, 100200068, 100300121, 100200053, 100300004, 100200054, 100300169) then goto T157_3;
+ else goto T157_4;
+
+T157_3:
+ response = 0.0006568155;
+ goto D157;
+
+T157_4:
+ response = 0.0043360634;
+ goto D157;
+
+N157_5:
+ if attribute(catid) in (100200171, 100300077, 100300166, 100300165, 100300032, 100300073, 100300065, 100300209, 100300127, 100400079, 100200170, 100200028, 100300146) then goto N157_6;
+ else goto T157_7;
+
+N157_6:
+ if attribute(catid) in (100200171, 100300077, 100300165, 100300032, 100300073, 100300209, 100300127, 100200170) then goto T157_5;
+ else goto T157_6;
+
+T157_5:
+ response = 0.0086409598;
+ goto D157;
+
+T157_6:
+ response = 0.0123374521;
+ goto D157;
+
+T157_7:
+ response = 0.0275853300;
+ goto D157;
+
+D157:
+
+tnscore = tnscore + response;
+
+ /* Tree 159 of 200 */
+N158_1:
+ if attribute(catid) in (100300013, 100200034, 100400141, 100300165, 100300093, 100300102, 100300008, 100300032, 100300116, 100300121, 100200234, 100300019, 100300004, 100300126, 100200054, 100300073, 100400037, 100200192, 100300212, 100300209, 100300127, 100400079, 100200170, 100300169, 100200087, 100300066, 100300007, 100200176, 100300200, 100300045, 100300076, 100200067, 100200185, 100200232) then goto N158_2;
+ else goto N158_5;
+
+N158_2:
+ if attribute(catid) in (100300013, 100200034, 100400141, 100300102, 100300008, 100300032, 100300116, 100200234, 100300019, 100300004, 100300126, 100300212, 100300209, 100300200, 100200067, 100200185, 100200232) then goto N158_3;
+ else goto N158_4;
+
+N158_3:
+ if attribute(catid) in (100300102, 100300032, 100300019, 100300212, 100300209, 100200067, 100200185, 100200232) then goto T158_1;
+ else goto T158_2;
+
+T158_1:
+ response = -0.0524906600;
+ goto D158;
+
+T158_2:
+ response = -0.0117164184;
+ goto D158;
+
+N158_4:
+ if attribute(catid) in (100300093, 100200054, 100200192, 100400079, 100200170, 100300066, 100300045, 100300076) then goto T158_3;
+ else goto T158_4;
+
+T158_3:
+ response = -0.0050797102;
+ goto D158;
+
+T158_4:
+ response = -0.0000182540;
+ goto D158;
+
+N158_5:
+ if attribute(catid) in (0, 100200171, 100200130, 100300058, 100300166, 100200186, 100300005, 100200172, 100200068, 100300027, 100200053, 100400142, 100300065, 100300122, 100400080, 100200028, 100300006, 100300146) then goto N158_6;
+ else goto N158_8;
+
+N158_6:
+ if attribute(catid) in (0, 100200171, 100300166, 100200186, 100300005, 100400142, 100300065, 100300122, 100200028) then goto N158_7;
+ else goto T158_7;
+
+N158_7:
+ if attribute(catid) in (100200171, 100300166, 100300005, 100300065, 100300122, 100200028) then goto T158_5;
+ else goto T158_6;
+
+T158_5:
+ response = 0.0045192638;
+ goto D158;
+
+T158_6:
+ response = 0.0060994023;
+ goto D158;
+
+T158_7:
+ response = 0.0117841342;
+ goto D158;
+
+N158_8:
+ if attribute(catid) in (100300077, 100300143, 100200052, 100300074) then goto T158_8;
+ else goto T158_9;
+
+T158_8:
+ response = 0.0191236363;
+ goto D158;
+
+T158_9:
+ response = 0.0376099520;
+ goto D158;
+
+D158:
+
+tnscore = tnscore + response;
+
+ /* Tree 160 of 200 */
+N159_1:
+ if attribute(catid) in (0, 100200171, 100200130, 100300058, 100300013, 100300166, 100200034, 100200186, 100400141, 100300165, 100200052, 100300093, 100300005, 100200172, 100300027, 100300121, 100300019, 100300004, 100400142, 100300073, 100400037, 100300065, 100300212, 100300122, 100400079, 100300169, 100400080, 100200087, 100300074, 100200176, 100300045, 100200067, 100200185) then goto N159_2;
+ else goto N159_7;
+
+N159_2:
+ if attribute(catid) in (100200171, 100300013, 100300019, 100400037, 100300212, 100200087, 100200067, 100200185) then goto N159_3;
+ else goto N159_4;
+
+N159_3:
+ if attribute(catid) in (100300013, 100300019, 100300212, 100200185) then goto T159_1;
+ else goto T159_2;
+
+T159_1:
+ response = -0.0408187739;
+ goto D159;
+
+T159_2:
+ response = -0.0118328301;
+ goto D159;
+
+N159_4:
+ if attribute(catid) in (0, 100200130, 100300166, 100200034, 100200186, 100400141, 100200052, 100300093, 100300005, 100200172, 100300027, 100300121, 100300004, 100400142, 100300073, 100300065, 100400079, 100300169, 100300074, 100200176, 100300045) then goto N159_5;
+ else goto T159_6;
+
+N159_5:
+ if attribute(catid) in (100200034, 100200186, 100300093, 100300005, 100200172, 100300027, 100300121, 100300004, 100400142, 100300073, 100400079, 100300169, 100300074, 100200176) then goto N159_6;
+ else goto T159_5;
+
+N159_6:
+ if attribute(catid) in (100200034, 100300093, 100300005, 100300027, 100400079, 100300169, 100300074, 100200176) then goto T159_3;
+ else goto T159_4;
+
+T159_3:
+ response = -0.0023114463;
+ goto D159;
+
+T159_4:
+ response = 0.0006956308;
+ goto D159;
+
+T159_5:
+ response = 0.0033093887;
+ goto D159;
+
+T159_6:
+ response = 0.0090068346;
+ goto D159;
+
+N159_7:
+ if attribute(catid) in (100300011, 100300014, 100300077, 100300116, 100200053, 100300126, 100200193, 100400038, 100200192, 100300127, 100200170, 100300066, 100200028, 100300076, 100200232, 100300146) then goto T159_7;
+ else goto T159_8;
+
+T159_7:
+ response = 0.0157546690;
+ goto D159;
+
+T159_8:
+ response = 0.0276821855;
+ goto D159;
+
+D159:
+
+tnscore = tnscore + response;
+
+ /* Tree 161 of 200 */
+N160_1:
+ if attribute(catid) in (0, 100200171, 100200130, 100300014, 100300058, 100300166, 100300143, 100200186, 100300165, 100200052, 100300093, 100300005, 100200068, 100300032, 100300116, 100300121, 100200234, 100200053, 100300019, 100300004, 100300126, 100400142, 100200054, 100300073, 100200193, 100400038, 100200192, 100300065, 100300212, 100300209, 100300127, 100400079, 100200170, 100300169, 100400080, 100300074, 100300007, 100200176, 100300200, 100300045, 100200028, 100200067, 100200055, 100300006, 100200232, 100300146) then goto N160_2;
+ else goto N160_6;
+
+N160_2:
+ if attribute(catid) in (100200052, 100200068, 100300121, 100200053, 100400142, 100400038, 100200192, 100300212, 100300209, 100200170, 100300200, 100300045, 100200028, 100200067, 100200055, 100300006, 100200232, 100300146) then goto N160_3;
+ else goto N160_4;
+
+N160_3:
+ if attribute(catid) in (100300121, 100300212, 100300209, 100200028, 100200067, 100200055, 100200232, 100300146) then goto T160_1;
+ else goto T160_2;
+
+T160_1:
+ response = -0.0318529122;
+ goto D160;
+
+T160_2:
+ response = -0.0076204316;
+ goto D160;
+
+N160_4:
+ if attribute(catid) in (0, 100200171, 100300014, 100300058, 100200186, 100300165, 100300004, 100300073, 100200193, 100300065, 100300127, 100400079, 100300074, 100300007, 100200176) then goto N160_5;
+ else goto T160_5;
+
+N160_5:
+ if attribute(catid) in (100300014, 100300058, 100200186, 100300165, 100300004, 100200193, 100300065, 100300127, 100400079, 100300074, 100300007) then goto T160_3;
+ else goto T160_4;
+
+T160_3:
+ response = 0.0024278683;
+ goto D160;
+
+T160_4:
+ response = 0.0048565045;
+ goto D160;
+
+T160_5:
+ response = 0.0078949518;
+ goto D160;
+
+N160_6:
+ if attribute(catid) in (100300077, 100200034, 100400141, 100200172, 100300027, 100300122, 100200087) then goto T160_6;
+ else goto T160_7;
+
+T160_6:
+ response = 0.0144109170;
+ goto D160;
+
+T160_7:
+ response = 0.0330128168;
+ goto D160;
+
+D160:
+
+tnscore = tnscore + response;
+
+ /* Tree 162 of 200 */
+N161_1:
+ if attribute(catid) in (100300077, 100300166, 100300143, 100200034, 100200186, 100300032, 100300121, 100300019, 100300004, 100300126, 100400142, 100300073, 100400037, 100400038, 100300065, 100300212, 100300209, 100300127, 100400079, 100200087, 100200176, 100300200, 100300076, 100200067, 100200055, 100300214) then goto N161_2;
+ else goto N161_4;
+
+N161_2:
+ if attribute(catid) in (100300143, 100300032, 100300126, 100300212, 100300209, 100200176, 100200055, 100300214) then goto T161_1;
+ else goto N161_3;
+
+T161_1:
+ response = -0.0309719186;
+ goto D161;
+
+N161_3:
+ if attribute(catid) in (100300019, 100300004, 100400142, 100400037, 100400038, 100300065, 100200087, 100300200, 100200067) then goto T161_2;
+ else goto T161_3;
+
+T161_2:
+ response = -0.0082013563;
+ goto D161;
+
+T161_3:
+ response = -0.0020664794;
+ goto D161;
+
+N161_4:
+ if attribute(catid) in (0, 100200171, 100300014, 100300058, 100300013, 100400141, 100300165, 100200172, 100300008, 100300027, 100300116, 100200053, 100200192, 100300122, 100200170, 100300169, 100400080, 100300074, 100300045, 100200028) then goto N161_5;
+ else goto N161_7;
+
+N161_5:
+ if attribute(catid) in (0, 100200171, 100300014, 100300058, 100300008, 100300116, 100200053, 100200192, 100300169, 100300074, 100300045, 100200028) then goto N161_6;
+ else goto T161_6;
+
+N161_6:
+ if attribute(catid) in (100200171, 100300014, 100300058, 100300008, 100300116, 100200053, 100200192, 100300074, 100200028) then goto T161_4;
+ else goto T161_5;
+
+T161_4:
+ response = 0.0024438226;
+ goto D161;
+
+T161_5:
+ response = 0.0050777724;
+ goto D161;
+
+T161_6:
+ response = 0.0078352283;
+ goto D161;
+
+N161_7:
+ if attribute(catid) in (100300011, 100200130, 100200052, 100200054, 100200193, 100300066, 100200232) then goto T161_7;
+ else goto T161_8;
+
+T161_7:
+ response = 0.0123902667;
+ goto D161;
+
+T161_8:
+ response = 0.0178145861;
+ goto D161;
+
+D161:
+
+tnscore = tnscore + response;
+
+ /* Tree 163 of 200 */
+N162_1:
+ if attribute(catid) in (0, 100200171, 100300011, 100200130, 100300058, 100300077, 100300166, 100300143, 100200052, 100300093, 100200172, 100300008, 100300032, 100300027, 100300116, 100200234, 100400142, 100200054, 100200193, 100400038, 100200192, 100300065, 100300209, 100300122, 100300127, 100400079, 100200170, 100200087, 100300074, 100300066, 100300007, 100300200, 100300076, 100200055, 100300006) then goto N162_2;
+ else goto N162_7;
+
+N162_2:
+ if attribute(catid) in (100300058, 100300143, 100300032, 100200054, 100300074, 100300007, 100300076, 100200055, 100300006) then goto N162_3;
+ else goto N162_4;
+
+N162_3:
+ if attribute(catid) in (100300058, 100300143, 100300032, 100300076, 100200055, 100300006) then goto T162_1;
+ else goto T162_2;
+
+T162_1:
+ response = -0.0392140951;
+ goto D162;
+
+T162_2:
+ response = -0.0178716478;
+ goto D162;
+
+N162_4:
+ if attribute(catid) in (100300011, 100200130, 100200052, 100300093, 100300008, 100300027, 100300116, 100200234, 100400142, 100200193, 100400038, 100300209, 100300127, 100400079, 100200170, 100200087, 100300066, 100300200) then goto N162_5;
+ else goto N162_6;
+
+N162_5:
+ if attribute(catid) in (100300011, 100200052, 100300008, 100300027, 100200234, 100400038, 100300209, 100200170) then goto T162_3;
+ else goto T162_4;
+
+T162_3:
+ response = -0.0066755608;
+ goto D162;
+
+T162_4:
+ response = -0.0018428992;
+ goto D162;
+
+N162_6:
+ if attribute(catid) in (0, 100200171, 100300077, 100300166) then goto T162_5;
+ else goto T162_6;
+
+T162_5:
+ response = 0.0036371154;
+ goto D162;
+
+T162_6:
+ response = 0.0064922752;
+ goto D162;
+
+N162_7:
+ if attribute(catid) in (100300013, 100200034, 100200186, 100400141, 100300165, 100300005, 100200068, 100200053, 100300073, 100300169, 100400080, 100200067, 100200185, 100300146) then goto T162_7;
+ else goto N162_8;
+
+T162_7:
+ response = 0.0121567368;
+ goto D162;
+
+N162_8:
+ if attribute(catid) in (100300014, 100300121, 100300004, 100400037, 100200176, 100200028) then goto T162_8;
+ else goto T162_9;
+
+T162_8:
+ response = 0.0238269627;
+ goto D162;
+
+T162_9:
+ response = 0.0454004741;
+ goto D162;
+
+D162:
+
+tnscore = tnscore + response;
+
+ /* Tree 164 of 200 */
+N163_1:
+ if attribute(catid) in (0, 100300011, 100300013, 100200034, 100400141, 100200052, 100300093, 100300102, 100200172, 100300008, 100200068, 100300116, 100300004, 100300126, 100400142, 100400038, 100300209, 100300127, 100400079, 100300169, 100300200, 100200055, 100200185, 100300006, 100200232, 100300146) then goto N163_2;
+ else goto N163_5;
+
+N163_2:
+ if attribute(catid) in (100300011, 100300013, 100300008, 100300126, 100300200, 100200055, 100200185, 100300006, 100200232, 100300146) then goto T163_1;
+ else goto N163_3;
+
+T163_1:
+ response = -0.0236401207;
+ goto D163;
+
+N163_3:
+ if attribute(catid) in (100200034, 100200052, 100300093, 100300102, 100200068, 100300127) then goto T163_2;
+ else goto N163_4;
+
+T163_2:
+ response = -0.0040925434;
+ goto D163;
+
+N163_4:
+ if attribute(catid) in (0, 100400142, 100400079) then goto T163_3;
+ else goto T163_4;
+
+T163_3:
+ response = 0.0014214596;
+ goto D163;
+
+T163_4:
+ response = 0.0034589578;
+ goto D163;
+
+N163_5:
+ if attribute(catid) in (100200171, 100200130, 100300058, 100300077, 100300166, 100200186, 100300165, 100300027, 100300121, 100200234, 100200053, 100300073, 100200192, 100300065, 100300122, 100400080, 100300074) then goto N163_6;
+ else goto N163_7;
+
+N163_6:
+ if attribute(catid) in (100300077, 100300166, 100300165, 100300027, 100200192, 100300065, 100300122, 100400080) then goto T163_5;
+ else goto T163_6;
+
+T163_5:
+ response = 0.0069931850;
+ goto D163;
+
+T163_6:
+ response = 0.0098596774;
+ goto D163;
+
+N163_7:
+ if attribute(catid) in (100300005, 100300032, 100400037, 100200193, 100200170, 100300066, 100200176, 100200028) then goto T163_7;
+ else goto T163_8;
+
+T163_7:
+ response = 0.0175470785;
+ goto D163;
+
+T163_8:
+ response = 0.0289653859;
+ goto D163;
+
+D163:
+
+tnscore = tnscore + response;
+
+ /* Tree 165 of 200 */
+N164_1:
+ if attribute(catid) in (0, 100200171, 100200130, 100300014, 100300077, 100300166, 100200034, 100200186, 100400141, 100200052, 100300093, 100200172, 100300116, 100200234, 100200053, 100400142, 100200054, 100200193, 100400038, 100300065, 100300212, 100300127, 100400079, 100200170, 100300169, 100400080, 100200087, 100300074, 100300066, 100200176, 100300200, 100300045, 100300076, 100200055, 100300006, 100300214) then goto N164_2;
+ else goto N164_6;
+
+N164_2:
+ if attribute(catid) in (100300014, 100200034, 100200052, 100300093, 100300116, 100200053, 100200193, 100300212, 100300127, 100200176, 100300200, 100200055, 100300214) then goto N164_3;
+ else goto N164_4;
+
+N164_3:
+ if attribute(catid) in (100300014, 100200053, 100300212, 100300200, 100200055, 100300214) then goto T164_1;
+ else goto T164_2;
+
+T164_1:
+ response = -0.0378968222;
+ goto D164;
+
+T164_2:
+ response = -0.0078372609;
+ goto D164;
+
+N164_4:
+ if attribute(catid) in (0, 100200171, 100300077, 100200186, 100400142, 100200054, 100400038, 100300065, 100400079, 100200170, 100300169, 100400080, 100200087, 100300074, 100300066, 100300045) then goto N164_5;
+ else goto T164_5;
+
+N164_5:
+ if attribute(catid) in (100200171, 100400142, 100200054, 100400038, 100300065, 100200170, 100300169, 100400080, 100300066) then goto T164_3;
+ else goto T164_4;
+
+T164_3:
+ response = -0.0001641778;
+ goto D164;
+
+T164_4:
+ response = 0.0015184867;
+ goto D164;
+
+T164_5:
+ response = 0.0061033364;
+ goto D164;
+
+N164_6:
+ if attribute(catid) in (100300058, 100300143, 100200068, 100300032, 100300121, 100300004, 100300126, 100300073, 100400037, 100200192) then goto T164_6;
+ else goto N164_7;
+
+T164_6:
+ response = 0.0125327069;
+ goto D164;
+
+N164_7:
+ if attribute(catid) in (100300011, 100300013, 100300165, 100300102, 100300005, 100300027, 100300122) then goto T164_7;
+ else goto T164_8;
+
+T164_7:
+ response = 0.0195931033;
+ goto D164;
+
+T164_8:
+ response = 0.0422132813;
+ goto D164;
+
+D164:
+
+tnscore = tnscore + response;
+
+ /* Tree 166 of 200 */
+N165_1:
+ if attribute(catid) in (0, 100200171, 100300014, 100300077, 100200186, 100300165, 100300102, 100300005, 100200172, 100300008, 100200068, 100300116, 100200234, 100300004, 100300126, 100400142, 100200054, 100300073, 100200193, 100200192, 100300065, 100300212, 100200170, 100200087, 100300074, 100300066, 100300200, 100300045, 100200028, 100300076, 100200055, 100200185, 100300214, 100300146) then goto N165_2;
+ else goto N165_5;
+
+N165_2:
+ if attribute(catid) in (100300014, 100300102, 100300116, 100200234, 100400142, 100200054, 100200087, 100300200, 100300045, 100200055, 100200185, 100300214, 100300146) then goto T165_1;
+ else goto N165_3;
+
+T165_1:
+ response = -0.0134771000;
+ goto D165;
+
+N165_3:
+ if attribute(catid) in (0, 100300165, 100300005, 100300008, 100200068, 100200193, 100300212, 100300066, 100200028, 100300076) then goto N165_4;
+ else goto T165_4;
+
+N165_4:
+ if attribute(catid) in (100300165, 100300005, 100300008, 100300066, 100300076) then goto T165_2;
+ else goto T165_3;
+
+T165_2:
+ response = -0.0004632359;
+ goto D165;
+
+T165_3:
+ response = 0.0017931695;
+ goto D165;
+
+T165_4:
+ response = 0.0047361213;
+ goto D165;
+
+N165_5:
+ if attribute(catid) in (100300011, 100200130, 100300166, 100400141, 100200052, 100300032, 100300027, 100300121, 100200053, 100400038, 100300209, 100300122, 100400079, 100300169, 100400080, 100300007, 100200176) then goto N165_6;
+ else goto T165_7;
+
+N165_6:
+ if attribute(catid) in (100200130, 100400141, 100200052, 100300032, 100300027, 100200053, 100300209, 100300122, 100400079) then goto T165_5;
+ else goto T165_6;
+
+T165_5:
+ response = 0.0106878879;
+ goto D165;
+
+T165_6:
+ response = 0.0162088762;
+ goto D165;
+
+T165_7:
+ response = 0.0270716952;
+ goto D165;
+
+D165:
+
+tnscore = tnscore + response;
+
+ /* Tree 167 of 200 */
+N166_1:
+ if attribute(catid) in (100300011, 100200130, 100300058, 100300013, 100400141, 100300165, 100300093, 100300005, 100300027, 100300116, 100200234, 100300004, 100300126, 100300073, 100200193, 100300209, 100300122, 100300007, 100200176, 100200028, 100200055, 100200185, 100300006) then goto N166_2;
+ else goto N166_5;
+
+N166_2:
+ if attribute(catid) in (100300011, 100300058, 100400141, 100300005, 100200234, 100300004, 100200193, 100300209, 100300122, 100200028, 100200055, 100200185) then goto N166_3;
+ else goto N166_4;
+
+N166_3:
+ if attribute(catid) in (100300011, 100200193, 100300209, 100200028, 100200055) then goto T166_1;
+ else goto T166_2;
+
+T166_1:
+ response = -0.0335395120;
+ goto D166;
+
+T166_2:
+ response = -0.0118989268;
+ goto D166;
+
+N166_4:
+ if attribute(catid) in (100300013, 100300165, 100300126, 100200176) then goto T166_3;
+ else goto T166_4;
+
+T166_3:
+ response = -0.0067126195;
+ goto D166;
+
+T166_4:
+ response = -0.0029610222;
+ goto D166;
+
+N166_5:
+ if attribute(catid) in (0, 100300014, 100300166, 100200034, 100200186, 100200052, 100200172, 100200068, 100200053, 100300019, 100200054, 100400038, 100200192, 100300065, 100300127, 100400079, 100200170, 100400080, 100300074, 100300066, 100300076, 100300146) then goto N166_6;
+ else goto N166_8;
+
+N166_6:
+ if attribute(catid) in (0, 100300166, 100200186, 100200172, 100200053, 100300019, 100400038, 100200192, 100300065, 100200170, 100400080, 100300074, 100300076) then goto N166_7;
+ else goto T166_7;
+
+N166_7:
+ if attribute(catid) in (100200186, 100200172, 100200053, 100300019, 100400038, 100200170, 100400080) then goto T166_5;
+ else goto T166_6;
+
+T166_5:
+ response = 0.0031153692;
+ goto D166;
+
+T166_6:
+ response = 0.0055175700;
+ goto D166;
+
+T166_7:
+ response = 0.0106121829;
+ goto D166;
+
+N166_8:
+ if attribute(catid) in (100200171, 100300077, 100300008, 100400142, 100300169, 100200087, 100300200) then goto T166_8;
+ else goto T166_9;
+
+T166_8:
+ response = 0.0168125614;
+ goto D166;
+
+T166_9:
+ response = 0.0398137842;
+ goto D166;
+
+D166:
+
+tnscore = tnscore + response;
+
+ /* Tree 168 of 200 */
+N167_1:
+ if attribute(catid) in (100200130, 100300013, 100300077, 100300166, 100400141, 100300165, 100300102, 100200068, 100300032, 100300121, 100200234, 100300019, 100300004, 100300126, 100300073, 100400037, 100200193, 100300212, 100300127, 100400079, 100200170, 100300066, 100300045, 100300076, 100200067, 100200185, 100200232, 100300214, 100300146) then goto N167_2;
+ else goto N167_4;
+
+N167_2:
+ if attribute(catid) in (100300102, 100300032, 100300121, 100300019, 100400037, 100200193, 100300212, 100200170, 100300045, 100300076, 100200067, 100200185, 100300214, 100300146) then goto T167_1;
+ else goto N167_3;
+
+T167_1:
+ response = -0.0158766850;
+ goto D167;
+
+N167_3:
+ if attribute(catid) in (100300166, 100300165, 100200068, 100300004, 100300126, 100300073, 100300127, 100300066, 100200232) then goto T167_2;
+ else goto T167_3;
+
+T167_2:
+ response = -0.0044887193;
+ goto D167;
+
+T167_3:
+ response = -0.0003188875;
+ goto D167;
+
+N167_4:
+ if attribute(catid) in (0, 100300014, 100300143, 100200186, 100200053, 100400142, 100200054, 100200192, 100300065, 100300122, 100300169, 100300074, 100300200) then goto N167_5;
+ else goto N167_6;
+
+N167_5:
+ if attribute(catid) in (0, 100300014, 100300143, 100200186, 100200053, 100200192, 100300065, 100300074) then goto T167_4;
+ else goto T167_5;
+
+T167_4:
+ response = 0.0053430945;
+ goto D167;
+
+T167_5:
+ response = 0.0090007568;
+ goto D167;
+
+N167_6:
+ if attribute(catid) in (100200171, 100200052, 100300093, 100300005, 100200172, 100300027, 100300116, 100400080, 100200087, 100300007) then goto T167_6;
+ else goto T167_7;
+
+T167_6:
+ response = 0.0122908466;
+ goto D167;
+
+T167_7:
+ response = 0.0204822127;
+ goto D167;
+
+D167:
+
+tnscore = tnscore + response;
+
+ /* Tree 169 of 200 */
+N168_1:
+ if attribute(catid) in (100200130, 100300077, 100300143, 100200034, 100300005, 100300032, 100300116, 100300121, 100300019, 100200054, 100200192, 100300212, 100300209, 100300127, 100400079, 100200170, 100400080, 100300074, 100300007, 100300200, 100300045, 100200028, 100300076, 100200055, 100200232) then goto N168_2;
+ else goto N168_4;
+
+N168_2:
+ if attribute(catid) in (100300143, 100300032, 100300121, 100300019, 100300212, 100300200, 100200028, 100200055) then goto T168_1;
+ else goto N168_3;
+
+T168_1:
+ response = -0.0260564211;
+ goto D168;
+
+N168_3:
+ if attribute(catid) in (100200034, 100300005, 100200054, 100300209, 100400080, 100300074, 100300045) then goto T168_2;
+ else goto T168_3;
+
+T168_2:
+ response = -0.0098627619;
+ goto D168;
+
+T168_3:
+ response = -0.0040846106;
+ goto D168;
+
+N168_4:
+ if attribute(catid) in (0, 100200171, 100300011, 100300014, 100300013, 100300166, 100200186, 100400141, 100300165, 100200052, 100200172, 100300027, 100200053, 100300004, 100400142, 100300073, 100400037, 100200193, 100300065, 100300122, 100300169, 100300066) then goto N168_5;
+ else goto N168_8;
+
+N168_5:
+ if attribute(catid) in (100200171, 100300011, 100300013, 100300166, 100300165, 100200172, 100400142, 100300073, 100400037, 100300122, 100300169, 100300066) then goto N168_6;
+ else goto N168_7;
+
+N168_6:
+ if attribute(catid) in (100300011, 100300013, 100300166, 100300165, 100300073, 100400037, 100300066) then goto T168_4;
+ else goto T168_5;
+
+T168_4:
+ response = 0.0011147417;
+ goto D168;
+
+T168_5:
+ response = 0.0042983630;
+ goto D168;
+
+N168_7:
+ if attribute(catid) in (0, 100300014, 100200053, 100300004, 100300065) then goto T168_6;
+ else goto T168_7;
+
+T168_6:
+ response = 0.0064436133;
+ goto D168;
+
+T168_7:
+ response = 0.0100503455;
+ goto D168;
+
+N168_8:
+ if attribute(catid) in (100300058, 100300093, 100300008, 100200234, 100200087, 100200176) then goto T168_8;
+ else goto T168_9;
+
+T168_8:
+ response = 0.0146658078;
+ goto D168;
+
+T168_9:
+ response = 0.0247793114;
+ goto D168;
+
+D168:
+
+tnscore = tnscore + response;
+
+ /* Tree 170 of 200 */
+N169_1:
+ if attribute(catid) in (100300013, 100300165, 100300102, 100300005, 100200068, 100300027, 100300121, 100400037, 100200193, 100300212, 100300169, 100200087, 100300074, 100300066, 100300007, 100300200, 100300045, 100200028, 100200067) then goto N169_2;
+ else goto N169_3;
+
+N169_2:
+ if attribute(catid) in (100300013, 100300165, 100300102, 100300005, 100200068, 100400037, 100200193, 100300007, 100300200, 100200067) then goto T169_1;
+ else goto T169_2;
+
+T169_1:
+ response = -0.0164905100;
+ goto D169;
+
+T169_2:
+ response = -0.0065097756;
+ goto D169;
+
+N169_3:
+ if attribute(catid) in (0, 100300011, 100200130, 100300058, 100300166, 100200186, 100400141, 100200052, 100300093, 100200172, 100300116, 100200053, 100300019, 100300004, 100300126, 100300073, 100300065, 100300209, 100300122, 100400079, 100200170, 100300076) then goto N169_4;
+ else goto N169_6;
+
+N169_4:
+ if attribute(catid) in (0, 100200130, 100300058, 100200186, 100300093, 100200172, 100300004, 100300073, 100300209, 100300122, 100200170, 100300076) then goto N169_5;
+ else goto T169_5;
+
+N169_5:
+ if attribute(catid) in (100200130, 100200186, 100200172, 100300073, 100200170, 100300076) then goto T169_3;
+ else goto T169_4;
+
+T169_3:
+ response = 0.0025078440;
+ goto D169;
+
+T169_4:
+ response = 0.0047210186;
+ goto D169;
+
+T169_5:
+ response = 0.0076570114;
+ goto D169;
+
+N169_6:
+ if attribute(catid) in (100200171, 100300014, 100300077, 100200034, 100300008, 100300032, 100200234, 100200054, 100400038, 100200192, 100300127, 100400080, 100300006, 100300146) then goto T169_6;
+ else goto T169_7;
+
+T169_6:
+ response = 0.0139365828;
+ goto D169;
+
+T169_7:
+ response = 0.0234469942;
+ goto D169;
+
+D169:
+
+tnscore = tnscore + response;
+
+ /* Tree 171 of 200 */
+N170_1:
+ if attribute(catid) in (100300013, 100300077, 100300165, 100200052, 100300102, 100200068, 100300032, 100300116, 100300121, 100200234, 100300004, 100300126, 100400142, 100200054, 100300073, 100200192, 100300065, 100300212, 100200170, 100400080, 100300074, 100300007, 100200176, 100300045, 100200067, 100200055, 100200185, 100200232, 100300214, 100300146) then goto N170_2;
+ else goto N170_4;
+
+N170_2:
+ if attribute(catid) in (100300032, 100300121, 100300212, 100300007, 100200176, 100200067, 100200055, 100200185, 100300214, 100300146) then goto T170_1;
+ else goto N170_3;
+
+T170_1:
+ response = -0.0389470287;
+ goto D170;
+
+N170_3:
+ if attribute(catid) in (100200052, 100300102, 100200068, 100300116, 100200234, 100300126, 100200054, 100300065, 100200170, 100300074) then goto T170_2;
+ else goto T170_3;
+
+T170_2:
+ response = -0.0087219876;
+ goto D170;
+
+T170_3:
+ response = -0.0029990733;
+ goto D170;
+
+N170_4:
+ if attribute(catid) in (0, 100200130, 100300014, 100300143, 100200034, 100200186, 100400141, 100300005, 100200172, 100200053, 100400038, 100300122, 100300127, 100400079, 100300169, 100300066, 100300076, 100300006) then goto N170_5;
+ else goto N170_6;
+
+N170_5:
+ if attribute(catid) in (0, 100200130, 100300014, 100300143, 100400141, 100400038, 100300122, 100300127, 100400079, 100300066, 100300076, 100300006) then goto T170_4;
+ else goto T170_5;
+
+T170_4:
+ response = 0.0052739356;
+ goto D170;
+
+T170_5:
+ response = 0.0099085929;
+ goto D170;
+
+N170_6:
+ if attribute(catid) in (100200171, 100300011, 100300166, 100300093, 100300027, 100200193, 100300209, 100200087, 100300200) then goto N170_7;
+ else goto T170_8;
+
+N170_7:
+ if attribute(catid) in (100200171, 100200193, 100200087) then goto T170_6;
+ else goto T170_7;
+
+T170_6:
+ response = 0.0138275479;
+ goto D170;
+
+T170_7:
+ response = 0.0190545276;
+ goto D170;
+
+T170_8:
+ response = 0.0389964998;
+ goto D170;
+
+D170:
+
+tnscore = tnscore + response;
+
+ /* Tree 172 of 200 */
+N171_1:
+ if attribute(catid) in (0, 100200171, 100300011, 100200130, 100300014, 100300058, 100300013, 100300077, 100300166, 100300143, 100200034, 100200186, 100400141, 100300165, 100200052, 100300093, 100300102, 100300005, 100200172, 100300008, 100200068, 100300027, 100300116, 100300121, 100200234, 100200053, 100300019, 100300004, 100400142, 100300073, 100400037, 100200193, 100200192, 100300065, 100300209, 100300122, 100300127, 100400079, 100200170, 100300169, 100400080, 100200087, 100300074, 100300066, 100200176, 100300045, 100200055, 100200185, 100300006, 100200232, 100300214, 100300146) then goto N171_2;
+ else goto N171_7;
+
+N171_2:
+ if attribute(catid) in (100300011, 100300058, 100300077, 100300005, 100300008, 100200068, 100400142, 100300065, 100300122, 100400079, 100400080, 100200087, 100300045, 100200055, 100200185, 100200232, 100300214) then goto N171_3;
+ else goto N171_4;
+
+N171_3:
+ if attribute(catid) in (100300058, 100300005, 100400142, 100200087, 100300045, 100200055, 100200185, 100200232, 100300214) then goto T171_1;
+ else goto T171_2;
+
+T171_1:
+ response = -0.0193646710;
+ goto D171;
+
+T171_2:
+ response = -0.0065149978;
+ goto D171;
+
+N171_4:
+ if attribute(catid) in (0, 100200171, 100200130, 100300014, 100200034, 100200186, 100300093, 100300102, 100300116, 100300121, 100200234, 100200053, 100300019, 100300073, 100200193, 100200192, 100300127, 100300169, 100300074, 100300006) then goto N171_5;
+ else goto N171_6;
+
+N171_5:
+ if attribute(catid) in (0, 100200171, 100300014, 100200186, 100300102, 100200193, 100300127, 100300074, 100300006) then goto T171_3;
+ else goto T171_4;
+
+T171_3:
+ response = 0.0016997077;
+ goto D171;
+
+T171_4:
+ response = 0.0042301416;
+ goto D171;
+
+N171_6:
+ if attribute(catid) in (100300166, 100400141, 100200172, 100300027, 100300004, 100300209, 100200170, 100300066) then goto T171_5;
+ else goto T171_6;
+
+T171_5:
+ response = 0.0083225080;
+ goto D171;
+
+T171_6:
+ response = 0.0106242146;
+ goto D171;
+
+N171_7:
+ if attribute(catid) in (100300126, 100200054, 100300212, 100300200, 100300076) then goto T171_7;
+ else goto T171_8;
+
+T171_7:
+ response = 0.0274475014;
+ goto D171;
+
+T171_8:
+ response = 0.0436714177;
+ goto D171;
+
+D171:
+
+tnscore = tnscore + response;
+
+ /* Tree 173 of 200 */
+N172_1:
+ if attribute(catid) in (0, 100200130, 100300014, 100300058, 100200034, 100200186, 100300165, 100200052, 100300093, 100300102, 100300027, 100300121, 100200234, 100300019, 100300004, 100300126, 100400142, 100200054, 100300073, 100400037, 100200193, 100400038, 100300065, 100300212, 100300209, 100300122, 100300127, 100400079, 100300169, 100200087, 100300074, 100300066, 100300007, 100300200, 100300045, 100300076, 100200067, 100200185) then goto N172_2;
+ else goto N172_5;
+
+N172_2:
+ if attribute(catid) in (100300014, 100200034, 100200186, 100300121, 100300004, 100400142, 100300073, 100400037, 100300065, 100300212, 100300209, 100300122, 100300127, 100400079, 100200087, 100300074, 100200067, 100200185) then goto N172_3;
+ else goto N172_4;
+
+N172_3:
+ if attribute(catid) in (100300014, 100300121, 100400037, 100300212, 100300209, 100200087, 100200067, 100200185) then goto T172_1;
+ else goto T172_2;
+
+T172_1:
+ response = -0.0261969274;
+ goto D172;
+
+T172_2:
+ response = -0.0052677773;
+ goto D172;
+
+N172_4:
+ if attribute(catid) in (100200130, 100300058, 100300165, 100300019, 100300126, 100200193, 100300169, 100300066, 100300007, 100300200, 100300076) then goto T172_3;
+ else goto T172_4;
+
+T172_3:
+ response = 0.0006535877;
+ goto D172;
+
+T172_4:
+ response = 0.0044410765;
+ goto D172;
+
+N172_5:
+ if attribute(catid) in (100200171, 100300011, 100300013, 100300166, 100400141, 100300005, 100200172, 100200068, 100300032, 100300116, 100200192, 100200170, 100300006, 100200232, 100300146) then goto N172_6;
+ else goto N172_7;
+
+N172_6:
+ if attribute(catid) in (100200171, 100300005, 100200172, 100300006, 100200232, 100300146) then goto T172_5;
+ else goto T172_6;
+
+T172_5:
+ response = 0.0098796141;
+ goto D172;
+
+T172_6:
+ response = 0.0147635144;
+ goto D172;
+
+N172_7:
+ if attribute(catid) in (100300077, 100200053, 100400080, 100200028) then goto T172_7;
+ else goto T172_8;
+
+T172_7:
+ response = 0.0226086570;
+ goto D172;
+
+T172_8:
+ response = 0.0384965531;
+ goto D172;
+
+D172:
+
+tnscore = tnscore + response;
+
+ /* Tree 174 of 200 */
+N173_1:
+ if attribute(catid) in (100200130, 100200052, 100300093, 100300102, 100300005, 100200172, 100300008, 100200068, 100300032, 100200234, 100200053, 100300004, 100300126, 100400142, 100200054, 100200193, 100300209, 100300074, 100300066, 100300007, 100300045, 100200028, 100200055, 100200232) then goto N173_2;
+ else goto N173_4;
+
+N173_2:
+ if attribute(catid) in (100300093, 100300102, 100300008, 100400142, 100200054, 100300209, 100300074, 100200028, 100200055) then goto T173_1;
+ else goto N173_3;
+
+T173_1:
+ response = -0.0206111829;
+ goto D173;
+
+N173_3:
+ if attribute(catid) in (100200052, 100300005, 100200172, 100200234, 100300004, 100300126, 100300066) then goto T173_2;
+ else goto T173_3;
+
+T173_2:
+ response = -0.0077046600;
+ goto D173;
+
+T173_3:
+ response = -0.0024157474;
+ goto D173;
+
+N173_4:
+ if attribute(catid) in (0, 100200171, 100300011, 100300014, 100300058, 100300013, 100300077, 100300166, 100200034, 100400141, 100300027, 100300121, 100300073, 100400038, 100200192, 100300065, 100300127, 100400079, 100200170, 100300169, 100400080, 100200087, 100200067) then goto N173_5;
+ else goto N173_7;
+
+N173_5:
+ if attribute(catid) in (0, 100300013, 100400141, 100300027, 100300121, 100400038, 100200192, 100300127, 100400079, 100300169, 100200087, 100200067) then goto N173_6;
+ else goto T173_6;
+
+N173_6:
+ if attribute(catid) in (100300013, 100400141, 100200192, 100300127, 100400079, 100300169, 100200087) then goto T173_4;
+ else goto T173_5;
+
+T173_4:
+ response = 0.0028937370;
+ goto D173;
+
+T173_5:
+ response = 0.0056818850;
+ goto D173;
+
+T173_6:
+ response = 0.0099814265;
+ goto D173;
+
+N173_7:
+ if attribute(catid) in (100200186, 100300165, 100300212, 100300122, 100300076) then goto T173_7;
+ else goto T173_8;
+
+T173_7:
+ response = 0.0162181832;
+ goto D173;
+
+T173_8:
+ response = 0.0314914649;
+ goto D173;
+
+D173:
+
+tnscore = tnscore + response;
+
+ /* Tree 175 of 200 */
+N174_1:
+ if attribute(catid) in (0, 100200171, 100300011, 100200130, 100300014, 100300058, 100300077, 100300166, 100200034, 100200186, 100400141, 100300165, 100200052, 100300093, 100300102, 100300005, 100300027, 100300116, 100200053, 100300019, 100300126, 100200054, 100300073, 100200193, 100200192, 100300212, 100300209, 100300122, 100300127, 100300169, 100400080, 100200087, 100300074, 100200176, 100300200, 100300045, 100200067, 100300006, 100200232, 100300214, 100300146) then goto N174_2;
+ else goto N174_5;
+
+N174_2:
+ if attribute(catid) in (100200171, 100300058, 100200034, 100400141, 100300093, 100300005, 100300027, 100300116, 100200053, 100300126, 100200054, 100200193, 100200192, 100300212, 100300209, 100300122, 100300074, 100200176, 100200067, 100200232, 100300214, 100300146) then goto N174_3;
+ else goto N174_4;
+
+N174_3:
+ if attribute(catid) in (100200034, 100300126, 100200193, 100200192, 100300209, 100300122, 100200176, 100200067, 100200232, 100300214, 100300146) then goto T174_1;
+ else goto T174_2;
+
+T174_1:
+ response = -0.0183821122;
+ goto D174;
+
+T174_2:
+ response = -0.0073843870;
+ goto D174;
+
+N174_4:
+ if attribute(catid) in (100300011, 100300014, 100300077, 100300166, 100200052, 100300102, 100300019, 100300073, 100300127, 100400080, 100200087, 100300200, 100300006) then goto T174_3;
+ else goto T174_4;
+
+T174_3:
+ response = -0.0006604841;
+ goto D174;
+
+T174_4:
+ response = 0.0030934288;
+ goto D174;
+
+N174_5:
+ if attribute(catid) in (100300013, 100200172, 100300008, 100200068, 100200234, 100300004, 100400142, 100300065, 100400079, 100200170, 100200185) then goto N174_6;
+ else goto T174_7;
+
+N174_6:
+ if attribute(catid) in (100300013, 100200172, 100300008, 100300004, 100400142, 100300065, 100400079, 100200185) then goto T174_5;
+ else goto T174_6;
+
+T174_5:
+ response = 0.0088954013;
+ goto D174;
+
+T174_6:
+ response = 0.0132240377;
+ goto D174;
+
+T174_7:
+ response = 0.0239626156;
+ goto D174;
+
+D174:
+
+tnscore = tnscore + response;
+
+ /* Tree 176 of 200 */
+N175_1:
+ if attribute(catid) in (100200171, 100300014, 100300143, 100300165, 100300102, 100300008, 100300032, 100300027, 100300121, 100200053, 100400037, 100300122, 100300127, 100300007, 100200176, 100200028, 100200067, 100200232, 100300146) then goto N175_2;
+ else goto N175_3;
+
+N175_2:
+ if attribute(catid) in (100300143, 100300008, 100400037, 100300122, 100300007, 100200176, 100200028, 100200067, 100200232, 100300146) then goto T175_1;
+ else goto T175_2;
+
+T175_1:
+ response = -0.0238688087;
+ goto D175;
+
+T175_2:
+ response = -0.0050071269;
+ goto D175;
+
+N175_3:
+ if attribute(catid) in (0, 100300011, 100200130, 100300058, 100300077, 100300166, 100200186, 100400141, 100200052, 100300116, 100200234, 100300019, 100300004, 100400142, 100200054, 100300073, 100200193, 100200192, 100300212, 100400079, 100200170, 100300169, 100200087, 100300074, 100300066, 100300200, 100300006) then goto N175_4;
+ else goto N175_5;
+
+N175_4:
+ if attribute(catid) in (100300058, 100300077, 100300166, 100200052, 100300116, 100200234, 100400142, 100200054, 100300073, 100200192, 100300212, 100400079, 100300074, 100300066, 100300200, 100300006) then goto T175_3;
+ else goto T175_4;
+
+T175_3:
+ response = 0.0013911393;
+ goto D175;
+
+T175_4:
+ response = 0.0050101023;
+ goto D175;
+
+N175_5:
+ if attribute(catid) in (100200034, 100300005, 100200172, 100200068, 100300126, 100400038, 100300065) then goto T175_5;
+ else goto T175_6;
+
+T175_5:
+ response = 0.0131280626;
+ goto D175;
+
+T175_6:
+ response = 0.0264913220;
+ goto D175;
+
+D175:
+
+tnscore = tnscore + response;
+
+ /* Tree 177 of 200 */
+N176_1:
+ if attribute(catid) in (100300011, 100300166, 100300165, 100300102, 100300008, 100300032, 100200234, 100400037, 100300209, 100300169, 100300200, 100300045, 100200028, 100300076, 100200055, 100200185, 100300006, 100200232, 100300146) then goto N176_2;
+ else goto N176_4;
+
+N176_2:
+ if attribute(catid) in (100300011, 100300102, 100300008, 100200234, 100400037, 100300209, 100300076, 100200055, 100300006, 100200232, 100300146) then goto T176_1;
+ else goto N176_3;
+
+T176_1:
+ response = -0.0278018549;
+ goto D176;
+
+N176_3:
+ if attribute(catid) in (100300165, 100200185) then goto T176_2;
+ else goto T176_3;
+
+T176_2:
+ response = -0.0124509833;
+ goto D176;
+
+T176_3:
+ response = -0.0065030338;
+ goto D176;
+
+N176_4:
+ if attribute(catid) in (0, 100200171, 100300013, 100300077, 100200186, 100400141, 100200052, 100300093, 100200068, 100300027, 100200053, 100300004, 100400142, 100200054, 100300073, 100200193, 100300122, 100300127, 100400079, 100200170, 100400080, 100200087, 100300074, 100300066) then goto N176_5;
+ else goto N176_7;
+
+N176_5:
+ if attribute(catid) in (100300077, 100400141, 100300027, 100300004, 100400142, 100200054, 100200193, 100300122, 100300127, 100200087, 100300074) then goto T176_4;
+ else goto N176_6;
+
+T176_4:
+ response = -0.0006424011;
+ goto D176;
+
+N176_6:
+ if attribute(catid) in (0, 100200171, 100200186, 100200068, 100200053, 100400079, 100400080) then goto T176_5;
+ else goto T176_6;
+
+T176_5:
+ response = 0.0042262873;
+ goto D176;
+
+T176_6:
+ response = 0.0083942658;
+ goto D176;
+
+N176_7:
+ if attribute(catid) in (100200130, 100300058, 100200034, 100300005, 100200172, 100300121, 100200192, 100300065, 100300212, 100300007) then goto N176_8;
+ else goto T176_9;
+
+N176_8:
+ if attribute(catid) in (100300058, 100300005, 100200172, 100200192) then goto T176_7;
+ else goto T176_8;
+
+T176_7:
+ response = 0.0108729295;
+ goto D176;
+
+T176_8:
+ response = 0.0157560823;
+ goto D176;
+
+T176_9:
+ response = 0.0252591937;
+ goto D176;
+
+D176:
+
+tnscore = tnscore + response;
+
+ /* Tree 178 of 200 */
+N177_1:
+ if attribute(catid) in (100200171, 100200130, 100300014, 100300058, 100300013, 100300166, 100300143, 100300093, 100300102, 100300008, 100300027, 100300019, 100300004, 100200054, 100300073, 100400037, 100200193, 100300212, 100300209, 100300127, 100200170, 100400080, 100300066, 100200176, 100300200, 100200067, 100300146) then goto N177_2;
+ else goto N177_4;
+
+N177_2:
+ if attribute(catid) in (100300143, 100300093, 100300102, 100300008, 100300019, 100200054, 100400037, 100300212, 100300209, 100200176, 100300146) then goto T177_1;
+ else goto N177_3;
+
+T177_1:
+ response = -0.0224536708;
+ goto D177;
+
+N177_3:
+ if attribute(catid) in (100200171, 100300014, 100300013, 100300027, 100300004, 100300066, 100200067) then goto T177_2;
+ else goto T177_3;
+
+T177_2:
+ response = -0.0094554624;
+ goto D177;
+
+T177_3:
+ response = -0.0036901881;
+ goto D177;
+
+N177_4:
+ if attribute(catid) in (0, 100300077, 100200034, 100200186, 100400141, 100200052, 100300005, 100200172, 100300116, 100300121, 100200053, 100400038, 100200192, 100300065, 100300122, 100400079, 100300169, 100200087, 100200028) then goto N177_5;
+ else goto N177_6;
+
+N177_5:
+ if attribute(catid) in (0, 100200186, 100200052, 100200172, 100300121, 100200192) then goto T177_4;
+ else goto T177_5;
+
+T177_4:
+ response = 0.0020254456;
+ goto D177;
+
+T177_5:
+ response = 0.0072521361;
+ goto D177;
+
+N177_6:
+ if attribute(catid) in (100300165, 100300126, 100400142, 100300074, 100300007, 100300006) then goto T177_6;
+ else goto T177_7;
+
+T177_6:
+ response = 0.0116654091;
+ goto D177;
+
+T177_7:
+ response = 0.0258802787;
+ goto D177;
+
+D177:
+
+tnscore = tnscore + response;
+
+ /* Tree 179 of 200 */
+N178_1:
+ if attribute(catid) in (0, 100200171, 100200130, 100300014, 100300077, 100300166, 100200186, 100400141, 100300165, 100200052, 100300093, 100300102, 100300005, 100200172, 100200068, 100300032, 100300116, 100300121, 100300019, 100300004, 100300126, 100200054, 100300073, 100400037, 100200193, 100200192, 100300065, 100300212, 100300209, 100300122, 100300127, 100400079, 100200170, 100200087, 100300066, 100300007, 100200176, 100300200, 100300045, 100200028, 100300076, 100200067, 100200232) then goto N178_2;
+ else goto N178_6;
+
+N178_2:
+ if attribute(catid) in (100200171, 100200130, 100300166, 100200186, 100300093, 100300102, 100300005, 100200172, 100200068, 100300116, 100300019, 100300126, 100200054, 100200193, 100200192, 100300212, 100300209, 100300127, 100200170, 100200087, 100300066, 100300007, 100200176, 100300200, 100300045, 100300076, 100200067, 100200232) then goto N178_3;
+ else goto N178_4;
+
+N178_3:
+ if attribute(catid) in (100200130, 100200186, 100300102, 100300005, 100300116, 100300019, 100200054, 100200193, 100200192, 100300212, 100300209, 100300127, 100200087, 100300066, 100200176, 100300200, 100300045, 100200067) then goto T178_1;
+ else goto T178_2;
+
+T178_1:
+ response = -0.0063942493;
+ goto D178;
+
+T178_2:
+ response = -0.0019096403;
+ goto D178;
+
+N178_4:
+ if attribute(catid) in (100300014, 100300077, 100300165, 100300004, 100300073, 100400037, 100300122, 100400079, 100200028) then goto T178_3;
+ else goto N178_5;
+
+T178_3:
+ response = 0.0026034959;
+ goto D178;
+
+N178_5:
+ if attribute(catid) in (0, 100300032) then goto T178_4;
+ else goto T178_5;
+
+T178_4:
+ response = 0.0043048063;
+ goto D178;
+
+T178_5:
+ response = 0.0068527373;
+ goto D178;
+
+N178_6:
+ if attribute(catid) in (100300011, 100300058, 100200034, 100300027, 100200234, 100200053, 100400142, 100300169, 100400080, 100300146) then goto T178_6;
+ else goto T178_7;
+
+T178_6:
+ response = 0.0134919535;
+ goto D178;
+
+T178_7:
+ response = 0.0274589028;
+ goto D178;
+
+D178:
+
+tnscore = tnscore + response;
+
+ /* Tree 180 of 200 */
+N179_1:
+ if attribute(catid) in (0, 100200130, 100300014, 100300058, 100300013, 100300077, 100300166, 100200034, 100200186, 100300165, 100200052, 100300093, 100300005, 100200172, 100200068, 100300027, 100300116, 100300004, 100300126, 100400142, 100200054, 100300073, 100400037, 100200193, 100400038, 100200192, 100300065, 100300212, 100300209, 100400079, 100200170, 100300169, 100400080, 100300074, 100300066, 100300200, 100300045, 100300076, 100200185, 100300006, 100200232, 100300214, 100300146) then goto N179_2;
+ else goto N179_6;
+
+N179_2:
+ if attribute(catid) in (100300013, 100300165, 100200052, 100300093, 100200068, 100300126, 100400142, 100200054, 100200193, 100300209, 100300200, 100300045, 100200185, 100300214, 100300146) then goto N179_3;
+ else goto N179_4;
+
+N179_3:
+ if attribute(catid) in (100300013, 100400142, 100200054, 100300209, 100300200, 100200185, 100300214, 100300146) then goto T179_1;
+ else goto T179_2;
+
+T179_1:
+ response = -0.0220987103;
+ goto D179;
+
+T179_2:
+ response = -0.0066619739;
+ goto D179;
+
+N179_4:
+ if attribute(catid) in (100200130, 100300014, 100300058, 100300166, 100200034, 100200186, 100300005, 100200172, 100300027, 100400038, 100200170, 100300074, 100300076, 100200232) then goto T179_3;
+ else goto N179_5;
+
+T179_3:
+ response = -0.0009424528;
+ goto D179;
+
+N179_5:
+ if attribute(catid) in (0, 100300004, 100300212, 100400079) then goto T179_4;
+ else goto T179_5;
+
+T179_4:
+ response = 0.0034943052;
+ goto D179;
+
+T179_5:
+ response = 0.0064497198;
+ goto D179;
+
+N179_6:
+ if attribute(catid) in (100200171, 100300011, 100300143, 100400141, 100300102, 100300032, 100300121, 100200053, 100300019, 100300127, 100200067) then goto T179_6;
+ else goto T179_7;
+
+T179_6:
+ response = 0.0127972052;
+ goto D179;
+
+T179_7:
+ response = 0.0287759999;
+ goto D179;
+
+D179:
+
+tnscore = tnscore + response;
+
+ /* Tree 181 of 200 */
+N180_1:
+ if attribute(catid) in (100200171, 100300011, 100300014, 100300058, 100300143, 100200034, 100300093, 100300102, 100300005, 100300008, 100200068, 100300032, 100300027, 100300116, 100300019, 100300126, 100200054, 100400038, 100200192, 100300209, 100300122, 100300169, 100400080, 100300007, 100200176, 100300045, 100200185, 100200232, 100300146) then goto N180_2;
+ else goto N180_4;
+
+N180_2:
+ if attribute(catid) in (100300102, 100300008, 100300032, 100300116, 100300019, 100300126, 100300209, 100400080, 100200176, 100200185, 100200232, 100300146) then goto T180_1;
+ else goto N180_3;
+
+T180_1:
+ response = -0.0303496242;
+ goto D180;
+
+N180_3:
+ if attribute(catid) in (100300011, 100300143, 100200034, 100300093, 100200068, 100300027, 100200054, 100300045) then goto T180_2;
+ else goto T180_3;
+
+T180_2:
+ response = -0.0083664582;
+ goto D180;
+
+T180_3:
+ response = -0.0038598802;
+ goto D180;
+
+N180_4:
+ if attribute(catid) in (0, 100200130, 100300077, 100300166, 100200186, 100400141, 100200052, 100200234, 100200053, 100400142, 100300073, 100300065, 100300127, 100400079, 100200170, 100300066, 100300076, 100300006) then goto N180_5;
+ else goto N180_6;
+
+N180_5:
+ if attribute(catid) in (100200130, 100300077, 100300166, 100200186, 100200053, 100300073, 100300065, 100300127, 100400079, 100200170, 100300066) then goto T180_4;
+ else goto T180_5;
+
+T180_4:
+ response = 0.0012616080;
+ goto D180;
+
+T180_5:
+ response = 0.0050477397;
+ goto D180;
+
+N180_6:
+ if attribute(catid) in (100300165, 100200172, 100200193, 100300212) then goto T180_6;
+ else goto T180_7;
+
+T180_6:
+ response = 0.0108120161;
+ goto D180;
+
+T180_7:
+ response = 0.0235525620;
+ goto D180;
+
+D180:
+
+tnscore = tnscore + response;
+
+ /* Tree 182 of 200 */
+N181_1:
+ if attribute(catid) in (100300011, 100300058, 100300013, 100300077, 100200186, 100300102, 100300005, 100200172, 100300008, 100300116, 100300121, 100300126, 100200054, 100400038, 100200170, 100300200, 100200055, 100200185, 100200232) then goto N181_2;
+ else goto N181_3;
+
+N181_2:
+ if attribute(catid) in (100300011, 100300008, 100200054, 100400038, 100300200, 100200055, 100200185, 100200232) then goto T181_1;
+ else goto T181_2;
+
+T181_1:
+ response = -0.0268832718;
+ goto D181;
+
+T181_2:
+ response = -0.0068696426;
+ goto D181;
+
+N181_3:
+ if attribute(catid) in (0, 100200171, 100200130, 100300166, 100300143, 100200034, 100400141, 100300165, 100200052, 100200053, 100300004, 100400142, 100400037, 100200193, 100200192, 100300065, 100300122, 100300127, 100300169, 100400080, 100300074, 100300066, 100200176, 100300045, 100200028, 100300006) then goto N181_4;
+ else goto N181_7;
+
+N181_4:
+ if attribute(catid) in (0, 100200034, 100400141, 100300165, 100400142, 100200193, 100200192, 100300122, 100300127, 100400080, 100300045, 100200028, 100300006) then goto N181_5;
+ else goto N181_6;
+
+N181_5:
+ if attribute(catid) in (100200034, 100400141, 100300165, 100400142, 100200193, 100300122, 100200028) then goto T181_3;
+ else goto T181_4;
+
+T181_3:
+ response = -0.0000690536;
+ goto D181;
+
+T181_4:
+ response = 0.0031806080;
+ goto D181;
+
+N181_6:
+ if attribute(catid) in (100200171, 100200130, 100300143, 100200053, 100400037, 100300074, 100200176) then goto T181_5;
+ else goto T181_6;
+
+T181_5:
+ response = 0.0046312438;
+ goto D181;
+
+T181_6:
+ response = 0.0072930454;
+ goto D181;
+
+N181_7:
+ if attribute(catid) in (100300014, 100300093, 100200068, 100300032, 100300027, 100300019, 100300073, 100400079, 100300007, 100300146) then goto T181_7;
+ else goto T181_8;
+
+T181_7:
+ response = 0.0137098872;
+ goto D181;
+
+T181_8:
+ response = 0.0263591456;
+ goto D181;
+
+D181:
+
+tnscore = tnscore + response;
+
+ /* Tree 183 of 200 */
+N182_1:
+ if attribute(catid) in (0, 100300011, 100200130, 100300014, 100300058, 100300166, 100200034, 100400141, 100300165, 100200052, 100300005, 100200172, 100300008, 100300032, 100300027, 100200053, 100300019, 100400142, 100300073, 100400038, 100200192, 100300065, 100300209, 100300122, 100400079, 100300169, 100400080, 100200087, 100300074, 100300007, 100300200, 100300045, 100200055, 100200185, 100300006, 100200232, 100300214) then goto N182_2;
+ else goto N182_6;
+
+N182_2:
+ if attribute(catid) in (100300011, 100300014, 100300058, 100300165, 100200052, 100200172, 100300008, 100300032, 100200053, 100300019, 100400142, 100300073, 100400038, 100300209, 100300122, 100400080, 100300074, 100300007, 100300200, 100200055, 100200185, 100300006, 100300214) then goto N182_3;
+ else goto N182_5;
+
+N182_3:
+ if attribute(catid) in (100300008, 100300032, 100300019, 100400038, 100300209, 100300122, 100200055, 100200185, 100300006, 100300214) then goto T182_1;
+ else goto N182_4;
+
+T182_1:
+ response = -0.0161057659;
+ goto D182;
+
+N182_4:
+ if attribute(catid) in (100300014, 100300058, 100300165, 100200053, 100400142, 100300074) then goto T182_2;
+ else goto T182_3;
+
+T182_2:
+ response = -0.0064541439;
+ goto D182;
+
+T182_3:
+ response = -0.0026860316;
+ goto D182;
+
+N182_5:
+ if attribute(catid) in (0, 100200034, 100300005, 100300065, 100400079, 100300169) then goto T182_4;
+ else goto T182_5;
+
+T182_4:
+ response = 0.0019364980;
+ goto D182;
+
+T182_5:
+ response = 0.0039988711;
+ goto D182;
+
+N182_6:
+ if attribute(catid) in (100200171, 100300077, 100300143, 100200186, 100200068, 100300116, 100200234, 100300126, 100200193, 100200170, 100300066, 100200176, 100300076) then goto T182_6;
+ else goto N182_7;
+
+T182_6:
+ response = 0.0130783191;
+ goto D182;
+
+N182_7:
+ if attribute(catid) in (100300093, 100300121, 100200054, 100400037, 100300127, 100200028, 100200067) then goto T182_7;
+ else goto T182_8;
+
+T182_7:
+ response = 0.0218038927;
+ goto D182;
+
+T182_8:
+ response = 0.0414721313;
+ goto D182;
+
+D182:
+
+tnscore = tnscore + response;
+
+ /* Tree 184 of 200 */
+N183_1:
+ if attribute(catid) in (100200171, 100300014, 100300058, 100300077, 100200186, 100300093, 100300008, 100200068, 100300032, 100300027, 100300019, 100300004, 100300126, 100200054, 100400037, 100400038, 100300209, 100400079, 100200170, 100300169, 100200087, 100300007, 100300200, 100200055, 100200185, 100300214, 100300146) then goto N183_2;
+ else goto N183_4;
+
+N183_2:
+ if attribute(catid) in (100300032, 100300027, 100300019, 100300004, 100400038, 100200055, 100200185, 100300214, 100300146) then goto T183_1;
+ else goto N183_3;
+
+T183_1:
+ response = -0.0294531155;
+ goto D183;
+
+N183_3:
+ if attribute(catid) in (100300058, 100300008, 100200068, 100300126, 100200054, 100400037, 100300007, 100300200) then goto T183_2;
+ else goto T183_3;
+
+T183_2:
+ response = -0.0064558393;
+ goto D183;
+
+T183_3:
+ response = -0.0017589508;
+ goto D183;
+
+N183_4:
+ if attribute(catid) in (0, 100200130, 100300013, 100300166, 100200052, 100300102, 100300121, 100200234, 100400142, 100300073, 100300065, 100300122, 100300066, 100200028) then goto N183_5;
+ else goto N183_6;
+
+N183_5:
+ if attribute(catid) in (100200130, 100300166, 100200052, 100300102, 100400142, 100300065) then goto T183_4;
+ else goto T183_5;
+
+T183_4:
+ response = 0.0028476082;
+ goto D183;
+
+T183_5:
+ response = 0.0049351263;
+ goto D183;
+
+N183_6:
+ if attribute(catid) in (100300143, 100200034, 100400141, 100300165, 100200172, 100200193, 100200192, 100300074, 100300006) then goto T183_6;
+ else goto T183_7;
+
+T183_6:
+ response = 0.0122604781;
+ goto D183;
+
+T183_7:
+ response = 0.0164705118;
+ goto D183;
+
+D183:
+
+tnscore = tnscore + response;
+
+ /* Tree 185 of 200 */
+N184_1:
+ if attribute(catid) in (0, 100200171, 100300011, 100300077, 100300166, 100200034, 100400141, 100300165, 100300102, 100200172, 100300032, 100300027, 100300116, 100200053, 100300019, 100400038, 100200192, 100300212, 100300122, 100400079, 100300169, 100200087, 100300200, 100300045, 100200028, 100300076) then goto N184_2;
+ else goto N184_6;
+
+N184_2:
+ if attribute(catid) in (100300077, 100300102, 100300032, 100200053, 100300019, 100400038, 100300122, 100300169, 100200087, 100200028, 100300076) then goto N184_3;
+ else goto N184_4;
+
+N184_3:
+ if attribute(catid) in (100300032, 100300019, 100200087, 100200028, 100300076) then goto T184_1;
+ else goto T184_2;
+
+T184_1:
+ response = -0.0267943136;
+ goto D184;
+
+T184_2:
+ response = -0.0073590500;
+ goto D184;
+
+N184_4:
+ if attribute(catid) in (0, 100200171, 100300011, 100200034, 100400141, 100200172, 100300027, 100300116, 100200192, 100300212, 100300045) then goto N184_5;
+ else goto T184_5;
+
+N184_5:
+ if attribute(catid) in (100200171, 100200034, 100400141, 100200172, 100300027, 100300212, 100300045) then goto T184_3;
+ else goto T184_4;
+
+T184_3:
+ response = -0.0027994712;
+ goto D184;
+
+T184_4:
+ response = -0.0009574051;
+ goto D184;
+
+T184_5:
+ response = 0.0035088242;
+ goto D184;
+
+N184_6:
+ if attribute(catid) in (100200130, 100300014, 100300058, 100300013, 100300143, 100200186, 100200052, 100200068, 100300121, 100200234, 100300126, 100400142, 100300073, 100200193, 100300065, 100300209, 100300127, 100200170, 100400080, 100300066, 100200176, 100300006, 100200232, 100300146) then goto N184_7;
+ else goto T184_8;
+
+N184_7:
+ if attribute(catid) in (100200130, 100300143, 100200186, 100200234, 100300073, 100300065, 100300127, 100400080, 100300066) then goto T184_6;
+ else goto T184_7;
+
+T184_6:
+ response = 0.0073203788;
+ goto D184;
+
+T184_7:
+ response = 0.0143066526;
+ goto D184;
+
+T184_8:
+ response = 0.0363911505;
+ goto D184;
+
+D184:
+
+tnscore = tnscore + response;
+
+ /* Tree 186 of 200 */
+N185_1:
+ if attribute(catid) in (100300014, 100300058, 100300166, 100200034, 100300093, 100300102, 100300005, 100200172, 100200068, 100300032, 100300027, 100300019, 100300004, 100400038, 100300212, 100300209, 100200170, 100300169, 100200087, 100300074, 100300066, 100200028, 100300006, 100200232, 100300214, 100300146) then goto N185_2;
+ else goto N185_5;
+
+N185_2:
+ if attribute(catid) in (100300102, 100300005, 100300032, 100300019, 100300004, 100300209, 100300074, 100300066, 100200232, 100300214, 100300146) then goto N185_3;
+ else goto N185_4;
+
+N185_3:
+ if attribute(catid) in (100300005, 100300032, 100300019, 100300209, 100300066, 100200232, 100300214) then goto T185_1;
+ else goto T185_2;
+
+T185_1:
+ response = -0.0402234424;
+ goto D185;
+
+T185_2:
+ response = -0.0155707935;
+ goto D185;
+
+N185_4:
+ if attribute(catid) in (100300014, 100300166, 100200068, 100300027, 100300212, 100200028, 100300006) then goto T185_3;
+ else goto T185_4;
+
+T185_3:
+ response = -0.0067321936;
+ goto D185;
+
+T185_4:
+ response = -0.0015157833;
+ goto D185;
+
+N185_5:
+ if attribute(catid) in (0, 100200171, 100300013, 100300077, 100300143, 100200186, 100200052, 100300121, 100200054, 100300073, 100200192, 100300065, 100400079, 100300045, 100200185) then goto N185_6;
+ else goto N185_8;
+
+N185_6:
+ if attribute(catid) in (0, 100200171, 100300077, 100200186, 100300121, 100200054, 100400079, 100300045, 100200185) then goto N185_7;
+ else goto T185_7;
+
+N185_7:
+ if attribute(catid) in (100200171, 100300077, 100200186, 100300121, 100200054, 100400079, 100300045) then goto T185_5;
+ else goto T185_6;
+
+T185_5:
+ response = 0.0027009397;
+ goto D185;
+
+T185_6:
+ response = 0.0049127750;
+ goto D185;
+
+T185_7:
+ response = 0.0074818877;
+ goto D185;
+
+N185_8:
+ if attribute(catid) in (100300011, 100200130, 100400141, 100200234, 100200053, 100400142, 100400037, 100200193, 100300122, 100300127, 100400080) then goto T185_8;
+ else goto T185_9;
+
+T185_8:
+ response = 0.0114687451;
+ goto D185;
+
+T185_9:
+ response = 0.0184788462;
+ goto D185;
+
+D185:
+
+tnscore = tnscore + response;
+
+ /* Tree 187 of 200 */
+N186_1:
+ if attribute(catid) in (100300011, 100300014, 100300058, 100300143, 100300165, 100300093, 100300102, 100300008, 100300116, 100200234, 100300019, 100400142, 100200193, 100400038, 100300065, 100300127, 100300169, 100400080, 100300074, 100300007, 100200176, 100300200, 100300076, 100200067, 100200185, 100200232, 100300214, 100300146) then goto N186_2;
+ else goto N186_5;
+
+N186_2:
+ if attribute(catid) in (100300011, 100300014, 100300165, 100300008, 100300116, 100300019, 100400038, 100300007, 100200176, 100200067, 100200185, 100200232, 100300214, 100300146) then goto N186_3;
+ else goto N186_4;
+
+N186_3:
+ if attribute(catid) in (100300014, 100300019, 100400038, 100300007, 100200067, 100200185, 100200232, 100300214) then goto T186_1;
+ else goto T186_2;
+
+T186_1:
+ response = -0.0315173226;
+ goto D186;
+
+T186_2:
+ response = -0.0166807619;
+ goto D186;
+
+N186_4:
+ if attribute(catid) in (100300058, 100300143, 100300102, 100200234, 100200193, 100400080, 100300076) then goto T186_3;
+ else goto T186_4;
+
+T186_3:
+ response = -0.0097877493;
+ goto D186;
+
+T186_4:
+ response = -0.0031922320;
+ goto D186;
+
+N186_5:
+ if attribute(catid) in (0, 100300013, 100400141, 100200052, 100200172, 100200053, 100300004, 100300073, 100400037, 100200192, 100300122, 100400079, 100300066, 100200028, 100300006) then goto N186_6;
+ else goto N186_7;
+
+N186_6:
+ if attribute(catid) in (100200172, 100300004, 100300073, 100400037, 100300122, 100200028, 100300006) then goto T186_5;
+ else goto T186_6;
+
+T186_5:
+ response = 0.0016110349;
+ goto D186;
+
+T186_6:
+ response = 0.0058680637;
+ goto D186;
+
+N186_7:
+ if attribute(catid) in (100200171, 100200130, 100300077, 100300166, 100200186, 100300005, 100200068, 100300126, 100200170) then goto T186_7;
+ else goto T186_8;
+
+T186_7:
+ response = 0.0122208670;
+ goto D186;
+
+T186_8:
+ response = 0.0260145679;
+ goto D186;
+
+D186:
+
+tnscore = tnscore + response;
+
+ /* Tree 188 of 200 */
+N187_1:
+ if attribute(catid) in (0, 100300011, 100300014, 100300058, 100300013, 100300077, 100300143, 100200186, 100300165, 100300005, 100300121, 100200053, 100300004, 100300126, 100400038, 100200192, 100300122, 100300169, 100200087, 100300074, 100300200, 100300045, 100200028, 100300006) then goto N187_2;
+ else goto N187_5;
+
+N187_2:
+ if attribute(catid) in (100300011, 100300013, 100300143, 100300005, 100300126, 100400038, 100200087, 100300200, 100200028) then goto N187_3;
+ else goto N187_4;
+
+N187_3:
+ if attribute(catid) in (100300013, 100300143, 100300005, 100300126) then goto T187_1;
+ else goto T187_2;
+
+T187_1:
+ response = -0.0362895954;
+ goto D187;
+
+T187_2:
+ response = -0.0192422418;
+ goto D187;
+
+N187_4:
+ if attribute(catid) in (100300058, 100300121, 100200053, 100300169, 100300074, 100300006) then goto T187_3;
+ else goto T187_4;
+
+T187_3:
+ response = -0.0047104781;
+ goto D187;
+
+T187_4:
+ response = 0.0002148509;
+ goto D187;
+
+N187_5:
+ if attribute(catid) in (100200171, 100200130, 100300166, 100200034, 100400141, 100200052, 100300093, 100200172, 100300027, 100300116, 100400142, 100300073, 100400037, 100200193, 100300065, 100300209, 100300127, 100400079, 100400080, 100300066, 100300007) then goto N187_6;
+ else goto N187_8;
+
+N187_6:
+ if attribute(catid) in (100200171, 100200130, 100200052, 100300093, 100300073, 100400037, 100200193, 100300209, 100300066) then goto T187_5;
+ else goto N187_7;
+
+T187_5:
+ response = 0.0059481372;
+ goto D187;
+
+N187_7:
+ if attribute(catid) in (100300166, 100200034, 100300027, 100300116, 100300065) then goto T187_6;
+ else goto T187_7;
+
+T187_6:
+ response = 0.0086482206;
+ goto D187;
+
+T187_7:
+ response = 0.0113173904;
+ goto D187;
+
+N187_8:
+ if attribute(catid) in (100300102, 100200068, 100300212, 100200170, 100300076, 100200067, 100200232, 100300146) then goto T187_8;
+ else goto T187_9;
+
+T187_8:
+ response = 0.0189016022;
+ goto D187;
+
+T187_9:
+ response = 0.0294237004;
+ goto D187;
+
+D187:
+
+tnscore = tnscore + response;
+
+ /* Tree 189 of 200 */
+N188_1:
+ if attribute(catid) in (100300011, 100300014, 100200034, 100200186, 100300165, 100200172, 100300008, 100200068, 100300121, 100200234, 100300126, 100200054, 100300073, 100400037, 100200193, 100200192, 100300065, 100300209, 100300122, 100200170, 100400080, 100300007, 100200176, 100300045, 100200028, 100300076, 100200185, 100300146) then goto N188_2;
+ else goto N188_5;
+
+N188_2:
+ if attribute(catid) in (100300011, 100200034, 100300165, 100300008, 100200234, 100300126, 100200054, 100400037, 100300209, 100400080, 100300007, 100200176, 100200028, 100200185, 100300146) then goto N188_3;
+ else goto N188_4;
+
+N188_3:
+ if attribute(catid) in (100300011, 100200034, 100200234, 100300126, 100400037, 100300209, 100300007, 100200176, 100200185, 100300146) then goto T188_1;
+ else goto T188_2;
+
+T188_1:
+ response = -0.0183846087;
+ goto D188;
+
+T188_2:
+ response = -0.0112640996;
+ goto D188;
+
+N188_4:
+ if attribute(catid) in (100200068, 100300121, 100300073, 100300065, 100200170, 100300045, 100300076) then goto T188_3;
+ else goto T188_4;
+
+T188_3:
+ response = -0.0047781445;
+ goto D188;
+
+T188_4:
+ response = -0.0011633168;
+ goto D188;
+
+N188_5:
+ if attribute(catid) in (0, 100200130, 100300058, 100300077, 100300093, 100300005, 100300027, 100300127, 100400079, 100200087, 100300074, 100300066) then goto N188_6;
+ else goto N188_7;
+
+N188_6:
+ if attribute(catid) in (0, 100200130, 100300027, 100300127, 100400079, 100200087) then goto T188_5;
+ else goto T188_6;
+
+T188_5:
+ response = 0.0039708336;
+ goto D188;
+
+T188_6:
+ response = 0.0079025406;
+ goto D188;
+
+N188_7:
+ if attribute(catid) in (100200171, 100300166, 100400141, 100200052, 100300032, 100300116, 100200053, 100300004, 100400142, 100300169, 100300200, 100300006) then goto N188_8;
+ else goto T188_9;
+
+N188_8:
+ if attribute(catid) in (100300166, 100300032, 100200053, 100400142, 100300169, 100300006) then goto T188_7;
+ else goto T188_8;
+
+T188_7:
+ response = 0.0121783231;
+ goto D188;
+
+T188_8:
+ response = 0.0159340797;
+ goto D188;
+
+T188_9:
+ response = 0.0363585815;
+ goto D188;
+
+D188:
+
+tnscore = tnscore + response;
+
+ /* Tree 190 of 200 */
+N189_1:
+ if attribute(catid) in (100300011, 100300013, 100300077, 100300143, 100200034, 100200186, 100300165, 100300102, 100300008, 100200053, 100300019, 100400142, 100200054, 100400037, 100300127, 100400079, 100400080, 100200087, 100300007, 100300200, 100300045, 100200067, 100200055, 100200185, 100300006, 100300146) then goto N189_2;
+ else goto N189_4;
+
+N189_2:
+ if attribute(catid) in (100300013, 100300143, 100300102, 100200053, 100300019, 100300200, 100200067, 100200055, 100200185, 100300006, 100300146) then goto T189_1;
+ else goto N189_3;
+
+T189_1:
+ response = -0.0269176309;
+ goto D189;
+
+N189_3:
+ if attribute(catid) in (100300011, 100200186, 100300008, 100200054, 100400037, 100400079, 100400080, 100300045) then goto T189_2;
+ else goto T189_3;
+
+T189_2:
+ response = -0.0082753604;
+ goto D189;
+
+T189_3:
+ response = -0.0027168619;
+ goto D189;
+
+N189_4:
+ if attribute(catid) in (0, 100200171, 100200130, 100300058, 100300166, 100400141, 100200172, 100200068, 100300121, 100300073, 100200193, 100200192, 100300065, 100200170, 100300169, 100300066, 100200028) then goto N189_5;
+ else goto N189_6;
+
+N189_5:
+ if attribute(catid) in (0, 100200130, 100200172, 100300121, 100300073, 100200192, 100300065, 100300169, 100200028) then goto T189_4;
+ else goto T189_5;
+
+T189_4:
+ response = 0.0030863813;
+ goto D189;
+
+T189_5:
+ response = 0.0082511598;
+ goto D189;
+
+N189_6:
+ if attribute(catid) in (100300014, 100200052, 100300027, 100300116, 100200234, 100300004, 100400038, 100300122, 100300074) then goto T189_6;
+ else goto T189_7;
+
+T189_6:
+ response = 0.0142405946;
+ goto D189;
+
+T189_7:
+ response = 0.0282071621;
+ goto D189;
+
+D189:
+
+tnscore = tnscore + response;
+
+ /* Tree 191 of 200 */
+N190_1:
+ if attribute(catid) in (100300058, 100300143, 100300093, 100300005, 100300116, 100300121, 100300019, 100300004, 100200193, 100400038, 100300200, 100300045, 100300076, 100200055) then goto N190_2;
+ else goto N190_3;
+
+N190_2:
+ if attribute(catid) in (100300143, 100300121, 100300019, 100200193, 100300076, 100200055) then goto T190_1;
+ else goto T190_2;
+
+T190_1:
+ response = -0.0314321220;
+ goto D190;
+
+T190_2:
+ response = -0.0099657936;
+ goto D190;
+
+N190_3:
+ if attribute(catid) in (0, 100200171, 100200130, 100300166, 100200186, 100400141, 100300165, 100200052, 100200053, 100400142, 100300073, 100200192, 100300065, 100300122, 100300127, 100400079, 100200170, 100300169, 100400080, 100200087, 100300074, 100200176, 100200028, 100200067, 100300006) then goto N190_4;
+ else goto N190_6;
+
+N190_4:
+ if attribute(catid) in (100400141, 100200052, 100200192, 100300122, 100300127, 100200087, 100300074, 100300006) then goto T190_3;
+ else goto N190_5;
+
+T190_3:
+ response = -0.0018808186;
+ goto D190;
+
+N190_5:
+ if attribute(catid) in (0, 100200171, 100200186, 100300165, 100200053, 100400142, 100300065, 100300169, 100200028, 100200067) then goto T190_4;
+ else goto T190_5;
+
+T190_4:
+ response = 0.0034558143;
+ goto D190;
+
+T190_5:
+ response = 0.0071443084;
+ goto D190;
+
+N190_6:
+ if attribute(catid) in (100300011, 100300014, 100300013, 100300077, 100200034, 100200172, 100200068, 100300027, 100200234, 100200054, 100300212, 100300066, 100300007, 100300146) then goto T190_6;
+ else goto T190_7;
+
+T190_6:
+ response = 0.0113747240;
+ goto D190;
+
+T190_7:
+ response = 0.0365678279;
+ goto D190;
+
+D190:
+
+tnscore = tnscore + response;
+
+ /* Tree 192 of 200 */
+N191_1:
+ if attribute(catid) in (100200130, 100300013, 100300077, 100300166, 100200034, 100400141, 100300165, 100200172, 100200068, 100300004, 100400142, 100200054, 100400037, 100300212, 100300127, 100400079, 100400080, 100300074, 100300066, 100200028, 100200067, 100200055, 100300006) then goto N191_2;
+ else goto N191_5;
+
+N191_2:
+ if attribute(catid) in (100200034, 100300165, 100300004, 100200054, 100300127, 100200028, 100200067, 100200055, 100300006) then goto N191_3;
+ else goto N191_4;
+
+N191_3:
+ if attribute(catid) in (100200034, 100300004, 100300127, 100200028, 100200067, 100200055) then goto T191_1;
+ else goto T191_2;
+
+T191_1:
+ response = -0.0176698057;
+ goto D191;
+
+T191_2:
+ response = -0.0096668582;
+ goto D191;
+
+N191_4:
+ if attribute(catid) in (100200130, 100300013, 100300077, 100300166, 100400142, 100400037, 100300212, 100400079) then goto T191_3;
+ else goto T191_4;
+
+T191_3:
+ response = -0.0070606716;
+ goto D191;
+
+T191_4:
+ response = -0.0037781209;
+ goto D191;
+
+N191_5:
+ if attribute(catid) in (0, 100200171, 100300058, 100200186, 100200052, 100300102, 100300027, 100200053, 100300073, 100200193, 100200192, 100300065, 100300122, 100300169, 100200087, 100200176, 100300076, 100200232) then goto N191_6;
+ else goto N191_7;
+
+N191_6:
+ if attribute(catid) in (0, 100300058, 100200052, 100300102, 100300027, 100200192, 100300065, 100300122, 100200232) then goto T191_5;
+ else goto T191_6;
+
+T191_5:
+ response = 0.0055156165;
+ goto D191;
+
+T191_6:
+ response = 0.0073414677;
+ goto D191;
+
+N191_7:
+ if attribute(catid) in (100300005, 100300008, 100300032, 100300121, 100200234, 100300126, 100400038, 100200170, 100300200, 100200185) then goto T191_7;
+ else goto T191_8;
+
+T191_7:
+ response = 0.0138952295;
+ goto D191;
+
+T191_8:
+ response = 0.0286522384;
+ goto D191;
+
+D191:
+
+tnscore = tnscore + response;
+
+ /* Tree 193 of 200 */
+N192_1:
+ if attribute(catid) in (0, 100200171, 100300011, 100200130, 100300014, 100300077, 100300166, 100300143, 100200186, 100300165, 100300093, 100300102, 100300005, 100200172, 100300008, 100300027, 100300116, 100300121, 100200234, 100200053, 100300004, 100200054, 100400038, 100200192, 100400079, 100300169, 100400080, 100200087, 100300007, 100200176, 100300200, 100300045, 100200055, 100300006, 100300214) then goto N192_2;
+ else goto N192_5;
+
+N192_2:
+ if attribute(catid) in (100300011, 100300014, 100300143, 100300093, 100300102, 100200172, 100200054, 100400079, 100200087, 100300007, 100300200, 100300045, 100200055, 100300006, 100300214) then goto N192_3;
+ else goto N192_4;
+
+N192_3:
+ if attribute(catid) in (100300014, 100300093, 100300102, 100200054, 100300007, 100300200, 100200055, 100300006, 100300214) then goto T192_1;
+ else goto T192_2;
+
+T192_1:
+ response = -0.0266674639;
+ goto D192;
+
+T192_2:
+ response = -0.0102733938;
+ goto D192;
+
+N192_4:
+ if attribute(catid) in (0, 100200171, 100300077, 100300027, 100200053, 100300004, 100200192, 100300169, 100400080, 100200176) then goto T192_3;
+ else goto T192_4;
+
+T192_3:
+ response = -0.0008805496;
+ goto D192;
+
+T192_4:
+ response = 0.0030248341;
+ goto D192;
+
+N192_5:
+ if attribute(catid) in (100300058, 100200034, 100400141, 100200052, 100300126, 100300073, 100300065, 100300212, 100300122, 100300127, 100200170, 100300076, 100200185, 100200232, 100300146) then goto N192_6;
+ else goto N192_7;
+
+N192_6:
+ if attribute(catid) in (100300058, 100400141, 100300126, 100300073, 100300065, 100300212, 100300122, 100200185) then goto T192_5;
+ else goto T192_6;
+
+T192_5:
+ response = 0.0088319892;
+ goto D192;
+
+T192_6:
+ response = 0.0145181522;
+ goto D192;
+
+N192_7:
+ if attribute(catid) in (100200068, 100400142, 100200193, 100300074, 100300066) then goto T192_7;
+ else goto T192_8;
+
+T192_7:
+ response = 0.0203803500;
+ goto D192;
+
+T192_8:
+ response = 0.0412794221;
+ goto D192;
+
+D192:
+
+tnscore = tnscore + response;
+
+ /* Tree 194 of 200 */
+N193_1:
+ if attribute(catid) in (100300011, 100300014, 100300013, 100300077, 100300143, 100200186, 100300093, 100300005, 100300008, 100300032, 100300121, 100200054, 100200192, 100300122, 100300007, 100300200, 100200067, 100200055, 100200185, 100200232) then goto N193_2;
+ else goto N193_3;
+
+N193_2:
+ if attribute(catid) in (100300143, 100300005, 100300008, 100300032, 100200054, 100300007, 100200067, 100200055, 100200232) then goto T193_1;
+ else goto T193_2;
+
+T193_1:
+ response = -0.0313843116;
+ goto D193;
+
+T193_2:
+ response = -0.0090786448;
+ goto D193;
+
+N193_3:
+ if attribute(catid) in (0, 100200171, 100200130, 100300058, 100400141, 100300165, 100200052, 100300102, 100200172, 100200068, 100300116, 100300004, 100400142, 100300073, 100200193, 100400038, 100300065, 100300127, 100400079, 100300169, 100300066, 100200176, 100300045, 100200028, 100300006) then goto N193_4;
+ else goto N193_5;
+
+N193_4:
+ if attribute(catid) in (0, 100200171, 100200130, 100200052, 100300116, 100400038, 100400079, 100300169, 100300045, 100200028) then goto T193_3;
+ else goto T193_4;
+
+T193_3:
+ response = 0.0019399364;
+ goto D193;
+
+T193_4:
+ response = 0.0057587234;
+ goto D193;
+
+N193_5:
+ if attribute(catid) in (100300166, 100300027, 100200234, 100200053, 100200170, 100200087, 100300074, 100300076) then goto T193_5;
+ else goto T193_6;
+
+T193_5:
+ response = 0.0118065270;
+ goto D193;
+
+T193_6:
+ response = 0.0240753548;
+ goto D193;
+
+D193:
+
+tnscore = tnscore + response;
+
+ /* Tree 195 of 200 */
+N194_1:
+ if attribute(catid) in (100300011, 100300014, 100200034, 100300165, 100300093, 100300032, 100300121, 100300019, 100300004, 100400142, 100400038, 100300122, 100300127, 100200170, 100300074, 100200176, 100300200, 100200067, 100200055, 100300006, 100200232, 100300146) then goto N194_2;
+ else goto N194_4;
+
+N194_2:
+ if attribute(catid) in (100300011, 100200034, 100300093, 100300032, 100300019, 100400038, 100200176, 100200067, 100200055) then goto T194_1;
+ else goto N194_3;
+
+T194_1:
+ response = -0.0297490120;
+ goto D194;
+
+N194_3:
+ if attribute(catid) in (100300165, 100300004, 100300200, 100300006, 100200232, 100300146) then goto T194_2;
+ else goto T194_3;
+
+T194_2:
+ response = -0.0129734582;
+ goto D194;
+
+T194_3:
+ response = -0.0048159123;
+ goto D194;
+
+N194_4:
+ if attribute(catid) in (0, 100200171, 100200130, 100300058, 100300077, 100300166, 100200186, 100400141, 100200052, 100300102, 100300005, 100200172, 100200068, 100300027, 100300116, 100200234, 100200053, 100200054, 100300073, 100400037, 100200193, 100200192, 100300065, 100300169, 100400080, 100300066, 100300045, 100200028, 100300076, 100200185) then goto N194_5;
+ else goto N194_8;
+
+N194_5:
+ if attribute(catid) in (0, 100200130, 100300058, 100300077, 100300166, 100200186, 100300102, 100300116, 100200053, 100200192, 100300065, 100300045, 100200028) then goto N194_6;
+ else goto N194_7;
+
+N194_6:
+ if attribute(catid) in (100300077, 100300166, 100200186, 100300116, 100200192) then goto T194_4;
+ else goto T194_5;
+
+T194_4:
+ response = -0.0004494225;
+ goto D194;
+
+T194_5:
+ response = 0.0030538822;
+ goto D194;
+
+N194_7:
+ if attribute(catid) in (100200171, 100400141, 100200234, 100300073, 100400037, 100300169, 100300066) then goto T194_6;
+ else goto T194_7;
+
+T194_6:
+ response = 0.0068726028;
+ goto D194;
+
+T194_7:
+ response = 0.0116359714;
+ goto D194;
+
+N194_8:
+ if attribute(catid) in (100300143, 100300126, 100400079, 100200087) then goto T194_8;
+ else goto T194_9;
+
+T194_8:
+ response = 0.0222596119;
+ goto D194;
+
+T194_9:
+ response = 0.0442934684;
+ goto D194;
+
+D194:
+
+tnscore = tnscore + response;
+
+ /* Tree 196 of 200 */
+N195_1:
+ if attribute(catid) in (100300011, 100300077, 100200186, 100400141, 100300165, 100200052, 100300005, 100200172, 100300008, 100200068, 100300032, 100300116, 100300019, 100400142, 100200054, 100200193, 100400038, 100300209, 100300127, 100200170, 100300169, 100300074, 100200176, 100300045, 100200067, 100200055, 100200232, 100300146) then goto N195_2;
+ else goto N195_5;
+
+N195_2:
+ if attribute(catid) in (100300011, 100300008, 100200068, 100300032, 100300019, 100200193, 100300074, 100200176, 100200067, 100200055, 100200232, 100300146) then goto N195_3;
+ else goto N195_4;
+
+N195_3:
+ if attribute(catid) in (100300032, 100300019, 100200193, 100300074, 100200176, 100200067, 100200055, 100200232, 100300146) then goto T195_1;
+ else goto T195_2;
+
+T195_1:
+ response = -0.0304543117;
+ goto D195;
+
+T195_2:
+ response = -0.0136959974;
+ goto D195;
+
+N195_4:
+ if attribute(catid) in (100400141, 100400142, 100300127, 100200170) then goto T195_3;
+ else goto T195_4;
+
+T195_3:
+ response = -0.0090390793;
+ goto D195;
+
+T195_4:
+ response = -0.0027839113;
+ goto D195;
+
+N195_5:
+ if attribute(catid) in (0, 100200171, 100200130, 100300014, 100300058, 100300166, 100200034, 100300027, 100300121, 100200234, 100200053, 100300004, 100300073, 100200192, 100300122, 100400079, 100400080, 100200087, 100300066, 100300200, 100200028, 100300076) then goto N195_6;
+ else goto N195_7;
+
+N195_6:
+ if attribute(catid) in (0, 100300058, 100300027, 100200234, 100200053, 100300004, 100300073, 100200192, 100400080, 100300066, 100300200) then goto T195_5;
+ else goto T195_6;
+
+T195_5:
+ response = 0.0035175195;
+ goto D195;
+
+T195_6:
+ response = 0.0082798864;
+ goto D195;
+
+N195_7:
+ if attribute(catid) in (100300093, 100300126, 100300065) then goto T195_7;
+ else goto T195_8;
+
+T195_7:
+ response = 0.0167800289;
+ goto D195;
+
+T195_8:
+ response = 0.0355577197;
+ goto D195;
+
+D195:
+
+tnscore = tnscore + response;
+
+ /* Tree 197 of 200 */
+N196_1:
+ if attribute(catid) in (0, 100200130, 100300013, 100300077, 100300166, 100300143, 100200186, 100200052, 100300005, 100200172, 100200234, 100200053, 100300019, 100300004, 100400142, 100200054, 100400037, 100200193, 100300212, 100300127, 100400079, 100300169, 100400080, 100200087, 100300074, 100300066, 100300007, 100200176, 100300200, 100300045, 100300076, 100200067, 100200185, 100300006, 100200232, 100300146) then goto N196_2;
+ else goto N196_6;
+
+N196_2:
+ if attribute(catid) in (100300013, 100200186, 100200234, 100300019, 100300004, 100200054, 100200193, 100300212, 100400080, 100200087, 100200176, 100300200, 100200067, 100200185, 100200232, 100300146) then goto N196_3;
+ else goto N196_4;
+
+N196_3:
+ if attribute(catid) in (100300013, 100300019, 100200193, 100300212, 100200176, 100200067, 100200185, 100200232) then goto T196_1;
+ else goto T196_2;
+
+T196_1:
+ response = -0.0351013956;
+ goto D196;
+
+T196_2:
+ response = -0.0146983415;
+ goto D196;
+
+N196_4:
+ if attribute(catid) in (0, 100300077, 100300143, 100300005, 100200053, 100300169, 100300074, 100300066, 100300007, 100300045, 100300076, 100300006) then goto N196_5;
+ else goto T196_5;
+
+N196_5:
+ if attribute(catid) in (100300077, 100300143, 100300169, 100300074, 100300007, 100300045) then goto T196_3;
+ else goto T196_4;
+
+T196_3:
+ response = -0.0036169246;
+ goto D196;
+
+T196_4:
+ response = 0.0006508121;
+ goto D196;
+
+T196_5:
+ response = 0.0039348871;
+ goto D196;
+
+N196_6:
+ if attribute(catid) in (100200171, 100300011, 100300058, 100400141, 100300165, 100300093, 100200068, 100300032, 100300027, 100300121, 100300126, 100300073, 100400038, 100200192, 100300065, 100200170) then goto N196_7;
+ else goto N196_8;
+
+N196_7:
+ if attribute(catid) in (100300058, 100400141, 100300165, 100300027, 100300121, 100300065, 100200170) then goto T196_6;
+ else goto T196_7;
+
+T196_6:
+ response = 0.0092359739;
+ goto D196;
+
+T196_7:
+ response = 0.0139160873;
+ goto D196;
+
+N196_8:
+ if attribute(catid) in (100300014, 100200034, 100300122, 100200028) then goto T196_8;
+ else goto T196_9;
+
+T196_8:
+ response = 0.0220286224;
+ goto D196;
+
+T196_9:
+ response = 0.0419934945;
+ goto D196;
+
+D196:
+
+tnscore = tnscore + response;
+
+ /* Tree 198 of 200 */
+N197_1:
+ if attribute(catid) in (100300013, 100300143, 100300005, 100300116, 100300019, 100300004, 100200054, 100300065, 100300212, 100300209, 100200087, 100200232, 100300214) then goto N197_2;
+ else goto N197_3;
+
+N197_2:
+ if attribute(catid) in (100300143, 100300019, 100300065, 100300209, 100200232, 100300214) then goto T197_1;
+ else goto T197_2;
+
+T197_1:
+ response = -0.0342436123;
+ goto D197;
+
+T197_2:
+ response = -0.0130043453;
+ goto D197;
+
+N197_3:
+ if attribute(catid) in (0, 100200171, 100200130, 100300014, 100200186, 100400141, 100200052, 100200172, 100200068, 100200053, 100400142, 100300073, 100400037, 100200193, 100400038, 100300122, 100400079, 100200170, 100300007, 100300045, 100200185, 100300006) then goto N197_4;
+ else goto N197_6;
+
+N197_4:
+ if attribute(catid) in (100200130, 100300014, 100400141, 100200172, 100200068, 100400142, 100400037, 100200193, 100300007, 100300006) then goto T197_3;
+ else goto N197_5;
+
+T197_3:
+ response = -0.0025589925;
+ goto D197;
+
+N197_5:
+ if attribute(catid) in (100200171, 100300073, 100400038) then goto T197_4;
+ else goto T197_5;
+
+T197_4:
+ response = 0.0013996109;
+ goto D197;
+
+T197_5:
+ response = 0.0034393713;
+ goto D197;
+
+N197_6:
+ if attribute(catid) in (100300011, 100300077, 100300166, 100200034, 100300165, 100300093, 100300027, 100200192, 100300169, 100300066, 100300200, 100200028, 100300076) then goto N197_7;
+ else goto T197_8;
+
+N197_7:
+ if attribute(catid) in (100300077, 100300166, 100200192, 100300169, 100300066, 100200028) then goto T197_6;
+ else goto T197_7;
+
+T197_6:
+ response = 0.0071225815;
+ goto D197;
+
+T197_7:
+ response = 0.0104333907;
+ goto D197;
+
+T197_8:
+ response = 0.0168963783;
+ goto D197;
+
+D197:
+
+tnscore = tnscore + response;
+
+ /* Tree 199 of 200 */
+N198_1:
+ if attribute(catid) in (100300011, 100300014, 100300058, 100300013, 100300143, 100300027, 100200053, 100300019, 100300126, 100400037, 100300169, 100400080, 100200176, 100300076, 100200067, 100200055, 100200185, 100300214, 100300146) then goto N198_2;
+ else goto N198_3;
+
+N198_2:
+ if attribute(catid) in (100300011, 100300058, 100300013, 100300143, 100300019, 100400037, 100200176, 100200067, 100200055, 100200185, 100300214) then goto T198_1;
+ else goto T198_2;
+
+T198_1:
+ response = -0.0290173523;
+ goto D198;
+
+T198_2:
+ response = -0.0086034947;
+ goto D198;
+
+N198_3:
+ if attribute(catid) in (0, 100300077, 100300166, 100200034, 100200186, 100400141, 100300165, 100200052, 100300093, 100200172, 100200068, 100300116, 100200234, 100300004, 100400142, 100300073, 100400038, 100200192, 100300065, 100300209, 100300127, 100400079, 100200170, 100200087, 100300074, 100300045, 100200232) then goto N198_4;
+ else goto N198_6;
+
+N198_4:
+ if attribute(catid) in (100300077, 100200186, 100400141, 100300093, 100200172, 100200068, 100300004, 100300065, 100300074) then goto T198_3;
+ else goto N198_5;
+
+T198_3:
+ response = -0.0007979247;
+ goto D198;
+
+N198_5:
+ if attribute(catid) in (0, 100300166, 100300165, 100200052, 100300045) then goto T198_4;
+ else goto T198_5;
+
+T198_4:
+ response = 0.0031637733;
+ goto D198;
+
+T198_5:
+ response = 0.0058373245;
+ goto D198;
+
+N198_6:
+ if attribute(catid) in (100200171, 100200130, 100300005, 100300008, 100300121, 100300066, 100300007, 100200028, 100300006) then goto T198_6;
+ else goto N198_7;
+
+T198_6:
+ response = 0.0142240118;
+ goto D198;
+
+N198_7:
+ if attribute(catid) in (100200054, 100200193, 100300122) then goto T198_7;
+ else goto T198_8;
+
+T198_7:
+ response = 0.0228053439;
+ goto D198;
+
+T198_8:
+ response = 0.0584841669;
+ goto D198;
+
+D198:
+
+tnscore = tnscore + response;
+
+ /* Tree 200 of 200 */
+N199_1:
+ if attribute(catid) in (100300143, 100300165, 100300093, 100300008, 100300116, 100200234, 100300004, 100300126, 100200054, 100400037, 100200193, 100400038, 100300212, 100300122, 100200170, 100300066, 100300007, 100200055, 100200232, 100300146) then goto N199_2;
+ else goto N199_4;
+
+N199_2:
+ if attribute(catid) in (100300008, 100300116, 100300126, 100300212, 100300007, 100200055, 100200232, 100300146) then goto T199_1;
+ else goto N199_3;
+
+T199_1:
+ response = -0.0246224156;
+ goto D199;
+
+N199_3:
+ if attribute(catid) in (100300143, 100300165, 100200234, 100300004, 100400037, 100300122) then goto T199_2;
+ else goto T199_3;
+
+T199_2:
+ response = -0.0104770173;
+ goto D199;
+
+T199_3:
+ response = -0.0059185929;
+ goto D199;
+
+N199_4:
+ if attribute(catid) in (0, 100200171, 100300011, 100200130, 100300014, 100300077, 100300166, 100200172, 100300121, 100200053, 100300073, 100300127, 100400079, 100200028) then goto N199_5;
+ else goto N199_7;
+
+N199_5:
+ if attribute(catid) in (0, 100200171, 100200130, 100300014, 100300077, 100300121, 100200053, 100300073) then goto N199_6;
+ else goto T199_6;
+
+N199_6:
+ if attribute(catid) in (100200171, 100200130, 100300077, 100200053, 100300073) then goto T199_4;
+ else goto T199_5;
+
+T199_4:
+ response = 0.0010797418;
+ goto D199;
+
+T199_5:
+ response = 0.0024531718;
+ goto D199;
+
+T199_6:
+ response = 0.0064407369;
+ goto D199;
+
+N199_7:
+ if attribute(catid) in (100200186, 100400141, 100200052, 100300102, 100200068, 100300027, 100300019, 100400142, 100200192, 100300065, 100300169, 100400080, 100300200, 100300076, 100300006) then goto T199_7;
+ else goto N199_8;
+
+T199_7:
+ response = 0.0120768393;
+ goto D199;
+
+N199_8:
+ if attribute(catid) in (100300058, 100200034, 100300209, 100200087, 100300074) then goto T199_8;
+ else goto T199_9;
+
+T199_8:
+ response = 0.0185835130;
+ goto D199;
+
+T199_9:
+ response = 0.0301892716;
+ goto D199;
+
+D199:
+
+tnscore = tnscore + response;
+
+return;
diff --git a/searchlib/src/test/files/treenet08.model b/searchlib/src/test/files/treenet08.model
new file mode 100644
index 00000000000..3edd278d432
--- /dev/null
+++ b/searchlib/src/test/files/treenet08.model
@@ -0,0 +1,227 @@
+/**********************************************************
+ * The following C source code was automatically generated
+ * by the new DTREE version: 1.x
+ **********************************************************/
+
+#include <string.h> /* for strcmp() */
+
+/*******************************************
+ * APPLICATION DEPENDENT MISSING VALUE CODES
+ *******************************************/
+
+const double DBL_MISSING_VALUE = 0.0;
+const int INT_MISSING_VALUE = 0;
+
+/************
+ * PREDICTORS
+ ************/
+double CT$, QPSCOREFOR_KG PEOPLE, SDSF_LOCAL, SDSF_WEB;
+
+/***************************************************************
+ * Here come the trees in the treenet. A shell for calling them
+ * appears at the end of this source file.
+ ***************************************************************/
+/* Data Dictionary, Number Of Variables = 4*/
+/* Name = CT$, Type = categorical. */
+/* Name = QPSCOREFOR_KG PEOPLE, Type = continuous. */
+/* Name = SDSF_LOCAL, Type = continuous. */
+/* Name = SDSF_WEB, Type = continuous. */
+
+MODELBEGIN:
+
+ /* N trees: 5 */
+
+link TN0;
+pred = tnscore; /* predicted value for GRADE */
+
+/*********************/
+/* Model is complete */
+/*********************/
+
+return;
+
+
+tnscore = 0.0;
+
+TN0:
+
+ /* Tree 1 of 5 */
+N0_1:
+ if CT$ in (Wiki, Web, Image, Video, Finance) then goto N0_2;
+ else goto T0_4;
+
+N0_2:
+ if SDSF_LOCAL < 0.6359952986 then goto N0_3;
+ else goto T0_3;
+
+N0_3:
+ if CT$ in (Image, Video) then goto T0_1;
+ else goto T0_2;
+
+T0_1:
+ response = -0.1846455351;
+ goto D0;
+
+T0_2:
+ response = -0.0057844764;
+ goto D0;
+
+T0_3:
+ response = -0.4039473684;
+ goto D0;
+
+T0_4:
+ response = 0.2900655347;
+ goto D0;
+
+D0:
+
+tnscore = tnscore + response;
+
+ /* Tree 2 of 5 */
+N1_1:
+ if CT$ in (Wiki, Web, Image, Video, KG Movie, Finance, Timezone) then goto T1_1;
+ else goto N1_2;
+
+T1_1:
+ response = -0.0790797330;
+ goto D1;
+
+N1_2:
+ if CT$ in (Local, Q2A) then goto N1_3;
+ else goto T1_4;
+
+N1_3:
+ if SDSF_LOCAL < 0.5348491371 then goto T1_2;
+ else goto T1_3;
+
+T1_2:
+ response = -0.0304336373;
+ goto D1;
+
+T1_3:
+ response = 0.2401947405;
+ goto D1;
+
+T1_4:
+ response = 0.3739991530;
+ goto D1;
+
+D1:
+
+tnscore = tnscore + response;
+
+ /* Tree 3 of 5 */
+N2_1:
+ if CT$ in (Web, Image, Video, Timezone) then goto T2_1;
+ else goto N2_2;
+
+T2_1:
+ response = -0.0572267897;
+ goto D2;
+
+N2_2:
+ if CT$ in (Wiki, Local, KG Movie) then goto N2_3;
+ else goto T2_4;
+
+N2_3:
+ if SDSF_LOCAL < 0.4078139514 then goto T2_2;
+ else goto T2_3;
+
+T2_2:
+ response = -0.0295648159;
+ goto D2;
+
+T2_3:
+ response = 0.1601345785;
+ goto D2;
+
+T2_4:
+ response = 0.2612064355;
+ goto D2;
+
+D2:
+
+tnscore = tnscore + response;
+
+ /* Tree 4 of 5 */
+N3_1:
+ if CT$ in (Image, Video, Timezone) then goto T3_1;
+ else goto N3_2;
+
+T3_1:
+ response = -0.1103244788;
+ goto D3;
+
+N3_2:
+ if CT$ in (Wiki, Web, Local, KG Movie) then goto N3_3;
+ else goto T3_4;
+
+N3_3:
+ if QPSCOREFOR_KG_PEOPLE < 0.9930000007 then goto T3_2;
+ else goto T3_3;
+
+T3_2:
+ response = 0.0194079789;
+ goto D3;
+
+T3_3:
+ response = -0.2056829336;
+ goto D3;
+
+T3_4:
+ response = 0.1987635246;
+ goto D3;
+
+D3:
+
+tnscore = tnscore + response;
+
+ /* Tree 5 of 5 */
+N4_1:
+ if CT$ in (Image, Video, Event, Timezone) then goto N4_2;
+ else goto N4_3;
+
+N4_2:
+ if SDSF_WEB < 0.3725785315 then goto T4_1;
+ else goto T4_2;
+
+T4_1:
+ response = -0.0680975953;
+ goto D4;
+
+T4_2:
+ response = -0.2264832978;
+ goto D4;
+
+N4_3:
+ if CT$ in (Wiki, Web, Local, Q2A) then goto T4_3;
+ else goto T4_4;
+
+T4_3:
+ response = 0.0105928220;
+ goto D4;
+
+T4_4:
+ response = 0.1366891795;
+ goto D4;
+
+D4:
+
+tnscore = tnscore + response;
+
+return;
+/******************************
+ * CALL EACH TREENET EXPLICITLY
+ ******************************/
+
+void grove(void)
+{
+ int terminal_node_number;
+
+
+ return;
+}
+
+ ___FINISHED___
+
diff --git a/searchlib/src/test/java/com/yahoo/searchlib/aggregation/AggregationTestCase.java b/searchlib/src/test/java/com/yahoo/searchlib/aggregation/AggregationTestCase.java
new file mode 100755
index 00000000000..2f271ec84db
--- /dev/null
+++ b/searchlib/src/test/java/com/yahoo/searchlib/aggregation/AggregationTestCase.java
@@ -0,0 +1,346 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.aggregation;
+
+import com.yahoo.document.DocumentId;
+import com.yahoo.document.GlobalId;
+import com.yahoo.io.GrowableByteBuffer;
+import com.yahoo.searchlib.expression.*;
+import com.yahoo.vespa.objects.BufferSerializer;
+import com.yahoo.vespa.objects.Identifiable;
+import com.yahoo.vespa.objects.ObjectOperation;
+import com.yahoo.vespa.objects.ObjectPredicate;
+import junit.framework.TestCase;
+
+/**
+ * @author <a href="mailto:balder@yahoo-inc.com">Henning Baldersheim</a>
+ */
+public class AggregationTestCase extends TestCase {
+
+ public void testSumAggregationResult() {
+ SumAggregationResult a = new SumAggregationResult();
+ a.setExpression(new AttributeNode("attributeA"));
+ a.setSum(new IntegerResultNode(7));
+ assertEquals(a.getSum().getInteger(), 7);
+ SumAggregationResult b = (SumAggregationResult)serializeDeserialize(a);
+ assertEquals(b.getSum().getInteger(), 7);
+ b.merge(a);
+ assertEquals(b.getSum().getInteger(), 14);
+ }
+
+ public void testXorAggregationResult() {
+ XorAggregationResult a = new XorAggregationResult(6);
+ a.setExpression(new AttributeNode("attributeA"));
+ assertEquals(a.getXor(), 6);
+ a.setXor(7);
+ assertEquals(a.getXor(), 7);
+ XorAggregationResult b = (XorAggregationResult)serializeDeserialize(a);
+ assertEquals(b.getXor(), 7);
+ b.merge(a);
+ assertEquals(b.getXor(), 0);
+ }
+
+ public void testCountAggregationResult() {
+ CountAggregationResult a = new CountAggregationResult(6);
+ a.setExpression(new AttributeNode("attributeA"));
+ assertEquals(a.getCount(), 6);
+ a.setCount(7);
+ assertEquals(a.getCount(), 7);
+ CountAggregationResult b = (CountAggregationResult)serializeDeserialize(a);
+ assertEquals(b.getCount(), 7);
+ b.merge(a);
+ assertEquals(b.getCount(), 14);
+ }
+
+ public void testMinAggregationResult() {
+ MinAggregationResult a = new MinAggregationResult(new IntegerResultNode(6));
+ a.setExpression(new AttributeNode("attributeA"));
+ assertEquals(a.getMin().getInteger(), 6);
+ a.setMin(new IntegerResultNode(7));
+ assertEquals(a.getMin().getInteger(), 7);
+ MinAggregationResult b = (MinAggregationResult)serializeDeserialize(a);
+ a.setMin(new IntegerResultNode(6));
+ assertEquals(b.getMin().getInteger(), 7);
+ b.merge(a);
+ assertEquals(b.getMin().getInteger(), 6);
+ }
+
+ public void testMaxAggregationResult() {
+ MaxAggregationResult a = new MaxAggregationResult(new IntegerResultNode(6));
+ a.setExpression(new AttributeNode("attributeA"));
+ assertEquals(a.getMax().getInteger(), 6);
+ a.setMax(new IntegerResultNode(7));
+ assertEquals(a.getMax().getInteger(), 7);
+ MaxAggregationResult b = (MaxAggregationResult)serializeDeserialize(a);
+ a.setMax(new IntegerResultNode(6));
+ assertEquals(b.getMax().getInteger(), 7);
+ b.merge(a);
+ assertEquals(b.getMax().getInteger(), 7);
+ }
+
+ public void testAverageAggregationResult() {
+ AverageAggregationResult a = new AverageAggregationResult(new FloatResultNode(72), 6);
+ a.setExpression(new AttributeNode("attributeA"));
+ assertEquals(a.getCount(), 6);
+ a.setCount(8);
+ assertEquals(a.getCount(), 8);
+ AverageAggregationResult b = (AverageAggregationResult)serializeDeserialize(a);
+ assertEquals(b.getCount(), 8);
+ a.setCount(6);
+ b.merge(a);
+ assertEquals(b.getCount(), 14);
+ assertEquals(b.getSum().getInteger(), 144);
+ }
+
+ private static boolean equals(Object a, Object b) {
+ return a.equals(b);
+ }
+
+ private GlobalId createGlobalId(int docId) {
+ return new GlobalId((new DocumentId("doc:test:" + docId)).getGlobalId());
+ }
+
+ public void testFs4HitsAggregationResult() {
+ double rank1 = 1;
+ double rank2 = 2;
+ assertEquals(new FS4Hit(1, createGlobalId(1), rank1), new FS4Hit(1, createGlobalId(1), rank1));
+ assertFalse(equals(new FS4Hit(1, createGlobalId(1), rank1), new FS4Hit(2, createGlobalId(1), rank1)));
+ assertFalse(equals(new FS4Hit(1, createGlobalId(1), rank1), new FS4Hit(1, createGlobalId(2), rank1)));
+ assertFalse(equals(new FS4Hit(1, createGlobalId(1), rank1), new FS4Hit(1, createGlobalId(1), rank2)));
+
+ HitsAggregationResult a = new HitsAggregationResult(5);
+ assertEquals(5, a.getMaxHits());
+ assertEquals(0, a.getHits().size());
+ a.setExpression(new AttributeNode("attributeA"));
+ a.addHit(new FS4Hit(1, createGlobalId(2), rank1));
+ a.addHit(new FS4Hit(5, createGlobalId(7), rank2));
+ assertEquals(2, a.getHits().size());
+ HitsAggregationResult b = (HitsAggregationResult)serializeDeserialize(a);
+ assertEquals(a, b);
+ a.postMerge();
+ assertEquals(2, a.getHits().size());
+ assertEquals(2.0, a.getHits().get(0).getRank());
+ a.setMaxHits(1).postMerge();
+ assertEquals(1, a.getHits().size());
+ assertEquals(2.0, a.getHits().get(0).getRank());
+
+ HitsAggregationResult hits = new HitsAggregationResult(3)
+ .addHit(new FS4Hit(1, createGlobalId(3), 1))
+ .addHit(new FS4Hit(2, createGlobalId(2), 2))
+ .addHit(new FS4Hit(3, createGlobalId(1), 3));
+ Grouping request = new Grouping()
+ .setRoot(new Group()
+ .addAggregationResult(hits.clone())
+ .addChild(new Group()
+ .addAggregationResult(hits.clone())
+ .addChild(new Group()
+ .addAggregationResult(hits.clone())))
+ .addChild(new Group()
+ .addAggregationResult(hits.clone())
+ .addChild(new Group()
+ .addAggregationResult(hits.clone())
+ .addChild(new Group()
+ .addAggregationResult(hits.clone())))));
+ assertFS4Hits(request, 0, 0, 3);
+ assertFS4Hits(request, 1, 1, 6);
+ assertFS4Hits(request, 2, 2, 6);
+ assertFS4Hits(request, 3, 3, 3);
+ assertFS4Hits(request, 4, 4, 0);
+
+ assertFS4Hits(request, 0, 1, 9);
+ assertFS4Hits(request, 0, 2, 15);
+ assertFS4Hits(request, 0, 3, 18);
+ assertFS4Hits(request, 0, 4, 18);
+ assertFS4Hits(request, 1, 4, 15);
+ assertFS4Hits(request, 2, 4, 9);
+ assertFS4Hits(request, 3, 4, 3);
+
+ assertFS4Hits(request, 1, 2, 12);
+ assertFS4Hits(request, 2, 3, 9);
+ assertFS4Hits(request, 3, 4, 3);
+ assertFS4Hits(request, 4, 5, 0);
+ }
+
+ public void testVdsHitsAggregationResult() {
+ double rank1 = 1;
+ double rank2 = 2;
+ byte [] s1 = {'a','b','c'};
+ byte [] s2 = {'n','o','e'};
+ byte [] s3 = {'n','o','3'};
+ assertEquals(new VdsHit("1", s1, rank1), new VdsHit("1", s1, rank1));
+ assertFalse(equals(new VdsHit("1", s1, rank1), new VdsHit("2", s1, rank1)));
+ assertFalse(equals(new VdsHit("1", s1, rank1), new VdsHit("1", s2, rank1)));
+ assertFalse(equals(new VdsHit("1", s1, rank1), new VdsHit("1", s1, rank2)));
+
+ HitsAggregationResult a = new HitsAggregationResult(5);
+ assertEquals(5, a.getMaxHits());
+ assertEquals(0, a.getHits().size());
+ a.setExpression(new AttributeNode("attributeA"));
+ a.addHit(new VdsHit("1", s2, rank1));
+// a.addHit(new VdsHit("5", s7, rank2));
+// assertEquals(2, a.getHits().size());
+ HitsAggregationResult b = (HitsAggregationResult)serializeDeserialize(a);
+ assertEquals(a, b);
+
+ HitsAggregationResult hits = new HitsAggregationResult(3)
+ .addHit(new VdsHit("1", s3, 1))
+ .addHit(new VdsHit("2", s2, 2))
+ .addHit(new VdsHit("3", s1, 3));
+ Grouping request = new Grouping()
+ .setRoot(new Group()
+ .addAggregationResult(hits.clone())
+ .addChild(new Group()
+ .addAggregationResult(hits.clone())
+ .addChild(new Group()
+ .addAggregationResult(hits.clone())))
+ .addChild(new Group()
+ .addAggregationResult(hits.clone())
+ .addChild(new Group()
+ .addAggregationResult(hits.clone())
+ .addChild(new Group()
+ .addAggregationResult(hits.clone())))));
+ assertVdsHits(request, 0, 0, 3);
+ assertVdsHits(request, 1, 1, 6);
+ assertVdsHits(request, 2, 2, 6);
+ assertVdsHits(request, 3, 3, 3);
+ assertVdsHits(request, 4, 4, 0);
+
+ assertVdsHits(request, 0, 1, 9);
+ assertVdsHits(request, 0, 2, 15);
+ assertVdsHits(request, 0, 3, 18);
+ assertVdsHits(request, 0, 4, 18);
+ assertVdsHits(request, 1, 4, 15);
+ assertVdsHits(request, 2, 4, 9);
+ assertVdsHits(request, 3, 4, 3);
+
+ assertVdsHits(request, 1, 2, 12);
+ assertVdsHits(request, 2, 3, 9);
+ assertVdsHits(request, 3, 4, 3);
+ assertVdsHits(request, 4, 5, 0);
+ }
+
+
+ private void assertFS4Hits(Grouping request, int firstLevel, int lastLevel, int expected) {
+ CountFS4Hits obj = new CountFS4Hits();
+ request.setFirstLevel(firstLevel);
+ request.setLastLevel(lastLevel);
+ request.select(obj, obj);
+ assertEquals(expected, obj.count);
+ }
+
+ private void assertVdsHits(Grouping request, int firstLevel, int lastLevel, int expected) {
+ CountVdsHits obj = new CountVdsHits();
+ request.setFirstLevel(firstLevel);
+ request.setLastLevel(lastLevel);
+ request.select(obj, obj);
+ assertEquals(expected, obj.count);
+ }
+
+ private class CountFS4Hits implements ObjectPredicate, ObjectOperation {
+ int count;
+ public boolean check(Object obj) {
+ return obj instanceof FS4Hit;
+ }
+ public void execute(Object obj) {
+ ++count;
+ }
+ }
+
+ private class CountVdsHits implements ObjectPredicate, ObjectOperation {
+ int count;
+ public boolean check(Object obj) {
+ return obj instanceof VdsHit;
+ }
+ public void execute(Object obj) {
+ ++count;
+ }
+ }
+
+ public void testGroup() {
+ Group a = new Group();
+ a.setId(new IntegerResultNode(17));
+ a.addAggregationResult(new XorAggregationResult());
+ serializeDeserialize1(a);
+ }
+
+ public void testGrouping() {
+ Grouping a = new Grouping();
+ GroupingLevel level = new GroupingLevel();
+ level.setExpression(new AttributeNode("folder"));
+
+ XorAggregationResult xor = new XorAggregationResult();
+ xor.setExpression(new MD5BitFunctionNode(new AttributeNode("docid"), 64));
+ level.getGroupPrototype().addAggregationResult(xor);
+
+ SumAggregationResult sum = new SumAggregationResult();
+ MinFunctionNode min = new MinFunctionNode();
+ min.addArg(new AttributeNode("attribute1"));
+ min.addArg(new AttributeNode("attribute2"));
+ sum.setExpression(min);
+ level.getGroupPrototype().addAggregationResult(sum);
+
+ CatFunctionNode cat = new CatFunctionNode();
+ cat.addArg(new GetDocIdNamespaceSpecificFunctionNode());
+ cat.addArg(new DocumentFieldNode("folder"));
+ cat.addArg(new DocumentFieldNode("flags"));
+ XorAggregationResult xor2 = new XorAggregationResult();
+ xor2.setExpression(new XorBitFunctionNode(cat, 64));
+ level.getGroupPrototype().addAggregationResult(xor2);
+ a.addLevel(level);
+
+ Group g = new Group();
+ g.setId(new IntegerResultNode(17));
+ g.addAggregationResult(xor); // XXX: this is BAD
+ a.getRoot().addChild(g);
+ serializeDeserialize1(a);
+
+
+ Grouping foo = new Grouping();
+ foo.addLevel(level);
+ int hashBefore = foo.hashCode();
+ foo.setFirstLevel(66);
+ assertEquals(hashBefore, foo.hashCode());
+ foo.setFirstLevel(99);
+ assertEquals(hashBefore, foo.hashCode());
+ foo.setLastLevel(66);
+ assertEquals(hashBefore, foo.hashCode());
+ foo.setLastLevel(99);
+ assertEquals(hashBefore, foo.hashCode());
+ foo.getRoot().addChild(g);
+ assertEquals(hashBefore, foo.hashCode());
+ }
+
+ // --------------------------------------------------------------------------------
+ //
+ // Everything below this point is helper functions.
+ //
+ // --------------------------------------------------------------------------------
+ private static Identifiable serializeDeserialize1(Identifiable a) {
+ BufferSerializer buf = new BufferSerializer(new GrowableByteBuffer());
+ a.serializeWithId(buf);
+ buf.flip();
+ Identifiable b = Identifiable.create(buf);
+ assertEquals(a.getClass(), b.getClass());
+ assertEquals(buf.getBuf().hasRemaining(), false);
+ Identifiable c = b.clone();
+ assertEquals(b.getClass(), c.getClass());
+ BufferSerializer bb = new BufferSerializer(new GrowableByteBuffer());
+ BufferSerializer cb = new BufferSerializer(new GrowableByteBuffer());
+ b.serializeWithId(bb);
+ c.serializeWithId(cb);
+ assertEquals(bb.getBuf().limit(), cb.getBuf().limit());
+ assertEquals(bb.position(), cb.position());
+ bb.getBuf().flip();
+ cb.getBuf().flip();
+ for (int i = 0; i < bb.getBuf().limit(); i++) {
+ assertEquals(bb.getBuf().get(), cb.getBuf().get());
+ }
+
+ return b;
+ }
+
+ private static AggregationResult serializeDeserialize(AggregationResult a) {
+ AggregationResult b = (AggregationResult)serializeDeserialize1(a);
+ assertEquals(a.getExpression().getClass(), b.getExpression().getClass());
+ return b;
+ }
+}
diff --git a/searchlib/src/test/java/com/yahoo/searchlib/aggregation/ExpressionCountAggregationResultTest.java b/searchlib/src/test/java/com/yahoo/searchlib/aggregation/ExpressionCountAggregationResultTest.java
new file mode 100644
index 00000000000..0d7c4c8bca1
--- /dev/null
+++ b/searchlib/src/test/java/com/yahoo/searchlib/aggregation/ExpressionCountAggregationResultTest.java
@@ -0,0 +1,82 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.aggregation;
+
+import com.yahoo.searchlib.aggregation.hll.*;
+import com.yahoo.vespa.objects.BufferSerializer;
+import org.junit.Test;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+/**
+ * @author bjorncs
+ */
+public class ExpressionCountAggregationResultTest {
+
+ @Test
+ public void requireThatSketchesAreMerged() {
+ ExpressionCountAggregationResult aggr1 = createAggregationWithSparseSketch(42);
+ ExpressionCountAggregationResult aggr2 = createAggregationWithSparseSketch(1337);
+
+ // Merge performs union of the underlying data of the sparse sketch.
+ aggr1.onMerge(aggr2);
+
+ SparseSketch sketch = (SparseSketch) aggr1.getSketch();
+ SketchUtils.assertSparseSketchContains(sketch, 42, 1337);
+ }
+
+ @Test
+ public void requireThatEstimateIsCorrect() {
+ ExpressionCountAggregationResult aggr = createAggregationWithSparseSketch(42);
+ assertTrue(aggr.getEstimatedUniqueCount() == 1);
+ }
+
+ @Test
+ public void requireThatPostMergeUpdatesEstimate() {
+ ExpressionCountAggregationResult aggr = createAggregationWithSparseSketch(1337);
+ assertEquals(1, aggr.getEstimatedUniqueCount());
+ // Merge performs union of the underlying data of the sparse sketch.
+ aggr.onMerge(createAggregationWithSparseSketch(9001));
+ assertEquals(2, aggr.getEstimatedUniqueCount());
+ }
+
+ @Test
+ public void requireThatSerializationDeserializationMatchSparseSketch() {
+ ExpressionCountAggregationResult from = createAggregationWithSparseSketch(42);
+ ExpressionCountAggregationResult to = createAggregationWithSparseSketch(1337);
+ testSerialization(from, to);
+ }
+
+ @Test
+ public void requireThatSerializationDeserializationMatchNormalSketch() {
+ ExpressionCountAggregationResult from = createAggregationWithNormalSketch(42);
+ ExpressionCountAggregationResult to = createAggregationWithNormalSketch(1337);
+ testSerialization(from, to);
+ }
+
+ private void testSerialization(ExpressionCountAggregationResult from, ExpressionCountAggregationResult to) {
+ BufferSerializer buffer = new BufferSerializer();
+ from.serialize(buffer);
+ buffer.flip();
+ to.deserialize(buffer);
+
+ assertEquals(from.getSketch(), to.getSketch());
+ }
+
+ private static ExpressionCountAggregationResult createAggregationWithSparseSketch(int sketchValue) {
+ SparseSketch initialSketch = SketchUtils.createSparseSketch(sketchValue);
+ return new ExpressionCountAggregationResult(
+ initialSketch,
+ sketch -> ((SparseSketch) sketch).size()
+ );
+ }
+
+ private static ExpressionCountAggregationResult createAggregationWithNormalSketch(int sketchValue) {
+ NormalSketch initialSketch = SketchUtils.createNormalSketch(sketchValue);
+ return new ExpressionCountAggregationResult(
+ initialSketch,
+ sketch -> 42
+ );
+ }
+
+}
diff --git a/searchlib/src/test/java/com/yahoo/searchlib/aggregation/ForceLoadTestCase.java b/searchlib/src/test/java/com/yahoo/searchlib/aggregation/ForceLoadTestCase.java
new file mode 100755
index 00000000000..ee7d50f33cb
--- /dev/null
+++ b/searchlib/src/test/java/com/yahoo/searchlib/aggregation/ForceLoadTestCase.java
@@ -0,0 +1,19 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.aggregation;
+
+public class ForceLoadTestCase extends junit.framework.TestCase {
+
+ public ForceLoadTestCase(String name) {
+ super(name);
+ }
+
+ public void testLoadClasses() {
+ try {
+ new com.yahoo.searchlib.aggregation.ForceLoad();
+ assertTrue(com.yahoo.searchlib.aggregation.ForceLoad.forceLoad());
+ } catch (com.yahoo.system.ForceLoadError e) {
+ e.printStackTrace();
+ assertTrue(false);
+ }
+ }
+}
diff --git a/searchlib/src/test/java/com/yahoo/searchlib/aggregation/GroupTestCase.java b/searchlib/src/test/java/com/yahoo/searchlib/aggregation/GroupTestCase.java
new file mode 100644
index 00000000000..1852f292a48
--- /dev/null
+++ b/searchlib/src/test/java/com/yahoo/searchlib/aggregation/GroupTestCase.java
@@ -0,0 +1,229 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.aggregation;
+
+import com.yahoo.searchlib.expression.*;
+import com.yahoo.vespa.objects.BufferSerializer;
+import com.yahoo.vespa.objects.Identifiable;
+import org.junit.Test;
+
+import java.util.Arrays;
+
+import static org.junit.Assert.*;
+
+/**
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
+ */
+public class GroupTestCase {
+
+ @Test
+ public void requireThatAggregationResultsCanBeAdded() {
+ Group group = new Group();
+ AggregationResult res = new AverageAggregationResult();
+ group.addAggregationResult(res);
+ assertEquals(1, group.getAggregationResults().size());
+ assertSame(res, group.getAggregationResults().get(0));
+ }
+
+ @Test
+ public void requireThatAggregationResultListIsNotImmutable() {
+ Group group = new Group();
+ group.getAggregationResults().add(new AverageAggregationResult());
+ }
+
+ @Test
+ public void requireThatOrderByExpressionsCanBeAdded() {
+ Group group = new Group();
+ ExpressionNode foo = new ConstantNode(new IntegerResultNode(6));
+ group.addOrderBy(foo, true);
+ assertEquals(1, group.getOrderByExpressions().size());
+ assertSame(foo, group.getOrderByExpressions().get(0));
+ assertEquals(Arrays.asList(1), group.getOrderByIndexes());
+
+ ExpressionNode bar = new ConstantNode(new IntegerResultNode(9));
+ group.addOrderBy(bar, false);
+ assertEquals(2, group.getOrderByExpressions().size());
+ assertSame(bar, group.getOrderByExpressions().get(1));
+ assertEquals(Arrays.asList(1, -2), group.getOrderByIndexes());
+ }
+
+ @Test
+ public void requireThatOrderByListsAreImmutable() {
+ Group group = new Group();
+ try {
+ group.getOrderByExpressions().add(new ConstantNode(new IntegerResultNode(69)));
+ fail();
+ } catch (UnsupportedOperationException e) {
+
+ }
+ try {
+ group.getOrderByIndexes().add(69);
+ fail();
+ } catch (UnsupportedOperationException e) {
+
+ }
+ }
+
+ @Test
+ public void requireThatAddOrderByAddsAggregationResult() {
+ Group group = new Group();
+ AggregationResult res = new MinAggregationResult();
+ group.addOrderBy(res, true);
+ assertEquals(1, group.getAggregationResults().size());
+ assertSame(res, group.getAggregationResults().get(0));
+ }
+
+ @Test
+ public void requireThatAddOrderByDoesNotAddDuplicateAggregationResult() {
+ Group group = new Group();
+ AggregationResult res = new MinAggregationResult();
+ group.addAggregationResult(res);
+ group.addOrderBy(res, true);
+ assertEquals(1, group.getAggregationResults().size());
+ assertSame(res, group.getAggregationResults().get(0));
+ }
+
+ @Test
+ public void requireThatAddOrderByIgnoresAggregationResultTagWhenMatching() {
+ Group group = new Group();
+ AggregationResult foo = new MinAggregationResult();
+ foo.setTag(6);
+ group.addAggregationResult(foo);
+ AggregationResult bar = new MinAggregationResult();
+ bar.setTag(9);
+ group.addOrderBy(bar, true);
+ assertEquals(1, group.getAggregationResults().size());
+ assertSame(foo, group.getAggregationResults().get(0));
+ assertEquals(6, foo.getTag());
+ }
+
+ @Test
+ public void requireThatAddOrderByDoesNotModifyTagOfNewAggregationResult() {
+ Group group = new Group();
+ AggregationResult foo = new MinAggregationResult();
+ foo.setTag(6);
+ group.addAggregationResult(foo);
+ AggregationResult bar = new MaxAggregationResult();
+ bar.setTag(9);
+ group.addOrderBy(bar, true);
+ assertEquals(2, group.getAggregationResults().size());
+ assertSame(foo, group.getAggregationResults().get(0));
+ assertEquals(6, foo.getTag());
+ assertSame(bar, group.getAggregationResults().get(1));
+ assertEquals(9, bar.getTag());
+ }
+
+ @Test
+ public void requireThatAddOrderByAddsReferencedAggregationResult() {
+ Group group = new Group();
+ AggregationResult res = new MinAggregationResult();
+ group.addOrderBy(new AggregationRefNode(res), true);
+ assertEquals(1, group.getAggregationResults().size());
+ assertSame(res, group.getAggregationResults().get(0));
+ }
+
+ @Test
+ public void requireThatAddOrderByDoesNotAddDuplicateReferencedAggregationResult() {
+ Group group = new Group();
+ AggregationResult res = new MinAggregationResult();
+ group.addAggregationResult(res);
+ group.addOrderBy(new AggregationRefNode(res), true);
+ assertEquals(1, group.getAggregationResults().size());
+ assertSame(res, group.getAggregationResults().get(0));
+ }
+
+ @Test
+ public void requireThatAddOrderByAddsDeepReferencedAggregationResult() {
+ Group group = new Group();
+ AggregationResult res = new MinAggregationResult();
+ group.addOrderBy(new NegateFunctionNode(new AggregationRefNode(res)), true);
+ assertEquals(1, group.getAggregationResults().size());
+ assertSame(res, group.getAggregationResults().get(0));
+ }
+
+ @Test
+ public void requireThatAddOrderByDoesNotAddDuplicateDeepReferencedAggregationResult() {
+ Group group = new Group();
+ AggregationResult res = new MinAggregationResult();
+ group.addAggregationResult(res);
+ group.addOrderBy(new NegateFunctionNode(new AggregationRefNode(res)), true);
+ assertEquals(1, group.getAggregationResults().size());
+ assertSame(res, group.getAggregationResults().get(0));
+ }
+
+ @Test
+ public void requireThatAddOrderByResolvesReferenceIndex() {
+ Group group = new Group();
+ AggregationResult res = new MinAggregationResult();
+ group.addAggregationResult(res);
+ group.addOrderBy(new AggregationRefNode(res), true);
+ assertEquals(1, group.getOrderByExpressions().size());
+ AggregationRefNode ref = (AggregationRefNode)group.getOrderByExpressions().get(0);
+ assertEquals(0, ref.getIndex());
+ assertSame(res, ref.getExpression());
+ }
+
+ @Test
+ public void requireThatAddOrderByResolvesDeepReferenceIndex() {
+ Group group = new Group();
+ AggregationResult res = new MinAggregationResult();
+ group.addAggregationResult(res);
+ group.addOrderBy(new NegateFunctionNode(new AggregationRefNode(res)), true);
+ assertEquals(1, group.getOrderByExpressions().size());
+ AggregationRefNode ref = (AggregationRefNode)((NegateFunctionNode)group.getOrderByExpressions().get(0)).getArg();
+ assertEquals(0, ref.getIndex());
+ assertSame(res, ref.getExpression());
+ }
+
+ @Test
+ public void requireThatAddOrderByResolvesReferenceResult() {
+ Group group = new Group();
+ AggregationResult res = new MinAggregationResult();
+ group.addOrderBy(new AggregationRefNode(res), true);
+ assertEquals(1, group.getOrderByExpressions().size());
+ AggregationRefNode ref = (AggregationRefNode)group.getOrderByExpressions().get(0);
+ assertEquals(0, ref.getIndex());
+ assertSame(res, ref.getExpression());
+ }
+
+ @Test
+ public void requireThatAddOrderByResolvesDeepReferenceResult() {
+ Group group = new Group();
+ AggregationResult res = new MinAggregationResult();
+ group.addOrderBy(new NegateFunctionNode(new AggregationRefNode(res)), true);
+ assertEquals(1, group.getOrderByExpressions().size());
+ AggregationRefNode ref = (AggregationRefNode)((NegateFunctionNode)group.getOrderByExpressions().get(0)).getArg();
+ assertEquals(0, ref.getIndex());
+ assertSame(res, ref.getExpression());
+ }
+
+ @Test
+ public void requireThatCloneResolvesAggregationRef() {
+ Group group = new Group();
+ AggregationResult res = new MinAggregationResult();
+ group.addOrderBy(new AggregationRefNode(res), true);
+ group = group.clone();
+
+ assertEquals(1, group.getOrderByExpressions().size());
+ AggregationRefNode ref = (AggregationRefNode)group.getOrderByExpressions().get(0);
+ assertEquals(0, ref.getIndex());
+ assertEquals(res, ref.getExpression());
+ assertNotSame(res, ref.getExpression());
+ }
+
+ @Test
+ public void requireThatDeserializeResolvesAggregationRef() {
+ Group group = new Group();
+ AggregationResult res = new MinAggregationResult();
+ group.addOrderBy(new AggregationRefNode(res), true);
+ BufferSerializer buf = new BufferSerializer();
+ group.serializeWithId(buf);
+ buf.flip();
+ group = (Group)Identifiable.create(buf);
+
+ assertEquals(1, group.getOrderByExpressions().size());
+ AggregationRefNode ref = (AggregationRefNode)group.getOrderByExpressions().get(0);
+ assertEquals(0, ref.getIndex());
+ assertEquals(res, ref.getExpression());
+ assertNotSame(res, ref.getExpression());
+ }
+}
diff --git a/searchlib/src/test/java/com/yahoo/searchlib/aggregation/GroupingSerializationTest.java b/searchlib/src/test/java/com/yahoo/searchlib/aggregation/GroupingSerializationTest.java
new file mode 100644
index 00000000000..a9926f7c0e2
--- /dev/null
+++ b/searchlib/src/test/java/com/yahoo/searchlib/aggregation/GroupingSerializationTest.java
@@ -0,0 +1,387 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.aggregation;
+
+import com.yahoo.document.DocumentId;
+import com.yahoo.document.GlobalId;
+import com.yahoo.io.GrowableByteBuffer;
+import com.yahoo.searchlib.aggregation.hll.SparseSketch;
+import com.yahoo.searchlib.expression.*;
+import com.yahoo.vespa.objects.BufferSerializer;
+import com.yahoo.vespa.objects.Identifiable;
+import com.yahoo.vespa.objects.ObjectDumper;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+import java.io.*;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.util.Arrays;
+
+import static org.junit.Assert.fail;
+
+/**
+ * Tests serialization compatibility across Java and C++. The comparison is performed by comparing serialized Java
+ * object graphs with the content of specific binary files. C++ unit tests serializes
+ * identical data structures into these files.
+ * Note: This test relies heavily on proper implementation of {@link Object#equals(Object)}!
+ */
+public class GroupingSerializationTest {
+
+ @BeforeClass
+ public static void forceLoadingOfSerializableClasses() {
+ com.yahoo.searchlib.aggregation.ForceLoad.forceLoad();
+ com.yahoo.searchlib.expression.ForceLoad.forceLoad();
+ }
+
+ @Test
+ public void testResultTypes() throws IOException {
+ try (SerializationTester t = new SerializationTester("testResultTypes")) {
+ t.assertMatch(new IntegerResultNode(7));
+ t.assertMatch(new FloatResultNode(7.3));
+ t.assertMatch(new StringResultNode("7.3"));
+ t.assertMatch(new StringResultNode(
+ new String(new byte[]{(byte)0xe5, (byte)0xa6, (byte)0x82, (byte)0xe6, (byte)0x9e, (byte)0x9c})));
+ t.assertMatch(new RawResultNode(new byte[]{'7', '.', '4'}));
+ t.assertMatch(new IntegerBucketResultNode());
+ t.assertMatch(new FloatBucketResultNode());
+ t.assertMatch(new IntegerBucketResultNode(10, 20));
+ t.assertMatch(new FloatBucketResultNode(10, 20));
+ t.assertMatch(new StringBucketResultNode("10.0", "20.0"));
+ t.assertMatch(new RawBucketResultNode(
+ new RawResultNode(new byte[]{1, 0, 0}),
+ new RawResultNode(new byte[]{1, 1, 0})));
+ t.assertMatch(new IntegerBucketResultNodeVector()
+ .add(new IntegerBucketResultNode(878, 3246823)));
+ t.assertMatch(new FloatBucketResultNodeVector()
+ .add(new FloatBucketResultNode(878, 3246823)));
+ t.assertMatch(new StringBucketResultNodeVector()
+ .add(new StringBucketResultNode("878", "3246823")));
+ t.assertMatch(new RawBucketResultNodeVector()
+ .add(new RawBucketResultNode(
+ new RawResultNode(new byte[]{1, 0, 0}),
+ new RawResultNode(new byte[]{1, 1, 0}))));
+ }
+
+ }
+
+ @Test
+ public void testSpecialNodes() throws IOException {
+ try (SerializationTester t = new SerializationTester("testSpecialNodes")) {
+ t.assertMatch(new AttributeNode("testattribute"));
+ t.assertMatch(new DocumentFieldNode("testdocumentfield"));
+ t.assertMatch(new GetDocIdNamespaceSpecificFunctionNode(new IntegerResultNode(7)));
+ t.assertMatch(new GetYMUMChecksumFunctionNode());
+ }
+ }
+
+ @Test
+ public void testFunctionNodes() throws IOException {
+ try (SerializationTester t = new SerializationTester("testFunctionNodes")) {
+ t.assertMatch(new AddFunctionNode()
+ .addArg(new ConstantNode(new IntegerResultNode(7)))
+ .addArg(new ConstantNode(new IntegerResultNode(8)))
+ .addArg(new ConstantNode(new IntegerResultNode(9))));
+ t.assertMatch(new XorFunctionNode()
+ .addArg(new ConstantNode(new IntegerResultNode(7)))
+ .addArg(new ConstantNode(new IntegerResultNode(8)))
+ .addArg(new ConstantNode(new IntegerResultNode(9))));
+ t.assertMatch(new MultiplyFunctionNode()
+ .addArg(new ConstantNode(new IntegerResultNode(7)))
+ .addArg(new ConstantNode(new IntegerResultNode(8)))
+ .addArg(new ConstantNode(new IntegerResultNode(9))));
+ t.assertMatch(new DivideFunctionNode()
+ .addArg(new ConstantNode(new IntegerResultNode(7)))
+ .addArg(new ConstantNode(new IntegerResultNode(8)))
+ .addArg(new ConstantNode(new IntegerResultNode(9))));
+ t.assertMatch(new ModuloFunctionNode()
+ .addArg(new ConstantNode(new IntegerResultNode(7)))
+ .addArg(new ConstantNode(new IntegerResultNode(8)))
+ .addArg(new ConstantNode(new IntegerResultNode(9))));
+ t.assertMatch(new MinFunctionNode()
+ .addArg(new ConstantNode(new IntegerResultNode(7)))
+ .addArg(new ConstantNode(new IntegerResultNode(8)))
+ .addArg(new ConstantNode(new IntegerResultNode(9))));
+ t.assertMatch(new MaxFunctionNode()
+ .addArg(new ConstantNode(new IntegerResultNode(7)))
+ .addArg(new ConstantNode(new IntegerResultNode(8)))
+ .addArg(new ConstantNode(new IntegerResultNode(9))));
+ t.assertMatch(new TimeStampFunctionNode(new ConstantNode(new IntegerResultNode(7)),
+ TimeStampFunctionNode.TimePart.Hour, true));
+ t.assertMatch(new ZCurveFunctionNode(new ConstantNode(new IntegerResultNode(7)),
+ ZCurveFunctionNode.Dimension.X));
+ t.assertMatch(new ZCurveFunctionNode(new ConstantNode(new IntegerResultNode(7)),
+ ZCurveFunctionNode.Dimension.Y));
+ t.assertMatch(new NegateFunctionNode(new ConstantNode(new IntegerResultNode(7))));
+ t.assertMatch(new SortFunctionNode(new ConstantNode(new IntegerResultNode(7))));
+ t.assertMatch(new NormalizeSubjectFunctionNode(new ConstantNode(
+ new StringResultNode("foo"))));
+ t.assertMatch(new ReverseFunctionNode(new ConstantNode(new IntegerResultNode(7))));
+ t.assertMatch(new MD5BitFunctionNode(new ConstantNode(new IntegerResultNode(7)), 64));
+ t.assertMatch(new XorBitFunctionNode(new ConstantNode(new IntegerResultNode(7)), 64));
+ t.assertMatch(new CatFunctionNode()
+ .addArg(new ConstantNode(new IntegerResultNode(7)))
+ .addArg(new ConstantNode(new IntegerResultNode(8)))
+ .addArg(new ConstantNode(new IntegerResultNode(9))));
+ t.assertMatch(new FixedWidthBucketFunctionNode());
+ t.assertMatch(new FixedWidthBucketFunctionNode().addArg(new AttributeNode("foo")));
+ t.assertMatch(new FixedWidthBucketFunctionNode(new IntegerResultNode(10), new AttributeNode("foo")));
+ t.assertMatch(new FixedWidthBucketFunctionNode(new FloatResultNode(10.0), new AttributeNode("foo")));
+ t.assertMatch(new RangeBucketPreDefFunctionNode());
+ t.assertMatch(new RangeBucketPreDefFunctionNode().addArg(new AttributeNode("foo")));
+ t.assertMatch(new DebugWaitFunctionNode(new ConstantNode(new IntegerResultNode(5)),
+ 3.3, false));
+ }
+
+ }
+
+ @Test
+ public void testAggregatorResults() throws IOException {
+ try (SerializationTester t = new SerializationTester("testAggregatorResults")) {
+ t.assertMatch(new SumAggregationResult(new IntegerResultNode(7))
+ .setExpression(new AttributeNode("attributeA")));
+ t.assertMatch(new XorAggregationResult()
+ .setXor(7)
+ .setExpression(new AttributeNode("attributeA")));
+ t.assertMatch(new CountAggregationResult()
+ .setCount(7)
+ .setExpression(new AttributeNode("attributeA")));
+ t.assertMatch(new MinAggregationResult(new IntegerResultNode(7))
+ .setExpression(new AttributeNode("attributeA")));
+ t.assertMatch(new MaxAggregationResult(new IntegerResultNode(7))
+ .setExpression(new AttributeNode("attributeA")));
+ t.assertMatch(new AverageAggregationResult(new IntegerResultNode(7), 0)
+ .setExpression(new AttributeNode("attributeA")));
+ SparseSketch sketch = new SparseSketch();
+ sketch.aggregate(1955583074);
+ t.assertMatch(new ExpressionCountAggregationResult(sketch, s -> 42)
+ .setExpression(new ConstantNode(new IntegerResultNode(67))));
+ }
+ }
+
+ @Test
+ public void testHitCollection() throws IOException {
+ try (SerializationTester t = new SerializationTester("testHitCollection")) {
+ t.assertMatch(new FS4Hit(0, new GlobalId(new byte[GlobalId.LENGTH]), 0, -1));
+ t.assertMatch(new FS4Hit(0, createGlobalId(100), 50.0, -1));
+ t.assertMatch(new VdsHit());
+ //TODO Verify the two structures below
+ t.assertMatch(new VdsHit("100", new byte[0], 50.0));
+ t.assertMatch(new VdsHit("100", "rawsummary".getBytes(), 50.0));
+ t.assertMatch(new HitsAggregationResult());
+ t.assertMatch(new HitsAggregationResult()
+ .setMaxHits(5)
+ .addHit(new FS4Hit(0, createGlobalId(10), 1.0, -1))
+ .addHit(new FS4Hit(0, createGlobalId(20), 2.0, -1))
+ .addHit(new FS4Hit(0, createGlobalId(30), 3.0, -1))
+ .addHit(new FS4Hit(0, createGlobalId(40), 4.0, -1))
+ .addHit(new FS4Hit(0, createGlobalId(50), 5.0, -1))
+ .setExpression(new ConstantNode(new IntegerResultNode(5))));
+ t.assertMatch(new HitsAggregationResult()
+ .setMaxHits(3)
+ .addHit(new FS4Hit(0, createGlobalId(10), 1.0, 100))
+ .addHit(new FS4Hit(0, createGlobalId(20), 2.0, 200))
+ .addHit(new FS4Hit(0, createGlobalId(30), 3.0, 300))
+ .setExpression(new ConstantNode(new IntegerResultNode(5))));
+ //TODO Verify content
+ t.assertMatch(new HitsAggregationResult()
+ .setMaxHits(3)
+ .addHit(new VdsHit("10", "100".getBytes(), 1.0))
+ .addHit(new VdsHit("20", "200".getBytes(), 2.0))
+ .addHit(new VdsHit("30", "300".getBytes(), 3.0))
+ .setExpression(new ConstantNode(new IntegerResultNode(5))));
+ }
+ }
+
+ @Test
+ public void testGroupingLevel() throws IOException {
+ try (SerializationTester t = new SerializationTester("testGroupingLevel")) {
+ GroupingLevel groupingLevel = new GroupingLevel();
+ groupingLevel.setMaxGroups(100)
+ .setExpression(createDummyExpression())
+ .getGroupPrototype()
+ .addAggregationResult(
+ new SumAggregationResult()
+ .setExpression(createDummyExpression()));
+ t.assertMatch(groupingLevel);
+ }
+ }
+
+ @Test
+ public void testGroup() throws IOException {
+ try (SerializationTester t = new SerializationTester("testGroup")) {
+ t.assertMatch(new Group());
+ t.assertMatch(new Group().setId(new IntegerResultNode(50))
+ .setRank(10));
+ t.assertMatch(new Group().setId(new IntegerResultNode(100))
+ .addChild(new Group().setId(new IntegerResultNode(110)))
+ .addChild(new Group().setId(new IntegerResultNode(120))
+ .setRank(20.5)
+ .addAggregationResult(new SumAggregationResult()
+ .setExpression(createDummyExpression()))
+ .addAggregationResult(new SumAggregationResult()
+ .setExpression(createDummyExpression())))
+ .addChild(new Group().setId(new IntegerResultNode(130))
+ .addChild(new Group().setId(new IntegerResultNode(131)))));
+ }
+ }
+
+ @Test
+ public void testGrouping() throws IOException {
+ try (SerializationTester t = new SerializationTester("testGrouping")) {
+ t.assertMatch(new Grouping());
+
+ GroupingLevel level1 = new GroupingLevel();
+ level1.setMaxGroups(100)
+ .setExpression(createDummyExpression())
+ .getGroupPrototype()
+ .addAggregationResult(
+ new SumAggregationResult()
+ .setExpression(createDummyExpression()));
+ GroupingLevel level2 = new GroupingLevel();
+ level2.setMaxGroups(10)
+ .setExpression(createDummyExpression())
+ .getGroupPrototype()
+ .addAggregationResult(
+ new SumAggregationResult()
+ .setExpression(createDummyExpression()))
+ .addAggregationResult(
+ new SumAggregationResult()
+ .setExpression(createDummyExpression()));
+ t.assertMatch(new Grouping()
+ .addLevel(level1)
+ .addLevel(level2));
+
+ GroupingLevel level3 = new GroupingLevel();
+ level3.setExpression(new AttributeNode("folder"))
+ .getGroupPrototype()
+ .addAggregationResult(
+ new XorAggregationResult()
+ .setExpression(new MD5BitFunctionNode(new AttributeNode("docid"), 64)))
+ .addAggregationResult(
+ new SumAggregationResult()
+ .setExpression(new MinFunctionNode()
+ .addArg(new AttributeNode("attribute1"))
+ .addArg(new AttributeNode("attribute2"))))
+ .addAggregationResult(
+ new XorAggregationResult()
+ .setExpression(
+ new XorBitFunctionNode(new CatFunctionNode()
+ .addArg(new GetDocIdNamespaceSpecificFunctionNode(new StringResultNode("")))
+ .addArg(new DocumentFieldNode("folder"))
+ .addArg(new DocumentFieldNode("flags")), 64)));
+ t.assertMatch(new Grouping()
+ .addLevel(level3));
+ }
+ }
+
+
+ private static GlobalId createGlobalId(int docId) {
+ return new GlobalId(
+ new DocumentId(String.format("doc:test:%d", docId)).getGlobalId());
+ }
+
+ private static ExpressionNode createDummyExpression() {
+ return new AddFunctionNode()
+ .addArg(new ConstantNode(new IntegerResultNode(2)))
+ .addArg(new ConstantNode(new IntegerResultNode(2)));
+ }
+
+ private static class SerializationTester implements AutoCloseable {
+
+ private static final String FILE_PATH = "src/test/files";
+
+ private final DataInputStream in;
+ private final String fileName;
+
+ public SerializationTester(String fileName) throws IOException {
+ this.fileName = fileName;
+ this.in = new DataInputStream(
+ new BufferedInputStream(
+ new FileInputStream(
+ new File(FILE_PATH, fileName))));
+ }
+
+ public SerializationTester assertMatch(Identifiable expectedObject) throws IOException {
+ int length = readLittleEndianInt(in);
+ byte[] originalData = new byte[length];
+ in.readFully(originalData);
+ Identifiable deserializedObject = Identifiable.create(new BufferSerializer(originalData));
+
+ if (!deserializedObject.equals(expectedObject)) {
+ fail(String.format("Serialized object in file '%s' does not equal expected values.\n" +
+ "==================================================\n" +
+ "Expected:\n" +
+ "==================================================\n" +
+ "%s\n" +
+ "==================================================\n" +
+ "Actual:\n" +
+ "==================================================\n" +
+ "%s\n" +
+ "==================================================\n",
+ fileName, dumpObject(expectedObject), dumpObject(deserializedObject)));
+ }
+ GrowableByteBuffer buffer = new GrowableByteBuffer(1024 * 8);
+ BufferSerializer serializer = new BufferSerializer(buffer);
+ deserializedObject.serializeWithId(serializer);
+ buffer.flip();
+
+ byte[] newData = new byte[buffer.limit()];
+ buffer.get(newData);
+ if (!Arrays.equals(newData, originalData)) {
+ fail(String.format("Serialized object data does not match the original serialized data from file.\n" +
+ "==================================================\n" +
+ "Original:\n" +
+ "==================================================\n" +
+ "%s\n" +
+ "==================================================\n" +
+ "Serialized:\n" +
+ "==================================================\n" +
+ "%s\n" +
+ "==================================================\n",
+ toHexString(originalData), toHexString(newData)));
+ }
+ return this;
+ }
+
+ private static int readLittleEndianInt(DataInputStream in) throws IOException {
+ byte[] data = new byte[4];
+ in.readFully(data);
+ ByteBuffer buffer = ByteBuffer.wrap(data);
+ buffer.order(ByteOrder.LITTLE_ENDIAN);
+ return buffer.getInt();
+ }
+
+ private static String dumpObject(Identifiable obj) {
+ ObjectDumper dumper = new ObjectDumper();
+ obj.visitMembers(dumper);
+ return dumper.toString();
+ }
+
+ @Override
+ public void close() throws IOException {
+ boolean moreDataAvailable = in.read() != -1;
+ in.close();
+ if (moreDataAvailable) {
+ fail("The file was not fully consumed. Did you forget to deserialize an object on Java side?");
+ }
+ }
+
+ private static String toHexString(byte[] data) {
+ char[] table = "0123456789ABCDEF".toCharArray();
+ StringBuilder builder = new StringBuilder();
+ builder.append("(" + data.length + " bytes)");
+ for (int i = 0; i < data.length; i++) {
+ if (i % 16 == 0) {
+ builder.append("\n");
+ }
+ builder.append(table[(data[i] >> 4) & 0xf]);
+ builder.append(table[data[i] & 0xf]);
+ builder.append(" ");
+ }
+ return builder.toString();
+ }
+
+
+ }
+
+}
diff --git a/searchlib/src/test/java/com/yahoo/searchlib/aggregation/GroupingTestCase.java b/searchlib/src/test/java/com/yahoo/searchlib/aggregation/GroupingTestCase.java
new file mode 100644
index 00000000000..f4ae62265d7
--- /dev/null
+++ b/searchlib/src/test/java/com/yahoo/searchlib/aggregation/GroupingTestCase.java
@@ -0,0 +1,227 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.aggregation;
+
+import com.yahoo.searchlib.expression.NullResultNode;
+import com.yahoo.searchlib.expression.StringBucketResultNode;
+import com.yahoo.vespa.objects.BufferSerializer;
+import org.junit.Test;
+
+import java.util.Arrays;
+import java.util.Collections;
+
+import static org.junit.Assert.*;
+
+/**
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
+ */
+public class GroupingTestCase {
+
+ private static final int VALID_BYTE_INDEX = 8;
+
+ @Test
+ public void requireThatIdAccessorsWork() {
+ Grouping grouping = new Grouping();
+ assertEquals(0, grouping.getId());
+
+ grouping = new Grouping(6);
+ assertEquals(6, grouping.getId());
+ grouping.setId(9);
+ assertEquals(9, grouping.getId());
+
+ Grouping other = new Grouping(6);
+ assertFalse(grouping.equals(other));
+ other.setId(9);
+ assertEquals(grouping, other);
+
+ assertEquals(grouping, grouping.clone());
+ assertSerialize(grouping);
+ }
+
+ @Test
+ public void requireThatAllAccessorsWork() {
+ Grouping grouping = new Grouping();
+ assertFalse(grouping.getAll());
+ grouping.setAll(true);
+ assertTrue(grouping.getAll());
+
+ Grouping other = new Grouping();
+ assertFalse(grouping.equals(other));
+ other.setAll(true);
+ assertEquals(grouping, other);
+
+ assertEquals(grouping, grouping.clone());
+ assertSerialize(grouping);
+ }
+
+ @Test
+ public void requireThatTopNAccessorsWork() {
+ Grouping grouping = new Grouping();
+ assertEquals(-1, grouping.getTopN());
+ grouping.setTopN(69);
+ assertEquals(69, grouping.getTopN());
+
+ Grouping other = new Grouping();
+ assertFalse(grouping.equals(other));
+ other.setTopN(69);
+ assertEquals(grouping, other);
+
+ assertEquals(grouping, grouping.clone());
+ assertSerialize(grouping);
+ }
+
+ @Test
+ public void requireThatFirstLevelAccessorsWork() {
+ Grouping grouping = new Grouping();
+ assertEquals(0, grouping.getFirstLevel());
+ grouping.setFirstLevel(69);
+ assertEquals(69, grouping.getFirstLevel());
+
+ Grouping other = new Grouping();
+ assertFalse(grouping.equals(other));
+ other.setFirstLevel(69);
+ assertEquals(grouping, other);
+
+ assertEquals(grouping, grouping.clone());
+ assertSerialize(grouping);
+ }
+
+ @Test
+ public void requireThatLastLevelAccessorsWork() {
+ Grouping grouping = new Grouping();
+ assertEquals(0, grouping.getLastLevel());
+ grouping.setLastLevel(69);
+ assertEquals(69, grouping.getLastLevel());
+
+ Grouping other = new Grouping();
+ assertFalse(grouping.equals(other));
+ other.setLastLevel(69);
+ assertEquals(grouping, other);
+
+ assertEquals(grouping, grouping.clone());
+ assertSerialize(grouping);
+ }
+
+ @Test
+ public void requireThatRootAccessorsWork() {
+ Grouping grouping = new Grouping();
+ assertEquals(new Group(), grouping.getRoot());
+ try {
+ grouping.setRoot(null);
+ fail();
+ } catch (NullPointerException e) {
+
+ }
+ Group root = new Group().setRank(6.9);
+ grouping.setRoot(root);
+ assertEquals(root, grouping.getRoot());
+
+ Grouping other = new Grouping();
+ assertFalse(grouping.equals(other));
+ other.setRoot(root);
+ assertEquals(grouping, other);
+
+ assertEquals(grouping, grouping.clone());
+ assertSerialize(grouping);
+ }
+
+ @Test
+ public void requireThatLevelAccessorsWork() {
+ Grouping grouping = new Grouping();
+ assertEquals(Collections.emptyList(), grouping.getLevels());
+ try {
+ grouping.addLevel(null);
+ fail();
+ } catch (NullPointerException e) {
+
+ }
+ GroupingLevel level = new GroupingLevel();
+ grouping.addLevel(level);
+ assertEquals(Arrays.asList(level), grouping.getLevels());
+
+ Grouping other = new Grouping();
+ assertFalse(grouping.equals(other));
+ other.addLevel(level);
+ assertEquals(grouping, other);
+
+ assertEquals(grouping, grouping.clone());
+ assertSerialize(grouping);
+ }
+
+ @Test
+ public void requireThatHashCodeIsImplemented() {
+ assertEquals(new Grouping().hashCode(), new Grouping().hashCode());
+ }
+
+ @Test
+ public void requireThatEqualsIsImplemented() {
+ assertFalse(new Grouping().equals(new Object()));
+ assertTrue(new Grouping().equals(new Grouping()));
+ }
+
+ @Test
+ public void requireThatValidAccessorsWork() {
+ byte[] arr = new byte[1024];
+ BufferSerializer buf = new BufferSerializer(arr);
+ Grouping grouping = new Grouping();
+ grouping.serializeWithId(buf);
+ buf.flip();
+ assertEquals(1, arr[VALID_BYTE_INDEX]);
+ arr[VALID_BYTE_INDEX] = 0;
+
+ Grouping other = (Grouping)Grouping.create(buf);
+ assertFalse(other.valid());
+
+ assertEquals(grouping, grouping.clone());
+ assertSerialize(grouping);
+ }
+
+ @Test
+ public void requireThatSetForceSinglePassWorks() {
+ assertFalse(new Grouping().getForceSinglePass());
+ assertFalse(new Grouping().setForceSinglePass(false).getForceSinglePass());
+ assertTrue(new Grouping().setForceSinglePass(true).getForceSinglePass());
+ }
+
+ @Test
+ public void requireThatNeedDeepResultCollectionWorks() {
+ assertFalse(new Grouping().addLevel(new GroupingLevel().setGroupPrototype(new Group())).needDeepResultCollection());
+ assertTrue(new Grouping().addLevel(new GroupingLevel().setGroupPrototype(new Group().addOrderBy(new CountAggregationResult(9), true))).needDeepResultCollection());
+ }
+
+ @Test
+ public void requireThatUseSinglePassWorks() {
+ assertFalse(new Grouping().useSinglePass());
+ assertFalse(new Grouping().setForceSinglePass(false).useSinglePass());
+ assertTrue(new Grouping().setForceSinglePass(true).useSinglePass());
+ assertFalse(new Grouping().addLevel(new GroupingLevel().setGroupPrototype(new Group())).useSinglePass());
+ assertTrue(new Grouping().addLevel(new GroupingLevel().setGroupPrototype(new Group().addOrderBy(new CountAggregationResult(9), true))).useSinglePass());
+ }
+
+ @Test
+ public void requireThatUnifyNullReplacesEmptyBucketIds() {
+ Grouping grouping = new Grouping();
+ grouping.getRoot().addChild(new Group().setId(new StringBucketResultNode()));
+ grouping.setLastLevel(1); // otherwise unifyNull will not operate on it
+ grouping.unifyNull();
+ assertEquals(NullResultNode.class, grouping.getRoot().getChildren().get(0).getId().getClass());
+ }
+
+ @Test
+ public void requireThatUnifyNullDoesNotReplaceNonEmptyBucketIds() {
+ Grouping grouping = new Grouping();
+ grouping.getRoot().addChild(new Group().setId(new StringBucketResultNode("6", "9")));
+ grouping.setLastLevel(1); // otherwise unifyNull will not operate on it
+ grouping.unifyNull();
+ assertEquals(StringBucketResultNode.class, grouping.getRoot().getChildren().get(0).getId().getClass());
+ }
+
+ private static void assertSerialize(Grouping grouping) {
+ BufferSerializer buf = new BufferSerializer();
+ grouping.serializeWithId(buf);
+
+ buf.flip();
+ Grouping other = (Grouping)Grouping.create(buf);
+ assertEquals(grouping, other);
+ }
+}
+
diff --git a/searchlib/src/test/java/com/yahoo/searchlib/aggregation/MergeTestCase.java b/searchlib/src/test/java/com/yahoo/searchlib/aggregation/MergeTestCase.java
new file mode 100755
index 00000000000..67361048773
--- /dev/null
+++ b/searchlib/src/test/java/com/yahoo/searchlib/aggregation/MergeTestCase.java
@@ -0,0 +1,735 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.aggregation;
+
+import com.yahoo.document.DocumentId;
+import com.yahoo.document.GlobalId;
+import com.yahoo.searchlib.expression.*;
+import junit.framework.TestCase;
+
+import java.util.Arrays;
+import java.util.List;
+
+/**
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
+ */
+public class MergeTestCase extends TestCase {
+
+ private GlobalId createGlobalId(int docId) {
+ return new GlobalId((new DocumentId("doc:test:" + docId)).getGlobalId());
+ }
+
+ // Test merging of hits.
+ public void testMergeHits() {
+ Grouping request = new Grouping()
+ .setFirstLevel(0)
+ .setLastLevel(1)
+ .addLevel(new GroupingLevel().setMaxGroups(69));
+
+ Group expect = new Group()
+ .addAggregationResult(new HitsAggregationResult()
+ .setMaxHits(5)
+ .addHit(new FS4Hit(30, createGlobalId(30), 30))
+ .addHit(new FS4Hit(20, createGlobalId(20), 20))
+ .addHit(new FS4Hit(10, createGlobalId(10), 10))
+ .addHit(new FS4Hit(5, createGlobalId(9), 9))
+ .addHit(new FS4Hit(6, createGlobalId(8), 8))
+ .setExpression(new ConstantNode( new IntegerResultNode(0))));
+
+ Group a = new Group()
+ .addAggregationResult(new HitsAggregationResult()
+ .setMaxHits(5)
+ .addHit(new FS4Hit(10, createGlobalId(10), 10))
+ .addHit(new FS4Hit(1, createGlobalId(5), 5))
+ .addHit(new FS4Hit(2, createGlobalId(4), 4))
+ .setExpression(new ConstantNode( new IntegerResultNode(0))));
+
+ Group b = new Group()
+ .addAggregationResult(new HitsAggregationResult()
+ .setMaxHits(5)
+ .addHit(new FS4Hit(20, createGlobalId(20), 20))
+ .addHit(new FS4Hit(3, createGlobalId(7), 7))
+ .addHit(new FS4Hit(4, createGlobalId(6), 6))
+ .setExpression(new ConstantNode( new IntegerResultNode(0))));
+
+ Group c = new Group()
+ .addAggregationResult(new HitsAggregationResult()
+ .setMaxHits(5)
+ .addHit(new FS4Hit(30, createGlobalId(30), 30))
+ .addHit(new FS4Hit(5, createGlobalId(9), 9))
+ .addHit(new FS4Hit(6, createGlobalId(8), 8))
+ .setExpression(new ConstantNode( new IntegerResultNode(0))));
+
+ assertMerge(request, a, b, c, expect);
+ assertMerge(request, a, c, b, expect);
+ assertMerge(request, b, a, c, expect);
+ assertMerge(request, c, a, b, expect);
+ assertMerge(request, b, c, a, expect);
+ assertMerge(request, c, b, a, expect);
+ }
+
+ // Test merging the sum of the values from a single attribute vector that was collected directly into the root node.
+ public void testMergeSimpleSum() {
+ Grouping lhs = new Grouping()
+ .setRoot(new Group()
+ .addAggregationResult(new SumAggregationResult()
+ .setSum(new IntegerResultNode(20))
+ .setExpression(new AttributeNode("foo"))));
+
+ Grouping rhs = new Grouping()
+ .setRoot(new Group()
+ .addAggregationResult(new SumAggregationResult()
+ .setSum(new IntegerResultNode(30))
+ .setExpression(new AttributeNode("foo"))));
+
+ Group expect = new Group()
+ .addAggregationResult(new SumAggregationResult()
+ .setSum(new IntegerResultNode(50))
+ .setExpression(new AttributeNode("foo")));
+
+ assertMerge(lhs, rhs, expect);
+ }
+
+ // Test merging of the value from a single attribute vector in level 1.
+ public void testMergeSingleChild() {
+ Grouping lhs = new Grouping()
+ .setFirstLevel(0)
+ .setLastLevel(1)
+ .setRoot(new Group().addChild(new Group()
+ .setId(new StringResultNode("foo"))
+ .addAggregationResult(new SumAggregationResult()
+ .setSum(new IntegerResultNode(20))
+ .setExpression(new AttributeNode("foo")))));
+
+ Grouping rhs = new Grouping()
+ .setFirstLevel(0)
+ .setLastLevel(1)
+ .setRoot(new Group().addChild(new Group()
+ .setId(new StringResultNode("foo"))
+ .addAggregationResult(new SumAggregationResult()
+ .setSum(new IntegerResultNode(30))
+ .setExpression(new AttributeNode("foo")))));
+
+ Group expect = new Group().addChild(new Group()
+ .setId(new StringResultNode("foo"))
+ .addAggregationResult(new SumAggregationResult()
+ .setSum(new IntegerResultNode(50))
+ .setExpression(new AttributeNode("foo"))));
+
+ assertMerge(lhs, rhs, expect);
+ }
+
+ // Test merging of the value from a multiple attribute vectors in level 1.
+ public void testMergeMultiChild() {
+ Grouping lhs = new Grouping()
+ .setFirstLevel(0)
+ .setLastLevel(1)
+ .setRoot(new Group()
+ .addChild(new Group()
+ .setId(new StringResultNode("foo"))
+ .addAggregationResult(new SumAggregationResult()
+ .setSum(new IntegerResultNode(20))
+ .setExpression(new AttributeNode("foo"))))
+ .addChild(new Group()
+ .setId(new StringResultNode("bar"))
+ .addAggregationResult(new SumAggregationResult()
+ .setSum(new IntegerResultNode(40))
+ .setExpression(new AttributeNode("foo")))));
+
+ Grouping rhs = new Grouping()
+ .setFirstLevel(0)
+ .setLastLevel(1)
+ .setRoot(new Group()
+ .addChild(new Group()
+ .setId(new StringResultNode("foo"))
+ .addAggregationResult(new SumAggregationResult()
+ .setSum(new IntegerResultNode(30))
+ .setExpression(new AttributeNode("foo"))))
+ .addChild(new Group()
+ .setId(new StringResultNode("baz"))
+ .addAggregationResult(new SumAggregationResult()
+ .setSum(new IntegerResultNode(30))
+ .setExpression(new AttributeNode("foo")))));
+
+ Group expect = new Group().addChild(
+ new Group()
+ .setId(new StringResultNode("foo"))
+ .addAggregationResult(new SumAggregationResult()
+ .setSum(new IntegerResultNode(50))
+ .setExpression(new AttributeNode("foo"))))
+ .addChild(new Group()
+ .setId(new StringResultNode("bar"))
+ .addAggregationResult(new SumAggregationResult()
+ .setSum(new IntegerResultNode(40))
+ .setExpression(new AttributeNode("foo"))))
+ .addChild(new Group()
+ .setId(new StringResultNode("baz"))
+ .addAggregationResult(new SumAggregationResult()
+ .setSum(new IntegerResultNode(30))
+ .setExpression(new AttributeNode("foo"))));
+
+ assertMerge(lhs, rhs, expect);
+ }
+
+ // Verify that frozen levels are not touched during merge.
+ public void testMergeLevels() {
+ Grouping request = new Grouping()
+ .addLevel(new GroupingLevel()
+ .setExpression(new AttributeNode("c1"))
+ .setGroupPrototype(new Group().addAggregationResult(
+ new SumAggregationResult().setExpression(new AttributeNode("s1")))))
+ .addLevel(new GroupingLevel()
+ .setExpression(new AttributeNode("c2"))
+ .setGroupPrototype(new Group().addAggregationResult(
+ new SumAggregationResult().setExpression(new AttributeNode("s2")))))
+ .addLevel(new GroupingLevel()
+ .setExpression(new AttributeNode("c3"))
+ .setGroupPrototype(new Group().addAggregationResult(
+ new SumAggregationResult().setExpression(new AttributeNode("s3")))));
+
+ Group lhs = new Group()
+ .addAggregationResult(new SumAggregationResult()
+ .setSum(new IntegerResultNode(5))
+ .setExpression(new AttributeNode("s0")))
+ .addChild(new Group()
+ .setId(new IntegerResultNode(10))
+ .addAggregationResult(new SumAggregationResult()
+ .setSum(new IntegerResultNode(10))
+ .setExpression(new AttributeNode("s1")))
+ .addChild(new Group()
+ .setId(new IntegerResultNode(20))
+ .addAggregationResult(new SumAggregationResult()
+ .setSum(new IntegerResultNode(15))
+ .setExpression(new AttributeNode("s2")))
+ .addChild(new Group()
+ .setId(new IntegerResultNode(30))
+ .addAggregationResult(new SumAggregationResult()
+ .setSum(new IntegerResultNode(20))
+ .setExpression(new AttributeNode("s3"))))));
+
+ Group rhs = new Group()
+ .addAggregationResult(new SumAggregationResult()
+ .setSum(new IntegerResultNode(5))
+ .setExpression(new AttributeNode("s0")))
+ .addChild(new Group()
+ .setId(new IntegerResultNode(10))
+ .addAggregationResult(new SumAggregationResult()
+ .setSum(new IntegerResultNode(10))
+ .setExpression(new AttributeNode("s1")))
+ .addChild(new Group()
+ .setId(new IntegerResultNode(20))
+ .addAggregationResult(new SumAggregationResult()
+ .setSum(new IntegerResultNode(15))
+ .setExpression(new AttributeNode("s2")))
+ .addChild(new Group()
+ .setId(new IntegerResultNode(30))
+ .addAggregationResult(new SumAggregationResult()
+ .setSum(new IntegerResultNode(20))
+ .setExpression(new AttributeNode("s3"))))));
+
+ Group expectAll = new Group()
+ .addAggregationResult(new SumAggregationResult()
+ .setSum(new IntegerResultNode(10))
+ .setExpression(new AttributeNode("s0")))
+ .addChild(new Group()
+ .setId(new IntegerResultNode(10))
+ .addAggregationResult(new SumAggregationResult()
+ .setSum(new IntegerResultNode(20))
+ .setExpression(new AttributeNode("s1")))
+ .addChild(new Group()
+ .setId(new IntegerResultNode(20))
+ .addAggregationResult(new SumAggregationResult()
+ .setSum(new IntegerResultNode(30))
+ .setExpression(new AttributeNode("s2")))
+ .addChild(new Group()
+ .setId(new IntegerResultNode(30))
+ .addAggregationResult(new SumAggregationResult()
+ .setSum(new IntegerResultNode(40))
+ .setExpression(new AttributeNode("s3"))))));
+
+ Group expect0 = new Group()
+ .addAggregationResult(new SumAggregationResult()
+ .setSum(new IntegerResultNode(5))
+ .setExpression(new AttributeNode("s0")))
+ .addChild(new Group()
+ .setId(new IntegerResultNode(10))
+ .addAggregationResult(new SumAggregationResult()
+ .setSum(new IntegerResultNode(20))
+ .setExpression(new AttributeNode("s1")))
+ .addChild(new Group()
+ .setId(new IntegerResultNode(20))
+ .addAggregationResult(new SumAggregationResult()
+ .setSum(new IntegerResultNode(30))
+ .setExpression(new AttributeNode("s2")))
+ .addChild(new Group()
+ .setId(new IntegerResultNode(30))
+ .addAggregationResult(new SumAggregationResult()
+ .setSum(new IntegerResultNode(40))
+ .setExpression(new AttributeNode("s3"))))));
+
+ Group expect1 = new Group()
+ .addAggregationResult(new SumAggregationResult()
+ .setSum(new IntegerResultNode(5))
+ .setExpression(new AttributeNode("s0")))
+ .addChild(new Group()
+ .setId(new IntegerResultNode(10))
+ .addAggregationResult(new SumAggregationResult()
+ .setSum(new IntegerResultNode(10))
+ .setExpression(new AttributeNode("s1")))
+ .addChild(new Group()
+ .setId(new IntegerResultNode(20))
+ .addAggregationResult(new SumAggregationResult()
+ .setSum(new IntegerResultNode(30))
+ .setExpression(new AttributeNode("s2")))
+ .addChild(new Group()
+ .setId(new IntegerResultNode(30))
+ .addAggregationResult(new SumAggregationResult()
+ .setSum(new IntegerResultNode(40))
+ .setExpression(new AttributeNode("s3"))))));
+
+ Group expect2 = new Group()
+ .addAggregationResult(new SumAggregationResult()
+ .setSum(new IntegerResultNode(5))
+ .setExpression(new AttributeNode("s0")))
+ .addChild(new Group()
+ .setId(new IntegerResultNode(10))
+ .addAggregationResult(new SumAggregationResult()
+ .setSum(new IntegerResultNode(10))
+ .setExpression(new AttributeNode("s1")))
+ .addChild(new Group()
+ .setId(new IntegerResultNode(20))
+ .addAggregationResult(new SumAggregationResult()
+ .setSum(new IntegerResultNode(15))
+ .setExpression(new AttributeNode("s2")))
+ .addChild(new Group()
+ .setId(new IntegerResultNode(30))
+ .addAggregationResult(new SumAggregationResult()
+ .setSum(new IntegerResultNode(40))
+ .setExpression(new AttributeNode("s3"))))));
+
+ Group expect3 = new Group()
+ .addAggregationResult(new SumAggregationResult()
+ .setSum(new IntegerResultNode(5))
+ .setExpression(new AttributeNode("s0")))
+ .addChild(new Group()
+ .setId(new IntegerResultNode(10))
+ .addAggregationResult(new SumAggregationResult()
+ .setSum(new IntegerResultNode(10))
+ .setExpression(new AttributeNode("s1")))
+ .addChild(new Group()
+ .setId(new IntegerResultNode(20))
+ .addAggregationResult(new SumAggregationResult()
+ .setSum(new IntegerResultNode(15))
+ .setExpression(new AttributeNode("s2")))
+ .addChild(new Group()
+ .setId(new IntegerResultNode(30))
+ .addAggregationResult(new SumAggregationResult()
+ .setSum(new IntegerResultNode(20))
+ .setExpression(new AttributeNode("s3"))))));
+
+ request.setFirstLevel(0).setLastLevel(3);
+ assertMerge(request, lhs, rhs, expectAll);
+ request.setFirstLevel(1).setLastLevel(3);
+ assertMerge(request, lhs, rhs, expect0);
+ request.setFirstLevel(2).setLastLevel(5);
+ assertMerge(request, lhs, rhs, expect1);
+ request.setFirstLevel(3).setLastLevel(5);
+ assertMerge(request, lhs, rhs, expect2);
+ request.setFirstLevel(4).setLastLevel(4);
+ assertMerge(request, lhs, rhs, expect3);
+ }
+
+ // Verify that the number of groups for a level is pruned down to maxGroups, that the remaining groups are the
+ // highest ranked ones, and that they are sorted by group id.
+ public void testMergeGroups() {
+ Grouping request = new Grouping()
+ .addLevel(new GroupingLevel()
+ .setExpression(new AttributeNode("attr")));
+ Group lhs = new Group()
+ .addChild(new Group().setId(new IntegerResultNode(5)).setRank(5))
+ .addChild(new Group().setId(new IntegerResultNode(10)).setRank(5))
+ .addChild(new Group().setId(new IntegerResultNode(15)).setRank(15))
+ .addChild(new Group().setId(new IntegerResultNode(40)).setRank(100))
+ .addChild(new Group().setId(new IntegerResultNode(50)).setRank(30));
+
+ Group rhs = new Group()
+ .addChild(new Group().setId(new IntegerResultNode(0)).setRank(10))
+ .addChild(new Group().setId(new IntegerResultNode(10)).setRank(50))
+ .addChild(new Group().setId(new IntegerResultNode(20)).setRank(25))
+ .addChild(new Group().setId(new IntegerResultNode(40)).setRank(10))
+ .addChild(new Group().setId(new IntegerResultNode(45)).setRank(20));
+
+ Group expect3 = new Group()
+ .addChild(new Group().setId(new IntegerResultNode(10)).setRank(50))
+ .addChild(new Group().setId(new IntegerResultNode(40)).setRank(100))
+ .addChild(new Group().setId(new IntegerResultNode(50)).setRank(30));
+
+ Group expect5 = new Group()
+ .addChild(new Group().setId(new IntegerResultNode(10)).setRank(50))
+ .addChild(new Group().setId(new IntegerResultNode(20)).setRank(25))
+ .addChild(new Group().setId(new IntegerResultNode(40)).setRank(100))
+ .addChild(new Group().setId(new IntegerResultNode(45)).setRank(20))
+ .addChild(new Group().setId(new IntegerResultNode(50)).setRank(30));
+
+ Group expectAll = new Group()
+ .addChild(new Group().setId(new IntegerResultNode(0)).setRank(10))
+ .addChild(new Group().setId(new IntegerResultNode(5)).setRank(5))
+ .addChild(new Group().setId(new IntegerResultNode(10)).setRank(50))
+ .addChild(new Group().setId(new IntegerResultNode(15)).setRank(15))
+ .addChild(new Group().setId(new IntegerResultNode(20)).setRank(25))
+ .addChild(new Group().setId(new IntegerResultNode(40)).setRank(100))
+ .addChild(new Group().setId(new IntegerResultNode(45)).setRank(20))
+ .addChild(new Group().setId(new IntegerResultNode(50)).setRank(30));
+
+ request.getLevels().get(0).setMaxGroups(3);
+ assertMerge(request, lhs, rhs, expect3);
+ assertMerge(request, rhs, lhs, expect3);
+
+ request.getLevels().get(0).setMaxGroups(5);
+ assertMerge(request, lhs, rhs, expect5);
+ assertMerge(request, rhs, lhs, expect5);
+
+ request.getLevels().get(0).setMaxGroups(-1);
+ assertMerge(request, lhs, rhs, expectAll);
+ assertMerge(request, rhs, lhs, expectAll);
+ }
+
+ public void testMergeBuckets() {
+ Grouping lhs = new Grouping()
+ .setRoot(new Group().setTag(0)
+ .addChild(new Group().setId(new FloatBucketResultNode(FloatResultNode.getNegativeInfinity().getFloat(), 0.4))
+ .addAggregationResult(new CountAggregationResult().setCount(1))
+ .setTag(1))
+ .addChild(new Group().setId(new FloatBucketResultNode(0, 0))
+ .addAggregationResult(new CountAggregationResult().setCount(12))
+ .setTag(1)));
+
+ Grouping rhs = new Grouping()
+ .setRoot(new Group().setTag(0)
+ .addChild(new Group().setId(new FloatBucketResultNode(FloatResultNode.getNegativeInfinity().getFloat(), 0.4))
+ .addAggregationResult(new CountAggregationResult().setCount(0))
+ .setTag(1))
+ .addChild(new Group().setId(new FloatBucketResultNode(0, 0))
+ .addAggregationResult(new CountAggregationResult().setCount(15))
+ .setTag(1)));
+
+ Group expected = new Group().setTag(0)
+ .addChild(new Group().setId(new FloatBucketResultNode(FloatResultNode.getNegativeInfinity().getFloat(), 0.4))
+ .addAggregationResult(new CountAggregationResult().setCount(1))
+ .setTag(1))
+ .addChild(new Group().setId(new FloatBucketResultNode(0, 0))
+ .addAggregationResult(new CountAggregationResult().setCount(27))
+ .setTag(1));
+ assertMerge(lhs, rhs, expected);
+ }
+
+ // Merge two trees that are ordered by an expression, and verify that the resulting order after merge is correct.
+ public void testMergeExpressions() {
+ Grouping a = new Grouping()
+ .setFirstLevel(0)
+ .setLastLevel(1)
+ .addLevel(new GroupingLevel().setMaxGroups(1))
+ .setRoot(new Group()
+ .addChild(new Group().setId(new StringResultNode("aa"))
+ .addAggregationResult(new MaxAggregationResult().setMax(new IntegerResultNode(9)))
+ .addAggregationResult(new CountAggregationResult().setCount(2))
+ .addOrderBy(new MultiplyFunctionNode().addArg(new AggregationRefNode(0))
+ .addArg(new AggregationRefNode(1)), true)));
+ Grouping b = new Grouping()
+ .setFirstLevel(0)
+ .setLastLevel(1)
+ .addLevel(new GroupingLevel().setMaxGroups(1))
+ .setRoot(new Group()
+ .addChild(new Group().setId(new StringResultNode("ab"))
+ .addAggregationResult(new MaxAggregationResult().setMax(
+ new IntegerResultNode(12)))
+ .addAggregationResult(new CountAggregationResult().setCount(1))
+ .addOrderBy(new MultiplyFunctionNode().addArg(new AggregationRefNode(0))
+ .addArg(new AggregationRefNode(1)), true)));
+
+ Grouping expected = new Grouping()
+ .setFirstLevel(0)
+ .setLastLevel(1)
+ .addLevel(new GroupingLevel().setMaxGroups(1))
+ .setRoot(new Group()
+ .addChild(new Group().setId(new StringResultNode("ab"))
+ .addAggregationResult(new MaxAggregationResult().setMax(
+ new IntegerResultNode(12)))
+ .addAggregationResult(new CountAggregationResult().setCount(1))
+ .addOrderBy(new MultiplyFunctionNode().addArg(new AggregationRefNode(0))
+ .addArg(new AggregationRefNode(1)), true)));
+ expected.postMerge();
+
+ a.merge(b);
+ a.postMerge();
+ assertEquals(expected.toString(), a.toString());
+ }
+
+ // Merge two relatively complex tree structures and verify that the end result is as expected.
+ public void testMergeTrees() {
+ Grouping request = new Grouping()
+ .addLevel(new GroupingLevel()
+ .setMaxGroups(3)
+ .setExpression(new AttributeNode("c1"))
+ .setGroupPrototype(new Group().addAggregationResult(
+ new SumAggregationResult().setExpression(new AttributeNode("s1")))))
+ .addLevel(new GroupingLevel()
+ .setMaxGroups(2)
+ .setExpression(new AttributeNode("c2"))
+ .setGroupPrototype(new Group().addAggregationResult(
+ new SumAggregationResult().setExpression(new AttributeNode("s2")))))
+ .addLevel(new GroupingLevel()
+ .setMaxGroups(1)
+ .setExpression(new AttributeNode("c3"))
+ .setGroupPrototype(new Group().addAggregationResult(
+ new SumAggregationResult().setExpression(new AttributeNode("s3")))));
+
+ Group lhs = new Group()
+ .addAggregationResult(new SumAggregationResult()
+ .setSum(new IntegerResultNode(100))
+ .setExpression(new AttributeNode("s0")))
+ .addChild(new Group().setId(new IntegerResultNode(4)).setRank(10))
+ .addChild(new Group()
+ .setId(new IntegerResultNode(5))
+ .setRank(5) // merged with 200 rank node
+ .addAggregationResult(new SumAggregationResult()
+ .setSum(new IntegerResultNode(100))
+ .setExpression(new AttributeNode("s1")))
+ .addChild(new Group().setId(new IntegerResultNode(4)).setRank(10))
+ .addChild(new Group()
+ .setId(new IntegerResultNode(5))
+ .setRank(500)
+ .addAggregationResult(new SumAggregationResult()
+ .setSum(new IntegerResultNode(100))
+ .setExpression(new AttributeNode("s2")))
+ .addChild(new Group().setId(new IntegerResultNode(4)).setRank(10))
+ .addChild(new Group()
+ .setId(new IntegerResultNode(5))
+ .setRank(200)
+ .addAggregationResult(new SumAggregationResult()
+ .setSum(new IntegerResultNode(100))
+ .setExpression(new AttributeNode("s3"))))))
+ .addChild(new Group().setId(new IntegerResultNode(9)).setRank(10))
+ .addChild(new Group()
+ .setId(new IntegerResultNode(10))
+ .setRank(100)
+ .addAggregationResult(new SumAggregationResult()
+ .setSum(new IntegerResultNode(100))
+ .setExpression(new AttributeNode("s1")))
+ // dummy child would be picked up here
+ .addChild(new Group()
+ .setId(new IntegerResultNode(15))
+ .setRank(200)
+ .addAggregationResult(new SumAggregationResult()
+ .setSum(new IntegerResultNode(100))
+ .setExpression(new AttributeNode("s2")))
+ .addChild(new Group().setId(new IntegerResultNode(14)).setRank(10))
+ .addChild(new Group()
+ .setId(new IntegerResultNode(15))
+ .setRank(300)
+ .addAggregationResult(new SumAggregationResult()
+ .setSum(new IntegerResultNode(100))
+ .setExpression(new AttributeNode("s3"))))))
+ .addChild(new Group().setId(new IntegerResultNode(14)).setRank(10))
+ .addChild(new Group()
+ .setId(new IntegerResultNode(15))
+ .setRank(300)
+ .addAggregationResult(new SumAggregationResult()
+ .setSum(new IntegerResultNode(100))
+ .setExpression(new AttributeNode("s1")))
+ .addChild(new Group().setId(new IntegerResultNode(19)).setRank(10))
+ .addChild(new Group()
+ .setId(new IntegerResultNode(20))
+ .setRank(100)
+ .addAggregationResult(new SumAggregationResult()
+ .setSum(new IntegerResultNode(100))
+ .setExpression(new AttributeNode("s2")))));
+
+ Group rhs = new Group()
+ .addAggregationResult(new SumAggregationResult()
+ .setSum(new IntegerResultNode(100))
+ .setExpression(new AttributeNode("s0")))
+ .addChild(new Group().setId(new IntegerResultNode(4)).setRank(10))
+ .addChild(new Group()
+ .setId(new IntegerResultNode(5))
+ .setRank(200)
+ .addAggregationResult(new SumAggregationResult()
+ .setSum(new IntegerResultNode(100))
+ .setExpression(new AttributeNode("s1")))
+ .addChild(new Group().setId(new IntegerResultNode(9)).setRank(10))
+ .addChild(new Group()
+ .setId(new IntegerResultNode(10))
+ .setRank(400)
+ .addAggregationResult(new SumAggregationResult()
+ .setSum(new IntegerResultNode(100))
+ .setExpression(new AttributeNode("s2")))
+ .addChild(new Group().setId(new IntegerResultNode(9)).setRank(10))
+ .addChild(new Group()
+ .setId(new IntegerResultNode(10))
+ .setRank(100)
+ .addAggregationResult(new SumAggregationResult()
+ .setSum(new IntegerResultNode(100))
+ .setExpression(new AttributeNode("s3"))))))
+ .addChild(new Group().setId(new IntegerResultNode(9)).setRank(10))
+ .addChild(new Group()
+ .setId(new IntegerResultNode(10))
+ .setRank(100)
+ .addAggregationResult(new SumAggregationResult()
+ .setSum(new IntegerResultNode(100))
+ .setExpression(new AttributeNode("s1")))
+ // dummy child would be picket up here
+ .addChild(new Group()
+ .setId(new IntegerResultNode(15))
+ .setRank(200)
+ .addAggregationResult(new SumAggregationResult()
+ .setSum(new IntegerResultNode(100))
+ .setExpression(new AttributeNode("s2")))))
+ .addChild(new Group().setId(new IntegerResultNode(14)).setRank(10))
+ .addChild(new Group()
+ .setId(new IntegerResultNode(15))
+ .setRank(5) // merged with 300 rank node
+ .addAggregationResult(new SumAggregationResult()
+ .setSum(new IntegerResultNode(100))
+ .setExpression(new AttributeNode("s1")))
+ .addChild(new Group().setId(new IntegerResultNode(19)).setRank(10))
+ .addChild(new Group()
+ .setId(new IntegerResultNode(20))
+ .setRank(5) // merged with 100 rank node
+ .addAggregationResult(new SumAggregationResult()
+ .setSum(new IntegerResultNode(100))
+ .setExpression(new AttributeNode("s2")))
+ .addChild(new Group().setId(new IntegerResultNode(19)).setRank(10))
+ .addChild(new Group()
+ .setId(new IntegerResultNode(20))
+ .setRank(500)
+ .addAggregationResult(new SumAggregationResult()
+ .setSum(new IntegerResultNode(100))
+ .setExpression(new AttributeNode("s3")))))
+ .addChild(new Group().setId(new IntegerResultNode(24)).setRank(10))
+ .addChild(new Group()
+ .setId(new IntegerResultNode(25))
+ .setRank(300)
+ .addAggregationResult(new SumAggregationResult()
+ .setSum(new IntegerResultNode(100))
+ .setExpression(new AttributeNode("s2")))
+ .addChild(new Group().setId(new IntegerResultNode(24)).setRank(10))
+ .addChild(new Group()
+ .setId(new IntegerResultNode(25))
+ .setRank(400)
+ .addAggregationResult(new SumAggregationResult()
+ .setSum(new IntegerResultNode(100))
+ .setExpression(new AttributeNode("s3"))))));
+
+ Group expect = new Group()
+ .addAggregationResult(new SumAggregationResult()
+ .setSum(new IntegerResultNode(200))
+ .setExpression(new AttributeNode("s0")))
+ .addChild(new Group()
+ .setId(new IntegerResultNode(5))
+ .setRank(200)
+ .addAggregationResult(new SumAggregationResult()
+ .setSum(new IntegerResultNode(200))
+ .setExpression(new AttributeNode("s1")))
+ .addChild(new Group()
+ .setId(new IntegerResultNode(5))
+ .setRank(500)
+ .addAggregationResult(new SumAggregationResult()
+ .setSum(new IntegerResultNode(100))
+ .setExpression(new AttributeNode("s2")))
+ .addChild(new Group()
+ .setId(new IntegerResultNode(5))
+ .setRank(200)
+ .addAggregationResult(new SumAggregationResult()
+ .setSum(new IntegerResultNode(100))
+ .setExpression(new AttributeNode("s3")))))
+ .addChild(new Group()
+ .setId(new IntegerResultNode(10))
+ .setRank(400)
+ .addAggregationResult(new SumAggregationResult()
+ .setSum(new IntegerResultNode(100))
+ .setExpression(new AttributeNode("s2")))
+ .addChild(new Group()
+ .setId(new IntegerResultNode(10))
+ .setRank(100)
+ .addAggregationResult(new SumAggregationResult()
+ .setSum(new IntegerResultNode(100))
+ .setExpression(new AttributeNode("s3"))))))
+ .addChild(new Group()
+ .setId(new IntegerResultNode(10))
+ .setRank(100)
+ .addAggregationResult(new SumAggregationResult()
+ .setSum(new IntegerResultNode(200))
+ .setExpression(new AttributeNode("s1")))
+ .addChild(new Group()
+ .setId(new IntegerResultNode(15))
+ .setRank(200)
+ .addAggregationResult(new SumAggregationResult()
+ .setSum(new IntegerResultNode(200))
+ .setExpression(new AttributeNode("s2")))
+ .addChild(new Group()
+ .setId(new IntegerResultNode(15))
+ .setRank(300)
+ .addAggregationResult(new SumAggregationResult()
+ .setSum(new IntegerResultNode(100))
+ .setExpression(new AttributeNode("s3"))))))
+ .addChild(new Group()
+ .setId(new IntegerResultNode(15))
+ .setRank(300)
+ .addAggregationResult(new SumAggregationResult()
+ .setSum(new IntegerResultNode(200))
+ .setExpression(new AttributeNode("s1")))
+ .addChild(new Group()
+ .setId(new IntegerResultNode(20))
+ .setRank(100)
+ .addAggregationResult(new SumAggregationResult()
+ .setSum(new IntegerResultNode(200))
+ .setExpression(new AttributeNode("s2")))
+ .addChild(new Group()
+ .setId(new IntegerResultNode(20))
+ .setRank(500)
+ .addAggregationResult(new SumAggregationResult()
+ .setSum(new IntegerResultNode(100))
+ .setExpression(new AttributeNode("s3")))))
+ .addChild(new Group()
+ .setId(new IntegerResultNode(25))
+ .setRank(300)
+ .addAggregationResult(new SumAggregationResult()
+ .setSum(new IntegerResultNode(100))
+ .setExpression(new AttributeNode("s2")))
+ .addChild(new Group()
+ .setId(new IntegerResultNode(25))
+ .setRank(400)
+ .addAggregationResult(new SumAggregationResult()
+ .setSum(new IntegerResultNode(100))
+ .setExpression(new AttributeNode("s3"))))));
+
+ assertMerge(request, lhs, rhs, expect);
+ assertMerge(request, rhs, lhs, expect);
+ }
+
+ private static void assertMerge(Grouping request, Group lhs, Group rhs, Group expect) {
+ assertMerge(Arrays.asList(request.clone().setRoot(lhs.clone()),
+ request.clone().setRoot(rhs.clone())),
+ expect);
+ }
+
+ private static void assertMerge(Grouping request, Group a, Group b, Group c, Group expect) {
+ assertMerge(Arrays.asList(request.clone().setRoot(a.clone()),
+ request.clone().setRoot(b.clone()),
+ request.clone().setRoot(c.clone())),
+ expect);
+ }
+
+ private static void assertMerge(Grouping lhs, Grouping rhs, Group expect) {
+ assertMerge(Arrays.asList(lhs, rhs), expect);
+ }
+
+ private static void assertMerge(List<Grouping> groupingList, Group expect) {
+ Grouping tmp = groupingList.get(0).clone();
+ for (int i = 1; i < groupingList.size(); ++i) {
+ tmp.merge(groupingList.get(i));
+ }
+ tmp.postMerge();
+ assertEquals(expect.toString(), tmp.getRoot().toString());
+ assertEquals(expect, tmp.getRoot());
+ }
+}
diff --git a/searchlib/src/test/java/com/yahoo/searchlib/aggregation/hll/BiasEstimatorTest.java b/searchlib/src/test/java/com/yahoo/searchlib/aggregation/hll/BiasEstimatorTest.java
new file mode 100644
index 00000000000..307214d8c1c
--- /dev/null
+++ b/searchlib/src/test/java/com/yahoo/searchlib/aggregation/hll/BiasEstimatorTest.java
@@ -0,0 +1,70 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.aggregation.hll;
+
+import org.junit.Test;
+
+import static org.junit.Assert.assertEquals;
+
+public class BiasEstimatorTest {
+
+ @Test
+ public void requireThatExactValueIsReturnedIfAvailable() {
+ BiasEstimator biasEstimator = new BiasEstimator(10);
+ // Index 0 in biasData/rawEstimateData
+ assertEstimateEquals(737.1256, 738.1256, biasEstimator);
+ // Index 10 in biasData/rawEstimateData
+ assertEstimateEquals(612.1992, 868.1992, biasEstimator);
+ // Index 199 (last) in biasData/rawEstimateData
+ assertEstimateEquals(-9.81720000000041, 5084.1828, biasEstimator);
+ }
+
+ @Test
+ public void requireThatBiasEstimatorHandlesAllValidPrecisions() {
+ // Index 0 values for biasData/rawEstimateData for each precision
+ double[][] testValuesForPrecision = new double[][] {
+ {11, 10},
+ {23, 22},
+ {46, 45},
+ {92, 91},
+ {184.2152, 183.2152},
+ {369, 368},
+ {738.1256, 737.1256},
+ {1477, 1476},
+ {2954, 2953},
+ {5908.5052, 5907.5052},
+ {11817.475, 11816.475},
+ {23635.0036, 23634.0036},
+ {47271, 47270},
+ {94542, 94541},
+ {189084, 189083}
+ };
+ for (int p = 4; p <= 18; p++) {
+ assertEstimateEquals(testValuesForPrecision[p - 4][1], testValuesForPrecision[p - 4][0], new BiasEstimator(p));
+ }
+ }
+
+ @Test
+ public void requireThatEdgeCasesAreCorrect() {
+ BiasEstimator estimator = new BiasEstimator(10);
+ // Test with a raw estimate less than first element of rawEstimateData
+ assertEstimateEquals(737.1256, 7, estimator);
+ // Test with a raw estimate larger than last element of rawEstimateData
+ assertEstimateEquals(-9.81720000000041, 9001, estimator);
+ }
+
+ @Test
+ public void requireThatLinearInterpolationIsCorrect() {
+ BiasEstimator estimator = new BiasEstimator(10);
+ double rawEstimate = (738.1256 + 750.4234) / 2; // average of two first elements
+ double expectedBias = (737.1256 + 724.4234) / 2;
+ assertEstimateEquals(expectedBias, rawEstimate, estimator);
+
+ rawEstimate = 3 * 854.7864 / 4 + 868.1992 / 4; // weighted average of element 10 and 11
+ expectedBias = 3 * 623.7864 / 4 + 612.1992 / 4;
+ assertEstimateEquals(expectedBias, rawEstimate, estimator);
+ }
+
+ private static void assertEstimateEquals(double expected, double rawEstimate, BiasEstimator biasEstimator) {
+ assertEquals(expected, biasEstimator.estimateBias(rawEstimate), 0.00000001);
+ }
+} \ No newline at end of file
diff --git a/searchlib/src/test/java/com/yahoo/searchlib/aggregation/hll/HyperLogLogEstimatorTest.java b/searchlib/src/test/java/com/yahoo/searchlib/aggregation/hll/HyperLogLogEstimatorTest.java
new file mode 100644
index 00000000000..1ba4a71d102
--- /dev/null
+++ b/searchlib/src/test/java/com/yahoo/searchlib/aggregation/hll/HyperLogLogEstimatorTest.java
@@ -0,0 +1,89 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.aggregation.hll;
+
+import net.jpountz.xxhash.XXHash32;
+import net.jpountz.xxhash.XXHashFactory;
+import org.junit.Test;
+
+import java.nio.ByteBuffer;
+import java.util.Arrays;
+import java.util.Random;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+public class HyperLogLogEstimatorTest {
+
+ private XXHash32 hashGenerator = XXHashFactory.safeInstance().hash32();
+
+ @Test
+ public void requireThatEstimateInRangeForSmallValueSetUsingNormalSketch() {
+ testEstimateUsingNormalSketch(15, 1337);
+ }
+
+ @Test
+ public void requireThatEstimateInRangeForLargeValueSetUsingNormalSketch() {
+ testEstimateUsingNormalSketch(1_000_000, 1337);
+ }
+
+ @Test
+ public void requireThatEstimateIsReasonableForFullNormalSketch() {
+ HyperLogLogEstimator estimator = new HyperLogLogEstimator(10);
+ NormalSketch sketch = new NormalSketch(10);
+ // Fill sketch with 23 - highest possible zero prefix for precision 10.
+ Arrays.fill(sketch.data(), (byte) 23);
+ long estimate = estimator.estimateCount(sketch);
+ assertTrue(estimate > 6_000_000_000l);
+ }
+
+ @Test
+ public void requireThatEstimateIsCorrectForSparseSketch() {
+ SparseSketch sketch = new SparseSketch();
+ HyperLogLogEstimator estimator = new HyperLogLogEstimator(10);
+ long estimate = estimator.estimateCount(sketch);
+ assertEquals(0, estimate);
+
+ // Check that estimate is correct for every possible sketch size up to threshold
+ for (int i = 1; i <= HyperLogLog.SPARSE_SKETCH_CONVERSION_THRESHOLD; i++) {
+ sketch.aggregate(i);
+ estimate = estimator.estimateCount(sketch);
+ assertEquals(i, estimate);
+ }
+ }
+
+ private void testEstimateUsingNormalSketch(int nValues, int seed) {
+ for (int precision = 4; precision <= 16; precision++) {
+ HyperLogLogEstimator estimator = new HyperLogLogEstimator(precision);
+
+ long uniqueCount = new Random(seed)
+ .ints(nValues)
+ .map(this::makeHash)
+ .distinct()
+ .count();
+
+ Iterable<Integer> hashValues = () ->
+ new Random(seed)
+ .ints(nValues)
+ .map(this::makeHash)
+ .iterator();
+
+ NormalSketch sketch = new NormalSketch(precision);
+ sketch.aggregate(hashValues);
+ long estimate = estimator.estimateCount(sketch);
+ double standardError = standardErrorForPrecision(precision);
+ assertTrue(estimate > uniqueCount * (1 - standardError) * 0.9);
+ assertTrue(estimate < uniqueCount * (1 + standardError) * 1.1);
+ }
+ }
+
+ private static double standardErrorForPrecision(int precision) {
+ return 1.04 / Math.sqrt(1 << precision); // HLL standard error
+ }
+
+
+ private int makeHash(int value) {
+ final int seed = 42424242;
+ byte[] bytes = ByteBuffer.allocate(4).putInt(value).array();
+ return hashGenerator.hash(bytes, 0, 4, seed);
+ }
+} \ No newline at end of file
diff --git a/searchlib/src/test/java/com/yahoo/searchlib/aggregation/hll/HyperLogLogPrecisionBenchmark.java b/searchlib/src/test/java/com/yahoo/searchlib/aggregation/hll/HyperLogLogPrecisionBenchmark.java
new file mode 100644
index 00000000000..5dba5e48578
--- /dev/null
+++ b/searchlib/src/test/java/com/yahoo/searchlib/aggregation/hll/HyperLogLogPrecisionBenchmark.java
@@ -0,0 +1,70 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.aggregation.hll;
+
+import net.jpountz.xxhash.XXHash32;
+import net.jpountz.xxhash.XXHashFactory;
+
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Random;
+
+/**
+ * This benchmarks performs a series of unique counting tests to analyse the HyperLogLog accuracy.
+ */
+public class HyperLogLogPrecisionBenchmark {
+
+ private static final int MAX_VAL = 256_000;
+ private static final int MAX_ITERATION = 1000;
+
+ private static final XXHash32 hashGenerator = XXHashFactory.safeInstance().hash32();
+ private static final HyperLogLogEstimator estimator = new HyperLogLogEstimator();
+ private static final Random random = new Random(424242);
+
+
+ public static void main(String[] args) {
+ System.out.println("Unique count; Average estimated unique count; Normalized standard error; Standard error; Min; Max");
+ for (int val = 1; val <= MAX_VAL; val *= 2) {
+ List<Long> samples = new ArrayList<>();
+ long sumEstimates = 0;
+ for (int iteration = 0; iteration < MAX_ITERATION; iteration++) {
+ long sample = estimateUniqueCount(val);
+ samples.add(sample);
+ sumEstimates += sample;
+ }
+ double average = sumEstimates / (double) MAX_ITERATION;
+ long min = samples.stream().min(Long::compare).get();
+ long max = samples.stream().max(Long::compare).get();
+ double standardDeviation = getStandardDeviation(samples, average);
+ System.out.printf("%d; %.2f; %.4f; %.4f; %d; %d\n", val, average, standardDeviation / average, standardDeviation, min, max);
+ }
+ }
+
+ private static double getStandardDeviation(List<Long> samples, double average) {
+ double sumSquared = 0;
+ for (long sample : samples) {
+ sumSquared += Math.pow(sample - average, 2);
+ }
+ return Math.sqrt(sumSquared / samples.size());
+ }
+
+ private static long estimateUniqueCount(int nValues) {
+ SparseSketch sparse = new SparseSketch();
+ while (sparse.size() < nValues) {
+ sparse.aggregate(makeHash(random.nextInt()));
+ }
+ if (sparse.size() > HyperLogLog.SPARSE_SKETCH_CONVERSION_THRESHOLD) {
+ NormalSketch normal = new NormalSketch();
+ normal.aggregate(sparse.data());
+ return estimator.estimateCount(normal);
+ } else {
+ return estimator.estimateCount(sparse);
+ }
+ }
+
+ private static int makeHash(int value) {
+ final int seed = 1333337;
+ byte[] bytes = ByteBuffer.allocate(4).putInt(value).array();
+ return hashGenerator.hash(bytes, 0, 4, seed);
+ }
+}
diff --git a/searchlib/src/test/java/com/yahoo/searchlib/aggregation/hll/NormalSketchTest.java b/searchlib/src/test/java/com/yahoo/searchlib/aggregation/hll/NormalSketchTest.java
new file mode 100644
index 00000000000..3b0a584f37b
--- /dev/null
+++ b/searchlib/src/test/java/com/yahoo/searchlib/aggregation/hll/NormalSketchTest.java
@@ -0,0 +1,121 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.aggregation.hll;
+
+import com.yahoo.vespa.objects.BufferSerializer;
+import org.junit.Test;
+
+import java.util.Arrays;
+import java.util.List;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+
+public class NormalSketchTest {
+
+ @Test
+ public void requireThatSerializationIsCorrectForCompressibleData() {
+ testSerializationForPrecision(16);
+ }
+
+ @Test
+ public void requireThatSerializationIsCorrectForIncompressibleData() {
+ // A sketch of precision 1 contains only two elements and will therefore not be compressible.
+ testSerializationForPrecision(1);
+ }
+
+ private static void testSerializationForPrecision(int precision) {
+ NormalSketch from = new NormalSketch(precision); // precision p => 2^p bytes
+ for (int i = 0; i < from.size(); i++) {
+ from.data()[i] = (byte) i;
+ }
+ NormalSketch to = new NormalSketch(precision);
+
+ BufferSerializer buffer = new BufferSerializer();
+ from.serialize(buffer);
+ buffer.flip();
+ to.deserialize(buffer);
+
+ assertEquals(from, to);
+ }
+
+ @Test
+ public void requireThatMergeDoesElementWiseMax() {
+ NormalSketch s1 = new NormalSketch(2);
+ setSketchValues(s1, 0, 1, 1, 3);
+ NormalSketch s2 = new NormalSketch(2);
+ setSketchValues(s2, 2, 1, 1, 0);
+ s1.merge(s2);
+
+ assertBucketEquals(s1, 0, 2);
+ assertBucketEquals(s1, 1, 1);
+ assertBucketEquals(s1, 2, 1);
+ assertBucketEquals(s1, 3, 3);
+ }
+
+ @Test(expected = IllegalArgumentException.class)
+ public void requireThatMergingFailsForSketchesOfDifferentSize() {
+ NormalSketch s1 = new NormalSketch(2);
+ NormalSketch s2 = new NormalSketch(3);
+ s1.merge(s2);
+ }
+
+ @Test
+ public void requireThatEqualsIsCorrect() {
+ NormalSketch s1 = new NormalSketch(1);
+ setSketchValues(s1, 42, 127);
+ NormalSketch s2 = new NormalSketch(1);
+ setSketchValues(s2, 42, 127);
+ assertEquals(s1, s2);
+ }
+
+ @Test
+ public void requireThatSketchBucketsAreCorrectForSingleValues() {
+
+ testSingleValueAggregation(0, 0, 23);
+ testSingleValueAggregation(1, 1, 23);
+ testSingleValueAggregation(-1, 1023, 1);
+ testSingleValueAggregation(Integer.MAX_VALUE, 1023, 2);
+ testSingleValueAggregation(Integer.MIN_VALUE, 0, 1);
+ testSingleValueAggregation(42, 42, 23);
+ testSingleValueAggregation(0b00000011_00000000_00000000_11000011, 0b11000011, 7);
+ }
+
+ private static void testSingleValueAggregation(int hashValue, int bucketIndex, int expectedValue) {
+ NormalSketch sketch = new NormalSketch(10);
+ sketch.aggregate(hashValue);
+ assertBucketEquals(sketch, bucketIndex, expectedValue);
+ for (int i = 0; i < sketch.size(); i++) {
+ if (i == bucketIndex) {
+ continue;
+ }
+ assertBucketEquals(sketch, i, 0);
+ }
+ }
+
+ @Test
+ public void requireThatSketchBucketsAreCorrectForMultipleValues() {
+ NormalSketch sketch = new NormalSketch(10);
+
+ // Aggregate multiple values
+ sketch.aggregate(Arrays.asList(0, 1, 2, 3, 4, 5, 6, 7, 8, 9));
+ for (int i = 0; i < 10; i++) {
+ assertBucketEquals(sketch, i, 23);
+ }
+ // Check that the other values are zero.
+ for (int i = 10; i < 1024; i++) {
+ assertBucketEquals(sketch, i, 0);
+ }
+ }
+
+ private static void assertBucketEquals(NormalSketch sketch, int index, int expectedValue) {
+ assertEquals(expectedValue, sketch.data()[index]);
+ }
+
+ private static void setSketchValues(NormalSketch sketch, Integer... values) {
+ for (int i = 0; i < values.length; i++) {
+ sketch.data()[i] = values[i].byteValue();
+ }
+ }
+
+} \ No newline at end of file
diff --git a/searchlib/src/test/java/com/yahoo/searchlib/aggregation/hll/SketchMergerTest.java b/searchlib/src/test/java/com/yahoo/searchlib/aggregation/hll/SketchMergerTest.java
new file mode 100644
index 00000000000..07488d21fd3
--- /dev/null
+++ b/searchlib/src/test/java/com/yahoo/searchlib/aggregation/hll/SketchMergerTest.java
@@ -0,0 +1,69 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.aggregation.hll;
+
+import org.junit.Test;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+public class SketchMergerTest {
+
+ private final SketchMerger merger = new SketchMerger();
+
+ @Test
+ public void requireThatMergingTwoSmallSparseSketchesReturnsSparseSketch() {
+ SparseSketch s1 = SketchUtils.createSparseSketch(1);
+ SparseSketch s2 = SketchUtils.createSparseSketch(2);
+
+ Sketch<?> result = merger.merge(s1, s2);
+ assertEquals(result.getClass(), SparseSketch.class);
+ assertTrue("Should return the instance given by first argument.", result == s1);
+ SketchUtils.assertSketchContains(result, 1, 2);
+ }
+
+ @Test
+ public void requireThatMergingTwoThresholdSizeSparseSketchesReturnsNormalSketch() {
+ SparseSketch s1 = SketchUtils.createSparseSketch();
+ SparseSketch s2 = SketchUtils.createSparseSketch();
+
+ // Fill sketches with disjoint data.
+ for (int i = 0; i < HyperLogLog.SPARSE_SKETCH_CONVERSION_THRESHOLD; i++) {
+ s1.aggregate(i);
+ s2.aggregate(i + HyperLogLog.SPARSE_SKETCH_CONVERSION_THRESHOLD);
+ }
+
+ Sketch<?> result = merger.merge(s1, s2);
+ assertEquals(result.getClass(), NormalSketch.class);
+
+ List<Integer> unionOfSketchData = new ArrayList<>();
+ unionOfSketchData.addAll(s1.data());
+ unionOfSketchData.addAll(s2.data());
+ Integer[] expectedValues = unionOfSketchData.toArray(new Integer[unionOfSketchData.size()]);
+ SketchUtils.assertSketchContains(result, expectedValues);
+ }
+
+ @Test
+ public void requireThatMergingTwoNormalSketchesReturnsNormalSketch() {
+ NormalSketch s1 = SketchUtils.createNormalSketch(1);
+ NormalSketch s2 = SketchUtils.createNormalSketch(2);
+
+ Sketch<?> result = merger.merge(s1, s2);
+ assertEquals(result.getClass(), NormalSketch.class);
+ assertTrue("Should return the instance given by first argument.", result == s1);
+ SketchUtils.assertSketchContains(result, 1, 2);
+ }
+
+ @Test
+ public void requireThatMergingNormalAndSparseSketchReturnsNormalSketch() {
+ SparseSketch s1 = SketchUtils.createSparseSketch(1);
+ NormalSketch s2 = SketchUtils.createNormalSketch(2);
+
+ Sketch<?> result = merger.merge(s1, s2);
+ assertEquals(result.getClass(), NormalSketch.class);
+ assertTrue("Should return the NormalSketch instance given by the arguments.", result == s2);
+ SketchUtils.assertSketchContains(result, 1, 2);
+ }
+} \ No newline at end of file
diff --git a/searchlib/src/test/java/com/yahoo/searchlib/aggregation/hll/SketchUtils.java b/searchlib/src/test/java/com/yahoo/searchlib/aggregation/hll/SketchUtils.java
new file mode 100644
index 00000000000..90098f8c950
--- /dev/null
+++ b/searchlib/src/test/java/com/yahoo/searchlib/aggregation/hll/SketchUtils.java
@@ -0,0 +1,46 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.aggregation.hll;
+
+import java.util.Arrays;
+
+import static org.junit.Assert.assertEquals;
+
+/**
+ * Utility class for creating sketches and comparing their content.
+ *
+ * @author bjorncs
+ */
+public class SketchUtils {
+
+ private SketchUtils() {}
+
+ public static SparseSketch createSparseSketch(Integer... values) {
+ SparseSketch sketch = new SparseSketch();
+ sketch.aggregate(Arrays.asList(values));
+ return sketch;
+ }
+
+ public static NormalSketch createNormalSketch(Integer... values) {
+ NormalSketch sketch = new NormalSketch();
+ sketch.aggregate(Arrays.asList(values));
+ return sketch;
+ }
+
+ public static void assertSketchContains(Sketch<?> sketch, Integer... values) {
+ if (sketch instanceof SparseSketch) {
+ assertSparseSketchContains((SparseSketch) sketch, values);
+ } else {
+ assertNormalSketchContains((NormalSketch) sketch, values);
+ }
+ }
+
+ public static void assertNormalSketchContains(NormalSketch sketch, Integer... values) {
+ NormalSketch expectedSketch = createNormalSketch(values);
+ assertEquals(expectedSketch, sketch);
+ }
+
+ public static void assertSparseSketchContains(SparseSketch sketch, Integer... values) {
+ SparseSketch expectedSketch = createSparseSketch(values);
+ assertEquals(expectedSketch, sketch);
+ }
+}
diff --git a/searchlib/src/test/java/com/yahoo/searchlib/aggregation/hll/SparseSketchTest.java b/searchlib/src/test/java/com/yahoo/searchlib/aggregation/hll/SparseSketchTest.java
new file mode 100644
index 00000000000..4be0f89514d
--- /dev/null
+++ b/searchlib/src/test/java/com/yahoo/searchlib/aggregation/hll/SparseSketchTest.java
@@ -0,0 +1,62 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.aggregation.hll;
+
+import com.yahoo.vespa.objects.BufferSerializer;
+import org.junit.Test;
+
+import java.util.HashSet;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+public class SparseSketchTest {
+
+ @Test
+ public void requireThatMergeDoesSetUnion() {
+ SparseSketch s1 = new SparseSketch();
+ s1.aggregate(42);
+ s1.aggregate(9001);
+
+ SparseSketch s2 = new SparseSketch();
+ s2.aggregate(1337);
+ s2.aggregate(9001);
+
+ s1.merge(s2);
+
+ HashSet<Integer> data = s1.data();
+ assertEquals(3, s1.size());
+ assertTrue(data.contains(42));
+ assertTrue(data.contains(1337));
+ assertTrue(data.contains(9001));
+ }
+
+
+ @Test
+ public void requireThatSerializationRetainAllData() {
+ SparseSketch from = new SparseSketch();
+ from.aggregate(42);
+ from.aggregate(1337);
+
+ SparseSketch to = new SparseSketch();
+
+ BufferSerializer buffer = new BufferSerializer();
+ from.serialize(buffer);
+ buffer.flip();
+ to.deserialize(buffer);
+
+ assertEquals(from, to);
+ }
+
+ @Test
+ public void requireThatEqualsComparesDataContent() {
+ SparseSketch s1 = new SparseSketch();
+ s1.aggregate(1337);
+ s1.aggregate(42);
+
+ SparseSketch s2 = new SparseSketch();
+ s2.aggregate(42);
+ s2.aggregate(1337);
+
+ assertEquals(s1.data(), s2.data());
+ }
+} \ No newline at end of file
diff --git a/searchlib/src/test/java/com/yahoo/searchlib/expression/ExpressionTestCase.java b/searchlib/src/test/java/com/yahoo/searchlib/expression/ExpressionTestCase.java
new file mode 100755
index 00000000000..2c5e65c03e4
--- /dev/null
+++ b/searchlib/src/test/java/com/yahoo/searchlib/expression/ExpressionTestCase.java
@@ -0,0 +1,932 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.expression;
+
+import com.yahoo.io.GrowableByteBuffer;
+import com.yahoo.text.Utf8;
+import com.yahoo.vespa.objects.BufferSerializer;
+import com.yahoo.vespa.objects.Identifiable;
+import junit.framework.TestCase;
+
+import java.nio.ByteBuffer;
+import java.util.Arrays;
+
+/**
+ * @author <a href="mailto:balder@yahoo-inc.com">Henning Baldersheim</a>
+ */
+public class ExpressionTestCase extends TestCase {
+
+ public void testRangeBucketPreDefFunctionNode() {
+ assertMultiArgFunctionNode(new RangeBucketPreDefFunctionNode(new StringBucketResultNodeVector().add(new StringBucketResultNode("10", "20")), new AttributeNode("foo")));
+ assertEquals(new RangeBucketPreDefFunctionNode(), new RangeBucketPreDefFunctionNode());
+ assertEquals(new RangeBucketPreDefFunctionNode(new StringBucketResultNodeVector().add(new StringBucketResultNode("10", "20")), new AttributeNode("foo")),
+ new RangeBucketPreDefFunctionNode(new StringBucketResultNodeVector().add(new StringBucketResultNode("10", "20")), new AttributeNode("foo")));
+ assertNotEquals(new RangeBucketPreDefFunctionNode(new StringBucketResultNodeVector().add(new StringBucketResultNode("10", "20")), new AttributeNode("foo")),
+ new RangeBucketPreDefFunctionNode(new StringBucketResultNodeVector().add(new StringBucketResultNode("10", "21")), new AttributeNode("foo")));
+ assertNotEquals(new RangeBucketPreDefFunctionNode(new StringBucketResultNodeVector().add(new StringBucketResultNode("10", "20")), new AttributeNode("foo")),
+ new RangeBucketPreDefFunctionNode(new StringBucketResultNodeVector().add(new StringBucketResultNode("10", "20")), new AttributeNode("bar")));
+ }
+
+ public void testFixedWidthBucketFunctionNode() {
+ assertMultiArgFunctionNode(new FixedWidthBucketFunctionNode());
+ assertEquals(new FixedWidthBucketFunctionNode(), new FixedWidthBucketFunctionNode());
+ assertEquals(new FixedWidthBucketFunctionNode(new IntegerResultNode(5), new AttributeNode("foo")),
+ new FixedWidthBucketFunctionNode(new IntegerResultNode(5), new AttributeNode("foo")));
+ assertNotEquals(new FixedWidthBucketFunctionNode(new IntegerResultNode(5), new AttributeNode("foo")),
+ new FixedWidthBucketFunctionNode(new IntegerResultNode(6), new AttributeNode("foo")));
+ assertNotEquals(new FixedWidthBucketFunctionNode(new IntegerResultNode(5), new AttributeNode("foo")),
+ new FixedWidthBucketFunctionNode(new IntegerResultNode(5), new AttributeNode("bar")));
+ }
+
+ public void testIntegerBucketResultNodeVector() {
+ assertResultNode(new IntegerBucketResultNodeVector().add(new IntegerBucketResultNode(10, 20)));
+ assertEquals(new IntegerBucketResultNodeVector().add(new IntegerBucketResultNode(10, 20)),
+ new IntegerBucketResultNodeVector().add(new IntegerBucketResultNode(10, 20)));
+ assertNotEquals(new IntegerBucketResultNodeVector().add(new IntegerBucketResultNode(10, 20)),
+ new IntegerBucketResultNodeVector());
+ assertNotEquals(new IntegerBucketResultNodeVector().add(new IntegerBucketResultNode(10, 20)),
+ new IntegerBucketResultNodeVector().add(new IntegerBucketResultNode(11, 20)));
+ }
+
+ public void testFloatBucketResultNodeVector() {
+ assertResultNode(new FloatBucketResultNodeVector().add(new FloatBucketResultNode(10, 20)));
+ assertEquals(new FloatBucketResultNodeVector().add(new FloatBucketResultNode(10, 20)),
+ new FloatBucketResultNodeVector().add(new FloatBucketResultNode(10, 20)));
+ assertNotEquals(new FloatBucketResultNodeVector().add(new FloatBucketResultNode(10, 20)),
+ new FloatBucketResultNodeVector());
+ assertNotEquals(new FloatBucketResultNodeVector().add(new FloatBucketResultNode(10, 20)),
+ new FloatBucketResultNodeVector().add(new FloatBucketResultNode(11, 20)));
+ }
+
+ public void testStringBucketResultNodeVector() {
+ assertResultNode(new StringBucketResultNodeVector().add(new StringBucketResultNode("10", "20")));
+ assertEquals(new StringBucketResultNodeVector().add(new StringBucketResultNode("10", "20")),
+ new StringBucketResultNodeVector().add(new StringBucketResultNode("10", "20")));
+ assertNotEquals(new StringBucketResultNodeVector().add(new StringBucketResultNode("10", "20")),
+ new StringBucketResultNodeVector());
+ assertNotEquals(new StringBucketResultNodeVector().add(new StringBucketResultNode("10", "20")),
+ new StringBucketResultNodeVector().add(new StringBucketResultNode("11", "20")));
+ }
+
+ public void testIntegerBucketResultNode() {
+ assertResultNode(new IntegerBucketResultNode(10, 20));
+ assertEquals(new IntegerBucketResultNode(10, 20), new IntegerBucketResultNode(10, 20));
+ assertNotEquals(new IntegerBucketResultNode(10, 20), new IntegerBucketResultNode(11, 20));
+ assertNotEquals(new IntegerBucketResultNode(10, 20), new IntegerBucketResultNode(10, 21));
+ }
+
+ public void testFloatBucketResultNode() {
+ assertResultNode(new FloatBucketResultNode(10.0, 20.0));
+ assertEquals(new FloatBucketResultNode(10.0, 20.0), new FloatBucketResultNode(10.0, 20.0));
+ assertNotEquals(new FloatBucketResultNode(10.0, 20.0), new FloatBucketResultNode(11.0, 20.0));
+ assertNotEquals(new FloatBucketResultNode(10.0, 20.0), new FloatBucketResultNode(10.0, 21.0));
+ }
+
+ public void testStringBucketResultNode() {
+ assertResultNode(new StringBucketResultNode("10.0", "20.0"));
+ assertEquals(new StringBucketResultNode("10.0", "20.0"), new StringBucketResultNode("10.0", "20.0"));
+ assertNotEquals(new StringBucketResultNode("10.0", "20.0"), new StringBucketResultNode("11.0", "20.0"));
+ assertNotEquals(new StringBucketResultNode("10.0", "20.0"), new StringBucketResultNode("10.0", "21.0"));
+ compare(new StringBucketResultNode("10.0", "20.0"), new StringBucketResultNode("10.0", "21.0"), new StringBucketResultNode("10.0", "22.0"));
+ compare(new StringBucketResultNode("10.0", "20.0"), new StringBucketResultNode("11.0", "19.0"), new StringBucketResultNode("11.0", "20.0"));
+ compare(new StringBucketResultNode(StringResultNode.getNegativeInfinity(), new StringResultNode("20.0")),
+ new StringBucketResultNode("11.0", "19.0"), new StringBucketResultNode("11.0", "20.0"));
+ compare(new StringBucketResultNode(StringResultNode.getNegativeInfinity(), new StringResultNode("20.0")),
+ new StringBucketResultNode(StringResultNode.getNegativeInfinity(), new StringResultNode("21.0")),
+ new StringBucketResultNode("11.0", "20.0"));
+ compare(new StringBucketResultNode("10.0", "20.0"), new StringBucketResultNode("10.0", "21.0"),
+ new StringBucketResultNode(new StringResultNode("10.0"), StringResultNode.getPositiveInfinity()));
+ compare(new StringBucketResultNode(new StringResultNode("10.0"), StringResultNode.getPositiveInfinity()),
+ new StringBucketResultNode("11.0", "19.0"), new StringBucketResultNode("11.0", "20.0"));
+ }
+
+ public void testPositiveInfinity() {
+ PositiveInfinityResultNode inf = new PositiveInfinityResultNode();
+ PositiveInfinityResultNode inf2 = new PositiveInfinityResultNode();
+ assertResultNode(inf);
+ assertEquals(inf, inf2);
+ }
+
+ public void testAddFunctionNode() {
+ assertMultiArgFunctionNode(new AddFunctionNode());
+ assertFunctionNode(new AddFunctionNode().addArg(new ConstantNode(new IntegerResultNode(2)))
+ .addArg(new ConstantNode(new IntegerResultNode(3))),
+ 5, 5.0, "5", longAsRaw(5));
+ assertFunctionNode(new AddFunctionNode().addArg(new ConstantNode(new FloatResultNode(3.0)))
+ .addArg(new ConstantNode(new IntegerResultNode(2))),
+ 5, 5.0, "5.0", doubleAsRaw(5.0));
+ assertFunctionNode(new AddFunctionNode().addArg(new ConstantNode(new IntegerResultNode(3)))
+ .addArg(new ConstantNode(new FloatResultNode(2.0))),
+ 5, 5.0, "5.0", doubleAsRaw(5.0));
+ }
+
+ public void testAndFunctionNode() {
+ assertMultiArgFunctionNode(new AndFunctionNode());
+ assertFunctionNode(new AndFunctionNode().addArg(new ConstantNode(new IntegerResultNode(3)))
+ .addArg(new ConstantNode(new IntegerResultNode(7))),
+ 3, 3.0, "3", longAsRaw(3));
+ }
+
+ public void testZCurveFunctionNode() {
+ assertMultiArgFunctionNode(
+ new ZCurveFunctionNode(new ConstantNode(new IntegerResultNode(7)), ZCurveFunctionNode.Dimension.Y));
+ }
+
+ public void testTimeStampFunctionNode() {
+ assertMultiArgFunctionNode(new TimeStampFunctionNode(new AttributeNode("testattribute"), TimeStampFunctionNode.TimePart.Hour, true));
+ assertEquals(new TimeStampFunctionNode(new AttributeNode("testattribute"), TimeStampFunctionNode.TimePart.Hour, true),
+ new TimeStampFunctionNode(new AttributeNode("testattribute"), TimeStampFunctionNode.TimePart.Hour, true));
+ assertNotEquals(
+ new TimeStampFunctionNode(new AttributeNode("testattribute"), TimeStampFunctionNode.TimePart.Hour,
+ true),
+ new TimeStampFunctionNode(new AttributeNode("testattributt"), TimeStampFunctionNode.TimePart.Hour,
+ true));
+ assertNotEquals(
+ new TimeStampFunctionNode(new AttributeNode("testattribute"), TimeStampFunctionNode.TimePart.Hour,
+ true),
+ new TimeStampFunctionNode(new AttributeNode("testattribute"), TimeStampFunctionNode.TimePart.Year,
+ true));
+ assertNotEquals(
+ new TimeStampFunctionNode(new AttributeNode("testattribute"), TimeStampFunctionNode.TimePart.Hour,
+ true),
+ new TimeStampFunctionNode(new AttributeNode("testattribute"), TimeStampFunctionNode.TimePart.Hour,
+ false));
+ }
+
+ public void testExpressionRefNode() {
+ AggregationRefNode ref = new AggregationRefNode(3);
+ assertEquals(3, ref.getIndex());
+ }
+
+ public void testAttributeNode() {
+ try {
+ new AttributeNode(null);
+ fail("Should not be able to set null attribute name.");
+ } catch (IllegalArgumentException e) {
+ // expected
+ }
+ try {
+ new AttributeNode().setAttributeName(null);
+ fail("Should not be able to set null attribute name.");
+ } catch (IllegalArgumentException e) {
+ // expected
+ }
+ try {
+ new AttributeNode().prepare();
+ fail("Should not be possible to prepare or execute attribute node");
+ } catch (RuntimeException e) {
+ // expected
+ }
+ try {
+ new AttributeNode().execute();
+ fail("Should not be possible to prepare or execute attribute node");
+ } catch (RuntimeException e) {
+ // expected
+ }
+ AttributeNode a = new AttributeNode("testattribute");
+ assertEquals("testattribute", a.getAttributeName());
+ AttributeNode b = (AttributeNode)assertSerialize(a);
+ assertEquals("testattribute", b.getAttributeName());
+ AttributeNode c = new AttributeNode("testattribute");
+ assertEquals(b, c);
+ c.setAttributeName("fail");
+ assertFalse(b.equals(c));
+ }
+
+ public void testInterpolatedLookupNode() {
+ ExpressionNode argA = new ConstantNode(new FloatResultNode(2.71828182846));
+ ExpressionNode argB = new ConstantNode(new FloatResultNode(3.14159265359));
+ try {
+ new InterpolatedLookupNode(null, argA);
+ fail("Should not be able to set null attribute name.");
+ } catch (IllegalArgumentException e) {
+ // expected
+ }
+ try {
+ new InterpolatedLookupNode().setAttributeName(null);
+ fail("Should not be able to set null attribute name.");
+ } catch (IllegalArgumentException e) {
+ // expected
+ }
+ try {
+ new InterpolatedLookupNode().prepare();
+ fail("Should not be possible to prepare or execute interpolatedlookup node");
+ } catch (RuntimeException e) {
+ // expected
+ }
+ try {
+ new InterpolatedLookupNode().execute();
+ fail("Should not be possible to prepare or execute interpolatedlookup node");
+ } catch (RuntimeException e) {
+ // expected
+ }
+ ExpressionNode a1 = new InterpolatedLookupNode().setAttributeName("foo").addArg(argA);
+ InterpolatedLookupNode a2 = new InterpolatedLookupNode("foo", argA);
+ assertEquals("foo", ((InterpolatedLookupNode)a1).getAttributeName());
+ assertEquals("foo", a2.getAttributeName());
+ assertEquals(argA, ((InterpolatedLookupNode)a1).getArg());
+ assertEquals(argA, a2.getArg());
+ assertEquals(a1, a2);
+ InterpolatedLookupNode b1 = new InterpolatedLookupNode("foo", argB);
+ InterpolatedLookupNode b2 = new InterpolatedLookupNode("bar", argA);
+ assertFalse(a1.equals(b1));
+ assertFalse(a1.equals(b2));
+ assertFalse(a2.equals(b1));
+ assertFalse(a2.equals(b2));
+ a2.setAttributeName("fail");
+ assertFalse(a1.equals(a2));
+ }
+
+ public void testCatFunctionNode() {
+ assertMultiArgFunctionNode(new CatFunctionNode());
+ assertFunctionNode(new CatFunctionNode().addArg(new ConstantNode(new RawResultNode(asRaw('1', '2'))))
+ .addArg(new ConstantNode(new RawResultNode(asRaw('3', '4')))),
+ 0, 0.0, "1234", asRaw('1', '2', '3', '4'));
+ }
+
+ public void testStrCatFunctionNode() {
+ assertMultiArgFunctionNode(new StrCatFunctionNode());
+ assertFunctionNode(new StrCatFunctionNode().addArg(new ConstantNode(new StringResultNode("foo")))
+ .addArg(new ConstantNode(new StringResultNode("bar"))),
+ 0, 0.0, "foobar", stringAsRaw("foobar"));
+ }
+
+ public void testDivideFunctionNode() {
+ assertMultiArgFunctionNode(new DivideFunctionNode());
+ assertFunctionNode(new DivideFunctionNode().addArg(new ConstantNode(new IntegerResultNode(10)))
+ .addArg(new ConstantNode(new IntegerResultNode(2))),
+ 5, 5.0, "5", longAsRaw(5));
+ assertFunctionNode(new DivideFunctionNode().addArg(new ConstantNode(new IntegerResultNode(6)))
+ .addArg(new ConstantNode(new FloatResultNode(2.0))),
+ 3, 3.0, "3.0", doubleAsRaw(3.0));
+ assertFunctionNode(new DivideFunctionNode().addArg(new ConstantNode(new IntegerResultNode(6)))
+ .addArg(new ConstantNode(new FloatResultNode(12.0))),
+ 1, 0.5, "0.5", doubleAsRaw(0.5));
+ }
+
+ public void testDocumentFieldNode() {
+ try {
+ new DocumentFieldNode(null);
+ fail("Should not be able to set null field name.");
+ } catch (IllegalArgumentException e) {
+ // expected
+ }
+ try {
+ new DocumentFieldNode().setDocumentFieldName(null);
+ fail("Should not be able to set null field name.");
+ } catch (IllegalArgumentException e) {
+ // expected
+ }
+ try {
+ new DocumentFieldNode("foo").prepare();
+ fail("Should not be able to prepare documentfieldnode");
+ } catch (RuntimeException e) {
+ // expected
+ }
+ try {
+ new DocumentFieldNode("foo").execute();
+ fail("Should not be able to execute documentfieldnode");
+ } catch (RuntimeException e) {
+ // expected
+ }
+ DocumentFieldNode a = new DocumentFieldNode("testdocumentfield");
+ assertEquals("testdocumentfield", a.getDocumentFieldName());
+ DocumentFieldNode b = (DocumentFieldNode)assertSerialize(a);
+ assertEquals("testdocumentfield", b.getDocumentFieldName());
+ DocumentFieldNode c = new DocumentFieldNode("testdocumentfield");
+ assertEquals(b, c);
+ c.setDocumentFieldName("fail");
+ assertFalse(b.equals(c));
+ }
+
+ public void testFloatResultNode() {
+ FloatResultNode a = new FloatResultNode(7.3);
+ assertEquals(a.getInteger(), 7);
+ assertEquals(a.getFloat(), 7.3);
+ assertEquals(a.getString(), "7.3");
+ assertEquals(a.getNumber(), new Double(7.3));
+ byte[] raw = a.getRaw();
+ assertEquals(raw.length, 8);
+ assertResultNode(a);
+ compare(new FloatResultNode(-1), new FloatResultNode(0), new FloatResultNode(1));
+ a.set(new FloatResultNode(4));
+ assertResultNode(a);
+
+ FloatResultNode b = new FloatResultNode(7.5);
+ assertEquals(b.getInteger(), 8);
+ assertEquals(b.getFloat(), 7.5);
+ assertEquals(b.getString(), "7.5");
+ assertEquals(b.getNumber(), new Double(7.5));
+ }
+
+ public void testGetDocIdNamespaceSpecificFunctionNode() {
+ GetDocIdNamespaceSpecificFunctionNode a = new GetDocIdNamespaceSpecificFunctionNode(new IntegerResultNode(7));
+ assertTrue(a.getResult() instanceof IntegerResultNode);
+ GetDocIdNamespaceSpecificFunctionNode b = (GetDocIdNamespaceSpecificFunctionNode)assertSerialize(a);
+ assertTrue(b.getResult() instanceof IntegerResultNode);
+ assertEquals(7, b.getResult().getInteger());
+ GetDocIdNamespaceSpecificFunctionNode c = new GetDocIdNamespaceSpecificFunctionNode(new IntegerResultNode(7));
+ assertEquals(b, c);
+ try {
+ new GetDocIdNamespaceSpecificFunctionNode(new IntegerResultNode(7)).prepare();
+ fail("Should not be able to prepare documentfieldnode");
+ } catch (RuntimeException e) {
+ // expected
+ }
+ try {
+ new GetDocIdNamespaceSpecificFunctionNode(new IntegerResultNode(7)).execute();
+ fail("Should not be able to execute documentfieldnode");
+ } catch (RuntimeException e) {
+ // expected
+ }
+ }
+
+ public void testGetYMUMChecksumFunctionNode() {
+ GetYMUMChecksumFunctionNode a = new GetYMUMChecksumFunctionNode();
+ assertTrue(a.getResult() instanceof IntegerResultNode);
+ assertSerialize(a);
+ try {
+ new GetYMUMChecksumFunctionNode().prepare();
+ fail("Should not be able to prepare documentfieldnode");
+ } catch (RuntimeException e) {
+ // expected
+ }
+ try {
+ new GetYMUMChecksumFunctionNode().execute();
+ fail("Should not be able to execute documentfieldnode");
+ } catch (RuntimeException e) {
+ // expected
+ }
+ }
+
+ public void testIntegerResultNode() {
+ IntegerResultNode a = new IntegerResultNode(7);
+ assertEquals(a.getInteger(), 7);
+ assertEquals(a.getFloat(), 7.0);
+ assertEquals(a.getString(), "7");
+ assertEquals(a.getNumber(), new Long(7));
+ byte[] raw = a.getRaw();
+ assertEquals(raw.length, 8);
+ assertResultNode(a);
+ compare(new IntegerResultNode(-1), new IntegerResultNode(0), new IntegerResultNode(1));
+ compare(new IntegerResultNode(-1), new IntegerResultNode(0), new IntegerResultNode(0x80000000L));
+ }
+
+ public void testMaxFunctionNode() {
+ assertMultiArgFunctionNode(new MaxFunctionNode());
+ assertFunctionNode(new MaxFunctionNode().addArg(new ConstantNode(new IntegerResultNode(3)))
+ .addArg(new ConstantNode(new IntegerResultNode(5))),
+ 5, 5.0, "5", longAsRaw(5));
+ assertFunctionNode(new MaxFunctionNode().addArg(new ConstantNode(new FloatResultNode(4.9999999)))
+ .addArg(new ConstantNode(new IntegerResultNode(5))),
+ 5, 5.0, "5.0", doubleAsRaw(5.0));
+ }
+
+ public void testMD5BitFunctionNode() {
+ try {
+ new MD5BitFunctionNode(null, 64);
+ fail("Should not be able to set null argument.");
+ } catch (NullPointerException e) {
+ // expected
+ }
+ try {
+ new MD5BitFunctionNode().prepare();
+ fail("Should not be able to run prepare.");
+ } catch (RuntimeException e) {
+ // expected
+ }
+ try {
+ new MD5BitFunctionNode().execute();
+ fail("Should not be able to run execute.");
+ } catch (RuntimeException e) {
+ // expected
+ }
+ assertUnaryBitFunctionNode(new MD5BitFunctionNode());
+ }
+
+ public void testMinFunctionNode() {
+ assertMultiArgFunctionNode(new MinFunctionNode());
+ assertFunctionNode(new MinFunctionNode().addArg(new ConstantNode(new IntegerResultNode(3)))
+ .addArg(new ConstantNode(new IntegerResultNode(5))),
+ 3, 3.0, "3", longAsRaw(3));
+ assertFunctionNode(new MinFunctionNode().addArg(new ConstantNode(new FloatResultNode(4.9999999)))
+ .addArg(new ConstantNode(new IntegerResultNode(5))),
+ 5, 4.9999999, "4.9999999", doubleAsRaw(4.9999999));
+ }
+
+ public void testModuloFunctionNode() {
+ assertMultiArgFunctionNode(new ModuloFunctionNode());
+ assertFunctionNode(new ModuloFunctionNode().addArg(new ConstantNode(new IntegerResultNode(13)))
+ .addArg(new ConstantNode(new IntegerResultNode(5))),
+ 3, 3.0, "3", longAsRaw(3));
+ assertFunctionNode(new ModuloFunctionNode().addArg(new ConstantNode(new FloatResultNode(4.9999999)))
+ .addArg(new ConstantNode(new IntegerResultNode(5))),
+ 5, 4.9999999, "4.9999999", doubleAsRaw(4.9999999));
+ }
+
+ public void testMultiplyFunctionNode() {
+ assertMultiArgFunctionNode(new MultiplyFunctionNode());
+ assertFunctionNode(new MultiplyFunctionNode().addArg(new ConstantNode(new IntegerResultNode(3)))
+ .addArg(new ConstantNode(new IntegerResultNode(5))),
+ 15, 15.0, "15", longAsRaw(15));
+ assertFunctionNode(new MultiplyFunctionNode().addArg(new ConstantNode(new FloatResultNode(4.5)))
+ .addArg(new ConstantNode(new IntegerResultNode(5))),
+ 23, 22.5, "22.5", doubleAsRaw(22.5));
+ }
+
+ public void testNegateFunctionNode() {
+ assertMultiArgFunctionNode(new NegateFunctionNode());
+ assertFunctionNode(new NegateFunctionNode().addArg(new ConstantNode(new IntegerResultNode(3))),
+ -3, -3.0, "-3", longAsRaw(-3));
+ assertFunctionNode(new NegateFunctionNode().addArg(new ConstantNode(new FloatResultNode(3.0))),
+ -3, -3.0, "-3.0", doubleAsRaw(-3.0));
+ }
+
+ public void testSortFunctionNode() {
+ assertMultiArgFunctionNode(new SortFunctionNode());
+
+ }
+
+ public void testReverseFunctionNode() {
+ assertMultiArgFunctionNode(new ReverseFunctionNode());
+ }
+
+ public void testToIntFunctionNode() {
+ assertMultiArgFunctionNode(new ToIntFunctionNode());
+ assertFunctionNode(new ToIntFunctionNode().addArg(new ConstantNode(new StringResultNode("1337"))),
+ 1337, 1337.0, "1337", longAsRaw(1337));
+ }
+
+ public void testToFloatFunctionNode() {
+ assertMultiArgFunctionNode(new ToFloatFunctionNode());
+ assertFunctionNode(new ToFloatFunctionNode().addArg(new ConstantNode(new FloatResultNode(3.14))),
+ 3, 3.14, "3.14", doubleAsRaw(3.14));
+ }
+
+ public void testMathFunctionNode() {
+ assertMultiArgFunctionNode(new MathFunctionNode(MathFunctionNode.Function.LOG10));
+ assertFunctionNode(new MathFunctionNode(MathFunctionNode.Function.LOG10).addArg(new ConstantNode(new IntegerResultNode(100000))),
+ 5, 5.0, "5.0", doubleAsRaw(5.0));
+ }
+
+ public void testStrLenFunctionNode() {
+ assertMultiArgFunctionNode(new StrLenFunctionNode());
+ assertFunctionNode(new StrLenFunctionNode().addArg(new ConstantNode(new StringResultNode("foo"))),
+ 3, 3.0, "3", longAsRaw(3));
+ }
+
+ public void testNormalizeSubjectFunctionNode() {
+ assertMultiArgFunctionNode(new NormalizeSubjectFunctionNode());
+ assertFunctionNode(new NormalizeSubjectFunctionNode().addArg(new ConstantNode(new StringResultNode("Re: Your mail"))),
+ 0, 0, "Your mail", stringAsRaw("Your mail"));
+ }
+
+ public void testNormalizeSubjectFunctionNode2() {
+ assertMultiArgFunctionNode(new NormalizeSubjectFunctionNode());
+ assertFunctionNode(new NormalizeSubjectFunctionNode().addArg(new ConstantNode(new StringResultNode("Your mail"))),
+ 0, 0, "Your mail", stringAsRaw("Your mail"));
+ }
+
+ public void testNumElemFunctionNode() {
+ assertMultiArgFunctionNode(new NumElemFunctionNode());
+ assertFunctionNode(new NumElemFunctionNode().addArg(new ConstantNode(new IntegerResultNode(1337))),
+ 1, 1.0, "1", longAsRaw(1));
+ }
+
+ public void testToStringFunctionNode() {
+ assertMultiArgFunctionNode(new ToStringFunctionNode());
+ assertFunctionNode(new ToStringFunctionNode().addArg(new ConstantNode(new IntegerResultNode(1337))),
+ 1337, 1337.0, "1337", stringAsRaw("1337"));
+ }
+
+ public void testToRawFunctionNode() {
+ assertMultiArgFunctionNode(new ToRawFunctionNode());
+ assertFunctionNode(new ToRawFunctionNode().addArg(new ConstantNode(new IntegerResultNode(1337))),
+ 1337, 1337.0, "1337", longAsRaw(1337));
+ }
+
+ public void testNullResultNode() {
+ // TODO: Implement.
+ }
+
+ public void testOrFunctionNode() {
+ assertMultiArgFunctionNode(new OrFunctionNode());
+ assertFunctionNode(new OrFunctionNode().addArg(new ConstantNode(new IntegerResultNode(2)))
+ .addArg(new ConstantNode(new IntegerResultNode(4))),
+ 6, 6.0, "6", longAsRaw(6));
+ }
+
+ public void testDebugWaitFunctionNode() {
+ assertFunctionNode(
+ new DebugWaitFunctionNode(new OrFunctionNode().addArg(new ConstantNode(new IntegerResultNode(2)))
+ .addArg(new ConstantNode(new IntegerResultNode(4))),
+ 0.01,
+ true),
+ 6, 6.0, "6", longAsRaw(6));
+ DebugWaitFunctionNode n = new DebugWaitFunctionNode(new OrFunctionNode().addArg(new ConstantNode(new IntegerResultNode(2)))
+ .addArg(new ConstantNode(new IntegerResultNode(4))),
+ 0.3,
+ false);
+ n.prepare();
+ long start = System.currentTimeMillis();
+ n.execute();
+ long end = System.currentTimeMillis();
+ assertTrue(end - start > 250);
+
+ DebugWaitFunctionNode n2 = new DebugWaitFunctionNode(new OrFunctionNode().addArg(new ConstantNode(new IntegerResultNode(2)))
+ .addArg(new ConstantNode(new IntegerResultNode(4))),
+ 0.5,
+ true);
+ n2.prepare();
+ start = System.currentTimeMillis();
+ n2.execute();
+ end = System.currentTimeMillis();
+ assertTrue(end - start > 450);
+ }
+
+ public void testRawResultNode() {
+ try {
+ new RawResultNode(null);
+ fail("Should not be able to set null value.");
+ } catch (IllegalArgumentException e) {
+ // expected
+ }
+ try {
+ new RawResultNode().setValue(null);
+ fail("Should not be able to set null value.");
+ } catch (IllegalArgumentException e) {
+ // expected
+ }
+ byte[] b = { '7', '.', '4' };
+ RawResultNode a = new RawResultNode(b);
+ byte[] raw = a.getRaw();
+ assertEquals(raw.length, 3);
+ assertEquals(raw[0], '7');
+ assertEquals(raw[1], '.');
+ assertEquals(raw[2], '4');
+ assertEquals(a.getInteger(), 0);
+ assertEquals(a.getFloat(), 0.0);
+ assertEquals(a.getString(), "7.4");
+ assertResultNode(a);
+ compare(new RawResultNode(), new RawResultNode(new byte [] {'z'}), new RawResultNode(new byte [] {'z', 'z'}));
+ compare(new RawResultNode(new byte [] {'z'}), new RawResultNode(new byte [] {'z', 'z'}), new RawResultNode(new byte [] {'z','z','z'}));
+ compare(new RawResultNode(new byte [] {'z'}), new RawResultNode(new byte [] {'z','z'}), new PositiveInfinityResultNode());
+ byte [] b1 = {0x00};
+ byte [] b2 = {0x07};
+ byte [] b3 = {0x7f};
+ byte [] b4 = {(byte)0x80};
+ byte [] b5 = {(byte)0xb1};
+ byte [] b6 = {(byte)0xff};
+
+ assertEquals(0x00, b1[0]);
+ assertEquals(0x07, b2[0]);
+ assertEquals(0x7f, b3[0]);
+ assertEquals(0x80, ((int)b4[0]) & 0xff);
+ assertEquals(0xb1, ((int)b5[0]) & 0xff);
+ assertEquals(0xff, ((int)b6[0]) & 0xff);
+
+ RawResultNode r1 = new RawResultNode(b1);
+ RawResultNode r2 = new RawResultNode(b2);
+ RawResultNode r3 = new RawResultNode(b3);
+ RawResultNode r4 = new RawResultNode(b4);
+ RawResultNode r5 = new RawResultNode(b5);
+ RawResultNode r6 = new RawResultNode(b6);
+
+ assertTrue(r1.compareTo(r1) == 0);
+ assertTrue(r1.compareTo(r2) < 0);
+ assertTrue(r1.compareTo(r3) < 0);
+ assertTrue(r1.compareTo(r4) < 0);
+ assertTrue(r1.compareTo(r5) < 0);
+ assertTrue(r1.compareTo(r6) < 0);
+
+ assertTrue(r2.compareTo(r1) > 0);
+ assertTrue(r2.compareTo(r2) == 0);
+ assertTrue(r2.compareTo(r3) < 0);
+ assertTrue(r2.compareTo(r4) < 0);
+ assertTrue(r2.compareTo(r5) < 0);
+ assertTrue(r2.compareTo(r6) < 0);
+
+ assertTrue(r3.compareTo(r1) > 0);
+ assertTrue(r3.compareTo(r2) > 0);
+ assertTrue(r3.compareTo(r3) == 0);
+ assertTrue(r3.compareTo(r4) < 0);
+ assertTrue(r3.compareTo(r5) < 0);
+ assertTrue(r3.compareTo(r6) < 0);
+
+ assertTrue(r4.compareTo(r1) > 0);
+ assertTrue(r4.compareTo(r2) > 0);
+ assertTrue(r4.compareTo(r3) > 0);
+ assertTrue(r4.compareTo(r4) == 0);
+ assertTrue(r4.compareTo(r5) < 0);
+ assertTrue(r4.compareTo(r6) < 0);
+
+ assertTrue(r5.compareTo(r1) > 0);
+ assertTrue(r5.compareTo(r2) > 0);
+ assertTrue(r5.compareTo(r3) > 0);
+ assertTrue(r5.compareTo(r4) > 0);
+ assertTrue(r5.compareTo(r5) == 0);
+ assertTrue(r5.compareTo(r6) < 0);
+
+ assertTrue(r6.compareTo(r1) > 0);
+ assertTrue(r6.compareTo(r2) > 0);
+ assertTrue(r6.compareTo(r3) > 0);
+ assertTrue(r6.compareTo(r4) > 0);
+ assertTrue(r6.compareTo(r5) > 0);
+ assertTrue(r6.compareTo(r6) == 0);
+
+ }
+
+ private void compare(ResultNode small, ResultNode medium, ResultNode large) {
+ assertTrue(small.compareTo(medium) < 0);
+ assertTrue(small.compareTo(large) < 0);
+ assertTrue(medium.compareTo(large) < 0);
+ assertTrue(medium.compareTo(small) > 0);
+ assertTrue(large.compareTo(small) > 0);
+ assertTrue(large.compareTo(medium) > 0);
+ assertEquals(0, small.compareTo(small));
+ assertEquals(0, medium.compareTo(medium));
+ assertEquals(0, large.compareTo(large));
+ }
+
+ public void testStringResultNode() {
+ try {
+ new StringResultNode(null);
+ fail("Should not be able to set null value.");
+ } catch (IllegalArgumentException e) {
+ // expected
+ }
+ try {
+ new StringResultNode().setValue(null);
+ fail("Should not be able to set null value.");
+ } catch (IllegalArgumentException e) {
+ // expected
+ }
+ StringResultNode a = new StringResultNode("7.3");
+ assertEquals(a.getInteger(), 0);
+ assertEquals(a.getFloat(), 7.3);
+ assertEquals(a.getString(), "7.3");
+ byte[] raw = a.getRaw();
+ assertEquals(raw.length, 3);
+ assertResultNode(a);
+ compare(new StringResultNode(), new StringResultNode("z"), new StringResultNode("zz"));
+ compare(new StringResultNode("z"), new StringResultNode("zz"), new StringResultNode("zzz"));
+ compare(new StringResultNode("a"), new StringResultNode("zz"), new PositiveInfinityResultNode());
+ }
+
+ public void testXorBitFunctionNode() {
+ try {
+ new XorBitFunctionNode(null, 64);
+ fail("Should not be able to set null argument.");
+ } catch (NullPointerException e) {
+ // expected
+ }
+ try {
+ new XorBitFunctionNode().prepare();
+ fail("Should not be able to run prepare.");
+ } catch (RuntimeException e) {
+ // expected
+ }
+ try {
+ new XorBitFunctionNode().execute();
+ fail("Should not be able to run execute.");
+ } catch (RuntimeException e) {
+ // expected
+ }
+ assertUnaryBitFunctionNode(new XorBitFunctionNode());
+ }
+
+ public void testUcaFunctionNode() {
+ try {
+ new UcaFunctionNode(null, "foo");
+ fail("Should not be able to set null argument.");
+ } catch (NullPointerException e) {
+ // expected
+ }
+ try {
+ new UcaFunctionNode().prepare();
+ fail("Should not be able to run prepare.");
+ } catch (RuntimeException e) {
+ // expected
+ }
+ try {
+ new UcaFunctionNode().execute();
+ fail("Should not be able to run execute.");
+ } catch (RuntimeException e) {
+ // expected
+ }
+ assertUcaFunctionNode(new UcaFunctionNode(new ConstantNode(new IntegerResultNode(1337)), "foo", "bar"));
+ }
+
+ public void testNestedFunctions() {
+ assertFunctionNode(new AddFunctionNode()
+ .addArg(new MultiplyFunctionNode().addArg(new ConstantNode(new IntegerResultNode(3)))
+ .addArg(new ConstantNode(
+ new StringResultNode("4"))))
+ .addArg(new ConstantNode(new FloatResultNode(2.0))),
+ 14, 14.0, "14.0", doubleAsRaw(14.0));
+ }
+
+ public void testArithmeticNodes() {
+ ExpressionNode i1 = new ConstantNode(new IntegerResultNode(1));
+ ExpressionNode i2 = new ConstantNode(new IntegerResultNode(2));
+ ExpressionNode f2 = new ConstantNode(new FloatResultNode(9.9));
+ ExpressionNode s2 = new ConstantNode(new StringResultNode("2"));
+ ExpressionNode r2 = new ConstantNode(new RawResultNode(asRaw(2)));
+
+ AddFunctionNode add1 = new AddFunctionNode();
+ add1.addArg(i1).addArg(i2);
+ ExpressionNode exp1 = add1;
+ exp1.prepare();
+ assertTrue(exp1.getResult() instanceof IntegerResultNode);
+ assertTrue(exp1.execute());
+ assertEquals(exp1.getResult().getInteger(), 3);
+ assertTrue(exp1.execute());
+ assertEquals(exp1.getResult().getInteger(), 3);
+
+ AddFunctionNode add2 = new AddFunctionNode();
+ add2.addArg(i1);
+ add2.addArg(f2);
+ add2.prepare();
+ assertTrue(add2.getResult() instanceof FloatResultNode);
+
+ AddFunctionNode add3 = new AddFunctionNode();
+ add3.addArg(i1);
+ add3.addArg(s2);
+ add3.prepare();
+ assertTrue(add3.getResult() instanceof IntegerResultNode);
+
+ AddFunctionNode add4 = new AddFunctionNode();
+ add4.addArg(i1);
+ add4.addArg(r2);
+ add4.prepare();
+ assertTrue(add4.getResult() instanceof IntegerResultNode);
+ }
+
+ public void testArithmeticOperations() {
+ ExpressionNode i1 = new ConstantNode(new IntegerResultNode(1793253241));
+ ExpressionNode i2 = new ConstantNode(new IntegerResultNode(1676521321));
+ ExpressionNode f1 = new ConstantNode(new FloatResultNode(1.1109876));
+ ExpressionNode f2 = new ConstantNode(new FloatResultNode(9.767681239));
+
+ assertAdd(i1, i2, 3469774562l, 3469774562l);
+ assertAdd(i1, f2, 1793253251l, 1793253250.767681239);
+ assertAdd(f1, f2, 11, 10.878668839);
+ assertMultiply(i1, i2, 3006427292488851361l, 3006427292488851361l);
+ assertMultiply(i1, f2, 17515926039l, 1793253241.0 * 9.767681239);
+ assertMultiply(f1, f2, 11, 10.8517727372816364);
+ }
+
+ // --------------------------------------------------------------------------------
+ //
+ // Everything below this point is helper functions.
+ //
+ // --------------------------------------------------------------------------------
+ private static void assertNotEquals(Object lhs, Object rhs) {
+ assertFalse(lhs.equals(rhs));
+ }
+
+ private static void assertUcaFunctionNode(UcaFunctionNode node) {
+ UcaFunctionNode obj = node.clone();
+ assertEquals(obj, node);
+ assertMultiArgFunctionNode((UcaFunctionNode)Identifiable.createFromId(node.getClassId()));
+ }
+
+ public byte[] asRaw(int ... extra) {
+ byte[] mybytes = new byte[extra.length];
+ for (int i = 0; i < mybytes.length; i++) {
+ mybytes[i] = (byte)extra[i];
+ }
+ return mybytes;
+ }
+
+ public byte[] longAsRaw(long value) {
+ return ByteBuffer.allocate(8).putLong(value).array();
+ }
+
+ public byte[] doubleAsRaw(double value) {
+ return ByteBuffer.allocate(8).putDouble(value).array();
+ }
+
+ public byte[] stringAsRaw(String value) {
+ return Utf8.toBytes(value);
+ }
+
+ private static void assertUnaryBitFunctionNode(UnaryBitFunctionNode node) {
+ UnaryBitFunctionNode obj = (UnaryBitFunctionNode)node.clone();
+ assertEquals(obj, node);
+
+ obj.setNumBits(obj.getNumBits() + 1);
+ assertFalse(obj.equals(node));
+
+ assertMultiArgFunctionNode((UnaryBitFunctionNode)Identifiable.createFromId(node.getClassId()));
+ }
+
+ private static void assertMultiArgFunctionNode(MultiArgFunctionNode node) {
+ try {
+ node.addArg(null);
+ fail("Should not be able to add a null argument.");
+ } catch (NullPointerException e) {
+ // expected
+ }
+ int initialSz = node.getNumArgs();
+ node.addArg(new ConstantNode(new IntegerResultNode(69)));
+ assertEquals(1+initialSz, node.getNumArgs());
+ node.addArg(new ConstantNode(new IntegerResultNode(6699)));
+ assertEquals(2+initialSz, node.getNumArgs());
+ node.addArg(new ConstantNode(new IntegerResultNode(666999)));
+ assertEquals(3+initialSz, node.getNumArgs());
+
+ MultiArgFunctionNode obj = (MultiArgFunctionNode)assertSerialize(node);
+ assertEquals(node, obj);
+ assertEquals(node.getNumArgs(), obj.getNumArgs());
+ for (int i = 0, len = node.getNumArgs(); i < len; i++) {
+ assertEquals(node.getArg(i), obj.getArg(i));
+ }
+
+ obj.addArg(new ConstantNode(new IntegerResultNode(69)));
+ assertFalse(node.equals(obj));
+ }
+
+ public void assertAdd(ExpressionNode arg1, ExpressionNode arg2, long lexpected, double dexpected) {
+ assertArith(new AddFunctionNode(), arg1, arg2, lexpected, dexpected);
+ }
+
+ public void assertMultiply(ExpressionNode arg1, ExpressionNode arg2, long lexpected, double dexpected) {
+ assertArith(new MultiplyFunctionNode(), arg1, arg2, lexpected, dexpected);
+ }
+
+ public void assertArith(MultiArgFunctionNode node, ExpressionNode arg1, ExpressionNode arg2, long lexpected, double dexpected) {
+ node.addArg(arg1);
+ node.addArg(arg2);
+ node.prepare();
+ node.execute();
+ assertEquals(lexpected, node.getResult().getInteger());
+ assertEquals(dexpected, node.getResult().getFloat());
+ }
+
+ public void assertFunctionNode(FunctionNode node, long lexpected, double dexpected, String sexpected, byte[] rexpected) {
+ node.prepare();
+ node.execute();
+ assertEquals(lexpected, node.getResult().getInteger());
+ assertEquals(dexpected, node.getResult().getFloat());
+ assertEquals(sexpected, node.getResult().getString());
+ assertTrue(Arrays.equals(rexpected, node.getResult().getRaw()));
+ }
+
+ private static void assertResultNode(ResultNode node) {
+ BufferSerializer buf = new BufferSerializer(new GrowableByteBuffer());
+ long oldInteger = node.getInteger();
+ double oldFloat = node.getFloat();
+ String oldString = node.getString();
+ byte[] oldRaw = node.getRaw();
+ node.serialize(buf);
+ buf.flip();
+ node.deserialize(buf);
+ assertEquals(oldInteger, node.getInteger());
+ assertEquals(oldFloat, node.getFloat());
+ assertEquals(oldString, node.getString());
+ assertEquals(oldRaw.length, node.getRaw().length);
+
+ buf = new BufferSerializer(new GrowableByteBuffer());
+ node.serializeWithId(buf);
+ buf.flip();
+ node.deserializeWithId(buf);
+ assertEquals(oldInteger, node.getInteger());
+ assertEquals(oldFloat, node.getFloat());
+ assertEquals(oldString, node.getString());
+ assertEquals(oldRaw.length, node.getRaw().length);
+
+ buf = new BufferSerializer(new GrowableByteBuffer());
+ node.serializeWithId(buf);
+ buf.flip();
+ ResultNode obj = (ResultNode)Identifiable.create(buf);
+ assertEquals(oldInteger, obj.getInteger());
+ assertEquals(oldFloat, obj.getFloat());
+ assertEquals(oldString, obj.getString());
+ assertEquals(oldRaw.length, obj.getRaw().length);
+
+ assertSerialize(node);
+ }
+
+ private static Identifiable assertSerialize(Identifiable node) {
+ BufferSerializer buf = new BufferSerializer(new GrowableByteBuffer());
+ node.serializeWithId(buf);
+ buf.flip();
+ Identifiable created = Identifiable.create(buf);
+ assertEquals(node, created);
+ assertEquals(buf.getBuf().hasRemaining(), false);
+ Identifiable cloned = created.clone();
+ assertEquals(node, cloned);
+ BufferSerializer createdBuffer = new BufferSerializer(new GrowableByteBuffer());
+ BufferSerializer clonedBuffer = new BufferSerializer(new GrowableByteBuffer());
+ created.serializeWithId(createdBuffer);
+ cloned.serializeWithId(clonedBuffer);
+ assertEquals(createdBuffer.getBuf().limit(), clonedBuffer.getBuf().limit());
+ assertEquals(createdBuffer.position(), clonedBuffer.position());
+ createdBuffer.getBuf().flip();
+ clonedBuffer.getBuf().flip();
+ for (int i = 0; i < createdBuffer.getBuf().limit(); i++) {
+ assertEquals(createdBuffer.getBuf().get(), clonedBuffer.getBuf().get());
+ }
+ return created;
+ }
+}
diff --git a/searchlib/src/test/java/com/yahoo/searchlib/expression/FixedWidthBucketFunctionTestCase.java b/searchlib/src/test/java/com/yahoo/searchlib/expression/FixedWidthBucketFunctionTestCase.java
new file mode 100644
index 00000000000..4836c9c05d2
--- /dev/null
+++ b/searchlib/src/test/java/com/yahoo/searchlib/expression/FixedWidthBucketFunctionTestCase.java
@@ -0,0 +1,21 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.expression;
+
+import org.junit.Test;
+
+import static org.junit.Assert.assertSame;
+
+/**
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
+ */
+public class FixedWidthBucketFunctionTestCase {
+
+ @Test
+ public void requireThatAccessorsWork() {
+ ExpressionNode arg = new AttributeNode("foo");
+ NumericResultNode width = new IntegerResultNode(69L);
+ FixedWidthBucketFunctionNode node = new FixedWidthBucketFunctionNode(width, arg);
+ assertSame(arg, node.getArg());
+ assertSame(width, node.getWidth());
+ }
+}
diff --git a/searchlib/src/test/java/com/yahoo/searchlib/expression/FloatBucketResultNodeTestCase.java b/searchlib/src/test/java/com/yahoo/searchlib/expression/FloatBucketResultNodeTestCase.java
new file mode 100644
index 00000000000..a1255db4536
--- /dev/null
+++ b/searchlib/src/test/java/com/yahoo/searchlib/expression/FloatBucketResultNodeTestCase.java
@@ -0,0 +1,44 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.expression;
+
+import org.junit.Test;
+
+import static org.hamcrest.core.Is.is;
+import static org.junit.Assert.*;
+
+/**
+ * @author lulf
+ * @since 5.1
+ */
+public class FloatBucketResultNodeTestCase extends ResultNodeTest {
+ @Test
+ public void testEmpty() {
+ final double val = 3.14;
+ final FloatBucketResultNode node = createNode(val, val);
+ assertTrue(node.empty());
+ assertCorrectSerialization(node, new FloatBucketResultNode());
+ }
+
+ @Test
+ public void testRange() {
+ FloatBucketResultNode bucket = createNode(3.14, 6.9);
+ assertFalse(bucket.empty());
+ assertEquals(bucket.getFrom(), 3.14, 0.01);
+ assertEquals(bucket.getTo(), 6.9, 0.01);
+ assertCorrectSerialization(bucket, new FloatBucketResultNode());
+ assertTrue(dumpNode(bucket).contains("from: 3.14"));
+ assertTrue(dumpNode(bucket).contains("to: 6.9"));
+ }
+
+ private FloatBucketResultNode createNode(double from, double to) {
+ return new FloatBucketResultNode(from, to);
+ }
+
+ @Test
+ public void testCmp() {
+ assertOrder(createNode(6, 9), createNode(7, 9), createNode(8, 9));
+ assertOrder(createNode(6, 7), createNode(6, 8), createNode(6, 9));
+ assertOrder(createNode(6, 3), createNode(7, 2), createNode(8, 1));
+ assertTrue(createNode(6, 8).onCmp(new NullResultNode()) != 0);
+ }
+}
diff --git a/searchlib/src/test/java/com/yahoo/searchlib/expression/ForceLoadTestCase.java b/searchlib/src/test/java/com/yahoo/searchlib/expression/ForceLoadTestCase.java
new file mode 100755
index 00000000000..e1bfe321619
--- /dev/null
+++ b/searchlib/src/test/java/com/yahoo/searchlib/expression/ForceLoadTestCase.java
@@ -0,0 +1,19 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.expression;
+
+public class ForceLoadTestCase extends junit.framework.TestCase {
+
+ public ForceLoadTestCase(String name) {
+ super(name);
+ }
+
+ public void testLoadClasses() {
+ try {
+ new com.yahoo.searchlib.expression.ForceLoad();
+ assertTrue(com.yahoo.searchlib.expression.ForceLoad.forceLoad());
+ } catch (com.yahoo.system.ForceLoadError e) {
+ e.printStackTrace();
+ assertTrue(false);
+ }
+ }
+}
diff --git a/searchlib/src/test/java/com/yahoo/searchlib/expression/IntegerBucketResultNodeTestCase.java b/searchlib/src/test/java/com/yahoo/searchlib/expression/IntegerBucketResultNodeTestCase.java
new file mode 100644
index 00000000000..a7517952703
--- /dev/null
+++ b/searchlib/src/test/java/com/yahoo/searchlib/expression/IntegerBucketResultNodeTestCase.java
@@ -0,0 +1,35 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.expression;
+
+import com.yahoo.vespa.objects.BufferSerializer;
+import org.junit.Test;
+
+import static org.hamcrest.core.Is.is;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertThat;
+import static org.junit.Assert.assertTrue;
+
+/**
+ * @author lulf
+ * @since 5.1
+ */
+public class IntegerBucketResultNodeTestCase extends ResultNodeTest {
+
+ @Test
+ public void testEmptyRange() {
+ IntegerBucketResultNode bucket = new IntegerBucketResultNode(4, 4);
+ assertTrue(bucket.empty());
+ assertCorrectSerialization(bucket, new IntegerBucketResultNode());
+ }
+
+ @Test
+ public void testRange() {
+ IntegerBucketResultNode bucket = new IntegerBucketResultNode(4, 10);
+ assertThat(bucket.getFrom(), is(4l));
+ assertThat(bucket.getTo(), is(10l));
+ assertFalse(bucket.empty());
+ assertTrue(dumpNode(bucket).contains("from: 4"));
+ assertTrue(dumpNode(bucket).contains("to: 10"));
+ assertCorrectSerialization(bucket, new IntegerBucketResultNode());
+ }
+}
diff --git a/searchlib/src/test/java/com/yahoo/searchlib/expression/IntegerResultNodeTestCase.java b/searchlib/src/test/java/com/yahoo/searchlib/expression/IntegerResultNodeTestCase.java
new file mode 100644
index 00000000000..07c88464958
--- /dev/null
+++ b/searchlib/src/test/java/com/yahoo/searchlib/expression/IntegerResultNodeTestCase.java
@@ -0,0 +1,118 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.expression;
+
+import com.yahoo.vespa.objects.BufferSerializer;
+import com.yahoo.vespa.objects.ObjectDumper;
+import org.junit.Test;
+
+import java.util.Arrays;
+import java.util.List;
+
+import static org.hamcrest.core.Is.is;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertThat;
+import static org.junit.Assert.assertTrue;
+
+/**
+ * @author lulf
+ * @since 5.1
+ */
+public class IntegerResultNodeTestCase extends ResultNodeTest {
+
+ List<NumericResultNode> getResultNodes(long startvalue) {
+ return Arrays.asList(new Int8ResultNode((byte)startvalue),
+ new Int16ResultNode((short)startvalue),
+ new Int32ResultNode((int)startvalue),
+ new IntegerResultNode(startvalue));
+ }
+
+ @Test
+ public void testClassId() {
+ assertThat(new Int8ResultNode().getClassId(), is(Int8ResultNode.classId));
+ assertThat(new Int16ResultNode().getClassId(), is(Int16ResultNode.classId));
+ assertThat(new Int32ResultNode().getClassId(), is(Int32ResultNode.classId));
+ assertThat(new IntegerResultNode().getClassId(), is(IntegerResultNode.classId));
+
+ }
+
+ @Test
+ public void testTypeConversion() {
+ for (NumericResultNode node : getResultNodes(3)) {
+ assertThat(node.getInteger(), is(3l));
+ assertEquals(node.getFloat(), 3.0, 0.01);
+ assertThat(node.getRaw(), is(new byte[]{0, 0, 0, 0, 0, 0, 0, (byte) 3}));
+ assertThat(node.getString(), is("3"));
+ assertThat(node.getNumber().toString(), is("3"));
+ }
+ }
+
+ @Test
+ public void testMath() {
+ for (NumericResultNode node : getResultNodes(5)) {
+ assertThat(node.getInteger(), is(5l));
+ node.negate();
+ assertThat(node.getInteger(), is(-5l));
+ node.multiply(new Int32ResultNode(3));
+ assertThat(node.getInteger(), is(-15l));
+ node.add(new Int32ResultNode(1));
+ assertThat(node.getInteger(), is(-14l));
+ node.divide(new Int32ResultNode(2));
+ assertThat(node.getInteger(), is(-7l));
+ node.modulo(new Int32ResultNode(3));
+ assertThat(node.getInteger(), is(-1l));
+ node.min(new Int32ResultNode(2));
+ assertThat(node.getInteger(), is(-1l));
+ node.min(new Int32ResultNode(-2));
+ assertThat(node.getInteger(), is(-2l));
+ node.max(new Int32ResultNode(-4));
+ assertThat(node.getInteger(), is(-2l));
+ node.max(new Int32ResultNode(4));
+ assertThat(node.getInteger(), is(4l));
+ assertThat(node.onCmp(new Int32ResultNode(3)), is(1));
+ assertThat(node.onCmp(new Int32ResultNode(4)), is(0));
+ assertThat(node.onCmp(new Int32ResultNode(5)), is(-1));
+ node.set(new Int32ResultNode(8));
+ assertThat(node.getInteger(), is(8l));
+ assertThat(node.hashCode(), is((int)(8 + node.getClassId())));
+ assertTrue(dumpNode(node).contains("value: 8"));
+ }
+ }
+
+ @Test
+ public void testInt8() {
+ Int8ResultNode node = new Int8ResultNode();
+ node.setValue((byte) 5);
+ assertThat(node.getInteger(), is(5l));
+ }
+
+ @Test
+ public void testInt16() {
+ Int16ResultNode node = new Int16ResultNode();
+ node.setValue((short)5);
+ assertThat(node.getInteger(), is(5l));
+ }
+
+ @Test
+ public void testInt32() {
+ Int32ResultNode node = new Int32ResultNode();
+ node.setValue(5);
+ assertThat(node.getInteger(), is(5l));
+ }
+
+ @Test
+ public void testLong() {
+ IntegerResultNode node = new IntegerResultNode();
+ node.setValue(5);
+ assertThat(node.getInteger(), is(5l));
+ }
+
+ @Test
+ public void testSerialization() throws IllegalAccessException, InstantiationException {
+ for (NumericResultNode node : getResultNodes(8)) {
+ assertThat(node.getInteger(), is(8l));
+ NumericResultNode out = node.getClass().newInstance();
+ assertCorrectSerialization(node, out);
+ assertThat(out.getInteger(), is(node.getInteger()));
+ }
+ }
+}
diff --git a/searchlib/src/test/java/com/yahoo/searchlib/expression/NullResultNodeTestCase.java b/searchlib/src/test/java/com/yahoo/searchlib/expression/NullResultNodeTestCase.java
new file mode 100644
index 00000000000..9eb4ee4fea7
--- /dev/null
+++ b/searchlib/src/test/java/com/yahoo/searchlib/expression/NullResultNodeTestCase.java
@@ -0,0 +1,36 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.expression;
+
+import com.yahoo.vespa.objects.ObjectDumper;
+import org.junit.Test;
+
+import java.util.regex.Pattern;
+
+import static org.hamcrest.core.Is.is;
+import static org.hamcrest.core.IsNot.not;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertThat;
+import static org.junit.Assert.assertTrue;
+
+/**
+ * @author lulf
+ * @since 5.1
+ */
+public class NullResultNodeTestCase {
+ @Test
+ public void testNullResultNode() {
+ NullResultNode nullRes = new NullResultNode();
+ assertThat(nullRes.onGetClassId(), is(NullResultNode.classId));
+ assertThat(nullRes.getInteger(), is(0l));
+ assertThat(nullRes.getString(), is(""));
+ assertThat(nullRes.getRaw(), is(new byte[0]));
+ assertEquals(nullRes.getFloat(), 0.0, 0.01);
+ assertThat(nullRes.onCmp(new NullResultNode()), is(0));
+ assertThat(nullRes.onCmp(new IntegerResultNode(0)), is(not(0)));
+ ObjectDumper dumper = new ObjectDumper();
+ nullRes.visitMembers(dumper);
+ assertTrue(dumper.toString().contains("result: <NULL>"));
+ nullRes.set(new IntegerResultNode(3));
+ assertThat(nullRes.onCmp(new IntegerResultNode(3)), is(not(0)));
+ }
+}
diff --git a/searchlib/src/test/java/com/yahoo/searchlib/expression/ObjectVisitorTestCase.java b/searchlib/src/test/java/com/yahoo/searchlib/expression/ObjectVisitorTestCase.java
new file mode 100755
index 00000000000..2924ee945e5
--- /dev/null
+++ b/searchlib/src/test/java/com/yahoo/searchlib/expression/ObjectVisitorTestCase.java
@@ -0,0 +1,61 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.expression;
+
+import com.yahoo.vespa.objects.ObjectDumper;
+import com.yahoo.searchlib.expression.FixedWidthBucketFunctionNode;
+import com.yahoo.searchlib.expression.IntegerResultNode;
+import com.yahoo.searchlib.expression.AttributeNode;
+import junit.framework.TestCase;
+
+import java.util.Arrays;
+
+/**
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
+ */
+public class ObjectVisitorTestCase extends TestCase {
+
+ public void testObjectDumper() {
+ assertDump("test: <NULL>\n", null);
+ assertDump("test: 1\n", 1);
+ assertDump("test: 'foo'\n", "foo");
+ assertDump("test: List {\n" +
+ " [0]: 'foo'\n" +
+ " [1]: 69\n" +
+ " [2]: <NULL>\n" +
+ "}\n",
+ Arrays.asList("foo", 69, null));
+ assertDump("test: String[] {\n" +
+ " [0]: 'foo'\n" +
+ " [1]: 'bar'\n" +
+ " [2]: 'baz'\n" +
+ "}\n",
+ new String[] { "foo", "bar", "baz" });
+ assertDump("test: IntegerResultNode {\n" +
+ " classId: 16491\n" +
+ " value: 5\n" +
+ "}\n",
+ new IntegerResultNode(5));
+ assertDump("test: FixedWidthBucketFunctionNode {\n" +
+ " classId: 16461\n" +
+ " result: <NULL>\n" +
+ " args: List {\n" +
+ " [0]: AttributeNode {\n" +
+ " classId: 16439\n" +
+ " result: <NULL>\n" +
+ " attribute: 'foo'\n" +
+ " }\n" +
+ " }\n" +
+ " width: IntegerResultNode {\n" +
+ " classId: 16491\n" +
+ " value: 5\n" +
+ " }\n" +
+ "}\n",
+ new FixedWidthBucketFunctionNode(new IntegerResultNode(5), new AttributeNode("foo")));
+ }
+
+ private void assertDump(String expected, Object obj) {
+ ObjectDumper dump = new ObjectDumper();
+ dump.visit("test", obj);
+ assertEquals(expected, dump.toString());
+ }
+}
diff --git a/searchlib/src/test/java/com/yahoo/searchlib/expression/RangeBucketPreDefFunctionTestCase.java b/searchlib/src/test/java/com/yahoo/searchlib/expression/RangeBucketPreDefFunctionTestCase.java
new file mode 100644
index 00000000000..d2db697c743
--- /dev/null
+++ b/searchlib/src/test/java/com/yahoo/searchlib/expression/RangeBucketPreDefFunctionTestCase.java
@@ -0,0 +1,21 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.expression;
+
+import org.junit.Test;
+
+import static org.junit.Assert.assertSame;
+
+/**
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
+ */
+public class RangeBucketPreDefFunctionTestCase {
+
+ @Test
+ public void requireThatAccessorsWork() {
+ ResultNodeVector bucketList = new IntegerResultNodeVector();
+ ExpressionNode arg = new AttributeNode("foo");
+ RangeBucketPreDefFunctionNode node = new RangeBucketPreDefFunctionNode(bucketList, arg);
+ assertSame(bucketList, node.getBucketList());
+ assertSame(arg, node.getArg());
+ }
+}
diff --git a/searchlib/src/test/java/com/yahoo/searchlib/expression/RawBucketResultNodeTestCase.java b/searchlib/src/test/java/com/yahoo/searchlib/expression/RawBucketResultNodeTestCase.java
new file mode 100644
index 00000000000..83a36445294
--- /dev/null
+++ b/searchlib/src/test/java/com/yahoo/searchlib/expression/RawBucketResultNodeTestCase.java
@@ -0,0 +1,46 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.expression;
+
+import org.junit.Test;
+
+import static org.hamcrest.core.Is.is;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertThat;
+import static org.junit.Assert.assertTrue;
+
+/**
+ * @author lulf
+ * @since 5.1
+ */
+public class RawBucketResultNodeTestCase extends ResultNodeTest {
+ @Test
+ public void testEmpty() {
+ RawBucketResultNode bucket = new RawBucketResultNode(new RawResultNode(new byte[]{6, 9}), new RawResultNode(new byte[]{6, 9}));
+ assertTrue(bucket.empty());
+ assertCorrectSerialization(bucket, new RawBucketResultNode());
+ }
+
+ @Test
+ public void testRange() {
+ RawBucketResultNode bucket = new RawBucketResultNode(new RawResultNode(new byte[]{6, 9}), new RawResultNode(new byte[]{9, 6}));
+ assertFalse(bucket.empty());
+ assertThat(bucket.getFrom(), is(new byte[]{6, 9}));
+ assertThat(bucket.getTo(), is(new byte[]{9, 6}));
+ assertCorrectSerialization(bucket, new RawBucketResultNode());
+ assertTrue(dumpNode(bucket).contains("value: RawData(data = [6, 9])"));
+ assertTrue(dumpNode(bucket).contains("value: RawData(data = [9, 6])"));
+ }
+
+ private RawBucketResultNode createNode(int from, int to) {
+ return new RawBucketResultNode(new RawResultNode(new byte[]{(byte)from}),
+ new RawResultNode(new byte[]{(byte)to}));
+ }
+
+ @Test
+ public void testCmp() {
+ assertOrder(createNode(6, 9), createNode(7, 9), createNode(8, 9));
+ assertOrder(createNode(6, 7), createNode(6, 8), createNode(6, 9));
+ assertOrder(createNode(6, 3), createNode(7, 2), createNode(8, 1));
+ assertTrue(createNode(6, 8).onCmp(new NullResultNode()) != 0);
+ }
+}
diff --git a/searchlib/src/test/java/com/yahoo/searchlib/expression/ResultNodeTest.java b/searchlib/src/test/java/com/yahoo/searchlib/expression/ResultNodeTest.java
new file mode 100644
index 00000000000..17744db7edb
--- /dev/null
+++ b/searchlib/src/test/java/com/yahoo/searchlib/expression/ResultNodeTest.java
@@ -0,0 +1,43 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.expression;
+
+import com.yahoo.vespa.objects.BufferSerializer;
+import com.yahoo.vespa.objects.ObjectDumper;
+
+import static org.hamcrest.core.Is.is;
+import static org.junit.Assert.assertThat;
+import static org.junit.Assert.assertTrue;
+
+/**
+ * @author lulf
+ * @since 5.1
+ */
+public class ResultNodeTest {
+ public String dumpNode(ResultNode node) {
+ ObjectDumper dump = new ObjectDumper();
+ node.visitMembers(dump);
+ return dump.toString();
+ }
+
+ public void assertCorrectSerialization(ResultNode from, ResultNode to) {
+ BufferSerializer buffer = new BufferSerializer();
+ from.serialize(buffer);
+ buffer.flip();
+ to.deserialize(buffer);
+ assertThat(from.onCmp(to), is(0));
+ }
+
+ public void assertOrder(ResultNode a, ResultNode b, ResultNode c) {
+ assertTrue(a.onCmp(a) == 0);
+ assertTrue(a.onCmp(b) < 0);
+ assertTrue(a.onCmp(c) < 0);
+
+ assertTrue(b.onCmp(a) > 0);
+ assertTrue(b.onCmp(b) == 0);
+ assertTrue(b.onCmp(c) < 0);
+
+ assertTrue(c.onCmp(a) > 0);
+ assertTrue(c.onCmp(b) > 0);
+ assertTrue(c.onCmp(c) == 0);
+ }
+}
diff --git a/searchlib/src/test/java/com/yahoo/searchlib/expression/ResultNodeVectorTestCase.java b/searchlib/src/test/java/com/yahoo/searchlib/expression/ResultNodeVectorTestCase.java
new file mode 100644
index 00000000000..ba306099a80
--- /dev/null
+++ b/searchlib/src/test/java/com/yahoo/searchlib/expression/ResultNodeVectorTestCase.java
@@ -0,0 +1,167 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.expression;
+
+import com.yahoo.vespa.objects.BufferSerializer;
+import org.junit.Test;
+
+import java.util.List;
+
+import static org.hamcrest.core.Is.is;
+import static org.hamcrest.core.IsNot.not;
+import static org.junit.Assert.assertThat;
+import static org.junit.Assert.assertTrue;
+
+/**
+ * @author lulf
+ * @since 5.1
+ */
+public class ResultNodeVectorTestCase extends ResultNodeTest {
+ @Test
+ public void testClassId() {
+ assertThat(new IntegerResultNodeVector().getClassId(), is(IntegerResultNodeVector.classId));
+ assertThat(new Int32ResultNodeVector().getClassId(), is(Int32ResultNodeVector.classId));
+ assertThat(new Int16ResultNodeVector().getClassId(), is(Int16ResultNodeVector.classId));
+ assertThat(new Int8ResultNodeVector().getClassId(), is(Int8ResultNodeVector.classId));
+ assertThat(new FloatResultNodeVector().getClassId(), is(FloatResultNodeVector.classId));
+ }
+
+ @Test
+ public void testVectorAdd() {
+ Int8ResultNodeVector i8 = new Int8ResultNodeVector();
+ i8.add(new Int8ResultNode((byte)9));
+ i8.add(new Int8ResultNode((byte)2));
+ i8.add((ResultNode)new Int8ResultNode((byte)5));
+ assertThat(i8.getVector().size(), is(3));
+
+ Int16ResultNodeVector i16 = new Int16ResultNodeVector();
+ i16.add(new Int16ResultNode((short)9));
+ i16.add(new Int16ResultNode((short)2));
+ i16.add((ResultNode)new Int16ResultNode((short)5));
+ assertThat(i16.getVector().size(), is(3));
+
+ Int32ResultNodeVector i32 = new Int32ResultNodeVector();
+ i32.add(new Int32ResultNode(9));
+ i32.add(new Int32ResultNode(2));
+ i32.add((ResultNode)new Int32ResultNode(5));
+ assertThat(i32.getVector().size(), is(3));
+
+ IntegerResultNodeVector ieger = new IntegerResultNodeVector();
+ ieger.add(new IntegerResultNode(9));
+ ieger.add(new IntegerResultNode(2));
+ ieger.add((ResultNode)new IntegerResultNode(5));
+ assertThat(ieger.getVector().size(), is(3));
+
+ FloatResultNodeVector floatvec = new FloatResultNodeVector();
+ floatvec.add(new FloatResultNode(3.3));
+ floatvec.add(new FloatResultNode(3.4));
+ floatvec.add((ResultNode)new FloatResultNode(3.5));
+ assertThat(floatvec.getVector().size(), is(3));
+ }
+
+ @Test
+ public void testCmp() {
+ ResultNodeVector int8vec = new Int8ResultNodeVector().add(new Int8ResultNode((byte) 2));
+ ResultNodeVector int8veclarge = new Int8ResultNodeVector().add(new Int8ResultNode((byte) 2)).add(new Int8ResultNode((byte) 5));
+ ResultNodeVector int8vecsmall = new Int8ResultNodeVector().add(new Int8ResultNode((byte) 1));
+
+ ResultNodeVector int16vec = new Int16ResultNodeVector().add(new Int16ResultNode((short) 2));
+ ResultNodeVector int16veclarge = new Int16ResultNodeVector().add(new Int16ResultNode((short) 2)).add(new Int16ResultNode((short) 5));
+ ResultNodeVector int16vecsmall = new Int16ResultNodeVector().add(new Int16ResultNode((short) 1));
+
+ ResultNodeVector int32vec = new Int32ResultNodeVector().add(new Int32ResultNode(2));
+ ResultNodeVector int32veclarge = new Int32ResultNodeVector().add(new Int32ResultNode(2)).add(new Int32ResultNode(5));
+ ResultNodeVector int32vecsmall = new Int32ResultNodeVector().add(new Int32ResultNode(1));
+
+ ResultNodeVector intvec = new IntegerResultNodeVector().add(new IntegerResultNode(2));
+ ResultNodeVector intveclarge = new IntegerResultNodeVector().add(new IntegerResultNode(2)).add(new IntegerResultNode(5));
+ ResultNodeVector intvecsmall = new IntegerResultNodeVector().add(new IntegerResultNode(1));
+
+ FloatResultNodeVector floatvec = new FloatResultNodeVector().add(new FloatResultNode(2.2));
+ FloatResultNodeVector floatveclarge = new FloatResultNodeVector().add(new FloatResultNode(2.2)).add(new FloatResultNode(5.5));
+ FloatResultNodeVector floatvecsmall = new FloatResultNodeVector().add(new FloatResultNode(1.2));
+
+ StringResultNodeVector strvec = new StringResultNodeVector().add(new StringResultNode("foo"));
+ StringResultNodeVector strveclarge = new StringResultNodeVector().add(new StringResultNode("foolio"));
+ StringResultNodeVector strvecsmall = new StringResultNodeVector().add(new StringResultNode("bario"));
+
+ RawResultNodeVector rawvec = new RawResultNodeVector().add(new RawResultNode(new byte[]{6, 9}));
+ RawResultNodeVector rawveclarge = new RawResultNodeVector().add(new RawResultNode(new byte[]{9, 6}));
+ RawResultNodeVector rawvecsmall = new RawResultNodeVector().add(new RawResultNode(new byte[]{6, 6}));
+
+ assertClassCmp(int8vec);
+ assertClassCmp(int16vec);
+ assertClassCmp(int32vec);
+ assertClassCmp(intvec);
+ assertClassCmp(floatvec);
+ assertClassCmp(strvec);
+ assertClassCmp(rawvec);
+
+ assertVecEqual(int8vec, int8vec);
+ assertVecLt(int8vec, int8veclarge);
+ assertVecGt(int8veclarge, int8vec);
+ assertVecGt(int8vec, int8vecsmall);
+ assertVecLt(int8vecsmall, int8vec);
+
+ assertVecEqual(int16vec, int16vec);
+ assertVecLt(int16vec, int16veclarge);
+ assertVecGt(int16veclarge, int16vec);
+ assertVecGt(int16vec, int16vecsmall);
+ assertVecLt(int16vecsmall, int16vec);
+
+ assertVecEqual(int32vec, int32vec);
+ assertVecLt(int32vec, int32veclarge);
+ assertVecGt(int32veclarge, int32vec);
+ assertVecGt(int32vec, int32vecsmall);
+ assertVecLt(int32vecsmall, int32vec);
+
+ assertVecEqual(intvec, intvec);
+ assertVecLt(intvec, intveclarge);
+ assertVecGt(intveclarge, intvec);
+ assertVecGt(intvec, intvecsmall);
+ assertVecLt(intvecsmall, intvec);
+
+ assertVecEqual(floatvec, floatvec);
+ assertVecLt(floatvec, floatveclarge);
+ assertVecGt(floatveclarge, floatvec);
+ assertVecGt(floatvec, floatvecsmall);
+ assertVecLt(floatvecsmall, floatvec);
+
+ assertVecEqual(strvec, strvec);
+ assertVecLt(strvec, strveclarge);
+ assertVecGt(strveclarge, strvec);
+ assertVecGt(strvec, strvecsmall);
+ assertVecLt(strvecsmall, strvec);
+
+ assertVecEqual(rawvec, rawvec);
+ assertVecLt(rawvec, rawveclarge);
+ assertVecGt(rawveclarge, rawvec);
+ assertVecGt(rawvec, rawvecsmall);
+ assertVecLt(rawvecsmall, rawvec);
+ }
+
+ private void assertVecLt(ResultNodeVector vec1, ResultNodeVector vec2) {
+ assertTrue(vec1.onCmp(vec2) < 0);
+ }
+
+ private void assertVecGt(ResultNodeVector vec1, ResultNodeVector vec2) {
+ assertTrue(vec1.onCmp(vec2) > 0);
+ }
+
+ private void assertVecEqual(ResultNodeVector vec1, ResultNodeVector vec2) {
+ assertThat(vec1.onCmp(vec2), is(0));
+ }
+
+ private void assertClassCmp(ResultNodeVector add) {
+ assertThat(add.onCmp(new NullResultNode()), is(not(0)));
+ }
+
+ @Test
+ public void testSerialize() throws InstantiationException, IllegalAccessException {
+ assertCorrectSerialization(new FloatResultNodeVector().add(new FloatResultNode(1.1)).add(new FloatResultNode(3.3)), new FloatResultNodeVector());
+ assertCorrectSerialization(new IntegerResultNodeVector().add(new IntegerResultNode(1)).add(new IntegerResultNode(3)), new IntegerResultNodeVector());
+ assertCorrectSerialization(new Int16ResultNodeVector().add(new Int16ResultNode((short) 1)).add(new Int16ResultNode((short) 3)), new Int16ResultNodeVector());
+ assertCorrectSerialization(new Int8ResultNodeVector().add(new Int8ResultNode((byte) 1)).add(new Int8ResultNode((byte) 3)), new Int8ResultNodeVector());
+ assertCorrectSerialization(new StringResultNodeVector().add(new StringResultNode("foo")).add(new StringResultNode("bar")), new StringResultNodeVector());
+ assertCorrectSerialization(new RawResultNodeVector().add(new RawResultNode(new byte[]{6, 9})).add(new RawResultNode(new byte[]{9, 6})), new RawResultNodeVector());
+ }
+}
diff --git a/searchlib/src/test/java/com/yahoo/searchlib/expression/StringBucketResultNodeTestCase.java b/searchlib/src/test/java/com/yahoo/searchlib/expression/StringBucketResultNodeTestCase.java
new file mode 100644
index 00000000000..b82c7a34048
--- /dev/null
+++ b/searchlib/src/test/java/com/yahoo/searchlib/expression/StringBucketResultNodeTestCase.java
@@ -0,0 +1,57 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.expression;
+
+import org.junit.Test;
+
+import static org.hamcrest.core.Is.is;
+import static org.junit.Assert.assertThat;
+import static org.junit.Assert.assertTrue;
+
+/**
+ * @author lulf
+ * @since 5.1
+ */
+public class StringBucketResultNodeTestCase extends ResultNodeTest {
+ @Test
+ public void testEmpty() {
+ StringBucketResultNode bucket = new StringBucketResultNode("aaa", "aaa");
+ assertTrue(bucket.empty());
+ assertCorrectSerialization(bucket, new StringBucketResultNode());
+ }
+
+ @Test
+ public void testRange() {
+ StringBucketResultNode bucket = new StringBucketResultNode("a", "d");
+ assertThat(bucket.getFrom(), is("a"));
+ assertThat(bucket.getTo(), is("d"));
+ assertTrue(dumpNode(bucket).contains("value: 'a'"));
+ assertTrue(dumpNode(bucket).contains("value: 'd'"));
+ assertCorrectSerialization(bucket, new StringBucketResultNode());
+ }
+
+ @Test
+ public void testCmp() {
+ StringBucketResultNode b1 = new StringBucketResultNode("a", "d");
+ StringBucketResultNode b2 = new StringBucketResultNode("d", "h");
+ StringBucketResultNode b3 = new StringBucketResultNode("h", "u");
+ assertTrue(b1.onCmp(b1) == 0);
+ assertTrue(b1.onCmp(b2) < 0);
+ assertTrue(b1.onCmp(b3) < 0);
+
+ assertTrue(b2.onCmp(b1) > 0);
+ assertTrue(b2.onCmp(b2) == 0);
+ assertTrue(b2.onCmp(b3) < 0);
+
+ assertTrue(b3.onCmp(b1) > 0);
+ assertTrue(b3.onCmp(b2) > 0);
+ assertTrue(b3.onCmp(b3) == 0);
+
+ b2 = new StringBucketResultNode("a", "b");
+ assertTrue(b1.onCmp(b2) > 0);
+ b2 = new StringBucketResultNode("a", "f");
+ assertTrue(b1.onCmp(b2) < 0);
+ b2 = new StringBucketResultNode("k", "a");
+ assertTrue(b1.onCmp(b2) < 0);
+ assertTrue(b1.onCmp(new NullResultNode()) != 0);
+ }
+}
diff --git a/searchlib/src/test/java/com/yahoo/searchlib/expression/TimeStampFunctionTestCase.java b/searchlib/src/test/java/com/yahoo/searchlib/expression/TimeStampFunctionTestCase.java
new file mode 100644
index 00000000000..4d591843321
--- /dev/null
+++ b/searchlib/src/test/java/com/yahoo/searchlib/expression/TimeStampFunctionTestCase.java
@@ -0,0 +1,29 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.expression;
+
+import org.junit.Test;
+
+import java.util.Arrays;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertSame;
+
+/**
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
+ */
+public class TimeStampFunctionTestCase {
+
+ @Test
+ public void requireThatAccessorsWork() {
+ ExpressionNode arg = new AttributeNode("foo");
+ for (TimeStampFunctionNode.TimePart part : TimeStampFunctionNode.TimePart.values()) {
+ for (Boolean gmt : Arrays.asList(true, false)) {
+ TimeStampFunctionNode node = new TimeStampFunctionNode(arg, part, gmt);
+ assertSame(arg, node.getArg());
+ assertEquals(part, node.getTimePart());
+ assertEquals(gmt, node.isGmt());
+ assertEquals(!gmt, node.isLocal());
+ }
+ }
+ }
+}
diff --git a/searchlib/src/test/java/com/yahoo/searchlib/expression/ZCurveFunctionTestCase.java b/searchlib/src/test/java/com/yahoo/searchlib/expression/ZCurveFunctionTestCase.java
new file mode 100644
index 00000000000..899e4e28a20
--- /dev/null
+++ b/searchlib/src/test/java/com/yahoo/searchlib/expression/ZCurveFunctionTestCase.java
@@ -0,0 +1,25 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.expression;
+
+import org.junit.Test;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertSame;
+
+/**
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
+ */
+public class ZCurveFunctionTestCase {
+
+ @Test
+ public void requireThatAccessorsWork() {
+ ExpressionNode arg = new AttributeNode("foo");
+ ZCurveFunctionNode node = new ZCurveFunctionNode(arg, ZCurveFunctionNode.Dimension.X);
+ assertSame(arg, node.getArg());
+ assertEquals(ZCurveFunctionNode.Dimension.X, node.getDimension());
+
+ node = new ZCurveFunctionNode(arg, ZCurveFunctionNode.Dimension.Y);
+ assertSame(arg, node.getArg());
+ assertEquals(ZCurveFunctionNode.Dimension.Y, node.getDimension());
+ }
+}
diff --git a/searchlib/src/test/java/com/yahoo/searchlib/gbdt/GbdtConverterTestCase.java b/searchlib/src/test/java/com/yahoo/searchlib/gbdt/GbdtConverterTestCase.java
new file mode 100644
index 00000000000..fc21b3496f9
--- /dev/null
+++ b/searchlib/src/test/java/com/yahoo/searchlib/gbdt/GbdtConverterTestCase.java
@@ -0,0 +1,169 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.gbdt;
+
+import com.yahoo.searchlib.rankingexpression.RankingExpression;
+import com.yahoo.searchlib.rankingexpression.parser.ParseException;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+import java.io.ByteArrayOutputStream;
+import java.io.PrintStream;
+import java.io.UnsupportedEncodingException;
+import java.security.Permission;
+
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.fail;
+
+/**
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen Hult</a>
+ */
+public class GbdtConverterTestCase {
+
+ @Before
+ public void enableSecurityManager() {
+ System.setSecurityManager(new NoExitSecurityManager());
+ }
+
+ @After
+ public void disableSecurityManager() {
+ System.setSecurityManager(null);
+ }
+
+ @Test
+ public void testOnlyOneArgumentIsAccepted() throws UnsupportedEncodingException {
+ assertError("Usage: GbdtConverter <filename>\n", new String[0]);
+ assertError("Usage: GbdtConverter <filename>\n", new String[] { "foo", "bar" });
+ }
+
+ @Test
+ public void testFileIsFound() throws UnsupportedEncodingException {
+ assertError("Could not find file 'not.found'.\n", new String[] { "not.found" });
+ }
+
+ @Test
+ public void testFileParsingExceptionIsCaught() throws UnsupportedEncodingException {
+ assertError("An error occurred while parsing the content of file 'src/test/files/gbdt_err.xml': " +
+ "Node 'Unknown' has no 'DecisionTree' children.\n",
+ new String[] { "src/test/files/gbdt_err.xml" });
+ }
+
+ @Test
+ public void testEmptyTreesAreIgnored() throws Exception {
+ assertConvert("src/test/files/gbdt_empty_tree.xml",
+ "if (INFD_SCORE < 3.2105989, if (GMP_SCORE < 0.013873, if (INFD_SCORE < 1.8138845, 0.0018257, if (GMP_SCORE < 0.006184, 0.0034753, 0.0062119)), if (INFD_SCORE < 1.5684295, if (GMP_SCORE < 0.0217475, 0.0043064, 0.0082065), 0.0110743)), if (GMP_SCORE < 0.010012, if (INFD_SCORE < 5.5982456, if (GMP_SCORE < 0.0052305, 0.0060169, 0.0094888), 0.0119292), 0.017415))\n" +
+ "\n");
+ }
+
+ @Test
+ public void testTreesMayContainAResponse() throws Exception {
+ assertConvert("src/test/files/gbdt_tree_response.xml",
+ "if (INFD_SCORE < 2.128036, -1.12E-5, 8.71E-5) +\n" +
+ "if (value(0) < 1.0, 2.8E-6, 0.0) +\n" +
+ "if (GMP_SCORE < 0.016798, if (INFD_SCORE < 3.9760852, if (INFD_SCORE < 0.1266405, -5.98E-5, 2.25E-5), -1.383E-4), 1.529E-4)\n" +
+ "\n");
+ }
+
+ @Test
+ public void testConvertedModelIsPrintedToSystemOut() throws Exception {
+ assertConvert("src/test/files/gbdt.xml",
+ "if (F55 < 2.0932798, if (F42 < 1.7252731, if (F33 < 0.5, if (F38 < 1.5367546, 1.7333333, 1.3255814), if (F37 < 0.675922, 1.9014085, 1.0)), if (F109 < 0.5, if (F116 < 5.25, if (F111 < 0.0521445, 1.0, 1.9090909), if (F38 < 4.0740733, 0.8, if (F38 < 6.6152048, 1.7142857, 0.625))), 1.5945946)), if (F109 < 0.5, if (F113 < 0.7835808, if (F110 < 491.0, if (F56 < 2.5423126, if (F108 < 243.5, 1.375, 0.78), 0.5), 2.0), if (F103 < 0.9918365, 1.6, 0.3333333)), if (F59 < 0.9207, if (F30 < 0.86, 1.5890411, 0.625), if (F100 < 5.9548216, 1.0, 0.0)))) +\n" +
+ "if (F55 < 59.5480576, if (F42 < 1.8308522, if (F100 < 5.9549484, if (F107 < 0.5, -0.3406279, if (F56 < 1.7057916, if (F36 < 3.778285, if (F103 < 0.5600199, 0.047108, if (F36 < 1.2203553, if (F102 < 1.5, 0.0460316, -0.473794), -0.9825869)), -0.8848045), if (F47 < 15.5, 0.348047, -1.0890411))), 1.75), if (F113 < 0.8389627, if (F110 < 7.5, -0.5778378, if (F111 < 0.8596972, if (F114 < 831.5, if (F113 < 0.3807178, 0.0497646, if (F110 < 63.0, 0.6549377, 0.2486999)), if (F39 < 8.9685574, 0.3222195, -0.1690968)), 1.0381818)), if (F58 < 0.889763, -0.0702703, -1.6))), if (F102 < 3.5, -0.3059684, -1.5890411)) +\n" +
+ "if (F55 < 119.6311035, if (F55 < 90.895813, if (F39 < 12.162282, if (F35 < 1.1213787, if (F55 < 34.9389648, if (F45 < 3.5, if (F51 < 0.0502058, if (F103 < 0.8550526, if (F55 < 4.96804, 0.048519, 0.6596588), if (F38 < 1.3808891, -0.7416763, 0.0176633)), 0.4502234), -0.6811898), 0.5572351), if (F100 < 3.3971992, if (F39 < 7.0869236, if (F43 < 5.5100875, if (F46 < 4.5, -0.1702421, -0.9797453), -1.5426025), 0.0774408), if (F52 < 22.3562355, if (F35 < 4.4263992, 0.4011598, -0.3898472), -1.75))), if (F39 < 14.5762558, if (F109 < 0.5, 1.6616928, 0.4001626), if (F100 < 3.0519419, 0.616491, -0.1808479))), -1.2135522), 0.5535716) +\n" +
+ "if (F43 < 9.272151, if (F36 < 9.0613861, if (F115 < 36.5, if (F34 < 1.4407213, if (F41 < 10.4713802, if (F34 < 1.2610778, if (F105 < 8.2159586, if (F46 < 88.5, 0.0075843, -0.6358738), if (F105 < 9.5308332, 1.4464284, -0.0895592)), 0.3532708), -1.8289603), if (F45 < 24.5, if (F111 < 0.9095335, if (F113 < 0.0529755, -0.6272416, if (F50 < 34.2163391, if (F113 < 0.0813664, 0.3683843, if (F34 < 1.6283135, -0.6334628, -0.1610307)), 1.5559684)), -1.7492068), 1.5060212)), if (F49 < 23.5787125, if (F100 < 6.5115452, if (F37 < 0.8601408, if (F57 < 6.5, 0.0547747, 1.193346), 0.6402962), 1.7395205), 2.5559684)), -3.1016318), 1.8657542) +\n" +
+ "if (F55 < 764.9404297, if (F34 < 23.2379246, if (F36 < 9.2296076, if (F114 < 116.0, if (F108 < 13.5, if (F108 < 12.5, -0.2736142, -1.7384173), if (F110 < 10.5, 0.0794336, -0.2171646)), if (F114 < 129.0, if (F109 < 0.5, 1.4407836, -0.1458547), if (F111 < 0.9703438, if (F47 < 18.5, if (F32 < 3.5, 0.0708936, if (F118 < 0.6794872, if (F119 < 3.8533711, if (F34 < 0.1213822, -2.0046196, -8.566E-4), -0.9490828), 0.0790339)), if (F113 < 0.3637481, 0.1161088, -0.9997786)), 1.3003114))), if (F111 < 0.2438112, -2.0582902, 0.6918949)), if (F115 < 95.0, -2.8602383, -0.0063699)), if (F101 < 0.9411763, -2.0253283, -0.6417007)) +\n" +
+ "if (F114 < 516.0, if (F49 < 8.9197922, if (F48 < 3.5, if (F36 < 1.3889931, if (F43 < 0.9699799, if (F34 < 9.6113167, if (F106 < 8.5, if (F108 < 153.5, if (F110 < 130.5, 0.180242, 2.545163), if (F108 < 161.5, -2.2253985, if (F55 < 31.4965668, -0.0122572, 0.7364454))), -0.2596613), 0.7247348), if (F111 < 0.2817393, -0.6409092, 0.2100071)), if (F116 < 18.75, 0.511352, -0.1093323)), 0.9379161), 0.3603908), if (F46 < 32.5, if (F46 < 5.5, if (F39 < 11.7440758, if (F115 < 774.0, -0.0433343, -1.7439904), -0.3662575), 0.5413771), if (F110 < 67.0, if (F46 < 34.5, -2.6581287, -0.9399502), 0.075664))) +\n" +
+ "if (F42 < 24.3080139, if (F118 < 0.8452381, if (F119 < 6.2847767, if (F100 < 3.2778931, if (F46 < 30.0, if (F43 < 1.2712233, if (F104 < 3.5, 0.1365837, 0.5592712), if (F39 < 0.6294491, -0.8729556, -0.0123421)), 3.7677864), if (F111 < 0.6580936, if (F103 < 0.9319581, -0.2822538, if (F107 < 1.5, -0.3983539, if (F104 < 5.5, 0.0792465, 0.7273864))), if (F104 < 3.5, -1.1550477, 0.0490706))), 1.4735778), if (F111 < 0.3724709, if (F51 < 16.0989189, if (F114 < 154.0, if (F108 < 57.5, -0.0675733, -0.3994327), -0.0250285), -1.4871782), if (F34 < 2.1943491, 0.0229469, if (F108 < 1527.0, 1.4706301, 0.0285333)))), 3.489949) +\n" +
+ "if (F34 < 30.3465347, if (F103 < 0.9996098, if (F38 < 0.558669, if (F105 < 3.6287756, if (F104 < 3.5, if (F31 < 0.86, 0.1121421, 1.8153648), -0.8281607), if (F55 < 37.6819153, 0.9656266, 0.1585065)), if (F113 < 0.840385, if (F38 < 9.6623116, if (F46 < 136.0, if (F53 < 0.5548913, if (F38 < 8.4469957, if (F34 < 3.1969421, if (F114 < 20.0, -0.2944335, 0.03499), if (F34 < 3.4671984, -1.3154796, -0.1742507)), 0.4071658), if (F105 < 2.315434, if (F110 < 59.5, -0.1713032, -1.420465), -0.1456236)), 0.5520287), if (F108 < 12156.5, if (F111 < 0.3892631, -0.16285, -0.9015614), -2.6391831)), 0.2011691)), -3.073049), -3.2461861) +\n" +
+ "if (F55 < 28.4668102, if (F34 < 0.4929269, if (F30 < 0.86, if (F37 < 0.8360082, -0.0815482, -0.7898247), -0.5144471), if (F108 < 20498.0, if (F44 < 1.1856511, if (F56 < 1.0706565, if (F39 < 8.377079, if (F59 < 0.5604, 0.0429508, if (F34 < 0.7287493, -1.0264078, 0.6052195)), -0.4814408), if (F119 < 3.7530813, if (F115 < 8.5, 0.4916013, 0.0457533), if (F114 < 1093.5, 1.1673864, 0.3411176))), -0.6176305), if (F100 < 3.151973, 2.6908011, 0.3835885))), if (F116 < 62.0, if (F114 < 562.0, -0.415543, if (F103 < 0.9826763, -0.1169933, if (F104 < 0.5, -0.0665763, 1.0238317))), if (F100 < 5.8046961, -3.2954836, 0.2781039))) +\n" +
+ "if (F34 < 26.9548168, if (F35 < 18.4714928, if (F115 < 698.0, if (F116 < 41.5, if (F38 < 1.1138718, if (F46 < 9.0, if (F31 < 0.86, 0.1059075, -0.2995292), if (F46 < 25.5, if (F46 < 13.0, 0.6297316, 1.8451736), 0.2079161)), if (F38 < 19.3839836, if (F49 < 29.9797497, if (F46 < 235.5, if (F38 < 1.2626771, -0.5165347, if (F35 < 10.3027954, if (F50 < 0.2823648, -0.0424489, if (F113 < 0.0776736, 0.7495954, -0.2948665)), 0.3229146)), -1.0711968), 0.3153474), if (F116 < 5.2182379, 2.8017734, 0.3444192))), if (F113 < 0.5691726, 1.7530511, 0.3534861)), -2.4915219), if (F103 < 0.9680555, -2.1724317, 0.2143739)), 3.1712332)\n" +
+ "\n");
+ }
+
+ @Test
+ public void testSetTestsWork() throws Exception {
+ assertConvert("src/test/files/gbdt_set_inclusion_test.xml",
+ "if (AGE_GROUP$ in [2.0], if (EDUCATION_LEVEL$ in [0.0], -0.25, 0.125), if (AGE_GROUP$ in [1.0], 0.125, 0.25)) +\n" +
+ "if (AGE_GROUP$ in [2.0], if (EDUCATION_LEVEL$ in [0.0], -0.2189117, -0.0), if (EDUCATION_LEVEL$ in [0.0], 0.1094559, 0.2343953)) +\n" +
+ "if (AGE_GROUP$ in [2.0], -0.0962185, if (EDUCATION_LEVEL$ in [0.0], if (AGE_GROUP$ in [1.0], 0.0, 0.2055456), 0.205553)) +\n" +
+ "if (EDUCATION_LEVEL$ in [0.0], 0.0905977, 0.1812016) +\n" +
+ "if (EDUCATION_LEVEL$ in [0.0, 1.0], if (AGE_GROUP$ in [2.0], if (EDUCATION_LEVEL$ in [0.0], -0.191772, -0.0), if (AGE_GROUP$ in [1.0], if (EDUCATION_LEVEL$ in [0.0], 0.0, 0.1608304), 0.1708644)), 0.1923393) +\n" +
+ "if (EDUCATION_LEVEL$ in [\"foo\", \"bar\"], if (AGE_GROUP$ in [2.0], if (EDUCATION_LEVEL$ in [\"baz\"], -0.1696624, -0.0), if (AGE_GROUP$ in [1.0], if (EDUCATION_LEVEL$ in [0.0], 0.0, 0.1438091), 0.1521967)), 0.2003772) +\n" +
+ "if (value(0) < 1.0, -0.0108278, 0.0) +\n" +
+ "if (EDUCATION_LEVEL$ in [0.0], -0.1500528, if (GENDER$ in [1.0], 0.0652894, 0.1543407)) +\n" +
+ "if (AGE_GROUP$ in [1.0], 0.0, 0.1569706) +\n" +
+ "if (AGE_GROUP$ in [1.0], 0.0, if (EDUCATION_LEVEL$ in [1.0], 0.0, 0.1405829))\n" +
+ "\n");
+ }
+
+ @Test
+ public void testExtModelCausesBranchProbabilitiesToBeUsed() throws Exception {
+ assertConvert("src/test/files/gbdt.ext.xml",
+ "if (F4 < 0.6972222, if (F1 < 0.7928572, if (F54 < 0.9166666, 0.1145211, if (F111 < 1105.0, 0.3115265, 1.6772487, 0.77256316), 0.89193755), 1.493617, 0.970347), if (F111 < 85.5, 1.1202186, 2.5111421, 0.33763838), 0.93598676) +\n" +
+ "if (F1 < 0.8875, if (F1 < 0.0634921, 0.4755052, if (F111 < 8765.0, -0.0572274, 0.542222, 0.983461), 0.04500549), if (F114 < 55.0, -0.2409815, if (F54 < 0.55, 0.2211539, 1.3125561, 0.29620853), 0.21268657), 0.9683477) +\n" +
+ "if (F4 < 0.6972222, if (F3 < 0.9285715, if (F8 < 0.0540936, -0.007629, 0.322873, 0.95869595), if (F1 < 0.8166667, 0.843579, 0.1053924, 0.57522124), 0.97148263), if (F4 < 0.7619048, -0.5500016, 0.0274784, 0.5784133), 0.93598676) +\n" +
+ "if (F74 < 0.875, if (F54 < 0.8452381, -0.0031926, if (F111 < 141.5, -0.1402742, if (F4 < 0.5871212, 1.2691849, 0.2681826, 0.35703), 0.47206005), 0.92346483), if (F111 < 1105.0, -0.0588169, -0.7294473, 0.7697161), 0.92512107) +\n" +
+ "if (F1 < 0.7619048, 0.0089472, if (F3 < 0.9285715, if (F114 < 36.5, -1.1389426, if (F97 < 0.0468557, if (F6 < 0.5357143, 0.5614127, -0.2162048, 0.32456142), -0.8289478, 0.742671), 0.21483377), 0.0168442, 0.3867458), 0.9402976) +\n" +
+ "if (F1 < 0.6583333, -0.0187975, if (F74 < 0.2104235, 0.1951745, if (F68 < 0.8158333, if (F68 < 0.7616667, -0.0701389, -1.908711, 0.8685714), if (F91 < 0.9516667, 0.2880719, 0.0202404, 0.08918849), 0.043402776), 0.12821622), 0.72688085) +\n" +
+ "if (F97 < 0.0104738, if (F4 < 0.6833333, -0.1119661, -0.7331711, 0.795539), if (F111 < 1.5, -0.0487729, if (F54 < 0.0294118, if (F6 < 0.225, 0.3140816, 0.0241852, 0.44444445), 0.0063921, 0.077068806), 0.20816082), 0.015885202) +\n" +
+ "if (F8 < 0.0488095, if (F97 < 0.0196587, -0.037317, if (F4 < 0.5527778, 0.0085123, if (F111 < 4064.5, if (F111 < 109.5, 0.2020749, -0.1841633, 0.5994437), 0.4359319, 0.8789731), 0.86483806), 0.24595065), -0.1090751, 0.94791543) +\n" +
+ "if (F111 < 7801.5, 0.005243, if (F4 < 0.5444444, -0.4434354, if (F4 < 0.725, if (F111 < 86382.5, if (F77 < 0.0250039, 0.9485625, 0.1099304, 0.2840909), -1.5740248, 0.9361702), -0.2924902, 0.48205128), 0.47580644), 0.97803235) +\n" +
+ "if (F4 < 0.9270834, if (F1 < 0.8166667, 0.0033574, if (F4 < 0.7071428, -0.2470163, 0.0482702, 0.5796915), 0.9535162), if (F54 < 0.5833334, 0.8142192, if (F1 < 0.95, 1.2211719, -0.0357525, 0.07643312), 0.20304568), 0.9883666) +\n" +
+ "if (F113 < 37.5050011, if (F111 < 252.5, -0.0110506, if (F4 < 0.69375, if (F5 < 0.9, 0.0488562, 0.3987899, 0.9362022), if (F74 < 0.75, -0.2113237, 0.3806402, 0.8606272), 0.8527072), 0.7694356), -0.5899943, 0.9981103) +\n" +
+ "if (F3 < 0.4365079, -0.0192181, if (F77 < 0.1715686, if (F111 < 1187.5, 0.016142, if (F112 < 467.5, if (F68 < 0.855, 0.9831077, 0.227789, 0.12048193), 0.0345274, 0.36617646), 0.89238805), 0.7605657, 0.9962163), 0.62542814) +\n" +
+ "if (F5 < 0.6125, if (F4 < 0.7928572, 0.0063205, 1.68561, 0.99925923), if (F113 < 1.6900001, if (F113 < 1.635, -0.0275853, 1.1438084, 0.99412453), if (F97 < 0.0363399, -0.0843354, -0.346791, 0.552356), 0.8166987), 0.876934) +\n" +
+ "if (F8 < 0.1396104, -0.001079, if (F54 < 0.55, if (F111 < 513.5, if (F77 < 0.0380987, -0.1117221, 0.9370234, 0.6551724), 1.654114, 0.7631579), if (F113 < 1.0700001, 0.1069487, -1.0835573, 0.8292683), 0.48101267), 0.9953348) +\n" +
+ "if (F6 < 0.7321429, 0.0033418, if (F111 < 74.5, if (F4 < 0.6708333, if (F1 < 0.5435606, 0.5229282, -0.451666, 0.11594203), 0.253665, 0.3270142), if (F113 < 2.47, -0.2267124, 0.2586769, 0.8803419), 0.4741573), 0.947443)\n" +
+ "\n");
+ }
+
+ private static void assertConvert(String gbdtModelFile, String expectedExpression)
+ throws ParseException, UnsupportedEncodingException {
+ ByteArrayOutputStream out = new ByteArrayOutputStream();
+ System.setOut(new PrintStream(out));
+ GbdtConverter.main(new String[] { gbdtModelFile });
+ String actualExpression = out.toString("UTF-8");
+ assertEquals(expectedExpression, actualExpression);
+ assertNotNull(new RankingExpression(actualExpression));
+ }
+
+ private static void assertError(String expected, String[] args) throws UnsupportedEncodingException {
+ ByteArrayOutputStream err = new ByteArrayOutputStream();
+ System.setErr(new PrintStream(err));
+ try {
+ GbdtConverter.main(args);
+ fail();
+ } catch (ExitException e) {
+ assertEquals(1, e.status);
+ assertEquals(expected, err.toString("UTF-8"));
+ }
+ }
+
+ private static class NoExitSecurityManager extends SecurityManager {
+
+ @Override
+ public void checkPermission(Permission perm) {
+ // allow anything
+ }
+
+ @Override
+ public void checkPermission(Permission perm, Object context) {
+ // allow anything
+ }
+
+ @Override
+ public void checkExit(int status) {
+ throw new ExitException(status);
+ }
+ }
+
+ private static class ExitException extends SecurityException {
+
+ final int status;
+
+ ExitException(int status) {
+ this.status = status;
+ }
+ }
+}
diff --git a/searchlib/src/test/java/com/yahoo/searchlib/gbdt/GbdtModelTestCase.java b/searchlib/src/test/java/com/yahoo/searchlib/gbdt/GbdtModelTestCase.java
new file mode 100644
index 00000000000..0561fb8ac7f
--- /dev/null
+++ b/searchlib/src/test/java/com/yahoo/searchlib/gbdt/GbdtModelTestCase.java
@@ -0,0 +1,65 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.gbdt;
+
+import com.yahoo.searchlib.rankingexpression.RankingExpression;
+import org.junit.Test;
+
+import java.io.BufferedReader;
+import java.io.FileReader;
+import java.io.IOException;
+
+import static org.junit.Assert.*;
+
+/**
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
+ */
+public class GbdtModelTestCase {
+
+ @Test
+ public void requireThatFactoryMethodWorks() throws Exception {
+ GbdtModel model = GbdtModel.fromXmlFile("src/test/files/gbdt.xml");
+ assertEquals(10, model.trees().size());
+ String exp = model.toRankingExpression();
+ assertEquals(readFile("src/test/files/gbdt.expression").trim(), exp.trim());
+ assertNotNull(new RankingExpression(exp));
+ }
+
+ @Test
+ public void requireThatIllegalXmlThrowsException() throws Exception {
+ assertIllegalXml("<Unknown />");
+ assertIllegalXml("<DecisionTree />");
+ assertIllegalXml("<DecisionTree>" +
+ " <Unknown />" +
+ "</DecisionTree>");
+ assertIllegalXml("<DecisionTree>" +
+ " <Forest />" +
+ "</DecisionTree>");
+ assertIllegalXml("<DecisionTree>" +
+ " <Forest>" +
+ " <Unknown />" +
+ " </Forest>" +
+ "</DecisionTree>");
+ }
+
+ private static void assertIllegalXml(String xml) throws Exception {
+ try {
+ GbdtModel.fromXml(xml);
+ fail();
+ } catch (IllegalArgumentException e) {
+
+ }
+ }
+
+ private static String readFile(String file) throws IOException {
+ StringBuilder ret = new StringBuilder();
+ BufferedReader in = new BufferedReader(new FileReader(file));
+ while (true) {
+ String str = in.readLine();
+ if (str == null) {
+ break;
+ }
+ ret.append(str).append("\n");
+ }
+ return ret.toString();
+ }
+}
diff --git a/searchlib/src/test/java/com/yahoo/searchlib/gbdt/ReferenceNodeTestCase.java b/searchlib/src/test/java/com/yahoo/searchlib/gbdt/ReferenceNodeTestCase.java
new file mode 100644
index 00000000000..6b4e075b769
--- /dev/null
+++ b/searchlib/src/test/java/com/yahoo/searchlib/gbdt/ReferenceNodeTestCase.java
@@ -0,0 +1,101 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.gbdt;
+
+import com.yahoo.searchlib.rankingexpression.evaluation.DoubleValue;
+import org.junit.Test;
+
+import java.util.Optional;
+
+import static org.junit.Assert.*;
+
+/**
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen Hult</a>
+ */
+public class ReferenceNodeTestCase {
+
+ @Test
+ public void requireThatAccessorsWork() {
+ TreeNode lhs = new ResponseNode(6.0, null);
+ TreeNode rhs = new ResponseNode(9.0, null);
+ NumericFeatureNode node = new NumericFeatureNode("foo", new DoubleValue(6.9), null, lhs, rhs);
+ assertEquals("foo", node.feature());
+ assertEquals(6.9, node.value().asDouble(), 1E-6);
+ assertSame(lhs, node.left());
+ assertSame(rhs, node.right());
+ }
+
+ @Test
+ public void requireThatRankingExpressionCanBeGenerated() {
+ assertExpression("if (a < 0.0, b, c)", new NumericFeatureNode("a", new DoubleValue(0), null, new MyNode("b"), new MyNode("c")));
+ assertExpression("if (d < 1.0, e, f)", new NumericFeatureNode("d", new DoubleValue(1), null, new MyNode("e"), new MyNode("f")));
+ assertExpression("if (d < 1.0, e, f, 0.5)", new NumericFeatureNode("d", new DoubleValue(1), null, new MyNode("e", 1), new MyNode("f", 1)));
+ assertExpression("if (d < 1.0, e, f, 0.75)", new NumericFeatureNode("d", new DoubleValue(1), null, new MyNode("e", 3), new MyNode("f", 1)));
+ }
+
+ @Test
+ public void requireThatNodeCanBeGeneratedFromDomNode() throws Exception {
+ String xml = "<Node feature='a' value='1'>\n" +
+ " <Response value='2' />\n" +
+ " <Response value='4' />\n" +
+ "</Node>\n";
+ NumericFeatureNode node = (NumericFeatureNode)FeatureNode.fromDom(XmlHelper.parseXml(xml));
+ assertEquals("a", node.feature());
+ assertEquals(1, node.value().asDouble(), 1E-6);
+ assertTrue(node.left() instanceof ResponseNode);
+ assertEquals(2, ((ResponseNode)node.left()).value(), 1E-6);
+ assertTrue(node.right() instanceof ResponseNode);
+ assertEquals(4, ((ResponseNode)node.right()).value(), 1E-6);
+ }
+
+ @Test
+ public void requireThatUnknownNodeThrowsException() throws Exception {
+ String xml = "<Node feature='a' value='1'>\n" +
+ " <Response value='2' />\n" +
+ "</Node>\n";
+ try {
+ TreeNode.fromDom(XmlHelper.parseXml(xml));
+ fail();
+ } catch (IllegalArgumentException e) {
+
+ }
+ xml = "<Node feature='a' value='1'>\n" +
+ " <Response value='2' />\n" +
+ " <Response value='4' />\n" +
+ " <Response value='8' />\n" +
+ "</Node>\n";
+ try {
+ TreeNode.fromDom(XmlHelper.parseXml(xml));
+ fail();
+ } catch (IllegalArgumentException e) {
+
+ }
+ }
+
+ private static void assertExpression(String expected, TreeNode node) {
+ assertEquals(expected, node.toRankingExpression());
+ }
+
+ private static class MyNode extends TreeNode {
+
+ final String str;
+
+ MyNode(String str) {
+ this(str, Optional.empty());
+ }
+
+ MyNode(String str, int samples) {
+ super(Optional.of(samples));
+ this.str = str;
+ }
+
+ MyNode(String str, Optional<Integer> samples) {
+ super(samples);
+ this.str = str;
+ }
+
+ @Override
+ public String toRankingExpression() {
+ return str;
+ }
+ }
+}
diff --git a/searchlib/src/test/java/com/yahoo/searchlib/gbdt/ResponseNodeTestCase.java b/searchlib/src/test/java/com/yahoo/searchlib/gbdt/ResponseNodeTestCase.java
new file mode 100644
index 00000000000..7d6022fa304
--- /dev/null
+++ b/searchlib/src/test/java/com/yahoo/searchlib/gbdt/ResponseNodeTestCase.java
@@ -0,0 +1,40 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.gbdt;
+
+import org.junit.Test;
+import org.xml.sax.SAXException;
+
+import javax.xml.parsers.ParserConfigurationException;
+import java.io.IOException;
+
+import static org.junit.Assert.assertEquals;
+
+/**
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen Hult</a>
+ */
+public class ResponseNodeTestCase {
+
+ @Test
+ public void requireThatAccessorsWork() {
+ ResponseNode node = new ResponseNode(6.9, null);
+ assertEquals(6.9, node.value(), 1E-6);
+ }
+
+ @Test
+ public void requireThatRankingExpressionCanBeGenerated() {
+ assertExpression("0.0", new ResponseNode(0, null));
+ assertExpression("1.0", new ResponseNode(1, null));
+ }
+
+ @Test
+ public void requireThatNodeCanBeGeneratedFromDomNode() throws ParserConfigurationException, IOException, SAXException {
+ String xml = "<Response value='1' />\n";
+ ResponseNode node = ResponseNode.fromDom(XmlHelper.parseXml(xml));
+ assertEquals(1, node.value(), 1E-6);
+ }
+
+ private static void assertExpression(String expected, TreeNode node) {
+ assertEquals(expected, node.toRankingExpression());
+ }
+
+}
diff --git a/searchlib/src/test/java/com/yahoo/searchlib/gbdt/TreeNodeTestCase.java b/searchlib/src/test/java/com/yahoo/searchlib/gbdt/TreeNodeTestCase.java
new file mode 100644
index 00000000000..572bd2d8c11
--- /dev/null
+++ b/searchlib/src/test/java/com/yahoo/searchlib/gbdt/TreeNodeTestCase.java
@@ -0,0 +1,57 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.gbdt;
+
+import org.junit.Test;
+import org.xml.sax.SAXException;
+
+import javax.xml.parsers.ParserConfigurationException;
+import java.io.IOException;
+
+import static org.junit.Assert.*;
+
+/**
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen Hult</a>
+ */
+public class TreeNodeTestCase {
+
+ @Test
+ public void requireThatFeatureNodeCanBeGeneratedFromDomNode()
+ throws ParserConfigurationException, IOException, SAXException
+ {
+ String xml = "<Node feature='a' value='1'>\n" +
+ " <Response value='2' />\n" +
+ " <Response value='4' />\n" +
+ "</Node>\n";
+ TreeNode obj = TreeNode.fromDom(XmlHelper.parseXml(xml));
+ assertTrue(obj instanceof FeatureNode);
+ NumericFeatureNode node = (NumericFeatureNode)obj;
+ assertEquals("a", node.feature());
+ assertEquals(1, node.value().asDouble(), 1E-6);
+ assertTrue(node.left() instanceof ResponseNode);
+ assertEquals(2, ((ResponseNode)node.left()).value(), 1E-6);
+ assertTrue(node.right() instanceof ResponseNode);
+ assertEquals(4, ((ResponseNode)node.right()).value(), 1E-6);
+ }
+
+ @Test
+ public void requireThatResponseNodeCanBeGeneratedFromDomNode()
+ throws ParserConfigurationException, IOException, SAXException
+ {
+ String xml = "<Response value='1' />\n";
+ TreeNode obj = TreeNode.fromDom(XmlHelper.parseXml(xml));
+ assertTrue(obj instanceof ResponseNode);
+ assertEquals(1, ((ResponseNode)obj).value(), 1E-6);
+ }
+
+ @Test
+ public void requireThatUnknownNodeThrowsException()
+ throws ParserConfigurationException, IOException, SAXException
+ {
+ try {
+ TreeNode.fromDom(XmlHelper.parseXml("<Unknown />"));
+ fail();
+ } catch (UnsupportedOperationException e) {
+ assertEquals("Unknown", e.getMessage());
+ }
+ }
+} \ No newline at end of file
diff --git a/searchlib/src/test/java/com/yahoo/searchlib/gbdt/XmlHelperTestCase.java b/searchlib/src/test/java/com/yahoo/searchlib/gbdt/XmlHelperTestCase.java
new file mode 100644
index 00000000000..7dc7c42f590
--- /dev/null
+++ b/searchlib/src/test/java/com/yahoo/searchlib/gbdt/XmlHelperTestCase.java
@@ -0,0 +1,153 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.gbdt;
+
+import org.junit.Test;
+import org.w3c.dom.Element;
+
+import java.util.List;
+
+import static org.junit.Assert.*;
+
+/**
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen Hult</a>
+ */
+public class XmlHelperTestCase {
+
+ @Test
+ public void requireThatAttributeTextCanBeRetrieved() throws Exception {
+ Element node = XmlHelper.parseXml("<element a1='v1' a2='v2' />");
+ assertEquals("v1", XmlHelper.getAttributeText(node, "a1"));
+ assertEquals("v2", XmlHelper.getAttributeText(node, "a2"));
+ }
+
+ @Test
+ public void requireThatMissingAttributeTextThrowsIllegalArgument() throws Exception {
+ try {
+ XmlHelper.getAttributeText(XmlHelper.parseXml("<element />"), "a1");
+ fail();
+ } catch (IllegalArgumentException e) {
+
+ }
+ try {
+ XmlHelper.getAttributeText(XmlHelper.parseXml("<element a1='' />"), "a1");
+ fail();
+ } catch (IllegalArgumentException e) {
+
+ }
+ }
+
+ @Test
+ public void requireThatSingleElementCanBeRetrieved() throws Exception {
+ String xml = "<parent>" +
+ " <child id='a' />" +
+ "</parent>";
+ Element element = XmlHelper.getSingleElement(XmlHelper.parseXml(xml), null);
+ assertNotNull(element);
+ assertEquals("a", XmlHelper.getAttributeText(element, "id"));
+ }
+
+ @Test
+ public void requireThatNamedSingleElementCanBeRetrieved() throws Exception {
+ String xml = "<parent>" +
+ " <bastard id='a' />" +
+ " <child id='b' />" +
+ " <bastard id='c' />" +
+ "</parent>";
+ Element element = XmlHelper.getSingleElement(XmlHelper.parseXml(xml), "child");
+ assertNotNull(element);
+ assertEquals("b", XmlHelper.getAttributeText(element, "id"));
+ }
+
+ @Test
+ public void requireThatMissingSingleElementThrowsIllegalArgument() throws Exception {
+ try {
+ XmlHelper.getSingleElement(XmlHelper.parseXml("<parent />"), null);
+ fail();
+ } catch (IllegalArgumentException e) {
+
+ }
+ }
+
+ @Test
+ public void requireThatMissingNamedSingleElementThrowsIllegalArgument() throws Exception {
+ String xml = "<parent>" +
+ " <bastard id='a' />" +
+ "</parent>";
+ try {
+ XmlHelper.getSingleElement(XmlHelper.parseXml(xml), "child");
+ fail();
+ } catch (IllegalArgumentException e) {
+
+ }
+ }
+
+ @Test
+ public void requireThatAmbigousSingleElementThrowsIllegalArgument() throws Exception {
+ String xml = "<parent>" +
+ " <child id='a' />" +
+ " <child id='b' />" +
+ "</parent>";
+ try {
+ XmlHelper.getSingleElement(XmlHelper.parseXml(xml), null);
+ fail();
+ } catch (IllegalArgumentException e) {
+
+ }
+ }
+
+ @Test
+ public void requireThatAmbigousNamedSingleElementThrowsIllegalArgument() throws Exception {
+ String xml = "<parent>" +
+ " <child id='a' />" +
+ " <bastard id='b' />" +
+ " <child id='c' />" +
+ "</parent>";
+ try {
+ XmlHelper.getSingleElement(XmlHelper.parseXml(xml), "child");
+ fail();
+ } catch (IllegalArgumentException e) {
+
+ }
+ }
+
+ @Test
+ public void requireThatChildElementsCanBeRetrieved() throws Exception {
+ String xml = "<parent>" +
+ " <child id='a' />" +
+ " <child id='b' />" +
+ "</parent>";
+ List<Element> lst = XmlHelper.getChildElements(XmlHelper.parseXml(xml), null);
+ assertNotNull(lst);
+ assertEquals(2, lst.size());
+ assertEquals("a", XmlHelper.getAttributeText(lst.get(0), "id"));
+ assertEquals("b", XmlHelper.getAttributeText(lst.get(1), "id"));
+ }
+
+ @Test
+ public void requireThatNamedChildElementsCanBeRetrieved() throws Exception {
+ String xml = "<parent>" +
+ " <child id='a' />" +
+ " <bastard id='b' />" +
+ " <child id='c' />" +
+ "</parent>";
+ List<Element> lst = XmlHelper.getChildElements(XmlHelper.parseXml(xml), "child");
+ assertNotNull(lst);
+ assertEquals(2, lst.size());
+ assertEquals("a", XmlHelper.getAttributeText(lst.get(0), "id"));
+ assertEquals("c", XmlHelper.getAttributeText(lst.get(1), "id"));
+ }
+
+ @Test
+ public void requireThatChildElementsAreNeverNull() throws Exception {
+ List<Element> lst = XmlHelper.getChildElements(XmlHelper.parseXml("<parent />"), null);
+ assertNotNull(lst);
+ assertTrue(lst.isEmpty());
+ }
+
+ @Test
+ public void requireThatNamedChildElementsAreNeverNull() throws Exception {
+ List<Element> lst = XmlHelper.getChildElements(XmlHelper.parseXml("<parent />"), "child");
+ assertNotNull(lst);
+ assertTrue(lst.isEmpty());
+ }
+}
diff --git a/searchlib/src/test/java/com/yahoo/searchlib/mlr/ga/test/CsvFileCaseListTestCase.java b/searchlib/src/test/java/com/yahoo/searchlib/mlr/ga/test/CsvFileCaseListTestCase.java
new file mode 100644
index 00000000000..e95af6ad61d
--- /dev/null
+++ b/searchlib/src/test/java/com/yahoo/searchlib/mlr/ga/test/CsvFileCaseListTestCase.java
@@ -0,0 +1,81 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.mlr.ga.test;
+
+import com.yahoo.searchlib.mlr.ga.TrainingParameters;
+import com.yahoo.searchlib.mlr.ga.caselist.CsvFileCaseList;
+import com.yahoo.yolean.Exceptions;
+import com.yahoo.searchlib.mlr.ga.TrainingSet;
+import org.junit.Test;
+import static org.junit.Assert.*;
+
+/**
+ * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a>
+ */
+public class CsvFileCaseListTestCase {
+
+ private static final double delta = 0.000001;
+
+ @Test
+ public void testLegalFile() {
+ CsvFileCaseList list = new CsvFileCaseList("src/test/files/mlr/cases.csv");
+
+ assertEquals(3,list.cases().size());
+ {
+ TrainingSet.Case case1 = list.cases().get(0);
+ assertEquals(1.0, case1.targetValue(), delta);
+ assertEquals(2, case1.arguments().names().size());
+ assertEquals(2.0, case1.arguments().get("arg1").asDouble(),delta);
+ assertEquals(-1.3, case1.arguments().get("arg2").asDouble(),delta);
+ }
+
+ {
+ TrainingSet.Case case2 = list.cases().get(1);
+ assertEquals(-1.003, case2.targetValue(), delta);
+ assertEquals(1, case2.arguments().names().size());
+ assertEquals(500007, case2.arguments().get("arg1").asDouble(),delta);
+ }
+
+ {
+ TrainingSet.Case case3 = list.cases().get(2);
+ assertEquals(0, case3.targetValue(), delta);
+ assertEquals(1, case3.arguments().names().size());
+ assertEquals(1.0, case3.arguments().get("arg2").asDouble(),delta);
+ }
+
+ TrainingSet trainingSet = new TrainingSet(list, new TrainingParameters());
+ assertEquals(2, trainingSet.argumentNames().size());
+ assertTrue(trainingSet.argumentNames().contains("arg1"));
+ assertTrue(trainingSet.argumentNames().contains("arg2"));
+ }
+
+ @Test
+ public void testNonExistingFile() {
+ try {
+ new CsvFileCaseList("nosuchfile");
+ }
+ catch (IllegalArgumentException e) {
+ assertEquals("Could not create a case list from file 'nosuchfile': nosuchfile (No such file or directory)", Exceptions.toMessageString(e));
+ }
+ }
+
+ @Test
+ public void testInvalidFile1() {
+ try {
+ new CsvFileCaseList("src/test/files/mlr/cases-illegal1.csv");
+ }
+ catch (IllegalArgumentException e) {
+ assertEquals("Could not create a case list from file 'src/test/files/mlr/cases-illegal1.csv': At line 5, element 3: Expected argument on the form 'identifier:double', got ' arg2:'", Exceptions.toMessageString(e));
+ }
+ }
+
+ @Test
+ public void testInvalidFile2() {
+ try {
+ new CsvFileCaseList("src/test/files/mlr/cases-illegal2.csv");
+ }
+ catch (IllegalArgumentException e) {
+ assertEquals("Could not create a case list from file 'src/test/files/mlr/cases-illegal2.csv': At line 2: Expected a target value double at the start of the line, got '5db'", Exceptions.toMessageString(e));
+ }
+ }
+
+}
diff --git a/searchlib/src/test/java/com/yahoo/searchlib/mlr/ga/test/ExampleLearningSessions.java b/searchlib/src/test/java/com/yahoo/searchlib/mlr/ga/test/ExampleLearningSessions.java
new file mode 100644
index 00000000000..fc834181f53
--- /dev/null
+++ b/searchlib/src/test/java/com/yahoo/searchlib/mlr/ga/test/ExampleLearningSessions.java
@@ -0,0 +1,110 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.mlr.ga.test;
+
+import com.yahoo.searchlib.mlr.ga.PrintingTracker;
+import com.yahoo.searchlib.mlr.ga.RankingExpressionCaseList;
+import com.yahoo.searchlib.mlr.ga.Trainer;
+import com.yahoo.searchlib.mlr.ga.TrainingParameters;
+import com.yahoo.searchlib.mlr.ga.TrainingSet;
+import com.yahoo.searchlib.rankingexpression.RankingExpression;
+import com.yahoo.searchlib.rankingexpression.evaluation.Context;
+import com.yahoo.searchlib.rankingexpression.evaluation.MapContext;
+import com.yahoo.searchlib.rankingexpression.parser.ParseException;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Main class - drives a learning session from the command line.
+ *
+ * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a>
+ */
+public class ExampleLearningSessions {
+
+ public static void main(String[] args) throws ParseException {
+ test3();
+ }
+
+ // Always learnt precisely in less than a second
+ private static void test1() throws ParseException {
+ TrainingParameters parameters = new TrainingParameters();
+
+ RankingExpression target = new RankingExpression("2*x");
+ List<Context> arguments = new ArrayList<>();
+ arguments.add(MapContext.fromString("x:0").freeze());
+ arguments.add(MapContext.fromString("x:1").freeze());
+ arguments.add(MapContext.fromString("x:2").freeze());
+ TrainingSet trainingSet = new TrainingSet(new RankingExpressionCaseList(arguments, target), parameters);
+
+ Trainer trainer = new Trainer(trainingSet);
+
+ System.out.println("Learning ...");
+ RankingExpression learntExpression = trainer.train(parameters, new PrintingTracker());
+ }
+
+ // Solved well in a few seconds at most. Slow going thereafter.
+ private static void test2() throws ParseException {
+ TrainingParameters parameters = new TrainingParameters();
+ parameters.setSpeciesLifespan(100); // Shorter lifespan is faster?
+
+ RankingExpression target = new RankingExpression("5*x*x + 2*x + 13");
+ List<Context> arguments = new ArrayList<>();
+ arguments.add(MapContext.fromString("x:0").freeze());
+ arguments.add(MapContext.fromString("x:1").freeze());
+ arguments.add(MapContext.fromString("x:2").freeze());
+ arguments.add(MapContext.fromString("x:3").freeze());
+ arguments.add(MapContext.fromString("x:4").freeze());
+ arguments.add(MapContext.fromString("x:5").freeze());
+ arguments.add(MapContext.fromString("x:6").freeze());
+ arguments.add(MapContext.fromString("x:7").freeze());
+ arguments.add(MapContext.fromString("x:8").freeze());
+ arguments.add(MapContext.fromString("x:9").freeze());
+ arguments.add(MapContext.fromString("x:10").freeze());
+ arguments.add(MapContext.fromString("x:50").freeze());
+ arguments.add(MapContext.fromString("x:500").freeze());
+ arguments.add(MapContext.fromString("x:5000").freeze());
+ arguments.add(MapContext.fromString("x:50000").freeze());
+ TrainingSet trainingSet = new TrainingSet(new RankingExpressionCaseList(arguments, target), parameters);
+
+ Trainer trainer = new Trainer(trainingSet);
+
+ System.out.println("Learning ...");
+ RankingExpression learntExpression = trainer.train(parameters, new PrintingTracker());
+ }
+
+ // Solved well in at most a few minutes
+ private static void test3() throws ParseException {
+ TrainingParameters parameters = new TrainingParameters();
+ parameters.setAllowConditions(false); // disallow non-smooth functions: Speeds up learning of smooth ones greatly
+
+ RankingExpression target = new RankingExpression("-2.7*x*x*x + 5*x*x + 2*x + 13");
+ List<Context> arguments = new ArrayList<>();
+ arguments.add(MapContext.fromString("x:-50000").freeze());
+ arguments.add(MapContext.fromString("x:-5000").freeze());
+ arguments.add(MapContext.fromString("x:-500").freeze());
+ arguments.add(MapContext.fromString("x:-50").freeze());
+ arguments.add(MapContext.fromString("x:-10").freeze());
+ arguments.add(MapContext.fromString("x:0").freeze());
+ arguments.add(MapContext.fromString("x:1").freeze());
+ arguments.add(MapContext.fromString("x:2").freeze());
+ arguments.add(MapContext.fromString("x:3").freeze());
+ arguments.add(MapContext.fromString("x:4").freeze());
+ arguments.add(MapContext.fromString("x:5").freeze());
+ arguments.add(MapContext.fromString("x:6").freeze());
+ arguments.add(MapContext.fromString("x:7").freeze());
+ arguments.add(MapContext.fromString("x:8").freeze());
+ arguments.add(MapContext.fromString("x:9").freeze());
+ arguments.add(MapContext.fromString("x:10").freeze());
+ arguments.add(MapContext.fromString("x:50").freeze());
+ arguments.add(MapContext.fromString("x:500").freeze());
+ arguments.add(MapContext.fromString("x:5000").freeze());
+ arguments.add(MapContext.fromString("x:50000").freeze());
+ TrainingSet trainingSet = new TrainingSet(new RankingExpressionCaseList(arguments, target), parameters);
+
+ Trainer trainer = new Trainer(trainingSet);
+
+ System.out.println("Learning ...");
+ RankingExpression learntExpression = trainer.train(parameters, new PrintingTracker());
+ }
+
+}
diff --git a/searchlib/src/test/java/com/yahoo/searchlib/mlr/ga/test/MainTestCase.java b/searchlib/src/test/java/com/yahoo/searchlib/mlr/ga/test/MainTestCase.java
new file mode 100644
index 00000000000..51460855983
--- /dev/null
+++ b/searchlib/src/test/java/com/yahoo/searchlib/mlr/ga/test/MainTestCase.java
@@ -0,0 +1,57 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.mlr.ga.test;
+
+import com.yahoo.searchlib.mlr.ga.Evolvable;
+import com.yahoo.searchlib.mlr.ga.Main;
+import com.yahoo.searchlib.mlr.ga.PrintingTracker;
+import com.yahoo.searchlib.mlr.ga.Species;
+import com.yahoo.searchlib.mlr.ga.Tracker;
+import com.yahoo.searchlib.mlr.ga.TrainingParameters;
+import com.yahoo.searchlib.rankingexpression.RankingExpression;
+import org.junit.Test;
+import static org.junit.Assert.*;
+
+import java.util.List;
+
+/**
+ * Tests the main class used from the command line
+ *
+ * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a>
+ */
+public class MainTestCase {
+
+ /** Tests that an extremely simple function expressed as cases in a file is learnt perfectly. */
+ @Test
+ public void testMain() {
+ SilentTestTracker tracker = new SilentTestTracker();
+ new Main(new String[] { "src/test/files/mlr/cases-linear.csv"}, tracker);
+ assertTrue(Double.isInfinite(tracker.winner.getFitness()));
+ }
+
+ private static class SilentTestTracker implements Tracker {
+
+ public Evolvable winner;
+
+ @Override
+ public void newSpecies(Species predecessor, int initialSize, List<RankingExpression> genePool) {
+ }
+
+ @Override
+ public void newSpeciesCreated(Species predecessor) {
+ }
+
+ @Override
+ public void speciesCompleted(Species predecessor) {
+ }
+
+ @Override
+ public void iteration(Species species, int generation) {
+ }
+
+ @Override
+ public void result(Evolvable winner) {
+ this.winner = winner;
+ }
+ }
+
+}
diff --git a/searchlib/src/test/java/com/yahoo/searchlib/mlr/ga/test/MockTrainingSetTestCase.java b/searchlib/src/test/java/com/yahoo/searchlib/mlr/ga/test/MockTrainingSetTestCase.java
new file mode 100644
index 00000000000..ab1d5c362b8
--- /dev/null
+++ b/searchlib/src/test/java/com/yahoo/searchlib/mlr/ga/test/MockTrainingSetTestCase.java
@@ -0,0 +1,46 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.mlr.ga.test;
+
+import com.yahoo.searchlib.mlr.ga.RankingExpressionCaseList;
+import com.yahoo.searchlib.mlr.ga.TrainingParameters;
+import com.yahoo.searchlib.mlr.ga.TrainingSet;
+import com.yahoo.searchlib.rankingexpression.RankingExpression;
+import com.yahoo.searchlib.rankingexpression.evaluation.Context;
+import com.yahoo.searchlib.rankingexpression.evaluation.MapContext;
+import com.yahoo.searchlib.rankingexpression.parser.ParseException;
+import org.junit.Test;
+import static org.junit.Assert.*;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a>
+ */
+public class MockTrainingSetTestCase {
+
+ @Test
+ public void testMockTrainingSet() throws ParseException {
+ RankingExpression target = new RankingExpression("2*x");
+ List<Context> arguments = new ArrayList<>();
+ arguments.add(MapContext.fromString("x:0"));
+ arguments.add(MapContext.fromString("x:1"));
+ arguments.add(MapContext.fromString("x:2"));
+ TrainingSet trainingSet = new TrainingSet(new RankingExpressionCaseList(arguments, target), new TrainingParameters());
+ assertTrue(Double.isInfinite(trainingSet.evaluate(new RankingExpression("2*x"))));
+ assertEquals(4.0, trainingSet.evaluate(new RankingExpression("x")), 0.001);
+ assertEquals(0.0, trainingSet.evaluate(new RankingExpression("x/x")), 0.001);
+ }
+
+ @Test
+ public void testEvaluation() throws ParseException {
+ // with freezing
+ assertEquals(16.0,new RankingExpression("2*x*x*x").evaluate(MapContext.fromString("x:2").freeze()).asDouble(),0.0001);
+ assertEquals(8.0,new RankingExpression("x*x+x*x").evaluate(MapContext.fromString("x:2").freeze()).asDouble(),0.0001);
+
+ // without freezing
+ assertEquals(16.0,new RankingExpression("2*x*x*x").evaluate(MapContext.fromString("x:2")).asDouble(),0.0001);
+ assertEquals(8.0,new RankingExpression("x*x+x*x").evaluate(MapContext.fromString("x:2")).asDouble(),0.0001);
+ }
+
+}
diff --git a/searchlib/src/test/java/com/yahoo/searchlib/mlr/ga/test/TripAdvisorFileCaseList.java b/searchlib/src/test/java/com/yahoo/searchlib/mlr/ga/test/TripAdvisorFileCaseList.java
new file mode 100644
index 00000000000..9c3e514ddad
--- /dev/null
+++ b/searchlib/src/test/java/com/yahoo/searchlib/mlr/ga/test/TripAdvisorFileCaseList.java
@@ -0,0 +1,99 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.mlr.ga.test;
+
+import com.yahoo.searchlib.mlr.ga.CaseList;
+import com.yahoo.searchlib.mlr.ga.TrainingSet;
+import com.yahoo.searchlib.rankingexpression.evaluation.Context;
+import com.yahoo.searchlib.rankingexpression.evaluation.MapContext;
+
+import java.io.BufferedReader;
+import java.io.FileReader;
+import java.io.IOException;
+import java.util.*;
+
+/**
+ * Reads a tripadvisor Kaggle challenge training set
+ *
+ * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a>
+ */
+public class TripAdvisorFileCaseList implements CaseList {
+
+ private List<TrainingSet.Case> cases = new ArrayList<>();
+ private Map<Integer,String> columnNames = new HashMap<>();
+
+ /**
+ * Reads a case list from file.
+ *
+ * @throws IllegalArgumentException if the file could not be found or opened
+ */
+ public TripAdvisorFileCaseList(String fileName) throws IllegalArgumentException {
+ System.out.print("Reading training data ");
+ try (BufferedReader reader = new BufferedReader(new FileReader(fileName))) {
+ String line;
+ readColumnNames(reader.readLine());
+ int lineNumber=1;
+ while (null != (line=reader.readLine())) {
+ lineNumber++;
+ line = line.trim();
+ if (line.startsWith("#")) continue;
+ if (line.isEmpty()) continue;
+ cases.add(lineToCase(line, lineNumber));
+ }
+ }
+ catch (IOException | IllegalArgumentException e) {
+ throw new IllegalArgumentException("Could not create a case list from file '" + fileName + "'", e);
+ }
+ System.out.println("done");
+ }
+
+ private void readColumnNames(String line) {
+ int columnNumber = 0;
+ for (String columnName : line.split(","))
+ columnNames.put(columnNumber++, columnName);
+ }
+
+ protected TrainingSet.Case lineToCase(String line, int lineNumber) {
+ if ((lineNumber % 10000) ==0)
+ System.out.print(".");
+
+ Map<String,Double> columnValues = readColumns(line);
+
+ double targetValue = columnValues.get("click_bool") + columnValues.get("booking_bool")*5;
+
+ Context context = new MapContext();
+ for (Map.Entry<String,Double> value : columnValues.entrySet()) {
+ if (value.getKey().equals("click_bool")) continue;
+ if (value.getKey().equals("gross_bookings_usd")) continue;
+ if (value.getKey().equals("booking_bool")) continue;
+ context.put(value.getKey(),value.getValue());
+ }
+ return new TrainingSet.Case(context, targetValue);
+ }
+
+ private Map<String, Double> readColumns(String line) {
+ Map<String,Double> columnValues = new LinkedHashMap<>();
+ int columnNumber = 0;
+ for (String valueString : line.split(",")) {
+ String columnName = columnNames.get(columnNumber++);
+ if (columnName.equals("date_time")) continue;
+ Double columnValue;
+ if (valueString.equals("NULL")) {
+ columnValue = 0.0;
+ }
+ else {
+ try {
+ columnValue = Double.parseDouble(valueString);
+ }
+ catch (NumberFormatException e) {
+ throw new IllegalArgumentException("Could not parse column '" + columnName + "'",e);
+ }
+ }
+ columnValues.put(columnName, columnValue);
+ }
+ return columnValues;
+ }
+
+ @Override
+ public List<TrainingSet.Case> cases() { return Collections.unmodifiableList(cases); }
+
+}
diff --git a/searchlib/src/test/java/com/yahoo/searchlib/mlr/gbdt/ExpressionAnalysisRunner.java b/searchlib/src/test/java/com/yahoo/searchlib/mlr/gbdt/ExpressionAnalysisRunner.java
new file mode 100644
index 00000000000..301fdfcd4f2
--- /dev/null
+++ b/searchlib/src/test/java/com/yahoo/searchlib/mlr/gbdt/ExpressionAnalysisRunner.java
@@ -0,0 +1,19 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.mlr.gbdt;
+
+import org.junit.Ignore;
+import org.junit.Test;
+
+/**
+ * Run an expression analyser without having to muck with classpath.
+ *
+ * @author bratseth
+ */
+public class ExpressionAnalysisRunner {
+
+ @Test @Ignore
+ public void runAnalysis() {
+ ExpressionAnalysis.main(new String[] { "/Users/bratseth/Downloads/getty_mlr_001.expression"});
+ }
+
+}
diff --git a/searchlib/src/test/java/com/yahoo/searchlib/ranking/features/ElementCompletenessTestCase.java b/searchlib/src/test/java/com/yahoo/searchlib/ranking/features/ElementCompletenessTestCase.java
new file mode 100644
index 00000000000..804f34ccce8
--- /dev/null
+++ b/searchlib/src/test/java/com/yahoo/searchlib/ranking/features/ElementCompletenessTestCase.java
@@ -0,0 +1,80 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.ranking.features;
+
+import static org.junit.Assert.*;
+import org.junit.Test;
+
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * @author bratseth
+ */
+public class ElementCompletenessTestCase {
+
+ private static final double delta = 0.0001;
+
+ @Test
+ public void testElementCompleteness1() {
+ Map<String, Integer> query = createQuery();
+ ElementCompleteness.Item[] items = createField(1);
+
+ Features f = ElementCompleteness.compute(query, items);
+ assertEquals(0.26111111111111107, f.get("completeness").asDouble(), delta);
+ assertEquals(1.0, f.get("fieldCompleteness").asDouble(), delta);
+ assertEquals(0.2222222222222222, f.get("queryCompleteness").asDouble(), delta);
+ assertEquals(3.0, f.get("elementWeight").asDouble(), delta);
+ }
+
+ @Test
+ public void testElementCompleteness2() {
+ Map<String, Integer> query = createQuery();
+ ElementCompleteness.Item[] items = createField(2);
+
+ Features f = ElementCompleteness.compute(query, items);
+ assertEquals(0.975, f.get("completeness").asDouble(), delta);
+ assertEquals(0.5, f.get("fieldCompleteness").asDouble(), delta);
+ assertEquals(1.0, f.get("queryCompleteness").asDouble(), delta);
+ assertEquals(4.0, f.get("elementWeight").asDouble(), delta);
+ }
+
+ @Test
+ public void testElementCompleteness3() {
+ Map<String, Integer> query = createQuery();
+ ElementCompleteness.Item[] items = createField(3);
+
+ Features f = ElementCompleteness.compute(query, items);
+ assertEquals(1.0, f.get("completeness").asDouble(), delta);
+ assertEquals(1.0, f.get("fieldCompleteness").asDouble(), delta);
+ assertEquals(1.0, f.get("queryCompleteness").asDouble(), delta);
+ assertEquals(5.0, f.get("elementWeight").asDouble(), delta);
+ }
+
+ @Test
+ public void testElementCompletenessNoMatches() {
+ ElementCompleteness.Item[] items = createField(3);
+
+ Features f = ElementCompleteness.compute(new HashMap<String, Integer>(), items);
+ assertEquals(0.0, f.get("completeness").asDouble(), delta);
+ assertEquals(0.0, f.get("fieldCompleteness").asDouble(), delta);
+ assertEquals(0.0, f.get("queryCompleteness").asDouble(), delta);
+ assertEquals(0.0, f.get("elementWeight").asDouble(), delta);
+ }
+
+ private Map<String, Integer> createQuery() {
+ Map<String, Integer> query = new HashMap<>();
+ query.put("a", 100);
+ query.put("b", 150);
+ query.put("c", 200);
+ return query;
+ }
+
+ private ElementCompleteness.Item[] createField(int size) {
+ ElementCompleteness.Item[] items = new ElementCompleteness.Item[size];
+ if (size > 0) items[0] = new ElementCompleteness.Item("a", 3); // qc: 100/450=0.22, fc: 1.0, c: 0.611
+ if (size > 1) items[1] = new ElementCompleteness.Item("a b c d e f", 4); // qc: 1.0, fc: 0.5, c: 0.75
+ if (size > 2) items[2] = new ElementCompleteness.Item("a b c", 5);
+ return items;
+ }
+
+}
diff --git a/searchlib/src/test/java/com/yahoo/searchlib/ranking/features/FieldTermMatchTestCase.java b/searchlib/src/test/java/com/yahoo/searchlib/ranking/features/FieldTermMatchTestCase.java
new file mode 100644
index 00000000000..61c313956c5
--- /dev/null
+++ b/searchlib/src/test/java/com/yahoo/searchlib/ranking/features/FieldTermMatchTestCase.java
@@ -0,0 +1,30 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.ranking.features;
+
+import static org.junit.Assert.*;
+
+import org.junit.Test;
+
+/**
+ * @author bratseth
+ */
+public class FieldTermMatchTestCase {
+
+ private static final double delta = 0.0001;
+
+ @Test
+ public void testFieldTermMatch() {
+ assertEquals(1.0, FieldTermMatch.compute("a", "a b c").get("occurrences").asDouble(), delta);
+ assertEquals(0.0, FieldTermMatch.compute("a", "a b c").get("firstPosition").asDouble(), delta);
+
+ assertEquals(3.0, FieldTermMatch.compute("a", "a a a").get("occurrences").asDouble(), delta);
+ assertEquals(0.0, FieldTermMatch.compute("a", "a a a").get("firstPosition").asDouble(), delta);
+
+ assertEquals(0.0, FieldTermMatch.compute("d", "a b c").get("occurrences").asDouble(), delta);
+ assertEquals(1000000.0, FieldTermMatch.compute("d", "a b c").get("firstPosition").asDouble(), delta);
+
+ assertEquals(0.0, FieldTermMatch.compute("d", "").get("occurrences").asDouble(), delta);
+ assertEquals(1000000, FieldTermMatch.compute("d", "").get("firstPosition").asDouble(), delta);
+ }
+
+}
diff --git a/searchlib/src/test/java/com/yahoo/searchlib/ranking/features/fieldmatch/SemanticDistanceTestCase.java b/searchlib/src/test/java/com/yahoo/searchlib/ranking/features/fieldmatch/SemanticDistanceTestCase.java
new file mode 100644
index 00000000000..14ea58961ba
--- /dev/null
+++ b/searchlib/src/test/java/com/yahoo/searchlib/ranking/features/fieldmatch/SemanticDistanceTestCase.java
@@ -0,0 +1,140 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.ranking.features.fieldmatch;
+
+import com.yahoo.searchlib.ranking.features.fieldmatch.FieldMatchMetricsComputer;
+
+import java.util.Set;
+import java.util.HashSet;
+
+/**
+ * The "semantic distance" refers to the non-continuous distance from a token
+ * to the next token used by the string match metrics algorithm. This class
+ * tests invariants which must hold for any such distance metric as well as specifics
+ * for the currently used distance metric.
+ *
+ * @author bratseth
+ */
+public class SemanticDistanceTestCase extends junit.framework.TestCase {
+
+ FieldMatchMetricsComputer c;
+
+ public SemanticDistanceTestCase(String name) {
+ super(name);
+ }
+
+ public @Override void setUp() {
+ c=new FieldMatchMetricsComputer();
+ StringBuilder field=new StringBuilder();
+ for (int i=0; i<150; i++)
+ field.append("t" + i + " ");
+ c.compute("query",field.toString()); // Just to set the field value
+ }
+
+ /** Must be true using any semantic distance function */
+ public void testBothWayConversionProducesOriginalValue() {
+ assertBothWayConversionProducesOriginalValue(50);
+ assertBothWayConversionProducesOriginalValue(10);
+ assertBothWayConversionProducesOriginalValue(5);
+ assertBothWayConversionProducesOriginalValue(0);
+ assertBothWayConversionProducesOriginalValue(140);
+ assertBothWayConversionProducesOriginalValue(145);
+ assertBothWayConversionProducesOriginalValue(149);
+ }
+
+ /** Must be true using any semantic distance function */
+ public void testFunctionsAreOneToOne() {
+ assertFunctionsAreOneToOne(50);
+ assertFunctionsAreOneToOne(10);
+ assertFunctionsAreOneToOne(5);
+ assertFunctionsAreOneToOne(0);
+ assertFunctionsAreOneToOne(140);
+ assertFunctionsAreOneToOne(145);
+ assertFunctionsAreOneToOne(149);
+ }
+
+ /** Specific to this particular distance function */
+ public void testFunction() {
+ int zeroJ=50;
+ assertFunction(50,0,zeroJ);
+ assertFunction(59,9,zeroJ);
+ assertFunction(49,10,zeroJ);
+ assertFunction(40,19,zeroJ);
+ assertFunction(60,20,zeroJ);
+ assertFunction(149,109,zeroJ);
+ assertFunction(39,110,zeroJ);
+ assertFunction(0,149,zeroJ);
+
+ zeroJ=0;
+ assertFunction(0,0,zeroJ);
+ assertFunction(10,10,zeroJ);
+ assertFunction(20,20,zeroJ);
+ assertFunction(149,149,zeroJ);
+
+ zeroJ=5;
+ assertFunction(5,0,zeroJ);
+ assertFunction(10,5,zeroJ);
+ assertFunction(14,9,zeroJ);
+ assertFunction(4,10,zeroJ);
+ assertFunction(0,14,zeroJ);
+ assertFunction(15,15,zeroJ);
+ assertFunction(25,25,zeroJ);
+ assertFunction(149,149,zeroJ);
+
+ zeroJ=149;
+ assertFunction(149,0,zeroJ);
+ assertFunction(140,9,zeroJ);
+ assertFunction(130,19,zeroJ);
+ assertFunction(0,149,zeroJ);
+
+ zeroJ=145;
+ assertFunction(145,0,zeroJ);
+ assertFunction(149,4,zeroJ);
+ assertFunction(144,5,zeroJ);
+ assertFunction(140,9,zeroJ);
+ assertFunction(135,14,zeroJ);
+ assertFunction(125,24,zeroJ);
+ assertFunction(0,149,zeroJ);
+ }
+
+ /** Hits both limits at once */
+ public void testSmallField() {
+ c=new FieldMatchMetricsComputer();
+ c.compute("query","my field value four"); // Just to set the field value
+ assertBothWayConversionProducesOriginalValue(2);
+ assertBothWayConversionProducesOriginalValue(0);
+ assertBothWayConversionProducesOriginalValue(3);
+ assertFunctionsAreOneToOne(2);
+ assertFunctionsAreOneToOne(0);
+ assertFunctionsAreOneToOne(3);
+
+ int zeroJ=2;
+ assertFunction(2,0,zeroJ);
+ assertFunction(3,1,zeroJ);
+ assertFunction(1,2,zeroJ);
+ assertFunction(0,3,zeroJ);
+ }
+
+ private void assertBothWayConversionProducesOriginalValue(int zeroJ) {
+ // Starting point in the middle
+ for (int j=0; j<c.getField().terms().size(); j++) {
+ int semanticDistance=c.fieldIndexToSemanticDistance(j,zeroJ);
+ assertTrue("Using zeroJ=" + zeroJ + ": " + semanticDistance +">=0", semanticDistance >= 0);
+ int backConvertedJ=c.semanticDistanceToFieldIndex(semanticDistance,zeroJ);
+ assertEquals("Using zeroJ=" + zeroJ + ": " + j + "->" + semanticDistance + "->" + backConvertedJ,j, backConvertedJ);
+ }
+ }
+
+ private void assertFunctionsAreOneToOne(int zeroJ) {
+ Set<Integer> distances=new HashSet<Integer>();
+ for (int j=0; j<c.getField().terms().size(); j++) {
+ int semanticDistance=c.fieldIndexToSemanticDistance(j,zeroJ);
+ assertTrue("Using zeroJ=" + zeroJ + ": " + j + "->" + semanticDistance + " is unique", ! distances.contains(semanticDistance));
+ distances.add(semanticDistance);
+ }
+ }
+
+ private void assertFunction(int j,int semanticDistance,int zeroJ) {
+ assertEquals(j + "->" + semanticDistance,semanticDistance,c.fieldIndexToSemanticDistance(j,zeroJ));
+ }
+
+}
diff --git a/searchlib/src/test/java/com/yahoo/searchlib/ranking/features/fieldmatch/reference/OptimalStringAlignmentDistance.java b/searchlib/src/test/java/com/yahoo/searchlib/ranking/features/fieldmatch/reference/OptimalStringAlignmentDistance.java
new file mode 100644
index 00000000000..272ca98d7c4
--- /dev/null
+++ b/searchlib/src/test/java/com/yahoo/searchlib/ranking/features/fieldmatch/reference/OptimalStringAlignmentDistance.java
@@ -0,0 +1,201 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.ranking.features.fieldmatch.reference;
+
+import java.util.Arrays;
+
+/**
+ * Implementation of optimal string alignment distance which also retains the four subdistances
+ * and which uses 2*query length memory rather than field length*query length.
+ * This class is not thread safe.
+ *
+ * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a>
+ */
+public class OptimalStringAlignmentDistance {
+
+ /** The cell containg the last calculated edit distance */
+ private Cell value=new Cell(0,0,0,0);
+
+ // Temporary variables
+ private Cell[] thisRow, previousRow, previousPreviousRow;
+
+ private String[] query, field;
+
+ private boolean printTable=false;
+
+ public void calculate(String queryString,String fieldString) {
+ this.query=queryString.split(" ");
+ this.field=fieldString.split(" ");
+
+ thisRow=new Cell[query.length+1];
+ previousRow=new Cell[query.length+1];
+ previousPreviousRow=new Cell[query.length+1];
+
+ for(int i=0; i<=query.length; i++) {
+ thisRow[i]=new Cell(i+1,0,0,0);
+ previousRow[i]=new Cell(i,0,0,0);
+ previousPreviousRow[i]=new Cell(i-1,0,0,0);
+ }
+
+ print(previousRow);
+
+ for(int j=1;j<=field.length; j++) {
+ thisRow[0].setTo(0,j,0,0);
+ for(int i=1; i<=query.length; i++) {
+ setCell(i,j);
+ }
+
+ print(thisRow);
+
+ // Shift round thisRow -> previousRow -> previousPreviousRow -> thisRow
+ Cell[] temporaryRow=thisRow;
+ thisRow=previousPreviousRow;
+ previousPreviousRow=previousRow;
+ previousRow=temporaryRow;
+ }
+ value=previousRow[query.length];
+ }
+
+ private void setCell(int i,int j) {
+ Cell thisCell=thisRow[i];
+ Cell left=thisRow[i-1];
+ Cell above=previousRow[i];
+ Cell leftAbove=previousRow[i-1];
+
+ boolean substitution=!query[i-1].equals(field[j-1]);
+
+ int leftCost=left.getTotal()+1;
+ int aboveCost=above.getTotal()+1;
+ int leftAboveCost=leftAbove.getTotal() + ( substitution ? 1 : 0 );
+
+ if (leftCost<=aboveCost && leftCost<=leftAboveCost) {
+ thisCell.setTo(left);
+ thisCell.addDeletion();
+ }
+ else if (aboveCost<=leftCost && aboveCost<=leftAboveCost) {
+ thisCell.setTo(above);
+ thisCell.addInsertion();
+ }
+ else {
+ thisCell.setTo(leftAbove);
+ if (substitution)
+ thisCell.addSubstitution();
+ }
+
+ if (i>1 && j>1 && query[i-1].equals(field[j-2]) && query[i-2].equals(field[j-1]) ) {
+ Cell twoAboveAndLeft=previousPreviousRow[i-2];
+ int transpositionCost= + ( substitution ? 1 : 0);
+ if (transpositionCost<thisCell.getTotal()) {
+ thisCell.setTo(twoAboveAndLeft);
+ thisCell.addTransposition();
+ }
+ }
+ }
+
+ private void setCell(Cell thisCell,Cell left, Cell above, Cell leftAbove, boolean substitution) {
+ int a=left.getTotal()+1;
+ int b=above.getTotal()+1;
+ int c=leftAbove.getTotal();
+
+ c+=substitution ? 1 : 0;
+
+ if (a<=b && a<=c) {
+ thisCell.setTo(left);
+ thisCell.addDeletion();
+ }
+ else if (b<=a && b<=c) {
+ thisCell.setTo(above);
+ thisCell.addInsertion();
+ }
+ else {
+ thisCell.setTo(leftAbove);
+ if (substitution)
+ thisCell.addSubstitution();
+ }
+ /*
+ if(i > 1 and j > 1 and str1[i] = str2[j-1] and str1[i-1] = str2[j]) then
+ d[i, j] := minimum(
+ d[i, j],
+ d[i-2, j-2] + cost // transposition
+ )
+ */
+ }
+
+ public float getTotal() { return value.getTotal(); }
+ public float getSubstitutions() { return value.getSubstitutions(); }
+ public float getDeletions() { return value.getDeletions(); }
+ public float getInsertions() { return value.getInsertions(); }
+ public float getTranspositions() { return value.getTranspositions(); }
+
+ /** Print the calculated edit distance table as we go */
+ public void setPrintTable(boolean printTable) {
+ this.printTable=printTable;
+ }
+
+ private void print(Cell[] row) {
+ if (!printTable) return;
+ for (Cell cell : row) {
+ System.out.print(cell.toShortString());
+ System.out.print(" ");
+ }
+ System.out.println();
+ }
+
+ /** Returns the current state as a string */
+ public String toString() {
+ StringBuffer b=new StringBuffer();
+ b.append("Query: " + Arrays.toString(query) + "\n");
+ b.append("Field: " + Arrays.toString(field) + "\n");
+ b.append(value);
+ return b.toString();
+ }
+
+ /** An edit distance table cell */
+ public static final class Cell {
+
+ private int deletions, insertions, substitutions, transpositions;
+
+ public Cell(int deletions,int insertions,int substitutions,int transpositions) {
+ setTo(deletions,insertions,substitutions,transpositions);
+ }
+
+ public void setTo(Cell cell) {
+ this.deletions=cell.deletions;
+ this.insertions=cell.insertions;
+ this.substitutions=cell.substitutions;
+ this.transpositions=cell.transpositions;
+ }
+
+ public void setTo(int deletions,int insertions,int substitutions,int transpositions) {
+ this.deletions=deletions;
+ this.insertions=insertions;
+ this.substitutions=substitutions;
+ this.transpositions=transpositions;
+ }
+
+ public int getTotal() {
+ return deletions+insertions+substitutions+transpositions;
+ }
+
+ public void addDeletion() { deletions++; }
+ public void addInsertion() { insertions++; }
+ public void addSubstitution() { substitutions++; }
+ public void addTransposition() { transpositions++; }
+
+ public int getDeletions() { return deletions; }
+ public int getInsertions() { return insertions; }
+ public int getSubstitutions() { return substitutions; }
+ public int getTranspositions() { return transpositions; }
+
+ public String toString() {
+ return "Total: " + getTotal() + ", substitutions: " + substitutions + ", deletions: " +
+ deletions + ", insertions: " + insertions + ", transpositions: " + transpositions + "\n";
+ }
+
+ public String toShortString() {
+ return "(" + substitutions + "," + deletions + "," + insertions + "," + transpositions + ")";
+ }
+
+
+ }
+
+}
diff --git a/searchlib/src/test/java/com/yahoo/searchlib/ranking/features/fieldmatch/reference/TextbookLevenshteinDistance.java b/searchlib/src/test/java/com/yahoo/searchlib/ranking/features/fieldmatch/reference/TextbookLevenshteinDistance.java
new file mode 100644
index 00000000000..5ad3449a9d3
--- /dev/null
+++ b/searchlib/src/test/java/com/yahoo/searchlib/ranking/features/fieldmatch/reference/TextbookLevenshteinDistance.java
@@ -0,0 +1,38 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.ranking.features.fieldmatch.reference;
+
+/**
+ * Textbook implementation from
+ * <a href="http://en.wikibooks.org/wiki/Algorithm_implementation/Strings/Levenshtein_distance#Java">
+ * Wikipedia algorithms</a>
+ * Licensed under the Creative Commons Attribution-ShareAlike License
+ */
+public class TextbookLevenshteinDistance {
+
+ private static int minimum(int a, int b, int c){
+ if (a<=b && a<=c)
+ return a;
+ if (b<=a && b<=c)
+ return b;
+ return c;
+ }
+
+ public static int computeLevenshteinDistance(char[] str1, char[] str2) {
+ int[][] distance = new int[str1.length+1][];
+
+ for(int i=0; i<=str1.length; i++){
+ distance[i] = new int[str2.length+1];
+ distance[i][0] = i;
+ }
+ for(int j=0; j<=str2.length; j++)
+ distance[0][j]=j;
+
+ for(int i=1; i<=str1.length; i++)
+ for(int j=1;j<=str2.length; j++)
+ distance[i][j]= minimum(distance[i-1][j]+1, distance[i][j-1]+1,
+ distance[i-1][j-1]+((str1[i-1]==str2[j-1])?0:1));
+
+ return distance[str1.length][str2.length];
+ }
+
+}
diff --git a/searchlib/src/test/java/com/yahoo/searchlib/ranking/features/fieldmatch/reference/test/OptimalStringAlignmentTestCase.java b/searchlib/src/test/java/com/yahoo/searchlib/ranking/features/fieldmatch/reference/test/OptimalStringAlignmentTestCase.java
new file mode 100644
index 00000000000..398c4e70fb7
--- /dev/null
+++ b/searchlib/src/test/java/com/yahoo/searchlib/ranking/features/fieldmatch/reference/test/OptimalStringAlignmentTestCase.java
@@ -0,0 +1,58 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.ranking.features.fieldmatch.reference.test;
+
+import com.yahoo.searchlib.ranking.features.fieldmatch.reference.OptimalStringAlignmentDistance;
+
+/**
+ * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a>
+ */
+public class OptimalStringAlignmentTestCase extends junit.framework.TestCase {
+
+ public OptimalStringAlignmentTestCase(String name) {
+ super(name);
+ }
+
+ public void testEditDistance() {
+ // Edit distance, substitution, deletion, insertion, transposition, query, field, print?
+
+ boolean print=false;
+ assertEditDistance(0,0,0,0,0,"niels bohr","niels bohr",print);
+ assertEditDistance(1,1,0,0,0,"niels","bohr",print);
+ assertEditDistance(1,0,0,1,0,"niels","niels bohr",print);
+ assertEditDistance(1,0,1,0,0,"niels bohr","bohr",print);
+ assertEditDistance(1,0,0,0,1,"niels bohr","bohr niels",print);
+ assertEditDistance(1,0,0,1,0,"niels bohr","niels henrik bohr",print);
+ assertEditDistance(2,0,0,1,1,"niels bohr","bohr niels henrik",print);
+ assertEditDistance(4,1,0,3,0,"niels bohr","niels henrik bor i kopenhagen",print);
+ assertEditDistance(3,2,0,1,0,"niels bohr i kopenhagen","niels henrik bor i stockholm",print);
+ }
+
+ public void testEditDistanceAsRelevance() {
+ boolean print=false;
+ assertEditDistance(2,0,0,2,0,"niels bohr","niels blah blah bohr",print);
+ assertEditDistance(4,0,1,3,0,"niels bohr","bohr blah blah niels",print); // Not desired
+ assertEditDistance(4,2,0,2,0,"niels bohr","koko blah blah bahia",print);
+ }
+
+ private void assertEditDistance(int total,int substitution,int deletion,int insertion,int transposition,String query,String field,boolean printResult) {
+ assertEditDistance(total,substitution,deletion,insertion,transposition,query,field,printResult,false);
+ }
+
+ private void assertEditDistance(int total,int substitution,int deletion,int insertion,int transposition,String query,String field,boolean printResult,boolean printTable) {
+ OptimalStringAlignmentDistance e=new OptimalStringAlignmentDistance();
+ e.setPrintTable(printTable);
+ e.calculate(query,field);
+
+ if (printResult) {
+ System.out.print(e.toString());
+ System.out.println();
+ }
+
+ assertEquals("Substitutions",(float)substitution,e.getSubstitutions());
+ assertEquals("Deletions",(float)deletion,e.getDeletions());
+ assertEquals("Insertions",(float)insertion,e.getInsertions());
+ assertEquals("Transpositions",(float)transposition,e.getTranspositions());
+ assertEquals("Total",(float)total,e.getTotal());
+ }
+
+}
diff --git a/searchlib/src/test/java/com/yahoo/searchlib/ranking/features/fieldmatch/test/FieldMatchMetricsTestCase.java b/searchlib/src/test/java/com/yahoo/searchlib/ranking/features/fieldmatch/test/FieldMatchMetricsTestCase.java
new file mode 100644
index 00000000000..ef8daec2b73
--- /dev/null
+++ b/searchlib/src/test/java/com/yahoo/searchlib/ranking/features/fieldmatch/test/FieldMatchMetricsTestCase.java
@@ -0,0 +1,757 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.ranking.features.fieldmatch.test;
+
+import com.google.common.collect.ImmutableList;
+import com.yahoo.searchlib.ranking.features.fieldmatch.Field;
+import com.yahoo.searchlib.ranking.features.fieldmatch.FieldMatchMetrics;
+import com.yahoo.searchlib.ranking.features.fieldmatch.FieldMatchMetricsComputer;
+import com.yahoo.searchlib.ranking.features.fieldmatch.FieldMatchMetricsParameters;
+import com.yahoo.searchlib.ranking.features.fieldmatch.QueryTerm;
+import com.yahoo.searchlib.ranking.features.fieldmatch.Query;
+
+import java.util.List;
+
+/**
+ * Tests of calculation of all the string match metrics.
+ * Add true as the fourth parameter to assertMetrics to see a trace of what the test is doing.
+ *
+ * @author bratseth
+ */
+public class FieldMatchMetricsTestCase extends junit.framework.TestCase {
+
+ public FieldMatchMetricsTestCase(String name) {
+ super(name);
+ }
+
+ public void testOutOfOrder() {
+ assertMetrics("outOfOrder:0","a","a");
+ assertMetrics("outOfOrder:0","a b c","a b c");
+ assertMetrics("outOfOrder:1","a b c","a c b");
+ assertMetrics("outOfOrder:2","a b c","c b a");
+ assertMetrics("outOfOrder:2","a b c d e","c x a b x x x x x e x x d");
+ assertMetrics("outOfOrder:2","a b c d e","c x a b x x x x x e x x d");
+ assertMetrics("outOfOrder:2", "a b c d e", "c x a b x x x x x e x x d");
+ }
+
+ public void testSegments() {
+ assertMetrics("segments:1","a","a");
+ assertMetrics("segments:1","a b c","a b c");
+ assertMetrics("segments:1","a b c","a x x b c");
+ assertMetrics("segments:2","a b c","a x x x x x x x x x x x x x x x x x x x b c");
+ assertMetrics("segments:2","a b c","b c x x x x x x x x x x x x x x x x x x x a");
+ assertMetrics("segments:2 gaps:1","a b c","x x x a x x x x x x x x x x x x x x x x x x x b x x c x x");
+ assertMetrics("segments:2 gaps:0 outOfOrder:0","a b c","b c x x x x x x x x x x x x x x x x x x x a");
+ assertMetrics("segments:2 gaps:1","a b c","x x x b x x c x x x x x x x x x x x x x x x x x x x a x x");
+ assertMetrics("segments:2 gaps:1","a y y b c","x x x b x x c x x x x x x x x x x x x x x x x x x x a x x");
+ }
+
+ public void testGaps() {
+ assertMetrics("gaps:0","a","a");
+ assertMetrics("gaps:0","x�a","a");
+ assertMetrics("gaps:0 gapLength:0","a b c","a b c");
+ assertMetrics("gaps:1 gapLength:1","a b","b a");
+ assertMetrics("gaps:1 gapLength:1","a b c","a x b c");
+ assertMetrics("gaps:1 gapLength:3","a b c","a x X Xb c");
+ assertMetrics("gaps:2 gapLength:2 outOfOrder:1","a b c","a c b");
+ assertMetrics("gaps:2 gapLength:2 outOfOrder:0","a b c","a x b x c");
+ assertMetrics("gaps:2 gapLength:5 outOfOrder:1","a b c","a x c x b");
+ assertMetrics("gaps:3 outOfOrder:2 segments:1","a b c d e","x d x x b c x x a e");
+ assertMetrics("gaps:0","y a b c","a b c x");
+ }
+
+ public void testHead() {
+ assertMetrics("head:0","a","a");
+ assertMetrics("head:0","y","a");
+ assertMetrics("head:1","a","x a");
+ assertMetrics("head:2","a b c","x x a b c");
+ assertMetrics("head:2","a b c","x x c x x a b");
+ assertMetrics("head:2", "a b c", "x x c x x x x x x x x x x x x x x x a b");
+ }
+
+ public void testTail() {
+ assertMetrics("tail:0","a","a");
+ assertMetrics("tail:0","y","a");
+ assertMetrics("tail:1","a","a x");
+ assertMetrics("tail:2","a b c","a b c x x");
+ assertMetrics("tail:2","a b c","x x x c x x x x a b x x");
+ assertMetrics("tail:0","a b c","x x c x x x x x x x x x x x x x x x a b");
+ }
+
+ public void testLongestSequence() {
+ assertMetrics("longestSequence:1","a","a");
+ assertMetrics("longestSequence:1","a","a b c");
+ assertMetrics("longestSequence:1","b","a b c");
+ assertMetrics("longestSequence:3","a b c","x x a b c x x a b x");
+ assertMetrics("longestSequence:3 segments:1","a b c","x x a b x x a b c x");
+ assertMetrics("longestSequence:2","a b c d","x x c d x x a b x");
+ assertMetrics("longestSequence:2","a b c d","x x a b x c d x x");
+ assertMetrics("longestSequence:2","a b c d","x x a b x x x x x x x x x x x x x x x x x c d x x");
+ assertMetrics("longestSequence:4 segments:1","a b c d","x x a b x x x x x x x x x x x x x x x x x c d x x a b c d");
+ }
+
+ public void testMatches() {
+ assertMetrics("matches:1 queryCompleteness:1 fieldCompleteness:1", "a","a");
+ assertMetrics("matches:3 queryCompleteness:1 fieldCompleteness:1", "a b c","a b c");
+ assertMetrics("matches:3 queryCompleteness:1 fieldCompleteness:0.5", "a b c","a b c a b d");
+ assertMetrics("matches:3 queryCompleteness:0.5 fieldCompleteness:0.25","a y y b c y","a x x b c x a x a b x x");
+ }
+
+ public void testCompleteness() {
+ assertMetrics("completeness:1 queryCompleteness:1 fieldCompleteness:1", "a","a");
+ assertMetrics("completeness:0 queryCompleteness:0 fieldCompleteness:0", "a","x");
+ assertMetrics("completeness:0 queryCompleteness:0 fieldCompleteness:0", "y","a");
+ assertMetrics("completeness:0.975 queryCompleteness:1 fieldCompleteness:0.5","a","a a");
+ assertMetrics("completeness:0.525 queryCompleteness:0.5 fieldCompleteness:1", "a a","a");
+ assertMetrics("completeness:1 queryCompleteness:1 fieldCompleteness:1", "a b c","a b c");
+ assertMetrics("completeness:0.525 queryCompleteness:0.5 fieldCompleteness:1", "a b c d","a b");
+ assertMetrics("completeness:0.975 queryCompleteness:1 fieldCompleteness:0.5","a b","a b c d");
+ assertMetrics("completeness:0.97 queryCompleteness:1 fieldCompleteness:0.4","a b","a b c d e");
+ assertMetrics("completeness:0.97 queryCompleteness:1 fieldCompleteness:0.4","a b","a b b b b");
+ }
+
+ public void testOrderness() {
+ assertMetrics("orderness:1", "a","a");
+ assertMetrics("orderness:1", "a","x");
+ assertMetrics("orderness:0", "a a a","a"); // Oh well...
+ assertMetrics("orderness:1", "a","a a a");
+ assertMetrics("orderness:0", "a b","b a");
+ assertMetrics("orderness:0.5","a b c","b a c");
+ assertMetrics("orderness:0.5","a b c d","c b d x x x x x x x x x x x x x x x x x x x x x a");
+ assertMetrics("orderness:1", "a b","b x x x x x x x x x x x x x x x x x x x x x a");
+ }
+
+ public void testRelatedness() {
+ assertMetrics("relatedness:1", "a","a");
+ assertMetrics("relatedness:0", "a","x");
+ assertMetrics("relatedness:1", "a b","a b");
+ assertMetrics("relatedness:1", "a b c","a b c");
+ assertMetrics("relatedness:0.5","a b c","a b x x x x x x x x x x x x x x x x x x x x x x x c");
+ assertMetrics("relatedness:0.5","a y b y y y c","a b x x x x x x x x x x x x x x x x x x x x x x x c");
+ }
+
+ public void testLongestSequenceRatio() {
+ assertMetrics("longestSequenceRatio:1", "a","a");
+ assertMetrics("longestSequenceRatio:0", "a","x");
+ assertMetrics("longestSequenceRatio:1", "a a","a");
+ assertMetrics("longestSequenceRatio:1", "a","a a");
+ assertMetrics("longestSequenceRatio:1", "a b","a b");
+ assertMetrics("longestSequenceRatio:1", "a y"," a x");
+ assertMetrics("longestSequenceRatio:0.5","a b","a x b");
+ assertMetrics("longestSequenceRatio:0.75","a b c d","x x a b x a x c d a b c x d x");
+ }
+
+ public void testEarliness() {
+ assertMetrics("earliness:1", "a","a");
+ assertMetrics("earliness:0", "a","x");
+ assertMetrics("earliness:1", "a","a a a");
+ assertMetrics("earliness:1", "a a a","a");
+ assertMetrics("earliness:0.8", "b","a b c");
+ assertMetrics("earliness:0.8", "b","a b");
+ assertMetrics("earliness:0.9091","a b c","x b c x x x x x a x x x");
+ assertMetrics("earliness:0.2", "a b c","x b c a x x x x a x x x x x x x a b c x x");
+ }
+
+ public void testWeight() {
+ assertMetrics("weight:1", "a","a");
+ assertMetrics("weight:0", "y","a");
+ assertMetrics("weight:0.3333","a a a","a");
+ assertMetrics("weight:1", "a","a a a");
+ assertMetrics("weight:1", "a b c","a b c");
+ assertMetrics("weight:1", "a b c","x x a b x a x c x x a b x c c x");
+
+ assertMetrics("weight:0.3333","a b c","a");
+ assertMetrics("weight:0.6667","a b c","a b");
+
+ assertMetrics("weight:1", "a b c!200","a b c"); // Best
+ assertMetrics("weight:0.75","a b c!200","b c"); // Middle
+ assertMetrics("weight:0.5", "a b c!200","a b"); // Worst
+
+ assertMetrics("weight:1","a!300 b c!200","a b c"); // Best too
+
+ assertMetrics("weight:1", "a b c!50","a b c"); // Best
+ assertMetrics("weight:0.6","a b c!50","b c"); // Worse
+ assertMetrics("weight:0.4","a b c!50","b"); // Worse
+ assertMetrics("weight:0.2","a b c!50","c"); // Worst
+ assertMetrics("weight:0.8","a b c!50","a b"); // Middle
+
+ assertMetrics("weight:1", "a b c!0","a b c"); // Best
+ assertMetrics("weight:0.5","a b c!0","b c"); // Worst
+ assertMetrics("weight:1", "a b c!0","a b"); // As good as best
+ assertMetrics("weight:0", "a b c!0","c"); // No contribution
+
+ assertMetrics("weight:0","a!0 b!0","a b");
+ assertMetrics("weight:0","a!0 b!0","");
+
+ // The query also has other terms having a total weight of 300
+ // so we add a weight parameter which is the sum of the weights of this query terms + 300
+ assertMetrics("weight:0.25", "a","a",400);
+ assertMetrics("weight:0", "y","a",400);
+ assertMetrics("weight:0.1667","a a a","a",600);
+ assertMetrics("weight:0.25", "a","a a a",400);
+ assertMetrics("weight:0.5", "a b c","a b c",600);
+ assertMetrics("weight:0.5", "a b c","x x a b x a x c x x a b x c c x",600);
+
+ assertMetrics("weight:0.1667","a b c","a",600);
+ assertMetrics("weight:0.3333","a b c","a b",600);
+
+ assertMetrics("weight:0.5714","a b c!200","a b c",700); // Best
+ assertMetrics("weight:0.4286","a b c!200","b c",700); // Middle
+ assertMetrics("weight:0.2857","a b c!200","a b",700); // Worst
+
+ assertMetrics("weight:0.6667","a!300 b c!200","a b c",900); // Better than best
+
+ assertMetrics("weight:0.4545","a b c!50","a b c",550); // Best
+ assertMetrics("weight:0.2727","a b c!50","b c",550); // Worse
+ assertMetrics("weight:0.1818","a b c!50","b",550); // Worse
+ assertMetrics("weight:0.0909","a b c!50","c",550); // Worst
+ assertMetrics("weight:0.3636","a b c!50","a b",550); // Middle
+
+ assertMetrics("weight:0.4","a b c!0","a b c",500); // Best
+ assertMetrics("weight:0.2","a b c!0","b c",500); // Worst
+ assertMetrics("weight:0.4","a b c!0","a b",500); // As good as best
+ assertMetrics("weight:0", "a b c!0","c",500); // No contribution
+
+ assertMetrics("weight:0","a!0 b!0","a b",300);
+ assertMetrics("weight:0","a!0 b!0","",300);
+ }
+
+ /** Calculated the same way as weight */
+ public void testSignificance() {
+ assertMetrics("significance:1", "a","a");
+ assertMetrics("significance:0", "a","x");
+ assertMetrics("significance:0.3333","a a a","a");
+ assertMetrics("significance:1", "a","a a a");
+ assertMetrics("significance:1", "a b c","a b c");
+ assertMetrics("significance:1", "a b c","x x a b x a x c x x a b x c c x");
+
+ assertMetrics("significance:0.3333","a b c","a");
+ assertMetrics("significance:0.6667","a b c","a b");
+
+ assertMetrics("significance:1", "a b c%0.2","a b c"); // Best
+ assertMetrics("significance:0.75","a b c%0.2","b c"); // Middle
+ assertMetrics("significance:0.5", "a b c%0.2","a b"); // Worst
+
+ assertMetrics("significance:1","a%0.3 b c%0.2","a b c"); // Best too
+
+ assertMetrics("significance:1", "a b c%0.05","a b c"); // Best
+ assertMetrics("significance:0.6","a b c%0.05","b c"); // Worse
+ assertMetrics("significance:0.4","a b c%0.05","b"); // Worse
+ assertMetrics("significance:0.2","a b c%0.05","c"); // Worst
+ assertMetrics("significance:0.8","a b c%0.05","a b"); // Middle
+
+ assertMetrics("significance:1", "a b c%0","a b c"); // Best
+ assertMetrics("significance:0.5","a b c%0","b c"); // Worst
+ assertMetrics("significance:1", "a b c%0","a b"); // As good as best
+ assertMetrics("significance:0", "a b c%0","c"); // No contribution
+
+ assertMetrics("significance:0","a%0 b%0","a b");
+ assertMetrics("significance:0","a%0 b%0","");
+
+ // The query also has other terms having a total significance of 0.3
+ // so we add a significance parameter which is the sum of the significances of this query terms + 0.3
+ assertMetrics("significance:0.25", "a","a",0.4f);
+ assertMetrics("significance:0", "y","a",0.4f);
+ assertMetrics("significance:0.1667","a a a","a",0.6f);
+ assertMetrics("significance:0.25", "a","a a a",0.4f);
+ assertMetrics("significance:0.5", "a b c","a b c",0.6f);
+ assertMetrics("significance:0.5", "a b c","x x a b x a x c x x a b x c c x",0.6f);
+
+ assertMetrics("significance:0.1667","a b c","a",0.6f);
+ assertMetrics("significance:0.3333","a b c","a b",0.6f);
+
+ assertMetrics("significance:0.5714","a b c%0.2","a b c",0.7f); // Best
+ assertMetrics("significance:0.4286","a b c%0.2","b c",0.7f); // Middle
+ assertMetrics("significance:0.2857","a b c%0.2","a b",0.7f); // Worst
+
+ assertMetrics("significance:0.6667","a%0.3 b c%0.2","a b c",0.9f); // Better than best
+
+ assertMetrics("significance:0.4545","a b c%0.05","a b c",0.55f); // Best
+ assertMetrics("significance:0.2727","a b c%0.05","b c",0.55f); // Worse
+ assertMetrics("significance:0.1818","a b c%0.05","b",0.55f); // Worse
+ assertMetrics("significance:0.0909","a b c%0.05","c",0.55f); // Worst
+ assertMetrics("significance:0.3636","a b c%0.05","a b",0.55f); // Middle
+
+ assertMetrics("significance:0.4","a b c%0","a b c",0.5f); // Best
+ assertMetrics("significance:0.2","a b c%0","b c",0.5f); // Worst
+ assertMetrics("significance:0.4","a b c%0","a b",0.5f); // As good as best
+ assertMetrics("significance:0", "a b c%0","c",0.5f); // No contribution
+
+ assertMetrics("significance:0","a%0 b%0","a b",0.3f);
+ assertMetrics("significance:0","a%0 b%0","",0.3f);
+ }
+
+ public void testImportance() {
+ assertMetrics("importance:0.75","a b c", "a x x b x c c c",600);
+ assertMetrics("importance:0.85","a b!500 c","a x x b x c c c",1000);
+
+ // Twice as common - twice as weighty, but total weight has the extra 300 - less than the previous
+ assertMetrics("importance:0.7857","a b!200%0.05 c","a x x b x c c c",700);
+ // Here higher importancy exactly offsets the lowered uniqueness
+ assertMetrics("importance:0.85","a b!500%0.5 c","a x x b x c c c",1000);
+ }
+
+ public void testOccurrence() {
+ assertMetrics("occurrence:0","a","x");
+ assertMetrics("occurrence:1","a","a");
+ assertMetrics("occurrence:0","a a a","x");
+ assertMetrics("occurrence:1","a a a","a");
+ assertMetrics("occurrence:1","a a a","a a a");
+ assertMetrics("occurrence:1","a a a","a a a a");
+ assertMetrics("occurrence:0.3571","a","x x x a x x a x a x x x a a");
+ assertMetrics("occurrence:1","a","a a a a a a a a a a a a a a");
+ assertMetrics("occurrence:1","a b","a b b a a a a a b a a b a a");
+
+ // tests going beyond the occurrence limit
+ FieldMatchMetricsParameters parameters=new FieldMatchMetricsParameters();
+ parameters.setMaxOccurrences(10);
+ parameters.freeze();
+ FieldMatchMetricsComputer c=new FieldMatchMetricsComputer(parameters);
+ assertMetrics("occurrence:1", "a b","a a a a a a a a a a b b",false,c);
+ assertMetrics("occurrence:0.9231","a b","a a a a a a a a a a a b b",false,c); // Starting to cut off
+ assertMetrics("occurrence:0.6", "a b","a a a a a a a a a a a a a a a a a a a a a b b",false,c); // Way beyond cutoff for a
+ assertMetrics("occurrence:1", "a b","a a a a a a a a a a b b b b b b b b b b",false,c); // Exactly no cutoff
+ assertMetrics("occurrence:1", "a b","a a a a a a a a a a a b b b b b b b b b b b",false,c); // Field is too large to consider field length
+ }
+
+ public void testAbsoluteOccurrence() {
+ assertMetrics("absoluteOccurrence:0", "a","x");
+ assertMetrics("absoluteOccurrence:0.01","a","a");
+ assertMetrics("absoluteOccurrence:0","a a a","x");
+ assertMetrics("absoluteOccurrence:0.01", "a a a","a");
+ assertMetrics("absoluteOccurrence:0.03", "a a a","a a a");
+ assertMetrics("absoluteOccurrence:0.04", "a a a","a a a a");
+ assertMetrics("absoluteOccurrence:0.05","a","x x x a x x a x a x x x a a");
+ assertMetrics("absoluteOccurrence:0.14","a","a a a a a a a a a a a a a a");
+ assertMetrics("absoluteOccurrence:0.07","a b","a b b a a a a a b a a b a a");
+
+ // tests going beyond the occurrence limit
+ FieldMatchMetricsParameters parameters=new FieldMatchMetricsParameters();
+ parameters.setMaxOccurrences(10);
+ parameters.freeze();
+ FieldMatchMetricsComputer c=new FieldMatchMetricsComputer(parameters);
+ assertMetrics("absoluteOccurrence:0.6","a b","a a a a a a a a a a b b",false,c);
+ assertMetrics("absoluteOccurrence:0.6","a b","a a a a a a a a a a a b b",false,c); // Starting to cut off
+ assertMetrics("absoluteOccurrence:0.6","a b","a a a a a a a a a a a a a a a a a a a a a b b",false,c); // Way beyond cutoff for a
+ assertMetrics("absoluteOccurrence:1", "a b","a a a a a a a a a a b b b b b b b b b b",false,c); // Exactly no cutoff
+ assertMetrics("absoluteOccurrence:1", "a b","a a a a a a a a a a a b b b b b b b b b b b",false,c); // Field is too large to consider field length
+ }
+
+ public void testWeightedOccurrence() {
+ assertMetrics("weightedOccurrence:0","a!200","x");
+ assertMetrics("weightedOccurrence:1","a!200","a");
+ assertMetrics("weightedOccurrence:0","a!200 a a","x");
+ assertMetrics("weightedOccurrence:1","a!200 a a","a");
+ assertMetrics("weightedOccurrence:1","a a a","a a a");
+ assertMetrics("weightedOccurrence:1","a!200 a a","a a a a");
+ assertMetrics("weightedOccurrence:0.3571","a!200","x x x a x x a x a x x x a a");
+ assertMetrics("weightedOccurrence:1","a!200","a a a a a a a a a a a a a a");
+ assertMetrics("weightedOccurrence:0.5","a b","a b b a a a a a b a a b a a");
+
+ assertMetrics("weightedOccurrence:0.5714","a!200 b","a b b a a a a a b a a b a a");
+ assertMetrics("weightedOccurrence:0.6753","a!1000 b","a b b a a a a a b a a b a a"); // Should be higher
+ assertMetrics("weightedOccurrence:0.4286","a b!200","a b b a a a a a b a a b a a"); // Should be lower
+ assertMetrics("weightedOccurrence:0.3061","a b!2000","a b b a a a a a b a a b a a"); // Should be even lower
+
+ assertMetrics("weightedOccurrence:0.30","a b", "a a b b b b x x x x");
+ assertMetrics("weightedOccurrence:0.3333","a b!200","a a b b b b x x x x"); // More frequent is more important - higher
+ assertMetrics("weightedOccurrence:0.2667","a!200 b","a a b b b b x x x x"); // Less frequent is more important - lower
+ assertMetrics("weightedOccurrence:0.2667","a b!50", "a a b b b b x x x x"); // Same relative
+
+ assertMetrics("weightedOccurrence:0","a!0 b!0", "a a b b b b x x x x");
+
+ // tests going beyond the occurrence limit
+ FieldMatchMetricsParameters parameters=new FieldMatchMetricsParameters();
+ parameters.setMaxOccurrences(10);
+ parameters.freeze();
+ FieldMatchMetricsComputer c=new FieldMatchMetricsComputer(parameters);
+ assertMetrics("weightedOccurrence:0.6","a b","a a a a a a a a a a b b",false,c);
+ assertMetrics("weightedOccurrence:0.6","a b","a a a a a a a a a a a b b",false,c); // Starting to cut off
+ assertMetrics("weightedOccurrence:0.6","a b","a a a a a a a a a a a a a a a a a a a a a b b",false,c); // Way beyond cutoff for a
+ assertMetrics("weightedOccurrence:1", "a b","a a a a a a a a a a b b b b b b b b b b",false,c); // Exactly no cutoff
+ assertMetrics("weightedOccurrence:1", "a b","a a a a a a a a a a a b b b b b b b b b b b",false,c); // Field is too large to consider field length
+
+ assertMetrics("weightedOccurrence:0.7333","a!200 b","a a a a a a a a a a b b",false,c);
+ assertMetrics("weightedOccurrence:0.4667","a b!200","a a a a a a a a a a b b",false,c);
+ assertMetrics("weightedOccurrence:0.7333","a!200 b","a a a a a a a a a a a b b",false,c); // Starting to cut off
+ assertMetrics("weightedOccurrence:0.7333","a!200 b","a a a a a a a a a a a a a a a a a a a a a b b",false,c); // Way beyond cutoff for a
+ assertMetrics("weightedOccurrence:1", "a!200 b","a a a a a a a a a a b b b b b b b b b b",false,c); // Exactly no cutoff
+ assertMetrics("weightedOccurrence:1", "a!200 b","a a a a a a a a a a a b b b b b b b b b b b",false,c); // Field is too large to consider field length
+ }
+
+ public void testWeightedAbsoluteOccurrence() {
+ assertMetrics("weightedAbsoluteOccurrence:0", "a!200","x");
+ assertMetrics("weightedAbsoluteOccurrence:0.01", "a!200","a");
+ assertMetrics("weightedAbsoluteOccurrence:0", "a!200 a a","x");
+ assertMetrics("weightedAbsoluteOccurrence:0.01", "a!200 a a","a");
+ assertMetrics("weightedAbsoluteOccurrence:0.03", "a a a","a a a");
+ assertMetrics("weightedAbsoluteOccurrence:0.04", "a!200 a a","a a a a");
+ assertMetrics("weightedAbsoluteOccurrence:0.05", "a!200","x x x a x x a x a x x x a a");
+ assertMetrics("weightedAbsoluteOccurrence:0.14", "a!200","a a a a a a a a a a a a a a");
+ assertMetrics("weightedAbsoluteOccurrence:0.07","a b","a b b a a a a a b a a b a a");
+
+ assertMetrics("weightedAbsoluteOccurrence:0.08", "a!200 b","a b b a a a a a b a a b a a");
+ assertMetrics("weightedAbsoluteOccurrence:0.0945","a!1000 b","a b b a a a a a b a a b a a"); // Should be higher
+ assertMetrics("weightedAbsoluteOccurrence:0.06", "a b!200","a b b a a a a a b a a b a a"); // Should be lower
+ assertMetrics("weightedAbsoluteOccurrence:0.0429","a b!2000","a b b a a a a a b a a b a a"); // Should be even lower
+
+ assertMetrics("weightedAbsoluteOccurrence:0.03", "a b", "a a b b b b x x x x");
+ assertMetrics("weightedAbsoluteOccurrence:0.0333","a b!200","a a b b b b x x x x"); // More frequent is more important - higher
+ assertMetrics("weightedAbsoluteOccurrence:0.0267","a!200 b","a a b b b b x x x x"); // Less frequent is more important - lower
+ assertMetrics("weightedAbsoluteOccurrence:0.0267","a b!50", "a a b b b b x x x x"); // Same relative
+
+ assertMetrics("weightedAbsoluteOccurrence:0","a!0 b!0", "a a b b b b x x x x");
+
+ // tests going beyond the occurrence limit
+ FieldMatchMetricsParameters parameters=new FieldMatchMetricsParameters();
+ parameters.setMaxOccurrences(10);
+ parameters.freeze();
+ FieldMatchMetricsComputer c=new FieldMatchMetricsComputer(parameters);
+ assertMetrics("weightedAbsoluteOccurrence:0.6","a b","a a a a a a a a a a b b",false,c);
+ assertMetrics("weightedAbsoluteOccurrence:0.6","a b","a a a a a a a a a a a b b",false,c); // Starting to cut off
+ assertMetrics("weightedAbsoluteOccurrence:0.6","a b","a a a a a a a a a a a a a a a a a a a a a b b",false,c); // Way beyond cutoff for a
+ assertMetrics("weightedAbsoluteOccurrence:1", "a b","a a a a a a a a a a b b b b b b b b b b",false,c); // Exactly no cutoff
+ assertMetrics("weightedAbsoluteOccurrence:1", "a b","a a a a a a a a a a a b b b b b b b b b b b",false,c); // Field is too large to consider field length
+
+ assertMetrics("weightedAbsoluteOccurrence:0.7333","a!200 b","a a a a a a a a a a b b",false,c);
+ assertMetrics("weightedAbsoluteOccurrence:0.4667","a b!200","a a a a a a a a a a b b",false,c);
+ assertMetrics("weightedAbsoluteOccurrence:0.7333","a!200 b","a a a a a a a a a a a b b",false,c); // Starting to cut off
+ assertMetrics("weightedAbsoluteOccurrence:0.7333","a!200 b","a a a a a a a a a a a a a a a a a a a a a b b",false,c); // Way beyond cutoff for a
+ assertMetrics("weightedAbsoluteOccurrence:1", "a!200 b","a a a a a a a a a a b b b b b b b b b b",false,c); // Exactly no cutoff
+ assertMetrics("weightedAbsoluteOccurrence:1", "a!200 b","a a a a a a a a a a a b b b b b b b b b b b",false,c); // Field is too large to consider field length
+ }
+
+ public void testSignificantOccurrence() {
+ assertMetrics("significantOccurrence:0","a%0.2","x");
+ assertMetrics("significantOccurrence:1","a%0.2","a");
+ assertMetrics("significantOccurrence:0","a%0.2 a a","x");
+ assertMetrics("significantOccurrence:1","a%0.2 a a","a");
+ assertMetrics("significantOccurrence:1","a a a","a a a");
+ assertMetrics("significantOccurrence:1","a%0.2 a a","a a a a");
+ assertMetrics("significantOccurrence:0.3571","a%0.2","x x x a x x a x a x x x a a");
+ assertMetrics("significantOccurrence:1","a%0.2","a a a a a a a a a a a a a a");
+ assertMetrics("significantOccurrence:0.5","a b","a b b a a a a a b a a b a a");
+
+ assertMetrics("significantOccurrence:0.5714","a%0.2 b","a b b a a a a a b a a b a a");
+ assertMetrics("significantOccurrence:0.6753","a%1 b","a b b a a a a a b a a b a a"); // Should be higher
+ assertMetrics("significantOccurrence:0.4286","a b%0.2","a b b a a a a a b a a b a a"); // Should be lower
+ assertMetrics("significantOccurrence:0.3247","a b%1","a b b a a a a a b a a b a a"); // Should be even lower
+
+ assertMetrics("significantOccurrence:0.30","a b", "a a b b b b x x x x");
+ assertMetrics("significantOccurrence:0.3333","a b%0.2","a a b b b b x x x x"); // More frequent is more important - higher
+ assertMetrics("significantOccurrence:0.2667","a%0.2 b","a a b b b b x x x x"); // Less frequent is more important - lower
+ assertMetrics("significantOccurrence:0.2667","a b%0.05", "a a b b b b x x x x"); // Same relative
+
+ assertMetrics("significantOccurrence:0","a%0 b%0", "a a b b b b x x x x");
+
+ // tests going beyond the occurrence limit
+ FieldMatchMetricsParameters parameters=new FieldMatchMetricsParameters();
+ parameters.setMaxOccurrences(10);
+ parameters.freeze();
+ FieldMatchMetricsComputer c=new FieldMatchMetricsComputer(parameters);
+ assertMetrics("significantOccurrence:0.6","a b","a a a a a a a a a a b b",false,c);
+ assertMetrics("significantOccurrence:0.6","a b","a a a a a a a a a a a b b",false,c); // Starting to cut off
+ assertMetrics("significantOccurrence:0.6","a b","a a a a a a a a a a a a a a a a a a a a a b b",false,c); // Way beyond cutoff for a
+ assertMetrics("significantOccurrence:1", "a b","a a a a a a a a a a b b b b b b b b b b",false,c); // Exactly no cutoff
+ assertMetrics("significantOccurrence:1", "a b","a a a a a a a a a a a b b b b b b b b b b b",false,c); // Field is too large to consider field length
+
+ assertMetrics("significantOccurrence:0.7333","a%0.2 b","a a a a a a a a a a b b",false,c);
+ assertMetrics("significantOccurrence:0.4667","a b%0.2","a a a a a a a a a a b b",false,c);
+ assertMetrics("significantOccurrence:0.7333","a%0.2 b","a a a a a a a a a a a b b",false,c); // Starting to cut off
+ assertMetrics("significantOccurrence:0.7333","a%0.2 b","a a a a a a a a a a a a a a a a a a a a a b b",false,c); // Way beyond cutoff for a
+ assertMetrics("significantOccurrence:1", "a%0.2 b","a a a a a a a a a a b b b b b b b b b b",false,c); // Exactly no cutoff
+ assertMetrics("significantOccurrence:1", "a%0.2 b","a a a a a a a a a a a b b b b b b b b b b b",false,c); // Field is too large to consider field length
+ }
+
+ public void testUnweightedProximity() {
+ assertMetrics("unweightedProximity:1", "a","a");
+ assertMetrics("unweightedProximity:1", "a b c","a b c");
+ assertMetrics("unweightedProximity:1", "a b c","a b c x");
+ assertMetrics("unweightedProximity:1", "y a b c","a b c x");
+ assertMetrics("unweightedProximity:1", "y a b c", "a b c x");
+ assertMetrics("unweightedProximity:0.855", "y a b c", "a b x c x");
+ assertMetrics("unweightedProximity:0.750","y a b c","a b x x c x");
+ assertMetrics("unweightedProximity:0.71", "y a b c","a x b x c x"); // Should be slightly worse than the previous one
+ assertMetrics("unweightedProximity:0.605","y a b c","a x b x x c x");
+ assertMetrics("unweightedProximity:0.53", "y a b c","a x b x x x c x");
+ assertMetrics("unweightedProximity:0.5", "y a b c","a x x b x x c x");
+ }
+
+ public void testReverseProximity() {
+ assertMetrics("unweightedProximity:0.33", "a b","b a");
+ assertMetrics("unweightedProximity:0.62", "a b c","c a b");
+ assertMetrics("unweightedProximity:0.585", "y a b c","c x a b");
+ assertMetrics("unweightedProximity:0.33", "a b c","c b a");
+ assertMetrics("unweightedProximity:0.6875","a b c d e","a b d c e");
+ assertMetrics("unweightedProximity:0.9275","a b c d e","a b x c d e");
+ }
+
+ public void testProximity() {
+ assertMetrics("absoluteProximity:0.1 proximity:1", "a b","a b");
+ assertMetrics("absoluteProximity:0.3 proximity:1", "a 0.3:b","a b");
+ assertMetrics("absoluteProximity:0.1 proximity:1", "a 0.0:b","a b");
+ assertMetrics("absoluteProximity:1 proximity:1", "a 1.0:b","a b");
+ assertMetrics("absoluteProximity:0.033 proximity:0.33", "a b","b a");
+ assertMetrics("absoluteProximity:0.0108 proximity:0.0359","a 0.3:b","b a"); // Should be worse than the previous one
+ assertMetrics("absoluteProximity:0.1 proximity:1", "a 0.0:b","b a");
+ assertMetrics("absoluteProximity:0 proximity:0", "a 1.0:b","b a");
+
+ // proximity with connextedness
+ assertMetrics("absoluteProximity:0.0605 proximity:0.605", "a b c","a x b x x c");
+ assertMetrics("absoluteProximity:0.0701 proximity:0.2003","a 0.5:b 0.2:c","a x b x x c"); // Most important is close, less important is far: Better
+ assertMetrics("absoluteProximity:0.0605 proximity:0.605", "a b c","a x x b x c");
+ assertMetrics("absoluteProximity:0.0582 proximity:0.1663","a 0.5:b 0.2:c","a x x b x c"); // Most important is far, less important is close: Worse
+
+ assertMetrics("absoluteProximity:0.0727 proximity:0.7267","a b c d","a b x x x x x c d");
+ assertMetrics("absoluteProximity:0.1 proximity:1", "a b 0:c d","a b x x x x x c d"); // Should be better because the gap is unimportant
+ }
+
+ /**
+ * Tests exactness (using field exactness only - nothing additional of interest to test with query exactness
+ * as that is just another number multiplied with the term exactness)
+ */
+ public void testExactness() {
+ assertMetrics("exactness:1", "a b c","a x b x x c");
+ assertMetrics("exactness:0.9", "a b c","a x b:0.7 x x c");
+ assertMetrics("exactness:0.7", "a b c","a x b:0.6 x x c:0.5");
+ assertMetrics("exactness:0.775", "a!200 b c","a x b:0.6 x x c:0.5");
+ assertMetrics("exactness:0.65", "a b c!200","a x b:0.6 x x c:0.5");
+ }
+
+ public void testMultiSegmentProximity() {
+ assertMetrics("absoluteProximity:0.1 proximity:1", "a b c", "a b x x x x x x x x x x x x x x x x x x x x x x c");
+ assertMetrics("absoluteProximity:0.05 proximity:0.5","a b c", "a x x b x x x x x x x x x x x x x x x x x x x x x x c");
+ assertMetrics("absoluteProximity:0.075 proximity:0.75","a b c d","a x x b x x x x x x x x x x x x x x x x x x x x x x c d");
+ }
+
+ public void testSegmentDistance() {
+ assertMetrics("segmentDistance:13 absoluteProximity:0.1", "a b c","a b x x x x x x x x x x c");
+ assertMetrics("segmentDistance:13 absoluteProximity:0.5", "a 0.5:b c","a b x x x x x x x x x x c");
+ assertMetrics("segmentDistance:13 absoluteProximity:0.1", "a b c","b c x x x x x x x x x x a");
+ assertMetrics("segmentDistance:25 absoluteProximity:0.1", "a b c","b x x x x x x x x x x x a x x x x x x x x x x c");
+ assertMetrics("segmentDistance:13 absoluteProximity:0.006","a b c","a x x x x x x x x x x x b x x x x x x x x c");
+ assertMetrics("segmentDistance:24 absoluteProximity:0.1", "a b c","a x x x x x x x x x x x b x x x x x x x x x c");
+ assertMetrics("segmentDistance:25 absoluteProximity:0.1", "a b c","a x x x x x x x x x x x b x x x x x x x x x x c");
+ assertMetrics("segmentDistance:25 absoluteProximity:0.1", "a b c","c x x x x x x x x x x x b x x x x x x x x x x a");
+ }
+
+ public void testSegmentProximity() {
+ assertMetrics("segmentProximity:1", "a","a");
+ assertMetrics("segmentProximity:0", "a","x");
+ assertMetrics("segmentProximity:1", "a","a x");
+ assertMetrics("segmentProximity:0", "a b","a x x x x x x x x x x x x x x x x x x x x x x x b");
+ assertMetrics("segmentProximity:0.4","a b","a x x x x x x x x x x x x x x x x x x x x x x b x x x x x x x x x x x x x x x x");
+ assertMetrics("segmentProximity:0", "a b c","a b x x x x x x x x x x x x x x x x x x x x x c");
+ assertMetrics("segmentProximity:0.4","a b c","a b x x x x x x x x x x x x x x x x x x x x x c x x x x x x x x x x x x x x x x");
+ assertMetrics("segmentProximity:0.4","a b c","b c x x x x x x x x x x x x x x x x x x x x x a x x x x x x x x x x x x x x x x");
+ }
+
+ /** Test cases where we choose between multiple different segmentations */
+ public void testSegmentSelection() {
+ assertMetrics("segments:2 absoluteProximity:0.1 proximity:1 segmentStarts:19,41",
+ "a b c d e","x a b x c x x x x x x x x x x x x x x a b c x x x x x x x x x e x d x c d x x x c d e");
+ // 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2
+ // 0 1 2 3 4
+ // Should choose - - - - -
+
+ // Same as above but best matching segment have too low exactness
+ assertMetrics("segments:2 absoluteProximity:0.0903 proximity:0.9033 segmentStarts:1,41",
+ "a b c d e","x a b x c x x x x x x x x x x x x x x a:0.2 b:0.3 c:0.4 x x x x x x x x x e x d x c d x x x c d e");
+ // 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2
+ // 0 1 2 3 4
+ // Should choose - - - - -
+
+ assertMetrics("segments:1 absoluteProximity:0.0778 proximity:0.778","a b c d e f","x x a b b b c f e d a b c d x e x x x x x f d e f a b c a a b b c c d d e e f f");
+
+ // Prefer one segment with ok proximity over two segments with great proximity
+ assertMetrics("segments:1 segmentStarts:0","a b c d","a b x c d x x x x x x x x x x x a b x x x x x x x x x x x c d");
+ assertMetrics("segments:1 segmentStarts:0","a b c d","a b x x x x x x x x c d x x x x x x x x x x x a b x x x x x x x x x x x c d");
+ }
+
+ public void testMoreThanASegmentLengthOfUnmatchedQuery() {
+ assertMetrics("absoluteProximity:0.1 proximity:1","a b y y y y y y y y y y y y y y y","a b");
+ assertMetrics("segments:2 absoluteProximity:0.1 proximity:1","a b c d y y y y y y y y y y y y y y y","a b x x x x x x x x x x x x x x x x x x c d");
+ assertMetrics("segments:2 absoluteProximity:0.1 proximity:1","a b y y y y y y y y y y y y y y y c d","a b x x x x x x x x x x x x x x x x x x c d");
+ }
+
+ public void testQueryRepeats() {
+ // Not really handled perfectly, but good enough
+ assertMetrics("absoluteProximity:0.1 proximity:1 head:0 tail:0", "a a a","a");
+ assertMetrics("absoluteProximity:0.1 proximity:1 head:0 tail:0 gapLength:0","a a b c c","a a b c c");
+ assertMetrics("absoluteProximity:0.1 proximity:1 head:0 tail:0 gapLength:0","a a b c c","a b c");
+ assertMetrics("absoluteProximity:0.1 proximity:1 head:0 tail:0 gapLength:0","a b a b","a b a b");
+ assertMetrics("absoluteProximity:0.0903 proximity:0.9033 head:0 tail:0 gapLength:1","a b a b","a b x a b");
+ // Both terms take the same segment:
+ assertMetrics("absoluteProximity:0.1 proximity:1 segments:2 gapLength:0 head:3 tail:18","a a","x x x a x x x x x x x x x x x x x x a x x x");
+ // But not when the second is preferable
+ assertMetrics("absoluteProximity:0.1 proximity:1 segments:2 gapLength:0 head:3 tail:3","a b b a","x x x a b x x x x x x x x x x x x x x b a x x x");
+
+ assertMetrics("matches:2 fieldCompleteness:1","a b b b","a b");
+ }
+
+ public void testZeroCases() {
+ assertMetrics("absoluteProximity:0.1 proximity:1 matches:0 exactness:0","y","a");
+ assertMetrics("absoluteProximity:0.1 proximity:1 matches:0 exactness:0","a","x");
+ assertMetrics("absoluteProximity:0.1 proximity:1 matches:0 exactness:0","","x");
+ assertMetrics("absoluteProximity:0.1 proximity:1 matches:0 exactness:0","y","");
+ assertMetrics("absoluteProximity:0.1 proximity:1 matches:0 exactness:0","","");
+ }
+
+ public void testExceedingIterationLimit() {
+
+ { // Segments found: a x x b and c d
+ FieldMatchMetricsParameters p=new FieldMatchMetricsParameters();
+ p.setMaxAlternativeSegmentations(0);
+ FieldMatchMetricsComputer m=new FieldMatchMetricsComputer(p);
+ assertMetrics("matches:4 tail:0 proximity:0.75 absoluteProximity:0.075","a b c d","a x x b x x x a x b x x x x x a b x x x x x x x x x x x x x x x x x c d",false,m);
+ }
+
+ { // Segments found: a x b and c d
+ FieldMatchMetricsParameters p=new FieldMatchMetricsParameters();
+ p.setMaxAlternativeSegmentations(1);
+ FieldMatchMetricsComputer m=new FieldMatchMetricsComputer(p);
+ assertMetrics("matches:4 tail:0 proximity:0.855 absoluteProximity:0.0855","a b c d","a x x b x x x a x b x x x x x a b x x x x x x x x x x x x x x x x x c d",false,m);
+ }
+
+ { // Segments found: a b and c d
+ FieldMatchMetricsParameters p=new FieldMatchMetricsParameters();
+ p.setMaxAlternativeSegmentations(2);
+ FieldMatchMetricsComputer m=new FieldMatchMetricsComputer(p);
+ assertMetrics("matches:4 tail:0 proximity:1 absoluteProximity:0.1","a b c d","a x x b x x x a x b x x x x x a b x x x x x x x x x x x x x x x x x c d",false,m);
+ }
+ }
+
+ public void testMatch() {
+ // Ordered by decreasing match score per query
+ assertMetrics("match:1", "a","a");
+ assertMetrics("match:0.9339","a","a x");
+ assertMetrics("match:0", "a","x");
+ assertMetrics("match:0.9243","a","x a");
+ assertMetrics("match:0.9025","a","x a x");
+
+ assertMetrics("match:1", "a b","a b");
+ assertMetrics("match:0.9558","a b","a b x");
+ assertMetrics("match:0.9463","a b","x a b");
+ assertMetrics("match:0.1296","a b","a x x x x x x x x x x x x x x x x x x x x x x b");
+ assertMetrics("match:0.1288","a b","a x x x x x x x x x x x x x x x x x x x x x x x x x x x b");
+
+ assertMetrics("match:0.8647","a b c","x x a x b x x x x x x x x a b c x x x x x x x x c x x");
+ assertMetrics("match:0.861", "a b c","x x a x b x x x x x x x x x x a b c x x x x x x c x x");
+ assertMetrics("match:0.4869","a b c","a b x x x x x x x x x x x x x x x x x x x x x x c x x");
+ assertMetrics("match:0.4853","a b c","x x a x b x x x x x x x x x x b a c x x x x x x c x x");
+ assertMetrics("match:0.3621","a b c","a x b x x x x x x x x x x x x x x x x x x x x x c x x");
+ assertMetrics("match:0.3619","a b c","x x a x b x x x x x x x x x x x x x x x x x x x c x x");
+ assertMetrics("match:0.3584","a b c","x x a x b x x x x x x x x x x x x x x x x x x x x x c");
+ assertMetrics("match:0.3474","a b c","x x a x b x x x x x x x x x x x x x x b x x x b x b x");
+ assertMetrics("match:0.3421","a b c","x x a x b x x x x x x x x x x x x x x x x x x x x x x");
+ assertMetrics("match:0.305" ,"a b c","x x a x b:0.7 x x x x x x x x x x x x x x x x x x x x x x");
+ assertMetrics("match:0.2927","a b!200 c","x x a x b:0.7 x x x x x x x x x x x x x x x x x x x x x x");
+ }
+
+ public void testRepeatedMatch() {
+ // gap==1 caused by finding two possible segments due to repeated matching
+ assertMetrics("fieldCompleteness:1 queryCompleteness:0.6667 segments:1 earliness:1 gaps:1",
+ "pizza hut pizza","pizza hut");
+ }
+
+ /** Three segments - improving the score on the first should impact the last */
+ public void testNestedAlternatives() {
+ assertMetrics("segmentStarts:6,19,32 proximity:1",
+ "a b c d e f",
+ "a x b x x x a b x x x x x x x x x x x c d x x x x x x x x x x x e f");
+ assertMetrics("segmentStarts:6,19,47 proximity:1",
+ "a b c d e f",
+ "a x b x x x a b x x x x x x x x x x x c d x x x x x x x x x x x e x f x x x x x x x x x x x x e f");
+ }
+
+ /** Nice demonstration of the limitations of this algorithm: Segment end points are determined greedily */
+ public void testSegmentationGreedyness() {
+ assertMetrics("match:0.3717","a b c","a x b x x x x x x x x b c");
+ assertMetrics("match:0.4981","a b c","a x z x x x x x x x x b c");
+ }
+
+ protected void assertMetrics(String correctSpec, String query, String field) {
+ assertMetrics(correctSpec, query, field, false);
+ }
+
+ protected void assertMetrics(String correctSpec, String queryString, String field, int totalTermWeight) {
+ Query query=toQuery(queryString);
+ query.setTotalTermWeight(totalTermWeight);
+ assertMetrics(correctSpec, query, toField(field), false, new FieldMatchMetricsComputer());
+ }
+
+ protected void assertMetrics(String correctSpec, String queryString, String field, float totalSignificance) {
+ Query query=toQuery(queryString);
+ query.setTotalSignificance(totalSignificance);
+ assertMetrics(correctSpec, query, toField(field), false, new FieldMatchMetricsComputer());
+ }
+
+ protected void assertMetrics(String correctSpec,String query,String field,boolean printTrace) {
+ assertMetrics(correctSpec,query,field,printTrace,new FieldMatchMetricsComputer());
+ }
+
+ protected void assertMetrics(String correctSpec,String query,String field,boolean printTrace,FieldMatchMetricsComputer m) {
+ assertMetrics(correctSpec, toQuery(query), toField(field), printTrace, m);
+ }
+
+ protected void assertMetrics(String correctSpec, Query query, Field field, boolean printTrace, FieldMatchMetricsComputer m) {
+ FieldMatchMetrics metrics = m.compute(query, field, printTrace);
+ if (printTrace)
+ System.out.println(metrics.trace());
+
+ if (printTrace)
+ System.out.println(metrics.toStringDump());
+
+ for (String correctValueSpec: correctSpec.split(" ")) {
+ if (correctValueSpec.trim().equals("")) continue;
+ String metricName=correctValueSpec.split(":")[0];
+ String correctValueString=correctValueSpec.split(":")[1];
+ if (metricName.equals("segmentStarts")) {
+ String[] correctSegmentStarts=correctValueString.split(",");
+ List<Integer> segmentStarts=metrics.getSegmentStarts();
+ assertEquals("Segment start count",correctSegmentStarts.length,segmentStarts.size());
+ for (int i=0; i<segmentStarts.size(); i++)
+ assertEquals("Expected segment starts " + correctValueString + " was " + segmentStarts,
+ Integer.valueOf(correctSegmentStarts[i]),segmentStarts.get(i));
+ }
+ else {
+ float correctValue=Float.parseFloat(correctValueString);
+ assertEquals(metricName, correctValue, (float)Math.round(metrics.get(metricName)*10000)/10000 );
+ }
+ }
+ }
+
+ private Query toQuery(String queryString) {
+ if (queryString.length()==0) return new Query(new QueryTerm[0]);
+ String[] queryTerms=queryString.split(" ");
+ QueryTerm[] query=new QueryTerm[queryTerms.length];
+ for (int i=0; i<query.length; i++) {
+ String[] percentSplit=queryTerms[i].split("%");
+ String[] bangSplit=percentSplit[0].split("!");
+ String[] colonSplit=bangSplit[0].split(":");
+ if (colonSplit.length>1)
+ query[i]=new QueryTerm(colonSplit[1],Float.parseFloat(colonSplit[0]));
+ else
+ query[i]=new QueryTerm(colonSplit[0]);
+
+ if (bangSplit.length>1)
+ query[i].setWeight(Integer.parseInt(bangSplit[1]));
+ if (percentSplit.length>1)
+ query[i].setSignificance(Float.parseFloat(percentSplit[1]));
+ }
+ return new Query(query);
+ }
+
+ private Field toField(String fieldString) {
+ if (fieldString.length() == 0) return new Field(ImmutableList.of());
+
+ ImmutableList.Builder<Field.Term> terms = new ImmutableList.Builder<>();
+ for (String termString : fieldString.split(" ")) {
+ String[] colonSplit = termString.split(":");
+ if (colonSplit.length > 1)
+ terms.add(new Field.Term(colonSplit[0], Float.parseFloat(colonSplit[1])));
+ else
+ terms.add(new Field.Term(colonSplit[0]));
+ }
+ return new Field(terms.build());
+ }
+}
diff --git a/searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/FeatureListTestCase.java b/searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/FeatureListTestCase.java
new file mode 100755
index 00000000000..7399088ac1c
--- /dev/null
+++ b/searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/FeatureListTestCase.java
@@ -0,0 +1,77 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.rankingexpression;
+
+import com.yahoo.searchlib.rankingexpression.parser.ParseException;
+import org.junit.Test;
+
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.Reader;
+import java.io.StringReader;
+import java.util.Arrays;
+import java.util.List;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+/**
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
+ */
+public class FeatureListTestCase {
+
+ @Test
+ public void requireThatFeatureListFromStringWorks() throws ParseException {
+ assertFromString("attribute(foo).out",
+ Arrays.asList("attribute(foo).out"));
+ assertFromString("attribute(foo).out attribute ( bar ) . out",
+ Arrays.asList("attribute(foo).out", "attribute(bar).out"));
+ assertFromString("foo\n bar\n \t \t \n baz \n",
+ Arrays.asList("foo", "bar", "baz"));
+ assertFromString("attribute attribute(foo) attribute(foo).out attribute(bar).out.out",
+ Arrays.asList("attribute", "attribute(foo)", "attribute(foo).out", "attribute(bar).out.out"));
+ }
+
+ @Test
+ public void requireThatFeatureListFromReaderWorks() throws ParseException {
+ assertFromReader(new StringReader("attribute(foo).out"),
+ Arrays.asList("attribute(foo).out"));
+ assertFromReader(new StringReader("attribute(foo).out attribute ( bar ) . out"),
+ Arrays.asList("attribute(foo).out", "attribute(bar).out"));
+ assertFromReader(new StringReader("foo\n bar\n \t \t \n baz \n"),
+ Arrays.asList("foo", "bar", "baz"));
+ assertFromReader(new StringReader("attribute attribute(foo) attribute(foo).out attribute(bar).out.out"),
+ Arrays.asList("attribute", "attribute(foo)", "attribute(foo).out", "attribute(bar).out.out"));
+ }
+
+ @Test
+ public void requireThatFeatureListFromFileWorks() throws ParseException, FileNotFoundException {
+ assertFromFile(new File("src/test/files/features01.expression"),
+ Arrays.asList("attribute(foo).out"));
+ assertFromFile(new File("src/test/files/features02.expression"),
+ Arrays.asList("attribute(foo).out", "attribute(bar).out"));
+ assertFromFile(new File("src/test/files/features03.expression"),
+ Arrays.asList("foo", "bar", "baz"));
+ assertFromFile(new File("src/test/files/features04.expression"),
+ Arrays.asList("attribute", "attribute(foo)", "attribute(foo).out", "attribute(bar).out.out"));
+ }
+
+ public void assertFromString(String input, List<String> expected) throws ParseException {
+ assertFeatureList(new FeatureList(input), expected);
+ }
+
+ public void assertFromReader(Reader input, List<String> expected) throws ParseException {
+ assertFeatureList(new FeatureList(input), expected);
+ }
+
+ public void assertFromFile(File input, List<String> expected) throws ParseException, FileNotFoundException {
+ assertFeatureList(new FeatureList(input), expected);
+ }
+
+ public void assertFeatureList(FeatureList features, List<String> expected) throws ParseException {
+ assertEquals(expected.size(), features.size());
+ for (int i = 0; i < features.size(); ++i) {
+ assertTrue(features.get(i) != null);
+ assertEquals(expected.get(i), features.get(i).toString());
+ }
+ }
+}
diff --git a/searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/RankingExpressionTestCase.java b/searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/RankingExpressionTestCase.java
new file mode 100755
index 00000000000..24d7c82235c
--- /dev/null
+++ b/searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/RankingExpressionTestCase.java
@@ -0,0 +1,281 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.rankingexpression;
+
+import com.yahoo.searchlib.rankingexpression.parser.ParseException;
+import com.yahoo.searchlib.rankingexpression.rule.CompositeNode;
+import com.yahoo.searchlib.rankingexpression.rule.IfNode;
+import com.yahoo.searchlib.rankingexpression.rule.ExpressionNode;
+import com.yahoo.searchlib.rankingexpression.rule.FunctionNode;
+import junit.framework.TestCase;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.*;
+
+/**
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
+ */
+public class RankingExpressionTestCase extends TestCase {
+
+ public void testParamInFeature() throws ParseException {
+ assertParse("if (1 > 2, dotProduct(allparentid,query(cate1_parentid)), 2)",
+ "if ( 1 > 2,\n" +
+ "dotProduct(allparentid, query(cate1_parentid)),\n" +
+ "2\n" +
+ ")");
+ }
+
+ public void testDollarShorthand() throws ParseException {
+ assertParse("query(var1)", " $var1");
+ assertParse("query(var1)", " $var1 ");
+ assertParse("query(var1) + query(var2)", " $var1 + $var2 ");
+ assertParse("query(var1) + query(var2) - query(var3)", " $var1 + $var2 - $var3 ");
+ assertParse("query(var1) + query(var2) - query(var3) * query(var4) / query(var5)", " $var1 + $var2 - $var3 * $var4 / $var5 ");
+ assertParse("(query(var1) + query(var2)) - query(var3) * query(var4) / query(var5)", "($var1 + $var2)- $var3 * $var4 / $var5 ");
+ assertParse("query(var1) + (query(var2) - query(var3)) * query(var4) / query(var5)", " $var1 +($var2 - $var3)* $var4 / $var5 ");
+ assertParse("query(var1) + query(var2) - (query(var3) * query(var4)) / query(var5)", " $var1 + $var2 -($var3 * $var4)/ $var5 ");
+ assertParse("query(var1) + query(var2) - query(var3) * (query(var4) / query(var5))", " $var1 + $var2 - $var3 *($var4 / $var5)");
+ assertParse("if (if (f1.out < query(p1), 0, 1) < if (f2.out < query(p2), 0, 1), f3.out, query(p3))", "if(if(f1.out<$p1,0,1)<if(f2.out<$p2,0,1),f3.out,$p3)");
+ }
+
+ public void testLookaheadIndefinitely() throws Exception {
+ ExecutorService exec = Executors.newSingleThreadExecutor();
+ Future<Boolean> future = exec.submit(new Callable<Boolean>() {
+ @Override
+ public Boolean call() {
+ try {
+ new RankingExpression("if (fieldMatch(title) < 0.316316, if (now < 1.218627E9, if (now < 1.217667E9, if (now < 1.217244E9, if (rankBoost < 100050.0, 0.1424368, if (match < 0.284921, if (now < 1.217238E9, 0.1528184, if (now < 1.217238E9, 0.1, if (now < 1.217238E9, 0.1, if (now < 1.217238E9, 0.1, if (now < 1.217238E9, 0.1, if (now < 1.217238E9, 0.1, if (now < 1.217238E9, 0.1, if (now < 1.217238E9, 0.1, if (now < 1.217238E9, 0.1, if (now < 1.217238E9, 0.1, if (now < 1.217238E9, 0.1, if (now < 1.217238E9, 0.1, if (now < 1.217238E9, 0.1, if (now < 1.217238E9, 0.1, if (now < 1.217238E9, 0.1, if (now < 1.217238E9, 0.1, if (now < 1.217238E9, 0.1, if (now < 1.217238E9, 0.1, if (now < 1.217238E9, 0.1, if (now < 1.217238E9, 0.1, if (now < 1.217238E9, 0.1, if (now < 1.217238E9, 0.1, if (now < 1.217238E9, 0.1, if (now < 1.217238E9, 0.1, if (now < 1.217238E9, 0.1, if (now < 1.217238E9, 0.1, if (now < 1.217238E9, 0.1, if (now < 1.217238E9, 0.1, if (now < 1.217238E9, 0.1, if (now < 1.217238E9, 0.1, if (now < 1.217238E9, 0.1, if (now < 1.217238E9, 0.1, if (now < 1.217238E9, 0.1, if (now < 1.217238E9, 0.1, if (now < 1.217238E9, 0.1, if (now < 1.217238E9, 0.1, if (now < 1.217238E9, 0.1, if (now < 1.217238E9, 0.1, if (now < 1.217238E9, 0.1, if (now < 1.217238E9, 0.1, if (now < 1.217238E9, 0.1, if (now < 1.217238E9, 0.1, if (now < 1.217238E9, 0.1, if (now < 1.217238E9, 0.1, if (now < 1.217238E9, 0.1, if (now < 1.217238E9, 0.1, if (now < 1.217238E9, 0.1, if (now < 1.217238E9, 0.1, if (now < 1.217238E9, 0.1, if (now < 1.217238E9, 0.1, if (now < 1.217238E9, 0.1, if (now < 1.217238E9, 0.1, if (now < 1.217238E9, 0.1, if (now < 1.217238E9, 0.1, if (now < 1.217238E9, 0.1, if (now < 1.217238E9, 0.1, if (now < 1.217238E9, 0.1, if (now < 1.217238E9, 0.1, if (now < 1.217238E9, 0.1, if (now < 1.217238E9, 0.1, if (now < 1.217238E9, 0.1, if (now < 1.217238E9, 0.1, if (now < 1.217238E9, 0.1, 0.1493261))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))), 0.1646852)), 0.1850886), if (match < 0.308468, if (firstPhase < 5891.5, 0.08424015, 0.1167076), if (rankBoost < 120050.0, 0.111576, 0.1370456))), if (match < 0.31644, 0.1543837, 0.1727403)), if (now < 1.218088E9, if (now < 1.217244E9, if (fieldMatch(metakeywords).significance < 0.1425405, if (match.totalWeight < 450.0, 0.1712793, 0.1632426), 0.1774488), 0.1895567), if (now < 1.218361E9, if (fieldTermMatch(keywords_1).firstPosition < 1.5, 0.1530005, 0.1370894), 0.1790079)))");
+ return Boolean.TRUE;
+ } catch (ParseException e) {
+ return Boolean.FALSE;
+ }
+ }
+ });
+ assertTrue(future.get(60, TimeUnit.SECONDS));
+ }
+
+ public void testSelfRecursionScript() throws ParseException {
+ List<ExpressionFunction> macros = new ArrayList<>();
+ macros.add(new ExpressionFunction("foo", null, new RankingExpression("foo")));
+
+ RankingExpression exp = new RankingExpression("foo");
+ try {
+ exp.getRankProperties(macros);
+ } catch (RuntimeException e) {
+ assertEquals("Cycle in ranking expression function: [foo[]]", e.getMessage());
+ }
+ }
+
+ public void testMacroCycleScript() throws ParseException {
+ List<ExpressionFunction> macros = new ArrayList<>();
+ macros.add(new ExpressionFunction("foo", null, new RankingExpression("bar")));
+ macros.add(new ExpressionFunction("bar", null, new RankingExpression("foo")));
+
+ RankingExpression exp = new RankingExpression("foo");
+ try {
+ exp.getRankProperties(macros);
+ } catch (RuntimeException e) {
+ assertEquals("Cycle in ranking expression function: [foo[], bar[]]", e.getMessage());
+ }
+ }
+
+ public void testScript() throws ParseException {
+ List<ExpressionFunction> macros = new ArrayList<>();
+ macros.add(new ExpressionFunction("foo", Arrays.asList("arg1", "arg2"), new RankingExpression("min(arg1, pow(arg2, 2))")));
+ macros.add(new ExpressionFunction("bar", Arrays.asList("arg1", "arg2"), new RankingExpression("arg1 * arg1 + 2 * arg1 * arg2 + arg2 * arg2")));
+ macros.add(new ExpressionFunction("baz", Arrays.asList("arg1", "arg2"), new RankingExpression("foo(1, 2) / bar(arg1, arg2)")));
+ macros.add(new ExpressionFunction("cox", null, new RankingExpression("10 + 08 * 1977")));
+
+ assertScript("foo(1,2) + foo(3,4) * foo(5, foo(foo(6, 7), 8))", macros,
+ Arrays.asList(
+ "rankingExpression(foo@e2dc17a89864aed0.12232eb692c6c502) + rankingExpression(foo@af74e3fd9070bd18.a368ed0a5ba3a5d0) * rankingExpression(foo@dbab346efdad5362.e5c39e42ebd91c30)",
+ "min(5,pow(rankingExpression(foo@d1d1417259cdc651.573bbcd4be18f379),2))",
+ "min(6,pow(7,2))",
+ "min(1,pow(2,2))",
+ "min(3,pow(4,2))",
+ "min(rankingExpression(foo@84951be88255b0ec.d0303e061b36fab8),pow(8,2))"
+ ));
+ assertScript("foo(1, 2) + bar(3, 4)", macros,
+ Arrays.asList(
+ "rankingExpression(foo@e2dc17a89864aed0.12232eb692c6c502) + rankingExpression(bar@af74e3fd9070bd18.a368ed0a5ba3a5d0)",
+ "min(1,pow(2,2))",
+ "3 * 3 + 2 * 3 * 4 + 4 * 4"
+ ));
+ assertScript("baz(1, 2)", macros,
+ Arrays.asList(
+ "rankingExpression(baz@e2dc17a89864aed0.12232eb692c6c502)",
+ "min(1,pow(2,2))",
+ "rankingExpression(foo@e2dc17a89864aed0.12232eb692c6c502) / rankingExpression(bar@e2dc17a89864aed0.12232eb692c6c502)",
+ "1 * 1 + 2 * 1 * 2 + 2 * 2"
+ ));
+ assertScript("cox", macros,
+ Arrays.asList(
+ "rankingExpression(cox)",
+ "10 + 08 * 1977"
+ ));
+ }
+
+ public void testBug3464208() throws ParseException {
+ List<ExpressionFunction> macros = new ArrayList<>();
+ macros.add(new ExpressionFunction("log10tweetage", null, new RankingExpression("69")));
+
+ String lhs = "log10(0.01+attribute(user_followers_count)) * log10(socialratio) * " +
+ "log10(userage/(0.01+attribute(user_statuses_count)))";
+ String rhs = "(log10tweetage * log10tweetage * log10tweetage) + 5.0 * " +
+ "attribute(ythl)";
+
+ String expLhs = "log10(0.01 + attribute(user_followers_count)) * log10(socialratio) * " +
+ "log10(userage / (0.01 + attribute(user_statuses_count)))";
+ String expRhs = "(rankingExpression(log10tweetage) * rankingExpression(log10tweetage) * " +
+ "rankingExpression(log10tweetage)) + 5.0 * attribute(ythl)";
+
+ assertScript(lhs + " + " + rhs, macros,
+ Arrays.asList(
+ expLhs + " + " + expRhs,
+ "69"
+ ));
+ assertScript(lhs + " - " + rhs, macros,
+ Arrays.asList(
+ expLhs + " - " + expRhs,
+ "69"
+ ));
+ }
+
+ public void testParse() throws ParseException, IOException {
+ BufferedReader reader = new BufferedReader(new FileReader("src/tests/rankingexpression/rankingexpressionlist"));
+ String line;
+ int lineNumber = 0;
+ while ((line = reader.readLine()) != null) {
+ lineNumber++;
+ if (line.length() == 0 || line.charAt(0) == '#') {
+ continue;
+ }
+ String[] parts = line.split(";");
+ // System.out.println("Parsing '" + parts[0].trim() + "'..");
+ RankingExpression expression = new RankingExpression(parts[0].trim());
+
+ String out = expression.toString();
+ if (parts.length == 1) {
+ assertEquals(parts[0].trim(), out);
+ } else {
+ boolean ok = false;
+ String err = "Expression '" + out + "' not present in { ";
+ for (int i = 1; i < parts.length && !ok; ++i) {
+ err += "'" + parts[i].trim() + "'";
+ if (parts[i].trim().equals(out)) {
+ ok = true;
+ }
+ if (i < parts.length - 1) {
+ err += ", ";
+ }
+ }
+ err += " }.";
+ assertTrue("At line " + lineNumber + ": " + err, ok);
+ }
+ }
+ }
+
+ public void testIssue() throws ParseException {
+ assertEquals("feature.0", new RankingExpression("feature.0").toString());
+ assertEquals("if (1 > 2, 3, 4) + feature(arg1).out.out",
+ new RankingExpression("if ( 1 > 2 , 3 , 4 ) + feature ( arg1 ) . out.out").toString());
+ }
+
+ public void testNegativeConstantArgument() throws ParseException {
+ assertEquals("foo(-1.2)", new RankingExpression("foo(-1.2)").toString());
+ }
+
+ public void testNaming() throws ParseException {
+ RankingExpression test = new RankingExpression("a+b");
+ test.setName("test");
+ assertEquals("test: a + b", test.toString());
+ }
+
+ public void testCondition() throws ParseException {
+ RankingExpression expression = new RankingExpression("if(1<2,3,4)");
+ assertTrue(expression.getRoot() instanceof IfNode);
+ }
+
+ public void testFileImporting() throws ParseException {
+ RankingExpression expression = new RankingExpression(new File("src/test/files/simple.expression"));
+ assertEquals("simple: a + b", expression.toString());
+ }
+
+ public void testNonCanonicalLegalStrings() throws ParseException {
+ assertParse("a * b + c * d", "a* (b) + \nc*d");
+ }
+
+ public void testEquality() throws ParseException {
+ assertEquals(new RankingExpression("if ( attribute(foo)==\"BAR\",log(attribute(popularity)+5),log(fieldMatch(title).proximity)*fieldMatch(title).completeness)"),
+ new RankingExpression("if(attribute(foo)==\"BAR\", log(attribute(popularity)+5),log(fieldMatch(title).proximity) * fieldMatch(title).completeness)"));
+
+ assertFalse(new RankingExpression("if ( attribute(foo)==\"BAR\",log(attribute(popularity)+5),log(fieldMatch(title).proximity)*fieldMatch(title).completeness)").equals(
+ new RankingExpression("if(attribute(foo)==\"BAR\", log(attribute(popularity)+5),log(fieldMatch(title).earliness) * fieldMatch(title).completeness)")));
+ }
+
+ public void testSetMembershipConditions() throws ParseException {
+ assertEquals(new RankingExpression("if ( attribute(foo) in [\"FOO\", \"BAR\"],log(attribute(popularity)+5),log(fieldMatch(title).proximity)*fieldMatch(title).completeness)"),
+ new RankingExpression("if(attribute(foo) in [\"FOO\",\"BAR\"], log(attribute(popularity)+5),log(fieldMatch(title).proximity) * fieldMatch(title).completeness)"));
+
+ assertFalse(new RankingExpression("if ( attribute(foo) in [\"FOO\", \"BAR\"],log(attribute(popularity)+5),log(fieldMatch(title).proximity)*fieldMatch(title).completeness)").equals(
+ new RankingExpression("if(attribute(foo) in [\"FOO\",\"BAR\"], log(attribute(popularity)+5),log(fieldMatch(title).earliness) * fieldMatch(title).completeness)")));
+
+ assertEquals(new RankingExpression("if ( attribute(foo) in [attribute(category), \"BAR\"],log(attribute(popularity)+5),log(fieldMatch(title).proximity)*fieldMatch(title).completeness)"),
+ new RankingExpression("if(attribute(foo) in [attribute(category),\"BAR\"], log(attribute(popularity)+5),log(fieldMatch(title).proximity) * fieldMatch(title).completeness)"));
+ assertEquals(new RankingExpression("if (GENDER$ in [-1.0, 1.0], 1, 0)"), new RankingExpression("if (GENDER$ in [-1.0, 1.0], 1, 0)"));
+ }
+
+ public void testComments() throws ParseException {
+ assertEquals(new RankingExpression("if ( attribute(foo) in [\"FOO\", \"BAR\"],\n" +
+ "# a comment\n" +
+ "log(attribute(popularity)+5),log(fieldMatch(title).proximity)*" +
+ "# a multiline \n" +
+ " # comment\n" +
+ "fieldMatch(title).completeness)"),
+ new RankingExpression("if(attribute(foo) in [\"FOO\",\"BAR\"], log(attribute(popularity)+5),log(fieldMatch(title).proximity) * fieldMatch(title).completeness)"));
+ }
+
+ public void testIsNan() throws ParseException {
+ String strExpr = "if (isNan(attribute(foo)) == 1.0, 1.0, attribute(foo))";
+ RankingExpression expr = new RankingExpression(strExpr);
+ CompositeNode root = (CompositeNode)expr.getRoot();
+ CompositeNode comparison = (CompositeNode)root.children().get(0);
+ ExpressionNode isNan = comparison.children().get(0);
+ assertTrue(isNan instanceof FunctionNode);
+ assertEquals("isNan(attribute(foo))", isNan.toString());
+ }
+
+ protected static void assertParse(String expected, String expression) throws ParseException {
+ assertEquals(expected, new RankingExpression(expression).toString());
+ }
+
+ private void assertScript(String expression, List<ExpressionFunction> macros, List<String> expectedScripts)
+ throws ParseException {
+ boolean print = false;
+ if (print)
+ System.out.println("Parsing expression '" + expression + "'.");
+
+ RankingExpression exp = new RankingExpression(expression);
+ Map<String, String> scripts = exp.getRankProperties(macros);
+ if (print) {
+ for (String key : scripts.keySet()) {
+ System.out.println("Script '" + key + "': " + scripts.get(key));
+ }
+ }
+
+ for (Map.Entry<String, String> m : scripts.entrySet())
+ System.out.println(m);
+ for (int i = 0; i < expectedScripts.size();) {
+ String val = expectedScripts.get(i++);
+ assertTrue("Script contains " + val, scripts.containsValue(val));
+ }
+ if (print)
+ System.out.println("");
+ }
+}
diff --git a/searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/evaluation/Benchmark.java b/searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/evaluation/Benchmark.java
new file mode 100644
index 00000000000..7690efb1112
--- /dev/null
+++ b/searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/evaluation/Benchmark.java
@@ -0,0 +1,144 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.rankingexpression.evaluation;
+
+import com.yahoo.searchlib.rankingexpression.RankingExpression;
+import com.yahoo.searchlib.rankingexpression.evaluation.gbdtoptimization.GBDTForestOptimizer;
+import com.yahoo.searchlib.rankingexpression.parser.ParseException;
+import com.yahoo.searchlib.rankingexpression.rule.CompositeNode;
+import com.yahoo.searchlib.rankingexpression.rule.ExpressionNode;
+import com.yahoo.searchlib.rankingexpression.rule.ReferenceNode;
+
+import java.io.BufferedReader;
+import java.io.FileReader;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.LinkedList;
+import java.util.List;
+
+/**
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
+ */
+public final class Benchmark {
+
+ public static void main(String[] args) {
+ if (args.length < 1) {
+ System.err.println("Usage: Benchmark <filename> [<iterations>]");
+ System.exit(1);
+ }
+ int numRuns = 1000;
+ if (args.length == 2) {
+ numRuns = Integer.valueOf(args[1]);
+ }
+ List<Result> res = new ArrayList<Result>();
+ try {
+ BufferedReader in = new BufferedReader(new FileReader(args[0]));
+ StringBuilder str = new StringBuilder();
+ String line;
+ while ((line = in.readLine()) != null) {
+ str.append(line);
+ }
+ String exp = str.toString();
+ res.add(evaluateTree(exp, numRuns));
+ res.add(evaluateTreeOptimized(exp, numRuns));
+ res.add(evaluateForestOptimized(exp, numRuns));
+ } catch (IOException e) {
+ System.out.println("An error occured while reading the content of file '" + args[0] + "': " + e);
+ System.exit(1);
+ } catch (ParseException e) {
+ System.out.println("An error occured while parsing the content of file '" + args[0] + "': " + e);
+ System.exit(1);
+ }
+ for (Result lhs : res) {
+ for (Result rhs : res) {
+ if (lhs.res < rhs.res - 1e-6 || lhs.res > rhs.res + 1e-6) {
+ System.err.println("Evaluation of '" + lhs.name + "' and '" + rhs.name + "' disagree on result; " +
+ "expected " + lhs.res + ", got " + rhs.res + ".");
+ System.exit(1);
+ }
+ }
+ System.out.format("%1$-16s : %2$8.04f ms (%3$-6.04f)\n",
+ lhs.name, lhs.millis, res.get(0).millis / lhs.millis);
+ }
+ }
+
+ private static Result evaluateTree(String str, int numRuns) throws ParseException {
+ Result ret = new Result();
+ ret.name = "Unoptimized";
+
+ RankingExpression exp = new RankingExpression(str);
+ List<String> vars = new LinkedList<String>();
+ getFeatures(exp.getRoot(), vars);
+
+ benchmark(exp, vars, new MapContext(), numRuns, ret);
+ return ret;
+ }
+
+ private static Result evaluateTreeOptimized(String str, int numRuns) throws ParseException {
+ Result ret = new Result();
+ ret.name = "Optimized tree";
+
+ RankingExpression exp = new RankingExpression(str);
+ List<String> vars = new LinkedList<String>();
+ getFeatures(exp.getRoot(), vars);
+
+ ArrayContext ctx = new ArrayContext(exp);
+ ExpressionOptimizer optimizer = new ExpressionOptimizer();
+ optimizer.getOptimizer(GBDTForestOptimizer.class).setEnabled(false);
+ optimizer.optimize(exp, ctx);
+
+ benchmark(exp, vars, ctx, numRuns, ret);
+ return ret;
+ }
+
+ private static Result evaluateForestOptimized(String str, int numRuns) throws ParseException {
+ Result ret = new Result();
+ ret.name = "Optimized forest";
+
+ RankingExpression exp = new RankingExpression(str);
+ List<String> vars = new LinkedList<String>();
+ getFeatures(exp.getRoot(), vars);
+
+ ArrayContext ctx = new ArrayContext(exp);
+ ExpressionOptimizer optimizer = new ExpressionOptimizer();
+ optimizer.optimize(exp, ctx);
+
+ benchmark(exp, vars, ctx, numRuns, ret);
+ return ret;
+ }
+
+ private static void benchmark(RankingExpression exp, List<String> vars, Context ctx, int numRuns, Result out) {
+ for (int i = 0, len = vars.size(); i < len; ++i) {
+ ctx.put(vars.get(i), i / (double)len);
+ }
+ for (int i = 0; i < numRuns; ++i) {
+ out.res = exp.evaluate(ctx).asDouble();
+ }
+ long begin = System.nanoTime();
+ for (int i = 0; i < numRuns; ++i) {
+ out.res = exp.evaluate(ctx).asDouble();
+ }
+ long end = System.nanoTime();
+
+ out.millis = (end - begin) / (1000.0 * 1000.0);
+ }
+
+ private static void getFeatures(ExpressionNode node, List<String> out) {
+ if (node instanceof ReferenceNode) {
+ String feature = ((ReferenceNode)node).getName();
+ if (!out.contains(feature)) {
+ out.add(feature);
+ }
+ } else if (node instanceof CompositeNode) {
+ CompositeNode cNode = (CompositeNode)node;
+ for (ExpressionNode child : cNode.children()) {
+ getFeatures(child, out);
+ }
+ }
+ }
+
+ private static class Result {
+ String name = "anonymous";
+ double millis = Double.MAX_VALUE;
+ double res = 0;
+ }
+}
diff --git a/searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/evaluation/EvaluationBenchmark.java b/searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/evaluation/EvaluationBenchmark.java
new file mode 100644
index 00000000000..708235647e6
--- /dev/null
+++ b/searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/evaluation/EvaluationBenchmark.java
@@ -0,0 +1,474 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.rankingexpression.evaluation;
+
+import com.yahoo.io.IOUtils;
+import com.yahoo.searchlib.rankingexpression.RankingExpression;
+import com.yahoo.searchlib.rankingexpression.evaluation.gbdtoptimization.GBDTForestOptimizer;
+import com.yahoo.searchlib.rankingexpression.parser.ParseException;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.io.IOException;
+
+/**
+ * Two small benchmarks of ranking expression evaluation
+ *
+ * @author bratseth
+ */
+public class EvaluationBenchmark {
+
+ public void run() {
+ try {
+ //runNativeComparison(100*1000*1000);
+
+ // benchmark with a large gbdt: Expected tree and forest speedup: 2x, 4x
+ runGBDT(1000*1000, gbdt);
+
+ // benchmark with a large gbdt using set membership tests (on integers) extensively
+ // we simplify the attribute name to make it work with the map context implementation.
+ // Expected tree and forest speedup: 3x, 4x
+ // runGBDT(100*1000, readFile("src/test/files/ranking07.expression").replace("attribute(catid)","catid"));
+ }
+ catch (ParseException e) {
+ throw new RuntimeException("Benchmarking failed",e);
+ }
+ }
+
+ private String readFile(String file) {
+ try {
+ return IOUtils.readFile(new File(file));
+ } catch (IOException e) {
+ throw new AssertionError(e);
+ }
+ }
+
+ public void runNativeComparison(int iterations) {
+ oul("Running native expression...");
+ MapContext arguments=new MapContext();
+ arguments.put("one",1d);
+
+ out(" warming up...");
+ double nativeTotal=0;
+ for (int i=0; i<iterations/5; i++) {
+ arguments.put("i",(double)i);
+ nativeTotal+=nativeExpression(arguments);
+ }
+ oul("done");
+
+ out(" running " + iterations + " iterations...");
+ long startTime=System.currentTimeMillis();
+ for (int i=0; i<iterations; i++) {
+ arguments.put("i",(double)i);
+ nativeTotal+=nativeExpression(arguments);
+ }
+ long nativeTotalTime=System.currentTimeMillis()-startTime;
+ oul("done");
+ oul(" Total time running native: " + nativeTotalTime + " ms (" + iterations/nativeTotalTime + " expressions/ms)");
+
+ oul("Running ranking expression...");
+ RankingExpression expression;
+ try {
+ expression=new RankingExpression(comparisonExpression);
+ }
+ catch (ParseException e) {
+ throw new RuntimeException(e);
+ }
+ out(" warming up...");
+ double rankingTotal=0;
+ for (int i=0; i<iterations/5; i++) {
+ arguments.put("i",(double)i);
+ rankingTotal+=expression.evaluate(arguments).asDouble();
+ }
+ oul("done");
+
+ out(" running " + iterations + " iterations...");
+ startTime=System.currentTimeMillis();
+ for (int i=0; i<iterations; i++) {
+ arguments.put("i",(double)i);
+ rankingTotal+=expression.evaluate(arguments).asDouble();
+ }
+ long rankingTotalTime=System.currentTimeMillis()-startTime;
+ if (rankingTotal!=nativeTotal)
+ throw new IllegalStateException("Expressions are not the same, native: " + nativeTotal + " rankingExpression: " + rankingTotal);
+ oul("done");
+ oul(" Total time running expression: " + rankingTotalTime + " ms (" + iterations/rankingTotalTime + " expressions/ms)");
+ oul("Expression % of max possible speed: " + ((int)((100*nativeTotalTime)/rankingTotalTime)) + " %");
+ }
+
+ private static final String comparisonExpression="10*if(i>35,if(i>one,if(i>=670,4,8),if(i>8000,5,3)),if(i==478,90,91))";
+
+ private final double nativeExpression(Context context) {
+ double r;
+ if (context.get("i").asDouble()>35) {
+ if (context.get("i").asDouble()>context.get("one").asDouble()) {
+ if (context.get("i").asDouble()>=670)
+ r=4;
+ else
+ r=8;
+ }
+ else {
+ if (context.get("i").asDouble()>8000)
+ r=5;
+ else
+ r=3;
+ }
+ }
+ else {
+ if (context.get("i").asDouble()==478)
+ r=90;
+ else
+ r=91;
+ }
+ return r*10;
+ }
+
+ private void runGBDT(int iterations, String gbdtString) throws ParseException {
+
+ // Unoptimized...............
+ double total = benchmark(new RankingExpression(gbdtString), new MapContext(), iterations, "Unoptimized");
+ System.out.println("-----------------------------------------------------------------------------------------------------");
+
+ // Tree optimized...................
+ RankingExpression treeOptimized = new RankingExpression(gbdtString);
+ ArrayContext treeContext = new ArrayContext(treeOptimized, true);
+ ExpressionOptimizer optimizer = new ExpressionOptimizer();
+ optimizer.getOptimizer(GBDTForestOptimizer.class).setEnabled(false);
+ System.out.print("Tree optimizing ... ");
+ OptimizationReport treeOptimizationReport = optimizer.optimize(treeOptimized, treeContext);
+ System.out.println("done");
+ System.out.println(treeOptimizationReport);
+ double treeTotal = benchmark(treeOptimized, treeContext, iterations, "Tree optimized");
+ assertEqualish(total, treeTotal);
+ System.out.println("-----------------------------------------------------------------------------------------------------");
+
+ // Forest optimized...................
+ RankingExpression forestOptimized=new RankingExpression(gbdtString);
+ DoubleOnlyArrayContext forestContext = new DoubleOnlyArrayContext(forestOptimized, true);
+ System.out.print("Forest optimizing ... ");
+ OptimizationReport forestOptimizationReport=new ExpressionOptimizer().optimize(forestOptimized, forestContext);
+ System.out.println("done");
+ System.out.println(forestOptimizationReport);
+ double forestTotal=benchmark(forestOptimized,forestContext,iterations,"Forest optimized");
+ assertEqualish(total,forestTotal);
+ System.out.println("-----------------------------------------------------------------------------------------------------");
+ }
+
+ private double benchmark(RankingExpression gbdt, Context context, int iterations, String description) {
+ oul("Running '" + description + "':");
+ out(" Warming up ...");
+ double total=0;
+ total+=benchmarkIterations(gbdt,context,iterations/5);
+ oul("done");
+
+ out(" Running " + iterations + " of '" + description + "' ...");
+ long tStartTime=System.currentTimeMillis();
+ total+=benchmarkIterations(gbdt,context,iterations);
+ long totalTime=System.currentTimeMillis()-tStartTime;
+ oul("done");
+ oul(" Total time running '" + description + "': " + totalTime + " ms (" + totalTime*1000/iterations + " microseconds/expression)");
+ return total;
+ }
+
+ private double benchmarkIterations(RankingExpression gbdt, Context contextPrototype, int iterations) {
+ // This tries to simulate realistic use: The array context can be reused for a series of evaluations in a thread
+ // but each evaluation binds a new set of values.
+ double total=0;
+ Context context = copyForEvaluation(contextPrototype);
+ for (int i=0; i<iterations; i++) {
+ context.put("LW_NEWS_SEARCHES_RATIO",(double)i);
+ context.put("NEWS_USERS",(double)i/1000*1000);
+ context.put("catid",100300102);
+ total+=gbdt.evaluate(context).asDouble();
+ }
+ return total;
+ }
+
+ private Context copyForEvaluation(Context contextPrototype) {
+ if (contextPrototype instanceof AbstractArrayContext) // optimized - contains name to index map
+ return ((AbstractArrayContext)contextPrototype).clone();
+ else if (contextPrototype instanceof MapContext) // Unoptimized - nothing to keep
+ return new MapContext();
+ else
+ throw new RuntimeException("Unknown context type " + contextPrototype.getClass());
+ }
+
+ private void out(String s) {
+ System.out.print(s);
+ }
+
+ private void oul(String s) {
+ System.out.println(s);
+ }
+
+ public static void main(String[] args) {
+ new EvaluationBenchmark().run();
+ }
+
+ private void assertEqualish(double a,double b) {
+ if (Math.abs(a-b) >= Math.abs((a+b)/100000000) )
+ throw new RuntimeException("Expected value " + a + " but optimized evaluation produced " + b);
+ }
+
+ private final String gbdt =
+ "if (LW_NEWS_SEARCHES_RATIO < 1.72971, 0.0697159, if (LW_USERS < 0.10496, if (SEARCHES < 0.0329127, 0.151257, 0.117501), if (SUGG_OVERLAP < 18.5, 0.0897622, 0.0756903))) + \n" +
+ "if (LW_NEWS_SEARCHES_RATIO < 1.73156, if (NEWS_USERS < 0.0737993, -0.00481646, 0.00110018), if (LW_USERS < 0.0844616, 0.0488919, if (SUGG_OVERLAP < 32.5, 0.0136917, 9.85328E-4))) + \n" +
+ "if (LW_NEWS_SEARCHES_RATIO < 1.74451, -0.00298257, if (LW_USERS < 0.116207, if (SEARCHES < 0.0329127, 0.0676105, 0.0340198), if (NUM_WORDS < 1.5, -8.55514E-5, 0.0112406))) + \n" +
+ "if (LW_NEWS_SEARCHES_RATIO < 1.72995, if (NEWS_USERS < 0.0737993, -0.00407515, 0.00139088), if (LW_USERS < 0.0509035, 0.0439466, if (LW_USERS < 0.325818, 0.0187156, 0.00236949))) + \n" +
+ "if (LW_NEWS_SEARCHES_RATIO < 1.72503, -0.00239817, if (LW_USERS < 0.0977572, if (ISABSTRACT_AVG < 0.04, 0.041602, 0.0157381), if (LW_USERS < 0.602112, 0.0118004, 7.92829E-4))) + \n" +
+ "if (LW_NEWS_SEARCHES_RATIO < 1.53348, -0.00227065, if (LW_USERS < 0.0613667, 0.0345214, if (NUM_WORDS < 1.5, -9.25274E-4, if (BIDDED_SEARCHES < 0.538873, 0.0207086, 0.00549622)))) + \n" +
+ "if (LW_NEWS_SEARCHES_RATIO < 1.50465, -0.00206609, if (LW_USERS < 0.183424, if (NUM_WORDS < 1.5, 0.00203703, if (BIDDED_SEARCHES < 0.0686975, 0.0412142, 0.0219894)), 0.00246537)) + \n" +
+ "if (NEWS_USERS < 0.0737993, -0.00298889, if (LW_USERS < 0.212577, if (NUM_WORDS < 1.5, 0.00385669, 0.0260773), if (NUM_WORDS < 1.5, -0.00141889, 0.00565858))) + \n" +
+ "if (NEWS_USERS < 0.0737993, -0.0026984, if (BIDDED_SEARCHES < 0.202548, if (NUM_WORDS < 1.5, 0.00356601, 0.026572), if (SUGG_OVERLAP < 34.5, 0.00642933, -8.83847E-4))) + \n" +
+ "if (NEWS_MAIN_SEARCHES_RATIO < 8.47575, if (NUM_WORDS < 2.5, if (NEWS_USERS < 0.0737993, -0.0031992, if (ISTITLE_AVG < 0.315, 0.0106735, 1.98748E-4)), 0.00717291), 0.0216488) + \n" +
+ "if (PREV_DAY_NEWS_SEARCHES_RATIO < 1.79697, if (NEWS_CTR < 0.659695, -0.0018297, 0.0062345), if (BIDDED_SEARCHES < 0.148816, if (NUM_WORDS < 1.5, 0.00397494, 0.0282706), 0.00287526)) + \n" +
+ "if (PREV_DAY_NEWS_SEARCHES_RATIO < 1.81978, if (NUM_WORDS < 2.5, -0.00183825, 0.00447334), if (SUGG_OVERLAP < 8.5, if (SEARCHES < 0.0692601, 0.0319928, 0.0121653), 0.0010403)) + \n" +
+ "if (NEWS_CTR < 0.660025, if (PREV_DAY_NEWS_CTR_RATIO < 0.502543, if (SEARCHES < 0.245402, 0.0193446, 9.09694E-4), -0.00160176), if (NEWS_MAIN_SEARCHES_RATIO < 1.64873, 0.00264489, 0.0177375)) + \n" +
+ "if (NUM_WORDS < 2.5, if (NEWS_USERS < 0.0737993, -0.00238821, if (LW_USERS < 0.0143922, 0.0188957, 8.0445E-4)), if (LW_NEWS_SEARCHES_RATIO < 1.32846, 0.00349568, 0.015966)) + \n" +
+ "if (NUM_WORDS < 2.5, if (NEWS_USERS < 0.0737993, -0.002169, if (ISTITLE_AVG < 0.625, 0.00906748, -2.5122E-4)), if (PREV_DAY_NEWS_SEARCHES_RATIO < 1.69164, 0.0039487, 0.0174816)) + \n" +
+ "if (NEWS_MAIN_SEARCHES_RATIO < 8.66642, if (NUM_WORDS < 2.5, -8.59968E-4, if (NEWS_CTR < 0.632914, 0.00287223, 0.0148924)), if (SEARCHES < 0.0237478, 0.033539, 0.0071663)) + \n" +
+ "if (NEWS_MAIN_SEARCHES_RATIO < 1.26315, -0.00130179, if (NEWS_CTR < 0.628621, if (PREV_DAY_NEWS_CTR_RATIO < 0.525166, if (SUGG_OVERLAP < 9.5, 0.0171556, 2.36297E-4), 2.29746E-4), 0.0123793)) + \n" +
+ "if (LW_MAIN_SEARCHES_RATIO < 1.88252, if (NEWS_USERS < 0.0737993, -0.00207461, 6.60118E-4), if (NEWS_USERS < 0.0737993, 9.39125E-4, if (SEARCHES < 0.0248661, 0.0272446, 0.00973038))) + \n" +
+ "if (NUM_WORDS < 1.5, -0.0018842, if (NEWS_USERS < 0.0737993, -5.44658E-4, if (PREV_DAY_USERS < 0.43141, if (PREV_DAY_NEWS_CTR < 0.447268, 4.25375E-4, 0.0152695), 0.00230817))) + \n" +
+ "if (NEWS_MAIN_SEARCHES_RATIO < 2.6946, -7.37738E-4, if (NEWS_CTR < 0.618656, if (PREV_DAY_NEWS_CTR_RATIO < 0.522617, if (ISTITLE_AVG < 0.21, 0.0202984, 0.00221158), 8.26792E-4), 0.0131518)) + \n" +
+ "if (NUM_WORDS < 3.5, if (NEWS_CTR < 0.660239, if (PREV_DAY_NEWS_CTR_RATIO < 0.505308, 0.00214801, -0.00113168), if (NEWS_MAIN_SEARCHES_RATIO < 0.9266, 1.28813E-4, 0.0090932)), 0.0111807) + \n" +
+ "if (NEWS_MAIN_SEARCHES_RATIO < 1.27238, -9.46325E-4, if (NEWS_USERS < 0.0737993, 2.20417E-4, if (ISTITLE_AVG < 0.435, 0.0143694, if (MIN_SCORE < 243538.0, 1.76879E-4, 0.00682761)))) + \n" +
+ "if (NUM_WORDS < 3.5, if (NUM_WORDS < 1.5, -0.00153422, if (NEWS_USERS < 0.0737993, -6.54983E-4, if (PREV_DAY_NEWS_CTR < 0.55636, -4.40154E-4, 0.00666305))), 0.00961529) + \n" +
+ "if (LW_MAIN_SEARCHES_RATIO < 1.88316, -6.18023E-4, if (NEWS_USERS < 0.0737993, if (NUM_WORDS < 2.5, -4.22107E-4, 0.00583448), if (SEARCHES < 0.0202227, 0.0218746, 0.0061446))) + \n" +
+ "if (LW_MAIN_SEARCHES_RATIO < 1.91611, if (NEWS_MAIN_SEARCHES_RATIO < 0.384315, -0.0015553, 2.57266E-4), if (NEWS_CTR < 0.659281, if (NUM_WORDS < 2.5, 2.40504E-4, 0.00572176), 0.0105389)) + \n" +
+ "if (NEWS_MAIN_SEARCHES_RATIO < 2.68704, -5.65225E-4, if (NEWS_CTR < 0.782417, if (PREV_DAY_NEWS_CTR_RATIO < 0.990517, if (NEWS_SEARCHES < 0.339382, 0.0135414, 0.00113811), 5.21526E-4), 0.0112535)) + \n" +
+ "if (BIDDED_SEARCHES < 0.00581527, 0.00560086, if (NUM_WORDS < 1.5, -0.00130462, if (NEWS_USERS < 0.0737993, -7.52446E-4, if (BIDDED_SEARCHES < 1.29452, 0.00626868, 1.75195E-4)))) + \n" +
+ "if (NUM_WORDS < 3.5, if (NUM_WORDS < 1.5, -0.00114958, if (NEWS_USERS < 0.0737993, -5.00434E-4, if (PREV_DAY_NEWS_CTR < 0.563721, -6.96671E-4, 0.00517722))), 0.00807433) + \n" +
+ "if (NEWS_MAIN_SEARCHES_RATIO < 0.382901, -0.00122923, if (NEWS_USERS < 0.0737993, -4.15058E-4, if (ISABSTRACT_AVG < 0.095, if (PREV_DAY_NEWS_CTR < 0.557042, 8.71338E-4, 0.00994663), 1.56446E-4))) + \n" +
+ "if (BIDDED_SEARCHES < 0.00581527, if (MAX_SCORE < 379805.0, 0.00362486, 0.0132902), if (NEWS_CTR < 0.913345, -3.53901E-4, if (NEWS_USERS < 2.48409, 0.00191813, 0.013908))) + \n" +
+ "if (HAS_NEWS_QC == 0.0, if (NUM_WORDS < 3.5, if (PREV_DAY_NEWS_SEARCHES_RATIO < 1.90333, -6.26897E-4, if (ISTITLE_AVG < 0.355, 0.00723851, -2.62543E-5)), 0.0058211), 0.00433763) + \n" +
+ "if (NUM_WORDS < 2.5, if (NEWS_USERS < 2.28805, -5.10768E-4, 0.00255996), if (LW_MAIN_SEARCHES_RATIO < 1.84597, 3.31329E-4, if (DAY_WEEK_AVG_RATIO < 2.655, 0.00434755, 0.0196317))) + \n" +
+ "if (HAS_NEWS_QC == 0.0, if (BIDDED_SEARCHES < 0.0119577, if (PREV_DAY_NEWS_CTR_RATIO < 0.928266, 0.0111871, 0.00198432), -3.24627E-4), if (NEWS_MAIN_SEARCHES_RATIO < 2.71304, 0.00196875, 0.00945297)) + \n" +
+ "if (LW_MAIN_SEARCHES_RATIO < 1.82872, -4.20354E-4, if (DAY_PD_HITS_RATIO < 3.61, if (NEWS_MAIN_SEARCHES_RATIO < 12.766, 7.51735E-4, if (LW_NEWS_SEARCHES_RATIO < 6.15807, 0.0147332, -0.0135118)), 0.010677)) + \n" +
+ "if (NEWS_MAIN_SEARCHES_RATIO < 0.327632, -0.00102446, if (NEWS_USERS < 0.0737993, -3.80041E-4, if (ISABSTRACT_AVG < 0.105, if (NEWS_SEARCHES < 0.286926, 0.00928139, 0.00265099), 8.96147E-5))) + \n" +
+ "if (ALGO_CTR < 1.05585, if (HAS_NEWS_QC == 0.0, -4.34462E-4, 0.00319487), if (PREV_DAY_NEWS_CTR_RATIO < 0.541632, if (DAY_PD_HITS_RATIO < 5.75, 0.00845667, 0.0571546), 0.00162096)) + \n" +
+ "if (NUM_WORDS < 3.5, if (LW_NEWS_CTR < 0.59494, -3.29593E-4, if (NEWS_MAIN_SEARCHES_RATIO < 1.24936, 3.83584E-4, if (MAX_SCORE < 263568.0, 0.00219784, 0.0104741))), 0.00532617) + \n" +
+ "if (NUM_WORDS < 3.5, if (MAX_SCORE < 268176.0, -5.00757E-4, if (NEWS_MAIN_SEARCHES_RATIO < 0.812821, -3.72572E-4, if (NEWS_CTR < 0.898792, 0.0017999, 0.00908918))), 0.00538528) + \n" +
+ "if (ISTITLE_AVG < 0.705, if (NEWS_USERS < 0.0737993, 2.51012E-5, if (BIDDED_SEARCHES < 1.61095, if (YSM_N_ALGO_CTR_RATIO < 6.42257E-4, 0.0804317, 0.00586482), -4.26664E-4)), -4.79119E-4) + \n" +
+ "if (NUM_WORDS < 3.5, if (HAS_NEWS_QC == 0.0, -1.93562E-4, if (LW_MAIN_SEARCHES_RATIO < 1.72448, 0.00109732, 0.00738421)), if (NEWS_MAIN_SEARCHES_RATIO < 0.406201, -0.00263026, 0.00733129)) + \n" +
+ "if (BIDDED_SEARCHES < 0.0120163, 0.00278665, if (NEWS_USERS < 2.75198, -3.22197E-4, if (NEWS_MAIN_CTR_RATIO < 1.4679, 0.00148229, if (PREV_DAY_USERS < 0.117185, 0.0517723, 0.010204)))) + \n" +
+ "if (LW_NEWS_CTR < 0.597955, if (SUGG_OVERLAP < 0.5, if (PREV_DAY_NEWS_SEARCHES_RATIO < 1.79767, 6.24799E-4, 0.0051004), -5.51886E-4), if (NEWS_MAIN_SEARCHES_RATIO < 0.660064, 2.21724E-4, 0.00474931)) + \n" +
+ "if (BIDDED_SEARCHES < 0.00581527, 0.0030367, if (NEWS_USERS < 2.65484, -3.02764E-4, if (LW_MAIN_SEARCHES_RATIO < 1.39539, 6.36888E-4, if (NEWS_MAIN_CTR_RATIO < 2.18629, 0.00661051, 0.0228632)))) + \n" +
+ "if (LW_NEWS_CTR < 0.619817, if (LW_USERS < 0.0143922, 0.0012313, -4.11044E-4), if (NEWS_MAIN_SEARCHES_RATIO < 1.63866, 6.94464E-4, if (LW_MAIN_SEARCHES_RATIO < 2.79335, 0.00448877, 0.0171177))) + \n" +
+ "if (HAS_NEWS_QC == 0.0, if (ALGO_CTR < 1.1644, -2.80479E-4, 0.002092), if (NUM_WORDS < 2.5, 9.21741E-4, if (LW_MAIN_CTR_RATIO < 0.771928, 0.018042, 0.00519068))) + \n" +
+ "if (MAX_SCORE < 270938.0, -3.72001E-4, if (NEWS_MAIN_SEARCHES_RATIO < 0.382818, -8.43057E-4, if (NEWS_USERS < 0.0737993, 2.74749E-4, if (ISABSTRACT_AVG < 0.355, 0.00699732, 9.68093E-4)))) + \n" +
+ "if (NEWS_CTR < 0.187967, -0.00236148, if (LW_NEWS_CTR_RATIO < 0.501045, if (ISABSTRACT_AVG < 0.065, if (USERS < 0.79806, 0.00751647, 5.67897E-4), -1.95953E-4), -1.28664E-4)) + \n" +
+ "if (NEWS_CTR < 0.916156, if (NEWS_CTR < 0.131787, -0.00260812, -2.96076E-6), if (LW_MAIN_SEARCHES_RATIO < 1.7079, if (LW_NEWS_CTR < 0.827357, -0.00103106, 0.00752405), 0.00712343)) + \n" +
+ "if (ALGO_CTR < 1.11796, -9.56953E-5, if (LW_NEWS_CTR_RATIO < 0.965768, if (PREV_DAY_NEWS_CTR_RATIO < 0.318964, -0.0068748, if (DAY_PD_HITS_RATIO < 5.9, 0.00781228, 0.0430918)), 0.0010225)) + \n" +
+ "if (ISTITLE_AVG < 0.785, if (PREV_DAY_NEWS_CTR_RATIO < 0.937235, if (BIDDED_SEARCHES < 0.549316, 0.00782989, 5.1726E-4), if (LW_MAIN_SEARCHES_RATIO < 14.3819, -7.98452E-5, 0.00931358)), -3.44667E-4) + \n" +
+ "if (NUM_WORDS < 4.5, if (HAS_NEWS_QC == 0.0, -1.1162E-4, if (LW_NEWS_CTR < 0.625492, 0.00137801, if (NEWS_MAIN_SEARCHES_RATIO < 3.2392, 0.00481811, 0.0203582))), 0.00957663) + \n" +
+ "if (NUM_WORDS < 4.5, if (NEWS_MAIN_SEARCHES_RATIO < 12.878, -7.973E-5, if (SUGG_LW < 0.5, 0.0113112, if (PREV_DAY_NEWS_USERS < 1.63248, -0.0093633, 0.0081117))), 0.00891687) + \n" +
+ "if (NEWS_CTR < 0.260948, -0.00146919, if (PREV_DAY_NEWS_CTR_RATIO < 0.949304, if (NEWS_MAIN_SEARCHES_RATIO < 0.305788, -5.28063E-4, if (MIN_SCORE < 199600.0, 8.23835E-4, 0.00533948)), -1.59293E-4)) + \n" +
+ "if (NEWS_MAIN_SEARCHES_RATIO < 0.116451, -0.00113111, if (PREV_DAY_NEWS_CTR_RATIO < 0.999206, if (NEWS_SEARCHES < 0.30129, if (ISTITLE_AVG < 0.61, 0.00769846, 0.00162987), -2.39796E-4), -1.20795E-4)) + \n" +
+ "if (NEWS_USERS < 2.75198, -1.04934E-4, if (NEWS_CTR < 0.504788, -3.87773E-4, if (BIDDED_SEARCHES < 3.77166, if (LW_MAIN_SEARCHES_RATIO < 1.76307, 0.00639344, 0.0180493), 0.00240808))) + \n" +
+ "if (NUM_WORDS < 4.5, if (LW_NEWS_CTR < 0.789202, -2.11327E-4, if (NEWS_USERS < 0.312345, -4.52231E-4, if (SCIENCE < 0.535, 0.00367411, 0.0491292))), 0.00847389) + \n" +
+ "if (NEWS_CTR < 0.182514, -0.00177053, if (LW_NEWS_CTR_RATIO < 0.501045, if (USERS < 1.36009, if (MIN_SCORE < 187234.0, 3.6643E-4, 0.0055156), -0.0011557), -8.54842E-5)) + \n" +
+ "if (LW_MAIN_SEARCHES_RATIO < 0.32584, if (NEWS_CTR < 1.19657, 0.00362961, if (PREV_DAY_NEWS_CTR_RATIO < 2.37995, if (NEWS_MAIN_SEARCHES_RATIO < 2.07684, 0.0176304, 0.0773353), 0.00489339)), -2.00322E-5) + \n" +
+ "if (AVG_SCORE < 354962.0, -1.53495E-4, if (NEWS_CTR < 0.596437, if (LW_SEARCHES < 0.0532569, 0.00410978, -0.00116517), if (LW_MAIN_CTR_RATIO < 0.779754, 0.0149197, 0.00348209))) + \n" +
+ "if (PREV_DAY_NEWS_USERS < 14.0861, if (BIDDED_SEARCHES < 3.24749, if (PREV_DAY_NEWS_SEARCHES_RATIO < 1.63285, -8.28682E-5, if (NEWS_SEARCHES < 0.317829, 0.00348768, -6.08623E-4)), -0.00114994), 0.00458862) + \n" +
+ "if (ISABSTRACT_AVG < 0.295, if (NEWS_USERS < 0.0737993, -1.36945E-4, if (MIN_SCORE < 233429.0, 2.59393E-5, if (NEWS_MAIN_SEARCHES_RATIO < 0.221135, -7.57098E-4, 0.00463699))), -4.62083E-4) + \n" +
+ "if (ALGO_CTR < 1.01522, -1.09825E-4, if (LW_NEWS_CTR_RATIO < 0.55285, if (LW_MAIN_SEARCHES_RATIO < 5.11061, if (NEWS_SEARCHES < 1.02345, 0.00847552, -0.00437523), -0.0112885), 6.61898E-4)) + \n" +
+ "if (NEWS_USERS < 4.05804, if (LW_NEWS_SEARCHES_RATIO < 6.67644, -1.03466E-5, if (USERS < 0.101853, -0.0245653, -0.00297792)), if (NEWS_MAIN_CTR_RATIO < 1.09325, 6.6298E-4, 0.00723109)) + \n" +
+ "if (NUM_WORDS < 4.5, if (LW_NEWS_USERS < 31.8516, -4.91517E-5, 0.00701562), if (ALGO_CLICKS < 0.012133, 0.020461, if (DAY_WEEK_AVG_RATIO < 2.93, 8.3867E-4, 0.0326788))) + \n" +
+ "if (PREV_DAY_NEWS_SEARCHES_RATIO < 3.9286, if (NEWS_MAIN_SEARCHES_RATIO < 60.9048, 6.59836E-5, 0.0391173), if (NEWS_USERS < 0.223578, -0.0109831, if (NEWS_MAIN_SEARCHES_RATIO < 36.1125, -9.18296E-4, -0.0321067))) + \n" +
+ "if (PREV_DAY_NEWS_SEARCHES_RATIO < 3.92945, if (NEWS_MAIN_SEARCHES_RATIO < 12.878, 3.89745E-5, if (PREV_DAY_NEWS_CTR < 0.537022, -0.00162034, 0.0079279)), if (NEWS_USERS < 0.245347, -0.0101132, -0.00126814)) + \n" +
+ "if (LW_MAIN_SEARCHES_RATIO < 0.480833, if (NEWS_USERS < 0.0737993, 9.57273E-5, if (SUGG_LW < 12.5, if (PUB_TODAY_AVG < 0.355, 0.0161319, -0.00334364), 0.00260343)), -7.52983E-5) + \n" +
+ "if (PREV_DAY_NEWS_USERS < 38.5221, if (BIDDED_SEARCHES < 3.7973, if (PREV_DAY_NEWS_CTR_RATIO < 0.999247, if (ISABSTRACT_AVG < 0.075, 0.00272842, -3.86777E-5), -1.51219E-4), -0.00100249), 0.00670928) + \n" +
+ "if (LW_NEWS_SEARCHES_RATIO < 2.77887, 9.37848E-5, if (NEWS_USERS < 0.245347, if (SEARCHES < 0.013024, if (ENTERTAINMENT_QC == 0.0, 0.0110759, 0.0905384), -0.00681271), -6.6913E-4)) + \n" +
+ "if (NEWS_CTR < 0.916322, if (LW_NEWS_SEARCHES_RATIO < 5.23703, 2.81507E-5, if (SEARCHES < 0.233024, -0.0177547, -0.00220902)), if (NEWS_USERS < 2.30165, 0.00110318, 0.00810944)) + \n" +
+ "if (HAS_NEWS_QC == 0.0, -1.08882E-4, if (MAX_SCORE < 137730.0, if (ALGO_CTR < 0.489733, 0.0199541, 0.0026349), if (NEWS_USERS < 2.20454, -3.16208E-4, 0.00699663))) + \n" +
+ "if (BIDDED_SEARCHES < 0.00581527, if (LW_NEWS_USERS < 1.81124, 0.00173624, if (PREV_DAY_USERS < 1.36892, 0.0405308, -0.00100716)), if (NEWS_MAIN_SEARCHES_RATIO < 58.9771, -1.26569E-4, 0.0286363)) + \n" +
+ "if (LW_NEWS_CTR < 0.621598, -1.10247E-4, if (LW_MAIN_SEARCHES_RATIO < 0.317173, 0.0110308, if (ALGO_CTR < 1.26031, 9.13964E-4, if (ALGO_CTR < 1.27034, 0.0667268, 0.00722662)))) + \n" +
+ "if (NEWS_MAIN_SEARCHES_RATIO < 25.7554, -6.12962E-6, if (LW_NEWS_SEARCHES < 0.765878, if (DAY_WEEK_AVG_RATIO < 1.475, if (PREV_DAY_NEWS_SEARCHES < 0.285188, 0.00389095, -0.0350617), -0.0440429), -7.44561E-4)) + \n" +
+ "if (DAY_PD_HITS_RATIO < 16.25, -5.78971E-5, if (INTLNEWS < 0.235, if (BIDDED_SEARCHES < 0.401931, if (PREV_DAY_MAIN_CTR_RATIO < 0.852642, 0.00517, 0.0517763), 0.00726245), 0.00172079)) + \n" +
+ "if (DAY_PD_HITS_RATIO < 18.89, -9.58573E-5, if (NEWS_MAIN_CTR_RATIO < 4.42646, if (LW_MAIN_SEARCHES_RATIO < 1.64955, -0.00540243, if (PREV_DAY_CTR < 0.823034, 0.0147119, -0.00456252)), 0.0476969)) + \n" +
+ "if (LW_CTR < 1.01377, -9.34648E-5, if (NEWS_USERS < 0.0737993, -6.338E-5, if (MIN_SCORE < 376483.0, 0.00251265, if (LW_MAIN_SEARCHES_RATIO < 0.683623, 0.0350855, 0.00794114)))) + \n" +
+ "if (ISABSTRACT_AVG < 0.315, if (NEWS_USERS < 0.0737993, -1.37636E-4, if (LW_MAIN_SEARCHES_RATIO < 0.661526, if (SUGG_LW < 3.5, 0.0168399, 0.00323338), 9.73973E-4)), -4.12741E-4) + \n" +
+ "if (LW_CTR < 1.01683, -1.32017E-4, if (LW_NEWS_CTR_RATIO < 0.500058, if (SCIENCE < 0.55, 0.0039965, 0.0428649), if (NEWS_CTR < 0.594088, 3.24961E-6, 0.00367602))) + \n" +
+ "if (LW_NEWS_CTR < 0.856244, -1.10246E-4, if (PREV_DAY_MAIN_SEARCHES_RATIO < 10.6833, if (LW_MAIN_SEARCHES_RATIO < 0.31726, if (LW_NEWS_CTR_RATIO < 1.23633, 0.00906872, 0.0473513), 0.00134361), 0.041372)) + \n" +
+ "if (NEWS_MAIN_SEARCHES_RATIO < 6.69974, -1.86907E-5, if (NEWS_MAIN_CTR_RATIO < 1.46029, if (LW_NEWS_SEARCHES_RATIO < 6.53657, if (PREV_DAY_NEWS_SEARCHES_RATIO < 0.316051, 0.0332713, 0.00117973), -0.010984), 0.00761193)) + \n" +
+ "if (NEWS_CTR < 0.237839, if (USERS < 0.0168938, 0.0267063, if (LW_USERS < 0.0827926, if (PREV_DAY_NEWS_CTR < 1.08233, -0.0138873, 0.0330313), -8.56477E-4)), 1.37177E-4) + \n" +
+ "if (LW_NEWS_SEARCHES_RATIO < 7.02911, 5.45191E-5, if (USERS < 0.118739, -0.0243638, if (NEWS_MAIN_CTR_RATIO < 1.63574, if (SEARCHES < 0.478602, -0.0123115, -0.00225071), 0.0054502))) + \n" +
+ "if (BIDDED_SEARCHES < 3.7973, if (NEWS_USERS < 2.20454, 8.53898E-5, if (NEWS_MAIN_CTR_RATIO < 1.9298, 0.00163898, if (SUGG_OVERLAP < 34.0, 0.0222897, 0.00356636))), -8.81981E-4) + \n" +
+ "if (BIDDED_SEARCHES < 0.00581527, if (MIN_SCORE < 253612.0, -5.12189E-4, if (MAX_MIN_SCORE < 35925.0, 0.00252377, if (PREV_DAY_NEWS_SEARCHES_RATIO < 0.610935, 0.0432434, 0.00906418))), -1.01198E-4) + \n" +
+ "if (DAY_PD_HITS_RATIO < 24.585, if (ALGO_CTR < 3.15833, -2.12884E-5, 0.0175937), if (PREV_DAY_CTR < 0.824546, if (LW_NEWS_CTR < 0.651434, 0.011673, 0.0567104), -0.00676867)) + \n" +
+ "if (LW_CTR < 1.551, if (LW_NEWS_USERS < 3.59178, -1.29153E-4, if (SUGG_LW < 46.5, 0.00702818, 2.27956E-4)), if (NEWS_MAIN_SEARCHES_RATIO < 8.86382, 0.0028952, 0.0366156)) + \n" +
+ "if (DAY_PD_HITS_RATIO < 18.89, -5.51307E-6, if (YSM_CTR < 0.0178362, if (ALGO_CLICKS < 0.127132, 0.0471277, if (SUGG_TW < 0.975545, 0.0048341, 0.0335537)), -0.00344397)) + \n" +
+ "if (LW_MAIN_SEARCHES_RATIO < 8.21211, -5.10935E-5, if (DAY_WEEK_AVG_RATIO < 1.205, -4.84709E-4, if (NEWS_MAIN_SEARCHES_RATIO < 2.63328, if (LW_NEWS_SEARCHES_RATIO < 1.83743, 0.0125448, -0.00162932), 0.0144536))) + \n" +
+ "if (ALGO_CTR < 1.01463, -1.17159E-4, if (PREV_DAY_NEWS_CTR_RATIO < 0.780396, if (USERS < 0.614133, if (MAX_MIN_SCORE < 54869.8, 0.00624085, 0.0337856), 7.62548E-4), 3.62126E-4)) + \n" +
+ "if (NUM_WORDS < 3.5, -1.00136E-5, if (PREV_DAY_NEWS_CTR_RATIO < 0.958905, if (PREV_DAY_USERS < 0.377834, if (YSM_N_ALGO_CTR_RATIO < 0.189731, 0.0259994, -0.0142924), 4.37294E-4), 9.62911E-4)) + \n" +
+ "if (LW_MAIN_SEARCHES_RATIO < 92.7164, if (LW_NEWS_CTR < 0.822371, -4.99393E-5, if (PREV_DAY_MAIN_SEARCHES_RATIO < 13.0501, if (NEWS_USERS < 0.309237, -8.38369E-4, 0.00312145), 0.043612)), -0.00674822) + \n" +
+ "if (LW_NEWS_SEARCHES_RATIO < 2.51597, 1.01649E-4, if (SEARCHES < 0.0202227, if (PREV_DAY_MAIN_CTR_RATIO < 1.20113, 0.00953861, 0.0583575), if (USERS < 0.295073, -0.00536031, -4.99861E-4))) + \n" +
+ "if (LW_MAIN_SEARCHES_RATIO < 0.146655, 0.00684325, if (LW_CTR < 1.43439, -5.31424E-5, if (NEWS_MAIN_SEARCHES_RATIO < 11.7367, if (PREV_DAY_NEWS_CTR_RATIO < 0.541013, 0.0101571, 0.0013804), 0.0362471))) + \n" +
+ "if (LW_NEWS_SEARCHES < 5.77429, -9.91104E-5, if (NEWS_CTR < 1.71804, if (SUGG_OVERLAP < 32.5, if (HAS_NEWS_QC == 0.0, 0.00333027, 0.0179206), 4.42358E-4), 0.0445137)) + \n" +
+ "if (ISABSTRACT_AVG < 0.435, if (NEWS_USERS < 0.158915, -2.22842E-5, if (PREV_DAY_NEWS_USERS < 0.0737993, 0.00311367, if (USERS < 0.119577, -0.00919024, 7.29693E-4))), -3.98811E-4) + \n" +
+ "if (ALGO_CLICKS < 4.04596, if (NEWS_USERS < 0.223578, if (NEWS_SEARCHES < 0.452288, 3.21367E-5, -0.00726485), if (LOCAL_QC == 1.0, -0.00144797, 0.00132603)), -9.1988E-4) + \n" +
+ "if (NEWS_CTR < 0.25921, -8.87978E-4, if (PREV_DAY_NEWS_CTR_RATIO < 0.530395, if (USERS < 0.710459, if (MAX_MIN_SCORE < 758.5, 0.00626933, 9.79114E-4), -3.43207E-4), -7.62231E-5)) + \n" +
+ "if (SUGG_TW < 0.0623373, if (LW_NEWS_SEARCHES_RATIO < 6.68433, if (PREV_DAY_NEWS_SEARCHES_RATIO < 1.89603, 1.96789E-4, if (LW_MAIN_SEARCHES_RATIO < 0.719144, 0.013244, 0.00182593)), -0.00570262), -2.16189E-4) + \n" +
+ "if (LW_NEWS_SEARCHES_RATIO < 2.07246, if (NEWS_MAIN_SEARCHES_RATIO < 53.2676, 8.99313E-5, 0.0338743), if (LW_SEARCHES < 0.216881, -0.00282376, if (PREV_DAY_SEARCHES < 0.0712414, 0.0484119, -3.84987E-4))) + \n" +
+ "if (LW_NEWS_SEARCHES_RATIO < 2.51974, 9.68801E-5, if (ALGO_CTR < 1.86978, if (LW_USERS < 0.0798854, if (NEWS_MAIN_CTR_RATIO < 0.42837, -0.0141747, -0.00244278), -4.47252E-4), 0.0201717)) + \n" +
+ "if (LW_MAIN_SEARCHES_RATIO < 10.0121, -1.42949E-5, if (PREV_DAY_MAIN_CTR_RATIO < 1.47714, 9.66134E-4, if (BIDDED_SEARCHES < 0.0585926, if (WEEKAVG < 0.36, 0.00997522, 0.0530748), 0.00387354))) + \n" +
+ "if (SUGG_TW < 0.984769, -3.34988E-5, if (PREV_DAY_NEWS_CTR < 1.13129, 0.0013372, if (BUSINESS < 0.05, 0.00681273, if (LOCAL_QC == 0.0, 0.0221056, 0.13305)))) + \n" +
+ "if (LW_CTR < 1.63323, -1.51312E-5, if (LW_NEWS_SEARCHES_RATIO < 1.28425, 0.00114219, if (ELECTRONICS_QC == 0.0, if (PREV_DAY_MAIN_CTR_RATIO < 0.530832, 0.0312363, 0.00679683), 0.0640472))) + \n" +
+ "if (PREV_DAY_NEWS_USERS < 4.25111, -4.70532E-5, if (PREV_DAY_MAIN_CTR_RATIO < 2.58573, if (YSM_NCTR < 0.00660392, if (NEWS_MAIN_SEARCHES_RATIO < 1.27373, -1.99449E-4, 0.00625635), -5.22971E-4), 0.0405083)) + \n" +
+ "if (PREV_DAY_MAIN_SEARCHES_RATIO < 377.799, if (LW_NEWS_SEARCHES_RATIO < 6.67644, 1.17654E-5, if (PUB_TODAY_AVG < 0.0050, -0.00565339, if (NATIONALNEWS < 0.55, 2.61588E-4, 0.0318784))), 0.0238311) + \n" +
+ "if (PREV_DAY_CTR < 1.16424, -7.76883E-5, if (LW_NEWS_SEARCHES_RATIO < 8.68994, 0.00182771, if (NEWS_SEARCHES < 7.1215, -0.013084, if (NEWS_MAIN_SEARCHES_RATIO < 3.58161, -0.00835768, 0.0377434)))) + \n" +
+ "if (NEWS_MAIN_SEARCHES_RATIO < 26.7481, 4.45294E-5, if (LW_NEWS_SEARCHES_RATIO < 1.57387, if (LW_NEWS_SEARCHES_RATIO < 1.3782, if (LW_CTR < 0.34851, 0.0177335, -0.00964832), 0.024959), -0.016879)) + \n" +
+ "if (LOCAL_QC == 1.0, if (NEWS_USERS < 0.0737993, 1.57459E-4, if (ISTITLE_AVG < 0.515, -0.00580773, if (PREV_DAY_MAIN_SEARCHES_RATIO < 4.81114, -0.00140636, 0.0204618))), 1.02083E-4) + \n" +
+ "if (HAS_NEWS_QC == 0.0, -3.53931E-5, if (ALGO_CTR < 0.5969, if (MIN_SCORE < 30200.0, if (NEWS_CTR < 0.713517, 0.0124535, 0.049838), 0.00304798), -2.6664E-4)) + \n" +
+ "if (NEWS_MAIN_SEARCHES_RATIO < 59.3594, if (NEWS_MAIN_SEARCHES_RATIO < 46.9165, if (PREV_DAY_NEWS_SEARCHES_RATIO < 48.6166, -8.91528E-6, 0.0156096), -0.0194015), if (INTLNEWS < 0.275, 0.0391563, 2.94525E-5)) + \n" +
+ "if (ALGO_CTR < 3.09161, if (NEWS_MAIN_SEARCHES_RATIO < 71.6642, -7.16141E-5, 0.0245016), if (NEWS_MAIN_SEARCHES_RATIO < 5.48496, if (ELECTRONICS_QC == 0.0, 3.80175E-4, 0.134021), 0.0467547)) + \n" +
+ "if (LOCAL_QC == 1.0, if (PREV_DAY_NEWS_CTR_RATIO < 0.55814, if (LW_USERS < 0.179284, -0.0110475, -0.00187986), if (LW_NEWS_SEARCHES_RATIO < 11.9839, -4.62166E-4, 0.0120886)), 4.16986E-5) + \n" +
+ "if (LW_NEWS_USERS < 48.703, if (LW_MAIN_SEARCHES_RATIO < 104.672, -1.11529E-5, if (PUB_TODAY_AVG < 0.645, -0.0109524, if (LW_MAIN_CTR_RATIO < 0.820426, 0.0173264, -0.00598908))), 0.00642443) + \n" +
+ "if (NEWS_USERS < 26.8033, if (USERS < 2.70898, if (NEWS_USERS < 0.212247, if (NEWS_SEARCHES < 0.312345, 1.94111E-5, -0.00494194), 9.66727E-4), -7.27397E-4), 0.00366377) + \n" +
+ "if (PREV_DAY_NEWS_CTR_RATIO < 0.948678, if (ISTITLE_AVG < 0.565, if (PREV_DAY_MAIN_CTR_RATIO < 1.53864, 0.00145357, if (YSM_N_ALGO_CTR_RATIO < 0.00279164, 0.053982, 0.0096231)), 1.01252E-4), -9.24301E-5) + \n" +
+ "if (PREV_DAY_NEWS_CTR_RATIO < 0.999206, 5.03044E-4, if (LW_MAIN_SEARCHES_RATIO < 11.8351, -2.19647E-4, if (DAY_WEEK_AVG_RATIO < 2.785, 0.00174311, if (ISABSTRACT_AVG < 0.73, 0.020265, -0.00658421)))) + \n" +
+ "if (SUGG_OVERLAP < 0.5, if (BIDDED_SEARCHES < 0.00581527, if (SUGG_LW < 8.5, 0.00316453, if (ELECTRONICS_QC == 0.0, 0.0240488, 0.285332)), 2.9583E-4), -1.0113E-4) + \n" +
+ "if (ALGO_CTR < 1.15516, -9.02219E-5, if (LW_NEWS_CTR_RATIO < 0.131516, 0.0416615, if (NEWS_CTR < 0.841155, 5.45051E-4, if (ALGO_CLICKS < 0.0703111, 0.0508979, 0.00584922)))) + \n" +
+ "if (ENTERTAINMENT < 0.305, if (ALGO_CTR < 1.53687, -1.42467E-4, if (PREV_DAY_NEWS_SEARCHES_RATIO < 2.43692, 0.00172748, if (LW_NEWS_CTR_RATIO < 1.09767, 0.0382724, 3.85821E-4))), 9.95127E-4) + \n" +
+ "if (PREV_DAY_NEWS_SEARCHES_RATIO < 3.61514, if (PREV_DAY_NEWS_SEARCHES_RATIO < 1.904, -6.72591E-5, if (USERS < 1.06349, 0.00243637, -8.96343E-4)), if (NEWS_USERS < 0.179867, -0.00813249, -0.0012514)) + \n" +
+ "if (PREV_DAY_NEWS_USERS < 13.0067, -3.50928E-5, if (PREV_DAY_NEWS_CTR < 0.714421, 7.97227E-4, if (USERS < 3.56693, if (YSM_NCTR < 0.036612, 0.0297616, -0.00692722), 0.00476212))) + \n" +
+ "if (LW_NEWS_SEARCHES_RATIO < 2.51803, 5.8313E-5, if (PREV_DAY_MAIN_CTR_RATIO < 2.34354, -0.00134957, if (LW_USERS < 0.0410895, if (AVG_SCORE < 284173.0, 0.046743, 0.00519612), 2.52E-4))) + \n" +
+ "if (YSM_CTR < 0.106731, -1.71864E-4, if (NEWS_MAIN_SEARCHES_RATIO < 9.26668, 5.48603E-4, if (USERS < 0.0145216, if (MAX_SCORE < 273414.0, 0.0139875, -0.0068697), -0.00914662))) + \n" +
+ "if (LW_CTR < 2.10467, if (NEWS_USERS < 0.223578, if (NEWS_SEARCHES < 0.452288, -4.92949E-5, -0.00633483), 4.52239E-4), if (MAX_MIN_SCORE < 36225.0, 0.00340485, 0.0295635)) + \n" +
+ "if (NEWS_MAIN_SEARCHES_RATIO < 27.2801, -3.29416E-5, if (LW_NEWS_USERS < 0.516988, -0.0205183, if (AVG_RANK < 9.5, -0.00354209, if (POLITICS_QC == 0.0, 0.0108605, 0.0656188)))) + \n" +
+ "if (LW_NEWS_CTR_RATIO < 0.130813, if (LW_USERS < 0.0675101, -0.0246242, -0.00263751), if (LW_NEWS_CTR_RATIO < 0.132702, 0.0418786, if (LW_MAIN_SEARCHES_RATIO < 0.4981, 0.0014675, -1.14374E-5))) + \n" +
+ "if (LW_MAIN_SEARCHES_RATIO < 110.393, if (PREV_DAY_MAIN_SEARCHES_RATIO < 32.725, -1.42702E-6, if (NEWS_MAIN_SEARCHES_RATIO < 3.54764, if (DAY_PD_HITS_RATIO < 5.165, -0.00858847, 0.0288169), 0.0673668)), -0.00630045) + \n" +
+ "if (SUGG_TW < 0.0905167, if (LW_NEWS_SEARCHES_RATIO < 5.38735, if (PREV_DAY_NEWS_SEARCHES_RATIO < 1.82076, 1.83185E-4, if (PREV_DAY_USERS < 0.729889, 0.00456522, -6.55502E-4)), -0.00337268), -1.85203E-4) + \n" +
+ "if (SUGG_TW < 0.985223, -7.3888E-5, if (PREV_DAY_NEWS_SEARCHES_RATIO < 0.131265, 0.0410781, if (NEWS_USERS < 0.688593, 6.1809E-4, if (NEWS_USERS < 0.999268, 0.0215243, 0.00200864)))) + \n" +
+ "if (LW_MAIN_SEARCHES_RATIO < 0.164771, if (NEWS_CTR < 0.581094, 0.001345, if (NEWS_MAIN_SEARCHES_RATIO < 4.47209, 0.00479447, 0.0485025)), if (PREV_DAY_MAIN_SEARCHES_RATIO < 0.759041, 6.85213E-4, -1.09858E-4)) + \n" +
+ "if (LW_MAIN_SEARCHES_RATIO < 0.480853, if (NEWS_USERS < 0.0737993, -2.87273E-4, if (SUGG_TW < 0.0811122, if (PREV_DAY_NEWS_SEARCHES_RATIO < 3.02247, 0.00952516, 0.0353053), 0.00133144)), 4.44055E-6) + \n" +
+ "if (LW_MAIN_SEARCHES_RATIO < 10.0035, 1.25006E-5, if (NEWS_CTR < 0.530131, if (SEARCHES < 0.261805, if (ENTERTAINMENT_QC == 0.0, -0.00352081, 0.040869), -0.0108829), 0.00398639)) + \n" +
+ "if (PREV_DAY_NEWS_SEARCHES_RATIO < 1.21358, -1.96787E-4, if (ALGO_CTR < 3.09691, if (NEWS_SEARCHES < 0.542027, 8.12659E-4, if (NEWS_USERS < 0.193619, -0.00715108, -2.37342E-4)), 0.0334561)) + \n" +
+ "if (LW_NEWS_CTR < 0.598979, -7.53961E-5, if (PREV_DAY_CTR < 1.09221, if (SCIENCE < 0.55, 4.60354E-4, if (PREV_DAY_MAIN_SEARCHES_RATIO < 1.34224, 0.00248627, 0.0781891)), 0.00584066)) + \n" +
+ "if (PREV_DAY_NEWS_USERS < 27.8368, if (BIDDED_SEARCHES < 5.85314, 6.36817E-5, -9.02941E-4), if (PREV_DAY_NEWS_CTR < 0.773569, 0.00156477, if (SEARCHES < 4.08125, 0.0385004, 0.00774733))) + \n" +
+ "if (LW_MAIN_SEARCHES_RATIO < 109.125, if (DAY_PD_HITS_RATIO < 18.75, -2.83223E-5, if (YSM_CTR < 0.0174757, if (PREV_DAY_USERS < 0.117185, 0.0405892, 0.0056735), -0.00114963)), -0.00632407) + \n" +
+ "if (NEWS_MAIN_SEARCHES_RATIO < 67.6563, if (PREV_DAY_MAIN_SEARCHES_RATIO < 32.127, 3.92781E-5, if (NEWS_CTR < 0.920256, -0.00180273, if (AVG_RANK < 8.9, 0.0401539, -0.0106621))), 0.0199772) + \n" +
+ "if (YSM_CTR < 0.0299671, if (PREV_DAY_NEWS_USERS < 1.57751, if (NEWS_MAIN_SEARCHES_RATIO < 9.09345, -3.3303E-4, if (PREV_DAY_HITS < 0.5, -0.0132526, 0.00213574)), 0.00135158), 2.86346E-4) + \n" +
+ "if (HAS_NEWS_QC == 0.0, -4.23535E-5, if (NUM_WORDS < 2.5, 1.89805E-4, if (MAX_MIN_SCORE < 34177.2, 0.00313415, if (MAX_MIN_SCORE < 38154.2, 0.0393482, 0.00649343)))) + \n" +
+ "if (NUM_WORDS < 4.5, -1.5123E-5, if (NEWS_MAIN_CTR_RATIO < 0.333385, 0.0256599, if (MIN_SCORE < 261595.0, if (LOCAL_QC == 0.0, 0.0096315, 0.0775677), -8.03435E-4))) + \n" +
+ "if (PREV_DAY_NEWS_SEARCHES_RATIO < 4.7637, 4.65171E-5, if (SEARCHES < 0.273091, if (PREV_DAY_NEWS_SEARCHES_RATIO < 11.1511, if (PREV_DAY_NEWS_CTR_RATIO < 1.59426, -0.0164486, -6.06353E-4), 0.0148189), -7.27497E-4)) + \n" +
+ "if (NEWS_USERS < 1.78862, -9.34176E-5, if (NEWS_CTR < 0.503725, -3.05424E-4, if (SUGG_LW < 92.5, if (ALGO_CTR < 0.866144, 0.00709828, 2.29334E-4), -2.83122E-4))) + \n" +
+ "if (PREV_DAY_MAIN_SEARCHES_RATIO < 31.9116, 1.62331E-5, if (SEARCHES < 0.437086, 0.05257, if (NEWS_MAIN_CTR_RATIO < 1.327, -0.00681457, if (PUB_TODAY_AVG < 0.365, -0.00897547, 0.0268691)))) + \n" +
+ "if (DAY_PD_HITS_RATIO < 79.5, if (BUSINESS < 0.195, 1.77773E-4, if (LW_MAIN_SEARCHES_RATIO < 59.4737, if (LW_MAIN_SEARCHES_RATIO < 50.3613, -3.2627E-4, 0.0335433), -0.0156041)), -0.0188673) + \n" +
+ "if (PREV_DAY_MAIN_SEARCHES_RATIO < 20.9996, 2.5689E-5, if (PREV_DAY_MAIN_CTR_RATIO < 0.692676, if (DAY_WEEK_AVG_RATIO < 2.95, -0.00554275, 0.0235987), if (LW_NEWS_SEARCHES_RATIO < 1.70492, 0.00485286, -0.0165676))) + \n" +
+ "if (PREV_DAY_NEWS_CTR < 0.239879, if (NEWS_MAIN_SEARCHES_RATIO < 7.73296, -5.33314E-4, -0.00970318), if (NEWS_USERS < 0.223578, if (NEWS_SEARCHES < 0.312345, 3.91472E-5, -0.00407912), 6.05168E-4)) + \n" +
+ "if (LW_NEWS_SEARCHES_RATIO < 2.51675, 1.51048E-5, if (LW_USERS < 0.228893, if (PREV_DAY_MAIN_CTR_RATIO < 3.25636, -0.00414565, if (PREV_DAY_USERS < 0.085979, -0.00665102, 0.0314653)), -1.93031E-4)) + \n" +
+ "if (LW_MAIN_SEARCHES_RATIO < 11.633, -3.54622E-5, if (NEWS_CTR < 0.341241, if (SEARCHES < 0.263899, if (DUDE < 0.121324, 0.0225604, -0.023524), -0.0147208), 0.0032981)) + \n" +
+ "if (DAY_WEEK_AVG_RATIO < 14.225, if (YSM_CTR < 0.0816637, -2.14327E-4, if (LW_USERS < 3.05964, if (NEWS_USERS < 0.0737993, 2.02599E-5, 0.00173562), -7.80059E-4)), 0.0180608) + \n" +
+ "if (PREV_DAY_MAIN_SEARCHES_RATIO < 0.743695, if (PREV_DAY_NEWS_CTR < 0.855411, 3.7646E-4, if (PREV_DAY_CTR < 1.27851, if (PREV_DAY_MAIN_SEARCHES_RATIO < 0.633963, 0.0103505, 0.0012006), -0.0123434)), -4.6606E-5) + \n" +
+ "if (DAY_WEEK_AVG_RATIO < 14.225, if (DAY_WEEK_AVG_RATIO < 6.985, -1.74518E-7, if (NEWS_USERS < 0.0737993, 0.00502863, -0.00831447)), if (LW_MAIN_SEARCHES_RATIO < 3.92095, -0.00421267, 0.0344091)) + \n" +
+ "if (PREV_DAY_MAIN_SEARCHES_RATIO < 0.413706, if (PREV_DAY_NEWS_CTR < 0.874719, if (LW_MAIN_SEARCHES_RATIO < 11.2056, 0.00144004, -0.014018), if (ELECTRONICS_QC == 0.0, 0.0123258, 0.119451)), -3.38696E-5) + \n" +
+ "if (LW_NEWS_SEARCHES_RATIO < 1.77722, 4.48781E-5, if (PREV_DAY_NEWS_USERS < 0.0737993, if (SUGG_TW < 0.054069, 0.00321739, -5.83152E-4), if (SEARCHES < 0.120921, -0.0131926, -0.0011303))) + \n" +
+ "if (PREV_DAY_NEWS_SEARCHES_RATIO < 6.83205, 1.51597E-5, if (LW_CTR < 0.831409, -7.90698E-4, if (USERS < 0.249336, -0.0239581, if (PREV_DAY_MAIN_CTR_RATIO < 0.604761, 0.0179905, -0.00651038)))) + \n" +
+ "if (SUGG_TW < 0.967398, -6.92965E-6, if (PREV_DAY_NEWS_SEARCHES_RATIO < 0.135403, 0.0310525, if (DUDE < 0.567766, if (SPORTS < 0.73, 0.00175153, 0.00962597), -0.00111687))) + \n" +
+ "if (LW_MAIN_SEARCHES_RATIO < 8.3318, 4.08033E-5, if (NEWS_CTR < 0.445404, if (USERS < 0.179365, if (TOPSTORY < 0.155, -0.00102, 0.0450773), -0.0100005), 0.00289212)) + \n" +
+ "if (PREV_DAY_NEWS_SEARCHES_RATIO < 1.18832, -1.47618E-4, if (ISABSTRACT_AVG < 0.03, if (PREV_DAY_NEWS_SEARCHES_RATIO < 1.21666, if (LW_MAIN_SEARCHES_RATIO < 0.747696, 0.024334, 0.00586202), 8.905E-4), -1.50915E-4)) + \n" +
+ "if (LW_MAIN_SEARCHES_RATIO < 108.679, -8.69379E-6, if (PREV_DAY_CTR < 1.00967, if (USERS < 1.24806, -0.0260056, if (NATIONALNEWS < 0.225, -0.00279946, 0.0163558)), -0.0151386)) + \n" +
+ "if (LW_CTR < 0.968595, -9.48294E-5, if (PREV_DAY_NEWS_SEARCHES_RATIO < 1.904, 2.43976E-4, if (ISTITLE_AVG < 0.37, if (WEEKAVG < 3.5, 0.00702639, 0.0745663), 8.32579E-4))) + \n" +
+ "if (POLITICS < 0.145, if (PREV_DAY_NEWS_CTR_RATIO < 0.999206, 4.19546E-4, -1.41595E-4), if (LW_NEWS_SEARCHES_RATIO < 2.08879, -0.00110749, if (PREV_DAY_SEARCHES < 0.108517, -0.0335177, -0.00494023))) + \n" +
+ "if (ENTERTAINMENT < 0.315, -5.15746E-5, if (NEWS_MAIN_SEARCHES_RATIO < 8.67009, 6.82081E-4, if (SUGG_TW < 0.0215705, if (NEWS_MAIN_SEARCHES_RATIO < 10.1884, 0.0604503, 0.0100963), -0.00450777))) + \n" +
+ "if (NEWS_MAIN_SEARCHES_RATIO < 66.4948, if (NUM_WORDS < 4.5, -1.49344E-5, if (DAY_WEEK_AVG_RATIO < 0.885, -0.00241279, if (NEWS_MAIN_SEARCHES_RATIO < 0.440079, -0.00883573, 0.0129644))), 0.0201648) + \n" +
+ "if (LW_MAIN_SEARCHES_RATIO < 168.382, if (NEWS_USERS < 1.4626, -9.7013E-5, 6.89976E-4), if (NEWS_MAIN_CTR_RATIO < 0.919145, -0.0160583, if (PREV_DAY_NEWS_SEARCHES_RATIO < 42.1812, -0.00586574, 0.0127268))) + \n" +
+ "if (NEWS_MAIN_SEARCHES_RATIO < 69.7876, if (LW_CTR < 2.10467, -2.15592E-5, if (PREV_DAY_NEWS_CTR < 0.64905, if (SUGG_LW < 1.5, 0.011141, -0.00207262), 0.0231231)), -0.01887) + \n" +
+ "if (LW_MAIN_SEARCHES_RATIO < 28.9579, -1.03443E-5, if (NATIONALNEWS < 0.315, -0.00440798, if (ISTITLE_AVG < 0.685, if (SEARCHES < 0.599257, 0.0596869, 0.0120629), -0.0170673))) + \n" +
+ "if (NEWS_MAIN_SEARCHES_RATIO < 25.791, 1.18225E-5, if (DAY_LW_DAY_HITS_RATIO < 1.96, if (POLITICS_QC == 0.0, if (AVG_SCORE < 377554.0, -0.00736807, 0.0228459), 0.0215737), -0.0192701)) + \n" +
+ "if (BIDDED_SEARCHES < 0.00581527, if (MAX_SCORE < 304693.0, -1.37144E-4, if (WEEKAVG < 0.325, 9.10176E-4, if (NEWS_USERS < 0.737974, 0.0207108, -0.0108051))), -4.27638E-5) + \n" +
+ "if (NATIONALNEWS < 0.215, -4.53121E-5, if (LW_NEWS_CTR < 1.22057, 6.18227E-4, if (NEWS_MAIN_SEARCHES_RATIO < 4.57054, if (MIN_SCORE < 241439.0, -0.00108969, 0.0178961), 0.0489683))) + \n" +
+ "if (LOCAL_QC == 1.0, if (LW_NEWS_CTR_RATIO < 0.592627, if (LW_SEARCHES < 0.101433, -0.0152231, if (PREV_DAY_USERS < 0.0833142, 0.0217818, -0.00211607)), -3.0503E-4), 7.1333E-5) + \n" +
+ "if (PREV_DAY_CTR < 1.27104, -1.52119E-5, if (DAY_PD_HITS_RATIO < 4.25, if (LW_NEWS_CTR_RATIO < 0.659092, if (PREV_DAY_NEWS_CTR_RATIO < 0.316981, -0.00815248, 0.00978334), 3.99397E-5), 0.0164301)) + \n" +
+ "if (YSM_CTR < 0.0209264, if (PREV_DAY_NEWS_USERS < 1.61612, if (NEWS_MAIN_SEARCHES_RATIO < 2.83652, -2.92569E-4, if (USERS < 0.0435647, -0.0269406, -0.00312742)), 0.00154452), 2.03401E-4) + \n" +
+ "if (LW_MAIN_SEARCHES_RATIO < 29.1019, -4.51494E-5, if (NATIONALNEWS < 0.58, if (NEWS_USERS < 0.0737993, 0.00750159, -0.00562872), if (YSM_NCTR < 0.0734117, -0.00843834, 0.0454542))) + \n" +
+ "if (PREV_DAY_NEWS_SEARCHES < 127.689, if (NEWS_MAIN_SEARCHES_RATIO < 20.0978, -5.5152E-5, if (ALGO_CTR < 1.31726, if (SPORTS < 0.55, -0.00726806, 0.0277824), 0.0380699)), 0.00603891) + \n" +
+ "if (NEWS_MAIN_CTR_RATIO < 0.118028, if (MIN_SCORE < 208142.0, 3.24283E-4, if (PREV_DAY_USERS < 0.364978, if (NEWS_SEARCHES < 0.405894, -0.00227332, -0.0219515), -0.00291436)), 7.25864E-5) + \n" +
+ "if (NEWS_MAIN_SEARCHES_RATIO < 59.3594, if (DAY_PD_HITS_RATIO < 20.625, 2.84167E-5, if (PREV_DAY_CTR < 0.822381, 0.00954114, -0.00760958)), if (POLITICS_QC == 0.0, 0.00278641, 0.063001)) + \n" +
+ "if (MIN_RANK < 5.0, if (ALGO_CTR < 1.02929, 1.47687E-4, if (LW_NEWS_CTR_RATIO < 0.131516, 0.0453262, if (PREV_DAY_NEWS_SEARCHES_RATIO < 1.88827, 7.66045E-4, 0.00609536))), -2.16222E-4) + \n" +
+ "if (MIN_SCORE < 730226.0, if (MAX_SCORE < 633968.0, 1.63517E-5, if (ISTITLE_AVG < 0.27, if (SUPERDUPER_AVG < 0.11, 0.00752064, 0.0466919), 3.46353E-4)), -0.0132458) + \n" +
+ "if (HAS_NEWS_QC == 0.0, -4.23514E-5, if (ALGO_CLICKS < 0.0117185, if (SUGG_OVERLAP < 0.5, if (MIN_RANK < 3.0, 0.0232483, 0.00659311), -0.00265598), 4.52324E-4)) + \n" +
+ "if (LW_SEARCHES < 0.189865, if (LW_NEWS_SEARCHES_RATIO < 2.08206, if (NEWS_MAIN_SEARCHES_RATIO < 38.5995, -1.13521E-4, 0.04933), if (LW_NEWS_CTR < 0.873417, -0.00342462, 0.0176586)), 1.50845E-4) + \n" +
+ "if (ALGO_CTR < 0.298421, -5.53583E-4, if (LW_MAIN_CTR_RATIO < 0.511342, if (NEWS_MAIN_SEARCHES_RATIO < 8.48751, if (NUM_WORDS < 2.5, -1.33754E-5, 0.00448769), 0.00836265), -6.6781E-6)) + \n" +
+ "if (BIDDED_SEARCHES < 2.76167, if (LW_NEWS_CTR_RATIO < 0.932077, if (LW_MAIN_SEARCHES_RATIO < 5.40525, if (SUGG_OVERLAP < 16.5, 0.00258433, 2.00886E-4), -0.00529792), -8.74994E-5), -5.14235E-4) + \n" +
+ "if (NEWS_CTR < 4.12971, if (NEWS_MAIN_SEARCHES_RATIO < 8.67009, 1.65575E-5, if (ALGO_CTR < 0.691525, if (PREV_DAY_NEWS_CTR_RATIO < 0.0958437, 0.0404329, 1.86524E-4), -0.00627032)), 0.0202226) + \n" +
+ "if (DAY_PD_HITS_RATIO < 79.5, if (DAY_PD_HITS_RATIO < 6.655, -3.12406E-5, if (USERS < 0.0351556, if (DAY_WEEK_AVG_RATIO < 3.82, 0.045008, 0.00842323), 0.00104995)), -0.0188449) + \n" +
+ "if (NEWS_CTR < 4.12578, if (PREV_DAY_NEWS_USERS < 14.0861, -2.21302E-5, if (LW_USERS < 0.956063, if (NEWS_USERS < 14.236, 0.0231446, 1.33354E-4), 0.00122231)), 0.0216971) + \n" +
+ "if (LW_MAIN_SEARCHES_RATIO < 11.8368, -5.34645E-5, if (TOPSTORY < 0.39, 9.98011E-4, if (DUDE < 0.0709353, 0.00678128, if (DUDE < 1.97964, 0.0552834, -0.00176926)))) + \n" +
+ "if (AVG_RANK < 8.325, 1.49825E-4, if (LW_NEWS_SEARCHES_RATIO < 1.73427, -6.98057E-5, if (LW_MAIN_SEARCHES_RATIO < 0.662255, if (ALGO_CTR < 1.04359, 0.001813, 0.0309613), -0.00160574))) + \n" +
+ "if (NEWS_USERS < 1.4626, if (NEWS_SEARCHES < 1.12712, 1.58784E-5, -0.00187785), if (ALGO_CTR < 1.42277, 7.02003E-4, if (ALGO_CLICKS < 3.32631, 0.048031, -0.003279))) + \n" +
+ "if (POLITICS < 0.24, if (NEWS_MAIN_SEARCHES_RATIO < 35.8437, 5.18183E-5, if (POLITICS_QC == 0.0, -0.0123086, 0.0199861)), if (NEWS_USERS < 0.0737993, -6.26382E-4, -0.00549246)) + \n" +
+ "if (NEWS_MAIN_SEARCHES_RATIO < 71.6642, if (NEWS_MAIN_SEARCHES_RATIO < 15.8372, 6.81273E-7, if (AVG_SCORE < 363153.0, -0.00465733, if (ALGO_CTR < 0.695395, 0.0265484, -0.00829536))), 0.025954) + \n" +
+ "if (LW_MAIN_SEARCHES_RATIO < 11.8213, -4.37253E-6, if (PREV_DAY_MAIN_SEARCHES_RATIO < 0.405918, -0.0124363, if (NEWS_MAIN_SEARCHES_RATIO < 1.5753, if (LW_NEWS_SEARCHES_RATIO < 1.76106, 0.00532177, -0.00463922), 0.00671844))) + \n" +
+ "if (SUGG_TW < 0.999491, -2.8076E-5, if (PREV_DAY_MAIN_SEARCHES_RATIO < 0.216777, -0.0226515, if (PREV_DAY_MAIN_SEARCHES_RATIO < 0.268164, 0.0268326, if (LW_NEWS_SEARCHES < 3.78249, 0.00120701, 0.0234219)))) + \n" +
+ "if (LIFESTYLE < 0.26, -7.02645E-5, if (LW_NEWS_SEARCHES_RATIO < 1.51723, 2.96639E-4, if (USERS < 0.723483, if (LW_MAIN_SEARCHES_RATIO < 1.39783, 0.016844, -0.00205856), 4.40619E-4))) + \n" +
+ "if (NEWS_MAIN_SEARCHES_RATIO < 65.0244, if (BIDDED_SEARCHES < 7.80003, if (NEWS_USERS < 1.78862, 3.10706E-5, if (NEWS_CTR < 1.16946, 0.00108557, 0.00916809)), -7.32103E-4), -0.0167254) + \n" +
+ "if (MIN_SCORE < 706927.0, if (NEWS_MAIN_SEARCHES_RATIO < 34.6252, 2.64245E-7, if (PREV_DAY_USERS < 0.0768826, -0.018824, if (MAX_SCORE < 266368.0, -0.00725209, 0.0196337))), -0.0123073) + \n" +
+ "if (LW_NEWS_SEARCHES_RATIO < 6.50524, 1.03884E-5, if (PREV_DAY_MAIN_CTR_RATIO < 3.78226, if (SUGG_LW < 20.5, -0.00464095, if (LW_MAIN_SEARCHES_RATIO < 1.80191, -0.00156348, 0.00548999)), 0.0381174)) + \n" +
+ "if (PREV_DAY_MAIN_SEARCHES_RATIO < 0.759041, if (PREV_DAY_NEWS_CTR < 0.863395, 2.90623E-4, if (PREV_DAY_NEWS_CTR < 0.888089, if (ELECTRONICS_QC == 0.0, 0.0295237, 0.234098), 0.00434771)), -1.29379E-4) + \n" +
+ "if (NUM_WORDS < 4.5, -1.45879E-5, if (LW_MAIN_SEARCHES_RATIO < 0.500824, 0.0301862, if (PREV_DAY_CTR < 0.774918, -0.00862254, if (NEWS_MAIN_CTR_RATIO < 0.379925, 0.0202843, 0.00289057)))) + \n" +
+ "if (NEWS_USERS < 0.223578, if (NEWS_SEARCHES < 0.368987, -2.11848E-5, if (SUGG_OVERLAP < 27.5, if (SUGG_LW < 0.5, -1.33621E-4, -0.0115821), -9.66948E-4)), 4.18664E-4) + \n" +
+ "if (PREV_DAY_NEWS_CTR < 2.62668, if (NEWS_MAIN_SEARCHES_RATIO < 59.3594, 2.95226E-5, if (POLITICS_QC == 0.0, 0.00292095, 0.0790473)), if (NEWS_MAIN_SEARCHES_RATIO < 1.59073, -0.00436975, -0.0244164)) + \n" +
+ "if (LW_NEWS_SEARCHES_RATIO < 7.02911, 6.98898E-5, if (USERS < 0.407094, if (PREV_DAY_CTR < 0.374015, if (SUGG_OVERLAP < 2.5, 0.0472886, -0.00535839), -0.0102684), -3.2528E-4)) + \n" +
+ "if (LW_MAIN_SEARCHES_RATIO < 29.7446, 3.85782E-5, if (LW_NEWS_SEARCHES_RATIO < 1.21834, 0.0126136, if (NEWS_SEARCHES < 0.476744, -0.0168822, if (NEWS_SEARCHES < 0.555398, 0.020656, -0.00228292)))) + \n" +
+ "if (NEWS_USERS < 0.223578, -1.43802E-4, if (PREV_DAY_NEWS_USERS < 0.150071, if (BIDDED_SEARCHES < 0.867205, if (YSM_CTR < 0.297271, 0.00337525, 0.0192626), 3.30042E-4), -5.02865E-5)) + \n" +
+ "if (DAY_PD_HITS_RATIO < 24.585, -3.60812E-5, if (LW_NEWS_CTR < 0.657508, if (NEWS_SEARCHES < 0.504412, if (PREV_DAY_MAIN_CTR_RATIO < 1.04252, -0.00553346, 0.0408869), -0.0021064), 0.0343193)) + \n" +
+ "if (LW_NEWS_USERS < 53.7995, if (DAY_LW_DAY_HITS_RATIO < 32.5, -3.9933E-5, 0.0108883), if (LW_CTR < 0.355044, if (PREV_DAY_HITS < 50.0, 0.00394348, 0.0296827), 0.0024317)) + \n" +
+ "if (MAX_SCORE < 533059.0, if (NUM_WORDS < 4.5, 5.09566E-6, if (PREV_DAY_NEWS_SEARCHES_RATIO < 1.45664, if (USERS < 0.0408067, 0.0119362, -0.00850073), 0.0335548)), -0.00280973) + \n" +
+ "if (ENTERTAINMENT < 0.385, -7.00557E-5, if (LW_NEWS_CTR_RATIO < 0.0964147, 0.0237102, if (DAY_LW_DAY_HITS_RATIO < 0.32, if (PREV_DAY_NEWS_CTR_RATIO < 0.357356, 0.0317688, 0.00391061), 4.78597E-4))) + \n" +
+ "if (YSM_CTR < 0.0466685, if (LW_NEWS_SEARCHES_RATIO < 1.77997, -3.31782E-5, if (USERS < 0.0159575, -0.0241302, if (LW_SEARCHES < 0.10844, -0.00372346, -5.03317E-4))), 2.60865E-4) + \n" +
+ "if (LW_NEWS_CTR_RATIO < 0.983428, if (PREV_DAY_MAIN_CTR_RATIO < 1.22783, 3.17497E-5, if (BIDDED_SEARCHES < 0.0585926, if (ALGO_CTR < 0.685543, 0.0286017, 0.00458196), 0.00141704)), -1.42453E-4) + \n" +
+ "if (DAY_WEEK_AVG_RATIO < 14.465, if (LW_NEWS_SEARCHES_RATIO < 2.48779, 5.43157E-5, if (LW_USERS < 0.0728421, if (LW_SEARCHES < 0.0462011, -0.00122403, -0.0108271), -1.06335E-4)), 0.0132433) + \n" +
+ "if (POLITICS < 0.24, if (BIDDED_SEARCHES < 7.93667, if (NEWS_MAIN_CTR_RATIO < 0.0682826, -0.003825, 8.27348E-5), -8.14933E-4), if (NEWS_MAIN_SEARCHES_RATIO < 13.1906, -0.00193925, -0.0207467)) + \n" +
+ "if (PREV_DAY_MAIN_SEARCHES_RATIO < 31.9116, -2.99465E-5, if (SEARCHES < 0.43648, 0.044537, if (LW_MAIN_CTR_RATIO < 1.03393, 0.00821746, if (PREV_DAY_NEWS_CTR_RATIO < 1.72904, 9.38031E-5, -0.0300209)))) + \n" +
+ "if (ALGO_CLICKS < 3.95494, if (LW_NEWS_USERS < 3.59178, 9.55661E-5, if (PREV_DAY_MAIN_SEARCHES_RATIO < 1.45041, if (SUGG_OVERLAP < 19.5, 0.00838423, 2.60441E-4), 0.0134882)), -5.72155E-4) + \n" +
+ "if (LW_MAIN_CTR_RATIO < 0.44235, -8.26482E-4, if (LW_MAIN_CTR_RATIO < 0.572619, if (HAS_NEWS_QC == 0.0, 6.82192E-4, if (MIN_RANK < 3.0, 0.0197753, 0.00309895)), -2.28104E-5)) + \n" +
+ "if (PREV_DAY_NEWS_SEARCHES_RATIO < 1.82043, -4.7561E-5, if (MAX_MIN_SCORE < 99.25, if (PREV_DAY_MAIN_CTR_RATIO < 1.23087, 0.00100234, if (LW_NEWS_CTR < 1.55817, 0.00609794, 0.0443022)), -2.12393E-4)) + \n" +
+ "if (PREV_DAY_CTR < 1.25038, -5.38648E-6, if (DAY_WEEK_AVG_RATIO < 3.56, 9.65443E-4, if (ISABSTRACT_AVG < 0.17, if (INTLNEWS < 0.185, 0.0537142, 0.00762876), -0.00561531))) + \n" +
+ "if (NEWS_USERS < 2.76691, -1.93104E-5, if (DUDE < 1.53391, if (PREV_DAY_CTR < 0.546773, if (PREV_DAY_MAIN_CTR_RATIO < 0.500807, 0.0447824, 0.00516532), 0.00151797), -8.85898E-4)) + \n" +
+ "if (LW_NEWS_CTR < 1.82543, if (NUM_WORDS < 4.5, -9.66944E-6, if (ISTITLE_AVG < 0.225, if (NEWS_MAIN_CTR_RATIO < 0.784694, 0.00125644, 0.0174436), -0.00293329)), -0.00426485) + \n" +
+ "if (PREV_DAY_MAIN_SEARCHES_RATIO < 32.8094, -6.29023E-5, if (MAX_MIN_SCORE < 29576.2, if (LW_MAIN_SEARCHES_RATIO < 10.4119, 0.0302981, -0.00444216), if (ALGO_CLICKS < 0.867944, 0.0333789, 0.00409533))) + \n" +
+ "if (LW_CTR < 1.02078, -3.94933E-5, if (LW_NEWS_CTR_RATIO < 0.144121, if (NEWS_CTR < 0.1349, -6.3538E-4, 0.0621699), if (NEWS_USERS < 0.158915, -3.31917E-5, 0.00191984))) + \n" +
+ "if (LW_USERS < 0.154237, if (SUGG_OVERLAP < 0.5, if (PREV_DAY_NEWS_CTR < 1.792, if (PREV_DAY_NEWS_CTR_RATIO < 0.922571, 0.00370056, -1.34817E-5), -0.0231586), -7.99336E-4), 1.05768E-4) + \n" +
+ "if (NEWS_MAIN_SEARCHES_RATIO < 38.9947, if (LW_MAIN_SEARCHES_RATIO < 0.855833, 4.19968E-4, -1.20742E-4), if (POLITICS_QC == 0.0, if (MAX_MIN_SCORE < 18453.8, 0.0234502, -0.00779192), 0.0350307)) + \n" +
+ "if (INTLNEWS < 0.575, 4.64842E-5, if (NEWS_CTR < 0.642032, -1.79661E-4, if (LW_USERS < 0.254706, if (PREV_DAY_MAIN_SEARCHES_RATIO < 13.2143, -0.0124018, 0.0123916), -0.00150583))) + \n" +
+ "if (LOCAL_QC == 1.0, if (NEWS_USERS < 0.0737993, 2.98545E-4, if (ALGO_CTR < 1.47845, if (ISTITLE_AVG < 0.515, -0.00529041, -0.00103532), 0.018429)), 6.65685E-5) + \n" +
+ "if (NEWS_USERS < 0.223578, if (NEWS_SEARCHES < 0.258474, -3.069E-6, if (SEARCHES < 0.0136022, 0.0176599, if (MIN_RANK < 1.0, -0.0134598, -0.00250337))), 3.60727E-4) + \n" +
+ "if (DAY_WEEK_AVG_RATIO < 11.945, if (LW_NEWS_USERS < 48.4961, -4.7398E-5, if (ALGO_CLICKS < 1.87723, 0.0178789, 0.00240131)), if (LW_NEWS_CTR_RATIO < 1.85865, -0.0164323, 0.00680461)) + \n" +
+ "if (PREV_DAY_NEWS_CTR_RATIO < 0.216219, if (PREV_DAY_USERS < 0.0840105, if (LW_NEWS_SEARCHES_RATIO < 1.96746, 0.00527106, -0.0232653), if (PREV_DAY_MAIN_SEARCHES_RATIO < 0.533542, 0.0105354, -0.00147978)), 1.96032E-5) + \n" +
+ "if (LW_CTR < 2.10467, -2.34181E-5, if (ENTERTAINMENT < 0.055, if (LW_MAIN_CTR_RATIO < 0.545111, -0.00186289, if (NEWS_MAIN_SEARCHES_RATIO < 1.20214, -4.63276E-4, 0.0235053)), 0.0256623)) + \n" +
+ "if (NATIONALNEWS < 0.215, -9.95943E-5, if (LW_NEWS_CTR < 1.26504, 5.92433E-4, if (NEWS_MAIN_SEARCHES_RATIO < 4.23613, if (YSM_CTR < 0.00285117, 0.0324961, 0.00265865), 0.0435376))) + \n" +
+ "if (YSM_CTR < 0.0395997, if (LW_NEWS_SEARCHES_RATIO < 1.73969, if (LW_MAIN_SEARCHES_RATIO < 20.0256, -7.93453E-5, if (ALGO_CTR < 1.07219, 0.00480972, 0.0310903)), -9.97104E-4), 2.33769E-4) + \n" +
+ "if (PREV_DAY_MAIN_SEARCHES_RATIO < 0.761868, if (PREV_DAY_NEWS_CTR_RATIO < 0.980053, if (AVG_SCORE < 400606.0, 0.00185437, if (LW_SEARCHES < 0.918927, 0.0198896, -0.0028412)), 6.98434E-6), -6.12361E-5) + \n" +
+ "if (NEWS_CTR < 1.31473, -1.47001E-5, if (LW_MAIN_SEARCHES_RATIO < 0.392613, if (PREV_DAY_NEWS_CTR < 0.855967, 0.00656145, 0.0531645), if (LW_MAIN_CTR_RATIO < 0.6064, 0.0105008, 2.09583E-4))) + \n" +
+ "if (ALGO_CTR < 3.3716, if (NEWS_CTR < 2.18598, 1.88911E-5, if (LW_NEWS_SEARCHES_RATIO < 2.3084, 8.27546E-4, -0.0121609)), if (NEWS_MAIN_SEARCHES_RATIO < 5.23803, -0.00440315, 0.0413186)) + \n" +
+ "if (DAY_PD_HITS_RATIO < 79.5, if (NEWS_MAIN_SEARCHES_RATIO < 72.49, if (PREV_DAY_NEWS_SEARCHES_RATIO < 11.2643, 3.08728E-5, if (DAY_HITS < 40.5, -0.00439766, 0.0200117)), -0.0203816), -0.0250875) + \n" +
+ "if (PREV_DAY_MAIN_SEARCHES_RATIO < 0.422088, if (LW_MAIN_SEARCHES_RATIO < 15.8595, if (PREV_DAY_NEWS_CTR < 0.567401, 5.47179E-4, 0.0106651), if (BIDDED_SEARCHES < 0.77063, -0.0254713, 0.00761775)), -4.33956E-5) + \n" +
+ "if (NEWS_MAIN_SEARCHES_RATIO < 8.67546, -1.7051E-6, if (SEARCHES < 0.0118739, 0.0135766, if (ALGO_CTR < 0.697676, -2.48861E-4, if (PREV_DAY_MAIN_SEARCHES_RATIO < 0.480888, -0.0243516, -0.00457767)))) + \n" +
+ "if (LOCAL_QC == 1.0, if (NEWS_CTR < 0.346638, if (LW_USERS < 0.160728, if (POLITICS_QC == 0.0, -0.0103889, 0.0692409), -0.00150749), 1.47672E-4), 7.59478E-5) + \n" +
+ "if (BIDDED_SEARCHES < 7.79863, if (NEWS_USERS < 0.212247, -7.88991E-5, if (AVG_SCORE < 176423.0, -2.66651E-4, if (LW_MAIN_SEARCHES_RATIO < 0.662064, 0.00587605, 7.70017E-4))), -6.54564E-4) + \n" +
+ "if (NEWS_MAIN_SEARCHES_RATIO < 27.2801, 3.4831E-5, if (BIDDED_SEARCHES < 0.0164564, if (ENTERTAINMENT_QC == 0.0, -0.0319078, 0.0305272), if (ENTERTAINMENT_QC == 1.0, -0.0309537, 4.40641E-4))) + \n" +
+ "if (LW_NEWS_SEARCHES_RATIO < 9.60088, -2.84775E-6, if (MAX_MIN_RANK < 5.0, if (ALGO_CTR < 1.25608, -0.00440719, 0.00573622), if (NEWS_CTR < 1.49071, 0.00109495, 0.0298901))) + \n" +
+ "if (DAY_PD_HITS_RATIO < 80.5, if (LW_NEWS_SEARCHES_RATIO < 45.9165, -1.89532E-5, if (DAY_WEEK_AVG_RATIO < 0.595, 0.0244208, if (DAY_WEEK_AVG_RATIO < 1.15, -0.00810109, 0.00531513))), -0.016232) + \n" +
+ "if (NEWS_MAIN_SEARCHES_RATIO < 65.0244, if (NEWS_CTR < 2.21055, 1.4463E-5, if (PREV_DAY_MAIN_CTR_RATIO < 1.39998, if (USERS < 0.0902444, -0.0288837, -0.00520806), 0.0146236)), -0.0186142) + \n" +
+ "if (DAY_LW_DAY_HITS_RATIO < 33.75, if (AVG_SCORE < 297290.0, 7.35472E-5, -5.61732E-4), if (MAX_SCORE < 200514.0, -0.0113399, if (ALGO_CLICKS < 0.990362, 0.0451975, 0.0062547))) + \n" +
+ "if (LW_NEWS_SEARCHES_RATIO < 1.77794, 7.00143E-5, if (PREV_DAY_NEWS_USERS < 0.0737993, if (MAX_MIN_SCORE < 0.75, 0.00257668, -8.8479E-4), if (SEARCHES < 0.156212, -0.0110319, -9.0476E-4))) + \n" +
+ "if (LW_MAIN_SEARCHES_RATIO < 0.146655, if (NEWS_MAIN_SEARCHES_RATIO < 3.25715, -2.57748E-5, if (USERS < 0.0468741, 4.50433E-4, if (YSM_NCTR < 0.0110028, 0.00633345, 0.0451608))), 4.31141E-5) + \n" +
+ "if (LW_MAIN_SEARCHES_RATIO < 29.7446, 8.17879E-6, if (LW_MAIN_SEARCHES_RATIO < 41.136, -0.0118399, if (NEWS_MAIN_SEARCHES_RATIO < 9.18546, if (ALGO_CLICKS < 0.578097, 0.0106049, -0.0015147), -0.0281154))) + \n" +
+ "if (NEWS_MAIN_SEARCHES_RATIO < 59.3594, if (LW_MAIN_SEARCHES_RATIO < 16.1013, -3.93323E-5, if (PREV_DAY_MAIN_CTR_RATIO < 1.64641, 5.96626E-4, 0.0120328)), if (POLITICS_QC == 0.0, 0.00481287, 0.0549552)) + \n" +
+ "if (DAY_PD_HITS_RATIO < 38.5, 1.00083E-5, if (ISTITLE_AVG < 0.25, if (POLITICS_QC == 0.0, if (HAS_NEWS_QC == 0.0, 0.0110884, 0.0787268), 0.165545), -4.90381E-5)) + \n" +
+ "if (DAY_PD_HITS_RATIO < 52.405, if (LW_MAIN_SEARCHES_RATIO < 21.0036, -2.31432E-6, if (LW_NEWS_SEARCHES_RATIO < 1.10007, 0.00655827, -0.00322249)), if (DUDE < 0.0213316, 0.00165459, 0.0267117)) + \n" +
+ "if (NEWS_CTR < 2.36159, 3.01421E-5, if (PUB_TODAY_AVG < 0.535, if (BIDDED_SEARCHES < 0.0756912, -0.0325427, if (SUGG_LW < 3.5, 0.0120055, -0.00841523)), 0.013018)) + \n" +
+ "if (PREV_DAY_MAIN_CTR_RATIO < 14.1009, if (PREV_DAY_MAIN_SEARCHES_RATIO < 293.154, if (PREV_DAY_MAIN_SEARCHES_RATIO < 106.761, 2.29918E-6, if (PREV_DAY_NEWS_SEARCHES_RATIO < 13.676, -0.0258432, 3.98425E-5)), 0.0146062), 0.0145195) + \n" +
+ "if (ISABSTRACT_AVG < 0.435, if (NEWS_CTR < 0.778514, 6.28755E-5, if (PREV_DAY_MAIN_CTR_RATIO < 0.325327, 0.0171532, 0.00156784)), if (NUM_WORDS < 2.5, -8.6343E-5, -0.00307349)) + \n" +
+ "if (PREV_DAY_NEWS_CTR_RATIO < 0.922298, if (SEARCHES < 0.0120163, if (DAY_PD_HITS_RATIO < 1.25, 0.0163074, -0.0180352), 2.42644E-4), if (LW_NEWS_SEARCHES_RATIO < 1.74561, 9.23406E-7, -0.00112041)) + \n" +
+ "if (PREV_DAY_CTR < 1.06609, -5.66011E-5, if (NEWS_SEARCHES < 1.12712, if (NEWS_USERS < 0.158915, 2.4137E-4, if (SUGG_TW < 0.0829887, 0.00752324, 7.62554E-4)), -0.00259993)) + \n" +
+ "if (NATIONALNEWS < 0.105, -4.51166E-5, if (AVG_SCORE < 359807.0, 3.6945E-4, if (ISTITLE_AVG < 0.885, if (MIN_SCORE < 346564.0, 0.041974, 0.0097136), -9.19818E-4))) + \n" +
+ "if (DAY_LW_DAY_HITS_RATIO < 57.5, if (NEWS_MAIN_SEARCHES_RATIO < 7.52403, -8.69476E-5, if (NEWS_CTR < 1.28406, 4.93978E-4, if (LW_MAIN_CTR_RATIO < 1.5554, 0.00922772, 0.0449952))), 0.0178316) + \n" +
+ "if (NEWS_CTR < 2.81183, -2.30122E-5, if (PEOPLE_QC == 0.0, if (DAY_WEEK_AVG_RATIO < 1.715, if (PREV_DAY_MAIN_SEARCHES_RATIO < 0.906796, 0.0145094, -0.00780837), 0.0292031), 0.166154)) + \n" +
+ "if (LW_MAIN_SEARCHES_RATIO < 19.6323, -9.07867E-6, if (LW_MAIN_CTR_RATIO < 1.34263, if (MIN_SCORE < 229623.0, -0.00765222, 0.00226572), if (LW_NEWS_SEARCHES_RATIO < 1.08241, 0.0130526, -0.0101225))) + \n" +
+ "if (DAY_PD_HITS_RATIO < 4.205, -4.64359E-5, if (LW_NEWS_CTR_RATIO < 0.13101, 0.0518657, if (LW_MAIN_SEARCHES_RATIO < 1.77864, -0.00106588, if (DAY_PD_HITS_RATIO < 4.55, 0.0407926, 0.00454191))))";
+
+}
diff --git a/searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/evaluation/EvaluationTestCase.java b/searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/evaluation/EvaluationTestCase.java
new file mode 100644
index 00000000000..19948cad9f2
--- /dev/null
+++ b/searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/evaluation/EvaluationTestCase.java
@@ -0,0 +1,399 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.rankingexpression.evaluation;
+
+import com.yahoo.javacc.UnicodeUtilities;
+import com.yahoo.searchlib.rankingexpression.RankingExpression;
+import com.yahoo.tensor.MapTensor;
+import com.yahoo.searchlib.rankingexpression.parser.ParseException;
+import com.yahoo.searchlib.rankingexpression.rule.*;
+import org.junit.Test;
+
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
+
+/**
+ * Tests expression evaluation
+ *
+ * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a>
+ */
+public class EvaluationTestCase extends junit.framework.TestCase {
+
+ private Context defaultContext;
+
+ @Override
+ protected void setUp() {
+ Map<String, Value> bindings = new HashMap<String, Value>();
+ bindings.put("zero", DoubleValue.frozen(0d));
+ bindings.put("one", DoubleValue.frozen(1d));
+ bindings.put("one_half", DoubleValue.frozen(0.5d));
+ bindings.put("a_quarter", DoubleValue.frozen(0.25d));
+ bindings.put("foo", StringValue.frozen("foo"));
+ defaultContext = new MapContext(bindings);
+ }
+
+ public void testEvaluation() {
+ assertEvaluates(0.5, "0.5");
+ assertEvaluates(-0.5, "-0.5");
+ assertEvaluates(0.5, "one_half");
+ assertEvaluates(-0.5, "-one_half");
+ assertEvaluates(0, "nonexisting");
+ assertEvaluates(0.75, "0.5 + 0.25");
+ assertEvaluates(0.75, "one_half + a_quarter");
+ assertEvaluates(1.25, "0.5 - 0.25 + one");
+
+ // String
+ assertEvaluates(1, "if(\"a\"==\"a\",1,0)");
+
+ // Precedence
+ assertEvaluates(26, "2*3+4*5");
+ assertEvaluates(1, "2/6+4/6");
+ assertEvaluates(2 * 3 * 4 + 3 * 4 * 5 - 4 * 200 / 10, "2*3*4+3*4*5-4*200/10");
+
+ // Conditionals
+ assertEvaluates(2 * (3 * 4 + 3) * (4 * 5 - 4 * 200) / 10, "2*(3*4+3)*(4*5-4*200)/10");
+ assertEvaluates(0.5, "if( 2<3, one_half, one_quarter)");
+ assertEvaluates(0.25,"if( 2>3, one_half, a_quarter)");
+ assertEvaluates(0.5, "if( 1==1, one_half, a_quarter)");
+ assertEvaluates(0.5, "if( 1<=1, one_half, a_quarter)");
+ assertEvaluates(0.5, "if( 1<=1.1, one_half, a_quarter)");
+ assertEvaluates(0.25,"if( 1>=1.1, one_half, a_quarter)");
+ assertEvaluates(0.5, "if( 0.33333333333333333333~=1/3, one_half, a_quarter)");
+ assertEvaluates(0.25,"if( 0.33333333333333333333~=1/35, one_half, a_quarter)");
+ assertEvaluates(5.5, "if(one_half in [one_quarter,one_half], one_half+5,log(one_quarter) * one_quarter)");
+ assertEvaluates(0.5, "if( 1 in [1,2 , 3], one_half, a_quarter)");
+ assertEvaluates(0.25,"if( 1 in [ 2,3,4], one_half, a_quarter)");
+ assertEvaluates(0.5, "if( \"foo\" in [\"foo\",\"bar\"], one_half, a_quarter)");
+ assertEvaluates(0.5, "if( foo in [\"foo\",\"bar\"], one_half, a_quarter)");
+ assertEvaluates(0.5, "if( \"foo\" in [foo,\"bar\"], one_half, a_quarter)");
+ assertEvaluates(0.5, "if( foo in [foo,\"bar\"], one_half, a_quarter)");
+ assertEvaluates(0.25,"if( \"foo\" in [\"baz\",\"boz\"], one_half, a_quarter)");
+ assertEvaluates(0.5, "if( one in [0, 1, 2], one_half, a_quarter)");
+ assertEvaluates(0.25,"if( one in [2], one_half, a_quarter)");
+ assertEvaluates(2.5, "if(1.0, 2.5, 3.5)");
+ assertEvaluates(3.5, "if(0.0, 2.5, 3.5)");
+ assertEvaluates(2.5, "if(1.0-1.1, 2.5, 3.5)");
+ assertEvaluates(3.5, "if(1.0-1.0, 2.5, 3.5)");
+
+ // Conditionals with branch probabilities
+ RankingExpression e = assertEvaluates(3.5, "if(1.0-1.0, 2.5, 3.5, 0.3)");
+ assertEquals(0.3, ((IfNode) e.getRoot()).getTrueProbability());
+
+ // Conditionals as expressions
+ assertEvaluates(new BooleanValue(true), "2<3");
+ assertEvaluates(new BooleanValue(false), "2>3");
+ assertEvaluates(new BooleanValue(false), "if (3>2, 2>3, 5.0)");
+ assertEvaluates(new BooleanValue(true), "2>3<1"); // The result of 2>3 is converted to 0, which is <1
+ assertEvaluates(2.5, "if(2>3<1, 2.5, 3.5)");
+ assertEvaluates(2.5, "if(1+1>3<1+0, 2.5, 3.5)");
+
+ // Functions
+ assertEvaluates(0, "sin(0)");
+ assertEvaluates(1, "cos(0)");
+ assertEvaluates(8, "pow(4/2,min(cos(0)*3,5))");
+
+ // Combined
+ assertEvaluates(1.25, "5*if(1>=1.1, one_half, if(min(1,2)<max(1,2),if (\"foo\" in [\"foo\",\"bar\"],a_quarter,3000), 0.57345347))");
+
+ }
+
+ @Test
+ public void testTensorEvaluation() {
+ assertEvaluates("{}", "{}"); // empty
+ assertEvaluates("( {{x:-}:1} * {} )", "( {{x:-}:1} * {} )"); // empty with dimensions
+
+ // sum(tensor)
+ assertEvaluates(5.0, "sum({{}:5.0})");
+ assertEvaluates(-5.0, "sum({{}:-5.0})");
+ assertEvaluates(12.5, "sum({ {d1:l1}:5.5, {d2:l2}:7.0 })");
+ assertEvaluates(0.0, "sum({ {d1:l1}:5.0, {d2:l2}:7.0, {}:-12.0})");
+
+ // scalar functions on tensors
+ assertEvaluates("{ {}:1, {d1:l1}:2, {d1:l1,d2:l1 }:3 }",
+ "log10({ {}:10, {d1:l1}:100, {d1:l1,d2:l1}:1000 })");
+ assertEvaluates("{ {}:50, {d1:l1}:500, {d1:l1,d2:l1}:5000 }",
+ "5 * { {}:10, {d1:l1}:100, {d1:l1,d2:l1}:1000 }");
+ assertEvaluates("{ {}:13, {d1:l1}:103, {d1:l1,d2:l1}:1003 }",
+ "{ {}:10, {d1:l1}:100, {d1:l1,d2:l1}:1000 } + 3");
+ assertEvaluates("{ {}:1, {d1:l1}:10, {d1:l1,d2:l1 }:100 }",
+ "{ {}:10, {d1:l1}:100, {d1:l1,d2:l1}:1000 } / 10");
+ assertEvaluates("{ {}:-10, {d1:l1}:-100, {d1:l1,d2:l1 }:-1000 }",
+ "- { {}:10, {d1:l1}:100, {d1:l1,d2:l1}:1000 }");
+ assertEvaluates("{ {}:-10, {d1:l1}:0, {d1:l1,d2:l1 }:0 }",
+ "min({ {}:-10, {d1:l1}:0, {d1:l1,d2:l1}:10 }, 0)");
+ assertEvaluates("{ {}:0, {d1:l1}:0, {d1:l1,d2:l1 }:10 }",
+ "max({ {}:-10, {d1:l1}:0, {d1:l1,d2:l1}:10 }, 0)");
+ assertEvaluates("{ {h:1}:1.5, {h:2}:1.5 }", "0.5 + {{h:1}:1.0,{h:2}:1.0}");
+
+ // sum(tensor, dimension)
+ assertEvaluates("{ {y:1}:4.0, {y:2}:12.0 }",
+ "sum({ {x:1,y:1}:1.0, {x:2,y:1}:3.0, {x:1,y:2}:5.0, {x:2,y:2}:7.0 }, x)");
+ assertEvaluates("{ {x:1}:6.0, {x:2}:10.0 }",
+ "sum({ {x:1,y:1}:1.0, {x:2,y:1}:3.0, {x:1,y:2}:5.0, {x:2,y:2}:7.0 }, y)");
+
+ // tensor sum
+ assertEvaluates("{ }", "{} + {}");
+ assertEvaluates("{ {x:1}:3, {x:2}:5 }",
+ "{ {x:1}:3 } + { {x:2}:5 }");
+ assertEvaluates("{ {x:1}:8 }",
+ "{ {x:1}:3 } + { {x:1}:5 }");
+ assertEvaluates("{ {x:1}:3, {y:1}:5 }",
+ "{ {x:1}:3 } + { {y:1}:5 }");
+ assertEvaluates("{ {x:1}:3, {x:2}:7, {y:1}:5 }",
+ "{ {x:1}:3, {x:2}:7 } + { {y:1}:5 }");
+ assertEvaluates("{ {x:1,y:1}:1, {x:2,y:1}:3, {x:1,y:2}:5, {y:1,z:1}:7, {y:2,z:1}:11, {y:1,z:2}:13 }",
+ "{ {x:1,y:1}:1, {x:2,y:1}:3, {x:1,y:2}:5 } + { {y:1,z:1}:7, {y:2,z:1}:11, {y:1,z:2}:13 }");
+ assertEvaluates("{ {x:1}:5, {x:1,y:1}:1, {y:1,z:1}:7 }",
+ "{ {x:1}:5, {x:1,y:1}:1 } + { {y:1,z:1}:7 }");
+ assertEvaluates("{ {x:1}:5, {x:1,y:1}:1, {z:1}:11, {y:1,z:1}:7 }",
+ "{ {x:1}:5, {x:1,y:1}:1 } + { {z:1}:11, {y:1,z:1}:7 }");
+ assertEvaluates("{ {}:5, {x:1,y:1}:1, {y:1,z:1}:7 }",
+ "{ {}:5, {x:1,y:1}:1 } + { {y:1,z:1}:7 }");
+ assertEvaluates("{ {}:16, {x:1,y:1}:1, {y:1,z:1}:7 }",
+ "{ {}:5, {x:1,y:1}:1 } + { {}:11, {y:1,z:1}:7 }");
+
+ // tensor difference
+ assertEvaluates("{ }", "{} - {}");
+ assertEvaluates("{ {x:1}:3, {x:2}:-5 }",
+ "{ {x:1}:3 } - { {x:2}:5 }");
+ assertEvaluates("{ {x:1}:-2 }",
+ "{ {x:1}:3 } - { {x:1}:5 }");
+ assertEvaluates("{ {x:1}:3, {y:1}:-5 }",
+ "{ {x:1}:3 } - { {y:1}:5 }");
+ assertEvaluates("{ {x:1}:3, {x:2}:7, {y:1}:-5 }",
+ "{ {x:1}:3, {x:2}:7 } - { {y:1}:5 }");
+ assertEvaluates("{ {x:1,y:1}:1, {x:2,y:1}:3, {x:1,y:2}:5, {y:1,z:1}:-7, {y:2,z:1}:-11, {y:1,z:2}:-13 }",
+ "{ {x:1,y:1}:1, {x:2,y:1}:3, {x:1,y:2}:5 } - { {y:1,z:1}:7, {y:2,z:1}:11, {y:1,z:2}:13 }");
+ assertEvaluates("{ {x:1}:5, {x:1,y:1}:1, {y:1,z:1}:-7 }",
+ "{ {x:1}:5, {x:1,y:1}:1 } - { {y:1,z:1}:7 }");
+ assertEvaluates("{ {x:1}:5, {x:1,y:1}:1, {z:1}:-11, {y:1,z:1}:-7 }",
+ "{ {x:1}:5, {x:1,y:1}:1 } - { {z:1}:11, {y:1,z:1}:7 }");
+ assertEvaluates("{ {}:5, {x:1,y:1}:1, {y:1,z:1}:-7 }",
+ "{ {}:5, {x:1,y:1}:1 } - { {y:1,z:1}:7 }");
+ assertEvaluates("{ {}:-6, {x:1,y:1}:1, {y:1,z:1}:-7 }",
+ "{ {}:5, {x:1,y:1}:1 } - { {}:11, {y:1,z:1}:7 }");
+ assertEvaluates("{ {x:1}:0 }",
+ "{ {x:1}:3 } - { {x:1}:3 }");
+ assertEvaluates("{ {x:1}:0, {x:2}:1 }",
+ "{ {x:1}:3, {x:2}:1 } - { {x:1}:3 }");
+
+ // tensor product
+ assertEvaluates("{ }", "{} * {}");
+ assertEvaluates("( {{x:-,y:-,z:-}:1}*{} )", "( {{x:-}:1} * {} ) * ( {{y:-,z:-}:1} * {} )"); // empty dimensions are preserved
+ assertEvaluates("( {{x:-}:1} * {} )",
+ "{ {x:1}:3 } * { {x:2}:5 }");
+ assertEvaluates("{ {x:1}:15 }",
+ "{ {x:1}:3 } * { {x:1}:5 }");
+ assertEvaluates("{ {x:1,y:1}:15 }",
+ "{ {x:1}:3 } * { {y:1}:5 }");
+ assertEvaluates("{ {x:1,y:1}:15, {x:2,y:1}:35 }",
+ "{ {x:1}:3, {x:2}:7 } * { {y:1}:5 }");
+ assertEvaluates("{ {x:1,y:1,z:1}:7, {x:1,y:1,z:2}:13, {x:2,y:1,z:1}:21, {x:2,y:1,z:2}:39, {x:1,y:2,z:1}:55 }",
+ "{ {x:1,y:1}:1, {x:2,y:1}:3, {x:1,y:2}:5 } * { {y:1,z:1}:7, {y:2,z:1}:11, {y:1,z:2}:13 }");
+ assertEvaluates("{ {x:1,y:1,z:1}:7 }",
+ "{ {x:1}:5, {x:1,y:1}:1 } * { {y:1,z:1}:7 }");
+ assertEvaluates("{ {x:1,y:1,z:1}:7, {x:1,z:1}:55 }",
+ "{ {x:1}:5, {x:1,y:1}:1 } * { {z:1}:11, {y:1,z:1}:7 }");
+ assertEvaluates("{ {x:1,y:1,z:1}:7 }",
+ "{ {}:5, {x:1,y:1}:1 } * { {y:1,z:1}:7 }");
+ assertEvaluates("{ {x:1,y:1,z:1}:7, {}:55 }",
+ "{ {}:5, {x:1,y:1}:1 } * { {}:11, {y:1,z:1}:7 }");
+
+ // match product
+ assertEvaluates("{ }", "match({}, {})");
+ assertEvaluates("( {{x:-}:1} * {} )",
+ "match({ {x:1}:3 }, { {x:2}:5 })");
+ assertEvaluates("{ {x:1}:15 }",
+ "match({ {x:1}:3 }, { {x:1}:5 })");
+ assertEvaluates("( {{x:-,y:-}:1} * {} )",
+ "match({ {x:1}:3 }, { {y:1}:5 })");
+ assertEvaluates("( {{x:-,y:-}:1} * {} )",
+ "match({ {x:1}:3, {x:2}:7 }, { {y:1}:5 })");
+ assertEvaluates("( {{x:-,y:-,z:-}:1} * {} )",
+ "match({ {x:1,y:1}:1, {x:2,y:1}:3, {x:1,y:2}:5 }, { {y:1,z:1}:7, {y:2,z:1}:11, {y:1,z:2}:13 })");
+ assertEvaluates("( {{x:-,y:-,z:-}:1} * {} )",
+ "match({ {x:1}:5, {x:1,y:1}:1 }, { {y:1,z:1}:7 })");
+ assertEvaluates("( {{x:-,y:-,z:-}:1} * {} )",
+ "match({ {x:1}:5, {x:1,y:1}:1 }, { {z:1}:11, {y:1,z:1}:7 })");
+ assertEvaluates("( {{x:-,y:-,z:-}:1} * {} )",
+ "match({ {}:5, {x:1,y:1}:1 }, { {y:1,z:1}:7 })");
+ assertEvaluates("( {{x:-,y:-,z:-}:1} * { {}:55 } )",
+ "match({ {}:5, {x:1,y:1}:1 }, { {}:11, {y:1,z:1}:7 })");
+ assertEvaluates("( {{z:-}:1} * { {x:1}:15, {x:1,y:1}:7 } )",
+ "match({ {}:5, {x:1}:3, {x:2}:4, {x:1,y:1}:1, {x:1,y:2}:6 }, { {x:1}:5, {y:1,x:1}:7, {z:1,y:1,x:1}:10 })");
+
+ // min
+ assertEvaluates("{ {x:1}:3, {x:2}:5 }",
+ "min({ {x:1}:3 }, { {x:2}:5 })");
+ assertEvaluates("{ {x:1}:3 }",
+ "min({ {x:1}:3 }, { {x:1}:5 })");
+ assertEvaluates("{ {x:1}:3, {y:1}:5 }",
+ "min({ {x:1}:3 }, { {y:1}:5 })");
+ assertEvaluates("{ {x:1}:3, {x:2}:7, {y:1}:5 }",
+ "min({ {x:1}:3, {x:2}:7 }, { {y:1}:5 })");
+ assertEvaluates("{ {x:1,y:1}:1, {x:2,y:1}:3, {x:1,y:2}:5, {y:1,z:1}:7, {y:2,z:1}:11, {y:1,z:2}:13 }",
+ "min({ {x:1,y:1}:1, {x:2,y:1}:3, {x:1,y:2}:5 }, { {y:1,z:1}:7, {y:2,z:1}:11, {y:1,z:2}:13 })");
+ assertEvaluates("{ {x:1}:5, {x:1,y:1}:1, {y:1,z:1}:7 }",
+ "min({ {x:1}:5, {x:1,y:1}:1 }, { {y:1,z:1}:7 })");
+ assertEvaluates("{ {x:1}:5, {x:1,y:1}:1, {z:1}:11, {y:1,z:1}:7 }",
+ "min({ {x:1}:5, {x:1,y:1}:1 }, { {z:1}:11, {y:1,z:1}:7 })");
+ assertEvaluates("{ {}:5, {x:1,y:1}:1, {y:1,z:1}:7 }",
+ "min({ {}:5, {x:1,y:1}:1 }, { {y:1,z:1}:7 })");
+ assertEvaluates("{ {}:5, {x:1,y:1}:1, {y:1,z:1}:7 }",
+ "min({ {}:5, {x:1,y:1}:1 }, { {}:11, {y:1,z:1}:7 })");
+ assertEvaluates("{ {}:5, {x:1}:3, {x:2}:4, {x:1,y:1}:1, {x:1,y:2}:6, {z:1,y:1,x:1}:10 }",
+ "min({ {}:5, {x:1}:3, {x:2}:4, {x:1,y:1}:1, {x:1,y:2}:6 }, { {x:1}:5, {y:1,x:1}:7, {z:1,y:1,x:1}:10 })");
+
+ // max
+ assertEvaluates("{ {x:1}:3, {x:2}:5 }",
+ "max({ {x:1}:3 }, { {x:2}:5 })");
+ assertEvaluates("{ {x:1}:5 }",
+ "max({ {x:1}:3 }, { {x:1}:5 })");
+ assertEvaluates("{ {x:1}:3, {y:1}:5 }",
+ "max({ {x:1}:3 }, { {y:1}:5 })");
+ assertEvaluates("{ {x:1}:3, {x:2}:7, {y:1}:5 }",
+ "max({ {x:1}:3, {x:2}:7 }, { {y:1}:5 })");
+ assertEvaluates("{ {x:1,y:1}:1, {x:2,y:1}:3, {x:1,y:2}:5, {y:1,z:1}:7, {y:2,z:1}:11, {y:1,z:2}:13 }",
+ "max({ {x:1,y:1}:1, {x:2,y:1}:3, {x:1,y:2}:5 }, { {y:1,z:1}:7, {y:2,z:1}:11, {y:1,z:2}:13 })");
+ assertEvaluates("{ {x:1}:5, {x:1,y:1}:1, {y:1,z:1}:7 }",
+ "max({ {x:1}:5, {x:1,y:1}:1 }, { {y:1,z:1}:7 })");
+ assertEvaluates("{ {x:1}:5, {x:1,y:1}:1, {z:1}:11, {y:1,z:1}:7 }",
+ "max({ {x:1}:5, {x:1,y:1}:1 }, { {z:1}:11, {y:1,z:1}:7 })");
+ assertEvaluates("{ {}:5, {x:1,y:1}:1, {y:1,z:1}:7 }",
+ "max({ {}:5, {x:1,y:1}:1 }, { {y:1,z:1}:7 })");
+ assertEvaluates("{ {}:11, {x:1,y:1}:1, {y:1,z:1}:7 }",
+ "max({ {}:5, {x:1,y:1}:1 }, { {}:11, {y:1,z:1}:7 })");
+ assertEvaluates("{ {}:5, {x:1}:5, {x:2}:4, {x:1,y:1}:7, {x:1,y:2}:6, {z:1,y:1,x:1}:10 }",
+ "max({ {}:5, {x:1}:3, {x:2}:4, {x:1,y:1}:1, {x:1,y:2}:6 }, { {x:1}:5, {y:1,x:1}:7, {z:1,y:1,x:1}:10 })");
+
+ // Combined
+ assertEvaluates(7.5 + 45 + 1.7,
+ "sum( " + // model computation
+ " match( " + // model weight application
+ " { {x:1}:1, {x:2}:2 } * { {y:1}:3, {y:2}:4 } * { {z:1}:5 }, " + // feature combinations
+ " { {x:1,y:1,z:1}:0.5, {x:2,y:1,z:1}:1.5, {x:1,y:1,z:2}:4.5 }" + // model weights
+ "))+1.7");
+
+ // undefined is not the same as 0
+ assertEvaluates(1.0, "sum({ {x:1}:0, {x:2}:0 } * { {x:1}:1, {x:2}:1 } + 0.5)");
+ assertEvaluates(0.0, "sum({ } * { {x:1}:1, {x:2}:1 } + 0.5)");
+
+ // tensor result dimensions are given from argument dimensions, not the resulting values
+ assertEvaluates("x", "( {{x:-}:1.0} * {} )", "{ {x:1}:1 } * { {x:2}:1 }");
+ assertEvaluates("x, y", "( {{y:-}:1.0} * {{x:1}:1.0} )", "{ {x:1}:1 } * { {x:2,y:1}:1, {x:1}:1 }");
+
+ // demonstration of where this produces different results: { {x:1}:1 } with 2 dimensions ...
+ assertEvaluates("x, y", "( {{x:-,y:-}:1.0} * {} )","{ {x:1}:1 } * { {x:2,y:1}:1, {x:1}:1 } * { {x:1,y:1}:1 }");
+ // ... vs { {x:1}:1 } with only one dimension
+ assertEvaluates("x, y", "{{x:1,y:1}:1.0}", "{ {x:1}:1 } * { {x:1,y:1}:1 }");
+
+ // check that dimensions are preserved through other operations
+ String d2 = "{ {x:1}:1 } * { {x:2,y:1}:1, {x:1}:1 }"; // creates a 2d tensor with only an 1d value
+ assertEvaluates("x, y", "( {{x:-,y:-}:1.0} * {} )", "match(" + d2 + ", {})");
+ assertEvaluates("x, y", "( {{y:-}:1.0} * {{x:1}:1.0} )", d2 + " - {}");
+ assertEvaluates("x, y", "( {{y:-}:1.0} * {{x:1}:1.0} )", d2 + " + {}");
+ assertEvaluates("x, y", "( {{y:-}:1.0} * {{x:1}:1.0} )", "min(1.5, " + d2 +")");
+ assertEvaluates("x, y", "( {{y:-}:1.0} * {{x:1}:1.0} )", "max({{x:1}:0}, " + d2 +")");
+ }
+
+ public void testProgrammaticBuildingAndPrecedence() {
+ RankingExpression standardPrecedence = new RankingExpression(new ArithmeticNode(constant(2), ArithmeticOperator.PLUS, new ArithmeticNode(constant(3), ArithmeticOperator.MULTIPLY, constant(4))));
+ RankingExpression oppositePrecedence = new RankingExpression(new ArithmeticNode(new ArithmeticNode(constant(2), ArithmeticOperator.PLUS, constant(3)), ArithmeticOperator.MULTIPLY, constant(4)));
+ assertEquals(14.0, standardPrecedence.evaluate(null).asDouble());
+ assertEquals(20.0, oppositePrecedence.evaluate(null).asDouble());
+ assertEquals("2.0 + 3.0 * 4.0", standardPrecedence.toString());
+ assertEquals("(2.0 + 3.0) * 4.0", oppositePrecedence.toString());
+ }
+
+ private ConstantNode constant(double value) {
+ return new ConstantNode(new DoubleValue(value));
+ }
+
+ public void testStructuredVariableEvaluation() {
+ Context context = new StructuredTestContext();
+ //assertEvaluates(77,"average(6,8)+average(6,8).timesten",context);
+ assertEvaluates(77, "average(\"2*3\",\"pow(2,3)\")+average(\"2*3\",\"pow(2,3)\").timesten", context);
+ }
+
+ private RankingExpression assertEvaluates(String tensorValue, String expressionString) {
+ return assertEvaluates(new TensorValue(MapTensor.from(tensorValue)), expressionString, defaultContext);
+ }
+
+ /** Validate also that the dimension of the resulting tensors are as expected */
+ private RankingExpression assertEvaluates(String tensorDimensions, String resultTensor, String expressionString) {
+ RankingExpression expression = assertEvaluates(new TensorValue(MapTensor.from(resultTensor)), expressionString, defaultContext);
+ TensorValue value = (TensorValue)expression.evaluate(defaultContext);
+ assertEquals(toSet(tensorDimensions), value.asTensor().dimensions());
+ assertEquals("String values are equals", resultTensor, expression.evaluate(defaultContext).toString());
+ return expression;
+ }
+
+ private RankingExpression assertEvaluates(Value value, String expressionString) {
+ return assertEvaluates(value, expressionString, defaultContext);
+ }
+
+ private RankingExpression assertEvaluates(double value, String expressionString) {
+ return assertEvaluates(value, expressionString, defaultContext);
+ }
+
+ private RankingExpression assertEvaluates(double value, String expressionString, Context context) {
+ return assertEvaluates(new DoubleValue(value), expressionString, context);
+ }
+
+ private RankingExpression assertEvaluates(Value value, String expressionString, Context context) {
+ try {
+ RankingExpression expression = new RankingExpression(expressionString);
+ assertEquals(expression.toString(), value, expression.evaluate(context));
+ return expression;
+ }
+ catch (ParseException e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ /** Turns a comma-separated string into a set of string values */
+ private Set<String> toSet(String values) {
+ Set<String> set = new HashSet<>();
+ for (String value : values.split(","))
+ set.add(value.trim());
+ return set;
+ }
+
+ private static class StructuredTestContext extends MapContext {
+
+ @Override
+ public Value get(String name, Arguments arguments, String output) {
+ if (!name.equals("average")) {
+ throw new IllegalArgumentException("Unknown operation '" + name + "'");
+ }
+ if (arguments.expressions().size() != 2) {
+ throw new IllegalArgumentException("'average' takes 2 arguments");
+ }
+ if (output != null && !output.equals("timesten")) {
+ throw new IllegalArgumentException("Unknown 'average' output '" + output + "'");
+ }
+
+ Value result = evaluateStringAsExpression(0, arguments).add(evaluateStringAsExpression(1, arguments)).divide(new DoubleValue(2));
+ if ("timesten".equals(output)) {
+ result = result.multiply(new DoubleValue(10));
+ }
+ return result;
+ }
+
+ private Value evaluateStringAsExpression(int index, Arguments arguments) {
+ try {
+ ExpressionNode e = arguments.expressions().get(index);
+ if (e instanceof ConstantNode) {
+ return new DoubleValue(new RankingExpression(UnicodeUtilities.unquote(((ConstantNode)e).sourceString())).evaluate(this).asDouble());
+ }
+ return e.evaluate(this);
+ }
+ catch (ParseException e) {
+ throw new RuntimeException("Could not evaluate argument '" + index + "'", e);
+ }
+ }
+
+ }
+
+}
diff --git a/searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/evaluation/NeuralNetEvaluationTestCase.java b/searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/evaluation/NeuralNetEvaluationTestCase.java
new file mode 100644
index 00000000000..95c4402a612
--- /dev/null
+++ b/searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/evaluation/NeuralNetEvaluationTestCase.java
@@ -0,0 +1,49 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.rankingexpression.evaluation;
+
+import com.yahoo.searchlib.rankingexpression.RankingExpression;
+import com.yahoo.tensor.MapTensor;
+import com.yahoo.searchlib.rankingexpression.parser.ParseException;
+import org.junit.Test;
+import static org.junit.Assert.assertEquals;
+
+/**
+ * Tests evaluating neural nets expressed as tensors
+ *
+ * @author bratseth
+ */
+public class NeuralNetEvaluationTestCase {
+
+ /** "XOR" neural network, separate expression per layer */
+ @Test
+ public void testPerLayerExpression() {
+ String input = "{ {x:1}:0, {x:2}:1 }";
+
+ String firstLayerWeights = "{ {x:1,h:1}:1, {x:1,h:2}:1, {x:2,h:1}:1, {x:2,h:2}:1 }";
+ String firstLayerBias = "{ {h:1}:-0.5, {h:2}:-1.5 }";
+ String firstLayerInput = "sum(" + input + "*" + firstLayerWeights + ", x) + " + firstLayerBias;
+ String firstLayerOutput = "min(1.0, max(0.0, 0.5 + " + firstLayerInput + "))"; // non-linearity, "poor man's sigmoid"
+ assertEvaluates("{ {h:1}:1.0, {h:2}:0.0} }", firstLayerOutput);
+ String secondLayerWeights = "{ {h:1,y:1}:1, {h:2,y:1}:-1 }";
+ String secondLayerBias = "{ {y:1}:-0.5 }";
+ String secondLayerInput = "sum(" + firstLayerOutput + "*" + secondLayerWeights + ", h) + " + secondLayerBias;
+ String secondLayerOutput = "min(1.0, max(0.0, 0.5 + " + secondLayerInput + "))"; // non-linearity, "poor man's sigmoid"
+ assertEvaluates("{ {y:1}:1 }", secondLayerOutput);
+ }
+
+ private RankingExpression assertEvaluates(String tensorValue, String expressionString) {
+ return assertEvaluates(new TensorValue(MapTensor.from(tensorValue)), expressionString, new MapContext());
+ }
+
+ private RankingExpression assertEvaluates(Value value, String expressionString, Context context) {
+ try {
+ RankingExpression expression = new RankingExpression(expressionString);
+ assertEquals(expression.toString(), value, expression.evaluate(context));
+ return expression;
+ }
+ catch (ParseException e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+}
diff --git a/searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/evaluation/StreamEvaluationBenchmark.java b/searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/evaluation/StreamEvaluationBenchmark.java
new file mode 100644
index 00000000000..837c7401813
--- /dev/null
+++ b/searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/evaluation/StreamEvaluationBenchmark.java
@@ -0,0 +1,160 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.rankingexpression.evaluation;
+
+import com.yahoo.io.IOUtils;
+import com.yahoo.searchlib.rankingexpression.RankingExpression;
+import com.yahoo.searchlib.rankingexpression.evaluation.gbdtoptimization.GBDTForestOptimizer;
+import com.yahoo.searchlib.rankingexpression.parser.ParseException;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Two small benchmarks of ranking expression evaluation
+ *
+ * @author bratseth
+ */
+public class StreamEvaluationBenchmark {
+
+ public void run() {
+ try {
+ List<Map<String, Double>> features = readFeatures("/Users/bratseth/development/data/stream/gbdtFeatures");
+ String streamExpression = readFile("/Users/bratseth/development/data/stream/stream.expression");
+ run(streamExpression, features, 10);
+ }
+ catch (ParseException e) {
+ throw new RuntimeException("Benchmarking failed", e);
+ }
+ }
+
+ private String readFile(String file) {
+ try {
+ return IOUtils.readFile(new File(file));
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ /** Read an ad-hoc file format with some similarly ad hoc code */
+ private List<Map<String, Double>> readFeatures(String fileName) {
+ try (BufferedReader reader = IOUtils.createReader(fileName)) {
+ List<Map<String, Double>> featureItems = new ArrayList<>();
+ String line;
+ Map<String, Double> featureItem = null;
+ while (null != (line = reader.readLine())) {
+ if (line.trim().equals("Printing Feature Set")) { // new feature item
+ featureItem = new HashMap<>();
+ featureItems.add(featureItem);
+ }
+ else { // a feature
+ line = line.replace("Feature key is ", "");
+ line = line.replace(" Feature Value is ", "=");
+ // now we have featurekey=featurevalue
+ String[] keyValue = line.split("=");
+ if (keyValue.length != 2)
+ System.err.println("Skipping invalid feature line '" + line + "'");
+ else
+ featureItem.put(keyValue[0], Double.parseDouble(keyValue[1]));
+ }
+ }
+ System.out.println("Read " + featureItems.size() + " feature items");
+ return featureItems;
+ }
+ catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ private void run(String expressionString, List<Map<String, Double>> features, int iterations) throws ParseException {
+ // Optimize
+ RankingExpression expression = new RankingExpression(expressionString);
+ DoubleOnlyArrayContext contextPrototype = new DoubleOnlyArrayContext(expression, true);
+ OptimizationReport forestOptimizationReport = new ExpressionOptimizer().optimize(expression, contextPrototype);
+ System.out.println(forestOptimizationReport);
+ System.out.println("Optimized expression: " + expression.getRoot());
+
+ // Warm up
+ out("Warming up ...");
+ double total = 0;
+ total += benchmarkIterations(expression , contextPrototype, features, Math.max(iterations/5, 1));
+ oul("done");
+
+ // Benchmark
+ out("Running " + iterations + " of 'stream' ...");
+ long tStartTime = System.currentTimeMillis();
+ total += benchmarkIterations(expression, contextPrototype, features, iterations);
+ long totalTime = System.currentTimeMillis() - tStartTime;
+ oul("done");
+ oul(" Total time running 'stream': " + totalTime +
+ " ms (" + totalTime*1000/(iterations*features.size()) + " microseconds/expression)");
+ }
+
+ private double benchmarkIterations(RankingExpression gbdt, Context contextPrototype,
+ List<Map<String, Double>> features, int iterations) {
+ // Simulate realistic use: The array context can be reused for a series of evaluations in a thread
+ // but each evaluation binds a new set of values.
+ double total=0;
+ Context context = copyForEvaluation(contextPrototype);
+ long totalNanoTime = 0;
+ for (int i=0; i<iterations; i++) {
+ for (Map<String, Double> featureItem : features) {
+ long startTime = System.nanoTime();
+ bindStreamingFeatures(featureItem, context);
+ total += gbdt.evaluate(context).asDouble();
+ totalNanoTime += System.nanoTime() - startTime;
+ blowCaches();
+ }
+ }
+ System.out.println("Total time fine-grain measured: " + totalNanoTime/(1000 * iterations * features.size()));
+ return total;
+ }
+
+ private double blowCaches() {
+ List<Integer> list = new ArrayList<>();
+ for (int i = 0; i < 1000 * 1000; i++) {
+ list.add(i);
+ }
+ double total = 0;
+ for (Integer i : list)
+ total += i;
+ return total;
+ }
+
+ private Context copyForEvaluation(Context contextPrototype) {
+ if (contextPrototype instanceof AbstractArrayContext) // optimized - contains name to index map
+ return ((AbstractArrayContext)contextPrototype).clone();
+ else if (contextPrototype instanceof MapContext) // Unoptimized - nothing to keep
+ return new MapContext();
+ else
+ throw new RuntimeException("Unknown context type " + contextPrototype.getClass());
+ }
+
+ private void out(String s) {
+ System.out.print(s);
+ }
+
+ private void oul(String s) {
+ System.out.println(s);
+ }
+
+ public static void main(String[] args) {
+ new StreamEvaluationBenchmark().run();
+ }
+
+ private void assertEqualish(double a,double b) {
+ if (Math.abs(a-b) >= Math.abs((a+b)/100000000) )
+ throw new RuntimeException("Expected value " + a + " but optimized evaluation produced " + b);
+ }
+
+ private void bindStreamingFeatures(Map<String, Double> featureItem, Context context) {
+ for (Map.Entry<String, Double> feature : featureItem.entrySet())
+ context.put(feature.getKey(), feature.getValue());
+ }
+
+}
diff --git a/searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/evaluation/gbdtoptimization/ContextReuseTestCase.java b/searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/evaluation/gbdtoptimization/ContextReuseTestCase.java
new file mode 100644
index 00000000000..998f25b943a
--- /dev/null
+++ b/searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/evaluation/gbdtoptimization/ContextReuseTestCase.java
@@ -0,0 +1,61 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.rankingexpression.evaluation.gbdtoptimization;
+
+import com.yahoo.io.IOUtils;
+import com.yahoo.searchlib.rankingexpression.RankingExpression;
+import com.yahoo.searchlib.rankingexpression.evaluation.ArrayContext;
+import com.yahoo.searchlib.rankingexpression.evaluation.ExpressionOptimizer;
+import com.yahoo.searchlib.rankingexpression.parser.ParseException;
+
+import java.io.File;
+import java.io.IOException;
+
+/**
+ * This tests reuse of a optimized context which is not initialized with
+ * all values referenced in the expression.
+ *
+ * @author bratseth
+ */
+public class ContextReuseTestCase extends junit.framework.TestCase {
+
+ private String contextString=
+ "CONCEPTTYPE = 0.0\n" +
+ "REGEXTYPE = 0.0\n" +
+ "POS_18 = 0.0\n" +
+ "POS_19 = 0.0\n" +
+ "ORDER_IN_CLUSTER = 2.0\n" +
+ "GOOD_SYNTAX = 1.0\n" +
+ "POS_20 = 0.0\n" +
+ "POS_11 = 0.0\n" +
+ "POS_10 = 0.0\n" +
+ "CHUNKTYPE = 0.0\n" +
+ "POS_13 = 0.0\n" +
+ "STOP_WORD_1 = 0.0\n" +
+ "TERM_CASE_2 = 0.0\n" +
+ "TERM_CASE_3 = 0.0\n" +
+ "STOP_WORD_3 = 0.0\n" +
+ "POS_15 = 0.0\n" +
+ "TERM_CASE_1 = 0.0\n" +
+ "STOP_WORD_2 = 0.0\n" +
+ "POS_1 = 0.0\n" +
+ "TERM_CASE_4 = 1.0\n" +
+ "LENGTH = 6.0\n" +
+ "EXTENDEDTYPE = 0.0\n" +
+ "ENTITYPLACETYPE = 0.0\n";
+
+ public void testIt() throws ParseException, IOException {
+ // Prepare
+ RankingExpression expression=new RankingExpression(IOUtils.readFile(new File("src/test/files/s-expression.vre")));
+ ArrayContext contextPrototype=new ArrayContext(expression);
+ new ExpressionOptimizer().optimize(expression,contextPrototype);
+
+ // Execute
+ ArrayContext context=contextPrototype.clone();
+ for (String contextValueString : contextString.split("\n")) {
+ String[] contextValueParts = contextValueString.split("=");
+ context.put(contextValueParts[0].trim(), Double.valueOf(contextValueParts[1].trim()));
+ }
+ assertEquals(-2.3450294999999994, expression.evaluate(context).asDouble());
+ }
+
+}
diff --git a/searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/evaluation/gbdtoptimization/GBDTForestOptimizerTestCase.java b/searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/evaluation/gbdtoptimization/GBDTForestOptimizerTestCase.java
new file mode 100644
index 00000000000..7058e909ef1
--- /dev/null
+++ b/searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/evaluation/gbdtoptimization/GBDTForestOptimizerTestCase.java
@@ -0,0 +1,109 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.rankingexpression.evaluation.gbdtoptimization;
+
+import com.yahoo.searchlib.rankingexpression.RankingExpression;
+import com.yahoo.searchlib.rankingexpression.evaluation.*;
+import com.yahoo.searchlib.rankingexpression.parser.ParseException;
+
+/**
+ * @author bratseth
+ */
+public class GBDTForestOptimizerTestCase extends junit.framework.TestCase {
+
+ public void testForestOptimization() throws ParseException {
+ String gbdtString =
+ "if (LW_NEWS_SEARCHES_RATIO < 1.72971, 0.0697159, if (LW_USERS < 0.10496, if (SEARCHES < 0.0329127, 0.151257, 0.117501), if (SUGG_OVERLAP < 18.5, 0.0897622, 0.0756903))) + \n" +
+ "if (LW_NEWS_SEARCHES_RATIO < 1.73156, if (NEWS_USERS < 0.0737993, -0.00481646, 0.00110018), if (LW_USERS < 0.0844616, 0.0488919, if (SUGG_OVERLAP < 32.5, 0.0136917, 9.85328E-4))) + \n" +
+ "if (LW_NEWS_SEARCHES_RATIO < 1.74451, -0.00298257, if (LW_USERS < 0.116207, if (SEARCHES < 0.0329127, 0.0676105, 0.0340198), if (NUM_WORDS < 1.5, -8.55514E-5, 0.0112406))) + \n" +
+ "if (LW_NEWS_SEARCHES_RATIO < 1.72995, if (NEWS_USERS < 0.0737993, -0.00407515, 0.00139088), if (LW_USERS == 0.0509035, 0.0439466, if (LW_USERS < 0.325818, 0.0187156, 0.00236949)))";
+ RankingExpression gbdt = new RankingExpression(gbdtString);
+
+ // Regular evaluation
+ MapContext arguments = new MapContext();
+ arguments.put("LW_NEWS_SEARCHES_RATIO", 1d);
+ arguments.put("SUGG_OVERLAP", 17d);
+ double result1 = gbdt.evaluate(arguments).asDouble();
+ arguments.put("LW_NEWS_SEARCHES_RATIO", 2d);
+ arguments.put("SUGG_OVERLAP", 20d);
+ double result2 = gbdt.evaluate(arguments).asDouble();
+ arguments.put("LW_NEWS_SEARCHES_RATIO", 2d);
+ arguments.put("SUGG_OVERLAP", 40d);
+ double result3 = gbdt.evaluate(arguments).asDouble();
+
+ // Optimized evaluation
+ ArrayContext fArguments = new ArrayContext(gbdt);
+ ExpressionOptimizer optimizer = new ExpressionOptimizer();
+ OptimizationReport report = optimizer.optimize(gbdt, fArguments);
+ assertEquals(4, report.getMetric("Optimized GDBT trees"));
+ assertEquals(4, report.getMetric("GBDT trees optimized to forests"));
+ assertEquals(1, report.getMetric("Number of forests"));
+ fArguments.put("LW_NEWS_SEARCHES_RATIO", 1d);
+ fArguments.put("SUGG_OVERLAP", 17d);
+ double oResult1 = gbdt.evaluate(fArguments).asDouble();
+ fArguments.put("LW_NEWS_SEARCHES_RATIO", 2d);
+ fArguments.put("SUGG_OVERLAP", 20d);
+ double oResult2 = gbdt.evaluate(fArguments).asDouble();
+ fArguments.put("LW_NEWS_SEARCHES_RATIO", 2d);
+ fArguments.put("SUGG_OVERLAP", 40d);
+ double oResult3 = gbdt.evaluate(fArguments).asDouble();
+
+ // Assert the same results are produced
+ // (adding linearly to one double does not produce exactly the same double
+ // as adding up a tree of stack frames though)
+ assertEqualish(result1, oResult1);
+ assertEqualish(result2, oResult2);
+ assertEqualish(result3, oResult3);
+ }
+
+ public void testForestOptimizationWithSetMembershipConditions() throws ParseException {
+ String gbdtString =
+ "if (MYSTRING in [\"string 1\",\"string 2\"], 0.0697159, if (LW_USERS < 0.10496, if (SEARCHES < 0.0329127, 0.151257, 0.117501), if (MYSTRING in [\"string 2\"], 0.0897622, 0.0756903))) + \n" +
+ "if (LW_NEWS_SEARCHES_RATIO < 1.73156, if (NEWS_USERS < 0.0737993, -0.00481646, 0.00110018), if (LW_USERS < 0.0844616, 0.0488919, if (SUGG_OVERLAP < 32.5, 0.0136917, 9.85328E-4))) + \n" +
+ "if (LW_NEWS_SEARCHES_RATIO < 1.74451, -0.00298257, if (LW_USERS < 0.116207, if (SEARCHES < 0.0329127, 0.0676105, 0.0340198), if (NUM_WORDS < 1.5, -8.55514E-5, 0.0112406))) + \n" +
+ "if (LW_NEWS_SEARCHES_RATIO < 1.72995, if (NEWS_USERS < 0.0737993, -0.00407515, 0.00139088), if (LW_USERS == 0.0509035, 0.0439466, if (LW_USERS < 0.325818, 0.0187156, 0.00236949)))";
+ RankingExpression gbdt = new RankingExpression(gbdtString);
+
+ // Regular evaluation
+ MapContext arguments = new MapContext();
+ arguments.put("MYSTRING", new StringValue("string 1"));
+ arguments.put("LW_NEWS_SEARCHES_RATIO", 1d);
+ arguments.put("SUGG_OVERLAP", 17d);
+ double result1 = gbdt.evaluate(arguments).asDouble();
+ arguments.put("LW_NEWS_SEARCHES_RATIO", 2d);
+ arguments.put("SUGG_OVERLAP", 20d);
+ double result2 = gbdt.evaluate(arguments).asDouble();
+ arguments.put("LW_NEWS_SEARCHES_RATIO", 2d);
+ arguments.put("SUGG_OVERLAP", 40d);
+ double result3 = gbdt.evaluate(arguments).asDouble();
+
+ // Optimized evaluation
+ ArrayContext fArguments = new ArrayContext(gbdt);
+ ExpressionOptimizer optimizer = new ExpressionOptimizer();
+ OptimizationReport report = optimizer.optimize(gbdt, fArguments);
+ assertEquals(4, report.getMetric("Optimized GDBT trees"));
+ assertEquals(4, report.getMetric("GBDT trees optimized to forests"));
+ assertEquals(1, report.getMetric("Number of forests"));
+ fArguments.put("MYSTRING", new StringValue("string 1"));
+ fArguments.put("LW_NEWS_SEARCHES_RATIO", 1d);
+ fArguments.put("SUGG_OVERLAP", 17d);
+ double oResult1 = gbdt.evaluate(fArguments).asDouble();
+ fArguments.put("LW_NEWS_SEARCHES_RATIO", 2d);
+ fArguments.put("SUGG_OVERLAP", 20d);
+ double oResult2 = gbdt.evaluate(fArguments).asDouble();
+ fArguments.put("LW_NEWS_SEARCHES_RATIO", 2d);
+ fArguments.put("SUGG_OVERLAP", 40d);
+ double oResult3 = gbdt.evaluate(fArguments).asDouble();
+
+ // Assert the same results are produced
+ // (adding linearly to one double does not produce exactly the same double
+ // as adding up a tree of stack frames though)
+ assertEqualish(result1, oResult1);
+ assertEqualish(result2, oResult2);
+ assertEqualish(result3, oResult3);
+ }
+
+ private void assertEqualish(double a, double b) {
+ assertTrue("Almost equal to " + a + ": " + b, Math.abs(a - b) < ((a + b) / 100000000));
+ }
+
+}
diff --git a/searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/evaluation/gbdtoptimization/GBDTOptimizerTestCase.java b/searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/evaluation/gbdtoptimization/GBDTOptimizerTestCase.java
new file mode 100644
index 00000000000..993262b1241
--- /dev/null
+++ b/searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/evaluation/gbdtoptimization/GBDTOptimizerTestCase.java
@@ -0,0 +1,105 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.rankingexpression.evaluation.gbdtoptimization;
+
+import com.yahoo.searchlib.rankingexpression.RankingExpression;
+import com.yahoo.searchlib.rankingexpression.evaluation.ArrayContext;
+import com.yahoo.searchlib.rankingexpression.evaluation.ExpressionOptimizer;
+import com.yahoo.searchlib.rankingexpression.evaluation.MapContext;
+import com.yahoo.searchlib.rankingexpression.evaluation.OptimizationReport;
+import com.yahoo.searchlib.rankingexpression.parser.ParseException;
+
+/**
+ * @author bratseth
+ */
+public class GBDTOptimizerTestCase extends junit.framework.TestCase {
+
+ public void testSimpleNodeOptimization() throws ParseException {
+ RankingExpression gbdt=new RankingExpression("if (a < 2, if (b < 2, 5, 6), 4) + if (a < 3, 7, 8)");
+
+ // Optimized evaluation
+ ArrayContext arguments=new ArrayContext(gbdt);
+ ExpressionOptimizer optimizer=new ExpressionOptimizer();
+ optimizer.getOptimizer(GBDTForestOptimizer.class).setEnabled(false);
+ OptimizationReport report=optimizer.optimize(gbdt,arguments);
+ assertEquals(2,report.getMetric("Optimized GDBT trees"));
+ arguments.put("a",1d);
+ arguments.put("b",2d);
+ assertEquals(13.0,gbdt.evaluate(arguments).asDouble());
+ }
+
+ public void testNodeOptimization() throws ParseException {
+ String gbdtString=
+ "if (LW_NEWS_SEARCHES_RATIO < 1.72971, 0.0697159, if (LW_USERS < 0.10496, if (SEARCHES < 0.0329127, 0.151257, 0.117501), if (SUGG_OVERLAP < 18.5, 0.0897622, 0.0756903))) + \n" +
+ "if (LW_NEWS_SEARCHES_RATIO < 1.73156, if (NEWS_USERS < 0.0737993, -0.00481646, 0.00110018), if (LW_USERS < 0.0844616, 0.0488919, if (SUGG_OVERLAP < 32.5, 0.0136917, 9.85328E-4))) + \n" +
+ "if (LW_NEWS_SEARCHES_RATIO < 1.74451, -0.00298257, if (LW_USERS < 0.116207, if (SEARCHES < 0.0329127, 0.0676105, 0.0340198), if (NUM_WORDS < 1.5, -8.55514E-5, 0.0112406))) + \n" +
+ "if (LW_NEWS_SEARCHES_RATIO < 1.72995, if (NEWS_USERS < 0.0737993, -0.00407515, 0.00139088), if (LW_USERS == 0.0509035, 0.0439466, if (LW_USERS < 0.325818, 0.0187156, 0.00236949)))";
+ RankingExpression gbdt=new RankingExpression(gbdtString);
+
+ // Regular evaluation
+ MapContext arguments=new MapContext();
+ arguments.put("LW_NEWS_SEARCHES_RATIO",1d);
+ arguments.put("SUGG_OVERLAP",17d);
+ double result1=gbdt.evaluate(arguments).asDouble();
+ arguments.put("LW_NEWS_SEARCHES_RATIO",2d);
+ arguments.put("SUGG_OVERLAP",20d);
+ double result2=gbdt.evaluate(arguments).asDouble();
+ arguments.put("LW_NEWS_SEARCHES_RATIO",2d);
+ arguments.put("SUGG_OVERLAP",40d);
+ double result3=gbdt.evaluate(arguments).asDouble();
+
+ // Optimized evaluation
+ ArrayContext fArguments=new ArrayContext(gbdt);
+ ExpressionOptimizer optimizer=new ExpressionOptimizer();
+ optimizer.getOptimizer(GBDTForestOptimizer.class).setEnabled(false);
+ OptimizationReport report=optimizer.optimize(gbdt,fArguments);
+ assertEquals(4,report.getMetric("Optimized GDBT trees"));
+ fArguments.put("LW_NEWS_SEARCHES_RATIO",1d);
+ fArguments.put("SUGG_OVERLAP",17d);
+ double oResult1=gbdt.evaluate(fArguments).asDouble();
+ fArguments.put("LW_NEWS_SEARCHES_RATIO",2d);
+ fArguments.put("SUGG_OVERLAP",20d);
+ double oResult2=gbdt.evaluate(fArguments).asDouble();
+ fArguments.put("LW_NEWS_SEARCHES_RATIO",2d);
+ fArguments.put("SUGG_OVERLAP",40d);
+ double oResult3=gbdt.evaluate(fArguments).asDouble();
+
+ // Assert the same results are produced
+ assertEquals(result1,oResult1);
+ assertEquals(result2,oResult2);
+ assertEquals(result3,oResult3);
+ }
+
+ public void testFeatureNamesWithDots() throws ParseException {
+ String gbdtString=
+ "if (a.b < 1.72971, 0.0697159, if (a.b.c < 0.10496, if (a.c < 0.0329127, 0.151257, 0.117501), if (a < 18.5, 0.0897622, 0.0756903))) + 1";
+ RankingExpression gbdt=new RankingExpression(gbdtString);
+
+ // Regular evaluation
+ MapContext arguments=new MapContext();
+ arguments.put("a.b",1d);
+ arguments.put("a.b.c",0.1d);
+ arguments.put("a.c",0.01d);
+ arguments.put("a",19d);
+ double result=gbdt.evaluate(arguments).asDouble();
+
+ // Optimized evaluation
+ ArrayContext fArguments=new ArrayContext(gbdt);
+ OptimizationReport report=new OptimizationReport();
+ new GBDTOptimizer().optimize(gbdt,fArguments,report);
+ assertEquals("Optimization result is as expected:\n" + report,1,report.getMetric("Optimized GDBT trees"));
+ fArguments.put("a.b",1d);
+ fArguments.put("a.b.c",0.1d);
+ fArguments.put("a.c",0.01d);
+ fArguments.put("a",19d);
+ double oResult=gbdt.evaluate(fArguments).asDouble();
+
+ // Assert the same results are produced
+ assertEquals(result,oResult);
+ }
+
+ public void testBug4009433() throws ParseException {
+ RankingExpression exp = new RankingExpression("10*if(two>35,if(two>one,if(two>=670,4,8),if(two>8000,5,3)),if(two==478,90,91))");
+ new GBDTOptimizer().optimize(exp, new ArrayContext(exp), new OptimizationReport());
+ }
+
+}
diff --git a/searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/rule/ArgumentsTestCase.java b/searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/rule/ArgumentsTestCase.java
new file mode 100644
index 00000000000..5402935697d
--- /dev/null
+++ b/searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/rule/ArgumentsTestCase.java
@@ -0,0 +1,42 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.rankingexpression.rule;
+
+import org.junit.Test;
+
+import java.util.Arrays;
+import java.util.Collections;
+
+import static org.junit.Assert.*;
+
+/**
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
+ */
+public class ArgumentsTestCase {
+
+ @Test
+ public void requireThatAccessorsWork() {
+ Arguments args = new Arguments(null);
+ assertTrue(args.expressions().isEmpty());
+
+ args = new Arguments(Collections.<ExpressionNode>emptyList());
+ assertTrue(args.expressions().isEmpty());
+
+ NameNode foo = new NameNode("foo");
+ NameNode bar = new NameNode("bar");
+ args = new Arguments(Arrays.asList(foo, bar));
+ assertEquals(2, args.expressions().size());
+ assertSame(foo, args.expressions().get(0));
+ assertSame(bar, args.expressions().get(1));
+ }
+
+ @Test
+ public void requireThatHashCodeAndEqualsWork() {
+ Arguments arg1 = new Arguments(Arrays.asList(new NameNode("foo"), new NameNode("bar")));
+ Arguments arg2 = new Arguments(Arrays.asList(new NameNode("foo"), new NameNode("bar")));
+ Arguments arg3 = new Arguments(Arrays.asList(new NameNode("foo")));
+
+ assertEquals(arg1.hashCode(), arg2.hashCode());
+ assertTrue(arg1.equals(arg2));
+ assertFalse(arg2.equals(arg3));
+ }
+}
diff --git a/searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/rule/ReferenceNodeTestCase.java b/searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/rule/ReferenceNodeTestCase.java
new file mode 100644
index 00000000000..6070a3805c6
--- /dev/null
+++ b/searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/rule/ReferenceNodeTestCase.java
@@ -0,0 +1,35 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.rankingexpression.rule;
+
+import org.junit.Test;
+
+import java.util.Arrays;
+import java.util.List;
+
+import static org.junit.Assert.assertEquals;
+
+/**
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
+ */
+public class ReferenceNodeTestCase {
+
+ @Test
+ public void requireThatAccessorsWork() {
+ ReferenceNode node = new ReferenceNode("foo", Arrays.asList(new NameNode("bar"), new NameNode("baz")), "cox");
+ assertEquals("foo", node.getName());
+ List<ExpressionNode> args = node.getArguments().expressions();
+ assertEquals(2, args.size());
+ assertEquals(new NameNode("bar"), args.get(0));
+ assertEquals(new NameNode("baz"), args.get(1));
+ assertEquals("cox", node.getOutput());
+
+ node = node.setArguments(Arrays.<ExpressionNode>asList(new NameNode("bar'")));
+ assertEquals(new NameNode("bar'"), node.getArguments().expressions().get(0));
+
+ node = node.setArguments(Arrays.<ExpressionNode>asList(new NameNode("baz'")));
+ assertEquals(new NameNode("baz'"), node.getArguments().expressions().get(0));
+
+ node = node.setOutput("cox'");
+ assertEquals("cox'", node.getOutput());
+ }
+}
diff --git a/searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/transform/ConstantDereferencerTestCase.java b/searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/transform/ConstantDereferencerTestCase.java
new file mode 100644
index 00000000000..9fbaddaab1e
--- /dev/null
+++ b/searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/transform/ConstantDereferencerTestCase.java
@@ -0,0 +1,30 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.rankingexpression.transform;
+
+import com.yahoo.searchlib.rankingexpression.RankingExpression;
+import com.yahoo.searchlib.rankingexpression.evaluation.Value;
+import com.yahoo.searchlib.rankingexpression.parser.ParseException;
+import org.junit.Test;
+import static org.junit.Assert.*;
+
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a>
+ */
+public class ConstantDereferencerTestCase {
+
+ @Test
+ public void testConstantDereferencer() throws ParseException {
+ Map<String, Value> constants = new HashMap<>();
+ constants.put("a", Value.parse("1.0"));
+ constants.put("b", Value.parse("2"));
+ constants.put("c", Value.parse("3.5"));
+ ConstantDereferencer c = new ConstantDereferencer(constants);
+
+ assertEquals("1.0 + 2.0 + 3.5", c.transform(new RankingExpression("a + b + c")).toString());
+ assertEquals("myMacro(1.0,2.0)", c.transform(new RankingExpression("myMacro(a, b)")).toString());
+ }
+
+}
diff --git a/searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/transform/SimplifierTestCase.java b/searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/transform/SimplifierTestCase.java
new file mode 100644
index 00000000000..69ec3a914d1
--- /dev/null
+++ b/searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/transform/SimplifierTestCase.java
@@ -0,0 +1,80 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.rankingexpression.transform;
+
+import com.yahoo.searchlib.rankingexpression.RankingExpression;
+import com.yahoo.searchlib.rankingexpression.evaluation.Context;
+import com.yahoo.searchlib.rankingexpression.evaluation.MapContext;
+import com.yahoo.searchlib.rankingexpression.parser.ParseException;
+import com.yahoo.searchlib.rankingexpression.rule.CompositeNode;
+import org.junit.Test;
+import static org.junit.Assert.*;
+
+/**
+ * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a>
+ */
+public class SimplifierTestCase {
+
+ @Test
+ public void testSimplify() throws ParseException {
+ Simplifier s = new Simplifier();
+ assertEquals("a + b", s.transform(new RankingExpression("a + b")).toString());
+ assertEquals("6.5", s.transform(new RankingExpression("1.0 + 2.0 + 3.5")).toString());
+ assertEquals("6.5", s.transform(new RankingExpression("1.0 + ( 2.0 + 3.5 )")).toString());
+ assertEquals("6.5", s.transform(new RankingExpression("( 1.0 + 2.0 ) + 3.5 ")).toString());
+ assertEquals("6.5", s.transform(new RankingExpression("1.0 + ( 2.0 + 3.5 )")).toString());
+ assertEquals("7.5", s.transform(new RankingExpression("1.0 + ( 2.0 + 3.5 ) + 1")).toString());
+ assertEquals("6.5 + a", s.transform(new RankingExpression("1.0 + ( 2.0 + 3.5 ) + a")).toString());
+ assertEquals("7.5", s.transform(new RankingExpression("7.5 + ( 2.0 + 3.5 ) * 0.0")).toString());
+ assertEquals("7.5", s.transform(new RankingExpression("7.5 + ( 2.0 + 3.5 ) * (0.0)")).toString());
+ assertEquals("7.5", s.transform(new RankingExpression("7.5 + ( 2.0 + 3.5 ) * (1.0 - 1.0)")).toString());
+ assertEquals("7.5", s.transform(new RankingExpression("if (2 > 0, 3.5 * 2 + 0.5, a *3 )")).toString());
+ assertEquals("0.0", s.transform(new RankingExpression("0.0 * (1.3 + 7.0)")).toString());
+ assertEquals("6.4", s.transform(new RankingExpression("max(0, 10.0-2.0)*(1-fabs(0.0-0.2))")).toString());
+ assertEquals("(query(d) + query(b) - query(a)) * query(c) / query(e)", s.transform(new RankingExpression("(query(d) + query(b) - query(a)) * query(c) / query(e)")).toString());
+ assertEquals("14.0", s.transform(new RankingExpression("5 + (2 + 3) + 4")).toString());
+ assertEquals("28.0 + bar", s.transform(new RankingExpression("7.0 + 12.0 + 9.0 + bar")).toString());
+ assertEquals("1.0 - 0.001 * attribute(number)", s.transform(new RankingExpression("1.0 - 0.001*attribute(number)")).toString());
+ assertEquals("attribute(number) * 1.5 - 0.001 * attribute(number)", s.transform(new RankingExpression("attribute(number) * 1.5 - 0.001 * attribute(number)")).toString());
+ }
+
+ // A black box test verifying we are not screwing up real expressions
+ @Test
+ public void testSimplifyComplexExpression() throws ParseException {
+ RankingExpression initial = new RankingExpression("sqrt(if (if (INFERRED * 0.9 < INFERRED, GMP, (1 + 1.1) * INFERRED) < INFERRED * INFERRED - INFERRED, if (GMP < 85.80799542793133 * GMP, INFERRED, if (GMP < GMP, tanh(INFERRED), log(76.89956221113943))), tanh(tanh(INFERRED))) * sqrt(sqrt(GMP + INFERRED)) * GMP ) + 13.5 * (1 - GMP) * pow(GMP * 0.1, 2 + 1.1 * 0)");
+ RankingExpression simplified = new Simplifier().transform(initial);
+
+ Context context = new MapContext();
+ context.put("INFERRED", 0.5);
+ context.put("GMP", 80.0);
+ context.put("value", 50.0);
+ assertEquals(initial.evaluate(context), simplified.evaluate(context));
+ context.put("INFERRED", 38.0);
+ context.put("GMP", 80.0);
+ context.put("value", 50.0);
+ assertEquals(initial.evaluate(context), simplified.evaluate(context));
+ context.put("INFERRED", 38.0);
+ context.put("GMP", 90.0);
+ context.put("value", 100.0);
+ assertEquals(initial.evaluate(context), simplified.evaluate(context));
+ context.put("INFERRED", 500.0);
+ context.put("GMP", 90.0);
+ context.put("value", 100.0);
+ assertEquals(initial.evaluate(context), simplified.evaluate(context));
+ }
+
+ @Test
+ public void testParenthesisPreservation() throws ParseException {
+ Simplifier s = new Simplifier();
+ CompositeNode transformed = (CompositeNode)s.transform(new RankingExpression("a + (b + c) / 100000000.0")).getRoot();
+ assertEquals("a + (b + c) / 100000000.0", transformed.toString());
+ }
+
+ @Test
+ public void testSimplificationWithTensorConstants() throws ParseException {
+ new Simplifier().transform(new RankingExpression(
+ "sum(sum((tensorFromWeightedSet(query(wset_query),x)+" +
+ " tensorFromWeightedSet(attribute(wset),x)) * " +
+ " {{x:0,y:0}:54, {x:0,y:1} :69, {x:1,y:0} :72, {x:1,y:1} :93},x))"));
+ }
+
+}
diff --git a/searchlib/src/test/java/com/yahoo/searchlib/treenet/TreeNetParserTestCase.java b/searchlib/src/test/java/com/yahoo/searchlib/treenet/TreeNetParserTestCase.java
new file mode 100755
index 00000000000..0e27d53338a
--- /dev/null
+++ b/searchlib/src/test/java/com/yahoo/searchlib/treenet/TreeNetParserTestCase.java
@@ -0,0 +1,79 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.treenet;
+
+import com.yahoo.searchlib.rankingexpression.RankingExpression;
+import com.yahoo.searchlib.treenet.parser.ParseException;
+import com.yahoo.searchlib.treenet.parser.TreeNetParser;
+import junit.framework.TestCase;
+
+import java.io.*;
+
+/**
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
+ */
+public class TreeNetParserTestCase extends TestCase {
+
+ private static final boolean WRITE_FILES = false;
+
+ public void testRankingExpression() {
+ for (int i = 1; i <= 8; ++i) {
+ String inputFile = String.format("src/test/files/treenet%02d.model", i);
+ String outputFile = String.format("src/test/files/ranking%02d.expression", i);
+ String input = readFile(inputFile);
+ String expression = convertModel(inputFile, input);
+ if (WRITE_FILES) {
+ writeFile(outputFile, expression);
+ }
+ else {
+ String output = readFile(outputFile);
+ assertParseable(output, outputFile);
+ assertEquals(output.trim(), expression);
+ }
+ }
+ }
+
+ private void assertParseable(String rankingExpressionString,String fileName) {
+ try {
+ new RankingExpression(rankingExpressionString);
+ }
+ catch (com.yahoo.searchlib.rankingexpression.parser.ParseException e) {
+ throw new RuntimeException("Could not parse ranking expression in '" + fileName + "'",e);
+ }
+ }
+
+ private String convertModel(String modelFile, String model) {
+ try {
+ TreeNetParser parser = new TreeNetParser(new StringReader(model));
+ return parser.treeNet().toRankingExpression();
+ } catch (ParseException e) {
+ throw new AssertionError("In model " + modelFile + ": " + e.getMessage(), e);
+ }
+ }
+
+ private String readFile(String file) {
+ try {
+ StringBuilder ret = new StringBuilder();
+ BufferedReader in = new BufferedReader(new FileReader(file));
+ while (true) {
+ String str = in.readLine();
+ if (str == null) {
+ break;
+ }
+ ret.append(str).append("\n");
+ }
+ return ret.toString();
+ } catch (IOException e) {
+ throw new AssertionError(e);
+ }
+ }
+
+ private void writeFile(String file, String content) {
+ try {
+ FileWriter out = new FileWriter(file);
+ out.write(content);
+ out.close();
+ } catch (IOException e) {
+ throw new AssertionError(e);
+ }
+ }
+}
diff --git a/searchlib/src/testlist.txt b/searchlib/src/testlist.txt
new file mode 100644
index 00000000000..a5f728cdbae
--- /dev/null
+++ b/searchlib/src/testlist.txt
@@ -0,0 +1,137 @@
+?tests/groupingengine
+?tests/sort
+tests/aggregator
+tests/alignment
+tests/attribute
+tests/attribute/attributefilewriter
+tests/attribute/attributemanager
+tests/attribute/bitvector
+tests/attribute/comparator
+tests/attribute/document_weight_iterator
+tests/attribute/enumeratedsave
+tests/attribute/enumstore
+tests/attribute/extendattributes
+tests/attribute/multivaluemapping
+tests/attribute/postinglist
+tests/attribute/postinglistattribute
+tests/attribute/searchable
+tests/attribute/searchcontext
+tests/attribute/sourceselector
+tests/attribute/stringattribute
+tests/attribute/tensorattribute
+tests/bitcompression/expgolomb
+tests/bitvector
+tests/btree
+tests/bytecomplens
+tests/common/bitvector
+tests/common/foregroundtaskexecutor
+tests/common/location
+tests/common/packets
+tests/common/rcuvector
+tests/common/resultset
+tests/common/sequencedtaskexecutor
+tests/common/summaryfeatures
+tests/datastore
+tests/diskindex/bitvector
+tests/diskindex/diskindex
+tests/diskindex/fieldwriter
+tests/diskindex/fusion
+tests/diskindex/pagedict4
+tests/document_store
+tests/document_store/visitor
+tests/engine/docsumapi
+tests/engine/monitorapi
+tests/engine/searchapi
+tests/engine/transportserver
+tests/features
+tests/features/beta
+tests/features/element_completeness
+tests/features/element_similarity_feature
+tests/features/euclidean_distance
+tests/features/item_raw_score
+tests/features/native_dot_product
+tests/features/ranking_expression
+tests/features/raw_score
+tests/features/subqueries
+tests/features/tensor
+tests/features/tensor_from_labels
+tests/features/tensor_from_weighted_set
+tests/features/text_similarity_feature
+tests/features/util
+tests/fef
+tests/fef/attributecontent
+tests/fef/featurenamebuilder
+tests/fef/featurenameparser
+tests/fef/featureoverride
+tests/fef/object_passing
+tests/fef/parameter
+tests/fef/phrasesplitter
+tests/fef/properties
+tests/fef/rank_program
+tests/fef/resolver
+tests/fef/table
+tests/fef/termfieldmodel
+tests/fef/termmatchdatamerger
+tests/fileheaderinspect
+tests/fileheadertk
+tests/forcelink
+tests/grouping
+tests/hitcollector
+tests/index/docbuilder
+tests/index/doctypebuilder
+tests/indexmetainfo
+tests/ld-library-path
+tests/memoryindex/btree
+tests/memoryindex/compact_document_words_store
+tests/memoryindex/datastore
+tests/memoryindex/dictionary
+tests/memoryindex/document_remover
+tests/memoryindex/documentinverter
+tests/memoryindex/fieldinverter
+tests/memoryindex/memoryindex
+tests/memoryindex/urlfieldinverter
+tests/memorytub
+tests/nativerank
+tests/nearsearch
+tests/postinglistbm
+tests/predicate
+tests/prettyfloat
+tests/query
+tests/queryeval
+tests/queryeval/blueprint
+tests/queryeval/booleanmatchiteratorwrapper
+tests/queryeval/dot_product
+tests/queryeval/equiv
+tests/queryeval/fake_searchable
+tests/queryeval/getnodeweight
+tests/queryeval/monitoring_search_iterator
+tests/queryeval/multibitvectoriterator
+tests/queryeval/parallel_weak_and
+tests/queryeval/predicate
+tests/queryeval/simple_phrase
+tests/queryeval/sourceblender
+tests/queryeval/sparse_vector_benchmark
+tests/queryeval/termwise_eval
+tests/queryeval/weak_and
+tests/queryeval/weak_and_heap
+tests/queryeval/weak_and_scorers
+tests/queryeval/weighted_set_term
+tests/rankingexpression/feature_name_extractor
+tests/ranksetup
+tests/ranksetup/verify_feature
+tests/sortresults
+tests/sortspec
+tests/stackdumpiterator
+tests/stringenum
+tests/transactionlog
+tests/transactionlogstress
+tests/true
+tests/url
+tests/util
+tests/util/bufferwriter
+tests/util/ioerrorhandler
+tests/util/searchable_stats
+tests/util/sigbushandler
+tests/util/slime_output_raw_buf_adapter
+tests/util/statebuf
+tests/util/statefile
diff --git a/searchlib/src/tests/.gitignore b/searchlib/src/tests/.gitignore
new file mode 100644
index 00000000000..a3e9c375723
--- /dev/null
+++ b/searchlib/src/tests/.gitignore
@@ -0,0 +1,3 @@
+.depend
+Makefile
+*_test
diff --git a/searchlib/src/tests/aggregator/.gitignore b/searchlib/src/tests/aggregator/.gitignore
new file mode 100644
index 00000000000..fed1175d7cd
--- /dev/null
+++ b/searchlib/src/tests/aggregator/.gitignore
@@ -0,0 +1,7 @@
+*.dat
+.depend
+Makefile
+aggregator_test
+perdocexpr_test
+searchlib_attr_test_app
+searchlib_perdocexpr_test_app
diff --git a/searchlib/src/tests/aggregator/CMakeLists.txt b/searchlib/src/tests/aggregator/CMakeLists.txt
new file mode 100644
index 00000000000..1cc750a8fac
--- /dev/null
+++ b/searchlib/src/tests/aggregator/CMakeLists.txt
@@ -0,0 +1,15 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_perdocexpr_test_app
+ SOURCES
+ perdocexpr.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_perdocexpr_test_app COMMAND searchlib_perdocexpr_test_app)
+vespa_add_executable(searchlib_attr_test_app
+ SOURCES
+ attr_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_attr_test_app COMMAND searchlib_attr_test_app)
diff --git a/searchlib/src/tests/aggregator/DESC b/searchlib/src/tests/aggregator/DESC
new file mode 100644
index 00000000000..74bbb4a99fe
--- /dev/null
+++ b/searchlib/src/tests/aggregator/DESC
@@ -0,0 +1 @@
+This is a test of the aggregator manager interface.
diff --git a/searchlib/src/tests/aggregator/FILES b/searchlib/src/tests/aggregator/FILES
new file mode 100644
index 00000000000..2d49a798a26
--- /dev/null
+++ b/searchlib/src/tests/aggregator/FILES
@@ -0,0 +1 @@
+aggregator.cpp
diff --git a/searchlib/src/tests/aggregator/attr_test.cpp b/searchlib/src/tests/aggregator/attr_test.cpp
new file mode 100644
index 00000000000..5184f61b573
--- /dev/null
+++ b/searchlib/src/tests/aggregator/attr_test.cpp
@@ -0,0 +1,285 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/vespalib/testkit/test_kit.h>
+
+#include <vespa/searchlib/aggregation/perdocexpression.h>
+#include <vespa/searchlib/aggregation/aggregation.h>
+#include <vespa/searchlib/attribute/extendableattributes.h>
+#include <vespa/vespalib/objects/objectdumper.h>
+
+using namespace search;
+using namespace search::expression;
+using namespace vespalib;
+
+
+struct AttributeFixture {
+
+ AttributeGuard guard;
+
+ const double doc0attr[11] = {
+ 0.1428571428571428,
+ 0.2539682539682539,
+ 0.3448773448773448,
+ 0.4218004218004217,
+ 0.4884670884670883,
+ 0.5472906178788530,
+ 0.5999221968262214,
+ 0.6475412444452690,
+ 0.6910195053148342,
+ 0.7310195053148342,
+ 0.7680565423518712
+ };
+ const double doc1attr[11] = {
+ 0.1408450704225352,
+ 0.2507351803126450,
+ 0.3408252704027350,
+ 0.4171611482653304,
+ 0.4833863138282443,
+ 0.5418658459919869,
+ 0.5942218669343952,
+ 0.6416152318633051,
+ 0.6849052751533483,
+ 0.7247459126035475,
+ 0.7616462816072375
+ };
+
+ AttributeFixture() : guard()
+ {
+ MultiFloatExtAttribute *attr = new MultiFloatExtAttribute("sortedArrayAttr");
+ DocId d = 0;
+
+ attr->addDoc(d);
+ for (double val : doc0attr) {
+ attr->add(val);
+ }
+ attr->addDoc(d);
+ for (double val : doc1attr) {
+ attr->add(val);
+ }
+ AttributeVector::SP sp(attr);
+ guard = AttributeGuard(sp);
+ }
+};
+
+struct IntAttrFixture {
+ AttributeGuard guard;
+
+ const int64_t doc0attr[11] = {
+ 1,
+ 333,
+ 88888888L,
+ -17
+ };
+ const double doc1attr[11] = {
+ 2,
+ -42,
+ 4444,
+ 999999999L
+ };
+
+ IntAttrFixture() : guard()
+ {
+ MultiIntegerExtAttribute *attr = new MultiIntegerExtAttribute("sortedArrayAttr");
+ DocId d = 0;
+ attr->addDoc(d);
+ for (int64_t val : doc0attr) {
+ attr->add(val);
+ }
+ attr->addDoc(d);
+ for (int64_t val : doc1attr) {
+ attr->add(val);
+ }
+ AttributeVector::SP sp(attr);
+ guard = AttributeGuard(sp);
+ }
+};
+
+struct StringAttrFixture {
+ AttributeGuard guard;
+ StringAttrFixture() : guard()
+ {
+ MultiStringExtAttribute *attr = new MultiStringExtAttribute("sortedArrayAttr");
+ DocId d = 0;
+ attr->addDoc(d);
+ attr->add("1");
+ attr->add("333");
+ attr->add("88888888");
+ attr->addDoc(d);
+ attr->add("2");
+ attr->add("4444");
+ attr->add("999999999");
+ AttributeVector::SP sp(attr);
+ guard = AttributeGuard(sp);
+ }
+};
+
+
+TEST_F("testArrayAt", AttributeFixture()) {
+ for (int i = 0; i < 11; i++) {
+ ExpressionNode::CP cn(new ConstantNode(new Int64ResultNode(i)));
+ ExpressionNode::CP ln(new ArrayAtLookup(*f1.guard, cn));
+
+ ExpressionTree et(ln);
+ ExpressionTree::Configure treeConf;
+ et.select(treeConf, treeConf);
+ EXPECT_TRUE(et.getResult().getClass().inherits(FloatResultNode::classId));
+
+ EXPECT_TRUE(et.execute(0, HitRank(0.0)));
+ EXPECT_EQUAL(et.getResult().getFloat(), f1.doc0attr[i]);
+ EXPECT_TRUE(et.execute(1, HitRank(0.0)));
+ EXPECT_EQUAL(et.getResult().getFloat(), f1.doc1attr[i]);
+ }
+}
+
+TEST_F("testArrayAtInt", IntAttrFixture()) {
+ for (int i = 0; i < 3; i++) {
+ ExpressionNode::CP othercn(new ConstantNode(new Int64ResultNode(4567)));
+ ArrayAtLookup *x = new ArrayAtLookup(*f1.guard, othercn);
+ ExpressionNode::CP cn(new ConstantNode(new Int64ResultNode(i)));
+ ArrayAtLookup *y = new ArrayAtLookup(*f1.guard, cn);
+ *x = *y;
+ delete y;
+ ExpressionNode::CP ln(x);
+
+ ExpressionTree et(ln);
+ ExpressionTree::Configure treeConf;
+ et.select(treeConf, treeConf);
+ EXPECT_TRUE(et.getResult().getClass().inherits(IntegerResultNode::classId));
+
+ EXPECT_TRUE(et.execute(0, HitRank(0.0)));
+ EXPECT_EQUAL(et.getResult().getInteger(), f1.doc0attr[i]);
+ EXPECT_TRUE(et.execute(1, HitRank(0.0)));
+ EXPECT_EQUAL(et.getResult().getInteger(), f1.doc1attr[i]);
+ }
+}
+
+
+TEST_F("testArrayAtString", StringAttrFixture()) {
+ ExpressionNode::CP cn(new ConstantNode(new Int64ResultNode(1)));
+ ExpressionNode::CP ln(new ArrayAtLookup(*f1.guard, cn));
+
+ ExpressionTree et(ln);
+ ExpressionTree::Configure treeConf;
+ et.select(treeConf, treeConf);
+ EXPECT_TRUE(et.getResult().getClass().inherits(StringResultNode::classId));
+
+ char mem[64];
+ ResultNode::BufferRef buf(&mem, sizeof(mem));
+
+ EXPECT_TRUE(et.execute(0, HitRank(0.0)));
+ EXPECT_EQUAL(et.getResult().getString(buf).c_str(), std::string("333"));
+
+ EXPECT_TRUE(et.execute(1, HitRank(0.0)));
+ EXPECT_EQUAL(et.getResult().getString(buf).c_str(), std::string("4444"));
+}
+
+struct ArrayAtExpressionFixture :
+ public AttributeFixture
+{
+ ExpressionNode::CP cn;
+ ExpressionNode::CP ln;
+ ExpressionTree et;
+
+ ArrayAtExpressionFixture(int i) :
+ AttributeFixture(),
+ cn(new ConstantNode(new Int64ResultNode(i))),
+ ln(new ArrayAtLookup(*guard, cn)),
+ et(ln)
+ {
+ ExpressionTree::Configure treeConf;
+ et.select(treeConf, treeConf);
+ }
+};
+
+
+TEST_F("testArrayAtBelowRange", ArrayAtExpressionFixture(-1)) {
+ EXPECT_TRUE(f1.et.getResult().getClass().inherits(FloatResultNode::classId));
+
+ EXPECT_TRUE(f1.et.execute(0, HitRank(0.0)));
+ EXPECT_EQUAL(f1.et.getResult().getFloat(), f1.doc0attr[0]);
+ EXPECT_TRUE(f1.et.execute(1, HitRank(0.0)));
+ EXPECT_EQUAL(f1.et.getResult().getFloat(), f1.doc1attr[0]);
+}
+
+TEST_F("testArrayAtAboveRange", ArrayAtExpressionFixture(17)) {
+ EXPECT_TRUE(f1.et.getResult().getClass().inherits(FloatResultNode::classId));
+
+ EXPECT_TRUE(f1.et.execute(0, HitRank(0.0)));
+ EXPECT_EQUAL(f1.et.getResult().getFloat(), f1.doc0attr[10]);
+ EXPECT_TRUE(f1.et.execute(1, HitRank(0.0)));
+ EXPECT_EQUAL(f1.et.getResult().getFloat(), f1.doc1attr[10]);
+}
+
+TEST_F("testInterpolatedLookup", AttributeFixture()) {
+
+ ExpressionNode::CP c1(new ConstantNode(new FloatResultNode(f1.doc0attr[2])));
+ ExpressionNode::CP l1(new InterpolatedLookup(*f1.guard, c1));
+
+ ExpressionTree et(l1);
+ ExpressionTree::Configure treeConf;
+ et.select(treeConf, treeConf);
+
+ EXPECT_TRUE(et.getResult().getClass().inherits(FloatResultNode::classId));
+
+ EXPECT_TRUE(et.execute(0, HitRank(0.0)));
+ EXPECT_EQUAL(et.getResult().getFloat(), 2.0);
+
+ EXPECT_TRUE(et.execute(1, HitRank(0.0)));
+ EXPECT_EQUAL(et.getResult().getFloat(), 2.053082175617388);
+}
+
+TEST_F("testWithRelevance", AttributeFixture()) {
+
+ ExpressionNode::CP r1(new RelevanceNode());
+ ExpressionNode::CP l1(new InterpolatedLookup(*f1.guard, r1));
+
+ ExpressionTree et(l1);
+ ExpressionTree::Configure treeConf;
+ et.select(treeConf, treeConf);
+
+ EXPECT_TRUE(et.getResult().getClass().inherits(FloatResultNode::classId));
+
+ // docid 0
+ double expect0[] = { 0.0, 0.0, 0.0,
+
+ 0.514285714285715012,
+ 1.506349206349207659,
+ 2.716594516594518005,
+
+ 4.19605949605949835,
+ 6.001633866649353166,
+ 8.224512367129145574,
+
+ 10.0, 10.0, 10.0 };
+
+ for (int i = 0; i < 12; i++) {
+ double r = i-1;
+ r *= 0.1;
+ TEST_STATE(vespalib::make_string("i=%d", i).c_str());
+ EXPECT_TRUE(et.execute(0, HitRank(r)));
+ EXPECT_EQUAL(expect0[i], et.getResult().getFloat());
+ }
+
+ EXPECT_TRUE(et.execute(0, HitRank(f1.doc0attr[2])));
+ EXPECT_EQUAL(et.getResult().getFloat(), 2.0);
+
+ // docid 1
+ EXPECT_TRUE(et.execute(1, HitRank(f1.doc1attr[0] - 0.001)));
+ EXPECT_EQUAL(et.getResult().getFloat(), 0.0);
+
+ EXPECT_TRUE(et.execute(1, HitRank(f1.doc1attr[0])));
+ EXPECT_EQUAL(et.getResult().getFloat(), 0.0);
+
+ EXPECT_TRUE(et.execute(1, HitRank(f1.doc1attr[2])));
+ EXPECT_EQUAL(et.getResult().getFloat(), 2.0);
+
+ EXPECT_TRUE(et.execute(1, HitRank(f1.doc1attr[4])));
+ EXPECT_EQUAL(et.getResult().getFloat(), 4.0);
+
+ EXPECT_TRUE(et.execute(1, HitRank(f1.doc1attr[10])));
+ EXPECT_EQUAL(et.getResult().getFloat(), 10.0);
+
+ EXPECT_TRUE(et.execute(1, HitRank(f1.doc1attr[10] + 0.01)));
+ EXPECT_EQUAL(et.getResult().getFloat(), 10.0);
+}
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/aggregator/perdocexpr.cpp b/searchlib/src/tests/aggregator/perdocexpr.cpp
new file mode 100644
index 00000000000..8f073187cce
--- /dev/null
+++ b/searchlib/src/tests/aggregator/perdocexpr.cpp
@@ -0,0 +1,1693 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/searchlib/aggregation/aggregation.h>
+#include <vespa/searchlib/aggregation/expressioncountaggregationresult.h>
+#include <vespa/searchlib/aggregation/perdocexpression.h>
+#include <vespa/searchlib/attribute/extendableattributes.h>
+#include <vespa/vespalib/objects/objectdumper.h>
+#include <vespa/vespalib/testkit/testapp.h>
+#include <stdexcept>
+#include <vespa/document/base/testdocman.h>
+#include <vespa/vespalib/util/md5.h>
+#include <vespa/vespalib/util/stringfmt.h>
+#include <vespa/searchlib/expression/getdocidnamespacespecificfunctionnode.h>
+
+using namespace search;
+using namespace search::expression;
+using namespace search::aggregation;
+using namespace vespalib;
+
+struct AggrGetter {
+ virtual ~AggrGetter() { }
+ virtual const ResultNode &operator()(const AggregationResult &r) const = 0;
+};
+
+AttributeGuard createInt64Attribute();
+AttributeGuard createInt32Attribute();
+AttributeGuard createInt16Attribute();
+AttributeGuard createInt8Attribute();
+template<typename T>
+void testCmp(const T & small, const T & medium, const T & large);
+
+void testMin(const ResultNode & a, const ResultNode & b) {
+ ASSERT_TRUE(a.cmp(b) < 0);
+ MinFunctionNode func;
+ func.appendArg(ConstantNode(a)).appendArg(ConstantNode(b)).prepare(false)
+ .execute();
+ ASSERT_TRUE(func.getResult().cmp(a) == 0);
+
+ MinFunctionNode funcR;
+ funcR.appendArg(ConstantNode(b)).appendArg(ConstantNode(a)).prepare(false)
+ .execute();
+ ASSERT_TRUE(funcR.getResult().cmp(a) == 0);
+}
+
+TEST("testMin") {
+ testMin(Int64ResultNode(67), Int64ResultNode(68));
+ testMin(FloatResultNode(67), FloatResultNode(68));
+ testMin(StringResultNode("67"), StringResultNode("68"));
+ testMin(RawResultNode("67", 2), RawResultNode("68", 2));
+ testMin(RawResultNode("-67", 2), RawResultNode("68", 2));
+}
+
+void testMax(const ResultNode & a, const ResultNode & b) {
+ ASSERT_TRUE(a.cmp(b) < 0);
+ MaxFunctionNode func;
+ func.appendArg(ConstantNode(a)).appendArg(ConstantNode(b)).prepare(false)
+ .execute();
+ ASSERT_TRUE(func.getResult().cmp(b) == 0);
+
+ MaxFunctionNode funcR;
+ funcR.appendArg(ConstantNode(b)).appendArg(ConstantNode(a)).prepare(false)
+ .execute();
+ ASSERT_TRUE(funcR.getResult().cmp(b) == 0);
+}
+
+TEST("testMax") {
+ testMax(Int64ResultNode(67), Int64ResultNode(68));
+ testMax(FloatResultNode(67), FloatResultNode(68));
+ testMax(StringResultNode("67"), StringResultNode("68"));
+ testMax(RawResultNode("67", 2), RawResultNode("68", 2));
+ testMax(RawResultNode("-67", 2), RawResultNode("68", 2));
+}
+
+ExpressionCountAggregationResult getExpressionCountWithNormalSketch() {
+ nbostream stream;
+ stream << (uint32_t)ExpressionCountAggregationResult::classId
+ << (char)0 << (uint32_t)0
+ << (uint32_t)NormalSketch<>::classId
+ << NormalSketch<>::BUCKET_COUNT << NormalSketch<>::BUCKET_COUNT;
+ for (size_t i = 0; i < NormalSketch<>::BUCKET_COUNT; ++i) {
+ stream << static_cast<char>(0);
+ }
+ NBOSerializer serializer(stream);
+ ExpressionCountAggregationResult result;
+ serializer >> result;
+ EXPECT_EQUAL(0u, stream.size());
+ EXPECT_EQUAL(NormalSketch<>(), result.getSketch());
+ return result;
+}
+
+void testExpressionCount(const ResultNode &a, uint32_t bucket, uint8_t val) {
+ ExpressionCountAggregationResult func =
+ getExpressionCountWithNormalSketch();
+ func.setExpression(ConstantNode(a));
+ func.aggregate(DocId(42), HitRank(21));
+
+ const auto &sketch = func.getSketch();
+ auto normal = dynamic_cast<const NormalSketch<>&>(sketch);
+ for (uint32_t i = 0; i < sketch.BUCKET_COUNT; ++i) {
+ TEST_STATE(make_string("Bucket %u. Expected bucket %u=%u",
+ i, bucket, val).c_str());
+ EXPECT_EQUAL(i == bucket? val : 0, (int) normal.bucket[i]);
+ }
+}
+
+TEST("require that expression count can operate on different results") {
+ testExpressionCount(Int64ResultNode(67), 98, 2);
+ testExpressionCount(FloatResultNode(67), 545, 1);
+ testExpressionCount(StringResultNode("67"), 243, 1);
+ testExpressionCount(RawResultNode("67", 2), 243, 1);
+ testExpressionCount(RawResultNode("-67", 2), 434, 1);
+}
+
+TEST("require that expression counts can be merged") {
+ ExpressionCountAggregationResult func1 =
+ getExpressionCountWithNormalSketch();
+ func1.setExpression(ConstantNode(Int64ResultNode(67)))
+ .aggregate(DocId(42), HitRank(21));
+ ExpressionCountAggregationResult func2 =
+ getExpressionCountWithNormalSketch();
+ func2.setExpression(ConstantNode(FloatResultNode(67)))
+ .aggregate(DocId(42), HitRank(21));
+
+ EXPECT_EQUAL(2, func1.getRank().getInteger());
+ func1.merge(func2);
+ EXPECT_EQUAL(3, func1.getRank().getInteger());
+ const auto &sketch = func1.getSketch();
+ auto normal = dynamic_cast<const NormalSketch<>&>(sketch);
+ EXPECT_EQUAL(2, normal.bucket[98]); // from func1
+ EXPECT_EQUAL(1, normal.bucket[545]); // from func2
+}
+
+TEST("require that expression counts can be serialized") {
+ ExpressionCountAggregationResult func;
+ func.setExpression(ConstantNode(Int64ResultNode(67)))
+ .aggregate(DocId(42), HitRank(21));
+ func.setExpression(ConstantNode(Int64ResultNode(68)))
+ .aggregate(DocId(42), HitRank(21));
+
+ nbostream os;
+ NBOSerializer nos(os);
+ nos << func;
+ Identifiable::UP obj = Identifiable::create(nos);
+ auto *func2 = dynamic_cast<ExpressionCountAggregationResult *>(obj.get());
+ ASSERT_TRUE(func2);
+ EXPECT_EQUAL(func.getSketch(), func2->getSketch());
+}
+
+TEST("require that expression count estimates rank") {
+ ExpressionCountAggregationResult func =
+ getExpressionCountWithNormalSketch();
+ EXPECT_EQUAL(0, func.getRank().getInteger());
+ func.setExpression(ConstantNode(Int64ResultNode(67)))
+ .aggregate(DocId(42), HitRank(21));
+ EXPECT_EQUAL(2, func.getRank().getInteger());
+ func.setExpression(ConstantNode(FloatResultNode(67)))
+ .aggregate(DocId(42), HitRank(21));
+ EXPECT_EQUAL(3, func.getRank().getInteger());
+ func.setExpression(ConstantNode(FloatResultNode(67)))
+ .aggregate(DocId(42), HitRank(21));
+ EXPECT_EQUAL(3, func.getRank().getInteger());
+}
+
+void testAdd(const ResultNode &a, const ResultNode &b, const ResultNode &c) {
+ AddFunctionNode func;
+ func.appendArg(ConstantNode(a)).appendArg(ConstantNode(b)).prepare(false)
+ .execute();
+ EXPECT_EQUAL(func.getResult().asString(), c.asString());
+ EXPECT_EQUAL(func.getResult().cmp(c), 0);
+ EXPECT_EQUAL(c.cmp(func.getResult()), 0);
+}
+
+TEST("testAdd") {
+ testAdd(Int64ResultNode(67), Int64ResultNode(68), Int64ResultNode(67+68));
+ testAdd(FloatResultNode(67), FloatResultNode(68), FloatResultNode(67+68));
+ testAdd(StringResultNode("67"), StringResultNode("68"),
+ StringResultNode("lo"));
+ testAdd(RawResultNode("67", 2), RawResultNode("68", 2),
+ RawResultNode("lo", 2));
+}
+
+void testDivide(const ResultNode &a, const ResultNode &b,
+ const ResultNode &c) {
+ DivideFunctionNode func;
+ func.appendArg(ConstantNode(a)).appendArg(ConstantNode(b)).prepare(false)
+ .execute();
+ EXPECT_EQUAL(func.getResult().asString(), c.asString());
+ EXPECT_EQUAL(func.getResult().getFloat(), c.getFloat());
+ EXPECT_EQUAL(func.getResult().cmp(c), 0);
+ EXPECT_EQUAL(c.cmp(func.getResult()), 0);
+}
+
+TEST("testDivide") {
+ testDivide(Int64ResultNode(6), FloatResultNode(12.0),
+ FloatResultNode(0.5));
+ testDivide(Int64ResultNode(6), Int64ResultNode(1), Int64ResultNode(6));
+ testDivide(Int64ResultNode(6), Int64ResultNode(0), Int64ResultNode(0));
+}
+
+void testModulo(const ResultNode &a, const ResultNode &b,
+ const ResultNode &c) {
+ ModuloFunctionNode func;
+ func.appendArg(ConstantNode(a)).appendArg(ConstantNode(b)).prepare(false)
+ .execute();
+ EXPECT_EQUAL(func.getResult().asString(), c.asString());
+ EXPECT_EQUAL(func.getResult().getFloat(), c.getFloat());
+ EXPECT_EQUAL(func.getResult().cmp(c), 0);
+ EXPECT_EQUAL(c.cmp(func.getResult()), 0);
+}
+
+TEST("testModulo") {
+ testModulo(Int64ResultNode(0), Int64ResultNode(6), Int64ResultNode(0));
+ testModulo(Int64ResultNode(1), Int64ResultNode(6), Int64ResultNode(1));
+ testModulo(Int64ResultNode(2), Int64ResultNode(6), Int64ResultNode(2));
+ testModulo(Int64ResultNode(3), Int64ResultNode(6), Int64ResultNode(3));
+ testModulo(Int64ResultNode(4), Int64ResultNode(6), Int64ResultNode(4));
+ testModulo(Int64ResultNode(5), Int64ResultNode(6), Int64ResultNode(5));
+ testModulo(Int64ResultNode(6), Int64ResultNode(6), Int64ResultNode(0));
+
+ testModulo(Int64ResultNode(6), Int64ResultNode(1), Int64ResultNode(0));
+ testModulo(Int64ResultNode(6), Int64ResultNode(0), Int64ResultNode(0));
+
+ testModulo(FloatResultNode(2), Int64ResultNode(6), FloatResultNode(2));
+ testModulo(Int64ResultNode(3), FloatResultNode(6), FloatResultNode(3));
+}
+
+void testNegate(const ResultNode & a, const ResultNode & b) {
+ NegateFunctionNode func;
+ func.appendArg(ConstantNode(a)).prepare(false).execute();
+ EXPECT_EQUAL(func.getResult().asString(), b.asString());
+ EXPECT_EQUAL(func.getResult().cmp(b), 0);
+ EXPECT_EQUAL(b.cmp(func.getResult()), 0);
+}
+
+TEST("testNegate") {
+ testNegate(Int64ResultNode(67), Int64ResultNode(-67));
+ testNegate(FloatResultNode(67.0), FloatResultNode(-67.0));
+
+ char strnorm[4] = { 102, 111, 111, 0 };
+ char strneg[4] = { -102, -111, -111, 0 };
+ testNegate(StringResultNode(strnorm), StringResultNode(strneg));
+ testNegate(RawResultNode(strnorm, 3), RawResultNode(strneg, 3));
+}
+
+template <typename T>
+void testBuckets(const T * b) {
+ EXPECT_TRUE(b[0].cmp(b[1]) < 0);
+ EXPECT_TRUE(b[1].cmp(b[2]) < 0);
+ EXPECT_TRUE(b[2].cmp(b[3]) < 0);
+ EXPECT_TRUE(b[3].cmp(b[4]) < 0);
+ EXPECT_TRUE(b[4].cmp(b[5]) < 0);
+
+ EXPECT_TRUE(b[1].cmp(b[0]) > 0);
+ EXPECT_TRUE(b[2].cmp(b[1]) > 0);
+ EXPECT_TRUE(b[3].cmp(b[2]) > 0);
+ EXPECT_TRUE(b[4].cmp(b[3]) > 0);
+ EXPECT_TRUE(b[5].cmp(b[4]) > 0);
+
+ EXPECT_TRUE(b[1].cmp(b[1]) == 0);
+ EXPECT_TRUE(b[2].cmp(b[2]) == 0);
+ EXPECT_TRUE(b[3].cmp(b[3]) == 0);
+ EXPECT_TRUE(b[4].cmp(b[4]) == 0);
+ EXPECT_TRUE(b[5].cmp(b[5]) == 0);
+
+ EXPECT_TRUE(b[0].contains(b[1]) < 0);
+ EXPECT_TRUE(b[1].contains(b[2]) < 0);
+ EXPECT_TRUE(b[2].contains(b[3]) == 0);
+ EXPECT_TRUE(b[3].contains(b[4]) < 0);
+ EXPECT_TRUE(b[4].contains(b[5]) < 0);
+
+ EXPECT_TRUE(b[1].contains(b[0]) > 0);
+ EXPECT_TRUE(b[2].contains(b[1]) > 0);
+ EXPECT_TRUE(b[3].contains(b[2]) == 0);
+ EXPECT_TRUE(b[4].contains(b[3]) > 0);
+ EXPECT_TRUE(b[5].contains(b[4]) > 0);
+
+ EXPECT_TRUE(b[1].contains(b[1]) == 0);
+ EXPECT_TRUE(b[2].contains(b[2]) == 0);
+ EXPECT_TRUE(b[3].contains(b[3]) == 0);
+ EXPECT_TRUE(b[4].contains(b[4]) == 0);
+ EXPECT_TRUE(b[5].contains(b[5]) == 0);
+}
+
+TEST("testBuckets") {
+ IntegerBucketResultNodeVector iv;
+ IntegerBucketResultNodeVector::Vector & ib = iv.getVector();
+ EXPECT_TRUE(iv.find(Int64ResultNode(6)) == NULL);
+ ib.resize(1);
+ ib[0] = IntegerBucketResultNode(7, 9);
+ EXPECT_TRUE(iv.find(Int64ResultNode(6)) == NULL);
+ EXPECT_TRUE(iv.find(Int64ResultNode(7)) != NULL);
+ EXPECT_TRUE(iv.find(Int64ResultNode(8)) != NULL);
+ EXPECT_TRUE(iv.find(Int64ResultNode(9)) == NULL);
+ EXPECT_TRUE(iv.find(Int64ResultNode(10)) == NULL);
+
+ ib.resize(6);
+ ib[0] = IntegerBucketResultNode(7, 9);
+ ib[1] = IntegerBucketResultNode(13, 17);
+ ib[2] = IntegerBucketResultNode(15, 30);
+ ib[3] = IntegerBucketResultNode(19, 27);
+ ib[4] = IntegerBucketResultNode(20, 33);
+ ib[5] = IntegerBucketResultNode(50, 50);
+ testBuckets(&ib[0]);
+ iv.sort();
+ testBuckets(&ib[0]);
+ EXPECT_TRUE(ib[0].contains(6) > 0);
+ EXPECT_TRUE(ib[0].contains(7) == 0);
+ EXPECT_TRUE(ib[0].contains(8) == 0);
+ EXPECT_TRUE(ib[0].contains(9) < 0);
+ EXPECT_TRUE(ib[0].contains(10) < 0);
+ EXPECT_TRUE(iv.find(Int64ResultNode(6)) == NULL);
+ EXPECT_TRUE(iv.find(Int64ResultNode(7)) != NULL);
+ EXPECT_TRUE(iv.find(Int64ResultNode(8)) != NULL);
+ EXPECT_TRUE(iv.find(Int64ResultNode(9)) == NULL);
+ EXPECT_TRUE(iv.find(Int64ResultNode(10)) == NULL);
+ EXPECT_TRUE(iv.find(Int64ResultNode(14)) != NULL);
+ EXPECT_TRUE(iv.find(Int64ResultNode(27)) != NULL);
+ EXPECT_TRUE(iv.find(Int64ResultNode(32)) != NULL);
+ EXPECT_TRUE(iv.find(Int64ResultNode(33)) == NULL);
+ EXPECT_TRUE(iv.find(Int64ResultNode(50)) == NULL);
+
+ FloatBucketResultNodeVector fv;
+ FloatBucketResultNodeVector::Vector & fb = fv.getVector();
+ fb.resize(6);
+ fb[0] = FloatBucketResultNode(7, 9);
+ fb[1] = FloatBucketResultNode(13, 17);
+ fb[2] = FloatBucketResultNode(15, 30);
+ fb[3] = FloatBucketResultNode(19, 27);
+ fb[4] = FloatBucketResultNode(20, 33);
+ fb[5] = FloatBucketResultNode(50, 50);
+ testBuckets(&fb[0]);
+ fv.sort();
+ testBuckets(&fb[0]);
+ EXPECT_TRUE(fb[0].contains(6) > 0);
+ EXPECT_TRUE(fb[0].contains(7) == 0);
+ EXPECT_TRUE(fb[0].contains(8) == 0);
+ EXPECT_TRUE(fb[0].contains(9) < 0);
+ EXPECT_TRUE(fb[0].contains(10) < 0);
+ EXPECT_TRUE(fv.find(FloatResultNode(6)) == NULL);
+ EXPECT_TRUE(fv.find(FloatResultNode(7)) != NULL);
+ EXPECT_TRUE(fv.find(FloatResultNode(8)) != NULL);
+ EXPECT_TRUE(fv.find(FloatResultNode(9)) == NULL);
+ EXPECT_TRUE(fv.find(FloatResultNode(10)) == NULL);
+ EXPECT_TRUE(fv.find(FloatResultNode(14)) != NULL);
+ EXPECT_TRUE(fv.find(FloatResultNode(27)) != NULL);
+ EXPECT_TRUE(fv.find(FloatResultNode(32)) != NULL);
+ EXPECT_TRUE(fv.find(FloatResultNode(33)) == NULL);
+ EXPECT_TRUE(fv.find(FloatResultNode(50)) == NULL);
+
+ StringBucketResultNodeVector sv;
+ StringBucketResultNodeVector::Vector & sb = sv.getVector();
+ sb.resize(6);
+ sb[0] = StringBucketResultNode("07", "09");
+ sb[1] = StringBucketResultNode("13", "17");
+ sb[2] = StringBucketResultNode("15", "30");
+ sb[3] = StringBucketResultNode("19", "27");
+ sb[4] = StringBucketResultNode("20", "33");
+ sb[5] = StringBucketResultNode("50", "50");
+ testBuckets(&sb[0]);
+ sv.sort();
+ testBuckets(&sb[0]);
+ EXPECT_TRUE(sb[0].contains("06") > 0);
+ EXPECT_TRUE(sb[0].contains("07") == 0);
+ EXPECT_TRUE(sb[0].contains("08") == 0);
+ EXPECT_TRUE(sb[0].contains("09") < 0);
+ EXPECT_TRUE(sb[0].contains("10") < 0);
+ EXPECT_TRUE(sv.find(StringResultNode("06")) == NULL);
+ EXPECT_TRUE(sv.find(StringResultNode("07")) != NULL);
+ EXPECT_TRUE(sv.find(StringResultNode("08")) != NULL);
+ EXPECT_TRUE(sv.find(StringResultNode("09")) == NULL);
+ EXPECT_TRUE(sv.find(StringResultNode("10")) == NULL);
+ EXPECT_TRUE(sv.find(StringResultNode("14")) != NULL);
+ EXPECT_TRUE(sv.find(StringResultNode("27")) != NULL);
+ EXPECT_TRUE(sv.find(StringResultNode("32")) != NULL);
+ EXPECT_TRUE(sv.find(StringResultNode("33")) == NULL);
+ EXPECT_TRUE(sv.find(StringResultNode("50")) == NULL);
+}
+
+template<typename T>
+void testCmp(const T & small, const T & medium, const T & large) {
+ EXPECT_TRUE(small.cmp(medium) < 0);
+ EXPECT_TRUE(small.cmp(large) < 0);
+ EXPECT_TRUE(medium.cmp(large) < 0);
+ EXPECT_TRUE(medium.cmp(small) > 0);
+ EXPECT_TRUE(large.cmp(small) > 0);
+ EXPECT_TRUE(large.cmp(medium) > 0);
+}
+
+TEST("testResultNodes") {
+ Int64ResultNode i(89);
+ char mem[64];
+ ResultNode::BufferRef buf(&mem, sizeof(mem));
+ EXPECT_EQUAL(i.getInteger(), 89);
+ EXPECT_EQUAL(i.getFloat(), 89.0);
+ EXPECT_EQUAL(i.getString(buf).c_str(), std::string("89"));
+ FloatResultNode f(2165.798);
+ EXPECT_EQUAL(f.getInteger(), 2166);
+ EXPECT_EQUAL(f.getFloat(), 2165.798);
+ EXPECT_EQUAL(f.getString(buf).c_str(), std::string("2165.8"));
+ StringResultNode s("17.89hjkljly");
+ EXPECT_EQUAL(s.getInteger(), 17);
+ EXPECT_EQUAL(s.getFloat(), 17.89);
+ EXPECT_EQUAL(s.getString(buf).c_str(), std::string("17.89hjkljly"));
+ RawResultNode r("hjgasfdg", 9);
+ EXPECT_EQUAL(r.getString(buf).c_str(), std::string("hjgasfdg"));
+ int64_t j(789);
+ double d(786324.78);
+ nbostream os;
+ os << j << d;
+ RawResultNode r1(os.c_str(), sizeof(j));
+ EXPECT_EQUAL(r1.getInteger(), 789);
+ RawResultNode r2(os.c_str() + sizeof(j), sizeof(d));
+ EXPECT_EQUAL(r2.getFloat(), 786324.78);
+
+ StringResultNode s1, s2("a"), s3("a"), s4("b"), s5("bb");
+ EXPECT_EQUAL(s1.cmp(s1), 0);
+ EXPECT_EQUAL(s2.cmp(s3), 0);
+ EXPECT_EQUAL(s4.cmp(s4), 0);
+ EXPECT_EQUAL(s5.cmp(s5), 0);
+ testCmp(s1, s2, s4);
+ testCmp(s1, s2, s5);
+ testCmp(s2, s4, s5);
+
+ {
+ Int64ResultNode i1(-1), i2(0), i3(1), i4(0x80000000lu);
+ EXPECT_EQUAL(i1.cmp(i1), 0);
+ EXPECT_EQUAL(i2.cmp(i2), 0);
+ EXPECT_EQUAL(i3.cmp(i3), 0);
+ testCmp(i1, i2, i3);
+ testCmp(i1, i2, i4);
+ }
+
+ {
+ FloatResultNode i1(-1), i2(0), i3(1), notanumber(nan("")),
+ minusInf(-INFINITY), plussInf(INFINITY);
+ EXPECT_EQUAL(i1.cmp(i1), 0);
+ EXPECT_EQUAL(i2.cmp(i2), 0);
+ EXPECT_EQUAL(i3.cmp(i3), 0);
+ EXPECT_EQUAL(minusInf.cmp(minusInf), 0);
+ EXPECT_EQUAL(plussInf.cmp(plussInf), 0);
+ EXPECT_EQUAL(notanumber.cmp(notanumber), 0);
+ testCmp(i1, i2, i3);
+ testCmp(minusInf, i1, plussInf);
+ testCmp(minusInf, i2, plussInf);
+ testCmp(minusInf, i3, plussInf);
+ testCmp(notanumber, i2, i3);
+ testCmp(notanumber, i2, plussInf);
+ testCmp(notanumber, minusInf, plussInf);
+ }
+ {
+ FloatBucketResultNode
+ i1(-1, 3), i2(188000, 188500), i3(1630000, 1630500),
+ notanumber(-nan(""), nan("")), inf(-INFINITY, INFINITY);
+ EXPECT_EQUAL(i1.cmp(i1), 0);
+ EXPECT_EQUAL(i2.cmp(i2), 0);
+ EXPECT_EQUAL(notanumber.cmp(notanumber), 0);
+ EXPECT_EQUAL(inf.cmp(inf), 0);
+
+ testCmp(i1, i2, i3);
+ testCmp(inf, i1, i2);
+ testCmp(notanumber, i2, i3);
+ testCmp(notanumber, i1, i2);
+ testCmp(notanumber, inf, i1);
+ }
+}
+
+void testStreaming(const Identifiable &v) {
+ nbostream os;
+ NBOSerializer nos(os);
+ nos << v;
+ Identifiable::UP s = Identifiable::create(nos);
+ ASSERT_TRUE(s.get() != NULL);
+ ASSERT_TRUE(v.cmp(*s) == 0);
+ nbostream os2, os3;
+ NBOSerializer nos2(os2), nos3(os3);
+ nos2 << v;
+ nos3 << *s;
+
+ EXPECT_EQUAL(os2.size(), os3.size());
+ ASSERT_TRUE(os2.size() == os3.size());
+ EXPECT_EQUAL(0, memcmp(os2.c_str(), os3.c_str(), os3.size()));
+}
+
+TEST("testTimeStamp") {
+ TimeStampFunctionNode t1;
+ testStreaming(t1);
+}
+
+namespace {
+
+std::string
+getVespaChecksumV2(
+ const std::string& ymumid,
+ int fid,
+ const std::string& flags_str)
+{
+ if (fid == 6 || fid == 0 || fid == 5) {
+ return 0;
+ }
+
+ std::list<char> flags_list;
+ flags_list.clear();
+ for (unsigned int i = 0; i< flags_str.length();i++)
+ if (isalpha(flags_str[i]))
+ flags_list.push_back(flags_str[i]);
+ flags_list.sort();
+
+ std::string new_flags_str ="";
+ std::list<char>::iterator it;
+ for (it = flags_list.begin();it!=flags_list.end();it++)
+ new_flags_str += *it;
+
+ uint32_t networkFid = htonl(fid);
+
+ int length = ymumid.length()+
+ sizeof(networkFid)+
+ new_flags_str.length();
+
+ unsigned char buffer[length];
+ memset(buffer, 0x00, length);
+ memcpy(buffer, ymumid.c_str(), ymumid.length());
+ memcpy(buffer + ymumid.length(),
+ (const char*)&networkFid, sizeof(networkFid));
+ memcpy(buffer+ymumid.length()+sizeof(networkFid), new_flags_str.c_str(),
+ new_flags_str.length());
+
+ return std::string((char*)buffer, length);
+}
+} // namespace
+
+TEST("testMailChecksumExpression") {
+ document::TestDocMan testDocMan;
+
+ int folder = 32;
+ std::string flags = "RWA";
+ std::string ymumid = "barmuda";
+
+ document::Document::UP doc =
+ testDocMan.createDocument("foo", "userdoc:footype:1234:" + ymumid);
+ document::WeightedSetFieldValue
+ ws(doc->getField("byteweightedset").getDataType());
+
+ for (uint32_t i = 0; i < flags.size(); i++) {
+ ws.add(document::ByteFieldValue(flags[i]));
+ }
+ doc->setValue("headerval", document::IntFieldValue(folder));
+ doc->setValue("byteweightedset", ws);
+
+ CatFunctionNode e;
+
+ // YMUMID
+ GetDocIdNamespaceSpecificFunctionNode* ns =
+ new GetDocIdNamespaceSpecificFunctionNode(
+ ResultNode::UP(new StringResultNode));
+ e.appendArg(ExpressionNode::CP(ns));
+
+ // Folder
+ e.appendArg(DocumentFieldNode("headerval"));
+
+ // Flags
+ e.appendArg(SortFunctionNode(DocumentFieldNode("byteweightedset")));
+
+ MD5BitFunctionNode node(e, 32);
+
+ CatFunctionNode &cfn =
+ static_cast<CatFunctionNode&>(*node.expressionNodeVector()[0]);
+ MultiArgFunctionNode::ExpressionNodeVector &xe =
+ cfn.expressionNodeVector();
+
+ for (uint32_t i = 0; i < xe.size(); i++) {
+ DocumentAccessorNode* rf =
+ dynamic_cast<DocumentAccessorNode *>(xe[i].get());
+ if (rf) {
+ rf->setDocType(doc->getType());
+ rf->prepare(true);
+ rf->setDoc(*doc);
+ } else {
+ MultiArgFunctionNode * mf =
+ dynamic_cast<MultiArgFunctionNode *>(xe[i].get());
+ MultiArgFunctionNode::ExpressionNodeVector& se =
+ mf->expressionNodeVector();
+ for (uint32_t j = 0; j < se.size(); j++) {
+ DocumentAccessorNode* tf =
+ dynamic_cast<DocumentAccessorNode *>(se[j].get());
+ tf->setDocType(doc->getType());
+ tf->prepare(true);
+ tf->setDoc(*doc);
+ }
+ }
+ }
+ // SortFunctionNode & sfn = static_cast<SortFunctionNode&>(*xe[1]);
+ // sfn.prepare(false);
+ cfn.prepare(false);
+
+ cfn.execute();
+ ConstBufferRef ref =
+ static_cast<const RawResultNode &>(cfn.getResult()).get();
+
+ std::string cmp = getVespaChecksumV2(ymumid, folder, flags);
+
+ EXPECT_EQUAL(ref.size(), 14u);
+ EXPECT_EQUAL(cmp.size(), ref.size());
+
+ for (uint32_t i = 0; i < ref.size(); i++) {
+ std::cerr << i << ": " << (int)ref.c_str()[i] << "/" << (int)cmp[i]
+ << "\n";
+ }
+
+ EXPECT_TRUE(memcmp(cmp.c_str(), ref.c_str(), cmp.size()) == 0);
+
+ node.prepare(true);
+ node.execute();
+
+ ConstBufferRef ref2 =
+ static_cast<const RawResultNode &>(node.getResult()).get();
+
+ for (uint32_t i = 0; i < ref2.size(); i++) {
+ std::cerr << i << ": " << (int)ref2.c_str()[i] << "\n";
+ }
+}
+
+TEST("testDebugFunction") {
+ {
+ AddFunctionNode add;
+ add.appendArg(ConstantNode(Int64ResultNode(3)));
+ add.appendArg(ConstantNode(Int64ResultNode(4)));
+ DebugWaitFunctionNode n(add, 1.3, false);
+ n.prepare(false);
+
+ FastOS_Time time;
+ time.SetNow();
+ n.execute();
+ EXPECT_TRUE(time.MilliSecsToNow() > 1000.0);
+ EXPECT_EQUAL(static_cast<const Int64ResultNode &>(n.getResult()).get(),
+ 7);
+ }
+ {
+ AddFunctionNode add;
+ add.appendArg(ConstantNode(Int64ResultNode(3)));
+ add.appendArg(ConstantNode(Int64ResultNode(4)));
+ DebugWaitFunctionNode n(add, 1.3, true);
+ n.prepare(false);
+
+ FastOS_Time time;
+ time.SetNow();
+ n.execute();
+ EXPECT_TRUE(time.MilliSecsToNow() > 1000.0);
+ EXPECT_EQUAL(static_cast<const Int64ResultNode &>(n.getResult()).get(),
+ 7);
+ }
+}
+
+TEST("testDivExpressions") {
+ {
+ StrLenFunctionNode e(ConstantNode(Int64ResultNode(238686)));
+ e.prepare(false);
+ e.execute();
+ EXPECT_EQUAL(static_cast<const Int64ResultNode &>(e.getResult()).get(),
+ 6);
+ }
+ {
+ NormalizeSubjectFunctionNode
+ e(ConstantNode(StringResultNode("Re: Your mail")));
+ e.prepare(false);
+ e.execute();
+ EXPECT_EQUAL(
+ static_cast<const StringResultNode &>(e.getResult()).get(),
+ "Your mail");
+ }
+ {
+ NormalizeSubjectFunctionNode
+ e(ConstantNode(StringResultNode("Your mail")));
+ e.prepare(false);
+ e.execute();
+ EXPECT_EQUAL(
+ static_cast<const StringResultNode &>(e.getResult()).get(),
+ "Your mail");
+ }
+ {
+ StrCatFunctionNode e(ConstantNode(Int64ResultNode(238686)));
+ e.appendArg(ConstantNode(StringResultNode("ARG 2")));
+ e.prepare(false);
+ e.execute();
+ EXPECT_EQUAL(
+ static_cast<const StringResultNode &>(e.getResult()).get(),
+ "238686ARG 2");
+ }
+
+ {
+ ToStringFunctionNode e(ConstantNode(Int64ResultNode(238686)));
+ e.prepare(false);
+ e.execute();
+ EXPECT_EQUAL(strcmp(static_cast<const StringResultNode &>(
+ e.getResult()).get().c_str(), "238686"), 0);
+ }
+
+ {
+ ToRawFunctionNode e(ConstantNode(Int64ResultNode(238686)));
+ e.prepare(false);
+ e.execute();
+ EXPECT_EQUAL(strcmp(static_cast<const RawResultNode &>(
+ e.getResult()).get().c_str(), "238686"), 0);
+ }
+
+ {
+ CatFunctionNode e(ConstantNode(Int64ResultNode(238686)));
+ e.prepare(false);
+ e.execute();
+ EXPECT_EQUAL(
+ static_cast<const RawResultNode &>(e.getResult()).get().size(),
+ 8u);
+ }
+ {
+ CatFunctionNode e(ConstantNode(Int32ResultNode(23886)));
+ e.prepare(false);
+ e.execute();
+ EXPECT_EQUAL(
+ static_cast<const RawResultNode &>(e.getResult()).get().size(),
+ 4u);
+ }
+ {
+ const uint8_t buf[4] = { 0, 0, 0, 7 };
+ MD5BitFunctionNode
+ e(ConstantNode(RawResultNode(buf, sizeof(buf))), 16*8);
+ e.prepare(false);
+ e.execute();
+ ASSERT_TRUE(e.getResult().getClass().inherits(RawResultNode::classId));
+ const RawResultNode &
+ r(static_cast<const RawResultNode &>(e.getResult()));
+ EXPECT_EQUAL(r.get().size(), 16u);
+ }
+ {
+ const uint8_t buf[4] = { 0, 0, 0, 7 };
+ MD5BitFunctionNode
+ e(ConstantNode(RawResultNode(buf, sizeof(buf))), 2*8);
+ e.prepare(false);
+ e.execute();
+ EXPECT_EQUAL(
+ static_cast<const RawResultNode &>(e.getResult()).get().size(),
+ 2u);
+ }
+ {
+ const uint8_t buf[4] = { 0, 0, 0, 7 };
+ XorBitFunctionNode
+ e(ConstantNode(RawResultNode(buf, sizeof(buf))), 1*8);
+ e.prepare(false);
+ e.execute();
+ EXPECT_EQUAL(
+ static_cast<const RawResultNode &>(e.getResult()).get().size(),
+ 1u);
+ EXPECT_EQUAL(static_cast<const RawResultNode &>(
+ e.getResult()).get().c_str()[0],
+ 0x7);
+ }
+ {
+ const uint8_t buf[4] = { 6, 0, 7, 7 };
+ XorBitFunctionNode
+ e(ConstantNode(RawResultNode(buf, sizeof(buf))), 2*8);
+ e.prepare(false);
+ e.execute();
+ EXPECT_EQUAL(
+ static_cast<const RawResultNode &>(e.getResult()).get().size(),
+ 2u);
+ EXPECT_EQUAL((int)static_cast<const RawResultNode &>(
+ e.getResult()).get().c_str()[0],
+ 0x1);
+ EXPECT_EQUAL((int)static_cast<const RawResultNode &>(
+ e.getResult()).get().c_str()[1],
+ 0x7);
+ }
+ {
+ const uint8_t wantedBuf[14] =
+ { 98, 97, 114, 109, 117, 100, 97, 0, 0, 0, 32, 65, 82, 87 };
+ const uint8_t md5facit[16] =
+ { 0x22, 0x5, 0x22, 0x1c, 0x49, 0xff, 0x90, 0x25, 0xad, 0xbf,
+ 0x4e, 0x51, 0xdb, 0xca, 0x2a, 0xc5 };
+ const uint8_t thomasBuf[22] =
+ { 0, 0, 0, 7, 98, 97, 114, 109, 117, 100, 97, 0, 0, 0, 32, 0,
+ 0, 0, 3, 65, 82, 87 };
+ const uint8_t currentBuf[26] =
+ { 0, 0, 0, 22, 0, 0, 0, 7, 98, 97, 114, 109, 117, 100, 97, 0,
+ 0, 0, 32, 0 , 0, 0, 3, 65, 82, 87 };
+
+ MD5BitFunctionNode
+ e(ConstantNode(RawResultNode(wantedBuf, sizeof(wantedBuf))), 16*8);
+ e.prepare(false);
+ e.execute();
+ ASSERT_TRUE(e.getResult().getClass().inherits(RawResultNode::classId));
+ const RawResultNode &
+ r(static_cast<const RawResultNode &>(e.getResult()));
+ EXPECT_EQUAL(r.get().size(), 16u);
+ uint8_t md5[16];
+ fastc_md5sum(currentBuf, sizeof(currentBuf), md5);
+ EXPECT_TRUE(memcmp(r.get().data(), md5, sizeof(md5)) != 0);
+ fastc_md5sum(wantedBuf, sizeof(wantedBuf), md5);
+ EXPECT_TRUE(memcmp(r.get().data(), md5, sizeof(md5)) == 0);
+ fastc_md5sum(thomasBuf, sizeof(thomasBuf), md5);
+ EXPECT_TRUE(memcmp(r.get().data(), md5, sizeof(md5)) != 0);
+
+ MD5BitFunctionNode
+ finalCheck(
+ CatFunctionNode(ConstantNode(StringResultNode("barmuda")))
+ .appendArg(ConstantNode(Int32ResultNode(32)))
+ .appendArg(SortFunctionNode(
+ ConstantNode(Int8ResultNodeVector()
+ .push_back(Int8ResultNode(87))
+ .push_back(Int8ResultNode(65))
+ .push_back(Int8ResultNode(82))
+ )
+ )
+ ), 32);
+ finalCheck.prepare(false);
+ finalCheck.execute();
+ const RawResultNode &
+ rr(static_cast<const RawResultNode &>(finalCheck.getResult()));
+ EXPECT_EQUAL(rr.get().size(), 4u);
+ fastc_md5sum(wantedBuf, sizeof(wantedBuf), md5);
+ EXPECT_TRUE(memcmp(md5facit, md5, sizeof(md5)) == 0);
+ EXPECT_TRUE(memcmp(rr.get().data(), md5, rr.get().size()) == 0);
+ }
+ {
+ CatFunctionNode e(ConstantNode(Int16ResultNode(23886)));
+ e.prepare(false);
+ e.execute();
+ EXPECT_EQUAL(
+ static_cast<const RawResultNode &>(e.getResult()).get().size(),
+ 2u);
+ }
+ {
+ CatFunctionNode
+ e(ConstantNode(Int8ResultNodeVector().push_back(Int8ResultNode(86))
+ .push_back(Int8ResultNode(14))));
+ e.prepare(false);
+ e.execute();
+ EXPECT_EQUAL(
+ static_cast<const RawResultNode &>(e.getResult()).get().size(),
+ 1*2u);
+ }
+ {
+ CatFunctionNode
+ e(ConstantNode(Int32ResultNodeVector()
+ .push_back(Int32ResultNode(238686))
+ .push_back(Int32ResultNode(2133214))));
+ e.prepare(false);
+ e.execute();
+ EXPECT_EQUAL(
+ static_cast<const RawResultNode &>(e.getResult()).get().size(),
+ 4*2u);
+ }
+ {
+ NumElemFunctionNode e(ConstantNode(Int64ResultNode(238686)));
+ e.prepare(false);
+ e.execute();
+ EXPECT_EQUAL(e.getResult().getInteger(), 1);
+ }
+ {
+ NumElemFunctionNode
+ e(ConstantNode(Int32ResultNodeVector()
+ .push_back(Int32ResultNode(238686))
+ .push_back(Int32ResultNode(2133214))));
+ e.prepare(false);
+ e.execute();
+ EXPECT_EQUAL(e.getResult().getInteger(), 2);
+ }
+ {
+ NumElemFunctionNode
+ e(ConstantNode(Int32ResultNodeVector()
+ .push_back(Int32ResultNode(238686))
+ .push_back(Int32ResultNode(2133214))));
+ e.prepare(false);
+ e.execute();
+ EXPECT_EQUAL(e.getResult().getInteger(), 2);
+ }
+}
+
+bool test1MultivalueExpression(const MultiArgFunctionNode &exprConst,
+ const ExpressionNode::CP &mv,
+ const ResultNode & expected) {
+ MultiArgFunctionNode & expr(const_cast<MultiArgFunctionNode &>(exprConst));
+ expr.appendArg(mv);
+ expr.prepare(false);
+ bool ok = EXPECT_TRUE(expr.execute()) &&
+ EXPECT_EQUAL(0, expr.getResult().cmp(expected));
+ if (!ok) {
+ std::cerr << "Expected:" << expected.asString() << std::endl
+ << "Got: " << expr.getResult().asString() << std::endl;
+ }
+ return ok;
+}
+
+bool test1MultivalueExpressionException(const MultiArgFunctionNode & exprConst,
+ const ExpressionNode::CP & mv,
+ const char * expected) {
+ try {
+ test1MultivalueExpression(exprConst, mv, NullResultNode());
+ return EXPECT_TRUE(false);
+ } catch (std::runtime_error & e) {
+ return EXPECT_TRUE(std::string(e.what()).find(expected)
+ != std::string::npos);
+ }
+}
+
+TEST("testMultivalueExpression") {
+ IntegerResultNodeVector iv;
+ iv.push_back(Int64ResultNode(7))
+ .push_back(Int64ResultNode(17)).push_back(Int64ResultNode(117));
+ ExpressionNode::CP mv(new ConstantNode(iv));
+
+ EXPECT_TRUE(test1MultivalueExpression(AddFunctionNode(), mv,
+ Int64ResultNode(7 + 17 + 117)));
+ EXPECT_TRUE(test1MultivalueExpression(MultiplyFunctionNode(), mv,
+ Int64ResultNode(7 * 17 * 117)));
+ EXPECT_TRUE(test1MultivalueExpressionException(DivideFunctionNode(), mv,
+ "DivideFunctionNode"));
+ EXPECT_TRUE(test1MultivalueExpressionException(ModuloFunctionNode(), mv,
+ "ModuloFunctionNode"));
+ EXPECT_TRUE(test1MultivalueExpression(MinFunctionNode(), mv,
+ Int64ResultNode(7)));
+ EXPECT_TRUE(test1MultivalueExpression(MaxFunctionNode(), mv,
+ Int64ResultNode(117)));
+
+ EXPECT_TRUE(
+ test1MultivalueExpression(
+ FixedWidthBucketFunctionNode()
+ .setWidth(Int64ResultNode(1)), mv,
+ IntegerBucketResultNodeVector()
+ .push_back(IntegerBucketResultNode(7,8))
+ .push_back(IntegerBucketResultNode(17,18))
+ .push_back(IntegerBucketResultNode(117,118))));
+
+ EXPECT_TRUE(
+ test1MultivalueExpression(
+ RangeBucketPreDefFunctionNode()
+ .setBucketList(
+ IntegerBucketResultNodeVector()
+ .push_back(IntegerBucketResultNode(0,10))
+ .push_back(IntegerBucketResultNode(20,30))
+ .push_back(IntegerBucketResultNode(100,120))),
+ mv,
+ IntegerBucketResultNodeVector()
+ .push_back(IntegerBucketResultNode(0,10))
+ .push_back(IntegerBucketResultNode(0,0))
+ .push_back(IntegerBucketResultNode(100,120))));
+
+ EXPECT_TRUE(
+ test1MultivalueExpression(
+ TimeStampFunctionNode()
+ .setTimePart(TimeStampFunctionNode::Second), mv,
+ IntegerResultNodeVector()
+ .push_back(Int64ResultNode(7))
+ .push_back(Int64ResultNode(17))
+ .push_back(Int64ResultNode(117%60))));
+
+ EXPECT_TRUE(
+ test1MultivalueExpression(NegateFunctionNode(), mv,
+ IntegerResultNodeVector()
+ .push_back(Int64ResultNode(-7))
+ .push_back(Int64ResultNode(-17))
+ .push_back(Int64ResultNode(-117))));
+ EXPECT_TRUE(test1MultivalueExpression(SortFunctionNode(), mv,
+ IntegerResultNodeVector()
+ .push_back(Int64ResultNode(7))
+ .push_back(Int64ResultNode(17))
+ .push_back(Int64ResultNode(117))));
+ EXPECT_TRUE(test1MultivalueExpression(ReverseFunctionNode(), mv,
+ IntegerResultNodeVector()
+ .push_back(Int64ResultNode(117))
+ .push_back(Int64ResultNode(17))
+ .push_back(Int64ResultNode(7))));
+ EXPECT_TRUE(test1MultivalueExpression(SortFunctionNode(),
+ ReverseFunctionNode(mv),
+ IntegerResultNodeVector()
+ .push_back(Int64ResultNode(7))
+ .push_back(Int64ResultNode(17))
+ .push_back(Int64ResultNode(117))));
+ EXPECT_TRUE(test1MultivalueExpression(AndFunctionNode(), mv,
+ Int64ResultNode(7 & 17 & 117)));
+ EXPECT_TRUE(test1MultivalueExpression(OrFunctionNode(), mv,
+ Int64ResultNode(7 | 17 | 117)));
+ EXPECT_TRUE(test1MultivalueExpression(XorFunctionNode(), mv,
+ Int64ResultNode(7 ^ 17 ^ 117)));
+}
+
+TEST("testArithmeticNodes") {
+ AttributeGuard attr1 = createInt64Attribute();
+ ExpressionNode::CP i1(new ConstantNode(new Int64ResultNode(1)));
+ ExpressionNode::CP i2(new ConstantNode(new Int64ResultNode(2)));
+ ExpressionNode::CP f1(new ConstantNode(new FloatResultNode(1.1)));
+ ExpressionNode::CP f2(new ConstantNode(new FloatResultNode(9.9)));
+ ExpressionNode::CP s1(new ConstantNode(new StringResultNode("1")));
+ ExpressionNode::CP s2(new ConstantNode(new StringResultNode("2")));
+ ExpressionNode::CP r1(new ConstantNode(new RawResultNode("1", 1)));
+ ExpressionNode::CP r2(new ConstantNode(new RawResultNode("2", 1)));
+ ExpressionNode::CP a1(new AttributeNode(*attr1));
+ ExpressionNode::CP a2(new AttributeNode(*attr1));
+ AddFunctionNode add1;
+ add1.appendArg(i1);
+ add1.appendArg(i2);
+ ExpressionTree et(add1);
+
+ ExpressionTree::Configure treeConf;
+ et.select(treeConf, treeConf);
+
+ EXPECT_TRUE(
+ et.getResult().getClass().inherits(IntegerResultNode::classId));
+ EXPECT_TRUE(et.ExpressionNode::execute());
+ EXPECT_EQUAL(et.getResult().getInteger(), 3);
+ EXPECT_TRUE(et.ExpressionNode::execute());
+ EXPECT_EQUAL(et.getResult().getInteger(), 3);
+ AddFunctionNode add2;
+ add2.appendArg(i1);
+ add2.appendArg(f2);
+ add2.prepare(false);
+ EXPECT_TRUE(
+ add2.getResult().getClass().inherits(FloatResultNode::classId));
+ AddFunctionNode add3;
+ add3.appendArg(i1);
+ add3.appendArg(s2);
+ add3.prepare(false);
+ EXPECT_TRUE(
+ add3.getResult().getClass().inherits(IntegerResultNode::classId));
+ AddFunctionNode add4;
+ add4.appendArg(i1);
+ add4.appendArg(r2);
+ add4.prepare(false);
+ EXPECT_TRUE(
+ add4.getResult().getClass().inherits(IntegerResultNode::classId));
+ AddFunctionNode add5;
+ add5.appendArg(i1);
+ add5.appendArg(a1);
+ add5.prepare(false);
+ EXPECT_TRUE(
+ add5.getResult().getClass().inherits(IntegerResultNode::classId));
+ AddFunctionNode add6;
+ add6.appendArg(f1);
+ add6.appendArg(a1);
+ add6.prepare(false);
+ EXPECT_TRUE(
+ add6.getResult().getClass().inherits(FloatResultNode::classId));
+}
+
+void testArith(MultiArgFunctionNode &op, const ExpressionNode::CP &arg1,
+ const ExpressionNode::CP & arg2, int64_t intResult,
+ double floatResult) {
+ op.appendArg(arg1);
+ op.appendArg(arg2);
+ op.prepare(false);
+ op.execute();
+ EXPECT_EQUAL(intResult, op.getResult().getInteger());
+ ASSERT_TRUE(intResult == op.getResult().getInteger());
+ EXPECT_EQUAL(floatResult, op.getResult().getFloat());
+}
+
+void testArith2(MultiArgFunctionNode &op, int64_t intResult,
+ double floatResult) {
+ op.prepare(false);
+ op.execute();
+ EXPECT_EQUAL(intResult, op.getResult().getInteger());
+ ASSERT_TRUE(intResult == op.getResult().getInteger());
+ EXPECT_EQUAL(floatResult, op.getResult().getFloat());
+}
+
+void testAdd(const ExpressionNode::CP &arg1,
+ const ExpressionNode::CP &arg2,
+ int64_t intResult, double floatResult){
+ AddFunctionNode add;
+ testArith(add, arg1, arg2, intResult, floatResult);
+}
+
+void testMultiply(const ExpressionNode::CP & arg1,
+ const ExpressionNode::CP & arg2,
+ int64_t intResult, double floatResult) {
+ MultiplyFunctionNode add;
+ testArith(add, arg1, arg2, intResult, floatResult);
+}
+
+void testDivide(const ExpressionNode::CP & arg1,
+ const ExpressionNode::CP & arg2,
+ int64_t intResult, double floatResult) {
+ DivideFunctionNode add;
+ testArith(add, arg1, arg2, intResult, floatResult);
+}
+
+void testModulo(const ExpressionNode::CP & arg1,
+ const ExpressionNode::CP & arg2,
+ int64_t intResult, double floatResult) {
+ ModuloFunctionNode add;
+ testArith(add, arg1, arg2, intResult, floatResult);
+}
+
+void testArithmeticArguments(NumericFunctionNode &function,
+ std::vector<double> & arg1,
+ std::vector<double> & arg2,
+ const std::vector<double> & result,
+ double flattenResult) {
+ ExpressionNode::CP scalarInt1(new ConstantNode(new Int64ResultNode(
+ static_cast<int64_t>(arg1[0]))));
+ ExpressionNode::CP scalarInt2(new ConstantNode(new Int64ResultNode(
+ static_cast<int64_t>(arg2[0]))));
+ ExpressionNode::CP scalarFloat1(new ConstantNode(new FloatResultNode(
+ arg1[0])));
+ ExpressionNode::CP scalarFloat2(new ConstantNode(new FloatResultNode(
+ arg2[0])));
+
+ IntegerResultNodeVector iv1;
+ for (size_t i(0), m(arg1.size()); i<m; i++) {
+ iv1.push_back(Int64ResultNode(static_cast<int64_t>(arg1[i])));
+ }
+ IntegerResultNodeVector iv2;
+ for (size_t i(0), m(arg2.size()); i<m; i++) {
+ iv2.push_back(Int64ResultNode(static_cast<int64_t>(arg2[i])));
+ }
+ FloatResultNodeVector fv1;
+ for (size_t i(0), m(arg1.size()); i<m; i++) {
+ fv1.push_back(FloatResultNode(arg1[i]));
+ }
+ FloatResultNodeVector fv2;
+ for (size_t i(0), m(arg2.size()); i<m; i++) {
+ fv2.push_back(FloatResultNode(arg2[i]));
+ }
+ IntegerResultNodeVector ir;
+ for (size_t i(0), m(result.size()); i<m; i++) {
+ ir.push_back(Int64ResultNode((int64_t)result[i]));
+ }
+ FloatResultNodeVector fr;
+ for (size_t i(0), m(result.size()); i<m; i++) {
+ fr.push_back(FloatResultNode(result[i]));
+ }
+ ExpressionNode::CP vectorInt1(new ConstantNode(iv1));
+ ExpressionNode::CP vectorInt2(new ConstantNode(iv2));
+ ExpressionNode::CP vectorFloat1(new ConstantNode(fv1));
+ ExpressionNode::CP vectorFloat2(new ConstantNode(fv2));
+ function.appendArg(scalarInt1).appendArg(scalarInt2);
+ function.prepare(false);
+ EXPECT_TRUE(
+ function.getResult().getClass().equal(Int64ResultNode::classId));
+ EXPECT_TRUE(function.execute());
+ EXPECT_EQUAL(function.getResult().getInteger(),
+ static_cast<int64_t>(result[0]));
+
+ function.reset();
+
+ function.appendArg(scalarInt1).appendArg(scalarFloat2);
+ function.prepare(false);
+ EXPECT_TRUE(
+ function.getResult().getClass().equal(FloatResultNode::classId));
+ EXPECT_TRUE(function.execute());
+ EXPECT_EQUAL(function.getResult().getFloat(), result[0]);
+
+ function.reset();
+
+ function.appendArg(scalarFloat1).appendArg(scalarInt2);
+ function.prepare(false);
+ EXPECT_TRUE(
+ function.getResult().getClass().equal(FloatResultNode::classId));
+ EXPECT_TRUE(function.execute());
+ EXPECT_EQUAL(function.getResult().getFloat(), result[0]);
+
+ function.reset();
+
+ function.appendArg(scalarFloat1).appendArg(scalarFloat2);
+ function.prepare(false);
+ EXPECT_TRUE(
+ function.getResult().getClass().equal(FloatResultNode::classId));
+ EXPECT_TRUE(function.execute());
+ EXPECT_EQUAL(function.getResult().getFloat(), result[0]);
+
+ function.reset();
+
+ function.appendArg(vectorInt1);
+ function.prepare(false);
+ EXPECT_TRUE(
+ function.getResult().getClass().equal(Int64ResultNode::classId));
+ EXPECT_TRUE(function.execute());
+ EXPECT_EQUAL(function.getResult().getInteger(),
+ static_cast<int64_t>(flattenResult));
+
+ function.reset();
+
+ function.appendArg(vectorFloat1);
+ function.prepare(false);
+ EXPECT_TRUE(
+ function.getResult().getClass().equal(FloatResultNode::classId));
+ EXPECT_TRUE(function.execute());
+ EXPECT_EQUAL(function.getResult().getFloat(), flattenResult);
+
+ function.reset();
+
+ function.appendArg(vectorInt1).appendArg(vectorInt2);
+ function.prepare(false);
+ EXPECT_TRUE(function.getResult().getClass()
+ .equal(IntegerResultNodeVector::classId));
+ EXPECT_TRUE(function.execute());
+ EXPECT_TRUE(function.getResult().getClass()
+ .equal(IntegerResultNodeVector::classId));
+ EXPECT_EQUAL(static_cast<const IntegerResultNodeVector &>(
+ function.getResult()).size(), 7u);
+ EXPECT_EQUAL(0, function.getResult().cmp(ir));
+
+ function.reset();
+
+ function.appendArg(vectorFloat1).appendArg(vectorFloat2);
+ function.prepare(false);
+ EXPECT_TRUE(function.getResult().getClass()
+ .equal(FloatResultNodeVector::classId));
+ EXPECT_TRUE(function.execute());
+ EXPECT_TRUE(function.getResult().getClass()
+ .equal(FloatResultNodeVector::classId));
+ EXPECT_EQUAL(static_cast<const FloatResultNodeVector &>(
+ function.getResult()).size(), 7u);
+ EXPECT_EQUAL(0, function.getResult().cmp(fr));
+
+ function.reset();
+
+ function.appendArg(vectorInt1).appendArg(vectorFloat2);
+ function.prepare(false);
+ EXPECT_TRUE(function.getResult().getClass()
+ .equal(FloatResultNodeVector::classId));
+ EXPECT_TRUE(function.execute());
+ EXPECT_TRUE(function.getResult().getClass()
+ .equal(FloatResultNodeVector::classId));
+ EXPECT_EQUAL(static_cast<const FloatResultNodeVector &>(
+ function.getResult()).size(), 7u);
+ EXPECT_EQUAL(0, function.getResult().cmp(fr));
+
+ function.reset();
+
+ function.appendArg(vectorFloat1).appendArg(vectorInt2);
+ function.prepare(false);
+ EXPECT_TRUE(function.getResult().getClass()
+ .equal(FloatResultNodeVector::classId));
+ EXPECT_TRUE(function.execute());
+ EXPECT_TRUE(function.getResult().getClass()
+ .equal(FloatResultNodeVector::classId));
+ EXPECT_EQUAL(static_cast<const FloatResultNodeVector &>(
+ function.getResult()).size(), 7u);
+ EXPECT_EQUAL(0, function.getResult().cmp(fr));
+}
+
+TEST("testArithmeticOperations") {
+ ExpressionNode::CP i1(new ConstantNode(new Int64ResultNode(1793253241)));
+ ExpressionNode::CP i2(new ConstantNode(new Int64ResultNode(1676521321)));
+ ExpressionNode::CP f1(new ConstantNode(new FloatResultNode(1.1109876)));
+ ExpressionNode::CP f2(new ConstantNode(new FloatResultNode(9.767681239)));
+ testAdd(i1, i2, 3469774562ull, 3469774562ull);
+ testAdd(i1, f2, 1793253251ull, 1793253250.767681239);
+ testAdd(f1, f2, 11, 10.878668839 );
+ testMultiply(i1, i2, 3006427292488851361ull, 3006427292488851361ull);
+ testMultiply(i1, f2, 17515926039ull, 1793253241.0*9.767681239);
+ testMultiply(f1, f2, 11, 10.8517727372816364 );
+
+ std::vector<double> a(5), b(7);
+ a[0] = b[0] = 1;
+ a[1] = b[1] = 2;
+ a[2] = b[2] = 3;
+ a[3] = b[3] = 4;
+ a[4] = b[4] = 5;
+ b[5] = 6;
+ b[6] = 7;
+ std::vector<double> r(7);
+ {
+ r[0] = a[0] + b[0];
+ r[1] = a[1] + b[1];
+ r[2] = a[2] + b[2];
+ r[3] = a[3] + b[3];
+ r[4] = a[4] + b[4];
+ r[5] = a[0] + b[5];
+ r[6] = a[1] + b[6];
+ AddFunctionNode f;
+ testArithmeticArguments(f, a, b, r, a[0]+a[1]+a[2]+a[3]+a[4]);
+ }
+ {
+ r[0] = a[0] * b[0];
+ r[1] = a[1] * b[1];
+ r[2] = a[2] * b[2];
+ r[3] = a[3] * b[3];
+ r[4] = a[4] * b[4];
+ r[5] = a[0] * b[5];
+ r[6] = a[1] * b[6];
+ MultiplyFunctionNode f;
+ testArithmeticArguments(f, a, b, r, a[0]*a[1]*a[2]*a[3]*a[4]);
+ }
+}
+
+TEST("testAggregatorsInExpressions") {
+ CountAggregationResult *c = new CountAggregationResult();
+ c->setCount(3);
+ SumAggregationResult *s = new SumAggregationResult();
+ ResultNode::CP r1(new Int64ResultNode(7)),
+ r2(new Int64ResultNode(22));
+ ExpressionNode::CP i1(new ConstantNode(new Int64ResultNode(7))),
+ i2(c),
+ i3(s),
+ i4(new ConstantNode(new Int64ResultNode(22)));
+ AggregationResult::Configure conf;
+ s->setExpression(i4).select(conf, conf);
+ s->aggregate(0, 0);
+
+ testAdd(i1, i2, 10, 10);
+ testMultiply(i1, i2, 21, 21);
+ testMultiply(i2, i3, 66, 66);
+ testDivide(i3, i2, 7, 7);
+ testDivide(i3, i1, 3, 3);
+ testModulo(i3, i2, 1, 1);
+ testModulo(i3, i1, 1, 1);
+
+ MinAggregationResult *min = new MinAggregationResult();
+ min->setResult(r2);
+ ExpressionNode::CP imin(min);
+ testAdd(imin, i1, 29, 29);
+
+ MaxAggregationResult *max = new MaxAggregationResult();
+ max->setResult(r1);
+ ExpressionNode::CP imax(max);
+ testAdd(imin, imax, 29, 29);
+
+ XorAggregationResult *x = new XorAggregationResult();
+ x->setExpression(i4).select(conf, conf);
+ x->aggregate(0, 0);
+ ExpressionNode::CP ix(x);
+ testAdd(ix, i1, 29, 29);
+
+ AverageAggregationResult *avg = new AverageAggregationResult();
+ avg->setExpression(i4).select(conf, conf);
+ avg->aggregate(0, 0);
+ ExpressionNode::CP iavg(avg);
+ testAdd(iavg, i1, 29, 29);
+}
+
+void testAggregationResult(AggregationResult & aggr, const AggrGetter & g,
+ const ResultNode & v, const ResultNode & i,
+ const ResultNode & m, const ResultNode & s) {
+ ExpressionNode::CP scalarInt1(new ConstantNode(v));
+ AggregationResult::Configure conf;
+ aggr.setExpression(scalarInt1).select(conf, conf);
+ EXPECT_TRUE(g(aggr).getClass().equal(i.getClass().id()));
+ EXPECT_EQUAL(0, i.cmp(g(aggr)));
+ aggr.aggregate(0,0);
+ EXPECT_TRUE(g(aggr).getClass().equal(i.getClass().id()));
+ EXPECT_EQUAL(0, m.cmp(g(aggr)));
+ aggr.aggregate(1,0);
+ EXPECT_TRUE(g(aggr).getClass().equal(i.getClass().id()));
+ EXPECT_EQUAL(0, s.cmp(g(aggr)));
+}
+
+TEST("testAggregationResults") {
+ struct SumGetter : AggrGetter {
+ virtual const ResultNode &operator()(const AggregationResult & r) const
+ { return static_cast<const SumAggregationResult &>(r).getSum(); }
+ };
+ SumAggregationResult sum;
+ testAggregationResult(sum, SumGetter(), Int64ResultNode(7),
+ Int64ResultNode(0), Int64ResultNode(7),
+ Int64ResultNode(14));
+ testAggregationResult(sum, SumGetter(), FloatResultNode(7.77),
+ FloatResultNode(0), FloatResultNode(7.77),
+ FloatResultNode(15.54));
+ IntegerResultNodeVector v;
+ v.push_back(Int64ResultNode(7)).push_back(Int64ResultNode(8));
+ testAggregationResult(sum, SumGetter(), v, Int64ResultNode(0),
+ Int64ResultNode(15), Int64ResultNode(30));
+ testAggregationResult(sum, SumGetter(), FloatResultNode(7.77),
+ FloatResultNode(0), FloatResultNode(7.77),
+ FloatResultNode(15.54));
+}
+
+TEST("testGrouping") {
+ AttributeGuard attr1 = createInt64Attribute();
+ ExpressionNode::CP select1(new AttributeNode(*attr1));
+ ExpressionNode::CP result1(new CountAggregationResult());
+ (static_cast<AggregationResult &>(*result1)).setExpression(select1);
+ ExpressionNode::CP result2( new SumAggregationResult());
+ (static_cast<AggregationResult &>(*result2)).setExpression(select1);
+
+ Grouping grouping = Grouping()
+ .setFirstLevel(0)
+ .setLastLevel(1)
+ .addLevel(GroupingLevel()
+ .setExpression(select1)
+ .addResult(result1)
+ .addResult(result2));
+
+ grouping.configureStaticStuff(ConfigureStaticParams(0, 0));
+ grouping.aggregate(0u, 10u);
+ const Group::GroupList &groups = grouping.getRoot().groups();
+ EXPECT_EQUAL(grouping.getRoot().getChildrenSize(), 9u);
+ ASSERT_TRUE(groups[0]->getAggregationResult(0).getClass().id() ==
+ CountAggregationResult::classId);
+ ASSERT_TRUE(groups[0]->getAggregationResult(1).getClass().id() ==
+ SumAggregationResult::classId);
+ EXPECT_EQUAL(groups[0]->getId().getInteger(), 6u);
+ EXPECT_EQUAL(static_cast<const CountAggregationResult &>(
+ groups[0]->getAggregationResult(0)).getCount(), 1u);
+ EXPECT_EQUAL(static_cast<const SumAggregationResult &>(
+ groups[0]->getAggregationResult(1)).getSum().getInteger(),
+ 6);
+ EXPECT_EQUAL(groups[1]->getId().getInteger(), 7u);
+ EXPECT_EQUAL(static_cast<const CountAggregationResult &>(
+ groups[1]->getAggregationResult(0)).getCount(), 1u);
+ EXPECT_EQUAL(static_cast<const SumAggregationResult &>(
+ groups[1]->getAggregationResult(1)).getSum().getInteger(),
+ 7);
+ EXPECT_EQUAL(groups[2]->getId().getInteger(), 11u);
+ EXPECT_EQUAL(static_cast<const CountAggregationResult &>(
+ groups[2]->getAggregationResult(0)).getCount(), 1u);
+ EXPECT_EQUAL(static_cast<const SumAggregationResult &>(
+ groups[2]->getAggregationResult(1)).getSum().getInteger(),
+ 11);
+ EXPECT_EQUAL(groups[3]->getId().getInteger(), 13u);
+ EXPECT_EQUAL(static_cast<const CountAggregationResult &>(
+ groups[3]->getAggregationResult(0)).getCount(), 2u);
+ EXPECT_EQUAL(static_cast<const SumAggregationResult &>(
+ groups[3]->getAggregationResult(1)).getSum().getInteger(),
+ 26);
+ EXPECT_EQUAL(groups[4]->getId().getInteger(), 17u);
+ EXPECT_EQUAL(static_cast<const CountAggregationResult &>(
+ groups[4]->getAggregationResult(0)).getCount(), 1u);
+ EXPECT_EQUAL(static_cast<const SumAggregationResult &>(
+ groups[4]->getAggregationResult(1)).getSum().getInteger(),
+ 17);
+ EXPECT_EQUAL(groups[5]->getId().getInteger(), 27u);
+ EXPECT_EQUAL(static_cast<const CountAggregationResult &>(
+ groups[5]->getAggregationResult(0)).getCount(), 1u);
+ EXPECT_EQUAL(static_cast<const SumAggregationResult &>(
+ groups[5]->getAggregationResult(1)).getSum().getInteger(),
+ 27);
+ EXPECT_EQUAL(groups[6]->getId().getInteger(), 34u);
+ EXPECT_EQUAL(static_cast<const CountAggregationResult &>(
+ groups[6]->getAggregationResult(0)).getCount(), 1u);
+ EXPECT_EQUAL(static_cast<const SumAggregationResult &>(
+ groups[6]->getAggregationResult(1)).getSum().getInteger(),
+ 34);
+ EXPECT_EQUAL(groups[7]->getId().getInteger(), 67891u);
+ EXPECT_EQUAL(static_cast<const CountAggregationResult &>(
+ groups[7]->getAggregationResult(0)).getCount(), 1u);
+ EXPECT_EQUAL(static_cast<const SumAggregationResult &>(
+ groups[7]->getAggregationResult(1)).getSum().getInteger(),
+ 67891);
+ EXPECT_EQUAL(groups[8]->getId().getInteger(), 67892u);
+ EXPECT_EQUAL(static_cast<const CountAggregationResult &>(
+ groups[8]->getAggregationResult(0)).getCount(), 1u);
+ EXPECT_EQUAL(static_cast<const SumAggregationResult &>(
+ groups[8]->getAggregationResult(1)).getSum().getInteger(),
+ 67892);
+ testStreaming(grouping);
+}
+
+TEST("testGrouping2") {
+ AttributeGuard attr1 = createInt64Attribute();
+
+ RangeBucketPreDefFunctionNode *predef(
+ new RangeBucketPreDefFunctionNode(AttributeNode(*attr1)));
+ IntegerBucketResultNodeVector prevec;
+ prevec.getVector().push_back(IntegerBucketResultNode(6,7));
+ prevec.getVector().push_back(IntegerBucketResultNode(7,14));
+ prevec.getVector().push_back(IntegerBucketResultNode(18,50)); //30
+ prevec.getVector()
+ .push_back(IntegerBucketResultNode(80,50000000000ull)); //30
+ predef->setBucketList(prevec);
+ ExpressionNode::CP select1(predef);
+ ExpressionNode::CP result1( new CountAggregationResult());
+ (static_cast<AggregationResult &>(*result1)).setExpression(select1);
+
+ Grouping grouping = Grouping()
+ .setFirstLevel(0)
+ .setLastLevel(1)
+ .addLevel(GroupingLevel()
+ .setExpression(select1)
+ .addResult(result1));
+
+ grouping.configureStaticStuff(ConfigureStaticParams(0, 0));
+ grouping.aggregate(0u, 10u);
+ const Group::GroupList &groups = grouping.getRoot().groups();
+ EXPECT_EQUAL(grouping.getRoot().getChildrenSize(), 5u);
+ ASSERT_TRUE(groups[0]->getAggregationResult(0).getClass().id()
+ == CountAggregationResult::classId);
+ EXPECT_EQUAL(groups[0]->getId().getInteger(), 0u);
+ EXPECT_EQUAL(static_cast<const CountAggregationResult &>(
+ groups[0]->getAggregationResult(0)).getCount(), 1u);
+ EXPECT_EQUAL(groups[1]->getId().getInteger(), 0u);
+ EXPECT_EQUAL(static_cast<const CountAggregationResult &>(
+ groups[1]->getAggregationResult(0)).getCount(), 1u);
+ EXPECT_EQUAL(groups[2]->getId().getInteger(), 0u);
+ EXPECT_EQUAL(static_cast<const CountAggregationResult &>(
+ groups[2]->getAggregationResult(0)).getCount(), 4u);
+ EXPECT_EQUAL(groups[3]->getId().getInteger(), 0u);
+ EXPECT_EQUAL(static_cast<const CountAggregationResult &>(
+ groups[3]->getAggregationResult(0)).getCount(), 2u);
+ EXPECT_EQUAL(groups[4]->getId().getInteger(), 0u);
+ EXPECT_EQUAL(static_cast<const CountAggregationResult &>(
+ groups[4]->getAggregationResult(0)).getCount(), 2u);
+ testStreaming(grouping);
+}
+
+AttributeGuard createInt64Attribute() {
+ SingleInt64ExtAttribute *selectAttr1(
+ new SingleInt64ExtAttribute("selectAttr1"));
+ DocId docId(0);
+ selectAttr1->addDoc(docId);
+ selectAttr1->add(7);
+ selectAttr1->addDoc(docId);
+ selectAttr1->add(6);
+ selectAttr1->addDoc(docId);
+ selectAttr1->add(13);
+ selectAttr1->addDoc(docId);
+ selectAttr1->add(11);
+ selectAttr1->addDoc(docId);
+ selectAttr1->add(27);
+ selectAttr1->addDoc(docId);
+ selectAttr1->add(17);
+ selectAttr1->addDoc(docId);
+ selectAttr1->add(13);
+ selectAttr1->addDoc(docId);
+ selectAttr1->add(34);
+ selectAttr1->addDoc(docId);
+ selectAttr1->add(67891);
+ selectAttr1->addDoc(docId);
+ selectAttr1->add(67892);
+
+ AttributeVector::SP spSelectAttr1(selectAttr1);
+ AttributeGuard attr1( spSelectAttr1 );
+ return attr1;
+}
+
+AttributeGuard createInt32Attribute() {
+ SingleInt32ExtAttribute *selectAttr1(
+ new SingleInt32ExtAttribute("selectAttr1"));
+ DocId docId(0);
+ selectAttr1->addDoc(docId);
+ selectAttr1->add(7);
+ selectAttr1->addDoc(docId);
+ selectAttr1->add(6);
+ selectAttr1->addDoc(docId);
+ selectAttr1->add(13);
+ selectAttr1->addDoc(docId);
+ selectAttr1->add(11);
+ selectAttr1->addDoc(docId);
+ selectAttr1->add(27);
+ selectAttr1->addDoc(docId);
+ selectAttr1->add(17);
+ selectAttr1->addDoc(docId);
+ selectAttr1->add(13);
+ selectAttr1->addDoc(docId);
+ selectAttr1->add(34);
+ selectAttr1->addDoc(docId);
+ selectAttr1->add(67891);
+ selectAttr1->addDoc(docId);
+ selectAttr1->add(67892);
+
+ AttributeVector::SP spSelectAttr1(selectAttr1);
+ AttributeGuard attr1( spSelectAttr1 );
+ return attr1;
+}
+
+AttributeGuard createInt16Attribute() {
+ SingleInt16ExtAttribute *selectAttr1(
+ new SingleInt16ExtAttribute("selectAttr1"));
+ DocId docId(0);
+ selectAttr1->addDoc(docId);
+ selectAttr1->add(7);
+ selectAttr1->addDoc(docId);
+ selectAttr1->add(6);
+ selectAttr1->addDoc(docId);
+ selectAttr1->add(13);
+ selectAttr1->addDoc(docId);
+ selectAttr1->add(11);
+ selectAttr1->addDoc(docId);
+ selectAttr1->add(27);
+ selectAttr1->addDoc(docId);
+ selectAttr1->add(17);
+ selectAttr1->addDoc(docId);
+ selectAttr1->add(13);
+ selectAttr1->addDoc(docId);
+ selectAttr1->add(34);
+ selectAttr1->addDoc(docId);
+ selectAttr1->add(67891);
+ selectAttr1->addDoc(docId);
+ selectAttr1->add(67892);
+
+ AttributeVector::SP spSelectAttr1(selectAttr1);
+ AttributeGuard attr1( spSelectAttr1 );
+ return attr1;
+}
+
+AttributeGuard createInt8Attribute() {
+ SingleInt8ExtAttribute *selectAttr1(
+ new SingleInt8ExtAttribute("selectAttr1"));
+ DocId docId(0);
+ selectAttr1->addDoc(docId);
+ selectAttr1->add(7);
+ selectAttr1->addDoc(docId);
+ selectAttr1->add(6);
+ selectAttr1->addDoc(docId);
+ selectAttr1->add(13);
+ selectAttr1->addDoc(docId);
+ selectAttr1->add(11);
+ selectAttr1->addDoc(docId);
+ selectAttr1->add(27);
+ selectAttr1->addDoc(docId);
+ selectAttr1->add(17);
+ selectAttr1->addDoc(docId);
+ selectAttr1->add(13);
+ selectAttr1->addDoc(docId);
+ selectAttr1->add(34);
+ selectAttr1->addDoc(docId);
+ selectAttr1->add(67891);
+ selectAttr1->addDoc(docId);
+ selectAttr1->add(67892);
+
+ AttributeVector::SP spSelectAttr1(selectAttr1);
+ AttributeGuard attr1( spSelectAttr1 );
+ return attr1;
+}
+
+TEST("testIntegerTypes") {
+ EXPECT_EQUAL(AttributeNode(*createInt8Attribute()).prepare(false)
+ .getResult().getClass().id(),
+ uint32_t(Int64ResultNode::classId));
+ EXPECT_EQUAL(AttributeNode(*createInt8Attribute())
+ .prepare(true).getResult().getClass().id(),
+ uint32_t(Int8ResultNode::classId));
+ EXPECT_EQUAL(AttributeNode(*createInt16Attribute())
+ .prepare(false).getResult().getClass().id(),
+ uint32_t(Int64ResultNode::classId));
+ EXPECT_EQUAL(AttributeNode(*createInt16Attribute())
+ .prepare(true).getResult().getClass().id(),
+ uint32_t(Int16ResultNode::classId));
+ EXPECT_EQUAL(AttributeNode(*createInt32Attribute())
+ .prepare(false).getResult().getClass().id(),
+ uint32_t(Int64ResultNode::classId));
+ EXPECT_EQUAL(AttributeNode(*createInt32Attribute())
+ .prepare(true).getResult().getClass().id(),
+ uint32_t(Int32ResultNode::classId));
+ EXPECT_EQUAL(AttributeNode(*createInt64Attribute())
+ .prepare(false).getResult().getClass().id(),
+ uint32_t(Int64ResultNode::classId));
+ EXPECT_EQUAL(AttributeNode(*createInt64Attribute())
+ .prepare(true).getResult().getClass().id(),
+ uint32_t(Int64ResultNode::classId));
+
+ EXPECT_EQUAL(
+ AttributeNode(*AttributeGuard(AttributeVector::SP(
+ new MultiInt8ExtAttribute("test"))))
+ .prepare(false).getResult().getClass().id(),
+ uint32_t(Int64ResultNodeVector::classId));
+ EXPECT_EQUAL(
+ AttributeNode(*AttributeGuard(AttributeVector::SP(
+ new MultiInt8ExtAttribute("test"))))
+ .prepare(true).getResult().getClass().id(),
+ uint32_t(Int8ResultNodeVector::classId));
+ EXPECT_EQUAL(AttributeNode(*AttributeGuard(AttributeVector::SP(
+ new MultiInt16ExtAttribute("test"))))
+ .prepare(false).getResult().getClass().id(),
+ uint32_t(Int64ResultNodeVector::classId));
+ EXPECT_EQUAL(AttributeNode(*AttributeGuard(AttributeVector::SP(
+ new MultiInt16ExtAttribute("test"))))
+ .prepare(true).getResult().getClass().id(),
+ uint32_t(Int16ResultNodeVector::classId));
+ EXPECT_EQUAL(AttributeNode(*AttributeGuard(AttributeVector::SP(
+ new MultiInt32ExtAttribute("test"))))
+ .prepare(false).getResult().getClass().id(),
+ uint32_t(Int64ResultNodeVector::classId));
+ EXPECT_EQUAL(AttributeNode(*AttributeGuard(AttributeVector::SP(
+ new MultiInt32ExtAttribute("test"))))
+ .prepare(true).getResult().getClass().id(),
+ uint32_t(Int32ResultNodeVector::classId));
+ EXPECT_EQUAL(AttributeNode(*AttributeGuard(AttributeVector::SP(
+ new MultiInt64ExtAttribute("test"))))
+ .prepare(false).getResult().getClass().id(),
+ uint32_t(Int64ResultNodeVector::classId));
+ EXPECT_EQUAL(AttributeNode(*AttributeGuard(AttributeVector::SP(
+ new MultiInt64ExtAttribute("test"))))
+ .prepare(true).getResult().getClass().id(),
+ uint32_t(Int64ResultNodeVector::classId));
+}
+
+TEST("testStreamingAll") {
+ testStreaming(Int64ResultNode(89));
+ testStreaming(FloatResultNode(89.765));
+ testStreaming(StringResultNode("Tester StringResultNode streaming"));
+ testStreaming(RawResultNode("Tester RawResultNode streaming", 30));
+ testStreaming(CountAggregationResult());
+ testStreaming(ExpressionCountAggregationResult());
+ testStreaming(SumAggregationResult());
+ testStreaming(MinAggregationResult());
+ testStreaming(MaxAggregationResult());
+ testStreaming(AverageAggregationResult());
+ testStreaming(Group());
+ testStreaming(Grouping());
+ testStreaming(HitsAggregationResult());
+}
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/alignment/.gitignore b/searchlib/src/tests/alignment/.gitignore
new file mode 100644
index 00000000000..9668e4fc02c
--- /dev/null
+++ b/searchlib/src/tests/alignment/.gitignore
@@ -0,0 +1,4 @@
+.depend
+Makefile
+alignment_test
+searchlib_alignment_test_app
diff --git a/searchlib/src/tests/alignment/CMakeLists.txt b/searchlib/src/tests/alignment/CMakeLists.txt
new file mode 100644
index 00000000000..3695c600f9b
--- /dev/null
+++ b/searchlib/src/tests/alignment/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_alignment_test_app
+ SOURCES
+ alignment.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_alignment_test_app COMMAND searchlib_alignment_test_app)
diff --git a/searchlib/src/tests/alignment/DESC b/searchlib/src/tests/alignment/DESC
new file mode 100644
index 00000000000..a37dbbc1c7a
--- /dev/null
+++ b/searchlib/src/tests/alignment/DESC
@@ -0,0 +1 @@
+alignment test. Take a look at alignment.cpp for details.
diff --git a/searchlib/src/tests/alignment/FILES b/searchlib/src/tests/alignment/FILES
new file mode 100644
index 00000000000..067828da485
--- /dev/null
+++ b/searchlib/src/tests/alignment/FILES
@@ -0,0 +1 @@
+alignment.cpp
diff --git a/searchlib/src/tests/alignment/alignment.cpp b/searchlib/src/tests/alignment/alignment.cpp
new file mode 100644
index 00000000000..882e0942976
--- /dev/null
+++ b/searchlib/src/tests/alignment/alignment.cpp
@@ -0,0 +1,68 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("alignment_test");
+
+#include <sys/resource.h>
+#include <sys/time.h>
+#include <vespa/vespalib/testkit/testapp.h>
+
+struct Timer {
+ rusage usage;
+ void start() {
+ getrusage(RUSAGE_SELF, &usage);
+ }
+ double stop() {
+ rusage tmp;
+ getrusage(RUSAGE_SELF, &tmp);
+ double startMs = (((double)usage.ru_utime.tv_sec) * 1000.0)
+ + (((double)usage.ru_utime.tv_usec) / 1000.0);
+ double stopMs = (((double)tmp.ru_utime.tv_sec) * 1000.0)
+ + (((double)tmp.ru_utime.tv_usec) / 1000.0);
+ return (stopMs - startMs);
+ }
+};
+
+TEST_SETUP(Test);
+
+double
+timeAccess(void *bufp, uint32_t len, double &sum)
+{
+ double *buf = (double *)bufp;
+ Timer timer;
+ timer.start();
+ for(uint32_t i = 0; i < 512 * 1024; ++i) {
+ for (uint32_t j = 0; j < len; ++j) {
+ sum += buf[j];
+ }
+ }
+ double ret = timer.stop();
+ return ret;
+}
+
+int
+Test::Main()
+{
+ TEST_INIT("alignment_test");
+
+ uint32_t buf[129];
+ for (uint32_t i = 0; i < 129; ++i) {
+ buf[i] = i;
+ }
+
+ uintptr_t ptr = reinterpret_cast<uintptr_t>(&buf[0]);
+ bool aligned = (ptr % sizeof(double) == 0);
+
+ double foo = 0, bar = 0;
+ printf(aligned ? "ALIGNED\n" : "UNALIGNED\n");
+ printf("warmup time = %.2f\n", timeAccess(reinterpret_cast<void*>(&buf[0]), 64, foo));
+ printf("real time = %.2f\n", timeAccess(reinterpret_cast<void*>(&buf[0]), 64, bar));
+ EXPECT_EQUAL(foo, bar);
+
+ printf(!aligned ? "ALIGNED\n" : "UNALIGNED\n");
+ printf("warmup time = %.2f\n", timeAccess(reinterpret_cast<void*>(&buf[1]), 64, foo));
+ printf("real time = %.2f\n", timeAccess(reinterpret_cast<void*>(&buf[1]), 64, bar));
+ EXPECT_EQUAL(foo, bar);
+
+ TEST_DONE();
+}
diff --git a/searchlib/src/tests/attribute/.gitignore b/searchlib/src/tests/attribute/.gitignore
new file mode 100644
index 00000000000..732912ab981
--- /dev/null
+++ b/searchlib/src/tests/attribute/.gitignore
@@ -0,0 +1,11 @@
+*.dat
+*.idx
+*.weight
+.depend
+Makefile
+attribute_test
+attributebenchmark
+searchlib_attribute_test_app
+searchlib_attributeguard_test_app
+searchlib_changevector_test_app
+searchlib_attributebenchmark_app
diff --git a/searchlib/src/tests/attribute/CMakeLists.txt b/searchlib/src/tests/attribute/CMakeLists.txt
new file mode 100644
index 00000000000..0598b5776a8
--- /dev/null
+++ b/searchlib/src/tests/attribute/CMakeLists.txt
@@ -0,0 +1,29 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_attributeguard_test_app
+ SOURCES
+ attributeguard.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_attributeguard_test_app COMMAND sh attributeguard_test.sh)
+vespa_add_executable(searchlib_attribute_test_app
+ SOURCES
+ attribute_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_attribute_test_app COMMAND sh attribute_test.sh)
+vespa_add_executable(searchlib_changevector_test_app
+ SOURCES
+ changevector_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_changevector_test_app COMMAND sh changevector_test.sh)
+vespa_add_executable(searchlib_attributebenchmark_app
+ SOURCES
+ attributebenchmark.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_attributebenchmark_app COMMAND searchlib_attributebenchmark_app BENCHMARK)
diff --git a/searchlib/src/tests/attribute/DESC b/searchlib/src/tests/attribute/DESC
new file mode 100644
index 00000000000..6a9215b1a3b
--- /dev/null
+++ b/searchlib/src/tests/attribute/DESC
@@ -0,0 +1 @@
+Unit tests for attribute use.
diff --git a/searchlib/src/tests/attribute/FILES b/searchlib/src/tests/attribute/FILES
new file mode 100644
index 00000000000..b742644b750
--- /dev/null
+++ b/searchlib/src/tests/attribute/FILES
@@ -0,0 +1,2 @@
+attribute.cpp
+attributebenchmark.cpp
diff --git a/searchlib/src/tests/attribute/attribute_test.cpp b/searchlib/src/tests/attribute/attribute_test.cpp
new file mode 100644
index 00000000000..b1d4e675e23
--- /dev/null
+++ b/searchlib/src/tests/attribute/attribute_test.cpp
@@ -0,0 +1,2200 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/document/fieldvalue/intfieldvalue.h>
+#include <vespa/document/fieldvalue/stringfieldvalue.h>
+#include <vespa/searchlib/attribute/attribute.h>
+#include <vespa/searchlib/attribute/attributefile.h>
+#include <vespa/searchlib/attribute/attributeguard.h>
+#include <vespa/searchlib/attribute/attributefactory.h>
+#include <vespa/searchlib/attribute/attributememorysavetarget.h>
+#include <vespa/searchlib/attribute/singlenumericattribute.h>
+#include <vespa/searchlib/attribute/multinumericattribute.h>
+#include <vespa/searchlib/attribute/singlestringattribute.h>
+#include <vespa/searchlib/attribute/multistringattribute.h>
+#include <vespa/searchlib/attribute/attrvector.h>
+#include <vespa/vespalib/testkit/testapp.h>
+#include <vespa/searchlib/index/dummyfileheadercontext.h>
+#include <vespa/searchlib/util/randomgenerator.h>
+#include <vespa/vespalib/io/fileutil.h>
+
+#include <vespa/searchlib/attribute/attributevector.hpp>
+
+#include <vespa/log/log.h>
+LOG_SETUP("attribute_test");
+
+
+using namespace document;
+using std::shared_ptr;
+using search::common::FileHeaderContext;
+using search::index::DummyFileHeaderContext;
+using search::attribute::BasicType;
+using search::attribute::IAttributeVector;
+
+namespace
+{
+
+
+vespalib::string empty;
+vespalib::string clstmp("clstmp");
+vespalib::string asuDir("asutmp");
+
+bool
+isUnsignedSmallIntAttribute(const BasicType::Type &type)
+{
+ switch (type)
+ {
+ case BasicType::UINT1:
+ case BasicType::UINT2:
+ case BasicType::UINT4:
+ return true;
+ default:
+ return false;
+ }
+}
+
+bool
+isUnsignedSmallIntAttribute(const AttributeVector &a)
+{
+ return isUnsignedSmallIntAttribute(a.getBasicType());
+}
+
+template <typename BufferType>
+void
+expectZero(const BufferType &b)
+{
+ EXPECT_EQUAL(0, b);
+}
+
+template <>
+void
+expectZero(const vespalib::string &b)
+{
+ EXPECT_EQUAL(empty, b);
+}
+
+uint64_t
+statSize(const vespalib::string &fileName)
+{
+ FastOS_StatInfo statInfo;
+ if (EXPECT_TRUE(FastOS_File::Stat(fileName.c_str(), &statInfo))) {
+ return statInfo._size;
+ } else {
+ return 0u;
+ }
+}
+
+uint64_t
+statSize(const AttributeVector &a)
+{
+ vespalib::string baseFileName = a.getBaseFileName();
+ uint64_t resultSize = statSize(baseFileName + ".dat");
+ if (a.hasMultiValue()) {
+ resultSize += statSize(baseFileName + ".idx");
+ }
+ if (a.hasWeightedSetType()) {
+ resultSize += statSize(baseFileName + ".weight");
+ }
+ if (a.hasEnum() && a.getEnumeratedSave()) {
+ resultSize += statSize(baseFileName + ".udat");
+ }
+ return resultSize;
+}
+
+
+bool
+preciseEstimatedSize(const AttributeVector &a)
+{
+ if (a.getBasicType() == BasicType::STRING &&
+ EXPECT_TRUE(a.hasEnum()) && !a.getEnumeratedSave()) {
+ return false; // Using average of string lens, can be somewhat off
+ }
+ return true;
+}
+
+}
+
+namespace search {
+
+using attribute::CollectionType;
+using attribute::Config;
+
+class AttributeTest : public vespalib::TestApp
+{
+private:
+ typedef AttributeVector::SP AttributePtr;
+
+ void addDocs(const AttributePtr & v, size_t sz);
+ template <typename VectorType>
+ void populate(VectorType & ptr, unsigned seed);
+ template <typename VectorType, typename BufferType>
+ void compare(VectorType & a, VectorType & b);
+
+ void testReloadInt(const AttributePtr & a, const AttributePtr & b, const AttributePtr & c, size_t numDocs);
+ void testReloadString(const AttributePtr & a, const AttributePtr & b, const AttributePtr & c, size_t numDocs);
+ template <typename VectorType, typename BufferType>
+ void testReload(const AttributePtr & a, const AttributePtr & b, const AttributePtr & c);
+ void testMemorySaverInt(const AttributePtr & a, const AttributePtr & b, size_t numDocs);
+ void testMemorySaverString(const AttributePtr & a, const AttributePtr & b, size_t numDocs);
+ template <typename VectorType, typename BufferType>
+ void testMemorySaver(const AttributePtr & a, const AttributePtr & b);
+
+ void testReload();
+ void testHasLoadData();
+ void testMemorySaver();
+
+ void commit(const AttributePtr & ptr);
+
+ template <typename T>
+ void fillNumeric(std::vector<T> & values, uint32_t numValues);
+ void fillString(std::vector<vespalib::string> & values, uint32_t numValues);
+ template <typename VectorType, typename BufferType>
+ bool appendToVector(VectorType & v, uint32_t doc, uint32_t valueCount,
+ const std::vector<BufferType> & values);
+ template <typename BufferType>
+ bool checkCount(const AttributePtr & ptr, uint32_t doc, uint32_t valueCount,
+ uint32_t numValues, const BufferType & value);
+ template <typename BufferType>
+ bool checkContent(const AttributePtr & ptr, uint32_t doc, uint32_t valueCount,
+ uint32_t range, const std::vector<BufferType> & values);
+
+ // CollectionType::SINGLE
+ template <typename VectorType, typename BufferType, typename BaseType>
+ void testSingle(const AttributePtr & ptr, const std::vector<BufferType> & values);
+ void testSingle();
+
+ // CollectionType::ARRAY
+ template <typename BufferType>
+ void printArray(const AttributePtr & ptr);
+ template <typename VectorType, typename BufferType>
+ void testArray(const AttributePtr & ptr, const std::vector<BufferType> & values);
+ void testArray();
+
+ // CollectionType::WSET
+ template <typename BufferType>
+ void printWeightedSet(const AttributePtr & ptr);
+ template <typename VectorType, typename BufferType>
+ void testWeightedSet(const AttributePtr & ptr, const std::vector<BufferType> & values);
+ void testWeightedSet();
+ void testBaseName();
+
+ template <typename VectorType, typename BufferType>
+ void testArithmeticValueUpdate(const AttributePtr & ptr);
+ void testArithmeticValueUpdate();
+
+ template <typename VectorType, typename BaseType, typename BufferType>
+ void testArithmeticWithUndefinedValue(const AttributePtr & ptr, BaseType before, BaseType after);
+ void testArithmeticWithUndefinedValue();
+
+ template <typename VectorType, typename BufferType>
+ void testMapValueUpdate(const AttributePtr & ptr, BufferType initValue,
+ const FieldValue & initFieldValue, const FieldValue & nonExistant,
+ bool removeIfZero, bool createIfNonExistant);
+ void testMapValueUpdate();
+
+ void testStatus();
+ void testNullProtection();
+ void testGeneration(const AttributePtr & attr, bool exactStatus);
+ void testGeneration();
+
+ void
+ testCreateSerialNum(void);
+
+ template <typename VectorType, typename BufferType>
+ void
+ testCompactLidSpace(const Config &config,
+ bool fs,
+ bool es);
+
+ template <typename VectorType, typename BufferType>
+ void
+ testCompactLidSpace(const Config &config);
+
+ void
+ testCompactLidSpace(const Config &config);
+
+ void
+ testCompactLidSpace(void);
+
+ template <typename AttributeType>
+ void requireThatAddressSpaceUsageIsReported(const Config &config, bool fastSearch);
+ template <typename AttributeType>
+ void requireThatAddressSpaceUsageIsReported(const Config &config);
+ void requireThatAddressSpaceUsageIsReported();
+
+public:
+ AttributeTest() { }
+ int Main();
+};
+
+void AttributeTest::testBaseName()
+{
+ AttributeVector::BaseName v("attr1");
+ EXPECT_EQUAL(v.getAttributeName(), "attr1");
+ EXPECT_TRUE(v.getSnapshotName().empty());
+ // EXPECT_TRUE(v.getIndexName().empty());
+ EXPECT_EQUAL("", v.getIndexName());
+ EXPECT_TRUE(v.getDirName().empty());
+ v = "attribute/attr1/attr1";
+ EXPECT_EQUAL(v.getAttributeName(), "attr1");
+ EXPECT_TRUE(v.getSnapshotName().empty());
+ // EXPECT_TRUE(v.getIndexName().empty());
+ EXPECT_EQUAL("", v.getIndexName());
+ EXPECT_EQUAL(v.getDirName(), "attribute/attr1");
+ v = "attribute/attr1/snapshot-X/attr1";
+ EXPECT_EQUAL(v.getAttributeName(), "attr1");
+ EXPECT_EQUAL(v.getSnapshotName(), "snapshot-X");
+ // EXPECT_TRUE(v.getIndexName().empty());
+ EXPECT_EQUAL("", v.getIndexName());
+ EXPECT_EQUAL(v.getDirName(), "attribute/attr1/snapshot-X");
+ v = "/attribute/attr1/snapshot-X/attr1";
+ EXPECT_EQUAL(v.getAttributeName(), "attr1");
+ EXPECT_EQUAL(v.getSnapshotName(), "snapshot-X");
+ // EXPECT_TRUE(v.getIndexName().empty());
+ EXPECT_EQUAL("", v.getIndexName());
+ EXPECT_EQUAL(v.getDirName(), "/attribute/attr1/snapshot-X");
+ v = "index.1/1.ready/attribute/attr1/snapshot-X/attr1";
+ EXPECT_EQUAL(v.getAttributeName(), "attr1");
+ EXPECT_EQUAL(v.getSnapshotName(), "snapshot-X");
+ EXPECT_EQUAL(v.getIndexName(), "index.1");
+ EXPECT_EQUAL(v.getDirName(), "index.1/1.ready/attribute/attr1/snapshot-X");
+ v = "/index.1/1.ready/attribute/attr1/snapshot-X/attr1";
+ EXPECT_EQUAL(v.getAttributeName(), "attr1");
+ EXPECT_EQUAL(v.getSnapshotName(), "snapshot-X");
+ EXPECT_EQUAL(v.getIndexName(), "index.1");
+ EXPECT_EQUAL(v.getDirName(),
+ "/index.1/1.ready/attribute/attr1/snapshot-X");
+ v = "xxxyyyy/zzz/index.1/1.ready/attribute/attr1/snapshot-X/attr1";
+ EXPECT_EQUAL(v.getAttributeName(), "attr1");
+ EXPECT_EQUAL(v.getSnapshotName(), "snapshot-X");
+ EXPECT_EQUAL(v.getIndexName(), "index.1");
+ EXPECT_EQUAL(v.getDirName(),
+ "xxxyyyy/zzz/index.1/1.ready/attribute/attr1/snapshot-X");
+}
+
+void AttributeTest::addDocs(const AttributePtr & v, size_t sz)
+{
+ if (sz) {
+ AttributeVector::DocId docId;
+ for(size_t i(0); i< sz; i++) {
+ EXPECT_TRUE( v->addDoc(docId) );
+ }
+ EXPECT_TRUE( docId+1 == sz );
+ EXPECT_TRUE( v->getNumDocs() == sz );
+ commit(v);
+ }
+}
+
+
+template <>
+void AttributeTest::populate(IntegerAttribute & v, unsigned seed)
+{
+ srand(seed);
+ int weight = 1;
+ for(size_t i(0), m(v.getNumDocs()); i < m; i++) {
+ v.clearDoc(i);
+ if (v.hasMultiValue()) {
+ if (v.hasWeightedSetType()) {
+ weight = (rand() % 256) - 128;
+ }
+ for (size_t j(0); j <= i; j++) {
+ EXPECT_TRUE( v.append(i, rand(), weight) );
+ }
+ } else {
+ EXPECT_TRUE( v.update(i, rand()) );
+ }
+ }
+ v.commit();
+}
+
+template <>
+void AttributeTest::populate(FloatingPointAttribute & v, unsigned seed)
+{
+ srand(seed);
+ int weight = 1;
+ for(size_t i(0), m(v.getNumDocs()); i < m; i++) {
+ v.clearDoc(i);
+ if (v.hasMultiValue()) {
+ if (v.hasWeightedSetType()) {
+ weight = (rand() % 256) - 128;
+ }
+ for (size_t j(0); j <= i; j++) {
+ EXPECT_TRUE( v.append(i, rand() * 1.25, weight) );
+ }
+ } else {
+ EXPECT_TRUE( v.update(i, rand() * 1.25) );
+ }
+ }
+ v.commit();
+}
+
+template <>
+void AttributeTest::populate(StringAttribute & v, unsigned seed)
+{
+ RandomGenerator rnd(seed);
+ int weight = 1;
+ for(size_t i(0), m(v.getNumDocs()); i < m; i++) {
+ v.clearDoc(i);
+ if (v.hasMultiValue()) {
+ if (v.hasWeightedSetType()) {
+ weight = rnd.rand(0, 256) - 128;
+ }
+ for (size_t j(0); j <= i; j++) {
+ EXPECT_TRUE( v.append(i, rnd.getRandomString(2, 50), weight) );
+ }
+ } else {
+ EXPECT_TRUE( v.update(i, rnd.getRandomString(2, 50)) );
+ }
+ }
+ v.commit();
+}
+
+template <typename VectorType, typename BufferType>
+void AttributeTest::compare(VectorType & a, VectorType & b)
+{
+ EXPECT_EQUAL(a.getNumDocs(), b.getNumDocs());
+ ASSERT_TRUE(a.getNumDocs() == b.getNumDocs());
+ uint32_t asz(a.getMaxValueCount());
+ uint32_t bsz(b.getMaxValueCount());
+ BufferType *av = new BufferType[asz];
+ BufferType *bv = new BufferType[bsz];
+
+ for (size_t i(0), m(a.getNumDocs()); i < m; i++) {
+ ASSERT_TRUE(asz >= static_cast<uint32_t>(a.getValueCount(i)));
+ ASSERT_TRUE(bsz >= static_cast<uint32_t>(b.getValueCount(i)));
+ EXPECT_EQUAL(a.getValueCount(i), b.getValueCount(i));
+ ASSERT_TRUE(a.getValueCount(i) == b.getValueCount(i));
+ EXPECT_EQUAL(static_cast<const AttributeVector &>(a).get(i, av, asz), static_cast<uint32_t>(a.getValueCount(i)));
+ EXPECT_EQUAL(static_cast<const AttributeVector &>(b).get(i, bv, bsz), static_cast<uint32_t>(b.getValueCount(i)));
+ for(size_t j(0), k(std::min(a.getValueCount(i), b.getValueCount(i))); j < k; j++) {
+ EXPECT_TRUE(av[j] == bv[j]);
+ }
+ }
+ delete [] bv;
+ delete [] av;
+}
+
+void AttributeTest::testReloadInt(const AttributePtr & a, const AttributePtr & b, const AttributePtr & c, size_t numDocs)
+{
+ addDocs(a, numDocs);
+ addDocs(b, numDocs);
+ populate(static_cast<IntegerAttribute &>(*a.get()), 17);
+ populate(static_cast<IntegerAttribute &>(*b.get()), 17);
+ if (a->hasWeightedSetType()) {
+ testReload<IntegerAttribute, IntegerAttribute::WeightedInt>(a, b, c);
+ } else {
+ testReload<IntegerAttribute, IntegerAttribute::largeint_t>(a, b, c);
+ }
+}
+
+
+void AttributeTest::testReloadString(const AttributePtr & a, const AttributePtr & b, const AttributePtr & c, size_t numDocs)
+{
+ addDocs(a, numDocs);
+ addDocs(b, numDocs);
+ populate(static_cast<StringAttribute &>(*a.get()), 17);
+ populate(static_cast<StringAttribute &>(*b.get()), 17);
+ if (a->hasWeightedSetType()) {
+ testReload<StringAttribute, StringAttribute::WeightedString>(a, b, c);
+ } else {
+ testReload<StringAttribute, vespalib::string>(a, b, c);
+ }
+}
+
+template <typename VectorType, typename BufferType>
+void AttributeTest::testReload(const AttributePtr & a, const AttributePtr & b, const AttributePtr & c)
+{
+ LOG(info, "testReload: vector '%s'", a->getName().c_str());
+
+ compare<VectorType, BufferType>
+ (*(static_cast<VectorType *>(a.get())), *(static_cast<VectorType *>(b.get())));
+ a->setCreateSerialNum(43u);
+ EXPECT_TRUE( a->saveAs(b->getBaseFileName()) );
+ if (preciseEstimatedSize(*a)) {
+ EXPECT_EQUAL(statSize(*b), a->getEstimatedSaveByteSize());
+ } else {
+ double estSize = a->getEstimatedSaveByteSize();
+ double actSize = statSize(*b);
+ EXPECT_LESS_EQUAL(actSize * 1.0, estSize * 1.3);
+ EXPECT_GREATER_EQUAL(actSize * 1.0, estSize * 0.7);
+ }
+ EXPECT_TRUE( a->saveAs(c->getBaseFileName()) );
+ if (preciseEstimatedSize(*a)) {
+ EXPECT_EQUAL(statSize(*c), a->getEstimatedSaveByteSize());
+ }
+ EXPECT_TRUE( b->load() );
+ EXPECT_EQUAL(43u, b->getCreateSerialNum());
+ compare<VectorType, BufferType>
+ (*(static_cast<VectorType *>(a.get())), *(static_cast<VectorType *>(b.get())));
+ EXPECT_TRUE( c->load() );
+ compare<VectorType, BufferType>
+ (*(static_cast<VectorType *>(a.get())), *(static_cast<VectorType *>(c.get())));
+
+ if (isUnsignedSmallIntAttribute(*a))
+ return;
+ populate(static_cast<VectorType &>(*b.get()), 700);
+ populate(static_cast<VectorType &>(*c.get()), 700);
+ compare<VectorType, BufferType>
+ (*(static_cast<VectorType *>(b.get())), *(static_cast<VectorType *>(c.get())));
+
+ {
+ ReadAttributeFile readC(c->getBaseFileName(), c->getConfig());
+ WriteAttributeFile writeC(b->getBaseFileName(), b->getConfig(),
+ DummyFileHeaderContext(),
+ c->getNumDocs());
+ std::unique_ptr<AttributeFile::Record> record(readC.getRecord());
+ ASSERT_TRUE(record.get());
+ for (size_t i(0), m(c->getNumDocs()); i < m; i++) {
+ EXPECT_TRUE(readC.read(*record));
+ EXPECT_TRUE(writeC.write(*record));
+ }
+ EXPECT_TRUE( ! readC.read(*record));
+ }
+ EXPECT_TRUE( b->load() );
+ compare<VectorType, BufferType>
+ (*(static_cast<VectorType *>(a.get())),
+ *(static_cast<VectorType *>(b.get())));
+ {
+ ReadAttributeFile readC(c->getBaseFileName(), c->getConfig());
+ WriteAttributeFile writeC(b->getBaseFileName(), b->getConfig(),
+ DummyFileHeaderContext(),
+ c->getNumDocs());
+ readC.enableDirectIO();
+ writeC.enableDirectIO();
+ std::unique_ptr<AttributeFile::Record> record(readC.getRecord());
+ ASSERT_TRUE(record.get());
+ for (size_t i(0), m(c->getNumDocs()); i < m; i++) {
+ EXPECT_TRUE(readC.read(*record));
+ EXPECT_TRUE(writeC.write(*record));
+ }
+ EXPECT_TRUE( ! readC.read(*record));
+ }
+ EXPECT_TRUE( b->load() );
+ compare<VectorType, BufferType>
+ (*(static_cast<VectorType *>(a.get())), *(static_cast<VectorType *>(b.get())));
+}
+
+
+void AttributeTest::testReload()
+{
+ // IntegerAttribute
+ // CollectionType::SINGLE
+ {
+ AttributePtr iv1 = AttributeFactory::createAttribute("sint32_1", Config(BasicType::INT32, CollectionType::SINGLE));
+ AttributePtr iv2 = AttributeFactory::createAttribute("sint32_2", Config(BasicType::INT32, CollectionType::SINGLE));
+ AttributePtr iv3 = AttributeFactory::createAttribute("sint32_3", Config(BasicType::INT32, CollectionType::SINGLE));
+ testReloadInt(iv1, iv2, iv3, 0);
+ testReloadInt(iv1, iv2, iv3, 100);
+ }
+ {
+ AttributePtr iv1 = AttributeFactory::createAttribute("suint4_1", Config(BasicType::UINT4, CollectionType::SINGLE));
+ AttributePtr iv2 = AttributeFactory::createAttribute("suint4_2", Config(BasicType::UINT4, CollectionType::SINGLE));
+ AttributePtr iv3 = AttributeFactory::createAttribute("suint4_3", Config(BasicType::UINT4, CollectionType::SINGLE));
+ testReloadInt(iv1, iv2, iv3, 0);
+ testReloadInt(iv1, iv2, iv3, 100);
+ }
+ {
+ AttributePtr iv1 = AttributeFactory::createAttribute("suint2_1", Config(BasicType::UINT2, CollectionType::SINGLE));
+ AttributePtr iv2 = AttributeFactory::createAttribute("suint2_2", Config(BasicType::UINT2, CollectionType::SINGLE));
+ AttributePtr iv3 = AttributeFactory::createAttribute("suint2_3", Config(BasicType::UINT2, CollectionType::SINGLE));
+ testReloadInt(iv1, iv2, iv3, 0);
+ testReloadInt(iv1, iv2, iv3, 100);
+ }
+ {
+ AttributePtr iv1 = AttributeFactory::createAttribute("suint1_1", Config(BasicType::UINT1, CollectionType::SINGLE));
+ AttributePtr iv2 = AttributeFactory::createAttribute("suint1_2", Config(BasicType::UINT1, CollectionType::SINGLE));
+ AttributePtr iv3 = AttributeFactory::createAttribute("suint1_3", Config(BasicType::UINT1, CollectionType::SINGLE));
+ testReloadInt(iv1, iv2, iv3, 0);
+ testReloadInt(iv1, iv2, iv3, 100);
+ }
+ {
+ Config cfg(BasicType::INT32, CollectionType::SINGLE);
+ cfg.setFastSearch(true);
+ AttributePtr iv1 = AttributeFactory::createAttribute("sfsint32_1", cfg);
+ AttributePtr iv2 = AttributeFactory::createAttribute("sfsint32_2", cfg);
+ AttributePtr iv3 = AttributeFactory::createAttribute("sfsint32_3", cfg);
+ testReloadInt(iv1, iv2, iv3, 0);
+ testReloadInt(iv1, iv2, iv3, 100);
+ }
+ // CollectionType::ARRAY
+ {
+ Config cfg(BasicType::INT8, CollectionType::ARRAY);
+ cfg.setFastSearch(true);
+ AttributePtr iv1 = AttributeFactory::createAttribute("flag_1", cfg);
+ AttributePtr iv2 = AttributeFactory::createAttribute("flag_2", cfg);
+ AttributePtr iv3 = AttributeFactory::createAttribute("flag_3", cfg);
+ testReloadInt(iv1, iv2, iv3, 0);
+ testReloadInt(iv1, iv2, iv3, 100);
+ }
+ {
+ AttributePtr iv1 = AttributeFactory::createAttribute("aint32_1", Config(BasicType::INT32, CollectionType::ARRAY));
+ AttributePtr iv2 = AttributeFactory::createAttribute("aint32_2", Config(BasicType::INT32, CollectionType::ARRAY));
+ AttributePtr iv3 = AttributeFactory::createAttribute("aint32_3", Config(BasicType::INT32, CollectionType::ARRAY));
+ testReloadInt(iv1, iv2, iv3, 0);
+ testReloadInt(iv1, iv2, iv3, 100);
+ }
+ {
+ Config cfg(BasicType::INT32, CollectionType::ARRAY);
+ cfg.setFastSearch(true);
+ AttributePtr iv1 = AttributeFactory::createAttribute("afsint32_1", cfg);
+ AttributePtr iv2 = AttributeFactory::createAttribute("afsint32_2", cfg);
+ AttributePtr iv3 = AttributeFactory::createAttribute("afsint32_3", cfg);
+ testReloadInt(iv1, iv2, iv3, 0);
+ testReloadInt(iv1, iv2, iv3, 100);
+ }
+ // CollectionType::WSET
+ {
+ AttributePtr iv1 = AttributeFactory::createAttribute("wint32_1", Config(BasicType::INT32, CollectionType::WSET));
+ AttributePtr iv2 = AttributeFactory::createAttribute("wint32_2", Config(BasicType::INT32, CollectionType::WSET));
+ AttributePtr iv3 = AttributeFactory::createAttribute("wint32_3", Config(BasicType::INT32, CollectionType::WSET));
+ testReloadInt(iv1, iv2, iv3, 0);
+ testReloadInt(iv1, iv2, iv3, 100);
+ }
+ {
+ Config cfg(BasicType::INT32, CollectionType::WSET);
+ cfg.setFastSearch(true);
+ AttributePtr iv1 = AttributeFactory::createAttribute("wfsint32_1", cfg);
+ AttributePtr iv2 = AttributeFactory::createAttribute("wfsint32_2", cfg);
+ AttributePtr iv3 = AttributeFactory::createAttribute("wfsint32_3", cfg);
+ testReloadInt(iv1, iv2, iv3, 0);
+ testReloadInt(iv1, iv2, iv3, 100);
+ }
+
+
+ // StringAttribute
+ {
+ AttributePtr iv1 = AttributeFactory::createAttribute("sstring_1", Config(BasicType::STRING, CollectionType::SINGLE));
+ AttributePtr iv2 = AttributeFactory::createAttribute("sstring_2", Config(BasicType::STRING, CollectionType::SINGLE));
+ AttributePtr iv3 = AttributeFactory::createAttribute("sstring_3", Config(BasicType::STRING, CollectionType::SINGLE));
+ testReloadString(iv1, iv2, iv3, 0);
+ testReloadString(iv1, iv2, iv3, 100);
+ }
+ {
+ AttributePtr iv1 = AttributeFactory::createAttribute("astring_1", Config(BasicType::STRING, CollectionType::ARRAY));
+ AttributePtr iv2 = AttributeFactory::createAttribute("astring_2", Config(BasicType::STRING, CollectionType::ARRAY));
+ AttributePtr iv3 = AttributeFactory::createAttribute("astring_3", Config(BasicType::STRING, CollectionType::ARRAY));
+ testReloadString(iv1, iv2, iv3, 0);
+ testReloadString(iv1, iv2, iv3, 100);
+ }
+ {
+ AttributePtr iv1 = AttributeFactory::createAttribute("wstring_1", Config(BasicType::STRING, CollectionType::WSET));
+ AttributePtr iv2 = AttributeFactory::createAttribute("wstring_2", Config(BasicType::STRING, CollectionType::WSET));
+ AttributePtr iv3 = AttributeFactory::createAttribute("wstring_3", Config(BasicType::STRING, CollectionType::WSET));
+ testReloadString(iv1, iv2, iv3, 0);
+ testReloadString(iv1, iv2, iv3, 100);
+ }
+ {
+ Config cfg(Config(BasicType::STRING, CollectionType::SINGLE));
+ cfg.setFastSearch(true);
+ AttributePtr iv1 = AttributeFactory::createAttribute("sfsstring_1", cfg);
+ AttributePtr iv2 = AttributeFactory::createAttribute("sfsstring_2", cfg);
+ AttributePtr iv3 = AttributeFactory::createAttribute("sfsstring_3", cfg);
+ testReloadString(iv1, iv2, iv3, 0);
+ testReloadString(iv1, iv2, iv3, 100);
+ }
+ {
+ Config cfg(Config(BasicType::STRING, CollectionType::ARRAY));
+ cfg.setFastSearch(true);
+ AttributePtr iv1 = AttributeFactory::createAttribute("afsstring_1", cfg);
+ AttributePtr iv2 = AttributeFactory::createAttribute("afsstring_2", cfg);
+ AttributePtr iv3 = AttributeFactory::createAttribute("afsstring_3", cfg);
+ testReloadString(iv1, iv2, iv3, 0);
+ testReloadString(iv1, iv2, iv3, 100);
+ }
+ {
+ Config cfg(Config(BasicType::STRING, CollectionType::WSET));
+ cfg.setFastSearch(true);
+ AttributePtr iv1 = AttributeFactory::createAttribute("wsfsstring_1", cfg);
+ AttributePtr iv2 = AttributeFactory::createAttribute("wsfsstring_2", cfg);
+ AttributePtr iv3 = AttributeFactory::createAttribute("wsfsstring_3", cfg);
+ testReloadString(iv1, iv2, iv3, 0);
+ testReloadString(iv1, iv2, iv3, 100);
+ }
+}
+
+void AttributeTest::testHasLoadData()
+{
+ { // single value
+ AttributePtr av = AttributeFactory::createAttribute("loaddata1", Config(BasicType::INT32));
+ EXPECT_TRUE(!av->hasLoadData());
+ av->save();
+ EXPECT_TRUE(av->hasLoadData());
+ av->saveAs("loaddata2");
+ av = AttributeFactory::createAttribute("loaddata2", Config(BasicType::INT32));
+ EXPECT_TRUE(av->hasLoadData());
+ av->saveAs("loaddata3");
+ }
+ { // array
+ AttributePtr av = AttributeFactory::createAttribute("loaddata3", Config(BasicType::INT32, CollectionType::ARRAY));
+ EXPECT_TRUE(!av->hasLoadData());
+ av->save();
+ EXPECT_TRUE(av->hasLoadData());
+ av->saveAs("loaddata4");
+ av = AttributeFactory::createAttribute("loaddata4", Config(BasicType::INT32, CollectionType::ARRAY));
+ EXPECT_TRUE(av->hasLoadData());
+ av->saveAs("loaddata5");
+ }
+ { // wset
+ AttributePtr av = AttributeFactory::createAttribute("loaddata5", Config(BasicType::INT32, CollectionType::WSET));
+ EXPECT_TRUE(!av->hasLoadData());
+ av->save();
+ EXPECT_TRUE(av->hasLoadData());
+ av->saveAs("loaddata6");
+ av = AttributeFactory::createAttribute("loaddata6", Config(BasicType::INT32, CollectionType::WSET));
+ EXPECT_TRUE(av->hasLoadData());
+ }
+}
+
+void
+AttributeTest::testMemorySaverInt(const AttributePtr & a, const AttributePtr & b, size_t numDocs)
+{
+ addDocs(a, numDocs);
+ populate(static_cast<IntegerAttribute &>(*a.get()), 21);
+ if (a->hasWeightedSetType()) {
+ testMemorySaver<IntegerAttribute, IntegerAttribute::WeightedInt>(a, b);
+ } else {
+ testMemorySaver<IntegerAttribute, IntegerAttribute::largeint_t>(a, b);
+ }
+}
+
+void
+AttributeTest::testMemorySaverString(const AttributePtr & a, const AttributePtr & b, size_t numDocs)
+{
+ addDocs(a, numDocs);
+ populate(static_cast<StringAttribute &>(*a.get()), 21);
+ if (a->hasWeightedSetType()) {
+ testMemorySaver<StringAttribute, StringAttribute::WeightedString>(a, b);
+ } else {
+ testMemorySaver<StringAttribute, vespalib::string>(a, b);
+ }
+}
+
+template <typename VectorType, typename BufferType>
+void
+AttributeTest::testMemorySaver(const AttributePtr & a, const AttributePtr & b)
+{
+ LOG(info, "testMemorySaver: vector '%s'", a->getName().c_str());
+
+ AttributeMemorySaveTarget saveTarget;
+ EXPECT_TRUE(a->saveAs(b->getBaseFileName(), saveTarget));
+ FastOS_StatInfo statInfo;
+ vespalib::string datFile = vespalib::make_string("%s.dat", b->getBaseFileName().c_str());
+ EXPECT_TRUE(!FastOS_File::Stat(datFile.c_str(), &statInfo));
+ EXPECT_TRUE(saveTarget.writeToFile(TuneFileAttributes(),
+ DummyFileHeaderContext()));
+ EXPECT_TRUE(FastOS_File::Stat(datFile.c_str(), &statInfo));
+ EXPECT_TRUE(b->load());
+ compare<VectorType, BufferType>
+ (*(static_cast<VectorType *>(a.get())), *(static_cast<VectorType *>(b.get())));
+}
+
+void
+AttributeTest::testMemorySaver()
+{
+ // CollectionType::SINGLE
+ {
+ AttributePtr iv1 = AttributeFactory::createAttribute("sint32_1ms", Config(BasicType::INT32, CollectionType::SINGLE));
+ AttributePtr iv2 = AttributeFactory::createAttribute("sint32_2ms", Config(BasicType::INT32, CollectionType::SINGLE));
+ testMemorySaverInt(iv1, iv2, 100);
+ }
+ {
+ AttributePtr iv1 = AttributeFactory::createAttribute("suint4_1ms", Config(BasicType::UINT4, CollectionType::SINGLE));
+ AttributePtr iv2 = AttributeFactory::createAttribute("suint4_2ms", Config(BasicType::UINT4, CollectionType::SINGLE));
+ testMemorySaverInt(iv1, iv2, 100);
+ }
+ {
+ AttributePtr iv1 = AttributeFactory::createAttribute("sstr_1ms", Config(BasicType::STRING, CollectionType::SINGLE));
+ AttributePtr iv2 = AttributeFactory::createAttribute("sstr_2ms", Config(BasicType::STRING, CollectionType::SINGLE));
+ testMemorySaverString(iv1, iv2, 100);
+ }
+ // CollectionType::ARRAY
+ {
+ AttributePtr iv1 = AttributeFactory::createAttribute("aint32_1ms", Config(BasicType::INT32, CollectionType::ARRAY));
+ AttributePtr iv2 = AttributeFactory::createAttribute("aint32_2ms", Config(BasicType::INT32, CollectionType::ARRAY));
+ testMemorySaverInt(iv1, iv2, 100);
+ }
+ {
+ AttributePtr iv1 = AttributeFactory::createAttribute("astr_1ms", Config(BasicType::STRING, CollectionType::ARRAY));
+ AttributePtr iv2 = AttributeFactory::createAttribute("astr_2ms", Config(BasicType::STRING, CollectionType::ARRAY));
+ testMemorySaverString(iv1, iv2, 100);
+ }
+ // CollectionType::WSET
+ {
+ AttributePtr iv1 = AttributeFactory::createAttribute("wint32_1ms", Config(BasicType::INT32, CollectionType::WSET));
+ AttributePtr iv2 = AttributeFactory::createAttribute("wint32_2ms", Config(BasicType::INT32, CollectionType::WSET));
+ testMemorySaverInt(iv1, iv2, 100);
+ }
+ {
+ AttributePtr iv1 = AttributeFactory::createAttribute("wstr_1ms", Config(BasicType::STRING, CollectionType::WSET));
+ AttributePtr iv2 = AttributeFactory::createAttribute("wstr_2ms", Config(BasicType::STRING, CollectionType::WSET));
+ testMemorySaverString(iv1, iv2, 100);
+ }
+}
+
+
+template <typename T>
+void
+AttributeTest::fillNumeric(std::vector<T> & values, uint32_t numValues)
+{
+ values.clear();
+ values.reserve(numValues);
+ for (uint32_t i = 0; i < numValues; ++i) {
+ values.push_back(static_cast<T>(i));
+ }
+}
+
+void
+AttributeTest::fillString(std::vector<vespalib::string> & values, uint32_t numValues)
+{
+ values.clear();
+ values.reserve(numValues);
+ for (uint32_t i = 0; i < numValues; ++i) {
+ vespalib::asciistream ss;
+ ss << "string" << (i < 10 ? "0" : "") << i;
+ values.push_back(ss.str());
+ }
+}
+
+template <typename VectorType, typename BufferType>
+bool
+AttributeTest::appendToVector(VectorType & v, uint32_t doc, uint32_t valueCount,
+ const std::vector<BufferType> & values)
+{
+ bool retval = true;
+ for (uint32_t i = 0; i < valueCount; ++i) {
+ EXPECT_TRUE((retval = retval && v.append(doc, values[i], 1)));
+ }
+ return retval;
+}
+
+template <typename BufferType>
+bool
+AttributeTest::checkCount(const AttributePtr & ptr, uint32_t doc, uint32_t valueCount,
+ uint32_t numValues, const BufferType & value)
+{
+ std::vector<BufferType> buffer(valueCount);
+ if (!EXPECT_EQUAL(valueCount, ptr->getValueCount(doc))) return false;
+ if (!EXPECT_EQUAL(valueCount, ptr->get(doc, &buffer[0], buffer.size()))) return false;
+ if (!EXPECT_EQUAL(numValues, static_cast<uint32_t>(std::count(buffer.begin(), buffer.end(), value)))) return false;
+ return true;
+}
+
+template <typename BufferType>
+bool
+AttributeTest::checkContent(const AttributePtr & ptr, uint32_t doc, uint32_t valueCount,
+ uint32_t range, const std::vector<BufferType> & values)
+{
+ std::vector<BufferType> buffer(valueCount);
+ bool retval = true;
+ EXPECT_TRUE((retval = retval && (static_cast<uint32_t>(ptr->getValueCount(doc)) == valueCount)));
+ EXPECT_TRUE((retval = retval && (ptr->get(doc, &buffer[0], buffer.size()) == valueCount)));
+ for (uint32_t i = 0; i < valueCount; ++i) {
+ EXPECT_TRUE((retval = retval && (buffer[i] == values[i % range])));
+ }
+ return retval;
+}
+
+
+//-----------------------------------------------------------------------------
+// CollectionType::SINGLE
+//-----------------------------------------------------------------------------
+
+template <typename VectorType, typename BufferType, typename BaseType>
+void
+AttributeTest::testSingle(const AttributePtr & ptr, const std::vector<BufferType> & values)
+{
+ LOG(info, "testSingle: vector '%s' with %u documents and %lu values",
+ ptr->getName().c_str(), ptr->getNumDocs(), static_cast<unsigned long>(values.size()));
+
+ VectorType & v = *(static_cast<VectorType *>(ptr.get()));
+ uint32_t numUniques = values.size();
+ std::vector<BufferType> buffer(1);
+
+ // test update()
+ for (uint32_t doc = 0; doc < ptr->getNumDocs(); ++doc) {
+ EXPECT_TRUE(ptr->getValueCount(doc) == 1);
+ uint32_t i = doc % numUniques;
+ uint32_t j = (doc + 1) % numUniques;
+
+ EXPECT_TRUE(v.update(doc, values[i]));
+ ptr->commit();
+ EXPECT_TRUE(checkCount(ptr, doc, 1, 1, values[i]));
+
+ EXPECT_TRUE(v.update(doc, values[j]));
+ ptr->commit();
+ EXPECT_TRUE(checkCount(ptr, doc, 1, 1, values[j]));
+ }
+ EXPECT_TRUE(!v.update(ptr->getNumDocs(), values[0]));
+
+ // test append()
+ for (uint32_t doc = 0; doc < ptr->getNumDocs(); ++doc) {
+ EXPECT_TRUE(!v.append(doc, values[0], 1));
+ }
+ EXPECT_TRUE(!v.append(ptr->getNumDocs(), values[0], 1));
+
+ // test remove()
+ for (uint32_t doc = 0; doc < ptr->getNumDocs(); ++doc) {
+ EXPECT_TRUE(!v.remove(doc, values[0], 1));
+ }
+ EXPECT_TRUE(!v.remove(ptr->getNumDocs(), values[0], 1));
+
+ bool smallUInt = isUnsignedSmallIntAttribute(*ptr);
+ // test clearDoc()
+ for (uint32_t doc = 0; doc < ptr->getNumDocs(); ++doc) {
+ uint32_t i = (doc + 2) % numUniques;
+
+ EXPECT_TRUE(v.update(doc, values[i]));
+ if (doc % 2 == 0) { // alternate clearing
+ ptr->clearDoc(doc);
+ }
+ ptr->commit();
+ EXPECT_EQUAL(1u, ptr->get(doc, &buffer[0], buffer.size()));
+ if (doc % 2 == 0) {
+ if (smallUInt) {
+ expectZero(buffer[0]);
+ } else {
+ EXPECT_TRUE(attribute::isUndefined<BaseType>(buffer[0]));
+ }
+ } else {
+ EXPECT_TRUE(!attribute::isUndefined<BaseType>(buffer[0]));
+ EXPECT_EQUAL(values[i], buffer[0]);
+ }
+ }
+ EXPECT_TRUE(!v.clearDoc(ptr->getNumDocs()));
+}
+
+void
+AttributeTest::testSingle()
+{
+ uint32_t numDocs = 1000;
+ uint32_t numUniques = 50;
+ uint32_t numUniqueNibbles = 9;
+ {
+ std::vector<AttributeVector::largeint_t> values;
+ fillNumeric(values, numUniques);
+ std::vector<AttributeVector::largeint_t> nibbleValues;
+ fillNumeric(nibbleValues, numUniqueNibbles);
+ {
+ AttributePtr ptr = AttributeFactory::createAttribute("sv-int32", Config(BasicType::INT32, CollectionType::SINGLE));
+ addDocs(ptr, numDocs);
+ testSingle<IntegerAttribute, AttributeVector::largeint_t, int32_t>(ptr, values);
+ }
+ {
+ AttributePtr ptr = AttributeFactory::createAttribute("sv-uint4", Config(BasicType::UINT4, CollectionType::SINGLE));
+ addDocs(ptr, numDocs);
+ testSingle<IntegerAttribute, AttributeVector::largeint_t, int8_t>(ptr, nibbleValues);
+ }
+ {
+ Config cfg(BasicType::INT32, CollectionType::SINGLE);
+ cfg.setFastSearch(true);
+ AttributePtr ptr = AttributeFactory::createAttribute("sv-post-int32", cfg);
+ addDocs(ptr, numDocs);
+ testSingle<IntegerAttribute, AttributeVector::largeint_t, int32_t>(ptr, values);
+ }
+ }
+ {
+ std::vector<double> values;
+ fillNumeric(values, numUniques);
+ {
+ AttributePtr ptr = AttributeFactory::createAttribute("sv-float", Config(BasicType::FLOAT, CollectionType::SINGLE));
+ addDocs(ptr, numDocs);
+ testSingle<FloatingPointAttribute, double, float>(ptr, values);
+ }
+ {
+ Config cfg(BasicType::FLOAT, CollectionType::SINGLE);
+ cfg.setFastSearch(true);
+ AttributePtr ptr = AttributeFactory::createAttribute("sv-post-float", cfg);
+ addDocs(ptr, numDocs);
+ testSingle<FloatingPointAttribute, double, float>(ptr, values);
+ }
+
+ }
+ {
+ std::vector<vespalib::string> values;
+ fillString(values, numUniques);
+ {
+ AttributePtr ptr = AttributeFactory::createAttribute("sv-string", Config(BasicType::STRING, CollectionType::SINGLE));
+ addDocs(ptr, numDocs);
+ testSingle<StringAttribute, vespalib::string, vespalib::string>(ptr, values);
+ }
+ {
+ Config cfg(Config(BasicType::STRING, CollectionType::SINGLE));
+ cfg.setFastSearch(true);
+ AttributePtr ptr = AttributeFactory::createAttribute("sv-fs-string", cfg);
+ addDocs(ptr, numDocs);
+ testSingle<StringAttribute, vespalib::string, vespalib::string>(ptr, values);
+ }
+ }
+}
+
+
+//-----------------------------------------------------------------------------
+// CollectionType::ARRAY
+//-----------------------------------------------------------------------------
+
+template <typename VectorType, typename BufferType>
+void
+AttributeTest::testArray(const AttributePtr & ptr, const std::vector<BufferType> & values)
+{
+ LOG(info, "testArray: vector '%s' with %i documents and %lu values",
+ ptr->getName().c_str(), ptr->getNumDocs(), static_cast<unsigned long>(values.size()));
+
+ VectorType & v = *(static_cast<VectorType *>(ptr.get()));
+ uint32_t numUniques = values.size();
+ ASSERT_TRUE(numUniques >= 6);
+
+
+ // test update()
+ EXPECT_EQUAL(ptr->getStatus().getUpdateCount(), 0u);
+ EXPECT_EQUAL(ptr->getStatus().getNonIdempotentUpdateCount(), 0u);
+ size_t sumAppends(0);
+ for (uint32_t doc = 0; doc < ptr->getNumDocs(); ++doc) {
+ uint32_t valueCount = doc % numUniques;
+ ptr->clearDoc(doc);
+
+ EXPECT_TRUE(appendToVector(v, doc, valueCount, values));
+ ptr->commit();
+ sumAppends += valueCount;
+
+ uint32_t i = doc % numUniques;
+ EXPECT_TRUE(v.update(doc, values[i]));
+ ptr->commit();
+ EXPECT_TRUE(checkCount(ptr, doc, 1, 1, values[i]));
+ }
+ EXPECT_TRUE(!v.update(ptr->getNumDocs(), values[0]));
+ EXPECT_EQUAL(ptr->getStatus().getUpdateCount(), (1 + 2)*ptr->getNumDocs() + sumAppends);
+ EXPECT_EQUAL(ptr->getStatus().getNonIdempotentUpdateCount(), sumAppends);
+
+
+ // test append()
+ for (uint32_t doc = 0; doc < ptr->getNumDocs(); ++doc) {
+ uint32_t valueCount = doc % numUniques;
+ ptr->clearDoc(doc);
+
+ // append unique values
+ EXPECT_TRUE(appendToVector(v, doc, valueCount, values));
+ ptr->commit();
+ EXPECT_TRUE(checkContent(ptr, doc, valueCount, valueCount, values));
+
+ // append duplicates
+ EXPECT_TRUE(appendToVector(v, doc, valueCount, values));
+ ptr->commit();
+ EXPECT_TRUE(checkContent(ptr, doc, valueCount * 2, valueCount, values));
+ }
+ EXPECT_TRUE(!v.append(ptr->getNumDocs(), values[0], 1));
+
+
+ // test remove()
+ for (uint32_t doc = 0; doc < ptr->getNumDocs(); ++doc) {
+ ptr->clearDoc(doc);
+
+ EXPECT_TRUE(v.append(doc, values[1], 1));
+ for (uint32_t i = 0; i < 3; ++i) {
+ EXPECT_TRUE(v.append(doc, values[3], 1));
+ }
+ for (uint32_t i = 0; i < 5; ++i) {
+ EXPECT_TRUE(v.append(doc, values[5], 1));
+ }
+
+ ptr->commit();
+ EXPECT_TRUE(checkCount(ptr, doc, 9, 1, values[1]));
+ EXPECT_TRUE(checkCount(ptr, doc, 9, 3, values[3]));
+ EXPECT_TRUE(checkCount(ptr, doc, 9, 5, values[5]));
+
+ EXPECT_TRUE(v.remove(doc, values[0], 1));
+ ptr->commit();
+ EXPECT_TRUE(checkCount(ptr, doc, 9, 1, values[1]));
+ EXPECT_TRUE(checkCount(ptr, doc, 9, 3, values[3]));
+ EXPECT_TRUE(checkCount(ptr, doc, 9, 5, values[5]));
+
+ EXPECT_TRUE(v.remove(doc, values[1], 1));
+ ptr->commit();
+ EXPECT_TRUE(checkCount(ptr, doc, 8, 0, values[1]));
+ EXPECT_TRUE(checkCount(ptr, doc, 8, 3, values[3]));
+ EXPECT_TRUE(checkCount(ptr, doc, 8, 5, values[5]));
+
+ EXPECT_TRUE(v.remove(doc, values[5], 1));
+ ptr->commit();
+ EXPECT_TRUE(checkCount(ptr, doc, 3, 0, values[1]));
+ EXPECT_TRUE(checkCount(ptr, doc, 3, 3, values[3]));
+ EXPECT_TRUE(checkCount(ptr, doc, 3, 0, values[5]));
+ }
+ EXPECT_TRUE(!v.remove(ptr->getNumDocs(), values[0], 1));
+
+
+ // test clearDoc()
+ for (uint32_t doc = 0; doc < ptr->getNumDocs(); ++doc) {
+ uint32_t valueCount = doc % numUniques;
+
+ ptr->clearDoc(doc);
+ for (uint32_t j = 0; j < valueCount; ++j) {
+ EXPECT_TRUE(v.append(doc, values[0], 1));
+ }
+ ptr->clearDoc(doc);
+ for (uint32_t j = 0; j < valueCount; ++j) {
+ EXPECT_TRUE(v.append(doc, values[1], 1));
+ }
+ ptr->commit();
+
+ EXPECT_TRUE(checkCount(ptr, doc, valueCount, valueCount, values[1]));
+ }
+ EXPECT_TRUE(!v.clearDoc(ptr->getNumDocs()));
+}
+
+template <typename BufferType>
+void
+AttributeTest::printArray(const AttributePtr & ptr)
+{
+ uint32_t bufferSize = ptr->getMaxValueCount();
+ std::vector<BufferType> buffer(bufferSize);
+ for (uint32_t doc = 0; doc < ptr->getNumDocs(); ++doc) {
+ uint32_t valueCount = ptr->get(doc, &buffer[0], buffer.size());
+ for (uint32_t i = 0; i < valueCount; ++i) {
+ std::cout << "doc[" << doc << "][" << i << "] = " << buffer[i]
+ << std::endl;
+ }
+ }
+}
+
+void
+AttributeTest::testArray()
+{
+ uint32_t numDocs = 100;
+ uint32_t numUniques = 50;
+ { // IntegerAttribute
+ std::vector<AttributeVector::largeint_t> values;
+ fillNumeric(values, numUniques);
+ {
+ AttributePtr ptr = AttributeFactory::createAttribute("a-int32", Config(BasicType::INT32, CollectionType::ARRAY));
+ addDocs(ptr, numDocs);
+ testArray<IntegerAttribute, AttributeVector::largeint_t>(ptr, values);
+ }
+ {
+ Config cfg(BasicType::INT8, CollectionType::ARRAY);
+ cfg.setFastSearch(true);
+ AttributePtr ptr = AttributeFactory::createAttribute("flags", cfg);
+ addDocs(ptr, numDocs);
+ testArray<IntegerAttribute, AttributeVector::largeint_t>(ptr, values);
+ }
+ {
+ Config cfg(BasicType::INT32, CollectionType::ARRAY);
+ cfg.setFastSearch(true);
+ AttributePtr ptr = AttributeFactory::createAttribute("a-fs-int32", cfg);
+ addDocs(ptr, numDocs);
+ testArray<IntegerAttribute, AttributeVector::largeint_t>(ptr, values);
+ }
+ }
+ { // FloatingPointAttribute
+ std::vector<double> values;
+ fillNumeric(values, numUniques);
+ {
+ AttributePtr ptr = AttributeFactory::createAttribute("a-float", Config(BasicType::FLOAT, CollectionType::ARRAY));
+ addDocs(ptr, numDocs);
+ testArray<FloatingPointAttribute, double>(ptr, values);
+ }
+ {
+ Config cfg(BasicType::FLOAT, CollectionType::ARRAY);
+ cfg.setFastSearch(true);
+ AttributePtr ptr = AttributeFactory::createAttribute("a-fs-float", cfg);
+ addDocs(ptr, numDocs);
+ testArray<FloatingPointAttribute, double>(ptr, values);
+ }
+ }
+ { // StringAttribute
+ std::vector<vespalib::string> values;
+ fillString(values, numUniques);
+ {
+ AttributePtr ptr = AttributeFactory::createAttribute("a-string", Config(BasicType::STRING, CollectionType::ARRAY));
+ addDocs(ptr, numDocs);
+ testArray<StringAttribute, vespalib::string>(ptr, values);
+ }
+ {
+ Config cfg(BasicType::STRING, CollectionType::ARRAY);
+ cfg.setFastSearch(true);
+ AttributePtr ptr = AttributeFactory::createAttribute("afs-string", cfg);
+ addDocs(ptr, numDocs);
+ testArray<StringAttribute, vespalib::string>(ptr, values);
+ }
+ }
+}
+
+
+//-----------------------------------------------------------------------------
+// CollectionType::WSET
+//-----------------------------------------------------------------------------
+
+template <typename BufferType>
+void
+AttributeTest::printWeightedSet(const AttributePtr & ptr)
+{
+ std::vector<BufferType> buffer(ptr->getMaxValueCount());
+ for (uint32_t doc = 0; doc < ptr->getNumDocs(); ++doc) {
+ uint32_t valueCount = ptr->get(doc, &buffer[0], buffer.size());
+ for (uint32_t i = 0; i < valueCount; ++i) {
+ std::cout << "doc[" << doc << "][" << i << "] = {" << buffer[i].getValue()
+ << ", " << buffer[i].getWeight() << "}" << std::endl;
+ }
+ }
+}
+
+template <typename VectorType, typename BufferType>
+void
+AttributeTest::testWeightedSet(const AttributePtr & ptr, const std::vector<BufferType> & values)
+{
+ LOG(info, "testWeightedSet: vector '%s' with %u documents and %lu values",
+ ptr->getName().c_str(), ptr->getNumDocs(), static_cast<unsigned long>(values.size()));
+
+ VectorType & v = *(static_cast<VectorType *>(ptr.get()));
+ uint32_t numDocs = v.getNumDocs();
+ ASSERT_TRUE(values.size() >= numDocs + 10);
+ uint32_t bufferSize = numDocs + 10;
+ std::vector<BufferType> buffer(bufferSize);
+
+ // fill and check
+ EXPECT_EQUAL(ptr->getStatus().getUpdateCount(), 0u);
+ EXPECT_EQUAL(ptr->getStatus().getNonIdempotentUpdateCount(), 0u);
+ for (uint32_t doc = 0; doc < numDocs; ++doc) {
+ uint32_t valueCount = doc;
+ v.clearDoc(doc);
+ for (uint32_t j = 0; j < valueCount; ++j) {
+ EXPECT_TRUE(v.append(doc, values[j].getValue(), values[j].getWeight()));
+ }
+ commit(ptr);
+ EXPECT_TRUE(ptr->get(doc, &buffer[0], buffer.size()) == valueCount);
+ for (uint32_t j = 0; j < valueCount; ++j) {
+ EXPECT_TRUE(buffer[j].getValue() == values[j].getValue());
+ EXPECT_TRUE(buffer[j].getWeight() == values[j].getWeight());
+ }
+ }
+ EXPECT_EQUAL(ptr->getStatus().getUpdateCount(), numDocs + (numDocs*(numDocs-1))/2);
+ EXPECT_EQUAL(ptr->getStatus().getNonIdempotentUpdateCount(), 0u);
+
+ // test append()
+ for (uint32_t doc = 0; doc < numDocs; ++doc) {
+ uint32_t valueCount = doc;
+
+ // append non-existent value
+ EXPECT_TRUE(v.append(doc, values[doc].getValue(), values[doc].getWeight()));
+ commit(ptr);
+ EXPECT_TRUE(ptr->get(doc, &buffer[0], buffer.size()) == valueCount + 1);
+ EXPECT_TRUE(buffer[doc].getValue() == values[doc].getValue());
+ EXPECT_TRUE(buffer[doc].getWeight() == values[doc].getWeight());
+
+ // append existent value
+ EXPECT_TRUE(v.append(doc, values[doc].getValue(), values[doc].getWeight() + 10));
+ commit(ptr);
+ EXPECT_TRUE(ptr->get(doc, &buffer[0], buffer.size()) == valueCount + 1);
+ EXPECT_TRUE(buffer[doc].getValue() == values[doc].getValue());
+ EXPECT_TRUE(buffer[doc].getWeight() == values[doc].getWeight() + 10);
+
+ // append non-existent value two times
+ EXPECT_TRUE(v.append(doc, values[doc + 1].getValue(), values[doc + 1].getWeight()));
+ EXPECT_TRUE(v.append(doc, values[doc + 1].getValue(), values[doc + 1].getWeight() + 10));
+ commit(ptr);
+ EXPECT_TRUE(ptr->get(doc, &buffer[0], buffer.size()) == valueCount + 2);
+ EXPECT_TRUE(buffer[doc + 1].getValue() == values[doc + 1].getValue());
+ EXPECT_TRUE(buffer[doc + 1].getWeight() == values[doc + 1].getWeight() + 10);
+ }
+ EXPECT_EQUAL(ptr->getStatus().getUpdateCount(), numDocs + (numDocs*(numDocs-1))/2 + numDocs*4);
+ EXPECT_EQUAL(ptr->getStatus().getNonIdempotentUpdateCount(), 0u);
+
+ // test remove()
+ for (uint32_t doc = 0; doc < numDocs; ++doc) {
+ uint32_t valueCount = doc;
+
+ // remove non-existent value
+ EXPECT_TRUE(static_cast<uint32_t>(v.getValueCount(doc)) == valueCount + 2);
+ EXPECT_TRUE(v.remove(doc, values[doc + 2].getValue(), 0));
+ commit(ptr);
+ EXPECT_TRUE(static_cast<uint32_t>(v.getValueCount(doc)) == valueCount + 2);
+
+ // remove existent value
+ EXPECT_TRUE(ptr->get(doc, &buffer[0], buffer.size()) == valueCount + 2);
+ EXPECT_TRUE(buffer[doc + 1].getValue() == values[doc + 1].getValue());
+ EXPECT_TRUE(v.remove(doc, values[doc + 1].getValue(), 0));
+ commit(ptr);
+ EXPECT_TRUE(ptr->get(doc, &buffer[0], buffer.size()) == valueCount + 1);
+ for (uint32_t i = 0; i < valueCount + 1; ++i) {
+ EXPECT_TRUE(buffer[i].getValue() != values[doc + 1].getValue());
+ }
+ }
+ EXPECT_EQUAL(ptr->getStatus().getUpdateCount(), numDocs + (numDocs*(numDocs-1))/2 + numDocs*4 + numDocs * 2);
+ EXPECT_EQUAL(ptr->getStatus().getNonIdempotentUpdateCount(), 0u);
+}
+
+void
+AttributeTest::testWeightedSet()
+{
+ uint32_t numDocs = 100;
+ uint32_t numValues = numDocs + 10;
+ { // IntegerAttribute
+ std::vector<AttributeVector::WeightedInt> values;
+ values.reserve(numValues);
+ for (uint32_t i = 0; i < numValues; ++i) {
+ values.push_back(AttributeVector::WeightedInt(i, i + numValues));
+ }
+
+ {
+ AttributePtr ptr = AttributeFactory::createAttribute
+ ("wsint32", Config(BasicType::INT32, CollectionType::WSET));
+ addDocs(ptr, numDocs);
+ testWeightedSet<IntegerAttribute, AttributeVector::WeightedInt>(ptr, values);
+ }
+ {
+ Config cfg(BasicType::INT32, CollectionType::WSET);
+ cfg.setFastSearch(true);
+ AttributePtr ptr = AttributeFactory::createAttribute("ws-fs-int32", cfg);
+ addDocs(ptr, numDocs);
+ testWeightedSet<IntegerAttribute, AttributeVector::WeightedInt>(ptr, values);
+ IAttributeVector::EnumHandle e;
+ EXPECT_TRUE(ptr->findEnum("1", e));
+ }
+ }
+ { // FloatingPointAttribute
+ std::vector<AttributeVector::WeightedFloat> values;
+ values.reserve(numValues);
+ for (uint32_t i = 0; i < numValues; ++i) {
+ values.push_back(AttributeVector::WeightedFloat(i, i + numValues));
+ }
+
+ {
+ Config cfg(BasicType::FLOAT, CollectionType::WSET);
+ AttributePtr ptr = AttributeFactory::createAttribute("ws-float", cfg);
+ addDocs(ptr, numDocs);
+ testWeightedSet<FloatingPointAttribute, AttributeVector::WeightedFloat>(ptr, values);
+ }
+ {
+ Config cfg(BasicType::FLOAT, CollectionType::WSET);
+ cfg.setFastSearch(true);
+ AttributePtr ptr = AttributeFactory::createAttribute("ws-fs-float", cfg);
+ addDocs(ptr, numDocs);
+ testWeightedSet<FloatingPointAttribute, AttributeVector::WeightedFloat>(ptr, values);
+ IAttributeVector::EnumHandle e;
+ EXPECT_TRUE(ptr->findEnum("1", e));
+ }
+ }
+ { // StringAttribute
+ std::vector<AttributeVector::WeightedString> values;
+ values.reserve(numValues);
+ for (uint32_t i = 0; i < numValues; ++i) {
+ vespalib::asciistream ss;
+ ss << "string" << (i < 10 ? "0" : "") << i;
+ values.push_back(AttributeVector::WeightedString(ss.str(), i + numValues));
+ }
+
+ {
+ AttributePtr ptr = AttributeFactory::createAttribute
+ ("wsstr", Config(BasicType::STRING, CollectionType::WSET));
+ addDocs(ptr, numDocs);
+ testWeightedSet<StringAttribute, AttributeVector::WeightedString>(ptr, values);
+ }
+ {
+ Config cfg(Config(BasicType::STRING, CollectionType::WSET));
+ cfg.setFastSearch(true);
+ AttributePtr ptr = AttributeFactory::createAttribute("wsfsstr", cfg);
+ addDocs(ptr, numDocs);
+ testWeightedSet<StringAttribute, AttributeVector::WeightedString>(ptr, values);
+ IAttributeVector::EnumHandle e;
+ EXPECT_TRUE(ptr->findEnum("string00", e));
+ }
+ }
+}
+
+template <typename VectorType, typename BufferType>
+void
+AttributeTest::testArithmeticValueUpdate(const AttributePtr & ptr)
+{
+ LOG(info, "testArithmeticValueUpdate: vector '%s'", ptr->getName().c_str());
+
+ typedef document::ArithmeticValueUpdate Arith;
+ VectorType & vec = static_cast<VectorType &>(*ptr.get());
+ addDocs(ptr, 13);
+ EXPECT_EQUAL(ptr->getStatus().getUpdateCount(), 0u);
+ EXPECT_EQUAL(ptr->getStatus().getNonIdempotentUpdateCount(), 0u);
+ for (uint32_t doc = 0; doc < 13; ++doc) {
+ ASSERT_TRUE(vec.update(doc, 100));
+ }
+ EXPECT_EQUAL(ptr->getStatus().getUpdateCount(), 13u);
+ EXPECT_EQUAL(ptr->getStatus().getNonIdempotentUpdateCount(), 0u);
+ ptr->commit();
+
+ EXPECT_TRUE(vec.apply(0, Arith(Arith::Add, 10)));
+ EXPECT_TRUE(vec.apply(1, Arith(Arith::Add, -10)));
+ EXPECT_TRUE(vec.apply(2, Arith(Arith::Sub, 10)));
+ EXPECT_TRUE(vec.apply(3, Arith(Arith::Sub, -10)));
+ EXPECT_TRUE(vec.apply(4, Arith(Arith::Mul, 10)));
+ EXPECT_TRUE(vec.apply(5, Arith(Arith::Mul, -10)));
+ EXPECT_TRUE(vec.apply(6, Arith(Arith::Div, 10)));
+ EXPECT_TRUE(vec.apply(7, Arith(Arith::Div, -10)));
+ EXPECT_TRUE(vec.apply(8, Arith(Arith::Add, 10.5)));
+ EXPECT_TRUE(vec.apply(9, Arith(Arith::Sub, 10.5)));
+ EXPECT_TRUE(vec.apply(10, Arith(Arith::Mul, 1.2)));
+ EXPECT_TRUE(vec.apply(11, Arith(Arith::Mul, 0.8)));
+ EXPECT_TRUE(vec.apply(12, Arith(Arith::Div, 0.8)));
+ EXPECT_EQUAL(ptr->getStatus().getUpdateCount(), 26u);
+ EXPECT_EQUAL(ptr->getStatus().getNonIdempotentUpdateCount(), 13u);
+ ptr->commit();
+
+ std::vector<BufferType> buf(1);
+ ptr->get(0, &buf[0], 1);
+ EXPECT_EQUAL(buf[0], 110);
+ ptr->get(1, &buf[0], 1);
+ EXPECT_EQUAL(buf[0], 90);
+ ptr->get(2, &buf[0], 1);
+ EXPECT_EQUAL(buf[0], 90);
+ ptr->get(3, &buf[0], 1);
+ EXPECT_EQUAL(buf[0], 110);
+ ptr->get(4, &buf[0], 1);
+ EXPECT_EQUAL(buf[0], 1000);
+ ptr->get(5, &buf[0], 1);
+ EXPECT_EQUAL(buf[0], -1000);
+ ptr->get(6, &buf[0], 1);
+ EXPECT_EQUAL(buf[0], 10);
+ ptr->get(7, &buf[0], 1);
+ EXPECT_EQUAL(buf[0], -10);
+ if (ptr->getBasicType() == BasicType::INT32) {
+ ptr->get(8, &buf[0], 1);
+ EXPECT_EQUAL(buf[0], 110);
+ ptr->get(9, &buf[0], 1);
+ EXPECT_EQUAL(buf[0], 90);
+ } else if (ptr->getBasicType() == BasicType::FLOAT ||
+ ptr->getBasicType() == BasicType::DOUBLE)
+ {
+ ptr->get(8, &buf[0], 1);
+ EXPECT_EQUAL(buf[0], 110.5);
+ ptr->get(9, &buf[0], 1);
+ EXPECT_EQUAL(buf[0], 89.5);
+ } else {
+ ASSERT_TRUE(false);
+ }
+ ptr->get(10, &buf[0], 1);
+ EXPECT_EQUAL(buf[0], 120);
+ ptr->get(11, &buf[0], 1);
+ EXPECT_EQUAL(buf[0], 80);
+ ptr->get(12, &buf[0], 1);
+ EXPECT_EQUAL(buf[0], 125);
+
+
+ // try several arithmetic operations on the same document in a single commit
+ ASSERT_TRUE(vec.update(0, 1100));
+ ASSERT_TRUE(vec.update(1, 1100));
+ EXPECT_EQUAL(ptr->getStatus().getUpdateCount(), 28u);
+ EXPECT_EQUAL(ptr->getStatus().getNonIdempotentUpdateCount(), 13u);
+ for (uint32_t i = 0; i < 10; ++i) {
+ ASSERT_TRUE(vec.apply(0, Arith(Arith::Add, 10)));
+ ASSERT_TRUE(vec.apply(1, Arith(Arith::Add, 10)));
+ }
+ EXPECT_EQUAL(ptr->getStatus().getUpdateCount(), 48u);
+ EXPECT_EQUAL(ptr->getStatus().getNonIdempotentUpdateCount(), 33u);
+ ptr->commit();
+ ptr->get(0, &buf[0], 1);
+ EXPECT_EQUAL(buf[0], 1200);
+ ptr->get(1, &buf[0], 1);
+ EXPECT_EQUAL(buf[0], 1200);
+
+ ASSERT_TRUE(vec.update(0, 10));
+ ASSERT_TRUE(vec.update(1, 10));
+ ASSERT_TRUE(vec.update(2, 10));
+ ASSERT_TRUE(vec.update(3, 10));
+ EXPECT_EQUAL(ptr->getStatus().getUpdateCount(), 52u);
+ EXPECT_EQUAL(ptr->getStatus().getNonIdempotentUpdateCount(), 33u);
+ for (uint32_t i = 0; i < 8; ++i) {
+ EXPECT_TRUE(vec.apply(0, Arith(Arith::Mul, 1.2)));
+ EXPECT_TRUE(vec.apply(1, Arith(Arith::Mul, 2.3)));
+ EXPECT_TRUE(vec.apply(2, Arith(Arith::Mul, 3.4)));
+ EXPECT_TRUE(vec.apply(3, Arith(Arith::Mul, 5.6)));
+ ptr->commit();
+ }
+ EXPECT_EQUAL(ptr->getStatus().getUpdateCount(), 84u);
+ EXPECT_EQUAL(ptr->getStatus().getNonIdempotentUpdateCount(), 65u);
+
+
+ // try divide by zero
+ ASSERT_TRUE(vec.update(0, 100));
+ EXPECT_TRUE(vec.apply(0, Arith(Arith::Div, 0)));
+ ptr->commit();
+ if (ptr->getClass().inherits(FloatingPointAttribute::classId)) {
+ EXPECT_EQUAL(ptr->getStatus().getUpdateCount(), 86u);
+ EXPECT_EQUAL(ptr->getStatus().getNonIdempotentUpdateCount(), 66u);
+ } else { // does not apply for interger attributes
+ EXPECT_EQUAL(ptr->getStatus().getUpdateCount(), 85u);
+ EXPECT_EQUAL(ptr->getStatus().getNonIdempotentUpdateCount(), 65u);
+ }
+ ptr->get(0, &buf[0], 1);
+ if (ptr->getBasicType() == BasicType::INT32) {
+ EXPECT_EQUAL(buf[0], 100);
+ }
+
+ // try divide by zero with empty change vector
+ EXPECT_TRUE(vec.apply(0, Arith(Arith::Div, 0)));
+ ptr->commit();
+ if (ptr->getClass().inherits(FloatingPointAttribute::classId)) {
+ EXPECT_EQUAL(ptr->getStatus().getUpdateCount(), 87u);
+ EXPECT_EQUAL(ptr->getStatus().getNonIdempotentUpdateCount(), 67u);
+ } else { // does not apply for interger attributes
+ EXPECT_EQUAL(ptr->getStatus().getUpdateCount(), 85u);
+ EXPECT_EQUAL(ptr->getStatus().getNonIdempotentUpdateCount(), 65u);
+ }
+}
+
+void
+AttributeTest::testArithmeticValueUpdate()
+{
+ {
+ AttributePtr ptr = AttributeFactory::createAttribute("sint32", Config(BasicType::INT32, CollectionType::SINGLE));
+ testArithmeticValueUpdate<IntegerAttribute, IntegerAttribute::largeint_t>(ptr);
+ }
+ {
+ AttributePtr ptr = AttributeFactory::createAttribute("sfloat", Config(BasicType::FLOAT, CollectionType::SINGLE));
+ testArithmeticValueUpdate<FloatingPointAttribute, double>(ptr);
+ }
+ {
+ Config cfg(BasicType::INT32, CollectionType::SINGLE);
+ cfg.setFastSearch(true);
+ AttributePtr ptr = AttributeFactory::createAttribute("sfsint32", cfg);
+ testArithmeticValueUpdate<IntegerAttribute, IntegerAttribute::largeint_t>(ptr);
+ }
+ {
+ Config cfg(BasicType::FLOAT, CollectionType::SINGLE);
+ cfg.setFastSearch(true);
+ AttributePtr ptr = AttributeFactory::createAttribute("sfsfloat", cfg);
+ testArithmeticValueUpdate<FloatingPointAttribute, double>(ptr);
+ }
+ {
+ Config cfg(BasicType::DOUBLE, CollectionType::SINGLE);
+ cfg.setFastSearch(true);
+ AttributePtr ptr = AttributeFactory::createAttribute("sfsdouble", cfg);
+ testArithmeticValueUpdate<FloatingPointAttribute, double>(ptr);
+ }
+}
+
+
+template <typename VectorType, typename BaseType, typename BufferType>
+void
+AttributeTest::testArithmeticWithUndefinedValue(const AttributePtr & ptr, BaseType before, BaseType after)
+{
+ LOG(info, "testArithmeticWithUndefinedValue: vector '%s'", ptr->getName().c_str());
+
+ typedef document::ArithmeticValueUpdate Arith;
+ VectorType & vec = static_cast<VectorType &>(*ptr.get());
+ addDocs(ptr, 1);
+ ASSERT_TRUE(vec.update(0, before));
+ ptr->commit();
+
+ EXPECT_TRUE(vec.apply(0, Arith(Arith::Add, 10)));
+ ptr->commit();
+
+ std::vector<BufferType> buf(1);
+ ptr->get(0, &buf[0], 1);
+
+ if (ptr->getClass().inherits(FloatingPointAttribute::classId)) {
+ EXPECT_TRUE(std::isnan(buf[0]));
+ } else {
+ EXPECT_EQUAL(buf[0], after);
+ }
+}
+
+void
+AttributeTest::testArithmeticWithUndefinedValue()
+{
+ {
+ AttributePtr ptr = AttributeFactory::createAttribute("sint32", Config(BasicType::INT32, CollectionType::SINGLE));
+ testArithmeticWithUndefinedValue<IntegerAttribute, int32_t, IntegerAttribute::largeint_t>
+ (ptr, std::numeric_limits<int32_t>::min(), std::numeric_limits<int32_t>::min());
+ }
+ {
+ AttributePtr ptr = AttributeFactory::createAttribute("sfloat", Config(BasicType::FLOAT, CollectionType::SINGLE));
+ testArithmeticWithUndefinedValue<FloatingPointAttribute, float, double>
+ (ptr, std::numeric_limits<float>::quiet_NaN(), std::numeric_limits<float>::quiet_NaN());
+ }
+ {
+ AttributePtr ptr = AttributeFactory::createAttribute("sdouble", Config(BasicType::DOUBLE, CollectionType::SINGLE));
+ testArithmeticWithUndefinedValue<FloatingPointAttribute, double, double>
+ (ptr, std::numeric_limits<double>::quiet_NaN(), std::numeric_limits<double>::quiet_NaN());
+ }
+}
+
+
+template <typename VectorType, typename BufferType>
+void
+AttributeTest::testMapValueUpdate(const AttributePtr & ptr, BufferType initValue,
+ const FieldValue & initFieldValue, const FieldValue & nonExistant,
+ bool removeIfZero, bool createIfNonExistant)
+{
+ LOG(info, "testMapValueUpdate: vector '%s'", ptr->getName().c_str());
+ typedef MapValueUpdate MapVU;
+ typedef ArithmeticValueUpdate ArithVU;
+ VectorType & vec = static_cast<VectorType &>(*ptr.get());
+
+ addDocs(ptr, 6);
+ for (uint32_t doc = 0; doc < 6; ++doc) {
+ ASSERT_TRUE(vec.append(doc, initValue.getValue(), 100));
+ }
+ EXPECT_EQUAL(ptr->getStatus().getUpdateCount(), 6u);
+ EXPECT_EQUAL(ptr->getStatus().getNonIdempotentUpdateCount(), 0u);
+
+ EXPECT_TRUE(ptr->apply(0, MapVU(initFieldValue,
+ ArithVU(ArithVU::Add, 10))));
+ EXPECT_TRUE(ptr->apply(1, MapVU(initFieldValue,
+ ArithVU(ArithVU::Sub, 10))));
+ EXPECT_TRUE(ptr->apply(2, MapVU(initFieldValue,
+ ArithVU(ArithVU::Mul, 10))));
+ EXPECT_TRUE(ptr->apply(3, MapVU(initFieldValue,
+ ArithVU(ArithVU::Div, 10))));
+ ptr->commit();
+ EXPECT_EQUAL(ptr->getStatus().getUpdateCount(), 10u);
+ EXPECT_EQUAL(ptr->getStatus().getNonIdempotentUpdateCount(), 4u);
+
+ std::vector<BufferType> buf(2);
+ ptr->get(0, &buf[0], 2);
+ EXPECT_EQUAL(buf[0].getWeight(), 110);
+ ptr->get(1, &buf[0], 2);
+ EXPECT_EQUAL(buf[0].getWeight(), 90);
+ ptr->get(2, &buf[0], 2);
+ EXPECT_EQUAL(buf[0].getWeight(), 1000);
+ ptr->get(3, &buf[0], 2);
+ EXPECT_EQUAL(buf[0].getWeight(), 10);
+
+ // removeifzero
+ EXPECT_TRUE(ptr->apply(4, MapVU(initFieldValue,
+ ArithVU(ArithVU::Sub, 100))));
+ ptr->commit();
+ if (removeIfZero) {
+ EXPECT_EQUAL(ptr->get(4, &buf[0], 2), uint32_t(0));
+ } else {
+ EXPECT_EQUAL(ptr->get(4, &buf[0], 2), uint32_t(1));
+ EXPECT_EQUAL(buf[0].getWeight(), 0);
+ }
+ EXPECT_EQUAL(ptr->getStatus().getUpdateCount(), 11u);
+ EXPECT_EQUAL(ptr->getStatus().getNonIdempotentUpdateCount(), 5u);
+
+ // createifnonexistant
+ EXPECT_TRUE(ptr->apply(5, MapVU(nonExistant,
+ ArithVU(ArithVU::Add, 10))));
+ ptr->commit();
+ if (createIfNonExistant) {
+ EXPECT_EQUAL(ptr->get(5, &buf[0], 2), uint32_t(2));
+ EXPECT_EQUAL(buf[0].getWeight(), 100);
+ EXPECT_EQUAL(buf[1].getWeight(), 10);
+ } else {
+ EXPECT_EQUAL(ptr->get(5, &buf[0], 2), uint32_t(1));
+ EXPECT_EQUAL(buf[0].getWeight(), 100);
+ }
+ EXPECT_EQUAL(ptr->getStatus().getUpdateCount(), 12u);
+ EXPECT_EQUAL(ptr->getStatus().getNonIdempotentUpdateCount(), 6u);
+
+
+ // try divide by zero (should be ignored)
+ vec.clearDoc(0);
+ EXPECT_EQUAL(ptr->getStatus().getUpdateCount(), 13u);
+ ASSERT_TRUE(vec.append(0, initValue.getValue(), 12345));
+ EXPECT_EQUAL(ptr->getStatus().getUpdateCount(), 14u);
+ EXPECT_TRUE(ptr->apply(0, MapVU(initFieldValue, ArithVU(ArithVU::Div, 0))));
+ EXPECT_EQUAL(ptr->getStatus().getUpdateCount(), 14u);
+ EXPECT_EQUAL(ptr->getStatus().getNonIdempotentUpdateCount(), 6u);
+ ptr->commit();
+ ptr->get(0, &buf[0], 1);
+ EXPECT_EQUAL(buf[0].getWeight(), 12345);
+}
+
+void
+AttributeTest::testMapValueUpdate()
+{
+ { // regular set
+ AttributePtr ptr = AttributeFactory::createAttribute
+ ("wsint32", Config(BasicType::INT32, CollectionType::WSET));
+ testMapValueUpdate<IntegerAttribute, AttributeVector::WeightedInt>
+ (ptr, AttributeVector::WeightedInt(64, 1), IntFieldValue(64),
+ IntFieldValue(32), false, false);
+ }
+ { // remove if zero
+ AttributePtr ptr = AttributeFactory::createAttribute
+ ("wsint32", Config(BasicType::INT32, CollectionType(CollectionType::WSET, true, false)));
+ testMapValueUpdate<IntegerAttribute, AttributeVector::WeightedInt>
+ (ptr, AttributeVector::WeightedInt(64, 1), IntFieldValue(64),
+ IntFieldValue(32), true, false);
+ }
+ { // create if non existant
+ AttributePtr ptr = AttributeFactory::createAttribute
+ ("wsint32", Config(BasicType::INT32, CollectionType(CollectionType::WSET, false, true)));
+ testMapValueUpdate<IntegerAttribute, AttributeVector::WeightedInt>
+ (ptr, AttributeVector::WeightedInt(64, 1), IntFieldValue(64),
+ IntFieldValue(32), false, true);
+ }
+
+ Config setCfg(Config(BasicType::STRING, CollectionType::WSET));
+ Config setRemoveCfg(Config(BasicType::STRING, CollectionType(CollectionType::WSET, true, false)));
+ Config setCreateCfg(Config(BasicType::STRING, CollectionType(CollectionType::WSET, false, true)));
+
+ { // regular set
+ AttributePtr ptr = AttributeFactory::createAttribute("wsstr", setCfg);
+ testMapValueUpdate<StringAttribute, AttributeVector::WeightedString>
+ (ptr, AttributeVector::WeightedString("first", 1), StringFieldValue("first"),
+ StringFieldValue("second"), false, false);
+ }
+ { // remove if zero
+ AttributePtr ptr = AttributeFactory::createAttribute("wsstr", setRemoveCfg);
+ testMapValueUpdate<StringAttribute, AttributeVector::WeightedString>
+ (ptr, AttributeVector::WeightedString("first", 1), StringFieldValue("first"),
+ StringFieldValue("second"), true, false);
+ }
+ { // create if non existant
+ AttributePtr ptr = AttributeFactory::createAttribute("wsstr", setCreateCfg);
+ testMapValueUpdate<StringAttribute, AttributeVector::WeightedString>
+ (ptr, AttributeVector::WeightedString("first", 1), StringFieldValue("first"),
+ StringFieldValue("second"), false, true);
+ }
+
+ // fast-search - posting lists
+ { // regular set
+ setCfg.setFastSearch(true);
+ AttributePtr ptr = AttributeFactory::createAttribute("wsfsstr", setCfg);
+ testMapValueUpdate<StringAttribute, AttributeVector::WeightedString>
+ (ptr, AttributeVector::WeightedString("first", 1), StringFieldValue("first"),
+ StringFieldValue("second"), false, false);
+ }
+ { // remove if zero
+ setRemoveCfg.setFastSearch(true);
+ AttributePtr ptr = AttributeFactory::createAttribute("wsfsstr", setRemoveCfg);
+ testMapValueUpdate<StringAttribute, AttributeVector::WeightedString>
+ (ptr, AttributeVector::WeightedString("first", 1), StringFieldValue("first"),
+ StringFieldValue("second"), true, false);
+ }
+ { // create if non existant
+ setCreateCfg.setFastSearch(true);
+ AttributePtr ptr = AttributeFactory::createAttribute("wsfsstr", setCreateCfg);
+ testMapValueUpdate<StringAttribute, AttributeVector::WeightedString>
+ (ptr, AttributeVector::WeightedString("first", 1), StringFieldValue("first"),
+ StringFieldValue("second"), false, true);
+ }
+}
+
+
+
+void
+AttributeTest::commit(const AttributePtr & ptr)
+{
+ ptr->commit();
+}
+
+
+void
+AttributeTest::testStatus()
+{
+ std::vector<vespalib::string> values;
+ fillString(values, 16);
+ uint32_t numDocs = 100;
+ // No posting list
+ static constexpr size_t LeafNodeSize =
+ 4 + sizeof(EnumStoreBase::Index) * EnumTreeTraits::LEAF_SLOTS;
+ static constexpr size_t InternalNodeSize =
+ 8 + (sizeof(EnumStoreBase::Index) +
+ sizeof(btree::EntryRef)) * EnumTreeTraits::INTERNAL_SLOTS;
+ static constexpr size_t NestedVectorSize = 24; // sizeof(vespalib::Array)
+
+ {
+ Config cfg(BasicType::STRING, CollectionType::ARRAY);
+ AttributePtr ptr = AttributeFactory::createAttribute("as", cfg);
+ addDocs(ptr, numDocs);
+ StringAttribute & sa = *(static_cast<StringAttribute *>(ptr.get()));
+ for (uint32_t i = 0; i < numDocs; ++i) {
+ EXPECT_TRUE(appendToVector(sa, i, 1, values));
+ }
+ ptr->commit(true);
+ EXPECT_EQUAL(ptr->getStatus().getNumDocs(), 100u);
+ EXPECT_EQUAL(ptr->getStatus().getNumValues(), 100u);
+ EXPECT_EQUAL(ptr->getStatus().getNumUniqueValues(), 1u);
+ size_t expUsed = 0;
+ expUsed += 1 * InternalNodeSize + 1 * LeafNodeSize; // enum store tree
+ expUsed += 1 * 32; // enum store (uniquevalues * bytes per entry)
+ // multi value mapping (numdocs * sizeof(MappingIndex) + numvalues * sizeof(EnumIndex))
+ expUsed += 100 * sizeof(search::multivalue::Index32) + 100 * 4;
+ EXPECT_GREATER_EQUAL(ptr->getStatus().getUsed(), expUsed);
+ EXPECT_GREATER_EQUAL(ptr->getStatus().getAllocated(), expUsed);
+ }
+
+ {
+ Config cfg(BasicType::STRING, CollectionType::ARRAY);
+ AttributePtr ptr = AttributeFactory::createAttribute("as", cfg);
+ addDocs(ptr, numDocs);
+ StringAttribute & sa = *(static_cast<StringAttribute *>(ptr.get()));
+ const size_t numUniq(16);
+ const size_t numValuesPerDoc(16);
+ for (uint32_t i = 0; i < numDocs; ++i) {
+ EXPECT_TRUE(appendToVector(sa, i, numValuesPerDoc, values));
+ }
+ ptr->commit(true);
+ EXPECT_EQUAL(ptr->getStatus().getNumDocs(), numDocs);
+ EXPECT_EQUAL(ptr->getStatus().getNumValues(), numDocs*numValuesPerDoc);
+ EXPECT_EQUAL(ptr->getStatus().getNumUniqueValues(), numUniq);
+ size_t expUsed = 0;
+ expUsed += 1 * InternalNodeSize + 1 * LeafNodeSize; // enum store tree
+ expUsed += numUniq * 32; // enum store (16 unique values, 32 bytes per entry)
+ // multi value mapping (numdocs * sizeof(MappingIndex) + numvalues * sizeof(EnumIndex) +
+ // numdocs * sizeof(Array<EnumIndex>) (due to vector vector))
+ expUsed += numDocs * sizeof(search::multivalue::Index32) + numDocs * numValuesPerDoc * sizeof(EnumStoreBase::Index) + ((numValuesPerDoc > search::multivalue::Index32::maxValues()) ? numDocs * NestedVectorSize : 0);
+ EXPECT_GREATER_EQUAL(ptr->getStatus().getUsed(), expUsed);
+ EXPECT_GREATER_EQUAL(ptr->getStatus().getAllocated(), expUsed);
+ }
+}
+
+void
+AttributeTest::testNullProtection()
+{
+ size_t len1 = strlen("evil");
+ size_t len2 = strlen("string");
+ size_t len = len1 + 1 + len2;
+ vespalib::string good("good");
+ vespalib::string evil("evil string");
+ vespalib::string pureEvil("evil");
+ EXPECT_EQUAL(strlen(evil.data()), len);
+ EXPECT_EQUAL(strlen(evil.c_str()), len);
+ evil[len1] = 0; // replace space with '\0'
+ EXPECT_EQUAL(strlen(evil.data()), len1);
+ EXPECT_EQUAL(strlen(evil.c_str()), len1);
+ EXPECT_EQUAL(strlen(evil.data() + len1), 0u);
+ EXPECT_EQUAL(strlen(evil.c_str() + len1), 0u);
+ EXPECT_EQUAL(strlen(evil.data() + len1 + 1), len2);
+ EXPECT_EQUAL(strlen(evil.c_str() + len1 + 1), len2);
+ EXPECT_EQUAL(evil.size(), len);
+ { // string
+ AttributeVector::DocId docId;
+ std::vector<vespalib::string> buf(16);
+ AttributePtr attr = AttributeFactory::createAttribute("string", Config(BasicType::STRING, CollectionType::SINGLE));
+ StringAttribute &v = static_cast<StringAttribute &>(*attr.get());
+ EXPECT_TRUE(v.addDoc(docId));
+ EXPECT_TRUE(v.update(docId, evil));
+ v.commit();
+ size_t n = static_cast<const AttributeVector &>(v).get(docId, &buf[0], buf.size());
+ EXPECT_EQUAL(n, 1u);
+ EXPECT_EQUAL(buf[0], pureEvil);
+ }
+ { // string array
+ AttributeVector::DocId docId;
+ std::vector<vespalib::string> buf(16);
+ AttributePtr attr = AttributeFactory::createAttribute("string", Config(BasicType::STRING, CollectionType::ARRAY));
+ StringAttribute &v = static_cast<StringAttribute &>(*attr.get());
+ EXPECT_TRUE(v.addDoc(docId));
+ EXPECT_TRUE(v.append(0, good, 1));
+ EXPECT_TRUE(v.append(0, evil, 1));
+ EXPECT_TRUE(v.append(0, good, 1));
+ v.commit();
+ size_t n = static_cast<const AttributeVector &>(v).get(0, &buf[0], buf.size());
+ EXPECT_EQUAL(n, 3u);
+ EXPECT_EQUAL(buf[0], good);
+ EXPECT_EQUAL(buf[1], pureEvil);
+ EXPECT_EQUAL(buf[2], good);
+ }
+ { // string set
+ AttributeVector::DocId docId;
+ std::vector<StringAttribute::WeightedString> buf(16);
+ AttributePtr attr = AttributeFactory::createAttribute("string", Config(BasicType::STRING, CollectionType::WSET));
+ StringAttribute &v = static_cast<StringAttribute &>(*attr.get());
+ EXPECT_TRUE(v.addDoc(docId));
+ EXPECT_TRUE(v.append(0, good, 10));
+ EXPECT_TRUE(v.append(0, evil, 20));
+ v.commit();
+ size_t n = static_cast<const AttributeVector &>(v).get(0, &buf[0], buf.size());
+ EXPECT_EQUAL(n, 2u);
+ if (buf[0].getValue() != good) {
+ std::swap(buf[0], buf[1]);
+ }
+ EXPECT_EQUAL(buf[0].getValue(), good);
+ EXPECT_EQUAL(buf[0].getWeight(), 10);
+ EXPECT_EQUAL(buf[1].getValue(), pureEvil);
+ EXPECT_EQUAL(buf[1].getWeight(), 20);
+
+ // remove
+ EXPECT_TRUE(v.remove(0, evil, 20));
+ v.commit();
+ n = static_cast<const AttributeVector &>(v).get(0, &buf[0], buf.size());
+ EXPECT_EQUAL(n, 1u);
+ EXPECT_EQUAL(buf[0].getValue(), good);
+ EXPECT_EQUAL(buf[0].getWeight(), 10);
+ }
+}
+
+void
+AttributeTest::testGeneration(const AttributePtr & attr, bool exactStatus)
+{
+ LOG(info, "testGeneration(%s)", attr->getName().c_str());
+ IntegerAttribute & ia = static_cast<IntegerAttribute &>(*attr.get());
+ // add docs to trigger inc generation when data vector is full
+ AttributeVector::DocId docId;
+ EXPECT_EQUAL(0u, ia.getCurrentGeneration());
+ EXPECT_TRUE(ia.addDoc(docId));
+ EXPECT_EQUAL(0u, ia.getCurrentGeneration());
+ EXPECT_TRUE(ia.addDoc(docId));
+ EXPECT_EQUAL(0u, ia.getCurrentGeneration());
+ ia.commit(true);
+ EXPECT_EQUAL(1u, ia.getCurrentGeneration());
+ uint64_t lastAllocated;
+ uint64_t lastOnHold;
+ if (exactStatus) {
+ EXPECT_EQUAL(2u, ia.getStatus().getAllocated());
+ EXPECT_EQUAL(0u, ia.getStatus().getOnHold());
+ } else {
+ EXPECT_LESS(0u, ia.getStatus().getAllocated());
+ EXPECT_EQUAL(0u, ia.getStatus().getOnHold());
+ lastAllocated = ia.getStatus().getAllocated();
+ lastOnHold = ia.getStatus().getOnHold();
+ }
+ {
+ AttributeGuard ag(attr); // guard on generation 1
+ EXPECT_TRUE(ia.addDoc(docId)); // inc gen
+ EXPECT_EQUAL(2u, ia.getCurrentGeneration());
+ ia.commit(true);
+ EXPECT_EQUAL(3u, ia.getCurrentGeneration());
+ if (exactStatus) {
+ EXPECT_EQUAL(4u, ia.getStatus().getAllocated());
+ EXPECT_EQUAL(2u, ia.getStatus().getOnHold()); // no cleanup due to guard
+ } else {
+ EXPECT_LESS(lastAllocated, ia.getStatus().getAllocated());
+ EXPECT_LESS(lastOnHold, ia.getStatus().getOnHold());
+ lastAllocated = ia.getStatus().getAllocated();
+ lastOnHold = ia.getStatus().getOnHold();
+ }
+ }
+ EXPECT_TRUE(ia.addDoc(docId));
+ EXPECT_EQUAL(3u, ia.getCurrentGeneration());
+ {
+ AttributeGuard ag(attr); // guard on generation 3
+ ia.commit(true);
+ EXPECT_EQUAL(4u, ia.getCurrentGeneration());
+ if (exactStatus) {
+ EXPECT_EQUAL(4u, ia.getStatus().getAllocated());
+ EXPECT_EQUAL(0u, ia.getStatus().getOnHold()); // cleanup at end of addDoc()
+ } else {
+ EXPECT_EQUAL(lastAllocated, ia.getStatus().getAllocated());
+ EXPECT_GREATER(lastOnHold, ia.getStatus().getOnHold());
+ lastAllocated = ia.getStatus().getAllocated();
+ lastOnHold = ia.getStatus().getOnHold();
+ }
+ }
+ {
+ AttributeGuard ag(attr); // guard on generation 4
+ EXPECT_TRUE(ia.addDoc(docId)); // inc gen
+ EXPECT_EQUAL(5u, ia.getCurrentGeneration());
+ ia.commit();
+ EXPECT_EQUAL(6u, ia.getCurrentGeneration());
+ if (exactStatus) {
+ EXPECT_EQUAL(6u, ia.getStatus().getAllocated());
+ EXPECT_EQUAL(4u, ia.getStatus().getOnHold()); // no cleanup due to guard
+ } else {
+ EXPECT_LESS(lastAllocated, ia.getStatus().getAllocated());
+ EXPECT_LESS(lastOnHold, ia.getStatus().getOnHold());
+ lastAllocated = ia.getStatus().getAllocated();
+ lastOnHold = ia.getStatus().getOnHold();
+ }
+ }
+ ia.commit(true);
+ EXPECT_EQUAL(7u, ia.getCurrentGeneration());
+ if (exactStatus) {
+ EXPECT_EQUAL(6u, ia.getStatus().getAllocated());
+ EXPECT_EQUAL(0u, ia.getStatus().getOnHold()); // cleanup at end of commit()
+ } else {
+ EXPECT_EQUAL(lastAllocated, ia.getStatus().getAllocated());
+ EXPECT_GREATER(lastOnHold, ia.getStatus().getOnHold());
+ }
+}
+
+void
+AttributeTest::testGeneration()
+{
+ { // single value attribute
+ Config cfg(BasicType::INT8);
+ cfg.setGrowStrategy(GrowStrategy(2, 0, 2));
+ AttributePtr attr = AttributeFactory::createAttribute("int8", cfg);
+ testGeneration(attr, true);
+ }
+ { // enum attribute (with fast search)
+ Config cfg(BasicType::INT8);
+ cfg.setFastSearch(true);
+ cfg.setGrowStrategy(GrowStrategy(2, 0, 2));
+ AttributePtr attr = AttributeFactory::createAttribute("faint8", cfg);
+ testGeneration(attr, false);
+ }
+ { // multi value attribute
+ Config cfg(BasicType::INT8, CollectionType::ARRAY);
+ cfg.setGrowStrategy(GrowStrategy(2, 0, 2));
+ AttributePtr attr = AttributeFactory::createAttribute("aint8", cfg);
+ testGeneration(attr, false);
+ }
+ { // multi value enum attribute (with fast search)
+ Config cfg(BasicType::INT8, CollectionType::ARRAY);
+ cfg.setFastSearch(true);
+ cfg.setGrowStrategy(GrowStrategy(2, 0, 2));
+ AttributePtr attr = AttributeFactory::createAttribute("faaint8", cfg);
+ testGeneration(attr, false);
+ }
+}
+
+
+void
+AttributeTest::testCreateSerialNum()
+{
+ Config cfg(BasicType::INT32);
+ AttributePtr attr = AttributeFactory::createAttribute("int32", cfg);
+ attr->setCreateSerialNum(42u);
+ EXPECT_TRUE(attr->save());
+ AttributePtr attr2 = AttributeFactory::createAttribute("int32", cfg);
+ EXPECT_TRUE(attr2->load());
+ EXPECT_EQUAL(42u, attr2->getCreateSerialNum());
+}
+
+
+template <typename VectorType, typename BufferType>
+void
+AttributeTest::testCompactLidSpace(const Config &config,
+ bool fs,
+ bool es)
+{
+ uint32_t highDocs = 100;
+ uint32_t trimmedDocs = 30;
+ vespalib::string bts = config.basicType().asString();
+ vespalib::string cts = config.collectionType().asString();
+ vespalib::string fas = fs ? "-fs" : "";
+ vespalib::string ess = es ? "-es" : "";
+ Config cfg = config;
+ cfg.setFastSearch(fs);
+
+ vespalib::string name = clstmp + "/" + bts + "-" + cts + fas + ess;
+ LOG(info, "testCompactLidSpace(%s)", name.c_str());
+ AttributePtr attr = AttributeFactory::createAttribute(name, cfg);
+ VectorType &v = static_cast<VectorType &>(*attr.get());
+ attr->enableEnumeratedSave(es);
+ attr->addDocs(highDocs);
+ populate(v, 17);
+ AttributePtr attr2 = AttributeFactory::createAttribute(name, cfg);
+ VectorType &v2 = static_cast<VectorType &>(*attr2.get());
+ attr2->enableEnumeratedSave(es);
+ attr2->addDocs(trimmedDocs);
+ populate(v2, 17);
+ EXPECT_EQUAL(trimmedDocs, attr2->getNumDocs());
+ EXPECT_EQUAL(trimmedDocs, attr2->getCommittedDocIdLimit());
+ EXPECT_EQUAL(highDocs, attr->getNumDocs());
+ EXPECT_EQUAL(highDocs, attr->getCommittedDocIdLimit());
+ attr->compactLidSpace(trimmedDocs);
+ EXPECT_EQUAL(highDocs, attr->getNumDocs());
+ EXPECT_EQUAL(trimmedDocs, attr->getCommittedDocIdLimit());
+ EXPECT_TRUE(attr->save());
+ EXPECT_EQUAL(highDocs, attr->getNumDocs());
+ EXPECT_EQUAL(trimmedDocs, attr->getCommittedDocIdLimit());
+ AttributePtr attr3 = AttributeFactory::createAttribute(name, cfg);
+ EXPECT_TRUE(attr3->load());
+ EXPECT_EQUAL(trimmedDocs, attr3->getNumDocs());
+ EXPECT_EQUAL(trimmedDocs, attr3->getCommittedDocIdLimit());
+ VectorType &v3 = static_cast<VectorType &>(*attr3.get());
+ compare<VectorType, BufferType>(v2, v3);
+ attr->shrinkLidSpace();
+ EXPECT_EQUAL(trimmedDocs, attr->getNumDocs());
+ EXPECT_EQUAL(trimmedDocs, attr->getCommittedDocIdLimit());
+ compare<VectorType, BufferType>(v, v3);
+}
+
+
+template <typename VectorType, typename BufferType>
+void
+AttributeTest::testCompactLidSpace(const Config &config)
+{
+ testCompactLidSpace<VectorType, BufferType>(config, false, false);
+ testCompactLidSpace<VectorType, BufferType>(config, false, true);
+ bool smallUInt = isUnsignedSmallIntAttribute(config.basicType().type());
+ if (smallUInt)
+ return;
+ testCompactLidSpace<VectorType, BufferType>(config, true, false);
+ testCompactLidSpace<VectorType, BufferType>(config, true, true);
+}
+
+
+void
+AttributeTest::testCompactLidSpace(const Config &config)
+{
+ switch (config.basicType().type()) {
+ case BasicType::UINT1:
+ case BasicType::UINT2:
+ case BasicType::UINT4:
+ case BasicType::INT8:
+ case BasicType::INT16:
+ case BasicType::INT32:
+ case BasicType::INT64:
+ if (config.collectionType() == CollectionType::WSET) {
+ testCompactLidSpace<IntegerAttribute,
+ IntegerAttribute::WeightedInt>(config);
+ } else {
+ testCompactLidSpace<IntegerAttribute,
+ IntegerAttribute::largeint_t>(config);
+ }
+ break;
+ case BasicType::FLOAT:
+ case BasicType::DOUBLE:
+ if (config.collectionType() == CollectionType::WSET) {
+ testCompactLidSpace<FloatingPointAttribute,
+ FloatingPointAttribute::WeightedFloat>(config);
+ } else {
+ testCompactLidSpace<FloatingPointAttribute, double>(config);
+ }
+ break;
+ case BasicType::STRING:
+ if (config.collectionType() == CollectionType::WSET) {
+ testCompactLidSpace<StringAttribute,
+ StringAttribute::WeightedString>(config);
+ } else {
+ testCompactLidSpace<StringAttribute, vespalib::string>(config);
+ }
+ break;
+ default:
+ abort();
+ }
+}
+
+
+void
+AttributeTest::testCompactLidSpace()
+{
+ vespalib::rmdir(clstmp, true);
+ vespalib::mkdir(clstmp);
+ TEST_DO(testCompactLidSpace(Config(BasicType::UINT1,
+ CollectionType::SINGLE)));
+ TEST_DO(testCompactLidSpace(Config(BasicType::UINT2,
+ CollectionType::SINGLE)));
+ TEST_DO(testCompactLidSpace(Config(BasicType::UINT4,
+ CollectionType::SINGLE)));
+ TEST_DO(testCompactLidSpace(Config(BasicType::INT8,
+ CollectionType::SINGLE)));
+ TEST_DO(testCompactLidSpace(Config(BasicType::INT8,
+ CollectionType::ARRAY)));
+ TEST_DO(testCompactLidSpace(Config(BasicType::INT8,
+ CollectionType::WSET)));
+ TEST_DO(testCompactLidSpace(Config(BasicType::INT16,
+ CollectionType::SINGLE)));
+ TEST_DO(testCompactLidSpace(Config(BasicType::INT16,
+ CollectionType::ARRAY)));
+ TEST_DO(testCompactLidSpace(Config(BasicType::INT16,
+ CollectionType::WSET)));
+ TEST_DO(testCompactLidSpace(Config(BasicType::INT32,
+ CollectionType::SINGLE)));
+ TEST_DO(testCompactLidSpace(Config(BasicType::INT32,
+ CollectionType::ARRAY)));
+ TEST_DO(testCompactLidSpace(Config(BasicType::INT32,
+ CollectionType::WSET)));
+ TEST_DO(testCompactLidSpace(Config(BasicType::INT64,
+ CollectionType::SINGLE)));
+ TEST_DO(testCompactLidSpace(Config(BasicType::INT64,
+ CollectionType::ARRAY)));
+ TEST_DO(testCompactLidSpace(Config(BasicType::INT64,
+ CollectionType::WSET)));
+ TEST_DO(testCompactLidSpace(Config(BasicType::FLOAT,
+ CollectionType::SINGLE)));
+ TEST_DO(testCompactLidSpace(Config(BasicType::FLOAT,
+ CollectionType::ARRAY)));
+ TEST_DO(testCompactLidSpace(Config(BasicType::FLOAT,
+ CollectionType::WSET)));
+ TEST_DO(testCompactLidSpace(Config(BasicType::DOUBLE,
+ CollectionType::SINGLE)));
+ TEST_DO(testCompactLidSpace(Config(BasicType::DOUBLE,
+ CollectionType::ARRAY)));
+ TEST_DO(testCompactLidSpace(Config(BasicType::DOUBLE,
+ CollectionType::WSET)));
+ TEST_DO(testCompactLidSpace(Config(BasicType::STRING,
+ CollectionType::SINGLE)));
+ TEST_DO(testCompactLidSpace(Config(BasicType::STRING,
+ CollectionType::ARRAY)));
+ TEST_DO(testCompactLidSpace(Config(BasicType::STRING,
+ CollectionType::WSET)));
+ vespalib::rmdir(clstmp, true);
+}
+
+template <typename AttributeType>
+void
+AttributeTest::requireThatAddressSpaceUsageIsReported(const Config &config, bool fastSearch)
+{
+ uint32_t numDocs = 10;
+ vespalib::string attrName = asuDir + "/" + config.basicType().asString() + "-" +
+ config.collectionType().asString() + (fastSearch ? "-fs" : "");
+ Config cfg = config;
+ cfg.setFastSearch(fastSearch);
+
+ AttributePtr attrPtr = AttributeFactory::createAttribute(attrName, cfg);
+ addDocs(attrPtr, numDocs);
+ AddressSpaceUsage before = attrPtr->getAddressSpaceUsage();
+ populate(static_cast<AttributeType &>(*attrPtr.get()), 5);
+ AddressSpaceUsage after = attrPtr->getAddressSpaceUsage();
+ if (attrPtr->hasEnum()) {
+ LOG(info, "requireThatAddressSpaceUsageIsReported(%s): Has enum", attrName.c_str());
+ EXPECT_EQUAL(before.enumStoreUsage().used(), 0u);
+ EXPECT_GREATER(after.enumStoreUsage().used(), before.enumStoreUsage().used());
+ EXPECT_EQUAL(after.enumStoreUsage().limit(), before.enumStoreUsage().limit());
+ EXPECT_EQUAL(34359738368u, after.enumStoreUsage().limit()); // EnumStoreBase::DataStoreType::RefType::offsetSize()
+ } else {
+ LOG(info, "requireThatAddressSpaceUsageIsReported(%s): NOT enum", attrName.c_str());
+ EXPECT_EQUAL(before.enumStoreUsage().used(), 0u);
+ EXPECT_EQUAL(after.enumStoreUsage(), before.enumStoreUsage());
+ EXPECT_EQUAL(AddressSpaceUsage::defaultEnumStoreUsage(), after.enumStoreUsage());
+ }
+ if (attrPtr->hasMultiValue()) {
+ LOG(info, "requireThatAddressSpaceUsageIsReported(%s): Has multi-value", attrName.c_str());
+ EXPECT_EQUAL(before.multiValueUsage().used(), 0u);
+ EXPECT_GREATER(after.multiValueUsage().used(), before.multiValueUsage().used());
+ EXPECT_EQUAL(after.multiValueUsage().limit(), before.multiValueUsage().limit());
+ EXPECT_EQUAL(134217728u, after.multiValueUsage().limit()); // multivalue::Index32::offsetSize()
+ } else {
+ LOG(info, "requireThatAddressSpaceUsageIsReported(%s): NOT multi-value", attrName.c_str());
+ EXPECT_EQUAL(before.multiValueUsage().used(), 0u);
+ EXPECT_EQUAL(after.multiValueUsage(), before.multiValueUsage());
+ EXPECT_EQUAL(AddressSpaceUsage::defaultMultiValueUsage(), after.multiValueUsage());
+ }
+}
+
+template <typename AttributeType>
+void
+AttributeTest::requireThatAddressSpaceUsageIsReported(const Config &config)
+{
+ requireThatAddressSpaceUsageIsReported<AttributeType>(config, false);
+ requireThatAddressSpaceUsageIsReported<AttributeType>(config, true);
+}
+
+void
+AttributeTest::requireThatAddressSpaceUsageIsReported()
+{
+ vespalib::rmdir(asuDir, true);
+ vespalib::mkdir(asuDir);
+ TEST_DO(requireThatAddressSpaceUsageIsReported<IntegerAttribute>(Config(BasicType::INT32, CollectionType::SINGLE)));
+ TEST_DO(requireThatAddressSpaceUsageIsReported<IntegerAttribute>(Config(BasicType::INT32, CollectionType::ARRAY)));
+ TEST_DO(requireThatAddressSpaceUsageIsReported<FloatingPointAttribute>(Config(BasicType::FLOAT, CollectionType::SINGLE)));
+ TEST_DO(requireThatAddressSpaceUsageIsReported<FloatingPointAttribute>(Config(BasicType::FLOAT, CollectionType::ARRAY)));
+ TEST_DO(requireThatAddressSpaceUsageIsReported<StringAttribute>(Config(BasicType::STRING, CollectionType::SINGLE)));
+ TEST_DO(requireThatAddressSpaceUsageIsReported<StringAttribute>(Config(BasicType::STRING, CollectionType::ARRAY)));
+}
+
+int AttributeTest::Main()
+{
+ TEST_INIT("attribute_test");
+
+ if (_argc > 0) {
+ DummyFileHeaderContext::setCreator(_argv[0]);
+ }
+ testBaseName();
+ testReload();
+ testHasLoadData();
+ testMemorySaver();
+
+ testSingle();
+ testArray();
+ testWeightedSet();
+ testArithmeticValueUpdate();
+ testArithmeticWithUndefinedValue();
+ testMapValueUpdate();
+ testStatus();
+ testNullProtection();
+ testGeneration();
+ testCreateSerialNum();
+ TEST_DO(testCompactLidSpace());
+ TEST_DO(requireThatAddressSpaceUsageIsReported());
+
+ TEST_DONE();
+}
+
+}
+
+
+TEST_APPHOOK(search::AttributeTest);
diff --git a/searchlib/src/tests/attribute/attribute_test.sh b/searchlib/src/tests/attribute/attribute_test.sh
new file mode 100644
index 00000000000..89c52129b74
--- /dev/null
+++ b/searchlib/src/tests/attribute/attribute_test.sh
@@ -0,0 +1,7 @@
+#!/bin/bash
+$VALGRIND ./searchlib_attribute_test_app
+rm -rf *.dat
+rm -rf *.idx
+rm -rf *.weight
+rm -rf clstmp
+rm -rf alstmp
diff --git a/searchlib/src/tests/attribute/attributebenchmark.cpp b/searchlib/src/tests/attribute/attributebenchmark.cpp
new file mode 100644
index 00000000000..88446ef71f7
--- /dev/null
+++ b/searchlib/src/tests/attribute/attributebenchmark.cpp
@@ -0,0 +1,678 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include "attributesearcher.h"
+#include "attributeupdater.h"
+#include <vespa/searchlib/util/randomgenerator.h>
+#include "runnable.h"
+#include <vespa/searchlib/attribute/attribute.h>
+#include <vespa/searchlib/attribute/attributeguard.h>
+#include <vespa/searchlib/attribute/attributefactory.h>
+#include <vespa/searchlib/attribute/singlenumericattribute.h>
+#include <vespa/searchlib/attribute/multinumericattribute.h>
+#include <vespa/searchlib/attribute/singlestringattribute.h>
+#include <vespa/searchlib/attribute/multistringattribute.h>
+#include <vespa/searchlib/attribute/attrvector.h>
+#include <vespa/vespalib/util/sync.h>
+#include <iostream>
+#include <fstream>
+#include <vespa/log/log.h>
+
+LOG_SETUP("attributebenchmark");
+
+#include <vespa/searchlib/attribute/attributevector.hpp>
+
+using vespalib::Monitor;
+using vespalib::MonitorGuard;
+using std::shared_ptr;
+
+typedef std::vector<uint32_t> NumVector;
+typedef std::vector<vespalib::string> StringVector;
+typedef AttributeVector::SP AttributePtr;
+typedef AttributeVector::DocId DocId;
+typedef search::attribute::Config AttrConfig;
+using search::attribute::BasicType;
+using search::attribute::CollectionType;
+
+namespace search {
+
+class AttributeBenchmark : public FastOS_Application
+{
+private:
+ class Config {
+ public:
+ vespalib::string _attribute;
+ uint32_t _numDocs;
+ uint32_t _numUpdates;
+ uint32_t _numValues;
+ uint32_t _numSearchers;
+ uint32_t _numQueries;
+ bool _searchersOnly;
+ bool _validate;
+ uint32_t _populateRuns;
+ uint32_t _updateRuns;
+ uint32_t _commitFreq;
+ uint32_t _minValueCount;
+ uint32_t _maxValueCount;
+ uint32_t _minStringLen;
+ uint32_t _maxStringLen;
+ uint32_t _seed;
+ bool _writeAttribute;
+ int64_t _rangeStart;
+ int64_t _rangeEnd;
+ int64_t _rangeDelta;
+ bool _rangeSearch;
+ uint32_t _prefixLength;
+ bool _prefixSearch;
+
+
+ Config() : _attribute(""), _numDocs(0), _numUpdates(0), _numValues(0),
+ _numSearchers(0), _numQueries(0), _searchersOnly(true), _validate(false), _populateRuns(0), _updateRuns(0),
+ _commitFreq(0), _minValueCount(0), _maxValueCount(0), _minStringLen(0), _maxStringLen(0), _seed(0),
+ _writeAttribute(false), _rangeStart(0), _rangeEnd(0), _rangeDelta(0), _rangeSearch(false),
+ _prefixLength(0), _prefixSearch(false) {}
+ void printXML() const;
+ };
+
+ class Resource {
+ private:
+ std::vector<struct rusage> _usages;
+ struct rusage _reset;
+
+ public:
+ Resource() : _usages(), _reset() { reset(); };
+ void reset() {
+ getrusage(0, &_reset);
+ }
+ void saveUsage() {
+ struct rusage now;
+ getrusage(0, &now);
+ struct rusage usage = computeDifference(_reset, now);
+ _usages.push_back(usage);
+ }
+ void printLastXML(uint32_t opCount) {
+ (void) opCount;
+ struct rusage & usage = _usages.back();
+ std::cout << "<ru_utime>" << usage.ru_utime.tv_sec * 1000 + usage.ru_utime.tv_usec / 1000
+ << "</ru_utime>" << std::endl;
+ std::cout << "<ru_stime>" << usage.ru_stime.tv_sec * 1000 + usage.ru_stime.tv_usec / 1000
+ << "</ru_stime>" << std::endl;
+ std::cout << "<ru_nvcsw>" << usage.ru_nvcsw << "</ru_nvcsw>" << std::endl;
+ std::cout << "<ru_nivcsw>" << usage.ru_nivcsw << "</ru_nivcsw>" << std::endl;
+ }
+ static struct rusage computeDifference(struct rusage & first, struct rusage & second);
+ };
+
+ FastOS_ThreadPool * _threadPool;
+ Config _config;
+ RandomGenerator _rndGen;
+
+ void init(const Config & config);
+ void usage();
+
+ // benchmark helper methods
+ void addDocs(const AttributePtr & ptr, uint32_t numDocs);
+ template <typename Vector, typename T, typename BT>
+ void benchmarkPopulate(const AttributePtr & ptr, const std::vector<T> & values, uint32_t id);
+ template <typename Vector, typename T, typename BT>
+ void benchmarkUpdate(const AttributePtr & ptr, const std::vector<T> & values, uint32_t id);
+
+ template <typename T>
+ std::vector<vespalib::string> prepareForPrefixSearch(const std::vector<T> & values) const;
+ template <typename T>
+ void benchmarkSearch(const AttributePtr & ptr, const std::vector<T> & values);
+ template <typename Vector, typename T, typename BT>
+ void benchmarkSearchWithUpdater(const AttributePtr & ptr,
+ const std::vector<T> & values);
+
+ template <typename Vector, typename T, typename BT>
+ void benchmarkAttribute(const AttributePtr & ptr, const std::vector<T> & values);
+
+ // Numeric Attribute
+ void benchmarkNumeric(const AttributePtr & ptr);
+
+ // String Attribute
+ void benchmarkString(const AttributePtr & ptr);
+
+
+public:
+ AttributeBenchmark() : _threadPool(NULL), _config(), _rndGen() {}
+ ~AttributeBenchmark() {
+ if (_threadPool != NULL) {
+ delete _threadPool;
+ }
+ }
+ int Main();
+};
+
+
+void
+AttributeBenchmark::Config::printXML() const
+{
+ std::cout << "<config>" << std::endl;
+ std::cout << "<attribute>" << _attribute << "</attribute>" << std::endl;
+ std::cout << "<num-docs>" << _numDocs << "</num-docs>" << std::endl;
+ std::cout << "<num-updates>" << _numUpdates << "</num-updates>" << std::endl;
+ std::cout << "<num-values>" << _numValues << "</num-values>" << std::endl;
+ std::cout << "<num-searchers>" << _numSearchers << "</num-searchers>" << std::endl;
+ std::cout << "<num-queries>" << _numQueries << "</num-queries>" << std::endl;
+ std::cout << "<searchers-only>" << (_searchersOnly ? "true" : "false") << "</searchers-only>" << std::endl;
+ std::cout << "<validate>" << (_validate ? "true" : "false") << "</validate>" << std::endl;
+ std::cout << "<populate-runs>" << _populateRuns << "</populate-runs>" << std::endl;
+ std::cout << "<update-runs>" << _updateRuns << "</update-runs>" << std::endl;
+ std::cout << "<commit-freq>" << _commitFreq << "</commit-freq>" << std::endl;
+ std::cout << "<min-value-count>" << _minValueCount << "</min-value-count>" << std::endl;
+ std::cout << "<max-value-count>" << _maxValueCount << "</max-value-count>" << std::endl;
+ std::cout << "<min-string-len>" << _minStringLen << "</min-string-len>" << std::endl;
+ std::cout << "<max-string-len>" << _maxStringLen << "</max-string-len>" << std::endl;
+ std::cout << "<seed>" << _seed << "</seed>" << std::endl;
+ std::cout << "<range-start>" << _rangeStart << "</range-start>" << std::endl;
+ std::cout << "<range-end>" << _rangeEnd << "</range-end>" << std::endl;
+ std::cout << "<range-delta>" << _rangeDelta << "</range-delta>" << std::endl;
+ std::cout << "<range-search>" << (_rangeSearch ? "true" : "false") << "</range-search>" << std::endl;
+ std::cout << "<prefix-length>" << _prefixLength << "</range-length>" << std::endl;
+ std::cout << "<prefix-search>" << (_prefixSearch ? "true" : "false") << "</prefix-search>" << std::endl;
+ std::cout << "</config>" << std::endl;
+}
+
+void
+AttributeBenchmark::init(const Config & config)
+{
+ _config = config;
+ _rndGen.srand(_config._seed);
+}
+
+
+//-----------------------------------------------------------------------------
+// Benchmark helper methods
+//-----------------------------------------------------------------------------
+void
+AttributeBenchmark::addDocs(const AttributePtr & ptr, uint32_t numDocs)
+{
+ DocId startDoc;
+ DocId lastDoc;
+ bool success = ptr->addDocs(startDoc, lastDoc, numDocs);
+ assert(success);
+ (void) success;
+ assert(startDoc == 0);
+ assert(lastDoc + 1 == numDocs);
+ assert(ptr->getNumDocs() == numDocs);
+}
+
+template <typename Vector, typename T, typename BT>
+void
+AttributeBenchmark::benchmarkPopulate(const AttributePtr & ptr, const std::vector<T> & values, uint32_t id)
+{
+ std::cout << "<!-- Populate " << _config._numDocs << " documents -->" << std::endl;
+ AttributeUpdater<Vector, T, BT>
+ updater(ptr, values, _rndGen, _config._validate, _config._commitFreq,
+ _config._minValueCount, _config._maxValueCount);
+ updater.populate();
+ std::cout << "<populate id='" << id << "'>" << std::endl;
+ updater.getStatus().printXML();
+ std::cout << "</populate>" << std::endl;
+ if (_config._validate) {
+ std::cout << "<!-- All " << updater.getValidator().getTotalCnt()
+ << " asserts passed -->" << std::endl;
+ }
+}
+
+template <typename Vector, typename T, typename BT>
+void
+AttributeBenchmark::benchmarkUpdate(const AttributePtr & ptr, const std::vector<T> & values, uint32_t id)
+{
+ std::cout << "<!-- Apply " << _config._numUpdates << " updates -->" << std::endl;
+ AttributeUpdater<Vector, T, BT>
+ updater(ptr, values, _rndGen, _config._validate, _config._commitFreq,
+ _config._minValueCount, _config._maxValueCount);
+ updater.update(_config._numUpdates);
+ std::cout << "<update id='" << id << "'>" << std::endl;
+ updater.getStatus().printXML();
+ std::cout << "</update>" << std::endl;
+ if (_config._validate) {
+ std::cout << "<!-- All " << updater.getValidator().getTotalCnt()
+ << " asserts passed -->" << std::endl;
+ }
+}
+
+template <typename T>
+std::vector<vespalib::string>
+AttributeBenchmark::prepareForPrefixSearch(const std::vector<T> & values) const
+{
+ (void) values;
+ return std::vector<vespalib::string>();
+}
+
+template <>
+std::vector<vespalib::string>
+AttributeBenchmark::prepareForPrefixSearch(const std::vector<AttributeVector::WeightedString> & values) const
+{
+ std::vector<vespalib::string> retval;
+ retval.reserve(values.size());
+ for (size_t i = 0; i < values.size(); ++i) {
+ retval.push_back(values[i].getValue().substr(0, _config._prefixLength));
+ }
+ return retval;
+}
+
+template <typename T>
+void
+AttributeBenchmark::benchmarkSearch(const AttributePtr & ptr, const std::vector<T> & values)
+{
+ std::vector<AttributeSearcher *> searchers;
+ if (_config._numSearchers > 0) {
+ std::cout << "<!-- Starting " << _config._numSearchers << " searcher threads with "
+ << _config._numQueries << " queries each -->" << std::endl;
+
+ std::vector<vespalib::string> prefixStrings = prepareForPrefixSearch(values);
+
+ for (uint32_t i = 0; i < _config._numSearchers; ++i) {
+ if (_config._rangeSearch) {
+ RangeSpec spec(_config._rangeStart, _config._rangeEnd, _config._rangeDelta);
+ searchers.push_back(new AttributeRangeSearcher(i, ptr, spec, _config._numQueries));
+ } else if (_config._prefixSearch) {
+ searchers.push_back(new AttributePrefixSearcher(i, ptr, prefixStrings, _config._numQueries));
+ } else {
+ searchers.push_back(new AttributeFindSearcher<T>(i, ptr, values, _config._numQueries));
+ }
+ _threadPool->NewThread(searchers.back());
+ }
+
+ for (uint32_t i = 0; i < searchers.size(); ++i) {
+ searchers[i]->join();
+ }
+
+ AttributeSearcherStatus totalStatus;
+ for (uint32_t i = 0; i < searchers.size(); ++i) {
+ std::cout << "<searcher-summary id='" << i << "'>" << std::endl;
+ searchers[i]->getStatus().printXML();
+ std::cout << "</searcher-summary>" << std::endl;
+ totalStatus.merge(searchers[i]->getStatus());
+ delete searchers[i];
+ }
+ std::cout << "<total-searcher-summary>" << std::endl;
+ totalStatus.printXML();
+ std::cout << "</total-searcher-summary>" << std::endl;
+ }
+}
+
+template <typename Vector, typename T, typename BT>
+void
+AttributeBenchmark::benchmarkSearchWithUpdater(const AttributePtr & ptr,
+ const std::vector<T> & values)
+{
+ if (_config._numSearchers > 0) {
+ std::cout << "<!-- Starting 1 updater thread -->" << std::endl;
+ AttributeUpdaterThread<Vector, T, BT>
+ updater(ptr, values, _rndGen, _config._validate, _config._commitFreq,
+ _config._minValueCount, _config._maxValueCount);
+ _threadPool->NewThread(&updater);
+ benchmarkSearch(ptr, values);
+ updater.stop();
+ updater.join();
+ std::cout << "<updater-summary>" << std::endl;
+ updater.getStatus().printXML();
+ std::cout << "</updater-summary>" << std::endl;
+ if (_config._validate) {
+ std::cout << "<!-- All " << updater.getValidator().getTotalCnt()
+ << " asserts passed -->" << std::endl;
+ }
+ }
+}
+
+template <typename Vector, typename T, typename BT>
+void
+AttributeBenchmark::benchmarkAttribute(const AttributePtr & ptr, const std::vector<T> & values)
+{
+ addDocs(ptr, _config._numDocs);
+
+ // populate
+ for (uint32_t i = 0; i < _config._populateRuns; ++i) {
+ benchmarkPopulate<Vector, T, BT>(ptr, values, i);
+ }
+
+ // update
+ if (_config._numUpdates > 0) {
+ for (uint32_t i = 0; i < _config._updateRuns; ++i) {
+ benchmarkUpdate<Vector, T, BT>(ptr, values, i);
+ }
+ }
+
+ // search
+ if (_config._searchersOnly) {
+ benchmarkSearch(ptr, values);
+ } else {
+ benchmarkSearchWithUpdater<Vector, T, BT>(ptr, values);
+ }
+
+ _threadPool->Close();
+}
+
+
+//-----------------------------------------------------------------------------
+// Numeric Attribute
+//-----------------------------------------------------------------------------
+void
+AttributeBenchmark::benchmarkNumeric(const AttributePtr & ptr)
+{
+ NumVector values;
+ if (_config._rangeSearch) {
+ values.reserve(_config._numValues);
+ for (uint32_t i = 0; i < _config._numValues; ++i) {
+ values.push_back(i);
+ }
+ } else {
+ _rndGen.fillRandomIntegers(values, _config._numValues);
+ }
+
+ std::vector<int32_t> weights;
+ _rndGen.fillRandomIntegers(weights, _config._numValues);
+
+ std::vector<AttributeVector::WeightedInt> weightedVector;
+ weightedVector.reserve(values.size());
+ for (size_t i = 0; i < values.size(); ++i) {
+ if (!ptr->hasWeightedSetType()) {
+ weightedVector.push_back(AttributeVector::WeightedInt(values[i]));
+ } else {
+ weightedVector.push_back(AttributeVector::WeightedInt(values[i], weights[i]));
+ }
+ }
+ benchmarkAttribute<IntegerAttribute, AttributeVector::WeightedInt, AttributeVector::WeightedInt>
+ (ptr, weightedVector);
+}
+
+
+//-----------------------------------------------------------------------------
+// String Attribute
+//-----------------------------------------------------------------------------
+void
+AttributeBenchmark::benchmarkString(const AttributePtr & ptr)
+{
+ StringVector strings;
+ _rndGen.fillRandomStrings(strings, _config._numValues, _config._minStringLen, _config._maxStringLen);
+
+ std::vector<int32_t> weights;
+ _rndGen.fillRandomIntegers(weights, _config._numValues);
+
+ std::vector<AttributeVector::WeightedString> weightedVector;
+ weightedVector.reserve(strings.size());
+ for (size_t i = 0; i < strings.size(); ++i) {
+ if (!ptr->hasWeightedSetType()) {
+ weightedVector.push_back(AttributeVector::WeightedString(strings[i]));
+ } else {
+ weightedVector.push_back(AttributeVector::WeightedString(strings[i], weights[i]));
+ }
+ }
+ benchmarkAttribute<StringAttribute, AttributeVector::WeightedString, AttributeVector::WeightedString>
+ (ptr, weightedVector);
+}
+
+
+//-----------------------------------------------------------------------------
+// Resource utilization
+//-----------------------------------------------------------------------------
+struct rusage
+AttributeBenchmark::Resource::computeDifference(struct rusage & first, struct rusage & second)
+{
+ struct rusage result;
+ // utime
+ uint64_t firstutime = first.ru_utime.tv_sec * 1000000 + first.ru_utime.tv_usec;
+ uint64_t secondutime = second.ru_utime.tv_sec * 1000000 + second.ru_utime.tv_usec;
+ uint64_t resultutime = secondutime - firstutime;
+ result.ru_utime.tv_sec = resultutime / 1000000;
+ result.ru_utime.tv_usec = resultutime % 1000000;
+
+ // stime
+ uint64_t firststime = first.ru_stime.tv_sec * 1000000 + first.ru_stime.tv_usec;
+ uint64_t secondstime = second.ru_stime.tv_sec * 1000000 + second.ru_stime.tv_usec;
+ uint64_t resultstime = secondstime - firststime;
+ result.ru_stime.tv_sec = resultstime / 1000000;
+ result.ru_stime.tv_usec = resultstime % 1000000;
+
+ result.ru_maxrss = second.ru_maxrss; // - first.ru_maxrss;
+ result.ru_ixrss = second.ru_ixrss; // - first.ru_ixrss;
+ result.ru_idrss = second.ru_idrss; // - first.ru_idrss;
+ result.ru_isrss = second.ru_isrss; // - first.ru_isrss;
+ result.ru_minflt = second.ru_minflt - first.ru_minflt;
+ result.ru_majflt = second.ru_majflt - first.ru_majflt;
+ result.ru_nswap = second.ru_nswap - first.ru_nswap;
+ result.ru_inblock = second.ru_inblock - first.ru_inblock;
+ result.ru_oublock = second.ru_oublock - first.ru_oublock;
+ result.ru_msgsnd = second.ru_msgsnd - first.ru_msgsnd;
+ result.ru_msgrcv = second.ru_msgrcv - first.ru_msgrcv;
+ result.ru_nsignals = second.ru_nsignals - first.ru_nsignals;
+ result.ru_nvcsw = second.ru_nvcsw - first.ru_nvcsw;
+ result.ru_nivcsw = second.ru_nivcsw - first.ru_nivcsw;
+
+ return result;
+}
+
+
+void
+AttributeBenchmark::usage()
+{
+ std::cout << "usage: attributebenchmark [-n numDocs] [-u numUpdates] [-v numValues]" << std::endl;
+ std::cout << " [-s numSearchers] [-q numQueries] [-p populateRuns] [-r updateRuns]" << std::endl;
+ std::cout << " [-c commitFrequency] [-l minValueCount] [-h maxValueCount]" << std::endl;
+ std::cout << " [-i minStringLen] [-a maxStringLen] [-e seed]" << std::endl;
+ std::cout << " [-S rangeStart] [-E rangeEnd] [-D rangeDelta] [-L prefixLength]" << std::endl;
+ std::cout << " [-b (searchers with updater)] [-R (range search)] [-P (prefix search)]" << std::endl;
+ std::cout << " [-t (validate updates)] [-w (write attribute to disk)]" << std::endl;
+ std::cout << " <attribute>" << std::endl;
+ std::cout << " <attribute> : s-uint32, a-uint32, ws-uint32" << std::endl;
+ std::cout << " s-fa-uint32, a-fa-uint32, ws-fa-uint32" << std::endl;
+ std::cout << " s-fs-uint32, a-fs-uint32, ws-fs-uint32 ws-frs-uint32" << std::endl;
+ std::cout << " s-string, a-string, ws-string" << std::endl;
+ std::cout << " s-fs-string, a-fs-string, ws-fs-string ws-frs-string" << std::endl;
+}
+
+int
+AttributeBenchmark::Main()
+{
+ Config dc;
+ dc._numDocs = 50000;
+ dc._numUpdates = 50000;
+ dc._numValues = 1000;
+ dc._numSearchers = 0;
+ dc._numQueries = 1000;
+ dc._searchersOnly = true;
+ dc._validate = false;
+ dc._populateRuns = 1;
+ dc._updateRuns = 1;
+ dc._commitFreq = 1000;
+ dc._minValueCount = 0;
+ dc._maxValueCount = 20;
+ dc._minStringLen = 1;
+ dc._maxStringLen = 50;
+ dc._seed = 555;
+ dc._writeAttribute = false;
+ dc._rangeStart = 0;
+ dc._rangeEnd = 1000;
+ dc._rangeDelta = 10;
+ dc._rangeSearch = false;
+ dc._prefixLength = 2;
+ dc._prefixSearch = false;
+
+ int idx = 1;
+ char opt;
+ const char * arg;
+ bool optError = false;
+ while ((opt = GetOpt("n:u:v:s:q:p:r:c:l:h:i:a:e:S:E:D:L:bRPtw", arg, idx)) != -1) {
+ switch (opt) {
+ case 'n':
+ dc._numDocs = atoi(arg);
+ break;
+ case 'u':
+ dc._numUpdates = atoi(arg);
+ break;
+ case 'v':
+ dc._numValues = atoi(arg);
+ break;
+ case 's':
+ dc._numSearchers = atoi(arg);
+ break;
+ case 'q':
+ dc._numQueries = atoi(arg);
+ break;
+ case 'p':
+ dc._populateRuns = atoi(arg);
+ break;
+ case 'r':
+ dc._updateRuns = atoi(arg);
+ break;
+ case 'c':
+ dc._commitFreq = atoi(arg);
+ break;
+ case 'l':
+ dc._minValueCount = atoi(arg);
+ break;
+ case 'h':
+ dc._maxValueCount = atoi(arg);
+ break;
+ case 'i':
+ dc._minStringLen = atoi(arg);
+ break;
+ case 'a':
+ dc._maxStringLen = atoi(arg);
+ break;
+ case 'e':
+ dc._seed = atoi(arg);
+ break;
+ case 'S':
+ dc._rangeStart = strtoll(arg, NULL, 10);
+ break;
+ case 'E':
+ dc._rangeEnd = strtoll(arg, NULL, 10);
+ break;
+ case 'D':
+ dc._rangeDelta = strtoll(arg, NULL, 10);
+ break;
+ case 'L':
+ dc._prefixLength = atoi(arg);
+ break;
+ case 'b':
+ dc._searchersOnly = false;
+ break;
+ case 'R':
+ dc._rangeSearch = true;
+ break;
+ case 'P':
+ dc._prefixSearch = true;
+ break;
+ case 't':
+ dc._validate = true;
+ break;
+ case 'w':
+ dc._writeAttribute = true;
+ break;
+ default:
+ optError = true;
+ break;
+ }
+ }
+
+ if (_argc != (idx + 1) || optError) {
+ usage();
+ return -1;
+ }
+
+ dc._attribute = vespalib::string(_argv[idx]);
+
+ _threadPool = new FastOS_ThreadPool(256000);
+
+ std::cout << "<attribute-benchmark>" << std::endl;
+ init(dc);
+ _config.printXML();
+
+ AttributePtr ptr;
+
+ if (_config._attribute == "s-int32") {
+ std::cout << "<!-- Benchmark SingleValueNumericAttribute<int32_t> -->" << std::endl;
+ ptr = AttributeFactory::createAttribute("s-int32", AttrConfig(BasicType::INT32, CollectionType::SINGLE));
+ benchmarkNumeric(ptr);
+
+ } else if (_config._attribute == "a-int32") {
+ std::cout << "<!-- Benchmark MultiValueNumericAttribute<int32_t> (array) -->" << std::endl;
+ ptr = AttributeFactory::createAttribute("a-int32", AttrConfig(BasicType::INT32, CollectionType::ARRAY));
+ benchmarkNumeric(ptr);
+
+ } else if (_config._attribute == "ws-int32") {
+ std::cout << "<!-- Benchmark MultiValueNumericAttribute<int32_t> (wset) -->" << std::endl;
+ ptr = AttributeFactory::createAttribute("ws-int32", AttrConfig(BasicType::INT32, CollectionType::WSET));
+ benchmarkNumeric(ptr);
+
+ } else if (_config._attribute == "s-fs-int32") {
+ std::cout << "<!-- Benchmark SingleValueNumericPostingAttribute<int32_t> -->" << std::endl;
+ AttrConfig cfg(BasicType::INT32, CollectionType::SINGLE);
+ cfg.setFastSearch(true);
+ ptr = AttributeFactory::createAttribute("s-fs-int32", cfg);
+ benchmarkNumeric(ptr);
+
+ } else if (_config._attribute == "a-fs-int32") {
+ std::cout << "<!-- Benchmark MultiValueNumericPostingAttribute<int32_t> (array) -->" << std::endl;
+ AttrConfig cfg(BasicType::INT32, CollectionType::ARRAY);
+ cfg.setFastSearch(true);
+ ptr = AttributeFactory::createAttribute("a-fs-int32", cfg);
+ benchmarkNumeric(ptr);
+
+ } else if (_config._attribute == "ws-fs-int32") {
+ std::cout << "<!-- Benchmark MultiValueNumericPostingAttribute<int32_t> (wset) -->" << std::endl;
+ AttrConfig cfg(BasicType::INT32, CollectionType::WSET);
+ cfg.setFastSearch(true);
+ ptr = AttributeFactory::createAttribute("ws-fs-int32", cfg);
+ benchmarkNumeric(ptr);
+
+ } else if (_config._attribute == "s-string") {
+ std::cout << "<!-- Benchmark SingleValueStringAttribute -->" << std::endl;
+ ptr = AttributeFactory::createAttribute("s-string", AttrConfig(BasicType::STRING, CollectionType::SINGLE));
+ benchmarkString(ptr);
+
+ } else if (_config._attribute == "a-string") {
+ std::cout << "<!-- Benchmark ArrayStringAttribute (array) -->" << std::endl;
+ ptr = AttributeFactory::createAttribute("a-string", AttrConfig(BasicType::STRING, CollectionType::ARRAY));
+ benchmarkString(ptr);
+
+ } else if (_config._attribute == "ws-string") {
+ std::cout << "<!-- Benchmark WeightedSetStringAttribute (wset) -->" << std::endl;
+ ptr = AttributeFactory::createAttribute("ws-string", AttrConfig(BasicType::STRING, CollectionType::WSET));
+ benchmarkString(ptr);
+
+ } else if (_config._attribute == "s-fs-string") {
+ std::cout << "<!-- Benchmark SingleValueStringPostingAttribute (single fast search) -->" << std::endl;
+ AttrConfig cfg(BasicType::STRING, CollectionType::SINGLE);
+ cfg.setFastSearch(true);
+ ptr = AttributeFactory::createAttribute("s-fs-string", cfg);
+ benchmarkString(ptr);
+
+ } else if (_config._attribute == "a-fs-string") {
+ std::cout << "<!-- Benchmark ArrayStringPostingAttribute (array fast search) -->" << std::endl;
+ AttrConfig cfg(BasicType::STRING, CollectionType::ARRAY);
+ cfg.setFastSearch(true);
+ ptr = AttributeFactory::createAttribute("a-fs-string", cfg);
+ benchmarkString(ptr);
+
+ } else if (_config._attribute == "ws-fs-string") {
+ std::cout << "<!-- Benchmark WeightedSetStringPostingAttribute (wset fast search) -->" << std::endl;
+ AttrConfig cfg(BasicType::STRING, CollectionType::WSET);
+ cfg.setFastSearch(true);
+ ptr = AttributeFactory::createAttribute("ws-fs-string", cfg);
+ benchmarkString(ptr);
+
+ }
+
+ if (dc._writeAttribute) {
+ std::cout << "<!-- Writing attribute to disk -->" << std::endl;
+ ptr->saveAs(ptr->getBaseFileName());
+ }
+
+ std::cout << "</attribute-benchmark>" << std::endl;
+
+ return 0;
+}
+}
+
+int main(int argc, char ** argv)
+{
+ search::AttributeBenchmark myapp;
+ return myapp.Entry(argc, argv);
+}
+
diff --git a/searchlib/src/tests/attribute/attributebenchmark.rb b/searchlib/src/tests/attribute/attributebenchmark.rb
new file mode 100644
index 00000000000..44b08ec4389
--- /dev/null
+++ b/searchlib/src/tests/attribute/attributebenchmark.rb
@@ -0,0 +1,22 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vectors = ["sv-num-new", "mv-num-new", "sv-string-new", "mv-string-new"]#, "sv-num-old", "mv-num-old", "sv-string-old", "mv-string-old"]
+num_docs = [500000, 1000000, 2000000, 4000000, 8000000, 16000000]
+unique_percent = [0.001, 0.01, 0.05, 0.20, 0.50]
+
+vectors.each do |vector|
+ num_docs.each do |num|
+ unique_percent.each do |percent|
+ unique = num * percent
+ command = "./attributebenchmark -n #{num} -u 1000000 -v #{unique} -p 2 -r 1 -s 1 -q 1000 #{vector} > 03-27-full/#{vector}-n#{num}-v#{unique}-p2-r1-s1-q1000.log 2>&1"
+ puts command
+ `#{command}`
+ s = 1
+ 5.times do
+ command = "./attributebenchmark -n #{num} -v #{unique} -p 1 -r 0 -s #{s} -q 100 -b #{vector} > 03-27-full/#{vector}-n#{num}-v#{unique}-s#{s}-q100-b.log 2>&1"
+ puts command
+ `#{command}`
+ s = s*2;
+ end
+ end
+ end
+end
diff --git a/searchlib/src/tests/attribute/attributefilewriter/.gitignore b/searchlib/src/tests/attribute/attributefilewriter/.gitignore
new file mode 100644
index 00000000000..ea6a0e03bf2
--- /dev/null
+++ b/searchlib/src/tests/attribute/attributefilewriter/.gitignore
@@ -0,0 +1 @@
+searchlib_attributefilewriter_test_app
diff --git a/searchlib/src/tests/attribute/attributefilewriter/CMakeLists.txt b/searchlib/src/tests/attribute/attributefilewriter/CMakeLists.txt
new file mode 100644
index 00000000000..a1d859bbfb9
--- /dev/null
+++ b/searchlib/src/tests/attribute/attributefilewriter/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_attributefilewriter_test_app
+ SOURCES
+ attributefilewriter_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_attributefilewriter_test_app COMMAND searchlib_attributefilewriter_test_app)
diff --git a/searchlib/src/tests/attribute/attributefilewriter/attributefilewriter_test.cpp b/searchlib/src/tests/attribute/attributefilewriter/attributefilewriter_test.cpp
new file mode 100644
index 00000000000..acf61cd58bb
--- /dev/null
+++ b/searchlib/src/tests/attribute/attributefilewriter/attributefilewriter_test.cpp
@@ -0,0 +1,116 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("attributefilewriter_test");
+#include <vespa/vespalib/testkit/testapp.h>
+#include <vespa/vespalib/stllike/string.h>
+#include <vespa/searchlib/attribute/attributefilewriter.h>
+#include <vespa/searchlib/attribute/attributefilebufferwriter.h>
+#include <vespa/searchlib/util/fileutil.h>
+#include <vespa/searchlib/util/rand48.h>
+#include <vespa/searchlib/common/tunefileinfo.h>
+#include <vespa/searchlib/common/fileheadercontext.h>
+#include <vespa/searchlib/index/dummyfileheadercontext.h>
+
+using search::index::DummyFileHeaderContext;
+
+namespace search
+{
+
+namespace
+{
+
+vespalib::string testFileName("test.dat");
+vespalib::string hello("Hello world");
+
+void removeTestFile() { FastOS_File::Delete(testFileName.c_str()); }
+
+struct Fixture {
+ TuneFileAttributes _tuneFileAttributes;
+ DummyFileHeaderContext _fileHeaderContext;
+ IAttributeSaveTarget::Config _cfg;
+ const vespalib::string _desc;
+ AttributeFileWriter _writer;
+
+ Fixture()
+ : _tuneFileAttributes(),
+ _fileHeaderContext(),
+ _cfg(),
+ _desc("Attribute file sample description"),
+ _writer(_tuneFileAttributes,
+ _fileHeaderContext,
+ _cfg,
+ _desc)
+ {
+ removeTestFile();
+ }
+
+ ~Fixture() {
+ removeTestFile();
+ }
+
+};
+
+}
+
+
+TEST_F("Test that we can write empty attribute file", Fixture)
+{
+ EXPECT_TRUE(f._writer.open(testFileName));
+ f._writer.close();
+ FileUtil::LoadedBuffer::UP loaded(FileUtil::loadFile(testFileName));
+ EXPECT_EQUAL(0u, loaded->size());
+}
+
+
+TEST_F("Test that we destroy writer without calling close", Fixture)
+{
+ EXPECT_TRUE(f._writer.open(testFileName));
+}
+
+
+TEST_F("Test that buffer writer passes on written data", Fixture)
+{
+ std::vector<int> a;
+ const size_t mysize = 3000000;
+ const size_t writerBufferSize = AttributeFileBufferWriter::BUFFER_SIZE;
+ EXPECT_GREATER(mysize * sizeof(int), writerBufferSize);
+ a.reserve(mysize);
+ search::Rand48 rnd;
+ for (uint32_t i = 0; i < mysize; ++i) {
+ a.emplace_back(rnd.lrand48());
+ }
+ EXPECT_TRUE(f._writer.open(testFileName));
+ std::unique_ptr<BufferWriter> writer(f._writer.allocBufferWriter());
+ writer->write(&a[0], a.size() * sizeof(int));
+ writer->flush();
+ writer.reset();
+ f._writer.close();
+ FileUtil::LoadedBuffer::UP loaded(FileUtil::loadFile(testFileName));
+ EXPECT_EQUAL(a.size() * sizeof(int), loaded->size());
+ EXPECT_TRUE(memcmp(&a[0], loaded->buffer(), loaded->size()) == 0);
+}
+
+
+TEST_F("Test that we can pass buffer directly", Fixture)
+{
+ using Buffer = IAttributeFileWriter::Buffer;
+ Buffer buf = f._writer.allocBuf(hello.size());
+ buf->writeBytes(hello.c_str(), hello.size());
+ EXPECT_TRUE(f._writer.open(testFileName));
+ f._writer.writeBuf(std::move(buf));
+ f._writer.close();
+ FileUtil::LoadedBuffer::UP loaded(FileUtil::loadFile(testFileName));
+ EXPECT_EQUAL(hello.size(), loaded->size());
+ EXPECT_TRUE(memcmp(hello.c_str(), loaded->buffer(), loaded->size()) == 0);
+}
+
+
+}
+
+
+TEST_MAIN()
+{
+ TEST_RUN_ALL();
+}
diff --git a/searchlib/src/tests/attribute/attributeguard.cpp b/searchlib/src/tests/attribute/attributeguard.cpp
new file mode 100644
index 00000000000..5c90caa094b
--- /dev/null
+++ b/searchlib/src/tests/attribute/attributeguard.cpp
@@ -0,0 +1,32 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("attributeguard_test");
+#include <vespa/vespalib/testkit/testapp.h>
+#include <vespa/searchlib/attribute/attributeguard.h>
+#include <vespa/searchlib/attribute/extendableattributes.h>
+
+namespace search {
+
+class AttributeGuardTest : public vespalib::TestApp
+{
+public:
+ int Main();
+};
+
+int
+AttributeGuardTest::Main()
+{
+ TEST_INIT("attributeguard_test");
+
+
+ AttributeVector::SP ssattr(new SingleStringExtAttribute("ss1"));
+ AttributeEnumGuard guard(ssattr);
+ EXPECT_TRUE(guard.valid());
+
+ TEST_DONE();
+}
+
+}
+
+TEST_APPHOOK(search::AttributeGuardTest);
diff --git a/searchlib/src/tests/attribute/attributeguard_test.sh b/searchlib/src/tests/attribute/attributeguard_test.sh
new file mode 100644
index 00000000000..6a9557e7da7
--- /dev/null
+++ b/searchlib/src/tests/attribute/attributeguard_test.sh
@@ -0,0 +1,7 @@
+#!/bin/bahs
+$VALGRIND ./searchlib_attributeguard_test_app
+rm -rf *.dat
+rm -rf *.idx
+rm -rf *.weight
+rm -rf clstmp
+rm -rf alstmp
diff --git a/searchlib/src/tests/attribute/attributemanager/.gitignore b/searchlib/src/tests/attribute/attributemanager/.gitignore
new file mode 100644
index 00000000000..6fa89f09572
--- /dev/null
+++ b/searchlib/src/tests/attribute/attributemanager/.gitignore
@@ -0,0 +1,4 @@
+.depend
+Makefile
+attributemanager_test
+searchlib_attributemanager_test_app
diff --git a/searchlib/src/tests/attribute/attributemanager/CMakeLists.txt b/searchlib/src/tests/attribute/attributemanager/CMakeLists.txt
new file mode 100644
index 00000000000..ed3eeee1065
--- /dev/null
+++ b/searchlib/src/tests/attribute/attributemanager/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_attributemanager_test_app
+ SOURCES
+ attributemanager_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_attributemanager_test_app COMMAND searchlib_attributemanager_test_app)
diff --git a/searchlib/src/tests/attribute/attributemanager/attributemanager_test.cpp b/searchlib/src/tests/attribute/attributemanager/attributemanager_test.cpp
new file mode 100644
index 00000000000..bf247668843
--- /dev/null
+++ b/searchlib/src/tests/attribute/attributemanager/attributemanager_test.cpp
@@ -0,0 +1,422 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("attribute_test");
+#include <vespa/searchlib/attribute/attribute.h>
+#include <vespa/searchlib/attribute/attributeguard.h>
+#include <vespa/searchlib/attribute/attributefactory.h>
+#include <vespa/searchlib/attribute/attributemanager.h>
+#include <vespa/searchlib/attribute/configconverter.h>
+#include <vespa/searchlib/attribute/multinumericattribute.h>
+#include <vespa/searchlib/attribute/multinumericattribute.hpp>
+#include <vespa/searchlib/attribute/stringattribute.h>
+#include <vespa/vespalib/testkit/testapp.h>
+#include <algorithm>
+
+using namespace config;
+using namespace vespa::config::search;
+using namespace search;
+using namespace search::attribute;
+using vespalib::tensor::TensorType;
+using std::shared_ptr;
+
+typedef BasicType BT;
+typedef CollectionType CT;
+typedef AttributeVector::SP AVSP;
+
+namespace search {
+
+class AttributeManagerTest : public vespalib::TestApp
+{
+private:
+ void verifyLoad(AttributeVector & v);
+ void testLoad();
+ void testGuards();
+ void testConfigConvert();
+ void testContext();
+
+ bool
+ assertDataType(BT::Type exp,
+ AttributesConfig::Attribute::Datatype in);
+
+ bool
+ assertCollectionType(CollectionType exp,
+ AttributesConfig::Attribute::Collectiontype in,
+ bool removeIfZ = false,
+ bool createIfNe = false);
+
+public:
+ AttributeManagerTest()
+ {
+ }
+ int Main();
+};
+
+
+typedef MultiValueNumericAttribute< IntegerAttributeTemplate<int32_t>,
+ multivalue::MVMTemplateArg<
+ multivalue::Value<int32_t>, multivalue::Index32> >
+TestAttributeBase;
+
+class TestAttribute : public TestAttributeBase
+{
+public:
+ TestAttribute(const std::string &name)
+ :
+ TestAttributeBase(name)
+ {
+ }
+
+ generation_t
+ getGen() const
+ {
+ return getCurrentGeneration();
+ }
+
+ uint32_t
+ getRefCount(generation_t gen) const
+ {
+ return getGenerationRefCount(gen);
+ }
+
+ void
+ incGen()
+ {
+ incGeneration();
+ }
+
+ void
+ updateFirstUsedGen(void)
+ {
+ updateFirstUsedGeneration();
+ }
+
+ generation_t
+ getFirstUsedGen() const
+ {
+ return getFirstUsedGeneration();
+ }
+};
+
+
+void
+AttributeManagerTest::testGuards()
+{
+ AttributeVector::SP vec(new TestAttribute("mvint") );
+ TestAttribute * v = static_cast<TestAttribute *> (vec.get());
+ EXPECT_EQUAL(v->getGen(), unsigned(0));
+ EXPECT_EQUAL(v->getRefCount(0), unsigned(0));
+ EXPECT_EQUAL(v->getFirstUsedGen(), unsigned(0));
+ {
+ AttributeGuard g0(vec);
+ EXPECT_EQUAL(v->getGen(), unsigned(0));
+ EXPECT_EQUAL(v->getRefCount(0), unsigned(1));
+ EXPECT_EQUAL(v->getFirstUsedGen(), unsigned(0));
+ {
+ AttributeGuard g1(vec);
+ EXPECT_EQUAL(v->getGen(), unsigned(0));
+ EXPECT_EQUAL(v->getRefCount(0), unsigned(2));
+ EXPECT_EQUAL(v->getFirstUsedGen(), unsigned(0));
+ }
+ EXPECT_EQUAL(v->getRefCount(0), unsigned(1));
+ EXPECT_EQUAL(v->getFirstUsedGen(), unsigned(0));
+ }
+ EXPECT_EQUAL(v->getRefCount(0), unsigned(0));
+ EXPECT_EQUAL(v->getFirstUsedGen(), unsigned(0));
+
+ v->incGen();
+ EXPECT_EQUAL(v->getGen(), unsigned(1));
+ EXPECT_EQUAL(v->getRefCount(0), unsigned(0));
+ EXPECT_EQUAL(v->getRefCount(1), unsigned(0));
+ EXPECT_EQUAL(v->getFirstUsedGen(), unsigned(1));
+ {
+ AttributeGuard g0(vec);
+ EXPECT_EQUAL(v->getGen(), unsigned(1));
+ EXPECT_EQUAL(v->getRefCount(0), unsigned(0));
+ EXPECT_EQUAL(v->getRefCount(1), unsigned(1));
+ EXPECT_EQUAL(v->getFirstUsedGen(), unsigned(1));
+ {
+ v->incGen();
+ AttributeGuard g1(vec);
+ EXPECT_EQUAL(v->getGen(), unsigned(2));
+ EXPECT_EQUAL(v->getRefCount(0), unsigned(0));
+ EXPECT_EQUAL(v->getRefCount(1), unsigned(1));
+ EXPECT_EQUAL(v->getRefCount(2), unsigned(1));
+ EXPECT_EQUAL(v->getFirstUsedGen(), unsigned(1));
+ }
+ EXPECT_EQUAL(v->getRefCount(0), unsigned(0));
+ EXPECT_EQUAL(v->getRefCount(1), unsigned(1));
+ EXPECT_EQUAL(v->getRefCount(2), unsigned(0));
+ EXPECT_EQUAL(v->getFirstUsedGen(), unsigned(1));
+ }
+ EXPECT_EQUAL(v->getRefCount(0), unsigned(0));
+ EXPECT_EQUAL(v->getRefCount(1), unsigned(0));
+ EXPECT_EQUAL(v->getRefCount(2), unsigned(0));
+ EXPECT_EQUAL(v->getFirstUsedGen(), unsigned(1));
+ v->updateFirstUsedGeneration();
+ EXPECT_EQUAL(v->getFirstUsedGen(), unsigned(2));
+ EXPECT_EQUAL(v->getGen(), unsigned(2));
+}
+
+
+void
+AttributeManagerTest::verifyLoad(AttributeVector & v)
+{
+ EXPECT_TRUE( !v.isLoaded() );
+ EXPECT_TRUE( v.load() );
+ EXPECT_TRUE( v.isLoaded() );
+ EXPECT_EQUAL( v.getNumDocs(), size_t(100) );
+}
+
+
+void
+AttributeManagerTest::testLoad()
+{
+ {
+ TestAttributeBase v("mvint");
+ EXPECT_TRUE(!v.isLoaded());
+ for(size_t i(0); i < 100; i++) {
+ AttributeVector::DocId doc;
+ EXPECT_TRUE( v.addDoc(doc) );
+ EXPECT_TRUE( doc == i);
+ }
+ EXPECT_TRUE( v.getNumDocs() == 100);
+ for(size_t i(0); i < 100; i++) {
+ for(size_t j(0); j < i; j++) {
+ EXPECT_TRUE( v.append(i, j, 1) );
+ }
+ v.commit();
+ EXPECT_TRUE(size_t(v.getValueCount(i)) == i);
+ EXPECT_EQUAL(v.getMaxValueCount(), std::max(size_t(1), i));
+ }
+ EXPECT_TRUE(v.isLoaded());
+ EXPECT_TRUE(v.save());
+ EXPECT_TRUE(v.isLoaded());
+ }
+ {
+ TestAttributeBase v("mvint");
+ verifyLoad(v);
+ }
+ {
+ AttributeVector::Config config(BT::INT32,
+ CollectionType::ARRAY);
+ TestAttributeBase v("mvint", config);
+ verifyLoad(v);
+ }
+ {
+ AttributeManager manager;
+ AttributeVector::Config config(BT::INT32,
+ CollectionType::ARRAY);
+ EXPECT_TRUE(manager.addVector("mvint", config));
+ AttributeManager::AttributeList list;
+ manager.getAttributeList(list);
+ EXPECT_TRUE(list.size() == 1);
+ EXPECT_TRUE( list[0]->isLoaded());
+ AttributeGuard::UP attrG(manager.getAttribute("mvint"));
+ EXPECT_TRUE( attrG->valid() );
+ }
+}
+
+
+bool
+AttributeManagerTest::assertDataType(BT::Type exp,
+ AttributesConfig::Attribute::Datatype in)
+{
+ AttributesConfig::Attribute a;
+ a.datatype = in;
+ return EXPECT_EQUAL(exp, ConfigConverter::convert(a).basicType().type());
+}
+
+
+bool
+AttributeManagerTest::
+assertCollectionType(CollectionType exp,
+ AttributesConfig::Attribute::Collectiontype in,
+ bool removeIfZ,
+ bool createIfNe)
+{
+ AttributesConfig::Attribute a;
+ a.collectiontype = in;
+ a.removeifzero = removeIfZ;
+ a.createifnonexistent = createIfNe;
+ AttributeVector::Config out = ConfigConverter::convert(a);
+ return EXPECT_EQUAL(exp.type(), out.collectionType().type()) &&
+ EXPECT_EQUAL(exp.removeIfZero(), out.collectionType().removeIfZero()) &&
+ EXPECT_EQUAL(exp.createIfNonExistant(),
+ out.collectionType().createIfNonExistant());
+}
+
+
+void
+AttributeManagerTest::testConfigConvert()
+{
+ // typedef AttributeVector::Config AVC;
+ typedef BT AVBT;
+ typedef CollectionType AVCT;
+ typedef AttributesConfig::Attribute CACA;
+ typedef ConfigConverter CC;
+
+ EXPECT_TRUE(assertDataType(AVBT::STRING, CACA::STRING));
+ EXPECT_TRUE(assertDataType(AVBT::INT8, CACA::INT8));
+ EXPECT_TRUE(assertDataType(AVBT::INT16, CACA::INT16));
+ EXPECT_TRUE(assertDataType(AVBT::INT32, CACA::INT32));
+ EXPECT_TRUE(assertDataType(AVBT::INT64, CACA::INT64));
+ EXPECT_TRUE(assertDataType(AVBT::FLOAT, CACA::FLOAT));
+ EXPECT_TRUE(assertDataType(AVBT::DOUBLE, CACA::DOUBLE));
+ EXPECT_TRUE(assertDataType(AVBT::PREDICATE, CACA::PREDICATE));
+ EXPECT_TRUE(assertDataType(AVBT::TENSOR, CACA::TENSOR));
+ EXPECT_TRUE(assertDataType(AVBT::NONE, CACA::NONE));
+
+ EXPECT_TRUE(assertCollectionType(AVCT::SINGLE, CACA::SINGLE));
+ EXPECT_TRUE(assertCollectionType(AVCT::ARRAY, CACA::ARRAY));
+ EXPECT_TRUE(assertCollectionType(AVCT::WSET, CACA::WEIGHTEDSET));
+ EXPECT_TRUE(assertCollectionType(AVCT(AVCT::SINGLE, true, false),
+ CACA::SINGLE, true, false));
+ EXPECT_TRUE(assertCollectionType(AVCT(AVCT::SINGLE, false, true),
+ CACA::SINGLE, false, true));
+
+ { // fastsearch
+ CACA a;
+ EXPECT_TRUE(!CC::convert(a).fastSearch());
+ a.fastsearch = true;
+ EXPECT_TRUE(CC::convert(a).fastSearch());
+ }
+ { // huge
+ CACA a;
+ EXPECT_TRUE(!CC::convert(a).huge());
+ a.huge = true;
+ EXPECT_TRUE(CC::convert(a).huge());
+ }
+ { // fastAccess
+ CACA a;
+ EXPECT_TRUE(!CC::convert(a).fastAccess());
+ a.fastaccess = true;
+ EXPECT_TRUE(CC::convert(a).fastAccess());
+ }
+ { // tensor
+ CACA a;
+ a.datatype = CACA::TENSOR;
+ a.tensortype = "tensor(x[5])";
+ AttributeVector::Config out = ConfigConverter::convert(a);
+ EXPECT_EQUAL("tensor(x[5])", out.tensorType().toSpec());
+ }
+}
+
+bool gt_attribute(const attribute::IAttributeVector * a, const attribute::IAttributeVector * b) {
+ return a->getName() < b->getName();
+}
+
+void
+AttributeManagerTest::testContext()
+{
+ std::vector<AVSP> attrs;
+ // create various attributes vectors
+ attrs.push_back(AttributeFactory::createAttribute("sint32",
+ Config(BT::INT32, CT::SINGLE)));
+ attrs.push_back(AttributeFactory::createAttribute("aint32",
+ Config(BT::INT32, CT::ARRAY)));
+ attrs.push_back(AttributeFactory::createAttribute("wsint32",
+ Config(BT::INT32, CT::WSET)));
+ attrs.push_back(AttributeFactory::createAttribute("dontcare",
+ Config(BT::INT32, CT::SINGLE)));
+
+ // add docs
+ for (uint32_t i = 0; i < attrs.size(); ++i) {
+ attrs[i]->addDocs(64);
+ }
+
+ // commit all attributes (current generation -> 1);
+ for (uint32_t i = 0; i < attrs.size(); ++i) {
+ attrs[i]->commit();
+ }
+
+ AttributeManager manager;
+ // add to manager
+ for (uint32_t i = 0; i < attrs.size(); ++i) {
+ manager.add(attrs[i]);
+ }
+
+ {
+ IAttributeContext::UP first = manager.createContext();
+
+ // no generation guards taken yet
+ for (uint32_t i = 0; i < attrs.size(); ++i) {
+ EXPECT_EQUAL(attrs[i]->getCurrentGeneration(), 1u);
+ EXPECT_EQUAL(attrs[i]->getGenerationRefCount(1u), 0u);
+ }
+
+ for (uint32_t i = 0; i < 2; ++i) {
+ EXPECT_TRUE(first->getAttribute("sint32") != NULL);
+ EXPECT_TRUE(first->getAttribute("aint32") != NULL);
+ EXPECT_TRUE(first->getAttribute("wsint32") != NULL);
+ EXPECT_TRUE(first->getAttributeStableEnum("wsint32") != NULL);
+ }
+ EXPECT_TRUE(first->getAttribute("foo") == NULL);
+ EXPECT_TRUE(first->getAttribute("bar") == NULL);
+
+ // one generation guard taken per attribute asked for
+ for (uint32_t i = 0; i < attrs.size(); ++i) {
+ EXPECT_EQUAL(attrs[i]->getCurrentGeneration(), 1u);
+ EXPECT_EQUAL(attrs[i]->getGenerationRefCount(1u),
+ (i < 3) ? (i == 2 ? 2u : 1u) : 0u);
+ }
+
+ {
+ IAttributeContext::UP second = manager.createContext();
+
+ EXPECT_TRUE(second->getAttribute("sint32") != NULL);
+ EXPECT_TRUE(second->getAttribute("aint32") != NULL);
+ EXPECT_TRUE(second->getAttribute("wsint32") != NULL);
+ EXPECT_TRUE(second->getAttributeStableEnum("wsint32") != NULL);
+
+ // two generation guards taken per attribute asked for
+ for (uint32_t i = 0; i < attrs.size(); ++i) {
+ EXPECT_EQUAL(attrs[i]->getCurrentGeneration(), 1u);
+ EXPECT_EQUAL(attrs[i]->getGenerationRefCount(1u),
+ (i < 3) ? (i == 2 ? 4u : 2u) : 0u);
+ }
+ }
+
+ // one generation guard taken per attribute asked for
+ for (uint32_t i = 0; i < attrs.size(); ++i) {
+ EXPECT_EQUAL(attrs[i]->getCurrentGeneration(), 1u);
+ EXPECT_EQUAL(attrs[i]->getGenerationRefCount(1u),
+ (i < 3) ? (i == 2 ? 2u : 1u) : 0u);
+ }
+ }
+
+ // no generation guards taken
+ for (uint32_t i = 0; i < attrs.size(); ++i) {
+ EXPECT_EQUAL(attrs[i]->getCurrentGeneration(), 1u);
+ EXPECT_EQUAL(attrs[i]->getGenerationRefCount(1u), 0u);
+ }
+
+ {
+ IAttributeContext::UP ctx = manager.createContext();
+ std::vector<const attribute::IAttributeVector *> all;
+ ctx->getAttributeList(all);
+ EXPECT_EQUAL(4u, all.size());
+ std::sort(all.begin(), all.end(), gt_attribute);
+ EXPECT_EQUAL("aint32", all[0]->getName());
+ EXPECT_EQUAL("dontcare", all[1]->getName());
+ EXPECT_EQUAL("sint32", all[2]->getName());
+ EXPECT_EQUAL("wsint32", all[3]->getName());
+ }
+}
+
+int AttributeManagerTest::Main()
+{
+ TEST_INIT("attributemanager_test");
+
+ testLoad();
+ testGuards();
+ testConfigConvert();
+ testContext();
+
+ TEST_DONE();
+}
+
+} // namespace search
+
+
+TEST_APPHOOK(search::AttributeManagerTest);
diff --git a/searchlib/src/tests/attribute/attributesearcher.h b/searchlib/src/tests/attribute/attributesearcher.h
new file mode 100644
index 00000000000..7456d22f306
--- /dev/null
+++ b/searchlib/src/tests/attribute/attributesearcher.h
@@ -0,0 +1,265 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "runnable.h"
+#include <vespa/fastos/fastos.h>
+#include <vespa/searchlib/attribute/attribute.h>
+#include <vespa/searchlib/attribute/attributeguard.h>
+#include <vespa/searchlib/queryeval/hitcollector.h>
+#include <vespa/vespalib/stllike/asciistream.h>
+#include <vespa/vespalib/util/compress.h>
+
+namespace search {
+
+std::unique_ptr<ResultSet>
+performSearch(queryeval::SearchIterator & sb, uint32_t numDocs)
+{
+ queryeval::HitCollector hc(numDocs, numDocs, 0);
+ // assume strict toplevel search object located at start
+ for (sb.seek(1); ! sb.isAtEnd(); sb.seek(sb.getDocId() + 1)) {
+ hc.addHit(sb.getDocId(), 0.0);
+ }
+ return hc.getResultSet();
+}
+
+class AttributeSearcherStatus
+{
+public:
+ double _totalSearchTime;
+ uint64_t _totalHitCount;
+ uint64_t _numQueries;
+ uint64_t _numClients;
+
+ AttributeSearcherStatus() : _totalSearchTime(0), _totalHitCount(0), _numQueries(0), _numClients(0) {}
+ void merge(const AttributeSearcherStatus & status) {
+ _totalSearchTime += status._totalSearchTime;
+ _totalHitCount += status._totalHitCount;
+ _numQueries += status._numQueries;
+ _numClients += status._numClients;
+ }
+ void printXML() const {
+ std::cout << "<total-search-time>" << _totalSearchTime << "</total-search-time>" << std::endl; // ms
+ std::cout << "<avg-search-time>" << avgSearchTime() << "</avg-search-time>" << std::endl; // ms
+ std::cout << "<search-throughput>" << searchThroughout() << "</search-throughput>" << std::endl; // per/sec
+ std::cout << "<total-hit-count>" << _totalHitCount << "</total-hit-count>" << std::endl;
+ std::cout << "<avg-hit-count>" << avgHitCount() << "</avg-hit-count>" << std::endl;
+ }
+ double avgSearchTime() const {
+ return _totalSearchTime / _numQueries;
+ }
+ double searchThroughout() const {
+ return _numClients * 1000 * _numQueries / _totalSearchTime;
+ }
+ double avgHitCount() const {
+ return _totalHitCount / static_cast<double>(_numQueries);
+ }
+};
+
+
+class AttributeSearcher : public Runnable
+{
+protected:
+ typedef AttributeVector::SP AttributePtr;
+
+ const AttributePtr & _attrPtr;
+ FastOS_Time _timer;
+ AttributeSearcherStatus _status;
+
+public:
+ AttributeSearcher(uint32_t id, const AttributePtr & attrPtr) :
+ Runnable(id), _attrPtr(attrPtr), _timer(), _status()
+ {
+ _status._numClients = 1;
+ }
+ virtual void doRun() = 0;
+ AttributeSearcherStatus & getStatus() { return _status; }
+ void buildTermQuery(std::vector<char> & buffer, const vespalib::string & index, const char * term, bool prefix = false);
+};
+
+void
+AttributeSearcher::buildTermQuery(std::vector<char> & buffer, const vespalib::string & index, const char * term, bool prefix)
+{
+ uint32_t indexLen = index.size();
+ uint32_t termLen = strlen(term);
+ uint32_t termIdx = prefix ? ParseItem::ITEM_PREFIXTERM : ParseItem::ITEM_TERM;
+ uint32_t queryPacketSize = vespalib::compress::Integer::compressedPositiveLength(termIdx)
+ + vespalib::compress::Integer::compressedPositiveLength(indexLen)
+ + vespalib::compress::Integer::compressedPositiveLength(termLen)
+ + indexLen + termLen;
+ buffer.resize(queryPacketSize);
+ char * p = &buffer[0];
+ p += vespalib::compress::Integer::compressPositive(termIdx, p);
+ p += vespalib::compress::Integer::compressPositive(indexLen, p);
+ memcpy(p, index.c_str(), indexLen);
+ p += indexLen;
+ p += vespalib::compress::Integer::compressPositive(termLen, p);
+ memcpy(p, term, termLen);
+ p += termLen;
+ assert(p == (&buffer[0] + buffer.size()));
+}
+
+
+template <typename T>
+class AttributeFindSearcher : public AttributeSearcher
+{
+private:
+ const std::vector<T> & _values;
+ std::vector<char> _query;
+
+public:
+ AttributeFindSearcher(uint32_t id, const AttributePtr & attrPtr, const std::vector<T> & values,
+ uint32_t numQueries) :
+ AttributeSearcher(id, attrPtr), _values(values), _query()
+ {
+ _status._numQueries = numQueries;
+ }
+ virtual void doRun();
+};
+
+template <typename T>
+void
+AttributeFindSearcher<T>::doRun()
+{
+ _timer.SetNow();
+ for (uint32_t i = 0; i < _status._numQueries; ++i) {
+ // build simple term query
+ vespalib::asciistream ss;
+ ss << _values[i % _values.size()].getValue();
+ this->buildTermQuery(_query, _attrPtr->getName(), ss.str().c_str());
+
+ AttributeGuard guard(_attrPtr);
+ std::unique_ptr<AttributeVector::SearchContext> searchContext =
+ _attrPtr->getSearch(vespalib::stringref(&_query[0], _query.size()),
+ AttributeVector::SearchContext::Params());
+
+ searchContext->fetchPostings(true);
+ std::unique_ptr<queryeval::SearchIterator> iterator = searchContext->createIterator(NULL, true);
+ std::unique_ptr<ResultSet> results = performSearch(*iterator, _attrPtr->getNumDocs());
+
+ _status._totalHitCount += results->getNumHits();
+ }
+ _status._totalSearchTime += _timer.MilliSecsToNow();
+}
+
+
+class RangeSpec
+{
+public:
+ int64_t _min;
+ int64_t _max;
+ int64_t _range;
+ RangeSpec(int64_t min, int64_t max, int64_t range) :
+ _min(min), _max(max), _range(range)
+ {
+ assert(_min < _max);
+ assert(_range <= (_max - _min));
+ }
+};
+
+class RangeIterator
+{
+private:
+ RangeSpec _spec;
+ int64_t _a;
+ int64_t _b;
+
+public:
+ RangeIterator(const RangeSpec & spec) : _spec(spec), _a(spec._min), _b(spec._min + _spec._range) {}
+ RangeIterator & operator++() {
+ _a += _spec._range;
+ _b += _spec._range;
+ if (_b > _spec._max) {
+ _a = _spec._min;
+ _b = _spec._min + _spec._range;
+ }
+ return *this;
+ }
+ int64_t a() const { return _a; }
+ int64_t b() const { return _b; }
+};
+
+class AttributeRangeSearcher : public AttributeSearcher
+{
+private:
+ RangeSpec _spec;
+ std::vector<char> _query;
+
+public:
+ AttributeRangeSearcher(uint32_t id, const AttributePtr & attrPtr, const RangeSpec & spec,
+ uint32_t numQueries) :
+ AttributeSearcher(id, attrPtr), _spec(spec), _query()
+ {
+ _status._numQueries = numQueries;
+ }
+ virtual void doRun();
+};
+
+void
+AttributeRangeSearcher::doRun()
+{
+ _timer.SetNow();
+ RangeIterator iter(_spec);
+ for (uint32_t i = 0; i < _status._numQueries; ++i, ++iter) {
+ // build simple range term query
+ vespalib::asciistream ss;
+ ss << "[" << iter.a() << ";" << iter.b() << "]";
+ buildTermQuery(_query, _attrPtr->getName(), ss.str().c_str());
+
+ AttributeGuard guard(_attrPtr);
+ std::unique_ptr<AttributeVector::SearchContext> searchContext =
+ _attrPtr->getSearch(vespalib::stringref(&_query[0], _query.size()),
+ AttributeVector::SearchContext::Params());
+
+ searchContext->fetchPostings(true);
+ std::unique_ptr<queryeval::SearchIterator> iterator = searchContext->createIterator(NULL, true);
+ std::unique_ptr<ResultSet> results = performSearch(*iterator, _attrPtr->getNumDocs());
+
+ _status._totalHitCount += results->getNumHits();
+ }
+ _status._totalSearchTime += _timer.MilliSecsToNow();
+}
+
+
+class AttributePrefixSearcher : public AttributeSearcher
+{
+private:
+ const std::vector<vespalib::string> & _values;
+ std::vector<char> _query;
+
+public:
+ AttributePrefixSearcher(uint32_t id, const AttributePtr & attrPtr,
+ const std::vector<vespalib::string> & values, uint32_t numQueries) :
+ AttributeSearcher(id, attrPtr), _values(values), _query()
+ {
+ _status._numQueries = numQueries;
+ }
+ virtual void doRun();
+};
+
+void
+AttributePrefixSearcher::doRun()
+{
+ _timer.SetNow();
+ for (uint32_t i = 0; i < _status._numQueries; ++i) {
+ // build simple prefix term query
+ buildTermQuery(_query, _attrPtr->getName(), _values[i % _values.size()].c_str(), true);
+
+ AttributeGuard guard(_attrPtr);
+ std::unique_ptr<AttributeVector::SearchContext> searchContext =
+ _attrPtr->getSearch(vespalib::stringref(&_query[0], _query.size()),
+ AttributeVector::SearchContext::Params());
+
+ searchContext->fetchPostings(true);
+ std::unique_ptr<queryeval::SearchIterator> iterator = searchContext->createIterator(NULL, true);
+ std::unique_ptr<ResultSet> results = performSearch(*iterator, _attrPtr->getNumDocs());
+
+ _status._totalHitCount += results->getNumHits();
+ }
+ _status._totalSearchTime += _timer.MilliSecsToNow();
+}
+
+
+
+} // search
+
diff --git a/searchlib/src/tests/attribute/attributeupdater.h b/searchlib/src/tests/attribute/attributeupdater.h
new file mode 100644
index 00000000000..5193ca0f873
--- /dev/null
+++ b/searchlib/src/tests/attribute/attributeupdater.h
@@ -0,0 +1,299 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/util/randomgenerator.h>
+#include "runnable.h"
+#include <vespa/searchlib/attribute/attribute.h>
+
+#define VALIDATOR_STR(str) #str
+#define VALIDATOR_ASSERT(rc) reportAssert(rc, __FILE__, __LINE__, VALIDATOR_STR(rc))
+#define VALIDATOR_ASSERT_EQUAL(a, b) reportAssertEqual(__FILE__, __LINE__, VALIDATOR_STR(a), VALIDATOR_STR(b), a, b)
+
+namespace search {
+
+class AttributeValidator
+{
+private:
+ uint32_t _totalCnt;
+
+public:
+ AttributeValidator() : _totalCnt(0) {}
+ uint32_t getTotalCnt() const { return _totalCnt; }
+ bool reportAssert(bool rc, const vespalib::string & file, uint32_t line, const vespalib::string & str) {
+ _totalCnt++;
+ if (!rc) {
+ std::cout << "Assert " << _totalCnt << " failed: \"" << str << "\" ("
+ << file << ":" << line << ")" << std::endl;
+ abort();
+ }
+ return true;
+ }
+ template <class A, class B>
+ bool reportAssertEqual(const vespalib::string & file, uint32_t line,
+ const vespalib::string & aStr, const vespalib::string & bStr,
+ const A & a, const B & b) {
+ _totalCnt++;
+ if (!(a == b)) {
+ std::cout << "Assert equal failed: " << std::endl;
+ std::cout << aStr << ": " << a << std::endl;
+ std::cout << bStr << ": " << b << std::endl;
+ std::cout << "(" << file << ":" << line << ")" << std::endl;
+ abort();
+ }
+ return true;
+ }
+};
+
+class AttributeUpdaterStatus
+{
+public:
+ double _totalUpdateTime;
+ uint64_t _numDocumentUpdates;
+ uint64_t _numValueUpdates;
+
+ AttributeUpdaterStatus() :
+ _totalUpdateTime(0), _numDocumentUpdates(0), _numValueUpdates(0) {}
+ void reset() {
+ _totalUpdateTime = 0;
+ _numDocumentUpdates = 0;
+ _numValueUpdates = 0;
+ }
+ void printXML() const {
+ std::cout << "<total-update-time>" << _totalUpdateTime << "</total-update-time>" << std::endl;
+ std::cout << "<documents-updated>" << _numDocumentUpdates << "</documents-updated>" << std::endl;
+ std::cout << "<document-update-throughput>" << documentUpdateThroughput() << "</document-update-throughput>" << std::endl;
+ std::cout << "<avg-document-update-time>" << avgDocumentUpdateTime() << "</avg-document-update-time>" << std::endl;
+ std::cout << "<values-updated>" << _numValueUpdates << "</values-updated>" << std::endl;
+ std::cout << "<value-update-throughput>" << valueUpdateThroughput() << "</value-update-throughput>" << std::endl;
+ std::cout << "<avg-value-update-time>" << avgValueUpdateTime() << "</avg-value-update-time>" << std::endl;
+ }
+ double documentUpdateThroughput() const {
+ return _numDocumentUpdates * 1000 / _totalUpdateTime;
+ }
+ double avgDocumentUpdateTime() const {
+ return _totalUpdateTime / _numDocumentUpdates;
+ }
+ double valueUpdateThroughput() const {
+ return _numValueUpdates * 1000 / _totalUpdateTime;
+ }
+ double avgValueUpdateTime() const {
+ return _totalUpdateTime / _numValueUpdates;
+ }
+};
+
+// AttributeVectorInstance, AttributeVectorType, AttributeVectorBufferType
+template <typename Vector, typename T, typename BT>
+class AttributeUpdater
+{
+protected:
+ typedef AttributeVector::SP AttributePtr;
+ typedef std::map<uint32_t, std::vector<T> > AttributeCommit;
+
+ const AttributePtr & _attrPtr;
+ Vector & _attrVec;
+ const std::vector<T> & _values;
+ std::vector<T> _buffer;
+ std::vector<BT> _getBuffer;
+ RandomGenerator & _rndGen;
+ AttributeCommit _expected;
+ FastOS_Time _timer;
+ AttributeUpdaterStatus _status;
+ AttributeValidator _validator;
+
+ // config
+ bool _validate;
+ uint32_t _commitFreq;
+ uint32_t _minValueCount;
+ uint32_t _maxValueCount;
+
+ uint32_t getRandomCount() {
+ return _rndGen.rand(_minValueCount, _maxValueCount);
+ }
+ uint32_t getRandomDoc() {
+ return _rndGen.rand(0, _attrPtr->getNumDocs() - 1);
+ }
+ const T & getRandomValue() {
+ return _values[_rndGen.rand(0, _values.size() - 1)];
+ }
+ void updateValues(uint32_t doc);
+ void commit();
+
+public:
+ AttributeUpdater(const AttributePtr & attrPtr, const std::vector<T> & values,
+ RandomGenerator & rndGen, bool validate, uint32_t commitFreq,
+ uint32_t minValueCount, uint32_t maxValueCount) :
+ _attrPtr(attrPtr), _attrVec(*(static_cast<Vector *>(attrPtr.get()))),
+ _values(values), _buffer(), _getBuffer(), _rndGen(rndGen), _expected(), _timer(), _status(), _validator(),
+ _validate(validate), _commitFreq(commitFreq), _minValueCount(minValueCount), _maxValueCount(maxValueCount)
+ {
+ }
+ void resetStatus() {
+ _status.reset();
+ }
+ const AttributeUpdaterStatus & getStatus() const {
+ return _status;
+ }
+ const AttributeValidator & getValidator() const {
+ return _validator;
+ }
+ void populate();
+ void update(uint32_t numUpdates);
+};
+
+template <typename Vector, typename T, typename BT>
+class AttributeUpdaterThread : public AttributeUpdater<Vector, T, BT>, public Runnable
+{
+private:
+ typedef AttributeVector::SP AttributePtr;
+
+public:
+ AttributeUpdaterThread(const AttributePtr & attrPtr, const std::vector<T> & values,
+ RandomGenerator & rndGen, bool validate, uint32_t commitFreq,
+ uint32_t minValueCount, uint32_t maxValueCount) :
+ AttributeUpdater<Vector, T, BT>(attrPtr, values, rndGen, validate, commitFreq, minValueCount, maxValueCount),
+ Runnable(0) {}
+
+ virtual void doRun();
+};
+
+
+template <typename Vector, typename T, typename BT>
+void
+AttributeUpdater<Vector, T, BT>::updateValues(uint32_t doc)
+{
+ uint32_t valueCount = getRandomCount();
+
+ if (_validate) {
+ _buffer.clear();
+ if (_attrPtr->hasMultiValue()) {
+ _attrPtr->clearDoc(doc);
+ for (uint32_t j = 0; j < valueCount; ++j) {
+ T value = getRandomValue();
+ if (_attrPtr->hasWeightedSetType()) {
+ bool exists = false;
+ for (typename std::vector<T>::iterator iter = _buffer.begin(); iter != _buffer.end(); ++iter) {
+ if (iter->getValue() == value.getValue()) {
+ exists = true;
+ iter->setWeight(value.getWeight());
+ break;
+ }
+ }
+ if (!exists) {
+ _buffer.push_back(value);
+ }
+ } else {
+ _buffer.push_back(value);
+ }
+ _attrVec.append(doc, value.getValue(), value.getWeight());
+ }
+ } else {
+ _buffer.push_back(getRandomValue());
+ _attrVec.update(doc, _buffer.back().getValue());
+ }
+ _expected[doc] = _buffer;
+
+ } else {
+ if (_attrPtr->hasMultiValue()) {
+ _attrPtr->clearDoc(doc);
+ for (uint32_t j = 0; j < valueCount; ++j) {
+ T value = getRandomValue();
+ _attrVec.append(doc, value.getValue(), value.getWeight());
+ }
+ } else {
+ _attrVec.update(doc, getRandomValue().getValue());
+ }
+ }
+
+ _status._numDocumentUpdates++;
+ _status._numValueUpdates += (_attrPtr->hasMultiValue() ? valueCount: 1);
+}
+
+template <typename Vector, typename T, typename BT>
+void
+AttributeUpdater<Vector, T, BT>::commit()
+{
+ AttributeGuard guard(this->_attrPtr);
+ if (_validate) {
+ _attrPtr->commit();
+ _getBuffer.resize(_maxValueCount);
+ for (typename AttributeCommit::iterator iter = _expected.begin();
+ iter != _expected.end(); ++iter)
+ {
+ uint32_t valueCount = _attrPtr->get(iter->first, &_getBuffer[0], _getBuffer.size());
+ _validator.VALIDATOR_ASSERT(_minValueCount <= valueCount && valueCount <= _maxValueCount);
+ if (valueCount != iter->second.size()) {
+ std::cout << "validate(" << iter->first << ")" << std::endl;
+ std::cout << "expected(" << iter->second.size() << ")" << std::endl;
+ for (size_t i = 0; i < iter->second.size(); ++i) {
+ std::cout << " [" << iter->second[i].getValue() << ", " << iter->second[i].getWeight() << "]" << std::endl;
+ }
+ std::cout << "actual(" << valueCount << ")" << std::endl;
+ for (size_t i = 0; i < valueCount; ++i) {
+ std::cout << " [" << _getBuffer[i].getValue() << ", " << _getBuffer[i].getWeight() << "]" << std::endl;
+ }
+ }
+ _validator.VALIDATOR_ASSERT_EQUAL(valueCount, iter->second.size());
+ for (uint32_t i = 0; i < valueCount; ++i) {
+ _validator.VALIDATOR_ASSERT_EQUAL(_getBuffer[i].getValue(), iter->second[i].getValue());
+ _validator.VALIDATOR_ASSERT_EQUAL(_getBuffer[i].getWeight(), iter->second[i].getWeight());
+ }
+ }
+ _expected.clear();
+ } else {
+ _attrPtr->commit();
+ }
+}
+
+template <typename Vector, typename T, typename BT>
+void
+AttributeUpdater<Vector, T, BT>::populate()
+{
+ _timer.SetNow();
+ for (uint32_t doc = 0; doc < _attrPtr->getNumDocs(); ++doc) {
+ updateValues(doc);
+ if (doc % _commitFreq == (_commitFreq - 1)) {
+ commit();
+ }
+ }
+ commit();
+ _status._totalUpdateTime += _timer.MilliSecsToNow();
+}
+
+
+template <typename Vector, typename T, typename BT>
+void
+AttributeUpdater<Vector, T, BT>::update(uint32_t numUpdates)
+{
+ _timer.SetNow();
+ for (uint32_t i = 0; i < numUpdates; ++i) {
+ uint32_t doc = getRandomDoc();
+ updateValues(doc);
+ if (i % _commitFreq == (_commitFreq - 1)) {
+ commit();
+ }
+ }
+ commit();
+ _status._totalUpdateTime += _timer.MilliSecsToNow();
+}
+
+
+template <typename Vector, typename T, typename BT>
+void
+AttributeUpdaterThread<Vector, T, BT>::doRun()
+{
+ this->_timer.SetNow();
+ while(!_done) {
+ uint32_t doc = this->getRandomDoc();
+ this->updateValues(doc);
+ if (this->_status._numDocumentUpdates % this->_commitFreq == (this->_commitFreq - 1)) {
+ this->commit();
+ }
+ }
+ this->commit();
+ this->_status._totalUpdateTime += this->_timer.MilliSecsToNow();
+}
+
+
+} // search
+
diff --git a/searchlib/src/tests/attribute/benchmarkplotter.rb b/searchlib/src/tests/attribute/benchmarkplotter.rb
new file mode 100644
index 00000000000..d77c92c8acd
--- /dev/null
+++ b/searchlib/src/tests/attribute/benchmarkplotter.rb
@@ -0,0 +1,134 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+require 'rexml/document'
+
+def plot_graph(plot_data, plot_png, title, xlabel, ylabel, graph_titles)
+ plot_cmd = "";
+ plot_cmd += "set terminal png\n"
+ plot_cmd += "set output \"#{plot_png}\"\n"
+ plot_cmd += "set title \"#{title}\"\n"
+ plot_cmd += "set xlabel \"#{xlabel}\"\n"
+ plot_cmd += "set ylabel \"#{ylabel}\"\n"
+ c = 2
+ plots = []
+ plot_cmd += "plot "
+ graph_titles.each do |title|
+ plots.push("\"#{plot_data}\" using 1:#{c} title \"#{title}\" with linespoints")
+ c += 1
+ end
+ plot_cmd += plots.join(", ")
+
+ plot_cmd_file = File.open("plot_graph.cmd", "w")
+ plot_cmd_file.write(plot_cmd);
+ plot_cmd_file.close
+ cmd = "gnuplot plot_graph.cmd"
+ puts cmd
+ puts `#{cmd}`
+end
+
+def extract_alpha(num_docs, percentages, input, output, xml_getter)
+ plot_data = File.open(output, "w");
+ num_docs.each do |num|
+ data_line = "#{num} "
+ percentages.each do |prc|
+ unique = num * prc
+ filename = input.sub("#N", "#{num}").sub("#V", "#{unique}")
+ value = 0
+ begin
+ xml_root = REXML::Document.new(File.open(filename)).root
+ value = send(xml_getter, xml_root)
+ rescue REXML::ParseException
+ puts "Could not parse file: #{filename}"
+ end
+ data_line += "#{value} "
+ end
+ plot_data.write(data_line + "\n")
+ end
+ plot_data.close
+end
+
+def extract_beta(num_docs, percentage, num_threads, input, output, xml_getter)
+ plot_data = File.open(output, "w");
+ num_docs.each do |num|
+ data_line = "#{num} "
+ unique = num * percentage
+ num_threads.each do |thread|
+ filename = input.sub("#N", "#{num}").sub("#V", "#{unique}").sub("#S", "#{thread}")
+ value = 0
+ begin
+ xml_root = REXML::Document.new(File.open(filename)).root
+ value = send(xml_getter, xml_root)
+ rescue REXML::ParseException
+ puts "Could not parse file: #{filename}"
+ end
+ data_line += "#{value} "
+ end
+ plot_data.write(data_line + "\n")
+ end
+ plot_data.close
+end
+
+def xml_getter_update_0_throughput(xml_root)
+ return xml_root.elements["update[@id='0']"].elements["throughput"].text
+end
+
+def xml_getter_search_throughput(xml_root)
+ return xml_root.elements["total-searcher-summary"].elements["search-throughput"].text
+end
+
+def xml_getter_updater_thread_throughput(xml_root)
+ return throughput = xml_root.elements["updater-summary"].elements["throughput"].text
+end
+
+
+vectors = ["mv-num-new"]#, "mv-num-new", "sv-string-new", "mv-string-new"]#, "sv-num-old", "mv-num-old", "sv-string-old", "mv-string-old"]
+num_docs = [500000, 1000000, 2000000, 4000000, 8000000, 16000000]
+unique_percentages = [0.001, 0.01, 0.05, 0.20, 0.50]
+num_threads = [1, 2, 4, 8, 16]
+
+inputs = ["03-27-full/#AV-n#N-v#V-p2-r1-s1-q1000.log",
+ "03-27-full/#AV-n#N-v#V-s#S-q100-b.log"]
+graph_titles = [[], []]
+unique_percentages.each do |percentage|
+ graph_titles[0].push("#{percentage * 100} % uniques")
+end
+num_threads.each do |thread|
+ graph_titles[1].push("#{thread} searcher thread(s)")
+end
+
+vectors.each do |vector|
+ extract_alpha(num_docs, unique_percentages,
+ inputs[0].sub("#AV", vector),
+ "#{vector}-update-speed.dat",
+ :xml_getter_update_0_throughput)
+ plot_graph("#{vector}-update-speed.dat",
+ "#{vector}-update-speed.png",
+ "Update speed when applying 1M updates",
+ "Number of documents", "Updates per/sec", graph_titles[0])
+
+ extract_alpha(num_docs, unique_percentages,
+ inputs[0].sub("#AV", vector),
+ "#{vector}-search-speed.dat",
+ :xml_getter_search_throughput)
+ plot_graph("#{vector}-search-speed.dat",
+ "#{vector}-search-speed.png",
+ "Search speed with 1 searcher thread",
+ "Number of documents", "Queries per/sec", graph_titles[0])
+
+ extract_beta(num_docs, 0.01, num_threads,
+ inputs[1].sub("#AV", vector),
+ "#{vector}-search-speed-multiple.dat",
+ :xml_getter_search_throughput)
+ plot_graph("#{vector}-search-speed-multiple.dat",
+ "#{vector}-search-speed-multiple.png",
+ "Search speed with 1 update thread and X searcher threads",
+ "Number of documents", "Queries per/sec", graph_titles[1])
+
+ extract_beta(num_docs, 0.01, num_threads,
+ inputs[1].sub("#AV", vector),
+ "#{vector}-update-speed-multiple.dat",
+ :xml_getter_updater_thread_throughput)
+ plot_graph("#{vector}-update-speed-multiple.dat",
+ "#{vector}-update-speed-multiple.png",
+ "Update speed with 1 update thread and X searcher threads",
+ "Number of documents", "Updates per/sec", graph_titles[1])
+end
diff --git a/searchlib/src/tests/attribute/bitvector/.gitignore b/searchlib/src/tests/attribute/bitvector/.gitignore
new file mode 100644
index 00000000000..05ec0a4df59
--- /dev/null
+++ b/searchlib/src/tests/attribute/bitvector/.gitignore
@@ -0,0 +1 @@
+searchlib_bitvector_test_app
diff --git a/searchlib/src/tests/attribute/bitvector/CMakeLists.txt b/searchlib/src/tests/attribute/bitvector/CMakeLists.txt
new file mode 100644
index 00000000000..bc65fc04dc4
--- /dev/null
+++ b/searchlib/src/tests/attribute/bitvector/CMakeLists.txt
@@ -0,0 +1,9 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_bitvector_test_app
+ SOURCES
+ bitvector_test.cpp
+ DEPENDS
+ searchlib
+ searchlib_test
+)
+vespa_add_test(NAME searchlib_bitvector_test_app COMMAND searchlib_bitvector_test_app)
diff --git a/searchlib/src/tests/attribute/bitvector/bitvector_test.cpp b/searchlib/src/tests/attribute/bitvector/bitvector_test.cpp
new file mode 100644
index 00000000000..85f83d217eb
--- /dev/null
+++ b/searchlib/src/tests/attribute/bitvector/bitvector_test.cpp
@@ -0,0 +1,632 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("bitvector_test");
+#include <vespa/vespalib/testkit/testapp.h>
+
+#include <vespa/searchlib/attribute/attribute.h>
+#include <vespa/searchlib/attribute/attributefactory.h>
+#include <vespa/searchlib/index/dummyfileheadercontext.h>
+#include <vespa/searchlib/util/randomgenerator.h>
+#include <vespa/vespalib/util/compress.h>
+#include <vespa/searchlib/fef/termfieldmatchdata.h>
+
+#include <vespa/searchlib/attribute/attributevector.hpp>
+#include <vespa/searchlib/attribute/i_document_weight_attribute.h>
+#include <vespa/searchlib/queryeval/document_weight_search_iterator.h>
+#include <vespa/searchlib/test/initrange.h>
+#include <vespa/searchlib/common/bitvectoriterator.h>
+
+using search::attribute::BasicType;
+using search::attribute::CollectionType;
+using search::attribute::Config;
+using search::AttributeFactory;
+using search::FloatingPointAttribute;
+using search::IntegerAttribute;
+using search::StringAttribute;
+using search::AttributeVector;
+using search::ParseItem;
+using search::fef::TermFieldMatchData;
+using search::BitVector;
+using search::BitVectorIterator;
+using search::queryeval::SearchIterator;
+
+typedef std::unique_ptr<AttributeVector::SearchContext> SearchContextPtr;
+typedef std::unique_ptr<search::queryeval::SearchIterator> SearchBasePtr;
+
+struct BitVectorTest
+{
+ typedef AttributeVector::SP AttributePtr;
+
+ BitVectorTest() { }
+
+ ~BitVectorTest() { }
+
+ template <typename VectorType>
+ VectorType & as(AttributePtr &v);
+ IntegerAttribute & asInt(AttributePtr &v);
+ StringAttribute & asString(AttributePtr &v);
+ FloatingPointAttribute & asFloat(AttributePtr &v);
+
+ AttributePtr
+ make(Config cfg,
+ const vespalib::string &pref,
+ bool fastSearch,
+ bool enableBitVectors,
+ bool enableOnlyBitVector,
+ bool filter);
+
+ void
+ addDocs(const AttributePtr &v, size_t sz);
+
+ template <typename VectorType>
+ void populate(VectorType &v,
+ uint32_t low,
+ uint32_t high,
+ bool set);
+
+ template <typename VectorType>
+ void populateAll(VectorType &v,
+ uint32_t low,
+ uint32_t high,
+ bool set);
+
+ void
+ buildTermQuery(std::vector<char> & buffer,
+ const vespalib::string & index,
+ const vespalib::string & term, bool prefix);
+
+ template <typename V>
+ vespalib::string
+ getSearchStr();
+
+ template <typename V, typename T>
+ SearchContextPtr
+ getSearch(const V & vec, const T & term, bool prefix, bool useBitVector);
+
+ template <typename V>
+ SearchContextPtr
+ getSearch(const V & vec, bool useBitVector);
+
+ void
+ checkSearch(AttributePtr v,
+ SearchBasePtr sb,
+ TermFieldMatchData &md,
+ uint32_t expFirstDocId,
+ uint32_t expFastDocId,
+ uint32_t expDocFreq,
+ bool weights,
+ bool checkStride);
+
+ void
+ checkSearch(AttributePtr v,
+ SearchContextPtr sc,
+ uint32_t expFirstDocId,
+ uint32_t expLastDocId,
+ uint32_t expDocFreq,
+ bool weights,
+ bool checkStride);
+
+ template <typename VectorType, typename BufferType>
+ void
+ test(BasicType bt, CollectionType ct, const vespalib::string &pref,
+ bool fastSearch,
+ bool enableBitVectors,
+ bool enableOnlyBitVector,
+ bool filter);
+
+ template <typename VectorType, typename BufferType>
+ void
+ test(BasicType bt, CollectionType ct, const vespalib::string &pref);
+};
+
+
+template <typename VectorType>
+VectorType &
+BitVectorTest::as(AttributePtr &v)
+{
+ VectorType *res = dynamic_cast<VectorType *>(v.get());
+ assert(res != NULL);
+ return *res;
+}
+
+
+IntegerAttribute &
+BitVectorTest::asInt(AttributePtr &v)
+{
+ return as<IntegerAttribute>(v);
+}
+
+
+StringAttribute &
+BitVectorTest::asString(AttributePtr &v)
+{
+ return as<StringAttribute>(v);
+}
+
+
+FloatingPointAttribute &
+BitVectorTest::asFloat(AttributePtr &v)
+{
+ return as<FloatingPointAttribute>(v);
+}
+
+
+void
+BitVectorTest::buildTermQuery(std::vector<char> &buffer,
+ const vespalib::string &index,
+ const vespalib::string &term,
+ bool prefix)
+{
+ uint32_t indexLen = index.size();
+ uint32_t termLen = term.size();
+ uint32_t queryPacketSize = 1 + 2 * 4 + indexLen + termLen;
+ uint32_t p = 0;
+ buffer.resize(queryPacketSize);
+ buffer[p++] = prefix ? ParseItem::ITEM_PREFIXTERM : ParseItem::ITEM_TERM;
+ p += vespalib::compress::Integer::compressPositive(indexLen, &buffer[p]);
+ memcpy(&buffer[p], index.c_str(), indexLen);
+ p += indexLen;
+ p += vespalib::compress::Integer::compressPositive(termLen, &buffer[p]);
+ memcpy(&buffer[p], term.c_str(), termLen);
+ p += termLen;
+ buffer.resize(p);
+}
+
+
+template <>
+vespalib::string
+BitVectorTest::getSearchStr<IntegerAttribute>()
+{
+ return "[-42;-42]";
+}
+
+template <>
+vespalib::string
+BitVectorTest::getSearchStr<FloatingPointAttribute>()
+{
+ return "[-42.0;-42.0]";
+}
+
+template <>
+vespalib::string
+BitVectorTest::getSearchStr<StringAttribute>()
+{
+ return "foo";
+}
+
+
+template <typename V, typename T>
+SearchContextPtr
+BitVectorTest::getSearch(const V &vec, const T &term, bool prefix,
+ bool useBitVector)
+{
+ std::vector<char> query;
+ vespalib::asciistream ss;
+ ss << term;
+ buildTermQuery(query, vec.getName(), ss.str(), prefix);
+
+ return (static_cast<const AttributeVector &>(vec)).
+ getSearch(vespalib::stringref(&query[0], query.size()),
+ AttributeVector::SearchContext::Params().useBitVector(useBitVector));
+}
+
+
+template <>
+SearchContextPtr
+BitVectorTest::getSearch<IntegerAttribute>(const IntegerAttribute &v,
+ bool useBitVector)
+{
+ return getSearch<IntegerAttribute>(v, "[-42;-42]", false, useBitVector);
+}
+
+template <>
+SearchContextPtr
+BitVectorTest::
+getSearch<FloatingPointAttribute>(const FloatingPointAttribute &v,
+ bool useBitVector)
+{
+ return getSearch<FloatingPointAttribute>(v, "[-42.0;-42.0]", false,
+ useBitVector);
+}
+
+template <>
+SearchContextPtr
+BitVectorTest::getSearch<StringAttribute>(const StringAttribute &v,
+ bool useBitVector)
+{
+ return getSearch<StringAttribute, const vespalib::string &>
+ (v, "foo", false, useBitVector);
+}
+
+
+BitVectorTest::AttributePtr
+BitVectorTest::make(Config cfg,
+ const vespalib::string &pref,
+ bool fastSearch,
+ bool enableBitVectors,
+ bool enableOnlyBitVector,
+ bool filter)
+{
+ cfg.setFastSearch(fastSearch);
+ cfg.setEnableBitVectors(enableBitVectors);
+ cfg.setEnableOnlyBitVector(enableOnlyBitVector);
+ cfg.setIsFilter(filter);
+ AttributePtr v = AttributeFactory::createAttribute(pref, cfg);
+ return v;
+}
+
+
+void
+BitVectorTest::addDocs(const AttributePtr &v, size_t sz)
+{
+ while (v->getNumDocs() < sz) {
+ AttributeVector::DocId docId = 0;
+ EXPECT_TRUE(v->addDoc(docId));
+ v->clearDoc(docId);
+ }
+ EXPECT_TRUE(v->getNumDocs() == sz);
+ v->commit(true);
+}
+
+
+template <>
+void
+BitVectorTest::populate(IntegerAttribute &v,
+ uint32_t low, uint32_t high,
+ bool set)
+{
+ for(size_t i(low), m(high); i < m; i+= 5) {
+ if (!set) {
+ v.clearDoc(i);
+ } else if (v.hasMultiValue()) {
+ v.append(i, -42, 27);
+ v.append(i, -43, 14);
+ v.append(i, -42, -3);
+ } else {
+ EXPECT_TRUE(v.update(i, -42));
+ }
+ }
+ v.commit();
+}
+
+
+template <>
+void
+BitVectorTest::populate(FloatingPointAttribute &v,
+ uint32_t low, uint32_t high,
+ bool set)
+{
+ for(size_t i(low), m(high); i < m; i+= 5) {
+ if (!set) {
+ v.clearDoc(i);
+ } else if (v.hasMultiValue()) {
+ v.append(i, -42.0, 27);
+ v.append(i, -43.0, 14);
+ v.append(i, -42.0, -3);
+ } else {
+ EXPECT_TRUE(v.update(i, -42.0));
+ }
+ }
+ v.commit();
+}
+
+
+template <>
+void
+BitVectorTest::populate(StringAttribute &v,
+ uint32_t low, uint32_t high,
+ bool set)
+{
+ for(size_t i(low), m(high); i < m; i+= 5) {
+ if (!set) {
+ v.clearDoc(i);
+ } else if (v.hasMultiValue()) {
+ v.append(i, "foo", 27);
+ v.append(i, "bar", 14);
+ v.append(i, "foO", -3);
+ } else {
+ EXPECT_TRUE(v.update(i, "foo"));
+ }
+ }
+ v.commit();
+}
+
+template <>
+void
+BitVectorTest::populateAll(IntegerAttribute &v,
+ uint32_t low, uint32_t high,
+ bool set)
+{
+ for(size_t i(low), m(high); i < m; ++i) {
+ if (!set) {
+ v.clearDoc(i);
+ } else if (v.hasMultiValue()) {
+ v.clearDoc(i);
+ v.append(i, -42, 27);
+ v.append(i, -43, 14);
+ v.append(i, -42, -3);
+ } else {
+ EXPECT_TRUE(v.update(i, -42));
+ }
+ }
+ v.commit();
+}
+
+
+template <>
+void
+BitVectorTest::populateAll(FloatingPointAttribute &v,
+ uint32_t low, uint32_t high,
+ bool set)
+{
+ for(size_t i(low), m(high); i < m; ++i) {
+ if (!set) {
+ v.clearDoc(i);
+ } else if (v.hasMultiValue()) {
+ v.clearDoc(i);
+ v.append(i, -42.0, 27);
+ v.append(i, -43.0, 14);
+ v.append(i, -42.0, -3);
+ } else {
+ EXPECT_TRUE(v.update(i, -42.0));
+ }
+ }
+ v.commit();
+}
+
+
+template <>
+void
+BitVectorTest::populateAll(StringAttribute &v,
+ uint32_t low, uint32_t high,
+ bool set)
+{
+ for(size_t i(low), m(high); i < m; ++i) {
+ if (!set) {
+ v.clearDoc(i);
+ } else if (v.hasMultiValue()) {
+ v.clearDoc(i);
+ v.append(i, "foo", 27);
+ v.append(i, "bar", 14);
+ v.append(i, "foO", -3);
+ } else {
+ EXPECT_TRUE(v.update(i, "foo"));
+ }
+ }
+ v.commit();
+}
+
+
+void
+BitVectorTest::checkSearch(AttributePtr v,
+ SearchBasePtr sb,
+ TermFieldMatchData &md,
+ uint32_t expFirstDocId,
+ uint32_t expLastDocId,
+ uint32_t expDocFreq,
+ bool weights,
+ bool checkStride)
+{
+ sb->initFullRange();
+ sb->seek(1u);
+ uint32_t docId = sb->getDocId();
+ uint32_t lastDocId = 0;
+ uint32_t docFreq = 0;
+ EXPECT_EQUAL(expFirstDocId, docId);
+ while (docId != search::endDocId) {
+ lastDocId = docId;
+ ++docFreq,
+ assert(!checkStride || (docId % 5) == 2u);
+ sb->unpack(docId);
+ EXPECT_EQUAL(md.getDocId(), docId);
+ if (v->getCollectionType() == CollectionType::SINGLE ||
+ !weights) {
+ EXPECT_EQUAL(1, md.getWeight());
+ } else if (v->getCollectionType() == CollectionType::ARRAY) {
+ EXPECT_EQUAL(2, md.getWeight());
+ } else {
+ if (v->getBasicType() == BasicType::STRING) {
+ EXPECT_EQUAL(24, md.getWeight());
+ } else {
+ EXPECT_EQUAL(-3, md.getWeight());
+ }
+ }
+ sb->seek(docId + 1);
+ docId = sb->getDocId();
+ }
+ EXPECT_EQUAL(expLastDocId, lastDocId);
+ EXPECT_EQUAL(expDocFreq, docFreq);
+}
+
+
+void
+BitVectorTest::checkSearch(AttributePtr v,
+ SearchContextPtr sc,
+ uint32_t expFirstDocId,
+ uint32_t expLastDocId,
+ uint32_t expDocFreq,
+ bool weights,
+ bool checkStride)
+{
+ TermFieldMatchData md;
+ sc->fetchPostings(true);
+ SearchBasePtr sb = sc->createIterator(&md, true);
+ checkSearch(v, std::move(sb), md,
+ expFirstDocId, expLastDocId, expDocFreq, weights,
+ checkStride);
+}
+
+
+template <typename VectorType, typename BufferType>
+void
+BitVectorTest::test(BasicType bt,
+ CollectionType ct,
+ const vespalib::string &pref,
+ bool fastSearch,
+ bool enableBitVectors,
+ bool enableOnlyBitVector,
+ bool filter)
+{
+ Config cfg(bt, ct);
+ AttributePtr v = make(cfg, pref, fastSearch,
+ enableBitVectors, enableOnlyBitVector, filter);
+ addDocs(v, 1024);
+ VectorType &tv = as<VectorType>(v);
+ populate(tv, 2, 1023, true);
+
+ SearchContextPtr sc = getSearch<VectorType>(tv, true);
+ checkSearch(v, std::move(sc), 2, 1022, 205, !enableBitVectors && !filter,
+ true);
+ sc = getSearch<VectorType>(tv, false);
+ checkSearch(v, std::move(sc), 2, 1022, 205, !enableOnlyBitVector &&
+ !filter, true);
+ const search::IDocumentWeightAttribute *dwa =
+ v->asDocumentWeightAttribute();
+ if (dwa != NULL) {
+ search::IDocumentWeightAttribute::LookupResult lres =
+ dwa->lookup(getSearchStr<VectorType>());
+ typedef search::queryeval::DocumentWeightSearchIterator DWSI;
+ typedef search::queryeval::SearchIterator SI;
+ TermFieldMatchData md;
+ SI::UP dwsi(new DWSI(md, *dwa, lres));
+ if (!enableOnlyBitVector) {
+ checkSearch(v, std::move(dwsi), md, 2, 1022, 205, !filter, true);
+ } else {
+ dwsi->initFullRange();
+ EXPECT_TRUE(dwsi->isAtEnd());
+ }
+ }
+ populate(tv, 2, 973, false);
+ sc = getSearch<VectorType>(tv, true);
+ checkSearch(v, std::move(sc), 977, 1022, 10, !enableOnlyBitVector &&
+ !filter, true);
+ populate(tv, 2, 973, true);
+ sc = getSearch<VectorType>(tv, true);
+ checkSearch(v, std::move(sc), 2, 1022, 205, !enableBitVectors && !filter,
+ true);
+ addDocs(v, 15000);
+ sc = getSearch<VectorType>(tv, true);
+ checkSearch(v, std::move(sc), 2, 1022, 205, !enableOnlyBitVector &&
+ !filter, true);
+ populateAll(tv, 10, 15000, true);
+ sc = getSearch<VectorType>(tv, true);
+ checkSearch(v, std::move(sc), 2, 14999, 14992,
+ !enableBitVectors && !filter,
+ false);
+}
+
+
+template <typename VectorType, typename BufferType>
+void
+BitVectorTest::test(BasicType bt,
+ CollectionType ct,
+ const vespalib::string &pref)
+{
+ LOG(info,
+ "test run, pref is %s",
+ pref.c_str());
+ test<VectorType, BufferType>(bt, ct, pref,
+ false, false, false, false);
+ test<VectorType, BufferType>(bt, ct, pref,
+ false, false, false, true);
+ test<VectorType, BufferType>(bt, ct, pref,
+ true, false, false, false);
+ test<VectorType, BufferType>(bt, ct, pref,
+ true, false, false, true);
+ test<VectorType, BufferType>(bt, ct, pref,
+ true, true, false, false);
+ test<VectorType, BufferType>(bt, ct, pref,
+ true, true, false, true);
+ test<VectorType, BufferType>(bt, ct, pref,
+ true, true, true, false);
+ test<VectorType, BufferType>(bt, ct, pref,
+ true, true, true, true);
+}
+
+
+TEST_F("Test bitvectors with single value int32", BitVectorTest)
+{
+ f.template test<IntegerAttribute,
+ IntegerAttribute::largeint_t>(BasicType::INT32,
+ CollectionType::SINGLE,
+ "int32_sv");
+}
+
+TEST_F("Test bitvectors with array value int32", BitVectorTest)
+{
+ f.template test<IntegerAttribute,
+ IntegerAttribute::largeint_t>(BasicType::INT32,
+ CollectionType::ARRAY,
+ "int32_a");
+}
+
+TEST_F("Test bitvectors with weighted set value int32", BitVectorTest)
+{
+ f.template test<IntegerAttribute,
+ IntegerAttribute::WeightedInt>(BasicType::INT32,
+ CollectionType::WSET,
+ "int32_sv");
+}
+
+TEST_F("Test bitvectors with single value double", BitVectorTest)
+{
+ f.template test<FloatingPointAttribute,
+ double>(BasicType::DOUBLE,
+ CollectionType::SINGLE,
+ "double_sv");
+}
+
+TEST_F("Test bitvectors with array value double", BitVectorTest)
+{
+ f.template test<FloatingPointAttribute,
+ double>(BasicType::DOUBLE,
+ CollectionType::ARRAY,
+ "double_a");
+}
+
+TEST_F("Test bitvectors with weighted set value double", BitVectorTest)
+{
+ f.template test<FloatingPointAttribute,
+ FloatingPointAttribute::WeightedFloat>(BasicType::DOUBLE,
+ CollectionType::WSET,
+ "double_ws");
+}
+
+TEST_F("Test bitvectors with single value string", BitVectorTest)
+{
+ f.template test<StringAttribute,
+ vespalib::string>(BasicType::STRING,
+ CollectionType::SINGLE,
+ "string_sv");
+}
+
+TEST_F("Test bitvectors with array value string", BitVectorTest)
+{
+ f.template test<StringAttribute,
+ vespalib::string>(BasicType::STRING,
+ CollectionType::ARRAY,
+ "string_a");
+}
+
+TEST_F("Test bitvectors with weighted set value string", BitVectorTest)
+{
+ f.template test<StringAttribute,
+ StringAttribute::WeightedString>(BasicType::STRING,
+ CollectionType::WSET,
+ "string_ws");
+}
+
+TEST("Test bitvector iterators adheres to initRange") {
+ search::test::InitRangeVerifier initRangeTest;
+ BitVector::UP bv = BitVector::create(initRangeTest.getDocIdLimit());
+ for (uint32_t docId: initRangeTest.getExpectedDocIds()) {
+ bv->setBit(docId);
+ }
+ TermFieldMatchData tfmd;
+ initRangeTest.verify(*BitVectorIterator::create(bv.get(), initRangeTest.getDocIdLimit(), tfmd, false));
+ initRangeTest.verify(*BitVectorIterator::create(bv.get(), initRangeTest.getDocIdLimit(), tfmd, true));
+}
+
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/attribute/changevector_test.cpp b/searchlib/src/tests/attribute/changevector_test.cpp
new file mode 100644
index 00000000000..9f0a796fd3e
--- /dev/null
+++ b/searchlib/src/tests/attribute/changevector_test.cpp
@@ -0,0 +1,92 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("changevector_test");
+#include <vespa/vespalib/testkit/testapp.h>
+
+#include <vespa/searchlib/attribute/changevector.h>
+
+using namespace search;
+
+template <typename T>
+void verifyStrictOrdering(const T & v) {
+ long count(0);
+ for (const auto & c : v) {
+ count++;
+ EXPECT_EQUAL(count, c._data.get());
+ }
+ EXPECT_EQUAL(v.size(), size_t(count));
+}
+
+class Accessor {
+public:
+ Accessor(const std::vector<long> & v) : _size(v.size()), _current(v.begin()), _end(v.end()) { }
+ size_t size() const { return _size; }
+ void next() { _current++; }
+ long value() const { return *_current; }
+ int weight() const { return *_current; }
+private:
+ size_t _size;
+ std::vector<long>::const_iterator _current;
+ std::vector<long>::const_iterator _end;
+};
+
+TEST("require insert ordering is preserved for same doc")
+{
+ typedef ChangeTemplate<NumericChangeData<long>> Change;
+ typedef ChangeVectorT<Change> CV;
+ CV a;
+ a.push_back(Change(Change::NOOP, 7, 1));
+ EXPECT_EQUAL(1u, a.size());
+ a.push_back(Change(Change::NOOP, 7, 2));
+ EXPECT_EQUAL(2u, a.size());
+ verifyStrictOrdering(a);
+}
+
+TEST("require insert ordering is preserved ")
+{
+ typedef ChangeTemplate<NumericChangeData<long>> Change;
+ typedef ChangeVectorT<Change> CV;
+ CV a;
+ a.push_back(Change(Change::NOOP, 7, 1));
+ EXPECT_EQUAL(1u, a.size());
+ a.push_back(Change(Change::NOOP, 5, 2));
+ EXPECT_EQUAL(2u, a.size());
+ a.push_back(Change(Change::NOOP, 6, 3));
+ EXPECT_EQUAL(3u, a.size());
+ verifyStrictOrdering(a);
+}
+
+TEST("require insert ordering is preserved with mix")
+{
+ typedef ChangeTemplate<NumericChangeData<long>> Change;
+ typedef ChangeVectorT<Change> CV;
+ CV a;
+ a.push_back(Change(Change::NOOP, 7, 1));
+ EXPECT_EQUAL(1u, a.size());
+ a.push_back(Change(Change::NOOP, 5, 2));
+ EXPECT_EQUAL(2u, a.size());
+ a.push_back(Change(Change::NOOP, 5, 3));
+ EXPECT_EQUAL(3u, a.size());
+ a.push_back(Change(Change::NOOP, 6, 10));
+ EXPECT_EQUAL(4u, a.size());
+ std::vector<long> v({4,5,6,7,8});
+ Accessor ac(v);
+ a.push_back(5, ac);
+ EXPECT_EQUAL(9u, a.size());
+ a.push_back(Change(Change::NOOP, 5, 9));
+ EXPECT_EQUAL(10u, a.size());
+ verifyStrictOrdering(a);
+}
+
+TEST("require that inserting empty vector does not affect the vector.") {
+ typedef ChangeTemplate<NumericChangeData<long>> Change;
+ typedef ChangeVectorT<Change> CV;
+ CV a;
+ std::vector<long> v;
+ Accessor ac(v);
+ a.push_back(1, ac);
+ EXPECT_EQUAL(0u, a.size());
+}
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/attribute/changevector_test.sh b/searchlib/src/tests/attribute/changevector_test.sh
new file mode 100644
index 00000000000..cb70f5465a4
--- /dev/null
+++ b/searchlib/src/tests/attribute/changevector_test.sh
@@ -0,0 +1,7 @@
+#!/bin/bash
+$VALGRIND ./searchlib_changevector_test_app
+rm -rf *.dat
+rm -rf *.idx
+rm -rf *.weight
+rm -rf clstmp
+rm -rf alstmp
diff --git a/searchlib/src/tests/attribute/comparator/.gitignore b/searchlib/src/tests/attribute/comparator/.gitignore
new file mode 100644
index 00000000000..51c5b5944c9
--- /dev/null
+++ b/searchlib/src/tests/attribute/comparator/.gitignore
@@ -0,0 +1,4 @@
+.depend
+Makefile
+comparator_test
+searchlib_comparator_test_app
diff --git a/searchlib/src/tests/attribute/comparator/CMakeLists.txt b/searchlib/src/tests/attribute/comparator/CMakeLists.txt
new file mode 100644
index 00000000000..4a14181db3c
--- /dev/null
+++ b/searchlib/src/tests/attribute/comparator/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_comparator_test_app
+ SOURCES
+ comparator_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_comparator_test_app COMMAND searchlib_comparator_test_app)
diff --git a/searchlib/src/tests/attribute/comparator/DESC b/searchlib/src/tests/attribute/comparator/DESC
new file mode 100644
index 00000000000..6b3ba01c89b
--- /dev/null
+++ b/searchlib/src/tests/attribute/comparator/DESC
@@ -0,0 +1 @@
+comparator test. Take a look at comparator_test.cpp for details.
diff --git a/searchlib/src/tests/attribute/comparator/FILES b/searchlib/src/tests/attribute/comparator/FILES
new file mode 100644
index 00000000000..b4c23c09022
--- /dev/null
+++ b/searchlib/src/tests/attribute/comparator/FILES
@@ -0,0 +1 @@
+comparator_test.cpp
diff --git a/searchlib/src/tests/attribute/comparator/comparator_test.cpp b/searchlib/src/tests/attribute/comparator/comparator_test.cpp
new file mode 100644
index 00000000000..2a4c3c6fb87
--- /dev/null
+++ b/searchlib/src/tests/attribute/comparator/comparator_test.cpp
@@ -0,0 +1,169 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("comparator_test");
+#include <vespa/vespalib/testkit/testapp.h>
+#include <vespa/searchlib/attribute/enumcomparator.h>
+#include <vespa/searchlib/btree/btreeroot.h>
+
+#include <vespa/searchlib/attribute/enumstore.hpp>
+#include <vespa/searchlib/btree/btreenode.hpp>
+#include <vespa/searchlib/btree/btreenodeallocator.hpp>
+#include <vespa/searchlib/btree/btreeroot.hpp>
+
+namespace search {
+
+using namespace btree;
+
+typedef EnumStoreT<NumericEntryType<int32_t> > NumericEnumStore;
+typedef EnumStoreComparatorT<NumericEntryType<int32_t> > NumericComparator;
+
+typedef EnumStoreT<NumericEntryType<float> > FloatEnumStore;
+typedef EnumStoreComparatorT<NumericEntryType<float> > FloatComparator;
+
+typedef EnumStoreT<StringEntryType> StringEnumStore;
+typedef EnumStoreComparatorT<StringEntryType> StringComparator;
+typedef EnumStoreFoldedComparatorT<StringEntryType> FoldedStringComparator;
+
+typedef EnumStoreBase::Index EnumIndex;
+
+typedef BTreeRoot<EnumIndex, BTreeNoLeafData,
+ btree::NoAggregated,
+ const EnumStoreComparatorWrapper> TreeType;
+typedef TreeType::NodeAllocatorType NodeAllocator;
+
+class Test : public vespalib::TestApp {
+private:
+ void requireThatNumericComparatorIsWorking();
+ void requireThatFloatComparatorIsWorking();
+ void requireThatStringComparatorIsWorking();
+ void requireThatComparatorWithTreeIsWorking();
+ void requireThatFoldedComparatorIsWorking();
+
+public:
+ Test() {}
+ int Main();
+};
+
+void
+Test::requireThatNumericComparatorIsWorking()
+{
+ NumericEnumStore es(1024, false);
+ EnumIndex e1, e2;
+ es.addEnum(10, e1);
+ es.addEnum(30, e2);
+ NumericComparator cmp1(es);
+ EXPECT_TRUE(cmp1(e1, e2));
+ EXPECT_TRUE(!cmp1(e2, e1));
+ EXPECT_TRUE(!cmp1(e1, e1));
+ NumericComparator cmp2(es, 20);
+ EXPECT_TRUE(cmp2(EnumIndex(), e2));
+ EXPECT_TRUE(!cmp2(e2, EnumIndex()));
+}
+
+void
+Test::requireThatFloatComparatorIsWorking()
+{
+ FloatEnumStore es(1024, false);
+ EnumIndex e1, e2, e3;
+ es.addEnum(10.5, e1);
+ es.addEnum(30.5, e2);
+ es.addEnum(std::numeric_limits<float>::quiet_NaN(), e3);
+ FloatComparator cmp1(es);
+ EXPECT_TRUE(cmp1(e1, e2));
+ EXPECT_TRUE(!cmp1(e2, e1));
+ EXPECT_TRUE(!cmp1(e1, e1));
+ EXPECT_TRUE(cmp1(e3, e1)); // nan
+ EXPECT_TRUE(!cmp1(e1, e3)); // nan
+ EXPECT_TRUE(!cmp1(e3, e3)); // nan
+ FloatComparator cmp2(es, 20.5);
+ EXPECT_TRUE(cmp2(EnumIndex(), e2));
+ EXPECT_TRUE(!cmp2(e2, EnumIndex()));
+}
+
+void
+Test::requireThatStringComparatorIsWorking()
+{
+ StringEnumStore es(1024, false);
+ EnumIndex e1, e2, e3;
+ es.addEnum("Aa", e1);
+ es.addEnum("aa", e2);
+ es.addEnum("aB", e3);
+ StringComparator cmp1(es);
+ EXPECT_TRUE(cmp1(e1, e2)); // similar folded, fallback to regular
+ EXPECT_TRUE(!cmp1(e2, e1));
+ EXPECT_TRUE(!cmp1(e1, e1));
+ EXPECT_TRUE(cmp1(e2, e3)); // folded compare
+ EXPECT_TRUE(strcmp("aa", "aB") > 0); // regular
+ StringComparator cmp2(es, "AB");
+ EXPECT_TRUE(cmp2(EnumIndex(), e3));
+ EXPECT_TRUE(!cmp2(e3, EnumIndex()));
+}
+
+void
+Test::requireThatComparatorWithTreeIsWorking()
+{
+ NumericEnumStore es(2048, false);
+ vespalib::GenerationHandler g;
+ TreeType t;
+ NodeAllocator m;
+ EnumIndex ei;
+ for (int32_t v = 100; v > 0; --v) {
+ NumericComparator cmp(es, v);
+ EXPECT_TRUE(!t.find(EnumIndex(), m, cmp).valid());
+ es.addEnum(v, ei);
+ t.insert(ei, BTreeNoLeafData(), m, cmp);
+ }
+ EXPECT_EQUAL(100u, t.size(m));
+ int32_t exp = 1;
+ for (TreeType::Iterator itr = t.begin(m); itr.valid(); ++itr) {
+ EXPECT_EQUAL(exp++, es.getValue(itr.getKey()));
+ }
+ EXPECT_EQUAL(101, exp);
+ t.clear(m);
+ m.freeze();
+ m.transferHoldLists(g.getCurrentGeneration());
+ g.incGeneration();
+ m.trimHoldLists(g.getFirstUsedGeneration());
+}
+
+void
+Test::requireThatFoldedComparatorIsWorking()
+{
+ StringEnumStore es(1024, false);
+ EnumIndex e1, e2, e3, e4;
+ es.addEnum("Aa", e1);
+ es.addEnum("aa", e2);
+ es.addEnum("aB", e3);
+ es.addEnum("Folded", e4);
+ FoldedStringComparator cmp1(es);
+ EXPECT_TRUE(!cmp1(e1, e2)); // similar folded
+ EXPECT_TRUE(!cmp1(e2, e1)); // similar folded
+ EXPECT_TRUE(cmp1(e2, e3)); // folded compare
+ EXPECT_TRUE(!cmp1(e3, e2)); // folded compare
+ FoldedStringComparator cmp2(es, "fol", false);
+ FoldedStringComparator cmp3(es, "fol", true);
+ EXPECT_TRUE(cmp2(EnumIndex(), e4));
+ EXPECT_TRUE(!cmp2(e4, EnumIndex()));
+ EXPECT_TRUE(!cmp3(EnumIndex(), e4)); // similar when prefix
+ EXPECT_TRUE(!cmp3(e4, EnumIndex())); // similar when prefix
+}
+
+int
+Test::Main()
+{
+ TEST_INIT("comparator_test");
+
+ requireThatNumericComparatorIsWorking();
+ requireThatFloatComparatorIsWorking();
+ requireThatStringComparatorIsWorking();
+ requireThatComparatorWithTreeIsWorking();
+ requireThatFoldedComparatorIsWorking();
+
+ TEST_DONE();
+}
+
+}
+
+TEST_APPHOOK(search::Test);
+
diff --git a/searchlib/src/tests/attribute/document_weight_iterator/.gitignore b/searchlib/src/tests/attribute/document_weight_iterator/.gitignore
new file mode 100644
index 00000000000..08cae9a48df
--- /dev/null
+++ b/searchlib/src/tests/attribute/document_weight_iterator/.gitignore
@@ -0,0 +1 @@
+searchlib_document_weight_iterator_test_app
diff --git a/searchlib/src/tests/attribute/document_weight_iterator/CMakeLists.txt b/searchlib/src/tests/attribute/document_weight_iterator/CMakeLists.txt
new file mode 100644
index 00000000000..2a1b36a626d
--- /dev/null
+++ b/searchlib/src/tests/attribute/document_weight_iterator/CMakeLists.txt
@@ -0,0 +1,9 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_document_weight_iterator_test_app
+ SOURCES
+ document_weight_iterator_test.cpp
+ DEPENDS
+ searchlib
+ searchlib_test
+)
+vespa_add_test(NAME searchlib_document_weight_iterator_test_app COMMAND searchlib_document_weight_iterator_test_app)
diff --git a/searchlib/src/tests/attribute/document_weight_iterator/FILES b/searchlib/src/tests/attribute/document_weight_iterator/FILES
new file mode 100644
index 00000000000..9bb94dc8770
--- /dev/null
+++ b/searchlib/src/tests/attribute/document_weight_iterator/FILES
@@ -0,0 +1 @@
+document_weight_iterator_test.cpp
diff --git a/searchlib/src/tests/attribute/document_weight_iterator/document_weight_iterator_test.cpp b/searchlib/src/tests/attribute/document_weight_iterator/document_weight_iterator_test.cpp
new file mode 100644
index 00000000000..fbe62f80843
--- /dev/null
+++ b/searchlib/src/tests/attribute/document_weight_iterator/document_weight_iterator_test.cpp
@@ -0,0 +1,189 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/vespalib/testkit/test_kit.h>
+
+#include <vespa/searchlib/attribute/i_document_weight_attribute.h>
+#include <vespa/searchlib/attribute/attributevector.h>
+#include <vespa/searchlib/attribute/attribute.h>
+#include <vespa/searchlib/attribute/attributefile.h>
+#include <vespa/searchlib/attribute/attributeguard.h>
+#include <vespa/searchlib/attribute/attributefactory.h>
+#include <vespa/searchlib/attribute/attributememorysavetarget.h>
+#include <vespa/searchlib/attribute/singlenumericattribute.h>
+#include <vespa/searchlib/attribute/multinumericattribute.h>
+#include <vespa/searchlib/attribute/singlestringattribute.h>
+#include <vespa/searchlib/attribute/multistringattribute.h>
+#include <vespa/searchlib/attribute/attrvector.h>
+#include <vespa/searchlib/index/dummyfileheadercontext.h>
+#include <vespa/searchlib/util/randomgenerator.h>
+#include <vespa/searchlib/attribute/attributevector.hpp>
+#include <vespa/searchlib/attribute/multinumericpostattribute.hpp>
+#include <vespa/searchlib/attribute/multistringpostattribute.hpp>
+#include <vespa/searchlib/attribute/iattributemanager.h>
+#include <vespa/searchlib/query/tree/location.h>
+#include <vespa/searchlib/query/tree/point.h>
+#include <vespa/searchlib/query/tree/simplequery.h>
+#include <vespa/searchlib/query/weight.h>
+#include <vespa/searchlib/test/initrange.h>
+#include <vespa/searchlib/queryeval/document_weight_search_iterator.h>
+
+using namespace search;
+using namespace search::attribute;
+
+AttributeVector::SP make_attribute(BasicType type, CollectionType collection, bool fast_search) {
+ Config cfg(type, collection);
+ cfg.setFastSearch(fast_search);
+ return AttributeFactory::createAttribute("my_attribute", cfg);
+}
+
+void add_docs(AttributeVector::SP attr_ptr, size_t limit = 1000) {
+ AttributeVector::DocId docid;
+ for (size_t i = 0; i < limit; ++i) {
+ attr_ptr->addDoc(docid);
+ }
+ attr_ptr->commit();
+ ASSERT_EQUAL((limit - 1), docid);
+}
+
+template <typename ATTR, typename KEY>
+void set_doc(ATTR *attr, uint32_t docid, KEY key, int32_t weight) {
+ attr->clearDoc(docid);
+ attr->append(docid, key, weight);
+ attr->commit();
+}
+
+void populate_long(AttributeVector::SP attr_ptr) {
+ IntegerAttribute *attr = static_cast<IntegerAttribute *>(attr_ptr.get());
+ set_doc(attr, 1, int64_t(111), 20);
+ set_doc(attr, 5, int64_t(111), 5);
+ set_doc(attr, 7, int64_t(111), 10);
+}
+
+void populate_string(AttributeVector::SP attr_ptr) {
+ StringAttribute *attr = static_cast<StringAttribute *>(attr_ptr.get());
+ set_doc(attr, 1, "foo", 20);
+ set_doc(attr, 5, "foo", 5);
+ set_doc(attr, 7, "foo", 10);
+}
+
+struct LongFixture {
+ AttributeVector::SP attr;
+ const IDocumentWeightAttribute *api;
+ LongFixture() : attr(make_attribute(BasicType::INT64, CollectionType::WSET, true)),
+ api(attr->asDocumentWeightAttribute())
+ {
+ ASSERT_TRUE(api != nullptr);
+ add_docs(attr);
+ populate_long(attr);
+ }
+};
+
+struct StringFixture {
+ AttributeVector::SP attr;
+ const IDocumentWeightAttribute *api;
+ StringFixture() : attr(make_attribute(BasicType::STRING, CollectionType::WSET, true)),
+ api(attr->asDocumentWeightAttribute())
+ {
+ ASSERT_TRUE(api != nullptr);
+ add_docs(attr);
+ populate_string(attr);
+ }
+};
+
+TEST("require that appropriate attributes support the document weight attribute interface") {
+ EXPECT_TRUE(make_attribute(BasicType::INT64, CollectionType::WSET, true)->asDocumentWeightAttribute() != nullptr);
+ EXPECT_TRUE(make_attribute(BasicType::STRING, CollectionType::WSET, true)->asDocumentWeightAttribute() != nullptr);
+}
+
+TEST("require that inappropriate attributes do not support the document weight attribute interface") {
+ EXPECT_TRUE(make_attribute(BasicType::INT64, CollectionType::SINGLE, false)->asDocumentWeightAttribute() == nullptr);
+ EXPECT_TRUE(make_attribute(BasicType::INT64, CollectionType::ARRAY, false)->asDocumentWeightAttribute() == nullptr);
+ EXPECT_TRUE(make_attribute(BasicType::INT64, CollectionType::WSET, false)->asDocumentWeightAttribute() == nullptr);
+ EXPECT_TRUE(make_attribute(BasicType::INT64, CollectionType::SINGLE, true)->asDocumentWeightAttribute() == nullptr);
+ EXPECT_TRUE(make_attribute(BasicType::INT64, CollectionType::ARRAY, true)->asDocumentWeightAttribute() == nullptr);
+ EXPECT_TRUE(make_attribute(BasicType::STRING, CollectionType::SINGLE, false)->asDocumentWeightAttribute() == nullptr);
+ EXPECT_TRUE(make_attribute(BasicType::STRING, CollectionType::ARRAY, false)->asDocumentWeightAttribute() == nullptr);
+ EXPECT_TRUE(make_attribute(BasicType::STRING, CollectionType::WSET, false)->asDocumentWeightAttribute() == nullptr);
+ EXPECT_TRUE(make_attribute(BasicType::STRING, CollectionType::SINGLE, true)->asDocumentWeightAttribute() == nullptr);
+ EXPECT_TRUE(make_attribute(BasicType::STRING, CollectionType::ARRAY, true)->asDocumentWeightAttribute() == nullptr);
+ EXPECT_TRUE(make_attribute(BasicType::INT32, CollectionType::WSET, true)->asDocumentWeightAttribute() == nullptr);
+ EXPECT_TRUE(make_attribute(BasicType::DOUBLE, CollectionType::WSET, true)->asDocumentWeightAttribute() == nullptr);
+}
+
+void verify_valid_lookup(IDocumentWeightAttribute::LookupResult result) {
+ EXPECT_TRUE(result.posting_idx.valid());
+ EXPECT_EQUAL(3u, result.posting_size);
+ EXPECT_EQUAL(5, result.min_weight);
+ EXPECT_EQUAL(20, result.max_weight);
+}
+
+void verify_invalid_lookup(IDocumentWeightAttribute::LookupResult result) {
+ EXPECT_FALSE(result.posting_idx.valid());
+ EXPECT_EQUAL(0u, result.posting_size);
+ EXPECT_EQUAL(0, result.min_weight);
+ EXPECT_EQUAL(0, result.max_weight);
+}
+
+TEST_F("require that integer lookup works correctly", LongFixture) {
+ verify_valid_lookup(f1.api->lookup("111"));
+ verify_invalid_lookup(f1.api->lookup("222"));
+}
+
+TEST_F("require string lookup works correctly", StringFixture) {
+ verify_valid_lookup(f1.api->lookup("foo"));
+ verify_invalid_lookup(f1.api->lookup("bar"));
+}
+
+void verify_posting(const IDocumentWeightAttribute &api, const char *term) {
+ auto result = api.lookup(term);
+ ASSERT_TRUE(result.posting_idx.valid());
+ std::vector<DocumentWeightIterator> itr_store;
+ api.create(result.posting_idx, itr_store);
+ ASSERT_EQUAL(1u, itr_store.size());
+ {
+ DocumentWeightIterator &itr = itr_store[0];
+ if (itr.valid() && itr.getKey() < 1) {
+ itr.linearSeek(1);
+ }
+ ASSERT_TRUE(itr.valid());
+ EXPECT_EQUAL(1u, itr.getKey()); // docid
+ EXPECT_EQUAL(20, itr.getData()); // weight
+ itr.linearSeek(2);
+ ASSERT_TRUE(itr.valid());
+ EXPECT_EQUAL(5u, itr.getKey()); // docid
+ EXPECT_EQUAL(5, itr.getData()); // weight
+ itr.linearSeek(6);
+ ASSERT_TRUE(itr.valid());
+ EXPECT_EQUAL(7u, itr.getKey()); // docid
+ EXPECT_EQUAL(10, itr.getData()); // weight
+ itr.linearSeek(8);
+ EXPECT_FALSE(itr.valid());
+ }
+}
+
+TEST_F("require that integer iterators are created correctly", LongFixture) {
+ verify_posting(*f1.api, "111");
+}
+
+TEST_F("require that string iterators are created correctly", StringFixture) {
+ verify_posting(*f1.api, "foo");
+}
+
+TEST("verify init range for document weight search iterator") {
+ search::test::InitRangeVerifier ir;
+ AttributeVector::SP attr(make_attribute(BasicType::INT64, CollectionType::WSET, true));
+ add_docs(attr, ir.getDocIdLimit());
+ auto docids = ir.getExpectedDocIds();
+ IntegerAttribute *int_attr = static_cast<IntegerAttribute *>(attr.get());
+ for (auto docid: docids) {
+ set_doc(int_attr, docid, int64_t(123), 1);
+ }
+ const IDocumentWeightAttribute *api(attr->asDocumentWeightAttribute());
+ ASSERT_TRUE(api != nullptr);
+ auto dict_entry = api->lookup("123");
+ ASSERT_TRUE(dict_entry.posting_idx.valid());
+ fef::TermFieldMatchData tfmd;
+ queryeval::DocumentWeightSearchIterator itr(tfmd, *api, dict_entry);
+ ir.verify(itr);
+}
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/attribute/enumeratedsave/.gitignore b/searchlib/src/tests/attribute/enumeratedsave/.gitignore
new file mode 100644
index 00000000000..a4680f95f72
--- /dev/null
+++ b/searchlib/src/tests/attribute/enumeratedsave/.gitignore
@@ -0,0 +1,127 @@
+/double_a0_e.udat
+/double_a0_ee.udat
+/double_a1_e.udat
+/double_a1_ee.udat
+/double_a2_e.udat
+/double_a2_ee.udat
+/double_sv0_e.udat
+/double_sv0_ee.udat
+/double_sv1_e.udat
+/double_sv1_ee.udat
+/double_sv2_e.udat
+/double_sv2_ee.udat
+/double_ws0_e.udat
+/double_ws0_ee.udat
+/double_ws1_e.udat
+/double_ws1_ee.udat
+/double_ws2_e.udat
+/double_ws2_ee.udat
+/float_a0_e.udat
+/float_a0_ee.udat
+/float_a1_e.udat
+/float_a1_ee.udat
+/float_a2_e.udat
+/float_a2_ee.udat
+/float_sv0_e.udat
+/float_sv0_ee.udat
+/float_sv1_e.udat
+/float_sv1_ee.udat
+/float_sv2_e.udat
+/float_sv2_ee.udat
+/float_ws0_e.udat
+/float_ws0_ee.udat
+/float_ws1_e.udat
+/float_ws1_ee.udat
+/float_ws2_e.udat
+/float_ws2_ee.udat
+/int16_a0_e.udat
+/int16_a0_ee.udat
+/int16_a1_e.udat
+/int16_a1_ee.udat
+/int16_a2_e.udat
+/int16_a2_ee.udat
+/int16_sv0_e.udat
+/int16_sv0_ee.udat
+/int16_sv1_e.udat
+/int16_sv1_ee.udat
+/int16_sv2_e.udat
+/int16_sv2_ee.udat
+/int16_ws0_e.udat
+/int16_ws0_ee.udat
+/int16_ws1_e.udat
+/int16_ws1_ee.udat
+/int16_ws2_e.udat
+/int16_ws2_ee.udat
+/int32_a0_e.udat
+/int32_a0_ee.udat
+/int32_a1_e.udat
+/int32_a1_ee.udat
+/int32_a2_e.udat
+/int32_a2_ee.udat
+/int32_sv0_e.udat
+/int32_sv0_ee.udat
+/int32_sv1_e.udat
+/int32_sv1_ee.udat
+/int32_sv2_e.udat
+/int32_sv2_ee.udat
+/int32_ws0_e.udat
+/int32_ws0_ee.udat
+/int32_ws1_e.udat
+/int32_ws1_ee.udat
+/int32_ws2_e.udat
+/int32_ws2_ee.udat
+/int64_a0_e.udat
+/int64_a0_ee.udat
+/int64_a1_e.udat
+/int64_a1_ee.udat
+/int64_a2_e.udat
+/int64_a2_ee.udat
+/int64_sv0_e.udat
+/int64_sv0_ee.udat
+/int64_sv1_e.udat
+/int64_sv1_ee.udat
+/int64_sv2_e.udat
+/int64_sv2_ee.udat
+/int64_ws0_e.udat
+/int64_ws0_ee.udat
+/int64_ws1_e.udat
+/int64_ws1_ee.udat
+/int64_ws2_e.udat
+/int64_ws2_ee.udat
+/int8_a0_e.udat
+/int8_a0_ee.udat
+/int8_a1_e.udat
+/int8_a1_ee.udat
+/int8_a2_e.udat
+/int8_a2_ee.udat
+/int8_sv0_e.udat
+/int8_sv0_ee.udat
+/int8_sv1_e.udat
+/int8_sv1_ee.udat
+/int8_sv2_e.udat
+/int8_sv2_ee.udat
+/int8_ws0_e.udat
+/int8_ws0_ee.udat
+/int8_ws1_e.udat
+/int8_ws1_ee.udat
+/int8_ws2_e.udat
+/int8_ws2_ee.udat
+/str_a0_e.udat
+/str_a0_ee.udat
+/str_a1_e.udat
+/str_a1_ee.udat
+/str_a2_e.udat
+/str_a2_ee.udat
+/str_sv0_e.udat
+/str_sv0_ee.udat
+/str_sv1_e.udat
+/str_sv1_ee.udat
+/str_sv2_e.udat
+/str_sv2_ee.udat
+/str_ws0_e.udat
+/str_ws0_ee.udat
+/str_ws1_e.udat
+/str_ws1_ee.udat
+/str_ws2_e.udat
+/str_ws2_ee.udat
+searchlib_enumeratedsave_test_app
diff --git a/searchlib/src/tests/attribute/enumeratedsave/CMakeLists.txt b/searchlib/src/tests/attribute/enumeratedsave/CMakeLists.txt
new file mode 100644
index 00000000000..0dbb59043c1
--- /dev/null
+++ b/searchlib/src/tests/attribute/enumeratedsave/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_enumeratedsave_test_app
+ SOURCES
+ enumeratedsave_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_enumeratedsave_test_app COMMAND searchlib_enumeratedsave_test_app)
diff --git a/searchlib/src/tests/attribute/enumeratedsave/enumeratedsave_test.cpp b/searchlib/src/tests/attribute/enumeratedsave/enumeratedsave_test.cpp
new file mode 100644
index 00000000000..312814eb55a
--- /dev/null
+++ b/searchlib/src/tests/attribute/enumeratedsave/enumeratedsave_test.cpp
@@ -0,0 +1,944 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/document/fieldvalue/intfieldvalue.h>
+#include <vespa/document/fieldvalue/stringfieldvalue.h>
+#include <vespa/searchlib/attribute/attribute.h>
+#include <vespa/searchlib/attribute/attributefile.h>
+#include <vespa/searchlib/attribute/attributeguard.h>
+#include <vespa/searchlib/attribute/attributefactory.h>
+#include <vespa/searchlib/attribute/attributememorysavetarget.h>
+#include <vespa/searchlib/attribute/singlenumericattribute.h>
+#include <vespa/searchlib/attribute/multinumericattribute.h>
+#include <vespa/searchlib/attribute/singlestringattribute.h>
+#include <vespa/searchlib/attribute/multistringattribute.h>
+#include <vespa/searchlib/attribute/attrvector.h>
+#include <vespa/searchlib/attribute/attributefilesavetarget.h>
+#include <vespa/vespalib/testkit/testapp.h>
+#include <vespa/vespalib/util/compress.h>
+#include <vespa/searchlib/index/dummyfileheadercontext.h>
+#include <vespa/searchlib/util/randomgenerator.h>
+#include <vespa/searchlib/util/bufferwriter.h>
+#include <vespa/searchlib/attribute/attributememoryfilebufferwriter.h>
+
+#include <vespa/searchlib/attribute/attributevector.hpp>
+
+#include <vespa/log/log.h>
+LOG_SETUP("enumeratedsave_test");
+#include <limits>
+#include <string>
+#include <iostream>
+
+
+using search::attribute::BasicType;
+using search::attribute::CollectionType;
+using search::attribute::Config;
+using search::AttributeFactory;
+using search::FloatingPointAttribute;
+using search::IntegerAttribute;
+using search::StringAttribute;
+using search::RandomGenerator;
+using search::ParseItem;
+using search::fef::TermFieldMatchData;
+using search::IAttributeFileWriter;
+using search::BufferWriter;
+using search::AttributeMemoryFileBufferWriter;
+
+typedef std::unique_ptr<AttributeVector::SearchContext> SearchContextPtr;
+typedef std::unique_ptr<search::queryeval::SearchIterator> SearchBasePtr;
+
+bool
+FastOS_UNIX_File::Sync(void)
+{
+ // LOG(info, "Skip sync");
+ return true;
+}
+
+
+class MemAttrFileWriter : public IAttributeFileWriter
+{
+private:
+ Buffer _buf;
+
+public:
+ MemAttrFileWriter()
+ : _buf()
+ {
+ }
+
+ virtual Buffer allocBuf(size_t size) override {
+ return std::make_unique<BufferBuf>(size, 4096);
+ }
+
+ virtual void writeBuf(Buffer buf_in) override {
+ if (!_buf) {
+ _buf = std::move(buf_in);
+ } else {
+ _buf->writeBytes(buf_in->getData(), buf_in->getDataLen());
+ }
+ }
+
+ const Buffer &buf() const { return _buf; }
+
+ std::unique_ptr<BufferWriter> allocBufferWriter() override;
+};
+
+std::unique_ptr<BufferWriter>
+MemAttrFileWriter::allocBufferWriter()
+{
+ if (!_buf) {
+ _buf = allocBuf(1);
+ }
+ return std::make_unique<AttributeMemoryFileBufferWriter>(*this);
+}
+
+class MemAttr : public search::IAttributeSaveTarget
+{
+private:
+ MemAttrFileWriter _datWriter;
+ MemAttrFileWriter _idxWriter;
+ MemAttrFileWriter _weightWriter;
+ MemAttrFileWriter _udatWriter;
+
+public:
+ typedef std::shared_ptr<MemAttr> SP;
+
+ MemAttr(void)
+ : _datWriter(),
+ _idxWriter(),
+ _weightWriter(),
+ _udatWriter()
+ {
+ }
+
+ // Implements IAttributeSaveTarget
+ virtual bool setup() { return true; }
+ virtual void close() {}
+ virtual IAttributeFileWriter &datWriter() override { return _datWriter; }
+ virtual IAttributeFileWriter &idxWriter() override { return _idxWriter; }
+ virtual IAttributeFileWriter &weightWriter() override {
+ return _weightWriter;
+ }
+ virtual IAttributeFileWriter &udatWriter() override { return _udatWriter; }
+
+ bool
+ bufEqual(const Buffer &lhs, const Buffer &rhs) const;
+
+ bool
+ operator==(const MemAttr &rhs) const;
+};
+
+class EnumeratedSaveTest
+{
+private:
+ typedef AttributeVector::SP AttributePtr;
+
+ template <typename VectorType>
+ VectorType &
+ as(AttributePtr &v);
+
+ IntegerAttribute &
+ asInt(AttributePtr &v);
+
+ StringAttribute &
+ asString(AttributePtr &v);
+
+ FloatingPointAttribute &
+ asFloat(AttributePtr &v);
+
+ void
+ addDocs(const AttributePtr &v, size_t sz);
+
+ template <typename VectorType>
+ void populate(VectorType &v, unsigned seed, BasicType bt);
+
+ template <typename VectorType, typename BufferType>
+ void compare(VectorType &a, VectorType &b);
+
+ void
+ buildTermQuery(std::vector<char> & buffer,
+ const vespalib::string & index,
+ const vespalib::string & term, bool prefix);
+
+ template <typename V, typename T>
+ SearchContextPtr
+ getSearch(const V & vec, const T & term, bool prefix);
+
+ template <typename V>
+ SearchContextPtr
+ getSearch(const V & vec);
+
+ MemAttr::SP
+ saveMem(AttributeVector &v);
+
+ void
+ checkMem(AttributeVector &v, const MemAttr &e, bool enumerated);
+
+ MemAttr::SP
+ saveBoth(AttributePtr v);
+
+ AttributePtr
+ make(Config cfg,
+ const vespalib::string &pref,
+ bool fastSearch = false);
+
+ void
+ load(AttributePtr v, const vespalib::string &name);
+
+ template <typename VectorType, typename BufferType>
+ void
+ checkLoad(AttributePtr v,
+ const vespalib::string &name,
+ AttributePtr ev);
+
+ template <typename VectorType, typename BufferType>
+ void
+ testReload(AttributePtr v0,
+ AttributePtr v1,
+ AttributePtr v2,
+ MemAttr::SP mv0,
+ MemAttr::SP mv1,
+ MemAttr::SP mv2,
+ MemAttr::SP emv0,
+ MemAttr::SP emv1,
+ MemAttr::SP emv2,
+ Config cfg,
+ const vespalib::string &pref,
+ bool fastSearch);
+
+public:
+ template <typename VectorType, typename BufferType>
+ void
+ test(BasicType bt, CollectionType ct, const vespalib::string &pref);
+
+ EnumeratedSaveTest()
+ {
+ }
+};
+
+
+bool
+MemAttr::bufEqual(const Buffer &lhs, const Buffer &rhs) const
+{
+ if (!EXPECT_TRUE((lhs.get() != NULL) == (rhs.get() != NULL)))
+ return false;
+ if (lhs.get() == NULL)
+ return true;
+ if (!EXPECT_TRUE(lhs->getDataLen() == rhs->getDataLen()))
+ return false;
+ if (!EXPECT_TRUE(memcmp(lhs->getData(), rhs->getData(),
+ lhs->getDataLen()) == 0))
+ return false;
+ return true;
+}
+
+bool
+MemAttr::operator==(const MemAttr &rhs) const
+{
+ if (!EXPECT_TRUE(bufEqual(_datWriter.buf(), rhs._datWriter.buf())))
+ return false;
+ if (!EXPECT_TRUE(bufEqual(_idxWriter.buf(), rhs._idxWriter.buf())))
+ return false;
+ if (!EXPECT_TRUE(bufEqual(_weightWriter.buf(), rhs._weightWriter.buf())))
+ return false;
+ if (!EXPECT_TRUE(bufEqual(_udatWriter.buf(), rhs._udatWriter.buf())))
+ return false;
+ return true;
+}
+
+
+void
+EnumeratedSaveTest::addDocs(const AttributePtr &v, size_t sz)
+{
+ if (sz) {
+ AttributeVector::DocId docId;
+ for(size_t i(0); i< sz; i++) {
+ EXPECT_TRUE( v->addDoc(docId) );
+ }
+ EXPECT_TRUE( docId+1 == sz );
+ EXPECT_TRUE( v->getNumDocs() == sz );
+ v->commit(true);
+ }
+}
+
+
+template <>
+void
+EnumeratedSaveTest::populate(IntegerAttribute &v, unsigned seed,
+ BasicType bt)
+{
+ search::Rand48 rnd;
+ IntegerAttribute::largeint_t mask(std::numeric_limits
+ <IntegerAttribute::largeint_t>::max());
+ switch (bt.type()) {
+ case BasicType::INT8:
+ mask = 0x7f;
+ break;
+ case BasicType::INT16:
+ mask = 0x7fff;
+ break;
+ default:
+ ;
+ }
+ rnd.srand48(seed);
+ int weight = 1;
+ for(size_t i(0), m(v.getNumDocs()); i < m; i++) {
+ v.clearDoc(i);
+ if (i == 9)
+ continue;
+ if (i == 7) {
+ if (v.hasMultiValue()) {
+ v.append(i, -42, 27);
+ v.append(i, -43, 14);
+ v.append(i, -42, -3);
+ } else {
+ EXPECT_TRUE( v.update(i, -42) );
+ }
+ v.commit();
+ continue;
+ }
+ if (v.hasMultiValue()) {
+ if (v.hasWeightedSetType()) {
+ weight = (rand() % 256) - 128;
+ }
+ for (size_t j(0); j <= i; j++) {
+ EXPECT_TRUE( v.append(i, rnd.lrand48() & mask, weight) );
+ }
+ v.commit();
+ if (!v.hasWeightedSetType()) {
+ EXPECT_EQUAL(static_cast<uint32_t>(v.getValueCount(i)), i + 1);
+ ASSERT_TRUE(static_cast<uint32_t>(v.getValueCount(i)) ==
+ i + 1);
+ }
+ } else {
+ EXPECT_TRUE( v.update(i, lrand48() & mask) );
+ }
+ }
+ v.commit();
+}
+
+
+template <>
+void
+EnumeratedSaveTest::populate(FloatingPointAttribute &v, unsigned seed,
+ BasicType bt)
+{
+ (void) bt;
+ search::Rand48 rnd;
+ rnd.srand48(seed);
+ int weight = 1;
+ for(size_t i(0), m(v.getNumDocs()); i < m; i++) {
+ v.clearDoc(i);
+ if (i == 9)
+ continue;
+ if (i == 7) {
+ if (v.hasMultiValue()) {
+ v.append(i, -42.0, 27);
+ v.append(i, -43.0, 14);
+ v.append(i, -42.0, -3);
+ } else {
+ EXPECT_TRUE( v.update(i, -42.0) );
+ }
+ v.commit();
+ continue;
+ }
+ if (v.hasMultiValue()) {
+ if (v.hasWeightedSetType()) {
+ weight = (rand() % 256) - 128;
+ }
+ for (size_t j(0); j <= i; j++) {
+ EXPECT_TRUE( v.append(i, rnd.lrand48(), weight) );
+ }
+ v.commit();
+ if (!v.hasWeightedSetType()) {
+ EXPECT_EQUAL(static_cast<uint32_t>(v.getValueCount(i)), i + 1);
+ ASSERT_TRUE(static_cast<uint32_t>(v.getValueCount(i)) ==
+ i + 1);
+ }
+ } else {
+ EXPECT_TRUE( v.update(i, lrand48()) );
+ }
+ }
+ v.commit();
+}
+
+
+template <>
+void
+EnumeratedSaveTest::populate(StringAttribute &v, unsigned seed,
+ BasicType bt)
+{
+ (void) bt;
+ RandomGenerator rnd(seed);
+ int weight = 1;
+ for(size_t i(0), m(v.getNumDocs()); i < m; i++) {
+ v.clearDoc(i);
+ if (i == 9)
+ continue;
+ if (i == 7) {
+ if (v.hasMultiValue()) {
+ v.append(i, "foo", 27);
+ v.append(i, "bar", 14);
+ v.append(i, "foO", -3);
+ } else {
+ EXPECT_TRUE( v.update(i, "foo") );
+ }
+ v.commit();
+ continue;
+ }
+ if (v.hasMultiValue()) {
+ if (v.hasWeightedSetType()) {
+ weight = rnd.rand(0, 256) - 128;
+ }
+ for (size_t j(0); j <= i; j++) {
+ EXPECT_TRUE( v.append(i, rnd.getRandomString(2, 50), weight) );
+ }
+ v.commit();
+ if (!v.hasWeightedSetType()) {
+ EXPECT_EQUAL(static_cast<uint32_t>(v.getValueCount(i)), i + 1);
+ }
+ } else {
+ EXPECT_TRUE( v.update(i, rnd.getRandomString(2, 50)) );
+ }
+ }
+ v.commit();
+}
+
+namespace
+{
+
+template <typename T>
+inline bool
+equalsHelper(const T &lhs, const T &rhs)
+{
+ return lhs == rhs;
+}
+
+template <>
+inline bool
+equalsHelper<float>(const float &lhs, const float &rhs)
+{
+ if (std::isnan(lhs))
+ return std::isnan(rhs);
+ if (std::isnan(rhs))
+ return false;
+ return lhs == rhs;
+}
+
+template <>
+inline bool
+equalsHelper<double>(const double &lhs, const double &rhs)
+{
+ if (std::isnan(lhs))
+ return std::isnan(rhs);
+ if (std::isnan(rhs))
+ return false;
+ return lhs == rhs;
+}
+
+}
+
+template <typename VectorType, typename BufferType>
+void
+EnumeratedSaveTest::compare(VectorType &a, VectorType &b)
+{
+ EXPECT_EQUAL(a.getNumDocs(), b.getNumDocs());
+ ASSERT_TRUE(a.getNumDocs() == b.getNumDocs());
+ // EXPECT_EQUAL(a.getMaxValueCount(), b.getMaxValueCount());
+ EXPECT_EQUAL(a.getCommittedDocIdLimit(), b.getCommittedDocIdLimit());
+ uint32_t asz(a.getMaxValueCount());
+ uint32_t bsz(b.getMaxValueCount());
+ BufferType *av = new BufferType[asz];
+ BufferType *bv = new BufferType[bsz];
+
+ for (size_t i(0), m(a.getNumDocs()); i < m; i++) {
+ ASSERT_TRUE(asz >= static_cast<uint32_t>(a.getValueCount(i)));
+ ASSERT_TRUE(bsz >= static_cast<uint32_t>(b.getValueCount(i)));
+ EXPECT_EQUAL(a.getValueCount(i), b.getValueCount(i));
+ ASSERT_TRUE(a.getValueCount(i) == b.getValueCount(i));
+ EXPECT_EQUAL(static_cast<const AttributeVector &>(a).get(i, av, asz),
+ static_cast<uint32_t>(a.getValueCount(i)));
+ EXPECT_EQUAL(static_cast<const AttributeVector &>(b).get(i, bv, bsz),
+ static_cast<uint32_t>(b.getValueCount(i)));
+ for(size_t j(0), k(std::min(a.getValueCount(i), b.getValueCount(i)));
+ j < k; j++) {
+ EXPECT_TRUE(equalsHelper(av[j], bv[j]));
+ }
+ }
+ delete [] bv;
+ delete [] av;
+}
+
+
+template <typename VectorType>
+VectorType &
+EnumeratedSaveTest::as(AttributePtr &v)
+{
+ VectorType *res = dynamic_cast<VectorType *>(v.get());
+ assert(res != NULL);
+ return *res;
+}
+
+
+IntegerAttribute &
+EnumeratedSaveTest::asInt(AttributePtr &v)
+{
+ return as<IntegerAttribute>(v);
+}
+
+
+StringAttribute &
+EnumeratedSaveTest::asString(AttributePtr &v)
+{
+ return as<StringAttribute>(v);
+}
+
+
+FloatingPointAttribute &
+EnumeratedSaveTest::asFloat(AttributePtr &v)
+{
+ return as<FloatingPointAttribute>(v);
+}
+
+
+void
+EnumeratedSaveTest::buildTermQuery(std::vector<char> &buffer,
+ const vespalib::string &index,
+ const vespalib::string &term,
+ bool prefix)
+{
+ uint32_t indexLen = index.size();
+ uint32_t termLen = term.size();
+ uint32_t queryPacketSize = 1 + 2 * 4 + indexLen + termLen;
+ uint32_t p = 0;
+ buffer.resize(queryPacketSize);
+ buffer[p++] = prefix ? ParseItem::ITEM_PREFIXTERM : ParseItem::ITEM_TERM;
+ p += vespalib::compress::Integer::compressPositive(indexLen, &buffer[p]);
+ memcpy(&buffer[p], index.c_str(), indexLen);
+ p += indexLen;
+ p += vespalib::compress::Integer::compressPositive(termLen, &buffer[p]);
+ memcpy(&buffer[p], term.c_str(), termLen);
+ p += termLen;
+ buffer.resize(p);
+}
+
+
+template <typename V, typename T>
+SearchContextPtr
+EnumeratedSaveTest::getSearch(const V &vec, const T &term, bool prefix)
+{
+ std::vector<char> query;
+ vespalib::asciistream ss;
+ ss << term;
+ buildTermQuery(query, vec.getName(), ss.str(), prefix);
+
+ return (static_cast<const AttributeVector &>(vec)).
+ getSearch(vespalib::stringref(&query[0], query.size()),
+ AttributeVector::SearchContext::Params());
+}
+
+
+template <>
+SearchContextPtr
+EnumeratedSaveTest::getSearch<IntegerAttribute>(const IntegerAttribute &v)
+{
+ return getSearch<IntegerAttribute>(v, "[-42;-42]", false);
+}
+
+template <>
+SearchContextPtr
+EnumeratedSaveTest::getSearch<FloatingPointAttribute>(const FloatingPointAttribute &v)
+{
+ return getSearch<FloatingPointAttribute>(v, "[-42.0;-42.0]", false);
+}
+
+template <>
+SearchContextPtr
+EnumeratedSaveTest::getSearch<StringAttribute>(const StringAttribute &v)
+{
+ return getSearch<StringAttribute, const vespalib::string &>
+ (v, "foo", false);
+}
+
+MemAttr::SP
+EnumeratedSaveTest::saveMem(AttributeVector &v)
+{
+ MemAttr::SP res(new MemAttr);
+ EXPECT_TRUE(v.save(*res));
+ return res;
+}
+
+
+void
+EnumeratedSaveTest::checkMem(AttributeVector &v, const MemAttr &e,
+ bool enumerated)
+{
+ MemAttr m;
+ v.enableEnumeratedSave(enumerated);
+ EXPECT_TRUE(v.save(m));
+ v.enableEnumeratedSave(false);
+ ASSERT_TRUE(m == e);
+}
+
+
+MemAttr::SP
+EnumeratedSaveTest::saveBoth(AttributePtr v)
+{
+ EXPECT_TRUE(v->save());
+ vespalib::string basename = v->getBaseFileName();
+ AttributePtr v2 = make(v->getConfig(), basename, true);
+ EXPECT_TRUE(v2->load());
+ v2->enableEnumeratedSave(true);
+ EXPECT_TRUE(v2->saveAs(basename + "_e"));
+ if ((v->getConfig().basicType() == BasicType::INT32 &&
+ v->getConfig().collectionType() == CollectionType::WSET) || true) {
+ search::AttributeMemorySaveTarget ms;
+ search::TuneFileAttributes tune;
+ search::index::DummyFileHeaderContext fileHeaderContext;
+ EXPECT_TRUE(v2->saveAs(basename + "_ee", ms));
+ EXPECT_TRUE(ms.writeToFile(tune, fileHeaderContext));
+ }
+ return saveMem(*v2);
+}
+
+
+EnumeratedSaveTest::AttributePtr
+EnumeratedSaveTest::make(Config cfg,
+ const vespalib::string &pref,
+ bool fastSearch)
+{
+ cfg.setFastSearch(fastSearch);
+ AttributePtr v = AttributeFactory::createAttribute(pref, cfg);
+ return v;
+}
+
+
+void
+EnumeratedSaveTest::load(AttributePtr v, const vespalib::string &name)
+{
+ v->setBaseFileName(name);
+ EXPECT_TRUE(v->load());
+}
+
+template <typename VectorType, typename BufferType>
+void
+EnumeratedSaveTest::checkLoad(AttributePtr v, const vespalib::string &name,
+ AttributePtr ev)
+{
+ v->setBaseFileName(name);
+ EXPECT_TRUE(v->load());
+ compare<VectorType, BufferType>(as<VectorType>(v), as<VectorType>(ev));
+}
+
+
+template <typename VectorType, typename BufferType>
+void
+EnumeratedSaveTest::testReload(AttributePtr v0,
+ AttributePtr v1,
+ AttributePtr v2,
+ MemAttr::SP mv0,
+ MemAttr::SP mv1,
+ MemAttr::SP mv2,
+ MemAttr::SP emv0,
+ MemAttr::SP emv1,
+ MemAttr::SP emv2,
+ Config cfg,
+ const vespalib::string &pref,
+ bool fastSearch)
+{
+ // typedef AttributePtr AVP;
+
+ bool flagAttr =
+ cfg.collectionType() == CollectionType::ARRAY &&
+ cfg.basicType() == BasicType::INT8 &&
+ fastSearch;
+ bool supportsEnumerated = (fastSearch ||
+ cfg.basicType() == BasicType::STRING) &&
+ !flagAttr;
+
+
+ AttributePtr v = make(cfg, pref, fastSearch);
+ TEST_DO((checkLoad<VectorType, BufferType>(v, pref + "0", v0)));
+ TEST_DO((checkLoad<VectorType, BufferType>(v, pref + "1", v1)));
+ TEST_DO((checkLoad<VectorType, BufferType>(v, pref + "2", v2)));
+ TEST_DO((checkLoad<VectorType, BufferType>(v, pref + "1", v1)));
+ TEST_DO((checkLoad<VectorType, BufferType>(v, pref + "0", v0)));
+
+ TEST_DO((checkLoad<VectorType, BufferType>(v, pref + "0", v0)));
+ TEST_DO(checkMem(*v, *mv0, false));
+ TEST_DO(checkMem(*v, supportsEnumerated ? *emv0 : *mv0, true));
+ TEST_DO((checkLoad<VectorType, BufferType>(v, pref + "1", v1)));
+ TEST_DO(checkMem(*v, *mv1, false));
+ TEST_DO(checkMem(*v, supportsEnumerated ? *emv1 : *mv1, true));
+ TEST_DO((checkLoad<VectorType, BufferType>(v, pref + "2", v2)));
+ TEST_DO(checkMem(*v, *mv2, false));
+ TEST_DO(checkMem(*v, supportsEnumerated ? *emv2 : *mv2, true));
+
+ TEST_DO((checkLoad<VectorType, BufferType>(v, pref + "0_e", v0)));
+ TEST_DO((checkLoad<VectorType, BufferType>(v, pref + "1_e", v1)));
+ TEST_DO((checkLoad<VectorType, BufferType>(v, pref + "2_e", v2)));
+ TEST_DO((checkLoad<VectorType, BufferType>(v, pref + "1_e", v1)));
+ TEST_DO((checkLoad<VectorType, BufferType>(v, pref + "0_e", v0)));
+
+ TEST_DO((checkLoad<VectorType, BufferType>(v, pref + "0_e", v0)));
+ TEST_DO(checkMem(*v, *mv0, false));
+ TEST_DO(checkMem(*v, supportsEnumerated ? *emv0 : *mv0, true));
+ TEST_DO((checkLoad<VectorType, BufferType>(v, pref + "1_e", v1)));
+ TEST_DO(checkMem(*v, *mv1, false));
+ TEST_DO(checkMem(*v, supportsEnumerated ? *emv1 : *mv1, true));
+ TEST_DO((checkLoad<VectorType, BufferType>(v, pref + "2_e", v2)));
+ TEST_DO(checkMem(*v, *mv2, false));
+ TEST_DO(checkMem(*v, supportsEnumerated ? *emv2 : *mv2, true));
+
+ TermFieldMatchData md;
+ SearchContextPtr sc = getSearch<VectorType>(as<VectorType>(v));
+ sc->fetchPostings(true);
+ SearchBasePtr sb = sc->createIterator(&md, true);
+ sb->initFullRange();
+ sb->seek(1u);
+ EXPECT_EQUAL(7u, sb->getDocId());
+ sb->unpack(7u);
+ EXPECT_EQUAL(md.getDocId(), 7u);
+ if (v->getCollectionType() == CollectionType::SINGLE ||
+ flagAttr) {
+ EXPECT_EQUAL(md.getWeight(), 1);
+ } else if (v->getCollectionType() == CollectionType::ARRAY) {
+ EXPECT_EQUAL(md.getWeight(), 2);
+ } else {
+ if (cfg.basicType() == BasicType::STRING) {
+ EXPECT_EQUAL(md.getWeight(), 24);
+ } else {
+ EXPECT_EQUAL(md.getWeight(), -3);
+ }
+ }
+}
+
+
+template <typename VectorType, typename BufferType>
+void
+EnumeratedSaveTest::test(BasicType bt, CollectionType ct,
+ const vespalib::string &pref)
+{
+ Config cfg(bt, ct);
+ AttributePtr v0 = AttributeFactory::createAttribute(pref + "0", cfg);
+ AttributePtr v1 = AttributeFactory::createAttribute(pref + "1", cfg);
+ AttributePtr v2 = AttributeFactory::createAttribute(pref + "2", cfg);
+
+ addDocs(v0, 0);
+ addDocs(v1, 10);
+ addDocs(v2, 30);
+
+ populate(as<VectorType>(v0), 0, bt);
+ populate(as<VectorType>(v1), 10, bt);
+ populate(as<VectorType>(v2), 30, bt);
+
+ MemAttr::SP mv0 = saveMem(*v0);
+ MemAttr::SP mv1 = saveMem(*v1);
+ MemAttr::SP mv2 = saveMem(*v2);
+
+ MemAttr::SP emv0 = saveBoth(v0);
+ MemAttr::SP emv1 = saveBoth(v1);
+ MemAttr::SP emv2 = saveBoth(v2);
+
+ AttributePtr v = make(cfg, pref, true);
+ checkLoad<VectorType, BufferType>(v, pref + "0_ee", v0);
+ checkLoad<VectorType, BufferType>(v, pref + "1_ee", v1);
+ checkLoad<VectorType, BufferType>(v, pref + "2_ee", v2);
+ v.reset();
+
+ TEST_DO((testReload<VectorType, BufferType>(v0, v1, v2,
+ mv0, mv1, mv2,
+ emv0, emv1, emv2,
+ cfg, pref,
+ false)));
+ TEST_DO((testReload<VectorType, BufferType>(v0, v1, v2,
+ mv0, mv1, mv2,
+ emv0, emv1, emv2,
+ cfg, pref,
+ true)));
+}
+
+TEST_F("Test enumerated save with single value int8", EnumeratedSaveTest)
+{
+ f.template test<IntegerAttribute,
+ IntegerAttribute::largeint_t>(BasicType::INT8,
+ CollectionType::SINGLE,
+ "int8_sv");
+}
+
+TEST_F("Test enumerated save with array value int8", EnumeratedSaveTest)
+{
+ f.template test<IntegerAttribute,
+ IntegerAttribute::largeint_t>(BasicType::INT8,
+ CollectionType::ARRAY,
+ "int8_a");
+}
+
+TEST_F("Test enumerated save with weighted set value int8",
+ EnumeratedSaveTest)
+{
+ f.template test<IntegerAttribute,
+ IntegerAttribute::WeightedInt>(BasicType::INT8,
+ CollectionType::WSET,
+ "int8_ws");
+}
+
+TEST_F("Test enumerated save with single value int16", EnumeratedSaveTest)
+{
+ f.template test<IntegerAttribute,
+ IntegerAttribute::largeint_t>(BasicType::INT16,
+ CollectionType::SINGLE,
+ "int16_sv");
+}
+
+TEST_F("Test enumerated save with array value int16", EnumeratedSaveTest)
+{
+ f.template test<IntegerAttribute,
+ IntegerAttribute::largeint_t>(BasicType::INT16,
+ CollectionType::ARRAY,
+ "int16_a");
+}
+
+TEST_F("Test enumerated save with weighted set value int16",
+ EnumeratedSaveTest)
+{
+ f.template test<IntegerAttribute,
+ IntegerAttribute::WeightedInt>(BasicType::INT16,
+ CollectionType::WSET,
+ "int16_ws");
+}
+
+TEST_F("Test enumerated save with single value int32", EnumeratedSaveTest)
+{
+ f.template test<IntegerAttribute,
+ IntegerAttribute::largeint_t>(BasicType::INT32,
+ CollectionType::SINGLE,
+ "int32_sv");
+}
+
+TEST_F("Test enumerated save with array value int32", EnumeratedSaveTest)
+{
+ f.template test<IntegerAttribute,
+ IntegerAttribute::largeint_t>(BasicType::INT32,
+ CollectionType::ARRAY,
+ "int32_a");
+}
+
+TEST_F("Test enumerated save with weighted set value int32",
+ EnumeratedSaveTest)
+{
+ f.template test<IntegerAttribute,
+ IntegerAttribute::WeightedInt>(BasicType::INT32,
+ CollectionType::WSET,
+ "int32_ws");
+}
+
+TEST_F("Test enumerated save with single value int64", EnumeratedSaveTest)
+{
+ f.template test<IntegerAttribute,
+ IntegerAttribute::largeint_t>(BasicType::INT64,
+ CollectionType::SINGLE,
+ "int64_sv");
+}
+
+TEST_F("Test enumerated save with array value int64", EnumeratedSaveTest)
+{
+ f.template test<IntegerAttribute,
+ IntegerAttribute::largeint_t>(BasicType::INT64,
+ CollectionType::ARRAY,
+ "int64_a");
+}
+
+TEST_F("Test enumerated save with weighted set value int64",
+ EnumeratedSaveTest)
+{
+ f.template test<IntegerAttribute,
+ IntegerAttribute::WeightedInt>(BasicType::INT64,
+ CollectionType::WSET,
+ "int64_ws");
+}
+
+TEST_F("Test enumerated save with single value float", EnumeratedSaveTest)
+{
+ f.template test<FloatingPointAttribute,
+ double>(BasicType::FLOAT,
+ CollectionType::SINGLE,
+ "float_sv");
+}
+
+TEST_F("Test enumerated save with array value float", EnumeratedSaveTest)
+{
+ f.template test<FloatingPointAttribute,
+ double>(BasicType::FLOAT,
+ CollectionType::ARRAY,
+ "float_a");
+}
+
+TEST_F("Test enumerated save with weighted set value float",
+ EnumeratedSaveTest)
+{
+ f.template test<FloatingPointAttribute,
+ FloatingPointAttribute::WeightedFloat>(
+ BasicType::FLOAT,
+ CollectionType::WSET,
+ "float_ws");
+}
+
+
+TEST_F("Test enumerated save with single value double", EnumeratedSaveTest)
+{
+ f.template test<FloatingPointAttribute,
+ double>(BasicType::DOUBLE,
+ CollectionType::SINGLE,
+ "double_sv");
+}
+
+TEST_F("Test enumerated save with array value double", EnumeratedSaveTest)
+{
+ f.template test<FloatingPointAttribute,
+ double>(BasicType::DOUBLE,
+ CollectionType::ARRAY,
+ "double_a");
+}
+
+TEST_F("Test enumerated save with weighted set value double",
+ EnumeratedSaveTest)
+{
+ f.template test<FloatingPointAttribute,
+ FloatingPointAttribute::WeightedFloat>(
+ BasicType::DOUBLE,
+ CollectionType::WSET,
+ "double_ws");
+}
+
+
+TEST_F("Test enumerated save with single value string", EnumeratedSaveTest)
+{
+ f.template test<StringAttribute,
+ vespalib::string>(BasicType::STRING,
+ CollectionType::SINGLE,
+ "str_sv");
+}
+
+TEST_F("Test enumerated save with array value string", EnumeratedSaveTest)
+{
+ f.template test<StringAttribute,
+ vespalib::string>(BasicType::STRING,
+ CollectionType::ARRAY,
+ "str_a");
+}
+
+TEST_F("Test enumerated save with weighted set value string",
+ EnumeratedSaveTest)
+{
+ f.template test<StringAttribute,
+ StringAttribute::WeightedString>(
+ BasicType::STRING,
+ CollectionType::WSET,
+ "str_ws");
+}
+
+TEST_MAIN()
+{
+ AttributeVector::enableEnumeratedLoad();
+ TEST_RUN_ALL();
+}
diff --git a/searchlib/src/tests/attribute/enumstore/.gitignore b/searchlib/src/tests/attribute/enumstore/.gitignore
new file mode 100644
index 00000000000..c58a018bbd9
--- /dev/null
+++ b/searchlib/src/tests/attribute/enumstore/.gitignore
@@ -0,0 +1,4 @@
+.depend
+Makefile
+enumstore_test
+searchlib_enumstore_test_app
diff --git a/searchlib/src/tests/attribute/enumstore/CMakeLists.txt b/searchlib/src/tests/attribute/enumstore/CMakeLists.txt
new file mode 100644
index 00000000000..33190553747
--- /dev/null
+++ b/searchlib/src/tests/attribute/enumstore/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_enumstore_test_app
+ SOURCES
+ enumstore_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_enumstore_test_app COMMAND searchlib_enumstore_test_app)
diff --git a/searchlib/src/tests/attribute/enumstore/DESC b/searchlib/src/tests/attribute/enumstore/DESC
new file mode 100644
index 00000000000..514c9a47caf
--- /dev/null
+++ b/searchlib/src/tests/attribute/enumstore/DESC
@@ -0,0 +1 @@
+This is a test for the EnumStore class.
diff --git a/searchlib/src/tests/attribute/enumstore/FILES b/searchlib/src/tests/attribute/enumstore/FILES
new file mode 100644
index 00000000000..6fdb2381292
--- /dev/null
+++ b/searchlib/src/tests/attribute/enumstore/FILES
@@ -0,0 +1 @@
+enumstore.cpp
diff --git a/searchlib/src/tests/attribute/enumstore/enumstore_test.cpp b/searchlib/src/tests/attribute/enumstore/enumstore_test.cpp
new file mode 100644
index 00000000000..e63889bbeb8
--- /dev/null
+++ b/searchlib/src/tests/attribute/enumstore/enumstore_test.cpp
@@ -0,0 +1,879 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("enumstore_test");
+#include <vespa/vespalib/testkit/testapp.h>
+//#define LOG_ENUM_STORE
+#include <vespa/searchlib/attribute/enumstore.hpp>
+#include <limits>
+#include <string>
+#include <iostream>
+
+namespace search {
+
+size_t enumStoreAlign(size_t size)
+{
+ return (size + 15) & -UINT64_C(16);
+}
+
+// EnumStoreBase::Index(0,0) is reserved thus 16 bytes are reserved in buffer 0
+const uint32_t RESERVED_BYTES = 16u;
+typedef EnumStoreT<NumericEntryType<uint32_t> > NumericEnumStore;
+
+class EnumStoreTest : public vespalib::TestApp
+{
+private:
+ typedef EnumStoreT<StringEntryType> StringEnumStore;
+ typedef EnumStoreT<NumericEntryType<float> > FloatEnumStore;
+ typedef EnumStoreT<NumericEntryType<double> > DoubleEnumStore;
+
+ typedef EnumStoreBase::Index EnumIndex;
+ typedef vespalib::GenerationHandler::generation_t generation_t;
+
+ void testIndex();
+ void fillDataBuffer(char * data, uint32_t enumValue, uint32_t refCount,
+ const std::string & string);
+ void fillDataBuffer(char * data, uint32_t enumValue, uint32_t refCount,
+ uint32_t value);
+ void testStringEntry();
+ void testNumericEntry();
+
+ template <typename EnumStoreType, typename T>
+ void testFloatEnumStore(EnumStoreType & es);
+ void testFloatEnumStore();
+
+ void testAddEnum();
+ template <typename EnumStoreType>
+ void testAddEnum(bool hasPostings);
+
+ template <typename EnumStoreType, typename Dictionary>
+ void
+ testUniques(const EnumStoreType &ses,
+ const std::vector<std::string> &unique);
+
+
+ void testCompaction();
+ template <typename EnumStoreType>
+ void testCompaction(bool hasPostings, bool disableReEnumerate);
+
+ void testReset();
+ template <typename EnumStoreType>
+ void testReset(bool hasPostings);
+
+ void testHoldListAndGeneration();
+ void testMemoryUsage();
+ void requireThatAddressSpaceUsageIsReported();
+ void testBufferLimit();
+
+ // helper methods
+ typedef std::vector<std::string> StringVector;
+ template <typename T>
+ T random(T low, T high);
+ std::string getRandomString(uint32_t minLen, uint32_t maxLen);
+ StringVector fillRandomStrings(uint32_t numStrings, uint32_t minLen, uint32_t maxLen);
+ StringVector sortRandomStrings(StringVector & strings);
+
+ struct StringEntry {
+ StringEntry(uint32_t e, uint32_t r, const std::string & s) :
+ _enum(e), _refCount(r), _string(s) {}
+ uint32_t _enum;
+ uint32_t _refCount;
+ std::string _string;
+ };
+
+ struct Reader {
+ typedef StringEnumStore::Index Index;
+ typedef std::vector<Index> IndexVector;
+ typedef std::vector<StringEntry> ExpectedVector;
+ uint32_t _generation;
+ IndexVector _indices;
+ ExpectedVector _expected;
+ Reader(uint32_t generation, const IndexVector & indices,
+ const ExpectedVector & expected) :
+ _generation(generation), _indices(indices), _expected(expected) {}
+ };
+
+ void
+ checkReaders(const StringEnumStore &ses,
+ generation_t sesGen,
+ const std::vector<Reader> &readers);
+
+public:
+ EnumStoreTest() {}
+ int Main();
+};
+
+void
+EnumStoreTest::testIndex()
+{
+ {
+ StringEnumStore::Index idx;
+ EXPECT_TRUE( ! idx.valid());
+ EXPECT_EQUAL(idx.offset(), 0u);
+ EXPECT_TRUE(idx.bufferId() == 0);
+ }
+ {
+ StringEnumStore::Index idx(enumStoreAlign(1000), 0);
+ EXPECT_TRUE(idx.offset() == enumStoreAlign(1000));
+ EXPECT_TRUE(idx.bufferId() == 0);
+ }
+ {
+ StringEnumStore::Index idx((UINT64_C(1) << 31)- RESERVED_BYTES, 1);
+ EXPECT_TRUE(idx.offset() == (UINT64_C(1) << 31) - RESERVED_BYTES);
+ EXPECT_TRUE(idx.bufferId() == 1);
+ }
+ {
+ StringEnumStore::Index idx((UINT64_C(1) << 33) - RESERVED_BYTES, 1);
+ EXPECT_TRUE(idx.offset() == (UINT64_C(1) << 33) - RESERVED_BYTES);
+ EXPECT_TRUE(idx.bufferId() == 1);
+ }
+ {
+ StringEnumStore::Index idx((UINT64_C(1) << 35) - RESERVED_BYTES, 1);
+ EXPECT_TRUE(idx.offset() == (UINT64_C(1) << 35) - RESERVED_BYTES);
+ EXPECT_TRUE(idx.bufferId() == 1);
+ }
+ {
+ // Change offsets when alignment changes.
+ StringEnumStore::Index idx1(48, 0);
+ StringEnumStore::Index idx2(80, 0);
+ StringEnumStore::Index idx3(48, 0);
+ EXPECT_TRUE(!(idx1 == idx2));
+ EXPECT_TRUE(idx1 == idx3);
+ }
+ {
+ EXPECT_TRUE(StringEnumStore::Index::numBuffers() == 2);
+ }
+}
+
+void
+EnumStoreTest::fillDataBuffer(char * data, uint32_t enumValue, uint32_t refCount,
+ const std::string & string)
+{
+ StringEnumStore::insertEntry(data, enumValue, refCount, string.c_str());
+}
+
+void
+EnumStoreTest::fillDataBuffer(char * data, uint32_t enumValue, uint32_t refCount,
+ uint32_t value)
+{
+ NumericEnumStore::insertEntry(data, enumValue, refCount, value);
+}
+
+void
+EnumStoreTest::testStringEntry()
+{
+ {
+ char data[9];
+ fillDataBuffer(data, 0, 0, "");
+ StringEnumStore::Entry e(data);
+ EXPECT_TRUE(StringEnumStore::getEntrySize("") ==
+ StringEnumStore::alignEntrySize(8 + 1));
+
+ EXPECT_TRUE(e.getEnum() == 0);
+ EXPECT_TRUE(e.getRefCount() == 0);
+ EXPECT_TRUE(strcmp(e.getValue(), "") == 0);
+
+ e.incRefCount();
+ EXPECT_TRUE(e.getEnum() == 0);
+ EXPECT_TRUE(e.getRefCount() == 1);
+ EXPECT_TRUE(strcmp(e.getValue(), "") == 0);
+ e.decRefCount();
+ EXPECT_TRUE(e.getEnum() == 0);
+ EXPECT_TRUE(e.getRefCount() == 0);
+ EXPECT_TRUE(strcmp(e.getValue(), "") == 0);
+ }
+ {
+ char data[18];
+ fillDataBuffer(data, 10, 5, "enumstore");
+ StringEnumStore::Entry e(data);
+ EXPECT_TRUE(StringEnumStore::getEntrySize("enumstore") ==
+ StringEnumStore::alignEntrySize(8 + 1 + 9));
+
+ EXPECT_TRUE(e.getEnum() == 10);
+ EXPECT_TRUE(e.getRefCount() == 5);
+ EXPECT_TRUE(strcmp(e.getValue(), "enumstore") == 0);
+
+ e.incRefCount();
+ EXPECT_TRUE(e.getEnum() == 10);
+ EXPECT_TRUE(e.getRefCount() == 6);
+ EXPECT_TRUE(strcmp(e.getValue(), "enumstore") == 0);
+ e.decRefCount();
+ EXPECT_TRUE(e.getEnum() == 10);
+ EXPECT_TRUE(e.getRefCount() == 5);
+ EXPECT_TRUE(strcmp(e.getValue(), "enumstore") == 0);
+ }
+}
+
+void
+EnumStoreTest::testNumericEntry()
+{
+ {
+ char data[12];
+ fillDataBuffer(data, 10, 20, 30);
+ NumericEnumStore::Entry e(data);
+ EXPECT_TRUE(NumericEnumStore::getEntrySize(30) ==
+ NumericEnumStore::alignEntrySize(8 + 4));
+
+ EXPECT_TRUE(e.getEnum() == 10);
+ EXPECT_TRUE(e.getRefCount() == 20);
+ EXPECT_TRUE(e.getValue() == 30);
+
+ e.incRefCount();
+ EXPECT_TRUE(e.getEnum() == 10);
+ EXPECT_TRUE(e.getRefCount() == 21);
+ EXPECT_TRUE(e.getValue() == 30);
+ e.decRefCount();
+ EXPECT_TRUE(e.getEnum() == 10);
+ EXPECT_TRUE(e.getRefCount() == 20);
+ EXPECT_TRUE(e.getValue() == 30);
+ }
+}
+
+template <typename EnumStoreType, typename T>
+void
+EnumStoreTest::testFloatEnumStore(EnumStoreType & es)
+{
+ EnumIndex idx;
+
+ T a[5] = {-20.5f, -10.5f, -0.5f, 9.5f, 19.5f};
+ T b[5] = {-25.5f, -15.5f, -5.5f, 4.5f, 14.5f};
+
+ for (uint32_t i = 0; i < 5; ++i) {
+ es.addEnum(a[i], idx);
+ }
+
+ for (uint32_t i = 0; i < 5; ++i) {
+ EXPECT_TRUE(es.findIndex(a[i], idx));
+ EXPECT_TRUE(!es.findIndex(b[i], idx));
+ }
+
+ es.addEnum(std::numeric_limits<T>::quiet_NaN(), idx);
+ EXPECT_TRUE(es.findIndex(std::numeric_limits<T>::quiet_NaN(), idx));
+ EXPECT_TRUE(es.findIndex(std::numeric_limits<T>::quiet_NaN(), idx));
+
+ for (uint32_t i = 0; i < 5; ++i) {
+ EXPECT_TRUE(es.findIndex(a[i], idx));
+ EXPECT_TRUE(!es.findIndex(b[i], idx));
+ }
+}
+
+void
+EnumStoreTest::testFloatEnumStore()
+{
+ {
+ FloatEnumStore fes(1000, false);
+ testFloatEnumStore<FloatEnumStore, float>(fes);
+ }
+ {
+ DoubleEnumStore des(1000, false);
+ testFloatEnumStore<DoubleEnumStore, double>(des);
+ }
+}
+
+void
+EnumStoreTest::testAddEnum()
+{
+ testAddEnum<StringEnumStore>(false);
+
+ testAddEnum<StringEnumStore>(true);
+}
+
+template <typename EnumStoreType>
+void
+EnumStoreTest::testAddEnum(bool hasPostings)
+{
+ EnumStoreType ses(100, hasPostings);
+ EXPECT_EQUAL(enumStoreAlign(100u) + RESERVED_BYTES,
+ ses.getBuffer(0).capacity());
+ EXPECT_EQUAL(RESERVED_BYTES, ses.getBuffer(0).size());
+ EXPECT_EQUAL(enumStoreAlign(100u), ses.getBuffer(0).remaining());
+ EXPECT_EQUAL(RESERVED_BYTES, ses.getBuffer(0)._deadElems);
+
+ EnumIndex idx;
+ uint64_t offset = ses.getBuffer(0).size();
+ std::vector<EnumIndex> indices;
+ std::vector<std::string> unique;
+ unique.push_back("");
+ unique.push_back("add");
+ unique.push_back("enumstore");
+ unique.push_back("unique");
+
+ for (uint32_t i = 0; i < unique.size(); ++i) {
+ ses.addEnum(unique[i].c_str(), idx);
+ EXPECT_EQUAL(offset, idx.offset());
+ EXPECT_EQUAL(0u, idx.bufferId());
+ ses.incRefCount(idx);
+ EXPECT_EQUAL(1u, ses.getRefCount(idx));
+ indices.push_back(idx);
+ offset += EnumStoreType::alignEntrySize(unique[i].size() + 1 + 8);
+ EXPECT_TRUE(ses.findIndex(unique[i].c_str(), idx));
+ EXPECT_TRUE(ses.getLastEnum() == i);
+ }
+ ses.freezeTree();
+
+ for (uint32_t i = 0; i < indices.size(); ++i) {
+ uint32_t e = ses.getEnum(indices[i]);
+ EXPECT_EQUAL(i, e);
+ EXPECT_TRUE(ses.findEnum(unique[i].c_str(), e));
+ EXPECT_TRUE(ses.getEnum(btree::EntryRef(e)) == i);
+ EXPECT_TRUE(ses.findIndex(unique[i].c_str(), idx));
+ EXPECT_TRUE(idx == indices[i]);
+ EXPECT_EQUAL(1u, ses.getRefCount(indices[i]));
+ StringEntryType::Type value = 0;
+ EXPECT_TRUE(ses.getValue(indices[i], value));
+ EXPECT_TRUE(strcmp(unique[i].c_str(), value) == 0);
+ }
+
+ if (hasPostings) {
+ testUniques<EnumStoreType, EnumPostingTree>(ses, unique);
+ } else {
+ testUniques<EnumStoreType, EnumTree>(ses, unique);
+ }
+}
+
+template <typename EnumStoreType, typename Dictionary>
+void
+EnumStoreTest::testUniques
+(const EnumStoreType &ses, const std::vector<std::string> &unique)
+{
+ const EnumStoreDict<Dictionary> *enumDict =
+ dynamic_cast<const EnumStoreDict<Dictionary> *>
+ (&ses.getEnumStoreDict());
+ assert(enumDict != NULL);
+ const Dictionary &dict = enumDict->getDictionary();
+ uint32_t i = 0;
+ EnumIndex idx;
+ for (typename Dictionary::Iterator iter = dict.begin();
+ iter.valid(); ++iter, ++i) {
+ idx = iter.getKey();
+ EXPECT_TRUE(strcmp(unique[i].c_str(), ses.getValue(idx)) == 0);
+ }
+ EXPECT_EQUAL(static_cast<uint32_t>(unique.size()), i);
+}
+
+
+void
+EnumStoreTest::testCompaction()
+{
+ testCompaction<StringEnumStore>(false, false);
+ testCompaction<StringEnumStore>(true, false);
+ testCompaction<StringEnumStore>(false, true);
+ testCompaction<StringEnumStore>(true, true);
+}
+
+template <typename EnumStoreType>
+void
+EnumStoreTest::testCompaction(bool hasPostings, bool disableReEnumerate)
+{
+ // entrySize = 15 before alignment
+ uint32_t entrySize = EnumStoreType::alignEntrySize(15);
+ uint32_t bufferSize = entrySize * 5;
+ EnumStoreType ses(bufferSize, hasPostings);
+ EnumIndex idx;
+ std::vector<EnumIndex> indices;
+ typename EnumStoreType::Type t = "foo";
+ std::vector<std::string> uniques;
+ uniques.push_back("enum00");
+ uniques.push_back("enum01");
+ uniques.push_back("enum02");
+ uniques.push_back("enum03");
+ uniques.push_back("enum04");
+
+ // fill with unique values
+ for (uint32_t i = 0; i < 5; ++i) {
+ EXPECT_TRUE(ses.getRemaining() == bufferSize - i * entrySize);
+ ses.addEnum(uniques[i].c_str(), idx);
+ ses.incRefCount(idx);
+ EXPECT_TRUE(ses.getRefCount(idx));
+ indices.push_back(idx);
+ }
+ EXPECT_EQUAL(0u, ses.getRemaining());
+ EXPECT_EQUAL(0u, ses.getBuffer(0).remaining());
+ EXPECT_EQUAL(entrySize * 5 + RESERVED_BYTES, ses.getBuffer(0).size());
+ EXPECT_EQUAL(RESERVED_BYTES, ses.getBuffer(0)._deadElems);
+ uint32_t failEntrySize = ses.getEntrySize("enum05");
+ EXPECT_TRUE(failEntrySize > ses.getRemaining());
+
+ // change from enum00 -> enum01
+ ses.decRefCount(indices[0]);
+ ses.incRefCount(indices[1]);
+ indices[0] = indices[1];
+
+ // check correct refcount
+ for (uint32_t i = 0; i < 5; ++i) {
+ EXPECT_TRUE(ses.findIndex(uniques[i].c_str(), idx));
+ uint32_t refCount = ses.getRefCount(idx);
+ if (i == 0) {
+ EXPECT_TRUE(refCount == 0);
+ } else if (i == 1) {
+ EXPECT_TRUE(refCount == 2);
+ } else {
+ EXPECT_TRUE(refCount == 1);
+ }
+ }
+
+ // free unused enums
+ ses.freeUnusedEnums(true);
+ EXPECT_TRUE(!ses.findIndex("enum00", idx));
+ EXPECT_EQUAL(entrySize + RESERVED_BYTES, ses.getBuffer(0)._deadElems);
+
+ // perform compaction
+ if (disableReEnumerate) {
+ ses.disableReEnumerate();
+ }
+ EXPECT_TRUE(ses.performCompaction(3 * entrySize));
+ if (disableReEnumerate) {
+ ses.enableReEnumerate();
+ }
+ EXPECT_TRUE(ses.getRemaining() >= 3 * entrySize);
+ EXPECT_TRUE(ses.getBuffer(1).remaining() >= 3 * entrySize);
+ EXPECT_TRUE(ses.getBuffer(1).size() == entrySize * 4);
+ EXPECT_TRUE(ses.getBuffer(1)._deadElems == 0);
+
+ EXPECT_EQUAL((disableReEnumerate ? 4u : 3u), ses.getLastEnum());
+
+ // add new unique strings
+ ses.addEnum("enum05", idx);
+ EXPECT_EQUAL((disableReEnumerate ? 5u : 4u), ses.getEnum(idx));
+ ses.addEnum("enum06", idx);
+ EXPECT_EQUAL((disableReEnumerate ? 6u : 5u), ses.getEnum(idx));
+ ses.addEnum("enum00", idx);
+ EXPECT_EQUAL((disableReEnumerate ? 7u : 6u), ses.getEnum(idx));
+
+ EXPECT_EQUAL((disableReEnumerate ? 7u : 6u), ses.getLastEnum());
+
+ // compare old and new indices
+ for (uint32_t i = 0; i < indices.size(); ++i) {
+ EXPECT_TRUE(ses.getCurrentIndex(indices[i], idx));
+ EXPECT_TRUE(indices[i].bufferId() == 0);
+ EXPECT_TRUE(idx.bufferId() == 1);
+ EXPECT_TRUE(ses.getValue(indices[i], t));
+ typename EnumStoreType::Type s = "bar";
+ EXPECT_TRUE(ses.getValue(idx, s));
+ EXPECT_TRUE(strcmp(t, s) == 0);
+ }
+ // EnumIndex(0,0) is reserved so we have 4 bytes extra at the start of buffer 0
+ EXPECT_TRUE(ses.getCurrentIndex(indices[0], idx));
+ EXPECT_EQUAL(entrySize + RESERVED_BYTES, indices[0].offset());
+ EXPECT_EQUAL(0u, idx.offset());
+ EXPECT_TRUE(ses.getCurrentIndex(indices[1], idx));
+ EXPECT_EQUAL(entrySize + RESERVED_BYTES, indices[1].offset());
+ EXPECT_EQUAL(0u, idx.offset());
+ EXPECT_TRUE(ses.getCurrentIndex(indices[2], idx));
+ EXPECT_EQUAL(2 * entrySize + RESERVED_BYTES, indices[2].offset());
+ EXPECT_EQUAL(entrySize, idx.offset());
+ EXPECT_TRUE(ses.getCurrentIndex(indices[3], idx));
+ EXPECT_EQUAL(3 * entrySize + RESERVED_BYTES, indices[3].offset());
+ EXPECT_EQUAL(2 * entrySize, idx.offset());
+ EXPECT_TRUE(ses.getCurrentIndex(indices[4], idx));
+ EXPECT_EQUAL(4 * entrySize + RESERVED_BYTES, indices[4].offset());
+ EXPECT_EQUAL(3 * entrySize, idx.offset());
+}
+
+void
+EnumStoreTest::testReset()
+{
+ testReset<StringEnumStore>(false);
+
+ testReset<StringEnumStore>(true);
+}
+
+template <typename EnumStoreType>
+void
+EnumStoreTest::testReset(bool hasPostings)
+{
+ uint32_t numUniques = 10000;
+ srand(123456789);
+ StringVector rndStrings = fillRandomStrings(numUniques, 10, 15);
+ EXPECT_EQUAL(rndStrings.size(), size_t(numUniques));
+ StringVector uniques = sortRandomStrings(rndStrings);
+ EXPECT_EQUAL(uniques.size(), size_t(numUniques));
+ // max entrySize = 25 before alignment
+ uint32_t maxEntrySize = EnumStoreType::alignEntrySize(8 + 1 + 16);
+ EnumStoreType ses(numUniques * maxEntrySize, hasPostings);
+ EnumIndex idx;
+
+ uint32_t cnt = 0;
+ // add new unique strings
+ for (StringVector::reverse_iterator iter = uniques.rbegin(); iter != uniques.rend(); ++iter) {
+ ses.addEnum(iter->c_str(), idx);
+ EXPECT_EQUAL(ses.getNumUniques(), ++cnt);
+ }
+
+ // check for unique strings
+ for (StringVector::iterator iter = uniques.begin(); iter != uniques.end(); ++iter) {
+ EXPECT_TRUE(ses.findIndex(iter->c_str(), idx));
+ }
+
+ EXPECT_EQUAL(ses.getNumUniques(), numUniques);
+ if (hasPostings) {
+ testUniques<EnumStoreType, EnumPostingTree>(ses, uniques);
+ } else {
+ testUniques<EnumStoreType, EnumTree>(ses, uniques);
+ }
+
+ rndStrings = fillRandomStrings(numUniques, 15, 20);
+ StringVector newUniques = sortRandomStrings(rndStrings);
+
+ typename EnumStoreType::Builder builder;
+ for (StringVector::iterator iter = newUniques.begin(); iter != newUniques.end(); ++iter) {
+ builder.insert(iter->c_str());
+ }
+
+ ses.reset(builder);
+ EXPECT_EQUAL(RESERVED_BYTES, ses.getRemaining());
+
+ // check for old unique strings
+ for (StringVector::iterator iter = uniques.begin(); iter != uniques.end(); ++iter) {
+ EXPECT_TRUE(!ses.findIndex(iter->c_str(), idx));
+ }
+
+ // check for new unique strings
+ for (StringVector::iterator iter = newUniques.begin(); iter != newUniques.end(); ++iter) {
+ EXPECT_TRUE(ses.findIndex(iter->c_str(), idx));
+ }
+
+ EXPECT_EQUAL(ses.getNumUniques(), numUniques);
+ if (hasPostings) {
+ testUniques<EnumStoreType, EnumPostingTree>(ses, newUniques);
+ } else {
+ testUniques<EnumStoreType, EnumTree>(ses, newUniques);
+ }
+}
+
+void
+EnumStoreTest::testHoldListAndGeneration()
+{
+ uint32_t entrySize = StringEnumStore::alignEntrySize(8 + 1 + 6);
+ StringEnumStore ses(100 * entrySize, false);
+ StringEnumStore::Index idx;
+ StringVector uniques;
+ generation_t sesGen = 0u;
+ uniques.reserve(100);
+ for (uint32_t i = 0; i < 100; ++i) {
+ char tmp[16];
+ sprintf(tmp, i < 10 ? "enum0%u" : "enum%u", i);
+ uniques.push_back(tmp);
+ }
+ StringVector newUniques;
+ newUniques.reserve(100);
+ for (uint32_t i = 0; i < 100; ++i) {
+ char tmp[16];
+ sprintf(tmp, i < 10 ? "unique0%u" : "unique%u", i);
+ newUniques.push_back(tmp);
+ }
+ uint32_t generation = 0;
+ std::vector<Reader> readers;
+
+ // insert first batch of unique strings
+ for (uint32_t i = 0; i < 100; ++i) {
+ ses.addEnum(uniques[i].c_str(), idx);
+ ses.incRefCount(idx);
+ EXPECT_TRUE(ses.getRefCount(idx));
+
+ // associate readers
+ if (i % 10 == 9) {
+ Reader::IndexVector indices;
+ Reader::ExpectedVector expected;
+ for (uint32_t j = i - 9; j <= i; ++j) {
+ EXPECT_TRUE(ses.findIndex(uniques[j].c_str(), idx));
+ indices.push_back(idx);
+ StringEnumStore::Entry entry = ses.getEntry(idx);
+ EXPECT_TRUE(entry.getEnum() == j);
+ EXPECT_TRUE(entry.getRefCount() == 1);
+ EXPECT_TRUE(strcmp(entry.getValue(), uniques[j].c_str()) == 0);
+ expected.push_back(StringEntry(entry.getEnum(), entry.getRefCount(),
+ std::string(entry.getValue())));
+ }
+ EXPECT_TRUE(indices.size() == 10);
+ EXPECT_TRUE(expected.size() == 10);
+ sesGen = generation++;
+ readers.push_back(Reader(sesGen, indices, expected));
+ checkReaders(ses, sesGen, readers);
+ }
+ }
+
+ EXPECT_EQUAL(0u, ses.getRemaining());
+ EXPECT_EQUAL(RESERVED_BYTES, ses.getBuffer(0)._deadElems);
+
+ // remove all uniques
+ for (uint32_t i = 0; i < 100; ++i) {
+ EXPECT_TRUE(ses.findIndex(uniques[i].c_str(), idx));
+ ses.decRefCount(idx);
+ EXPECT_EQUAL(0u, ses.getRefCount(idx));
+ }
+ ses.freeUnusedEnums(true);
+ EXPECT_EQUAL(100 * entrySize + RESERVED_BYTES, ses.getBuffer(0)._deadElems);
+
+ // perform compaction
+ uint32_t newEntrySize = StringEnumStore::alignEntrySize(8 + 1 + 8);
+ EXPECT_TRUE(ses.performCompaction(5 * newEntrySize));
+
+ // check readers again
+ checkReaders(ses, sesGen, readers);
+
+ // fill up buffer
+ uint32_t i = 0;
+ while (ses.getRemaining() >= newEntrySize) {
+ //LOG(info, "fill: %s", newUniques[i].c_str());
+ ses.addEnum(newUniques[i++].c_str(), idx);
+ ses.incRefCount(idx);
+ EXPECT_TRUE(ses.getRefCount(idx));
+ }
+ EXPECT_LESS(ses.getRemaining(), newEntrySize);
+ // buffer on hold list
+ EXPECT_TRUE(!ses.performCompaction(5 * newEntrySize));
+
+ checkReaders(ses, sesGen, readers);
+ ses.transferHoldLists(sesGen);
+ ses.trimHoldLists(sesGen + 1);
+
+ // buffer no longer on hold list
+ EXPECT_LESS(ses.getRemaining(), newEntrySize);
+ EXPECT_TRUE(ses.performCompaction(5 * newEntrySize));
+ EXPECT_TRUE(ses.getRemaining() >= 5 * newEntrySize);
+}
+
+void
+EnumStoreTest::testMemoryUsage()
+{
+ StringEnumStore ses(200, false);
+ StringEnumStore::Index idx;
+ uint32_t num = 8;
+ std::vector<StringEnumStore::Index> indices;
+ std::vector<std::string> uniques;
+ for (uint32_t i = 0; i < num; ++i) {
+ std::stringstream ss;
+ ss << "enum" << i;
+ uniques.push_back(ss.str());
+ }
+ generation_t sesGen = 0u;
+ uint32_t entrySize = StringEnumStore::alignEntrySize(8 + 1 + 5); // enum(4) + refcount(4) + 1(\0) + strlen("enumx")
+
+ // usage before inserting enums
+ MemoryUsage usage = ses.getMemoryUsage();
+ EXPECT_EQUAL(ses.getNumUniques(), uint32_t(0));
+ EXPECT_EQUAL(enumStoreAlign(200u) + RESERVED_BYTES, usage.allocatedBytes());
+ EXPECT_EQUAL(RESERVED_BYTES, usage.usedBytes());
+ EXPECT_EQUAL(RESERVED_BYTES, usage.deadBytes());
+ EXPECT_EQUAL(0u, usage.allocatedBytesOnHold());
+
+ for (uint32_t i = 0; i < num; ++i) {
+ ses.addEnum(uniques[i].c_str(), idx);
+ indices.push_back(idx);
+ ses.incRefCount(idx);
+ EXPECT_TRUE(ses.getRefCount(idx));
+ }
+
+ // usage after inserting enums
+ usage = ses.getMemoryUsage();
+ EXPECT_EQUAL(ses.getNumUniques(), num);
+ EXPECT_EQUAL(enumStoreAlign(200u) + RESERVED_BYTES, usage.allocatedBytes());
+ EXPECT_EQUAL(num * entrySize + RESERVED_BYTES, usage.usedBytes());
+ EXPECT_EQUAL(RESERVED_BYTES, usage.deadBytes());
+ EXPECT_EQUAL(0u, usage.allocatedBytesOnHold());
+
+ // assign new enum for num / 2 of indices
+ for (uint32_t i = 0; i < num / 2; ++i) {
+ ses.decRefCount(indices[i]);
+ EXPECT_TRUE(ses.findIndex(uniques.back().c_str(), idx));
+ ses.incRefCount(idx);
+ indices[i] = idx;
+ }
+ ses.freeUnusedEnums(true);
+
+ // usage after removing enums
+ usage = ses.getMemoryUsage();
+ EXPECT_EQUAL(ses.getNumUniques(), num / 2);
+ EXPECT_EQUAL(enumStoreAlign(200u) + RESERVED_BYTES, usage.allocatedBytes());
+ EXPECT_EQUAL(num * entrySize + RESERVED_BYTES, usage.usedBytes());
+ EXPECT_EQUAL((num / 2) * entrySize + RESERVED_BYTES, usage.deadBytes());
+ EXPECT_EQUAL(0u, usage.allocatedBytesOnHold());
+
+ ses.performCompaction(400);
+
+ // usage after compaction
+ MemoryUsage usage2 = ses.getMemoryUsage();
+ EXPECT_EQUAL(ses.getNumUniques(), num / 2);
+ EXPECT_EQUAL(usage.usedBytes() + (num / 2) * entrySize, usage2.usedBytes());
+ EXPECT_EQUAL(usage.deadBytes(), usage2.deadBytes());
+ EXPECT_EQUAL(usage.usedBytes() - usage.deadBytes(), usage2.allocatedBytesOnHold());
+
+ ses.transferHoldLists(sesGen);
+ ses.trimHoldLists(sesGen + 1);
+
+ // usage after hold list trimming
+ MemoryUsage usage3 = ses.getMemoryUsage();
+ EXPECT_EQUAL((num / 2) * entrySize, usage3.usedBytes());
+ EXPECT_EQUAL(0u, usage3.deadBytes());
+ EXPECT_EQUAL(0u, usage3.allocatedBytesOnHold());
+}
+
+namespace {
+
+NumericEnumStore::Index
+addEnum(NumericEnumStore &store, uint32_t value)
+{
+ NumericEnumStore::Index result;
+ store.addEnum(value, result);
+ store.incRefCount(result);
+ return result;
+}
+
+void
+decRefCount(NumericEnumStore &store, NumericEnumStore::Index idx)
+{
+ store.decRefCount(idx);
+ store.freeUnusedEnums(false);
+}
+
+}
+
+void
+EnumStoreTest::requireThatAddressSpaceUsageIsReported()
+{
+ const size_t ADDRESS_LIMIT = 34359738368; // NumericEnumStore::DataStoreType::RefType::offsetSize()
+ NumericEnumStore store(200, false);
+
+ EXPECT_EQUAL(AddressSpace(0, ADDRESS_LIMIT), store.getAddressSpaceUsage());
+ NumericEnumStore::Index idx1 = addEnum(store, 10);
+ EXPECT_EQUAL(AddressSpace(16, ADDRESS_LIMIT), store.getAddressSpaceUsage());
+ NumericEnumStore::Index idx2 = addEnum(store, 20);
+ EXPECT_EQUAL(AddressSpace(32, ADDRESS_LIMIT), store.getAddressSpaceUsage());
+ decRefCount(store, idx1);
+ EXPECT_EQUAL(AddressSpace(16, ADDRESS_LIMIT), store.getAddressSpaceUsage());
+ decRefCount(store, idx2);
+ EXPECT_EQUAL(AddressSpace(0, ADDRESS_LIMIT), store.getAddressSpaceUsage());
+}
+
+size_t
+digits(size_t num)
+{
+ size_t digits = 1;
+ while (num / 10 > 0) {
+ num /= 10;
+ digits++;
+ }
+ return digits;
+}
+
+void
+EnumStoreTest::testBufferLimit()
+{
+ size_t enumSize = StringEnumStore::Index::offsetSize();
+ StringEnumStore es(enumSize, false);
+
+ size_t strLen = 65536;
+ char str[strLen + 1];
+ for (size_t i = 0; i < strLen; ++i) {
+ str[i] = 'X';
+ }
+ str[strLen] = 0;
+
+ size_t entrySize = StringEnumStore::getEntrySize(str);
+ size_t numUniques = enumSize / entrySize;
+ size_t uniqDigits = digits(numUniques);
+
+ EnumIndex idx;
+ EnumIndex lastIdx;
+ for (size_t i = 0; i < numUniques; ++i) {
+ sprintf(str, "%0*zu", (int)uniqDigits, i);
+ str[uniqDigits] = 'X';
+ es.addEnum(str, idx);
+ if (i % (numUniques / 32) == 1) {
+ EXPECT_TRUE(idx.offset() > lastIdx.offset());
+ EXPECT_EQUAL(i + 1, es.getNumUniques());
+ std::cout << "idx.offset(" << idx.offset() << "), str(" << std::string(str, uniqDigits) << ")" << std::endl;
+ }
+ lastIdx = idx;
+ }
+ EXPECT_EQUAL(idx.offset(), lastIdx.offset());
+ EXPECT_EQUAL(numUniques, es.getNumUniques());
+ std::cout << "idx.offset(" << idx.offset() << "), str(" << std::string(str, uniqDigits) << ")" << std::endl;
+}
+
+template <typename T>
+T
+EnumStoreTest::random(T low, T high)
+{
+ return (rand() % (high - low)) + low;
+}
+
+std::string
+EnumStoreTest::getRandomString(uint32_t minLen, uint32_t maxLen)
+{
+ uint32_t len = random(minLen, maxLen);
+ std::string retval;
+ for (uint32_t i = 0; i < len; ++i) {
+ char c = random('a', 'z');
+ retval.push_back(c);
+ }
+ return retval;
+}
+
+EnumStoreTest::StringVector
+EnumStoreTest::fillRandomStrings(uint32_t numStrings, uint32_t minLen, uint32_t maxLen)
+{
+ StringVector retval;
+ retval.reserve(numStrings);
+ for (uint32_t i = 0; i < numStrings; ++i) {
+ retval.push_back(getRandomString(minLen, maxLen));
+ }
+ return retval;
+}
+
+EnumStoreTest::StringVector
+EnumStoreTest::sortRandomStrings(StringVector & strings)
+{
+ std::sort(strings.begin(), strings.end());
+ std::vector<std::string> retval;
+ retval.reserve(strings.size());
+ std::vector<std::string>::iterator pos = std::unique(strings.begin(), strings.end());
+ std::copy(strings.begin(), pos, std::back_inserter(retval));
+ return retval;
+}
+
+void
+EnumStoreTest::checkReaders(const StringEnumStore & ses,
+ generation_t sesGen,
+ const std::vector<Reader> & readers)
+{
+ (void) sesGen;
+ //uint32_t refCount = 1000;
+ StringEnumStore::Type t = "";
+ for (uint32_t i = 0; i < readers.size(); ++i) {
+ const Reader & r = readers[i];
+ for (uint32_t j = 0; j < r._indices.size(); ++j) {
+ EXPECT_EQUAL(r._expected[j]._enum, ses.getEnum(r._indices[j]));
+ EXPECT_TRUE(ses.getValue(r._indices[j], t));
+ EXPECT_TRUE(r._expected[j]._string == std::string(t));
+ }
+ }
+}
+
+
+int
+EnumStoreTest::Main()
+{
+ TEST_INIT("enumstore_test");
+
+ testIndex();
+ testStringEntry();
+ testNumericEntry();
+ testFloatEnumStore();
+ testAddEnum();
+ testCompaction();
+ testReset();
+ testHoldListAndGeneration();
+ testMemoryUsage();
+ TEST_DO(requireThatAddressSpaceUsageIsReported());
+ if (_argc > 1) {
+ testBufferLimit(); // large test with 8 GB buffer
+ }
+
+ TEST_DONE();
+}
+}
+
+
+TEST_APPHOOK(search::EnumStoreTest);
diff --git a/searchlib/src/tests/attribute/extendattributes/.gitignore b/searchlib/src/tests/attribute/extendattributes/.gitignore
new file mode 100644
index 00000000000..4018a7d4f5b
--- /dev/null
+++ b/searchlib/src/tests/attribute/extendattributes/.gitignore
@@ -0,0 +1,4 @@
+.depend
+Makefile
+extendattribute_test
+searchlib_extendattribute_test_app
diff --git a/searchlib/src/tests/attribute/extendattributes/CMakeLists.txt b/searchlib/src/tests/attribute/extendattributes/CMakeLists.txt
new file mode 100644
index 00000000000..b0803f0a232
--- /dev/null
+++ b/searchlib/src/tests/attribute/extendattributes/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_extendattribute_test_app
+ SOURCES
+ extendattribute.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_extendattribute_test_app COMMAND sh extendattribute_test.sh)
diff --git a/searchlib/src/tests/attribute/extendattributes/DESC b/searchlib/src/tests/attribute/extendattributes/DESC
new file mode 100644
index 00000000000..4f88189a1d7
--- /dev/null
+++ b/searchlib/src/tests/attribute/extendattributes/DESC
@@ -0,0 +1 @@
+Unit tests for extendable attributes.
diff --git a/searchlib/src/tests/attribute/extendattributes/FILES b/searchlib/src/tests/attribute/extendattributes/FILES
new file mode 100644
index 00000000000..930039cae19
--- /dev/null
+++ b/searchlib/src/tests/attribute/extendattributes/FILES
@@ -0,0 +1 @@
+extendattribute.cpp
diff --git a/searchlib/src/tests/attribute/extendattributes/extendattribute.cpp b/searchlib/src/tests/attribute/extendattributes/extendattribute.cpp
new file mode 100644
index 00000000000..0bb751d26ee
--- /dev/null
+++ b/searchlib/src/tests/attribute/extendattributes/extendattribute.cpp
@@ -0,0 +1,176 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("extendattribute_test");
+#include <vespa/vespalib/testkit/testapp.h>
+#include <vespa/searchlib/attribute/extendableattributes.h>
+
+namespace search {
+
+class ExtendAttributeTest : public vespalib::TestApp
+{
+private:
+ template <typename Attribute>
+ void testExtendInteger(Attribute & attr);
+ template <typename Attribute>
+ void testExtendFloat(Attribute & attr);
+ template <typename Attribute>
+ void testExtendString(Attribute & attr);
+
+public:
+ int Main();
+};
+
+template <typename Attribute>
+void ExtendAttributeTest::testExtendInteger(Attribute & attr)
+{
+ uint32_t docId(0);
+ EXPECT_EQUAL(attr.getNumDocs(), 0u);
+ attr.addDoc(docId);
+ EXPECT_EQUAL(docId, 0u);
+ EXPECT_EQUAL(attr.getNumDocs(), 1u);
+ attr.add(1, 10);
+ EXPECT_EQUAL(attr.getInt(0), 1);
+ attr.add(2, 20);
+ EXPECT_EQUAL(attr.getInt(0), attr.hasMultiValue() ? 1 : 2);
+ if (attr.hasMultiValue()) {
+ AttributeVector::WeightedInt v[2];
+ EXPECT_EQUAL((static_cast<AttributeVector &>(attr)).get(0, v, 2), 2u);
+ EXPECT_EQUAL(v[0].getValue(), 1);
+ EXPECT_EQUAL(v[1].getValue(), 2);
+ if (attr.hasWeightedSetType()) {
+ EXPECT_EQUAL(v[0].getWeight(), 10);
+ EXPECT_EQUAL(v[1].getWeight(), 20);
+ }
+ }
+ attr.addDoc(docId);
+ EXPECT_EQUAL(docId, 1u);
+ EXPECT_EQUAL(attr.getNumDocs(), 2u);
+ attr.add(3, 30);
+ EXPECT_EQUAL(attr.getInt(1), 3);
+ if (attr.hasMultiValue()) {
+ AttributeVector::WeightedInt v[1];
+ EXPECT_EQUAL((static_cast<AttributeVector &>(attr)).get(1, v, 1), 1u);
+ EXPECT_EQUAL(v[0].getValue(), 3);
+ if (attr.hasWeightedSetType()) {
+ EXPECT_EQUAL(v[0].getWeight(), 30);
+ }
+ }
+}
+
+template <typename Attribute>
+void ExtendAttributeTest::testExtendFloat(Attribute & attr)
+{
+ uint32_t docId(0);
+ EXPECT_EQUAL(attr.getNumDocs(), 0u);
+ attr.addDoc(docId);
+ EXPECT_EQUAL(docId, 0u);
+ EXPECT_EQUAL(attr.getNumDocs(), 1u);
+ attr.add(1.7, 10);
+ EXPECT_EQUAL(attr.getInt(0), 1);
+ EXPECT_EQUAL(attr.getFloat(0), 1.7);
+ attr.add(2.3, 20);
+ EXPECT_EQUAL(attr.getFloat(0), attr.hasMultiValue() ? 1.7 : 2.3);
+ if (attr.hasMultiValue()) {
+ AttributeVector::WeightedFloat v[2];
+ EXPECT_EQUAL((static_cast<AttributeVector &>(attr)).get(0, v, 2), 2u);
+ EXPECT_EQUAL(v[0].getValue(), 1.7);
+ EXPECT_EQUAL(v[1].getValue(), 2.3);
+ if (attr.hasWeightedSetType()) {
+ EXPECT_EQUAL(v[0].getWeight(), 10);
+ EXPECT_EQUAL(v[1].getWeight(), 20);
+ }
+ }
+ attr.addDoc(docId);
+ EXPECT_EQUAL(docId, 1u);
+ EXPECT_EQUAL(attr.getNumDocs(), 2u);
+ attr.add(3.6, 30);
+ EXPECT_EQUAL(attr.getFloat(1), 3.6);
+ if (attr.hasMultiValue()) {
+ AttributeVector::WeightedFloat v[1];
+ EXPECT_EQUAL((static_cast<AttributeVector &>(attr)).get(1, v, 1), 1u);
+ EXPECT_EQUAL(v[0].getValue(), 3.6);
+ if (attr.hasWeightedSetType()) {
+ EXPECT_EQUAL(v[0].getWeight(), 30);
+ }
+ }
+}
+
+template <typename Attribute>
+void ExtendAttributeTest::testExtendString(Attribute & attr)
+{
+ uint32_t docId(0);
+ EXPECT_EQUAL(attr.getNumDocs(), 0u);
+ attr.addDoc(docId);
+ EXPECT_EQUAL(docId, 0u);
+ EXPECT_EQUAL(attr.getNumDocs(), 1u);
+ attr.add("1.7", 10);
+ EXPECT_EQUAL(std::string(attr.getString(0, NULL, 0)), "1.7");
+ attr.add("2.3", 20);
+ EXPECT_EQUAL(std::string(attr.getString(0, NULL, 0)), attr.hasMultiValue() ? "1.7" : "2.3");
+ if (attr.hasMultiValue()) {
+ AttributeVector::WeightedString v[2];
+ EXPECT_EQUAL((static_cast<AttributeVector &>(attr)).get(0, v, 2), 2u);
+ EXPECT_EQUAL(v[0].getValue(), "1.7");
+ EXPECT_EQUAL(v[1].getValue(), "2.3");
+ if (attr.hasWeightedSetType()) {
+ EXPECT_EQUAL(v[0].getWeight(), 10);
+ EXPECT_EQUAL(v[1].getWeight(), 20);
+ }
+ }
+ attr.addDoc(docId);
+ EXPECT_EQUAL(docId, 1u);
+ EXPECT_EQUAL(attr.getNumDocs(), 2u);
+ attr.add("3.6", 30);
+ EXPECT_EQUAL(std::string(attr.getString(1, NULL, 0)), "3.6");
+ if (attr.hasMultiValue()) {
+ AttributeVector::WeightedString v[1];
+ EXPECT_EQUAL((static_cast<AttributeVector &>(attr)).get(1, v, 1), 1u);
+ EXPECT_EQUAL(v[0].getValue(), "3.6");
+ if (attr.hasWeightedSetType()) {
+ EXPECT_EQUAL(v[0].getWeight(), 30);
+ }
+ }
+}
+
+int
+ExtendAttributeTest::Main()
+{
+ TEST_INIT("extendattribute_test");
+
+ SingleIntegerExtAttribute siattr("si1");
+ MultiIntegerExtAttribute miattr("mi1");
+ WeightedSetIntegerExtAttribute wsiattr("wsi1");
+ EXPECT_TRUE( ! siattr.hasMultiValue() );
+ EXPECT_TRUE( miattr.hasMultiValue() );
+ EXPECT_TRUE( wsiattr.hasWeightedSetType() );
+ testExtendInteger(siattr);
+ testExtendInteger(miattr);
+ testExtendInteger(wsiattr);
+
+ SingleFloatExtAttribute sdattr("sd1");
+ MultiFloatExtAttribute mdattr("md1");
+ WeightedSetFloatExtAttribute wsdattr("wsd1");
+ EXPECT_TRUE( ! sdattr.hasMultiValue() );
+ EXPECT_TRUE( mdattr.hasMultiValue() );
+ EXPECT_TRUE( wsdattr.hasWeightedSetType() );
+ testExtendFloat(sdattr);
+ testExtendFloat(mdattr);
+ testExtendFloat(wsdattr);
+
+ SingleStringExtAttribute ssattr("ss1");
+ MultiStringExtAttribute msattr("ms1");
+ WeightedSetStringExtAttribute wssattr("wss1");
+ EXPECT_TRUE( ! ssattr.hasMultiValue() );
+ EXPECT_TRUE( msattr.hasMultiValue() );
+ EXPECT_TRUE( wssattr.hasWeightedSetType() );
+ testExtendString(ssattr);
+ testExtendString(msattr);
+ testExtendString(wssattr);
+
+ TEST_DONE();
+}
+
+}
+
+TEST_APPHOOK(search::ExtendAttributeTest);
diff --git a/searchlib/src/tests/attribute/extendattributes/extendattribute_test.sh b/searchlib/src/tests/attribute/extendattributes/extendattribute_test.sh
new file mode 100755
index 00000000000..6f335b18229
--- /dev/null
+++ b/searchlib/src/tests/attribute/extendattributes/extendattribute_test.sh
@@ -0,0 +1,3 @@
+#!/bin/bash
+$VALGRIND ./searchlib_extendattribute_test_app
+rm -rf *.dat
diff --git a/searchlib/src/tests/attribute/gidmapattribute/.gitignore b/searchlib/src/tests/attribute/gidmapattribute/.gitignore
new file mode 100644
index 00000000000..e69de29bb2d
--- /dev/null
+++ b/searchlib/src/tests/attribute/gidmapattribute/.gitignore
diff --git a/searchlib/src/tests/attribute/multivaluemapping/.gitignore b/searchlib/src/tests/attribute/multivaluemapping/.gitignore
new file mode 100644
index 00000000000..743c738a0a2
--- /dev/null
+++ b/searchlib/src/tests/attribute/multivaluemapping/.gitignore
@@ -0,0 +1,4 @@
+.depend
+Makefile
+multivaluemapping_test
+searchlib_multivaluemapping_test_app
diff --git a/searchlib/src/tests/attribute/multivaluemapping/CMakeLists.txt b/searchlib/src/tests/attribute/multivaluemapping/CMakeLists.txt
new file mode 100644
index 00000000000..36c66b09966
--- /dev/null
+++ b/searchlib/src/tests/attribute/multivaluemapping/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_multivaluemapping_test_app
+ SOURCES
+ multivaluemapping_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_multivaluemapping_test_app COMMAND searchlib_multivaluemapping_test_app)
diff --git a/searchlib/src/tests/attribute/multivaluemapping/DESC b/searchlib/src/tests/attribute/multivaluemapping/DESC
new file mode 100644
index 00000000000..44c27ec9926
--- /dev/null
+++ b/searchlib/src/tests/attribute/multivaluemapping/DESC
@@ -0,0 +1 @@
+This is a test for the MultivalueMapping class.
diff --git a/searchlib/src/tests/attribute/multivaluemapping/FILES b/searchlib/src/tests/attribute/multivaluemapping/FILES
new file mode 100644
index 00000000000..bf22403a5fe
--- /dev/null
+++ b/searchlib/src/tests/attribute/multivaluemapping/FILES
@@ -0,0 +1 @@
+multivaluemapping.cpp
diff --git a/searchlib/src/tests/attribute/multivaluemapping/multivaluemapping_test.cpp b/searchlib/src/tests/attribute/multivaluemapping/multivaluemapping_test.cpp
new file mode 100644
index 00000000000..e78e180856b
--- /dev/null
+++ b/searchlib/src/tests/attribute/multivaluemapping/multivaluemapping_test.cpp
@@ -0,0 +1,836 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("multivaluemapping_test");
+#include <vespa/vespalib/testkit/testapp.h>
+//#define DEBUG_MULTIVALUE_MAPPING
+//#define LOG_MULTIVALUE_MAPPING
+#include <vespa/searchlib/attribute/multivaluemapping.h>
+#include <algorithm>
+#include <limits>
+
+namespace search {
+
+namespace
+{
+
+uint32_t dummyCommittedDocIdLimit = std::numeric_limits<uint32_t>::max();
+
+}
+
+typedef MultiValueMappingT<uint32_t> MvMapping;
+typedef MvMapping::Index Index;
+typedef multivalue::Index64 Index64;
+typedef multivalue::Index32 Index32;
+typedef MvMapping::Histogram Histogram;
+
+class MultiValueMappingTest : public vespalib::TestApp
+{
+private:
+ typedef std::vector<Index> IndexVector;
+ typedef std::vector<std::vector<uint32_t> > ExpectedVector;
+ typedef vespalib::GenerationHandler::generation_t generation_t;
+
+ class Reader {
+ public:
+ uint32_t _startGen;
+ uint32_t _endGen;
+ IndexVector _indices;
+ ExpectedVector _expected;
+ uint32_t numKeys() { return _indices.size(); }
+ Reader(uint32_t startGen, uint32_t endGen, const IndexVector & indices,
+ const ExpectedVector & expected) :
+ _startGen(startGen), _endGen(endGen), _indices(indices), _expected(expected) {}
+ };
+
+ typedef std::vector<Reader> ReaderVector;
+
+ void testIndex32();
+ void testIndex64();
+ void testSimpleSetAndGet();
+ void testChangingValueCount();
+
+ void
+ checkReaders(MvMapping &mvm,
+ generation_t mvmGen,
+ ReaderVector &readers);
+
+ void testHoldListAndGeneration();
+ void testManualCompaction();
+ void testVariousGets();
+ void testReplace();
+ void testMemoryUsage();
+ void testShrink();
+ void testHoldElem();
+ void requireThatAddressSpaceUsageIsReported();
+ void requireThatDeadIsNotAccountedInAddressSpaceUsage();
+
+public:
+ int Main();
+};
+
+void
+MultiValueMappingTest::testIndex32()
+{
+ {
+ Index32 idx;
+ EXPECT_EQUAL(idx.values(), 0u);
+ EXPECT_EQUAL(idx.alternative(), 0u);
+ EXPECT_EQUAL(idx.vectorIdx(), 0u);
+ EXPECT_EQUAL(idx.offset(), 0u);
+ }
+ {
+ Index32 idx(3, 0, 1000);
+ EXPECT_EQUAL(idx.values(), 3u);
+ EXPECT_EQUAL(idx.alternative(), 0u);
+ EXPECT_EQUAL(idx.vectorIdx(), 6u);
+ EXPECT_EQUAL(idx.offset(), 1000u);
+ EXPECT_EQUAL(idx.idx(), 0x300003e8u);
+ }
+ {
+ Index32 idx(15, 1, 134217727);
+ EXPECT_EQUAL(idx.values(), 15u);
+ EXPECT_EQUAL(idx.alternative(), 1u);
+ EXPECT_EQUAL(idx.vectorIdx(), 31u);
+ EXPECT_EQUAL(idx.offset(), 134217727u);
+ EXPECT_EQUAL(idx.idx(), 0xffffffffu);
+ }
+ {
+ EXPECT_EQUAL(Index32::maxValues(), 15u);
+ EXPECT_EQUAL(Index32::alternativeSize(), 2u);
+ }
+}
+
+void
+MultiValueMappingTest::testIndex64()
+{
+ {
+ Index64 idx;
+ EXPECT_EQUAL(idx.values(), 0u);
+ EXPECT_EQUAL(idx.alternative(), 0u);
+ EXPECT_EQUAL(idx.vectorIdx(), 0u);
+ EXPECT_EQUAL(idx.offset(), 0u);
+ }
+ {
+ Index64 idx(3, 0, 1000);
+ EXPECT_EQUAL(idx.values(), 3u);
+ EXPECT_EQUAL(idx.alternative(), 0u);
+ EXPECT_EQUAL(idx.vectorIdx(), 6u);
+ EXPECT_EQUAL(idx.offset(), 1000u);
+ EXPECT_EQUAL(idx.idx(), 0x3000003e8ull);
+ }
+ {
+ Index64 idx(15, 1, 134217727);
+ EXPECT_EQUAL(idx.values(), 15u);
+ EXPECT_EQUAL(idx.alternative(), 1u);
+ EXPECT_EQUAL(idx.vectorIdx(), 31u);
+ EXPECT_EQUAL(idx.offset(), 134217727u);
+ EXPECT_EQUAL(idx.idx(), 0xf87ffffffull);
+ }
+ {
+ EXPECT_EQUAL(Index64::maxValues(), 1023u);
+ EXPECT_EQUAL(Index64::alternativeSize(), 2u);
+ }
+}
+
+void
+MultiValueMappingTest::testSimpleSetAndGet()
+{
+ uint32_t maxValueCount = Index::maxValues() * 2;
+ uint32_t numKeys = maxValueCount * 2;
+ MvMapping mvm(dummyCommittedDocIdLimit, numKeys);
+ EXPECT_EQUAL(mvm.getNumKeys(), numKeys);
+ Index idx;
+
+ // insert values
+ for (uint32_t key = 0; key < numKeys; ++key) {
+ uint32_t valueCount = key / maxValueCount;
+ std::vector<uint32_t> values(valueCount, key);
+ Histogram needed(Index::maxValues());
+ needed[valueCount] = 1;
+ if (!mvm.enoughCapacity(needed)) {
+ mvm.trimHoldLists(1);
+ mvm.performCompaction(needed);
+ }
+ mvm.set(key, values);
+ EXPECT_EQUAL(mvm.getValueCount(key), valueCount);
+ idx = mvm._indices[key];
+ if (valueCount < Index::maxValues()) {
+ EXPECT_EQUAL(idx.values(), valueCount);
+ } else {
+ EXPECT_EQUAL(idx.values(), Index::maxValues());
+ }
+#ifdef LOG_MULTIVALUE_MAPPING
+ LOG(info, "------------------------------------------------------------");
+#endif
+ }
+ EXPECT_TRUE(!mvm.hasKey(numKeys));
+
+ // check for expected values
+ for (uint32_t key = 0; key < numKeys; ++key) {
+ uint32_t valueCount = key / maxValueCount;
+ EXPECT_EQUAL(mvm.getValueCount(key), valueCount);
+ std::vector<uint32_t> buffer(valueCount);
+ EXPECT_EQUAL(mvm.get(key, buffer), valueCount);
+ EXPECT_TRUE(buffer.size() == valueCount);
+ EXPECT_EQUAL(static_cast<uint32_t>(std::count(buffer.begin(), buffer.end(), key)), valueCount);
+ uint32_t value;
+ const uint32_t * handle = NULL;
+ EXPECT_EQUAL(mvm.get(key, handle), valueCount);
+ EXPECT_TRUE(valueCount == 0 ? handle == NULL : handle != NULL);
+ for (uint32_t i = 0; i < valueCount; ++i) {
+ EXPECT_TRUE(mvm.get(key, i, value));
+ EXPECT_EQUAL(value, key);
+ EXPECT_TRUE(handle[i] == key);
+ }
+ EXPECT_TRUE(!mvm.get(key, valueCount, value));
+ }
+
+ // reset
+ mvm.reset(10);
+ EXPECT_TRUE(mvm.getNumKeys() == 10);
+ EXPECT_TRUE(!mvm.hasKey(10));
+ EXPECT_TRUE(mvm._genHolder.getHeldBytes() == 0);
+ for (uint32_t key = 0; key < 10; ++key) {
+ EXPECT_TRUE(mvm.getValueCount(key) == 0);
+ std::vector<uint32_t> buffer;
+ EXPECT_TRUE(mvm.get(key, buffer) == 0);
+ EXPECT_TRUE(buffer.size() == 0);
+ }
+
+ // add more keys
+ for (uint32_t i = 0; i < 5; ++i) {
+ uint32_t key;
+ mvm.addKey(key);
+ EXPECT_TRUE(key == 10 + i);
+ EXPECT_TRUE(mvm.getNumKeys() == 11 + i);
+ }
+}
+
+void
+MultiValueMappingTest::testChangingValueCount()
+{
+ uint32_t numKeys = 10;
+ uint32_t maxCount = Index::maxValues() + 1;
+ Histogram initCapacity(Index::maxValues());
+ for (uint32_t i = 0; i < Index::maxValues(); ++i) {
+ initCapacity[i] = numKeys;
+ }
+ initCapacity[Index::maxValues()] = numKeys * 2;
+ MvMapping mvm(dummyCommittedDocIdLimit, numKeys, initCapacity);
+
+ // Increasing the value count for some keys
+ for (uint32_t valueCount = 1; valueCount <= maxCount; ++valueCount) {
+#ifdef LOG_MULTIVALUE_MAPPING
+ LOG(info, "########################### %u ##############################", valueCount);
+#endif
+ uint32_t lastValueCount = valueCount - 1;
+ // set values
+ for (uint32_t key = 0; key < numKeys; ++key) {
+ std::vector<uint32_t> buffer(valueCount, key);
+ mvm.set(key, buffer);
+ }
+
+ Histogram remaining = mvm.getRemaining();
+ if (valueCount < Index::maxValues()) {
+ EXPECT_TRUE(remaining[valueCount] == 0);
+ } else {
+ EXPECT_TRUE(remaining[Index::maxValues()] == numKeys * (maxCount - valueCount));
+ }
+
+ if (valueCount < Index::maxValues()) {
+ MvMapping::SingleVectorPtr current = mvm.getSingleVector(valueCount, MvMapping::ACTIVE);
+ EXPECT_TRUE(current.first->used() == numKeys * (valueCount));
+ EXPECT_TRUE(current.first->dead() == 0);
+
+ if (lastValueCount != 0) {
+ MvMapping::SingleVectorPtr last = mvm.getSingleVector(lastValueCount, MvMapping::ACTIVE);
+ EXPECT_TRUE(last.first->used() == numKeys * (lastValueCount));
+ EXPECT_TRUE(last.first->dead() == numKeys * (lastValueCount));
+ }
+ } else {
+ MvMapping::VectorVectorPtr current = mvm.getVectorVector(MvMapping::ACTIVE);
+ EXPECT_TRUE(current.first->used() == numKeys * (valueCount - Index::maxValues() + 1));
+ EXPECT_TRUE(current.first->dead() == numKeys * (valueCount - Index::maxValues()));
+ }
+
+ // check values
+ for (uint32_t key = 0; key < numKeys; ++key) {
+ std::vector<uint32_t> buffer(valueCount);
+ EXPECT_TRUE(mvm.get(key, buffer) == valueCount);
+ EXPECT_TRUE(static_cast<uint32_t>(std::count(buffer.begin(), buffer.end(), key)) == valueCount);
+ }
+ }
+}
+
+void
+MultiValueMappingTest::checkReaders(MvMapping &mvm,
+ generation_t mvmGen,
+ ReaderVector &readers)
+{
+ for (ReaderVector::iterator iter = readers.begin();
+ iter != readers.end(); ) {
+ if (iter->_endGen <= mvmGen) {
+#ifdef LOG_MULTIVALUE_MAPPING
+ LOG(info, "check and remove reader: start = %u, end = %u",
+ iter->_startGen, iter->_endGen);
+#endif
+ for (uint32_t key = 0; key < iter->numKeys(); ++key) {
+ Index idx = iter->_indices[key];
+ uint32_t valueCount = iter->_expected[key].size();
+ if (valueCount < Index::maxValues()) {
+ EXPECT_TRUE(idx.values() == valueCount);
+ for (uint32_t i = idx.offset() * idx.values(), j = 0;
+ i < (idx.offset() + 1) * idx.values() && j < iter->_expected[key].size();
+ ++i, ++j)
+ {
+ EXPECT_TRUE(mvm._singleVectors[idx.vectorIdx()][i] == iter->_expected[key][j]);
+ }
+ } else {
+ EXPECT_TRUE(mvm._vectorVectors[idx.alternative()][idx.offset()].size() ==
+ valueCount);
+ EXPECT_TRUE(std::equal(mvm._vectorVectors[idx.alternative()][idx.offset()].begin(),
+ mvm._vectorVectors[idx.alternative()][idx.offset()].end(),
+ iter->_expected[key].begin()));
+ }
+ }
+ iter = readers.erase(iter);
+ } else {
+ ++iter;
+ }
+ }
+}
+
+void
+MultiValueMappingTest::testHoldListAndGeneration()
+{
+ uint32_t numKeys = 10;
+ uint32_t maxCount = Index::maxValues() + 1;
+ uint32_t maxKeys = numKeys * 2;
+
+ Histogram initCapacity(Index::maxValues());
+ for (uint32_t i = 1; i < maxCount; ++i) {
+ initCapacity[i] = numKeys; // make enough capacity for 1/2 of the keys
+ }
+ MvMapping mvm(dummyCommittedDocIdLimit, maxKeys, initCapacity);
+ EXPECT_TRUE(mvm.enoughCapacity(initCapacity));
+
+ ReaderVector readers;
+ uint32_t safeGen = std::numeric_limits<uint32_t>::max();
+ uint32_t readDuration = 2;
+ generation_t mvmGen = 0u;
+
+ for (uint32_t valueCount = 1; valueCount < maxCount; ++valueCount) {
+#ifdef LOG_MULTIVALUE_MAPPING
+ LOG(info, "#################### count(%u) - gen(%u) ####################",
+ valueCount, mvm.getGeneration());
+#endif
+
+ // check and remove readers
+ checkReaders(mvm, mvmGen, readers);
+
+ // update safe generation and removeOldGenerations
+ safeGen = std::numeric_limits<uint32_t>::max();
+ for (ReaderVector::iterator iter = readers.begin(); iter != readers.end(); ++iter) {
+ if ((*iter)._startGen < safeGen) {
+ safeGen= (*iter)._startGen;
+ }
+ }
+ mvm.trimHoldLists(safeGen);
+
+ // set new values for 1/2 of the keys
+ for (uint32_t key = 0; key < numKeys; ++key) {
+ std::vector<uint32_t> values(valueCount, valueCount * numKeys + key);
+ mvm.set(key, values);
+ }
+ // check new values
+ for (uint32_t key = 0; key < numKeys; ++key) {
+ EXPECT_TRUE(mvm.getValueCount(key) == valueCount);
+ std::vector<uint32_t> buffer(valueCount);
+ EXPECT_TRUE(mvm.get(key, buffer) == valueCount);
+ EXPECT_TRUE(static_cast<uint32_t>(std::count(buffer.begin(), buffer.end(), valueCount * numKeys + key)) == valueCount);
+ }
+ mvm.transferHoldLists(mvmGen);
+ ++mvmGen;
+
+ // associate reader with current generation
+ IndexVector indices;
+ ExpectedVector expected;
+ for (uint32_t key = 0; key < numKeys; ++key) {
+ indices.push_back(mvm._indices[key]);
+ expected.push_back(std::vector<uint32_t>(valueCount, valueCount * numKeys + key));
+ }
+ readers.push_back(Reader(mvmGen, mvmGen + readDuration,
+ indices, expected));
+ readDuration = (readDuration % 4) + 2;
+
+ // perform compaction
+ Histogram needed(Index::maxValues());
+ needed[valueCount] = maxKeys;
+ EXPECT_TRUE(!mvm.enoughCapacity(needed));
+ mvm.performCompaction(needed);
+
+ // set new value for all keys (the associated reader should see the old values)
+ for (uint32_t key = 0; key < maxKeys; ++key) {
+ std::vector<uint32_t> values(valueCount, valueCount * maxKeys + key);
+ mvm.set(key, values);
+ }
+ // check new values
+ for (uint32_t key = 0; key < maxKeys; ++key) {
+ EXPECT_TRUE(mvm.getValueCount(key) == valueCount);
+ std::vector<uint32_t> buffer(valueCount);
+ EXPECT_TRUE(mvm.get(key, buffer) == valueCount);
+ EXPECT_TRUE(static_cast<uint32_t>(std::count(buffer.begin(), buffer.end(), valueCount * maxKeys + key)) == valueCount);
+ }
+
+ mvm.transferHoldLists(mvmGen);
+ ++mvmGen;
+ }
+ while (!readers.empty()) {
+ checkReaders(mvm, mvmGen, readers);
+ mvm.transferHoldLists(mvmGen);
+ ++mvmGen;
+ }
+}
+
+void
+MultiValueMappingTest::testManualCompaction()
+{
+ Histogram initCapacity(Index::maxValues());
+ uint32_t maxCount = Index::maxValues() + 1;
+ for (uint32_t i = 1; i < maxCount; ++i) {
+ initCapacity[i] = 1;
+ }
+ MvMapping mvm(dummyCommittedDocIdLimit, maxCount * 2, initCapacity);
+ EXPECT_TRUE(mvm.enoughCapacity(initCapacity));
+
+ // first update pass. use all capacity
+ for (uint32_t key = 1; key < maxCount; ++key) {
+ std::vector<uint32_t> values(key, key);
+ Histogram needed(Index::maxValues());
+ needed[key] = 1;
+ EXPECT_TRUE(mvm.enoughCapacity(needed));
+ mvm.set(key, values);
+ EXPECT_TRUE(!mvm.enoughCapacity(needed));
+ }
+ // second update pass. must perform compaction
+ for (uint32_t key = maxCount + 1; key < maxCount * 2; ++key) {
+ uint32_t valueCount = key % maxCount;
+ std::vector<uint32_t> values(valueCount, key);
+ Histogram needed(Index::maxValues());
+ needed[valueCount] = 1;
+ EXPECT_TRUE(!mvm.enoughCapacity(needed));
+ mvm.performCompaction(needed);
+ EXPECT_TRUE(mvm.enoughCapacity(needed));
+ mvm.set(key, values);
+ }
+ // check for correct buffer values
+ for (uint32_t key = 0; key < maxCount * 2; ++key) {
+ uint32_t valueCount = key % maxCount;
+ EXPECT_TRUE(mvm.getValueCount(key) == valueCount);
+ std::vector<uint32_t> buffer(valueCount);
+ EXPECT_TRUE(mvm.get(key, buffer) == valueCount);
+ EXPECT_TRUE(static_cast<uint32_t>(std::count(buffer.begin(), buffer.end(), key)) == valueCount);
+ }
+
+ // reset
+ mvm.reset(maxCount, initCapacity);
+ EXPECT_TRUE(mvm.getNumKeys() == maxCount);
+ EXPECT_TRUE(mvm.enoughCapacity(initCapacity));
+
+ // new update pass. use all capacity
+ for (uint32_t key = 1; key < maxCount; ++key) {
+ std::vector<uint32_t> values(key, key);
+ Histogram needed(Index::maxValues());
+ needed[key] = 1;
+ EXPECT_EQUAL(mvm.getValueCount(key), 0u);
+ EXPECT_TRUE(mvm.enoughCapacity(needed));
+ mvm.set(key, values);
+ EXPECT_TRUE(!mvm.enoughCapacity(needed));
+ }
+}
+
+void
+MultiValueMappingTest::testVariousGets()
+{
+ MvMapping::Histogram initCapacity(Index::maxValues());
+ initCapacity[5] = 1;
+ initCapacity[Index::maxValues()] = 1;
+ MvMapping mvm(dummyCommittedDocIdLimit, 3, initCapacity);
+ Index idx;
+
+ mvm.set(1, std::vector<uint32_t>(5, 50));
+ mvm.set(2, std::vector<uint32_t>(25, 250));
+ EXPECT_TRUE(25 >= Index::maxValues());
+
+ {
+ std::vector<uint32_t> buffer(5);
+ EXPECT_TRUE(mvm.get(0, &buffer[0], 0) == 0);
+ EXPECT_TRUE(static_cast<uint32_t>(std::count(buffer.begin(), buffer.end(), (uint32_t)5)) == 0);
+ }
+ {
+ std::vector<uint32_t> buffer(5);
+ EXPECT_TRUE(mvm.get(0, &buffer[0], 5) == 0);
+ EXPECT_TRUE(static_cast<uint32_t>(std::count(buffer.begin(), buffer.end(), (uint32_t)5)) == 0);
+ }
+ {
+ std::vector<uint32_t> buffer(10);
+ EXPECT_TRUE(mvm.get(1, &buffer[0], 3) == 5);
+ EXPECT_TRUE(static_cast<uint32_t>(std::count(buffer.begin(), buffer.end(), (uint32_t)50)) == 3);
+ }
+ {
+ std::vector<uint32_t> buffer(10);
+ EXPECT_TRUE(mvm.get(1, &buffer[0], 10) == 5);
+ EXPECT_TRUE(static_cast<uint32_t>(std::count(buffer.begin(), buffer.end(), (uint32_t)50)) == 5);
+ }
+ {
+ std::vector<uint32_t> buffer(30);
+ EXPECT_TRUE(mvm.get(2, &buffer[0], 23) == 25);
+ EXPECT_TRUE(static_cast<uint32_t>(std::count(buffer.begin(), buffer.end(), (uint32_t)250)) == 23);
+ }
+ {
+ std::vector<uint32_t> buffer(30);
+ EXPECT_TRUE(mvm.get(2, &buffer[0], 30) == 25);
+ EXPECT_TRUE(static_cast<uint32_t>(std::count(buffer.begin(), buffer.end(), (uint32_t)250)) == 25);
+ }
+}
+
+void
+MultiValueMappingTest::testReplace()
+{
+ MvMapping::Histogram initCapacity(Index::maxValues());
+ initCapacity[5] = 1;
+ initCapacity[Index::maxValues()] = 1;
+ MvMapping mvm(dummyCommittedDocIdLimit, 3, initCapacity);
+ Index idx;
+
+ mvm.set(1, std::vector<uint32_t>(5, 50));
+ mvm.set(2, std::vector<uint32_t>(25, 100));
+ EXPECT_TRUE(25 >= Index::maxValues());
+
+ {
+ EXPECT_TRUE(mvm.getValueCount(0) == 0);
+ std::vector<uint32_t> replace(5, 50);
+ mvm.replace(0, replace);
+ EXPECT_TRUE(mvm.getValueCount(0) == 0);
+ }
+ {
+ EXPECT_TRUE(mvm.getValueCount(1) == 5);
+ std::vector<uint32_t> buffer(5);
+ EXPECT_TRUE(mvm.get(1, buffer) == 5);
+ EXPECT_TRUE(static_cast<uint32_t>(std::count(buffer.begin(), buffer.end(), (uint32_t)50)) == 5);
+
+ std::vector<uint32_t> replace(5, 55);
+ mvm.replace(1, replace);
+ EXPECT_TRUE(mvm.getValueCount(1) == 5);
+ EXPECT_TRUE(mvm.get(1, buffer) == 5);
+ EXPECT_TRUE(static_cast<uint32_t>(std::count(buffer.begin(), buffer.end(), (uint32_t)55)) == 5);
+ }
+ {
+ EXPECT_TRUE(mvm.getValueCount(2) == 25);
+ std::vector<uint32_t> buffer(25);
+ EXPECT_TRUE(mvm.get(2, buffer) == 25);
+ EXPECT_TRUE(static_cast<uint32_t>(std::count(buffer.begin(), buffer.end(), (uint32_t)100)) == 25);
+
+ std::vector<uint32_t> replace(25, 200);
+ mvm.replace(2, replace);
+ EXPECT_TRUE(mvm.getValueCount(2) == 25);
+ EXPECT_TRUE(mvm.get(2, buffer) == 25);
+ EXPECT_TRUE(static_cast<uint32_t>(std::count(buffer.begin(), buffer.end(), (uint32_t)200)) == 25);
+ }
+}
+
+void
+MultiValueMappingTest::testMemoryUsage()
+{
+ uint32_t numKeys = Index::maxValues() + 4;
+ MemoryUsage exp;
+ exp.incAllocatedBytes(numKeys * sizeof(Index));
+ exp.incUsedBytes(numKeys * sizeof(Index));
+ uint32_t totalCnt = 0;
+
+ Histogram initCapacity(Index::maxValues());
+ for (uint32_t i = 0; i < Index::maxValues(); ++i) {
+ initCapacity[i] = 2;
+ exp.incAllocatedBytes(i * 2 * sizeof(uint32_t));
+ }
+ initCapacity[Index::maxValues()] = 12;
+ exp.incAllocatedBytes(12 * sizeof(vespalib::Array<uint32_t>)); // due to vector vector
+
+ MvMapping mvm(dummyCommittedDocIdLimit,
+ numKeys, initCapacity, GrowStrategy(numKeys));
+
+ // usage before inserting values
+ MemoryUsage usage = mvm.getMemoryUsage();
+ EXPECT_EQUAL(mvm.getTotalValueCnt(), totalCnt);
+ EXPECT_EQUAL(usage.allocatedBytes(), exp.allocatedBytes());
+ EXPECT_EQUAL(usage.usedBytes(), exp.usedBytes());
+ EXPECT_EQUAL(usage.deadBytes(), uint32_t(0));
+ EXPECT_EQUAL(usage.allocatedBytesOnHold(), uint32_t(0));
+
+ // insert values for all keys
+ for (uint32_t key = 0; key < numKeys; ++key) {
+ uint32_t cnt = key + 1;
+ std::vector<uint32_t> values(cnt, key);
+ mvm.set(key, values);
+ EXPECT_EQUAL(mvm.getValueCount(key), cnt);
+ totalCnt += cnt;
+ exp.incUsedBytes(cnt * sizeof(uint32_t));
+ if (cnt >= Index::maxValues()) {
+ exp.incAllocatedBytes(cnt * sizeof(uint32_t));
+ exp.incUsedBytes(sizeof(vespalib::Array<uint32_t>)); // due to vector vector
+ }
+ }
+
+ // usage after inserting values
+ usage = mvm.getMemoryUsage();
+ EXPECT_EQUAL(mvm.getTotalValueCnt(), totalCnt);
+ EXPECT_EQUAL(usage.allocatedBytes(), exp.allocatedBytes());
+ EXPECT_EQUAL(usage.usedBytes(), exp.usedBytes());
+ EXPECT_EQUAL(usage.deadBytes(), uint32_t(0));
+ EXPECT_EQUAL(usage.allocatedBytesOnHold(), uint32_t(0));
+
+ totalCnt = 0;
+ // insert new values for all keys making dead bytes
+ for (uint32_t key = 0; key < numKeys; ++key) {
+ uint32_t cnt = key + 2;
+ std::vector<uint32_t> values(cnt, key);
+ mvm.set(key, values);
+ EXPECT_EQUAL(mvm.getValueCount(key), cnt);
+ totalCnt += cnt;
+ exp.incUsedBytes(cnt * sizeof(uint32_t));
+ if ((cnt - 1) < Index::maxValues()) {
+ exp.incDeadBytes((cnt - 1) * sizeof(uint32_t)); // the previous values are marked dead
+ } else {
+ exp.incAllocatedBytesOnHold((cnt - 1) * sizeof(uint32_t) +
+ sizeof(vespalib::Array<uint32_t>));
+ }
+ if (cnt >= Index::maxValues()) {
+ exp.incAllocatedBytes(cnt * sizeof(uint32_t));
+ exp.incUsedBytes(sizeof(vespalib::Array<uint32_t>)); // due to vector vector
+ }
+ }
+
+ // usage after inserting new values making dead bytes
+ usage = mvm.getMemoryUsage();
+ EXPECT_EQUAL(mvm.getTotalValueCnt(), totalCnt);
+ EXPECT_EQUAL(usage.allocatedBytes(), exp.allocatedBytes());
+ EXPECT_EQUAL(usage.usedBytes(), exp.usedBytes());
+ EXPECT_EQUAL(usage.deadBytes(), exp.deadBytes());
+ EXPECT_EQUAL(usage.allocatedBytesOnHold(), exp.allocatedBytesOnHold());
+
+ // make sure all internal vectors are put on hold list
+ mvm.performCompaction(initCapacity);
+ usage = mvm.getMemoryUsage();
+ EXPECT_EQUAL(mvm.getTotalValueCnt(), totalCnt);
+ EXPECT_EQUAL(usage.usedBytes(), exp.usedBytes() - exp.deadBytes() - exp.allocatedBytesOnHold());
+ EXPECT_EQUAL(usage.deadBytes(), uint32_t(0));
+ EXPECT_EQUAL(usage.allocatedBytesOnHold(), exp.allocatedBytes() - numKeys * sizeof(Index) + exp.allocatedBytesOnHold());
+ mvm.transferHoldLists(0);
+ mvm.trimHoldLists(1);
+ usage = mvm.getMemoryUsage();
+ EXPECT_EQUAL(mvm.getTotalValueCnt(), totalCnt);
+ EXPECT_EQUAL(usage.usedBytes(), exp.usedBytes() - exp.deadBytes() - exp.allocatedBytesOnHold());
+ EXPECT_EQUAL(usage.deadBytes(), uint32_t(0));
+ EXPECT_EQUAL(usage.allocatedBytesOnHold(), 0u);
+}
+
+
+void
+MultiValueMappingTest::testShrink()
+{
+ uint32_t committedDocIdLimit = dummyCommittedDocIdLimit;
+ MvMapping mvm(committedDocIdLimit);
+ for (uint32_t i = 0; i < 10; ++i) {
+ uint32_t k;
+ mvm.addKey(k);
+ EXPECT_EQUAL(i, k);
+ }
+ mvm.transferHoldLists(0);
+ mvm.trimHoldLists(1);
+ uint32_t shrinkTarget = 4;
+ committedDocIdLimit = shrinkTarget;
+ mvm.shrinkKeys(shrinkTarget);
+ mvm.transferHoldLists(1);
+ mvm.trimHoldLists(2);
+ EXPECT_EQUAL(shrinkTarget, mvm.getNumKeys());
+ EXPECT_EQUAL(shrinkTarget, mvm.getCapacityKeys());
+}
+
+
+void
+MultiValueMappingTest::testHoldElem()
+{
+ uint32_t numKeys = 1;
+ MemoryUsage exp;
+ exp.incAllocatedBytes(numKeys * sizeof(Index));
+ exp.incUsedBytes(numKeys * sizeof(Index));
+
+ Histogram initCapacity(Index::maxValues());
+ initCapacity[Index::maxValues()] = 3;
+ exp.incAllocatedBytes(3 * sizeof(vespalib::Array<uint32_t>)); // due to vector vector
+
+ MvMapping mvm(dummyCommittedDocIdLimit,
+ numKeys, initCapacity, GrowStrategy(numKeys));
+
+ // usage before inserting values
+ MemoryUsage usage = mvm.getMemoryUsage();
+ EXPECT_EQUAL(mvm.getTotalValueCnt(), 0u);
+ EXPECT_EQUAL(usage.allocatedBytes(), exp.allocatedBytes());
+ EXPECT_EQUAL(usage.usedBytes(), exp.usedBytes());
+ EXPECT_EQUAL(usage.deadBytes(), exp.deadBytes());
+ EXPECT_EQUAL(usage.allocatedBytesOnHold(), exp.allocatedBytesOnHold());
+
+ uint32_t key = 0;
+ uint32_t cnt = Index::maxValues() + 3;
+ {
+ std::vector<uint32_t> values(cnt, key);
+ mvm.set(key, values);
+ exp.incAllocatedBytes(cnt * sizeof(uint32_t));
+ exp.incUsedBytes(cnt * sizeof(uint32_t) +
+ sizeof(vespalib::Array<uint32_t>));
+ }
+ usage = mvm.getMemoryUsage();
+ EXPECT_EQUAL(mvm.getTotalValueCnt(), cnt);
+ EXPECT_EQUAL(usage.allocatedBytes(), exp.allocatedBytes());
+ EXPECT_EQUAL(usage.usedBytes(), exp.usedBytes());
+ EXPECT_EQUAL(usage.deadBytes(), exp.deadBytes());
+ EXPECT_EQUAL(usage.allocatedBytesOnHold(), exp.allocatedBytesOnHold());
+ ++cnt;
+ {
+ std::vector<uint32_t> values(cnt, key);
+ mvm.set(key, values);
+ exp.incAllocatedBytes(cnt * sizeof(uint32_t));
+ exp.incUsedBytes(cnt * sizeof(uint32_t) +
+ sizeof(vespalib::Array<uint32_t>));
+ exp.incAllocatedBytesOnHold((cnt - 1) * sizeof(uint32_t) +
+ sizeof(vespalib::Array<uint32_t>));
+ }
+ usage = mvm.getMemoryUsage();
+ EXPECT_EQUAL(mvm.getTotalValueCnt(), cnt);
+ EXPECT_EQUAL(usage.allocatedBytes(), exp.allocatedBytes());
+ EXPECT_EQUAL(usage.usedBytes(), exp.usedBytes());
+ EXPECT_EQUAL(usage.deadBytes(), exp.deadBytes());
+ EXPECT_EQUAL(usage.allocatedBytesOnHold(), exp.allocatedBytesOnHold());
+ mvm.transferHoldLists(0);
+ mvm.trimHoldLists(1);
+ exp.incDeadBytes(sizeof(vespalib::Array<uint32_t>));
+ exp.decAllocatedBytes((cnt - 1) * sizeof(uint32_t));
+ usage = mvm.getMemoryUsage();
+ EXPECT_EQUAL(mvm.getTotalValueCnt(), cnt);
+ EXPECT_EQUAL(usage.allocatedBytes(), exp.allocatedBytes());
+ EXPECT_EQUAL(usage.usedBytes(), exp.usedBytes());
+ EXPECT_EQUAL(usage.deadBytes(), exp.deadBytes());
+ EXPECT_EQUAL(usage.allocatedBytesOnHold(), 0u);
+}
+
+namespace {
+
+void
+insertValues(MvMapping &mvm, uint32_t key, uint32_t count)
+{
+ std::vector<uint32_t> values(count, 13);
+ mvm.set(key, values);
+}
+
+Histogram
+createHistogram(uint32_t numValuesPerValueClass)
+{
+ Histogram result(Index32::maxValues());
+ for (uint32_t i = 0; i <= Index32::maxValues(); ++i) {
+ result[i] = numValuesPerValueClass;
+ }
+ return result;
+}
+
+const size_t ADDRESS_LIMIT = 134217728; // Index32::offsetSize()
+
+struct AddressSpaceFixture
+{
+ MvMapping mvm;
+ AddressSpaceFixture()
+ : mvm(dummyCommittedDocIdLimit, 20, createHistogram(4), GrowStrategy(20))
+ {}
+};
+
+}
+
+void
+MultiValueMappingTest::requireThatAddressSpaceUsageIsReported()
+{
+ AddressSpaceFixture f;
+ MvMapping &mvm = f.mvm;
+
+ EXPECT_EQUAL(AddressSpace(0, ADDRESS_LIMIT), mvm.getAddressSpaceUsage());
+ insertValues(mvm, 1, 1);
+ EXPECT_EQUAL(AddressSpace(1, ADDRESS_LIMIT), mvm.getAddressSpaceUsage());
+ insertValues(mvm, 2, 2);
+ insertValues(mvm, 3, 2);
+ EXPECT_EQUAL(AddressSpace(2, ADDRESS_LIMIT), mvm.getAddressSpaceUsage());
+ insertValues(mvm, 4, 13);
+ insertValues(mvm, 5, 13);
+ insertValues(mvm, 6, 13);
+ EXPECT_EQUAL(AddressSpace(3, ADDRESS_LIMIT), mvm.getAddressSpaceUsage());
+ insertValues(mvm, 7, 14);
+ insertValues(mvm, 8, 14);
+ insertValues(mvm, 9, 14);
+ EXPECT_EQUAL(AddressSpace(3, ADDRESS_LIMIT), mvm.getAddressSpaceUsage());
+ insertValues(mvm, 10, 15);
+ insertValues(mvm, 11, 16);
+ insertValues(mvm, 12, 17);
+ insertValues(mvm, 13, 18);
+ EXPECT_EQUAL(AddressSpace(4, ADDRESS_LIMIT), mvm.getAddressSpaceUsage());
+}
+
+void
+MultiValueMappingTest::requireThatDeadIsNotAccountedInAddressSpaceUsage()
+{
+ AddressSpaceFixture f;
+ MvMapping &mvm = f.mvm;
+
+ EXPECT_EQUAL(AddressSpace(0, ADDRESS_LIMIT), mvm.getAddressSpaceUsage());
+ insertValues(mvm, 1, 3);
+ insertValues(mvm, 2, 3);
+ insertValues(mvm, 3, 3);
+ insertValues(mvm, 4, 3);
+ EXPECT_EQUAL(AddressSpace(4, ADDRESS_LIMIT), mvm.getAddressSpaceUsage());
+ insertValues(mvm, 1, 4);
+ EXPECT_EQUAL(AddressSpace(3, ADDRESS_LIMIT), mvm.getAddressSpaceUsage());
+ insertValues(mvm, 2, 5);
+ EXPECT_EQUAL(AddressSpace(2, ADDRESS_LIMIT), mvm.getAddressSpaceUsage());
+ insertValues(mvm, 10, 15);
+ insertValues(mvm, 11, 15);
+ insertValues(mvm, 12, 15);
+ insertValues(mvm, 13, 15);
+ EXPECT_EQUAL(AddressSpace(4, ADDRESS_LIMIT), mvm.getAddressSpaceUsage());
+ insertValues(mvm, 10, 14);
+ EXPECT_EQUAL(AddressSpace(3, ADDRESS_LIMIT), mvm.getAddressSpaceUsage());
+ insertValues(mvm, 11, 14);
+ EXPECT_EQUAL(AddressSpace(2, ADDRESS_LIMIT), mvm.getAddressSpaceUsage());
+}
+
+int
+MultiValueMappingTest::Main()
+{
+ TEST_INIT("multivaluemapping_test");
+
+ testIndex32();
+ testIndex64();
+ testSimpleSetAndGet();
+ testChangingValueCount();
+ testHoldListAndGeneration();
+ testManualCompaction();
+ testVariousGets();
+ testReplace();
+ testMemoryUsage();
+ testShrink();
+ testHoldElem();
+ TEST_DO(requireThatAddressSpaceUsageIsReported());
+ TEST_DO(requireThatDeadIsNotAccountedInAddressSpaceUsage());
+
+ TEST_DONE();
+}
+
+}
+
+TEST_APPHOOK(search::MultiValueMappingTest);
diff --git a/searchlib/src/tests/attribute/postinglist/.gitignore b/searchlib/src/tests/attribute/postinglist/.gitignore
new file mode 100644
index 00000000000..8cf10f7f9dc
--- /dev/null
+++ b/searchlib/src/tests/attribute/postinglist/.gitignore
@@ -0,0 +1,4 @@
+.depend
+Makefile
+postinglist_test
+searchlib_postinglist_test_app
diff --git a/searchlib/src/tests/attribute/postinglist/CMakeLists.txt b/searchlib/src/tests/attribute/postinglist/CMakeLists.txt
new file mode 100644
index 00000000000..a22d1ae2fdc
--- /dev/null
+++ b/searchlib/src/tests/attribute/postinglist/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_postinglist_test_app
+ SOURCES
+ postinglist.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_postinglist_test_app COMMAND searchlib_postinglist_test_app)
diff --git a/searchlib/src/tests/attribute/postinglist/DESC b/searchlib/src/tests/attribute/postinglist/DESC
new file mode 100644
index 00000000000..1499e3070fb
--- /dev/null
+++ b/searchlib/src/tests/attribute/postinglist/DESC
@@ -0,0 +1 @@
+This is a test for the AttributePostingList class.
diff --git a/searchlib/src/tests/attribute/postinglist/FILES b/searchlib/src/tests/attribute/postinglist/FILES
new file mode 100644
index 00000000000..268f6c09f1e
--- /dev/null
+++ b/searchlib/src/tests/attribute/postinglist/FILES
@@ -0,0 +1 @@
+postinglist.cpp
diff --git a/searchlib/src/tests/attribute/postinglist/postinglist.cpp b/searchlib/src/tests/attribute/postinglist/postinglist.cpp
new file mode 100644
index 00000000000..ab95ce27a0e
--- /dev/null
+++ b/searchlib/src/tests/attribute/postinglist/postinglist.cpp
@@ -0,0 +1,707 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("postinglist_test");
+#include <vespa/vespalib/testkit/testapp.h>
+#include <vespa/searchlib/util/rand48.h>
+#include <algorithm>
+#include <limits>
+#include <map>
+#include <set>
+
+#include <vespa/searchlib/btree/datastore.h>
+#include <vespa/searchlib/btree/btreenodeallocator.hpp>
+#include <vespa/searchlib/btree/btreenode.hpp>
+#include <vespa/searchlib/btree/btreenodestore.hpp>
+#include <vespa/searchlib/btree/btreeiterator.hpp>
+#include <vespa/searchlib/btree/btreeroot.hpp>
+#include <vespa/searchlib/btree/btreestore.hpp>
+
+namespace search {
+
+using vespalib::GenerationHandler;
+
+/*
+ * TODO: Make it pass MALLOC_OPTIONS=AJ on freebsd and valgrind on Linux.
+ */
+
+class AttributePostingListTest : public vespalib::TestApp
+{
+private:
+ /* Limited STL version for validation of full version */
+ typedef std::set<uint32_t> STLPostingList;
+ typedef std::map<int, STLPostingList> STLValueTree;
+
+ class RandomValue
+ {
+ public:
+ uint32_t _docId;
+ int _value;
+ uint32_t _order;
+
+ RandomValue(void)
+ : _docId(0),
+ _value(0u),
+ _order(0u)
+ {
+ }
+
+ RandomValue(uint32_t docId, uint32_t value, uint32_t order)
+ : _docId(docId),
+ _value(value),
+ _order(order)
+ {
+ }
+
+ bool
+ operator<(const RandomValue &rhs) const
+ {
+ return (_value < rhs._value ||
+ (_value == rhs._value &&
+ (_docId < rhs._docId ||
+ (_docId == rhs._docId &&
+ _order < rhs._order))));
+ }
+
+ bool
+ operator>(const RandomValue &rhs) const
+ {
+ return (_value > rhs._value ||
+ (_value == rhs._value &&
+ (_docId > rhs._docId ||
+ (_docId == rhs._docId &&
+ _order > rhs._order))));
+ }
+
+ bool
+ operator==(const RandomValue &rhs) const
+ {
+ return (_value == rhs._value &&
+ _docId == rhs._docId &&
+ _order == rhs._order);
+ }
+ };
+
+ class CompareOrder
+ {
+ public:
+ bool
+ operator()(const RandomValue &a, const RandomValue &b)
+ {
+ return (a._order < b._order ||
+ (a._order == b._order &&
+ (a._value < b._value ||
+ (a._value == b._value &&
+ a._docId < b._docId))));
+ }
+ };
+ std::vector<RandomValue> _randomValues;
+
+public:
+ typedef btree::DataStore<int> IntKeyStore;
+ typedef btree::BTreeKeyData<uint32_t, btree::BTreeNoLeafData>
+ AttributePosting;
+ typedef btree::BTreeStore<uint32_t,
+ btree::BTreeNoLeafData,
+ btree::NoAggregated,
+ std::less<uint32_t>,
+ btree::BTreeDefaultTraits>
+ PostingList;
+ typedef PostingList::NodeAllocatorType PostingListNodeAllocator;
+ typedef btree::EntryRef PostingIdx;
+ typedef btree::EntryRef StoreIndex;
+
+ class IntComp {
+ private:
+ const IntKeyStore & _store;
+ int _value;
+ int getValue(const StoreIndex & idx) const {
+ if (idx.valid()) {
+ return _store.getEntry(idx);
+ }
+ return _value;
+ }
+ public:
+ IntComp(const IntKeyStore & store) : _store(store), _value(0) {}
+ IntComp(const IntKeyStore & store, int value) : _store(store), _value(value) {}
+ bool operator() (const StoreIndex & lhs, const StoreIndex & rhs) const {
+ return getValue(lhs) < getValue(rhs);
+ }
+ };
+
+ typedef btree::BTreeRoot<StoreIndex, PostingIdx,
+ btree::NoAggregated,
+ const IntComp &> IntEnumTree;
+ typedef IntEnumTree::NodeAllocatorType IntEnumNodeAllocator;
+ typedef IntEnumTree Tree;
+ typedef IntEnumNodeAllocator TreeManager;
+ typedef IntKeyStore ValueHandle;
+ typedef std::vector<RandomValue> RandomValuesVector;
+private:
+ GenerationHandler _handler;
+ IntKeyStore *_intKeyStore;
+ IntEnumNodeAllocator *_intNodeAlloc;
+ IntEnumTree *_intTree;
+ PostingList *_intPostings;
+ STLValueTree *_stlTree;
+
+ Rand48 _randomGenerator;
+ uint32_t _generation;
+
+ void
+ allocTree(void);
+
+ void
+ freeTree(bool verbose);
+
+ void
+ fillRandomValues(unsigned int count,
+ unsigned int mvcount);
+
+ void
+ insertRandomValues(Tree &tree,
+ TreeManager &treeMgr,
+ ValueHandle &valueHandle,
+ PostingList &postings,
+ STLValueTree *stlTree,
+ RandomValuesVector &values);
+
+ void
+ removeRandomValues(Tree &tree,
+ TreeManager &treeMgr,
+ ValueHandle &valueHandle,
+ PostingList &postings,
+ STLValueTree *stlTree,
+ RandomValuesVector &values);
+
+ void
+ lookupRandomValues(Tree &tree,
+ TreeManager &treeMgr,
+ const ValueHandle &valueHandle,
+ PostingList &postings,
+ STLValueTree *stlTree,
+ RandomValuesVector &values);
+
+ void
+ sortRandomValues(void);
+
+ void
+ doCompactEnumStore(Tree &tree,
+ TreeManager &treeMgr,
+ ValueHandle &valueHandle);
+
+ void
+ doCompactPostingList(Tree &tree,
+ TreeManager &treeMgr,
+ PostingList &postings,
+ PostingListNodeAllocator &postingsAlloc);
+
+ void
+ bumpGeneration(Tree &tree,
+ ValueHandle &valueHandle,
+ PostingList &postings,
+ PostingListNodeAllocator &postingsAlloc);
+
+ void
+ removeOldGenerations(Tree &tree,
+ ValueHandle &valueHandle,
+ PostingList &postings,
+ PostingListNodeAllocator &postingsAlloc);
+
+ static const char *
+ frozenName(bool frozen)
+ {
+ return frozen ? "frozen" : "thawed";
+ }
+public:
+ AttributePostingListTest(void)
+ : vespalib::TestApp(),
+ _randomValues(),
+ _handler(),
+ _intKeyStore(NULL),
+ _intNodeAlloc(NULL),
+ _intTree(NULL),
+ _intPostings(NULL),
+ _stlTree(NULL),
+ _randomGenerator()
+ {
+ }
+
+ int Main(void);
+};
+
+
+
+void
+AttributePostingListTest::allocTree(void)
+{
+ _intKeyStore = new IntKeyStore;
+ _intNodeAlloc = new IntEnumNodeAllocator();
+ _intTree = new IntEnumTree();
+ _intPostings = new PostingList();
+ _stlTree = new STLValueTree;
+}
+
+
+void
+AttributePostingListTest::freeTree(bool verbose)
+{
+ (void) verbose;
+ LOG(info,
+ "freeTree before clear: %" PRIu64 " (%" PRIu64 " held)"
+ ", %zu leaves",
+ static_cast<uint64_t>(_intNodeAlloc->getMemoryUsage().allocatedBytes()),
+ static_cast<uint64_t>(_intNodeAlloc->getMemoryUsage().allocatedBytesOnHold()),
+ _intTree->size(*_intNodeAlloc));
+ _intTree->clear(*_intNodeAlloc);
+ LOG(info,
+ "freeTree before unhold: %" PRIu64 " (%" PRIu64 " held)",
+ static_cast<uint64_t>(_intNodeAlloc->getMemoryUsage().allocatedBytes()),
+ static_cast<uint64_t>(_intNodeAlloc->getMemoryUsage().allocatedBytesOnHold()));
+ _intNodeAlloc->freeze();
+ _intPostings->freeze();
+ _intNodeAlloc->transferHoldLists(_handler.getCurrentGeneration());
+ _intPostings->clearBuilder();
+ _intPostings->transferHoldLists(_handler.getCurrentGeneration());
+ _handler.incGeneration();
+ _intNodeAlloc->trimHoldLists(_handler.getFirstUsedGeneration());
+ _intPostings->trimHoldLists(_handler.getFirstUsedGeneration());
+ LOG(info,
+ "freeTree after unhold: %" PRIu64 " (%" PRIu64 " held)",
+ static_cast<uint64_t>(_intNodeAlloc->getMemoryUsage().allocatedBytes()),
+ static_cast<uint64_t>(_intNodeAlloc->getMemoryUsage().allocatedBytesOnHold()));
+ delete _stlTree;
+ _stlTree = NULL;
+ delete _intTree;
+ _intTree = NULL;
+ delete _intNodeAlloc;
+ _intNodeAlloc = NULL;
+ delete _intKeyStore;
+ _intKeyStore = NULL;
+ delete _intPostings;
+ _intPostings = NULL;
+}
+
+
+void
+AttributePostingListTest::
+fillRandomValues(unsigned int count,
+ unsigned int mvcount)
+{
+ unsigned int i;
+ unsigned int j;
+ unsigned int mv;
+ unsigned int mvmax;
+ unsigned int mvcount2;
+ unsigned int mvcount3;
+
+ mvmax = 100;
+ mvcount2 = mvcount * (mvmax * (mvmax - 1)) / 2;
+ LOG(info,
+ "Filling %u+%u random values", count, mvcount2);
+ _randomValues.clear();
+ _randomValues.reserve(count);
+ _randomGenerator.srand48(42);
+ for (i = 0; i <count; i++) {
+ uint32_t docId = _randomGenerator.lrand48();
+ uint32_t val = _randomGenerator.lrand48();
+ uint32_t order = _randomGenerator.lrand48();
+ _randomValues.push_back(RandomValue(docId, val, order));
+ }
+ for (mv = 1; mv < mvmax; mv++) {
+ for (i = 0; i < mvcount; i++) {
+ for (j = 0; j < mv; j++) {
+ uint32_t docId = _randomGenerator.lrand48();
+ uint32_t val = _randomGenerator.lrand48();
+ uint32_t order = _randomGenerator.lrand48();
+ _randomValues.push_back(RandomValue(docId, val, order));
+ }
+ }
+ }
+ mvcount3 = 0;
+ for (mv = 10; mv < 4000; mv = mv * 3)
+ {
+ mvcount3 += mv * 2;
+ for (j = 0; j < mv; j++) {
+ uint32_t val = _randomGenerator.lrand48();
+ uint32_t docId = _randomGenerator.lrand48();
+ uint32_t order = _randomGenerator.lrand48();
+ _randomValues.push_back(RandomValue(docId, val, order));
+ val = _randomGenerator.lrand48();
+ docId = _randomGenerator.lrand48();
+ order = _randomGenerator.lrand48();
+ _randomValues.push_back(RandomValue(docId, val, order));
+ }
+ }
+ std::sort(_randomValues.begin(),
+ _randomValues.end(),
+ CompareOrder());
+
+ EXPECT_TRUE(_randomValues.size() == count + mvcount2 + mvcount3);
+}
+
+
+void
+AttributePostingListTest::
+insertRandomValues(Tree &tree,
+ TreeManager &treeMgr,
+ ValueHandle &valueHandle,
+ PostingList &postings,
+ STLValueTree *stlTree,
+ RandomValuesVector &
+ values)
+{
+ RandomValuesVector::iterator i;
+ RandomValuesVector::iterator ie;
+
+ LOG(info, "insertRandomValues start");
+ ie = values.end();
+ for (i = values.begin(); i != ie; ++i) {
+ Tree::Iterator itr = tree.find(StoreIndex(), treeMgr, IntComp(valueHandle, i->_value));
+ if (!itr.valid()) {
+#if 0
+ if (valueHandle.needResize())
+ doCompactEnumStore(tree, treeMgr, valueHandle);
+#endif
+ StoreIndex idx = valueHandle.addEntry(i->_value);
+ if (tree.insert(idx, PostingIdx(), treeMgr, IntComp(valueHandle))) {
+ itr = tree.find(idx, treeMgr, IntComp(valueHandle));
+ }
+ } else {
+ }
+ ASSERT_TRUE(itr.valid());
+ EXPECT_EQUAL(i->_value, valueHandle.getEntry(itr.getKey()));
+
+ /* TODO: Insert docid to postinglist */
+ PostingIdx oldIdx = itr.getData();
+ PostingIdx newIdx = oldIdx;
+ AttributePosting newPosting(i->_docId,
+ btree::BTreeNoLeafData());
+ std::vector<AttributePosting> additions;
+ std::vector<uint32_t> removals;
+ additions.push_back(newPosting);
+ postings.apply(newIdx, &additions[0], &additions[0] + additions.size(),
+ &removals[0], &removals[0] + removals.size());
+ std::atomic_thread_fence(std::memory_order_release);
+ itr.writeData(newIdx);
+
+ if (stlTree != NULL) {
+ STLValueTree::iterator it;
+ it = stlTree->find(i->_value);
+ if (it == stlTree->end()) {
+ std::pair<STLValueTree::iterator,bool> ir =
+ stlTree->insert(std::make_pair(i->_value,
+ STLPostingList()));
+ ASSERT_TRUE(ir.second && ir.first != stlTree->end() &&
+ ir.first->first == i->_value);
+ it = ir.first;
+ }
+ ASSERT_TRUE(it != stlTree->end() && it->first == i->_value);
+ it->second.insert(i->_docId);
+
+ if (it->second.empty()) {
+ stlTree->erase(it);
+ ASSERT_TRUE(!itr.valid());
+ } else {
+ size_t postingsize;
+
+ ASSERT_TRUE(itr.valid());
+ postingsize = postings.size(newIdx);
+ ASSERT_TRUE(postingsize > 0 &&
+ postingsize == it->second.size());
+ STLPostingList::iterator it3;
+ STLPostingList::iterator it3b;
+ STLPostingList::iterator it3e;
+
+ PostingList::Iterator it0;
+
+ it3b = it->second.begin();
+ it3e = it->second.end();
+ it0 = postings.begin(newIdx);
+ it3 = it3b;
+
+ while (it3 != it3e) {
+ ASSERT_TRUE(it0.valid());
+ ASSERT_TRUE(*it3 == it0.getKey());
+ ++it3;
+ ++it0;
+ }
+ ASSERT_TRUE(!it0.valid());
+ }
+ }
+ }
+ ASSERT_TRUE(tree.isValid(treeMgr, IntComp(valueHandle)));
+ LOG(info, "insertRandomValues done");
+}
+
+
+void
+AttributePostingListTest::
+removeRandomValues(Tree &tree,
+ TreeManager &treeMgr,
+ ValueHandle &valueHandle,
+ PostingList &postings,
+ STLValueTree *stlTree,
+ RandomValuesVector &values)
+{
+ RandomValuesVector::iterator i;
+ RandomValuesVector::iterator ie;
+
+ LOG(info, "removeRandomValues start");
+ ie = values.end();
+ for (i = values.begin(); i != ie; ++i) {
+ Tree::Iterator itr = tree.find(StoreIndex(), treeMgr, IntComp(valueHandle, i->_value));
+ PostingIdx newIdx;
+ /*
+ * TODO: Remove docid from postinglist, and only remove
+ * value from tree if postinglist is empty
+ */
+ if (itr.valid()) {
+ PostingIdx oldIdx = itr.getData();
+ newIdx = oldIdx;
+ std::vector<AttributePosting> additions;
+ std::vector<uint32_t> removals;
+ removals.push_back(i->_docId);
+ postings.apply(newIdx, &additions[0], &additions[0]+additions.size(),
+ &removals[0], &removals[0] + removals.size());
+ if (newIdx != oldIdx) {
+ std::atomic_thread_fence(std::memory_order_release);
+ itr.writeData(newIdx);
+ }
+ if (!newIdx.valid()) {
+ if (tree.remove(StoreIndex(), treeMgr, IntComp(valueHandle, i->_value))) {
+ itr = tree.find(StoreIndex(), treeMgr, IntComp(valueHandle, i->_value));
+ }
+ }
+ }
+ if (stlTree != NULL) {
+ STLValueTree::iterator it;
+ it = stlTree->find(i->_value);
+ ASSERT_TRUE(it != stlTree->end() && it->first == i->_value);
+ STLPostingList::iterator it2;
+ it2 = it->second.find(i->_docId);
+ ASSERT_TRUE(it2 != it->second.end() &&
+ *it2 == i->_docId);
+ it->second.erase(it2);
+
+ if (it->second.empty()) {
+ stlTree->erase(it);
+ ASSERT_TRUE(!itr.valid());
+ } else {
+ size_t postingsize;
+
+ ASSERT_TRUE(itr.valid());
+ postingsize = postings.size(newIdx);
+ ASSERT_TRUE(postingsize > 0 &&
+ postingsize == it->second.size());
+ STLPostingList::iterator it3;
+ STLPostingList::iterator it3b;
+ STLPostingList::iterator it3e;
+
+ PostingList::Iterator it0;
+
+ it3b = it->second.begin();
+ it3e = it->second.end();
+ it0 = postings.begin(newIdx);
+ it3 = it3b;
+
+ while (it3 != it3e) {
+ ASSERT_TRUE(it0.valid());
+ ASSERT_TRUE(*it3 == it0.getKey());
+ ++it3;
+ ++it0;
+ }
+ ASSERT_TRUE(!it0.valid());
+ }
+ }
+ }
+ ASSERT_TRUE(tree.isValid(treeMgr, IntComp(valueHandle)));
+ LOG(info, "removeRandomValues done");
+}
+
+
+void
+AttributePostingListTest::
+lookupRandomValues(Tree &tree,
+ TreeManager &treeMgr,
+ const ValueHandle &valueHandle,
+ PostingList &postings,
+ STLValueTree *stlTree,
+ RandomValuesVector &values)
+{
+ RandomValuesVector::iterator i;
+ RandomValuesVector::iterator ie;
+
+ LOG(info, "lookupRandomValues start");
+ ie = values.end();
+ for (i = values.begin(); i != ie; ++i) {
+ Tree::Iterator itr = tree.find(StoreIndex(), treeMgr, IntComp(valueHandle, i->_value));
+ ASSERT_TRUE(itr.valid() &&
+ valueHandle.getEntry(itr.getKey()) == i->_value);
+ if (stlTree != NULL) {
+ STLValueTree::iterator it;
+ it = stlTree->find(i->_value);
+ ASSERT_TRUE(it != stlTree->end() && it->first == i->_value);
+
+ if (it->second.empty()) {
+ stlTree->erase(it);
+ ASSERT_TRUE(!itr.valid());
+ } else {
+ size_t postingsize;
+
+ ASSERT_TRUE(itr.valid());
+ postingsize = postings.size(itr.getData());
+ ASSERT_TRUE(postingsize > 0 &&
+ postingsize == it->second.size());
+ STLPostingList::iterator it3;
+ STLPostingList::iterator it3b;
+ STLPostingList::iterator it3e;
+
+ PostingList::Iterator it0;
+
+ it3b = it->second.begin();
+ it3e = it->second.end();
+ it0 = postings.begin(itr.getData());
+ it3 = it3b;
+
+ while (it3 != it3e) {
+ ASSERT_TRUE(it0.valid());
+ ASSERT_TRUE(*it3 == it0.getKey());
+ ++it3;
+ ++it0;
+ }
+ ASSERT_TRUE(!it0.valid());
+ }
+ }
+ }
+ LOG(info, "lookupRandomValues done");
+}
+
+
+void
+AttributePostingListTest::doCompactEnumStore(Tree &tree,
+ TreeManager &treeMgr,
+ ValueHandle &valueHandle)
+{
+ LOG(info,
+ "doCompactEnumStore start");
+
+ Tree::Iterator i = tree.begin(treeMgr);
+
+ uint32_t numBuffers = valueHandle.getNumBuffers();
+ std::vector<uint32_t> toHold;
+
+ for (uint32_t bufferId = 0; bufferId < numBuffers; ++bufferId) {
+ btree::BufferState &state = valueHandle.getBufferState(bufferId);
+ if (state._state == btree::BufferState::ACTIVE) {
+ toHold.push_back(bufferId);
+ // Freelists already disabled due to variable sized data
+ }
+ }
+ valueHandle.switchActiveBuffer(0, 0u);
+
+ for (; i.valid(); ++i)
+ {
+ StoreIndex ov = i.getKey();
+ StoreIndex nv = valueHandle.addEntry(valueHandle.getEntry(ov));
+
+ std::atomic_thread_fence(std::memory_order_release);
+ i.writeKey(nv);
+ }
+ typedef GenerationHandler::generation_t generation_t;
+ for (std::vector<uint32_t>::const_iterator
+ it = toHold.begin(), ite = toHold.end(); it != ite; ++it) {
+ valueHandle.holdBuffer(*it);
+ }
+ generation_t generation = _handler.getCurrentGeneration();
+ valueHandle.transferHoldLists(generation);
+ _handler.incGeneration();
+ valueHandle.trimHoldLists(_handler.getFirstUsedGeneration());
+
+ LOG(info,
+ "doCompactEnumStore done");
+}
+
+
+void
+AttributePostingListTest::
+doCompactPostingList(Tree &tree,
+ TreeManager &treeMgr,
+ PostingList &postings,
+ PostingListNodeAllocator &postingsAlloc)
+{
+ LOG(info,
+ "doCompactPostingList start");
+
+#if 0
+ Tree::Iterator i(tree.begin(treeMgr));
+
+ postings.performCompaction(i, capacityNeeded);
+#else
+ (void) tree;
+ (void) treeMgr;
+ (void) postings;
+ (void) postingsAlloc;
+#endif
+
+ LOG(info,
+ "doCompactPostingList done");
+}
+
+
+void
+AttributePostingListTest::
+bumpGeneration(Tree &tree,
+ ValueHandle &valueHandle,
+ PostingList &postings,
+ PostingListNodeAllocator &postingsAlloc)
+{
+ (void) tree;
+ (void) valueHandle;
+ postingsAlloc.freeze();
+ postingsAlloc.transferHoldLists(_handler.getCurrentGeneration());
+ postings.transferHoldLists(_handler.getCurrentGeneration());
+ _handler.incGeneration();
+}
+
+void
+AttributePostingListTest::
+removeOldGenerations(Tree &tree,
+ ValueHandle &valueHandle,
+ PostingList &postings,
+ PostingListNodeAllocator &postingsAlloc)
+{
+ (void) tree;
+ (void) valueHandle;
+ postingsAlloc.trimHoldLists(_handler.getFirstUsedGeneration());
+ postings.trimHoldLists(_handler.getFirstUsedGeneration());
+}
+
+int
+AttributePostingListTest::Main()
+{
+ TEST_INIT("postinglist_test");
+
+ fillRandomValues(1000, 10);
+
+ allocTree();
+ insertRandomValues(*_intTree, *_intNodeAlloc, *_intKeyStore, *_intPostings,
+ _stlTree, _randomValues);
+ lookupRandomValues(*_intTree, *_intNodeAlloc, *_intKeyStore, *_intPostings,
+ _stlTree, _randomValues);
+ _intNodeAlloc->freeze();
+ _intNodeAlloc->transferHoldLists(_handler.getCurrentGeneration());
+ doCompactEnumStore(*_intTree, *_intNodeAlloc, *_intKeyStore);
+ removeRandomValues(*_intTree, *_intNodeAlloc, *_intKeyStore, *_intPostings,
+ _stlTree, _randomValues);
+ insertRandomValues(*_intTree, *_intNodeAlloc, *_intKeyStore, *_intPostings,
+ _stlTree, _randomValues);
+ freeTree(true);
+
+ TEST_DONE();
+}
+
+}
+
+TEST_APPHOOK(search::AttributePostingListTest);
diff --git a/searchlib/src/tests/attribute/postinglistattribute/.gitignore b/searchlib/src/tests/attribute/postinglistattribute/.gitignore
new file mode 100644
index 00000000000..9614cdd7626
--- /dev/null
+++ b/searchlib/src/tests/attribute/postinglistattribute/.gitignore
@@ -0,0 +1,4 @@
+.depend
+Makefile
+postinglistattribute_test
+searchlib_postinglistattribute_test_app
diff --git a/searchlib/src/tests/attribute/postinglistattribute/CMakeLists.txt b/searchlib/src/tests/attribute/postinglistattribute/CMakeLists.txt
new file mode 100644
index 00000000000..77d137c7b6e
--- /dev/null
+++ b/searchlib/src/tests/attribute/postinglistattribute/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_postinglistattribute_test_app
+ SOURCES
+ postinglistattribute_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_postinglistattribute_test_app COMMAND sh postinglistattribute_test.sh)
diff --git a/searchlib/src/tests/attribute/postinglistattribute/DESC b/searchlib/src/tests/attribute/postinglistattribute/DESC
new file mode 100644
index 00000000000..04c97a729a0
--- /dev/null
+++ b/searchlib/src/tests/attribute/postinglistattribute/DESC
@@ -0,0 +1 @@
+Unit tests for subclasses of PostingListAttribute.
diff --git a/searchlib/src/tests/attribute/postinglistattribute/FILES b/searchlib/src/tests/attribute/postinglistattribute/FILES
new file mode 100644
index 00000000000..56029570a21
--- /dev/null
+++ b/searchlib/src/tests/attribute/postinglistattribute/FILES
@@ -0,0 +1 @@
+postinglistattribute.cpp
diff --git a/searchlib/src/tests/attribute/postinglistattribute/postinglistattribute_test.cpp b/searchlib/src/tests/attribute/postinglistattribute/postinglistattribute_test.cpp
new file mode 100644
index 00000000000..5e248dc8758
--- /dev/null
+++ b/searchlib/src/tests/attribute/postinglistattribute/postinglistattribute_test.cpp
@@ -0,0 +1,1021 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("postinglistattribute_test");
+#include <vespa/vespalib/testkit/testapp.h>
+#include <vespa/vespalib/stllike/asciistream.h>
+
+#include <vespa/searchlib/attribute/attribute.h>
+#include <vespa/searchlib/attribute/attributefactory.h>
+#include <vespa/searchlib/attribute/postinglistattribute.h>
+#include <vespa/searchlib/attribute/singlenumericpostattribute.h>
+#include <vespa/searchlib/attribute/multinumericpostattribute.h>
+#include <vespa/searchlib/attribute/singlestringpostattribute.h>
+#include <vespa/searchlib/attribute/multistringpostattribute.h>
+
+#include <vespa/searchlib/attribute/enumstore.hpp>
+#include <vespa/searchlib/attribute/attributevector.hpp>
+#include <vespa/vespalib/util/compress.h>
+
+using std::shared_ptr;
+
+bool
+FastOS_UNIX_File::Sync(void)
+{
+ // LOG(info, "Skip sync");
+ return true;
+}
+
+namespace search {
+
+using attribute::CollectionType;
+using attribute::BasicType;
+using attribute::Config;
+using queryeval::PostingInfo;
+using queryeval::MinMaxPostingInfo;
+using search::fef::TermFieldMatchData;
+using search::queryeval::SearchIterator;
+
+typedef std::unique_ptr<AttributeVector::SearchContext> SearchContextPtr;
+typedef std::unique_ptr<search::queryeval::SearchIterator> SearchBasePtr;
+
+void
+toStr(std::stringstream &ss, SearchIterator &it)
+{
+ it.initFullRange();
+ it.seek(1u);
+ bool first = true;
+ while ( !it.isAtEnd()) {
+ if (first)
+ first = false;
+ else
+ ss << ",";
+ ss << it.getDocId();
+ it.seek(it.getDocId() + 1);
+ }
+}
+
+
+bool
+assertIterator(const std::string &exp, SearchIterator &it)
+{
+ std::stringstream ss;
+ toStr(ss, it);
+ if (!EXPECT_EQUAL(exp, ss.str()))
+ return false;
+ return true;
+}
+
+
+class PostingListAttributeTest : public vespalib::TestApp
+{
+private:
+ typedef IntegerAttribute::largeint_t largeint_t;
+ typedef AttributeVector::SP AttributePtr;
+ typedef std::set<AttributeVector::DocId> DocSet;
+
+ typedef SingleValueNumericPostingAttribute<
+ EnumAttribute<IntegerAttributeTemplate<int32_t> > >
+ Int32PostingListAttribute;
+ typedef MultiValueNumericPostingAttribute<
+ EnumAttribute<IntegerAttributeTemplate<int32_t> >,
+ multivalue::MVMTemplateArg<
+ multivalue::Value<EnumStoreBase::Index>,
+ multivalue::Index32> > Int32ArrayPostingListAttribute;
+ typedef MultiValueNumericPostingAttribute<
+ EnumAttribute<IntegerAttributeTemplate<int32_t> >,
+ multivalue::MVMTemplateArg<
+ multivalue::WeightedValue<EnumStoreBase::Index>,
+ multivalue::Index32> > Int32WsetPostingListAttribute;
+
+ typedef SingleValueNumericPostingAttribute<
+ EnumAttribute<FloatingPointAttributeTemplate<float> > >
+ FloatPostingListAttribute;
+ typedef MultiValueNumericPostingAttribute<
+ EnumAttribute<FloatingPointAttributeTemplate<float> >,
+ multivalue::MVMTemplateArg<
+ multivalue::Value<EnumStoreBase::Index>,
+ multivalue::Index32> > FloatArrayPostingListAttribute;
+ typedef MultiValueNumericPostingAttribute<
+ EnumAttribute<FloatingPointAttributeTemplate<float> >,
+ multivalue::MVMTemplateArg<
+ multivalue::WeightedValue<EnumStoreBase::Index>,
+ multivalue::Index32> > FloatWsetPostingListAttribute;
+
+ typedef SingleValueStringPostingAttribute StringPostingListAttribute;
+ typedef ArrayStringPostingAttribute StringArrayPostingListAttribute;
+ typedef WeightedSetStringPostingAttribute StringWsetPostingListAttribute;
+
+ template <typename VectorType>
+ void
+ populate(VectorType &v);
+
+ template <typename VectorType>
+ VectorType &
+ as(AttributePtr &v);
+
+ IntegerAttribute &
+ asInt(AttributePtr &v);
+
+ StringAttribute &
+ asString(AttributePtr &v);
+
+ void
+ buildTermQuery(std::vector<char> & buffer,
+ const vespalib::string & index,
+ const vespalib::string & term, bool prefix);
+
+ template <typename V, typename T>
+ SearchContextPtr
+ getSearch(const V & vec, const T & term, bool prefix);
+
+ template <typename V>
+ SearchContextPtr
+ getSearch(const V & vec);
+
+ template <typename V>
+ SearchContextPtr
+ getSearch2(const V & vec);
+
+ bool
+ assertSearch(const std::string &exp, StringAttribute &sa);
+
+ void addDocs(const AttributePtr & ptr, uint32_t numDocs);
+
+ template <typename VectorType, typename BufferType, typename Range>
+ void checkPostingList(const VectorType & vec, const std::vector<BufferType> & values, const Range & range);
+
+ template <typename VectorType, typename BufferType>
+ void testPostingList(const AttributePtr & ptr1, const AttributePtr & ptr2,
+ uint32_t numDocs, const std::vector<BufferType> & values);
+ void testPostingList();
+
+ template <typename AttributeType, typename ValueType>
+ void checkPostingList(AttributeType & vec, ValueType value, DocSet expected);
+ template <typename AttributeType, typename ValueType>
+ void checkNonExistantPostingList(AttributeType & vec, ValueType value);
+ template <typename AttributeType, typename ValueType>
+ void testArithmeticValueUpdate(const AttributePtr & ptr);
+ void testArithmeticValueUpdate();
+
+ template <typename VectorType, typename ValueType>
+ void testReload(const AttributePtr & ptr1, const AttributePtr & ptr2, const ValueType & value);
+ void testReload();
+
+ template <typename VectorType>
+ void
+ testMinMax(AttributePtr &ptr1, uint32_t trimmed);
+
+ template <typename VectorType>
+ void
+ testMinMax(AttributePtr &ptr1, AttributePtr &ptr2);
+
+ void
+ testMinMax(void);
+
+ void
+ testStringFold(void);
+public:
+ int Main();
+};
+
+template <>
+void
+PostingListAttributeTest::populate<IntegerAttribute>(IntegerAttribute &v)
+{
+ for(size_t i(0), m(v.getNumDocs()); i < m; i++) {
+ v.clearDoc(i);
+ if (i == 0)
+ continue;
+ if (i == 9)
+ continue;
+ if (i == 7) {
+ if (v.hasMultiValue()) {
+ v.append(i, -42, 27);
+ v.append(i, -43, 14);
+ v.append(i, -42, -3);
+ } else {
+ EXPECT_TRUE( v.update(i, -43) );
+ }
+ v.commit();
+ continue;
+ }
+ if (i == 20) {
+ if (v.hasMultiValue()) {
+ v.append(i, -42, 27);
+ v.append(i, -43, 14);
+ v.append(i, -42, -3);
+ } else {
+ EXPECT_TRUE( v.update(i, -43) );
+ }
+ v.commit();
+ continue;
+ }
+ if (i == 25) {
+ if (v.hasMultiValue()) {
+ v.append(i, -42, 27);
+ v.append(i, -43, 12);
+ v.append(i, -42, -3);
+ } else {
+ EXPECT_TRUE( v.update(i, -43) );
+ }
+ v.commit();
+ continue;
+ }
+ if (v.hasMultiValue()) {
+ v.append(i, -42, 3);
+ } else {
+ v.update(i, -42);
+ }
+ v.commit();
+ }
+ v.commit();
+}
+
+template <>
+void
+PostingListAttributeTest::populate<StringAttribute>(StringAttribute &v)
+{
+ for(size_t i(0), m(v.getNumDocs()); i < m; i++) {
+ v.clearDoc(i);
+ if (i == 0)
+ continue;
+ if (i == 9)
+ continue;
+ if (i == 7) {
+ if (v.hasMultiValue()) {
+ v.append(i, "foo", 27);
+ v.append(i, "bar", 14);
+ v.append(i, "foo", -3);
+ } else {
+ EXPECT_TRUE( v.update(i, "bar") );
+ }
+ v.commit();
+ continue;
+ }
+ if (i == 20) {
+ if (v.hasMultiValue()) {
+ v.append(i, "foo", 27);
+ v.append(i, "bar", 14);
+ v.append(i, "foo", -3);
+ } else {
+ EXPECT_TRUE( v.update(i, "bar") );
+ }
+ v.commit();
+ continue;
+ }
+ if (i == 25) {
+ if (v.hasMultiValue()) {
+ v.append(i, "foo", 27);
+ v.append(i, "bar", 12);
+ v.append(i, "foo", -3);
+ } else {
+ EXPECT_TRUE( v.update(i, "bar") );
+ }
+ v.commit();
+ continue;
+ }
+ if (v.hasMultiValue()) {
+ v.append(i, "foo", 3);
+ } else {
+ v.update(i, "foo");
+ }
+ v.commit();
+ }
+}
+
+
+template <typename VectorType>
+VectorType &
+PostingListAttributeTest::as(AttributePtr &v)
+{
+ VectorType *res = dynamic_cast<VectorType *>(v.get());
+ assert(res != NULL);
+ return *res;
+}
+
+
+IntegerAttribute &
+PostingListAttributeTest::asInt(AttributePtr &v)
+{
+ return as<IntegerAttribute>(v);
+}
+
+
+StringAttribute &
+PostingListAttributeTest::asString(AttributePtr &v)
+{
+ return as<StringAttribute>(v);
+}
+
+
+void
+PostingListAttributeTest::buildTermQuery(std::vector<char> &buffer,
+ const vespalib::string &index,
+ const vespalib::string &term,
+ bool prefix)
+{
+ uint32_t indexLen = index.size();
+ uint32_t termLen = term.size();
+ uint32_t queryPacketSize = 1 + 2 * 4 + indexLen + termLen;
+ uint32_t p = 0;
+ buffer.resize(queryPacketSize);
+ buffer[p++] = prefix ? ParseItem::ITEM_PREFIXTERM : ParseItem::ITEM_TERM;
+ p += vespalib::compress::Integer::compressPositive(indexLen, &buffer[p]);
+ memcpy(&buffer[p], index.c_str(), indexLen);
+ p += indexLen;
+ p += vespalib::compress::Integer::compressPositive(termLen, &buffer[p]);
+ memcpy(&buffer[p], term.c_str(), termLen);
+ p += termLen;
+ buffer.resize(p);
+}
+
+
+template <typename V, typename T>
+SearchContextPtr
+PostingListAttributeTest::getSearch(const V &vec, const T &term, bool prefix)
+{
+ std::vector<char> query;
+ vespalib::asciistream ss;
+ ss << term;
+ buildTermQuery(query, vec.getName(), ss.str(), prefix);
+
+ return (static_cast<const AttributeVector &>(vec)).
+ getSearch(vespalib::stringref(&query[0], query.size()),
+ AttributeVector::SearchContext::Params());
+}
+
+
+template <>
+SearchContextPtr
+PostingListAttributeTest::getSearch<IntegerAttribute>(const IntegerAttribute &v)
+{
+ return getSearch<IntegerAttribute>(v, "[-42;-42]", false);
+}
+
+
+template <>
+SearchContextPtr
+PostingListAttributeTest::getSearch<StringAttribute>(const StringAttribute &v)
+{
+ return getSearch<StringAttribute, const vespalib::string &>
+ (v, "foo", false);
+}
+
+
+template <>
+SearchContextPtr
+PostingListAttributeTest::getSearch2<IntegerAttribute>(const IntegerAttribute &v)
+{
+ return getSearch<IntegerAttribute>(v, "[-43;-43]", false);
+}
+
+
+template <>
+SearchContextPtr
+PostingListAttributeTest::getSearch2<StringAttribute>(const StringAttribute &v)
+{
+ return getSearch<StringAttribute, const vespalib::string &>
+ (v, "bar", false);
+}
+
+
+bool
+PostingListAttributeTest::assertSearch(const std::string &exp,
+ StringAttribute &sa)
+{
+ TermFieldMatchData md;
+ SearchContextPtr sc = getSearch<StringAttribute>(sa);
+ sc->fetchPostings(true);
+ SearchBasePtr sb = sc->createIterator(&md, true);
+ if (!EXPECT_TRUE(assertIterator(exp, *sb)))
+ return false;
+ return true;
+}
+
+
+void
+PostingListAttributeTest::addDocs(const AttributePtr & ptr, uint32_t numDocs)
+{
+ for (uint32_t i = 0; i < numDocs; ++i) {
+ uint32_t doc;
+ ASSERT_TRUE(ptr->addDoc(doc));
+ ASSERT_TRUE(doc == i);
+ ASSERT_TRUE(ptr->getNumDocs() == i + 1);
+ }
+ ASSERT_TRUE(ptr->getNumDocs() == numDocs);
+}
+
+class RangeAlpha {
+private:
+ uint32_t _part;
+public:
+ RangeAlpha(uint32_t part) : _part(part) { }
+ uint32_t getBegin(uint32_t i) const { return i * _part; }
+ uint32_t getEnd(uint32_t i) const { return (i + 1) * _part; }
+};
+
+class RangeBeta {
+private:
+ uint32_t _part;
+ uint32_t _numValues;
+public:
+ RangeBeta(uint32_t part, uint32_t numValues) : _part(part), _numValues(numValues) { }
+ uint32_t getBegin(uint32_t i) const { return (_numValues - 1 - i) * _part; }
+ uint32_t getEnd(uint32_t i) const { return (_numValues - i) * _part; }
+};
+
+template <typename VectorType, typename BufferType, typename RangeGenerator>
+void
+PostingListAttributeTest::checkPostingList(const VectorType & vec, const std::vector<BufferType> & values,
+ const RangeGenerator & range)
+{
+ const typename VectorType::EnumStore & enumStore = vec.getEnumStore();
+ const typename VectorType::Dictionary & dict =
+ enumStore.getPostingDictionary();
+ const typename VectorType::PostingList & postingList = vec.getPostingList();
+
+ for (size_t i = 0; i < values.size(); ++i) {
+ uint32_t docBegin = range.getBegin(i);
+ uint32_t docEnd = range.getEnd(i);
+
+ typename VectorType::DictionaryIterator itr =
+ dict.find(typename VectorType::EnumIndex(),
+ typename VectorType::ComparatorType(enumStore, values[i]));
+ ASSERT_TRUE(itr.valid());
+
+ typename VectorType::PostingList::Iterator postings;
+ postings = postingList.begin(itr.getData());
+
+ uint32_t doc = docBegin;
+ for (; postings.valid(); ++postings) {
+ EXPECT_EQUAL(doc++, postings.getKey());
+ }
+ EXPECT_EQUAL(doc, docEnd);
+ }
+}
+
+template <typename VectorType, typename BufferType>
+void
+PostingListAttributeTest::testPostingList(const AttributePtr & ptr1, const AttributePtr & ptr2,
+ uint32_t numDocs, const std::vector<BufferType> & values)
+{
+ LOG(info, "testPostingList: vector '%s'", ptr1->getName().c_str());
+
+ VectorType & vec1 = static_cast<VectorType &>(*ptr1.get());
+ VectorType & vec2 = static_cast<VectorType &>(*ptr2.get());
+ addDocs(ptr1, numDocs);
+
+ uint32_t part = numDocs / values.size();
+
+ // insert values
+ for (uint32_t doc = 0; doc < numDocs; ++doc) {
+ uint32_t idx = doc / part;
+ EXPECT_TRUE(vec1.update(doc, values[idx]));
+ }
+ vec1.commit();
+
+#if 0
+ std::cout << "***** printBuffer 0 ***** " << std::endl;
+ vec1.getEnumStore().printBuffer(std::cout, 0);
+ std::cout << "***** printBuffer 1 ***** " << std::endl;
+ vec1.getEnumStore().printBuffer(std::cout, 1);
+ std::cout << "***** printCurrentContent ***** " << std::endl;
+ vec1.getEnumStore().printCurrentContent(std::cout);
+ std::cout << "***** printPostingListContent *****" << std::endl;
+ vec1.printPostingListContent(std::cout);
+#endif
+
+ // check posting list for correct content
+ checkPostingList(vec1, values, RangeAlpha(part));
+
+ // load and save vector
+ ptr1->saveAs(ptr2->getBaseFileName());
+ ptr2->load();
+#if 0
+ std::cout << "***** vec2.printPostingListContent *****" << std::endl;
+ vec2.printPostingListContent(std::cout);
+#endif
+ checkPostingList(vec2, values, RangeAlpha(part));
+
+ // insert values in another order
+ for (uint32_t doc = 0; doc < numDocs; ++doc) {
+ uint32_t idx = values.size() - 1 - (doc / part);
+ EXPECT_TRUE(vec1.update(doc, values[idx]));
+ }
+ vec1.commit();
+
+ // check posting list again for correct content
+ checkPostingList(vec1, values, RangeBeta(part, values.size()));
+
+ // load and save vector
+ ptr1->saveAs(ptr2->getBaseFileName());
+ ptr2->load();
+ checkPostingList(vec2, values, RangeBeta(part, values.size()));
+}
+
+void
+PostingListAttributeTest::testPostingList()
+{
+ uint32_t numDocs = 1000;
+ uint32_t numValues = 50;
+
+ { // IntegerAttribute
+ std::vector<largeint_t> values;
+ for (uint32_t i = 0; i < numValues; ++i) {
+ values.push_back(i);
+ }
+ {
+ Config cfg(Config(BasicType::INT32, CollectionType::SINGLE));
+ cfg.setFastSearch(true);
+ AttributePtr ptr1 = AttributeFactory::createAttribute("sint32_1", cfg);
+ AttributePtr ptr2 = AttributeFactory::createAttribute("sint32_2", cfg);
+ testPostingList<Int32PostingListAttribute>(ptr1, ptr2, numDocs, values);
+ }
+ {
+ Config cfg(Config(BasicType::INT32, CollectionType::ARRAY));
+ cfg.setFastSearch(true);
+ AttributePtr ptr1 = AttributeFactory::createAttribute("aint32_1", cfg);
+ AttributePtr ptr2 = AttributeFactory::createAttribute("aint32_2", cfg);
+ testPostingList<Int32ArrayPostingListAttribute>(ptr1, ptr2, numDocs, values);
+ }
+ {
+ Config cfg(Config(BasicType::INT32, CollectionType::WSET));
+ cfg.setFastSearch(true);
+ AttributePtr ptr1 = AttributeFactory::createAttribute("wsint32_1", cfg);
+ AttributePtr ptr2 = AttributeFactory::createAttribute("wsint32_2", cfg);
+ testPostingList<Int32WsetPostingListAttribute>(ptr1, ptr2, numDocs, values);
+ }
+ }
+
+ { // FloatingPointAttribute
+ std::vector<double> values;
+ for (uint32_t i = 0; i < numValues; ++i) {
+ values.push_back(i);
+ }
+ {
+ Config cfg(Config(BasicType::FLOAT, CollectionType::SINGLE));
+ cfg.setFastSearch(true);
+ AttributePtr ptr1 = AttributeFactory::createAttribute("sfloat_1", cfg);
+ AttributePtr ptr2 = AttributeFactory::createAttribute("sfloat_2", cfg);
+ testPostingList<FloatPostingListAttribute>(ptr1, ptr2, numDocs, values);
+ }
+ {
+ Config cfg(Config(BasicType::FLOAT, CollectionType::ARRAY));
+ cfg.setFastSearch(true);
+ AttributePtr ptr1 = AttributeFactory::createAttribute("afloat_1", cfg);
+ AttributePtr ptr2 = AttributeFactory::createAttribute("afloat_2", cfg);
+ testPostingList<FloatArrayPostingListAttribute>(ptr1, ptr2, numDocs, values);
+ }
+ {
+ Config cfg(Config(BasicType::FLOAT, CollectionType::WSET));
+ cfg.setFastSearch(true);
+ AttributePtr ptr1 = AttributeFactory::createAttribute("wsfloat_1", cfg);
+ AttributePtr ptr2 = AttributeFactory::createAttribute("wsfloat_2", cfg);
+ testPostingList<FloatWsetPostingListAttribute>(ptr1, ptr2, numDocs, values);
+ }
+ }
+
+ { // StringAttribute
+ std::vector<vespalib::string> values;
+ std::vector<const char *> charValues;
+ values.reserve(numValues);
+ charValues.reserve(numValues);
+ values.push_back("");
+ charValues.push_back(values.back().c_str());
+ for (uint32_t i = 1; i < numValues; ++i) {
+ vespalib::asciistream ss;
+ ss << "string" << i;
+ values.push_back(ss.str());
+ charValues.push_back(values.back().c_str());
+ }
+ {
+ Config cfg(Config(BasicType::STRING, CollectionType::SINGLE));
+ cfg.setFastSearch(true);
+ AttributePtr ptr1 = AttributeFactory::createAttribute("sstr_1", cfg);
+ AttributePtr ptr2 = AttributeFactory::createAttribute("sstr_2", cfg);
+ testPostingList<StringPostingListAttribute>(ptr1, ptr2, numDocs, charValues);
+ }
+ {
+ Config cfg(Config(BasicType::STRING, CollectionType::ARRAY));
+ cfg.setFastSearch(true);
+ AttributePtr ptr1 = AttributeFactory::createAttribute("astr_1", cfg);
+ AttributePtr ptr2 = AttributeFactory::createAttribute("astr_2", cfg);
+ testPostingList<StringArrayPostingListAttribute>(ptr1, ptr2, numDocs, charValues);
+ }
+ {
+ Config cfg(Config(BasicType::STRING, CollectionType::WSET));
+ cfg.setFastSearch(true);
+ AttributePtr ptr1 = AttributeFactory::createAttribute("wsstr_1", cfg);
+ AttributePtr ptr2 = AttributeFactory::createAttribute("wsstr_2", cfg);
+ testPostingList<StringWsetPostingListAttribute>(ptr1, ptr2, numDocs, charValues);
+ }
+ }
+}
+
+template <typename AttributeType, typename ValueType>
+void
+PostingListAttributeTest::checkPostingList(AttributeType & vec, ValueType value, DocSet expected)
+{
+ const typename AttributeType::EnumStore & enumStore = vec.getEnumStore();
+ const typename AttributeType::Dictionary & dict =
+ enumStore.getPostingDictionary();
+ const typename AttributeType::PostingList & postingList = vec.getPostingList();
+ typename AttributeType::DictionaryIterator itr =
+ dict.find(typename AttributeType::EnumIndex(),
+ typename AttributeType::ComparatorType(vec.getEnumStore(), value));
+ ASSERT_TRUE(itr.valid());
+
+ typename AttributeType::PostingList::Iterator postings;
+ postings = postingList.begin(itr.getData());
+
+ DocSet::iterator docBegin = expected.begin();
+ DocSet::iterator docEnd = expected.end();
+ for (; postings.valid(); ++postings) {
+ EXPECT_EQUAL(*docBegin++, postings.getKey());
+ }
+ EXPECT_TRUE(docBegin == docEnd);
+}
+
+template <typename AttributeType, typename ValueType>
+void
+PostingListAttributeTest::checkNonExistantPostingList(AttributeType & vec, ValueType value)
+{
+ const typename AttributeType::Dictionary & dict =
+ vec.getEnumStore().getPostingDictionary();
+ typename AttributeType::DictionaryIterator itr =
+ dict.find(typename AttributeType::EnumIndex(),
+ typename AttributeType::ComparatorType(vec.getEnumStore(), value));
+ EXPECT_TRUE(!itr.valid());
+}
+
+template <typename AttributeType, typename ValueType>
+void
+PostingListAttributeTest::testArithmeticValueUpdate(const AttributePtr & ptr)
+{
+ LOG(info, "testArithmeticValueUpdate: vector '%s'", ptr->getName().c_str());
+
+ typedef document::ArithmeticValueUpdate Arith;
+ AttributeType & vec = static_cast<AttributeType &>(*ptr.get());
+
+ addDocs(ptr, 4);
+
+ uint32_t allDocs[] = {0, 1, 2, 3};
+ checkNonExistantPostingList<AttributeType, ValueType>(vec, 0);
+
+ for (uint32_t doc = 0; doc < 4; ++doc) {
+ ASSERT_TRUE(vec.update(doc, 100));
+ }
+ ptr->commit();
+
+ checkNonExistantPostingList<AttributeType, ValueType>(vec, 0);
+ checkPostingList<AttributeType, ValueType>(vec, 100, DocSet(allDocs, allDocs + 4));
+
+ EXPECT_TRUE(vec.apply(0, Arith(Arith::Add, 10)));
+ EXPECT_TRUE(vec.apply(1, Arith(Arith::Sub, 10)));
+ EXPECT_TRUE(vec.apply(2, Arith(Arith::Mul, 10)));
+ EXPECT_TRUE(vec.apply(3, Arith(Arith::Div, 10)));
+ ptr->commit();
+
+ {
+ uint32_t docs[] = {0};
+ checkPostingList<AttributeType, ValueType>(vec, 110, DocSet(docs, docs + 1));
+ }
+ {
+ uint32_t docs[] = {1};
+ checkPostingList<AttributeType, ValueType>(vec, 90, DocSet(docs, docs + 1));
+ }
+ {
+ uint32_t docs[] = {2};
+ checkPostingList<AttributeType, ValueType>(vec, 1000, DocSet(docs, docs + 1));
+ }
+ {
+ uint32_t docs[] = {3};
+ checkPostingList<AttributeType, ValueType>(vec, 10, DocSet(docs, docs + 1));
+ }
+
+
+ // several inside a single commit
+ for (uint32_t doc = 0; doc < 4; ++doc) {
+ ASSERT_TRUE(vec.update(doc, 2000));
+ }
+ EXPECT_TRUE(vec.apply(0, Arith(Arith::Add, 10)));
+ EXPECT_TRUE(vec.apply(0, Arith(Arith::Add, 10)));
+ EXPECT_TRUE(vec.apply(1, Arith(Arith::Sub, 10)));
+ EXPECT_TRUE(vec.apply(1, Arith(Arith::Sub, 10)));
+ EXPECT_TRUE(vec.apply(2, Arith(Arith::Mul, 10)));
+ EXPECT_TRUE(vec.apply(2, Arith(Arith::Mul, 10)));
+ EXPECT_TRUE(vec.apply(3, Arith(Arith::Div, 10)));
+ EXPECT_TRUE(vec.apply(3, Arith(Arith::Div, 10)));
+ ptr->commit();
+
+ vespalib::asciistream ss;
+ vec.printPostingListContent(ss);
+ std::cout << ss.str();
+ {
+ uint32_t docs[] = {0};
+ checkPostingList<AttributeType, ValueType>(vec, 2020, DocSet(docs, docs + 1));
+ }
+ {
+ uint32_t docs[] = {1};
+ checkPostingList<AttributeType, ValueType>(vec, 1980, DocSet(docs, docs + 1));
+ }
+ {
+ uint32_t docs[] = {2};
+ checkPostingList<AttributeType, ValueType>(vec, 200000, DocSet(docs, docs + 1));
+ }
+ {
+ uint32_t docs[] = {3};
+ checkPostingList<AttributeType, ValueType>(vec, 20, DocSet(docs, docs + 1));
+ }
+ checkNonExistantPostingList<AttributeType, ValueType>(vec, 100);
+ checkNonExistantPostingList<AttributeType, ValueType>(vec, 110);
+ checkNonExistantPostingList<AttributeType, ValueType>(vec, 90);
+ checkNonExistantPostingList<AttributeType, ValueType>(vec, 1000);
+ checkNonExistantPostingList<AttributeType, ValueType>(vec, 10);
+ checkNonExistantPostingList<AttributeType, ValueType>(vec, 2000);
+ checkNonExistantPostingList<AttributeType, ValueType>(vec, 2010);
+ checkNonExistantPostingList<AttributeType, ValueType>(vec, 1990);
+ checkNonExistantPostingList<AttributeType, ValueType>(vec, 20000);
+ checkNonExistantPostingList<AttributeType, ValueType>(vec, 200);
+}
+
+void
+PostingListAttributeTest::testArithmeticValueUpdate()
+{
+ { // IntegerAttribute
+ Config cfg(Config(BasicType::INT32, CollectionType::SINGLE));
+ cfg.setFastSearch(true);
+ AttributePtr ptr = AttributeFactory::createAttribute("sint32", cfg);
+ testArithmeticValueUpdate<Int32PostingListAttribute, largeint_t>(ptr);
+ }
+
+ { // FloatingPointAttribute
+ Config cfg(Config(BasicType::FLOAT, CollectionType::SINGLE));
+ cfg.setFastSearch(true);
+ AttributePtr ptr = AttributeFactory::createAttribute("sfloat", cfg);
+ testArithmeticValueUpdate<FloatPostingListAttribute, double>(ptr);
+ }
+}
+
+
+template <typename VectorType, typename ValueType>
+void
+PostingListAttributeTest::testReload(const AttributePtr & ptr1, const AttributePtr & ptr2, const ValueType & value)
+{
+ LOG(info, "testReload: vector '%s'", ptr1->getName().c_str());
+
+ VectorType & vec1 = static_cast<VectorType &>(*ptr1.get());
+
+ addDocs(ptr1, 5);
+ for (uint32_t doc = 0; doc < 5; ++doc) {
+ EXPECT_TRUE(vec1.update(doc, value));
+ }
+ ptr1->commit();
+
+ ASSERT_TRUE(ptr1->saveAs(ptr2->getBaseFileName()));
+ ASSERT_TRUE(ptr2->load());
+
+ EXPECT_TRUE(ptr2->getNumDocs() == 5);
+ ValueType buffer[1];
+ for (uint32_t doc = 0; doc < 5; ++doc) {
+ EXPECT_TRUE(ptr2->get(doc, buffer, 1) == 1);
+ EXPECT_EQUAL(buffer[0], value);
+ }
+}
+
+void
+PostingListAttributeTest::testReload()
+{
+ { // IntegerAttribute
+ Config cfg(Config(BasicType::INT32, CollectionType::SINGLE));
+ cfg.setFastSearch(true);
+ {
+ AttributePtr ptr1 = AttributeFactory::createAttribute("sint32_1", cfg);
+ AttributePtr ptr2 = AttributeFactory::createAttribute("sint32_2", cfg);
+ testReload<Int32PostingListAttribute, largeint_t>(ptr1, ptr2, 100);
+ }
+ {
+ AttributePtr ptr1 = AttributeFactory::createAttribute("sint32_1", cfg);
+ AttributePtr ptr2 = AttributeFactory::createAttribute("sint32_2", cfg);
+ testReload<Int32PostingListAttribute, largeint_t>(ptr1, ptr2, 0);
+ }
+ }
+
+ { // FloatingPointAttribute
+ Config cfg(Config(BasicType::FLOAT, CollectionType::SINGLE));
+ cfg.setFastSearch(true);
+ {
+ AttributePtr ptr1 = AttributeFactory::createAttribute("sfloat_1", cfg);
+ AttributePtr ptr2 = AttributeFactory::createAttribute("sfloat_2", cfg);
+ testReload<FloatPostingListAttribute, double>(ptr1, ptr2, 100);
+ }
+ {
+ AttributePtr ptr1 = AttributeFactory::createAttribute("sfloat_1", cfg);
+ AttributePtr ptr2 = AttributeFactory::createAttribute("sfloat_2", cfg);
+ testReload<FloatPostingListAttribute, double>(ptr1, ptr2, 0);
+ }
+ }
+
+ { // StringAttribute
+ Config cfg(Config(BasicType::STRING, CollectionType::SINGLE));
+ cfg.setFastSearch(true);
+ {
+ AttributePtr ptr1 = AttributeFactory::createAttribute("sstr_1", cfg);
+ AttributePtr ptr2 = AttributeFactory::createAttribute("sstr_2", cfg);
+ testReload<StringPostingListAttribute, vespalib::string>(ptr1, ptr2, "unique");
+ }
+ {
+ AttributePtr ptr1 = AttributeFactory::createAttribute("sstr_1", cfg);
+ AttributePtr ptr2 = AttributeFactory::createAttribute("sstr_2", cfg);
+ testReload<StringPostingListAttribute, vespalib::string>(ptr1, ptr2, "");
+ }
+ }
+}
+
+template <typename VectorType>
+void
+PostingListAttributeTest::testMinMax(AttributePtr &ptr1, uint32_t trimmed)
+{
+ TermFieldMatchData md;
+ SearchContextPtr sc = getSearch<VectorType>(as<VectorType>(ptr1));
+ sc->fetchPostings(true);
+ SearchBasePtr sb = sc->createIterator(&md, true);
+ sb->initFullRange();
+
+ const PostingInfo *pi = sb->getPostingInfo();
+ ASSERT_TRUE(pi != NULL);
+ const MinMaxPostingInfo *mmpi =
+ dynamic_cast<const MinMaxPostingInfo *>(pi);
+ ASSERT_TRUE(mmpi != NULL);
+
+ if (ptr1->hasMultiValue()) {
+ if (trimmed == 2u) {
+ EXPECT_EQUAL(3, mmpi->getMinWeight());
+ } else {
+ EXPECT_EQUAL(-3, mmpi->getMinWeight());
+ }
+ EXPECT_EQUAL(3, mmpi->getMaxWeight());
+ } else {
+ EXPECT_EQUAL(1, mmpi->getMinWeight());
+ EXPECT_EQUAL(1, mmpi->getMaxWeight());
+ }
+
+ sb->seek(1u);
+ EXPECT_EQUAL(1u, sb->getDocId());
+
+ sc = getSearch2<VectorType>(as<VectorType>(ptr1));
+ sc->fetchPostings(true);
+ sb = sc->createIterator(&md, true);
+ sb->initFullRange();
+
+ pi = sb->getPostingInfo();
+ if (trimmed == 2) {
+ ASSERT_TRUE(pi == NULL);
+ } else {
+ ASSERT_TRUE(pi != NULL);
+ mmpi = dynamic_cast<const MinMaxPostingInfo *>(pi);
+ ASSERT_TRUE(mmpi != NULL);
+
+ if (ptr1->hasMultiValue()) {
+ if (trimmed == 0) {
+ EXPECT_EQUAL(12, mmpi->getMinWeight());
+ } else {
+ EXPECT_EQUAL(14, mmpi->getMinWeight());
+ }
+ EXPECT_EQUAL(14, mmpi->getMaxWeight());
+ } else {
+ EXPECT_EQUAL(1, mmpi->getMinWeight());
+ EXPECT_EQUAL(1, mmpi->getMaxWeight());
+ }
+ }
+
+ sb->seek(1u);
+ if (trimmed == 2u) {
+ EXPECT_TRUE(sb->isAtEnd());
+ } else {
+ EXPECT_EQUAL(7u, sb->getDocId());
+ }
+}
+
+template <typename VectorType>
+void
+PostingListAttributeTest::testMinMax(AttributePtr &ptr1, AttributePtr &ptr2)
+{
+ uint32_t numDocs = 100;
+ addDocs(ptr1, numDocs);
+ populate(as<VectorType>(ptr1));
+
+ TEST_DO(testMinMax<VectorType>(ptr1, 0u));
+ ASSERT_TRUE(ptr1->saveAs(ptr2->getBaseFileName()));
+ ASSERT_TRUE(ptr2->load());
+ testMinMax<VectorType>(ptr2, 0u);
+
+ ptr2->clearDoc(20);
+ ptr2->clearDoc(25);
+ ptr2->commit();
+ TEST_DO(testMinMax<VectorType>(ptr2, 1u));
+
+ ptr2->clearDoc(7);
+ ptr2->commit();
+ TEST_DO(testMinMax<VectorType>(ptr2, 2u));
+
+}
+
+void
+PostingListAttributeTest::testMinMax(void)
+{
+ {
+ Config cfg(Config(BasicType::INT32, CollectionType::SINGLE));
+ cfg.setFastSearch(true);
+ AttributePtr ptr1 = AttributeFactory::createAttribute("sint32_1", cfg);
+ AttributePtr ptr2 = AttributeFactory::createAttribute("sint32_2", cfg);
+ testMinMax<IntegerAttribute>(ptr1, ptr2);
+ }
+ {
+ Config cfg(Config(BasicType::INT32, CollectionType::WSET));
+ cfg.setFastSearch(true);
+ AttributePtr ptr1 =
+ AttributeFactory::createAttribute("wsint32_1", cfg);
+ AttributePtr ptr2 =
+ AttributeFactory::createAttribute("wsint32_2", cfg);
+ testMinMax<IntegerAttribute>(ptr1, ptr2);
+ }
+ {
+ Config cfg(Config(BasicType::STRING, CollectionType::SINGLE));
+ cfg.setFastSearch(true);
+ AttributePtr ptr1 = AttributeFactory::createAttribute("sstr_1", cfg);
+ AttributePtr ptr2 = AttributeFactory::createAttribute("sstr_2", cfg);
+ testMinMax<StringAttribute>(ptr1, ptr2);
+ }
+ {
+ Config cfg(Config(BasicType::STRING, CollectionType::WSET));
+ cfg.setFastSearch(true);
+ AttributePtr ptr1 = AttributeFactory::createAttribute("wsstr_1", cfg);
+ AttributePtr ptr2 = AttributeFactory::createAttribute("wsstr_2", cfg);
+ testMinMax<StringAttribute>(ptr1, ptr2);
+ }
+}
+
+
+void
+PostingListAttributeTest::testStringFold(void)
+{
+ Config cfg(Config(BasicType::STRING, CollectionType::SINGLE));
+ cfg.setFastSearch(true);
+ AttributePtr ptr1 = AttributeFactory::createAttribute("sstr_1", cfg);
+
+ addDocs(ptr1, 6);
+
+ StringAttribute &sa(asString(ptr1));
+
+ sa.update(1, "a");
+ sa.commit();
+ sa.update(3, "FOo");
+ sa.commit();
+ sa.update(4, "foo");
+ sa.commit();
+ sa.update(5, "z");
+ sa.commit();
+
+ EXPECT_TRUE(assertSearch("3,4", sa));
+
+ sa.update(2, "FOO");
+ sa.commit();
+
+ EXPECT_TRUE(assertSearch("2,3,4", sa));
+
+ sa.update(4, "");
+ sa.commit();
+
+ EXPECT_TRUE(assertSearch("2,3", sa));
+
+ sa.update(2, "");
+ sa.commit();
+
+ EXPECT_TRUE(assertSearch("3", sa));
+
+ sa.update(3, "");
+ sa.commit();
+
+ EXPECT_TRUE(assertSearch("", sa));
+}
+
+
+int
+PostingListAttributeTest::Main()
+{
+ TEST_INIT("postinglistattribute_test");
+
+ testPostingList();
+ testArithmeticValueUpdate();
+ testReload();
+ testMinMax();
+ testStringFold();
+
+ TEST_DONE();
+}
+
+}
+
+TEST_APPHOOK(search::PostingListAttributeTest);
diff --git a/searchlib/src/tests/attribute/postinglistattribute/postinglistattribute_test.sh b/searchlib/src/tests/attribute/postinglistattribute/postinglistattribute_test.sh
new file mode 100755
index 00000000000..e6f9c214cb9
--- /dev/null
+++ b/searchlib/src/tests/attribute/postinglistattribute/postinglistattribute_test.sh
@@ -0,0 +1,5 @@
+#!/bin/bash
+$VALGRIND ./searchlib_postinglistattribute_test_app
+rm -rf *.dat
+rm -rf *.idx
+rm -rf *.weight
diff --git a/searchlib/src/tests/attribute/runnable.h b/searchlib/src/tests/attribute/runnable.h
new file mode 100644
index 00000000000..418230a2fc5
--- /dev/null
+++ b/searchlib/src/tests/attribute/runnable.h
@@ -0,0 +1,43 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/vespalib/util/sync.h>
+
+namespace search {
+
+class Runnable : public FastOS_Runnable
+{
+protected:
+ uint32_t _id;
+ vespalib::Monitor _cond;
+ bool _done;
+ bool _stopped;
+
+public:
+ Runnable(uint32_t id) :
+ _id(id), _cond(), _done(false), _stopped(false)
+ { }
+ void Run(FastOS_ThreadInterface *, void *) {
+ doRun();
+
+ vespalib::MonitorGuard guard(_cond);
+ _stopped = true;
+ guard.broadcast();
+ }
+ virtual void doRun() = 0;
+ void stop() {
+ vespalib::MonitorGuard guard(_cond);
+ _done = true;
+ }
+ void join() {
+ vespalib::MonitorGuard guard(_cond);
+ while (!_stopped) {
+ guard.wait();
+ }
+ }
+};
+
+} // search
+
diff --git a/searchlib/src/tests/attribute/searchable/.gitignore b/searchlib/src/tests/attribute/searchable/.gitignore
new file mode 100644
index 00000000000..663692907f6
--- /dev/null
+++ b/searchlib/src/tests/attribute/searchable/.gitignore
@@ -0,0 +1,4 @@
+/my_logctl_file
+searchlib_attribute_blueprint_test_app
+searchlib_attribute_searchable_adapter_test_app
+searchlib_attribute_weighted_set_blueprint_test_app
diff --git a/searchlib/src/tests/attribute/searchable/CMakeLists.txt b/searchlib/src/tests/attribute/searchable/CMakeLists.txt
new file mode 100644
index 00000000000..ed76520af29
--- /dev/null
+++ b/searchlib/src/tests/attribute/searchable/CMakeLists.txt
@@ -0,0 +1,22 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_attribute_searchable_adapter_test_app
+ SOURCES
+ attribute_searchable_adapter_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_attribute_searchable_adapter_test_app COMMAND sh attribute_searchable_adapter_test.sh)
+vespa_add_executable(searchlib_attribute_weighted_set_blueprint_test_app
+ SOURCES
+ attribute_weighted_set_blueprint_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_attribute_weighted_set_blueprint_test_app COMMAND searchlib_attribute_weighted_set_blueprint_test_app)
+vespa_add_executable(searchlib_attribute_blueprint_test_app
+ SOURCES
+ attributeblueprint_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_attribute_blueprint_test_app COMMAND searchlib_attribute_blueprint_test_app)
diff --git a/searchlib/src/tests/attribute/searchable/attribute_searchable_adapter_test.cpp b/searchlib/src/tests/attribute/searchable/attribute_searchable_adapter_test.cpp
new file mode 100644
index 00000000000..1d69f516b52
--- /dev/null
+++ b/searchlib/src/tests/attribute/searchable/attribute_searchable_adapter_test.cpp
@@ -0,0 +1,689 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/vespalib/testkit/test_kit.h>
+
+#include <vespa/searchcommon/attribute/iattributecontext.h>
+#include <vespa/searchlib/attribute/attribute_blueprint_factory.h>
+#include <vespa/searchlib/attribute/attributefactory.h>
+#include <vespa/searchlib/attribute/attributecontext.h>
+#include <vespa/searchlib/attribute/attributeguard.h>
+#include <vespa/searchlib/attribute/attributevector.h>
+#include <vespa/searchlib/attribute/extendableattributes.h>
+#include <vespa/searchlib/attribute/iattributemanager.h>
+#include <vespa/searchlib/attribute/predicate_attribute.h>
+#include <vespa/searchlib/attribute/singlenumericattribute.h>
+#include <vespa/searchlib/attribute/singlenumericattribute.hpp>
+#include <vespa/searchlib/attribute/singlenumericpostattribute.hpp>
+#include <vespa/searchlib/fef/fef.h>
+#include <vespa/searchlib/query/tree/location.h>
+#include <vespa/searchlib/query/tree/point.h>
+#include <vespa/searchlib/query/tree/predicate_query_term.h>
+#include <vespa/searchlib/query/tree/rectangle.h>
+#include <vespa/searchlib/query/tree/simplequery.h>
+#include <vespa/searchlib/query/weight.h>
+#include <vespa/searchlib/queryeval/blueprint.h>
+#include <vespa/searchlib/queryeval/fake_requestcontext.h>
+#include <vespa/searchlib/queryeval/field_spec.h>
+#include <vespa/searchlib/queryeval/searchiterator.h>
+#include <vespa/searchlib/queryeval/wand/parallel_weak_and_search.h>
+#include <memory>
+
+using search::AttributeEnumGuard;
+using search::AttributeFactory;
+using search::AttributeGuard;
+using search::AttributeVector;
+using search::IAttributeManager;
+using search::IntegerAttribute;
+using search::SingleStringExtAttribute;
+using search::attribute::IAttributeContext;
+using search::fef::MatchData;
+using search::fef::MatchDataLayout;
+using search::fef::TermFieldHandle;
+using search::fef::TermFieldMatchData;
+using search::query::Location;
+using search::query::Node;
+using search::query::Point;
+using search::query::PredicateQueryTerm;
+using search::query::Rectangle;
+using search::query::SimpleDotProduct;
+using search::query::SimpleLocationTerm;
+using search::query::SimplePredicateQuery;
+using search::query::SimplePrefixTerm;
+using search::query::SimpleRangeTerm;
+using search::query::SimpleSuffixTerm;
+using search::query::SimpleSubstringTerm;
+using search::query::SimpleStringTerm;
+using search::query::SimpleWandTerm;
+using search::query::SimpleWeightedSetTerm;
+using search::query::Weight;
+using search::queryeval::Blueprint;
+using search::queryeval::FieldSpec;
+using search::queryeval::FakeRequestContext;
+using search::queryeval::MinMaxPostingInfo;
+using search::queryeval::ParallelWeakAndSearch;
+using search::queryeval::PostingInfo;
+using search::queryeval::SearchIterator;
+using std::vector;
+using vespalib::string;
+using namespace search::attribute;
+using namespace search;
+
+namespace {
+
+const string field = "field";
+const string other = "other";
+const int32_t weight = 1;
+const uint32_t num_docs = 1000;
+
+class MyAttributeManager : public IAttributeManager {
+ AttributeVector::SP _attribute_vector;
+ AttributeVector::SP _other;
+
+public:
+ explicit MyAttributeManager(AttributeVector *attr)
+ : _attribute_vector(attr), _other() {}
+
+ explicit MyAttributeManager(AttributeVector::SP attr)
+ : _attribute_vector(attr), _other() {}
+
+ void set_other(AttributeVector::SP attr) {
+ _other = attr;
+ }
+
+ virtual AttributeGuard::UP getAttribute(const string &name) const {
+ if (name == field) {
+ return AttributeGuard::UP(new AttributeGuard(_attribute_vector));
+ } else if (name == other) {
+ return AttributeGuard::UP(new AttributeGuard(_other));
+ } else {
+ return AttributeGuard::UP(nullptr);
+ }
+ }
+
+ virtual AttributeGuard::UP
+ getAttributeStableEnum(const string &name) const {
+ if (name == field) {
+ return AttributeGuard::UP(new AttributeEnumGuard(_attribute_vector));
+ } else if (name == other) {
+ return AttributeGuard::UP(new AttributeEnumGuard(_other));
+ } else {
+ return AttributeGuard::UP(nullptr);
+ }
+ }
+
+ virtual void getAttributeList(vector<AttributeGuard> &) const {
+ assert(!"Not implemented");
+ }
+ virtual IAttributeContext::UP createContext() const {
+ assert(!"Not implemented");
+ return IAttributeContext::UP();
+ }
+};
+
+struct Result {
+ struct Hit {
+ uint32_t docid;
+ double raw_score;
+ int32_t match_weight;
+ Hit(uint32_t id, double raw, int32_t match_weight_in)
+ : docid(id), raw_score(raw), match_weight(match_weight_in) {}
+ };
+ size_t est_hits;
+ bool est_empty;
+ bool has_minmax;
+ int32_t min_weight;
+ int32_t max_weight;
+ size_t wand_hits;
+ int64_t wand_initial_threshold;
+ double wand_boost_factor;
+ std::vector<Hit> hits;
+ vespalib::string iterator_dump;
+
+ Result(size_t est_hits_in, bool est_empty_in)
+ : est_hits(est_hits_in), est_empty(est_empty_in),
+ has_minmax(false), min_weight(0), max_weight(0),
+ wand_hits(0), wand_initial_threshold(0), wand_boost_factor(0.0),
+ hits(), iterator_dump() {}
+
+ void set_minmax(int32_t min, int32_t max) {
+ has_minmax = true;
+ min_weight = min;
+ max_weight = max;
+ }
+};
+
+void extract_posting_info(Result &result, const PostingInfo *postingInfo) {
+ if (postingInfo != NULL) {
+ const MinMaxPostingInfo *minMax = dynamic_cast<const MinMaxPostingInfo *>(postingInfo);
+ if (minMax != NULL) {
+ result.set_minmax(minMax->getMinWeight(), minMax->getMaxWeight());
+ }
+ }
+}
+
+void extract_wand_params(Result &result, ParallelWeakAndSearch *wand) {
+ if (wand != nullptr) {
+ result.wand_hits = wand->getMatchParams().scores.getScoresToTrack();
+ result.wand_initial_threshold = wand->getMatchParams().scoreThreshold;
+ result.wand_boost_factor = wand->getMatchParams().thresholdBoostFactor;
+ }
+}
+
+Result do_search(IAttributeManager &attribute_manager, const Node &node, bool strict) {
+ uint32_t fieldId = 0;
+ AttributeContext ac(attribute_manager);
+ FakeRequestContext requestContext(&ac);
+ AttributeBlueprintFactory source;
+ MatchDataLayout mdl;
+ TermFieldHandle handle = mdl.allocTermField(fieldId);
+ MatchData::UP match_data = mdl.createMatchData();
+ Blueprint::UP bp = source.createBlueprint(requestContext, FieldSpec(field, fieldId, handle), node);
+ ASSERT_TRUE(bp.get() != nullptr);
+ Result result(bp->getState().estimate().estHits, bp->getState().estimate().empty);
+ bp->fetchPostings(strict);
+ SearchIterator::UP iterator = bp->createSearch(*match_data, strict);
+ ASSERT_TRUE(iterator.get() != nullptr);
+ iterator->initFullRange();
+ extract_posting_info(result, iterator->getPostingInfo());
+ extract_wand_params(result, dynamic_cast<ParallelWeakAndSearch*>(iterator.get()));
+ result.iterator_dump = iterator->asString();
+ for (uint32_t docid = 1; docid < num_docs; ++docid) {
+ if (iterator->seek(docid)) {
+ iterator->unpack(docid);
+ result.hits.emplace_back(docid,
+ match_data->resolveTermField(handle)->getRawScore(),
+ match_data->resolveTermField(handle)->getWeight());
+ }
+ }
+ return result;
+}
+
+bool search(const Node &node, IAttributeManager &attribute_manager,
+ bool fast_search = false, bool strict = true)
+{
+ Result result = do_search(attribute_manager, node, strict);
+ if (fast_search) {
+ EXPECT_LESS(result.est_hits, num_docs / 10);
+ } else {
+ EXPECT_TRUE(!result.est_empty);
+ EXPECT_EQUAL(num_docs, result.est_hits);
+ }
+ return (result.hits.size() == 1) && (result.hits[0].docid == (num_docs - 1));
+}
+
+bool search(const string &term, IAttributeManager &attribute_manager,
+ bool fast_search = false, bool strict = true)
+{
+ TEST_STATE(term.c_str());
+ SimpleStringTerm node(term, "field", 0, Weight(0));
+ return search(node, attribute_manager, fast_search, strict);
+}
+
+template <typename T> struct AttributeVectorTypeFinder {
+ //typedef search::SingleValueStringAttribute Type;
+ typedef SingleStringExtAttribute Type;
+ static void add(Type & a, const T & v) { a.add(v, weight); }
+};
+template <> struct AttributeVectorTypeFinder<int64_t> {
+ typedef search::SingleValueNumericAttribute<search::IntegerAttributeTemplate<int64_t> > Type;
+ static void add(Type & a, int64_t v) { a.set(a.getNumDocs()-1, v); a.commit(); }
+};
+
+void add_docs(AttributeVector *attr, size_t n) {
+ AttributeVector::DocId docid;
+ for (size_t i = 0; i < n; ++i) {
+ attr->addDoc(docid);
+ if (attr->inherits(PredicateAttribute::classId)) {
+ const_cast<uint8_t *>(static_cast<PredicateAttribute *>(attr)->getMinFeatureVector().first)[docid] = 0;
+ }
+ }
+ ASSERT_EQUAL(n - 1, docid);
+}
+
+template <typename T>
+MyAttributeManager makeAttributeManager(T value) {
+ typedef AttributeVectorTypeFinder<T> AT;
+ typedef typename AT::Type AttributeVectorType;
+ AttributeVectorType *attr = new AttributeVectorType(field);
+ add_docs(attr, num_docs);
+ AT::add(*attr, value);
+ MyAttributeManager attribute_manager(attr);
+ return attribute_manager;
+}
+
+MyAttributeManager makeFastSearchLongAttributeManager(int64_t value) {
+ Config cfg(BasicType::INT64, CollectionType::SINGLE);
+ cfg.setFastSearch(true);
+ AttributeVector::SP attr_ptr = AttributeFactory::createAttribute(field, cfg);
+ IntegerAttribute *attr = static_cast<IntegerAttribute *>(attr_ptr.get());
+ add_docs(attr, num_docs);
+ attr->update(num_docs - 1, value);
+ attr->commit();
+ MyAttributeManager attribute_manager(attr_ptr);
+ return attribute_manager;
+}
+
+TEST("requireThatIteratorsCanBeCreated") {
+ MyAttributeManager attribute_manager = makeAttributeManager("foo");
+
+ EXPECT_TRUE(search("foo", attribute_manager));
+}
+
+TEST("requireThatRangeTermsWorkToo") {
+ MyAttributeManager attribute_manager = makeAttributeManager(int64_t(42));
+
+ EXPECT_TRUE(search("[23;46]", attribute_manager));
+ EXPECT_TRUE(!search("[10;23]", attribute_manager));
+ EXPECT_TRUE(!search(">43", attribute_manager));
+ EXPECT_TRUE(search("[10;]", attribute_manager));
+}
+
+TEST("requireThatPrefixTermsWork") {
+ MyAttributeManager attribute_manager = makeAttributeManager("foo");
+
+ SimplePrefixTerm node("fo", "field", 0, Weight(0));
+ EXPECT_TRUE(search(node, attribute_manager));
+}
+
+TEST("requireThatLocationTermsWork") {
+ // 0xcc is z-curve for (10, 10).
+ MyAttributeManager attribute_manager = makeAttributeManager(int64_t(0xcc));
+
+ SimpleLocationTerm node(Location(Point(10, 10), 3, 0),
+ field, 0, Weight(0));
+ EXPECT_TRUE(search(node, attribute_manager));
+ node = SimpleLocationTerm(Location(Point(100, 100), 3, 0),
+ field, 0, Weight(0));
+ EXPECT_TRUE(!search(node, attribute_manager));
+ node = SimpleLocationTerm(Location(Point(13, 13), 4, 0),
+ field, 0, Weight(0));
+ EXPECT_TRUE(!search(node, attribute_manager));
+ node = SimpleLocationTerm(Location(Point(10, 13), 3, 0),
+ field, 0, Weight(0));
+ EXPECT_TRUE(search(node, attribute_manager));
+}
+
+TEST("requireThatOptimizedLocationTermsWork") {
+ // 0xcc is z-curve for (10, 10).
+ MyAttributeManager attribute_manager = makeFastSearchLongAttributeManager(int64_t(0xcc));
+
+ SimpleLocationTerm node(Location(Point(10, 10), 3, 0),
+ field, 0, Weight(0));
+ EXPECT_TRUE(search(node, attribute_manager, true));
+ node = SimpleLocationTerm(Location(Point(100, 100), 3, 0),
+ field, 0, Weight(0));
+ EXPECT_TRUE(!search(node, attribute_manager, true));
+ node = SimpleLocationTerm(Location(Point(13, 13), 4, 0),
+ field, 0, Weight(0));
+ EXPECT_TRUE(!search(node, attribute_manager, true));
+ node = SimpleLocationTerm(Location(Point(10, 13), 3, 0),
+ field, 0, Weight(0));
+ EXPECT_TRUE(search(node, attribute_manager, true));
+}
+
+TEST("require that optimized location search works with wrapped bounding box (no hits)") {
+ // 0xcc is z-curve for (10, 10).
+ MyAttributeManager attribute_manager = makeFastSearchLongAttributeManager(int64_t(0xcc));
+ SimpleLocationTerm term1(Location(Rectangle(5, 5, 15, 15)), field, 0, Weight(0)); // unwrapped
+ SimpleLocationTerm term2(Location(Rectangle(15, 5, 5, 15)), field, 0, Weight(0)); // wrapped x
+ SimpleLocationTerm term3(Location(Rectangle(5, 15, 15, 5)), field, 0, Weight(0)); // wrapped y
+ Result result1 = do_search(attribute_manager, term1, true);
+ Result result2 = do_search(attribute_manager, term2, true);
+ Result result3 = do_search(attribute_manager, term3, true);
+ EXPECT_EQUAL(1u, result1.hits.size());
+ EXPECT_EQUAL(0u, result2.hits.size());
+ EXPECT_EQUAL(0u, result3.hits.size());
+ EXPECT_TRUE(result1.iterator_dump.find("LocationPreFilterIterator") != vespalib::string::npos);
+ EXPECT_TRUE(result2.iterator_dump.find("EmptySearch") != vespalib::string::npos);
+ EXPECT_TRUE(result3.iterator_dump.find("EmptySearch") != vespalib::string::npos);
+}
+
+void set_weights(StringAttribute *attr, uint32_t docid,
+ int32_t foo_weight, int32_t bar_weight, int32_t baz_weight)
+{
+ attr->clearDoc(docid);
+ if (foo_weight > 0) attr->append(docid, "foo", foo_weight);
+ if (bar_weight > 0) attr->append(docid, "bar", bar_weight);
+ if (baz_weight > 0) attr->append(docid, "baz", baz_weight);
+ attr->commit();
+}
+
+MyAttributeManager make_weighted_string_attribute_manager(bool fast_search) {
+ Config cfg(BasicType::STRING, CollectionType::WSET);
+ cfg.setFastSearch(fast_search);
+ AttributeVector::SP attr_ptr = AttributeFactory::createAttribute(field, cfg);
+ StringAttribute *attr = static_cast<StringAttribute *>(attr_ptr.get());
+ add_docs(attr, num_docs);
+ set_weights(attr, 10, 0, 200, 0);
+ set_weights(attr, 20, 100, 200, 300);
+ set_weights(attr, 30, 0, 0, 300);
+ set_weights(attr, 40, 100, 0, 0);
+ set_weights(attr, 50, 1000, 0, 300);
+ MyAttributeManager attribute_manager(attr_ptr);
+ return attribute_manager;
+}
+
+TEST("require that attribute dot product works") {
+ for (int i = 0; i <= 0x3; ++i) {
+ bool fast_search = ((i & 0x1) != 0);
+ bool strict = ((i & 0x2) != 0);
+ MyAttributeManager attribute_manager = make_weighted_string_attribute_manager(fast_search);
+ SimpleDotProduct node(field, 0, Weight(1));
+ node.append(Node::UP(new SimpleStringTerm("foo", "", 0, Weight(1))));
+ node.append(Node::UP(new SimpleStringTerm("bar", "", 0, Weight(1))));
+ node.append(Node::UP(new SimpleStringTerm("baz", "", 0, Weight(1))));
+ node.append(Node::UP(new SimpleStringTerm("fox", "", 0, Weight(1))));
+ Result result = do_search(attribute_manager, node, strict);
+ ASSERT_EQUAL(5u, result.hits.size());
+ if (fast_search) {
+ EXPECT_EQUAL(8u, result.est_hits);
+ } else {
+ // 'fox' is detected to produce no hits since it has no enum value
+ EXPECT_EQUAL(num_docs * 3, result.est_hits);
+ }
+ EXPECT_FALSE(result.est_empty);
+ EXPECT_EQUAL(10u, result.hits[0].docid);
+ EXPECT_EQUAL(200.0, result.hits[0].raw_score);
+ EXPECT_EQUAL(20u, result.hits[1].docid);
+ EXPECT_EQUAL(600.0, result.hits[1].raw_score);
+ EXPECT_EQUAL(30u, result.hits[2].docid);
+ EXPECT_EQUAL(300.0, result.hits[2].raw_score);
+ EXPECT_EQUAL(40u, result.hits[3].docid);
+ EXPECT_EQUAL(100.0, result.hits[3].raw_score);
+ EXPECT_EQUAL(50u, result.hits[4].docid);
+ EXPECT_EQUAL(1300.0, result.hits[4].raw_score);
+ }
+}
+
+TEST("require that attribute dot product can produce no hits") {
+ for (int i = 0; i <= 0x3; ++i) {
+ bool fast_search = ((i & 0x1) != 0);
+ bool strict = ((i & 0x2) != 0);
+ MyAttributeManager attribute_manager = make_weighted_string_attribute_manager(fast_search);
+ SimpleDotProduct node(field, 0, Weight(1));
+ node.append(Node::UP(new SimpleStringTerm("notfoo", "", 0, Weight(1))));
+ node.append(Node::UP(new SimpleStringTerm("notbar", "", 0, Weight(1))));
+ node.append(Node::UP(new SimpleStringTerm("notbaz", "", 0, Weight(1))));
+ node.append(Node::UP(new SimpleStringTerm("notfox", "", 0, Weight(1))));
+ Result result = do_search(attribute_manager, node, strict);
+ ASSERT_EQUAL(0u, result.hits.size());
+ EXPECT_EQUAL(0u, result.est_hits);
+ EXPECT_TRUE(result.est_empty);
+ }
+}
+
+TEST("require that direct attribute iterators work") {
+ for (int i = 0; i <= 0x3; ++i) {
+ bool fast_search = ((i & 0x1) != 0);
+ bool strict = ((i & 0x2) != 0);
+ MyAttributeManager attribute_manager = make_weighted_string_attribute_manager(fast_search);
+ SimpleStringTerm empty_node("notfoo", "", 0, Weight(1));
+ Result empty_result = do_search(attribute_manager, empty_node, strict);
+ EXPECT_EQUAL(0u, empty_result.hits.size());
+ SimpleStringTerm node("foo", "", 0, Weight(1));
+ Result result = do_search(attribute_manager, node, strict);
+ if (fast_search) {
+ EXPECT_EQUAL(3u, result.est_hits);
+ EXPECT_TRUE(result.has_minmax);
+ EXPECT_EQUAL(100, result.min_weight);
+ EXPECT_EQUAL(1000, result.max_weight);
+ EXPECT_TRUE(result.iterator_dump.find("DocumentWeightSearchIterator")
+ != vespalib::string::npos);
+ } else {
+ EXPECT_EQUAL(num_docs, result.est_hits);
+ EXPECT_FALSE(result.has_minmax);
+ EXPECT_TRUE(result.iterator_dump.find("DocumentWeightSearchIterator")
+ == vespalib::string::npos);
+ }
+ ASSERT_EQUAL(3u, result.hits.size());
+ EXPECT_FALSE(result.est_empty);
+ EXPECT_EQUAL(20u, result.hits[0].docid);
+ EXPECT_EQUAL(40u, result.hits[1].docid);
+ EXPECT_EQUAL(50u, result.hits[2].docid);
+ }
+}
+
+const char *as_str(bool flag) { return flag? "true" : "false"; }
+
+TEST("require that attribute parallel wand works") {
+ for (int i = 0; i <= 0x3; ++i) {
+ bool fast_search = ((i & 0x1) != 0);
+ bool strict = ((i & 0x2) != 0);
+ MyAttributeManager attribute_manager = make_weighted_string_attribute_manager(fast_search);
+ SimpleWandTerm node(field, 0, Weight(1), 10, 500, 1.5);
+ node.append(Node::UP(new SimpleStringTerm("foo", "", 0, Weight(1))));
+ node.append(Node::UP(new SimpleStringTerm("bar", "", 0, Weight(1))));
+ node.append(Node::UP(new SimpleStringTerm("baz", "", 0, Weight(1))));
+ node.append(Node::UP(new SimpleStringTerm("fox", "", 0, Weight(1))));
+ Result result = do_search(attribute_manager, node, strict);
+ EXPECT_FALSE(result.est_empty);
+ if (fast_search) {
+ EXPECT_EQUAL(8u, result.est_hits);
+ } else {
+ // 'fox' is detected to produce no hits since it has no enum value
+ EXPECT_EQUAL(num_docs * 3, result.est_hits);
+ }
+ if (EXPECT_EQUAL(2u, result.hits.size())) {
+ if (result.iterator_dump.find("MonitoringDumpIterator") == vespalib::string::npos) {
+ EXPECT_EQUAL(10u, result.wand_hits);
+ EXPECT_EQUAL(500, result.wand_initial_threshold);
+ EXPECT_EQUAL(1.5, result.wand_boost_factor);
+ }
+ EXPECT_EQUAL(20u, result.hits[0].docid);
+ EXPECT_EQUAL(600.0, result.hits[0].raw_score);
+ EXPECT_EQUAL(50u, result.hits[1].docid);
+ EXPECT_EQUAL(1300.0, result.hits[1].raw_score);
+ } else {
+ fprintf(stderr, " (fast_search: %s, strict: %s)\n",
+ as_str(fast_search), as_str(strict));
+ assert(false);
+ }
+ }
+}
+
+TEST("require that attribute weighted set term works") {
+ for (int i = 0; i <= 0x3; ++i) {
+ bool fast_search = ((i & 0x1) != 0);
+ bool strict = ((i & 0x2) != 0);
+ MyAttributeManager attribute_manager = make_weighted_string_attribute_manager(fast_search);
+ SimpleWeightedSetTerm node(field, 0, Weight(1));
+ node.append(Node::UP(new SimpleStringTerm("foo", "", 0, Weight(10))));
+ node.append(Node::UP(new SimpleStringTerm("bar", "", 0, Weight(20))));
+ node.append(Node::UP(new SimpleStringTerm("baz", "", 0, Weight(30))));
+ node.append(Node::UP(new SimpleStringTerm("fox", "", 0, Weight(40))));
+ Result result = do_search(attribute_manager, node, strict);
+ EXPECT_FALSE(result.est_empty);
+ ASSERT_EQUAL(5u, result.hits.size());
+ if (fast_search && result.iterator_dump.find("MonitoringDumpIterator") == vespalib::string::npos) {
+ fprintf(stderr, "DUMP: %s\n", result.iterator_dump.c_str());
+ EXPECT_TRUE(result.iterator_dump.find("AttributeIteratorPack") != vespalib::string::npos);
+ }
+ EXPECT_EQUAL(10u, result.hits[0].docid);
+ EXPECT_EQUAL(20, result.hits[0].match_weight);
+ EXPECT_EQUAL(20u, result.hits[1].docid);
+ EXPECT_EQUAL(30, result.hits[1].match_weight);
+ EXPECT_EQUAL(30u, result.hits[2].docid);
+ EXPECT_EQUAL(30, result.hits[2].match_weight);
+ EXPECT_EQUAL(40u, result.hits[3].docid);
+ EXPECT_EQUAL(10, result.hits[3].match_weight);
+ EXPECT_EQUAL(50u, result.hits[4].docid);
+ EXPECT_EQUAL(30, result.hits[4].match_weight);
+ }
+}
+
+TEST("require that predicate query in non-predicate field yields empty.") {
+ MyAttributeManager attribute_manager = makeAttributeManager("foo");
+
+ PredicateQueryTerm::UP term(new PredicateQueryTerm);
+ SimplePredicateQuery node(std::move(term), field, 0, Weight(1));
+ Result result = do_search(attribute_manager, node, true);
+ EXPECT_TRUE(result.est_empty);
+ EXPECT_EQUAL(0u, result.hits.size());
+}
+
+TEST("require that predicate query in predicate field yields results.") {
+ PredicateAttribute *attr =
+ new PredicateAttribute(
+ field, Config(BasicType::PREDICATE,
+ CollectionType::SINGLE));
+ add_docs(attr, num_docs);
+ attr->getIndex().indexEmptyDocument(2); // matches anything
+ attr->getIndex().commit();
+ const_cast<PredicateAttribute::IntervalRange *>(attr->getIntervalRangeVector())[2] = 1u;
+ MyAttributeManager attribute_manager(attr);
+
+ PredicateQueryTerm::UP term(new PredicateQueryTerm);
+ SimplePredicateQuery node(std::move(term), field, 0, Weight(1));
+ Result result = do_search(attribute_manager, node, true);
+ EXPECT_FALSE(result.est_empty);
+ EXPECT_EQUAL(1u, result.hits.size());
+}
+
+TEST("require that substring terms work") {
+ MyAttributeManager attribute_manager = make_weighted_string_attribute_manager(true);
+ SimpleSubstringTerm node("a", "", 0, Weight(1));
+ Result result = do_search(attribute_manager, node, true);
+ ASSERT_EQUAL(4u, result.hits.size());
+ EXPECT_EQUAL(10u, result.hits[0].docid);
+ EXPECT_EQUAL(20u, result.hits[1].docid);
+ EXPECT_EQUAL(30u, result.hits[2].docid);
+ EXPECT_EQUAL(50u, result.hits[3].docid);
+}
+
+TEST("require that suffix terms work") {
+ MyAttributeManager attribute_manager = make_weighted_string_attribute_manager(true);
+ SimpleSuffixTerm node("oo", "", 0, Weight(1));
+ Result result = do_search(attribute_manager, node, true);
+ ASSERT_EQUAL(3u, result.hits.size());
+ EXPECT_EQUAL(20u, result.hits[0].docid);
+ EXPECT_EQUAL(40u, result.hits[1].docid);
+ EXPECT_EQUAL(50u, result.hits[2].docid);
+}
+
+void set_attr_value(AttributeVector &attr, uint32_t docid, size_t value) {
+ IntegerAttribute *int_attr = dynamic_cast<IntegerAttribute *>(&attr);
+ FloatingPointAttribute *float_attr = dynamic_cast<FloatingPointAttribute *>(&attr);
+ StringAttribute *string_attr = dynamic_cast<StringAttribute *>(&attr);
+ if (int_attr != nullptr) {
+ int_attr->update(docid, value);
+ int_attr->commit();
+ } else if (float_attr != nullptr) {
+ float_attr->update(docid, value);
+ float_attr->commit();
+ } else if (string_attr != nullptr) {
+ ASSERT_LESS(value, size_t(27*26 + 26));
+ vespalib::string str;
+ str.push_back('a' + value / 27);
+ str.push_back('a' + value % 27);
+ string_attr->update(docid, str);
+ string_attr->commit();
+ } else {
+ ASSERT_TRUE(false);
+ }
+}
+
+MyAttributeManager make_diversity_setup(BasicType::Type field_type,
+ bool field_fast_search,
+ BasicType::Type other_type,
+ bool other_fast_search)
+{
+ Config field_cfg(field_type, CollectionType::SINGLE);
+ field_cfg.setFastSearch(field_fast_search);
+ AttributeVector::SP field_attr = AttributeFactory::createAttribute(field, field_cfg);
+ Config other_cfg(other_type, CollectionType::SINGLE);
+ other_cfg.setFastSearch(other_fast_search);
+ AttributeVector::SP other_attr = AttributeFactory::createAttribute(other, other_cfg);
+ add_docs(&*field_attr, num_docs);
+ add_docs(&*other_attr, num_docs);
+ for (size_t i = 1; i < num_docs; ++i) {
+ set_attr_value(*field_attr, i, i / 5);
+ set_attr_value(*other_attr, i, i / 10);
+ }
+ MyAttributeManager attribute_manager(field_attr);
+ attribute_manager.set_other(other_attr);
+ return attribute_manager;
+}
+
+size_t diversity_hits(IAttributeManager &manager, const vespalib::string &term, bool strict) {
+ SimpleRangeTerm node(term, "", 0, Weight(1));
+ Result result = do_search(manager, node, strict);
+ return result.hits.size();
+}
+
+std::pair<size_t,size_t> diversity_docid_range(IAttributeManager &manager, const vespalib::string &term, bool strict) {
+ SimpleRangeTerm node(term, "", 0, Weight(1));
+ Result result = do_search(manager, node, strict);
+ std::pair<size_t, size_t> range(0, 0);
+ for (const Result::Hit &hit: result.hits) {
+ if (range.first == 0) {
+ range.first = hit.docid;
+ range.second = hit.docid;
+ } else {
+ EXPECT_GREATER(size_t(hit.docid), range.second);
+ range.second = hit.docid;
+ }
+ }
+ return range;
+}
+
+TEST("require that diversity range searches work for various types") {
+ for (auto field_type: std::vector<BasicType::Type>({BasicType::INT32, BasicType::DOUBLE})) {
+ for (auto other_type: std::vector<BasicType::Type>({BasicType::INT16, BasicType::INT32, BasicType::INT64,
+ BasicType::FLOAT, BasicType::DOUBLE, BasicType::STRING}))
+ {
+ for (bool other_fast_search: std::vector<bool>({true, false})) {
+ MyAttributeManager manager = make_diversity_setup(field_type, true, other_type, other_fast_search);
+ for (bool strict: std::vector<bool>({true, false})) {
+ TEST_STATE(vespalib::make_string("field_type: %s, other_type: %s, other_fast_search: %s, strict: %s",
+ BasicType(field_type).asString(), BasicType(other_type).asString(),
+ other_fast_search ? "true" : "false", strict ? "true" : "false").c_str());
+ EXPECT_EQUAL(999u, diversity_hits(manager, "[;;1000;other;10]", strict));
+ EXPECT_EQUAL(999u, diversity_hits(manager, "[;;-1000;other;10]", strict));
+ EXPECT_EQUAL(100u, diversity_hits(manager, "[;;1000;other;1]", strict));
+ EXPECT_EQUAL(100u, diversity_hits(manager, "[;;-1000;other;1]", strict));
+ EXPECT_EQUAL(300u, diversity_hits(manager, "[;;1000;other;3]", strict));
+ EXPECT_EQUAL(300u, diversity_hits(manager, "[;;-1000;other;3]", strict));
+ EXPECT_EQUAL(10u, diversity_hits(manager, "[;;10;other;3]", strict));
+ EXPECT_EQUAL(10u, diversity_hits(manager, "[;;-10;other;3]", strict));
+ EXPECT_EQUAL(1u, diversity_docid_range(manager, "[;;10;other;3]", strict).first);
+ EXPECT_EQUAL(30u, diversity_docid_range(manager, "[;;10;other;3]", strict).second);
+ EXPECT_EQUAL(965u, diversity_docid_range(manager, "[;;-10;other;3]", strict).first);
+ EXPECT_EQUAL(997u, diversity_docid_range(manager, "[;;-10;other;3]", strict).second);
+ }
+ }
+ }
+ }
+}
+
+TEST("require that diversity also works for a single unique value") {
+ MyAttributeManager manager = make_diversity_setup(BasicType::INT32, true, BasicType::INT32, true);
+ EXPECT_EQUAL(2u, diversity_hits(manager, "[2;2;100;other;2]", true));
+ EXPECT_EQUAL(2u, diversity_hits(manager, "[2;2;-100;other;2]", true));
+ EXPECT_EQUAL(2u, diversity_hits(manager, "[2;2;100;other;2]", false));
+ EXPECT_EQUAL(2u, diversity_hits(manager, "[2;2;-100;other;2]", false));
+}
+
+TEST("require that diversity range searches gives empty results for non-existing diversity attributes") {
+ MyAttributeManager manager = make_diversity_setup(BasicType::INT32, true, BasicType::INT32, true);
+ EXPECT_EQUAL(0u, diversity_hits(manager, "[;;1000;bogus;10]", true));
+ EXPECT_EQUAL(0u, diversity_hits(manager, "[;;-1000;bogus;10]", true));
+ EXPECT_EQUAL(0u, diversity_hits(manager, "[;;1000;;10]", true));
+ EXPECT_EQUAL(0u, diversity_hits(manager, "[;;-1000;;10]", true));
+}
+
+TEST("require that loose diversity gives enough diversity and hits while doing less work") {
+ MyAttributeManager manager = make_diversity_setup(BasicType::INT32, true, BasicType::INT32, true);
+ EXPECT_EQUAL(999u, diversity_hits(manager, "[;;1000;other;10;4;loose]", true));
+ EXPECT_EQUAL(1u, diversity_docid_range(manager, "[;;10;other;3;2;loose]", true).first);
+ EXPECT_EQUAL(16u, diversity_docid_range(manager, "[;;10;other;3;2;loose]", true).second);
+}
+
+TEST("require that strict diversity gives enough diversity and hits while doing less work, even though more than loose, but more correct than loose") {
+ MyAttributeManager manager = make_diversity_setup(BasicType::INT32, true, BasicType::INT32, true);
+ EXPECT_EQUAL(999u, diversity_hits(manager, "[;;-1000;other;10;4;strict]", true));
+ EXPECT_EQUAL(1u, diversity_docid_range(manager, "[;;10;other;3;2;strict]", true).first);
+ EXPECT_EQUAL(23u, diversity_docid_range(manager, "[;;10;other;3;2;strict]", true).second);
+}
+
+} // namespace
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/attribute/searchable/attribute_searchable_adapter_test.sh b/searchlib/src/tests/attribute/searchable/attribute_searchable_adapter_test.sh
new file mode 100755
index 00000000000..9fcee4b1ebb
--- /dev/null
+++ b/searchlib/src/tests/attribute/searchable/attribute_searchable_adapter_test.sh
@@ -0,0 +1,4 @@
+#!/bin/bash
+$VALGRIND ./searchlib_attribute_searchable_adapter_test_sh
+rm -f ./my_logctl_file
+VESPA_LOG_CONTROL_FILE=./my_logctl_file VESPA_LOG_LEVEL=all $VALGRIND ./searchlib_attribute_searchable_adapter_test_app
diff --git a/searchlib/src/tests/attribute/searchable/attribute_weighted_set_blueprint_test.cpp b/searchlib/src/tests/attribute/searchable/attribute_weighted_set_blueprint_test.cpp
new file mode 100644
index 00000000000..bd781a37a5b
--- /dev/null
+++ b/searchlib/src/tests/attribute/searchable/attribute_weighted_set_blueprint_test.cpp
@@ -0,0 +1,231 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/vespalib/testkit/testapp.h>
+
+#include <vespa/searchlib/attribute/attribute_blueprint_factory.h>
+#include <vespa/searchlib/attribute/attribute_weighted_set_blueprint.h>
+#include <vespa/searchlib/attribute/iattributemanager.h>
+#include <vespa/searchlib/attribute/attributecontext.h>
+#include <vespa/searchlib/attribute/attributevector.h>
+#include <vespa/searchlib/attribute/extendableattributes.h>
+#include <vespa/searchlib/attribute/singlestringattribute.h>
+#include <vespa/searchlib/attribute/attributefactory.h>
+#include <vespa/searchlib/fef/fef.h>
+#include <vespa/searchlib/query/tree/simplequery.h>
+#include <vespa/searchlib/queryeval/field_spec.h>
+#include <vespa/searchlib/queryeval/searchiterator.h>
+#include <vespa/searchlib/queryeval/blueprint.h>
+#include <vespa/searchlib/queryeval/fake_result.h>
+#include <vespa/searchlib/queryeval/weighted_set_term_search.h>
+#include <vespa/searchlib/queryeval/fake_requestcontext.h>
+#include <memory>
+#include <string>
+#include <map>
+
+#include <vespa/searchlib/attribute/enumstore.hpp>
+#include <vespa/searchlib/attribute/singlestringattribute.h>
+
+using namespace search;
+using namespace search::query;
+using namespace search::fef;
+using namespace search::queryeval;
+using namespace search::attribute;
+
+namespace {
+
+class FakeAttributeManager : public IAttributeManager
+{
+private:
+ typedef std::map<std::string, AttributeVector::SP> Map;
+ Map _map;
+
+ AttributeVector::SP lookup(const std::string &name) const {
+ Map::const_iterator pos = _map.find(name);
+ if (pos == _map.end()) {
+ return AttributeVector::SP();
+ }
+ return pos->second;
+ }
+
+public:
+ FakeAttributeManager() : _map() {}
+
+ void addAttribute(AttributeVector::SP attr) {
+ _map[attr->getName()] = attr;
+ }
+
+ virtual AttributeGuard::UP getAttribute(const vespalib::string &name) const {
+ return AttributeGuard::UP(new AttributeGuard(lookup(name)));
+ }
+
+ virtual AttributeGuard::UP getAttributeStableEnum(const vespalib::string &name) const {
+ return AttributeGuard::UP(new AttributeEnumGuard(lookup(name)));
+ }
+
+ virtual void getAttributeList(std::vector<AttributeGuard> &list) const {
+ Map::const_iterator pos = _map.begin();
+ for (; pos != _map.end(); ++pos) {
+ list.push_back(pos->second);
+ }
+ }
+
+ virtual IAttributeContext::UP createContext() const {
+ return IAttributeContext::UP(new AttributeContext(*this));
+ }
+};
+
+void
+setupAttributeManager(FakeAttributeManager &manager)
+{
+ AttributeVector::DocId docId;
+ {
+ AttributeVector::SP attr_sp = AttributeFactory::createAttribute(
+ "integer", Config(BasicType("int64")));
+ IntegerAttribute *attr = (IntegerAttribute*)(attr_sp.get());
+ attr->addDoc(docId);
+ assert(0u == docId);
+ for (size_t i = 1; i < 10; ++i) {
+ attr->addDoc(docId);
+ assert(i == docId);
+ attr->update(docId, i);
+ attr->commit();
+ }
+ manager.addAttribute(attr_sp);
+ }
+ {
+ AttributeVector::SP attr_sp = AttributeFactory::createAttribute(
+ "string", Config(BasicType("string")));
+ StringAttribute *attr = (StringAttribute*)(attr_sp.get());
+ attr->addDoc(docId);
+ assert(0u == docId);
+ for (size_t i = 1; i < 10; ++i) {
+ attr->addDoc(docId);
+ assert(i == docId);
+ attr->update(i, std::string(1, '1' + i - 1).c_str());
+ attr->commit();
+ }
+ manager.addAttribute(attr_sp);
+ }
+ {
+ AttributeVector::SP attr_sp = AttributeFactory::createAttribute(
+ "multi", Config(BasicType("int64"), search::attribute::CollectionType("array")));
+ IntegerAttribute *attr = (IntegerAttribute*)(attr_sp.get());
+ attr->addDoc(docId);
+ assert(0u == docId);
+ for (size_t i = 1; i < 10; ++i) {
+ attr->addDoc(docId);
+ assert(i == docId);
+ attr->append(docId, i, 0);
+ attr->append(docId, i + 10, 1);
+ attr->commit();
+ }
+ manager.addAttribute(attr_sp);
+ }
+}
+
+struct WS {
+ static const uint32_t fieldId = 42;
+ IAttributeManager & attribute_manager;
+ MatchDataLayout layout;
+ TermFieldHandle handle;
+ std::vector<std::pair<std::string, uint32_t> > tokens;
+
+ WS(IAttributeManager & manager) : attribute_manager(manager), layout(), handle(layout.allocTermField(fieldId)), tokens() {
+ MatchData::UP tmp = layout.createMatchData();
+ ASSERT_TRUE(tmp->resolveTermField(handle)->getFieldId() == fieldId);
+ }
+
+ WS &add(const std::string &token, uint32_t weight) {
+ tokens.push_back(std::make_pair(token, weight));
+ return *this;
+ }
+
+ Node::UP createNode() const {
+ SimpleWeightedSetTerm *node = new SimpleWeightedSetTerm("view", 0, Weight(0));
+ for (size_t i = 0; i < tokens.size(); ++i) {
+ node->append(Node::UP(new SimpleStringTerm(tokens[i].first, "view", 0, Weight(tokens[i].second))));
+ }
+ return Node::UP(node);
+ }
+
+ bool isGenericSearch(Searchable &searchable, const std::string &field, bool strict) const {
+ AttributeContext ac(attribute_manager);
+ FakeRequestContext requestContext(&ac);
+ MatchData::UP md = layout.createMatchData();
+ Node::UP node = createNode();
+ FieldSpecList fields = FieldSpecList().add(FieldSpec(field, fieldId, handle));
+ queryeval::Blueprint::UP bp = searchable.createBlueprint(requestContext, fields, *node);
+ bp->fetchPostings(strict);
+ SearchIterator::UP sb = bp->createSearch(*md, strict);
+ return (dynamic_cast<WeightedSetTermSearch*>(sb.get()) != 0);
+ }
+
+ FakeResult search(Searchable &searchable, const std::string &field, bool strict) const {
+ AttributeContext ac(attribute_manager);
+ FakeRequestContext requestContext(&ac);
+ MatchData::UP md = layout.createMatchData();
+ Node::UP node = createNode();
+ FieldSpecList fields = FieldSpecList().add(FieldSpec(field, fieldId, handle));
+ queryeval::Blueprint::UP bp = searchable.createBlueprint(requestContext, fields, *node);
+ bp->fetchPostings(strict);
+ SearchIterator::UP sb = bp->createSearch(*md, strict);
+ FakeResult result;
+ sb->initFullRange();
+ for (uint32_t docId = 1; docId < 10; ++docId) {
+ if (sb->seek(docId)) {
+ sb->unpack(docId);
+ result.doc(docId);
+ TermFieldMatchData &data = *md->resolveTermField(handle);
+ FieldPositionsIterator itr = data.getIterator();
+ for (; itr.valid(); itr.next()) {
+ result.elem(itr.getElementId());
+ result.weight(itr.getElementWeight());
+ result.pos(itr.getPosition());
+ }
+ }
+ }
+ return result;
+ }
+};
+
+} // namespace <unnamed>
+
+class Test : public vespalib::TestApp
+{
+public:
+ int Main();
+};
+
+int
+Test::Main()
+{
+ TEST_INIT("attribute_weighted_set_test");
+ {
+ FakeAttributeManager manager;
+ setupAttributeManager(manager);
+ AttributeBlueprintFactory adapter;
+
+ FakeResult expect = FakeResult()
+ .doc(3).elem(0).weight(30).pos(0)
+ .doc(5).elem(0).weight(50).pos(0)
+ .doc(7).elem(0).weight(70).pos(0);
+ WS ws = WS(manager).add("7", 70).add("5", 50).add("3", 30);
+
+ EXPECT_TRUE(ws.isGenericSearch(adapter, "integer", true));
+ EXPECT_TRUE(!ws.isGenericSearch(adapter, "integer", false));
+ EXPECT_TRUE(ws.isGenericSearch(adapter, "string", true));
+ EXPECT_TRUE(!ws.isGenericSearch(adapter, "string", false));
+ EXPECT_TRUE(ws.isGenericSearch(adapter, "multi", true));
+ EXPECT_TRUE(ws.isGenericSearch(adapter, "multi", false));
+
+ EXPECT_EQUAL(expect, ws.search(adapter, "integer", true));
+ EXPECT_EQUAL(expect, ws.search(adapter, "integer", false));
+ EXPECT_EQUAL(expect, ws.search(adapter, "string", true));
+ EXPECT_EQUAL(expect, ws.search(adapter, "string", false));
+ EXPECT_EQUAL(expect, ws.search(adapter, "multi", true));
+ EXPECT_EQUAL(expect, ws.search(adapter, "multi", false));
+ }
+ TEST_DONE();
+}
+
+TEST_APPHOOK(Test);
diff --git a/searchlib/src/tests/attribute/searchable/attributeblueprint_test.cpp b/searchlib/src/tests/attribute/searchable/attributeblueprint_test.cpp
new file mode 100644
index 00000000000..ed851d872e1
--- /dev/null
+++ b/searchlib/src/tests/attribute/searchable/attributeblueprint_test.cpp
@@ -0,0 +1,240 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("attributeblueprint_test");
+
+#include <vespa/searchcommon/attribute/iattributecontext.h>
+#include <vespa/searchlib/attribute/attribute_blueprint_factory.h>
+#include <vespa/searchlib/attribute/attributeguard.h>
+#include <vespa/searchlib/attribute/attributecontext.h>
+#include <vespa/searchlib/attribute/attributevector.h>
+#include <vespa/searchlib/attribute/extendableattributes.h>
+#include <vespa/searchlib/attribute/singlenumericattribute.h>
+#include <vespa/searchlib/attribute/singlenumericattribute.hpp>
+#include <vespa/searchlib/attribute/singlenumericpostattribute.hpp>
+#include <vespa/searchlib/attribute/iattributemanager.h>
+#include <vespa/searchlib/fef/matchdata.h>
+#include <vespa/searchlib/fef/termfieldmatchdata.h>
+#include <vespa/searchlib/query/tree/location.h>
+#include <vespa/searchlib/query/tree/point.h>
+#include <vespa/searchlib/query/tree/simplequery.h>
+#include <vespa/searchlib/query/weight.h>
+#include <vespa/searchlib/queryeval/searchiterator.h>
+#include <vespa/searchlib/queryeval/fake_requestcontext.h>
+#include <vespa/vespalib/testkit/testapp.h>
+#include <memory>
+#include <string>
+
+using search::AttributeEnumGuard;
+using search::AttributeGuard;
+using search::AttributeVector;
+using search::IAttributeManager;
+using search::SingleStringExtAttribute;
+using search::attribute::IAttributeContext;
+using search::fef::MatchData;
+using search::fef::TermFieldMatchData;
+using search::query::Location;
+using search::query::Node;
+using search::query::Point;
+using search::query::SimpleLocationTerm;
+using search::query::SimplePrefixTerm;
+using search::query::SimpleStringTerm;
+using search::query::Weight;
+using search::queryeval::Blueprint;
+using search::queryeval::FieldSpec;
+using search::queryeval::SearchIterator;
+using search::queryeval::FakeRequestContext;
+using std::string;
+using std::vector;
+using namespace search::attribute;
+using namespace search;
+
+namespace {
+
+class Test : public vespalib::TestApp {
+ void requireThatIteratorsCanBeCreated();
+ void requireThatRangeTermsWorkToo();
+ void requireThatPrefixTermsWork();
+ void requireThatLocationTermsWork();
+ void requireThatFastSearchLocationTermsWork();
+
+ bool search(const string &term, IAttributeManager &attribute_manager);
+ bool search(const Node &term, IAttributeManager &attribute_manager);
+
+public:
+ int Main();
+};
+
+int
+Test::Main()
+{
+ TEST_INIT("attributeblueprint_test");
+
+ TEST_DO(requireThatIteratorsCanBeCreated());
+ TEST_DO(requireThatRangeTermsWorkToo());
+ TEST_DO(requireThatPrefixTermsWork());
+ TEST_DO(requireThatLocationTermsWork());
+ TEST_DO(requireThatFastSearchLocationTermsWork());
+
+ TEST_DONE();
+}
+
+const string field = "field";
+const int32_t weight = 1;
+
+class MyAttributeManager : public IAttributeManager {
+ AttributeVector::SP _attribute_vector;
+ AttributeVector::DocId _docid;
+
+public:
+ MyAttributeManager(AttributeVector *attr)
+ : _attribute_vector(attr) {}
+
+ virtual AttributeGuard::UP getAttribute(const string &) const {
+ return AttributeGuard::UP(new AttributeGuard(_attribute_vector));
+ }
+
+ virtual AttributeGuard::UP
+ getAttributeStableEnum(const string &) const {
+ return AttributeGuard::UP(new AttributeEnumGuard(_attribute_vector));
+ }
+
+ virtual void getAttributeList(vector<AttributeGuard> &) const {
+ assert(!"Not implemented");
+ }
+ virtual IAttributeContext::UP createContext() const {
+ assert(!"Not implemented");
+ return IAttributeContext::UP();
+ }
+};
+
+bool Test::search(const string &term, IAttributeManager &attribute_manager) {
+ TEST_STATE(term.c_str());
+ SimpleStringTerm node(term, "field", 0, Weight(0));
+ bool ret = search(node, attribute_manager);
+ return ret;
+}
+
+bool Test::search(const Node &node, IAttributeManager &attribute_manager) {
+ AttributeContext ac(attribute_manager);
+ FakeRequestContext requestContext(&ac);
+ MatchData::UP md(MatchData::makeTestInstance(0, 1, 1));
+ AttributeBlueprintFactory source;
+ Blueprint::UP result = source.createBlueprint(requestContext, FieldSpec(field, 0, 0), node);
+ ASSERT_TRUE(result.get());
+ EXPECT_TRUE(!result->getState().estimate().empty);
+ EXPECT_EQUAL(3u, result->getState().estimate().estHits);
+ result->fetchPostings(true);
+ SearchIterator::UP iterator = result->createSearch(*md, true);
+ ASSERT_TRUE((bool)iterator);
+ iterator->initFullRange();
+ EXPECT_TRUE(!iterator->seek(1));
+ return iterator->seek(2);
+}
+
+template <typename T> struct AttributeVectorTypeFinder {
+ typedef SingleStringExtAttribute Type;
+ static void add(Type & a, const T & v) { a.add(v, weight); }
+};
+template <> struct AttributeVectorTypeFinder<int64_t> {
+ typedef search::SingleValueNumericAttribute<search::IntegerAttributeTemplate<int64_t> > Type;
+ static void add(Type & a, int64_t v) { a.set(a.getNumDocs()-1, v); a.commit(); }
+};
+
+struct FastSearchLongAttribute {
+ typedef search::SingleValueNumericPostingAttribute< search::EnumAttribute<search::IntegerAttributeTemplate<int64_t> > > Type;
+ static void add(Type & a, int64_t v) { a.update(a.getNumDocs()-1, v); a.commit(); }
+};
+
+template <typename AT, typename T>
+MyAttributeManager fill(typename AT::Type * attr, T value) {
+ AttributeVector::DocId docid;
+ attr->addDoc(docid);
+ attr->addDoc(docid);
+ attr->addDoc(docid);
+ assert(2u == docid);
+ AT::add(*attr, value);
+ MyAttributeManager attribute_manager(attr);
+ return attribute_manager;
+}
+
+template <typename T>
+MyAttributeManager makeAttributeManager(T value) {
+ typedef AttributeVectorTypeFinder<T> AT;
+ typedef typename AT::Type AttributeVectorType;
+ AttributeVectorType *attr = new AttributeVectorType(field);
+ return fill<AT, T>(attr, value);
+}
+
+MyAttributeManager makeFastSearchLongAttribute(int64_t value) {
+ typedef FastSearchLongAttribute::Type AttributeVectorType;
+ Config cfg(BasicType::fromType(int64_t()), CollectionType::SINGLE);
+ cfg.setFastSearch(true);
+ AttributeVectorType *attr = new AttributeVectorType(field, cfg);
+ return fill<FastSearchLongAttribute, int64_t>(attr, value);
+}
+
+void Test::requireThatIteratorsCanBeCreated() {
+ MyAttributeManager attribute_manager = makeAttributeManager("foo");
+
+ EXPECT_TRUE(search("foo", attribute_manager));
+}
+
+void Test::requireThatRangeTermsWorkToo() {
+ MyAttributeManager attribute_manager = makeAttributeManager(int64_t(42));
+
+ EXPECT_TRUE(search("[23;46]", attribute_manager));
+ EXPECT_TRUE(!search("[10;23]", attribute_manager));
+ EXPECT_TRUE(!search(">43", attribute_manager));
+ EXPECT_TRUE(search("[10;]", attribute_manager));
+}
+
+void Test::requireThatPrefixTermsWork()
+{
+ MyAttributeManager attribute_manager = makeAttributeManager("foo");
+
+ SimplePrefixTerm node("fo", "field", 0, Weight(0));
+ EXPECT_TRUE(search(node, attribute_manager));
+}
+
+void Test::requireThatLocationTermsWork() {
+ // 0xcc is z-curve for (10, 10).
+ MyAttributeManager attribute_manager = makeAttributeManager(int64_t(0xcc));
+
+ SimpleLocationTerm node(Location(Point(10, 10), 3, 0),
+ field, 0, Weight(0));
+ EXPECT_TRUE(search(node, attribute_manager));
+ node = SimpleLocationTerm(Location(Point(100, 100), 3, 0),
+ field, 0, Weight(0));
+ EXPECT_TRUE(!search(node, attribute_manager));
+ node = SimpleLocationTerm(Location(Point(13, 13), 4, 0),
+ field, 0, Weight(0));
+ EXPECT_TRUE(!search(node, attribute_manager));
+ node = SimpleLocationTerm(Location(Point(10, 13), 3, 0),
+ field, 0, Weight(0));
+ EXPECT_TRUE(search(node, attribute_manager));
+}
+
+void Test::requireThatFastSearchLocationTermsWork() {
+ // 0xcc is z-curve for (10, 10).
+ MyAttributeManager attribute_manager = makeFastSearchLongAttribute(int64_t(0xcc));
+
+ SimpleLocationTerm node(Location(Point(10, 10), 3, 0),
+ field, 0, Weight(0));
+#if 0
+ EXPECT_TRUE(search(node, attribute_manager));
+ node = SimpleLocationTerm(Location(Point(100, 100), 3, 0),
+ field, 0, Weight(0));
+ EXPECT_TRUE(!search(node, attribute_manager));
+ node = SimpleLocationTerm(Location(Point(13, 13), 4, 0),
+ field, 0, Weight(0));
+ EXPECT_TRUE(!search(node, attribute_manager));
+ node = SimpleLocationTerm(Location(Point(10, 13), 3, 0),
+ field, 0, Weight(0));
+ EXPECT_TRUE(search(node, attribute_manager));
+#endif
+}
+
+} // namespace
+
+TEST_APPHOOK(Test);
diff --git a/searchlib/src/tests/attribute/searchcontext/.gitignore b/searchlib/src/tests/attribute/searchcontext/.gitignore
new file mode 100644
index 00000000000..61dc5e8fc8e
--- /dev/null
+++ b/searchlib/src/tests/attribute/searchcontext/.gitignore
@@ -0,0 +1,4 @@
+.depend
+Makefile
+searchcontext_test
+searchlib_searchcontext_test_app
diff --git a/searchlib/src/tests/attribute/searchcontext/CMakeLists.txt b/searchlib/src/tests/attribute/searchcontext/CMakeLists.txt
new file mode 100644
index 00000000000..24652373a00
--- /dev/null
+++ b/searchlib/src/tests/attribute/searchcontext/CMakeLists.txt
@@ -0,0 +1,9 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_searchcontext_test_app
+ SOURCES
+ searchcontext.cpp
+ DEPENDS
+ searchlib
+ searchlib_test
+)
+vespa_add_test(NAME searchlib_searchcontext_test_app COMMAND sh searchcontext_test.sh)
diff --git a/searchlib/src/tests/attribute/searchcontext/DESC b/searchlib/src/tests/attribute/searchcontext/DESC
new file mode 100644
index 00000000000..8ce9805dbb0
--- /dev/null
+++ b/searchlib/src/tests/attribute/searchcontext/DESC
@@ -0,0 +1 @@
+Unit test for AttributeVector::SearchContext using all attribute vector implementations.
diff --git a/searchlib/src/tests/attribute/searchcontext/FILES b/searchlib/src/tests/attribute/searchcontext/FILES
new file mode 100644
index 00000000000..cebd66e863f
--- /dev/null
+++ b/searchlib/src/tests/attribute/searchcontext/FILES
@@ -0,0 +1 @@
+searchcontext.cpp
diff --git a/searchlib/src/tests/attribute/searchcontext/searchcontext.cpp b/searchlib/src/tests/attribute/searchcontext/searchcontext.cpp
new file mode 100644
index 00000000000..6c69e79a93b
--- /dev/null
+++ b/searchlib/src/tests/attribute/searchcontext/searchcontext.cpp
@@ -0,0 +1,1900 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+#include <vespa/searchlib/attribute/attribute.h>
+#include <vespa/searchlib/attribute/attributefactory.h>
+#include <vespa/searchlib/attribute/attributeiterators.h>
+#include <vespa/searchlib/attribute/flagattribute.h>
+#include <vespa/searchlib/attribute/singlenumericattribute.h>
+#include <vespa/searchlib/attribute/multinumericattribute.h>
+#include <vespa/searchlib/attribute/singlestringattribute.h>
+#include <vespa/searchlib/attribute/multistringattribute.h>
+#include <vespa/searchlib/common/bitvectoriterator.h>
+#include <vespa/searchlib/fef/matchdata.h>
+#include <vespa/searchlib/fef/termfieldmatchdata.h>
+#include <vespa/searchlib/fef/termfieldmatchdataarray.h>
+#include <vespa/searchlib/fef/termfieldmatchdataposition.h>
+#include <vespa/searchlib/queryeval/searchiterator.h>
+#include <vespa/searchlib/queryeval/emptysearch.h>
+#include <vespa/searchlib/queryeval/hitcollector.h>
+#include <vespa/vespalib/testkit/testapp.h>
+#include <vespa/vespalib/util/compress.h>
+#include <vespa/vespalib/stllike/asciistream.h>
+#include <vespa/searchlib/test/initrange.h>
+#include <iterator>
+#include <set>
+
+#include <vespa/searchlib/attribute/attributevector.hpp>
+
+LOG_SETUP("searchcontext_test");
+
+namespace search {
+
+namespace
+{
+
+bool
+isUnsignedSmallIntAttribute(const AttributeVector &a)
+{
+ switch (a.getBasicType())
+ {
+ case attribute::BasicType::UINT1:
+ case attribute::BasicType::UINT2:
+ case attribute::BasicType::UINT4:
+ return true;
+ default:
+ return false;
+ }
+}
+
+}
+
+typedef AttributeVector::SP AttributePtr;
+typedef std::unique_ptr<AttributeVector::SearchContext> SearchContextPtr;
+typedef AttributeVector::SearchContext SearchContext;
+using attribute::Config;
+using attribute::BasicType;
+using attribute::CollectionType;
+typedef AttributeVector::largeint_t largeint_t;
+typedef queryeval::SearchIterator::UP SearchBasePtr;
+typedef std::unique_ptr<ResultSet> ResultSetPtr;
+
+using queryeval::HitCollector;
+using queryeval::SearchIterator;
+using fef::MatchData;
+using fef::TermFieldMatchData;
+using fef::TermFieldMatchDataArray;
+using fef::TermFieldMatchDataPosition;
+
+class DocSet : public std::set<uint32_t>
+{
+public:
+ DocSet() : std::set<uint32_t>() {}
+ DocSet(const uint32_t *b, const uint32_t *e) : std::set<uint32_t>(b, e) {}
+ DocSet & put(const uint32_t &v) {
+ insert(v);
+ return *this;
+ }
+};
+
+template <typename V, typename T>
+class PostingList
+{
+private:
+ V * _vec;
+ T _value;
+ DocSet _hits;
+
+public:
+ PostingList(V & vec, T value) : _vec(&vec), _value(value), _hits() {}
+ const V & getAttribute() const { return *_vec; }
+ V & getAttribute() { return *_vec; }
+ const T & getValue() const { return _value; }
+ DocSet & getHits() { return _hits; }
+ const DocSet & getHits() const { return _hits; }
+ uint32_t getHitCount() const { return _hits.size(); }
+};
+
+class DocRange
+{
+public:
+ uint32_t start;
+ uint32_t end;
+ DocRange(uint32_t start_, uint32_t end_) : start(start_), end(end_) {}
+};
+
+class SearchContextTest : public vespalib::TestApp
+{
+private:
+ typedef std::map<vespalib::string, Config> ConfigMap;
+ // Map of all config objects
+ ConfigMap _integerCfg;
+ ConfigMap _floatCfg;
+ ConfigMap _stringCfg;
+
+
+ // helper functions
+ void
+ addReservedDoc(AttributeVector &ptr);
+
+ void addDocs(AttributeVector & ptr, uint32_t numDocs);
+ template <typename T>
+ void fillVector(std::vector<T> & values, size_t numValues);
+ template <typename V, typename T>
+ void fillAttribute(V & vec, const std::vector<T> & values);
+ template <typename V, typename T>
+ void resetAttribute(V & vec, const T & value);
+ template <typename V, typename T>
+ void fillPostingList(PostingList<V, T> & pl, const DocRange & range);
+ template <typename V, typename T>
+ void fillPostingList(PostingList<V, T> & pl);
+ void buildTermQuery(std::vector<char> & buffer, const vespalib::string & index, const vespalib::string & term,
+ QueryTermSimple::SearchTerm termType=QueryTermSimple::WORD);
+ template <typename V, typename T>
+ SearchContextPtr getSearch(const V & vec, const T & term, QueryTermSimple::SearchTerm termType=QueryTermSimple::WORD);
+ ResultSetPtr performSearch(SearchIterator & sb, uint32_t numDocs);
+ template <typename V, typename T>
+ ResultSetPtr performSearch(const V & vec, const T & term, QueryTermSimple::SearchTerm termType=QueryTermSimple::WORD);
+ template <typename V>
+ void performSearch(const V & vec, const vespalib::string & term,
+ const DocSet & expected, QueryTermSimple::SearchTerm termType);
+ void checkResultSet(const ResultSet & rs, const DocSet & exp, bool bitVector);
+
+ template<typename T, typename A>
+ void testInitRange(T key, const vespalib::string & keyAsString, const ConfigMap & cfgs);
+ void testInitRange();
+ // test search functionality
+ template <typename V, typename T>
+ void testFind(const PostingList<V, T> & first);
+
+ template <typename V, typename T>
+ void testSearch(V & attribute, uint32_t numDocs, const std::vector<T> & values);
+ template<typename T, typename A>
+ void testSearch(const ConfigMap & cfgs);
+ template <typename V, typename T>
+ void testMultiValueSearchHelper(V & vec, const std::vector<T> & values);
+ template <typename V, typename T>
+ void testMultiValueSearch(V & first, V & second, const std::vector<T> & values);
+ void testSearch();
+
+ class IteratorTester {
+ public:
+ virtual bool matches(const SearchIterator & base) const = 0;
+ virtual ~IteratorTester() { }
+ };
+ class AttributeIteratorTester : public IteratorTester
+ {
+ public:
+ virtual bool matches(const SearchIterator & base) const {
+ return dynamic_cast<const AttributeIterator *>(&base) != NULL;
+ }
+ };
+ class FlagAttributeIteratorTester : public IteratorTester
+ {
+ public:
+ virtual bool matches(const SearchIterator & base) const {
+ return (dynamic_cast<const FlagAttributeIterator *>(&base) != NULL) ||
+ (dynamic_cast<const BitVectorIterator *>(&base) != NULL) ||
+ (dynamic_cast<const queryeval::EmptySearch *>(&base) != NULL);
+ }
+ };
+ class AttributePostingListIteratorTester : public IteratorTester
+ {
+ public:
+ virtual bool matches(const SearchIterator & base) const {
+ return dynamic_cast<const AttributePostingListIterator *>(&base) != NULL ||
+ dynamic_cast<const queryeval::EmptySearch *>(&base) != NULL;
+
+ }
+ };
+
+
+ // test search iterator functionality
+ void testStrictSearchIterator(SearchContext & threeHits,
+ SearchContext & noHits,
+ const IteratorTester & typeTester);
+ void testNonStrictSearchIterator(SearchContext & threeHits,
+ SearchContext & noHits,
+ const IteratorTester & typeTester);
+ void fillForSearchIteratorTest(IntegerAttribute * ia);
+ void fillForSemiNibbleSearchIteratorTest(IntegerAttribute * ia);
+ void testSearchIterator();
+
+
+ // test search iterator unpacking
+ void fillForSearchIteratorUnpackingTest(IntegerAttribute * ia, bool extra);
+ void testSearchIteratorUnpacking(const AttributePtr & ptr,
+ SearchContext & sc,
+ bool extra,
+ bool strict);
+ void testSearchIteratorUnpacking();
+
+
+ // test range search
+ template <typename VectorType>
+ void performRangeSearch(const VectorType & vec, const vespalib::string & term,
+ const DocSet & expected);
+ template <typename VectorType, typename ValueType>
+ void testRangeSearch(const AttributePtr & ptr, uint32_t numDocs, std::vector<ValueType> values);
+ void testRangeSearch();
+ void testRangeSearchLimited();
+
+
+ // test case insensitive search
+ void performCaseInsensitiveSearch(const StringAttribute & vec, const vespalib::string & term,
+ const DocSet & expected);
+ void testCaseInsensitiveSearch(const AttributePtr & ptr);
+ void testCaseInsensitiveSearch();
+ void testRegexSearch(const AttributePtr & ptr);
+ void testRegexSearch();
+
+
+ // test prefix search
+ void performPrefixSearch(const StringAttribute & vec, const vespalib::string & term,
+ const DocSet & expected, QueryTermSimple::SearchTerm termType);
+ void testPrefixSearch(const AttributePtr & ptr);
+ void testPrefixSearch();
+
+ // test that search is working after clear doc
+ template <typename VectorType, typename ValueType>
+ void requireThatSearchIsWorkingAfterClearDoc(const vespalib::string & name, const Config & cfg,
+ ValueType startValue, const vespalib::string & term);
+ void requireThatSearchIsWorkingAfterClearDoc();
+
+ // test that search is working after load and clear doc
+ template <typename VectorType, typename ValueType>
+ void requireThatSearchIsWorkingAfterLoadAndClearDoc(const vespalib::string & name, const Config & cfg,
+ ValueType startValue, ValueType defaultValue,
+ const vespalib::string & term);
+ void requireThatSearchIsWorkingAfterLoadAndClearDoc();
+
+ template <typename VectorType, typename ValueType>
+ void requireThatSearchIsWorkingAfterUpdates(const vespalib::string & name,
+ const Config & cfg,
+ ValueType value1,
+ ValueType value2);
+ void requireThatSearchIsWorkingAfterUpdates();
+
+ void requireThatFlagAttributeIsWorkingWhenNewDocsAreAdded();
+
+ template <typename VectorType, typename ValueType>
+ void requireThatInvalidSearchTermGivesZeroHits(const vespalib::string & name,
+ const Config & cfg,
+ ValueType value);
+ void requireThatInvalidSearchTermGivesZeroHits();
+
+ void requireThatFlagAttributeHandlesTheByteRange();
+
+ void requireThatOutOfBoundsSearchTermGivesZeroHits(const vespalib::string &name,
+ const Config &cfg,
+ int64_t maxValue);
+ void requireThatOutOfBoundsSearchTermGivesZeroHits();
+
+ // init maps with config objects
+ void initIntegerConfig();
+ void initFloatConfig();
+ void initStringConfig();
+
+public:
+ SearchContextTest();
+ int Main();
+};
+
+
+void
+SearchContextTest::addReservedDoc(AttributeVector &ptr)
+{
+ ptr.addReservedDoc();
+}
+
+
+void
+SearchContextTest::addDocs(AttributeVector & ptr, uint32_t numDocs)
+{
+ uint32_t docId;
+ addReservedDoc(ptr);
+ for (uint32_t i = 1; i <= numDocs; ++i) {
+ ptr.addDoc(docId);
+ EXPECT_EQUAL(docId, i);
+ }
+ ASSERT_TRUE(ptr.getNumDocs() == numDocs + 1);
+}
+
+template <typename T>
+void
+SearchContextTest::fillVector(std::vector<T> & values, size_t numValues)
+{
+ values.clear();
+ values.reserve(numValues);
+ for (size_t i = 1; i <= numValues; ++i) {
+ values.push_back(static_cast<T>(i));
+ }
+}
+
+template <>
+void
+SearchContextTest::fillVector(std::vector<vespalib::string> & values, size_t numValues)
+{
+ values.clear();
+ values.reserve(numValues);
+ for (size_t i = 0; i < numValues; ++i) {
+ vespalib::asciistream ss;
+ ss << "string" << (i < 10 ? "0" : "") << i;
+ values.push_back(ss.str());
+ }
+}
+
+template <typename V, typename T>
+void
+SearchContextTest::fillAttribute(V & vec, const std::vector<T> & values)
+{
+ for (uint32_t doc = 1; doc < vec.getNumDocs(); ++doc) {
+ ASSERT_TRUE(doc < vec.getNumDocs());
+ vec.clearDoc(doc);
+ uint32_t valueCount = doc % (values.size() + 1);
+ for (uint32_t i = 0; i < valueCount; ++i) {
+ // std::cout << "append(" << doc << ", " << values[i] << ")" << std::endl;
+ EXPECT_TRUE(vec.append(doc, values[i], 1));
+ }
+ }
+ vec.commit(true);
+}
+
+template <typename V, typename T>
+void
+SearchContextTest::resetAttribute(V & vec, const T & value)
+{
+ for (uint32_t doc = 1; doc < vec.getNumDocs(); ++doc) {
+ ASSERT_TRUE(doc < vec.getNumDocs());
+ EXPECT_TRUE(vec.update(doc, value));
+ }
+ vec.commit(true);
+}
+
+template <typename V, typename T>
+void
+SearchContextTest::fillPostingList(PostingList<V, T> & pl, const DocRange & range)
+{
+ pl.getHits().clear();
+ for (uint32_t doc = range.start; doc < range.end; ++doc) {
+ ASSERT_TRUE(doc < pl.getAttribute().getNumDocs());
+ EXPECT_TRUE(pl.getAttribute().update(doc, pl.getValue()));
+ pl.getHits().insert(doc);
+ }
+ pl.getAttribute().commit(true);
+}
+
+template <typename V, typename T>
+void
+SearchContextTest::fillPostingList(PostingList<V, T> & pl)
+{
+ AttributeVector & vec = dynamic_cast<AttributeVector &>(pl.getAttribute());
+ pl.getHits().clear();
+ uint32_t sz = vec.getMaxValueCount();
+ T * buf = new T[sz];
+ for (uint32_t doc = 1; doc < vec.getNumDocs(); ++doc) {
+ uint32_t valueCount = vec.get(doc, buf, sz);
+ EXPECT_TRUE(valueCount <= sz);
+ for (uint32_t i = 0; i < valueCount; ++i) {
+ if (buf[i] == pl.getValue()) {
+ //std::cout << "hit for doc(" << doc << "): buf[" << i << "] (=" << buf[i] << ") == " << pl.getValue() << std::endl;
+ pl.getHits().insert(doc);
+ break;
+ }
+ }
+ }
+ delete [] buf;
+}
+
+void
+SearchContextTest::buildTermQuery(std::vector<char> & buffer, const vespalib::string & index, const vespalib::string & term, QueryTermSimple::SearchTerm termType)
+{
+ uint32_t indexLen = index.size();
+ uint32_t termLen = term.size();
+ uint32_t queryPacketSize = 1 + 2 * 4 + indexLen + termLen;
+ uint32_t p = 0;
+ buffer.resize(queryPacketSize);
+ switch (termType) {
+ case QueryTermSimple::PREFIXTERM: buffer[p++] = ParseItem::ITEM_PREFIXTERM; break;
+ case QueryTermSimple::REGEXP: buffer[p++] = ParseItem::ITEM_REGEXP; break;
+ default:
+ buffer[p++] = ParseItem::ITEM_TERM;
+ break;
+ }
+ p += vespalib::compress::Integer::compressPositive(indexLen, &buffer[p]);
+ memcpy(&buffer[p], index.c_str(), indexLen);
+ p += indexLen;
+ p += vespalib::compress::Integer::compressPositive(termLen, &buffer[p]);
+ memcpy(&buffer[p], term.c_str(), termLen);
+ p += termLen;
+ buffer.resize(p);
+}
+
+template <typename V, typename T>
+SearchContextPtr
+SearchContextTest::getSearch(const V & vec, const T & term, QueryTermSimple::SearchTerm termType)
+{
+ std::vector<char> query;
+ vespalib::asciistream ss;
+ ss << term;
+ buildTermQuery(query, vec.getName(), ss.str(), termType);
+
+ return (dynamic_cast<const AttributeVector &>(vec)).
+ getSearch(vespalib::stringref(&query[0], query.size()),
+ AttributeVector::SearchContext::Params());
+}
+
+ResultSetPtr
+SearchContextTest::performSearch(SearchIterator & sb, uint32_t numDocs)
+{
+ HitCollector hc(numDocs, numDocs, 0);
+ sb.initFullRange();
+ // assume strict toplevel search object located at start
+ for (sb.seek(1u); ! sb.isAtEnd(); sb.seek(sb.getDocId() + 1)) {
+ hc.addHit(sb.getDocId(), 0.0);
+ }
+ return hc.getResultSet();
+}
+
+template <typename V, typename T>
+ResultSetPtr
+SearchContextTest::performSearch(const V & vec, const T & term, QueryTermSimple::SearchTerm termType)
+{
+ TermFieldMatchData dummy;
+ SearchContextPtr sc = getSearch(vec, term, termType);
+ sc->fetchPostings(true);
+ SearchBasePtr sb = sc->createIterator(&dummy, true);
+ ResultSetPtr rs = performSearch(*sb, vec.getNumDocs());
+ return rs;
+}
+
+template <typename V>
+void
+SearchContextTest::performSearch(const V & vec, const vespalib::string & term,
+ const DocSet & expected, QueryTermSimple::SearchTerm termType)
+{
+#if 0
+ std::cout << "performSearch[" << term << "]: {";
+ std::copy(expected.begin(), expected.end(), std::ostream_iterator<uint32_t>(std::cout, ", "));
+ std::cout << "}, prefix(" << (prefix ? "true" : "false") << ")" << std::endl;
+#endif
+ { // strict search iterator
+ ResultSetPtr rs = performSearch(vec, term, termType);
+ checkResultSet(*rs, expected, false);
+ }
+}
+
+void
+SearchContextTest::checkResultSet(const ResultSet & rs, const DocSet & expected, bool bitVector)
+{
+ EXPECT_EQUAL(rs.getNumHits(), expected.size());
+ if (bitVector) {
+ const BitVector * vec = rs.getBitOverflow();
+ if (expected.size() != 0) {
+ ASSERT_TRUE(vec != NULL);
+ for (const auto & expect : expected) {
+ EXPECT_TRUE(vec->testBit(expect));
+ }
+ }
+ } else {
+ const RankedHit * array = rs.getArray();
+ if (expected.size() != 0) {
+ ASSERT_TRUE(array != NULL);
+ uint32_t i = 0;
+ for (DocSet::const_iterator iter = expected.begin();
+ iter != expected.end(); ++iter, ++i)
+ {
+ EXPECT_TRUE(array[i]._docId == *iter);
+ }
+ }
+ }
+}
+
+
+//-----------------------------------------------------------------------------
+// Test search functionality
+//-----------------------------------------------------------------------------
+template <typename V, typename T>
+void
+SearchContextTest::testFind(const PostingList<V, T> & pl)
+{
+ { // strict search iterator
+ SearchContextPtr sc = getSearch(pl.getAttribute(), pl.getValue());
+ sc->fetchPostings(true);
+ TermFieldMatchData dummy;
+ SearchBasePtr sb = sc->createIterator(&dummy, true);
+ ResultSetPtr rs = performSearch(*sb, pl.getAttribute().getNumDocs());
+ checkResultSet(*rs, pl.getHits(), false);
+ }
+}
+
+template <typename V, typename T>
+void
+SearchContextTest::testSearch(V & attribute, uint32_t numDocs, const std::vector<T> & values)
+{
+ LOG(info, "testSearch: vector '%s' with %u documents and %lu unique values",
+ attribute.getName().c_str(), numDocs, static_cast<unsigned long>(values.size()));
+
+ // fill attribute vectors
+ addDocs(attribute, numDocs);
+
+ std::vector<PostingList<V, T> > lists;
+
+ // fill posting lists
+ ASSERT_TRUE((attribute.getNumDocs() - 1) % values.size() == 0);
+ uint32_t hitCount = attribute.getNumDocs() / values.size();
+ for (uint32_t i = 0; i < values.size(); ++i) {
+ // for each value a range with hitCount documents will hit on that value
+ lists.push_back(PostingList<V, T>(attribute, values[i]));
+ fillPostingList(lists.back(), DocRange(i * hitCount + 1, (i + 1) * hitCount + 1));
+ }
+
+ // test find()
+ for (const auto & list : lists) {
+ testFind(list);
+ }
+}
+
+template <typename V, typename T>
+void
+SearchContextTest::testMultiValueSearchHelper(V & vec, const std::vector<T> & values)
+{
+ std::vector<PostingList<V, T> > lists;
+
+ // fill posting lists based on attribute content
+ for (const T & value : values) {
+ lists.push_back(PostingList<V, T>(vec, value));
+ fillPostingList(lists.back());
+ }
+
+ // test find()
+ for (const auto & list : lists) {
+ //std::cout << "testFind(lists[" << i << "]): value = " << lists[i].getValue()
+ // << ", hit count = " << lists[i].getHitCount() << std::endl;
+ testFind(list);
+ }
+}
+
+template <typename V, typename T>
+void
+SearchContextTest::testMultiValueSearch(V & first, V & second, const std::vector<T> & values)
+{
+ addDocs(first, second.getNumDocs());
+ LOG(info, "testMultiValueSearch: vector '%s' with %u documents and %lu unique values",
+ first.getName().c_str(), first.getNumDocs(), static_cast<unsigned long>(values.size()));
+
+ fillAttribute(first, values);
+
+ testMultiValueSearchHelper(first, values);
+
+ ASSERT_TRUE(first.saveAs(second.getBaseFileName()));
+ ASSERT_TRUE(second.load());
+
+ testMultiValueSearchHelper(second, values);
+
+ size_t sz = values.size();
+ ASSERT_TRUE(sz > 2);
+ std::vector<T> subset;
+ // values[sz - 2] is not used -> 0 hits
+ // values[sz - 1] is used once -> 1 hit
+ for (size_t i = 0; i < sz - 2; ++i) {
+ subset.push_back(values[i]);
+ }
+
+ fillAttribute(first, subset);
+
+ ASSERT_TRUE(1u < first.getNumDocs());
+ EXPECT_TRUE(first.append(1u, values[sz - 1], 1));
+ first.commit(true);
+
+ testMultiValueSearchHelper(first, values);
+
+ ASSERT_TRUE(first.saveAs(second.getBaseFileName()));
+ ASSERT_TRUE(second.load());
+
+ testMultiValueSearchHelper(second, values);
+}
+
+template<typename T, typename A>
+void SearchContextTest::testSearch(const ConfigMap & cfgs) {
+ uint32_t numDocs = 100;
+ uint32_t numUniques = 20;
+ std::vector<T> values;
+ fillVector(values, numUniques);
+ for (const auto & cfg : cfgs) {
+ AttributePtr second = AttributeFactory::createAttribute(cfg.first + "-2", cfg.second);
+ testSearch(*(dynamic_cast<A *>(second.get())), numDocs, values);
+ if (second->hasMultiValue()) {
+ AttributePtr first = AttributeFactory::createAttribute(cfg.first + "-1", cfg.second);
+ testMultiValueSearch(*(dynamic_cast<A *>(first.get())),
+ *(dynamic_cast<A *>(second.get())), values);
+ }
+ }
+}
+
+using search::test::InitRangeVerifier;
+
+template<typename T, typename A>
+void SearchContextTest::testInitRange(T key, const vespalib::string & keyAsString, const ConfigMap & cfgs) {
+ InitRangeVerifier ir;
+ for (const auto & cfg : cfgs) {
+ AttributePtr attribute = AttributeFactory::createAttribute(cfg.first + "-initrange", cfg.second);
+ addDocs(*attribute, ir.getDocIdLimit());
+ for (uint32_t doc : ir.getExpectedDocIds()) {
+ EXPECT_TRUE(nullptr != dynamic_cast<A *>(attribute.get()));
+ EXPECT_TRUE(dynamic_cast<A *>(attribute.get())->update(doc, key));
+ }
+ attribute->commit(true);
+ SearchContextPtr sc = getSearch(*attribute, keyAsString);
+ ASSERT_TRUE(sc->valid());
+ sc->fetchPostings(true);
+ TermFieldMatchData dummy;
+ SearchBasePtr sb = sc->createIterator(&dummy, true);
+ ir.verify(*sb);
+ }
+}
+
+void SearchContextTest::testInitRange() {
+ testInitRange<AttributeVector::largeint_t, IntegerAttribute>(42, "42", _integerCfg);
+ testInitRange<double, FloatingPointAttribute>(42.42, "42.42", _floatCfg);
+ testInitRange<vespalib::string, StringAttribute>("any-key", "any-key", _stringCfg);
+}
+
+void
+SearchContextTest::testSearch()
+{
+ const uint32_t numDocs = 100;
+ const uint32_t numUniques = 20;
+
+ { // IntegerAttribute
+ for (const auto & cfg : _integerCfg) {
+ AttributePtr attribute = AttributeFactory::createAttribute(cfg.first + "-3", cfg.second);
+ SearchContextPtr sc = getSearch(*attribute, "100");
+ ASSERT_TRUE(sc->valid());
+ sc = getSearch(*attribute, "1A0");
+ EXPECT_FALSE( sc->valid() );
+ }
+
+
+ { // CollectionType::ARRAY Flags.
+ std::vector<AttributeVector::largeint_t> values;
+ fillVector(values, numUniques);
+ Config cfg(BasicType::INT8, CollectionType::ARRAY);
+ cfg.setFastSearch(true);
+ AttributePtr second = AttributeFactory::createAttribute("flags-2", cfg);
+ testSearch(*(dynamic_cast<IntegerAttribute *>(second.get())), numDocs, values);
+ AttributePtr first = AttributeFactory::createAttribute("flags-1", cfg);
+ testMultiValueSearch(*(dynamic_cast<IntegerAttribute *>(first.get())),
+ *(dynamic_cast<IntegerAttribute *>(second.get())), values);
+ }
+ }
+
+ { // FloatingPointAttribute
+ for (const auto & cfg : _floatCfg) {
+ AttributePtr attribute = AttributeFactory::createAttribute(cfg.first + "-3", cfg.second);
+ SearchContextPtr sc = getSearch(*attribute, "100");
+ ASSERT_TRUE(sc->valid());
+ sc = getSearch(*attribute, "7.3");
+ ASSERT_TRUE( sc->valid() );
+ sc = getSearch(*attribute, "1A0");
+ EXPECT_FALSE( sc->valid() );
+ }
+ }
+
+ testSearch<AttributeVector::largeint_t, IntegerAttribute>(_integerCfg);
+ testSearch<double, FloatingPointAttribute>(_floatCfg);
+ testSearch<vespalib::string, StringAttribute>(_stringCfg);
+}
+
+//-----------------------------------------------------------------------------
+// Test search iterator functionality
+//-----------------------------------------------------------------------------
+void
+SearchContextTest::testStrictSearchIterator(SearchContext & threeHits,
+ SearchContext & noHits,
+ const IteratorTester & typeTester)
+{
+ TermFieldMatchData dummy;
+ { // search for value with 3 hits
+ threeHits.fetchPostings(true);
+ SearchBasePtr sb = threeHits.createIterator(&dummy, true);
+ sb->initFullRange();
+ EXPECT_TRUE(typeTester.matches(*sb));
+ EXPECT_TRUE(sb->getDocId() == sb->beginId() ||
+ sb->getDocId() == 1u);
+ EXPECT_TRUE(sb->seek(1));
+ EXPECT_EQUAL(sb->getDocId(), 1u);
+ EXPECT_TRUE(!sb->seek(2));
+ EXPECT_EQUAL(sb->getDocId(), 3u);
+ EXPECT_TRUE(sb->seek(3));
+ EXPECT_EQUAL(sb->getDocId(), 3u);
+ EXPECT_TRUE(!sb->seek(4));
+ EXPECT_EQUAL(sb->getDocId(), 5u);
+ EXPECT_TRUE(sb->seek(5));
+ EXPECT_EQUAL(sb->getDocId(), 5u);
+ EXPECT_TRUE(!sb->seek(6));
+ EXPECT_TRUE(sb->isAtEnd());
+ }
+
+ { // search for value with no hits
+ noHits.fetchPostings(true);
+ SearchBasePtr sb = noHits.createIterator(&dummy, true);
+ sb->initFullRange();
+ ASSERT_TRUE(typeTester.matches(*sb));
+ EXPECT_TRUE(sb->getDocId() == sb->beginId() ||
+ sb->isAtEnd());
+ EXPECT_TRUE(!sb->seek(1));
+ EXPECT_TRUE(sb->isAtEnd());
+ }
+}
+
+void
+SearchContextTest::testNonStrictSearchIterator(SearchContext & threeHits,
+ SearchContext & noHits,
+ const IteratorTester & typeTester)
+{
+ TermFieldMatchData dummy;
+ { // search for value with three hits
+ threeHits.fetchPostings(false);
+ SearchBasePtr sb = threeHits.createIterator(&dummy, false);
+ sb->initFullRange();
+ EXPECT_TRUE(typeTester.matches(*sb));
+ EXPECT_TRUE(sb->seek(1));
+ EXPECT_EQUAL(sb->getDocId(), 1u);
+ EXPECT_TRUE(!sb->seek(2));
+ EXPECT_EQUAL(sb->getDocId(), 1u);
+ EXPECT_TRUE(sb->seek(3));
+ EXPECT_EQUAL(sb->getDocId(), 3u);
+ EXPECT_TRUE(!sb->seek(4));
+ EXPECT_EQUAL(sb->getDocId(), 3u);
+ EXPECT_TRUE(sb->seek(5));
+ EXPECT_EQUAL(sb->getDocId(), 5u);
+ EXPECT_TRUE(!sb->seek(6));
+ EXPECT_TRUE(sb->getDocId() == 5u || sb->isAtEnd());
+ }
+ { // search for value with no hits
+ noHits.fetchPostings(false);
+ SearchBasePtr sb = noHits.createIterator(&dummy, false);
+ sb->initFullRange();
+
+ EXPECT_TRUE(typeTester.matches(*sb));
+ EXPECT_TRUE(sb->getDocId() == sb->beginId() ||
+ sb->isAtEnd());
+ EXPECT_TRUE(!sb->seek(1));
+ EXPECT_NOT_EQUAL(sb->getDocId(), 1u);
+ EXPECT_TRUE(!sb->seek(6));
+ EXPECT_NOT_EQUAL(sb->getDocId(), 6u);
+ }
+}
+
+void
+SearchContextTest::fillForSearchIteratorTest(IntegerAttribute * ia)
+{
+ addReservedDoc(*ia);
+ ia->addDocs(5);
+ ia->update(1, 10);
+ ia->update(2, 20);
+ ia->update(3, 10);
+ ia->update(4, 20);
+ ia->update(5, 10);
+ ia->commit(true);
+}
+
+void
+SearchContextTest::fillForSemiNibbleSearchIteratorTest(IntegerAttribute * ia)
+{
+ addReservedDoc(*ia);
+ ia->addDocs(5);
+ ia->update(1, 1);
+ ia->update(2, 2);
+ ia->update(3, 1);
+ ia->update(4, 2);
+ ia->update(5, 1);
+ ia->commit(true);
+}
+
+void
+SearchContextTest::testSearchIterator()
+{
+ {
+ Config cfg(BasicType::INT32, CollectionType::SINGLE);
+ AttributePtr ptr = AttributeFactory::createAttribute("s-int32", cfg);
+ fillForSearchIteratorTest(dynamic_cast<IntegerAttribute *>(ptr.get()));
+
+ SearchContextPtr threeHits = getSearch(*ptr.get(), 10);
+ SearchContextPtr noHits = getSearch(*ptr.get(), 30);
+ AttributeIteratorTester tester;
+ testStrictSearchIterator(*threeHits, *noHits, tester);
+ threeHits = getSearch(*ptr.get(), 10);
+ noHits = getSearch(*ptr.get(), 30);
+ testNonStrictSearchIterator(*threeHits, *noHits, tester);
+ }
+ {
+ Config cfg(BasicType::UINT2, CollectionType::SINGLE);
+ AttributePtr ptr = AttributeFactory::createAttribute("s-uint2", cfg);
+ fillForSemiNibbleSearchIteratorTest(dynamic_cast<IntegerAttribute *>
+ (ptr.get()));
+
+ SearchContextPtr threeHits = getSearch(*ptr.get(), 1);
+ SearchContextPtr noHits = getSearch(*ptr.get(), 3);
+ AttributeIteratorTester tester;
+ testStrictSearchIterator(*threeHits, *noHits, tester);
+ threeHits = getSearch(*ptr.get(), 1);
+ noHits = getSearch(*ptr.get(), 3);
+ testNonStrictSearchIterator(*threeHits, *noHits, tester);
+ }
+ {
+ Config cfg(BasicType::INT32, CollectionType::SINGLE);
+ cfg.setFastSearch(true);
+ AttributePtr ptr = AttributeFactory::createAttribute("sfs-int32", cfg);
+ fillForSearchIteratorTest(dynamic_cast<IntegerAttribute *>(ptr.get()));
+
+ SearchContextPtr threeHits = getSearch(*ptr.get(), 10);
+ SearchContextPtr noHits = getSearch(*ptr.get(), 30);
+ AttributePostingListIteratorTester tester;
+ testStrictSearchIterator(*threeHits, *noHits, tester);
+ }
+ {
+ Config cfg(BasicType::STRING, CollectionType::SINGLE);
+ cfg.setFastSearch(true);
+ AttributePtr ptr = AttributeFactory::createAttribute("sfs-string", cfg);
+ StringAttribute * sa = dynamic_cast<StringAttribute *>(ptr.get());
+ addReservedDoc(*ptr);
+ ptr->addDocs(5);
+ sa->update(1, "three");
+ sa->update(2, "two");
+ sa->update(3, "three");
+ sa->update(4, "two");
+ sa->update(5, "three");
+ ptr->commit(true);
+
+ SearchContextPtr threeHits = getSearch(*ptr.get(), "three");
+ SearchContextPtr noHits = getSearch(*ptr.get(), "none");
+ AttributePostingListIteratorTester tester;
+ testStrictSearchIterator(*threeHits, *noHits, tester);
+ }
+ {
+ Config cfg(BasicType::INT8, CollectionType::ARRAY);
+ cfg.setFastSearch(true);
+ AttributePtr ptr = AttributeFactory::createAttribute("flags", cfg);
+ fillForSearchIteratorTest(dynamic_cast<IntegerAttribute *>(ptr.get()));
+
+ SearchContextPtr threeHits = getSearch(*ptr.get(), 10);
+ SearchContextPtr noHits = getSearch(*ptr.get(), 30);
+ FlagAttributeIteratorTester tester;
+ testStrictSearchIterator(*threeHits, *noHits, tester);
+ threeHits = getSearch(*ptr.get(), 10);
+ noHits = getSearch(*ptr.get(), 30);
+ testNonStrictSearchIterator(*threeHits, *noHits, tester);
+ }
+}
+
+
+
+//-----------------------------------------------------------------------------
+// Test search iterator unpacking
+//-----------------------------------------------------------------------------
+void
+SearchContextTest::fillForSearchIteratorUnpackingTest(IntegerAttribute * ia,
+ bool extra)
+{
+ addReservedDoc(*ia);
+ ia->addDocs(3);
+ if (ia->getCollectionType() == CollectionType::SINGLE) {
+ ia->update(1, 10);
+ ia->update(2, 10);
+ ia->update(3, 10);
+ } else if (ia->getCollectionType() == CollectionType::ARRAY) {
+ ia->append(1, 10, 1);
+ ia->append(2, 10, 1);
+ ia->append(2, 10, 1);
+ ia->append(3, 10, 1);
+ ia->append(3, 10, 1);
+ ia->append(3, 10, 1);
+ } else { // WEIGHTED SET
+ ia->append(1, 10, -50);
+ ia->append(2, 10, 0);
+ ia->append(3, 10, 50);
+ }
+ ia->commit(true);
+ if (!extra)
+ return;
+ ia->addDocs(20);
+ for (uint32_t d = 4; d < 24; ++d) {
+ if (ia->getCollectionType() == CollectionType::SINGLE)
+ ia->update(d, 10);
+ else
+ ia->append(d, 10, 1);
+ }
+ ia->commit(true);
+}
+
+void
+SearchContextTest::testSearchIteratorUnpacking(const AttributePtr & attr,
+ SearchContext & sc,
+ bool extra,
+ bool strict)
+{
+ LOG(info,
+ "testSearchIteratorUnpacking: vector '%s'", attr->getName().c_str());
+
+ TermFieldMatchData md;
+ md.reset(100);
+
+ TermFieldMatchDataPosition pos;
+ pos.setElementWeight(100);
+ md.appendPosition(pos);
+
+ sc.fetchPostings(strict);
+ SearchBasePtr sb = sc.createIterator(&md, strict);
+ sb->initFullRange();
+
+ std::vector<int32_t> weights(3);
+ if (attr->getCollectionType() == CollectionType::SINGLE ||
+ (attr->getCollectionType() == CollectionType::ARRAY && attr->getBasicType() == BasicType::INT8))
+ {
+ weights[0] = 1;
+ weights[1] = 1;
+ weights[2] = 1;
+ } else if (attr->getCollectionType() == CollectionType::ARRAY) {
+ weights[0] = 1;
+ weights[1] = 2;
+ weights[2] = 3;
+ } else {
+ weights[0] = -50;
+ weights[1] = 0;
+ weights[2] = 50;
+ }
+
+ // unpack and check weights
+ sb->unpack(1);
+ EXPECT_EQUAL(sb->getDocId(), 1u);
+ EXPECT_EQUAL(md.getDocId(), 1u);
+ EXPECT_EQUAL(md.getWeight(), weights[0]);
+
+ sb->unpack(2);
+ EXPECT_EQUAL(sb->getDocId(), 2u);
+ EXPECT_EQUAL(md.getDocId(), 2u);
+ EXPECT_EQUAL(md.getWeight(), weights[1]);
+
+ sb->unpack(3);
+ EXPECT_EQUAL(sb->getDocId(), 3u);
+ EXPECT_EQUAL(md.getDocId(), 3u);
+ EXPECT_EQUAL(md.getWeight(), weights[2]);
+ if (extra) {
+ sb->unpack(4);
+ EXPECT_EQUAL(sb->getDocId(), 4u);
+ EXPECT_EQUAL(md.getDocId(), 4u);
+ EXPECT_EQUAL(md.getWeight(), 1);
+ }
+}
+
+void
+SearchContextTest::testSearchIteratorUnpacking()
+{
+ std::vector<std::pair<vespalib::string, Config> > config;
+
+ {
+ Config cfg(BasicType::INT32, CollectionType::SINGLE);
+ config.emplace_back("s-int32", cfg);
+ }
+ {
+ Config cfg(BasicType::UINT4, CollectionType::SINGLE);
+ config.emplace_back("s-uint4", cfg);
+ }
+ {
+ Config cfg(BasicType::INT32, CollectionType::ARRAY);
+ config.emplace_back("a-int32", cfg);
+ }
+ {
+ Config cfg(BasicType::INT32, CollectionType::WSET);
+ config.emplace_back("w-int32", cfg);
+ }
+ {
+ Config cfg(BasicType::INT32, CollectionType::SINGLE);
+ cfg.setFastSearch(true);
+ config.emplace_back("sfs-int32", cfg);
+ }
+ {
+ Config cfg(BasicType::INT32, CollectionType::ARRAY);
+ cfg.setFastSearch(true);
+ config.emplace_back("afs-int32", cfg);
+ }
+ {
+ Config cfg(BasicType::INT32, CollectionType::WSET);
+ cfg.setFastSearch(true);
+ config.emplace_back("wfs-int32", cfg);
+ }
+ {
+ Config cfg(BasicType::INT8, CollectionType::ARRAY);
+ cfg.setFastSearch(true);
+ config.emplace_back("flags", cfg);
+ }
+
+ for (const auto & cfg : config) {
+ AttributePtr ptr = AttributeFactory::createAttribute(cfg.first, cfg.second);
+ fillForSearchIteratorUnpackingTest(dynamic_cast<IntegerAttribute *>(ptr.get()), false);
+ SearchContextPtr sc = getSearch(*ptr.get(), 10);
+ testSearchIteratorUnpacking(ptr, *sc, false, true);
+ sc = getSearch(*ptr.get(), 10);
+ testSearchIteratorUnpacking(ptr, *sc, false, false);
+ if (cfg.second.fastSearch()) {
+ AttributePtr ptr2 = AttributeFactory::createAttribute(cfg.first + "-extra", cfg.second);
+ fillForSearchIteratorUnpackingTest(dynamic_cast<IntegerAttribute *>(ptr2.get()), true);
+ SearchContextPtr sc2 = getSearch(*ptr2.get(), 10);
+ testSearchIteratorUnpacking(ptr2, *sc2, true, true);
+ sc2 = getSearch(*ptr2.get(), 10);
+ testSearchIteratorUnpacking(ptr2, *sc2, true, false);
+ }
+ }
+}
+
+
+
+//-----------------------------------------------------------------------------
+// Test range search
+//-----------------------------------------------------------------------------
+
+template <typename VectorType>
+void
+SearchContextTest::performRangeSearch(const VectorType & vec, const vespalib::string & term,
+ const DocSet & expected)
+{
+ performSearch(vec, term, expected, QueryTermSimple::WORD);
+}
+
+template <typename VectorType, typename ValueType>
+void
+SearchContextTest::testRangeSearch(const AttributePtr & ptr, uint32_t numDocs, std::vector<ValueType> values)
+{
+ LOG(info, "testRangeSearch: vector '%s'", ptr->getName().c_str());
+
+ VectorType & vec = dynamic_cast<VectorType &>(*ptr.get());
+
+ addDocs(vec, numDocs);
+
+ std::map<ValueType, DocSet> postingList;
+
+ uint32_t docCnt = 0;
+ for (uint32_t i = 0; i < values.size() && docCnt < numDocs; i+=2) {
+ //std::cout << "postingList[" << values[i] << "]: {";
+ for (uint32_t j = 0; j < (i + 1) && docCnt < numDocs; ++j, ++docCnt) {
+ EXPECT_TRUE(vec.update(docCnt + 1u, values[i]));
+ postingList[values[i]].insert(docCnt + 1u);
+ //std::cout << docCnt << ", ";
+ }
+ //std::cout << "}" << std::endl;
+ }
+ ptr->commit(true);
+ uint32_t smallHits = 0;
+ ValueType zeroValue = 0;
+ bool smallUInt = isUnsignedSmallIntAttribute(vec);
+ if (smallUInt) {
+ for (uint32_t i = docCnt ; i < numDocs; ++i) {
+ postingList[zeroValue].insert(i + 1u);
+ ++smallHits;
+ }
+ }
+
+ // test less than ("<a")
+ for (uint32_t i = 0; i < values.size(); ++i) {
+ vespalib::asciistream ss;
+ ss << "<" << values[i];
+ DocSet expected;
+ if (smallUInt) {
+ expected.insert(postingList[zeroValue].begin(),
+ postingList[zeroValue].end());
+ }
+ for (uint32_t j = 0; j < i; ++j) {
+ expected.insert(postingList[values[j]].begin(), postingList[values[j]].end());
+ }
+ performRangeSearch(vec, ss.str(), expected);
+ }
+
+ // test greater than (">a")
+ for (uint32_t i = 0; i < values.size(); ++i) {
+ vespalib::asciistream ss;
+ ss << ">" << values[i];
+ DocSet expected;
+ for (uint32_t j = i + 1; j < values.size(); ++j) {
+ expected.insert(postingList[values[j]].begin(), postingList[values[j]].end());
+ }
+ performRangeSearch(vec, ss.str(), expected);
+ }
+
+ // test range ("[a;b]")
+ for (uint32_t i = 0; i < values.size(); ++i) {
+ for (uint32_t j = 0; j < values.size(); ++j) { // illegal range when j < i
+ vespalib::asciistream ss;
+ ss << "[" << values[i] << ";" << values[j] << "]";
+ DocSet expected;
+ for (uint32_t k = i; k < j + 1; ++k) {
+ expected.insert(postingList[values[k]].begin(), postingList[values[k]].end());
+ }
+ performRangeSearch(vec, ss.str(), expected);
+ }
+ }
+
+ { // test large range
+ vespalib::asciistream ss;
+ ss << "[" << (values.front() - 1) << ";" << (values.back() + 1) << "]";
+ DocSet expected;
+ for (uint32_t doc = 0; doc < numDocs; ++doc) {
+ expected.insert(doc + 1);
+ }
+ performRangeSearch(vec, ss.str(), expected);
+ }
+}
+
+void
+SearchContextTest::testRangeSearchLimited()
+{
+ largeint_t VALUES [] = {0,1,1,2,3,4,5,6,7,8,9,9,10 };
+ std::vector<largeint_t> values(VALUES, VALUES+sizeof(VALUES)/sizeof(VALUES[0]));
+ Config cfg(BasicType::INT32, CollectionType::SINGLE);
+ cfg.setFastSearch(true);
+ AttributePtr ptr = AttributeFactory::createAttribute("limited-int32", cfg);
+ IntegerAttribute & vec = dynamic_cast<IntegerAttribute &>(*ptr);
+ addDocs(vec, values.size());
+ for (size_t i(1); i < values.size(); i++) {
+ EXPECT_TRUE(vec.update(i, values[i]));
+ }
+ ptr->commit(true);
+
+ DocSet expected;
+ for (size_t i(1); i < 12; i++) {
+ expected.put(i);
+ }
+ performRangeSearch(vec, "[1;9]", expected);
+ performRangeSearch(vec, "[1;9;100]", expected);
+ performRangeSearch(vec, "[1;9;-100]", expected);
+ expected.clear();
+ expected.put(3);
+ performRangeSearch(vec, "<1;3>", expected);
+ expected.put(4);
+ performRangeSearch(vec, "<1;3]", expected);
+ expected.clear();
+ expected.put(1).put(2).put(3);
+ performRangeSearch(vec, "[1;3>", expected);
+ expected.put(4);
+ performRangeSearch(vec, "[1;3]", expected);
+ expected.clear();
+ expected.put(1).put(2);
+ performRangeSearch(vec, "[1;9;1]", expected);
+ performRangeSearch(vec, "[1;9;2]", expected);
+ expected.put(3);
+ performRangeSearch(vec, "[1;9;3]", expected);
+ expected.clear();
+ expected.put(10).put(11);
+ performRangeSearch(vec, "[1;9;-1]", expected);
+ performRangeSearch(vec, "[1;9;-2]", expected);
+ expected.put(9);
+ performRangeSearch(vec, "[1;9;-3]", expected);
+ performRangeSearch(vec, "[1;9;-3]", expected);
+
+ expected.clear();
+ for (size_t i(1); i < 13; i++) {
+ expected.put(i);
+ }
+ performRangeSearch(vec, "[;;100]", expected);
+ performRangeSearch(vec, "[;;-100]", expected);
+
+ expected.clear();
+ expected.put(1).put(2);
+ performRangeSearch(vec, "[;;1]", expected);
+ expected.clear();
+ expected.put(12);
+ performRangeSearch(vec, "[;;-1]", expected);
+}
+
+void
+SearchContextTest::testRangeSearch()
+{
+ const uint32_t numDocs = 100;
+ const uint32_t numValues = 20;
+ const uint32_t numNibbleValues = 9;
+
+ { // IntegerAttribute
+ std::vector<largeint_t> values;
+ std::vector<largeint_t> nibbleValues;
+ largeint_t start = 1;
+
+ for (uint32_t i = 0; i < numValues; ++i) {
+ values.push_back(start + i);
+ }
+ for (uint32_t i = 0; i < numNibbleValues; ++i) {
+ nibbleValues.push_back(start + i);
+ }
+
+ for (const auto & cfg : _integerCfg) {
+ AttributePtr ptr = AttributeFactory::createAttribute(cfg.first, cfg.second);
+ testRangeSearch<IntegerAttribute, largeint_t>(ptr, numDocs, values);
+ }
+ { // CollectionType::ARRAY Flags.
+ Config cfg(BasicType::INT8, CollectionType::ARRAY);
+ cfg.setFastSearch(true);
+ AttributePtr ptr = AttributeFactory::createAttribute("flags", cfg);
+ testRangeSearch<IntegerAttribute, largeint_t>(ptr, numDocs, values);
+ }
+ {
+ Config cfg(BasicType::UINT4, CollectionType::SINGLE);
+ AttributePtr ptr = AttributeFactory::createAttribute("s-uint4", cfg);
+ testRangeSearch<IntegerAttribute, largeint_t>(ptr, numDocs, nibbleValues);
+ }
+ }
+
+ { // FloatingPointAttribute
+ std::vector<double> values;
+ double start = 1;
+
+ for (uint32_t i = 0; i < numValues; ++i) {
+ values.push_back(start + i);
+ }
+
+ for (const auto & cfg : _floatCfg) {
+ AttributePtr ptr = AttributeFactory::createAttribute(cfg.first, cfg.second);
+ testRangeSearch<FloatingPointAttribute, double>(ptr, numDocs, values);
+ }
+ }
+}
+
+
+//-----------------------------------------------------------------------------
+// Test case insensitive search
+//-----------------------------------------------------------------------------
+
+void
+SearchContextTest::performCaseInsensitiveSearch(const StringAttribute & vec, const vespalib::string & term,
+ const DocSet & expected)
+{
+ performSearch(vec, term, expected, QueryTermSimple::WORD);
+}
+
+void
+SearchContextTest::testCaseInsensitiveSearch(const AttributePtr & ptr)
+{
+ LOG(info, "testCaseInsensitiveSearch: vector '%s'", ptr->getName().c_str());
+
+ StringAttribute & vec = dynamic_cast<StringAttribute &>(*ptr.get());
+
+ uint32_t numDocs = 5 * 5;
+ addDocs(*ptr.get(), numDocs);
+
+ const char * terms[][5] = {
+ {"lower", "upper", "firstupper", "mixedcase", "intermixedcase"}, // lower
+ {"LOWER", "UPPER", "FIRSTUPPER", "MIXEDCASE", "INTERMIXEDCASE"}, // upper
+ {"Lower", "Upper", "Firstupper", "Mixedcase", "Intermixedcase"}, // firstUpper
+ {"Lower", "Upper", "FirstUpper", "MixedCase", "InterMixedCase"}, // mixedCase
+ {"lower", "upper", "firstUpper", "mixedCase", "interMixedCase"}, // interMixedCase
+ };
+
+ uint32_t doc = 1;
+ for (uint32_t j = 0; j < 5; ++j) {
+ for (uint32_t i = 0; i < 5; ++i) {
+ ASSERT_TRUE(doc < vec.getNumDocs());
+ EXPECT_TRUE(vec.update(doc++, terms[i][j]));
+ }
+ }
+
+ ptr->commit(true);
+
+ const char * buffer[1];
+ doc = 1;
+ for (uint32_t j = 0; j < 5; ++j) {
+ for (uint32_t i = 0; i < 5; ++i) {
+ EXPECT_EQUAL(ptr->get(doc++, buffer, 1), uint32_t(1));
+ EXPECT_EQUAL(vespalib::string(buffer[0]), vespalib::string(terms[i][j]));
+ }
+ }
+
+ DocSet empty;
+ for (uint32_t j = 0; j < 5; ++j) {
+ DocSet expected;
+ for (doc = j * 5 + 1; doc < (j + 1) * 5 + 1; ++doc) {
+ expected.insert(doc);
+ }
+ // for non-posting attributes only lower case search terms should give hits
+ performCaseInsensitiveSearch(vec, terms[0][j], expected);
+
+ if (ptr->getConfig().fastSearch()) {
+ for (uint32_t i = 1; i < 5; ++i) {
+ performCaseInsensitiveSearch(vec, terms[i][j], expected);
+ }
+ } else {
+ for (uint32_t i = 1; i < 4; ++i) {
+ performCaseInsensitiveSearch(vec, terms[i][j], empty);
+ }
+ }
+ }
+ performCaseInsensitiveSearch(vec, "none", empty);
+ performCaseInsensitiveSearch(vec, "NONE", empty);
+ performCaseInsensitiveSearch(vec, "None", empty);
+}
+
+void
+SearchContextTest::testRegexSearch(const AttributePtr & ptr)
+{
+ LOG(info, "testRegexSearch: vector '%s'", ptr->getName().c_str());
+
+ StringAttribute & vec = dynamic_cast<StringAttribute &>(*ptr.get());
+
+ uint32_t numDocs = 6;
+ addDocs(*ptr.get(), numDocs);
+
+ const char * strings [] = {"abc1def", "abc2Def", "abc2def", "abc4def", "abc5def", "abc6def"};
+ std::vector<const char *> terms = { "abc", "bc2de" };
+
+ for (uint32_t doc = 1; doc < numDocs + 1; ++doc) {
+ ASSERT_TRUE(doc < vec.getNumDocs());
+ EXPECT_TRUE(vec.update(doc, strings[doc - 1]));
+ }
+
+ ptr->commit(true);
+
+ std::vector<DocSet> expected;
+ DocSet empty;
+ {
+ uint32_t docs[] = {1, 2, 3, 4, 5, 6};
+ expected.push_back(DocSet(docs, docs + 6)); // "abc"
+ }
+ {
+ uint32_t docs[] = {2, 3};
+ expected.push_back(DocSet(docs, docs + 2)); // "bc2de"
+ }
+
+ for (uint32_t i = 0; i < terms.size(); ++i) {
+ performSearch(vec, terms[i], expected[i], QueryTermSimple::REGEXP);
+ performSearch(vec, terms[i], empty, QueryTermSimple::WORD);
+ }
+}
+
+
+void
+SearchContextTest::testCaseInsensitiveSearch()
+{
+ for (const auto & cfg : _stringCfg) {
+ testCaseInsensitiveSearch(AttributeFactory::createAttribute(cfg.first, cfg.second));
+ }
+}
+
+void
+SearchContextTest::testRegexSearch()
+{
+ for (const auto & cfg : _stringCfg) {
+ testRegexSearch(AttributeFactory::createAttribute(cfg.first, cfg.second));
+ }
+}
+
+
+//-----------------------------------------------------------------------------
+// Test prefix search
+//-----------------------------------------------------------------------------
+
+void
+SearchContextTest::performPrefixSearch(const StringAttribute & vec, const vespalib::string & term,
+ const DocSet & expected, QueryTermSimple::SearchTerm termType)
+{
+ performSearch(vec, term, expected, termType);
+}
+
+void
+SearchContextTest::testPrefixSearch(const AttributePtr & ptr)
+{
+ LOG(info, "testPrefixSearch: vector '%s'", ptr->getName().c_str());
+
+ StringAttribute & vec = dynamic_cast<StringAttribute &>(*ptr.get());
+
+ uint32_t numDocs = 6;
+ addDocs(*ptr.get(), numDocs);
+
+ const char * strings [] = {"prefixsearch", "PREFIXSEARCH", "PrefixSearch", "precommit", "PRECOMMIT", "PreCommit"};
+ const char * terms[][3] = {{"pre", "PRE", "Pre"}, {"pref", "PREF", "Pref"},
+ {"prec", "PREC", "PreC"}, {"prex", "PREX", "Prex"}};
+
+ for (uint32_t doc = 1; doc < numDocs + 1; ++doc) {
+ ASSERT_TRUE(doc < vec.getNumDocs());
+ EXPECT_TRUE(vec.update(doc, strings[doc - 1]));
+ }
+
+ ptr->commit(true);
+
+ std::vector<DocSet> expected;
+ DocSet empty;
+ {
+ uint32_t docs[] = {1, 2, 3, 4, 5, 6};
+ expected.push_back(DocSet(docs, docs + 6)); // "pre"
+ }
+ {
+ uint32_t docs[] = {1, 2, 3};
+ expected.push_back(DocSet(docs, docs + 3)); // "pref"
+ }
+ {
+ uint32_t docs[] = {4, 5, 6};
+ expected.push_back(DocSet(docs, docs + 3)); // "prec"
+ }
+ expected.push_back(DocSet()); // "prex"
+
+ for (uint32_t i = 0; i < 4; ++i) {
+ for (uint32_t j = 0; j < 3; ++j) {
+ if (j == 0 || ptr->getConfig().fastSearch()) {
+ performPrefixSearch(vec, terms[i][j], expected[i], QueryTermSimple::PREFIXTERM);
+ performPrefixSearch(vec, terms[i][j], empty, QueryTermSimple::WORD);
+ } else {
+ performPrefixSearch(vec, terms[i][j], empty, QueryTermSimple::PREFIXTERM);
+ performPrefixSearch(vec, terms[i][j], empty, QueryTermSimple::WORD);
+ }
+ }
+ }
+}
+
+
+void
+SearchContextTest::testPrefixSearch()
+{
+ for (const auto & cfg : _stringCfg) {
+ testPrefixSearch(AttributeFactory::createAttribute(cfg.first, cfg.second));
+ }
+}
+
+template <typename VectorType, typename ValueType>
+void
+SearchContextTest::requireThatSearchIsWorkingAfterClearDoc(const vespalib::string & name,
+ const Config & cfg,
+ ValueType startValue,
+ const vespalib::string & term)
+{
+ AttributePtr a = AttributeFactory::createAttribute(name, cfg);
+ LOG(info, "requireThatSearchIsWorkingAfterClearDoc: vector '%s', term '%s'",
+ a->getName().c_str(), term.c_str());
+ addReservedDoc(*a);
+ a->addDocs(4);
+ VectorType & v = dynamic_cast<VectorType &>(*a);
+ resetAttribute(v, startValue);
+ {
+ ResultSetPtr rs = performSearch(v, term);
+ EXPECT_EQUAL(4u, rs->getNumHits());
+ ASSERT_TRUE(4u == rs->getNumHits());
+ const RankedHit * array = rs->getArray();
+ EXPECT_EQUAL(1u, array[0]._docId);
+ EXPECT_EQUAL(2u, array[1]._docId);
+ EXPECT_EQUAL(3u, array[2]._docId);
+ EXPECT_EQUAL(4u, array[3]._docId);
+ }
+ a->clearDoc(1);
+ a->clearDoc(3);
+ a->commit(true);
+ {
+ ResultSetPtr rs = performSearch(v, term);
+ EXPECT_EQUAL(2u, rs->getNumHits());
+ const RankedHit * array = rs->getArray();
+ EXPECT_EQUAL(2u, array[0]._docId);
+ EXPECT_EQUAL(4u, array[1]._docId);
+ }
+}
+
+void
+SearchContextTest::requireThatSearchIsWorkingAfterClearDoc()
+{
+ for (const auto & cfg : _integerCfg) {
+ requireThatSearchIsWorkingAfterClearDoc<IntegerAttribute>(cfg.first, cfg.second, 10, "10");
+ requireThatSearchIsWorkingAfterClearDoc<IntegerAttribute>(cfg.first, cfg.second, 10, "<11");
+ }
+
+ for (const auto & cfg : _floatCfg) {
+ requireThatSearchIsWorkingAfterClearDoc<FloatingPointAttribute>(cfg.first, cfg.second, 10.5, "10.5");
+ requireThatSearchIsWorkingAfterClearDoc<FloatingPointAttribute>(cfg.first, cfg.second, 10.5, "<10.6");
+ }
+
+ for (const auto & cfg : _stringCfg) {
+ requireThatSearchIsWorkingAfterClearDoc<StringAttribute>(cfg.first, cfg.second, "start", "start");
+ }
+}
+
+template <typename VectorType, typename ValueType>
+void
+SearchContextTest::requireThatSearchIsWorkingAfterLoadAndClearDoc(const vespalib::string & name,
+ const Config & cfg,
+ ValueType startValue,
+ ValueType defaultValue,
+ const vespalib::string & term)
+{
+ AttributePtr a = AttributeFactory::createAttribute(name, cfg);
+ LOG(info, "requireThatSearchIsWorkingAfterLoadAndClearDoc: vector '%s', term '%s'",
+ a->getName().c_str(), term.c_str());
+ addReservedDoc(*a);
+ a->addDocs(15);
+ VectorType & va = dynamic_cast<VectorType &>(*a);
+ resetAttribute(va, startValue); // triggers vector vector in posting list (count 15)
+ AttributePtr b = AttributeFactory::createAttribute(name + "-save", cfg);
+ EXPECT_TRUE(a->saveAs(b->getBaseFileName()));
+ EXPECT_TRUE(b->load());
+ b->clearDoc(6); // goes from vector vector to single vector with count 14
+ b->commit(true);
+ {
+ ResultSetPtr rs = performSearch(dynamic_cast<VectorType &>(*b), term);
+ EXPECT_EQUAL(14u, rs->getNumHits());
+ const RankedHit * array = rs->getArray();
+ for (uint32_t i = 0; i < 14; ++i) {
+ if (i < 5) {
+ EXPECT_EQUAL(i + 1, array[i]._docId);
+ } else
+ EXPECT_EQUAL(i + 2, array[i]._docId);
+ }
+ }
+ ValueType buf;
+ if (cfg.collectionType().isMultiValue()) {
+ EXPECT_EQUAL(0u, b->get(6, &buf, 1));
+ } else {
+ EXPECT_EQUAL(1u, b->get(6, &buf, 1));
+ EXPECT_EQUAL(defaultValue, buf);
+ }
+}
+
+void
+SearchContextTest::requireThatSearchIsWorkingAfterLoadAndClearDoc()
+{
+ {
+ int64_t value = 10;
+ int64_t defValue = search::attribute::getUndefined<int32_t>();
+ requireThatSearchIsWorkingAfterLoadAndClearDoc<IntegerAttribute>("s-fs-int32", _integerCfg["s-fs-int32"],
+ value, defValue, "10");
+ requireThatSearchIsWorkingAfterLoadAndClearDoc<IntegerAttribute>("a-fs-int32", _integerCfg["a-fs-int32"],
+ value, defValue, "10");
+ }
+ {
+ vespalib::string value = "foo";
+ vespalib::string defValue = "";
+ requireThatSearchIsWorkingAfterLoadAndClearDoc<StringAttribute>("s-fs-str", _stringCfg["s-fs-str"],
+ value, defValue, value);
+ requireThatSearchIsWorkingAfterLoadAndClearDoc<StringAttribute>("a-fs-str", _stringCfg["a-fs-str"],
+ value, defValue, value);
+ }
+}
+
+template <typename VectorType, typename ValueType>
+void
+SearchContextTest::requireThatSearchIsWorkingAfterUpdates(const vespalib::string & name,
+ const Config & cfg,
+ ValueType value1,
+ ValueType value2)
+{
+ AttributePtr a = AttributeFactory::createAttribute(name, cfg);
+ VectorType & va = dynamic_cast<VectorType &>(*a);
+ LOG(info, "requireThatSearchIsWorkingAfterUpdates: vector '%s'", a->getName().c_str());
+ addReservedDoc(*a);
+ a->addDocs(2);
+ va.update(1, value1);
+ va.commit(true);
+ va.update(2, value1);
+ va.update(2, value2);
+ va.commit(true);
+ {
+ ResultSetPtr rs = performSearch(va, value1);
+ EXPECT_EQUAL(1u, rs->getNumHits()); // doc 1 should not have this value
+ }
+ {
+ ResultSetPtr rs = performSearch(va, value2);
+ EXPECT_EQUAL(1u, rs->getNumHits());
+ }
+}
+
+void
+SearchContextTest::requireThatSearchIsWorkingAfterUpdates()
+{
+ for (const auto & cfg : _integerCfg) {
+ requireThatSearchIsWorkingAfterUpdates<IntegerAttribute>(cfg.first, cfg.second, 10, 20);
+ }
+
+ for (const auto & cfg : _stringCfg) {
+ requireThatSearchIsWorkingAfterUpdates<StringAttribute>(cfg.first, cfg.second, "foo", "bar");
+ }
+}
+
+void
+SearchContextTest::requireThatFlagAttributeIsWorkingWhenNewDocsAreAdded()
+{
+ LOG(info, "requireThatFlagAttributeIsWorkingWhenNewDocsAreAdded()");
+ Config cfg(BasicType::INT8, CollectionType::ARRAY);
+ cfg.setFastSearch(true);
+ {
+ cfg.setGrowStrategy(GrowStrategy(1, 0, 1));
+ AttributePtr a = AttributeFactory::createAttribute("flags", cfg);
+ FlagAttribute & fa = dynamic_cast<FlagAttribute &>(*a);
+ addReservedDoc(fa);
+ fa.addDocs(1);
+ fa.append(1, 10, 1);
+ fa.append(1, 24, 1);
+ fa.commit(true);
+ fa.addDocs(1);
+ fa.append(2, 20, 1);
+ fa.append(2, 24, 1);
+ fa.commit(true);
+ fa.addDocs(1);
+ fa.append(3, 30, 1);
+ fa.append(3, 26, 1);
+ fa.commit(true);
+ fa.addDocs(1);
+ fa.append(4, 40, 1);
+ fa.append(4, 24, 1);
+ fa.commit(true);
+ {
+ ResultSetPtr rs = performSearch(fa, "<24");
+ EXPECT_EQUAL(2u, rs->getNumHits());
+ EXPECT_EQUAL(1u, rs->getArray()[0]._docId);
+ EXPECT_EQUAL(2u, rs->getArray()[1]._docId);
+ }
+ {
+ ResultSetPtr rs = performSearch(fa, "24");
+ EXPECT_EQUAL(3u, rs->getNumHits());
+ EXPECT_EQUAL(1u, rs->getArray()[0]._docId);
+ EXPECT_EQUAL(2u, rs->getArray()[1]._docId);
+ EXPECT_EQUAL(4u, rs->getArray()[2]._docId);
+ }
+ }
+ {
+ cfg.setGrowStrategy(GrowStrategy(4, 0, 4));
+ AttributePtr a = AttributeFactory::createAttribute("flags", cfg);
+ FlagAttribute & fa = dynamic_cast<FlagAttribute &>(*a);
+ std::vector<uint32_t> exp50;
+ std::vector<uint32_t> exp60;
+ addReservedDoc(fa);
+ for (uint32_t i = 0; i < 200; ++i) {
+ uint32_t docId;
+ EXPECT_TRUE(fa.addDoc(docId));
+ if (i % 2 == 0) {
+ fa.append(docId, 50, 1);
+ exp50.push_back(docId);
+ } else {
+ fa.append(docId, 60, 1);
+ exp60.push_back(docId);
+ }
+ fa.commit(true);
+ {
+ ResultSetPtr rs1 = performSearch(fa, "50");
+ ResultSetPtr rs2 = performSearch(fa, "<51");
+ EXPECT_EQUAL(exp50.size(), rs1->getNumHits());
+ EXPECT_EQUAL(exp50.size(), rs2->getNumHits());
+ for (size_t j = 0; j < exp50.size(); ++j) {
+ EXPECT_EQUAL(exp50[j], rs1->getArray()[j]._docId);
+ EXPECT_EQUAL(exp50[j], rs2->getArray()[j]._docId);
+ }
+ }
+ {
+ ResultSetPtr rs = performSearch(fa, "60");
+ EXPECT_EQUAL(exp60.size(), rs->getNumHits());
+ for (size_t j = 0; j < exp60.size(); ++j) {
+ EXPECT_EQUAL(exp60[j], rs->getArray()[j]._docId);
+ }
+ }
+ }
+ }
+}
+
+template <typename VectorType, typename ValueType>
+void
+SearchContextTest::requireThatInvalidSearchTermGivesZeroHits(const vespalib::string & name,
+ const Config & cfg,
+ ValueType value)
+{
+ AttributePtr a = AttributeFactory::createAttribute(name, cfg);
+ VectorType & va = dynamic_cast<VectorType &>(*a);
+ LOG(info, "requireThatInvalidSearchTermGivesZeroHits: vector '%s'", a->getName().c_str());
+ addReservedDoc(*a);
+ a->addDocs(1);
+ va.update(1, value);
+ va.commit(true);
+ ResultSetPtr rs = performSearch(va, "foo");
+ EXPECT_EQUAL(0u, rs->getNumHits());
+}
+
+void
+SearchContextTest::requireThatInvalidSearchTermGivesZeroHits()
+{
+ for (const auto & cfg : _integerCfg) {
+ requireThatInvalidSearchTermGivesZeroHits<IntegerAttribute>(cfg.first, cfg.second, 10);
+ }
+ for (const auto & cfg : _floatCfg) {
+ requireThatInvalidSearchTermGivesZeroHits<FloatingPointAttribute>(cfg.first, cfg.second, 10);
+ }
+}
+
+void
+SearchContextTest::requireThatFlagAttributeHandlesTheByteRange()
+{
+ LOG(info, "requireThatFlagAttributeHandlesTheByteRange()");
+ Config cfg(BasicType::INT8, CollectionType::ARRAY);
+ cfg.setFastSearch(true);
+
+ AttributePtr a = AttributeFactory::createAttribute("flags", cfg);
+ FlagAttribute & fa = dynamic_cast<FlagAttribute &>(*a);
+ addReservedDoc(fa);
+ fa.addDocs(5);
+ fa.append(1, -128, 1);
+ fa.append(2, -64, 1);
+ fa.append(2, -8, 1);
+ fa.append(3, 0, 1);
+ fa.append(3, 8, 1);
+ fa.append(4, 64, 1);
+ fa.append(4, 24, 1);
+ fa.append(5, 127, 1);
+ fa.commit(true);
+
+ performSearch(fa, "-128", DocSet().put(1), QueryTermSimple::WORD);
+ performSearch(fa, "127", DocSet().put(5), QueryTermSimple::WORD);
+ performSearch(fa, ">-128", DocSet().put(2).put(3).put(4).put(5), QueryTermSimple::WORD);
+ performSearch(fa, "<127", DocSet().put(1).put(2).put(3).put(4), QueryTermSimple::WORD);
+ performSearch(fa, "[-128;-8]", DocSet().put(1).put(2), QueryTermSimple::WORD);
+ performSearch(fa, "[-8;8]", DocSet().put(2).put(3), QueryTermSimple::WORD);
+ performSearch(fa, "[8;127]", DocSet().put(3).put(4).put(5), QueryTermSimple::WORD);
+ performSearch(fa, "[-129;-8]", DocSet().put(1).put(2), QueryTermSimple::WORD);
+ performSearch(fa, "[8;128]", DocSet().put(3).put(4).put(5), QueryTermSimple::WORD);
+}
+
+void
+SearchContextTest::requireThatOutOfBoundsSearchTermGivesZeroHits(const vespalib::string &name,
+ const Config &cfg,
+ int64_t maxValue)
+{
+ AttributePtr a = AttributeFactory::createAttribute(name, cfg);
+ IntegerAttribute &ia = dynamic_cast<IntegerAttribute &>(*a);
+ addReservedDoc(*a);
+ a->addDocs(1);
+ ia.update(1, maxValue);
+ ia.commit(true);
+ vespalib::string term = vespalib::make_string("%" PRIu64 "", (int64_t) maxValue + 1);
+ LOG(info, "requireThatOutOfBoundsSearchTermGivesZeroHits: vector '%s', term '%s'", a->getName().c_str(), term.c_str());
+ ResultSetPtr rs = performSearch(ia, term);
+ EXPECT_EQUAL(0u, rs->getNumHits());
+}
+
+void
+SearchContextTest::requireThatOutOfBoundsSearchTermGivesZeroHits()
+{
+ for (const auto & cfg : _integerCfg) {
+ int32_t maxValue = std::numeric_limits<int32_t>::max();
+ requireThatOutOfBoundsSearchTermGivesZeroHits(cfg.first, cfg.second, maxValue);
+ }
+ {
+ Config cfg(BasicType::INT8, CollectionType::ARRAY);
+ cfg.setFastSearch(true);
+ int8_t maxValue = std::numeric_limits<int8_t>::max();
+ requireThatOutOfBoundsSearchTermGivesZeroHits("flags", cfg, maxValue);
+ }
+}
+
+
+void
+SearchContextTest::initIntegerConfig()
+{
+ { // CollectionType::SINGLE
+ Config cfg(BasicType::INT32, CollectionType::SINGLE);
+ _integerCfg["s-int32"] = cfg;
+ }
+ { // CollectionType::SINGLE && fastSearch
+ Config cfg(BasicType::INT32, CollectionType::SINGLE);
+ cfg.setFastSearch(true);
+ _integerCfg["s-fs-int32"] = cfg;
+ }
+ { // CollectionType::ARRAY
+ Config cfg(BasicType::INT32, CollectionType::ARRAY);
+ _integerCfg["a-int32"] = cfg;
+ }
+ { // CollectionType::ARRAY && fastSearch
+ Config cfg(BasicType::INT32, CollectionType::ARRAY);
+ cfg.setFastSearch(true);
+ _integerCfg["a-fs-int32"] = cfg;
+ }
+ { // CollectionType::WSET
+ Config cfg(BasicType::INT32, CollectionType::WSET);
+ _integerCfg["w-int32"] = cfg;
+ }
+ { // CollectionType::WSET && fastSearch
+ Config cfg(BasicType::INT32, CollectionType::WSET);
+ cfg.setFastSearch(true);
+ _integerCfg["w-fs-int32"] = cfg;
+ }
+}
+
+void
+SearchContextTest::initFloatConfig()
+{
+ { // CollectionType::SINGLE
+ Config cfg(BasicType::FLOAT, CollectionType::SINGLE);
+ _floatCfg["s-float"] = cfg;
+ }
+ { // CollectionType::SINGLE && fastSearch
+ Config cfg(BasicType::FLOAT, CollectionType::SINGLE);
+ cfg.setFastSearch(true);
+ _floatCfg["s-fs-float"] = cfg;
+ }
+ { // CollectionType::ARRAY
+ Config cfg(BasicType::FLOAT, CollectionType::ARRAY);
+ _floatCfg["a-float"] = cfg;
+ }
+ { // CollectionType::ARRAY && fastSearch
+ Config cfg(BasicType::FLOAT, CollectionType::ARRAY);
+ cfg.setFastSearch(true);
+ _floatCfg["a-fs-float"] = cfg;
+ }
+ { // CollectionType::WSET
+ Config cfg(BasicType::FLOAT, CollectionType::WSET);
+ _floatCfg["w-float"] = cfg;
+ }
+ { // CollectionType::WSET && fastSearch
+ Config cfg(BasicType::FLOAT, CollectionType::WSET);
+ cfg.setFastSearch(true);
+ _floatCfg["w-fs-float"] = cfg;
+ }
+}
+
+void
+SearchContextTest::initStringConfig()
+{
+ { // CollectionType::SINGLE
+ Config cfg(BasicType::STRING, CollectionType::SINGLE);
+ _stringCfg["s-str"] = cfg;
+ }
+ { // CollectionType::ARRAY
+ Config cfg(BasicType::STRING, CollectionType::ARRAY);
+ _stringCfg["a-str"] = cfg;
+ }
+ { // CollectionType::WSET
+ Config cfg(BasicType::STRING, CollectionType::WSET);
+ _stringCfg["w-str"] = cfg;
+ }
+ { // CollectionType::SINGLE && fastSearch
+ Config cfg(BasicType::STRING, CollectionType::SINGLE);
+ cfg.setFastSearch(true);
+ _stringCfg["s-fs-str"] = cfg;
+ }
+ { // CollectionType::ARRAY && fastSearch
+ Config cfg(BasicType::STRING, CollectionType::ARRAY);
+ cfg.setFastSearch(true);
+ _stringCfg["a-fs-str"] = cfg;
+ }
+ { // CollectionType::WSET && fastSearch
+ Config cfg(BasicType::STRING, CollectionType::WSET);
+ cfg.setFastSearch(true);
+ _stringCfg["w-fs-str"] = cfg;
+ }
+}
+
+SearchContextTest::SearchContextTest() :
+ _integerCfg(),
+ _floatCfg(),
+ _stringCfg()
+{
+ initIntegerConfig();
+ initFloatConfig();
+ initStringConfig();
+}
+
+int
+SearchContextTest::Main()
+{
+ TEST_INIT("searchcontext_test");
+ EXPECT_TRUE(true);
+
+ testSearch();
+ testInitRange();
+ testRangeSearch();
+ testRangeSearchLimited();
+ testCaseInsensitiveSearch();
+ testRegexSearch();
+ testPrefixSearch();
+ testSearchIterator();
+ testSearchIteratorUnpacking();
+ TEST_DO(requireThatSearchIsWorkingAfterClearDoc());
+ TEST_DO(requireThatSearchIsWorkingAfterLoadAndClearDoc());
+ TEST_DO(requireThatSearchIsWorkingAfterUpdates());
+ TEST_DO(requireThatFlagAttributeIsWorkingWhenNewDocsAreAdded());
+ TEST_DO(requireThatInvalidSearchTermGivesZeroHits());
+ TEST_DO(requireThatFlagAttributeHandlesTheByteRange());
+ TEST_DO(requireThatOutOfBoundsSearchTermGivesZeroHits());
+
+ TEST_DONE();
+}
+
+}
+
+TEST_APPHOOK(search::SearchContextTest);
diff --git a/searchlib/src/tests/attribute/searchcontext/searchcontext_test.sh b/searchlib/src/tests/attribute/searchcontext/searchcontext_test.sh
new file mode 100755
index 00000000000..3aae4bfe4d5
--- /dev/null
+++ b/searchlib/src/tests/attribute/searchcontext/searchcontext_test.sh
@@ -0,0 +1,5 @@
+#!/bin/bash
+$VALGRIND ./searchlib_searchcontext_test_app
+rm -rf *.dat
+rm -rf *.idx
+rm -rf *.weight
diff --git a/searchlib/src/tests/attribute/sourceselector/.gitignore b/searchlib/src/tests/attribute/sourceselector/.gitignore
new file mode 100644
index 00000000000..265c856fd01
--- /dev/null
+++ b/searchlib/src/tests/attribute/sourceselector/.gitignore
@@ -0,0 +1,4 @@
+.depend
+Makefile
+sourceselector_test
+searchlib_sourceselector_test_app
diff --git a/searchlib/src/tests/attribute/sourceselector/CMakeLists.txt b/searchlib/src/tests/attribute/sourceselector/CMakeLists.txt
new file mode 100644
index 00000000000..24b7a75dd07
--- /dev/null
+++ b/searchlib/src/tests/attribute/sourceselector/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_sourceselector_test_app
+ SOURCES
+ sourceselector_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_sourceselector_test_app COMMAND searchlib_sourceselector_test_app)
diff --git a/searchlib/src/tests/attribute/sourceselector/DESC b/searchlib/src/tests/attribute/sourceselector/DESC
new file mode 100644
index 00000000000..7568f5de080
--- /dev/null
+++ b/searchlib/src/tests/attribute/sourceselector/DESC
@@ -0,0 +1 @@
+This is a test of the sourceselector interface.
diff --git a/searchlib/src/tests/attribute/sourceselector/FILES b/searchlib/src/tests/attribute/sourceselector/FILES
new file mode 100644
index 00000000000..0d2803e762d
--- /dev/null
+++ b/searchlib/src/tests/attribute/sourceselector/FILES
@@ -0,0 +1 @@
+sourceselector.cpp
diff --git a/searchlib/src/tests/attribute/sourceselector/sourceselector_test.cpp b/searchlib/src/tests/attribute/sourceselector/sourceselector_test.cpp
new file mode 100644
index 00000000000..a3595f8724d
--- /dev/null
+++ b/searchlib/src/tests/attribute/sourceselector/sourceselector_test.cpp
@@ -0,0 +1,216 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Unit tests for sourceselector.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("sourceselector_test");
+
+#include <vespa/searchlib/attribute/fixedsourceselector.h>
+#include <vespa/searchlib/index/dummyfileheadercontext.h>
+#include <vespa/vespalib/testkit/testapp.h>
+
+using std::unique_ptr;
+using std::string;
+using namespace search;
+using namespace search::queryeval;
+using search::common::FileHeaderContext;
+using search::index::DummyFileHeaderContext;
+
+namespace {
+template <typename T, size_t N> size_t arraysize(const T (&)[N]) { return N; }
+
+const uint32_t maxDocId = 4096;
+struct DocSource { uint32_t docId; uint8_t source; };
+const DocSource docs[] = { {0,1}, {1, 0}, {2, 2}, {4, 3}, {8, 9}, {16, 178},
+ {32, 1}, {64, 2}, {128, 3}, {256,4}, {512, 2},
+ {1024, 1}, {2048,5}, {maxDocId,1} };
+const string index_dir = "test_data";
+const string base_file_name = "test_data/sourcelist";
+const string base_file_name2 = "test_data/sourcelist2";
+const uint32_t default_source = 7;
+const uint32_t base_id = 42;
+
+class Test : public vespalib::TestApp
+{
+public:
+ int Main();
+private:
+ void testSourceSelector(const DocSource *docSource, size_t sz, uint8_t defaultSource, ISourceSelector & selector);
+ void testFixed(const DocSource *docSource, size_t sz);
+ template <typename SelectorType>
+ void requireThatSelectorCanCloneAndSubtract();
+ void requireThatSelectorCanCloneAndSubtract();
+ template <typename SelectorType>
+ void requireThatSelectorCanSaveAndLoad();
+ void requireThatSelectorCanSaveAndLoad();
+ template <typename SelectorType>
+ void requireThatCompleteSourceRangeIsHandled();
+ void requireThatCompleteSourceRangeIsHandled();
+ template <typename SelectorType>
+ void requireThatSourcesAreCountedCorrectly();
+ void requireThatSourcesAreCountedCorrectly();
+};
+
+int
+Test::Main()
+{
+ TEST_INIT("sourceselector_test");
+
+ if (_argc > 0) {
+ DummyFileHeaderContext::setCreator(_argv[0]);
+ }
+ testFixed(docs, arraysize(docs));
+ TEST_DO(requireThatSelectorCanCloneAndSubtract());
+ TEST_DO(requireThatSelectorCanSaveAndLoad());
+ TEST_DO(requireThatCompleteSourceRangeIsHandled());
+ TEST_DO(requireThatSourcesAreCountedCorrectly());
+
+ TEST_DONE();
+}
+
+void setSources(ISourceSelector &selector) {
+ for (size_t i = 0; i < arraysize(docs); ++i) {
+ selector.setSource(docs[i].docId, docs[i].source);
+ }
+}
+
+void Test::testFixed(const DocSource *docSource, size_t sz)
+{
+ FixedSourceSelector selector(default_source, base_file_name, 10);
+ EXPECT_EQUAL(default_source, selector.getDefaultSource());
+ EXPECT_EQUAL(10u, selector.getDocIdLimit());
+// EXPECT_EQUAL(default_source, selector.createIterator()->getSource(maxDocId + 1));
+ setSources(selector);
+ testSourceSelector(docSource, sz, selector.getDefaultSource(), selector);
+ EXPECT_EQUAL(maxDocId+1, selector.getDocIdLimit());
+}
+
+void Test::testSourceSelector(const DocSource *docSource, size_t sz,
+ uint8_t defaultSource, ISourceSelector &selector)
+{
+ {
+ ISourceSelector::Iterator::UP it(selector.createIterator());
+ for (size_t i = 0; i < sz; ++i) {
+ EXPECT_EQUAL(docSource[i].source, it->getSource(docSource[i].docId));
+ }
+ }
+ {
+ ISourceSelector::Iterator::UP it(selector.createIterator());
+ for (size_t i = 0, j = 0; i <= docSource[sz - 1].docId; ++i) {
+ if (i != docSource[j].docId) {
+ EXPECT_EQUAL(defaultSource, it->getSource(i));
+ } else {
+ EXPECT_EQUAL(docSource[j].source, it->getSource(i));
+ ++j;
+ }
+ }
+ }
+}
+
+template <typename SelectorType>
+void
+Test::requireThatSelectorCanCloneAndSubtract()
+{
+ SelectorType selector(default_source, base_file_name);
+ setSources(selector);
+ selector.setBaseId(base_id);
+
+ const uint32_t diff = 3;
+ typename SelectorType::UP
+ new_selector(selector.cloneAndSubtract(base_file_name2, diff));
+ EXPECT_EQUAL(default_source - diff, new_selector->getDefaultSource());
+ EXPECT_EQUAL(base_id + diff, new_selector->getBaseId());
+ EXPECT_EQUAL(maxDocId+1, new_selector->getDocIdLimit());
+
+ ISourceSelector::Iterator::UP it(new_selector->createIterator());
+ for(size_t i = 0; i < arraysize(docs); ++i) {
+ if (docs[i].source > diff) {
+ EXPECT_EQUAL(docs[i].source - diff, it->getSource(docs[i].docId));
+ } else {
+ EXPECT_EQUAL(0, it->getSource(docs[i].docId));
+ }
+ }
+}
+
+void
+Test::requireThatSelectorCanCloneAndSubtract()
+{
+ requireThatSelectorCanCloneAndSubtract<FixedSourceSelector>();
+}
+
+template <typename SelectorType>
+void
+Test::requireThatSelectorCanSaveAndLoad()
+{
+ SelectorType selector(default_source, base_file_name2);
+ setSources(selector);
+ selector.setBaseId(base_id);
+ selector.setSource(maxDocId + 1, default_source);
+
+ FastOS_FileInterface::EmptyAndRemoveDirectory(index_dir.c_str());
+ FastOS_FileInterface::MakeDirIfNotPresentOrExit(index_dir.c_str());
+
+ SourceSelector::SaveInfo::UP save_info =
+ selector.extractSaveInfo(base_file_name);
+ save_info->save(TuneFileAttributes(), DummyFileHeaderContext());
+ typename SelectorType::UP
+ selector2(SelectorType::load(base_file_name));
+ testSourceSelector(docs, arraysize(docs), default_source, *selector2);
+ EXPECT_EQUAL(base_id, selector2->getBaseId());
+ EXPECT_EQUAL(maxDocId + 2, selector2->getDocIdLimit());
+
+ FastOS_FileInterface::EmptyAndRemoveDirectory(index_dir.c_str());
+}
+
+void
+Test::requireThatSelectorCanSaveAndLoad()
+{
+ requireThatSelectorCanSaveAndLoad<FixedSourceSelector>();
+}
+
+template <typename SelectorType>
+void
+Test::requireThatCompleteSourceRangeIsHandled()
+{
+ SelectorType selector(default_source, base_file_name);
+ for (uint32_t i = 0; i < ISourceSelector::SOURCE_LIMIT; ++i) {
+ selector.setSource(i, i);
+ }
+ ISourceSelector::Iterator::UP itr = selector.createIterator();
+ for (uint32_t i = 0; i < ISourceSelector::SOURCE_LIMIT; ++i) {
+ EXPECT_EQUAL((queryeval::Source)i, itr->getSource(i));
+ }
+}
+
+void
+Test::requireThatCompleteSourceRangeIsHandled()
+{
+ requireThatCompleteSourceRangeIsHandled<FixedSourceSelector>();
+}
+
+template <typename SelectorType>
+void
+Test::requireThatSourcesAreCountedCorrectly()
+{
+ SelectorType selector(default_source, base_file_name);
+ for (uint32_t i = 0; i < 256; ++i) {
+ selector.setSource(i, i%16);
+ }
+ SourceSelector::Histogram hist = selector.getDistribution();
+ for (uint32_t i = 0; i < 16; ++i) {
+ EXPECT_EQUAL(16u, hist[i]);
+ }
+ for (uint32_t i = 16; i < 256; ++i) {
+ EXPECT_EQUAL(0u, hist[i]);
+ }
+}
+
+void
+Test::requireThatSourcesAreCountedCorrectly()
+{
+ requireThatSourcesAreCountedCorrectly<FixedSourceSelector>();
+}
+
+} // namespace
+
+TEST_APPHOOK(Test);
diff --git a/searchlib/src/tests/attribute/stringattribute/.gitignore b/searchlib/src/tests/attribute/stringattribute/.gitignore
new file mode 100644
index 00000000000..0e8a04bc19d
--- /dev/null
+++ b/searchlib/src/tests/attribute/stringattribute/.gitignore
@@ -0,0 +1,4 @@
+.depend
+Makefile
+stringattribute_test
+searchlib_stringattribute_test_app
diff --git a/searchlib/src/tests/attribute/stringattribute/CMakeLists.txt b/searchlib/src/tests/attribute/stringattribute/CMakeLists.txt
new file mode 100644
index 00000000000..032ce9cac4e
--- /dev/null
+++ b/searchlib/src/tests/attribute/stringattribute/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_stringattribute_test_app
+ SOURCES
+ stringattribute_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_stringattribute_test_app COMMAND sh stringattribute_test.sh)
diff --git a/searchlib/src/tests/attribute/stringattribute/DESC b/searchlib/src/tests/attribute/stringattribute/DESC
new file mode 100644
index 00000000000..5d94ab94325
--- /dev/null
+++ b/searchlib/src/tests/attribute/stringattribute/DESC
@@ -0,0 +1 @@
+Unit tests for SingleValueStringAttribute and MultiValueStringAttribute.
diff --git a/searchlib/src/tests/attribute/stringattribute/FILES b/searchlib/src/tests/attribute/stringattribute/FILES
new file mode 100644
index 00000000000..e68ef57177d
--- /dev/null
+++ b/searchlib/src/tests/attribute/stringattribute/FILES
@@ -0,0 +1 @@
+stringattribute.cpp
diff --git a/searchlib/src/tests/attribute/stringattribute/stringattribute_test.cpp b/searchlib/src/tests/attribute/stringattribute/stringattribute_test.cpp
new file mode 100644
index 00000000000..154340ba408
--- /dev/null
+++ b/searchlib/src/tests/attribute/stringattribute/stringattribute_test.cpp
@@ -0,0 +1,453 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("stringattribute_test");
+#include <vespa/vespalib/testkit/testapp.h>
+#include <vespa/searchlib/attribute/enumstore.h>
+#include <vespa/searchlib/attribute/singlestringattribute.h>
+#include <vespa/searchlib/attribute/singlestringpostattribute.h>
+#include <vespa/searchlib/attribute/multistringattribute.h>
+#include <vespa/searchlib/attribute/multistringpostattribute.h>
+
+#include <vespa/searchlib/attribute/enumstore.hpp>
+#include <vespa/searchlib/attribute/singlestringattribute.h>
+#include <vespa/searchlib/attribute/singlestringpostattribute.hpp>
+#include <vespa/searchlib/attribute/multistringattribute.h>
+#include <vespa/searchlib/attribute/multistringpostattribute.hpp>
+
+namespace search {
+
+using attribute::CollectionType;
+using attribute::IAttributeVector;
+
+class StringAttributeTest : public vespalib::TestApp
+{
+private:
+ typedef ArrayStringAttribute ArrayStr;
+ typedef WeightedSetStringAttribute WeightedSetStr;
+ typedef ArrayStringPostingAttribute ArrayStrPosting;
+ typedef WeightedSetStringPostingAttribute WeightedSetStrPosting;
+ typedef attribute::Config Config;
+ typedef attribute::BasicType BasicType;
+
+ template <typename Attribute>
+ void addDocs(Attribute & vec, uint32_t numDocs);
+ template <typename Attribute>
+ void checkCount(Attribute & vec, uint32_t doc, uint32_t valueCount,
+ uint32_t numValues, const vespalib::string & value);
+ void testMultiValue();
+ template <typename Attribute>
+ void testMultiValue(Attribute & attr, uint32_t numDocs);
+ void testMultiValueMultipleClearDocBetweenCommit();
+ void testMultiValueRemove();
+ void testSingleValue();
+ void testDefaultValueOnAddDoc(AttributeVector & v);
+ template <typename Attribute>
+ void testSingleValue(Attribute & svsa, Config &cfg);
+
+public:
+ int Main();
+};
+
+template <typename Attribute>
+void
+StringAttributeTest::addDocs(Attribute & vec, uint32_t numDocs)
+{
+ for (uint32_t i = 0; i < numDocs; ++i) {
+ typename Attribute::DocId doc;
+ EXPECT_TRUE(vec.addDoc(doc));
+ EXPECT_TRUE(doc == i);
+ EXPECT_TRUE(vec.getNumDocs() == i + 1);
+ EXPECT_TRUE(vec.getValueCount(doc) == 0);
+ }
+ EXPECT_TRUE(vec.getNumDocs() == numDocs);
+}
+
+template <typename Attribute>
+void
+StringAttributeTest::checkCount(Attribute & vec, uint32_t doc, uint32_t valueCount,
+ uint32_t numValues, const vespalib::string & value)
+{
+ std::vector<vespalib::string> buffer(valueCount);
+ EXPECT_TRUE(static_cast<uint32_t>(vec.getValueCount(doc)) == valueCount);
+ EXPECT_TRUE(vec.get(doc, &buffer[0], buffer.size()) == valueCount);
+ EXPECT_TRUE(std::count(buffer.begin(), buffer.end(), value) == numValues);
+}
+
+
+void
+StringAttributeTest::testMultiValue()
+{
+ uint32_t numDocs = ArrayStr::MultiValueMapping::maxValues() + 1;
+
+ { // Array String Attribute
+ ASSERT_TRUE(ArrayStr::MultiValueMapping::maxValues() == numDocs - 1);
+ ArrayStr attr("a-string");
+ testMultiValue(attr, numDocs);
+ }
+ { // Weighted Set String Attribute
+ ASSERT_TRUE(WeightedSetStr::MultiValueMapping::maxValues() == numDocs - 1);
+ WeightedSetStr attr("ws-string",
+ Config(BasicType::STRING, CollectionType::WSET));
+ testMultiValue(attr, numDocs);
+ }
+ { // Array String Posting Attribute
+ ASSERT_TRUE(ArrayStrPosting::MultiValueMapping::maxValues() == numDocs - 1);
+ Config cfg(BasicType::STRING, CollectionType::ARRAY);
+ cfg.setFastSearch(true);
+ ArrayStrPosting attr("a-fs-string", cfg);
+ testMultiValue(attr, numDocs);
+ }
+ { // Weighted Set String Posting Attribute
+ ASSERT_TRUE(WeightedSetStrPosting::MultiValueMapping::maxValues() == numDocs - 1);
+ Config cfg(BasicType::STRING, CollectionType::WSET);
+ cfg.setFastSearch(true);
+ WeightedSetStrPosting attr("ws-fs-string", cfg);
+ testMultiValue(attr, numDocs);
+ }
+
+}
+
+
+template <typename Attribute>
+void
+StringAttributeTest::testMultiValue(Attribute & attr, uint32_t numDocs)
+{
+ EXPECT_TRUE(attr.getNumDocs() == 0);
+
+ // generate two sets of unique strings
+ std::vector<vespalib::string> uniqueStrings;
+ uniqueStrings.reserve(numDocs - 1);
+ for (uint32_t i = 0; i < numDocs - 1; ++i) {
+ char unique[16];
+ sprintf(unique, i < 10 ? "enum0%u" : "enum%u", i);
+ uniqueStrings.push_back(vespalib::string(unique));
+ }
+ std::vector<vespalib::string> newUniques;
+ newUniques.reserve(numDocs - 1);
+ for (uint32_t i = 0; i < numDocs - 1; ++i) {
+ char unique[16];
+ sprintf(unique, i < 10 ? "unique0%u" : "unique%u", i);
+ newUniques.push_back(vespalib::string(unique));
+ }
+
+ // add docs
+ addDocs(attr, numDocs);
+
+ // insert values
+ for (uint32_t doc = 0; doc < numDocs; ++doc) {
+ uint32_t valueCount = doc;
+ for (uint32_t j = 0; j < valueCount; ++j) {
+ EXPECT_TRUE(attr.append(doc, uniqueStrings[j], 1));
+ }
+ attr.commit();
+ }
+
+ //attr.getEnumStore().printCurrentContent();
+
+ // check values and enums
+ for (uint32_t doc = 0; doc < numDocs; ++doc) {
+ uint32_t valueCount = attr.getValueCount(doc);
+ EXPECT_TRUE(valueCount == doc);
+
+ // test get first
+ if (valueCount == 0) {
+ EXPECT_TRUE(attr.get(doc) == NULL);
+ EXPECT_TRUE(attr.getEnum(doc) == std::numeric_limits<uint32_t>::max());
+ } else {
+ EXPECT_TRUE(strcmp(attr.get(doc), uniqueStrings[0].c_str()) == 0);
+ uint32_t e;
+ EXPECT_TRUE(attr.findEnum(uniqueStrings[0].c_str(), e));
+ EXPECT_TRUE(attr.getEnum(doc) == e);
+ }
+
+ // test get all
+ std::vector<vespalib::string> values(valueCount);
+ EXPECT_TRUE(attr.get(doc, &values[0], valueCount) == valueCount);
+
+ std::vector<uint32_t> enums(valueCount);
+ EXPECT_TRUE((static_cast<search::attribute::IAttributeVector &>(attr)).get(doc, &enums[0], valueCount) == valueCount);
+
+ for (uint32_t j = 0; j < valueCount; ++j) {
+ //LOG(info, "doc[%u][%u] = %s", doc, j, values[j].c_str());
+ EXPECT_TRUE(values[j] == uniqueStrings[j]);
+ uint32_t e = 100;
+ EXPECT_TRUE(attr.findEnum(values[j].c_str(), e));
+ EXPECT_TRUE(enums[j] == e);
+ }
+ }
+
+ // check for correct refcounts
+ for (uint32_t i = 0; i < uniqueStrings.size(); ++i) {
+ typename Attribute::EnumStore::Index idx;
+ EXPECT_TRUE(attr.getEnumStore().findIndex(uniqueStrings[i].c_str(), idx));
+ uint32_t expectedUsers = numDocs - 1 - i;
+ EXPECT_EQUAL(expectedUsers, attr.getEnumStore().getRefCount(idx));
+ }
+
+ typename Attribute::Histogram remaining = attr.getMultiValueMapping().getRemaining();
+ for (typename Attribute::Histogram::const_iterator it(remaining.begin()), mt(remaining.end()); it != mt; ++it) {
+ EXPECT_TRUE(it->second == 0);
+ }
+
+ // clear and insert new unique strings
+ for (uint32_t doc = 0; doc < numDocs; ++doc) {
+ uint32_t oldValueCount = doc;
+ uint32_t valueCount = numDocs - 1 - doc;
+ //LOG(info, "clear and insert: doc = %u, valueCount = %u", doc, valueCount);
+ EXPECT_TRUE(attr.clearDoc(doc) == oldValueCount);
+ for (uint32_t j = 0; j < valueCount; ++j) {
+ EXPECT_TRUE(attr.append(doc, newUniques[j], 1));
+ }
+ attr.commit();
+
+ //attr.getEnumStore().printCurrentContent();
+ }
+
+ // check values and enums
+ for (uint32_t doc = 0; doc < numDocs; ++doc) {
+ uint32_t valueCount = attr.getValueCount(doc);
+ uint32_t expectedValueCount = numDocs - 1 - doc;
+ EXPECT_TRUE(valueCount == expectedValueCount);
+
+ // test get all
+ std::vector<vespalib::string> values(valueCount);
+ EXPECT_TRUE(attr.get(doc, &values[0], valueCount) == valueCount);
+
+ std::vector<uint32_t> enums(valueCount);
+ EXPECT_TRUE((static_cast<search::attribute::IAttributeVector &>(attr)).get(doc, &enums[0], valueCount) == valueCount);
+
+ for (uint32_t j = 0; j < valueCount; ++j) {
+ //LOG(info, "doc[%u][%u] = %s", doc, j, values[j].c_str());
+ EXPECT_TRUE(values[j] == newUniques[j]);
+ uint32_t e = 100;
+ EXPECT_TRUE(attr.findEnum(values[j].c_str(), e));
+ EXPECT_TRUE(enums[j] == e);
+ }
+ }
+
+ // check that enumXX strings are removed
+ for (uint32_t i = 0; i < uniqueStrings.size(); ++i) {
+ uint32_t e;
+ EXPECT_TRUE(!attr.findEnum(uniqueStrings[i].c_str(), e));
+ }
+
+ // check for correct refcounts
+ for (uint32_t i = 0; i < newUniques.size(); ++i) {
+ typename Attribute::EnumStore::Index idx;
+ EXPECT_TRUE(attr.getEnumStore().findIndex(newUniques[i].c_str(), idx));
+ uint32_t expectedUsers = numDocs - 1 - i;
+ EXPECT_EQUAL(expectedUsers, attr.getEnumStore().getRefCount(idx));
+ }
+}
+
+void
+StringAttributeTest::testMultiValueMultipleClearDocBetweenCommit()
+{
+ // This is also tested for all array attributes in attribute unit test
+ ArrayStr mvsa("a-string");
+ uint32_t numDocs = 50;
+ addDocs(mvsa, numDocs);
+ std::vector<vespalib::string> buffer(numDocs);
+
+ for (uint32_t doc = 0; doc < numDocs; ++doc) {
+ uint32_t valueCount = doc;
+ EXPECT_TRUE(mvsa.clearDoc(doc) == 0);
+ for (uint32_t j = 0; j < valueCount; ++j) {
+ EXPECT_TRUE(mvsa.append(doc, "first", 1));
+ }
+ EXPECT_TRUE(mvsa.clearDoc(doc) == 0);
+ for (uint32_t j = 0; j < valueCount; ++j) {
+ EXPECT_TRUE(mvsa.append(doc, "second", 1));
+ }
+ mvsa.commit();
+
+ // check for correct values
+ checkCount(mvsa, doc, valueCount, valueCount, "second");
+ }
+}
+
+
+void
+StringAttributeTest::testMultiValueRemove()
+{
+ // This is also tested for all array attributes in attribute unit test
+ ArrayStr mvsa("a-string");
+ uint32_t numDocs = 50;
+ addDocs(mvsa, numDocs);
+ std::vector<vespalib::string> buffer(9);
+
+ for (uint32_t doc = 0; doc < numDocs; ++doc) {
+ EXPECT_TRUE(mvsa.append(doc, "one", 1));
+ for (uint32_t i = 0; i < 3; ++i) {
+ EXPECT_TRUE(mvsa.append(doc, "three", 1));
+ }
+ for (uint32_t i = 0; i < 5; ++i) {
+ EXPECT_TRUE(mvsa.append(doc, "five", 1));
+ }
+
+ mvsa.commit();
+ checkCount(mvsa, doc, 9, 1, "one");
+ checkCount(mvsa, doc, 9, 3, "three");
+ checkCount(mvsa, doc, 9, 5, "five");
+
+ EXPECT_TRUE(mvsa.remove(doc, "zero", 1));
+ mvsa.commit();
+ checkCount(mvsa, doc, 9, 1, "one");
+ checkCount(mvsa, doc, 9, 3, "three");
+ checkCount(mvsa, doc, 9, 5, "five");
+
+ EXPECT_TRUE(mvsa.remove(doc, "one", 1));
+ mvsa.commit();
+ checkCount(mvsa, doc, 8, 0, "one");
+ checkCount(mvsa, doc, 8, 3, "three");
+ checkCount(mvsa, doc, 8, 5, "five");
+
+ EXPECT_TRUE(mvsa.remove(doc, "five", 1));
+ mvsa.commit();
+ checkCount(mvsa, doc, 3, 0, "one");
+ checkCount(mvsa, doc, 3, 3, "three");
+ checkCount(mvsa, doc, 3, 0, "five");
+ }
+}
+
+void
+StringAttributeTest::testSingleValue()
+{
+ {
+ Config cfg(BasicType::STRING, CollectionType::SINGLE);
+ SingleValueStringAttribute svsa("svsa", cfg);
+ const IAttributeVector * ia = &svsa;
+ EXPECT_TRUE(dynamic_cast<const SingleValueEnumAttributeBase *>(ia) != nullptr);
+ testSingleValue(svsa, cfg);
+
+ SingleValueStringAttribute svsb("svsa", cfg);
+ testDefaultValueOnAddDoc(svsb);
+ }
+ {
+ Config cfg(BasicType::STRING, CollectionType::SINGLE);
+ cfg.setFastSearch(true);
+ SingleValueStringPostingAttribute svsa("svspb", cfg);
+ testSingleValue(svsa, cfg);
+
+ SingleValueStringPostingAttribute svsb("svspb", cfg);
+ testDefaultValueOnAddDoc(svsb);
+ }
+}
+
+void StringAttributeTest::testDefaultValueOnAddDoc(AttributeVector & v)
+{
+ EXPECT_EQUAL(0u, v.getNumDocs());
+ v.addReservedDoc();
+ EXPECT_EQUAL(1u, v.getNumDocs());
+ EXPECT_TRUE( EnumStoreBase::Index(v.getEnum(0)).valid() );
+ uint32_t doc(7);
+ EXPECT_TRUE( v.addDoc(doc) );
+ EXPECT_EQUAL(1u, doc);
+ EXPECT_EQUAL(2u, v.getNumDocs());
+ EXPECT_TRUE( EnumStoreBase::Index(v.getEnum(doc)).valid() );
+ EXPECT_EQUAL(0u, strlen(v.getString(doc, NULL, 0)));
+}
+
+template <typename Attribute>
+void
+StringAttributeTest::testSingleValue(Attribute & svsa, Config &cfg)
+{
+ StringAttribute & v = svsa;
+ const char * t = "not defined";
+ uint32_t doc = 2000;
+ uint32_t e1 = 2000;
+ uint32_t e2 = 2000;
+ uint32_t numDocs = 1000;
+ char tmp[32];
+
+ // add docs
+ for (uint32_t i = 0; i < numDocs; ++i) {
+ EXPECT_TRUE( v.addDoc(doc) );
+ EXPECT_TRUE( doc == i );
+ EXPECT_TRUE( v.getNumDocs() == i + 1 );
+ EXPECT_TRUE( v.getValueCount(doc) == 1 );
+ EXPECT_TRUE( ! EnumStoreBase::Index(v.getEnum(doc)).valid() );
+ }
+
+ std::map<vespalib::string, uint32_t> enums;
+ // 10 unique strings
+ for (uint32_t i = 0; i < numDocs; ++i) {
+ sprintf(tmp, "enum%u", i % 10);
+ EXPECT_TRUE( v.update(i, tmp) );
+ EXPECT_TRUE( v.getValueCount(i) == 1 );
+ EXPECT_TRUE( ! EnumStoreBase::Index(v.getEnum(i)).valid() );
+ if ((i % 10) == 9) {
+ v.commit();
+ for (uint32_t j = i - 9; j <= i; ++j) {
+ sprintf(tmp, "enum%u", j % 10);
+ EXPECT_TRUE( strcmp(t = v.get(j), tmp) == 0 );
+ e1 = v.getEnum(j);
+ EXPECT_TRUE( v.findEnum(t, e2) );
+ EXPECT_TRUE( e1 == e2 );
+ if (enums.count(vespalib::string(t)) == 0) {
+ enums[vespalib::string(t)] = e1;
+ } else {
+ EXPECT_TRUE( e1 == enums[vespalib::string(t)]);
+ EXPECT_TRUE( e2 == enums[vespalib::string(t)]);
+ }
+ }
+ }
+ }
+
+ //svsa.printBuffers();
+
+ // 1000 unique strings
+ for (uint32_t i = 0; i < numDocs; ++i) {
+ sprintf(tmp, "unique%u", i);
+ EXPECT_TRUE( v.update(i, tmp) );
+ sprintf(tmp, "enum%u", i % 10);
+ EXPECT_TRUE( strcmp(v.get(i), tmp) == 0 );
+ if ((i % 10) == 9) {
+ //LOG(info, "commit: i = %u", i);
+ v.commit();
+ for (uint32_t j = i - 9; j <= i; ++j) {
+ sprintf(tmp, "unique%u", j);
+ EXPECT_TRUE( strcmp(t = v.get(j), tmp) == 0 );
+ e1 = v.getEnum(j);
+ EXPECT_TRUE( v.findEnum(t, e2) );
+ EXPECT_TRUE( e1 == e2 );
+ }
+ //svsa.printBuffers();
+ }
+ }
+ //svsa.printBuffers();
+
+ // check that enumX strings are removed (
+ for (uint32_t i = 0; i < 10; ++i) {
+ sprintf(tmp, "enum%u", i);
+ EXPECT_TRUE( !v.findEnum(tmp, e1) );
+ }
+
+
+ Attribute load("load", cfg);
+ svsa.saveAs(load.getBaseFileName());
+ load.load();
+}
+
+
+
+int
+StringAttributeTest::Main()
+{
+ TEST_INIT("stringattribute_test");
+
+ testMultiValue();
+
+ testMultiValueMultipleClearDocBetweenCommit();
+
+ testMultiValueRemove();
+
+ testSingleValue();
+
+ TEST_DONE();
+}
+
+}
+
+TEST_APPHOOK(search::StringAttributeTest);
diff --git a/searchlib/src/tests/attribute/stringattribute/stringattribute_test.sh b/searchlib/src/tests/attribute/stringattribute/stringattribute_test.sh
new file mode 100755
index 00000000000..d7ac263c1c9
--- /dev/null
+++ b/searchlib/src/tests/attribute/stringattribute/stringattribute_test.sh
@@ -0,0 +1,3 @@
+#!/bin/bash
+$VALGRIND ./searchlib_stringattribute_test_app
+rm -rf *.dat
diff --git a/searchlib/src/tests/attribute/tensorattribute/.gitignore b/searchlib/src/tests/attribute/tensorattribute/.gitignore
new file mode 100644
index 00000000000..08519fe7ae8
--- /dev/null
+++ b/searchlib/src/tests/attribute/tensorattribute/.gitignore
@@ -0,0 +1 @@
+searchlib_tensorattribute_test_app
diff --git a/searchlib/src/tests/attribute/tensorattribute/CMakeLists.txt b/searchlib/src/tests/attribute/tensorattribute/CMakeLists.txt
new file mode 100644
index 00000000000..ec16b4363eb
--- /dev/null
+++ b/searchlib/src/tests/attribute/tensorattribute/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_tensorattribute_test_app
+ SOURCES
+ tensorattribute_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_tensorattribute_test_app COMMAND sh tensorattribute_test.sh)
diff --git a/searchlib/src/tests/attribute/tensorattribute/DESC b/searchlib/src/tests/attribute/tensorattribute/DESC
new file mode 100644
index 00000000000..1cd9aa7cf14
--- /dev/null
+++ b/searchlib/src/tests/attribute/tensorattribute/DESC
@@ -0,0 +1 @@
+Unit tests for TensorAttribute.
diff --git a/searchlib/src/tests/attribute/tensorattribute/FILES b/searchlib/src/tests/attribute/tensorattribute/FILES
new file mode 100644
index 00000000000..1c8480ffde7
--- /dev/null
+++ b/searchlib/src/tests/attribute/tensorattribute/FILES
@@ -0,0 +1 @@
+tensorattribute.cpp
diff --git a/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp
new file mode 100644
index 00000000000..137f93bcffe
--- /dev/null
+++ b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp
@@ -0,0 +1,217 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("tensorattribute_test");
+#include <vespa/vespalib/testkit/test_kit.h>
+#include <vespa/searchlib/attribute/tensorattribute.h>
+#include <vespa/searchlib/attribute/attributeguard.h>
+#include <vespa/vespalib/tensor/tensor_factory.h>
+#include <vespa/vespalib/tensor/default_tensor.h>
+#include <vespa/vespalib/tensor/simple/simple_tensor_builder.h>
+
+using search::attribute::TensorAttribute;
+using search::AttributeGuard;
+using search::AttributeVector;
+using vespalib::tensor::Tensor;
+using vespalib::tensor::TensorCells;
+using vespalib::tensor::TensorDimensions;
+using vespalib::tensor::TensorFactory;
+using vespalib::tensor::TensorType;
+using vespalib::tensor::SimpleTensorBuilder;
+
+namespace vespalib {
+namespace tensor {
+
+static bool operator==(const Tensor &lhs, const Tensor &rhs)
+{
+ return lhs.equals(rhs);
+}
+
+}
+}
+
+
+struct Fixture
+{
+ using BasicType = search::attribute::BasicType;
+ using CollectionType = search::attribute::CollectionType;
+ using Config = search::attribute::Config;
+
+ Config _cfg;
+ vespalib::string _name;
+ std::shared_ptr<TensorAttribute> _tensorAttr;
+ std::shared_ptr<AttributeVector> _attr;
+ vespalib::tensor::DefaultTensor::builder _builder;
+
+ Fixture(const vespalib::string &typeSpec)
+ : _cfg(BasicType::TENSOR, CollectionType::SINGLE),
+ _name("test"),
+ _tensorAttr(),
+ _attr()
+ {
+ _cfg.setTensorType(TensorType::fromSpec(typeSpec));
+ _tensorAttr = std::make_shared<TensorAttribute>(_name, _cfg);
+ _attr = _tensorAttr;
+ _attr->addReservedDoc();
+ }
+
+ Tensor::UP createTensor(const TensorCells &cells) {
+ return TensorFactory::create(cells, _builder);
+ }
+ Tensor::UP createTensor(const TensorCells &cells,
+ const TensorDimensions &dimensions) {
+ return TensorFactory::create(cells, dimensions, _builder);
+ }
+
+ void ensureSpace(uint32_t docId) {
+ while (_attr->getNumDocs() <= docId) {
+ uint32_t newDocId = 0u;
+ _attr->addDoc(newDocId);
+ _attr->commit();
+ }
+ }
+
+ void clearTensor(uint32_t docId) {
+ ensureSpace(docId);
+ _tensorAttr->clearDoc(docId);
+ _attr->commit();
+ }
+
+ void setTensor(uint32_t docId, const Tensor &tensor) {
+ ensureSpace(docId);
+ _tensorAttr->setTensor(docId, tensor);
+ _attr->commit();
+ }
+
+ search::attribute::Status getStatus() {
+ _attr->commit(true);
+ return _attr->getStatus();
+ }
+
+ void
+ assertGetNoTensor(uint32_t docId) {
+ AttributeGuard guard(_attr);
+ Tensor::UP actTensor = _tensorAttr->getTensor(docId);
+ EXPECT_FALSE(actTensor);
+ }
+
+ void
+ assertGetTensor(const Tensor &expTensor, uint32_t docId)
+ {
+ AttributeGuard guard(_attr);
+ Tensor::UP actTensor = _tensorAttr->getTensor(docId);
+ EXPECT_TRUE(static_cast<bool>(actTensor));
+ EXPECT_EQUAL(expTensor, *actTensor);
+ }
+
+ void
+ assertGetTensor(const TensorCells &expCells,
+ const TensorDimensions &expDimensions,
+ uint32_t docId)
+ {
+ Tensor::UP expTensor = createTensor(expCells, expDimensions);
+ assertGetTensor(*expTensor, docId);
+ }
+
+ void save() {
+ bool saveok = _attr->save();
+ EXPECT_TRUE(saveok);
+ }
+
+ void load() {
+ _tensorAttr = std::make_shared<TensorAttribute>(_name, _cfg);
+ _attr = _tensorAttr;
+ bool loadok = _attr->load();
+ EXPECT_TRUE(loadok);
+ }
+};
+
+
+TEST_F("Test empty tensor attribute", Fixture("tensor()"))
+{
+ EXPECT_EQUAL(1u, f._attr->getNumDocs());
+ EXPECT_EQUAL(1u, f._attr->getCommittedDocIdLimit());
+}
+
+
+TEST_F("Test setting tensor value", Fixture("tensor(x{}, y{})"))
+{
+ f.ensureSpace(4);
+ EXPECT_EQUAL(5u, f._attr->getNumDocs());
+ EXPECT_EQUAL(5u, f._attr->getCommittedDocIdLimit());
+ TEST_DO(f.assertGetNoTensor(4));
+ f.setTensor(4, *f.createTensor({}, {}));
+ TEST_DO(f.assertGetTensor({}, {"x", "y"}, 4));
+ f.setTensor(3, *f.createTensor({ {{}, 3} }, { "x", "y"}));
+ TEST_DO(f.assertGetTensor({ {{}, 3} }, { "x", "y"}, 3));
+ TEST_DO(f.assertGetNoTensor(2));
+ TEST_DO(f.clearTensor(3));
+ TEST_DO(f.assertGetNoTensor(3));
+}
+
+
+TEST_F("Test saving / loading tensor attribute", Fixture("tensor(x{}, y{})"))
+{
+ f.ensureSpace(4);
+ f.setTensor(4, *f.createTensor({}, {}));
+ f.setTensor(3, *f.createTensor({ {{}, 3} }, { "x", "y"}));
+ TEST_DO(f.save());
+ TEST_DO(f.load());
+ EXPECT_EQUAL(5u, f._attr->getNumDocs());
+ EXPECT_EQUAL(5u, f._attr->getCommittedDocIdLimit());
+ TEST_DO(f.assertGetTensor({ {{}, 3} }, { "x", "y"}, 3));
+ TEST_DO(f.assertGetTensor({}, {"x", "y"}, 4));
+ TEST_DO(f.assertGetNoTensor(2));
+}
+
+
+TEST_F("Test compaction of tensor attribute", Fixture("tensor(x{}, y{})"))
+{
+ f.ensureSpace(4);
+ Tensor::UP emptytensor = f.createTensor({}, {});
+ Tensor::UP emptyxytensor = f.createTensor({}, {"x", "y"});
+ Tensor::UP simpletensor = f.createTensor({ {{}, 3} }, { "x", "y"});
+ Tensor::UP filltensor = f.createTensor({ {{}, 5} }, { "x", "y"});
+ f.setTensor(4, *emptytensor);
+ f.setTensor(3, *simpletensor);
+ f.setTensor(2, *filltensor);
+ f.clearTensor(2);
+ f.setTensor(2, *filltensor);
+ search::attribute::Status oldStatus = f.getStatus();
+ search::attribute::Status newStatus = oldStatus;
+ uint64_t iter = 0;
+ uint64_t iterLimit = 100000;
+ for (; iter < iterLimit; ++iter) {
+ f.clearTensor(2);
+ f.setTensor(2, *filltensor);
+ newStatus = f.getStatus();
+ if (newStatus.getUsed() < oldStatus.getUsed()) {
+ break;
+ }
+ oldStatus = newStatus;
+ }
+ EXPECT_GREATER(iterLimit, iter);
+ LOG(info,
+ "iter = %" PRIu64 ", memory usage %" PRIu64 ", -> %" PRIu64,
+ iter, oldStatus.getUsed(), newStatus.getUsed());
+ TEST_DO(f.assertGetNoTensor(1));
+ TEST_DO(f.assertGetTensor(*filltensor, 2));
+ TEST_DO(f.assertGetTensor(*simpletensor, 3));
+ TEST_DO(f.assertGetTensor(*emptyxytensor, 4));
+}
+
+TEST_F("Test tensortype file header tag", Fixture("tensor(x[10])"))
+{
+ f.ensureSpace(4);
+ TEST_DO(f.save());
+
+ vespalib::FileHeader header;
+ FastOS_File file;
+ EXPECT_TRUE(file.OpenReadOnly("test.dat"));
+ (void) header.readFile(file);
+ file.Close();
+ EXPECT_TRUE(header.hasTag("tensortype"));
+ EXPECT_EQUAL("tensor(x[10])", header.getTag("tensortype").asString());
+}
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.sh b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.sh
new file mode 100644
index 00000000000..2e940d5d99a
--- /dev/null
+++ b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.sh
@@ -0,0 +1,3 @@
+#!/bin/bash
+$VALGRIND ./searchlib_tensorattribute_test_app
+rm -rf *.dat
diff --git a/searchlib/src/tests/bitcompression/expgolomb/.gitignore b/searchlib/src/tests/bitcompression/expgolomb/.gitignore
new file mode 100644
index 00000000000..5ba0f36a2f0
--- /dev/null
+++ b/searchlib/src/tests/bitcompression/expgolomb/.gitignore
@@ -0,0 +1 @@
+searchlib_expgolomb_test_app
diff --git a/searchlib/src/tests/bitcompression/expgolomb/CMakeLists.txt b/searchlib/src/tests/bitcompression/expgolomb/CMakeLists.txt
new file mode 100644
index 00000000000..f724773dfd6
--- /dev/null
+++ b/searchlib/src/tests/bitcompression/expgolomb/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_expgolomb_test_app
+ SOURCES
+ expgolomb_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_expgolomb_test_app NO_VALGRIND COMMAND searchlib_expgolomb_test_app)
diff --git a/searchlib/src/tests/bitcompression/expgolomb/DESC b/searchlib/src/tests/bitcompression/expgolomb/DESC
new file mode 100644
index 00000000000..4abef0ecf24
--- /dev/null
+++ b/searchlib/src/tests/bitcompression/expgolomb/DESC
@@ -0,0 +1 @@
+Exp golomb encoding / decoding test. Take a look at expgolomb_test.cpp for details.
diff --git a/searchlib/src/tests/bitcompression/expgolomb/FILES b/searchlib/src/tests/bitcompression/expgolomb/FILES
new file mode 100644
index 00000000000..dbc3fa5e527
--- /dev/null
+++ b/searchlib/src/tests/bitcompression/expgolomb/FILES
@@ -0,0 +1 @@
+expgolomb_test.cpp
diff --git a/searchlib/src/tests/bitcompression/expgolomb/expgolomb_test.cpp b/searchlib/src/tests/bitcompression/expgolomb/expgolomb_test.cpp
new file mode 100644
index 00000000000..dcf0f69ee55
--- /dev/null
+++ b/searchlib/src/tests/bitcompression/expgolomb/expgolomb_test.cpp
@@ -0,0 +1,621 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("expglomb_test");
+#include <vespa/searchlib/bitcompression/compression.h>
+#include <vespa/vespalib/testkit/testapp.h>
+#include <vector>
+
+using search::bitcompression::DecodeContext64;
+using search::bitcompression::DecodeContext64Base;
+using search::bitcompression::EncodeContext64;
+using search::bitcompression::EncodeContext64Base;
+
+template <bool bigEndian>
+class DecodeContext : public DecodeContext64<bigEndian>
+{
+public:
+ using Parent = DecodeContext64<bigEndian>;
+ using Parent::defineReadOffset;
+ using EC = EncodeContext64<bigEndian>;
+
+ DecodeContext(const uint64_t *compr, int bitOffset)
+ : DecodeContext64<bigEndian>(compr, bitOffset)
+ {
+ this->defineReadOffset(0);
+ }
+};
+
+
+class IDecodeFunc
+{
+public:
+ virtual uint64_t decode() = 0;
+ virtual void skip() = 0;
+ virtual uint64_t decodeSmall() = 0;
+ virtual uint64_t decodeSmallApply() = 0;
+ virtual void skipSmall() = 0;
+
+ virtual ~IDecodeFunc() { }
+
+};
+
+
+/*
+ * Exp golomb decode functions getting kValue from a variable, i.e.
+ * compiler is not allowed to generate shift instructions with immediate values.
+ * Expressions involving kValue are not constant and can thus not be
+ * folded to constant values.
+ */
+template <bool bigEndian>
+class DecodeExpGolombVarK : public IDecodeFunc
+{
+public:
+ using DCB = DecodeContext64Base;
+ using DC = DecodeContext<bigEndian>;
+ using EC = typename DC::EC;
+
+ DCB &_dc;
+ int _kValue;
+
+ DecodeExpGolombVarK(DCB &dc, int kValue)
+ : _dc(dc),
+ _kValue(kValue)
+ {
+ }
+
+ virtual uint64_t decode()
+ {
+ unsigned int length;
+ uint64_t val64;
+ UC64_DECODEEXPGOLOMB(_dc._val, _dc._valI, _dc._preRead,
+ _dc._cacheInt, _kValue, EC);
+ return val64;
+ }
+
+ virtual void skip()
+ {
+ unsigned int length;
+ UC64_SKIPEXPGOLOMB(_dc._val, _dc._valI, _dc._preRead,
+ _dc._cacheInt, _kValue, EC);
+ }
+
+ virtual uint64_t decodeSmall()
+ {
+ unsigned int length;
+ uint64_t val64;
+ UC64_DECODEEXPGOLOMB_SMALL(_dc._val, _dc._valI, _dc._preRead,
+ _dc._cacheInt, _kValue, EC);
+ return val64;
+ }
+
+ virtual uint64_t decodeSmallApply()
+ {
+ unsigned int length;
+ uint64_t val64;
+ UC64_DECODEEXPGOLOMB_SMALL_APPLY(_dc._val, _dc._valI, _dc._preRead,
+ _dc._cacheInt, _kValue, EC, val64 =);
+ return val64;
+ }
+
+ virtual void skipSmall()
+ {
+ unsigned int length;
+ UC64_SKIPEXPGOLOMB_SMALL(_dc._val, _dc._valI, _dc._preRead,
+ _dc._cacheInt, _kValue, EC);
+ }
+
+ static std::unique_ptr<IDecodeFunc>
+ make(DCB &dc, int kValue)
+ {
+ return std::unique_ptr<IDecodeFunc>
+ (new DecodeExpGolombVarK<bigEndian>(dc, kValue));
+ }
+};
+
+
+/*
+ * Exp golomb decode functions getting kValue from a template argument
+ * i.e. compiler is allowed to generate shift instructions with
+ * immediate values and fold constant expressions involving kValue.
+ */
+template <bool bigEndian, int kValue>
+class DecodeExpGolombConstK : public IDecodeFunc
+{
+public:
+ using DCB = DecodeContext64Base;
+ using DC = DecodeContext<bigEndian>;
+ using EC = typename DC::EC;
+
+ DCB &_dc;
+
+ DecodeExpGolombConstK(DCB &dc)
+ : _dc(dc)
+ {
+ }
+
+ virtual uint64_t decode()
+ {
+ unsigned int length;
+ uint64_t val64;
+ UC64_DECODEEXPGOLOMB(_dc._val, _dc._valI, _dc._preRead,
+ _dc._cacheInt, kValue, EC);
+ return val64;
+ }
+
+ virtual void skip()
+ {
+ unsigned int length;
+ UC64_SKIPEXPGOLOMB(_dc._val, _dc._valI, _dc._preRead,
+ _dc._cacheInt, kValue, EC);
+ }
+
+ virtual uint64_t decodeSmall()
+ {
+ unsigned int length;
+ uint64_t val64;
+ UC64_DECODEEXPGOLOMB_SMALL(_dc._val, _dc._valI, _dc._preRead,
+ _dc._cacheInt, kValue, EC);
+ return val64;
+ }
+
+ virtual uint64_t decodeSmallApply()
+ {
+ unsigned int length;
+ uint64_t val64;
+ UC64_DECODEEXPGOLOMB_SMALL_APPLY(_dc._val, _dc._valI, _dc._preRead,
+ _dc._cacheInt, kValue, EC, val64 =);
+ return val64;
+ }
+
+ virtual void skipSmall()
+ {
+ unsigned int length;
+ UC64_SKIPEXPGOLOMB_SMALL(_dc._val, _dc._valI, _dc._preRead,
+ _dc._cacheInt, kValue, EC);
+ }
+
+ static std::unique_ptr<IDecodeFunc>
+ make(DCB &dc, int)
+ {
+ return std::unique_ptr<IDecodeFunc>
+ (new DecodeExpGolombConstK<bigEndian, kValue>(dc));
+ }
+};
+
+
+using IDecodeFuncFactory =
+ std::unique_ptr<IDecodeFunc> (*)(DecodeContext64Base &dc, int kValue);
+
+
+template <bool bigEndian>
+class DecodeFuncFactories
+{
+public:
+ using IDF = IDecodeFuncFactory;
+ std::vector<IDF> _constK;
+ IDF _varK;
+
+public:
+ DecodeFuncFactories();
+
+ void
+ addConstKFactory(int kValue, IDecodeFuncFactory factory)
+ {
+ assert(static_cast<unsigned int>(kValue) == _constK.size());
+ _constK.push_back(factory);
+ }
+
+ IDecodeFuncFactory
+ getConstKFactory(int kValue) const
+ {
+ assert(kValue >= 0 &&
+ static_cast<unsigned int>(kValue) < _constK.size());
+ return _constK[kValue];
+ }
+
+ IDecodeFuncFactory
+ getVarKFactory() const
+ {
+ return _varK;
+ }
+};
+
+
+template <bool bigEndian>
+struct RegisterFactoryPtr;
+
+
+template <bool bigEndian>
+using RegisterFactory = void (*)(DecodeFuncFactories<bigEndian> &factories,
+ RegisterFactoryPtr<bigEndian> &ptr);
+
+
+template <bool bigEndian>
+struct RegisterFactoryPtr
+{
+ RegisterFactory<bigEndian> _ptr;
+
+ RegisterFactoryPtr(RegisterFactory<bigEndian> ptr)
+ : _ptr(ptr)
+ {
+ }
+};
+
+
+template <bool bigEndian, int kValue>
+class RegisterFactories
+{
+public:
+ static void registerFactory(DecodeFuncFactories<bigEndian> &factories,
+ RegisterFactoryPtr<bigEndian> &ptr)
+ {
+ factories.addConstKFactory(kValue,
+ &DecodeExpGolombConstK<bigEndian, kValue>::
+ make);
+ ptr._ptr = &RegisterFactories<bigEndian, kValue+1>::registerFactory;
+ }
+};
+
+
+template <bool bigEndian>
+class RegisterFactories<bigEndian, 64>
+{
+public:
+ static void registerFactory(DecodeFuncFactories<bigEndian> &factories,
+ RegisterFactoryPtr<bigEndian> &ptr)
+ {
+ (void) factories;
+ ptr._ptr = nullptr;
+ }
+};
+
+
+template <bool bigEndian>
+DecodeFuncFactories<bigEndian>::DecodeFuncFactories()
+ : _constK(),
+ _varK(&DecodeExpGolombVarK<bigEndian>::make)
+{
+ RegisterFactoryPtr<bigEndian> f(
+ &RegisterFactories<bigEndian, 0>::registerFactory);
+ while (f._ptr) {
+ (*f._ptr)(*this, f);
+ }
+}
+
+
+class TestFixtureBase
+{
+public:
+ std::vector<uint64_t> _randNums;
+ using EC = EncodeContext64Base;
+
+ void fillRandNums();
+
+ void
+ calcBoundaries(int kValue, bool small, std::vector<uint64_t> &v);
+
+ void
+ testBoundaries(int kValue, bool small,
+ std::vector<uint64_t> &v,
+ DecodeContext64Base &dc,
+ DecodeContext64Base &dcSkip,
+ DecodeContext64Base &dcApply,
+ IDecodeFunc &df,
+ IDecodeFunc &dfSkip,
+ IDecodeFunc &dfApply);
+
+ void
+ testRandNums(DecodeContext64Base &dc,
+ DecodeContext64Base &dcSkip,
+ IDecodeFunc &df,
+ IDecodeFunc &dfSkip);
+};
+
+
+void
+TestFixtureBase::fillRandNums()
+{
+ for (int i = 0; i < 10000; ++i) {
+ uint64_t rval = rand();
+ rval <<= 30;
+ rval |= rand();
+ _randNums.push_back(rval);
+ }
+ for (int i = 0; i < 10000; ++i) {
+ uint64_t rval = rand();
+ rval <<= 30;
+ rval |= rand();
+ uint32_t bits = (rand() & 63);
+ rval &= ((UINT64_C(1) << bits) - 1);
+ _randNums.push_back(rval);
+ }
+}
+
+
+namespace
+{
+
+/*
+ * Add values around a calculated boundary, to catch off by one errors.
+ */
+void
+addBoundary(uint64_t boundary, uint64_t maxVal, std::vector<uint64_t> &v)
+{
+ uint64_t low = boundary > 2u ? boundary - 2 : 0;
+ uint64_t high = maxVal - 2u < boundary ? maxVal : boundary + 2;
+ assert(low <= high);
+ LOG(info, "low=0x%lx, high=0x%lx", low, high);
+ uint64_t i = low;
+ for (;;) {
+ v.push_back(i);
+ if (i == high)
+ break;
+ ++i;
+ }
+}
+
+}
+
+void
+TestFixtureBase::calcBoundaries(int kValue, bool small,
+ std::vector<uint64_t> &v)
+{
+ const char *smallStr = small ? "small" : "not small";
+ v.push_back(0);
+ uint64_t maxVal = EC::maxExpGolombVal(kValue); // encode method limit
+ if (small) {
+ maxVal = EC::maxExpGolombVal(kValue, 64);
+ }
+ LOG(debug, "kValue=%u, %s, maxVal is 0x%lx", kValue, smallStr, maxVal);
+ for (int bits = kValue + 1;
+ bits + kValue <= 128 && (bits <= 64 || !small);
+ ++bits) {
+ uint64_t boundary = EC::maxExpGolombVal(kValue, bits);
+ if (bits + kValue == 128) {
+ LOG(debug,
+ "boundary for kValue=%d, %s, bits=%d: 0x%lx",
+ kValue, smallStr, bits, boundary);
+ }
+ addBoundary(boundary, maxVal, v);
+ }
+ std::sort(v.begin(), v.end());
+ auto ve = std::unique(v.begin(), v.end());
+ uint32_t oldSize = v.size();
+ v.resize(ve - v.begin());
+ uint32_t newSize = v.size();
+ LOG(debug,
+ "kValues=%u, %s, boundaries %u -> %u, maxVal=0x%lx, highest=0x%lx",
+ kValue, smallStr, oldSize, newSize, maxVal, v.back());
+}
+
+
+void
+TestFixtureBase::testBoundaries(int kValue, bool small,
+ std::vector<uint64_t> &v,
+ DecodeContext64Base &dc,
+ DecodeContext64Base &dcSkip,
+ DecodeContext64Base &dcApply,
+ IDecodeFunc &df,
+ IDecodeFunc &dfSkip,
+ IDecodeFunc &dfApply)
+{
+ uint32_t bits = 0;
+ uint64_t maxSame = 0;
+
+ for (auto num : v) {
+ uint64_t prevPos = dc.getReadOffset();
+ uint64_t val64 = small ? df.decodeSmall() : df.decode();
+ EXPECT_EQUAL(num, val64);
+ uint64_t currPos = dc.getReadOffset();
+ if (small) {
+ dfSkip.skipSmall();
+ } else {
+ dfSkip.skip();
+ }
+ EXPECT_EQUAL(currPos, dcSkip.getReadOffset());
+ if (small) {
+ uint64_t sval64 = dfApply.decodeSmallApply();
+ EXPECT_EQUAL(num, sval64);
+ EXPECT_EQUAL(currPos, dcApply.getReadOffset());
+ }
+ if (num == 0) {
+ bits = currPos - prevPos;
+ maxSame = EC::maxExpGolombVal(kValue, bits);
+ } else {
+ assert(bits <= currPos - prevPos);
+ if (bits < currPos - prevPos) {
+ ASSERT_EQUAL(bits + 2, currPos - prevPos);
+ bits += 2;
+ ASSERT_EQUAL(maxSame + 1, num);
+ maxSame = EC::maxExpGolombVal(kValue, bits);
+ }
+ }
+ }
+}
+
+
+void
+TestFixtureBase::testRandNums(DecodeContext64Base &dc,
+ DecodeContext64Base &dcSkip,
+ IDecodeFunc &df,
+ IDecodeFunc &dfSkip)
+{
+ for (auto num : _randNums) {
+ uint64_t val64 = df.decode();
+ EXPECT_EQUAL(num, val64);
+ uint64_t currPos = dc.getReadOffset();
+ dfSkip.skip();
+ EXPECT_EQUAL(currPos, dcSkip.getReadOffset());
+ }
+}
+
+
+
+template <bool bigEndian>
+class TestFixture : public TestFixtureBase
+{
+public:
+ DecodeFuncFactories<bigEndian> _factories;
+ using DC = DecodeContext<bigEndian>;
+ using EC = typename DC::EC;
+ using Parent = TestFixtureBase;
+ using Parent::testBoundaries;
+ using Parent::testRandNums;
+
+ TestFixture()
+ : TestFixtureBase(),
+ _factories()
+ {
+ fillRandNums();
+ }
+
+ void
+ testBoundaries(int kValue, bool small,
+ std::vector<uint64_t> &v,
+ IDecodeFuncFactory f,
+ search::ComprFileWriteContext &wc);
+ void
+ testBoundaries(int kValue, bool small, std::vector<uint64_t> &v);
+
+ void
+ testBoundaries();
+
+ void
+ testRandNums(int kValue,
+ IDecodeFuncFactory f,
+ search::ComprFileWriteContext &wc);
+
+ void
+ testRandNums(int kValue);
+
+ void
+ testRandNums();
+};
+
+
+template <bool bigEndian>
+void
+TestFixture<bigEndian>::testBoundaries(int kValue, bool small,
+ std::vector<uint64_t> &v,
+ IDecodeFuncFactory f,
+ search::ComprFileWriteContext &wc)
+{
+ DC dc(static_cast<const uint64_t *>(wc._comprBuf), 0);
+ DC dcSkip(static_cast<const uint64_t *>(wc._comprBuf), 0);
+ DC dcApply(static_cast<const uint64_t *>(wc._comprBuf), 0);
+ std::unique_ptr<IDecodeFunc> df((*f)(dc, kValue));
+ std::unique_ptr<IDecodeFunc> dfSkip((*f)(dcSkip, kValue));
+ std::unique_ptr<IDecodeFunc> dfApply((*f)(dcApply, kValue));
+ testBoundaries(kValue, small, v, dc, dcSkip, dcApply,
+ *df, *dfSkip, *dfApply);
+}
+
+
+template <bool bigEndian>
+void
+TestFixture<bigEndian>::testBoundaries(int kValue, bool small,
+ std::vector<uint64_t> &v)
+{
+ EC e;
+ search::ComprFileWriteContext wc(e);
+ wc.allocComprBuf(32768, 32768);
+ e.setupWrite(wc);
+ for (auto num : v) {
+ e.encodeExpGolomb(num, kValue);
+ if (e._valI >= e._valE)
+ wc.writeComprBuffer(false);
+ }
+ e.flush();
+
+ IDecodeFuncFactory f = _factories.getConstKFactory(kValue);
+ testBoundaries(kValue, small, v, f, wc);
+ f = _factories.getVarKFactory();
+ testBoundaries(kValue, small, v, f, wc);
+}
+
+
+template <bool bigEndian>
+void
+TestFixture<bigEndian>::testBoundaries()
+{
+ for (int kValue = 0; kValue < 64; ++kValue) {
+ std::vector<uint64_t> v;
+ calcBoundaries(kValue, false, v);
+ testBoundaries(kValue, false, v);
+ /*
+ * Note: We don't support kValue being 63 for when decoding
+ * "small" numbers (limited to 64 bits in encoded form) since
+ * performance penalty is not worth the extra flexibility.
+ */
+ if (kValue < 63) {
+ v.clear();
+ calcBoundaries(kValue, true, v);
+ testBoundaries(kValue, true, v);
+ }
+ }
+}
+
+
+template <bool bigEndian>
+void
+TestFixture<bigEndian>::testRandNums(int kValue,
+ IDecodeFuncFactory f,
+ search::ComprFileWriteContext &wc)
+{
+ DC dc(static_cast<const uint64_t *>(wc._comprBuf), 0);
+ DC dcSkip(static_cast<const uint64_t *>(wc._comprBuf), 0);
+ std::unique_ptr<IDecodeFunc> df((*f)(dc, kValue));
+ std::unique_ptr<IDecodeFunc> dfSkip((*f)(dcSkip, kValue));
+ testRandNums(dc, dcSkip, *df, *dfSkip);
+}
+
+
+template <bool bigEndian>
+void
+TestFixture<bigEndian>::testRandNums(int kValue)
+{
+ EC e;
+ search::ComprFileWriteContext wc(e);
+ wc.allocComprBuf(32768, 32768);
+ e.setupWrite(wc);
+ for (auto num : _randNums) {
+ e.encodeExpGolomb(num, kValue);
+ if (e._valI >= e._valE)
+ wc.writeComprBuffer(false);
+ }
+ e.flush();
+
+ IDecodeFuncFactory f = _factories.getConstKFactory(kValue);
+ testRandNums(kValue, f, wc);
+ f = _factories.getVarKFactory();
+ testRandNums(kValue, f, wc);
+}
+
+
+template <bool bigEndian>
+void
+TestFixture<bigEndian>::testRandNums()
+{
+ for (int k = 0; k < 64; ++k) {
+ testRandNums(k);
+ }
+}
+
+
+TEST_F("Test bigendian expgolomb encoding/decoding", TestFixture<true>)
+{
+ f.testRandNums();
+ f.testBoundaries();
+}
+
+
+TEST_F("Test little expgolomb encoding/decoding", TestFixture<false>)
+{
+ f.testRandNums();
+ f.testBoundaries();
+}
+
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/bitvector/.gitignore b/searchlib/src/tests/bitvector/.gitignore
new file mode 100644
index 00000000000..21aed8ce6b2
--- /dev/null
+++ b/searchlib/src/tests/bitvector/.gitignore
@@ -0,0 +1,4 @@
+.depend
+Makefile
+bitvectorbenchmark_test
+searchlib_bitvectorbenchmark_test_app
diff --git a/searchlib/src/tests/bitvector/CMakeLists.txt b/searchlib/src/tests/bitvector/CMakeLists.txt
new file mode 100644
index 00000000000..7edae6f7cc4
--- /dev/null
+++ b/searchlib/src/tests/bitvector/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_bitvectorbenchmark_test_app
+ SOURCES
+ bitvectorbenchmark.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_bitvectorbenchmark_test_app COMMAND searchlib_bitvectorbenchmark_test_app BENCHMARK)
diff --git a/searchlib/src/tests/bitvector/DESC b/searchlib/src/tests/bitvector/DESC
new file mode 100644
index 00000000000..1a6c0fc2959
--- /dev/null
+++ b/searchlib/src/tests/bitvector/DESC
@@ -0,0 +1 @@
+This is a test for the BitVector class.
diff --git a/searchlib/src/tests/bitvector/FILES b/searchlib/src/tests/bitvector/FILES
new file mode 100644
index 00000000000..0688c3933eb
--- /dev/null
+++ b/searchlib/src/tests/bitvector/FILES
@@ -0,0 +1 @@
+bitvectorbenchmark.cpp
diff --git a/searchlib/src/tests/bitvector/bitvectorbenchmark.cpp b/searchlib/src/tests/bitvector/bitvectorbenchmark.cpp
new file mode 100644
index 00000000000..c9b962495f4
--- /dev/null
+++ b/searchlib/src/tests/bitvector/bitvectorbenchmark.cpp
@@ -0,0 +1,225 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+#include <vespa/searchlib/common/bitvector.h>
+#include <iostream>
+#include <string>
+#include <vector>
+
+LOG_SETUP("bitvectorbenchmark");
+
+namespace search {
+
+class BitVectorBenchmark : public FastOS_Application
+{
+private:
+ std::vector<BitVector *> _bv;
+ std::vector<unsigned int> _bvc;
+ void testCountSpeed1();
+ void testCountSpeed2();
+ void testCountSpeed3();
+ void testOrSpeed1();
+ void testOrSpeed2();
+ static void usage();
+ void init(size_t n);
+public:
+ BitVectorBenchmark();
+ ~BitVectorBenchmark();
+ int Main();
+};
+
+BitVectorBenchmark::BitVectorBenchmark() :
+ _bv()
+{
+}
+
+BitVectorBenchmark::~BitVectorBenchmark()
+{
+ for(size_t i(0); i < _bv.size(); i++) {
+ delete _bv[i];
+ }
+}
+
+void BitVectorBenchmark::usage()
+{
+ std::cout << "usage: bitvectorbenchmark [-n numBits] [-t operation]" << std::endl;
+}
+
+void BitVectorBenchmark::init(size_t n)
+{
+ BitVector *a(BitVector::create(n).release());
+ BitVector *b(BitVector::create(n).release());
+ srand(1);
+ for(size_t i(0), j(0); i < n; i += rand()%10, j++) {
+ a->flip(i);
+ }
+ for(size_t i(0), j(0); i < n; i += rand()%10, j++) {
+ b->flip(i);
+ }
+ a->invalidateCachedCount();
+ b->invalidateCachedCount();
+ _bv.push_back(a);
+ _bvc.push_back(a->countTrueBits());
+ _bv.push_back(b);
+ _bvc.push_back(b->countTrueBits());
+}
+
+void BitVectorBenchmark::testOrSpeed1()
+{
+ _bv[0]->orWith(*_bv[1]);
+}
+
+void BitVectorBenchmark::testCountSpeed1()
+{
+ _bv[0]->invalidateCachedCount();
+ unsigned int cnt = _bv[0]->countTrueBits();
+ assert(cnt = _bvc[0]);
+ (void) cnt;
+}
+
+static int bitTab[256] = {
+ 0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,
+ 1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,
+ 1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,
+ 2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,
+ 1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,
+ 2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,
+ 2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,
+ 3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,
+ 1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,
+ 2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,
+ 2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,
+ 3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,
+ 2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,
+ 3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,
+ 3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,
+ 4,5,5,6,5,6,6,7,5,6,6,7,6,7,7,8
+};
+
+void BitVectorBenchmark::testCountSpeed2()
+{
+ const unsigned char * p = reinterpret_cast<const unsigned char *>(_bv[0]->getStart());
+ size_t sz = _bv[0]->size()/8;
+ size_t sum0(0);
+ size_t sum1(0);
+ size_t sum2(0);
+ size_t sum3(0);
+ for (size_t i(0); i < sz; i+=4) {
+ sum0 += bitTab[p[i+0]];
+ sum1 += bitTab[p[i+1]];
+ sum2 += bitTab[p[i+2]];
+ sum3 += bitTab[p[i+3]];
+ }
+ assert(sum0 + sum1 + sum2 + sum3 == _bvc[0]);
+}
+
+
+static int
+popCount(unsigned int bits)
+{
+ unsigned int odd = bits & 0x55555555;
+ unsigned int even = bits & 0xaaaaaaaa;
+ bits = odd + (even >> 1);
+ odd = bits & 0x33333333;
+ even = bits & 0xcccccccc;
+ bits = odd + (even >> 2);
+ odd = bits & 0x0f0f0f0f;
+ even = bits & 0xf0f0f0f0;
+ bits = odd + (even >> 4);
+ odd = bits & 0x00ff00ff;
+ even = bits & 0xff00ff00;
+ bits = odd + (even >> 8);
+ odd = bits & 0x0000ffff;
+ even = bits & 0xffff0000;
+ bits = odd + (even >> 16);
+ return bits;
+}
+
+
+void
+BitVectorBenchmark::testCountSpeed3()
+{
+ const unsigned int * p = static_cast<const unsigned int *>(_bv[0]->getStart());
+ const unsigned int * pe = p + (_bv[0]->size()/(sizeof(uint32_t)*8));
+ size_t sum(0);
+ for (; p < pe; ++p) {
+ sum += popCount(*p);
+ }
+ assert(sum == _bvc[0]);
+}
+
+void BitVectorBenchmark::testOrSpeed2()
+{
+ typedef uint64_t T;
+ T * a = reinterpret_cast<T *>(_bv[0]->getStart());
+ const T * b = reinterpret_cast<const T *>(_bv[1]->getStart());
+ size_t sz = _bv[0]->size()/(8*sizeof(*a));
+ for (size_t i(0); i < sz; i+=2) {
+ a[i] |= b[i];
+ a[i+1] |= b[i+1];
+ // a[i+2] |= b[i+2];
+ // a[i+3] |= b[i+3];
+ }
+}
+
+int BitVectorBenchmark::Main()
+{
+ int idx = 1;
+ std::string operation;
+ size_t numBits(8*1000000);
+ char opt;
+ const char * arg;
+ bool optError = false;
+ while ((opt = GetOpt("n:t:", arg, idx)) != -1) {
+ switch (opt) {
+ case 'n':
+ numBits = strtoll(arg, NULL, 10);
+ break;
+ case 't':
+ operation = arg;
+ break;
+ default:
+ optError = true;
+ break;
+ }
+ }
+
+ if ((_argc != idx ) || optError) {
+ usage();
+ return -1;
+ }
+
+ init(numBits);
+ for (size_t i(0); i < operation.size(); i++) {
+ char op(operation[i]);
+ size_t splitBits1 = rand() % numBits;
+ size_t splitBits2 = rand() % numBits;
+ if (splitBits1 > splitBits2)
+ std::swap(splitBits1, splitBits2);
+ for (size_t j(0); j < 1000; j++) {
+ if (op == 'c') {
+ testCountSpeed1();
+ } else if (op == 'd') {
+ testCountSpeed2();
+ } else if (op == 'e') {
+ testCountSpeed3();
+ } else if (op == 'o') {
+ testOrSpeed1();
+ } else if (op == 'p') {
+ testOrSpeed2();
+ } else {
+ std::cerr << "Unknown operation " << op << std::endl;
+ }
+ }
+ }
+
+ return 0;
+}
+}
+
+int main(int argc, char ** argv)
+{
+ search::BitVectorBenchmark myapp;
+ return myapp.Entry(argc, argv);
+}
+
diff --git a/searchlib/src/tests/btree/.gitignore b/searchlib/src/tests/btree/.gitignore
new file mode 100644
index 00000000000..a6bdd572c7d
--- /dev/null
+++ b/searchlib/src/tests/btree/.gitignore
@@ -0,0 +1,3 @@
+iteratespeed
+searchlib_btreeaggregation_test_app
+searchlib_iteratespeed_app
diff --git a/searchlib/src/tests/btree/CMakeLists.txt b/searchlib/src/tests/btree/CMakeLists.txt
new file mode 100644
index 00000000000..d88953d43fd
--- /dev/null
+++ b/searchlib/src/tests/btree/CMakeLists.txt
@@ -0,0 +1,15 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_btreeaggregation_test_app
+ SOURCES
+ btreeaggregation_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_btreeaggregation_test_app COMMAND searchlib_btreeaggregation_test_app)
+vespa_add_executable(searchlib_iteratespeed_app
+ SOURCES
+ iteratespeed.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_iteratespeed_app COMMAND searchlib_iteratespeed_app BENCHMARK)
diff --git a/searchlib/src/tests/btree/DESC b/searchlib/src/tests/btree/DESC
new file mode 100644
index 00000000000..da074ca2c45
--- /dev/null
+++ b/searchlib/src/tests/btree/DESC
@@ -0,0 +1 @@
+btree aggregation test. Take a look at btreeaggregation_test.cpp for details.
diff --git a/searchlib/src/tests/btree/FILES b/searchlib/src/tests/btree/FILES
new file mode 100644
index 00000000000..45756255961
--- /dev/null
+++ b/searchlib/src/tests/btree/FILES
@@ -0,0 +1 @@
+btreeaggregation_test.cpp
diff --git a/searchlib/src/tests/btree/btreeaggregation_test.cpp b/searchlib/src/tests/btree/btreeaggregation_test.cpp
new file mode 100644
index 00000000000..bb8e86ef49d
--- /dev/null
+++ b/searchlib/src/tests/btree/btreeaggregation_test.cpp
@@ -0,0 +1,1146 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("btreeaggregation_test");
+#include <vespa/vespalib/testkit/testapp.h>
+#include <string>
+#include <set>
+#include <iostream>
+#include <vespa/searchlib/btree/btreeroot.h>
+#include <vespa/searchlib/btree/btreebuilder.h>
+#include <vespa/searchlib/btree/btreenodeallocator.h>
+#include <vespa/searchlib/btree/btree.h>
+#include <vespa/searchlib/btree/btreestore.h>
+#include <vespa/searchlib/util/rand48.h>
+
+#include <vespa/searchlib/btree/btreenodeallocator.hpp>
+#include <vespa/searchlib/btree/btreenode.hpp>
+#include <vespa/searchlib/btree/btreenodestore.hpp>
+#include <vespa/searchlib/btree/btreeiterator.hpp>
+#include <vespa/searchlib/btree/btreeroot.hpp>
+#include <vespa/searchlib/btree/btreebuilder.hpp>
+#include <vespa/searchlib/btree/btree.hpp>
+#include <vespa/searchlib/btree/btreestore.hpp>
+#include <vespa/searchlib/btree/btreeaggregator.hpp>
+
+using vespalib::GenerationHandler;
+
+namespace search {
+namespace btree {
+
+namespace {
+
+int32_t
+toVal(uint32_t key)
+{
+ return key + 1000;
+}
+
+int32_t
+toHighVal(uint32_t key)
+{
+ return toVal(key) + 1000;
+}
+
+int32_t
+toLowVal(uint32_t key)
+{
+ return toVal(key) - 1000000;
+}
+
+int32_t
+toNotVal(uint32_t key)
+{
+ return key + 2000;
+}
+
+template <typename AggrT>
+void
+aggrToStr(std::stringstream &ss, const AggrT &aggr)
+{
+ (void) aggr;
+ ss << "[noaggr]";
+}
+
+template <>
+void
+aggrToStr<MinMaxAggregated>(std::stringstream &ss,
+ const MinMaxAggregated &aggr)
+{
+ ss << "[min=" << aggr.getMin() << ",max=" << aggr.getMax() << "]";
+}
+
+
+template <typename LeafNode>
+void
+leafNodeToStr(std::stringstream &ss, const LeafNode &n)
+{
+ ss << "[";
+ for (uint32_t i = 0; i < n.validSlots(); ++i) {
+ if (i > 0) ss << ",";
+ ss << n.getKey(i) << ":" << n.getData(i);
+ }
+ aggrToStr(ss, n.getAggregated());
+ ss << "]";
+}
+
+template <typename InternalNode, typename LeafNode, typename NodeAllocator>
+void
+nodeToStr(std::stringstream &ss, const BTreeNode::Ref &node,
+ const NodeAllocator &allocator)
+{
+ if (!node.valid()) {
+ ss << "[]";
+ return;
+ }
+ if (allocator.isLeafRef(node)) {
+ leafNodeToStr(ss, *allocator.mapLeafRef(node));
+ return;
+ }
+ const InternalNode &n(*allocator.mapInternalRef(node));
+ ss << "[";
+ for (uint32_t i = 0; i < n.validSlots(); ++i) {
+ if (i > 0) ss << ",";
+ ss << n.getKey(i) << ":";
+ nodeToStr<InternalNode,
+ LeafNode,
+ NodeAllocator>(ss, n.getChild(i), allocator);
+ }
+ aggrToStr(ss, n.getAggregated());
+ ss << "]";
+}
+
+
+template <typename Tree>
+void
+treeToStr(std::stringstream &ss, const Tree &t)
+{
+ nodeToStr<typename Tree::InternalNodeType,
+ typename Tree::LeafNodeType,
+ typename Tree::NodeAllocatorType>(ss, t.getRoot(), t.getAllocator());
+}
+
+
+}
+
+typedef BTreeTraits<4, 4, 31, false> MyTraits;
+
+#define KEYWRAP
+
+#ifdef KEYWRAP
+
+// Force use of functor to compare keys.
+class WrapInt
+{
+public:
+ int _val;
+ WrapInt(int val) : _val(val) {}
+ WrapInt(void) : _val(0) {}
+ bool operator==(const WrapInt & rhs) const { return _val == rhs._val; }
+};
+
+std::ostream &
+operator<<(std::ostream &s, const WrapInt &i)
+{
+ s << i._val;
+ return s;
+}
+
+typedef WrapInt MyKey;
+class MyComp
+{
+public:
+ bool
+ operator()(const WrapInt &a, const WrapInt &b) const
+ {
+ return a._val < b._val;
+ }
+};
+
+#define UNWRAP(key) (key._val)
+#else
+typedef int MyKey;
+typedef std::less<int> MyComp;
+#define UNWRAP(key) (key)
+#endif
+
+typedef BTree<MyKey, int32_t,
+ btree::MinMaxAggregated,
+ MyComp, MyTraits,
+ MinMaxAggrCalc> MyTree;
+typedef BTreeStore<MyKey, int32_t,
+ btree::MinMaxAggregated,
+ MyComp,
+ BTreeDefaultTraits,
+ MinMaxAggrCalc> MyTreeStore;
+typedef MyTree::Builder MyTreeBuilder;
+typedef MyTree::LeafNodeType MyLeafNode;
+typedef MyTree::InternalNodeType MyInternalNode;
+typedef MyTree::NodeAllocatorType MyNodeAllocator;
+typedef MyTree::Builder::Aggregator MyAggregator;
+typedef MyTree::AggrCalcType MyAggrCalc;
+typedef std::pair<MyKey, int32_t> LeafPair;
+typedef MyTreeStore::KeyDataType MyKeyData;
+typedef MyTreeStore::KeyDataTypeRefPair MyKeyDataRefPair;
+
+typedef BTree<int, BTreeNoLeafData, btree::NoAggregated> SetTreeB;
+
+typedef BTreeTraits<16, 16, 10, false> LSeekTraits;
+typedef BTree<int, BTreeNoLeafData, btree::NoAggregated,
+ std::less<int>, LSeekTraits> SetTreeL;
+
+struct LeafPairLess {
+ bool operator()(const LeafPair & lhs, const LeafPair & rhs) const {
+ return UNWRAP(lhs.first) < UNWRAP(rhs.first);
+ }
+};
+
+
+class MockTree
+{
+public:
+ typedef std::map<uint32_t, int32_t> MTree;
+ typedef std::map<int32_t, std::set<uint32_t> > MRTree;
+ MTree _tree;
+ MRTree _rtree;
+
+ MockTree()
+ : _tree(),
+ _rtree()
+ {
+ }
+
+
+ void
+ erase(uint32_t key)
+ {
+ MTree::iterator it(_tree.find(key));
+ if (it == _tree.end())
+ return;
+ int32_t oval = it->second;
+ MRTree::iterator rit(_rtree.find(oval));
+ assert(rit != _rtree.end());
+ size_t ecount = rit->second.erase(key);
+ assert(ecount == 1);
+ (void) ecount;
+ if (rit->second.empty()) {
+ _rtree.erase(oval);
+ }
+ _tree.erase(key);
+ }
+
+ void
+ insert(uint32_t key, int32_t val)
+ {
+ erase(key);
+ _tree[key] = val;
+ _rtree[val].insert(key);
+ }
+};
+
+
+class MyTreeForceApplyStore : public MyTreeStore
+{
+public:
+ typedef MyComp CompareT;
+
+ bool
+ insert(EntryRef &ref, const KeyType &key, const DataType &data,
+ CompareT comp = CompareT());
+
+ bool
+ remove(EntryRef &ref, const KeyType &key, CompareT comp = CompareT());
+};
+
+
+bool
+MyTreeForceApplyStore::insert(EntryRef &ref,
+ const KeyType &key, const DataType &data,
+ CompareT comp)
+{
+ bool retVal = true;
+ if (ref.valid()) {
+ RefType iRef(ref);
+ uint32_t clusterSize = getClusterSize(iRef);
+ if (clusterSize == 0) {
+ const BTreeType *tree = getTreeEntry(iRef);
+ const NodeAllocatorType &allocator = getAllocator();
+ Iterator itr = tree->find(key, allocator, comp);
+ if (itr.valid())
+ retVal = false;
+ } else {
+ const KeyDataType *old = getKeyDataEntry(iRef, clusterSize);
+ const KeyDataType *olde = old + clusterSize;
+ const KeyDataType *oldi = lower_bound(old, olde, key, comp);
+ if (oldi < olde && !comp(key, oldi->_key))
+ retVal = false; // key already present
+ }
+ }
+ KeyDataType addition(key, data);
+ if (retVal) {
+ apply(ref, &addition, &addition+1, NULL, NULL, comp);
+ }
+ return retVal;
+}
+
+
+bool
+MyTreeForceApplyStore::remove(EntryRef &ref, const KeyType &key,
+ CompareT comp)
+{
+ bool retVal = true;
+ if (!ref.valid())
+ retVal = false; // not found
+ else {
+ RefType iRef(ref);
+ uint32_t clusterSize = getClusterSize(iRef);
+ if (clusterSize == 0) {
+ const BTreeType *tree = getTreeEntry(iRef);
+ const NodeAllocatorType &allocator = getAllocator();
+ Iterator itr = tree->find(key, allocator, comp);
+ if (!itr.valid())
+ retVal = false;
+ } else {
+ const KeyDataType *old = getKeyDataEntry(iRef, clusterSize);
+ const KeyDataType *olde = old + clusterSize;
+ const KeyDataType *oldi = lower_bound(old, olde, key, comp);
+ if (oldi == olde || comp(key, oldi->_key))
+ retVal = false; // not found
+ }
+ }
+ std::vector<KeyDataType> additions;
+ std::vector<KeyType> removals;
+ removals.push_back(key);
+ apply(ref,
+ &additions[0], &additions[additions.size()],
+ &removals[0], &removals[removals.size()],
+ comp);
+ return retVal;
+}
+
+
+template <typename ManagerType>
+void
+freezeTree(GenerationHandler &g, ManagerType &m)
+{
+ m.freeze();
+ m.transferHoldLists(g.getCurrentGeneration());
+ g.incGeneration();
+ m.trimHoldLists(g.getFirstUsedGeneration());
+}
+
+template <typename ManagerType>
+void
+cleanup(GenerationHandler &g, ManagerType &m)
+{
+ freezeTree(g, m);
+}
+
+template <typename ManagerType, typename NodeType>
+void
+cleanup(GenerationHandler & g,
+ ManagerType & m,
+ BTreeNode::Ref n1Ref, NodeType * n1,
+ BTreeNode::Ref n2Ref = BTreeNode::Ref(), NodeType * n2 = NULL)
+{
+ assert(ManagerType::isValidRef(n1Ref));
+ m.holdNode(n1Ref, n1);
+ if (n2 != NULL) {
+ assert(ManagerType::isValidRef(n2Ref));
+ m.holdNode(n2Ref, n2);
+ } else {
+ assert(!ManagerType::isValidRef(n2Ref));
+ }
+ cleanup(g, m);
+}
+
+class Test : public vespalib::TestApp {
+private:
+ template <typename Tree>
+ bool
+ assertTree(const std::string & exp, const Tree &t);
+
+ template <typename Tree>
+ bool
+ assertAggregated(const MockTree &m, const Tree &t);
+
+ template <typename TreeStore>
+ bool
+ assertAggregated(const MockTree &m, const TreeStore &s, EntryRef ref);
+
+ void
+ buildSubTree(const std::vector<LeafPair> &sub,
+ size_t numEntries);
+
+ void requireThatNodeInsertWorks();
+ void requireThatNodeSplitInsertWorks();
+ void requireThatNodeStealWorks();
+ void requireThatNodeRemoveWorks();
+ void requireThatWeCanInsertAndRemoveFromTree();
+ void requireThatSortedTreeInsertWorks();
+ void requireThatCornerCaseTreeFindWorks();
+ void requireThatBasicTreeIteratorWorks();
+ void requireThatTreeIteratorAssignWorks();
+ void requireThatUpdateOfKeyWorks();
+ void requireThatUpdateOfDataWorks();
+
+ template <typename TreeStore>
+ void
+ requireThatSmallNodesWorks();
+public:
+ int Main();
+};
+
+
+template<typename Tree>
+bool
+Test::assertTree(const std::string &exp, const Tree &t)
+{
+ std::stringstream ss;
+ treeToStr(ss, t);
+ if (!EXPECT_EQUAL(exp, ss.str())) return false;
+ return true;
+}
+
+
+template <typename Tree>
+bool
+Test::assertAggregated(const MockTree &m, const Tree &t)
+{
+ const MinMaxAggregated &ta(t.getAggregated());
+ if (t.getRoot().valid()) {
+ return
+ EXPECT_FALSE(m._rtree.empty()) &&
+ EXPECT_EQUAL(m._rtree.rbegin()->first,
+ ta.getMax()) &&
+ EXPECT_EQUAL(m._rtree.begin()->first,
+ ta.getMin());
+ } else {
+ return EXPECT_TRUE(m._rtree.empty()) &&
+ EXPECT_EQUAL(std::numeric_limits<int32_t>::min(),
+ ta.getMax()) &&
+ EXPECT_EQUAL(std::numeric_limits<int32_t>::max(),
+ ta.getMin());
+ }
+}
+
+template <typename TreeStore>
+bool
+Test::assertAggregated(const MockTree &m, const TreeStore &s, EntryRef ref)
+{
+ typename TreeStore::Iterator i(s.begin(ref));
+ MinMaxAggregated sa(s.getAggregated(ref));
+ const MinMaxAggregated &ia(i.getAggregated());
+ if (ref.valid()) {
+ return
+ EXPECT_FALSE(m._rtree.empty()) &&
+ EXPECT_EQUAL(m._rtree.rbegin()->first,
+ ia.getMax()) &&
+ EXPECT_EQUAL(m._rtree.begin()->first,
+ ia.getMin()) &&
+ EXPECT_EQUAL(m._rtree.rbegin()->first,
+ sa.getMax()) &&
+ EXPECT_EQUAL(m._rtree.begin()->first,
+ sa.getMin());
+ } else {
+ return EXPECT_TRUE(m._rtree.empty()) &&
+ EXPECT_EQUAL(std::numeric_limits<int32_t>::min(),
+ ia.getMax()) &&
+ EXPECT_EQUAL(std::numeric_limits<int32_t>::max(),
+ ia.getMin()) &&
+ EXPECT_EQUAL(std::numeric_limits<int32_t>::min(),
+ sa.getMax()) &&
+ EXPECT_EQUAL(std::numeric_limits<int32_t>::max(),
+ sa.getMin());
+ }
+}
+
+
+void
+Test::requireThatNodeInsertWorks()
+{
+ MyTree t;
+ t.insert(20, 102);
+ EXPECT_TRUE(assertTree("[20:102[min=102,max=102]]", t));
+ t.insert(10, 101);
+ EXPECT_TRUE(assertTree("[10:101,20:102[min=101,max=102]]", t));
+ t.insert(30, 103);
+ t.insert(40, 104);
+ EXPECT_TRUE(assertTree("[10:101,20:102,30:103,40:104"
+ "[min=101,max=104]]", t));
+}
+
+void
+getLeafNode(MyTree &t)
+{
+ t.insert(1, 101);
+ t.insert(3, 103);
+ t.insert(5, 105);
+ t.insert(7, 107);
+// EXPECT_TRUE(assertTree("[1:101,3:103,5:105,7:107[min=101,max=107]]", t));
+}
+
+void
+Test::requireThatNodeSplitInsertWorks()
+{
+ { // new entry in current node
+ MyTree t;
+ getLeafNode(t);
+ t.insert(4, 104);
+ EXPECT_TRUE(assertTree("[4:"
+ "[1:101,3:103,4:104[min=101,max=104]]"
+ ",7:"
+ "[5:105,7:107[min=105,max=107]]"
+ "[min=101,max=107]]", t));
+ }
+ { // new entry in split node
+ MyTree t;
+ getLeafNode(t);
+ t.insert(6, 106);
+ EXPECT_TRUE(assertTree("[5:"
+ "[1:101,3:103,5:105[min=101,max=105]]"
+ ",7:"
+ "[6:106,7:107[min=106,max=107]]"
+ "[min=101,max=107]]", t));
+ }
+ { // new entry at end
+ MyTree t;
+ getLeafNode(t);
+ t.insert(8, 108);
+ EXPECT_TRUE(assertTree("[5:"
+ "[1:101,3:103,5:105[min=101,max=105]]"
+ ",8:"
+ "[7:107,8:108[min=107,max=108]]"
+ "[min=101,max=108]]", t));
+ }
+}
+
+struct BTreeStealTraits
+{
+ static const size_t LEAF_SLOTS = 6;
+ static const size_t INTERNAL_SLOTS = 6;
+ static const size_t PATH_SIZE = 20;
+ static const bool BINARY_SEEK = true;
+};
+
+void
+Test::requireThatNodeStealWorks()
+{
+ typedef BTree<MyKey, int32_t,
+ btree::MinMaxAggregated,
+ MyComp, BTreeStealTraits,
+ MinMaxAggrCalc> MyStealTree;
+ { // steal all from left
+ MyStealTree t;
+ t.insert(10, 110);
+ t.insert(20, 120);
+ t.insert(30, 130);
+ t.insert(40, 140);
+ t.insert(50, 150);
+ t.insert(60, 160);
+ t.insert(35, 135);
+ t.remove(35);
+ EXPECT_TRUE(assertTree("[30:"
+ "[10:110,20:120,30:130[min=110,max=130]]"
+ ",60:"
+ "[40:140,50:150,60:160[min=140,max=160]]"
+ "[min=110,max=160]]", t));
+ t.remove(50);
+ EXPECT_TRUE(assertTree("[10:110,20:120,30:130,40:140,60:160"
+ "[min=110,max=160]]", t));
+ }
+ { // steal all from right
+ MyStealTree t;
+ t.insert(10, 110);
+ t.insert(20, 120);
+ t.insert(30, 130);
+ t.insert(40, 140);
+ t.insert(50, 150);
+ t.insert(60, 160);
+ t.insert(35, 135);
+ t.remove(35);
+ EXPECT_TRUE(assertTree("[30:"
+ "[10:110,20:120,30:130[min=110,max=130]]"
+ ",60:"
+ "[40:140,50:150,60:160[min=140,max=160]]"
+ "[min=110,max=160]]", t));
+ t.remove(20);
+ EXPECT_TRUE(assertTree("[10:110,30:130,40:140,50:150,60:160"
+ "[min=110,max=160]]", t));
+ }
+ { // steal some from left
+ MyStealTree t;
+ t.insert(10, 110);
+ t.insert(20, 120);
+ t.insert(30, 130);
+ t.insert(60, 160);
+ t.insert(70, 170);
+ t.insert(80, 180);
+ t.insert(50, 150);
+ t.insert(40, 140);
+ EXPECT_TRUE(assertTree("[50:"
+ "[10:110,20:120,30:130,40:140,50:150"
+ "[min=110,max=150]]"
+ ",80:"
+ "[60:160,70:170,80:180[min=160,max=180]]"
+ "[min=110,max=180]]", t));
+ t.remove(60);
+ EXPECT_TRUE(assertTree("[40:"
+ "[10:110,20:120,30:130,40:140"
+ "[min=110,max=140]]"
+ ",80:"
+ "[50:150,70:170,80:180[min=150,max=180]]"
+ "[min=110,max=180]]", t));
+ }
+ { // steal some from right
+ MyStealTree t;
+ t.insert(10, 110);
+ t.insert(20, 120);
+ t.insert(30, 130);
+ t.insert(40, 140);
+ t.insert(50, 150);
+ t.insert(60, 160);
+ t.insert(70, 170);
+ t.insert(80, 180);
+ t.insert(90, 190);
+ t.remove(40);
+ EXPECT_TRUE(assertTree("[30:"
+ "[10:110,20:120,30:130"
+ "[min=110,max=130]]"
+ ",90:"
+ "[50:150,60:160,70:170,80:180,90:190"
+ "[min=150,max=190]]"
+ "[min=110,max=190]]", t));
+ t.remove(20);
+ EXPECT_TRUE(assertTree("[50:"
+ "[10:110,30:130,50:150"
+ "[min=110,max=150]]"
+ ",90:"
+ "[60:160,70:170,80:180,90:190"
+ "[min=160,max=190]]"
+ "[min=110,max=190]]", t));
+ }
+}
+
+void
+Test::requireThatNodeRemoveWorks()
+{
+ MyTree t;
+ getLeafNode(t);
+ t.remove(3);
+ EXPECT_TRUE(assertTree("[1:101,5:105,7:107[min=101,max=107]]", t));
+ t.remove(1);
+ EXPECT_TRUE(assertTree("[5:105,7:107[min=105,max=107]]", t));
+ t.remove(7);
+ EXPECT_TRUE(assertTree("[5:105[min=105,max=105]]", t));
+}
+
+void
+generateData(std::vector<LeafPair> & data, size_t numEntries)
+{
+ data.reserve(numEntries);
+ Rand48 rnd;
+ rnd.srand48(10);
+ for (size_t i = 0; i < numEntries; ++i) {
+ int num = rnd.lrand48() % 10000000;
+ uint32_t val = toVal(num);
+ data.push_back(std::make_pair(num, val));
+ }
+}
+
+void
+Test::buildSubTree(const std::vector<LeafPair> &sub,
+ size_t numEntries)
+{
+ GenerationHandler g;
+ MyTree tree;
+ MyTreeBuilder builder(tree.getAllocator());
+ MockTree mock;
+
+ std::vector<LeafPair> sorted(sub.begin(), sub.begin() + numEntries);
+ std::sort(sorted.begin(), sorted.end(), LeafPairLess());
+ for (size_t i = 0; i < numEntries; ++i) {
+ int num = UNWRAP(sorted[i].first);
+ const uint32_t & val = sorted[i].second;
+ builder.insert(num, val);
+ mock.insert(num, val);
+ }
+ tree.assign(builder);
+ assert(numEntries == tree.size());
+ assert(tree.isValid());
+
+ TEST_DO(EXPECT_TRUE(assertAggregated(mock, tree)));
+ EXPECT_EQUAL(numEntries, tree.size());
+ EXPECT_TRUE(tree.isValid());
+ MyTree::Iterator itr = tree.begin();
+ MyTree::Iterator ritr = itr;
+ if (numEntries > 0) {
+ EXPECT_TRUE(ritr.valid());
+ EXPECT_EQUAL(0u, ritr.position());
+ --ritr;
+ EXPECT_TRUE(!ritr.valid());
+ EXPECT_EQUAL(numEntries, ritr.position());
+ --ritr;
+ EXPECT_TRUE(ritr.valid());
+ EXPECT_EQUAL(numEntries - 1, ritr.position());
+ } else {
+ EXPECT_TRUE(!ritr.valid());
+ EXPECT_EQUAL(0u, ritr.position());
+ --ritr;
+ EXPECT_TRUE(!ritr.valid());
+ EXPECT_EQUAL(0u, ritr.position());
+ }
+ for (size_t i = 0; i < numEntries; ++i) {
+ EXPECT_TRUE(itr.valid());
+ EXPECT_EQUAL(sorted[i].first, itr.getKey());
+ EXPECT_EQUAL(sorted[i].second, itr.getData());
+ ++itr;
+ }
+ EXPECT_TRUE(!itr.valid());
+ ritr = itr;
+ EXPECT_TRUE(!ritr.valid());
+ --ritr;
+ for (size_t i = 0; i < numEntries; ++i) {
+ EXPECT_TRUE(ritr.valid());
+ EXPECT_EQUAL(sorted[numEntries - 1 - i].first, ritr.getKey());
+ EXPECT_EQUAL(sorted[numEntries - 1 - i].second, ritr.getData());
+ --ritr;
+ }
+ EXPECT_TRUE(!ritr.valid());
+}
+
+void
+Test::requireThatWeCanInsertAndRemoveFromTree()
+{
+ GenerationHandler g;
+ MyTree tree;
+ MockTree mock;
+ std::vector<LeafPair> exp;
+ std::vector<LeafPair> sorted;
+ TEST_DO(EXPECT_TRUE(assertAggregated(mock, tree)));
+ size_t numEntries = 1000;
+ generateData(exp, numEntries);
+ sorted = exp;
+ std::sort(sorted.begin(), sorted.end(), LeafPairLess());
+ // insert entries
+ for (size_t i = 0; i < numEntries; ++i) {
+ int num = UNWRAP(exp[i].first);
+ const uint32_t & val = exp[i].second;
+ EXPECT_TRUE(!tree.find(num).valid());
+ //LOG(info, "insert[%zu](%d, %s)", i, num, str.c_str());
+ EXPECT_TRUE(tree.insert(num, val));
+ EXPECT_TRUE(!tree.insert(num, val));
+ mock.insert(num, val);
+ TEST_DO(EXPECT_TRUE(assertAggregated(mock, tree)));
+ for (size_t j = 0; j <= i; ++j) {
+ //LOG(info, "find[%zu](%d)", j, exp[j].first._val);
+ MyTree::Iterator itr = tree.find(exp[j].first);
+ EXPECT_TRUE(itr.valid());
+ EXPECT_EQUAL(exp[j].first, itr.getKey());
+ EXPECT_EQUAL(exp[j].second, itr.getData());
+ }
+ EXPECT_EQUAL(i + 1u, tree.size());
+ EXPECT_TRUE(tree.isValid());
+ buildSubTree(exp, i + 1);
+ }
+ //std::cout << "tree: " << tree.toString() << std::endl;
+
+ {
+ MyTree::Iterator itr = tree.begin();
+ MyTree::Iterator itre = itr;
+ MyTree::Iterator itre2;
+ MyTree::Iterator ritr = itr;
+ while (itre.valid())
+ ++itre;
+ if (numEntries > 0) {
+ EXPECT_TRUE(ritr.valid());
+ EXPECT_EQUAL(0u, ritr.position());
+ --ritr;
+ EXPECT_TRUE(!ritr.valid());
+ EXPECT_EQUAL(numEntries, ritr.position());
+ --ritr;
+ EXPECT_TRUE(ritr.valid());
+ EXPECT_EQUAL(numEntries - 1, ritr.position());
+ } else {
+ EXPECT_TRUE(!ritr.valid());
+ EXPECT_EQUAL(0u, ritr.position());
+ --ritr;
+ EXPECT_TRUE(!ritr.valid());
+ EXPECT_EQUAL(0u, ritr.position());
+ }
+ MyTree::Iterator pitr = itr;
+ for (size_t i = 0; i < numEntries; ++i) {
+ ssize_t si = i;
+ ssize_t sileft = numEntries - i;
+ EXPECT_TRUE(itr.valid());
+ EXPECT_EQUAL(i, itr.position());
+ EXPECT_EQUAL(sileft, itre - itr);
+ EXPECT_EQUAL(-sileft, itr - itre);
+ EXPECT_EQUAL(sileft, itre2 - itr);
+ EXPECT_EQUAL(-sileft, itr - itre2);
+ EXPECT_EQUAL(si, itr - tree.begin());
+ EXPECT_EQUAL(-si, tree.begin() - itr);
+ EXPECT_EQUAL(i != 0, itr - pitr);
+ EXPECT_EQUAL(-(i != 0), pitr - itr);
+ EXPECT_EQUAL(sorted[i].first, itr.getKey());
+ EXPECT_EQUAL(sorted[i].second, itr.getData());
+ pitr = itr;
+ ++itr;
+ ritr = itr;
+ --ritr;
+ EXPECT_TRUE(ritr.valid());
+ EXPECT_TRUE(ritr == pitr);
+ }
+ EXPECT_TRUE(!itr.valid());
+ EXPECT_EQUAL(numEntries, itr.position());
+ ssize_t sNumEntries = numEntries;
+ EXPECT_EQUAL(sNumEntries, itr - tree.begin());
+ EXPECT_EQUAL(-sNumEntries, tree.begin() - itr);
+ EXPECT_EQUAL(1, itr - pitr);
+ EXPECT_EQUAL(-1, pitr - itr);
+ }
+ // compact full tree by calling incremental compaction methods in a loop
+ {
+ MyTree::NodeAllocatorType &manager = tree.getAllocator();
+ std::vector<uint32_t> toHold = manager.startCompact();
+ MyTree::Iterator itr = tree.begin();
+ tree.setRoot(itr.moveFirstLeafNode(tree.getRoot()));
+ while (itr.valid()) {
+ // LOG(info, "Leaf moved to %d", UNWRAP(itr.getKey()));
+ itr.moveNextLeafNode();
+ }
+ manager.finishCompact(toHold);
+ manager.freeze();
+ manager.transferHoldLists(g.getCurrentGeneration());
+ g.incGeneration();
+ manager.trimHoldLists(g.getFirstUsedGeneration());
+ }
+ // remove entries
+ for (size_t i = 0; i < numEntries; ++i) {
+ int num = UNWRAP(exp[i].first);
+ //LOG(info, "remove[%zu](%d)", i, num);
+ //std::cout << "tree: " << tree.toString() << std::endl;
+ EXPECT_TRUE(tree.remove(num));
+ EXPECT_TRUE(!tree.find(num).valid());
+ EXPECT_TRUE(!tree.remove(num));
+ EXPECT_TRUE(tree.isValid());
+ mock.erase(num);
+ TEST_DO(EXPECT_TRUE(assertAggregated(mock, tree)));
+ for (size_t j = i + 1; j < numEntries; ++j) {
+ MyTree::Iterator itr = tree.find(exp[j].first);
+ EXPECT_TRUE(itr.valid());
+ EXPECT_EQUAL(exp[j].first, itr.getKey());
+ EXPECT_EQUAL(exp[j].second, itr.getData());
+ }
+ EXPECT_EQUAL(numEntries - 1 - i, tree.size());
+ }
+}
+
+void
+Test::requireThatSortedTreeInsertWorks()
+{
+ {
+ MyTree tree;
+ MockTree mock;
+ TEST_DO(EXPECT_TRUE(assertAggregated(mock, tree)));
+ for (int i = 0; i < 1000; ++i) {
+ EXPECT_TRUE(tree.insert(i, toVal(i)));
+ mock.insert(i, toVal(i));
+ MyTree::Iterator itr = tree.find(i);
+ EXPECT_TRUE(itr.valid());
+ EXPECT_EQUAL(toVal(i), itr.getData());
+ EXPECT_TRUE(tree.isValid());
+ TEST_DO(EXPECT_TRUE(assertAggregated(mock, tree)));
+ }
+ }
+ {
+ MyTree tree;
+ MockTree mock;
+ TEST_DO(EXPECT_TRUE(assertAggregated(mock, tree)));
+ for (int i = 1000; i > 0; --i) {
+ EXPECT_TRUE(tree.insert(i, toVal(i)));
+ mock.insert(i, toVal(i));
+ MyTree::Iterator itr = tree.find(i);
+ EXPECT_TRUE(itr.valid());
+ EXPECT_EQUAL(toVal(i), itr.getData());
+ EXPECT_TRUE(tree.isValid());
+ TEST_DO(EXPECT_TRUE(assertAggregated(mock, tree)));
+ }
+ }
+}
+
+void
+Test::requireThatCornerCaseTreeFindWorks()
+{
+ GenerationHandler g;
+ MyTree tree;
+ for (int i = 1; i < 100; ++i) {
+ tree.insert(i, toVal(i));
+ }
+ EXPECT_TRUE(!tree.find(0).valid()); // lower than lowest
+ EXPECT_TRUE(!tree.find(1000).valid()); // higher than highest
+}
+
+void
+Test::requireThatBasicTreeIteratorWorks()
+{
+ GenerationHandler g;
+ MyTree tree;
+ EXPECT_TRUE(!tree.begin().valid());
+ std::vector<LeafPair> exp;
+ size_t numEntries = 1000;
+ generateData(exp, numEntries);
+ for (size_t i = 0; i < numEntries; ++i) {
+ tree.insert(exp[i].first, exp[i].second);
+ }
+ std::sort(exp.begin(), exp.end(), LeafPairLess());
+ size_t ei = 0;
+ MyTree::Iterator itr = tree.begin();
+ MyTree::Iterator ritr;
+ EXPECT_EQUAL(1000u, itr.size());
+ for (; itr.valid(); ++itr) {
+ //LOG(info, "itr(%d, %s)", itr.getKey(), itr.getData().c_str());
+ EXPECT_EQUAL(UNWRAP(exp[ei].first), UNWRAP(itr.getKey()));
+ EXPECT_EQUAL(exp[ei].second, itr.getData());
+ ei++;
+ ritr = itr;
+ }
+ EXPECT_EQUAL(numEntries, ei);
+ for (; ritr.valid(); --ritr) {
+ --ei;
+ //LOG(info, "itr(%d, %s)", itr.getKey(), itr.getData().c_str());
+ EXPECT_EQUAL(UNWRAP(exp[ei].first), UNWRAP(ritr.getKey()));
+ EXPECT_EQUAL(exp[ei].second, ritr.getData());
+ }
+}
+
+
+
+void
+Test::requireThatTreeIteratorAssignWorks()
+{
+ GenerationHandler g;
+ MyTree tree;
+ for (int i = 0; i < 1000; ++i) {
+ tree.insert(i, toVal(i));
+ }
+ for (int i = 0; i < 1000; ++i) {
+ MyTree::Iterator itr = tree.find(i);
+ MyTree::Iterator itr2 = itr;
+ EXPECT_TRUE(itr == itr2);
+ int expNum = i;
+ for (; itr2.valid(); ++itr2) {
+ EXPECT_EQUAL(expNum++, UNWRAP(itr2.getKey()));
+ }
+ EXPECT_EQUAL(1000, expNum);
+ }
+}
+
+struct UpdKeyComp {
+ int _remainder;
+ mutable size_t _numErrors;
+ UpdKeyComp(int remainder) : _remainder(remainder), _numErrors(0) {}
+ bool operator() (const int & lhs, const int & rhs) const {
+ if (lhs % 2 != _remainder) ++_numErrors;
+ if (rhs % 2 != _remainder) ++_numErrors;
+ return lhs < rhs;
+ }
+};
+
+void
+Test::requireThatUpdateOfKeyWorks()
+{
+ typedef BTree<int, BTreeNoLeafData,
+ btree::NoAggregated,
+ UpdKeyComp &> UpdKeyTree;
+ typedef UpdKeyTree::Iterator UpdKeyTreeIterator;
+ GenerationHandler g;
+ UpdKeyTree t;
+ UpdKeyComp cmp1(0);
+ for (int i = 0; i < 1000; i+=2) {
+ EXPECT_TRUE(t.insert(i, BTreeNoLeafData(), cmp1));
+ }
+ EXPECT_EQUAL(0u, cmp1._numErrors);
+ for (int i = 0; i < 1000; i+=2) {
+ UpdKeyTreeIterator itr = t.find(i, cmp1);
+ itr.writeKey(i + 1);
+ }
+ UpdKeyComp cmp2(1);
+ for (int i = 1; i < 1000; i+=2) {
+ UpdKeyTreeIterator itr = t.find(i, cmp2);
+ EXPECT_TRUE(itr.valid());
+ }
+ EXPECT_EQUAL(0u, cmp2._numErrors);
+}
+
+
+void
+Test::requireThatUpdateOfDataWorks()
+{
+ // typedef MyTree::Iterator Iterator;
+ GenerationHandler g;
+ MyTree t;
+ MockTree mock;
+ MyAggrCalc ac;
+ MyTree::NodeAllocatorType &manager = t.getAllocator();
+ TEST_DO(EXPECT_TRUE(assertAggregated(mock, t)));
+ for (int i = 0; i < 1000; i+=2) {
+ EXPECT_TRUE(t.insert(i, toVal(i)));
+ mock.insert(i, toVal(i));
+ TEST_DO(EXPECT_TRUE(assertAggregated(mock, t)));
+ }
+ freezeTree(g, manager);
+ for (int i = 0; i < 1000; i+=2) {
+ MyTree::Iterator itr = t.find(i);
+ MyTree::Iterator itr2 = itr;
+ t.thaw(itr);
+ itr.updateData(toHighVal(i), ac);
+ EXPECT_EQUAL(toHighVal(i), itr.getData());
+ EXPECT_EQUAL(toVal(i), itr2.getData());
+ mock.erase(i);
+ mock.insert(i, toHighVal(i));
+ TEST_DO(EXPECT_TRUE(assertAggregated(mock, t)));
+ freezeTree(g, manager);
+ itr = t.find(i);
+ itr2 = itr;
+ t.thaw(itr);
+ itr.updateData(toLowVal(i), ac);
+ EXPECT_EQUAL(toLowVal(i), itr.getData());
+ EXPECT_EQUAL(toHighVal(i), itr2.getData());
+ mock.erase(i);
+ mock.insert(i, toLowVal(i));
+ TEST_DO(EXPECT_TRUE(assertAggregated(mock, t)));
+ freezeTree(g, manager);
+ itr = t.find(i);
+ itr2 = itr;
+ t.thaw(itr);
+ itr.updateData(toVal(i), ac);
+ EXPECT_EQUAL(toVal(i), itr.getData());
+ EXPECT_EQUAL(toLowVal(i), itr2.getData());
+ mock.erase(i);
+ mock.insert(i, toVal(i));
+ TEST_DO(EXPECT_TRUE(assertAggregated(mock, t)));
+ freezeTree(g, manager);
+ }
+}
+
+
+template <typename TreeStore>
+void
+Test::requireThatSmallNodesWorks(void)
+{
+ GenerationHandler g;
+ TreeStore s;
+ MockTree mock;
+
+ EntryRef root;
+ EXPECT_EQUAL(0u, s.size(root));
+ EXPECT_TRUE(s.isSmallArray(root));
+ TEST_DO(EXPECT_TRUE(assertAggregated(mock, s, root)));
+ EXPECT_TRUE(s.insert(root, 40, toVal(40)));
+ mock.insert(40, toVal(40));
+ EXPECT_TRUE(!s.insert(root, 40, toNotVal(40)));
+ EXPECT_EQUAL(1u, s.size(root));
+ EXPECT_TRUE(s.isSmallArray(root));
+ TEST_DO(EXPECT_TRUE(assertAggregated(mock, s, root)));
+ EXPECT_TRUE(s.insert(root, 20, toVal(20)));
+ mock.insert(20, toVal(20));
+ EXPECT_TRUE(!s.insert(root, 20, toNotVal(20)));
+ EXPECT_TRUE(!s.insert(root, 40, toNotVal(40)));
+ EXPECT_EQUAL(2u, s.size(root));
+ EXPECT_TRUE(s.isSmallArray(root));
+ TEST_DO(EXPECT_TRUE(assertAggregated(mock, s, root)));
+ EXPECT_TRUE(s.insert(root, 60, toVal(60)));
+ mock.insert(60, toVal(60));
+ EXPECT_TRUE(!s.insert(root, 60, toNotVal(60)));
+ EXPECT_TRUE(!s.insert(root, 20, toNotVal(20)));
+ EXPECT_TRUE(!s.insert(root, 40, toNotVal(40)));
+ EXPECT_EQUAL(3u, s.size(root));
+ EXPECT_TRUE(s.isSmallArray(root));
+ TEST_DO(EXPECT_TRUE(assertAggregated(mock, s, root)));
+ EXPECT_TRUE(s.insert(root, 50, toVal(50)));
+ mock.insert(50, toVal(50));
+ EXPECT_TRUE(!s.insert(root, 50, toNotVal(50)));
+ EXPECT_TRUE(!s.insert(root, 60, toNotVal(60)));
+ EXPECT_TRUE(!s.insert(root, 20, toNotVal(20)));
+ EXPECT_TRUE(!s.insert(root, 40, toNotVal(40)));
+ EXPECT_EQUAL(4u, s.size(root));
+ EXPECT_TRUE(s.isSmallArray(root));
+ TEST_DO(EXPECT_TRUE(assertAggregated(mock, s, root)));
+
+ for (uint32_t i = 0; i < 100; ++i) {
+ EXPECT_TRUE(s.insert(root, 1000 + i, 42));
+ mock.insert(1000 + i, 42);
+ if (i > 0) {
+ EXPECT_TRUE(!s.insert(root, 1000 + i - 1, 42));
+ }
+ EXPECT_EQUAL(5u + i, s.size(root));
+ EXPECT_EQUAL(5u + i <= 8u, s.isSmallArray(root));
+ TEST_DO(EXPECT_TRUE(assertAggregated(mock, s, root)));
+ }
+ EXPECT_TRUE(s.remove(root, 40));
+ mock.erase(40);
+ EXPECT_TRUE(!s.remove(root, 40));
+ EXPECT_EQUAL(103u, s.size(root));
+ EXPECT_TRUE(!s.isSmallArray(root));
+ TEST_DO(EXPECT_TRUE(assertAggregated(mock, s, root)));
+ EXPECT_TRUE(s.remove(root, 20));
+ mock.erase(20);
+ EXPECT_TRUE(!s.remove(root, 20));
+ EXPECT_EQUAL(102u, s.size(root));
+ EXPECT_TRUE(!s.isSmallArray(root));
+ TEST_DO(EXPECT_TRUE(assertAggregated(mock, s, root)));
+ EXPECT_TRUE(s.remove(root, 50));
+ mock.erase(50);
+ EXPECT_TRUE(!s.remove(root, 50));
+ EXPECT_EQUAL(101u, s.size(root));
+ EXPECT_TRUE(!s.isSmallArray(root));
+ TEST_DO(EXPECT_TRUE(assertAggregated(mock, s, root)));
+ for (uint32_t i = 0; i < 100; ++i) {
+ EXPECT_TRUE(s.remove(root, 1000 + i));
+ mock.erase(1000 + i);
+ if (i > 0) {
+ EXPECT_TRUE(!s.remove(root, 1000 + i - 1));
+ }
+ EXPECT_EQUAL(100 - i, s.size(root));
+ EXPECT_EQUAL(100 - i <= 8u, s.isSmallArray(root));
+ TEST_DO(EXPECT_TRUE(assertAggregated(mock, s, root)));
+ }
+ EXPECT_EQUAL(1u, s.size(root));
+ EXPECT_TRUE(s.isSmallArray(root));
+
+ s.clear(root);
+ s.clearBuilder();
+ s.freeze();
+ s.transferHoldLists(g.getCurrentGeneration());
+ g.incGeneration();
+ s.trimHoldLists(g.getFirstUsedGeneration());
+}
+
+
+int
+Test::Main()
+{
+ TEST_INIT("btreeaggregation_test");
+
+ requireThatNodeInsertWorks();
+ requireThatNodeSplitInsertWorks();
+ requireThatNodeStealWorks();
+ requireThatNodeRemoveWorks();
+ requireThatWeCanInsertAndRemoveFromTree();
+ requireThatSortedTreeInsertWorks();
+ requireThatCornerCaseTreeFindWorks();
+ requireThatBasicTreeIteratorWorks();
+ requireThatTreeIteratorAssignWorks();
+ requireThatUpdateOfKeyWorks();
+ requireThatUpdateOfDataWorks();
+ TEST_DO(requireThatSmallNodesWorks<MyTreeStore>());
+ TEST_DO(requireThatSmallNodesWorks<MyTreeForceApplyStore>());
+
+ TEST_DONE();
+}
+
+}
+}
+
+TEST_APPHOOK(search::btree::Test);
diff --git a/searchlib/src/tests/btree/iteratespeed.cpp b/searchlib/src/tests/btree/iteratespeed.cpp
new file mode 100644
index 00000000000..719dc28c036
--- /dev/null
+++ b/searchlib/src/tests/btree/iteratespeed.cpp
@@ -0,0 +1,213 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("iteratespeed");
+#include <string>
+#include <vespa/searchlib/btree/btreeroot.h>
+#include <vespa/searchlib/btree/btreebuilder.h>
+#include <vespa/searchlib/btree/btreenodeallocator.h>
+#include <vespa/searchlib/btree/btree.h>
+#include <vespa/searchlib/btree/btreestore.h>
+#include <vespa/searchlib/util/rand48.h>
+
+#include <vespa/searchlib/btree/btreenodeallocator.hpp>
+#include <vespa/searchlib/btree/btreenode.hpp>
+#include <vespa/searchlib/btree/btreenodestore.hpp>
+#include <vespa/searchlib/btree/btreeiterator.hpp>
+#include <vespa/searchlib/btree/btreeroot.hpp>
+#include <vespa/searchlib/btree/btreebuilder.hpp>
+#include <vespa/searchlib/btree/btree.hpp>
+#include <vespa/searchlib/btree/btreestore.hpp>
+
+namespace search {
+namespace btree {
+
+enum class IterateMethod
+{
+ FORWARD,
+ BACKWARDS,
+ LAMBDA
+};
+
+class IterateSpeed : public FastOS_Application
+{
+ template <typename Traits, IterateMethod iterateMethod>
+ void
+ workLoop(int loops, bool enableForward, bool enableBackwards,
+ bool enableLambda, int leafSlots);
+
+ void usage();
+
+ int
+ Main(void);
+};
+
+
+namespace {
+
+const char *iterateMethodName(IterateMethod iterateMethod)
+{
+ switch (iterateMethod) {
+ case IterateMethod::FORWARD:
+ return "forward";
+ case IterateMethod::BACKWARDS:
+ return "backwards";
+ default:
+ return "lambda";
+ }
+}
+
+}
+
+template <typename Traits, IterateMethod iterateMethod>
+void
+IterateSpeed::workLoop(int loops, bool enableForward, bool enableBackwards,
+ bool enableLambda, int leafSlots)
+{
+ if ((iterateMethod == IterateMethod::FORWARD && !enableForward) ||
+ (iterateMethod == IterateMethod::BACKWARDS && !enableBackwards) ||
+ (iterateMethod == IterateMethod::LAMBDA && !enableLambda) ||
+ (leafSlots != 0 &&
+ leafSlots != static_cast<int>(Traits::LEAF_SLOTS)))
+ return;
+ vespalib::GenerationHandler g;
+ using Tree = BTree<int, int, btree::NoAggregated, std::less<int>, Traits>;
+ using Builder = typename Tree::Builder;
+ using ConstIterator = typename Tree::ConstIterator;
+ Tree tree;
+ Builder builder(tree.getAllocator());
+ size_t numEntries = 1000000;
+ size_t numInnerLoops = 1000;
+ for (size_t i = 0; i < numEntries; ++i) {
+ builder.insert(i, 0);
+ }
+ tree.assign(builder);
+ assert(numEntries == tree.size());
+ assert(tree.isValid());
+ for (int l = 0; l < loops; ++l) {
+ fastos::TimeStamp before = fastos::ClockSystem::now();
+ uint64_t sum = 0;
+ for (size_t innerl = 0; innerl < numInnerLoops; ++innerl) {
+ if (iterateMethod == IterateMethod::FORWARD) {
+ ConstIterator itr(BTreeNode::Ref(), tree.getAllocator());
+ itr.begin(tree.getRoot());
+ while (itr.valid()) {
+ sum += itr.getKey();
+ ++itr;
+ }
+ } else if (iterateMethod == IterateMethod::BACKWARDS) {
+ ConstIterator itr(BTreeNode::Ref(), tree.getAllocator());
+ itr.end(tree.getRoot());
+ --itr;
+ while (itr.valid()) {
+ sum += itr.getKey();
+ --itr;
+ }
+ } else {
+ tree.getAllocator().foreach_key(tree.getRoot(),
+ [&](int key) { sum += key; } );
+ }
+ }
+ fastos::TimeStamp after = fastos::ClockSystem::now();
+ double used = after.sec() - before.sec();
+ printf("Elapsed time for iterating %ld steps is %8.5f, "
+ "direction=%s, fanout=%u,%u, sum=%" PRIu64 "\n",
+ numEntries * numInnerLoops,
+ used,
+ iterateMethodName(iterateMethod),
+ static_cast<int>(Traits::LEAF_SLOTS),
+ static_cast<int>(Traits::INTERNAL_SLOTS),
+ sum);
+ fflush(stdout);
+ }
+}
+
+
+void
+IterateSpeed::usage()
+{
+ printf("iteratspeed "
+ "[-F <leafSlots>] "
+ "[-b] "
+ "[-c <numLoops>] "
+ "[-f] "
+ "[-l]\n");
+}
+
+int
+IterateSpeed::Main()
+{
+ int argi;
+ char c;
+ const char *optArg;
+ argi = 1;
+ int loops = 1;
+ bool backwards = false;
+ bool forwards = false;
+ bool lambda = false;
+ int leafSlots = 0;
+ while ((c = GetOpt("F:bc:fl", optArg, argi)) != -1) {
+ switch (c) {
+ case 'F':
+ leafSlots = atoi(optArg);
+ break;
+ case 'b':
+ backwards = true;
+ break;
+ case 'c':
+ loops = atoi(optArg);
+ break;
+ case 'f':
+ forwards = true;
+ break;
+ case 'l':
+ lambda = true;
+ break;
+ default:
+ usage();
+ return 1;
+ }
+ }
+ if (!backwards && !forwards && !lambda) {
+ backwards = true;
+ forwards = true;
+ lambda = true;
+ }
+
+ using SmallTraits = BTreeTraits<4, 4, 31, false>;
+ using DefTraits = BTreeDefaultTraits;
+ using LargeTraits = BTreeTraits<32, 16, 10, true>;
+ using HugeTraits = BTreeTraits<64, 16, 10, true>;
+ workLoop<SmallTraits, IterateMethod::FORWARD>(loops, forwards, backwards,
+ lambda, leafSlots);
+ workLoop<DefTraits, IterateMethod::FORWARD>(loops, forwards, backwards,
+ lambda, leafSlots);
+ workLoop<LargeTraits, IterateMethod::FORWARD>(loops, forwards, backwards,
+ lambda, leafSlots);
+ workLoop<HugeTraits, IterateMethod::FORWARD>(loops, forwards, backwards,
+ lambda, leafSlots);
+ workLoop<SmallTraits, IterateMethod::BACKWARDS>(loops, forwards, backwards,
+ lambda, leafSlots);
+ workLoop<DefTraits, IterateMethod::BACKWARDS>(loops, forwards, backwards,
+ lambda, leafSlots);
+ workLoop<LargeTraits, IterateMethod::BACKWARDS>(loops, forwards, backwards,
+ lambda, leafSlots);
+ workLoop<HugeTraits, IterateMethod::BACKWARDS>(loops, forwards, backwards,
+ lambda, leafSlots);
+ workLoop<SmallTraits, IterateMethod::LAMBDA>(loops, forwards, backwards,
+ lambda, leafSlots);
+ workLoop<DefTraits, IterateMethod::LAMBDA>(loops, forwards, backwards,
+ lambda, leafSlots);
+ workLoop<LargeTraits, IterateMethod::LAMBDA>(loops, forwards, backwards,
+ lambda, leafSlots);
+ workLoop<HugeTraits, IterateMethod::LAMBDA>(loops, forwards, backwards,
+ lambda, leafSlots);
+ return 0;
+}
+
+}
+}
+
+FASTOS_MAIN(search::btree::IterateSpeed);
+
+
diff --git a/searchlib/src/tests/bytecomplens/.gitignore b/searchlib/src/tests/bytecomplens/.gitignore
new file mode 100644
index 00000000000..afe9bff02f6
--- /dev/null
+++ b/searchlib/src/tests/bytecomplens/.gitignore
@@ -0,0 +1,5 @@
+*.So
+.depend*
+Makefile
+bytecomp_test
+searchlib_bytecomp_test_app
diff --git a/searchlib/src/tests/bytecomplens/CMakeLists.txt b/searchlib/src/tests/bytecomplens/CMakeLists.txt
new file mode 100644
index 00000000000..188c3fccbdf
--- /dev/null
+++ b/searchlib/src/tests/bytecomplens/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_bytecomp_test_app
+ SOURCES
+ bytecomp.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_bytecomp_test_app NO_VALGRIND COMMAND searchlib_bytecomp_test_app)
diff --git a/searchlib/src/tests/bytecomplens/DESC b/searchlib/src/tests/bytecomplens/DESC
new file mode 100644
index 00000000000..e40e528ddea
--- /dev/null
+++ b/searchlib/src/tests/bytecomplens/DESC
@@ -0,0 +1 @@
+Test of search::ByteCompressedLengths class. Look at bytecomp.cpp for details.
diff --git a/searchlib/src/tests/bytecomplens/FILES b/searchlib/src/tests/bytecomplens/FILES
new file mode 100644
index 00000000000..c44e7f254f8
--- /dev/null
+++ b/searchlib/src/tests/bytecomplens/FILES
@@ -0,0 +1 @@
+bytecomplens.cpp
diff --git a/searchlib/src/tests/bytecomplens/bytecomp.cpp b/searchlib/src/tests/bytecomplens/bytecomp.cpp
new file mode 100644
index 00000000000..63aa2da15f6
--- /dev/null
+++ b/searchlib/src/tests/bytecomplens/bytecomp.cpp
@@ -0,0 +1,102 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <memory>
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("bytecomplens_test");
+#include <vespa/vespalib/testkit/testapp.h>
+#include <vespa/vespalib/util/random.h>
+#include <vespa/searchlib/docstore/bytecomplens.h>
+
+
+class Test : public vespalib::TestApp {
+private:
+ void testRandomLengths();
+
+public:
+ int Main() {
+ TEST_INIT("bytecomplens_test");
+ testRandomLengths(); TEST_FLUSH();
+ TEST_DONE();
+ }
+};
+
+TEST_APPHOOK(Test);
+
+
+void
+Test::testRandomLengths()
+{
+ vespalib::RandomGen rndgen(0x07031969);
+
+#define TBLSIZ 0xc00000
+
+ auto lentable = std::unique_ptr<uint32_t[]>(new uint32_t[TBLSIZ]);
+ auto offtable = std::unique_ptr<uint64_t[]>(new uint64_t[TBLSIZ]);
+
+ uint64_t offset = 16;
+
+ for (int i = 0; i < TBLSIZ; i++) {
+ int sel = rndgen.nextInt32();
+ int val = rndgen.nextInt32();
+ switch (sel & 0x7) {
+ case 0:
+ val &= 0x7F;
+ break;
+ case 1:
+ val &= 0xFF;
+ break;
+ case 3:
+ val &= 0x1FFF;
+ break;
+ case 4:
+ val &= 0x3FFF;
+ break;
+ case 5:
+ val &= 0x7FFF;
+ break;
+ case 6:
+ val &= 0xFFFF;
+ break;
+ case 7:
+ default:
+ val &= 0xFFFFF;
+ break;
+ }
+ offtable[i] = offset;
+ lentable[i] = val;
+ offset += val;
+ }
+
+ LOG(info, "made %d random offsets", TBLSIZ);
+
+ search::ByteCompressedLengths foo;
+
+ LOG(info, "empty BCL using %9ld bytes memory", foo.memoryUsed());
+
+ foo.addOffsetTable(TBLSIZ/4, offtable.get());
+ foo.addOffsetTable(TBLSIZ/4, offtable.get() + 1*(TBLSIZ/4));
+
+ LOG(info, "half BCL using %9ld bytes memory", foo.memoryUsed());
+
+ search::ByteCompressedLengths bar;
+ foo.swap(bar);
+ bar.addOffsetTable(TBLSIZ/4, offtable.get() + 2*(TBLSIZ/4));
+ bar.addOffsetTable(TBLSIZ/4, offtable.get() + 3*(TBLSIZ/4));
+ foo.swap(bar);
+
+ LOG(info, "full BCL using %9ld bytes memory", foo.memoryUsed());
+
+ LOG(info, "constructed %d byte compressed lengths", TBLSIZ-1);
+
+ for (int i = 0; i < TBLSIZ-1; i++) {
+ search::ByteCompressedLengths::OffLen offlen;
+ offlen = foo.getOffLen(i);
+
+ if ((i % 1000000) == 0) {
+ LOG(info, "data blob [%d] length %ld offset %ld", i, offlen.length, offlen.offset);
+ }
+ EXPECT_EQUAL(lentable[i], offlen.length);
+ EXPECT_EQUAL(offtable[i], offlen.offset);
+ }
+}
+
diff --git a/searchlib/src/tests/bytecomplens/example.txt b/searchlib/src/tests/bytecomplens/example.txt
new file mode 100644
index 00000000000..6dc3df0118a
--- /dev/null
+++ b/searchlib/src/tests/bytecomplens/example.txt
@@ -0,0 +1,122 @@
+offset length BCN val L0 len/off skipL1 skipL2 skipL3
+
+976 18707 [ 93 92 01 ] 3/0 976/0/0/0
+19683 11527 [ 87 5A ] 2/3
+31210 3926 [ D6 1E ] 2/5
+35136 2 [ 02 ] 1/7
+35138 6060 [ AC 2F ] 2/8 34162/8
+41198 649445 [ E5 D1 27 ] 3/10
+690643 2866 [ B2 16 ] 2/13
+693509 824767 [ BF AB 32 ] 3/15
+1518276 499173 [ E5 BB 1E ] 3/18 1483138/10
+2017449 20455 [ E7 9F 01 ] 3/21
+2037904 11 [ 0B ] 1/24
+2037915 19207 [ 87 96 01 ] 3/25
+2057122 6355 [ D3 31 ] 2/28 538846/10
+2063477 3422 [ DE 1A ] 2/30
+2066899 10683 [ BB 53 ] 2/32
+2077582 7360 [ C0 39 ] 2/34
+2084942 17969 [ B1 8C 01 ] 3/36 2083966/36/12
+2102911 6114 [ E2 2F ] 2/39
+2109025 31741 [ FD F7 01 ] 3/41
+2140766 581588 [ D4 BF 23 ] 3/44
+2722354 5341 [ DD 29 ] 2/47 637412/11
+2727695 13774 [ CE 6B ] 2/49
+2741469 717809 [ F1 E7 2B ] 3/51
+3459278 815406 [ AE E2 31 ] 3/54
+4274684 89 [ 59 ] 1/57 1552330/10
+4274773 4545 [ C1 23 ] 2/58
+4279318 803868 [ 9C 88 31 ] 3/60
+5083186 12865 [ C1 64 ] 2/63
+5096051 75 [ 4B ] 1/65 821367/8
+5096126 40734 [ 9E BE 02 ] 3/66
+5136860 101 [ 65 ] 1/69
+5136961 128 [ 80 01 ] 2/70
+5137089 253 [ FD 01 ] 2/72 3052147/36/12
+5137342 13 [ 0D ] 1/74
+5137355 24986 [ 9A C3 01 ] 3/75
+5162341 231 [ E7 01 ] 2/78
+5162572 997853 [ DD F3 3C ] 3/80 25483/8
+6160425 4728 [ F8 24 ] 2/83
+6165153 2025 [ E9 0F ] 2/85
+6167178 7281 [ F1 38 ] 2/87
+6174459 1026302 [ FE D1 3E ] 3/89 1011887/9
+7200761 848783 [ 8F E7 33 ] 3/92
+8049544 145767 [ E7 F2 08 ] 3/95
+8195311 19103 [ 9F 95 01 ] 3/98
+8214414 22166 [ 96 AD 01 ] 3/101 2039955/12
+8236580 30020 [ C4 EA 01 ] 3/104
+8266600 13 [ 0D ] 1/107
+8266613 120 [ 78 ] 1/108
+8266733 22398 [ FE AE 01 ] 3/109 3129644/37/12
+8289131 10832 [ D0 54 ] 2/112
+8299963 3765 [ B5 1D ] 2/114
+8303728 432771 [ 83 B5 1A ] 3/116
+8736499 30133 [ B5 EB 01 ] 3/119 469766/10
+8766632 6444 [ AC 32 ] 2/122
+8773076 16033 [ A1 7D ] 2/124
+8789109 78 [ 4E ] 1/126
+8789187 12510 [ DE 61 ] 2/127 52688/8
+8801697 12441 [ 99 61 ] 2/129
+8814138 117 [ 75 ] 1/131
+8814255 7147 [ EB 37 ] 2/132
+8821402 189 [ BD 01 ] 2/134 32215/7
+8821591 199704 [ 98 98 0C ] 3/136
+9021295 13240 [ B8 67 ] 2/139
+9034535 110 [ 6E ] 1/141
+9034645 31677 [ BD F7 01 ] 3/142 9034645/142/48/17
+9066322 18547 [ F3 90 01 ] 3/145
+9084869 734679 [ D7 EB 2C ] 3/148
+9819548 112 [ 70 ] 1/151
+9819660 883565 [ ED F6 35 ] 3/152 785015/10
+10703225 10290 [ B2 50 ] 2/155
+10713515 21410 [ A2 A7 01 ] 3/157
+10734925 15 [ 0F ] 1/160
+10734940 747774 [ FE D1 2D ] 3/161 915280/9
+11482714 39 [ 27 ] 1/164
+11482753 77 [ 4D ] 1/165
+11482830 235 [ EB 01 ] 2/166
+11483065 1991 [ C7 0F ] 2/168 748125/7
+11485056 9187 [ E3 47 ] 2/170
+11494243 18800 [ F0 92 01 ] 3/172
+11513043 1042219 [ AB CE 3F ] 3/175
+12555262 9154 [ C2 47 ] 2/178 3520617/36/12
+12564416 43582 [ BE D4 02 ] 3/180
+12607998 847240 [ 88 DB 33 ] 3/183
+13455238 4726 [ F6 24 ] 2/186
+13459964 590348 [ 8C 84 24 ] 3/188 904702/10
+14050312 8659 [ D3 43 ] 2/191
+14058971 116 [ 74 ] 1/193
+14059087 13563 [ FB 69 ] 2/194
+14072650 713064 [ E8 C2 2B ] 3/196 612686/8
+14785714 40321 [ 81 BB 02 ] 3/199
+14826035 2296 [ F8 11 ] 2/202
+14828331 7273 [ E9 38 ] 2/204
+14835604 68285 [ BD 95 04 ] 3/206 762954/10
+14903889 235 [ EB 01 ] 2/209
+14904124 4669 [ BD 24 ] 2/211
+14908793 28535 [ F7 DE 01 ] 3/213
+14937328 19 [ 13 ] 1/216 2382066/38/12
+14937347 5369 [ F9 29 ] 2/217
+14942716 602191 [ CF E0 24 ] 3/219
+15544907 2653 [ DD 14 ] 2/222
+15547560 25755 [ 9B C9 01 ] 3/224 610232/8
+15573315 11349 [ D5 58 ] 2/227
+15584664 15006 [ 9E 75 ] 2/229
+15599670 89 [ 59 ] 1/231
+15599759 52772 [ A4 9C 03 ] 3/232 52199/8
+15652531 776175 [ EF AF 2F ] 3/235
+16428706 126 [ 7E ] 1/238
+16428832 3884 [ AC 1E ] 2/239
+16432716 33958 [ A6 89 02 ] 3/241 832957/9
+16466674 122 [ 7A ] 1/244
+16466796 41895 [ A7 C7 02 ] 3/245
+16508691 105882 [ 9A BB 06 ] 3/248
+16614573 11067 [ BB 56 ] 2/251 1677245/35/12
+16625640 4588 [ EC 23 ] 2/253
+16630228 7349 [ B5 39 ] 2/255
+16637577 902638 [ EE 8B 37 ] 3/257
+17540215 8737 [ A1 44 ] 2/260 925642/9
+17548952 29186 [ 82 E4 01 ] 3/262
+17578138 41 [ 29 ] 1/265
+17578179
diff --git a/searchlib/src/tests/bytecomplens/tblprint.cpp b/searchlib/src/tests/bytecomplens/tblprint.cpp
new file mode 100644
index 00000000000..93657d82178
--- /dev/null
+++ b/searchlib/src/tests/bytecomplens/tblprint.cpp
@@ -0,0 +1,357 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("tblprint");
+#include <vespa/vespalib/util/random.h>
+
+#include <vector>
+#include <vespa/vespalib/data/databuffer.h>
+
+
+/**
+ * Class compressing a table of offsets in memory.
+ * After adding (n) offsets you can access
+ * (n-1) pairs of (length, offset).
+ * All offsets must be increasing, but they
+ * may be added in several chunks.
+ **/
+class ByteCompressedLengths
+{
+public:
+ /**
+ * Construct an empty instance
+ **/
+ ByteCompressedLengths();
+
+ /**
+ * add the given offset table.
+ * @param entries number of offsets to store.
+ * @param offsets table that contains (entries) offsets.
+ **/
+ void addOffsetTable(uint64_t entries, uint64_t *offsets);
+
+ /**
+ * free resources
+ **/
+ ~ByteCompressedLengths();
+
+ /**
+ * Fetch a length and offset from compressed data.
+ * Note invariant: id < size(); size() == (entries-1)
+ *
+ * @param id The index into the offset table
+ * @param offset Will be incremented by offset[id]
+ * @return The delta (offset[id+1] - offset[id])
+ **/
+ uint64_t getLength(uint64_t id, uint64_t &offset) const;
+
+ /**
+ * The number of (length, offset) pairs stored
+ **/
+ uint64_t size() const { return _entries; }
+
+ struct L3Entry {
+ uint64_t offset;
+ uint64_t l0toff;
+ uint64_t l1toff;
+ uint64_t l2toff;
+ };
+ vespalib::DataBuffer _l0space;
+ vespalib::DataBuffer _l1space;
+ vespalib::DataBuffer _l2space;
+ const uint8_t *_l0table;
+ const uint8_t *_l1table;
+ const uint8_t *_l2table;
+
+ std::vector<L3Entry> _l3table;
+
+ uint64_t _lenSum1;
+ uint64_t _lenSum2;
+ uint64_t _l0oSum1;
+ uint64_t _l0oSum2;
+ uint64_t _l1oSum2;
+ uint64_t _last_offset;
+ uint64_t _entries;
+
+ void addOffset(uint64_t offset);
+};
+
+/**
+ * get "Byte Compressed Number" from buffer, incrementing pointer
+ **/
+static inline uint64_t getBCN(const uint8_t *&buffer)
+{
+ uint8_t b = *buffer++;
+ uint64_t len = (b & 127);
+ unsigned shiftLen = 0;
+ while (b & 128) {
+ shiftLen += 7;
+ b = *buffer++;
+ len |= ((b & 127) << shiftLen);
+ }
+ return len;
+}
+
+static size_t writeLen(vespalib::DataBuffer &buf, uint64_t len)
+{
+ size_t bytes = 0;
+ do {
+ uint8_t b = len & 127;
+ len >>= 7;
+ if (len > 0) {
+ b |= 128;
+ }
+ buf.ensureFree(1);
+ buf.writeInt8(b);
+ ++bytes;
+ } while (len > 0);
+ return bytes;
+}
+
+
+ByteCompressedLengths::ByteCompressedLengths()
+ : _l0space(),
+ _l1space(),
+ _l2space(),
+ _l3table(),
+ _lenSum1(0),
+ _lenSum2(0),
+ _l0oSum1(0),
+ _l0oSum2(0),
+ _l1oSum2(0),
+ _last_offset(0),
+ _entries(0)
+{
+}
+
+
+void
+ByteCompressedLengths::addOffset(uint64_t offset)
+{
+ assert(offset >= _last_offset);
+
+ uint64_t len = offset - _last_offset;
+ uint64_t i = _entries++;
+
+ if ((i & 3) == 0) {
+ _lenSum2 += _lenSum1;
+ _l0oSum2 += _l0oSum1;
+
+ uint64_t t1n = i >> 2;
+ if ((t1n & 3) == 0) {
+ uint64_t t2n = t1n >> 2;
+
+ if ((t2n & 3) == 0) {
+ L3Entry e;
+ e.offset = _last_offset;
+ e.l0toff = _l0space.getDataLen();
+ e.l1toff = _l1space.getDataLen();
+ e.l2toff = _l2space.getDataLen();
+
+ _l3table.push_back(e);
+ } else {
+ writeLen(_l2space, _lenSum2);
+ writeLen(_l2space, _l0oSum2);
+ writeLen(_l2space, _l1oSum2);
+ }
+ _lenSum2 = 0;
+ _l0oSum2 = 0;
+ _l1oSum2 = 0;
+ } else {
+ _l1oSum2 += writeLen(_l1space, _lenSum1);
+ _l1oSum2 += writeLen(_l1space, _l0oSum1);
+ }
+ _lenSum1 = 0;
+ _l0oSum1 = 0;
+ }
+ _l0oSum1 += writeLen(_l0space, len);
+ _lenSum1 += len;
+ _last_offset = offset;
+}
+
+
+void
+ByteCompressedLengths::addOffsetTable(uint64_t entries, uint64_t *offsets)
+{
+ if (entries == 0) return;
+ // Do we have some offsets already?
+ if (_entries > 0) {
+ // yes, add first offset normally
+ addOffset(offsets[0]);
+ } else {
+ // no, special treatment for very first offset
+ _last_offset = offsets[0];
+ }
+ for (uint64_t cnt = 1; cnt < entries; ++cnt) {
+ addOffset(offsets[cnt]);
+ }
+ _l0table = (uint8_t *)_l0space.getData();
+ _l1table = (uint8_t *)_l1space.getData();
+ _l2table = (uint8_t *)_l2space.getData();
+
+ LOG(debug, "compressed %ld offsets", (_entries+1));
+ LOG(debug, "(%ld bytes)", (_entries+1)*sizeof(uint64_t));
+ LOG(debug, "to (%ld + %ld + %ld) bytes + %ld l3entries",
+ _l0space.getDataLen(),
+ _l1space.getDataLen(),
+ _l2space.getDataLen(),
+ _l3table.size());
+ LOG(debug, "(%ld bytes)",
+ (_l0space.getDataLen() + _l1space.getDataLen() + _l2space.getDataLen() +
+ _l3table.size()*sizeof(L3Entry)));
+}
+
+
+ByteCompressedLengths::~ByteCompressedLengths()
+{
+}
+
+uint64_t
+ByteCompressedLengths::getLength(uint64_t numSkip, uint64_t &offset) const
+{
+ assert(numSkip < _entries);
+
+ unsigned skipL0 = numSkip & 3;
+ unsigned skipL1 = (numSkip >> 2) & 3;
+ unsigned skipL2 = (numSkip >> 4) & 3;
+ uint64_t skipL3 = (numSkip >> 6);
+
+ offset += _l3table[skipL3].offset;
+ uint64_t l0toff = _l3table[skipL3].l0toff;
+ uint64_t l1toff = _l3table[skipL3].l1toff;
+ uint64_t l2toff = _l3table[skipL3].l2toff;
+
+ // printf("start off %ld l0off %ld l1off %ld l2off %ld\n", offset, l0toff, l1toff, l2toff);
+
+ const uint8_t *l2pos = _l2table + l2toff;
+
+ while (skipL2 > 0) {
+ --skipL2;
+ offset += getBCN(l2pos);
+ l0toff += getBCN(l2pos);
+ l1toff += getBCN(l2pos);
+ }
+
+ const uint8_t *l1pos = _l1table + l1toff;
+
+ while (skipL1 > 0) {
+ --skipL1;
+ offset += getBCN(l1pos);
+ l0toff += getBCN(l1pos);
+
+ }
+ const uint8_t *l0pos = _l0table + l0toff;
+
+ while (skipL0 > 0) {
+ --skipL0;
+ offset += getBCN(l0pos);
+ }
+ // printf("end off %ld l0off %ld l1off %ld l2off %ld\n", offset, l0toff, l1toff, l2toff);
+ return getBCN(l0pos);
+}
+
+
+
+class Test {
+public:
+ static void printTable();
+};
+
+
+
+int main(int /*argc*/, char ** /*argv*/)
+{
+ Test::printTable();
+ return 0;
+}
+
+void
+Test::printTable()
+{
+ vespalib::RandomGen rndgen(0x07031969);
+#define TBLSIZ 120
+ uint32_t *lentable = new uint32_t[TBLSIZ];
+ uint64_t *offtable = new uint64_t[TBLSIZ];
+
+ uint64_t offset = 16 + TBLSIZ*8;
+
+ for (int i = 0; i < TBLSIZ; i++) {
+ int sel = rndgen.nextInt32();
+ int val = rndgen.nextInt32();
+ switch (sel & 0x7) {
+ case 0:
+ val &= 0x7F;
+ break;
+ case 1:
+ val &= 0xFF;
+ break;
+ case 3:
+ val &= 0x1FFF;
+ break;
+ case 4:
+ val &= 0x3FFF;
+ break;
+ case 5:
+ val &= 0x7FFF;
+ break;
+ case 6:
+ val &= 0xFFFF;
+ break;
+ case 7:
+ default:
+ val &= 0xFFFFF;
+ break;
+ }
+ offtable[i] = offset;
+ lentable[i] = val;
+ offset += val;
+ }
+
+ ByteCompressedLengths foo;
+ foo.addOffsetTable(TBLSIZ, offtable);
+
+ const uint8_t *l1pos = foo._l1table;
+ const uint8_t *l2pos = foo._l2table;
+
+ printf("%s\t%s\t%s\t%s\t%s\t%s\t%s\n",
+ "offset", "length", "BCN val", "L0 len/off", "skipL1", "skipL2", "skipL3");
+
+ int slb = 0;
+ for (int i = 0; i+1 < TBLSIZ; i++) {
+ printf("%ld\t%d\t[", offtable[i], lentable[i]);
+ int bytes=0;
+ uint64_t len = lentable[i];
+ do {
+ uint8_t b = len & 127;
+ len >>= 7;
+ if (len > 0) {
+ b |= 128;
+ }
+ printf(" %02X", b);
+ ++bytes;
+ } while (len > 0);
+ printf(" ]\t%d", bytes);
+ printf("/%d", slb);
+ slb += bytes;
+
+ if ((i & 63) == 0) {
+ printf("\t\t\t%ld/%ld/%ld/%ld",
+ foo._l3table[i >> 6].offset,
+ foo._l3table[i >> 6].l0toff,
+ foo._l3table[i >> 6].l1toff,
+ foo._l3table[i >> 6].l2toff);
+ } else
+ if ((i & 15) == 0) {
+ printf("\t\t%ld", getBCN(l2pos));
+ printf("/%ld", getBCN(l2pos));
+ printf("/%ld", getBCN(l2pos));
+ } else
+ if ((i & 3) == 0) {
+ printf("\t%ld", getBCN(l1pos));
+ printf("/%ld", getBCN(l1pos));
+ }
+ printf("\n");
+ }
+ printf("%ld\n", offtable[TBLSIZ-1]);
+ fflush(stdout);
+}
diff --git a/searchlib/src/tests/common/bitvector/.gitignore b/searchlib/src/tests/common/bitvector/.gitignore
new file mode 100644
index 00000000000..bdc2879ea74
--- /dev/null
+++ b/searchlib/src/tests/common/bitvector/.gitignore
@@ -0,0 +1,8 @@
+.depend
+Makefile
+*_test
+*_benchmark
+/bitvector_test-common
+searchlib_condensedbitvector_test_app
+searchlib_bitvector_benchmark_app
+searchlib_bitvector_test-common_app
diff --git a/searchlib/src/tests/common/bitvector/CMakeLists.txt b/searchlib/src/tests/common/bitvector/CMakeLists.txt
new file mode 100644
index 00000000000..ce49872319a
--- /dev/null
+++ b/searchlib/src/tests/common/bitvector/CMakeLists.txt
@@ -0,0 +1,22 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_bitvector_test-common_app
+ SOURCES
+ bitvector_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_bitvector_test-common_app COMMAND searchlib_bitvector_test-common_app)
+vespa_add_executable(searchlib_bitvector_benchmark_app
+ SOURCES
+ bitvector_benchmark.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_bitvector_benchmark_app COMMAND searchlib_bitvector_benchmark_app BENCHMARK)
+vespa_add_executable(searchlib_condensedbitvector_test_app
+ SOURCES
+ condensedbitvector_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_condensedbitvector_test_app COMMAND searchlib_condensedbitvector_test_app)
diff --git a/searchlib/src/tests/common/bitvector/DESC b/searchlib/src/tests/common/bitvector/DESC
new file mode 100644
index 00000000000..313f0f89f2a
--- /dev/null
+++ b/searchlib/src/tests/common/bitvector/DESC
@@ -0,0 +1 @@
+bitvector test. Take a look at bitvector_test.cpp for details.
diff --git a/searchlib/src/tests/common/bitvector/FILES b/searchlib/src/tests/common/bitvector/FILES
new file mode 100644
index 00000000000..a2583d74519
--- /dev/null
+++ b/searchlib/src/tests/common/bitvector/FILES
@@ -0,0 +1 @@
+bitvector_test.cpp
diff --git a/searchlib/src/tests/common/bitvector/bitvector_benchmark.cpp b/searchlib/src/tests/common/bitvector/bitvector_benchmark.cpp
new file mode 100644
index 00000000000..cc0ef78c193
--- /dev/null
+++ b/searchlib/src/tests/common/bitvector/bitvector_benchmark.cpp
@@ -0,0 +1,37 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("bitvector_benchmark");
+#include <vespa/vespalib/testkit/testapp.h>
+#include <vespa/searchlib/common/bitvector.h>
+
+using namespace search;
+
+namespace {
+
+size_t scan(BitVector & bv) __attribute__((noinline));
+
+size_t scan(BitVector & bv)
+{
+ size_t count(0);
+ for (BitVector::Index i(bv.getFirstTrueBit()), m(bv.size()); i < m; i = bv.getNextTrueBit(i+1)) {
+ count++;
+ }
+ return count;
+}
+
+}
+
+// This test is 10% faster with table lookup than with runtime shifting.
+TEST("speed of getNextTrueBit")
+{
+ BitVector::UP bv(BitVector::create(100000000));
+ bv->setInterval(0, bv->size() - 1);
+
+ for (size_t i(0); i < 10; i++) {
+ EXPECT_EQUAL(bv->size(), scan(*bv));
+ }
+ EXPECT_EQUAL(bv->size(), bv->countTrueBits());
+}
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/common/bitvector/bitvector_test.cpp b/searchlib/src/tests/common/bitvector/bitvector_test.cpp
new file mode 100644
index 00000000000..11c43166ef5
--- /dev/null
+++ b/searchlib/src/tests/common/bitvector/bitvector_test.cpp
@@ -0,0 +1,541 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("bitvector_test");
+#include <vespa/vespalib/testkit/testapp.h>
+#include <vespa/vespalib/stllike/asciistream.h>
+#include <vespa/searchlib/common/growablebitvector.h>
+#include <vespa/searchlib/common/partialbitvector.h>
+#include <vespa/searchlib/common/rankedhit.h>
+#include <vespa/searchlib/common/bitvectoriterator.h>
+#include <vespa/searchlib/fef/termfieldmatchdata.h>
+#include <vespa/searchlib/fef/termfieldmatchdataarray.h>
+#include <vespa/searchlib/util/rand48.h>
+
+using namespace search;
+
+namespace {
+
+std::string
+toString(const BitVector & bv)
+{
+ std::stringstream ss;
+ ss << "[";
+ bool first = true;
+ uint32_t nextBit = bv.getStartIndex();
+ for (;;) {
+ nextBit = bv.getNextTrueBit(nextBit);
+ if (nextBit >= bv.size()) {
+ break;
+ }
+ if (!first) {
+ ss << ",";
+ }
+ ss << nextBit++;
+ first = false;
+ }
+ ss << "]";
+ return ss.str();
+}
+
+
+std::string
+toString(BitVectorIterator &b)
+{
+ std::stringstream ss;
+ ss << "[";
+ bool first = true;
+ b.initFullRange();
+ for (uint32_t docId = 1; ! b.isAtEnd(docId); ) {
+ if (!b.seek(docId)) {
+ docId = std::max(docId + 1, b.getDocId());
+ if (b.isAtEnd(docId))
+ break;
+ continue;
+ }
+ if (!first) {
+ ss << ",";
+ }
+ b.unpack(docId);
+ ss << docId++;
+ first = false;
+ }
+ ss << "]";
+ return ss.str();
+}
+
+
+
+uint32_t
+myCountInterval(const BitVector &bv, uint32_t low, uint32_t high)
+{
+ uint32_t res = 0u;
+ if (bv.size() == 0u)
+ return 0u;
+ if (high >= bv.size())
+ high = bv.size() - 1;
+ for (; low <= high; ++low) {
+ if (bv.testBit(low))
+ ++res;
+ }
+ return res;
+}
+
+void
+scan(uint32_t count, uint32_t offset, uint32_t size, Rand48 &rnd)
+{
+ std::vector<uint32_t> lids;
+ lids.reserve(count);
+ uint32_t end = size + offset;
+ for (uint32_t i = 0; i < count; ++i) {
+ uint32_t lid = offset + (rnd.lrand48() % (size - 1)) + 1;
+ lids.push_back(lid);
+ }
+ std::sort(lids.begin(), lids.end());
+ lids.resize(std::unique(lids.begin(), lids.end()) - lids.begin());
+ BitVector::UP bv(BitVector::create(offset, end));
+ for (auto lid : lids) {
+ bv->setBit(lid);
+ }
+ EXPECT_EQUAL(bv->getFirstTrueBit(), bv->getNextTrueBit(bv->getStartIndex()));
+ uint32_t prevLid = bv->getStartIndex();
+ for (auto lid : lids) {
+ EXPECT_EQUAL(lid, bv->getNextTrueBit(prevLid + 1));
+ EXPECT_EQUAL(prevLid, bv->getPrevTrueBit(lid - 1));
+ prevLid = lid;
+ }
+ EXPECT_TRUE(bv->getNextTrueBit(prevLid + 1) >= end);
+ EXPECT_EQUAL(prevLid, bv->getPrevTrueBit(end - 1));
+}
+
+void
+scanWithOffset(uint32_t offset)
+{
+ Rand48 rnd;
+
+ rnd.srand48(32);
+ scan(10, offset, 1000000, rnd);
+ scan(100, offset, 1000000, rnd);
+ scan(1000, offset, 1000000, rnd);
+ scan(10000, offset, 1000000, rnd);
+ scan(100000, offset, 1000000, rnd);
+ scan(500000, offset, 1000000, rnd);
+ scan(1000000, offset, 1000000, rnd);
+}
+
+}
+
+bool
+assertBV(const std::string & exp, const BitVector & act)
+{
+ bool res1 = EXPECT_EQUAL(exp, toString(act));
+ search::fef::TermFieldMatchData f;
+ search::fef::TermFieldMatchDataArray a;
+ a.add(&f);
+ queryeval::SearchIterator::UP it(BitVectorIterator::create(&act, a, true));
+ BitVectorIterator & b(dynamic_cast<BitVectorIterator &>(*it));
+ bool res2 = EXPECT_EQUAL(exp, toString(b));
+ return res1 && res2;
+}
+
+void
+fill(BitVector & bv, const std::vector<uint32_t> & bits, uint32_t offset)
+{
+ for (uint32_t bit : bits) {
+ bv.setBit(bit + offset);
+ }
+}
+
+vespalib::string
+fill(const std::vector<uint32_t> & bits, uint32_t offset)
+{
+ vespalib::asciistream os;
+ os << "[";
+ size_t count(0);
+ for (uint32_t bit : bits) {
+ count++;
+ os << bit + offset;
+ if (count != bits.size()) { os << ","; }
+ }
+ os << "]";
+ return os.str();
+}
+
+std::vector<uint32_t> A = {7, 39, 71, 103};
+std::vector<uint32_t> B = {15, 39, 71, 100};
+
+void
+testAnd(uint32_t offset)
+{
+ uint32_t end = offset + 128;
+ BitVector::UP v1(BitVector::create(offset, end));
+ BitVector::UP v2(BitVector::create(offset, end));
+ BitVector::UP v3(BitVector::create(offset, end));
+
+ fill(*v1, A, offset);
+ fill(*v3, A, offset);
+ fill(*v2, B, offset);
+ EXPECT_TRUE(assertBV(fill(A, offset), *v1));
+ EXPECT_TRUE(assertBV(fill(B, offset), *v2));
+
+ EXPECT_TRUE(assertBV(fill(A, offset), *v3));
+ v3->andWith(*v2);
+ EXPECT_TRUE(assertBV(fill({39,71}, offset), *v3));
+
+ EXPECT_TRUE(assertBV(fill(A, offset), *v1));
+ EXPECT_TRUE(assertBV(fill(B, offset), *v2));
+}
+
+void
+testOr(uint32_t offset)
+{
+ uint32_t end = offset + 128;
+ BitVector::UP v1(BitVector::create(offset, end));
+ BitVector::UP v2(BitVector::create(offset, end));
+ BitVector::UP v3(BitVector::create(offset, end));
+
+ fill(*v1, A, offset);
+ fill(*v3, A, offset);
+ fill(*v2, B, offset);
+ EXPECT_TRUE(assertBV(fill(A, offset), *v1));
+ EXPECT_TRUE(assertBV(fill(B, offset), *v2));
+
+ EXPECT_TRUE(assertBV(fill(A, offset), *v3));
+ v3->orWith(*v2);
+ EXPECT_TRUE(assertBV(fill({7,15,39,71,100,103}, offset), *v3));
+
+ EXPECT_TRUE(assertBV(fill(A, offset), *v1));
+ EXPECT_TRUE(assertBV(fill(B, offset), *v2));
+}
+
+void
+testAndNot(uint32_t offset)
+{
+ uint32_t end = offset + 128;
+ BitVector::UP v1(BitVector::create(offset, end));
+ BitVector::UP v2(BitVector::create(offset, end));
+ BitVector::UP v3(BitVector::create(offset, end));
+
+ fill(*v1, A, offset);
+ fill(*v3, A, offset);
+ fill(*v2, B, offset);
+ EXPECT_TRUE(assertBV(fill(A, offset), *v1));
+ EXPECT_TRUE(assertBV(fill(B, offset), *v2));
+
+ EXPECT_TRUE(assertBV(fill(A, offset), *v3));
+ v3->andNotWith(*v2);
+ EXPECT_TRUE(assertBV(fill({7,103}, offset), *v3));
+
+ EXPECT_TRUE(assertBV(fill(A, offset), *v1));
+ EXPECT_TRUE(assertBV(fill(B, offset), *v2));
+
+ v3->clear();
+ fill(*v3, A, offset);
+ EXPECT_TRUE(assertBV(fill(A, offset), *v3));
+
+
+ std::vector<RankedHit> rh;
+ rh.emplace_back(15u+offset, 0.0);
+ rh.emplace_back(39u+offset, 0.0);
+ rh.emplace_back(71u+offset, 0.0);
+ rh.emplace_back(100u+offset, 0.0);
+
+ v3->andNotWithT(RankedHitIterator(&rh[0], 4));
+ EXPECT_TRUE(assertBV(fill({7,103}, offset), *v3));
+}
+
+TEST("requireThatSequentialOperationsOnPartialWorks")
+{
+ PartialBitVector p1(717,919);
+
+ EXPECT_FALSE(p1.hasTrueBits());
+ EXPECT_EQUAL(0u, p1.countTrueBits());
+ p1.setBit(719);
+ EXPECT_EQUAL(0u, p1.countTrueBits());
+ p1.invalidateCachedCount();
+ EXPECT_TRUE(p1.hasTrueBits());
+ EXPECT_EQUAL(1u, p1.countTrueBits());
+ p1.slowSetBit(718);
+ p1.slowSetBit(739);
+ p1.slowSetBit(871);
+ p1.slowSetBit(903);
+ EXPECT_EQUAL(5u, p1.countTrueBits());
+ EXPECT_TRUE(assertBV("[718,719,739,871,903]", p1));
+
+ PartialBitVector p2(717,919);
+ EXPECT_FALSE(p1 == p2);
+ p2.slowSetBit(719);
+ p2.slowSetBit(718);
+ p2.slowSetBit(739);
+ p2.slowSetBit(871);
+ EXPECT_FALSE(p1 == p2);
+ p2.slowSetBit(903);
+ EXPECT_TRUE(p1 == p2);
+
+ AllocatedBitVector full(1000);
+ full.setInterval(0, 1000);
+ EXPECT_EQUAL(5u, p2.countTrueBits());
+ p2.orWith(full);
+ EXPECT_EQUAL(202u, p2.countTrueBits());
+}
+
+TEST("requireThatInitRangeStaysWithinBounds") {
+ AllocatedBitVector v1(128);
+ search::fef::TermFieldMatchData f;
+ search::fef::TermFieldMatchDataArray a;
+ a.add(&f);
+ queryeval::SearchIterator::UP it(BitVectorIterator::create(&v1, a, true));
+ it->initRange(700, 800);
+ EXPECT_TRUE(it->isAtEnd());
+}
+
+TEST("requireThatAndWorks") {
+ for (uint32_t offset(0); offset < 100; offset++) {
+ testAnd(offset);
+ }
+}
+
+TEST("requireThatOrWorks") {
+ for (uint32_t offset(0); offset < 100; offset++) {
+ testOr(offset);
+ }
+}
+
+
+TEST("requireThatAndNotWorks") {
+ for (uint32_t offset(0); offset < 100; offset++) {
+ testAndNot(offset);
+ }
+}
+
+TEST("requireThatClearWorks")
+{
+ AllocatedBitVector v1(128);
+
+ v1.setBit(7);
+ v1.setBit(39);
+ v1.setBit(71);
+ v1.setBit(103);
+ EXPECT_TRUE(assertBV("[7,39,71,103]", v1));
+
+ v1.clear();
+ EXPECT_TRUE(assertBV("[]", v1));
+}
+
+TEST("requireThatForEachWorks") {
+ AllocatedBitVector v1(128);
+
+ v1.setBit(7);
+ v1.setBit(39);
+ v1.setBit(71);
+ v1.setBit(103);
+ EXPECT_EQUAL(128u, v1.size());
+
+ size_t sum(0);
+ v1.foreach_truebit([&](uint32_t key) { sum += key; });
+ EXPECT_EQUAL(220u, sum);
+
+ sum = 0;
+ v1.foreach_truebit([&](uint32_t key) { sum += key; }, 7);
+ EXPECT_EQUAL(220u, sum);
+
+ sum = 0;
+ v1.foreach_truebit([&](uint32_t key) { sum += key; }, 6, 7);
+ EXPECT_EQUAL(0u, sum);
+ sum = 0;
+ v1.foreach_truebit([&](uint32_t key) { sum += key; }, 7, 8);
+ EXPECT_EQUAL(7u, sum);
+ sum = 0;
+ v1.foreach_truebit([&](uint32_t key) { sum += key; }, 8, 9);
+ EXPECT_EQUAL(0u, sum);
+
+ sum = 0;
+ v1.foreach_truebit([&](uint32_t key) { sum += key; }, 8);
+ EXPECT_EQUAL(213u, sum);
+
+ sum = 0;
+ v1.foreach_falsebit([&](uint32_t key) { sum += key; }, 5, 6);
+ EXPECT_EQUAL(5u, sum);
+
+ sum = 0;
+ v1.foreach_falsebit([&](uint32_t key) { sum += key; }, 5, 7);
+ EXPECT_EQUAL(11u, sum);
+
+ sum = 0;
+ v1.foreach_falsebit([&](uint32_t key) { sum += key; }, 5, 8);
+ EXPECT_EQUAL(11u, sum);
+
+ sum = 0;
+ v1.foreach_falsebit([&](uint32_t key) { sum += key; }, 5, 9);
+ EXPECT_EQUAL(19u, sum);
+
+ sum = 0;
+ v1.foreach_falsebit([&](uint32_t key) { sum += key; }, 6);
+ EXPECT_EQUAL(size_t((((6+127)*(127-6 + 1)) >> 1) - 220), sum);
+}
+
+
+TEST("requireThatSetWorks")
+{
+ AllocatedBitVector v1(128);
+
+ v1.setBit(7);
+ v1.setBit(39);
+ v1.setBit(71);
+ v1.setBit(103);
+ EXPECT_TRUE(assertBV("[7,39,71,103]", v1));
+ v1.invalidateCachedCount();
+ EXPECT_EQUAL(4u, v1.countTrueBits());
+
+ v1.setBit(80);
+ EXPECT_EQUAL(4u, v1.countTrueBits());
+ v1.invalidateCachedCount();
+ EXPECT_EQUAL(5u, v1.countTrueBits());
+ EXPECT_TRUE(assertBV("[7,39,71,80,103]", v1));
+
+ v1.clearBit(35);
+ EXPECT_EQUAL(5u, v1.countTrueBits());
+ v1.invalidateCachedCount();
+ EXPECT_EQUAL(5u, v1.countTrueBits());
+ EXPECT_TRUE(assertBV("[7,39,71,80,103]", v1));
+ v1.clearBit(71);
+ EXPECT_EQUAL(5u, v1.countTrueBits());
+ v1.invalidateCachedCount();
+ EXPECT_EQUAL(4u, v1.countTrueBits());
+ EXPECT_TRUE(assertBV("[7,39,80,103]", v1));
+
+ v1.slowSetBit(39);
+ EXPECT_EQUAL(4u, v1.countTrueBits());
+ EXPECT_TRUE(assertBV("[7,39,80,103]", v1));
+ v1.slowSetBit(57);
+ EXPECT_EQUAL(5u, v1.countTrueBits());
+ EXPECT_TRUE(assertBV("[7,39,57,80,103]", v1));
+}
+
+
+TEST("requireThatClearIntervalWorks")
+{
+ AllocatedBitVector v1(1200);
+
+ v1.setBit(7);
+ v1.setBit(39);
+ v1.setBit(71);
+ v1.setBit(103);
+ v1.setBit(200);
+ v1.setBit(500);
+ EXPECT_TRUE(assertBV("[7,39,71,103,200,500]", v1));
+
+ v1.clearInterval(40, 70);
+ EXPECT_TRUE(assertBV("[7,39,71,103,200,500]", v1));
+ v1.clearInterval(39, 71);
+ EXPECT_TRUE(assertBV("[7,71,103,200,500]", v1));
+ v1.clearInterval(39, 72);
+ EXPECT_TRUE(assertBV("[7,103,200,500]", v1));
+ v1.clearInterval(20, 501);
+ EXPECT_TRUE(assertBV("[7]", v1));
+}
+
+
+TEST("requireThatSetIntervalWorks")
+{
+ AllocatedBitVector v1(1200);
+
+ EXPECT_FALSE(v1.hasTrueBits());
+ v1.setBit(7);
+ v1.setBit(39);
+ v1.setBit(71);
+ v1.setBit(103);
+ v1.setBit(200);
+ v1.setBit(500);
+ EXPECT_TRUE(assertBV("[7,39,71,103,200,500]", v1));
+
+ v1.setInterval(40, 46);
+ EXPECT_TRUE(assertBV("[7,39,40,41,42,43,44,45,71,103,200,500]", v1));
+ EXPECT_TRUE(v1.hasTrueBits());
+ v1.invalidateCachedCount();
+ EXPECT_EQUAL(12u, v1.countTrueBits());
+ EXPECT_EQUAL(12u, v1.countInterval(1, 1199));
+ EXPECT_EQUAL(12u, myCountInterval(v1, 1, 1199));
+
+ v1.setInterval(40, 200);
+ EXPECT_EQUAL(164u, v1.countInterval(1, 1199));
+ EXPECT_EQUAL(164u, myCountInterval(v1, 1, 1199));
+ EXPECT_EQUAL(163u, v1.countInterval(1, 201));
+ EXPECT_EQUAL(162u, v1.countInterval(1, 200));
+ EXPECT_EQUAL(163u, v1.countInterval(7, 201));
+ EXPECT_EQUAL(162u, v1.countInterval(8, 201));
+ EXPECT_EQUAL(161u, v1.countInterval(8, 200));
+ v1.clearInterval(72, 174);
+ EXPECT_EQUAL(62u, v1.countInterval(1, 1199));
+ EXPECT_EQUAL(62u, myCountInterval(v1, 1, 1199));
+ EXPECT_EQUAL(61u, v1.countInterval(1, 201));
+ EXPECT_EQUAL(60u, v1.countInterval(1, 200));
+ EXPECT_EQUAL(61u, v1.countInterval(7, 201));
+ EXPECT_EQUAL(60u, v1.countInterval(8, 201));
+ EXPECT_EQUAL(59u, v1.countInterval(8, 200));
+ EXPECT_EQUAL(51u, v1.countInterval(8, 192));
+ EXPECT_EQUAL(50u, v1.countInterval(8, 191));
+
+ EXPECT_EQUAL(1u, v1.countInterval(1, 20));
+ EXPECT_EQUAL(1u, v1.countInterval(7, 20));
+ EXPECT_EQUAL(0u, v1.countInterval(8, 20));
+ EXPECT_EQUAL(1u, v1.countInterval(1, 8));
+ EXPECT_EQUAL(0u, v1.countInterval(1, 7));
+}
+
+TEST("requireThatScanWorks")
+{
+ scanWithOffset(0);
+ scanWithOffset(19876);
+}
+
+TEST("requireThatGrowWorks")
+{
+ vespalib::GenerationHolder g;
+ GrowableBitVector v(200, 200, g);
+
+ v.setBit(7);
+ v.setBit(39);
+ v.setBit(71);
+ v.setBit(103);
+
+ EXPECT_EQUAL(200u, v.size());
+ v.invalidateCachedCount();
+ EXPECT_TRUE(assertBV("[7,39,71,103]", v));
+ EXPECT_EQUAL(4u, v.countTrueBits());
+ v.reserve(204);
+ EXPECT_EQUAL(200u, v.size());
+ EXPECT_EQUAL(204u, v.capacity());
+ EXPECT_TRUE(assertBV("[7,39,71,103]", v));
+ EXPECT_EQUAL(4u, v.countTrueBits());
+ v.extend(202);
+ EXPECT_EQUAL(202u, v.size());
+ EXPECT_EQUAL(204u, v.capacity());
+ EXPECT_TRUE(assertBV("[7,39,71,103]", v));
+ EXPECT_EQUAL(4u, v.countTrueBits());
+ v.shrink(200);
+ EXPECT_EQUAL(200u, v.size());
+ EXPECT_EQUAL(204u, v.capacity());
+ EXPECT_TRUE(assertBV("[7,39,71,103]", v));
+ EXPECT_EQUAL(4u, v.countTrueBits());
+ v.reserve(204);
+ EXPECT_EQUAL(200u, v.size());
+ EXPECT_EQUAL(204u, v.capacity());
+ EXPECT_TRUE(assertBV("[7,39,71,103]", v));
+ EXPECT_EQUAL(4u, v.countTrueBits());
+ v.shrink(202);
+ EXPECT_EQUAL(202u, v.size());
+ EXPECT_EQUAL(204u, v.capacity());
+ EXPECT_TRUE(assertBV("[7,39,71,103]", v));
+ EXPECT_EQUAL(4u, v.countTrueBits());
+
+ v.shrink(100);
+ EXPECT_EQUAL(100u, v.size());
+ EXPECT_EQUAL(204u, v.capacity());
+ EXPECT_TRUE(assertBV("[7,39,71]", v));
+ EXPECT_EQUAL(3u, v.countTrueBits());
+ g.transferHoldLists(1);
+ g.trimHoldLists(2);
+}
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/common/bitvector/condensedbitvector_test.cpp b/searchlib/src/tests/common/bitvector/condensedbitvector_test.cpp
new file mode 100644
index 00000000000..eddd3941c35
--- /dev/null
+++ b/searchlib/src/tests/common/bitvector/condensedbitvector_test.cpp
@@ -0,0 +1,49 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/vespalib/testkit/testapp.h>
+#include <vespa/searchlib/common/condensedbitvectors.h>
+#include <vespa/log/log.h>
+
+LOG_SETUP("condensedbitvector_test");
+
+using search::CondensedBitVector;
+using vespalib::GenerationHolder;
+
+TEST("Verify state after init")
+{
+ GenerationHolder genHolder;
+ CondensedBitVector::UP cbv(CondensedBitVector::create(8, genHolder));
+ EXPECT_EQUAL(32u, cbv->getKeyCapacity());
+ EXPECT_EQUAL(8u, cbv->getCapacity());
+ EXPECT_EQUAL(8u, cbv->getSize());
+}
+
+
+TEST("Verify set/get")
+{
+ GenerationHolder genHolder;
+ CondensedBitVector::UP cbv(CondensedBitVector::create(8, genHolder));
+ for (size_t i(0); i < 32; i++) {
+ for (size_t j(0); j < 8; j++) {
+ EXPECT_FALSE(cbv->get(i,j));
+ }
+ }
+ cbv->set(23,5, false);
+ EXPECT_FALSE(cbv->get(23, 5));
+ for (size_t i(0); i < 32; i++) {
+ for (size_t j(0); j < 8; j++) {
+ EXPECT_FALSE(cbv->get(i,j));
+ }
+ }
+ cbv->set(23,5, true);
+ EXPECT_TRUE(cbv->get(23, 5));
+ size_t sum(0);
+ for (size_t i(0); i < 32; i++) {
+ for (size_t j(0); j < 8; j++) {
+ sum += cbv->get(i,j) ? 1 : 0;
+ }
+ }
+ EXPECT_EQUAL(1u, sum);
+}
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/common/foregroundtaskexecutor/.gitignore b/searchlib/src/tests/common/foregroundtaskexecutor/.gitignore
new file mode 100644
index 00000000000..0bd7759156b
--- /dev/null
+++ b/searchlib/src/tests/common/foregroundtaskexecutor/.gitignore
@@ -0,0 +1 @@
+searchlib_foregroundtaskexecutor_test_app
diff --git a/searchlib/src/tests/common/foregroundtaskexecutor/CMakeLists.txt b/searchlib/src/tests/common/foregroundtaskexecutor/CMakeLists.txt
new file mode 100644
index 00000000000..dd0e5c0b039
--- /dev/null
+++ b/searchlib/src/tests/common/foregroundtaskexecutor/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_foregroundtaskexecutor_test_app
+ SOURCES
+ foregroundtaskexecutor_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_foregroundtaskexecutor_test_app COMMAND searchlib_foregroundtaskexecutor_test_app)
diff --git a/searchlib/src/tests/common/foregroundtaskexecutor/DESC b/searchlib/src/tests/common/foregroundtaskexecutor/DESC
new file mode 100644
index 00000000000..bfa0dfa3e6a
--- /dev/null
+++ b/searchlib/src/tests/common/foregroundtaskexecutor/DESC
@@ -0,0 +1 @@
+foregroundtaskexecutor test. Take a look at foregroundtaskexecutor_test.cpp for details.
diff --git a/searchlib/src/tests/common/foregroundtaskexecutor/FILES b/searchlib/src/tests/common/foregroundtaskexecutor/FILES
new file mode 100644
index 00000000000..5c0c9178abd
--- /dev/null
+++ b/searchlib/src/tests/common/foregroundtaskexecutor/FILES
@@ -0,0 +1 @@
+foregroundtaskexecutor_test.cpp
diff --git a/searchlib/src/tests/common/foregroundtaskexecutor/foregroundtaskexecutor_test.cpp b/searchlib/src/tests/common/foregroundtaskexecutor/foregroundtaskexecutor_test.cpp
new file mode 100644
index 00000000000..49ebbf12bc0
--- /dev/null
+++ b/searchlib/src/tests/common/foregroundtaskexecutor/foregroundtaskexecutor_test.cpp
@@ -0,0 +1,124 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("foregroundtaskexecutor_test");
+#include <vespa/searchlib/common/foregroundtaskexecutor.h>
+#include <vespa/vespalib/testkit/testapp.h>
+
+#include <mutex>
+#include <condition_variable>
+
+namespace search
+{
+
+namespace common
+{
+
+
+class Fixture
+{
+public:
+ ForegroundTaskExecutor _threads;
+
+ Fixture()
+ : _threads()
+ {
+ }
+};
+
+
+class TestObj
+{
+public:
+ std::mutex _m;
+ std::condition_variable _cv;
+ int _done;
+ int _fail;
+ int _val;
+
+ TestObj()
+ : _m(),
+ _cv(),
+ _done(0),
+ _fail(0),
+ _val(0)
+ {
+ }
+
+ void
+ modify(int oldValue, int newValue)
+ {
+ {
+ std::lock_guard<std::mutex> guard(_m);
+ if (_val == oldValue) {
+ _val = newValue;
+ } else {
+ ++_fail;
+ }
+ ++_done;
+ }
+ _cv.notify_all();
+ }
+
+ void
+ wait(int wantDone)
+ {
+ std::unique_lock<std::mutex> guard(_m);
+ _cv.wait(guard, [=] { return this->_done >= wantDone; });
+ }
+};
+
+TEST_F("testExecute", Fixture) {
+ std::shared_ptr<TestObj> tv(std::make_shared<TestObj>());
+ EXPECT_EQUAL(0, tv->_val);
+ f._threads.execute(1, [=]() { tv->modify(0, 42); });
+ tv->wait(1);
+ EXPECT_EQUAL(0, tv->_fail);
+ EXPECT_EQUAL(42, tv->_val);
+ f._threads.sync();
+ EXPECT_EQUAL(0, tv->_fail);
+ EXPECT_EQUAL(42, tv->_val);
+}
+
+
+TEST_F("require that task with same id are serialized", Fixture)
+{
+ std::shared_ptr<TestObj> tv(std::make_shared<TestObj>());
+ EXPECT_EQUAL(0, tv->_val);
+ f._threads.execute(0, [=]() { usleep(2000); tv->modify(0, 14); });
+ f._threads.execute(0, [=]() { tv->modify(14, 42); });
+ tv->wait(2);
+ EXPECT_EQUAL(0, tv->_fail);
+ EXPECT_EQUAL(42, tv->_val);
+ f._threads.sync();
+ EXPECT_EQUAL(0, tv->_fail);
+ EXPECT_EQUAL(42, tv->_val);
+}
+
+TEST_F("require that task with different ids are serialized", Fixture)
+{
+ int tryCnt = 0;
+ for (tryCnt = 0; tryCnt < 100; ++tryCnt) {
+ std::shared_ptr<TestObj> tv(std::make_shared<TestObj>());
+ EXPECT_EQUAL(0, tv->_val);
+ f._threads.execute(0, [=]() { usleep(2000); tv->modify(0, 14); });
+ f._threads.execute(1, [=]() { tv->modify(14, 42); });
+ tv->wait(2);
+ if (tv->_fail != 1) {
+ continue;
+ }
+ EXPECT_EQUAL(1, tv->_fail);
+ EXPECT_EQUAL(14, tv->_val);
+ f._threads.sync();
+ EXPECT_EQUAL(1, tv->_fail);
+ EXPECT_EQUAL(14, tv->_val);
+ break;
+ }
+ EXPECT_TRUE(tryCnt >= 100);
+}
+
+
+} // namespace common
+} // namespace search
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/common/location/.gitignore b/searchlib/src/tests/common/location/.gitignore
new file mode 100644
index 00000000000..ec9acbe771e
--- /dev/null
+++ b/searchlib/src/tests/common/location/.gitignore
@@ -0,0 +1 @@
+searchlib_location_test_app
diff --git a/searchlib/src/tests/common/location/CMakeLists.txt b/searchlib/src/tests/common/location/CMakeLists.txt
new file mode 100644
index 00000000000..3617657cdf9
--- /dev/null
+++ b/searchlib/src/tests/common/location/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_location_test_app
+ SOURCES
+ location_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_location_test_app COMMAND searchlib_location_test_app)
diff --git a/searchlib/src/tests/common/location/FILES b/searchlib/src/tests/common/location/FILES
new file mode 100644
index 00000000000..7bd6fa8b581
--- /dev/null
+++ b/searchlib/src/tests/common/location/FILES
@@ -0,0 +1 @@
+location_test.cpp
diff --git a/searchlib/src/tests/common/location/location_test.cpp b/searchlib/src/tests/common/location/location_test.cpp
new file mode 100644
index 00000000000..1cbe24ec225
--- /dev/null
+++ b/searchlib/src/tests/common/location/location_test.cpp
@@ -0,0 +1,119 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/vespalib/testkit/test_kit.h>
+#include <vespa/searchlib/common/location.h>
+
+using search::common::Location;
+
+bool is_parseable(const char *str) {
+ Location loc;
+ return loc.parse(str);
+}
+
+Location parse(const char *str) {
+ Location loc;
+ if (!EXPECT_TRUE(loc.parse(str))) {
+ fprintf(stderr, " parse error: %s\n", loc.getParseError());
+ }
+ return loc;
+}
+
+TEST("require that malformed bounding boxes are not parseable") {
+ EXPECT_TRUE(is_parseable("[2,10,20,30,40]"));
+ EXPECT_FALSE(is_parseable("[2,10,20,30,40][2,10,20,30,40]"));
+ EXPECT_FALSE(is_parseable("[1,10,20,30,40]"));
+ EXPECT_FALSE(is_parseable("[3,10,20,30,40]"));
+ EXPECT_FALSE(is_parseable("[2, 10, 20, 30, 40]"));
+ EXPECT_FALSE(is_parseable("[2,10,20,30,40"));
+ EXPECT_FALSE(is_parseable("[2,10,20,30]"));
+ EXPECT_FALSE(is_parseable("[10,20,30,40]"));
+}
+
+TEST("require that malformed circles are not parseable") {
+ EXPECT_TRUE(is_parseable("(2,10,20,5,0,0,0)"));
+ EXPECT_FALSE(is_parseable("(2,10,20,5,0,0,0)(2,10,20,5,0,0,0)"));
+ EXPECT_FALSE(is_parseable("(1,10,20,5,0,0,0)"));
+ EXPECT_FALSE(is_parseable("(3,10,20,5,0,0,0)"));
+ EXPECT_FALSE(is_parseable("(2, 10, 20, 5, 0, 0, 0)"));
+ EXPECT_FALSE(is_parseable("(2,10,20,5)"));
+ EXPECT_FALSE(is_parseable("(2,10,20,5,0,0,0"));
+ EXPECT_FALSE(is_parseable("(2,10,20,5,0,0,0,1000"));
+ EXPECT_FALSE(is_parseable("(10,20,5)"));
+}
+
+TEST("require that bounding boxes can be parsed") {
+ Location loc = parse("[2,10,20,30,40]");
+ EXPECT_EQUAL(false, loc.getRankOnDistance());
+ EXPECT_EQUAL(true, loc.getPruneOnDistance());
+ EXPECT_EQUAL(0u, loc.getXAspect());
+ EXPECT_EQUAL(0, loc.getX());
+ EXPECT_EQUAL(0, loc.getY());
+ EXPECT_EQUAL(std::numeric_limits<uint32_t>::max(), loc.getRadius());
+ EXPECT_EQUAL(10, loc.getMinX());
+ EXPECT_EQUAL(20, loc.getMinY());
+ EXPECT_EQUAL(30, loc.getMaxX());
+ EXPECT_EQUAL(40, loc.getMaxY());
+}
+
+TEST("require that circles can be parsed") {
+ Location loc = parse("(2,10,20,5,0,0,0)");
+ EXPECT_EQUAL(true, loc.getRankOnDistance());
+ EXPECT_EQUAL(true, loc.getPruneOnDistance());
+ EXPECT_EQUAL(0u, loc.getXAspect());
+ EXPECT_EQUAL(10, loc.getX());
+ EXPECT_EQUAL(20, loc.getY());
+ EXPECT_EQUAL(5u, loc.getRadius());
+ EXPECT_EQUAL(5, loc.getMinX());
+ EXPECT_EQUAL(15, loc.getMinY());
+ EXPECT_EQUAL(15, loc.getMaxX());
+ EXPECT_EQUAL(25, loc.getMaxY());
+}
+
+TEST("require that circles can have aspect ratio") {
+ Location loc = parse("(2,10,20,5,0,0,0,2147483648)");
+ EXPECT_EQUAL(true, loc.getRankOnDistance());
+ EXPECT_EQUAL(true, loc.getPruneOnDistance());
+ EXPECT_EQUAL(2147483648u, loc.getXAspect());
+ EXPECT_EQUAL(10, loc.getX());
+ EXPECT_EQUAL(20, loc.getY());
+ EXPECT_EQUAL(5u, loc.getRadius());
+ EXPECT_EQUAL(-1, loc.getMinX());
+ EXPECT_EQUAL(15, loc.getMinY());
+ EXPECT_EQUAL(21, loc.getMaxX());
+ EXPECT_EQUAL(25, loc.getMaxY());
+}
+
+TEST("require that bounding box can be specified after circle") {
+ Location loc = parse("(2,10,20,5,0,0,0)[2,10,20,30,40]");
+ EXPECT_EQUAL(true, loc.getRankOnDistance());
+ EXPECT_EQUAL(true, loc.getPruneOnDistance());
+ EXPECT_EQUAL(0u, loc.getXAspect());
+ EXPECT_EQUAL(10, loc.getX());
+ EXPECT_EQUAL(20, loc.getY());
+ EXPECT_EQUAL(5u, loc.getRadius());
+ EXPECT_EQUAL(10, loc.getMinX());
+ EXPECT_EQUAL(20, loc.getMinY());
+ EXPECT_EQUAL(15, loc.getMaxX());
+ EXPECT_EQUAL(25, loc.getMaxY());
+}
+
+TEST("require that circles can be specified after bounding box") {
+ Location loc = parse("[2,10,20,30,40](2,10,20,5,0,0,0)");
+ EXPECT_EQUAL(true, loc.getRankOnDistance());
+ EXPECT_EQUAL(true, loc.getPruneOnDistance());
+ EXPECT_EQUAL(0u, loc.getXAspect());
+ EXPECT_EQUAL(10, loc.getX());
+ EXPECT_EQUAL(20, loc.getY());
+ EXPECT_EQUAL(5u, loc.getRadius());
+ EXPECT_EQUAL(10, loc.getMinX());
+ EXPECT_EQUAL(20, loc.getMinY());
+ EXPECT_EQUAL(15, loc.getMaxX());
+ EXPECT_EQUAL(25, loc.getMaxY());
+}
+
+TEST("require that santa search gives non-wrapped bounding box") {
+ Location loc = parse("(2,122163600,89998536,290112,4,2000,0,109704)");
+ EXPECT_GREATER_EQUAL(loc.getMaxX(), loc.getMinX());
+ EXPECT_GREATER_EQUAL(loc.getMaxY(), loc.getMinY());
+}
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/common/packets/.gitignore b/searchlib/src/tests/common/packets/.gitignore
new file mode 100644
index 00000000000..e3dcf5376d5
--- /dev/null
+++ b/searchlib/src/tests/common/packets/.gitignore
@@ -0,0 +1,4 @@
+.depend
+Makefile
+packets_test
+searchlib_packets_test_app
diff --git a/searchlib/src/tests/common/packets/CMakeLists.txt b/searchlib/src/tests/common/packets/CMakeLists.txt
new file mode 100644
index 00000000000..e35883b1d8c
--- /dev/null
+++ b/searchlib/src/tests/common/packets/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_packets_test_app
+ SOURCES
+ packets_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_packets_test_app COMMAND searchlib_packets_test_app)
diff --git a/searchlib/src/tests/common/packets/DESC b/searchlib/src/tests/common/packets/DESC
new file mode 100644
index 00000000000..0808703b5fb
--- /dev/null
+++ b/searchlib/src/tests/common/packets/DESC
@@ -0,0 +1 @@
+packets test. Take a look at packets.cpp for details.
diff --git a/searchlib/src/tests/common/packets/FILES b/searchlib/src/tests/common/packets/FILES
new file mode 100644
index 00000000000..35191f9a36d
--- /dev/null
+++ b/searchlib/src/tests/common/packets/FILES
@@ -0,0 +1 @@
+packets.cpp
diff --git a/searchlib/src/tests/common/packets/packets_test.cpp b/searchlib/src/tests/common/packets/packets_test.cpp
new file mode 100644
index 00000000000..443436537e1
--- /dev/null
+++ b/searchlib/src/tests/common/packets/packets_test.cpp
@@ -0,0 +1,705 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("packets_test");
+
+#include <vespa/searchlib/common/mapnames.h>
+#include <vespa/searchlib/common/packets.h>
+#include <vector>
+#include <vespa/vespalib/testkit/testapp.h>
+#include <vespa/vespalib/util/stringfmt.h>
+
+using namespace search::fs4transport;
+
+// ----------------------------------------------------------------------------
+//
+// Utilities
+//
+// ----------------------------------------------------------------------------
+
+#define QRF_RANKTYPE QRF_RANKTYPE_DOUBLE
+
+#define PCODE_BEGIN PCODE_EOL
+#define PCODE_END PCODE_LastCode
+
+class MyPersistentPacketStreamer : public FS4PersistentPacketStreamer {
+public:
+ MyPersistentPacketStreamer() :
+ FS4PersistentPacketStreamer(FS4PacketFactory::CreateFS4Packet) {
+ // empty
+ }
+
+ uint32_t getChannelId(uint32_t pcode, uint32_t chid) {
+ return HasChannelID(pcode) ? chid : -1u;
+ }
+};
+
+FNET_Packet *
+testEncodeDecode(FS4PersistentPacketStreamer &streamer, FNET_Packet &packet)
+{
+ FNET_Context ctx;
+ FNET_DataBuffer buf;
+ buf.WriteInt32(0xdeadbeef); // buffers can have extra data at the front.
+ streamer.Encode(&packet, 1u, &buf);
+ buf.DataToDead(sizeof(uint32_t));
+
+ FNET_DataBuffer lhs;
+ lhs.WriteBytes(buf.GetData(), buf.GetDataLen());
+
+ buf.WriteInt32(0xdeadbeef); // buffers can have extra data at the end.
+
+ bool broken;
+ uint32_t plen, pcode, chid;
+ MyPersistentPacketStreamer myStreamer;
+ EXPECT_TRUE(streamer.GetPacketInfo(&buf, &plen, &pcode, &chid, &broken));
+ if ((pcode & ~PCODE_MASK) == 0) {
+ EXPECT_EQUAL(packet.GetLength(), plen);
+ }
+ EXPECT_EQUAL(packet.GetPCODE() & PCODE_MASK, pcode & PCODE_MASK);
+ EXPECT_EQUAL(myStreamer.getChannelId(pcode, 1u), chid);
+
+ FNET_Packet *ret = streamer.Decode(&buf, plen, pcode, ctx);
+ ASSERT_TRUE(ret);
+ if (ret->GetPCODE() == (pcode & PCODE_MASK)) {
+ FNET_DataBuffer rhs;
+ streamer.Encode(ret, 1u, &rhs);
+ if (!EXPECT_TRUE(lhs.Equals(&rhs))) {
+ lhs.HexDump();
+ rhs.HexDump();
+ }
+ } else {
+ // Packet was transcoded.
+ }
+ return ret;
+}
+
+FNET_Packet *
+testEncodeDecode(FNET_Packet &packet)
+{
+ return testEncodeDecode(FS4PersistentPacketStreamer::Instance, packet);
+}
+
+void fillProperties(FS4Properties &props, const std::string &name,
+ uint32_t len) {
+ props.setName(name);
+ props.allocEntries(len);
+ for (uint32_t i = 0; i < len; ++i) {
+ std::string key = vespalib::make_string("key%d", i);
+ props.setKey(i, key);
+
+ std::string val = vespalib::make_string("val%d", i);
+ props.setValue(i, val);
+ }
+}
+
+void testProperties(FS4Properties &props, const std::string &name,
+ uint32_t len) {
+ EXPECT_EQUAL(name, props.getName());
+ EXPECT_EQUAL(name.size(), props.getNameLen());
+ for (uint32_t i = 0; i < len; ++i) {
+ std::string key = vespalib::make_string("key%d", i);
+ EXPECT_EQUAL(key, std::string(props.getKey(i), props.getKeyLen(i)));
+
+ std::string val = vespalib::make_string("val%d", i);
+ EXPECT_EQUAL(val,
+ std::string(props.getValue(i), props.getValueLen(i)));
+ }
+}
+
+
+// ----------------------------------------------------------------------------
+//
+// Tests
+//
+// ----------------------------------------------------------------------------
+
+document::GlobalId gid0("aaaaaaaaaaaa");
+document::GlobalId gid1("bbbbbbbbbbbb");
+
+TEST("testPacketArray") {
+ PacketArray arr;
+ for (uint32_t i = 0; i < 32; ++i) {
+ EXPECT_EQUAL(i, arr.Length());
+ arr.Add(new FNET_ControlPacket(i));
+ EXPECT_EQUAL(i, static_cast<FNET_ControlPacket&>(*arr.Array()[i]).GetCommand());
+ }
+ for (uint32_t i = 0; i < arr.Length(); ++i) {
+ delete static_cast<FNET_ControlPacket *>(arr.Array()[i]);
+ }
+}
+
+TEST("testPacketFactory") {
+ ASSERT_TRUE(FS4PacketFactory::CreateFS4Packet(PCODE_BEGIN - 1) == NULL);
+
+ ASSERT_TRUE(FS4PacketFactory::CreateFS4Packet(PCODE_END) == NULL);
+
+ for (uint32_t pcode = PCODE_BEGIN; pcode < PCODE_END; ++pcode) {
+ if ((pcode != PCODE_MLD_QUERYRESULT2_NOTUSED) &&
+ (pcode != PCODE_QUERY_NOTUSED) &&
+ (pcode != PCODE_QUERY2_NOTUSED) &&
+ (pcode != PCODE_MLD_GETDOCSUMS2_NOTUSED))
+ {
+ std::unique_ptr<FNET_Packet> aptr(FS4PacketFactory::CreateFS4Packet(pcode));
+ ASSERT_TRUE(aptr.get() != NULL);
+ EXPECT_EQUAL(pcode, aptr->GetPCODE());
+ }
+ }
+}
+
+TEST("testPersistentPacketStreamer") {
+ for (uint32_t pcode = PCODE_BEGIN; pcode < PCODE_END; ++pcode) {
+ if ((pcode == PCODE_QUERYX) ||
+ (pcode != PCODE_MLD_QUERYRESULT2_NOTUSED) ||
+ (pcode != PCODE_MLD_GETDOCSUMS2_NOTUSED))
+ {
+ continue;
+ }
+ std::unique_ptr<FNET_Packet> arg(FS4PacketFactory::CreateFS4Packet(pcode));
+ std::unique_ptr<FNET_Packet> ret(testEncodeDecode(FS4PersistentPacketStreamer::Instance, *arg));
+ EXPECT_TRUE(ret.get() != NULL);
+
+ FNET_Packet *raw = testEncodeDecode(FS4PersistentPacketStreamer::Instance,
+ *FS4PacketFactory::CreateFS4Packet(pcode));
+ EXPECT_TRUE(raw != NULL);
+ }
+}
+
+TEST("testProperties") {
+ FS4Properties src;
+ fillProperties(src, "foo", 32u);
+ testProperties(src, "foo", 32u);
+
+ FNET_DataBuffer buf;
+ src.encode(buf);
+ FNET_DataBuffer lhs;
+ lhs.WriteBytes(buf.GetData(), buf.GetDataLen());
+
+ uint32_t len = buf.GetDataLen();
+ FS4Properties dst;
+ dst.decode(buf, len);
+ EXPECT_EQUAL(src.getLength(), dst.getLength());
+
+ testProperties(dst, "foo", 32u);
+
+ FNET_DataBuffer rhs;
+ dst.encode(rhs);
+ EXPECT_TRUE(lhs.Equals(&rhs));
+}
+
+TEST("testEol") {
+ FS4Packet_EOL *src = dynamic_cast<FS4Packet_EOL*>(FS4PacketFactory::CreateFS4Packet(PCODE_EOL));
+ ASSERT_TRUE(src != NULL);
+
+ std::vector<FNET_Packet*> lst { src, testEncodeDecode(*src) };
+
+ for (FNET_Packet * packet : lst) {
+ FS4Packet_EOL *ptr = dynamic_cast<FS4Packet_EOL*>(packet);
+ ASSERT_TRUE(ptr != NULL);
+ EXPECT_EQUAL((uint32_t)PCODE_EOL, ptr->GetPCODE());
+ EXPECT_EQUAL(0u, ptr->GetLength());
+
+ delete ptr;
+ }
+}
+
+TEST("testError") {
+ FS4Packet_ERROR *src = dynamic_cast<FS4Packet_ERROR*>(FS4PacketFactory::CreateFS4Packet(PCODE_ERROR));
+ ASSERT_TRUE(src != NULL);
+ src->_errorCode = 1u;
+ src->setErrorMessage("foo");
+
+ std::vector<FNET_Packet*> lst { src, testEncodeDecode(*src) };
+
+ for (FNET_Packet * packet : lst) {
+ FS4Packet_ERROR *ptr = dynamic_cast<FS4Packet_ERROR*>(packet);
+ ASSERT_TRUE(ptr != NULL);
+ EXPECT_EQUAL((uint32_t)PCODE_ERROR, ptr->GetPCODE());
+ EXPECT_EQUAL(11u, ptr->GetLength());
+ EXPECT_EQUAL(1u, ptr->_errorCode);
+ EXPECT_EQUAL("foo", ptr->_message);
+
+ delete ptr;
+ }
+}
+
+TEST("testDocsum") {
+ FS4Packet_DOCSUM *src = dynamic_cast<FS4Packet_DOCSUM*>(FS4PacketFactory::CreateFS4Packet(PCODE_DOCSUM));
+ ASSERT_TRUE(src != NULL);
+ src->setGid(gid0);
+ src->SetBuf("foo", 3u);
+
+ std::vector<FNET_Packet*> lst { src, testEncodeDecode(*src) };
+
+ for (FNET_Packet * packet : lst) {
+ FS4Packet_DOCSUM *ptr = dynamic_cast<FS4Packet_DOCSUM*>(packet);
+ ASSERT_TRUE(ptr != NULL);
+ EXPECT_EQUAL((uint32_t)PCODE_DOCSUM, ptr->GetPCODE());
+ EXPECT_EQUAL(3u + 12u, ptr->GetLength());
+ EXPECT_EQUAL(gid0, ptr->getGid());
+ EXPECT_EQUAL("foo", std::string(ptr->getBuf().c_str(), ptr->getBuf().size()));
+
+ delete ptr;
+ }
+}
+
+TEST("testMonitorQueryX") {
+ FS4Packet_MONITORQUERYX *src = dynamic_cast<FS4Packet_MONITORQUERYX*>(FS4PacketFactory::CreateFS4Packet(PCODE_MONITORQUERYX));
+ ASSERT_TRUE(src != NULL);
+ src->_qflags = 1u;
+
+ std::vector<FNET_Packet*> lst;
+ for (uint32_t i = MQF_QFLAGS, len = (uint32_t)(MQF_QFLAGS << 1); i < len; ++i) {
+ if (i & ~FNET_MQF_SUPPORTED_MASK) {
+ continue; // not supported;
+ }
+ src->_features = i;
+ lst.push_back(testEncodeDecode(*src));
+ }
+ src->_features = (uint32_t)-1;
+ lst.push_back(src);
+
+ for (FNET_Packet * packet : lst) {
+ FS4Packet_MONITORQUERYX *ptr = dynamic_cast<FS4Packet_MONITORQUERYX*>(packet);
+ ASSERT_TRUE(ptr != NULL);
+ EXPECT_EQUAL((uint32_t)PCODE_MONITORQUERYX, ptr->GetPCODE());
+ EXPECT_EQUAL(ptr->_features & MQF_QFLAGS ? 1u : 0u, ptr->_qflags);
+
+ delete ptr;
+ }
+}
+
+TEST("testMonitorResultX") {
+ FS4Packet_MONITORRESULTX *src = dynamic_cast<FS4Packet_MONITORRESULTX*>(FS4PacketFactory::CreateFS4Packet(PCODE_MONITORRESULTX));
+ ASSERT_TRUE(src != NULL);
+ src->_partid = 1u;
+ src->_timestamp = 2u;
+ src->_totalNodes = 3u;
+ src->_activeNodes = 4u;
+ src->_totalParts = 5u;
+ src->_activeParts = 6u;
+ src->_rflags = 7u;
+
+ std::vector<FNET_Packet*> lst;
+ for (uint32_t i = MRF_MLD, len = (uint32_t)(MRF_RFLAGS << 1); i < len; ++i) {
+ if (i & ~FNET_MRF_SUPPORTED_MASK) {
+ continue; // not supported;
+ }
+ src->_features = i;
+ lst.push_back(testEncodeDecode(*src));
+ }
+ src->_features = (uint32_t)-1;
+ lst.push_back(src);
+
+ for (FNET_Packet * packet : lst) {
+ FS4Packet_MONITORRESULTX *ptr = dynamic_cast<FS4Packet_MONITORRESULTX*>(packet);
+ ASSERT_TRUE(ptr != NULL);
+ EXPECT_EQUAL((uint32_t)PCODE_MONITORRESULTX, ptr->GetPCODE());
+ EXPECT_EQUAL(1u, ptr->_partid);
+ EXPECT_EQUAL(2u, ptr->_timestamp);
+ EXPECT_EQUAL(ptr->_features & MRF_MLD ? 3u : 0u, ptr->_totalNodes);
+ EXPECT_EQUAL(ptr->_features & MRF_MLD ? 4u : 0u, ptr->_activeNodes);
+ EXPECT_EQUAL(ptr->_features & MRF_MLD ? 5u : 0u, ptr->_totalParts);
+ EXPECT_EQUAL(ptr->_features & MRF_MLD ? 6u : 0u, ptr->_activeParts);
+ EXPECT_EQUAL(ptr->_features & MRF_RFLAGS ? 7u : 0u, ptr->_rflags);
+
+ delete ptr;
+ }
+}
+
+TEST("testClearCaches") {
+ FS4Packet_CLEARCACHES *src = dynamic_cast<FS4Packet_CLEARCACHES*>(FS4PacketFactory::CreateFS4Packet(PCODE_CLEARCACHES));
+ ASSERT_TRUE(src != NULL);
+
+ std::vector<FNET_Packet*> lst { src, testEncodeDecode(*src) };
+
+ for (FNET_Packet * packet : lst) {
+ FS4Packet_CLEARCACHES *ptr = dynamic_cast<FS4Packet_CLEARCACHES*>(packet);
+ ASSERT_TRUE(ptr != NULL);
+ EXPECT_EQUAL((uint32_t)PCODE_CLEARCACHES, ptr->GetPCODE());
+ EXPECT_EQUAL(0u, ptr->GetLength());
+
+ delete ptr;
+ }
+}
+
+TEST("testQueueLen") {
+ FS4Packet_QUEUELEN *src = dynamic_cast<FS4Packet_QUEUELEN*>(FS4PacketFactory::CreateFS4Packet(PCODE_QUEUELEN));
+ ASSERT_TRUE(src != NULL);
+ src->_queueLen = 1u;
+ src->_dispatchers = 2u;
+
+ std::vector<FNET_Packet*> lst { src, testEncodeDecode(*src) };
+
+ for (FNET_Packet * packet : lst) {
+ FS4Packet_QUEUELEN *ptr = dynamic_cast<FS4Packet_QUEUELEN*>(packet);
+ ASSERT_TRUE(ptr != NULL);
+ EXPECT_EQUAL((uint32_t)PCODE_QUEUELEN, ptr->GetPCODE());
+ EXPECT_EQUAL(8u, ptr->GetLength());
+ EXPECT_EQUAL(1u, ptr->_queueLen);
+ EXPECT_EQUAL(2u, ptr->_dispatchers);
+
+ delete ptr;
+ }
+}
+
+TEST("testQueryResultX") {
+ FS4Packet_QUERYRESULTX *src = dynamic_cast<FS4Packet_QUERYRESULTX*>(FS4PacketFactory::CreateFS4Packet(PCODE_QUERYRESULTX));
+ ASSERT_TRUE(src != NULL);
+ src->_offset = 1u;
+ src->_totNumDocs = 2u;
+ src->_maxRank = (search::HitRank)3;
+ src->setDistributionKey(4u);
+ src->_coverageDocs = 6u;
+ src->_activeDocs = 7u;
+ uint32_t sortIndex[3] = { 0u, 1u, 3u /* size of data */}; // numDocs + 1
+ src->SetSortDataRef(2, sortIndex, "foo");
+ src->SetAggrDataRef("bar", 3u);
+ src->SetGroupDataRef("baz", 3u);
+ src->AllocateHits(2);
+ src->_hits[0]._gid = gid0;
+ src->_hits[0]._metric = (search::HitRank)2;
+ src->_hits[0]._partid = 3u;
+ src->_hits[0].setDistributionKey(4u);
+ src->_hits[1]._gid = gid1;
+ src->_hits[1]._metric = (search::HitRank)3;
+ src->_hits[1]._partid = 4u;
+ src->_hits[1].setDistributionKey(5u);
+
+ std::vector<FNET_Packet*> lst;
+ for (uint32_t i = QRF_MLD, len = (uint32_t)(QRF_GROUPDATA << 1); i < len; ++i) {
+ if (i & ~FNET_QRF_SUPPORTED_MASK) {
+ continue; // not supported;
+ }
+ src->_features = i;
+ lst.push_back(testEncodeDecode(*src));
+ }
+ src->_features = (uint32_t)-1;
+ lst.push_back(src);
+
+ for (FNET_Packet * packet : lst) {
+ FS4Packet_QUERYRESULTX *ptr = dynamic_cast<FS4Packet_QUERYRESULTX*>(packet);
+ ASSERT_TRUE(ptr != NULL);
+ EXPECT_EQUAL((uint32_t)PCODE_QUERYRESULTX, ptr->GetPCODE());
+
+ EXPECT_EQUAL(1u, ptr->_offset);
+ EXPECT_EQUAL(2u, ptr->_totNumDocs);
+ EXPECT_EQUAL((search::HitRank)3, ptr->_maxRank);
+ EXPECT_EQUAL(4u, ptr->getDistributionKey());
+ EXPECT_EQUAL(ptr->_features & QRF_COVERAGE ? 6u : 0u, ptr->_coverageDocs);
+ EXPECT_EQUAL(ptr->_features & QRF_COVERAGE ? 7u : 0u, ptr->_activeDocs);
+ if (ptr->_features & QRF_SORTDATA) {
+ EXPECT_EQUAL(0u, ptr->_sortIndex[0]);
+ EXPECT_EQUAL(1u, ptr->_sortIndex[1]);
+ EXPECT_EQUAL(3u, ptr->_sortIndex[2]);
+ EXPECT_EQUAL("foo", std::string(ptr->_sortData, ptr->_sortIndex[2]));
+ } else {
+ EXPECT_EQUAL((void*)NULL, ptr->_sortIndex);
+ EXPECT_EQUAL((void*)NULL, ptr->_sortData);
+ }
+ if (ptr->_features & QRF_AGGRDATA) {
+ EXPECT_EQUAL("bar", std::string(ptr->_aggrData, ptr->_aggrDataLen));
+ } else {
+ EXPECT_EQUAL(0u, ptr->_aggrDataLen);
+ EXPECT_EQUAL((void*)NULL, ptr->_aggrData);
+ }
+ if (ptr->_features & QRF_GROUPDATA) {
+ EXPECT_EQUAL("baz", std::string(ptr->_groupData, ptr->_groupDataLen));
+ } else {
+ EXPECT_EQUAL(0u, ptr->_groupDataLen);
+ EXPECT_EQUAL((void*)NULL, ptr->_groupData);
+ }
+ EXPECT_EQUAL(2u, ptr->_numDocs);
+ for (uint32_t i = 0; i < ptr->_numDocs; ++i) {
+ EXPECT_EQUAL(i == 0 ? gid0 : gid1, ptr->_hits[i]._gid);
+ EXPECT_EQUAL((search::HitRank)2 + i, ptr->_hits[i]._metric);
+ EXPECT_EQUAL(ptr->_features & QRF_MLD ? 3u + i : 0u, ptr->_hits[i]._partid);
+ EXPECT_EQUAL(ptr->_features & QRF_MLD ? 4u + i : ptr->getDistributionKey(), ptr->_hits[i].getDistributionKey());
+ }
+
+ delete ptr;
+ }
+}
+
+FS4Packet_QUERYX *
+createAndFill_QUERYX()
+{
+ FS4Packet_QUERYX *src = dynamic_cast<FS4Packet_QUERYX*>(FS4PacketFactory::CreateFS4Packet(PCODE_QUERYX));
+ ASSERT_TRUE(src != NULL);
+ src->_offset = 2u;
+ src->_maxhits = 3u;
+ src->setTimeout(fastos::TimeStamp(4*fastos::TimeStamp::MS));
+ EXPECT_EQUAL(fastos::TimeStamp(4*fastos::TimeStamp::MS), src->getTimeout());
+ src->setTimeout(fastos::TimeStamp(-4*fastos::TimeStamp::MS));
+ EXPECT_EQUAL(0l, src->getTimeout());
+ src->setTimeout(fastos::TimeStamp(4*fastos::TimeStamp::MS));
+ EXPECT_EQUAL(fastos::TimeStamp(4*fastos::TimeStamp::MS), src->getTimeout());
+ src->_qflags = 5u;
+ src->setRanking("seven");
+ src->_numStackItems = 14u;
+ src->_propsVector.resize(2);
+ fillProperties(src->_propsVector[0], "foo", 8);
+ fillProperties(src->_propsVector[1], "bar", 16);
+ src->setSortSpec("sortspec");
+ src->setAggrSpec("aggrspec");
+ src->setGroupSpec("groupspec");
+ src->setLocation("location");
+ src->setStackDump("stackdump");
+ return src;
+}
+
+void
+verifyQueryX(FS4Packet_QUERYX & queryX, uint32_t features)
+{
+ EXPECT_EQUAL((uint32_t)PCODE_QUERYX, queryX.GetPCODE());
+ EXPECT_EQUAL(features, queryX._features);
+ EXPECT_EQUAL(2u, queryX._offset);
+ EXPECT_EQUAL(3u, queryX._maxhits);
+ EXPECT_EQUAL(fastos::TimeStamp(4*fastos::TimeStamp::MS), queryX.getTimeout());
+ EXPECT_EQUAL(0x5u, queryX._qflags);
+ if (queryX._features & QF_RANKP) {
+ EXPECT_EQUAL("seven", queryX._ranking);
+ } else {
+ EXPECT_EQUAL("", queryX._ranking);
+ }
+ EXPECT_EQUAL(queryX._features & QF_PARSEDQUERY ? 14u : 0u, queryX._numStackItems);
+ if (queryX._features & QF_PROPERTIES) {
+ EXPECT_EQUAL(2u, queryX._propsVector.size());
+ testProperties(queryX._propsVector[0], "foo", 8);
+ testProperties(queryX._propsVector[1], "bar", 16);
+ } else {
+ EXPECT_EQUAL(0u, queryX._propsVector.size());
+ }
+ if (queryX._features & QF_SORTSPEC) {
+ EXPECT_EQUAL("sortspec", queryX._sortSpec);
+ } else {
+ EXPECT_EQUAL(0u, queryX._sortSpec.size());
+ }
+ if (queryX._features & QF_AGGRSPEC) {
+ EXPECT_EQUAL("aggrspec", queryX._aggrSpec);
+ } else {
+ EXPECT_EQUAL(0u, queryX._aggrSpec.size());
+ }
+ if (queryX._features & QF_GROUPSPEC) {
+ EXPECT_EQUAL("groupspec", queryX._groupSpec);
+ } else {
+ EXPECT_EQUAL(0u, queryX._groupSpec.size());
+ }
+ if (queryX._features & QF_LOCATION) {
+ EXPECT_EQUAL("location", queryX._location);
+ } else {
+ EXPECT_EQUAL(0u, queryX._location.size());
+ }
+ if (queryX._features & QF_PARSEDQUERY) {
+ EXPECT_EQUAL("stackdump", queryX._stackDump);
+ } else {
+ EXPECT_EQUAL(0u, queryX._stackDump.size());
+ }
+}
+
+TEST("testQueryX") {
+ FS4Packet_QUERYX *src = createAndFill_QUERYX();
+ std::vector<std::pair<FNET_Packet*, uint32_t>> lst;
+ for (uint32_t i = QF_PARSEDQUERY, len = (uint32_t)(QF_GROUPSPEC << 1), skip = 0; i < len; ++i) {
+ if (!(i & QF_PARSEDQUERY)) {
+ continue; // skip most
+ }
+ if (i & ~FNET_QF_SUPPORTED_MASK) {
+ continue; // not supported
+ }
+ if (++skip % 10) {
+ continue; // skip most
+ }
+ src->_features = i;
+ lst.emplace_back(testEncodeDecode(*src), i);
+ }
+ src->_features = uint32_t(-1);
+ lst.emplace_back(src, -1);
+
+ for (const auto & pfPair : lst) {
+ FS4Packet_QUERYX *ptr = dynamic_cast<FS4Packet_QUERYX*>(pfPair.first);
+ ASSERT_TRUE(ptr != NULL);
+ verifyQueryX(*ptr, pfPair.second);
+
+ delete ptr;
+ }
+}
+
+TEST("testSharedPacket") {
+ FNET_Packet::SP src(createAndFill_QUERYX());
+ static_cast<FS4Packet_QUERYX *>(src.get())->_features=FNET_QF_SUPPORTED_MASK;
+ FNET_Packet::SP decoded(testEncodeDecode(*src));
+ verifyQueryX(*static_cast<FS4Packet_QUERYX *>(decoded.get()), FNET_QF_SUPPORTED_MASK);
+ EXPECT_TRUE(decoded.get() != nullptr);
+ FS4Packet_Shared shared(decoded);
+ FNET_Packet::UP decoded2(testEncodeDecode(shared));
+ EXPECT_TRUE(decoded2.get() != nullptr);
+ EXPECT_TRUE(nullptr == dynamic_cast<const FS4Packet_Shared *>(decoded2.get()));
+ EXPECT_TRUE(nullptr != dynamic_cast<const FS4Packet_QUERYX *>(decoded2.get()));
+ EXPECT_EQUAL(src->GetLength(), decoded2->GetLength());
+ verifyQueryX(*static_cast<FS4Packet_QUERYX *>(decoded2.get()), FNET_QF_SUPPORTED_MASK);
+}
+
+TEST("test pre serializing packets no compression") {
+ FNET_Packet::UP src(createAndFill_QUERYX());
+ FS4Packet_QUERYX * queryX = static_cast<FS4Packet_QUERYX *>(src.get());
+ queryX->_features=FNET_QF_SUPPORTED_MASK;
+ FNET_Packet::UP decoded(testEncodeDecode(*src));
+ verifyQueryX(*static_cast<FS4Packet_QUERYX *>(decoded.get()), FNET_QF_SUPPORTED_MASK);
+ EXPECT_EQUAL(512u, src->GetLength());
+ EXPECT_EQUAL(src->GetLength(), decoded->GetLength());
+ FS4Packet_PreSerialized serialized(*src);
+ EXPECT_EQUAL(218u, serialized.GetPCODE());
+ EXPECT_EQUAL(512u, serialized.GetLength());
+ FNET_Packet::UP decoded2(testEncodeDecode(serialized));
+ EXPECT_EQUAL(512u, decoded2->GetLength());
+ verifyQueryX(*static_cast<FS4Packet_QUERYX *>(decoded2.get()), FNET_QF_SUPPORTED_MASK);
+}
+
+TEST("test pre serializing packets with compression") {
+ FNET_Packet::UP src(createAndFill_QUERYX());
+ FS4Packet_QUERYX * queryX = static_cast<FS4Packet_QUERYX *>(src.get());
+ queryX->_features=FNET_QF_SUPPORTED_MASK;
+ FNET_Packet::UP decoded(testEncodeDecode(*src));
+ verifyQueryX(*static_cast<FS4Packet_QUERYX *>(decoded.get()), FNET_QF_SUPPORTED_MASK);
+ EXPECT_EQUAL(512u, src->GetLength());
+ EXPECT_EQUAL(src->GetLength(), decoded->GetLength());
+ FS4PersistentPacketStreamer::Instance.SetCompressionLimit(100);
+ FS4Packet_PreSerialized serialized(*src);
+ EXPECT_EQUAL(218u | (document::CompressionConfig::LZ4 << 24), serialized.GetPCODE());
+ EXPECT_GREATER_EQUAL(321u, serialized.GetLength());
+ FNET_Packet::UP decoded2(testEncodeDecode(serialized));
+ EXPECT_EQUAL(512u, decoded2->GetLength());
+ verifyQueryX(*static_cast<FS4Packet_QUERYX *>(decoded2.get()), FNET_QF_SUPPORTED_MASK);
+}
+
+
+TEST("testGetDocsumsX") {
+ FS4Packet_GETDOCSUMSX *src = dynamic_cast<FS4Packet_GETDOCSUMSX*>(FS4PacketFactory::CreateFS4Packet(PCODE_GETDOCSUMSX));
+ ASSERT_TRUE(src != NULL);
+ src->setTimeout(fastos::TimeStamp(2*fastos::TimeStamp::MS));
+ src->setRanking("four");
+ src->_qflags = 5u;
+ src->_stackItems = 7u;
+ src->_propsVector.resize(2);
+ fillProperties(src->_propsVector[0], "foo", 8);
+ fillProperties(src->_propsVector[1], "bar", 16);
+ src->setResultClassName("resultclassname");
+ src->setStackDump("stackdump");
+ src->setLocation("location");
+ src->_flags = GDFLAG_IGNORE_ROW;
+ src->AllocateDocIDs(2);
+ src->_docid[0]._gid = gid0;
+ src->_docid[0]._partid = 2u;
+ src->_docid[1]._gid = gid1;
+ src->_docid[1]._partid = 3u;
+
+ std::vector<std::pair<FNET_Packet*, uint32_t>> lst;
+ for (uint32_t i = GDF_MLD, len = (uint32_t)(GDF_FLAGS << 1); i < len; ++i) {
+ if (i & ~FNET_GDF_SUPPORTED_MASK) {
+ continue; // not supported
+ }
+ src->_features = i;
+ lst.emplace_back(testEncodeDecode(*src), i);
+ }
+ src->_features = uint32_t(-1);
+ lst.emplace_back(src, uint32_t(-1));
+
+ for (const auto & pfPair : lst) {
+ FS4Packet_GETDOCSUMSX *ptr = dynamic_cast<FS4Packet_GETDOCSUMSX*>(pfPair.first);
+ ASSERT_TRUE(ptr != NULL);
+ EXPECT_EQUAL((uint32_t)PCODE_GETDOCSUMSX, ptr->GetPCODE());
+ EXPECT_EQUAL(pfPair.second, ptr->_features);
+ EXPECT_EQUAL(fastos::TimeStamp(2*fastos::TimeStamp::MS), ptr->getTimeout());
+ if (ptr->_features & GDF_RANKP_QFLAGS) {
+ EXPECT_EQUAL("four", ptr->_ranking);
+ } else {
+ EXPECT_EQUAL("", ptr->_ranking);
+ }
+ EXPECT_EQUAL(ptr->_features & GDF_RANKP_QFLAGS ? 5u : 0u, ptr->_qflags);
+ EXPECT_EQUAL(ptr->_features & GDF_QUERYSTACK ? 7u : 0u, ptr->_stackItems);
+ if (ptr->_features & GDF_PROPERTIES) {
+ EXPECT_EQUAL(2u, ptr->_propsVector.size());
+ testProperties(ptr->_propsVector[0], "foo", 8);
+ testProperties(ptr->_propsVector[1], "bar", 16);
+ } else {
+ EXPECT_EQUAL(0u, ptr->_propsVector.size());
+ }
+ if (ptr->_features & GDF_RESCLASSNAME) {
+ EXPECT_EQUAL("resultclassname", ptr->_resultClassName);
+ } else {
+ EXPECT_EQUAL(0u, ptr->_resultClassName.size());
+ }
+ if (ptr->_features & GDF_QUERYSTACK) {
+ EXPECT_EQUAL("stackdump", ptr->_stackDump);
+ } else {
+ EXPECT_EQUAL(0u, ptr->_stackDump.size());
+ }
+ if (ptr->_features & GDF_LOCATION) {
+ EXPECT_EQUAL("location", ptr->_location);
+ } else {
+ EXPECT_EQUAL(0u, ptr->_location.size());
+ }
+ if (ptr->_features & GDF_FLAGS) {
+ EXPECT_EQUAL(static_cast<uint32_t>(GDFLAG_IGNORE_ROW),
+ ptr->_flags);
+ } else {
+ EXPECT_EQUAL(0u, ptr->_flags);
+ }
+ EXPECT_EQUAL(2u, ptr->_docidCnt);
+ ASSERT_TRUE(ptr->_docid != NULL);
+ for (uint32_t i = 0; i < ptr->_docidCnt; ++i) {
+ EXPECT_EQUAL(i == 0u ? gid0 : gid1, ptr->_docid[i]._gid);
+ EXPECT_EQUAL(ptr->_features & GDF_MLD ? 2u + i : 0u, ptr->_docid[i]._partid);
+ }
+
+ delete ptr;
+ }
+}
+
+TEST("require that FS4PersistentPacketStreamer can compress packets") {
+ FS4Packet_ERROR *packet = static_cast<FS4Packet_ERROR*>(FS4PacketFactory::CreateFS4Packet(PCODE_ERROR));
+ packet->_errorCode = 1u;
+ packet->setErrorMessage(string(1000, 'a'));
+
+ FS4PersistentPacketStreamer streamer(FS4PacketFactory::CreateFS4Packet);
+
+ FNET_DataBuffer buf1;
+ streamer.Encode(packet, 1u, &buf1);
+ EXPECT_EQUAL(1020u, buf1.GetDataLen());
+
+ streamer.SetCompressionLimit(100);
+ FNET_DataBuffer buf2;
+ streamer.Encode(packet, 1u, &buf2);
+ EXPECT_EQUAL(38u, buf2.GetDataLen());
+
+ std::vector<FNET_Packet*> lst{ packet, testEncodeDecode(streamer, *packet) };
+
+ for (FNET_Packet * fnetPacket : lst) {
+ FS4Packet_ERROR *ptr = dynamic_cast<FS4Packet_ERROR*>(fnetPacket);
+ ASSERT_TRUE(ptr != NULL);
+ EXPECT_EQUAL((uint32_t)PCODE_ERROR, ptr->GetPCODE());
+ EXPECT_EQUAL(1008u, ptr->GetLength());
+ delete ptr;
+ }
+}
+
+TEST("require that FS4PersistentPacketStreamer can avoid compressing small packets") {
+ FS4Packet_ERROR *packet = static_cast<FS4Packet_ERROR*>(FS4PacketFactory::CreateFS4Packet(PCODE_ERROR));
+ packet->_errorCode = 1u;
+ packet->setErrorMessage("a");
+
+ FS4PersistentPacketStreamer streamer(FS4PacketFactory::CreateFS4Packet);
+
+ FNET_DataBuffer buf1;
+ streamer.Encode(packet, 1u, &buf1);
+ EXPECT_EQUAL(21u, buf1.GetDataLen());
+
+ streamer.SetCompressionLimit(10);
+ FNET_DataBuffer buf2;
+ streamer.Encode(packet, 1u, &buf2);
+ EXPECT_EQUAL(21u, buf2.GetDataLen());
+
+ delete packet;
+}
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/common/rcuvector/.gitignore b/searchlib/src/tests/common/rcuvector/.gitignore
new file mode 100644
index 00000000000..d88533ed6af
--- /dev/null
+++ b/searchlib/src/tests/common/rcuvector/.gitignore
@@ -0,0 +1,4 @@
+.depend
+Makefile
+rcuvector_test
+searchlib_rcuvector_test_app
diff --git a/searchlib/src/tests/common/rcuvector/CMakeLists.txt b/searchlib/src/tests/common/rcuvector/CMakeLists.txt
new file mode 100644
index 00000000000..362dbf68dca
--- /dev/null
+++ b/searchlib/src/tests/common/rcuvector/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_rcuvector_test_app
+ SOURCES
+ rcuvector_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_rcuvector_test_app COMMAND searchlib_rcuvector_test_app)
diff --git a/searchlib/src/tests/common/rcuvector/DESC b/searchlib/src/tests/common/rcuvector/DESC
new file mode 100644
index 00000000000..38af6317f80
--- /dev/null
+++ b/searchlib/src/tests/common/rcuvector/DESC
@@ -0,0 +1 @@
+rcuvector test. Take a look at rcuvector.h for details.
diff --git a/searchlib/src/tests/common/rcuvector/FILES b/searchlib/src/tests/common/rcuvector/FILES
new file mode 100644
index 00000000000..a8bae8dbd5c
--- /dev/null
+++ b/searchlib/src/tests/common/rcuvector/FILES
@@ -0,0 +1 @@
+rcuvector.h
diff --git a/searchlib/src/tests/common/rcuvector/rcuvector_test.cpp b/searchlib/src/tests/common/rcuvector/rcuvector_test.cpp
new file mode 100644
index 00000000000..dd50de79f17
--- /dev/null
+++ b/searchlib/src/tests/common/rcuvector/rcuvector_test.cpp
@@ -0,0 +1,284 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("rcuvector_test");
+#include <vespa/vespalib/testkit/testapp.h>
+#include <vespa/searchlib/common/rcuvector.h>
+
+namespace search {
+namespace attribute {
+
+using vespalib::GenerationHandler;
+using vespalib::GenerationHolder;
+using vespalib::GenerationHeldBase;
+
+class Test : public vespalib::TestApp {
+private:
+ bool assertUsage(const MemoryUsage & exp, const MemoryUsage & act);
+ void testGenerationHolder();
+ void testBasic();
+ void testResize();
+ void testGenerationHandling();
+ void testMemoryUsage();
+
+ void
+ testShrink();
+ void testSmallExpand();
+public:
+ int Main();
+};
+
+bool
+Test::assertUsage(const MemoryUsage & exp, const MemoryUsage & act)
+{
+ bool retval = true;
+ if (!EXPECT_EQUAL(exp.allocatedBytes(), act.allocatedBytes())) retval = false;
+ if (!EXPECT_EQUAL(exp.usedBytes(), act.usedBytes())) retval = false;
+ if (!EXPECT_EQUAL(exp.deadBytes(), act.deadBytes())) retval = false;
+ if (!EXPECT_EQUAL(exp.allocatedBytesOnHold(), act.allocatedBytesOnHold())) retval = false;
+ return retval;
+}
+
+void
+Test::testGenerationHolder()
+{
+ typedef std::unique_ptr<int32_t> IntPtr;
+ GenerationHolder gh;
+ gh.hold(GenerationHeldBase::UP(new RcuVectorHeld<int32_t>(sizeof(int32_t),
+ IntPtr(new int32_t(0)))));
+ gh.transferHoldLists(0);
+ gh.hold(GenerationHeldBase::UP(new RcuVectorHeld<int32_t>(sizeof(int32_t),
+ IntPtr(new int32_t(1)))));
+ gh.transferHoldLists(1);
+ gh.hold(GenerationHeldBase::UP(new RcuVectorHeld<int32_t>(sizeof(int32_t),
+ IntPtr(new int32_t(2)))));
+ gh.transferHoldLists(2);
+ gh.hold(GenerationHeldBase::UP(new RcuVectorHeld<int32_t>(sizeof(int32_t),
+ IntPtr(new int32_t(4)))));
+ gh.transferHoldLists(4);
+ EXPECT_EQUAL(4u * sizeof(int32_t), gh.getHeldBytes());
+ gh.trimHoldLists(0);
+ EXPECT_EQUAL(4u * sizeof(int32_t), gh.getHeldBytes());
+ gh.trimHoldLists(1);
+ EXPECT_EQUAL(3u * sizeof(int32_t), gh.getHeldBytes());
+ gh.trimHoldLists(2);
+ EXPECT_EQUAL(2u * sizeof(int32_t), gh.getHeldBytes());
+ gh.hold(GenerationHeldBase::UP(new RcuVectorHeld<int32_t>(sizeof(int32_t),
+ IntPtr(new int32_t(6)))));
+ gh.transferHoldLists(6);
+ EXPECT_EQUAL(3u * sizeof(int32_t), gh.getHeldBytes());
+ gh.trimHoldLists(6);
+ EXPECT_EQUAL(1u * sizeof(int32_t), gh.getHeldBytes());
+ gh.trimHoldLists(7);
+ EXPECT_EQUAL(0u * sizeof(int32_t), gh.getHeldBytes());
+ gh.trimHoldLists(7);
+ EXPECT_EQUAL(0u * sizeof(int32_t), gh.getHeldBytes());
+}
+
+void
+Test::testBasic()
+{
+ { // insert
+ RcuVector<int32_t> v(4, 0, 4);
+ for (int32_t i = 0; i < 100; ++i) {
+ v.push_back(i);
+ EXPECT_EQUAL(i, v[i]);
+ EXPECT_EQUAL((size_t)i + 1, v.size());
+ }
+ for (int32_t i = 0; i < 100; ++i) {
+ v[i] = i + 1;
+ EXPECT_EQUAL(i + 1, v[i]);
+ EXPECT_EQUAL(100u, v.size());
+ }
+ }
+}
+
+void
+Test::testResize()
+{
+ { // resize percent
+ RcuVector<int32_t> v(2, 50, 0);
+ EXPECT_EQUAL(2u, v.capacity());
+ v.push_back(0);
+ EXPECT_EQUAL(2u, v.capacity());
+ v.push_back(0);
+ EXPECT_EQUAL(2u, v.capacity());
+ EXPECT_TRUE(v.isFull());
+ v.push_back(0);
+ EXPECT_EQUAL(3u, v.capacity());
+ EXPECT_TRUE(v.isFull());
+ }
+ { // resize delta
+ RcuVector<int32_t> v(1, 0, 3);
+ EXPECT_EQUAL(1u, v.capacity());
+ v.push_back(0);
+ EXPECT_EQUAL(1u, v.capacity());
+ EXPECT_TRUE(v.isFull());
+ v.push_back(0);
+ EXPECT_EQUAL(4u, v.capacity());
+ EXPECT_TRUE(!v.isFull());
+ }
+ { // resize both
+ RcuVector<int32_t> v(2, 200, 3);
+ EXPECT_EQUAL(2u, v.capacity());
+ v.push_back(0);
+ EXPECT_EQUAL(2u, v.capacity());
+ v.push_back(0);
+ EXPECT_EQUAL(2u, v.capacity());
+ EXPECT_TRUE(v.isFull());
+ v.push_back(0);
+ EXPECT_EQUAL(9u, v.capacity());
+ EXPECT_TRUE(!v.isFull());
+ }
+ { // reserve
+ RcuVector<int32_t> v(2, 0, 0);
+ EXPECT_EQUAL(2u, v.capacity());
+ v.unsafe_reserve(8);
+ EXPECT_EQUAL(8u, v.capacity());
+ }
+ { // explicit resize
+ GenerationHolder g;
+ RcuVectorBase<int8_t> v(g);
+ v.push_back(1);
+ v.push_back(2);
+ g.transferHoldLists(0);
+ g.trimHoldLists(1);
+ const int8_t *old = &v[0];
+ EXPECT_EQUAL(16u, v.capacity());
+ EXPECT_EQUAL(2u, v.size());
+ v.ensure_size(32, 3);
+ v[0] = 3;
+ v[1] = 3;
+ g.transferHoldLists(1);
+ EXPECT_EQUAL(1, old[0]);
+ EXPECT_EQUAL(2, old[1]);
+ EXPECT_EQUAL(3, v[0]);
+ EXPECT_EQUAL(3, v[1]);
+ EXPECT_EQUAL(3, v[2]);
+ EXPECT_EQUAL(3, v[31]);
+ EXPECT_EQUAL(64u, v.capacity());
+ EXPECT_EQUAL(32u, v.size());
+ g.trimHoldLists(2);
+ }
+}
+
+void
+Test::testGenerationHandling()
+{
+ RcuVector<int32_t> v(2, 0, 2);
+ v.push_back(0);
+ v.push_back(10);
+ EXPECT_EQUAL(0u, v.getMemoryUsage().allocatedBytesOnHold());
+ v.push_back(20); // new array
+ EXPECT_EQUAL(8u, v.getMemoryUsage().allocatedBytesOnHold());
+
+ v.setGeneration(1);
+ v.push_back(30);
+ EXPECT_EQUAL(8u, v.getMemoryUsage().allocatedBytesOnHold());
+ v.push_back(40); // new array
+ EXPECT_EQUAL(24u, v.getMemoryUsage().allocatedBytesOnHold());
+
+ v.setGeneration(2);
+ v.push_back(50);
+ v.removeOldGenerations(3);
+ EXPECT_EQUAL(0u, v.getMemoryUsage().allocatedBytesOnHold());
+ v.push_back(60); // new array
+ EXPECT_EQUAL(24u, v.getMemoryUsage().allocatedBytesOnHold());
+}
+
+void
+Test::testMemoryUsage()
+{
+ RcuVector<int8_t> v(2, 0, 2);
+ EXPECT_TRUE(assertUsage(MemoryUsage(2,0,0,0), v.getMemoryUsage()));
+ v.push_back(0);
+ EXPECT_TRUE(assertUsage(MemoryUsage(2,1,0,0), v.getMemoryUsage()));
+ v.push_back(1);
+ EXPECT_TRUE(assertUsage(MemoryUsage(2,2,0,0), v.getMemoryUsage()));
+ v.push_back(2);
+ EXPECT_TRUE(assertUsage(MemoryUsage(4,3,0,2), v.getMemoryUsage()));
+ v.push_back(3);
+ EXPECT_TRUE(assertUsage(MemoryUsage(4,4,0,2), v.getMemoryUsage()));
+ v.push_back(4);
+ EXPECT_TRUE(assertUsage(MemoryUsage(6,5,0,6), v.getMemoryUsage()));
+ v.removeOldGenerations(1);
+ EXPECT_TRUE(assertUsage(MemoryUsage(6,5,0,0), v.getMemoryUsage()));
+}
+
+
+void
+Test::testShrink()
+{
+ GenerationHolder g;
+ RcuVectorBase<int8_t> v(g);
+ v.push_back(1);
+ v.push_back(2);
+ v.push_back(3);
+ v.push_back(4);
+ g.transferHoldLists(0);
+ g.trimHoldLists(1);
+ MemoryUsage mu;
+ mu = v.getMemoryUsage();
+ mu.incAllocatedBytesOnHold(g.getHeldBytes());
+ EXPECT_TRUE(assertUsage(MemoryUsage(16, 4, 0, 0), mu));
+ EXPECT_EQUAL(4u, v.size());
+ EXPECT_TRUE(v.capacity() >= 4u);
+ EXPECT_EQUAL(1, v[0]);
+ EXPECT_EQUAL(2, v[1]);
+ EXPECT_EQUAL(3, v[2]);
+ EXPECT_EQUAL(4, v[3]);
+ const int8_t *old = &v[0];
+ v.shrink(2);
+ g.transferHoldLists(1);
+ EXPECT_EQUAL(2u, v.size());
+ EXPECT_EQUAL(2u, v.capacity());
+ EXPECT_EQUAL(1, v[0]);
+ EXPECT_EQUAL(2, v[1]);
+ EXPECT_EQUAL(1, old[0]);
+ EXPECT_EQUAL(2, old[1]);
+ g.trimHoldLists(2);
+ EXPECT_EQUAL(1, v[0]);
+ EXPECT_EQUAL(2, v[1]);
+ mu = v.getMemoryUsage();
+ mu.incAllocatedBytesOnHold(g.getHeldBytes());
+ EXPECT_TRUE(assertUsage(MemoryUsage(2, 2, 0, 0), mu));
+}
+
+void
+Test::testSmallExpand()
+{
+ GenerationHolder g;
+ RcuVectorBase<int8_t> v(1, 50, 0, g);
+ EXPECT_EQUAL(1u, v.capacity());
+ EXPECT_EQUAL(0u, v.size());
+ v.push_back(1);
+ EXPECT_EQUAL(1u, v.capacity());
+ EXPECT_EQUAL(1u, v.size());
+ v.push_back(2);
+ EXPECT_EQUAL(2u, v.capacity());
+ EXPECT_EQUAL(2u, v.size());
+ g.transferHoldLists(1);
+ g.trimHoldLists(2);
+}
+
+
+int
+Test::Main()
+{
+ TEST_INIT("rcuvector_test");
+
+ testGenerationHolder();
+ testBasic();
+ testResize();
+ testGenerationHandling();
+ testMemoryUsage();
+ testShrink();
+ testSmallExpand();
+
+ TEST_DONE();
+}
+
+}
+}
+
+TEST_APPHOOK(search::attribute::Test);
diff --git a/searchlib/src/tests/common/resultset/.gitignore b/searchlib/src/tests/common/resultset/.gitignore
new file mode 100644
index 00000000000..41242fde289
--- /dev/null
+++ b/searchlib/src/tests/common/resultset/.gitignore
@@ -0,0 +1 @@
+searchlib_resultset_test_app
diff --git a/searchlib/src/tests/common/resultset/CMakeLists.txt b/searchlib/src/tests/common/resultset/CMakeLists.txt
new file mode 100644
index 00000000000..0aed46f6e89
--- /dev/null
+++ b/searchlib/src/tests/common/resultset/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_resultset_test_app
+ SOURCES
+ resultset_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_resultset_test_app COMMAND searchlib_resultset_test_app)
diff --git a/searchlib/src/tests/common/resultset/resultset_test.cpp b/searchlib/src/tests/common/resultset/resultset_test.cpp
new file mode 100644
index 00000000000..983dc10b914
--- /dev/null
+++ b/searchlib/src/tests/common/resultset/resultset_test.cpp
@@ -0,0 +1,109 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Unit tests for resultset.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("resultset_test");
+
+#include <vespa/searchlib/common/bitvector.h>
+#include <vespa/searchlib/common/resultset.h>
+#include <vespa/vespalib/testkit/testapp.h>
+#include <vespa/vespalib/util/arraysize.h>
+
+using namespace search;
+using vespalib::arraysize;
+
+namespace {
+
+void concatenate(const ResultSet *input_array[], size_t array_size,
+ ResultSet &output)
+{
+ size_t hit_count = 0;
+ for (size_t i = 0; i < array_size; ++i) {
+ hit_count += input_array[i]->getArrayUsed();
+ }
+ output.allocArray(hit_count);
+ RankedHit *p = output.getArray();
+ for (size_t i = 0; i < array_size; ++i) {
+ const ResultSet &set = *input_array[i];
+ memcpy(p, set.getArray(), set.getArrayUsed() * sizeof(RankedHit));
+ p += set.getArrayUsed();
+ if (set.getBitOverflow()) {
+ if (output.getBitOverflow()) {
+ output.getBitOverflow()->orWith(*set.getBitOverflow());
+ } else {
+ output.setBitOverflow(BitVector::create(*set.getBitOverflow()));
+ }
+ }
+ }
+ output.setArrayUsed(hit_count);
+}
+
+
+void addHit(ResultSet &set, unsigned int doc_id, double rank) {
+ if (set.getArrayAllocated() == 0) {
+ set.allocArray(10);
+ }
+ ASSERT_LESS(set.getArrayUsed(), set.getArrayAllocated());
+ RankedHit *hit_array = set.getArray();
+ hit_array[set.getArrayUsed()]._docId = doc_id;
+ hit_array[set.getArrayUsed()]._rankValue = rank;
+ set.setArrayUsed(set.getArrayUsed() + 1);
+}
+
+TEST("require that mergeWithOverflow works") {
+ ResultSet set1;
+ addHit(set1, 2, 4.2);
+ addHit(set1, 4, 3.2);
+ BitVector::UP bit_vector = BitVector::create(20);
+ bit_vector->setBit(2);
+ bit_vector->setBit(4);
+ bit_vector->setBit(7);
+ bit_vector->invalidateCachedCount();
+ set1.setBitOverflow(std::move(bit_vector));
+ EXPECT_EQUAL(3u, set1.getNumHits());
+ set1.mergeWithBitOverflow();
+ EXPECT_EQUAL(3u, set1.getNumHits());
+}
+
+TEST("require that resultsets can be concatenated") {
+ ResultSet set1;
+ addHit(set1, 2, 4.2);
+ addHit(set1, 4, 3.2);
+ BitVector::UP bit_vector = BitVector::create(20);
+ bit_vector->setBit(7);
+ set1.setBitOverflow(std::move(bit_vector));
+
+ ResultSet set2;
+ addHit(set2, 12, 4.2);
+ addHit(set2, 14, 3.2);
+ bit_vector = BitVector::create(20);
+ bit_vector->setBit(17);
+ set2.setBitOverflow(std::move(bit_vector));
+
+ const ResultSet *sets[] = { &set1, &set2 };
+ ResultSet target;
+ concatenate(sets, arraysize(sets), target);
+
+ EXPECT_EQUAL(4u, target.getArrayAllocated());
+ ASSERT_EQUAL(4u, target.getArrayUsed());
+ EXPECT_EQUAL(2u, target.getArray()[0]._docId);
+ EXPECT_EQUAL(4.2, target.getArray()[0]._rankValue);
+ EXPECT_EQUAL(4u, target.getArray()[1]._docId);
+ EXPECT_EQUAL(3.2, target.getArray()[1]._rankValue);
+ EXPECT_EQUAL(12u, target.getArray()[2]._docId);
+ EXPECT_EQUAL(4.2, target.getArray()[2]._rankValue);
+ EXPECT_EQUAL(14u, target.getArray()[3]._docId);
+ EXPECT_EQUAL(3.2, target.getArray()[3]._rankValue);
+
+ BitVector * bv = target.getBitOverflow();
+ ASSERT_TRUE(bv);
+ EXPECT_EQUAL(20u, bv->size());
+ EXPECT_EQUAL(7u, bv->getNextTrueBit(0));
+ EXPECT_EQUAL(17u, bv->getNextTrueBit(8));
+ EXPECT_EQUAL(20u, bv->getNextTrueBit(18));
+}
+
+} // namespace
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/common/sequencedtaskexecutor/.gitignore b/searchlib/src/tests/common/sequencedtaskexecutor/.gitignore
new file mode 100644
index 00000000000..35d038b0b7c
--- /dev/null
+++ b/searchlib/src/tests/common/sequencedtaskexecutor/.gitignore
@@ -0,0 +1 @@
+searchlib_sequencedtaskexecutor_test_app
diff --git a/searchlib/src/tests/common/sequencedtaskexecutor/CMakeLists.txt b/searchlib/src/tests/common/sequencedtaskexecutor/CMakeLists.txt
new file mode 100644
index 00000000000..501fd3b07f1
--- /dev/null
+++ b/searchlib/src/tests/common/sequencedtaskexecutor/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_sequencedtaskexecutor_test_app
+ SOURCES
+ sequencedtaskexecutor_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_sequencedtaskexecutor_test_app COMMAND searchlib_sequencedtaskexecutor_test_app)
diff --git a/searchlib/src/tests/common/sequencedtaskexecutor/DESC b/searchlib/src/tests/common/sequencedtaskexecutor/DESC
new file mode 100644
index 00000000000..29ac00d3453
--- /dev/null
+++ b/searchlib/src/tests/common/sequencedtaskexecutor/DESC
@@ -0,0 +1 @@
+sequencedtaskexecutor test. Take a look at sequencedtaskexecutor_test.cpp for details.
diff --git a/searchlib/src/tests/common/sequencedtaskexecutor/FILES b/searchlib/src/tests/common/sequencedtaskexecutor/FILES
new file mode 100644
index 00000000000..a8ebec0ebca
--- /dev/null
+++ b/searchlib/src/tests/common/sequencedtaskexecutor/FILES
@@ -0,0 +1 @@
+sequencedtaskexecutor_test.cpp
diff --git a/searchlib/src/tests/common/sequencedtaskexecutor/sequencedtaskexecutor_test.cpp b/searchlib/src/tests/common/sequencedtaskexecutor/sequencedtaskexecutor_test.cpp
new file mode 100644
index 00000000000..98436364ea0
--- /dev/null
+++ b/searchlib/src/tests/common/sequencedtaskexecutor/sequencedtaskexecutor_test.cpp
@@ -0,0 +1,194 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("sequencedtaskexecutor_test");
+#include <vespa/searchlib/common/sequencedtaskexecutor.h>
+#include <vespa/vespalib/testkit/testapp.h>
+#include <vespa/vespalib/test/insertion_operators.h>
+
+#include <mutex>
+#include <condition_variable>
+
+namespace search
+{
+
+namespace common
+{
+
+
+class Fixture
+{
+public:
+ SequencedTaskExecutor _threads;
+
+ Fixture()
+ : _threads(2)
+ {
+ }
+};
+
+
+class TestObj
+{
+public:
+ std::mutex _m;
+ std::condition_variable _cv;
+ int _done;
+ int _fail;
+ int _val;
+
+ TestObj()
+ : _m(),
+ _cv(),
+ _done(0),
+ _fail(0),
+ _val(0)
+ {
+ }
+
+ void
+ modify(int oldValue, int newValue)
+ {
+ {
+ std::lock_guard<std::mutex> guard(_m);
+ if (_val == oldValue) {
+ _val = newValue;
+ } else {
+ ++_fail;
+ }
+ ++_done;
+ }
+ _cv.notify_all();
+ }
+
+ void
+ wait(int wantDone)
+ {
+ std::unique_lock<std::mutex> guard(_m);
+ _cv.wait(guard, [=] { return this->_done >= wantDone; });
+ }
+};
+
+TEST_F("testExecute", Fixture) {
+ std::shared_ptr<TestObj> tv(std::make_shared<TestObj>());
+ EXPECT_EQUAL(0, tv->_val);
+ f._threads.execute(1, [=]() { tv->modify(0, 42); });
+ tv->wait(1);
+ EXPECT_EQUAL(0, tv->_fail);
+ EXPECT_EQUAL(42, tv->_val);
+ f._threads.sync();
+ EXPECT_EQUAL(0, tv->_fail);
+ EXPECT_EQUAL(42, tv->_val);
+}
+
+
+TEST_F("require that task with same id are serialized", Fixture)
+{
+ std::shared_ptr<TestObj> tv(std::make_shared<TestObj>());
+ EXPECT_EQUAL(0, tv->_val);
+ f._threads.execute(0, [=]() { usleep(2000); tv->modify(0, 14); });
+ f._threads.execute(0, [=]() { tv->modify(14, 42); });
+ tv->wait(2);
+ EXPECT_EQUAL(0, tv->_fail);
+ EXPECT_EQUAL(42, tv->_val);
+ f._threads.sync();
+ EXPECT_EQUAL(0, tv->_fail);
+ EXPECT_EQUAL(42, tv->_val);
+}
+
+TEST_F("require that task with different ids are not serialized", Fixture)
+{
+ int tryCnt = 0;
+ for (tryCnt = 0; tryCnt < 100; ++tryCnt) {
+ std::shared_ptr<TestObj> tv(std::make_shared<TestObj>());
+ EXPECT_EQUAL(0, tv->_val);
+ f._threads.execute(0, [=]() { usleep(2000); tv->modify(0, 14); });
+ f._threads.execute(2, [=]() { tv->modify(14, 42); });
+ tv->wait(2);
+ if (tv->_fail != 1) {
+ continue;
+ }
+ EXPECT_EQUAL(1, tv->_fail);
+ EXPECT_EQUAL(14, tv->_val);
+ f._threads.sync();
+ EXPECT_EQUAL(1, tv->_fail);
+ EXPECT_EQUAL(14, tv->_val);
+ break;
+ }
+ EXPECT_TRUE(tryCnt < 100);
+}
+
+
+TEST_F("require that task with same string id are serialized", Fixture)
+{
+ std::shared_ptr<TestObj> tv(std::make_shared<TestObj>());
+ EXPECT_EQUAL(0, tv->_val);
+ auto test2 = [=]() { tv->modify(14, 42); };
+ f._threads.execute("0", [=]() { usleep(2000); tv->modify(0, 14); });
+ f._threads.execute("0", test2);
+ tv->wait(2);
+ EXPECT_EQUAL(0, tv->_fail);
+ EXPECT_EQUAL(42, tv->_val);
+ f._threads.sync();
+ EXPECT_EQUAL(0, tv->_fail);
+ EXPECT_EQUAL(42, tv->_val);
+}
+
+TEST_F("require that task with different string ids are not serialized",
+ Fixture)
+{
+ int tryCnt = 0;
+ for (tryCnt = 0; tryCnt < 100; ++tryCnt) {
+ std::shared_ptr<TestObj> tv(std::make_shared<TestObj>());
+ EXPECT_EQUAL(0, tv->_val);
+ f._threads.execute("0", [=]() { usleep(2000); tv->modify(0, 14); });
+ f._threads.execute("2", [=]() { tv->modify(14, 42); });
+ tv->wait(2);
+ if (tv->_fail != 1) {
+ continue;
+ }
+ EXPECT_EQUAL(1, tv->_fail);
+ EXPECT_EQUAL(14, tv->_val);
+ f._threads.sync();
+ EXPECT_EQUAL(1, tv->_fail);
+ EXPECT_EQUAL(14, tv->_val);
+ break;
+ }
+ EXPECT_TRUE(tryCnt < 100);
+}
+
+
+TEST_F("require that execute works with const lambda", Fixture)
+{
+ int i = 5;
+ std::vector<int> res;
+ const auto lambda = [i, &res]() mutable
+ { res.push_back(i--); res.push_back(i--); };
+ f._threads.execute(0, lambda);
+ f._threads.execute(0, lambda);
+ f._threads.sync();
+ std::vector<int> exp({5, 4, 5, 4});
+ EXPECT_EQUAL(exp, res);
+ EXPECT_EQUAL(5, i);
+}
+
+TEST_F("require that execute works with reference to lambda", Fixture)
+{
+ int i = 5;
+ std::vector<int> res;
+ auto lambda = [i, &res]() mutable
+ { res.push_back(i--); res.push_back(i--); };
+ auto &lambdaref = lambda;
+ f._threads.execute(0, lambdaref);
+ f._threads.execute(0, lambdaref);
+ f._threads.sync();
+ std::vector<int> exp({5, 4, 5, 4});
+ EXPECT_EQUAL(exp, res);
+ EXPECT_EQUAL(5, i);
+}
+
+
+} // namespace common
+} // namespace search
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/common/summaryfeatures/.gitignore b/searchlib/src/tests/common/summaryfeatures/.gitignore
new file mode 100644
index 00000000000..543319fb8dd
--- /dev/null
+++ b/searchlib/src/tests/common/summaryfeatures/.gitignore
@@ -0,0 +1,4 @@
+.depend
+Makefile
+summaryfeatures_test
+searchlib_summaryfeatures_test_app
diff --git a/searchlib/src/tests/common/summaryfeatures/CMakeLists.txt b/searchlib/src/tests/common/summaryfeatures/CMakeLists.txt
new file mode 100644
index 00000000000..3b6cb392615
--- /dev/null
+++ b/searchlib/src/tests/common/summaryfeatures/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_summaryfeatures_test_app
+ SOURCES
+ summaryfeatures.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_summaryfeatures_test_app COMMAND searchlib_summaryfeatures_test_app)
diff --git a/searchlib/src/tests/common/summaryfeatures/DESC b/searchlib/src/tests/common/summaryfeatures/DESC
new file mode 100644
index 00000000000..9cc24928a82
--- /dev/null
+++ b/searchlib/src/tests/common/summaryfeatures/DESC
@@ -0,0 +1 @@
+summaryfeatures test. Take a look at summaryfeatures.cpp for details.
diff --git a/searchlib/src/tests/common/summaryfeatures/FILES b/searchlib/src/tests/common/summaryfeatures/FILES
new file mode 100644
index 00000000000..19692b59229
--- /dev/null
+++ b/searchlib/src/tests/common/summaryfeatures/FILES
@@ -0,0 +1 @@
+summaryfeatures.cpp
diff --git a/searchlib/src/tests/common/summaryfeatures/summaryfeatures.cpp b/searchlib/src/tests/common/summaryfeatures/summaryfeatures.cpp
new file mode 100644
index 00000000000..6d4e8bc49c8
--- /dev/null
+++ b/searchlib/src/tests/common/summaryfeatures/summaryfeatures.cpp
@@ -0,0 +1,152 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("summaryfeatures_test");
+#include <vespa/vespalib/testkit/testapp.h>
+#include <vespa/searchlib/common/featureset.h>
+
+using namespace search;
+
+TEST_SETUP(Test);
+
+int
+Test::Main()
+{
+ TEST_INIT("summaryfeatures_test");
+ {
+ FeatureSet sf;
+ EXPECT_EQUAL(sf.getNames().size(), 0u);
+ EXPECT_EQUAL(sf.numFeatures(), 0u);
+ EXPECT_EQUAL(sf.numDocs(), 0u);
+ EXPECT_TRUE(sf.getFeaturesByIndex(0) == 0);
+ EXPECT_TRUE(sf.getFeaturesByDocId(0) == 0);
+ std::vector<uint32_t> docs;
+ EXPECT_TRUE(sf.contains(docs));
+ docs.push_back(1);
+ EXPECT_TRUE(!sf.contains(docs));
+ }
+ {
+ FeatureSet::StringVector n;
+ n.push_back("f1");
+ n.push_back("f2");
+ n.push_back("f3");
+
+ FeatureSet sf(n, 5);
+ EXPECT_EQUAL(sf.getNames().size(), 3u);
+ EXPECT_EQUAL(sf.getNames()[0], "f1");
+ EXPECT_EQUAL(sf.getNames()[1], "f2");
+ EXPECT_EQUAL(sf.getNames()[2], "f3");
+ EXPECT_EQUAL(sf.numFeatures(), 3u);
+ EXPECT_EQUAL(sf.numDocs(), 0u);
+ EXPECT_EQUAL(sf.addDocId(10), 0u);
+ EXPECT_EQUAL(sf.addDocId(20), 1u);
+ EXPECT_EQUAL(sf.addDocId(30), 2u);
+ EXPECT_EQUAL(sf.addDocId(40), 3u);
+ EXPECT_EQUAL(sf.addDocId(50), 4u);
+ EXPECT_EQUAL(sf.numDocs(), 5u);
+ feature_t *f;
+ const feature_t *cf;
+ f = sf.getFeaturesByIndex(0);
+ ASSERT_TRUE(f != 0);
+ f[0] = 11.0;
+ f[1] = 12.0;
+ f[2] = 13.0;
+ f = sf.getFeaturesByIndex(1);
+ ASSERT_TRUE(f != 0);
+ f[0] = 21.0;
+ f[1] = 22.0;
+ f[2] = 23.0;
+ f = sf.getFeaturesByIndex(2);
+ ASSERT_TRUE(f != 0);
+ f[0] = 31.0;
+ f[1] = 32.0;
+ f[2] = 33.0;
+ f = sf.getFeaturesByIndex(3);
+ ASSERT_TRUE(f != 0);
+ f[0] = 41.0;
+ f[1] = 42.0;
+ f[2] = 43.0;
+ f = sf.getFeaturesByIndex(4);
+ ASSERT_TRUE(f != 0);
+ f[0] = 51.0;
+ f[1] = 52.0;
+ f[2] = 53.0;
+ EXPECT_TRUE(sf.getFeaturesByIndex(5) == 0);
+ {
+ std::vector<uint32_t> docs;
+ EXPECT_TRUE(sf.contains(docs));
+ }
+ {
+ std::vector<uint32_t> docs;
+ docs.push_back(1);
+ EXPECT_TRUE(!sf.contains(docs));
+ }
+ {
+ std::vector<uint32_t> docs;
+ docs.push_back(31);
+ EXPECT_TRUE(!sf.contains(docs));
+ }
+ {
+ std::vector<uint32_t> docs;
+ docs.push_back(51);
+ EXPECT_TRUE(!sf.contains(docs));
+ }
+ {
+ std::vector<uint32_t> docs;
+ docs.push_back(20);
+ docs.push_back(40);
+ EXPECT_TRUE(sf.contains(docs));
+ }
+ {
+ std::vector<uint32_t> docs;
+ docs.push_back(10);
+ docs.push_back(20);
+ docs.push_back(30);
+ docs.push_back(40);
+ docs.push_back(50);
+ EXPECT_TRUE(sf.contains(docs));
+ }
+ {
+ cf = sf.getFeaturesByDocId(10);
+ ASSERT_TRUE(cf != 0);
+ EXPECT_APPROX(cf[0], 11.0, 10e-6);
+ EXPECT_APPROX(cf[1], 12.0, 10e-6);
+ EXPECT_APPROX(cf[2], 13.0, 10e-6);
+ }
+ {
+ cf = sf.getFeaturesByDocId(20);
+ ASSERT_TRUE(cf != 0);
+ EXPECT_APPROX(cf[0], 21.0, 10e-6);
+ EXPECT_APPROX(cf[1], 22.0, 10e-6);
+ EXPECT_APPROX(cf[2], 23.0, 10e-6);
+ }
+ {
+ cf = sf.getFeaturesByDocId(30);
+ ASSERT_TRUE(cf != 0);
+ EXPECT_APPROX(cf[0], 31.0, 10e-6);
+ EXPECT_APPROX(cf[1], 32.0, 10e-6);
+ EXPECT_APPROX(cf[2], 33.0, 10e-6);
+ }
+ {
+ cf = sf.getFeaturesByDocId(40);
+ ASSERT_TRUE(cf != 0);
+ EXPECT_APPROX(cf[0], 41.0, 10e-6);
+ EXPECT_APPROX(cf[1], 42.0, 10e-6);
+ EXPECT_APPROX(cf[2], 43.0, 10e-6);
+ }
+ {
+ cf = sf.getFeaturesByDocId(50);
+ ASSERT_TRUE(cf != 0);
+ EXPECT_APPROX(cf[0], 51.0, 10e-6);
+ EXPECT_APPROX(cf[1], 52.0, 10e-6);
+ EXPECT_APPROX(cf[2], 53.0, 10e-6);
+ }
+ EXPECT_TRUE(sf.getFeaturesByDocId(5) == 0);
+ EXPECT_TRUE(sf.getFeaturesByDocId(15) == 0);
+ EXPECT_TRUE(sf.getFeaturesByDocId(25) == 0);
+ EXPECT_TRUE(sf.getFeaturesByDocId(35) == 0);
+ EXPECT_TRUE(sf.getFeaturesByDocId(45) == 0);
+ EXPECT_TRUE(sf.getFeaturesByDocId(55) == 0);
+ }
+ TEST_DONE();
+}
diff --git a/searchlib/src/tests/create-test.sh b/searchlib/src/tests/create-test.sh
new file mode 100755
index 00000000000..d2bc3ded67b
--- /dev/null
+++ b/searchlib/src/tests/create-test.sh
@@ -0,0 +1,52 @@
+#!/bin/sh
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+gen_project_file() {
+ echo "generating '$1' ..."
+ echo "APPLICATION ${test}_test" > $1
+ echo "OBJS ${test}_test" >> $1
+ echo "LIBS searchlib/searchlib" >> $1
+ echo "EXTERNALLIBS searchcommon persistencetypes metrics" >> $1
+ echo "" >> $1
+ echo "CUSTOMMAKE" >> $1
+ echo "test: all" >> $1
+ echo -e "\t\$(HIDE) \$(LDL) \$(VALGRIND) ./${test}_test" >> $1
+}
+
+gen_source() {
+ echo "generating '$1' ..."
+ echo "#include <vespa/vespalib/testkit/test_kit.h>" >> $1
+ echo "" >> $1
+ echo "// using namespace search;" >> $1
+ echo "" >> $1
+ echo "TEST(\"require something\") {" >> $1
+ echo "}" >> $1
+ echo "" >> $1
+ echo "TEST_MAIN() { TEST_RUN_ALL(); }" >> $1
+}
+
+gen_file_list() {
+ echo "generating '$1' ..."
+ echo "${test}_test.cpp" > $1
+}
+
+if [ $# -ne 1 ]; then
+ echo "usage: $0 <name>"
+ echo " name: name of the test to create"
+ exit 1
+fi
+
+test=$1
+if [ -e $test ]; then
+ echo "$test already present, don't want to mess it up..."
+ exit 1
+fi
+
+echo "creating directory '$test' ..."
+mkdir -p $test || exit 1
+cd $test || exit 1
+test=`basename $test`
+
+gen_project_file fastos.project
+gen_source ${test}_test.cpp
+gen_file_list FILES
diff --git a/searchlib/src/tests/datastore/.gitignore b/searchlib/src/tests/datastore/.gitignore
new file mode 100644
index 00000000000..0f6b605a280
--- /dev/null
+++ b/searchlib/src/tests/datastore/.gitignore
@@ -0,0 +1,8 @@
+*.So
+*_test
+.depend*
+Makefile
+vlog1.txt
+vlog2.txt
+vlog3.txt
+searchlib_logdatastore_test_app
diff --git a/searchlib/src/tests/datastore/CMakeLists.txt b/searchlib/src/tests/datastore/CMakeLists.txt
new file mode 100644
index 00000000000..b10bc4d4e09
--- /dev/null
+++ b/searchlib/src/tests/datastore/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_logdatastore_test_app
+ SOURCES
+ logdatastore_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_logdatastore_test_app COMMAND sh logdatastore_test.sh)
diff --git a/searchlib/src/tests/datastore/DESC b/searchlib/src/tests/datastore/DESC
new file mode 100644
index 00000000000..f035e6aecfb
--- /dev/null
+++ b/searchlib/src/tests/datastore/DESC
@@ -0,0 +1 @@
+Tests behavior of class search::DataStore from <searchlib/docstore/datastore.h>
diff --git a/searchlib/src/tests/datastore/FILES b/searchlib/src/tests/datastore/FILES
new file mode 100644
index 00000000000..6bfee2917f4
--- /dev/null
+++ b/searchlib/src/tests/datastore/FILES
@@ -0,0 +1 @@
+datastore.cpp
diff --git a/searchlib/src/tests/datastore/bad.dat b/searchlib/src/tests/datastore/bad.dat
new file mode 100644
index 00000000000..1bf7a93a2f8
--- /dev/null
+++ b/searchlib/src/tests/datastore/bad.dat
Binary files differ
diff --git a/searchlib/src/tests/datastore/bug-7257706/1422358701368384000.dat b/searchlib/src/tests/datastore/bug-7257706/1422358701368384000.dat
new file mode 100644
index 00000000000..dfeedf08029
--- /dev/null
+++ b/searchlib/src/tests/datastore/bug-7257706/1422358701368384000.dat
Binary files differ
diff --git a/searchlib/src/tests/datastore/bug-7257706/1422358701368384000.idx b/searchlib/src/tests/datastore/bug-7257706/1422358701368384000.idx
new file mode 100644
index 00000000000..883a5265afe
--- /dev/null
+++ b/searchlib/src/tests/datastore/bug-7257706/1422358701368384000.idx
Binary files differ
diff --git a/searchlib/src/tests/datastore/dangling/1425506005745465000.dat b/searchlib/src/tests/datastore/dangling/1425506005745465000.dat
new file mode 100644
index 00000000000..cb202f8d72a
--- /dev/null
+++ b/searchlib/src/tests/datastore/dangling/1425506005745465000.dat
Binary files differ
diff --git a/searchlib/src/tests/datastore/dangling/1425506005745465000.idx b/searchlib/src/tests/datastore/dangling/1425506005745465000.idx
new file mode 100644
index 00000000000..0fc41cdf9e0
--- /dev/null
+++ b/searchlib/src/tests/datastore/dangling/1425506005745465000.idx
Binary files differ
diff --git a/searchlib/src/tests/datastore/dangling/2425506005745465000.dat b/searchlib/src/tests/datastore/dangling/2425506005745465000.dat
new file mode 100644
index 00000000000..cb202f8d72a
--- /dev/null
+++ b/searchlib/src/tests/datastore/dangling/2425506005745465000.dat
Binary files differ
diff --git a/searchlib/src/tests/datastore/dangling/2425506005745465000.idx b/searchlib/src/tests/datastore/dangling/2425506005745465000.idx
new file mode 100644
index 00000000000..0fc41cdf9e0
--- /dev/null
+++ b/searchlib/src/tests/datastore/dangling/2425506005745465000.idx
Binary files differ
diff --git a/searchlib/src/tests/datastore/dangling/3425506005745465000.dat b/searchlib/src/tests/datastore/dangling/3425506005745465000.dat
new file mode 100644
index 00000000000..cb202f8d72a
--- /dev/null
+++ b/searchlib/src/tests/datastore/dangling/3425506005745465000.dat
Binary files differ
diff --git a/searchlib/src/tests/datastore/dangling/4425506005745465000.dat b/searchlib/src/tests/datastore/dangling/4425506005745465000.dat
new file mode 100644
index 00000000000..cb202f8d72a
--- /dev/null
+++ b/searchlib/src/tests/datastore/dangling/4425506005745465000.dat
Binary files differ
diff --git a/searchlib/src/tests/datastore/dangling/4425506005745465000.idx b/searchlib/src/tests/datastore/dangling/4425506005745465000.idx
new file mode 100644
index 00000000000..e69de29bb2d
--- /dev/null
+++ b/searchlib/src/tests/datastore/dangling/4425506005745465000.idx
diff --git a/searchlib/src/tests/datastore/datastore.dat b/searchlib/src/tests/datastore/datastore.dat
new file mode 100644
index 00000000000..34d6ed1392f
--- /dev/null
+++ b/searchlib/src/tests/datastore/datastore.dat
Binary files differ
diff --git a/searchlib/src/tests/datastore/logdatastore_test.cpp b/searchlib/src/tests/datastore/logdatastore_test.cpp
new file mode 100644
index 00000000000..776e6b25533
--- /dev/null
+++ b/searchlib/src/tests/datastore/logdatastore_test.cpp
@@ -0,0 +1,468 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("datastore_test");
+
+#include <vespa/vespalib/testkit/testapp.h>
+#include <vespa/searchlib/docstore/logdatastore.h>
+#include <vespa/searchlib/docstore/chunkformats.h>
+#include <vespa/searchlib/index/dummyfileheadercontext.h>
+#include <iostream>
+
+#include <vespa/vespalib/util/exceptions.h>
+
+class MyTlSyncer : public search::transactionlog::SyncProxy {
+ search::SerialNum _syncedTo;
+public:
+ MyTlSyncer(void) : _syncedTo(0) { }
+
+ void sync(search::SerialNum syncTo) {
+ _syncedTo = syncTo;
+ }
+};
+
+using namespace search;
+using search::index::DummyFileHeaderContext;
+
+namespace {
+
+void
+showStats(const DataStoreStorageStats &stats)
+{
+ fprintf(stdout,
+ "Storage stats usage=%9lu bloat=%9lu"
+ " lastSerial=%9lu lastFlushedSerial=%9lu"
+ " maxBucketSpread=%6.2f\n",
+ stats.diskUsage(), stats.diskBloat(),
+ stats.lastSerialNum(), stats.lastFlushedSerialNum(),
+ stats.maxBucketSpread());
+ fflush(stdout);
+}
+
+void
+showChunks(const std::vector<DataStoreFileChunkStats> &chunkStats)
+{
+ fprintf(stdout, "Number of chunks is %zu\n", chunkStats.size());
+ for (const auto &chunk : chunkStats) {
+ fprintf(stdout,
+ "Chunk %019lu usage=%9lu bloat=%9lu"
+ " lastSerial=%9lu lastFlushedSerial=%9lu"
+ " bucketSpread=%6.2f\n",
+ chunk.nameId(), chunk.diskUsage(), chunk.diskBloat(),
+ chunk.lastSerialNum(), chunk.lastFlushedSerialNum(),
+ chunk.maxBucketSpread());
+ }
+ fflush(stdout);
+}
+
+SerialNum
+calcLastSerialNum(const std::vector<DataStoreFileChunkStats> &chunkStats)
+{
+ SerialNum lastSerialNum = 0u;
+ for (const auto &chunk : chunkStats) {
+ lastSerialNum = std::max(lastSerialNum, chunk.lastSerialNum());
+ }
+ return lastSerialNum;
+}
+
+SerialNum
+calcLastFlushedSerialNum(const std::vector<DataStoreFileChunkStats> &chunkStats)
+{
+ SerialNum lastFlushedSerialNum = 0u;
+ for (const auto &chunk : chunkStats) {
+ lastFlushedSerialNum = std::max(lastFlushedSerialNum,
+ chunk.lastFlushedSerialNum());
+ }
+ return lastFlushedSerialNum;
+}
+
+uint64_t
+calcDiskUsage(const std::vector<DataStoreFileChunkStats> &chunkStats)
+{
+ uint64_t diskUsage = 0u;
+ for (const auto &chunk : chunkStats) {
+ diskUsage += chunk.diskUsage();
+ }
+ return diskUsage;
+}
+
+uint64_t
+calcDiskBloat(const std::vector<DataStoreFileChunkStats> &chunkStats)
+{
+ uint64_t diskBloat = 0u;
+ for (const auto &chunk : chunkStats) {
+ diskBloat += chunk.diskBloat();
+ }
+ return diskBloat;
+}
+
+void
+checkStats(IDataStore &store,
+ SerialNum expLastSerial, SerialNum expLastFlushedSerial)
+{
+ DataStoreStorageStats storageStats(store.getStorageStats());
+ std::vector<DataStoreFileChunkStats> chunkStats;
+ chunkStats = store.getFileChunkStats();
+ showStats(storageStats);
+ showChunks(chunkStats);
+ EXPECT_EQUAL(expLastSerial, storageStats.lastSerialNum());
+ EXPECT_EQUAL(expLastFlushedSerial, storageStats.lastFlushedSerialNum());
+ EXPECT_EQUAL(storageStats.lastSerialNum(), calcLastSerialNum(chunkStats));
+ EXPECT_EQUAL(storageStats.lastFlushedSerialNum(),
+ calcLastFlushedSerialNum(chunkStats));
+ EXPECT_EQUAL(storageStats.diskUsage(),
+ calcDiskUsage(chunkStats));
+ EXPECT_EQUAL(storageStats.diskBloat(), calcDiskBloat(chunkStats));
+}
+
+
+}
+
+TEST("testThatLidInfoOrdersFileChunkSize") {
+ EXPECT_TRUE(LidInfo(1, 1, 1) == LidInfo(1, 1, 1));
+ EXPECT_FALSE(LidInfo(1, 1, 1) < LidInfo(1, 1, 1));
+
+ EXPECT_FALSE(LidInfo(1, 1, 1) == LidInfo(2, 1, 1));
+ EXPECT_TRUE(LidInfo(1, 1, 1) < LidInfo(2, 1, 1));
+ EXPECT_TRUE(LidInfo(1, 2, 1) < LidInfo(2, 1, 1));
+ EXPECT_TRUE(LidInfo(1, 1, 2) < LidInfo(2, 1, 1));
+}
+
+TEST("testGrowing") {
+ FastOS_File::EmptyAndRemoveDirectory("growing");
+ EXPECT_TRUE(FastOS_File::MakeDirectory("growing"));
+ LogDataStore::Config config(100000, 0.1, 3.0, 0.2, 8, true,
+ WriteableFileChunk::Config(
+ document::CompressionConfig(
+ document::CompressionConfig::
+ LZ4, 9, 60),
+ 1000,
+ 20));
+ vespalib::ThreadStackExecutor executor(config.getNumThreads(), 128*1024);
+ DummyFileHeaderContext fileHeaderContext;
+ MyTlSyncer tlSyncer;
+ {
+ LogDataStore datastore(executor,
+ "growing",
+ config,
+ GrowStrategy(),
+ TuneFileSummary(),
+ fileHeaderContext,
+ tlSyncer,
+ NULL);
+ srand(7);
+ char buffer[12000];
+ SerialNum lastSyncToken(0);
+ for (size_t i(0); i < sizeof(buffer); i++) {
+ buffer[i] = rand() & 0xff;
+ }
+ for (size_t i(1); i < 10000; i++) {
+ long r = rand()%10000;
+ assert(i > lastSyncToken);
+ lastSyncToken = i;
+ datastore.write(i, i, &buffer[r], uint8_t(buffer[r])*4);
+ }
+ datastore.flush(datastore.initFlush(lastSyncToken));
+ for (size_t i(1); i < 200; i++) {
+ assert(i + 20000 > lastSyncToken);
+ lastSyncToken = i + 20000;
+ datastore.remove(i + 20000, i);
+ }
+ for (size_t i(201); i < 2000; i+= 2) {
+ assert(i + 20000 > lastSyncToken);
+ lastSyncToken = i + 20000;
+ datastore.remove(i + 20000, i);
+ }
+ datastore.flush(datastore.initFlush(lastSyncToken));
+ datastore.compact(30000);
+ datastore.remove(31000, 0);
+ checkStats(datastore, 31000, 30000);
+ }
+ {
+ LogDataStore datastore(executor,
+ "growing",
+ config,
+ GrowStrategy(),
+ TuneFileSummary(),
+ fileHeaderContext,
+ tlSyncer,
+ NULL);
+ checkStats(datastore, 30000, 30000);
+ }
+
+ FastOS_File::EmptyAndRemoveDirectory("growing");
+}
+
+void fetchAndTest(IDataStore & datastore, uint32_t lid, const void *a, size_t sz)
+{
+ vespalib::DataBuffer buf;
+ EXPECT_EQUAL(static_cast<ssize_t>(sz), datastore.read(lid, buf));
+ EXPECT_EQUAL(buf.getDataLen(), sz);
+ EXPECT_TRUE(memcmp(a, buf.getData(), sz) == 0);
+}
+
+TEST("testTruncatedIdxFile"){
+ LogDataStore::Config config;
+ DummyFileHeaderContext fileHeaderContext;
+ vespalib::ThreadStackExecutor executor(config.getNumThreads(), 128*1024);
+ MyTlSyncer tlSyncer;
+ {
+ // Files comes from the 'growing test'.
+ LogDataStore datastore(executor, "bug-7257706", config,
+ GrowStrategy(), TuneFileSummary(),
+ fileHeaderContext, tlSyncer, NULL);
+ EXPECT_EQUAL(354ul, datastore.lastSyncToken());
+ }
+ {
+ LogDataStore datastore(executor, "bug-7257706-truncated", config,
+ GrowStrategy(), TuneFileSummary(),
+ fileHeaderContext, tlSyncer, NULL);
+ EXPECT_EQUAL(331ul, datastore.lastSyncToken());
+ }
+ {
+ LogDataStore datastore(executor, "bug-7257706-truncated", config,
+ GrowStrategy(), TuneFileSummary(),
+ fileHeaderContext, tlSyncer, NULL);
+ EXPECT_EQUAL(331ul, datastore.lastSyncToken());
+ }
+}
+
+TEST("testThatEmptyIdxFilesAndDanglingDatFilesAreRemoved") {
+ LogDataStore::Config config;
+ DummyFileHeaderContext fileHeaderContext;
+ vespalib::ThreadStackExecutor executor(config.getNumThreads(), 128*1024);
+ MyTlSyncer tlSyncer;
+ LogDataStore datastore(executor, "dangling-test", config,
+ GrowStrategy(), TuneFileSummary(),
+ fileHeaderContext, tlSyncer, NULL);
+ EXPECT_EQUAL(354ul, datastore.lastSyncToken());
+ EXPECT_EQUAL(4096u + 480u, datastore.getDiskHeaderFootprint());
+ EXPECT_EQUAL(datastore.getDiskHeaderFootprint() + 94016u, datastore.getDiskFootprint());
+}
+
+TEST("testWriteRead") {
+ FastOS_File::RemoveDirectory("empty");
+ const char * bufA = "aaaaaaaaaaaaaaaaaaaaa";
+ const char * bufB = "bbbbbbbbbbbbbbbb";
+ const vespalib::ConstBufferRef a[2] = { vespalib::ConstBufferRef(bufA, strlen(bufA)), vespalib::ConstBufferRef(bufB, strlen(bufB))};
+ LogDataStore::Config config;
+ {
+ EXPECT_TRUE(FastOS_File::MakeDirectory("empty"));
+ DummyFileHeaderContext fileHeaderContext;
+ vespalib::ThreadStackExecutor executor(config.getNumThreads(), 128*1024);
+ MyTlSyncer tlSyncer;
+ LogDataStore datastore(executor, "empty", config,
+ GrowStrategy(), TuneFileSummary(),
+ fileHeaderContext, tlSyncer, NULL);
+ ASSERT_TRUE(datastore.lastSyncToken() == 0);
+ size_t headerFootprint = datastore.getDiskHeaderFootprint();
+ EXPECT_LESS(0u, headerFootprint);
+ EXPECT_EQUAL(datastore.getDiskFootprint(), headerFootprint);
+ EXPECT_EQUAL(datastore.getDiskBloat(), 0ul);
+ EXPECT_EQUAL(datastore.getMaxCompactGain(), 0ul);
+ datastore.write(1, 0, a[0].c_str(), a[0].size());
+ fetchAndTest(datastore, 0, a[0].c_str(), a[0].size());
+ datastore.write(2, 0, a[1].c_str(), a[1].size());
+ fetchAndTest(datastore, 0, a[1].c_str(), a[1].size());
+ fetchAndTest(datastore, 1, NULL, 0);
+ datastore.remove(3, 0);
+ fetchAndTest(datastore, 0, "", 0);
+
+ SerialNum lastSyncToken(0);
+ for(size_t i=0; i < 100; i++) {
+ datastore.write(i+4, i, a[i%2].c_str(), a[i%2].size());
+ assert(i +4 > lastSyncToken);
+ lastSyncToken = i + 4;
+ fetchAndTest(datastore, i, a[i%2].c_str(), a[i%2].size());
+ }
+ for(size_t i=0; i < 100; i++) {
+ fetchAndTest(datastore, i, a[i%2].c_str(), a[i%2].size());
+ }
+ EXPECT_EQUAL(datastore.getDiskFootprint(),
+ 2711ul + headerFootprint);
+ EXPECT_EQUAL(datastore.getDiskBloat(), 0ul);
+ EXPECT_EQUAL(datastore.getMaxCompactGain(), 0ul);
+ datastore.flush(datastore.initFlush(lastSyncToken));
+ }
+ {
+ DummyFileHeaderContext fileHeaderContext;
+ vespalib::ThreadStackExecutor executor(config.getNumThreads(), 128*1024);
+ MyTlSyncer tlSyncer;
+ LogDataStore datastore(executor, "empty", config,
+ GrowStrategy(), TuneFileSummary(),
+ fileHeaderContext, tlSyncer, NULL);
+ size_t headerFootprint = datastore.getDiskHeaderFootprint();
+ EXPECT_LESS(0u, headerFootprint);
+ EXPECT_EQUAL(4944ul + headerFootprint, datastore.getDiskFootprint());
+ EXPECT_EQUAL(0ul, datastore.getDiskBloat());
+ EXPECT_EQUAL(0ul, datastore.getMaxCompactGain());
+
+ for(size_t i=0; i < 100; i++) {
+ fetchAndTest(datastore, i, a[i%2].c_str(), a[i%2].size());
+ }
+ for(size_t i=0; i < 100; i++) {
+ datastore.write(i+3+100, i, a[(i+1)%2].c_str(), a[(i+1)%2].size());
+ fetchAndTest(datastore, i, a[(i+1)%2].c_str(), a[(i+1)%2].size());
+ }
+ for(size_t i=0; i < 100; i++) {
+ fetchAndTest(datastore, i, a[(i+1)%2].c_str(), a[(i+1)%2].size());
+ }
+
+ EXPECT_EQUAL(7594ul + headerFootprint, datastore.getDiskFootprint());
+ EXPECT_EQUAL(0ul, datastore.getDiskBloat());
+ EXPECT_EQUAL(0ul, datastore.getMaxCompactGain());
+ }
+ FastOS_File::EmptyAndRemoveDirectory("empty");
+}
+
+TEST("requireThatSyncTokenIsUpdatedAfterFlush") {
+#if 0
+ std::string file = "sync.dat";
+ FastOS_File::Delete(file.c_str());
+ {
+ vespalib::DataBuffer buf;
+ SimpleDataStore store(file);
+ EXPECT_EQUAL(0u, store.lastSyncToken());
+ makeData(buf, 10);
+ store.write(0, buf, 10);
+ store.flush(4);
+ EXPECT_EQUAL(4u, store.lastSyncToken());
+ }
+ FastOS_File::Delete(file.c_str());
+#endif
+}
+
+class GuardDirectory {
+public:
+ GuardDirectory(const vespalib::string & dir) : _dir(dir)
+ {
+ FastOS_File::EmptyAndRemoveDirectory(_dir.c_str());
+ EXPECT_TRUE(FastOS_File::MakeDirectory(_dir.c_str()));
+ }
+ ~GuardDirectory() {
+ FastOS_File::EmptyAndRemoveDirectory(_dir.c_str());
+ }
+ const vespalib::string & getDir() const { return _dir; }
+private:
+ vespalib::string _dir;
+};
+
+TEST("requireThatFlushTimeIsAvailableAfterFlush") {
+ GuardDirectory testDir("flushtime");
+ fastos::TimeStamp before(fastos::ClockSystem::now());
+ DummyFileHeaderContext fileHeaderContext;
+ LogDataStore::Config config;
+ vespalib::ThreadStackExecutor executor(config.getNumThreads(), 128*1024);
+ MyTlSyncer tlSyncer;
+ LogDataStore store(executor,
+ testDir.getDir(),
+ config,
+ GrowStrategy(),
+ TuneFileSummary(),
+ fileHeaderContext,
+ tlSyncer,
+ NULL);
+ EXPECT_EQUAL(0, store.getLastFlushTime().time());
+ uint64_t flushToken = store.initFlush(5);
+ EXPECT_EQUAL(5u, flushToken);
+ store.flush(flushToken);
+ fastos::TimeStamp after(fastos::ClockSystem::now());
+ // the file name of the dat file is 'magic', using the clock instead of stating the file
+ EXPECT_LESS_EQUAL(before.time(), store.getLastFlushTime().time());
+ EXPECT_GREATER_EQUAL(after.time(), store.getLastFlushTime().time());
+}
+
+TEST("requireThatChunksObeyLimits") {
+ Chunk c(0, Chunk::Config(256, 2));
+ EXPECT_TRUE(c.hasRoom(1000)); // At least 1 is allowed no matter what the size is.
+ c.append(1, "abc", 3);
+ EXPECT_TRUE(c.hasRoom(229));
+ EXPECT_FALSE(c.hasRoom(230));
+ c.append(2, "abc", 3);
+ EXPECT_FALSE(c.hasRoom(20));
+}
+
+TEST("requireThatChunkCanProduceUniqueList") {
+ const char *d = "ABCDEF";
+ Chunk c(0, Chunk::Config(100, 20));
+ c.append(1, d, 1);
+ c.append(2, d, 2);
+ c.append(3, d, 3);
+ c.append(2, d, 4);
+ c.append(1, d, 5);
+ EXPECT_EQUAL(5u, c.count());
+ const Chunk::LidList & all = c.getLids();
+ EXPECT_EQUAL(5u, all.size());
+ Chunk::LidList unique = c.getUniqueLids();
+ EXPECT_EQUAL(3u, unique.size());
+ EXPECT_EQUAL(1u, unique[0].getLid());
+ EXPECT_EQUAL(5u, unique[0].netSize());
+ EXPECT_EQUAL(2u, unique[1].getLid());
+ EXPECT_EQUAL(4u, unique[1].netSize());
+ EXPECT_EQUAL(3u, unique[2].getLid());
+ EXPECT_EQUAL(3u, unique[2].netSize());
+}
+
+void testChunkFormat(ChunkFormat & cf, size_t expectedLen, const vespalib::string & expectedContent)
+{
+ document::CompressionConfig cfg;
+ uint64_t MAGIC_CONTENT(0xabcdef9876543210);
+ cf.getBuffer() << MAGIC_CONTENT;
+ vespalib::DataBuffer buffer;
+ cf.pack(7, buffer, cfg);
+ EXPECT_EQUAL(expectedLen, buffer.getDataLen());
+ std::ostringstream os;
+ os << vespalib::HexDump(buffer.getData(), buffer.getDataLen());
+ EXPECT_EQUAL(expectedContent, os.str());
+}
+
+TEST("requireThatChunkFormatsDoesNotChangeBetweenReleases") {
+ ChunkFormatV1 v1(10);
+ testChunkFormat(v1, 26, "26 000000000010ABCDEF987654321000000000000000079CF5E79B");
+ ChunkFormatV2 v2(10);
+ testChunkFormat(v2, 34, "34 015BA32DE7000000220000000010ABCDEF987654321000000000000000074D000694");
+}
+
+class DummyBucketizer : public IBucketizer
+{
+public:
+ DummyBucketizer(uint32_t mod) : _mod(mod) { }
+ uint64_t getBucketOf(const vespalib::GenerationHandler::Guard &, uint32_t lid) const override {
+ return lid%_mod;
+ }
+ vespalib::GenerationHandler::Guard getGuard() const override {
+ return vespalib::GenerationHandler::Guard();
+ }
+private:
+ uint32_t _mod;
+};
+
+TEST("testBucketDensityComputer") {
+ DummyBucketizer bucketizer(100);
+ BucketDensityComputer bdc(&bucketizer);
+ vespalib::GenerationHandler::Guard guard = bdc.getGuard();
+ EXPECT_EQUAL(0u, bdc.getNumBuckets());
+ bdc.recordLid(guard, 1, 1);
+ EXPECT_EQUAL(1u, bdc.getNumBuckets());
+ bdc.recordLid(guard, 2, 1);
+ EXPECT_EQUAL(2u, bdc.getNumBuckets());
+ bdc.recordLid(guard, 3, 1);
+ EXPECT_EQUAL(3u, bdc.getNumBuckets());
+ bdc.recordLid(guard, 2, 1);
+ EXPECT_EQUAL(3u, bdc.getNumBuckets());
+ bdc.recordLid(guard, 4, 0);
+ EXPECT_EQUAL(3u, bdc.getNumBuckets());
+ bdc.recordLid(guard, 4, 1);
+ EXPECT_EQUAL(4u, bdc.getNumBuckets());
+
+ BucketDensityComputer nonRecording(nullptr);
+ guard = nonRecording.getGuard();
+ EXPECT_EQUAL(0u, nonRecording.getNumBuckets());
+ nonRecording.recordLid(guard, 1, 1);
+ EXPECT_EQUAL(0u, nonRecording.getNumBuckets());
+}
+
+TEST_MAIN() {
+ DummyFileHeaderContext::setCreator("logdatastore_test");
+ TEST_RUN_ALL();
+}
diff --git a/searchlib/src/tests/datastore/logdatastore_test.sh b/searchlib/src/tests/datastore/logdatastore_test.sh
new file mode 100755
index 00000000000..46455e1fae9
--- /dev/null
+++ b/searchlib/src/tests/datastore/logdatastore_test.sh
@@ -0,0 +1,10 @@
+#!/bin/bash
+cp -r bug-7257706 bug-7257706-truncated
+mkdir dangling-test
+cp bug-7257706/*.dat dangling-test/
+cp bug-7257706/*.idx dangling-test/
+cp dangling/*.dat dangling-test/
+cp dangling/*.idx dangling-test/
+truncate --size 3830 bug-7257706-truncated/1422358701368384000.idx
+VESPA_LOG_TARGET=file:vlog2.txt $VALGRIND ./searchlib_logdatastore_test_app
+rm -rf bug-7257706-truncated dangling-test
diff --git a/searchlib/src/tests/diskindex/bitvector/.gitignore b/searchlib/src/tests/diskindex/bitvector/.gitignore
new file mode 100644
index 00000000000..32b1b86e1e5
--- /dev/null
+++ b/searchlib/src/tests/diskindex/bitvector/.gitignore
@@ -0,0 +1,6 @@
+.depend
+Makefile
+bitvector_test
+dump
+/bitvector_test-diskindex
+searchlib_bitvector_test-diskindex_app
diff --git a/searchlib/src/tests/diskindex/bitvector/CMakeLists.txt b/searchlib/src/tests/diskindex/bitvector/CMakeLists.txt
new file mode 100644
index 00000000000..27c03b483ab
--- /dev/null
+++ b/searchlib/src/tests/diskindex/bitvector/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_bitvector_test-diskindex_app
+ SOURCES
+ bitvector_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_bitvector_test-diskindex_app COMMAND searchlib_bitvector_test-diskindex_app)
diff --git a/searchlib/src/tests/diskindex/bitvector/DESC b/searchlib/src/tests/diskindex/bitvector/DESC
new file mode 100644
index 00000000000..313f0f89f2a
--- /dev/null
+++ b/searchlib/src/tests/diskindex/bitvector/DESC
@@ -0,0 +1 @@
+bitvector test. Take a look at bitvector_test.cpp for details.
diff --git a/searchlib/src/tests/diskindex/bitvector/FILES b/searchlib/src/tests/diskindex/bitvector/FILES
new file mode 100644
index 00000000000..a2583d74519
--- /dev/null
+++ b/searchlib/src/tests/diskindex/bitvector/FILES
@@ -0,0 +1 @@
+bitvector_test.cpp
diff --git a/searchlib/src/tests/diskindex/bitvector/bitvector_test.cpp b/searchlib/src/tests/diskindex/bitvector/bitvector_test.cpp
new file mode 100644
index 00000000000..bf95e3d56a6
--- /dev/null
+++ b/searchlib/src/tests/diskindex/bitvector/bitvector_test.cpp
@@ -0,0 +1,221 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("bitvector_test");
+#include <vespa/vespalib/testkit/testapp.h>
+#include <vespa/searchlib/diskindex/bitvectordictionary.h>
+#include <vespa/searchlib/diskindex/fieldwriter.h>
+#include <vespa/searchlib/index/dummyfileheadercontext.h>
+#include <vespa/vespalib/io/fileutil.h>
+
+using namespace search::index;
+
+namespace search {
+namespace diskindex {
+
+struct FieldWriterWrapper
+{
+ FieldWriter _writer;
+
+ FieldWriterWrapper(uint32_t docIdLimit, uint64_t numWordIds);
+
+ FieldWriterWrapper &
+ newWord(const vespalib::stringref &word);
+
+ FieldWriterWrapper &
+ add(uint32_t docId);
+
+ bool
+ open(const std::string &path,
+ const Schema &schema,
+ const uint32_t indexId,
+ const TuneFileSeqWrite &tuneFileWrite,
+ const common::FileHeaderContext &fileHeaderContext);
+};
+
+
+FieldWriterWrapper::FieldWriterWrapper(uint32_t docIdLimit, uint64_t numWordIds)
+ : _writer(docIdLimit, numWordIds)
+{
+}
+
+bool
+FieldWriterWrapper::open(const std::string &path,
+ const Schema &schema,
+ const uint32_t indexId,
+ const TuneFileSeqWrite &tuneFileWrite,
+ const common::FileHeaderContext &fileHeaderContext)
+{
+ vespalib::mkdir(path, false);
+ _writer.earlyOpen(path, 64, 10000, false, schema, indexId, tuneFileWrite);
+ return _writer.lateOpen(tuneFileWrite, fileHeaderContext);
+}
+
+FieldWriterWrapper &
+FieldWriterWrapper::newWord(const vespalib::stringref &word)
+{
+ _writer.newWord(word);
+ return *this;
+}
+
+
+FieldWriterWrapper &
+FieldWriterWrapper::add(uint32_t docId)
+{
+ DocIdAndFeatures daf;
+ daf._docId = docId;
+ daf._elements.push_back(WordDocElementFeatures(0));
+ daf._elements.back().setNumOccs(1);
+ daf._wordPositions.push_back(WordDocElementWordPosFeatures(0));
+ //LOG(info, "add(%" PRIu64 ", %u)", wordNum, docId);
+ _writer.add(daf);
+ return *this;
+}
+
+class Test : public vespalib::TestApp
+{
+private:
+ Schema _schema;
+ uint32_t _indexId;
+public:
+ void
+ requireThatDictionaryHandlesNoEntries(bool directio, bool readmmap);
+
+ void
+ requireThatDictionaryHandlesMultipleEntries(bool directio, bool readmmap);
+
+ Test();
+ int Main();
+};
+
+void
+Test::requireThatDictionaryHandlesNoEntries(bool directio, bool readmmap)
+{
+ TuneFileSeqWrite tuneFileWrite;
+ TuneFileRandRead tuneFileRead;
+ DummyFileHeaderContext fileHeaderContext;
+
+ if (directio) {
+ tuneFileWrite.setWantDirectIO();
+ tuneFileRead.setWantDirectIO();
+ }
+ if (readmmap)
+ tuneFileRead.setWantMemoryMap();
+ FieldWriterWrapper fww(5, 2);
+ vespalib::mkdir("dump", false);
+ EXPECT_TRUE(fww.open("dump/1/", _schema, _indexId, tuneFileWrite,
+ fileHeaderContext));
+ fww.newWord("1").add(1);
+ fww.newWord("2").add(2).add(3);
+ EXPECT_TRUE(fww._writer.close());
+
+ BitVectorDictionary dict;
+ BitVectorKeyScope bvScope(BitVectorKeyScope::PERFIELD_WORDS);
+ EXPECT_TRUE(dict.open("dump/1/", tuneFileRead, bvScope));
+ EXPECT_EQUAL(5u, dict.getDocIdLimit());
+ EXPECT_EQUAL(0u, dict.getEntries().size());
+ EXPECT_TRUE(dict.lookup(1).get() == NULL);
+ EXPECT_TRUE(dict.lookup(2).get() == NULL);
+}
+
+void
+Test::requireThatDictionaryHandlesMultipleEntries(bool directio, bool readmmap)
+{
+ TuneFileSeqWrite tuneFileWrite;
+ TuneFileRandRead tuneFileRead;
+ DummyFileHeaderContext fileHeaderContext;
+
+ if (directio) {
+ tuneFileWrite.setWantDirectIO();
+ tuneFileRead.setWantDirectIO();
+ }
+ if (readmmap)
+ tuneFileRead.setWantMemoryMap();
+ FieldWriterWrapper fww(64, 6);
+ EXPECT_TRUE(fww.open("dump/2/", _schema, _indexId, tuneFileWrite,
+ fileHeaderContext));
+ // must have >16 docs in order to create bitvector for a word
+ // 17 docs for word 1
+ BitVector::UP bv1exp(BitVector::create(64));
+ fww.newWord("1");
+ for (uint32_t docId = 1; docId < 18; ++docId) {
+ fww.add(docId);
+ bv1exp->setBit(docId);
+ }
+ fww.newWord("2").add(1);
+ // 16 docs for word 3
+ fww.newWord("3");
+ for (uint32_t docId = 1; docId < 17; ++docId) {
+ fww.add(docId);
+ }
+ fww.newWord("4").add(1);
+ // 23 docs for word 5
+ BitVector::UP bv5exp(BitVector::create(64));
+ fww.newWord("5");
+ for (uint32_t docId = 1; docId < 24; ++docId) {
+ fww.add(docId * 2);
+ bv5exp->setBit(docId * 2);
+ }
+ fww.newWord("6").add(1);
+ EXPECT_TRUE(fww._writer.close());
+
+ BitVectorDictionary dict;
+ BitVectorKeyScope bvScope(BitVectorKeyScope::PERFIELD_WORDS);
+ EXPECT_TRUE(dict.open("dump/2/", tuneFileRead, bvScope));
+ EXPECT_EQUAL(64u, dict.getDocIdLimit());
+ EXPECT_EQUAL(2u, dict.getEntries().size());
+
+ BitVectorWordSingleKey e;
+ e = dict.getEntries()[0];
+ EXPECT_EQUAL(1u, e._wordNum);
+ EXPECT_EQUAL(17u, e._numDocs);
+ e = dict.getEntries()[1];
+ EXPECT_EQUAL(5u, e._wordNum);
+ EXPECT_EQUAL(23u, e._numDocs);
+
+ EXPECT_TRUE(dict.lookup(2).get() == NULL);
+ EXPECT_TRUE(dict.lookup(3).get() == NULL);
+ EXPECT_TRUE(dict.lookup(4).get() == NULL);
+ EXPECT_TRUE(dict.lookup(6).get() == NULL);
+
+ BitVector::UP bv1act = dict.lookup(1);
+ EXPECT_TRUE(bv1act.get() != NULL);
+ EXPECT_TRUE(*bv1exp == *bv1act);
+
+ BitVector::UP bv5act = dict.lookup(5);
+ EXPECT_TRUE(bv5act.get() != NULL);
+ EXPECT_TRUE(*bv5exp == *bv5act);
+}
+
+Test::Test()
+ : _schema(),
+ _indexId(0)
+{
+ _schema.addIndexField(Schema::IndexField("f1", Schema::STRING));
+}
+
+int
+Test::Main()
+{
+ TEST_INIT("bitvector_test");
+
+ TuneFileSeqWrite tuneFileWrite;
+ TuneFileRandRead tuneFileRead;
+
+ if (_argc > 0) {
+ DummyFileHeaderContext::setCreator(_argv[0]);
+ }
+ TEST_DO(requireThatDictionaryHandlesNoEntries(false, false));
+ TEST_DO(requireThatDictionaryHandlesMultipleEntries(false, false));
+ TEST_DO(requireThatDictionaryHandlesNoEntries(true, false));
+ TEST_DO(requireThatDictionaryHandlesMultipleEntries(true, false));
+ TEST_DO(requireThatDictionaryHandlesNoEntries(false, true));
+ TEST_DO(requireThatDictionaryHandlesMultipleEntries(false, true));
+
+ TEST_DONE();
+}
+
+}
+}
+
+TEST_APPHOOK(search::diskindex::Test);
diff --git a/searchlib/src/tests/diskindex/diskindex/.gitignore b/searchlib/src/tests/diskindex/diskindex/.gitignore
new file mode 100644
index 00000000000..58819f1c4bb
--- /dev/null
+++ b/searchlib/src/tests/diskindex/diskindex/.gitignore
@@ -0,0 +1,5 @@
+.depend
+Makefile
+diskindex_test
+index
+searchlib_diskindex_test_app
diff --git a/searchlib/src/tests/diskindex/diskindex/CMakeLists.txt b/searchlib/src/tests/diskindex/diskindex/CMakeLists.txt
new file mode 100644
index 00000000000..7cee100f534
--- /dev/null
+++ b/searchlib/src/tests/diskindex/diskindex/CMakeLists.txt
@@ -0,0 +1,9 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_diskindex_test_app
+ SOURCES
+ diskindex_test.cpp
+ DEPENDS
+ searchlib
+ searchlib_test
+)
+vespa_add_test(NAME searchlib_diskindex_test_app COMMAND searchlib_diskindex_test_app)
diff --git a/searchlib/src/tests/diskindex/diskindex/DESC b/searchlib/src/tests/diskindex/diskindex/DESC
new file mode 100644
index 00000000000..fc14faaca7a
--- /dev/null
+++ b/searchlib/src/tests/diskindex/diskindex/DESC
@@ -0,0 +1 @@
+diskindex test. Take a look at diskindex_test.cpp for details.
diff --git a/searchlib/src/tests/diskindex/diskindex/FILES b/searchlib/src/tests/diskindex/diskindex/FILES
new file mode 100644
index 00000000000..54eef52f856
--- /dev/null
+++ b/searchlib/src/tests/diskindex/diskindex/FILES
@@ -0,0 +1 @@
+diskindex_test.cpp
diff --git a/searchlib/src/tests/diskindex/diskindex/diskindex_test.cpp b/searchlib/src/tests/diskindex/diskindex/diskindex_test.cpp
new file mode 100644
index 00000000000..a8972d2a289
--- /dev/null
+++ b/searchlib/src/tests/diskindex/diskindex/diskindex_test.cpp
@@ -0,0 +1,330 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+#include <vespa/vespalib/testkit/testapp.h>
+#include <vespa/searchlib/common/bitvectoriterator.h>
+#include <vespa/searchlib/diskindex/disktermblueprint.h>
+#include <vespa/searchlib/test/diskindex/testdiskindex.h>
+#include <vespa/searchlib/test/initrange.h>
+#include <vespa/searchlib/test/fakedata/fakeword.h>
+#include <vespa/searchlib/diskindex/zcposocciterators.h>
+#include <vespa/searchlib/fef/termfieldmatchdataarray.h>
+#include <vespa/searchlib/query/tree/simplequery.h>
+#include <vespa/searchlib/queryeval/booleanmatchiteratorwrapper.h>
+#include <vespa/searchlib/queryeval/leaf_blueprints.h>
+#include <vespa/searchlib/queryeval/searchiterator.h>
+#include <vespa/searchlib/queryeval/emptysearch.h>
+#include <vespa/searchlib/queryeval/fake_requestcontext.h>
+#include <vespa/searchlib/index/dummyfileheadercontext.h>
+#include <vespa/vespalib/io/fileutil.h>
+#include <iostream>
+#include <set>
+#include <vespa/searchlib/test/fakedata/fpfactory.h>
+
+LOG_SETUP("diskindex_test");
+
+using search::BitVectorIterator;
+using namespace search::fef;
+using namespace search::index;
+using namespace search::query;
+using namespace search::queryeval;
+using namespace search::queryeval::blueprint;
+using search::test::InitRangeVerifier;
+using namespace search::fakedata;
+
+namespace search {
+namespace diskindex {
+
+typedef DiskIndex::LookupResult LookupResult;
+
+std::string
+toString(SearchIterator & sb)
+{
+ std::ostringstream oss;
+ bool first = true;
+ for (sb.seek(1u); ! sb.isAtEnd(); sb.seek(sb.getDocId() + 1)) {
+ if (!first) oss << ",";
+ oss << sb.getDocId();
+ first = false;
+ }
+ return oss.str();
+}
+
+SimpleStringTerm
+makeTerm(const std::string & term)
+{
+ return SimpleStringTerm(term, "field", 0, search::query::Weight(0));
+}
+
+class Test : public vespalib::TestApp, public TestDiskIndex {
+private:
+ FakeRequestContext _requestContext;
+
+ void requireThatLookupIsWorking(bool fieldEmpty, bool docEmpty, bool wordEmpty);
+ void requireThatWeCanReadPostingList();
+ void requireThatWeCanReadBitVector();
+ void requireThatBlueprintIsCreated();
+ void requireThatBlueprintCanCreateSearchIterators();
+ void requireThatInitRangeConforms();
+public:
+ Test();
+ int Main();
+};
+
+void
+Test::requireThatInitRangeConforms()
+{
+ InitRangeVerifier ir;
+ Schema schema;
+ schema.addIndexField(Schema::IndexField("a", Schema::DataType::STRING));
+ bitcompression::PosOccFieldsParams params;
+ params.setSchemaParams(schema, 0);
+ search::fakedata::FakeWord fw(ir.getDocIdLimit(), ir.getExpectedDocIds(), "a", params, 0);
+ TermFieldMatchData md;
+ TermFieldMatchDataArray tfmda;
+ tfmda.add(&md);
+ std::vector<const FakeWord *> v;
+ v.push_back(&fw);
+ std::set<std::string> ignored = { "MemTreeOcc", "MemTreeOcc2",
+ "FilterOcc", "ZcFilterOcc",
+ "ZcNoSkipFilterOcc", "ZcSkipFilterOcc",
+ "ZcbFilterOcc",
+ "EGCompr64FilterOcc", "EGCompr64LEFilterOcc",
+ "EGCompr64NoSkipFilterOcc", "EGCompr64SkipFilterOcc" };
+ for (auto postingType : search::fakedata::getPostingTypes()) {
+ if (ignored.find(postingType) == ignored.end()) {
+ std::cerr << "Verifying " << postingType << std::endl;
+ std::unique_ptr<FPFactory> ff(getFPFactory(postingType, schema));
+ ff->setup(v);
+ FakePosting::SP f(ff->make(fw));
+ TEST_DO(ir.verify(f->createIterator(tfmda)));
+ }
+ }
+}
+
+void
+Test::requireThatLookupIsWorking(bool fieldEmpty,
+ bool docEmpty,
+ bool wordEmpty)
+{
+ uint32_t f1(_schema.getIndexFieldId("f1"));
+ uint32_t f2(_schema.getIndexFieldId("f2"));
+ uint32_t f3(_schema.getIndexFieldId("f3"));
+ LookupResult::UP r;
+ r = _index->lookup(f1, "not");
+ EXPECT_TRUE(!r || r->counts._numDocs == 0);
+ r = _index->lookup(f1, "w1not");
+ EXPECT_TRUE(!r || r->counts._numDocs == 0);
+ r = _index->lookup(f1, "wnot");
+ EXPECT_TRUE(!r || r->counts._numDocs == 0);
+ { // field 'f1'
+ r = _index->lookup(f1, "w1");
+ if (wordEmpty || fieldEmpty || docEmpty) {
+ EXPECT_TRUE(!r || r->counts._numDocs == 0);
+ } else {
+ EXPECT_EQUAL(1u, r->wordNum);
+ EXPECT_EQUAL(2u, r->counts._numDocs);
+ }
+ r = _index->lookup(f1, "w2");
+ EXPECT_TRUE(!r || r->counts._numDocs == 0);
+ }
+ { // field 'f2'
+ r = _index->lookup(f2, "w1");
+ if (wordEmpty || fieldEmpty || docEmpty) {
+ EXPECT_TRUE(!r || r->counts._numDocs == 0);
+ } else {
+ EXPECT_EQUAL(1u, r->wordNum);
+ EXPECT_EQUAL(3u, r->counts._numDocs);
+ }
+ r = _index->lookup(f2, "w2");
+ if (wordEmpty || fieldEmpty || docEmpty) {
+ EXPECT_TRUE(!r || r->counts._numDocs == 0);
+ } else {
+ EXPECT_EQUAL(2u, r->wordNum);
+ EXPECT_EQUAL(17u, r->counts._numDocs);
+ }
+ }
+ { // field 'f3' doesn't exist
+ r = _index->lookup(f3, "w1");
+ EXPECT_TRUE(!r || r->counts._numDocs == 0);
+ r = _index->lookup(f3, "w2");
+ EXPECT_TRUE(!r || r->counts._numDocs == 0);
+ }
+}
+
+void
+Test::requireThatWeCanReadPostingList()
+{
+ TermFieldMatchDataArray mda;
+ { // field 'f1'
+ LookupResult::UP r = _index->lookup(0, "w1");
+ PostingListHandle::UP h = _index->readPostingList(*r);
+ SearchIterator * sb = h->createIterator(r->counts, mda);
+ sb->initFullRange();
+ EXPECT_EQUAL("1,3", toString(*sb));
+ delete sb;
+ }
+}
+
+void
+Test::requireThatWeCanReadBitVector()
+{
+ { // word 'w1'
+ LookupResult::UP r = _index->lookup(1, "w1");
+ // not bit vector for 'w1'
+ EXPECT_TRUE(_index->readBitVector(*r).get() == NULL);
+ }
+ { // word 'w2'
+ BitVector::UP exp(BitVector::create(32));
+ for (uint32_t docId = 1; docId < 18; ++docId) exp->setBit(docId);
+ { // field 'f2'
+ LookupResult::UP r =
+ _index->lookup(1, "w2");
+ BitVector::UP bv = _index->readBitVector(*r);
+ EXPECT_TRUE(bv.get() != NULL);
+ EXPECT_TRUE(*bv == *exp);
+ }
+ }
+}
+
+void
+Test::requireThatBlueprintIsCreated()
+{
+ { // unknown field
+ Blueprint::UP b =
+ _index->createBlueprint(_requestContext, FieldSpec("none", 0, 0), makeTerm("w1"));
+ EXPECT_TRUE(dynamic_cast<EmptyBlueprint *>(b.get()) != NULL);
+ }
+ { // unknown word
+ Blueprint::UP b =
+ _index->createBlueprint(_requestContext, FieldSpec("f1", 0, 0), makeTerm("none"));
+ EXPECT_TRUE(dynamic_cast<EmptyBlueprint *>(b.get()) != NULL);
+ }
+ { // known field & word with hits
+ Blueprint::UP b =
+ _index->createBlueprint(_requestContext, FieldSpec("f1", 0, 0), makeTerm("w1"));
+ EXPECT_TRUE(dynamic_cast<DiskTermBlueprint *>(b.get()) != NULL);
+ EXPECT_EQUAL(2u, b->getState().estimate().estHits);
+ EXPECT_TRUE(!b->getState().estimate().empty);
+ }
+ { // known field & word without hits
+ Blueprint::UP b =
+ _index->createBlueprint(_requestContext, FieldSpec("f1", 0, 0), makeTerm("w2"));
+// std::cerr << "BP = " << typeid(*b).name() << std::endl;
+ EXPECT_TRUE((dynamic_cast<DiskTermBlueprint *>(b.get()) != NULL) ||
+ (dynamic_cast<EmptyBlueprint *>(b.get()) != NULL));
+ EXPECT_EQUAL(0u, b->getState().estimate().estHits);
+ EXPECT_TRUE(b->getState().estimate().empty);
+ }
+}
+
+void
+Test::requireThatBlueprintCanCreateSearchIterators()
+{
+ TermFieldMatchData md;
+ TermFieldMatchDataArray mda;
+ mda.add(&md);
+ Blueprint::UP b;
+ SearchIterator::UP s;
+ { // bit vector due to isFilter
+ b = _index->createBlueprint(_requestContext, FieldSpec("f2", 0, 0, true), makeTerm("w2"));
+ b->fetchPostings(true);
+ s = (dynamic_cast<LeafBlueprint *>(b.get()))->createLeafSearch(mda, true);
+ EXPECT_TRUE(dynamic_cast<BitVectorIterator *>(s.get()) != NULL);
+ }
+ { // bit vector due to no ranking needed
+ b = _index->createBlueprint(_requestContext, FieldSpec("f2", 0, 0, false), makeTerm("w2"));
+ b->fetchPostings(true);
+ s = (dynamic_cast<LeafBlueprint *>(b.get()))->createLeafSearch(mda, true);
+ EXPECT_FALSE(dynamic_cast<BitVectorIterator *>(s.get()) != NULL);
+ TermFieldMatchData md2;
+ md2.tagAsNotNeeded();
+ TermFieldMatchDataArray mda2;
+ mda2.add(&md2);
+ EXPECT_TRUE(mda2[0]->isNotNeeded());
+ s = (dynamic_cast<LeafBlueprint *>(b.get()))->createLeafSearch(mda2, false);
+ EXPECT_TRUE(dynamic_cast<BitVectorIterator *>(s.get()) != NULL);
+ }
+ { // fake bit vector
+ b = _index->createBlueprint(_requestContext, FieldSpec("f1", 0, 0, true), makeTerm("w2"));
+// std::cerr << "BP = " << typeid(*b).name() << std::endl;
+ b->fetchPostings(true);
+ s = (dynamic_cast<LeafBlueprint *>(b.get()))->createLeafSearch(mda, true);
+// std::cerr << "SI = " << typeid(*s).name() << std::endl;
+ EXPECT_TRUE((dynamic_cast<BooleanMatchIteratorWrapper *>(s.get()) != NULL) ||
+ dynamic_cast<EmptySearch *>(s.get()));
+ }
+ { // posting list iterator
+ b = _index->createBlueprint(_requestContext, FieldSpec("f1", 0, 0), makeTerm("w1"));
+ b->fetchPostings(true);
+ s = (dynamic_cast<LeafBlueprint *>(b.get()))->createLeafSearch(mda, true);
+ ASSERT_TRUE(dynamic_cast<Zc4RareWordPosOccIterator<true> *>(s.get()) != NULL);
+ }
+}
+
+Test::Test() :
+ TestDiskIndex()
+{
+}
+
+int
+Test::Main()
+{
+ TEST_INIT("diskindex_test");
+
+ if (_argc > 0) {
+ DummyFileHeaderContext::setCreator(_argv[0]);
+ }
+
+ vespalib::mkdir("index", false);
+ TEST_DO(openIndex("index/1fedewe", false, false, true, true, true));
+ TEST_DO(requireThatLookupIsWorking(true, true, true));
+ TEST_DO(openIndex("index/1fede", false, false, true, true, false));
+ TEST_DO(requireThatLookupIsWorking(true, true, false));
+ TEST_DO(openIndex("index/1fewe", false, false, true, false, true));
+ TEST_DO(requireThatLookupIsWorking(true, false, true));
+ TEST_DO(openIndex("index/1fe", false, false, true, false, false));
+ TEST_DO(requireThatLookupIsWorking(true, false, false));
+ buildSchema();
+ TEST_DO(openIndex("index/1dewe", false, false, false, true, true));
+ TEST_DO(requireThatLookupIsWorking(false, true, true));
+ TEST_DO(openIndex("index/1de", false, false, false, true, false));
+ TEST_DO(requireThatLookupIsWorking(false, true, false));
+ TEST_DO(openIndex("index/1we", false, false, false, false, true));
+ TEST_DO(requireThatLookupIsWorking(false, false, true));
+ TEST_DO(openIndex("index/1", false, false, false, false, false));
+ TEST_DO(requireThatLookupIsWorking(false, false, false));
+ TEST_DO(requireThatWeCanReadPostingList());
+ TEST_DO(requireThatWeCanReadBitVector());
+ TEST_DO(requireThatBlueprintIsCreated());
+ TEST_DO(requireThatBlueprintCanCreateSearchIterators());
+
+ TEST_DO(openIndex("index/2", true, false, false, false, false));
+ TEST_DO(requireThatLookupIsWorking(false, false, false));
+ TEST_DO(requireThatWeCanReadPostingList());
+ TEST_DO(requireThatWeCanReadBitVector());
+ TEST_DO(requireThatBlueprintIsCreated());
+ TEST_DO(requireThatBlueprintCanCreateSearchIterators());
+
+ TEST_DO(openIndex("index/3", false, true, false, false, false));
+ TEST_DO(requireThatLookupIsWorking(false, false, false));
+ TEST_DO(requireThatWeCanReadPostingList());
+ TEST_DO(requireThatWeCanReadBitVector());
+ TEST_DO(requireThatBlueprintIsCreated());
+ TEST_DO(requireThatBlueprintCanCreateSearchIterators());
+
+ TEST_DO(openIndex("index/4", true, true, false, false, false));
+ TEST_DO(requireThatLookupIsWorking(false, false, false));
+ TEST_DO(requireThatWeCanReadPostingList());
+ TEST_DO(requireThatWeCanReadBitVector());
+ TEST_DO(requireThatBlueprintIsCreated());
+ TEST_DO(requireThatBlueprintCanCreateSearchIterators());
+ TEST_DO(requireThatInitRangeConforms());
+
+ TEST_DONE();
+}
+
+}
+}
+
+TEST_APPHOOK(search::diskindex::Test);
diff --git a/searchlib/src/tests/diskindex/fieldwriter/.gitignore b/searchlib/src/tests/diskindex/fieldwriter/.gitignore
new file mode 100644
index 00000000000..bdb91bca5eb
--- /dev/null
+++ b/searchlib/src/tests/diskindex/fieldwriter/.gitignore
@@ -0,0 +1,3 @@
+/field1.f
+/index
+searchlib_fieldwriter_test_app
diff --git a/searchlib/src/tests/diskindex/fieldwriter/CMakeLists.txt b/searchlib/src/tests/diskindex/fieldwriter/CMakeLists.txt
new file mode 100644
index 00000000000..a03313fac35
--- /dev/null
+++ b/searchlib/src/tests/diskindex/fieldwriter/CMakeLists.txt
@@ -0,0 +1,9 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_fieldwriter_test_app
+ SOURCES
+ fieldwriter_test.cpp
+ DEPENDS
+ searchlib_test
+ searchlib
+)
+vespa_add_test(NAME searchlib_fieldwriter_test_app COMMAND sh runtests.sh)
diff --git a/searchlib/src/tests/diskindex/fieldwriter/fieldwriter_test.cpp b/searchlib/src/tests/diskindex/fieldwriter/fieldwriter_test.cpp
new file mode 100644
index 00000000000..ab6be2e0801
--- /dev/null
+++ b/searchlib/src/tests/diskindex/fieldwriter/fieldwriter_test.cpp
@@ -0,0 +1,972 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("fieldwriter_test");
+#include <vespa/searchlib/common/bitvector.h>
+#include <vespa/searchlib/common/resultset.h>
+#include <vespa/searchlib/util/rand48.h>
+#include <vespa/searchlib/test/fakedata/fakeword.h>
+#include <vespa/searchlib/test/fakedata/fakewordset.h>
+#include <vespa/searchlib/index/docidandfeatures.h>
+#include <vespa/searchlib/index/postinglisthandle.h>
+#include <vespa/searchlib/diskindex/zcposocc.h>
+#include <vespa/searchlib/diskindex/zcposoccrandread.h>
+#include <vespa/searchlib/diskindex/checkpointfile.h>
+#include <vespa/searchlib/index/dummyfileheadercontext.h>
+#include <vespa/searchlib/index/schemautil.h>
+#include <vespa/vespalib/objects/nbostream.h>
+#include <vespa/searchlib/diskindex/fieldwriter.h>
+#include <vespa/searchlib/diskindex/fieldreader.h>
+#include <vespa/vespalib/io/fileutil.h>
+#include <vespa/searchlib/util/dirtraverse.h>
+#include <vespa/searchlib/diskindex/pagedict4file.h>
+#include <vespa/searchlib/diskindex/pagedict4randread.h>
+
+
+using search::ResultSet;
+using search::fef::TermFieldMatchData;
+using search::fef::TermFieldMatchDataArray;
+using search::queryeval::SearchIterator;
+using search::fakedata::FakeWord;
+using search::fakedata::FakeWordSet;
+using search::index::PostingListParams;
+using search::index::PostingListCounts;
+using search::index::PostingListOffsetAndCounts;
+using search::index::Schema;
+using search::index::SchemaUtil;
+using search::common::FileHeaderContext;
+using search::index::DummyFileHeaderContext;
+using search::diskindex::CheckPointFile;
+using search::TuneFileSeqRead;
+using search::TuneFileSeqWrite;
+using search::TuneFileRandRead;
+using vespalib::nbostream;
+using search::diskindex::FieldWriter;
+using search::diskindex::FieldReader;
+using search::diskindex::DocIdMapping;
+using search::diskindex::WordNumMapping;
+using search::diskindex::PageDict4RandRead;
+
+// needed to resolve external symbol from httpd.h on AIX
+void FastS_block_usr2() {}
+
+namespace fieldwriter
+{
+
+uint32_t minSkipDocs = 64;
+uint32_t minChunkDocs = 262144;
+
+vespalib::string dirprefix = "index/";
+
+void
+disableSkip(void)
+{
+ minSkipDocs = 10000000;
+ minChunkDocs = 1 << 30;
+}
+
+void
+enableSkip(void)
+{
+ minSkipDocs = 64;
+ minChunkDocs = 1 << 30;
+}
+
+void
+enableSkipChunks(void)
+{
+ minSkipDocs = 64;
+ minChunkDocs = 9000; // Unrealistic low for testing
+}
+
+
+vespalib::string
+makeWordString(uint64_t wordNum)
+{
+ using AS = vespalib::asciistream;
+ AS ws;
+ ws << AS::Width(4) << AS::Fill('0') << wordNum;
+ return ws.str();
+}
+
+
+typedef std::shared_ptr<FieldReader> FieldReaderSP;
+typedef std::shared_ptr<FieldWriter> FieldWriterSP;
+
+class FieldWriterTest : public FastOS_Application
+{
+private:
+ bool _verbose;
+ uint32_t _numDocs;
+ uint32_t _commonDocFreq;
+ uint32_t _numWordsPerClass;
+ FakeWordSet _wordSet;
+ FakeWordSet _wordSet2;
+public:
+ search::Rand48 _rnd;
+
+private:
+ void Usage(void);
+ void testFake(const std::string &postingType, FakeWord &fw);
+public:
+ FieldWriterTest(void);
+ ~FieldWriterTest(void);
+ int Main(void);
+};
+
+
+void
+FieldWriterTest::Usage(void)
+{
+ printf("fieldwriter_test "
+ "[-c <commonDocFreq>] "
+ "[-d <numDocs>] "
+ "[-v] "
+ "[-w <numWordPerClass>]\n");
+}
+
+
+FieldWriterTest::FieldWriterTest(void)
+ : _verbose(false),
+ _numDocs(3000000),
+ _commonDocFreq(50000),
+ _numWordsPerClass(6),
+ _wordSet(),
+ _wordSet2(),
+ _rnd()
+{
+}
+
+
+FieldWriterTest::~FieldWriterTest(void)
+{
+}
+
+
+class WrappedFieldWriter : public search::fakedata::CheckPointCallback
+{
+public:
+ FieldWriterSP _fieldWriter;
+private:
+ bool _dynamicK;
+ uint32_t _numWordIds;
+ uint32_t _docIdLimit;
+ vespalib::string _namepref;
+ Schema _schema;
+ uint32_t _indexId;
+
+public:
+
+ WrappedFieldWriter(const vespalib::string &namepref,
+ bool dynamicK,
+ uint32_t numWordIds,
+ uint32_t docIdLimit);
+
+ virtual void
+ checkPoint(void) override;
+
+ void
+ earlyOpen(void);
+
+ void
+ lateOpen(void);
+
+ void
+ open(void);
+
+ void
+ close(void);
+
+ void
+ writeCheckPoint(void);
+
+ void
+ readCheckPoint(bool first);
+};
+
+
+WrappedFieldWriter::WrappedFieldWriter(const vespalib::string &namepref,
+ bool dynamicK,
+ uint32_t numWordIds,
+ uint32_t docIdLimit)
+ : _fieldWriter(),
+ _dynamicK(dynamicK),
+ _numWordIds(numWordIds),
+ _docIdLimit(docIdLimit),
+ _namepref(dirprefix + namepref),
+ _schema(),
+ _indexId()
+{
+ Schema::CollectionType ct(Schema::SINGLE);
+ _schema.addIndexField(Schema::IndexField("field1", Schema::STRING, ct));
+ _indexId = _schema.getIndexFieldId("field1");
+}
+
+
+void
+WrappedFieldWriter::earlyOpen(void)
+{
+ TuneFileSeqWrite tuneFileWrite;
+ _fieldWriter.reset(new FieldWriter(_docIdLimit, _numWordIds));
+ _fieldWriter->earlyOpen(_namepref,
+ minSkipDocs, minChunkDocs, _dynamicK, _schema,
+ _indexId,
+ tuneFileWrite);
+}
+
+
+void
+WrappedFieldWriter::lateOpen(void)
+{
+ TuneFileSeqWrite tuneFileWrite;
+ DummyFileHeaderContext fileHeaderContext;
+ fileHeaderContext.disableFileName();
+ _fieldWriter->lateOpen(tuneFileWrite, fileHeaderContext);
+}
+
+
+void
+WrappedFieldWriter::open(void)
+{
+ earlyOpen();
+ lateOpen();
+}
+
+
+void
+WrappedFieldWriter::close(void)
+{
+ _fieldWriter->close();
+ _fieldWriter.reset();
+}
+
+
+void
+WrappedFieldWriter::writeCheckPoint(void)
+{
+ CheckPointFile chkptfile("chkpt");
+ nbostream out;
+ _fieldWriter->checkPointWrite(out);
+ chkptfile.write(out, DummyFileHeaderContext());
+}
+
+
+void
+WrappedFieldWriter::readCheckPoint(bool first)
+{
+ CheckPointFile chkptfile("chkpt");
+ nbostream in;
+ bool openRes = chkptfile.read(in);
+ assert(first || openRes);
+ (void) first;
+ if (!openRes)
+ return;
+ _fieldWriter->checkPointRead(in);
+ assert(in.empty());
+}
+
+
+void
+WrappedFieldWriter::checkPoint(void)
+{
+ writeCheckPoint();
+ _fieldWriter.reset();
+ earlyOpen();
+ readCheckPoint(false);
+ lateOpen();
+}
+
+
+class WrappedFieldReader : public search::fakedata::CheckPointCallback
+{
+public:
+ FieldReaderSP _fieldReader;
+private:
+ std::string _namepref;
+ uint32_t _numWordIds;
+ uint32_t _docIdLimit;
+ WordNumMapping _wmap;
+ DocIdMapping _dmap;
+ Schema _oldSchema;
+ Schema _schema;
+
+public:
+ WrappedFieldReader(const vespalib::string &namepref,
+ uint32_t numWordIds,
+ uint32_t docIdLimit);
+
+ ~WrappedFieldReader(void);
+
+ void
+ earlyOpen(void);
+
+ void
+ lateOpen(void);
+
+ void
+ open(void);
+
+ void
+ close(void);
+
+ void
+ writeCheckPoint(void);
+
+ void
+ readCheckPoint(bool first);
+
+ virtual void
+ checkPoint(void) override;
+};
+
+
+WrappedFieldReader::WrappedFieldReader(const vespalib::string &namepref,
+ uint32_t numWordIds,
+ uint32_t docIdLimit)
+ : search::fakedata::CheckPointCallback(),
+ _fieldReader(),
+ _namepref(dirprefix + namepref),
+ _numWordIds(numWordIds),
+ _docIdLimit(docIdLimit),
+ _wmap(),
+ _dmap(),
+ _oldSchema(),
+ _schema()
+{
+ Schema::CollectionType ct(Schema::SINGLE);
+ _oldSchema.addIndexField(Schema::IndexField("field1",
+ Schema::STRING,
+ ct));
+ _schema.addIndexField(Schema::IndexField("field1",
+ Schema::STRING,
+ ct));
+}
+
+
+WrappedFieldReader::~WrappedFieldReader(void)
+{
+}
+
+
+void
+WrappedFieldReader::earlyOpen(void)
+{
+ TuneFileSeqRead tuneFileRead;
+ _fieldReader.reset(new FieldReader());
+ _fieldReader->earlyOpen(_namepref, tuneFileRead);
+}
+
+
+void
+WrappedFieldReader::lateOpen(void)
+{
+ TuneFileSeqRead tuneFileRead;
+ _wmap.setup(_numWordIds);
+ _dmap.setup(_docIdLimit);
+ _fieldReader->setup(_wmap, _dmap);
+ _fieldReader->lateOpen(_namepref, tuneFileRead);
+}
+
+
+void
+WrappedFieldReader::open(void)
+{
+ earlyOpen();
+ lateOpen();
+}
+
+
+void
+WrappedFieldReader::close(void)
+{
+ _fieldReader->close();
+ _fieldReader.reset();
+}
+
+
+void
+WrappedFieldReader::writeCheckPoint(void)
+{
+ CheckPointFile chkptfile("chkpt");
+ nbostream out;
+ _fieldReader->checkPointWrite(out);
+ chkptfile.write(out, DummyFileHeaderContext());
+}
+
+
+void
+WrappedFieldReader::readCheckPoint(bool first)
+{
+ CheckPointFile chkptfile("chkpt");
+ nbostream in;
+ bool openRes = chkptfile.read(in);
+ assert(first || openRes);
+ (void) first;
+ if (!openRes)
+ return;
+ _fieldReader->checkPointRead(in);
+ assert(in.empty());
+}
+
+
+void
+WrappedFieldReader::checkPoint(void)
+{
+ writeCheckPoint();
+ _fieldReader.reset();
+ earlyOpen();
+ readCheckPoint(false);
+ lateOpen();
+}
+
+
+void
+writeField(FakeWordSet &wordSet,
+ uint32_t docIdLimit,
+ const std::string &namepref,
+ bool dynamicK)
+{
+ const char *dynamicKStr = dynamicK ? "true" : "false";
+
+ FastOS_Time tv;
+ double before;
+ double after;
+
+ LOG(info,
+ "enter writeField, "
+ "namepref=%s, dynamicK=%s",
+ namepref.c_str(),
+ dynamicKStr);
+ tv.SetNow();
+ before = tv.Secs();
+ WrappedFieldWriter ostate(namepref,
+ dynamicK,
+ wordSet.getNumWords(), docIdLimit);
+ FieldWriter::remove(namepref);
+ ostate.open();
+
+ unsigned int wordNum = 1;
+ uint32_t checkPointCheck = 0;
+ uint32_t checkPointInterval = 12227;
+ for (unsigned int wc = 0; wc < wordSet._words.size(); ++wc) {
+ for (unsigned int wi = 0; wi < wordSet._words[wc].size(); ++wi) {
+ FakeWord &fw = *wordSet._words[wc][wi];
+ ostate._fieldWriter->newWord(makeWordString(wordNum));
+ fw.dump(ostate._fieldWriter, false,
+ checkPointCheck,
+ checkPointInterval,
+ NULL);
+ ++wordNum;
+ }
+ }
+ ostate.close();
+
+ tv.SetNow();
+ after = tv.Secs();
+ LOG(info,
+ "leave writeField, "
+ "namepref=%s, dynamicK=%s"
+ " elapsed=%10.6f",
+ namepref.c_str(),
+ dynamicKStr,
+ after - before);
+}
+
+
+void
+writeFieldCheckPointed(FakeWordSet &wordSet,
+ uint32_t docIdLimit,
+ const std::string &namepref,
+ bool dynamicK)
+{
+ const char *dynamicKStr = dynamicK ? "true" : "false";
+
+ FastOS_Time tv;
+ double before;
+ double after;
+ bool first = true;
+
+ LOG(info,
+ "enter writeFieldCheckPointed, "
+ "namepref=%s, dynamicK=%s",
+ namepref.c_str(),
+ dynamicKStr);
+ tv.SetNow();
+ before = tv.Secs();
+
+ unsigned int wordNum = 1;
+ uint32_t checkPointCheck = 0;
+ uint32_t checkPointInterval = 12227;
+ for (unsigned int wc = 0; wc < wordSet._words.size(); ++wc) {
+ for (unsigned int wi = 0; wi < wordSet._words[wc].size(); ++wi) {
+ FakeWord &fw = *wordSet._words[wc][wi];
+
+ WrappedFieldWriter ostate(namepref,
+ dynamicK,
+ wordSet.getNumWords(), docIdLimit);
+ ostate.earlyOpen();
+ ostate.readCheckPoint(first);
+ first = false;
+ ostate.lateOpen();
+ ostate._fieldWriter->newWord(makeWordString(wordNum));
+ fw.dump(ostate._fieldWriter, false,
+ checkPointCheck,
+ checkPointInterval,
+ &ostate);
+ ostate.writeCheckPoint();
+ ++wordNum;
+ }
+ }
+ do {
+ WrappedFieldWriter ostate(namepref,
+ dynamicK,
+ wordSet.getNumWords(), docIdLimit);
+ ostate.earlyOpen();
+ ostate.readCheckPoint(first);
+ ostate.lateOpen();
+ ostate.close();
+ } while (0);
+ CheckPointFile dropper("chkpt");
+ dropper.remove();
+
+ tv.SetNow();
+ after = tv.Secs();
+ LOG(info,
+ "leave writeFieldCheckPointed, "
+ "namepref=%s, dynamicK=%s"
+ " elapsed=%10.6f",
+ namepref.c_str(),
+ dynamicKStr,
+ after - before);
+}
+
+
+void
+readField(FakeWordSet &wordSet,
+ uint32_t docIdLimit,
+ const std::string &namepref,
+ bool dynamicK,
+ bool verbose)
+{
+ const char *dynamicKStr = dynamicK ? "true" : "false";
+
+ FastOS_Time tv;
+ double before;
+ double after;
+ WrappedFieldReader istate(namepref, wordSet.getNumWords(),
+ docIdLimit);
+ LOG(info,
+ "enter readField, "
+ "namepref=%s, dynamicK=%s",
+ namepref.c_str(),
+ dynamicKStr);
+ tv.SetNow();
+ before = tv.Secs();
+ istate.open();
+ if (istate._fieldReader->isValid())
+ istate._fieldReader->read();
+
+ TermFieldMatchData mdfield1;
+
+ unsigned int wordNum = 1;
+ uint32_t checkPointCheck = 0;
+ uint32_t checkPointInterval = 12227;
+ for (unsigned int wc = 0; wc < wordSet._words.size(); ++wc) {
+ for (unsigned int wi = 0; wi < wordSet._words[wc].size(); ++wi) {
+ FakeWord &fw = *wordSet._words[wc][wi];
+
+ TermFieldMatchDataArray tfmda;
+ tfmda.add(&mdfield1);
+
+ fw.validate(istate._fieldReader, wordNum,
+ tfmda, verbose,
+ checkPointCheck, checkPointInterval, &istate);
+ ++wordNum;
+ }
+ }
+
+ istate.close();
+ tv.SetNow();
+ after = tv.Secs();
+ CheckPointFile dropper("chkpt");
+ dropper.remove();
+ LOG(info,
+ "leave readField, "
+ "namepref=%s, dynamicK=%s"
+ " elapsed=%10.6f",
+ namepref.c_str(),
+ dynamicKStr,
+ after - before);
+}
+
+
+void
+randReadField(FakeWordSet &wordSet,
+ const std::string &namepref,
+ bool dynamicK,
+ bool verbose)
+{
+ const char *dynamicKStr = dynamicK ? "true" : "false";
+
+ FastOS_Time tv;
+ double before;
+ double after;
+ PostingListCounts counts;
+
+ LOG(info,
+ "enter randReadField,"
+ " namepref=%s, dynamicK=%s",
+ namepref.c_str(),
+ dynamicKStr);
+ tv.SetNow();
+ before = tv.Secs();
+
+ std::string cname = dirprefix + namepref;
+ cname += "dictionary";
+
+ std::unique_ptr<search::index::DictionaryFileRandRead> dictFile;
+ dictFile.reset(new PageDict4RandRead);
+
+ search::index::PostingListFileRandRead *postingFile = NULL;
+ if (dynamicK)
+ postingFile =
+ new search::diskindex::ZcPosOccRandRead;
+ else
+ postingFile =
+ new search::diskindex::Zc4PosOccRandRead;
+
+ TuneFileSeqRead tuneFileRead;
+ TuneFileRandRead tuneFileRandRead;
+ bool openCntRes = dictFile->open(cname, tuneFileRandRead);
+ assert(openCntRes);
+ (void) openCntRes;
+ vespalib::string cWord;
+
+ std::string pname = dirprefix + namepref + "posocc.dat";
+ pname += ".compressed";
+ bool openPostingRes = postingFile->open(pname, tuneFileRandRead);
+ assert(openPostingRes);
+ (void) openPostingRes;
+
+ for (int loop = 0; loop < 1; ++loop) {
+ unsigned int wordNum = 1;
+ for (unsigned int wc = 0; wc < wordSet._words.size(); ++wc) {
+ for (unsigned int wi = 0; wi < wordSet._words[wc].size(); ++wi) {
+ FakeWord &fw = *wordSet._words[wc][wi];
+
+ PostingListOffsetAndCounts offsetAndCounts;
+ uint64_t checkWordNum;
+ dictFile->lookup(makeWordString(wordNum),
+ checkWordNum,
+ offsetAndCounts);
+ assert(wordNum == checkWordNum);
+
+ counts = offsetAndCounts._counts;
+ search::index::PostingListHandle handle;
+
+ handle._bitLength = counts._bitLength;
+ handle._file = postingFile;
+ handle._bitOffset = offsetAndCounts._offset;
+
+ postingFile->readPostingList(counts,
+ 0,
+ counts._segments.empty() ? 1 : counts._segments.size(),
+ handle);
+
+ TermFieldMatchData mdfield1;
+ TermFieldMatchDataArray tfmda;
+ tfmda.add(&mdfield1);
+
+ std::unique_ptr<SearchIterator>
+ sb(handle.createIterator(counts, tfmda));
+
+ // LOG(info, "loop=%d, wordNum=%u", loop, wordNum);
+ fw.validate(sb.get(), tfmda, verbose);
+
+ sb.reset(handle.createIterator(counts, tfmda));
+ fw.validate(sb.get(), tfmda, 19, verbose);
+
+ sb.reset(handle.createIterator(counts, tfmda));
+ fw.validate(sb.get(), tfmda, 99, verbose);
+
+ sb.reset(handle.createIterator(counts, tfmda));
+ fw.validate(sb.get(), tfmda, 799, verbose);
+
+ sb.reset(handle.createIterator(counts, tfmda));
+ fw.validate(sb.get(), tfmda, 6399, verbose);
+
+ sb.reset(handle.createIterator(counts, tfmda));
+ fw.validate(sb.get(), tfmda, 11999, verbose);
+ ++wordNum;
+ }
+ }
+ }
+
+ postingFile->close();
+ dictFile->close();
+ delete postingFile;
+ dictFile.reset();
+ tv.SetNow();
+ after = tv.Secs();
+ LOG(info,
+ "leave randReadField, namepref=%s,"
+ " dynamicK=%s, "
+ "elapsed=%10.6f",
+ namepref.c_str(),
+ dynamicKStr,
+ after - before);
+}
+
+
+void
+fusionField(uint32_t numWordIds,
+ uint32_t docIdLimit,
+ const vespalib::string &ipref,
+ const vespalib::string &opref,
+ bool doRaw,
+ bool dynamicK)
+{
+ const char *rawStr = doRaw ? "true" : "false";
+ const char *dynamicKStr = dynamicK ? "true" : "false";
+
+
+ LOG(info,
+ "enter fusionField, ipref=%s, opref=%s,"
+ " raw=%s,"
+ " dynamicK=%s",
+ ipref.c_str(),
+ opref.c_str(),
+ rawStr,
+ dynamicKStr);
+
+ FastOS_Time tv;
+ double before;
+ double after;
+ WrappedFieldWriter ostate(opref,
+ dynamicK,
+ numWordIds, docIdLimit);
+ WrappedFieldReader istate(ipref, numWordIds, docIdLimit);
+
+ tv.SetNow();
+ before = tv.Secs();
+
+ ostate.open();
+ istate.open();
+
+ if (doRaw) {
+ PostingListParams featureParams;
+ featureParams.clear();
+ featureParams.set("cooked", false);
+ istate._fieldReader->setFeatureParams(featureParams);
+ }
+ if (istate._fieldReader->isValid())
+ istate._fieldReader->read();
+
+ while (istate._fieldReader->isValid()) {
+ istate._fieldReader->write(*ostate._fieldWriter);
+ istate._fieldReader->read();
+ }
+ istate.close();
+ ostate.close();
+ tv.SetNow();
+ after = tv.Secs();
+ LOG(info,
+ "leave fusionField, ipref=%s, opref=%s,"
+ " raw=%s dynamicK=%s, "
+ " elapsed=%10.6f",
+ ipref.c_str(),
+ opref.c_str(),
+ rawStr,
+ dynamicKStr,
+ after - before);
+}
+
+
+void
+testFieldWriterVariants(FakeWordSet &wordSet,
+ uint32_t docIdLimit, bool verbose)
+{
+ CheckPointFile dropper("chkpt");
+ dropper.remove();
+ disableSkip();
+ writeField(wordSet, docIdLimit, "new4", true);
+ readField(wordSet, docIdLimit, "new4", true, verbose);
+ readField(wordSet, docIdLimit, "new4", true, verbose);
+ writeFieldCheckPointed(wordSet, docIdLimit, "new6", true);
+ writeField(wordSet, docIdLimit, "new5", false);
+ readField(wordSet, docIdLimit, "new5", false, verbose);
+ writeFieldCheckPointed(wordSet, docIdLimit, "new7", false);
+ enableSkip();
+ writeField(wordSet, docIdLimit, "newskip4", true);
+ readField(wordSet, docIdLimit, "newskip4", true, verbose);
+ writeFieldCheckPointed(wordSet, docIdLimit, "newskip6",
+ true);
+ writeField(wordSet, docIdLimit, "newskip5", false);
+ readField(wordSet, docIdLimit, "newskip5", false, verbose);
+ writeFieldCheckPointed(wordSet, docIdLimit, "newskip7",
+ false);
+ enableSkipChunks();
+ writeField(wordSet, docIdLimit, "newchunk4", true);
+ readField(wordSet, docIdLimit, "newchunk4", true, verbose);
+ writeFieldCheckPointed(wordSet, docIdLimit, "newchunk6",
+ true);
+ writeField(wordSet, docIdLimit, "newchunk5", false);
+ readField(wordSet, docIdLimit,
+ "newchunk5",false, verbose);
+ writeFieldCheckPointed(wordSet, docIdLimit, "newchunk7",
+ false);
+ disableSkip();
+ fusionField(wordSet.getNumWords(),
+ docIdLimit,
+ "new4", "new4x",
+ false, true);
+ fusionField(wordSet.getNumWords(),
+ docIdLimit,
+ "new4", "new4xx",
+ true, true);
+ fusionField(wordSet.getNumWords(),
+ docIdLimit,
+ "new5", "new5x",
+ false, false);
+ fusionField(wordSet.getNumWords(),
+ docIdLimit,
+ "new5", "new5xx",
+ true, false);
+ randReadField(wordSet, "new4", true, verbose);
+ randReadField(wordSet, "new5", false, verbose);
+ enableSkip();
+ fusionField(wordSet.getNumWords(),
+ docIdLimit,
+ "newskip4", "newskip4x",
+ false, true);
+ fusionField(wordSet.getNumWords(),
+ docIdLimit,
+ "newskip4", "newskip4xx",
+ true, true);
+ fusionField(wordSet.getNumWords(),
+ docIdLimit,
+ "newskip5", "newskip5x",
+ false, false);
+ fusionField(wordSet.getNumWords(),
+ docIdLimit,
+ "newskip5", "newskip5xx",
+ true, false);
+ randReadField(wordSet, "newskip4", true, verbose);
+ randReadField(wordSet, "newskip5", false, verbose);
+ enableSkipChunks();
+ fusionField(wordSet.getNumWords(),
+ docIdLimit,
+ "newchunk4", "newchunk4x",
+ false, true);
+ fusionField(wordSet.getNumWords(),
+ docIdLimit,
+ "newchunk4", "newchunk4xx",
+ true, true);
+ fusionField(wordSet.getNumWords(),
+ docIdLimit,
+ "newchunk5", "newchunk5x",
+ false, false);
+ fusionField(wordSet.getNumWords(),
+ docIdLimit,
+ "newchunk5", "newchunk5xx",
+ true, false);
+ randReadField(wordSet, "newchunk4", true, verbose);
+ randReadField(wordSet, "newchunk5", false, verbose);
+}
+
+
+void
+testFieldWriterVariantsWithHighLids(FakeWordSet &wordSet, uint32_t docIdLimit,
+ bool verbose)
+{
+ CheckPointFile dropper("chkpt");
+ dropper.remove();
+ disableSkip();
+ writeField(wordSet, docIdLimit, "hlid4", true);
+ readField(wordSet, docIdLimit, "hlid4", true, verbose);
+ writeField(wordSet, docIdLimit, "hlid5", false);
+ readField(wordSet, docIdLimit, "hlid5", false, verbose);
+ randReadField(wordSet, "hlid4", true, verbose);
+ randReadField(wordSet, "hlid5", false, verbose);
+ enableSkip();
+ writeField(wordSet, docIdLimit, "hlidskip4", true);
+ readField(wordSet, docIdLimit, "hlidskip4", true, verbose);
+ writeField(wordSet, docIdLimit, "hlidskip5", false);
+ readField(wordSet, docIdLimit, "hlidskip5", false, verbose);
+ randReadField(wordSet, "hlidskip4", true, verbose);
+ randReadField(wordSet, "hlidskip5", false, verbose);
+ enableSkipChunks();
+ writeField(wordSet, docIdLimit, "hlidchunk4", true);
+ readField(wordSet, docIdLimit, "hlidchunk4", true, verbose);
+ writeField(wordSet, docIdLimit, "hlidchunk5", false);
+ readField(wordSet, docIdLimit, "hlidchunk5", false, verbose);
+ randReadField(wordSet, "hlidchunk4", true, verbose);
+ randReadField(wordSet, "hlidchunk5", false, verbose);
+}
+
+int
+FieldWriterTest::Main(void)
+{
+ int argi;
+ char c;
+ const char *optArg;
+
+ if (_argc > 0) {
+ DummyFileHeaderContext::setCreator(_argv[0]);
+ }
+ argi = 1;
+
+ while ((c = GetOpt("c:d:vw:", optArg, argi)) != -1) {
+ switch(c) {
+ case 'c':
+ _commonDocFreq = atoi(optArg);
+ if (_commonDocFreq == 0)
+ _commonDocFreq = 1;
+ break;
+ case 'd':
+ _numDocs = atoi(optArg);
+ break;
+ case 'v':
+ _verbose = true;
+ break;
+ case 'w':
+ _numWordsPerClass = atoi(optArg);
+ break;
+ default:
+ Usage();
+ return 1;
+ }
+ }
+
+ if (_commonDocFreq > _numDocs) {
+ Usage();
+ return 1;
+ }
+
+ _wordSet.setupParams(false, false);
+ _wordSet.setupWords(_rnd, _numDocs, _commonDocFreq, _numWordsPerClass);
+
+ vespalib::mkdir("index", false);
+ testFieldWriterVariants(_wordSet, _numDocs, _verbose);
+
+ _wordSet2.setupParams(false, false);
+ _wordSet2.setupWords(_rnd, _numDocs, _commonDocFreq, 3);
+ uint32_t docIdBias = 700000000;
+ _wordSet2.addDocIdBias(docIdBias); // Large skip numbers
+ testFieldWriterVariantsWithHighLids(_wordSet2, _numDocs + docIdBias,
+ _verbose);
+ return 0;
+}
+
+} // namespace fieldwriter
+
+int
+main(int argc, char **argv)
+{
+ fieldwriter::FieldWriterTest app;
+
+ setvbuf(stdout, NULL, _IOLBF, 32768);
+ app._rnd.srand48(32);
+ return app.Entry(argc, argv);
+}
diff --git a/searchlib/src/tests/diskindex/fieldwriter/runtests.sh b/searchlib/src/tests/diskindex/fieldwriter/runtests.sh
new file mode 100755
index 00000000000..1f2b6d6076f
--- /dev/null
+++ b/searchlib/src/tests/diskindex/fieldwriter/runtests.sh
@@ -0,0 +1,66 @@
+#!/bin/sh
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+rm -f new* chkpt*
+sync
+sleep 2
+
+if ${VALGRIND} ./searchlib_fieldwriter_test_app "$@"
+then
+ :
+else
+ echo FAILURE: ./searchlib_fieldwriter_test_app program failed.
+ exit 1
+fi
+
+checksame()
+{
+ file1=$1
+ rval=0
+ shift
+ for file in $*
+ do
+ if cmp -s $file1 $file
+ then
+ :
+ else
+ echo "FAILURE: $file1 != $file"
+ rval=1
+ fi
+ done
+ return $rval
+}
+
+newpcntfiles1=index/new[46]*dictionary.pdat
+newpcntfiles1b=index/new[46]*dictionary.spdat
+newpcntfiles1c=index/new[46]*dictionary.ssdat
+newpcntfiles2=index/newskip[46]*dictionary.pdat
+newpcntfiles2b=index/newskip[46]*dictionary.pdat
+newpcntfiles2c=index/newskip[46]*dictionary.pdat
+newpcntfiles3=index/newchunk[46]*dictionary.pdat
+newpcntfiles3b=index/newchunk[46]*dictionary.pdat
+newpcntfiles3c=index/newchunk[46]*dictionary.pdat
+newpcntfiles4=index/new[57]*dictionary.pdat
+newpcntfiles4b=index/new[57]*dictionary.pdat
+newpcntfiles4c=index/new[57]*dictionary.pdat
+newpcntfiles5=index/newskip[57]*dictionary.pdat
+newpcntfiles5b=index/newskip[57]*dictionary.pdat
+newpcntfiles5c=index/newskip[57]*dictionary.pdat
+newpcntfiles6=index/newchunk[57]*dictionary.pdat
+newpcntfiles6b=index/newchunk[57]*dictionary.pdat
+newpcntfiles6c=index/newchunk[57]*dictionary.pdat
+newpfiles1=index/new[46]*posocc.dat.compressed
+newpfiles2=index/newskip[46]*posocc.dat.compressed
+newpfiles3=index/newchunk[46]*posocc.dat.compressed
+newpfiles4=index/new[57]*posocc.dat.compressed
+newpfiles5=index/newskip[57]*posocc.dat.compressed
+newpfiles6=index/newchunk[57]*posocc.dat.compressed
+
+if checksame $newpcntfiles1 && checksame $newpcntfiles1b && checksame $newpcntfiles1c && checksame $newpfiles1 && checksame $newpcntfiles2 && checksame $newpcntfiles2b && checksame $newpcntfiles2c && checksame $newpfiles2 && checksame $newpcntfiles3 && checksame $newpcntfiles3b && checksame $newpcntfiles3c && checksame $newpfiles3 && checksame $newpcntfiles4 && checksame $newpcntfiles4b && checksame $newpcntfiles4c && checksame $newpfiles4 && checksame $newpcntfiles5 && checksame $newpcntfiles5b && checksame $newpcntfiles5c && checksame $newpfiles5 && checksame $newpcntfiles6 && checksame $newpcntfiles6b && checksame $newpcntfiles6c && checksame $newpfiles6
+then
+ echo SUCCESS: Files match up
+ exit 0
+else
+ echo FAILURE: Files do not match up
+ exit 1
+fi
diff --git a/searchlib/src/tests/diskindex/fusion/.gitignore b/searchlib/src/tests/diskindex/fusion/.gitignore
new file mode 100644
index 00000000000..8526d6faa38
--- /dev/null
+++ b/searchlib/src/tests/diskindex/fusion/.gitignore
@@ -0,0 +1,37 @@
+.depend
+Makefile
+[dms]dump[1-5]
+chkpt
+ddump2
+ddump3
+ddump4
+ddump5
+dmdump2
+dmdump3
+dmdump4
+dmdump5
+dmdump[1-5]
+dump2
+dump3
+dump4
+dump5
+dump[1-5]
+fusion_test
+mdump2
+mdump3
+mdump4
+mdump5
+sdump2
+sdump3
+sdump4
+sdump5
+/ddump6
+/dmdump6
+/dump6
+/dumpwords.out
+/mdump6
+/transpose.out
+/usage.out
+/zwordc0coll.out
+/zwordf0field.out
+searchlib_fusion_test_app
diff --git a/searchlib/src/tests/diskindex/fusion/CMakeLists.txt b/searchlib/src/tests/diskindex/fusion/CMakeLists.txt
new file mode 100644
index 00000000000..9c079b09c90
--- /dev/null
+++ b/searchlib/src/tests/diskindex/fusion/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_fusion_test_app
+ SOURCES
+ fusion_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_fusion_test_app COMMAND sh fusion_test.sh)
diff --git a/searchlib/src/tests/diskindex/fusion/DESC b/searchlib/src/tests/diskindex/fusion/DESC
new file mode 100644
index 00000000000..b0db86422b9
--- /dev/null
+++ b/searchlib/src/tests/diskindex/fusion/DESC
@@ -0,0 +1 @@
+fusion test. Performs basic fusion operations and validates results.
diff --git a/searchlib/src/tests/diskindex/fusion/FILES b/searchlib/src/tests/diskindex/fusion/FILES
new file mode 100644
index 00000000000..fb22ce21a9d
--- /dev/null
+++ b/searchlib/src/tests/diskindex/fusion/FILES
@@ -0,0 +1 @@
+fusion_test.cpp
diff --git a/searchlib/src/tests/diskindex/fusion/fusion_test.cpp b/searchlib/src/tests/diskindex/fusion/fusion_test.cpp
new file mode 100644
index 00000000000..4191a8f8d2b
--- /dev/null
+++ b/searchlib/src/tests/diskindex/fusion/fusion_test.cpp
@@ -0,0 +1,506 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("fusion_test");
+#include <vespa/searchlib/diskindex/checkpointfile.h>
+#include <vespa/searchlib/diskindex/fusion.h>
+#include <vespa/searchlib/diskindex/indexbuilder.h>
+#include <vespa/searchlib/diskindex/zcposoccrandread.h>
+#include <vespa/searchlib/fef/fieldpositionsiterator.h>
+#include <vespa/searchlib/fef/termfieldmatchdata.h>
+#include <vespa/searchlib/fef/termfieldmatchdataarray.h>
+#include <vespa/searchlib/index/docbuilder.h>
+#include <vespa/searchlib/index/dummyfileheadercontext.h>
+#include <vespa/searchlib/index/indexbuilder.h>
+#include <vespa/searchlib/index/schemautil.h>
+#include <vespa/searchlib/btree/btreeroot.hpp>
+#include <vespa/searchlib/btree/btreenodeallocator.hpp>
+#include <vespa/searchlib/btree/btreenode.hpp>
+#include <vespa/searchlib/memoryindex/dictionary.h>
+#include <vespa/searchlib/memoryindex/documentinverter.h>
+#include <vespa/searchlib/memoryindex/featurestore.h>
+#include <vespa/searchlib/memoryindex/postingiterator.h>
+#include <vespa/searchlib/memoryindex/i_document_insert_listener.h>
+#include <vespa/searchlib/diskindex/diskindex.h>
+#include <vespa/vespalib/objects/nbostream.h>
+#include <vespa/vespalib/testkit/testapp.h>
+#include <vespa/searchlib/util/filekit.h>
+#include <vespa/searchlib/common/sequencedtaskexecutor.h>
+
+namespace search
+{
+
+
+using document::Document;
+using fef::FieldPositionsIterator;
+using fef::TermFieldMatchData;
+using fef::TermFieldMatchDataArray;
+using index::DocBuilder;
+using index::DocIdAndFeatures;
+using index::Schema;
+using index::SchemaUtil;
+using search::common::FileHeaderContext;
+using search::index::DummyFileHeaderContext;
+using memoryindex::Dictionary;
+using memoryindex::DocumentInverter;
+using queryeval::SearchIterator;
+
+namespace diskindex
+{
+
+
+class Test : public vespalib::TestApp
+{
+private:
+ Schema _schema;
+ const Schema & getSchema() const { return _schema; }
+
+ void
+ requireThatFusionIsWorking(const vespalib::string &prefix,
+ bool directio,
+ bool readmmap);
+
+public:
+ Test();
+ int Main();
+};
+
+
+namespace
+{
+
+void
+myPushDocument(DocumentInverter &inv, Dictionary &d)
+{
+ inv.pushDocuments(d, std::shared_ptr<IDestructorCallback>());
+}
+
+
+}
+
+vespalib::string
+toString(FieldPositionsIterator posItr,
+ bool hasElements = false, bool hasWeights = false)
+{
+ vespalib::asciistream ss;
+ ss << "{";
+ ss << posItr.getFieldLength() << ":";
+ bool first = true;
+ for (; posItr.valid(); posItr.next()) {
+ if (!first) ss << ",";
+ ss << posItr.getPosition();
+ first = false;
+ if (hasElements) {
+ ss << "[e=" << posItr.getElementId();
+ if (hasWeights)
+ ss << ",w=" << posItr.getElementWeight();
+ ss << ",l=" << posItr.getElementLen() << "]";
+ }
+ }
+ ss << "}";
+ return ss.str();
+}
+
+
+#if 0
+vespalib::string
+toString(DocIdAndFeatures &features)
+{
+ vespalib::asciistream ss;
+ ss << "{";
+ std::vector<search::index::WordDocFieldElementFeatures>::const_iterator
+ element = features._elements.begin();
+ std::vector<search::index::WordDocFieldElementWordPosFeatures>::
+ const_iterator position = features._wordPositions.begin();
+ for (; field != fielde; ++field) {
+ ss << "f=" << field->getFieldId() << "{";
+ uint32_t numElements = field->getNumElements();
+ while (numElements--) {
+ ss << "e=" << element->getElementId() << ","
+ << "ew=" << element->getWeight() << ","
+ << "el=" << element->getElementLen() << "{";
+ uint32_t numOccs = element->getNumOccs();
+ while (numOccs--) {
+ ss << position->getWordPos();
+ if (numOccs != 0)
+ ss << ",";
+ }
+ ss << "}";
+ if (numElements != 0)
+ ss << ",";
+ }
+ ss << "}";
+ }
+ ss << "}";
+ return ss.str();
+}
+#endif
+
+
+void
+validateDiskIndex(DiskIndex &dw,
+ bool f2HasElements,
+ bool f3HasWeights)
+{
+ typedef DiskIndex::LookupResult LR;
+ typedef index::PostingListHandle PH;
+ typedef search::queryeval::SearchIterator SB;
+
+ const Schema &schema(dw.getSchema());
+
+ {
+ uint32_t id1(schema.getIndexFieldId("f0"));
+ LR::UP lr1(dw.lookup(id1, "c"));
+ EXPECT_TRUE(lr1.get() != NULL);
+ PH::UP wh1(dw.readPostingList(*lr1));
+ EXPECT_TRUE(wh1.get() != NULL);
+ TermFieldMatchData f0;
+ TermFieldMatchDataArray a;
+ a.add(&f0);
+ SB::UP sbap(wh1->createIterator(lr1->counts, a));
+ sbap->initFullRange();
+ EXPECT_EQUAL("{1000000:}", toString(f0.getIterator()));
+ EXPECT_TRUE(sbap->seek(10));
+ sbap->unpack(10);
+ EXPECT_EQUAL("{7:2}", toString(f0.getIterator()));
+ }
+ {
+ uint32_t id1(schema.getIndexFieldId("f2"));
+ LR::UP lr1(dw.lookup(id1, "ax"));
+ EXPECT_TRUE(lr1.get() != NULL);
+ PH::UP wh1(dw.readPostingList(*lr1));
+ EXPECT_TRUE(wh1.get() != NULL);
+ TermFieldMatchData f2;
+ TermFieldMatchDataArray a;
+ a.add(&f2);
+ SB::UP sbap(wh1->createIterator(lr1->counts, a));
+ sbap->initFullRange();
+ EXPECT_EQUAL("{1000000:}", toString(f2.getIterator()));
+ EXPECT_TRUE(sbap->seek(10));
+ sbap->unpack(10);
+ if (f2HasElements) {
+ EXPECT_EQUAL("{3:0[e=0,l=3],0[e=1,l=1]}",
+ toString(f2.getIterator(), true));
+ } else {
+ EXPECT_EQUAL("{3:0[e=0,l=3]}",
+ toString(f2.getIterator(), true));
+ }
+ }
+ {
+ uint32_t id1(schema.getIndexFieldId("f3"));;
+ LR::UP lr1(dw.lookup(id1, "wx"));
+ EXPECT_TRUE(lr1.get() != NULL);
+ PH::UP wh1(dw.readPostingList(*lr1));
+ EXPECT_TRUE(wh1.get() != NULL);
+ TermFieldMatchData f3;
+ TermFieldMatchDataArray a;
+ a.add(&f3);
+ SB::UP sbap(wh1->createIterator(lr1->counts, a));
+ sbap->initFullRange();
+ EXPECT_EQUAL("{1000000:}", toString(f3.getIterator()));
+ EXPECT_TRUE(sbap->seek(10));
+ sbap->unpack(10);
+ if (f3HasWeights) {
+ EXPECT_EQUAL("{2:0[e=0,w=4,l=2]}",
+ toString(f3.getIterator(), true, true));
+ } else {
+ EXPECT_EQUAL("{2:0[e=0,w=1,l=2]}",
+ toString(f3.getIterator(), true, true));
+ }
+ }
+ {
+ uint32_t id1(schema.getIndexFieldId("f3"));;
+ LR::UP lr1(dw.lookup(id1, "zz"));
+ EXPECT_TRUE(lr1.get() != NULL);
+ PH::UP wh1(dw.readPostingList(*lr1));
+ EXPECT_TRUE(wh1.get() != NULL);
+ TermFieldMatchData f3;
+ TermFieldMatchDataArray a;
+ a.add(&f3);
+ SB::UP sbap(wh1->createIterator(lr1->counts, a));
+ sbap->initFullRange();
+ EXPECT_EQUAL("{1000000:}", toString(f3.getIterator()));
+ EXPECT_TRUE(sbap->seek(11));
+ sbap->unpack(11);
+ if (f3HasWeights) {
+ EXPECT_EQUAL("{1:0[e=0,w=-27,l=1]}",
+ toString(f3.getIterator(), true, true));
+ } else {
+ EXPECT_EQUAL("{1:0[e=0,w=1,l=1]}",
+ toString(f3.getIterator(), true, true));
+ }
+ }
+ {
+ uint32_t id1(schema.getIndexFieldId("f3"));;
+ LR::UP lr1(dw.lookup(id1, "zz0"));
+ EXPECT_TRUE(lr1.get() != NULL);
+ PH::UP wh1(dw.readPostingList(*lr1));
+ EXPECT_TRUE(wh1.get() != NULL);
+ TermFieldMatchData f3;
+ TermFieldMatchDataArray a;
+ a.add(&f3);
+ SB::UP sbap(wh1->createIterator(lr1->counts, a));
+ sbap->initFullRange();
+ EXPECT_EQUAL("{1000000:}", toString(f3.getIterator()));
+ EXPECT_TRUE(sbap->seek(12));
+ sbap->unpack(12);
+ if (f3HasWeights) {
+ EXPECT_EQUAL("{1:0[e=0,w=0,l=1]}",
+ toString(f3.getIterator(), true, true));
+ } else {
+ EXPECT_EQUAL("{1:0[e=0,w=1,l=1]}",
+ toString(f3.getIterator(), true, true));
+ }
+ }
+}
+
+
+void
+Test::requireThatFusionIsWorking(const vespalib::string &prefix,
+ bool directio,
+ bool readmmap)
+{
+ Schema schema;
+ Schema schema2;
+ Schema schema3;
+ for (SchemaUtil::IndexIterator it(getSchema()); it.isValid(); ++it) {
+ const Schema::IndexField &iField =
+ _schema.getIndexField(it.getIndex());
+ schema.addIndexField(Schema::IndexField(iField.getName(),
+ iField.getDataType(),
+ iField.getCollectionType()));
+ if (iField.getCollectionType() == Schema::WEIGHTEDSET)
+ schema2.addIndexField(Schema::IndexField(iField.getName(),
+ iField.getDataType(),
+ Schema::ARRAY));
+ else
+ schema2.addIndexField(Schema::IndexField(iField.getName(),
+ iField.getDataType(),
+ iField.getCollectionType()));
+ schema3.addIndexField(Schema::IndexField(iField.getName(),
+ iField.getDataType(),
+ Schema::SINGLE));
+ }
+ schema3.addIndexField(Schema::IndexField("f4",
+ Schema::STRING));
+ schema.addFieldSet(Schema::FieldSet("nc0").
+ addField("f0").addField("f1"));
+ schema2.addFieldSet(Schema::FieldSet("nc0").
+ addField("f1").addField("f0"));
+ schema3.addFieldSet(Schema::FieldSet("nc2").
+ addField("f0").addField("f1").
+ addField("f2").addField("f3").
+ addField("f4"));
+ Dictionary d(schema);
+ DocBuilder b(schema);
+ SequencedTaskExecutor invertThreads(2);
+ SequencedTaskExecutor pushThreads(2);
+ DocumentInverter inv(schema, invertThreads, pushThreads);
+ Document::UP doc;
+
+ b.startDocument("doc::10");
+ b.startIndexField("f0").
+ addStr("a").addStr("b").addStr("c").addStr("d").
+ addStr("e").addStr("f").addStr("z").
+ endField();
+ b.startIndexField("f1").
+ addStr("w").addStr("x").
+ addStr("y").addStr("z").
+ endField();
+ b.startIndexField("f2").
+ startElement(4).addStr("ax").addStr("ay").addStr("z").endElement().
+ startElement(5).addStr("ax").endElement().
+ endField();
+ b.startIndexField("f3").
+ startElement(4).addStr("wx").addStr("z").endElement().
+ endField();
+
+ doc = b.endDocument();
+ inv.invertDocument(10, *doc);
+ invertThreads.sync();
+ myPushDocument(inv, d);
+ pushThreads.sync();
+
+ b.startDocument("doc::11").
+ startIndexField("f3").
+ startElement(-27).addStr("zz").endElement().
+ endField();
+ doc = b.endDocument();
+ inv.invertDocument(11, *doc);
+ invertThreads.sync();
+ myPushDocument(inv, d);
+ pushThreads.sync();
+
+ b.startDocument("doc::12").
+ startIndexField("f3").
+ startElement(0).addStr("zz0").endElement().
+ endField();
+ doc = b.endDocument();
+ inv.invertDocument(12, *doc);
+ invertThreads.sync();
+ myPushDocument(inv, d);
+ pushThreads.sync();
+
+ IndexBuilder ib(schema);
+ vespalib::string dump2dir = prefix + "dump2";
+ ib.setPrefix(dump2dir);
+ uint32_t numDocs = 12 + 1;
+ uint32_t numWords = d.getNumUniqueWords();
+ bool dynamicKPosOcc = false;
+ TuneFileIndexing tuneFileIndexing;
+ TuneFileSearch tuneFileSearch;
+ DummyFileHeaderContext fileHeaderContext;
+ if (directio) {
+ tuneFileIndexing._read.setWantDirectIO();
+ tuneFileIndexing._write.setWantDirectIO();
+ tuneFileSearch._read.setWantDirectIO();
+ }
+ if (readmmap)
+ tuneFileSearch._read.setWantMemoryMap();
+ ib.open(numDocs, numWords, tuneFileIndexing, fileHeaderContext);
+ d.dump(ib);
+ ib.close();
+
+ vespalib::string tsName = dump2dir + "/.teststamp";
+ typedef search::FileKit FileKit;
+ EXPECT_TRUE(FileKit::createStamp(tsName));
+ EXPECT_TRUE(FileKit::hasStamp(tsName));
+ EXPECT_TRUE(FileKit::removeStamp(tsName));
+ EXPECT_FALSE(FileKit::hasStamp(tsName));
+
+ do {
+ DiskIndex dw2(prefix + "dump2");
+ if (!EXPECT_TRUE(dw2.setup(tuneFileSearch)))
+ break;
+ TEST_DO(validateDiskIndex(dw2, true, true));
+ } while (0);
+
+ do {
+ std::vector<vespalib::string> sources;
+ SelectorArray selector(numDocs, 0);
+ sources.push_back(prefix + "dump2");
+ if (!EXPECT_TRUE(Fusion::merge(schema,
+ prefix + "dump3",
+ sources, selector,
+ dynamicKPosOcc,
+ tuneFileIndexing,
+ fileHeaderContext)))
+ return;
+ } while (0);
+ do {
+ DiskIndex dw3(prefix + "dump3");
+ if (!EXPECT_TRUE(dw3.setup(tuneFileSearch)))
+ break;
+ TEST_DO(validateDiskIndex(dw3, true, true));
+ } while (0);
+ do {
+ std::vector<vespalib::string> sources;
+ SelectorArray selector(numDocs, 0);
+ sources.push_back(prefix + "dump3");
+ if (!EXPECT_TRUE(Fusion::merge(schema2,
+ prefix + "dump4",
+ sources, selector,
+ dynamicKPosOcc,
+ tuneFileIndexing,
+ fileHeaderContext)))
+ return;
+ } while (0);
+ do {
+ DiskIndex dw4(prefix + "dump4");
+ if (!EXPECT_TRUE(dw4.setup(tuneFileSearch)))
+ break;
+ TEST_DO(validateDiskIndex(dw4, true, false));
+ } while (0);
+ do {
+ std::vector<vespalib::string> sources;
+ SelectorArray selector(numDocs, 0);
+ sources.push_back(prefix + "dump3");
+ if (!EXPECT_TRUE(Fusion::merge(schema3,
+ prefix + "dump5",
+ sources, selector,
+ dynamicKPosOcc,
+ tuneFileIndexing,
+ fileHeaderContext)))
+ return;
+ } while (0);
+ do {
+ DiskIndex dw5(prefix + "dump5");
+ if (!EXPECT_TRUE(dw5.setup(tuneFileSearch)))
+ break;
+ TEST_DO(validateDiskIndex(dw5, false, false));
+ } while (0);
+ do {
+ std::vector<vespalib::string> sources;
+ SelectorArray selector(numDocs, 0);
+ sources.push_back(prefix + "dump3");
+ if (!EXPECT_TRUE(Fusion::merge(schema,
+ prefix + "dump6",
+ sources, selector,
+ !dynamicKPosOcc,
+ tuneFileIndexing,
+ fileHeaderContext)))
+ return;
+ } while (0);
+ do {
+ DiskIndex dw6(prefix + "dump6");
+ if (!EXPECT_TRUE(dw6.setup(tuneFileSearch)))
+ break;
+ TEST_DO(validateDiskIndex(dw6, true, true));
+ } while (0);
+ do {
+ std::vector<vespalib::string> sources;
+ SelectorArray selector(numDocs, 0);
+ sources.push_back(prefix + "dump2");
+ if (!EXPECT_TRUE(Fusion::merge(schema,
+ prefix + "dump3",
+ sources, selector,
+ dynamicKPosOcc,
+ tuneFileIndexing,
+ fileHeaderContext)))
+ return;
+ } while (0);
+ do {
+ DiskIndex dw3(prefix + "dump3");
+ if (!EXPECT_TRUE(dw3.setup(tuneFileSearch)))
+ break;
+ TEST_DO(validateDiskIndex(dw3, true, true));
+ } while (0);
+}
+
+
+Test::Test()
+ : _schema()
+{
+ _schema.addIndexField(Schema::IndexField("f0", Schema::STRING));
+ _schema.addIndexField(Schema::IndexField("f1", Schema::STRING));
+ _schema.addIndexField(Schema::IndexField("f2", Schema::STRING,
+ Schema::ARRAY));
+ _schema.addIndexField(Schema::IndexField("f3", Schema::STRING,
+ Schema::WEIGHTEDSET));
+}
+
+
+int
+Test::Main()
+{
+ TEST_INIT("fusion_test");
+
+ if (_argc > 0) {
+ DummyFileHeaderContext::setCreator(_argv[0]);
+ }
+
+ TEST_DO(requireThatFusionIsWorking("", false, false));
+ TEST_DO(requireThatFusionIsWorking("d", true, false));
+ TEST_DO(requireThatFusionIsWorking("m", false, true));
+ TEST_DO(requireThatFusionIsWorking("dm", true, true));
+
+ TEST_DONE();
+}
+
+}
+
+
+}
+
+
+TEST_APPHOOK(search::diskindex::Test);
diff --git a/searchlib/src/tests/diskindex/fusion/fusion_test.sh b/searchlib/src/tests/diskindex/fusion/fusion_test.sh
new file mode 100755
index 00000000000..127453fae07
--- /dev/null
+++ b/searchlib/src/tests/diskindex/fusion/fusion_test.sh
@@ -0,0 +1,15 @@
+#!/bin/bash
+IINSPECT=../../../apps/vespa-index-inspect/searchlib_vespa-index-inspect_app
+ECHO_CMD=echo
+
+$VALGRIND ./searchlib_fusion_test_app
+$ECHO_CMD showing usage
+$IINSPECT --help > usage.out 2>&1 || true
+$ECHO_CMD dumping dictionary words for field f0
+$IINSPECT dumpwords --indexdir dump3 --field f0 > dumpwords.out
+$ECHO_CMD transposing index back for inspection
+$IINSPECT showpostings --transpose --indexdir dump3 > transpose.out
+$ECHO_CMD dumping posting list for word z in field f0
+$IINSPECT showpostings --indexdir dump3 --field f0 z > zwordf0field.out
+$ECHO_CMD inspection done.
+
diff --git a/searchlib/src/tests/diskindex/pagedict4/.gitignore b/searchlib/src/tests/diskindex/pagedict4/.gitignore
new file mode 100644
index 00000000000..2381ed57229
--- /dev/null
+++ b/searchlib/src/tests/diskindex/pagedict4/.gitignore
@@ -0,0 +1,5 @@
+.depend
+Makefile
+pagedict4_test
+fakedict.*
+searchlib_pagedict4_test_app
diff --git a/searchlib/src/tests/diskindex/pagedict4/CMakeLists.txt b/searchlib/src/tests/diskindex/pagedict4/CMakeLists.txt
new file mode 100644
index 00000000000..f8aef573c9a
--- /dev/null
+++ b/searchlib/src/tests/diskindex/pagedict4/CMakeLists.txt
@@ -0,0 +1,9 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_pagedict4_test_app
+ SOURCES
+ pagedict4test.cpp
+ DEPENDS
+ searchlib_test
+ searchlib
+)
+vespa_add_test(NAME searchlib_pagedict4_test_app COMMAND searchlib_pagedict4_test_app)
diff --git a/searchlib/src/tests/diskindex/pagedict4/pagedict4test.cpp b/searchlib/src/tests/diskindex/pagedict4/pagedict4test.cpp
new file mode 100644
index 00000000000..03d73e84b42
--- /dev/null
+++ b/searchlib/src/tests/diskindex/pagedict4/pagedict4test.cpp
@@ -0,0 +1,876 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("pagedict4test");
+#include <vespa/searchlib/bitcompression/compression.h>
+#include <vector>
+#include <vespa/searchlib/util/rand48.h>
+#include <vespa/searchlib/index/schemautil.h>
+#include <vespa/searchlib/bitcompression/countcompression.h>
+#include <vespa/searchlib/bitcompression/pagedict4.h>
+#include <vespa/searchlib/test/diskindex/threelevelcountbuffers.h>
+#include <vespa/searchlib/index/postinglistcounts.h>
+#include <vespa/searchlib/index/dummyfileheadercontext.h>
+#include <vespa/searchlib/diskindex/pagedict4file.h>
+#include <vespa/searchlib/diskindex/pagedict4randread.h>
+#include <vespa/searchlib/common/tunefileinfo.h>
+
+using search::bitcompression::PostingListCountFileEncodeContext;
+using search::bitcompression::PostingListCountFileDecodeContext;
+using search::index::PostingListCounts;
+using search::index::PostingListOffsetAndCounts;
+using search::index::PostingListParams;
+using search::bitcompression::PageDict4SSWriter;
+using search::bitcompression::PageDict4SPWriter;
+using search::bitcompression::PageDict4PWriter;
+using search::bitcompression::PageDict4Reader;
+using search::bitcompression::PageDict4SSReader;
+using search::bitcompression::PageDict4SSLookupRes;
+using search::bitcompression::PageDict4SPLookupRes;
+using search::bitcompression::PageDict4PLookupRes;
+using search::index::Schema;
+using search::index::DictionaryFileSeqRead;
+using search::index::DictionaryFileSeqWrite;
+using search::index::DictionaryFileRandRead;
+using search::diskindex::PageDict4FileSeqRead;
+using search::diskindex::PageDict4FileSeqWrite;
+using search::diskindex::PageDict4RandRead;
+using search::index::DummyFileHeaderContext;
+
+typedef search::bitcompression::PageDict4StartOffset StartOffset;
+
+namespace
+{
+
+
+class Writer : public search::diskindex::ThreeLevelCountWriteBuffers
+{
+public:
+ PageDict4SSWriter *_ssw;
+ PageDict4SPWriter *_spw;
+ PageDict4PWriter *_pw;
+
+ Writer(EC &sse,
+ EC &spe,
+ EC &pe)
+ : ThreeLevelCountWriteBuffers(sse, spe, pe),
+ _ssw(NULL),
+ _spw(NULL),
+ _pw(NULL)
+ {
+ }
+
+ ~Writer(void)
+ {
+ delete _ssw;
+ delete _spw;
+ delete _pw;
+ }
+
+ void
+ allocWriters()
+ {
+ _ssw = new PageDict4SSWriter(_sse);
+ _spw = new PageDict4SPWriter(*_ssw, _spe);
+ _pw = new PageDict4PWriter(*_spw, _pe);
+ _spw->setup();
+ _pw->setup();
+ }
+
+ void
+ flush(void)
+ {
+ _pw->flush();
+ ThreeLevelCountWriteBuffers::flush();
+ }
+
+ void
+ addCounts(const std::string &word,
+ const PostingListCounts &counts)
+ {
+ _pw->addCounts(word, counts);
+ }
+};
+
+
+class SeqReader : public search::diskindex::ThreeLevelCountReadBuffers
+{
+public:
+ PageDict4SSReader _ssr;
+ PageDict4Reader _pr;
+
+ SeqReader(DC &ssd,
+ DC &spd,
+ DC &pd,
+ search::diskindex::ThreeLevelCountWriteBuffers &wb)
+ : ThreeLevelCountReadBuffers(ssd, spd, pd, wb),
+ _ssr(_rcssd,
+ wb._ssHeaderLen, wb._ssFileBitSize,
+ wb._spHeaderLen, wb._spFileBitSize,
+ wb._pHeaderLen, wb._pFileBitSize),
+ _pr(_ssr, spd, pd)
+ {
+ _ssr.setup(ssd);
+ _pr.setup();
+ }
+
+ void
+ readCounts(vespalib::string &word,
+ uint64_t &wordNum,
+ PostingListCounts &counts)
+ {
+ _pr.readCounts(word, wordNum, counts);
+ }
+};
+
+class RandReader : public search::diskindex::ThreeLevelCountReadBuffers
+{
+public:
+ PageDict4SSReader _ssr;
+ const char *_spData;
+ const char *_pData;
+ size_t _pageSize;
+
+ RandReader(DC &ssd,
+ DC &spd,
+ DC &pd,
+ search::diskindex::ThreeLevelCountWriteBuffers &wb)
+ : ThreeLevelCountReadBuffers(ssd, spd, pd, wb),
+ _ssr(_rcssd,
+ wb._ssHeaderLen, wb._ssFileBitSize,
+ wb._spHeaderLen, wb._spFileBitSize,
+ wb._pHeaderLen, wb._pFileBitSize),
+ _spData(static_cast<const char *>(_rcspd._comprBuf)),
+ _pData(static_cast<const char *>(_rcpd._comprBuf)),
+ _pageSize(search::bitcompression::PageDict4PageParams::getPageByteSize())
+ {
+ _ssr.setup(ssd);
+ }
+
+ bool
+ lookup(const std::string &key,
+ uint64_t &wordNum,
+ PostingListCounts &counts,
+ StartOffset &offsets)
+ {
+ PageDict4SSLookupRes sslr;
+
+ sslr = _ssr.lookup(key);
+ if (!sslr._res) {
+ counts.clear();
+ offsets = sslr._l6StartOffset;
+ wordNum = sslr._l6WordNum;
+ return false;
+ }
+
+ if (sslr._overflow) {
+ wordNum = sslr._l6WordNum;
+ counts = sslr._counts;
+ offsets = sslr._startOffset;
+ return true;
+ }
+ PageDict4SPLookupRes splr;
+ splr.lookup(_ssr,
+ _spData +
+ _pageSize * sslr._sparsePageNum,
+ key,
+ sslr._l6Word,
+ sslr._lastWord,
+ sslr._l6StartOffset,
+ sslr._l6WordNum,
+ sslr._pageNum);
+
+ PageDict4PLookupRes plr;
+ plr.lookup(_ssr,
+ _pData + _pageSize * splr._pageNum,
+ key,
+ splr._l3Word,
+ splr._lastWord,
+ splr._l3StartOffset,
+ splr._l3WordNum);
+ wordNum = plr._wordNum;
+ offsets = plr._startOffset;
+ if (plr._res) {
+ counts = plr._counts;
+ return true;
+ }
+ counts.clear();
+ return false;
+ }
+};
+
+}
+
+class PageDict4TestApp : public FastOS_Application
+{
+public:
+ search::Rand48 _rnd;
+ bool _stress;
+ bool _emptyWord;
+ bool _firstWordForcedCommon;
+ bool _lastWordForcedCommon;
+
+ void
+ usage(void);
+
+ int
+ Main(void);
+
+ void
+ testWords(void);
+
+ PageDict4TestApp(void)
+ : _rnd(),
+ _stress(false),
+ _emptyWord(false),
+ _firstWordForcedCommon(false),
+ _lastWordForcedCommon(false)
+ {
+ }
+};
+
+
+void
+PageDict4TestApp::usage(void)
+{
+ printf("Usage: wordnumbers\n");
+ fflush(stdout);
+}
+
+
+int
+PageDict4TestApp::Main(void)
+{
+ if (_argc > 0) {
+ DummyFileHeaderContext::setCreator(_argv[0]);
+ }
+ _rnd.srand48(32);
+ for (int32_t i = 1; i < _argc; ++i) {
+ if (strcmp(_argv[i], "stress") == 0)
+ _stress = true;
+ if (strcmp(_argv[i], "emptyword") == 0)
+ _emptyWord = true;
+ if (strcmp(_argv[i], "firstwordforcedcommon") == 0)
+ _firstWordForcedCommon = true;
+ if (strcmp(_argv[i], "lastwordforcedcommon") == 0)
+ _lastWordForcedCommon = true;
+ }
+ testWords();
+
+ LOG(info,
+ "_stress is %s",
+ _stress ? "true" : "false");
+ LOG(info,
+ "_emptyWord is %s",
+ _emptyWord ? "true" : "false");
+ LOG(info,
+ "_firstWordForcedCommon is %s",
+ _firstWordForcedCommon ? "true" : "false");
+ LOG(info,
+ "_lastWordForcedCommon is %s",
+ _lastWordForcedCommon ? "true" : "false");
+
+ LOG(info, "SUCCESS");
+ return 0;
+}
+
+
+class WordIndexCounts
+{
+public:
+ uint32_t _numDocs;
+ uint64_t _fileOffset;
+ uint64_t _bitLength;
+ uint64_t _accNumDocs;
+
+ WordIndexCounts(uint64_t bitLength,
+ uint32_t numDocs)
+ : _numDocs(numDocs),
+ _fileOffset(0),
+ _bitLength(bitLength),
+ _accNumDocs(0)
+ {
+ }
+
+ WordIndexCounts()
+ : _numDocs(0),
+ _fileOffset(0),
+ _bitLength(0),
+ _accNumDocs(0)
+ {
+ }
+};
+
+class WordCounts
+{
+public:
+ std::string _word;
+ WordIndexCounts _counts;
+
+ bool
+ operator!=(const WordCounts &rhs) const
+ {
+ return _word != rhs._word;
+ }
+
+ WordCounts(const std::string &word)
+ : _word(word),
+ _counts()
+ {
+ }
+
+ bool
+ operator<(const WordCounts &rhs) const
+ {
+ return _word < rhs._word;
+ }
+};
+
+
+void
+deDup(std::vector<WordCounts> &v)
+{
+ std::vector<WordCounts> v2;
+ std::sort(v.begin(), v.end());
+ for (std::vector<WordCounts>::const_iterator
+ i = v.begin(),
+ ie = v.end();
+ i != ie;
+ ++i) {
+ if (v2.empty() || v2.back() != *i)
+ v2.push_back(*i);
+ }
+ std::swap(v, v2);
+}
+
+
+void
+deDup(std::vector<uint32_t> &v)
+{
+ std::vector<uint32_t> v2;
+ std::sort(v.begin(), v.end());
+ for (std::vector<uint32_t>::const_iterator
+ i = v.begin(),
+ ie = v.end();
+ i != ie;
+ ++i) {
+ if (v2.empty() || v2.back() != *i)
+ v2.push_back(*i);
+ }
+ std::swap(v, v2);
+}
+
+
+static WordIndexCounts
+makeIndex(search::Rand48 &rnd, bool forceCommon)
+{
+ uint64_t bitLength = 10;
+ uint32_t numDocs = 1;
+ if ((rnd.lrand48() % 150) == 0 || forceCommon) {
+ bitLength = 1000000000;
+ numDocs = 500000;
+ }
+ return WordIndexCounts(bitLength, numDocs);
+}
+
+
+void
+makeIndexes(search::Rand48 &rnd,
+ WordIndexCounts &counts,
+ bool forceCommon)
+{
+ counts = makeIndex(rnd, forceCommon);
+}
+
+
+static void
+makeWords(std::vector<WordCounts> &v,
+ search::Rand48 &rnd,
+ uint32_t numWordIds,
+ uint32_t tupleCount,
+ bool emptyWord,
+ bool firstWordForcedCommon,
+ bool lastWordForcedCommon)
+{
+ v.clear();
+ for (unsigned int i = 0; i < tupleCount; ++i) {
+ uint64_t word = rnd.lrand48() % numWordIds;
+ uint64_t wordCount = (rnd.lrand48() % 10) + 1;
+ for (unsigned int j = 0; j < wordCount; ++j) {
+ uint64_t nextWord = rnd.lrand48() % numWordIds;
+ uint64_t nextWordCount = 0;
+ bool incomplete = true;
+ nextWordCount = rnd.lrand48() % 10;
+ incomplete = (rnd.lrand48() % 3) == 0 || nextWordCount == 0;
+ for (unsigned int k = 0; k < nextWordCount; ++k) {
+ uint64_t nextNextWord = rnd.lrand48() % numWordIds;
+ std::ostringstream w;
+ w << word;
+ w << "-";
+ w << nextWord;
+ w << "-";
+ w << nextNextWord;
+ v.push_back(WordCounts(w.str()));
+ }
+ if (incomplete) {
+ std::ostringstream w;
+ w << word;
+ w << "-";
+ w << nextWord;
+ w << "-";
+ w << "9999999999999999";
+ v.push_back(WordCounts(w.str()));
+ }
+ }
+ }
+ deDup(v);
+ if (!v.empty() && emptyWord)
+ v.front()._word = "";
+ for (std::vector<WordCounts>::iterator
+ i = v.begin(), ib = v.begin(), ie = v.end();
+ i != ie; ++i) {
+ std::vector<WordIndexCounts> indexes;
+ makeIndexes(rnd, i->_counts,
+ (i == ib && firstWordForcedCommon) ||
+ (i + 1 == ie && lastWordForcedCommon));
+ }
+ uint64_t fileOffset = 0;
+ uint64_t accNumDocs = 0;
+ for (std::vector<WordCounts>::iterator
+ i = v.begin(),
+ ie = v.end();
+ i != ie;
+ ++i) {
+ WordIndexCounts *f = &i->_counts;
+ assert(f->_numDocs > 0);
+ assert(f->_bitLength > 0);
+ f->_fileOffset = fileOffset;
+ f->_accNumDocs = accNumDocs;
+ fileOffset += f->_bitLength;
+ accNumDocs += f->_numDocs;
+ }
+}
+
+
+void
+makeCounts(PostingListCounts &counts,
+ const WordCounts &i,
+ uint32_t chunkSize)
+{
+ PostingListCounts c;
+ const WordIndexCounts *j = &i._counts;
+ c._bitLength = j->_bitLength;
+ c._numDocs = j->_numDocs;
+ c._segments.clear();
+ assert(j->_numDocs > 0);
+ uint32_t numChunks = (j->_numDocs + chunkSize - 1) / chunkSize;
+ if (numChunks > 1) {
+ uint32_t chunkBits = j->_bitLength / numChunks;
+ for (uint32_t chunkNo = 0; chunkNo < numChunks; ++chunkNo) {
+ PostingListCounts::Segment seg;
+ seg._bitLength = chunkBits;
+ seg._numDocs = chunkSize;
+ seg._lastDoc = (chunkNo + 1) * chunkSize - 1;
+ if (chunkNo + 1 == numChunks) {
+ seg._bitLength = c._bitLength -
+ (numChunks - 1) * chunkBits;
+ seg._lastDoc = c._numDocs - 1;
+ seg._numDocs = c._numDocs - (numChunks - 1) * chunkSize;
+ }
+ c._segments.push_back(seg);
+ }
+ }
+ counts = c;
+}
+
+
+void
+checkCounts(const std::string &word,
+ const PostingListCounts &counts,
+ const StartOffset &fileOffset,
+ const WordCounts &i,
+ uint32_t chunkSize)
+{
+ PostingListCounts answer;
+
+ makeCounts(answer, i, chunkSize);
+ assert(word == i._word);
+ (void) word;
+ (void) fileOffset;
+ const WordIndexCounts *j = &i._counts;
+ assert(counts._bitLength == j->_bitLength);
+ assert(counts._numDocs == j->_numDocs);
+ assert(fileOffset._fileOffset == j->_fileOffset);
+ assert(fileOffset._accNumDocs == j->_accNumDocs);
+ assert(counts._segments == answer._segments);
+ assert(counts == answer);
+ (void) counts;
+}
+
+
+void
+testWords(const std::string &logname,
+ search::Rand48 &rnd,
+ uint64_t numWordIds,
+ uint32_t tupleCount,
+ uint32_t chunkSize,
+ uint32_t ssPad,
+ uint32_t spPad,
+ uint32_t pPad,
+ bool emptyWord,
+ bool firstWordForcedCommon,
+ bool lastWordForcedCommon)
+{
+ typedef search::bitcompression::PostingListCountFileEncodeContext EC;
+ typedef search::bitcompression::PostingListCountFileDecodeContext DC;
+
+ LOG(info, "%s: word test start", logname.c_str());
+ std::vector<WordCounts> myrand;
+ makeWords(myrand, rnd, numWordIds, tupleCount,
+ emptyWord, firstWordForcedCommon, lastWordForcedCommon);
+
+ PostingListCounts xcounts;
+ for (std::vector<WordCounts>::const_iterator
+ i = myrand.begin(),
+ ie = myrand.end();
+ i != ie;
+ ++i) {
+ makeCounts(xcounts, *i, chunkSize);
+ }
+ LOG(info, "%s: word counts generated", logname.c_str());
+
+ EC pe;
+ EC spe;
+ EC sse;
+
+ sse._minChunkDocs = chunkSize;
+ sse._numWordIds = numWordIds;
+ spe.copyParams(sse);
+ pe.copyParams(sse);
+ Writer w(sse, spe, pe);
+ w.startPad(ssPad, spPad, pPad);
+ w.allocWriters();
+
+ PostingListCounts counts;
+ for (std::vector<WordCounts>::const_iterator
+ i = myrand.begin(),
+ ie = myrand.end();
+ i != ie;
+ ++i) {
+ makeCounts(counts, *i, chunkSize);
+ w.addCounts(i->_word, counts);
+ }
+ w.flush();
+
+ LOG(info,
+ "%s: Used %" PRIu64 "+%" PRIu64 "+%" PRIu64
+ " bits for %d words",
+ logname.c_str(),
+ w._pFileBitSize,
+ w._spFileBitSize,
+ w._ssFileBitSize,
+ (int) myrand.size());
+
+ StartOffset checkOffset;
+
+ {
+ DC ssd;
+ ssd._minChunkDocs = chunkSize;
+ ssd._numWordIds = numWordIds;
+ DC spd;
+ spd.copyParams(ssd);
+ DC pd;
+ pd.copyParams(ssd);
+
+ SeqReader r(ssd, spd, pd, w);
+
+ uint64_t wordNum = 1;
+ uint64_t checkWordNum = 0;
+ for (std::vector<WordCounts>::const_iterator
+ i = myrand.begin(),
+ ie = myrand.end();
+ i != ie;
+ ++i, ++wordNum) {
+ vespalib::string word;
+ counts.clear();
+ r.readCounts(word, checkWordNum, counts);
+ checkCounts(word, counts, checkOffset, *i, chunkSize);
+ assert(checkWordNum == wordNum);
+ checkOffset._fileOffset += counts._bitLength;
+ checkOffset._accNumDocs += counts._numDocs;
+ }
+ assert(pd.getReadOffset() == w._pFileBitSize);
+ LOG(info, "%s: words seqRead test OK", logname.c_str());
+ }
+
+ {
+ DC ssd;
+ ssd._minChunkDocs = chunkSize;
+ ssd._numWordIds = numWordIds;
+ DC spd;
+ spd.copyParams(ssd);
+ DC pd;
+ pd.copyParams(ssd);
+
+ RandReader rr(ssd, spd, pd, w);
+
+ uint64_t wordNum = 1;
+ uint64_t checkWordNum = 0;
+ for (std::vector<WordCounts>::const_iterator
+ i = myrand.begin(),
+ ie = myrand.end();
+ i != ie;
+ ++i, ++wordNum) {
+ checkWordNum = 0;
+ bool res = rr.lookup(i->_word,
+ checkWordNum,
+ counts,
+ checkOffset);
+ assert(res);
+ (void) res;
+ checkCounts(i->_word, counts, checkOffset,
+ *i, chunkSize);
+ assert(checkWordNum == wordNum);
+ }
+ LOG(info, "%s: word randRead test OK", logname.c_str());
+ }
+
+ Schema schema;
+ std::vector<uint32_t> indexes;
+ {
+ std::ostringstream fn;
+ fn << "f0";
+ schema.addIndexField(Schema::
+ IndexField(fn.str(),
+ Schema::STRING,
+ Schema::SINGLE));
+ indexes.push_back(0);
+ }
+ {
+ std::unique_ptr<DictionaryFileSeqWrite>
+ dw(new PageDict4FileSeqWrite);
+ std::vector<uint32_t> wIndexes;
+ std::vector<PostingListCounts> wCounts;
+ search::TuneFileSeqWrite tuneFileWrite;
+ DummyFileHeaderContext fileHeaderContext;
+ PostingListParams params;
+ params.set("numWordIds", numWordIds);
+ params.set("minChunkDocs", chunkSize);
+ dw->setParams(params);
+ bool openres = dw->open("fakedict",
+ tuneFileWrite,
+ fileHeaderContext);
+ assert(openres);
+
+ for (std::vector<WordCounts>::const_iterator
+ i = myrand.begin(),
+ ie = myrand.end();
+ i != ie;
+ ++i) {
+ makeCounts(counts, *i, chunkSize);
+ dw->writeWord(i->_word, counts);
+ }
+ bool closeres = dw->close();
+ assert(closeres);
+ (void) closeres;
+
+ LOG(info, "%s: pagedict4 written", logname.c_str());
+ }
+ {
+ std::unique_ptr<DictionaryFileSeqRead> dr(new PageDict4FileSeqRead);
+ search::TuneFileSeqRead tuneFileRead;
+
+ bool openres = dr->open("fakedict",
+ tuneFileRead);
+ assert(openres);
+ (void) openres;
+ std::string lastWord;
+ vespalib::string checkWord;
+ PostingListCounts wCounts;
+ PostingListCounts rCounts;
+ uint64_t wordNum = 1;
+ uint64_t checkWordNum = 5;
+ for (std::vector<WordCounts>::const_iterator
+ i = myrand.begin(),
+ ie = myrand.end();
+ i != ie;
+ ++i, ++wordNum) {
+ makeCounts(counts, *i, chunkSize);
+ wCounts = counts;
+ checkWord.clear();
+ checkWordNum = 0;
+ dr->readWord(checkWord, checkWordNum, rCounts);
+ assert(rCounts == wCounts);
+ assert(wordNum == checkWordNum);
+ assert(checkWord == i->_word);
+ }
+
+ checkWord = "bad";
+ checkWordNum = 5;
+ dr->readWord(checkWord, checkWordNum, rCounts);
+ assert(checkWord.empty());
+ assert(checkWordNum == DictionaryFileSeqRead::noWordNumHigh());
+ bool closeres = dr->close();
+ assert(closeres);
+ (void) closeres;
+
+ LOG(info, "%s: pagedict4 seqverify OK", logname.c_str());
+ }
+ {
+ std::unique_ptr<DictionaryFileRandRead> drr(new PageDict4RandRead);
+ search::TuneFileRandRead tuneFileRead;
+ bool openres = drr->open("fakedict",
+ tuneFileRead);
+ assert(openres);
+ (void) openres;
+ std::string lastWord;
+ vespalib::string checkWord;
+ PostingListCounts wCounts;
+ PostingListCounts rCounts;
+ uint64_t wOffset;
+ uint64_t rOffset;
+ PostingListOffsetAndCounts rOffsetAndCounts;
+ uint64_t wordNum = 1;
+ uint64_t checkWordNum = 5;
+ std::string missWord;
+ wOffset = 0;
+ for (std::vector<WordCounts>::const_iterator
+ i = myrand.begin(),
+ ie = myrand.end();
+ i != ie;
+ ++i, ++wordNum) {
+ makeCounts(counts, *i, chunkSize);
+ wCounts = counts;
+
+ checkWordNum = 0;
+ rCounts.clear();
+ rOffset = 0;
+ bool lres = drr->lookup(i->_word, checkWordNum,
+ rOffsetAndCounts);
+ assert(lres);
+ (void) lres;
+ assert((rOffsetAndCounts._counts._bitLength == 0) ==
+ (rOffsetAndCounts._counts._numDocs == 0));
+ rOffset = rOffsetAndCounts._offset;
+ rCounts = rOffsetAndCounts._counts;
+ assert(rCounts == wCounts);
+ assert(wordNum == checkWordNum);
+ assert(rOffset == wOffset);
+
+ wOffset += wCounts._bitLength;
+ lastWord = i->_word;
+
+ missWord = i->_word;
+ missWord.append(1, '\1');
+ checkWordNum = 0;
+ lres = drr->lookup(missWord, checkWordNum,
+ rOffsetAndCounts);
+ assert(!lres);
+ assert(checkWordNum == wordNum + 1);
+ }
+
+ checkWordNum = 0;
+ std::string notfoundword = "Thiswordhasbetternotbeindictionary";
+ bool lres = drr->lookup(notfoundword, checkWordNum,
+ rOffsetAndCounts);
+ assert(!lres);
+ checkWordNum = 0;
+ notfoundword = lastWord + "somethingmore";
+ lres = drr->lookup(notfoundword, checkWordNum,
+ rOffsetAndCounts);
+ assert(!lres);
+ (void) lres;
+ LOG(info, "Lookup beyond dict EOF gave wordnum %d", (int) checkWordNum);
+
+ if (firstWordForcedCommon) {
+ if (!emptyWord) {
+ checkWordNum = 0;
+ notfoundword = "";
+ lres = drr->lookup(notfoundword, checkWordNum,
+ rOffsetAndCounts);
+ assert(!lres);
+ assert(checkWordNum == 1);
+ }
+ if (!myrand.empty()) {
+ checkWordNum = 0;
+ notfoundword = myrand.front()._word;
+ notfoundword.append(1, '\1');
+ lres = drr->lookup(notfoundword, checkWordNum,
+ rOffsetAndCounts);
+ assert(!lres);
+ assert(checkWordNum == 2);
+ }
+ }
+ if (lastWordForcedCommon && !myrand.empty()) {
+ if (myrand.size() > 1) {
+ checkWordNum = 0;
+ notfoundword = myrand[myrand.size() - 2]._word;
+ notfoundword.append(1, '\1');
+ lres = drr->lookup(notfoundword, checkWordNum,
+ rOffsetAndCounts);
+ assert(!lres);
+ assert(checkWordNum == myrand.size());
+ }
+ checkWordNum = 0;
+ notfoundword = myrand[myrand.size() - 1]._word;
+ notfoundword.append(1, '\1');
+ lres = drr->lookup(notfoundword, checkWordNum,
+ rOffsetAndCounts);
+ assert(!lres);
+ assert(checkWordNum == myrand.size() + 1);
+ }
+ bool closeres = drr->close();
+ assert(closeres);
+ (void) closeres;
+ LOG(info, "%s: pagedict4 randverify OK", logname.c_str());
+ }
+}
+
+
+void
+PageDict4TestApp::testWords(void)
+{
+ ::testWords("smallchunkwordsempty", _rnd,
+ 1000000, 0,
+ 64, 80, 72, 64,
+ false, false, false);
+ ::testWords("smallchunkwordsempty2", _rnd,
+ 0, 0,
+ 64, 80, 72, 64,
+ false, false, false);
+ ::testWords("smallchunkwords", _rnd,
+ 1000000, 100,
+ 64, 80, 72, 64,
+ false, false, false);
+ ::testWords("smallchunkwordswithemptyword", _rnd,
+ 1000000, 100,
+ 64, 80, 72, 64,
+ true, false, false);
+ ::testWords("smallchunkwordswithcommonfirstword", _rnd,
+ 1000000, 100,
+ 64, 80, 72, 64,
+ false, true, false);
+ ::testWords("smallchunkwordswithcommonemptyfirstword", _rnd,
+ 1000000, 100,
+ 64, 80, 72, 64,
+ true, true, false);
+ ::testWords("smallchunkwordswithcommonlastword", _rnd,
+ 1000000, 100,
+ 64, 80, 72, 64,
+ false, false, true);
+#if 1
+ ::testWords("smallchunkwords2", _rnd,
+ 1000000, _stress ? 10000 : 100,
+ 64, 80, 72, 64,
+ _emptyWord, _firstWordForcedCommon, _lastWordForcedCommon);
+#endif
+#if 1
+ ::testWords("stdwords", _rnd,
+ 1000000, _stress ? 10000 : 100,
+ 262144, 80, 72, 64,
+ _emptyWord, _firstWordForcedCommon, _lastWordForcedCommon);
+#endif
+}
+
+FASTOS_MAIN(PageDict4TestApp);
diff --git a/searchlib/src/tests/document_store/.gitignore b/searchlib/src/tests/document_store/.gitignore
new file mode 100644
index 00000000000..bc9b97decab
--- /dev/null
+++ b/searchlib/src/tests/document_store/.gitignore
@@ -0,0 +1 @@
+searchlib_document_store_test_app
diff --git a/searchlib/src/tests/document_store/CMakeLists.txt b/searchlib/src/tests/document_store/CMakeLists.txt
new file mode 100644
index 00000000000..18b9e408fae
--- /dev/null
+++ b/searchlib/src/tests/document_store/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_document_store_test_app
+ SOURCES
+ document_store_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_document_store_test_app COMMAND searchlib_document_store_test_app)
diff --git a/searchlib/src/tests/document_store/FILES b/searchlib/src/tests/document_store/FILES
new file mode 100644
index 00000000000..b1dd2b610d0
--- /dev/null
+++ b/searchlib/src/tests/document_store/FILES
@@ -0,0 +1 @@
+document_store_test.cpp
diff --git a/searchlib/src/tests/document_store/document_store_test.cpp b/searchlib/src/tests/document_store/document_store_test.cpp
new file mode 100644
index 00000000000..e6a3d9b5c3d
--- /dev/null
+++ b/searchlib/src/tests/document_store/document_store_test.cpp
@@ -0,0 +1,58 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/vespalib/testkit/test_kit.h>
+#include <vespa/searchlib/docstore/documentstore.h>
+#include <vespa/searchlib/docstore/cachestats.h>
+#include <vespa/document/repo/documenttyperepo.h>
+
+using namespace search;
+
+document::DocumentTypeRepo repo;
+
+struct NullDataStore : IDataStore {
+ NullDataStore() : IDataStore("") {}
+ ssize_t read(uint32_t, vespalib::DataBuffer &) const override { return 0; }
+ void read(const LidVector &, IBufferVisitor &) const override { }
+ void write(uint64_t, uint32_t, const void *, size_t) override {}
+ void remove(uint64_t, uint32_t) override {}
+ void flush(uint64_t) override {}
+
+ uint64_t initFlush(uint64_t syncToken) override { return syncToken; }
+
+ size_t memoryUsed() const override { return 0; }
+ size_t memoryMeta() const override { return 0; }
+ size_t getDiskFootprint() const override { return 0; }
+ size_t getDiskBloat() const override { return 0; }
+ uint64_t lastSyncToken() const override { return 0; }
+ uint64_t tentativeLastSyncToken() const override { return 0; }
+ fastos::TimeStamp getLastFlushTime() const override { return fastos::TimeStamp(); }
+ void accept(IDataStoreVisitor &, IDataStoreVisitorProgress &, bool) override { }
+ double getVisitCost() const override { return 1.0; }
+ virtual DataStoreStorageStats getStorageStats() const override {
+ return DataStoreStorageStats(0, 0, 0.0, 0, 0);
+ }
+ virtual std::vector<DataStoreFileChunkStats>
+ getFileChunkStats() const override {
+ std::vector<DataStoreFileChunkStats> result;
+ return result;
+ }
+};
+
+TEST_FFF("require that uncache docstore lookups are counted",
+ DocumentStore::Config(document::CompressionConfig::NONE, 0, 0),
+ NullDataStore(), DocumentStore(f1, f2))
+{
+ EXPECT_EQUAL(0u, f3.getCacheStats().misses);
+ f3.read(1, repo);
+ EXPECT_EQUAL(1u, f3.getCacheStats().misses);
+}
+
+TEST_FFF("require that cached docstore lookups are counted",
+ DocumentStore::Config(document::CompressionConfig::NONE, 100000, 100),
+ NullDataStore(), DocumentStore(f1, f2))
+{
+ EXPECT_EQUAL(0u, f3.getCacheStats().misses);
+ f3.read(1, repo);
+ EXPECT_EQUAL(1u, f3.getCacheStats().misses);
+}
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/document_store/visitor/.gitignore b/searchlib/src/tests/document_store/visitor/.gitignore
new file mode 100644
index 00000000000..c97186f86d7
--- /dev/null
+++ b/searchlib/src/tests/document_store/visitor/.gitignore
@@ -0,0 +1 @@
+searchlib_document_store_visitor_test_app
diff --git a/searchlib/src/tests/document_store/visitor/CMakeLists.txt b/searchlib/src/tests/document_store/visitor/CMakeLists.txt
new file mode 100644
index 00000000000..976463bdfe8
--- /dev/null
+++ b/searchlib/src/tests/document_store/visitor/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_document_store_visitor_test_app
+ SOURCES
+ document_store_visitor_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_document_store_visitor_test_app COMMAND searchlib_document_store_visitor_test_app)
diff --git a/searchlib/src/tests/document_store/visitor/DESC b/searchlib/src/tests/document_store/visitor/DESC
new file mode 100644
index 00000000000..03e9c6681ad
--- /dev/null
+++ b/searchlib/src/tests/document_store/visitor/DESC
@@ -0,0 +1 @@
+Document store visiting test.
diff --git a/searchlib/src/tests/document_store/visitor/FILES b/searchlib/src/tests/document_store/visitor/FILES
new file mode 100644
index 00000000000..412f9879bb5
--- /dev/null
+++ b/searchlib/src/tests/document_store/visitor/FILES
@@ -0,0 +1 @@
+document_store_visitor_test.cpp
diff --git a/searchlib/src/tests/document_store/visitor/document_store_visitor_test.cpp b/searchlib/src/tests/document_store/visitor/document_store_visitor_test.cpp
new file mode 100644
index 00000000000..1898fa35a29
--- /dev/null
+++ b/searchlib/src/tests/document_store/visitor/document_store_visitor_test.cpp
@@ -0,0 +1,466 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("document_store_visitor_test");
+
+#include <vespa/vespalib/testkit/test_kit.h>
+#include <vespa/searchlib/docstore/documentstore.h>
+#include <vespa/searchlib/docstore/logdocumentstore.h>
+#include <vespa/searchlib/docstore/cachestats.h>
+#include <vespa/searchlib/index/dummyfileheadercontext.h>
+#include <vespa/searchlib/common/bitvector.h>
+#include <vespa/document/repo/documenttyperepo.h>
+#include <vespa/document/repo/configbuilder.h>
+#include <vespa/vespalib/io/fileutil.h>
+
+using namespace search;
+
+using vespalib::string;
+using document::DataType;
+using document::Document;
+using document::DocumentId;
+using document::DocumentType;
+using document::DocumentTypeRepo;
+using vespalib::asciistream;
+using index::DummyFileHeaderContext;
+
+namespace
+{
+
+const string doc_type_name = "test";
+const string header_name = doc_type_name + ".header";
+const string body_name = doc_type_name + ".body";
+
+document::DocumenttypesConfig
+makeDocTypeRepoConfig(void)
+{
+ const int32_t doc_type_id = 787121340;
+ document::config_builder::DocumenttypesConfigBuilderHelper builder;
+ builder.document(doc_type_id,
+ doc_type_name,
+ document::config_builder::Struct(header_name),
+ document::config_builder::Struct(body_name).
+ addField("main", DataType::T_STRING).
+ addField("extra", DataType::T_STRING));
+ return builder.config();
+}
+
+
+Document::UP
+makeDoc(const DocumentTypeRepo &repo, uint32_t i, bool before)
+{
+ asciistream idstr;
+ idstr << "id:test:test:: " << i;
+ DocumentId id(idstr.str());
+ const DocumentType *docType = repo.getDocumentType(doc_type_name);
+ Document::UP doc(new Document(*docType, id));
+ ASSERT_TRUE(doc.get());
+ asciistream mainstr;
+ mainstr << "static text" << i << " body something";
+ for (uint32_t j = 0; j < 10; ++j) {
+ mainstr << (j + i * 1000) << " ";
+ }
+ mainstr << " and end field";
+ doc->set("main", mainstr.c_str());
+ if (!before) {
+ doc->set("extra", "foo");
+ }
+
+ return doc;
+}
+
+}
+
+class MyTlSyncer : public transactionlog::SyncProxy
+{
+ SerialNum _syncedTo;
+
+public:
+ MyTlSyncer(void)
+ : _syncedTo(0)
+ {
+ }
+
+ void
+ sync(SerialNum syncTo)
+ {
+ _syncedTo = syncTo;
+ }
+};
+
+
+class MyVisitorBase
+{
+public:
+ DocumentTypeRepo &_repo;
+ uint32_t _visitCount;
+ uint32_t _visitRmCount;
+ uint32_t _docIdLimit;
+ BitVector::UP _valid;
+ bool _before;
+
+ MyVisitorBase(DocumentTypeRepo &repo, uint32_t docIdLimit, bool before);
+};
+
+MyVisitorBase::MyVisitorBase(DocumentTypeRepo &repo,
+ uint32_t docIdLimit,
+ bool before)
+ : _repo(repo),
+ _visitCount(0u),
+ _visitRmCount(0u),
+ _docIdLimit(docIdLimit),
+ _valid(BitVector::create(docIdLimit)),
+ _before(before)
+{
+}
+
+
+class MyVisitor : public MyVisitorBase,
+ public IDocumentStoreReadVisitor
+{
+public:
+ using MyVisitorBase::MyVisitorBase;
+
+ virtual void
+ visit(uint32_t lid, const Document &doc);
+
+ virtual void
+ visit(uint32_t lid);
+};
+
+
+void
+MyVisitor::visit(uint32_t lid, const Document &doc)
+{
+ ++_visitCount;
+ assert(lid < _docIdLimit);
+ Document::UP expDoc(makeDoc(_repo, lid, _before));
+ EXPECT_TRUE(*expDoc == doc);
+ _valid->slowSetBit(lid);
+}
+
+
+void
+MyVisitor::visit(uint32_t lid)
+{
+ ++_visitRmCount;
+ assert(lid < _docIdLimit);
+ _valid->slowClearBit(lid);
+}
+
+
+class MyRewriteVisitor : public MyVisitorBase,
+ public IDocumentStoreRewriteVisitor
+{
+public:
+ using MyVisitorBase::MyVisitorBase;
+
+ virtual void
+ visit(uint32_t lid, Document &doc);
+};
+
+
+void
+MyRewriteVisitor::visit(uint32_t lid, Document &doc)
+{
+ ++_visitCount;
+ assert(lid < _docIdLimit);
+ Document::UP expDoc(makeDoc(_repo, lid, _before));
+ EXPECT_TRUE(*expDoc == doc);
+ _valid->slowSetBit(lid);
+ doc.set("extra", "foo");
+}
+
+
+class MyVisitorProgress : public IDocumentStoreVisitorProgress
+{
+public:
+ double _progress;
+ uint32_t _updates;
+
+ MyVisitorProgress();
+
+ virtual void
+ updateProgress(double progress);
+
+ virtual double
+ getProgress() const;
+};
+
+
+MyVisitorProgress::MyVisitorProgress()
+ : _progress(0.0),
+ _updates(0)
+{
+}
+
+
+void
+MyVisitorProgress::updateProgress(double progress)
+{
+ EXPECT_TRUE(progress >= _progress);
+ _progress = progress;
+ ++_updates;
+ LOG(info,
+ "updateProgress(%6.2f), %u updates",
+ progress, _updates);
+}
+
+
+double
+MyVisitorProgress::getProgress() const
+{
+ return _progress;
+}
+
+
+struct Fixture
+{
+ string _baseDir;
+ DocumentTypeRepo _repo;
+ LogDocumentStore::Config _storeConfig;
+ vespalib::ThreadStackExecutor _executor;
+ DummyFileHeaderContext _fileHeaderContext;
+ MyTlSyncer _tlSyncer;
+ std::unique_ptr<LogDocumentStore> _store;
+ uint64_t _syncToken;
+ uint32_t _docIdLimit;
+ BitVector::UP _valid;
+
+ Fixture();
+
+ ~Fixture();
+
+ Document::UP
+ makeDoc(uint32_t i);
+
+ void
+ resetDocStore();
+
+ void
+ mkdir();
+
+ void
+ rmdir();
+
+ void
+ setDocIdLimit(uint32_t docIdLimit);
+
+ void
+ put(const Document &doc, uint32_t lid);
+
+ void
+ remove(uint32_t lid);
+
+ void
+ flush();
+
+ void
+ populate(uint32_t low, uint32_t high, uint32_t docIdLimit);
+
+ void
+ applyRemoves(uint32_t rmDocs);
+
+ void
+ checkRemovePostCond(uint32_t numDocs,
+ uint32_t docIdLimit,
+ uint32_t rmDocs,
+ bool before);
+};
+
+Fixture::Fixture()
+ : _baseDir("visitor"),
+ _repo(makeDocTypeRepoConfig()),
+ _storeConfig(DocumentStore::
+ Config(document::CompressionConfig::NONE, 0, 0),
+ LogDataStore::
+ Config(50000, 0.2, 3.0, 0.2, 1, true,
+ WriteableFileChunk::Config(
+ document::CompressionConfig(),
+ 16384,
+ 64))),
+ _executor(_storeConfig.getLogConfig().getNumThreads(), 128 * 1024),
+ _fileHeaderContext(),
+ _tlSyncer(),
+ _store(),
+ _syncToken(0u),
+ _docIdLimit(0u),
+ _valid(BitVector::create(0u))
+{
+ rmdir();
+ mkdir();
+ resetDocStore();
+}
+
+
+Fixture::~Fixture()
+{
+ _store.reset();
+ rmdir();
+}
+
+Document::UP
+Fixture::makeDoc(uint32_t i)
+{
+ return ::makeDoc(_repo, i, true);
+}
+
+void
+Fixture::resetDocStore()
+{
+ _store.reset(new LogDocumentStore(_executor,
+ _baseDir,
+ _storeConfig,
+ GrowStrategy(),
+ TuneFileSummary(),
+ _fileHeaderContext,
+ _tlSyncer,
+ NULL));
+}
+
+
+void
+Fixture::rmdir()
+{
+ vespalib::rmdir(_baseDir, true);
+}
+
+void
+Fixture::mkdir()
+{
+ vespalib::mkdir(_baseDir, false);
+}
+
+
+void
+Fixture::setDocIdLimit(uint32_t docIdLimit)
+{
+ _docIdLimit = docIdLimit;
+ _valid->resize(_docIdLimit);
+}
+
+void
+Fixture::put(const Document &doc, uint32_t lid)
+{
+ ++_syncToken;
+ assert(lid < _docIdLimit);
+ _store->write(_syncToken, doc, lid);
+ _valid->slowSetBit(lid);
+}
+
+
+void
+Fixture::remove(uint32_t lid)
+{
+ ++_syncToken;
+ assert(lid < _docIdLimit);
+ _store->remove(_syncToken, lid);
+ _valid->slowClearBit(lid);
+}
+
+
+void
+Fixture::flush()
+{
+ _store->initFlush(_syncToken);
+ _store->flush(_syncToken);
+}
+
+
+void
+Fixture::populate(uint32_t low, uint32_t high, uint32_t docIdLimit)
+{
+ setDocIdLimit(docIdLimit);
+ for (uint32_t lid = low; lid < high; ++lid) {
+ Document::UP doc = makeDoc(lid);
+ put(*doc, lid);
+ }
+}
+
+
+void
+Fixture::applyRemoves(uint32_t rmDocs)
+{
+ for (uint32_t lid = 20; lid < 20 + rmDocs; ++lid) {
+ remove(lid);
+ }
+ put(*makeDoc(25), 25);
+ remove(25);
+ put(*makeDoc(25), 25);
+}
+
+
+void
+Fixture::checkRemovePostCond(uint32_t numDocs,
+ uint32_t docIdLimit,
+ uint32_t rmDocs,
+ bool before)
+{
+ MyVisitor visitor(_repo, docIdLimit, before);
+ MyVisitorProgress visitorProgress;
+ EXPECT_EQUAL(0.0, visitorProgress.getProgress());
+ EXPECT_EQUAL(0u, visitorProgress._updates);
+ _store->accept(visitor, visitorProgress, _repo);
+ EXPECT_EQUAL(numDocs - rmDocs + 1, visitor._visitCount);
+ EXPECT_EQUAL(rmDocs - 1, visitor._visitRmCount);
+ EXPECT_EQUAL(1.0, visitorProgress.getProgress());
+ EXPECT_NOT_EQUAL(0u, visitorProgress._updates);
+ EXPECT_TRUE(*_valid == *visitor._valid);
+}
+
+
+TEST_F("require that basic visit works", Fixture())
+{
+ uint32_t numDocs = 3000;
+ uint32_t docIdLimit = numDocs + 1;
+ f.populate(1, docIdLimit, docIdLimit);
+ f.flush();
+ MyVisitor visitor(f._repo, docIdLimit, true);
+ MyVisitorProgress visitorProgress;
+ EXPECT_EQUAL(0.0, visitorProgress.getProgress());
+ EXPECT_EQUAL(0u, visitorProgress._updates);
+ f._store->accept(visitor, visitorProgress, f._repo);
+ EXPECT_EQUAL(numDocs, visitor._visitCount);
+ EXPECT_EQUAL(0u, visitor._visitRmCount);
+ EXPECT_EQUAL(1.0, visitorProgress.getProgress());
+ EXPECT_NOT_EQUAL(0u, visitorProgress._updates);
+ EXPECT_TRUE(*f._valid == *visitor._valid);
+}
+
+
+TEST_F("require that visit with remove works", Fixture())
+{
+ uint32_t numDocs = 1000;
+ uint32_t docIdLimit = numDocs + 1;
+ f.populate(1, docIdLimit, docIdLimit);
+ uint32_t rmDocs = 20;
+ f.applyRemoves(rmDocs);
+ f.flush();
+ f.checkRemovePostCond(numDocs, docIdLimit, rmDocs, true);
+}
+
+TEST_F("require that visit with rewrite and remove works", Fixture())
+{
+ uint32_t numDocs = 1000;
+ uint32_t docIdLimit = numDocs + 1;
+ f.populate(1, docIdLimit, docIdLimit);
+ uint32_t rmDocs = 20;
+ f.applyRemoves(rmDocs);
+ f.flush();
+ f.checkRemovePostCond(numDocs, docIdLimit, rmDocs, true);
+ {
+ MyRewriteVisitor visitor(f._repo, docIdLimit, true);
+ MyVisitorProgress visitorProgress;
+ EXPECT_EQUAL(0.0, visitorProgress.getProgress());
+ EXPECT_EQUAL(0u, visitorProgress._updates);
+ f._store->accept(visitor, visitorProgress, f._repo);
+ EXPECT_EQUAL(numDocs - rmDocs + 1, visitor._visitCount);
+ EXPECT_EQUAL(1.0, visitorProgress.getProgress());
+ EXPECT_NOT_EQUAL(0u, visitorProgress._updates);
+ EXPECT_TRUE(*f._valid == *visitor._valid);
+ f.flush();
+ }
+ f.checkRemovePostCond(numDocs, docIdLimit, rmDocs, false);
+}
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/engine/docsumapi/.gitignore b/searchlib/src/tests/engine/docsumapi/.gitignore
new file mode 100644
index 00000000000..1b38a4ff745
--- /dev/null
+++ b/searchlib/src/tests/engine/docsumapi/.gitignore
@@ -0,0 +1,4 @@
+.depend
+Makefile
+docsumapi_test
+searchlib_docsumapi_test_app
diff --git a/searchlib/src/tests/engine/docsumapi/CMakeLists.txt b/searchlib/src/tests/engine/docsumapi/CMakeLists.txt
new file mode 100644
index 00000000000..a8fbe70de4b
--- /dev/null
+++ b/searchlib/src/tests/engine/docsumapi/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_docsumapi_test_app
+ SOURCES
+ docsumapi_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_docsumapi_test_app COMMAND searchlib_docsumapi_test_app)
diff --git a/searchlib/src/tests/engine/docsumapi/DESC b/searchlib/src/tests/engine/docsumapi/DESC
new file mode 100644
index 00000000000..fa9d72e98be
--- /dev/null
+++ b/searchlib/src/tests/engine/docsumapi/DESC
@@ -0,0 +1 @@
+docsumapi test. Take a look at docsumapi.cpp for details.
diff --git a/searchlib/src/tests/engine/docsumapi/FILES b/searchlib/src/tests/engine/docsumapi/FILES
new file mode 100644
index 00000000000..3e2e2e636be
--- /dev/null
+++ b/searchlib/src/tests/engine/docsumapi/FILES
@@ -0,0 +1 @@
+docsumapi.cpp
diff --git a/searchlib/src/tests/engine/docsumapi/docsumapi_test.cpp b/searchlib/src/tests/engine/docsumapi/docsumapi_test.cpp
new file mode 100644
index 00000000000..d96295bb7ad
--- /dev/null
+++ b/searchlib/src/tests/engine/docsumapi/docsumapi_test.cpp
@@ -0,0 +1,185 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("docsumapi_test");
+#include <vespa/vespalib/testkit/testapp.h>
+#include <vespa/searchlib/common/packets.h>
+#include <vespa/searchlib/engine/docsumapi.h>
+#include <vespa/searchlib/engine/packetconverter.h>
+
+using namespace search::engine;
+using namespace search::fs4transport;
+
+namespace {
+
+// light-weight network hop simulation
+template <typename T> void copyPacket(T &src, T &dst) {
+ FNET_DataBuffer buf;
+ src.Encode(&buf);
+ dst.Decode(&buf, buf.GetDataLen());
+}
+
+} // namespace <unnamed>
+
+class Test : public vespalib::TestApp
+{
+public:
+ void convertToRequest();
+ void convertFromReply();
+ int Main();
+};
+
+document::GlobalId gid0("aaaaaaaaaaaa");
+document::GlobalId gid1("bbbbbbbbbbbb");
+
+void
+Test::convertToRequest()
+{
+ const string sessionId("qrserver.0.XXXXXXXXXXXXX.0");
+
+ FS4Packet_GETDOCSUMSX src;
+ src.setTimeout(fastos::TimeStamp(4*fastos::TimeStamp::MS));
+ src._features |= GDF_RANKP_QFLAGS;
+ src.setRanking("seven");
+ src._qflags = 5u;
+ src._features |= GDF_RESCLASSNAME;
+ src.setResultClassName("resclass");
+ src._features |= GDF_PROPERTIES;
+ src._propsVector.resize(3);
+ src._propsVector[0].allocEntries(2);
+ src._propsVector[0].setName("feature", strlen("feature"));
+ src._propsVector[0].setKey(0, "p1k1", strlen("p1k1"));
+ src._propsVector[0].setValue(0, "p1v1", strlen("p1v1"));
+ src._propsVector[0].setKey(1, "p1k2", strlen("p1k2"));
+ src._propsVector[0].setValue(1, "p1v2", strlen("p1v2"));
+ src._propsVector[1].allocEntries(2);
+ src._propsVector[1].setName("caches", strlen("caches"));
+ src._propsVector[1].setKey(0, "p2k1", strlen("p2k1"));
+ src._propsVector[1].setValue(0, "p2v1", strlen("p2v1"));
+ src._propsVector[1].setKey(1, "p2k2", strlen("p2k2"));
+ src._propsVector[1].setValue(1, "p2v2", strlen("p2v2"));
+ src._propsVector[2].allocEntries(1);
+ src._propsVector[2].setName("rank", strlen("rank"));
+ src._propsVector[2].setKey(0, "sessionId", strlen("sessionId"));
+ src._propsVector[2].setValue(0, sessionId.c_str(), sessionId.size());
+ src._features |= GDF_QUERYSTACK;
+ src._stackItems = 14u;
+ src.setStackDump("stackdump");
+ src._features |= GDF_LOCATION;
+ src.setLocation("location");
+ src._features |= GDF_MLD;
+ src.AllocateDocIDs(2);
+ src._docid[0]._gid = gid0;
+ src._docid[0]._partid = 5;
+ src._docid[1]._gid = gid1;
+ src._docid[1]._partid = 6;
+
+ { // full copy
+ FS4Packet_GETDOCSUMSX cpy;
+ copyPacket(src, cpy);
+
+ DocsumRequest dst;
+ PacketConverter::toDocsumRequest(cpy, dst);
+ EXPECT_EQUAL((dst.getTimeOfDoom() - dst.getStartTime()).ms(), 4u);
+ EXPECT_EQUAL(dst.ranking, "seven");
+ EXPECT_EQUAL(dst.queryFlags, 5u);
+ EXPECT_EQUAL(dst.resultClassName, "resclass");
+ EXPECT_EQUAL(dst.propertiesMap.size(), 3u);
+ EXPECT_EQUAL(dst.propertiesMap.featureOverrides().lookup("p1k1").get(), std::string("p1v1"));
+ EXPECT_EQUAL(dst.propertiesMap.featureOverrides().lookup("p1k2").get(), std::string("p1v2"));
+ EXPECT_EQUAL(dst.propertiesMap.cacheProperties().lookup("p2k1").get(), std::string("p2v1"));
+ EXPECT_EQUAL(dst.propertiesMap.cacheProperties().lookup("p2k2").get(), std::string("p2v2"));
+ EXPECT_EQUAL(dst.propertiesMap.matchProperties().lookup("p3k1").get(), std::string(""));
+ EXPECT_EQUAL(std::string(&dst.stackDump[0], dst.stackDump.size()), "stackdump");
+ EXPECT_EQUAL(dst.location, "location");
+ EXPECT_EQUAL(dst._flags, 0u);
+ EXPECT_EQUAL(dst.hits.size(), 2u);
+ EXPECT_EQUAL(dst.hits[0].docid, 0u);
+ EXPECT_TRUE(dst.hits[0].gid == gid0);
+ EXPECT_EQUAL(dst.hits[0].path, 5u);
+ EXPECT_EQUAL(dst.hits[1].docid, 0u);
+ EXPECT_TRUE(dst.hits[1].gid == gid1);
+ EXPECT_EQUAL(dst.hits[1].path, 6u);
+ EXPECT_EQUAL(sessionId,
+ string(&dst.sessionId[0], dst.sessionId.size()));
+ }
+ { // without datetime
+ FS4Packet_GETDOCSUMSX cpy;
+ copyPacket(src, cpy);
+
+ DocsumRequest dst;
+ PacketConverter::toDocsumRequest(cpy, dst);
+ }
+ { // without mld
+ FS4Packet_GETDOCSUMSX cpy;
+ copyPacket(src, cpy);
+ cpy._features &= ~GDF_MLD;
+
+ DocsumRequest dst;
+ PacketConverter::toDocsumRequest(cpy, dst);
+ EXPECT_EQUAL(dst.useWideHits, false);
+ EXPECT_EQUAL(dst.hits.size(), 2u);
+ EXPECT_EQUAL(dst.hits[0].docid, 0u);
+ EXPECT_TRUE(dst.hits[0].gid == gid0);
+ EXPECT_EQUAL(dst.hits[1].docid, 0u);
+ EXPECT_TRUE(dst.hits[1].gid == gid1);
+ }
+ { // with ignore row flag
+ FS4Packet_GETDOCSUMSX tcpy;
+ copyPacket(src, tcpy);
+ tcpy._features |= GDF_FLAGS;
+ tcpy._flags = GDFLAG_IGNORE_ROW;
+ FS4Packet_GETDOCSUMSX cpy;
+ copyPacket(tcpy, cpy);
+ DocsumRequest dst;
+ PacketConverter::toDocsumRequest(cpy, dst);
+ EXPECT_EQUAL(dst._flags, static_cast<uint32_t>(GDFLAG_IGNORE_ROW));
+ }
+}
+
+void
+Test::convertFromReply()
+{
+ DocsumReply src;
+ src.docsums.resize(2);
+ src.docsums[0].docid = 1;
+ src.docsums[0].gid = gid0;
+ src.docsums[0].data.resize(2);
+ src.docsums[0].data.str()[0] = 5;
+ src.docsums[0].data.str()[1] = 6;
+ src.docsums[1].docid = 2;
+ src.docsums[1].gid = gid1;
+ src.docsums[1].data.resize(3);
+ src.docsums[1].data.str()[0] = 7;
+ src.docsums[1].data.str()[1] = 8;
+ src.docsums[1].data.str()[2] = 9;
+
+ { // test first
+ FS4Packet_DOCSUM dst;
+ PacketConverter::fromDocsumReplyElement(src.docsums[0], dst);
+ EXPECT_EQUAL(dst.getGid(), gid0);
+ EXPECT_EQUAL(dst.getBuf().size(), 2u);
+ EXPECT_EQUAL(dst.getBuf().c_str()[0], 5);
+ EXPECT_EQUAL(dst.getBuf().c_str()[1], 6);
+ }
+ { // test second
+ FS4Packet_DOCSUM dst;
+ PacketConverter::fromDocsumReplyElement(src.docsums[1], dst);
+ EXPECT_EQUAL(dst.getGid(), gid1);
+ EXPECT_EQUAL(dst.getBuf().size(), 3u);
+ EXPECT_EQUAL(dst.getBuf().c_str()[0], 7);
+ EXPECT_EQUAL(dst.getBuf().c_str()[1], 8);
+ EXPECT_EQUAL(dst.getBuf().c_str()[2], 9);
+ }
+}
+
+int
+Test::Main()
+{
+ TEST_INIT("docsumapi_test");
+ convertToRequest();
+ convertFromReply();
+ TEST_DONE();
+}
+
+TEST_APPHOOK(Test);
diff --git a/searchlib/src/tests/engine/monitorapi/.gitignore b/searchlib/src/tests/engine/monitorapi/.gitignore
new file mode 100644
index 00000000000..66fc005087f
--- /dev/null
+++ b/searchlib/src/tests/engine/monitorapi/.gitignore
@@ -0,0 +1,4 @@
+.depend
+Makefile
+monitorapi_test
+searchlib_monitorapi_test_app
diff --git a/searchlib/src/tests/engine/monitorapi/CMakeLists.txt b/searchlib/src/tests/engine/monitorapi/CMakeLists.txt
new file mode 100644
index 00000000000..f78a8e04fd1
--- /dev/null
+++ b/searchlib/src/tests/engine/monitorapi/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_monitorapi_test_app
+ SOURCES
+ monitorapi_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_monitorapi_test_app COMMAND searchlib_monitorapi_test_app)
diff --git a/searchlib/src/tests/engine/monitorapi/DESC b/searchlib/src/tests/engine/monitorapi/DESC
new file mode 100644
index 00000000000..882636f1952
--- /dev/null
+++ b/searchlib/src/tests/engine/monitorapi/DESC
@@ -0,0 +1 @@
+monitorapi test. Take a look at monitorapi.cpp for details.
diff --git a/searchlib/src/tests/engine/monitorapi/FILES b/searchlib/src/tests/engine/monitorapi/FILES
new file mode 100644
index 00000000000..16ad6789632
--- /dev/null
+++ b/searchlib/src/tests/engine/monitorapi/FILES
@@ -0,0 +1 @@
+monitorapi.cpp
diff --git a/searchlib/src/tests/engine/monitorapi/monitorapi_test.cpp b/searchlib/src/tests/engine/monitorapi/monitorapi_test.cpp
new file mode 100644
index 00000000000..0df52cbe0d8
--- /dev/null
+++ b/searchlib/src/tests/engine/monitorapi/monitorapi_test.cpp
@@ -0,0 +1,126 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("monitorapi_test");
+#include <vespa/vespalib/testkit/testapp.h>
+#include <vespa/searchlib/common/packets.h>
+#include <vespa/searchlib/engine/monitorapi.h>
+#include <vespa/searchlib/engine/packetconverter.h>
+
+using namespace search::engine;
+using namespace search::fs4transport;
+
+namespace {
+
+bool checkFeature(uint32_t features, uint32_t mask) {
+ return ((features & mask) != 0);
+}
+
+bool checkNotFeature(uint32_t features, uint32_t mask) {
+ return !checkFeature(features, mask);
+}
+
+// light-weight network hop simulation
+template <typename T> void copyPacket(T &src, T &dst) {
+ FNET_DataBuffer buf;
+ src.Encode(&buf);
+ dst.Decode(&buf, buf.GetDataLen());
+}
+
+} // namespace <unnamed>
+
+class Test : public vespalib::TestApp
+{
+public:
+ void convertToRequest();
+ void convertFromReply();
+ int Main();
+};
+
+void
+Test::convertToRequest()
+{
+ FS4Packet_MONITORQUERYX src;
+ src._features |= MQF_QFLAGS;
+ src._qflags = 1u;
+
+ { // copy all
+ FS4Packet_MONITORQUERYX cpy;
+ copyPacket(src, cpy);
+
+ MonitorRequest dst;
+ PacketConverter::toMonitorRequest(cpy, dst);
+ EXPECT_EQUAL(dst.flags, 1u);
+ }
+}
+
+void
+Test::convertFromReply()
+{
+ MonitorReply src;
+ src.mld = true;
+ src.partid = 1u;
+ src.timestamp = 2u;
+ src.totalNodes = 3u;
+ src.activeNodes = 4u;
+ src.totalParts = 5u;
+ src.activeParts = 6u;
+ src.flags = 7u;
+ src.activeDocs = 8u;
+ src.activeDocsRequested = true;
+
+ { // full copy
+ MonitorReply cpy = src;
+
+ FS4Packet_MONITORRESULTX dst;
+ PacketConverter::fromMonitorReply(cpy, dst);
+ EXPECT_EQUAL(dst._partid, 1u);
+ EXPECT_EQUAL(dst._timestamp, 2u);
+ EXPECT_TRUE(checkFeature(dst._features, MRF_MLD));
+ EXPECT_EQUAL(dst._totalNodes, 3u);
+ EXPECT_EQUAL(dst._activeNodes, 4u);
+ EXPECT_EQUAL(dst._totalParts, 5u);
+ EXPECT_EQUAL(dst._activeParts, 6u);
+ EXPECT_TRUE(checkFeature(dst._features, MRF_RFLAGS));
+ EXPECT_EQUAL(dst._rflags, 7u);
+ EXPECT_EQUAL(dst._activeDocs, 8u);
+ EXPECT_TRUE(checkFeature(dst._features, MRF_ACTIVEDOCS));
+ }
+ { // non-mld
+ MonitorReply cpy = src;
+ cpy.mld = false;
+
+ FS4Packet_MONITORRESULTX dst;
+ PacketConverter::fromMonitorReply(cpy, dst);
+ EXPECT_TRUE(checkNotFeature(dst._features, MRF_MLD));
+ }
+ { // without flags
+ MonitorReply cpy = src;
+ cpy.flags = 0;
+
+ FS4Packet_MONITORRESULTX dst;
+ PacketConverter::fromMonitorReply(cpy, dst);
+ EXPECT_TRUE(checkNotFeature(dst._features, MRF_RFLAGS));
+ EXPECT_EQUAL(dst._rflags, 0u);
+ }
+ { // without activedocs
+ MonitorReply cpy = src;
+ cpy.activeDocsRequested = false;
+
+ FS4Packet_MONITORRESULTX dst;
+ PacketConverter::fromMonitorReply(cpy, dst);
+ EXPECT_TRUE(checkNotFeature(dst._features, MRF_ACTIVEDOCS));
+ EXPECT_EQUAL(dst._activeDocs, 0u);
+ }
+}
+
+int
+Test::Main()
+{
+ TEST_INIT("monitorapi_test");
+ convertToRequest();
+ convertFromReply();
+ TEST_DONE();
+}
+
+TEST_APPHOOK(Test);
diff --git a/searchlib/src/tests/engine/searchapi/.gitignore b/searchlib/src/tests/engine/searchapi/.gitignore
new file mode 100644
index 00000000000..92089e63cdd
--- /dev/null
+++ b/searchlib/src/tests/engine/searchapi/.gitignore
@@ -0,0 +1,4 @@
+.depend
+Makefile
+searchapi_test
+searchlib_searchapi_test_app
diff --git a/searchlib/src/tests/engine/searchapi/CMakeLists.txt b/searchlib/src/tests/engine/searchapi/CMakeLists.txt
new file mode 100644
index 00000000000..89d1b8197a5
--- /dev/null
+++ b/searchlib/src/tests/engine/searchapi/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_searchapi_test_app
+ SOURCES
+ searchapi_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_searchapi_test_app COMMAND searchlib_searchapi_test_app)
diff --git a/searchlib/src/tests/engine/searchapi/DESC b/searchlib/src/tests/engine/searchapi/DESC
new file mode 100644
index 00000000000..b006841d75d
--- /dev/null
+++ b/searchlib/src/tests/engine/searchapi/DESC
@@ -0,0 +1 @@
+searchapi test. Take a look at searchapi.cpp for details.
diff --git a/searchlib/src/tests/engine/searchapi/FILES b/searchlib/src/tests/engine/searchapi/FILES
new file mode 100644
index 00000000000..806f04bbe4c
--- /dev/null
+++ b/searchlib/src/tests/engine/searchapi/FILES
@@ -0,0 +1 @@
+searchapi.cpp
diff --git a/searchlib/src/tests/engine/searchapi/searchapi_test.cpp b/searchlib/src/tests/engine/searchapi/searchapi_test.cpp
new file mode 100644
index 00000000000..cd040bfaeac
--- /dev/null
+++ b/searchlib/src/tests/engine/searchapi/searchapi_test.cpp
@@ -0,0 +1,267 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("searchapi_test");
+#include <vespa/vespalib/testkit/testapp.h>
+#include <vespa/searchlib/common/packets.h>
+#include <vespa/searchlib/engine/searchapi.h>
+#include <vespa/searchlib/engine/packetconverter.h>
+
+using namespace search::engine;
+using namespace search::fs4transport;
+
+namespace {
+
+bool checkFeature(uint32_t features, uint32_t mask) {
+ return ((features & mask) != 0);
+}
+
+bool checkNotFeature(uint32_t features, uint32_t mask) {
+ return !checkFeature(features, mask);
+}
+
+// light-weight network hop simulation
+template <typename T> void copyPacket(T &src, T &dst) {
+ FNET_DataBuffer buf;
+ src.Encode(&buf);
+ dst.Decode(&buf, buf.GetDataLen());
+}
+
+} // namespace <unnamed>
+
+class Test : public vespalib::TestApp
+{
+public:
+ void propertyNames();
+ void convertToRequest();
+ void convertFromReply();
+ int Main();
+};
+
+void
+Test::propertyNames()
+{
+ EXPECT_EQUAL(search::MapNames::RANK, "rank");
+ EXPECT_EQUAL(search::MapNames::FEATURE, "feature");
+ EXPECT_EQUAL(search::MapNames::HIGHLIGHTTERMS, "highlightterms");
+ EXPECT_EQUAL(search::MapNames::MATCH, "match");
+ EXPECT_EQUAL(search::MapNames::CACHES, "caches");
+}
+
+void
+Test::convertToRequest()
+{
+ FS4Packet_QUERYX src;
+ src._offset = 2u;
+ src._maxhits = 3u;
+ src.setTimeout(fastos::TimeStamp(4*fastos::TimeStamp::MS));
+ src._qflags = 5u;
+ src._features |= QF_RANKP;
+ src.setRanking("seven");
+ src._features |= QF_PROPERTIES;
+ src._propsVector.resize(2);
+ src._propsVector[0].allocEntries(2);
+ src._propsVector[0].setName("feature", strlen("feature"));
+ src._propsVector[0].setKey(0, "p1k1", strlen("p1k1"));
+ src._propsVector[0].setValue(0, "p1v1", strlen("p1v1"));
+ src._propsVector[0].setKey(1, "p1k2", strlen("p1k2"));
+ src._propsVector[0].setValue(1, "p1v2", strlen("p1v2"));
+ src._propsVector[1].allocEntries(2);
+ src._propsVector[1].setName("caches", strlen("caches"));
+ src._propsVector[1].setKey(0, "p2k1", strlen("p2k1"));
+ src._propsVector[1].setValue(0, "p2v1", strlen("p2v1"));
+ src._propsVector[1].setKey(1, "p2k2", strlen("p2k2"));
+ src._propsVector[1].setValue(1, "p2v2", strlen("p2v2"));
+ src._features |= QF_SORTSPEC;
+ src.setSortSpec("sortspec");
+ src._features |= QF_AGGRSPEC;
+ src.setAggrSpec("aggrspec");
+ src._features |= QF_GROUPSPEC;
+ src.setGroupSpec("groupspec");
+ src._features |= QF_SESSIONID;
+ src.setSessionId("sessionid");
+ src._features |= QF_LOCATION;
+ src.setLocation("location");
+ src._features |= QF_PARSEDQUERY;
+ src._numStackItems = 14u;
+ src.setStackDump("stackdump");
+
+ { // full copy
+ FS4Packet_QUERYX cpy;
+ copyPacket(src, cpy);
+
+ SearchRequest dst;
+ PacketConverter::toSearchRequest(cpy, dst);
+ EXPECT_EQUAL(dst.offset, 2u);
+ EXPECT_EQUAL(dst.maxhits, 3u);
+ EXPECT_EQUAL((dst.getTimeOfDoom() - dst.getStartTime()).ms(), 4u);
+ EXPECT_EQUAL(dst.queryFlags, 5u);
+ EXPECT_EQUAL(vespalib::string("seven"), dst.ranking);
+ EXPECT_EQUAL(dst.propertiesMap.size(), 2u);
+ EXPECT_EQUAL(dst.propertiesMap.featureOverrides().lookup("p1k1").get(), std::string("p1v1"));
+ EXPECT_EQUAL(dst.propertiesMap.featureOverrides().lookup("p1k2").get(), std::string("p1v2"));
+ EXPECT_EQUAL(dst.propertiesMap.cacheProperties().lookup("p2k1").get(), std::string("p2v1"));
+ EXPECT_EQUAL(dst.propertiesMap.cacheProperties().lookup("p2k2").get(), std::string("p2v2"));
+ EXPECT_EQUAL(dst.propertiesMap.matchProperties().lookup("p3k1").get(), std::string(""));
+ EXPECT_EQUAL(dst.sortSpec, "sortspec");
+ EXPECT_EQUAL(std::string(&dst.groupSpec[0], dst.groupSpec.size()), "groupspec");
+ EXPECT_EQUAL(std::string(&dst.sessionId[0], dst.sessionId.size()), "sessionid");
+ EXPECT_EQUAL(dst.location, "location");
+ EXPECT_EQUAL(dst.stackItems, 14u);
+ EXPECT_EQUAL(std::string(&dst.stackDump[0], dst.stackDump.size()), "stackdump");
+ }
+ { // without datetime
+ FS4Packet_QUERYX cpy;
+ copyPacket(src, cpy);
+
+ SearchRequest dst;
+ PacketConverter::toSearchRequest(cpy, dst);
+ }
+}
+
+void
+Test::convertFromReply()
+{
+ SearchReply src;
+ src.offset = 1u;
+ src.totalHitCount = 2u;
+ src.maxRank = 3;
+ src.setDistributionKey(4u);
+ src.sortIndex.push_back(0);
+ src.sortIndex.push_back(1);
+ src.sortIndex.push_back(2);
+ src.sortData.push_back(11);
+ src.sortData.push_back(22);
+ src.groupResult.push_back(2);
+ src.useCoverage = true;
+ src.coverage = SearchReply::Coverage(5, 3);
+ src.useWideHits = true;
+ src.hits.resize(2);
+ document::GlobalId gid0("aaaaaaaaaaaa");
+ document::GlobalId gid1("bbbbbbbbbbbb");
+ src.hits[0].gid = gid0;
+ src.hits[0].metric = 5;
+ src.hits[0].path = 11;
+ src.hits[0].setDistributionKey(100);
+ src.hits[1].gid = gid1;
+ src.hits[1].metric = 4;
+ src.hits[1].path = 10;
+ src.hits[1].setDistributionKey(105);
+
+ { // full copy
+ SearchReply cpy = src;
+
+ FS4Packet_QUERYRESULTX dst0;
+ PacketConverter::fromSearchReply(cpy, dst0);
+ FS4Packet_QUERYRESULTX dst;
+ copyPacket(dst0, dst);
+ EXPECT_EQUAL(dst._offset, 1u);
+ EXPECT_EQUAL(dst._numDocs, 2u);
+ EXPECT_EQUAL(dst._totNumDocs, 2u);
+ EXPECT_EQUAL(dst._maxRank, 3);
+ EXPECT_EQUAL(4u, dst.getDistributionKey());
+ EXPECT_TRUE(checkFeature(dst._features, QRF_SORTDATA));
+ EXPECT_EQUAL(dst._sortIndex[0], 0u);
+ EXPECT_EQUAL(dst._sortIndex[1], 1u);
+ EXPECT_EQUAL(dst._sortIndex[2], 2u);
+ EXPECT_EQUAL(dst._sortData[0], 11);
+ EXPECT_EQUAL(dst._sortData[1], 22);
+ EXPECT_TRUE(checkFeature(dst._features, QRF_GROUPDATA));
+ EXPECT_EQUAL(dst._groupDataLen, 1u);
+ EXPECT_EQUAL(dst._groupData[0], 2);
+ EXPECT_TRUE(checkFeature(dst._features, QRF_COVERAGE));
+ EXPECT_EQUAL(dst._coverageDocs, 3u);
+ EXPECT_EQUAL(dst._activeDocs, 5u);
+ EXPECT_TRUE(checkFeature(dst._features, QRF_MLD));
+ EXPECT_TRUE(dst._hits[0]._gid == gid0);
+ EXPECT_EQUAL(dst._hits[0]._metric, 5);
+ EXPECT_EQUAL(dst._hits[0]._partid, 11u);
+ EXPECT_EQUAL(dst._hits[0].getDistributionKey(), 100u);
+ EXPECT_TRUE(dst._hits[1]._gid == gid1);
+ EXPECT_EQUAL(dst._hits[1]._metric, 4);
+ EXPECT_EQUAL(dst._hits[1]._partid, 10u);
+ EXPECT_EQUAL(dst._hits[1].getDistributionKey(), 105u);
+ }
+ { // not sortdata
+ SearchReply cpy = src;
+ cpy.sortIndex.clear();
+ cpy.sortData.clear();
+
+ FS4Packet_QUERYRESULTX dst0;
+ PacketConverter::fromSearchReply(cpy, dst0);
+ FS4Packet_QUERYRESULTX dst;
+ copyPacket(dst0, dst);
+ EXPECT_TRUE(checkNotFeature(dst._features, QRF_SORTDATA));
+ }
+ { // not groupdata
+ SearchReply cpy = src;
+ cpy.groupResult.clear();
+
+ FS4Packet_QUERYRESULTX dst0;
+ PacketConverter::fromSearchReply(cpy, dst0);
+ FS4Packet_QUERYRESULTX dst;
+ copyPacket(dst0, dst);
+ EXPECT_TRUE(checkNotFeature(dst._features, QRF_GROUPDATA));
+ }
+ { // non-full coverage
+ SearchReply cpy = src;
+
+ FS4Packet_QUERYRESULTX dst0;
+ PacketConverter::fromSearchReply(cpy, dst0);
+ FS4Packet_QUERYRESULTX dst;
+ copyPacket(dst0, dst);
+ EXPECT_TRUE(checkFeature(dst._features, QRF_COVERAGE));
+ EXPECT_EQUAL(dst._coverageDocs, 3u);
+ EXPECT_EQUAL(dst._activeDocs, 5u);
+ }
+ { // not coverage
+ SearchReply cpy = src;
+ cpy.useCoverage = false;
+
+ FS4Packet_QUERYRESULTX dst0;
+ PacketConverter::fromSearchReply(cpy, dst0);
+ FS4Packet_QUERYRESULTX dst;
+ copyPacket(dst0, dst);
+ EXPECT_TRUE(checkNotFeature(dst._features, QRF_COVERAGE));
+ }
+ { // non-mld
+ SearchReply cpy = src;
+ cpy.useWideHits = false;
+
+ FS4Packet_QUERYRESULTX dst0;
+ PacketConverter::fromSearchReply(cpy, dst0);
+ FS4Packet_QUERYRESULTX dst;
+ copyPacket(dst0, dst);
+ EXPECT_TRUE(checkNotFeature(dst._features, QRF_MLD));
+ EXPECT_TRUE(dst._hits[0]._gid == gid0);
+ EXPECT_EQUAL(dst._hits[0]._metric, 5);
+ EXPECT_TRUE(dst._hits[1]._gid == gid1);
+ EXPECT_EQUAL(dst._hits[1]._metric, 4);
+ }
+ { // non-mld not siteid
+ SearchReply cpy = src;
+ cpy.useWideHits = false;
+
+ FS4Packet_QUERYRESULTX dst0;
+ PacketConverter::fromSearchReply(cpy, dst0);
+ FS4Packet_QUERYRESULTX dst;
+ copyPacket(dst0, dst);
+ EXPECT_TRUE(checkNotFeature(dst._features, QRF_MLD));
+ EXPECT_TRUE(dst._hits[0]._gid == gid0);
+ EXPECT_EQUAL(dst._hits[0]._metric, 5);
+ EXPECT_TRUE(dst._hits[1]._gid == gid1);
+ EXPECT_EQUAL(dst._hits[1]._metric, 4);
+ }
+}
+
+int
+Test::Main()
+{
+ TEST_INIT("searchapi_test");
+ propertyNames();
+ convertToRequest();
+ convertFromReply();
+ TEST_DONE();
+}
+
+TEST_APPHOOK(Test);
diff --git a/searchlib/src/tests/engine/transportserver/.gitignore b/searchlib/src/tests/engine/transportserver/.gitignore
new file mode 100644
index 00000000000..09d836e0004
--- /dev/null
+++ b/searchlib/src/tests/engine/transportserver/.gitignore
@@ -0,0 +1,5 @@
+.depend
+Makefile
+transportserver_test
+vlog.txt
+searchlib_transportserver_test_app
diff --git a/searchlib/src/tests/engine/transportserver/CMakeLists.txt b/searchlib/src/tests/engine/transportserver/CMakeLists.txt
new file mode 100644
index 00000000000..502279bc728
--- /dev/null
+++ b/searchlib/src/tests/engine/transportserver/CMakeLists.txt
@@ -0,0 +1,12 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_transportserver_test_app
+ SOURCES
+ transportserver_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(
+ NAME searchlib_transportserver_test_app
+ COMMAND searchlib_transportserver_test_app
+ ENVIRONMENT "VESPA_LOG_TARGET=file:vlog.txt;VESPA_LOG_LEVEL=\"all -spam\""
+)
diff --git a/searchlib/src/tests/engine/transportserver/DESC b/searchlib/src/tests/engine/transportserver/DESC
new file mode 100644
index 00000000000..2fb736a9319
--- /dev/null
+++ b/searchlib/src/tests/engine/transportserver/DESC
@@ -0,0 +1 @@
+transportserver test. Take a look at transportserver.cpp for details.
diff --git a/searchlib/src/tests/engine/transportserver/FILES b/searchlib/src/tests/engine/transportserver/FILES
new file mode 100644
index 00000000000..ec1b60cf739
--- /dev/null
+++ b/searchlib/src/tests/engine/transportserver/FILES
@@ -0,0 +1 @@
+transportserver.cpp
diff --git a/searchlib/src/tests/engine/transportserver/transportserver_test.cpp b/searchlib/src/tests/engine/transportserver/transportserver_test.cpp
new file mode 100644
index 00000000000..af4dc4761bc
--- /dev/null
+++ b/searchlib/src/tests/engine/transportserver/transportserver_test.cpp
@@ -0,0 +1,187 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("transportserver_test");
+#include <vespa/vespalib/testkit/testapp.h>
+#include <vespa/document/base/documentid.h>
+#include <vespa/searchlib/common/packets.h>
+#include <vespa/searchlib/engine/transportserver.h>
+#include <vespa/searchlib/engine/searchapi.h>
+#include <vespa/searchlib/engine/docsumapi.h>
+#include <vespa/searchlib/engine/monitorapi.h>
+#include <vespa/vespalib/util/stringfmt.h>
+#include <vespa/fnet/fnet.h>
+#include <vespa/searchlib/engine/errorcodes.h>
+
+using namespace document;
+using namespace vespalib;
+using namespace search::engine;
+using namespace search::fs4transport;
+
+class SyncServer : public search::engine::SearchServer,
+ public search::engine::DocsumServer,
+ public search::engine::MonitorServer
+{
+private:
+ virtual SearchReply::UP search(SearchRequest::Source request, SearchClient &client);
+ virtual DocsumReply::UP getDocsums(DocsumRequest::Source request, DocsumClient &client);
+ virtual MonitorReply::UP ping(MonitorRequest::UP request, MonitorClient &client);
+
+ SyncServer(const SyncServer &);
+ SyncServer &operator=(const SyncServer &);
+public:
+ SyncServer() {}
+ virtual ~SyncServer() {}
+};
+
+SearchReply::UP
+SyncServer::search(SearchRequest::Source request, SearchClient &)
+{
+ const SearchRequest &req = *request.get();
+ SearchReply::UP reply(new SearchReply());
+ SearchReply &ret = *reply;
+ ret.request = request.release();
+ LOG(info, "responding to search request...");
+ ret.offset = req.offset;
+ return reply;
+}
+
+DocsumReply::UP
+SyncServer::getDocsums(DocsumRequest::Source request, DocsumClient &)
+{
+ DocsumReply::UP reply(new DocsumReply());
+ DocsumReply &ret = *reply;
+ ret.request = request.release();
+ LOG(info, "responding to docsum request...");
+ ret.docsums.resize(1);
+ ret.docsums[0].setData("data", strlen("data"));
+ ret.docsums[0].gid = DocumentId(vespalib::make_string("doc::100")).getGlobalId();
+ return reply;
+}
+
+MonitorReply::UP
+SyncServer::ping(MonitorRequest::UP request, MonitorClient &)
+{
+ MonitorRequest &req = *request;
+ MonitorReply::UP reply(new MonitorReply());
+ MonitorReply &ret = *reply;
+ LOG(info, "responding to monitor request...");
+ ret.timestamp = req.flags;
+ return reply;
+}
+
+TEST("transportserver") {
+ {
+ SyncServer server;
+ TransportServer transport(server, server, server, 0,
+ TransportServer::DEBUG_ALL);
+ ASSERT_TRUE(transport.start());
+ int port = transport.getListenPort();
+ ASSERT_TRUE(port > 0);
+ {
+ FNET_Context ctx;
+ FastOS_ThreadPool pool(128 * 1024);
+ FNET_Transport client;
+ ASSERT_TRUE(client.Start(&pool));
+
+ FNET_PacketQueue adminQ;
+ FNET_Connection *conn = client.Connect(make_string("tcp/localhost:%d", port).c_str(),
+ &FS4PersistentPacketStreamer::Instance, &adminQ);
+ ASSERT_TRUE(conn != 0);
+ {
+ FS4Packet_MONITORQUERYX *mq = new FS4Packet_MONITORQUERYX();
+ mq->_qflags = 30;
+ mq->_features |= MQF_QFLAGS;
+ conn->PostPacket(mq, FNET_NOID);
+ FNET_Packet *p = adminQ.DequeuePacket(60000, &ctx);
+ ASSERT_TRUE(p != 0);
+ ASSERT_TRUE(p->GetPCODE() == PCODE_MONITORRESULTX);
+ FS4Packet_MONITORRESULTX *r = (FS4Packet_MONITORRESULTX*)p;
+ EXPECT_EQUAL(r->_timestamp, 30u);
+ p->Free();
+ }
+ {
+ FNET_PacketQueue q;
+ FNET_Channel *ch = conn->OpenChannel(&q, FNET_Context());
+ FS4Packet_QUERYX *qx = new FS4Packet_QUERYX();
+ qx->_features |= QF_PARSEDQUERY;
+ qx->_offset = 100;
+ ch->Send(qx);
+ FNET_Packet *p = q.DequeuePacket(60000, &ctx);
+ ASSERT_TRUE(p != 0);
+ ASSERT_TRUE(p->GetPCODE() == PCODE_QUERYRESULTX);
+ FS4Packet_QUERYRESULTX *r = (FS4Packet_QUERYRESULTX*)p;
+ EXPECT_EQUAL(r->_offset, 100u);
+ p->Free();
+ ch->CloseAndFree();
+ }
+ {
+ FS4Packet_MONITORQUERYX *mq = new FS4Packet_MONITORQUERYX();
+ mq->_qflags = 40;
+ mq->_features |= MQF_QFLAGS;
+ conn->PostPacket(mq, FNET_NOID);
+ FNET_Packet *p = adminQ.DequeuePacket(60000, &ctx);
+ ASSERT_TRUE(p != 0);
+ ASSERT_TRUE(p->GetPCODE() == PCODE_MONITORRESULTX);
+ FS4Packet_MONITORRESULTX *r = (FS4Packet_MONITORRESULTX*)p;
+ EXPECT_EQUAL(r->_timestamp, 40u);
+ p->Free();
+ }
+ {
+ FNET_PacketQueue q;
+ FNET_Channel *ch = conn->OpenChannel(&q, FNET_Context());
+ FS4Packet_GETDOCSUMSX *qdx = new FS4Packet_GETDOCSUMSX();
+ ch->Send(qdx);
+ FNET_Packet *p = q.DequeuePacket(60000, &ctx);
+ ASSERT_TRUE(p != 0);
+ ASSERT_TRUE(p->GetPCODE() == PCODE_DOCSUM);
+ FS4Packet_DOCSUM *r = (FS4Packet_DOCSUM*)p;
+ EXPECT_EQUAL(r->getGid(), DocumentId("doc::100").getGlobalId());
+ p->Free();
+ p = q.DequeuePacket(60000, &ctx);
+ ASSERT_TRUE(p != 0);
+ ASSERT_TRUE(p->GetPCODE() == PCODE_EOL);
+ p->Free();
+ ch->CloseAndFree();
+ }
+ {
+ FS4Packet_MONITORQUERYX *mq = new FS4Packet_MONITORQUERYX();
+ mq->_qflags = 50;
+ mq->_features |= MQF_QFLAGS;
+ conn->PostPacket(mq, FNET_NOID);
+ FNET_Packet *p = adminQ.DequeuePacket(60000, &ctx);
+ ASSERT_TRUE(p != 0);
+ ASSERT_TRUE(p->GetPCODE() == PCODE_MONITORRESULTX);
+ FS4Packet_MONITORRESULTX *r = (FS4Packet_MONITORRESULTX*)p;
+ EXPECT_EQUAL(r->_timestamp, 50u);
+ p->Free();
+ }
+ // shut down client
+ conn->CloseAdminChannel();
+ client.Close(conn);
+ conn->SubRef();
+ client.sync();
+ client.ShutDown(true);
+ pool.Close();
+ }
+
+ }
+}
+
+void printError(ErrorCode ecode) {
+ fprintf(stderr, "error code %u: '%s'\n", ecode, getStringFromErrorCode(ecode));
+}
+
+TEST("print errors") {
+ printError(ECODE_NO_ERROR);
+ printError(ECODE_GENERAL_ERROR);
+ printError(ECODE_QUERY_PARSE_ERROR);
+ printError(ECODE_ALL_PARTITIONS_DOWN);
+ printError(ECODE_ILLEGAL_DATASET);
+ printError(ECODE_OVERLOADED);
+ printError(ECODE_NOT_IMPLEMENTED);
+ printError(ECODE_QUERY_NOT_ALLOWED);
+ printError(ECODE_TIMEOUT);
+}
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/features/.gitignore b/searchlib/src/tests/features/.gitignore
new file mode 100644
index 00000000000..1c71377a25e
--- /dev/null
+++ b/searchlib/src/tests/features/.gitignore
@@ -0,0 +1,11 @@
+.depend
+Makefile
+beta_features_test
+featurebenchmark
+nativerank_test
+prod_features_test
+vlog1.txt
+vlog2.txt
+vlog3.txt
+searchlib_prod_features_test_app
+searchlib_featurebenchmark_app
diff --git a/searchlib/src/tests/features/CMakeLists.txt b/searchlib/src/tests/features/CMakeLists.txt
new file mode 100644
index 00000000000..f1703b02c8b
--- /dev/null
+++ b/searchlib/src/tests/features/CMakeLists.txt
@@ -0,0 +1,19 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_prod_features_test_app
+ SOURCES
+ prod_features.cpp
+ prod_features_framework.cpp
+ prod_features_attributematch.cpp
+ prod_features_fieldmatch.cpp
+ prod_features_fieldtermmatch.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_prod_features_test_app COMMAND sh prod_features_test.sh)
+vespa_add_executable(searchlib_featurebenchmark_app
+ SOURCES
+ featurebenchmark.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_featurebenchmark_app COMMAND searchlib_featurebenchmark_app BENCHMARK)
diff --git a/searchlib/src/tests/features/DESC b/searchlib/src/tests/features/DESC
new file mode 100644
index 00000000000..333541aa0a0
--- /dev/null
+++ b/searchlib/src/tests/features/DESC
@@ -0,0 +1 @@
+features test. Take a look at features.cpp for details.
diff --git a/searchlib/src/tests/features/FILES b/searchlib/src/tests/features/FILES
new file mode 100644
index 00000000000..6e53d562fc0
--- /dev/null
+++ b/searchlib/src/tests/features/FILES
@@ -0,0 +1,3 @@
+beta_features.cpp
+prod_features.cpp
+nativerank.cpp
diff --git a/searchlib/src/tests/features/benchmark/dotproduct/c-100000-1000-array-double.txt b/searchlib/src/tests/features/benchmark/dotproduct/c-100000-1000-array-double.txt
new file mode 100644
index 00000000000..a4319bdae53
--- /dev/null
+++ b/searchlib/src/tests/features/benchmark/dotproduct/c-100000-1000-array-double.txt
@@ -0,0 +1,7 @@
+case=dotProduct
+numruns=10000000
+numdocs=1000
+numvalues=1000
+collectiontype=array
+datatype=double
+dotProduct.vector=[0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9]
diff --git a/searchlib/src/tests/features/benchmark/dotproduct/c-100000-1000-array-float.txt b/searchlib/src/tests/features/benchmark/dotproduct/c-100000-1000-array-float.txt
new file mode 100644
index 00000000000..0371c72f13a
--- /dev/null
+++ b/searchlib/src/tests/features/benchmark/dotproduct/c-100000-1000-array-float.txt
@@ -0,0 +1,7 @@
+case=dotProduct
+numruns=10000000
+numdocs=1000
+numvalues=1000
+collectiontype=array
+datatype=float
+dotProduct.vector=[0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9]
diff --git a/searchlib/src/tests/features/benchmark/dotproduct/c-100000-1000-array-int.txt b/searchlib/src/tests/features/benchmark/dotproduct/c-100000-1000-array-int.txt
new file mode 100644
index 00000000000..0e27edf2e09
--- /dev/null
+++ b/searchlib/src/tests/features/benchmark/dotproduct/c-100000-1000-array-int.txt
@@ -0,0 +1,7 @@
+case=dotProduct
+numruns=10000000
+numdocs=1000
+numvalues=1000
+collectiontype=array
+datatype=int
+dotProduct.vector=[0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9]
diff --git a/searchlib/src/tests/features/benchmark/dotproduct/c-100000-1000-array-long.txt b/searchlib/src/tests/features/benchmark/dotproduct/c-100000-1000-array-long.txt
new file mode 100644
index 00000000000..ca1aa57e738
--- /dev/null
+++ b/searchlib/src/tests/features/benchmark/dotproduct/c-100000-1000-array-long.txt
@@ -0,0 +1,7 @@
+case=dotProduct
+numruns=10000000
+numdocs=1000
+numvalues=1000
+collectiontype=array
+datatype=long
+dotProduct.vector=[0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9]
diff --git a/searchlib/src/tests/features/benchmark/dotproduct/c-100000-1000-wset.txt b/searchlib/src/tests/features/benchmark/dotproduct/c-100000-1000-wset.txt
new file mode 100644
index 00000000000..38c323c667d
--- /dev/null
+++ b/searchlib/src/tests/features/benchmark/dotproduct/c-100000-1000-wset.txt
@@ -0,0 +1,7 @@
+case=dotProduct
+numruns=1000000
+numdocs=1000
+numvalues=1000
+collectiontype=wset
+datatype=int
+dotProduct.vector={0:2,1:2,2:2,3:2,4:2,5:2,6:2,7:2,8:2,9:2,10:2,11:2,12:2,13:2,14:2,15:2,16:2,17:2,18:2,19:2,20:2,21:2,22:2,23:2,24:2,25:2,26:2,27:2,28:2,29:2,30:2,31:2,32:2,33:2,34:2,35:2,36:2,37:2,38:2,39:2,40:2,41:2,42:2,43:2,44:2,45:2,46:2,47:2,48:2,49:2,50:2,51:2,52:2,53:2,54:2,55:2,56:2,57:2,58:2,59:2,60:2,61:2,62:2,63:2,64:2,65:2,66:2,67:2,68:2,69:2,70:2,71:2,72:2,73:2,74:2,75:2,76:2,77:2,78:2,79:2,80:2,81:2,82:2,83:2,84:2,85:2,86:2,87:2,88:2,89:2,90:2,91:2,92:2,93:2,94:2,95:2,96:2,97:2,98:2,99:2,100:2,101:2,102:2,103:2,104:2,105:2,106:2,107:2,108:2,109:2,110:2,111:2,112:2,113:2,114:2,115:2,116:2,117:2,118:2,119:2,120:2,121:2,122:2,123:2,124:2,125:2,126:2,127:2,128:2,129:2,130:2,131:2,132:2,133:2,134:2,135:2,136:2,137:2,138:2,139:2,140:2,141:2,142:2,143:2,144:2,145:2,146:2,147:2,148:2,149:2,150:2,151:2,152:2,153:2,154:2,155:2,156:2,157:2,158:2,159:2,160:2,161:2,162:2,163:2,164:2,165:2,166:2,167:2,168:2,169:2,170:2,171:2,172:2,173:2,174:2,175:2,176:2,177:2,178:2,179:2,180:2,181:2,182:2,183:2,184:2,185:2,186:2,187:2,188:2,189:2,190:2,191:2,192:2,193:2,194:2,195:2,196:2,197:2,198:2,199:2,200:2,201:2,202:2,203:2,204:2,205:2,206:2,207:2,208:2,209:2,210:2,211:2,212:2,213:2,214:2,215:2,216:2,217:2,218:2,219:2,220:2,221:2,222:2,223:2,224:2,225:2,226:2,227:2,228:2,229:2,230:2,231:2,232:2,233:2,234:2,235:2,236:2,237:2,238:2,239:2,240:2,241:2,242:2,243:2,244:2,245:2,246:2,247:2,248:2,249:2,250:2,251:2,252:2,253:2,254:2,255:2,256:2,257:2,258:2,259:2,260:2,261:2,262:2,263:2,264:2,265:2,266:2,267:2,268:2,269:2,270:2,271:2,272:2,273:2,274:2,275:2,276:2,277:2,278:2,279:2,280:2,281:2,282:2,283:2,284:2,285:2,286:2,287:2,288:2,289:2,290:2,291:2,292:2,293:2,294:2,295:2,296:2,297:2,298:2,299:2,300:2,301:2,302:2,303:2,304:2,305:2,306:2,307:2,308:2,309:2,310:2,311:2,312:2,313:2,314:2,315:2,316:2,317:2,318:2,319:2,320:2,321:2,322:2,323:2,324:2,325:2,326:2,327:2,328:2,329:2,330:2,331:2,332:2,333:2,334:2,335:2,336:2,337:2,338:2,339:2,340:2,341:2,342:2,343:2,344:2,345:2,346:2,347:2,348:2,349:2,350:2,351:2,352:2,353:2,354:2,355:2,356:2,357:2,358:2,359:2,360:2,361:2,362:2,363:2,364:2,365:2,366:2,367:2,368:2,369:2,370:2,371:2,372:2,373:2,374:2,375:2,376:2,377:2,378:2,379:2,380:2,381:2,382:2,383:2,384:2,385:2,386:2,387:2,388:2,389:2,390:2,391:2,392:2,393:2,394:2,395:2,396:2,397:2,398:2,399:2,400:2,401:2,402:2,403:2,404:2,405:2,406:2,407:2,408:2,409:2,410:2,411:2,412:2,413:2,414:2,415:2,416:2,417:2,418:2,419:2,420:2,421:2,422:2,423:2,424:2,425:2,426:2,427:2,428:2,429:2,430:2,431:2,432:2,433:2,434:2,435:2,436:2,437:2,438:2,439:2,440:2,441:2,442:2,443:2,444:2,445:2,446:2,447:2,448:2,449:2,450:2,451:2,452:2,453:2,454:2,455:2,456:2,457:2,458:2,459:2,460:2,461:2,462:2,463:2,464:2,465:2,466:2,467:2,468:2,469:2,470:2,471:2,472:2,473:2,474:2,475:2,476:2,477:2,478:2,479:2,480:2,481:2,482:2,483:2,484:2,485:2,486:2,487:2,488:2,489:2,490:2,491:2,492:2,493:2,494:2,495:2,496:2,497:2,498:2,499:2,500:2,501:2,502:2,503:2,504:2,505:2,506:2,507:2,508:2,509:2,510:2,511:2,512:2,513:2,514:2,515:2,516:2,517:2,518:2,519:2,520:2,521:2,522:2,523:2,524:2,525:2,526:2,527:2,528:2,529:2,530:2,531:2,532:2,533:2,534:2,535:2,536:2,537:2,538:2,539:2,540:2,541:2,542:2,543:2,544:2,545:2,546:2,547:2,548:2,549:2,550:2,551:2,552:2,553:2,554:2,555:2,556:2,557:2,558:2,559:2,560:2,561:2,562:2,563:2,564:2,565:2,566:2,567:2,568:2,569:2,570:2,571:2,572:2,573:2,574:2,575:2,576:2,577:2,578:2,579:2,580:2,581:2,582:2,583:2,584:2,585:2,586:2,587:2,588:2,589:2,590:2,591:2,592:2,593:2,594:2,595:2,596:2,597:2,598:2,599:2,600:2,601:2,602:2,603:2,604:2,605:2,606:2,607:2,608:2,609:2,610:2,611:2,612:2,613:2,614:2,615:2,616:2,617:2,618:2,619:2,620:2,621:2,622:2,623:2,624:2,625:2,626:2,627:2,628:2,629:2,630:2,631:2,632:2,633:2,634:2,635:2,636:2,637:2,638:2,639:2,640:2,641:2,642:2,643:2,644:2,645:2,646:2,647:2,648:2,649:2,650:2,651:2,652:2,653:2,654:2,655:2,656:2,657:2,658:2,659:2,660:2,661:2,662:2,663:2,664:2,665:2,666:2,667:2,668:2,669:2,670:2,671:2,672:2,673:2,674:2,675:2,676:2,677:2,678:2,679:2,680:2,681:2,682:2,683:2,684:2,685:2,686:2,687:2,688:2,689:2,690:2,691:2,692:2,693:2,694:2,695:2,696:2,697:2,698:2,699:2,700:2,701:2,702:2,703:2,704:2,705:2,706:2,707:2,708:2,709:2,710:2,711:2,712:2,713:2,714:2,715:2,716:2,717:2,718:2,719:2,720:2,721:2,722:2,723:2,724:2,725:2,726:2,727:2,728:2,729:2,730:2,731:2,732:2,733:2,734:2,735:2,736:2,737:2,738:2,739:2,740:2,741:2,742:2,743:2,744:2,745:2,746:2,747:2,748:2,749:2,750:2,751:2,752:2,753:2,754:2,755:2,756:2,757:2,758:2,759:2,760:2,761:2,762:2,763:2,764:2,765:2,766:2,767:2,768:2,769:2,770:2,771:2,772:2,773:2,774:2,775:2,776:2,777:2,778:2,779:2,780:2,781:2,782:2,783:2,784:2,785:2,786:2,787:2,788:2,789:2,790:2,791:2,792:2,793:2,794:2,795:2,796:2,797:2,798:2,799:2,800:2,801:2,802:2,803:2,804:2,805:2,806:2,807:2,808:2,809:2,810:2,811:2,812:2,813:2,814:2,815:2,816:2,817:2,818:2,819:2,820:2,821:2,822:2,823:2,824:2,825:2,826:2,827:2,828:2,829:2,830:2,831:2,832:2,833:2,834:2,835:2,836:2,837:2,838:2,839:2,840:2,841:2,842:2,843:2,844:2,845:2,846:2,847:2,848:2,849:2,850:2,851:2,852:2,853:2,854:2,855:2,856:2,857:2,858:2,859:2,860:2,861:2,862:2,863:2,864:2,865:2,866:2,867:2,868:2,869:2,870:2,871:2,872:2,873:2,874:2,875:2,876:2,877:2,878:2,879:2,880:2,881:2,882:2,883:2,884:2,885:2,886:2,887:2,888:2,889:2,890:2,891:2,892:2,893:2,894:2,895:2,896:2,897:2,898:2,899:2,900:2,901:2,902:2,903:2,904:2,905:2,906:2,907:2,908:2,909:2,910:2,911:2,912:2,913:2,914:2,915:2,916:2,917:2,918:2,919:2,920:2,921:2,922:2,923:2,924:2,925:2,926:2,927:2,928:2,929:2,930:2,931:2,932:2,933:2,934:2,935:2,936:2,937:2,938:2,939:2,940:2,941:2,942:2,943:2,944:2,945:2,946:2,947:2,948:2,949:2,950:2,951:2,952:2,953:2,954:2,955:2,956:2,957:2,958:2,959:2,960:2,961:2,962:2,963:2,964:2,965:2,966:2,967:2,968:2,969:2,970:2,971:2,972:2,973:2,974:2,975:2,976:2,977:2,978:2,979:2,980:2,981:2,982:2,983:2,984:2,985:2,986:2,987:2,988:2,989:2,990:2,991:2,992:2,993:2,994:2,995:2,996:2,997:2,998:2,999:2}
diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/c-100-1.txt b/searchlib/src/tests/features/benchmark/fieldmatch/c-100-1.txt
new file mode 100644
index 00000000000..3b3e0915e9e
--- /dev/null
+++ b/searchlib/src/tests/features/benchmark/fieldmatch/c-100-1.txt
@@ -0,0 +1,7 @@
+case=fieldMatch
+feature=fieldMatch(bar)
+index=bar
+query=a b c
+field=a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c
+numruns=10000
+fieldMatch(bar).maxAlternativeSegmentations=1
diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/c-100-10.txt b/searchlib/src/tests/features/benchmark/fieldmatch/c-100-10.txt
new file mode 100644
index 00000000000..322784fc409
--- /dev/null
+++ b/searchlib/src/tests/features/benchmark/fieldmatch/c-100-10.txt
@@ -0,0 +1,7 @@
+case=fieldMatch
+feature=fieldMatch(bar)
+index=bar
+query=a b c
+field=a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c
+numruns=10000
+fieldMatch(bar).maxAlternativeSegmentations=10
diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/c-100-100.txt b/searchlib/src/tests/features/benchmark/fieldmatch/c-100-100.txt
new file mode 100644
index 00000000000..9a31201941c
--- /dev/null
+++ b/searchlib/src/tests/features/benchmark/fieldmatch/c-100-100.txt
@@ -0,0 +1,7 @@
+case=fieldMatch
+feature=fieldMatch(bar)
+index=bar
+query=a b c
+field=a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c
+numruns=10000
+fieldMatch(bar).maxAlternativeSegmentations=100
diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/c-100-1000.txt b/searchlib/src/tests/features/benchmark/fieldmatch/c-100-1000.txt
new file mode 100644
index 00000000000..0a7b99c79fb
--- /dev/null
+++ b/searchlib/src/tests/features/benchmark/fieldmatch/c-100-1000.txt
@@ -0,0 +1,7 @@
+case=fieldMatch
+feature=fieldMatch(bar)
+index=bar
+query=a b c
+field=a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c
+numruns=10000
+fieldMatch(bar).maxAlternativeSegmentations=1000
diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/c-100-10000.txt b/searchlib/src/tests/features/benchmark/fieldmatch/c-100-10000.txt
new file mode 100644
index 00000000000..1f859dc4ac6
--- /dev/null
+++ b/searchlib/src/tests/features/benchmark/fieldmatch/c-100-10000.txt
@@ -0,0 +1,7 @@
+case=fieldMatch
+feature=fieldMatch(bar)
+index=bar
+query=a b c
+field=a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c
+numruns=10000
+fieldMatch(bar).maxAlternativeSegmentations=10000
diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/c-100-5.txt b/searchlib/src/tests/features/benchmark/fieldmatch/c-100-5.txt
new file mode 100644
index 00000000000..1d9b6de23a4
--- /dev/null
+++ b/searchlib/src/tests/features/benchmark/fieldmatch/c-100-5.txt
@@ -0,0 +1,7 @@
+case=fieldMatch
+feature=fieldMatch(bar)
+index=bar
+query=a b c
+field=a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c
+numruns=10000
+fieldMatch(bar).maxAlternativeSegmentations=5
diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/c-100-50.txt b/searchlib/src/tests/features/benchmark/fieldmatch/c-100-50.txt
new file mode 100644
index 00000000000..c50f602a111
--- /dev/null
+++ b/searchlib/src/tests/features/benchmark/fieldmatch/c-100-50.txt
@@ -0,0 +1,7 @@
+case=fieldMatch
+feature=fieldMatch(bar)
+index=bar
+query=a b c
+field=a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c
+numruns=10000
+fieldMatch(bar).maxAlternativeSegmentations=50
diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/c-100-500.txt b/searchlib/src/tests/features/benchmark/fieldmatch/c-100-500.txt
new file mode 100644
index 00000000000..163a9bfd96d
--- /dev/null
+++ b/searchlib/src/tests/features/benchmark/fieldmatch/c-100-500.txt
@@ -0,0 +1,7 @@
+case=fieldMatch
+feature=fieldMatch(bar)
+index=bar
+query=a b c
+field=a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c
+numruns=10000
+fieldMatch(bar).maxAlternativeSegmentations=500
diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/c-100.txt b/searchlib/src/tests/features/benchmark/fieldmatch/c-100.txt
new file mode 100644
index 00000000000..b6a1094140b
--- /dev/null
+++ b/searchlib/src/tests/features/benchmark/fieldmatch/c-100.txt
@@ -0,0 +1,6 @@
+case=fieldMatch
+feature=fieldMatch(bar)
+index=bar
+query=a b c
+field=a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c
+numruns=10000
diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/c-1000-1-callgrind.txt b/searchlib/src/tests/features/benchmark/fieldmatch/c-1000-1-callgrind.txt
new file mode 100644
index 00000000000..d3fc48be0be
--- /dev/null
+++ b/searchlib/src/tests/features/benchmark/fieldmatch/c-1000-1-callgrind.txt
@@ -0,0 +1,7 @@
+case=fieldMatch
+feature=fieldMatch(bar)
+index=bar
+query=a b c
+field=a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c
+numruns=1000
+fieldMatch(bar).maxAlternativeSegmentations=1
diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/c-1000-1.txt b/searchlib/src/tests/features/benchmark/fieldmatch/c-1000-1.txt
new file mode 100644
index 00000000000..b6d4d2b4bb3
--- /dev/null
+++ b/searchlib/src/tests/features/benchmark/fieldmatch/c-1000-1.txt
@@ -0,0 +1,7 @@
+case=fieldMatch
+feature=fieldMatch(bar)
+index=bar
+query=a b c
+field=a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c
+numruns=10000
+fieldMatch(bar).maxAlternativeSegmentations=1
diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/c-1000-10.txt b/searchlib/src/tests/features/benchmark/fieldmatch/c-1000-10.txt
new file mode 100644
index 00000000000..67d1db34e17
--- /dev/null
+++ b/searchlib/src/tests/features/benchmark/fieldmatch/c-1000-10.txt
@@ -0,0 +1,7 @@
+case=fieldMatch
+feature=fieldMatch(bar)
+index=bar
+query=a b c
+field=a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c
+numruns=10000
+fieldMatch(bar).maxAlternativeSegmentations=10
diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/c-1000-100-callgrind.txt b/searchlib/src/tests/features/benchmark/fieldmatch/c-1000-100-callgrind.txt
new file mode 100644
index 00000000000..838ee6871f0
--- /dev/null
+++ b/searchlib/src/tests/features/benchmark/fieldmatch/c-1000-100-callgrind.txt
@@ -0,0 +1,7 @@
+case=fieldMatch
+feature=fieldMatch(bar)
+index=bar
+query=a b c
+field=a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c
+numruns=1000
+fieldMatch(bar).maxAlternativeSegmentations=100
diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/c-1000-100.txt b/searchlib/src/tests/features/benchmark/fieldmatch/c-1000-100.txt
new file mode 100644
index 00000000000..3e02b0ee27f
--- /dev/null
+++ b/searchlib/src/tests/features/benchmark/fieldmatch/c-1000-100.txt
@@ -0,0 +1,7 @@
+case=fieldMatch
+feature=fieldMatch(bar)
+index=bar
+query=a b c
+field=a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c
+numruns=10000
+fieldMatch(bar).maxAlternativeSegmentations=100
diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/c-1000-1000.txt b/searchlib/src/tests/features/benchmark/fieldmatch/c-1000-1000.txt
new file mode 100644
index 00000000000..407579b6bee
--- /dev/null
+++ b/searchlib/src/tests/features/benchmark/fieldmatch/c-1000-1000.txt
@@ -0,0 +1,7 @@
+case=fieldMatch
+feature=fieldMatch(bar)
+index=bar
+query=a b c
+field=a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c
+numruns=10000
+fieldMatch(bar).maxAlternativeSegmentations=1000
diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/c-1000-10000.txt b/searchlib/src/tests/features/benchmark/fieldmatch/c-1000-10000.txt
new file mode 100644
index 00000000000..57aa1759b23
--- /dev/null
+++ b/searchlib/src/tests/features/benchmark/fieldmatch/c-1000-10000.txt
@@ -0,0 +1,7 @@
+case=fieldMatch
+feature=fieldMatch(bar)
+index=bar
+query=a b c
+field=a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c
+numruns=10000
+fieldMatch(bar).maxAlternativeSegmentations=10000
diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/c-1000-5.txt b/searchlib/src/tests/features/benchmark/fieldmatch/c-1000-5.txt
new file mode 100644
index 00000000000..d91604f0bb5
--- /dev/null
+++ b/searchlib/src/tests/features/benchmark/fieldmatch/c-1000-5.txt
@@ -0,0 +1,7 @@
+case=fieldMatch
+feature=fieldMatch(bar)
+index=bar
+query=a b c
+field=a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c
+numruns=10000
+fieldMatch(bar).maxAlternativeSegmentations=5
diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/c-1000-50.txt b/searchlib/src/tests/features/benchmark/fieldmatch/c-1000-50.txt
new file mode 100644
index 00000000000..7d388e25cfa
--- /dev/null
+++ b/searchlib/src/tests/features/benchmark/fieldmatch/c-1000-50.txt
@@ -0,0 +1,7 @@
+case=fieldMatch
+feature=fieldMatch(bar)
+index=bar
+query=a b c
+field=a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c
+numruns=10000
+fieldMatch(bar).maxAlternativeSegmentations=50
diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/c-1000-500.txt b/searchlib/src/tests/features/benchmark/fieldmatch/c-1000-500.txt
new file mode 100644
index 00000000000..7cfc899b1f3
--- /dev/null
+++ b/searchlib/src/tests/features/benchmark/fieldmatch/c-1000-500.txt
@@ -0,0 +1,7 @@
+case=fieldMatch
+feature=fieldMatch(bar)
+index=bar
+query=a b c
+field=a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c
+numruns=10000
+fieldMatch(bar).maxAlternativeSegmentations=500
diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/c-1000.txt b/searchlib/src/tests/features/benchmark/fieldmatch/c-1000.txt
new file mode 100644
index 00000000000..f06091fbcaa
--- /dev/null
+++ b/searchlib/src/tests/features/benchmark/fieldmatch/c-1000.txt
@@ -0,0 +1,6 @@
+case=fieldMatch
+feature=fieldMatch(bar)
+index=bar
+query=a b c
+field=a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c
+numruns=10000
diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/c-10000-1.txt b/searchlib/src/tests/features/benchmark/fieldmatch/c-10000-1.txt
new file mode 100644
index 00000000000..b62b8b21e7c
--- /dev/null
+++ b/searchlib/src/tests/features/benchmark/fieldmatch/c-10000-1.txt
@@ -0,0 +1,7 @@
+case=fieldMatch
+feature=fieldMatch(bar)
+index=bar
+query=a b c
+field=a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c
+numruns=1000
+fieldMatch(bar).maxAlternativeSegmentations=1
diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/c-10000-10.txt b/searchlib/src/tests/features/benchmark/fieldmatch/c-10000-10.txt
new file mode 100644
index 00000000000..19f133833aa
--- /dev/null
+++ b/searchlib/src/tests/features/benchmark/fieldmatch/c-10000-10.txt
@@ -0,0 +1,7 @@
+case=fieldMatch
+feature=fieldMatch(bar)
+index=bar
+query=a b c
+field=a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c
+numruns=1000
+fieldMatch(bar).maxAlternativeSegmentations=10
diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/c-10000-100.txt b/searchlib/src/tests/features/benchmark/fieldmatch/c-10000-100.txt
new file mode 100644
index 00000000000..7dbfc2731a1
--- /dev/null
+++ b/searchlib/src/tests/features/benchmark/fieldmatch/c-10000-100.txt
@@ -0,0 +1,7 @@
+case=fieldMatch
+feature=fieldMatch(bar)
+index=bar
+query=a b c
+field=a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c
+numruns=1000
+fieldMatch(bar).maxAlternativeSegmentations=100
diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/c-10000-1000.txt b/searchlib/src/tests/features/benchmark/fieldmatch/c-10000-1000.txt
new file mode 100644
index 00000000000..e436ffb270c
--- /dev/null
+++ b/searchlib/src/tests/features/benchmark/fieldmatch/c-10000-1000.txt
@@ -0,0 +1,7 @@
+case=fieldMatch
+feature=fieldMatch(bar)
+index=bar
+query=a b c
+field=a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c
+numruns=1000
+fieldMatch(bar).maxAlternativeSegmentations=1000
diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/c-10000-10000.txt b/searchlib/src/tests/features/benchmark/fieldmatch/c-10000-10000.txt
new file mode 100644
index 00000000000..ec2727a7035
--- /dev/null
+++ b/searchlib/src/tests/features/benchmark/fieldmatch/c-10000-10000.txt
@@ -0,0 +1,7 @@
+case=fieldMatch
+feature=fieldMatch(bar)
+index=bar
+query=a b c
+field=a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c
+numruns=1000
+fieldMatch(bar).maxAlternativeSegmentations=10000
diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/c-10000-5.txt b/searchlib/src/tests/features/benchmark/fieldmatch/c-10000-5.txt
new file mode 100644
index 00000000000..cadd682a817
--- /dev/null
+++ b/searchlib/src/tests/features/benchmark/fieldmatch/c-10000-5.txt
@@ -0,0 +1,7 @@
+case=fieldMatch
+feature=fieldMatch(bar)
+index=bar
+query=a b c
+field=a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c
+numruns=1000
+fieldMatch(bar).maxAlternativeSegmentations=5
diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/c-10000-50.txt b/searchlib/src/tests/features/benchmark/fieldmatch/c-10000-50.txt
new file mode 100644
index 00000000000..66c3203ad25
--- /dev/null
+++ b/searchlib/src/tests/features/benchmark/fieldmatch/c-10000-50.txt
@@ -0,0 +1,7 @@
+case=fieldMatch
+feature=fieldMatch(bar)
+index=bar
+query=a b c
+field=a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c
+numruns=1000
+fieldMatch(bar).maxAlternativeSegmentations=50
diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/c-10000-500.txt b/searchlib/src/tests/features/benchmark/fieldmatch/c-10000-500.txt
new file mode 100644
index 00000000000..c82fba41604
--- /dev/null
+++ b/searchlib/src/tests/features/benchmark/fieldmatch/c-10000-500.txt
@@ -0,0 +1,7 @@
+case=fieldMatch
+feature=fieldMatch(bar)
+index=bar
+query=a b c
+field=a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c
+numruns=1000
+fieldMatch(bar).maxAlternativeSegmentations=500
diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/c-10000.txt b/searchlib/src/tests/features/benchmark/fieldmatch/c-10000.txt
new file mode 100644
index 00000000000..bd2404eba81
--- /dev/null
+++ b/searchlib/src/tests/features/benchmark/fieldmatch/c-10000.txt
@@ -0,0 +1,6 @@
+case=fieldMatch
+feature=fieldMatch(bar)
+index=bar
+query=a b c
+field=a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c
+numruns=1000
diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/c-20-1.txt b/searchlib/src/tests/features/benchmark/fieldmatch/c-20-1.txt
new file mode 100644
index 00000000000..6266271fe4f
--- /dev/null
+++ b/searchlib/src/tests/features/benchmark/fieldmatch/c-20-1.txt
@@ -0,0 +1,7 @@
+case=fieldMatch
+feature=fieldMatch(bar)
+index=bar
+query=a b c
+field=a x x b x x x a x b x x x x x a b x x c
+numruns=10000
+fieldMatch(bar).maxAlternativeSegmentations=1
diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/c-20-10.txt b/searchlib/src/tests/features/benchmark/fieldmatch/c-20-10.txt
new file mode 100644
index 00000000000..9f7593f8c76
--- /dev/null
+++ b/searchlib/src/tests/features/benchmark/fieldmatch/c-20-10.txt
@@ -0,0 +1,7 @@
+case=fieldMatch
+feature=fieldMatch(bar)
+index=bar
+query=a b c
+field=a x x b x x x a x b x x x x x a b x x c
+numruns=10000
+fieldMatch(bar).maxAlternativeSegmentations=10
diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/c-20-100.txt b/searchlib/src/tests/features/benchmark/fieldmatch/c-20-100.txt
new file mode 100644
index 00000000000..20a26196c44
--- /dev/null
+++ b/searchlib/src/tests/features/benchmark/fieldmatch/c-20-100.txt
@@ -0,0 +1,7 @@
+case=fieldMatch
+feature=fieldMatch(bar)
+index=bar
+query=a b c
+field=a x x b x x x a x b x x x x x a b x x c
+numruns=10000
+fieldMatch(bar).maxAlternativeSegmentations=100
diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/c-20-1000.txt b/searchlib/src/tests/features/benchmark/fieldmatch/c-20-1000.txt
new file mode 100644
index 00000000000..126a7f4355d
--- /dev/null
+++ b/searchlib/src/tests/features/benchmark/fieldmatch/c-20-1000.txt
@@ -0,0 +1,7 @@
+case=fieldMatch
+feature=fieldMatch(bar)
+index=bar
+query=a b c
+field=a x x b x x x a x b x x x x x a b x x c
+numruns=10000
+fieldMatch(bar).maxAlternativeSegmentations=1000
diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/c-20-10000.txt b/searchlib/src/tests/features/benchmark/fieldmatch/c-20-10000.txt
new file mode 100644
index 00000000000..456762710e1
--- /dev/null
+++ b/searchlib/src/tests/features/benchmark/fieldmatch/c-20-10000.txt
@@ -0,0 +1,7 @@
+case=fieldMatch
+feature=fieldMatch(bar)
+index=bar
+query=a b c
+field=a x x b x x x a x b x x x x x a b x x c
+numruns=10000
+fieldMatch(bar).maxAlternativeSegmentations=10000
diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/c-20-5.txt b/searchlib/src/tests/features/benchmark/fieldmatch/c-20-5.txt
new file mode 100644
index 00000000000..2839245ccdd
--- /dev/null
+++ b/searchlib/src/tests/features/benchmark/fieldmatch/c-20-5.txt
@@ -0,0 +1,7 @@
+case=fieldMatch
+feature=fieldMatch(bar)
+index=bar
+query=a b c
+field=a x x b x x x a x b x x x x x a b x x c
+numruns=10000
+fieldMatch(bar).maxAlternativeSegmentations=5
diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/c-20-50.txt b/searchlib/src/tests/features/benchmark/fieldmatch/c-20-50.txt
new file mode 100644
index 00000000000..a94fb7cecd8
--- /dev/null
+++ b/searchlib/src/tests/features/benchmark/fieldmatch/c-20-50.txt
@@ -0,0 +1,7 @@
+case=fieldMatch
+feature=fieldMatch(bar)
+index=bar
+query=a b c
+field=a x x b x x x a x b x x x x x a b x x c
+numruns=10000
+fieldMatch(bar).maxAlternativeSegmentations=50
diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/c-20-500.txt b/searchlib/src/tests/features/benchmark/fieldmatch/c-20-500.txt
new file mode 100644
index 00000000000..a53dd4fd6a7
--- /dev/null
+++ b/searchlib/src/tests/features/benchmark/fieldmatch/c-20-500.txt
@@ -0,0 +1,7 @@
+case=fieldMatch
+feature=fieldMatch(bar)
+index=bar
+query=a b c
+field=a x x b x x x a x b x x x x x a b x x c
+numruns=10000
+fieldMatch(bar).maxAlternativeSegmentations=500
diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/c-20.txt b/searchlib/src/tests/features/benchmark/fieldmatch/c-20.txt
new file mode 100644
index 00000000000..82d455795d4
--- /dev/null
+++ b/searchlib/src/tests/features/benchmark/fieldmatch/c-20.txt
@@ -0,0 +1,6 @@
+case=fieldMatch
+feature=fieldMatch(bar)
+index=bar
+query=a b c
+field=a x x b x x x a x b x x x x x a b x x c
+numruns=10000
diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/phrase-02.txt b/searchlib/src/tests/features/benchmark/fieldmatch/phrase-02.txt
new file mode 100644
index 00000000000..b55e2d60429
--- /dev/null
+++ b/searchlib/src/tests/features/benchmark/fieldmatch/phrase-02.txt
@@ -0,0 +1,7 @@
+case=fieldMatch
+feature=fieldMatch(bar)
+index=bar
+query=a b c
+field=a b c x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x a b c x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x
+numruns=100000
+fieldMatch(bar).maxAlternativeSegmentations=1000
diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/phrase-10.txt b/searchlib/src/tests/features/benchmark/fieldmatch/phrase-10.txt
new file mode 100644
index 00000000000..8f934a3e2a1
--- /dev/null
+++ b/searchlib/src/tests/features/benchmark/fieldmatch/phrase-10.txt
@@ -0,0 +1,7 @@
+case=fieldMatch
+feature=fieldMatch(bar)
+index=bar
+query=a b c
+field=a b c x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x a b c x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x a b c x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x a b c x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x a b c x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x a b c x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x a b c x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x a b c x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x a b c x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x a b c x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x
+numruns=100000
+fieldMatch(bar).maxAlternativeSegmentations=1000
diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/phrase-50.txt b/searchlib/src/tests/features/benchmark/fieldmatch/phrase-50.txt
new file mode 100644
index 00000000000..e1b687802f9
--- /dev/null
+++ b/searchlib/src/tests/features/benchmark/fieldmatch/phrase-50.txt
@@ -0,0 +1,7 @@
+case=fieldMatch
+feature=fieldMatch(bar)
+index=bar
+query=a b c
+field=a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x
+numruns=100000
+fieldMatch(bar).maxAlternativeSegmentations=1000
diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/plot.rb b/searchlib/src/tests/features/benchmark/fieldmatch/plot.rb
new file mode 100644
index 00000000000..ffbbc25e354
--- /dev/null
+++ b/searchlib/src/tests/features/benchmark/fieldmatch/plot.rb
@@ -0,0 +1,30 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+require '../plotlib'
+
+if ARGV.size == 0
+ puts "must specify folder"
+ exit
+end
+
+folder = ARGV[0]
+extra = ""
+extra = ARGV[1] if ARGV.size == 2
+field = [20, 100, 1000, 10000]
+segmentation = [1, 5, 10, 50, 100, 500, 1000, 10000]
+
+dat = folder + "/plot.dat"
+png = folder + "/plot.png"
+
+file = File.open(dat, "w")
+segmentation.each do |s|
+ file.write("#{s} ")
+ field.each do |f|
+ file.write(extract_data(folder + "/c-#{f}-#{s}.out") + " ")
+ end
+ file.write("\n")
+end
+file.close
+
+titles = ["fl-20", "fl-100", "fl-1000", "fl-10000"]
+
+plot_graph(dat, titles, png, "fieldMatch feature (#{extra})", "maxAlternativeSegmentations", "execution time per document (ms)", folder)
diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/readme.txt b/searchlib/src/tests/features/benchmark/fieldmatch/readme.txt
new file mode 100644
index 00000000000..a96922e58fb
--- /dev/null
+++ b/searchlib/src/tests/features/benchmark/fieldmatch/readme.txt
@@ -0,0 +1,22 @@
+** Running the benchmark **
+ruby run.rb folder
+folder is the place to store the output files.
+
+
+** Generating gnu plots **
+ruby plot.rb folder "description"
+folder contains the output files and description are used when setting the title of the graph.
+
+
+** Config file format **
+c-x-y.txt
+x is the length of the field and y is the value for maxAlternativeSegmentations.
+
+
+** Running callgrind **
+valgrind --tool=callgrind ../../featurebenchmark -c c-1000-1-callgrind.txt
+valgrind --tool=callgrind ../../featurebenchmark -c c-1000-100-callgrind.txt
+The numruns config value is reduced in these two config files.
+
+The output after running callgrind is two files: callgrind.out.x and callgrind.out.y.
+Use kcachegrind to look at these two files.
diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/run.rb b/searchlib/src/tests/features/benchmark/fieldmatch/run.rb
new file mode 100644
index 00000000000..d0350c454e8
--- /dev/null
+++ b/searchlib/src/tests/features/benchmark/fieldmatch/run.rb
@@ -0,0 +1,17 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+if ARGV.size == 0
+ puts "must specify folder"
+ exit
+end
+
+folder = ARGV[0]
+cases = [20, 100, 1000, 10000]
+segmentations = [1, 5, 10, 50, 100, 500, 1000, 10000]
+cases.each do |c|
+ segmentations.each do |s|
+ file = "c-#{c}-#{s}"
+ cmd = "script -c \"../../featurebenchmark -c #{file}.txt\" " + folder + "/#{file}.out"
+ puts cmd
+ `#{cmd}`
+ end
+end
diff --git a/searchlib/src/tests/features/benchmark/plotlib.rb b/searchlib/src/tests/features/benchmark/plotlib.rb
new file mode 100644
index 00000000000..53a1ee984a9
--- /dev/null
+++ b/searchlib/src/tests/features/benchmark/plotlib.rb
@@ -0,0 +1,36 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+def plot_graph(dat, titles, png, title, xlabel, ylabel, folder)
+ plot_cmd = "";
+ plot_cmd += "set terminal png\n"
+ plot_cmd += "set output \"#{png}\"\n"
+ plot_cmd += "set title \"#{title}\"\n"
+ plot_cmd += "set xlabel \"#{xlabel}\"\n"
+ plot_cmd += "set ylabel \"#{ylabel}\"\n"
+ plot_cmd += "set logscale\n"
+
+ plots = []
+ c = 2
+ titles.each do |title|
+ plots.push("\"#{dat}\" using 1:#{c} title \"#{title}\" with linespoints")
+ c += 1
+ end
+ plot_cmd += "plot "
+ plot_cmd += plots.join(", ")
+
+ plot_cmd_file = File.open(folder + "/plot.cmd", "w")
+ plot_cmd_file.write(plot_cmd);
+ plot_cmd_file.close
+ cmd = "gnuplot " + folder + "/plot.cmd"
+ puts cmd
+ puts `#{cmd}`
+end
+
+def extract_data(file_name)
+ content = IO.readlines(file_name).join
+ r = /ETPD:\s*(\d+\.\d+)/
+ if content =~ r
+ return $1
+ end
+ return "0"
+end
+
diff --git a/searchlib/src/tests/features/benchmark/rankingexpression/c-1.txt b/searchlib/src/tests/features/benchmark/rankingexpression/c-1.txt
new file mode 100644
index 00000000000..f46508379af
--- /dev/null
+++ b/searchlib/src/tests/features/benchmark/rankingexpression/c-1.txt
@@ -0,0 +1,4 @@
+case=rankingExpression
+feature=rankingExpression
+numruns=1000000
+rankingExpression.rankingScript=1
diff --git a/searchlib/src/tests/features/benchmark/rankingexpression/c-10.txt b/searchlib/src/tests/features/benchmark/rankingexpression/c-10.txt
new file mode 100644
index 00000000000..cd9a34865cb
--- /dev/null
+++ b/searchlib/src/tests/features/benchmark/rankingexpression/c-10.txt
@@ -0,0 +1,4 @@
+case=rankingExpression
+feature=rankingExpression
+numruns=1000000
+rankingExpression.rankingScript=1+1+1+1+1+1+1+1+1+1
diff --git a/searchlib/src/tests/features/benchmark/rankingexpression/c-100.txt b/searchlib/src/tests/features/benchmark/rankingexpression/c-100.txt
new file mode 100644
index 00000000000..1d3007a14c5
--- /dev/null
+++ b/searchlib/src/tests/features/benchmark/rankingexpression/c-100.txt
@@ -0,0 +1,4 @@
+case=rankingExpression
+feature=rankingExpression
+numruns=1000000
+rankingExpression.rankingScript=1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1
diff --git a/searchlib/src/tests/features/benchmark/rankingexpression/c-200.txt b/searchlib/src/tests/features/benchmark/rankingexpression/c-200.txt
new file mode 100644
index 00000000000..0a9db3c3539
--- /dev/null
+++ b/searchlib/src/tests/features/benchmark/rankingexpression/c-200.txt
@@ -0,0 +1,4 @@
+case=rankingExpression
+feature=rankingExpression
+numruns=1000000
+rankingExpression.rankingScript=1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1
diff --git a/searchlib/src/tests/features/benchmark/rankingexpression/c-400.txt b/searchlib/src/tests/features/benchmark/rankingexpression/c-400.txt
new file mode 100644
index 00000000000..41600fb943d
--- /dev/null
+++ b/searchlib/src/tests/features/benchmark/rankingexpression/c-400.txt
@@ -0,0 +1,4 @@
+case=rankingExpression
+feature=rankingExpression
+numruns=1000000
+rankingExpression.rankingScript=1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1
diff --git a/searchlib/src/tests/features/benchmark/rankingexpression/c-5.txt b/searchlib/src/tests/features/benchmark/rankingexpression/c-5.txt
new file mode 100644
index 00000000000..b4704f8a822
--- /dev/null
+++ b/searchlib/src/tests/features/benchmark/rankingexpression/c-5.txt
@@ -0,0 +1,4 @@
+case=rankingExpression
+feature=rankingExpression
+numruns=1000000
+rankingExpression.rankingScript=1+1+1+1+1
diff --git a/searchlib/src/tests/features/benchmark/rankingexpression/c-50.txt b/searchlib/src/tests/features/benchmark/rankingexpression/c-50.txt
new file mode 100644
index 00000000000..74790ff0a21
--- /dev/null
+++ b/searchlib/src/tests/features/benchmark/rankingexpression/c-50.txt
@@ -0,0 +1,4 @@
+case=rankingExpression
+feature=rankingExpression
+numruns=1000000
+rankingExpression.rankingScript=1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1
diff --git a/searchlib/src/tests/features/benchmark/rankingexpression/c-800.txt b/searchlib/src/tests/features/benchmark/rankingexpression/c-800.txt
new file mode 100644
index 00000000000..57c250137fe
--- /dev/null
+++ b/searchlib/src/tests/features/benchmark/rankingexpression/c-800.txt
@@ -0,0 +1,4 @@
+case=rankingExpression
+feature=rankingExpression
+numruns=1000000
+rankingExpression.rankingScript=1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1
diff --git a/searchlib/src/tests/features/benchmark/rankingexpression/plot.rb b/searchlib/src/tests/features/benchmark/rankingexpression/plot.rb
new file mode 100644
index 00000000000..ca586e1176e
--- /dev/null
+++ b/searchlib/src/tests/features/benchmark/rankingexpression/plot.rb
@@ -0,0 +1,22 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+require '../plotlib'
+
+folder = ARGV[0]
+extra = ""
+extra = ARGV[1] if ARGV.size == 2
+trees = [1, 5, 10, 50, 100, 200, 400, 800]
+
+dat = folder + "/plot.dat"
+png = folder + "/plot.png"
+
+file = File.open(dat, "w")
+trees.each do |t|
+ file.write("#{t} ")
+ file.write(extract_data(folder + "/c-#{t}.out") + " ")
+ file.write("\n")
+end
+file.close
+
+titles = ["expression"]
+
+plot_graph(dat, titles, png, "rankingExpression feature (#{extra})", "number of trees", "execution time per document (ms)", folder)
diff --git a/searchlib/src/tests/features/benchmark/rankingexpression/run.rb b/searchlib/src/tests/features/benchmark/rankingexpression/run.rb
new file mode 100644
index 00000000000..2f707e35b51
--- /dev/null
+++ b/searchlib/src/tests/features/benchmark/rankingexpression/run.rb
@@ -0,0 +1,14 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+if ARGV.size == 0
+ puts "must specify folder"
+ exit
+end
+
+folder = ARGV[0]
+trees = [1, 5, 10, 50, 100, 200, 400, 800]
+trees.each do |t|
+ file = "c-#{t}"
+ cmd = "script -c \"../../featurebenchmark -c #{file}.txt\" " + folder + "/#{file}.out"
+ puts cmd
+ `#{cmd}`
+end
diff --git a/searchlib/src/tests/features/beta/.gitignore b/searchlib/src/tests/features/beta/.gitignore
new file mode 100644
index 00000000000..3a7ba416343
--- /dev/null
+++ b/searchlib/src/tests/features/beta/.gitignore
@@ -0,0 +1 @@
+searchlib_beta_features_test_app
diff --git a/searchlib/src/tests/features/beta/CMakeLists.txt b/searchlib/src/tests/features/beta/CMakeLists.txt
new file mode 100644
index 00000000000..ee7020f01fc
--- /dev/null
+++ b/searchlib/src/tests/features/beta/CMakeLists.txt
@@ -0,0 +1,12 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_beta_features_test_app
+ SOURCES
+ beta_features.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(
+ NAME searchlib_beta_features_test_app
+ COMMAND searchlib_beta_features_test_app
+ ENVIRONMENT "VESPA_LOG_TARGET=file:vlog1.txt"
+)
diff --git a/searchlib/src/tests/features/beta/beta_features.cpp b/searchlib/src/tests/features/beta/beta_features.cpp
new file mode 100644
index 00000000000..e5642f475de
--- /dev/null
+++ b/searchlib/src/tests/features/beta/beta_features.cpp
@@ -0,0 +1,726 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("beta_features_test");
+
+#include <boost/tokenizer.hpp>
+#include <vespa/searchlib/attribute/attributeguard.h>
+#include <vespa/searchlib/attribute/attributemanager.h>
+#include <vespa/searchlib/attribute/attributevector.h>
+#include <vespa/searchlib/attribute/attributevector.hpp>
+#include <vespa/searchlib/attribute/integerbase.h>
+#include <vespa/searchlib/attribute/floatbase.h>
+#include <vespa/searchlib/attribute/stringbase.h>
+#include <vespa/searchlib/features/agefeature.h>
+#include <vespa/searchlib/features/attributefeature.h>
+#include <vespa/searchlib/features/attributematchfeature.h>
+#include <vespa/searchlib/features/fieldlengthfeature.h>
+#include <vespa/searchlib/features/fieldmatchfeature.h>
+#include <vespa/searchlib/features/fieldtermmatchfeature.h>
+#include <vespa/searchlib/features/firstphasefeature.h>
+#include <vespa/searchlib/features/flow_completeness_feature.h>
+#include <vespa/searchlib/features/jarowinklerdistancefeature.h>
+#include <vespa/searchlib/features/matchfeature.h>
+#include <vespa/searchlib/features/nowfeature.h>
+#include <vespa/searchlib/features/proximityfeature.h>
+#include <vespa/searchlib/features/queryfeature.h>
+#include <vespa/searchlib/features/querycompletenessfeature.h>
+#include <vespa/searchlib/features/randomfeature.h>
+#include <vespa/searchlib/features/rankingexpressionfeature.h>
+#include <vespa/searchlib/features/reverseproximityfeature.h>
+#include <vespa/searchlib/features/setup.h>
+#include <vespa/searchlib/features/termeditdistancefeature.h>
+#include <vespa/searchlib/features/termfeature.h>
+#include <vespa/searchlib/features/utils.h>
+#include <vespa/searchlib/fef/featurenamebuilder.h>
+#include <vespa/searchlib/fef/indexproperties.h>
+#include <vespa/searchlib/fef/test/plugin/setup.h>
+#include <vespa/searchlib/util/rand48.h>
+#include <vespa/vespalib/util/stringfmt.h>
+#include <vespa/searchlib/fef/test/ftlib.h>
+
+using namespace search::features;
+using namespace search::fef;
+using namespace search::fef::test;
+
+//---------------------------------------------------------------------------------------------------------------------
+// TermPositionList
+//---------------------------------------------------------------------------------------------------------------------
+typedef std::pair<uint32_t, uint32_t> TermPosition;
+class TermPositionList : public std::vector<TermPosition> {
+public:
+ TermPositionList &add(uint32_t termId, uint32_t pos) {
+ push_back(TermPosition(termId, pos));
+ return *this;
+ }
+ TermPositionList &clear() {
+ std::vector<TermPosition>::clear();
+ return *this;
+ }
+};
+
+//---------------------------------------------------------------------------------------------------------------------
+// Test
+//---------------------------------------------------------------------------------------------------------------------
+class Test : public FtTestApp {
+public:
+ int Main();
+ void testJaroWinklerDistance();
+ void testProximity();
+ void testFlowCompleteness();
+ void testQueryCompleteness();
+ void testReverseProximity();
+ void testTermEditDistance();
+
+private:
+ void assertJaroWinklerDistance(const vespalib::string &query, const vespalib::string &field, feature_t expected);
+ void assertQueryCompleteness(FtFeatureTest & ft, uint32_t firstOcc, uint32_t hits, uint32_t miss);
+ void assertTermEditDistance(const vespalib::string &query, const vespalib::string &field,
+ uint32_t expectedDel, uint32_t expectedIns, uint32_t expectedSub);
+
+private:
+ search::fef::BlueprintFactory _factory;
+};
+
+TEST_APPHOOK(Test);
+
+int
+Test::Main()
+{
+ TEST_INIT("beta_features_test");
+
+ // Configure factory with all known blueprints.
+ setup_fef_test_plugin(_factory);
+ setup_search_features(_factory);
+
+ // Test all features.
+ testJaroWinklerDistance(); TEST_FLUSH();
+ testProximity(); TEST_FLUSH();
+ testFlowCompleteness(); TEST_FLUSH();
+ testQueryCompleteness(); TEST_FLUSH();
+ testReverseProximity(); TEST_FLUSH();
+ testTermEditDistance(); TEST_FLUSH();
+
+ TEST_DONE();
+ return 0;
+}
+
+void
+Test::testJaroWinklerDistance()
+{
+ {
+ // Test blueprint.
+ JaroWinklerDistanceBlueprint pt;
+ {
+ EXPECT_TRUE(assertCreateInstance(pt, "jaroWinklerDistance"));
+
+ StringList params, in, out;
+ FT_SETUP_FAIL(pt, params);
+ FT_SETUP_FAIL(pt, params.add("foo"));
+ FT_SETUP_FAIL(pt, params.add("0"));
+ params.clear();
+
+ FtIndexEnvironment ie;
+ ie.getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo");
+ ie.getBuilder().addField(FieldType::INDEX, CollectionType::ARRAY, "afoo");
+ ie.getBuilder().addField(FieldType::INDEX, CollectionType::WEIGHTEDSET, "wfoo");
+ FT_SETUP_FAIL(pt, ie, params);
+ FT_SETUP_OK (pt, ie, params.add("foo"), in.add("fieldLength(foo)"), out.add("out"));
+ FT_SETUP_FAIL(pt, ie, params.add("afoo"));
+ FT_SETUP_FAIL(pt, ie, params.add("wfoo"));
+ FT_SETUP_FAIL(pt, ie, params.add("1"));
+ }
+ {
+ FT_DUMP_EMPTY(_factory, "jaroWinklerDistance");
+
+ FtIndexEnvironment ie;
+ ie.getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "foo");
+ ie.getBuilder().addField(FieldType::INDEX, CollectionType::ARRAY, "abar");
+ ie.getBuilder().addField(FieldType::INDEX, CollectionType::WEIGHTEDSET, "wbar");
+ FT_DUMP_EMPTY(_factory, "jaroWinklerDistance", ie); // must be a single value index field
+
+ ie.getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "bar");
+ StringList dump;
+ FT_DUMP(_factory, "jaroWinklerDistance", ie, dump/*.add("jaroWinklerDistance(bar).out")*/);
+ }
+ }
+ {
+ // These measures are taken from table 6 in the paper "Overview of Record Linkage and Current Research Directions"
+ // by William E. Winkler. It is available at: http://www.census.gov/srd/papers/pdf/rrs2006-02.pdf
+ //
+ // Note that the strings used as query and field here are transformed into query and field terms, and therefore
+ // they all need to be unique. The second occurence of a character in the below names are therefore
+ // capitalized. A comment is given whenever our result is different from what is presented in the paper (only 2
+ // of 17 is actually different).
+ assertJaroWinklerDistance("shackleford", "shackelford", 1 - 0.982f);
+ assertJaroWinklerDistance("dunNigham", "cunnigham", 1 - 0.852f); // 3x'n' in query, removed one
+ assertJaroWinklerDistance("nichlesoN", "nichulsoN", 1 - 0.956f);
+ assertJaroWinklerDistance("jones", "johnsoN", 1 - 0.832f);
+ assertJaroWinklerDistance("masSey", "masSie", 1 - 0.933f);
+ assertJaroWinklerDistance("abroms", "abrAms", 1 - 0.922f);
+ assertJaroWinklerDistance("hardin", "martinez", 1 - 0.722f); // no measure was given
+ assertJaroWinklerDistance("itman", "smith", 1 - 0.622f); // no measure was given
+ assertJaroWinklerDistance("jeraldinE", "geraldinE", 1 - 0.926f);
+ assertJaroWinklerDistance("marhtA", "marthA", 1 - 0.961f);
+ assertJaroWinklerDistance("micheLlE", "michael", 1 - 0.921f);
+ assertJaroWinklerDistance("julies", "juliUs", 1 - 0.933f);
+ assertJaroWinklerDistance("tanyA", "tonyA", 1 - 0.880f);
+ assertJaroWinklerDistance("dwayne", "duane", 1 - 0.765f); // was 0.840 in paper
+ assertJaroWinklerDistance("sean", "suSan", 1 - 0.672f); // was 0.805 in paper
+ assertJaroWinklerDistance("jon", "john", 1 - 0.933f);
+ assertJaroWinklerDistance("jon", "jan", 1 - 0.800f); // no measure was given
+ }
+}
+
+void
+Test::assertJaroWinklerDistance(const vespalib::string &query, const vespalib::string &field, feature_t expected)
+{
+ FtFeatureTest ft(_factory, "jaroWinklerDistance(foo)");
+ ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo");
+ FT_SETUP(ft, query, StringMap().add("foo", field), 1);
+
+ RankResult res;
+ ASSERT_TRUE(ft.execute(res.setEpsilon(0.001).addScore("jaroWinklerDistance(foo).out", expected)));
+}
+
+void
+Test::testProximity()
+{
+
+ { // Test blueprint.
+ ProximityBlueprint prototype;
+ {
+ EXPECT_TRUE(assertCreateInstance(prototype, "proximity"));
+
+ StringList params, in, out;
+ FT_SETUP_FAIL(prototype, params);
+ FT_SETUP_FAIL(prototype, params.add("foo"));
+ FT_SETUP_FAIL(prototype, params.add("0"));
+ FT_SETUP_FAIL(prototype, params.add("1"));
+ FT_SETUP_FAIL(prototype, params.add("2"));
+ params.clear();
+
+ FtIndexEnvironment ie;
+ ie.getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo");
+ FT_SETUP_FAIL(prototype, ie, params.add("foo"));
+ FT_SETUP_FAIL(prototype, ie, params.add("0"));
+ FT_SETUP_OK (prototype, ie, params.add("1"), in, out.add("out").add("posA").add("posB"));
+ FT_SETUP_FAIL(prototype, ie, params.add("2"));
+ }
+
+ {
+ FT_DUMP_EMPTY(_factory, "proximity");
+
+ FtIndexEnvironment ie;
+ ie.getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "foo");
+ FT_DUMP_EMPTY(_factory, "proximity", ie); // must be an index field
+
+ StringList dump;
+ ie.getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "bar");
+#ifdef VISIT_BETA_FEATURES
+ for (uint32_t a = 0; a < 5; ++a) {
+ for (uint32_t b = a + 1; b < 6; ++b) {
+ vespalib::string bn = vespalib::make_string("proximity(bar,%u,%u)", a, b);
+ dump.add(bn + ".out");
+ dump.add(bn + ".posA");
+ dump.add(bn + ".posB");
+ }
+ }
+#endif
+ FT_DUMP(_factory, "proximity", ie, dump);
+ }
+ }
+ {
+ // Test executor.
+ FtFeatureTest ft(_factory, "proximity(foo,0,1)");
+ ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo");
+ ASSERT_TRUE(ft.setup());
+
+ search::fef::test::RankResult exp;
+ exp.addScore("proximity(foo,0,1).out", util::FEATURE_MAX).
+ addScore("proximity(foo,0,1).posA", util::FEATURE_MAX).
+ addScore("proximity(foo,0,1).posB", util::FEATURE_MIN);
+ ASSERT_TRUE(ft.execute(exp, 1));
+ }
+ {
+ FtFeatureTest ft(_factory, "proximity(foo,0,1)");
+ ASSERT_TRUE(!ft.setup());
+
+ ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo");
+ ft.getQueryEnv().getBuilder().addAllFields();
+ ft.getQueryEnv().getBuilder().addAllFields();
+ ASSERT_TRUE(ft.setup());
+
+ search::fef::test::MatchDataBuilder::UP mdb = ft.createMatchDataBuilder();
+ ASSERT_TRUE(mdb->setFieldLength("foo", 50));
+ ASSERT_TRUE(mdb->addOccurence("foo", 0, 30));
+ search::fef::test::RankResult exp;
+ exp.addScore("proximity(foo,0,1).out", util::FEATURE_MAX).
+ addScore("proximity(foo,0,1).posA", util::FEATURE_MAX).
+ addScore("proximity(foo,0,1).posB", util::FEATURE_MIN);
+ ASSERT_TRUE(mdb->apply(1));
+ ASSERT_TRUE(ft.execute(exp, 1));
+
+ ASSERT_TRUE(mdb->addOccurence("foo", 1, 20));
+ ASSERT_TRUE(mdb->apply(2));
+ ASSERT_TRUE(ft.execute(exp, 2));
+
+ ASSERT_TRUE(mdb->addOccurence("foo", 0, 10));
+ ASSERT_TRUE(mdb->apply(3));
+ exp .clear()
+ .addScore("proximity(foo,0,1).out", 10.0f)
+ .addScore("proximity(foo,0,1).posA", 10.0f)
+ .addScore("proximity(foo,0,1).posB", 20.0f);
+ ASSERT_TRUE(ft.execute(exp, 3));
+ }
+ {
+ for (int a = 0; a < 10; ++a) {
+ for (int b = 0; b < 10; ++b) {
+ FtFeatureTest ft(_factory, "proximity(foo,0,1)");
+ ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo");
+ ft.getQueryEnv().getBuilder().addAllFields();
+ ft.getQueryEnv().getBuilder().addAllFields();
+ ASSERT_TRUE(ft.setup());
+
+ search::fef::test::MatchDataBuilder::UP mdb = ft.createMatchDataBuilder();
+ ASSERT_TRUE(mdb->setFieldLength("foo", 10));
+ ASSERT_TRUE(mdb->addOccurence("foo", 0, a));
+ ASSERT_TRUE(mdb->addOccurence("foo", 1, b));
+ ASSERT_TRUE(mdb->apply(1));
+
+ search::fef::test::RankResult exp;
+ exp .addScore("proximity(foo,0,1).out", a < b ? b - a : util::FEATURE_MAX)
+ .addScore("proximity(foo,0,1).posA", a < b ? a : util::FEATURE_MAX)
+ .addScore("proximity(foo,0,1).posB", a < b ? b : util::FEATURE_MIN);
+ TEST_STATE(vespalib::make_string("a=%u, b=%u", a, b).c_str());
+ EXPECT_TRUE(ft.execute(exp));
+ }
+ }
+ }
+}
+
+void
+Test::testQueryCompleteness()
+{
+ { // Test blueprint.
+ QueryCompletenessBlueprint prototype;
+
+ EXPECT_TRUE(assertCreateInstance(prototype, "queryCompleteness"));
+
+ StringList params, in, out;
+ FT_SETUP_FAIL(prototype, params);
+ FT_SETUP_FAIL(prototype, params.add("foo"));
+ FT_SETUP_FAIL(prototype, params.add("0"));
+ params.clear();
+
+ FtIndexEnvironment ie;
+ ie.getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo");
+ FT_SETUP_OK (prototype, ie, params.add("foo"), in, out.add("hit").add("miss"));
+ FT_SETUP_OK (prototype, ie, params.add("0"), in, out);
+ FT_SETUP_OK (prototype, ie, params.add("1"), in, out);
+ FT_SETUP_FAIL(prototype, ie, params.add("2"));
+
+ FT_DUMP_EMPTY(_factory, "queryCompleteness");
+ FT_DUMP_EMPTY(_factory, "queryCompleteness", ie);
+ }
+
+ { // Test executor.
+ FtFeatureTest ft(_factory, "queryCompleteness(foo)");
+ ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo");
+ // add 5 term nodes
+ ft.getQueryEnv().getBuilder().addAllFields();
+ ft.getQueryEnv().getBuilder().addAllFields();
+ ft.getQueryEnv().getBuilder().addAllFields();
+ ft.getQueryEnv().getBuilder().addAllFields();
+ ft.getQueryEnv().getBuilder().addAllFields();
+ ASSERT_TRUE(ft.setup());
+ // from 0 to 5 hits (5 to 0 misses)
+ for (uint32_t i = 0; i < 6; ++i) {
+ MatchDataBuilder::UP mdb = ft.createMatchDataBuilder();
+ mdb->setFieldLength("foo", 10);
+ for (uint32_t j = 0; j < i; ++j) {
+ mdb->addOccurence("foo", j, j);
+ }
+ ASSERT_TRUE(mdb->apply(1));
+ RankResult exp;
+ exp.addScore("queryCompleteness(foo).hit", (feature_t)(i));
+ exp.addScore("queryCompleteness(foo).miss", (feature_t)(5 - i));
+ EXPECT_TRUE(ft.execute(exp));
+ }
+ }
+ { // Test executor.
+ FtFeatureTest ft(_factory, "queryCompleteness(foo,5,10)");
+ ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo");
+ ft.getQueryEnv().getBuilder().addAllFields();
+ ASSERT_TRUE(ft.setup());
+
+ // before window
+ assertQueryCompleteness(ft, 4, 0, 1);
+ // inside window
+ assertQueryCompleteness(ft, 5, 1, 0);
+ // inside window
+ assertQueryCompleteness(ft, 9, 1, 0);
+ // after window
+ assertQueryCompleteness(ft, 10, 0, 1);
+ }
+}
+
+void
+Test::assertQueryCompleteness(FtFeatureTest & ft, uint32_t firstOcc, uint32_t hits, uint32_t miss)
+{
+ MatchDataBuilder::UP mdb = ft.createMatchDataBuilder();
+ mdb->setFieldLength("foo", 20);
+ mdb->addOccurence("foo", 0, firstOcc);
+ ASSERT_TRUE(mdb->apply(1));
+ RankResult exp;
+ exp.addScore("queryCompleteness(foo,5,10).hit", hits);
+ exp.addScore("queryCompleteness(foo,5,10).miss", miss);
+ EXPECT_TRUE(ft.execute(exp));
+}
+
+// BFI implementation: brute force and ignorance
+int cntFlow(int m1, int m2, int m3, int m4)
+{
+ int flow = 0;
+
+ for (int p1p = 0; p1p < 4; p1p++) {
+ if (((1 << p1p) & m1) == 0) continue;
+ for (int p2p = 0; p2p < 4; p2p++) {
+ if (((1 << p2p) & m2) == 0) continue;
+ int f2 = 1;
+ if (p2p != p1p) ++f2;
+ for (int p3p = 0; p3p < 4; p3p++) {
+ if (((1 << p3p) & m3) == 0) continue;
+ int f3 = f2;
+ if (p3p != p1p && p3p != p2p) ++f3;
+ for (int p4p = 0; p4p < 4; p4p++) {
+ if (((1 << p4p) & m4) == 0) continue;
+ int f4 = f3;
+ if (p4p != p1p && p4p != p2p && p4p != p3p) ++f4;
+ if (flow < f4) flow = f4;
+ }
+ }
+ }
+ }
+ return flow;
+}
+
+void
+Test::testFlowCompleteness()
+{
+ { // Test blueprint.
+ TEST_STATE("test flow completeness blueprint");
+ FlowCompletenessBlueprint prototype;
+
+ EXPECT_TRUE(assertCreateInstance(prototype, "flowCompleteness"));
+
+ StringList params, in, out;
+ TEST_DO(FT_SETUP_FAIL(prototype, params));
+ TEST_DO(FT_SETUP_FAIL(prototype, params.add("foo")));
+ TEST_DO(FT_SETUP_FAIL(prototype, params.add("0")));
+
+ FtIndexEnvironment ie;
+ ie.getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo");
+
+ params.clear();
+ params.add("foo");
+
+ out.add("completeness").add("fieldCompleteness")
+ .add("queryCompleteness").add("elementWeight")
+ .add("weight").add("flow");
+
+ StringList expDump;
+ for (size_t i = 0; i < out.size(); ++i) {
+ vespalib::string fn = "flowCompleteness(foo).";
+ fn.append(out[i]);
+ expDump.push_back(fn);
+ }
+
+ TEST_DO(FT_SETUP_OK(prototype, ie, params, in, out));
+ TEST_DO(FT_SETUP_FAIL(prototype, ie, params.add("2")));
+ TEST_DO(FT_DUMP_EMPTY(_factory, "flowCompleteness"));
+#ifdef notyet
+ TEST_DO(FT_DUMP(_factory, "flowCompleteness", ie, expDump));
+#endif
+ }
+
+ { // Test executor.
+ TEST_STATE("test flow completeness executor");
+
+ FtFeatureTest ft(_factory, "flowCompleteness(foo)");
+ ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo");
+ // add 5 term nodes
+ ft.getQueryEnv().getBuilder().addAllFields();
+ ft.getQueryEnv().getBuilder().addAllFields();
+ ft.getQueryEnv().getBuilder().addAllFields();
+ ft.getQueryEnv().getBuilder().addAllFields();
+ ft.getQueryEnv().getBuilder().addAllFields();
+ ASSERT_TRUE(ft.setup());
+ // from 0 to 5 hits (5 to 0 misses)
+ for (uint32_t i = 0; i < 6; ++i) {
+ MatchDataBuilder::UP mdb = ft.createMatchDataBuilder();
+ mdb->setFieldLength("foo", 10);
+ for (uint32_t j = 0; j < i; ++j) {
+ mdb->addOccurence("foo", j, j);
+ }
+ ASSERT_TRUE(mdb->apply(1));
+ RankResult exp;
+ exp.setEpsilon(0.000001);
+ exp.addScore("flowCompleteness(foo)", i * 0.15);
+ exp.addScore("flowCompleteness(foo).completeness", i * 0.15); // == 0.1*0.5 + 0.2*(1-0.5)
+ exp.addScore("flowCompleteness(foo).fieldCompleteness", i * 0.1);
+ exp.addScore("flowCompleteness(foo).queryCompleteness", i * 0.2);
+ exp.addScore("flowCompleteness(foo).elementWeight", i > 0 ? 1 : 0);
+ exp.addScore("flowCompleteness(foo).weight", 100.0);
+ exp.addScore("flowCompleteness(foo).flow", i);
+ TEST_STATE("run execute");
+ EXPECT_TRUE(ft.execute(exp));
+ }
+ }
+
+
+ { // Test executor, pass 2
+ TEST_STATE("test flow completeness executor (pass 2)");
+
+ FtFeatureTest ft(_factory, "flowCompleteness(foo)");
+ ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo");
+ // add 4 term nodes
+ ft.getQueryEnv().getBuilder().addAllFields();
+ ft.getQueryEnv().getBuilder().addAllFields();
+ ft.getQueryEnv().getBuilder().addAllFields();
+ ft.getQueryEnv().getBuilder().addAllFields();
+ ASSERT_TRUE(ft.setup());
+
+ // each term will have 1 to 3 positions it matches,
+ // with various points of overlap
+
+ for (uint32_t t0m = 1; t0m < 15 ; ++t0m) {
+
+ for (uint32_t t1m = 1; t1m < 15 ; ++t1m) {
+
+ for (uint32_t t2m = 1; t2m < 15 ; ++t2m) {
+
+ for (uint32_t t3m = 1; t3m < 15 ; ++t3m) {
+
+ int flow = cntFlow(t0m, t1m, t2m, t3m);
+
+ MatchDataBuilder::UP mdb = ft.createMatchDataBuilder();
+ mdb->setFieldLength("foo", 4);
+ for (int pos = 0; pos < 4; ++pos) {
+ if (((1 << pos) & t0m) != 0) mdb->addOccurence("foo", 0, pos);
+ if (((1 << pos) & t1m) != 0) mdb->addOccurence("foo", 1, pos);
+ if (((1 << pos) & t2m) != 0) mdb->addOccurence("foo", 2, pos);
+ if (((1 << pos) & t3m) != 0) mdb->addOccurence("foo", 3, pos);
+ }
+
+ ASSERT_TRUE(mdb->apply(1));
+ RankResult exp;
+ exp.setEpsilon(0.0001);
+ exp.addScore("flowCompleteness(foo)", flow * 0.25);
+ exp.addScore("flowCompleteness(foo).completeness", flow * 0.25);
+ exp.addScore("flowCompleteness(foo).fieldCompleteness", flow * 0.25);
+ exp.addScore("flowCompleteness(foo).queryCompleteness", flow * 0.25);
+ exp.addScore("flowCompleteness(foo).elementWeight", 1);
+ exp.addScore("flowCompleteness(foo).weight", 100.0);
+ exp.addScore("flowCompleteness(foo).flow", flow);
+ TEST_STATE(vespalib::make_string("execute t0m=%u t1m=%u t2m=%u t3m=%u flow=%u",
+ t0m, t1m, t2m, t3m, flow).c_str());
+ ASSERT_TRUE(ft.execute(exp));
+ }
+ }
+ }
+ }
+ }
+}
+
+
+void
+Test::testReverseProximity()
+{
+ { // Test blueprint.
+ ReverseProximityBlueprint prototype;
+ {
+ EXPECT_TRUE(assertCreateInstance(prototype, "reverseProximity"));
+
+ StringList params, in, out;
+ FT_SETUP_FAIL(prototype, params);
+ FT_SETUP_FAIL(prototype, params.add("foo"));
+ FT_SETUP_FAIL(prototype, params.add("0"));
+ FT_SETUP_FAIL(prototype, params.add("1"));
+ FT_SETUP_FAIL(prototype, params.add("2"));
+ params.clear();
+
+ FtIndexEnvironment ie;
+ ie.getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo");
+ FT_SETUP_FAIL(prototype, ie, params.add("foo"));
+ FT_SETUP_FAIL(prototype, ie, params.add("0"));
+ FT_SETUP_OK (prototype, ie, params.add("1"), in, out.add("out").add("posA").add("posB"));
+ FT_SETUP_FAIL(prototype, ie, params.add("2"));
+ }
+
+ {
+ FT_DUMP_EMPTY(_factory, "reverseProximity");
+ FtIndexEnvironment ie;
+ ie.getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "foo");
+ FT_DUMP_EMPTY(_factory, "reverseProximity", ie); // must be an index field
+
+ StringList dump;
+ ie.getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "bar");
+#ifdef VISIT_BETA_FEATURES
+ for (uint32_t a = 0; a < 5; ++a) {
+ for (uint32_t b = a + 1; b < 6; ++b) {
+ vespalib::string bn = vespalib::make_string("reverseProximity(bar,%u,%u)", a, b);
+ dump.add(bn + ".out");
+ dump.add(bn + ".posA");
+ dump.add(bn + ".posB");
+ }
+ }
+#endif
+ FT_DUMP(_factory, "reverseProximity", ie, dump);
+ }
+ }
+
+
+ { // Test executor.
+ FtFeatureTest ft(_factory, "reverseProximity(foo,0,1)");
+ ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo");
+ ASSERT_TRUE(ft.setup());
+ search::fef::test::RankResult exp;
+ exp.addScore("reverseProximity(foo,0,1).out", util::FEATURE_MAX).
+ addScore("reverseProximity(foo,0,1).posA", util::FEATURE_MIN).
+ addScore("reverseProximity(foo,0,1).posB", util::FEATURE_MAX);
+ ASSERT_TRUE(ft.execute(exp, 1));
+ }
+ {
+ FtFeatureTest ft(_factory, "reverseProximity(foo,0,1)"); ASSERT_TRUE(!ft.setup());
+ ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo");
+ ft.getQueryEnv().getBuilder().addAllFields();
+ ft.getQueryEnv().getBuilder().addAllFields(); ASSERT_TRUE(ft.setup());
+
+ search::fef::test::MatchDataBuilder::UP mdb = ft.createMatchDataBuilder();
+ ASSERT_TRUE(mdb->setFieldLength("foo", 50));
+ ASSERT_TRUE(mdb->addOccurence("foo", 0, 20));
+ search::fef::test::RankResult exp;
+ exp .addScore("reverseProximity(foo,0,1).out", util::FEATURE_MAX)
+ .addScore("reverseProximity(foo,0,1).posA", util::FEATURE_MIN)
+ .addScore("reverseProximity(foo,0,1).posB", util::FEATURE_MAX);
+ ASSERT_TRUE(mdb->apply(1));
+ ASSERT_TRUE(ft.execute(exp, 1));
+
+ ASSERT_TRUE(mdb->addOccurence("foo", 1, 30));
+ ASSERT_TRUE(mdb->apply(2));
+ ASSERT_TRUE(ft.execute(exp, 2));
+
+ ASSERT_TRUE(mdb->addOccurence("foo", 1, 10));
+ ASSERT_TRUE(mdb->apply(3));
+ exp .clear()
+ .addScore("reverseProximity(foo,0,1).out", 10.0f)
+ .addScore("reverseProximity(foo,0,1).posA", 20.0f)
+ .addScore("reverseProximity(foo,0,1).posB", 10.0f);
+ ASSERT_TRUE(ft.execute(exp, 3));
+ }
+ {
+ for (int a = 0; a < 10; ++a) {
+ for (int b = 0; b < 10; ++b) {
+ FtFeatureTest ft(_factory, "reverseProximity(foo,0,1)");
+ ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo");
+ ft.getQueryEnv().getBuilder().addAllFields();
+ ft.getQueryEnv().getBuilder().addAllFields();
+ ASSERT_TRUE(ft.setup());
+
+ search::fef::test::MatchDataBuilder::UP mdb = ft.createMatchDataBuilder();
+ ASSERT_TRUE(mdb->setFieldLength("foo", 10));
+ ASSERT_TRUE(mdb->addOccurence("foo", 0, a));
+ ASSERT_TRUE(mdb->addOccurence("foo", 1, b));
+ ASSERT_TRUE(mdb->apply(1));
+
+ search::fef::test::RankResult exp;
+ exp .addScore("reverseProximity(foo,0,1).out", a >= b ? a - b : util::FEATURE_MAX)
+ .addScore("reverseProximity(foo,0,1).posA", a >= b ? a : util::FEATURE_MIN)
+ .addScore("reverseProximity(foo,0,1).posB", a >= b ? b : util::FEATURE_MAX);
+ ASSERT_TRUE(ft.execute(exp));
+ }
+ }
+ }
+}
+
+void
+Test::testTermEditDistance()
+{
+ { // Test blueprint.
+ TermEditDistanceBlueprint prototype;
+ {
+ EXPECT_TRUE(assertCreateInstance(prototype, "termEditDistance"));
+
+ StringList params, in, out;
+ FT_SETUP_FAIL(prototype, params);
+ FT_SETUP_FAIL(prototype, params.add("foo"));
+ FT_SETUP_FAIL(prototype, params.add("0"));
+
+ FtIndexEnvironment ie;
+ ie.getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo");
+ ie.getBuilder().addField(FieldType::INDEX, CollectionType::ARRAY, "afoo");
+ ie.getBuilder().addField(FieldType::INDEX, CollectionType::WEIGHTEDSET, "wfoo");
+ FT_SETUP_FAIL(prototype, ie, params.clear());
+ FT_SETUP_OK (prototype, ie, params.add("foo"), in.add("fieldLength(foo)"), out.add("out").add("del").add("ins").add("sub"));
+ FT_SETUP_FAIL(prototype, ie, params.add("afoo"));
+ FT_SETUP_FAIL(prototype, ie, params.add("wfoo"));
+ FT_SETUP_FAIL(prototype, ie, params.add("0"));
+ }
+
+ {
+ FT_DUMP_EMPTY(_factory, "termEditDistance");
+ FtIndexEnvironment ie;
+ ie.getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "foo");
+ ie.getBuilder().addField(FieldType::INDEX, CollectionType::ARRAY, "abar");
+ ie.getBuilder().addField(FieldType::INDEX, CollectionType::WEIGHTEDSET, "wbar");
+ FT_DUMP_EMPTY(_factory, "termEditDistance", ie); // must be a single-value index field
+
+ StringList dump;
+#ifdef VISIT_BETA_FEATURES
+ ie.getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "bar");
+ vespalib::string bn = "termEditDistance(bar)";
+ dump.add(bn + ".out");
+ dump.add(bn + ".del");
+ dump.add(bn + ".ins");
+ dump.add(bn + ".sub");
+#endif
+ FT_DUMP(_factory, "termEditDistance", ie, dump);
+ }
+ }
+
+ { // Test executor.
+ assertTermEditDistance("abcde", "abcde", 0, 0, 0);
+ assertTermEditDistance("abcde", "abcd.", 0, 0, 1);
+ assertTermEditDistance("abcde", ".bcd.", 0, 0, 2);
+ assertTermEditDistance("abcde", ".bc..", 0, 0, 3);
+ assertTermEditDistance("abcde", "..c..", 0, 0, 4);
+ assertTermEditDistance("abcd" , "..c..", 0, 1, 3);
+ assertTermEditDistance("abc", "..c..", 0, 2, 2);
+ assertTermEditDistance("ab", "..b..", 0, 3, 1);
+ assertTermEditDistance("a", "..a..", 0, 4, 0);
+ }
+}
+
+// #pragma GCC diagnostic ignored "-Wstrict-aliasing"
+
+void
+Test::assertTermEditDistance(const vespalib::string &query, const vespalib::string &field,
+ uint32_t expectedDel, uint32_t expectedIns, uint32_t expectedSub)
+{
+ // Setup feature test.
+ vespalib::string feature = "termEditDistance(foo)";
+ FtFeatureTest ft(_factory, feature);
+ ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo");
+ StringMap foo;
+ foo.add("foo", field);
+ FT_SETUP(ft, query, foo, 1);
+
+ // Execute and compare results.
+ search::fef::test::RankResult exp;
+ exp .addScore(feature + ".out", (feature_t)(expectedDel*1 + expectedIns*1 + expectedSub*1))
+ .addScore(feature + ".del", (feature_t)expectedDel)
+ .addScore(feature + ".ins", (feature_t)expectedIns)
+ .addScore(feature + ".sub", (feature_t)expectedSub);
+ ASSERT_TRUE(ft.execute(exp));
+}
diff --git a/searchlib/src/tests/features/element_completeness/.gitignore b/searchlib/src/tests/features/element_completeness/.gitignore
new file mode 100644
index 00000000000..9d45fbda0ad
--- /dev/null
+++ b/searchlib/src/tests/features/element_completeness/.gitignore
@@ -0,0 +1 @@
+searchlib_element_completeness_test_app
diff --git a/searchlib/src/tests/features/element_completeness/CMakeLists.txt b/searchlib/src/tests/features/element_completeness/CMakeLists.txt
new file mode 100644
index 00000000000..aee13befe2d
--- /dev/null
+++ b/searchlib/src/tests/features/element_completeness/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_element_completeness_test_app
+ SOURCES
+ element_completeness_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_element_completeness_test_app COMMAND searchlib_element_completeness_test_app)
diff --git a/searchlib/src/tests/features/element_completeness/FILES b/searchlib/src/tests/features/element_completeness/FILES
new file mode 100644
index 00000000000..5b995b34729
--- /dev/null
+++ b/searchlib/src/tests/features/element_completeness/FILES
@@ -0,0 +1 @@
+element_completeness_test.cpp
diff --git a/searchlib/src/tests/features/element_completeness/element_completeness_test.cpp b/searchlib/src/tests/features/element_completeness/element_completeness_test.cpp
new file mode 100644
index 00000000000..24d1625520d
--- /dev/null
+++ b/searchlib/src/tests/features/element_completeness/element_completeness_test.cpp
@@ -0,0 +1,201 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/vespalib/testkit/test_kit.h>
+#include <vespa/searchlib/features/setup.h>
+#include <vespa/searchlib/fef/test/indexenvironment.h>
+#include <vespa/searchlib/fef/test/indexenvironmentbuilder.h>
+#include <vespa/searchlib/fef/test/queryenvironment.h>
+#include <vespa/searchlib/features/element_completeness_feature.h>
+#include <vespa/searchlib/fef/test/ftlib.h>
+#include <vespa/searchlib/fef/test/dummy_dependency_handler.h>
+
+using namespace search::fef;
+using namespace search::fef::test;
+using namespace search::features;
+
+std::vector<vespalib::string> featureNamesFoo() {
+ std::vector<vespalib::string> f;
+ f.push_back("elementCompleteness(foo).completeness");
+ f.push_back("elementCompleteness(foo).fieldCompleteness");
+ f.push_back("elementCompleteness(foo).queryCompleteness");
+ f.push_back("elementCompleteness(foo).elementWeight");
+ return f;
+}
+
+const size_t TOTAL = 0;
+const size_t FIELD = 1;
+const size_t QUERY = 2;
+const size_t WEIGHT = 3;
+
+FtIndex indexFoo() {
+ FtIndex idx;
+ idx.field("foo");
+ return idx;
+}
+
+struct BlueprintFactoryFixture {
+ BlueprintFactory factory;
+ BlueprintFactoryFixture() : factory()
+ {
+ setup_search_features(factory);
+ }
+};
+
+struct IndexFixture {
+ IndexEnvironment indexEnv;
+ IndexFixture() : indexEnv()
+ {
+ IndexEnvironmentBuilder builder(indexEnv);
+ builder.addField(FieldType::INDEX, CollectionType::WEIGHTEDSET, "foo");
+ builder.addField(FieldType::ATTRIBUTE, CollectionType::WEIGHTEDSET, "bar");
+ }
+};
+
+struct FeatureDumpFixture : public IDumpFeatureVisitor {
+ std::vector<vespalib::string> expect;
+ size_t dumped;
+ virtual void visitDumpFeature(const vespalib::string &name) {
+ EXPECT_LESS(dumped, expect.size());
+ EXPECT_EQUAL(expect[dumped++], name);
+ }
+ FeatureDumpFixture() : IDumpFeatureVisitor(), expect(featureNamesFoo()), dumped(0) {}
+};
+
+struct RankFixture : BlueprintFactoryFixture {
+ Properties idxProps;
+ RankFixture() : BlueprintFactoryFixture(), idxProps() {}
+ void test(const vespalib::string &queryStr, const FtIndex &index,
+ feature_t field, feature_t query, int32_t weight = 1, feature_t factor = 0.5,
+ bool useStaleMatchData = false)
+ {
+ std::vector<vespalib::string> names = featureNamesFoo();
+ ASSERT_TRUE(names.size() == 4u);
+ RankResult expect;
+ expect.addScore(names[TOTAL], field*factor + query*(1-factor))
+ .addScore(names[FIELD], field).addScore(names[QUERY], query)
+ .addScore(names[WEIGHT], (double)weight);
+ FtFeatureTest ft(factory, names);
+ ft.getIndexEnv().getProperties().import(idxProps);
+ ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::WEIGHTEDSET, "foo");
+ ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::WEIGHTEDSET, "bar");
+ ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::WEIGHTEDSET, "baz");
+ FtTestApp::FT_SETUP(ft, FtUtil::toQuery(queryStr), index, 1);
+ RankResult actual;
+ EXPECT_TRUE(ft.executeOnly(actual, useStaleMatchData ? 2 : 1));
+ for (size_t i = 0; i < names.size(); ++i) {
+ TEST_STATE(names[i].c_str());
+ EXPECT_EQUAL(expect.getScore(names[i]), actual.getScore(names[i]));
+ }
+ }
+};
+
+TEST_F("require that blueprint can be created from factory", BlueprintFactoryFixture) {
+ Blueprint::SP bp = f.factory.createBlueprint("elementCompleteness");
+ EXPECT_TRUE(bp.get() != 0);
+ EXPECT_TRUE(dynamic_cast<ElementCompletenessBlueprint*>(bp.get()) != 0);
+}
+
+TEST_FFF("require that appropriate features are dumped", ElementCompletenessBlueprint, IndexFixture, FeatureDumpFixture) {
+ f1.visitDumpFeatures(f2.indexEnv, f3);
+ EXPECT_EQUAL(f3.expect.size(), f3.dumped);
+}
+
+TEST_FF("require that setup can be done on index field", ElementCompletenessBlueprint, IndexFixture) {
+ DummyDependencyHandler deps(f1);
+ f1.setName(vespalib::make_string("%s(foo)", f1.getBaseName().c_str()));
+ EXPECT_TRUE(((Blueprint&)f1).setup(f2.indexEnv, std::vector<vespalib::string>(1, "foo")));
+}
+
+TEST_FF("require that setup can not be done on attribute field", ElementCompletenessBlueprint, IndexFixture) {
+ DummyDependencyHandler deps(f1);
+ f1.setName(vespalib::make_string("%s(bar)", f1.getBaseName().c_str()));
+ EXPECT_TRUE(!((Blueprint&)f1).setup(f2.indexEnv, std::vector<vespalib::string>(1, "bar")));
+}
+
+TEST_FF("require that default config parameters are correct", ElementCompletenessBlueprint, IndexFixture) {
+ DummyDependencyHandler deps(f1);
+ f1.setName(vespalib::make_string("%s(foo)", f1.getBaseName().c_str()));
+ EXPECT_TRUE(((Blueprint&)f1).setup(f2.indexEnv, std::vector<vespalib::string>(1, "foo")));
+ EXPECT_EQUAL(0u, f1.getParams().fieldId);
+ EXPECT_EQUAL(0.5, f1.getParams().fieldCompletenessImportance);
+}
+
+TEST_FF("require that blueprint can be configured", ElementCompletenessBlueprint, IndexFixture) {
+ DummyDependencyHandler deps(f1);
+ f1.setName(vespalib::make_string("%s(foo)", f1.getBaseName().c_str()));
+ f2.indexEnv.getProperties().add("elementCompleteness(foo).fieldCompletenessImportance", "0.75");
+ EXPECT_TRUE(((Blueprint&)f1).setup(f2.indexEnv, std::vector<vespalib::string>(1, "foo")));
+ EXPECT_EQUAL(0.75, f1.getParams().fieldCompletenessImportance);
+}
+
+TEST_F("require that no match gives zero outputs", RankFixture) {
+ TEST_DO(f.test("x", indexFoo().element("y"), 0.0, 0.0, 0));
+}
+
+TEST_F("require that perfect match gives max outputs", RankFixture) {
+ TEST_DO(f.test("x", indexFoo().element("x"), 1.0, 1.0));
+}
+
+TEST_F("require that matching half the field gives appropriate outputs", RankFixture) {
+ TEST_DO(f.test("x", indexFoo().element("x y"), 0.5, 1.0));
+ TEST_DO(f.test("x y", indexFoo().element("x y a b"), 0.5, 1.0));
+}
+
+TEST_F("require that matching half the query gives appropriate outputs", RankFixture) {
+ TEST_DO(f.test("x y", indexFoo().element("x"), 1.0, 0.5));
+ TEST_DO(f.test("x y a b", indexFoo().element("x y"), 1.0, 0.5));
+}
+
+TEST_F("require that query completeness is affected by query term weight", RankFixture) {
+ TEST_DO(f.test("x!300 y!100", indexFoo().element("y"), 1.0, 0.25));
+ TEST_DO(f.test("x!300 y!100", indexFoo().element("x"), 1.0, 0.75));
+}
+
+TEST_F("require that field completeness is not affected by duplicate field tokens", RankFixture) {
+ TEST_DO(f.test("x", indexFoo().element("x y y y"), 0.25, 1.00));
+ TEST_DO(f.test("x", indexFoo().element("x x y y"), 0.25, 1.00));
+ TEST_DO(f.test("x", indexFoo().element("x x x y"), 0.25, 1.00));
+ TEST_DO(f.test("x", indexFoo().element("x x x x"), 0.25, 1.00));
+}
+
+TEST_F("require that field completeness is affected by duplicate query terms", RankFixture) {
+ TEST_DO(f.test("x", indexFoo().element("x x x x"), 0.25, 1.00));
+ TEST_DO(f.test("x x", indexFoo().element("x x x x"), 0.50, 1.00));
+ TEST_DO(f.test("x x x", indexFoo().element("x x x x"), 0.75, 1.00));
+ TEST_DO(f.test("x x x x", indexFoo().element("x x x x"), 1.00, 1.00));
+}
+
+TEST_F("require that a single field token can match multiple query terms", RankFixture) {
+ TEST_DO(f.test("x", indexFoo().element("x"), 1.00, 1.00));
+ TEST_DO(f.test("x x", indexFoo().element("x"), 1.00, 1.00));
+ TEST_DO(f.test("x x x", indexFoo().element("x"), 1.00, 1.00));
+ TEST_DO(f.test("x x x x", indexFoo().element("x"), 1.00, 1.00));
+}
+
+TEST_F("require that field completeness importance can be adjusted", RankFixture) {
+ f.idxProps.clear().add("elementCompleteness(foo).fieldCompletenessImportance", "0.1");
+ TEST_DO(f.test("x y", indexFoo().element("x"), 1.0, 0.5, 1, 0.1));
+ f.idxProps.clear().add("elementCompleteness(foo).fieldCompletenessImportance", "0.4");
+ TEST_DO(f.test("x y", indexFoo().element("x"), 1.0, 0.5, 1, 0.4));
+ f.idxProps.clear().add("elementCompleteness(foo).fieldCompletenessImportance", "0.7");
+ TEST_DO(f.test("x y", indexFoo().element("x"), 1.0, 0.5, 1, 0.7));
+}
+
+TEST_F("require that order is not relevant", RankFixture) {
+ TEST_DO(f.test("x y a b", indexFoo().element("n x n y"), 0.5, 0.5));
+ TEST_DO(f.test("a b x y", indexFoo().element("y x n n"), 0.5, 0.5));
+ TEST_DO(f.test("a y x b", indexFoo().element("x n y n"), 0.5, 0.5));
+}
+
+TEST_F("require that element is selected based on completeness times element weight", RankFixture) {
+ f.idxProps.clear().add("elementCompleteness(foo).fieldCompletenessImportance", "0.0");
+ TEST_DO(f.test("x y a b", indexFoo().element("x", 39).element("y", 39).element("a b", 19).element("x y a b", 10), 1.0, 1.0, 10, 0.0));
+ TEST_DO(f.test("x y a b", indexFoo().element("x", 39).element("y", 39).element("a b", 21).element("x y a b", 10), 1.0, 0.5, 21, 0.0));
+ TEST_DO(f.test("x y a b", indexFoo().element("x", 39).element("y", 45).element("a b", 21).element("x y a b", 10), 1.0, 0.25, 45, 0.0));
+}
+
+TEST_F("require that stale match data is ignored", RankFixture) {
+ TEST_DO(f.test("x y a b", indexFoo().element("x y"), 0.0, 0.0, 0, 0.5, true));
+}
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/features/element_similarity_feature/.gitignore b/searchlib/src/tests/features/element_similarity_feature/.gitignore
new file mode 100644
index 00000000000..36e60cd547e
--- /dev/null
+++ b/searchlib/src/tests/features/element_similarity_feature/.gitignore
@@ -0,0 +1 @@
+searchlib_element_similarity_feature_test_app
diff --git a/searchlib/src/tests/features/element_similarity_feature/CMakeLists.txt b/searchlib/src/tests/features/element_similarity_feature/CMakeLists.txt
new file mode 100644
index 00000000000..08e3b04cd73
--- /dev/null
+++ b/searchlib/src/tests/features/element_similarity_feature/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_element_similarity_feature_test_app
+ SOURCES
+ element_similarity_feature_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_element_similarity_feature_test_app COMMAND searchlib_element_similarity_feature_test_app)
diff --git a/searchlib/src/tests/features/element_similarity_feature/element_similarity_feature_test.cpp b/searchlib/src/tests/features/element_similarity_feature/element_similarity_feature_test.cpp
new file mode 100644
index 00000000000..181f2fb71f3
--- /dev/null
+++ b/searchlib/src/tests/features/element_similarity_feature/element_similarity_feature_test.cpp
@@ -0,0 +1,371 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/vespalib/testkit/test_kit.h>
+#include <vespa/searchlib/features/setup.h>
+#include <vespa/searchlib/fef/test/indexenvironment.h>
+#include <vespa/searchlib/fef/test/indexenvironmentbuilder.h>
+#include <vespa/searchlib/fef/test/queryenvironment.h>
+#include <vespa/searchlib/features/element_similarity_feature.h>
+#include <vespa/searchlib/fef/test/ftlib.h>
+#include <initializer_list>
+#include <vespa/searchlib/fef/test/dummy_dependency_handler.h>
+
+using namespace search::fef;
+using namespace search::fef::test;
+using namespace search::features;
+
+const vespalib::string DEFAULT = "elementSimilarity(foo)";
+const vespalib::string PROXIMITY = "elementSimilarity(foo).proximity";
+const vespalib::string ORDER = "elementSimilarity(foo).order";
+const vespalib::string QUERY = "elementSimilarity(foo).query_coverage";
+const vespalib::string FIELD = "elementSimilarity(foo).field_coverage";
+const vespalib::string WEIGHT = "elementSimilarity(foo).weight";
+
+FtIndex indexFoo() {
+ FtIndex idx;
+ idx.field("foo");
+ return idx;
+}
+
+//-----------------------------------------------------------------------------
+
+struct BlueprintFactoryFixture {
+ BlueprintFactory factory;
+ BlueprintFactoryFixture() : factory()
+ {
+ setup_search_features(factory);
+ }
+};
+
+struct IndexFixture {
+ IndexEnvironment indexEnv;
+ IndexFixture() : indexEnv()
+ {
+ IndexEnvironmentBuilder builder(indexEnv);
+ builder.addField(FieldType::INDEX, CollectionType::WEIGHTEDSET, "foo");
+ builder.addField(FieldType::INDEX, CollectionType::ARRAY, "bar");
+ builder.addField(FieldType::INDEX, CollectionType::SINGLE, "baz");
+ builder.addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "fox");
+ set("elementSimilarity(foo).output.proximity", "max(p)");
+ set("elementSimilarity(foo).output.order", "max(o)");
+ set("elementSimilarity(foo).output.query_coverage", "max(q)");
+ set("elementSimilarity(foo).output.field_coverage", "max(f)");
+ set("elementSimilarity(foo).output.weight", "max(w)");
+ set("elementSimilarity(bar).output.default", "avg(1)");
+ }
+ IndexFixture &set(const vespalib::string &key, const vespalib::string &value) {
+ Properties tmp;
+ tmp.add(key, value);
+ indexEnv.getProperties().import(tmp);
+ return *this;
+ }
+};
+
+struct FeatureDumpFixture : public IDumpFeatureVisitor {
+ std::vector<vespalib::string> actual;
+ FeatureDumpFixture() : IDumpFeatureVisitor(), actual() {}
+ virtual void visitDumpFeature(const vespalib::string &name) {
+ actual.push_back(name);
+ }
+};
+
+struct RankFixture : BlueprintFactoryFixture {
+ RankFixture() : BlueprintFactoryFixture() {}
+ double get_feature(const vespalib::string &query, const FtIndex &index, const vespalib::string &select,
+ const IndexFixture &idx_env = IndexFixture())
+ {
+ std::vector<vespalib::string> names({"elementSimilarity(foo).default", // use 'default' explicitly to verify default output name
+ "elementSimilarity(foo).proximity",
+ "elementSimilarity(foo).order",
+ "elementSimilarity(foo).query_coverage",
+ "elementSimilarity(foo).field_coverage",
+ "elementSimilarity(foo).weight"});
+ FtFeatureTest ft(factory, names);
+ ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::WEIGHTEDSET, "foo");
+ ft.getIndexEnv().getBuilder().getIndexEnv().getProperties().import(idx_env.indexEnv.getProperties());
+ FtTestApp::FT_SETUP(ft, FtUtil::toQuery(query), index, 1);
+ {
+ RankResult stale;
+ EXPECT_TRUE(ft.executeOnly(stale, 2));
+ EXPECT_EQUAL(0.0, stale.getScore(select));
+ }
+ RankResult actual;
+ EXPECT_TRUE(ft.executeOnly(actual, 1));
+ return actual.getScore(select);
+ }
+};
+
+//-----------------------------------------------------------------------------
+
+double prox(uint32_t dist) {
+ return (dist > 8) ? 0 : (1.0 - (((dist-1)/8.0) * ((dist-1)/8.0)));
+}
+
+double sum(std::initializer_list<double> values) {
+ double my_sum = 0.0;
+ for (double value: values) {
+ my_sum += value;
+ }
+ return my_sum;
+}
+
+double comb(std::initializer_list<double> values) {
+ return (sum(values)/values.size());
+}
+
+double mix(double proximity, double order, double query, double field) {
+ return (0.35 * proximity) + (0.15 * order) + (0.30 * query) + (0.20 * field);
+}
+
+//-----------------------------------------------------------------------------
+
+template <typename A, typename B>
+bool cmp_lists_impl(const A &a, const B &b) {
+ std::vector<typename A::value_type> tmp_a(a.begin(), a.end());
+ std::vector<typename B::value_type> tmp_b(b.begin(), b.end());
+ std::sort(tmp_a.begin(), tmp_a.end());
+ std::sort(tmp_b.begin(), tmp_b.end());
+ if (!EXPECT_EQUAL(tmp_a.size(), tmp_b.size())) {
+ return false;
+ }
+ for (size_t i = 0; i < tmp_a.size(); ++i) {
+ if(!EXPECT_EQUAL(tmp_a[i], tmp_b[i])) {
+ return false;
+ }
+ }
+ return true;
+}
+
+template <typename T>
+void dump_list(const vespalib::string &name, const T &list) {
+ fprintf(stderr, "list(name: '%s', size: %zu)\n", name.c_str(), list.size());
+ std::vector<typename T::value_type> tmp(list.begin(), list.end());
+ std::sort(tmp.begin(), tmp.end());
+ for (vespalib::string item: tmp) {
+ fprintf(stderr, " '%s'\n", item.c_str());
+ }
+}
+
+template <typename A, typename B>
+bool cmp_lists(const A &a, const B &b) {
+ if(!cmp_lists_impl(a, b)) {
+ dump_list("expected", a);
+ dump_list("actual", b);
+ return false;
+ }
+ return true;
+};
+
+//-----------------------------------------------------------------------------
+
+TEST_F("require that blueprint can be created from factory", BlueprintFactoryFixture) {
+ Blueprint::SP bp = f.factory.createBlueprint("elementSimilarity");
+ EXPECT_TRUE(bp.get() != 0);
+ EXPECT_TRUE(dynamic_cast<ElementSimilarityBlueprint*>(bp.get()) != 0);
+}
+
+TEST_FFF("require that appropriate features are dumped", ElementSimilarityBlueprint, IndexFixture, FeatureDumpFixture) {
+ f1.visitDumpFeatures(f2.indexEnv, f3);
+ EXPECT_TRUE(cmp_lists(std::vector<vespalib::string>({"elementSimilarity(foo)",
+ "elementSimilarity(foo).proximity",
+ "elementSimilarity(foo).order",
+ "elementSimilarity(foo).query_coverage",
+ "elementSimilarity(foo).field_coverage",
+ "elementSimilarity(foo).weight",
+ "elementSimilarity(bar)"}),
+ f3.actual));
+}
+
+bool try_setup(ElementSimilarityBlueprint &blueprint, const IndexFixture &index, const vespalib::string &field) {
+ DummyDependencyHandler deps(blueprint);
+ blueprint.setName(vespalib::make_string("%s(%s)", blueprint.getBaseName().c_str(), field.c_str()));
+ return ((Blueprint&)blueprint).setup(index.indexEnv, std::vector<vespalib::string>(1, field));
+}
+
+TEST_FF("require that setup can be done on weighted set index field", ElementSimilarityBlueprint, IndexFixture) {
+ EXPECT_TRUE(try_setup(f1, f2, "foo"));
+}
+
+TEST_FF("require that setup can be done on array index field", ElementSimilarityBlueprint, IndexFixture) {
+ EXPECT_TRUE(try_setup(f1, f2, "bar"));
+}
+
+TEST_FF("require that setup can be done on single value index field", ElementSimilarityBlueprint, IndexFixture) {
+ EXPECT_TRUE(try_setup(f1, f2, "baz"));
+}
+
+TEST_FF("require that setup can not be done on single value attribute field", ElementSimilarityBlueprint, IndexFixture) {
+ EXPECT_FALSE(try_setup(f1, f2, "fox"));
+}
+
+TEST_FF("require that setup will fail if output expression does not contain an aggregator", ElementSimilarityBlueprint, IndexFixture) {
+ f2.set("elementSimilarity(foo).output.default", "p");
+ EXPECT_FALSE(try_setup(f1, f2, "foo"));
+}
+
+TEST_FF("require that setup will fail if output expression contains an unknown aggregator", ElementSimilarityBlueprint, IndexFixture) {
+ f2.set("elementSimilarity(foo).output.default", "bogus(p)");
+ EXPECT_FALSE(try_setup(f1, f2, "foo"));
+}
+
+TEST_FF("require that setup will fail if output expression contains an unknown symbol", ElementSimilarityBlueprint, IndexFixture) {
+ f2.set("elementSimilarity(foo).output.default", "max(bogus)");
+ EXPECT_FALSE(try_setup(f1, f2, "foo"));
+}
+
+TEST_FF("require that setup will fail if output expression is malformed", ElementSimilarityBlueprint, IndexFixture) {
+ f2.set("elementSimilarity(foo).output.default", "max(w+)");
+ EXPECT_FALSE(try_setup(f1, f2, "foo"));
+}
+
+TEST_F("require that no match gives zero outputs", RankFixture) {
+ EXPECT_EQUAL(0.0, f1.get_feature("x", indexFoo().element("y"), DEFAULT));
+ EXPECT_EQUAL(0.0, f1.get_feature("x", indexFoo().element("y"), PROXIMITY));
+ EXPECT_EQUAL(0.0, f1.get_feature("x", indexFoo().element("y"), ORDER));
+ EXPECT_EQUAL(0.0, f1.get_feature("x", indexFoo().element("y"), QUERY));
+ EXPECT_EQUAL(0.0, f1.get_feature("x", indexFoo().element("y"), FIELD));
+}
+
+TEST_F("require that minal perfect match gives max outputs", RankFixture) {
+ EXPECT_EQUAL(1.0, f1.get_feature("x", indexFoo().element("x"), DEFAULT));
+ EXPECT_EQUAL(1.0, f1.get_feature("x", indexFoo().element("x"), PROXIMITY));
+ EXPECT_EQUAL(1.0, f1.get_feature("x", indexFoo().element("x"), ORDER));
+ EXPECT_EQUAL(1.0, f1.get_feature("x", indexFoo().element("x"), QUERY));
+ EXPECT_EQUAL(1.0, f1.get_feature("x", indexFoo().element("x"), FIELD));
+}
+
+TEST_F("require that larger perfect match gives max outputs", RankFixture) {
+ EXPECT_EQUAL(1.0, f1.get_feature("a b c d e f g", indexFoo().element("a b c d e f g"), DEFAULT));
+ EXPECT_EQUAL(1.0, f1.get_feature("a b c d e f g", indexFoo().element("a b c d e f g"), PROXIMITY));
+ EXPECT_EQUAL(1.0, f1.get_feature("a b c d e f g", indexFoo().element("a b c d e f g"), ORDER));
+ EXPECT_EQUAL(1.0, f1.get_feature("a b c d e f g", indexFoo().element("a b c d e f g"), QUERY));
+ EXPECT_EQUAL(1.0, f1.get_feature("a b c d e f g", indexFoo().element("a b c d e f g"), FIELD));
+}
+
+TEST_F("require that extra query terms reduces order but not proximity", RankFixture) {
+ EXPECT_EQUAL(1.0, f1.get_feature("x y", indexFoo().element("x"), PROXIMITY));
+ EXPECT_EQUAL(1.0, f1.get_feature("x y y", indexFoo().element("x"), PROXIMITY));
+ EXPECT_EQUAL(1.0, f1.get_feature("x y y y", indexFoo().element("x"), PROXIMITY));
+
+ EXPECT_EQUAL(0.0, f1.get_feature("x y", indexFoo().element("x"), ORDER));
+ EXPECT_EQUAL(0.0, f1.get_feature("x y y", indexFoo().element("x"), ORDER));
+ EXPECT_EQUAL(0.0, f1.get_feature("x y y y", indexFoo().element("x"), ORDER));
+}
+
+TEST_F("require that extra field terms reduces proximity but not order", RankFixture) {
+ EXPECT_EQUAL(prox(2), f1.get_feature("x", indexFoo().element("x y"), PROXIMITY));
+ EXPECT_EQUAL(prox(3), f1.get_feature("x", indexFoo().element("x y y"), PROXIMITY));
+ EXPECT_EQUAL(prox(4), f1.get_feature("x", indexFoo().element("x y y y"), PROXIMITY));
+
+ EXPECT_EQUAL(1.0, f1.get_feature("x", indexFoo().element("x y"), ORDER));
+ EXPECT_EQUAL(1.0, f1.get_feature("x", indexFoo().element("x y y"), ORDER));
+ EXPECT_EQUAL(1.0, f1.get_feature("x", indexFoo().element("x y y y"), ORDER));
+}
+
+TEST_F("require that proximity acts as expected", RankFixture) {
+ EXPECT_EQUAL(1.0, f1.get_feature("a b c d e", indexFoo().element("a b c d e"), PROXIMITY));
+ EXPECT_EQUAL(comb({prox(2), prox(1), prox(1), prox(1)}), f1.get_feature("a b c d e", indexFoo().element("a x b c d e"), PROXIMITY));
+ EXPECT_EQUAL(comb({prox(3), prox(1), prox(1), prox(1)}), f1.get_feature("a b c d e", indexFoo().element("a x x b c d e"), PROXIMITY));
+ EXPECT_EQUAL(comb({prox(4), prox(1), prox(1), prox(1)}), f1.get_feature("a b c d e", indexFoo().element("a x x x b c d e"), PROXIMITY));
+ EXPECT_EQUAL(comb({prox(2), prox(2), prox(2), prox(2)}), f1.get_feature("a b c d e", indexFoo().element("a x b x c x d x e"), PROXIMITY));
+ EXPECT_EQUAL(comb({prox(2), prox(2), prox(1), prox(3)}), f1.get_feature("a b c d e", indexFoo().element("a x b x c d x x e"), PROXIMITY));
+}
+
+TEST_F("require that field order does not affect proximity score", RankFixture) {
+ EXPECT_EQUAL(1.0, f1.get_feature("a b c d e", indexFoo().element("d c a b e"), PROXIMITY));
+ EXPECT_EQUAL(comb({prox(2), prox(1), prox(1), prox(1)}), f1.get_feature("a b c d e", indexFoo().element("d x c a b e"), PROXIMITY));
+ EXPECT_EQUAL(comb({prox(3), prox(1), prox(1), prox(1)}), f1.get_feature("a b c d e", indexFoo().element("d x x c a b e"), PROXIMITY));
+ EXPECT_EQUAL(comb({prox(4), prox(1), prox(1), prox(1)}), f1.get_feature("a b c d e", indexFoo().element("d x x x c a b e"), PROXIMITY));
+ EXPECT_EQUAL(comb({prox(2), prox(2), prox(2), prox(2)}), f1.get_feature("a b c d e", indexFoo().element("d x c x a x b x e"), PROXIMITY));
+ EXPECT_EQUAL(comb({prox(2), prox(2), prox(1), prox(3)}), f1.get_feature("a b c d e", indexFoo().element("d x c x a b x x e"), PROXIMITY));
+}
+
+TEST_F("require that order score acts as expected", RankFixture) {
+ EXPECT_EQUAL(1.0, f1.get_feature("a b c d e", indexFoo().element("a b c d e"), ORDER));
+ EXPECT_EQUAL(comb({1.0, 1.0, 1.0, 0.0}), f1.get_feature("a b c d e", indexFoo().element("a b c e d"), ORDER));
+ EXPECT_EQUAL(comb({0.0, 1.0, 1.0, 0.0}), f1.get_feature("a b c d e", indexFoo().element("b a c e d"), ORDER));
+ EXPECT_EQUAL(comb({0.0, 1.0, 0.0, 0.0}), f1.get_feature("a b c d e", indexFoo().element("b a e d c"), ORDER));
+ EXPECT_EQUAL(comb({0.0, 0.0, 0.0, 0.0}), f1.get_feature("a b c d e", indexFoo().element("e d c b a"), ORDER));
+}
+
+TEST_F("require that proximity does not affect order score", RankFixture) {
+ EXPECT_EQUAL(1.0, f1.get_feature("a b c d e", indexFoo().element("a b c d e"), ORDER));
+ EXPECT_EQUAL(comb({1.0, 1.0, 1.0, 0.0}), f1.get_feature("a b c d e", indexFoo().element("a x b x c x e x d"), ORDER));
+ EXPECT_EQUAL(comb({0.0, 1.0, 1.0, 0.0}), f1.get_feature("a b c d e", indexFoo().element("b x a x c x e x d"), ORDER));
+ EXPECT_EQUAL(comb({0.0, 1.0, 0.0, 0.0}), f1.get_feature("a b c d e", indexFoo().element("b x a x e x d x c"), ORDER));
+ EXPECT_EQUAL(comb({0.0, 0.0, 0.0, 0.0}), f1.get_feature("a b c d e", indexFoo().element("e x d x c x b x a"), ORDER));
+}
+
+TEST_F("require that query coverage acts as expected", RankFixture) {
+ EXPECT_EQUAL(5.0/5.0, f1.get_feature("a b c d e", indexFoo().element("a b c d e"), QUERY));
+ EXPECT_EQUAL(4.0/5.0, f1.get_feature("a b c d e", indexFoo().element("a b c d"), QUERY));
+ EXPECT_EQUAL(3.0/5.0, f1.get_feature("a b c d e", indexFoo().element("a b c"), QUERY));
+ EXPECT_EQUAL(2.0/5.0, f1.get_feature("a b c d e", indexFoo().element("a b"), QUERY));
+ EXPECT_EQUAL(4.0/7.0, f1.get_feature("a!200 b!200 c d e", indexFoo().element("a b"), QUERY));
+ EXPECT_EQUAL(2.0/7.0, f1.get_feature("a b c!500", indexFoo().element("a b"), QUERY));
+ EXPECT_EQUAL(5.0/7.0, f1.get_feature("a b c!500", indexFoo().element("c"), QUERY));
+}
+
+TEST_F("require that field coverage acts as expected", RankFixture) {
+ EXPECT_EQUAL(5.0/5.0, f1.get_feature("a b c d e", indexFoo().element("a b c d e"), FIELD));
+ EXPECT_EQUAL(4.0/5.0, f1.get_feature("a b c d e", indexFoo().element("a x c d e"), FIELD));
+ EXPECT_EQUAL(3.0/5.0, f1.get_feature("a b c d e", indexFoo().element("a b x x e"), FIELD));
+ EXPECT_EQUAL(2.0/5.0, f1.get_feature("a b c d e", indexFoo().element("x x x d e"), FIELD));
+}
+
+TEST_F("require that first unique match is used per query term", RankFixture) {
+ EXPECT_EQUAL(prox(3), f1.get_feature("a b", indexFoo().element("a a a b"), PROXIMITY));
+ EXPECT_EQUAL(1.0, f1.get_feature("a b", indexFoo().element("a a a b"), ORDER));
+ EXPECT_EQUAL(1.0, f1.get_feature("a b", indexFoo().element("a a a b"), QUERY));
+ EXPECT_EQUAL(2.0/4.0, f1.get_feature("a b", indexFoo().element("a a a b"), FIELD));
+
+ EXPECT_EQUAL(comb({prox(1), prox(2)}), f1.get_feature("a b a", indexFoo().element("a a a b"), PROXIMITY));
+ EXPECT_EQUAL(0.5, f1.get_feature("a b a", indexFoo().element("a a a b"), ORDER));
+ EXPECT_EQUAL(1.0, f1.get_feature("a b a", indexFoo().element("a a a b"), QUERY));
+ EXPECT_EQUAL(3.0/4.0, f1.get_feature("a b a", indexFoo().element("a a a b"), FIELD));
+}
+
+TEST_F("require that default score combines individual signals appropriately", RankFixture) {
+ EXPECT_EQUAL(comb({prox(1), prox(3), prox(2)}), f1.get_feature("a b c d e", indexFoo().element("a c x x b x d"), PROXIMITY));
+ EXPECT_EQUAL(comb({1.0, 0.0, 1.0}), f1.get_feature("a b c d e", indexFoo().element("a c x x b x d"), ORDER));
+ EXPECT_EQUAL(4.0/5.0, f1.get_feature("a b c d e", indexFoo().element("a c x x b x d"), QUERY));
+ EXPECT_EQUAL(4.0/7.0, f1.get_feature("a b c d e", indexFoo().element("a c x x b x d"), FIELD));
+ EXPECT_EQUAL(mix(comb({prox(1), prox(3), prox(2)}), comb({1.0, 0.0, 1.0}), 4.0/5.0, 4.0/7.0),
+ f1.get_feature("a b c d e", indexFoo().element("a c x x b x d"), DEFAULT));
+ EXPECT_EQUAL(7.0 * mix(comb({prox(1), prox(3), prox(2)}), comb({1.0, 0.0, 1.0}), 4.0/5.0, 4.0/7.0),
+ f1.get_feature("a b c d e", indexFoo().element("a c x x b x d", 7), DEFAULT));
+}
+
+TEST_FF("require that max aggregation works", RankFixture, IndexFixture) {
+ f2.set("elementSimilarity(foo).output.default", "max(w)");
+ EXPECT_EQUAL(5.0, f1.get_feature("x", indexFoo().element("x y", 5), DEFAULT, f2));
+ EXPECT_EQUAL(5.0, f1.get_feature("x", indexFoo().element("x y", 5).element("x y", 3), DEFAULT, f2));
+ EXPECT_EQUAL(5.0, f1.get_feature("x", indexFoo().element("x y", 3).element("x y", 5), DEFAULT, f2));
+}
+
+TEST_FF("require that avg aggregation works", RankFixture, IndexFixture) {
+ f2.set("elementSimilarity(foo).output.default", "avg(w)");
+ EXPECT_EQUAL(5.0, f1.get_feature("x", indexFoo().element("x y", 5), DEFAULT, f2));
+ EXPECT_EQUAL(4.0, f1.get_feature("x", indexFoo().element("x y", 5).element("x y", 3), DEFAULT, f2));
+ EXPECT_EQUAL(4.0, f1.get_feature("x", indexFoo().element("x y", 3).element("x y", 5), DEFAULT, f2));
+}
+
+TEST_FF("require that sum aggregation works", RankFixture, IndexFixture) {
+ f2.set("elementSimilarity(foo).output.default", "sum(w)");
+ EXPECT_EQUAL(5.0, f1.get_feature("x", indexFoo().element("x y", 5), DEFAULT, f2));
+ EXPECT_EQUAL(8.0, f1.get_feature("x", indexFoo().element("x y", 5).element("x y", 3), DEFAULT, f2));
+ EXPECT_EQUAL(8.0, f1.get_feature("x", indexFoo().element("x y", 3).element("x y", 5), DEFAULT, f2));
+}
+
+TEST_FF("require that element demultiplexing works", RankFixture, IndexFixture) {
+ f2.set("elementSimilarity(foo).output.default", "sum(q)");
+ EXPECT_EQUAL(sum({0.25, 0.5, 0.5, 0.25, 0.5}),
+ f1.get_feature("x y z t", indexFoo()
+ .element("x")
+ .element("x y")
+ .element("x z")
+ .element("y")
+ .element("x z"), DEFAULT, f2));
+}
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/features/euclidean_distance/.gitignore b/searchlib/src/tests/features/euclidean_distance/.gitignore
new file mode 100644
index 00000000000..2d08dd27122
--- /dev/null
+++ b/searchlib/src/tests/features/euclidean_distance/.gitignore
@@ -0,0 +1 @@
+searchlib_euclidean_distance_test_app
diff --git a/searchlib/src/tests/features/euclidean_distance/CMakeLists.txt b/searchlib/src/tests/features/euclidean_distance/CMakeLists.txt
new file mode 100644
index 00000000000..d79aa9572bc
--- /dev/null
+++ b/searchlib/src/tests/features/euclidean_distance/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_euclidean_distance_test_app
+ SOURCES
+ euclidean_distance_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_euclidean_distance_test_app COMMAND searchlib_euclidean_distance_test_app)
diff --git a/searchlib/src/tests/features/euclidean_distance/FILES b/searchlib/src/tests/features/euclidean_distance/FILES
new file mode 100644
index 00000000000..4ed7d9969b3
--- /dev/null
+++ b/searchlib/src/tests/features/euclidean_distance/FILES
@@ -0,0 +1 @@
+euclidean_distance_test.cpp
diff --git a/searchlib/src/tests/features/euclidean_distance/euclidean_distance_test.cpp b/searchlib/src/tests/features/euclidean_distance/euclidean_distance_test.cpp
new file mode 100644
index 00000000000..b0d97902728
--- /dev/null
+++ b/searchlib/src/tests/features/euclidean_distance/euclidean_distance_test.cpp
@@ -0,0 +1,115 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/vespalib/testkit/test_kit.h>
+
+#include <vespa/searchlib/attribute/attributefactory.h>
+#include <vespa/searchlib/attribute/attributevector.h>
+#include <vespa/searchlib/attribute/integerbase.h>
+#include <vespa/searchlib/attribute/floatbase.h>
+#include <vespa/searchlib/features/setup.h>
+#include <vespa/searchlib/fef/test/indexenvironment.h>
+#include <vespa/searchlib/fef/test/indexenvironmentbuilder.h>
+#include <vespa/searchlib/fef/test/queryenvironment.h>
+#include <vespa/searchlib/fef/test/ftlib.h>
+#include <vespa/searchlib/features/euclidean_distance_feature.h>
+#include <vespa/searchlib/fef/fef.h>
+
+using search::feature_t;
+using namespace search::fef;
+using namespace search::fef::test;
+using namespace search::features;
+using search::AttributeFactory;
+using search::IntegerAttribute;
+using search::FloatingPointAttribute;
+
+typedef search::attribute::Config AVC;
+typedef search::attribute::BasicType AVBT;
+typedef search::attribute::CollectionType AVCT;
+typedef search::AttributeVector::SP AttributePtr;
+typedef FtTestApp FTA;
+
+struct SetupFixture
+{
+ EuclideanDistanceBlueprint blueprint;
+ IndexEnvironment indexEnv;
+ SetupFixture()
+ : blueprint(),
+ indexEnv()
+ {
+ FieldInfo myField(FieldType::ATTRIBUTE, CollectionType::ARRAY, "myAttribute", 1);
+ indexEnv.getFields().push_back(myField);
+ }
+};
+
+TEST_F("require that blueprint can be created from factory", SetupFixture)
+{
+ EXPECT_TRUE(FTA::assertCreateInstance(f.blueprint, "euclideanDistance"));
+}
+
+TEST_F("require that setup succeeds with attribute source", SetupFixture)
+{
+ FTA::FT_SETUP_OK(f.blueprint, f.indexEnv, StringList().add("myAttribute").add("myVector"),
+ StringList(), StringList().add("distance"));
+}
+
+struct ExecFixture
+{
+ BlueprintFactory factory;
+ FtFeatureTest test;
+ ExecFixture(const vespalib::string &feature)
+ : factory(),
+ test(factory, feature)
+ {
+ setup_search_features(factory);
+ setupAttributeVectors();
+ setupQueryEnvironment();
+ ASSERT_TRUE(test.setup());
+ }
+ void setupAttributeVectors() {
+ std::vector<AttributePtr> attrs;
+ attrs.push_back(AttributeFactory::createAttribute("aint", AVC(AVBT::INT32, AVCT::ARRAY)));
+ attrs.push_back(AttributeFactory::createAttribute("afloat", AVC(AVBT::FLOAT, AVCT::ARRAY)));
+
+ test.getIndexEnv().getFields().push_back(FieldInfo(FieldType::ATTRIBUTE, CollectionType::ARRAY, "aint", 0));
+ test.getIndexEnv().getFields().push_back(FieldInfo(FieldType::ATTRIBUTE, CollectionType::ARRAY, "afloat", 1));
+
+ for (const auto &attr : attrs) {
+ attr->addReservedDoc();
+ attr->addDocs(1);
+ test.getIndexEnv().getAttributeManager().add(attr);
+ }
+
+ IntegerAttribute *aint = static_cast<IntegerAttribute *>(attrs[0].get());
+ aint->append(1, 1, 0);
+ aint->append(1, -2, 0);
+ aint->append(1, 3, 0);
+
+ FloatingPointAttribute *afloat = static_cast<FloatingPointAttribute *>(attrs[1].get());
+ afloat->append(1, 1.3, 0);
+ afloat->append(1, 1.5, 0);
+ afloat->append(1, -1.7, 0);
+
+ for (const auto &attr : attrs) {
+ attr->commit();
+ }
+ }
+ void setupQueryEnvironment() {
+ test.getQueryEnv().getProperties().add("euclideanDistance.intquery", "[4 5 -6]");
+ test.getQueryEnv().getProperties().add("euclideanDistance.floatquery", "[4.1 15 0.001]");
+ }
+
+};
+
+TEST_F("require that distance is calculated for integer vectors",
+ ExecFixture("euclideanDistance(aint,intquery)"))
+{
+ EXPECT_TRUE(f.test.execute(11.789826, 0.000001));
+}
+
+TEST_F("require that distance is calculated for floating point vectors",
+ ExecFixture("euclideanDistance(afloat,floatquery)"))
+{
+ EXPECT_TRUE(f.test.execute(13.891846, 0.000001));
+}
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/features/featurebenchmark.cpp b/searchlib/src/tests/features/featurebenchmark.cpp
new file mode 100644
index 00000000000..14e43fa7d47
--- /dev/null
+++ b/searchlib/src/tests/features/featurebenchmark.cpp
@@ -0,0 +1,657 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("featurebenchmark");
+
+#include <fstream>
+#include <iomanip>
+#include <iostream>
+#include <string>
+#include <boost/tokenizer.hpp>
+
+#include <vespa/searchlib/attribute/attributefactory.h>
+#include <vespa/searchlib/attribute/attributevector.h>
+#include <vespa/searchlib/attribute/attributevector.hpp>
+#include <vespa/searchlib/attribute/integerbase.h>
+#include <vespa/searchlib/attribute/stringbase.h>
+
+#include <vespa/searchlib/features/setup.h>
+#include <vespa/searchlib/features/utils.h>
+#include <vespa/searchlib/fef/functiontablefactory.h>
+#include <vespa/searchlib/fef/test/plugin/setup.h>
+#include <vespa/vespalib/util/stringfmt.h>
+#include <vespa/searchlib/fef/test/ftlib.h>
+
+using namespace search::features;
+using namespace search::fef;
+using namespace search::fef::test;
+
+using search::AttributeVector;
+using search::AttributeFactory;
+using search::IntegerAttribute;
+using search::StringAttribute;
+
+typedef search::attribute::Config AVC;
+typedef search::attribute::BasicType AVBT;
+typedef search::attribute::CollectionType AVCT;
+
+typedef AttributeVector::SP AttributePtr;
+
+class Benchmark : public FtTestApp {
+public:
+ typedef std::vector<std::pair<vespalib::string, vespalib::string> > KeyValueVector;
+
+ class Config {
+ private:
+ typedef std::map<vespalib::string, vespalib::string> StringMap;
+ StringMap _config;
+
+ bool isKnown(const vespalib::string & key) const;
+
+ public:
+ Config() : _config() {}
+ Config(const vespalib::string & fileName) : _config() {
+ init(fileName);
+ }
+ void init(const vespalib::string & fileName);
+
+ void add(const vespalib::string & key, const vespalib::string & value) {
+ _config[key] = value;
+ }
+
+ void addIfNotFound(const vespalib::string & key, const vespalib::string & value) {
+ if (_config.count(key) == 0) {
+ add(key, value);
+ }
+ }
+
+ // known config values
+ vespalib::string getCase(const vespalib::string & fallback = "") const {
+ return getAsStr("case", fallback);
+ }
+ vespalib::string getFeature(const vespalib::string & fallback = "") const {
+ return getAsStr("feature", fallback);
+ }
+ vespalib::string getIndex(const vespalib::string & fallback = "") const {
+ return getAsStr("index", fallback);
+ }
+ vespalib::string getQuery(const vespalib::string & fallback = "") const {
+ return getAsStr("query", fallback);
+ }
+ vespalib::string getField(const vespalib::string & fallback = "") const {
+ return getAsStr("field", fallback);
+ }
+ uint32_t getNumRuns(uint32_t fallback = 1000) const {
+ return getAsUint32("numruns", fallback);
+ }
+
+ // access "unknown" config values
+ vespalib::string getAsStr(const vespalib::string & key, const vespalib::string & fallback = "") const {
+ StringMap::const_iterator itr = _config.find(key);
+ if (itr != _config.end()) {
+ return vespalib::string(itr->second);
+ }
+ return vespalib::string(fallback);
+ }
+ uint32_t getAsUint32(const vespalib::string & key, uint32_t fallback = 0) const {
+ return util::strToNum<uint32_t>(getAsStr(key, vespalib::make_string("%u", fallback)));
+ }
+ double getAsDouble(const vespalib::string & key, double fallback = 0) const {
+ return util::strToNum<double>(getAsStr(key, vespalib::make_string("%f", fallback)));
+ }
+
+ KeyValueVector getUnknown() const;
+
+ friend std::ostream & operator << (std::ostream & os, const Config & cfg);
+ };
+
+private:
+ search::fef::BlueprintFactory _factory;
+ FastOS_Time _timer;
+ double _sample;
+
+ void start() { _timer.SetNow(); }
+ void sample() { _sample = _timer.MilliSecsToNow(); }
+ void setupPropertyMap(Properties & props, const KeyValueVector & values);
+ void runFieldMatch(Config & cfg);
+ void runRankingExpression(Config & cfg);
+
+ AttributePtr createAttributeVector(AVBT dt, const vespalib::string & name, const vespalib::string & ctype, uint32_t numDocs,
+ AttributeVector::largeint_t value, uint32_t valueCount);
+ AttributePtr createAttributeVector(const vespalib::string & name, const vespalib::string & ctype, uint32_t numDocs,
+ AttributeVector::largeint_t value, uint32_t valueCount);
+ AttributePtr createStringAttributeVector(const vespalib::string & name, const vespalib::string & ctype, uint32_t numDocs,
+ const std::vector<vespalib::string> & values);
+ void runAttributeMatch(Config & cfg);
+ void runAttribute(Config & cfg);
+ void runDotProduct(Config & cfg);
+ void runNativeAttributeMatch(Config & cfg);
+ void runNativeFieldMatch(Config & cfg);
+ void runNativeProximity(Config & cfg);
+
+public:
+ Benchmark() : _factory(), _timer(), _sample() {}
+ int Main();
+
+};
+
+TEST_APPHOOK(Benchmark);
+
+
+bool
+Benchmark::Config::isKnown(const vespalib::string & key) const
+{
+ if (key == vespalib::string("case") ||
+ key == vespalib::string("feature") ||
+ key == vespalib::string("index") ||
+ key == vespalib::string("query") ||
+ key == vespalib::string("field") ||
+ key == vespalib::string("numruns"))
+ {
+ return true;
+ }
+ return false;
+}
+
+void
+Benchmark::Config::init(const vespalib::string & fileName)
+{
+ std::ifstream is(fileName.c_str());
+ if (is.fail()) {
+ throw std::runtime_error(fileName);
+ }
+
+ while (is.good()) {
+ std::string line;
+ std::getline(is, line);
+ if (!line.empty()) {
+ std::vector<vespalib::string> values = FtUtil::tokenize(line, "=");
+ LOG_ASSERT(values.size() == 2);
+ add(values[0], values[1]);
+ }
+ }
+}
+
+Benchmark::KeyValueVector
+Benchmark::Config::getUnknown() const
+{
+ KeyValueVector retval;
+ for (StringMap::const_iterator itr = _config.begin(); itr != _config.end(); ++itr) {
+ if (!isKnown(itr->first)) {
+ retval.push_back(std::make_pair(itr->first, itr->second));
+ }
+ }
+ return retval;
+}
+
+std::ostream & operator << (std::ostream & os, const Benchmark::Config & cfg)
+{
+ std::cout << "getCase: '" << cfg.getCase() << "'" << std::endl;
+ std::cout << "getFeature: '" << cfg.getFeature() << "'" << std::endl;
+ std::cout << "getIndex: '" << cfg.getIndex() << "'" << std::endl;
+ std::cout << "getQuery: '" << cfg.getQuery() << "'" << std::endl;
+ std::cout << "getField: '" << cfg.getField() << "'" << std::endl;
+ std::cout << "getNumRuns: '" << cfg.getNumRuns() << "'" << std::endl;
+
+ for (StringMap::const_iterator itr = cfg._config.begin(); itr != cfg._config.end(); ++itr) {
+ os << "'" << itr->first << "'='" << itr->second << "'" << std::endl;
+ }
+ return os;
+}
+
+
+void
+Benchmark::setupPropertyMap(Properties & props, const KeyValueVector & values)
+{
+ std::cout << "**** setup property map ****" << std::endl;
+ for (uint32_t i = 0; i < values.size(); ++i) {
+ std::cout << "'" << values[i].first << "'='" << values[i].second << "'" << std::endl;
+ props.add(values[i].first, values[i].second);
+ }
+ std::cout << "**** setup property map ****" << std::endl;
+}
+
+void
+Benchmark::runFieldMatch(Config & cfg)
+{
+ cfg.addIfNotFound("feature", "fieldMatch(foo)");
+ cfg.addIfNotFound("index", "foo");
+ cfg.addIfNotFound("query", "a b c d");
+ cfg.addIfNotFound("field", "a x x b x x x a x b x x x x x a b x x x x x x x x x x x x x x x x x c d");
+
+ std::cout << "**** config ****" << std::endl;
+ std::cout << cfg << std::endl;
+ std::cout << "**** config ****" << std::endl;
+
+ vespalib::string feature = cfg.getFeature();
+ vespalib::string index = cfg.getIndex();
+ vespalib::string query = cfg.getQuery();
+ vespalib::string field = cfg.getField();
+ uint32_t numRuns = cfg.getNumRuns();
+
+ FtFeatureTest ft(_factory, feature);
+
+ setupPropertyMap(ft.getIndexEnv().getProperties(), cfg.getUnknown());
+ setupFieldMatch(ft, index, query, field, NULL, 0, 0.0f, 0);
+
+ start();
+ std::cout << "**** '" << cfg.getFeature() << "' ****" << std::endl;
+ for (uint32_t i = 0; i < numRuns; ++i) {
+ ft.executeOnly(0);
+ }
+ sample();
+}
+
+void
+Benchmark::runRankingExpression(Config & cfg)
+{
+ cfg.addIfNotFound("feature", "rankingExpression");
+ cfg.addIfNotFound("rankingExpression.rankingScript", "1 + 1 + 1 + 1");
+
+ std::cout << "**** config ****" << std::endl;
+ std::cout << cfg << std::endl;
+ std::cout << "**** config ****" << std::endl;
+
+ vespalib::string feature = cfg.getFeature();
+ uint32_t numRuns = cfg.getNumRuns();
+
+ FtFeatureTest ft(_factory, feature);
+ setupPropertyMap(ft.getIndexEnv().getProperties(), cfg.getUnknown());
+ ASSERT_TRUE(ft.setup());
+
+ start();
+ std::cout << "**** '" << cfg.getFeature() << "' ****" << std::endl;
+ for (uint32_t i = 0; i < numRuns; ++i) {
+ ft.executeOnly(0);
+ }
+ sample();
+}
+
+AttributePtr
+Benchmark::createAttributeVector(const vespalib::string & name, const vespalib::string & ctype, uint32_t numDocs,
+ AttributeVector::largeint_t value, uint32_t valueCount)
+{
+ return createAttributeVector(AVBT::INT32, name, ctype, numDocs, value, valueCount);
+}
+
+AttributePtr
+Benchmark::createAttributeVector(AVBT dt, const vespalib::string & name, const vespalib::string & ctype, uint32_t numDocs,
+ AttributeVector::largeint_t value, uint32_t valueCount)
+{
+ AttributePtr a;
+ if (ctype == "single") {
+ a = AttributeFactory::createAttribute(name, AVC(dt, AVCT::SINGLE));
+ std::cout << "create single int32" << std::endl;
+ } else if (ctype == "array") {
+ a = AttributeFactory::createAttribute(name, AVC(dt, AVCT::ARRAY));
+ std::cout << "create array int32" << std::endl;
+ } else if (ctype == "wset") {
+ a = AttributeFactory::createAttribute(name, AVC(dt, AVCT::WSET));
+ std::cout << "create wset int32" << std::endl;
+ }
+
+ a->addDocs(numDocs);
+ IntegerAttribute * ia = static_cast<IntegerAttribute *>(a.get());
+ for (uint32_t i = 0; i < numDocs; ++i) {
+ if (ctype == "single") {
+ ia->update(i, value);
+ } else {
+ for (uint32_t j = 0; j < valueCount; ++j) {
+ if (ctype == "array") {
+ ia->append(i, value, 0);
+ } else {
+ ia->append(i, value + j, j);
+ }
+ }
+ }
+ }
+
+ a->commit();
+ return a;
+}
+
+AttributePtr
+Benchmark::createStringAttributeVector(const vespalib::string & name, const vespalib::string & ctype, uint32_t numDocs,
+ const std::vector<vespalib::string> & values)
+{
+ AttributePtr a;
+ if (ctype == "single") {
+ a = AttributeFactory::createAttribute(name, AVC(AVBT::STRING, AVCT::SINGLE));
+ std::cout << "create single string" << std::endl;
+ } else if (ctype == "array") {
+ a = AttributeFactory::createAttribute(name, AVC(AVBT::STRING, AVCT::ARRAY));
+ std::cout << "create array string" << std::endl;
+ } else if (ctype == "wset") {
+ a = AttributeFactory::createAttribute(name, AVC(AVBT::STRING, AVCT::WSET));
+ std::cout << "create wset string" << std::endl;
+ }
+
+ a->addDocs(numDocs);
+ StringAttribute * sa = static_cast<StringAttribute *>(a.get());
+ for (uint32_t i = 0; i < numDocs; ++i) {
+ if (ctype == "single") {
+ sa->update(i, values[0]);
+ } else {
+ for (uint32_t j = 0; j < values.size(); ++j) {
+ sa->append(i, values[j], j);
+ }
+ }
+ }
+
+ a->commit();
+ return a;
+}
+
+void
+Benchmark::runAttributeMatch(Config & cfg)
+{
+ cfg.addIfNotFound("feature", "attributeMatch(foo)");
+
+ std::cout << "**** config ****" << std::endl;
+ std::cout << cfg << std::endl;
+ std::cout << "**** config ****" << std::endl;
+
+ vespalib::string feature = cfg.getFeature();
+ uint32_t numRuns = 1000000;
+ uint32_t numDocs = 1000000;
+
+ FtFeatureTest ft(_factory, feature);
+ ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "foo");
+ ft.getIndexEnv().getAttributeManager().add(createAttributeVector("foo", "single", numDocs, 10, 10));
+ ft.getQueryEnv().getBuilder().addAttributeNode("foo");
+ setupPropertyMap(ft.getIndexEnv().getProperties(), cfg.getUnknown());
+ ASSERT_TRUE(ft.setup());
+ MatchDataBuilder::UP mdb = ft.createMatchDataBuilder();
+ mdb->setWeight("foo", 0, 0);
+ mdb->apply(0);
+ TermFieldMatchData *amd = mdb->getTermFieldMatchData(0, 0);
+
+ start();
+ std::cout << "**** '" << cfg.getFeature() << "' ****" << std::endl;
+ for (uint32_t i = 0; i < numRuns; ++i) {
+ {
+ amd->reset(0); // preserve old behavior
+ TermFieldMatchDataPosition pos;
+ pos.setElementWeight(i % numDocs);
+ amd->appendPosition(pos);
+ }
+ ft.executeOnly(i % numDocs);
+ }
+ sample();
+}
+
+void
+Benchmark::runAttribute(Config & cfg)
+{
+ cfg.addIfNotFound("feature", "attribute(foo,str4)");
+ cfg.addIfNotFound("numruns", "10000000");
+
+ std::cout << "**** config ****" << std::endl;
+ std::cout << cfg << std::endl;
+ std::cout << "**** config ****" << std::endl;
+
+ vespalib::string feature = cfg.getFeature();
+ uint32_t numRuns = cfg.getNumRuns();
+ uint32_t numDocs = cfg.getAsUint32("numdocs", 1000);
+ StringList values;
+ values.add("str0").add("str1").add("str2").add("str3").add("str4")
+ .add("str5").add("str6").add("str7").add("str8").add("str9");
+
+ FtFeatureTest ft(_factory, feature);
+ ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::WEIGHTEDSET, "foo");
+ ft.getIndexEnv().getAttributeManager().add(createStringAttributeVector("foo", "wset", numDocs, values));
+ ASSERT_TRUE(ft.setup());
+ MatchDataBuilder::UP mdb = ft.createMatchDataBuilder();
+
+ start();
+ std::cout << "**** '" << cfg.getFeature() << "' ****" << std::endl;
+ for (uint32_t i = 0; i < numRuns; ++i) {
+ ft.executeOnly(i % numDocs);
+ }
+ sample();
+}
+
+void
+Benchmark::runDotProduct(Config & cfg)
+{
+ cfg.addIfNotFound("feature", "dotProduct(wsstr,vector)");
+ cfg.addIfNotFound("numruns", "1000000");
+ cfg.addIfNotFound("numdocs", "1000");
+ cfg.addIfNotFound("numvalues", "10");
+
+ std::cout << "**** config ****" << std::endl;
+ std::cout << cfg << std::endl;
+ std::cout << "**** config ****" << std::endl;
+
+ vespalib::string feature = cfg.getFeature();
+ vespalib::string collectionType = cfg.getAsStr("collectiontype", "wset");
+ vespalib::string dataType = cfg.getAsStr("datatype", "string");
+ uint32_t numRuns = cfg.getNumRuns();
+ uint32_t numDocs = cfg.getAsUint32("numdocs", 1000);
+ uint32_t numValues = cfg.getAsUint32("numvalues", 10);
+ FtFeatureTest ft(_factory, feature);
+ ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE,
+ collectionType == "wset" ? CollectionType::WEIGHTEDSET : CollectionType::ARRAY,
+ "wsstr");
+ if (dataType == "string") {
+ StringList values;
+ for (uint32_t i = 0; i < numValues; ++i) {
+ values.add(vespalib::make_string("str%u", i));
+ }
+
+ ft.getIndexEnv().getAttributeManager().add(createStringAttributeVector("wsstr", collectionType, numDocs, values));
+ } else if (dataType == "int") {
+ ft.getIndexEnv().getAttributeManager().add(createAttributeVector(AVBT::INT32, "wsstr", collectionType, numDocs, 0, numValues));
+ } else if (dataType == "long") {
+ ft.getIndexEnv().getAttributeManager().add(createAttributeVector(AVBT::INT64, "wsstr", collectionType, numDocs, 0, numValues));
+ } else if (dataType == "float") {
+ ft.getIndexEnv().getAttributeManager().add(createAttributeVector(AVBT::FLOAT, "wsstr", collectionType, numDocs, 0, numValues));
+ } else if (dataType == "double") {
+ ft.getIndexEnv().getAttributeManager().add(createAttributeVector(AVBT::DOUBLE, "wsstr", collectionType, numDocs, 0, numValues));
+ } else {
+ std::cerr << "Illegal data type '" << dataType << std::endl;
+ }
+ ft.getQueryEnv().getProperties().add("dotProduct.vector", cfg.getAsStr("dotProduct.vector", "(str0:1)"));
+ ASSERT_TRUE(ft.setup());
+ MatchDataBuilder::UP mdb = ft.createMatchDataBuilder();
+
+ start();
+ std::cout << "**** '" << cfg.getFeature() << "' ****" << std::endl;
+ for (uint32_t i = 0; i < numRuns; ++i) {
+ ft.executeOnly(i % numDocs);
+ }
+ sample();
+}
+
+void
+Benchmark::runNativeAttributeMatch(Config & cfg)
+{
+ cfg.addIfNotFound("feature", "nativeAttributeMatch(foo)");
+ cfg.addIfNotFound("numruns", "10000000");
+ cfg.addIfNotFound("numdocs", "1000000");
+
+ std::cout << "**** config ****" << std::endl;
+ std::cout << cfg << std::endl;
+ std::cout << "**** config ****" << std::endl;
+
+ vespalib::string feature = cfg.getFeature();
+ uint32_t numRuns = cfg.getNumRuns();
+ uint32_t numDocs = cfg.getAsUint32("numdocs");
+
+ FtFeatureTest ft(_factory, feature);
+ ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "foo");
+ ft.getIndexEnv().getTableManager().addFactory(ITableFactory::SP(new FunctionTableFactory(256))); // same as backend
+ ft.getQueryEnv().getBuilder().addAttributeNode("foo")->setWeight(search::query::Weight(100));
+ setupPropertyMap(ft.getIndexEnv().getProperties(), cfg.getUnknown());
+ ASSERT_TRUE(ft.setup());
+ MatchDataBuilder::UP mdb = ft.createMatchDataBuilder();
+ mdb->setWeight("foo", 0, 0);
+ mdb->apply(0);
+
+ TermFieldMatchData *amd = mdb->getTermFieldMatchData(0, 0);
+
+ start();
+ std::cout << "**** '" << cfg.getFeature() << "' ****" << std::endl;
+ for (uint32_t i = 0; i < numRuns; ++i) {
+ uint32_t docId = i % numDocs;
+ {
+ amd->reset(docId);
+ TermFieldMatchDataPosition pos;
+ pos.setElementWeight(docId);
+ amd->appendPosition(pos);
+ }
+ ft.executeOnly(docId);
+ }
+ sample();
+}
+
+void
+Benchmark::runNativeFieldMatch(Config & cfg)
+{
+ cfg.addIfNotFound("feature", "nativeFieldMatch(foo)");
+ cfg.addIfNotFound("numruns", "10000000");
+
+ std::cout << "**** config ****" << std::endl;
+ std::cout << cfg << std::endl;
+ std::cout << "**** config ****" << std::endl;
+
+ vespalib::string feature = cfg.getFeature();
+ uint32_t numRuns = cfg.getNumRuns();
+
+ FtFeatureTest ft(_factory, feature);
+ ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo");
+ ft.getIndexEnv().getTableManager().addFactory(ITableFactory::SP(new FunctionTableFactory(256))); // same as backend
+ std::vector<vespalib::string> searchedFields;
+ searchedFields.push_back("foo");
+ ft.getQueryEnv().getBuilder().addIndexNode(searchedFields);
+ setupPropertyMap(ft.getIndexEnv().getProperties(), cfg.getUnknown());
+ ASSERT_TRUE(ft.setup());
+ MatchDataBuilder::UP mdb = ft.createMatchDataBuilder();
+
+ // setup occurrence data
+ mdb->setFieldLength("foo", 100);
+ mdb->addOccurence("foo", 0, 2);
+ mdb->addOccurence("foo", 0, 8);
+ mdb->addOccurence("foo", 0, 32);
+ mdb->addOccurence("foo", 0, 64);
+ ASSERT_TRUE(mdb->apply(0));
+
+ start();
+ std::cout << "**** '" << cfg.getFeature() << "' ****" << std::endl;
+ for (uint32_t i = 0; i < numRuns; ++i) {
+ ft.executeOnly(0);
+ }
+ sample();
+}
+
+void
+Benchmark::runNativeProximity(Config & cfg)
+{
+ cfg.addIfNotFound("feature", "nativeProximity(foo)");
+ cfg.addIfNotFound("numruns", "10000000");
+
+ std::cout << "**** config ****" << std::endl;
+ std::cout << cfg << std::endl;
+ std::cout << "**** config ****" << std::endl;
+
+ vespalib::string feature = cfg.getFeature();
+ uint32_t numRuns = cfg.getNumRuns();
+
+ FtFeatureTest ft(_factory, feature);
+ ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo");
+ ft.getIndexEnv().getTableManager().addFactory(ITableFactory::SP(new FunctionTableFactory(256))); // same as backend
+ std::vector<vespalib::string> searchedFields;
+ searchedFields.push_back("foo");
+ ft.getQueryEnv().getBuilder().addIndexNode(searchedFields); // termId 0
+ ft.getQueryEnv().getBuilder().addIndexNode(searchedFields); // termId 1
+ setupPropertyMap(ft.getIndexEnv().getProperties(), cfg.getUnknown());
+ ASSERT_TRUE(ft.setup());
+ MatchDataBuilder::UP mdb = ft.createMatchDataBuilder();
+
+ // setup occurrence data
+ mdb->setFieldLength("foo", 100);
+ mdb->addOccurence("foo", 0, 2);
+ mdb->addOccurence("foo", 0, 16);
+ mdb->addOccurence("foo", 0, 32);
+ mdb->addOccurence("foo", 1, 6);
+ mdb->addOccurence("foo", 1, 12);
+ mdb->addOccurence("foo", 1, 30);
+ ASSERT_TRUE(mdb->apply(0));
+
+ start();
+ std::cout << "**** '" << cfg.getFeature() << "' ****" << std::endl;
+ for (uint32_t i = 0; i < numRuns; ++i) {
+ ft.executeOnly(0);
+ }
+ sample();
+}
+
+int
+Benchmark::Main()
+{
+ TEST_INIT("featurebenchmark");
+
+ // Configure factory with all known blueprints.
+ setup_fef_test_plugin(_factory);
+ setup_search_features(_factory);
+
+ int idx = 1;
+ char opt;
+ const char * arg;
+ bool optError = false;
+ vespalib::string file;
+ vespalib::string feature;
+ while ((opt = GetOpt("c:f:", arg, idx)) != -1) {
+ switch (opt) {
+ case 'c':
+ file.assign(arg);
+ break;
+ case 'f':
+ feature.assign(arg);
+ break;
+ default:
+ optError = true;
+ break;
+ }
+ }
+
+ if (_argc != idx || optError) {
+ //usage();
+ return -1;
+ }
+
+ Config cfg;
+ if (file.empty()) {
+ cfg.add("case", feature);
+ } else {
+ cfg.init(file);
+ }
+
+ if (cfg.getCase() == vespalib::string("fieldMatch")) {
+ runFieldMatch(cfg);
+ } else if (cfg.getCase() == vespalib::string("rankingExpression")) {
+ runRankingExpression(cfg);
+ } else if (cfg.getCase() == vespalib::string("attributeMatch")) {
+ runAttributeMatch(cfg);
+ } else if (cfg.getCase() == vespalib::string("attribute")) {
+ runAttribute(cfg);
+ } else if (cfg.getCase() == vespalib::string("dotProduct")) {
+ runDotProduct(cfg);
+ } else if (cfg.getCase() == vespalib::string("nativeAttributeMatch")) {
+ runNativeAttributeMatch(cfg);
+ } else if (cfg.getCase() == vespalib::string("nativeFieldMatch")) {
+ runNativeFieldMatch(cfg);
+ } else if (cfg.getCase() == vespalib::string("nativeProximity")) {
+ runNativeProximity(cfg);
+ } else {
+ std::cout << "feature case '" << cfg.getCase() << "' is not known" << std::endl;
+ }
+
+ std::cout << "TET: " << _sample << " (ms)" << std::endl;
+ std::cout << "ETPD: " << std::fixed << std::setprecision(10) << _sample / cfg.getNumRuns() << " (ms)" << std::endl;
+ std::cout << "**** '" << cfg.getFeature() << "' ****" << std::endl;
+
+ TEST_DONE();
+ return 0;
+}
+
diff --git a/searchlib/src/tests/features/item_raw_score/.gitignore b/searchlib/src/tests/features/item_raw_score/.gitignore
new file mode 100644
index 00000000000..29711c1533d
--- /dev/null
+++ b/searchlib/src/tests/features/item_raw_score/.gitignore
@@ -0,0 +1 @@
+searchlib_item_raw_score_test_app
diff --git a/searchlib/src/tests/features/item_raw_score/CMakeLists.txt b/searchlib/src/tests/features/item_raw_score/CMakeLists.txt
new file mode 100644
index 00000000000..24ef339133c
--- /dev/null
+++ b/searchlib/src/tests/features/item_raw_score/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_item_raw_score_test_app
+ SOURCES
+ item_raw_score_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_item_raw_score_test_app COMMAND searchlib_item_raw_score_test_app)
diff --git a/searchlib/src/tests/features/item_raw_score/FILES b/searchlib/src/tests/features/item_raw_score/FILES
new file mode 100644
index 00000000000..bce307ff6c1
--- /dev/null
+++ b/searchlib/src/tests/features/item_raw_score/FILES
@@ -0,0 +1 @@
+item_raw_score_test.cpp
diff --git a/searchlib/src/tests/features/item_raw_score/item_raw_score_test.cpp b/searchlib/src/tests/features/item_raw_score/item_raw_score_test.cpp
new file mode 100644
index 00000000000..20f9449062d
--- /dev/null
+++ b/searchlib/src/tests/features/item_raw_score/item_raw_score_test.cpp
@@ -0,0 +1,158 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/vespalib/testkit/test_kit.h>
+#include <vespa/searchlib/features/setup.h>
+#include <vespa/searchlib/fef/test/indexenvironment.h>
+#include <vespa/searchlib/fef/test/indexenvironmentbuilder.h>
+#include <vespa/searchlib/fef/test/queryenvironment.h>
+#include <vespa/searchlib/features/item_raw_score_feature.h>
+#include <vespa/searchlib/fef/fef.h>
+#include <vespa/searchlib/fef/test/dummy_dependency_handler.h>
+
+using search::feature_t;
+using namespace search::fef;
+using namespace search::fef::test;
+using namespace search::features;
+
+const vespalib::string featureName("itemRawScore(label)");
+
+struct BlueprintFactoryFixture {
+ BlueprintFactory factory;
+ BlueprintFactoryFixture() : factory()
+ {
+ setup_search_features(factory);
+ }
+};
+
+struct IndexFixture {
+ IndexEnvironment indexEnv;
+ IndexFixture() : indexEnv()
+ {
+ IndexEnvironmentBuilder builder(indexEnv);
+ builder.addField(FieldType::INDEX, CollectionType::SINGLE, "foo");
+ builder.addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "bar");
+ }
+};
+
+struct FeatureDumpFixture : public IDumpFeatureVisitor {
+ virtual void visitDumpFeature(const vespalib::string &) {
+ TEST_ERROR("no features should be dumped");
+ }
+ FeatureDumpFixture() : IDumpFeatureVisitor() {}
+};
+
+struct Labels {
+ virtual void inject(Properties &p) const = 0;
+ virtual ~Labels() {}
+};
+struct NoLabel : public Labels {
+ virtual void inject(Properties &) const {}
+};
+struct SingleLabel : public Labels {
+ vespalib::string label;
+ uint32_t uid;
+ SingleLabel(const vespalib::string &l, uint32_t x) : label(l), uid(x) {}
+ virtual void inject(Properties &p) const {
+ vespalib::asciistream key;
+ key << "vespa.label." << label << ".id";
+ vespalib::asciistream value;
+ value << uid;
+ p.add(key.str(), value.str());
+ }
+};
+
+struct RankFixture : BlueprintFactoryFixture, IndexFixture {
+ QueryEnvironment queryEnv;
+ RankSetup rankSetup;
+ RankProgram::UP rankProgram;
+ MatchDataLayout mdl;
+ std::vector<TermFieldHandle> fooHandles;
+ std::vector<TermFieldHandle> barHandles;
+ RankFixture(size_t fooCnt, size_t barCnt, const Labels &labels)
+ : queryEnv(&indexEnv), rankSetup(factory, indexEnv),
+ rankProgram(), mdl(), fooHandles(), barHandles()
+ {
+ for (size_t i = 0; i < fooCnt; ++i) {
+ uint32_t fieldId = indexEnv.getFieldByName("foo")->id();
+ fooHandles.push_back(mdl.allocTermField(fieldId));
+ SimpleTermData term;
+ term.setUniqueId(i + 1);
+ term.addField(fieldId).setHandle(fooHandles.back());
+ queryEnv.getTerms().push_back(term);
+ }
+ for (size_t i = 0; i < barCnt; ++i) {
+ uint32_t fieldId = indexEnv.getFieldByName("bar")->id();
+ barHandles.push_back(mdl.allocTermField(fieldId));
+ SimpleTermData term;
+ term.setUniqueId(fooCnt + i + 1);
+ term.addField(fieldId).setHandle(barHandles.back());
+ queryEnv.getTerms().push_back(term);
+ }
+ labels.inject(queryEnv.getProperties());
+ rankSetup.setFirstPhaseRank(featureName);
+ rankSetup.setIgnoreDefaultRankFeatures(true);
+ ASSERT_TRUE(rankSetup.compile());
+ rankProgram = rankSetup.create_first_phase_program();
+ rankProgram->setup(mdl, queryEnv);
+ }
+ feature_t getScore(uint32_t docId) {
+ rankProgram->run(docId);
+ return *Utils::getScoreFeature(*rankProgram);
+ }
+ void setScore(TermFieldHandle handle, uint32_t docId, feature_t score) {
+ rankProgram->match_data().resolveTermField(handle)->setRawScore(docId, score);
+ }
+ void setFooScore(uint32_t i, uint32_t docId, feature_t score) {
+ ASSERT_LESS(i, fooHandles.size());
+ setScore(fooHandles[i], docId, score);
+ }
+ void setBarScore(uint32_t i, uint32_t docId, feature_t score) {
+ ASSERT_LESS(i, barHandles.size());
+ setScore(barHandles[i], docId, score);
+ }
+};
+
+TEST_F("require that blueprint can be created from factory", BlueprintFactoryFixture) {
+ Blueprint::SP bp = f.factory.createBlueprint("itemRawScore");
+ EXPECT_TRUE(bp.get() != 0);
+ EXPECT_TRUE(dynamic_cast<ItemRawScoreBlueprint*>(bp.get()) != 0);
+}
+
+TEST_FFF("require that no features are dumped", ItemRawScoreBlueprint, IndexFixture, FeatureDumpFixture) {
+ f1.visitDumpFeatures(f2.indexEnv, f3);
+}
+
+TEST_FF("require that setup can be done on random label", ItemRawScoreBlueprint, IndexFixture) {
+ DummyDependencyHandler deps(f1);
+ f1.setName(vespalib::make_string("%s(random_label)", f1.getBaseName().c_str()));
+ EXPECT_TRUE(((Blueprint&)f1).setup(f2.indexEnv, std::vector<vespalib::string>(1, "random_label")));
+}
+
+TEST_FF("require that no label gives 0.0 item raw score", NoLabel(), RankFixture(2, 2, f1)) {
+ EXPECT_EQUAL(0.0, f2.getScore(10));
+}
+
+TEST_FF("require that unrelated label gives 0.0 item raw score", SingleLabel("unrelated", 1), RankFixture(2, 2, f1)) {
+ EXPECT_EQUAL(0.0, f2.getScore(10));
+}
+
+TEST_FF("require that item raw score can be obtained", SingleLabel("label", 1), RankFixture(2, 2, f1)) {
+ f2.setFooScore(0, 10, 5.0);
+ EXPECT_EQUAL(5.0, f2.getScore(10));
+}
+
+TEST_FF("require that other raw scores are ignored", SingleLabel("label", 2), RankFixture(2, 2, f1)) {
+ f2.setFooScore(0, 10, 1.0);
+ f2.setFooScore(1, 10, 2.0);
+ f2.setBarScore(0, 10, 5.0);
+ f2.setBarScore(1, 10, 6.0);
+ EXPECT_EQUAL(2.0, f2.getScore(10));
+}
+
+TEST_FF("require that stale raw score is ignored", SingleLabel("label", 2), RankFixture(2, 2, f1)) {
+ f2.setFooScore(0, 10, 1.0);
+ f2.setFooScore(1, 5, 2.0);
+ EXPECT_EQUAL(0.0, f2.getScore(10));
+}
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/features/native_dot_product/.gitignore b/searchlib/src/tests/features/native_dot_product/.gitignore
new file mode 100644
index 00000000000..d95f15f4492
--- /dev/null
+++ b/searchlib/src/tests/features/native_dot_product/.gitignore
@@ -0,0 +1 @@
+searchlib_native_dot_product_test_app
diff --git a/searchlib/src/tests/features/native_dot_product/CMakeLists.txt b/searchlib/src/tests/features/native_dot_product/CMakeLists.txt
new file mode 100644
index 00000000000..2dad758c82d
--- /dev/null
+++ b/searchlib/src/tests/features/native_dot_product/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_native_dot_product_test_app
+ SOURCES
+ native_dot_product_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_native_dot_product_test_app COMMAND searchlib_native_dot_product_test_app)
diff --git a/searchlib/src/tests/features/native_dot_product/FILES b/searchlib/src/tests/features/native_dot_product/FILES
new file mode 100644
index 00000000000..ab007656448
--- /dev/null
+++ b/searchlib/src/tests/features/native_dot_product/FILES
@@ -0,0 +1 @@
+native_dot_product_test.cpp
diff --git a/searchlib/src/tests/features/native_dot_product/native_dot_product_test.cpp b/searchlib/src/tests/features/native_dot_product/native_dot_product_test.cpp
new file mode 100644
index 00000000000..3e3702cceec
--- /dev/null
+++ b/searchlib/src/tests/features/native_dot_product/native_dot_product_test.cpp
@@ -0,0 +1,191 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/vespalib/testkit/test_kit.h>
+#include <vespa/searchlib/features/setup.h>
+#include <vespa/searchlib/fef/test/indexenvironment.h>
+#include <vespa/searchlib/fef/test/indexenvironmentbuilder.h>
+#include <vespa/searchlib/fef/test/queryenvironment.h>
+#include <vespa/searchlib/features/native_dot_product_feature.h>
+#include <vespa/searchlib/fef/fef.h>
+#include <vespa/searchlib/query/weight.h>
+#include <vespa/searchlib/fef/test/dummy_dependency_handler.h>
+
+using search::feature_t;
+using namespace search::fef;
+using namespace search::fef::test;
+using namespace search::features;
+
+const std::string featureName("nativeDotProduct(foo)");
+
+struct BlueprintFactoryFixture {
+ BlueprintFactory factory;
+ BlueprintFactoryFixture() : factory()
+ {
+ setup_search_features(factory);
+ }
+};
+
+struct IndexFixture {
+ IndexEnvironment indexEnv;
+ IndexFixture() : indexEnv()
+ {
+ IndexEnvironmentBuilder builder(indexEnv);
+ builder.addField(FieldType::ATTRIBUTE, CollectionType::WEIGHTEDSET, "foo");
+ builder.addField(FieldType::ATTRIBUTE, CollectionType::WEIGHTEDSET, "bar");
+ }
+};
+
+struct FeatureDumpFixture : public IDumpFeatureVisitor {
+ virtual void visitDumpFeature(const vespalib::string &) {
+ TEST_ERROR("no features should be dumped");
+ }
+ FeatureDumpFixture() : IDumpFeatureVisitor() {}
+};
+
+std::vector<uint32_t> vec() {
+ std::vector<uint32_t> ret;
+ return ret;
+}
+
+std::vector<uint32_t> vec(uint32_t w1) {
+ std::vector<uint32_t> ret;
+ ret.push_back(w1);
+ return ret;
+}
+
+std::vector<uint32_t> vec(uint32_t w1, uint32_t w2) {
+ std::vector<uint32_t> ret;
+ ret.push_back(w1);
+ ret.push_back(w2);
+ return ret;
+}
+
+std::vector<uint32_t> vec(uint32_t w1, uint32_t w2, uint32_t w3) {
+ std::vector<uint32_t> ret;
+ ret.push_back(w1);
+ ret.push_back(w2);
+ ret.push_back(w3);
+ return ret;
+}
+
+struct RankFixture : BlueprintFactoryFixture, IndexFixture {
+ QueryEnvironment queryEnv;
+ RankSetup rankSetup;
+ RankProgram::UP rankProgram;
+ MatchDataLayout mdl;
+ std::vector<TermFieldHandle> fooHandles;
+ std::vector<TermFieldHandle> barHandles;
+ RankFixture(const std::vector<uint32_t> &fooWeights,
+ const std::vector<uint32_t> &barWeights)
+ : queryEnv(&indexEnv), rankSetup(factory, indexEnv),
+ rankProgram(), mdl(), fooHandles(), barHandles()
+ {
+ for (size_t i = 0; i < fooWeights.size(); ++i) {
+ uint32_t fieldId = indexEnv.getFieldByName("foo")->id();
+ fooHandles.push_back(mdl.allocTermField(fieldId));
+ SimpleTermData term;
+ term.addField(fieldId).setHandle(fooHandles.back());
+ term.setWeight(search::query::Weight(fooWeights[i]));
+ queryEnv.getTerms().push_back(term);
+ }
+ for (size_t i = 0; i < barWeights.size(); ++i) {
+ uint32_t fieldId = indexEnv.getFieldByName("bar")->id();
+ barHandles.push_back(mdl.allocTermField(fieldId));
+ SimpleTermData term;
+ term.addField(fieldId).setHandle(barHandles.back());
+ term.setWeight(search::query::Weight(barWeights[i]));
+ queryEnv.getTerms().push_back(term);
+ }
+ rankSetup.setFirstPhaseRank(featureName);
+ rankSetup.setIgnoreDefaultRankFeatures(true);
+ ASSERT_TRUE(rankSetup.compile());
+ rankProgram = rankSetup.create_first_phase_program();
+ rankProgram->setup(mdl, queryEnv);
+ }
+ feature_t getScore(uint32_t docId) {
+ rankProgram->run(docId);
+ return *Utils::getScoreFeature(*rankProgram);
+ }
+ void setFooWeight(uint32_t i, uint32_t docId, int32_t index_weight) {
+ ASSERT_LESS(i, fooHandles.size());
+ TermFieldMatchDataPosition pos;
+ pos.setElementWeight(index_weight);
+ rankProgram->match_data().resolveTermField(fooHandles[i])->reset(docId);
+ rankProgram->match_data().resolveTermField(fooHandles[i])->appendPosition(pos);
+ }
+ void setBarWeight(uint32_t i, uint32_t docId, int32_t index_weight) {
+ ASSERT_LESS(i, barHandles.size());
+ TermFieldMatchDataPosition pos;
+ pos.setElementWeight(index_weight);
+ rankProgram->match_data().resolveTermField(barHandles[i])->reset(docId);
+ rankProgram->match_data().resolveTermField(barHandles[i])->appendPosition(pos);
+ }
+};
+
+TEST_F("require that blueprint can be created from factory", BlueprintFactoryFixture) {
+ Blueprint::SP bp = f.factory.createBlueprint("nativeDotProduct");
+ EXPECT_TRUE(bp.get() != 0);
+ EXPECT_TRUE(dynamic_cast<NativeDotProductBlueprint*>(bp.get()) != 0);
+}
+
+TEST_FFF("require that no features are dumped", NativeDotProductBlueprint, IndexFixture, FeatureDumpFixture) {
+ f1.visitDumpFeatures(f2.indexEnv, f3);
+}
+
+TEST_FF("require that setup can be done on index field", NativeDotProductBlueprint, IndexFixture) {
+ DummyDependencyHandler deps(f1);
+ f1.setName(vespalib::make_string("%s(foo)", f1.getBaseName().c_str()));
+ EXPECT_TRUE(((Blueprint&)f1).setup(f2.indexEnv, std::vector<vespalib::string>(1, "foo")));
+}
+
+TEST_FF("require that setup can be done on attribute field", NativeDotProductBlueprint, IndexFixture) {
+ DummyDependencyHandler deps(f1);
+ f1.setName(vespalib::make_string("%s(bar)", f1.getBaseName().c_str()));
+ EXPECT_TRUE(((Blueprint&)f1).setup(f2.indexEnv, std::vector<vespalib::string>(1, "bar")));
+}
+
+TEST_FF("require that setup fails for unknown field", NativeDotProductBlueprint, IndexFixture) {
+ DummyDependencyHandler deps(f1);
+ f1.setName(vespalib::make_string("%s(unknown)", f1.getBaseName().c_str()));
+ EXPECT_TRUE(!((Blueprint&)f1).setup(f2.indexEnv, std::vector<vespalib::string>(1, "unknown")));
+}
+
+TEST_F("require that not searching a field will give it 0.0 dot product", RankFixture(vec(), vec(1, 2, 3))) {
+ EXPECT_EQUAL(0.0, f1.getScore(10));
+}
+
+TEST_F("require that dot product works for single match", RankFixture(vec(5), vec())) {
+ f1.setFooWeight(0, 10, 7);
+ EXPECT_EQUAL(35, f1.getScore(10));
+}
+
+TEST_F("require that dot product works for multiple matches", RankFixture(vec(1, 3, 5), vec())) {
+ f1.setFooWeight(0, 10, 2);
+ f1.setFooWeight(1, 10, 4);
+ f1.setFooWeight(2, 10, 6);
+ EXPECT_EQUAL(44, f1.getScore(10));
+}
+
+TEST_F("require that stale data is ignored", RankFixture(vec(1, 3, 5), vec())) {
+ f1.setFooWeight(0, 10, 2);
+ f1.setFooWeight(1, 9, 4);
+ f1.setFooWeight(2, 10, 6);
+ EXPECT_EQUAL(32, f1.getScore(10));
+}
+
+TEST_F("require that data from other fields is ignored", RankFixture(vec(1, 3), vec(5, 7))) {
+ f1.setFooWeight(0, 10, 2);
+ f1.setFooWeight(1, 10, 4);
+ f1.setBarWeight(0, 10, 6);
+ f1.setBarWeight(1, 10, 8);
+ EXPECT_EQUAL(14, f1.getScore(10));
+}
+
+TEST_F("require that negative weights in the index works", RankFixture(vec(1, 3), vec())) {
+ f1.setFooWeight(0, 10, 2);
+ f1.setFooWeight(1, 10, -4);
+ EXPECT_EQUAL(-10, f1.getScore(10));
+}
+
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/features/prod_features.cpp b/searchlib/src/tests/features/prod_features.cpp
new file mode 100644
index 00000000000..b0bac4b576d
--- /dev/null
+++ b/searchlib/src/tests/features/prod_features.cpp
@@ -0,0 +1,1937 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("prod_features_test");
+
+#include "prod_features.h"
+#include <boost/tokenizer.hpp>
+#include <vespa/searchlib/attribute/attributeguard.h>
+#include <vespa/searchlib/attribute/attributefactory.h>
+#include <vespa/searchlib/attribute/attributevector.h>
+#include <vespa/searchlib/attribute/attributevector.hpp>
+#include <vespa/searchlib/attribute/extendableattributes.h>
+#include <vespa/searchlib/attribute/floatbase.h>
+#include <vespa/searchlib/attribute/integerbase.h>
+#include <vespa/searchlib/attribute/stringbase.h>
+#include <vespa/searchlib/features/agefeature.h>
+#include <vespa/searchlib/features/array_parser.hpp>
+#include <vespa/searchlib/features/attributefeature.h>
+#include <vespa/searchlib/features/attributematchfeature.h>
+#include <vespa/searchlib/features/closenessfeature.h>
+#include <vespa/searchlib/features/distancefeature.h>
+#include <vespa/searchlib/features/dotproductfeature.h>
+#include <vespa/searchlib/features/fieldlengthfeature.h>
+#include <vespa/searchlib/features/fieldmatchfeature.h>
+#include <vespa/searchlib/features/fieldtermmatchfeature.h>
+#include <vespa/searchlib/features/firstphasefeature.h>
+#include <vespa/searchlib/features/foreachfeature.h>
+#include <vespa/searchlib/features/freshnessfeature.h>
+#include <vespa/searchlib/features/matchesfeature.h>
+#include <vespa/searchlib/features/matchfeature.h>
+#include <vespa/searchlib/features/nowfeature.h>
+#include <vespa/searchlib/features/queryfeature.h>
+#include <vespa/searchlib/features/querytermcountfeature.h>
+#include <vespa/searchlib/features/randomfeature.h>
+#include <vespa/searchlib/features/rankingexpressionfeature.h>
+#include <vespa/searchlib/features/setup.h>
+#include <vespa/searchlib/features/termfeature.h>
+#include <vespa/searchlib/features/utils.h>
+#include <vespa/searchlib/features/valuefeature.h>
+#include <vespa/searchlib/features/weighted_set_parser.hpp>
+#include <vespa/searchlib/fef/featurenamebuilder.h>
+#include <vespa/searchlib/fef/indexproperties.h>
+#include <vespa/searchlib/fef/queryproperties.h>
+#include <vespa/searchlib/fef/test/plugin/setup.h>
+#include <vespa/searchlib/fef/test/dummy_dependency_handler.h>
+#include <vespa/searchlib/util/rand48.h>
+#include <vespa/vespalib/util/stringfmt.h>
+#include <vespa/vespalib/geo/zcurve.h>
+#include <vespa/vespalib/util/string_hash.h>
+
+using namespace search::features;
+using namespace search::fef;
+using namespace search::fef::test;
+
+using search::AttributeVector;
+using search::AttributeFactory;
+using search::IntegerAttribute;
+using search::FloatingPointAttribute;
+using search::StringAttribute;
+using search::WeightedSetStringExtAttribute;
+using search::attribute::WeightedEnumContent;
+
+typedef AttributeVector::SP AttributePtr;
+
+typedef search::attribute::Config AVC;
+typedef search::attribute::BasicType AVBT;
+typedef search::attribute::CollectionType AVCT;
+
+const double EPS = 10e-6;
+
+
+TEST_APPHOOK(Test);
+
+int
+Test::Main()
+{
+ TEST_INIT("prod_features_test");
+
+ // Configure factory with all known blueprints.
+ setup_fef_test_plugin(_factory);
+ setup_search_features(_factory);
+
+ // Test all features.
+ TEST_DO(testFramework()); TEST_FLUSH();
+ TEST_DO(testFtLib()); TEST_FLUSH();
+ TEST_DO(testAge()); TEST_FLUSH();
+ TEST_DO(testAttribute()); TEST_FLUSH();
+ TEST_DO(testAttributeMatch()); TEST_FLUSH();
+ TEST_DO(testCloseness()); TEST_FLUSH();
+ TEST_DO(testDistance()); TEST_FLUSH();
+ TEST_DO(testDistanceToPath()); TEST_FLUSH();
+ TEST_DO(testDotProduct()); TEST_FLUSH();
+ TEST_DO(testFieldLength()); TEST_FLUSH();
+ TEST_DO(testFieldMatch()); TEST_FLUSH();
+ TEST_DO(testFieldTermMatch()); TEST_FLUSH();
+ TEST_DO(testFirstPhase()); TEST_FLUSH();
+ TEST_DO(testForeach()); TEST_FLUSH();
+ TEST_DO(testFreshness()); TEST_FLUSH();
+ TEST_DO(testMatch()); TEST_FLUSH();
+ TEST_DO(testMatches()); TEST_FLUSH();
+ TEST_DO(testNow()); TEST_FLUSH();
+ TEST_DO(testQuery()); TEST_FLUSH();
+ TEST_DO(testQueryTermCount()); TEST_FLUSH();
+ TEST_DO(testRandom()); TEST_FLUSH();
+ TEST_DO(testRankingExpression()); TEST_FLUSH();
+ TEST_DO(testTerm()); TEST_FLUSH();
+ TEST_DO(testTermDistance()); TEST_FLUSH();
+ TEST_DO(testUtils()); TEST_FLUSH();
+
+ TEST_DONE();
+ return 0;
+}
+
+
+void
+Test::testFtLib()
+{
+ { // toQuery
+ FtQuery q = FtUtil::toQuery("a b!50 0.5:c!200%0.5 d%0.3 e!300 0.3:f ");
+ ASSERT_TRUE(q.size() == 6);
+ EXPECT_EQUAL(q[0].term, vespalib::string("a"));
+ EXPECT_EQUAL(q[0].termWeight.percent(), 100);
+ EXPECT_APPROX(q[0].connexity, 0.1f, EPS);
+ EXPECT_APPROX(q[0].significance, 0.1f, EPS);
+ EXPECT_EQUAL(q[1].term, vespalib::string("b"));
+ EXPECT_EQUAL(q[1].termWeight.percent(), 50);
+ EXPECT_APPROX(q[1].connexity, 0.1f, EPS);
+ EXPECT_APPROX(q[1].significance, 0.1f, EPS);
+ EXPECT_EQUAL(q[2].term, vespalib::string("c"));
+ EXPECT_EQUAL(q[2].termWeight.percent(), 200);
+ EXPECT_APPROX(q[2].connexity, 0.5f, EPS);
+ EXPECT_APPROX(q[2].significance, 0.5f, EPS);
+ EXPECT_EQUAL(q[3].term, vespalib::string("d"));
+ EXPECT_EQUAL(q[3].termWeight.percent(), 100);
+ EXPECT_APPROX(q[3].connexity, 0.1f, EPS);
+ EXPECT_APPROX(q[3].significance, 0.3f, EPS);
+ EXPECT_EQUAL(q[4].term, vespalib::string("e"));
+ EXPECT_EQUAL(q[4].termWeight.percent(), 300);
+ EXPECT_APPROX(q[4].connexity, 0.1f, EPS);
+ EXPECT_APPROX(q[4].significance, 0.1f, EPS);
+ EXPECT_EQUAL(q[5].term, vespalib::string("f"));
+ EXPECT_EQUAL(q[5].termWeight.percent(), 100);
+ EXPECT_APPROX(q[5].connexity, 0.3f, EPS);
+ EXPECT_APPROX(q[5].significance, 0.1f, EPS);
+ }
+ { // toRankResult
+ RankResult rr = toRankResult("foo", "a:0.5 b:-0.5 c:2 d:3 ");
+ std::vector<vespalib::string> keys = rr.getKeys();
+ ASSERT_TRUE(keys.size() == 4);
+ EXPECT_EQUAL(keys[0], vespalib::string("foo.a"));
+ EXPECT_EQUAL(keys[1], vespalib::string("foo.b"));
+ EXPECT_EQUAL(keys[2], vespalib::string("foo.c"));
+ EXPECT_EQUAL(keys[3], vespalib::string("foo.d"));
+ EXPECT_APPROX(rr.getScore("foo.a"), 0.5f, EPS);
+ EXPECT_APPROX(rr.getScore("foo.b"), -0.5f, EPS);
+ EXPECT_APPROX(rr.getScore("foo.c"), 2.0f, EPS);
+ EXPECT_APPROX(rr.getScore("foo.d"), 3.0f, EPS);
+ }
+}
+
+
+void
+Test::testAge()
+{
+ { // Test blueprint
+ FtIndexEnvironment idx_env;
+ idx_env.getBuilder()
+ .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "datetime")
+ .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "datetime2");
+
+ AgeBlueprint pt;
+ EXPECT_TRUE(assertCreateInstance(pt, "age"));
+
+ StringList params, in, out;
+ FT_SETUP_FAIL(pt, idx_env, params);
+ FT_SETUP_OK(pt, idx_env, params.add("datetime"), in.add("now"), out.add("out"));
+ FT_SETUP_FAIL(pt, idx_env, params.add("datetime2"));
+
+ FT_DUMP_EMPTY(_factory, "age");
+ }
+
+ { // Test executor
+ assertAge(0, "doctime", 60, 120);
+ assertAge(60, "doctime", 180, 120);
+ assertAge(15000000000, "doctime", 20000000000, 5000000000);
+ }
+}
+
+void
+Test::assertAge(feature_t expAge, const vespalib::string & attr, uint64_t now, uint64_t docTime)
+{
+ vespalib::string feature = "age(" + attr + ")";
+ FtFeatureTest ft(_factory, feature);
+ setupForAgeTest(ft, docTime);
+ ft.getQueryEnv().getProperties().add(queryproperties::now::SystemTime::NAME,
+ vespalib::make_string("%" PRIu64, now));
+ ASSERT_TRUE(ft.setup());
+ ASSERT_TRUE(ft.execute(RankResult().addScore(feature, expAge)));
+}
+
+void
+Test::setupForAgeTest(FtFeatureTest & ft, uint64_t docTime)
+{
+ AttributePtr doctime = AttributeFactory::createAttribute("doctime", AVC(AVBT::INT64, AVCT::SINGLE));
+ ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "doctime");
+ doctime->addReservedDoc();
+ doctime->addDocs(1);
+ ft.getIndexEnv().getAttributeManager().add(doctime);
+ (static_cast<IntegerAttribute *>(doctime.get()))->update(1, docTime);
+ doctime->commit();
+}
+
+void
+Test::testAttribute()
+{
+ AttributeBlueprint prototype;
+ {
+ FtIndexEnvironment idx_env;
+ idx_env.getBuilder()
+ .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "bar");
+
+ EXPECT_TRUE(assertCreateInstance(prototype, "attribute"));
+
+ StringList params, in, out;
+ FT_SETUP_FAIL(prototype, idx_env, params); // expects 1 - 2 params
+
+ FT_SETUP_OK(prototype, idx_env, params.add("bar"), in,
+ out.add("value").add("weight").add("contains").add("count"));
+ FT_SETUP_OK(prototype, idx_env, params.add("0"), in, out);
+
+ FT_DUMP_EMPTY(_factory, "attribute");
+ }
+ { // single attributes
+ RankResult exp;
+ exp.addScore("attribute(sint)", 10).
+ addScore("attribute(sint,0)", 10).
+ addScore("attribute(sfloat)", 60.5f).
+ addScore("attribute(sstr)", (feature_t)vespalib::hash_code("foo")).
+ addScore("attribute(sint).count", 1).
+ addScore("attribute(sfloat).count", 1).
+ addScore("attribute(sstr).count", 1).
+ addScore("attribute(udefint)", search::attribute::getUndefined<feature_t>()).
+ addScore("attribute(udeffloat)", search::attribute::getUndefined<feature_t>()).
+ addScore("attribute(udefstr)", (feature_t)vespalib::hash_code(""));
+
+ FtFeatureTest ft(_factory, exp.getKeys());
+ ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "sint").
+ addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "sfloat").
+ addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "sstr").
+ addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "udefint").
+ addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "udeffloat").
+ addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "udefstr");
+ setupForAttributeTest(ft);
+ ASSERT_TRUE(ft.setup());
+ ASSERT_TRUE(ft.execute(exp));
+ }
+ { // array attributes
+ RankResult exp;
+ exp.addScore("attribute(aint)", 0).
+ addScore("attribute(aint,0)", 20).
+ addScore("attribute(aint,1)", 30).
+ addScore("attribute(aint,2)", 0).
+ addScore("attribute(afloat,0)", 70.5f).
+ addScore("attribute(afloat,1)", 80.5f).
+ addScore("attribute(astr,0)", (feature_t)vespalib::hash_code("bar")).
+ addScore("attribute(astr,1)", (feature_t)vespalib::hash_code("baz")).
+ addScore("attribute(aint).count", 2).
+ addScore("attribute(aint,0).count", 0).
+ addScore("attribute(afloat).count", 2).
+ addScore("attribute(afloat,0).count", 0).
+ addScore("attribute(astr).count", 2).
+ addScore("attribute(astr,0).count", 0);
+
+ FtFeatureTest ft(_factory, exp.getKeys());
+ ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::ARRAY, "aint").
+ addField(FieldType::ATTRIBUTE, CollectionType::ARRAY, "afloat").
+ addField(FieldType::ATTRIBUTE, CollectionType::ARRAY, "astr");
+ setupForAttributeTest(ft);
+ ASSERT_TRUE(ft.setup());
+ ASSERT_TRUE(ft.execute(exp));
+ }
+ { // weighted set attributes
+ RankResult exp;
+ exp.addScore("attribute(wsint).value", 0).
+ addScore("attribute(wsint).weight", 0).
+ addScore("attribute(wsint).contains", 0).
+ addScore("attribute(wsint,100).value", 0).
+ addScore("attribute(wsint,100).weight", 0).
+ addScore("attribute(wsint,100).contains", 0).
+ addScore("attribute(wsint,40).value", 40).
+ addScore("attribute(wsint,40).weight", 10).
+ addScore("attribute(wsint,40).contains", 1).
+ addScore("attribute(wsint,50).value", 50).
+ addScore("attribute(wsint,50).weight", 20).
+ addScore("attribute(wsint,50).contains", 1).
+ addScore("attribute(wsfloat).value", 0).
+ addScore("attribute(wsfloat).weight", 0).
+ addScore("attribute(wsfloat).contains", 0).
+ addScore("attribute(wsfloat,1000.5).value", 0).
+ addScore("attribute(wsfloat,1000.5).weight", 0).
+ addScore("attribute(wsfloat,1000.5).contains", 0).
+ addScore("attribute(wsfloat,90.5).value", 90.5f).
+ addScore("attribute(wsfloat,90.5).weight", -30).
+ addScore("attribute(wsfloat,90.5).contains", 1).
+ addScore("attribute(wsfloat,100.5).value", 100.5f).
+ addScore("attribute(wsfloat,100.5).weight", -40).
+ addScore("attribute(wsfloat,100.5).contains", 1).
+ addScore("attribute(wsstr).value", 0).
+ addScore("attribute(wsstr).weight", 0).
+ addScore("attribute(wsstr).contains", 0).
+ addScore("attribute(wsstr,foo).value", 0).
+ addScore("attribute(wsstr,foo).weight", 0).
+ addScore("attribute(wsstr,foo).contains", 0).
+ addScore("attribute(wsstr,qux).value", (feature_t)vespalib::hash_code("qux")).
+ addScore("attribute(wsstr,qux).weight", 11).
+ addScore("attribute(wsstr,qux).contains", 1).
+ addScore("attribute(wsstr,quux).value", (feature_t)vespalib::hash_code("quux")).
+ addScore("attribute(wsstr,quux).weight", 12).
+ addScore("attribute(wsstr,quux).contains", 1).
+ addScore("attribute(wsint).count", 2).
+ addScore("attribute(wsint,40).count", 0).
+ addScore("attribute(wsfloat).count", 2).
+ addScore("attribute(wsfloat,90.5).count", 0).
+ addScore("attribute(wsstr).count", 2).
+ addScore("attribute(wsstr,qux).count", 0);
+
+ FtFeatureTest ft(_factory, exp.getKeys());
+ ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::WEIGHTEDSET, "wsint").
+ addField(FieldType::ATTRIBUTE, CollectionType::WEIGHTEDSET, "wsfloat").
+ addField(FieldType::ATTRIBUTE, CollectionType::WEIGHTEDSET, "wsstr");
+ setupForAttributeTest(ft);
+ ASSERT_TRUE(ft.setup());
+ ASSERT_TRUE(ft.execute(exp));
+ }
+ { // unique only attribute
+ RankResult exp;
+ exp.addScore("attribute(unique).value", 0).
+ addScore("attribute(unique).weight", 0).
+ addScore("attribute(unique).contains", 0).
+ addScore("attribute(unique).count", 0);
+
+ FtFeatureTest ft(_factory, exp.getKeys());
+ setupForAttributeTest(ft);
+ ASSERT_TRUE(ft.setup());
+ //ASSERT_TRUE(ft.execute(exp));
+ }
+}
+
+
+void
+Test::setupForAttributeTest(FtFeatureTest &ft, bool setup_env)
+{
+ // setup an original attribute manager with attributes
+ std::vector<AttributePtr> avs;
+ avs.push_back(AttributeFactory::createAttribute("sint", AVC(AVBT::INT32, AVCT::SINGLE))); // 0
+ avs.push_back(AttributeFactory::createAttribute("aint", AVC(AVBT::INT32, AVCT::ARRAY))); // 1
+ avs.push_back(AttributeFactory::createAttribute("wsint", AVC(AVBT::INT32, AVCT::WSET))); // 2
+ avs.push_back(AttributeFactory::createAttribute("sfloat", AVC(AVBT::FLOAT, AVCT::SINGLE))); // 3
+ avs.push_back(AttributeFactory::createAttribute("afloat", AVC(AVBT::FLOAT, AVCT::ARRAY))); // 4
+ avs.push_back(AttributeFactory::createAttribute("wsfloat",AVC(AVBT::FLOAT, AVCT::WSET))); // 5
+ avs.push_back(AttributeFactory::createAttribute("sstr", AVC(AVBT::STRING, AVCT::SINGLE))); // 6
+ avs.push_back(AttributeFactory::createAttribute("astr", AVC(AVBT::STRING, AVCT::ARRAY))); // 7
+ avs.push_back(AttributeFactory::createAttribute("wsstr", AVC(AVBT::STRING, AVCT::WSET))); // 8
+ avs.push_back(AttributeFactory::createAttribute("udefint", AVC(AVBT::INT32, AVCT::SINGLE))); // 9
+ avs.push_back(AttributeFactory::createAttribute("udeffloat", AVC(AVBT::FLOAT, AVCT::SINGLE))); // 10
+ avs.push_back(AttributeFactory::createAttribute("udefstr", AVC(AVBT::STRING, AVCT::SINGLE))); // 11
+
+ // simulate a unique only attribute as specified in sd
+ AVC cfg(AVBT::INT32, AVCT::SINGLE);
+ cfg.setFastSearch(true);
+ avs.push_back(AttributeFactory::createAttribute("unique", cfg)); // 9
+
+ if (setup_env) {
+ // register attributes in index environment
+ ft.getIndexEnv().getBuilder()
+ .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "sint")
+ .addField(FieldType::ATTRIBUTE, CollectionType::ARRAY, "aint")
+ .addField(FieldType::ATTRIBUTE, CollectionType::WEIGHTEDSET, "wsint")
+ .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "sfloat")
+ .addField(FieldType::ATTRIBUTE, CollectionType::ARRAY, "afloat")
+ .addField(FieldType::ATTRIBUTE, CollectionType::WEIGHTEDSET, "wsfloat")
+ .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "sstr")
+ .addField(FieldType::ATTRIBUTE, CollectionType::ARRAY, "astr")
+ .addField(FieldType::ATTRIBUTE, CollectionType::WEIGHTEDSET, "wsstr")
+ .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "udefint")
+ .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "udeffloat")
+ .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "udefstr")
+ .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "unique");
+ }
+
+ for (uint32_t i = 0; i < avs.size(); ++i) {
+ avs[i]->addReservedDoc();
+ avs[i]->addDocs(1);
+ ft.getIndexEnv().getAttributeManager().add(avs[i]);
+ }
+
+ // integer attributes
+ (static_cast<IntegerAttribute *>(avs[0].get()))->update(1, 10);
+ (static_cast<IntegerAttribute *>(avs[1].get()))->append(1, 20, 0);
+ (static_cast<IntegerAttribute *>(avs[1].get()))->append(1, 30, 0);
+ (static_cast<IntegerAttribute *>(avs[2].get()))->append(1, 40, 10);
+ (static_cast<IntegerAttribute *>(avs[2].get()))->append(1, 50, 20);
+ (static_cast<IntegerAttribute *>(avs[9].get()))->update(1, search::attribute::getUndefined<int32_t>());
+ // feature_t attributes
+ (static_cast<FloatingPointAttribute *>(avs[3].get()))->update(1, 60.5f);
+ (static_cast<FloatingPointAttribute *>(avs[4].get()))->append(1, 70.5f, 0);
+ (static_cast<FloatingPointAttribute *>(avs[4].get()))->append(1, 80.5f, 0);
+ (static_cast<FloatingPointAttribute *>(avs[5].get()))->append(1, 90.5f, -30);
+ (static_cast<FloatingPointAttribute *>(avs[5].get()))->append(1, 100.5f, -40);
+ (static_cast<FloatingPointAttribute *>(avs[10].get()))->update(1, search::attribute::getUndefined<float>());
+ // string attributes
+ (static_cast<StringAttribute *>(avs[6].get()))->update(1, "foo");
+ (static_cast<StringAttribute *>(avs[7].get()))->append(1, "bar", 0);
+ (static_cast<StringAttribute *>(avs[7].get()))->append(1, "baz", 0);
+ (static_cast<StringAttribute *>(avs[8].get()))->append(1, "qux", 11);
+ (static_cast<StringAttribute *>(avs[8].get()))->append(1, "quux", 12);
+ (static_cast<StringAttribute *>(avs[11].get()))->update(1, "");
+
+ for (uint32_t i = 0; i < avs.size() - 1; ++i) { // do not commit the noupdate attribute
+ avs[i]->commit();
+ }
+
+ // save 'sint' and load it into 'unique' (only way to set a noupdate attribute)
+ ASSERT_TRUE(avs[0]->saveAs(avs[9]->getBaseFileName()));
+ ASSERT_TRUE(avs[9]->load());
+}
+
+void
+Test::testCloseness()
+{
+ { // Test blueprint.
+ ClosenessBlueprint pt;
+
+ EXPECT_TRUE(assertCreateInstance(pt, "closeness"));
+
+ StringList params, in, out;
+ FT_SETUP_FAIL(pt, params);
+ FT_SETUP_OK(pt, params.add("name"), in.add("distance(name)"), out.add("out").add("logscale"));
+
+ FT_DUMP_EMPTY(_factory, "closeness");
+ }
+
+ { // Test executor.
+ assertCloseness(1, "pos", 0);
+ assertCloseness(0.8, "pos", 1802661);
+ assertCloseness(0, "pos", 9013306);
+ // use non-existing attribute -> default distance
+ assertCloseness(0, "no", 0);
+
+ // use non-default maxDistance
+ assertCloseness(1, "pos", 0, 100);
+ assertCloseness(0.5, "pos", 50, 100);
+ assertCloseness(0, "pos", 100, 100);
+ assertCloseness(0, "pos", 101, 100);
+
+ // test logscale using halfResponse (define that x = 10 should give 0.5 -> s = -10^2/(2*10 - 100) = 1.25 (scale distance))
+ assertCloseness(1, "pos", 0, 100, 10);
+ assertCloseness(0.5, "pos", 10, 100, 10);
+ assertCloseness(0, "pos", 100, 100, 10);
+ assertCloseness(0, "pos", 101, 100, 10);
+ }
+}
+
+void
+Test::assertCloseness(feature_t exp, const vespalib::string & attr, double distance, double maxDistance, double halfResponse)
+{
+ vespalib::string feature = "closeness(" + attr + ")";
+ FtFeatureTest ft(_factory, feature);
+ std::vector<std::pair<int32_t, int32_t> > positions;
+ int32_t x = 0;
+ positions.push_back(std::make_pair(x, x));
+ setupForDistanceTest(ft, "pos", positions, false);
+ ft.getQueryEnv().getLocation().setXPosition((int)distance);
+ ft.getQueryEnv().getLocation().setValid(true);
+ if (maxDistance > 0) {
+ ft.getIndexEnv().getProperties().add(feature + ".maxDistance",
+ vespalib::make_string("%u", (unsigned int)maxDistance));
+ }
+ if (halfResponse > 0) {
+ ft.getIndexEnv().getProperties().add(feature + ".halfResponse",
+ vespalib::make_string("%f", halfResponse));
+ feature.append(".logscale");
+ }
+ ASSERT_TRUE(ft.setup());
+ ASSERT_TRUE(ft.execute(RankResult().addScore(feature, exp)));
+}
+
+void
+Test::testFieldLength()
+{
+ FieldLengthBlueprint pt;
+
+ { // Test blueprint.
+ EXPECT_TRUE(assertCreateInstance(pt, "fieldLength"));
+
+ StringList params, in, out;
+ FT_SETUP_FAIL(pt, params);
+ FtIndexEnvironment ie;
+ ie.getBuilder()
+ .addField(FieldType::INDEX, CollectionType::SINGLE, "foo")
+ .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "bar")
+ .addField(FieldType::INDEX, CollectionType::ARRAY, "afoo")
+ .addField(FieldType::INDEX, CollectionType::WEIGHTEDSET, "wfoo");
+ FT_SETUP_FAIL(pt, params.add("qux")); // does not exists
+ FT_SETUP_FAIL(pt, params.clear().add("bar")); // not an index
+ FT_SETUP_FAIL(pt, params.clear().add("afoo")); // wrong collection type
+ FT_SETUP_FAIL(pt, params.clear().add("wfoo")); // wrong collection type
+ FT_SETUP_OK(pt, ie, params.clear().add("foo"), in, out.add("out"));
+
+ FT_DUMP_EMPTY(_factory, "fieldLength");
+ FT_DUMP_EMPTY(_factory, "fieldLength", ie);
+ }
+
+ { // Test executor.
+ for (uint32_t i = 0; i < 10; ++i) {
+ StringList features;
+ features.add("fieldLength(foo)").add("fieldLength(baz)");
+ FtFeatureTest ft(_factory, features);
+ ASSERT_TRUE(!ft.setup());
+
+ ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo").
+ addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "bar").addField(FieldType::INDEX, CollectionType::SINGLE, "baz");
+ ft.getQueryEnv().getBuilder().addAllFields();
+ ASSERT_TRUE(ft.setup());
+
+ search::fef::test::MatchDataBuilder::UP mdb = ft.createMatchDataBuilder();
+ ASSERT_TRUE(mdb->addOccurence("foo", 0, i));
+ ASSERT_TRUE(mdb->setFieldLength("foo", i + 10));
+ ASSERT_TRUE(mdb->addOccurence("baz", 0, i));
+ ASSERT_TRUE(mdb->setFieldLength("baz", i + 20));
+ ASSERT_TRUE(mdb->apply(1));
+ ASSERT_TRUE(ft.execute(RankResult()
+ .addScore("fieldLength(foo)", (feature_t)i + 10)
+ .addScore("fieldLength(baz)", (feature_t)i + 20)));
+ }
+ }
+}
+
+
+void
+Test::assertFieldMatch(const vespalib::string & spec,
+ const vespalib::string & query,
+ const vespalib::string & field,
+ const fieldmatch::Params * params,
+ uint32_t totalTermWeight,
+ feature_t totalSignificance)
+{
+ LOG(info, "assertFieldMatch('%s', '%s', '%s', (%u))", spec.c_str(), query.c_str(), field.c_str(), totalTermWeight);
+
+ // Setup feature test.
+ vespalib::string feature = "fieldMatch(foo)";
+ FtFeatureTest ft(_factory, feature);
+
+ setupFieldMatch(ft, "foo", query, field, params, totalTermWeight, totalSignificance, 1);
+
+ // Execute and compare results.
+ RankResult rr = toRankResult(feature, spec);
+ rr.setEpsilon(1e-4); // same as java tests
+ ASSERT_TRUE(ft.execute(rr));
+}
+
+void
+Test::assertFieldMatch(const vespalib::string & spec,
+ const vespalib::string & query,
+ const vespalib::string & field,
+ uint32_t totalTermWeight)
+{
+ assertFieldMatch(spec, query, field, NULL, totalTermWeight);
+}
+
+void
+Test::assertFieldMatchTS(const vespalib::string & spec,
+ const vespalib::string & query,
+ const vespalib::string & field,
+ feature_t totalSignificance)
+{
+ assertFieldMatch(spec, query, field, NULL, 0, totalSignificance);
+}
+
+
+void
+Test::testFirstPhase()
+{
+ { // Test blueprint.
+ FirstPhaseBlueprint pt;
+
+ EXPECT_TRUE(assertCreateInstance(pt, "firstPhase"));
+
+ FtIndexEnvironment ie;
+ ie.getProperties().add(indexproperties::rank::FirstPhase::NAME, "random"); // override nativeRank dependency
+
+ StringList params, in, out;
+ FT_SETUP_OK(pt, ie, params, in.add("random"), out.add("score"));
+ FT_SETUP_FAIL(pt, params.add("foo"));
+ params.clear();
+
+ FT_DUMP(_factory, "firstPhase", ie, StringList().add("firstPhase"));
+ }
+
+ { // Test executor.
+ FtFeatureTest ft(_factory, "firstPhase");
+ ft.getIndexEnv().getProperties().add(indexproperties::rank::FirstPhase::NAME, "value(10)");
+ ASSERT_TRUE(ft.setup());
+ ASSERT_TRUE(ft.execute(10.0f));
+ }
+}
+
+void
+Test::testForeach()
+{
+ { // Test blueprint.
+ ForeachBlueprint pt;
+
+ EXPECT_TRUE(assertCreateInstance(pt, "foreach"));
+
+ StringList params, in, out;
+ out.add("value");
+ FT_SETUP_FAIL(pt, params);
+ // illegal dimension
+ FT_SETUP_FAIL(pt, params.add("squares").add("N").add("foo").add("true").add("sum"));
+ // illegal condition
+ FT_SETUP_FAIL(pt, params.clear().add("fields").add("N").add("foo").add("false").add("sum"));
+ // illegal operation
+ FT_SETUP_FAIL(pt, params.clear().add("fields").add("N").add("foo").add("true").add("dotproduct"));
+
+ FtIndexEnvironment ie;
+ ie.getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo");
+ ie.getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "bar");
+ ie.getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "baz");
+
+ // various dimensions
+ FT_SETUP_OK(pt, ie, params.clear().add("terms").add("N").add("foo(N)").add("true").add("sum"),
+ in.clear().add("foo(0)").add("foo(1)").add("foo(2)").add("foo(3)").add("foo(4)").
+ add("foo(5)").add("foo(6)").add("foo(7)").add("foo(8)").add("foo(9)").
+ add("foo(10)").add("foo(11)").add("foo(12)").add("foo(13)").add("foo(14)").add("foo(15)"), out);
+ ie.getProperties().add("foreach.maxTerms", "1");
+ FT_SETUP_OK(pt, ie, params.clear().add("terms").add("N").add("foo").add("true").add("sum"),
+ in.clear().add("foo"), out);
+ FT_SETUP_OK(pt, ie, params.clear().add("fields").add("N").add("foo(N)").add("true").add("sum"),
+ in.clear().add("foo(foo)").add("foo(bar)"), out);
+ FT_SETUP_OK(pt, ie, params.clear().add("attributes").add("N").add("foo(N)").add("true").add("sum"),
+ in.clear().add("foo(baz)"), out);
+
+ // various conditions
+ FT_SETUP_OK(pt, ie, params.clear().add("terms").add("N").add("foo").add("true").add("sum"), in.clear().add("foo"), out);
+ FT_SETUP_OK(pt, ie, params.clear().add("terms").add("N").add("foo").add("<4").add("sum"), in, out);
+ FT_SETUP_OK(pt, ie, params.clear().add("terms").add("N").add("foo").add(">4").add("sum"), in, out);
+ // various operations
+ FT_SETUP_OK(pt, ie, params.clear().add("terms").add("N").add("foo").add("true").add("sum"), in, out);
+ FT_SETUP_OK(pt, ie, params.clear().add("terms").add("N").add("foo").add("true").add("product"), in, out);
+ FT_SETUP_OK(pt, ie, params.clear().add("terms").add("N").add("foo").add("true").add("average"), in, out);
+ FT_SETUP_OK(pt, ie, params.clear().add("terms").add("N").add("foo").add("true").add("max"), in, out);
+ FT_SETUP_OK(pt, ie, params.clear().add("terms").add("N").add("foo").add("true").add("min"), in, out);
+ FT_SETUP_OK(pt, ie, params.clear().add("terms").add("N").add("foo").add("true").add("count"), in, out);
+
+ FT_DUMP_EMPTY(_factory, "foreach");
+ }
+ { // Test executor
+ // single loop
+ assertForeachOperation( 16.5, "true", "sum");
+ assertForeachOperation(-2106, "true", "product");
+ assertForeachOperation( 3.3, "true", "average");
+ assertForeachOperation( 8, "true", "max");
+ assertForeachOperation( -4.5, "true", "min");
+ assertForeachOperation( 5, "true", "count");
+
+ assertForeachOperation(3, "\">4\"", "count");
+ assertForeachOperation(2, "\">4.5\"", "count");
+ assertForeachOperation(2, "\"<4\"", "count");
+ assertForeachOperation(2, "\"<4.5\"", "count");
+ assertForeachOperation(4, "\">0\"", "count");
+ assertForeachOperation(1, "\"<0\"", "count");
+ assertForeachOperation(4, "\">-4.5\"", "count");
+ assertForeachOperation(1, "\"<-4.4\"", "count");
+
+ { // average without any values
+ FtFeatureTest ft(_factory, "foreach(fields,N,value(N),true,average)");
+ ASSERT_TRUE(ft.setup());
+ ASSERT_TRUE(ft.execute(0));
+ }
+
+ { // double loop
+ vespalib::string feature =
+ "foreach(fields,N,foreach(attributes,M,rankingExpression(\"value(N)+value(M)\"),true,product),true,sum)";
+ LOG(info, "double loop feature: '%s'", feature.c_str());
+ FtFeatureTest ft(_factory, feature);
+ ft.getIndexEnv().getProperties().add("foreach.maxTerms", "1");
+ ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "1");
+ ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "2");
+ ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "3");
+ ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "4");
+ // ((1 + 3) * (1 + 4)) + ((2 + 3) * (2 + 4)) = 4 * 5 + 5 * 6 = 20 + 30 = 50
+ ASSERT_TRUE(ft.setup());
+ ASSERT_TRUE(ft.execute(50));
+ ASSERT_TRUE(ft.execute(50)); // check that reset works
+ }
+ }
+}
+
+void
+Test::assertForeachOperation(feature_t exp, const vespalib::string & cond, const vespalib::string & op)
+{
+ vespalib::string feature = "foreach(fields,N,value(N)," + cond + "," + op + ")";
+ FtFeatureTest ft(_factory, feature);
+ ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "4.5");
+ ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "2");
+ ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "8");
+ ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "6.5");
+ ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "-4.5");
+ ASSERT_TRUE(ft.setup());
+ ASSERT_TRUE(ft.execute(exp));
+ ASSERT_TRUE(ft.execute(exp)); // check that reset works
+}
+
+
+void
+Test::testFreshness()
+{
+ { // Test blueprint.
+ FtIndexEnvironment idx_env;
+ idx_env.getBuilder()
+ .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "name");
+
+ FreshnessBlueprint pt;
+
+ EXPECT_TRUE(assertCreateInstance(pt, "freshness"));
+
+ StringList params, in, out;
+ FT_SETUP_FAIL(pt, idx_env, params);
+ FT_SETUP_OK(pt, idx_env, params.add("name"), in.add("age(name)"), out.add("out").add("logscale"));
+
+ FT_DUMP_EMPTY(_factory, "freshness");
+ }
+
+ { // Test executor.
+ assertFreshness(1, "doctime", 0);
+ assertFreshness(0.5, "doctime", 3*15*24*60*60);
+ assertFreshness(0, "doctime", 3*30*24*60*60);
+ // use non-default maxAge
+ assertFreshness(1, "doctime", 0, 120);
+ assertFreshness(0.75, "doctime", 30, 120);
+ assertFreshness(0.5, "doctime", 60, 120);
+ assertFreshness(0, "doctime", 120, 120);
+ assertFreshness(0, "doctime", 121, 120);
+
+ // test logscale
+ assertFreshness(1, "doctime", 0, 0, 0, true);
+ assertFreshness(0.5, "doctime", 7*24*60*60, 0, 0, true);
+ assertFreshness(0, "doctime", 3*30*24*60*60, 0, 0, true);
+ // use non-default maxAge & halfResponse
+ assertFreshness(1, "doctime", 0, 120, 30, true);
+ assertFreshness(0.5, "doctime", 30, 120, 30, true); // half response after 30 secs
+ assertFreshness(0, "doctime", 120, 120, 30, true);
+ assertFreshness(0, "doctime", 121, 120, 30, true);
+ // test invalid half response
+ assertFreshness(0.5, "doctime", 1, 120, 0.5, true); // half response is set to 1
+ assertFreshness(0.5, "doctime", 59, 120, 70, true); // half response is set to 120/2 - 1
+ }
+}
+
+void
+Test::assertFreshness(feature_t expFreshness, const vespalib::string & attr, uint32_t age, uint32_t maxAge, double halfResponse, bool logScale)
+{
+ vespalib::string feature = "freshness(" + attr + ")";
+ FtFeatureTest ft(_factory, feature);
+ setupForAgeTest(ft, 60); // time = 60
+ if (maxAge > 0) {
+ ft.getIndexEnv().getProperties().add("freshness(" + attr + ").maxAge",
+ vespalib::make_string("%u", maxAge));
+ }
+ if (halfResponse > 0) {
+ ft.getIndexEnv().getProperties().add("freshness(" + attr + ").halfResponse",
+ vespalib::make_string("%f", halfResponse));
+ }
+ if (logScale) {
+ feature.append(".logscale");
+ }
+ ft.getQueryEnv().getProperties().add(queryproperties::now::SystemTime::NAME,
+ vespalib::make_string("%u", age + 60)); // now = age + 60
+ ASSERT_TRUE(ft.setup());
+ ASSERT_TRUE(ft.execute(RankResult().addScore(feature, expFreshness).setEpsilon(EPS)));
+}
+
+void
+Test::testDistance()
+{
+ { // Test blueprint.
+ DistanceBlueprint pt;
+
+ EXPECT_TRUE(assertCreateInstance(pt, "distance"));
+
+ StringList params, in, out;
+ FT_SETUP_FAIL(pt, params);
+ FT_SETUP_OK(pt, params.add("pos"), in, out.add("out"));
+
+ FT_DUMP_EMPTY(_factory, "distance");
+ }
+
+ { // Test executor.
+
+ { // test 2D single location (zcurve)
+ assert2DZDistance(static_cast<feature_t>(sqrt(650.0f)), "5:-5", 10, 20);
+ assert2DZDistance(static_cast<feature_t>(sqrt(250.0f)), "5:-5", 10, -20);
+ assert2DZDistance(static_cast<feature_t>(sqrt(450.0f)), "5:-5", -10, -20);
+ assert2DZDistance(static_cast<feature_t>(sqrt(850.0f)), "5:-5", -10, 20);
+ assert2DZDistance(static_cast<feature_t>(sqrt(250.0f)), "5:-5", 15, -20, 0x80000000); // 2^31
+ }
+
+ { // test 2D multi location (zcurve)
+ vespalib::string positions = "5:-5,35:0,5:40,35:-40";
+ assert2DZDistance(static_cast<feature_t>(sqrt(425.0f)), positions, 10, 20);
+ assert2DZDistance(static_cast<feature_t>(sqrt(250.0f)), positions, 10, -20);
+ assert2DZDistance(static_cast<feature_t>(sqrt(450.0f)), positions, -10, -20);
+ assert2DZDistance(static_cast<feature_t>(sqrt(625.0f)), positions, -10, 20);
+ assert2DZDistance(static_cast<feature_t>(sqrt(250.0f)), positions, 15, -20, 0x80000000); // 2^31
+ assert2DZDistance(static_cast<feature_t>(sqrt(425.0f)), positions, 45, -20, 0x80000000); // 2^31
+ }
+
+ { // test default distance
+ { // non-existing attribute
+ FtFeatureTest ft(_factory, "distance(pos)");
+ ft.getQueryEnv().getLocation().setValid(true);
+ ASSERT_TRUE(ft.setup());
+ ASSERT_TRUE(ft.execute(RankResult().addScore("distance(pos)", 6400000000.0)));
+ }
+ { // wrong attribute type (float)
+ FtFeatureTest ft(_factory, "distance(pos)");
+ AttributePtr pos = AttributeFactory::createAttribute("pos", AVC(AVBT::FLOAT, AVCT::SINGLE));
+ pos->commit();
+ ft.getIndexEnv().getAttributeManager().add(pos);
+ ft.getQueryEnv().getLocation().setValid(true);
+ ASSERT_TRUE(ft.setup());
+ ASSERT_TRUE(ft.execute(RankResult().addScore("distance(pos)", 6400000000.0)));
+ }
+ { // wrong attribute type (string)
+ FtFeatureTest ft(_factory, "distance(pos)");
+ AttributePtr pos = AttributeFactory::createAttribute("pos", AVC(AVBT::STRING, AVCT::SINGLE));
+ pos->commit();
+ ft.getIndexEnv().getAttributeManager().add(pos);
+ ft.getQueryEnv().getLocation().setValid(true);
+ ASSERT_TRUE(ft.setup());
+ ASSERT_TRUE(ft.execute(RankResult().addScore("distance(pos)", 6400000000.0)));
+ }
+ { // wrong attribute collection type (weighted set)
+ FtFeatureTest ft(_factory, "distance(pos)");
+ AttributePtr pos = AttributeFactory::createAttribute("pos", AVC(AVBT::INT64, AVCT::WSET));
+ pos->commit();
+ ft.getIndexEnv().getAttributeManager().add(pos);
+ ft.getQueryEnv().getLocation().setValid(true);
+ ASSERT_TRUE(ft.setup());
+ ASSERT_TRUE(ft.execute(RankResult().addScore("distance(pos)", 6400000000.0)));
+ }
+ }
+ }
+}
+
+void
+Test::setupForDistanceTest(FtFeatureTest &ft, const vespalib::string & attrName,
+ const std::vector<std::pair<int32_t, int32_t> > & positions, bool zcurve)
+{
+ AttributePtr pos = AttributeFactory::createAttribute(attrName, AVC(AVBT::INT64, AVCT::ARRAY));
+
+ pos->addReservedDoc();
+ pos->addDocs(1);
+ ft.getIndexEnv().getAttributeManager().add(pos);
+
+ IntegerAttribute * ia = static_cast<IntegerAttribute *>(pos.get());
+ for (uint32_t i = 0; i < positions.size(); ++i) {
+ if (zcurve) {
+ ia->append(1, vespalib::geo::ZCurve::encode(positions[i].first, positions[i].second), 0);
+ } else {
+ ia->append(1, positions[i].first, 0);
+ }
+ }
+
+ pos->commit();
+}
+
+void
+Test::assert2DZDistance(feature_t exp, const vespalib::string & positions,
+ int32_t xquery, int32_t yquery, uint32_t xAspect)
+{
+ LOG(info, "assert2DZDistance(%g, %s, %d, %d, %u)", exp, positions.c_str(), xquery, yquery, xAspect);
+ FtFeatureTest ft(_factory, "distance(pos)");
+ std::vector<vespalib::string> ta = FtUtil::tokenize(positions, ",");
+ std::vector<std::pair<int32_t, int32_t> > pos;
+ for (uint32_t i = 0; i < ta.size(); ++i) {
+ std::vector<vespalib::string> tb = FtUtil::tokenize(ta[i], ":");
+ int32_t x = util::strToNum<int32_t>(tb[0]);
+ int32_t y = util::strToNum<int32_t>(tb[1]);
+ pos.push_back(std::make_pair(x, y));
+ }
+ setupForDistanceTest(ft, "pos", pos, true);
+ ft.getQueryEnv().getLocation().setXPosition(xquery);
+ ft.getQueryEnv().getLocation().setYPosition(yquery);
+ ft.getQueryEnv().getLocation().setXAspect(xAspect);
+ ft.getQueryEnv().getLocation().setValid(true);
+ ASSERT_TRUE(ft.setup());
+ ASSERT_TRUE(ft.execute(RankResult().setEpsilon(1e-4).
+ addScore("distance(pos)", exp)));
+}
+
+void
+Test::testDistanceToPath()
+{
+ {
+ // Test blueprint.
+ DistanceToPathBlueprint pt;
+
+ EXPECT_TRUE(assertCreateInstance(pt, "distanceToPath"));
+
+ StringList params, in, out;
+ FT_SETUP_FAIL(pt, params);
+ FT_SETUP_OK(pt, params.add("pos"), in, out.add("distance").add("traveled").add("product"));
+ FT_SETUP_FAIL(pt, params.add("foo"));
+
+ FT_DUMP_EMPTY(_factory, "distanceToPath");
+ }
+
+ {
+ // Test executor.
+ std::vector<std::pair<int32_t, int32_t> > pos;
+ pos.push_back(std::make_pair(0, 0));
+
+ // invalid path
+ assertDistanceToPath(pos, "a");
+ assertDistanceToPath(pos, "(");
+ assertDistanceToPath(pos, "(a");
+ assertDistanceToPath(pos, "(a)");
+ assertDistanceToPath(pos, "(-1)");
+ assertDistanceToPath(pos, "(-1,1)");
+ assertDistanceToPath(pos, "(-1,1,1)");
+ assertDistanceToPath(pos, "(-1 1 1 1)");
+
+ // path on either side of document
+ assertDistanceToPath(pos, "(-1,1,1,1)", 1, 0.5, 2);
+ assertDistanceToPath(pos, "(-1,-1,1,-1)", 1, 0.5, -2);
+
+ // zero length path
+ assertDistanceToPath(pos, "(0,0,0,0)", 0, 0);
+ assertDistanceToPath(pos, "(0,0,0,0,0,0)", 0, 0);
+ assertDistanceToPath(pos, "(0,1,0,1)", 1, 0);
+ assertDistanceToPath(pos, "(0,1,0,1,0,1)", 1, 0);
+
+ // path crosses document
+ assertDistanceToPath(pos, "(-1,1,1,-1)", 0, 0.5);
+ assertDistanceToPath(pos, "(-2,2,2,-2)", 0, 0.5);
+ assertDistanceToPath(pos, "(-1,1,3,-3)", 0, 0.25);
+
+ // intersection outside segments
+ assertDistanceToPath(pos, "(1,0,2,0)", 1, 0); // before
+ assertDistanceToPath(pos, "(0,1,0,2)", 1, 0);
+ assertDistanceToPath(pos, "(-2,0,-1,0)", 1, 1); // after
+ assertDistanceToPath(pos, "(0,-2,0,-1)", 1, 1);
+
+ // various paths
+ assertDistanceToPath(pos, "(-3,1,2,1,2,-2,-2,-2)", 1, 0.25, 5);
+ assertDistanceToPath(pos, "(-3,2,2,2,2,-1,0,-1)", 1, 1, 2);
+
+ // multiple document locations
+ pos.push_back(std::make_pair(0, 1));
+ assertDistanceToPath(pos, "(-1,1,1,1)", 0, 0.5);
+ assertDistanceToPath(pos, "(-2,-1,-1,1)", 1, 1, 2);
+ assertDistanceToPath(pos, "(-1,0.25,1,0.25)", 0.25, 0.5, 0.5);
+
+ {
+ // Test defaults.
+ RankResult res;
+ res.addScore("distanceToPath(pos).distance", DistanceExecutor::DEFAULT_DISTANCE);
+ res.addScore("distanceToPath(pos).traveled", 1);
+ {
+ // Non-existing attribute.
+ FtFeatureTest ft(_factory, "distanceToPath(pos)");
+ ft.getQueryEnv().getProperties().add("distanceToPath(pos).path", "0 0 1 1");
+ ASSERT_TRUE(ft.setup());
+ ASSERT_TRUE(ft.execute(res));
+ }
+ {
+ // Wrong attribute type (float).
+ FtFeatureTest ft(_factory, "distanceToPath(pos)");
+ AttributePtr att = AttributeFactory::createAttribute("pos", AVC(AVBT::FLOAT, AVCT::SINGLE));
+ att->commit();
+ ft.getIndexEnv().getAttributeManager().add(att);
+ ft.getQueryEnv().getProperties().add("distanceToPath(pos).path", "0 0 1 1");
+ ASSERT_TRUE(ft.setup());
+ ASSERT_TRUE(ft.execute(res));
+ }
+ {
+ // Wrong attribute type (string).
+ FtFeatureTest ft(_factory, "distanceToPath(pos)");
+ AttributePtr att = AttributeFactory::createAttribute("pos", AVC(AVBT::STRING, AVCT::SINGLE));
+ att->commit();
+ ft.getIndexEnv().getAttributeManager().add(att);
+ ft.getQueryEnv().getProperties().add("distanceToPath(pos).path", "0 0 1 1");
+ ASSERT_TRUE(ft.setup());
+ ASSERT_TRUE(ft.execute(res));
+ }
+ {
+ // Wrong attribute collection type (weighted set).
+ FtFeatureTest ft(_factory, "distanceToPath(pos)");
+ AttributePtr att = AttributeFactory::createAttribute("pos", AVC(AVBT::INT64, AVCT::WSET));
+ att->commit();
+ ft.getIndexEnv().getAttributeManager().add(att);
+ ft.getQueryEnv().getProperties().add("distanceToPath(pos).path", "0 0 1 1");
+ ASSERT_TRUE(ft.setup());
+ ASSERT_TRUE(ft.execute(res));
+ }
+ }
+ }
+}
+
+void
+Test::assertDistanceToPath(const std::vector<std::pair<int32_t, int32_t> > pos,
+ const vespalib::string &path, feature_t distance, feature_t traveled, feature_t product)
+{
+ LOG(info, "Testing distance to path '%s' with %zd document locations.", path.c_str(), pos.size());
+
+ FtFeatureTest ft(_factory, "distanceToPath(pos)");
+ setupForDistanceTest(ft, "pos", pos, true);
+
+ ft.getQueryEnv().getProperties().add("distanceToPath(pos).path", path);
+ ASSERT_TRUE(ft.setup());
+ ASSERT_TRUE(ft.execute(RankResult()
+ .addScore("distanceToPath(pos).distance", distance)
+ .addScore("distanceToPath(pos).traveled", traveled)
+ .addScore("distanceToPath(pos).product", product)));
+}
+
+void
+Test::setupForDocumentTest(FtFeatureTest &ft, const vespalib::string & attrName, const vespalib::string & docType)
+{
+ AttributePtr type = AttributeFactory::createAttribute(attrName, AVC(AVBT::STRING, AVCT::SINGLE));
+
+ type->addReservedDoc();
+ type->addDocs(1);
+ ft.getIndexEnv().getAttributeManager().add(type);
+
+ (static_cast<StringAttribute *>(type.get()))->update(1, docType);
+ type->commit();
+}
+
+void
+Test::testDotProduct()
+{
+ { // Test blueprint.
+ FtIndexEnvironment idx_env;
+ idx_env.getBuilder()
+ .addField(FieldType::ATTRIBUTE, CollectionType::WEIGHTEDSET, "attribute");
+
+ DotProductBlueprint pt;
+
+ EXPECT_TRUE(assertCreateInstance(pt, "dotProduct"));
+
+ StringList params, in, out;
+ FT_SETUP_FAIL(pt, idx_env, params);
+ FT_SETUP_OK(pt, idx_env, params.add("attribute").add("vector"), in, out.add("scalar"));
+
+ FT_DUMP_EMPTY(_factory, "dotProduct");
+ }
+
+ { // Test vector parser
+ { // string enum vector
+ FtFeatureTest ft(_factory, "value(0)");
+ setupForDotProductTest(ft);
+ search::AttributeGuard::UP ag(ft.getIndexEnv().getAttributeManager().getAttribute("wsstr"));
+ const search::attribute::IAttributeVector * sv = ag->operator->();
+ EXPECT_TRUE(sv->hasEnum());
+ search::attribute::EnumHandle e;
+ {
+ dotproduct::wset::EnumVector out(sv);
+ WeightedSetParser::parse("", out);
+ EXPECT_EQUAL(out.getVector().size(), 0u);
+ WeightedSetParser::parse("()", out);
+ EXPECT_EQUAL(out.getVector().size(), 0u);
+ WeightedSetParser::parse("(a;1)", out);
+ EXPECT_EQUAL(out.getVector().size(), 0u);
+ WeightedSetParser::parse("(a:1)", out);
+ EXPECT_EQUAL(out.getVector().size(), 1u);
+ EXPECT_TRUE(sv->findEnum("a", e));
+ EXPECT_EQUAL(out.getVector()[0].first, e);
+ EXPECT_EQUAL(out.getVector()[0].second, 1.0);
+ }
+ std::vector<vespalib::string> v = {"(b:2.5,c:-3.5)", "{b:2.5,c:-3.5}"};
+ for(const vespalib::string & s : v) {
+ dotproduct::wset::EnumVector out(sv);
+ WeightedSetParser::parse(s, out);
+ EXPECT_EQUAL(out.getVector().size(), 2u);
+ EXPECT_TRUE(sv->findEnum("b", e));
+ EXPECT_EQUAL(out.getVector()[0].first, e);
+ EXPECT_EQUAL(out.getVector()[0].second, 2.5);
+ EXPECT_TRUE(sv->findEnum("c", e));
+ EXPECT_EQUAL(out.getVector()[1].first, e);
+ EXPECT_EQUAL(out.getVector()[1].second, -3.5);
+ }
+ { // test funky syntax
+ dotproduct::wset::EnumVector out(sv);
+ WeightedSetParser::parse("( a: 1, b:2 ,c: , :3)", out);
+ EXPECT_EQUAL(out.getVector().size(), 3u);
+ EXPECT_TRUE(sv->findEnum("a", e));
+ EXPECT_EQUAL(out.getVector()[0].first, e);
+ EXPECT_EQUAL(out.getVector()[0].second, 1);
+ EXPECT_TRUE(sv->findEnum("b", e));
+ EXPECT_EQUAL(out.getVector()[1].first, e);
+ EXPECT_EQUAL(out.getVector()[1].second, 2);
+ EXPECT_TRUE(sv->findEnum("c", e));
+ EXPECT_EQUAL(out.getVector()[2].first, e);
+ EXPECT_EQUAL(out.getVector()[2].second, 0);
+ }
+ { // strings not in attribute vector
+ dotproduct::wset::EnumVector out(sv);
+ WeightedSetParser::parse("(not:1)", out);
+ EXPECT_EQUAL(out.getVector().size(), 0u);
+ }
+ }
+ { // string vector
+ dotproduct::wset::StringVector out;
+ WeightedSetParser::parse("(b:2.5,c:-3.5)", out);
+ EXPECT_EQUAL(out.getVector().size(), 2u);
+ EXPECT_EQUAL(out.getVector()[0].first, "b");
+ EXPECT_EQUAL(out.getVector()[0].second, 2.5);
+ EXPECT_EQUAL(out.getVector()[1].first, "c");
+ EXPECT_EQUAL(out.getVector()[1].second, -3.5);
+ }
+ { // integer vector
+ dotproduct::wset::IntegerVector out;
+ WeightedSetParser::parse("(20:2.5,30:-3.5)", out);
+ EXPECT_EQUAL(out.getVector().size(), 2u);
+ EXPECT_EQUAL(out.getVector()[0].first, 20);
+ EXPECT_EQUAL(out.getVector()[0].second, 2.5);
+ EXPECT_EQUAL(out.getVector()[1].first, 30);
+ EXPECT_EQUAL(out.getVector()[1].second, -3.5);
+ }
+ }
+ { // Array parser
+ std::vector<vespalib::string> v = {"(0:2,7:-3,1:-3)", "{0:2,7:-3,1:-3}", "[2 -3 0 0 0 0 0 -3]"};
+ for(const vespalib::string & s : v) {
+ std::vector<int32_t> out;
+ ArrayParser::parse(s, out);
+ EXPECT_EQUAL(8u, out.size());
+ EXPECT_EQUAL(2, out[0]);
+ EXPECT_EQUAL(-3, out[1]);
+ EXPECT_EQUAL(0, out[2]);
+ EXPECT_EQUAL(0, out[3]);
+ EXPECT_EQUAL(0, out[4]);
+ EXPECT_EQUAL(0, out[5]);
+ EXPECT_EQUAL(0, out[6]);
+ EXPECT_EQUAL(-3, out[7]);
+ }
+ }
+ {
+ vespalib::string s = "[[1:3]]";
+ std::vector<int32_t> out;
+ ArrayParser::parse(s, out);
+ EXPECT_EQUAL(0u, out.size());
+ }
+
+ { // Test executor.
+ { // string enum attribute
+ // docId = 1
+ assertDotProduct(0, "()");
+ assertDotProduct(0, "(f:5)");
+ assertDotProduct(0, "(f:5,g:5)");
+ assertDotProduct(-5, "(a:-5)");
+ assertDotProduct(25, "(e:5)");
+ assertDotProduct(-5.5, "(a:-5.5)");
+ assertDotProduct(27.5, "(e:5.5)");
+ assertDotProduct(55, "(a:1,b:2,c:3,d:4,e:5)");
+ assertDotProduct(20, "(b:10,b:15)");
+ // docId = 2
+ assertDotProduct(0, "()", 2);
+ assertDotProduct(0, "(a:1,b:2,c:3,d:4,e:5)", 2);
+ }
+ { // string attribute
+ assertDotProduct(0, "(f:5,g:5)", 1, "wsextstr");
+ assertDotProduct(550, "(a:1,b:2,c:3,d:4,e:5)", 1, "wsextstr");
+ }
+ { // integer attribute
+ assertDotProduct(0, "()", 1, "wsint");
+ assertDotProduct(0, "(6:5,7:5)", 1, "wsint");
+ assertDotProduct(55, "(1:1,2:2,3:3,4:4,5:5)", 1, "wsint");
+ }
+ std::vector<const char *> attributes = {"arrint", "arrfloat", "arrint_fast", "arrfloat_fast"};
+ for (const char * name : attributes) {
+ assertDotProduct(0, "()", 1, name);
+ assertDotProduct(0, "(6:5,7:5)", 1, name);
+ assertDotProduct(55, "(0:1,1:2,2:3,3:4,4:5)", 1, name);
+ assertDotProduct(55, "[1 2 3 4 5]", 1, name);
+ assertDotProduct(41, "{3:4,4:5}", 1, name);
+ }
+ { // float array attribute
+ assertDotProduct(55, "[1.0 2.0 3.0 4.0 5.0]", 1, "arrfloat");
+ assertDotProduct(41, "{3:4,4:5.0}", 1, "arrfloat");
+ }
+ { // Sparse float array attribute.
+ assertDotProduct(17, "(0:1,3:4,50:97)", 1, "arrfloat");
+ }
+
+ assertDotProduct(0, "(0:1,3:4,50:97)", 1, "sint"); // attribute of the wrong type
+ assertDotProduct(17, "(0:1,3:4,50:97)", 1, "sint", "arrfloat"); // attribute override
+ assertDotProduct(0, "(0:1,3:4,50:97)", 1, "sint", "arrfloat_non_existing"); // incorrect attribute override
+ }
+ { // Test that correct executor is created
+ FtFeatureTest ft(_factory, "value(0)");
+ setupForDotProductTest(ft);
+ ft.getQueryEnv().getProperties().add("dotProduct.vector", "(a:1)");
+ ParameterList params;
+ params.push_back(Parameter(ParameterType::ATTRIBUTE, "wsstr"));
+ params.push_back(Parameter(ParameterType::STRING, "vector"));
+ DotProductBlueprint bp;
+ DummyDependencyHandler deps(bp);
+ EXPECT_TRUE(bp.setup(ft.getIndexEnv(), params));
+ FeatureExecutor::LP exc = bp.createExecutor(ft.getQueryEnv());
+ // check that we have the optimized enum version
+ dotproduct::wset::DotProductExecutor<dotproduct::wset::EnumVector, WeightedEnumContent> * myExc =
+ dynamic_cast<dotproduct::wset::DotProductExecutor<dotproduct::wset::EnumVector, WeightedEnumContent> *>(exc.get());
+ EXPECT_TRUE(myExc != nullptr);
+ EXPECT_EQUAL(1u, deps.output.size());
+ }
+}
+
+void
+Test::assertDotProduct(feature_t exp, const vespalib::string & vector, uint32_t docId,
+ const vespalib::string & attribute, const vespalib::string & attributeOverride)
+{
+ RankResult rr;
+ rr.addScore("dotProduct(" + attribute + ",vector)", exp);
+ FtFeatureTest ft(_factory, rr.getKeys());
+ setupForDotProductTest(ft);
+ ft.getQueryEnv().getProperties().add("dotProduct.vector", vector);
+ if ( ! attributeOverride.empty() ) {
+ ft.getQueryEnv().getProperties().add("dotProduct." + attribute + ".override.name", attributeOverride);
+ }
+ ASSERT_TRUE(ft.setup());
+ ASSERT_TRUE(ft.execute(rr, docId));
+}
+
+void
+Test::setupForDotProductTest(FtFeatureTest & ft)
+{
+ struct Config {
+ const char * name;
+ AVBT dataType;
+ AVCT collectionType;
+ bool fastSearch;
+ };
+ std::vector<Config> cfgList = { {"wsint", AVBT::INT32, AVCT::WSET, false},
+ {"arrint", AVBT::INT32, AVCT::ARRAY, false},
+ {"arrfloat", AVBT::FLOAT, AVCT::ARRAY, false},
+ {"arrint_fast", AVBT::INT32, AVCT::ARRAY, true},
+ {"arrfloat_fast", AVBT::FLOAT, AVCT::ARRAY, true}
+ };
+ AttributePtr a = AttributeFactory::createAttribute("wsstr", AVC(AVBT::STRING, AVCT::WSET));
+ AttributePtr c = AttributeFactory::createAttribute("sint", AVC(AVBT::INT32, AVCT::SINGLE));
+ AttributePtr d(new search::WeightedSetStringExtAttribute("wsextstr"));
+ ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::WEIGHTEDSET, "wsstr");
+ ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "sint");
+ ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::WEIGHTEDSET, "wsextstr");
+ for (const Config & cfg : cfgList) {
+ AttributePtr baf = AttributeFactory::createAttribute(cfg.name, AVC(cfg.dataType,
+ cfg.collectionType,
+ cfg.fastSearch));
+ ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE,
+ cfg.collectionType==AVCT::ARRAY
+ ? CollectionType::ARRAY
+ : CollectionType::WEIGHTEDSET,
+ cfg.name);
+ baf->addReservedDoc();
+ baf->addDocs(2);
+ ft.getIndexEnv().getAttributeManager().add(baf);
+ for (size_t i(1); i < 6; i++) {
+ IntegerAttribute * ia = dynamic_cast<IntegerAttribute *>(baf.get());
+ if (ia) {
+ ia->append(1, i, i);
+ } else {
+ FloatingPointAttribute * fa = dynamic_cast<FloatingPointAttribute *>(baf.get());
+ fa->append(1, i, i);
+ }
+ }
+ baf->commit();
+ }
+
+ a->addReservedDoc();
+ c->addReservedDoc();
+ a->addDocs(2);
+ c->addDocs(2);
+ ft.getIndexEnv().getAttributeManager().add(a);
+ ft.getIndexEnv().getAttributeManager().add(c);
+ ft.getIndexEnv().getAttributeManager().add(d);
+
+ StringAttribute * sa = static_cast<StringAttribute *>(a.get());
+ sa->append(1, "a", 1);
+ sa->append(1, "b", 2);
+ sa->append(1, "c", 3);
+ sa->append(1, "d", 4);
+ sa->append(1, "e", 5);
+
+ WeightedSetStringExtAttribute * ea = static_cast<WeightedSetStringExtAttribute *>(d.get());
+ EXPECT_TRUE(!ea->hasEnum());
+ uint32_t docId;
+ ea->addDoc(docId); // reserved doc
+ ea->addDoc(docId);
+ ea->add("a", 10);
+ ea->add("b", 20);
+ ea->add("c", 30);
+ ea->add("d", 40);
+ ea->add("e", 50);
+ ea->addDoc(docId);
+
+ a->commit();
+ c->commit();
+}
+
+void
+Test::testNow()
+{
+ {
+ // Test blueprint.
+ NowBlueprint pt;
+
+ EXPECT_TRUE(assertCreateInstance(pt, "now"));
+
+ StringList params, in, out;
+ FT_SETUP_OK (pt, params, in, out.add("out"));
+ FT_SETUP_FAIL(pt, params.add("foo"));
+
+ FT_DUMP(_factory, "now", StringList().add("now"));
+ }
+
+ {
+ // Test executor.
+ FtFeatureTest ft(_factory, "now");
+ ASSERT_TRUE(ft.setup());
+
+ RankResult res;
+ res.addScore("now", 0.0f);
+ for (uint32_t i = 1; i <= 10; ++i) {
+ feature_t last = res.getScore("now");
+ res.clear();
+ ASSERT_TRUE(ft.executeOnly(res, i));
+ ASSERT_TRUE(last <= res.getScore("now"));
+ }
+ }
+
+ {
+ // Test executor with ms resolution
+ FtFeatureTest ft(_factory, "now");
+ ft.getQueryEnv().getProperties().add("vespa.now", "15000000000");
+ ASSERT_TRUE(ft.setup());
+
+ RankResult res;
+ ASSERT_TRUE(ft.executeOnly(res, 0));
+ feature_t now = 15000000000;
+ ASSERT_EQUAL(now, res.getScore("now"));
+ }
+}
+
+
+void
+Test::testMatch()
+{
+ { // Test blueprint.
+ MatchBlueprint pt;
+
+ EXPECT_TRUE(assertCreateInstance(pt, "match"));
+
+ FtFeatureTest ft(_factory, "");
+ setupForAttributeTest(ft);
+
+ ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo");
+ ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::ARRAY, "bar");
+ ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::WEIGHTEDSET, "baz");
+ ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "sint");
+ ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::ARRAY, "aint");
+ ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::WEIGHTEDSET, "wsint");
+
+ FtIndexEnvironment idx_env;
+ idx_env.getBuilder()
+ .addField(FieldType::INDEX, CollectionType::SINGLE, "foo")
+ .addField(FieldType::INDEX, CollectionType::ARRAY, "bar")
+ .addField(FieldType::INDEX, CollectionType::WEIGHTEDSET, "baz")
+ .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "sint")
+ .addField(FieldType::ATTRIBUTE, CollectionType::ARRAY, "aint")
+ .addField(FieldType::ATTRIBUTE, CollectionType::WEIGHTEDSET, "wsint");
+
+ StringList params, in, out;
+ FT_SETUP_OK(pt, params, in, out.add("score").add("totalWeight"));
+ FT_SETUP_OK(pt, idx_env, params, in
+ .add("fieldMatch(foo)")
+ .add("elementCompleteness(bar)")
+ .add("elementCompleteness(baz)")
+ .add("attributeMatch(sint)")
+ .add("attributeMatch(aint)")
+ .add("attributeMatch(wsint)"), out
+ .add("weight.foo")
+ .add("weight.bar")
+ .add("weight.baz")
+ .add("weight.sint")
+ .add("weight.aint")
+ .add("weight.wsint"));
+ FT_SETUP_FAIL(pt, idx_env, params.add("1")); // expects 0 parameters
+
+ FT_DUMP_EMPTY(_factory, "match");
+ }
+
+ { // Test executor
+ FtFeatureTest ft(_factory, "match");
+
+ ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo");
+ ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::ARRAY, "bar");
+ ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::WEIGHTEDSET, "baz");
+ ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "sint");
+ ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::ARRAY, "aint");
+ ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::WEIGHTEDSET, "wsint");
+
+ ft.getIndexEnv().getProperties().add("vespa.fieldweight.foo", "100"); // assign weight to all fields, simulate sd behaviour
+ ft.getIndexEnv().getProperties().add("vespa.fieldweight.bar", "200");
+ ft.getIndexEnv().getProperties().add("vespa.fieldweight.sint", "300");
+ ft.getIndexEnv().getProperties().add("vespa.fieldweight.aint", "400");
+
+ // search in field 'foo'
+ ft.getQueryEnv().getBuilder().addIndexNode(StringList().add("foo")); // term id 0
+
+ // search in field 'sint'
+ ft.getQueryEnv().getBuilder().addAttributeNode("sint"); // term id 1
+ setupForAttributeTest(ft, false);
+
+ ASSERT_TRUE(ft.setup());
+
+ MatchDataBuilder::UP mdb = ft.createMatchDataBuilder();
+
+ // add hit for field 'foo' for search term 0
+ ASSERT_TRUE(mdb->setFieldLength("foo", 1));
+ ASSERT_TRUE(mdb->addOccurence("foo", 0, 0));
+ ASSERT_TRUE(mdb->setWeight("sint", 1, 0));
+ ASSERT_TRUE(mdb->apply(1));
+
+ RankResult rr = toRankResult("match", "score:1 totalWeight:400 weight.foo:100 weight.bar:200 weight.baz:100 weight.sint:300 weight.aint:400 weight.wsint:100");
+ rr.setEpsilon(1e-4); // same as java tests
+ ASSERT_TRUE(ft.execute(rr));
+ }
+
+ { // Test executor
+ FtFeatureTest ft(_factory, "match");
+
+ ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo");
+ // search in field 'foo'
+ ft.getQueryEnv().getBuilder().addIndexNode(StringList().add("foo")); // term id 0
+ ASSERT_TRUE(ft.setup());
+
+ // must create this so that term match data is configured with the term data object
+ MatchDataBuilder::UP mdb = ft.createMatchDataBuilder();
+
+ // no hits on docId 1
+ RankResult rr = toRankResult("match", "score:0 totalWeight:0 weight.foo:100");
+ ASSERT_TRUE(ft.execute(rr, 1));
+ }
+}
+
+void
+Test::testMatches()
+{
+ { // Test blueprint.
+ MatchesBlueprint pt;
+
+ EXPECT_TRUE(assertCreateInstance(pt, "matches"));
+
+ FtFeatureTest ft(_factory, "");
+ ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo");
+ ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "bar");
+
+ StringList params, in, out;
+ FT_SETUP_FAIL(pt, ft.getIndexEnv(), params); // expects 1-2 parameters
+ FT_SETUP_FAIL(pt, ft.getIndexEnv(), params.add("baz")); // cannot find the field
+ FT_SETUP_OK(pt, ft.getIndexEnv(), params.clear().add("foo"), in, out.add("out"));
+ FT_SETUP_OK(pt, ft.getIndexEnv(), params.add("1"), in, out);
+ FT_SETUP_OK(pt, ft.getIndexEnv(), params.clear().add("bar"), in, out);
+ FT_SETUP_OK(pt, ft.getIndexEnv(), params.add("1"), in, out);
+
+ FT_DUMP_EMPTY(_factory, "matches");
+ }
+ { // Test executor for index fields
+ EXPECT_TRUE(assertMatches(0, "x", "a"));
+ EXPECT_TRUE(assertMatches(1, "a", "a"));
+ EXPECT_TRUE(assertMatches(1, "a b", "a b"));
+ // change docId to indicate no matches in the field
+ EXPECT_TRUE(assertMatches(0, "a", "a", "matches(foo)", 2));
+ // specify termIdx as second parameter
+ EXPECT_TRUE(assertMatches(0, "x", "a", "matches(foo,0)"));
+ EXPECT_TRUE(assertMatches(1, "a", "a", "matches(foo,0)"));
+ EXPECT_TRUE(assertMatches(0, "a", "a", "matches(foo,1)"));
+ EXPECT_TRUE(assertMatches(0, "x b", "a b", "matches(foo,0)"));
+ EXPECT_TRUE(assertMatches(1, "x b", "a b", "matches(foo,1)"));
+ }
+ { // Test executor for attribute fields
+ FtFeatureTest ft(_factory, StringList().add("matches(foo)").
+ add("matches(baz)").
+ add("matches(foo,0)").
+ add("matches(foo,1)").
+ add("matches(foo,2)").
+ add("matches(foo,3)"));
+ ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "foo");
+ ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "bar");
+ ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "baz");
+ ASSERT_TRUE(ft.getQueryEnv().getBuilder().addAttributeNode("foo") != NULL); // query term 0, hit in foo
+ ASSERT_TRUE(ft.getQueryEnv().getBuilder().addAttributeNode("bar") != NULL); // query term 1, hit in bar
+ ASSERT_TRUE(ft.getQueryEnv().getBuilder().addAttributeNode("foo") != NULL); // query term 2, hit in foo
+ ASSERT_TRUE(ft.setup());
+
+ MatchDataBuilder::UP mdb = ft.createMatchDataBuilder();
+ mdb->setWeight("foo", 0, 0);
+ mdb->setWeight("bar", 1, 0);
+ mdb->apply(1);
+ EXPECT_TRUE(ft.execute(RankResult().addScore("matches(foo)", 1)));
+ EXPECT_TRUE(ft.execute(RankResult().addScore("matches(baz)", 0)));
+ EXPECT_TRUE(ft.execute(RankResult().addScore("matches(foo,0)", 1)));
+ EXPECT_TRUE(ft.execute(RankResult().addScore("matches(foo,1)", 0)));
+ EXPECT_TRUE(ft.execute(RankResult().addScore("matches(foo,2)", 0)));
+ EXPECT_TRUE(ft.execute(RankResult().addScore("matches(foo,3)", 0)));
+ }
+}
+
+bool
+Test::assertMatches(uint32_t output,
+ const vespalib::string & query,
+ const vespalib::string & field,
+ const vespalib::string & feature,
+ uint32_t docId)
+{
+ LOG(info, "assertMatches(%u, '%s', '%s', '%s')", output, query.c_str(), field.c_str(), feature.c_str());
+
+ // Setup feature test.
+ FtFeatureTest ft(_factory, feature);
+ ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo");
+ std::map<vespalib::string, std::vector<vespalib::string> > index;
+ index["foo"] = FtUtil::tokenize(field);
+ FT_SETUP(ft, FtUtil::toQuery(query), index, 1);
+
+ ASSERT_TRUE(ft.execute(output, EPS, docId));
+ // Execute and compare results.
+ if (!EXPECT_TRUE(ft.execute(output, EPS, docId))) return false;
+ return true;
+}
+
+
+void
+Test::testQuery()
+{
+ { // Test blueprint.
+ QueryBlueprint pt;
+
+ EXPECT_TRUE(assertCreateInstance(pt, "query"));
+
+ StringList params, in, out;
+ FT_SETUP_FAIL(pt, params);
+ FT_SETUP_OK(pt, params.add("foo"), in, out.add("out"));
+
+ FT_DUMP_EMPTY(_factory, "query");
+ }
+
+ { // Test executor.
+ RankResult exp;
+ exp.addScore("query(def1)", 1.0).
+ addScore("query(def2)", 2.0).
+ addScore("query(def3)", 0.0).
+ addScore("query(val1)", 1.1).
+ addScore("query(val2)", 2.2).
+ addScore("query(hash1)", vespalib::hash_code("foo")).
+ addScore("query(hash2)", vespalib::hash_code("2")).
+ addScore("query(hash3)", vespalib::hash_code("foo")).
+ addScore("query(hash4)", vespalib::hash_code("'foo"));
+ FtFeatureTest ft(_factory, exp.getKeys());
+ ft.getIndexEnv().getProperties()
+ .add("query(def1)", "1.0")
+ .add("$def2", "2.0");
+ ft.getQueryEnv().getProperties()
+ .add("val1", "1.1")
+ .add("$val2", "2.2")
+ .add("hash1", "foo")
+ .add("hash2", "'2")
+ .add("hash3", "'foo")
+ .add("hash4", "''foo");
+ ASSERT_TRUE(ft.setup());
+ ASSERT_TRUE(ft.execute(exp));
+ }
+}
+
+void
+Test::testQueryTermCount()
+{
+ { // Test blueprint.
+ QueryTermCountBlueprint pt;
+
+ EXPECT_TRUE(assertCreateInstance(pt, "queryTermCount"));
+
+ StringList params, in, out;
+ FT_SETUP_OK(pt, params, in, out.add("out"));
+ FT_SETUP_FAIL(pt, params.add("foo"));
+
+ StringList dump;
+ FT_DUMP(_factory, "queryTermCount", dump.add("queryTermCount"));
+ }
+
+ { // Test executor.
+ FtFeatureTest ft(_factory, "queryTermCount");
+ ASSERT_TRUE(ft.setup());
+ ASSERT_TRUE(ft.execute(RankResult().addScore("queryTermCount", 0)));
+ }
+
+ { // Test executor.
+ FtFeatureTest ft(_factory, "queryTermCount");
+ ft.getQueryEnv().getBuilder().addAllFields();
+ ASSERT_TRUE(ft.setup());
+ ASSERT_TRUE(ft.execute(RankResult().addScore("queryTermCount", 1)));
+ }
+
+ { // Test executor.
+ FtFeatureTest ft(_factory, "queryTermCount");
+ ft.getQueryEnv().getBuilder().addAllFields();
+ ft.getQueryEnv().getBuilder().addAllFields();
+ ASSERT_TRUE(ft.setup());
+ ASSERT_TRUE(ft.execute(RankResult().addScore("queryTermCount", 2)));
+ }
+}
+
+void
+Test::testRandom()
+{
+ { // Test blueprint.
+ RandomBlueprint pt;
+
+ EXPECT_TRUE(assertCreateInstance(pt, "random"));
+
+ StringList params, in, out;
+ FT_SETUP_OK (pt, params, in, out.add("out").add("match"));
+ FT_SETUP_OK (pt, params.add("1"), in, out);
+ FT_SETUP_FAIL(pt, params.add("2"));
+
+ FT_DUMP_EMPTY(_factory, "random");
+ }
+
+ { // Test executor (seed specified through config)
+ FtFeatureTest ft(_factory, "random");
+ ft.getIndexEnv().getProperties().add("random.seed", "100");
+ ASSERT_TRUE(ft.setup());
+ search::Rand48 rnd;
+ rnd.srand48(100);
+ for (uint32_t i = 0; i < 5; ++i) {
+ feature_t exp = rnd.lrand48() / (feature_t)0x80000000u;
+ ASSERT_TRUE(ft.execute(exp, EPS, i + 1));
+ }
+ }
+ { // Test executor (current time used as seed)
+ FtFeatureTest ft(_factory, "random");
+ ASSERT_TRUE(ft.setup());
+ RankResult rr;
+ rr.addScore("random", 1.0f);
+ for (uint32_t i = 0; i < 5; ++i) {
+ feature_t last = rr.getScore("random");
+ rr.clear();
+ ASSERT_TRUE(ft.executeOnly(rr, i + 1));
+ ASSERT_TRUE(last != rr.getScore("random"));
+ }
+ }
+ { // Test executor (random.match)
+ FtFeatureTest ft(_factory, "random.match");
+ ft.getQueryEnv().getProperties().add("random.match.seed", "100");
+ ASSERT_TRUE(ft.setup());
+ search::Rand48 rnd;
+ for (uint32_t i = 1; i <= 5; ++i) {
+ rnd.srand48(100 + i); // seed + lid
+ feature_t exp = rnd.lrand48() / (feature_t)0x80000000u;
+ ASSERT_TRUE(ft.execute(exp, EPS, i));
+ }
+ }
+}
+
+
+void
+Test::testRankingExpression()
+{
+ { // Test blueprint.
+ RankingExpressionBlueprint prototype;
+
+ EXPECT_TRUE(assertCreateInstance(prototype, "rankingExpression"));
+
+ StringList params, in, out;
+ FT_SETUP_FAIL(prototype, params); // requires config to run without params
+ FT_SETUP_OK (prototype, params.add("foo.out"), in.add("foo.out"), out.add("out"));
+ FT_SETUP_FAIL(prototype, params.add("bar.out"));
+ FT_SETUP_OK (prototype, params.clear().add("log((1 + 2)- 3 * 4 / 5 )"), in.clear(), out);
+ FT_SETUP_OK (prototype,
+ params.clear().add("if(if(f1.out<1,0,1)<if(f2.out<2,0,1),f3.out,3)"),
+ in.clear().add("f1.out").add("f2.out").add("f3.out"), out);
+
+ FT_DUMP_EMPTY(_factory, "rankingExpression");
+ }
+
+ { // Test executor.
+ {
+ FtFeatureTest ft(_factory, getExpression("if(1<2,3,4)"));
+ ASSERT_TRUE(ft.setup());
+ EXPECT_TRUE(ft.execute(3.0f));
+ }
+ {
+ FtFeatureTest ft(_factory, getExpression("sqrt(100)"));
+ ASSERT_TRUE(ft.setup());
+ EXPECT_TRUE(ft.execute(10.0f));
+ }
+ {
+ FtFeatureTest ft(_factory, getExpression("mysum(value(4),value(4))"));
+ ASSERT_TRUE(ft.setup());
+ EXPECT_TRUE(ft.execute(8.0f));
+ }
+ {
+ FtFeatureTest ft(_factory, getExpression("if(mysum(value(4),value(4))>3+4,1,0)"));
+ ASSERT_TRUE(ft.setup());
+ EXPECT_TRUE(ft.execute(1.0f));
+ }
+ {
+ FtFeatureTest ft(_factory, "rankingExpression");
+ ft.getIndexEnv().getProperties().add("rankingExpression.rankingScript", "if(1<2,3,4)");
+ ASSERT_TRUE(ft.setup());
+ EXPECT_TRUE(ft.execute(3.0f));
+ }
+ {
+ FtFeatureTest ft(_factory, "rankingExpression(foo)");
+ ft.getIndexEnv().getProperties().add("rankingExpression(foo).rankingScript", "if(1<2,3,4)");
+ ASSERT_TRUE(ft.setup());
+ EXPECT_TRUE(ft.execute(3.0f));
+ }
+ {
+ FtFeatureTest ft(_factory, "rankingExpression");
+ ft.getIndexEnv().getProperties()
+ .add("rankingExpression.rankingScript", "if(")
+ .add("rankingExpression.rankingScript", "1<")
+ .add("rankingExpression.rankingScript", "2,")
+ .add("rankingExpression.rankingScript", "3,")
+ .add("rankingExpression.rankingScript", "4)");
+ ASSERT_TRUE(ft.setup());
+ EXPECT_TRUE(ft.execute(3.0f));
+ }
+ {
+ // test interpreted expression
+ vespalib::string my_expr("3.0 + value(4.0) + sum(tensorFromWeightedSet(query(my_tensor)))");
+ FtFeatureTest ft(_factory, getExpression(my_expr));
+ ft.getQueryEnv().getProperties().add("my_tensor", "{a:1,b:2,c:3}");
+ ASSERT_TRUE(ft.setup());
+ EXPECT_TRUE(ft.execute(13.0));
+ }
+ }
+}
+
+vespalib::string
+Test::getExpression(const vespalib::string &parameter) const
+{
+ typedef search::fef::FeatureNameBuilder FNB;
+ return FNB().baseName("rankingExpression").parameter(parameter).buildName();
+}
+
+void
+Test::testTerm()
+{
+ {
+ // Test blueprint.
+ TermBlueprint pt;
+ {
+ EXPECT_TRUE(assertCreateInstance(pt, "term"));
+
+ StringList params, in, out;
+ FT_SETUP_OK (pt, params.add("0"), in, out.add("connectedness").add("significance").add("weight"));
+ FT_SETUP_FAIL(pt, params.add("1"));
+ }
+ {
+ StringList dump;
+ for (uint32_t term = 0; term < 3; ++term) {
+ vespalib::string bn = vespalib::make_string("term(%u)", term);
+ dump.add(bn + ".connectedness").add(bn + ".significance").add(bn + ".weight");
+ }
+ FtIndexEnvironment ie;
+ ie.getProperties().add("term.numTerms", "3");
+ FT_DUMP(_factory, "term", ie, dump); // check override
+
+ for (uint32_t term = 3; term < 5; ++term) {
+ vespalib::string bn = vespalib::make_string("term(%u)", term);
+ dump.add(bn + ".connectedness").add(bn + ".significance").add(bn + ".weight");
+ }
+ FT_DUMP(_factory, "term", dump); // check default
+ }
+ }
+
+ {
+ // Test executor.
+ FtFeatureTest ft(_factory, "term(0)");
+ ASSERT_TRUE(ft.setup());
+
+ RankResult exp;
+ exp .addScore("term(0).connectedness", 0)
+ .addScore("term(0).significance", 0)
+ .addScore("term(0).weight", 0);
+ ASSERT_TRUE(ft.execute(exp));
+ }
+ {
+ // Test executor.
+ FtFeatureTest ft(_factory, StringList().add("term(1)").add("term(2)"));
+ ft.getIndexEnv().getBuilder()
+ .addField(FieldType::INDEX, CollectionType::SINGLE, "idx1") // field 0
+ .addField(FieldType::INDEX, CollectionType::SINGLE, "idx2") // field 1
+ .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "attr"); // field 2
+ ft.getQueryEnv().getBuilder().addAllFields().setUniqueId(0);
+ ft.getQueryEnv().getBuilder().addAllFields().setUniqueId(1).setWeight(search::query::Weight(200)).lookupField(0)->setDocFreq(0.5);
+ ft.getQueryEnv().getBuilder().addAttributeNode("attr")->setUniqueId(2).setWeight(search::query::Weight(400)).lookupField(2)->setDocFreq(0.25);
+ // setup connectedness between term 1 and term 0
+ ft.getQueryEnv().getProperties().add("vespa.term.1.connexity", "0");
+ ft.getQueryEnv().getProperties().add("vespa.term.1.connexity", "0.7");
+ ASSERT_TRUE(ft.setup());
+
+ RankResult exp;
+ exp.addScore("term(1).significance", util::getSignificance(0.50)).
+ addScore("term(1).weight", 200.0f).
+ addScore("term(1).connectedness", 0.7f).
+ addScore("term(2).significance", util::getSignificance(0.25)).
+ addScore("term(2).weight", 400.0f).
+ addScore("term(2).connectedness", 0.1f). // default connectedness
+ setEpsilon(10e-6);
+ ASSERT_TRUE(ft.execute(exp));
+ }
+ {
+ // Test executor.
+ FtFeatureTest ft(_factory, "term(0)");
+ ft.getQueryEnv().getBuilder().addAllFields().setUniqueId(0);
+ // setup significance for term 0
+ ft.getQueryEnv().getProperties().add("vespa.term.0.significance", "0.3");
+ ASSERT_TRUE(ft.setup());
+
+ ASSERT_TRUE(ft.execute(RankResult().addScore("term(0).significance", 0.3f).setEpsilon(10e-6)));
+ }
+}
+
+void
+Test::testTermDistance()
+{
+ { // test blueprint
+ TermDistanceBlueprint pt;
+ {
+ EXPECT_TRUE(assertCreateInstance(pt, "termDistance"));
+
+ StringList params, in, out;
+ FtIndexEnvironment ie;
+ ie.getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo");
+ ie.getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "bar");
+ FT_SETUP_FAIL(pt, params);
+ FT_SETUP_FAIL(pt, ie, params.add("baz").add("0").add("0"));
+ FT_SETUP_FAIL(pt, ie, params.clear().add("bar").add("0").add("0"));
+
+ FT_SETUP_OK(pt, ie, params.clear().add("foo").add("0").add("0"),
+ in, out.add("forward").add("forwardTermPosition")
+ .add("reverse").add("reverseTermPosition"));
+ }
+ {
+ FT_DUMP_EMPTY(_factory, "termDistance");
+ }
+ }
+
+ { // test executor
+ typedef TermDistanceCalculator::Result Result;
+ const uint32_t UV = TermDistanceCalculator::UNDEFINED_VALUE;
+
+ EXPECT_TRUE(assertTermDistance(Result(), "a b", "x x"));
+ EXPECT_TRUE(assertTermDistance(Result(), "a b", "a x"));
+ EXPECT_TRUE(assertTermDistance(Result(), "a b", "x b"));
+ EXPECT_TRUE(assertTermDistance(Result(), "a", "a b"));
+ EXPECT_TRUE(assertTermDistance(Result(), "a", "a a"));
+ EXPECT_TRUE(assertTermDistance(Result(1,0,UV,UV), "a b", "a b"));
+ EXPECT_TRUE(assertTermDistance(Result(2,0,UV,UV), "a b", "a x b"));
+ EXPECT_TRUE(assertTermDistance(Result(UV,UV,1,0), "a b", "b a"));
+ EXPECT_TRUE(assertTermDistance(Result(UV,UV,2,0), "a b", "b x a"));
+ EXPECT_TRUE(assertTermDistance(Result(2,18,1,20), "a b", "a x x x x x b x x x x a x x x b x x a x b a"));
+ EXPECT_TRUE(assertTermDistance(Result(1,0,2,1), "a b", "a b x a x x b x x x a x x x x b x x x x x a"));
+ EXPECT_TRUE(assertTermDistance(Result(1,0,1,1), "a b", "a b a b a")); // first best is kept
+ EXPECT_TRUE(assertTermDistance(Result(1,0,1,0), "a a", "a a"));
+ EXPECT_TRUE(assertTermDistance(Result(2,0,2,0), "a a", "a x a"));
+ }
+}
+
+bool
+Test::assertTermDistance(const TermDistanceCalculator::Result & exp,
+ const vespalib::string & query,
+ const vespalib::string & field,
+ uint32_t docId)
+{
+ LOG(info, "assertTermDistance('%s', '%s')", query.c_str(), field.c_str());
+
+ vespalib::string feature = "termDistance(foo,0,1)";
+ FtFeatureTest ft(_factory, feature);
+
+ ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo");
+ StringVectorMap index;
+ index["foo"] = FtUtil::tokenize(field);
+ FT_SETUP(ft, FtUtil::toQuery(query), index, 1);
+
+ RankResult rr;
+ rr.addScore(feature + ".forward", exp.forwardDist);
+ rr.addScore(feature + ".forwardTermPosition", exp.forwardTermPos);
+ rr.addScore(feature + ".reverse", exp.reverseDist);
+ rr.addScore(feature + ".reverseTermPosition", exp.reverseTermPos);
+ if (!EXPECT_TRUE(ft.execute(rr, docId))) {
+ return false;
+ }
+ return true;
+}
+
+void
+Test::testUtils()
+{
+ { // getSignificance
+ EXPECT_APPROX(util::getSignificance(0.0), 1, EPS);
+ EXPECT_APPROX(util::getSignificance(0.0 + 1.0e-7), 1, EPS);
+ EXPECT_APPROX(util::getSignificance(1.0), 0.5, EPS);
+ EXPECT_APPROX(util::getSignificance(1.0 + 1.0e-7), 0.5, EPS);
+ feature_t last = 1;
+ for (uint32_t i = 2; i <= 100; i = i + 1) {
+ feature_t s = util::getSignificance(i * 1.0e-6);
+ EXPECT_GREATER(s, 0);
+ EXPECT_LESS(s, 1);
+ EXPECT_LESS(s, last);
+ last = s;
+ }
+ for (uint32_t i = 999900; i <= 1000000; i = i + 1) {
+ feature_t s = util::getSignificance(i * 1.0e-6);
+ EXPECT_GREATER(s, 0);
+ EXPECT_LESS(s, 1);
+ EXPECT_LESS(s, last);
+ last = s;
+ }
+ }
+}
+
diff --git a/searchlib/src/tests/features/prod_features.h b/searchlib/src/tests/features/prod_features.h
new file mode 100644
index 00000000000..dd15981af1f
--- /dev/null
+++ b/searchlib/src/tests/features/prod_features.h
@@ -0,0 +1,175 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/features/distancetopathfeature.h>
+#include <vespa/searchlib/features/termdistancefeature.h>
+#include <vespa/searchlib/fef/test/ftlib.h>
+
+class Test : public FtTestApp
+{
+public:
+ int Main();
+ void testFramework();
+ void testFtLib();
+ void testAge();
+ void testAttribute();
+ void testAttributeMatch();
+ void testCloseness();
+ void testDistance();
+ void testDistanceToPath();
+ void testDotProduct();
+ void testFieldLength();
+ void testFieldMatch();
+ void testFieldTermMatch();
+ void testFirstPhase();
+ void testForeach();
+ void testFreshness();
+ void testMatch();
+ void testMatches();
+ void testNow();
+ void testQuery();
+ void testQueryTermCount();
+ void testRandom();
+ void testRankingExpression();
+ void testTerm();
+ void testTermDistance();
+ void testUtils();
+
+private:
+ void
+ testFieldMatchBluePrint();
+
+ void
+ testFieldMatchExecutor();
+
+ void
+ testFieldMatchExecutorOutOfOrder();
+
+ void
+ testFieldMatchExecutorSegments();
+
+ void
+ testFieldMatchExecutorGaps();
+
+ void
+ testFieldMatchExecutorHead();
+
+ void
+ testFieldMatchExecutorTail();
+
+ void
+ testFieldMatchExecutorLongestSequence();
+
+ void
+ testFieldMatchExecutorMatches();
+
+ void
+ testFieldMatchExecutorCompleteness();
+
+ void
+ testFieldMatchExecutorOrderness();
+
+ void
+ testFieldMatchExecutorRelatedness();
+
+ void
+ testFieldMatchExecutorLongestSequenceRatio();
+
+ void
+ testFieldMatchExecutorEarliness();
+
+ void
+ testFieldMatchExecutorWeight();
+
+ void
+ testFieldMatchExecutorSignificance();
+
+ void
+ testFieldMatchExecutorImportance();
+
+ void
+ testFieldMatchExecutorOccurrence();
+
+ void
+ testFieldMatchExecutorAbsoluteOccurrence();
+
+ void
+ testFieldMatchExecutorWeightedOccurrence();
+
+ void
+ testFieldMatchExecutorWeightedAbsoluteOccurrence();
+
+ void
+ testFieldMatchExecutorSignificantOccurrence();
+
+ void
+ testFieldMatchExecutorUnweightedProximity();
+
+ void
+ testFieldMatchExecutorReverseProximity();
+
+ void
+ testFieldMatchExecutorAbsoluteProximity();
+
+ void
+ testFieldMatchExecutorMultiSegmentProximity();
+
+ void
+ testFieldMatchExecutorSegmentDistance();
+
+ void
+ testFieldMatchExecutorSegmentProximity();
+
+ void
+ testFieldMatchExecutorSegmentStarts();
+
+ void
+ testFieldMatchExecutorMoreThanASegmentLengthOfUnmatchedQuery();
+
+ void
+ testFieldMatchExecutorQueryRepeats();
+
+ void
+ testFieldMatchExecutorZeroCases();
+
+ void
+ testFieldMatchExecutorExceedingIterationLimit();
+
+ void
+ testFieldMatchExecutorRemaining();
+
+
+ void assertAge(feature_t expAge, const vespalib::string & attr, uint64_t now, uint64_t docTime);
+ void setupForAgeTest(FtFeatureTest & ft, uint64_t docTime);
+ void setupForAttributeTest(FtFeatureTest &ft, bool setup_env = true);
+ void assertCloseness(feature_t exp, const vespalib::string & attr, double distance, double maxDistance = 0, double halfResponse = 0);
+ void setupForDistanceTest(FtFeatureTest & ft, const vespalib::string & attrName,
+ const std::vector<std::pair<int32_t, int32_t> > & positions, bool zcurve);
+ void assert2DZDistance(feature_t exp, const vespalib::string & positions,
+ int32_t xquery, int32_t yquery, uint32_t xAspect = 0);
+ void assertDistanceToPath(const std::vector<std::pair<int32_t, int32_t> > pos, const vespalib::string &path,
+ feature_t distance = search::features::DistanceToPathExecutor::DEFAULT_DISTANCE,
+ feature_t traveled = 1, feature_t product = 0);
+ void setupForDocumentTest(FtFeatureTest &ft, const vespalib::string & attrName, const vespalib::string & docType);
+ void assertDotProduct(feature_t exp, const vespalib::string & vector, uint32_t docId = 1,
+ const vespalib::string & attribute = "wsstr", const vespalib::string & attributeOverride="");
+ void setupForDotProductTest(FtFeatureTest & ft);
+ void assertFieldMatch(const vespalib::string & spec, const vespalib::string & query, const vespalib::string & field,
+ const search::features::fieldmatch::Params * params = NULL, uint32_t totalTermWeight = 0, feature_t totalSignificance = 0.0f);
+ void assertFieldMatch(const vespalib::string & spec, const vespalib::string & query, const vespalib::string & field,
+ uint32_t totalTermWeight);
+ void assertFieldMatchTS(const vespalib::string & spec, const vespalib::string & query, const vespalib::string & field,
+ feature_t totalSignificance);
+ vespalib::string getExpression(const vespalib::string &parameter) const;
+ void assertForeachOperation(feature_t exp, const vespalib::string & cond, const vespalib::string & op);
+ void assertFreshness(feature_t expFreshness, const vespalib::string & attr, uint32_t age, uint32_t maxAge = 0, double halfResponse = 0, bool logScale = false);
+ bool assertTermDistance(const search::features::TermDistanceCalculator::Result & exp, const vespalib::string & query,
+ const vespalib::string & field, uint32_t docId = 1);
+ bool assertMatches(uint32_t output, const vespalib::string & query, const vespalib::string & field,
+ const vespalib::string & feature = "matches(foo)", uint32_t docId = 1);
+
+private:
+ search::fef::BlueprintFactory _factory;
+};
+
diff --git a/searchlib/src/tests/features/prod_features_attributematch.cpp b/searchlib/src/tests/features/prod_features_attributematch.cpp
new file mode 100644
index 00000000000..06b2b859709
--- /dev/null
+++ b/searchlib/src/tests/features/prod_features_attributematch.cpp
@@ -0,0 +1,300 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".prod_features_attributematch");
+
+#include "prod_features.h"
+#include <vespa/searchlib/features/attributematchfeature.h>
+#include <vespa/searchlib/attribute/attributefactory.h>
+
+using namespace search::features;
+using namespace search::fef;
+using namespace search::fef::test;
+
+using search::AttributeVector;
+using search::AttributeFactory;
+
+typedef AttributeVector::SP AttributePtr;
+
+typedef search::attribute::Config AVC;
+typedef search::attribute::BasicType AVBT;
+typedef search::attribute::CollectionType AVCT;
+
+
+void
+Test::testAttributeMatch()
+{
+ AttributeMatchBlueprint pt;
+ {
+ EXPECT_TRUE(assertCreateInstance(pt, "attributeMatch"));
+
+ StringList params, in, out;
+ FT_SETUP_FAIL(pt, params); // expects 1 param
+ FT_SETUP_FAIL(pt, params.add("foo")); // field must exists
+
+ FtIndexEnvironment idx_env;
+ idx_env.getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo");
+ FT_SETUP_FAIL(pt, idx_env, params); // field must be an attribute
+ idx_env.getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "sint");
+
+ FT_SETUP_OK(pt, idx_env, params.clear().add("sint"), in, out
+ .add("completeness")
+ .add("queryCompleteness")
+ .add("fieldCompleteness")
+ .add("normalizedWeight")
+ .add("normalizedWeightedWeight")
+ .add("weight")
+ .add("significance")
+ .add("importance")
+ .add("matches")
+ .add("totalWeight")
+ .add("averageWeight"));
+
+ FT_DUMP_EMPTY(_factory, "attributeMatch");
+
+ FT_DUMP(_factory, "attributeMatch", idx_env, out.clear()
+ .add("attributeMatch(sint)")
+ .add("attributeMatch(sint).completeness")
+ .add("attributeMatch(sint).queryCompleteness")
+ .add("attributeMatch(sint).fieldCompleteness")
+ .add("attributeMatch(sint).normalizedWeight")
+ .add("attributeMatch(sint).normalizedWeightedWeight")
+ .add("attributeMatch(sint).weight")
+ .add("attributeMatch(sint).significance")
+ .add("attributeMatch(sint).importance")
+ .add("attributeMatch(sint).matches")
+ .add("attributeMatch(sint).totalWeight")
+ .add("attributeMatch(sint).averageWeight"));
+ }
+
+ { // single attributes
+ FtFeatureTest ft(_factory, StringList().
+ add("attributeMatch(sint)").add("attributeMatch(sfloat)").add("attributeMatch(sstr)"));
+ ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "sint"); // 2 matches
+ ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "sfloat"); // 1 matches
+ ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "sstr"); // 0 matches
+ ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo");
+ setupForAttributeTest(ft);
+ ASSERT_TRUE(ft.getQueryEnv().getBuilder().addAttributeNode("sint") != NULL); // query term 0, hit in sint
+ ASSERT_TRUE(ft.getQueryEnv().getBuilder().addAttributeNode("sint") != NULL); // query term 1, ..
+ ASSERT_TRUE(ft.getQueryEnv().getBuilder().addAttributeNode("sint") != NULL); // query term 2, ..
+ ASSERT_TRUE(ft.getQueryEnv().getBuilder().addAttributeNode("sint") != NULL); // query term 3, ..
+ ASSERT_TRUE(ft.getQueryEnv().getBuilder().addAttributeNode("sfloat") != NULL); // query term 4, hit in sfloat
+ ASSERT_TRUE(ft.getQueryEnv().getBuilder().addIndexNode(StringList().add("foo")) != NULL);
+ ft.getQueryEnv().getTerms()[0].setWeight(search::query::Weight(20));
+ ft.getQueryEnv().getTerms()[0].setUniqueId(0);
+ ft.getQueryEnv().getTerms()[1].setWeight(search::query::Weight(20));
+ ft.getQueryEnv().getTerms()[1].setUniqueId(1);
+ ft.getQueryEnv().getTerms()[2].setWeight(search::query::Weight(10));
+ ft.getQueryEnv().getTerms()[2].setUniqueId(1);
+ ft.getQueryEnv().getTerms()[3].setWeight(search::query::Weight(10));
+ ft.getQueryEnv().getTerms()[3].setUniqueId(1);
+ ft.getQueryEnv().getTerms()[4].setWeight(search::query::Weight(20));
+ ft.getQueryEnv().getTerms()[4].setUniqueId(1);
+ ft.getQueryEnv().getTerms()[5].setWeight(search::query::Weight(20));
+ ft.getQueryEnv().getTerms()[5].setUniqueId(1);
+ ft.getQueryEnv().getProperties().add("vespa.term.0.significance", "0.5"); // change significance for term 0
+ ft.getQueryEnv().getProperties().add("vespa.term.1.significance", "0.1"); // change significance for all other terms
+ ASSERT_TRUE(ft.setup());
+
+ MatchDataBuilder::UP mdb = ft.createMatchDataBuilder();
+ mdb->setWeight("sint", 0, 0);
+ mdb->setWeight("sint", 1, 0);
+ mdb->setWeight("sfloat", 4, 0);
+ mdb->apply(1);
+ RankResult exp;
+ exp.addScore("attributeMatch(sint)", 0.5f). // same as completeness
+ addScore("attributeMatch(sint).matches", 2).
+ addScore("attributeMatch(sint).totalWeight", 0).
+ addScore("attributeMatch(sint).averageWeight", 0).
+ addScore("attributeMatch(sint).completeness", 0.5f).
+ addScore("attributeMatch(sint).queryCompleteness", 0.5f).
+ addScore("attributeMatch(sint).fieldCompleteness", 1).
+ addScore("attributeMatch(sint).normalizedWeight", 0).
+ addScore("attributeMatch(sint).normalizedWeightedWeight", 0).
+ addScore("attributeMatch(sint).weight", 0.4).
+ addScore("attributeMatch(sint).significance", 0.6).
+ addScore("attributeMatch(sint).importance", 0.5).
+ addScore("attributeMatch(sfloat)", 1). // same as completeness
+ addScore("attributeMatch(sfloat).matches", 1).
+ addScore("attributeMatch(sfloat).totalWeight", 0).
+ addScore("attributeMatch(sfloat).averageWeight", 0).
+ addScore("attributeMatch(sfloat).completeness", 1).
+ addScore("attributeMatch(sfloat).queryCompleteness", 1).
+ addScore("attributeMatch(sfloat).fieldCompleteness", 1).
+ addScore("attributeMatch(sfloat).normalizedWeight", 0).
+ addScore("attributeMatch(sfloat).normalizedWeightedWeight", 0).
+ addScore("attributeMatch(sfloat).weight", 0.2).
+ addScore("attributeMatch(sfloat).significance", 0.1).
+ addScore("attributeMatch(sfloat).importance", 0.15).
+ addScore("attributeMatch(sstr)", 0). // same as completeness
+ addScore("attributeMatch(sstr).matches", 0).
+ addScore("attributeMatch(sstr).totalWeight", 0).
+ addScore("attributeMatch(sstr).averageWeight", 0).
+ addScore("attributeMatch(sstr).completeness", 0).
+ addScore("attributeMatch(sstr).queryCompleteness", 0).
+ addScore("attributeMatch(sstr).fieldCompleteness", 0).
+ addScore("attributeMatch(sstr).normalizedWeight", 0).
+ addScore("attributeMatch(sstr).normalizedWeightedWeight", 0).
+ addScore("attributeMatch(sstr).weight", 0).
+ addScore("attributeMatch(sstr).significance", 0).
+ addScore("attributeMatch(sstr).importance", 0).
+ setEpsilon(10e-6);
+ ASSERT_TRUE(ft.execute(exp));
+ ASSERT_TRUE(ft.execute(exp));
+ }
+
+ { // array attributes
+
+ FtFeatureTest ft(_factory, StringList().add("attributeMatch(aint)"));
+ ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::ARRAY, "aint"); // 1 matches
+ ft.getIndexEnv().getProperties().add("attributeMatch(aint).fieldCompletenessImportance", "0.5");
+ setupForAttributeTest(ft);
+ ASSERT_TRUE(ft.getQueryEnv().getBuilder().addAttributeNode("aint") != NULL); // 0
+ ASSERT_TRUE(ft.setup());
+
+ MatchDataBuilder::UP mdb = ft.createMatchDataBuilder();
+ mdb->setWeight("aint", 0, 0);
+ mdb->apply(1);
+ RankResult exp;
+ exp.addScore("attributeMatch(aint)", 0.75f) // same as completeness
+ .addScore("attributeMatch(aint).matches", 1)
+ .addScore("attributeMatch(aint).totalWeight", 0)
+ .addScore("attributeMatch(aint).averageWeight", 0)
+ .addScore("attributeMatch(aint).completeness", 0.75f)
+ .addScore("attributeMatch(aint).queryCompleteness", 1)
+ .addScore("attributeMatch(aint).fieldCompleteness", 0.5f)
+ .addScore("attributeMatch(aint).normalizedWeight", 0)
+ .addScore("attributeMatch(aint).normalizedWeightedWeight", 0);
+ ASSERT_TRUE(ft.execute(exp));
+ ASSERT_TRUE(ft.execute(exp));
+ }
+
+ { // weighted set attributes
+ FtFeatureTest ft(_factory, StringList().
+ add("attributeMatch(wsint)").add("attributeMatch(wsfloat)").add("attributeMatch(wsstr)"));
+ ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::WEIGHTEDSET, "wsint"); // 2 matches
+ ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::WEIGHTEDSET, "wsfloat"); // 1 matches
+ ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::WEIGHTEDSET, "wsstr"); // 0 matches
+ ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo");
+ ft.getIndexEnv().getProperties().add("attributeMatch(wsint).maxWeight", "100");
+ setupForAttributeTest(ft);
+ ASSERT_TRUE(ft.getQueryEnv().getBuilder().addAttributeNode("wsint") != NULL); // 0
+ ft.getQueryEnv().getTerms()[0].setWeight(search::query::Weight(2));
+ ASSERT_TRUE(ft.getQueryEnv().getBuilder().addAttributeNode("wsint") != NULL); // 1
+ ft.getQueryEnv().getTerms()[1].setWeight(search::query::Weight(3));
+ ASSERT_TRUE(ft.getQueryEnv().getBuilder().addAttributeNode("wsfloat") != NULL); // 2
+ ft.getQueryEnv().getTerms()[2].setWeight(search::query::Weight(0));
+ ASSERT_TRUE(ft.getQueryEnv().getBuilder().addIndexNode(StringList().add("foo")) != NULL);
+ ft.getQueryEnv().getTerms()[3].setWeight(search::query::Weight(0));
+ ASSERT_TRUE(ft.setup());
+
+ MatchDataBuilder::UP mdb = ft.createMatchDataBuilder();
+ mdb->setWeight("wsint", 0, 10);
+ mdb->setWeight("wsint", 1, 20);
+ mdb->setWeight("wsfloat", 2, -30);
+ mdb->apply(1);
+ RankResult exp;
+
+ // test all three attributes
+ exp.addScore("attributeMatch(wsint)", 1). // same as completeness
+ addScore("attributeMatch(wsint).matches", 2).
+ addScore("attributeMatch(wsint).totalWeight", 30).
+ addScore("attributeMatch(wsint).averageWeight", 15).
+ addScore("attributeMatch(wsint).completeness", 1).
+ addScore("attributeMatch(wsint).queryCompleteness", 1).
+ addScore("attributeMatch(wsint).fieldCompleteness", 1).
+ addScore("attributeMatch(wsint).normalizedWeight", 0.1f).
+ addScore("attributeMatch(wsint).normalizedWeightedWeight", 0.16f).
+ addScore("attributeMatch(wsfloat)", 0.95). // same as completeness
+ addScore("attributeMatch(wsfloat).matches", 1).
+ addScore("attributeMatch(wsfloat).totalWeight", -30).
+ addScore("attributeMatch(wsfloat).averageWeight", -30).
+ addScore("attributeMatch(wsfloat).completeness", 0.95).
+ addScore("attributeMatch(wsfloat).queryCompleteness", 1).
+ addScore("attributeMatch(wsfloat).fieldCompleteness", 0).
+ addScore("attributeMatch(wsfloat).normalizedWeight", 0).
+ addScore("attributeMatch(wsfloat).normalizedWeightedWeight", 0).
+ addScore("attributeMatch(wsstr)", 0). // same as completeness
+ addScore("attributeMatch(wsstr).matches", 0).
+ addScore("attributeMatch(wsstr).totalWeight", 0).
+ addScore("attributeMatch(wsstr).averageWeight", 0).
+ addScore("attributeMatch(wsstr).completeness", 0).
+ addScore("attributeMatch(wsstr).queryCompleteness", 0).
+ addScore("attributeMatch(wsstr).fieldCompleteness", 0).
+ addScore("attributeMatch(wsstr).normalizedWeight", 0).
+ addScore("attributeMatch(wsstr).normalizedWeightedWeight", 0).
+ setEpsilon(10e-6);
+ ASSERT_TRUE(ft.execute(exp));
+ ASSERT_TRUE(ft.execute(exp));
+
+ // test fieldCompleteness
+ mdb->setWeight("wsint", 0, 0);
+ mdb->setWeight("wsint", 1, 15);
+ mdb->apply(1);
+ exp.clear().
+ addScore("attributeMatch(wsint).fieldCompleteness", 0.5f);
+ ASSERT_TRUE(ft.execute(exp));
+
+ // test that normalized values lies in the interval [0,1].
+ mdb->setWeight("wsfloat", 2, 1000);
+ mdb->apply(1);
+ ft.getQueryEnv().getTerms()[2].setWeight(search::query::Weight(100));
+ exp.clear().
+ addScore("attributeMatch(wsfloat).normalizedWeight", 1).
+ addScore("attributeMatch(wsfloat).normalizedWeightedWeight", 1);
+ ASSERT_TRUE(ft.execute(exp));
+ }
+
+ { // unique only attribute
+ FtFeatureTest ft(_factory, "attributeMatch(unique)");
+ ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "unique");
+ setupForAttributeTest(ft);
+ ASSERT_TRUE(ft.getQueryEnv().getBuilder().addAttributeNode("unique") != NULL);
+ ASSERT_TRUE(ft.setup());
+
+ RankResult exp;
+ exp.addScore("attributeMatch(unique)", 0). // same as completeness
+ addScore("attributeMatch(unique).matches", 0).
+ addScore("attributeMatch(unique).totalWeight", 0).
+ addScore("attributeMatch(unique).averageWeight", 0).
+ addScore("attributeMatch(unique).completeness", 0).
+ addScore("attributeMatch(unique).queryCompleteness", 0).
+ addScore("attributeMatch(unique).fieldCompleteness", 0).
+ addScore("attributeMatch(unique).normalizedWeight", 0).
+ addScore("attributeMatch(unique).normalizedWeightedWeight", 0);
+ ASSERT_TRUE(ft.execute(exp));
+ }
+ {
+ FtFeatureTest ft(_factory, StringList().add("attributeMatch(aint)").add("attributeMatch(wint)"));
+ ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::ARRAY, "aint");
+ ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::WEIGHTEDSET, "wint");
+
+ // setup an array and wset attributes with 0 elements
+ AttributePtr aint = AttributeFactory::createAttribute("aint", AVC (AVBT::INT32, AVCT::ARRAY));
+ AttributePtr wint = AttributeFactory::createAttribute("wint", AVC(AVBT::INT32, AVCT::WSET));
+ aint->addReservedDoc();
+ wint->addReservedDoc();
+ ft.getIndexEnv().getAttributeManager().add(aint);
+ ft.getIndexEnv().getAttributeManager().add(wint);
+ aint->addDocs(1);
+ aint->commit();
+ ASSERT_TRUE(aint->getValueCount(0) == 0);
+ wint->addDocs(1);
+ wint->commit();
+ ASSERT_TRUE(wint->getValueCount(0) == 0);
+
+ ASSERT_TRUE(ft.getQueryEnv().getBuilder().addAttributeNode("aint") != NULL);
+ ASSERT_TRUE(ft.getQueryEnv().getBuilder().addAttributeNode("wint") != NULL);
+ ASSERT_TRUE(ft.setup());
+
+ RankResult exp;
+ exp.addScore("attributeMatch(aint)", 0). // same as completeness
+ addScore("attributeMatch(aint).completeness", 0).
+ addScore("attributeMatch(aint).fieldCompleteness", 0).
+ addScore("attributeMatch(wint)", 0). // same as completeness
+ addScore("attributeMatch(wint).completeness", 0).
+ addScore("attributeMatch(wint).fieldCompleteness", 0);
+ ASSERT_TRUE(ft.execute(exp));
+ }
+}
diff --git a/searchlib/src/tests/features/prod_features_fieldmatch.cpp b/searchlib/src/tests/features/prod_features_fieldmatch.cpp
new file mode 100644
index 00000000000..e26d6a92fa6
--- /dev/null
+++ b/searchlib/src/tests/features/prod_features_fieldmatch.cpp
@@ -0,0 +1,1079 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".prod_features_fieldmatch");
+
+#include <vespa/searchlib/fef/test/ftlib.h>
+
+#include "prod_features.h"
+
+#include <vespa/searchlib/features/fieldmatchfeature.h>
+
+using namespace search::features;
+using namespace search::fef;
+using namespace search::fef::test;
+
+using search::AttributeVector;
+
+void
+Test::testFieldMatch()
+{
+ testFieldMatchBluePrint();
+ testFieldMatchExecutor();
+}
+
+
+void
+Test::testFieldMatchBluePrint()
+{
+ FieldMatchBlueprint pt;
+ StringList out;
+ out.add("score").
+ add("proximity").
+ add("completeness").
+ add("queryCompleteness").
+ add("fieldCompleteness").
+ add("orderness").
+ add("relatedness").
+ add("earliness").
+ add("longestSequenceRatio").
+ add("segmentProximity").
+ add("unweightedProximity").
+ add("absoluteProximity").
+ add("occurrence").
+ add("absoluteOccurrence").
+ add("weightedOccurrence").
+ add("weightedAbsoluteOccurrence").
+ add("significantOccurrence").
+
+ add("weight").
+ add("significance").
+ add("importance").
+
+ add("segments").
+ add("matches").
+ add("outOfOrder").
+ add("gaps").
+ add("gapLength").
+ add("longestSequence").
+ add("head").
+ add("tail").
+ add("segmentDistance").
+ add("degradedMatches");
+ {
+ EXPECT_TRUE(assertCreateInstance(pt, "fieldMatch"));
+
+ StringList params, in;
+ FT_SETUP_FAIL(pt, params);
+ FT_SETUP_FAIL(pt, params.add("foo"));
+ FT_SETUP_FAIL(pt, params.add("bar"));
+ params.clear();
+
+ {
+ FtIndexEnvironment ie;
+ ie.getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "foo");
+ ie.getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "bar");
+ ie.getBuilder().addField(FieldType::INDEX, CollectionType::ARRAY, "abar");
+ ie.getBuilder().addField(FieldType::INDEX, CollectionType::WEIGHTEDSET, "wbar");
+ FT_SETUP_FAIL(pt, ie, params.add("foo"));
+ FT_SETUP_FAIL(pt, ie, params.add("abar"));
+ FT_SETUP_FAIL(pt, ie, params.add("wbar"));
+
+ FT_SETUP_OK(pt, ie, params.clear().add("bar"), in, out);
+ }
+
+ { // test illegal proximity table
+ FtIndexEnvironment ie;
+ ie.getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo");
+ Properties & p = ie.getProperties();
+ p.add("fieldMatch(foo).proximityLimit", "1");
+
+ // too few elements, should be 3 (1*2 + 1)
+ p.add("fieldMatch(foo).proximityTable", "0.5");
+ p.add("fieldMatch(foo).proximityTable", "1.0");
+ FT_SETUP_FAIL(pt, ie, params);
+
+ // too many elements, should be 3 (1*2 + 1)
+ p.add("fieldMatch(foo).proximityTable", "1.0");
+ p.add("fieldMatch(foo).proximityTable", "0.5");
+ FT_SETUP_FAIL(pt, ie, params);
+ }
+ }
+ { // test dumping with a regular index field
+ FT_DUMP_EMPTY(_factory, "fieldMatch");
+
+ FtIndexEnvironment ie;
+ ie.getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "foo");
+ FT_DUMP_EMPTY(_factory, "fieldMatch", ie); // must be an index field
+
+ ie.getBuilder().addField(FieldType::INDEX, CollectionType::ARRAY, "abar");
+ FT_DUMP_EMPTY(_factory, "fieldMatch", ie); // must be single value
+
+ ie.getBuilder().addField(FieldType::INDEX, CollectionType::WEIGHTEDSET, "wbar");
+ FT_DUMP_EMPTY(_factory, "fieldMatch", ie); // must be single value
+
+ StringList dump;
+ ie.getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "bar");
+ vespalib::string bn = "fieldMatch(bar)";
+ dump.add(bn);
+ for (uint32_t i = 1; i < out.size(); ++i) {
+ dump.add(bn + "." + out[i]);
+ }
+ FT_DUMP(_factory, "fieldMatch", ie, dump);
+ }
+
+ { // test dumping with a filter index field
+ FtIndexEnvironment ie;
+ ie.getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo");
+ ie.getFields()[0].setFilter(true);
+
+ StringList dump;
+ vespalib::string bn = "fieldMatch(foo)";
+ dump.add(bn);
+ dump.add(bn + ".completeness");
+ dump.add(bn + ".queryCompleteness");
+ dump.add(bn + ".weight");
+ dump.add(bn + ".matches");
+ dump.add(bn + ".degradedMatches");
+ FT_DUMP(_factory, "fieldMatch", ie, dump);
+ }
+}
+
+
+void
+Test::testFieldMatchExecutor()
+{
+ testFieldMatchExecutorOutOfOrder();
+ testFieldMatchExecutorSegments();
+ testFieldMatchExecutorGaps();
+ testFieldMatchExecutorHead();
+ testFieldMatchExecutorTail();
+ testFieldMatchExecutorLongestSequence();
+ testFieldMatchExecutorMatches();
+ testFieldMatchExecutorCompleteness();
+ testFieldMatchExecutorOrderness();
+ testFieldMatchExecutorRelatedness();
+ testFieldMatchExecutorLongestSequenceRatio();
+ testFieldMatchExecutorEarliness();
+ testFieldMatchExecutorWeight();
+ testFieldMatchExecutorSignificance();
+ testFieldMatchExecutorImportance();
+ testFieldMatchExecutorOccurrence();
+ testFieldMatchExecutorAbsoluteOccurrence();
+ testFieldMatchExecutorWeightedOccurrence();
+ testFieldMatchExecutorWeightedAbsoluteOccurrence();
+ testFieldMatchExecutorSignificantOccurrence();
+ testFieldMatchExecutorUnweightedProximity();
+ testFieldMatchExecutorReverseProximity();
+ testFieldMatchExecutorAbsoluteProximity();
+ testFieldMatchExecutorMultiSegmentProximity();
+ testFieldMatchExecutorSegmentDistance();
+ testFieldMatchExecutorSegmentProximity();
+ testFieldMatchExecutorSegmentStarts();
+ testFieldMatchExecutorMoreThanASegmentLengthOfUnmatchedQuery();
+ testFieldMatchExecutorQueryRepeats();
+ testFieldMatchExecutorZeroCases();
+ testFieldMatchExecutorExceedingIterationLimit();
+ testFieldMatchExecutorRemaining();
+}
+
+
+void
+Test::testFieldMatchExecutorOutOfOrder()
+{
+ assertFieldMatch("outOfOrder:0","a","a");
+ assertFieldMatch("outOfOrder:0","a b c","a b c");
+ assertFieldMatch("outOfOrder:1","a b c","a c b");
+ assertFieldMatch("outOfOrder:2","a b c","c b a");
+ assertFieldMatch("outOfOrder:2","a b c d e","c x a b x x x x x e x x d");
+ assertFieldMatch("outOfOrder:2","a b c d e","c x a b x x x x x e x x d");
+ assertFieldMatch("outOfOrder:2","a b c d e","c x a b x x x x x e x x d");
+}
+
+
+void
+Test::testFieldMatchExecutorSegments()
+{
+ assertFieldMatch("segments:1","a","a");
+ assertFieldMatch("segments:1","a b c","a b c");
+ assertFieldMatch("segments:1","a b c","a x x b c");
+ assertFieldMatch("segments:2","a b c","a x x x x x x x x x x x x x x x x x x x b c");
+ assertFieldMatch("segments:2","a b c","b c x x x x x x x x x x x x x x x x x x x a");
+ assertFieldMatch("segments:2 gaps:1","a b c","x x x a x x x x x x x x x x x x x x x x x x x b x x c x x");
+ assertFieldMatch("segments:2 gaps:0 outOfOrder:0","a b c","b c x x x x x x x x x x x x x x x x x x x a");
+ assertFieldMatch("segments:2 gaps:1","a b c","x x x b x x c x x x x x x x x x x x x x x x x x x x a x x");
+ assertFieldMatch("segments:2 gaps:1","a y y b c","x x x b x x c x x x x x x x x x x x x x x x x x x x a x x");
+}
+
+
+void
+Test::testFieldMatchExecutorGaps()
+{
+ assertFieldMatch("gaps:0","a","a");
+ assertFieldMatch("gaps:0","x�a","a"); // TODO: which char ?
+ assertFieldMatch("gaps:0 gapLength:0","a b c","a b c");
+ assertFieldMatch("gaps:1 gapLength:1","a b","b a");
+ assertFieldMatch("gaps:1 gapLength:1","a b c","a x b c");
+ assertFieldMatch("gaps:1 gapLength:3","a b c","a x X Xb c");
+ assertFieldMatch("gaps:2 gapLength:2 outOfOrder:1","a b c","a c b");
+ assertFieldMatch("gaps:2 gapLength:2 outOfOrder:0","a b c","a x b x c");
+ assertFieldMatch("gaps:2 gapLength:5 outOfOrder:1","a b c","a x c x b");
+ assertFieldMatch("gaps:3 outOfOrder:2 segments:1","a b c d e","x d x x b c x x a e");
+ assertFieldMatch("gaps:0","y a b c","a b c x");
+}
+
+
+void
+Test::testFieldMatchExecutorHead()
+{
+ assertFieldMatch("head:0","a","a");
+ //assertFieldMatch("head:0","y","a"); // no hit, executor will not run
+ assertFieldMatch("head:1","a","x a");
+ assertFieldMatch("head:2","a b c","x x a b c");
+ assertFieldMatch("head:2","a b c","x x c x x a b");
+ assertFieldMatch("head:2","a b c","x x c x x x x x x x x x x x x x x x a b");
+}
+
+
+void
+Test::testFieldMatchExecutorTail()
+{
+ assertFieldMatch("tail:0","a","a");
+ //assertFieldMatch("tail:0","y","a"); // no hit, executor will not run
+ assertFieldMatch("tail:1","a","a x");
+ assertFieldMatch("tail:2","a b c","a b c x x");
+ assertFieldMatch("tail:2","a b c","x x x c x x x x a b x x");
+ assertFieldMatch("tail:0","a b c","x x c x x x x x x x x x x x x x x x a b");
+}
+
+void
+Test::testFieldMatchExecutorLongestSequence()
+{
+ assertFieldMatch("longestSequence:1","a","a");
+ assertFieldMatch("longestSequence:1","a","a b c");
+ assertFieldMatch("longestSequence:1","b","a b c");
+ assertFieldMatch("longestSequence:3","a b c","x x a b c x x a b x");
+ assertFieldMatch("longestSequence:3 segments:1","a b c","x x a b x x a b c x");
+ assertFieldMatch("longestSequence:2","a b c d","x x c d x x a b x");
+ assertFieldMatch("longestSequence:2","a b c d","x x a b x c d x x");
+ assertFieldMatch("longestSequence:2","a b c d","x x a b x x x x x x x x x x x x x x x x x c d x x");
+ assertFieldMatch("longestSequence:4 segments:1","a b c d","x x a b x x x x x x x x x x x x x x x x x c d x x a b c d");
+}
+
+
+void
+Test::testFieldMatchExecutorMatches()
+{
+ assertFieldMatch("matches:1 queryCompleteness:1 fieldCompleteness:1","a","a");
+ assertFieldMatch("matches:3 queryCompleteness:1 fieldCompleteness:1","a b c","a b c");
+ assertFieldMatch("matches:3 queryCompleteness:1 fieldCompleteness:0.5","a b c","a b c a b d");
+ assertFieldMatch("matches:3 queryCompleteness:0.5 fieldCompleteness:0.25","a y y b c y","a x x b c x a x a b x x");
+}
+
+
+void
+Test::testFieldMatchExecutorCompleteness()
+{
+ assertFieldMatch("completeness:1 queryCompleteness:1 fieldCompleteness:1","a","a");
+ assertFieldMatch("completeness:0 queryCompleteness:0 fieldCompleteness:0","a","x");
+ assertFieldMatch("completeness:0 queryCompleteness:0 fieldCompleteness:0","y","a");
+ assertFieldMatch("completeness:0.975 queryCompleteness:1 fieldCompleteness:0.5","a","a a");
+ assertFieldMatch("completeness:0.525 queryCompleteness:0.5 fieldCompleteness:1","a a","a");
+ assertFieldMatch("completeness:1 queryCompleteness:1 fieldCompleteness:1","a b c","a b c");
+ assertFieldMatch("completeness:0.525 queryCompleteness:0.5 fieldCompleteness:1","a b c d","a b");
+ assertFieldMatch("completeness:0.975 queryCompleteness:1 fieldCompleteness:0.5","a b","a b c d");
+ assertFieldMatch("completeness:0.97 queryCompleteness:1 fieldCompleteness:0.4","a b","a b c d e");
+}
+
+
+void
+Test::testFieldMatchExecutorOrderness()
+{
+ assertFieldMatch("orderness:1", "a","a");
+ // Note: we have no hits -> orderness: 0(1)
+ assertFieldMatch("orderness:0", "a","x");
+ assertFieldMatch("orderness:0", "a a a","a"); // Oh well...
+ assertFieldMatch("orderness:1", "a","a a a");
+ assertFieldMatch("orderness:0", "a b","b a");
+ assertFieldMatch("orderness:0.5","a b c","b a c");
+ assertFieldMatch("orderness:0.5","a b c d","c b d x x x x x x x x x x x x x x x x x x x x x a");
+}
+
+
+void
+Test::testFieldMatchExecutorRelatedness()
+{
+ assertFieldMatch("relatedness:1", "a","a");
+ assertFieldMatch("relatedness:0", "a","x");
+ assertFieldMatch("relatedness:1", "a b","a b");
+ assertFieldMatch("relatedness:1", "a b c","a b c");
+ assertFieldMatch("relatedness:0.5","a b c","a b x x x x x x x x x x x x x x x x x x x x x x x c");
+ assertFieldMatch("relatedness:0.5","a y b y y y c","a b x x x x x x x x x x x x x x x x x x x x x x x c");
+}
+
+
+void
+Test::testFieldMatchExecutorLongestSequenceRatio()
+{
+ assertFieldMatch("longestSequenceRatio:1", "a","a");
+ assertFieldMatch("longestSequenceRatio:0", "a","x");
+ assertFieldMatch("longestSequenceRatio:1", "a a","a");
+ assertFieldMatch("longestSequenceRatio:1", "a","a a");
+ assertFieldMatch("longestSequenceRatio:1", "a b","a b");
+ assertFieldMatch("longestSequenceRatio:1", "a y"," a x");
+ assertFieldMatch("longestSequenceRatio:0.5","a b","a x b");
+ assertFieldMatch("longestSequenceRatio:0.75","a b c d","x x a b x a x c d a b c x d x");
+}
+
+
+void
+Test::testFieldMatchExecutorEarliness()
+{
+ assertFieldMatch("earliness:1", "a","a");
+ assertFieldMatch("earliness:0", "a","x");
+ assertFieldMatch("earliness:1", "a","a a a");
+ assertFieldMatch("earliness:1", "a a a","a");
+ assertFieldMatch("earliness:0.8", "b","a b c");
+ assertFieldMatch("earliness:0.8", "b","a b");
+ assertFieldMatch("earliness:0.9091","a b c","x b c x x x x x a x x x");
+ assertFieldMatch("earliness:0.2", "a b c","x b c a x x x x a x x x x x x x a b c x x");
+}
+
+
+void
+Test::testFieldMatchExecutorWeight()
+{
+ assertFieldMatch("weight:1", "a","a");
+ assertFieldMatch("weight:0", "y","a");
+ assertFieldMatch("weight:0.3333","a a a","a");
+ assertFieldMatch("weight:1", "a","a a a");
+ assertFieldMatch("weight:1", "a b c","a b c");
+ assertFieldMatch("weight:1", "a b c","x x a b x a x c x x a b x c c x");
+
+ assertFieldMatch("weight:0.3333","a b c","a");
+ assertFieldMatch("weight:0.6667","a b c","a b");
+
+ assertFieldMatch("weight:1", "a b c!200","a b c"); // Best
+ assertFieldMatch("weight:0.75","a b c!200","b c"); // Middle
+ assertFieldMatch("weight:0.5", "a b c!200","a b"); // Worst
+
+ assertFieldMatch("weight:1","a!300 b c!200","a b c"); // Best too
+
+ assertFieldMatch("weight:1", "a b c!50","a b c"); // Best
+ assertFieldMatch("weight:0.6","a b c!50","b c"); // Worse
+ assertFieldMatch("weight:0.4","a b c!50","b"); // Worse
+ assertFieldMatch("weight:0.2","a b c!50","c"); // Worst
+ assertFieldMatch("weight:0.8","a b c!50","a b"); // Middle
+
+ assertFieldMatch("weight:1", "a b c!0","a b c"); // Best
+ assertFieldMatch("weight:0.5","a b c!0","b c"); // Worst
+ assertFieldMatch("weight:1", "a b c!0","a b"); // As good as best
+ assertFieldMatch("weight:0", "a b c!0","c"); // No contribution
+
+ assertFieldMatch("weight:0","a!0 b!0","a b");
+ assertFieldMatch("weight:0","a!0 b!0","");
+
+ // The query also has other terms having a total weight of 300
+ // so we add a weight parameter which is the sum of the weights of this query terms + 300
+ assertFieldMatch("weight:0.25", "a","a",400);
+ assertFieldMatch("weight:0", "y","a",400);
+ assertFieldMatch("weight:0.1667","a a a","a",600);
+ assertFieldMatch("weight:0.25", "a","a a a",400);
+ assertFieldMatch("weight:0.5", "a b c","a b c",600);
+ assertFieldMatch("weight:0.5", "a b c","x x a b x a x c x x a b x c c x",600);
+
+ assertFieldMatch("weight:0.1667","a b c","a",600);
+ assertFieldMatch("weight:0.3333","a b c","a b",600);
+
+ assertFieldMatch("weight:0.5714","a b c!200","a b c",700); // Best
+ assertFieldMatch("weight:0.4286","a b c!200","b c",700); // Middle
+ assertFieldMatch("weight:0.2857","a b c!200","a b",700); // Worst
+
+ assertFieldMatch("weight:0.6667","a!300 b c!200","a b c",900); // Better than best
+
+ assertFieldMatch("weight:0.4545","a b c!50","a b c",550); // Best
+ assertFieldMatch("weight:0.2727","a b c!50","b c",550); // Worse
+ assertFieldMatch("weight:0.1818","a b c!50","b",550); // Worse
+ assertFieldMatch("weight:0.0909","a b c!50","c",550); // Worst
+ assertFieldMatch("weight:0.3636","a b c!50","a b",550); // Middle
+
+ assertFieldMatch("weight:0.4","a b c!0","a b c",500); // Best
+ assertFieldMatch("weight:0.2","a b c!0","b c",500); // Worst
+ assertFieldMatch("weight:0.4","a b c!0","a b",500); // As good as best
+ assertFieldMatch("weight:0", "a b c!0","c",500); // No contribution
+
+ assertFieldMatch("weight:0","a!0 b!0","a b",300);
+ assertFieldMatch("weight:0","a!0 b!0","",300);
+}
+
+
+void
+Test::testFieldMatchExecutorSignificance()
+{
+ assertFieldMatch("significance:1", "a","a");
+ assertFieldMatch("significance:0", "a","x");
+ assertFieldMatch("significance:0.3333","a a a","a");
+ assertFieldMatch("significance:1", "a","a a a");
+ assertFieldMatch("significance:1", "a b c","a b c");
+ assertFieldMatch("significance:1", "a b c","x x a b x a x c x x a b x c c x");
+
+ assertFieldMatch("significance:0.3333","a b c","a");
+ assertFieldMatch("significance:0.6667","a b c","a b");
+
+ assertFieldMatch("significance:1", "a b c%0.2","a b c"); // Best
+ assertFieldMatch("significance:0.75","a b c%0.2","b c"); // Middle
+ assertFieldMatch("significance:0.5", "a b c%0.2","a b"); // Worst
+
+ assertFieldMatch("significance:1","a%0.3 b c%0.2","a b c"); // Best too
+
+ assertFieldMatch("significance:1", "a b c%0.05","a b c"); // Best
+ assertFieldMatch("significance:0.6","a b c%0.05","b c"); // Worse
+ assertFieldMatch("significance:0.4","a b c%0.05","b"); // Worse
+ assertFieldMatch("significance:0.2","a b c%0.05","c"); // Worst
+ assertFieldMatch("significance:0.8","a b c%0.05","a b"); // Middle
+
+ assertFieldMatch("significance:1", "a b c%0","a b c"); // Best
+ assertFieldMatch("significance:0.5","a b c%0","b c"); // Worst
+ assertFieldMatch("significance:1", "a b c%0","a b"); // As good as best
+ assertFieldMatch("significance:0", "a b c%0","c"); // No contribution
+
+ assertFieldMatch("significance:0","a%0 b%0","a b");
+ assertFieldMatch("significance:0","a%0 b%0","");
+
+ // The query also has other terms having a total significance of 0.3
+ // so we add a significance parameter which is the sum of the significances of this query terms + 0.3
+ assertFieldMatchTS("significance:0.25", "a","a",0.4f);
+ assertFieldMatchTS("significance:0", "y","a",0.4f);
+ assertFieldMatchTS("significance:0.1667","a a a","a",0.6f);
+ assertFieldMatchTS("significance:0.25", "a","a a a",0.4f);
+ assertFieldMatchTS("significance:0.5", "a b c","a b c",0.6f);
+ assertFieldMatchTS("significance:0.5", "a b c","x x a b x a x c x x a b x c c x",0.6f);
+
+ assertFieldMatchTS("significance:0.1667","a b c","a",0.6f);
+ assertFieldMatchTS("significance:0.3333","a b c","a b",0.6f);
+
+ assertFieldMatchTS("significance:0.5714","a b c%0.2","a b c",0.7f); // Best
+ assertFieldMatchTS("significance:0.4286","a b c%0.2","b c",0.7f); // Middle
+ assertFieldMatchTS("significance:0.2857","a b c%0.2","a b",0.7f); // Worst
+
+ assertFieldMatchTS("significance:0.6667","a%0.3 b c%0.2","a b c",0.9f); // Better than best
+
+ assertFieldMatchTS("significance:0.4545","a b c%0.05","a b c",0.55f); // Best
+ assertFieldMatchTS("significance:0.2727","a b c%0.05","b c",0.55f); // Worse
+ assertFieldMatchTS("significance:0.1818","a b c%0.05","b",0.55f); // Worse
+ assertFieldMatchTS("significance:0.0909","a b c%0.05","c",0.55f); // Worst
+ assertFieldMatchTS("significance:0.3636","a b c%0.05","a b",0.55f); // Middle
+
+ assertFieldMatchTS("significance:0.4","a b c%0","a b c",0.5f); // Best
+ assertFieldMatchTS("significance:0.2","a b c%0","b c",0.5f); // Worst
+ assertFieldMatchTS("significance:0.4","a b c%0","a b",0.5f); // As good as best
+ assertFieldMatchTS("significance:0", "a b c%0","c",0.5f); // No contribution
+
+ assertFieldMatchTS("significance:0","a%0 b%0","a b",0.3f);
+ assertFieldMatchTS("significance:0","a%0 b%0","",0.3f);
+}
+
+
+void
+Test::testFieldMatchExecutorImportance()
+{
+ assertFieldMatch("importance:0.75","a b c", "a x x b x c c c",600);
+ assertFieldMatch("importance:0.85","a b!500 c","a x x b x c c c",1000);
+
+ // Twice as common - twice as weighty, but total weight has the extra 300 - less than the previous
+ assertFieldMatch("importance:0.7857","a b!200%0.05 c","a x x b x c c c",700);
+ // Here higher importancy exactly offsets the lowered uniqueness
+ assertFieldMatch("importance:0.85","a b!500%0.5 c","a x x b x c c c",1000);
+}
+
+
+void
+Test::testFieldMatchExecutorOccurrence()
+{
+ assertFieldMatch("occurrence:0","a","x");
+ assertFieldMatch("occurrence:1","a","a");
+ assertFieldMatch("occurrence:0","a a a","x");
+ assertFieldMatch("occurrence:1","a a a","a");
+ assertFieldMatch("occurrence:1","a a a","a a a");
+ assertFieldMatch("occurrence:1","a a a","a a a a");
+ assertFieldMatch("occurrence:0.3571","a","x x x a x x a x a x x x a a");
+ assertFieldMatch("occurrence:1","a","a a a a a a a a a a a a a a");
+ assertFieldMatch("occurrence:1","a b","a b b a a a a a b a a b a a");
+
+ // tests going beyond the occurrence limit
+ fieldmatch::Params params;
+ params.setMaxOccurrences(10);
+ assertFieldMatch("occurrence:1", "a b","a a a a a a a a a a b b", &params);
+ assertFieldMatch("occurrence:0.9231","a b","a a a a a a a a a a a b b", &params); // Starting to cut off
+ assertFieldMatch("occurrence:0.6", "a b","a a a a a a a a a a a a a a a a a a a a a b b", &params); // Way beyond cutoff for a
+ assertFieldMatch("occurrence:1", "a b","a a a a a a a a a a b b b b b b b b b b", &params); // Exactly no cutoff
+ assertFieldMatch("occurrence:1", "a b","a a a a a a a a a a a b b b b b b b b b b b", &params); // Field is too large to consider field length
+}
+
+
+void
+Test::testFieldMatchExecutorAbsoluteOccurrence()
+{
+ assertFieldMatch("absoluteOccurrence:0", "a","x");
+ assertFieldMatch("absoluteOccurrence:0.01","a","a");
+ assertFieldMatch("absoluteOccurrence:0","a a a","x");
+ assertFieldMatch("absoluteOccurrence:0.01", "a a a","a");
+ assertFieldMatch("absoluteOccurrence:0.03", "a a a","a a a");
+ assertFieldMatch("absoluteOccurrence:0.04", "a a a","a a a a");
+ assertFieldMatch("absoluteOccurrence:0.05","a","x x x a x x a x a x x x a a");
+ assertFieldMatch("absoluteOccurrence:0.14","a","a a a a a a a a a a a a a a");
+ assertFieldMatch("absoluteOccurrence:0.07","a b","a b b a a a a a b a a b a a");
+
+ // tests going beyond the occurrence limit
+ fieldmatch::Params params;
+ params.setMaxOccurrences(10);
+ assertFieldMatch("absoluteOccurrence:0.6","a b","a a a a a a a a a a b b", &params);
+ assertFieldMatch("absoluteOccurrence:0.6","a b","a a a a a a a a a a a b b", &params); // Starting to cut off
+ assertFieldMatch("absoluteOccurrence:0.6","a b","a a a a a a a a a a a a a a a a a a a a a b b", &params); // Way beyond cutoff for a
+ assertFieldMatch("absoluteOccurrence:1", "a b","a a a a a a a a a a b b b b b b b b b b", &params); // Exactly no cutoff
+ assertFieldMatch("absoluteOccurrence:1", "a b","a a a a a a a a a a a b b b b b b b b b b b", &params); // Field is too large to consider field length
+}
+
+
+void
+Test::testFieldMatchExecutorWeightedOccurrence()
+{
+ assertFieldMatch("weightedOccurrence:0","a!200","x");
+ assertFieldMatch("weightedOccurrence:1","a!200","a");
+ assertFieldMatch("weightedOccurrence:0","a!200 a a","x");
+ assertFieldMatch("weightedOccurrence:1","a!200 a a","a");
+ assertFieldMatch("weightedOccurrence:1","a a a","a a a");
+ assertFieldMatch("weightedOccurrence:1","a!200 a a","a a a a");
+ assertFieldMatch("weightedOccurrence:0.3571","a!200","x x x a x x a x a x x x a a");
+ assertFieldMatch("weightedOccurrence:1","a!200","a a a a a a a a a a a a a a");
+ assertFieldMatch("weightedOccurrence:0.5","a b","a b b a a a a a b a a b a a");
+
+ assertFieldMatch("weightedOccurrence:0.5714","a!200 b","a b b a a a a a b a a b a a");
+ assertFieldMatch("weightedOccurrence:0.6753","a!1000 b","a b b a a a a a b a a b a a"); // Should be higher
+ assertFieldMatch("weightedOccurrence:0.4286","a b!200","a b b a a a a a b a a b a a"); // Should be lower
+ assertFieldMatch("weightedOccurrence:0.3061","a b!2000","a b b a a a a a b a a b a a"); // Should be even lower
+
+ assertFieldMatch("weightedOccurrence:0.30","a b", "a a b b b b x x x x");
+ assertFieldMatch("weightedOccurrence:0.3333","a b!200","a a b b b b x x x x"); // More frequent is more important - higher
+ assertFieldMatch("weightedOccurrence:0.2667","a!200 b","a a b b b b x x x x"); // Less frequent is more important - lower
+ assertFieldMatch("weightedOccurrence:0.2667","a b!50", "a a b b b b x x x x"); // Same relative
+
+ assertFieldMatch("weightedOccurrence:0","a!0 b!0", "a a b b b b x x x x");
+
+ // tests going beyond the occurrence limit
+ fieldmatch::Params params;
+ params.setMaxOccurrences(10);
+ assertFieldMatch("weightedOccurrence:0.6","a b","a a a a a a a a a a b b", &params);
+ assertFieldMatch("weightedOccurrence:0.6","a b","a a a a a a a a a a a b b", &params); // Starting to cut off
+ assertFieldMatch("weightedOccurrence:0.6","a b","a a a a a a a a a a a a a a a a a a a a a b b", &params); // Way beyond cutoff for a
+ assertFieldMatch("weightedOccurrence:1", "a b","a a a a a a a a a a b b b b b b b b b b", &params); // Exactly no cutoff
+ assertFieldMatch("weightedOccurrence:1", "a b","a a a a a a a a a a a b b b b b b b b b b b", &params); // Field is too large to consider field length
+
+ assertFieldMatch("weightedOccurrence:0.7333","a!200 b","a a a a a a a a a a b b", &params);
+ assertFieldMatch("weightedOccurrence:0.4667","a b!200","a a a a a a a a a a b b", &params);
+ assertFieldMatch("weightedOccurrence:0.7333","a!200 b","a a a a a a a a a a a b b", &params); // Starting to cut off
+ assertFieldMatch("weightedOccurrence:0.7333","a!200 b","a a a a a a a a a a a a a a a a a a a a a b b", &params); // Way beyond cutoff for a
+ assertFieldMatch("weightedOccurrence:1", "a!200 b","a a a a a a a a a a b b b b b b b b b b", &params); // Exactly no cutoff
+ assertFieldMatch("weightedOccurrence:1", "a!200 b","a a a a a a a a a a a b b b b b b b b b b b", &params); // Field is too large to consider field length
+}
+
+
+void
+Test::testFieldMatchExecutorWeightedAbsoluteOccurrence()
+{
+ assertFieldMatch("weightedAbsoluteOccurrence:0", "a!200","x");
+ assertFieldMatch("weightedAbsoluteOccurrence:0.01", "a!200","a");
+ assertFieldMatch("weightedAbsoluteOccurrence:0", "a!200 a a","x");
+ assertFieldMatch("weightedAbsoluteOccurrence:0.01", "a!200 a a","a");
+ assertFieldMatch("weightedAbsoluteOccurrence:0.03", "a a a","a a a");
+ assertFieldMatch("weightedAbsoluteOccurrence:0.04", "a!200 a a","a a a a");
+ assertFieldMatch("weightedAbsoluteOccurrence:0.05", "a!200","x x x a x x a x a x x x a a");
+ assertFieldMatch("weightedAbsoluteOccurrence:0.14", "a!200","a a a a a a a a a a a a a a");
+ assertFieldMatch("weightedAbsoluteOccurrence:0.07","a b","a b b a a a a a b a a b a a");
+
+ assertFieldMatch("weightedAbsoluteOccurrence:0.08", "a!200 b","a b b a a a a a b a a b a a");
+ assertFieldMatch("weightedAbsoluteOccurrence:0.0945","a!1000 b","a b b a a a a a b a a b a a"); // Should be higher
+ assertFieldMatch("weightedAbsoluteOccurrence:0.06", "a b!200","a b b a a a a a b a a b a a"); // Should be lower
+ assertFieldMatch("weightedAbsoluteOccurrence:0.0429","a b!2000","a b b a a a a a b a a b a a"); // Should be even lower
+
+ assertFieldMatch("weightedAbsoluteOccurrence:0.03", "a b", "a a b b b b x x x x");
+ assertFieldMatch("weightedAbsoluteOccurrence:0.0333","a b!200","a a b b b b x x x x"); // More frequent is more important - higher
+ assertFieldMatch("weightedAbsoluteOccurrence:0.0267","a!200 b","a a b b b b x x x x"); // Less frequent is more important - lower
+ assertFieldMatch("weightedAbsoluteOccurrence:0.0267","a b!50", "a a b b b b x x x x"); // Same relative
+
+ assertFieldMatch("weightedAbsoluteOccurrence:0","a!0 b!0", "a a b b b b x x x x");
+
+ // tests going beyond the occurrence limit
+ fieldmatch::Params params;
+ params.setMaxOccurrences(10);
+ assertFieldMatch("weightedAbsoluteOccurrence:0.6","a b","a a a a a a a a a a b b", &params);
+ assertFieldMatch("weightedAbsoluteOccurrence:0.6","a b","a a a a a a a a a a a b b", &params); // Starting to cut off
+ assertFieldMatch("weightedAbsoluteOccurrence:0.6","a b","a a a a a a a a a a a a a a a a a a a a a b b", &params); // Way beyond cutoff for a
+ assertFieldMatch("weightedAbsoluteOccurrence:1", "a b","a a a a a a a a a a b b b b b b b b b b", &params); // Exactly no cutoff
+ assertFieldMatch("weightedAbsoluteOccurrence:1", "a b","a a a a a a a a a a a b b b b b b b b b b b", &params); // Field is too large to consider field length
+
+ assertFieldMatch("weightedAbsoluteOccurrence:0.7333","a!200 b","a a a a a a a a a a b b", &params);
+ assertFieldMatch("weightedAbsoluteOccurrence:0.4667","a b!200","a a a a a a a a a a b b", &params);
+ assertFieldMatch("weightedAbsoluteOccurrence:0.7333","a!200 b","a a a a a a a a a a a b b", &params); // Starting to cut off
+ assertFieldMatch("weightedAbsoluteOccurrence:0.7333","a!200 b","a a a a a a a a a a a a a a a a a a a a a b b", &params); // Way beyond cutoff for a
+ assertFieldMatch("weightedAbsoluteOccurrence:1", "a!200 b","a a a a a a a a a a b b b b b b b b b b", &params); // Exactly no cutoff
+ assertFieldMatch("weightedAbsoluteOccurrence:1", "a!200 b","a a a a a a a a a a a b b b b b b b b b b b", &params); // Field is too large to consider field length
+}
+
+
+void
+Test::testFieldMatchExecutorSignificantOccurrence()
+{
+ assertFieldMatch("significantOccurrence:0","a%0.2","x");
+ assertFieldMatch("significantOccurrence:1","a%0.2","a");
+ assertFieldMatch("significantOccurrence:0","a%0.2 a a","x");
+ assertFieldMatch("significantOccurrence:1","a%0.2 a a","a");
+ assertFieldMatch("significantOccurrence:1","a a a","a a a");
+ assertFieldMatch("significantOccurrence:1","a%0.2 a a","a a a a");
+ assertFieldMatch("significantOccurrence:0.3571","a%0.2","x x x a x x a x a x x x a a");
+ assertFieldMatch("significantOccurrence:1","a%0.2","a a a a a a a a a a a a a a");
+ assertFieldMatch("significantOccurrence:0.5","a b","a b b a a a a a b a a b a a");
+
+ assertFieldMatch("significantOccurrence:0.5714","a%0.2 b","a b b a a a a a b a a b a a");
+ assertFieldMatch("significantOccurrence:0.6753","a%1 b","a b b a a a a a b a a b a a"); // Should be higher
+ assertFieldMatch("significantOccurrence:0.4286","a b%0.2","a b b a a a a a b a a b a a"); // Should be lower
+ assertFieldMatch("significantOccurrence:0.3247","a b%1","a b b a a a a a b a a b a a"); // Should be even lower
+
+ assertFieldMatch("significantOccurrence:0.30","a b", "a a b b b b x x x x");
+ assertFieldMatch("significantOccurrence:0.3333","a b%0.2","a a b b b b x x x x"); // More frequent is more important - higher
+ assertFieldMatch("significantOccurrence:0.2667","a%0.2 b","a a b b b b x x x x"); // Less frequent is more important - lower
+ assertFieldMatch("significantOccurrence:0.2667","a b%0.05", "a a b b b b x x x x"); // Same relative
+
+ assertFieldMatch("significantOccurrence:0","a%0 b%0", "a a b b b b x x x x");
+
+ // tests going beyond the occurrence limit
+ fieldmatch::Params params;
+ params.setMaxOccurrences(10);
+ assertFieldMatch("significantOccurrence:0.6","a b","a a a a a a a a a a b b", &params);
+ assertFieldMatch("significantOccurrence:0.6","a b","a a a a a a a a a a a b b", &params); // Starting to cut off
+ assertFieldMatch("significantOccurrence:0.6","a b","a a a a a a a a a a a a a a a a a a a a a b b", &params); // Way beyond cutoff for a
+ assertFieldMatch("significantOccurrence:1", "a b","a a a a a a a a a a b b b b b b b b b b", &params); // Exactly no cutoff
+ assertFieldMatch("significantOccurrence:1", "a b","a a a a a a a a a a a b b b b b b b b b b b", &params); // Field is too large to consider field length
+
+ assertFieldMatch("significantOccurrence:0.7333","a%0.2 b","a a a a a a a a a a b b", &params);
+ assertFieldMatch("significantOccurrence:0.4667","a b%0.2","a a a a a a a a a a b b", &params);
+ assertFieldMatch("significantOccurrence:0.7333","a%0.2 b","a a a a a a a a a a a b b", &params); // Starting to cut off
+ assertFieldMatch("significantOccurrence:0.7333","a%0.2 b","a a a a a a a a a a a a a a a a a a a a a b b", &params); // Way beyond cutoff for a
+ assertFieldMatch("significantOccurrence:1", "a%0.2 b","a a a a a a a a a a b b b b b b b b b b", &params); // Exactly no cutoff
+ assertFieldMatch("significantOccurrence:1", "a%0.2 b","a a a a a a a a a a a b b b b b b b b b b b", &params); // Field is too large to consider field length
+}
+
+void
+Test::testFieldMatchExecutorUnweightedProximity()
+{
+ assertFieldMatch("unweightedProximity:1", "a","a");
+ assertFieldMatch("unweightedProximity:1", "a b c","a b c");
+ assertFieldMatch("unweightedProximity:1", "a b c","a b c x");
+ assertFieldMatch("unweightedProximity:1", "y a b c","a b c x");
+ assertFieldMatch("unweightedProximity:1", "y a b c","a b c x");
+ assertFieldMatch("unweightedProximity:0.855","y a b c","a b x c x");
+ assertFieldMatch("unweightedProximity:0.750","y a b c","a b x x c x");
+ assertFieldMatch("unweightedProximity:0.71", "y a b c","a x b x c x"); // Should be slightly worse than the previous one
+ assertFieldMatch("unweightedProximity:0.605","y a b c","a x b x x c x");
+ assertFieldMatch("unweightedProximity:0.53", "y a b c","a x b x x x c x");
+ assertFieldMatch("unweightedProximity:0.5", "y a b c","a x x b x x c x");
+}
+
+
+void
+Test::testFieldMatchExecutorReverseProximity()
+{
+ assertFieldMatch("unweightedProximity:0.33", "a b","b a");
+ assertFieldMatch("unweightedProximity:0.62", "a b c","c a b");
+ assertFieldMatch("unweightedProximity:0.585", "y a b c","c x a b");
+ assertFieldMatch("unweightedProximity:0.33", "a b c","c b a");
+ assertFieldMatch("unweightedProximity:0.6875","a b c d e","a b d c e");
+ assertFieldMatch("unweightedProximity:0.9275","a b c d e","a b x c d e");
+}
+
+
+void
+Test::testFieldMatchExecutorAbsoluteProximity()
+{
+ assertFieldMatch("absoluteProximity:0.1 proximity:1", "a b","a b");
+ assertFieldMatch("absoluteProximity:0.3 proximity:1", "a 0.3:b","a b");
+ assertFieldMatch("absoluteProximity:0.1 proximity:1", "a 0.0:b","a b");
+ assertFieldMatch("absoluteProximity:1 proximity:1", "a 1.0:b","a b");
+ assertFieldMatch("absoluteProximity:0.033 proximity:0.33", "a b","b a");
+ assertFieldMatch("absoluteProximity:0.0108 proximity:0.0359","a 0.3:b","b a"); // Should be worse than the previous one
+ assertFieldMatch("absoluteProximity:0.1 proximity:1", "a 0.0:b","b a");
+ assertFieldMatch("absoluteProximity:0 proximity:0", "a 1.0:b","b a");
+
+ assertFieldMatch("absoluteProximity:0.0605 proximity:0.605", "a b c","a x b x x c");
+ assertFieldMatch("absoluteProximity:0.0701 proximity:0.2003","a 0.5:b 0.2:c","a x b x x c"); // Most important is close, less important is far: Better
+ assertFieldMatch("absoluteProximity:0.0605 proximity:0.605", "a b c","a x x b x c");
+ assertFieldMatch("absoluteProximity:0.0582 proximity:0.1663","a 0.5:b 0.2:c","a x x b x c"); // Most important is far, less important is close: Worse
+
+ assertFieldMatch("absoluteProximity:0.0727 proximity:0.7267","a b c d","a b x x x x x c d");
+ assertFieldMatch("absoluteProximity:0.1 proximity:1", "a b 0:c d","a b x x x x x c d"); // Should be better because the gap is unimportant
+
+ // test with another proximity table
+ std::vector<feature_t> pt;
+ pt.push_back(0.2);
+ pt.push_back(0.4);
+ pt.push_back(0.6);
+ pt.push_back(0.8);
+ pt.push_back(1.0);
+ pt.push_back(0.8);
+ pt.push_back(0.6);
+ pt.push_back(0.4);
+ pt.push_back(0.2);
+ fieldmatch::Params params;
+ params.setProximityLimit(4);
+ params.setProximityTable(pt);
+ assertFieldMatch("absoluteProximity:0.07 proximity:0.7", "a b c","a x b x x c", &params);
+ assertFieldMatch("absoluteProximity:0.1179 proximity:0.3369","a 0.5:b 0.2:c","a x b x x c", &params); // Most important is close, less important is far: Better
+ assertFieldMatch("absoluteProximity:0.07 proximity:0.7", "a b c","a x x b x c", &params);
+ assertFieldMatch("absoluteProximity:0.0834 proximity:0.2384","a 0.5:b 0.2:c","a x x b x c", &params); // Most important is far, less important is close: Worse
+}
+
+
+void
+Test::testFieldMatchExecutorMultiSegmentProximity()
+{
+ assertFieldMatch("absoluteProximity:0.1 proximity:1", "a b c", "a b x x x x x x x x x x x x x x x x x x x x x x c");
+ assertFieldMatch("absoluteProximity:0.05 proximity:0.5","a b c", "a x x b x x x x x x x x x x x x x x x x x x x x x x c");
+ assertFieldMatch("absoluteProximity:0.075 proximity:0.75","a b c d","a x x b x x x x x x x x x x x x x x x x x x x x x x c d");
+}
+
+
+void
+Test::testFieldMatchExecutorSegmentDistance()
+{
+ assertFieldMatch("segmentDistance:13 absoluteProximity:0.1", "a b c","a b x x x x x x x x x x c");
+ assertFieldMatch("segmentDistance:13 absoluteProximity:0.5", "a 0.5:b c","a b x x x x x x x x x x c");
+ assertFieldMatch("segmentDistance:13 absoluteProximity:0.1", "a b c","b c x x x x x x x x x x a");
+ assertFieldMatch("segmentDistance:25 absoluteProximity:0.1", "a b c","b x x x x x x x x x x x a x x x x x x x x x x c");
+ assertFieldMatch("segmentDistance:13 absoluteProximity:0.006","a b c","a x x x x x x x x x x x b x x x x x x x x c");
+ assertFieldMatch("segmentDistance:24 absoluteProximity:0.1", "a b c","a x x x x x x x x x x x b x x x x x x x x x c");
+ assertFieldMatch("segmentDistance:25 absoluteProximity:0.1", "a b c","a x x x x x x x x x x x b x x x x x x x x x x c");
+ assertFieldMatch("segmentDistance:25 absoluteProximity:0.1", "a b c","c x x x x x x x x x x x b x x x x x x x x x x a");
+}
+
+
+void
+Test::testFieldMatchExecutorSegmentProximity()
+{
+ assertFieldMatch("segmentProximity:1", "a","a");
+ assertFieldMatch("segmentProximity:0", "a","x");
+ assertFieldMatch("segmentProximity:1", "a","a x");
+ assertFieldMatch("segmentProximity:0", "a b","a x x x x x x x x x x x x x x x x x x x x x x x b");
+ assertFieldMatch("segmentProximity:0.4","a b","a x x x x x x x x x x x x x x x x x x x x x x b x x x x x x x x x x x x x x x x");
+ assertFieldMatch("segmentProximity:0", "a b c","a b x x x x x x x x x x x x x x x x x x x x x c");
+ assertFieldMatch("segmentProximity:0.4","a b c","a b x x x x x x x x x x x x x x x x x x x x x c x x x x x x x x x x x x x x x x");
+ assertFieldMatch("segmentProximity:0.4","a b c","b c x x x x x x x x x x x x x x x x x x x x x a x x x x x x x x x x x x x x x x");
+}
+
+
+void
+Test::testFieldMatchExecutorSegmentStarts()
+{
+#ifdef FIELDMATCH_OUTPUTS_SEGMENTSTARTS
+ // Test cases where we choose between multiple different segmentations
+ { // test segmentSelection
+ assertFieldMatch("segments:2 absoluteProximity:0.1 proximity:1 segmentStarts:19,41",
+ "a b c d e","x a b x c x x x x x x x x x x x x x x a b c x x x x x x x x x e x d x c d x x x c d e");
+ // 0 1 2 3 4 5 6 7 8 9�0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2
+ // 0 1 2 3 4
+ // Should choose - - - - -
+
+ assertFieldMatch("segments:1 absoluteProximity:0.0778 proximity:0.778","a b c d e f","x x a b b b c f e d a b c d x e x x x x x f d e f a b c a a b b c c d d e e f f");
+
+ // Prefer one segment with ok proximity or two segments with great proximity
+ assertFieldMatch("segments:1 segmentStarts:0","a b c d","a b x c d x x x x x x x x x x x a b x x x x x x x x x x x c d");
+ assertFieldMatch("segments:1 segmentStarts:0","a b c d","a b x x x x x x x x c d x x x x x x x x x x x a b x x x x x x x x x x x c d");
+ }
+#endif
+}
+
+
+void
+Test::testFieldMatchExecutorMoreThanASegmentLengthOfUnmatchedQuery()
+{
+ assertFieldMatch("absoluteProximity:0.1 proximity:1","a b y y y y y y y y y y y y y y y","a b");
+ assertFieldMatch("segments:2 absoluteProximity:0.1 proximity:1","a b c d y y y y y y y y y y y y y y y","a b x x x x x x x x x x x x x x x x x x c d");
+ assertFieldMatch("segments:2 absoluteProximity:0.1 proximity:1","a b y y y y y y y y y y y y y y y c d","a b x x x x x x x x x x x x x x x x x x c d");
+}
+
+
+void
+Test::testFieldMatchExecutorQueryRepeats()
+{
+ // Not really handled perfectly, but good enough
+ assertFieldMatch("absoluteProximity:0.1 proximity:1 head:0 tail:0", "a a a","a");
+ assertFieldMatch("absoluteProximity:0.1 proximity:1 head:0 tail:0 gapLength:0","a a b c c","a a b c c");
+ assertFieldMatch("absoluteProximity:0.1 proximity:1 head:0 tail:0 gapLength:0","a a b c c","a b c");
+ assertFieldMatch("absoluteProximity:0.1 proximity:1 head:0 tail:0 gapLength:0","a b a b","a b a b");
+ assertFieldMatch("absoluteProximity:0.0903 proximity:0.9033 head:0 tail:0 gapLength:1","a b a b","a b x a b");
+ // Both terms take the same segment:
+ assertFieldMatch("absoluteProximity:0.1 proximity:1 segments:2 gapLength:0 head:3 tail:18","a a","x x x a x x x x x x x x x x x x x x a x x x");
+ // But not when the second is preferable
+ assertFieldMatch("absoluteProximity:0.1 proximity:1 segments:2 gapLength:0 head:3 tail:3","a b b a","x x x a b x x x x x x x x x x x x x x b a x x x");
+ assertFieldMatch("matches:2 fieldCompleteness:1","a b b b","a b");
+}
+
+
+void
+Test::testFieldMatchExecutorZeroCases()
+{
+ // Note: we have no hits -> absoluteProximity:0(0.1) proximity:0(1)
+ assertFieldMatch("absoluteProximity:0 proximity:0 matches:0","y","a");
+ assertFieldMatch("absoluteProximity:0 proximity:0 matches:0","a","x");
+ assertFieldMatch("absoluteProximity:0 proximity:0 matches:0","","x");
+ assertFieldMatch("absoluteProximity:0 proximity:0 matches:0","y","");
+ assertFieldMatch("absoluteProximity:0 proximity:0 matches:0","","");
+}
+
+
+void
+Test::testFieldMatchExecutorExceedingIterationLimit()
+{
+ // Segments found: a x x b and c d
+ {
+ fieldmatch::Params params;
+ params.setMaxAlternativeSegmentations(0);
+ assertFieldMatch("matches:4 tail:0 proximity:0.75 absoluteProximity:0.075","a b c d","a x x b x x x a x b x x x x x a b x x x x x x x x x x x x x x x x x c d", &params);
+ }
+
+ // Segments found: a x b and c d
+ {
+ fieldmatch::Params params;
+ params.setMaxAlternativeSegmentations(1);
+ assertFieldMatch("matches:4 tail:0 proximity:0.855 absoluteProximity:0.0855","a b c d","a x x b x x x a x b x x x x x a b x x x x x x x x x x x x x x x x x c d", &params);
+ }
+
+ // Segments found: a b and c d
+ {
+ fieldmatch::Params params;
+ params.setMaxAlternativeSegmentations(2);
+ assertFieldMatch("matches:4 tail:0 proximity:1 absoluteProximity:0.1","a b c d","a x x b x x x a x b x x x x x a b x x x x x x x x x x x x x x x x x c d", &params);
+ }
+}
+
+
+void
+Test::testFieldMatchExecutorRemaining()
+{
+
+ { // test match (aka score)
+ // Ordered by decreasing match score per query
+ assertFieldMatch("score:1", "a","a");
+ assertFieldMatch("score:0.9339","a","a x");
+ assertFieldMatch("score:0", "a","x");
+ assertFieldMatch("score:0.9243","a","x a");
+ assertFieldMatch("score:0.9025","a","x a x");
+
+ assertFieldMatch("score:1", "a b","a b");
+ assertFieldMatch("score:0.9558","a b","a b x");
+ assertFieldMatch("score:0.9463","a b","x a b");
+ assertFieldMatch("score:0.1296","a b","a x x x x x x x x x x x x x x x x x x x x x x b");
+ assertFieldMatch("score:0.1288","a b","a x x x x x x x x x x x x x x x x x x x x x x x x x x x b");
+
+ assertFieldMatch("score:0.8647","a b c","x x a x b x x x x x x x x a b c x x x x x x x x c x x");
+ assertFieldMatch("score:0.861", "a b c","x x a x b x x x x x x x x x x a b c x x x x x x c x x");
+ assertFieldMatch("score:0.4869","a b c","a b x x x x x x x x x x x x x x x x x x x x x x c x x");
+ assertFieldMatch("score:0.4853","a b c","x x a x b x x x x x x x x x x b a c x x x x x x c x x");
+ assertFieldMatch("score:0.3621","a b c","a x b x x x x x x x x x x x x x x x x x x x x x c x x");
+ assertFieldMatch("score:0.3619","a b c","x x a x b x x x x x x x x x x x x x x x x x x x c x x");
+ assertFieldMatch("score:0.3584","a b c","x x a x b x x x x x x x x x x x x x x x x x x x x x c");
+ assertFieldMatch("score:0.3421","a b c","x x a x b x x x x x x x x x x x x x x x x x x x x x x");
+
+ assertFieldMatch("score:0.3474","a b c","x x a x b x x x x x x x x x x x x x x b x x x b x b x");
+ }
+
+ { // test repeated match
+ // gap==1 caused by finding two possible segments due to repeated matching
+ assertFieldMatch("fieldCompleteness:1 queryCompleteness:0.6667 segments:1 earliness:1 gaps:1",
+ "pizza hut pizza","pizza hut");
+ }
+
+ //------------------- extra tests -------------------//
+
+ { // test with a query on an attribute field
+ LOG(info, "Query on an attribute field");
+ vespalib::string feature = "fieldMatch(foo)";
+ FtFeatureTest ft(_factory, feature);
+ ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo");
+ ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "bar");
+ ft.getQueryEnv().getBuilder().addAttributeNode("bar");
+ ASSERT_TRUE(ft.setup());
+ ASSERT_TRUE(ft.execute(toRankResult(feature, "score:0")));
+ }
+
+
+ { // test with query on another index field as well
+ LOG(info, "Query on an another index field");
+ FtFeatureTest ft(_factory, StringList().add("fieldMatch(foo)"));
+ ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo");
+ ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "bar");
+ ft.getQueryEnv().getBuilder().addIndexNode(StringList().add("foo")); // search on 'foo' (0)
+ ft.getQueryEnv().getBuilder().addIndexNode(StringList().add("bar")); // search on 'bar' (1)
+ ASSERT_TRUE(ft.setup());
+
+ MatchDataBuilder::UP mdb = ft.createMatchDataBuilder();
+
+ // add occurrence for 'foo' with query=a
+ ASSERT_TRUE(mdb->setFieldLength("foo", 1));
+ ASSERT_TRUE(mdb->addOccurence("foo", 0, 0)); // a
+
+ // add occurrence for 'bar' with query=a
+ ASSERT_TRUE(mdb->setFieldLength("bar", 2));
+ ASSERT_TRUE(mdb->addOccurence("bar", 1, 1)); // x a
+
+ ASSERT_TRUE(mdb->apply(1));
+
+ ASSERT_TRUE(ft.execute(toRankResult("fieldMatch(foo)", "score:1 matches:1 queryCompleteness:1 fieldCompleteness:1")));
+ ASSERT_TRUE(ft.execute(toRankResult("fieldMatch(foo)", "score:0"), 2)); // another docid -> no hit -> default values
+ }
+
+ { // search on more than one document
+ LOG(info, "Query on more than one document");
+ FtFeatureTest ft(_factory, StringList().add("fieldMatch(foo)"));
+ ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo");
+ ft.getQueryEnv().getBuilder().addIndexNode(StringList().add("foo")); // 'a' (0)
+ ft.getQueryEnv().getBuilder().addIndexNode(StringList().add("foo")); // 'b' (1)
+ ASSERT_TRUE(ft.setup());
+
+ // check that we get the same results as this
+ // assertFieldMatch("score:1", "a b","a b");
+ // assertFieldMatch("score:0.9558","a b","a b x");
+ // assertFieldMatch("score:0.932", "a b","x a b");
+
+ { // docid 1: "a b"
+ MatchDataBuilder::UP mdb = ft.createMatchDataBuilder();
+ ASSERT_TRUE(mdb->setFieldLength("foo", 2));
+ ASSERT_TRUE(mdb->addOccurence("foo", 0, 0)); // 'a'
+ ASSERT_TRUE(mdb->addOccurence("foo", 1, 1)); // 'b'
+ ASSERT_TRUE(mdb->apply(1));
+ ASSERT_TRUE(ft.execute(toRankResult("fieldMatch(foo)", "score:1 matches:2"), 1));
+ }
+ { // docid 2: "a b x"
+ MatchDataBuilder::UP mdb = ft.createMatchDataBuilder();
+ ASSERT_TRUE(mdb->setFieldLength("foo", 3));
+ ASSERT_TRUE(mdb->addOccurence("foo", 0, 0)); // 'a'
+ ASSERT_TRUE(mdb->addOccurence("foo", 1, 1)); // 'b'
+ ASSERT_TRUE(mdb->apply(1));
+ RankResult rr = toRankResult("fieldMatch(foo)", "score:0.9558 matches:2");
+ rr.setEpsilon(1e-4); // same as java tests
+ ASSERT_TRUE(ft.execute(rr, 1));
+ }
+ { // docid 3: "x a b"
+ MatchDataBuilder::UP mdb = ft.createMatchDataBuilder();
+ ASSERT_TRUE(mdb->setFieldLength("foo", 3));
+ ASSERT_TRUE(mdb->addOccurence("foo", 0, 1)); // 'a'
+ ASSERT_TRUE(mdb->addOccurence("foo", 1, 2)); // 'b'
+ ASSERT_TRUE(mdb->apply(2));
+ RankResult rr = toRankResult("fieldMatch(foo)", "score:0.9463 matches:2");
+ rr.setEpsilon(1e-4); // same as java tests
+ ASSERT_TRUE(ft.execute(rr, 2));
+ }
+ }
+
+ { // test where not all hits have position information
+ LOG(info, "Not all hits have position information");
+ FtFeatureTest ft(_factory, StringList().add("fieldMatch(foo)"));
+ ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo");
+ ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "bar");
+ ft.getQueryEnv().getBuilder().addIndexNode(StringList().add("foo"))->setWeight(search::query::Weight(200)); // search for 'a' (termId 0)
+ ft.getQueryEnv().getBuilder().addIndexNode(StringList().add("foo"))->setWeight(search::query::Weight(400)); // search for 'b' (termId 1)
+ ft.getQueryEnv().getBuilder().addIndexNode(StringList().add("foo"))->setWeight(search::query::Weight(600)); // search for 'c' (termId 2)
+ ft.getQueryEnv().getBuilder().addIndexNode(StringList().add("foo"))->setWeight(search::query::Weight(800)); // search for 'd' (termId 3)
+ ft.getQueryEnv().getBuilder().addIndexNode(StringList().add("bar"))->setWeight(search::query::Weight(1000)); // search for 'e' (termId 4)
+ ASSERT_TRUE(ft.setup());
+
+ assertFieldMatch("score:0.3389 completeness:0.5083 degradedMatches:0", "a b c d", "x a b");
+
+ // field: x a b
+ { // no pos occ for term b -> score is somewhat degraded (lower .occurrence)
+ MatchDataBuilder::UP mdb = ft.createMatchDataBuilder();
+ // add occurrence with query term 'a'
+ ASSERT_TRUE(mdb->setFieldLength("foo", 3));
+ ASSERT_TRUE(mdb->addOccurence("foo", 0, 1));
+ // add hit with query term 'b'
+ mdb->getTermFieldMatchData(1, 0)->reset(1);
+ ASSERT_TRUE(mdb->apply(1));
+ ASSERT_TRUE(ft.execute(toRankResult("fieldMatch(foo)",
+ "score:0.3231 completeness:0.5083 queryCompleteness:0.5 weight:0.2 matches:2 degradedMatches:1").
+ setEpsilon(1e-4)));
+ }
+ { // no pos occ for term a & b
+ MatchDataBuilder::UP mdb = ft.createMatchDataBuilder();
+ // add hit with query term 'a'
+ mdb->getTermFieldMatchData(0, 0)->reset(1);
+ // add hit with query term 'b'
+ mdb->getTermFieldMatchData(1, 0)->reset(1);
+ ASSERT_TRUE(mdb->apply(1));
+ ASSERT_TRUE(ft.execute(toRankResult("fieldMatch(foo)",
+ "score:0 completeness:0.475 queryCompleteness:0.5 weight:0.2 matches:2 degradedMatches:2").
+ setEpsilon(1e-4)));
+ }
+ }
+
+ { // invalid field length
+ LOG(info, "We have an invalid field length");
+ FtFeatureTest ft(_factory, StringList().add("fieldMatch(foo)"));
+ ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo");
+ ft.getQueryEnv().getBuilder().addIndexNode(StringList().add("foo"))->setWeight(search::query::Weight(100)); // search for 'a' (termId 0)
+ ASSERT_TRUE(ft.setup());
+
+ MatchDataBuilder::UP mdb = ft.createMatchDataBuilder();
+
+ // add occurrence with query term 'a'
+ ASSERT_TRUE(mdb->setFieldLength("foo", search::fef::FieldPositionsIterator::UNKNOWN_LENGTH)); // invalid field length
+ ASSERT_TRUE(mdb->addOccurence("foo", 0, 10));
+
+ ASSERT_TRUE(mdb->apply(1));
+
+ ASSERT_TRUE(ft.execute(toRankResult("fieldMatch(foo)", "score:0 matches:1 degradedMatches:0")));
+ }
+
+ { // test default values when we do not have hits in the field
+ LOG(info, "Default values when we have no hits");
+ FtFeatureTest ft(_factory, StringList().add("fieldMatch(foo)"));
+ ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo");
+ ft.getQueryEnv().getBuilder().addIndexNode(StringList().add("foo")); // search on 'foo' (0)
+ ASSERT_TRUE(ft.setup());
+
+ // must create this so that term match data is configured with the term data object
+ MatchDataBuilder::UP mdb = ft.createMatchDataBuilder();
+
+ RankResult rr = toRankResult("fieldMatch(foo)",
+ "score:0 "
+ "proximity:0 "
+ "completeness:0 "
+ "queryCompleteness:0 "
+ "fieldCompleteness:0 "
+ "orderness:0 "
+ "relatedness:0 "
+ "earliness:0 "
+ "longestSequenceRatio:0 "
+ "segmentProximity:0 "
+ "unweightedProximity:0 "
+ "absoluteProximity:0 "
+ "occurrence:0 "
+ "absoluteOccurrence:0 "
+ "weightedOccurrence:0 "
+ "weightedAbsoluteOccurrence:0 "
+ "significantOccurrence:0 "
+ "weight:0 "
+ "significance:0 "
+ "importance:0 "
+ "segments:0 "
+ "matches:0 "
+ "outOfOrder:0 "
+ "gaps:0 "
+ "gapLength:0 "
+ "longestSequence:0 "
+ "head:0 "
+ "tail:0 "
+ "segmentDistance:0 ")
+ .setEpsilon(10e-6);
+
+ ASSERT_TRUE(ft.execute(rr, 1)); // another docid -> no hit -> default values
+ }
+}
diff --git a/searchlib/src/tests/features/prod_features_fieldtermmatch.cpp b/searchlib/src/tests/features/prod_features_fieldtermmatch.cpp
new file mode 100644
index 00000000000..04caadd2029
--- /dev/null
+++ b/searchlib/src/tests/features/prod_features_fieldtermmatch.cpp
@@ -0,0 +1,113 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".prod_features_fieldtermmatch");
+
+#include "prod_features.h"
+#include <vespa/searchlib/features/fieldtermmatchfeature.h>
+
+using namespace search::features;
+using namespace search::fef;
+using namespace search::fef::test;
+
+void
+Test::testFieldTermMatch()
+{
+ {
+ // Test blueprint.
+ FieldTermMatchBlueprint pt;
+ {
+ EXPECT_TRUE(assertCreateInstance(pt, "fieldTermMatch"));
+
+ StringList params, in, out;
+ FT_SETUP_FAIL(pt, params);
+ FT_SETUP_FAIL(pt, params.add("foo"));
+ FT_SETUP_FAIL(pt, params.add("0"));
+ FT_SETUP_FAIL(pt, params.add("1"));
+ params.clear();
+
+ FtIndexEnvironment ie;
+ ie.getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo");
+ FT_SETUP_FAIL(pt, ie, params.add("foo"));
+ FT_SETUP_OK (pt, ie, params.add("0"), in,
+ out.add("firstPosition")
+ .add("lastPosition")
+ .add("occurrences").add("weight").add("exactness"));
+ FT_SETUP_FAIL(pt, ie, params.add("1"));
+ }
+ {
+ FT_DUMP_EMPTY(_factory, "fieldTermMatch");
+
+ FtIndexEnvironment ie;
+ ie.getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "foo");
+ FT_DUMP_EMPTY(_factory, "fieldTermMatch", ie); // must be an index field
+
+ StringList dump;
+ ie.getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "bar");
+ for (uint32_t term = 0; term < 5; ++term) {
+ vespalib::string bn = vespalib::make_string("fieldTermMatch(bar,%u)", term);
+ dump.add(bn + ".firstPosition").add(bn + ".occurrences").add(bn + ".weight");
+ }
+ FT_DUMP(_factory, "fieldTermMatch", ie, dump);
+
+ ie.getProperties().add("fieldTermMatch.numTerms", "0");
+ FT_DUMP_EMPTY(_factory, "fieldTermMatch", ie);
+
+ ie.getProperties().add("fieldTermMatch.numTerms.bar", "5");
+ FT_DUMP(_factory, "fieldTermMatch", ie, dump);
+ }
+ }
+
+ { // Test executor.
+ FtFeatureTest ft(_factory, "fieldTermMatch(foo,0)");
+ ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo");
+ ASSERT_TRUE(ft.setup());
+ RankResult exp;
+ exp .addScore("fieldTermMatch(foo,0).firstPosition", 1000000)
+ .addScore("fieldTermMatch(foo,0).lastPosition", 1000000)
+ .addScore("fieldTermMatch(foo,0).occurrences", 0)
+ .addScore("fieldTermMatch(foo,0).weight", 0)
+ .addScore("fieldTermMatch(foo,0).exactness", 0);
+ ASSERT_TRUE(ft.execute(exp));
+ }
+ {
+ // Test executor.
+ FtFeatureTest ft(_factory, "fieldTermMatch(foo,0)");
+ ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo");
+ ft.getQueryEnv().getBuilder().addAllFields();
+ ASSERT_TRUE(ft.setup());
+
+ search::fef::test::MatchDataBuilder::UP mdb = ft.createMatchDataBuilder();
+ ASSERT_TRUE(mdb->setFieldLength("foo", 100));
+ ASSERT_TRUE(mdb->addOccurence("foo", 0, 10));
+ ASSERT_TRUE(mdb->addOccurence("foo", 0, 20));
+ ASSERT_TRUE(mdb->apply(1));
+
+ search::fef::test::RankResult exp;
+ exp .addScore("fieldTermMatch(foo,0).firstPosition", 10)
+ .addScore("fieldTermMatch(foo,0).lastPosition", 20)
+ .addScore("fieldTermMatch(foo,0).occurrences", 2)
+ .addScore("fieldTermMatch(foo,0).weight", 2)
+ .addScore("fieldTermMatch(foo,0).exactness", 1);
+ ASSERT_TRUE(ft.execute(exp));
+ }
+ {
+ // Test executor (match without position information)
+ FtFeatureTest ft(_factory, "fieldTermMatch(foo,0)");
+ ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo");
+ ft.getQueryEnv().getBuilder().addIndexNode(StringList().add("foo"));
+ ASSERT_TRUE(ft.setup());
+
+ // make sure the term match data is initialized with the term data
+ MatchDataBuilder::UP mdb = ft.createMatchDataBuilder();
+ mdb->getTermFieldMatchData(0, 0)->reset(1);
+
+ search::fef::test::RankResult exp;
+ exp .addScore("fieldTermMatch(foo,0).firstPosition", 1000000)
+ .addScore("fieldTermMatch(foo,0).lastPosition", 1000000)
+ .addScore("fieldTermMatch(foo,0).occurrences", 1)
+ .addScore("fieldTermMatch(foo,0).weight", 0)
+ .addScore("fieldTermMatch(foo,0).exactness", 0);
+ ASSERT_TRUE(ft.execute(exp));
+ }
+}
diff --git a/searchlib/src/tests/features/prod_features_framework.cpp b/searchlib/src/tests/features/prod_features_framework.cpp
new file mode 100644
index 00000000000..5ce5e2c3177
--- /dev/null
+++ b/searchlib/src/tests/features/prod_features_framework.cpp
@@ -0,0 +1,174 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".prod_features_framework");
+
+#include "prod_features.h"
+#include <vespa/searchlib/features/valuefeature.h>
+
+using namespace search::features;
+using namespace search::fef;
+using namespace search::fef::test;
+
+void
+Test::testFramework()
+{
+ LOG(info, "testFramework()");
+ IndexEnvironment indexEnv;
+ { // test index environment builder
+ IndexEnvironmentBuilder ieb(indexEnv);
+ ieb.addField(FieldType::INDEX, CollectionType::SINGLE, "foo")
+ .addField(FieldType::ATTRIBUTE, CollectionType::WEIGHTEDSET, "bar")
+ .addField(FieldType::INDEX, CollectionType::ARRAY, "baz");
+ {
+ const FieldInfo * info = indexEnv.getFieldByName("foo");
+ ASSERT_TRUE(info != NULL);
+ EXPECT_EQUAL(info->id(), 0u);
+ EXPECT_TRUE(info->type() == FieldType::INDEX);
+ EXPECT_TRUE(info->collection() == CollectionType::SINGLE);
+ }
+ {
+ const FieldInfo * info = indexEnv.getFieldByName("bar");
+ ASSERT_TRUE(info != NULL);
+ EXPECT_EQUAL(info->id(), 1u);
+ EXPECT_TRUE(info->type() == FieldType::ATTRIBUTE);
+ EXPECT_TRUE(info->collection() == CollectionType::WEIGHTEDSET);
+ }
+ {
+ const FieldInfo * info = indexEnv.getFieldByName("baz");
+ ASSERT_TRUE(info != NULL);
+ EXPECT_EQUAL(info->id(), 2u);
+ EXPECT_TRUE(info->type() == FieldType::INDEX);
+ EXPECT_TRUE(info->collection() == CollectionType::ARRAY);
+ }
+ ASSERT_TRUE(indexEnv.getFieldByName("qux") == NULL);
+ }
+
+ QueryEnvironment queryEnv(&indexEnv);
+ MatchDataLayout layout;
+ { // test query environment builder
+ QueryEnvironmentBuilder qeb(queryEnv, layout);
+ {
+ SimpleTermData &tr = qeb.addAllFields();
+ ASSERT_TRUE(tr.lookupField(0) != 0);
+ ASSERT_TRUE(tr.lookupField(1) != 0);
+ ASSERT_TRUE(tr.lookupField(2) != 0);
+ EXPECT_TRUE(tr.lookupField(3) == 0);
+ EXPECT_TRUE(tr.lookupField(0)->getHandle() == 0u);
+ EXPECT_TRUE(tr.lookupField(1)->getHandle() == 1u);
+ EXPECT_TRUE(tr.lookupField(2)->getHandle() == 2u);
+ const ITermData *tp = queryEnv.getTerm(0);
+ ASSERT_TRUE(tp != NULL);
+ EXPECT_EQUAL(tp, &tr);
+ }
+ {
+ SimpleTermData *tr = qeb.addAttributeNode("bar");
+ ASSERT_TRUE(tr != 0);
+ ASSERT_TRUE(tr->lookupField(1) != 0);
+ EXPECT_TRUE(tr->lookupField(0) == 0);
+ EXPECT_TRUE(tr->lookupField(2) == 0);
+ EXPECT_TRUE(tr->lookupField(3) == 0);
+ EXPECT_TRUE(tr->lookupField(1)->getHandle() == 3u);
+ const ITermData *tp = queryEnv.getTerm(1);
+ ASSERT_TRUE(tp != NULL);
+ EXPECT_EQUAL(tp, tr);
+ }
+ }
+
+ MatchData::UP data = layout.createMatchData();
+ EXPECT_EQUAL(data->getNumTermFields(), 4u);
+ EXPECT_EQUAL(data->getNumFeatures(), 0u);
+
+ { // check match data access
+ MatchDataBuilder mdb(queryEnv, *data);
+
+ // setup some occurence lists
+ ASSERT_TRUE(mdb.addOccurence("foo", 0, 20));
+ ASSERT_TRUE(mdb.addOccurence("foo", 0, 10));
+ ASSERT_TRUE(mdb.setFieldLength("foo", 50));
+ ASSERT_TRUE(mdb.addOccurence("baz", 0, 15));
+ ASSERT_TRUE(mdb.addOccurence("baz", 0, 5));
+ ASSERT_TRUE(mdb.setFieldLength("baz", 100));
+ ASSERT_TRUE(mdb.apply(100));
+
+ {
+ {
+ TermFieldMatchData *tfmd = mdb.getTermFieldMatchData(0, 0);
+ ASSERT_TRUE(tfmd != NULL);
+
+ FieldPositionsIterator itr = tfmd->getIterator(); // foo (index)
+ ASSERT_TRUE(itr.valid());
+ EXPECT_EQUAL(itr.getFieldLength(), 50u);
+ EXPECT_EQUAL(itr.getPosition(), 10u);
+ itr.next();
+ ASSERT_TRUE(itr.valid());
+ EXPECT_EQUAL(itr.getPosition(), 20u);
+ itr.next();
+ ASSERT_TRUE(!itr.valid());
+ }
+ {
+ TermFieldMatchData *tfmd = mdb.getTermFieldMatchData(0, 1);
+ ASSERT_TRUE(tfmd != NULL);
+
+ FieldPositionsIterator itr = tfmd->getIterator(); // bar (attribute)
+ ASSERT_TRUE(!itr.valid());
+ }
+ {
+ TermFieldMatchData *tfmd = mdb.getTermFieldMatchData(0, 2);
+ ASSERT_TRUE(tfmd != NULL);
+
+ FieldPositionsIterator itr = tfmd->getIterator(); // baz (index)
+ ASSERT_TRUE(itr.valid());
+ EXPECT_EQUAL(itr.getFieldLength(), 100u);
+ EXPECT_EQUAL(itr.getPosition(), 5u);
+ itr.next();
+ ASSERT_TRUE(itr.valid());
+ EXPECT_EQUAL(itr.getPosition(), 15u);
+ itr.next();
+ ASSERT_TRUE(!itr.valid());
+ }
+ }
+ {
+ TermFieldMatchData *tfmd = mdb.getTermFieldMatchData(1, 1);
+ ASSERT_TRUE(tfmd != NULL);
+
+ FieldPositionsIterator itr = tfmd->getIterator(); // bar (attribute)
+ ASSERT_TRUE(!itr.valid());
+ }
+ }
+ { // check that data is cleared
+ MatchDataBuilder mdb(queryEnv, *data);
+ EXPECT_EQUAL(mdb.getTermFieldMatchData(0, 0)->getDocId(), TermFieldMatchData::invalidId());
+ EXPECT_EQUAL(mdb.getTermFieldMatchData(0, 1)->getDocId(), TermFieldMatchData::invalidId());
+ EXPECT_EQUAL(mdb.getTermFieldMatchData(0, 2)->getDocId(), TermFieldMatchData::invalidId());
+ EXPECT_EQUAL(mdb.getTermFieldMatchData(1, 1)->getDocId(), TermFieldMatchData::invalidId());
+
+ // test illegal things
+ ASSERT_TRUE(!mdb.addOccurence("foo", 1, 10)); // invalid term/field combination
+ }
+
+ BlueprintFactory factory;
+ factory.addPrototype(Blueprint::SP(new ValueBlueprint()));
+ Properties overrides;
+
+ { // test feature test runner
+ FeatureTest ft(factory, indexEnv, queryEnv, layout,
+ StringList().add("value(10)").add("value(20)").add("value(30)"), overrides);
+ MatchDataBuilder::UP mdb1 = ft.createMatchDataBuilder();
+ EXPECT_TRUE(mdb1.get() == NULL);
+ EXPECT_TRUE(!ft.execute(RankResult().addScore("value(10)", 10.0f)));
+ ASSERT_TRUE(ft.setup());
+ MatchDataBuilder::UP mdb2 = ft.createMatchDataBuilder();
+ EXPECT_TRUE(mdb2.get() != NULL);
+
+ EXPECT_TRUE(ft.execute(RankResult().addScore("value(10)", 10.0f).addScore("value(20)", 20.0f)));
+ EXPECT_TRUE(!ft.execute(RankResult().addScore("value(10)", 20.0f)));
+ EXPECT_TRUE(!ft.execute(RankResult().addScore("value(5)", 5.0f)));
+ }
+ { // test simple constructor
+ MatchDataLayout mdl; // match data layout cannot be reused
+ FeatureTest ft(factory, indexEnv, queryEnv, mdl, "value(10)", overrides);
+ ASSERT_TRUE(ft.setup());
+ EXPECT_TRUE(ft.execute(10.0f));
+ }
+}
diff --git a/searchlib/src/tests/features/prod_features_test.sh b/searchlib/src/tests/features/prod_features_test.sh
new file mode 100755
index 00000000000..bec2b49807f
--- /dev/null
+++ b/searchlib/src/tests/features/prod_features_test.sh
@@ -0,0 +1,3 @@
+#!/bin/bash
+VESPA_LOG_TARGET=file:vlog2.txt $VALGRIND ./searchlib_prod_features_test_app
+rm -rf *.dat
diff --git a/searchlib/src/tests/features/ranking_expression/.gitignore b/searchlib/src/tests/features/ranking_expression/.gitignore
new file mode 100644
index 00000000000..63ab51e663a
--- /dev/null
+++ b/searchlib/src/tests/features/ranking_expression/.gitignore
@@ -0,0 +1 @@
+searchlib_ranking_expression_test_app
diff --git a/searchlib/src/tests/features/ranking_expression/CMakeLists.txt b/searchlib/src/tests/features/ranking_expression/CMakeLists.txt
new file mode 100644
index 00000000000..4caddaa7bd8
--- /dev/null
+++ b/searchlib/src/tests/features/ranking_expression/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_ranking_expression_test_app
+ SOURCES
+ ranking_expression_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_ranking_expression_test_app COMMAND searchlib_ranking_expression_test_app)
diff --git a/searchlib/src/tests/features/ranking_expression/ranking_expression_test.cpp b/searchlib/src/tests/features/ranking_expression/ranking_expression_test.cpp
new file mode 100644
index 00000000000..64fb3477951
--- /dev/null
+++ b/searchlib/src/tests/features/ranking_expression/ranking_expression_test.cpp
@@ -0,0 +1,90 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/vespalib/testkit/test_kit.h>
+
+#include <vespa/vespalib/eval/value_type.h>
+#include <vespa/searchlib/fef/feature_type.h>
+#include <vespa/searchlib/fef/featurenameparser.h>
+#include <vespa/searchlib/features/rankingexpressionfeature.h>
+#include <vespa/searchlib/fef/test/dummy_dependency_handler.h>
+#include <vespa/searchlib/fef/test/indexenvironment.h>
+
+using namespace search::features;
+using namespace search::fef::test;
+using namespace search::fef;
+using namespace vespalib::eval;
+
+using TypeMap = std::map<vespalib::string,vespalib::string>;
+
+struct SetupResult {
+ IndexEnvironment index_env;
+ RankingExpressionBlueprint rank;
+ DummyDependencyHandler deps;
+ bool setup_ok;
+ SetupResult(const TypeMap &object_inputs,
+ const vespalib::string &expression)
+ : index_env(), rank(), deps(rank), setup_ok(false)
+ {
+ rank.setName("self");
+ index_env.getProperties().add("self.rankingScript", expression);
+ for (const auto &input: object_inputs) {
+ deps.define_object_input(input.first, ValueType::from_spec(input.second));
+ }
+ setup_ok = rank.setup(index_env, {});
+ EXPECT_TRUE(!deps.accept_type_mismatch);
+ }
+};
+
+void verify_output_type(const TypeMap &object_inputs,
+ const vespalib::string &expression, const FeatureType &expect)
+{
+ SetupResult result(object_inputs, expression);
+ EXPECT_TRUE(result.setup_ok);
+ EXPECT_EQUAL(1u, result.deps.output.size());
+ ASSERT_EQUAL(1u, result.deps.output_type.size());
+ if (expect.is_object()) {
+ EXPECT_EQUAL(expect.type(), result.deps.output_type[0].type());
+ } else {
+ EXPECT_TRUE(!result.deps.output_type[0].is_object());
+ }
+}
+
+void verify_setup_fail(const TypeMap &object_inputs,
+ const vespalib::string &expression)
+{
+ SetupResult result(object_inputs, expression);
+ EXPECT_TRUE(!result.setup_ok);
+ EXPECT_EQUAL(0u, result.deps.output.size());
+}
+
+TEST("require that expression with only number inputs produce number output (compiled)") {
+ TEST_DO(verify_output_type({}, "a*b", FeatureType::number()));
+}
+
+TEST("require that expression with object input produces object output (interpreted)") {
+ TEST_DO(verify_output_type({{"b", "double"}}, "a*b", FeatureType::object(ValueType::double_type())));
+}
+
+TEST("require that expression with internal tensor operations produce object output (interpreted)") {
+ TEST_DO(verify_output_type({}, "a*b*sum({{x:1}:5,{x:2}:7})", FeatureType::object(ValueType::double_type())));
+}
+
+TEST("require that ranking expression can resolve to concrete complex type") {
+ TEST_DO(verify_output_type({{"a", "tensor(x{},y{})"}, {"b", "tensor(y{},z{})"}}, "a*b",
+ FeatureType::object(ValueType::from_spec("tensor(x{},y{},z{})"))));
+}
+
+TEST("require that ranking expression can resolve to abstract complex type") {
+ TEST_DO(verify_output_type({{"a", "tensor"}}, "a*b", FeatureType::object(ValueType::from_spec("tensor"))));
+}
+
+TEST("require that ranking expression can resolve to 'any' type") {
+ TEST_DO(verify_output_type({{"a", "tensor(x{},y{})"}, {"b", "tensor"}}, "a*b",
+ FeatureType::object(ValueType::from_spec("any"))));
+}
+
+TEST("require that setup fails for incompatible types") {
+ TEST_DO(verify_setup_fail({{"a", "tensor(x{},y{})"}, {"b", "tensor(y[10],z{})"}}, "a*b"));
+}
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/features/raw_score/.gitignore b/searchlib/src/tests/features/raw_score/.gitignore
new file mode 100644
index 00000000000..a1b2d4e3f16
--- /dev/null
+++ b/searchlib/src/tests/features/raw_score/.gitignore
@@ -0,0 +1 @@
+searchlib_raw_score_test_app
diff --git a/searchlib/src/tests/features/raw_score/CMakeLists.txt b/searchlib/src/tests/features/raw_score/CMakeLists.txt
new file mode 100644
index 00000000000..a672b7b071d
--- /dev/null
+++ b/searchlib/src/tests/features/raw_score/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_raw_score_test_app
+ SOURCES
+ raw_score_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_raw_score_test_app COMMAND searchlib_raw_score_test_app)
diff --git a/searchlib/src/tests/features/raw_score/FILES b/searchlib/src/tests/features/raw_score/FILES
new file mode 100644
index 00000000000..479927259ee
--- /dev/null
+++ b/searchlib/src/tests/features/raw_score/FILES
@@ -0,0 +1 @@
+raw_score_test.cpp
diff --git a/searchlib/src/tests/features/raw_score/raw_score_test.cpp b/searchlib/src/tests/features/raw_score/raw_score_test.cpp
new file mode 100644
index 00000000000..0a15ff69318
--- /dev/null
+++ b/searchlib/src/tests/features/raw_score/raw_score_test.cpp
@@ -0,0 +1,151 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/vespalib/testkit/test_kit.h>
+#include <vespa/searchlib/features/setup.h>
+#include <vespa/searchlib/fef/test/indexenvironment.h>
+#include <vespa/searchlib/fef/test/indexenvironmentbuilder.h>
+#include <vespa/searchlib/fef/test/queryenvironment.h>
+#include <vespa/searchlib/features/raw_score_feature.h>
+#include <vespa/searchlib/fef/fef.h>
+#include <vespa/searchlib/fef/test/dummy_dependency_handler.h>
+
+using search::feature_t;
+using namespace search::fef;
+using namespace search::fef::test;
+using namespace search::features;
+
+const std::string featureName("rawScore(foo)");
+
+struct BlueprintFactoryFixture {
+ BlueprintFactory factory;
+ BlueprintFactoryFixture() : factory()
+ {
+ setup_search_features(factory);
+ }
+};
+
+struct IndexFixture {
+ IndexEnvironment indexEnv;
+ IndexFixture() : indexEnv()
+ {
+ IndexEnvironmentBuilder builder(indexEnv);
+ builder.addField(FieldType::INDEX, CollectionType::SINGLE, "foo");
+ builder.addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "bar");
+ }
+};
+
+struct FeatureDumpFixture : public IDumpFeatureVisitor {
+ virtual void visitDumpFeature(const vespalib::string &) {
+ TEST_ERROR("no features should be dumped");
+ }
+ FeatureDumpFixture() : IDumpFeatureVisitor() {}
+};
+
+struct RankFixture : BlueprintFactoryFixture, IndexFixture {
+ QueryEnvironment queryEnv;
+ RankSetup rankSetup;
+ RankProgram::UP rankProgram;
+ MatchDataLayout mdl;
+ std::vector<TermFieldHandle> fooHandles;
+ std::vector<TermFieldHandle> barHandles;
+ RankFixture(size_t fooCnt, size_t barCnt)
+ : queryEnv(&indexEnv), rankSetup(factory, indexEnv),
+ rankProgram(), mdl(), fooHandles(), barHandles()
+ {
+ for (size_t i = 0; i < fooCnt; ++i) {
+ uint32_t fieldId = indexEnv.getFieldByName("foo")->id();
+ fooHandles.push_back(mdl.allocTermField(fieldId));
+ SimpleTermData term;
+ term.addField(fieldId).setHandle(fooHandles.back());
+ queryEnv.getTerms().push_back(term);
+ }
+ for (size_t i = 0; i < barCnt; ++i) {
+ uint32_t fieldId = indexEnv.getFieldByName("bar")->id();
+ barHandles.push_back(mdl.allocTermField(fieldId));
+ SimpleTermData term;
+ term.addField(fieldId).setHandle(barHandles.back());
+ queryEnv.getTerms().push_back(term);
+ }
+ rankSetup.setFirstPhaseRank(featureName);
+ rankSetup.setIgnoreDefaultRankFeatures(true);
+ ASSERT_TRUE(rankSetup.compile());
+ rankProgram = rankSetup.create_first_phase_program();
+ rankProgram->setup(mdl, queryEnv);
+ }
+ feature_t getScore(uint32_t docId) {
+ rankProgram->run(docId);
+ return *Utils::getScoreFeature(*rankProgram);
+ }
+ void setScore(TermFieldHandle handle, uint32_t docId, feature_t score) {
+ rankProgram->match_data().resolveTermField(handle)->setRawScore(docId, score);
+ }
+ void setFooScore(uint32_t i, uint32_t docId, feature_t score) {
+ ASSERT_LESS(i, fooHandles.size());
+ setScore(fooHandles[i], docId, score);
+ }
+ void setBarScore(uint32_t i, uint32_t docId, feature_t score) {
+ ASSERT_LESS(i, barHandles.size());
+ setScore(barHandles[i], docId, score);
+ }
+};
+
+TEST_F("require that blueprint can be created from factory", BlueprintFactoryFixture) {
+ Blueprint::SP bp = f.factory.createBlueprint("rawScore");
+ EXPECT_TRUE(bp.get() != 0);
+ EXPECT_TRUE(dynamic_cast<RawScoreBlueprint*>(bp.get()) != 0);
+}
+
+TEST_FFF("require that no features are dumped", RawScoreBlueprint, IndexFixture, FeatureDumpFixture) {
+ f1.visitDumpFeatures(f2.indexEnv, f3);
+}
+
+TEST_FF("require that setup can be done on index field", RawScoreBlueprint, IndexFixture) {
+ DummyDependencyHandler deps(f1);
+ f1.setName(vespalib::make_string("%s(foo)", f1.getBaseName().c_str()));
+ EXPECT_TRUE(((Blueprint&)f1).setup(f2.indexEnv, std::vector<vespalib::string>(1, "foo")));
+}
+
+TEST_FF("require that setup can be done on attribute field", RawScoreBlueprint, IndexFixture) {
+ DummyDependencyHandler deps(f1);
+ f1.setName(vespalib::make_string("%s(bar)", f1.getBaseName().c_str()));
+ EXPECT_TRUE(((Blueprint&)f1).setup(f2.indexEnv, std::vector<vespalib::string>(1, "bar")));
+}
+
+TEST_FF("require that setup fails for unknown field", RawScoreBlueprint, IndexFixture) {
+ DummyDependencyHandler deps(f1);
+ f1.setName(vespalib::make_string("%s(unknown)", f1.getBaseName().c_str()));
+ EXPECT_TRUE(!((Blueprint&)f1).setup(f2.indexEnv, std::vector<vespalib::string>(1, "unknown")));
+}
+
+TEST_F("require that not searching a filed will give it 0.0 raw score", RankFixture(0, 3)) {
+ EXPECT_EQUAL(0.0, f1.getScore(10));
+}
+
+TEST_F("require that raw score can be obtained", RankFixture(1, 0)) {
+ f1.setFooScore(0, 10, 5.0);
+ EXPECT_EQUAL(5.0, f1.getScore(10));
+}
+
+TEST_F("require that multiple raw scores are accumulated", RankFixture(3, 0)) {
+ f1.setFooScore(0, 10, 1.0);
+ f1.setFooScore(1, 10, 2.0);
+ f1.setFooScore(2, 10, 3.0);
+ EXPECT_EQUAL(6.0, f1.getScore(10));
+}
+
+TEST_F("require that stale raw scores are ignored", RankFixture(3, 0)) {
+ f1.setFooScore(0, 10, 1.0);
+ f1.setFooScore(1, 9, 2.0);
+ f1.setFooScore(2, 10, 3.0);
+ EXPECT_EQUAL(4.0, f1.getScore(10));
+}
+
+TEST_F("require that raw scores from other fields are ignored", RankFixture(2, 2)) {
+ f1.setFooScore(0, 10, 1.0);
+ f1.setFooScore(1, 10, 2.0);
+ f1.setBarScore(0, 10, 5.0);
+ f1.setBarScore(1, 10, 6.0);
+ EXPECT_EQUAL(3.0, f1.getScore(10));
+}
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/features/subqueries/.gitignore b/searchlib/src/tests/features/subqueries/.gitignore
new file mode 100644
index 00000000000..63dc19177d1
--- /dev/null
+++ b/searchlib/src/tests/features/subqueries/.gitignore
@@ -0,0 +1 @@
+searchlib_subqueries_test_app
diff --git a/searchlib/src/tests/features/subqueries/CMakeLists.txt b/searchlib/src/tests/features/subqueries/CMakeLists.txt
new file mode 100644
index 00000000000..45845e8ec1b
--- /dev/null
+++ b/searchlib/src/tests/features/subqueries/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_subqueries_test_app
+ SOURCES
+ subqueries_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_subqueries_test_app COMMAND searchlib_subqueries_test_app)
diff --git a/searchlib/src/tests/features/subqueries/subqueries_test.cpp b/searchlib/src/tests/features/subqueries/subqueries_test.cpp
new file mode 100644
index 00000000000..160ec404b20
--- /dev/null
+++ b/searchlib/src/tests/features/subqueries/subqueries_test.cpp
@@ -0,0 +1,162 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/vespalib/testkit/test_kit.h>
+#include <vespa/searchlib/features/setup.h>
+#include <vespa/searchlib/fef/test/indexenvironment.h>
+#include <vespa/searchlib/fef/test/indexenvironmentbuilder.h>
+#include <vespa/searchlib/fef/test/queryenvironment.h>
+#include <vespa/searchlib/features/subqueries_feature.h>
+#include <vespa/searchlib/fef/fef.h>
+#include <vespa/searchlib/fef/test/dummy_dependency_handler.h>
+
+using search::feature_t;
+using namespace search::fef;
+using namespace search::fef::test;
+using namespace search::features;
+
+struct BlueprintFactoryFixture {
+ BlueprintFactory factory;
+ BlueprintFactoryFixture() : factory()
+ {
+ setup_search_features(factory);
+ }
+};
+
+struct IndexFixture {
+ IndexEnvironment indexEnv;
+ IndexFixture() : indexEnv()
+ {
+ IndexEnvironmentBuilder builder(indexEnv);
+ builder.addField(FieldType::INDEX, CollectionType::SINGLE, "foo");
+ builder.addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "bar");
+ }
+};
+
+struct FeatureDumpFixture : public IDumpFeatureVisitor {
+ virtual void visitDumpFeature(const vespalib::string &) {
+ TEST_ERROR("no features should be dumped");
+ }
+ FeatureDumpFixture() : IDumpFeatureVisitor() {}
+};
+
+struct RankFixture : BlueprintFactoryFixture, IndexFixture {
+ QueryEnvironment queryEnv;
+ RankSetup rankSetup;
+ RankProgram::UP rankProgram;
+ MatchDataLayout mdl;
+ std::vector<TermFieldHandle> fooHandles;
+ std::vector<TermFieldHandle> barHandles;
+ RankFixture(size_t fooCnt, size_t barCnt,
+ std::string featureName = "subqueries(foo)")
+ : queryEnv(&indexEnv), rankSetup(factory, indexEnv),
+ rankProgram(), mdl(), fooHandles(), barHandles()
+ {
+ fooHandles = addFields(fooCnt, indexEnv.getFieldByName("foo")->id());
+ barHandles = addFields(barCnt, indexEnv.getFieldByName("bar")->id());
+ rankSetup.setFirstPhaseRank(featureName);
+ rankSetup.setIgnoreDefaultRankFeatures(true);
+ ASSERT_TRUE(rankSetup.compile());
+ rankProgram = rankSetup.create_first_phase_program();
+ rankProgram->setup(mdl, queryEnv);
+ }
+ std::vector<TermFieldHandle> addFields(size_t count, uint32_t fieldId) {
+ std::vector<TermFieldHandle> handles;
+ for (size_t i = 0; i < count; ++i) {
+ handles.push_back(mdl.allocTermField(fieldId));
+ SimpleTermData term;
+ term.addField(fieldId).setHandle(handles.back());
+ queryEnv.getTerms().push_back(term);
+ }
+ return handles;
+ }
+ feature_t getSubqueries(uint32_t docId) {
+ rankProgram->run(docId);
+ return *Utils::getScoreFeature(*rankProgram);
+ }
+ void setSubqueries(TermFieldHandle handle, uint32_t docId,
+ uint64_t subqueries) {
+ rankProgram->match_data().resolveTermField(handle)->setSubqueries(docId, subqueries);
+ }
+ void setFooSubqueries(uint32_t i, uint32_t docId, uint64_t subqueries) {
+ ASSERT_LESS(i, fooHandles.size());
+ setSubqueries(fooHandles[i], docId, subqueries);
+ }
+ void setBarSubqueries(uint32_t i, uint32_t docId, uint64_t subqueries) {
+ ASSERT_LESS(i, barHandles.size());
+ setSubqueries(barHandles[i], docId, subqueries);
+ }
+};
+
+TEST_F("require that blueprint can be created from factory",
+ BlueprintFactoryFixture) {
+ Blueprint::SP bp = f.factory.createBlueprint("subqueries");
+ EXPECT_TRUE(bp.get() != 0);
+ EXPECT_TRUE(dynamic_cast<SubqueriesBlueprint*>(bp.get()) != 0);
+}
+
+TEST_FFF("require that no features are dumped",
+ SubqueriesBlueprint, IndexFixture, FeatureDumpFixture) {
+ f1.visitDumpFeatures(f2.indexEnv, f3);
+}
+
+TEST_FF("require that setup can be done on index field",
+ SubqueriesBlueprint, IndexFixture) {
+ DummyDependencyHandler deps(f1);
+ f1.setName(vespalib::make_string("%s(foo)", f1.getBaseName().c_str()));
+ EXPECT_TRUE(((Blueprint&)f1).setup(f2.indexEnv, {"foo"}));
+}
+
+TEST_FF("require that setup can be done on attribute field",
+ SubqueriesBlueprint, IndexFixture) {
+ DummyDependencyHandler deps(f1);
+ f1.setName(vespalib::make_string("%s(bar)", f1.getBaseName().c_str()));
+ EXPECT_TRUE(((Blueprint&)f1).setup(f2.indexEnv, {"bar"}));
+}
+
+TEST_FF("require that setup fails for unknown field",
+ SubqueriesBlueprint, IndexFixture) {
+ DummyDependencyHandler deps(f1);
+ f1.setName(vespalib::make_string("%s(unknown)", f1.getBaseName().c_str()));
+ EXPECT_FALSE(((Blueprint&)f1).setup(f2.indexEnv, {"unknown"}));
+}
+
+TEST_F("require that not searching a field will give it 0 subqueries",
+ RankFixture(0, 3)) {
+ EXPECT_EQUAL(0, f1.getSubqueries(10));
+}
+
+TEST_F("require that subqueries can be obtained", RankFixture(1, 0)) {
+ f1.setFooSubqueries(0, 10, 0x1234);
+ EXPECT_EQUAL(0x1234, f1.getSubqueries(10));
+}
+
+TEST_F("require that msb subqueries can be obtained",
+ RankFixture(1, 0, "subqueries(foo).msb")) {
+ f1.setFooSubqueries(0, 10, 0x123412345678ULL);
+ EXPECT_EQUAL(0x1234, f1.getSubqueries(10));
+}
+
+TEST_F("require that multiple subqueries are accumulated", RankFixture(3, 0)) {
+ f1.setFooSubqueries(0, 10, 1);
+ f1.setFooSubqueries(1, 10, 2);
+ f1.setFooSubqueries(2, 10, 4);
+ EXPECT_EQUAL(7, f1.getSubqueries(10));
+}
+
+TEST_F("require that stale subqueries are ignored", RankFixture(3, 0)) {
+ f1.setFooSubqueries(0, 10, 1);
+ f1.setFooSubqueries(1, 9, 2);
+ f1.setFooSubqueries(2, 10, 4);
+ EXPECT_EQUAL(5, f1.getSubqueries(10));
+}
+
+TEST_F("require that subqueries from other fields are ignored",
+ RankFixture(2, 2)) {
+ f1.setFooSubqueries(0, 10, 1);
+ f1.setFooSubqueries(1, 10, 2);
+ f1.setBarSubqueries(0, 10, 4);
+ f1.setBarSubqueries(1, 10, 8);
+ EXPECT_EQUAL(3, f1.getSubqueries(10));
+}
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/features/tensor/.gitignore b/searchlib/src/tests/features/tensor/.gitignore
new file mode 100644
index 00000000000..ae6d6dfb414
--- /dev/null
+++ b/searchlib/src/tests/features/tensor/.gitignore
@@ -0,0 +1 @@
+searchlib_tensor_test_app
diff --git a/searchlib/src/tests/features/tensor/CMakeLists.txt b/searchlib/src/tests/features/tensor/CMakeLists.txt
new file mode 100644
index 00000000000..33f7d44d8fe
--- /dev/null
+++ b/searchlib/src/tests/features/tensor/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_tensor_test_app
+ SOURCES
+ tensor_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_tensor_test_app COMMAND searchlib_tensor_test_app)
diff --git a/searchlib/src/tests/features/tensor/FILES b/searchlib/src/tests/features/tensor/FILES
new file mode 100644
index 00000000000..6ece9b360b5
--- /dev/null
+++ b/searchlib/src/tests/features/tensor/FILES
@@ -0,0 +1 @@
+tensor_test.cpp
diff --git a/searchlib/src/tests/features/tensor/tensor_test.cpp b/searchlib/src/tests/features/tensor/tensor_test.cpp
new file mode 100644
index 00000000000..caceea0f47b
--- /dev/null
+++ b/searchlib/src/tests/features/tensor/tensor_test.cpp
@@ -0,0 +1,237 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/vespalib/testkit/test_kit.h>
+#include <vespa/vespalib/eval/function.h>
+
+#include <vespa/searchlib/attribute/attributefactory.h>
+#include <vespa/searchlib/attribute/attributevector.h>
+#include <vespa/searchlib/attribute/tensorattribute.h>
+#include <vespa/searchlib/features/setup.h>
+#include <vespa/searchlib/fef/test/indexenvironment.h>
+#include <vespa/searchlib/fef/test/indexenvironmentbuilder.h>
+#include <vespa/searchlib/fef/test/queryenvironment.h>
+#include <vespa/searchlib/fef/test/ftlib.h>
+#include <vespa/searchlib/fef/fef.h>
+#include <vespa/vespalib/tensor/tensor_factory.h>
+#include <vespa/vespalib/tensor/default_tensor.h>
+#include <vespa/vespalib/tensor/serialization/typed_binary_format.h>
+#include <vespa/searchlib/attribute/tensorattribute.h>
+#include <vespa/vespalib/eval/interpreted_function.h>
+#include <vespa/vespalib/tensor/default_tensor_engine.h>
+
+using search::feature_t;
+using namespace search::fef;
+using namespace search::fef::indexproperties;
+using namespace search::fef::test;
+using namespace search::features;
+using search::AttributeFactory;
+using search::attribute::TensorAttribute;
+using search::AttributeVector;
+using vespalib::eval::Value;
+using vespalib::eval::Function;
+using vespalib::tensor::Tensor;
+using vespalib::tensor::TensorCells;
+using vespalib::tensor::DenseTensorCells;
+using vespalib::tensor::TensorDimensions;
+using vespalib::tensor::TensorFactory;
+using vespalib::tensor::TensorType;
+using vespalib::eval::InterpretedFunction;
+using vespalib::tensor::DefaultTensorEngine;
+
+typedef search::attribute::Config AVC;
+typedef search::attribute::BasicType AVBT;
+typedef search::attribute::CollectionType AVCT;
+typedef search::AttributeVector::SP AttributePtr;
+typedef FtTestApp FTA;
+
+namespace
+{
+
+Tensor::UP createTensor(const TensorCells &cells,
+ const TensorDimensions &dimensions) {
+ vespalib::tensor::DefaultTensor::builder builder;
+ return TensorFactory::create(cells, dimensions, builder);
+}
+
+}
+
+struct ExecFixture
+{
+ BlueprintFactory factory;
+ FtFeatureTest test;
+ ExecFixture(const vespalib::string &feature)
+ : factory(),
+ test(factory, feature)
+ {
+ setup_search_features(factory);
+ setupAttributeVectors();
+ setupQueryEnvironment();
+ ASSERT_TRUE(test.setup());
+ }
+ void addAttributeField(const vespalib::string &attrName) {
+ test.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, attrName);
+ }
+ AttributeVector::SP createStringAttribute(const vespalib::string &attrName) {
+ addAttributeField(attrName);
+ return AttributeFactory::createAttribute(attrName, AVC(AVBT::STRING, AVCT::SINGLE));
+ }
+ AttributeVector::SP createTensorAttribute(const vespalib::string &attrName, const vespalib::string &type) {
+ addAttributeField(attrName);
+ AVC config(AVBT::TENSOR, AVCT::SINGLE);
+ config.setTensorType(TensorType::fromSpec(type));
+ return AttributeFactory::createAttribute(attrName, config);
+ }
+ void setAttributeTensorType(const vespalib::string &attrName, const vespalib::string &type) {
+ type::Attribute::set(test.getIndexEnv().getProperties(), attrName, type);
+ }
+ void setQueryTensorType(const vespalib::string &queryFeatureName, const vespalib::string &type) {
+ type::QueryFeature::set(test.getIndexEnv().getProperties(), queryFeatureName, type);
+ }
+ void setupAttributeVectors() {
+ std::vector<AttributePtr> attrs;
+ attrs.push_back(createTensorAttribute("tensorattr", "tensor(x{})"));
+ attrs.push_back(createStringAttribute("singlestr"));
+ attrs.push_back(createTensorAttribute("wrongtype", "tensor(y{})"));
+ addAttributeField("null");
+ setAttributeTensorType("tensorattr", "tensor(x{})");
+ setAttributeTensorType("wrongtype", "tensor(x{})");
+ setAttributeTensorType("null", "tensor(x{})");
+
+ for (const auto &attr : attrs) {
+ attr->addReservedDoc();
+ attr->addDocs(2);
+ attr->clearDoc(1);
+ attr->clearDoc(2);
+ attr->commit();
+ test.getIndexEnv().getAttributeManager().add(attr);
+ }
+
+ TensorAttribute *tensorAttr =
+ dynamic_cast<TensorAttribute *>(attrs[0].get());
+
+ tensorAttr->setTensor(1, *createTensor({ {{{"x", "a"}}, 3},
+ {{{"x", "b"}}, 5},
+ {{{"x", "c"}}, 7} },
+ { "x" }));
+
+ for (const auto &attr : attrs) {
+ attr->commit();
+ }
+ }
+ void setQueryTensor(const vespalib::string &tensorName,
+ const vespalib::string &tensorTypeSpec,
+ const TensorCells &cells,
+ const TensorDimensions &dimensions)
+ {
+ auto tensor = createTensor(cells, dimensions);
+ vespalib::nbostream stream;
+ vespalib::tensor::TypedBinaryFormat::serialize(stream, *tensor);
+ test.getQueryEnv().getProperties().add(tensorName,
+ vespalib::stringref(stream.peek(), stream.size()));
+ setQueryTensorType(tensorName, tensorTypeSpec);
+ }
+
+ void setupQueryEnvironment() {
+ setQueryTensor("tensorquery",
+ "tensor(q{})",
+ { {{{"q", "d"}}, 11 },
+ {{{"q", "e"}}, 13 },
+ {{{"q", "f"}}, 17 } },
+ { "q" });
+ setQueryTensor("mappedtensorquery",
+ "tensor(x[2])",
+ { {{{"x", "0"},{"y", "0"}}, 11 },
+ {{{"x", "0"},{"y", "1"}}, 13 },
+ {{{"x", "1"},{"y", "0"}}, 17 } },
+ { "x", "y" });
+ setQueryTensorType("null", "tensor(q{})");
+ }
+ const Tensor &extractTensor() {
+ const Value::CREF *value = test.resolveObjectFeature();
+ ASSERT_TRUE(value != nullptr);
+ ASSERT_TRUE(value->get().is_tensor());
+ return static_cast<const Tensor &>(*value->get().as_tensor());
+ }
+ const Tensor &execute(uint32_t docId = 1) {
+ test.executeOnly(docId);
+ return extractTensor();
+ }
+};
+
+struct AsTensor {
+ InterpretedFunction ifun;
+ InterpretedFunction::Context ctx;
+ const Value *result;
+ explicit AsTensor(const vespalib::string &expr)
+ : ifun(DefaultTensorEngine::ref(), Function::parse(expr)), ctx(), result(&ifun.eval(ctx))
+ {
+ ASSERT_TRUE(result->is_tensor());
+ }
+ bool operator==(const Tensor &rhs) const { return static_cast<const Tensor &>(*result->as_tensor()).equals(rhs); }
+};
+
+std::ostream &operator<<(std::ostream &os, const AsTensor &my_tensor) {
+ os << my_tensor.result->as_tensor();
+ return os;
+}
+
+TEST_F("require that tensor attribute can be extracted as tensor in attribute feature",
+ ExecFixture("attribute(tensorattr)"))
+{
+ EXPECT_EQUAL(AsTensor("{ {x:b}:5, {x:c}:7, {x:a}:3 }"), f.execute());
+}
+
+TEST_F("require that tensor from query can be extracted as tensor in query feature",
+ ExecFixture("query(tensorquery)"))
+{
+ EXPECT_EQUAL(AsTensor("{ {q:f}:17, {q:d}:11, {q:e}:13 }"), f.execute());
+}
+
+TEST_F("require that empty tensor is created if attribute does not exists",
+ ExecFixture("attribute(null)"))
+{
+ EXPECT_EQUAL(AsTensor("{ }"), f.execute());
+}
+
+TEST_F("require that empty tensor is created if tensor type is wrong",
+ ExecFixture("attribute(wrongtype)"))
+{
+ EXPECT_EQUAL(AsTensor("{ }"), f.execute());
+}
+
+TEST_F("require that empty tensor is created if query parameter is not found",
+ ExecFixture("query(null)"))
+{
+ EXPECT_EQUAL(AsTensor("{ }"), f.execute());
+}
+
+TEST_F("require that empty tensor is created if document has no tensor",
+ ExecFixture("attribute(tensorattr)")) {
+ EXPECT_EQUAL(AsTensor("{ }"), f.execute(2));
+}
+
+struct AsDenseTensor {
+ Tensor::UP tensor;
+ explicit AsDenseTensor(const DenseTensorCells &cells)
+ : tensor(TensorFactory::createDense(cells))
+ {
+ ASSERT_TRUE(!!tensor);
+ }
+ bool operator==(const Tensor &rhs) const { return tensor->equals(rhs); }
+};
+
+
+std::ostream &operator<<(std::ostream &os, const AsDenseTensor &my_tensor) {
+ os << *my_tensor.tensor;
+ return os;
+}
+
+TEST_F("require that tensor from query is mapped",
+ ExecFixture("query(mappedtensorquery)")) {
+ EXPECT_EQUAL(AsDenseTensor({ {{{"x", 0}}, 24},
+ {{{"x", 1}}, 17} }),
+ f.execute());
+}
+
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/features/tensor_from_labels/.gitignore b/searchlib/src/tests/features/tensor_from_labels/.gitignore
new file mode 100644
index 00000000000..0e241941ca3
--- /dev/null
+++ b/searchlib/src/tests/features/tensor_from_labels/.gitignore
@@ -0,0 +1 @@
+searchlib_tensor_from_labels_test_app
diff --git a/searchlib/src/tests/features/tensor_from_labels/CMakeLists.txt b/searchlib/src/tests/features/tensor_from_labels/CMakeLists.txt
new file mode 100644
index 00000000000..db1814a0f66
--- /dev/null
+++ b/searchlib/src/tests/features/tensor_from_labels/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_tensor_from_labels_test_app
+ SOURCES
+ tensor_from_labels_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_tensor_from_labels_test_app COMMAND searchlib_tensor_from_labels_test_app)
diff --git a/searchlib/src/tests/features/tensor_from_labels/FILES b/searchlib/src/tests/features/tensor_from_labels/FILES
new file mode 100644
index 00000000000..daecb2bbf5b
--- /dev/null
+++ b/searchlib/src/tests/features/tensor_from_labels/FILES
@@ -0,0 +1 @@
+tensor_from_labels_test.cpp
diff --git a/searchlib/src/tests/features/tensor_from_labels/tensor_from_labels_test.cpp b/searchlib/src/tests/features/tensor_from_labels/tensor_from_labels_test.cpp
new file mode 100644
index 00000000000..b15ffb956ce
--- /dev/null
+++ b/searchlib/src/tests/features/tensor_from_labels/tensor_from_labels_test.cpp
@@ -0,0 +1,211 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/vespalib/testkit/test_kit.h>
+#include <vespa/vespalib/eval/function.h>
+#include <vespa/vespalib/eval/interpreted_function.h>
+#include <vespa/vespalib/tensor/tensor.h>
+#include <vespa/vespalib/tensor/default_tensor_engine.h>
+
+#include <vespa/searchlib/attribute/attributefactory.h>
+#include <vespa/searchlib/attribute/attributevector.h>
+#include <vespa/searchlib/attribute/integerbase.h>
+#include <vespa/searchlib/attribute/stringbase.h>
+#include <vespa/searchlib/features/setup.h>
+#include <vespa/searchlib/fef/test/indexenvironment.h>
+#include <vespa/searchlib/fef/test/indexenvironmentbuilder.h>
+#include <vespa/searchlib/fef/test/queryenvironment.h>
+#include <vespa/searchlib/fef/test/ftlib.h>
+#include <vespa/searchlib/features/tensor_from_labels_feature.h>
+#include <vespa/searchlib/fef/fef.h>
+
+using search::feature_t;
+using namespace search::fef;
+using namespace search::fef::test;
+using namespace search::features;
+using search::AttributeFactory;
+using search::IntegerAttribute;
+using search::StringAttribute;
+using vespalib::eval::Value;
+using vespalib::eval::Function;
+using vespalib::eval::InterpretedFunction;
+using vespalib::tensor::Tensor;
+using vespalib::tensor::DefaultTensorEngine;
+
+typedef search::attribute::Config AVC;
+typedef search::attribute::BasicType AVBT;
+typedef search::attribute::CollectionType AVCT;
+typedef search::AttributeVector::SP AttributePtr;
+typedef FtTestApp FTA;
+
+struct SetupFixture
+{
+ TensorFromLabelsBlueprint blueprint;
+ IndexEnvironment indexEnv;
+ SetupFixture()
+ : blueprint(),
+ indexEnv()
+ {
+ }
+};
+
+TEST_F("require that blueprint can be created from factory", SetupFixture)
+{
+ EXPECT_TRUE(FTA::assertCreateInstance(f.blueprint, "tensorFromLabels"));
+}
+
+TEST_F("require that setup fails if source spec is invalid", SetupFixture)
+{
+ FTA::FT_SETUP_FAIL(f.blueprint, f.indexEnv, StringList().add("source(foo)"));
+}
+
+TEST_F("require that setup succeeds with attribute source", SetupFixture)
+{
+ FTA::FT_SETUP_OK(f.blueprint, f.indexEnv, StringList().add("attribute(foo)"),
+ StringList(), StringList().add("tensor"));
+}
+
+TEST_F("require that setup succeeds with query source", SetupFixture)
+{
+ FTA::FT_SETUP_OK(f.blueprint, f.indexEnv, StringList().add("query(foo)"),
+ StringList(), StringList().add("tensor"));
+}
+
+struct ExecFixture
+{
+ BlueprintFactory factory;
+ FtFeatureTest test;
+ ExecFixture(const vespalib::string &feature)
+ : factory(),
+ test(factory, feature)
+ {
+ setup_search_features(factory);
+ setupAttributeVectors();
+ setupQueryEnvironment();
+ ASSERT_TRUE(test.setup());
+ }
+ void setupAttributeVectors() {
+ std::vector<AttributePtr> attrs;
+ attrs.push_back(AttributeFactory::createAttribute("astr", AVC(AVBT::STRING, AVCT::ARRAY)));
+ attrs.push_back(AttributeFactory::createAttribute("aint", AVC(AVBT::INT32, AVCT::ARRAY)));
+ attrs.push_back(AttributeFactory::createAttribute("wsstr", AVC(AVBT::STRING, AVCT::WSET)));
+
+ for (const auto &attr : attrs) {
+ attr->addReservedDoc();
+ attr->addDocs(1);
+ test.getIndexEnv().getAttributeManager().add(attr);
+ }
+
+ StringAttribute *astr = static_cast<StringAttribute *>(attrs[0].get());
+ // Note that the weight parameter is not used
+ astr->append(1, "a", 0);
+ astr->append(1, "b", 0);
+ astr->append(1, "c", 0);
+
+ IntegerAttribute *aint = static_cast<IntegerAttribute *>(attrs[1].get());
+ aint->append(1, 3, 0);
+ aint->append(1, 5, 0);
+ aint->append(1, 7, 0);
+
+ for (const auto &attr : attrs) {
+ attr->commit();
+ }
+ }
+ void setupQueryEnvironment() {
+ test.getQueryEnv().getProperties().add("astr_query", "[d e f]");
+ test.getQueryEnv().getProperties().add("aint_query", "[11 13 17]");
+ }
+ const Tensor &extractTensor() {
+ const Value::CREF *value = test.resolveObjectFeature();
+ ASSERT_TRUE(value != nullptr);
+ ASSERT_TRUE(value->get().is_tensor());
+ return static_cast<const Tensor &>(*value->get().as_tensor());
+ }
+ const Tensor &execute() {
+ test.executeOnly();
+ return extractTensor();
+ }
+};
+
+struct AsTensor {
+ InterpretedFunction ifun;
+ InterpretedFunction::Context ctx;
+ const Value *result;
+ explicit AsTensor(const vespalib::string &expr)
+ : ifun(DefaultTensorEngine::ref(), Function::parse(expr)), ctx(), result(&ifun.eval(ctx))
+ {
+ ASSERT_TRUE(result->is_tensor());
+ }
+ bool operator==(const Tensor &rhs) const { return static_cast<const Tensor &>(*result->as_tensor()).equals(rhs); }
+};
+
+std::ostream &operator<<(std::ostream &os, const AsTensor &my_tensor) {
+ os << my_tensor.result->as_tensor();
+ return os;
+}
+
+// Tests for attribute source:
+
+TEST_F("require that array string attribute can be converted to tensor (default dimension)",
+ ExecFixture("tensorFromLabels(attribute(astr))"))
+{
+ EXPECT_EQUAL(AsTensor("{ {astr:a}:1, {astr:b}:1, {astr:c}:1 }"), f.execute());
+}
+
+TEST_F("require that array string attribute can be converted to tensor (explicit dimension)",
+ ExecFixture("tensorFromLabels(attribute(astr),dim)"))
+{
+ EXPECT_EQUAL(AsTensor("{ {dim:a}:1, {dim:b}:1, {dim:c}:1 }"), f.execute());
+}
+
+TEST_F("require that array integer attribute can be converted to tensor (default dimension)",
+ ExecFixture("tensorFromLabels(attribute(aint))"))
+{
+ EXPECT_EQUAL(AsTensor("{ {aint:7}:1, {aint:3}:1, {aint:5}:1 }"), f.execute());
+}
+
+TEST_F("require that array attribute can be converted to tensor (explicit dimension)",
+ ExecFixture("tensorFromLabels(attribute(aint),dim)"))
+{
+ EXPECT_EQUAL(AsTensor("{ {dim:7}:1, {dim:3}:1, {dim:5}:1 }"), f.execute());
+}
+
+TEST_F("require that empty tensor is created if attribute does not exists",
+ ExecFixture("tensorFromLabels(attribute(null))"))
+{
+ EXPECT_EQUAL(AsTensor("{ }"), f.execute());
+}
+
+TEST_F("require that empty tensor is created if attribute type is not supported",
+ ExecFixture("tensorFromLabels(attribute(wsstr))"))
+{
+ EXPECT_EQUAL(AsTensor("{ }"), f.execute());
+}
+
+
+// Tests for query source:
+
+TEST_F("require that string array from query can be converted to tensor (default dimension)",
+ ExecFixture("tensorFromLabels(query(astr_query))"))
+{
+ EXPECT_EQUAL(AsTensor("{ {astr_query:d}:1, {astr_query:e}:1, {astr_query:f}:1 }"), f.execute());
+}
+
+TEST_F("require that integer array from query can be converted to tensor (default dimension)",
+ ExecFixture("tensorFromLabels(query(aint_query))"))
+{
+ EXPECT_EQUAL(AsTensor("{ {aint_query:13}:1, {aint_query:17}:1, {aint_query:11}:1 }"), f.execute());
+}
+
+TEST_F("require that string array from query can be converted to tensor (explicit dimension)",
+ ExecFixture("tensorFromLabels(query(astr_query),dim)"))
+{
+ EXPECT_EQUAL(AsTensor("{ {dim:d}:1, {dim:e}:1, {dim:f}:1 }"), f.execute());
+}
+
+TEST_F("require that empty tensor is created if query parameter is not found",
+ ExecFixture("tensorFromLabels(query(null))"))
+{
+ EXPECT_EQUAL(AsTensor("{ }"), f.execute());
+}
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/features/tensor_from_weighted_set/.gitignore b/searchlib/src/tests/features/tensor_from_weighted_set/.gitignore
new file mode 100644
index 00000000000..a56eade053e
--- /dev/null
+++ b/searchlib/src/tests/features/tensor_from_weighted_set/.gitignore
@@ -0,0 +1 @@
+searchlib_tensor_from_weighted_set_test_app
diff --git a/searchlib/src/tests/features/tensor_from_weighted_set/CMakeLists.txt b/searchlib/src/tests/features/tensor_from_weighted_set/CMakeLists.txt
new file mode 100644
index 00000000000..7c38b301679
--- /dev/null
+++ b/searchlib/src/tests/features/tensor_from_weighted_set/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_tensor_from_weighted_set_test_app
+ SOURCES
+ tensor_from_weighted_set_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_tensor_from_weighted_set_test_app COMMAND searchlib_tensor_from_weighted_set_test_app)
diff --git a/searchlib/src/tests/features/tensor_from_weighted_set/FILES b/searchlib/src/tests/features/tensor_from_weighted_set/FILES
new file mode 100644
index 00000000000..639a54230b1
--- /dev/null
+++ b/searchlib/src/tests/features/tensor_from_weighted_set/FILES
@@ -0,0 +1 @@
+tensor_from_weighted_set_test.cpp
diff --git a/searchlib/src/tests/features/tensor_from_weighted_set/tensor_from_weighted_set_test.cpp b/searchlib/src/tests/features/tensor_from_weighted_set/tensor_from_weighted_set_test.cpp
new file mode 100644
index 00000000000..163fd5b5389
--- /dev/null
+++ b/searchlib/src/tests/features/tensor_from_weighted_set/tensor_from_weighted_set_test.cpp
@@ -0,0 +1,198 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/vespalib/testkit/test_kit.h>
+#include <vespa/vespalib/eval/function.h>
+#include <vespa/vespalib/eval/interpreted_function.h>
+#include <vespa/vespalib/tensor/tensor.h>
+#include <vespa/vespalib/tensor/default_tensor_engine.h>
+
+#include <vespa/searchlib/attribute/attributefactory.h>
+#include <vespa/searchlib/attribute/attributevector.h>
+#include <vespa/searchlib/attribute/integerbase.h>
+#include <vespa/searchlib/attribute/stringbase.h>
+#include <vespa/searchlib/features/setup.h>
+#include <vespa/searchlib/fef/test/indexenvironment.h>
+#include <vespa/searchlib/fef/test/indexenvironmentbuilder.h>
+#include <vespa/searchlib/fef/test/queryenvironment.h>
+#include <vespa/searchlib/fef/test/ftlib.h>
+#include <vespa/searchlib/features/tensor_from_weighted_set_feature.h>
+#include <vespa/searchlib/fef/fef.h>
+
+using search::feature_t;
+using namespace search::fef;
+using namespace search::fef::test;
+using namespace search::features;
+using search::AttributeFactory;
+using search::IntegerAttribute;
+using search::StringAttribute;
+using vespalib::eval::Value;
+using vespalib::eval::Function;
+using vespalib::eval::InterpretedFunction;
+using vespalib::tensor::Tensor;
+using vespalib::tensor::DefaultTensorEngine;
+
+typedef search::attribute::Config AVC;
+typedef search::attribute::BasicType AVBT;
+typedef search::attribute::CollectionType AVCT;
+typedef search::AttributeVector::SP AttributePtr;
+typedef FtTestApp FTA;
+
+struct SetupFixture
+{
+ TensorFromWeightedSetBlueprint blueprint;
+ IndexEnvironment indexEnv;
+ SetupFixture()
+ : blueprint(),
+ indexEnv()
+ {
+ }
+};
+
+TEST_F("require that blueprint can be created from factory", SetupFixture)
+{
+ EXPECT_TRUE(FTA::assertCreateInstance(f.blueprint, "tensorFromWeightedSet"));
+}
+
+TEST_F("require that setup fails if source spec is invalid", SetupFixture)
+{
+ FTA::FT_SETUP_FAIL(f.blueprint, f.indexEnv, StringList().add("source(foo)"));
+}
+
+TEST_F("require that setup succeeds with attribute source", SetupFixture)
+{
+ FTA::FT_SETUP_OK(f.blueprint, f.indexEnv, StringList().add("attribute(foo)"),
+ StringList(), StringList().add("tensor"));
+}
+
+TEST_F("require that setup succeeds with query source", SetupFixture)
+{
+ FTA::FT_SETUP_OK(f.blueprint, f.indexEnv, StringList().add("query(foo)"),
+ StringList(), StringList().add("tensor"));
+}
+
+struct ExecFixture
+{
+ BlueprintFactory factory;
+ FtFeatureTest test;
+ ExecFixture(const vespalib::string &feature)
+ : factory(),
+ test(factory, feature)
+ {
+ setup_search_features(factory);
+ setupAttributeVectors();
+ setupQueryEnvironment();
+ ASSERT_TRUE(test.setup());
+ }
+ void setupAttributeVectors() {
+ std::vector<AttributePtr> attrs;
+ attrs.push_back(AttributeFactory::createAttribute("wsstr", AVC(AVBT::STRING, AVCT::WSET)));
+ attrs.push_back(AttributeFactory::createAttribute("wsint", AVC(AVBT::INT32, AVCT::WSET)));
+ attrs.push_back(AttributeFactory::createAttribute("astr", AVC(AVBT::STRING, AVCT::ARRAY)));
+
+ for (const auto &attr : attrs) {
+ attr->addReservedDoc();
+ attr->addDocs(1);
+ test.getIndexEnv().getAttributeManager().add(attr);
+ }
+
+ StringAttribute *wsstr = static_cast<StringAttribute *>(attrs[0].get());
+ wsstr->append(1, "a", 3);
+ wsstr->append(1, "b", 5);
+ wsstr->append(1, "c", 7);
+
+ IntegerAttribute *wsint = static_cast<IntegerAttribute *>(attrs[1].get());
+ wsint->append(1, 11, 3);
+ wsint->append(1, 13, 5);
+ wsint->append(1, 17, 7);
+
+ for (const auto &attr : attrs) {
+ attr->commit();
+ }
+ }
+ void setupQueryEnvironment() {
+ test.getQueryEnv().getProperties().add("wsquery", "{d:11,e:13,f:17}");
+ }
+ const Tensor &extractTensor() {
+ const Value::CREF *value = test.resolveObjectFeature();
+ ASSERT_TRUE(value != nullptr);
+ ASSERT_TRUE(value->get().is_tensor());
+ return static_cast<const Tensor &>(*value->get().as_tensor());
+ }
+ const Tensor &execute() {
+ test.executeOnly();
+ return extractTensor();
+ }
+};
+
+struct AsTensor {
+ InterpretedFunction ifun;
+ InterpretedFunction::Context ctx;
+ const Value *result;
+ explicit AsTensor(const vespalib::string &expr)
+ : ifun(DefaultTensorEngine::ref(), Function::parse(expr)), ctx(), result(&ifun.eval(ctx))
+ {
+ ASSERT_TRUE(result->is_tensor());
+ }
+ bool operator==(const Tensor &rhs) const { return static_cast<const Tensor &>(*result->as_tensor()).equals(rhs); }
+};
+
+std::ostream &operator<<(std::ostream &os, const AsTensor &my_tensor) {
+ os << my_tensor.result->as_tensor();
+ return os;
+}
+
+TEST_F("require that weighted set string attribute can be converted to tensor (default dimension)",
+ ExecFixture("tensorFromWeightedSet(attribute(wsstr))"))
+{
+ EXPECT_EQUAL(AsTensor("{ {wsstr:b}:5, {wsstr:c}:7, {wsstr:a}:3 }"), f.execute());
+}
+
+TEST_F("require that weighted set string attribute can be converted to tensor (explicit dimension)",
+ ExecFixture("tensorFromWeightedSet(attribute(wsstr),dim)"))
+{
+ EXPECT_EQUAL(AsTensor("{ {dim:a}:3, {dim:b}:5, {dim:c}:7 }"), f.execute());
+}
+
+TEST_F("require that weighted set integer attribute can be converted to tensor (default dimension)",
+ ExecFixture("tensorFromWeightedSet(attribute(wsint))"))
+{
+ EXPECT_EQUAL(AsTensor("{ {wsint:13}:5, {wsint:17}:7, {wsint:11}:3 }"), f.execute());
+}
+
+TEST_F("require that weighted set integer attribute can be converted to tensor (explicit dimension)",
+ ExecFixture("tensorFromWeightedSet(attribute(wsint),dim)"))
+{
+ EXPECT_EQUAL(AsTensor("{ {dim:17}:7, {dim:11}:3, {dim:13}:5 }"), f.execute());
+}
+
+TEST_F("require that weighted set from query can be converted to tensor (default dimension)",
+ ExecFixture("tensorFromWeightedSet(query(wsquery))"))
+{
+ EXPECT_EQUAL(AsTensor("{ {wsquery:f}:17, {wsquery:d}:11, {wsquery:e}:13 }"), f.execute());
+}
+
+TEST_F("require that weighted set from query can be converted to tensor (explicit dimension)",
+ ExecFixture("tensorFromWeightedSet(query(wsquery),dim)"))
+{
+ EXPECT_EQUAL(AsTensor("{ {dim:d}:11, {dim:e}:13, {dim:f}:17 }"), f.execute());
+}
+
+TEST_F("require that empty tensor is created if attribute does not exists",
+ ExecFixture("tensorFromWeightedSet(attribute(null))"))
+{
+ EXPECT_EQUAL(AsTensor("{ }"), f.execute());
+}
+
+TEST_F("require that empty tensor is created if attribute type is not supported",
+ ExecFixture("tensorFromWeightedSet(attribute(astr))"))
+{
+ EXPECT_EQUAL(AsTensor("{ }"), f.execute());
+}
+
+TEST_F("require that empty tensor is created if query parameter is not found",
+ ExecFixture("tensorFromWeightedSet(query(null))"))
+{
+ EXPECT_EQUAL(AsTensor("{ }"), f.execute());
+}
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/features/text_similarity_feature/.gitignore b/searchlib/src/tests/features/text_similarity_feature/.gitignore
new file mode 100644
index 00000000000..9ffa5b46a43
--- /dev/null
+++ b/searchlib/src/tests/features/text_similarity_feature/.gitignore
@@ -0,0 +1 @@
+searchlib_text_similarity_feature_test_app
diff --git a/searchlib/src/tests/features/text_similarity_feature/CMakeLists.txt b/searchlib/src/tests/features/text_similarity_feature/CMakeLists.txt
new file mode 100644
index 00000000000..e0cb043c8f1
--- /dev/null
+++ b/searchlib/src/tests/features/text_similarity_feature/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_text_similarity_feature_test_app
+ SOURCES
+ text_similarity_feature_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_text_similarity_feature_test_app COMMAND searchlib_text_similarity_feature_test_app)
diff --git a/searchlib/src/tests/features/text_similarity_feature/FILES b/searchlib/src/tests/features/text_similarity_feature/FILES
new file mode 100644
index 00000000000..dfa5173742d
--- /dev/null
+++ b/searchlib/src/tests/features/text_similarity_feature/FILES
@@ -0,0 +1 @@
+text_similarity_feature_test.cpp
diff --git a/searchlib/src/tests/features/text_similarity_feature/text_similarity_feature_test.cpp b/searchlib/src/tests/features/text_similarity_feature/text_similarity_feature_test.cpp
new file mode 100644
index 00000000000..6a6b9d0a48e
--- /dev/null
+++ b/searchlib/src/tests/features/text_similarity_feature/text_similarity_feature_test.cpp
@@ -0,0 +1,245 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/vespalib/testkit/test_kit.h>
+#include <vespa/searchlib/features/setup.h>
+#include <vespa/searchlib/fef/test/indexenvironment.h>
+#include <vespa/searchlib/fef/test/indexenvironmentbuilder.h>
+#include <vespa/searchlib/fef/test/queryenvironment.h>
+#include <vespa/searchlib/features/text_similarity_feature.h>
+#include <vespa/searchlib/fef/test/ftlib.h>
+#include <initializer_list>
+#include <vespa/searchlib/fef/test/dummy_dependency_handler.h>
+
+using namespace search::fef;
+using namespace search::fef::test;
+using namespace search::features;
+
+std::vector<vespalib::string> featureNamesFoo() {
+ std::vector<vespalib::string> f;
+ f.push_back("textSimilarity(foo).score");
+ f.push_back("textSimilarity(foo).proximity");
+ f.push_back("textSimilarity(foo).order");
+ f.push_back("textSimilarity(foo).queryCoverage");
+ f.push_back("textSimilarity(foo).fieldCoverage");
+ return f;
+}
+
+const size_t SCORE = 0;
+const size_t PROXIMITY = 1;
+const size_t ORDER = 2;
+const size_t QUERY = 3;
+const size_t FIELD = 4;
+
+FtIndex indexFoo() {
+ FtIndex idx;
+ idx.field("foo");
+ return idx;
+}
+
+struct BlueprintFactoryFixture {
+ BlueprintFactory factory;
+ BlueprintFactoryFixture() : factory()
+ {
+ setup_search_features(factory);
+ }
+};
+
+struct IndexFixture {
+ IndexEnvironment indexEnv;
+ IndexFixture() : indexEnv()
+ {
+ IndexEnvironmentBuilder builder(indexEnv);
+ builder.addField(FieldType::INDEX, CollectionType::SINGLE, "foo");
+ builder.addField(FieldType::INDEX, CollectionType::WEIGHTEDSET, "bar");
+ builder.addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "baz");
+ }
+};
+
+struct FeatureDumpFixture : public IDumpFeatureVisitor {
+ std::vector<vespalib::string> expect;
+ size_t dumped;
+ virtual void visitDumpFeature(const vespalib::string &name) {
+ EXPECT_LESS(dumped, expect.size());
+ EXPECT_EQUAL(expect[dumped++], name);
+ }
+ FeatureDumpFixture() : IDumpFeatureVisitor(), expect(featureNamesFoo()), dumped(0) {}
+};
+
+struct RankFixture : BlueprintFactoryFixture {
+ RankFixture() : BlueprintFactoryFixture() {}
+ double get_feature(const vespalib::string &query, const FtIndex &index, size_t select,
+ bool useStaleMatchData = false)
+ {
+ std::vector<vespalib::string> names = featureNamesFoo();
+ ASSERT_TRUE(names.size() == 5u);
+ FtFeatureTest ft(factory, names);
+ ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo");
+ FtTestApp::FT_SETUP(ft, FtUtil::toQuery(query), index, 1);
+ RankResult actual;
+ EXPECT_TRUE(ft.executeOnly(actual, useStaleMatchData ? 2 : 1));
+ return actual.getScore(names[select]);
+ }
+};
+
+double prox(uint32_t dist) {
+ return (dist > 8) ? 0 : (1.0 - (((dist-1)/8.0) * ((dist-1)/8.0)));
+}
+
+double comb(std::initializer_list<double> values) {
+ double sum = 0.0;
+ for (double value: values) {
+ sum += value;
+ }
+ return (sum/values.size());
+}
+
+double mix(double proximity, double order, double query, double field) {
+ return (0.35 * proximity) + (0.15 * order) + (0.30 * query) + (0.20 * field);
+}
+
+TEST_F("require that blueprint can be created from factory", BlueprintFactoryFixture) {
+ Blueprint::SP bp = f.factory.createBlueprint("textSimilarity");
+ EXPECT_TRUE(bp.get() != 0);
+ EXPECT_TRUE(dynamic_cast<TextSimilarityBlueprint*>(bp.get()) != 0);
+}
+
+TEST_FFF("require that appropriate features are dumped", TextSimilarityBlueprint, IndexFixture, FeatureDumpFixture) {
+ f1.visitDumpFeatures(f2.indexEnv, f3);
+ EXPECT_EQUAL(f3.expect.size(), f3.dumped);
+}
+
+TEST_FF("require that setup can be done on single value index field", TextSimilarityBlueprint, IndexFixture) {
+ DummyDependencyHandler deps(f1);
+ f1.setName(vespalib::make_string("%s(foo)", f1.getBaseName().c_str()));
+ EXPECT_TRUE(((Blueprint&)f1).setup(f2.indexEnv, std::vector<vespalib::string>(1, "foo")));
+}
+
+TEST_FF("require that setup can not be done on weighted set index field", TextSimilarityBlueprint, IndexFixture) {
+ DummyDependencyHandler deps(f1);
+ f1.setName(vespalib::make_string("%s(bar)", f1.getBaseName().c_str()));
+ EXPECT_TRUE(!((Blueprint&)f1).setup(f2.indexEnv, std::vector<vespalib::string>(1, "bar")));
+}
+
+TEST_FF("require that setup can not be done on single value attribute field", TextSimilarityBlueprint, IndexFixture) {
+ DummyDependencyHandler deps(f1);
+ f1.setName(vespalib::make_string("%s(baz)", f1.getBaseName().c_str()));
+ EXPECT_TRUE(!((Blueprint&)f1).setup(f2.indexEnv, std::vector<vespalib::string>(1, "baz")));
+}
+
+TEST_F("require that no match gives zero outputs", RankFixture) {
+ EXPECT_EQUAL(0.0, f1.get_feature("x", indexFoo().element("y"), SCORE));
+ EXPECT_EQUAL(0.0, f1.get_feature("x", indexFoo().element("y"), PROXIMITY));
+ EXPECT_EQUAL(0.0, f1.get_feature("x", indexFoo().element("y"), ORDER));
+ EXPECT_EQUAL(0.0, f1.get_feature("x", indexFoo().element("y"), QUERY));
+ EXPECT_EQUAL(0.0, f1.get_feature("x", indexFoo().element("y"), FIELD));
+}
+
+TEST_F("require that minal perfect match gives max outputs", RankFixture) {
+ EXPECT_EQUAL(1.0, f1.get_feature("x", indexFoo().element("x"), SCORE));
+ EXPECT_EQUAL(1.0, f1.get_feature("x", indexFoo().element("x"), PROXIMITY));
+ EXPECT_EQUAL(1.0, f1.get_feature("x", indexFoo().element("x"), ORDER));
+ EXPECT_EQUAL(1.0, f1.get_feature("x", indexFoo().element("x"), QUERY));
+ EXPECT_EQUAL(1.0, f1.get_feature("x", indexFoo().element("x"), FIELD));
+}
+
+TEST_F("require that larger perfect match gives max outputs", RankFixture) {
+ EXPECT_EQUAL(1.0, f1.get_feature("a b c d e f g", indexFoo().element("a b c d e f g"), SCORE));
+ EXPECT_EQUAL(1.0, f1.get_feature("a b c d e f g", indexFoo().element("a b c d e f g"), PROXIMITY));
+ EXPECT_EQUAL(1.0, f1.get_feature("a b c d e f g", indexFoo().element("a b c d e f g"), ORDER));
+ EXPECT_EQUAL(1.0, f1.get_feature("a b c d e f g", indexFoo().element("a b c d e f g"), QUERY));
+ EXPECT_EQUAL(1.0, f1.get_feature("a b c d e f g", indexFoo().element("a b c d e f g"), FIELD));
+}
+
+TEST_F("require that extra query terms reduces order but not proximity", RankFixture) {
+ EXPECT_EQUAL(1.0, f1.get_feature("x y", indexFoo().element("x"), PROXIMITY));
+ EXPECT_EQUAL(1.0, f1.get_feature("x y y", indexFoo().element("x"), PROXIMITY));
+ EXPECT_EQUAL(1.0, f1.get_feature("x y y y", indexFoo().element("x"), PROXIMITY));
+
+ EXPECT_EQUAL(0.0, f1.get_feature("x y", indexFoo().element("x"), ORDER));
+ EXPECT_EQUAL(0.0, f1.get_feature("x y y", indexFoo().element("x"), ORDER));
+ EXPECT_EQUAL(0.0, f1.get_feature("x y y y", indexFoo().element("x"), ORDER));
+}
+
+TEST_F("require that extra field terms reduces proximity but not order", RankFixture) {
+ EXPECT_EQUAL(prox(2), f1.get_feature("x", indexFoo().element("x y"), PROXIMITY));
+ EXPECT_EQUAL(prox(3), f1.get_feature("x", indexFoo().element("x y y"), PROXIMITY));
+ EXPECT_EQUAL(prox(4), f1.get_feature("x", indexFoo().element("x y y y"), PROXIMITY));
+
+ EXPECT_EQUAL(1.0, f1.get_feature("x", indexFoo().element("x y"), ORDER));
+ EXPECT_EQUAL(1.0, f1.get_feature("x", indexFoo().element("x y y"), ORDER));
+ EXPECT_EQUAL(1.0, f1.get_feature("x", indexFoo().element("x y y y"), ORDER));
+}
+
+TEST_F("require that proximity acts as expected", RankFixture) {
+ EXPECT_EQUAL(1.0, f1.get_feature("a b c d e", indexFoo().element("a b c d e"), PROXIMITY));
+ EXPECT_EQUAL(comb({prox(2), prox(1), prox(1), prox(1)}), f1.get_feature("a b c d e", indexFoo().element("a x b c d e"), PROXIMITY));
+ EXPECT_EQUAL(comb({prox(3), prox(1), prox(1), prox(1)}), f1.get_feature("a b c d e", indexFoo().element("a x x b c d e"), PROXIMITY));
+ EXPECT_EQUAL(comb({prox(4), prox(1), prox(1), prox(1)}), f1.get_feature("a b c d e", indexFoo().element("a x x x b c d e"), PROXIMITY));
+ EXPECT_EQUAL(comb({prox(2), prox(2), prox(2), prox(2)}), f1.get_feature("a b c d e", indexFoo().element("a x b x c x d x e"), PROXIMITY));
+ EXPECT_EQUAL(comb({prox(2), prox(2), prox(1), prox(3)}), f1.get_feature("a b c d e", indexFoo().element("a x b x c d x x e"), PROXIMITY));
+}
+
+TEST_F("require that field order does not affect proximity score", RankFixture) {
+ EXPECT_EQUAL(1.0, f1.get_feature("a b c d e", indexFoo().element("d c a b e"), PROXIMITY));
+ EXPECT_EQUAL(comb({prox(2), prox(1), prox(1), prox(1)}), f1.get_feature("a b c d e", indexFoo().element("d x c a b e"), PROXIMITY));
+ EXPECT_EQUAL(comb({prox(3), prox(1), prox(1), prox(1)}), f1.get_feature("a b c d e", indexFoo().element("d x x c a b e"), PROXIMITY));
+ EXPECT_EQUAL(comb({prox(4), prox(1), prox(1), prox(1)}), f1.get_feature("a b c d e", indexFoo().element("d x x x c a b e"), PROXIMITY));
+ EXPECT_EQUAL(comb({prox(2), prox(2), prox(2), prox(2)}), f1.get_feature("a b c d e", indexFoo().element("d x c x a x b x e"), PROXIMITY));
+ EXPECT_EQUAL(comb({prox(2), prox(2), prox(1), prox(3)}), f1.get_feature("a b c d e", indexFoo().element("d x c x a b x x e"), PROXIMITY));
+}
+
+TEST_F("require that order score acts as expected", RankFixture) {
+ EXPECT_EQUAL(1.0, f1.get_feature("a b c d e", indexFoo().element("a b c d e"), ORDER));
+ EXPECT_EQUAL(comb({1.0, 1.0, 1.0, 0.0}), f1.get_feature("a b c d e", indexFoo().element("a b c e d"), ORDER));
+ EXPECT_EQUAL(comb({0.0, 1.0, 1.0, 0.0}), f1.get_feature("a b c d e", indexFoo().element("b a c e d"), ORDER));
+ EXPECT_EQUAL(comb({0.0, 1.0, 0.0, 0.0}), f1.get_feature("a b c d e", indexFoo().element("b a e d c"), ORDER));
+ EXPECT_EQUAL(comb({0.0, 0.0, 0.0, 0.0}), f1.get_feature("a b c d e", indexFoo().element("e d c b a"), ORDER));
+}
+
+TEST_F("require that proximity does not affect order score", RankFixture) {
+ EXPECT_EQUAL(1.0, f1.get_feature("a b c d e", indexFoo().element("a b c d e"), ORDER));
+ EXPECT_EQUAL(comb({1.0, 1.0, 1.0, 0.0}), f1.get_feature("a b c d e", indexFoo().element("a x b x c x e x d"), ORDER));
+ EXPECT_EQUAL(comb({0.0, 1.0, 1.0, 0.0}), f1.get_feature("a b c d e", indexFoo().element("b x a x c x e x d"), ORDER));
+ EXPECT_EQUAL(comb({0.0, 1.0, 0.0, 0.0}), f1.get_feature("a b c d e", indexFoo().element("b x a x e x d x c"), ORDER));
+ EXPECT_EQUAL(comb({0.0, 0.0, 0.0, 0.0}), f1.get_feature("a b c d e", indexFoo().element("e x d x c x b x a"), ORDER));
+}
+
+TEST_F("require that query coverage acts as expected", RankFixture) {
+ EXPECT_EQUAL(5.0/5.0, f1.get_feature("a b c d e", indexFoo().element("a b c d e"), QUERY));
+ EXPECT_EQUAL(4.0/5.0, f1.get_feature("a b c d e", indexFoo().element("a b c d"), QUERY));
+ EXPECT_EQUAL(3.0/5.0, f1.get_feature("a b c d e", indexFoo().element("a b c"), QUERY));
+ EXPECT_EQUAL(2.0/5.0, f1.get_feature("a b c d e", indexFoo().element("a b"), QUERY));
+ EXPECT_EQUAL(4.0/7.0, f1.get_feature("a!200 b!200 c d e", indexFoo().element("a b"), QUERY));
+ EXPECT_EQUAL(2.0/7.0, f1.get_feature("a b c!500", indexFoo().element("a b"), QUERY));
+ EXPECT_EQUAL(5.0/7.0, f1.get_feature("a b c!500", indexFoo().element("c"), QUERY));
+}
+
+TEST_F("require that field coverage acts as expected", RankFixture) {
+ EXPECT_EQUAL(5.0/5.0, f1.get_feature("a b c d e", indexFoo().element("a b c d e"), FIELD));
+ EXPECT_EQUAL(4.0/5.0, f1.get_feature("a b c d e", indexFoo().element("a x c d e"), FIELD));
+ EXPECT_EQUAL(3.0/5.0, f1.get_feature("a b c d e", indexFoo().element("a b x x e"), FIELD));
+ EXPECT_EQUAL(2.0/5.0, f1.get_feature("a b c d e", indexFoo().element("x x x d e"), FIELD));
+}
+
+TEST_F("require that first unique match is used per query term", RankFixture) {
+ EXPECT_EQUAL(prox(3), f1.get_feature("a b", indexFoo().element("a a a b"), PROXIMITY));
+ EXPECT_EQUAL(1.0, f1.get_feature("a b", indexFoo().element("a a a b"), ORDER));
+ EXPECT_EQUAL(1.0, f1.get_feature("a b", indexFoo().element("a a a b"), QUERY));
+ EXPECT_EQUAL(2.0/4.0, f1.get_feature("a b", indexFoo().element("a a a b"), FIELD));
+
+ EXPECT_EQUAL(comb({prox(1), prox(2)}), f1.get_feature("a b a", indexFoo().element("a a a b"), PROXIMITY));
+ EXPECT_EQUAL(0.5, f1.get_feature("a b a", indexFoo().element("a a a b"), ORDER));
+ EXPECT_EQUAL(1.0, f1.get_feature("a b a", indexFoo().element("a a a b"), QUERY));
+ EXPECT_EQUAL(3.0/4.0, f1.get_feature("a b a", indexFoo().element("a a a b"), FIELD));
+}
+
+TEST_F("require that overall score combines individual signals appropriately", RankFixture) {
+ EXPECT_EQUAL(comb({prox(1), prox(3), prox(2)}), f1.get_feature("a b c d e", indexFoo().element("a c x x b x d"), PROXIMITY));
+ EXPECT_EQUAL(comb({1.0, 0.0, 1.0}), f1.get_feature("a b c d e", indexFoo().element("a c x x b x d"), ORDER));
+ EXPECT_EQUAL(4.0/5.0, f1.get_feature("a b c d e", indexFoo().element("a c x x b x d"), QUERY));
+ EXPECT_EQUAL(4.0/7.0, f1.get_feature("a b c d e", indexFoo().element("a c x x b x d"), FIELD));
+ EXPECT_EQUAL(mix(comb({prox(1), prox(3), prox(2)}), comb({1.0, 0.0, 1.0}), 4.0/5.0, 4.0/7.0),
+ f1.get_feature("a b c d e", indexFoo().element("a c x x b x d"), SCORE));
+}
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/features/util/.gitignore b/searchlib/src/tests/features/util/.gitignore
new file mode 100644
index 00000000000..14e50fdaf47
--- /dev/null
+++ b/searchlib/src/tests/features/util/.gitignore
@@ -0,0 +1 @@
+searchlib_util_test_app
diff --git a/searchlib/src/tests/features/util/CMakeLists.txt b/searchlib/src/tests/features/util/CMakeLists.txt
new file mode 100644
index 00000000000..95a0bf3b45d
--- /dev/null
+++ b/searchlib/src/tests/features/util/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_util_test_app
+ SOURCES
+ util_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_util_test_app COMMAND searchlib_util_test_app)
diff --git a/searchlib/src/tests/features/util/FILES b/searchlib/src/tests/features/util/FILES
new file mode 100644
index 00000000000..f0bd0a06305
--- /dev/null
+++ b/searchlib/src/tests/features/util/FILES
@@ -0,0 +1 @@
+util_test.cpp
diff --git a/searchlib/src/tests/features/util/util_test.cpp b/searchlib/src/tests/features/util/util_test.cpp
new file mode 100644
index 00000000000..d2f97631d0f
--- /dev/null
+++ b/searchlib/src/tests/features/util/util_test.cpp
@@ -0,0 +1,40 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/vespalib/testkit/test_kit.h>
+#include <vespa/searchlib/features/utils.h>
+#include <vespa/searchlib/fef/test/indexenvironment.h>
+#include <vespa/searchlib/fef/test/queryenvironment.h>
+
+using namespace search;
+using namespace search::fef;
+using namespace search::fef::test;
+using namespace search::features;
+
+SimpleTermData make_term(uint32_t uid) {
+ SimpleTermData term;
+ term.setUniqueId(uid);
+ return term;
+}
+
+struct TermLabelFixture {
+ IndexEnvironment indexEnv;
+ QueryEnvironment queryEnv;
+ TermLabelFixture() : indexEnv(), queryEnv(&indexEnv) {
+ queryEnv.getTerms().push_back(make_term(5));
+ queryEnv.getTerms().push_back(make_term(0));
+ queryEnv.getTerms().push_back(make_term(10));
+ queryEnv.getProperties().add("vespa.label.foo.id", "5");
+ queryEnv.getProperties().add("vespa.label.bar.id", "0"); // undefined uid
+ queryEnv.getProperties().add("vespa.label.baz.id", "10");
+ queryEnv.getProperties().add("vespa.label.fox.id", "7"); // non-existing
+ }
+};
+
+TEST_F("require that label can be mapped to term", TermLabelFixture) {
+ EXPECT_EQUAL((ITermData*)&f1.queryEnv.getTerms()[0], util::getTermByLabel(f1.queryEnv, "foo"));
+ EXPECT_EQUAL((ITermData*)0, util::getTermByLabel(f1.queryEnv, "bar"));
+ EXPECT_EQUAL((ITermData*)&f1.queryEnv.getTerms()[2], util::getTermByLabel(f1.queryEnv, "baz"));
+ EXPECT_EQUAL((ITermData*)0, util::getTermByLabel(f1.queryEnv, "fox"));
+ EXPECT_EQUAL((ITermData*)0, util::getTermByLabel(f1.queryEnv, "unknown"));
+}
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/fef/.gitignore b/searchlib/src/tests/fef/.gitignore
new file mode 100644
index 00000000000..ff604ccaf00
--- /dev/null
+++ b/searchlib/src/tests/fef/.gitignore
@@ -0,0 +1,4 @@
+.depend
+Makefile
+fef_test
+searchlib_fef_test_app
diff --git a/searchlib/src/tests/fef/CMakeLists.txt b/searchlib/src/tests/fef/CMakeLists.txt
new file mode 100644
index 00000000000..a239ba972c3
--- /dev/null
+++ b/searchlib/src/tests/fef/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_fef_test_app
+ SOURCES
+ fef_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_fef_test_app COMMAND searchlib_fef_test_app)
diff --git a/searchlib/src/tests/fef/DESC b/searchlib/src/tests/fef/DESC
new file mode 100644
index 00000000000..431ee7a1a1f
--- /dev/null
+++ b/searchlib/src/tests/fef/DESC
@@ -0,0 +1 @@
+fef test. Take a look at fef.cpp for details.
diff --git a/searchlib/src/tests/fef/FILES b/searchlib/src/tests/fef/FILES
new file mode 100644
index 00000000000..7e6752e501e
--- /dev/null
+++ b/searchlib/src/tests/fef/FILES
@@ -0,0 +1 @@
+fef.cpp
diff --git a/searchlib/src/tests/fef/attributecontent/.gitignore b/searchlib/src/tests/fef/attributecontent/.gitignore
new file mode 100644
index 00000000000..dd57ee57362
--- /dev/null
+++ b/searchlib/src/tests/fef/attributecontent/.gitignore
@@ -0,0 +1,4 @@
+.depend
+Makefile
+attributecontent_test
+searchlib_attributecontent_test_app
diff --git a/searchlib/src/tests/fef/attributecontent/CMakeLists.txt b/searchlib/src/tests/fef/attributecontent/CMakeLists.txt
new file mode 100644
index 00000000000..84cdb3d4fce
--- /dev/null
+++ b/searchlib/src/tests/fef/attributecontent/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_attributecontent_test_app
+ SOURCES
+ attributecontent_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_attributecontent_test_app COMMAND searchlib_attributecontent_test_app)
diff --git a/searchlib/src/tests/fef/attributecontent/DESC b/searchlib/src/tests/fef/attributecontent/DESC
new file mode 100644
index 00000000000..fa1c457c573
--- /dev/null
+++ b/searchlib/src/tests/fef/attributecontent/DESC
@@ -0,0 +1 @@
+attributecontent test. Take a look at attributecontent.cpp for details.
diff --git a/searchlib/src/tests/fef/attributecontent/FILES b/searchlib/src/tests/fef/attributecontent/FILES
new file mode 100644
index 00000000000..4325e907b45
--- /dev/null
+++ b/searchlib/src/tests/fef/attributecontent/FILES
@@ -0,0 +1 @@
+attributecontent.cpp
diff --git a/searchlib/src/tests/fef/attributecontent/attributecontent_test.cpp b/searchlib/src/tests/fef/attributecontent/attributecontent_test.cpp
new file mode 100644
index 00000000000..66430994016
--- /dev/null
+++ b/searchlib/src/tests/fef/attributecontent/attributecontent_test.cpp
@@ -0,0 +1,106 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("attributecontent_test");
+#include <vespa/vespalib/testkit/testapp.h>
+#include <vespa/searchcommon/attribute/attributecontent.h>
+#include <vespa/searchlib/attribute/attributefactory.h>
+#include <vespa/searchlib/attribute/integerbase.h>
+
+#include <vespa/searchlib/attribute/attributevector.hpp>
+
+using namespace search::attribute;
+
+namespace search {
+namespace fef {
+
+class Test : public vespalib::TestApp {
+private:
+ void testWriteAndRead();
+ void testFill();
+
+public:
+ int Main();
+};
+
+void
+Test::testWriteAndRead()
+{
+ typedef search::attribute::AttributeContent<uint32_t> UintContent;
+ UintContent buf;
+ EXPECT_EQUAL(buf.capacity(), 16u);
+ EXPECT_EQUAL(buf.size(), 0u);
+
+ uint32_t i;
+ uint32_t * data;
+ const uint32_t * itr;
+ for (i = 0, data = buf.data(); i < 16; ++i, ++data) {
+ *data = i;
+ }
+ buf.setSize(16);
+ EXPECT_EQUAL(buf.size(), 16u);
+ for (i = 0, itr = buf.begin(); itr != buf.end(); ++i, ++itr) {
+ EXPECT_EQUAL(*itr, i);
+ EXPECT_EQUAL(buf[i], i);
+ }
+ EXPECT_EQUAL(i, 16u);
+
+ buf.allocate(10);
+ EXPECT_EQUAL(buf.capacity(), 16u);
+ EXPECT_EQUAL(buf.size(), 16u);
+ buf.allocate(32);
+ EXPECT_EQUAL(buf.capacity(), 32u);
+ EXPECT_EQUAL(buf.size(), 0u);
+
+ for (i = 0, data = buf.data(); i < 32; ++i, ++data) {
+ *data = i;
+ }
+ buf.setSize(32);
+ EXPECT_EQUAL(buf.size(), 32u);
+ for (i = 0, itr = buf.begin(); itr != buf.end(); ++i, ++itr) {
+ EXPECT_EQUAL(*itr, i);
+ EXPECT_EQUAL(buf[i], i);
+ }
+ EXPECT_EQUAL(i, 32u);
+}
+
+void
+Test::testFill()
+{
+ Config cfg(BasicType::INT32, CollectionType::ARRAY);
+ AttributeVector::SP av = AttributeFactory::createAttribute("aint32", cfg);
+ av->addDocs(2);
+ IntegerAttribute * ia = static_cast<IntegerAttribute *>(av.get());
+ ia->append(0, 10, 0);
+ ia->append(1, 20, 0);
+ ia->append(1, 30, 0);
+ av->commit();
+ const IAttributeVector & iav = *av.get();
+ IntegerContent buf;
+ buf.fill(iav, 0);
+ EXPECT_EQUAL(1u, buf.size());
+ EXPECT_EQUAL(10, buf[0]);
+ buf.fill(iav, 1);
+ EXPECT_EQUAL(2u, buf.size());
+ EXPECT_EQUAL(20, buf[0]);
+ EXPECT_EQUAL(30, buf[1]);
+ buf.fill(iav, 0);
+ EXPECT_EQUAL(1u, buf.size());
+ EXPECT_EQUAL(10, buf[0]);
+}
+
+int
+Test::Main()
+{
+ TEST_INIT("attributecontent_test");
+
+ testWriteAndRead();
+ testFill();
+
+ TEST_DONE();
+}
+
+} // namespace fef
+} // namespace search
+
+TEST_APPHOOK(search::fef::Test);
diff --git a/searchlib/src/tests/fef/featurenamebuilder/.gitignore b/searchlib/src/tests/fef/featurenamebuilder/.gitignore
new file mode 100644
index 00000000000..781f49956a9
--- /dev/null
+++ b/searchlib/src/tests/fef/featurenamebuilder/.gitignore
@@ -0,0 +1,4 @@
+.depend
+Makefile
+featurenamebuilder_test
+searchlib_featurenamebuilder_test_app
diff --git a/searchlib/src/tests/fef/featurenamebuilder/CMakeLists.txt b/searchlib/src/tests/fef/featurenamebuilder/CMakeLists.txt
new file mode 100644
index 00000000000..167642c1337
--- /dev/null
+++ b/searchlib/src/tests/fef/featurenamebuilder/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_featurenamebuilder_test_app
+ SOURCES
+ featurenamebuilder_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_featurenamebuilder_test_app COMMAND searchlib_featurenamebuilder_test_app)
diff --git a/searchlib/src/tests/fef/featurenamebuilder/DESC b/searchlib/src/tests/fef/featurenamebuilder/DESC
new file mode 100644
index 00000000000..38abf1af794
--- /dev/null
+++ b/searchlib/src/tests/fef/featurenamebuilder/DESC
@@ -0,0 +1 @@
+featurenamebuilder test. Take a look at featurenamebuilder.cpp for details.
diff --git a/searchlib/src/tests/fef/featurenamebuilder/FILES b/searchlib/src/tests/fef/featurenamebuilder/FILES
new file mode 100644
index 00000000000..71df1d1033f
--- /dev/null
+++ b/searchlib/src/tests/fef/featurenamebuilder/FILES
@@ -0,0 +1 @@
+featurenamebuilder.cpp
diff --git a/searchlib/src/tests/fef/featurenamebuilder/featurenamebuilder_test.cpp b/searchlib/src/tests/fef/featurenamebuilder/featurenamebuilder_test.cpp
new file mode 100644
index 00000000000..0e574c776b5
--- /dev/null
+++ b/searchlib/src/tests/fef/featurenamebuilder/featurenamebuilder_test.cpp
@@ -0,0 +1,78 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("featurenamebuilder_test");
+#include <vespa/vespalib/testkit/testapp.h>
+#include <vespa/searchlib/fef/featurenamebuilder.h>
+
+using namespace search::fef;
+
+typedef FeatureNameBuilder B;
+
+TEST_SETUP(Test);
+
+int
+Test::Main()
+{
+ TEST_INIT("featurenamebuilder_test");
+
+ // normal cases
+ EXPECT_EQUAL(B().baseName("foo").buildName(), "foo");
+ EXPECT_EQUAL(B().baseName("foo").output("out").buildName(), "foo.out");
+ EXPECT_EQUAL(B().baseName("foo").parameter("a").parameter("b").buildName(), "foo(a,b)");
+ EXPECT_EQUAL(B().baseName("foo").parameter("a").parameter("b").output("out").buildName(), "foo(a,b).out");
+
+ // empty base = empty name
+ EXPECT_EQUAL(B().baseName("").buildName(), "");
+ EXPECT_EQUAL(B().baseName("").output("out").buildName(), "");
+ EXPECT_EQUAL(B().baseName("").parameter("a").parameter("b").buildName(), "");
+ EXPECT_EQUAL(B().baseName("").parameter("a").parameter("b").output("out").buildName(), "");
+
+ // quoting
+ EXPECT_EQUAL(B().baseName("foo").parameter("a,b").output("out").buildName(), "foo(\"a,b\").out");
+ EXPECT_EQUAL(B().baseName("foo").parameter("a\\").output("out").buildName(), "foo(\"a\\\\\").out");
+ EXPECT_EQUAL(B().baseName("foo").parameter("a)").output("out").buildName(), "foo(\"a)\").out");
+ EXPECT_EQUAL(B().baseName("foo").parameter(" ").output("out").buildName(), "foo(\" \").out");
+ EXPECT_EQUAL(B().baseName("foo").parameter("\"").output("out").buildName(), "foo(\"\\\"\").out");
+ EXPECT_EQUAL(B().baseName("foo").parameter("\\\t\n\r\f\x15").output("out").buildName(), "foo(\"\\\\\\t\\n\\r\\f\\x15\").out");
+ EXPECT_EQUAL(B().baseName("foo").parameter("\\\t\n\r\f\x20").output("out").buildName(), "foo(\"\\\\\\t\\n\\r\\f \").out");
+
+ // empty parameters
+ EXPECT_EQUAL(B().baseName("foo").parameter("").output("out").buildName(), "foo().out");
+ EXPECT_EQUAL(B().baseName("foo").parameter("").parameter("").output("out").buildName(), "foo(,).out");
+ EXPECT_EQUAL(B().baseName("foo").parameter("").parameter("").parameter("").output("out").buildName(), "foo(,,).out");
+ EXPECT_EQUAL(B().baseName("foo").parameter("").parameter("x").parameter("").output("out").buildName(), "foo(,x,).out");
+
+ // test change components
+ EXPECT_EQUAL(B().baseName("foo").parameter("a").parameter("b").output("out").buildName(), "foo(a,b).out");
+ EXPECT_EQUAL(B().baseName("foo").parameter("a").parameter("b").output("out").baseName("bar").buildName(), "bar(a,b).out");
+ EXPECT_EQUAL(B().baseName("foo").parameter("a").parameter("b").output("out").clearParameters().buildName(), "foo.out");
+ EXPECT_EQUAL(B().baseName("foo").parameter("a").parameter("b").output("out").clearParameters().parameter("x").buildName(), "foo(x).out");
+ EXPECT_EQUAL(B().baseName("foo").parameter("a").parameter("b").output("out").output("").buildName(), "foo(a,b)");
+ EXPECT_EQUAL(B().baseName("foo").parameter("a").parameter("b").output("out").output("len").buildName(), "foo(a,b).len");
+
+ // test exact quote vs non-quote
+ EXPECT_EQUAL(B().baseName("foo").parameter("a").buildName(), "foo(a)");
+ EXPECT_EQUAL(B().baseName("foo").parameter(" a").buildName(), "foo(\" a\")");
+ EXPECT_EQUAL(B().baseName("foo").parameter("a.out").buildName(), "foo(a.out)");
+ EXPECT_EQUAL(B().baseName("foo").parameter(" a.out").buildName(), "foo(\" a.out\")");
+ EXPECT_EQUAL(B().baseName("foo").parameter("bar(a,b)").buildName(), "foo(bar(a,b))");
+ EXPECT_EQUAL(B().baseName("foo").parameter("bar(a, b)").buildName(), "foo(\"bar(a, b)\")");
+ EXPECT_EQUAL(B().baseName("foo").parameter("bar(a,b).out").buildName(), "foo(bar(a,b).out)");
+ EXPECT_EQUAL(B().baseName("foo").parameter("bar(a, b).out").buildName(), "foo(\"bar(a, b).out\")");
+
+ // test non-exact quote vs non-quote
+ EXPECT_EQUAL(B().baseName("foo").parameter(" \t\n\r\f", false).buildName(), "foo()");
+ EXPECT_EQUAL(B().baseName("foo").parameter(" \t\n\r\fbar ", false).buildName(), "foo(bar)");
+ EXPECT_EQUAL(B().baseName("foo").parameter(" bar ", false).buildName(), "foo(bar)");
+ EXPECT_EQUAL(B().baseName("foo").parameter(" a b ", false).buildName(), "foo(\" a b \")");
+ EXPECT_EQUAL(B().baseName("foo").parameter("a%", false).buildName(), "foo(\"a%\")");
+ EXPECT_EQUAL(B().baseName("foo").parameter("foo\"\\", false).buildName(), "foo(\"foo\\\"\\\\\")");
+ EXPECT_EQUAL(B().baseName("foo").parameter(" a . out ", false).buildName(), "foo(a.out)");
+ EXPECT_EQUAL(B().baseName("foo").parameter(" bar ( a , b ) ", false).buildName(), "foo(bar(a,b))");
+ EXPECT_EQUAL(B().baseName("foo").parameter(" bar ( a , b ) . out ", false).buildName(), "foo(bar(a,b).out)");
+ EXPECT_EQUAL(B().baseName("foo").parameter(" bar ( a , b ) . out.2 ", false).buildName(), "foo(bar(a,b).out.2)");
+ EXPECT_EQUAL(B().baseName("foo").parameter(" bar ( a , b ) . out . 2 ", false).buildName(), "foo(\" bar ( a , b ) . out . 2 \")");
+
+ TEST_DONE();
+}
diff --git a/searchlib/src/tests/fef/featurenameparser/.gitignore b/searchlib/src/tests/fef/featurenameparser/.gitignore
new file mode 100644
index 00000000000..f16080e9791
--- /dev/null
+++ b/searchlib/src/tests/fef/featurenameparser/.gitignore
@@ -0,0 +1,4 @@
+.depend
+Makefile
+featurenameparser_test
+searchlib_featurenameparser_test_app
diff --git a/searchlib/src/tests/fef/featurenameparser/CMakeLists.txt b/searchlib/src/tests/fef/featurenameparser/CMakeLists.txt
new file mode 100644
index 00000000000..e313ee24deb
--- /dev/null
+++ b/searchlib/src/tests/fef/featurenameparser/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_featurenameparser_test_app
+ SOURCES
+ featurenameparser_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_featurenameparser_test_app COMMAND searchlib_featurenameparser_test_app)
diff --git a/searchlib/src/tests/fef/featurenameparser/DESC b/searchlib/src/tests/fef/featurenameparser/DESC
new file mode 100644
index 00000000000..4c3da4e47a2
--- /dev/null
+++ b/searchlib/src/tests/fef/featurenameparser/DESC
@@ -0,0 +1 @@
+featurenameparser test. Take a look at featurenameparser.cpp for details.
diff --git a/searchlib/src/tests/fef/featurenameparser/FILES b/searchlib/src/tests/fef/featurenameparser/FILES
new file mode 100644
index 00000000000..4567d5b7ccc
--- /dev/null
+++ b/searchlib/src/tests/fef/featurenameparser/FILES
@@ -0,0 +1 @@
+featurenameparser.cpp
diff --git a/searchlib/src/tests/fef/featurenameparser/featurenameparser_test.cpp b/searchlib/src/tests/fef/featurenameparser/featurenameparser_test.cpp
new file mode 100644
index 00000000000..2824f5ef8fc
--- /dev/null
+++ b/searchlib/src/tests/fef/featurenameparser/featurenameparser_test.cpp
@@ -0,0 +1,151 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("featurenameparser_test");
+#include <vespa/vespalib/testkit/testapp.h>
+#include <vespa/searchlib/fef/featurenameparser.h>
+#include <vector>
+#include <string>
+
+using namespace search::fef;
+
+struct ParamList {
+ std::vector<vespalib::string> list;
+ ParamList() : list() {}
+ ParamList(const std::vector<vespalib::string> &l) : list(l) {}
+ ParamList &add(const vespalib::string &str) {
+ list.push_back(str);
+ return *this;
+ }
+ bool operator==(const ParamList &rhs) const {
+ return rhs.list == list;
+ }
+};
+
+std::ostream &operator<<(std::ostream &os, const ParamList &pl) {
+ os << std::endl;
+ for (uint32_t i = 0; i < pl.list.size(); ++i) {
+ os << " " << pl.list[i] << std::endl;
+ }
+ return os;
+}
+
+class Test : public vespalib::TestApp
+{
+public:
+ bool testParse(const vespalib::string &input, bool valid,
+ const vespalib::string &base, ParamList pl,
+ const vespalib::string &output);
+ void testFile(const vespalib::string &name);
+ int Main();
+};
+
+bool
+Test::testParse(const vespalib::string &input, bool valid,
+ const vespalib::string &base, ParamList pl,
+ const vespalib::string &output)
+{
+ bool ok = true;
+ FeatureNameParser parser(input);
+ if (!parser.valid()) {
+ LOG(warning, "parse error: input:'%s', rest:'%s'",
+ input.c_str(), input.substr(parser.parsedBytes()).c_str());
+ }
+ ok &= EXPECT_EQUAL(parser.valid(), valid);
+ ok &= EXPECT_EQUAL(parser.baseName(), base);
+ ok &= EXPECT_EQUAL(ParamList(parser.parameters()), pl);
+ ok &= EXPECT_EQUAL(parser.output(), output);
+ return ok;
+}
+
+void
+Test::testFile(const vespalib::string &name)
+{
+ char buf[4096];
+ uint32_t lineN = 0;
+ FILE *f = fopen(name.c_str(), "r");
+ ASSERT_TRUE(f != 0);
+ while (fgets(buf, sizeof(buf), f) != NULL) {
+ ++lineN;
+ vespalib::string line(buf);
+ if (*line.rbegin() == '\n') {
+ line.resize(line.size() - 1);
+ }
+ if (line.empty() || line[0] == '#') {
+ continue;
+ }
+ uint32_t idx = line.find("<=>");
+ if (!EXPECT_TRUE(idx < line.size())) {
+ LOG(error, "(%s:%u): malformed line: '%s'",
+ name.c_str(), lineN, line.c_str());
+ } else {
+ vespalib::string input = line.substr(0, idx);
+ vespalib::string expect = line.substr(idx + strlen("<=>"));
+ if (!EXPECT_EQUAL(FeatureNameParser(input).featureName(), expect)) {
+ LOG(error, "(%s:%u): test failed: '%s'",
+ name.c_str(), lineN, line.c_str());
+ }
+ }
+ }
+ ASSERT_TRUE(!ferror(f));
+ fclose(f);
+}
+
+int
+Test::Main()
+{
+ TEST_INIT("featurenameparser_test");
+
+ // normal cases
+ EXPECT_TRUE(testParse("foo", true, "foo", ParamList(), ""));
+ EXPECT_TRUE(testParse("foo.out", true, "foo", ParamList(), "out"));
+ EXPECT_TRUE(testParse("foo(a)", true, "foo", ParamList().add("a"), ""));
+ EXPECT_TRUE(testParse("foo(a,b)", true, "foo", ParamList().add("a").add("b"), ""));
+ EXPECT_TRUE(testParse("foo(a,b).out", true, "foo", ParamList().add("a").add("b"), "out"));
+
+ // @ in feature name (for macros)
+ EXPECT_TRUE(testParse("foo@", true, "foo@", ParamList(), ""));
+ EXPECT_TRUE(testParse("foo@.out", true, "foo@", ParamList(), "out"));
+ EXPECT_TRUE(testParse("foo@(a)", true, "foo@", ParamList().add("a"), ""));
+ EXPECT_TRUE(testParse("foo@(a,b)", true, "foo@", ParamList().add("a").add("b"), ""));
+ EXPECT_TRUE(testParse("foo@(a,b).out", true, "foo@", ParamList().add("a").add("b"), "out"));
+
+ // $ in feature name (for macros)
+ EXPECT_TRUE(testParse("foo$", true, "foo$", ParamList(), ""));
+ EXPECT_TRUE(testParse("foo$.out", true, "foo$", ParamList(), "out"));
+ EXPECT_TRUE(testParse("foo$(a)", true, "foo$", ParamList().add("a"), ""));
+ EXPECT_TRUE(testParse("foo$(a,b)", true, "foo$", ParamList().add("a").add("b"), ""));
+ EXPECT_TRUE(testParse("foo$(a,b).out", true, "foo$", ParamList().add("a").add("b"), "out"));
+
+ // de-quoting of parameters
+ EXPECT_TRUE(testParse("foo(a,\"b\")", true, "foo", ParamList().add("a").add("b"), ""));
+ EXPECT_TRUE(testParse("foo(a,\" b \")", true, "foo", ParamList().add("a").add(" b "), ""));
+ EXPECT_TRUE(testParse("foo( \"a\" , \" b \" )", true, "foo", ParamList().add("a").add(" b "), ""));
+ EXPECT_TRUE(testParse("foo(\"\\\"\\\\\\t\\n\\r\\f\\x20\")", true, "foo", ParamList().add("\"\\\t\n\r\f "), ""));
+
+ // only default output if '.' not specified
+ EXPECT_TRUE(testParse("foo.", false, "", ParamList(), ""));
+ EXPECT_TRUE(testParse("foo(a,b).", false, "", ParamList(), ""));
+
+ // string cannot end in parameter list
+ EXPECT_TRUE(testParse("foo(", false, "", ParamList(), ""));
+ EXPECT_TRUE(testParse("foo(a", false, "", ParamList(), ""));
+ EXPECT_TRUE(testParse("foo(a\\", false, "", ParamList(), ""));
+ EXPECT_TRUE(testParse("foo(a\\)", false, "", ParamList(), ""));
+ EXPECT_TRUE(testParse("foo(a,", false, "", ParamList(), ""));
+ EXPECT_TRUE(testParse("foo(a,b", false, "", ParamList(), ""));
+
+ // empty parameters
+ EXPECT_TRUE(testParse("foo()", true, "foo", ParamList().add(""), ""));
+ EXPECT_TRUE(testParse("foo(,)", true, "foo", ParamList().add("").add(""), ""));
+ EXPECT_TRUE(testParse("foo(,,)", true, "foo", ParamList().add("").add("").add(""), ""));
+ EXPECT_TRUE(testParse("foo(,x,)", true, "foo", ParamList().add("").add("x").add(""), ""));
+ EXPECT_TRUE(testParse("foo( )", true, "foo", ParamList().add(""), ""));
+ EXPECT_TRUE(testParse("foo( , , )", true, "foo", ParamList().add("").add("").add(""), ""));
+ EXPECT_TRUE(testParse("foo( \t , \n , \r , \f )", true, "foo", ParamList().add("").add("").add("").add(""), ""));
+
+ testFile("parsetest.txt");
+ TEST_DONE();
+}
+
+TEST_APPHOOK(Test);
diff --git a/searchlib/src/tests/fef/featurenameparser/parsetest.txt b/searchlib/src/tests/fef/featurenameparser/parsetest.txt
new file mode 100644
index 00000000000..ce9db595eca
--- /dev/null
+++ b/searchlib/src/tests/fef/featurenameparser/parsetest.txt
@@ -0,0 +1,55 @@
+# This file is used to test feature name parsing. The file format is
+# as follows: Empty lines and lines starting with '#' will be
+# ignored. Other lines must be on the form
+# "<input>'<=>'<expected_output>". The parser will be run on the
+# input, and the normalized feature name will be compared to the
+# expected output. If they match the test passes, if they don't match
+# the test fails. The normalized feature name in the case of a parse
+# error is the empty string. When parsing this file, no whitespace
+# skipping is allowed inside the input or the expected output. To
+# simplify things, the byte sequence '<=>' may not be used anywhere
+# else than as a separator between the input and the expected
+# output. Malformed lines will result in a failed test.
+
+# basic normalization
+ foo . out <=>foo.out
+ foo ( a , b ) . out <=>foo(a,b).out
+ foo ( a , b , "") . out <=>foo(a,b,).out
+ foo ( bar ( a ) , b , "") . out <=>foo(bar(a),b,).out
+
+# basic parse errors
+<=>
+ <=>
+foo(<=>
+foo(,<=>
+foo().<=>
+foo(a b)<=>
+foo(bar(a b))<=>
+foo . a . b<=>
+
+#quoting
+foo("a b")<=>foo("a b")
+foo(bar("a b"))<=>foo(bar("a b"))
+foo("\"bar\"")<=>foo("\"bar\"")
+foo( "bar(x)" )<=>foo(bar(x))
+foo( "bar( x )" )<=>foo("bar( x )")
+foo("xyz")<=>foo(xyz)
+foo("\\\t\n\r\f\x10")<=>foo("\\\t\n\r\f\x10")
+foo("\y")<=>
+foo("\x05")<=>foo("\x05")
+foo("\x00")<=>
+foo("\")<=>
+foo("abc<=>
+foo("\x5")<=>
+foo("\x31\x32\x33")<=>foo(123)
+
+# my current favorite pair :)
+foo("bar(\"x\")")<=>foo("bar(\"x\")")
+foo("bar(\"x \")")<=>foo(bar("x "))
+
+# might want to disallow non-printables inside quotes...
+foo(" ")<=>foo("\t")
+
+#some more fancy normalization tests
+ foo ( a , b ) . out <=>foo(a,b).out
+ foo ( "", bar ( baz ( a, "" ) , "" ) , b , " ") . out <=>foo(,bar(baz(a,),),b," ").out
diff --git a/searchlib/src/tests/fef/featureoverride/.gitignore b/searchlib/src/tests/fef/featureoverride/.gitignore
new file mode 100644
index 00000000000..35285582ceb
--- /dev/null
+++ b/searchlib/src/tests/fef/featureoverride/.gitignore
@@ -0,0 +1,4 @@
+.depend
+Makefile
+featureoverride_test
+searchlib_featureoverride_test_app
diff --git a/searchlib/src/tests/fef/featureoverride/CMakeLists.txt b/searchlib/src/tests/fef/featureoverride/CMakeLists.txt
new file mode 100644
index 00000000000..23370d51d22
--- /dev/null
+++ b/searchlib/src/tests/fef/featureoverride/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_featureoverride_test_app
+ SOURCES
+ featureoverride.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_featureoverride_test_app COMMAND searchlib_featureoverride_test_app)
diff --git a/searchlib/src/tests/fef/featureoverride/DESC b/searchlib/src/tests/fef/featureoverride/DESC
new file mode 100644
index 00000000000..1605959dae6
--- /dev/null
+++ b/searchlib/src/tests/fef/featureoverride/DESC
@@ -0,0 +1 @@
+featureoverride test. Take a look at featureoverride.cpp for details.
diff --git a/searchlib/src/tests/fef/featureoverride/FILES b/searchlib/src/tests/fef/featureoverride/FILES
new file mode 100644
index 00000000000..864ca65657a
--- /dev/null
+++ b/searchlib/src/tests/fef/featureoverride/FILES
@@ -0,0 +1 @@
+featureoverride.cpp
diff --git a/searchlib/src/tests/fef/featureoverride/featureoverride.cpp b/searchlib/src/tests/fef/featureoverride/featureoverride.cpp
new file mode 100644
index 00000000000..b0929f50fa9
--- /dev/null
+++ b/searchlib/src/tests/fef/featureoverride/featureoverride.cpp
@@ -0,0 +1,175 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("featureoverride_test");
+#include <vespa/vespalib/testkit/testapp.h>
+#include <vespa/searchlib/fef/fef.h>
+
+#include <vespa/searchlib/fef/test/indexenvironment.h>
+#include <vespa/searchlib/fef/test/queryenvironment.h>
+#include <vespa/searchlib/fef/test/plugin/double.h>
+#include <vespa/searchlib/fef/test/plugin/sum.h>
+#include <vespa/searchlib/features/valuefeature.h>
+
+using namespace search::fef;
+using namespace search::fef::test;
+using namespace search::features;
+using search::feature_t;
+
+typedef FeatureExecutor::LP FESP;
+typedef Blueprint::SP BPSP;
+
+struct Fixture
+{
+ MatchDataLayout mdl;
+ std::vector<FeatureExecutor::LP> executors;
+ MatchData::UP md;
+ Fixture() : mdl(), executors(), md() {}
+ Fixture &add(FeatureExecutor::LP &executor, size_t outCnt) {
+ executor->inputs_done();
+ for (uint32_t outIdx = 0; outIdx < outCnt; ++outIdx) {
+ executor->bindOutput(mdl.allocFeature());
+ }
+ executor->outputs_done();
+ executors.push_back(executor);
+ return *this;
+ }
+ Fixture &run() {
+ md = mdl.createMatchData();
+ for (const auto &executor : executors) {
+ executor->execute(*md);
+ }
+ return *this;
+ }
+ feature_t resolveFeature(FeatureHandle handle) {
+ return *md->resolveFeature(handle);
+ }
+ FESP createValueExecutor() {
+ std::vector<feature_t> values;
+ values.push_back(1.0);
+ values.push_back(2.0);
+ values.push_back(3.0);
+ return FESP(new ValueExecutor(values));
+ }
+};
+
+TEST_F("test decorator - single override", Fixture)
+{
+ FESP fe = f.createValueExecutor();
+ fe = FESP(new FeatureOverrider(fe, 1, 50.0));
+ f.add(fe, 3).run();
+ EXPECT_EQUAL(fe->outputs().size(), 3u);
+
+ EXPECT_EQUAL(f.resolveFeature(fe->outputs()[0]), 1.0);
+ EXPECT_EQUAL(f.resolveFeature(fe->outputs()[1]), 50.0);
+ EXPECT_EQUAL(f.resolveFeature(fe->outputs()[2]), 3.0);
+}
+
+TEST_F("test decorator - multiple overrides", Fixture)
+{
+ FESP fe = f.createValueExecutor();
+ fe = FESP(new FeatureOverrider(fe, 0, 50.0));
+ fe = FESP(new FeatureOverrider(fe, 2, 100.0));
+ f.add(fe, 3).run();
+ EXPECT_EQUAL(fe->outputs().size(), 3u);
+
+ EXPECT_EQUAL(f.resolveFeature(fe->outputs()[0]), 50.0);
+ EXPECT_EQUAL(f.resolveFeature(fe->outputs()[1]), 2.0);
+ EXPECT_EQUAL(f.resolveFeature(fe->outputs()[2]), 100.0);
+}
+
+TEST_F("test decorator - non-existing override", Fixture)
+{
+ FESP fe = f.createValueExecutor();
+ fe = FESP(new FeatureOverrider(fe, 1000, 50.0));
+ f.add(fe, 3).run();
+ EXPECT_EQUAL(fe->outputs().size(), 3u);
+
+ EXPECT_EQUAL(f.resolveFeature(fe->outputs()[0]), 1.0);
+ EXPECT_EQUAL(f.resolveFeature(fe->outputs()[1]), 2.0);
+ EXPECT_EQUAL(f.resolveFeature(fe->outputs()[2]), 3.0);
+}
+
+TEST_F("test decorator - transitive override", Fixture)
+{
+ FeatureExecutor::SharedInputs inputs;
+ FESP fe = f.createValueExecutor();
+ fe = FESP(new FeatureOverrider(fe, 1, 50.0));
+ f.add(fe, 3);
+ EXPECT_EQUAL(fe->outputs().size(), 3u);
+
+ FESP fe2 = FESP(new DoubleExecutor(3));
+ fe2->bind_shared_inputs(inputs);
+ fe2->addInput(fe->outputs()[0]);
+ fe2->addInput(fe->outputs()[1]);
+ fe2->addInput(fe->outputs()[2]);
+ fe2 = FESP(new FeatureOverrider(fe2, 2, 10.0));
+ f.add(fe2, 3).run();
+ EXPECT_EQUAL(fe2->outputs().size(), 3u);
+
+ EXPECT_EQUAL(f.resolveFeature(fe->outputs()[0]), 1.0);
+ EXPECT_EQUAL(f.resolveFeature(fe->outputs()[1]), 50.0);
+ EXPECT_EQUAL(f.resolveFeature(fe->outputs()[2]), 3.0);
+ EXPECT_EQUAL(f.resolveFeature(fe2->outputs()[0]), 2.0);
+ EXPECT_EQUAL(f.resolveFeature(fe2->outputs()[1]), 100.0);
+ EXPECT_EQUAL(f.resolveFeature(fe2->outputs()[2]), 10.0);
+}
+
+TEST("test overrides")
+{
+ BlueprintFactory bf;
+ bf.addPrototype(BPSP(new ValueBlueprint()));
+ bf.addPrototype(BPSP(new DoubleBlueprint()));
+ bf.addPrototype(BPSP(new SumBlueprint()));
+
+ IndexEnvironment idxEnv;
+ RankSetup rs(bf, idxEnv);
+
+ rs.addDumpFeature("value(1,2,3)");
+ rs.addDumpFeature("double(value(1))");
+ rs.addDumpFeature("double(value(2))");
+ rs.addDumpFeature("double(value(3))");
+ rs.addDumpFeature("mysum(value(2),value(2))");
+ rs.addDumpFeature("mysum(value(1),value(2),value(3))");
+ EXPECT_TRUE(rs.compile());
+
+ RankProgram::UP rankProgram = rs.create_dump_program();
+
+ MatchDataLayout mdl;
+ QueryEnvironment queryEnv;
+ Properties overrides;
+
+ overrides.add("value(2)", "20.0");
+ overrides.add("value(1,2,3).1", "4.0");
+ overrides.add("value(1,2,3).2", "6.0");
+ overrides.add("bogus(feature)", "10.0");
+
+ rankProgram->setup(mdl, queryEnv, overrides);
+ rankProgram->run(2);
+
+ std::map<vespalib::string, feature_t> res = Utils::getAllFeatures(*rankProgram);
+
+ EXPECT_EQUAL(res.size(), 20u);
+ EXPECT_APPROX(res["value(1)"], 1.0, 1e-6);
+ EXPECT_APPROX(res["value(1).0"], 1.0, 1e-6);
+ EXPECT_APPROX(res["value(2)"], 20.0, 1e-6);
+ EXPECT_APPROX(res["value(2).0"], 20.0, 1e-6);
+ EXPECT_APPROX(res["value(3)"], 3.0, 1e-6);
+ EXPECT_APPROX(res["value(3).0"], 3.0, 1e-6);
+ EXPECT_APPROX(res["value(1,2,3)"], 1.0, 1e-6);
+ EXPECT_APPROX(res["value(1,2,3).0"], 1.0, 1e-6);
+ EXPECT_APPROX(res["value(1,2,3).1"], 4.0, 1e-6);
+ EXPECT_APPROX(res["value(1,2,3).2"], 6.0, 1e-6);
+ EXPECT_APPROX(res["mysum(value(2),value(2))"], 40.0, 1e-6);
+ EXPECT_APPROX(res["mysum(value(2),value(2)).out"], 40.0, 1e-6);
+ EXPECT_APPROX(res["mysum(value(1),value(2),value(3))"], 24.0, 1e-6);
+ EXPECT_APPROX(res["mysum(value(1),value(2),value(3)).out"], 24.0, 1e-6);
+ EXPECT_APPROX(res["double(value(1))"], 2.0, 1e-6);
+ EXPECT_APPROX(res["double(value(1)).0"], 2.0, 1e-6);
+ EXPECT_APPROX(res["double(value(2))"], 40.0, 1e-6);
+ EXPECT_APPROX(res["double(value(2)).0"], 40.0, 1e-6);
+ EXPECT_APPROX(res["double(value(3))"], 6.0, 1e-6);
+ EXPECT_APPROX(res["double(value(3)).0"], 6.0, 1e-6);
+}
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/fef/fef_test.cpp b/searchlib/src/tests/fef/fef_test.cpp
new file mode 100644
index 00000000000..b3107e57fae
--- /dev/null
+++ b/searchlib/src/tests/fef/fef_test.cpp
@@ -0,0 +1,116 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("fef_test");
+#include <vespa/vespalib/testkit/testapp.h>
+#include <vespa/searchlib/fef/fef.h>
+#include <vespa/searchlib/fef/objectstore.h>
+
+using namespace search::fef;
+using std::shared_ptr;
+using search::feature_t;
+
+class Test : public vespalib::TestApp
+{
+public:
+ void testLayout();
+ void testObjectStore();
+ void testTermFieldMatchDataAppend();
+ int Main();
+};
+
+void
+Test::testLayout()
+{
+ {
+ TermFieldMatchData tmd;
+ EXPECT_EQUAL(IllegalFieldId, tmd.getFieldId());
+ EXPECT_EQUAL(TermFieldMatchData::invalidId(), tmd.getDocId());
+ }
+ MatchDataLayout mdl;
+ EXPECT_EQUAL(mdl.allocTermField(0), 0u);
+ EXPECT_EQUAL(mdl.allocTermField(42), 1u);
+ EXPECT_EQUAL(mdl.allocTermField(IllegalFieldId), 2u);
+ EXPECT_EQUAL(mdl.allocFeature(), 0u);
+ EXPECT_EQUAL(mdl.allocFeature(), 1u);
+ EXPECT_EQUAL(mdl.allocFeature(), 2u);
+
+ MatchData::UP md = mdl.createMatchData();
+ EXPECT_EQUAL(TermFieldMatchData::invalidId(), md->getDocId());
+ EXPECT_EQUAL(md->getNumTermFields(), 3u);
+ EXPECT_EQUAL(md->getNumFeatures(), 3u);
+ TermFieldMatchData *t0 = md->resolveTermField(0);
+ TermFieldMatchData *t1 = md->resolveTermField(1);
+ TermFieldMatchData *t2 = md->resolveTermField(2);
+ EXPECT_EQUAL(t1, t0 + 1);
+ EXPECT_EQUAL(t2, t1 + 1);
+ EXPECT_EQUAL(0u, t0->getFieldId());
+ EXPECT_EQUAL(42u, t1->getFieldId());
+ EXPECT_EQUAL(IllegalFieldId, t2->getFieldId());
+ feature_t *f0 = md->resolveFeature(0);
+ feature_t *f1 = md->resolveFeature(1);
+ feature_t *f2 = md->resolveFeature(2);
+ EXPECT_EQUAL(f1, f0 + 1);
+ EXPECT_EQUAL(f2, f1 + 1);
+ EXPECT_TRUE((void*)t2 < (void*)f0 || (void*)f2 < (void*)t0);
+}
+
+void
+Test::testObjectStore()
+{
+ ObjectStore s;
+ class Object : public Anything {
+ };
+ Anything::UP u1(new Object());
+ Anything::UP u11(new Object());
+ Anything::UP u2(new Object());
+ const Anything * o1(u1.get());
+ const Anything * o11(u11.get());
+ const Anything * o2(u2.get());
+ EXPECT_TRUE(nullptr == s.get("a"));
+ s.add("a", std::move(u1));
+ EXPECT_EQUAL(o1, s.get("a"));
+ EXPECT_TRUE(nullptr == s.get("b"));
+ s.add("b", std::move(u2));
+ EXPECT_EQUAL(o1, s.get("a"));
+ EXPECT_EQUAL(o2, s.get("b"));
+ s.add("a", std::move(u11));
+ EXPECT_EQUAL(o11, s.get("a"));
+}
+
+void
+Test::testTermFieldMatchDataAppend()
+{
+ TermFieldMatchData tmd;
+ EXPECT_EQUAL(0u, tmd.size());
+ EXPECT_EQUAL(1u, tmd.capacity());
+ TermFieldMatchDataPosition pos;
+ tmd.appendPosition(pos);
+ EXPECT_EQUAL(1u, tmd.size());
+ EXPECT_EQUAL(1u, tmd.capacity());
+ tmd.appendPosition(pos);
+ EXPECT_EQUAL(2u, tmd.size());
+ EXPECT_EQUAL(2u, tmd.capacity());
+ for (size_t i(2); i < std::numeric_limits<uint16_t>::max(); i++) {
+ EXPECT_EQUAL(i, tmd.size());
+ EXPECT_EQUAL(std::min(size_t(std::numeric_limits<uint16_t>::max()), vespalib::roundUp2inN(i)), tmd.capacity());
+ tmd.appendPosition(pos);
+ }
+ EXPECT_EQUAL(std::numeric_limits<uint16_t>::max(), tmd.size());
+ EXPECT_EQUAL(std::numeric_limits<uint16_t>::max(), tmd.capacity());
+ tmd.appendPosition(pos);
+ EXPECT_EQUAL(std::numeric_limits<uint16_t>::max(), tmd.size());
+ EXPECT_EQUAL(std::numeric_limits<uint16_t>::max(), tmd.capacity());
+}
+
+int
+Test::Main()
+{
+ TEST_INIT("fef_test");
+ testLayout();
+ testObjectStore();
+ testTermFieldMatchDataAppend();
+ TEST_DONE();
+}
+
+TEST_APPHOOK(Test);
diff --git a/searchlib/src/tests/fef/object_passing/.gitignore b/searchlib/src/tests/fef/object_passing/.gitignore
new file mode 100644
index 00000000000..64b250201a8
--- /dev/null
+++ b/searchlib/src/tests/fef/object_passing/.gitignore
@@ -0,0 +1 @@
+searchlib_object_passing_test_app
diff --git a/searchlib/src/tests/fef/object_passing/CMakeLists.txt b/searchlib/src/tests/fef/object_passing/CMakeLists.txt
new file mode 100644
index 00000000000..2334711f015
--- /dev/null
+++ b/searchlib/src/tests/fef/object_passing/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_object_passing_test_app
+ SOURCES
+ object_passing_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_object_passing_test_app COMMAND searchlib_object_passing_test_app)
diff --git a/searchlib/src/tests/fef/object_passing/object_passing_test.cpp b/searchlib/src/tests/fef/object_passing/object_passing_test.cpp
new file mode 100644
index 00000000000..69c681d8f60
--- /dev/null
+++ b/searchlib/src/tests/fef/object_passing/object_passing_test.cpp
@@ -0,0 +1,128 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/vespalib/testkit/test_kit.h>
+#include <vespa/vespalib/stllike/string.h>
+#include <vespa/vespalib/util/stringfmt.h>
+#include <vespa/searchlib/features/valuefeature.h>
+#include <vespa/searchlib/fef/blueprintfactory.h>
+#include <vespa/searchlib/fef/test/indexenvironment.h>
+#include <vespa/searchlib/fef/test/queryenvironment.h>
+#include <vespa/searchlib/fef/test/plugin/sum.h>
+#include <vespa/searchlib/fef/rank_program.h>
+#include <vespa/searchlib/fef/verify_feature.h>
+#include <vespa/vespalib/eval/value_type.h>
+#include <vespa/searchlib/fef/feature_type.h>
+
+using namespace search::fef;
+using namespace search::fef::test;
+using namespace search::features;
+using vespalib::eval::ValueType;
+
+struct ProxyExecutor : FeatureExecutor {
+ double number_value;
+ vespalib::eval::Value::UP object_value;
+ ProxyExecutor() : number_value(0.0), object_value() {}
+ bool isPure() override { return true; }
+ void execute(search::fef::MatchData &md) override {
+ double was_object = 0.0;
+ if (md.feature_is_object(inputs()[0])) {
+ was_object = 1.0;
+ number_value = md.resolve_object_feature(inputs()[0])->get().as_double();
+ object_value.reset(new vespalib::eval::DoubleValue(number_value));
+ } else {
+ number_value = *md.resolveFeature(inputs()[0]);
+ object_value.reset(new vespalib::eval::DoubleValue(number_value));
+ }
+ if (md.feature_is_object(outputs()[0])) {
+ *md.resolve_object_feature(outputs()[0]) = *object_value;
+ } else {
+ *md.resolveFeature(outputs()[0]) = number_value;
+ }
+ *md.resolveFeature(outputs()[1]) = was_object;
+ }
+};
+
+struct ProxyBlueprint : Blueprint {
+ vespalib::string name;
+ AcceptInput accept_input;
+ bool object_output;
+ ProxyBlueprint(const vespalib::string &name_in, AcceptInput accept_input_in, bool object_output_in)
+ : Blueprint(name_in), name(name_in), accept_input(accept_input_in), object_output(object_output_in) {}
+ void visitDumpFeatures(const IIndexEnvironment &, IDumpFeatureVisitor &) const override {}
+ Blueprint::UP createInstance() const override {
+ return Blueprint::UP(new ProxyBlueprint(name, accept_input, object_output));
+ }
+ bool setup(const IIndexEnvironment &, const std::vector<vespalib::string> &params) override {
+ ASSERT_EQUAL(1u, params.size());
+ defineInput(params[0], accept_input);
+ describeOutput("value", "the value", object_output ? FeatureType::object(ValueType::double_type()) : FeatureType::number());
+ describeOutput("was_object", "whether input was object", FeatureType::number());
+ return true;
+ }
+ FeatureExecutor::LP createExecutor(const IQueryEnvironment &) const override {
+ return FeatureExecutor::LP(new ProxyExecutor());
+ }
+};
+
+struct Fixture {
+ BlueprintFactory factory;
+ IndexEnvironment indexEnv;
+
+ explicit Fixture() {
+ factory.addPrototype(Blueprint::SP(new ValueBlueprint()));
+ factory.addPrototype(Blueprint::SP(new ProxyBlueprint("box", Blueprint::AcceptInput::NUMBER, true)));
+ factory.addPrototype(Blueprint::SP(new ProxyBlueprint("maybe_box", Blueprint::AcceptInput::ANY, true)));
+ factory.addPrototype(Blueprint::SP(new ProxyBlueprint("unbox", Blueprint::AcceptInput::OBJECT, false)));
+ factory.addPrototype(Blueprint::SP(new ProxyBlueprint("maybe_unbox", Blueprint::AcceptInput::ANY, false)));
+ }
+
+ double eval(const vespalib::string &feature) {
+ BlueprintResolver::SP resolver(new BlueprintResolver(factory, indexEnv));
+ resolver->addSeed(feature);
+ if (!resolver->compile()) {
+ return vespalib::eval::error_value;
+ }
+ MatchDataLayout mdl;
+ QueryEnvironment queryEnv(&indexEnv);
+ Properties overrides;
+ RankProgram program(resolver);
+ program.setup(mdl, queryEnv, overrides);
+ program.run(1);
+ std::vector<vespalib::string> names;
+ std::vector<FeatureHandle> handles;
+ program.get_seed_handles(names, handles);
+ EXPECT_EQUAL(1u, names.size());
+ EXPECT_EQUAL(names.size(), handles.size());
+ const auto &md = program.match_data();
+ EXPECT_TRUE(!md.feature_is_object(handles[0])); // verifies auto-unboxing
+ return *md.resolveFeature(handles[0]);
+ }
+
+ bool verify(const vespalib::string &feature) {
+ return verifyFeature(factory, indexEnv, feature, "unit test");
+ }
+};
+
+TEST_F("require that values can be boxed and unboxed", Fixture()) {
+ EXPECT_EQUAL(3.0, f1.eval("box(value(3))"));
+ EXPECT_EQUAL(0.0, f1.eval("box(value(3)).was_object"));
+ EXPECT_EQUAL(3.0, f1.eval("unbox(box(value(3)))"));
+ EXPECT_EQUAL(1.0, f1.eval("unbox(box(value(3))).was_object"));
+ EXPECT_EQUAL(3.0, f1.eval("box(unbox(box(value(3))))"));
+ EXPECT_EQUAL(0.0, f1.eval("box(unbox(box(value(3)))).was_object"));
+}
+
+TEST_F("require that output features may be either objects or numbers", Fixture()) {
+ EXPECT_TRUE(f1.verify("value(3)"));
+ EXPECT_TRUE(f1.verify("box(value(3))"));
+}
+
+TEST_F("require that feature input/output types must be compatible", Fixture()) {
+ EXPECT_TRUE(!f1.verify("unbox(value(3))"));
+ EXPECT_TRUE(f1.verify("maybe_unbox(value(3))"));
+ EXPECT_TRUE(f1.verify("unbox(box(value(3)))"));
+ EXPECT_TRUE(!f1.verify("unbox(box(box(value(3))))"));
+ EXPECT_TRUE(f1.verify("unbox(maybe_box(box(value(3))))"));
+ EXPECT_TRUE(f1.verify("unbox(box(unbox(box(value(3)))))"));
+}
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/fef/parameter/.gitignore b/searchlib/src/tests/fef/parameter/.gitignore
new file mode 100644
index 00000000000..17cf6c69953
--- /dev/null
+++ b/searchlib/src/tests/fef/parameter/.gitignore
@@ -0,0 +1,4 @@
+.depend
+Makefile
+parameter_test
+searchlib_parameter_test_app
diff --git a/searchlib/src/tests/fef/parameter/CMakeLists.txt b/searchlib/src/tests/fef/parameter/CMakeLists.txt
new file mode 100644
index 00000000000..dcd45390ce3
--- /dev/null
+++ b/searchlib/src/tests/fef/parameter/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_parameter_test_app
+ SOURCES
+ parameter_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_parameter_test_app NO_VALGRIND COMMAND searchlib_parameter_test_app)
diff --git a/searchlib/src/tests/fef/parameter/DESC b/searchlib/src/tests/fef/parameter/DESC
new file mode 100644
index 00000000000..738e0dbd512
--- /dev/null
+++ b/searchlib/src/tests/fef/parameter/DESC
@@ -0,0 +1 @@
+parameter test. Take a look at parameter.cpp for details.
diff --git a/searchlib/src/tests/fef/parameter/FILES b/searchlib/src/tests/fef/parameter/FILES
new file mode 100644
index 00000000000..20c9e0c9ba0
--- /dev/null
+++ b/searchlib/src/tests/fef/parameter/FILES
@@ -0,0 +1 @@
+parameter.cpp
diff --git a/searchlib/src/tests/fef/parameter/parameter_test.cpp b/searchlib/src/tests/fef/parameter/parameter_test.cpp
new file mode 100644
index 00000000000..4d6741937d5
--- /dev/null
+++ b/searchlib/src/tests/fef/parameter/parameter_test.cpp
@@ -0,0 +1,267 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("parameter_test");
+#include <vespa/vespalib/testkit/testapp.h>
+
+#include <vespa/searchlib/fef/parametervalidator.h>
+#include <vespa/searchlib/fef/test/indexenvironment.h>
+#include <vespa/searchlib/fef/test/indexenvironmentbuilder.h>
+
+using namespace search::fef::test;
+
+namespace search {
+namespace fef {
+
+class StringList : public std::vector<vespalib::string> {
+public:
+ StringList & add(const vespalib::string & str) { push_back(str); return *this; }
+};
+
+class ParameterTest : public vespalib::TestApp {
+private:
+ typedef ParameterDescriptions PDS;
+ typedef ParameterType PT;
+ typedef Parameter P;
+ typedef StringList SL;
+ typedef ParameterValidator::Result PVR;
+
+ bool assertParameter(const Parameter & exp, const Parameter & act);
+ bool validate(const IIndexEnvironment & env,
+ const std::vector<vespalib::string> & params,
+ const ParameterDescriptions & descs);
+ bool validate(const IIndexEnvironment & env,
+ const std::vector<vespalib::string> & params,
+ const ParameterDescriptions & descs,
+ const ParameterValidator::Result & result);
+
+ void testDescriptions();
+ void testValidator();
+ void testParameters();
+
+public:
+ int Main();
+};
+
+bool
+ParameterTest::assertParameter(const Parameter & exp, const Parameter & act)
+{
+ bool retval = true;
+ if (!EXPECT_EQUAL(exp.getType(), act.getType())) retval = false;
+ if (!EXPECT_EQUAL(exp.getValue(), act.getValue())) retval = false;
+ if (!EXPECT_EQUAL(exp.asDouble(), act.asDouble())) retval = false;
+ if (!EXPECT_EQUAL(exp.asInteger(), act.asInteger())) retval = false;
+ if (!EXPECT_EQUAL(exp.asField(), act.asField())) retval = false;
+ return retval;
+}
+
+bool
+ParameterTest::validate(const IIndexEnvironment & env,
+ const std::vector<vespalib::string> & params,
+ const ParameterDescriptions & descs)
+{
+ ParameterValidator pv(env, params, descs);
+ ParameterValidator::Result result = pv.validate();
+ LOG(info, "validate(%s)", result.getError().c_str());
+ return result.valid();
+}
+
+bool
+ParameterTest::validate(const IIndexEnvironment & env,
+ const std::vector<vespalib::string> & params,
+ const ParameterDescriptions & descs,
+ const ParameterValidator::Result & result)
+{
+ if (!validate(env, params, descs)) return false;
+ ParameterValidator pv(env, params, descs);
+ ParameterValidator::Result actual = pv.validate();
+ if (!EXPECT_EQUAL(result.getTag(), actual.getTag())) return false;
+ if (!EXPECT_EQUAL(result.getParameters().size(), actual.getParameters().size())) return false;
+ bool retval = true;
+ for (size_t i = 0; i < result.getParameters().size(); ++i) {
+ if (!assertParameter(result.getParameters()[i], actual.getParameters()[i])) retval = false;
+ }
+ return retval;
+}
+
+void
+ParameterTest::testDescriptions()
+{
+ PDS descs = PDS().
+ desc().indexField(ParameterCollection::SINGLE).indexField(ParameterCollection::ARRAY).indexField(ParameterCollection::WEIGHTEDSET).attribute(ParameterCollection::ANY).attributeField(ParameterCollection::ANY).field().
+ desc(5).feature().number().string().attribute(ParameterCollection::ANY).
+ desc().string().number().repeat(2);
+ const PDS::DescriptionVector & v = descs.getDescriptions();
+ EXPECT_EQUAL(v.size(), 3u);
+ EXPECT_EQUAL(v[0].getTag(), 0u);
+ EXPECT_TRUE(!v[0].hasRepeat());
+ EXPECT_EQUAL(v[0].getParams().size(), 6u);
+ EXPECT_EQUAL(v[0].getParam(0).type, ParameterType::INDEX_FIELD);
+ EXPECT_EQUAL(v[0].getParam(1).type, ParameterType::INDEX_FIELD);
+ EXPECT_EQUAL(v[0].getParam(2).type, ParameterType::INDEX_FIELD);
+ EXPECT_EQUAL(v[0].getParam(3).type, ParameterType::ATTRIBUTE);
+ EXPECT_EQUAL(v[0].getParam(4).type, ParameterType::ATTRIBUTE_FIELD);
+ EXPECT_EQUAL(v[0].getParam(5).type, ParameterType::FIELD);
+ EXPECT_EQUAL(v[0].getParam(0).collection, ParameterCollection::SINGLE);
+ EXPECT_EQUAL(v[0].getParam(1).collection, ParameterCollection::ARRAY);
+ EXPECT_EQUAL(v[0].getParam(2).collection, ParameterCollection::WEIGHTEDSET);
+ EXPECT_EQUAL(v[0].getParam(3).collection, ParameterCollection::ANY);
+ EXPECT_EQUAL(v[0].getParam(4).collection, ParameterCollection::ANY);
+ EXPECT_EQUAL(v[0].getParam(5).collection, ParameterCollection::ANY);
+
+ EXPECT_EQUAL(v[1].getTag(), 5u);
+ EXPECT_TRUE(!v[1].hasRepeat());
+ EXPECT_EQUAL(v[1].getParams().size(), 4u);
+ EXPECT_EQUAL(v[1].getParam(0).type, ParameterType::FEATURE);
+ EXPECT_EQUAL(v[1].getParam(1).type, ParameterType::NUMBER);
+ EXPECT_EQUAL(v[1].getParam(2).type, ParameterType::STRING);
+ EXPECT_EQUAL(v[1].getParam(3).type, ParameterType::ATTRIBUTE);
+
+ EXPECT_EQUAL(v[2].getTag(), 6u);
+ EXPECT_TRUE(v[2].hasRepeat());
+ EXPECT_EQUAL(v[2].getParams().size(), 2u);
+ EXPECT_EQUAL(v[2].getParam(0).type, ParameterType::STRING);
+ EXPECT_EQUAL(v[2].getParam(1).type, ParameterType::NUMBER);
+ EXPECT_EQUAL(v[2].getParam(2).type, ParameterType::STRING);
+ EXPECT_EQUAL(v[2].getParam(3).type, ParameterType::NUMBER);
+ EXPECT_EQUAL(v[2].getParam(4).type, ParameterType::STRING);
+ EXPECT_EQUAL(v[2].getParam(5).type, ParameterType::NUMBER);
+}
+
+void
+ParameterTest::testValidator()
+{
+ IndexEnvironment env;
+ IndexEnvironmentBuilder builder(env);
+ builder.addField(FieldType::INDEX, CollectionType::SINGLE, "foo")
+ .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "bar")
+ .addField(FieldType::INDEX, CollectionType::ARRAY, "afoo")
+ .addField(FieldType::INDEX, CollectionType::WEIGHTEDSET, "wfoo")
+ .addField(FieldType::INDEX, CollectionType::SINGLE, "hybrid");
+ env.getFields().back().addAttribute(); // 'hybrid' field can also be accessed as an attribute
+
+ // valid
+ EXPECT_TRUE(validate(env, SL(), PDS().desc()));
+ EXPECT_TRUE(validate(env, SL().add("foo"), PDS().desc().field()));
+ EXPECT_TRUE(validate(env, SL().add("bar"), PDS().desc().field()));
+ EXPECT_TRUE(validate(env, SL().add("foo"), PDS().desc().indexField(ParameterCollection::SINGLE)));
+ EXPECT_TRUE(validate(env, SL().add("afoo"), PDS().desc().indexField(ParameterCollection::ARRAY)));
+ EXPECT_TRUE(validate(env, SL().add("wfoo"), PDS().desc().indexField(ParameterCollection::WEIGHTEDSET)));
+ EXPECT_TRUE(validate(env, SL().add("foo"), PDS().desc().indexField(ParameterCollection::ANY)));
+ EXPECT_TRUE(validate(env, SL().add("afoo"), PDS().desc().indexField(ParameterCollection::ANY)));
+ EXPECT_TRUE(validate(env, SL().add("wfoo"), PDS().desc().indexField(ParameterCollection::ANY)));
+ EXPECT_TRUE(validate(env, SL().add("bar"), PDS().desc().attribute(ParameterCollection::ANY)));
+ EXPECT_TRUE(validate(env, SL().add("bar"), PDS().desc().attributeField(ParameterCollection::ANY)));
+ EXPECT_TRUE(validate(env, SL().add("hybrid"), PDS().desc().attribute(ParameterCollection::ANY)));
+ EXPECT_TRUE(validate(env, SL().add("baz"), PDS().desc().feature()));
+ EXPECT_TRUE(validate(env, SL().add("123"), PDS().desc().number()));
+ EXPECT_TRUE(validate(env, SL().add("baz"), PDS().desc().string()));
+ // first fail but second pass
+ EXPECT_TRUE(validate(env, SL().add("baz"), PDS().desc().field().desc().string()));
+
+ // not valid
+ EXPECT_FALSE(validate(env, SL().add("baz"), PDS().desc().string().string()));
+ EXPECT_FALSE(validate(env, SL().add("baz").add("baz"), PDS().desc().string()));
+ EXPECT_FALSE(validate(env, SL().add("baz"), PDS().desc().field()));
+ EXPECT_FALSE(validate(env, SL().add("bar"), PDS().desc().indexField(ParameterCollection::SINGLE)));
+ EXPECT_FALSE(validate(env, SL().add("foo"), PDS().desc().indexField(ParameterCollection::NONE)));
+ EXPECT_FALSE(validate(env, SL().add("foo"), PDS().desc().indexField(ParameterCollection::ARRAY)));
+ EXPECT_FALSE(validate(env, SL().add("foo"), PDS().desc().indexField(ParameterCollection::WEIGHTEDSET)));
+ EXPECT_FALSE(validate(env, SL().add("afoo"), PDS().desc().indexField(ParameterCollection::NONE)));
+ EXPECT_FALSE(validate(env, SL().add("afoo"), PDS().desc().indexField(ParameterCollection::SINGLE)));
+ EXPECT_FALSE(validate(env, SL().add("afoo"), PDS().desc().indexField(ParameterCollection::WEIGHTEDSET)));
+ EXPECT_FALSE(validate(env, SL().add("wfoo"), PDS().desc().indexField(ParameterCollection::NONE)));
+ EXPECT_FALSE(validate(env, SL().add("wfoo"), PDS().desc().indexField(ParameterCollection::SINGLE)));
+ EXPECT_FALSE(validate(env, SL().add("wfoo"), PDS().desc().indexField(ParameterCollection::ARRAY)));
+ EXPECT_FALSE(validate(env, SL().add("unknown"), PDS().desc().attribute(ParameterCollection::ANY)));
+ EXPECT_FALSE(validate(env, SL().add("unknown"), PDS().desc().attributeField(ParameterCollection::ANY)));
+ EXPECT_FALSE(validate(env, SL().add("foo"), PDS().desc().attribute(ParameterCollection::ANY)));
+ EXPECT_FALSE(validate(env, SL().add("foo"), PDS().desc().attributeField(ParameterCollection::ANY)));
+ EXPECT_FALSE(validate(env, SL().add("hybrid"), PDS().desc().attributeField(ParameterCollection::ANY)));
+ EXPECT_FALSE(validate(env, SL().add("12a"), PDS().desc().number()));
+ EXPECT_FALSE(validate(env, SL().add("a12"), PDS().desc().number()));
+
+ // test repeat
+ PDS d1 = PDS().desc().field().repeat();
+ EXPECT_TRUE(validate(env, SL(), d1));
+ EXPECT_TRUE(validate(env, SL().add("foo"), d1));
+ EXPECT_TRUE(validate(env, SL().add("foo").add("bar"), d1));
+ EXPECT_TRUE(!validate(env, SL().add("foo").add("bar").add("baz"), d1));
+ PDS d2 = PDS().desc().string().attribute(ParameterCollection::ANY).indexField(ParameterCollection::SINGLE).repeat(2);
+ EXPECT_TRUE(validate(env, SL().add("str"), d2));
+ EXPECT_TRUE(validate(env, SL().add("str").add("bar").add("foo"), d2));
+ EXPECT_TRUE(validate(env, SL().add("str").add("bar").add("foo").add("bar").add("foo"), d2));
+ EXPECT_TRUE(!validate(env, SL().add("str").add("bar"), d2));
+ EXPECT_TRUE(!validate(env, SL().add("str").add("bar").add("foo").add("bar"), d2));
+}
+
+void
+ParameterTest::testParameters()
+{
+ IndexEnvironment env;
+ IndexEnvironmentBuilder builder(env);
+ builder.addField(FieldType::INDEX, CollectionType::SINGLE, "foo")
+ .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "bar")
+ .addField(FieldType::INDEX, CollectionType::ARRAY, "afoo")
+ .addField(FieldType::INDEX, CollectionType::WEIGHTEDSET, "wfoo");
+
+ const FieldInfo * foo = env.getFieldByName("foo");
+ const FieldInfo * bar = env.getFieldByName("bar");
+ const FieldInfo * afoo = env.getFieldByName("afoo");
+ const FieldInfo * wfoo = env.getFieldByName("wfoo");
+
+ EXPECT_TRUE(validate(env, SL().add("foo"), PDS().desc().field(),
+ PVR().addParameter(P(PT::FIELD, "foo").setField(foo)))); // field
+ EXPECT_TRUE(validate(env, SL().add("foo"), PDS().desc().indexField(ParameterCollection::SINGLE),
+ PVR().addParameter(P(PT::INDEX_FIELD, "foo").setField(foo)))); // index field
+ EXPECT_TRUE(validate(env, SL().add("foo"), PDS().desc().indexField(ParameterCollection::ANY),
+ PVR().addParameter(P(PT::INDEX_FIELD, "foo").setField(foo)))); // index field
+ EXPECT_TRUE(validate(env, SL().add("afoo"), PDS().desc().indexField(ParameterCollection::ARRAY),
+ PVR().addParameter(P(PT::INDEX_FIELD, "afoo").setField(afoo)))); // index field
+ EXPECT_TRUE(validate(env, SL().add("afoo"), PDS().desc().indexField(ParameterCollection::ANY),
+ PVR().addParameter(P(PT::INDEX_FIELD, "afoo").setField(afoo)))); // index field
+ EXPECT_TRUE(validate(env, SL().add("wfoo"), PDS().desc().indexField(ParameterCollection::WEIGHTEDSET),
+ PVR().addParameter(P(PT::INDEX_FIELD, "wfoo").setField(wfoo)))); // index field
+ EXPECT_TRUE(validate(env, SL().add("wfoo"), PDS().desc().indexField(ParameterCollection::ANY),
+ PVR().addParameter(P(PT::INDEX_FIELD, "wfoo").setField(wfoo)))); // index field
+ EXPECT_TRUE(validate(env, SL().add("bar"), PDS().desc().attribute(ParameterCollection::ANY),
+ PVR().addParameter(P(PT::ATTRIBUTE, "bar").setField(bar)))); // attribute field
+ EXPECT_TRUE(validate(env, SL().add("feature"), PDS().desc().feature(),
+ PVR().addParameter(P(PT::FEATURE, "feature")))); // feature
+ EXPECT_TRUE(validate(env, SL().add("string"), PDS().desc().string(),
+ PVR().addParameter(P(PT::STRING, "string")))); // string
+
+ // numbers
+ EXPECT_TRUE(validate(env, SL().add("-100"), PDS().desc().number(),
+ PVR().addParameter(P(PT::NUMBER, "-100").setDouble(-100).setInteger(-100))));
+ EXPECT_TRUE(validate(env, SL().add("100"), PDS().desc().number(),
+ PVR().addParameter(P(PT::NUMBER, "100").setDouble(100).setInteger(100))));
+ EXPECT_TRUE(validate(env, SL().add("100.16"), PDS().desc().number(),
+ PVR().addParameter(P(PT::NUMBER, "100.16").setDouble(100.16).setInteger(100))));
+
+ EXPECT_TRUE(validate(env, SL(), PDS().desc(), PVR())); // no param
+ EXPECT_TRUE(validate(env, SL().add("foo").add("bar"), PDS().desc().string().string(),
+ PVR().addParameter(P(PT::STRING, "foo")).addParameter(P(PT::STRING, "bar")))); // multiple params
+ EXPECT_TRUE(validate(env, SL().add("foo").add("bar"), PDS().desc().string().repeat(),
+ PVR().addParameter(P(PT::STRING, "foo")).addParameter(P(PT::STRING, "bar")))); // repeat
+ EXPECT_TRUE(validate(env, SL().add("baz"), PDS().desc(10).field().desc(20).string(),
+ PVR(20).addParameter(P(PT::STRING, "baz")))); // second desc matching
+}
+
+int
+ParameterTest::Main()
+{
+ TEST_INIT("parameter_test");
+
+ testDescriptions();
+ testValidator();
+ testParameters();
+
+ TEST_DONE();
+}
+
+}
+}
+
+TEST_APPHOOK(search::fef::ParameterTest);
+
diff --git a/searchlib/src/tests/fef/phrasesplitter/.gitignore b/searchlib/src/tests/fef/phrasesplitter/.gitignore
new file mode 100644
index 00000000000..418f9961840
--- /dev/null
+++ b/searchlib/src/tests/fef/phrasesplitter/.gitignore
@@ -0,0 +1,6 @@
+.depend
+Makefile
+benchmark
+phrasesplitter_test
+searchlib_phrasesplitter_test_app
+searchlib_benchmark_app
diff --git a/searchlib/src/tests/fef/phrasesplitter/CMakeLists.txt b/searchlib/src/tests/fef/phrasesplitter/CMakeLists.txt
new file mode 100644
index 00000000000..aa16f3e0a0d
--- /dev/null
+++ b/searchlib/src/tests/fef/phrasesplitter/CMakeLists.txt
@@ -0,0 +1,15 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_phrasesplitter_test_app
+ SOURCES
+ phrasesplitter_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_phrasesplitter_test_app COMMAND searchlib_phrasesplitter_test_app)
+vespa_add_executable(searchlib_benchmark_app
+ SOURCES
+ benchmark.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_benchmark_app COMMAND searchlib_benchmark_app BENCHMARK)
diff --git a/searchlib/src/tests/fef/phrasesplitter/DESC b/searchlib/src/tests/fef/phrasesplitter/DESC
new file mode 100644
index 00000000000..fba49bdb8c0
--- /dev/null
+++ b/searchlib/src/tests/fef/phrasesplitter/DESC
@@ -0,0 +1 @@
+phrasesplitter test. Take a look at phrasesplitter.cpp for details.
diff --git a/searchlib/src/tests/fef/phrasesplitter/FILES b/searchlib/src/tests/fef/phrasesplitter/FILES
new file mode 100644
index 00000000000..be37941d0c8
--- /dev/null
+++ b/searchlib/src/tests/fef/phrasesplitter/FILES
@@ -0,0 +1 @@
+phrasesplitter.cpp
diff --git a/searchlib/src/tests/fef/phrasesplitter/benchmark.cpp b/searchlib/src/tests/fef/phrasesplitter/benchmark.cpp
new file mode 100644
index 00000000000..ca90b1de261
--- /dev/null
+++ b/searchlib/src/tests/fef/phrasesplitter/benchmark.cpp
@@ -0,0 +1,84 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("phrasesplitter_test");
+#include <vespa/vespalib/testkit/testapp.h>
+
+#include <iomanip>
+#include <vespa/searchlib/fef/matchdatalayout.h>
+#include <vespa/searchlib/fef/phrasesplitter.h>
+#include <vespa/searchlib/fef/test/queryenvironment.h>
+
+namespace search {
+namespace fef {
+
+class Benchmark : public vespalib::TestApp
+{
+private:
+ FastOS_Time _timer;
+ double _sample;
+
+ void start() { _timer.SetNow(); }
+ void sample() { _sample = _timer.MilliSecsToNow(); }
+ void run(size_t numRuns, size_t numPositions);
+
+public:
+ Benchmark() : _timer(), _sample(0) {}
+ int Main();
+};
+
+void
+Benchmark::run(size_t numRuns, size_t numPositions)
+{
+ test::QueryEnvironment qe;
+ std::vector<SimpleTermData> &terms = qe.getTerms();
+ MatchDataLayout mdl;
+ terms.push_back(SimpleTermData());
+ terms.back().setUniqueId(1);
+ terms.back().setPhraseLength(3); // phrase with 3 terms
+ terms.back().addField(0).setHandle(mdl.allocTermField(0));
+ MatchData::UP md = mdl.createMatchData();
+ TermFieldMatchData *tmd = md->resolveTermField(terms[0].lookupField(0)->getHandle());
+ for (size_t i = 0; i < numPositions; ++i) {
+ tmd->appendPosition(TermFieldMatchDataPosition(0, i, 0, numPositions));
+ }
+
+ PhraseSplitter ps(qe, 0);
+
+ std::cout << "Start benchmark with numRuns(" << numRuns << ") and numPositions(" << numPositions << ")" << std::endl;
+
+ start();
+
+ for (size_t i = 0; i < numRuns; ++i) {
+ ps.update(*md);
+ }
+
+ sample();
+}
+
+int
+Benchmark::Main()
+{
+
+ TEST_INIT("benchmark");
+
+ if (_argc != 3) {
+ std::cout << "Must specify <numRuns> and <numPositions>" << std::endl;
+ return 0;
+ }
+
+ size_t numRuns = strtoull(_argv[1], NULL, 10);
+ size_t numPositions = strtoull(_argv[2], NULL, 10);
+
+ run(numRuns, numPositions);
+
+ std::cout << "TET: " << _sample << " (ms)" << std::endl;
+ std::cout << "ETPD: " << std::fixed << std::setprecision(10) << _sample / numRuns << " (ms)" << std::endl;
+
+ TEST_DONE();
+}
+
+}
+}
+
+TEST_APPHOOK(search::fef::Benchmark);
diff --git a/searchlib/src/tests/fef/phrasesplitter/phrasesplitter_test.cpp b/searchlib/src/tests/fef/phrasesplitter/phrasesplitter_test.cpp
new file mode 100644
index 00000000000..0fa6f27022e
--- /dev/null
+++ b/searchlib/src/tests/fef/phrasesplitter/phrasesplitter_test.cpp
@@ -0,0 +1,242 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("phrasesplitter_test");
+#include <vespa/vespalib/testkit/testapp.h>
+
+#include <vespa/searchlib/fef/matchdatalayout.h>
+#include <vespa/searchlib/fef/phrasesplitter.h>
+#include <vespa/searchlib/fef/test/queryenvironment.h>
+
+namespace search {
+namespace fef {
+
+class PhraseSplitterTest : public vespalib::TestApp
+{
+private:
+ void assertTermData(const ITermData * td, uint32_t uniqueId, uint32_t numTerms,
+ uint32_t fieldId, uint32_t termHandle);
+ void testCopyTermFieldMatchData();
+ void testSplitter();
+ void testSplitterUpdate();
+
+public:
+ int Main();
+};
+
+void
+PhraseSplitterTest::assertTermData(const ITermData *td, uint32_t uniqueId, uint32_t numTerms,
+ uint32_t fieldId, uint32_t tfHandle)
+{
+ // fprintf(stderr, "checking uid=%d numterms=%d field=%d handle=%d\n", uniqueId, numTerms, fieldId, tfHandle);
+ EXPECT_EQUAL(uniqueId, td->getUniqueId());
+ EXPECT_EQUAL(numTerms, td->getPhraseLength());
+ EXPECT_EQUAL(tfHandle, td->lookupField(fieldId)->getHandle());
+}
+
+void
+PhraseSplitterTest::testCopyTermFieldMatchData()
+{
+ TermFieldMatchData src;
+ src.reset(1);
+ src.appendPosition(TermFieldMatchDataPosition(0, 5, 0, 1000));
+ src.appendPosition(TermFieldMatchDataPosition(0, 15, 0, 1000));
+
+ SimpleTermData td;
+ TermFieldMatchData dst;
+ dst.reset(0);
+ // dst.setTermData(&td);
+ dst.appendPosition(TermFieldMatchDataPosition(0, 10, 0, 1000));
+ {
+ FieldPositionsIterator itr = dst.getIterator();
+ EXPECT_EQUAL(itr.getPosition(), 10u);
+ itr.next();
+ ASSERT_TRUE(!itr.valid());
+ }
+
+ PhraseSplitter::copyTermFieldMatchData(dst, src, 2);
+
+ EXPECT_EQUAL(dst.getDocId(), 1u);
+ {
+ TermFieldMatchData::PositionsIterator itr = dst.begin();
+ EXPECT_EQUAL(itr->getPosition(), 7u);
+ ++itr;
+ EXPECT_EQUAL(itr->getPosition(), 17u);
+ ++itr;
+ ASSERT_TRUE(itr == dst.end());
+ }
+ {
+ FieldPositionsIterator itr = dst.getIterator();
+ EXPECT_EQUAL(itr.getPosition(), 7u);
+ itr.next();
+ EXPECT_EQUAL(itr.getPosition(), 17u);
+ itr.next();
+ ASSERT_TRUE(!itr.valid());
+ }
+}
+
+void
+PhraseSplitterTest::testSplitter()
+{
+ { // single term
+ test::QueryEnvironment qe;
+ std::vector<SimpleTermData> &terms = qe.getTerms();
+ MatchDataLayout mdl;
+ terms.push_back(SimpleTermData());
+ terms.back().addField(0).setHandle(mdl.allocTermField(0));
+ MatchData::UP md = mdl.createMatchData();
+ PhraseSplitter ps(qe, 0);
+ ASSERT_TRUE(ps.getNumTerms() == 1);
+ ps.update(*md);
+ // check that nothing is served from the splitter
+ EXPECT_EQUAL(ps.getTerm(0), &terms[0]);
+ TermFieldHandle handle = terms[0].lookupField(0)->getHandle();
+ EXPECT_EQUAL(ps.resolveTermField(handle), md->resolveTermField(handle));
+ }
+ { // single phrase
+ test::QueryEnvironment qe;
+ std::vector<SimpleTermData> & terms = qe.getTerms();
+ MatchDataLayout mdl;
+ terms.push_back(SimpleTermData());
+ terms.back().setUniqueId(1);
+ terms.back().setPhraseLength(3);
+ terms.back().addField(0).setHandle(mdl.allocTermField(0));
+ terms.back().addField(7).setHandle(mdl.allocTermField(7));
+ MatchData::UP md = mdl.createMatchData();
+ PhraseSplitter ps(qe, 7);
+ ASSERT_TRUE(ps.getNumTerms() == 3);
+ ps.update(*md);
+ // check that all is served from the splitter
+ for (size_t i = 0; i < 3; ++i) {
+ // fprintf(stderr, "checking term %d\n", (int)i);
+ const ITermData *td = ps.getTerm(i);
+ EXPECT_NOT_EQUAL(td, &terms[0]);
+ EXPECT_NOT_EQUAL(td->lookupField(7), (ITermFieldData *)0);
+ EXPECT_EQUAL(td->lookupField(0), (ITermFieldData *)0);
+ TEST_DO(assertTermData(td, 1, 1, 7, i + 4)); // skipHandles = 4
+ EXPECT_NOT_EQUAL(td->lookupField(7)->getHandle(),
+ terms[0].lookupField(7)->getHandle());
+ EXPECT_NOT_EQUAL(ps.resolveTermField(td->lookupField(7)->getHandle()),
+ md->resolveTermField(terms[0].lookupField(7)->getHandle()));
+ }
+ }
+ { // combination
+ test::QueryEnvironment qe;
+ std::vector<SimpleTermData> &terms = qe.getTerms();
+ MatchDataLayout mdl;
+ for (size_t i = 0; i < 3; ++i) {
+ terms.push_back(SimpleTermData());
+ terms.back().setUniqueId(i);
+ terms.back().setPhraseLength(1);
+ terms.back().addField(4).setHandle(mdl.allocTermField(4));
+ terms.back().addField(7).setHandle(mdl.allocTermField(7));
+ // fprintf(stderr, "setup B term %p #f %zd\n", &terms.back(), terms.back().numFields());
+ }
+ terms[1].setPhraseLength(3);
+ MatchData::UP md = mdl.createMatchData();
+ PhraseSplitter ps(qe, 4);
+ ASSERT_TRUE(ps.getNumTerms() == 5);
+ ps.update(*md);
+ { // first term
+ // fprintf(stderr, "first term\n");
+ EXPECT_EQUAL(ps.getTerm(0), &terms[0]);
+ TEST_DO(assertTermData(ps.getTerm(0), 0, 1, 4, 0));
+ TEST_DO(assertTermData(ps.getTerm(0), 0, 1, 7, 1));
+
+ TermFieldHandle handle = terms[0].lookupField(4)->getHandle();
+ EXPECT_EQUAL(ps.resolveTermField(handle), md->resolveTermField(handle));
+ handle = terms[0].lookupField(7)->getHandle();
+ EXPECT_EQUAL(ps.resolveTermField(handle), md->resolveTermField(handle));
+ }
+ for (size_t i = 0; i < 3; ++i) { // phrase
+ // fprintf(stderr, "phrase term %zd\n", i);
+ const ITermData *td = ps.getTerm(i + 1);
+ EXPECT_NOT_EQUAL(td, &terms[1]);
+ TEST_DO(assertTermData(td, 1, 1, 4, i + 11)); // skipHandles == 11
+ EXPECT_EQUAL(td->lookupField(7), (ITermFieldData *)0);
+ EXPECT_NOT_EQUAL(ps.resolveTermField(td->lookupField(4)->getHandle()),
+ md->resolveTermField(terms[1].lookupField(4)->getHandle()));
+ }
+ { // last term
+ // fprintf(stderr, "last term\n");
+ EXPECT_EQUAL(ps.getTerm(4), &terms[2]);
+ TEST_DO(assertTermData(ps.getTerm(4), 2, 1, 4, 4));
+ TEST_DO(assertTermData(ps.getTerm(4), 2, 1, 7, 5));
+
+ // fprintf(stderr, "inspect term %p #f %zd\n", &terms[2], terms[2].numFields());
+ fflush(stderr);
+ TermFieldHandle handle = terms[2].lookupField(4)->getHandle();
+ EXPECT_EQUAL(ps.resolveTermField(handle), md->resolveTermField(handle));
+ }
+ }
+}
+
+void
+PhraseSplitterTest::testSplitterUpdate()
+{
+ {
+ test::QueryEnvironment qe;
+ std::vector<SimpleTermData> &terms = qe.getTerms();
+ MatchDataLayout mdl;
+ for (size_t i = 0; i < 3; ++i) {
+ terms.push_back(SimpleTermData());
+ terms.back().setUniqueId(i);
+ terms.back().setPhraseLength(1);
+ terms.back().addField(0).setHandle(mdl.allocTermField(0));
+ }
+ terms[0].setPhraseLength(2);
+ terms[2].setPhraseLength(2);
+ MatchData::UP md = mdl.createMatchData();
+ PhraseSplitter ps(qe, 0);
+ ASSERT_TRUE(ps.getNumTerms() == 5);
+ { // first phrase
+ TermFieldMatchData * tmd = md->resolveTermField(terms[0].lookupField(0)->getHandle());
+ tmd->appendPosition(TermFieldMatchDataPosition(0, 10, 0, 1000));
+ }
+ { // first term
+ TermFieldMatchData * tmd = md->resolveTermField(terms[1].lookupField(0)->getHandle());
+ tmd->appendPosition(TermFieldMatchDataPosition(0, 20, 0, 1000));
+ }
+ { // second phrase
+ TermFieldMatchData * tmd = md->resolveTermField(terms[2].lookupField(0)->getHandle());
+ tmd->appendPosition(TermFieldMatchDataPosition(0, 30, 0, 1000));
+ }
+ ps.update(*md);
+ for (size_t i = 0; i < 2; ++i) { // first phrase
+ const TermFieldMatchData * tmd = ps.resolveTermField(ps.getTerm(i)->lookupField(0)->getHandle());
+ TermFieldMatchData::PositionsIterator itr = tmd->begin();
+ EXPECT_EQUAL((itr++)->getPosition(), 10 + i);
+ ASSERT_TRUE(itr == tmd->end());
+ }
+ { // first term
+ TermFieldMatchData * tmd = md->resolveTermField(ps.getTerm(2)->lookupField(0)->getHandle());
+ TermFieldMatchData::PositionsIterator itr = tmd->begin();
+ EXPECT_EQUAL((itr++)->getPosition(), 20u);
+ ASSERT_TRUE(itr == tmd->end());
+ }
+ for (size_t i = 0; i < 2; ++i) { // second phrase
+ const TermFieldMatchData * tmd = ps.resolveTermField(ps.getTerm(i + 3)->lookupField(0)->getHandle());
+ TermFieldMatchData::PositionsIterator itr = tmd->begin();
+ EXPECT_EQUAL((itr++)->getPosition(), 30 + i);
+ ASSERT_TRUE(itr == tmd->end());
+ }
+ }
+}
+
+int
+PhraseSplitterTest::Main()
+{
+
+ TEST_INIT("phrasesplitter_test");
+
+ testCopyTermFieldMatchData();
+ testSplitter();
+ testSplitterUpdate();
+
+ TEST_DONE();
+}
+
+}
+}
+
+TEST_APPHOOK(search::fef::PhraseSplitterTest);
diff --git a/searchlib/src/tests/fef/properties/.gitignore b/searchlib/src/tests/fef/properties/.gitignore
new file mode 100644
index 00000000000..00f94794fa3
--- /dev/null
+++ b/searchlib/src/tests/fef/properties/.gitignore
@@ -0,0 +1,4 @@
+.depend
+Makefile
+properties_test
+searchlib_properties_test_app
diff --git a/searchlib/src/tests/fef/properties/CMakeLists.txt b/searchlib/src/tests/fef/properties/CMakeLists.txt
new file mode 100644
index 00000000000..0b74b10cb31
--- /dev/null
+++ b/searchlib/src/tests/fef/properties/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_properties_test_app
+ SOURCES
+ properties_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_properties_test_app COMMAND searchlib_properties_test_app)
diff --git a/searchlib/src/tests/fef/properties/DESC b/searchlib/src/tests/fef/properties/DESC
new file mode 100644
index 00000000000..02faa4cb727
--- /dev/null
+++ b/searchlib/src/tests/fef/properties/DESC
@@ -0,0 +1 @@
+properties test. Take a look at properties.cpp for details.
diff --git a/searchlib/src/tests/fef/properties/FILES b/searchlib/src/tests/fef/properties/FILES
new file mode 100644
index 00000000000..61054fa62c2
--- /dev/null
+++ b/searchlib/src/tests/fef/properties/FILES
@@ -0,0 +1 @@
+properties.cpp
diff --git a/searchlib/src/tests/fef/properties/properties_test.cpp b/searchlib/src/tests/fef/properties/properties_test.cpp
new file mode 100644
index 00000000000..a08d511b418
--- /dev/null
+++ b/searchlib/src/tests/fef/properties/properties_test.cpp
@@ -0,0 +1,425 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/vespalib/testkit/test_kit.h>
+#include <vespa/searchlib/fef/indexproperties.h>
+#include <vespa/searchlib/fef/properties.h>
+#include <limits>
+
+using namespace search::fef;
+using namespace search::fef::indexproperties;
+
+struct CopyVisitor : public IPropertiesVisitor
+{
+ Properties &dst;
+ CopyVisitor(Properties &p) : dst(p) {}
+ virtual void visitProperty(const Property::Value &key,
+ const Property &values)
+ {
+ for (uint32_t i = 0; i < values.size(); ++i) {
+ dst.add(key, values.getAt(i));
+ }
+ }
+};
+
+Properties make_props(std::initializer_list<std::pair<const char *, std::initializer_list<const char *> > > entries) {
+ Properties props;
+ for (const auto &entry: entries) {
+ vespalib::string key = entry.first;
+ for (vespalib::string value: entry.second) {
+ props.add(key, value);
+ }
+ }
+ return props;
+}
+
+TEST("require that namespace visitation works") {
+ Properties props = make_props({ {"foo", {"outside"}},
+ {"foo.a", {"a_value"}},
+ {"foo.b", {"b_value"}},
+ {"foo.", {"outside"}}
+ });
+ Properties result;
+ CopyVisitor copy_visitor(result);
+ props.visitNamespace("foo", copy_visitor);
+ EXPECT_EQUAL(2u, result.numKeys());
+ EXPECT_EQUAL(result.lookup("a").get(), Property::Value("a_value"));
+ EXPECT_EQUAL(result.lookup("b").get(), Property::Value("b_value"));
+}
+
+TEST("test stuff") {
+ { // empty lookup result
+ Property p;
+
+ EXPECT_EQUAL(p.found(), false);
+ EXPECT_EQUAL(p.get(), Property::Value(""));
+ EXPECT_EQUAL(p.get("fb"), Property::Value("fb"));
+ EXPECT_EQUAL(p.size(), 0u);
+ EXPECT_EQUAL(p.getAt(0), Property::Value(""));
+ }
+ { // add / count / remove
+ Properties p = make_props({ {"a", {"a1", "a2", "a3"}},
+ {"b", {"b1", "b2"}},
+ {"c", {"c1"}}
+ });
+ const Properties &pc = p;
+
+ EXPECT_EQUAL(pc.numKeys(), 3u);
+ EXPECT_EQUAL(pc.numValues(), 6u);
+ EXPECT_EQUAL(pc.count("a"), 3u);
+ EXPECT_EQUAL(pc.count("b"), 2u);
+ EXPECT_EQUAL(pc.count("c"), 1u);
+ EXPECT_EQUAL(pc.count("d"), 0u);
+
+ p.remove("d");
+
+ EXPECT_EQUAL(pc.numKeys(), 3u);
+ EXPECT_EQUAL(pc.numValues(), 6u);
+ EXPECT_EQUAL(pc.count("a"), 3u);
+ EXPECT_EQUAL(pc.count("b"), 2u);
+ EXPECT_EQUAL(pc.count("c"), 1u);
+ EXPECT_EQUAL(pc.count("d"), 0u);
+
+ p.remove("c");
+
+ EXPECT_EQUAL(pc.numKeys(), 2u);
+ EXPECT_EQUAL(pc.numValues(), 5u);
+ EXPECT_EQUAL(pc.count("a"), 3u);
+ EXPECT_EQUAL(pc.count("b"), 2u);
+ EXPECT_EQUAL(pc.count("c"), 0u);
+ EXPECT_EQUAL(pc.count("d"), 0u);
+
+ p.remove("b");
+
+ EXPECT_EQUAL(pc.numKeys(), 1u);
+ EXPECT_EQUAL(pc.numValues(), 3u);
+ EXPECT_EQUAL(pc.count("a"), 3u);
+ EXPECT_EQUAL(pc.count("b"), 0u);
+ EXPECT_EQUAL(pc.count("c"), 0u);
+ EXPECT_EQUAL(pc.count("d"), 0u);
+
+ p.remove("a");
+
+ EXPECT_EQUAL(pc.numKeys(), 0u);
+ EXPECT_EQUAL(pc.numValues(), 0u);
+ EXPECT_EQUAL(pc.count("a"), 0u);
+ EXPECT_EQUAL(pc.count("b"), 0u);
+ EXPECT_EQUAL(pc.count("c"), 0u);
+ EXPECT_EQUAL(pc.count("d"), 0u);
+ }
+ { // lookup / import / visit / compare / hash
+ Properties p;
+
+ p.add("x", "x1");
+ p.add("a.x", "x2");
+ p.add("a.b.x", "x3");
+ p.add("a.b.c.x", "x4");
+
+ p.add("list", "e1").add("list", "e2").add("list", "e3");
+
+ EXPECT_EQUAL(p.numKeys(), 5u);
+ EXPECT_EQUAL(p.numValues(), 7u);
+
+ EXPECT_EQUAL(p.lookup("x").found(), true);
+ EXPECT_EQUAL(p.lookup("a.x").found(), true);
+ EXPECT_EQUAL(p.lookup("a.b.x").found(), true);
+ EXPECT_EQUAL(p.lookup("a.b.c.x").found(), true);
+ EXPECT_EQUAL(p.lookup("list").found(), true);
+ EXPECT_EQUAL(p.lookup("y").found(), false);
+
+ EXPECT_EQUAL(p.lookup("x").get(), Property::Value("x1"));
+ EXPECT_EQUAL(p.lookup("a.x").get(), Property::Value("x2"));
+ EXPECT_EQUAL(p.lookup("a.b.x").get(), Property::Value("x3"));
+ EXPECT_EQUAL(p.lookup("a.b.c.x").get(), Property::Value("x4"));
+ EXPECT_EQUAL(p.lookup("list").get(), Property::Value("e1"));
+ EXPECT_EQUAL(p.lookup("y").get(), Property::Value(""));
+
+ EXPECT_EQUAL(p.lookup("x").get(), Property::Value("x1"));
+ EXPECT_EQUAL(p.lookup("a", "x").get(), Property::Value("x2"));
+ EXPECT_EQUAL(p.lookup("a", "b", "x").get(), Property::Value("x3"));
+ EXPECT_EQUAL(p.lookup("a", "b", "c", "x").get(), Property::Value("x4"));
+
+ EXPECT_EQUAL(p.lookup("x").get("fallback"), Property::Value("x1"));
+ EXPECT_EQUAL(p.lookup("y").get("fallback"), Property::Value("fallback"));
+
+ EXPECT_EQUAL(p.lookup("y").size(), 0u);
+ EXPECT_EQUAL(p.lookup("x").size(), 1u);
+ EXPECT_EQUAL(p.lookup("list").size(), 3u);
+ EXPECT_EQUAL(p.lookup("list").getAt(0), Property::Value("e1"));
+ EXPECT_EQUAL(p.lookup("list").getAt(1), Property::Value("e2"));
+ EXPECT_EQUAL(p.lookup("list").getAt(2), Property::Value("e3"));
+ EXPECT_EQUAL(p.lookup("list").getAt(3), Property::Value(""));
+
+ Properties p2;
+
+ p2.add("x", "new_x");
+ p2.add("y", "y1");
+ p2.add("list", "foo").add("list", "bar");
+
+ EXPECT_EQUAL(p2.numKeys(), 3u);
+ EXPECT_EQUAL(p2.numValues(), 4u);
+
+ p.import(p2);
+
+ EXPECT_EQUAL(p.numKeys(), 6u);
+ EXPECT_EQUAL(p.numValues(), 7u);
+
+ EXPECT_EQUAL(p.lookup("y").size(), 1u);
+ EXPECT_EQUAL(p.lookup("y").get(), Property::Value("y1"));
+
+ EXPECT_EQUAL(p.lookup("x").size(), 1u);
+ EXPECT_EQUAL(p.lookup("x").get(), Property::Value("new_x"));
+
+ EXPECT_EQUAL(p.lookup("z").size(), 0u);
+
+ EXPECT_EQUAL(p.lookup("a", "x").size(), 1u);
+ EXPECT_EQUAL(p.lookup("a", "x").get(), Property::Value("x2"));
+
+ EXPECT_EQUAL(p.lookup("list").size(), 2u);
+ EXPECT_EQUAL(p.lookup("list").getAt(0), Property::Value("foo"));
+ EXPECT_EQUAL(p.lookup("list").getAt(1), Property::Value("bar"));
+ EXPECT_EQUAL(p.lookup("list").getAt(2), Property::Value(""));
+
+ Properties p3;
+
+ EXPECT_TRUE(!(p == p2));
+ EXPECT_TRUE(!(p == p3));
+ EXPECT_TRUE(!(p2 == p));
+ EXPECT_TRUE(!(p3 == p));
+ EXPECT_TRUE(!(p2 == p3));
+ EXPECT_TRUE(!(p3 == p2));
+
+ CopyVisitor cv(p3);
+ p.visitProperties(cv);
+
+ EXPECT_EQUAL(p3.numKeys(), 6u);
+ EXPECT_EQUAL(p3.numValues(), 7u);
+
+ EXPECT_TRUE(p == p3);
+ EXPECT_TRUE(p3 == p);
+ EXPECT_EQUAL(p.hashCode(), p3.hashCode());
+
+ p.clear();
+ EXPECT_EQUAL(p.numKeys(), 0u);
+ EXPECT_EQUAL(p.numValues(), 0u);
+ EXPECT_TRUE(!(p == p3));
+ EXPECT_TRUE(!(p3 == p));
+
+ Properties p4;
+ CopyVisitor cv2(p4);
+ p.visitProperties(cv);
+ EXPECT_EQUAL(p4.numKeys(), 0u);
+ EXPECT_EQUAL(p4.numValues(), 0u);
+ EXPECT_TRUE(p == p4);
+ EXPECT_TRUE(p4 == p);
+ EXPECT_EQUAL(p.hashCode(), p4.hashCode());
+ }
+
+ { // test index properties known by the framework
+ { // vespa.rank.firstphase
+ EXPECT_EQUAL(rank::FirstPhase::NAME, vespalib::string("vespa.rank.firstphase"));
+ EXPECT_EQUAL(rank::FirstPhase::DEFAULT_VALUE, vespalib::string("nativeRank"));
+ Properties p;
+ EXPECT_EQUAL(rank::FirstPhase::lookup(p), vespalib::string("nativeRank"));
+ p.add("vespa.rank.firstphase", "specialrank");
+ EXPECT_EQUAL(rank::FirstPhase::lookup(p), vespalib::string("specialrank"));
+ }
+ { // vespa.rank.secondphase
+ EXPECT_EQUAL(rank::SecondPhase::NAME, vespalib::string("vespa.rank.secondphase"));
+ EXPECT_EQUAL(rank::SecondPhase::DEFAULT_VALUE, vespalib::string(""));
+ Properties p;
+ EXPECT_EQUAL(rank::SecondPhase::lookup(p), vespalib::string(""));
+ p.add("vespa.rank.secondphase", "specialrank");
+ EXPECT_EQUAL(rank::SecondPhase::lookup(p), vespalib::string("specialrank"));
+ }
+ { // vespa.dump.feature
+ EXPECT_EQUAL(dump::Feature::NAME, vespalib::string("vespa.dump.feature"));
+ EXPECT_EQUAL(dump::Feature::DEFAULT_VALUE.size(), 0u);
+ Properties p;
+ EXPECT_EQUAL(dump::Feature::lookup(p).size(), 0u);
+ p.add("vespa.dump.feature", "foo");
+ p.add("vespa.dump.feature", "bar");
+ std::vector<vespalib::string> a = dump::Feature::lookup(p);
+ ASSERT_TRUE(a.size() == 2);
+ EXPECT_EQUAL(a[0], vespalib::string("foo"));
+ EXPECT_EQUAL(a[1], vespalib::string("bar"));
+ }
+ { // vespa.dump.ignoredefaultfeatures
+ EXPECT_EQUAL(dump::IgnoreDefaultFeatures::NAME, vespalib::string("vespa.dump.ignoredefaultfeatures"));
+ EXPECT_EQUAL(dump::IgnoreDefaultFeatures::DEFAULT_VALUE, "false");
+ Properties p;
+ EXPECT_TRUE(!dump::IgnoreDefaultFeatures::check(p));
+ p.add("vespa.dump.ignoredefaultfeatures", "true");
+ EXPECT_TRUE(dump::IgnoreDefaultFeatures::check(p));
+ }
+ { // vespa.matching.termwise_limit
+ EXPECT_EQUAL(matching::TermwiseLimit::NAME, vespalib::string("vespa.matching.termwise_limit"));
+ EXPECT_EQUAL(matching::TermwiseLimit::DEFAULT_VALUE, 1.0);
+ Properties p;
+ EXPECT_EQUAL(matching::TermwiseLimit::lookup(p), 1.0);
+ p.add("vespa.matching.termwise_limit", "0.05");
+ EXPECT_EQUAL(matching::TermwiseLimit::lookup(p), 0.05);
+ }
+ { // vespa.matching.numthreads
+ EXPECT_EQUAL(matching::NumThreadsPerSearch::NAME, vespalib::string("vespa.matching.numthreadspersearch"));
+ EXPECT_EQUAL(matching::NumThreadsPerSearch::DEFAULT_VALUE, std::numeric_limits<uint32_t>::max());
+ Properties p;
+ EXPECT_EQUAL(matching::NumThreadsPerSearch::lookup(p), std::numeric_limits<uint32_t>::max());
+ p.add("vespa.matching.numthreadspersearch", "50");
+ EXPECT_EQUAL(matching::NumThreadsPerSearch::lookup(p), 50u);
+ }
+ {
+ EXPECT_EQUAL(matching::NumSearchPartitions::NAME, vespalib::string("vespa.matching.numsearchpartitions"));
+ EXPECT_EQUAL(matching::NumSearchPartitions::DEFAULT_VALUE, 1u);
+ Properties p;
+ EXPECT_EQUAL(matching::NumSearchPartitions::lookup(p), 1u);
+ p.add("vespa.matching.numsearchpartitions", "50");
+ EXPECT_EQUAL(matching::NumSearchPartitions::lookup(p), 50u);
+ }
+ { // vespa.matchphase.degradation.attribute
+ EXPECT_EQUAL(matchphase::DegradationAttribute::NAME, vespalib::string("vespa.matchphase.degradation.attribute"));
+ EXPECT_EQUAL(matchphase::DegradationAttribute::DEFAULT_VALUE, "");
+ Properties p;
+ EXPECT_EQUAL(matchphase::DegradationAttribute::lookup(p), "");
+ p.add("vespa.matchphase.degradation.attribute", "foobar");
+ EXPECT_EQUAL(matchphase::DegradationAttribute::lookup(p), "foobar");
+ }
+ { // vespa.matchphase.degradation.ascending
+ EXPECT_EQUAL(matchphase::DegradationAscendingOrder::NAME, vespalib::string("vespa.matchphase.degradation.ascendingorder"));
+ EXPECT_EQUAL(matchphase::DegradationAscendingOrder::DEFAULT_VALUE, false);
+ Properties p;
+ EXPECT_EQUAL(matchphase::DegradationAscendingOrder::lookup(p), false);
+ p.add("vespa.matchphase.degradation.ascendingorder", "true");
+ EXPECT_EQUAL(matchphase::DegradationAscendingOrder::lookup(p), true);
+ }
+ { // vespa.matchphase.degradation.maxhits
+ EXPECT_EQUAL(matchphase::DegradationMaxHits::NAME, vespalib::string("vespa.matchphase.degradation.maxhits"));
+ EXPECT_EQUAL(matchphase::DegradationMaxHits::DEFAULT_VALUE, 0u);
+ Properties p;
+ EXPECT_EQUAL(matchphase::DegradationMaxHits::lookup(p), 0u);
+ p.add("vespa.matchphase.degradation.maxhits", "123789");
+ EXPECT_EQUAL(matchphase::DegradationMaxHits::lookup(p), 123789u);
+ }
+ { // vespa.matchphase.degradation.samplepercentage
+ EXPECT_EQUAL(matchphase::DegradationSamplePercentage::NAME, vespalib::string("vespa.matchphase.degradation.samplepercentage"));
+ EXPECT_EQUAL(matchphase::DegradationSamplePercentage::DEFAULT_VALUE, 0.2);
+ Properties p;
+ EXPECT_EQUAL(matchphase::DegradationSamplePercentage::lookup(p), 0.2);
+ p.add("vespa.matchphase.degradation.samplepercentage", "0.9");
+ EXPECT_EQUAL(matchphase::DegradationSamplePercentage::lookup(p), 0.9);
+ }
+ { // vespa.matchphase.degradation.maxfiltercoverage
+ EXPECT_EQUAL(matchphase::DegradationMaxFilterCoverage::NAME, vespalib::string("vespa.matchphase.degradation.maxfiltercoverage"));
+ EXPECT_EQUAL(matchphase::DegradationMaxFilterCoverage::DEFAULT_VALUE, 1.0);
+ Properties p;
+ EXPECT_EQUAL(matchphase::DegradationMaxFilterCoverage::lookup(p), 1.0);
+ p.add("vespa.matchphase.degradation.maxfiltercoverage", "0.076");
+ EXPECT_EQUAL(matchphase::DegradationMaxFilterCoverage::lookup(p), 0.076);
+ }
+ { // vespa.matchphase.degradation.postfiltermultiplier
+ EXPECT_EQUAL(matchphase::DegradationPostFilterMultiplier::NAME, vespalib::string("vespa.matchphase.degradation.postfiltermultiplier"));
+ EXPECT_EQUAL(matchphase::DegradationPostFilterMultiplier::DEFAULT_VALUE, 1.0);
+ Properties p;
+ EXPECT_EQUAL(matchphase::DegradationPostFilterMultiplier::lookup(p), 1.0);
+ p.add("vespa.matchphase.degradation.postfiltermultiplier", "0.9");
+ EXPECT_EQUAL(matchphase::DegradationPostFilterMultiplier::lookup(p), 0.9);
+ }
+ { // vespa.matchphase.diversity.attribute
+ EXPECT_EQUAL(matchphase::DiversityAttribute::NAME, vespalib::string("vespa.matchphase.diversity.attribute"));
+ EXPECT_EQUAL(matchphase::DiversityAttribute::DEFAULT_VALUE, "");
+ Properties p;
+ EXPECT_EQUAL(matchphase::DiversityAttribute::lookup(p), "");
+ p.add("vespa.matchphase.diversity.attribute", "foobar");
+ EXPECT_EQUAL(matchphase::DiversityAttribute::lookup(p), "foobar");
+ }
+ { // vespa.matchphase.diversity.mingroups
+ EXPECT_EQUAL(matchphase::DiversityMinGroups::NAME, vespalib::string("vespa.matchphase.diversity.mingroups"));
+ EXPECT_EQUAL(matchphase::DiversityMinGroups::DEFAULT_VALUE, 1u);
+ Properties p;
+ EXPECT_EQUAL(matchphase::DiversityMinGroups::lookup(p), 1u);
+ p.add("vespa.matchphase.diversity.mingroups", "5");
+ EXPECT_EQUAL(matchphase::DiversityMinGroups::lookup(p), 5u);
+ }
+ { // vespa.hitcollector.heapsize
+ EXPECT_EQUAL(hitcollector::HeapSize::NAME, vespalib::string("vespa.hitcollector.heapsize"));
+ EXPECT_EQUAL(hitcollector::HeapSize::DEFAULT_VALUE, 100u);
+ Properties p;
+ EXPECT_EQUAL(hitcollector::HeapSize::lookup(p), 100u);
+ p.add("vespa.hitcollector.heapsize", "50");
+ EXPECT_EQUAL(hitcollector::HeapSize::lookup(p), 50u);
+ }
+ { // vespa.hitcollector.arraysize
+ EXPECT_EQUAL(hitcollector::ArraySize::NAME, vespalib::string("vespa.hitcollector.arraysize"));
+ EXPECT_EQUAL(hitcollector::ArraySize::DEFAULT_VALUE, 10000u);
+ Properties p;
+ EXPECT_EQUAL(hitcollector::ArraySize::lookup(p), 10000u);
+ p.add("vespa.hitcollector.arraysize", "50");
+ EXPECT_EQUAL(hitcollector::ArraySize::lookup(p), 50u);
+ }
+ { // vespa.hitcollector.estimatepoint
+ EXPECT_EQUAL(hitcollector::EstimatePoint::NAME, vespalib::string("vespa.hitcollector.estimatepoint"));
+ EXPECT_EQUAL(hitcollector::EstimatePoint::DEFAULT_VALUE, 0xffffffffu);
+ Properties p;
+ EXPECT_EQUAL(hitcollector::EstimatePoint::lookup(p), 0xffffffffu);
+ p.add("vespa.hitcollector.estimatepoint", "50");
+ EXPECT_EQUAL(hitcollector::EstimatePoint::lookup(p), 50u);
+ }
+ { // vespa.hitcollector.estimatelimit
+ EXPECT_EQUAL(hitcollector::EstimateLimit::NAME, vespalib::string("vespa.hitcollector.estimatelimit"));
+ EXPECT_EQUAL(hitcollector::EstimateLimit::DEFAULT_VALUE, 0xffffffffu);
+ Properties p;
+ EXPECT_EQUAL(hitcollector::EstimateLimit::lookup(p), 0xffffffffu);
+ p.add("vespa.hitcollector.estimatelimit", "50");
+ EXPECT_EQUAL(hitcollector::EstimateLimit::lookup(p), 50u);
+ }
+ { // vespa.hitcollector.rankscoredroplimit
+ EXPECT_EQUAL(hitcollector::RankScoreDropLimit::NAME, vespalib::string("vespa.hitcollector.rankscoredroplimit"));
+ search::feature_t got1 = hitcollector::RankScoreDropLimit::DEFAULT_VALUE;
+ EXPECT_TRUE(got1 != got1);
+ Properties p;
+ search::feature_t got2= hitcollector::RankScoreDropLimit::lookup(p);
+ EXPECT_TRUE(got2 != got2);
+ p.add("vespa.hitcollector.rankscoredroplimit", "-123456789.12345");
+ EXPECT_EQUAL(hitcollector::RankScoreDropLimit::lookup(p), -123456789.12345);
+ p.clear().add("vespa.hitcollector.rankscoredroplimit", "123456789.12345");
+ EXPECT_EQUAL(hitcollector::RankScoreDropLimit::lookup(p), 123456789.12345);
+ }
+ { // vespa.fieldweight.
+ EXPECT_EQUAL(FieldWeight::BASE_NAME, vespalib::string("vespa.fieldweight."));
+ EXPECT_EQUAL(FieldWeight::DEFAULT_VALUE, 100u);
+ Properties p;
+ EXPECT_EQUAL(FieldWeight::lookup(p, "foo"), 100u);
+ p.add("vespa.fieldweight.foo", "200");
+ EXPECT_EQUAL(FieldWeight::lookup(p, "foo"), 200u);
+ }
+ { // vespa.isfilterfield.
+ EXPECT_EQUAL(IsFilterField::BASE_NAME, "vespa.isfilterfield.");
+ EXPECT_EQUAL(IsFilterField::DEFAULT_VALUE, "false");
+ Properties p;
+ EXPECT_TRUE(!IsFilterField::check(p, "foo"));
+ p.add("vespa.isfilterfield.foo", "true");
+ EXPECT_TRUE(IsFilterField::check(p, "foo"));
+ EXPECT_TRUE(!IsFilterField::check(p, "bar"));
+ IsFilterField::set(p, "bar");
+ EXPECT_TRUE(IsFilterField::check(p, "bar"));
+ }
+ }
+}
+
+TEST("test attribute type properties")
+{
+ Properties p;
+ p.add("vespa.type.attribute.foo", "tensor(x[10])");
+ EXPECT_EQUAL("tensor(x[10])", type::Attribute::lookup(p, "foo"));
+ EXPECT_EQUAL("", type::Attribute::lookup(p, "bar"));
+}
+
+TEST("test query feature type properties")
+{
+ Properties p;
+ p.add("vespa.type.query.foo", "tensor(x[10])");
+ EXPECT_EQUAL("tensor(x[10])", type::QueryFeature::lookup(p, "foo"));
+ EXPECT_EQUAL("", type::QueryFeature::lookup(p, "bar"));
+}
+
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/fef/rank_program/.gitignore b/searchlib/src/tests/fef/rank_program/.gitignore
new file mode 100644
index 00000000000..b86a29e139f
--- /dev/null
+++ b/searchlib/src/tests/fef/rank_program/.gitignore
@@ -0,0 +1 @@
+searchlib_rank_program_test_app
diff --git a/searchlib/src/tests/fef/rank_program/CMakeLists.txt b/searchlib/src/tests/fef/rank_program/CMakeLists.txt
new file mode 100644
index 00000000000..12d971a9421
--- /dev/null
+++ b/searchlib/src/tests/fef/rank_program/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_rank_program_test_app
+ SOURCES
+ rank_program_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_rank_program_test_app COMMAND searchlib_rank_program_test_app)
diff --git a/searchlib/src/tests/fef/rank_program/FILES b/searchlib/src/tests/fef/rank_program/FILES
new file mode 100644
index 00000000000..bf6e4665a68
--- /dev/null
+++ b/searchlib/src/tests/fef/rank_program/FILES
@@ -0,0 +1 @@
+rank_program_test.cpp
diff --git a/searchlib/src/tests/fef/rank_program/rank_program_test.cpp b/searchlib/src/tests/fef/rank_program/rank_program_test.cpp
new file mode 100644
index 00000000000..baf665c58e8
--- /dev/null
+++ b/searchlib/src/tests/fef/rank_program/rank_program_test.cpp
@@ -0,0 +1,172 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/vespalib/testkit/test_kit.h>
+#include <vespa/vespalib/stllike/string.h>
+#include <vespa/vespalib/util/stringfmt.h>
+#include <vespa/searchlib/features/valuefeature.h>
+#include <vespa/searchlib/fef/blueprintfactory.h>
+#include <vespa/searchlib/fef/test/indexenvironment.h>
+#include <vespa/searchlib/fef/test/queryenvironment.h>
+#include <vespa/searchlib/fef/test/plugin/sum.h>
+#include <vespa/searchlib/fef/rank_program.h>
+
+using namespace search::fef;
+using namespace search::fef::test;
+using namespace search::features;
+
+struct ImpureValueExecutor : FeatureExecutor {
+ double value;
+ ImpureValueExecutor(double value_in) : value(value_in) {}
+ bool isPure() override { return false; }
+ void execute(search::fef::MatchData &md) override { *md.resolveFeature(outputs()[0]) = value; }
+};
+
+struct ImpureValueBlueprint : Blueprint {
+ double value;
+ ImpureValueBlueprint() : Blueprint("ivalue"), value(31212.0) {}
+ void visitDumpFeatures(const IIndexEnvironment &, IDumpFeatureVisitor &) const override {}
+ Blueprint::UP createInstance() const override { return Blueprint::UP(new ImpureValueBlueprint()); }
+ bool setup(const IIndexEnvironment &, const std::vector<vespalib::string> &params) override {
+ ASSERT_EQUAL(1u, params.size());
+ value = strtod(params[0].c_str(), nullptr);
+ describeOutput("out", "the impure value");
+ return true;
+ }
+ FeatureExecutor::LP createExecutor(const IQueryEnvironment &) const override {
+ return FeatureExecutor::LP(new ImpureValueExecutor(value));
+ }
+};
+
+struct MySetup {
+ BlueprintFactory factory;
+ IndexEnvironment indexEnv;
+ BlueprintResolver::SP resolver;
+ Properties overrides;
+ RankProgram program;
+ MySetup() : factory(), indexEnv(), resolver(new BlueprintResolver(factory, indexEnv)),
+ overrides(), program(resolver)
+ {
+ factory.addPrototype(Blueprint::SP(new ValueBlueprint()));
+ factory.addPrototype(Blueprint::SP(new ImpureValueBlueprint()));
+ factory.addPrototype(Blueprint::SP(new SumBlueprint()));
+ }
+ MySetup &add(const vespalib::string &feature) {
+ resolver->addSeed(feature);
+ return *this;
+ }
+ MySetup &override(const vespalib::string &feature, double value) {
+ overrides.add(feature, vespalib::make_string("%g", value));
+ return *this;
+ }
+ MySetup &compile() {
+ ASSERT_TRUE(resolver->compile());
+ MatchDataLayout mdl;
+ QueryEnvironment queryEnv(&indexEnv);
+ program.setup(mdl, queryEnv, overrides);
+ return *this;
+ }
+ MySetup &run() {
+ program.run(1);
+ return *this;
+ }
+ double get() {
+ std::vector<vespalib::string> names;
+ std::vector<FeatureHandle> handles;
+ program.get_seed_handles(names, handles);
+ EXPECT_EQUAL(1u, names.size());
+ EXPECT_EQUAL(names.size(), handles.size());
+ return *program.match_data().resolveFeature(handles[0]);
+ }
+ double get(const vespalib::string &feature) {
+ std::vector<vespalib::string> names;
+ std::vector<FeatureHandle> handles;
+ program.get_seed_handles(names, handles);
+ EXPECT_EQUAL(names.size(), handles.size());
+ for (size_t i = 0; i < names.size(); ++i) {
+ if (names[i] == feature) {
+ return *program.match_data().resolveFeature(handles[i]);
+ }
+ }
+ return 31212.0;
+ }
+ std::map<vespalib::string, double> all() {
+ std::map<vespalib::string, double> result;
+ std::vector<vespalib::string> names;
+ std::vector<FeatureHandle> handles;
+ program.get_seed_handles(names, handles);
+ EXPECT_EQUAL(names.size(), handles.size());
+ for (size_t i = 0; i < names.size(); ++i) {
+ result[names[i]] = *program.match_data().resolveFeature(handles[i]);
+ }
+ return result;
+ }
+};
+
+TEST_F("require that match data docid is set by run", MySetup()) {
+ f1.compile();
+ EXPECT_NOT_EQUAL(1u, f1.program.match_data().getDocId());
+ f1.run();
+ EXPECT_EQUAL(1u, f1.program.match_data().getDocId());
+}
+
+TEST_F("require that simple program works", MySetup()) {
+ EXPECT_EQUAL(15.0, f1.add("mysum(value(10),ivalue(5))").compile().run().get());
+ EXPECT_EQUAL(3u, f1.program.num_executors());
+ EXPECT_EQUAL(2u, f1.program.program_size());
+}
+
+TEST_F("require that const features are calculated during setup", MySetup()) {
+ f1.add("mysum(value(10),value(5))").compile();
+ EXPECT_EQUAL(15.0, f1.get());
+ EXPECT_EQUAL(3u, f1.program.num_executors());
+ EXPECT_EQUAL(0u, f1.program.program_size());
+}
+
+TEST_F("require that non-const features are calculated during run", MySetup()) {
+ f1.add("mysum(ivalue(10),ivalue(5))").compile();
+ EXPECT_EQUAL(0.0, f1.get());
+ f1.run();
+ EXPECT_EQUAL(15.0, f1.get());
+ EXPECT_EQUAL(3u, f1.program.num_executors());
+ EXPECT_EQUAL(3u, f1.program.program_size());
+}
+
+TEST_F("require that a single program can calculate multiple output features", MySetup()) {
+ f1.add("value(1)").add("ivalue(2)").add("ivalue(3)");
+ f1.add("mysum(value(1),value(2),ivalue(3))");
+ f1.compile().run();
+ EXPECT_EQUAL(5u, f1.program.num_executors());
+ EXPECT_EQUAL(3u, f1.program.program_size());
+ EXPECT_EQUAL(5u, f1.program.match_data().getNumFeatures());
+ auto result = f1.all();
+ EXPECT_EQUAL(4u, result.size());
+ EXPECT_EQUAL(1.0, result["value(1)"]);
+ EXPECT_EQUAL(2.0, result["ivalue(2)"]);
+ EXPECT_EQUAL(3.0, result["ivalue(3)"]);
+ EXPECT_EQUAL(6.0, result["mysum(value(1),value(2),ivalue(3))"]);
+}
+
+TEST_F("require that a single executor can produce multiple features", MySetup()) {
+ f1.add("mysum(value(1,2,3).0,value(1,2,3).1,value(1,2,3).2)");
+ EXPECT_EQUAL(6.0, f1.compile().run().get());
+ EXPECT_EQUAL(2u, f1.program.num_executors());
+ EXPECT_EQUAL(0u, f1.program.program_size());
+ EXPECT_EQUAL(4u, f1.program.match_data().getNumFeatures());
+}
+
+TEST_F("require that feature values can be overridden", MySetup()) {
+ f1.add("value(1)").add("ivalue(2)").add("ivalue(3)");
+ f1.add("mysum(value(1),value(2),ivalue(3))");
+ f1.override("value(2)", 20.0).override("ivalue(3)", 30.0);
+ f1.compile().run();
+ EXPECT_EQUAL(5u, f1.program.num_executors());
+ EXPECT_EQUAL(3u, f1.program.program_size());
+ EXPECT_EQUAL(5u, f1.program.match_data().getNumFeatures());
+ auto result = f1.all();
+ EXPECT_EQUAL(4u, result.size());
+ EXPECT_EQUAL(1.0, result["value(1)"]);
+ EXPECT_EQUAL(2.0, result["ivalue(2)"]);
+ EXPECT_EQUAL(30.0, result["ivalue(3)"]);
+ EXPECT_EQUAL(51.0, result["mysum(value(1),value(2),ivalue(3))"]);
+}
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/fef/resolver/.gitignore b/searchlib/src/tests/fef/resolver/.gitignore
new file mode 100644
index 00000000000..57114e69298
--- /dev/null
+++ b/searchlib/src/tests/fef/resolver/.gitignore
@@ -0,0 +1,4 @@
+*_test
+.depend
+Makefile
+searchlib_resolver_test_app
diff --git a/searchlib/src/tests/fef/resolver/CMakeLists.txt b/searchlib/src/tests/fef/resolver/CMakeLists.txt
new file mode 100644
index 00000000000..835a50fd6fb
--- /dev/null
+++ b/searchlib/src/tests/fef/resolver/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_resolver_test_app
+ SOURCES
+ resolver_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_resolver_test_app COMMAND searchlib_resolver_test_app)
diff --git a/searchlib/src/tests/fef/resolver/DESC b/searchlib/src/tests/fef/resolver/DESC
new file mode 100644
index 00000000000..7d3262ab110
--- /dev/null
+++ b/searchlib/src/tests/fef/resolver/DESC
@@ -0,0 +1 @@
+resolver test. Take a look at resolver_test.cpp for details.
diff --git a/searchlib/src/tests/fef/resolver/FILES b/searchlib/src/tests/fef/resolver/FILES
new file mode 100644
index 00000000000..c40c0663848
--- /dev/null
+++ b/searchlib/src/tests/fef/resolver/FILES
@@ -0,0 +1 @@
+resolver_test.cpp
diff --git a/searchlib/src/tests/fef/resolver/resolver_test.cpp b/searchlib/src/tests/fef/resolver/resolver_test.cpp
new file mode 100644
index 00000000000..3d791f886e1
--- /dev/null
+++ b/searchlib/src/tests/fef/resolver/resolver_test.cpp
@@ -0,0 +1,93 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("resolver_test");
+#include <vespa/vespalib/testkit/testapp.h>
+#include <vespa/searchlib/fef/fef.h>
+#include <vespa/searchlib/fef/test/indexenvironment.h>
+
+namespace search {
+namespace fef {
+
+class BaseBlueprint : public Blueprint {
+public:
+ BaseBlueprint() : Blueprint("base") { }
+ virtual void visitDumpFeatures(const IIndexEnvironment &,
+ IDumpFeatureVisitor &) const {}
+ virtual Blueprint::UP createInstance() const { return Blueprint::UP(new BaseBlueprint()); }
+ virtual bool setup(const IIndexEnvironment & indexEnv,
+ const ParameterList & params) {
+ (void) indexEnv; (void) params;
+ describeOutput("foo", "foo");
+ describeOutput("bar", "bar");
+ describeOutput("baz", "baz");
+ return true;
+ }
+ virtual FeatureExecutor::LP createExecutor(const IQueryEnvironment &) const {
+ return FeatureExecutor::LP(NULL);
+ }
+};
+
+class CombineBlueprint : public Blueprint {
+public:
+ CombineBlueprint() : Blueprint("combine") { }
+ virtual void visitDumpFeatures(const IIndexEnvironment &,
+ IDumpFeatureVisitor &) const {}
+ virtual Blueprint::UP createInstance() const { return Blueprint::UP(new CombineBlueprint()); }
+ virtual bool setup(const IIndexEnvironment & indexEnv,
+ const ParameterList & params) {
+ (void) indexEnv; (void) params;
+ defineInput("base.foo");
+ defineInput("base.bar");
+ defineInput("base.baz");
+ describeOutput("out", "out");
+ return true;
+ }
+ virtual FeatureExecutor::LP createExecutor(const IQueryEnvironment &) const {
+ return FeatureExecutor::LP(NULL);
+ }
+};
+
+class Test : public vespalib::TestApp {
+private:
+ BlueprintFactory _factory;
+ void requireThatWeGetUniqueBlueprints();
+public:
+ Test();
+ int Main();
+};
+
+Test::Test() :
+ _factory()
+{
+ _factory.addPrototype(Blueprint::SP(new BaseBlueprint()));
+ _factory.addPrototype(Blueprint::SP(new CombineBlueprint()));
+}
+
+void
+Test::requireThatWeGetUniqueBlueprints()
+{
+ test::IndexEnvironment ienv;
+ BlueprintResolver::SP res(new BlueprintResolver(_factory, ienv));
+ res->addSeed("combine");
+ EXPECT_TRUE(res->compile());
+ const BlueprintResolver::ExecutorSpecList & spec = res->getExecutorSpecs();
+ EXPECT_EQUAL(2u, spec.size());
+ EXPECT_TRUE(dynamic_cast<BaseBlueprint *>(spec[0].blueprint.get()) != NULL);
+ EXPECT_TRUE(dynamic_cast<CombineBlueprint *>(spec[1].blueprint.get()) != NULL);
+}
+
+int
+Test::Main()
+{
+ TEST_INIT("resolver_test");
+
+ requireThatWeGetUniqueBlueprints();
+
+ TEST_DONE();
+}
+
+}
+}
+
+TEST_APPHOOK(search::fef::Test);
diff --git a/searchlib/src/tests/fef/table/.gitignore b/searchlib/src/tests/fef/table/.gitignore
new file mode 100644
index 00000000000..b89a30490e0
--- /dev/null
+++ b/searchlib/src/tests/fef/table/.gitignore
@@ -0,0 +1,4 @@
+.depend
+Makefile
+table_test
+searchlib_table_test_app
diff --git a/searchlib/src/tests/fef/table/CMakeLists.txt b/searchlib/src/tests/fef/table/CMakeLists.txt
new file mode 100644
index 00000000000..ca61eb7c365
--- /dev/null
+++ b/searchlib/src/tests/fef/table/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_table_test_app
+ SOURCES
+ table_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_table_test_app COMMAND searchlib_table_test_app)
diff --git a/searchlib/src/tests/fef/table/DESC b/searchlib/src/tests/fef/table/DESC
new file mode 100644
index 00000000000..65834ed1305
--- /dev/null
+++ b/searchlib/src/tests/fef/table/DESC
@@ -0,0 +1 @@
+table test. Take a look at table.cpp for details.
diff --git a/searchlib/src/tests/fef/table/FILES b/searchlib/src/tests/fef/table/FILES
new file mode 100644
index 00000000000..40be726aeb8
--- /dev/null
+++ b/searchlib/src/tests/fef/table/FILES
@@ -0,0 +1 @@
+table.cpp
diff --git a/searchlib/src/tests/fef/table/table_test.cpp b/searchlib/src/tests/fef/table/table_test.cpp
new file mode 100644
index 00000000000..2d05e0c7310
--- /dev/null
+++ b/searchlib/src/tests/fef/table/table_test.cpp
@@ -0,0 +1,159 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("tablemanager_test");
+#include <vespa/vespalib/testkit/testapp.h>
+
+#include <fstream>
+#include <limits>
+#include <iostream>
+#include <vespa/searchlib/fef/filetablefactory.h>
+#include <vespa/searchlib/fef/functiontablefactory.h>
+#include <vespa/searchlib/fef/table.h>
+#include <vespa/searchlib/fef/tablemanager.h>
+
+namespace search {
+namespace fef {
+
+class TableTest : public vespalib::TestApp
+{
+private:
+ bool assertTable(const Table & act, const Table & exp);
+ bool assertCreateTable(const ITableFactory & tf, const vespalib::string & name, const Table & exp);
+ void testTable();
+ void testFileTableFactory();
+ void testFunctionTableFactory();
+ void testTableManager();
+
+public:
+ int Main();
+};
+
+bool
+TableTest::assertTable(const Table & act, const Table & exp)
+{
+ if (!EXPECT_EQUAL(act.size(), exp.size())) return false;
+ for (size_t i = 0; i < act.size(); ++i) {
+ if (!EXPECT_APPROX(act[i], exp[i], 0.01)) return false;
+ }
+ return true;
+}
+
+bool
+TableTest::assertCreateTable(const ITableFactory & tf, const vespalib::string & name, const Table & exp)
+{
+ Table::SP t = tf.createTable(name);
+ if (!EXPECT_TRUE(t.get() != NULL)) return false;
+ return assertTable(*t, exp);
+}
+
+void
+TableTest::testTable()
+{
+ Table t;
+ EXPECT_EQUAL(t.size(), 0u);
+ EXPECT_EQUAL(t.max(), -std::numeric_limits<double>::max());
+ t.add(1).add(2);
+ EXPECT_EQUAL(t.size(), 2u);
+ EXPECT_EQUAL(t.max(), 2);
+ EXPECT_EQUAL(t[0], 1);
+ EXPECT_EQUAL(t[1], 2);
+ t.add(10);
+ EXPECT_EQUAL(t.size(), 3u);
+ EXPECT_EQUAL(t.max(), 10);
+ EXPECT_EQUAL(t[2], 10);
+ t.add(5);
+ EXPECT_EQUAL(t.size(), 4u);
+ EXPECT_EQUAL(t.max(), 10);
+ EXPECT_EQUAL(t[3], 5);
+}
+
+void
+TableTest::testFileTableFactory()
+{
+ {
+ FileTableFactory ftf("tables1");
+ EXPECT_TRUE(assertCreateTable(ftf, "a", Table().add(1.5).add(2.25).add(3)));
+ EXPECT_TRUE(ftf.createTable("b").get() == NULL);
+ }
+ {
+ FileTableFactory ftf("tables1/");
+ EXPECT_TRUE(ftf.createTable("a").get() != NULL);
+ }
+}
+
+void
+TableTest::testFunctionTableFactory()
+{
+ FunctionTableFactory ftf(2);
+ EXPECT_TRUE(assertCreateTable(ftf, "expdecay(400,12)",
+ Table().add(400).add(368.02)));
+ EXPECT_TRUE(assertCreateTable(ftf, "loggrowth(1000,5000,1)",
+ Table().add(5000).add(5693.15)));
+ EXPECT_TRUE(assertCreateTable(ftf, "linear(10,100)",
+ Table().add(100).add(110)));
+ // specify table size
+ EXPECT_TRUE(assertCreateTable(ftf, "expdecay(400,12,3)",
+ Table().add(400).add(368.02).add(338.60)));
+ EXPECT_TRUE(assertCreateTable(ftf, "loggrowth(1000,5000,1,3)",
+ Table().add(5000).add(5693.15).add(6098.61)));
+ EXPECT_TRUE(assertCreateTable(ftf, "linear(10,100,3)",
+ Table().add(100).add(110).add(120)));
+ EXPECT_TRUE(ftf.createTable("expdecay()").get() == NULL);
+ EXPECT_TRUE(ftf.createTable("expdecay(10)").get() == NULL);
+ EXPECT_TRUE(ftf.createTable("loggrowth()").get() == NULL);
+ EXPECT_TRUE(ftf.createTable("linear()").get() == NULL);
+ EXPECT_TRUE(ftf.createTable("none").get() == NULL);
+ EXPECT_TRUE(ftf.createTable("none(").get() == NULL);
+ EXPECT_TRUE(ftf.createTable("none)").get() == NULL);
+ EXPECT_TRUE(ftf.createTable("none)(").get() == NULL);
+}
+
+void
+TableTest::testTableManager()
+{
+ {
+ TableManager tm;
+ tm.addFactory(ITableFactory::SP(new FileTableFactory("tables1")));
+ tm.addFactory(ITableFactory::SP(new FileTableFactory("tables2")));
+
+ {
+ const Table * t = tm.getTable("a"); // from tables1
+ ASSERT_TRUE(t != NULL);
+ EXPECT_TRUE(assertTable(*t, Table().add(1.5).add(2.25).add(3)));
+ EXPECT_TRUE(t == tm.getTable("a"));
+ }
+ {
+ const Table * t = tm.getTable("b"); // from tables2
+ ASSERT_TRUE(t != NULL);
+ EXPECT_TRUE(assertTable(*t, Table().add(40).add(50).add(60)));
+ EXPECT_TRUE(t == tm.getTable("b"));
+ }
+ {
+ EXPECT_TRUE(tm.getTable("c") == NULL);
+ EXPECT_TRUE(tm.getTable("c") == NULL);
+ }
+ }
+ {
+ TableManager tm;
+ ASSERT_TRUE(tm.getTable("a") == NULL);
+ }
+}
+
+int
+TableTest::Main()
+{
+ TEST_INIT("table_test");
+
+ testTable();
+ testFileTableFactory();
+ testFunctionTableFactory();
+ testTableManager();
+
+ TEST_DONE();
+}
+
+}
+}
+
+TEST_APPHOOK(search::fef::TableTest);
diff --git a/searchlib/src/tests/fef/table/tables1/a b/searchlib/src/tests/fef/table/tables1/a
new file mode 100644
index 00000000000..c46f4d59a71
--- /dev/null
+++ b/searchlib/src/tests/fef/table/tables1/a
@@ -0,0 +1,3 @@
+1.5
+2.25
+3
diff --git a/searchlib/src/tests/fef/table/tables2/a b/searchlib/src/tests/fef/table/tables2/a
new file mode 100644
index 00000000000..300ed6fcd17
--- /dev/null
+++ b/searchlib/src/tests/fef/table/tables2/a
@@ -0,0 +1,3 @@
+10
+20
+30
diff --git a/searchlib/src/tests/fef/table/tables2/b b/searchlib/src/tests/fef/table/tables2/b
new file mode 100644
index 00000000000..6f98b52f55f
--- /dev/null
+++ b/searchlib/src/tests/fef/table/tables2/b
@@ -0,0 +1,3 @@
+40
+50
+60
diff --git a/searchlib/src/tests/fef/termfieldmodel/.gitignore b/searchlib/src/tests/fef/termfieldmodel/.gitignore
new file mode 100644
index 00000000000..0f860efa14a
--- /dev/null
+++ b/searchlib/src/tests/fef/termfieldmodel/.gitignore
@@ -0,0 +1,4 @@
+*_test
+.depend
+Makefile
+searchlib_termfieldmodel_test_app
diff --git a/searchlib/src/tests/fef/termfieldmodel/CMakeLists.txt b/searchlib/src/tests/fef/termfieldmodel/CMakeLists.txt
new file mode 100644
index 00000000000..c8a678c11bb
--- /dev/null
+++ b/searchlib/src/tests/fef/termfieldmodel/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_termfieldmodel_test_app
+ SOURCES
+ termfieldmodel_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_termfieldmodel_test_app COMMAND searchlib_termfieldmodel_test_app)
diff --git a/searchlib/src/tests/fef/termfieldmodel/DESC b/searchlib/src/tests/fef/termfieldmodel/DESC
new file mode 100644
index 00000000000..2c8df5a8aab
--- /dev/null
+++ b/searchlib/src/tests/fef/termfieldmodel/DESC
@@ -0,0 +1 @@
+termfieldmodel test. Take a look at termfieldmodel_test.cpp for details.
diff --git a/searchlib/src/tests/fef/termfieldmodel/FILES b/searchlib/src/tests/fef/termfieldmodel/FILES
new file mode 100644
index 00000000000..b5440335bc6
--- /dev/null
+++ b/searchlib/src/tests/fef/termfieldmodel/FILES
@@ -0,0 +1 @@
+termfieldmodel_test.cpp
diff --git a/searchlib/src/tests/fef/termfieldmodel/termfieldmodel_test.cpp b/searchlib/src/tests/fef/termfieldmodel/termfieldmodel_test.cpp
new file mode 100644
index 00000000000..26a02d38adf
--- /dev/null
+++ b/searchlib/src/tests/fef/termfieldmodel/termfieldmodel_test.cpp
@@ -0,0 +1,209 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("termfieldmodel_test");
+#include <vespa/vespalib/testkit/test_kit.h>
+#include <vespa/searchlib/fef/fef.h>
+#include <vespa/searchlib/queryeval/searchiterator.h>
+
+#include <algorithm>
+
+using namespace search::fef;
+
+struct State {
+ SimpleTermData term;
+ MatchData::UP md;
+ TermFieldMatchData *f3;
+ TermFieldMatchData *f5;
+ TermFieldMatchData *f7;
+ TermFieldMatchDataArray array;
+
+ State() : term(), md(), f3(0), f5(0), f7(0), array() {}
+
+ void setArray(TermFieldMatchDataArray value) {
+ array = value;
+ }
+};
+
+void testInvalidId() {
+ const TermFieldMatchData empty;
+ using search::queryeval::SearchIterator;
+
+ EXPECT_EQUAL(TermFieldMatchData::invalidId(), empty.getDocId());
+ EXPECT_TRUE(TermFieldMatchData::invalidId() < (SearchIterator::beginId() + 1 ) ||
+ TermFieldMatchData::invalidId() > (search::endDocId - 1));
+}
+
+void testSetup(State &state) {
+ MatchDataLayout layout;
+
+ state.term.addField(3); // docfreq = 1
+ state.term.addField(7); // docfreq = 2
+ state.term.addField(5); // docfreq = 3
+
+ typedef search::fef::ITermFieldRangeAdapter FRA;
+ typedef search::fef::SimpleTermFieldRangeAdapter SFR;
+
+ // lookup terms
+ {
+ int i = 1;
+ for (SFR iter(state.term); iter.valid(); iter.next()) {
+ iter.get().setDocFreq(0.25 * i++);
+ }
+ }
+
+ // reserve handles
+ {
+ for (SFR iter(state.term); iter.valid(); iter.next()) {
+ iter.get().setHandle(layout.allocTermField(iter.get().getFieldId()));
+ }
+ }
+
+ state.md = layout.createMatchData();
+
+ // init match data
+ {
+ for (FRA iter(state.term); iter.valid(); iter.next()) {
+ const ITermFieldData& tfd = iter.get();
+
+ TermFieldHandle handle = tfd.getHandle();
+ TermFieldMatchData *data = state.md->resolveTermField(handle);
+ switch (tfd.getFieldId()) {
+ case 3:
+ state.f3 = data;
+ break;
+ case 5:
+ state.f5 = data;
+ break;
+ case 7:
+ state.f7 = data;
+ break;
+ default:
+ EXPECT_TRUE(false);
+ }
+ }
+ EXPECT_EQUAL(3u, state.f3->getFieldId());
+ EXPECT_EQUAL(5u, state.f5->getFieldId());
+ EXPECT_EQUAL(7u, state.f7->getFieldId());
+ }
+
+ // test that we can setup array
+ EXPECT_EQUAL(false, state.array.valid());
+ state.setArray(TermFieldMatchDataArray().add(state.f3).add(state.f5).add(state.f7));
+ EXPECT_EQUAL(true, state.array.valid());
+}
+
+void testGenerate(State &state) {
+ // verify array
+ EXPECT_EQUAL(3u, state.array.size());
+ EXPECT_EQUAL(state.f3, state.array[0]);
+ EXPECT_EQUAL(state.f5, state.array[1]);
+ EXPECT_EQUAL(state.f7, state.array[2]);
+
+ // stale unpacked data
+ state.f5->reset(5);
+ EXPECT_EQUAL(5u, state.f5->getDocId());
+ {
+ TermFieldMatchDataPosition pos;
+ pos.setPosition(3);
+ pos.setElementId(0);
+ pos.setElementLen(10);
+ state.f5->appendPosition(pos);
+ EXPECT_EQUAL(1u, state.f5->getIterator().size());
+ EXPECT_EQUAL(10u, state.f5->getIterator().getFieldLength());
+ }
+ state.f5->reset(6);
+ EXPECT_EQUAL(6u, state.f5->getDocId());
+ EXPECT_EQUAL(FieldPositionsIterator::UNKNOWN_LENGTH,
+ state.f5->getIterator().getFieldLength());
+ EXPECT_EQUAL(0u, state.f5->getIterator().size());
+
+
+ // fresh unpacked data
+ state.md->setDocId(10);
+ state.f3->reset(10);
+ {
+ TermFieldMatchDataPosition pos;
+ pos.setPosition(3);
+ pos.setElementId(0);
+ pos.setElementLen(10);
+ EXPECT_EQUAL(FieldPositionsIterator::UNKNOWN_LENGTH,
+ state.f3->getIterator().getFieldLength());
+ state.f3->appendPosition(pos);
+ EXPECT_EQUAL(10u, state.f3->getIterator().getFieldLength());
+ }
+ {
+ TermFieldMatchDataPosition pos;
+ pos.setPosition(15);
+ pos.setElementId(1);
+ pos.setElementLen(20);
+ state.f3->appendPosition(pos);
+ EXPECT_EQUAL(20u, state.f3->getIterator().getFieldLength());
+ }
+ {
+ TermFieldMatchDataPosition pos;
+ pos.setPosition(1);
+ pos.setElementId(2);
+ pos.setElementLen(5);
+ state.f3->appendPosition(pos);
+ EXPECT_EQUAL(20u, state.f3->getIterator().getFieldLength());
+ }
+
+ // raw score
+ state.f7->setRawScore(10, 5.0);
+}
+
+void testAnalyze(State &state) {
+ EXPECT_EQUAL(state.md->getDocId(), state.f3->getDocId());
+ EXPECT_NOT_EQUAL(state.md->getDocId(), state.f5->getDocId());
+ EXPECT_EQUAL(state.md->getDocId(), state.f7->getDocId());
+
+ FieldPositionsIterator it = state.f3->getIterator();
+ EXPECT_EQUAL(20u, it.getFieldLength());
+ EXPECT_EQUAL(3u, it.size());
+ EXPECT_TRUE(it.valid());
+ EXPECT_EQUAL(3u, it.getPosition());
+ EXPECT_EQUAL(0u, it.getElementId());
+ EXPECT_EQUAL(10u, it.getElementLen());
+ it.next();
+ EXPECT_TRUE(it.valid());
+ EXPECT_EQUAL(15u, it.getPosition());
+ EXPECT_EQUAL(1u, it.getElementId());
+ EXPECT_EQUAL(20u, it.getElementLen());
+ it.next();
+ EXPECT_TRUE(it.valid());
+ EXPECT_EQUAL(1u, it.getPosition());
+ EXPECT_EQUAL(2u, it.getElementId());
+ EXPECT_EQUAL(5u, it.getElementLen());
+ it.next();
+ EXPECT_TRUE(!it.valid());
+
+ EXPECT_EQUAL(0.0, state.f3->getRawScore());
+ EXPECT_EQUAL(0.0, state.f5->getRawScore());
+ EXPECT_EQUAL(5.0, state.f7->getRawScore());
+}
+
+TEST("term field model") {
+ State state;
+ testSetup(state);
+ testGenerate(state);
+ testAnalyze(state);
+ testInvalidId();
+}
+
+TEST("Access subqueries") {
+ State state;
+ testSetup(state);
+ state.f3->reset(10);
+ state.f3->setSubqueries(10, 42);
+ EXPECT_EQUAL(42ULL, state.f3->getSubqueries());
+ state.f3->enableRawScore();
+ EXPECT_EQUAL(0ULL, state.f3->getSubqueries());
+
+ state.f3->reset(11);
+ state.f3->appendPosition(TermFieldMatchDataPosition());
+ state.f3->setSubqueries(11, 42);
+ EXPECT_EQUAL(0ULL, state.f3->getSubqueries());
+}
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/fef/termmatchdatamerger/.gitignore b/searchlib/src/tests/fef/termmatchdatamerger/.gitignore
new file mode 100644
index 00000000000..64f3f4a4600
--- /dev/null
+++ b/searchlib/src/tests/fef/termmatchdatamerger/.gitignore
@@ -0,0 +1,4 @@
+*_test
+.depend
+Makefile
+searchlib_termmatchdatamerger_test_app
diff --git a/searchlib/src/tests/fef/termmatchdatamerger/CMakeLists.txt b/searchlib/src/tests/fef/termmatchdatamerger/CMakeLists.txt
new file mode 100644
index 00000000000..cfb6ae2611f
--- /dev/null
+++ b/searchlib/src/tests/fef/termmatchdatamerger/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_termmatchdatamerger_test_app
+ SOURCES
+ termmatchdatamerger_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_termmatchdatamerger_test_app COMMAND searchlib_termmatchdatamerger_test_app)
diff --git a/searchlib/src/tests/fef/termmatchdatamerger/DESC b/searchlib/src/tests/fef/termmatchdatamerger/DESC
new file mode 100644
index 00000000000..abacd50b719
--- /dev/null
+++ b/searchlib/src/tests/fef/termmatchdatamerger/DESC
@@ -0,0 +1 @@
+termmatchdatamerger test. Take a look at termmatchdatamerger.cpp for details.
diff --git a/searchlib/src/tests/fef/termmatchdatamerger/FILES b/searchlib/src/tests/fef/termmatchdatamerger/FILES
new file mode 100644
index 00000000000..709c15d91b8
--- /dev/null
+++ b/searchlib/src/tests/fef/termmatchdatamerger/FILES
@@ -0,0 +1 @@
+termmatchdatamerger_test.cpp
diff --git a/searchlib/src/tests/fef/termmatchdatamerger/termmatchdatamerger_test.cpp b/searchlib/src/tests/fef/termmatchdatamerger/termmatchdatamerger_test.cpp
new file mode 100644
index 00000000000..14b74498f2d
--- /dev/null
+++ b/searchlib/src/tests/fef/termmatchdatamerger/termmatchdatamerger_test.cpp
@@ -0,0 +1,281 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("termmatchdatamerger_test");
+#include <vespa/vespalib/testkit/testapp.h>
+
+#include <vespa/searchlib/fef/termfieldmatchdata.h>
+#include <vespa/searchlib/fef/termfieldmatchdataarray.h>
+#include <vespa/searchlib/fef/termmatchdatamerger.h>
+
+using namespace search::fef;
+
+typedef TermMatchDataMerger::Input MDMI;
+typedef TermMatchDataMerger::Inputs MDMIs;
+
+namespace {
+
+TermFieldMatchDataPosition make_pos(uint32_t pos)
+{
+ return TermFieldMatchDataPosition(0, pos, 1, 1000);
+}
+
+} // namespace <unnamed>
+
+class Test : public vespalib::TestApp
+{
+public:
+ void testMergeEmptyInput();
+ void testMergeSimple();
+ void testMergeMultifield();
+ void testMergeDuplicates();
+ void testMergeFieldLength();
+ int Main();
+};
+
+void
+Test::testMergeEmptyInput()
+{
+ TermFieldMatchData out;
+ TermFieldMatchDataArray output;
+ output.add(&out);
+
+ TermFieldMatchData in;
+ MDMIs input;
+ input.push_back(MDMI(&in, 1.0));
+
+ TermMatchDataMerger merger(input, output);
+
+ uint32_t docid = 5;
+ in.reset(docid);
+ merger.merge(docid);
+ EXPECT_EQUAL(docid, out.getDocId());
+ EXPECT_TRUE(out.begin() == out.end());
+}
+
+void
+Test::testMergeSimple()
+{
+ TermFieldMatchData a;
+ TermFieldMatchData b;
+ TermFieldMatchData c;
+ MDMIs input;
+ input.push_back(MDMI(&a, 0.5));
+ input.push_back(MDMI(&b, 1.0));
+ input.push_back(MDMI(&c, 1.5));
+
+ TermFieldMatchData out;
+ TermFieldMatchDataArray output;
+ output.add(&out);
+ TermMatchDataMerger merger(input, output);
+
+ uint32_t docid = 5;
+
+ a.reset(docid);
+ a.appendPosition(make_pos(5).setMatchExactness(0.5));
+ a.appendPosition(make_pos(10).setMatchExactness(3.0));
+ a.appendPosition(make_pos(15).setMatchExactness(2.0));
+
+ b.reset(docid);
+ b.appendPosition(make_pos(7).setMatchExactness(0.5));
+ b.appendPosition(make_pos(20).setMatchExactness(4.0));
+
+ c.reset(docid);
+ c.appendPosition(make_pos(22).setMatchExactness(0.5));
+ c.appendPosition(make_pos(27).setMatchExactness(2.0));
+ c.appendPosition(make_pos(28).setMatchExactness(5.0));
+
+ merger.merge(docid);
+
+ EXPECT_EQUAL(docid, out.getDocId());
+ EXPECT_EQUAL(8u, out.end() - out.begin());
+
+ EXPECT_EQUAL( 5u, out.begin()[0].getPosition());
+ EXPECT_EQUAL( 7u, out.begin()[1].getPosition());
+ EXPECT_EQUAL(10u, out.begin()[2].getPosition());
+ EXPECT_EQUAL(15u, out.begin()[3].getPosition());
+ EXPECT_EQUAL(20u, out.begin()[4].getPosition());
+ EXPECT_EQUAL(22u, out.begin()[5].getPosition());
+ EXPECT_EQUAL(27u, out.begin()[6].getPosition());
+ EXPECT_EQUAL(28u, out.begin()[7].getPosition());
+
+ EXPECT_EQUAL(0.25, out.begin()[0].getMatchExactness());
+ EXPECT_EQUAL( 0.5, out.begin()[1].getMatchExactness());
+ EXPECT_EQUAL( 1.5, out.begin()[2].getMatchExactness());
+ EXPECT_EQUAL( 1.0, out.begin()[3].getMatchExactness());
+ EXPECT_EQUAL( 4.0, out.begin()[4].getMatchExactness());
+ EXPECT_EQUAL(0.75, out.begin()[5].getMatchExactness());
+ EXPECT_EQUAL( 3.0, out.begin()[6].getMatchExactness());
+ EXPECT_EQUAL( 7.5, out.begin()[7].getMatchExactness());
+
+ // one stale input
+
+ docid = 10;
+ a.reset(docid);
+ a.appendPosition(make_pos(5));
+ a.appendPosition(make_pos(10));
+ a.appendPosition(make_pos(15));
+
+ merger.merge(docid);
+
+ EXPECT_EQUAL(docid, out.getDocId());
+ EXPECT_EQUAL(3u, out.end() - out.begin());
+
+ EXPECT_EQUAL( 5u, out.begin()[0].getPosition());
+ EXPECT_EQUAL(10u, out.begin()[1].getPosition());
+ EXPECT_EQUAL(15u, out.begin()[2].getPosition());
+
+ // both inputs are stale
+
+ docid = 15;
+
+ merger.merge(docid);
+ EXPECT_NOT_EQUAL(docid, out.getDocId());
+}
+
+
+void
+Test::testMergeMultifield()
+{
+ TermFieldMatchData a;
+ TermFieldMatchData b;
+ TermFieldMatchData c;
+ MDMIs input;
+ a.setFieldId(1);
+ b.setFieldId(2);
+ c.setFieldId(2);
+ input.push_back(MDMI(&a, 1.0));
+ input.push_back(MDMI(&b, 0.5));
+ input.push_back(MDMI(&c, 1.5));
+
+ TermFieldMatchData out1;
+ TermFieldMatchData out2;
+ TermFieldMatchData out3;
+ TermFieldMatchDataArray output;
+ out1.setFieldId(1);
+ out2.setFieldId(2);
+ out3.setFieldId(3);
+ output.add(&out1).add(&out2).add(&out3);
+
+ TermMatchDataMerger merger(input, output);
+
+ uint32_t docid = 5;
+
+ a.reset(docid);
+ a.appendPosition(make_pos(5));
+ a.appendPosition(make_pos(15));
+
+ b.reset(docid);
+ b.appendPosition(make_pos(7));
+ b.appendPosition(make_pos(20));
+
+ c.reset(docid);
+ c.appendPosition(make_pos(5));
+ c.appendPosition(make_pos(20));
+
+ merger.merge(docid);
+
+ EXPECT_EQUAL(docid, out1.getDocId());
+ EXPECT_EQUAL(docid, out2.getDocId());
+ EXPECT_NOT_EQUAL(docid, out3.getDocId());
+
+ EXPECT_EQUAL(2u, out1.end() - out1.begin());
+ EXPECT_EQUAL(3u, out2.end() - out2.begin());
+
+ EXPECT_EQUAL( 5u, out1.begin()[0].getPosition());
+ EXPECT_EQUAL(15u, out1.begin()[1].getPosition());
+
+ EXPECT_EQUAL( 5u, out2.begin()[0].getPosition());
+ EXPECT_EQUAL( 7u, out2.begin()[1].getPosition());
+ EXPECT_EQUAL(20u, out2.begin()[2].getPosition());
+
+ EXPECT_EQUAL(1.0, out1.begin()[0].getMatchExactness());
+ EXPECT_EQUAL(1.0, out1.begin()[1].getMatchExactness());
+
+ EXPECT_EQUAL(1.5, out2.begin()[0].getMatchExactness());
+ EXPECT_EQUAL(0.5, out2.begin()[1].getMatchExactness());
+ EXPECT_EQUAL(1.5, out2.begin()[2].getMatchExactness());
+}
+
+void
+Test::testMergeDuplicates()
+{
+ TermFieldMatchData a;
+ TermFieldMatchData b;
+ MDMIs input;
+ input.push_back(MDMI(&a, 0.5));
+ input.push_back(MDMI(&b, 1.5));
+
+ TermFieldMatchData out;
+ TermFieldMatchDataArray output;
+ output.add(&out);
+ TermMatchDataMerger merger(input, output);
+
+ uint32_t docid = 5;
+
+ a.reset(docid);
+ a.appendPosition(make_pos(5));
+ a.appendPosition(make_pos(10));
+ a.appendPosition(make_pos(15));
+
+ b.reset(docid);
+ b.appendPosition(make_pos(3));
+ b.appendPosition(make_pos(10));
+ b.appendPosition(make_pos(15));
+ b.appendPosition(make_pos(17));
+
+ merger.merge(docid);
+
+ EXPECT_EQUAL(docid, out.getDocId());
+ EXPECT_EQUAL(5u, out.end() - out.begin());
+
+ EXPECT_EQUAL( 3u, out.begin()[0].getPosition());
+ EXPECT_EQUAL(1.5, out.begin()[0].getMatchExactness());
+ EXPECT_EQUAL( 5u, out.begin()[1].getPosition());
+ EXPECT_EQUAL(0.5, out.begin()[1].getMatchExactness());
+ EXPECT_EQUAL(10u, out.begin()[2].getPosition());
+ EXPECT_EQUAL(1.5, out.begin()[2].getMatchExactness());
+ EXPECT_EQUAL(15u, out.begin()[3].getPosition());
+ EXPECT_EQUAL(1.5, out.begin()[3].getMatchExactness());
+ EXPECT_EQUAL(17u, out.begin()[4].getPosition());
+ EXPECT_EQUAL(1.5, out.begin()[4].getMatchExactness());
+}
+
+void
+Test::testMergeFieldLength()
+{
+ TermFieldMatchData a;
+ TermFieldMatchData b;
+ MDMIs input;
+ input.push_back(MDMI(&a, 1.0));
+ input.push_back(MDMI(&b, 1.0));
+
+ TermFieldMatchData out;
+ TermFieldMatchDataArray output;
+ output.add(&out);
+ TermMatchDataMerger merger(input, output);
+
+ uint32_t docid = 5;
+ a.reset(docid);
+ a.appendPosition(make_pos(1));
+ b.reset(docid);
+ b.appendPosition(make_pos(2));
+ merger.merge(docid);
+
+ EXPECT_EQUAL(docid, out.getDocId());
+ EXPECT_EQUAL(1000u, out.getIterator().getFieldLength());
+}
+
+int
+Test::Main()
+{
+ TEST_INIT("termmatchdatamerger_test");
+ testMergeEmptyInput();
+ testMergeSimple();
+ testMergeMultifield();
+ testMergeDuplicates();
+ testMergeFieldLength();
+ TEST_DONE();
+}
+
+TEST_APPHOOK(Test);
diff --git a/searchlib/src/tests/fileheaderinspect/.gitignore b/searchlib/src/tests/fileheaderinspect/.gitignore
new file mode 100644
index 00000000000..812991d07b5
--- /dev/null
+++ b/searchlib/src/tests/fileheaderinspect/.gitignore
@@ -0,0 +1,6 @@
+.depend
+Makefile
+fileheader.dat
+fileheaderinspect_test
+out
+searchlib_fileheaderinspect_test_app
diff --git a/searchlib/src/tests/fileheaderinspect/CMakeLists.txt b/searchlib/src/tests/fileheaderinspect/CMakeLists.txt
new file mode 100644
index 00000000000..024e83bde02
--- /dev/null
+++ b/searchlib/src/tests/fileheaderinspect/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_fileheaderinspect_test_app
+ SOURCES
+ fileheaderinspect.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_fileheaderinspect_test_app COMMAND searchlib_fileheaderinspect_test_app)
diff --git a/searchlib/src/tests/fileheaderinspect/DESC b/searchlib/src/tests/fileheaderinspect/DESC
new file mode 100644
index 00000000000..ee57a2fdde3
--- /dev/null
+++ b/searchlib/src/tests/fileheaderinspect/DESC
@@ -0,0 +1 @@
+fileheaderinspect test. Take a look at fileheaderinspect.cpp for details.
diff --git a/searchlib/src/tests/fileheaderinspect/FILES b/searchlib/src/tests/fileheaderinspect/FILES
new file mode 100644
index 00000000000..7c32fb811d5
--- /dev/null
+++ b/searchlib/src/tests/fileheaderinspect/FILES
@@ -0,0 +1 @@
+fileheaderinspect.cpp
diff --git a/searchlib/src/tests/fileheaderinspect/fileheaderinspect.cpp b/searchlib/src/tests/fileheaderinspect/fileheaderinspect.cpp
new file mode 100644
index 00000000000..75ad526e2f7
--- /dev/null
+++ b/searchlib/src/tests/fileheaderinspect/fileheaderinspect.cpp
@@ -0,0 +1,131 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("fileheaderinspect_test");
+
+#include <vespa/searchlib/util/fileheadertk.h>
+#include <vespa/vespalib/testkit/testapp.h>
+
+using namespace search;
+using namespace vespalib;
+
+class Test : public vespalib::TestApp {
+private:
+ bool writeHeader(const FileHeader &header, const vespalib::string &fileName);
+ vespalib::string readFile(const vespalib::string &fileName);
+
+ void testError();
+ void testEscape();
+ void testDelimiter();
+ void testQuiet();
+ void testVerbose();
+
+public:
+ int Main() {
+ TEST_INIT("fileheaderinspect_test");
+
+ testError(); TEST_FLUSH();
+ testEscape(); TEST_FLUSH();
+ testDelimiter(); TEST_FLUSH();
+ testQuiet(); TEST_FLUSH();
+ testVerbose(); TEST_FLUSH();
+
+ TEST_DONE();
+ }
+};
+
+TEST_APPHOOK(Test);
+
+bool
+Test::writeHeader(const FileHeader &header, const vespalib::string &fileName)
+{
+ FastOS_File file;
+ if (!EXPECT_TRUE(file.OpenWriteOnlyTruncate(fileName.c_str()))) {
+ return false;
+ }
+ if (!EXPECT_EQUAL(header.getSize(), header.writeFile(file))) {
+ return false;
+ }
+ file.Close();
+ return true;
+}
+
+vespalib::string
+Test::readFile(const vespalib::string &fileName)
+{
+ FastOS_File file;
+ ASSERT_TRUE(file.OpenReadOnly(fileName.c_str()));
+
+ char buf[1024];
+ uint32_t len = file.Read(buf, 1024);
+ EXPECT_TRUE(len != 1024); // make sure we got everything
+
+ vespalib::string str(buf, len);
+ file.Close();
+ return str;
+}
+
+void
+Test::testError()
+{
+ EXPECT_TRUE(system("../../apps/fileheaderinspect/vespa-header-inspect notfound.dat") != 0);
+}
+
+void
+Test::testEscape()
+{
+ FileHeader header;
+ header.putTag(FileHeader::Tag("fanart", "\fa\na\r\t"));
+ ASSERT_TRUE(writeHeader(header, "fileheader.dat"));
+ EXPECT_TRUE(system("../../apps/fileheaderinspect/vespa-header-inspect -q fileheader.dat > out") == 0);
+ EXPECT_EQUAL("fanart;string;\\fa\\na\\r\\t\n", readFile("out"));
+}
+
+void
+Test::testDelimiter()
+{
+ FileHeader header;
+ header.putTag(FileHeader::Tag("string", "string"));
+ ASSERT_TRUE(writeHeader(header, "fileheader.dat"));
+ EXPECT_TRUE(system("../../apps/fileheaderinspect/vespa-header-inspect -d i -q fileheader.dat > out") == 0);
+ EXPECT_EQUAL("str\\ingistr\\ingistr\\ing\n", readFile("out"));
+}
+
+void
+Test::testVerbose()
+{
+ FileHeader header;
+ FileHeaderTk::addVersionTags(header);
+ ASSERT_TRUE(writeHeader(header, "fileheader.dat"));
+ EXPECT_TRUE(system("../../apps/fileheaderinspect/vespa-header-inspect fileheader.dat > out") == 0);
+ vespalib::string str = readFile("out");
+ EXPECT_TRUE(!str.empty());
+ for (uint32_t i = 0, numTags = header.getNumTags(); i < numTags; ++i) {
+ const FileHeader::Tag &tag = header.getTag(i);
+ EXPECT_TRUE(str.find(tag.getName()) != vespalib::string::npos);
+
+ vespalib::asciistream out;
+ out << tag;
+ EXPECT_TRUE(str.find(out.str()) != vespalib::string::npos);
+ }
+}
+
+void
+Test::testQuiet()
+{
+ FileHeader header;
+ FileHeaderTk::addVersionTags(header);
+ ASSERT_TRUE(writeHeader(header, "fileheader.dat"));
+ EXPECT_TRUE(system("../../apps/fileheaderinspect/vespa-header-inspect -q fileheader.dat > out") == 0);
+ vespalib::string str = readFile("out");
+ EXPECT_TRUE(!str.empty());
+ for (uint32_t i = 0, numTags = header.getNumTags(); i < numTags; ++i) {
+ const FileHeader::Tag &tag = header.getTag(i);
+ size_t pos = str.find(tag.getName());
+ EXPECT_TRUE(pos != vespalib::string::npos);
+
+ vespalib::asciistream out;
+ out << ";" << tag;
+ EXPECT_TRUE(str.find(out.str(), pos) != vespalib::string::npos);
+ }
+}
diff --git a/searchlib/src/tests/fileheadertk/.gitignore b/searchlib/src/tests/fileheadertk/.gitignore
new file mode 100644
index 00000000000..6aa8c365240
--- /dev/null
+++ b/searchlib/src/tests/fileheadertk/.gitignore
@@ -0,0 +1,6 @@
+.depend
+Makefile
+fileheadertk.dat
+fileheadertk_test
+versiontags.dat
+searchlib_fileheadertk_test_app
diff --git a/searchlib/src/tests/fileheadertk/CMakeLists.txt b/searchlib/src/tests/fileheadertk/CMakeLists.txt
new file mode 100644
index 00000000000..bc6969fbac2
--- /dev/null
+++ b/searchlib/src/tests/fileheadertk/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_fileheadertk_test_app
+ SOURCES
+ fileheadertk_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_fileheadertk_test_app COMMAND searchlib_fileheadertk_test_app)
diff --git a/searchlib/src/tests/fileheadertk/DESC b/searchlib/src/tests/fileheadertk/DESC
new file mode 100644
index 00000000000..08ad9a0769d
--- /dev/null
+++ b/searchlib/src/tests/fileheadertk/DESC
@@ -0,0 +1 @@
+Ensures that FileHeaderTk works as expected.
diff --git a/searchlib/src/tests/fileheadertk/FILES b/searchlib/src/tests/fileheadertk/FILES
new file mode 100644
index 00000000000..fe82bf13af7
--- /dev/null
+++ b/searchlib/src/tests/fileheadertk/FILES
@@ -0,0 +1 @@
+fileheadertk.cpp
diff --git a/searchlib/src/tests/fileheadertk/fileheadertk_test.cpp b/searchlib/src/tests/fileheadertk/fileheadertk_test.cpp
new file mode 100644
index 00000000000..14c5d0ed6f6
--- /dev/null
+++ b/searchlib/src/tests/fileheadertk/fileheadertk_test.cpp
@@ -0,0 +1,47 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("fileheadertk_test");
+
+#include <vespa/searchlib/util/fileheadertk.h>
+#include <vespa/vespalib/testkit/testapp.h>
+
+using namespace search;
+
+class Test : public vespalib::TestApp {
+private:
+ void testVersionTags();
+
+public:
+ int Main() {
+ TEST_INIT("fileheadertk_test");
+
+ testVersionTags(); TEST_FLUSH();
+
+ TEST_DONE();
+ }
+};
+
+TEST_APPHOOK(Test);
+
+void
+Test::testVersionTags()
+{
+ vespalib::FileHeader header;
+ FileHeaderTk::addVersionTags(header);
+
+ FastOS_File file;
+ ASSERT_TRUE(file.OpenWriteOnlyTruncate("versiontags.dat"));
+ EXPECT_EQUAL(header.getSize(), header.writeFile(file));
+ file.Close();
+
+ EXPECT_EQUAL(8u, header.getNumTags());
+ EXPECT_TRUE(header.hasTag("version-arch"));
+ EXPECT_TRUE(header.hasTag("version-builder"));
+ EXPECT_TRUE(header.hasTag("version-component"));
+ EXPECT_TRUE(header.hasTag("version-date"));
+ EXPECT_TRUE(header.hasTag("version-system"));
+ EXPECT_TRUE(header.hasTag("version-system-rev"));
+ EXPECT_TRUE(header.hasTag("version-tag"));
+ EXPECT_TRUE(header.hasTag("version-pkg"));
+}
diff --git a/searchlib/src/tests/forcelink/.gitignore b/searchlib/src/tests/forcelink/.gitignore
new file mode 100644
index 00000000000..c74c5915388
--- /dev/null
+++ b/searchlib/src/tests/forcelink/.gitignore
@@ -0,0 +1,4 @@
+.depend
+Makefile
+forcelink_test
+searchlib_forcelink_test_app
diff --git a/searchlib/src/tests/forcelink/CMakeLists.txt b/searchlib/src/tests/forcelink/CMakeLists.txt
new file mode 100644
index 00000000000..50e39d2d844
--- /dev/null
+++ b/searchlib/src/tests/forcelink/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_forcelink_test_app
+ SOURCES
+ forcelink.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_forcelink_test_app COMMAND searchlib_forcelink_test_app)
diff --git a/searchlib/src/tests/forcelink/DESC b/searchlib/src/tests/forcelink/DESC
new file mode 100644
index 00000000000..c73185a8736
--- /dev/null
+++ b/searchlib/src/tests/forcelink/DESC
@@ -0,0 +1 @@
+forcelink test. Take a look at forcelink.cpp for details.
diff --git a/searchlib/src/tests/forcelink/FILES b/searchlib/src/tests/forcelink/FILES
new file mode 100644
index 00000000000..d917375ebf2
--- /dev/null
+++ b/searchlib/src/tests/forcelink/FILES
@@ -0,0 +1 @@
+forcelink.cpp
diff --git a/searchlib/src/tests/forcelink/forcelink.cpp b/searchlib/src/tests/forcelink/forcelink.cpp
new file mode 100644
index 00000000000..9f555e09480
--- /dev/null
+++ b/searchlib/src/tests/forcelink/forcelink.cpp
@@ -0,0 +1,18 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("forcelink_test");
+#include <vespa/vespalib/testkit/testapp.h>
+#include <vespa/searchlib/expression/forcelink.hpp>
+#include <vespa/searchlib/aggregation/forcelink.hpp>
+
+TEST_SETUP(Test);
+
+int
+Test::Main()
+{
+ TEST_INIT("forcelink_test");
+ forcelink_searchlib_expression();
+ forcelink_searchlib_aggregation();
+ TEST_DONE();
+}
diff --git a/searchlib/src/tests/grouping/.gitignore b/searchlib/src/tests/grouping/.gitignore
new file mode 100644
index 00000000000..c7654573dc5
--- /dev/null
+++ b/searchlib/src/tests/grouping/.gitignore
@@ -0,0 +1,11 @@
+.depend
+Makefile
+diff.txt
+grouping_test
+lhs.out
+rhs.out
+/grouping_benchmark
+searchlib_grouping_serialization_test_app
+searchlib_grouping_test_app
+searchlib_hyperloglog_test_app
+searchlib_sketch_test_app
diff --git a/searchlib/src/tests/grouping/CMakeLists.txt b/searchlib/src/tests/grouping/CMakeLists.txt
new file mode 100644
index 00000000000..ef44472edfc
--- /dev/null
+++ b/searchlib/src/tests/grouping/CMakeLists.txt
@@ -0,0 +1,29 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_grouping_test_app
+ SOURCES
+ grouping_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_grouping_test_app COMMAND searchlib_grouping_test_app)
+vespa_add_executable(searchlib_hyperloglog_test_app
+ SOURCES
+ hyperloglog_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_hyperloglog_test_app COMMAND searchlib_hyperloglog_test_app)
+vespa_add_executable(searchlib_sketch_test_app
+ SOURCES
+ sketch_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_sketch_test_app COMMAND searchlib_sketch_test_app)
+vespa_add_executable(searchlib_grouping_serialization_test_app
+ SOURCES
+ grouping_serialization_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_grouping_serialization_test_app COMMAND searchlib_grouping_serialization_test_app)
diff --git a/searchlib/src/tests/grouping/DESC b/searchlib/src/tests/grouping/DESC
new file mode 100644
index 00000000000..1aa6cb37e89
--- /dev/null
+++ b/searchlib/src/tests/grouping/DESC
@@ -0,0 +1 @@
+grouping test. Take a look at grouping.cpp for details.
diff --git a/searchlib/src/tests/grouping/FILES b/searchlib/src/tests/grouping/FILES
new file mode 100644
index 00000000000..af7f7e71257
--- /dev/null
+++ b/searchlib/src/tests/grouping/FILES
@@ -0,0 +1,4 @@
+grouping.cpp
+lhs.out
+rhs.out
+diff.txt
diff --git a/searchlib/src/tests/grouping/grouping_serialization_test.cpp b/searchlib/src/tests/grouping/grouping_serialization_test.cpp
new file mode 100644
index 00000000000..99757af8439
--- /dev/null
+++ b/searchlib/src/tests/grouping/grouping_serialization_test.cpp
@@ -0,0 +1,339 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Unit tests for grouping_serialization.
+
+#include <vespa/log/log.h>
+LOG_SETUP("grouping_serialization_test");
+#include <vespa/fastos/fastos.h>
+
+#include <vespa/searchlib/aggregation/aggregation.h>
+#include <vespa/searchlib/aggregation/expressioncountaggregationresult.h>
+#include <vespa/searchlib/aggregation/fs4hit.h>
+#include <vespa/searchlib/aggregation/groupinglevel.h>
+#include <vespa/searchlib/aggregation/hitsaggregationresult.h>
+#include <vespa/searchlib/aggregation/perdocexpression.h>
+#include <vespa/searchlib/aggregation/vdshit.h>
+#include <vespa/searchlib/common/hitrank.h>
+#include <vespa/searchlib/common/identifiable.h>
+#include <vespa/searchlib/expression/fixedwidthbucketfunctionnode.h>
+#include <vespa/searchlib/expression/floatbucketresultnode.h>
+#include <vespa/searchlib/expression/getdocidnamespacespecificfunctionnode.h>
+#include <vespa/searchlib/expression/getymumchecksumfunctionnode.h>
+#include <vespa/searchlib/expression/integerbucketresultnode.h>
+#include <vespa/vespalib/objects/nboserializer.h>
+#include <vespa/vespalib/objects/nbostream.h>
+#include <vespa/vespalib/testkit/testapp.h>
+#include <vespa/vespalib/util/stringfmt.h>
+#include <fstream>
+
+using search::HitRank;
+using vespalib::Identifiable;
+using vespalib::NBOSerializer;
+using vespalib::make_string;
+using vespalib::nbostream;
+using namespace search::aggregation;
+using namespace search::expression;
+
+namespace {
+
+document::GlobalId getGlobalId(uint32_t docId) {
+ return document::DocumentId(vespalib::make_string("doc:test:%u", docId))
+ .getGlobalId();
+}
+
+struct Fixture {
+ // Set WRITE_FILES to true to generate new expected serialization files.
+ const bool WRITE_FILES = false;
+ const std::string file_path = "../../test/files/";
+ std::string file_name;
+ std::ifstream file_stream;
+
+ Fixture(const std::string &file_name_in)
+ : file_name(file_path + file_name_in),
+ file_stream(file_name.c_str(),
+ std::ifstream::in | std::ifstream::binary) {
+ if (WRITE_FILES) {
+ std::ofstream out(file_name.c_str(),
+ std::ofstream::out | std::ofstream::trunc |
+ std::ofstream::binary);
+ }
+ }
+
+ void checkObject(const Identifiable &obj) {
+ if (WRITE_FILES) {
+ nbostream stream;
+ NBOSerializer serializer(stream);
+ serializer << obj;
+ std::ofstream out(file_name.c_str(),
+ std::ofstream::out | std::ofstream::app |
+ std::ofstream::binary);
+ uint32_t size = stream.size();
+ out.write(reinterpret_cast<const char *>(&size), sizeof(size));
+ out.write(stream.peek(), stream.size());
+ }
+
+ uint32_t size = 0;
+ file_stream.read(reinterpret_cast<char *>(&size), sizeof(size));
+ nbostream stream;
+ for (size_t i = 0; i < size; ++i) {
+ char c;
+ file_stream.read(&c, sizeof(c));
+ stream << c;
+ }
+ Identifiable::UP newObj = Identifiable::create(stream);
+
+ if (!EXPECT_TRUE(newObj.get() != 0)) {
+ LOG(error, "object of class '%s' resulted in empty echo",
+ obj.getClass().name());
+ return;
+ }
+ if (EXPECT_EQUAL(obj.asString(), newObj->asString())
+ && EXPECT_TRUE(newObj->cmp(obj) == 0)
+ && EXPECT_TRUE(obj.cmp(*newObj) == 0))
+ {
+ LOG(info, "object of class '%s' passed echo test : %s",
+ obj.getClass().name(), newObj->asString().c_str());
+ } else {
+ LOG(error, "object of class '%s' FAILED echo test",
+ obj.getClass().name());
+ }
+ }
+};
+
+//-----------------------------------------------------------------------------
+
+ExpressionNode::CP createDummyExpression() {
+ return AddFunctionNode().addArg(ConstantNode(Int64ResultNode(2)))
+ .addArg(ConstantNode(Int64ResultNode(2)));
+}
+
+//-----------------------------------------------------------------------------
+
+TEST_F("testResultTypes", Fixture("testResultTypes")) {
+ f.checkObject(Int64ResultNode(7));
+ f.checkObject(FloatResultNode(7.3));
+ f.checkObject(StringResultNode("7.3"));
+ {
+ char tmp[7] = { (char)0xe5, (char)0xa6, (char)0x82, (char)0xe6,
+ (char)0x9e, (char)0x9c,0 };
+ f.checkObject(StringResultNode(tmp));
+ }
+ {
+ char tmp[] = { '7', '.', '4' };
+ f.checkObject(RawResultNode(tmp, 3));
+ }
+ f.checkObject(IntegerBucketResultNode());
+ f.checkObject(FloatBucketResultNode());
+ f.checkObject(IntegerBucketResultNode(10, 20));
+ f.checkObject(FloatBucketResultNode(10.0, 20.0));
+ f.checkObject(StringBucketResultNode("10.0", "20.0"));
+ char tmp[] = { 1, 0, 0};
+ char tmp2[] = { 1, 1, 0};
+ f.checkObject(
+ RawBucketResultNode(ResultNode::UP(new RawResultNode(tmp, 3)),
+ ResultNode::UP(new RawResultNode(tmp2, 3))));
+
+ IntegerBucketResultNodeVector iv;
+ iv.getVector().push_back(IntegerBucketResultNode(878, 3246823));
+ f.checkObject(iv);
+
+ FloatBucketResultNodeVector fv;
+ fv.getVector().push_back(FloatBucketResultNode(878, 3246823));
+ f.checkObject(fv);
+
+ StringBucketResultNodeVector sv;
+ sv.getVector().push_back(StringBucketResultNode("878", "3246823"));
+ f.checkObject(sv);
+
+ RawBucketResultNodeVector rv;
+ rv.getVector().push_back(
+ RawBucketResultNode(ResultNode::UP(new RawResultNode(tmp, 3)),
+ ResultNode::UP(new RawResultNode(tmp2, 3))));
+ f.checkObject(rv);
+}
+
+TEST_F("testSpecialNodes", Fixture("testSpecialNodes")) {
+ f.checkObject(AttributeNode("testattribute"));
+ f.checkObject(DocumentFieldNode("testdocumentfield"));
+ {
+ f.checkObject(GetDocIdNamespaceSpecificFunctionNode(
+ ResultNode::UP(new Int64ResultNode(7))));
+ }
+ f.checkObject(GetYMUMChecksumFunctionNode());
+}
+
+TEST_F("testFunctionNodes", Fixture("testFunctionNodes")) {
+ f.checkObject(AddFunctionNode()
+ .addArg(ConstantNode(Int64ResultNode(7)))
+ .addArg(ConstantNode(Int64ResultNode(8)))
+ .addArg(ConstantNode(Int64ResultNode(9))));
+ f.checkObject(XorFunctionNode()
+ .addArg(ConstantNode(Int64ResultNode(7)))
+ .addArg(ConstantNode(Int64ResultNode(8)))
+ .addArg(ConstantNode(Int64ResultNode(9))));
+ f.checkObject(MultiplyFunctionNode()
+ .addArg(ConstantNode(Int64ResultNode(7)))
+ .addArg(ConstantNode(Int64ResultNode(8)))
+ .addArg(ConstantNode(Int64ResultNode(9))));
+ f.checkObject(DivideFunctionNode()
+ .addArg(ConstantNode(Int64ResultNode(7)))
+ .addArg(ConstantNode(Int64ResultNode(8)))
+ .addArg(ConstantNode(Int64ResultNode(9))));
+ f.checkObject(ModuloFunctionNode()
+ .addArg(ConstantNode(Int64ResultNode(7)))
+ .addArg(ConstantNode(Int64ResultNode(8)))
+ .addArg(ConstantNode(Int64ResultNode(9))));
+ f.checkObject(MinFunctionNode()
+ .addArg(ConstantNode(Int64ResultNode(7)))
+ .addArg(ConstantNode(Int64ResultNode(8)))
+ .addArg(ConstantNode(Int64ResultNode(9))));
+ f.checkObject(MaxFunctionNode()
+ .addArg(ConstantNode(Int64ResultNode(7)))
+ .addArg(ConstantNode(Int64ResultNode(8)))
+ .addArg(ConstantNode(Int64ResultNode(9))));
+ f.checkObject(TimeStampFunctionNode(ConstantNode(Int64ResultNode(7)),
+ TimeStampFunctionNode::Hour, true));
+ f.checkObject(ZCurveFunctionNode(ConstantNode(Int64ResultNode(7)),
+ ZCurveFunctionNode::X));
+ f.checkObject(ZCurveFunctionNode(ConstantNode(Int64ResultNode(7)),
+ ZCurveFunctionNode::Y));
+ f.checkObject(NegateFunctionNode(ConstantNode(Int64ResultNode(7))));
+ f.checkObject(SortFunctionNode(ConstantNode(Int64ResultNode(7))));
+ f.checkObject(NormalizeSubjectFunctionNode(ConstantNode(
+ StringResultNode("foo"))));
+ f.checkObject(ReverseFunctionNode(ConstantNode(Int64ResultNode(7))));
+ f.checkObject(MD5BitFunctionNode(ConstantNode(Int64ResultNode(7)), 64));
+ f.checkObject(XorBitFunctionNode(ConstantNode(Int64ResultNode(7)), 64));
+ f.checkObject(CatFunctionNode()
+ .addArg(ConstantNode(Int64ResultNode(7)))
+ .addArg(ConstantNode(Int64ResultNode(8)))
+ .addArg(ConstantNode(Int64ResultNode(9))));
+ f.checkObject(FixedWidthBucketFunctionNode());
+ f.checkObject(FixedWidthBucketFunctionNode(AttributeNode("foo")));
+ f.checkObject(FixedWidthBucketFunctionNode(AttributeNode("foo"))
+ .setWidth(Int64ResultNode(10)));
+ f.checkObject(FixedWidthBucketFunctionNode(AttributeNode("foo"))
+ .setWidth(FloatResultNode(10.0)));
+ f.checkObject(RangeBucketPreDefFunctionNode());
+ f.checkObject(RangeBucketPreDefFunctionNode(AttributeNode("foo")));
+ f.checkObject(DebugWaitFunctionNode(ConstantNode(Int64ResultNode(5)),
+ 3.3, false));
+}
+
+TEST_F("testAggregatorResults", Fixture("testAggregatorResults")) {
+ f.checkObject(SumAggregationResult()
+ .setExpression(AttributeNode("attributeA"))
+ .setResult(Int64ResultNode(7)));
+ f.checkObject(XorAggregationResult()
+ .setXor(Int64ResultNode(7))
+ .setExpression(AttributeNode("attributeA")));
+ f.checkObject(CountAggregationResult()
+ .setCount(7)
+ .setExpression(AttributeNode("attributeA")));
+ f.checkObject(MinAggregationResult()
+ .setExpression(AttributeNode("attributeA"))
+ .setResult(Int64ResultNode(7)));
+ f.checkObject(MaxAggregationResult()
+ .setExpression(AttributeNode("attributeA"))
+ .setResult(Int64ResultNode(7)));
+ f.checkObject(AverageAggregationResult()
+ .setExpression(AttributeNode("attributeA"))
+ .setResult(Int64ResultNode(7)));
+ ExpressionCountAggregationResult expression_count;
+ expression_count.setExpression(ConstantNode(Int64ResultNode(67)))
+ .aggregate(DocId(42), HitRank(21));
+ f.checkObject(expression_count);
+}
+
+TEST_F("testHitCollection", Fixture("testHitCollection")) {
+ f.checkObject(FS4Hit());
+ f.checkObject(FS4Hit(0, 50.0).setGlobalId(getGlobalId(100)));
+ f.checkObject(VdsHit());
+ f.checkObject(VdsHit("100", 50.0));
+ f.checkObject(VdsHit("100", 50.0).setSummary("rawsummary", 10));
+ f.checkObject(HitsAggregationResult());
+ f.checkObject(HitsAggregationResult()
+ .setMaxHits(5)
+ .addHit(FS4Hit(0, 1.0).setGlobalId(getGlobalId(10)))
+ .addHit(FS4Hit(0, 2.0).setGlobalId(getGlobalId(20)))
+ .addHit(FS4Hit(0, 3.0).setGlobalId(getGlobalId(30)))
+ .addHit(FS4Hit(0, 4.0).setGlobalId(getGlobalId(40)))
+ .addHit(FS4Hit(0, 5.0).setGlobalId(getGlobalId(50)))
+ .setExpression(ConstantNode(Int64ResultNode(5))));
+ f.checkObject(HitsAggregationResult()
+ .setMaxHits(3)
+ .addHit(FS4Hit(0, 1.0).setGlobalId(getGlobalId(10))
+ .setDistributionKey(100))
+ .addHit(FS4Hit(0, 2.0).setGlobalId(getGlobalId(20))
+ .setDistributionKey(200))
+ .addHit(FS4Hit(0, 3.0).setGlobalId(getGlobalId(30))
+ .setDistributionKey(300))
+ .setExpression(ConstantNode(Int64ResultNode(5))));
+ f.checkObject(HitsAggregationResult()
+ .setMaxHits(3)
+ .addHit(VdsHit("10", 1.0).setSummary("100", 3))
+ .addHit(VdsHit("20", 2.0).setSummary("200", 3))
+ .addHit(VdsHit("30", 3.0).setSummary("300", 3))
+ .setExpression(ConstantNode(Int64ResultNode(5))));
+}
+
+TEST_F("testGroupingLevel", Fixture("testGroupingLevel")) {
+ f.checkObject(GroupingLevel()
+ .setMaxGroups(100)
+ .setExpression(createDummyExpression())
+ .addAggregationResult(SumAggregationResult()
+ .setExpression(createDummyExpression())));
+}
+
+TEST_F("testGroup", Fixture("testGroup")) {
+ f.checkObject(Group());
+ f.checkObject(Group().setId(Int64ResultNode(50))
+ .setRank(RawRank(10)));
+ f.checkObject(Group().setId(Int64ResultNode(100))
+ .addChild(Group().setId(Int64ResultNode(110)))
+ .addChild(Group().setId(Int64ResultNode(120))
+ .setRank(20.5)
+ .addAggregationResult(SumAggregationResult()
+ .setExpression(createDummyExpression()))
+ .addAggregationResult(SumAggregationResult()
+ .setExpression(createDummyExpression())))
+ .addChild(Group().setId(Int64ResultNode(130))
+ .addChild(Group().setId(Int64ResultNode(131)))));
+}
+
+TEST_F("testGrouping", Fixture("testGrouping")) {
+ f.checkObject(Grouping());
+ f.checkObject(Grouping()
+ .addLevel(GroupingLevel()
+ .setMaxGroups(100)
+ .setExpression(createDummyExpression())
+ .addAggregationResult(SumAggregationResult()
+ .setExpression(createDummyExpression())))
+ .addLevel(GroupingLevel()
+ .setMaxGroups(10)
+ .setExpression(createDummyExpression())
+ .addAggregationResult(SumAggregationResult()
+ .setExpression(createDummyExpression()))
+ .addAggregationResult(SumAggregationResult()
+ .setExpression(createDummyExpression()))));
+ f.checkObject(Grouping()
+ .addLevel(GroupingLevel()
+ .setExpression(AttributeNode("folder"))
+ .addAggregationResult(XorAggregationResult()
+ .setExpression(MD5BitFunctionNode(
+ AttributeNode("docid"), 64)))
+ .addAggregationResult(SumAggregationResult()
+ .setExpression(MinFunctionNode()
+ .addArg(AttributeNode("attribute1"))
+ .addArg(AttributeNode("attribute2")))
+ )
+ .addAggregationResult(XorAggregationResult()
+ .setExpression(
+ XorBitFunctionNode(CatFunctionNode()
+ .addArg(GetDocIdNamespaceSpecificFunctionNode())
+ .addArg(DocumentFieldNode("folder"))
+ .addArg(DocumentFieldNode("flags")), 64)))));
+}
+
+} // namespace
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/grouping/grouping_test.cpp b/searchlib/src/tests/grouping/grouping_test.cpp
new file mode 100644
index 00000000000..f9939f0d370
--- /dev/null
+++ b/searchlib/src/tests/grouping/grouping_test.cpp
@@ -0,0 +1,1912 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("grouping_test");
+#include <vespa/vespalib/testkit/testapp.h>
+#include <vespa/searchlib/aggregation/perdocexpression.h>
+#include <vespa/searchlib/aggregation/aggregation.h>
+#include <vespa/searchlib/attribute/extendableattributes.h>
+#include <vespa/searchlib/attribute/attributemanager.h>
+#include <vespa/searchlib/aggregation/hitsaggregationresult.h>
+#include <vespa/searchlib/aggregation/fs4hit.h>
+#include <vespa/searchlib/aggregation/predicates.h>
+#include <vespa/searchlib/expression/fixedwidthbucketfunctionnode.h>
+#include <algorithm>
+
+using namespace vespalib;
+using namespace search;
+using namespace search::aggregation;
+using namespace search::attribute;
+using namespace search::expression;
+
+//-----------------------------------------------------------------------------
+
+template<typename A, typename T>
+class AttrBuilder
+{
+private:
+ A *_attr;
+ AttributeVector::SP _attrSP;
+
+public:
+ AttrBuilder(const AttrBuilder &rhs)
+ : _attr(new A(rhs._attr->getName())),
+ _attrSP(_attr)
+ {
+ uint32_t numDocs = rhs._attr->getNumDocs();
+ for (uint32_t docid = 0; docid < numDocs; ++docid) {
+ T val;
+ uint32_t res = rhs._attr->get(docid, &val, 1);
+ LOG_ASSERT(res == 1);
+ add(val);
+ }
+ }
+ AttrBuilder(const std::string &name)
+ : _attr(new A(name)),
+ _attrSP(_attr)
+ {
+ }
+ AttrBuilder& operator=(const AttrBuilder &rhs) {
+ AttrBuilder tmp(rhs);
+ std::swap(_attr, tmp._attr);
+ _attrSP.swap(tmp._attrSP);
+ return *this;
+ }
+ AttrBuilder &add(T value) {
+ DocId ignore;
+ _attr->addDoc(ignore);
+ _attr->add(value);
+ return *this;
+ }
+ AttributeVector::SP sp() const {
+ return _attrSP;
+ }
+};
+
+typedef AttrBuilder<SingleIntegerExtAttribute, int64_t> IntAttrBuilder;
+typedef AttrBuilder<SingleFloatExtAttribute, double> FloatAttrBuilder;
+typedef AttrBuilder<SingleStringExtAttribute, const char *> StringAttrBuilder;
+
+//-----------------------------------------------------------------------------
+
+class ResultBuilder
+{
+private:
+ std::vector<RankedHit> _hits;
+
+public:
+ ResultBuilder() : _hits() {}
+ ResultBuilder &add(unsigned int docid, HitRank rank = 0) {
+ RankedHit hit;
+ hit._docId = docid;
+ hit._rankValue = rank;
+ _hits.push_back(hit);
+ for (uint32_t pos = (_hits.size() - 1);
+ pos > 0 && (_hits[pos]._rankValue > _hits[pos - 1]._rankValue);
+ --pos)
+ {
+ std::swap(_hits[pos], _hits[pos - 1]);
+ }
+ return *this;
+ }
+ const RankedHit *hits() const {
+ return &_hits[0];
+ }
+ uint32_t size() const {
+ return _hits.size();
+ }
+};
+
+//-----------------------------------------------------------------------------
+
+class AggregationContext
+{
+private:
+ AttributeManager _attrMan;
+ ResultBuilder _result;
+ IAttributeContext::UP _attrCtx;
+
+ AggregationContext(const AggregationContext &);
+ AggregationContext &operator=(const AggregationContext &);
+
+public:
+ AggregationContext() : _attrMan(), _result(), _attrCtx(_attrMan.createContext()) {}
+ ResultBuilder &result() { return _result; }
+ void add(AttributeVector::SP attr) {
+ _attrMan.add(attr);
+ }
+ void setup(Grouping &g) {
+ g.configureStaticStuff(ConfigureStaticParams(_attrCtx.get(), 0));
+ }
+};
+
+//-----------------------------------------------------------------------------
+
+class Test : public TestApp
+{
+public:
+ bool testAggregation(AggregationContext &ctx,
+ const Grouping &request,
+ const Group &expect);
+ bool testMerge(const Grouping &a, const Grouping &b,
+ const Group &expect);
+ bool testMerge(const Grouping &a, const Grouping &b, const Grouping &c,
+ const Group &expect);
+ bool testPrune(const Grouping &a, const Grouping &b,
+ const Group &expect);
+ bool testPartialMerge(const Grouping &a, const Grouping &b,
+ const Group &expect);
+ void testAggregationSimple();
+ void testAggregationLevels();
+ void testAggregationMaxGroups();
+ void testAggregationGroupOrder();
+ void testAggregationGroupRank();
+ void testAggregationGroupCapping();
+ void testMergeSimpleSum();
+ void testMergeLevels();
+ void testMergeGroups();
+ void testMergeTrees();
+ void testPruneSimple();
+ void testPruneComplex();
+ void testPartialMerging();
+ void testCount();
+ void testTopN();
+ void testFS4HitCollection();
+ bool checkBucket(const NumericResultNode &width, const NumericResultNode &value, const BucketResultNode &bucket);
+ bool checkHits(const Grouping &g, uint32_t first, uint32_t last, uint32_t cnt);
+ void testFixedWidthBuckets();
+ void testThatNanIsConverted();
+ void testNanSorting();
+ int Main();
+private:
+ void testAggregationSimpleSum(AggregationContext & ctx, const AggregationResult & aggr, const ResultNode & ir, const ResultNode & fr, const ResultNode & sr);
+ class CheckAttributeReferences : public vespalib::ObjectOperation, public vespalib::ObjectPredicate
+ {
+ public:
+ CheckAttributeReferences() : _numrefs(0) { }
+ int _numrefs;
+ private:
+ virtual void execute(vespalib::Identifiable &obj) {
+ if (static_cast<AttributeNode &>(obj).getAttribute() != NULL) {
+ _numrefs++;
+ }
+ }
+ virtual bool check(const vespalib::Identifiable &obj) const { return obj.inherits(AttributeNode::classId); }
+ };
+};
+
+//-----------------------------------------------------------------------------
+
+/**
+ * Run the given grouping request and verify that the resulting group
+ * tree matches the expected value.
+ **/
+bool
+Test::testAggregation(AggregationContext &ctx,
+ const Grouping &request,
+ const Group &expect)
+{
+ Grouping tmp = request; // create local copy
+ ctx.setup(tmp);
+ tmp.aggregate(ctx.result().hits(), ctx.result().size());
+ tmp.cleanupAttributeReferences();
+ CheckAttributeReferences attrCheck;
+ tmp.select(attrCheck, attrCheck);
+ EXPECT_EQUAL(attrCheck._numrefs, 0);
+ bool ok = EXPECT_EQUAL(tmp.getRoot().asString(), expect.asString());
+ if (!ok) {
+ std::cerr << tmp.getRoot().asString() << std::endl << expect.asString() << std::endl;
+ }
+ return ok;
+}
+
+/**
+ * Merge the given grouping requests and verify that the resulting
+ * group tree matches the expected value.
+ **/
+bool
+Test::testMerge(const Grouping &a, const Grouping &b,
+ const Group &expect)
+{
+ Grouping tmp = a; // create local copy
+ Grouping tmpB = b;
+ tmp.merge(tmpB);
+ tmp.postMerge();
+ tmp.sortById();
+ return EXPECT_EQUAL(tmp.getRoot().asString(), expect.asString());
+}
+
+/**
+ * Prune the given grouping request and verify that the resulting
+ * group tree matches the expected value.
+ **/
+bool
+Test::testPrune(const Grouping &a, const Grouping &b,
+ const Group &expect)
+{
+ Grouping tmp = a; // create local copy
+ tmp.prune(b);
+ bool ok = EXPECT_EQUAL(tmp.getRoot().asString(), expect.asString());
+ if (!ok) {
+ std::cerr << tmp.getRoot().asString() << std::endl << expect.asString() << std::endl;
+ }
+ return ok;
+}
+
+/**
+ * Merge a given grouping request to get a partial request back. Verify that the
+ * partial request is correct.
+ **/
+bool
+Test::testPartialMerge(const Grouping &a, const Grouping &b,
+ const Group &expect)
+{
+ Grouping tmp = a; // create local copy
+ tmp.mergePartial(b);
+ bool ok = EXPECT_EQUAL(tmp.getRoot().asString(), expect.asString());
+ if (!ok) {
+ std::cerr << tmp.getRoot().asString() << std::endl << expect.asString() << std::endl;
+ }
+ return ok;
+}
+
+/**
+ * Merge the given grouping requests and verify that the resulting
+ * group tree matches the expected value.
+ **/
+bool
+Test::testMerge(const Grouping &a, const Grouping &b, const Grouping &c,
+ const Group &expect)
+{
+ Grouping tmp = a; // create local copy
+ Grouping tmpB = b; // create local copy
+ Grouping tmpC = c; // create local copy
+ tmp.merge(tmpB);
+ tmp.merge(tmpC);
+ tmp.postMerge();
+ tmp.sortById();
+ return EXPECT_EQUAL(tmp.getRoot().asString(), expect.asString());
+}
+
+//-----------------------------------------------------------------------------
+
+/**
+ * Test collecting the sum of the values from a single attribute
+ * vector directly into the root node. Consider this a smoke test.
+ **/
+void
+Test::testAggregationSimple()
+{
+ AggregationContext ctx;
+ ctx.result().add(0).add(1).add(2);
+ ctx.add(IntAttrBuilder("int").add(3).add(7).add(15).sp());
+ ctx.add(FloatAttrBuilder("float").add(3).add(7).add(15).sp());
+ ctx.add(StringAttrBuilder("string").add("3").add("7").add("15").sp());
+
+ char strsum[3] = {-101, '5', 0};
+ testAggregationSimpleSum(ctx, SumAggregationResult(), Int64ResultNode(25), FloatResultNode(25), StringResultNode(strsum));
+ testAggregationSimpleSum(ctx, MinAggregationResult(), Int64ResultNode(3), FloatResultNode(3), StringResultNode("15"));
+ testAggregationSimpleSum(ctx, MaxAggregationResult(), Int64ResultNode(15), FloatResultNode(15), StringResultNode("7"));
+}
+
+void Test::testAggregationSimpleSum(AggregationContext & ctx, const AggregationResult & aggr, const ResultNode & ir, const ResultNode & fr, const ResultNode & sr)
+{
+ ExpressionNode::CP clone(aggr);
+ Grouping request = Grouping()
+ .setRoot(Group()
+ .addResult(static_cast<AggregationResult &>(*clone).setExpression(AttributeNode("int")))
+ .addResult(static_cast<AggregationResult &>(*clone).setExpression(AttributeNode("float")))
+ .addResult(static_cast<AggregationResult &>(*clone).setExpression(AttributeNode("string")))
+ );
+
+ Group expect = Group()
+ .addResult(static_cast<AggregationResult &>(*clone).setExpression(AttributeNode("int")).setResult(ir))
+ .addResult(static_cast<AggregationResult &>(*clone).setExpression(AttributeNode("float")).setResult(fr))
+ .addResult(static_cast<AggregationResult &>(*clone).setExpression(AttributeNode("string")).setResult(sr));
+
+ EXPECT_TRUE(testAggregation(ctx, request, expect));
+}
+
+/**
+ * Verify that the backend aggregation will classify and collect on
+ * the appropriate levels, as indicated by the firstLevel and
+ * lastLevel parameters.
+ **/
+void
+Test::testAggregationLevels()
+{
+ AggregationContext ctx;
+ ctx.add(IntAttrBuilder("attr0").add(10).add(10).sp());
+ ctx.add(IntAttrBuilder("attr1").add(11).add(11).sp());
+ ctx.add(IntAttrBuilder("attr2").add(12).add(12).sp());
+ ctx.add(IntAttrBuilder("attr3").add(13).add(13).sp());
+ ctx.result().add(0).add(1);
+
+ Grouping baseRequest = Grouping()
+ .setRoot(Group()
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("attr0"))))
+ .addLevel(GroupingLevel()
+ .setExpression(AttributeNode("attr1"))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("attr2"))))
+ .addLevel(GroupingLevel()
+ .setExpression(AttributeNode("attr2"))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("attr3"))))
+ .addLevel(GroupingLevel()
+ .setExpression(AttributeNode("attr3"))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("attr1"))));
+
+ Group notDone = Group()
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("attr0")));
+// Hmm, do not need to prepare more than the levels needed. .setResult(Int64ResultNode(0)));
+
+ Group done0 = Group()
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("attr0"))
+ .setResult(Int64ResultNode(20)))
+ .addChild(Group()
+ .setId(Int64ResultNode(11))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("attr2"))
+ .setResult(Int64ResultNode(0))));
+
+ Group done1 = Group()
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("attr0"))
+ .setResult(Int64ResultNode(20)))
+ .addChild(Group()
+ .setId(Int64ResultNode(11))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("attr2"))
+ .setResult(Int64ResultNode(24)))
+ .addChild(Group()
+ .setId(Int64ResultNode(12))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("attr3"))
+ .setResult(Int64ResultNode(0)))));
+
+ Group done2 = Group()
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("attr0"))
+ .setResult(Int64ResultNode(20)))
+ .addChild(Group()
+ .setId(Int64ResultNode(11))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("attr2"))
+ .setResult(Int64ResultNode(24)))
+ .addChild(Group()
+ .setId(Int64ResultNode(12))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("attr3"))
+ .setResult(Int64ResultNode(26)))
+ .addChild(Group()
+ .setId(Int64ResultNode(13))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("attr1"))
+ .setResult(Int64ResultNode(0))))));
+
+ Group done3 = Group()
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("attr0"))
+ .setResult(Int64ResultNode(20)))
+ .addChild(Group()
+ .setId(Int64ResultNode(11))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("attr2"))
+ .setResult(Int64ResultNode(24)))
+ .addChild(Group()
+ .setId(Int64ResultNode(12))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("attr3"))
+ .setResult(Int64ResultNode(26)))
+ .addChild(Group()
+ .setId(Int64ResultNode(13))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("attr1"))
+ .setResult(Int64ResultNode(22))))));
+
+ { // level 0 only
+ Grouping request = baseRequest.unchain().setFirstLevel(0).setLastLevel(0);
+ EXPECT_TRUE(testAggregation(ctx, request, done0));
+ }
+ { // level 0 and 1
+ Grouping request = baseRequest.unchain().setFirstLevel(0).setLastLevel(1);
+ EXPECT_TRUE(testAggregation(ctx, request, done1));
+ }
+ { // level 0,1 and 2
+ Grouping request = baseRequest.unchain().setFirstLevel(0).setLastLevel(2);
+ EXPECT_TRUE(testAggregation(ctx, request, done2));
+ }
+ { // level 0,1,2 and 3
+ Grouping request = baseRequest.unchain().setFirstLevel(0).setLastLevel(3);
+ EXPECT_TRUE(testAggregation(ctx, request, done3));
+ }
+ { // level 1 with level 0 as input
+ Grouping request = baseRequest.unchain().setFirstLevel(1).setLastLevel(1).setRoot(done0);
+ EXPECT_TRUE(testAggregation(ctx, request, done1));
+ }
+ { // level 2 with level 0 and 1 as input
+ Grouping request = baseRequest.unchain().setFirstLevel(2).setLastLevel(2).setRoot(done1);
+ EXPECT_TRUE(testAggregation(ctx, request, done2));
+ }
+ { // level 3 with level 0,1 and 2 as input
+ Grouping request = baseRequest.unchain().setFirstLevel(3).setLastLevel(3).setRoot(done2);
+ EXPECT_TRUE(testAggregation(ctx, request, done3));
+ }
+ { // level 2 and 3 with level 0 and 1 as input
+ Grouping request = baseRequest.unchain().setFirstLevel(2).setLastLevel(3).setRoot(done1);
+ EXPECT_TRUE(testAggregation(ctx, request, done3));
+ }
+ { // level 1 without level 0 as input
+ Grouping request = baseRequest.unchain().setFirstLevel(1).setLastLevel(1);
+ EXPECT_TRUE(testAggregation(ctx, request, notDone));
+ }
+}
+
+/**
+ * Verify that the aggregation step does not create more groups than
+ * indicated by the maxgroups parameter.
+ **/
+void
+Test::testAggregationMaxGroups()
+{
+ AggregationContext ctx;
+ ctx.add(IntAttrBuilder("attr").add(5).add(10).add(15).sp());
+ ctx.result().add(0).add(1).add(2);
+
+ Grouping baseRequest = Grouping()
+ .addLevel(GroupingLevel()
+ .setExpression(AttributeNode("attr")));
+
+ Group empty = Group();
+ Group grp1 = empty.unchain().addChild(Group().setId(Int64ResultNode(5)));
+ Group grp2 = grp1.unchain().addChild(Group().setId(Int64ResultNode(10)));
+ Group grp3 = grp2.unchain().addChild(Group().setId(Int64ResultNode(15)));
+
+ { // max 0 groups
+ Grouping request = baseRequest;
+ request.levels()[0].setMaxGroups(0);
+ EXPECT_TRUE(testAggregation(ctx, request, empty));
+ }
+ { // max 1 groups
+ Grouping request = baseRequest;
+ request.levels()[0].setMaxGroups(1);
+ EXPECT_TRUE(testAggregation(ctx, request, grp1));
+ }
+ { // max 2 groups
+ Grouping request = baseRequest;
+ request.levels()[0].setMaxGroups(2);
+ EXPECT_TRUE(testAggregation(ctx, request, grp2));
+ }
+ { // max 3 groups
+ Grouping request = baseRequest;
+ request.levels()[0].setMaxGroups(3);
+ EXPECT_TRUE(testAggregation(ctx, request, grp3));
+ }
+ { // max 4 groups
+ Grouping request = baseRequest;
+ request.levels()[0].setMaxGroups(4);
+ EXPECT_TRUE(testAggregation(ctx, request, grp3));
+ }
+ { // max -1 groups
+ Grouping request = baseRequest;
+ request.levels()[0].setMaxGroups(-1);
+ EXPECT_TRUE(testAggregation(ctx, request, grp3));
+ }
+}
+
+/**
+ * Verify that groups are sorted by group id
+ **/
+void
+Test::testAggregationGroupOrder()
+{
+ AggregationContext ctx;
+ ctx.add(IntAttrBuilder("attr").add(10).add(25).add(35).add(5).add(20).add(15).add(30).sp());
+ ctx.result().add(0).add(1).add(2).add(3).add(4).add(5).add(6);
+
+ Grouping request = Grouping()
+ .addLevel(GroupingLevel()
+ .setExpression(AttributeNode("attr")));
+
+ Group expect = Group()
+ .addChild(Group().setId(Int64ResultNode(5)))
+ .addChild(Group().setId(Int64ResultNode(10)))
+ .addChild(Group().setId(Int64ResultNode(15)))
+ .addChild(Group().setId(Int64ResultNode(20)))
+ .addChild(Group().setId(Int64ResultNode(25)))
+ .addChild(Group().setId(Int64ResultNode(30)))
+ .addChild(Group().setId(Int64ResultNode(35)));
+
+ EXPECT_TRUE(testAggregation(ctx, request, expect));
+}
+
+/**
+ * Verify that groups are tagged with the appropriate rank value.
+ **/
+void
+Test::testAggregationGroupRank()
+{
+ AggregationContext ctx;
+ ctx.add(IntAttrBuilder("attr")
+ .add(1).add(1).add(1)
+ .add(2).add(2).add(2)
+ .add(3).add(3).add(3).sp());
+ ctx.result()
+ .add(0, 5).add(1, 10).add(2, 15)
+ .add(3, 10).add(4, 15).add(5, 5)
+ .add(6, 15).add(7, 5).add(8, 10);
+
+ Grouping request = Grouping().addLevel(
+ GroupingLevel().setExpression(AttributeNode("attr")));
+
+ Group expect = Group()
+ .addChild(Group().setId(Int64ResultNode(1)).setRank(RawRank(15)))
+ .addChild(Group().setId(Int64ResultNode(2)).setRank(RawRank(15)))
+ .addChild(Group().setId(Int64ResultNode(3)).setRank(RawRank(15)));
+
+ EXPECT_TRUE(testAggregation(ctx, request, expect));
+}
+
+void
+Test::testAggregationGroupCapping()
+{
+ AggregationContext ctx;
+ ctx.add(IntAttrBuilder("attr")
+ .add(1).add(2).add(3)
+ .add(4).add(5).add(6)
+ .add(7).add(8).add(9).sp());
+ ctx.result()
+ .add(0, 1).add(1, 2).add(2, 3)
+ .add(3, 4).add(4, 5).add(5, 6)
+ .add(6, 7).add(7, 8).add(8, 9);
+
+ {
+ Grouping request = Grouping().addLevel(
+ GroupingLevel().setExpression(AttributeNode("attr")));
+
+ Group expect = Group()
+ .addChild(Group().setId(Int64ResultNode(1)).setRank(RawRank(1)))
+ .addChild(Group().setId(Int64ResultNode(2)).setRank(RawRank(2)))
+ .addChild(Group().setId(Int64ResultNode(3)).setRank(RawRank(3)))
+ .addChild(Group().setId(Int64ResultNode(4)).setRank(RawRank(4)))
+ .addChild(Group().setId(Int64ResultNode(5)).setRank(RawRank(5)))
+ .addChild(Group().setId(Int64ResultNode(6)).setRank(RawRank(6)))
+ .addChild(Group().setId(Int64ResultNode(7)).setRank(RawRank(7)))
+ .addChild(Group().setId(Int64ResultNode(8)).setRank(RawRank(8)))
+ .addChild(Group().setId(Int64ResultNode(9)).setRank(RawRank(9)));
+
+ EXPECT_TRUE(testAggregation(ctx, request, expect));
+ }
+ {
+ Grouping request = Grouping().addLevel(
+ GroupingLevel().setMaxGroups(3).setExpression(AttributeNode("attr")));
+
+ Group expect = Group()
+ .addChild(Group().setId(Int64ResultNode(7)).setRank(RawRank(7)))
+ .addChild(Group().setId(Int64ResultNode(8)).setRank(RawRank(8)))
+ .addChild(Group().setId(Int64ResultNode(9)).setRank(RawRank(9)));
+
+ EXPECT_TRUE(testAggregation(ctx, request, expect));
+ }
+ {
+ Grouping request = Grouping().
+ setFirstLevel(0).
+ setLastLevel(1).
+ addLevel(
+ GroupingLevel().setMaxGroups(3).setExpression(AttributeNode("attr")).
+ addAggregationResult(SumAggregationResult().setExpression(AttributeNode("attr"))).
+ addOrderBy(AggregationRefNode(0), false));
+
+ Group expect = Group()
+ .addChild(Group().setId(Int64ResultNode(7)).setRank(RawRank(7)).addAggregationResult(SumAggregationResult(Int64ResultNode(7)).setExpression(AttributeNode("attr"))).addOrderBy(AggregationRefNode(0), false))
+ .addChild(Group().setId(Int64ResultNode(8)).setRank(RawRank(8)).addAggregationResult(SumAggregationResult(Int64ResultNode(8)).setExpression(AttributeNode("attr"))).addOrderBy(AggregationRefNode(0), false))
+ .addChild(Group().setId(Int64ResultNode(9)).setRank(RawRank(9)).addAggregationResult(SumAggregationResult(Int64ResultNode(9)).setExpression(AttributeNode("attr"))).addOrderBy(AggregationRefNode(0), false));
+
+ EXPECT_TRUE(testAggregation(ctx, request, expect));
+ }
+ {
+ Grouping request = Grouping().
+ setFirstLevel(0).
+ setLastLevel(1).
+ addLevel(
+ GroupingLevel().setMaxGroups(3).setExpression(AttributeNode("attr")).
+ addAggregationResult(SumAggregationResult().setExpression(AttributeNode("attr"))).addOrderBy(AggregationRefNode(0), true));
+
+ Group expect = Group()
+ .addChild(Group().setId(Int64ResultNode(1)).setRank(RawRank(1)).addAggregationResult(SumAggregationResult(Int64ResultNode(1)).setExpression(AttributeNode("attr"))).addOrderBy(AggregationRefNode(0), true))
+ .addChild(Group().setId(Int64ResultNode(2)).setRank(RawRank(2)).addAggregationResult(SumAggregationResult(Int64ResultNode(2)).setExpression(AttributeNode("attr"))).addOrderBy(AggregationRefNode(0), true))
+ .addChild(Group().setId(Int64ResultNode(3)).setRank(RawRank(3)).addAggregationResult(SumAggregationResult(Int64ResultNode(3)).setExpression(AttributeNode("attr"))).addOrderBy(AggregationRefNode(0), true));
+
+ EXPECT_TRUE(testAggregation(ctx, request, expect));
+ }
+ {
+ AddFunctionNode *add = new AddFunctionNode();
+ add->addArg(AggregationRefNode(0));
+ add->appendArg(ConstantNode(Int64ResultNode(3)));
+ ExpressionNode::CP i1(add);
+ Grouping request = Grouping().
+ setFirstLevel(0).
+ setLastLevel(1).
+ addLevel(
+ GroupingLevel().setMaxGroups(3).setExpression(AttributeNode("attr")).
+ addAggregationResult(SumAggregationResult().setExpression(AttributeNode("attr"))).
+ addOrderBy(i1, false));
+
+ Group expect = Group()
+ .addChild(Group().setId(Int64ResultNode(7)).setRank(RawRank(7)).addAggregationResult(SumAggregationResult(Int64ResultNode(7)).setExpression(AttributeNode("attr"))).addOrderBy(AddFunctionNode().appendArg(AggregationRefNode(0)).appendArg(ConstantNode(Int64ResultNode(3))).setResult(Int64ResultNode(10)), false))
+ .addChild(Group().setId(Int64ResultNode(8)).setRank(RawRank(8)).addAggregationResult(SumAggregationResult(Int64ResultNode(8)).setExpression(AttributeNode("attr"))).addOrderBy(AddFunctionNode().appendArg(AggregationRefNode(0)).appendArg(ConstantNode(Int64ResultNode(3))).setResult(Int64ResultNode(11)), false))
+ .addChild(Group().setId(Int64ResultNode(9)).setRank(RawRank(9)).addAggregationResult(SumAggregationResult(Int64ResultNode(9)).setExpression(AttributeNode("attr"))).addOrderBy(AddFunctionNode().appendArg(AggregationRefNode(0)).appendArg(ConstantNode(Int64ResultNode(3))).setResult(Int64ResultNode(12)), false));
+
+ EXPECT_TRUE(testAggregation(ctx, request, expect));
+ }
+
+}
+
+//-----------------------------------------------------------------------------
+
+/**
+ * Test merging the sum of the values from a single attribute vector
+ * that was collected directly into the root node. Consider this a
+ * smoke test.
+ **/
+void
+Test::testMergeSimpleSum()
+{
+ Grouping a = Grouping()
+ .setRoot(Group()
+ .setId(NullResultNode())
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("foo"))
+ .setResult(Int64ResultNode(20))));
+
+ Grouping b = Grouping()
+ .setRoot(Group()
+ .setId(NullResultNode())
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("foo"))
+ .setResult(Int64ResultNode(30))));
+
+ Group expect = Group()
+ .setId(NullResultNode())
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("foo"))
+ .setResult(Int64ResultNode(50)));
+
+ EXPECT_TRUE(testMerge(a, b, expect));
+}
+
+/**
+ * Verify that frozen levels are not touched during merge.
+ **/
+void
+Test::testMergeLevels()
+{
+ Grouping request = Grouping()
+ .addLevel(GroupingLevel()
+ .setExpression(AttributeNode("c1"))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s1"))))
+ .addLevel(GroupingLevel()
+ .setExpression(AttributeNode("c2"))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s2"))))
+ .addLevel(GroupingLevel()
+ .setExpression(AttributeNode("c3"))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s3"))));
+
+ Group a = Group()
+ .setId(NullResultNode())
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s0"))
+ .setResult(Int64ResultNode(5)))
+ .addChild(Group()
+ .setId(Int64ResultNode(10))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s1"))
+ .setResult(Int64ResultNode(10)))
+ .addChild(Group()
+ .setId(Int64ResultNode(20))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s2"))
+ .setResult(Int64ResultNode(15)))
+ .addChild(Group()
+ .setId(Int64ResultNode(30))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s3"))
+ .setResult(Int64ResultNode(20))))));
+
+ Group b = Group()
+ .setId(NullResultNode())
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s0"))
+ .setResult(Int64ResultNode(5)))
+ .addChild(Group()
+ .setId(Int64ResultNode(10))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s1"))
+ .setResult(Int64ResultNode(10)))
+ .addChild(Group()
+ .setId(Int64ResultNode(20))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s2"))
+ .setResult(Int64ResultNode(15)))
+ .addChild(Group()
+ .setId(Int64ResultNode(30))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s3"))
+ .setResult(Int64ResultNode(20))))));
+
+ Group expect_all = Group()
+ .setId(NullResultNode())
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s0"))
+ .setResult(Int64ResultNode(10)))
+ .addChild(Group()
+ .setId(Int64ResultNode(10))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s1"))
+ .setResult(Int64ResultNode(20)))
+ .addChild(Group()
+ .setId(Int64ResultNode(20))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s2"))
+ .setResult(Int64ResultNode(30)))
+ .addChild(Group()
+ .setId(Int64ResultNode(30))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s3"))
+ .setResult(Int64ResultNode(40))))));
+
+ Group expect_0 = Group()
+ .setId(NullResultNode())
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s0"))
+ .setResult(Int64ResultNode(5)))
+ .addChild(Group()
+ .setId(Int64ResultNode(10))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s1"))
+ .setResult(Int64ResultNode(20)))
+ .addChild(Group()
+ .setId(Int64ResultNode(20))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s2"))
+ .setResult(Int64ResultNode(30)))
+ .addChild(Group()
+ .setId(Int64ResultNode(30))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s3"))
+ .setResult(Int64ResultNode(40))))));
+
+
+ Group expect_1 = Group()
+ .setId(NullResultNode())
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s0"))
+ .setResult(Int64ResultNode(5)))
+ .addChild(Group()
+ .setId(Int64ResultNode(10))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s1"))
+ .setResult(Int64ResultNode(10)))
+ .addChild(Group()
+ .setId(Int64ResultNode(20))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s2"))
+ .setResult(Int64ResultNode(30)))
+ .addChild(Group()
+ .setId(Int64ResultNode(30))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s3"))
+ .setResult(Int64ResultNode(40))))));
+
+
+ Group expect_2 = Group()
+ .setId(NullResultNode())
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s0"))
+ .setResult(Int64ResultNode(5)))
+ .addChild(Group()
+ .setId(Int64ResultNode(10))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s1"))
+ .setResult(Int64ResultNode(10)))
+ .addChild(Group()
+ .setId(Int64ResultNode(20))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s2"))
+ .setResult(Int64ResultNode(15)))
+ .addChild(Group()
+ .setId(Int64ResultNode(30))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s3"))
+ .setResult(Int64ResultNode(40))))));
+
+
+ Group expect_3 = Group()
+ .setId(NullResultNode())
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s0"))
+ .setResult(Int64ResultNode(5)))
+ .addChild(Group()
+ .setId(Int64ResultNode(10))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s1"))
+ .setResult(Int64ResultNode(10)))
+ .addChild(Group()
+ .setId(Int64ResultNode(20))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s2"))
+ .setResult(Int64ResultNode(15)))
+ .addChild(Group()
+ .setId(Int64ResultNode(30))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s3"))
+ .setResult(Int64ResultNode(20))))));
+
+ EXPECT_TRUE(testMerge(request.unchain().setFirstLevel(0).setLastLevel(3).setRoot(a),
+ request.unchain().setFirstLevel(0).setLastLevel(3).setRoot(b),
+ expect_all));
+ EXPECT_TRUE(testMerge(request.unchain().setFirstLevel(1).setLastLevel(3).setRoot(a),
+ request.unchain().setFirstLevel(1).setLastLevel(3).setRoot(b),
+ expect_0));
+ EXPECT_TRUE(testMerge(request.unchain().setFirstLevel(2).setLastLevel(5).setRoot(a),
+ request.unchain().setFirstLevel(2).setLastLevel(5).setRoot(b),
+ expect_1));
+ EXPECT_TRUE(testMerge(request.unchain().setFirstLevel(3).setLastLevel(5).setRoot(a),
+ request.unchain().setFirstLevel(3).setLastLevel(5).setRoot(b),
+ expect_2));
+ EXPECT_TRUE(testMerge(request.unchain().setFirstLevel(4).setLastLevel(4).setRoot(a),
+ request.unchain().setFirstLevel(4).setLastLevel(4).setRoot(b),
+ expect_3));
+}
+
+/**
+ * Verify that the number of groups for a level is pruned down to
+ * maxGroups, that the remaining groups are the highest ranked ones,
+ * and that they are sorted by group id.
+ **/
+void
+Test::testMergeGroups()
+{
+ Grouping request = Grouping()
+ .addLevel(GroupingLevel()
+ .setExpression(AttributeNode("attr")));
+
+ Group a = Group()
+ .setId(NullResultNode())
+ .addChild(Group().setId(StringResultNode("05")).setRank(RawRank(5)))
+ .addChild(Group().setId(StringResultNode("10")).setRank(RawRank(5))) // (2)
+ .addChild(Group().setId(StringResultNode("15")).setRank(RawRank(15)))
+ .addChild(Group().setId(StringResultNode("40")).setRank(RawRank(100))) // 1
+ .addChild(Group().setId(StringResultNode("50")).setRank(RawRank(30))); // 3
+
+ Group b = Group()
+ .setId(NullResultNode())
+ .addChild(Group().setId(StringResultNode("00")).setRank(RawRank(10)))
+ .addChild(Group().setId(StringResultNode("10")).setRank(RawRank(50))) // 2
+ .addChild(Group().setId(StringResultNode("20")).setRank(RawRank(25))) // 4
+ .addChild(Group().setId(StringResultNode("40")).setRank(RawRank(10))) // (1)
+ .addChild(Group().setId(StringResultNode("45")).setRank(RawRank(20))); // 5
+
+ Group expect_3 = Group()
+ .setId(NullResultNode())
+ .addChild(Group().setId(StringResultNode("10")).setRank(RawRank(50)))
+ .addChild(Group().setId(StringResultNode("40")).setRank(RawRank(100)))
+ .addChild(Group().setId(StringResultNode("50")).setRank(RawRank(30)));
+
+ Group expect_5 = Group()
+ .setId(NullResultNode())
+ .addChild(Group().setId(StringResultNode("10")).setRank(RawRank(50)))
+ .addChild(Group().setId(StringResultNode("20")).setRank(RawRank(25)))
+ .addChild(Group().setId(StringResultNode("40")).setRank(RawRank(100)))
+ .addChild(Group().setId(StringResultNode("45")).setRank(RawRank(20)))
+ .addChild(Group().setId(StringResultNode("50")).setRank(RawRank(30)));
+
+ Group expect_all = Group()
+ .setId(NullResultNode())
+ .addChild(Group().setId(StringResultNode("00")).setRank(RawRank(10)))
+ .addChild(Group().setId(StringResultNode("05")).setRank(RawRank( 5)))
+ .addChild(Group().setId(StringResultNode("10")).setRank(RawRank(50)))
+ .addChild(Group().setId(StringResultNode("15")).setRank(RawRank(15)))
+ .addChild(Group().setId(StringResultNode("20")).setRank(RawRank(25)))
+ .addChild(Group().setId(StringResultNode("40")).setRank(RawRank(100)))
+ .addChild(Group().setId(StringResultNode("45")).setRank(RawRank(20)))
+ .addChild(Group().setId(StringResultNode("50")).setRank(RawRank(30)));
+
+ request.levels()[0].setMaxGroups(3);
+ EXPECT_TRUE(testMerge(request.unchain().setRoot(a), request.unchain().setRoot(b), expect_3));
+ EXPECT_TRUE(testMerge(request.unchain().setRoot(b), request.unchain().setRoot(a), expect_3));
+ request.levels()[0].setMaxGroups(5);
+ EXPECT_TRUE(testMerge(request.unchain().setRoot(a), request.unchain().setRoot(b), expect_5));
+ EXPECT_TRUE(testMerge(request.unchain().setRoot(b), request.unchain().setRoot(a), expect_5));
+ request.levels()[0].setMaxGroups(-1);
+ EXPECT_TRUE(testMerge(request.unchain().setRoot(a), request.unchain().setRoot(b), expect_all));
+ EXPECT_TRUE(testMerge(request.unchain().setRoot(b), request.unchain().setRoot(a), expect_all));
+}
+
+/**
+ * Merge two relatively complex tree structures and verify that the
+ * end result is as expected.
+ **/
+void
+Test::testMergeTrees()
+{
+ Grouping request = Grouping()
+ .addLevel(GroupingLevel()
+ .setMaxGroups(3)
+ .setExpression(AttributeNode("c1"))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s1"))))
+ .addLevel(GroupingLevel()
+ .setMaxGroups(2)
+ .setExpression(AttributeNode("c2"))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s2"))))
+ .addLevel(GroupingLevel()
+ .setMaxGroups(1)
+ .setExpression(AttributeNode("c3"))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s3"))));
+
+ Group a = Group()
+ .setId(NullResultNode())
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s0"))
+ .setResult(Int64ResultNode(100)))
+ .addChild(Group().setId(Int64ResultNode(4)).setRank(RawRank(10)))
+ .addChild(Group()
+ .setId(Int64ResultNode(5))
+ .setRank(RawRank(5)) // merged with 200 rank node
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s1"))
+ .setResult(Int64ResultNode(100)))
+ .addChild(Group().setId(Int64ResultNode(4)).setRank(RawRank(10)))
+ .addChild(Group()
+ .setId(Int64ResultNode(5))
+ .setRank(RawRank(500))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s2"))
+ .setResult(Int64ResultNode(100)))
+ .addChild(Group().setId(Int64ResultNode(4)).setRank(RawRank(10)))
+ .addChild(Group()
+ .setId(Int64ResultNode(5))
+ .setRank(RawRank(200))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s3"))
+ .setResult(Int64ResultNode(100)))
+ )
+ )
+ )
+ .addChild(Group().setId(Int64ResultNode(9)).setRank(RawRank(10)))
+ .addChild(Group()
+ .setId(Int64ResultNode(10))
+ .setRank(RawRank(100))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s1"))
+ .setResult(Int64ResultNode(100)))
+ // dummy child would be picked up here
+ .addChild(Group()
+ .setId(Int64ResultNode(15))
+ .setRank(RawRank(200))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s2"))
+ .setResult(Int64ResultNode(100)))
+ .addChild(Group().setId(Int64ResultNode(14)).setRank(RawRank(10)))
+ .addChild(Group()
+ .setId(Int64ResultNode(15))
+ .setRank(RawRank(300))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s3"))
+ .setResult(Int64ResultNode(100)))
+ )
+ )
+ )
+ .addChild(Group().setId(Int64ResultNode(14)).setRank(RawRank(10)))
+ .addChild(Group()
+ .setId(Int64ResultNode(15))
+ .setRank(RawRank(300))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s1"))
+ .setResult(Int64ResultNode(100)))
+ .addChild(Group().setId(Int64ResultNode(19)).setRank(RawRank(10)))
+ .addChild(Group()
+ .setId(Int64ResultNode(20))
+ .setRank(RawRank(100))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s2"))
+ .setResult(Int64ResultNode(100)))
+ )
+ );
+
+ Group b = Group()
+ .setId(NullResultNode())
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s0"))
+ .setResult(Int64ResultNode(100)))
+ .addChild(Group().setId(Int64ResultNode(4)).setRank(RawRank(10)))
+ .addChild(Group()
+ .setId(Int64ResultNode(5))
+ .setRank(RawRank(200))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s1"))
+ .setResult(Int64ResultNode(100)))
+ .addChild(Group().setId(Int64ResultNode(9)).setRank(RawRank(10)))
+ .addChild(Group()
+ .setId(Int64ResultNode(10))
+ .setRank(RawRank(400))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s2"))
+ .setResult(Int64ResultNode(100)))
+ .addChild(Group().setId(Int64ResultNode(9)).setRank(RawRank(10)))
+ .addChild(Group()
+ .setId(Int64ResultNode(10))
+ .setRank(RawRank(100))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s3"))
+ .setResult(Int64ResultNode(100)))
+ )
+ )
+ )
+ .addChild(Group().setId(Int64ResultNode(9)).setRank(RawRank(10)))
+ .addChild(Group()
+ .setId(Int64ResultNode(10))
+ .setRank(RawRank(100))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s1"))
+ .setResult(Int64ResultNode(100)))
+ // dummy child would be picket up here
+ .addChild(Group()
+ .setId(Int64ResultNode(15))
+ .setRank(RawRank(200))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s2"))
+ .setResult(Int64ResultNode(100)))
+ )
+ )
+ .addChild(Group().setId(Int64ResultNode(14)).setRank(RawRank(10)))
+ .addChild(Group()
+ .setId(Int64ResultNode(15))
+ .setRank(RawRank(5)) // merged with 300 rank node
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s1"))
+ .setResult(Int64ResultNode(100)))
+ .addChild(Group().setId(Int64ResultNode(19)).setRank(RawRank(10)))
+ .addChild(Group()
+ .setId(Int64ResultNode(20))
+ .setRank(RawRank(5)) // merged with 100 rank node
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s2"))
+ .setResult(Int64ResultNode(100)))
+ .addChild(Group().setId(Int64ResultNode(19)).setRank(RawRank(10)))
+ .addChild(Group()
+ .setId(Int64ResultNode(20))
+ .setRank(RawRank(500))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s3"))
+ .setResult(Int64ResultNode(100)))
+ )
+ )
+ .addChild(Group().setId(Int64ResultNode(24)).setRank(RawRank(10)))
+ .addChild(Group()
+ .setId(Int64ResultNode(25))
+ .setRank(RawRank(300))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s2"))
+ .setResult(Int64ResultNode(100)))
+ .addChild(Group().setId(Int64ResultNode(24)).setRank(RawRank(10)))
+ .addChild(Group()
+ .setId(Int64ResultNode(25))
+ .setRank(RawRank(400))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s3"))
+ .setResult(Int64ResultNode(100)))
+ )
+ )
+ );
+
+ Group expect = Group()
+ .setId(NullResultNode())
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s0"))
+ .setResult(Int64ResultNode(200)))
+ .addChild(Group()
+ .setId(Int64ResultNode(5))
+ .setRank(RawRank(200))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s1"))
+ .setResult(Int64ResultNode(200)))
+ .addChild(Group()
+ .setId(Int64ResultNode(5))
+ .setRank(RawRank(500))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s2"))
+ .setResult(Int64ResultNode(100)))
+ .addChild(Group()
+ .setId(Int64ResultNode(5))
+ .setRank(RawRank(200))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s3"))
+ .setResult(Int64ResultNode(100)))
+ )
+ )
+ .addChild(Group()
+ .setId(Int64ResultNode(10))
+ .setRank(RawRank(400))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s2"))
+ .setResult(Int64ResultNode(100)))
+ .addChild(Group()
+ .setId(Int64ResultNode(10))
+ .setRank(RawRank(100))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s3"))
+ .setResult(Int64ResultNode(100)))
+ )
+ )
+ )
+ .addChild(Group()
+ .setId(Int64ResultNode(10))
+ .setRank(RawRank(100))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s1"))
+ .setResult(Int64ResultNode(200)))
+ .addChild(Group()
+ .setId(Int64ResultNode(15))
+ .setRank(RawRank(200))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s2"))
+ .setResult(Int64ResultNode(200)))
+ .addChild(Group()
+ .setId(Int64ResultNode(15))
+ .setRank(RawRank(300))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s3"))
+ .setResult(Int64ResultNode(100)))
+ )
+ )
+ )
+ .addChild(Group()
+ .setId(Int64ResultNode(15))
+ .setRank(RawRank(300))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s1"))
+ .setResult(Int64ResultNode(200)))
+ .addChild(Group()
+ .setId(Int64ResultNode(20))
+ .setRank(RawRank(100))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s2"))
+ .setResult(Int64ResultNode(200)))
+ .addChild(Group()
+ .setId(Int64ResultNode(20))
+ .setRank(RawRank(500))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s3"))
+ .setResult(Int64ResultNode(100)))
+ )
+ )
+ .addChild(Group()
+ .setId(Int64ResultNode(25))
+ .setRank(RawRank(300))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s2"))
+ .setResult(Int64ResultNode(100)))
+ .addChild(Group()
+ .setId(Int64ResultNode(25))
+ .setRank(RawRank(400))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s3"))
+ .setResult(Int64ResultNode(100)))
+ )
+ )
+ );
+
+ EXPECT_TRUE(testMerge(request.unchain().setRoot(a), request.unchain().setRoot(b), expect));
+ EXPECT_TRUE(testMerge(request.unchain().setRoot(b), request.unchain().setRoot(a), expect));
+}
+
+void
+Test::testPruneComplex()
+{
+ { // First level
+ Group baseTree = Group()
+ .addChild(Group().setId(StringResultNode("bar0"))
+ .addChild(Group().setId(StringResultNode("bar00"))
+ .addChild(Group().setId(StringResultNode("bar000")))
+ .addChild(Group().setId(StringResultNode("bar001")))
+ .addChild(Group().setId(StringResultNode("bar002"))))
+ .addChild(Group().setId(StringResultNode("bar01"))))
+ .addChild(Group().setId(StringResultNode("baz0"))
+ .addChild(Group().setId(StringResultNode("baz00"))
+ .addChild(Group().setId(StringResultNode("baz000")))
+ .addChild(Group().setId(StringResultNode("baz001")))))
+ .addChild(Group().setId(StringResultNode("foo0"))
+ .addChild(Group().setId(StringResultNode("foo00")))
+ .addChild(Group().setId(StringResultNode("foo01"))));
+
+ Group prune = Group()
+ .addChild(Group().setId(StringResultNode("bar0")))
+ .addChild(Group().setId(StringResultNode("foo0")));
+
+ Group expect = Group()
+ .addChild(Group().setId(StringResultNode("bar0"))
+ .addChild(Group().setId(StringResultNode("bar00"))
+ .addChild(Group().setId(StringResultNode("bar000")))
+ .addChild(Group().setId(StringResultNode("bar001")))
+ .addChild(Group().setId(StringResultNode("bar002"))))
+ .addChild(Group().setId(StringResultNode("bar01"))))
+ .addChild(Group().setId(StringResultNode("foo0"))
+ .addChild(Group().setId(StringResultNode("foo00")))
+ .addChild(Group().setId(StringResultNode("foo01"))));
+ Grouping request = Grouping().setFirstLevel(1).setLastLevel(1);
+ Grouping baseRequest = Grouping().setFirstLevel(0).setLastLevel(3);
+ EXPECT_TRUE(testPrune(baseRequest.unchain().setRoot(baseTree), request.unchain().setRoot(prune), expect));
+ }
+ { // Second level
+ Group baseTree = Group()
+ .addChild(Group().setId(StringResultNode("bar0"))
+ .addChild(Group().setId(StringResultNode("bar00"))
+ .addChild(Group().setId(StringResultNode("bar000")))
+ .addChild(Group().setId(StringResultNode("bar001")))
+ .addChild(Group().setId(StringResultNode("bar002"))))
+ .addChild(Group().setId(StringResultNode("bar01"))))
+ .addChild(Group().setId(StringResultNode("foo0"))
+ .addChild(Group().setId(StringResultNode("foo00")))
+ .addChild(Group().setId(StringResultNode("foo01"))));
+
+ Group prune = Group()
+ .addChild(Group()
+ .setId(StringResultNode("bar0"))
+ .addChild(Group().setId(StringResultNode("bar00"))))
+ .addChild(Group()
+ .setId(StringResultNode("foo0"))
+ .addChild(Group().setId(StringResultNode("foo01"))));
+
+ Group expect = Group()
+ .addChild(Group().setId(StringResultNode("bar0"))
+ .addChild(Group().setId(StringResultNode("bar00"))
+ .addChild(Group().setId(StringResultNode("bar000")))
+ .addChild(Group().setId(StringResultNode("bar001")))
+ .addChild(Group().setId(StringResultNode("bar002")))))
+ .addChild(Group().setId(StringResultNode("foo0"))
+ .addChild(Group().setId(StringResultNode("foo01"))));
+
+ Grouping request = Grouping().setFirstLevel(2).setLastLevel(2);
+ Grouping baseRequest = Grouping().setFirstLevel(0).setLastLevel(3);
+ EXPECT_TRUE(testPrune(baseRequest.unchain().setRoot(baseTree), request.unchain().setRoot(prune), expect));
+ }
+ { // Third level
+ Group baseTree = Group()
+ .addChild(Group().setId(StringResultNode("bar0"))
+ .addChild(Group().setId(StringResultNode("bar00"))
+ .addChild(Group().setId(StringResultNode("bar000")))
+
+ .addChild(Group().setId(StringResultNode("bar001")))
+ .addChild(Group().setId(StringResultNode("bar002")))))
+ .addChild(Group().setId(StringResultNode("foo0"))
+ .addChild(Group().setId(StringResultNode("foo01"))));
+ Group prune = Group()
+ .addChild(Group()
+ .setId(StringResultNode("bar0"))
+ .addChild(Group()
+ .setId(StringResultNode("bar00"))
+ .addChild(Group().setId(StringResultNode("bar001")))
+ .addChild(Group().setId(StringResultNode("bar002")))));
+
+ Group expect = Group()
+ .addChild(Group().setId(StringResultNode("bar0"))
+ .addChild(Group().setId(StringResultNode("bar00"))
+ .addChild(Group().setId(StringResultNode("bar001")))
+ .addChild(Group().setId(StringResultNode("bar002")))));
+ Grouping request = Grouping().setFirstLevel(3).setLastLevel(3);
+ Grouping baseRequest = Grouping().setFirstLevel(0).setLastLevel(3);
+ EXPECT_TRUE(testPrune(baseRequest.unchain().setRoot(baseTree), request.unchain().setRoot(prune), expect));
+ }
+ { // Try pruning a grouping we don't have
+ Group baseTree = Group()
+ .addChild(Group().setId(StringResultNode("bar0"))
+ .addChild(Group().setId(StringResultNode("bar00"))
+ .addChild(Group().setId(StringResultNode("bar000")))
+ .addChild(Group().setId(StringResultNode("bar001")))
+ .addChild(Group().setId(StringResultNode("bar002"))))
+ .addChild(Group().setId(StringResultNode("bar01"))))
+ .addChild(Group().setId(StringResultNode("baz0"))
+ .addChild(Group().setId(StringResultNode("baz00"))
+ .addChild(Group().setId(StringResultNode("baz000")))
+ .addChild(Group().setId(StringResultNode("baz001")))))
+ .addChild(Group().setId(StringResultNode("foo0"))
+ .addChild(Group().setId(StringResultNode("foo00")))
+ .addChild(Group().setId(StringResultNode("foo01"))));
+
+ Group prune = Group()
+ .addChild(Group().setId(StringResultNode("bar0")))
+ .addChild(Group().setId(StringResultNode("boz0")))
+ .addChild(Group().setId(StringResultNode("foo0")))
+ .addChild(Group().setId(StringResultNode("goo0")));
+
+ Group expect = Group()
+ .addChild(Group().setId(StringResultNode("bar0"))
+ .addChild(Group().setId(StringResultNode("bar00"))
+ .addChild(Group().setId(StringResultNode("bar000")))
+ .addChild(Group().setId(StringResultNode("bar001")))
+ .addChild(Group().setId(StringResultNode("bar002"))))
+ .addChild(Group().setId(StringResultNode("bar01"))))
+ .addChild(Group().setId(StringResultNode("foo0"))
+ .addChild(Group().setId(StringResultNode("foo00")))
+ .addChild(Group().setId(StringResultNode("foo01"))));
+ Grouping request = Grouping().setFirstLevel(1).setLastLevel(1);
+ Grouping baseRequest = Grouping().setFirstLevel(0).setLastLevel(3);
+ EXPECT_TRUE(testPrune(baseRequest.unchain().setRoot(baseTree), request.unchain().setRoot(prune), expect));
+ }
+}
+
+/**
+ * Test partial merge of a grouping tree, where all levels up to "lastLevel" is
+ * merged. The last level should not contain any children groups, and only empty
+ * results.
+ **/
+void
+Test::testPartialMerging()
+{
+ Grouping baseRequest = Grouping()
+ .addLevel(GroupingLevel()
+ .setExpression(AttributeNode("c1"))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s1"))))
+ .addLevel(GroupingLevel()
+ .setExpression(AttributeNode("c2"))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s2"))))
+ .addLevel(GroupingLevel()
+ .setExpression(AttributeNode("c3"))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s3"))));
+
+ // Cached result
+ Group cached = Group()
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s0"))
+ .setResult(Int64ResultNode(110)))
+ .addChild(Group()
+ .setId(Int64ResultNode(5))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s1"))
+ .setResult(Int64ResultNode(10)))
+ .addChild(Group()
+ .setId(Int64ResultNode(13))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s2"))
+ .setResult(Int64ResultNode(100)))
+ .addChild(Group()
+ .setId(Int64ResultNode(14))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s3"))
+ .setResult(Int64ResultNode(100)))
+ )
+ )
+ )
+ .addChild(Group()
+ .setId(Int64ResultNode(10))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s1"))
+ .setResult(Int64ResultNode(100)))
+ .addChild(Group()
+ .setId(Int64ResultNode(15))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s2"))
+ .setResult(Int64ResultNode(100)))
+ .addChild(Group()
+ .setId(Int64ResultNode(22))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s3"))
+ .setResult(Int64ResultNode(100)))
+ )
+ )
+ );
+
+
+ { // Merge lastlevel 0
+ Grouping request = baseRequest.unchain().setFirstLevel(0).setLastLevel(0);
+ Group incoming = Group()
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s0"))
+ .setResult(Int64ResultNode(0)));
+
+ Group expected = Group()
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s0"))
+ .setResult(Int64ResultNode(110)))
+ .addChild(Group()
+ .setId(Int64ResultNode(5))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s1"))
+ .setResult(Int64ResultNode(0)))
+ )
+ .addChild(Group()
+ .setId(Int64ResultNode(10))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s1"))
+ .setResult(Int64ResultNode(0)))
+ );
+ EXPECT_TRUE(testPartialMerge(request.unchain().setRoot(incoming), request.unchain().setLastLevel(3).setRoot(cached), expected));
+ }
+ {
+ // Merge existing tree. Assume we got modified data down again.
+ Grouping request = baseRequest.unchain().setFirstLevel(1).setLastLevel(1);
+ Group incoming = Group()
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s0"))
+ .setResult(Int64ResultNode(200)))
+ .addChild(Group()
+ .setId(Int64ResultNode(3))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s1"))
+ .setResult(Int64ResultNode(0)))
+ )
+ .addChild(Group()
+ .setId(Int64ResultNode(5))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s1"))
+ .setResult(Int64ResultNode(0)))
+ )
+ .addChild(Group()
+ .setId(Int64ResultNode(7))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s1"))
+ .setResult(Int64ResultNode(0)))
+ )
+ .addChild(Group()
+ .setId(Int64ResultNode(10))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s1"))
+ .setResult(Int64ResultNode(0))))
+ .addChild(Group()
+ .setId(Int64ResultNode(33))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s1"))
+ .setResult(Int64ResultNode(0)))
+ );
+ Group expected = Group()
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s0"))
+ .setResult(Int64ResultNode(200)))
+ .addChild(Group()
+ .setId(Int64ResultNode(3))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s1"))
+ .setResult(Int64ResultNode(0)))
+ )
+ .addChild(Group()
+ .setId(Int64ResultNode(5))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s1"))
+ .setResult(Int64ResultNode(10)))
+ .addChild(Group()
+ .setId(Int64ResultNode(13))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s2"))
+ .setResult(Int64ResultNode(0)))
+ )
+ )
+ .addChild(Group()
+ .setId(Int64ResultNode(7))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s1"))
+ .setResult(Int64ResultNode(0)))
+ )
+ .addChild(Group()
+ .setId(Int64ResultNode(10))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s1"))
+ .setResult(Int64ResultNode(100)))
+ .addChild(Group()
+ .setId(Int64ResultNode(15))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s2"))
+ .setResult(Int64ResultNode(0)))
+ )
+ )
+ .addChild(Group()
+ .setId(Int64ResultNode(33))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s1"))
+ .setResult(Int64ResultNode(0)))
+ );
+ EXPECT_TRUE(testPartialMerge(request.unchain().setRoot(incoming), request.unchain().setFirstLevel(0).setLastLevel(3).setRoot(cached), expected));
+ }
+}
+
+/**
+ * Test that pruning a simple grouping tree works.
+ **/
+void
+Test::testPruneSimple()
+{
+ {
+ Grouping request = Grouping()
+ .addLevel(GroupingLevel()
+ .setExpression(AttributeNode("attr")))
+ .setFirstLevel(1)
+ .setLastLevel(1);
+
+ Group a = Group()
+ .addChild(Group().setId(StringResultNode("foo")))
+ .addChild(Group().setId(StringResultNode("bar")))
+ .addChild(Group().setId(StringResultNode("baz")));
+
+ Group b = Group()
+ .addChild(Group().setId(StringResultNode("foo")));
+
+ Group expect = Group()
+ .addChild(Group().setId(StringResultNode("foo")));
+
+ EXPECT_TRUE(testPrune(request.unchain().setFirstLevel(0).setRoot(a), request.unchain().setRoot(b), expect));
+ }
+}
+
+/**
+ * Test that simple counting works as long as we use an expression
+ * that we init, calculate and ignore.
+ **/
+void
+Test::testTopN()
+{
+ AggregationContext ctx;
+ ctx.result().add(0).add(1).add(2);
+ ctx.add(IntAttrBuilder("foo").add(3).add(7).add(15).sp());
+
+ Grouping request = Grouping()
+ .setRoot(Group()
+ .addResult(CountAggregationResult()
+ .setExpression(ConstantNode(Int64ResultNode(0)))
+ )
+ );
+ {
+ Group expect = Group()
+ .addResult(CountAggregationResult().setCount(3)
+ .setExpression(ConstantNode(Int64ResultNode(0)))
+ );
+
+ EXPECT_TRUE(testAggregation(ctx, request, expect));
+ }
+ {
+ Group expect = Group()
+ .addResult(CountAggregationResult().setCount(1)
+ .setExpression(ConstantNode(Int64ResultNode(0)))
+ );
+
+ EXPECT_TRUE(testAggregation(ctx, request.setTopN(1), expect));
+ }
+ {
+ Grouping request2 = Grouping()
+ .addLevel(GroupingLevel()
+ .addAggregationResult(SumAggregationResult())
+ .addOrderBy(AggregationRefNode(0), false));
+ EXPECT_TRUE(request2.needResort());
+ request2.setTopN(0);
+ EXPECT_TRUE(request2.needResort());
+ request2.setTopN(1);
+ EXPECT_TRUE(!request2.needResort());
+ request2.setTopN(100);
+ EXPECT_TRUE(!request2.needResort());
+ }
+}
+
+/**
+ * Test that simple counting works as long as we use an expression
+ * that we init, calculate and ignore.
+ **/
+void
+Test::testCount()
+{
+ AggregationContext ctx;
+ ctx.result().add(0).add(1).add(2);
+ ctx.add(IntAttrBuilder("foo").add(3).add(7).add(15).sp());
+
+ Grouping request = Grouping()
+ .setRoot(Group()
+ .addResult(CountAggregationResult()
+ .setExpression(ConstantNode(Int64ResultNode(0)))
+ )
+ );
+
+ Group expect = Group()
+ .addResult(CountAggregationResult().setCount(3)
+ .setExpression(ConstantNode(Int64ResultNode(0)))
+ );
+
+ EXPECT_TRUE(testAggregation(ctx, request, expect));
+}
+
+//-----------------------------------------------------------------------------
+
+bool
+Test::checkHits(const Grouping &g, uint32_t first, uint32_t last, uint32_t cnt)
+{
+ CountFS4Hits pop;
+ Grouping tmp = g;
+ tmp.setFirstLevel(first).setLastLevel(last).select(pop, pop);
+ return EXPECT_EQUAL(pop.getHitCount(), cnt);
+}
+
+void
+Test::testFS4HitCollection()
+{
+ { // aggregation
+ AggregationContext ctx;
+ ctx.result().add(30, 30.0).add(20, 20.0).add(10, 10.0).add(5, 5.0).add(25, 25.0);
+
+ Grouping request = Grouping()
+ .setRoot(Group()
+ .addResult(HitsAggregationResult()
+ .setMaxHits(3)
+ .setExpression(ConstantNode(Int64ResultNode(0))))
+ );
+
+ Group expect = Group()
+ .addResult(HitsAggregationResult()
+ .setMaxHits(3)
+ .addHit(FS4Hit(30, 30.0))
+ .addHit(FS4Hit(25, 25.0))
+ .addHit(FS4Hit(20, 20.0))
+ .sort()
+ .setExpression(ConstantNode(Int64ResultNode(0))));
+
+ EXPECT_TRUE(testAggregation(ctx, request, expect));
+ }
+ { // merging
+
+ Grouping request = Grouping()
+ .setRoot(Group()
+ .addResult(HitsAggregationResult()
+ .setMaxHits(3)
+ .setExpression(ConstantNode(Int64ResultNode(0))))
+ );
+
+ Group expect = Group()
+ .setId(NullResultNode())
+ .addResult(HitsAggregationResult()
+ .setMaxHits(3)
+ .addHit(FS4Hit(30, 30.0))
+ .addHit(FS4Hit(20, 20.0))
+ .addHit(FS4Hit(10, 10.0))
+ .sort()
+ .setExpression(ConstantNode(Int64ResultNode(0))));
+
+ Group a = Group()
+ .setId(NullResultNode())
+ .addResult(HitsAggregationResult()
+ .setMaxHits(3)
+ .addHit(FS4Hit(10, 10.0))
+ .addHit(FS4Hit(1, 5.0))
+ .addHit(FS4Hit(2, 4.0))
+ .sort()
+ .setExpression(ConstantNode(Int64ResultNode(0))));
+
+ Group b = Group()
+ .setId(NullResultNode())
+ .addResult(HitsAggregationResult()
+ .setMaxHits(3)
+ .addHit(FS4Hit(20, 20.0))
+ .addHit(FS4Hit(3, 7.0))
+ .addHit(FS4Hit(4, 6.0))
+ .sort()
+ .setExpression(ConstantNode(Int64ResultNode(0))));
+
+ Group c = Group()
+ .setId(NullResultNode())
+ .addResult(HitsAggregationResult()
+ .setMaxHits(3)
+ .addHit(FS4Hit(30, 30.0))
+ .addHit(FS4Hit(5, 9.0))
+ .addHit(FS4Hit(6, 8.0))
+ .sort()
+ .setExpression(ConstantNode(Int64ResultNode(0))));
+
+ EXPECT_TRUE(testMerge(request.unchain().setRoot(a), request.unchain().setRoot(b), request.unchain().setRoot(c), expect));
+ EXPECT_TRUE(testMerge(request.unchain().setRoot(b), request.unchain().setRoot(c), request.unchain().setRoot(a), expect));
+ EXPECT_TRUE(testMerge(request.unchain().setRoot(c), request.unchain().setRoot(a), request.unchain().setRoot(b), expect));
+ }
+ { // count hits (for external object selection)
+ HitsAggregationResult dummyHits = HitsAggregationResult()
+ .setMaxHits(3)
+ .addHit(FS4Hit(1, 3.0))
+ .addHit(FS4Hit(2, 2.0))
+ .addHit(FS4Hit(3, 1.0))
+ .sort();
+ Grouping g = Grouping().setRoot(Group().addResult(dummyHits)
+ .addChild(Group().addResult(dummyHits)
+ .addChild(Group().addResult(dummyHits))
+ )
+ .addChild(Group().addResult(dummyHits)
+ .addChild(Group().addResult(dummyHits)
+ .addChild(Group().addResult(dummyHits))
+ )
+ )
+ );
+ EXPECT_TRUE(checkHits(g, 0, 0, 3));
+ EXPECT_TRUE(checkHits(g, 1, 1, 6));
+ EXPECT_TRUE(checkHits(g, 2, 2, 6));
+ EXPECT_TRUE(checkHits(g, 3, 3, 3));
+ EXPECT_TRUE(checkHits(g, 4, 4, 0));
+
+ EXPECT_TRUE(checkHits(g, 0, 1, 9));
+ EXPECT_TRUE(checkHits(g, 0, 2, 15));
+ EXPECT_TRUE(checkHits(g, 0, 3, 18));
+ EXPECT_TRUE(checkHits(g, 0, 4, 18));
+ EXPECT_TRUE(checkHits(g, 1, 4, 15));
+ EXPECT_TRUE(checkHits(g, 2, 4, 9));
+ EXPECT_TRUE(checkHits(g, 3, 4, 3));
+
+ EXPECT_TRUE(checkHits(g, 1, 2, 12));
+ EXPECT_TRUE(checkHits(g, 2, 3, 9));
+ EXPECT_TRUE(checkHits(g, 3, 4, 3));
+ EXPECT_TRUE(checkHits(g, 4, 5, 0));
+ }
+}
+
+bool
+Test::checkBucket(const NumericResultNode &width, const NumericResultNode &value, const BucketResultNode &bucket)
+{
+ AggregationContext ctx;
+ ctx.result().add(0);
+ if (value.getClass().inherits(IntegerResultNode::classId)) {
+ ctx.add(IntAttrBuilder("attr").add(value.getInteger()).sp());
+ } else if (value.getClass().inherits(FloatResultNode::classId)) {
+ ctx.add(FloatAttrBuilder("attr").add(value.getFloat()).sp());
+ } else {
+ return EXPECT_TRUE(false);
+ }
+ Grouping request = Grouping()
+ .addLevel(GroupingLevel()
+ .setExpression(FixedWidthBucketFunctionNode(AttributeNode("attr")).setWidth(width)));
+ Group expect = Group().addChild(Group().setId(bucket));
+ return testAggregation(ctx, request, expect);
+}
+
+void
+Test::testFixedWidthBuckets()
+{
+ typedef Int64ResultNode Int;
+ typedef FloatResultNode Float;
+ typedef IntegerBucketResultNode IntBucket;
+ typedef FloatBucketResultNode FloatBucket;
+
+ // positive int buckets
+ EXPECT_TRUE(checkBucket(Int(10), Int(0), IntBucket(0,10)));
+ EXPECT_TRUE(checkBucket(Int(10), Int(5), IntBucket(0,10)));
+ EXPECT_TRUE(checkBucket(Int(10), Int(9), IntBucket(0,10)));
+ EXPECT_TRUE(checkBucket(Int(10), Int(10), IntBucket(10,20)));
+ EXPECT_TRUE(checkBucket(Int(10), Int(299), IntBucket(290,300)));
+
+ // negative int buckets
+ EXPECT_TRUE(checkBucket(Int(10), Int(-1), IntBucket(-10,0)));
+ EXPECT_TRUE(checkBucket(Int(10), Int(-5), IntBucket(-10,0)));
+ EXPECT_TRUE(checkBucket(Int(10), Int(-10), IntBucket(-10,0)));
+ EXPECT_TRUE(checkBucket(Int(10), Int(-11), IntBucket(-20,-10)));
+ EXPECT_TRUE(checkBucket(Int(10), Int(-300), IntBucket(-300,-290)));
+
+ // positive float buckets
+ EXPECT_TRUE(checkBucket(Int(10), Float(0.0), FloatBucket(0.0,10.0)));
+ EXPECT_TRUE(checkBucket(Int(10), Float(5.0), FloatBucket(0.0,10.0)));
+ EXPECT_TRUE(checkBucket(Int(10), Float(9.0), FloatBucket(0.0,10.0)));
+ EXPECT_TRUE(checkBucket(Int(10), Float(10.0), FloatBucket(10.0,20.0)));
+ EXPECT_TRUE(checkBucket(Int(10), Float(299.0), FloatBucket(290.0,300.0)));
+
+ // negative float buckets
+ EXPECT_TRUE(checkBucket(Int(10), Float(-1), FloatBucket(-10.0,0.0)));
+ EXPECT_TRUE(checkBucket(Int(10), Float(-5), FloatBucket(-10.0,0.0)));
+ EXPECT_TRUE(checkBucket(Int(10), Float(-10), FloatBucket(-10.0,0.0)));
+ EXPECT_TRUE(checkBucket(Int(10), Float(-10.0000001), FloatBucket(-20.0,-10.0)));
+ EXPECT_TRUE(checkBucket(Int(10), Float(-300), FloatBucket(-300.0,-290.0)));
+
+ // non-integer bucket width
+ EXPECT_TRUE(checkBucket(Float(0.5), Float(0.0), FloatBucket(0.0,0.5)));
+ EXPECT_TRUE(checkBucket(Float(0.5), Float(0.5), FloatBucket(0.5,1.0)));
+ EXPECT_TRUE(checkBucket(Float(0.5), Float(0.4999), FloatBucket(0.0,0.5)));
+ EXPECT_TRUE(checkBucket(Float(0.5), Float(-0.0001), FloatBucket(-0.5,0.0)));
+ EXPECT_TRUE(checkBucket(Float(0.5), Float(-0.5), FloatBucket(-0.5,0.0)));
+ EXPECT_TRUE(checkBucket(Float(0.5), Float(-0.50001), FloatBucket(-1.0,-0.5)));
+
+ // zero-width buckets
+ EXPECT_TRUE(checkBucket(Int(0), Int(7), IntBucket(7,7)));
+ EXPECT_TRUE(checkBucket(Int(0), Float(7.5), FloatBucket(7.5,7.5)));
+
+ // bucket wrap protection
+ {
+ int64_t x = std::numeric_limits<int64_t>::min();
+ int64_t y = std::numeric_limits<int64_t>::max();
+ EXPECT_TRUE(checkBucket(Int(1000), Int(x + 5), IntBucket(x, (x/1000) * 1000)));
+ EXPECT_TRUE(checkBucket(Int(1000), Int(y - 5), IntBucket((y/1000) * 1000, y)));
+ }
+}
+
+
+void
+Test::testNanSorting()
+{
+ // Attempt at reproducing issue with segfault when setting NaN value. Not
+ // successful yet, so no point in running test.
+#if 0
+ double nan = sqrt(-1);
+ EXPECT_TRUE(isnan(nan));
+ EXPECT_TRUE(nan != nan);
+ EXPECT_FALSE(nan < nan);
+ EXPECT_FALSE(nan > nan);
+ EXPECT_FALSE(nan < 0.2);
+ EXPECT_FALSE(nan > 0.2);
+ EXPECT_FALSE(0.2 < nan);
+ EXPECT_FALSE(0.2 > nan);
+
+ FastOS_Time timer;
+ timer.SetNow();
+ std::vector<double> groups;
+ while (timer.MilliSecsToNow() < 60000.0) {
+ std::vector<double> vec;
+ srand((unsigned int)timer.MilliSecs());
+ size_t limit = 2345678;
+ size_t mod = rand() % limit;
+ for (size_t i = 0; i < limit; i++) {
+ if ((i % mod) == 0)
+ vec.push_back(nan);
+ else
+ vec.push_back(1.0 * rand());
+ }
+ }
+ std::sort(groups.begin(), groups.end());
+#endif
+}
+
+void
+Test::testThatNanIsConverted()
+{
+ Group g;
+ double nan = sqrt(-1);
+ g.setRank(nan);
+ // Must have been changed for this to work.
+ ASSERT_EQUAL(g.getRank(), g.getRank());
+}
+
+//-----------------------------------------------------------------------------
+
+struct RunDiff { ~RunDiff() { system("diff -u lhs.out rhs.out > diff.txt"); }};
+
+//-----------------------------------------------------------------------------
+
+int
+Test::Main()
+{
+ RunDiff runDiff;
+ (void) runDiff;
+ TEST_DEBUG("lhs.out", "rhs.out");
+ TEST_INIT("grouping_test");
+ testAggregationSimple();
+ testAggregationLevels();
+ testAggregationMaxGroups();
+ testAggregationGroupOrder();
+ testAggregationGroupRank();
+ testAggregationGroupCapping();
+ testMergeSimpleSum();
+ testMergeLevels();
+ testMergeGroups();
+ testMergeTrees();
+ testPruneSimple();
+ testPruneComplex();
+ testPartialMerging();
+ testFS4HitCollection();
+ testFixedWidthBuckets();
+ testCount();
+ testTopN();
+ testThatNanIsConverted();
+ testNanSorting();
+ TEST_DONE();
+}
+
+TEST_APPHOOK(Test);
diff --git a/searchlib/src/tests/grouping/hyperloglog_test.cpp b/searchlib/src/tests/grouping/hyperloglog_test.cpp
new file mode 100644
index 00000000000..15b4ae9ae39
--- /dev/null
+++ b/searchlib/src/tests/grouping/hyperloglog_test.cpp
@@ -0,0 +1,92 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Unit tests for hyperloglog.
+
+#include <vespa/log/log.h>
+LOG_SETUP("hyperloglog_test");
+#include <vespa/fastos/fastos.h>
+
+#include <vespa/searchlib/grouping/hyperloglog.h>
+#include <vespa/vespalib/objects/nboserializer.h>
+#include <vespa/vespalib/objects/nbostream.h>
+#include <vespa/vespalib/testkit/testapp.h>
+
+using vespalib::NBOSerializer;
+using vespalib::nbostream;
+using namespace search;
+
+namespace {
+
+TEST("require that hyperloglog changes from sparse to normal sketch") {
+ HyperLogLog<> hll;
+ for (size_t i = 0; i < 256; ++i) {
+ EXPECT_TRUE(dynamic_cast<const SparseSketch<> *>(&hll.getSketch()));
+ EXPECT_EQUAL(1, hll.aggregate(i));
+ }
+ EXPECT_TRUE(dynamic_cast<const SparseSketch<> *>(&hll.getSketch()));
+ EXPECT_EQUAL(23, hll.aggregate(256));
+ EXPECT_TRUE(dynamic_cast<const NormalSketch<> *>(&hll.getSketch()));
+}
+
+TEST("require that hyperloglog can be (de)serialized") {
+ HyperLogLog<> hll;
+ for (size_t i = 0; i < 256; ++i) {
+ EXPECT_EQUAL(1, hll.aggregate(i));
+ }
+ nbostream stream;
+ NBOSerializer serializer(stream);
+
+ // Serializes with sparse sketch
+ hll.serialize(serializer);
+ HyperLogLog<> hll2;
+ hll2.deserialize(serializer);
+ EXPECT_TRUE(dynamic_cast<const SparseSketch<> *>(&hll2.getSketch()));
+ EXPECT_EQUAL(hll.getSketch(), hll2.getSketch());
+
+ // Serializes with normal sketch.
+ EXPECT_EQUAL(23, hll2.aggregate(256));
+ hll2.serialize(serializer);
+ hll.deserialize(serializer);
+ EXPECT_TRUE(dynamic_cast<const NormalSketch<> *>(&hll.getSketch()));
+ EXPECT_EQUAL(hll2.getSketch(), hll.getSketch());
+}
+
+TEST("require that sparse hyperloglogs can be merged") {
+ HyperLogLog<> hll;
+ for (size_t i = 0; i < 100; ++i) {
+ EXPECT_EQUAL(1, hll.aggregate(i));
+ }
+ HyperLogLog<> hll2;
+ for (size_t i = 100; i < 255; ++i) {
+ EXPECT_EQUAL(1, hll2.aggregate(i));
+ }
+ hll.merge(hll2);
+ EXPECT_TRUE(dynamic_cast<const SparseSketch<> *>(&hll.getSketch()));
+
+ EXPECT_EQUAL(1, hll2.aggregate(255));
+ hll.merge(hll2);
+ EXPECT_TRUE(dynamic_cast<const NormalSketch<> *>(&hll.getSketch()));
+}
+
+TEST("require that mixed hyperloglogs can be merged") {
+ HyperLogLog<> hll;
+ for (size_t i = 0; i < 256; ++i) {
+ EXPECT_EQUAL(1, hll.aggregate(i));
+ }
+ EXPECT_EQUAL(23, hll.aggregate(256)); // normal
+ HyperLogLog<> hll2;
+ for (size_t i = 100; i < 255; ++i) {
+ EXPECT_EQUAL(1, hll2.aggregate(i)); // sparse
+ }
+ hll.merge(hll2); // normal + sparse
+ hll2.merge(hll); // sparse + normal
+ EXPECT_EQUAL(hll.getSketch(), hll2.getSketch());
+
+ EXPECT_EQUAL(23, hll2.aggregate(500));
+ hll.merge(hll2); // normal + normal
+ EXPECT_EQUAL(hll.getSketch(), hll2.getSketch());
+ EXPECT_EQUAL(0, hll.aggregate(500));
+}
+
+} // namespace
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/grouping/sketch_test.cpp b/searchlib/src/tests/grouping/sketch_test.cpp
new file mode 100644
index 00000000000..c6c0b144983
--- /dev/null
+++ b/searchlib/src/tests/grouping/sketch_test.cpp
@@ -0,0 +1,151 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Unit tests for sketch.
+
+#include <vespa/log/log.h>
+LOG_SETUP("sketch_test");
+#include <vespa/fastos/fastos.h>
+
+#include <vespa/searchlib/grouping/sketch.h>
+#include <vespa/vespalib/objects/nboserializer.h>
+#include <vespa/vespalib/objects/nbostream.h>
+#include <vespa/vespalib/testkit/testapp.h>
+#include <vespa/vespalib/util/stringfmt.h>
+
+using vespalib::NBOSerializer;
+using vespalib::nbostream;
+using namespace search;
+using vespalib::make_string;
+
+namespace {
+
+TEST("require that normal sketch is initialized") {
+ NormalSketch<> sketch;
+ for (size_t i = 0; i < sketch.BUCKET_COUNT; ++i) {
+ EXPECT_EQUAL(0, sketch.bucket[i]);
+ }
+}
+
+template <typename NormalSketch>
+void checkBucketValue(NormalSketch &sketch, size_t bucket, uint32_t value) {
+ EXPECT_EQUAL(value, static_cast<size_t>(sketch.bucket[bucket]));
+}
+
+template <int BucketBits, typename HashT>
+void checkCountPrefixZeros() {
+ TEST_STATE(make_string("BucketBits: %d, HashBits: %d",
+ BucketBits, int(sizeof(HashT) * 8)).c_str());
+ NormalSketch<BucketBits, HashT> sketch;
+ const uint32_t prefix_bits = sizeof(HashT) * 8 - BucketBits;
+ const uint32_t hash_width = sizeof(HashT) * 8;
+ for (size_t i = 0; i < prefix_bits ; ++i) {
+ int increase = sketch.aggregate(HashT(1) << (hash_width - 1 - i));
+ EXPECT_EQUAL(1, increase); // bucket increases by 1 for each call
+ checkBucketValue(sketch, 0, i + 1);
+ }
+ sketch.aggregate(0);
+ checkBucketValue(sketch, prefix_bits + 1, 0);
+
+ checkBucketValue(sketch, HashT(1) << (BucketBits - 1), 0);
+ sketch.aggregate(HashT(1) << (hash_width - 1 - prefix_bits));
+ checkBucketValue(sketch, 0, prefix_bits + 1);
+ checkBucketValue(sketch, HashT(1) << (BucketBits - 1), prefix_bits + 1);
+}
+
+TEST("require that prefix zeros are counted.") {
+ checkCountPrefixZeros<10, uint32_t>();
+ checkCountPrefixZeros<12, uint32_t>();
+ checkCountPrefixZeros<10, uint64_t>();
+ checkCountPrefixZeros<12, uint64_t>();
+}
+
+TEST("require that aggregate returns bucket increase") {
+ NormalSketch<> sketch;
+ int increase = sketch.aggregate(-1);
+ EXPECT_EQUAL(1, increase);
+ increase = sketch.aggregate(1023);
+ EXPECT_EQUAL(22, increase);
+ increase = sketch.aggregate(0);
+ EXPECT_EQUAL(23, increase);
+}
+
+TEST("require that instances can be merged.") {
+ NormalSketch<> sketch;
+ sketch.aggregate(0);
+ NormalSketch<> sketch2;
+ sketch2.aggregate(-1);
+ sketch.merge(sketch2);
+ checkBucketValue(sketch, 0, 23);
+ checkBucketValue(sketch, 1023, 1);
+}
+
+TEST("require that different sketch type instances can be merged.") {
+ NormalSketch<> sketch;
+ sketch.aggregate(0);
+ SparseSketch<> sketch2;
+ sketch2.aggregate(-1);
+ sketch.merge(sketch2);
+ checkBucketValue(sketch, 0, 23);
+ checkBucketValue(sketch, 1023, 1);
+}
+
+TEST("require that normal sketch can be (de)serialized") {
+ NormalSketch<> sketch;
+ for (size_t i = 0; i < sketch.BUCKET_COUNT; ++i) {
+ sketch.aggregate(i | (1 << ((i % sketch.bucketBits) +
+ sketch.bucketBits)));
+ }
+ nbostream stream;
+ NBOSerializer serializer(stream);
+ sketch.serialize(serializer);
+ EXPECT_EQUAL(31u, stream.size());
+ uint32_t val;
+ stream >> val;
+ EXPECT_TRUE(sketch.BUCKET_COUNT == val);
+ stream >> val;
+ EXPECT_EQUAL(23u, val);
+ stream.adjustReadPos(-2 * sizeof(uint32_t));
+ NormalSketch<> sketch2;
+ sketch2.deserialize(serializer);
+ EXPECT_EQUAL(sketch, sketch2);
+}
+
+TEST("require that uncompressed data in normal sketch can be deserialized") {
+ NormalSketch<> sketch;
+ nbostream stream;
+ NBOSerializer serializer(stream);
+ stream << sketch.BUCKET_COUNT;
+ stream << sketch.BUCKET_COUNT;
+ const int hash_bits = sizeof(NormalSketch<>::hash_type) * 8;
+ const int value_bits = hash_bits - sketch.bucketBits;
+ for (size_t i = 0; i < sketch.BUCKET_COUNT; ++i) {
+ char bucket_val = (i % value_bits) + 1;
+ stream << bucket_val;
+ sketch.aggregate(i | (1 << (hash_bits - bucket_val)));
+ }
+ NormalSketch<> sketch2;
+ sketch2.deserialize(serializer);
+ EXPECT_EQUAL(sketch, sketch2);
+}
+
+TEST("require that sparse sketch can be (de)serialized") {
+ SparseSketch<> sketch;
+ const uint32_t hash_count = 10;
+ for (size_t hash = 0; hash < hash_count; ++hash) {
+ sketch.aggregate(hash);
+ }
+ nbostream stream;
+ NBOSerializer serializer(stream);
+ sketch.serialize(serializer);
+ EXPECT_EQUAL(4 * hash_count + 4u, stream.size());
+ uint32_t val;
+ stream >> val;
+ EXPECT_EQUAL(hash_count, val);
+ stream.adjustReadPos(-1 * sizeof(uint32_t));
+ SparseSketch<> sketch2;
+ sketch2.deserialize(serializer);
+ EXPECT_EQUAL(sketch, sketch2);
+}
+
+} // namespace
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/groupingengine/.gitignore b/searchlib/src/tests/groupingengine/.gitignore
new file mode 100644
index 00000000000..1eb2fc1fb29
--- /dev/null
+++ b/searchlib/src/tests/groupingengine/.gitignore
@@ -0,0 +1,7 @@
+/lhs.out
+/rhs.out
+/diff.txt
+/groupingengine_benchmark
+/vgcore.*
+searchlib_groupingengine_test_app
+searchlib_groupingengine_benchmark_app
diff --git a/searchlib/src/tests/groupingengine/CMakeLists.txt b/searchlib/src/tests/groupingengine/CMakeLists.txt
new file mode 100644
index 00000000000..74f4574a9a4
--- /dev/null
+++ b/searchlib/src/tests/groupingengine/CMakeLists.txt
@@ -0,0 +1,15 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_groupingengine_test_app
+ SOURCES
+ groupingengine_test.cpp
+ DEPENDS
+ searchlib
+)
+#vespa_add_test(NAME searchlib_groupingengine_test_app COMMAND searchlib_groupingengine_test_app)
+vespa_add_executable(searchlib_groupingengine_benchmark_app
+ SOURCES
+ groupingengine_benchmark.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_groupingengine_benchmark_app COMMAND searchlib_groupingengine_benchmark_app BENCHMARK)
diff --git a/searchlib/src/tests/groupingengine/DESC b/searchlib/src/tests/groupingengine/DESC
new file mode 100644
index 00000000000..1aa6cb37e89
--- /dev/null
+++ b/searchlib/src/tests/groupingengine/DESC
@@ -0,0 +1 @@
+grouping test. Take a look at grouping.cpp for details.
diff --git a/searchlib/src/tests/groupingengine/FILES b/searchlib/src/tests/groupingengine/FILES
new file mode 100644
index 00000000000..af7f7e71257
--- /dev/null
+++ b/searchlib/src/tests/groupingengine/FILES
@@ -0,0 +1,4 @@
+grouping.cpp
+lhs.out
+rhs.out
+diff.txt
diff --git a/searchlib/src/tests/groupingengine/groupingengine_benchmark.cpp b/searchlib/src/tests/groupingengine/groupingengine_benchmark.cpp
new file mode 100644
index 00000000000..b7136741a4c
--- /dev/null
+++ b/searchlib/src/tests/groupingengine/groupingengine_benchmark.cpp
@@ -0,0 +1,292 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+#include <vespa/vespalib/testkit/testapp.h>
+#include <vespa/searchlib/aggregation/perdocexpression.h>
+#include <vespa/searchlib/aggregation/aggregation.h>
+#include <vespa/searchlib/attribute/extendableattributes.h>
+#include <vespa/searchlib/attribute/attributemanager.h>
+#include <vespa/searchlib/aggregation/hitsaggregationresult.h>
+#include <vespa/searchlib/aggregation/fs4hit.h>
+#include <vespa/searchlib/expression/fixedwidthbucketfunctionnode.h>
+#include <vespa/searchlib/grouping/groupingengine.h>
+#include <algorithm>
+#include <vespa/vespalib/objects/objectpredicate.h>
+#include <vespa/vespalib/objects/objectoperation.h>
+#include <vespa/vespalib/util/rusage.h>
+LOG_SETUP("grouping_benchmark");
+
+using namespace vespalib;
+using namespace search;
+using namespace search::attribute;
+using namespace search::expression;
+using namespace search::aggregation;
+using namespace search::grouping;
+
+//-----------------------------------------------------------------------------
+
+template<typename A, typename T>
+class AttrBuilder
+{
+private:
+ A *_attr;
+ AttributeVector::SP _attrSP;
+
+public:
+ AttrBuilder(const AttrBuilder &rhs)
+ : _attr(new A(rhs._attr->getName())),
+ _attrSP(_attr)
+ {
+ uint32_t numDocs = rhs._attr->getNumDocs();
+ for (uint32_t docid = 0; docid < numDocs; ++docid) {
+ T val;
+ uint32_t res = rhs._attr->get(docid, &val, 1);
+ LOG_ASSERT(res == 1);
+ add(val);
+ }
+ }
+ AttrBuilder(const std::string &name)
+ : _attr(new A(name)),
+ _attrSP(_attr)
+ {
+ }
+ AttrBuilder& operator=(const AttrBuilder &rhs) {
+ AttrBuilder tmp(rhs);
+ std::swap(_attr, tmp._attr);
+ _attrSP.swap(tmp._attrSP);
+ return *this;
+ }
+ AttrBuilder &add(T value) {
+ DocId ignore;
+ _attr->addDoc(ignore);
+ _attr->add(value);
+ return *this;
+ }
+ AttributeVector::SP sp() const {
+ return _attrSP;
+ }
+};
+
+typedef AttrBuilder<SingleIntegerExtAttribute, int64_t> IntAttrBuilder;
+typedef AttrBuilder<SingleFloatExtAttribute, double> FloatAttrBuilder;
+typedef AttrBuilder<SingleStringExtAttribute, const char *> StringAttrBuilder;
+
+//-----------------------------------------------------------------------------
+
+class ResultBuilder
+{
+private:
+ std::vector<RankedHit> _hits;
+
+public:
+ ResultBuilder() : _hits() {}
+ ResultBuilder &add(unsigned int docid, HitRank rank = 0) {
+ RankedHit hit;
+ hit._docId = docid;
+ hit._rankValue = rank;
+ _hits.push_back(hit);
+ for (uint32_t pos = (_hits.size() - 1);
+ pos > 0 && (_hits[pos]._rankValue > _hits[pos - 1]._rankValue);
+ --pos)
+ {
+ std::swap(_hits[pos], _hits[pos - 1]);
+ }
+ return *this;
+ }
+ const RankedHit *hits() const {
+ return &_hits[0];
+ }
+ uint32_t size() const {
+ return _hits.size();
+ }
+};
+
+//-----------------------------------------------------------------------------
+
+class AggregationContext
+{
+private:
+ AttributeManager _attrMan;
+ ResultBuilder _result;
+ IAttributeContext::UP _attrCtx;
+
+ AggregationContext(const AggregationContext &);
+ AggregationContext &operator=(const AggregationContext &);
+
+public:
+ AggregationContext() : _attrMan(), _result(), _attrCtx(_attrMan.createContext()) {}
+ ResultBuilder &result() { return _result; }
+ void add(AttributeVector::SP attr) {
+ _attrMan.add(attr);
+ }
+ void setup(Grouping &g) {
+ g.configureStaticStuff(ConfigureStaticParams(_attrCtx.get(), 0));
+ }
+};
+
+//-----------------------------------------------------------------------------
+
+class Test : public TestApp
+{
+public:
+private:
+ bool testAggregation(AggregationContext &ctx, const Grouping &request, bool useEngine);
+ void benchmarkIntegerSum(bool useEngine, size_t numDocs, size_t numQueries, int64_t maxGroups);
+ void benchmarkIntegerCount(bool useEngine, size_t numDocs, size_t numQueries, int64_t maxGroups);
+ class CheckAttributeReferences : public vespalib::ObjectOperation, public vespalib::ObjectPredicate
+ {
+ public:
+ CheckAttributeReferences() : _numrefs(0) { }
+ int _numrefs;
+ private:
+ virtual void execute(vespalib::Identifiable &obj) {
+ if (static_cast<AttributeNode &>(obj).getAttribute() != NULL) {
+ _numrefs++;
+ }
+ }
+ virtual bool check(const vespalib::Identifiable &obj) const { return obj.inherits(AttributeNode::classId); }
+ };
+ int Main();
+};
+
+//-----------------------------------------------------------------------------
+
+/**
+ * Run the given grouping request and verify that the resulting group
+ * tree matches the expected value.
+ **/
+bool
+Test::testAggregation(AggregationContext &ctx, const Grouping &request, bool useEngine)
+{
+ Grouping tmp = request; // create local copy
+ ctx.setup(tmp);
+ if (useEngine) {
+ GroupingEngine engine(tmp);
+ engine.aggregate(ctx.result().hits(), ctx.result().size());
+ Group::UP result = engine.createResult();
+ } else {
+ tmp.aggregate(ctx.result().hits(), ctx.result().size());
+ }
+ tmp.cleanupAttributeReferences();
+ CheckAttributeReferences attrCheck;
+ tmp.select(attrCheck, attrCheck);
+ EXPECT_EQUAL(attrCheck._numrefs, 0);
+ return true;
+}
+
+void
+Test::benchmarkIntegerSum(bool useEngine, size_t numDocs, size_t numQueries, int64_t maxGroups)
+{
+ IntAttrBuilder attrB("attr0");
+ for (size_t i=0; i < numDocs; i++) {
+ attrB.add(i);
+ }
+ AggregationContext ctx;
+ for(size_t i(0); i < numDocs; i++) {
+ ctx.result().add(i, numDocs-i);
+ }
+ ctx.add(attrB.sp());
+ GroupingLevel level;
+ level.setExpression(AttributeNode("attr0")).setMaxGroups(maxGroups);
+ level.addResult(SumAggregationResult().setExpression(AttributeNode("attr0")));
+ if (maxGroups >= 0) {
+ level.addOrderBy(AggregationRefNode(0), false);
+ }
+ Grouping baseRequest = Grouping()
+ .setFirstLevel(0)
+ .setLastLevel(1)
+ .setRoot(Group()
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("attr0"))))
+ .addLevel(level);
+
+ for (size_t i(0); i < numQueries; i++) {
+ testAggregation(ctx, baseRequest, useEngine);
+ }
+}
+
+void
+Test::benchmarkIntegerCount(bool useEngine, size_t numDocs, size_t numQueries, int64_t maxGroups)
+{
+ IntAttrBuilder attrB("attr0");
+ for (size_t i=0; i < numDocs; i++) {
+ attrB.add(i);
+ }
+ AggregationContext ctx;
+ for(size_t i(0); i < numDocs; i++) {
+ ctx.result().add(i);
+ }
+ ctx.add(attrB.sp());
+ GroupingLevel level;
+ level.setExpression(AttributeNode("attr0")).setMaxGroups(maxGroups);
+ level.addResult(CountAggregationResult().setExpression(AttributeNode("attr0")));
+ if (maxGroups >= 0) {
+ level.addOrderBy(AggregationRefNode(0), false);
+ }
+ Grouping baseRequest = Grouping()
+ .setFirstLevel(0)
+ .setLastLevel(1)
+ .setRoot(Group()
+ .addResult(CountAggregationResult()
+ .setExpression(AttributeNode("attr0"))))
+ .addLevel(level);
+
+ for (size_t i(0); i < numQueries; i++) {
+ testAggregation(ctx, baseRequest, useEngine);
+ }
+}
+
+int
+Test::Main()
+{
+ size_t numDocs = 1000000;
+ size_t numQueries = 1000;
+ int64_t maxGroups = -1;
+ bool useEngine = true;
+ vespalib::string idType = "int";
+ vespalib::string aggrType = "sum";
+ if (_argc > 1) {
+ useEngine = (strcmp(_argv[1], "tree") != 0);
+ }
+ if (_argc > 2) {
+ idType = _argv[2];
+ }
+ if (_argc > 3) {
+ aggrType = _argv[3];
+ }
+ if (_argc > 4) {
+ numDocs = strtol(_argv[4], NULL, 0);
+ }
+ if (_argc > 5) {
+ numQueries = strtol(_argv[5], NULL, 0);
+ }
+ if (_argc > 6) {
+ maxGroups = strtol(_argv[6], NULL, 0);
+ }
+ TEST_INIT("grouping_benchmark");
+ LOG(info, "sizeof(Group) = %ld", sizeof(Group));
+ LOG(info, "sizeof(ResultNode::CP) = %ld", sizeof(ResultNode::CP));
+ LOG(info, "sizeof(RawRank) = %ld", sizeof(RawRank));
+ LOG(info, "sizeof(SumAggregationResult) = %ld", sizeof(SumAggregationResult));
+ LOG(info, "sizeof(CountAggregationResult) = %ld", sizeof(CountAggregationResult));
+ LOG(info, "sizeof(Int64ResultNode) = %ld", sizeof(Int64ResultNode));
+
+ LOG(info, "sizeof(Group::ExpressionVector) = %ld", sizeof(Group::ExpressionVector));
+ fastos::TimeStamp start(fastos::ClockSystem::now());
+ if (idType == "int") {
+ if (aggrType == "sum") {
+ benchmarkIntegerSum(useEngine, numDocs, numQueries, maxGroups);
+ } else if (aggrType == "count") {
+ benchmarkIntegerCount(useEngine, numDocs, numQueries, maxGroups);
+ } else {
+ ASSERT_TRUE(false);
+ }
+ } else {
+ ASSERT_TRUE(false);
+ }
+ LOG(info, "rusage = {\n%s\n}", vespalib::RUsage::createSelf(start).toString().c_str());
+ ASSERT_EQUAL(0, kill(0, SIGPROF));
+ TEST_DONE();
+}
+
+TEST_APPHOOK(Test);
diff --git a/searchlib/src/tests/groupingengine/groupingengine_test.cpp b/searchlib/src/tests/groupingengine/groupingengine_test.cpp
new file mode 100644
index 00000000000..ab371cc3dcc
--- /dev/null
+++ b/searchlib/src/tests/groupingengine/groupingengine_test.cpp
@@ -0,0 +1,1985 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("groupingengine_test");
+#include <vespa/vespalib/testkit/testapp.h>
+#include <vespa/searchlib/aggregation/perdocexpression.h>
+#include <vespa/searchlib/aggregation/aggregation.h>
+#include <vespa/searchlib/attribute/extendableattributes.h>
+#include <vespa/searchlib/attribute/attributemanager.h>
+#include <vespa/searchlib/aggregation/hitsaggregationresult.h>
+#include <vespa/searchlib/aggregation/fs4hit.h>
+#include <vespa/searchlib/aggregation/predicates.h>
+#include <vespa/searchlib/expression/fixedwidthbucketfunctionnode.h>
+#include <vespa/searchlib/grouping/groupingengine.h>
+#include <algorithm>
+
+using namespace vespalib;
+using namespace search;
+using namespace search::attribute;
+using namespace search::expression;
+using namespace search::aggregation;
+using namespace search::grouping;
+
+//-----------------------------------------------------------------------------
+
+template<typename A, typename T>
+class AttrBuilder
+{
+private:
+ A *_attr;
+ AttributeVector::SP _attrSP;
+
+public:
+ AttrBuilder(const AttrBuilder &rhs)
+ : _attr(new A(rhs._attr->getName())),
+ _attrSP(_attr)
+ {
+ uint32_t numDocs = rhs._attr->getNumDocs();
+ for (uint32_t docid = 0; docid < numDocs; ++docid) {
+ T val;
+ uint32_t res = rhs._attr->get(docid, &val, 1);
+ LOG_ASSERT(res == 1);
+ add(val);
+ }
+ }
+ AttrBuilder(const std::string &name)
+ : _attr(new A(name)),
+ _attrSP(_attr)
+ {
+ }
+ AttrBuilder& operator=(const AttrBuilder &rhs) {
+ AttrBuilder tmp(rhs);
+ std::swap(_attr, tmp._attr);
+ _attrSP.swap(tmp._attrSP);
+ return *this;
+ }
+ AttrBuilder &add(T value) {
+ DocId ignore;
+ _attr->addDoc(ignore);
+ _attr->add(value);
+ return *this;
+ }
+ AttributeVector::SP sp() const {
+ return _attrSP;
+ }
+};
+
+typedef AttrBuilder<SingleIntegerExtAttribute, int64_t> IntAttrBuilder;
+typedef AttrBuilder<SingleFloatExtAttribute, double> FloatAttrBuilder;
+typedef AttrBuilder<SingleStringExtAttribute, const char *> StringAttrBuilder;
+
+//-----------------------------------------------------------------------------
+
+class ResultBuilder
+{
+private:
+ std::vector<RankedHit> _hits;
+
+public:
+ ResultBuilder() : _hits() {}
+ ResultBuilder &add(unsigned int docid, HitRank rank = 0) {
+ RankedHit hit;
+ hit._docId = docid;
+ hit._rankValue = rank;
+ _hits.push_back(hit);
+ for (uint32_t pos = (_hits.size() - 1);
+ pos > 0 && (_hits[pos]._rankValue > _hits[pos - 1]._rankValue);
+ --pos)
+ {
+ std::swap(_hits[pos], _hits[pos - 1]);
+ }
+ return *this;
+ }
+ const RankedHit *hits() const {
+ return &_hits[0];
+ }
+ uint32_t size() const {
+ return _hits.size();
+ }
+};
+
+//-----------------------------------------------------------------------------
+
+class AggregationContext
+{
+private:
+ AttributeManager _attrMan;
+ ResultBuilder _result;
+ IAttributeContext::UP _attrCtx;
+
+ AggregationContext(const AggregationContext &);
+ AggregationContext &operator=(const AggregationContext &);
+
+public:
+ AggregationContext() : _attrMan(), _result(), _attrCtx(_attrMan.createContext()) {}
+ ResultBuilder &result() { return _result; }
+ void add(AttributeVector::SP attr) {
+ _attrMan.add(attr);
+ }
+ void setup(Grouping &g) {
+ g.configureStaticStuff(ConfigureStaticParams(_attrCtx.get(), 0));
+ }
+};
+
+//-----------------------------------------------------------------------------
+
+class Test : public TestApp
+{
+public:
+ bool testAggregation(AggregationContext &ctx,
+ const Grouping &request,
+ const Group &expect);
+ bool testMerge(const Grouping &a, const Grouping &b,
+ const Group &expect);
+ bool testMerge(const Grouping &a, const Grouping &b, const Grouping &c,
+ const Group &expect);
+ bool testPrune(const Grouping &a, const Grouping &b,
+ const Group &expect);
+ bool testPartialMerge(const Grouping &a, const Grouping &b,
+ const Group &expect);
+ void testAggregationSimple();
+ void testAggregationLevels();
+ void testAggregationMaxGroups();
+ void testAggregationGroupOrder();
+ void testAggregationGroupRank();
+ void testAggregationGroupCapping();
+ void testMergeSimpleSum();
+ void testMergeLevels();
+ void testMergeGroups();
+ void testMergeTrees();
+ void testPruneSimple();
+ void testPruneComplex();
+ void testPartialMerging();
+ void testCount();
+ void testTopN();
+ void testFS4HitCollection();
+ bool checkBucket(const NumericResultNode &width, const NumericResultNode &value, const BucketResultNode &bucket);
+ bool checkHits(const Grouping &g, uint32_t first, uint32_t last, uint32_t cnt);
+ void testFixedWidthBuckets();
+ void testThatNanIsConverted();
+ void testNanSorting();
+ void testGroupingEngineFromRequest();
+ int Main();
+private:
+ bool verifyEqual(const Group & a, const Group & b);
+ void testAggregationSimpleSum(AggregationContext & ctx, const AggregationResult & aggr, const ResultNode & ir, const ResultNode & fr, const ResultNode & sr);
+ class CheckAttributeReferences : public vespalib::ObjectOperation, public vespalib::ObjectPredicate
+ {
+ public:
+ CheckAttributeReferences() : _numrefs(0) { }
+ int _numrefs;
+ private:
+ virtual void execute(vespalib::Identifiable &obj) {
+ if (static_cast<AttributeNode &>(obj).getAttribute() != NULL) {
+ _numrefs++;
+ }
+ }
+ virtual bool check(const vespalib::Identifiable &obj) const { return obj.inherits(AttributeNode::classId); }
+ };
+};
+
+//-----------------------------------------------------------------------------
+
+/**
+ * Run the given grouping request and verify that the resulting group
+ * tree matches the expected value.
+ **/
+bool
+Test::testAggregation(AggregationContext &ctx,
+ const Grouping &request,
+ const Group &expect)
+{
+ Grouping tmp = request; // create local copy
+ ctx.setup(tmp);
+ GroupingEngine engine(tmp);
+ verifyEqual(*engine.createResult(), tmp.getRoot());
+ engine.aggregate(ctx.result().hits(), ctx.result().size());
+ tmp.cleanupAttributeReferences();
+ CheckAttributeReferences attrCheck;
+ tmp.select(attrCheck, attrCheck);
+ Group::UP result = engine.createResult();
+ EXPECT_EQUAL(attrCheck._numrefs, 0);
+ return verifyEqual(*result, expect);
+}
+
+bool Test::verifyEqual(const Group & a, const Group & b)
+{
+ bool ok = EXPECT_EQUAL(a.asString(), b.asString());
+ if (!ok) {
+ std::cerr << a.asString() << std::endl << b.asString() << std::endl;
+ }
+ return ok;
+}
+
+/**
+ * Merge the given grouping requests and verify that the resulting
+ * group tree matches the expected value.
+ **/
+bool
+Test::testMerge(const Grouping &a, const Grouping &b,
+ const Group &expect)
+{
+ Grouping tmp = a; // create local copy
+ Grouping tmpB = b;
+#if 0
+ tmp.merge(tmpB);
+ tmp.postMerge();
+ tmp.sortById();
+ return EXPECT_EQUAL(tmp.getRoot().asString(), expect.asString());
+#else
+ GroupingEngine eA(tmp);
+ GroupingEngine eB(tmpB);
+ verifyEqual(*eA.createResult(), a.getRoot());
+ verifyEqual(*eB.createResult(), b.getRoot());
+ eA.merge(eB);
+ return verifyEqual(*eA.createResult(), expect);
+#endif
+}
+
+/**
+ * Prune the given grouping request and verify that the resulting
+ * group tree matches the expected value.
+ **/
+bool
+Test::testPrune(const Grouping &a, const Grouping &b,
+ const Group &expect)
+{
+ Grouping tmp = a; // create local copy
+ tmp.prune(b);
+ bool ok = EXPECT_EQUAL(tmp.getRoot().asString(), expect.asString());
+ if (!ok) {
+ std::cerr << tmp.getRoot().asString() << std::endl << expect.asString() << std::endl;
+ }
+ return ok;
+}
+
+/**
+ * Merge a given grouping request to get a partial request back. Verify that the
+ * partial request is correct.
+ **/
+bool
+Test::testPartialMerge(const Grouping &a, const Grouping &b,
+ const Group &expect)
+{
+ Grouping tmp = a; // create local copy
+ tmp.mergePartial(b);
+ bool ok = EXPECT_EQUAL(tmp.getRoot().asString(), expect.asString());
+ if (!ok) {
+ std::cerr << tmp.getRoot().asString() << std::endl << expect.asString() << std::endl;
+ }
+ return ok;
+}
+
+/**
+ * Merge the given grouping requests and verify that the resulting
+ * group tree matches the expected value.
+ **/
+bool
+Test::testMerge(const Grouping &a, const Grouping &b, const Grouping &c,
+ const Group &expect)
+{
+ Grouping tmp = a; // create local copy
+ Grouping tmpB = b; // create local copy
+ Grouping tmpC = c; // create local copy
+ tmp.merge(tmpB);
+ tmp.merge(tmpC);
+ tmp.postMerge();
+ tmp.sortById();
+ return EXPECT_EQUAL(tmp.getRoot().asString(), expect.asString());
+}
+
+//-----------------------------------------------------------------------------
+
+/**
+ * Test collecting the sum of the values from a single attribute
+ * vector directly into the root node. Consider this a smoke test.
+ **/
+void
+Test::testAggregationSimple()
+{
+ AggregationContext ctx;
+ ctx.result().add(0).add(1).add(2);
+ ctx.add(IntAttrBuilder("int").add(3).add(7).add(15).sp());
+ ctx.add(FloatAttrBuilder("float").add(3).add(7).add(15).sp());
+ ctx.add(StringAttrBuilder("string").add("3").add("7").add("15").sp());
+
+ char strsum[3] = {-101, '5', 0};
+ testAggregationSimpleSum(ctx, SumAggregationResult(), Int64ResultNode(25), FloatResultNode(25), StringResultNode(strsum));
+ testAggregationSimpleSum(ctx, MinAggregationResult(), Int64ResultNode(3), FloatResultNode(3), StringResultNode("15"));
+ testAggregationSimpleSum(ctx, MaxAggregationResult(), Int64ResultNode(15), FloatResultNode(15), StringResultNode("7"));
+}
+
+void Test::testAggregationSimpleSum(AggregationContext & ctx, const AggregationResult & aggr, const ResultNode & ir, const ResultNode & fr, const ResultNode & sr)
+{
+ ExpressionNode::CP clone(aggr);
+ Grouping request = Grouping()
+ .setRoot(Group()
+ .setId(NullResultNode())
+ .addResult(static_cast<AggregationResult &>(*clone).setExpression(AttributeNode("int")))
+ .addResult(static_cast<AggregationResult &>(*clone).setExpression(AttributeNode("float")))
+ .addResult(static_cast<AggregationResult &>(*clone).setExpression(AttributeNode("string")))
+ );
+
+ Group expect = Group()
+ .setId(NullResultNode())
+ .addResult(static_cast<AggregationResult &>(*clone).setExpression(AttributeNode("int")).setResult(ir))
+ .addResult(static_cast<AggregationResult &>(*clone).setExpression(AttributeNode("float")).setResult(fr))
+ .addResult(static_cast<AggregationResult &>(*clone).setExpression(AttributeNode("string")).setResult(sr));
+
+ EXPECT_TRUE(testAggregation(ctx, request, expect));
+}
+
+/**
+ * Verify that the backend aggregation will classify and collect on
+ * the appropriate levels, as indicated by the firstLevel and
+ * lastLevel parameters.
+ **/
+void
+Test::testAggregationLevels()
+{
+ AggregationContext ctx;
+ ctx.add(IntAttrBuilder("attr0").add(10).add(10).sp());
+ ctx.add(IntAttrBuilder("attr1").add(11).add(11).sp());
+ ctx.add(IntAttrBuilder("attr2").add(12).add(12).sp());
+ ctx.add(IntAttrBuilder("attr3").add(13).add(13).sp());
+ ctx.result().add(0).add(1);
+
+ Grouping baseRequest = Grouping()
+ .setRoot(Group()
+ .setId(NullResultNode())
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("attr0"))))
+ .addLevel(GroupingLevel()
+ .setExpression(AttributeNode("attr1"))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("attr2"))))
+ .addLevel(GroupingLevel()
+ .setExpression(AttributeNode("attr2"))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("attr3"))))
+ .addLevel(GroupingLevel()
+ .setExpression(AttributeNode("attr3"))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("attr1"))));
+
+ Group notDone = Group()
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("attr0")));
+// Hmm, do not need to prepare more than the levels needed. .setResult(Int64ResultNode(0)));
+
+ Group done0 = Group()
+ .setId(NullResultNode())
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("attr0"))
+ .setResult(Int64ResultNode(20)))
+ .addChild(Group()
+ .setId(Int64ResultNode(11))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("attr2"))
+ .setResult(Int64ResultNode(0))));
+
+ Group done1 = Group()
+ .setId(NullResultNode())
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("attr0"))
+ .setResult(Int64ResultNode(20)))
+ .addChild(Group()
+ .setId(Int64ResultNode(11))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("attr2"))
+ .setResult(Int64ResultNode(24)))
+ .addChild(Group()
+ .setId(Int64ResultNode(12))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("attr3"))
+ .setResult(Int64ResultNode(0)))));
+
+ Group done2 = Group()
+ .setId(NullResultNode())
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("attr0"))
+ .setResult(Int64ResultNode(20)))
+ .addChild(Group()
+ .setId(Int64ResultNode(11))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("attr2"))
+ .setResult(Int64ResultNode(24)))
+ .addChild(Group()
+ .setId(Int64ResultNode(12))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("attr3"))
+ .setResult(Int64ResultNode(26)))
+ .addChild(Group()
+ .setId(Int64ResultNode(13))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("attr1"))
+ .setResult(Int64ResultNode(0))))));
+
+ Group done3 = Group()
+ .setId(NullResultNode())
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("attr0"))
+ .setResult(Int64ResultNode(20)))
+ .addChild(Group()
+ .setId(Int64ResultNode(11))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("attr2"))
+ .setResult(Int64ResultNode(24)))
+ .addChild(Group()
+ .setId(Int64ResultNode(12))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("attr3"))
+ .setResult(Int64ResultNode(26)))
+ .addChild(Group()
+ .setId(Int64ResultNode(13))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("attr1"))
+ .setResult(Int64ResultNode(22))))));
+
+ { // level 0 only
+ Grouping request = baseRequest.unchain().setFirstLevel(0).setLastLevel(0);
+ EXPECT_TRUE(testAggregation(ctx, request, done0));
+ }
+ { // level 0 and 1
+ Grouping request = baseRequest.unchain().setFirstLevel(0).setLastLevel(1);
+ EXPECT_TRUE(testAggregation(ctx, request, done1));
+ }
+ { // level 0,1 and 2
+ Grouping request = baseRequest.unchain().setFirstLevel(0).setLastLevel(2);
+ EXPECT_TRUE(testAggregation(ctx, request, done2));
+ }
+ { // level 0,1,2 and 3
+ Grouping request = baseRequest.unchain().setFirstLevel(0).setLastLevel(3);
+ EXPECT_TRUE(testAggregation(ctx, request, done3));
+ }
+ { // level 1 with level 0 as input
+ Grouping request = baseRequest.unchain().setFirstLevel(1).setLastLevel(1).setRoot(done0);
+ EXPECT_TRUE(testAggregation(ctx, request, done1));
+ }
+ { // level 2 with level 0 and 1 as input
+ Grouping request = baseRequest.unchain().setFirstLevel(2).setLastLevel(2).setRoot(done1);
+ EXPECT_TRUE(testAggregation(ctx, request, done2));
+ }
+ { // level 3 with level 0,1 and 2 as input
+ Grouping request = baseRequest.unchain().setFirstLevel(3).setLastLevel(3).setRoot(done2);
+ EXPECT_TRUE(testAggregation(ctx, request, done3));
+ }
+ { // level 2 and 3 with level 0 and 1 as input
+ Grouping request = baseRequest.unchain().setFirstLevel(2).setLastLevel(3).setRoot(done1);
+ EXPECT_TRUE(testAggregation(ctx, request, done3));
+ }
+#if 0
+ { // level 1 without level 0 as input
+ Grouping request = baseRequest.unchain().setFirstLevel(1).setLastLevel(1);
+ EXPECT_TRUE(testAggregation(ctx, request, notDone));
+ }
+#else
+ //#warning "Test has been temporarily disabled"
+#endif
+}
+
+/**
+ * Verify that the aggregation step does not create more groups than
+ * indicated by the maxgroups parameter.
+ **/
+void
+Test::testAggregationMaxGroups()
+{
+ AggregationContext ctx;
+ ctx.add(IntAttrBuilder("attr").add(5).add(10).add(15).sp());
+ ctx.result().add(0).add(1).add(2);
+
+ Grouping baseRequest = Grouping()
+ .setRoot(Group().setId(NullResultNode()))
+ .addLevel(GroupingLevel()
+ .setExpression(AttributeNode("attr")));
+
+ Group empty = Group().setId(NullResultNode());
+ Group grp1 = empty.unchain().addChild(Group().setId(Int64ResultNode(5)));
+ Group grp2 = grp1.unchain().addChild(Group().setId(Int64ResultNode(10)));
+ Group grp3 = grp2.unchain().addChild(Group().setId(Int64ResultNode(15)));
+
+ { // max 0 groups
+ Grouping request = baseRequest;
+ request.levels()[0].setMaxGroups(0);
+ EXPECT_TRUE(testAggregation(ctx, request, empty));
+ }
+ { // max 1 groups
+ Grouping request = baseRequest;
+ request.levels()[0].setMaxGroups(1);
+ EXPECT_TRUE(testAggregation(ctx, request, grp1));
+ }
+ { // max 2 groups
+ Grouping request = baseRequest;
+ request.levels()[0].setMaxGroups(2);
+ EXPECT_TRUE(testAggregation(ctx, request, grp2));
+ }
+ { // max 3 groups
+ Grouping request = baseRequest;
+ request.levels()[0].setMaxGroups(3);
+ EXPECT_TRUE(testAggregation(ctx, request, grp3));
+ }
+ { // max 4 groups
+ Grouping request = baseRequest;
+ request.levels()[0].setMaxGroups(4);
+ EXPECT_TRUE(testAggregation(ctx, request, grp3));
+ }
+ { // max -1 groups
+ Grouping request = baseRequest;
+ request.levels()[0].setMaxGroups(-1);
+ EXPECT_TRUE(testAggregation(ctx, request, grp3));
+ }
+}
+
+/**
+ * Verify that groups are sorted by group id
+ **/
+void
+Test::testAggregationGroupOrder()
+{
+ AggregationContext ctx;
+ ctx.add(IntAttrBuilder("attr").add(10).add(25).add(35).add(5).add(20).add(15).add(30).sp());
+ ctx.result().add(0).add(1).add(2).add(3).add(4).add(5).add(6);
+
+ Grouping request = Grouping()
+ .setRoot(Group().setId(NullResultNode()))
+ .addLevel(GroupingLevel()
+ .setExpression(AttributeNode("attr")));
+
+ Group expect = Group()
+ .setId(NullResultNode())
+ .addChild(Group().setId(Int64ResultNode(5)))
+ .addChild(Group().setId(Int64ResultNode(10)))
+ .addChild(Group().setId(Int64ResultNode(15)))
+ .addChild(Group().setId(Int64ResultNode(20)))
+ .addChild(Group().setId(Int64ResultNode(25)))
+ .addChild(Group().setId(Int64ResultNode(30)))
+ .addChild(Group().setId(Int64ResultNode(35)));
+
+ EXPECT_TRUE(testAggregation(ctx, request, expect));
+}
+
+/**
+ * Verify that groups are tagged with the appropriate rank value.
+ **/
+void
+Test::testAggregationGroupRank()
+{
+ AggregationContext ctx;
+ ctx.add(IntAttrBuilder("attr")
+ .add(1).add(1).add(1)
+ .add(2).add(2).add(2)
+ .add(3).add(3).add(3).sp());
+ ctx.result()
+ .add(0, 5).add(1, 10).add(2, 15)
+ .add(3, 10).add(4, 15).add(5, 5)
+ .add(6, 15).add(7, 5).add(8, 10);
+
+ Grouping request = Grouping()
+ .setRoot(Group().setId(NullResultNode()))
+ .addLevel(GroupingLevel().setExpression(AttributeNode("attr")));
+
+ Group expect = Group()
+ .setId(NullResultNode())
+ .addChild(Group().setId(Int64ResultNode(1)).setRank(RawRank(15)))
+ .addChild(Group().setId(Int64ResultNode(2)).setRank(RawRank(15)))
+ .addChild(Group().setId(Int64ResultNode(3)).setRank(RawRank(15)));
+
+ EXPECT_TRUE(testAggregation(ctx, request, expect));
+}
+
+void
+Test::testAggregationGroupCapping()
+{
+ AggregationContext ctx;
+ ctx.add(IntAttrBuilder("attr")
+ .add(1).add(2).add(3)
+ .add(4).add(5).add(6)
+ .add(7).add(8).add(9).sp());
+ ctx.result()
+ .add(0, 1).add(1, 2).add(2, 3)
+ .add(3, 4).add(4, 5).add(5, 6)
+ .add(6, 7).add(7, 8).add(8, 9);
+
+ {
+ Grouping request = Grouping().setRoot(Group().setId(NullResultNode())).addLevel(
+ GroupingLevel().setExpression(AttributeNode("attr")));
+
+ Group expect = Group().setId(NullResultNode())
+ .addChild(Group().setId(Int64ResultNode(1)).setRank(RawRank(1)))
+ .addChild(Group().setId(Int64ResultNode(2)).setRank(RawRank(2)))
+ .addChild(Group().setId(Int64ResultNode(3)).setRank(RawRank(3)))
+ .addChild(Group().setId(Int64ResultNode(4)).setRank(RawRank(4)))
+ .addChild(Group().setId(Int64ResultNode(5)).setRank(RawRank(5)))
+ .addChild(Group().setId(Int64ResultNode(6)).setRank(RawRank(6)))
+ .addChild(Group().setId(Int64ResultNode(7)).setRank(RawRank(7)))
+ .addChild(Group().setId(Int64ResultNode(8)).setRank(RawRank(8)))
+ .addChild(Group().setId(Int64ResultNode(9)).setRank(RawRank(9)));
+
+ EXPECT_TRUE(testAggregation(ctx, request, expect));
+ }
+ {
+ Grouping request = Grouping().setRoot(Group().setId(NullResultNode())).addLevel(
+ GroupingLevel().setMaxGroups(3).setExpression(AttributeNode("attr")));
+
+ Group expect = Group().setId(NullResultNode())
+ .addChild(Group().setId(Int64ResultNode(7)).setRank(RawRank(7)))
+ .addChild(Group().setId(Int64ResultNode(8)).setRank(RawRank(8)))
+ .addChild(Group().setId(Int64ResultNode(9)).setRank(RawRank(9)));
+
+ EXPECT_TRUE(testAggregation(ctx, request, expect));
+ }
+ {
+ Grouping request = Grouping().
+ setRoot(Group().setId(NullResultNode())).
+ setFirstLevel(0).
+ setLastLevel(1).
+ addLevel(
+ GroupingLevel().setMaxGroups(3).setExpression(AttributeNode("attr")).
+ addAggregationResult(SumAggregationResult().setExpression(AttributeNode("attr"))).
+ addOrderBy(AggregationRefNode(0), false));
+
+ Group expect = Group().setId(NullResultNode())
+ .addChild(Group().setId(Int64ResultNode(7)).setRank(RawRank(7)).addAggregationResult(SumAggregationResult(Int64ResultNode(7)).setExpression(AttributeNode("attr"))).addOrderBy(AggregationRefNode(0), false))
+ .addChild(Group().setId(Int64ResultNode(8)).setRank(RawRank(8)).addAggregationResult(SumAggregationResult(Int64ResultNode(8)).setExpression(AttributeNode("attr"))).addOrderBy(AggregationRefNode(0), false))
+ .addChild(Group().setId(Int64ResultNode(9)).setRank(RawRank(9)).addAggregationResult(SumAggregationResult(Int64ResultNode(9)).setExpression(AttributeNode("attr"))).addOrderBy(AggregationRefNode(0), false));
+
+ EXPECT_TRUE(testAggregation(ctx, request, expect));
+ }
+ {
+ Grouping request = Grouping().
+ setRoot(Group().setId(NullResultNode())).
+ setFirstLevel(0).
+ setLastLevel(1).
+ addLevel(
+ GroupingLevel().setMaxGroups(3).setExpression(AttributeNode("attr")).
+ addAggregationResult(SumAggregationResult().setExpression(AttributeNode("attr"))).addOrderBy(AggregationRefNode(0), true));
+
+ Group expect = Group().setId(NullResultNode())
+ .addChild(Group().setId(Int64ResultNode(1)).setRank(RawRank(1)).addAggregationResult(SumAggregationResult(Int64ResultNode(1)).setExpression(AttributeNode("attr"))).addOrderBy(AggregationRefNode(0), true))
+ .addChild(Group().setId(Int64ResultNode(2)).setRank(RawRank(2)).addAggregationResult(SumAggregationResult(Int64ResultNode(2)).setExpression(AttributeNode("attr"))).addOrderBy(AggregationRefNode(0), true))
+ .addChild(Group().setId(Int64ResultNode(3)).setRank(RawRank(3)).addAggregationResult(SumAggregationResult(Int64ResultNode(3)).setExpression(AttributeNode("attr"))).addOrderBy(AggregationRefNode(0), true));
+
+ EXPECT_TRUE(testAggregation(ctx, request, expect));
+ }
+ {
+ AddFunctionNode *add = new AddFunctionNode();
+ add->addArg(AggregationRefNode(0));
+ add->appendArg(ConstantNode(Int64ResultNode(3)));
+ ExpressionNode::CP i1(add);
+ Grouping request = Grouping().
+ setFirstLevel(0).
+ setLastLevel(1).
+ addLevel(
+ GroupingLevel().setMaxGroups(3).setExpression(AttributeNode("attr")).
+ addAggregationResult(SumAggregationResult().setExpression(AttributeNode("attr"))).
+ addOrderBy(i1, false));
+
+ Group expect = Group()
+ .addChild(Group().setId(Int64ResultNode(7)).setRank(RawRank(7)).addAggregationResult(SumAggregationResult(Int64ResultNode(7)).setExpression(AttributeNode("attr"))).addOrderBy(AddFunctionNode().appendArg(AggregationRefNode(0)).appendArg(ConstantNode(Int64ResultNode(3))).setResult(Int64ResultNode(10)), false))
+ .addChild(Group().setId(Int64ResultNode(8)).setRank(RawRank(8)).addAggregationResult(SumAggregationResult(Int64ResultNode(8)).setExpression(AttributeNode("attr"))).addOrderBy(AddFunctionNode().appendArg(AggregationRefNode(0)).appendArg(ConstantNode(Int64ResultNode(3))).setResult(Int64ResultNode(11)), false))
+ .addChild(Group().setId(Int64ResultNode(9)).setRank(RawRank(9)).addAggregationResult(SumAggregationResult(Int64ResultNode(9)).setExpression(AttributeNode("attr"))).addOrderBy(AddFunctionNode().appendArg(AggregationRefNode(0)).appendArg(ConstantNode(Int64ResultNode(3))).setResult(Int64ResultNode(12)), false));
+
+ EXPECT_TRUE(testAggregation(ctx, request, expect));
+ }
+
+}
+
+//-----------------------------------------------------------------------------
+
+/**
+ * Test merging the sum of the values from a single attribute vector
+ * that was collected directly into the root node. Consider this a
+ * smoke test.
+ **/
+void
+Test::testMergeSimpleSum()
+{
+ Grouping a = Grouping()
+ .setRoot(Group()
+ .setId(NullResultNode())
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("foo"))
+ .setResult(Int64ResultNode(20))));
+
+ Grouping b = Grouping()
+ .setRoot(Group()
+ .setId(NullResultNode())
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("foo"))
+ .setResult(Int64ResultNode(30))));
+
+ Group expect = Group()
+ .setId(NullResultNode())
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("foo"))
+ .setResult(Int64ResultNode(50)));
+
+ EXPECT_TRUE(testMerge(a, b, expect));
+}
+
+/**
+ * Verify that frozen levels are not touched during merge.
+ **/
+void
+Test::testMergeLevels()
+{
+ Grouping request = Grouping()
+ .addLevel(GroupingLevel()
+ .setExpression(AttributeNode("c1"))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s1"))))
+ .addLevel(GroupingLevel()
+ .setExpression(AttributeNode("c2"))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s2"))))
+ .addLevel(GroupingLevel()
+ .setExpression(AttributeNode("c3"))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s3"))));
+
+ Group a = Group()
+ .setId(NullResultNode())
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s0"))
+ .setResult(Int64ResultNode(5)))
+ .addChild(Group()
+ .setId(Int64ResultNode(10))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s1"))
+ .setResult(Int64ResultNode(10)))
+ .addChild(Group()
+ .setId(Int64ResultNode(20))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s2"))
+ .setResult(Int64ResultNode(15)))
+ .addChild(Group()
+ .setId(Int64ResultNode(30))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s3"))
+ .setResult(Int64ResultNode(20))))));
+
+ Group b = Group()
+ .setId(NullResultNode())
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s0"))
+ .setResult(Int64ResultNode(5)))
+ .addChild(Group()
+ .setId(Int64ResultNode(10))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s1"))
+ .setResult(Int64ResultNode(10)))
+ .addChild(Group()
+ .setId(Int64ResultNode(20))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s2"))
+ .setResult(Int64ResultNode(15)))
+ .addChild(Group()
+ .setId(Int64ResultNode(30))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s3"))
+ .setResult(Int64ResultNode(20))))));
+
+ Group expect_all = Group()
+ .setId(NullResultNode())
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s0"))
+ .setResult(Int64ResultNode(10)))
+ .addChild(Group()
+ .setId(Int64ResultNode(10))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s1"))
+ .setResult(Int64ResultNode(20)))
+ .addChild(Group()
+ .setId(Int64ResultNode(20))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s2"))
+ .setResult(Int64ResultNode(30)))
+ .addChild(Group()
+ .setId(Int64ResultNode(30))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s3"))
+ .setResult(Int64ResultNode(40))))));
+
+ Group expect_0 = Group()
+ .setId(NullResultNode())
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s0"))
+ .setResult(Int64ResultNode(5)))
+ .addChild(Group()
+ .setId(Int64ResultNode(10))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s1"))
+ .setResult(Int64ResultNode(20)))
+ .addChild(Group()
+ .setId(Int64ResultNode(20))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s2"))
+ .setResult(Int64ResultNode(30)))
+ .addChild(Group()
+ .setId(Int64ResultNode(30))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s3"))
+ .setResult(Int64ResultNode(40))))));
+
+
+ Group expect_1 = Group()
+ .setId(NullResultNode())
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s0"))
+ .setResult(Int64ResultNode(5)))
+ .addChild(Group()
+ .setId(Int64ResultNode(10))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s1"))
+ .setResult(Int64ResultNode(10)))
+ .addChild(Group()
+ .setId(Int64ResultNode(20))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s2"))
+ .setResult(Int64ResultNode(30)))
+ .addChild(Group()
+ .setId(Int64ResultNode(30))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s3"))
+ .setResult(Int64ResultNode(40))))));
+
+
+ Group expect_2 = Group()
+ .setId(NullResultNode())
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s0"))
+ .setResult(Int64ResultNode(5)))
+ .addChild(Group()
+ .setId(Int64ResultNode(10))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s1"))
+ .setResult(Int64ResultNode(10)))
+ .addChild(Group()
+ .setId(Int64ResultNode(20))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s2"))
+ .setResult(Int64ResultNode(15)))
+ .addChild(Group()
+ .setId(Int64ResultNode(30))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s3"))
+ .setResult(Int64ResultNode(40))))));
+
+
+ Group expect_3 = Group()
+ .setId(NullResultNode())
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s0"))
+ .setResult(Int64ResultNode(5)))
+ .addChild(Group()
+ .setId(Int64ResultNode(10))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s1"))
+ .setResult(Int64ResultNode(10)))
+ .addChild(Group()
+ .setId(Int64ResultNode(20))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s2"))
+ .setResult(Int64ResultNode(15)))
+ .addChild(Group()
+ .setId(Int64ResultNode(30))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s3"))
+ .setResult(Int64ResultNode(20))))));
+
+ EXPECT_TRUE(testMerge(request.unchain().setFirstLevel(0).setLastLevel(3).setRoot(a),
+ request.unchain().setFirstLevel(0).setLastLevel(3).setRoot(b),
+ expect_all));
+ EXPECT_TRUE(testMerge(request.unchain().setFirstLevel(1).setLastLevel(3).setRoot(a),
+ request.unchain().setFirstLevel(1).setLastLevel(3).setRoot(b),
+ expect_0));
+ EXPECT_TRUE(testMerge(request.unchain().setFirstLevel(2).setLastLevel(5).setRoot(a),
+ request.unchain().setFirstLevel(2).setLastLevel(5).setRoot(b),
+ expect_1));
+ EXPECT_TRUE(testMerge(request.unchain().setFirstLevel(3).setLastLevel(5).setRoot(a),
+ request.unchain().setFirstLevel(3).setLastLevel(5).setRoot(b),
+ expect_2));
+ EXPECT_TRUE(testMerge(request.unchain().setFirstLevel(4).setLastLevel(4).setRoot(a),
+ request.unchain().setFirstLevel(4).setLastLevel(4).setRoot(b),
+ expect_3));
+}
+
+/**
+ * Verify that the number of groups for a level is pruned down to
+ * maxGroups, that the remaining groups are the highest ranked ones,
+ * and that they are sorted by group id.
+ **/
+void
+Test::testMergeGroups()
+{
+ Grouping request = Grouping()
+ .addLevel(GroupingLevel()
+ .setExpression(AttributeNode("attr")));
+
+ Group a = Group()
+ .setId(NullResultNode())
+ .addChild(Group().setId(StringResultNode("05")).setRank(RawRank(5)))
+ .addChild(Group().setId(StringResultNode("10")).setRank(RawRank(5))) // (2)
+ .addChild(Group().setId(StringResultNode("15")).setRank(RawRank(15)))
+ .addChild(Group().setId(StringResultNode("40")).setRank(RawRank(100))) // 1
+ .addChild(Group().setId(StringResultNode("50")).setRank(RawRank(30))); // 3
+
+ Group b = Group()
+ .setId(NullResultNode())
+ .addChild(Group().setId(StringResultNode("00")).setRank(RawRank(10)))
+ .addChild(Group().setId(StringResultNode("10")).setRank(RawRank(50))) // 2
+ .addChild(Group().setId(StringResultNode("20")).setRank(RawRank(25))) // 4
+ .addChild(Group().setId(StringResultNode("40")).setRank(RawRank(10))) // (1)
+ .addChild(Group().setId(StringResultNode("45")).setRank(RawRank(20))); // 5
+
+ Group expect_3 = Group()
+ .setId(NullResultNode())
+ .addChild(Group().setId(StringResultNode("10")).setRank(RawRank(50)))
+ .addChild(Group().setId(StringResultNode("40")).setRank(RawRank(100)))
+ .addChild(Group().setId(StringResultNode("50")).setRank(RawRank(30)));
+
+ Group expect_5 = Group()
+ .setId(NullResultNode())
+ .addChild(Group().setId(StringResultNode("10")).setRank(RawRank(50)))
+ .addChild(Group().setId(StringResultNode("20")).setRank(RawRank(25)))
+ .addChild(Group().setId(StringResultNode("40")).setRank(RawRank(100)))
+ .addChild(Group().setId(StringResultNode("45")).setRank(RawRank(20)))
+ .addChild(Group().setId(StringResultNode("50")).setRank(RawRank(30)));
+
+ Group expect_all = Group()
+ .setId(NullResultNode())
+ .addChild(Group().setId(StringResultNode("00")).setRank(RawRank(10)))
+ .addChild(Group().setId(StringResultNode("05")).setRank(RawRank( 5)))
+ .addChild(Group().setId(StringResultNode("10")).setRank(RawRank(50)))
+ .addChild(Group().setId(StringResultNode("15")).setRank(RawRank(15)))
+ .addChild(Group().setId(StringResultNode("20")).setRank(RawRank(25)))
+ .addChild(Group().setId(StringResultNode("40")).setRank(RawRank(100)))
+ .addChild(Group().setId(StringResultNode("45")).setRank(RawRank(20)))
+ .addChild(Group().setId(StringResultNode("50")).setRank(RawRank(30)));
+
+ request.levels()[0].setMaxGroups(3);
+ EXPECT_TRUE(testMerge(request.unchain().setRoot(a), request.unchain().setRoot(b), expect_3));
+ EXPECT_TRUE(testMerge(request.unchain().setRoot(b), request.unchain().setRoot(a), expect_3));
+ request.levels()[0].setMaxGroups(5);
+ EXPECT_TRUE(testMerge(request.unchain().setRoot(a), request.unchain().setRoot(b), expect_5));
+ EXPECT_TRUE(testMerge(request.unchain().setRoot(b), request.unchain().setRoot(a), expect_5));
+ request.levels()[0].setMaxGroups(-1);
+ EXPECT_TRUE(testMerge(request.unchain().setRoot(a), request.unchain().setRoot(b), expect_all));
+ EXPECT_TRUE(testMerge(request.unchain().setRoot(b), request.unchain().setRoot(a), expect_all));
+}
+
+/**
+ * Merge two relatively complex tree structures and verify that the
+ * end result is as expected.
+ **/
+void
+Test::testMergeTrees()
+{
+ Grouping request = Grouping()
+ .addLevel(GroupingLevel()
+ .setMaxGroups(3)
+ .setExpression(AttributeNode("c1"))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s1"))))
+ .addLevel(GroupingLevel()
+ .setMaxGroups(2)
+ .setExpression(AttributeNode("c2"))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s2"))))
+ .addLevel(GroupingLevel()
+ .setMaxGroups(1)
+ .setExpression(AttributeNode("c3"))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s3"))));
+
+ Group a = Group()
+ .setId(NullResultNode())
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s0"))
+ .setResult(Int64ResultNode(100)))
+ .addChild(Group().setId(Int64ResultNode(4)).setRank(RawRank(10)))
+ .addChild(Group()
+ .setId(Int64ResultNode(5))
+ .setRank(RawRank(5)) // merged with 200 rank node
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s1"))
+ .setResult(Int64ResultNode(100)))
+ .addChild(Group().setId(Int64ResultNode(4)).setRank(RawRank(10)))
+ .addChild(Group()
+ .setId(Int64ResultNode(5))
+ .setRank(RawRank(500))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s2"))
+ .setResult(Int64ResultNode(100)))
+ .addChild(Group().setId(Int64ResultNode(4)).setRank(RawRank(10)))
+ .addChild(Group()
+ .setId(Int64ResultNode(5))
+ .setRank(RawRank(200))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s3"))
+ .setResult(Int64ResultNode(100)))
+ )
+ )
+ )
+ .addChild(Group().setId(Int64ResultNode(9)).setRank(RawRank(10)))
+ .addChild(Group()
+ .setId(Int64ResultNode(10))
+ .setRank(RawRank(100))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s1"))
+ .setResult(Int64ResultNode(100)))
+ // dummy child would be picked up here
+ .addChild(Group()
+ .setId(Int64ResultNode(15))
+ .setRank(RawRank(200))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s2"))
+ .setResult(Int64ResultNode(100)))
+ .addChild(Group().setId(Int64ResultNode(14)).setRank(RawRank(10)))
+ .addChild(Group()
+ .setId(Int64ResultNode(15))
+ .setRank(RawRank(300))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s3"))
+ .setResult(Int64ResultNode(100)))
+ )
+ )
+ )
+ .addChild(Group().setId(Int64ResultNode(14)).setRank(RawRank(10)))
+ .addChild(Group()
+ .setId(Int64ResultNode(15))
+ .setRank(RawRank(300))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s1"))
+ .setResult(Int64ResultNode(100)))
+ .addChild(Group().setId(Int64ResultNode(19)).setRank(RawRank(10)))
+ .addChild(Group()
+ .setId(Int64ResultNode(20))
+ .setRank(RawRank(100))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s2"))
+ .setResult(Int64ResultNode(100)))
+ )
+ );
+
+ Group b = Group()
+ .setId(NullResultNode())
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s0"))
+ .setResult(Int64ResultNode(100)))
+ .addChild(Group().setId(Int64ResultNode(4)).setRank(RawRank(10)))
+ .addChild(Group()
+ .setId(Int64ResultNode(5))
+ .setRank(RawRank(200))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s1"))
+ .setResult(Int64ResultNode(100)))
+ .addChild(Group().setId(Int64ResultNode(9)).setRank(RawRank(10)))
+ .addChild(Group()
+ .setId(Int64ResultNode(10))
+ .setRank(RawRank(400))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s2"))
+ .setResult(Int64ResultNode(100)))
+ .addChild(Group().setId(Int64ResultNode(9)).setRank(RawRank(10)))
+ .addChild(Group()
+ .setId(Int64ResultNode(10))
+ .setRank(RawRank(100))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s3"))
+ .setResult(Int64ResultNode(100)))
+ )
+ )
+ )
+ .addChild(Group().setId(Int64ResultNode(9)).setRank(RawRank(10)))
+ .addChild(Group()
+ .setId(Int64ResultNode(10))
+ .setRank(RawRank(100))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s1"))
+ .setResult(Int64ResultNode(100)))
+ // dummy child would be picket up here
+ .addChild(Group()
+ .setId(Int64ResultNode(15))
+ .setRank(RawRank(200))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s2"))
+ .setResult(Int64ResultNode(100)))
+ )
+ )
+ .addChild(Group().setId(Int64ResultNode(14)).setRank(RawRank(10)))
+ .addChild(Group()
+ .setId(Int64ResultNode(15))
+ .setRank(RawRank(5)) // merged with 300 rank node
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s1"))
+ .setResult(Int64ResultNode(100)))
+ .addChild(Group().setId(Int64ResultNode(19)).setRank(RawRank(10)))
+ .addChild(Group()
+ .setId(Int64ResultNode(20))
+ .setRank(RawRank(5)) // merged with 100 rank node
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s2"))
+ .setResult(Int64ResultNode(100)))
+ .addChild(Group().setId(Int64ResultNode(19)).setRank(RawRank(10)))
+ .addChild(Group()
+ .setId(Int64ResultNode(20))
+ .setRank(RawRank(500))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s3"))
+ .setResult(Int64ResultNode(100)))
+ )
+ )
+ .addChild(Group().setId(Int64ResultNode(24)).setRank(RawRank(10)))
+ .addChild(Group()
+ .setId(Int64ResultNode(25))
+ .setRank(RawRank(300))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s2"))
+ .setResult(Int64ResultNode(100)))
+ .addChild(Group().setId(Int64ResultNode(24)).setRank(RawRank(10)))
+ .addChild(Group()
+ .setId(Int64ResultNode(25))
+ .setRank(RawRank(400))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s3"))
+ .setResult(Int64ResultNode(100)))
+ )
+ )
+ );
+
+ Group expect = Group()
+ .setId(NullResultNode())
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s0"))
+ .setResult(Int64ResultNode(200)))
+ .addChild(Group()
+ .setId(Int64ResultNode(5))
+ .setRank(RawRank(200))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s1"))
+ .setResult(Int64ResultNode(200)))
+ .addChild(Group()
+ .setId(Int64ResultNode(5))
+ .setRank(RawRank(500))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s2"))
+ .setResult(Int64ResultNode(100)))
+ .addChild(Group()
+ .setId(Int64ResultNode(5))
+ .setRank(RawRank(200))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s3"))
+ .setResult(Int64ResultNode(100)))
+ )
+ )
+ .addChild(Group()
+ .setId(Int64ResultNode(10))
+ .setRank(RawRank(400))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s2"))
+ .setResult(Int64ResultNode(100)))
+ .addChild(Group()
+ .setId(Int64ResultNode(10))
+ .setRank(RawRank(100))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s3"))
+ .setResult(Int64ResultNode(100)))
+ )
+ )
+ )
+ .addChild(Group()
+ .setId(Int64ResultNode(10))
+ .setRank(RawRank(100))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s1"))
+ .setResult(Int64ResultNode(200)))
+ .addChild(Group()
+ .setId(Int64ResultNode(15))
+ .setRank(RawRank(200))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s2"))
+ .setResult(Int64ResultNode(200)))
+ .addChild(Group()
+ .setId(Int64ResultNode(15))
+ .setRank(RawRank(300))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s3"))
+ .setResult(Int64ResultNode(100)))
+ )
+ )
+ )
+ .addChild(Group()
+ .setId(Int64ResultNode(15))
+ .setRank(RawRank(300))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s1"))
+ .setResult(Int64ResultNode(200)))
+ .addChild(Group()
+ .setId(Int64ResultNode(20))
+ .setRank(RawRank(100))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s2"))
+ .setResult(Int64ResultNode(200)))
+ .addChild(Group()
+ .setId(Int64ResultNode(20))
+ .setRank(RawRank(500))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s3"))
+ .setResult(Int64ResultNode(100)))
+ )
+ )
+ .addChild(Group()
+ .setId(Int64ResultNode(25))
+ .setRank(RawRank(300))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s2"))
+ .setResult(Int64ResultNode(100)))
+ .addChild(Group()
+ .setId(Int64ResultNode(25))
+ .setRank(RawRank(400))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s3"))
+ .setResult(Int64ResultNode(100)))
+ )
+ )
+ );
+
+ EXPECT_TRUE(testMerge(request.unchain().setRoot(a), request.unchain().setRoot(b), expect));
+ EXPECT_TRUE(testMerge(request.unchain().setRoot(b), request.unchain().setRoot(a), expect));
+}
+
+void
+Test::testPruneComplex()
+{
+ { // First level
+ Group baseTree = Group()
+ .addChild(Group().setId(StringResultNode("bar0"))
+ .addChild(Group().setId(StringResultNode("bar00"))
+ .addChild(Group().setId(StringResultNode("bar000")))
+ .addChild(Group().setId(StringResultNode("bar001")))
+ .addChild(Group().setId(StringResultNode("bar002"))))
+ .addChild(Group().setId(StringResultNode("bar01"))))
+ .addChild(Group().setId(StringResultNode("baz0"))
+ .addChild(Group().setId(StringResultNode("baz00"))
+ .addChild(Group().setId(StringResultNode("baz000")))
+ .addChild(Group().setId(StringResultNode("baz001")))))
+ .addChild(Group().setId(StringResultNode("foo0"))
+ .addChild(Group().setId(StringResultNode("foo00")))
+ .addChild(Group().setId(StringResultNode("foo01"))));
+
+ Group prune = Group()
+ .addChild(Group().setId(StringResultNode("bar0")))
+ .addChild(Group().setId(StringResultNode("foo0")));
+
+ Group expect = Group()
+ .addChild(Group().setId(StringResultNode("bar0"))
+ .addChild(Group().setId(StringResultNode("bar00"))
+ .addChild(Group().setId(StringResultNode("bar000")))
+ .addChild(Group().setId(StringResultNode("bar001")))
+ .addChild(Group().setId(StringResultNode("bar002"))))
+ .addChild(Group().setId(StringResultNode("bar01"))))
+ .addChild(Group().setId(StringResultNode("foo0"))
+ .addChild(Group().setId(StringResultNode("foo00")))
+ .addChild(Group().setId(StringResultNode("foo01"))));
+ Grouping request = Grouping().setFirstLevel(1).setLastLevel(1);
+ Grouping baseRequest = Grouping().setFirstLevel(0).setLastLevel(3);
+ EXPECT_TRUE(testPrune(baseRequest.unchain().setRoot(baseTree), request.unchain().setRoot(prune), expect));
+ }
+ { // Second level
+ Group baseTree = Group()
+ .addChild(Group().setId(StringResultNode("bar0"))
+ .addChild(Group().setId(StringResultNode("bar00"))
+ .addChild(Group().setId(StringResultNode("bar000")))
+ .addChild(Group().setId(StringResultNode("bar001")))
+ .addChild(Group().setId(StringResultNode("bar002"))))
+ .addChild(Group().setId(StringResultNode("bar01"))))
+ .addChild(Group().setId(StringResultNode("foo0"))
+ .addChild(Group().setId(StringResultNode("foo00")))
+ .addChild(Group().setId(StringResultNode("foo01"))));
+
+ Group prune = Group()
+ .addChild(Group()
+ .setId(StringResultNode("bar0"))
+ .addChild(Group().setId(StringResultNode("bar00"))))
+ .addChild(Group()
+ .setId(StringResultNode("foo0"))
+ .addChild(Group().setId(StringResultNode("foo01"))));
+
+ Group expect = Group()
+ .addChild(Group().setId(StringResultNode("bar0"))
+ .addChild(Group().setId(StringResultNode("bar00"))
+ .addChild(Group().setId(StringResultNode("bar000")))
+ .addChild(Group().setId(StringResultNode("bar001")))
+ .addChild(Group().setId(StringResultNode("bar002")))))
+ .addChild(Group().setId(StringResultNode("foo0"))
+ .addChild(Group().setId(StringResultNode("foo01"))));
+
+ Grouping request = Grouping().setFirstLevel(2).setLastLevel(2);
+ Grouping baseRequest = Grouping().setFirstLevel(0).setLastLevel(3);
+ EXPECT_TRUE(testPrune(baseRequest.unchain().setRoot(baseTree), request.unchain().setRoot(prune), expect));
+ }
+ { // Third level
+ Group baseTree = Group()
+ .addChild(Group().setId(StringResultNode("bar0"))
+ .addChild(Group().setId(StringResultNode("bar00"))
+ .addChild(Group().setId(StringResultNode("bar000")))
+
+ .addChild(Group().setId(StringResultNode("bar001")))
+ .addChild(Group().setId(StringResultNode("bar002")))))
+ .addChild(Group().setId(StringResultNode("foo0"))
+ .addChild(Group().setId(StringResultNode("foo01"))));
+ Group prune = Group()
+ .addChild(Group()
+ .setId(StringResultNode("bar0"))
+ .addChild(Group()
+ .setId(StringResultNode("bar00"))
+ .addChild(Group().setId(StringResultNode("bar001")))
+ .addChild(Group().setId(StringResultNode("bar002")))));
+
+ Group expect = Group()
+ .addChild(Group().setId(StringResultNode("bar0"))
+ .addChild(Group().setId(StringResultNode("bar00"))
+ .addChild(Group().setId(StringResultNode("bar001")))
+ .addChild(Group().setId(StringResultNode("bar002")))));
+ Grouping request = Grouping().setFirstLevel(3).setLastLevel(3);
+ Grouping baseRequest = Grouping().setFirstLevel(0).setLastLevel(3);
+ EXPECT_TRUE(testPrune(baseRequest.unchain().setRoot(baseTree), request.unchain().setRoot(prune), expect));
+ }
+ { // Try pruning a grouping we don't have
+ Group baseTree = Group()
+ .addChild(Group().setId(StringResultNode("bar0"))
+ .addChild(Group().setId(StringResultNode("bar00"))
+ .addChild(Group().setId(StringResultNode("bar000")))
+ .addChild(Group().setId(StringResultNode("bar001")))
+ .addChild(Group().setId(StringResultNode("bar002"))))
+ .addChild(Group().setId(StringResultNode("bar01"))))
+ .addChild(Group().setId(StringResultNode("baz0"))
+ .addChild(Group().setId(StringResultNode("baz00"))
+ .addChild(Group().setId(StringResultNode("baz000")))
+ .addChild(Group().setId(StringResultNode("baz001")))))
+ .addChild(Group().setId(StringResultNode("foo0"))
+ .addChild(Group().setId(StringResultNode("foo00")))
+ .addChild(Group().setId(StringResultNode("foo01"))));
+
+ Group prune = Group()
+ .addChild(Group().setId(StringResultNode("bar0")))
+ .addChild(Group().setId(StringResultNode("boz0")))
+ .addChild(Group().setId(StringResultNode("foo0")))
+ .addChild(Group().setId(StringResultNode("goo0")));
+
+ Group expect = Group()
+ .addChild(Group().setId(StringResultNode("bar0"))
+ .addChild(Group().setId(StringResultNode("bar00"))
+ .addChild(Group().setId(StringResultNode("bar000")))
+ .addChild(Group().setId(StringResultNode("bar001")))
+ .addChild(Group().setId(StringResultNode("bar002"))))
+ .addChild(Group().setId(StringResultNode("bar01"))))
+ .addChild(Group().setId(StringResultNode("foo0"))
+ .addChild(Group().setId(StringResultNode("foo00")))
+ .addChild(Group().setId(StringResultNode("foo01"))));
+ Grouping request = Grouping().setFirstLevel(1).setLastLevel(1);
+ Grouping baseRequest = Grouping().setFirstLevel(0).setLastLevel(3);
+ EXPECT_TRUE(testPrune(baseRequest.unchain().setRoot(baseTree), request.unchain().setRoot(prune), expect));
+ }
+}
+
+/**
+ * Test partial merge of a grouping tree, where all levels up to "lastLevel" is
+ * merged. The last level should not contain any children groups, and only empty
+ * results.
+ **/
+void
+Test::testPartialMerging()
+{
+ Grouping baseRequest = Grouping()
+ .addLevel(GroupingLevel()
+ .setExpression(AttributeNode("c1"))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s1"))))
+ .addLevel(GroupingLevel()
+ .setExpression(AttributeNode("c2"))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s2"))))
+ .addLevel(GroupingLevel()
+ .setExpression(AttributeNode("c3"))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s3"))));
+
+ // Cached result
+ Group cached = Group()
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s0"))
+ .setResult(Int64ResultNode(110)))
+ .addChild(Group()
+ .setId(Int64ResultNode(5))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s1"))
+ .setResult(Int64ResultNode(10)))
+ .addChild(Group()
+ .setId(Int64ResultNode(13))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s2"))
+ .setResult(Int64ResultNode(100)))
+ .addChild(Group()
+ .setId(Int64ResultNode(14))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s3"))
+ .setResult(Int64ResultNode(100)))
+ )
+ )
+ )
+ .addChild(Group()
+ .setId(Int64ResultNode(10))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s1"))
+ .setResult(Int64ResultNode(100)))
+ .addChild(Group()
+ .setId(Int64ResultNode(15))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s2"))
+ .setResult(Int64ResultNode(100)))
+ .addChild(Group()
+ .setId(Int64ResultNode(22))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s3"))
+ .setResult(Int64ResultNode(100)))
+ )
+ )
+ );
+
+
+ { // Merge lastlevel 0
+ Grouping request = baseRequest.unchain().setFirstLevel(0).setLastLevel(0);
+ Group incoming = Group()
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s0"))
+ .setResult(Int64ResultNode(0)));
+
+ Group expected = Group()
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s0"))
+ .setResult(Int64ResultNode(110)))
+ .addChild(Group()
+ .setId(Int64ResultNode(5))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s1"))
+ .setResult(Int64ResultNode(0)))
+ )
+ .addChild(Group()
+ .setId(Int64ResultNode(10))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s1"))
+ .setResult(Int64ResultNode(0)))
+ );
+ EXPECT_TRUE(testPartialMerge(request.unchain().setRoot(incoming), request.unchain().setLastLevel(3).setRoot(cached), expected));
+ }
+ {
+ // Merge existing tree. Assume we got modified data down again.
+ Grouping request = baseRequest.unchain().setFirstLevel(1).setLastLevel(1);
+ Group incoming = Group()
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s0"))
+ .setResult(Int64ResultNode(200)))
+ .addChild(Group()
+ .setId(Int64ResultNode(3))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s1"))
+ .setResult(Int64ResultNode(0)))
+ )
+ .addChild(Group()
+ .setId(Int64ResultNode(5))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s1"))
+ .setResult(Int64ResultNode(0)))
+ )
+ .addChild(Group()
+ .setId(Int64ResultNode(7))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s1"))
+ .setResult(Int64ResultNode(0)))
+ )
+ .addChild(Group()
+ .setId(Int64ResultNode(10))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s1"))
+ .setResult(Int64ResultNode(0))))
+ .addChild(Group()
+ .setId(Int64ResultNode(33))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s1"))
+ .setResult(Int64ResultNode(0)))
+ );
+ Group expected = Group()
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s0"))
+ .setResult(Int64ResultNode(200)))
+ .addChild(Group()
+ .setId(Int64ResultNode(3))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s1"))
+ .setResult(Int64ResultNode(0)))
+ )
+ .addChild(Group()
+ .setId(Int64ResultNode(5))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s1"))
+ .setResult(Int64ResultNode(10)))
+ .addChild(Group()
+ .setId(Int64ResultNode(13))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s2"))
+ .setResult(Int64ResultNode(0)))
+ )
+ )
+ .addChild(Group()
+ .setId(Int64ResultNode(7))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s1"))
+ .setResult(Int64ResultNode(0)))
+ )
+ .addChild(Group()
+ .setId(Int64ResultNode(10))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s1"))
+ .setResult(Int64ResultNode(100)))
+ .addChild(Group()
+ .setId(Int64ResultNode(15))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s2"))
+ .setResult(Int64ResultNode(0)))
+ )
+ )
+ .addChild(Group()
+ .setId(Int64ResultNode(33))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("s1"))
+ .setResult(Int64ResultNode(0)))
+ );
+ EXPECT_TRUE(testPartialMerge(request.unchain().setRoot(incoming), request.unchain().setFirstLevel(0).setLastLevel(3).setRoot(cached), expected));
+ }
+}
+
+/**
+ * Test that pruning a simple grouping tree works.
+ **/
+void
+Test::testPruneSimple()
+{
+ {
+ Grouping request = Grouping()
+ .addLevel(GroupingLevel()
+ .setExpression(AttributeNode("attr")))
+ .setFirstLevel(1)
+ .setLastLevel(1);
+
+ Group a = Group()
+ .addChild(Group().setId(StringResultNode("foo")))
+ .addChild(Group().setId(StringResultNode("bar")))
+ .addChild(Group().setId(StringResultNode("baz")));
+
+ Group b = Group()
+ .addChild(Group().setId(StringResultNode("foo")));
+
+ Group expect = Group()
+ .addChild(Group().setId(StringResultNode("foo")));
+
+ EXPECT_TRUE(testPrune(request.unchain().setFirstLevel(0).setRoot(a), request.unchain().setRoot(b), expect));
+ }
+}
+
+/**
+ * Test that simple counting works as long as we use an expression
+ * that we init, calculate and ignore.
+ **/
+void
+Test::testTopN()
+{
+ AggregationContext ctx;
+ ctx.result().add(0).add(1).add(2);
+ ctx.add(IntAttrBuilder("foo").add(3).add(7).add(15).sp());
+
+ Grouping request = Grouping()
+ .setRoot(Group().setId(NullResultNode())
+ .addResult(CountAggregationResult()
+ .setExpression(ConstantNode(Int64ResultNode(0)))
+ )
+ );
+ {
+ Group expect = Group().setId(NullResultNode())
+ .addResult(CountAggregationResult().setCount(3)
+ .setExpression(ConstantNode(Int64ResultNode(0)))
+ );
+
+ EXPECT_TRUE(testAggregation(ctx, request, expect));
+ }
+ {
+ Group expect = Group().setId(NullResultNode())
+ .addResult(CountAggregationResult().setCount(1)
+ .setExpression(ConstantNode(Int64ResultNode(0)))
+ );
+
+ EXPECT_TRUE(testAggregation(ctx, request.setTopN(1), expect));
+ }
+ {
+ Grouping request2 = Grouping()
+ .setRoot(Group().setId(NullResultNode()))
+ .addLevel(GroupingLevel()
+ .addAggregationResult(SumAggregationResult())
+ .addOrderBy(AggregationRefNode(0), false));
+ EXPECT_TRUE(request2.needResort());
+ request2.setTopN(0);
+ EXPECT_TRUE(request2.needResort());
+ request2.setTopN(1);
+ EXPECT_TRUE(!request2.needResort());
+ request2.setTopN(100);
+ EXPECT_TRUE(!request2.needResort());
+ }
+}
+
+/**
+ * Test that simple counting works as long as we use an expression
+ * that we init, calculate and ignore.
+ **/
+void
+Test::testCount()
+{
+ AggregationContext ctx;
+ ctx.result().add(0).add(1).add(2);
+ ctx.add(IntAttrBuilder("foo").add(3).add(7).add(15).sp());
+
+ Grouping request = Grouping()
+ .setRoot(Group().setId(NullResultNode())
+ .addResult(CountAggregationResult()
+ .setExpression(ConstantNode(Int64ResultNode(0)))
+ )
+ );
+
+ Group expect = Group().setId(NullResultNode())
+ .addResult(CountAggregationResult().setCount(3)
+ .setExpression(ConstantNode(Int64ResultNode(0)))
+ );
+
+ EXPECT_TRUE(testAggregation(ctx, request, expect));
+}
+
+//-----------------------------------------------------------------------------
+
+bool
+Test::checkHits(const Grouping &g, uint32_t first, uint32_t last, uint32_t cnt)
+{
+ CountFS4Hits pop;
+ Grouping tmp = g;
+ tmp.setFirstLevel(first).setLastLevel(last).select(pop, pop);
+ return EXPECT_EQUAL(pop.getHitCount(), cnt);
+}
+
+void
+Test::testFS4HitCollection()
+{
+ { // aggregation
+ AggregationContext ctx;
+ ctx.result().add(30, 30.0).add(20, 20.0).add(10, 10.0).add(5, 5.0).add(25, 25.0);
+
+ Grouping request = Grouping()
+ .setRoot(Group().setId(NullResultNode())
+ .addResult(HitsAggregationResult()
+ .setMaxHits(3)
+ .setExpression(ConstantNode(Int64ResultNode(0))))
+ );
+
+ Group expect = Group().setId(NullResultNode())
+ .addResult(HitsAggregationResult()
+ .setMaxHits(3)
+ .addHit(FS4Hit(30, 30.0))
+ .addHit(FS4Hit(25, 25.0))
+ .addHit(FS4Hit(20, 20.0))
+ .sort()
+ .setExpression(ConstantNode(Int64ResultNode(0))));
+
+ EXPECT_TRUE(testAggregation(ctx, request, expect));
+ }
+ { // merging
+
+ Grouping request = Grouping()
+ .setRoot(Group()
+ .addResult(HitsAggregationResult()
+ .setMaxHits(3)
+ .setExpression(ConstantNode(Int64ResultNode(0))))
+ );
+
+ Group expect = Group()
+ .setId(NullResultNode())
+ .addResult(HitsAggregationResult()
+ .setMaxHits(3)
+ .addHit(FS4Hit(30, 30.0))
+ .addHit(FS4Hit(20, 20.0))
+ .addHit(FS4Hit(10, 10.0))
+ .sort()
+ .setExpression(ConstantNode(Int64ResultNode(0))));
+
+ Group a = Group()
+ .setId(NullResultNode())
+ .addResult(HitsAggregationResult()
+ .setMaxHits(3)
+ .addHit(FS4Hit(10, 10.0))
+ .addHit(FS4Hit(1, 5.0))
+ .addHit(FS4Hit(2, 4.0))
+ .sort()
+ .setExpression(ConstantNode(Int64ResultNode(0))));
+
+ Group b = Group()
+ .setId(NullResultNode())
+ .addResult(HitsAggregationResult()
+ .setMaxHits(3)
+ .addHit(FS4Hit(20, 20.0))
+ .addHit(FS4Hit(3, 7.0))
+ .addHit(FS4Hit(4, 6.0))
+ .sort()
+ .setExpression(ConstantNode(Int64ResultNode(0))));
+
+ Group c = Group()
+ .setId(NullResultNode())
+ .addResult(HitsAggregationResult()
+ .setMaxHits(3)
+ .addHit(FS4Hit(30, 30.0))
+ .addHit(FS4Hit(5, 9.0))
+ .addHit(FS4Hit(6, 8.0))
+ .sort()
+ .setExpression(ConstantNode(Int64ResultNode(0))));
+
+ EXPECT_TRUE(testMerge(request.unchain().setRoot(a), request.unchain().setRoot(b), request.unchain().setRoot(c), expect));
+ EXPECT_TRUE(testMerge(request.unchain().setRoot(b), request.unchain().setRoot(c), request.unchain().setRoot(a), expect));
+ EXPECT_TRUE(testMerge(request.unchain().setRoot(c), request.unchain().setRoot(a), request.unchain().setRoot(b), expect));
+ }
+ { // count hits (for external object selection)
+ HitsAggregationResult dummyHits = HitsAggregationResult()
+ .setMaxHits(3)
+ .addHit(FS4Hit(1, 3.0))
+ .addHit(FS4Hit(2, 2.0))
+ .addHit(FS4Hit(3, 1.0))
+ .sort();
+ Grouping g = Grouping().setRoot(Group().addResult(dummyHits)
+ .addChild(Group().addResult(dummyHits)
+ .addChild(Group().addResult(dummyHits))
+ )
+ .addChild(Group().addResult(dummyHits)
+ .addChild(Group().addResult(dummyHits)
+ .addChild(Group().addResult(dummyHits))
+ )
+ )
+ );
+ EXPECT_TRUE(checkHits(g, 0, 0, 3));
+ EXPECT_TRUE(checkHits(g, 1, 1, 6));
+ EXPECT_TRUE(checkHits(g, 2, 2, 6));
+ EXPECT_TRUE(checkHits(g, 3, 3, 3));
+ EXPECT_TRUE(checkHits(g, 4, 4, 0));
+
+ EXPECT_TRUE(checkHits(g, 0, 1, 9));
+ EXPECT_TRUE(checkHits(g, 0, 2, 15));
+ EXPECT_TRUE(checkHits(g, 0, 3, 18));
+ EXPECT_TRUE(checkHits(g, 0, 4, 18));
+ EXPECT_TRUE(checkHits(g, 1, 4, 15));
+ EXPECT_TRUE(checkHits(g, 2, 4, 9));
+ EXPECT_TRUE(checkHits(g, 3, 4, 3));
+
+ EXPECT_TRUE(checkHits(g, 1, 2, 12));
+ EXPECT_TRUE(checkHits(g, 2, 3, 9));
+ EXPECT_TRUE(checkHits(g, 3, 4, 3));
+ EXPECT_TRUE(checkHits(g, 4, 5, 0));
+ }
+}
+
+bool
+Test::checkBucket(const NumericResultNode &width, const NumericResultNode &value, const BucketResultNode &bucket)
+{
+ AggregationContext ctx;
+ ctx.result().add(0);
+ if (value.getClass().inherits(IntegerResultNode::classId)) {
+ ctx.add(IntAttrBuilder("attr").add(value.getInteger()).sp());
+ } else if (value.getClass().inherits(FloatResultNode::classId)) {
+ ctx.add(FloatAttrBuilder("attr").add(value.getFloat()).sp());
+ } else {
+ return EXPECT_TRUE(false);
+ }
+ Grouping request = Grouping().setRoot(Group().setId(NullResultNode()))
+ .addLevel(GroupingLevel()
+ .setExpression(FixedWidthBucketFunctionNode(AttributeNode("attr")).setWidth(width)));
+ Group expect = Group().setId(NullResultNode()).addChild(Group().setId(bucket));
+ return testAggregation(ctx, request, expect);
+}
+
+void
+Test::testFixedWidthBuckets()
+{
+ typedef Int64ResultNode Int;
+ typedef FloatResultNode Float;
+ typedef IntegerBucketResultNode IntBucket;
+ typedef FloatBucketResultNode FloatBucket;
+
+ // positive int buckets
+ EXPECT_TRUE(checkBucket(Int(10), Int(0), IntBucket(0,10)));
+ EXPECT_TRUE(checkBucket(Int(10), Int(5), IntBucket(0,10)));
+ EXPECT_TRUE(checkBucket(Int(10), Int(9), IntBucket(0,10)));
+ EXPECT_TRUE(checkBucket(Int(10), Int(10), IntBucket(10,20)));
+ EXPECT_TRUE(checkBucket(Int(10), Int(299), IntBucket(290,300)));
+
+ // negative int buckets
+ EXPECT_TRUE(checkBucket(Int(10), Int(-1), IntBucket(-10,0)));
+ EXPECT_TRUE(checkBucket(Int(10), Int(-5), IntBucket(-10,0)));
+ EXPECT_TRUE(checkBucket(Int(10), Int(-10), IntBucket(-10,0)));
+ EXPECT_TRUE(checkBucket(Int(10), Int(-11), IntBucket(-20,-10)));
+ EXPECT_TRUE(checkBucket(Int(10), Int(-300), IntBucket(-300,-290)));
+
+ // positive float buckets
+ EXPECT_TRUE(checkBucket(Int(10), Float(0.0), FloatBucket(0.0,10.0)));
+ EXPECT_TRUE(checkBucket(Int(10), Float(5.0), FloatBucket(0.0,10.0)));
+ EXPECT_TRUE(checkBucket(Int(10), Float(9.0), FloatBucket(0.0,10.0)));
+ EXPECT_TRUE(checkBucket(Int(10), Float(10.0), FloatBucket(10.0,20.0)));
+ EXPECT_TRUE(checkBucket(Int(10), Float(299.0), FloatBucket(290.0,300.0)));
+
+ // negative float buckets
+ EXPECT_TRUE(checkBucket(Int(10), Float(-1), FloatBucket(-10.0,0.0)));
+ EXPECT_TRUE(checkBucket(Int(10), Float(-5), FloatBucket(-10.0,0.0)));
+ EXPECT_TRUE(checkBucket(Int(10), Float(-10), FloatBucket(-10.0,0.0)));
+ EXPECT_TRUE(checkBucket(Int(10), Float(-10.0000001), FloatBucket(-20.0,-10.0)));
+ EXPECT_TRUE(checkBucket(Int(10), Float(-300), FloatBucket(-300.0,-290.0)));
+
+ // non-integer bucket width
+ EXPECT_TRUE(checkBucket(Float(0.5), Float(0.0), FloatBucket(0.0,0.5)));
+ EXPECT_TRUE(checkBucket(Float(0.5), Float(0.5), FloatBucket(0.5,1.0)));
+ EXPECT_TRUE(checkBucket(Float(0.5), Float(0.4999), FloatBucket(0.0,0.5)));
+ EXPECT_TRUE(checkBucket(Float(0.5), Float(-0.0001), FloatBucket(-0.5,0.0)));
+ EXPECT_TRUE(checkBucket(Float(0.5), Float(-0.5), FloatBucket(-0.5,0.0)));
+ EXPECT_TRUE(checkBucket(Float(0.5), Float(-0.50001), FloatBucket(-1.0,-0.5)));
+
+ // zero-width buckets
+ EXPECT_TRUE(checkBucket(Int(0), Int(7), IntBucket(7,7)));
+ EXPECT_TRUE(checkBucket(Int(0), Float(7.5), FloatBucket(7.5,7.5)));
+
+ // bucket wrap protection
+ {
+ int64_t x = std::numeric_limits<int64_t>::min();
+ int64_t y = std::numeric_limits<int64_t>::max();
+ EXPECT_TRUE(checkBucket(Int(1000), Int(x + 5), IntBucket(x, (x/1000) * 1000)));
+ EXPECT_TRUE(checkBucket(Int(1000), Int(y - 5), IntBucket((y/1000) * 1000, y)));
+ }
+}
+
+
+void
+Test::testNanSorting()
+{
+ // Attempt at reproducing issue with segfault when setting NaN value. Not
+ // successful yet, so no point in running test.
+#if 0
+ double nan = sqrt(-1);
+ EXPECT_TRUE(isnan(nan));
+ EXPECT_TRUE(nan != nan);
+ EXPECT_FALSE(nan < nan);
+ EXPECT_FALSE(nan > nan);
+ EXPECT_FALSE(nan < 0.2);
+ EXPECT_FALSE(nan > 0.2);
+ EXPECT_FALSE(0.2 < nan);
+ EXPECT_FALSE(0.2 > nan);
+
+ FastOS_Time timer;
+ timer.SetNow();
+ std::vector<double> groups;
+ while (timer.MilliSecsToNow() < 60000.0) {
+ std::vector<double> vec;
+ srand((unsigned int)timer.MilliSecs());
+ size_t limit = 2345678;
+ size_t mod = rand() % limit;
+ for (size_t i = 0; i < limit; i++) {
+ if ((i % mod) == 0)
+ vec.push_back(nan);
+ else
+ vec.push_back(1.0 * rand());
+ }
+ }
+ std::sort(groups.begin(), groups.end());
+#endif
+}
+
+void
+Test::testThatNanIsConverted()
+{
+ Group g;
+ double nan = sqrt(-1);
+ g.setRank(nan);
+ // Must have been changed for this to work.
+ ASSERT_EQUAL(g.getRank(), g.getRank());
+}
+
+void
+Test::testGroupingEngineFromRequest()
+{
+ AggregationContext ctx;
+ ctx.add(IntAttrBuilder("attr0").add(10).add(10).sp());
+ ctx.add(IntAttrBuilder("attr1").add(11).add(11).sp());
+ ctx.add(IntAttrBuilder("attr2").add(12).add(12).sp());
+ ctx.add(IntAttrBuilder("attr3").add(13).add(13).sp());
+ ctx.result().add(0).add(1);
+ Grouping baseRequest = Grouping()
+ .setRoot(Group()
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("attr0"))))
+ .addLevel(GroupingLevel()
+ .setExpression(AttributeNode("attr1"))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("attr2"))))
+ .addLevel(GroupingLevel()
+ .setExpression(AttributeNode("attr2"))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("attr3"))))
+ .addLevel(GroupingLevel()
+ .setExpression(AttributeNode("attr3"))
+ .addResult(SumAggregationResult()
+ .setExpression(AttributeNode("attr1"))));
+ ctx.setup(baseRequest);
+ GroupingEngine engine(baseRequest.setFirstLevel(0).setLastLevel(2));
+ EXPECT_EQUAL(4u, engine.getEngines().size());
+}
+
+//-----------------------------------------------------------------------------
+
+struct RunDiff { ~RunDiff() { system("diff -u lhs.out rhs.out > diff.txt"); }};
+
+//-----------------------------------------------------------------------------
+
+int
+Test::Main()
+{
+ RunDiff runDiff;
+ (void) runDiff;
+ TEST_DEBUG("lhs.out", "rhs.out");
+ TEST_INIT("groupingengine_test");
+ testGroupingEngineFromRequest();
+ testAggregationSimple();
+ testAggregationLevels();
+ testAggregationMaxGroups();
+ testAggregationGroupOrder();
+ testAggregationGroupRank();
+ testAggregationGroupCapping();
+#if 0
+ testMergeSimpleSum();
+ testMergeLevels();
+ testMergeGroups();
+ testMergeTrees();
+ testPruneSimple();
+ testPruneComplex();
+ testPartialMerging();
+#endif
+ testFS4HitCollection();
+ testFixedWidthBuckets();
+ testCount();
+ testTopN();
+ testThatNanIsConverted();
+ testNanSorting();
+ TEST_DONE();
+}
+
+TEST_APPHOOK(Test);
diff --git a/searchlib/src/tests/hitcollector/.gitignore b/searchlib/src/tests/hitcollector/.gitignore
new file mode 100644
index 00000000000..a4313eb2184
--- /dev/null
+++ b/searchlib/src/tests/hitcollector/.gitignore
@@ -0,0 +1,4 @@
+.depend
+Makefile
+hitcollector_test
+searchlib_hitcollector_test_app
diff --git a/searchlib/src/tests/hitcollector/CMakeLists.txt b/searchlib/src/tests/hitcollector/CMakeLists.txt
new file mode 100644
index 00000000000..c2b130b2890
--- /dev/null
+++ b/searchlib/src/tests/hitcollector/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_hitcollector_test_app
+ SOURCES
+ hitcollector_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_hitcollector_test_app COMMAND searchlib_hitcollector_test_app)
diff --git a/searchlib/src/tests/hitcollector/DESC b/searchlib/src/tests/hitcollector/DESC
new file mode 100644
index 00000000000..a8751d4a1fe
--- /dev/null
+++ b/searchlib/src/tests/hitcollector/DESC
@@ -0,0 +1 @@
+hitcollector test. Take a look at hitcollector.cpp for details.
diff --git a/searchlib/src/tests/hitcollector/FILES b/searchlib/src/tests/hitcollector/FILES
new file mode 100644
index 00000000000..88a0d4ba4b3
--- /dev/null
+++ b/searchlib/src/tests/hitcollector/FILES
@@ -0,0 +1 @@
+hitcollector.cpp
diff --git a/searchlib/src/tests/hitcollector/hitcollector_test.cpp b/searchlib/src/tests/hitcollector/hitcollector_test.cpp
new file mode 100644
index 00000000000..ec7c74913af
--- /dev/null
+++ b/searchlib/src/tests/hitcollector/hitcollector_test.cpp
@@ -0,0 +1,493 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("hitcollector_test");
+#include <vespa/vespalib/testkit/testapp.h>
+
+#include <iostream>
+
+#include <vespa/searchlib/common/bitvector.h>
+#include <vespa/searchlib/fef/fef.h>
+#include <vespa/searchlib/queryeval/hitcollector.h>
+#include <vespa/searchlib/queryeval/scores.h>
+
+using namespace search;
+using namespace search::fef;
+using namespace search::queryeval;
+
+typedef std::map<uint32_t, feature_t> ScoreMap;
+
+struct BasicScorer : public HitCollector::DocumentScorer
+{
+ feature_t _scoreDelta;
+ BasicScorer(feature_t scoreDelta) : _scoreDelta(scoreDelta) {}
+ virtual feature_t score(uint32_t docId) {
+ return docId + _scoreDelta;
+ }
+};
+
+struct PredefinedScorer : public HitCollector::DocumentScorer
+{
+ ScoreMap _scores;
+ PredefinedScorer(const ScoreMap &scores) : _scores(scores) {}
+ virtual feature_t score(uint32_t docId) {
+ feature_t retval = 0.0;
+ auto itr = _scores.find(docId);
+ if (itr != _scores.end()) {
+ retval = itr->second;
+ }
+ return retval;
+ }
+};
+
+void checkResult(const ResultSet & rs, const std::vector<RankedHit> & exp)
+{
+ if (exp.size() > 0) {
+ const RankedHit * rh = rs.getArray();
+ ASSERT_TRUE(rh != NULL);
+ ASSERT_EQUAL(rs.getArrayUsed(), exp.size());
+
+ for (uint32_t i = 0; i < exp.size(); ++i) {
+#if 0
+ std::cout << " rh[" << i << "]._docId = " << rh[i]._docId << std::endl;
+ std::cout << "exp[" << i << "]._docId = " << exp[i]._docId << std::endl;
+ std::cout << " rh[" << i << "]._rankValue = " << rh[i]._rankValue << std::endl;
+ std::cout << "exp[" << i << "]._rankValue = " << exp[i]._rankValue << std::endl;
+#endif
+ EXPECT_EQUAL(rh[i]._docId, exp[i]._docId);
+ EXPECT_EQUAL(rh[i]._rankValue, exp[i]._rankValue);
+ }
+ } else {
+ ASSERT_TRUE(rs.getArray() == NULL);
+ }
+}
+
+void checkResult(ResultSet & rs, BitVector * exp)
+{
+ if (exp != NULL) {
+ BitVector * bv = rs.getBitOverflow();
+ ASSERT_TRUE(bv != NULL);
+ bv->invalidateCachedCount();
+ exp->invalidateCachedCount();
+ LOG(info, "bv.hits: %u, exp.hits: %u", bv->countTrueBits(), exp->countTrueBits());
+ ASSERT_TRUE(bv->countTrueBits() == exp->countTrueBits());
+ EXPECT_TRUE(*bv == *exp);
+ } else {
+ ASSERT_TRUE(rs.getBitOverflow() == NULL);
+ }
+}
+
+void testAddHit(uint32_t numDocs, uint32_t maxHitsSize, uint32_t maxHeapSize)
+{
+
+ LOG(info, "testAddHit: no hits");
+ { // no hits
+ HitCollector hc(numDocs, maxHitsSize, maxHeapSize);
+ std::vector<RankedHit> expRh;
+
+ std::unique_ptr<ResultSet> rs = hc.getResultSet();
+ TEST_DO(checkResult(*rs.get(), expRh));
+ TEST_DO(checkResult(*rs.get(), NULL));
+ }
+
+ LOG(info, "testAddHit: only ranked hits");
+ { // only ranked hits
+ HitCollector hc(numDocs, maxHitsSize, maxHeapSize);
+ std::vector<RankedHit> expRh;
+
+ for (uint32_t i = 0; i < maxHitsSize; ++i) {
+ hc.addHit(i, i + 100);
+
+ // build expected result set as we go along
+ expRh.push_back(RankedHit());
+ expRh.back()._docId = i;
+ expRh.back()._rankValue = i + 100;
+ }
+
+ std::unique_ptr<ResultSet> rs = hc.getResultSet();
+ TEST_DO(checkResult(*rs.get(), expRh));
+ TEST_DO(checkResult(*rs.get(), NULL));
+ }
+
+ LOG(info, "testAddHit: both ranked hits and bit vector hits");
+ { // both ranked hits and bit vector hits
+ HitCollector hc(numDocs, maxHitsSize, maxHeapSize);
+ std::vector<RankedHit> expRh;
+ BitVector::UP expBv(BitVector::create(numDocs));
+
+ for (uint32_t i = 0; i < numDocs; ++i) {
+ hc.addHit(i, i + 100);
+
+ // build expected result set as we go along
+ expBv->setBit(i);
+ if (i >= (numDocs - maxHitsSize)) {
+ expRh.push_back(RankedHit());
+ expRh.back()._docId = i;
+ expRh.back()._rankValue = i + 100;
+ }
+ }
+
+ std::unique_ptr<ResultSet> rs = hc.getResultSet();
+ TEST_DO(checkResult(*rs.get(), expRh));
+ TEST_DO(checkResult(*rs.get(), expBv.get()));
+ }
+}
+
+TEST("testAddHit") {
+ TEST_DO(testAddHit(30, 10, 5));
+ TEST_DO(testAddHit(30, 10, 0));
+ TEST_DO(testAddHit(400, 10, 5)); // 400/32 = 12 which is bigger than 10.
+ TEST_DO(testAddHit(400, 10, 0));
+}
+
+struct Fixture {
+ HitCollector hc;
+ BitVector::UP expBv;
+ BasicScorer scorer;
+
+ Fixture()
+ : hc(20, 10, 5), expBv(BitVector::create(20)), scorer(200)
+ {
+ }
+ virtual ~Fixture() {}
+ virtual HitRank calculateScore(uint32_t) { return 0; }
+ void addHits() {
+ for (uint32_t i = 0; i < 20; ++i) {
+ hc.addHit(i, calculateScore(i));
+ expBv->setBit(i);
+ }
+ }
+ size_t reRank() {
+ return hc.reRank(scorer);
+ }
+ size_t reRank(size_t count) {
+ return hc.reRank(scorer, count);
+ }
+};
+
+struct AscendingScoreFixture : Fixture {
+ AscendingScoreFixture() : Fixture() {}
+ virtual HitRank calculateScore(uint32_t i) {
+ return i + 100;
+ }
+};
+
+struct DescendingScoreFixture : Fixture {
+ DescendingScoreFixture() : Fixture() {}
+ virtual HitRank calculateScore(uint32_t i) {
+ return 100 - i;
+ }
+};
+
+TEST_F("testReRank - empty", Fixture) {
+ EXPECT_EQUAL(0u, f.reRank());
+}
+
+TEST_F("testReRank - ascending", AscendingScoreFixture)
+{
+ f.addHits();
+ EXPECT_EQUAL(5u, f.reRank());
+
+ std::vector<RankedHit> expRh;
+ for (uint32_t i = 10; i < 20; ++i) { // 10 last are the best
+ expRh.push_back(RankedHit(i, f.calculateScore(i)));
+ if (i >= 15) { // hits from heap (5 last)
+ expRh.back()._rankValue = i + 200; // after reranking
+ }
+ }
+ EXPECT_EQUAL(expRh.size(), 10u);
+
+ std::unique_ptr<ResultSet> rs = f.hc.getResultSet();
+ TEST_DO(checkResult(*rs.get(), expRh));
+ TEST_DO(checkResult(*rs.get(), f.expBv.get()));
+}
+
+TEST_F("testReRank - descending", DescendingScoreFixture)
+{
+ f.addHits();
+ EXPECT_EQUAL(5u, f.reRank());
+
+ std::vector<RankedHit> expRh;
+ for (uint32_t i = 0; i < 10; ++i) { // 10 first are the best
+ expRh.push_back(RankedHit(i, f.calculateScore(i)));
+ if (i < 5) { // hits from heap (5 first)
+ expRh.back()._rankValue = i + 200; // after reranking
+ }
+ }
+ EXPECT_EQUAL(expRh.size(), 10u);
+
+ std::unique_ptr<ResultSet> rs = f.hc.getResultSet();
+ TEST_DO(checkResult(*rs.get(), expRh));
+ TEST_DO(checkResult(*rs.get(), f.expBv.get()));
+}
+
+TEST_F("testReRank - partial", AscendingScoreFixture)
+{
+ f.addHits();
+ EXPECT_EQUAL(3u, f.reRank(3));
+
+ std::vector<RankedHit> expRh;
+ for (uint32_t i = 10; i < 20; ++i) { // 10 last are the best
+ expRh.push_back(RankedHit(i, f.calculateScore(i)));
+ if (i >= 17) { // hits from heap (3 last)
+ expRh.back()._rankValue = i + 200; // after reranking
+ }
+ }
+ EXPECT_EQUAL(expRh.size(), 10u);
+
+ std::unique_ptr<ResultSet> rs = f.hc.getResultSet();
+ TEST_DO(checkResult(*rs.get(), expRh));
+ TEST_DO(checkResult(*rs.get(), f.expBv.get()));
+}
+
+TEST_F("require that scores for 2nd phase candidates can be retrieved", DescendingScoreFixture)
+{
+ f.addHits();
+ std::vector<feature_t> scores = f.hc.getSortedHeapScores();
+ ASSERT_EQUAL(5u, scores.size());
+ EXPECT_EQUAL(100, scores[0]);
+ EXPECT_EQUAL(99, scores[1]);
+ EXPECT_EQUAL(98, scores[2]);
+ EXPECT_EQUAL(97, scores[3]);
+ EXPECT_EQUAL(96, scores[4]);
+}
+
+TEST("require that score ranges can be read and set.") {
+ std::pair<Scores, Scores> ranges =
+ std::make_pair(Scores(1.0, 2.0), Scores(3.0, 4.0));
+ HitCollector hc(20, 10, 5);
+ hc.setRanges(ranges);
+ EXPECT_EQUAL(ranges.first.low, hc.getRanges().first.low);
+ EXPECT_EQUAL(ranges.first.high, hc.getRanges().first.high);
+ EXPECT_EQUAL(ranges.second.low, hc.getRanges().second.low);
+ EXPECT_EQUAL(ranges.second.high, hc.getRanges().second.high);
+}
+
+TEST("testNoHitsToReRank") {
+ uint32_t numDocs = 20;
+ uint32_t maxHitsSize = 10;
+
+ LOG(info, "testNoMDHeap: test it");
+ {
+ HitCollector hc(numDocs, maxHitsSize, 0);
+ std::vector<RankedHit> expRh;
+
+ for (uint32_t i = 0; i < maxHitsSize; ++i) {
+ hc.addHit(i, i + 100);
+
+ // build expected result set as we go along
+ expRh.push_back(RankedHit());
+ expRh.back()._docId = i;
+ expRh.back()._rankValue = i + 100;
+ }
+
+ std::unique_ptr<ResultSet> rs = hc.getResultSet();
+ TEST_DO(checkResult(*rs.get(), expRh));
+ TEST_DO(checkResult(*rs.get(), NULL));
+ }
+}
+
+void testScaling(const std::vector<feature_t> &initScores,
+ const ScoreMap &finalScores,
+ const std::vector<RankedHit> &expected)
+{
+ HitCollector hc(5, 5, 2);
+
+ // first phase ranking
+ for (uint32_t i = 0; i < 5; ++i) {
+ hc.addHit(i, initScores[i]);
+ }
+
+ PredefinedScorer scorer(finalScores);
+ // perform second phase ranking
+ EXPECT_EQUAL(2u, hc.reRank(scorer));
+
+ // check results
+ std::unique_ptr<ResultSet> rs = hc.getResultSet();
+ TEST_DO(checkResult(*rs.get(), expected));
+}
+
+TEST("testScaling") {
+ std::vector<feature_t> initScores(5);
+ initScores[0] = 1000;
+ initScores[1] = 2000;
+ initScores[2] = 3000;
+ initScores[3] = 4000;
+ initScores[4] = 5000;
+
+ // expected final rank scores
+ std::vector<RankedHit> exp(5);
+ for (uint32_t i = 0; i < 5; ++i) {
+ exp[i]._docId = i;
+ }
+
+ { // scale down and adjust down
+ exp[0]._rankValue = 0; // scaled
+ exp[1]._rankValue = 100; // scaled
+ exp[2]._rankValue = 200; // scaled
+ exp[3]._rankValue = 300; // from heap
+ exp[4]._rankValue = 400; // from heap
+
+ // second phase ranking scores
+ ScoreMap finalScores;
+ finalScores[3] = 300;
+ finalScores[4] = 400;
+
+ testScaling(initScores, finalScores, exp);
+ }
+ { // scale down and adjust up
+ exp[0]._rankValue = 200; // scaled
+ exp[1]._rankValue = 300; // scaled
+ exp[2]._rankValue = 400; // scaled
+ exp[3]._rankValue = 500; // from heap
+ exp[4]._rankValue = 600; // from heap
+
+ // second phase ranking scores
+ ScoreMap finalScores;
+ finalScores[3] = 500;
+ finalScores[4] = 600;
+
+ testScaling(initScores, finalScores, exp);
+ }
+ { // scale up and adjust down
+
+ exp[0]._rankValue = -500; // scaled (-500)
+ exp[1]._rankValue = 750; // scaled
+ exp[2]._rankValue = 2000; // scaled
+ exp[3]._rankValue = 3250; // from heap
+ exp[4]._rankValue = 4500; // from heap
+
+ // second phase ranking scores
+ ScoreMap finalScores;
+ finalScores[3] = 3250;
+ finalScores[4] = 4500;
+
+ testScaling(initScores, finalScores, exp);
+ }
+ { // minimal scale (second phase range = 0 (4 - 4) -> 1)
+ exp[0]._rankValue = 1; // scaled
+ exp[1]._rankValue = 2; // scaled
+ exp[2]._rankValue = 3; // scaled
+ exp[3]._rankValue = 4; // from heap
+ exp[4]._rankValue = 4; // from heap
+
+ // second phase ranking scores
+ ScoreMap finalScores;
+ finalScores[3] = 4;
+ finalScores[4] = 4;
+
+ testScaling(initScores, finalScores, exp);
+ }
+ { // minimal scale (first phase range = 0 (4000 - 4000) -> 1)
+ std::vector<feature_t> is(initScores);
+ is[4] = 4000;
+ exp[0]._rankValue = -299600; // scaled
+ exp[1]._rankValue = -199600; // scaled
+ exp[2]._rankValue = -99600; // scaled
+ exp[3]._rankValue = 400; // from heap
+ exp[4]._rankValue = 500; // from heap
+
+ // second phase ranking scores
+ ScoreMap finalScores;
+ finalScores[3] = 400;
+ finalScores[4] = 500;
+
+ testScaling(is, finalScores, exp);
+ }
+}
+
+TEST("testOnlyBitVector") {
+ uint32_t numDocs = 20;
+ LOG(info, "testOnlyBitVector: test it");
+ {
+ HitCollector hc(numDocs, 0, 0);
+ BitVector::UP expBv(BitVector::create(numDocs));
+
+ for (uint32_t i = 0; i < numDocs; i += 2) {
+ hc.addHit(i, i + 100);
+ // build expected result set as we go along
+ expBv->setBit(i);
+ }
+
+ std::unique_ptr<ResultSet> rs = hc.getResultSet();
+ std::vector<RankedHit> expRh;
+ TEST_DO(checkResult(*rs.get(), expRh)); // no ranked hits
+ TEST_DO(checkResult(*rs.get(), expBv.get())); // only bit vector
+ }
+}
+
+struct MergeResultSetFixture {
+ const uint32_t numDocs;
+ const uint32_t maxHitsSize;
+ const uint32_t maxHeapSize;
+ HitCollector hc;
+ MergeResultSetFixture()
+ : numDocs(100), maxHitsSize(80), maxHeapSize(30), hc(numDocs * 32, maxHitsSize, maxHeapSize)
+ {}
+};
+
+TEST_F("require that result set is merged correctly with first phase ranking",
+ MergeResultSetFixture)
+{
+ std::vector<RankedHit> expRh;
+ for (uint32_t i = 0; i < f.numDocs; ++i) {
+ f.hc.addHit(i, i + 1000);
+
+ // build expected result set
+ expRh.push_back(RankedHit());
+ expRh.back()._docId = i;
+ // only the maxHitsSize best hits gets a score
+ expRh.back()._rankValue = (i < f.numDocs - f.maxHitsSize) ? 0 : i + 1000;
+ }
+ std::unique_ptr<ResultSet> rs = f.hc.getResultSet();
+ TEST_DO(checkResult(*rs.get(), expRh));
+}
+
+void
+addExpectedHitForMergeTest(const MergeResultSetFixture &f, std::vector<RankedHit> &expRh, uint32_t docId)
+{
+ expRh.push_back(RankedHit());
+ expRh.back()._docId = docId;
+ if (docId < f.numDocs - f.maxHitsSize) { // only the maxHitsSize best hits gets a score
+ expRh.back()._rankValue = 0;
+ } else if (docId < f.numDocs - f.maxHeapSize) { // only first phase ranking
+ expRh.back()._rankValue = docId + 500; // adjusted with - 500
+ } else { // second phase ranking on the maxHeapSize best hits
+ expRh.back()._rankValue = docId + 500;
+ }
+}
+
+TEST_F("require that result set is merged correctly with second phase ranking (document scorer)",
+ MergeResultSetFixture)
+{
+ // with second phase ranking that triggers rescoring / scaling
+ BasicScorer scorer(500); // second phase ranking setting score to docId + 500
+ std::vector<RankedHit> expRh;
+ for (uint32_t i = 0; i < f.numDocs; ++i) {
+ f.hc.addHit(i, i + 1000);
+ addExpectedHitForMergeTest(f, expRh, i);
+ }
+ EXPECT_EQUAL(f.maxHeapSize, f.hc.reRank(scorer));
+ std::unique_ptr<ResultSet> rs = f.hc.getResultSet();
+ TEST_DO(checkResult(*rs.get(), expRh));
+}
+
+TEST("require that hits can be added out of order") {
+ HitCollector hc(1000, 100, 10);
+ std::vector<RankedHit> expRh;
+ // produce expected result in normal order
+ for (uint32_t i = 0; i < 5; ++i) {
+ expRh.push_back(RankedHit());
+ expRh.back()._docId = i;
+ expRh.back()._rankValue = i + 100;
+ }
+ // add results in reverse order
+ for (uint32_t i = 5; i-- > 0; ) {
+ hc.addHit(i, i + 100);
+ }
+ std::unique_ptr<ResultSet> rs = hc.getResultSet();
+ TEST_DO(checkResult(*rs.get(), expRh));
+ TEST_DO(checkResult(*rs.get(), nullptr));
+}
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/index/docbuilder/.gitignore b/searchlib/src/tests/index/docbuilder/.gitignore
new file mode 100644
index 00000000000..999644fce87
--- /dev/null
+++ b/searchlib/src/tests/index/docbuilder/.gitignore
@@ -0,0 +1,5 @@
+*_test
+.depend
+Makefile
+docbuilder_test
+searchlib_docbuilder_test_app
diff --git a/searchlib/src/tests/index/docbuilder/CMakeLists.txt b/searchlib/src/tests/index/docbuilder/CMakeLists.txt
new file mode 100644
index 00000000000..de382bcc2fe
--- /dev/null
+++ b/searchlib/src/tests/index/docbuilder/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_docbuilder_test_app
+ SOURCES
+ docbuilder_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_docbuilder_test_app COMMAND searchlib_docbuilder_test_app)
diff --git a/searchlib/src/tests/index/docbuilder/DESC b/searchlib/src/tests/index/docbuilder/DESC
new file mode 100644
index 00000000000..514903f9988
--- /dev/null
+++ b/searchlib/src/tests/index/docbuilder/DESC
@@ -0,0 +1 @@
+ildocbuilder test. Take a look at ildocbuilder.cpp for details.
diff --git a/searchlib/src/tests/index/docbuilder/FILES b/searchlib/src/tests/index/docbuilder/FILES
new file mode 100644
index 00000000000..4d90f226fb4
--- /dev/null
+++ b/searchlib/src/tests/index/docbuilder/FILES
@@ -0,0 +1 @@
+ildocbuilder.cpp
diff --git a/searchlib/src/tests/index/docbuilder/docbuilder_test.cpp b/searchlib/src/tests/index/docbuilder/docbuilder_test.cpp
new file mode 100644
index 00000000000..06599834ab5
--- /dev/null
+++ b/searchlib/src/tests/index/docbuilder/docbuilder_test.cpp
@@ -0,0 +1,531 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/* -*- mode: C++; coding: utf-8; -*- */
+
+/* $Id$
+ *
+ * Copyright (C) 2011 Yahoo! Technologies Norway AS
+ *
+ * All Rights Reserved
+ *
+ */
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("docbuilder_test");
+#include <boost/algorithm/string/classification.hpp>
+#include <boost/algorithm/string/split.hpp>
+#include <vespa/searchlib/index/docbuilder.h>
+#include <vespa/vespalib/encoding/base64.h>
+#include <vespa/vespalib/testkit/testapp.h>
+#include <vespa/document/repo/fixedtyperepo.h>
+
+using namespace document;
+
+namespace search {
+namespace index {
+
+namespace
+{
+std::string empty;
+}
+
+namespace linguistics
+{
+const vespalib::string SPANTREE_NAME("linguistics");
+}
+
+class Test : public vespalib::TestApp {
+private:
+ void testBuilder();
+public:
+ int Main();
+};
+
+void
+Test::testBuilder()
+{
+ Schema s;
+ s.addIndexField(Schema::IndexField("ia", Schema::STRING));
+ s.addIndexField(Schema::IndexField("ib", Schema::STRING, Schema::ARRAY));
+ s.addIndexField(Schema::IndexField("ic", Schema::STRING, Schema::WEIGHTEDSET));
+ s.addUriIndexFields(Schema::IndexField("iu", Schema::STRING));
+ s.addUriIndexFields(Schema::IndexField("iau",
+ Schema::STRING,
+ Schema::ARRAY));
+ s.addUriIndexFields(Schema::IndexField("iwu",
+ Schema::STRING,
+ Schema::WEIGHTEDSET));
+ s.addAttributeField(Schema::AttributeField("aa", Schema::INT32));
+ s.addAttributeField(Schema::AttributeField("ab", Schema::FLOAT));
+ s.addAttributeField(Schema::AttributeField("ac", Schema::STRING));
+ s.addAttributeField(Schema::AttributeField("ad", Schema::INT32, Schema::ARRAY));
+ s.addAttributeField(Schema::AttributeField("ae", Schema::FLOAT, Schema::ARRAY));
+ s.addAttributeField(Schema::AttributeField("af", Schema::STRING, Schema::ARRAY));
+ s.addAttributeField(Schema::AttributeField("ag", Schema::INT32, Schema::WEIGHTEDSET));
+ s.addAttributeField(Schema::AttributeField("ah", Schema::FLOAT, Schema::WEIGHTEDSET));
+ s.addAttributeField(Schema::AttributeField("ai", Schema::STRING, Schema::WEIGHTEDSET));
+ s.addAttributeField(Schema::AttributeField("asp1",
+ Schema::INT32));
+ s.addAttributeField(Schema::AttributeField("asp2",
+ Schema::INT64));
+ s.addAttributeField(Schema::AttributeField("aap1",
+ Schema::INT32,
+ Schema::ARRAY));
+ s.addAttributeField(Schema::AttributeField("aap2",
+ Schema::INT64,
+ Schema::ARRAY));
+ s.addAttributeField(Schema::AttributeField("awp1",
+ Schema::INT32,
+ Schema::WEIGHTEDSET));
+ s.addAttributeField(Schema::AttributeField("awp2",
+ Schema::INT64,
+ Schema::WEIGHTEDSET));
+
+ s.addSummaryField(Schema::SummaryField("sa", Schema::INT8));
+ s.addSummaryField(Schema::SummaryField("sb", Schema::INT16));
+ s.addSummaryField(Schema::SummaryField("sc", Schema::INT32));
+ s.addSummaryField(Schema::SummaryField("sd", Schema::INT64));
+ s.addSummaryField(Schema::SummaryField("se", Schema::FLOAT));
+ s.addSummaryField(Schema::SummaryField("sf", Schema::DOUBLE));
+ s.addSummaryField(Schema::SummaryField("sg", Schema::STRING));
+ s.addSummaryField(Schema::SummaryField("sh", Schema::RAW));
+ s.addSummaryField(Schema::SummaryField("si", Schema::RAW,
+ Schema::ARRAY));
+ s.addSummaryField(Schema::SummaryField("sj", Schema::RAW,
+ Schema::WEIGHTEDSET));
+
+ DocBuilder b(s);
+ Document::UP doc;
+ std::vector<std::string> lines;
+ std::vector<std::string>::const_iterator itr;
+ std::string xml;
+
+ { // empty
+ doc = b.startDocument("doc::0").endDocument();
+ xml = doc->toXml("");
+ boost::split(lines, xml, boost::is_any_of("\n"));
+ itr = lines.begin();
+ EXPECT_EQUAL("<document documenttype=\"searchdocument\" documentid=\"doc::0\"/>", *itr++);
+ EXPECT_EQUAL("", *itr++);
+ EXPECT_TRUE(itr == lines.end());
+ }
+ { // all fields set
+ std::vector<char> binaryBlob;
+ binaryBlob.push_back('\0');
+ binaryBlob.push_back('\2');
+ binaryBlob.push_back('\1');
+ std::string raw1s("Single Raw Element");
+ std::string raw1a0("Array Raw Element 0");
+ std::string raw1a1("Array Raw Element 1");
+ std::string raw1w0("Weighted Set Raw Element 0");
+ std::string raw1w1("Weighted Set Raw Element 1");
+ raw1s += std::string(&binaryBlob[0],
+ &binaryBlob[0] + binaryBlob.size());
+ raw1a0 += std::string(&binaryBlob[0],
+ &binaryBlob[0] + binaryBlob.size());
+ raw1a1 += std::string(&binaryBlob[0],
+ &binaryBlob[0] + binaryBlob.size());
+ raw1w0 += std::string(&binaryBlob[0],
+ &binaryBlob[0] + binaryBlob.size());
+ raw1w1 += std::string(&binaryBlob[0],
+ &binaryBlob[0] + binaryBlob.size());
+ b.startDocument("doc::1");
+ b.startIndexField("ia").addStr("foo").addStr("bar").addStr("baz").addTermAnnotation("altbaz").endField();
+ b.startIndexField("ib").startElement().addStr("foo").endElement().
+ startElement(1).addStr("bar").addStr("baz").endElement().endField();
+ b. startIndexField("ic").
+ startElement(20).addStr("bar").addStr("baz").endElement().
+ startElement().addStr("foo").endElement().
+ endField();
+ b.startIndexField("iu").
+ startSubField("all").
+ addUrlTokenizedString("http://www.yahoo.com:81/fluke?ab=2#4").
+ endSubField().
+ startSubField("scheme").
+ addUrlTokenizedString("http").
+ endSubField().
+ startSubField("host").
+ addUrlTokenizedString("www.yahoo.com").
+ endSubField().
+ startSubField("port").
+ addUrlTokenizedString("81").
+ endSubField().
+ startSubField("path").
+ addUrlTokenizedString("/fluke").
+ endSubField().
+ startSubField("query").
+ addUrlTokenizedString("ab=2").
+ endSubField().
+ startSubField("fragment").
+ addUrlTokenizedString("4").
+ endSubField().
+ endField();
+ b.startIndexField("iau").
+ startElement(1).
+ startSubField("all").
+ addUrlTokenizedString("http://www.yahoo.com:82/fluke?ab=2#8").
+ endSubField().
+ startSubField("scheme").
+ addUrlTokenizedString("http").
+ endSubField().
+ startSubField("host").
+ addUrlTokenizedString("www.yahoo.com").
+ endSubField().
+ startSubField("port").
+ addUrlTokenizedString("82").
+ endSubField().
+ startSubField("path").
+ addUrlTokenizedString("/fluke").
+ endSubField().
+ startSubField("query").
+ addUrlTokenizedString("ab=2").
+ endSubField().
+ startSubField("fragment").
+ addUrlTokenizedString("8").
+ endSubField().
+ endElement().
+ startElement(1).
+ startSubField("all").
+ addUrlTokenizedString("http://www.flickr.com:82/fluke?ab=2#9").
+ endSubField().
+ startSubField("scheme").
+ addUrlTokenizedString("http").
+ endSubField().
+ startSubField("host").
+ addUrlTokenizedString("www.flickr.com").
+ endSubField().
+ startSubField("port").
+ addUrlTokenizedString("82").
+ endSubField().
+ startSubField("path").
+ addUrlTokenizedString("/fluke").
+ endSubField().
+ startSubField("query").
+ addUrlTokenizedString("ab=2").
+ endSubField().
+ startSubField("fragment").
+ addUrlTokenizedString("9").
+ endSubField().
+ endElement().
+ endField();
+ b.startIndexField("iwu").
+ startElement(4).
+ startSubField("all").
+ addUrlTokenizedString("http://www.yahoo.com:83/fluke?ab=2#12").
+ endSubField().
+ startSubField("scheme").
+ addUrlTokenizedString("http").
+ endSubField().
+ startSubField("host").
+ addUrlTokenizedString("www.yahoo.com").
+ endSubField().
+ startSubField("port").
+ addUrlTokenizedString("83").
+ endSubField().
+ startSubField("path").
+ addUrlTokenizedString("/fluke").
+ endSubField().
+ startSubField("query").
+ addUrlTokenizedString("ab=2").
+ endSubField().
+ startSubField("fragment").
+ addUrlTokenizedString("12").
+ endSubField().
+ endElement().
+ startElement(7).
+ startSubField("all").
+ addUrlTokenizedString("http://www.flickr.com:85/fluke?ab=2#13").
+ endSubField().
+ startSubField("scheme").
+ addUrlTokenizedString("http").
+ endSubField().
+ startSubField("host").
+ addUrlTokenizedString("www.flickr.com").
+ endSubField().
+ startSubField("port").
+ addUrlTokenizedString("85").
+ endSubField().
+ startSubField("path").
+ addUrlTokenizedString("/fluke").
+ endSubField().
+ startSubField("query").
+ addUrlTokenizedString("ab=2").
+ endSubField().
+ startSubField("fragment").
+ addUrlTokenizedString("13").
+ endSubField().
+ endElement().
+ endField();
+ b.startAttributeField("aa").addInt(2147483647).endField();
+ b.startAttributeField("ab").addFloat(1234.56).endField();
+ b.startAttributeField("ac").addStr("foo baz").endField();
+ b.startAttributeField("ad").startElement().addInt(10).endElement().endField();
+ b.startAttributeField("ae").startElement().addFloat(10.5).endElement().endField();
+ b.startAttributeField("af").startElement().addStr("foo").endElement().endField();
+ b.startAttributeField("ag").startElement(2).addInt(20).endElement().endField();
+ b.startAttributeField("ah").startElement(3).addFloat(20.5).endElement().endField();
+ b.startAttributeField("ai").startElement(4).addStr("bar").endElement().endField();
+ b.startAttributeField("asp1").addInt(1001).endField();
+ b.startAttributeField("asp2").addPosition(1002, 1003).endField();
+ b.startAttributeField("aap1").
+ startElement().addInt(1004).endElement().
+ startElement().addInt(1005).endElement().
+ endField();
+ b.startAttributeField("aap2").
+ startElement().addPosition(1006, 1007).endElement().
+ startElement().addPosition(1008, 1009).endElement().
+ endField();
+ b.startAttributeField("awp1").
+ startElement(41).addInt(1010).endElement().
+ startElement(42).addInt(1011).endElement().
+ endField();
+ b.startAttributeField("awp2").
+ startElement(43).addPosition(1012, 1013).endElement().
+ startElement(44).addPosition(1014, 1015).endElement().
+ endField();
+ b.startSummaryField("sa").addInt(127).endField();
+ b.startSummaryField("sb").addInt(32767).endField();
+ b.startSummaryField("sc").addInt(2147483647).endField();
+ b.startSummaryField("sd").addInt(2147483648).endField();
+ b.startSummaryField("se").addFloat(1234.56).endField();
+ b.startSummaryField("sf").addFloat(9876.54).endField();
+ b.startSummaryField("sg").addStr("foo bar").endField();
+ b.startSummaryField("sh").
+ addRaw(raw1s.c_str(), raw1s.size()).
+ endField();
+ b.startSummaryField("si").
+ startElement().
+ addRaw(raw1a0.c_str(), raw1a0.size()).
+ endElement().
+ startElement().
+ addRaw(raw1a1.c_str(), raw1a1.size()).
+ endElement().
+ endField();
+ b.startSummaryField("sj").
+ startElement(46).
+ addRaw(raw1w1.c_str(), raw1w1.size()).
+ endElement().
+ startElement(45).
+ addRaw(raw1w0.c_str(), raw1w0.size()).
+ endElement().
+ endField();
+ doc = b.endDocument();
+ xml = doc->toXml("");
+ boost::split(lines, xml, boost::is_any_of("\n"));
+ itr = lines.begin();
+ EXPECT_EQUAL("<document documenttype=\"searchdocument\" documentid=\"doc::1\">", *itr++);
+ EXPECT_EQUAL("<ia>foo bar baz</ia>", *itr++);
+ EXPECT_EQUAL("<ib>", *itr++);
+ EXPECT_EQUAL("<item>foo</item>", *itr++);
+ EXPECT_EQUAL("<item>bar baz</item>", *itr++);
+ EXPECT_EQUAL("</ib>", *itr++);
+ EXPECT_EQUAL("<ic>", *itr++);
+ EXPECT_EQUAL("<item weight=\"20\">bar baz</item>", *itr++);
+ EXPECT_EQUAL("<item weight=\"1\">foo</item>", *itr++);
+ EXPECT_EQUAL("</ic>", *itr++);
+ EXPECT_EQUAL("<iu>", *itr++);
+ EXPECT_EQUAL("<all>http://www.yahoo.com:81/fluke?ab=2#4</all>", *itr++);
+ EXPECT_EQUAL("<host>www.yahoo.com</host>", *itr++);
+ EXPECT_EQUAL("<scheme>http</scheme>", *itr++);
+ EXPECT_EQUAL("<path>/fluke</path>", *itr++);
+ EXPECT_EQUAL("<port>81</port>", *itr++);
+ EXPECT_EQUAL("<query>ab=2</query>", *itr++);
+ EXPECT_EQUAL("<fragment>4</fragment>", *itr++);
+ EXPECT_EQUAL("</iu>", *itr++);
+ EXPECT_EQUAL("<iau>", *itr++);
+ EXPECT_EQUAL("<item>", *itr++);
+ EXPECT_EQUAL("<all>http://www.yahoo.com:82/fluke?ab=2#8</all>", *itr++);
+ EXPECT_EQUAL("<host>www.yahoo.com</host>", *itr++);
+ EXPECT_EQUAL("<scheme>http</scheme>", *itr++);
+ EXPECT_EQUAL("<path>/fluke</path>", *itr++);
+ EXPECT_EQUAL("<port>82</port>", *itr++);
+ EXPECT_EQUAL("<query>ab=2</query>", *itr++);
+ EXPECT_EQUAL("<fragment>8</fragment>", *itr++);
+ EXPECT_EQUAL("</item>", *itr++);
+ EXPECT_EQUAL("<item>", *itr++);
+ EXPECT_EQUAL("<all>http://www.flickr.com:82/fluke?ab=2#9</all>", *itr++);
+ EXPECT_EQUAL("<host>www.flickr.com</host>", *itr++);
+ EXPECT_EQUAL("<scheme>http</scheme>", *itr++);
+ EXPECT_EQUAL("<path>/fluke</path>", *itr++);
+ EXPECT_EQUAL("<port>82</port>", *itr++);
+ EXPECT_EQUAL("<query>ab=2</query>", *itr++);
+ EXPECT_EQUAL("<fragment>9</fragment>", *itr++);
+ EXPECT_EQUAL("</item>", *itr++);
+ EXPECT_EQUAL("</iau>", *itr++);
+ EXPECT_EQUAL("<iwu>", *itr++);
+ EXPECT_EQUAL("<item weight=\"4\">", *itr++);
+ EXPECT_EQUAL("<all>http://www.yahoo.com:83/fluke?ab=2#12</all>", *itr++);
+ EXPECT_EQUAL("<host>www.yahoo.com</host>", *itr++);
+ EXPECT_EQUAL("<scheme>http</scheme>", *itr++);
+ EXPECT_EQUAL("<path>/fluke</path>", *itr++);
+ EXPECT_EQUAL("<port>83</port>", *itr++);
+ EXPECT_EQUAL("<query>ab=2</query>", *itr++);
+ EXPECT_EQUAL("<fragment>12</fragment>", *itr++);
+ EXPECT_EQUAL("</item>", *itr++);
+ EXPECT_EQUAL("<item weight=\"7\">", *itr++);
+ EXPECT_EQUAL("<all>http://www.flickr.com:85/fluke?ab=2#13</all>", *itr++);
+ EXPECT_EQUAL("<host>www.flickr.com</host>", *itr++);
+ EXPECT_EQUAL("<scheme>http</scheme>", *itr++);
+ EXPECT_EQUAL("<path>/fluke</path>", *itr++);
+ EXPECT_EQUAL("<port>85</port>", *itr++);
+ EXPECT_EQUAL("<query>ab=2</query>", *itr++);
+ EXPECT_EQUAL("<fragment>13</fragment>", *itr++);
+ EXPECT_EQUAL("</item>", *itr++);
+ EXPECT_EQUAL("</iwu>", *itr++);
+ EXPECT_EQUAL("<aa>2147483647</aa>", *itr++);
+ EXPECT_EQUAL("<ab>1234.56</ab>", *itr++);
+ EXPECT_EQUAL("<ac>foo baz</ac>", *itr++);
+ EXPECT_EQUAL("<ad>", *itr++);
+ EXPECT_EQUAL("<item>10</item>", *itr++);
+ EXPECT_EQUAL("</ad>", *itr++);
+ EXPECT_EQUAL("<ae>", *itr++);
+ EXPECT_EQUAL("<item>10.5</item>", *itr++);
+ EXPECT_EQUAL("</ae>", *itr++);
+ EXPECT_EQUAL("<af>", *itr++);
+ EXPECT_EQUAL("<item>foo</item>", *itr++);
+ EXPECT_EQUAL("</af>", *itr++);
+ EXPECT_EQUAL("<ag>", *itr++);
+ EXPECT_EQUAL("<item weight=\"2\">20</item>", *itr++);
+ EXPECT_EQUAL("</ag>", *itr++);
+ EXPECT_EQUAL("<ah>", *itr++);
+ EXPECT_EQUAL("<item weight=\"3\">20.5</item>", *itr++);
+ EXPECT_EQUAL("</ah>", *itr++);
+ EXPECT_EQUAL("<ai>", *itr++);
+ EXPECT_EQUAL("<item weight=\"4\">bar</item>", *itr++);
+ EXPECT_EQUAL("</ai>", *itr++);
+ EXPECT_EQUAL("<asp1>1001</asp1>", *itr++);
+ EXPECT_EQUAL("<asp2>1047758</asp2>", *itr++);
+ EXPECT_EQUAL("<aap1>", *itr++);
+ EXPECT_EQUAL("<item>1004</item>", *itr++);
+ EXPECT_EQUAL("<item>1005</item>", *itr++);
+ EXPECT_EQUAL("</aap1>", *itr++);
+ EXPECT_EQUAL("<aap2>", *itr++);
+ EXPECT_EQUAL("<item>1047806</item>", *itr++);
+ EXPECT_EQUAL("<item>1048322</item>", *itr++);
+ EXPECT_EQUAL("</aap2>", *itr++);
+ EXPECT_EQUAL("<awp1>", *itr++);
+ EXPECT_EQUAL("<item weight=\"41\">1010</item>", *itr++);
+ EXPECT_EQUAL("<item weight=\"42\">1011</item>", *itr++);
+ EXPECT_EQUAL("</awp1>", *itr++);
+ EXPECT_EQUAL("<awp2>", *itr++);
+ EXPECT_EQUAL("<item weight=\"43\">1048370</item>", *itr++);
+ EXPECT_EQUAL("<item weight=\"44\">1048382</item>", *itr++);
+ EXPECT_EQUAL("</awp2>", *itr++);
+ EXPECT_EQUAL("<sa>127</sa>", *itr++);
+ EXPECT_EQUAL("<sb>32767</sb>", *itr++);
+ EXPECT_EQUAL("<sc>2147483647</sc>", *itr++);
+ EXPECT_EQUAL("<sd>2147483648</sd>", *itr++);
+ EXPECT_EQUAL("<se>1234.56</se>", *itr++);
+ EXPECT_EQUAL("<sf>9876.54</sf>", *itr++);
+ EXPECT_EQUAL("<sg>foo bar</sg>", *itr++);
+ EXPECT_EQUAL(empty + "<sh binaryencoding=\"base64\">" +
+ vespalib::Base64::encode(raw1s) +
+ "</sh>", *itr++);
+ EXPECT_EQUAL("<si>", *itr++);
+ EXPECT_EQUAL(empty + "<item binaryencoding=\"base64\">" +
+ vespalib::Base64::encode(raw1a0) +
+ "</item>", *itr++);
+ EXPECT_EQUAL(empty + "<item binaryencoding=\"base64\">" +
+ vespalib::Base64::encode(raw1a1) +
+ "</item>", *itr++);
+ EXPECT_EQUAL("</si>", *itr++);
+ EXPECT_EQUAL("<sj>", *itr++);
+ EXPECT_EQUAL(empty +"<item weight=\"46\" binaryencoding=\"base64\">" +
+ vespalib::Base64::encode(raw1w1) +
+ "</item>", *itr++);
+ EXPECT_EQUAL(empty + "<item weight=\"45\" binaryencoding=\"base64\">" +
+ vespalib::Base64::encode(raw1w0) +
+ "</item>", *itr++);
+ EXPECT_EQUAL("</sj>", *itr++);
+ EXPECT_EQUAL("</document>", *itr++);
+ EXPECT_TRUE(itr == lines.end());
+#if 1
+ std::cout << "onedoc xml start -----" << std::endl <<
+ xml << std::endl <<
+ "-------" << std::endl;
+ std::cout << "onedoc toString start ----" << std::endl <<
+ doc->toString(true) << std::endl <<
+ "-------" << std::endl;
+#endif
+ }
+ { // create one more to see that everything is cleared
+ b.startDocument("doc::2");
+ b.startIndexField("ia").addStr("yes").endField();
+ b.startAttributeField("aa").addInt(20).endField();
+ b.startSummaryField("sa").addInt(10).endField();
+ doc = b.endDocument();
+ xml = doc->toXml("");
+ boost::split(lines, xml, boost::is_any_of("\n"));
+ itr = lines.begin();
+ EXPECT_EQUAL("<document documenttype=\"searchdocument\" documentid=\"doc::2\">", *itr++);
+ EXPECT_EQUAL("<ia>yes</ia>", *itr++);
+ EXPECT_EQUAL("<aa>20</aa>", *itr++);
+ EXPECT_EQUAL("<sa>10</sa>", *itr++);
+ EXPECT_EQUAL("</document>", *itr++);
+ EXPECT_TRUE(itr == lines.end());
+ }
+ { // create field with cjk chars
+ b.startDocument("doc::3");
+ b.startIndexField("ia").
+ addStr("我就是那个").
+ setAutoSpace(false).
+ addStr("大灰狼").
+ setAutoSpace(true).
+ endField();
+ doc = b.endDocument();
+ xml = doc->toXml("");
+ boost::split(lines, xml, boost::is_any_of("\n"));
+ itr = lines.begin();
+ EXPECT_EQUAL("<document documenttype=\"searchdocument\" documentid=\"doc::3\">", *itr++);
+ EXPECT_EQUAL("<ia>我就是那个大灰狼</ia>", *itr++);
+ EXPECT_EQUAL("</document>", *itr++);
+ EXPECT_TRUE(itr == lines.end());
+ const FieldValue::UP iaval = doc->getValue("ia");
+ ASSERT_TRUE(iaval.get() != NULL);
+ const StringFieldValue *iasval = dynamic_cast<const StringFieldValue *>
+ (iaval.get());
+ ASSERT_TRUE(iasval != NULL);
+ StringFieldValue::SpanTrees trees = iasval->getSpanTrees();
+ const SpanTree *tree = StringFieldValue::findTree(trees, linguistics::SPANTREE_NAME);
+ ASSERT_TRUE(tree != NULL);
+ std::vector<Span> spans;
+ std::vector<Span> expSpans;
+ for (SpanTree::const_iterator i = tree->begin(), ie = tree->end();
+ i != ie; ++i) {
+ Annotation &ann = const_cast<Annotation &>(*i);
+ const Span *span = dynamic_cast<const Span *>(ann.getSpanNode());
+ if (span == NULL)
+ continue;
+ spans.push_back(*span);
+ }
+ expSpans.push_back(Span(0, 15));
+ expSpans.push_back(Span(0, 15));
+ expSpans.push_back(Span(15, 9));
+ expSpans.push_back(Span(15, 9));
+ ASSERT_TRUE(expSpans == spans);
+#if 1
+ std::cout << "onedoc xml start -----" << std::endl <<
+ xml << std::endl <<
+ "-------" << std::endl;
+ std::cout << "onedoc toString start ----" << std::endl <<
+ doc->toString(true) << std::endl <<
+ "-------" << std::endl;
+#endif
+ }
+}
+
+int
+Test::Main()
+{
+ TEST_INIT("docbuilder_test");
+
+ testBuilder();
+
+ TEST_DONE();
+}
+
+}
+}
+
+TEST_APPHOOK(search::index::Test);
+
diff --git a/searchlib/src/tests/index/doctypebuilder/.gitignore b/searchlib/src/tests/index/doctypebuilder/.gitignore
new file mode 100644
index 00000000000..f15be1efcfe
--- /dev/null
+++ b/searchlib/src/tests/index/doctypebuilder/.gitignore
@@ -0,0 +1,5 @@
+*_test
+.depend
+Makefile
+doctypebuilder_test
+searchlib_doctypebuilder_test_app
diff --git a/searchlib/src/tests/index/doctypebuilder/CMakeLists.txt b/searchlib/src/tests/index/doctypebuilder/CMakeLists.txt
new file mode 100644
index 00000000000..51fb59421f9
--- /dev/null
+++ b/searchlib/src/tests/index/doctypebuilder/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_doctypebuilder_test_app
+ SOURCES
+ doctypebuilder_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_doctypebuilder_test_app COMMAND searchlib_doctypebuilder_test_app)
diff --git a/searchlib/src/tests/index/doctypebuilder/DESC b/searchlib/src/tests/index/doctypebuilder/DESC
new file mode 100644
index 00000000000..a199241a331
--- /dev/null
+++ b/searchlib/src/tests/index/doctypebuilder/DESC
@@ -0,0 +1 @@
+doctypebuilder test. Take a look at doctypebuilder.cpp for details.
diff --git a/searchlib/src/tests/index/doctypebuilder/FILES b/searchlib/src/tests/index/doctypebuilder/FILES
new file mode 100644
index 00000000000..9f261ca9a9a
--- /dev/null
+++ b/searchlib/src/tests/index/doctypebuilder/FILES
@@ -0,0 +1 @@
+doctypebuilder.cpp
diff --git a/searchlib/src/tests/index/doctypebuilder/doctypebuilder_test.cpp b/searchlib/src/tests/index/doctypebuilder/doctypebuilder_test.cpp
new file mode 100644
index 00000000000..3980700fa6b
--- /dev/null
+++ b/searchlib/src/tests/index/doctypebuilder/doctypebuilder_test.cpp
@@ -0,0 +1,88 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("doctypebuilder_test");
+#include <boost/algorithm/string/classification.hpp>
+#include <boost/algorithm/string/split.hpp>
+#include <vespa/document/repo/documenttyperepo.h>
+#include <vespa/searchlib/index/doctypebuilder.h>
+#include <vespa/vespalib/testkit/testapp.h>
+
+using namespace document;
+
+namespace search {
+namespace index {
+
+TEST("testSearchDocType") {
+ Schema s;
+ s.addIndexField(Schema::IndexField("ia", Schema::STRING));
+ s.addIndexField(Schema::IndexField("ib", Schema::STRING, Schema::ARRAY));
+ s.addIndexField(Schema::IndexField("ic", Schema::STRING, Schema::WEIGHTEDSET));
+ s.addUriIndexFields(Schema::IndexField("iu", Schema::STRING));
+ s.addUriIndexFields(Schema::IndexField("iau",
+ Schema::STRING,
+ Schema::ARRAY));
+ s.addUriIndexFields(Schema::IndexField("iwu",
+ Schema::STRING,
+ Schema::WEIGHTEDSET));
+ s.addAttributeField(Schema::AttributeField("aa", Schema::INT32));
+ s.addAttributeField(Schema::AttributeField("spos",
+ Schema::INT64));
+ s.addAttributeField(Schema::AttributeField("apos",
+ Schema::INT64,
+ Schema::ARRAY));
+ s.addAttributeField(Schema::AttributeField("wpos",
+ Schema::INT64,
+ Schema::WEIGHTEDSET));
+ s.addSummaryField(Schema::SummaryField("sa", Schema::STRING));
+
+ DocTypeBuilder docTypeBuilder(s);
+ document::DocumenttypesConfig config = docTypeBuilder.makeConfig();
+ DocumentTypeRepo repo(config);
+ const DocumentType *docType = repo.getDocumentType("searchdocument");
+ ASSERT_TRUE(docType);
+ EXPECT_EQUAL(11u, docType->getFieldCount());
+
+ EXPECT_EQUAL("String", docType->getField("ia").getDataType().getName());
+ EXPECT_EQUAL("Array<String>",
+ docType->getField("ib").getDataType().getName());
+ EXPECT_EQUAL("WeightedSet<String>",
+ docType->getField("ic").getDataType().getName());
+ EXPECT_EQUAL("url", docType->getField("iu").getDataType().getName());
+ EXPECT_EQUAL("Array<url>",
+ docType->getField("iau").getDataType().getName());
+ EXPECT_EQUAL("WeightedSet<url>",
+ docType->getField("iwu").getDataType().getName());
+
+ EXPECT_EQUAL("Int", docType->getField("aa").getDataType().getName());
+ EXPECT_EQUAL("Long", docType->getField("spos").getDataType().getName());
+ EXPECT_EQUAL("Array<Long>",
+ docType->getField("apos").getDataType().getName());
+ EXPECT_EQUAL("WeightedSet<Long>",
+ docType->getField("wpos").getDataType().getName());
+ EXPECT_EQUAL("String", docType->getField("sa").getDataType().getName());
+}
+
+TEST("require that multiple fields can have the same type") {
+ Schema s;
+ s.addIndexField(Schema::IndexField("array1", Schema::STRING,
+ Schema::ARRAY));
+ s.addIndexField(Schema::IndexField("array2", Schema::STRING,
+ Schema::ARRAY));
+ DocTypeBuilder docTypeBuilder(s);
+ document::DocumenttypesConfig config = docTypeBuilder.makeConfig();
+ DocumentTypeRepo repo(config);
+ const DocumentType *docType = repo.getDocumentType("searchdocument");
+ ASSERT_TRUE(docType);
+ EXPECT_EQUAL(2u, docType->getFieldCount());
+
+ EXPECT_EQUAL("Array<String>",
+ docType->getField("array1").getDataType().getName());
+ EXPECT_EQUAL("Array<String>",
+ docType->getField("array2").getDataType().getName());
+}
+
+} // namespace index
+} // namespace search
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/indexmetainfo/.gitignore b/searchlib/src/tests/indexmetainfo/.gitignore
new file mode 100644
index 00000000000..ddc0b5f4582
--- /dev/null
+++ b/searchlib/src/tests/indexmetainfo/.gitignore
@@ -0,0 +1,5 @@
+.depend
+Makefile
+indexmetainfo_test
+test-save.txt
+searchlib_indexmetainfo_test_app
diff --git a/searchlib/src/tests/indexmetainfo/CMakeLists.txt b/searchlib/src/tests/indexmetainfo/CMakeLists.txt
new file mode 100644
index 00000000000..607ab7b7e5b
--- /dev/null
+++ b/searchlib/src/tests/indexmetainfo/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_indexmetainfo_test_app
+ SOURCES
+ indexmetainfo_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_indexmetainfo_test_app COMMAND searchlib_indexmetainfo_test_app)
diff --git a/searchlib/src/tests/indexmetainfo/DESC b/searchlib/src/tests/indexmetainfo/DESC
new file mode 100644
index 00000000000..ee312b5fcdc
--- /dev/null
+++ b/searchlib/src/tests/indexmetainfo/DESC
@@ -0,0 +1,2 @@
+Test the API class used to access the 'meta-info.txt' file used to
+hold meta information for an index.
diff --git a/searchlib/src/tests/indexmetainfo/FILES b/searchlib/src/tests/indexmetainfo/FILES
new file mode 100644
index 00000000000..8a96f5f3311
--- /dev/null
+++ b/searchlib/src/tests/indexmetainfo/FILES
@@ -0,0 +1 @@
+indexmetainfo.cpp
diff --git a/searchlib/src/tests/indexmetainfo/bogus1.txt b/searchlib/src/tests/indexmetainfo/bogus1.txt
new file mode 100644
index 00000000000..6d412ad302e
--- /dev/null
+++ b/searchlib/src/tests/indexmetainfo/bogus1.txt
@@ -0,0 +1 @@
+noAssign
diff --git a/searchlib/src/tests/indexmetainfo/bogus10.txt b/searchlib/src/tests/indexmetainfo/bogus10.txt
new file mode 100644
index 00000000000..e4f500cf897
--- /dev/null
+++ b/searchlib/src/tests/indexmetainfo/bogus10.txt
@@ -0,0 +1,4 @@
+nextSnapshotId=128
+snapshot.0.valid=false
+snapshot.0.syncToken=bogus
+snapshot.0.dirName=foo
diff --git a/searchlib/src/tests/indexmetainfo/bogus2.txt b/searchlib/src/tests/indexmetainfo/bogus2.txt
new file mode 100644
index 00000000000..9895913aece
--- /dev/null
+++ b/searchlib/src/tests/indexmetainfo/bogus2.txt
@@ -0,0 +1 @@
+=noKey
diff --git a/searchlib/src/tests/indexmetainfo/bogus3.txt b/searchlib/src/tests/indexmetainfo/bogus3.txt
new file mode 100644
index 00000000000..73c7da9da74
--- /dev/null
+++ b/searchlib/src/tests/indexmetainfo/bogus3.txt
@@ -0,0 +1 @@
+unknownKey=magicValue
diff --git a/searchlib/src/tests/indexmetainfo/bogus4.txt b/searchlib/src/tests/indexmetainfo/bogus4.txt
new file mode 100644
index 00000000000..d841e7509ca
--- /dev/null
+++ b/searchlib/src/tests/indexmetainfo/bogus4.txt
@@ -0,0 +1 @@
+nextSnapshotId=illegalNumber
diff --git a/searchlib/src/tests/indexmetainfo/bogus5.txt b/searchlib/src/tests/indexmetainfo/bogus5.txt
new file mode 100644
index 00000000000..08c64d393ba
--- /dev/null
+++ b/searchlib/src/tests/indexmetainfo/bogus5.txt
@@ -0,0 +1,7 @@
+nextSnapshotId=128
+snapshot.1.valid=true
+snapshot.1.syncToken=50
+snapshot.1.dirName=foo
+snapshot.0.valid=false
+snapshot.0.syncToken=100
+snapshot.0.dirName=bar
diff --git a/searchlib/src/tests/indexmetainfo/bogus6.txt b/searchlib/src/tests/indexmetainfo/bogus6.txt
new file mode 100644
index 00000000000..5506704db80
--- /dev/null
+++ b/searchlib/src/tests/indexmetainfo/bogus6.txt
@@ -0,0 +1,7 @@
+nextSnapshotId=128
+snapshot.0.valid=true
+snapshot.0.syncToken=50
+snapshot.0.dirName=foo
+snapshot.2.valid=false
+snapshot.2.syncToken=100
+snapshot.2.dirName=bar
diff --git a/searchlib/src/tests/indexmetainfo/bogus7.txt b/searchlib/src/tests/indexmetainfo/bogus7.txt
new file mode 100644
index 00000000000..efbc17b40b6
--- /dev/null
+++ b/searchlib/src/tests/indexmetainfo/bogus7.txt
@@ -0,0 +1,4 @@
+nextSnapshotId=128
+snapshot..valid=true
+snapshot..syncToken=50
+snapshot..dirName=foo
diff --git a/searchlib/src/tests/indexmetainfo/bogus8.txt b/searchlib/src/tests/indexmetainfo/bogus8.txt
new file mode 100644
index 00000000000..e359ca68f12
--- /dev/null
+++ b/searchlib/src/tests/indexmetainfo/bogus8.txt
@@ -0,0 +1,4 @@
+nextSnapshotId=128
+snapshot.x.valid=true
+snapshot.x.syncToken=50
+snapshot.x.dirName=foo
diff --git a/searchlib/src/tests/indexmetainfo/bogus9.txt b/searchlib/src/tests/indexmetainfo/bogus9.txt
new file mode 100644
index 00000000000..5dd606d8942
--- /dev/null
+++ b/searchlib/src/tests/indexmetainfo/bogus9.txt
@@ -0,0 +1,4 @@
+nextSnapshotId=128
+snapshot.0.valid=xyz
+snapshot.0.syncToken=50
+snapshot.0.dirName=foo
diff --git a/searchlib/src/tests/indexmetainfo/indexmetainfo_test.cpp b/searchlib/src/tests/indexmetainfo/indexmetainfo_test.cpp
new file mode 100644
index 00000000000..e7dc828c9e5
--- /dev/null
+++ b/searchlib/src/tests/indexmetainfo/indexmetainfo_test.cpp
@@ -0,0 +1,127 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("indexmetainfo_test");
+#include <vespa/vespalib/testkit/testapp.h>
+#include <vespa/searchlib/common/indexmetainfo.h>
+
+using search::IndexMetaInfo;
+
+typedef IndexMetaInfo::Snapshot Snap;
+
+TEST_SETUP(Test)
+
+int
+Test::Main()
+{
+ TEST_INIT("indexmetainfo_test");
+ { // load pregenerated file
+ IndexMetaInfo info("");
+ EXPECT_TRUE(info.load());
+ ASSERT_TRUE(info.snapshots().size() == 4);
+ EXPECT_TRUE(info.snapshots()[0].valid);
+ EXPECT_TRUE(info.snapshots()[0].syncToken == 50);
+ EXPECT_TRUE(info.snapshots()[0].dirName == "foo");
+ EXPECT_TRUE(!info.snapshots()[1].valid);
+ EXPECT_TRUE(info.snapshots()[1].syncToken == 100);
+ EXPECT_TRUE(info.snapshots()[1].dirName == "bar");
+ EXPECT_TRUE(info.snapshots()[2].valid);
+ EXPECT_TRUE(info.snapshots()[2].syncToken == 200);
+ EXPECT_TRUE(info.snapshots()[2].dirName == "baz");
+ EXPECT_TRUE(!info.snapshots()[3].valid);
+ EXPECT_TRUE(info.snapshots()[3].syncToken == 500);
+ EXPECT_TRUE(info.snapshots()[3].dirName == "last");
+ {
+ Snap s = info.getBestSnapshot();
+ EXPECT_TRUE(s.valid);
+ EXPECT_TRUE(s.syncToken == 200);
+ EXPECT_TRUE(s.dirName == "baz");
+ }
+ {
+ Snap s = info.getSnapshot(100);
+ EXPECT_TRUE(!s.valid);
+ EXPECT_TRUE(s.syncToken == 100);
+ EXPECT_TRUE(s.dirName == "bar");
+ }
+ {
+ Snap s = info.getSnapshot(666);
+ EXPECT_TRUE(!s.valid);
+ EXPECT_TRUE(s.syncToken == 0);
+ EXPECT_TRUE(s.dirName == "");
+ }
+ {
+ EXPECT_TRUE(info.invalidateSnapshot(200));
+ Snap s = info.getBestSnapshot();
+ EXPECT_TRUE(s.valid);
+ EXPECT_TRUE(s.syncToken == 50);
+ EXPECT_TRUE(s.dirName == "foo");
+ }
+ {
+ EXPECT_TRUE(info.invalidateSnapshot(50));
+ Snap s = info.getBestSnapshot();
+ EXPECT_TRUE(!s.valid);
+ EXPECT_TRUE(s.syncToken == 0);
+ EXPECT_TRUE(s.dirName == "");
+ }
+ {
+ EXPECT_TRUE(info.validateSnapshot(500));
+ Snap s = info.getBestSnapshot();
+ EXPECT_TRUE(s.valid);
+ EXPECT_TRUE(s.syncToken == 500);
+ EXPECT_TRUE(s.dirName == "last");
+ }
+ {
+ EXPECT_TRUE(!info.invalidateSnapshot(666));
+ EXPECT_TRUE(!info.validateSnapshot(666));
+ }
+ {
+ info.clear();
+ EXPECT_TRUE(info.snapshots().size() == 0);
+ Snap s = info.getBestSnapshot();
+ EXPECT_TRUE(!s.valid);
+ EXPECT_TRUE(s.syncToken == 0);
+ EXPECT_TRUE(s.dirName == "");
+ }
+ }
+ { // load file that does not exist
+ IndexMetaInfo info(".");
+ EXPECT_TRUE(!info.load("file-not-present.txt"));
+ }
+ { // load files with errors should fail
+ IndexMetaInfo info(".");
+ EXPECT_TRUE(!info.load("bogus1.txt"));
+ EXPECT_TRUE(!info.load("bogus2.txt"));
+ EXPECT_TRUE(!info.load("bogus3.txt"));
+ EXPECT_TRUE(!info.load("bogus4.txt"));
+ EXPECT_TRUE(!info.load("bogus5.txt"));
+ EXPECT_TRUE(!info.load("bogus6.txt"));
+ EXPECT_TRUE(!info.load("bogus7.txt"));
+ EXPECT_TRUE(!info.load("bogus8.txt"));
+ EXPECT_TRUE(!info.load("bogus9.txt"));
+ EXPECT_TRUE(!info.load("bogus10.txt"));
+ }
+ { // save/load/save/load/save/load test
+ std::string file("test-save.txt");
+ IndexMetaInfo a(".");
+ IndexMetaInfo b(".");
+ EXPECT_TRUE(a.addSnapshot(Snap(true, 50, "foo")));
+ EXPECT_TRUE(a.addSnapshot(Snap(false, 100, "bar")));
+ EXPECT_TRUE(!a.addSnapshot(Snap(false, 100, "bar")));
+ EXPECT_TRUE(a.save(file));
+ EXPECT_TRUE(b.load(file));
+ ASSERT_TRUE(b.snapshots().size() == 2);
+ EXPECT_TRUE(b.snapshots()[0] == Snap(true, 50, "foo"));
+ EXPECT_TRUE(b.snapshots()[1] == Snap(false, 100, "bar"));
+ EXPECT_TRUE(a.save(file));
+ EXPECT_TRUE(b.load(file));
+ ASSERT_TRUE(b.snapshots().size() == 2);
+ EXPECT_TRUE(b.snapshots()[0] == Snap(true, 50, "foo"));
+ EXPECT_TRUE(b.snapshots()[1] == Snap(false, 100, "bar"));
+ a.removeSnapshot(100);
+ EXPECT_TRUE(a.save(file));
+ EXPECT_TRUE(b.load(file));
+ ASSERT_TRUE(b.snapshots().size() == 1);
+ EXPECT_TRUE(b.snapshots()[0] == Snap(true, 50, "foo"));
+ }
+ TEST_DONE();
+}
diff --git a/searchlib/src/tests/indexmetainfo/meta-info.txt b/searchlib/src/tests/indexmetainfo/meta-info.txt
new file mode 100644
index 00000000000..20182f5786c
--- /dev/null
+++ b/searchlib/src/tests/indexmetainfo/meta-info.txt
@@ -0,0 +1,12 @@
+snapshot.0.valid=true
+snapshot.0.syncToken=50
+snapshot.0.dirName=foo
+snapshot.1.valid=true
+snapshot.1.syncToken=200
+snapshot.1.dirName=baz
+snapshot.2.valid=false
+snapshot.2.syncToken=100
+snapshot.2.dirName=bar
+snapshot.3.valid=false
+snapshot.3.syncToken=500
+snapshot.3.dirName=last
diff --git a/searchlib/src/tests/ld-library-path/.gitignore b/searchlib/src/tests/ld-library-path/.gitignore
new file mode 100644
index 00000000000..5f02ecfc8f8
--- /dev/null
+++ b/searchlib/src/tests/ld-library-path/.gitignore
@@ -0,0 +1,4 @@
+.depend
+Makefile
+ld-library-path_test
+searchlib_ld-library-path_test_app
diff --git a/searchlib/src/tests/ld-library-path/CMakeLists.txt b/searchlib/src/tests/ld-library-path/CMakeLists.txt
new file mode 100644
index 00000000000..47e1372ffc6
--- /dev/null
+++ b/searchlib/src/tests/ld-library-path/CMakeLists.txt
@@ -0,0 +1,7 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_ld-library-path_test_app
+ SOURCES
+ ld-library-path.cpp
+ DEPENDS
+)
+vespa_add_test(NAME searchlib_ld-library-path_test_app COMMAND searchlib_ld-library-path_test_app)
diff --git a/searchlib/src/tests/ld-library-path/ld-library-path.cpp b/searchlib/src/tests/ld-library-path/ld-library-path.cpp
new file mode 100644
index 00000000000..c9a429b3b35
--- /dev/null
+++ b/searchlib/src/tests/ld-library-path/ld-library-path.cpp
@@ -0,0 +1,12 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("");
+
+int
+main(int, char **)
+{
+ LOG(info, "LD_LIBRARY_PATH='%s'", getenv("LD_LIBRARY_PATH"));
+ return 0;
+}
diff --git a/searchlib/src/tests/memoryindex/btree/.gitignore b/searchlib/src/tests/memoryindex/btree/.gitignore
new file mode 100644
index 00000000000..94440affa90
--- /dev/null
+++ b/searchlib/src/tests/memoryindex/btree/.gitignore
@@ -0,0 +1,6 @@
+.depend
+Makefile
+btree_test
+frozenbtree_test
+searchlib_btree_test_app
+searchlib_frozenbtree_test_app
diff --git a/searchlib/src/tests/memoryindex/btree/CMakeLists.txt b/searchlib/src/tests/memoryindex/btree/CMakeLists.txt
new file mode 100644
index 00000000000..8b523030cab
--- /dev/null
+++ b/searchlib/src/tests/memoryindex/btree/CMakeLists.txt
@@ -0,0 +1,15 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_btree_test_app
+ SOURCES
+ btree_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_btree_test_app COMMAND searchlib_btree_test_app)
+vespa_add_executable(searchlib_frozenbtree_test_app
+ SOURCES
+ frozenbtree_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_frozenbtree_test_app COMMAND searchlib_frozenbtree_test_app)
diff --git a/searchlib/src/tests/memoryindex/btree/DESC b/searchlib/src/tests/memoryindex/btree/DESC
new file mode 100644
index 00000000000..02739da7527
--- /dev/null
+++ b/searchlib/src/tests/memoryindex/btree/DESC
@@ -0,0 +1 @@
+btree test. Take a look at btree_test.cpp for details.
diff --git a/searchlib/src/tests/memoryindex/btree/FILES b/searchlib/src/tests/memoryindex/btree/FILES
new file mode 100644
index 00000000000..e63a2f68eb4
--- /dev/null
+++ b/searchlib/src/tests/memoryindex/btree/FILES
@@ -0,0 +1 @@
+btree_test.cpp
diff --git a/searchlib/src/tests/memoryindex/btree/btree_test.cpp b/searchlib/src/tests/memoryindex/btree/btree_test.cpp
new file mode 100644
index 00000000000..5fb6761ba57
--- /dev/null
+++ b/searchlib/src/tests/memoryindex/btree/btree_test.cpp
@@ -0,0 +1,1282 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("btree_test");
+#include <vespa/vespalib/testkit/testapp.h>
+#include <string>
+#include <vespa/searchlib/btree/btreeroot.h>
+#include <vespa/searchlib/btree/btreebuilder.h>
+#include <vespa/searchlib/btree/btreenodeallocator.h>
+#include <vespa/searchlib/btree/btree.h>
+#include <vespa/searchlib/btree/btreestore.h>
+#include <vespa/searchlib/util/rand48.h>
+
+#include <vespa/searchlib/btree/btreenodeallocator.hpp>
+#include <vespa/searchlib/btree/btreenode.hpp>
+#include <vespa/searchlib/btree/btreenodestore.hpp>
+#include <vespa/searchlib/btree/btreeiterator.hpp>
+#include <vespa/searchlib/btree/btreeroot.hpp>
+#include <vespa/searchlib/btree/btreebuilder.hpp>
+#include <vespa/searchlib/btree/btree.hpp>
+#include <vespa/searchlib/btree/btreestore.hpp>
+
+using vespalib::GenerationHandler;
+
+namespace search {
+namespace btree {
+
+namespace {
+
+template <typename T>
+std::string
+toStr(const T & v)
+{
+ std::stringstream ss;
+ ss << v;
+ return ss.str();
+}
+
+}
+
+typedef BTreeTraits<4, 4, 31, false> MyTraits;
+
+#define KEYWRAP
+
+#ifdef KEYWRAP
+
+// Force use of functor to compare keys.
+class WrapInt
+{
+public:
+ int _val;
+ WrapInt(int val) : _val(val) {}
+ WrapInt(void) : _val(0) {}
+ bool operator==(const WrapInt & rhs) const { return _val == rhs._val; }
+};
+
+std::ostream &
+operator<<(std::ostream &s, const WrapInt &i)
+{
+ s << i._val;
+ return s;
+}
+
+typedef WrapInt MyKey;
+class MyComp
+{
+public:
+ bool
+ operator()(const WrapInt &a, const WrapInt &b) const
+ {
+ return a._val < b._val;
+ }
+};
+
+#define UNWRAP(key) (key._val)
+#else
+typedef int MyKey;
+typedef std::less<int> MyComp;
+#define UNWRAP(key) (key)
+#endif
+
+typedef BTree<MyKey, std::string,
+ btree::NoAggregated,
+ MyComp, MyTraits> MyTree;
+typedef BTreeStore<MyKey, std::string,
+ btree::NoAggregated,
+ MyComp, MyTraits> MyTreeStore;
+typedef MyTree::Builder MyTreeBuilder;
+typedef MyTree::LeafNodeType MyLeafNode;
+typedef MyTree::InternalNodeType MyInternalNode;
+typedef MyTree::NodeAllocatorType MyNodeAllocator;
+typedef std::pair<MyKey, std::string> LeafPair;
+typedef MyTreeStore::KeyDataType MyKeyData;
+typedef MyTreeStore::KeyDataTypeRefPair MyKeyDataRefPair;
+
+typedef BTree<int, BTreeNoLeafData, btree::NoAggregated> SetTreeB;
+
+typedef BTreeTraits<16, 16, 10, false> LSeekTraits;
+typedef BTree<int, BTreeNoLeafData, btree::NoAggregated,
+ std::less<int>, LSeekTraits> SetTreeL;
+
+struct LeafPairLess {
+ bool operator()(const LeafPair & lhs, const LeafPair & rhs) const {
+ return UNWRAP(lhs.first) < UNWRAP(rhs.first);
+ }
+};
+
+template <typename ManagerType>
+void
+cleanup(GenerationHandler & g, ManagerType & m)
+{
+ m.freeze();
+ m.transferHoldLists(g.getCurrentGeneration());
+ g.incGeneration();
+ m.trimHoldLists(g.getFirstUsedGeneration());
+}
+
+template <typename ManagerType, typename NodeType>
+void
+cleanup(GenerationHandler & g,
+ ManagerType & m,
+ BTreeNode::Ref n1Ref, NodeType * n1,
+ BTreeNode::Ref n2Ref = BTreeNode::Ref(), NodeType * n2 = NULL)
+{
+ assert(ManagerType::isValidRef(n1Ref));
+ m.holdNode(n1Ref, n1);
+ if (n2 != NULL) {
+ assert(ManagerType::isValidRef(n2Ref));
+ m.holdNode(n2Ref, n2);
+ } else {
+ assert(!ManagerType::isValidRef(n2Ref));
+ }
+ cleanup(g, m);
+}
+
+class Test : public vespalib::TestApp {
+private:
+ template <typename LeafNodeType>
+ bool assertLeafNode(const std::string & exp, const LeafNodeType & n);
+ bool assertSeek(int skey, int ekey, const MyTree & tree);
+ bool assertSeek(int skey, int ekey, MyTree::Iterator & itr);
+ bool assertMemoryUsage(const MemoryUsage & exp, const MemoryUsage & act);
+
+ void
+ buildSubTree(const std::vector<LeafPair> &sub,
+ size_t numEntries);
+
+ void requireThatNodeInsertWorks();
+ void requireThatNodeSplitInsertWorks();
+ void requireThatNodeStealWorks();
+ void requireThatNodeRemoveWorks();
+ void requireThatNodeLowerBoundWorks();
+ void requireThatWeCanInsertAndRemoveFromTree();
+ void requireThatSortedTreeInsertWorks();
+ void requireThatCornerCaseTreeFindWorks();
+ void requireThatBasicTreeIteratorWorks();
+ void requireThatTreeIteratorSeekWorks();
+ void requireThatTreeIteratorAssignWorks();
+ void requireThatMemoryUsageIsCalculated();
+ template <typename TreeType>
+ void requireThatLowerBoundWorksT();
+ void requireThatLowerBoundWorks();
+ template <typename TreeType>
+ void requireThatUpperBoundWorksT();
+ void requireThatUpperBoundWorks();
+ void requireThatUpdateOfKeyWorks();
+
+ void
+ requireThatSmallNodesWorks();
+
+ void
+ requireThatApplyWorks();
+
+ void
+ requireThatIteratorDistanceWorks(int numEntries);
+
+ void
+ requireThatIteratorDistanceWorks();
+public:
+ int Main();
+};
+
+template <typename LeafNodeType>
+bool
+Test::assertLeafNode(const std::string & exp, const LeafNodeType & n)
+{
+ std::stringstream ss;
+ ss << "[";
+ for (uint32_t i = 0; i < n.validSlots(); ++i) {
+ if (i > 0) ss << ",";
+ ss << n.getKey(i) << ":" << n.getData(i);
+ }
+ ss << "]";
+ if (!EXPECT_EQUAL(exp, ss.str())) return false;
+ return true;
+}
+
+bool
+Test::assertSeek(int skey, int ekey, const MyTree & tree)
+{
+ MyTree::Iterator itr = tree.begin();
+ return assertSeek(skey, ekey, itr);
+}
+
+bool
+Test::assertSeek(int skey, int ekey, MyTree::Iterator & itr)
+{
+ MyTree::Iterator bseekItr = itr;
+ MyTree::Iterator lseekItr = itr;
+ bseekItr.binarySeek(skey);
+ lseekItr.linearSeek(skey);
+ if (!EXPECT_EQUAL(ekey, UNWRAP(bseekItr.getKey()))) return false;
+ if (!EXPECT_EQUAL(ekey, UNWRAP(lseekItr.getKey()))) return false;
+ itr = bseekItr;
+ return true;
+}
+
+bool
+Test::assertMemoryUsage(const MemoryUsage & exp, const MemoryUsage & act)
+{
+ if (!EXPECT_EQUAL(exp.allocatedBytes(), act.allocatedBytes())) return false;
+ if (!EXPECT_EQUAL(exp.usedBytes(), act.usedBytes())) return false;
+ if (!EXPECT_EQUAL(exp.deadBytes(), act.deadBytes())) return false;
+ if (!EXPECT_EQUAL(exp.allocatedBytesOnHold(), act.allocatedBytesOnHold())) return false;
+ return true;
+}
+
+void
+Test::requireThatNodeInsertWorks()
+{
+ GenerationHandler g;
+ MyNodeAllocator m;
+ MyLeafNode::RefPair nPair = m.allocLeafNode();
+ MyLeafNode *n = nPair.second;
+ EXPECT_TRUE(n->isLeaf());
+ EXPECT_EQUAL(0u, n->validSlots());
+ n->insert(0, 20, "b");
+ EXPECT_TRUE(!n->isFull());
+ EXPECT_TRUE(!n->isAtLeastHalfFull());
+ EXPECT_TRUE(assertLeafNode("[20:b]", *n));
+ n->insert(0, 10, "a");
+ EXPECT_TRUE(!n->isFull());
+ EXPECT_TRUE(n->isAtLeastHalfFull());
+ EXPECT_TRUE(assertLeafNode("[10:a,20:b]", *n));
+ EXPECT_EQUAL(20, UNWRAP(n->getLastKey()));
+ EXPECT_EQUAL("b", n->getLastData());
+ n->insert(2, 30, "c");
+ EXPECT_TRUE(!n->isFull());
+ n->insert(3, 40, "d");
+ EXPECT_TRUE(n->isFull());
+ EXPECT_TRUE(n->isAtLeastHalfFull());
+ EXPECT_TRUE(assertLeafNode("[10:a,20:b,30:c,40:d]", *n));
+ cleanup(g, m, nPair.first, n);
+}
+
+MyLeafNode::RefPair
+getLeafNode(MyNodeAllocator &allocator)
+{
+ MyLeafNode::RefPair nPair = allocator.allocLeafNode();
+ MyLeafNode *n = nPair.second;
+ n->insert(0, 1, "a");
+ n->insert(1, 3, "c");
+ n->insert(2, 5, "e");
+ n->insert(3, 7, "g");
+ return nPair;
+}
+
+void
+Test::requireThatNodeSplitInsertWorks()
+{
+ { // new entry in current node
+ GenerationHandler g;
+ MyNodeAllocator m;
+ MyLeafNode::RefPair nPair = getLeafNode(m);
+ MyLeafNode *n = nPair.second;
+ MyLeafNode::RefPair sPair = m.allocLeafNode();
+ MyLeafNode *s = sPair.second;
+ n->splitInsert(s, 2, 4, "d");
+ EXPECT_TRUE(assertLeafNode("[1:a,3:c,4:d]", *n));
+ EXPECT_TRUE(assertLeafNode("[5:e,7:g]", *s));
+ cleanup(g, m, nPair.first, n, sPair.first, s);
+ }
+ { // new entry in split node
+ GenerationHandler g;
+ MyNodeAllocator m;
+ MyLeafNode::RefPair nPair = getLeafNode(m);
+ MyLeafNode *n = nPair.second;
+ MyLeafNode::RefPair sPair = m.allocLeafNode();
+ MyLeafNode *s = sPair.second;
+ n->splitInsert(s, 3, 6, "f");
+ EXPECT_TRUE(assertLeafNode("[1:a,3:c,5:e]", *n));
+ EXPECT_TRUE(assertLeafNode("[6:f,7:g]", *s));
+ cleanup(g, m, nPair.first, n, sPair.first, s);
+ }
+ { // new entry at end
+ GenerationHandler g;
+ MyNodeAllocator m;
+ MyLeafNode::RefPair nPair = getLeafNode(m);
+ MyLeafNode *n = nPair.second;
+ MyLeafNode::RefPair sPair = m.allocLeafNode();
+ MyLeafNode *s = sPair.second;
+ n->splitInsert(s, 4, 8, "h");
+ EXPECT_TRUE(assertLeafNode("[1:a,3:c,5:e]", *n));
+ EXPECT_TRUE(assertLeafNode("[7:g,8:h]", *s));
+ cleanup(g, m, nPair.first, n, sPair.first, s);
+ }
+}
+
+struct BTreeStealTraits
+{
+ static const size_t LEAF_SLOTS = 6;
+ static const size_t INTERNAL_SLOTS = 6;
+};
+
+void
+Test::requireThatNodeStealWorks()
+{
+ typedef BTreeLeafNode<int, std::string,
+ btree::NoAggregated, 6> MyStealNode;
+ typedef BTreeNodeAllocator<int, std::string,
+ btree::NoAggregated,
+ BTreeStealTraits::INTERNAL_SLOTS, BTreeStealTraits::LEAF_SLOTS>
+ MyStealManager;
+ { // steal all from left
+ GenerationHandler g;
+ MyStealManager m;
+ MyStealNode::RefPair nPair = m.allocLeafNode();
+ MyStealNode *n = nPair.second;
+ n->insert(0, 4, "d");
+ n->insert(1, 5, "e");
+ EXPECT_TRUE(!n->isAtLeastHalfFull());
+ MyStealNode::RefPair vPair = m.allocLeafNode();
+ MyStealNode *v = vPair.second;
+ v->insert(0, 1, "a");
+ v->insert(1, 2, "b");
+ v->insert(2, 3, "c");
+ n->stealAllFromLeftNode(v);
+ EXPECT_TRUE(n->isAtLeastHalfFull());
+ EXPECT_TRUE(assertLeafNode("[1:a,2:b,3:c,4:d,5:e]", *n));
+ cleanup(g, m, nPair.first, n, vPair.first, v);
+ }
+ { // steal all from right
+ GenerationHandler g;
+ MyStealManager m;
+ MyStealNode::RefPair nPair = m.allocLeafNode();
+ MyStealNode *n = nPair.second;
+ n->insert(0, 1, "a");
+ n->insert(1, 2, "b");
+ EXPECT_TRUE(!n->isAtLeastHalfFull());
+ MyStealNode::RefPair vPair = m.allocLeafNode();
+ MyStealNode *v = vPair.second;
+ v->insert(0, 3, "c");
+ v->insert(1, 4, "d");
+ v->insert(2, 5, "e");
+ n->stealAllFromRightNode(v);
+ EXPECT_TRUE(n->isAtLeastHalfFull());
+ EXPECT_TRUE(assertLeafNode("[1:a,2:b,3:c,4:d,5:e]", *n));
+ cleanup(g, m, nPair.first, n, vPair.first, v);
+ }
+ { // steal some from left
+ GenerationHandler g;
+ MyStealManager m;
+ MyStealNode::RefPair nPair = m.allocLeafNode();
+ MyStealNode *n = nPair.second;
+ n->insert(0, 5, "e");
+ n->insert(1, 6, "f");
+ EXPECT_TRUE(!n->isAtLeastHalfFull());
+ MyStealNode::RefPair vPair = m.allocLeafNode();
+ MyStealNode *v = vPair.second;
+ v->insert(0, 1, "a");
+ v->insert(1, 2, "b");
+ v->insert(2, 3, "c");
+ v->insert(3, 4, "d");
+ n->stealSomeFromLeftNode(v);
+ EXPECT_TRUE(n->isAtLeastHalfFull());
+ EXPECT_TRUE(v->isAtLeastHalfFull());
+ EXPECT_TRUE(assertLeafNode("[4:d,5:e,6:f]", *n));
+ EXPECT_TRUE(assertLeafNode("[1:a,2:b,3:c]", *v));
+ cleanup(g, m, nPair.first, n, vPair.first, v);
+ }
+ { // steal some from right
+ GenerationHandler g;
+ MyStealManager m;
+ MyStealNode::RefPair nPair = m.allocLeafNode();
+ MyStealNode *n = nPair.second;
+ n->insert(0, 1, "a");
+ n->insert(1, 2, "b");
+ EXPECT_TRUE(!n->isAtLeastHalfFull());
+ MyStealNode::RefPair vPair = m.allocLeafNode();
+ MyStealNode *v = vPair.second;
+ v->insert(0, 3, "c");
+ v->insert(1, 4, "d");
+ v->insert(2, 5, "e");
+ v->insert(3, 6, "f");
+ n->stealSomeFromRightNode(v);
+ EXPECT_TRUE(n->isAtLeastHalfFull());
+ EXPECT_TRUE(v->isAtLeastHalfFull());
+ EXPECT_TRUE(assertLeafNode("[1:a,2:b,3:c]", *n));
+ EXPECT_TRUE(assertLeafNode("[4:d,5:e,6:f]", *v));
+ cleanup(g, m, nPair.first, n, vPair.first, v);
+ }
+}
+
+void
+Test::requireThatNodeRemoveWorks()
+{
+ GenerationHandler g;
+ MyNodeAllocator m;
+ MyLeafNode::RefPair nPair = getLeafNode(m);
+ MyLeafNode *n = nPair.second;
+ n->remove(1);
+ EXPECT_TRUE(assertLeafNode("[1:a,5:e,7:g]", *n));
+ cleanup(g, m, nPair.first, n);
+}
+
+void
+Test::requireThatNodeLowerBoundWorks()
+{
+ GenerationHandler g;
+ MyNodeAllocator m;
+ MyLeafNode::RefPair nPair = getLeafNode(m);
+ MyLeafNode *n = nPair.second;
+ EXPECT_EQUAL(1u, n->lower_bound(3, MyComp()));
+ EXPECT_FALSE(MyComp()(3, n->getKey(1u)));
+ EXPECT_EQUAL(0u, n->lower_bound(0, MyComp()));
+ EXPECT_TRUE(MyComp()(0, n->getKey(0u)));
+ EXPECT_EQUAL(1u, n->lower_bound(2, MyComp()));
+ EXPECT_TRUE(MyComp()(2, n->getKey(1u)));
+ EXPECT_EQUAL(3u, n->lower_bound(6, MyComp()));
+ EXPECT_TRUE(MyComp()(6, n->getKey(3u)));
+ EXPECT_EQUAL(4u, n->lower_bound(8, MyComp()));
+ cleanup(g, m, nPair.first, n);
+}
+
+void
+generateData(std::vector<LeafPair> & data, size_t numEntries)
+{
+ data.reserve(numEntries);
+ Rand48 rnd;
+ rnd.srand48(10);
+ for (size_t i = 0; i < numEntries; ++i) {
+ int num = rnd.lrand48() % 10000000;
+ std::string str = toStr(num);
+ data.push_back(std::make_pair(num, str));
+ }
+}
+
+
+void
+Test::buildSubTree(const std::vector<LeafPair> &sub,
+ size_t numEntries)
+{
+ GenerationHandler g;
+ MyTree tree;
+ MyTreeBuilder builder(tree.getAllocator());
+
+ std::vector<LeafPair> sorted(sub.begin(), sub.begin() + numEntries);
+ std::sort(sorted.begin(), sorted.end(), LeafPairLess());
+ for (size_t i = 0; i < numEntries; ++i) {
+ int num = UNWRAP(sorted[i].first);
+ const std::string & str = sorted[i].second;
+ builder.insert(num, str);
+ }
+ tree.assign(builder);
+ assert(numEntries == tree.size());
+ assert(tree.isValid());
+ EXPECT_EQUAL(numEntries, tree.size());
+ EXPECT_TRUE(tree.isValid());
+ MyTree::Iterator itr = tree.begin();
+ MyTree::Iterator ritr = itr;
+ if (numEntries > 0) {
+ EXPECT_TRUE(ritr.valid());
+ EXPECT_EQUAL(0u, ritr.position());
+ --ritr;
+ EXPECT_TRUE(!ritr.valid());
+ EXPECT_EQUAL(numEntries, ritr.position());
+ --ritr;
+ EXPECT_TRUE(ritr.valid());
+ EXPECT_EQUAL(numEntries - 1, ritr.position());
+ } else {
+ EXPECT_TRUE(!ritr.valid());
+ EXPECT_EQUAL(0u, ritr.position());
+ --ritr;
+ EXPECT_TRUE(!ritr.valid());
+ EXPECT_EQUAL(0u, ritr.position());
+ }
+ for (size_t i = 0; i < numEntries; ++i) {
+ EXPECT_TRUE(itr.valid());
+ EXPECT_EQUAL(sorted[i].first, itr.getKey());
+ EXPECT_EQUAL(sorted[i].second, itr.getData());
+ ++itr;
+ }
+ EXPECT_TRUE(!itr.valid());
+ ritr = itr;
+ EXPECT_TRUE(!ritr.valid());
+ --ritr;
+ for (size_t i = 0; i < numEntries; ++i) {
+ EXPECT_TRUE(ritr.valid());
+ EXPECT_EQUAL(sorted[numEntries - 1 - i].first, ritr.getKey());
+ EXPECT_EQUAL(sorted[numEntries - 1 - i].second, ritr.getData());
+ --ritr;
+ }
+ EXPECT_TRUE(!ritr.valid());
+}
+
+void
+Test::requireThatWeCanInsertAndRemoveFromTree()
+{
+ GenerationHandler g;
+ MyTree tree;
+ std::vector<LeafPair> exp;
+ std::vector<LeafPair> sorted;
+ size_t numEntries = 1000;
+ generateData(exp, numEntries);
+ sorted = exp;
+ std::sort(sorted.begin(), sorted.end(), LeafPairLess());
+ // insert entries
+ for (size_t i = 0; i < numEntries; ++i) {
+ int num = UNWRAP(exp[i].first);
+ const std::string & str = exp[i].second;
+ EXPECT_TRUE(!tree.find(num).valid());
+ //LOG(info, "insert[%zu](%d, %s)", i, num, str.c_str());
+ EXPECT_TRUE(tree.insert(num, str));
+ EXPECT_TRUE(!tree.insert(num, str));
+ for (size_t j = 0; j <= i; ++j) {
+ //LOG(info, "find[%zu](%d)", j, exp[j].first._val);
+ MyTree::Iterator itr = tree.find(exp[j].first);
+ EXPECT_TRUE(itr.valid());
+ EXPECT_EQUAL(exp[j].first, itr.getKey());
+ EXPECT_EQUAL(exp[j].second, itr.getData());
+ }
+ EXPECT_EQUAL(i + 1u, tree.size());
+ EXPECT_TRUE(tree.isValid());
+ buildSubTree(exp, i + 1);
+ }
+ //std::cout << "tree: " << tree.toString() << std::endl;
+
+ {
+ MyTree::Iterator itr = tree.begin();
+ MyTree::Iterator itre = itr;
+ MyTree::Iterator itre2;
+ MyTree::Iterator ritr = itr;
+ while (itre.valid())
+ ++itre;
+ if (numEntries > 0) {
+ EXPECT_TRUE(ritr.valid());
+ EXPECT_EQUAL(0u, ritr.position());
+ --ritr;
+ EXPECT_TRUE(!ritr.valid());
+ EXPECT_EQUAL(numEntries, ritr.position());
+ --ritr;
+ EXPECT_TRUE(ritr.valid());
+ EXPECT_EQUAL(numEntries - 1, ritr.position());
+ } else {
+ EXPECT_TRUE(!ritr.valid());
+ EXPECT_EQUAL(0u, ritr.position());
+ --ritr;
+ EXPECT_TRUE(!ritr.valid());
+ EXPECT_EQUAL(0u, ritr.position());
+ }
+ MyTree::Iterator pitr = itr;
+ for (size_t i = 0; i < numEntries; ++i) {
+ ssize_t si = i;
+ ssize_t sileft = numEntries - i;
+ EXPECT_TRUE(itr.valid());
+ EXPECT_EQUAL(i, itr.position());
+ EXPECT_EQUAL(sileft, itre - itr);
+ EXPECT_EQUAL(-sileft, itr - itre);
+ EXPECT_EQUAL(sileft, itre2 - itr);
+ EXPECT_EQUAL(-sileft, itr - itre2);
+ EXPECT_EQUAL(si, itr - tree.begin());
+ EXPECT_EQUAL(-si, tree.begin() - itr);
+ EXPECT_EQUAL(i != 0, itr - pitr);
+ EXPECT_EQUAL(-(i != 0), pitr - itr);
+ EXPECT_EQUAL(sorted[i].first, itr.getKey());
+ EXPECT_EQUAL(sorted[i].second, itr.getData());
+ pitr = itr;
+ ++itr;
+ ritr = itr;
+ --ritr;
+ EXPECT_TRUE(ritr.valid());
+ EXPECT_TRUE(ritr == pitr);
+ }
+ EXPECT_TRUE(!itr.valid());
+ EXPECT_EQUAL(numEntries, itr.position());
+ ssize_t sNumEntries = numEntries;
+ EXPECT_EQUAL(sNumEntries, itr - tree.begin());
+ EXPECT_EQUAL(-sNumEntries, tree.begin() - itr);
+ EXPECT_EQUAL(1, itr - pitr);
+ EXPECT_EQUAL(-1, pitr - itr);
+ }
+ // compact full tree by calling incremental compaction methods in a loop
+ {
+ MyTree::NodeAllocatorType &manager = tree.getAllocator();
+ std::vector<uint32_t> toHold = manager.startCompact();
+ MyTree::Iterator itr = tree.begin();
+ tree.setRoot(itr.moveFirstLeafNode(tree.getRoot()));
+ while (itr.valid()) {
+ // LOG(info, "Leaf moved to %d", UNWRAP(itr.getKey()));
+ itr.moveNextLeafNode();
+ }
+ manager.finishCompact(toHold);
+ manager.freeze();
+ manager.transferHoldLists(g.getCurrentGeneration());
+ g.incGeneration();
+ manager.trimHoldLists(g.getFirstUsedGeneration());
+ }
+ // remove entries
+ for (size_t i = 0; i < numEntries; ++i) {
+ int num = UNWRAP(exp[i].first);
+ //LOG(info, "remove[%zu](%d)", i, num);
+ //std::cout << "tree: " << tree.toString() << std::endl;
+ EXPECT_TRUE(tree.remove(num));
+ EXPECT_TRUE(!tree.find(num).valid());
+ EXPECT_TRUE(!tree.remove(num));
+ EXPECT_TRUE(tree.isValid());
+ for (size_t j = i + 1; j < numEntries; ++j) {
+ MyTree::Iterator itr = tree.find(exp[j].first);
+ EXPECT_TRUE(itr.valid());
+ EXPECT_EQUAL(exp[j].first, itr.getKey());
+ EXPECT_EQUAL(exp[j].second, itr.getData());
+ }
+ EXPECT_EQUAL(numEntries - 1 - i, tree.size());
+ }
+}
+
+void
+Test::requireThatSortedTreeInsertWorks()
+{
+ {
+ GenerationHandler g;
+ MyTree tree;
+ for (int i = 0; i < 1000; ++i) {
+ EXPECT_TRUE(tree.insert(i, toStr(i)));
+ MyTree::Iterator itr = tree.find(i);
+ EXPECT_TRUE(itr.valid());
+ EXPECT_EQUAL(toStr(i), itr.getData());
+ EXPECT_TRUE(tree.isValid());
+ }
+ }
+ {
+ GenerationHandler g;
+ MyTree tree;
+ for (int i = 1000; i > 0; --i) {
+ EXPECT_TRUE(tree.insert(i, toStr(i)));
+ MyTree::Iterator itr = tree.find(i);
+ EXPECT_TRUE(itr.valid());
+ EXPECT_EQUAL(toStr(i), itr.getData());
+ EXPECT_TRUE(tree.isValid());
+ }
+ }
+}
+
+void
+Test::requireThatCornerCaseTreeFindWorks()
+{
+ GenerationHandler g;
+ MyTree tree;
+ for (int i = 1; i < 100; ++i) {
+ tree.insert(i, toStr(i));
+ }
+ EXPECT_TRUE(!tree.find(0).valid()); // lower than lowest
+ EXPECT_TRUE(!tree.find(1000).valid()); // higher than highest
+}
+
+void
+Test::requireThatBasicTreeIteratorWorks()
+{
+ GenerationHandler g;
+ MyTree tree;
+ EXPECT_TRUE(!tree.begin().valid());
+ std::vector<LeafPair> exp;
+ size_t numEntries = 1000;
+ generateData(exp, numEntries);
+ for (size_t i = 0; i < numEntries; ++i) {
+ tree.insert(exp[i].first, exp[i].second);
+ }
+ std::sort(exp.begin(), exp.end(), LeafPairLess());
+ size_t ei = 0;
+ MyTree::Iterator itr = tree.begin();
+ MyTree::Iterator ritr;
+ EXPECT_EQUAL(1000u, itr.size());
+ for (; itr.valid(); ++itr) {
+ //LOG(info, "itr(%d, %s)", itr.getKey(), itr.getData().c_str());
+ EXPECT_EQUAL(UNWRAP(exp[ei].first), UNWRAP(itr.getKey()));
+ EXPECT_EQUAL(exp[ei].second, itr.getData());
+ ei++;
+ ritr = itr;
+ }
+ EXPECT_EQUAL(numEntries, ei);
+ for (; ritr.valid(); --ritr) {
+ --ei;
+ //LOG(info, "itr(%d, %s)", itr.getKey(), itr.getData().c_str());
+ EXPECT_EQUAL(UNWRAP(exp[ei].first), UNWRAP(ritr.getKey()));
+ EXPECT_EQUAL(exp[ei].second, ritr.getData());
+ }
+}
+
+void
+Test::requireThatTreeIteratorSeekWorks()
+{
+ GenerationHandler g;
+ MyTree tree;
+ for (int i = 0; i < 40; i += 2) {
+ tree.insert(i, toStr(i));
+ }
+ //std::cout << tree.toString() << std::endl;
+ EXPECT_TRUE(assertSeek(2, 2, tree)); // next key
+ EXPECT_TRUE(assertSeek(10, 10, tree)); // skip to existing
+ EXPECT_TRUE(assertSeek(26, 26, tree)); // skip to existing
+ EXPECT_TRUE(assertSeek(11, 12, tree)); // skip to non-existing
+ EXPECT_TRUE(assertSeek(23, 24, tree)); // skip to non-existing
+ {
+ MyTree::Iterator itr = tree.begin();
+ EXPECT_TRUE(assertSeek(4, 4, itr));
+ EXPECT_TRUE(assertSeek(14, 14, itr));
+ EXPECT_TRUE(assertSeek(18, 18, itr));
+ EXPECT_TRUE(assertSeek(36, 36, itr));
+ }
+ {
+ MyTree::Iterator itr = tree.begin();
+ EXPECT_TRUE(assertSeek(3, 4, itr));
+ EXPECT_TRUE(assertSeek(13, 14, itr));
+ EXPECT_TRUE(assertSeek(17, 18, itr));
+ EXPECT_TRUE(assertSeek(35, 36, itr));
+ }
+ {
+ MyTree::Iterator itr = tree.begin();
+ MyTree::Iterator itr2 = tree.begin();
+ itr.binarySeek(40); // outside
+ itr2.linearSeek(40); // outside
+ EXPECT_TRUE(!itr.valid());
+ EXPECT_TRUE(!itr2.valid());
+ }
+ {
+ MyTree::Iterator itr = tree.begin();
+ EXPECT_TRUE(assertSeek(8, 8, itr));
+ for (int i = 10; i < 40; i += 2) {
+ ++itr;
+ EXPECT_EQUAL(i, UNWRAP(itr.getKey()));
+ }
+ }
+ {
+ MyTree::Iterator itr = tree.begin();
+ EXPECT_TRUE(assertSeek(26, 26, itr));
+ for (int i = 28; i < 40; i += 2) {
+ ++itr;
+ EXPECT_EQUAL(i, UNWRAP(itr.getKey()));
+ }
+ }
+ GenerationHandler g2;
+ MyTree tree2; // only leaf node
+ tree2.insert(0, "0");
+ tree2.insert(2, "2");
+ tree2.insert(4, "4");
+ EXPECT_TRUE(assertSeek(1, 2, tree2));
+ EXPECT_TRUE(assertSeek(2, 2, tree2));
+ {
+ MyTree::Iterator itr = tree2.begin();
+ MyTree::Iterator itr2 = tree2.begin();
+ itr.binarySeek(5); // outside
+ itr2.linearSeek(5); // outside
+ EXPECT_TRUE(!itr.valid());
+ EXPECT_TRUE(!itr2.valid());
+ }
+}
+
+void
+Test::requireThatTreeIteratorAssignWorks()
+{
+ GenerationHandler g;
+ MyTree tree;
+ for (int i = 0; i < 1000; ++i) {
+ tree.insert(i, toStr(i));
+ }
+ for (int i = 0; i < 1000; ++i) {
+ MyTree::Iterator itr = tree.find(i);
+ MyTree::Iterator itr2 = itr;
+ EXPECT_TRUE(itr == itr2);
+ int expNum = i;
+ for (; itr2.valid(); ++itr2) {
+ EXPECT_EQUAL(expNum++, UNWRAP(itr2.getKey()));
+ }
+ EXPECT_EQUAL(1000, expNum);
+ }
+}
+
+void
+Test::requireThatMemoryUsageIsCalculated()
+{
+ typedef BTreeNodeAllocator<int32_t, int8_t,
+ btree::NoAggregated,
+ MyTraits::INTERNAL_SLOTS, MyTraits::LEAF_SLOTS> NodeAllocator;
+ typedef NodeAllocator::InternalNodeType INode;
+ typedef NodeAllocator::LeafNodeType LNode;
+ typedef NodeAllocator::InternalNodeTypeRefPair IRef;
+ typedef NodeAllocator::LeafNodeTypeRefPair LRef;
+ LOG(info, "sizeof(BTreeNode)=%zu, sizeof(INode)=%zu, sizeof(LNode)=%zu",
+ sizeof(BTreeNode), sizeof(INode), sizeof(LNode));
+ EXPECT_GREATER(sizeof(INode), sizeof(LNode));
+ GenerationHandler gh;
+ gh.incGeneration();
+ NodeAllocator tm;
+ MemoryUsage mu;
+ const uint32_t initialInternalNodes = 128u;
+ const uint32_t initialLeafNodes = 128u;
+ mu.incAllocatedBytes(sizeof(INode) * initialInternalNodes);
+ mu.incAllocatedBytes(sizeof(LNode) * initialLeafNodes);
+ mu.incUsedBytes(sizeof(INode));
+ mu.incDeadBytes(sizeof(INode));
+ EXPECT_TRUE(assertMemoryUsage(mu, tm.getMemoryUsage()));
+
+ // add internal node
+ IRef ir = tm.allocInternalNode(1);
+ mu.incUsedBytes(sizeof(INode));
+ EXPECT_TRUE(assertMemoryUsage(mu, tm.getMemoryUsage()));
+
+ // add leaf node
+ LRef lr = tm.allocLeafNode();
+ mu.incUsedBytes(sizeof(LNode));
+ EXPECT_TRUE(assertMemoryUsage(mu, tm.getMemoryUsage()));
+
+ // move nodes to hold list
+ tm.freeze(); // mark allocated nodes as frozen so we can hold them later on
+ tm.holdNode(ir.first, ir.second);
+ mu.incAllocatedBytesOnHold(sizeof(INode));
+ EXPECT_TRUE(assertMemoryUsage(mu, tm.getMemoryUsage()));
+ tm.holdNode(lr.first, lr.second);
+ mu.incAllocatedBytesOnHold(sizeof(LNode));
+ EXPECT_TRUE(assertMemoryUsage(mu, tm.getMemoryUsage()));
+
+ // trim hold lists
+ tm.transferHoldLists(gh.getCurrentGeneration());
+ gh.incGeneration();
+ tm.trimHoldLists(gh.getFirstUsedGeneration());
+ mu = MemoryUsage();
+ mu.incAllocatedBytes(sizeof(INode) * initialInternalNodes);
+ mu.incAllocatedBytes(sizeof(LNode) * initialLeafNodes);
+ mu.incUsedBytes(sizeof(INode) * 2);
+ mu.incDeadBytes(sizeof(INode) * 2);
+ mu.incUsedBytes(sizeof(LNode));
+ mu.incDeadBytes(sizeof(LNode));
+ EXPECT_TRUE(assertMemoryUsage(mu, tm.getMemoryUsage()));
+}
+
+template <typename TreeType>
+void
+Test::requireThatLowerBoundWorksT()
+{
+ GenerationHandler g;
+ TreeType t;
+ EXPECT_TRUE(t.insert(10, BTreeNoLeafData()));
+ EXPECT_TRUE(t.insert(20, BTreeNoLeafData()));
+ EXPECT_TRUE(t.insert(30, BTreeNoLeafData()));
+ EXPECT_EQUAL(10, t.lowerBound(9).getKey());
+ EXPECT_EQUAL(20, t.lowerBound(20).getKey());
+ EXPECT_EQUAL(30, t.lowerBound(21).getKey());
+ EXPECT_EQUAL(30, t.lowerBound(30).getKey());
+ EXPECT_TRUE(!t.lowerBound(31).valid());
+ for (int i = 40; i < 1000; i+=10) {
+ EXPECT_TRUE(t.insert(i, BTreeNoLeafData()));
+ }
+ for (int i = 9; i < 990; i+=10) {
+ EXPECT_EQUAL(i + 1, t.lowerBound(i).getKey());
+ EXPECT_EQUAL(i + 1, t.lowerBound(i + 1).getKey());
+ }
+ EXPECT_TRUE(!t.lowerBound(991).valid());
+}
+
+void
+Test::requireThatLowerBoundWorks()
+{
+ requireThatLowerBoundWorksT<SetTreeB>();
+ requireThatLowerBoundWorksT<SetTreeL>();
+}
+
+template <typename TreeType>
+void
+Test::requireThatUpperBoundWorksT()
+{
+ GenerationHandler g;
+ TreeType t;
+ EXPECT_TRUE(t.insert(10, BTreeNoLeafData()));
+ EXPECT_TRUE(t.insert(20, BTreeNoLeafData()));
+ EXPECT_TRUE(t.insert(30, BTreeNoLeafData()));
+ EXPECT_EQUAL(10, t.upperBound(9).getKey());
+ EXPECT_EQUAL(30, t.upperBound(20).getKey());
+ EXPECT_EQUAL(30, t.upperBound(21).getKey());
+ EXPECT_TRUE(!t.upperBound(30).valid());
+ for (int i = 40; i < 1000; i+=10) {
+ EXPECT_TRUE(t.insert(i, BTreeNoLeafData()));
+ }
+ for (int i = 9; i < 980; i+=10) {
+ EXPECT_EQUAL(i + 1, t.upperBound(i).getKey());
+ EXPECT_EQUAL(i + 11, t.upperBound(i + 1).getKey());
+ }
+ EXPECT_TRUE(!t.upperBound(990).valid());
+}
+
+void
+Test::requireThatUpperBoundWorks()
+{
+ requireThatUpperBoundWorksT<SetTreeB>();
+ requireThatUpperBoundWorksT<SetTreeL>();
+}
+
+struct UpdKeyComp {
+ int _remainder;
+ mutable size_t _numErrors;
+ UpdKeyComp(int remainder) : _remainder(remainder), _numErrors(0) {}
+ bool operator() (const int & lhs, const int & rhs) const {
+ if (lhs % 2 != _remainder) ++_numErrors;
+ if (rhs % 2 != _remainder) ++_numErrors;
+ return lhs < rhs;
+ }
+};
+
+void
+Test::requireThatUpdateOfKeyWorks()
+{
+ typedef BTree<int, BTreeNoLeafData,
+ btree::NoAggregated,
+ UpdKeyComp &> UpdKeyTree;
+ typedef UpdKeyTree::Iterator UpdKeyTreeIterator;
+ GenerationHandler g;
+ UpdKeyTree t;
+ UpdKeyComp cmp1(0);
+ for (int i = 0; i < 1000; i+=2) {
+ EXPECT_TRUE(t.insert(i, BTreeNoLeafData(), cmp1));
+ }
+ EXPECT_EQUAL(0u, cmp1._numErrors);
+ for (int i = 0; i < 1000; i+=2) {
+ UpdKeyTreeIterator itr = t.find(i, cmp1);
+ itr.writeKey(i + 1);
+ }
+ UpdKeyComp cmp2(1);
+ for (int i = 1; i < 1000; i+=2) {
+ UpdKeyTreeIterator itr = t.find(i, cmp2);
+ EXPECT_TRUE(itr.valid());
+ }
+ EXPECT_EQUAL(0u, cmp2._numErrors);
+}
+
+
+void
+Test::requireThatSmallNodesWorks(void)
+{
+ typedef BTreeStore<MyKey, std::string, btree::NoAggregated, MyComp,
+ BTreeDefaultTraits> TreeStore;
+ GenerationHandler g;
+ TreeStore s;
+
+ EntryRef root;
+ EXPECT_EQUAL(0u, s.size(root));
+ EXPECT_TRUE(s.isSmallArray(root));
+ EXPECT_TRUE(s.insert(root, 40, "fourty"));
+ EXPECT_TRUE(!s.insert(root, 40, "fourty.not"));
+ EXPECT_EQUAL(1u, s.size(root));
+ EXPECT_TRUE(s.isSmallArray(root));
+ EXPECT_TRUE(s.insert(root, 20, "twenty"));
+ EXPECT_TRUE(!s.insert(root, 20, "twenty.not"));
+ EXPECT_TRUE(!s.insert(root, 40, "fourty.not"));
+ EXPECT_EQUAL(2u, s.size(root));
+ EXPECT_TRUE(s.isSmallArray(root));
+ EXPECT_TRUE(s.insert(root, 60, "sixty"));
+ EXPECT_TRUE(!s.insert(root, 60, "sixty.not"));
+ EXPECT_TRUE(!s.insert(root, 20, "twenty.not"));
+ EXPECT_TRUE(!s.insert(root, 40, "fourty.not"));
+ EXPECT_EQUAL(3u, s.size(root));
+ EXPECT_TRUE(s.isSmallArray(root));
+ EXPECT_TRUE(s.insert(root, 50, "fifty"));
+ EXPECT_TRUE(!s.insert(root, 50, "fifty.not"));
+ EXPECT_TRUE(!s.insert(root, 60, "sixty.not"));
+ EXPECT_TRUE(!s.insert(root, 20, "twenty.not"));
+ EXPECT_TRUE(!s.insert(root, 40, "fourty.not"));
+ EXPECT_EQUAL(4u, s.size(root));
+ EXPECT_TRUE(s.isSmallArray(root));
+
+ for (uint32_t i = 0; i < 100; ++i) {
+ EXPECT_TRUE(s.insert(root, 1000 + i, "big"));
+ if (i > 0) {
+ EXPECT_TRUE(!s.insert(root, 1000 + i - 1, "big"));
+ }
+ EXPECT_EQUAL(5u + i, s.size(root));
+ EXPECT_EQUAL(5u + i <= 8u, s.isSmallArray(root));
+ }
+ EXPECT_TRUE(s.remove(root, 40));
+ EXPECT_TRUE(!s.remove(root, 40));
+ EXPECT_EQUAL(103u, s.size(root));
+ EXPECT_TRUE(!s.isSmallArray(root));
+ EXPECT_TRUE(s.remove(root, 20));
+ EXPECT_TRUE(!s.remove(root, 20));
+ EXPECT_EQUAL(102u, s.size(root));
+ EXPECT_TRUE(!s.isSmallArray(root));
+ EXPECT_TRUE(s.remove(root, 50));
+ EXPECT_TRUE(!s.remove(root, 50));
+ EXPECT_EQUAL(101u, s.size(root));
+ EXPECT_TRUE(!s.isSmallArray(root));
+ for (uint32_t i = 0; i < 100; ++i) {
+ EXPECT_TRUE(s.remove(root, 1000 + i));
+ if (i > 0) {
+ EXPECT_TRUE(!s.remove(root, 1000 + i - 1));
+ }
+ EXPECT_EQUAL(100 - i, s.size(root));
+ EXPECT_EQUAL(100 - i <= 8u, s.isSmallArray(root));
+ }
+ EXPECT_EQUAL(1u, s.size(root));
+ EXPECT_TRUE(s.isSmallArray(root));
+
+ s.clear(root);
+ s.clearBuilder();
+ s.freeze();
+ s.transferHoldLists(g.getCurrentGeneration());
+ g.incGeneration();
+ s.trimHoldLists(g.getFirstUsedGeneration());
+}
+
+
+void
+Test::requireThatApplyWorks(void)
+{
+ typedef BTreeStore<MyKey, std::string, btree::NoAggregated, MyComp,
+ BTreeDefaultTraits> TreeStore;
+ typedef TreeStore::KeyType KeyType;
+ typedef TreeStore::KeyDataType KeyDataType;
+ GenerationHandler g;
+ TreeStore s;
+ std::vector<KeyDataType> additions;
+ std::vector<KeyType> removals;
+
+ EntryRef root;
+ EXPECT_EQUAL(0u, s.size(root));
+ EXPECT_TRUE(s.isSmallArray(root));
+
+ additions.clear();
+ removals.clear();
+ additions.push_back(KeyDataType(40, "fourty"));
+ s.apply(root, &additions[0], &additions[0] + additions.size(),
+ &removals[0], &removals[0] + removals.size());
+ EXPECT_EQUAL(1u, s.size(root));
+ EXPECT_TRUE(s.isSmallArray(root));
+
+ additions.clear();
+ removals.clear();
+ additions.push_back(KeyDataType(20, "twenty"));
+ s.apply(root, &additions[0], &additions[0] + additions.size(),
+ &removals[0], &removals[0] + removals.size());
+ EXPECT_EQUAL(2u, s.size(root));
+ EXPECT_TRUE(s.isSmallArray(root));
+
+ additions.clear();
+ removals.clear();
+ additions.push_back(KeyDataType(60, "sixty"));
+ s.apply(root, &additions[0], &additions[0] + additions.size(),
+ &removals[0], &removals[0] + removals.size());
+ EXPECT_EQUAL(3u, s.size(root));
+ EXPECT_TRUE(s.isSmallArray(root));
+
+ additions.clear();
+ removals.clear();
+ additions.push_back(KeyDataType(50, "fifty"));
+ s.apply(root, &additions[0], &additions[0] + additions.size(),
+ &removals[0], &removals[0] + removals.size());
+ EXPECT_EQUAL(4u, s.size(root));
+ EXPECT_TRUE(s.isSmallArray(root));
+
+ for (uint32_t i = 0; i < 100; ++i) {
+ additions.clear();
+ removals.clear();
+ additions.push_back(KeyDataType(1000 + i, "big"));
+ s.apply(root, &additions[0], &additions[0] + additions.size(),
+ &removals[0], &removals[0] + removals.size());
+ EXPECT_EQUAL(5u + i, s.size(root));
+ EXPECT_EQUAL(5u + i <= 8u, s.isSmallArray(root));
+ }
+
+ additions.clear();
+ removals.clear();
+ removals.push_back(40);
+ s.apply(root, &additions[0], &additions[0] + additions.size(),
+ &removals[0], &removals[0] + removals.size());
+ EXPECT_EQUAL(103u, s.size(root));
+ EXPECT_TRUE(!s.isSmallArray(root));
+
+ additions.clear();
+ removals.clear();
+ removals.push_back(20);
+ s.apply(root, &additions[0], &additions[0] + additions.size(),
+ &removals[0], &removals[0] + removals.size());
+ EXPECT_EQUAL(102u, s.size(root));
+ EXPECT_TRUE(!s.isSmallArray(root));
+
+ additions.clear();
+ removals.clear();
+ removals.push_back(50);
+ s.apply(root, &additions[0], &additions[0] + additions.size(),
+ &removals[0], &removals[0] + removals.size());
+ EXPECT_EQUAL(101u, s.size(root));
+ EXPECT_TRUE(!s.isSmallArray(root));
+ for (uint32_t i = 0; i < 100; ++i) {
+ additions.clear();
+ removals.clear();
+ removals.push_back(1000 +i);
+ s.apply(root, &additions[0], &additions[0] + additions.size(),
+ &removals[0], &removals[0] + removals.size());
+ EXPECT_EQUAL(100 - i, s.size(root));
+ EXPECT_EQUAL(100 - i <= 8u, s.isSmallArray(root));
+ }
+ EXPECT_EQUAL(1u, s.size(root));
+ EXPECT_TRUE(s.isSmallArray(root));
+
+ additions.clear();
+ removals.clear();
+ for (uint32_t i = 0; i < 20; ++i)
+ additions.push_back(KeyDataType(1000 + i, "big"));
+ removals.push_back(60);
+ removals.push_back(1002);
+ s.apply(root, &additions[0], &additions[0] + additions.size(),
+ &removals[0], &removals[0] + removals.size());
+ EXPECT_EQUAL(20u, s.size(root));
+ EXPECT_TRUE(!s.isSmallArray(root));
+
+ additions.clear();
+ s.apply(root, &additions[0], &additions[0] + additions.size(),
+ &removals[0], &removals[0] + removals.size());
+ EXPECT_EQUAL(19u, s.size(root));
+ EXPECT_TRUE(!s.isSmallArray(root));
+
+ additions.clear();
+ removals.clear();
+ for (uint32_t i = 0; i < 20; ++i)
+ additions.push_back(KeyDataType(1100 + i, "big"));
+ for (uint32_t i = 0; i < 10; ++i)
+ removals.push_back(1000 + i);
+ s.apply(root, &additions[0], &additions[0] + additions.size(),
+ &removals[0], &removals[0] + removals.size());
+ EXPECT_EQUAL(30u, s.size(root));
+ EXPECT_TRUE(!s.isSmallArray(root));
+
+ s.clear(root);
+ s.clearBuilder();
+ s.freeze();
+ s.transferHoldLists(g.getCurrentGeneration());
+ g.incGeneration();
+ s.trimHoldLists(g.getFirstUsedGeneration());
+}
+
+class MyTreeTestIterator : public MyTree::Iterator
+{
+public:
+ MyTreeTestIterator(const MyTree::Iterator &rhs)
+ : MyTree::Iterator(rhs)
+ {
+ }
+
+ int
+ getPathSize(void) const
+ {
+ return _pathSize;
+ }
+};
+
+
+void
+Test::requireThatIteratorDistanceWorks(int numEntries)
+{
+ GenerationHandler g;
+ MyTree tree;
+ typedef MyTree::Iterator Iterator;
+ for (int i = 0; i < numEntries; ++i) {
+ tree.insert(i, toStr(i));
+ }
+ MyTreeTestIterator tit = tree.begin();
+ LOG(info,
+ "numEntries=%d, iterator pathSize=%d",
+ numEntries, tit.getPathSize());
+ Iterator it = tree.begin();
+ for (int i = 0; i <= numEntries; ++i) {
+ Iterator iit = tree.lowerBound(i);
+ Iterator iitn = tree.lowerBound(i + 1);
+ Iterator iitu = tree.upperBound(i);
+ Iterator iitls = tree.begin();
+ Iterator iitbs = tree.begin();
+ Iterator iitlsp = tree.begin();
+ Iterator iitbsp = tree.begin();
+ Iterator iitlb(tree.getRoot(), tree.getAllocator());
+ iitlb.lower_bound(i);
+ Iterator iitlb2(BTreeNode::Ref(), tree.getAllocator());
+ iitlb2.lower_bound(tree.getRoot(), i);
+ if (i > 0) {
+ iitls.linearSeek(i);
+ iitbs.binarySeek(i);
+ ++it;
+ }
+ iitlsp.linearSeekPast(i);
+ iitbsp.binarySeekPast(i);
+ Iterator iitlsp2 = iitls;
+ Iterator iitbsp2 = iitbs;
+ Iterator iitnr = i < numEntries ? iitn : tree.begin();
+ --iitnr;
+ if (i < numEntries) {
+ iitlsp2.linearSeekPast(i);
+ iitbsp2.binarySeekPast(i);
+ }
+ EXPECT_EQUAL(i, static_cast<int>(iit.position()));
+ EXPECT_EQUAL(i < numEntries, iit.valid());
+ EXPECT_TRUE(iit.identical(it));
+ EXPECT_TRUE(iit.identical(iitls));
+ EXPECT_TRUE(iit.identical(iitbs));
+ EXPECT_TRUE(iit.identical(iitnr));
+ EXPECT_TRUE(iit.identical(iitlb));
+ EXPECT_TRUE(iit.identical(iitlb2));
+ EXPECT_TRUE(iitn.identical(iitu));
+ EXPECT_TRUE(iitn.identical(iitlsp));
+ EXPECT_TRUE(iitn.identical(iitbsp));
+ EXPECT_TRUE(iitn.identical(iitlsp2));
+ EXPECT_TRUE(iitn.identical(iitbsp2));
+ if (i < numEntries) {
+ EXPECT_EQUAL(i + 1, static_cast<int>(iitn.position()));
+ EXPECT_EQUAL(i + 1 < numEntries, iitn.valid());
+ }
+ for (int j = 0; j <= numEntries; ++j) {
+ Iterator jit = tree.lowerBound(j);
+ EXPECT_EQUAL(j, static_cast<int>(jit.position()));
+ EXPECT_EQUAL(j < numEntries, jit.valid());
+ EXPECT_EQUAL(i - j, iit - jit);
+ EXPECT_EQUAL(j - i, jit - iit);
+
+ Iterator jit2 = jit;
+ jit2.setupEnd();
+ EXPECT_EQUAL(numEntries - j, jit2 - jit);
+ EXPECT_EQUAL(numEntries - i, jit2 - iit);
+ EXPECT_EQUAL(j - numEntries, jit - jit2);
+ EXPECT_EQUAL(i - numEntries, iit - jit2);
+ }
+ }
+}
+
+
+void
+Test::requireThatIteratorDistanceWorks()
+{
+ requireThatIteratorDistanceWorks(1);
+ requireThatIteratorDistanceWorks(3);
+ requireThatIteratorDistanceWorks(8);
+ requireThatIteratorDistanceWorks(20);
+ requireThatIteratorDistanceWorks(100);
+ requireThatIteratorDistanceWorks(400);
+}
+
+
+int
+Test::Main()
+{
+ TEST_INIT("btree_test");
+
+ requireThatNodeInsertWorks();
+ requireThatNodeSplitInsertWorks();
+ requireThatNodeStealWorks();
+ requireThatNodeRemoveWorks();
+ requireThatNodeLowerBoundWorks();
+ requireThatWeCanInsertAndRemoveFromTree();
+ requireThatSortedTreeInsertWorks();
+ requireThatCornerCaseTreeFindWorks();
+ requireThatBasicTreeIteratorWorks();
+ requireThatTreeIteratorSeekWorks();
+ requireThatTreeIteratorAssignWorks();
+ requireThatMemoryUsageIsCalculated();
+ requireThatLowerBoundWorks();
+ requireThatUpperBoundWorks();
+ requireThatUpdateOfKeyWorks();
+ requireThatSmallNodesWorks();
+ requireThatApplyWorks();
+ requireThatIteratorDistanceWorks();
+
+ TEST_DONE();
+}
+
+}
+}
+
+TEST_APPHOOK(search::btree::Test);
diff --git a/searchlib/src/tests/memoryindex/btree/frozenbtree_test.cpp b/searchlib/src/tests/memoryindex/btree/frozenbtree_test.cpp
new file mode 100644
index 00000000000..817d024c60f
--- /dev/null
+++ b/searchlib/src/tests/memoryindex/btree/frozenbtree_test.cpp
@@ -0,0 +1,513 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("frozenbtree_test");
+#define DEBUG_FROZENBTREE
+#define LOG_FROZENBTREEXX
+#include <vespa/vespalib/testkit/testapp.h>
+#include <vespa/searchlib/util/rand48.h>
+#include <vespa/searchlib/btree/btreeroot.h>
+#include <vespa/searchlib/btree/btreenodeallocator.h>
+#include <vespa/searchlib/btree/btreeiterator.hpp>
+#include <vespa/searchlib/btree/btreeroot.hpp>
+#include <vespa/searchlib/btree/btreenodeallocator.hpp>
+#include <vespa/searchlib/btree/btreenode.hpp>
+#include <vespa/searchlib/btree/btreenodestore.hpp>
+#include <algorithm>
+#include <limits>
+#include <map>
+
+using search::btree::BTreeRoot;
+using search::btree::BTreeNode;
+using search::btree::BTreeInternalNode;
+using search::btree::BTreeLeafNode;
+using search::btree::BTreeDefaultTraits;
+using vespalib::GenerationHandler;
+
+namespace search {
+
+
+class FrozenBTreeTest : public vespalib::TestApp
+{
+public:
+ typedef int KeyType;
+private:
+ std::vector<KeyType> _randomValues;
+ std::vector<KeyType> _sortedRandomValues;
+
+public:
+ typedef int DataType;
+ typedef BTreeRoot<KeyType, DataType,
+ btree::NoAggregated,
+ std::less<KeyType>,
+ BTreeDefaultTraits> Tree;
+ typedef Tree::NodeAllocatorType NodeAllocator;
+ typedef Tree::InternalNodeType InternalNodeType;
+ typedef Tree::LeafNodeType LeafNodeType;
+ typedef Tree::Iterator Iterator;
+ typedef Tree::ConstIterator ConstIterator;
+private:
+ GenerationHandler *_generationHandler;
+ NodeAllocator *_allocator;
+ Tree *_tree;
+
+ Rand48 _randomGenerator;
+
+ void
+ allocTree(void);
+
+ void
+ freeTree(bool verbose);
+
+ void
+ fillRandomValues(unsigned int count);
+
+ void
+ insertRandomValues(Tree &tree,
+ NodeAllocator &allocator,
+ const std::vector<KeyType> &values);
+
+ void
+ removeRandomValues(Tree &tree,
+ NodeAllocator &allocator,
+ const std::vector<KeyType> &values);
+
+ void
+ lookupRandomValues(const Tree &tree,
+ NodeAllocator &allocator,
+ const std::vector<KeyType> &values);
+
+ void
+ lookupGoneRandomValues(const Tree &tree,
+ NodeAllocator &allocator,
+ const std::vector<KeyType> &values);
+
+ void
+ lookupFrozenRandomValues(const Tree &tree,
+ NodeAllocator &allocator,
+ const std::vector<KeyType> &values);
+
+ void
+ sortRandomValues(void);
+
+ void
+ traverseTreeIterator(const Tree &tree,
+ NodeAllocator &allocator,
+ const std::vector<KeyType> &sorted,
+ bool frozen);
+
+ void
+ printSubEnumTree(BTreeNode::Ref node,
+ NodeAllocator &allocator,
+ int indent) const;
+
+ void
+ printEnumTree(const Tree *tree,
+ NodeAllocator &allocator);
+
+ static const char *
+ frozenName(bool frozen)
+ {
+ return frozen ? "frozen" : "thawed";
+ }
+public:
+ FrozenBTreeTest(void)
+ : vespalib::TestApp(),
+ _randomValues(),
+ _sortedRandomValues(),
+ _generationHandler(NULL),
+ _allocator(NULL),
+ _tree(NULL),
+ _randomGenerator()
+ {
+ }
+
+ int Main(void);
+};
+
+
+
+void
+FrozenBTreeTest::allocTree(void)
+{
+ assert(_generationHandler == NULL);
+ assert(_allocator == NULL);
+ assert(_tree == NULL);
+ _generationHandler = new GenerationHandler;
+ _allocator = new NodeAllocator();
+ _tree = new Tree;
+}
+
+
+void
+FrozenBTreeTest::freeTree(bool verbose)
+{
+#if 0
+ LOG(info,
+ "freeTree before clear: %" PRIu64 " (%" PRIu64 " held)"
+ ", %" PRIu32 " leaves",
+ static_cast<uint64_t>(_intTree->getUsedMemory()),
+ static_cast<uint64_t>(_intTree->getHeldMemory()),
+ _intTree->validLeaves());
+ _intTree->clear();
+ LOG(info,
+ "freeTree before unhold: %" PRIu64 " (%" PRIu64 " held)",
+ static_cast<uint64_t>(_intTree->getUsedMemory()),
+ static_cast<uint64_t>(_intTree->getHeldMemory()));
+ _intTree->dropFrozen();
+ _intTree->removeOldGenerations(_intTree->getGeneration() + 1);
+ LOG(info,
+ "freeTree after unhold: %" PRIu64 " (%" PRIu64 " held)",
+ static_cast<uint64_t>(_intTree->getUsedMemory()),
+ static_cast<uint64_t>(_intTree->getHeldMemory()));
+ if (verbose)
+ LOG(info,
+ "%d+%d leftover tree nodes",
+ _intTree->getNumInternalNodes(),
+ _intTree->getNumLeafNodes());
+ EXPECT_TRUE(_intTree->getNumInternalNodes() == 0 &&
+ _intTree->getNumLeafNodes() == 0);
+ delete _intTree;
+ _intTree = NULL;
+ delete _intKeyStore;
+ _intKeyStore = NULL;
+#endif
+ (void) verbose;
+ _tree->clear(*_allocator);
+ _allocator->freeze();
+ _allocator->transferHoldLists(_generationHandler->getCurrentGeneration());
+ _generationHandler->incGeneration();
+ _allocator->trimHoldLists(_generationHandler->getFirstUsedGeneration());
+ delete _tree;
+ _tree = NULL;
+ delete _allocator;
+ _allocator = NULL;
+ delete _generationHandler;
+ _generationHandler = NULL;
+}
+
+
+void
+FrozenBTreeTest::fillRandomValues(unsigned int count)
+{
+ unsigned int i;
+
+ LOG(info,
+ "Filling %u random values", count);
+ _randomValues.clear();
+ _randomValues.reserve(count);
+ _randomGenerator.srand48(42);
+ for (i = 0; i <count; i++)
+ _randomValues.push_back(_randomGenerator.lrand48());
+
+ EXPECT_TRUE(_randomValues.size() == count);
+}
+
+
+void
+FrozenBTreeTest::
+insertRandomValues(Tree &tree,
+ NodeAllocator &allocator,
+ const std::vector<KeyType> &values)
+{
+ std::vector<KeyType>::const_iterator i(values.begin());
+ std::vector<KeyType>::const_iterator ie(values.end());
+ Iterator p;
+
+ LOG(info, "insertRandomValues start");
+ for (; i != ie; ++i) {
+#ifdef LOG_FROZENBTREE
+ LOG(info, "Try lookup %d before insert", *i);
+#endif
+ p = tree.find(*i, allocator);
+ if (!p.valid()) {
+ DataType val = *i + 42;
+ if (tree.insert(*i, val, allocator))
+ p = tree.find(*i, allocator);
+ }
+ ASSERT_TRUE(p.valid() && p.getKey() == *i && p.getData() == *i + 42);
+#ifdef DEBUG_FROZENBTREEX
+ printEnumTree(&tree);
+#endif
+ }
+ ASSERT_TRUE(tree.isValid(allocator));
+ ASSERT_TRUE(tree.isValidFrozen(allocator));
+ LOG(info, "insertRandomValues done");
+}
+
+
+void
+FrozenBTreeTest::
+removeRandomValues(Tree &tree,
+ NodeAllocator &allocator,
+ const std::vector<KeyType> & values)
+{
+ std::vector<KeyType>::const_iterator i(values.begin());
+ std::vector<KeyType>::const_iterator ie(values.end());
+ Iterator p;
+
+ LOG(info, "removeRandomValues start");
+ for (; i != ie; ++i) {
+#ifdef LOG_FROZENBTREE
+ LOG(info, "Try lookup %d before remove", *i);
+#endif
+ p = tree.find(*i, allocator);
+ if (p.valid()) {
+ if (tree.remove(*i, allocator))
+ p = tree.find(*i, allocator);
+ }
+ ASSERT_TRUE(!p.valid());
+#ifdef DEBUG_FROZENBTREEX
+ tree.printTree();
+#endif
+ }
+ ASSERT_TRUE(tree.isValid(allocator));
+ ASSERT_TRUE(tree.isValidFrozen(allocator));
+ LOG(info, "removeRandomValues done");
+}
+
+
+void
+FrozenBTreeTest::
+lookupRandomValues(const Tree &tree,
+ NodeAllocator &allocator,
+ const std::vector<KeyType> &values)
+{
+ std::vector<KeyType>::const_iterator i(values.begin());
+ std::vector<KeyType>::const_iterator ie(values.end());
+ Iterator p;
+
+ LOG(info, "lookupRandomValues start");
+ for (; i != ie; ++i) {
+ p = tree.find(*i, allocator);
+ ASSERT_TRUE(p.valid() && p.getKey() == *i);
+ }
+ LOG(info, "lookupRandomValues done");
+}
+
+
+void
+FrozenBTreeTest::
+lookupGoneRandomValues(const Tree &tree,
+ NodeAllocator &allocator,
+ const std::vector<KeyType> &values)
+{
+ std::vector<KeyType>::const_iterator i(values.begin());
+ std::vector<KeyType>::const_iterator ie(values.end());
+ Iterator p;
+
+ LOG(info, "lookupGoneRandomValues start");
+ for (; i != ie; ++i) {
+ p = tree.find(*i, allocator);
+ ASSERT_TRUE(!p.valid());
+ }
+ LOG(info, "lookupGoneRandomValues done");
+}
+
+
+void
+FrozenBTreeTest::
+lookupFrozenRandomValues(const Tree &tree,
+ NodeAllocator &allocator,
+ const std::vector<KeyType> &values)
+{
+ std::vector<KeyType>::const_iterator i(values.begin());
+ std::vector<KeyType>::const_iterator ie(values.end());
+ ConstIterator p;
+
+ LOG(info, "lookupFrozenRandomValues start");
+ for (; i != ie; ++i) {
+ p = tree.getFrozenView(allocator).find(*i, std::less<int>());
+ ASSERT_TRUE(p.valid() && p.getKey() == *i && p.getData() == *i + 42);
+ }
+ LOG(info, "lookupFrozenRandomValues done");
+}
+
+
+void
+FrozenBTreeTest::sortRandomValues(void)
+{
+ std::vector<KeyType>::iterator i;
+ std::vector<KeyType>::iterator ie;
+ uint32_t okcnt;
+ int prevVal;
+ std::vector<KeyType> sorted;
+
+ LOG(info, "sortRandomValues start");
+ sorted = _randomValues;
+ std::sort(sorted.begin(), sorted.end());
+ _sortedRandomValues.clear();
+ _sortedRandomValues.reserve(sorted.size());
+
+ okcnt = 0;
+ prevVal = 0;
+ ie = sorted.end();
+ for (i = sorted.begin(); i != ie; ++i) {
+ if (i == _sortedRandomValues.begin() || *i > prevVal) {
+ okcnt++;
+ _sortedRandomValues.push_back(*i);
+ } else if (*i == prevVal)
+ okcnt++;
+ else
+ abort();
+ prevVal = *i;
+ }
+ EXPECT_TRUE(okcnt == sorted.size());
+ LOG(info, "sortRandomValues done");
+}
+
+
+void
+FrozenBTreeTest::
+traverseTreeIterator(const Tree &tree,
+ NodeAllocator &allocator,
+ const std::vector<KeyType> &sorted,
+ bool frozen)
+{
+ LOG(info,
+ "traverseTreeIterator %s start",
+ frozenName(frozen));
+
+ std::vector<KeyType>::const_iterator i;
+
+ i = sorted.begin();
+ if (frozen) {
+ ConstIterator ai;
+ ai = tree.getFrozenView(allocator).begin();
+ for (;ai.valid(); ++ai, ++i)
+ {
+ ASSERT_TRUE(ai.getKey() == *i);
+ }
+ } else {
+ Iterator ai;
+ ai = tree.begin(allocator);
+ for (;ai.valid(); ++ai, ++i)
+ {
+ ASSERT_TRUE(ai.getKey() == *i);
+ }
+ }
+
+
+ ASSERT_TRUE(i == sorted.end());
+
+ LOG(info,
+ "traverseTreeIterator %s done",
+ frozenName(frozen));
+}
+
+
+void
+FrozenBTreeTest::
+printSubEnumTree(BTreeNode::Ref node,
+ NodeAllocator &allocator,
+ int indent) const
+{
+ // typedef BTreeNode Node;
+ typedef LeafNodeType LeafNode;
+ typedef InternalNodeType InternalNode;
+ BTreeNode::Ref subNode;
+ unsigned int i;
+
+ if (allocator.isLeafRef(node)) {
+ const LeafNode *lnode = allocator.mapLeafRef(node);
+ printf("%*s LeafNode %s valid=%d\n",
+ indent, "",
+ lnode->getFrozen() ? "frozen" : "thawed",
+ lnode->validSlots());
+ for (i = 0; i < lnode->validSlots(); i++) {
+
+ KeyType k = lnode->getKey(i);
+ DataType d = lnode->getData(i);
+ printf("leaf value %3d %d %d\n",
+ (int) i,
+ (int) k,
+ (int) d);
+ }
+ return;
+ }
+ const InternalNode *inode = allocator.mapInternalRef(node);
+ printf("%*s IntermediteNode %s valid=%d\n",
+ indent, "",
+ inode->getFrozen() ? "frozen" : "thawed",
+ inode->validSlots());
+ for (i = 0; i < inode->validSlots(); i++) {
+ subNode = inode->getChild(i);
+ assert(subNode != BTreeNode::Ref());
+ printSubEnumTree(subNode, allocator, indent + 4);
+ }
+}
+
+
+void
+FrozenBTreeTest::printEnumTree(const Tree *tree,
+ NodeAllocator &allocator)
+{
+ printf("Tree Dump start\n");
+ if (!NodeAllocator::isValidRef(tree->getRoot())) {
+ printf("EMPTY\n");
+ } else {
+ printSubEnumTree(tree->getRoot(), allocator, 0);
+ }
+ printf("Tree Dump done\n");
+}
+
+
+
+int
+FrozenBTreeTest::Main()
+{
+ TEST_INIT("frozenbtree_test");
+
+ fillRandomValues(1000);
+ sortRandomValues();
+
+ allocTree();
+ insertRandomValues(*_tree, *_allocator, _randomValues);
+ lookupRandomValues(*_tree, *_allocator, _randomValues);
+ _allocator->freeze();
+ _allocator->transferHoldLists(_generationHandler->getCurrentGeneration());
+ lookupFrozenRandomValues(*_tree, *_allocator, _randomValues);
+ traverseTreeIterator(*_tree,
+ *_allocator,
+ _sortedRandomValues,
+ false);
+ traverseTreeIterator(*_tree,
+ *_allocator,
+ _sortedRandomValues,
+ true);
+ traverseTreeIterator(*_tree,
+ *_allocator,
+ _sortedRandomValues,
+ false);
+ traverseTreeIterator(*_tree,
+ *_allocator,
+ _sortedRandomValues,
+ true);
+ removeRandomValues(*_tree, *_allocator, _randomValues);
+ lookupGoneRandomValues(*_tree, *_allocator, _randomValues);
+ lookupFrozenRandomValues(*_tree, *_allocator,_randomValues);
+ traverseTreeIterator(*_tree,
+ *_allocator,
+ _sortedRandomValues,
+ true);
+ insertRandomValues(*_tree, *_allocator, _randomValues);
+ freeTree(true);
+
+ fillRandomValues(1000000);
+ sortRandomValues();
+
+ allocTree();
+ insertRandomValues(*_tree, *_allocator, _randomValues);
+ traverseTreeIterator(*_tree,
+ *_allocator,
+ _sortedRandomValues,
+ false);
+ freeTree(false);
+
+ TEST_DONE();
+}
+
+}
+
+TEST_APPHOOK(search::FrozenBTreeTest);
diff --git a/searchlib/src/tests/memoryindex/compact_document_words_store/.gitignore b/searchlib/src/tests/memoryindex/compact_document_words_store/.gitignore
new file mode 100644
index 00000000000..3ad290f1731
--- /dev/null
+++ b/searchlib/src/tests/memoryindex/compact_document_words_store/.gitignore
@@ -0,0 +1 @@
+searchlib_compact_document_words_store_test_app
diff --git a/searchlib/src/tests/memoryindex/compact_document_words_store/CMakeLists.txt b/searchlib/src/tests/memoryindex/compact_document_words_store/CMakeLists.txt
new file mode 100644
index 00000000000..666639f20ba
--- /dev/null
+++ b/searchlib/src/tests/memoryindex/compact_document_words_store/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_compact_document_words_store_test_app
+ SOURCES
+ compact_document_words_store_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_compact_document_words_store_test_app COMMAND searchlib_compact_document_words_store_test_app)
diff --git a/searchlib/src/tests/memoryindex/compact_document_words_store/DESC b/searchlib/src/tests/memoryindex/compact_document_words_store/DESC
new file mode 100644
index 00000000000..ee9c4b346a2
--- /dev/null
+++ b/searchlib/src/tests/memoryindex/compact_document_words_store/DESC
@@ -0,0 +1 @@
+compact_document_words_store test. Take a look at compact_document_words_store_test.cpp for details.
diff --git a/searchlib/src/tests/memoryindex/compact_document_words_store/FILES b/searchlib/src/tests/memoryindex/compact_document_words_store/FILES
new file mode 100644
index 00000000000..fb2fb1d637b
--- /dev/null
+++ b/searchlib/src/tests/memoryindex/compact_document_words_store/FILES
@@ -0,0 +1 @@
+compact_document_words_store_test.cpp
diff --git a/searchlib/src/tests/memoryindex/compact_document_words_store/compact_document_words_store_test.cpp b/searchlib/src/tests/memoryindex/compact_document_words_store/compact_document_words_store_test.cpp
new file mode 100644
index 00000000000..2a3bffb2fe6
--- /dev/null
+++ b/searchlib/src/tests/memoryindex/compact_document_words_store/compact_document_words_store_test.cpp
@@ -0,0 +1,157 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".memoryindex.compact_document_words_store_test");
+#include <vespa/vespalib/testkit/testapp.h>
+#include <vespa/searchlib/btree/entryref.h>
+#include <vespa/searchlib/memoryindex/compact_document_words_store.h>
+#include <vespa/vespalib/stllike/string.h>
+#include <iostream>
+#include <map>
+
+using namespace search;
+using namespace search::btree;
+using namespace search::memoryindex;
+
+typedef CompactDocumentWordsStore::Builder Builder;
+typedef CompactDocumentWordsStore::Iterator Iterator;
+typedef Builder::WordRefVector WordRefVector;
+
+const EntryRef w1(1);
+const EntryRef w2(2);
+const EntryRef w3(3);
+const EntryRef w4(4);
+const uint32_t d1(111);
+const uint32_t d2(222);
+const uint32_t d3(333);
+const uint32_t d4(444);
+
+WordRefVector
+build(Iterator itr)
+{
+ WordRefVector words;
+ for (; itr.valid(); ++itr) {
+ words.push_back(itr.wordRef());
+ }
+ return words;
+}
+
+vespalib::string
+toStr(Iterator itr)
+{
+ WordRefVector words = build(itr);
+ std::ostringstream oss;
+ oss << "[";
+ bool firstWord = true;
+ for (auto word : words) {
+ if (!firstWord) oss << ",";
+ oss << word.ref();
+ firstWord = false;
+ }
+ oss << "]";
+ return oss.str();
+}
+
+struct SingleFixture
+{
+ CompactDocumentWordsStore _store;
+ SingleFixture() : _store() {
+ _store.insert(Builder(d1).insert(w1).insert(w2).insert(w3));
+ }
+};
+
+struct MultiFixture
+{
+ CompactDocumentWordsStore _store;
+ MultiFixture() : _store() {
+ _store.insert(Builder(d1).insert(w1));
+ _store.insert(Builder(d2).insert(w2));
+ _store.insert(Builder(d3).insert(w3));
+ }
+};
+
+
+TEST_F("require that fields and words can be added for a document", SingleFixture)
+{
+ EXPECT_EQUAL("[1,2,3]", toStr(f._store.get(d1)));
+}
+
+TEST_F("require that multiple documents can be added", MultiFixture)
+{
+ EXPECT_EQUAL("[1]", toStr(f._store.get(d1)));
+ EXPECT_EQUAL("[2]", toStr(f._store.get(d2)));
+ EXPECT_EQUAL("[3]", toStr(f._store.get(d3)));
+ EXPECT_FALSE(f._store.get(d4).valid());
+}
+
+TEST_F("require that documents can be removed", MultiFixture)
+{
+ f._store.remove(d2);
+ EXPECT_TRUE(f._store.get(d1).valid());
+ EXPECT_FALSE(f._store.get(d2).valid());
+ EXPECT_TRUE(f._store.get(d3).valid());
+}
+
+TEST_F("require that documents can be removed and re-inserted", MultiFixture)
+{
+ f._store.remove(d2);
+ f._store.insert(Builder(d2).insert(w4));
+ EXPECT_EQUAL("[4]", toStr(f._store.get(d2)));
+}
+
+TEST("require that a lot of words can be inserted, retrieved and removed")
+{
+ CompactDocumentWordsStore store;
+ for (uint32_t docId = 0; docId < 50; ++docId) {
+ Builder b(docId);
+ for (uint32_t wordRef = 0; wordRef < 20000; ++wordRef) {
+ b.insert(wordRef);
+ }
+ store.insert(b);
+ MemoryUsage usage = store.getMemoryUsage();
+ std::cout << "memory usage (insert): docId=" << docId << ", alloc=" << usage.allocatedBytes() << ", used=" << usage.usedBytes() << std::endl;
+ }
+ for (uint32_t docId = 0; docId < 50; ++docId) {
+ WordRefVector words = build(store.get(docId));
+ EXPECT_EQUAL(20000u, words.size());
+ uint32_t wordRef = 0;
+ for (auto word : words) {
+ EXPECT_EQUAL(wordRef++, word.ref());
+ }
+ store.remove(docId);
+ MemoryUsage usage = store.getMemoryUsage();
+ std::cout << "memory usage (remove): docId=" << docId << ", alloc=" << usage.allocatedBytes() << ", used=" << usage.usedBytes() << std::endl;
+ }
+}
+
+TEST("require that initial memory usage is reported")
+{
+ CompactDocumentWordsStore store;
+ CompactDocumentWordsStore::DocumentWordsMap docs;
+ CompactDocumentWordsStore::Store internalStore;
+ MemoryUsage initExp;
+ initExp.incAllocatedBytes(docs.getMemoryConsumption());
+ initExp.incUsedBytes(docs.getMemoryUsed());
+ initExp.merge(internalStore.getMemoryUsage());
+ MemoryUsage init = store.getMemoryUsage();
+ EXPECT_EQUAL(initExp.allocatedBytes(), init.allocatedBytes());
+ EXPECT_EQUAL(initExp.usedBytes(), init.usedBytes());
+ EXPECT_GREATER(init.allocatedBytes(), init.usedBytes());
+ EXPECT_GREATER(init.allocatedBytes(), 0u);
+ EXPECT_GREATER(init.usedBytes(), 0u);
+}
+
+TEST("require that memory usage is updated after insert")
+{
+ CompactDocumentWordsStore store;
+ MemoryUsage init = store.getMemoryUsage();
+
+ store.insert(Builder(d1).insert(w1));
+ MemoryUsage after = store.getMemoryUsage();
+ EXPECT_GREATER_EQUAL(after.allocatedBytes(), init.allocatedBytes());
+ EXPECT_GREATER(after.usedBytes(), init.usedBytes());
+}
+
+
+TEST_MAIN() { TEST_RUN_ALL(); }
+
diff --git a/searchlib/src/tests/memoryindex/datastore/.gitignore b/searchlib/src/tests/memoryindex/datastore/.gitignore
new file mode 100644
index 00000000000..98f4acc70a8
--- /dev/null
+++ b/searchlib/src/tests/memoryindex/datastore/.gitignore
@@ -0,0 +1,8 @@
+.depend
+Makefile
+datastore_test
+featurestore_test
+wordstore_test
+searchlib_datastore_test_app
+searchlib_featurestore_test_app
+searchlib_wordstore_test_app
diff --git a/searchlib/src/tests/memoryindex/datastore/CMakeLists.txt b/searchlib/src/tests/memoryindex/datastore/CMakeLists.txt
new file mode 100644
index 00000000000..da45288fe5e
--- /dev/null
+++ b/searchlib/src/tests/memoryindex/datastore/CMakeLists.txt
@@ -0,0 +1,22 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_datastore_test_app
+ SOURCES
+ datastore_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_datastore_test_app COMMAND searchlib_datastore_test_app)
+vespa_add_executable(searchlib_featurestore_test_app
+ SOURCES
+ featurestore_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_featurestore_test_app COMMAND searchlib_featurestore_test_app)
+vespa_add_executable(searchlib_wordstore_test_app
+ SOURCES
+ wordstore_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_wordstore_test_app COMMAND searchlib_wordstore_test_app)
diff --git a/searchlib/src/tests/memoryindex/datastore/DESC b/searchlib/src/tests/memoryindex/datastore/DESC
new file mode 100644
index 00000000000..56725396b65
--- /dev/null
+++ b/searchlib/src/tests/memoryindex/datastore/DESC
@@ -0,0 +1 @@
+datastore test. Take a look at datastore_test.cpp and wordstore_test.cpp for details.
diff --git a/searchlib/src/tests/memoryindex/datastore/FILES b/searchlib/src/tests/memoryindex/datastore/FILES
new file mode 100644
index 00000000000..6cbbaf6a328
--- /dev/null
+++ b/searchlib/src/tests/memoryindex/datastore/FILES
@@ -0,0 +1,2 @@
+datastore_test.cpp
+wordstore_test.cpp
diff --git a/searchlib/src/tests/memoryindex/datastore/datastore_test.cpp b/searchlib/src/tests/memoryindex/datastore/datastore_test.cpp
new file mode 100644
index 00000000000..be55dd7ee1e
--- /dev/null
+++ b/searchlib/src/tests/memoryindex/datastore/datastore_test.cpp
@@ -0,0 +1,432 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("datastore_test");
+#include <vespa/vespalib/testkit/testapp.h>
+#include <vespa/searchlib/btree/datastore.h>
+#include <vespa/searchlib/btree/datastore.hpp>
+
+namespace search {
+namespace btree {
+
+class MyStore : public DataStore<int, EntryRefT<3, 2> > {
+private:
+ typedef DataStore<int, EntryRefT<3, 2> > ParentType;
+ using ParentType::_buffers;
+ using ParentType::_states;
+ using ParentType::_activeBufferIds;
+public:
+ MyStore() {}
+
+ void
+ holdBuffer(uint32_t bufferId)
+ {
+ ParentType::holdBuffer(bufferId);
+ }
+
+ void
+ holdElem(EntryRef ref, uint64_t len)
+ {
+ ParentType::holdElem(ref, len);
+ }
+
+ void
+ transferHoldLists(generation_t generation)
+ {
+ ParentType::transferHoldLists(generation);
+ }
+
+ void trimElemHoldList(generation_t usedGen) {
+ ParentType::trimElemHoldList(usedGen);
+ }
+ void incDead(EntryRef ref, uint64_t dead) {
+ ParentType::incDead(ref, dead);
+ }
+ void ensureBufferCapacity(size_t sizeNeeded) {
+ ParentType::ensureBufferCapacity(0, sizeNeeded);
+ }
+ void enableFreeLists() {
+ ParentType::enableFreeLists();
+ }
+
+ void
+ switchActiveBuffer(void)
+ {
+ ParentType::switchActiveBuffer(0, 0u);
+ }
+ std::vector<void *> & buffers() { return _buffers; }
+ std::vector<BufferState> &statesVec() { return _states; }
+ size_t activeBufferId() const { return _activeBufferIds[0]; }
+};
+
+typedef MyStore::RefType MyRef;
+
+class Test : public vespalib::TestApp {
+private:
+ bool assertMemStats(const DataStoreBase::MemStats & exp,
+ const DataStoreBase::MemStats & act);
+ void requireThatEntryRefIsWorking();
+ void requireThatAlignedEntryRefIsWorking();
+ void requireThatEntriesCanBeAddedAndRetrieved();
+ void requireThatAddEntryTriggersChangeOfBuffer();
+ void requireThatWeCanHoldAndTrimBuffers();
+ void requireThatWeCanHoldAndTrimElements();
+ void requireThatWeCanUseFreeLists();
+ void requireThatMemoryStatsAreCalculated();
+ void requireThatMemoryUsageIsCalculated();
+
+ void
+ requireThatWecanDisableElemHoldList(void);
+public:
+ int Main();
+};
+
+bool
+Test::assertMemStats(const DataStoreBase::MemStats & exp,
+ const DataStoreBase::MemStats & act)
+{
+ if (!EXPECT_EQUAL(exp._allocElems, act._allocElems)) return false;
+ if (!EXPECT_EQUAL(exp._usedElems, act._usedElems)) return false;
+ if (!EXPECT_EQUAL(exp._deadElems, act._deadElems)) return false;
+ if (!EXPECT_EQUAL(exp._holdElems, act._holdElems)) return false;
+ if (!EXPECT_EQUAL(exp._freeBuffers, act._freeBuffers)) return false;
+ if (!EXPECT_EQUAL(exp._activeBuffers, act._activeBuffers)) return false;
+ if (!EXPECT_EQUAL(exp._holdBuffers, act._holdBuffers)) return false;
+ return true;
+}
+
+void
+Test::requireThatEntryRefIsWorking()
+{
+ typedef EntryRefT<22> MyRefType;
+ EXPECT_EQUAL(4194304u, MyRefType::offsetSize());
+ EXPECT_EQUAL(1024u, MyRefType::numBuffers());
+ {
+ MyRefType r(0, 0);
+ EXPECT_EQUAL(0u, r.offset());
+ EXPECT_EQUAL(0u, r.bufferId());
+ }
+ {
+ MyRefType r(237, 13);
+ EXPECT_EQUAL(237u, r.offset());
+ EXPECT_EQUAL(13u, r.bufferId());
+ }
+ {
+ MyRefType r(4194303, 1023);
+ EXPECT_EQUAL(4194303u, r.offset());
+ EXPECT_EQUAL(1023u, r.bufferId());
+ }
+ {
+ MyRefType r1(6498, 76);
+ MyRefType r2(r1);
+ EXPECT_EQUAL(r1.offset(), r2.offset());
+ EXPECT_EQUAL(r1.bufferId(), r2.bufferId());
+ }
+}
+
+void
+Test::requireThatAlignedEntryRefIsWorking()
+{
+ typedef AlignedEntryRefT<22, 2> MyRefType; // 4 byte alignement
+ EXPECT_EQUAL(4 * 4194304u, MyRefType::offsetSize());
+ EXPECT_EQUAL(1024u, MyRefType::numBuffers());
+ EXPECT_EQUAL(0u, MyRefType::align(0));
+ EXPECT_EQUAL(4u, MyRefType::align(1));
+ EXPECT_EQUAL(4u, MyRefType::align(2));
+ EXPECT_EQUAL(4u, MyRefType::align(3));
+ EXPECT_EQUAL(4u, MyRefType::align(4));
+ EXPECT_EQUAL(8u, MyRefType::align(5));
+ {
+ MyRefType r(0, 0);
+ EXPECT_EQUAL(0u, r.offset());
+ EXPECT_EQUAL(0u, r.bufferId());
+ }
+ {
+ MyRefType r(237, 13);
+ EXPECT_EQUAL(MyRefType::align(237), r.offset());
+ EXPECT_EQUAL(13u, r.bufferId());
+ }
+ {
+ MyRefType r(MyRefType::offsetSize() - 4, 1023);
+ EXPECT_EQUAL(MyRefType::align(MyRefType::offsetSize() - 4), r.offset());
+ EXPECT_EQUAL(1023u, r.bufferId());
+ }
+}
+
+void
+Test::requireThatEntriesCanBeAddedAndRetrieved()
+{
+ typedef DataStore<int> IntStore;
+ IntStore ds;
+ EntryRef r1 = ds.addEntry(10);
+ EntryRef r2 = ds.addEntry(20);
+ EntryRef r3 = ds.addEntry(30);
+ EXPECT_EQUAL(1u, IntStore::RefType(r1).offset());
+ EXPECT_EQUAL(2u, IntStore::RefType(r2).offset());
+ EXPECT_EQUAL(3u, IntStore::RefType(r3).offset());
+ EXPECT_EQUAL(0u, IntStore::RefType(r1).bufferId());
+ EXPECT_EQUAL(0u, IntStore::RefType(r2).bufferId());
+ EXPECT_EQUAL(0u, IntStore::RefType(r3).bufferId());
+ EXPECT_EQUAL(10, ds.getEntry(r1));
+ EXPECT_EQUAL(20, ds.getEntry(r2));
+ EXPECT_EQUAL(30, ds.getEntry(r3));
+}
+
+void
+Test::requireThatAddEntryTriggersChangeOfBuffer()
+{
+ typedef DataStore<uint64_t, EntryRefT<10, 10> > Store;
+ Store s;
+ uint64_t num = 0;
+ uint32_t lastId = 0;
+ uint64_t lastNum = 0;
+ for (;;++num) {
+ EntryRef r = s.addEntry(num);
+ EXPECT_EQUAL(num, s.getEntry(r));
+ uint32_t bufferId = Store::RefType(r).bufferId();
+ if (bufferId > lastId) {
+ LOG(info, "Changed to bufferId %u after %" PRIu64 " nums", bufferId, num);
+ EXPECT_EQUAL(Store::RefType::offsetSize() - (lastId == 0),
+ num - lastNum);
+ lastId = bufferId;
+ lastNum = num;
+ }
+ if (bufferId == 2) {
+ break;
+ }
+ }
+ EXPECT_EQUAL(Store::RefType::offsetSize() * 2 - 1, num);
+ LOG(info, "Added %" PRIu64 " nums in 2 buffers", num);
+}
+
+void
+Test::requireThatWeCanHoldAndTrimBuffers()
+{
+ MyStore s;
+ EXPECT_EQUAL(0u, MyRef(s.addEntry(1)).bufferId());
+ s.switchActiveBuffer();
+ EXPECT_EQUAL(1u, s.activeBufferId());
+ s.holdBuffer(0); // hold last buffer
+ s.transferHoldLists(10);
+
+ EXPECT_EQUAL(1u, MyRef(s.addEntry(2)).bufferId());
+ s.switchActiveBuffer();
+ EXPECT_EQUAL(2u, s.activeBufferId());
+ s.holdBuffer(1); // hold last buffer
+ s.transferHoldLists(20);
+
+ EXPECT_EQUAL(2u, MyRef(s.addEntry(3)).bufferId());
+ s.switchActiveBuffer();
+ EXPECT_EQUAL(3u, s.activeBufferId());
+ s.holdBuffer(2); // hold last buffer
+ s.transferHoldLists(30);
+
+ EXPECT_EQUAL(3u, MyRef(s.addEntry(4)).bufferId());
+ s.holdBuffer(3); // hold current buffer
+ s.transferHoldLists(40);
+
+ EXPECT_TRUE(s.statesVec()[0].size() != 0);
+ EXPECT_TRUE(s.statesVec()[1].size() != 0);
+ EXPECT_TRUE(s.statesVec()[2].size() != 0);
+ EXPECT_TRUE(s.statesVec()[3].size() != 0);
+ s.trimHoldLists(11);
+ EXPECT_TRUE(s.statesVec()[0].size() == 0);
+ EXPECT_TRUE(s.statesVec()[1].size() != 0);
+ EXPECT_TRUE(s.statesVec()[2].size() != 0);
+ EXPECT_TRUE(s.statesVec()[3].size() != 0);
+
+ s.switchActiveBuffer();
+ EXPECT_EQUAL(0u, s.activeBufferId());
+ EXPECT_EQUAL(0u, MyRef(s.addEntry(5)).bufferId());
+ s.trimHoldLists(41);
+ EXPECT_TRUE(s.statesVec()[0].size() != 0);
+ EXPECT_TRUE(s.statesVec()[1].size() == 0);
+ EXPECT_TRUE(s.statesVec()[2].size() == 0);
+ EXPECT_TRUE(s.statesVec()[3].size() == 0);
+}
+
+void
+Test::requireThatWeCanHoldAndTrimElements()
+{
+ MyStore s;
+ MyRef r1 = s.addEntry(1);
+ s.holdElem(r1, 1);
+ s.transferHoldLists(10);
+ MyRef r2 = s.addEntry(2);
+ s.holdElem(r2, 1);
+ s.transferHoldLists(20);
+ MyRef r3 = s.addEntry(3);
+ s.holdElem(r3, 1);
+ s.transferHoldLists(30);
+ EXPECT_EQUAL(1, s.getEntry(r1));
+ EXPECT_EQUAL(2, s.getEntry(r2));
+ EXPECT_EQUAL(3, s.getEntry(r3));
+ s.trimElemHoldList(11);
+ EXPECT_EQUAL(0, s.getEntry(r1));
+ EXPECT_EQUAL(2, s.getEntry(r2));
+ EXPECT_EQUAL(3, s.getEntry(r3));
+ s.trimElemHoldList(31);
+ EXPECT_EQUAL(0, s.getEntry(r1));
+ EXPECT_EQUAL(0, s.getEntry(r2));
+ EXPECT_EQUAL(0, s.getEntry(r3));
+}
+
+void
+Test::requireThatWeCanUseFreeLists()
+{
+ MyStore s;
+ s.enableFreeLists();
+ MyRef r1 = s.addEntry2(1);
+ s.holdElem(r1, 1);
+ s.transferHoldLists(10);
+ MyRef r2 = s.addEntry2(2);
+ s.holdElem(r2, 1);
+ s.transferHoldLists(20);
+ s.trimElemHoldList(11);
+ MyRef r3 = s.addEntry2(3); // reuse r1
+ EXPECT_EQUAL(r1.offset(), r3.offset());
+ EXPECT_EQUAL(r1.bufferId(), r3.bufferId());
+ MyRef r4 = s.addEntry2(4);
+ EXPECT_EQUAL(r2.offset() + 1, r4.offset());
+ s.trimElemHoldList(21);
+ MyRef r5 = s.addEntry2(5); // reuse r2
+ EXPECT_EQUAL(r2.offset(), r5.offset());
+ EXPECT_EQUAL(r2.bufferId(), r5.bufferId());
+ MyRef r6 = s.addEntry2(6);
+ EXPECT_EQUAL(r4.offset() + 1, r6.offset());
+ EXPECT_EQUAL(3, s.getEntry(r1));
+ EXPECT_EQUAL(5, s.getEntry(r2));
+ EXPECT_EQUAL(3, s.getEntry(r3));
+ EXPECT_EQUAL(4, s.getEntry(r4));
+ EXPECT_EQUAL(5, s.getEntry(r5));
+ EXPECT_EQUAL(6, s.getEntry(r6));
+}
+
+void
+Test::requireThatMemoryStatsAreCalculated()
+{
+ MyStore s;
+ DataStoreBase::MemStats m;
+ m._allocElems = MyRef::offsetSize();
+ m._usedElems = 1; // ref = 0 is reserved
+ m._deadElems = 1; // ref = 0 is reserved
+ m._holdElems = 0;
+ m._activeBuffers = 1;
+ m._freeBuffers = MyRef::numBuffers() - 1;
+ m._holdBuffers = 0;
+ EXPECT_TRUE(assertMemStats(m, s.getMemStats()));
+
+ // add entry
+ MyRef r = s.addEntry(10);
+ m._usedElems++;
+ EXPECT_TRUE(assertMemStats(m, s.getMemStats()));
+
+ // inc dead
+ s.incDead(r, 1);
+ m._deadElems++;
+ EXPECT_TRUE(assertMemStats(m, s.getMemStats()));
+
+ // hold buffer
+ s.addEntry(20);
+ s.addEntry(30);
+ s.holdBuffer(r.bufferId());
+ s.transferHoldLists(100);
+ m._usedElems += 2;
+ m._holdElems += 2; // used - dead
+ m._activeBuffers--;
+ m._holdBuffers++;
+ EXPECT_TRUE(assertMemStats(m, s.getMemStats()));
+
+ // new active buffer
+ s.switchActiveBuffer();
+ s.addEntry(40);
+ m._allocElems += MyRef::offsetSize();
+ m._usedElems++;
+ m._activeBuffers++;
+ m._freeBuffers--;
+
+ // trim hold buffer
+ s.trimHoldLists(101);
+ m._allocElems -= MyRef::offsetSize();
+ m._usedElems = 1;
+ m._deadElems = 0;
+ m._holdElems = 0;
+ m._freeBuffers = MyRef::numBuffers() - 1;
+ m._holdBuffers = 0;
+ EXPECT_TRUE(assertMemStats(m, s.getMemStats()));
+}
+
+void
+Test::requireThatMemoryUsageIsCalculated()
+{
+ MyStore s;
+ MyRef r = s.addEntry(10);
+ s.addEntry(20);
+ s.addEntry(30);
+ s.addEntry(40);
+ s.incDead(r, 1);
+ s.holdBuffer(r.bufferId());
+ s.transferHoldLists(100);
+ MemoryUsage m = s.getMemoryUsage();
+ EXPECT_EQUAL(MyRef::offsetSize() * sizeof(int), m.allocatedBytes());
+ EXPECT_EQUAL(5 * sizeof(int), m.usedBytes());
+ EXPECT_EQUAL(2 * sizeof(int), m.deadBytes());
+ EXPECT_EQUAL(3 * sizeof(int), m.allocatedBytesOnHold());
+ s.trimHoldLists(101);
+}
+
+
+void
+Test::requireThatWecanDisableElemHoldList(void)
+{
+ MyStore s;
+ MyRef r1 = s.addEntry(10);
+ MyRef r2 = s.addEntry(20);
+ MyRef r3 = s.addEntry(30);
+ (void) r3;
+ MemoryUsage m = s.getMemoryUsage();
+ EXPECT_EQUAL(MyRef::offsetSize() * sizeof(int), m.allocatedBytes());
+ EXPECT_EQUAL(4 * sizeof(int), m.usedBytes());
+ EXPECT_EQUAL(1 * sizeof(int), m.deadBytes());
+ EXPECT_EQUAL(0 * sizeof(int), m.allocatedBytesOnHold());
+ s.holdElem(r1, 1);
+ m = s.getMemoryUsage();
+ EXPECT_EQUAL(MyRef::offsetSize() * sizeof(int), m.allocatedBytes());
+ EXPECT_EQUAL(4 * sizeof(int), m.usedBytes());
+ EXPECT_EQUAL(1 * sizeof(int), m.deadBytes());
+ EXPECT_EQUAL(1 * sizeof(int), m.allocatedBytesOnHold());
+ s.disableElemHoldList();
+ s.holdElem(r2, 1);
+ m = s.getMemoryUsage();
+ EXPECT_EQUAL(MyRef::offsetSize() * sizeof(int), m.allocatedBytes());
+ EXPECT_EQUAL(4 * sizeof(int), m.usedBytes());
+ EXPECT_EQUAL(2 * sizeof(int), m.deadBytes());
+ EXPECT_EQUAL(1 * sizeof(int), m.allocatedBytesOnHold());
+ s.transferHoldLists(100);
+ s.trimHoldLists(101);
+}
+
+int
+Test::Main()
+{
+ TEST_INIT("datastore_test");
+
+ requireThatEntryRefIsWorking();
+ requireThatAlignedEntryRefIsWorking();
+ requireThatEntriesCanBeAddedAndRetrieved();
+ requireThatAddEntryTriggersChangeOfBuffer();
+ requireThatWeCanHoldAndTrimBuffers();
+ requireThatWeCanHoldAndTrimElements();
+ requireThatWeCanUseFreeLists();
+ requireThatMemoryStatsAreCalculated();
+ requireThatMemoryUsageIsCalculated();
+ requireThatWecanDisableElemHoldList();
+
+ TEST_DONE();
+}
+
+}
+}
+
+TEST_APPHOOK(search::btree::Test);
+
diff --git a/searchlib/src/tests/memoryindex/datastore/featurestore_test.cpp b/searchlib/src/tests/memoryindex/datastore/featurestore_test.cpp
new file mode 100644
index 00000000000..87d32c90b78
--- /dev/null
+++ b/searchlib/src/tests/memoryindex/datastore/featurestore_test.cpp
@@ -0,0 +1,245 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("featurestore_test");
+#include <vespa/vespalib/testkit/testapp.h>
+#include <vespa/searchlib/memoryindex/featurestore.h>
+
+using namespace search::btree;
+using namespace search::index;
+
+namespace search
+{
+
+
+namespace memoryindex
+{
+
+
+class Test : public vespalib::TestApp
+{
+private:
+ Schema _schema;
+
+ const Schema &
+ getSchema(void) const
+ {
+ return _schema;
+ }
+
+ bool
+ assertFeatures(const DocIdAndFeatures &exp,
+ const DocIdAndFeatures &act);
+
+ void
+ requireThatFeaturesCanBeAddedAndRetrieved(void);
+
+ void
+ requireThatNextWordsAreWorking(void);
+ void
+ requireThatAddFeaturesTriggersChangeOfBuffer(void);
+
+public:
+ Test(void);
+
+ int
+ Main(void);
+};
+
+
+bool
+Test::assertFeatures(const DocIdAndFeatures &exp,
+ const DocIdAndFeatures &act)
+{
+ // docid is not encoded as part of features
+ if (!EXPECT_EQUAL(exp._elements.size(),
+ act._elements.size()))
+ return false;
+ for (size_t i = 0; i < exp._elements.size(); ++i) {
+ if (!EXPECT_EQUAL(exp._elements[i]._elementId,
+ act._elements[i]._elementId))
+ return false;
+ if (!EXPECT_EQUAL(exp._elements[i]._numOccs,
+ act._elements[i]._numOccs))
+ return false;
+ if (!EXPECT_EQUAL(exp._elements[i]._weight, act._elements[i]._weight))
+ return false;
+ if (!EXPECT_EQUAL(exp._elements[i]._elementLen,
+ act._elements[i]._elementLen))
+ return false;
+ }
+ if (!EXPECT_EQUAL(exp._wordPositions.size(), act._wordPositions.size()))
+ return false;
+ for (size_t i = 0; i < exp._wordPositions.size(); ++i) {
+ if (!EXPECT_EQUAL(exp._wordPositions[i]._wordPos,
+ act._wordPositions[i]._wordPos)) return false;
+ }
+ return true;
+}
+
+
+DocIdAndFeatures
+getFeatures(uint32_t numOccs,
+ int32_t weight,
+ uint32_t elemLen)
+{
+ DocIdAndFeatures f;
+ f._docId = 0;
+ f._elements.push_back(WordDocElementFeatures(0));
+ f._elements.back().setNumOccs(numOccs);
+ f._elements.back().setWeight(weight);
+ f._elements.back().setElementLen(elemLen);
+ for (uint32_t i = 0; i < numOccs; ++i) {
+ f._wordPositions.push_back(WordDocElementWordPosFeatures(i));
+ }
+ return f;
+}
+
+
+void
+Test::requireThatFeaturesCanBeAddedAndRetrieved(void)
+{
+ FeatureStore fs(getSchema());
+ DocIdAndFeatures act;
+ EntryRef r1;
+ EntryRef r2;
+ std::pair<EntryRef, uint64_t> r;
+ {
+ DocIdAndFeatures f = getFeatures(2, 4, 8);
+ r = fs.addFeatures(0, f);
+ r1 = r.first;
+ EXPECT_TRUE(r.second > 0);
+ EXPECT_EQUAL(FeatureStore::RefType::align(1u),
+ FeatureStore::RefType(r1).offset());
+ EXPECT_EQUAL(0u, FeatureStore::RefType(r1).bufferId());
+ LOG(info,
+ "bits(%" PRIu64 "), ref.offset(%" PRIu64 "), ref.bufferId(%u)",
+ r.second,
+ FeatureStore::RefType(r1).offset(),
+ FeatureStore::RefType(r1).bufferId());
+ fs.getFeatures(0, r1, act);
+ // weight not encoded for single value
+ EXPECT_TRUE(assertFeatures(getFeatures(2, 1, 8), act));
+ }
+ {
+ DocIdAndFeatures f = getFeatures(4, 8, 16);
+ r = fs.addFeatures(1, f);
+ r2 = r.first;
+ EXPECT_TRUE(r.second > 0);
+ EXPECT_TRUE(FeatureStore::RefType(r2).offset() >
+ FeatureStore::RefType(r1).offset());
+ EXPECT_EQUAL(0u, FeatureStore::RefType(r1).bufferId());
+ LOG(info,
+ "bits(%" PRIu64 "), ref.offset(%" PRIu64 "), ref.bufferId(%u)",
+ r.second,
+ FeatureStore::RefType(r2).offset(),
+ FeatureStore::RefType(r2).bufferId());
+ fs.getFeatures(1, r2, act);
+ EXPECT_TRUE(assertFeatures(f, act));
+ }
+}
+
+
+void
+Test::requireThatNextWordsAreWorking(void)
+{
+ FeatureStore fs(getSchema());
+ DocIdAndFeatures act;
+ EntryRef r1;
+ EntryRef r2;
+ std::pair<EntryRef, uint64_t> r;
+ {
+ DocIdAndFeatures f = getFeatures(2, 4, 8);
+ r = fs.addFeatures(0, f);
+ r1 = r.first;
+ EXPECT_TRUE(r.second > 0);
+ EXPECT_EQUAL(FeatureStore::RefType::align(1u),
+ FeatureStore::RefType(r1).offset());
+ EXPECT_EQUAL(0u, FeatureStore::RefType(r1).bufferId());
+ LOG(info,
+ "bits(%" PRIu64 "), ref.offset(%" PRIu64 "), ref.bufferId(%u)",
+ r.second,
+ FeatureStore::RefType(r1).offset(),
+ FeatureStore::RefType(r1).bufferId());
+ fs.getFeatures(0, r1, act);
+ // weight not encoded for single value
+ EXPECT_TRUE(assertFeatures(getFeatures(2, 1, 8), act));
+ }
+ {
+ DocIdAndFeatures f = getFeatures(4, 8, 16);
+ r = fs.addFeatures(1, f);
+ r2 = r.first;
+ EXPECT_TRUE(r.second > 0);
+ EXPECT_TRUE(FeatureStore::RefType(r2).offset() >
+ FeatureStore::RefType(r1).offset());
+ EXPECT_EQUAL(0u, FeatureStore::RefType(r1).bufferId());
+ LOG(info,
+ "bits(%" PRIu64 "), ref.offset(%" PRIu64 "), ref.bufferId(%u)",
+ r.second,
+ FeatureStore::RefType(r2).offset(),
+ FeatureStore::RefType(r2).bufferId());
+ fs.getFeatures(1, r2, act);
+ EXPECT_TRUE(assertFeatures(f, act));
+ }
+}
+
+
+void
+Test::requireThatAddFeaturesTriggersChangeOfBuffer(void)
+{
+ FeatureStore fs(getSchema());
+ size_t cnt = 1;
+ DocIdAndFeatures act;
+ uint32_t lastId = 0;
+ for (;;++cnt) {
+ uint32_t numOccs = (cnt % 100) + 1;
+ DocIdAndFeatures f = getFeatures(numOccs, 1, numOccs + 1);
+ std::pair<EntryRef, uint64_t> r = fs.addFeatures(0, f);
+ fs.getFeatures(0, r.first, act);
+ EXPECT_TRUE(assertFeatures(f, act));
+ uint32_t bufferId = FeatureStore::RefType(r.first).bufferId();
+ if (bufferId > lastId) {
+ LOG(info,
+ "Changed to bufferId %u after %zu feature sets",
+ bufferId, cnt);
+ lastId = bufferId;
+ }
+ if (bufferId == 1) {
+ break;
+ }
+ }
+ EXPECT_EQUAL(1u, lastId);
+ LOG(info, "Added %zu feature sets in 1 buffer", cnt);
+}
+
+
+Test::Test()
+ : _schema()
+{
+ _schema.addIndexField(Schema::IndexField("f0", Schema::STRING));
+ _schema.addIndexField(Schema::IndexField("f1",
+ Schema::STRING,
+ Schema::WEIGHTEDSET));
+}
+
+
+int
+Test::Main()
+{
+ TEST_INIT("featurestore_test");
+
+ requireThatFeaturesCanBeAddedAndRetrieved();
+ requireThatNextWordsAreWorking();
+ requireThatAddFeaturesTriggersChangeOfBuffer();
+
+ TEST_DONE();
+}
+
+
+}
+
+
+}
+
+
+TEST_APPHOOK(search::memoryindex::Test);
diff --git a/searchlib/src/tests/memoryindex/datastore/wordstore_test.cpp b/searchlib/src/tests/memoryindex/datastore/wordstore_test.cpp
new file mode 100644
index 00000000000..825992b3b4f
--- /dev/null
+++ b/searchlib/src/tests/memoryindex/datastore/wordstore_test.cpp
@@ -0,0 +1,104 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("wordstore_test");
+#include <vespa/vespalib/testkit/testapp.h>
+#include <vespa/searchlib/memoryindex/wordstore.h>
+
+using namespace search::btree;
+
+namespace search {
+namespace memoryindex {
+
+class Test : public vespalib::TestApp {
+private:
+ void requireThatWordsCanBeAddedAndRetrieved();
+ void requireThatAddWordTriggersChangeOfBuffer();
+public:
+ int Main();
+};
+
+void
+Test::requireThatWordsCanBeAddedAndRetrieved()
+{
+ std::string w1 = "require";
+ std::string w2 = "that";
+ std::string w3 = "words";
+ WordStore ws;
+ EntryRef r1 = ws.addWord(w1);
+ EntryRef r2 = ws.addWord(w2);
+ EntryRef r3 = ws.addWord(w3);
+ uint32_t invp = WordStore::RefType::align(1); // Reserved as invalid
+ uint32_t w1s = w1.size() + 1;
+ uint32_t w1p = WordStore::RefType::pad(w1s);
+ uint32_t w2s = w2.size() + 1;
+ uint32_t w2p = WordStore::RefType::pad(w2s);
+ EXPECT_EQUAL(invp, WordStore::RefType(r1).offset());
+ EXPECT_EQUAL(invp + w1s + w1p, WordStore::RefType(r2).offset());
+ EXPECT_EQUAL(invp + w1s + w1p + w2s + w2p, WordStore::RefType(r3).offset());
+ EXPECT_EQUAL(0u, WordStore::RefType(r1).bufferId());
+ EXPECT_EQUAL(0u, WordStore::RefType(r2).bufferId());
+ EXPECT_EQUAL(0u, WordStore::RefType(r3).bufferId());
+ EXPECT_EQUAL(std::string("require"), ws.getWord(r1));
+ EXPECT_EQUAL(std::string("that"), ws.getWord(r2));
+ EXPECT_EQUAL(std::string("words"), ws.getWord(r3));
+}
+
+void
+Test::requireThatAddWordTriggersChangeOfBuffer()
+{
+ WordStore ws;
+ size_t word = 0;
+ uint32_t lastId = 0;
+ size_t lastWord = 0;
+ char wordStr[10];
+ size_t entrySize = WordStore::RefType::align(6 + 1);
+ size_t initBufferSpace = 1024u * WordStore::RefType::align(1);
+ size_t bufferSpace = initBufferSpace;
+ size_t bufferWords = (bufferSpace - WordStore::RefType::align(1)) /
+ entrySize;
+ size_t usedSpace = 0;
+ size_t sumBufferWords = 0;
+ for (;;++word) {
+ sprintf(wordStr, "%6zu", word);
+ // all words uses 12 bytes (include padding)
+ EntryRef r = ws.addWord(std::string(wordStr));
+ EXPECT_EQUAL(std::string(wordStr), ws.getWord(r));
+ uint32_t bufferId = WordStore::RefType(r).bufferId();
+ if (bufferId > lastId) {
+ LOG(info,
+ "Changed to bufferId %u after %zu words",
+ bufferId, word);
+ EXPECT_EQUAL(bufferWords, word - lastWord);
+ lastId = bufferId;
+ lastWord = word;
+ usedSpace += bufferWords * entrySize;
+ sumBufferWords += bufferWords;
+ bufferSpace = usedSpace + initBufferSpace;
+ bufferWords = bufferSpace / entrySize;
+ }
+ if (bufferId == 4) {
+ break;
+ }
+ }
+ // each buffer can have offsetSize / 12 words
+ EXPECT_EQUAL(sumBufferWords, word);
+ LOG(info, "Added %zu words in 4 buffers", word);
+}
+
+int
+Test::Main()
+{
+ TEST_INIT("wordstore_test");
+
+ requireThatWordsCanBeAddedAndRetrieved();
+ requireThatAddWordTriggersChangeOfBuffer();
+
+ TEST_DONE();
+}
+
+}
+}
+
+TEST_APPHOOK(search::memoryindex::Test);
+
diff --git a/searchlib/src/tests/memoryindex/dictionary/.gitignore b/searchlib/src/tests/memoryindex/dictionary/.gitignore
new file mode 100644
index 00000000000..d404d7d7063
--- /dev/null
+++ b/searchlib/src/tests/memoryindex/dictionary/.gitignore
@@ -0,0 +1,6 @@
+.depend
+Makefile
+dictionary_test
+dump
+/urldump
+searchlib_dictionary_test_app
diff --git a/searchlib/src/tests/memoryindex/dictionary/CMakeLists.txt b/searchlib/src/tests/memoryindex/dictionary/CMakeLists.txt
new file mode 100644
index 00000000000..9520b37d267
--- /dev/null
+++ b/searchlib/src/tests/memoryindex/dictionary/CMakeLists.txt
@@ -0,0 +1,9 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_dictionary_test_app
+ SOURCES
+ dictionary_test.cpp
+ DEPENDS
+ searchlib
+ searchlib_test
+)
+vespa_add_test(NAME searchlib_dictionary_test_app COMMAND searchlib_dictionary_test_app)
diff --git a/searchlib/src/tests/memoryindex/dictionary/DESC b/searchlib/src/tests/memoryindex/dictionary/DESC
new file mode 100644
index 00000000000..ff559f42641
--- /dev/null
+++ b/searchlib/src/tests/memoryindex/dictionary/DESC
@@ -0,0 +1 @@
+dictionary test. Take a look at dictionary_test.cpp for details.
diff --git a/searchlib/src/tests/memoryindex/dictionary/FILES b/searchlib/src/tests/memoryindex/dictionary/FILES
new file mode 100644
index 00000000000..1f3a8ebef87
--- /dev/null
+++ b/searchlib/src/tests/memoryindex/dictionary/FILES
@@ -0,0 +1 @@
+dictionary_test.cpp
diff --git a/searchlib/src/tests/memoryindex/dictionary/dictionary_test.cpp b/searchlib/src/tests/memoryindex/dictionary/dictionary_test.cpp
new file mode 100644
index 00000000000..ef8383b23c7
--- /dev/null
+++ b/searchlib/src/tests/memoryindex/dictionary/dictionary_test.cpp
@@ -0,0 +1,1528 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/* -*- mode: C++; coding: utf-8; -*- */
+
+/* $Id$
+ *
+ * Copyright (C) 2011 Yahoo! Technologies Norway AS
+ *
+ * All Rights Reserved
+ *
+ */
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+#include <vespa/searchlib/diskindex/checkpointfile.h>
+#include <vespa/searchlib/diskindex/fusion.h>
+#include <vespa/searchlib/diskindex/indexbuilder.h>
+#include <vespa/searchlib/diskindex/zcposoccrandread.h>
+#include <vespa/searchlib/fef/fieldpositionsiterator.h>
+#include <vespa/searchlib/fef/termfieldmatchdata.h>
+#include <vespa/searchlib/fef/termfieldmatchdataarray.h>
+#include <vespa/searchlib/index/docbuilder.h>
+#include <vespa/searchlib/index/dummyfileheadercontext.h>
+#include <vespa/searchlib/index/indexbuilder.h>
+#include <vespa/searchlib/index/schemautil.h>
+#include <vespa/searchlib/btree/btreeroot.hpp>
+#include <vespa/searchlib/btree/btreeiterator.hpp>
+#include <vespa/searchlib/btree/btreenodeallocator.hpp>
+#include <vespa/searchlib/btree/btreenode.hpp>
+#include <vespa/searchlib/btree/btreenodestore.hpp>
+#include <vespa/searchlib/memoryindex/dictionary.h>
+#include <vespa/searchlib/memoryindex/documentinverter.h>
+#include <vespa/searchlib/memoryindex/fieldinverter.h>
+#include <vespa/searchlib/memoryindex/document_remover.h>
+#include <vespa/searchlib/memoryindex/featurestore.h>
+#include <vespa/searchlib/memoryindex/postingiterator.h>
+#include <vespa/searchlib/memoryindex/ordereddocumentinserter.h>
+#include <vespa/searchlib/common/sequencedtaskexecutor.h>
+#include <vespa/searchlib/test/initrange.h>
+#include <vespa/vespalib/objects/nbostream.h>
+#include <vespa/vespalib/testkit/testapp.h>
+
+LOG_SETUP("dictionary_test");
+
+namespace search
+{
+
+using namespace btree;
+using namespace fef;
+using namespace index;
+using queryeval::SearchIterator;
+using document::Document;
+using diskindex::CheckPointFile;
+using vespalib::GenerationHandler;
+using test::InitRangeVerifier;
+
+namespace memoryindex
+{
+
+typedef Dictionary::PostingList PostingList;
+typedef PostingList::Iterator PostingItr;
+typedef PostingList::ConstIterator PostingConstItr;
+
+class MyBuilder : public IndexBuilder {
+private:
+ std::stringstream _ss;
+ bool _insideWord;
+ bool _insideField;
+ bool _insideDoc;
+ bool _insideElem;
+ bool _firstWord;
+ bool _firstField;
+ bool _firstDoc;
+ bool _firstElem;
+ bool _firstPos;
+public:
+
+ MyBuilder(const Schema &schema)
+ : IndexBuilder(schema),
+ _ss(),
+ _insideWord(false),
+ _insideField(false),
+ _insideDoc(false),
+ _insideElem(false),
+ _firstWord(true),
+ _firstField(true),
+ _firstDoc(true),
+ _firstElem(true),
+ _firstPos(true)
+ {
+ }
+
+ virtual void
+ startWord(const vespalib::stringref &word)
+ {
+ assert(_insideField);
+ assert(!_insideWord);
+ if (!_firstWord)
+ _ss << ",";
+ _ss << "w=" << word << "[";
+ _firstDoc = true;
+ _insideWord = true;
+ }
+
+ virtual void
+ endWord(void)
+ {
+ assert(_insideWord);
+ assert(!_insideDoc);
+ _ss << "]";
+ _firstWord = false;
+ _insideWord = false;
+ }
+
+ virtual void
+ startField(uint32_t fieldId)
+ {
+ assert(!_insideField);
+ if (!_firstField) _ss << ",";
+ _ss << "f=" << fieldId << "[";
+ _firstWord = true;
+ _insideField = true;
+ }
+
+ virtual void
+ endField()
+ {
+ assert(_insideField);
+ assert(!_insideWord);
+ _ss << "]";
+ _firstField = false;
+ _insideField = false;
+ }
+
+ virtual void
+ startDocument(uint32_t docId)
+ {
+ assert(_insideWord);
+ assert(!_insideDoc);
+ if (!_firstDoc) _ss << ",";
+ _ss << "d=" << docId << "[";
+ _firstElem = true;
+ _insideDoc = true;
+ }
+
+ virtual void
+ endDocument(void)
+ {
+ assert(_insideDoc);
+ assert(!_insideElem);
+ _ss << "]";
+ _firstDoc = false;
+ _insideDoc = false;
+ }
+
+ virtual void
+ startElement(uint32_t elementId,
+ int32_t weight,
+ uint32_t elementLen)
+ {
+ assert(_insideDoc);
+ assert(!_insideElem);
+ if (!_firstElem)
+ _ss << ",";
+ _ss << "e=" << elementId <<
+ ",w=" << weight << ",l=" << elementLen << "[";
+ _firstPos = true;
+ _insideElem = true;
+ }
+
+ virtual void
+ endElement(void)
+ {
+ assert(_insideElem);
+ _ss << "]";
+ _firstElem = false;
+ _insideElem = false;
+ }
+
+ virtual void
+ addOcc(const WordDocElementWordPosFeatures &features)
+ {
+ assert(_insideElem);
+ if (!_firstPos) _ss << ",";
+ _ss << features.getWordPos();
+ _firstPos = false;
+ }
+
+ std::string
+ toStr(void) const
+ {
+ return _ss.str();
+ }
+};
+
+std::string
+toString(FieldPositionsIterator posItr,
+ bool hasElements = false,
+ bool hasWeights = false)
+{
+ std::stringstream ss;
+ ss << "{";
+ ss << posItr.getFieldLength() << ":";
+ bool first = true;
+ for (; posItr.valid(); posItr.next()) {
+ if (!first) ss << ",";
+ ss << posItr.getPosition();
+ first = false;
+ if (hasElements) {
+ ss << "[e=" << posItr.getElementId();
+ if (hasWeights)
+ ss << ",w=" << posItr.getElementWeight();
+ ss << ",l=" << posItr.getElementLen() << "]";
+ }
+ }
+ ss << "}";
+ return ss.str();
+}
+
+bool
+assertPostingList(const std::string &exp,
+ PostingConstItr itr,
+ const FeatureStore *store = NULL)
+{
+ std::stringstream ss;
+ FeatureStore::DecodeContextCooked decoder(NULL);
+ TermFieldMatchData tfmd;
+ TermFieldMatchDataArray matchData;
+ matchData.add(&tfmd);
+ ss << "[";
+ for (size_t i = 0; itr.valid(); ++itr, ++i) {
+ if (i > 0) ss << ",";
+ uint32_t docId = itr.getKey();
+ ss << docId;
+ if (store != NULL) { // consider features as well
+ EntryRef ref(itr.getData());
+ store->setupForField(0, decoder);
+ store->setupForUnpackFeatures(ref, decoder);
+ decoder.unpackFeatures(matchData, docId);
+ ss << toString(tfmd.getIterator());
+ }
+ }
+ ss << "]";
+ return EXPECT_EQUAL(exp, ss.str());
+}
+
+bool
+assertPostingList(std::vector<uint32_t> &exp, PostingConstItr itr)
+{
+ std::stringstream ss;
+ ss << "[";
+ for (size_t i = 0; i < exp.size(); ++i) {
+ if (i > 0) ss << ",";
+ ss << exp[i];
+ }
+ ss << "]";
+ return assertPostingList(ss.str(), itr);
+}
+
+
+namespace
+{
+
+/**
+ * MockDictionary is a simple mockup of memory index, used to verify
+ * that we get correct posting lists from real memory index.
+ */
+class MockDictionary
+{
+ std::map<std::pair<vespalib::string, uint32_t>, std::set<uint32_t>> _dict;
+ vespalib::string _word;
+ uint32_t _fieldId;
+
+public:
+ void
+ setNextWord(const vespalib::string &word)
+ {
+ _word = word;
+ }
+
+ void
+ setNextField(uint32_t fieldId)
+ {
+ _fieldId = fieldId;
+ }
+
+ void
+ add(uint32_t docId)
+ {
+ _dict[std::make_pair(_word, _fieldId)].insert(docId);
+ }
+
+ void
+ remove(uint32_t docId)
+ {
+ _dict[std::make_pair(_word, _fieldId)].erase(docId);
+ }
+
+ std::vector<uint32_t>
+ find(const vespalib::string &word, uint32_t fieldId)
+ {
+ std::vector<uint32_t> res;
+ for (auto docId : _dict[std::make_pair(word, fieldId)] ) {
+ res.push_back(docId);
+ }
+ return res;
+ }
+
+ auto begin()
+ {
+ return _dict.begin();
+ }
+
+ auto end()
+ {
+ return _dict.end();
+ }
+};
+
+
+/**
+ * MockWordStoreScan is a helper class to ensure that previous word is
+ * still stored safely in memory, to satisfy OrderedDocumentInserter
+ * needs.
+ */
+class MockWordStoreScan
+{
+ vespalib::string _word0;
+ vespalib::string _word1;
+ vespalib::string *_prevWord;
+ vespalib::string *_word;
+
+public:
+ MockWordStoreScan()
+ : _word0(),
+ _word1(),
+ _prevWord(&_word0),
+ _word(&_word1)
+ {
+ }
+
+ const vespalib::string &
+ getWord() const
+ {
+ return *_word;
+ }
+
+ const vespalib::string &
+ setWord(const vespalib::string &word)
+ {
+ std::swap(_prevWord, _word);
+ *_word = word;
+ return *_word;
+ }
+};
+
+/**
+ * MyInserter performs insertions on both a mockup version of memory index
+ * and a real memory index. Mockup version is used to calculate expected
+ * answers.
+ */
+class MyInserter
+{
+ MockWordStoreScan _wordStoreScan;
+ MockDictionary _mock;
+ Dictionary _d;
+ DocIdAndPosOccFeatures _features;
+ IOrderedDocumentInserter *_documentInserter;
+
+public:
+ MyInserter(const Schema &schema)
+ : _wordStoreScan(),
+ _mock(),
+ _d(schema),
+ _features(),
+ _documentInserter(nullptr)
+ {
+ _features.addNextOcc(0, 0, 1, 1);
+ }
+
+ void
+ setNextWord(const vespalib::string &word)
+ {
+ const vespalib::string &w = _wordStoreScan.setWord(word);
+ _documentInserter->setNextWord(w);
+ _mock.setNextWord(w);
+ }
+
+ void
+ setNextField(uint32_t fieldId)
+ {
+ if (_documentInserter != nullptr) {
+ _documentInserter->flush();
+ }
+ _documentInserter = &_d.getFieldIndex(fieldId)->getInserter();
+ _documentInserter->rewind();
+ _mock.setNextField(fieldId);
+ }
+
+ void
+ add(uint32_t docId)
+ {
+ _documentInserter->add(docId, _features);
+ _mock.add(docId);
+ }
+
+ void
+ remove(uint32_t docId)
+ {
+ _documentInserter->remove(docId);
+ _mock.remove(docId);
+ }
+
+ bool
+ assertPosting(const vespalib::string &word,
+ uint32_t fieldId)
+ {
+ std::vector<uint32_t> exp = _mock.find(word, fieldId);
+ PostingConstItr itr = _d.find(word, fieldId);
+ return EXPECT_TRUE(assertPostingList(exp, itr));
+ }
+
+ bool
+ assertPostings()
+ {
+ if (_documentInserter != nullptr) {
+ _documentInserter->flush();
+ }
+ for (auto wfp : _mock) {
+ auto &wf = wfp.first;
+ auto &word = wf.first;
+ auto fieldId = wf.second;
+ if (!EXPECT_TRUE(assertPosting(word, fieldId))) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ void
+ rewind()
+ {
+ if (_documentInserter != nullptr) {
+ _documentInserter->flush();
+ _documentInserter = nullptr;
+ }
+ }
+
+ uint32_t
+ getNumUniqueWords()
+ {
+ return _d.getNumUniqueWords();
+ }
+
+ Dictionary &getDict() { return _d; }
+};
+
+void
+myremove(uint32_t docId, DocumentInverter &inv, Dictionary &d,
+ ISequencedTaskExecutor &invertThreads)
+{
+ inv.removeDocument(docId);
+ invertThreads.sync();
+ inv.pushDocuments(d, std::shared_ptr<IDestructorCallback>());
+}
+
+
+class WrapInserter
+{
+ OrderedDocumentInserter &_inserter;
+public:
+ WrapInserter(Dictionary &d, uint32_t fieldId)
+ : _inserter(d.getFieldIndex(fieldId)->getInserter())
+ {
+ }
+
+ WrapInserter &word(const vespalib::stringref &word_)
+ {
+ _inserter.setNextWord(word_);
+ return *this;
+ }
+
+ WrapInserter &add(uint32_t docId, const index::DocIdAndFeatures &features)
+ {
+ _inserter.add(docId, features);
+ return *this;
+ }
+
+ WrapInserter &add(uint32_t docId)
+ {
+ DocIdAndPosOccFeatures features;
+ features.addNextOcc(0, 0, 1, 1);
+ return add(docId, features);
+ }
+
+ WrapInserter &remove(uint32_t docId)
+ {
+ _inserter.remove(docId);
+ return *this;
+ }
+
+ WrapInserter &flush()
+ {
+ _inserter.flush();
+ return *this;
+ }
+
+ WrapInserter &rewind()
+ {
+ _inserter.rewind();
+ return *this;
+ }
+
+ btree::EntryRef
+ getWordRef()
+ {
+ return _inserter.getWordRef();
+ }
+};
+
+
+class MyDrainRemoves : IDocumentRemoveListener
+{
+ DocumentRemover &_remover;
+public:
+ virtual void remove(const vespalib::stringref, uint32_t) override { }
+
+ MyDrainRemoves(Dictionary &d, uint32_t fieldId)
+ : _remover(d.getFieldIndex(fieldId)->getDocumentRemover())
+ {
+ }
+
+ void drain(uint32_t docId)
+ {
+ _remover.remove(docId, *this);
+ }
+};
+
+void
+myPushDocument(DocumentInverter &inv, Dictionary &d)
+{
+ inv.pushDocuments(d, std::shared_ptr<IDestructorCallback>());
+}
+
+
+const FeatureStore *
+featureStorePtr(const Dictionary &d, uint32_t fieldId)
+{
+ return &d.getFieldIndex(fieldId)->getFeatureStore();
+}
+
+const FeatureStore &
+featureStoreRef(const Dictionary &d, uint32_t fieldId)
+{
+ return d.getFieldIndex(fieldId)->getFeatureStore();
+}
+
+
+DataStoreBase::MemStats
+getFeatureStoreMemStats(const Dictionary &d)
+{
+ DataStoreBase::MemStats res;
+ uint32_t numFields = d.getNumFields();
+ for (uint32_t fieldId = 0; fieldId < numFields; ++fieldId) {
+ DataStoreBase::MemStats stats =
+ d.getFieldIndex(fieldId)->getFeatureStore().getMemStats();
+ res += stats;
+ }
+ return res;
+}
+
+
+void myCommit(Dictionary &d, ISequencedTaskExecutor &pushThreads)
+{
+ uint32_t fieldId = 0;
+ for (auto &fieldIndex : d.getFieldIndexes()) {
+ pushThreads.execute(fieldId,
+ [fieldIndex(fieldIndex.get())]()
+ { fieldIndex->commit(); });
+ ++fieldId;
+ }
+ pushThreads.sync();
+}
+
+
+void
+myCompactFeatures(Dictionary &d, ISequencedTaskExecutor &pushThreads)
+{
+ uint32_t fieldId = 0;
+ for (auto &fieldIndex : d.getFieldIndexes()) {
+ pushThreads.execute(fieldId,
+ [fieldIndex(fieldIndex.get())]()
+ { fieldIndex->compactFeatures(); });
+ ++fieldId;
+ }
+}
+
+}
+
+
+struct Fixture
+{
+ Schema _schema;
+ Fixture() : _schema() {
+ _schema.addIndexField(Schema::IndexField("f0", Schema::STRING));
+ _schema.addIndexField(Schema::IndexField("f1", Schema::STRING));
+ _schema.addIndexField(Schema::IndexField("f2", Schema::STRING,
+ Schema::ARRAY));
+ _schema.addIndexField(Schema::IndexField("f3", Schema::STRING,
+ Schema::WEIGHTEDSET));
+ }
+ const Schema & getSchema() const { return _schema; }
+};
+
+TEST_F("requireThatFreshInsertWorks", Fixture)
+{
+ Dictionary d(f.getSchema());
+ SequencedTaskExecutor pushThreads(2);
+ EXPECT_TRUE(assertPostingList("[]", d.find("a", 0)));
+ EXPECT_TRUE(assertPostingList("[]", d.findFrozen("a", 0)));
+ EXPECT_EQUAL(0u, d.getNumUniqueWords());
+ WrapInserter(d, 0).word("a").add(10).flush();
+ EXPECT_TRUE(assertPostingList("[10]", d.find("a", 0)));
+ EXPECT_TRUE(assertPostingList("[]", d.findFrozen("a", 0)));
+ myCommit(d, pushThreads);
+ EXPECT_TRUE(assertPostingList("[10]", d.findFrozen("a", 0)));
+ EXPECT_EQUAL(1u, d.getNumUniqueWords());
+}
+
+TEST_F("requireThatAppendInsertWorks", Fixture)
+{
+ Dictionary d(f.getSchema());
+ SequencedTaskExecutor pushThreads(2);
+ WrapInserter(d, 0).word("a").add(10).flush().rewind().
+ word("a").add(5).flush();
+ EXPECT_TRUE(assertPostingList("[5,10]", d.find("a", 0)));
+ EXPECT_TRUE(assertPostingList("[]", d.findFrozen("a", 0)));
+ WrapInserter(d, 0).rewind().word("a").add(20).flush();
+ EXPECT_TRUE(assertPostingList("[5,10,20]", d.find("a", 0)));
+ EXPECT_TRUE(assertPostingList("[]", d.findFrozen("a", 0)));
+ myCommit(d, pushThreads);
+ EXPECT_TRUE(assertPostingList("[5,10,20]", d.findFrozen("a", 0)));
+}
+
+TEST_F("requireThatMultiplePostingListsCanExist", Fixture)
+{
+ Dictionary d(f.getSchema());
+ WrapInserter(d, 0).word("a").add(10).word("b").add(11).add(15).flush();
+ WrapInserter(d, 1).word("a").add(5).word("b").add(12).flush();
+ EXPECT_EQUAL(4u, d.getNumUniqueWords());
+ EXPECT_TRUE(assertPostingList("[10]", d.find("a", 0)));
+ EXPECT_TRUE(assertPostingList("[5]", d.find("a", 1)));
+ EXPECT_TRUE(assertPostingList("[11,15]", d.find("b", 0)));
+ EXPECT_TRUE(assertPostingList("[12]", d.find("b", 1)));
+ EXPECT_TRUE(assertPostingList("[]", d.find("a", 2)));
+ EXPECT_TRUE(assertPostingList("[]", d.find("c", 0)));
+}
+
+TEST_F("requireThatRemoveWorks", Fixture)
+{
+ Dictionary d(f.getSchema());
+ WrapInserter(d, 0).word("a").remove(10).flush();
+ EXPECT_TRUE(assertPostingList("[]", d.find("a", 0)));
+ WrapInserter(d, 0).add(10).add(20).add(30).flush();
+ EXPECT_TRUE(assertPostingList("[10,20,30]", d.find("a", 0)));
+ WrapInserter(d, 0).rewind().word("a").remove(10).flush();
+ EXPECT_TRUE(assertPostingList("[20,30]", d.find("a", 0)));
+ WrapInserter(d, 0).remove(20).flush();
+ EXPECT_TRUE(assertPostingList("[30]", d.find("a", 0)));
+ WrapInserter(d, 0).remove(30).flush();
+ EXPECT_TRUE(assertPostingList("[]", d.find("a", 0)));
+ EXPECT_EQUAL(1u, d.getNumUniqueWords());
+ MyDrainRemoves(d, 0).drain(10);
+ WrapInserter(d, 0).rewind().word("a").add(10).flush();
+ EXPECT_TRUE(assertPostingList("[10]", d.find("a", 0)));
+}
+
+TEST_F("requireThatMultipleInsertAndRemoveWorks", Fixture)
+{
+ MyInserter inserter(f.getSchema());
+ uint32_t numFields = 4;
+ for (uint32_t fi = 0; fi < numFields; ++fi) {
+ inserter.setNextField(fi);
+ for (char w = 'a'; w <= 'z'; ++w) {
+ std::string word(&w, 1);
+ inserter.setNextWord(word);
+ for (uint32_t di = 0; di < (uint32_t) w; ++di) { // insert
+ inserter.add(di * 3);
+ }
+ EXPECT_EQUAL((w - 'a' + 1u) + ('z' - 'a' +1u) * fi,
+ inserter.getNumUniqueWords());
+ }
+ }
+ EXPECT_TRUE(inserter.assertPostings());
+ inserter.rewind();
+ for (uint32_t fi = 0; fi < numFields; ++fi) {
+ MyDrainRemoves drainRemoves(inserter.getDict(), fi);
+ for (uint32_t di = 0; di < 'z' * 2 + 1; ++di) {
+ drainRemoves.drain(di);
+ }
+ }
+ for (uint32_t fi = 0; fi < numFields; ++fi) {
+ inserter.setNextField(fi);
+ for (char w = 'a'; w <= 'z'; ++w) {
+ std::string word(&w, 1);
+ inserter.setNextWord(word);
+ for (uint32_t di = 0; di < (uint32_t) w; ++di) {
+ // remove half of the docs
+ if ((di % 2) == 0) {
+ inserter.remove(di * 2);
+ } else {
+ inserter.add(di * 2 + 1);
+ }
+ }
+ }
+ }
+ EXPECT_TRUE(inserter.assertPostings());
+}
+
+void
+addElement(DocIdAndFeatures &f,
+ uint32_t elemLen,
+ uint32_t numOccs,
+ int32_t weight = 1)
+{
+ f._elements.push_back(WordDocElementFeatures(f._elements.size()));
+ f._elements.back().setElementLen(elemLen);
+ f._elements.back().setWeight(weight);
+ f._elements.back().setNumOccs(numOccs);
+ for (uint32_t i = 0; i < numOccs; ++i) {
+ f._wordPositions.push_back(WordDocElementWordPosFeatures(i));
+ }
+}
+
+DocIdAndFeatures
+getFeatures(uint32_t elemLen, uint32_t numOccs, int32_t weight = 1)
+{
+ DocIdAndFeatures f;
+ addElement(f, elemLen, numOccs, weight);
+ return f;
+}
+
+TEST_F("requireThatFeaturesAreInPostingLists", Fixture)
+{
+ Dictionary d(f.getSchema());
+ WrapInserter(d, 0).word("a").add(1, getFeatures(4, 2)).flush();
+ EXPECT_TRUE(assertPostingList("[1{4:0,1}]",
+ d.find("a", 0),
+ featureStorePtr(d, 0)));
+ WrapInserter(d, 0).word("b").add(2, getFeatures(5, 1)).
+ add(3, getFeatures(6, 2)).flush();
+ EXPECT_TRUE(assertPostingList("[2{5:0},3{6:0,1}]",
+ d.find("b", 0),
+ featureStorePtr(d, 0)));
+ WrapInserter(d, 1).word("c").add(4, getFeatures(7, 2)).flush();
+ EXPECT_TRUE(assertPostingList("[4{7:0,1}]",
+ d.find("c", 1),
+ featureStorePtr(d, 1)));
+}
+
+TEST_F("require that initRange conforms", Fixture) {
+ Dictionary d(f.getSchema());
+ InitRangeVerifier ir;
+ WrapInserter inserter(d, 0);
+ inserter.word("a");
+ for (uint32_t docId : ir.getExpectedDocIds()) {
+ inserter.add(docId);
+ }
+ inserter.flush();
+
+ TermFieldMatchData tfmd;
+ TermFieldMatchDataArray matchData;
+ matchData.add(&tfmd);
+ PostingIterator itr(d.find("a", 0), featureStoreRef(d, 0), 0, matchData);
+ ir.verify(itr);
+}
+
+TEST_F("requireThatPostingIteratorIsWorking", Fixture)
+{
+ Dictionary d(f.getSchema());
+ WrapInserter(d, 0).word("a").add(10, getFeatures(4, 1)).
+ add(20, getFeatures(5, 2)).
+ add(30, getFeatures(6, 1)).
+ add(40, getFeatures(7, 2)).flush();
+ TermFieldMatchData tfmd;
+ TermFieldMatchDataArray matchData;
+ matchData.add(&tfmd);
+ {
+ PostingIterator itr(d.find("not", 0),
+ featureStoreRef(d, 0),
+ 0, matchData);
+ itr.initFullRange();
+ EXPECT_TRUE(itr.isAtEnd());
+ }
+ {
+ PostingIterator itr(d.find("a", 0),
+ featureStoreRef(d, 0),
+ 0, matchData);
+ itr.initFullRange();
+ EXPECT_EQUAL(10u, itr.getDocId());
+ itr.unpack(10);
+ EXPECT_EQUAL("{4:0}", toString(tfmd.getIterator()));
+ EXPECT_TRUE(!itr.seek(25));
+ EXPECT_EQUAL(30u, itr.getDocId());
+ itr.unpack(30);
+ EXPECT_EQUAL("{6:0}", toString(tfmd.getIterator()));
+ EXPECT_TRUE(itr.seek(40));
+ EXPECT_EQUAL(40u, itr.getDocId());
+ itr.unpack(40);
+ EXPECT_EQUAL("{7:0,1}", toString(tfmd.getIterator()));
+ EXPECT_TRUE(!itr.seek(41));
+ EXPECT_TRUE(itr.isAtEnd());
+ }
+}
+
+TEST_F("requireThatDumpingToIndexBuilderIsWorking", Fixture)
+{
+ {
+ MyBuilder b(f.getSchema());
+ WordDocElementWordPosFeatures wpf;
+ b.startField(4);
+ b.startWord("a");
+ b.startDocument(2);
+ b.startElement(0, 10, 20);
+ wpf.setWordPos(1);
+ b.addOcc(wpf);
+ wpf.setWordPos(3);
+ b.addOcc(wpf);
+ b.endElement();
+ b.endDocument();
+ b.endWord();
+ b.endField();
+ EXPECT_EQUAL("f=4[w=a[d=2[e=0,w=10,l=20[1,3]]]]", b.toStr());
+ }
+ {
+ Dictionary d(f.getSchema());
+ MyBuilder b(f.getSchema());
+ DocIdAndFeatures df;
+ WrapInserter(d, 1).word("a").add(5, getFeatures(2, 1)).
+ add(7, getFeatures(3, 2)).
+ word("b").add(5, getFeatures(12, 2)).flush();
+
+ df = getFeatures(4, 1);
+ addElement(df, 5, 2);
+ WrapInserter(d, 2).word("a").add(5, df);
+ df = getFeatures(6, 1);
+ addElement(df, 7, 2);
+ WrapInserter(d, 2).add(7, df).flush();
+
+ df = getFeatures(8, 1, 12);
+ addElement(df, 9, 2, 13);
+ WrapInserter(d, 3).word("a").add(5, df);
+ df = getFeatures(10, 1, 14);
+ addElement(df, 11, 2, 15);
+ WrapInserter(d, 3).add(7, df).flush();
+
+ d.dump(b);
+
+ EXPECT_EQUAL("f=0[],"
+ "f=1[w=a[d=5[e=0,w=1,l=2[0]],d=7[e=0,w=1,l=3[0,1]]],"
+ "w=b[d=5[e=0,w=1,l=12[0,1]]]],"
+ "f=2[w=a[d=5[e=0,w=1,l=4[0],e=1,w=1,l=5[0,1]],"
+ "d=7[e=0,w=1,l=6[0],e=1,w=1,l=7[0,1]]]],"
+ "f=3[w=a[d=5[e=0,w=12,l=8[0],e=1,w=13,l=9[0,1]],"
+ "d=7[e=0,w=14,l=10[0],e=1,w=15,l=11[0,1]]]]",
+ b.toStr());
+ }
+ { // test word with no docs
+ Dictionary d(f.getSchema());
+ WrapInserter(d, 0).word("a").add(2, getFeatures(2, 1)).
+ word("b").add(4, getFeatures(4, 1)).flush().rewind().
+ word("a").remove(2).flush();
+ {
+ MyBuilder b(f.getSchema());
+ d.dump(b);
+ EXPECT_EQUAL("f=0[w=b[d=4[e=0,w=1,l=4[0]]]],f=1[],f=2[],f=3[]",
+ b.toStr());
+ }
+ {
+ search::diskindex::IndexBuilder b(f.getSchema());
+ b.setPrefix("dump");
+ TuneFileIndexing tuneFileIndexing;
+ DummyFileHeaderContext fileHeaderContext;
+ b.open(5, 2, tuneFileIndexing, fileHeaderContext);
+ d.dump(b);
+ b.close();
+ }
+ }
+}
+
+
+template <typename FixtureBase>
+class DictionaryFixture : public FixtureBase
+{
+public:
+ using FixtureBase::getSchema;
+ Dictionary _d;
+ DocBuilder _b;
+ SequencedTaskExecutor _invertThreads;
+ SequencedTaskExecutor _pushThreads;
+ DocumentInverter _inv;
+
+ DictionaryFixture()
+ : FixtureBase(),
+ _d(getSchema()),
+ _b(getSchema()),
+ _invertThreads(2),
+ _pushThreads(2),
+ _inv(getSchema(), _invertThreads, _pushThreads)
+ {
+ }
+};
+
+
+TEST_F("requireThatInversionIsWorking", DictionaryFixture<Fixture>)
+{
+ Document::UP doc;
+
+ f._b.startDocument("doc::10");
+ f._b.startIndexField("f0").
+ addStr("a").addStr("b").addStr("c").addStr("d").
+ endField();
+ doc = f._b.endDocument();
+ f._inv.invertDocument(10, *doc);
+ f._invertThreads.sync();
+ myPushDocument(f._inv, f._d);
+ f._pushThreads.sync();
+
+ f._b.startDocument("doc::20");
+ f._b.startIndexField("f0").
+ addStr("a").addStr("a").addStr("b").addStr("c").addStr("d").
+ endField();
+ doc = f._b.endDocument();
+ f._inv.invertDocument(20, *doc);
+ f._invertThreads.sync();
+ myPushDocument(f._inv, f._d);
+ f._pushThreads.sync();
+
+ f._b.startDocument("doc::30");
+ f._b.startIndexField("f0").
+ addStr("a").addStr("b").addStr("c").addStr("d").
+ addStr("e").addStr("f").
+ endField();
+ f._b.startIndexField("f1").
+ addStr("\nw2").addStr("w").addStr("x").
+ addStr("\nw3").addStr("y").addStr("z").
+ endField();
+ f._b.startIndexField("f2").
+ startElement(4).
+ addStr("w").addStr("x").
+ endElement().
+ startElement(5).
+ addStr("y").addStr("z").
+ endElement().
+ endField();
+ f._b.startIndexField("f3").
+ startElement(6).
+ addStr("w").addStr("x").
+ endElement().
+ startElement(7).
+ addStr("y").addStr("z").
+ endElement().
+ endField();
+ doc = f._b.endDocument();
+ f._inv.invertDocument(30, *doc);
+ f._invertThreads.sync();
+ myPushDocument(f._inv, f._d);
+ f._pushThreads.sync();
+
+ f._b.startDocument("doc::40");
+ f._b.startIndexField("f0").
+ addStr("a").addStr("a").addStr("b").addStr("c").addStr("a").
+ addStr("e").addStr("f").
+ endField();
+ doc = f._b.endDocument();
+ f._inv.invertDocument(40, *doc);
+ f._invertThreads.sync();
+ myPushDocument(f._inv, f._d);
+ f._pushThreads.sync();
+
+ f._b.startDocument("doc::999");
+ f._b.startIndexField("f0").
+ addStr("this").addStr("is").addStr("_a_").addStr("test").
+ addStr("for").addStr("insertion").addStr("speed").addStr("with").
+ addStr("more").addStr("than").addStr("just").addStr("__a__").
+ addStr("few").addStr("words").addStr("present").addStr("in").
+ addStr("some").addStr("of").addStr("the").addStr("fields").
+ endField();
+ f._b.startIndexField("f1").
+ addStr("the").addStr("other").addStr("field").addStr("also").
+ addStr("has").addStr("some").addStr("content").
+ endField();
+ f._b.startIndexField("f2").
+ startElement(1).
+ addStr("strange").addStr("things").addStr("here").
+ addStr("has").addStr("some").addStr("content").
+ endElement().
+ endField();
+ f._b.startIndexField("f3").
+ startElement(3).
+ addStr("not").addStr("a").addStr("weighty").addStr("argument").
+ endElement().
+ endField();
+ doc = f._b.endDocument();
+ for (uint32_t docId = 10000; docId < 20000; ++docId) {
+ f._inv.invertDocument(docId, *doc);
+ f._invertThreads.sync();
+ myPushDocument(f._inv, f._d);
+ f._pushThreads.sync();
+ }
+
+ f._pushThreads.sync();
+ DataStoreBase::MemStats beforeStats = getFeatureStoreMemStats(f._d);
+ LOG(info,
+ "Before feature compaction: allocElems=%" PRIu64 ", usedElems=%" PRIu64
+ ", deadElems=%" PRIu64 ", holdElems=%" PRIu64
+ ", freeBuffers=%" PRIu32 ", activeBuffers=%" PRIu32
+ ", holdBuffers=%" PRIu32,
+ beforeStats._allocElems,
+ beforeStats._usedElems,
+ beforeStats._deadElems,
+ beforeStats._holdElems,
+ beforeStats._freeBuffers,
+ beforeStats._activeBuffers,
+ beforeStats._holdBuffers);
+ myCompactFeatures(f._d, f._pushThreads);
+ std::vector<std::unique_ptr<GenerationHandler::Guard>> guards;
+ for (auto &fieldIndex : f._d.getFieldIndexes()) {
+ guards.push_back(std::make_unique<GenerationHandler::Guard>
+ (fieldIndex->takeGenerationGuard()));
+ }
+ myCommit(f._d, f._pushThreads);
+ DataStoreBase::MemStats duringStats = getFeatureStoreMemStats(f._d);
+ LOG(info,
+ "During feature compaction: allocElems=%" PRIu64 ", usedElems=%" PRIu64
+ ", deadElems=%" PRIu64 ", holdElems=%" PRIu64
+ ", freeBuffers=%" PRIu32 ", activeBuffers=%" PRIu32
+ ", holdBuffers=%" PRIu32,
+ duringStats._allocElems,
+ duringStats._usedElems,
+ duringStats._deadElems,
+ duringStats._holdElems,
+ duringStats._freeBuffers,
+ duringStats._activeBuffers,
+ duringStats._holdBuffers);
+ guards.clear();
+ myCommit(f._d, f._pushThreads);
+ DataStoreBase::MemStats afterStats = getFeatureStoreMemStats(f._d);
+ LOG(info,
+ "After feature compaction: allocElems=%" PRIu64 ", usedElems=%" PRIu64
+ ", deadElems=%" PRIu64 ", holdElems=%" PRIu64
+ ", freeBuffers=%" PRIu32 ", activeBuffers=%" PRIu32
+ ", holdBuffers=%" PRIu32,
+ afterStats._allocElems,
+ afterStats._usedElems,
+ afterStats._deadElems,
+ afterStats._holdElems,
+ afterStats._freeBuffers,
+ afterStats._activeBuffers,
+ afterStats._holdBuffers);
+
+ TermFieldMatchData tfmd;
+ TermFieldMatchDataArray matchData;
+ matchData.add(&tfmd);
+ {
+ PostingIterator itr(f._d.findFrozen("not", 0), featureStoreRef(f._d, 0),
+ 0, matchData);
+ itr.initFullRange();
+ EXPECT_TRUE(itr.isAtEnd());
+ }
+ {
+ PostingIterator itr(f._d.findFrozen("a", 0), featureStoreRef(f._d, 0),
+ 0, matchData);
+ itr.initFullRange();
+ EXPECT_EQUAL(10u, itr.getDocId());
+ itr.unpack(10);
+ EXPECT_EQUAL("{4:0}", toString(tfmd.getIterator()));
+ EXPECT_TRUE(!itr.seek(25));
+ EXPECT_EQUAL(30u, itr.getDocId());
+ itr.unpack(30);
+ EXPECT_EQUAL("{6:0}", toString(tfmd.getIterator()));
+ EXPECT_TRUE(itr.seek(40));
+ EXPECT_EQUAL(40u, itr.getDocId());
+ itr.unpack(40);
+ EXPECT_EQUAL("{7:0,1,4}", toString(tfmd.getIterator()));
+ EXPECT_TRUE(!itr.seek(41));
+ EXPECT_TRUE(itr.isAtEnd());
+ }
+ {
+ PostingIterator itr(f._d.findFrozen("x", 0), featureStoreRef(f._d, 0),
+ 0, matchData);
+ itr.initFullRange();
+ EXPECT_TRUE(itr.isAtEnd());
+ }
+ {
+ PostingIterator itr(f._d.findFrozen("x", 1), featureStoreRef(f._d, 1),
+ 1, matchData);
+ itr.initFullRange();
+ EXPECT_EQUAL(30u, itr.getDocId());
+ itr.unpack(30);
+ EXPECT_EQUAL("{6:2[e=0,w=1,l=6]}",
+ toString(tfmd.getIterator(), true, true));
+ }
+ {
+ PostingIterator itr(f._d.findFrozen("x", 2), featureStoreRef(f._d, 2),
+ 2, matchData);
+ itr.initFullRange();
+ EXPECT_EQUAL(30u, itr.getDocId());
+ itr.unpack(30);
+ // weight is hardcoded to 1 for new style il doc array field
+ EXPECT_EQUAL("{2:1[e=0,w=1,l=2]}",
+ toString(tfmd.getIterator(), true, true));
+ }
+ {
+ PostingIterator itr(f._d.findFrozen("x", 3), featureStoreRef(f._d, 3),
+ 3, matchData);
+ itr.initFullRange();
+ EXPECT_EQUAL(30u, itr.getDocId());
+ itr.unpack(30);
+ EXPECT_EQUAL("{2:1[e=0,w=6,l=2]}",
+ toString(tfmd.getIterator(), true, true));
+ }
+}
+
+TEST_F("requireThatInverterHandlesRemoveViaDocumentRemover",
+ DictionaryFixture<Fixture>)
+{
+ Document::UP doc;
+
+ f._b.startDocument("doc::1");
+ f._b.startIndexField("f0").addStr("a").addStr("b").endField();
+ f._b.startIndexField("f1").addStr("a").addStr("c").endField();
+ Document::UP doc1 = f._b.endDocument();
+ f._inv.invertDocument(1, *doc1.get());
+ f._invertThreads.sync();
+ myPushDocument(f._inv, f._d);
+ f._pushThreads.sync();
+
+ f._b.startDocument("doc::2");
+ f._b.startIndexField("f0").addStr("b").addStr("c").endField();
+ Document::UP doc2 = f._b.endDocument();
+ f._inv.invertDocument(2, *doc2.get());
+ f._invertThreads.sync();
+ myPushDocument(f._inv, f._d);
+ f._pushThreads.sync();
+
+ EXPECT_TRUE(assertPostingList("[1]", f._d.find("a", 0)));
+ EXPECT_TRUE(assertPostingList("[1,2]", f._d.find("b", 0)));
+ EXPECT_TRUE(assertPostingList("[2]", f._d.find("c", 0)));
+ EXPECT_TRUE(assertPostingList("[1]", f._d.find("a", 1)));
+ EXPECT_TRUE(assertPostingList("[1]", f._d.find("c", 1)));
+
+ myremove(1, f._inv, f._d, f._invertThreads);
+ f._pushThreads.sync();
+
+ EXPECT_TRUE(assertPostingList("[]", f._d.find("a", 0)));
+ EXPECT_TRUE(assertPostingList("[2]", f._d.find("b", 0)));
+ EXPECT_TRUE(assertPostingList("[2]", f._d.find("c", 0)));
+ EXPECT_TRUE(assertPostingList("[]", f._d.find("a", 1)));
+ EXPECT_TRUE(assertPostingList("[]", f._d.find("c", 1)));
+}
+
+class UriFixture
+{
+public:
+ Schema _schema;
+ UriFixture()
+ : _schema()
+ {
+ _schema.addUriIndexFields(Schema::IndexField("iu",
+ Schema::STRING));
+ _schema.addUriIndexFields(Schema::IndexField("iau",
+ Schema::STRING,
+ Schema::ARRAY));
+ _schema.addUriIndexFields(Schema::IndexField("iwu",
+ Schema::STRING,
+ Schema::WEIGHTEDSET));
+ }
+ const Schema & getSchema() const { return _schema; }
+};
+
+
+TEST_F("requireThatUriIndexingIsWorking", DictionaryFixture<UriFixture>)
+{
+ Document::UP doc;
+
+ f._b.startDocument("doc::10");
+ f._b.startIndexField("iu").
+ startSubField("all").
+ addUrlTokenizedString("http://www.yahoo.com:81/fluke?ab=2#4").
+ endSubField().
+ startSubField("scheme").
+ addUrlTokenizedString("http").
+ endSubField().
+ startSubField("host").
+ addUrlTokenizedString("www.yahoo.com").
+ endSubField().
+ startSubField("port").
+ addUrlTokenizedString("81").
+ endSubField().
+ startSubField("path").
+ addUrlTokenizedString("/fluke").
+ endSubField().
+ startSubField("query").
+ addUrlTokenizedString("ab=2").
+ endSubField().
+ startSubField("fragment").
+ addUrlTokenizedString("4").
+ endSubField().
+ endField();
+ f._b.startIndexField("iau").
+ startElement(1).
+ startSubField("all").
+ addUrlTokenizedString("http://www.yahoo.com:82/fluke?ab=2#8").
+ endSubField().
+ startSubField("scheme").
+ addUrlTokenizedString("http").
+ endSubField().
+ startSubField("host").
+ addUrlTokenizedString("www.yahoo.com").
+ endSubField().
+ startSubField("port").
+ addUrlTokenizedString("82").
+ endSubField().
+ startSubField("path").
+ addUrlTokenizedString("/fluke").
+ endSubField().
+ startSubField("query").
+ addUrlTokenizedString("ab=2").
+ endSubField().
+ startSubField("fragment").
+ addUrlTokenizedString("8").
+ endSubField().
+ endElement().
+ startElement(1).
+ startSubField("all").
+ addUrlTokenizedString("http://www.flickr.com:82/fluke?ab=2#9").
+ endSubField().
+ startSubField("scheme").
+ addUrlTokenizedString("http").
+ endSubField().
+ startSubField("host").
+ addUrlTokenizedString("www.flickr.com").
+ endSubField().
+ startSubField("port").
+ addUrlTokenizedString("82").
+ endSubField().
+ startSubField("path").
+ addUrlTokenizedString("/fluke").
+ endSubField().
+ startSubField("query").
+ addUrlTokenizedString("ab=2").
+ endSubField().
+ startSubField("fragment").
+ addUrlTokenizedString("9").
+ endSubField().
+ endElement().
+ endField();
+ f._b.startIndexField("iwu").
+ startElement(4).
+ startSubField("all").
+ addUrlTokenizedString("http://www.yahoo.com:83/fluke?ab=2#12").
+ endSubField().
+ startSubField("scheme").
+ addUrlTokenizedString("http").
+ endSubField().
+ startSubField("host").
+ addUrlTokenizedString("www.yahoo.com").
+ endSubField().
+ startSubField("port").
+ addUrlTokenizedString("83").
+ endSubField().
+ startSubField("path").
+ addUrlTokenizedString("/fluke").
+ endSubField().
+ startSubField("query").
+ addUrlTokenizedString("ab=2").
+ endSubField().
+ startSubField("fragment").
+ addUrlTokenizedString("12").
+ endSubField().
+ endElement().
+ startElement(7).
+ startSubField("all").
+ addUrlTokenizedString("http://www.flickr.com:85/fluke?ab=2#13").
+ endSubField().
+ startSubField("scheme").
+ addUrlTokenizedString("http").
+ endSubField().
+ startSubField("host").
+ addUrlTokenizedString("www.flickr.com").
+ endSubField().
+ startSubField("port").
+ addUrlTokenizedString("85").
+ endSubField().
+ startSubField("path").
+ addUrlTokenizedString("/fluke").
+ endSubField().
+ startSubField("query").
+ addUrlTokenizedString("ab=2").
+ endSubField().
+ startSubField("fragment").
+ addUrlTokenizedString("13").
+ endSubField().
+ endElement().
+ endField();
+ doc = f._b.endDocument();
+ f._inv.invertDocument(10, *doc);
+ f._invertThreads.sync();
+ myPushDocument(f._inv, f._d);
+
+ f._pushThreads.sync();
+
+ TermFieldMatchData tfmd;
+ TermFieldMatchDataArray matchData;
+ matchData.add(&tfmd);
+ {
+ uint32_t fieldId = f.getSchema().getIndexFieldId("iu");
+ PostingIterator itr(f._d.findFrozen("not", fieldId),
+ featureStoreRef(f._d, fieldId),
+ fieldId, matchData);
+ itr.initFullRange();
+ EXPECT_TRUE(itr.isAtEnd());
+ }
+ {
+ uint32_t fieldId = f.getSchema().getIndexFieldId("iu");
+ PostingIterator itr(f._d.findFrozen("yahoo", fieldId),
+ featureStoreRef(f._d, fieldId),
+ fieldId, matchData);
+ itr.initFullRange();
+ EXPECT_EQUAL(10u, itr.getDocId());
+ itr.unpack(10);
+ EXPECT_EQUAL("{9:2}", toString(tfmd.getIterator()));
+ EXPECT_TRUE(!itr.seek(25));
+ EXPECT_TRUE(itr.isAtEnd());
+ }
+ {
+ uint32_t fieldId = f.getSchema().getIndexFieldId("iau");
+ PostingIterator itr(f._d.findFrozen("yahoo", fieldId),
+ featureStoreRef(f._d, fieldId),
+ fieldId, matchData);
+ itr.initFullRange();
+ EXPECT_EQUAL(10u, itr.getDocId());
+ itr.unpack(10);
+ EXPECT_EQUAL("{9:2[e=0,l=9]}",
+ toString(tfmd.getIterator(), true, false));
+ EXPECT_TRUE(!itr.seek(25));
+ EXPECT_TRUE(itr.isAtEnd());
+ }
+ {
+ uint32_t fieldId = f.getSchema().getIndexFieldId("iwu");
+ PostingIterator itr(f._d.findFrozen("yahoo", fieldId),
+ featureStoreRef(f._d, fieldId),
+ fieldId, matchData);
+ itr.initFullRange();
+ EXPECT_EQUAL(10u, itr.getDocId());
+ itr.unpack(10);
+ EXPECT_EQUAL("{9:2[e=0,w=4,l=9]}",
+ toString(tfmd.getIterator(), true, true));
+ EXPECT_TRUE(!itr.seek(25));
+ EXPECT_TRUE(itr.isAtEnd());
+ }
+ {
+ search::diskindex::IndexBuilder dib(f.getSchema());
+ dib.setPrefix("urldump");
+ TuneFileIndexing tuneFileIndexing;
+ DummyFileHeaderContext fileHeaderContext;
+ dib.open(11, f._d.getNumUniqueWords(), tuneFileIndexing,
+ fileHeaderContext);
+ f._d.dump(dib);
+ dib.close();
+ }
+}
+
+
+class SingleFieldFixture
+{
+public:
+ Schema _schema;
+ SingleFieldFixture()
+ : _schema()
+ {
+ _schema.addIndexField(Schema::IndexField("i", Schema::STRING));
+ }
+ const Schema & getSchema() const { return _schema; }
+};
+
+TEST_F("requireThatCjkIndexingIsWorking", DictionaryFixture<SingleFieldFixture>)
+{
+ Document::UP doc;
+
+ f._b.startDocument("doc::10");
+ f._b.startIndexField("i").
+ addStr("我就是那个").
+ setAutoSpace(false).
+ addStr("大灰狼").
+ setAutoSpace(true).
+ endField();
+ doc = f._b.endDocument();
+ f._inv.invertDocument(10, *doc);
+ f._invertThreads.sync();
+ myPushDocument(f._inv, f._d);
+
+ f._pushThreads.sync();
+
+ TermFieldMatchData tfmd;
+ TermFieldMatchDataArray matchData;
+ matchData.add(&tfmd);
+ {
+ uint32_t fieldId = f.getSchema().getIndexFieldId("i");
+ PostingIterator itr(f._d.findFrozen("not", fieldId),
+ featureStoreRef(f._d, fieldId),
+ fieldId, matchData);
+ itr.initFullRange();
+ EXPECT_TRUE(itr.isAtEnd());
+ }
+ {
+ uint32_t fieldId = f.getSchema().getIndexFieldId("i");
+ PostingIterator itr(f._d.findFrozen("我就"
+ "是那个",
+ fieldId),
+ featureStoreRef(f._d, fieldId),
+ fieldId, matchData);
+ itr.initFullRange();
+ EXPECT_EQUAL(10u, itr.getDocId());
+ itr.unpack(10);
+ EXPECT_EQUAL("{2:0}", toString(tfmd.getIterator()));
+ EXPECT_TRUE(!itr.seek(25));
+ EXPECT_TRUE(itr.isAtEnd());
+ }
+ {
+ uint32_t fieldId = f.getSchema().getIndexFieldId("i");
+ PostingIterator itr(f._d.findFrozen("大灰"
+ "狼",
+ fieldId),
+ featureStoreRef(f._d, fieldId),
+ fieldId, matchData);
+ itr.initFullRange();
+ EXPECT_EQUAL(10u, itr.getDocId());
+ itr.unpack(10);
+ EXPECT_EQUAL("{2:1}", toString(tfmd.getIterator()));
+ EXPECT_TRUE(!itr.seek(25));
+ EXPECT_TRUE(itr.isAtEnd());
+ }
+}
+
+void
+insertAndAssertTuple(const vespalib::string &word, uint32_t fieldId, uint32_t docId,
+ Dictionary &dict)
+{
+ EntryRef wordRef = WrapInserter(dict, fieldId).rewind().word(word).
+ add(docId).flush().getWordRef();
+ EXPECT_EQUAL(word,
+ dict.getFieldIndex(fieldId)->getWordStore().getWord(wordRef));
+ MyDrainRemoves(dict, fieldId).drain(docId);
+}
+
+TEST_F("require that insert tells which word ref that was inserted", Fixture)
+{
+ Dictionary d(f.getSchema());
+ insertAndAssertTuple("a", 1, 11, d);
+ insertAndAssertTuple("b", 1, 11, d);
+ insertAndAssertTuple("a", 2, 11, d);
+
+ insertAndAssertTuple("a", 1, 22, d);
+ insertAndAssertTuple("b", 2, 22, d);
+ insertAndAssertTuple("c", 2, 22, d);
+}
+
+struct RemoverFixture : public Fixture
+{
+ Dictionary _d;
+ SequencedTaskExecutor _invertThreads;
+ SequencedTaskExecutor _pushThreads;
+
+ RemoverFixture()
+ :
+ Fixture(),
+ _d(getSchema()),
+ _invertThreads(2),
+ _pushThreads(2)
+ {
+ }
+ void assertPostingLists(const vespalib::string &e1,
+ const vespalib::string &e2,
+ const vespalib::string &e3) {
+ EXPECT_TRUE(assertPostingList(e1, _d.find("a", 1)));
+ EXPECT_TRUE(assertPostingList(e2, _d.find("a", 2)));
+ EXPECT_TRUE(assertPostingList(e3, _d.find("b", 1)));
+ }
+ void remove(uint32_t docId) {
+ DocumentInverter inv(getSchema(), _invertThreads, _pushThreads);
+ myremove(docId, inv, _d, _invertThreads);
+ _pushThreads.sync();
+ EXPECT_FALSE(_d.getFieldIndex(0u)->getDocumentRemover().
+ getStore().get(docId).valid());
+ }
+};
+
+TEST_F("require that document remover can remove several documents", RemoverFixture)
+{
+ WrapInserter(f._d, 1).word("a").add(11).add(13).add(15).
+ word("b").add(11).add(15).flush();
+ WrapInserter(f._d, 2).word("a").add(11).add(13).flush();
+ f.assertPostingLists("[11,13,15]", "[11,13]", "[11,15]");
+
+ f.remove(13);
+ f.assertPostingLists("[11,15]", "[11]", "[11,15]");
+
+ f.remove(11);
+ f.assertPostingLists("[15]", "[]", "[15]");
+
+ f.remove(15);
+ f.assertPostingLists("[]", "[]", "[]");
+}
+
+TEST_F("require that removal of non-existing document does not do anything", RemoverFixture)
+{
+ WrapInserter(f._d, 1).word("a").add(11).word("b").add(11).flush();
+ WrapInserter(f._d, 2).word("a").add(11).flush();
+ f.assertPostingLists("[11]", "[11]", "[11]");
+ f.remove(13);
+ f.assertPostingLists("[11]", "[11]", "[11]");
+}
+
+} // namespace memoryindex
+} // namespace search
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/memoryindex/document_remover/.gitignore b/searchlib/src/tests/memoryindex/document_remover/.gitignore
new file mode 100644
index 00000000000..2126f9147bd
--- /dev/null
+++ b/searchlib/src/tests/memoryindex/document_remover/.gitignore
@@ -0,0 +1 @@
+searchlib_document_remover_test_app
diff --git a/searchlib/src/tests/memoryindex/document_remover/CMakeLists.txt b/searchlib/src/tests/memoryindex/document_remover/CMakeLists.txt
new file mode 100644
index 00000000000..e918d0400b2
--- /dev/null
+++ b/searchlib/src/tests/memoryindex/document_remover/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_document_remover_test_app
+ SOURCES
+ document_remover_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_document_remover_test_app COMMAND searchlib_document_remover_test_app)
diff --git a/searchlib/src/tests/memoryindex/document_remover/DESC b/searchlib/src/tests/memoryindex/document_remover/DESC
new file mode 100644
index 00000000000..7fe35ab896f
--- /dev/null
+++ b/searchlib/src/tests/memoryindex/document_remover/DESC
@@ -0,0 +1 @@
+document remover test. Take a look at document_remover_test.cpp for details.
diff --git a/searchlib/src/tests/memoryindex/document_remover/FILES b/searchlib/src/tests/memoryindex/document_remover/FILES
new file mode 100644
index 00000000000..9b7cb9a8cfa
--- /dev/null
+++ b/searchlib/src/tests/memoryindex/document_remover/FILES
@@ -0,0 +1 @@
+document_remover_test.cpp
diff --git a/searchlib/src/tests/memoryindex/document_remover/document_remover_test.cpp b/searchlib/src/tests/memoryindex/document_remover/document_remover_test.cpp
new file mode 100644
index 00000000000..8c6751adbeb
--- /dev/null
+++ b/searchlib/src/tests/memoryindex/document_remover/document_remover_test.cpp
@@ -0,0 +1,144 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("document_remover_test");
+#include <vespa/vespalib/testkit/testapp.h>
+
+#include <vespa/searchlib/memoryindex/document_remover.h>
+#include <vespa/searchlib/memoryindex/wordstore.h>
+#include <vespa/searchlib/memoryindex/i_document_remove_listener.h>
+#include <vespa/vespalib/test/insertion_operators.h>
+#include <map>
+
+using namespace search;
+using namespace search::memoryindex;
+
+struct WordFieldPair
+{
+ vespalib::string _word;
+ uint32_t _fieldId;
+ WordFieldPair(const vespalib::stringref &word, uint32_t fieldId)
+ : _word(word), _fieldId(fieldId)
+ {}
+ bool operator<(const WordFieldPair &rhs) {
+ if (_word != rhs._word) {
+ return _word < rhs._word;
+ }
+ return _fieldId < rhs._fieldId;
+ }
+};
+
+typedef std::vector<WordFieldPair> WordFieldVector;
+
+std::ostream &
+operator<<(std::ostream &os, const WordFieldPair &val)
+{
+ os << "{" << val._word << "," << val._fieldId << "}";
+ return os;
+}
+
+struct MockRemoveListener : public IDocumentRemoveListener
+{
+ WordFieldVector _words;
+ uint32_t _expDocId;
+ uint32_t _fieldId;
+ virtual void remove(const vespalib::stringref word, uint32_t docId) override {
+ EXPECT_EQUAL(_expDocId, docId);
+ _words.emplace_back(word, _fieldId);
+ }
+ void reset(uint32_t expDocId) {
+ _words.clear();
+ _expDocId = expDocId;
+ }
+ vespalib::string getWords() {
+ std::sort(_words.begin(), _words.end());
+ std::ostringstream oss;
+ oss << _words;
+ return oss.str();
+ }
+ void setFieldId(uint32_t fieldId) { _fieldId = fieldId; }
+};
+
+struct Fixture
+{
+ MockRemoveListener _listener;
+ std::vector<std::unique_ptr<WordStore>> _wordStores;
+ std::vector<std::map<vespalib::string, btree::EntryRef>> _wordToRefMaps;
+ std::vector<std::unique_ptr<DocumentRemover>> _removers;
+ Fixture()
+ : _listener(),
+ _wordStores(),
+ _wordToRefMaps(),
+ _removers()
+ {
+ uint32_t numFields = 4;
+ for (uint32_t fieldId = 0; fieldId < numFields; ++fieldId) {
+ _wordStores.push_back(std::make_unique<WordStore>());
+ _removers.push_back(std::make_unique<DocumentRemover>
+ (*_wordStores.back()));
+ }
+ _wordToRefMaps.resize(numFields);
+ }
+ btree::EntryRef getWordRef(const vespalib::string &word, uint32_t fieldId) {
+ auto &wordToRefMap = _wordToRefMaps[fieldId];
+ WordStore &wordStore = *_wordStores[fieldId];
+ auto itr = wordToRefMap.find(word);
+ if (itr == wordToRefMap.end()) {
+ btree::EntryRef ref = wordStore.addWord(word);
+ wordToRefMap[word] = ref;
+ return ref;
+ }
+ return itr->second;
+ }
+ Fixture &insert(const vespalib::string &word, uint32_t fieldId, uint32_t docId) {
+ assert(fieldId < _wordStores.size());
+ _removers[fieldId]->insert(getWordRef(word, fieldId), docId);
+ return *this;
+ }
+ void flush() {
+ for (auto &remover : _removers) {
+ remover->flush();
+ }
+ }
+ vespalib::string remove(uint32_t docId) {
+ _listener.reset(docId);
+ uint32_t fieldId = 0;
+ for (auto &remover : _removers) {
+ _listener.setFieldId(fieldId);
+ remover->remove(docId, _listener);
+ ++fieldId;
+ }
+ return _listener.getWords();
+ }
+};
+
+TEST_F("require that {word,fieldId} pairs for multiple doc ids can be inserted", Fixture)
+{
+ f.insert("a", 1, 10).insert("a", 1, 20).insert("a", 1, 30);
+ f.insert("a", 2, 10).insert("a", 2, 20);
+ f.insert("b", 1, 20).insert("b", 1, 30);
+ f.insert("b", 2, 10).insert("b", 2, 30);
+ f.insert("c", 1, 10);
+ f.insert("c", 2, 20);
+ f.insert("c", 3, 30);
+ f.flush();
+
+ EXPECT_EQUAL("[{a,1},{a,2},{b,2},{c,1}]", f.remove(10));
+ EXPECT_EQUAL("[{a,1},{a,2},{b,1},{c,2}]", f.remove(20));
+ EXPECT_EQUAL("[{a,1},{b,1},{b,2},{c,3}]", f.remove(30));
+}
+
+TEST_F("require that we can insert after flush", Fixture)
+{
+ f.insert("a", 1, 10).insert("b", 1, 10);
+ f.flush();
+ f.insert("b", 1, 20).insert("b", 2, 20);
+ f.flush();
+
+ EXPECT_EQUAL("[{a,1},{b,1}]", f.remove(10));
+ EXPECT_EQUAL("[{b,1},{b,2}]", f.remove(20));
+}
+
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/memoryindex/documentinverter/.gitignore b/searchlib/src/tests/memoryindex/documentinverter/.gitignore
new file mode 100644
index 00000000000..1e9666b2d63
--- /dev/null
+++ b/searchlib/src/tests/memoryindex/documentinverter/.gitignore
@@ -0,0 +1 @@
+searchlib_documentinverter_test_app
diff --git a/searchlib/src/tests/memoryindex/documentinverter/CMakeLists.txt b/searchlib/src/tests/memoryindex/documentinverter/CMakeLists.txt
new file mode 100644
index 00000000000..85a77fad361
--- /dev/null
+++ b/searchlib/src/tests/memoryindex/documentinverter/CMakeLists.txt
@@ -0,0 +1,9 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_documentinverter_test_app
+ SOURCES
+ documentinverter_test.cpp
+ DEPENDS
+ searchlib_test
+ searchlib
+)
+vespa_add_test(NAME searchlib_documentinverter_test_app COMMAND searchlib_documentinverter_test_app)
diff --git a/searchlib/src/tests/memoryindex/documentinverter/DESC b/searchlib/src/tests/memoryindex/documentinverter/DESC
new file mode 100644
index 00000000000..5dc610c2a24
--- /dev/null
+++ b/searchlib/src/tests/memoryindex/documentinverter/DESC
@@ -0,0 +1 @@
+Document inverter test. Take a look at documentinverter_test.cpp for details.
diff --git a/searchlib/src/tests/memoryindex/documentinverter/FILES b/searchlib/src/tests/memoryindex/documentinverter/FILES
new file mode 100644
index 00000000000..c54817b9df1
--- /dev/null
+++ b/searchlib/src/tests/memoryindex/documentinverter/FILES
@@ -0,0 +1 @@
+documentinverter_test.cpp
diff --git a/searchlib/src/tests/memoryindex/documentinverter/documentinverter_test.cpp b/searchlib/src/tests/memoryindex/documentinverter/documentinverter_test.cpp
new file mode 100644
index 00000000000..d3ad1f54e95
--- /dev/null
+++ b/searchlib/src/tests/memoryindex/documentinverter/documentinverter_test.cpp
@@ -0,0 +1,294 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/* -*- mode: C++; coding: utf-8; -*- */
+
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("documentinverter_test");
+#include <vespa/searchlib/index/docbuilder.h>
+#include <vespa/searchlib/memoryindex/documentinverter.h>
+#include <vespa/searchlib/memoryindex/fieldinverter.h>
+#include <vespa/vespalib/objects/nbostream.h>
+#include <vespa/searchlib/test/memoryindex/ordereddocumentinserter.h>
+#include <vespa/searchlib/common/sequencedtaskexecutor.h>
+#include <vespa/vespalib/testkit/testapp.h>
+
+namespace search
+{
+
+
+using document::Document;
+using index::DocBuilder;
+using index::Schema;
+
+namespace memoryindex
+{
+
+
+namespace
+{
+
+
+Document::UP
+makeDoc10(DocBuilder &b)
+{
+ b.startDocument("doc::10");
+ b.startIndexField("f0").
+ addStr("a").addStr("b").addStr("c").addStr("d").
+ endField();
+ return b.endDocument();
+}
+
+
+Document::UP
+makeDoc11(DocBuilder &b)
+{
+ b.startDocument("doc::11");
+ b.startIndexField("f0").
+ addStr("a").addStr("b").addStr("e").addStr("f").
+ endField();
+ b.startIndexField("f1").
+ addStr("a").addStr("g").
+ endField();
+ return b.endDocument();
+}
+
+
+Document::UP
+makeDoc12(DocBuilder &b)
+{
+ b.startDocument("doc::12");
+ b.startIndexField("f0").
+ addStr("h").addStr("doc12").
+ endField();
+ return b.endDocument();
+}
+
+
+Document::UP
+makeDoc13(DocBuilder &b)
+{
+ b.startDocument("doc::13");
+ b.startIndexField("f0").
+ addStr("i").addStr("doc13").
+ endField();
+ return b.endDocument();
+}
+
+
+Document::UP
+makeDoc14(DocBuilder &b)
+{
+ b.startDocument("doc::14");
+ b.startIndexField("f0").
+ addStr("j").addStr("doc14").
+ endField();
+ return b.endDocument();
+}
+
+
+Document::UP
+makeDoc15(DocBuilder &b)
+{
+ b.startDocument("doc::15");
+ return b.endDocument();
+}
+
+}
+
+struct Fixture
+{
+ Schema _schema;
+ DocBuilder _b;
+ SequencedTaskExecutor _invertThreads;
+ SequencedTaskExecutor _pushThreads;
+ DocumentInverter _inv;
+ test::OrderedDocumentInserter _inserter;
+
+ static Schema
+ makeSchema()
+ {
+ Schema schema;
+ schema.addIndexField(Schema::IndexField("f0", Schema::STRING));
+ schema.addIndexField(Schema::IndexField("f1", Schema::STRING));
+ schema.addIndexField(Schema::IndexField("f2", Schema::STRING,
+ Schema::ARRAY));
+ schema.addIndexField(Schema::IndexField("f3", Schema::STRING,
+ Schema::WEIGHTEDSET));
+ return schema;
+ }
+
+ Fixture()
+ : _schema(makeSchema()),
+ _b(_schema),
+ _invertThreads(2),
+ _pushThreads(2),
+ _inv(_schema, _invertThreads, _pushThreads),
+ _inserter()
+ {
+ }
+
+ void
+ pushDocuments()
+ {
+ _invertThreads.sync();
+ uint32_t fieldId = 0;
+ for (auto &inverter : _inv.getInverters()) {
+ _inserter.setFieldId(fieldId);
+ inverter->pushDocuments(_inserter);
+ ++fieldId;
+ }
+ _pushThreads.sync();
+ }
+};
+
+
+TEST_F("requireThatFreshInsertWorks", Fixture)
+{
+ f._inv.invertDocument(10, *makeDoc10(f._b));
+ f.pushDocuments();
+ EXPECT_EQUAL("f=0,w=a,a=10,"
+ "w=b,a=10,"
+ "w=c,a=10,"
+ "w=d,a=10",
+ f._inserter.toStr());
+}
+
+
+TEST_F("requireThatMultipleDocsWork", Fixture)
+{
+ f._inv.invertDocument(10, *makeDoc10(f._b));
+ f._inv.invertDocument(11, *makeDoc11(f._b));
+ f.pushDocuments();
+ EXPECT_EQUAL("f=0,w=a,a=10,a=11,"
+ "w=b,a=10,a=11,"
+ "w=c,a=10,w=d,a=10,"
+ "w=e,a=11,"
+ "w=f,a=11,"
+ "f=1,w=a,a=11,"
+ "w=g,a=11",
+ f._inserter.toStr());
+}
+
+
+TEST_F("requireThatRemoveWorks", Fixture)
+{
+ f._inv.getInverter(0)->remove("b", 10);
+ f._inv.getInverter(0)->remove("a", 10);
+ f._inv.getInverter(0)->remove("b", 11);
+ f._inv.getInverter(2)->remove("c", 12);
+ f._inv.getInverter(1)->remove("a", 10);
+ f.pushDocuments();
+ EXPECT_EQUAL("f=0,w=a,r=10,"
+ "w=b,r=10,r=11,"
+ "f=1,w=a,r=10,"
+ "f=2,w=c,r=12",
+ f._inserter.toStr());
+}
+
+
+TEST_F("requireThatReputWorks", Fixture)
+{
+ f._inv.invertDocument(10, *makeDoc10(f._b));
+ f._inv.invertDocument(10, *makeDoc11(f._b));
+ f.pushDocuments();
+ EXPECT_EQUAL("f=0,w=a,a=10,"
+ "w=b,a=10,"
+ "w=e,a=10,"
+ "w=f,a=10,"
+ "f=1,w=a,a=10,"
+ "w=g,a=10",
+ f._inserter.toStr());
+}
+
+
+TEST_F("requireThatAbortPendingDocWorks", Fixture)
+{
+ Document::UP doc10 = makeDoc10(f._b);
+ Document::UP doc11 = makeDoc11(f._b);
+ Document::UP doc12 = makeDoc12(f._b);
+ Document::UP doc13 = makeDoc13(f._b);
+ Document::UP doc14 = makeDoc14(f._b);
+
+ f._inv.invertDocument(10, *doc10);
+ f._inv.invertDocument(11, *doc11);
+ f._inv.removeDocument(10);
+ f.pushDocuments();
+ EXPECT_EQUAL("f=0,w=a,a=11,"
+ "w=b,a=11,"
+ "w=e,a=11,"
+ "w=f,a=11,"
+ "f=1,w=a,a=11,"
+ "w=g,a=11",
+ f._inserter.toStr());
+
+ f._inv.invertDocument(10, *doc10);
+ f._inv.invertDocument(11, *doc11);
+ f._inv.invertDocument(12, *doc12);
+ f._inv.invertDocument(13, *doc13);
+ f._inv.invertDocument(14, *doc14);
+ f._inv.removeDocument(11);
+ f._inv.removeDocument(13);
+ f._inserter.reset();
+ f.pushDocuments();
+ EXPECT_EQUAL("f=0,w=a,a=10,"
+ "w=b,a=10,"
+ "w=c,a=10,"
+ "w=d,a=10,"
+ "w=doc12,a=12,"
+ "w=doc14,a=14,"
+ "w=h,a=12,"
+ "w=j,a=14",
+ f._inserter.toStr());
+
+ f._inv.invertDocument(10, *doc10);
+ f._inv.invertDocument(11, *doc11);
+ f._inv.invertDocument(12, *doc12);
+ f._inv.invertDocument(13, *doc13);
+ f._inv.invertDocument(14, *doc14);
+ f._inv.removeDocument(11);
+ f._inv.removeDocument(12);
+ f._inv.removeDocument(13);
+ f._inv.removeDocument(14);
+ f._inserter.reset();
+ f.pushDocuments();
+ EXPECT_EQUAL("f=0,w=a,a=10,"
+ "w=b,a=10,"
+ "w=c,a=10,"
+ "w=d,a=10",
+ f._inserter.toStr());
+
+
+}
+
+
+TEST_F("requireThatMixOfAddAndRemoveWorks", Fixture)
+{
+ f._inv.getInverter(0)->remove("a", 11);
+ f._inv.getInverter(0)->remove("c", 9);
+ f._inv.getInverter(0)->remove("d", 10);
+ f._inv.getInverter(0)->remove("z", 12);
+ f._inv.invertDocument(10, *makeDoc10(f._b));
+ f.pushDocuments();
+ EXPECT_EQUAL("f=0,w=a,a=10,r=11,"
+ "w=b,a=10,"
+ "w=c,r=9,a=10,"
+ "w=d,r=10,a=10,"
+ "w=z,r=12",
+ f._inserter.toStr());
+}
+
+
+TEST_F("require that empty document can be inverted", Fixture)
+{
+ f._inv.invertDocument(15, *makeDoc15(f._b));
+ f.pushDocuments();
+ EXPECT_EQUAL("",
+ f._inserter.toStr());
+}
+
+
+} // namespace memoryindex
+} // namespace search
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/memoryindex/fieldinverter/.gitignore b/searchlib/src/tests/memoryindex/fieldinverter/.gitignore
new file mode 100644
index 00000000000..482663dd92e
--- /dev/null
+++ b/searchlib/src/tests/memoryindex/fieldinverter/.gitignore
@@ -0,0 +1 @@
+searchlib_fieldinverter_test_app
diff --git a/searchlib/src/tests/memoryindex/fieldinverter/CMakeLists.txt b/searchlib/src/tests/memoryindex/fieldinverter/CMakeLists.txt
new file mode 100644
index 00000000000..9d81ebbb57c
--- /dev/null
+++ b/searchlib/src/tests/memoryindex/fieldinverter/CMakeLists.txt
@@ -0,0 +1,9 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_fieldinverter_test_app
+ SOURCES
+ fieldinverter_test.cpp
+ DEPENDS
+ searchlib_test
+ searchlib
+)
+vespa_add_test(NAME searchlib_fieldinverter_test_app COMMAND searchlib_fieldinverter_test_app)
diff --git a/searchlib/src/tests/memoryindex/fieldinverter/DESC b/searchlib/src/tests/memoryindex/fieldinverter/DESC
new file mode 100644
index 00000000000..a40890fdc3d
--- /dev/null
+++ b/searchlib/src/tests/memoryindex/fieldinverter/DESC
@@ -0,0 +1 @@
+Field inverter test. Take a look at fieldinverter_test.cpp for details.
diff --git a/searchlib/src/tests/memoryindex/fieldinverter/FILES b/searchlib/src/tests/memoryindex/fieldinverter/FILES
new file mode 100644
index 00000000000..892febd1c50
--- /dev/null
+++ b/searchlib/src/tests/memoryindex/fieldinverter/FILES
@@ -0,0 +1 @@
+fieldinverter_test.cpp
diff --git a/searchlib/src/tests/memoryindex/fieldinverter/fieldinverter_test.cpp b/searchlib/src/tests/memoryindex/fieldinverter/fieldinverter_test.cpp
new file mode 100644
index 00000000000..6216ba9eb3c
--- /dev/null
+++ b/searchlib/src/tests/memoryindex/fieldinverter/fieldinverter_test.cpp
@@ -0,0 +1,338 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/* -*- mode: C++; coding: utf-8; -*- */
+
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("fieldinverter_test");
+#include <vespa/searchlib/index/docbuilder.h>
+#include <vespa/searchlib/memoryindex/fieldinverter.h>
+#include <vespa/vespalib/objects/nbostream.h>
+#include <vespa/searchlib/test/memoryindex/ordereddocumentinserter.h>
+#include <vespa/vespalib/testkit/testapp.h>
+#include <vespa/document/repo/fixedtyperepo.h>
+
+namespace search
+{
+
+
+using document::Document;
+using index::DocBuilder;
+using index::Schema;
+
+namespace memoryindex
+{
+
+
+namespace
+{
+
+
+Document::UP
+makeDoc10(DocBuilder &b)
+{
+ b.startDocument("doc::10");
+ b.startIndexField("f0").
+ addStr("a").addStr("b").addStr("c").addStr("d").
+ endField();
+ return b.endDocument();
+}
+
+
+Document::UP
+makeDoc11(DocBuilder &b)
+{
+ b.startDocument("doc::11");
+ b.startIndexField("f0").
+ addStr("a").addStr("b").addStr("e").addStr("f").
+ endField();
+ b.startIndexField("f1").
+ addStr("a").addStr("g").
+ endField();
+ return b.endDocument();
+}
+
+
+Document::UP
+makeDoc12(DocBuilder &b)
+{
+ b.startDocument("doc::12");
+ b.startIndexField("f0").
+ addStr("h").addStr("doc12").
+ endField();
+ return b.endDocument();
+}
+
+
+Document::UP
+makeDoc13(DocBuilder &b)
+{
+ b.startDocument("doc::13");
+ b.startIndexField("f0").
+ addStr("i").addStr("doc13").
+ endField();
+ return b.endDocument();
+}
+
+
+Document::UP
+makeDoc14(DocBuilder &b)
+{
+ b.startDocument("doc::14");
+ b.startIndexField("f0").
+ addStr("j").addStr("doc14").
+ endField();
+ return b.endDocument();
+}
+
+
+Document::UP
+makeDoc15(DocBuilder &b)
+{
+ b.startDocument("doc::15");
+ return b.endDocument();
+}
+
+
+Document::UP
+makeDoc16(DocBuilder &b)
+{
+ b.startDocument("doc::16");
+ b.startIndexField("f0").addStr("foo").addStr("bar").addStr("baz").
+ addTermAnnotation("altbaz").addStr("y").addTermAnnotation("alty").
+ addStr("z").endField();
+ return b.endDocument();
+}
+
+}
+
+struct Fixture
+{
+ Schema _schema;
+ DocBuilder _b;
+ std::vector<std::unique_ptr<FieldInverter> > _inverters;
+ test::OrderedDocumentInserter _inserter;
+
+ static Schema
+ makeSchema()
+ {
+ Schema schema;
+ schema.addIndexField(Schema::IndexField("f0", Schema::STRING));
+ schema.addIndexField(Schema::IndexField("f1", Schema::STRING));
+ schema.addIndexField(Schema::IndexField("f2", Schema::STRING,
+ Schema::ARRAY));
+ schema.addIndexField(Schema::IndexField("f3", Schema::STRING,
+ Schema::WEIGHTEDSET));
+ return schema;
+ }
+
+ Fixture()
+ : _schema(makeSchema()),
+ _b(_schema),
+ _inverters(),
+ _inserter()
+ {
+ for (uint32_t fieldId = 0; fieldId < _schema.getNumIndexFields();
+ ++fieldId) {
+ _inverters.push_back(std::make_unique<FieldInverter>(_schema,
+ fieldId));
+ }
+ }
+
+ void
+ invertDocument(uint32_t docId, const Document &doc)
+ {
+ uint32_t fieldId = 0;
+ for (auto &inverter : _inverters) {
+ vespalib::stringref fieldName =
+ _schema.getIndexField(fieldId).getName();
+ inverter->invertField(docId, doc.getValue(fieldName));
+ ++fieldId;
+ }
+ }
+
+ void
+ pushDocuments()
+ {
+ uint32_t fieldId = 0;
+ for (auto &inverter : _inverters) {
+ _inserter.setFieldId(fieldId);
+ inverter->pushDocuments(_inserter);
+ ++fieldId;
+ }
+ }
+
+ void
+ removeDocument(uint32_t docId) {
+ for (auto &inverter : _inverters) {
+ inverter->removeDocument(docId);
+ }
+ }
+};
+
+
+TEST_F("requireThatFreshInsertWorks", Fixture)
+{
+ f.invertDocument(10, *makeDoc10(f._b));
+ f.pushDocuments();
+ EXPECT_EQUAL("f=0,w=a,a=10,"
+ "w=b,a=10,"
+ "w=c,a=10,"
+ "w=d,a=10",
+ f._inserter.toStr());
+}
+
+
+TEST_F("requireThatMultipleDocsWork", Fixture)
+{
+ f.invertDocument(10, *makeDoc10(f._b));
+ f.invertDocument(11, *makeDoc11(f._b));
+ f.pushDocuments();
+ EXPECT_EQUAL("f=0,w=a,a=10,a=11,"
+ "w=b,a=10,a=11,"
+ "w=c,a=10,w=d,a=10,"
+ "w=e,a=11,"
+ "w=f,a=11,"
+ "f=1,w=a,a=11,"
+ "w=g,a=11",
+ f._inserter.toStr());
+}
+
+
+TEST_F("requireThatRemoveWorks", Fixture)
+{
+ f._inverters[0]->remove("b", 10);
+ f._inverters[0]->remove("a", 10);
+ f._inverters[0]->remove("b", 11);
+ f._inverters[2]->remove("c", 12);
+ f._inverters[1]->remove("a", 10);
+ f.pushDocuments();
+ EXPECT_EQUAL("f=0,w=a,r=10,"
+ "w=b,r=10,r=11,"
+ "f=1,w=a,r=10,"
+ "f=2,w=c,r=12",
+ f._inserter.toStr());
+}
+
+
+TEST_F("requireThatReputWorks", Fixture)
+{
+ f.invertDocument(10, *makeDoc10(f._b));
+ f.invertDocument(10, *makeDoc11(f._b));
+ f.pushDocuments();
+ EXPECT_EQUAL("f=0,w=a,a=10,"
+ "w=b,a=10,"
+ "w=e,a=10,"
+ "w=f,a=10,"
+ "f=1,w=a,a=10,"
+ "w=g,a=10",
+ f._inserter.toStr());
+}
+
+
+TEST_F("requireThatAbortPendingDocWorks", Fixture)
+{
+ Document::UP doc10 = makeDoc10(f._b);
+ Document::UP doc11 = makeDoc11(f._b);
+ Document::UP doc12 = makeDoc12(f._b);
+ Document::UP doc13 = makeDoc13(f._b);
+ Document::UP doc14 = makeDoc14(f._b);
+
+ f.invertDocument(10, *doc10);
+ f.invertDocument(11, *doc11);
+ f.removeDocument(10);
+ f.pushDocuments();
+ EXPECT_EQUAL("f=0,w=a,a=11,"
+ "w=b,a=11,"
+ "w=e,a=11,"
+ "w=f,a=11,"
+ "f=1,w=a,a=11,"
+ "w=g,a=11",
+ f._inserter.toStr());
+
+ f.invertDocument(10, *doc10);
+ f.invertDocument(11, *doc11);
+ f.invertDocument(12, *doc12);
+ f.invertDocument(13, *doc13);
+ f.invertDocument(14, *doc14);
+ f.removeDocument(11);
+ f.removeDocument(13);
+ f._inserter.reset();
+ f.pushDocuments();
+ EXPECT_EQUAL("f=0,w=a,a=10,"
+ "w=b,a=10,"
+ "w=c,a=10,"
+ "w=d,a=10,"
+ "w=doc12,a=12,"
+ "w=doc14,a=14,"
+ "w=h,a=12,"
+ "w=j,a=14",
+ f._inserter.toStr());
+
+ f.invertDocument(10, *doc10);
+ f.invertDocument(11, *doc11);
+ f.invertDocument(12, *doc12);
+ f.invertDocument(13, *doc13);
+ f.invertDocument(14, *doc14);
+ f.removeDocument(11);
+ f.removeDocument(12);
+ f.removeDocument(13);
+ f.removeDocument(14);
+ f._inserter.reset();
+ f.pushDocuments();
+ EXPECT_EQUAL("f=0,w=a,a=10,"
+ "w=b,a=10,"
+ "w=c,a=10,"
+ "w=d,a=10",
+ f._inserter.toStr());
+
+
+}
+
+
+TEST_F("requireThatMixOfAddAndRemoveWorks", Fixture)
+{
+ f._inverters[0]->remove("a", 11);
+ f._inverters[0]->remove("c", 9);
+ f._inverters[0]->remove("d", 10);
+ f._inverters[0]->remove("z", 12);
+ f.invertDocument(10, *makeDoc10(f._b));
+ f.pushDocuments();
+ EXPECT_EQUAL("f=0,w=a,a=10,r=11,"
+ "w=b,a=10,"
+ "w=c,r=9,a=10,"
+ "w=d,r=10,a=10,"
+ "w=z,r=12",
+ f._inserter.toStr());
+}
+
+
+TEST_F("require that empty document can be inverted", Fixture)
+{
+ f.invertDocument(15, *makeDoc15(f._b));
+ f.pushDocuments();
+ EXPECT_EQUAL("",
+ f._inserter.toStr());
+}
+
+TEST_F("require that multiple words at same position works", Fixture)
+{
+ f.invertDocument(16, *makeDoc16(f._b));
+ f._inserter.setVerbose();
+ f.pushDocuments();
+ EXPECT_EQUAL("f=0,"
+ "w=altbaz,a=16(e=0,w=1,l=5[2]),"
+ "w=alty,a=16(e=0,w=1,l=5[3]),"
+ "w=bar,a=16(e=0,w=1,l=5[1]),"
+ "w=baz,a=16(e=0,w=1,l=5[2]),"
+ "w=foo,a=16(e=0,w=1,l=5[0]),"
+ "w=y,a=16(e=0,w=1,l=5[3]),"
+ "w=z,a=16(e=0,w=1,l=5[4])",
+ f._inserter.toStr());
+}
+
+
+} // namespace memoryindex
+} // namespace search
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/memoryindex/memoryindex/.gitignore b/searchlib/src/tests/memoryindex/memoryindex/.gitignore
new file mode 100644
index 00000000000..174d0a494e2
--- /dev/null
+++ b/searchlib/src/tests/memoryindex/memoryindex/.gitignore
@@ -0,0 +1,5 @@
+.depend
+Makefile
+memoryindex_test
+sourceselectorwriter_test
+searchlib_memoryindex_test_app
diff --git a/searchlib/src/tests/memoryindex/memoryindex/CMakeLists.txt b/searchlib/src/tests/memoryindex/memoryindex/CMakeLists.txt
new file mode 100644
index 00000000000..f25089e85bb
--- /dev/null
+++ b/searchlib/src/tests/memoryindex/memoryindex/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_memoryindex_test_app
+ SOURCES
+ memoryindex_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_memoryindex_test_app COMMAND searchlib_memoryindex_test_app)
diff --git a/searchlib/src/tests/memoryindex/memoryindex/DESC b/searchlib/src/tests/memoryindex/memoryindex/DESC
new file mode 100644
index 00000000000..87b69181803
--- /dev/null
+++ b/searchlib/src/tests/memoryindex/memoryindex/DESC
@@ -0,0 +1 @@
+memoryindex test. Take a look at memoryindex_test.cpp for details.
diff --git a/searchlib/src/tests/memoryindex/memoryindex/FILES b/searchlib/src/tests/memoryindex/memoryindex/FILES
new file mode 100644
index 00000000000..4faa7668dfc
--- /dev/null
+++ b/searchlib/src/tests/memoryindex/memoryindex/FILES
@@ -0,0 +1 @@
+memoryindex_test.cpp
diff --git a/searchlib/src/tests/memoryindex/memoryindex/memoryindex_test.cpp b/searchlib/src/tests/memoryindex/memoryindex/memoryindex_test.cpp
new file mode 100644
index 00000000000..7d2afc151d5
--- /dev/null
+++ b/searchlib/src/tests/memoryindex/memoryindex/memoryindex_test.cpp
@@ -0,0 +1,438 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("memoryindex_test");
+#include <vespa/vespalib/testkit/testapp.h>
+
+#include <vespa/searchlib/memoryindex/memoryindex.h>
+#include <vespa/searchlib/fef/matchdata.h>
+#include <vespa/searchlib/fef/matchdatalayout.h>
+#include <vespa/searchlib/fef/termfieldmatchdata.h>
+#include <vespa/searchlib/index/docbuilder.h>
+#include <vespa/searchlib/query/tree/simplequery.h>
+#include <vespa/searchlib/queryeval/booleanmatchiteratorwrapper.h>
+#include <vespa/searchlib/queryeval/fake_search.h>
+#include <vespa/searchlib/queryeval/fake_searchable.h>
+#include <vespa/searchlib/queryeval/fake_requestcontext.h>
+#include <vespa/searchlib/queryeval/searchiterator.h>
+#include <vespa/vespalib/util/stringfmt.h>
+#include <vespa/searchlib/common/sequencedtaskexecutor.h>
+#include <vespa/searchlib/common/scheduletaskcallback.h>
+#include <vespa/vespalib/util/threadstackexecutor.h>
+
+using document::Document;
+using document::FieldValue;
+using search::query::Node;
+using search::query::SimplePhrase;
+using search::query::SimpleStringTerm;
+using search::makeLambdaTask;
+using search::ScheduleTaskCallback;
+using namespace search::fef;
+using namespace search::index;
+using namespace search::memoryindex;
+using namespace search::queryeval;
+
+//-----------------------------------------------------------------------------
+
+struct Setup {
+ Schema schema;
+ Setup &field(const std::string &name) {
+ schema.addIndexField(Schema::IndexField(name,
+ Schema::STRING));
+ return *this;
+ }
+};
+
+//-----------------------------------------------------------------------------
+
+struct Index {
+ Schema schema;
+ vespalib::ThreadStackExecutor _executor;
+ search::SequencedTaskExecutor _invertThreads;
+ search::SequencedTaskExecutor _pushThreads;
+ MemoryIndex index;
+ DocBuilder builder;
+ uint32_t docid;
+ std::string currentField;
+
+ Index(const Setup &setup)
+ : schema(setup.schema),
+ _executor(1, 128 * 1024),
+ _invertThreads(2),
+ _pushThreads(2),
+ index(schema, _invertThreads, _pushThreads),
+ builder(schema),
+ docid(1),
+ currentField()
+ {
+ }
+ void closeField() {
+ if (!currentField.empty()) {
+ builder.endField();
+ currentField.clear();
+ }
+ }
+ Index &doc(uint32_t id) {
+ docid = id;
+ builder.startDocument(vespalib::make_string("doc::%u", id));
+ return *this;
+ }
+ Index &field(const std::string &name) {
+ closeField();
+ builder.startIndexField(name);
+ currentField = name;
+ return *this;
+ }
+ Index &add(const std::string &token) {
+ builder.addStr(token);
+ return *this;
+ }
+ void internalSyncCommit() {
+ vespalib::Gate gate;
+ index.commit(std::make_shared<ScheduleTaskCallback>
+ (_executor,
+ makeLambdaTask([&]() { gate.countDown(); })));
+ gate.await();
+ }
+ Document::UP commit() {
+ closeField();
+ Document::UP d = builder.endDocument();
+ index.insertDocument(docid, *d);
+ internalSyncCommit();
+ return d;
+ }
+ Index &remove(uint32_t id) {
+ index.removeDocument(id);
+ internalSyncCommit();
+ return *this;
+ }
+
+private:
+ Index(const Index &index);
+ Index &operator=(const Index &index);
+};
+
+//-----------------------------------------------------------------------------
+
+std::string toString(SearchIterator & search)
+{
+ std::ostringstream oss;
+ bool first = true;
+ for (search.seek(1); ! search.isAtEnd(); search.seek(search.getDocId() + 1)) {
+ if (!first) oss << ",";
+ oss << search.getDocId();
+ first = false;
+ }
+ return oss.str();
+}
+
+//-----------------------------------------------------------------------------
+
+const std::string title("title");
+const std::string body("body");
+const std::string foo("foo");
+const std::string bar("bar");
+
+//-----------------------------------------------------------------------------
+
+bool
+verifyResult(const FakeResult &expect,
+ Searchable &index,
+ std::string fieldName,
+ const Node &term)
+{
+ uint32_t fieldId = 0;
+ FakeRequestContext requestContext;
+
+ MatchDataLayout mdl;
+ TermFieldHandle handle = mdl.allocTermField(fieldId);
+ MatchData::UP match_data = mdl.createMatchData();
+
+ FieldSpec field(fieldName, fieldId, handle);
+ FieldSpecList fields;
+ fields.add(field);
+
+ Blueprint::UP result = index.createBlueprint(requestContext, fields, term);
+ if (!EXPECT_TRUE(result.get() != 0)) {
+ return false;
+ }
+ EXPECT_EQUAL(expect.inspect().size(), result->getState().estimate().estHits);
+ EXPECT_EQUAL(expect.inspect().empty(), result->getState().estimate().empty);
+
+ result->fetchPostings(true);
+ SearchIterator::UP search = result->createSearch(*match_data, true);
+ if (!EXPECT_TRUE(search.get() != 0)) {
+ return false;
+ }
+ TermFieldMatchData &tmd = *match_data->resolveTermField(handle);
+
+ FakeResult actual;
+ search->initFullRange();
+ for (search->seek(1); !search->isAtEnd(); search->seek(search->getDocId() + 1)) {
+ actual.doc(search->getDocId());
+ search->unpack(search->getDocId());
+ EXPECT_EQUAL(search->getDocId(), tmd.getDocId());
+ FieldPositionsIterator p = tmd.getIterator();
+ actual.len(p.getFieldLength());
+ for (; p.valid(); p.next()) {
+ actual.pos(p.getPosition());
+ }
+ }
+ return EXPECT_EQUAL(expect, actual);
+}
+
+namespace {
+SimpleStringTerm makeTerm(const std::string &term) {
+ return SimpleStringTerm(term, "field", 0, search::query::Weight(0));
+}
+
+Node::UP makePhrase(const std::string &term1, const std::string &term2) {
+ SimplePhrase * phrase = new SimplePhrase("field", 0, search::query::Weight(0));
+ Node::UP node(phrase);
+ phrase->append(Node::UP(new SimpleStringTerm(makeTerm(term1))));
+ phrase->append(Node::UP(new SimpleStringTerm(makeTerm(term2))));
+ return node;
+}
+} // namespace
+
+// tests basic usage; index some documents in docid order and perform
+// some searches.
+TEST("testIndexAndSearch")
+{
+ Index index(Setup().field(title).field(body));
+ index.doc(1)
+ .field(title).add(foo).add(bar).add(foo)
+ .field(body).add(foo).add(foo).add(foo)
+ .commit();
+ index.doc(2)
+ .field(title).add(bar).add(foo)
+ .field(body).add(bar).add(bar).add(bar).add(bar)
+ .commit();
+
+ // search for "foo" in "title"
+ EXPECT_TRUE(verifyResult(FakeResult()
+ .doc(1).len(3).pos(0).pos(2)
+ .doc(2).len(2).pos(1),
+ index.index, title, makeTerm(foo)));
+
+ // search for "bar" in "title"
+ EXPECT_TRUE(verifyResult(FakeResult()
+ .doc(1).len(3).pos(1)
+ .doc(2).len(2).pos(0),
+ index.index, title, makeTerm(bar)));
+
+ // search for "foo" in "body"
+ EXPECT_TRUE(verifyResult(FakeResult()
+ .doc(1).len(3).pos(0).pos(1).pos(2),
+ index.index, body, makeTerm(foo)));
+
+ // search for "bar" in "body"
+ EXPECT_TRUE(verifyResult(FakeResult()
+ .doc(2).len(4).pos(0).pos(1).pos(2).pos(3),
+ index.index, body, makeTerm(bar)));
+
+ // search for "bogus" in "title"
+ EXPECT_TRUE(verifyResult(FakeResult(),
+ index.index, title, makeTerm("bogus")));
+
+ // search for "foo" in "bogus"
+ EXPECT_TRUE(verifyResult(FakeResult(),
+ index.index, "bogus", makeTerm(foo)));
+
+ // search for "bar foo" in "title"
+ EXPECT_TRUE(verifyResult(FakeResult()
+ .doc(1).len(3).pos(1)
+ .doc(2).len(2).pos(0),
+ index.index, title, *makePhrase(bar, foo)));
+
+}
+
+// tests index update behavior; remove/update and unordered docid
+// indexing.
+TEST("require that documents can be removed and updated")
+{
+ Index index(Setup().field(title));
+
+ // add unordered
+ index.doc(3).field(title).add(foo).add(foo).add(foo).commit();
+ Document::UP doc1 = index.doc(1).field(title).add(foo).commit();
+ Document::UP doc2 = index.doc(2).field(title).add(foo).add(foo).commit();
+
+ EXPECT_TRUE(verifyResult(FakeResult()
+ .doc(1).len(1).pos(0)
+ .doc(2).len(2).pos(0).pos(1)
+ .doc(3).len(3).pos(0).pos(1).pos(2),
+ index.index, title, makeTerm(foo)));
+
+ // remove document
+ index.remove(2);
+
+ EXPECT_TRUE(verifyResult(FakeResult()
+ .doc(1).len(1).pos(0)
+ .doc(3).len(3).pos(0).pos(1).pos(2),
+ index.index, title, makeTerm(foo)));
+
+ // update document
+ index.doc(1).field(title).add(bar).add(foo).add(foo).commit();
+
+ EXPECT_TRUE(verifyResult(FakeResult()
+ .doc(1).len(3).pos(1).pos(2)
+ .doc(3).len(3).pos(0).pos(1).pos(2),
+ index.index, title, makeTerm(foo)));
+}
+
+// test the fake field source here, to make sure it acts similar to
+// the memory index field source.
+TEST("testFakeSearchable")
+{
+ Index index(Setup().field(title).field(body));
+
+ // setup fake field source with predefined results
+ FakeSearchable fakeSource;
+ fakeSource.addResult(title, foo,
+ FakeResult()
+ .doc(1).len(3).pos(0).pos(2)
+ .doc(2).len(2).pos(1));
+ fakeSource.addResult(title, bar,
+ FakeResult()
+ .doc(1).len(3).pos(1)
+ .doc(2).len(2).pos(0));
+ fakeSource.addResult(body, foo,
+ FakeResult()
+ .doc(1).len(3).pos(0).pos(1).pos(2));
+ fakeSource.addResult(body, bar,
+ FakeResult()
+ .doc(2).len(4).pos(0).pos(1).pos(2).pos(3));
+
+ // search for "foo" in "title"
+ EXPECT_TRUE(verifyResult(FakeResult()
+ .doc(1).len(3).pos(0).pos(2)
+ .doc(2).len(2).pos(1),
+ fakeSource, title, makeTerm(foo)));
+
+ // search for "bar" in "title"
+ EXPECT_TRUE(verifyResult(FakeResult()
+ .doc(1).len(3).pos(1)
+ .doc(2).len(2).pos(0),
+ fakeSource, title, makeTerm(bar)));
+
+ // search for "foo" in "body"
+ EXPECT_TRUE(verifyResult(FakeResult()
+ .doc(1).len(3).pos(0).pos(1).pos(2),
+ fakeSource, body, makeTerm(foo)));
+
+ // search for "bar" in "body"
+ EXPECT_TRUE(verifyResult(FakeResult()
+ .doc(2).len(4).pos(0).pos(1).pos(2).pos(3),
+ fakeSource, body, makeTerm(bar)));
+
+ // search for "bogus" in "title"
+ EXPECT_TRUE(verifyResult(FakeResult(),
+ fakeSource, title, makeTerm("bogus")));
+
+ // search for foo in "bogus"
+ EXPECT_TRUE(verifyResult(FakeResult(),
+ fakeSource, "bogus", makeTerm(foo)));
+}
+
+TEST("requireThatFrozenIndexIgnoresUpdates")
+{
+ Index index(Setup().field(title));
+ Document::UP doc1 = index.doc(1).field(title).add(foo).add(bar).commit();
+ FakeResult ffr = FakeResult().doc(1).len(2).pos(0);
+ EXPECT_TRUE(verifyResult(ffr, index.index, title, makeTerm(foo)));
+ EXPECT_TRUE(!index.index.isFrozen());
+ index.index.freeze();
+ EXPECT_TRUE(index.index.isFrozen());
+ index.doc(2).field(title).add(bar).add(foo).commit(); // not added
+ EXPECT_TRUE(verifyResult(ffr, index.index, title, makeTerm(foo)));
+ index.remove(1); // not removed
+ EXPECT_TRUE(verifyResult(ffr, index.index, title, makeTerm(foo)));
+}
+
+TEST("requireThatNumDocsAndDocIdLimitIsReturned")
+{
+ Index index(Setup().field(title));
+ EXPECT_EQUAL(0u, index.index.getNumDocs());
+ EXPECT_EQUAL(1u, index.index.getDocIdLimit());
+ Document::UP doc1 = index.doc(1).field(title).add(foo).commit();
+ EXPECT_EQUAL(1u, index.index.getNumDocs());
+ EXPECT_EQUAL(2u, index.index.getDocIdLimit());
+ Document::UP doc4 = index.doc(4).field(title).add(foo).commit();
+ EXPECT_EQUAL(2u, index.index.getNumDocs());
+ EXPECT_EQUAL(5u, index.index.getDocIdLimit());
+ Document::UP doc2 = index.doc(2).field(title).add(foo).commit();
+ EXPECT_EQUAL(3u, index.index.getNumDocs());
+ EXPECT_EQUAL(5u, index.index.getDocIdLimit());
+ // re-add doc4
+ index.doc(4).field(title).add(bar).commit();
+ EXPECT_EQUAL(3u, index.index.getNumDocs());
+ EXPECT_EQUAL(5u, index.index.getDocIdLimit());
+ // remove doc2
+ index.remove(2);
+ EXPECT_EQUAL(2u, index.index.getNumDocs());
+ EXPECT_EQUAL(5u, index.index.getDocIdLimit());
+}
+
+TEST("requireThatWeUnderstandTheMemoryFootprint")
+{
+ {
+ Setup setup;
+ Index index(setup);
+ EXPECT_EQUAL(0u, index.index.getStaticMemoryFootprint());
+ EXPECT_EQUAL(index.index.getStaticMemoryFootprint(), index.index.getMemoryUsage().allocatedBytes());
+ }
+ {
+ Index index(Setup().field("f1"));
+ EXPECT_EQUAL(118852u, index.index.getStaticMemoryFootprint());
+ EXPECT_EQUAL(index.index.getStaticMemoryFootprint(), index.index.getMemoryUsage().allocatedBytes());
+ }
+ {
+ Index index(Setup().field("f1").field("f2"));
+ EXPECT_EQUAL(2*118852u, index.index.getStaticMemoryFootprint());
+ EXPECT_EQUAL(index.index.getStaticMemoryFootprint(), index.index.getMemoryUsage().allocatedBytes());
+ }
+}
+
+TEST("requireThatNumWordsIsReturned")
+{
+ Index index(Setup().field(title));
+ EXPECT_EQUAL(0u, index.index.getNumWords());
+ index.doc(1).field(title).add(foo).commit();
+ EXPECT_EQUAL(1u, index.index.getNumWords());
+ index.doc(2).field(title).add(foo).add(bar).add(body).commit();
+ EXPECT_EQUAL(3u, index.index.getNumWords());
+}
+
+TEST("requireThatWeCanFakeBitVector")
+{
+ Index index(Setup().field(title));
+ index.doc(1).field(title).add(foo).commit();
+ index.doc(3).field(title).add(foo).commit();
+ {
+ uint32_t fieldId = 0;
+
+ MatchDataLayout mdl;
+ FakeRequestContext requestContext;
+ TermFieldHandle handle = mdl.allocTermField(fieldId);
+ MatchData::UP match_data = mdl.createMatchData();
+
+ // filter field
+ FieldSpec field(title, fieldId, handle, true);
+ FieldSpecList fields;
+ fields.add(field);
+
+ Searchable &searchable = index.index;
+ Blueprint::UP res = searchable.createBlueprint(requestContext, fields, makeTerm(foo));
+ EXPECT_TRUE(res.get() != NULL);
+
+ res->fetchPostings(true);
+ SearchIterator::UP search = res->createSearch(*match_data, true);
+ EXPECT_TRUE(search.get() != NULL);
+ EXPECT_TRUE(dynamic_cast<BooleanMatchIteratorWrapper *>(search.get()) != NULL);
+ search->initFullRange();
+ EXPECT_EQUAL("1,3", toString(*search));
+ }
+}
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/memoryindex/urlfieldinverter/.gitignore b/searchlib/src/tests/memoryindex/urlfieldinverter/.gitignore
new file mode 100644
index 00000000000..b2636fe5e81
--- /dev/null
+++ b/searchlib/src/tests/memoryindex/urlfieldinverter/.gitignore
@@ -0,0 +1 @@
+searchlib_urlfieldinverter_test_app
diff --git a/searchlib/src/tests/memoryindex/urlfieldinverter/CMakeLists.txt b/searchlib/src/tests/memoryindex/urlfieldinverter/CMakeLists.txt
new file mode 100644
index 00000000000..c5a0374fad9
--- /dev/null
+++ b/searchlib/src/tests/memoryindex/urlfieldinverter/CMakeLists.txt
@@ -0,0 +1,9 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_urlfieldinverter_test_app
+ SOURCES
+ urlfieldinverter_test.cpp
+ DEPENDS
+ searchlib_test
+ searchlib
+)
+vespa_add_test(NAME searchlib_urlfieldinverter_test_app COMMAND searchlib_urlfieldinverter_test_app)
diff --git a/searchlib/src/tests/memoryindex/urlfieldinverter/DESC b/searchlib/src/tests/memoryindex/urlfieldinverter/DESC
new file mode 100644
index 00000000000..00115ada607
--- /dev/null
+++ b/searchlib/src/tests/memoryindex/urlfieldinverter/DESC
@@ -0,0 +1 @@
+UrlField inverter test. Take a look at urlfieldinverter_test.cpp for details.
diff --git a/searchlib/src/tests/memoryindex/urlfieldinverter/FILES b/searchlib/src/tests/memoryindex/urlfieldinverter/FILES
new file mode 100644
index 00000000000..ac08b0a3e90
--- /dev/null
+++ b/searchlib/src/tests/memoryindex/urlfieldinverter/FILES
@@ -0,0 +1 @@
+urlfieldinverter_test.cpp
diff --git a/searchlib/src/tests/memoryindex/urlfieldinverter/urlfieldinverter_test.cpp b/searchlib/src/tests/memoryindex/urlfieldinverter/urlfieldinverter_test.cpp
new file mode 100644
index 00000000000..30b5883f153
--- /dev/null
+++ b/searchlib/src/tests/memoryindex/urlfieldinverter/urlfieldinverter_test.cpp
@@ -0,0 +1,579 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/* -*- mode: C++; coding: utf-8; -*- */
+
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("urlfieldinverter_test");
+#include <vespa/searchlib/index/docbuilder.h>
+#include <vespa/searchlib/memoryindex/fieldinverter.h>
+#include <vespa/searchlib/memoryindex/urlfieldinverter.h>
+#include <vespa/vespalib/objects/nbostream.h>
+#include <vespa/searchlib/test/memoryindex/ordereddocumentinserter.h>
+#include <vespa/vespalib/testkit/testapp.h>
+#include <vespa/document/repo/fixedtyperepo.h>
+
+namespace search
+{
+
+
+using document::Document;
+using index::DocBuilder;
+using index::DocTypeBuilder;
+using index::Schema;
+
+namespace memoryindex
+{
+
+namespace {
+const vespalib::string url = "url";
+}
+
+
+namespace
+{
+
+Document::UP
+makeDoc10Single(DocBuilder &b)
+{
+ b.startDocument("doc::10");
+ b.startIndexField("url").
+ startSubField("all").
+ addUrlTokenizedString("http://www.yahoo.com:81/fluke?ab=2#4").
+ endSubField().
+ startSubField("scheme").
+ addUrlTokenizedString("http").
+ endSubField().
+ startSubField("host").
+ addUrlTokenizedString("www.yahoo.com").
+ endSubField().
+ startSubField("port").
+ addUrlTokenizedString("81").
+ endSubField().
+ startSubField("path").
+ addUrlTokenizedString("/fluke").
+ addTermAnnotation("altfluke").
+ endSubField().
+ startSubField("query").
+ addUrlTokenizedString("ab=2").
+ endSubField().
+ startSubField("fragment").
+ addUrlTokenizedString("4").
+ endSubField().
+ endField();
+ return b.endDocument();
+}
+
+
+Document::UP
+makeDoc10Array(DocBuilder &b)
+{
+ b.startDocument("doc::10");
+ b.startIndexField("url").
+ startElement(1).
+ startSubField("all").
+ addUrlTokenizedString("http://www.yahoo.com:82/fluke?ab=2#8").
+ endSubField().
+ startSubField("scheme").
+ addUrlTokenizedString("http").
+ endSubField().
+ startSubField("host").
+ addUrlTokenizedString("www.yahoo.com").
+ endSubField().
+ startSubField("port").
+ addUrlTokenizedString("82").
+ endSubField().
+ startSubField("path").
+ addUrlTokenizedString("/fluke").
+ addTermAnnotation("altfluke").
+ endSubField().
+ startSubField("query").
+ addUrlTokenizedString("ab=2").
+ endSubField().
+ startSubField("fragment").
+ addUrlTokenizedString("8").
+ endSubField().
+ endElement().
+ startElement(1).
+ startSubField("all").
+ addUrlTokenizedString("http://www.flickr.com:82/fluke?ab=2#9").
+ endSubField().
+ startSubField("scheme").
+ addUrlTokenizedString("http").
+ endSubField().
+ startSubField("host").
+ addUrlTokenizedString("www.flickr.com").
+ endSubField().
+ startSubField("port").
+ addUrlTokenizedString("82").
+ endSubField().
+ startSubField("path").
+ addUrlTokenizedString("/fluke").
+ endSubField().
+ startSubField("query").
+ addUrlTokenizedString("ab=2").
+ endSubField().
+ startSubField("fragment").
+ addUrlTokenizedString("9").
+ endSubField().
+ endElement().
+ endField();
+ return b.endDocument();
+}
+
+Document::UP
+makeDoc10WeightedSet(DocBuilder &b)
+{
+ b.startDocument("doc::10");
+ b.startIndexField("url").
+ startElement(4).
+ startSubField("all").
+ addUrlTokenizedString("http://www.yahoo.com:83/fluke?ab=2#12").
+ endSubField().
+ startSubField("scheme").
+ addUrlTokenizedString("http").
+ endSubField().
+ startSubField("host").
+ addUrlTokenizedString("www.yahoo.com").
+ endSubField().
+ startSubField("port").
+ addUrlTokenizedString("83").
+ endSubField().
+ startSubField("path").
+ addUrlTokenizedString("/fluke").
+ addTermAnnotation("altfluke").
+ endSubField().
+ startSubField("query").
+ addUrlTokenizedString("ab=2").
+ endSubField().
+ startSubField("fragment").
+ addUrlTokenizedString("12").
+ endSubField().
+ endElement().
+ startElement(7).
+ startSubField("all").
+ addUrlTokenizedString("http://www.flickr.com:85/fluke?ab=2#13").
+ endSubField().
+ startSubField("scheme").
+ addUrlTokenizedString("http").
+ endSubField().
+ startSubField("host").
+ addUrlTokenizedString("www.flickr.com").
+ endSubField().
+ startSubField("port").
+ addUrlTokenizedString("85").
+ endSubField().
+ startSubField("path").
+ addUrlTokenizedString("/fluke").
+ endSubField().
+ startSubField("query").
+ addUrlTokenizedString("ab=2").
+ endSubField().
+ startSubField("fragment").
+ addUrlTokenizedString("13").
+ endSubField().
+ endElement().
+ endField();
+ return b.endDocument();
+}
+
+
+Document::UP
+makeDoc10Empty(DocBuilder &b)
+{
+ b.startDocument("doc::10");
+ return b.endDocument();
+}
+
+}
+
+struct Fixture
+{
+ Schema _schema;
+ DocBuilder _b;
+ std::vector<std::unique_ptr<FieldInverter> > _inverters;
+ std::unique_ptr<UrlFieldInverter> _urlInverter;
+ test::OrderedDocumentInserter _inserter;
+ DocTypeBuilder::SchemaIndexFields _schemaIndexFields;
+
+ static Schema
+ makeSchema(Schema::CollectionType collectionType)
+ {
+ Schema schema;
+ schema.addUriIndexFields(Schema::IndexField("url", Schema::STRING,
+ collectionType));
+ return schema;
+ }
+
+ Fixture(Schema::CollectionType collectionType)
+ : _schema(makeSchema(collectionType)),
+ _b(_schema),
+ _inverters(),
+ _urlInverter(),
+ _inserter(),
+ _schemaIndexFields()
+ {
+ _schemaIndexFields.setup(_schema);
+ for (uint32_t fieldId = 0; fieldId < _schema.getNumIndexFields();
+ ++fieldId) {
+ _inverters.push_back(std::make_unique<FieldInverter>(_schema,
+ fieldId));
+ }
+ DocTypeBuilder::UriField &urlField =
+ _schemaIndexFields._uriFields.front();
+ _urlInverter = std::make_unique<UrlFieldInverter>
+ (collectionType,
+ _inverters[urlField._all].get(),
+ _inverters[urlField._scheme].get(),
+ _inverters[urlField._host].get(),
+ _inverters[urlField._port].get(),
+ _inverters[urlField._path].get(),
+ _inverters[urlField._query].get(),
+ _inverters[urlField._fragment].get(),
+ _inverters[urlField._hostname].get());
+ }
+
+ void
+ invertDocument(uint32_t docId, const Document &doc)
+ {
+ _urlInverter->invertField(docId, doc.getValue(url));
+ }
+
+ void
+ pushDocuments()
+ {
+ uint32_t fieldId = 0;
+ for (auto &inverter : _inverters) {
+ _inserter.setFieldId(fieldId);
+ inverter->pushDocuments(_inserter);
+ ++fieldId;
+ }
+ }
+
+ void
+ enableAnnotations()
+ {
+ _urlInverter->setUseAnnotations(true);
+ }
+};
+
+
+TEST_F("requireThatSingleUrlFieldWorks", Fixture(Schema::SINGLE))
+{
+ f.invertDocument(10, *makeDoc10Single(f._b));
+ f.pushDocuments();
+ EXPECT_EQUAL("f=0,"
+ "w=2,a=10,"
+ "w=4,a=10,"
+ "w=81,a=10,"
+ "w=ab,a=10,"
+ "w=com,a=10,"
+ "w=fluke,a=10,"
+ "w=http,a=10,"
+ "w=www,a=10,"
+ "w=yahoo,a=10,"
+ "f=1,"
+ "w=http,a=10,"
+ "f=2,"
+ "w=com,a=10,"
+ "w=www,a=10,"
+ "w=yahoo,a=10,"
+ "f=3,"
+ "w=81,a=10,"
+ "f=4,"
+ "w=fluke,a=10,"
+ "f=5,"
+ "w=2,a=10,"
+ "w=ab,a=10,"
+ "f=6,"
+ "w=4,a=10,"
+ "f=7,"
+ "w=EnDhOsT,a=10,"
+ "w=StArThOsT,a=10,"
+ "w=com,a=10,"
+ "w=www,a=10,"
+ "w=yahoo,a=10",
+ f._inserter.toStr());
+}
+
+
+TEST_F("requireThatArrayUrlFieldWorks", Fixture(Schema::ARRAY))
+{
+ f.invertDocument(10, *makeDoc10Array(f._b));
+ f.pushDocuments();
+ EXPECT_EQUAL("f=0,"
+ "w=2,a=10,"
+ "w=8,a=10,"
+ "w=82,a=10,"
+ "w=9,a=10,"
+ "w=ab,a=10,"
+ "w=com,a=10,"
+ "w=flickr,a=10,"
+ "w=fluke,a=10,"
+ "w=http,a=10,"
+ "w=www,a=10,"
+ "w=yahoo,a=10,"
+ "f=1,"
+ "w=http,a=10,"
+ "f=2,"
+ "w=com,a=10,"
+ "w=flickr,a=10,"
+ "w=www,a=10,"
+ "w=yahoo,a=10,"
+ "f=3,"
+ "w=82,a=10,"
+ "f=4,"
+ "w=fluke,a=10,"
+ "f=5,"
+ "w=2,a=10,"
+ "w=ab,a=10,"
+ "f=6,"
+ "w=8,a=10,"
+ "w=9,a=10,"
+ "f=7,"
+ "w=EnDhOsT,a=10,"
+ "w=StArThOsT,a=10,"
+ "w=com,a=10,"
+ "w=flickr,a=10,"
+ "w=www,a=10,"
+ "w=yahoo,a=10",
+ f._inserter.toStr());
+}
+
+TEST_F("requireThatWeightedSetFieldWorks", Fixture(Schema::WEIGHTEDSET))
+{
+ f.invertDocument(10, *makeDoc10WeightedSet(f._b));
+ f.pushDocuments();
+ EXPECT_EQUAL("f=0,"
+ "w=12,a=10,"
+ "w=13,a=10,"
+ "w=2,a=10,"
+ "w=83,a=10,"
+ "w=85,a=10,"
+ "w=ab,a=10,"
+ "w=com,a=10,"
+ "w=flickr,a=10,"
+ "w=fluke,a=10,"
+ "w=http,a=10,"
+ "w=www,a=10,"
+ "w=yahoo,a=10,"
+ "f=1,"
+ "w=http,a=10,"
+ "f=2,"
+ "w=com,a=10,"
+ "w=flickr,a=10,"
+ "w=www,a=10,"
+ "w=yahoo,a=10,"
+ "f=3,"
+ "w=83,a=10,"
+ "w=85,a=10,"
+ "f=4,"
+ "w=fluke,a=10,"
+ "f=5,"
+ "w=2,a=10,"
+ "w=ab,a=10,"
+ "f=6,"
+ "w=12,a=10,"
+ "w=13,a=10,"
+ "f=7,"
+ "w=EnDhOsT,a=10,"
+ "w=StArThOsT,a=10,"
+ "w=com,a=10,"
+ "w=flickr,a=10,"
+ "w=www,a=10,"
+ "w=yahoo,a=10",
+ f._inserter.toStr());
+}
+
+TEST_F("requireThatAnnotatedSingleUrlFieldWorks", Fixture(Schema::SINGLE))
+{
+ f.enableAnnotations();
+ f.invertDocument(10, *makeDoc10Single(f._b));
+ f.pushDocuments();
+ EXPECT_EQUAL("f=0,"
+ "w=2,a=10,"
+ "w=4,a=10,"
+ "w=81,a=10,"
+ "w=ab,a=10,"
+ "w=com,a=10,"
+ "w=fluke,a=10,"
+ "w=http,a=10,"
+ "w=www,a=10,"
+ "w=yahoo,a=10,"
+ "f=1,"
+ "w=http,a=10,"
+ "f=2,"
+ "w=com,a=10,"
+ "w=www,a=10,"
+ "w=yahoo,a=10,"
+ "f=3,"
+ "w=81,a=10,"
+ "f=4,"
+ "w=altfluke,a=10,"
+ "w=fluke,a=10,"
+ "f=5,"
+ "w=2,a=10,"
+ "w=ab,a=10,"
+ "f=6,"
+ "w=4,a=10,"
+ "f=7,"
+ "w=EnDhOsT,a=10,"
+ "w=StArThOsT,a=10,"
+ "w=com,a=10,"
+ "w=www,a=10,"
+ "w=yahoo,a=10",
+ f._inserter.toStr());
+}
+
+
+TEST_F("requireThatAnnotatedArrayUrlFieldWorks", Fixture(Schema::ARRAY))
+{
+ f.enableAnnotations();
+ f.invertDocument(10, *makeDoc10Array(f._b));
+ f.pushDocuments();
+ EXPECT_EQUAL("f=0,"
+ "w=2,a=10,"
+ "w=8,a=10,"
+ "w=82,a=10,"
+ "w=9,a=10,"
+ "w=ab,a=10,"
+ "w=com,a=10,"
+ "w=flickr,a=10,"
+ "w=fluke,a=10,"
+ "w=http,a=10,"
+ "w=www,a=10,"
+ "w=yahoo,a=10,"
+ "f=1,"
+ "w=http,a=10,"
+ "f=2,"
+ "w=com,a=10,"
+ "w=flickr,a=10,"
+ "w=www,a=10,"
+ "w=yahoo,a=10,"
+ "f=3,"
+ "w=82,a=10,"
+ "f=4,"
+ "w=altfluke,a=10,"
+ "w=fluke,a=10,"
+ "f=5,"
+ "w=2,a=10,"
+ "w=ab,a=10,"
+ "f=6,"
+ "w=8,a=10,"
+ "w=9,a=10,"
+ "f=7,"
+ "w=EnDhOsT,a=10,"
+ "w=StArThOsT,a=10,"
+ "w=com,a=10,"
+ "w=flickr,a=10,"
+ "w=www,a=10,"
+ "w=yahoo,a=10",
+ f._inserter.toStr());
+}
+
+TEST_F("requireThatAnnotatedWeightedSetFieldWorks",
+ Fixture(Schema::WEIGHTEDSET))
+{
+ f.enableAnnotations();
+ f._inserter.setVerbose();
+ f.invertDocument(10, *makeDoc10WeightedSet(f._b));
+ f.pushDocuments();
+ EXPECT_EQUAL("f=0,"
+ "w=12,a=10(e=0,w=4,l=9[8]),"
+ "w=13,a=10(e=1,w=7,l=9[8]),"
+ "w=2,a=10(e=0,w=4,l=9[7],e=1,w=7,l=9[7]),"
+ "w=83,a=10(e=0,w=4,l=9[4]),"
+ "w=85,a=10(e=1,w=7,l=9[4]),"
+ "w=ab,a=10(e=0,w=4,l=9[6],e=1,w=7,l=9[6]),"
+ "w=com,a=10(e=0,w=4,l=9[3],e=1,w=7,l=9[3]),"
+ "w=flickr,a=10(e=1,w=7,l=9[2]),"
+ "w=fluke,a=10(e=0,w=4,l=9[5],e=1,w=7,l=9[5]),"
+ "w=http,a=10(e=0,w=4,l=9[0],e=1,w=7,l=9[0]),"
+ "w=www,a=10(e=0,w=4,l=9[1],e=1,w=7,l=9[1]),"
+ "w=yahoo,a=10(e=0,w=4,l=9[2]),"
+ "f=1,"
+ "w=http,a=10(e=0,w=4,l=1[0],e=1,w=7,l=1[0]),"
+ "f=2,"
+ "w=com,a=10(e=0,w=4,l=3[2],e=1,w=7,l=3[2]),"
+ "w=flickr,a=10(e=1,w=7,l=3[1]),"
+ "w=www,a=10(e=0,w=4,l=3[0],e=1,w=7,l=3[0]),"
+ "w=yahoo,a=10(e=0,w=4,l=3[1]),"
+ "f=3,"
+ "w=83,a=10(e=0,w=4,l=1[0]),"
+ "w=85,a=10(e=1,w=7,l=1[0]),"
+ "f=4,"
+ "w=altfluke,a=10(e=0,w=4,l=1[0]),"
+ "w=fluke,a=10(e=0,w=4,l=1[0],e=1,w=7,l=1[0]),"
+ "f=5,"
+ "w=2,a=10(e=0,w=4,l=2[1],e=1,w=7,l=2[1]),"
+ "w=ab,a=10(e=0,w=4,l=2[0],e=1,w=7,l=2[0]),"
+ "f=6,"
+ "w=12,a=10(e=0,w=4,l=1[0]),"
+ "w=13,a=10(e=1,w=7,l=1[0]),"
+ "f=7,"
+ "w=EnDhOsT,a=10(e=0,w=4,l=5[4],e=1,w=7,l=5[4]),"
+ "w=StArThOsT,a=10(e=0,w=4,l=5[0],e=1,w=7,l=5[0]),"
+ "w=com,a=10(e=0,w=4,l=5[3],e=1,w=7,l=5[3]),"
+ "w=flickr,a=10(e=1,w=7,l=5[2]),"
+ "w=www,a=10(e=0,w=4,l=5[1],e=1,w=7,l=5[1]),"
+ "w=yahoo,a=10(e=0,w=4,l=5[2])",
+ f._inserter.toStr());
+}
+
+
+TEST_F("requireThatEmptySingleFieldWorks", Fixture(Schema::SINGLE))
+{
+ f.invertDocument(10, *makeDoc10Empty(f._b));
+ f.pushDocuments();
+ EXPECT_EQUAL("",
+ f._inserter.toStr());
+}
+
+TEST_F("requireThatEmptyArrayFieldWorks", Fixture(Schema::ARRAY))
+{
+ f.invertDocument(10, *makeDoc10Empty(f._b));
+ f.pushDocuments();
+ EXPECT_EQUAL("",
+ f._inserter.toStr());
+}
+
+TEST_F("requireThatEmptyWeightedSetFieldWorks", Fixture(Schema::WEIGHTEDSET))
+{
+ f.invertDocument(10, *makeDoc10Empty(f._b));
+ f.pushDocuments();
+ EXPECT_EQUAL("",
+ f._inserter.toStr());
+}
+
+TEST_F("requireThatAnnotatedEmptySingleFieldWorks", Fixture(Schema::SINGLE))
+{
+ f.enableAnnotations();
+ f.invertDocument(10, *makeDoc10Empty(f._b));
+ f.pushDocuments();
+ EXPECT_EQUAL("",
+ f._inserter.toStr());
+}
+
+TEST_F("requireThatAnnotatedEmptyArrayFieldWorks", Fixture(Schema::ARRAY))
+{
+ f.enableAnnotations();
+ f.invertDocument(10, *makeDoc10Empty(f._b));
+ f.pushDocuments();
+ EXPECT_EQUAL("",
+ f._inserter.toStr());
+}
+
+TEST_F("requireThatAnnotatedEmptyWeightedSetFieldWorks",
+ Fixture(Schema::WEIGHTEDSET))
+{
+ f.enableAnnotations();
+ f.invertDocument(10, *makeDoc10Empty(f._b));
+ f.pushDocuments();
+ EXPECT_EQUAL("",
+ f._inserter.toStr());
+}
+
+} // namespace memoryindex
+} // namespace search
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/memorytub/.gitignore b/searchlib/src/tests/memorytub/.gitignore
new file mode 100644
index 00000000000..d3185d605a1
--- /dev/null
+++ b/searchlib/src/tests/memorytub/.gitignore
@@ -0,0 +1,4 @@
+.depend
+Makefile
+test_memorytub
+searchlib_test_memorytub_app
diff --git a/searchlib/src/tests/memorytub/CMakeLists.txt b/searchlib/src/tests/memorytub/CMakeLists.txt
new file mode 100644
index 00000000000..a06fb4de8e2
--- /dev/null
+++ b/searchlib/src/tests/memorytub/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_test_memorytub_app
+ SOURCES
+ memorytub_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_test_memorytub_app COMMAND searchlib_test_memorytub_app)
diff --git a/searchlib/src/tests/memorytub/memorytub_test.cpp b/searchlib/src/tests/memorytub/memorytub_test.cpp
new file mode 100644
index 00000000000..348aee2fe7f
--- /dev/null
+++ b/searchlib/src/tests/memorytub/memorytub_test.cpp
@@ -0,0 +1,205 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("test_memorytub");
+
+#include <vespa/searchlib/util/memorytub.h>
+#include <vespa/vespalib/testkit/testapp.h>
+
+#define MEMTUB_ARRAY_ALLOC(tub, type, size) ((type *) tub->Alloc(sizeof(type) * size))
+
+
+enum {
+ SMALL_STRING = 100,
+ BIG_STRING = 100000,
+ SMALL_SMALL_ARRAY = 10,
+ BIG_SMALL_ARRAY = 1000
+};
+
+
+class Small
+{
+public:
+ char filler[SMALL_STRING];
+};
+
+
+class Big
+{
+public:
+ char filler[BIG_STRING];
+};
+
+
+class Test : public vespalib::TestApp
+{
+private:
+ search::util::SmallMemoryTub _tub;
+
+public:
+ bool Overlap(char *start1, char *end1,
+ char *start2, char *end2);
+ bool InTub(char *pt, char *end);
+ bool NotInTub(char *pt, char *end);
+ int Main();
+
+ Test(void)
+ : _tub()
+ {
+ }
+};
+
+
+bool
+Test::Overlap(char *start1, char *end1,
+ char *start2, char *end2)
+{
+ if (start1 == end1)
+ return false;
+
+ if (start2 == end2)
+ return false;
+
+ if (start2 >= start1 && start2 < end1)
+ return true;
+
+ if (end2 > start1 && end2 <= end1)
+ return true;
+
+ if (start1 >= start2 && start1 < end2)
+ return true;
+
+ if (end1 > start2 && end1 <= end2)
+ return true;
+
+ return false;
+}
+
+
+bool
+Test::InTub(char *pt, char *end)
+{
+ for (char *p = pt; p < end; p++)
+ if (!_tub.InTub(p))
+ return false;
+ return true;
+}
+
+
+bool
+Test::NotInTub(char *pt, char *end)
+{
+ for (char *p = pt; p < end; p++)
+ if (_tub.InTub(p))
+ return false;
+ return true;
+}
+
+
+int
+Test::Main()
+{
+ TEST_INIT("memorytub-test");
+
+ Small *small = NULL;
+ Big *big = NULL;
+ char *small_string = NULL;
+ char *big_string = NULL;
+ Small *small_small_array = NULL;
+ Small *big_small_array = NULL;
+
+ EXPECT_TRUE(!_tub.InTub(&_tub));
+
+ EXPECT_TRUE(sizeof(Small) < _tub.GetAllocLimit());
+ EXPECT_TRUE(sizeof(Big) > _tub.GetAllocLimit());
+ EXPECT_TRUE(SMALL_STRING < _tub.GetAllocLimit());
+ EXPECT_TRUE(BIG_STRING > _tub.GetAllocLimit());
+ EXPECT_TRUE(sizeof(Small) * SMALL_SMALL_ARRAY < _tub.GetAllocLimit());
+ EXPECT_TRUE(sizeof(Small) * BIG_SMALL_ARRAY > _tub.GetAllocLimit());
+
+ small = new (&_tub) Small();
+ EXPECT_TRUE(((void *)small) != ((void *)&_tub));
+ EXPECT_TRUE(InTub((char *)small, (char *)(small + 1)));
+
+ big = new (&_tub) Big();
+ EXPECT_TRUE(((void *)big) != ((void *)&_tub));
+ EXPECT_TRUE(InTub((char *)big, (char *)(big + 1)));
+
+ small_string = MEMTUB_ARRAY_ALLOC((&_tub), char, SMALL_STRING);
+ EXPECT_TRUE(((void *)small_string) != ((void *)&_tub));
+ EXPECT_TRUE(InTub(small_string, small_string + SMALL_STRING));
+
+ big_string = MEMTUB_ARRAY_ALLOC((&_tub), char, BIG_STRING);
+ EXPECT_TRUE(((void *)big_string) != ((void *)&_tub));
+ EXPECT_TRUE(InTub(big_string, big_string + BIG_STRING));
+
+ small_small_array = MEMTUB_ARRAY_ALLOC((&_tub), Small, SMALL_SMALL_ARRAY);
+ EXPECT_TRUE(((void *)small_small_array) != ((void *)&_tub));
+ EXPECT_TRUE(InTub((char *)small_small_array, (char *)(small_small_array + SMALL_SMALL_ARRAY)));
+
+ big_small_array = MEMTUB_ARRAY_ALLOC((&_tub), Small, BIG_SMALL_ARRAY);
+ EXPECT_TRUE(((void *)big_small_array) != ((void *)&_tub));
+ EXPECT_TRUE(InTub((char *)big_small_array, (char *)(big_small_array + BIG_SMALL_ARRAY)));
+
+
+ EXPECT_TRUE(!Overlap((char *)small, (char *)(small + 1),
+ (char *)big, (char *)(big + 1)));
+
+ EXPECT_TRUE(!Overlap((char *)small, (char *)(small + 1),
+ small_string, small_string + SMALL_STRING));
+
+ EXPECT_TRUE(!Overlap((char *)small, (char *)(small + 1),
+ big_string, big_string + BIG_STRING));
+
+ EXPECT_TRUE(!Overlap((char *)small, (char *)(small + 1),
+ (char *)small_small_array, (char *)(small_small_array + SMALL_SMALL_ARRAY)));
+
+ EXPECT_TRUE(!Overlap((char *)small, (char *)(small + 1),
+ (char *)big_small_array, (char *)(big_small_array + BIG_SMALL_ARRAY)));
+
+
+ EXPECT_TRUE(!Overlap((char *)big, (char *)(big + 1),
+ small_string, small_string + SMALL_STRING));
+
+ EXPECT_TRUE(!Overlap((char *)big, (char *)(big + 1),
+ big_string, big_string + BIG_STRING));
+
+ EXPECT_TRUE(!Overlap((char *)big, (char *)(big + 1),
+ (char *)small_small_array, (char *)(small_small_array + SMALL_SMALL_ARRAY)));
+
+ EXPECT_TRUE(!Overlap((char *)big, (char *)(big + 1),
+ (char *)big_small_array, (char *)(big_small_array + BIG_SMALL_ARRAY)));
+
+
+ EXPECT_TRUE(!Overlap(small_string, small_string + SMALL_STRING,
+ big_string, big_string + BIG_STRING));
+
+ EXPECT_TRUE(!Overlap(small_string, small_string + SMALL_STRING,
+ (char *)small_small_array, (char *)(small_small_array + SMALL_SMALL_ARRAY)));
+
+ EXPECT_TRUE(!Overlap(small_string, small_string + SMALL_STRING,
+ (char *)big_small_array, (char *)(big_small_array + BIG_SMALL_ARRAY)));
+
+
+ EXPECT_TRUE(!Overlap(big_string, big_string + BIG_STRING,
+ (char *)small_small_array, (char *)(small_small_array + SMALL_SMALL_ARRAY)));
+
+ EXPECT_TRUE(!Overlap(big_string, big_string + BIG_STRING,
+ (char *)big_small_array, (char *)(big_small_array + BIG_SMALL_ARRAY)));
+
+
+ EXPECT_TRUE(!Overlap((char *)small_small_array, (char *)(small_small_array + SMALL_SMALL_ARRAY),
+ (char *)big_small_array, (char *)(big_small_array + BIG_SMALL_ARRAY)));
+
+
+ _tub.Reset();
+ EXPECT_TRUE(NotInTub((char *)small, (char *)(small + 1)));
+ EXPECT_TRUE(NotInTub((char *)big, (char *)(big + 1)));
+ EXPECT_TRUE(NotInTub(small_string, small_string + SMALL_STRING));
+ EXPECT_TRUE(NotInTub(big_string, big_string + BIG_STRING));
+ EXPECT_TRUE(NotInTub((char *)small_small_array, (char *)(small_small_array + SMALL_SMALL_ARRAY)));
+ EXPECT_TRUE(NotInTub((char *)big_small_array, (char *)(big_small_array + BIG_SMALL_ARRAY)));
+ TEST_DONE();
+}
+
+TEST_APPHOOK(Test)
diff --git a/searchlib/src/tests/nativerank/.gitignore b/searchlib/src/tests/nativerank/.gitignore
new file mode 100644
index 00000000000..6a3051df4e7
--- /dev/null
+++ b/searchlib/src/tests/nativerank/.gitignore
@@ -0,0 +1,2 @@
+/vlog3.txt
+searchlib_nativerank_test_app
diff --git a/searchlib/src/tests/nativerank/CMakeLists.txt b/searchlib/src/tests/nativerank/CMakeLists.txt
new file mode 100644
index 00000000000..dc9542a4988
--- /dev/null
+++ b/searchlib/src/tests/nativerank/CMakeLists.txt
@@ -0,0 +1,12 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_nativerank_test_app
+ SOURCES
+ nativerank.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(
+ NAME searchlib_nativerank_test_app
+ COMMAND searchlib_nativerank_test_app
+ ENVIRONMENT "VESPA_LOG_TARGET=file:vlog3.txt"
+)
diff --git a/searchlib/src/tests/nativerank/nativerank.cpp b/searchlib/src/tests/nativerank/nativerank.cpp
new file mode 100644
index 00000000000..398ca52a190
--- /dev/null
+++ b/searchlib/src/tests/nativerank/nativerank.cpp
@@ -0,0 +1,828 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("nativerank_test");
+
+#include <vespa/searchlib/features/nativeattributematchfeature.h>
+#include <vespa/searchlib/features/nativefieldmatchfeature.h>
+#include <vespa/searchlib/features/nativeproximityfeature.h>
+#include <vespa/searchlib/features/nativerankfeature.h>
+#include <vespa/searchlib/features/setup.h>
+#include <vespa/searchlib/features/utils.h>
+#include <vespa/searchlib/fef/fef.h>
+#include <vespa/searchlib/fef/functiontablefactory.h>
+#include <vespa/searchlib/fef/test/plugin/setup.h>
+#include <vespa/vespalib/util/stringfmt.h>
+#include <vespa/searchlib/fef/test/ftlib.h>
+#include <vespa/searchlib/fef/test/dummy_dependency_handler.h>
+
+using namespace search::fef;
+using namespace search::fef::test;
+
+const double EPS = 10e-4;
+
+namespace search {
+namespace features {
+
+class Test : public FtTestApp {
+private:
+ BlueprintFactory _factory;
+
+ struct ANAM {
+ int32_t attributeWeight;
+ search::query::Weight termWeight;
+ uint32_t fieldWeight;
+ uint32_t docId;
+ ANAM(int32_t aw, uint32_t tw = 100, uint32_t fw = 100, uint32_t id = 1) :
+ attributeWeight(aw), termWeight(tw), fieldWeight(fw), docId(id) {}
+ vespalib::string toString() const {
+ return vespalib::make_string("aw(%d), tw(%u), fw(%u), id(%u)",
+ attributeWeight, termWeight.percent(), fieldWeight, docId);
+ }
+ };
+
+ bool assertNativeFieldMatch(feature_t score, const vespalib::string & query, const vespalib::string & field,
+ const Properties & props = Properties(), uint32_t docId = 1);
+ bool assertNativeAttributeMatch(feature_t score, const ANAM & t1, const ANAM & t2,
+ const Properties & props = Properties());
+ bool assertNativeProximity(feature_t score, const vespalib::string & query, const vespalib::string & field,
+ const Properties & props = Properties(), uint32_t docId = 1);
+ bool assertNativeRank(feature_t score, feature_t fieldMatchWeight, feature_t attributeMatchWeight, feature_t proximityWeight);
+
+ void testNativeFieldMatch();
+ void testNativeAttributeMatch();
+ void testNativeProximity();
+ void testNativeRank();
+
+public:
+ int Main();
+};
+
+void
+Test::testNativeFieldMatch()
+{
+ { // test blueprint
+ NativeFieldMatchBlueprint pt;
+
+ EXPECT_TRUE(assertCreateInstance(pt, "nativeFieldMatch"));
+
+ FtFeatureTest ft(_factory, "");
+ ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo");
+ ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "bar");
+ ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::WEIGHTEDSET, "qux");
+ ft.getIndexEnv().getTableManager().addFactory(ITableFactory::SP(new FunctionTableFactory(16)));
+
+ StringList params, in, out;
+ FT_SETUP_FAIL(pt, params.add("baz")); // field 'baz' not found
+ params.clear();
+
+ Properties & p = ft.getIndexEnv().getProperties();
+ p.add("nativeFieldMatch.firstOccurrenceTable", "a");
+ FT_SETUP_FAIL(pt, ft.getIndexEnv(), params); // table 'a' not found
+ p.clear().add("nativeFieldMatch.occurrenceCountTable", "b");
+ FT_SETUP_FAIL(pt, ft.getIndexEnv(), params); // table 'b' not found
+
+ const TableManager & tm = ft.getIndexEnv().getTableManager();
+ {
+ p.clear();
+ p.add("nativeRank.useTableNormalization", "false");
+ FT_SETUP_OK(pt, params, in, out.add("score"));
+ Blueprint::UP bp = pt.createInstance();
+ DummyDependencyHandler deps(*bp);
+ bp->setup(ft.getIndexEnv(), params);
+ const NativeFieldMatchParams & pas = (dynamic_cast<NativeFieldMatchBlueprint *>(bp.get()))->getParams();
+ ASSERT_TRUE(pas.vector.size() == 3);
+ EXPECT_TRUE(pas.vector[0].firstOccTable == tm.getTable("expdecay(8000,12.50)"));
+ EXPECT_TRUE(pas.vector[1].firstOccTable == tm.getTable("expdecay(8000,12.50)"));
+ EXPECT_TRUE(pas.vector[0].numOccTable == tm.getTable("loggrowth(1500,4000,19)"));
+ EXPECT_TRUE(pas.vector[1].numOccTable == tm.getTable("loggrowth(1500,4000,19)"));
+ EXPECT_EQUAL(pas.vector[0].maxTableSum, 1);
+ EXPECT_EQUAL(pas.vector[1].maxTableSum, 1);
+ EXPECT_EQUAL(pas.vector[0].fieldWeight, 100u);
+ EXPECT_EQUAL(pas.vector[1].fieldWeight, 100u);
+ EXPECT_EQUAL(pas.vector[0].field, true);
+ EXPECT_EQUAL(pas.vector[1].field, true);
+ EXPECT_EQUAL(pas.vector[2].field, false);
+ EXPECT_EQUAL(pas.vector[0].averageFieldLength, NativeFieldMatchParam::NOT_DEF_FIELD_LENGTH);
+ EXPECT_EQUAL(pas.vector[1].averageFieldLength, NativeFieldMatchParam::NOT_DEF_FIELD_LENGTH);
+ EXPECT_EQUAL(pas.minFieldLength, 6u);
+ EXPECT_EQUAL(pas.vector[0].firstOccImportance, 0.5);
+ EXPECT_EQUAL(pas.vector[1].firstOccImportance, 0.5);
+ }
+ {
+ p.clear();
+ p.add("nativeFieldMatch.firstOccurrenceTable", "linear(0,1)");
+ p.add("nativeFieldMatch.firstOccurrenceTable.foo", "linear(0,2)");
+ p.add("nativeFieldMatch.occurrenceCountTable", "linear(0,3)");
+ p.add("nativeFieldMatch.occurrenceCountTable.baz", "linear(0,4)");
+ p.add("vespa.fieldweight.foo", "200");
+ p.add("vespa.fieldweight.baz", "0");
+ p.add("nativeFieldMatch.averageFieldLength.foo", "400");
+ p.add("nativeFieldMatch.averageFieldLength.baz", "500");
+ p.add("nativeFieldMatch.minFieldLength", "12");
+ p.add("nativeFieldMatch.firstOccurrenceImportance", "0.8");
+ p.add("nativeFieldMatch.firstOccurrenceImportance.foo", "0.6");
+ ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "baz");
+ ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "quux");
+ ft.getIndexEnv().getFields()[4].setFilter(true);
+ FT_SETUP_OK(pt, ft.getIndexEnv(), params.add("foo").add("baz").add("quux"), in, out);
+ Blueprint::UP bp = pt.createInstance();
+ DummyDependencyHandler deps(*bp);
+ bp->setup(ft.getIndexEnv(), params);
+ const NativeFieldMatchParams & pas = (dynamic_cast<NativeFieldMatchBlueprint *>(bp.get()))->getParams();
+ ASSERT_TRUE(pas.vector.size() == 5);
+ EXPECT_TRUE(pas.vector[0].firstOccTable == tm.getTable("linear(0,2)"));
+ EXPECT_TRUE(pas.vector[3].firstOccTable == tm.getTable("linear(0,1)"));
+ EXPECT_TRUE(pas.vector[0].numOccTable == tm.getTable("linear(0,3)"));
+ EXPECT_TRUE(pas.vector[3].numOccTable == tm.getTable("linear(0,4)"));
+ EXPECT_APPROX(pas.vector[0].maxTableSum, 2.4, 10e-6);
+ EXPECT_APPROX(pas.vector[3].maxTableSum, 1.6, 10e-6);
+ EXPECT_EQUAL(pas.vector[0].fieldWeight, 200u);
+ EXPECT_EQUAL(pas.vector[1].fieldWeight, 100u);
+ EXPECT_EQUAL(pas.vector[3].fieldWeight, 0u);
+ EXPECT_EQUAL(pas.vector[0].field, true);
+ EXPECT_EQUAL(pas.vector[1].field, false); // only 'foo' and 'baz' are specified explicit
+ EXPECT_EQUAL(pas.vector[2].field, false); // 'qux' is an attribute
+ EXPECT_EQUAL(pas.vector[3].field, false); // fieldWeight == 0 -> do not consider this field
+ EXPECT_EQUAL(pas.vector[4].field, false); // filter field
+ EXPECT_EQUAL(pas.vector[0].averageFieldLength, 400u);
+ EXPECT_EQUAL(pas.vector[3].averageFieldLength, 500u);
+ EXPECT_EQUAL(pas.minFieldLength, 12u);
+ EXPECT_EQUAL(pas.vector[0].firstOccImportance, 0.6);
+ EXPECT_EQUAL(pas.vector[3].firstOccImportance, 0.8);
+ }
+ {
+ FtIndexEnvironment ie;
+ FT_DUMP(_factory, "nativeFieldMatch", ie, StringList().add("nativeFieldMatch"));
+ }
+ }
+
+ { // test helper functions
+ FtFeatureTest ft(_factory, "");
+ NativeFieldMatchParams p;
+ NativeFieldMatchParam f;
+ Table t;
+ t.add(0).add(1).add(2).add(3).add(4).add(5).add(6).add(7);
+ f.firstOccTable = &t;
+ f.numOccTable = &t;
+ p.vector.push_back(f);
+ NativeFieldMatchExecutor nfme(ft.getQueryEnv(), p);
+ EXPECT_EQUAL(p.minFieldLength, 6u);
+ EXPECT_EQUAL(nfme.getFirstOccBoost(0, 0, 4), 0);
+ EXPECT_EQUAL(nfme.getFirstOccBoost(0, 1, 4), 1);
+ EXPECT_EQUAL(nfme.getFirstOccBoost(0, 2, 4), 2);
+ EXPECT_EQUAL(nfme.getFirstOccBoost(0, 3, 4), 4);
+ EXPECT_EQUAL(nfme.getFirstOccBoost(0, 3, 6), 4);
+ EXPECT_EQUAL(nfme.getFirstOccBoost(0, 4, 6), 5);
+ EXPECT_EQUAL(nfme.getFirstOccBoost(0, 5, 6), 7);
+ EXPECT_EQUAL(nfme.getFirstOccBoost(0, 0, 12), 0);
+ EXPECT_EQUAL(nfme.getFirstOccBoost(0, 4, 12), 2);
+ EXPECT_EQUAL(nfme.getFirstOccBoost(0, 11, 12), 7);
+ EXPECT_EQUAL(nfme.getNumOccBoost(0, 0, 4), 0);
+ EXPECT_EQUAL(nfme.getNumOccBoost(0, 2, 4), 2);
+ EXPECT_EQUAL(nfme.getNumOccBoost(0, 4, 4), 4);
+ EXPECT_EQUAL(nfme.getNumOccBoost(0, 4, 6), 4);
+ EXPECT_EQUAL(nfme.getNumOccBoost(0, 5, 6), 5);
+ EXPECT_EQUAL(nfme.getNumOccBoost(0, 6, 6), 7);
+ EXPECT_EQUAL(nfme.getNumOccBoost(0, 0, 12), 0);
+ EXPECT_EQUAL(nfme.getNumOccBoost(0, 6, 12), 3);
+ EXPECT_EQUAL(nfme.getNumOccBoost(0, 12, 12), 7);
+ }
+ { // test params object
+ NativeFieldMatchParams p;
+ p.resize(1);
+ p.setMaxTableSums(0, 0); // test reset to 1
+ EXPECT_EQUAL(p.vector[0].maxTableSum, 1);
+ }
+
+ { // test executor
+ // 1 term
+ EXPECT_TRUE(assertNativeFieldMatch(55, "a", "a"));
+ EXPECT_TRUE(assertNativeFieldMatch(40, "a", "x x x a"));
+ EXPECT_TRUE(assertNativeFieldMatch(70, "a", "a a a a"));
+
+ // 2 terms
+ EXPECT_TRUE(assertNativeFieldMatch(27.5, "a b", "a"));
+ EXPECT_TRUE(assertNativeFieldMatch(52.5, "a b", "a b"));
+ EXPECT_TRUE(assertNativeFieldMatch(67.5, "a b", "a b a b a b a b"));
+
+ // 3 terms
+ EXPECT_TRUE(assertNativeFieldMatch(50, "a b c", "a b c"));
+
+ // 4 terms
+ EXPECT_TRUE(assertNativeFieldMatch(47.5, "a b c d", "a b c d"));
+
+ // change term weight
+ EXPECT_TRUE(assertNativeFieldMatch(45, "a b", "a x x x b"));
+ EXPECT_TRUE(assertNativeFieldMatch(50, "a!600 b!200", "a x x x b"));
+ EXPECT_TRUE(assertNativeFieldMatch(40, "a!200 b!600", "a x x x b"));
+ EXPECT_TRUE(assertNativeFieldMatch(55, "a!200 b!0", "a x x x b"));
+
+ // change significance
+ EXPECT_TRUE(assertNativeFieldMatch(46, "a%0.4 b%0.1", "x a x x x b"));
+ EXPECT_TRUE(assertNativeFieldMatch(34, "a%0.1 b%0.4", "x a x x x b"));
+
+ // change firstOccImportance
+ Properties p = Properties().add("nativeFieldMatch.firstOccurrenceImportance", "1");
+ EXPECT_TRUE(assertNativeFieldMatch(100, "a", "a", p));
+ p.clear().add("nativeFieldMatch.firstOccurrenceImportance", "0");
+ EXPECT_TRUE(assertNativeFieldMatch(10, "a", "a", p));
+
+ // use table normalization
+ p.clear().add("nativeRank.useTableNormalization", "true");
+ // norm factor = (100*0.5 + 60*0.5) = 80
+ EXPECT_TRUE(assertNativeFieldMatch(0.6875, "a", "a", p)); // (55/80)
+ EXPECT_TRUE(assertNativeFieldMatch(1, "a", "a a a a a a", p)); // (80/80)
+ p.add("nativeFieldMatch.firstOccurrenceTable", "linear(0,0)");
+ p.add("nativeFieldMatch.occurrenceCountTable", "linear(0,0)");
+ EXPECT_TRUE(assertNativeFieldMatch(0, "a", "a", p));
+
+ // use average field length
+ p.clear().add("nativeFieldMatch.averageFieldLength.foo", "12");
+ EXPECT_TRUE(assertNativeFieldMatch(50, "a", "a", p)); // firstOccBoost: 100, numOccBoost: 0
+ EXPECT_TRUE(assertNativeFieldMatch(45, "a", "x x x a", p)); // firstOccBoost: 90, numOccBoost: 0
+ EXPECT_TRUE(assertNativeFieldMatch(50, "a", "x x x a a", p)); // firstOccBoost: 90, numOccBoost: 10
+
+ // change field weight
+ p.clear().add("vespa.fieldweight.foo", "0");
+ EXPECT_TRUE(assertNativeFieldMatch(0, "a", "a", p));
+
+ // change docId to give 0 hits
+ EXPECT_TRUE(assertNativeFieldMatch(0, "a", "a", p.clear(), 2));
+ }
+}
+
+bool
+Test::assertNativeFieldMatch(feature_t score,
+ const vespalib::string & query,
+ const vespalib::string & field,
+ const Properties & props,
+ uint32_t docId)
+{
+ LOG(info, "assertNativeFieldMatch(%f, '%s', '%s')", score, query.c_str(), field.c_str());
+
+ // Setup feature test.
+ vespalib::string feature = "nativeFieldMatch";
+ FtFeatureTest ft(_factory, feature);
+
+ StringVectorMap index;
+ index["foo"] = FtUtil::tokenize(field);
+ ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo");
+ ft.getIndexEnv().getTableManager().addFactory(ITableFactory::SP(new FunctionTableFactory(256)));
+ ft.getIndexEnv().getProperties().add("nativeFieldMatch.firstOccurrenceTable",
+ vespalib::make_string("linear(-10,100,%zu)", std::max((size_t)6, index["foo"].size())));
+ ft.getIndexEnv().getProperties().add("nativeFieldMatch.occurrenceCountTable",
+ vespalib::make_string("linear(10,0,%zu)", std::max((size_t)6, index["foo"].size()) + 1));
+ ft.getIndexEnv().getProperties().add("nativeRank.useTableNormalization", "false"); // make it easier to test
+ ft.getIndexEnv().getProperties().import(props);
+ FT_SETUP(ft, FtUtil::toQuery(query), index, 1);
+
+ // Execute and compare results.
+ if (!EXPECT_TRUE(ft.execute(score, EPS, docId))) {
+ return false;
+ }
+ return true;
+}
+
+void
+Test::testNativeAttributeMatch()
+{
+ { // test blueprint
+ NativeAttributeMatchBlueprint pt;
+
+ EXPECT_TRUE(assertCreateInstance(pt, "nativeAttributeMatch"));
+
+ FtFeatureTest ft(_factory, "");
+ ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::WEIGHTEDSET, "foo");
+ ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::WEIGHTEDSET, "bar");
+ ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "qux");
+ ft.getIndexEnv().getTableManager().addFactory(ITableFactory::SP(new FunctionTableFactory(16)));
+
+ StringList params, in, out;
+ FT_SETUP_FAIL(pt, params.add("baz")); // field 'baz' not found
+ params.clear();
+
+ Properties & p = ft.getIndexEnv().getProperties();
+ p.add("nativeAttributeMatch.weightTable", "a");
+ FT_SETUP_FAIL(pt, ft.getIndexEnv(), params); // table 'a' not found
+
+// const TableManager & tm = ft.getIndexEnv().getTableManager();
+ {
+ p.clear();
+ p.add("nativeRank.useTableNormalization", "false");
+ FT_SETUP_OK(pt, params, in, out.add("score"));
+ Blueprint::UP bp = pt.createInstance();
+ DummyDependencyHandler deps(*bp);
+ bp->setup(ft.getIndexEnv(), params);
+ const NativeAttributeMatchParams & pas = (dynamic_cast<NativeAttributeMatchBlueprint *>(bp.get()))->getParams();
+ ASSERT_TRUE(pas.vector.size() == 3);
+// EXPECT_TRUE(pas.vector[0].weightBoostTable == tm.getTable("linear(1,0)"));
+// EXPECT_TRUE(pas.vector[1].weightBoostTable == tm.getTable("linear(1,0)"));
+ EXPECT_EQUAL(pas.vector[0].maxTableSum, 1);
+ EXPECT_EQUAL(pas.vector[1].maxTableSum, 1);
+ EXPECT_EQUAL(pas.vector[0].fieldWeight, 100u);
+ EXPECT_EQUAL(pas.vector[1].fieldWeight, 100u);
+ EXPECT_EQUAL(pas.vector[0].field, true);
+ EXPECT_EQUAL(pas.vector[1].field, true);
+ EXPECT_EQUAL(pas.vector[2].field, false);
+ }
+ {
+ p.clear();
+ p.add("nativeAttributeMatch.weightTable", "linear(0,3)");
+ p.add("nativeAttributeMatch.weightTable.foo", "linear(0,2)");
+ p.add("vespa.fieldweight.foo", "200");
+ p.add("vespa.fieldweight.baz", "0");
+ ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::WEIGHTEDSET, "baz");
+ FT_SETUP_OK(pt, ft.getIndexEnv(), params.add("foo").add("baz"), in, out);
+ Blueprint::UP bp = pt.createInstance();
+ DummyDependencyHandler deps(*bp);
+ bp->setup(ft.getIndexEnv(), params);
+ const NativeAttributeMatchParams & pas = (dynamic_cast<NativeAttributeMatchBlueprint *>(bp.get()))->getParams();
+ ASSERT_TRUE(pas.vector.size() == 4);
+// EXPECT_TRUE(pas.vector[0].weightBoostTable == tm.getTable("linear(0,2)"));
+// EXPECT_TRUE(pas.vector[3].weightBoostTable == tm.getTable("linear(0,3)"));
+ EXPECT_EQUAL(pas.vector[0].maxTableSum, 2);
+ EXPECT_EQUAL(pas.vector[3].maxTableSum, 3);
+ EXPECT_EQUAL(pas.vector[0].fieldWeight, 200u);
+ EXPECT_EQUAL(pas.vector[1].fieldWeight, 100u);
+ EXPECT_EQUAL(pas.vector[3].fieldWeight, 0u);
+ EXPECT_EQUAL(pas.vector[0].field, true);
+ EXPECT_EQUAL(pas.vector[1].field, false); // only 'foo' and 'baz' are specified explicit
+ EXPECT_EQUAL(pas.vector[2].field, false); // 'qux' is an index
+ EXPECT_EQUAL(pas.vector[3].field, false); // fieldWeight == 0 -> do not consider this field
+ }
+
+ {
+ FtIndexEnvironment ie;
+ FT_DUMP(_factory, "nativeAttributeMatch", ie, StringList().add("nativeAttributeMatch"));
+ }
+ }
+ { // test executor
+
+ EXPECT_TRUE(assertNativeAttributeMatch(15, ANAM(10), ANAM(10))); // basic
+ EXPECT_TRUE(assertNativeAttributeMatch(5, ANAM(-10), ANAM(10))); // negative weight
+ EXPECT_TRUE(assertNativeAttributeMatch(12.5, ANAM(10, 600), ANAM(10, 200))); // change term weights
+ EXPECT_TRUE(assertNativeAttributeMatch(10, ANAM(10, 600), ANAM(10, 0))); // change term weights
+ EXPECT_TRUE(assertNativeAttributeMatch(18, ANAM(10, 100, 200), ANAM(10, 100, 800))); // change field weights
+ EXPECT_TRUE(assertNativeAttributeMatch(0, ANAM(10, 100, 0), ANAM(10, 100, 0))); // change field weights
+ EXPECT_TRUE(assertNativeAttributeMatch(10, ANAM(10, 100, 100, 2), ANAM(10, 100, 100))); // change docId to give 1 hit
+ EXPECT_TRUE(assertNativeAttributeMatch(0, ANAM(10, 100, 100, 2), ANAM(10, 100, 100, 2))); // change docId to give 0 hits
+ { // use table normalization
+ // foo: max table value: 255
+ // bar: max table value: 510
+ Properties p = Properties().add("nativeRank.useTableNormalization", "true");
+ EXPECT_TRUE(assertNativeAttributeMatch(0.2941, ANAM(100), ANAM(50), p)); // (100/255 + 100/510)*0.5
+ EXPECT_TRUE(assertNativeAttributeMatch(1, ANAM(255), ANAM(255), p)); // (255/255 + 510/510)*0.5
+ p.add("nativeAttributeMatch.weightTable.foo", "linear(0,0)");
+ p.add("nativeAttributeMatch.weightTable.bar", "linear(0,0)");
+ EXPECT_TRUE(assertNativeAttributeMatch(0, ANAM(100), ANAM(50), p));
+ }
+ }
+}
+
+bool
+Test::assertNativeAttributeMatch(feature_t score, const ANAM & t1, const ANAM & t2, const Properties & props)
+{
+ LOG(info, "assertNativeAttributeMatch(%f, '%s', '%s')", score, t1.toString().c_str(), t2.toString().c_str());
+ vespalib::string feature = "nativeAttributeMatch";
+ FtFeatureTest ft(_factory, feature);
+ ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::WEIGHTEDSET, "foo");
+ ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::WEIGHTEDSET, "bar");
+ ft.getIndexEnv().getTableManager().addFactory(ITableFactory::SP(new FunctionTableFactory(256)));
+ ft.getIndexEnv().getProperties().add("nativeAttributeMatch.weightTable.foo", "linear(1,0)");
+ ft.getIndexEnv().getProperties().add("nativeAttributeMatch.weightTable.bar", "linear(2,0)");
+ ft.getIndexEnv().getProperties().add("vespa.fieldweight.foo", vespalib::make_string("%u", t1.fieldWeight));
+ ft.getIndexEnv().getProperties().add("vespa.fieldweight.bar", vespalib::make_string("%u", t2.fieldWeight));
+ ft.getIndexEnv().getProperties().add("nativeRank.useTableNormalization", "false"); // make it easier to test
+ ft.getIndexEnv().getProperties().import(props);
+ if (!EXPECT_TRUE(ft.getQueryEnv().getBuilder().addAttributeNode("foo") != NULL)) { // t1
+ return false;
+ }
+ if (!EXPECT_TRUE(ft.getQueryEnv().getBuilder().addAttributeNode("bar") != NULL)) { // t2
+ return false;
+ }
+ ft.getQueryEnv().getTerms()[0].setWeight(t1.termWeight);
+ ft.getQueryEnv().getTerms()[1].setWeight(t2.termWeight);
+ ASSERT_TRUE(ft.setup());
+
+ MatchDataBuilder::UP mdb = ft.createMatchDataBuilder();
+ {
+ TermFieldMatchData *tfmd = mdb->getTermFieldMatchData(0, 0);
+ tfmd->reset(t1.docId);
+ TermFieldMatchDataPosition pos;
+ pos.setElementWeight(t1.attributeWeight);
+ tfmd->appendPosition(pos);
+ }
+ {
+ TermFieldMatchData *tfmd = mdb->getTermFieldMatchData(1, 1);
+ tfmd->reset(t2.docId);
+ TermFieldMatchDataPosition pos;
+ pos.setElementWeight(t2.attributeWeight);
+ tfmd->appendPosition(pos);
+ }
+ if (!EXPECT_TRUE(ft.execute(score, EPS))) {
+ return false;
+ }
+ return true;
+}
+
+void
+Test::testNativeProximity()
+{
+ { // test blueprint
+ NativeProximityBlueprint pt;
+
+ EXPECT_TRUE(assertCreateInstance(pt, "nativeProximity"));
+
+ FtFeatureTest ft(_factory, "");
+ ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo");
+ ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "bar");
+ ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::WEIGHTEDSET, "qux");
+ ft.getIndexEnv().getTableManager().addFactory(ITableFactory::SP(new FunctionTableFactory(16)));
+
+ StringList params, in, out;
+ FT_SETUP_FAIL(pt, params.add("baz")); // field 'baz' not found
+ params.clear();
+
+ Properties & p = ft.getIndexEnv().getProperties();
+ p.add("nativeProximity.proximityTable", "a");
+ FT_SETUP_FAIL(pt, ft.getIndexEnv(), params); // table 'a' not found
+ p.clear().add("nativeProximity.reverseProximityTable", "b");
+ FT_SETUP_FAIL(pt, ft.getIndexEnv(), params); // table 'b' not found
+
+ const TableManager & tm = ft.getIndexEnv().getTableManager();
+ {
+ p.clear();
+ p.add("nativeRank.useTableNormalization", "false");
+ FT_SETUP_OK(pt, params, in, out.add("score"));
+ Blueprint::UP bp = pt.createInstance();
+ DummyDependencyHandler deps(*bp);
+ bp->setup(ft.getIndexEnv(), params);
+ const NativeProximityParams & pas = (dynamic_cast<NativeProximityBlueprint *>(bp.get()))->getParams();
+ ASSERT_TRUE(pas.vector.size() == 3);
+ EXPECT_TRUE(pas.vector[0].proximityTable == tm.getTable("expdecay(500,3)"));
+ EXPECT_TRUE(pas.vector[1].proximityTable == tm.getTable("expdecay(500,3)"));
+ EXPECT_TRUE(pas.vector[0].revProximityTable == tm.getTable("expdecay(400,3)"));
+ EXPECT_TRUE(pas.vector[1].revProximityTable == tm.getTable("expdecay(400,3)"));
+ EXPECT_EQUAL(pas.vector[0].maxTableSum, 1);
+ EXPECT_EQUAL(pas.vector[1].maxTableSum, 1);
+ EXPECT_EQUAL(pas.vector[0].fieldWeight, 100u);
+ EXPECT_EQUAL(pas.vector[1].fieldWeight, 100u);
+ EXPECT_EQUAL(pas.vector[0].field, true);
+ EXPECT_EQUAL(pas.vector[1].field, true);
+ EXPECT_EQUAL(pas.vector[2].field, false);
+ EXPECT_EQUAL(pas.slidingWindow, 4u);
+ EXPECT_EQUAL(pas.vector[0].proximityImportance, 0.5);
+ EXPECT_EQUAL(pas.vector[1].proximityImportance, 0.5);
+ }
+ {
+ p.clear();
+ p.add("nativeProximity.proximityTable", "linear(0,1)");
+ p.add("nativeProximity.proximityTable.foo", "linear(0,2)");
+ p.add("nativeProximity.reverseProximityTable", "linear(0,3)");
+ p.add("nativeProximity.reverseProximityTable.baz", "linear(0,4)");
+ p.add("vespa.fieldweight.foo", "200");
+ p.add("vespa.fieldweight.baz", "0");
+ p.add("nativeProximity.slidingWindowSize", "2");
+ p.add("nativeProximity.proximityImportance", "0.8");
+ p.add("nativeProximity.proximityImportance.foo", "0.6");
+ ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "baz");
+ ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "quux");
+ ft.getIndexEnv().getFields()[4].setFilter(true);
+ FT_SETUP_OK(pt, ft.getIndexEnv(), params.add("foo").add("baz"), in, out);
+ Blueprint::UP bp = pt.createInstance();
+ DummyDependencyHandler deps(*bp);
+ bp->setup(ft.getIndexEnv(), params);
+ const NativeProximityParams & pas = (dynamic_cast<NativeProximityBlueprint *>(bp.get()))->getParams();
+ ASSERT_TRUE(pas.vector.size() == 5);
+ EXPECT_TRUE(pas.vector[0].proximityTable == tm.getTable("linear(0,2)"));
+ EXPECT_TRUE(pas.vector[3].proximityTable == tm.getTable("linear(0,1)"));
+ EXPECT_TRUE(pas.vector[0].revProximityTable == tm.getTable("linear(0,3)"));
+ EXPECT_TRUE(pas.vector[3].revProximityTable == tm.getTable("linear(0,4)"));
+ EXPECT_APPROX(pas.vector[0].maxTableSum, 2.4, 10e-6);
+ EXPECT_APPROX(pas.vector[3].maxTableSum, 1.6, 10e-6);
+ EXPECT_EQUAL(pas.vector[0].fieldWeight, 200u);
+ EXPECT_EQUAL(pas.vector[1].fieldWeight, 100u);
+ EXPECT_EQUAL(pas.vector[3].fieldWeight, 0u);
+ EXPECT_EQUAL(pas.vector[0].field, true);
+ EXPECT_EQUAL(pas.vector[1].field, false); // only 'foo' and 'baz' are specified explicit
+ EXPECT_EQUAL(pas.vector[2].field, false); // 'qux' is an attribute
+ EXPECT_EQUAL(pas.vector[3].field, false); // fieldWeight == 0 -> do not consider this field
+ EXPECT_EQUAL(pas.vector[4].field, false); // filter field
+ EXPECT_EQUAL(pas.slidingWindow, 2u);
+ EXPECT_EQUAL(pas.vector[0].proximityImportance, 0.6);
+ EXPECT_EQUAL(pas.vector[3].proximityImportance, 0.8);
+ }
+
+ {
+ FtIndexEnvironment ie;
+ FT_DUMP(_factory, "nativeProximity", ie, StringList().add("nativeProximity"));
+ }
+ }
+
+ { // test NativeProximityExecutor::generateTermPairs()
+ QueryTermVector terms;
+ SimpleTermData a, b, c;
+ a.setWeight(search::query::Weight(100));
+ a.setUniqueId(0);
+ b.setWeight(search::query::Weight(200));
+ b.setUniqueId(1);
+ c.setWeight(search::query::Weight(300));
+ c.setUniqueId(2);
+ terms.push_back(QueryTerm(&a, 0.1));
+ terms.push_back(QueryTerm(&b, 0.2));
+ terms.push_back(QueryTerm(&c, 0.3));
+ FtFeatureTest ft(_factory, "nativeProximity");
+ FtQueryEnvironment & env = ft.getQueryEnv();
+ env.getProperties().add("vespa.term.1.connexity", "0");
+ env.getProperties().add("vespa.term.1.connexity", "0.8");
+ env.getProperties().add("vespa.term.2.connexity", "1");
+ env.getProperties().add("vespa.term.2.connexity", "0.6");
+ {
+ NativeProximityExecutor::FieldSetup setup(0);
+ NativeProximityExecutor::TermPairVector & pairs = setup.pairs;
+ NativeProximityExecutor::generateTermPairs(env, terms, 0, setup);
+ EXPECT_EQUAL(pairs.size(), 0u);
+ NativeProximityExecutor::generateTermPairs(env, terms, 1, setup);
+ EXPECT_EQUAL(pairs.size(), 0u);
+ NativeProximityExecutor::generateTermPairs(env, terms, 2, setup);
+ EXPECT_EQUAL(pairs.size(), 2u);
+ EXPECT_TRUE(pairs[0].first.termData() == &a);
+ EXPECT_TRUE(pairs[0].second.termData() == &b);
+ EXPECT_EQUAL(pairs[0].connectedness, 0.8);
+ EXPECT_TRUE(pairs[1].first.termData() == &b);
+ EXPECT_TRUE(pairs[1].second.termData() == &c);
+ EXPECT_EQUAL(pairs[1].connectedness, 0.6);
+ EXPECT_EQUAL(setup.divisor, 118); // (10 + 40)*0.8 + (40 + 90)*0.6
+
+ pairs.clear();
+ setup.divisor = 0;
+
+ NativeProximityExecutor::generateTermPairs(env, terms, 3, setup);
+ EXPECT_EQUAL(pairs.size(), 3u);
+ EXPECT_TRUE(pairs[0].first.termData() == &a);
+ EXPECT_TRUE(pairs[0].second.termData() == &b);
+ EXPECT_EQUAL(pairs[0].connectedness, 0.8);
+ EXPECT_TRUE(pairs[1].first.termData() == &a);
+ EXPECT_TRUE(pairs[1].second.termData() == &c);
+ EXPECT_EQUAL(pairs[1].connectedness, 0.3);
+ EXPECT_TRUE(pairs[2].first.termData() == &b);
+ EXPECT_TRUE(pairs[2].second.termData() == &c);
+ EXPECT_EQUAL(pairs[2].connectedness, 0.6);
+ EXPECT_EQUAL(setup.divisor, 148); // (10 + 40)*0.8 + (10 + 90)*0.3 + (40 + 90)*0.6
+
+ pairs.clear();
+ setup.divisor = 0;
+ a.setWeight(search::query::Weight(0));
+ b.setWeight(search::query::Weight(0));
+
+ // test that (ab) is filtered away
+ NativeProximityExecutor::generateTermPairs(env, terms, 2, setup);
+ EXPECT_EQUAL(pairs.size(), 1u);
+ EXPECT_TRUE(pairs[0].first.termData() == &b);
+ EXPECT_TRUE(pairs[0].second.termData() == &c);
+ EXPECT_EQUAL(pairs[0].connectedness, 0.6);
+ }
+ }
+
+ { // test executor
+ // 1 pair (only forward)
+ EXPECT_TRUE(assertNativeProximity(0, "a", "a"));
+ EXPECT_TRUE(assertNativeProximity(0, "a b", "a"));
+ EXPECT_TRUE(assertNativeProximity(5, "a b", "a b"));
+ EXPECT_TRUE(assertNativeProximity(1, "a b", "a x x x x b"));
+ EXPECT_TRUE(assertNativeProximity(0, "a b", "a x x x x x b"));
+ EXPECT_TRUE(assertNativeProximity(0, "a b", "a x x x x x x b"));
+ EXPECT_TRUE(assertNativeProximity(5, "a b", "a x x a x a a b"));
+ EXPECT_TRUE(assertNativeProximity(5, "b a", "a x x a x a a b"));
+
+ // 1 pair (both forward and backward)
+ EXPECT_TRUE(assertNativeProximity(10, "a b", "a b a"));
+ EXPECT_TRUE(assertNativeProximity(10, "b a", "a b a"));
+ EXPECT_TRUE(assertNativeProximity(10, "a a", "a a")); // term distance 1
+ EXPECT_TRUE(assertNativeProximity(6, "a a", "a x x a")); // term distance 3
+ EXPECT_TRUE(assertNativeProximity(9, "a b", "a x x x x x b x x x x a x x x b x x a x b a"));
+ EXPECT_TRUE(assertNativeProximity(9, "b a", "a x x x x x b x x x x a x x x b x x a x b a"));
+
+ // 2 pairs ((ab),(bc))
+ EXPECT_TRUE(assertNativeProximity(5, "a b c", "a b c"));
+ EXPECT_TRUE(assertNativeProximity(10, "a b c", "a b c b a"));
+
+ // change weight
+ EXPECT_TRUE(assertNativeProximity(4, "a b c", "a b x x c"));
+ EXPECT_TRUE(assertNativeProximity(4.2, "a!200 b c", "a b x x c"));
+ EXPECT_TRUE(assertNativeProximity(3.8, "a b c!200", "a b x x c"));
+ EXPECT_TRUE(assertNativeProximity(4.333, "a b c!0", "a b x x c")); // ((100+100)*5 + (100+0)*3) / 300
+ EXPECT_TRUE(assertNativeProximity(5, "a b!0 c!0", "a b x x c")); // ((100+0)*5 + (0+0)*3) / 100
+ EXPECT_TRUE(assertNativeProximity(0, "a!0 b!0", "a b"));
+
+ // change significance
+ EXPECT_TRUE(assertNativeProximity(4.692, "a%1 b%0.1 c%0.1", "a b x x c"));
+ EXPECT_TRUE(assertNativeProximity(3.308, "a%0.1 b%0.1 c%1", "a b x x c"));
+
+ // change connectedness
+ EXPECT_TRUE(assertNativeProximity(4, "a 1:b 1:c", "a b x x c"));
+ EXPECT_TRUE(assertNativeProximity(3.667, "a 0.5:b 1:c", "a b x x c")); // (5*0.5 + 3*1) / (0.5 + 1)
+
+ // change proximityImportance
+ Properties p = Properties().add("nativeProximity.proximityImportance", "1");
+ EXPECT_TRUE(assertNativeProximity(10, "a b", "a b x x x a", p));
+ p.clear().add("nativeProximity.proximityImportance", "0");
+ EXPECT_TRUE(assertNativeProximity(4, "a b", "a b x x x a", p));
+
+ // use table normalization
+ p.clear().add("nativeRank.useTableNormalization", "true");
+ // norm factor = (10*0.5 + 10*0.5) = 10
+ EXPECT_TRUE(assertNativeProximity(0.5, "a b", "a b", p));
+ EXPECT_TRUE(assertNativeProximity(0.5, "a b c", "a b c", p));
+ EXPECT_TRUE(assertNativeProximity(1, "a b", "a b a", p));
+ EXPECT_TRUE(assertNativeProximity(1, "a b c", "a b c b a", p));
+ p.add("nativeProximity.proximityTable", "linear(0,0)");
+ p.add("nativeProximity.reverseProximityTable", "linear(0,0)");
+ EXPECT_TRUE(assertNativeProximity(0, "a b", "a b", p));
+
+ // change field weight
+ p.clear().add("vespa.fieldweight.foo", "0");
+ EXPECT_TRUE(assertNativeProximity(0, "a b", "a b", p));
+
+ // change docId to give 0 hits
+ EXPECT_TRUE(assertNativeProximity(0, "a b", "a b", p.clear(), 2));
+ }
+}
+
+bool
+Test::assertNativeProximity(feature_t score,
+ const vespalib::string & query,
+ const vespalib::string & field,
+ const Properties & props,
+ uint32_t docId)
+{
+ LOG(info, "assertNativeProximity(%f, '%s', '%s')", score, query.c_str(), field.c_str());
+
+ // Setup feature test.
+ vespalib::string feature = "nativeProximity";
+ FtFeatureTest ft(_factory, feature);
+
+ ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo");
+ ft.getIndexEnv().getTableManager().addFactory(ITableFactory::SP(new FunctionTableFactory(6)));
+ ft.getIndexEnv().getProperties().add("nativeProximity.proximityTable", "linear(-2,10)");
+ ft.getIndexEnv().getProperties().add("nativeProximity.reverseProximityTable", "linear(-2,10)");
+ ft.getIndexEnv().getProperties().add("nativeProximity.slidingWindowSize", "2");
+ ft.getIndexEnv().getProperties().add("nativeRank.useTableNormalization", "false"); // make it easier to test
+ ft.getIndexEnv().getProperties().import(props);
+ StringVectorMap index;
+ index["foo"] = FtUtil::tokenize(field);
+ FT_SETUP(ft, FtUtil::toQuery(query), index, 1);
+
+ // Execute and compare results.
+ if (!EXPECT_TRUE(ft.execute(score, EPS, docId))) {
+ return false;
+ }
+ return true;
+}
+
+void
+Test::testNativeRank()
+{
+ { // test blueprint
+ NativeRankBlueprint pt;
+
+ EXPECT_TRUE(assertCreateInstance(pt, "nativeRank"));
+
+ FtFeatureTest ft(_factory, "");
+
+ StringList params, in, out;
+ FT_SETUP_FAIL(pt, params.add("foo")); // field 'foo' not found
+ params.clear();
+
+ {
+ FT_SETUP_OK(pt, params, in.add("nativeFieldMatch").add("nativeProximity").add("nativeAttributeMatch"),
+ out.add("score"));
+ Blueprint::UP bp = pt.createInstance();
+ DummyDependencyHandler deps(*bp);
+ bp->setup(ft.getIndexEnv(), params);
+ const NativeRankParams & pas = (dynamic_cast<NativeRankBlueprint *>(bp.get()))->getParams();
+ EXPECT_EQUAL(pas.fieldMatchWeight, 100u);
+ EXPECT_EQUAL(pas.attributeMatchWeight, 100u);
+ EXPECT_EQUAL(pas.proximityWeight, 25u);
+ }
+ {
+ Properties & p = ft.getIndexEnv().getProperties();
+ p.add("nativeRank.useTableNormalization", "false");
+ Blueprint::UP bp = pt.createInstance();
+ DummyDependencyHandler deps(*bp);
+ bp->setup(ft.getIndexEnv(), params);
+ const NativeRankParams & pas = (dynamic_cast<NativeRankBlueprint *>(bp.get()))->getParams();
+ EXPECT_EQUAL(pas.proximityWeight, 100u);
+ p.clear();
+ }
+ {
+ Properties & p = ft.getIndexEnv().getProperties();
+ p.add("nativeRank.fieldMatchWeight", "200");
+ p.add("nativeRank.attributeMatchWeight", "300");
+ p.add("nativeRank.proximityWeight", "400");
+ FT_SETUP_OK(pt, params, in, out);
+ Blueprint::UP bp = pt.createInstance();
+ DummyDependencyHandler deps(*bp);
+ bp->setup(ft.getIndexEnv(), params);
+ const NativeRankParams & pas = (dynamic_cast<NativeRankBlueprint *>(bp.get()))->getParams();
+ EXPECT_EQUAL(pas.fieldMatchWeight, 200u);
+ EXPECT_EQUAL(pas.attributeMatchWeight, 300u);
+ EXPECT_EQUAL(pas.proximityWeight, 400u);
+ }
+
+ FT_DUMP(_factory, "nativeRank", ft.getIndexEnv(), StringList().add("nativeRank"));
+
+ { // test optimizations when weight == 0
+ Properties & p = ft.getIndexEnv().getProperties();
+ p.clear();
+ p.add("nativeRank.fieldMatchWeight", "0");
+ FT_SETUP_OK(pt, ft.getIndexEnv(), params,
+ in.clear().add("value(0)").add("nativeProximity").add("nativeAttributeMatch"), out);
+ p.add("nativeRank.proximityWeight", "0");
+ FT_SETUP_OK(pt, ft.getIndexEnv(), params,
+ in.clear().add("value(0)").add("value(0)").add("nativeAttributeMatch"), out);
+ p.add("nativeRank.attributeMatchWeight", "0");
+ FT_SETUP_OK(pt, ft.getIndexEnv(), params, in.clear().add("value(0)").add("value(0)").add("value(0)"), out);
+ }
+ { // nativeRank for a subset of fields
+ ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo");
+ ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::WEIGHTEDSET, "bar");
+ ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "baz");
+ FT_SETUP_OK(pt, ft.getIndexEnv(), params.add("foo").add("bar"), in, out);
+ ft.getIndexEnv().getProperties().clear();
+ FT_SETUP_OK(pt, ft.getIndexEnv(), params,
+ in.clear().add("nativeFieldMatch(foo)").add("nativeProximity(foo)").add("nativeAttributeMatch(bar)"), out);
+ FT_SETUP_OK(pt, ft.getIndexEnv(), params.clear().add("foo").add("baz"),
+ in.clear().add("nativeFieldMatch(foo,baz)").add("nativeProximity(foo,baz)").add("value(0)"), out);
+ FT_SETUP_OK(pt, ft.getIndexEnv(), params.clear().add("bar"),
+ in.clear().add("value(0)").add("value(0)").add("nativeAttributeMatch(bar)"), out);
+ }
+ }
+
+ { // test executor
+ assertNativeRank(60, 1, 1, 1);
+ assertNativeRank(72, 3, 1, 1);
+ assertNativeRank(37.5, 0, 1, 3);
+ }
+}
+
+bool
+Test::assertNativeRank(feature_t score,
+ feature_t fieldMatchWeight,
+ feature_t attributeMatchWeight,
+ feature_t proximityWeight)
+{
+ LOG(info, "assertNativeRank(%f, %f, %f, %f)", score, fieldMatchWeight, attributeMatchWeight, proximityWeight);
+
+ // Setup feature test.
+ vespalib::string feature = "nativeRank";
+ FtFeatureTest ft(_factory, feature);
+
+ ft.getIndexEnv().getProperties().add("nativeRank.fieldMatchWeight",
+ vespalib::make_string("%f", fieldMatchWeight));
+ ft.getIndexEnv().getProperties().add("nativeRank.attributeMatchWeight",
+ vespalib::make_string("%f", attributeMatchWeight));
+ ft.getIndexEnv().getProperties().add("nativeRank.proximityWeight",
+ vespalib::make_string("%f", proximityWeight));
+
+ ft.getOverrides().add("nativeFieldMatch", "90");
+ ft.getOverrides().add("nativeAttributeMatch", "60");
+ ft.getOverrides().add("nativeProximity", "30");
+
+ if (!EXPECT_TRUE(ft.setup())) {
+ return false;
+ }
+
+ // Execute and compare results.
+ if (!EXPECT_TRUE(ft.execute(score, EPS))) {
+ return false;
+ }
+ return true;
+}
+
+
+
+int
+Test::Main()
+{
+ TEST_INIT("nativerank_test");
+
+ // Configure factory with all known blueprints.
+ setup_fef_test_plugin(_factory);
+ setup_search_features(_factory);
+
+ testNativeFieldMatch();
+ testNativeAttributeMatch();
+ testNativeProximity();
+ testNativeRank();
+
+ TEST_DONE();
+ return 0;
+}
+
+}
+}
+
+TEST_APPHOOK(search::features::Test);
+
diff --git a/searchlib/src/tests/nearsearch/.gitignore b/searchlib/src/tests/nearsearch/.gitignore
new file mode 100644
index 00000000000..c6c72b1cd87
--- /dev/null
+++ b/searchlib/src/tests/nearsearch/.gitignore
@@ -0,0 +1,4 @@
+.depend
+Makefile
+nearsearch_test
+searchlib_nearsearch_test_app
diff --git a/searchlib/src/tests/nearsearch/CMakeLists.txt b/searchlib/src/tests/nearsearch/CMakeLists.txt
new file mode 100644
index 00000000000..a526a059a3d
--- /dev/null
+++ b/searchlib/src/tests/nearsearch/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_nearsearch_test_app
+ SOURCES
+ nearsearch_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_nearsearch_test_app COMMAND searchlib_nearsearch_test_app)
diff --git a/searchlib/src/tests/nearsearch/DESC b/searchlib/src/tests/nearsearch/DESC
new file mode 100644
index 00000000000..1af96b6ab4c
--- /dev/null
+++ b/searchlib/src/tests/nearsearch/DESC
@@ -0,0 +1 @@
+nearsearch test. Take a look at nearsearch.cpp for details.
diff --git a/searchlib/src/tests/nearsearch/FILES b/searchlib/src/tests/nearsearch/FILES
new file mode 100644
index 00000000000..e8ff3e62114
--- /dev/null
+++ b/searchlib/src/tests/nearsearch/FILES
@@ -0,0 +1 @@
+nearsearch.cpp
diff --git a/searchlib/src/tests/nearsearch/nearsearch_test.cpp b/searchlib/src/tests/nearsearch/nearsearch_test.cpp
new file mode 100644
index 00000000000..9942bcecd4a
--- /dev/null
+++ b/searchlib/src/tests/nearsearch/nearsearch_test.cpp
@@ -0,0 +1,247 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("nearsearch_test");
+
+#include <vespa/searchlib/common/resultset.h>
+#include <vespa/searchlib/queryeval/nearsearch.h>
+#include <vespa/searchlib/queryeval/searchable.h>
+#include <vespa/searchlib/queryeval/intermediate_blueprints.h>
+#include <vespa/searchlib/queryeval/leaf_blueprints.h>
+#include <vespa/searchlib/fef/matchdata.h>
+#include <vespa/searchlib/fef/matchdatalayout.h>
+#include <vespa/vespalib/util/stringfmt.h>
+#include <set>
+#include <vespa/vespalib/testkit/testapp.h>
+
+////////////////////////////////////////////////////////////////////////////////
+//
+// Utilities
+//
+////////////////////////////////////////////////////////////////////////////////
+
+class UIntList : public std::set<uint32_t> {
+public:
+ UIntList &add(uint32_t i) {
+ std::set<uint32_t>::insert(i);
+ return *this;
+ }
+};
+
+class MyTerm {
+private:
+ std::set<uint32_t> _docs;
+ std::set<uint32_t> _data;
+
+public:
+ MyTerm(const std::set<uint32_t> &doc,
+ const std::set<uint32_t> &pos)
+ : _docs(doc),
+ _data(pos)
+ {
+ }
+
+ search::queryeval::Blueprint::UP
+ make_blueprint(uint32_t fieldId, search::fef::TermFieldHandle handle) const
+ {
+ search::queryeval::FakeResult result;
+ for (std::set<uint32_t>::const_iterator doc = _docs.begin();
+ doc != _docs.end(); ++doc)
+ {
+ result.doc(*doc);
+ for (std::set<uint32_t>::const_iterator pos = _data.begin();
+ pos != _data.end(); ++pos)
+ {
+ result.pos(*pos);
+ }
+ }
+ return search::queryeval::Blueprint::UP(
+ new search::queryeval::FakeBlueprint(
+ search::queryeval::FieldSpec("<field>", fieldId, handle),
+ result));
+ }
+};
+
+class MyQuery {
+private:
+ std::vector<MyTerm*> _terms;
+ bool _ordered;
+ uint32_t _window;
+
+public:
+ MyQuery(bool ordered, uint32_t window) :
+ _terms(),
+ _ordered(ordered),
+ _window(window) {
+ // empty
+ }
+
+ MyQuery &addTerm(MyTerm &term) {
+ _terms.push_back(&term);
+ return *this;
+ }
+
+ uint32_t getNumTerms() const {
+ return _terms.size();
+ }
+
+ MyTerm &getTerm(uint32_t i) {
+ return *_terms[i];
+ }
+
+ bool isOrdered() const {
+ return _ordered;
+ }
+
+ uint32_t getWindow() const {
+ return _window;
+ }
+};
+
+////////////////////////////////////////////////////////////////////////////////
+//
+// Setup
+//
+////////////////////////////////////////////////////////////////////////////////
+
+class Test : public vespalib::TestApp {
+private:
+ bool testNearSearch(MyQuery &query, uint32_t matchId);
+
+public:
+ int Main();
+ void testBasicNear();
+ void testRepeatedTerms();
+};
+
+int
+Test::Main()
+{
+ TEST_INIT("nearsearch_test");
+
+ testBasicNear(); TEST_FLUSH();
+ testRepeatedTerms(); TEST_FLUSH();
+
+ TEST_DONE();
+}
+
+TEST_APPHOOK(Test);
+
+////////////////////////////////////////////////////////////////////////////////
+//
+// Tests
+//
+////////////////////////////////////////////////////////////////////////////////
+
+void
+Test::testBasicNear()
+{
+ MyTerm foo(UIntList().add(69),
+ UIntList().add(6).add(11));
+ for (uint32_t i = 0; i <= 1; ++i) {
+ TEST_STATE(vespalib::make_string("i = %u", i).c_str());
+ TEST_DO(testNearSearch(MyQuery(false, i).addTerm(foo), 69));
+ TEST_DO(testNearSearch(MyQuery(true, i).addTerm(foo), 69));
+ }
+
+ MyTerm bar(UIntList().add(68).add(69).add(70),
+ UIntList().add(7).add(10));
+ TEST_DO(testNearSearch(MyQuery(false, 0).addTerm(foo).addTerm(bar), 0));
+ TEST_DO(testNearSearch(MyQuery(true, 0).addTerm(foo).addTerm(bar), 0));
+ for (uint32_t i = 1; i <= 2; ++i) {
+ TEST_DO(testNearSearch(MyQuery(false, i).addTerm(foo).addTerm(bar), 69));
+ TEST_DO(testNearSearch(MyQuery(true, i).addTerm(foo).addTerm(bar), 69));
+ }
+
+ MyTerm baz(UIntList().add(69).add(70).add(71),
+ UIntList().add(8).add(9));
+ for (uint32_t i = 0; i <= 1; ++i) {
+ TEST_DO(testNearSearch(MyQuery(false, i).addTerm(foo).addTerm(bar).addTerm(baz), 0));
+ TEST_DO(testNearSearch(MyQuery(false, i).addTerm(foo).addTerm(baz).addTerm(bar), 0));
+ TEST_DO(testNearSearch(MyQuery(false, i).addTerm(bar).addTerm(baz).addTerm(foo), 0));
+ TEST_DO(testNearSearch(MyQuery(false, i).addTerm(bar).addTerm(foo).addTerm(baz), 0));
+ TEST_DO(testNearSearch(MyQuery(false, i).addTerm(baz).addTerm(foo).addTerm(bar), 0));
+ TEST_DO(testNearSearch(MyQuery(false, i).addTerm(baz).addTerm(bar).addTerm(foo), 0));
+ TEST_DO(testNearSearch(MyQuery(true, i).addTerm(foo).addTerm(bar).addTerm(baz), 0));
+ TEST_DO(testNearSearch(MyQuery(true, i).addTerm(foo).addTerm(baz).addTerm(bar), 0));
+ TEST_DO(testNearSearch(MyQuery(true, i).addTerm(bar).addTerm(baz).addTerm(foo), 0));
+ TEST_DO(testNearSearch(MyQuery(true, i).addTerm(bar).addTerm(foo).addTerm(baz), 0));
+ TEST_DO(testNearSearch(MyQuery(true, i).addTerm(baz).addTerm(foo).addTerm(bar), 0));
+ TEST_DO(testNearSearch(MyQuery(true, i).addTerm(baz).addTerm(bar).addTerm(foo), 0));
+ }
+ for (uint32_t i = 2; i <= 3; ++i) {
+ TEST_DO(testNearSearch(MyQuery(false, i).addTerm(foo).addTerm(bar).addTerm(baz), 69));
+ TEST_DO(testNearSearch(MyQuery(false, i).addTerm(foo).addTerm(baz).addTerm(bar), 69));
+ TEST_DO(testNearSearch(MyQuery(false, i).addTerm(bar).addTerm(baz).addTerm(foo), 69));
+ TEST_DO(testNearSearch(MyQuery(false, i).addTerm(bar).addTerm(foo).addTerm(baz), 69));
+ TEST_DO(testNearSearch(MyQuery(false, i).addTerm(baz).addTerm(foo).addTerm(bar), 69));
+ TEST_DO(testNearSearch(MyQuery(false, i).addTerm(baz).addTerm(bar).addTerm(foo), 69));
+ TEST_DO(testNearSearch(MyQuery(true, i).addTerm(foo).addTerm(bar).addTerm(baz), 69));
+ TEST_DO(testNearSearch(MyQuery(true, i).addTerm(foo).addTerm(baz).addTerm(bar), 0));
+ TEST_DO(testNearSearch(MyQuery(true, i).addTerm(bar).addTerm(baz).addTerm(foo), 0));
+ TEST_DO(testNearSearch(MyQuery(true, i).addTerm(bar).addTerm(foo).addTerm(baz), 0));
+ TEST_DO(testNearSearch(MyQuery(true, i).addTerm(baz).addTerm(foo).addTerm(bar), 0));
+ TEST_DO(testNearSearch(MyQuery(true, i).addTerm(baz).addTerm(bar).addTerm(foo), 69));
+ }
+}
+
+void
+Test::testRepeatedTerms()
+{
+ MyTerm foo(UIntList().add(69),
+ UIntList().add(1).add(2).add(3));
+ TEST_DO(testNearSearch(MyQuery(false, 0).addTerm(foo).addTerm(foo), 69));
+ TEST_DO(testNearSearch(MyQuery(true, 0).addTerm(foo).addTerm(foo), 0));
+ for (uint32_t i = 1; i <= 2; ++i) {
+ TEST_DO(testNearSearch(MyQuery(false, i).addTerm(foo).addTerm(foo), 69));
+ TEST_DO(testNearSearch(MyQuery(true, i).addTerm(foo).addTerm(foo), 69));
+ }
+
+ for (uint32_t i = 0; i <= 1; ++i) {
+ TEST_DO(testNearSearch(MyQuery(false, i).addTerm(foo).addTerm(foo).addTerm(foo), 69));
+ TEST_DO(testNearSearch(MyQuery(true, i).addTerm(foo).addTerm(foo).addTerm(foo), 0));
+ }
+ for (uint32_t i = 2; i <= 3; ++i) {
+ TEST_DO(testNearSearch(MyQuery(false, i).addTerm(foo).addTerm(foo).addTerm(foo), 69));
+ TEST_DO(testNearSearch(MyQuery(true, i).addTerm(foo).addTerm(foo).addTerm(foo), 69));
+ }
+}
+
+bool
+Test::testNearSearch(MyQuery &query, uint32_t matchId)
+{
+ LOG(info, "testNearSearch(%d)", matchId);
+ search::queryeval::IntermediateBlueprint *near_b = 0;
+ if (query.isOrdered()) {
+ near_b = new search::queryeval::ONearBlueprint(query.getWindow());
+ } else {
+ near_b = new search::queryeval::NearBlueprint(query.getWindow());
+ }
+ search::queryeval::Blueprint::UP bp(near_b);
+ search::fef::MatchDataLayout layout;
+ for (uint32_t i = 0; i < query.getNumTerms(); ++i) {
+ uint32_t fieldId = 0;
+ layout.allocTermField(fieldId);
+ near_b->addChild(query.getTerm(i).make_blueprint(fieldId, i));
+ }
+ search::fef::MatchData::UP md(layout.createMatchData());
+
+ bp->fetchPostings(true);
+ search::queryeval::SearchIterator::UP near = bp->createSearch(*md, true);
+ near->initFullRange();
+ bool foundMatch = false;
+ for (near->seek(1u); ! near->isAtEnd(); near->seek(near->getDocId() + 1)) {
+ uint32_t docId = near->getDocId();
+ if (docId == matchId) {
+ foundMatch = true;
+ } else {
+ LOG(info, "Document %d matched unexpectedly.", docId);
+ return false;
+ }
+ }
+ if (matchId == 0) {
+ return EXPECT_TRUE(!foundMatch);
+ } else {
+ return EXPECT_TRUE(foundMatch);
+ }
+}
diff --git a/searchlib/src/tests/postinglistbm/.gitignore b/searchlib/src/tests/postinglistbm/.gitignore
new file mode 100644
index 00000000000..ac71dde13e2
--- /dev/null
+++ b/searchlib/src/tests/postinglistbm/.gitignore
@@ -0,0 +1,10 @@
+*.core
+*.ilk
+*.pdb
+.depend
+Makefile
+core
+core.*
+postinglistbm
+postinglistbm.exe
+searchlib_postinglistbm_app
diff --git a/searchlib/src/tests/postinglistbm/CMakeLists.txt b/searchlib/src/tests/postinglistbm/CMakeLists.txt
new file mode 100644
index 00000000000..403c12da1b1
--- /dev/null
+++ b/searchlib/src/tests/postinglistbm/CMakeLists.txt
@@ -0,0 +1,10 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_postinglistbm_app
+ SOURCES
+ postinglistbm.cpp
+ andstress.cpp
+ DEPENDS
+ searchlib_test
+ searchlib
+)
+vespa_add_test(NAME searchlib_postinglistbm_app NO_VALGRIND COMMAND searchlib_postinglistbm_app -q -a)
diff --git a/searchlib/src/tests/postinglistbm/andstress.cpp b/searchlib/src/tests/postinglistbm/andstress.cpp
new file mode 100644
index 00000000000..f3fabde0d61
--- /dev/null
+++ b/searchlib/src/tests/postinglistbm/andstress.cpp
@@ -0,0 +1,536 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".andstress");
+#include "andstress.h"
+#include <vector>
+
+#include <vespa/searchlib/common/bitvector.h>
+#include <vespa/searchlib/test/fakedata/fakeword.h>
+#include <vespa/searchlib/test/fakedata/fakewordset.h>
+#include <vespa/searchlib/test/fakedata/fakeposting.h>
+#include <vespa/searchlib/test/fakedata/fakefilterocc.h>
+#include <vespa/searchlib/test/fakedata/fakeegcompr64filterocc.h>
+#include <vespa/searchlib/test/fakedata/fakezcfilterocc.h>
+#include <vespa/searchlib/test/fakedata/fakezcbfilterocc.h>
+#include <vespa/searchlib/test/fakedata/fpfactory.h>
+
+using search::fef::TermFieldMatchData;
+using search::fef::TermFieldMatchDataArray;
+using search::queryeval::SearchIterator;
+using namespace search::fakedata;
+
+namespace postinglistbm
+{
+
+class AndStressWorker;
+
+class AndStressMaster
+{
+private:
+ AndStressMaster(const AndStressMaster &);
+
+ AndStressMaster &
+ operator=(const AndStressMaster &);
+
+ search::Rand48 &_rnd;
+ unsigned int _numDocs;
+ unsigned int _commonDocFreq;
+ std::vector<std::string> _postingTypes;
+ unsigned int _loops;
+ unsigned int _skipCommonPairsRate;
+ uint32_t _stride;
+ bool _unpack;
+
+ FastOS_ThreadPool *_threadPool;
+ std::vector<AndStressWorker *> _workers;
+ unsigned int _workersDone;
+
+ FakeWordSet &_wordSet;
+
+ std::vector<std::vector<FakePosting::SP> > _postings;
+
+ FastOS_Cond _taskCond;
+ unsigned int _taskIdx;
+ uint32_t _numTasks;
+
+public:
+ typedef std::pair<FakePosting *, FakePosting *> Task;
+private:
+ std::vector<Task> _tasks;
+public:
+ AndStressMaster(search::Rand48 &rnd,
+ FakeWordSet &wordSet,
+ unsigned int numDocs,
+ unsigned int commonDocFreq,
+ const std::vector<std::string> &postingType,
+ unsigned int loops,
+ unsigned int skipCommonPairsRate,
+ uint32_t numTasks,
+ uint32_t stride,
+ bool unpack);
+
+ ~AndStressMaster(void);
+
+ void
+ run(void);
+
+ void
+ makePostingsHelper(FPFactory *postingFactory,
+ const std::string &postingFormat,
+ bool validate, bool verbose);
+
+ void
+ dropPostings(void);
+
+ void
+ dropTasks(void);
+
+ void
+ resetTasks(void); // Prepare for rerun
+
+ void
+ setupTasks(unsigned int numTasks);
+
+ Task *
+ getTask(void);
+
+ unsigned int
+ getNumDocs(void) const
+ {
+ return _numDocs;
+ }
+
+ bool
+ getUnpack(void) const
+ {
+ return _unpack;
+ }
+
+ double
+ runWorkers(const std::string &postingFormat);
+};
+
+
+class AndStressWorker : public FastOS_Runnable
+{
+private:
+ AndStressWorker(const AndStressWorker &);
+
+ AndStressWorker &
+ operator=(const AndStressWorker &);
+
+ AndStressMaster &_master;
+ unsigned int _id;
+public:
+ AndStressWorker(AndStressMaster &master, unsigned int id);
+
+ ~AndStressWorker(void);
+
+ virtual void
+ Run(FastOS_ThreadInterface *thisThread, void *arg);
+};
+
+
+template <class P>
+FakePosting *
+makePosting(FakeWord &fw)
+{
+ return new P(fw);
+}
+
+
+AndStressMaster::AndStressMaster(search::Rand48 &rnd,
+ FakeWordSet &wordSet,
+ unsigned int numDocs,
+ unsigned int commonDocFreq,
+ const std::vector<std::string> &postingTypes,
+ unsigned int loops,
+ unsigned int skipCommonPairsRate,
+ uint32_t numTasks,
+ uint32_t stride,
+ bool unpack)
+ : _rnd(rnd),
+ _numDocs(numDocs),
+ _commonDocFreq(commonDocFreq),
+ _postingTypes(postingTypes),
+ _loops(loops),
+ _skipCommonPairsRate(skipCommonPairsRate),
+ _stride(stride),
+ _unpack(unpack),
+ _threadPool(NULL),
+ _workers(),
+ _workersDone(0),
+ _wordSet(wordSet),
+ _postings(FakeWordSet::NUM_WORDCLASSES),
+ _taskCond(),
+ _taskIdx(0),
+ _numTasks(numTasks),
+ _tasks()
+{
+ LOG(info, "AndStressMaster::AndStressMaster");
+
+ _threadPool = new FastOS_ThreadPool(128 * 1024, 400);
+}
+
+template <class C>
+static void
+clearPtrVector(std::vector<C> &v)
+{
+ for (unsigned int i = 0; i < v.size(); ++i)
+ delete v[i];
+ v.clear();
+}
+
+
+AndStressMaster::~AndStressMaster(void)
+{
+ LOG(info, "AndStressMaster::~AndStressMaster");
+
+ _threadPool->Close();
+ delete _threadPool;
+ _threadPool = NULL;
+ clearPtrVector(_workers);
+ dropPostings();
+}
+
+
+void
+AndStressMaster::dropPostings(void)
+{
+ for (unsigned int i = 0; i < _postings.size(); ++i)
+ _postings[i].clear();
+ dropTasks();
+}
+
+
+void
+AndStressMaster::dropTasks(void)
+{
+ _tasks.clear();
+ _taskIdx = 0;
+}
+
+
+void
+AndStressMaster::resetTasks(void)
+{
+ _taskIdx = 0;
+}
+
+
+static void
+makeSomePostings(FPFactory *postingFactory,
+ std::vector<FakeWord *> &w,
+ std::vector<FakePosting::SP> &p,
+ uint32_t stride,
+ bool validate,
+ bool verbose)
+{
+ for (unsigned int i = 0; i < w.size(); ++i) {
+ FakePosting::SP np(postingFactory->make(*w[i]));
+ if (validate) {
+ TermFieldMatchData md;
+ TermFieldMatchDataArray tfmda;
+ tfmda.add(&md);
+
+ std::unique_ptr<SearchIterator> sb(np->createIterator(tfmda));
+ if (np->hasWordPositions()) {
+ if (stride != 0)
+ w[i]->validate(sb.get(), tfmda, stride, verbose);
+ else
+ w[i]->validate(sb.get(), tfmda, verbose);
+ } else
+ w[i]->validate(sb.get(), verbose);
+ }
+ p.push_back(np);
+ }
+}
+
+void
+AndStressMaster::makePostingsHelper(FPFactory *postingFactory,
+ const std::string &postingFormat,
+ bool validate, bool verbose)
+{
+ FastOS_Time tv;
+ double before;
+ double after;
+
+ tv.SetNow();
+ before = tv.Secs();
+ postingFactory->setup(_wordSet);
+ for (unsigned int i = 0; i < _wordSet._words.size(); ++i)
+ makeSomePostings(postingFactory,
+ _wordSet._words[i], _postings[i],
+ _stride,
+ validate,
+ verbose);
+ tv.SetNow();
+ after = tv.Secs();
+ LOG(info,
+ "AndStressMaster::makePostingsHelper elapsed %10.6f s for %s format",
+ after - before,
+ postingFormat.c_str());
+}
+
+
+void
+AndStressMaster::setupTasks(unsigned int numTasks)
+{
+ unsigned int wordclass1;
+ unsigned int wordclass2;
+ unsigned int word1idx;
+ unsigned int word2idx;
+
+ for (unsigned int i = 0; i < numTasks; ++i) {
+ wordclass1 = _rnd.lrand48() % _postings.size();
+ wordclass2 = _rnd.lrand48() % _postings.size();
+ while (wordclass1 == FakeWordSet::COMMON_WORD &&
+ wordclass2 == FakeWordSet::COMMON_WORD &&
+ (_rnd.lrand48() % _skipCommonPairsRate) != 0) {
+ wordclass1 = _rnd.lrand48() % _postings.size();
+ wordclass2 = _rnd.lrand48() % _postings.size();
+ }
+ word1idx = _rnd.lrand48() % _postings[wordclass1].size();
+ word2idx = _rnd.lrand48() % _postings[wordclass2].size();
+ FakePosting::SP p1 = _postings[wordclass1][word1idx];
+ FakePosting::SP p2 = _postings[wordclass2][word2idx];
+ _tasks.push_back(std::make_pair(p1.get(), p2.get()));
+ }
+}
+
+
+AndStressMaster::Task *
+AndStressMaster::getTask(void)
+{
+ Task *result = NULL;
+ _taskCond.Lock();
+ if (_taskIdx < _tasks.size()) {
+ result = &_tasks[_taskIdx];
+ ++_taskIdx;
+ } else {
+ _workersDone++;
+ if (_workersDone == _workers.size())
+ _taskCond.Broadcast();
+ }
+ _taskCond.Unlock();
+ return result;
+}
+
+void
+AndStressMaster::run(void)
+{
+ LOG(info, "AndStressMaster::run");
+
+ std::vector<std::string>::const_iterator pti;
+ std::vector<std::string>::const_iterator ptie = _postingTypes.end() ;
+
+ for (pti = _postingTypes.begin(); pti != ptie; ++pti) {
+ std::unique_ptr<FPFactory> ff(getFPFactory(*pti, _wordSet.getSchema()));
+ makePostingsHelper(ff.get(), *pti, true, false);
+ setupTasks(_numTasks);
+ double totalTime = 0;
+ for (unsigned int loop = 0; loop < _loops; ++loop) {
+ totalTime += runWorkers(*pti);
+ resetTasks();
+ }
+ LOG(info, "AndStressMaster::average run elapsed %10.6f s for workers %s format",
+ totalTime / _loops, pti->c_str());
+ dropPostings();
+ }
+ FastOS_Thread::Sleep(250);
+}
+
+
+double
+AndStressMaster::runWorkers(const std::string &postingFormat)
+{
+ FastOS_Time tv;
+ double before;
+ double after;
+
+ tv.SetNow();
+ before = tv.Secs();
+ unsigned int numWorkers = 8;
+ for (unsigned int i = 0; i < numWorkers; ++i)
+ _workers.push_back(new AndStressWorker(*this, i));
+
+ for (unsigned int i = 0; i < _workers.size(); ++i)
+ _threadPool->NewThread(_workers[i]);
+ _taskCond.Lock();
+ while (_workersDone < _workers.size())
+ _taskCond.Wait();
+ _taskCond.Unlock();
+ tv.SetNow();
+ after = tv.Secs();
+ LOG(info,
+ "AndStressMaster::run elapsed %10.6f s for workers %s format",
+ after - before,
+ postingFormat.c_str());
+ clearPtrVector(_workers);
+ _workersDone = 0;
+ return after - before;
+}
+
+
+AndStressWorker::AndStressWorker(AndStressMaster &master, unsigned int id)
+ : _master(master),
+ _id(id)
+{
+ LOG(debug, "AndStressWorker::AndStressWorker, id=%u", id);
+}
+
+AndStressWorker::~AndStressWorker(void)
+{
+ LOG(debug, "AndStressWorker::~AndStressWorker, id=%u", _id);
+}
+
+
+static int
+highLevelAndPairPostingScan(SearchIterator &sb1,
+ SearchIterator &sb2,
+ uint32_t numDocs, uint64_t *cycles)
+{
+ uint32_t hits = 0;
+ uint64_t before = fastos::ClockSystem::now();
+ sb1.initFullRange();
+ sb2.initFullRange();
+ uint32_t docId = sb1.getDocId();
+ while (docId < numDocs) {
+ if (sb1.seek(docId)) {
+ if (sb2.seek(docId)) {
+ ++hits;
+ ++docId;
+ } else if (docId < sb2.getDocId())
+ docId = sb2.getDocId();
+ else
+ ++docId;
+ } else if (docId < sb1.getDocId())
+ docId= sb1.getDocId();
+ else
+ ++docId;
+ }
+ uint64_t after = fastos::ClockSystem::now();
+ *cycles = after - before;
+ return hits;
+}
+
+
+static int
+highLevelAndPairPostingScanUnpack(SearchIterator &sb1,
+ SearchIterator &sb2,
+ uint32_t numDocs,
+ uint64_t *cycles)
+{
+ uint32_t hits = 0;
+ uint64_t before = fastos::ClockSystem::now();
+ sb1.initFullRange();
+ sb2.initFullRange();
+ uint32_t docId = sb1.getDocId();
+ while (docId < numDocs) {
+ if (sb1.seek(docId)) {
+ if (sb2.seek(docId)) {
+ ++hits;
+ sb1.unpack(docId);
+ sb2.unpack(docId);
+ ++docId;
+ } else if (docId < sb2.getDocId())
+ docId = sb2.getDocId();
+ else
+ ++docId;
+ } else if (docId < sb1.getDocId())
+ docId= sb1.getDocId();
+ else
+ ++docId;
+ }
+ uint64_t after = fastos::ClockSystem::now();
+ *cycles = after - before;
+ return hits;
+}
+
+void
+testFakePair(FakePosting &f1, FakePosting &f2, unsigned int numDocs,
+ bool unpack)
+{
+ TermFieldMatchData md1;
+ TermFieldMatchDataArray tfmda1;
+ tfmda1.add(&md1);
+ std::unique_ptr<SearchIterator> sb1(f1.createIterator(tfmda1));
+
+ TermFieldMatchData md2;
+ TermFieldMatchDataArray tfmda2;
+ tfmda1.add(&md2);
+ std::unique_ptr<SearchIterator> sb2(f2.createIterator(tfmda2));
+
+ int hits = 0;
+ uint64_t scanUnpackTime = 0;
+ if (unpack)
+ hits = highLevelAndPairPostingScanUnpack(*sb1.get(), *sb2.get(),
+ numDocs, &scanUnpackTime);
+ else
+ hits = highLevelAndPairPostingScan(*sb1.get(), *sb2.get(),
+ numDocs, &scanUnpackTime);
+#if 0
+ printf("Fakepair %s AND %s => %d hits, %" PRIu64 " cycles\n",
+ f1.getName().c_str(),
+ f2.getName().c_str(),
+ hits,
+ scanUnpackTime);
+#else
+ (void)hits;
+#endif
+}
+
+void
+AndStressWorker::Run(FastOS_ThreadInterface *thisThread, void *arg)
+{
+ (void) thisThread;
+ (void) arg;
+ LOG(debug, "AndStressWorker::Run, id=%u", _id);
+
+ bool unpack = _master.getUnpack();
+ for (;;) {
+ AndStressMaster::Task *task = _master.getTask();
+ if (task == NULL)
+ break;
+ testFakePair(*task->first, *task->second, _master.getNumDocs(),
+ unpack);
+ }
+}
+
+
+AndStress::AndStress(void)
+{
+ LOG(debug, "Andstress::AndStress");
+}
+
+
+AndStress::~AndStress(void)
+{
+ LOG(debug, "Andstress::~AndStress");
+}
+
+void
+AndStress::run(search::Rand48 &rnd,
+ FakeWordSet &wordSet,
+ unsigned int numDocs,
+ unsigned int commonDocFreq,
+ const std::vector<std::string> &postingTypes,
+ unsigned int loops,
+ unsigned int skipCommonPairsRate,
+ uint32_t numTasks,
+ uint32_t stride,
+ bool unpack)
+{
+ LOG(debug, "Andstress::run");
+ AndStressMaster master(rnd, wordSet,
+ numDocs, commonDocFreq, postingTypes, loops,
+ skipCommonPairsRate,
+ numTasks,
+ stride,
+ unpack);
+ master.run();
+}
+
+}
diff --git a/searchlib/src/tests/postinglistbm/andstress.h b/searchlib/src/tests/postinglistbm/andstress.h
new file mode 100644
index 00000000000..458866b09d5
--- /dev/null
+++ b/searchlib/src/tests/postinglistbm/andstress.h
@@ -0,0 +1,43 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+
+#include <vector>
+namespace search
+{
+class Rand48;
+
+namespace fakedata
+{
+
+class FakeWordSet;
+
+}
+
+}
+
+namespace postinglistbm
+{
+
+class AndStress
+{
+public:
+ AndStress(void);
+
+ ~AndStress(void);
+
+ void
+ run(search::Rand48 &rnd,
+ search::fakedata::FakeWordSet &wordSet,
+ unsigned int numDocs,
+ unsigned int commonDocFreq,
+ const std::vector<std::string> &postingTypes,
+ unsigned int loops,
+ unsigned int skipCommonPairsRate,
+ uint32_t numTasks,
+ uint32_t stride,
+ bool unpack);
+};
+
+} // namespace postinglistbm
+
diff --git a/searchlib/src/tests/postinglistbm/postinglistbm.cpp b/searchlib/src/tests/postinglistbm/postinglistbm.cpp
new file mode 100644
index 00000000000..fc93eb42dcd
--- /dev/null
+++ b/searchlib/src/tests/postinglistbm/postinglistbm.cpp
@@ -0,0 +1,491 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright (C) 2002-2003 Fast Search & Transfer ASA
+// Copyright (C) 2003 Overture Services Norway AS
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("postinglistbm");
+#include <vespa/searchlib/common/bitvector.h>
+#include <vespa/searchlib/common/resultset.h>
+#include <vespa/searchlib/util/rand48.h>
+#include "andstress.h"
+#include <vespa/searchlib/test/fakedata/fakeword.h>
+#include <vespa/searchlib/test/fakedata/fakeposting.h>
+#include <vespa/searchlib/test/fakedata/fakewordset.h>
+#include <vespa/searchlib/test/fakedata/fpfactory.h>
+#include <vespa/searchlib/index/docidandfeatures.h>
+
+using search::ResultSet;
+using search::fef::TermFieldMatchData;
+using search::fef::TermFieldMatchDataArray;
+using search::queryeval::SearchIterator;
+using search::index::Schema;
+using namespace search::fakedata;
+
+// needed to resolve external symbol from httpd.h on AIX
+void FastS_block_usr2() {}
+
+
+namespace postinglistbm
+{
+
+class PostingListBM : public FastOS_Application
+{
+private:
+ bool _verbose;
+ uint32_t _numDocs;
+ uint32_t _commonDocFreq;
+ uint32_t _numWordsPerClass;
+ std::vector<std::string> _postingTypes;
+ uint32_t _loops;
+ unsigned int _skipCommonPairsRate;
+ FakeWordSet _wordSet;
+ uint32_t _stride;
+ bool _unpack;
+public:
+ search::Rand48 _rnd;
+
+private:
+ void Usage(void);
+
+ void
+ badPostingType(const std::string &postingType);
+
+ void
+ testFake(const std::string &postingType,
+ const Schema &schema,
+ const FakeWord &fw);
+public:
+ PostingListBM(void);
+ ~PostingListBM(void);
+ int Main(void);
+};
+
+
+void
+PostingListBM::Usage(void)
+{
+ printf("postinglistbm "
+ "[-C <skipCommonPairsRate>] "
+ "[-a] "
+ "[-c <commonDoqFreq>] "
+ "[-d <numDocs>] "
+ "[-l <numLoops>] "
+ "[-s <stride>] "
+ "[-t <postingType>] "
+ "[-u] "
+ "[-q] "
+ "[-v]\n");
+}
+
+
+void
+PostingListBM::badPostingType(const std::string &postingType)
+{
+ printf("Bad posting list type: %s\n", postingType.c_str());
+ printf("Supported types: ");
+
+ std::vector<std::string> postingTypes = getPostingTypes();
+ std::vector<std::string>::const_iterator pti;
+ std::vector<std::string>::const_iterator ptie = postingTypes.end();
+ bool first = true;
+
+ for (pti = postingTypes.begin(); pti != ptie; ++pti) {
+ if (first)
+ first = false;
+ else
+ printf(", ");
+ printf("%s", pti->c_str());
+ }
+ printf("\n");
+}
+
+
+PostingListBM::PostingListBM(void)
+ : _verbose(false),
+ _numDocs(10000000),
+ _commonDocFreq(50000),
+ _numWordsPerClass(100),
+ _postingTypes(),
+ _loops(1),
+ _skipCommonPairsRate(1),
+ _wordSet(),
+ _stride(0),
+ _unpack(false),
+ _rnd()
+{
+}
+
+
+PostingListBM::~PostingListBM(void)
+{
+}
+
+
+static int
+highLevelSinglePostingScan(SearchIterator &sb, uint32_t numDocs, uint64_t *cycles)
+{
+ uint32_t hits = 0;
+ uint64_t before = fastos::ClockSystem::now();
+ sb.initFullRange();
+ uint32_t docId = sb.getDocId();
+ while (docId < numDocs) {
+ if (sb.seek(docId)) {
+ ++hits;
+ ++docId;
+ } else if (docId < sb.getDocId())
+ docId= sb.getDocId();
+ else
+ ++docId;
+ }
+ uint64_t after = fastos::ClockSystem::now();
+ *cycles = after - before;
+ return hits;
+}
+
+
+static int
+highLevelSinglePostingScanUnpack(SearchIterator &sb,
+ uint32_t numDocs, uint64_t *cycles)
+{
+ uint32_t hits = 0;
+ uint64_t before = fastos::ClockSystem::now();
+ sb.initFullRange();
+ uint32_t docId = sb.getDocId();
+ while (docId < numDocs) {
+ if (sb.seek(docId)) {
+ ++hits;
+ sb.unpack(docId);
+ ++docId;
+ } else if (docId < sb.getDocId())
+ docId= sb.getDocId();
+ else
+ ++docId;
+ }
+ uint64_t after = fastos::ClockSystem::now();
+ *cycles = after - before;
+ return hits;
+}
+
+
+static int
+highLevelAndPairPostingScan(SearchIterator &sb1,
+ SearchIterator &sb2,
+ uint32_t numDocs, uint64_t *cycles)
+{
+ uint32_t hits = 0;
+ uint64_t before = fastos::ClockSystem::now();
+ sb1.initFullRange();
+ sb2.initFullRange();
+ uint32_t docId = sb1.getDocId();
+ while (docId < numDocs) {
+ if (sb1.seek(docId)) {
+ if (sb2.seek(docId)) {
+ ++hits;
+ ++docId;
+ } else if (docId < sb2.getDocId())
+ docId = sb2.getDocId();
+ else
+ ++docId;
+ } else if (docId < sb1.getDocId())
+ docId= sb1.getDocId();
+ else
+ ++docId;
+ }
+ uint64_t after = fastos::ClockSystem::now();
+ *cycles = after - before;
+ return hits;
+}
+
+
+static int
+highLevelAndPairPostingScanUnpack(SearchIterator &sb1,
+ SearchIterator &sb2,
+ uint32_t numDocs,
+ uint64_t *cycles)
+{
+ uint32_t hits = 0;
+ uint64_t before = fastos::ClockSystem::now();
+ sb1.initFullRange();
+ sb1.initFullRange();
+ uint32_t docId = sb1.getDocId();
+ while (docId < numDocs) {
+ if (sb1.seek(docId)) {
+ if (sb2.seek(docId)) {
+ ++hits;
+ sb1.unpack(docId);
+ sb2.unpack(docId);
+ ++docId;
+ } else if (docId < sb2.getDocId())
+ docId = sb2.getDocId();
+ else
+ ++docId;
+ } else if (docId < sb1.getDocId())
+ docId= sb1.getDocId();
+ else
+ ++docId;
+ }
+ uint64_t after = fastos::ClockSystem::now();
+ *cycles = after - before;
+ return hits;
+}
+
+
+void
+PostingListBM::testFake(const std::string &postingType,
+ const Schema &schema,
+ const FakeWord &fw)
+{
+ std::unique_ptr<FPFactory> ff(getFPFactory(postingType, schema));
+ std::vector<const FakeWord *> v;
+ v.push_back(&fw);
+ ff->setup(v);
+ FakePosting::SP f(ff->make(fw));
+
+ printf("%s.bitsize=%d+%d+%d+%d+%d\n",
+ f->getName().c_str(),
+ static_cast<int>(f->bitSize()),
+ static_cast<int>(f->l1SkipBitSize()),
+ static_cast<int>(f->l2SkipBitSize()),
+ static_cast<int>(f->l3SkipBitSize()),
+ static_cast<int>(f->l4SkipBitSize()));
+ TermFieldMatchData md;
+ TermFieldMatchDataArray tfmda;
+ tfmda.add(&md);
+
+ std::unique_ptr<SearchIterator> sb(f->createIterator(tfmda));
+ if (f->hasWordPositions())
+ fw.validate(sb.get(), tfmda, _verbose);
+ else
+ fw.validate(sb.get(), _verbose);
+ uint64_t scanTime = 0;
+ uint64_t scanUnpackTime = 0;
+ TermFieldMatchData md2;
+ TermFieldMatchDataArray tfmda2;
+ tfmda2.add(&md2);
+
+ std::unique_ptr<SearchIterator> sb2(f->createIterator(tfmda2));
+ int hits1 = highLevelSinglePostingScan(*sb2.get(), fw.getDocIdLimit(),
+ &scanTime);
+ TermFieldMatchData md3;
+ TermFieldMatchDataArray tfmda3;
+ tfmda3.add(&md3);
+
+ std::unique_ptr<SearchIterator> sb3(f->createIterator(tfmda3));
+ int hits2 = highLevelSinglePostingScanUnpack(*sb3.get(), fw.getDocIdLimit(),
+ &scanUnpackTime);
+ printf("testFake '%s' hits1=%d, hits2=%d, scanTime=%" PRIu64
+ ", scanUnpackTime=%" PRIu64 "\n",
+ f->getName().c_str(),
+ hits1, hits2, scanTime, scanUnpackTime);
+}
+
+
+void
+testFakePair(const std::string &postingType,
+ const Schema &schema,
+ bool unpack,
+ const FakeWord &fw1, const FakeWord &fw2)
+{
+ std::unique_ptr<FPFactory> ff(getFPFactory(postingType, schema));
+ std::vector<const FakeWord *> v;
+ v.push_back(&fw1);
+ v.push_back(&fw2);
+ ff->setup(v);
+ FakePosting::SP f1(ff->make(fw1));
+ FakePosting::SP f2(ff->make(fw2));
+
+ TermFieldMatchData md1;
+ TermFieldMatchDataArray tfmda1;
+ tfmda1.add(&md1);
+ std::unique_ptr<SearchIterator> sb1(f1->createIterator(tfmda1));
+
+ TermFieldMatchData md2;
+ TermFieldMatchDataArray tfmda2;
+ tfmda1.add(&md2);
+ std::unique_ptr<SearchIterator> sb2(f2->createIterator(tfmda2));
+
+ int hits = 0;
+ uint64_t scanUnpackTime = 0;
+ if (unpack)
+ hits = highLevelAndPairPostingScanUnpack(*sb1.get(), *sb2.get(),
+ fw1.getDocIdLimit(), &scanUnpackTime);
+ else
+ hits = highLevelAndPairPostingScan(*sb1.get(), *sb2.get(),
+ fw1.getDocIdLimit(), &scanUnpackTime);
+ printf("Fakepair %s AND %s => %d hits, %" PRIu64 " cycles\n",
+ f1->getName().c_str(),
+ f2->getName().c_str(),
+ hits,
+ scanUnpackTime);
+}
+
+
+int
+PostingListBM::Main(void)
+{
+ int argi;
+ char c;
+ const char *optArg;
+ bool doandstress;
+
+ doandstress = false;
+ argi = 1;
+ bool hasElements = false;
+ bool hasElementWeights = false;
+ bool quick = false;
+
+ while ((c = GetOpt("C:ac:d:l:s:t:uvw:T:q", optArg, argi)) != -1) {
+ switch(c) {
+ case 'C':
+ _skipCommonPairsRate = atoi(optArg);
+ break;
+ case 'T':
+ if (strcmp(optArg, "single") == 0) {
+ hasElements = false;
+ hasElementWeights = false;
+ } else if (strcmp(optArg, "array") == 0) {
+ hasElements = true;
+ hasElementWeights = false;
+ } else if (strcmp(optArg, "weightedSet") == 0) {
+ hasElements = true;
+ hasElementWeights = true;
+ } else {
+ printf("Bad collection type: %s\n", optArg);
+ return 1;
+ }
+ break;
+ case 'a':
+ doandstress = true;
+ break;
+ case 'c':
+ _commonDocFreq = atoi(optArg);
+ break;
+ case 'd':
+ _numDocs = atoi(optArg);
+ break;
+ case 'l':
+ _loops = atoi(optArg);
+ break;
+ case 's':
+ _stride = atoi(optArg);
+ break;
+ case 't':
+ do {
+ Schema schema;
+ Schema::IndexField indexField("field0",
+ Schema::STRING,
+ Schema::SINGLE);
+ schema.addIndexField(indexField);
+ std::unique_ptr<FPFactory> ff(getFPFactory(optArg, schema));
+ if (ff.get() == NULL) {
+ badPostingType(optArg);
+ return 1;
+ }
+ } while (0);
+ _postingTypes.push_back(optArg);
+ break;
+ case 'u':
+ _unpack = true;
+ break;
+ case 'v':
+ _verbose = true;
+ break;
+ case 'w':
+ _numWordsPerClass = atoi(optArg);
+ break;
+ case 'q':
+ quick = true;
+ _numDocs = 36000;
+ _commonDocFreq = 10000;
+ _numWordsPerClass = 5;
+ break;
+ default:
+ Usage();
+ return 1;
+ }
+ }
+
+ if (_commonDocFreq > _numDocs) {
+ Usage();
+ return 1;
+ }
+
+ _wordSet.setupParams(hasElements, hasElementWeights);
+
+ uint32_t w1dfreq = 10;
+ uint32_t w4dfreq = 790000;
+ uint32_t w5dfreq = 290000;
+ uint32_t w4w5od = 100000;
+ uint32_t numTasks = 40000;
+ if (quick) {
+ w1dfreq = 2;
+ w4dfreq = 19000;
+ w5dfreq = 5000;
+ w4w5od = 1000;
+ numTasks = 40;
+ }
+
+
+ FakeWord word1(_numDocs, w1dfreq, w1dfreq / 2, "word1", _rnd,
+ _wordSet.getFieldsParams(), _wordSet.getPackedIndex());
+ FakeWord word2(_numDocs, 1000, 500, "word2", word1, 4, _rnd,
+ _wordSet.getFieldsParams(), _wordSet.getPackedIndex());
+ FakeWord word3(_numDocs, _commonDocFreq, _commonDocFreq / 2,
+ "word3", word1, 10, _rnd,
+ _wordSet.getFieldsParams(), _wordSet.getPackedIndex());
+ FakeWord word4(_numDocs, w4dfreq, w4dfreq / 2,
+ "word4", _rnd,
+ _wordSet.getFieldsParams(), _wordSet.getPackedIndex());
+ FakeWord word5(_numDocs, w5dfreq, w5dfreq / 2,
+ "word5", word4, w4w5od, _rnd,
+ _wordSet.getFieldsParams(), _wordSet.getPackedIndex());
+
+ if (_postingTypes.empty())
+ _postingTypes = getPostingTypes();
+ std::vector<std::string>::const_iterator pti;
+ std::vector<std::string>::const_iterator ptie = _postingTypes.end() ;
+
+ for (pti = _postingTypes.begin(); pti != ptie; ++pti) {
+ testFake(*pti, _wordSet.getSchema(), word1);
+ testFake(*pti, _wordSet.getSchema(), word2);
+ testFake(*pti, _wordSet.getSchema(), word3);
+ }
+
+ for (pti = _postingTypes.begin(); pti != ptie; ++pti) {
+ testFakePair(*pti, _wordSet.getSchema(), false, word1, word3);
+ testFakePair(*pti, _wordSet.getSchema(), false, word2, word3);
+ }
+
+ for (pti = _postingTypes.begin(); pti != ptie; ++pti) {
+ testFakePair(*pti, _wordSet.getSchema(), false, word4, word5);
+ }
+
+ if (doandstress) {
+ _wordSet.setupWords(_rnd, _numDocs, _commonDocFreq, _numWordsPerClass);
+ }
+ if (doandstress) {
+ AndStress andstress;
+ andstress.run(_rnd, _wordSet,
+ _numDocs, _commonDocFreq, _postingTypes, _loops,
+ _skipCommonPairsRate,
+ numTasks,
+ _stride,
+ _unpack);
+ }
+ return 0;
+}
+
+} // namespace postinglistbm
+
+int
+main(int argc, char **argv)
+{
+ postinglistbm::PostingListBM app;
+
+ setvbuf(stdout, NULL, _IOLBF, 32768);
+ app._rnd.srand48(32);
+ return app.Entry(argc, argv);
+
+ return 0;
+}
diff --git a/searchlib/src/tests/postinglistbm/skip.txt b/searchlib/src/tests/postinglistbm/skip.txt
new file mode 100644
index 00000000000..9804bce3c33
--- /dev/null
+++ b/searchlib/src/tests/postinglistbm/skip.txt
@@ -0,0 +1,75 @@
+B tree view:
+
+ Leaf Nodes: segments of docid delta list
+ Interior Nodes: Segments of skip info lists
+
+ Interior Nodes 1 level above leaf nodes: L1 skip info
+ Interior Nodes 2 level above leaf nodes: L2 skip info
+
+Example posting list, with stride 4 for L1 skip and L2 skip:
+
+DocIdPos: 0 1 2 3| 4 5 6 7| 8 9 10 11| 12 13 14 15| 16 17 18
+DocId: 1 11 21 31|41 51 61 71|81 91 101 111|121 131 141 151|161 171 181
+
+(Assume continued with every 10. docid present)
+
+Old L1 skip info, pointing to start of leaf nodes, with first docid in
+leaf node pre-decoded (i.e. containing copy of first docid entry in leaf node):
+
+L1Pos: 0 1 2 3| 4 5 6 7| 8 9 10 11| 12 13 14 15| 16
+DocId: 41 81 121 161|201 241 281 321|361 401 441 481|521 561 601 641|681
+DocIdPos: 5 9 13 17| 21 25 29 33| 37 41 45 49| 53 57 61 65| 69
+
+Old L2 skip info, pointing to start of interior nodes 1 level above leaf nodes
+and containing copies of previous L1 skip entry:
+
+L2Pos: 0 1 2 3
+DocId: 161 321 481 641
+DocIdPos: 17 33 49 65
+L1Pos: 4 8 12 16
+
+Reason for change of skip info view: Avoiding null skips, simplifying code.
+
+Skip from docId 1 to docId 115 first skips to DocId 81 before ending
+up at DocId 121. If next seek is to below 161, a null skip to docid
+121 is performed since docId delta unpacking caught up with supposedly
+next L1 skip docid. With L1 skip stride being N, 1/N of longer seeks
+will unpack N extra docids, eating up the advantage of first docid in
+leaf node being pre-decoded.
+
+If a seek to docId 115 is followed by a seek to docId 121, an unpack
+of docId 121 and a sek to a higher docid, this causes, with the old L1
+skip info, features for docId 81, 91 101, 111 to be decoded with the
+result ignored before the features for docId 121 is decoded. For the
+next seek, the null skip of DocId is also associated with a backwards
+skip for features, so if the next feature to be decoded was for docId
+141 then features for docId 121 will be decoded again and ignored.
+
+New L1 skip info, pointing to start of leaf nodes, without first docid
+in leaf node pre-decoded (i.e. containing copy of last docid entry in
+previous leaf node):
+
+L1Pos: 0 1 2 3| 4 5 6 7| 8 9 10 11| 12 13 14 15| 16
+DocId: 31 71 111 151|191 231 271 311|351 391 431 471|511 551 591 631|671
+DocIdPos: 4 8 12 16| 20 24 28 32| 36 40 44 48| 52 56 60 64| 68
+
+New L2 skip info, pointing to start of interior nodes 1 level above leaf nodes
+and containing copies of previous L1 skip entry:
+
+L2Pos: 0 1 2 3
+DocId: 151 311 471 631
+DocIdPos: 16 32 48 64
+L1Pos: 4 8 12 16
+
+1 DocId delta is unpacked when using L1 or L2 skip, to get first docid
+in leaf node. With old skip info, this wasn't needed.
+
+With new skip info, docid delta unpacking should never catch up with
+next L1 skip docid (can become equal, but that's no longer sufficient
+for triggering a skip).
+
+For each level upwards in skip info, one extra number is needed per element in
+the skip info.
+
+For feature position (split docid/features), one extra number is needed per
+element in the skip info.
diff --git a/searchlib/src/tests/predicate/.gitignore b/searchlib/src/tests/predicate/.gitignore
new file mode 100644
index 00000000000..eea4d347d05
--- /dev/null
+++ b/searchlib/src/tests/predicate/.gitignore
@@ -0,0 +1,13 @@
+searchlib_document_features_store_test_app
+searchlib_predicate_bounds_posting_list_test_app
+searchlib_predicate_index_test_app
+searchlib_predicate_interval_posting_list_test_app
+searchlib_predicate_interval_store_test_app
+searchlib_predicate_range_term_expander_test_app
+searchlib_predicate_ref_cache_test_app
+searchlib_predicate_tree_analyzer_test_app
+searchlib_predicate_tree_annotator_test_app
+searchlib_predicate_zero_constraint_posting_list_test_app
+searchlib_predicate_zstar_compressed_posting_list_test_app
+searchlib_simple_index_test_app
+searchlib_tree_crumbs_test_app
diff --git a/searchlib/src/tests/predicate/CMakeLists.txt b/searchlib/src/tests/predicate/CMakeLists.txt
new file mode 100644
index 00000000000..cd15356eeee
--- /dev/null
+++ b/searchlib/src/tests/predicate/CMakeLists.txt
@@ -0,0 +1,92 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_predicate_index_test_app
+ SOURCES
+ predicate_index_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_predicate_index_test_app COMMAND searchlib_predicate_index_test_app)
+vespa_add_executable(searchlib_simple_index_test_app
+ SOURCES
+ simple_index_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_simple_index_test_app COMMAND searchlib_simple_index_test_app)
+vespa_add_executable(searchlib_tree_crumbs_test_app
+ SOURCES
+ tree_crumbs_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_tree_crumbs_test_app COMMAND searchlib_tree_crumbs_test_app)
+vespa_add_executable(searchlib_predicate_tree_analyzer_test_app
+ SOURCES
+ predicate_tree_analyzer_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_predicate_tree_analyzer_test_app COMMAND searchlib_predicate_tree_analyzer_test_app)
+vespa_add_executable(searchlib_predicate_tree_annotator_test_app
+ SOURCES
+ predicate_tree_annotator_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_predicate_tree_annotator_test_app COMMAND searchlib_predicate_tree_annotator_test_app)
+vespa_add_executable(searchlib_predicate_interval_store_test_app
+ SOURCES
+ predicate_interval_store_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_predicate_interval_store_test_app COMMAND searchlib_predicate_interval_store_test_app)
+vespa_add_executable(searchlib_document_features_store_test_app
+ SOURCES
+ document_features_store_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_document_features_store_test_app COMMAND searchlib_document_features_store_test_app)
+vespa_add_executable(searchlib_predicate_ref_cache_test_app
+ SOURCES
+ predicate_ref_cache_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_predicate_ref_cache_test_app COMMAND searchlib_predicate_ref_cache_test_app)
+vespa_add_executable(searchlib_predicate_interval_posting_list_test_app
+ SOURCES
+ predicate_interval_posting_list_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_predicate_interval_posting_list_test_app COMMAND searchlib_predicate_interval_posting_list_test_app)
+vespa_add_executable(searchlib_predicate_bounds_posting_list_test_app
+ SOURCES
+ predicate_bounds_posting_list_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_predicate_bounds_posting_list_test_app COMMAND searchlib_predicate_bounds_posting_list_test_app)
+vespa_add_executable(searchlib_predicate_zero_constraint_posting_list_test_app
+ SOURCES
+ predicate_zero_constraint_posting_list_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_predicate_zero_constraint_posting_list_test_app COMMAND searchlib_predicate_zero_constraint_posting_list_test_app)
+vespa_add_executable(searchlib_predicate_zstar_compressed_posting_list_test_app
+ SOURCES
+ predicate_zstar_compressed_posting_list_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_predicate_zstar_compressed_posting_list_test_app COMMAND searchlib_predicate_zstar_compressed_posting_list_test_app)
+vespa_add_executable(searchlib_predicate_range_term_expander_test_app
+ SOURCES
+ predicate_range_term_expander_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_predicate_range_term_expander_test_app COMMAND searchlib_predicate_range_term_expander_test_app)
diff --git a/searchlib/src/tests/predicate/OWNERS b/searchlib/src/tests/predicate/OWNERS
new file mode 100644
index 00000000000..569bf1cc3a1
--- /dev/null
+++ b/searchlib/src/tests/predicate/OWNERS
@@ -0,0 +1 @@
+bjorncs
diff --git a/searchlib/src/tests/predicate/document_features_store_test.cpp b/searchlib/src/tests/predicate/document_features_store_test.cpp
new file mode 100644
index 00000000000..4baf2d03fbe
--- /dev/null
+++ b/searchlib/src/tests/predicate/document_features_store_test.cpp
@@ -0,0 +1,225 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Unit tests for document_features_store.
+
+#include <vespa/log/log.h>
+LOG_SETUP("document_features_store_test");
+#include <vespa/fastos/fastos.h>
+
+#include <vespa/searchlib/predicate/document_features_store.h>
+#include <vespa/searchlib/predicate/predicate_index.h>
+#include <vespa/searchlib/predicate/predicate_tree_annotator.h>
+#include <vespa/searchlib/predicate/predicate_hash.h>
+#include <vespa/vespalib/testkit/testapp.h>
+#include <string>
+
+using namespace search;
+using namespace search::predicate;
+using std::string;
+
+namespace {
+
+const uint64_t hash1 = 0x12345678;
+const uint64_t hash2 = 0x123456789a;
+const uint32_t doc_id = 42;
+
+TEST("require that DocumentFeaturesStore can store features.") {
+ DocumentFeaturesStore features_store(10);
+ PredicateTreeAnnotations annotations;
+ annotations.features.push_back(hash1);
+ annotations.features.push_back(hash2);
+ features_store.insert(annotations, doc_id);
+
+ auto features = features_store.get(doc_id);
+ ASSERT_EQUAL(2u, features.size());
+ EXPECT_EQUAL(1u, features.count(hash1));
+ EXPECT_EQUAL(1u, features.count(hash2));
+
+ features_store.remove(doc_id);
+ features = features_store.get(doc_id);
+ EXPECT_TRUE(features.empty());
+}
+
+template <typename Set>
+void expectHash(const string &label, const Set &set) {
+ TEST_STATE(label.c_str());
+ uint64_t hash = PredicateHash::hash64(label);
+ EXPECT_EQUAL(1u, set.count(hash));
+}
+
+TEST("require that DocumentFeaturesStore can store ranges.") {
+ DocumentFeaturesStore features_store(10);
+ PredicateTreeAnnotations annotations;
+ annotations.range_features.push_back({"foo", 2, 4});
+ annotations.range_features.push_back({"bar", 7, 13});
+ annotations.range_features.push_back({"baz", 9, 19});
+ annotations.range_features.push_back({"qux", -10, 10});
+ annotations.range_features.push_back({"quux", -39, -10});
+ annotations.range_features.push_back({"corge", -9, -1});
+ features_store.insert(annotations, doc_id);
+
+ auto features = features_store.get(doc_id);
+ ASSERT_EQUAL(13u, features.size());
+ expectHash("foo=0", features);
+
+ expectHash("bar=0", features);
+ expectHash("bar=10", features);
+
+ expectHash("baz=0", features);
+ expectHash("baz=10-19", features);
+
+ expectHash("qux=-10", features);
+ expectHash("qux=-9-0", features);
+ expectHash("qux=10", features);
+ expectHash("qux=0-9", features);
+
+ expectHash("quux=-19-10", features);
+ expectHash("quux=-29-20", features);
+ expectHash("quux=-39-30", features);
+
+ expectHash("corge=-9-0", features);
+}
+
+TEST("require that DocumentFeaturesStore can store large ranges.") {
+ DocumentFeaturesStore features_store(10);
+ PredicateTreeAnnotations annotations;
+ annotations.range_features.push_back({"foo", 10, 199});
+ annotations.range_features.push_back({"bar", 100, 239});
+ annotations.range_features.push_back({"baz", -999, 999});
+ features_store.insert(annotations, doc_id);
+
+ auto features = features_store.get(doc_id);
+ ASSERT_EQUAL(17u, features.size());
+ expectHash("foo=10-19", features);
+ expectHash("foo=20-29", features);
+ expectHash("foo=30-39", features);
+ expectHash("foo=40-49", features);
+ expectHash("foo=50-59", features);
+ expectHash("foo=60-69", features);
+ expectHash("foo=70-79", features);
+ expectHash("foo=80-89", features);
+ expectHash("foo=90-99", features);
+ expectHash("foo=100-199", features);
+
+ expectHash("bar=200-209", features);
+ expectHash("bar=210-219", features);
+ expectHash("bar=220-229", features);
+ expectHash("bar=230-239", features);
+ expectHash("bar=100-199", features);
+
+ expectHash("baz=-999-0", features);
+ expectHash("baz=0-999", features);
+}
+
+TEST("require that DocumentFeaturesStore can use very large ranges.") {
+ DocumentFeaturesStore features_store(2);
+ PredicateTreeAnnotations annotations;
+ annotations.range_features.push_back({"foo", LLONG_MIN, 39});
+ features_store.insert(annotations, doc_id);
+
+ auto features = features_store.get(doc_id);
+ ASSERT_EQUAL(4u, features.size());
+ expectHash("foo=-9223372036854775808", features);
+ expectHash("foo=-9223372036854775807-0", features);
+ expectHash("foo=0-31", features);
+ expectHash("foo=32-39", features);
+}
+
+TEST("require that duplicate range features are removed.") {
+ DocumentFeaturesStore features_store(10);
+ PredicateTreeAnnotations annotations;
+ annotations.range_features.push_back({"foo", 80, 199});
+ annotations.range_features.push_back({"foo", 85, 199});
+ annotations.range_features.push_back({"foo", 90, 199});
+ features_store.insert(annotations, doc_id);
+
+ auto features = features_store.get(doc_id);
+ ASSERT_EQUAL(4u, features.size());
+ expectHash("foo=80-89", features);
+ expectHash("foo=90-99", features);
+ expectHash("foo=100-199", features);
+ expectHash("foo=80", features);
+}
+
+TEST("require that only unique features are returned") {
+ DocumentFeaturesStore features_store(10);
+ PredicateTreeAnnotations annotations;
+ annotations.range_features.push_back({"foo", 100, 199});
+ annotations.features.push_back(PredicateHash::hash64("foo=100-199"));
+ features_store.insert(annotations, doc_id);
+
+ auto features = features_store.get(doc_id);
+ ASSERT_EQUAL(1u, features.size());
+ expectHash("foo=100-199", features);
+}
+
+TEST("require that both features and ranges are removed by 'remove'") {
+ DocumentFeaturesStore features_store(10);
+ PredicateTreeAnnotations annotations;
+ annotations.range_features.push_back({"foo", 100, 199});
+ annotations.features.push_back(PredicateHash::hash64("foo=100-199"));
+ features_store.insert(annotations, doc_id);
+ features_store.remove(doc_id);
+
+ auto features = features_store.get(doc_id);
+ ASSERT_EQUAL(0u, features.size());
+}
+
+TEST("require that both features and ranges counts towards memory usage") {
+ DocumentFeaturesStore features_store(10);
+ EXPECT_EQUAL(332u, features_store.getMemoryUsage().usedBytes());
+
+ PredicateTreeAnnotations annotations;
+ annotations.features.push_back(PredicateHash::hash64("foo=100-199"));
+ features_store.insert(annotations, doc_id);
+ EXPECT_EQUAL(340u, features_store.getMemoryUsage().usedBytes());
+
+ annotations.features.clear();
+ annotations.range_features.push_back({"foo", 100, 199});
+ features_store.insert(annotations, doc_id + 1);
+ EXPECT_EQUAL(436u, features_store.getMemoryUsage().usedBytes());
+}
+
+TEST("require that DocumentFeaturesStore can be serialized") {
+ DocumentFeaturesStore features_store(10);
+ PredicateTreeAnnotations annotations;
+ annotations.range_features.push_back({"foo", 100, 199});
+ annotations.features.push_back(PredicateHash::hash64("foo=bar"));
+ features_store.insert(annotations, doc_id);
+
+ auto features = features_store.get(doc_id);
+ ASSERT_EQUAL(2u, features.size());
+ expectHash("foo=bar", features);
+ expectHash("foo=100-199", features);
+
+ vespalib::MMapDataBuffer buffer;
+ features_store.serialize(buffer);
+
+ DocumentFeaturesStore features_store2(buffer);
+ features = features_store2.get(doc_id);
+ ASSERT_EQUAL(2u, features.size());
+ expectHash("foo=bar", features);
+ expectHash("foo=100-199", features);
+}
+
+TEST("require that serialization cleans up wordstore") {
+ DocumentFeaturesStore features_store(10);
+ PredicateTreeAnnotations annotations;
+ annotations.range_features.push_back({"foo", 100, 199});
+ features_store.insert(annotations, doc_id);
+ EXPECT_EQUAL(428u, features_store.getMemoryUsage().usedBytes());
+ annotations.range_features.push_back({"bar", 100, 199});
+ features_store.insert(annotations, doc_id + 1);
+ EXPECT_EQUAL(720u, features_store.getMemoryUsage().usedBytes());
+ features_store.remove(doc_id + 1);
+ EXPECT_EQUAL(672u, features_store.getMemoryUsage().usedBytes());
+
+ vespalib::MMapDataBuffer buffer;
+ features_store.serialize(buffer);
+ DocumentFeaturesStore features_store2(buffer);
+ EXPECT_EQUAL(428u, features_store2.getMemoryUsage().usedBytes());
+}
+
+
+} // namespace
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/predicate/predicate_bounds_posting_list_test.cpp b/searchlib/src/tests/predicate/predicate_bounds_posting_list_test.cpp
new file mode 100644
index 00000000000..c54e6f49cc7
--- /dev/null
+++ b/searchlib/src/tests/predicate/predicate_bounds_posting_list_test.cpp
@@ -0,0 +1,107 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Unit tests for predicate_bounds_posting_list.
+
+#include <vespa/log/log.h>
+LOG_SETUP("predicate_bounds_posting_list_test");
+#include <vespa/fastos/fastos.h>
+
+#include <vespa/searchlib/predicate/predicate_tree_annotator.h>
+#include <vespa/searchlib/predicate/predicate_bounds_posting_list.h>
+#include <vespa/vespalib/testkit/testapp.h>
+
+using namespace search;
+using namespace search::predicate;
+
+namespace {
+
+struct DummyDocIdLimitProvider : public DocIdLimitProvider {
+ virtual uint32_t getDocIdLimit() const { return 10000; }
+ virtual uint32_t getCommittedDocIdLimit() const { return 10000; }
+};
+
+vespalib::GenerationHandler generation_handler;
+vespalib::GenerationHolder generation_holder;
+DummyDocIdLimitProvider limit_provider;
+SimpleIndexConfig config;
+const uint64_t hash = 0x123;
+
+TEST("require that empty bounds posting list starts at 0.") {
+ PredicateIndex index(generation_handler, generation_holder, limit_provider, config, 8);
+ btree::EntryRef ref;
+ PredicateBoundsPostingList<PredicateIndex::BTreeIterator>
+ posting_list(index.getIntervalStore(),
+ index.getBoundsIndex().getBTreePostingList(ref), 42);
+ EXPECT_EQUAL(0u, posting_list.getDocId());
+ EXPECT_EQUAL(0u, posting_list.getInterval());
+ EXPECT_FALSE(posting_list.next(0));
+}
+
+void checkNext(PredicateBoundsPostingList<PredicateIndex::BTreeIterator> &posting_list, uint32_t move_past,
+ uint32_t doc_id, uint32_t interval_count) {
+ std::ostringstream ost;
+ ost << "checkNext(posting_list, " << move_past << ", " << doc_id
+ << ", " << interval_count << ")";
+ TEST_STATE(ost.str().c_str());
+ ASSERT_TRUE(posting_list.next(move_past));
+ EXPECT_EQUAL(doc_id, posting_list.getDocId());
+ for (uint32_t i = 0; i < interval_count - 1; ++i) {
+ ASSERT_TRUE(posting_list.nextInterval());
+ }
+ ASSERT_FALSE(posting_list.nextInterval());
+}
+
+TEST("require that bounds posting list checks bounds.") {
+ PredicateIndex index(generation_handler, generation_holder, limit_provider, config, 8);
+ const auto &bounds_index = index.getBoundsIndex();
+ for (uint32_t id = 1; id < 100; ++id) {
+ PredicateTreeAnnotations annotations(id);
+ auto &vec = annotations.bounds_map[hash];
+ for (uint32_t i = 0; i <= id; ++i) {
+ uint32_t bounds;
+ if (id < 30) {
+ bounds = 0x80000000 | i; // diff >= i
+ } else if (id < 60) {
+ bounds = 0x40000000 | i; // diff < i
+ } else {
+ bounds = (i << 16) | (i + 10); // i < diff < i+10
+ }
+ vec.push_back(IntervalWithBounds{(i + 1) << 16 | 0xffff, bounds});
+ }
+ index.indexDocument(id, annotations);
+ }
+ index.commit();
+ auto it = bounds_index.lookup(hash);
+ ASSERT_TRUE(it.valid());
+ auto ref = it.getData();
+
+ PredicateBoundsPostingList<PredicateIndex::BTreeIterator>
+ posting_list(index.getIntervalStore(),
+ bounds_index.getBTreePostingList(ref), 5);
+ checkNext(posting_list, 0, 1, 2); // [0..] -> [1..]
+ checkNext(posting_list, 1, 2, 3); // [0..] -> [2..]
+ checkNext(posting_list, 10, 11, 6); // [0..] -> [5..]
+ checkNext(posting_list, 20, 21, 6);
+
+ checkNext(posting_list, 30, 31, 26); // [..5] -> [..30]
+ checkNext(posting_list, 50, 51, 46);
+
+ checkNext(posting_list, 60, 61, 6); // [0..10] -> [5..15]
+
+
+ PredicateBoundsPostingList<PredicateIndex::BTreeIterator>
+ posting_list2(index.getIntervalStore(),
+ bounds_index.getBTreePostingList(ref), 40);
+ checkNext(posting_list2, 0, 1, 2);
+ checkNext(posting_list2, 1, 2, 3);
+ checkNext(posting_list2, 20, 21, 22); // [0..] -> [21..]
+
+ checkNext(posting_list2, 30, 41, 1); // skip ahead to match
+ checkNext(posting_list2, 35, 41, 1);
+ checkNext(posting_list2, 50, 51, 11); // [..40] -> [..50]
+
+ checkNext(posting_list2, 60, 61, 10); // [31..40] -> [40..49]
+}
+
+} // namespace
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/predicate/predicate_index_test.cpp b/searchlib/src/tests/predicate/predicate_index_test.cpp
new file mode 100644
index 00000000000..b22c80294d0
--- /dev/null
+++ b/searchlib/src/tests/predicate/predicate_index_test.cpp
@@ -0,0 +1,363 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Unit tests for predicate_index.
+
+#include <vespa/log/log.h>
+LOG_SETUP("predicate_index_test");
+#include <vespa/fastos/fastos.h>
+
+#include <vespa/searchlib/predicate/predicate_index.h>
+#include <vespa/searchlib/predicate/predicate_tree_annotator.h>
+#include <vespa/vespalib/testkit/testapp.h>
+#include <vespa/searchlib/attribute/predicate_attribute.h>
+
+using namespace search;
+using namespace search::predicate;
+using std::make_pair;
+using std::pair;
+using std::vector;
+
+namespace {
+
+struct DummyDocIdLimitProvider : public DocIdLimitProvider {
+ virtual uint32_t getDocIdLimit() const { return 10000; }
+ virtual uint32_t getCommittedDocIdLimit() const { return 10000; }
+};
+
+vespalib::GenerationHandler generation_handler;
+vespalib::GenerationHolder generation_holder;
+DummyDocIdLimitProvider dummy_provider;
+SimpleIndexConfig simple_index_config;
+
+TEST("require that PredicateIndex can index empty documents") {
+ PredicateIndex index(generation_handler, generation_holder, dummy_provider, simple_index_config, 10);
+ EXPECT_EQUAL(0u, index.getZeroConstraintDocs().size());
+ index.indexEmptyDocument(2);
+ index.commit();
+ EXPECT_EQUAL(1u, index.getZeroConstraintDocs().size());
+}
+
+TEST("require that indexDocument don't index empty documents") {
+ PredicateIndex index(generation_handler, generation_holder, dummy_provider, simple_index_config, 10);
+ EXPECT_EQUAL(0u, index.getZeroConstraintDocs().size());
+ PredicateTreeAnnotations annotations;
+ index.indexDocument(3, annotations);
+ index.commit();
+ EXPECT_EQUAL(0u, index.getZeroConstraintDocs().size());
+}
+
+TEST("require that PredicateIndex can remove empty documents") {
+ PredicateIndex index(generation_handler, generation_holder, dummy_provider, simple_index_config, 10);
+ EXPECT_EQUAL(0u, index.getZeroConstraintDocs().size());
+ index.indexEmptyDocument(2);
+ index.commit();
+ EXPECT_EQUAL(1u, index.getZeroConstraintDocs().size());
+ index.removeDocument(2);
+ index.commit();
+ EXPECT_EQUAL(0u, index.getZeroConstraintDocs().size());
+}
+
+TEST("require that indexing the same empty document multiple times is ok") {
+ PredicateIndex index(generation_handler, generation_holder, dummy_provider, simple_index_config, 10);
+ EXPECT_EQUAL(0u, index.getZeroConstraintDocs().size());
+ index.indexEmptyDocument(2);
+ index.commit();
+ EXPECT_EQUAL(1u, index.getZeroConstraintDocs().size());
+ index.indexEmptyDocument(2);
+ index.commit();
+ EXPECT_EQUAL(1u, index.getZeroConstraintDocs().size());
+}
+
+void indexFeature(PredicateIndex &attr, uint32_t doc_id, int min_feature,
+ const vector<pair<uint64_t, Interval>> &intervals,
+ const vector<pair<uint64_t, IntervalWithBounds>> &bounds) {
+ PredicateTreeAnnotations annotations(min_feature);
+ for (auto &p : intervals) {
+ annotations.interval_map[p.first] = std::vector<Interval>{{p.second}};
+ annotations.features.push_back(p.first);
+ }
+ for (auto &p : bounds) {
+ annotations.bounds_map[p.first] =
+ std::vector<IntervalWithBounds>{{p.second}};
+ annotations.features.push_back(p.first);
+ }
+ attr.indexDocument(doc_id, annotations);
+}
+
+PredicateIndex::BTreeIterator
+lookupPosting(const PredicateIndex &index, uint64_t hash) {
+ const auto &interval_index = index.getIntervalIndex();
+ auto it = interval_index.lookup(hash);
+ ASSERT_TRUE(it.valid());
+ auto entry = it.getData();
+ EXPECT_TRUE(entry.valid());
+
+ auto posting_it = interval_index.getBTreePostingList(entry);
+ ASSERT_TRUE(posting_it.valid());
+ return posting_it;
+}
+
+const int min_feature = 3;
+const int k = min_feature - 1;
+const uint32_t doc_id = 2;
+const uint64_t hash = 0x12345;
+const uint64_t hash2 = 0x3456;
+const Interval interval = {0x0001ffff};
+const IntervalWithBounds bounds = {0x0001ffff, 0x03};
+Interval single_buf;
+
+TEST("require that PredicateIndex can index document") {
+ PredicateIndex index(generation_handler, generation_holder, dummy_provider, simple_index_config, 10);
+ EXPECT_FALSE(index.getIntervalIndex().lookup(hash).valid());
+ indexFeature(index, doc_id, min_feature, {{hash, interval}}, {});
+ index.commit();
+
+ auto posting_it = lookupPosting(index, hash);
+ EXPECT_EQUAL(doc_id, posting_it.getKey());
+ uint32_t size;
+ const auto &interval_list =
+ index.getIntervalStore().get(posting_it.getData(), size, &single_buf);
+ ASSERT_EQUAL(1u, size);
+ EXPECT_EQUAL(interval, interval_list[0]);
+}
+
+TEST("require that PredicateIndex can index document with bounds") {
+ PredicateIndex index(generation_handler, generation_holder, dummy_provider, simple_index_config, 10);
+ EXPECT_FALSE(index.getIntervalIndex().lookup(hash).valid());
+ indexFeature(index, doc_id, min_feature, {}, {{hash, bounds}});
+ index.commit();
+
+ const auto &bounds_index = index.getBoundsIndex();
+ auto it = bounds_index.lookup(hash);
+ ASSERT_TRUE(it.valid());
+ auto entry = it.getData();
+ EXPECT_TRUE(entry.valid());
+
+ auto posting_it = bounds_index.getBTreePostingList(entry);
+ ASSERT_TRUE(posting_it.valid());
+ EXPECT_EQUAL(doc_id, posting_it.getKey());
+
+ uint32_t size;
+ IntervalWithBounds single;
+ const auto &interval_list =
+ index.getIntervalStore().get(posting_it.getData(), size, &single);
+ ASSERT_EQUAL(1u, size);
+ EXPECT_EQUAL(bounds, interval_list[0]);
+}
+
+TEST("require that PredicateIndex can index multiple documents "
+ "with the same feature") {
+ PredicateIndex index(generation_handler, generation_holder, dummy_provider, simple_index_config, 10);
+ EXPECT_FALSE(index.getIntervalIndex().lookup(hash).valid());
+ for (uint32_t id = 1; id < 100; ++id) {
+ indexFeature(index, id, min_feature, {{hash, interval}}, {});
+ }
+ index.commit();
+
+ auto posting_it = lookupPosting(index, hash);
+ for (uint32_t id = 1; id < 100; ++id) {
+ ASSERT_TRUE(posting_it.valid());
+ EXPECT_EQUAL(id, posting_it.getKey());
+ uint32_t size;
+ const auto &interval_list = index.getIntervalStore().get(
+ posting_it.getData(), size, &single_buf);
+ ASSERT_EQUAL(1u, size);
+ EXPECT_EQUAL(interval, interval_list[0]);
+ ++posting_it;
+ }
+ ASSERT_FALSE(posting_it.valid());
+}
+
+TEST("require that PredicateIndex can remove indexed documents") {
+ PredicateIndex index(generation_handler, generation_holder, dummy_provider, simple_index_config, 10);
+ EXPECT_FALSE(index.getIntervalIndex().lookup(hash).valid());
+ indexFeature(index, doc_id, min_feature,
+ {{hash, interval}}, {{hash2, bounds}});
+ index.removeDocument(doc_id);
+ index.commit();
+ auto it = index.getIntervalIndex().lookup(hash);
+ ASSERT_FALSE(it.valid());
+ auto it2 = index.getBoundsIndex().lookup(hash2);
+ ASSERT_FALSE(it2.valid());
+
+ // Remove again. Nothing should happen.
+ index.removeDocument(doc_id);
+}
+
+TEST("require that PredicateIndex can remove multiple documents") {
+ PredicateIndex index(generation_handler, generation_holder, dummy_provider, simple_index_config, 10);
+ const auto &interval_index = index.getIntervalIndex();
+ EXPECT_FALSE(interval_index.lookup(hash).valid());
+ for (uint32_t id = 1; id < 100; ++id) {
+ indexFeature(index, id, min_feature, {{hash, interval}}, {});
+ }
+ index.commit();
+ for (uint32_t id = 1; id < 110; ++id) {
+ index.removeDocument(id);
+ index.commit();
+ auto it = interval_index.lookup(hash);
+ if (id < 99) {
+ ASSERT_TRUE(it.valid());
+ } else {
+ ASSERT_FALSE(it.valid());
+ }
+ }
+}
+
+TEST("require that PredicateIndex can remove multiple documents with "
+ "multiple features") {
+ vector<pair<uint64_t, Interval>> intervals;
+ vector<pair<uint64_t, IntervalWithBounds>> bounds_intervals;
+ for (int i = 0; i < 100; ++i) {
+ intervals.push_back(make_pair(hash + i, interval));
+ bounds_intervals.push_back(make_pair(hash2 + i, bounds));
+ }
+ PredicateIndex index(generation_handler, generation_holder, dummy_provider, simple_index_config, 10);
+ const auto &interval_index = index.getIntervalIndex();
+ EXPECT_FALSE(interval_index.lookup(hash).valid());
+ for (uint32_t id = 1; id < 100; ++id) {
+ indexFeature(index, id, id, intervals, bounds_intervals);
+ }
+ index.commit();
+ for (uint32_t id = 1; id < 100; ++id) {
+ index.removeDocument((id + 50) % 99 + 1);
+ index.commit();
+ auto it = interval_index.lookup(hash);
+ if (id < 99) {
+ ASSERT_TRUE(it.valid());
+ } else {
+ ASSERT_FALSE(it.valid());
+ }
+ }
+}
+
+// Helper function for next test.
+template <typename Iterator, typename IntervalT>
+void checkAllIntervals(Iterator posting_it, IntervalT expected_interval,
+ const PredicateIntervalStore &interval_store) {
+ for (uint32_t id = 1; id < 100u; ++id) {
+ ASSERT_TRUE(posting_it.valid());
+ EXPECT_EQUAL(id, posting_it.getKey());
+ btree::EntryRef ref = posting_it.getData();
+ ASSERT_TRUE(ref.valid());
+ uint32_t size;
+ IntervalT single;
+ const IntervalT *read_interval =
+ interval_store.get(ref, size, &single);
+ EXPECT_EQUAL(1u, size);
+ EXPECT_EQUAL(expected_interval, read_interval[0]);
+ ++posting_it;
+ }
+}
+
+namespace {
+struct DocIdLimitFinder : SimpleIndexDeserializeObserver<> {
+ uint32_t &_doc_id_limit;
+ DocIdLimitFinder(uint32_t &doc_id_limit) : _doc_id_limit(doc_id_limit)
+ {
+ doc_id_limit = 0u;
+ }
+ void notifyInsert(uint64_t, uint32_t doc_id, uint32_t) {
+ _doc_id_limit = std::max(_doc_id_limit, doc_id);
+ }
+};
+}
+
+TEST("require that PredicateIndex can be (de)serialized") {
+ vector<pair<uint64_t, Interval>> intervals;
+ vector<pair<uint64_t, IntervalWithBounds>> bounds_intervals;
+ for (int i = 0; i < 100; ++i) {
+ intervals.push_back(make_pair(hash + i, interval));
+ bounds_intervals.push_back(make_pair(hash2 + i, bounds));
+ }
+ PredicateIndex index(generation_handler, generation_holder, dummy_provider, simple_index_config, 8);
+ EXPECT_FALSE(index.getIntervalIndex().lookup(hash).valid());
+ for (uint32_t id = 1; id < 100; ++id) {
+ indexFeature(index, id, id, intervals, bounds_intervals);
+ index.indexEmptyDocument(id + 100);
+ }
+ index.commit();
+
+ vespalib::MMapDataBuffer buffer;
+ index.serialize(buffer);
+ uint32_t doc_id_limit;
+ DocIdLimitFinder finder(doc_id_limit);
+ PredicateIndex index2(generation_handler, generation_holder, dummy_provider, simple_index_config,
+ buffer, finder, PredicateAttribute::PREDICATE_ATTRIBUTE_VERSION);
+ const PredicateIntervalStore &interval_store = index2.getIntervalStore();
+ EXPECT_EQUAL(199u, doc_id_limit);
+
+ EXPECT_EQUAL(index.getArity(), index2.getArity());
+ EXPECT_EQUAL(index.getZeroConstraintDocs().size(),
+ index2.getZeroConstraintDocs().size());
+ {
+ auto it = index2.getZeroConstraintDocs().begin();
+ for (uint32_t i = 1; i < 100u; ++i) {
+ TEST_STATE(vespalib::make_string("%d", i).c_str());
+ ASSERT_TRUE(it.valid());
+ EXPECT_EQUAL(i + 100, it.getKey());
+ ++it;
+ }
+ EXPECT_FALSE(it.valid());
+ }
+
+ const auto &interval_index = index2.getIntervalIndex();
+ const auto &bounds_index = index2.getBoundsIndex();
+ for (int i = 0; i < 100; ++i) {
+ {
+ auto it = interval_index.lookup(hash + i);
+ ASSERT_TRUE(it.valid());
+ auto posting_it = interval_index.getBTreePostingList(it.getData());
+ checkAllIntervals(posting_it, interval, interval_store);
+ }
+ {
+ auto it = bounds_index.lookup(hash2 + i);
+ ASSERT_TRUE(it.valid());
+ auto posting_it = bounds_index.getBTreePostingList(it.getData());
+ checkAllIntervals(posting_it, bounds, interval_store);
+ }
+ }
+}
+
+TEST("require that DocumentFeaturesStore is restored on deserialization") {
+ PredicateIndex index(generation_handler, generation_holder, dummy_provider, simple_index_config, 10);
+ EXPECT_FALSE(index.getIntervalIndex().lookup(hash).valid());
+ indexFeature(index, doc_id, min_feature,
+ {{hash, interval}}, {{hash2, bounds}});
+ vespalib::MMapDataBuffer buffer;
+ index.serialize(buffer);
+ uint32_t doc_id_limit;
+ DocIdLimitFinder finder(doc_id_limit);
+ PredicateIndex index2(generation_handler, generation_holder, dummy_provider, simple_index_config,
+ buffer, finder, PredicateAttribute::PREDICATE_ATTRIBUTE_VERSION);
+ const auto &interval_index = index2.getIntervalIndex();
+ const auto &bounds_index = index2.getBoundsIndex();
+ EXPECT_EQUAL(doc_id, doc_id_limit);
+
+ auto it = interval_index.lookup(hash);
+ EXPECT_TRUE(it.valid());
+ auto it2 = bounds_index.lookup(hash2);
+ EXPECT_TRUE(it2.valid());
+
+ index2.removeDocument(doc_id);
+ index2.commit();
+
+ it = interval_index.lookup(hash);
+ EXPECT_FALSE(it.valid());
+ it2 = bounds_index.lookup(hash2);
+ EXPECT_FALSE(it2.valid());
+}
+
+TEST("require that hold lists are attempted emptied on destruction") {
+ PredicateIndex index(generation_handler, generation_holder, dummy_provider, simple_index_config, 10);
+ indexFeature(index, doc_id, min_feature,
+ {{hash, interval}}, {{hash2, bounds}});
+ {
+ auto guard = generation_handler.takeGuard();
+ index.removeDocument(doc_id);
+ index.commit();
+ }
+ // No assert on index destruction.
+}
+} // namespace
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/predicate/predicate_interval_posting_list_test.cpp b/searchlib/src/tests/predicate/predicate_interval_posting_list_test.cpp
new file mode 100644
index 00000000000..1c44c096717
--- /dev/null
+++ b/searchlib/src/tests/predicate/predicate_interval_posting_list_test.cpp
@@ -0,0 +1,80 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Unit tests for predicate_interval_posting_list.
+
+#include <vespa/log/log.h>
+LOG_SETUP("predicate_interval_posting_list_test");
+#include <vespa/fastos/fastos.h>
+
+#include <vespa/searchlib/predicate/predicate_tree_annotator.h>
+#include <vespa/searchlib/predicate/predicate_interval_posting_list.h>
+#include <vespa/vespalib/testkit/testapp.h>
+
+using namespace search;
+using namespace search::predicate;
+namespace {
+
+struct DummyDocIdLimitProvider : public DocIdLimitProvider {
+ virtual uint32_t getDocIdLimit() const { return 10000; }
+ virtual uint32_t getCommittedDocIdLimit() const { return 10000; }
+};
+
+vespalib::GenerationHandler generation_handler;
+vespalib::GenerationHolder generation_holder;
+DummyDocIdLimitProvider limit_provider;
+SimpleIndexConfig config;
+const uint64_t hash = 0x123;
+
+TEST("require that empty posting list starts at 0.") {
+ PredicateIndex index(generation_handler, generation_holder, limit_provider, config, 8);
+ btree::EntryRef ref;
+ PredicateIntervalPostingList<PredicateIndex::BTreeIterator>
+ posting_list(index.getIntervalStore(), index.getIntervalIndex().getBTreePostingList(ref));
+ EXPECT_EQUAL(0u, posting_list.getDocId());
+ EXPECT_EQUAL(0u, posting_list.getInterval());
+ EXPECT_FALSE(posting_list.next(0));
+}
+
+TEST("require that posting list can iterate.") {
+ PredicateIndex index(generation_handler, generation_holder, limit_provider, config, 8);
+ const auto &interval_index = index.getIntervalIndex();
+ for (uint32_t id = 1; id < 100; ++id) {
+ PredicateTreeAnnotations annotations(id);
+ auto &vec = annotations.interval_map[hash];
+ for (uint32_t i = 0; i < id; ++i) {
+ vec.push_back(Interval{(i + 1) << 16 | 0xffff});
+ }
+ index.indexDocument(id, annotations);
+ }
+ index.commit();
+ auto it = interval_index.lookup(hash);
+ ASSERT_TRUE(it.valid());
+ auto ref = it.getData();
+
+ PredicateIntervalPostingList<PredicateIndex::BTreeIterator>
+ posting_list(index.getIntervalStore(), interval_index.getBTreePostingList(ref));
+ EXPECT_EQUAL(0u, posting_list.getDocId());
+ EXPECT_EQUAL(0u, posting_list.getInterval());
+ EXPECT_TRUE(posting_list.next(0));
+ EXPECT_EQUAL(1u, posting_list.getDocId());
+ EXPECT_EQUAL(0x0001ffffu, posting_list.getInterval());
+ ASSERT_FALSE(posting_list.nextInterval());
+ ASSERT_TRUE(posting_list.next(1));
+ EXPECT_EQUAL(2u, posting_list.getDocId());
+ EXPECT_EQUAL(0x0001ffffu, posting_list.getInterval());
+ ASSERT_TRUE(posting_list.nextInterval());
+ EXPECT_EQUAL(0x0002ffffu, posting_list.getInterval());
+ ASSERT_FALSE(posting_list.nextInterval());
+
+ ASSERT_TRUE(posting_list.next(50));
+ EXPECT_EQUAL(51u, posting_list.getDocId());
+ for (uint32_t i = 0; i < 50; ++i) {
+ EXPECT_EQUAL((i + 1) << 16 | 0xffff, posting_list.getInterval());
+ ASSERT_TRUE(posting_list.nextInterval());
+ }
+ EXPECT_EQUAL(0x0033ffffu, posting_list.getInterval());
+ ASSERT_FALSE(posting_list.nextInterval());
+}
+
+} // namespace
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/predicate/predicate_interval_store_test.cpp b/searchlib/src/tests/predicate/predicate_interval_store_test.cpp
new file mode 100644
index 00000000000..bfe6340e222
--- /dev/null
+++ b/searchlib/src/tests/predicate/predicate_interval_store_test.cpp
@@ -0,0 +1,152 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Unit tests for predicate_interval_store.
+
+#include <vespa/log/log.h>
+LOG_SETUP("predicate_interval_store_test");
+#include <vespa/fastos/fastos.h>
+
+#include <vespa/searchlib/predicate/predicate_interval_store.h>
+
+#include <vespa/searchlib/predicate/predicate_index.h>
+#include <vespa/vespalib/testkit/testapp.h>
+#include <vector>
+
+using namespace search;
+using namespace search::predicate;
+using std::vector;
+
+namespace {
+
+TEST("require that empty interval list gives invalid ref") {
+ PredicateIntervalStore store;
+ vector<Interval> interval_list;
+ auto ref = store.insert(interval_list);
+ ASSERT_FALSE(ref.valid());
+}
+
+Interval single_buf;
+
+template <typename IntervalT>
+void testInsertAndRetrieve(const std::vector<IntervalT> &interval_list) {
+ std::ostringstream ost;
+ ost << "Type name: " << typeid(IntervalT).name() << ", intervals:";
+ for (auto &i : interval_list) {
+ ost << " 0x" << std::hex << i.interval;
+ }
+ TEST_STATE(ost.str().c_str());
+ PredicateIntervalStore store;
+ auto ref = store.insert(interval_list);
+ ASSERT_TRUE(ref.valid());
+
+ uint32_t size;
+ IntervalT single;
+ const IntervalT *intervals = store.get(ref, size, &single);
+ EXPECT_EQUAL(interval_list.size(), size);
+ ASSERT_TRUE(intervals);
+ for (size_t i = 0; i < interval_list.size(); ++i) {
+ EXPECT_EQUAL(interval_list[i], intervals[i]);
+ }
+}
+
+TEST("require that single interval entry can be inserted") {
+ testInsertAndRetrieve<Interval>({{0x0001ffff}});
+ testInsertAndRetrieve<IntervalWithBounds>({{0x0001ffff, 0x3}});
+}
+
+TEST("require that multi-interval entry can be inserted") {
+ testInsertAndRetrieve<Interval>({{0x00010001}, {0x0002ffff}});
+ testInsertAndRetrieve<Interval>(
+ {{0x00010001}, {0x00020002}, {0x0003ffff}});
+ testInsertAndRetrieve<Interval>(
+ {{0x00010001}, {0x00020002}, {0x00030003}, {0x00040004},
+ {0x00050005}, {0x00060006}, {0x00070007}, {0x00080008},
+ {0x0009ffff}});
+ testInsertAndRetrieve<IntervalWithBounds>(
+ {{0x00010001, 0x4}, {0x0002ffff, 0x10}});
+ testInsertAndRetrieve<IntervalWithBounds>(
+ {{0x00010001, 0x4}, {0x00020002, 0x10}, {0x00030003, 0x20},
+ {0x00040004, 0x6}, {0x0005ffff, 0x7}});
+}
+
+TEST("require that multiple multi-interval entries can be retrieved") {
+ PredicateIntervalStore store;
+ auto ref = store.insert<Interval>({{1}, {2}});
+ ASSERT_TRUE(ref.valid());
+ ref = store.insert<Interval>({{3}, {4}});
+ ASSERT_TRUE(ref.valid());
+
+ uint32_t size;
+ const Interval *intervals = store.get(ref, size, &single_buf);
+ EXPECT_EQUAL(2u, size);
+ ASSERT_TRUE(intervals);
+ EXPECT_EQUAL(3u, intervals[0].interval);
+ EXPECT_EQUAL(4u, intervals[1].interval);
+}
+
+/*
+TEST("require that entries can be removed and reused") {
+ GenerationHandler gen_handler;
+ PredicateIntervalStore store(gen_handler);
+ auto ref = store.insert<IntervalWithBounds>({{0x0001ffff, 5}});
+ ASSERT_TRUE(ref.valid());
+ store.remove(ref);
+
+ auto ref2 = store.insert<Interval>({{1}, {2}, {3}, {4}, {5},
+ {6}, {7}, {8}, {9}});
+ ASSERT_TRUE(ref2.valid());
+ store.remove(ref2);
+ store.commit();
+
+ auto ref3 = store.insert<IntervalWithBounds>({{0x0002ffff, 10}});
+ ASSERT_EQUAL(ref.ref(), ref3.ref());
+
+ uint32_t size;
+ IntervalWithBounds single;
+ const IntervalWithBounds *bounds = store.get(ref3, size, &single);
+ EXPECT_EQUAL(1u, size);
+ EXPECT_EQUAL(0x0002ffffu, bounds->interval);
+ EXPECT_EQUAL(10u, bounds->bounds);
+
+ auto ref4 = store.insert<Interval>({{2}, {3}, {4}, {5},
+ {6}, {7}, {8}, {9}, {10}});
+ ASSERT_EQUAL(ref2.ref(), ref4.ref());
+
+ const Interval *intervals = store.get(ref4, size, &single_buf);
+ EXPECT_EQUAL(9u, size);
+ EXPECT_EQUAL(2u, intervals[0].interval);
+ EXPECT_EQUAL(10u, intervals[8].interval);
+}
+*/
+
+TEST("require that single interval entries are optimized") {
+ PredicateIntervalStore store;
+ auto ref = store.insert<Interval>({{0x0001ffff}});
+ ASSERT_TRUE(ref.valid());
+ ASSERT_EQUAL(0x0001ffffu, ref.ref());
+
+ uint32_t size;
+ const Interval *intervals = store.get(ref, size, &single_buf);
+ ASSERT_EQUAL(intervals, &single_buf);
+ EXPECT_EQUAL(0x0001ffffu, single_buf.interval);
+
+ store.remove(ref); // Should do nothing
+}
+
+TEST("require that interval refs are reused for identical data.") {
+ PredicateIntervalStore store;
+ auto ref = store.insert<Interval>({{0x00010001}, {0x0002ffff}});
+ ASSERT_TRUE(ref.valid());
+ ASSERT_EQUAL(0x02000040u, ref.ref());
+
+ auto ref2 = store.insert<Interval>({{0x00010001}, {0x0002ffff}});
+ EXPECT_EQUAL(ref.ref(), ref2.ref());
+
+ uint32_t size;
+ const Interval *intervals = store.get(ref, size, &single_buf);
+ EXPECT_EQUAL(0x00010001u, intervals[0].interval);
+ EXPECT_EQUAL(0x0002ffffu, intervals[1].interval);
+}
+
+} // namespace
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/predicate/predicate_range_term_expander_test.cpp b/searchlib/src/tests/predicate/predicate_range_term_expander_test.cpp
new file mode 100644
index 00000000000..47c29184dcf
--- /dev/null
+++ b/searchlib/src/tests/predicate/predicate_range_term_expander_test.cpp
@@ -0,0 +1,332 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Unit tests for predicate_range_term_expander.
+
+#include <vespa/log/log.h>
+LOG_SETUP("predicate_range_term_expander_test");
+#include <vespa/fastos/fastos.h>
+
+#include <vespa/searchlib/predicate/predicate_range_term_expander.h>
+#include <vespa/vespalib/testkit/testapp.h>
+
+using search::predicate::PredicateRangeTermExpander;
+using std::vector;
+using vespalib::string;
+
+namespace {
+
+struct MyRangeHandler {
+ vector<string> expected_labels;
+ string expected_edge_label;
+ uint64_t expected_edge_value;
+ size_t i;
+ ~MyRangeHandler() {
+ EXPECT_EQUAL(expected_labels.size(), i);
+ }
+ void handleRange(const string &label) {
+ TEST_STATE(("handleRange: " + label).c_str());
+ ASSERT_TRUE(i < expected_labels.size());
+ EXPECT_EQUAL(expected_labels[i++], label);
+ }
+ void handleEdge(const string &label, uint64_t value) {
+ TEST_STATE(("handleEdge: " + label).c_str());
+ EXPECT_EQUAL(expected_edge_label, label);
+ EXPECT_EQUAL(expected_edge_value, value);
+ }
+};
+
+TEST("require that small range is expanded") {
+ PredicateRangeTermExpander expander(10);
+ MyRangeHandler range_handler{{
+ "key=40-49",
+ "key=0-99",
+ "key=0-999",
+ "key=0-9999",
+ "key=0-99999",
+ "key=0-999999",
+ "key=0-9999999",
+ "key=0-99999999",
+ "key=0-999999999",
+ "key=0-9999999999",
+ "key=0-99999999999",
+ "key=0-999999999999",
+ "key=0-9999999999999",
+ "key=0-99999999999999",
+ "key=0-999999999999999",
+ "key=0-9999999999999999",
+ "key=0-99999999999999999",
+ "key=0-999999999999999999"}, "key=40", 2, 0};
+ expander.expand("key", 42, range_handler);
+}
+
+TEST("require that large range is expanded") {
+ PredicateRangeTermExpander expander(10);
+ MyRangeHandler range_handler{{
+ "key=123456789012345670-123456789012345679",
+ "key=123456789012345600-123456789012345699",
+ "key=123456789012345000-123456789012345999",
+ "key=123456789012340000-123456789012349999",
+ "key=123456789012300000-123456789012399999",
+ "key=123456789012000000-123456789012999999",
+ "key=123456789010000000-123456789019999999",
+ "key=123456789000000000-123456789099999999",
+ "key=123456789000000000-123456789999999999",
+ "key=123456780000000000-123456789999999999",
+ "key=123456700000000000-123456799999999999",
+ "key=123456000000000000-123456999999999999",
+ "key=123450000000000000-123459999999999999",
+ "key=123400000000000000-123499999999999999",
+ "key=123000000000000000-123999999999999999",
+ "key=120000000000000000-129999999999999999",
+ "key=100000000000000000-199999999999999999",
+ "key=0-999999999999999999"},
+ "key=123456789012345670", 8, 0};
+ expander.expand("key", 123456789012345678, range_handler);
+}
+
+TEST("require that max range is expanded") {
+ PredicateRangeTermExpander expander(10);
+ MyRangeHandler range_handler{{}, "key=9223372036854775800", 7, 0};
+ expander.expand("key", 9223372036854775807, range_handler);
+}
+
+TEST("require that small negative range is expanded") {
+ PredicateRangeTermExpander expander(10);
+ MyRangeHandler range_handler{{
+ "key=-49-40",
+ "key=-99-0",
+ "key=-999-0",
+ "key=-9999-0",
+ "key=-99999-0",
+ "key=-999999-0",
+ "key=-9999999-0",
+ "key=-99999999-0",
+ "key=-999999999-0",
+ "key=-9999999999-0",
+ "key=-99999999999-0",
+ "key=-999999999999-0",
+ "key=-9999999999999-0",
+ "key=-99999999999999-0",
+ "key=-999999999999999-0",
+ "key=-9999999999999999-0",
+ "key=-99999999999999999-0",
+ "key=-999999999999999999-0"}, "key=-40", 2, 0};
+ expander.expand("key", -42, range_handler);
+}
+
+TEST("require that min range is expanded") {
+ PredicateRangeTermExpander expander(10);
+ MyRangeHandler range_handler{{}, "key=-9223372036854775800", 8, 0};
+ expander.expand("key", -9223372036854775808ull, range_handler);
+}
+TEST("require that min range - 9 is expanded") {
+ PredicateRangeTermExpander expander(10);
+ MyRangeHandler range_handler{{
+ "key=-9223372036854775799-9223372036854775790",
+ "key=-9223372036854775799-9223372036854775700"},
+ "key=-9223372036854775790", 9, 0};
+ expander.expand("key", -9223372036854775799ll, range_handler);
+}
+
+TEST("require that min range is expanded with arity 8") {
+ PredicateRangeTermExpander expander(8);
+ MyRangeHandler range_handler{{}, "key=-9223372036854775808", 0, 0};
+ expander.expand("key", -9223372036854775808ull, range_handler);
+}
+
+TEST("require that small range is expanded in arity 2") {
+ PredicateRangeTermExpander expander(2);
+ MyRangeHandler range_handler{{
+ "key=42-43",
+ "key=40-43",
+ "key=40-47",
+ "key=32-47",
+ "key=32-63",
+ "key=0-63",
+ "key=0-127",
+ "key=0-255",
+ "key=0-511",
+ "key=0-1023",
+ "key=0-2047",
+ "key=0-4095",
+ "key=0-8191",
+ "key=0-16383",
+ "key=0-32767",
+ "key=0-65535",
+ "key=0-131071",
+ "key=0-262143",
+ "key=0-524287",
+ "key=0-1048575",
+ "key=0-2097151",
+ "key=0-4194303",
+ "key=0-8388607",
+ "key=0-16777215",
+ "key=0-33554431",
+ "key=0-67108863",
+ "key=0-134217727",
+ "key=0-268435455",
+ "key=0-536870911",
+ "key=0-1073741823",
+ "key=0-2147483647",
+ "key=0-4294967295",
+ "key=0-8589934591",
+ "key=0-17179869183",
+ "key=0-34359738367",
+ "key=0-68719476735",
+ "key=0-137438953471",
+ "key=0-274877906943",
+ "key=0-549755813887",
+ "key=0-1099511627775",
+ "key=0-2199023255551",
+ "key=0-4398046511103",
+ "key=0-8796093022207",
+ "key=0-17592186044415",
+ "key=0-35184372088831",
+ "key=0-70368744177663",
+ "key=0-140737488355327",
+ "key=0-281474976710655",
+ "key=0-562949953421311",
+ "key=0-1125899906842623",
+ "key=0-2251799813685247",
+ "key=0-4503599627370495",
+ "key=0-9007199254740991",
+ "key=0-18014398509481983",
+ "key=0-36028797018963967",
+ "key=0-72057594037927935",
+ "key=0-144115188075855871",
+ "key=0-288230376151711743",
+ "key=0-576460752303423487",
+ "key=0-1152921504606846975",
+ "key=0-2305843009213693951",
+ "key=0-4611686018427387903",
+ "key=0-9223372036854775807"}, "key=42", 0, 0};
+ expander.expand("key", 42, range_handler);
+}
+
+TEST("require that small negative range is expanded in arity 2") {
+ PredicateRangeTermExpander expander(2);
+ MyRangeHandler range_handler{{
+ "key=-43-42",
+ "key=-43-40",
+ "key=-47-40",
+ "key=-47-32",
+ "key=-63-32",
+ "key=-63-0",
+ "key=-127-0",
+ "key=-255-0",
+ "key=-511-0",
+ "key=-1023-0",
+ "key=-2047-0",
+ "key=-4095-0",
+ "key=-8191-0",
+ "key=-16383-0",
+ "key=-32767-0",
+ "key=-65535-0",
+ "key=-131071-0",
+ "key=-262143-0",
+ "key=-524287-0",
+ "key=-1048575-0",
+ "key=-2097151-0",
+ "key=-4194303-0",
+ "key=-8388607-0",
+ "key=-16777215-0",
+ "key=-33554431-0",
+ "key=-67108863-0",
+ "key=-134217727-0",
+ "key=-268435455-0",
+ "key=-536870911-0",
+ "key=-1073741823-0",
+ "key=-2147483647-0",
+ "key=-4294967295-0",
+ "key=-8589934591-0",
+ "key=-17179869183-0",
+ "key=-34359738367-0",
+ "key=-68719476735-0",
+ "key=-137438953471-0",
+ "key=-274877906943-0",
+ "key=-549755813887-0",
+ "key=-1099511627775-0",
+ "key=-2199023255551-0",
+ "key=-4398046511103-0",
+ "key=-8796093022207-0",
+ "key=-17592186044415-0",
+ "key=-35184372088831-0",
+ "key=-70368744177663-0",
+ "key=-140737488355327-0",
+ "key=-281474976710655-0",
+ "key=-562949953421311-0",
+ "key=-1125899906842623-0",
+ "key=-2251799813685247-0",
+ "key=-4503599627370495-0",
+ "key=-9007199254740991-0",
+ "key=-18014398509481983-0",
+ "key=-36028797018963967-0",
+ "key=-72057594037927935-0",
+ "key=-144115188075855871-0",
+ "key=-288230376151711743-0",
+ "key=-576460752303423487-0",
+ "key=-1152921504606846975-0",
+ "key=-2305843009213693951-0",
+ "key=-4611686018427387903-0",
+ "key=-9223372036854775807-0"}, "key=-42", 0, 0};
+ expander.expand("key", -42, range_handler);
+}
+
+TEST("require that upper bound is used") {
+ PredicateRangeTermExpander expander(10, -99, 9999);
+ MyRangeHandler range_handler{{
+ "key=40-49",
+ "key=0-99",
+ "key=0-999",
+ "key=0-9999"}, "key=40", 2, 0};
+ expander.expand("key", 42, range_handler);
+}
+
+TEST("require that lower bound is used") {
+ PredicateRangeTermExpander expander(10, -9999, 99);
+ MyRangeHandler range_handler{{
+ "key=-49-40",
+ "key=-99-0",
+ "key=-999-0",
+ "key=-9999-0"}, "key=-40", 2, 0};
+ expander.expand("key", -42, range_handler);
+}
+
+TEST("require that value outside bounds is not used") {
+ PredicateRangeTermExpander expander(10, -99, 99);
+ MyRangeHandler range_handler{{}, "handleEdge is never called", 2, 0};
+ expander.expand("key", 100, range_handler);
+}
+
+TEST("require that upper and lower bound > 0 works") {
+ PredicateRangeTermExpander expander(10, 100, 9999);
+ MyRangeHandler range_handler{{
+ "key=140-149",
+ "key=100-199",
+ "key=0-999",
+ "key=0-9999"}, "key=140", 2, 0};
+ expander.expand("key", 142, range_handler);
+}
+
+TEST("require that search close to uneven upper bound is sensible") {
+ PredicateRangeTermExpander expander(10, -99, 1234);
+ MyRangeHandler range_handler{{
+ "key=40-49",
+ "key=0-99",
+ "key=0-999",
+ "key=0-9999"}, "key=40", 2, 0};
+ expander.expand("key", 42, range_handler);
+}
+
+TEST("require that search close to max uneven upper bound is sensible") {
+ PredicateRangeTermExpander expander(10, 0, 9223372036854771234);
+ MyRangeHandler range_handler{{
+ "key=9223372036854770000-9223372036854770009",
+ "key=9223372036854770000-9223372036854770099",
+ "key=9223372036854770000-9223372036854770999"},
+ "key=9223372036854770000", 0, 0};
+ expander.expand("key", 9223372036854770000, range_handler);
+}
+
+} // namespace
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/predicate/predicate_ref_cache_test.cpp b/searchlib/src/tests/predicate/predicate_ref_cache_test.cpp
new file mode 100644
index 00000000000..a51f3d678d5
--- /dev/null
+++ b/searchlib/src/tests/predicate/predicate_ref_cache_test.cpp
@@ -0,0 +1,106 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Unit tests for predicate_ref_cache.
+
+#include <vespa/log/log.h>
+LOG_SETUP("predicate_ref_cache_test");
+#include <vespa/fastos/fastos.h>
+
+#include <vespa/searchlib/predicate/predicate_ref_cache.h>
+#include <vespa/vespalib/testkit/testapp.h>
+#include <vector>
+
+using namespace search;
+using namespace search::predicate;
+
+namespace {
+
+struct MyBufferStore {
+ std::vector<uint32_t> store;
+ const uint32_t *getBuffer(uint32_t ref) const {
+ ASSERT_LESS(ref, store.size());
+ return &store[ref];
+ }
+ uint32_t insert(uint32_t value) {
+ size_t size = store.size();
+ store.push_back(value);
+ return size | 0x01000000; // size = 1
+ }
+ uint32_t insert(std::vector<uint32_t> data) {
+ size_t size = store.size();
+ uint8_t data_size = data.size();
+ if (data.size() >= 0xff) {
+ store.push_back(data.size());
+ data_size = 0xff;
+ }
+ store.insert(store.end(), data.begin(), data.end());
+ return size | (data_size << 24);
+ }
+};
+
+TEST("require that single entries are cached") {
+ MyBufferStore store;
+ PredicateRefCache<MyBufferStore> cache(store);
+
+ uint32_t ref = store.insert(42);
+ uint32_t new_ref = cache.insert(ref);
+ EXPECT_EQUAL(ref, new_ref);
+
+ uint32_t ref2 = store.insert(42);
+ new_ref = cache.insert(ref2);
+ EXPECT_EQUAL(ref, new_ref);
+
+ uint32_t ref3 = store.insert(44);
+ new_ref = cache.insert(ref3);
+ EXPECT_EQUAL(ref3, new_ref);
+}
+
+TEST("require that multivalue entries are cached") {
+ MyBufferStore store;
+ PredicateRefCache<MyBufferStore> cache(store);
+
+ std::vector<uint32_t> data1 = {1, 2, 3, 4, 5};
+ std::vector<uint32_t> data2 = {1, 2, 3, 4, 6};
+ uint32_t ref = store.insert(data1);
+ uint32_t new_ref = cache.insert(ref);
+ EXPECT_EQUAL(ref, new_ref);
+
+ uint32_t ref2 = store.insert(data1);
+ new_ref = cache.insert(ref2);
+ EXPECT_EQUAL(ref, new_ref);
+
+ uint32_t ref3 = store.insert(data2);
+ new_ref = cache.insert(ref3);
+ EXPECT_EQUAL(ref3, new_ref);
+}
+
+TEST("require that entries can be looked up") {
+ MyBufferStore store;
+ PredicateRefCache<MyBufferStore> cache(store);
+
+ uint32_t data = 42;
+ EXPECT_EQUAL(0u, cache.find(&data, 1));
+ uint32_t ref = store.insert(42);
+ cache.insert(ref);
+ EXPECT_EQUAL(ref, cache.find(&data, 1));
+}
+
+TEST("require that cache handles large entries") {
+ MyBufferStore store;
+ PredicateRefCache<MyBufferStore> cache(store);
+
+ std::vector<uint32_t> data1(300);
+ std::vector<uint32_t> data2(300);
+ data2.back() = 42;
+ uint32_t ref1 = store.insert(data1);
+ cache.insert(ref1);
+ EXPECT_EQUAL(ref1, cache.find(&data1[0], data1.size()));
+ EXPECT_EQUAL(0u, cache.find(&data2[0], data2.size()));
+ uint32_t ref2 = store.insert(data2);
+ uint32_t ref = cache.insert(ref2);
+ EXPECT_EQUAL(ref, ref2);
+ EXPECT_EQUAL(ref2, cache.find(&data2[0], data2.size()));
+}
+
+} // namespace
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/predicate/predicate_tree_analyzer_test.cpp b/searchlib/src/tests/predicate/predicate_tree_analyzer_test.cpp
new file mode 100644
index 00000000000..f455abced3f
--- /dev/null
+++ b/searchlib/src/tests/predicate/predicate_tree_analyzer_test.cpp
@@ -0,0 +1,157 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Unit tests for PredicateTreeAnalyzer.
+
+#include <vespa/log/log.h>
+LOG_SETUP("PredicateTreeAnalyzer_test");
+#include <vespa/fastos/fastos.h>
+
+#include <vespa/document/predicate/predicate.h>
+#include <vespa/document/predicate/predicate_slime_builder.h>
+#include <vespa/searchlib/predicate/predicate_tree_analyzer.h>
+#include <vespa/vespalib/testkit/testapp.h>
+
+using document::PredicateSlimeBuilder;
+using namespace search;
+using namespace search::predicate;
+using document::Predicate;
+using vespalib::Slime;
+using vespalib::slime::Cursor;
+using std::map;
+using std::string;
+
+namespace {
+typedef PredicateSlimeBuilder Builder;
+
+TEST("require that minfeature is 1 for simple term") {
+ auto slime(Builder().feature("foo").value("bar").build());
+ PredicateTreeAnalyzer analyzer(slime->get());
+ EXPECT_EQUAL(1, analyzer.getMinFeature());
+ EXPECT_EQUAL(1, analyzer.getSize());
+ EXPECT_TRUE(analyzer.getSizeMap().empty());
+}
+
+TEST("require that minfeature is 1 for simple negative term") {
+ auto slime(Builder().neg().feature("foo").value("bar").build());
+ PredicateTreeAnalyzer analyzer(slime->get());
+ EXPECT_EQUAL(1, analyzer.getMinFeature());
+ EXPECT_EQUAL(2, analyzer.getSize());
+}
+
+void checkSizeMap(const map<string, int> &map, const string &key, int val) {
+ auto it = map.find(key);
+ ASSERT_TRUE(it != map.end());
+ EXPECT_EQUAL(val, it->second);
+}
+
+TEST("require that minfeature is sum for and") {
+ auto slime(Builder()
+ .and_node({Builder().feature("foo").value("bar"),
+ Builder().feature("baz").value("qux"),
+ Builder().feature("quux").value("corge")}).build());
+ PredicateTreeAnalyzer analyzer(slime->get());
+ EXPECT_EQUAL(3, analyzer.getMinFeature());
+ EXPECT_EQUAL(3, analyzer.getSize());
+ EXPECT_EQUAL(3u, analyzer.getSizeMap().size());
+ TEST_DO(checkSizeMap(analyzer.getSizeMap(), "a0", 1));
+ TEST_DO(checkSizeMap(analyzer.getSizeMap(), "a1", 1));
+ TEST_DO(checkSizeMap(analyzer.getSizeMap(), "a2", 1));
+}
+
+TEST("require that minfeature is min for or") {
+ auto slime(Builder().or_node
+ ({Builder().and_node
+ ({Builder().feature("foo").value("bar"),
+ Builder().feature("baz").value("qux"),
+ Builder().feature("quux").value("corge")}),
+ Builder().and_node
+ ({Builder().feature("grault").value("garply"),
+ Builder().feature("waldo").value("fred")})})
+ .build());
+ PredicateTreeAnalyzer analyzer(slime->get());
+ EXPECT_EQUAL(2, analyzer.getMinFeature());
+ EXPECT_EQUAL(5, analyzer.getSize());
+ EXPECT_EQUAL(5u, analyzer.getSizeMap().size());
+ TEST_DO(checkSizeMap(analyzer.getSizeMap(), "o0a0", 1));
+ TEST_DO(checkSizeMap(analyzer.getSizeMap(), "o0a1", 1));
+ TEST_DO(checkSizeMap(analyzer.getSizeMap(), "o0a2", 1));
+ TEST_DO(checkSizeMap(analyzer.getSizeMap(), "o1a0", 1));
+ TEST_DO(checkSizeMap(analyzer.getSizeMap(), "o1a1", 1));
+}
+
+TEST("require that minfeature rounds up") {
+ auto slime(Builder()
+ .or_node({Builder().feature("foo").value("bar"),
+ Builder().feature("foo").value("bar"),
+ Builder().feature("foo").value("bar")}).build());
+ PredicateTreeAnalyzer analyzer(slime->get());
+ EXPECT_EQUAL(1, analyzer.getMinFeature());
+ EXPECT_EQUAL(3, analyzer.getSize());
+}
+
+TEST("require that multivalue feature set considers all values") {
+ {
+ auto slime(Builder()
+ .and_node({Builder().feature("foo").value("A").value("B"),
+ Builder().feature("foo").value("B")}).build());
+ PredicateTreeAnalyzer analyzer(slime->get());
+ EXPECT_EQUAL(1, analyzer.getMinFeature());
+ EXPECT_EQUAL(2, analyzer.getSize());
+ }
+ {
+ auto slime(Builder()
+ .and_node({Builder().feature("foo").value("A").value("B"),
+ Builder().feature("foo").value("C")}).build());
+ PredicateTreeAnalyzer analyzer(slime->get());
+ EXPECT_EQUAL(2, analyzer.getMinFeature());
+ EXPECT_EQUAL(2, analyzer.getSize());
+ }
+}
+
+TEST("require that not-features don't count towards minfeature calculation") {
+ auto slime(Builder()
+ .and_node({Builder().feature("foo").value("A"),
+ Builder().neg().feature("foo").value("A"),
+ Builder().neg().feature("foo").value("B"),
+ Builder().feature("foo").value("B")}).build());
+ PredicateTreeAnalyzer analyzer(slime->get());
+ EXPECT_EQUAL(3, analyzer.getMinFeature());
+ EXPECT_EQUAL(6, analyzer.getSize());
+}
+
+TEST("require that not-ranges don't count towards minfeature calculation") {
+ auto slime(Builder()
+ .and_node({Builder().feature("foo").range(0, 10),
+ Builder().neg().feature("foo").range(0, 10),
+ Builder().neg().feature("bar").range(0, 10),
+ Builder().feature("bar").range(0, 10)}).build());
+ PredicateTreeAnalyzer analyzer(slime->get());
+ EXPECT_EQUAL(3, analyzer.getMinFeature());
+ EXPECT_EQUAL(6, analyzer.getSize());
+}
+
+TEST("require that multilevel AND stores sizes") {
+ auto slime(Builder().and_node
+ ({Builder().and_node
+ ({Builder().feature("foo").value("bar"),
+ Builder().feature("baz").value("qux"),
+ Builder().feature("quux").value("corge")}),
+ Builder().and_node
+ ({Builder().feature("grault").value("garply"),
+ Builder().feature("waldo").value("fred")})})
+ .build());
+ PredicateTreeAnalyzer analyzer(slime->get());
+ EXPECT_EQUAL(5, analyzer.getMinFeature());
+ EXPECT_EQUAL(5, analyzer.getSize());
+ EXPECT_EQUAL(7u, analyzer.getSizeMap().size());
+ TEST_DO(checkSizeMap(analyzer.getSizeMap(), "a0", 3));
+ TEST_DO(checkSizeMap(analyzer.getSizeMap(), "a1", 2));
+ TEST_DO(checkSizeMap(analyzer.getSizeMap(), "a0a0", 1));
+ TEST_DO(checkSizeMap(analyzer.getSizeMap(), "a0a1", 1));
+ TEST_DO(checkSizeMap(analyzer.getSizeMap(), "a0a2", 1));
+ TEST_DO(checkSizeMap(analyzer.getSizeMap(), "a1a0", 1));
+ TEST_DO(checkSizeMap(analyzer.getSizeMap(), "a1a1", 1));
+}
+
+} // namespace
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/predicate/predicate_tree_annotator_test.cpp b/searchlib/src/tests/predicate/predicate_tree_annotator_test.cpp
new file mode 100644
index 00000000000..92271cd1c20
--- /dev/null
+++ b/searchlib/src/tests/predicate/predicate_tree_annotator_test.cpp
@@ -0,0 +1,381 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Unit tests for PredicateTreeAnnotator.
+
+#include <vespa/log/log.h>
+LOG_SETUP("PredicateTreeAnnotator_test");
+#include <vespa/fastos/fastos.h>
+
+#include <vespa/document/predicate/predicate.h>
+#include <vespa/document/predicate/predicate_slime_builder.h>
+#include <vespa/searchlib/predicate/predicate_index.h>
+#include <vespa/searchlib/predicate/predicate_tree_annotator.h>
+#include <vespa/searchlib/predicate/predicate_hash.h>
+#include <vespa/vespalib/data/slime/slime.h>
+#include <vespa/vespalib/testkit/testapp.h>
+#include <sstream>
+#include <vector>
+
+using document::Predicate;
+using std::ostringstream;
+using std::pair;
+using std::string;
+using std::vector;
+using vespalib::Slime;
+using vespalib::slime::Cursor;
+using namespace search;
+using namespace search::predicate;
+using namespace document::predicate_slime_builder;
+
+namespace {
+Cursor &makeAndNode(Cursor &obj) {
+ obj.setLong(Predicate::NODE_TYPE, Predicate::TYPE_CONJUNCTION);
+ return obj.setArray(Predicate::CHILDREN);
+}
+
+Cursor &makeOrNode(Cursor &obj) {
+ obj.setLong(Predicate::NODE_TYPE, Predicate::TYPE_DISJUNCTION);
+ return obj.setArray(Predicate::CHILDREN);
+}
+
+void makeFeatureSet(Cursor &obj, const string &key, const string &value) {
+ obj.setLong(Predicate::NODE_TYPE, Predicate::TYPE_FEATURE_SET);
+ obj.setString(Predicate::KEY, key);
+ Cursor &set = obj.setArray(Predicate::SET);
+ set.addString(value);
+}
+
+void makeHashedFeatureRange(Cursor &obj, const string &key,
+ const vector<string> &partitions,
+ const vector<vector<int64_t> >& edge_partitions) {
+ obj.setLong(Predicate::NODE_TYPE, Predicate::TYPE_FEATURE_RANGE);
+ obj.setString(Predicate::KEY, key);
+ Cursor &p = obj.setArray(Predicate::HASHED_PARTITIONS);
+ for (auto partition : partitions) {
+ p.addLong(PredicateHash::hash64(partition));
+ }
+ Cursor &e = obj.setArray(Predicate::HASHED_EDGE_PARTITIONS);
+ for (auto edge_partition : edge_partitions) {
+ ostringstream label;
+ label << key << "=" << edge_partition[0];
+ uint64_t hash = PredicateHash::hash64(label.str());
+ int64_t value = edge_partition[1];
+ int64_t payload = edge_partition[2];
+
+ Cursor &o = e.addObject();
+ o.setLong(Predicate::HASH, hash);
+ o.setLong(Predicate::VALUE, value);
+ o.setLong(Predicate::PAYLOAD, payload);
+ }
+}
+
+void checkInterval(const PredicateTreeAnnotations &result,
+ const string &feature, vector<uint32_t> expected) {
+ TEST_STATE(("Check interval: " + feature).c_str());
+ uint64_t hash = PredicateHash::hash64(feature);
+ auto it = result.interval_map.find(hash);
+ ASSERT_TRUE(it != result.interval_map.end());
+ const auto &intervals = it->second;
+ ASSERT_EQUAL(expected.size(), intervals.size());
+ for (size_t i = 0; i < expected.size(); ++i) {
+ EXPECT_EQUAL(expected[i], intervals[i].interval);
+ }
+}
+
+void checkBounds(const PredicateTreeAnnotations &result,
+ const string &feature,
+ vector<IntervalWithBounds> expected) {
+ TEST_STATE(("Check bounds: " + feature).c_str());
+ uint64_t hash = PredicateHash::hash64(feature);
+ auto it = result.bounds_map.find(hash);
+ ASSERT_TRUE(it != result.bounds_map.end());
+ const auto &intervals = it->second;
+ ASSERT_EQUAL(expected.size(), intervals.size());
+ for (size_t i = 0; i < expected.size(); ++i) {
+ EXPECT_EQUAL(expected[i].interval, intervals[i].interval);
+ EXPECT_EQUAL(expected[i].bounds, intervals[i].bounds);
+ }
+}
+
+TEST("require that OR intervals are the same") {
+ Slime slime;
+ Cursor &children = makeOrNode(slime.setObject());
+ makeFeatureSet(children.addObject(), "key1", "value1");
+ makeFeatureSet(children.addObject(), "key2", "value2");
+
+ PredicateTreeAnnotations result;
+ PredicateTreeAnnotator::annotate(slime.get(), result);
+
+ EXPECT_EQUAL(1u, result.min_feature);
+ EXPECT_EQUAL(2u, result.interval_range);
+ EXPECT_EQUAL(2u, result.interval_map.size());
+ checkInterval(result, "key1=value1", {0x00010002});
+ checkInterval(result, "key2=value2", {0x00010002});
+}
+
+TEST("require that ANDs below ORs get different intervals") {
+ auto slime = orNode({andNode({featureSet("key1", {"value1"}),
+ featureSet("key1", {"value1"}),
+ featureSet("key1", {"value1"})}),
+ andNode({featureSet("key2", {"value2"}),
+ featureSet("key2", {"value2"}),
+ featureSet("key2", {"value2"})})});
+ PredicateTreeAnnotations result;
+ PredicateTreeAnnotator::annotate(slime->get(), result);
+
+ EXPECT_EQUAL(1u, result.min_feature);
+ EXPECT_EQUAL(6u, result.interval_range);
+ EXPECT_EQUAL(2u, result.interval_map.size());
+ checkInterval(result, "key1=value1", {0x00010001, 0x00020002, 0x00030006});
+ checkInterval(result, "key2=value2", {0x00010004, 0x00050005, 0x00060006});
+}
+
+TEST("require that NOTs get correct intervals") {
+ auto slime = andNode({featureSet("key", {"value"}),
+ neg(featureSet("key", {"value"})),
+ featureSet("key", {"value"}),
+ neg(featureSet("key", {"value"}))});
+ PredicateTreeAnnotations result;
+ PredicateTreeAnnotator::annotate(slime->get(), result);
+
+ EXPECT_EQUAL(2u, result.min_feature); // needs key=value and z-star
+ EXPECT_EQUAL(6u, result.interval_range);
+ EXPECT_EQUAL(2u, result.interval_map.size());
+ checkInterval(result, "key=value",
+ {0x00010001, 0x00020002, 0x00040004, 0x00050005});
+ checkInterval(result, PredicateIndex::z_star_compressed_attribute_name,
+ {0x00020001, 0x00050004});
+}
+
+TEST("require that NOT inverts ANDs and ORs") {
+ auto slime = neg(andNode({featureSet("key", {"value"}),
+ neg(featureSet("key", {"value"}))}));
+ PredicateTreeAnnotations result;
+ PredicateTreeAnnotator::annotate(slime->get(), result);
+
+ EXPECT_EQUAL(1u, result.min_feature); // needs key=value or z-star
+ EXPECT_EQUAL(3u, result.interval_range);
+ EXPECT_EQUAL(2u, result.interval_map.size());
+ checkInterval(result, "key=value",
+ {0x00010002, 0x00010003});
+ checkInterval(result, PredicateIndex::z_star_compressed_attribute_name,
+ {0x00020000});
+}
+
+TEST("require that final first NOT-interval is extended") {
+ auto slime = neg(featureSet("key", {"A"}));
+ PredicateTreeAnnotations result;
+ PredicateTreeAnnotator::annotate(slime->get(), result);
+ EXPECT_EQUAL(1u, result.min_feature);
+ EXPECT_EQUAL(2u, result.interval_range);
+ EXPECT_EQUAL(2u, result.interval_map.size());
+ checkInterval(result, "key=A", {0x00010001});
+ checkInterval(result, PredicateIndex::z_star_compressed_attribute_name,
+ {0x00010000});
+}
+
+TEST("show different types of NOT-intervals") {
+ auto slime = andNode({orNode({andNode({featureSet("key", {"A"}),
+ neg(featureSet("key", {"B"}))}),
+ andNode({neg(featureSet("key", {"C"})),
+ featureSet("key", {"D"})})}),
+ featureSet("foo", {"bar"})});
+ PredicateTreeAnnotations result;
+ PredicateTreeAnnotator::annotate(slime->get(), result);
+ EXPECT_EQUAL(3u, result.min_feature);
+ EXPECT_EQUAL(7u, result.interval_range);
+ EXPECT_EQUAL(6u, result.interval_map.size());
+ checkInterval(result, "foo=bar", {0x00070007});
+ checkInterval(result, "key=A", {0x00010001});
+ checkInterval(result, "key=B", {0x00020002});
+ checkInterval(result, "key=C", {0x00010004});
+ checkInterval(result, "key=D", {0x00060006});
+ checkInterval(result, PredicateIndex::z_star_compressed_attribute_name,
+ {0x00020001, 0x00000006, 0x00040000});
+
+ slime = orNode({neg(featureSet("key", {"A"})),
+ neg(featureSet("key", {"B"}))});
+ result = PredicateTreeAnnotations();
+ PredicateTreeAnnotator::annotate(slime->get(), result);
+ EXPECT_EQUAL(1u, result.min_feature);
+ EXPECT_EQUAL(4u, result.interval_range);
+ EXPECT_EQUAL(3u, result.interval_map.size());
+ checkInterval(result, "key=A", {0x00010003});
+ checkInterval(result, "key=B", {0x00010003});
+ checkInterval(result, PredicateIndex::z_star_compressed_attribute_name,
+ {0x00030000, 0x00030000});
+
+ slime = orNode({andNode({neg(featureSet("key", {"A"})),
+ neg(featureSet("key", {"B"}))}),
+ andNode({neg(featureSet("key", {"C"})),
+ neg(featureSet("key", {"D"}))})});
+ result = PredicateTreeAnnotations();
+ PredicateTreeAnnotator::annotate(slime->get(), result);
+ EXPECT_EQUAL(1u, result.min_feature);
+ EXPECT_EQUAL(8u, result.interval_range);
+ EXPECT_EQUAL(5u, result.interval_map.size());
+ checkInterval(result, "key=A", {0x00010001});
+ checkInterval(result, "key=B", {0x00030007});
+ checkInterval(result, "key=C", {0x00010005});
+ checkInterval(result, "key=D", {0x00070007});
+ checkInterval(result, PredicateIndex::z_star_compressed_attribute_name,
+ {0x00010000, 0x00070002, 0x00050000,
+ 0x00070006});
+
+}
+
+TEST("require that hashed ranges get correct intervals") {
+ Slime slime;
+ Cursor &children = makeAndNode(slime.setObject());
+ makeHashedFeatureRange(
+ children.addObject(), "key",
+ {"key=10-19", "key=20-29"}, {{0, 5, -1}, {30, 0, 3}});
+ makeHashedFeatureRange(
+ children.addObject(), "foo",
+ {"foo=10-19", "foo=20-29"}, {{0, 5, -1}, {30, 0, 3}});
+
+ PredicateTreeAnnotations result;
+ PredicateTreeAnnotator::annotate(slime.get(), result);
+
+ EXPECT_EQUAL(2u, result.min_feature);
+ EXPECT_EQUAL(2u, result.interval_range);
+ EXPECT_EQUAL(4u, result.interval_map.size());
+ EXPECT_EQUAL(4u, result.bounds_map.size());
+ checkInterval(result, "key=10-19", {0x00010001});
+ checkInterval(result, "key=20-29", {0x00010001});
+ checkBounds(result, "key=0", {{0x00010001, 0xffffffff}});
+ checkBounds(result, "key=30", {{0x00010001, 3}});
+
+ checkInterval(result, "foo=10-19", {0x00020002});
+ checkInterval(result, "foo=20-29", {0x00020002});
+ checkBounds(result, "foo=0", {{0x00020002, 0xffffffff}});
+ checkBounds(result, "foo=30", {{0x00020002, 3}});
+}
+
+TEST("require that extreme ranges works") {
+ Slime slime;
+ Cursor &children = makeAndNode(slime.setObject());
+ makeHashedFeatureRange(
+ children.addObject(), "max range",
+ {"max range=9223372036854775806-9223372036854775807"}, {});
+ makeHashedFeatureRange(
+ children.addObject(), "max edge",
+ {}, {{9223372036854775807, 0, 0x40000001}});
+ makeHashedFeatureRange(
+ children.addObject(), "min range",
+ {"min range=-9223372036854775807-9223372036854775806"}, {});
+ makeHashedFeatureRange(
+ children.addObject(), "min edge",
+ {}, {{LLONG_MIN, 0, 0x40000001}});
+
+ PredicateTreeAnnotations result;
+ PredicateTreeAnnotator::annotate(slime.get(), result);
+
+ EXPECT_EQUAL(4u, result.min_feature);
+ EXPECT_EQUAL(4u, result.interval_range);
+ EXPECT_EQUAL(2u, result.interval_map.size());
+ EXPECT_EQUAL(2u, result.bounds_map.size());
+ checkInterval(result, "max range=9223372036854775806-9223372036854775807",
+ {0x00010001});
+ checkBounds(result, "max edge=9223372036854775807",
+ {{0x00020002, 0x40000001}});
+ checkInterval(result, "min range=-9223372036854775807-9223372036854775806",
+ {0x00030003});
+ checkBounds(result, "min edge=-9223372036854775808",
+ {{0x00040004, 0x40000001}});
+}
+
+TEST("require that unique features and all ranges are collected") {
+ auto slime = andNode({featureSet("key1", {"value1"}),
+ featureSet("key1", {"value1"}),
+ featureRange("key2", 9, 40),
+ featureRange("key2", 9, 40)});
+ Cursor &c1 = slime->get()[Predicate::CHILDREN][2]
+ .setArray(Predicate::HASHED_PARTITIONS);
+ c1.addLong(PredicateHash::hash64("key2=10-19"));
+ c1.addLong(PredicateHash::hash64("key2=20-29"));
+ c1.addLong(PredicateHash::hash64("key2=30-39"));
+ c1.addLong(PredicateHash::hash64("key2=0"));
+ c1.addLong(PredicateHash::hash64("key2=40"));
+ Cursor &c2 = slime->get()[Predicate::CHILDREN][3]
+ .setArray(Predicate::HASHED_PARTITIONS);
+ c2.addLong(PredicateHash::hash64("key2=10-19"));
+ c2.addLong(PredicateHash::hash64("key2=20-29"));
+ c2.addLong(PredicateHash::hash64("key2=30-39"));
+ c2.addLong(PredicateHash::hash64("key2=0"));
+ c2.addLong(PredicateHash::hash64("key2=40"));
+
+ PredicateTreeAnnotations result;
+ PredicateTreeAnnotator::annotate(slime->get(), result);
+
+ EXPECT_EQUAL(4u, result.interval_range);
+ ASSERT_EQUAL(1u, result.features.size());
+ EXPECT_EQUAL(static_cast<uint64_t>(PredicateHash::hash64("key1=value1")),
+ result.features[0]);
+ ASSERT_EQUAL(2u, result.range_features.size());
+ EXPECT_EQUAL("key2", result.range_features[0].label.make_string());
+ EXPECT_EQUAL(9, result.range_features[0].from);
+ EXPECT_EQUAL(40, result.range_features[0].to);
+ EXPECT_EQUAL("key2", result.range_features[1].label.make_string());
+ EXPECT_EQUAL(9, result.range_features[1].from);
+ EXPECT_EQUAL(40, result.range_features[1].to);
+}
+
+TEST("require that z-star feature is only registered once") {
+ auto slime = andNode({neg(featureSet("key1", {"value1"})),
+ neg(featureRange("key2", 10, 19))});
+ Cursor &c = slime->get()[Predicate::CHILDREN][1][Predicate::CHILDREN][0]
+ .setArray(Predicate::HASHED_PARTITIONS);
+ c.addLong(PredicateHash::hash64("key2=10-19"));
+
+ // simple range will be stored as a feature.
+ PredicateTreeAnnotations result;
+ PredicateTreeAnnotator::annotate(slime->get(), result);
+
+ EXPECT_EQUAL(4u, result.interval_range);
+ ASSERT_EQUAL(3u, result.features.size());
+ EXPECT_EQUAL(PredicateHash::hash64("key1=value1"), result.features[0]);
+ EXPECT_EQUAL(PredicateIndex::z_star_compressed_hash, result.features[1]);
+ EXPECT_EQUAL(PredicateHash::hash64("key2=10-19"), result.features[2]);
+ ASSERT_EQUAL(0u, result.range_features.size());
+}
+
+TEST("require that default open range works") {
+ auto slime = lessEqual("foo", 39);
+ Cursor &c = slime->get().setArray(Predicate::HASHED_PARTITIONS);
+ c.addLong(PredicateHash::hash64("foo=-9223372036854775808"));
+ c.addLong(PredicateHash::hash64("foo=-9223372036854775807-0"));
+ c.addLong(PredicateHash::hash64("foo=0-31"));
+ c.addLong(PredicateHash::hash64("foo=32-39"));
+
+ PredicateTreeAnnotations result;
+ PredicateTreeAnnotator::annotate(slime->get(), result);
+
+ EXPECT_EQUAL(1u, result.interval_range);
+ EXPECT_EQUAL(0u, result.features.size());
+ ASSERT_EQUAL(1u, result.range_features.size());
+ EXPECT_EQUAL("foo", result.range_features[0].label.make_string());
+ EXPECT_EQUAL(LLONG_MIN, result.range_features[0].from);
+ EXPECT_EQUAL(39, result.range_features[0].to);
+}
+
+TEST("require that open range works") {
+ auto slime = lessEqual("foo", 39);
+ Cursor &c = slime->get().setArray(Predicate::HASHED_PARTITIONS);
+ c.addLong(PredicateHash::hash64("foo=8-15"));
+ c.addLong(PredicateHash::hash64("foo=16-31"));
+ c.addLong(PredicateHash::hash64("foo=32-39"));
+
+ PredicateTreeAnnotations result;
+ PredicateTreeAnnotator::annotate(slime->get(), result, 8, 200);
+
+ EXPECT_EQUAL(1u, result.interval_range);
+ EXPECT_EQUAL(0u, result.features.size());
+ ASSERT_EQUAL(1u, result.range_features.size());
+ EXPECT_EQUAL("foo", result.range_features[0].label.make_string());
+ EXPECT_EQUAL(8, result.range_features[0].from);
+ EXPECT_EQUAL(39, result.range_features[0].to);
+}
+
+} // namespace
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/predicate/predicate_zero_constraint_posting_list_test.cpp b/searchlib/src/tests/predicate/predicate_zero_constraint_posting_list_test.cpp
new file mode 100644
index 00000000000..1751c725044
--- /dev/null
+++ b/searchlib/src/tests/predicate/predicate_zero_constraint_posting_list_test.cpp
@@ -0,0 +1,58 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Unit tests for predicate_zero_constraint_posting_list.
+
+#include <vespa/log/log.h>
+LOG_SETUP("predicate_zero_constraint_posting_list_test");
+#include <vespa/fastos/fastos.h>
+
+#include <vespa/searchlib/predicate/predicate_zero_constraint_posting_list.h>
+#include <vespa/vespalib/testkit/testapp.h>
+
+using namespace search;
+using namespace search::predicate;
+
+namespace {
+
+struct DummyDocIdLimitProvider : public DocIdLimitProvider {
+ virtual uint32_t getDocIdLimit() const { return 10000; }
+ virtual uint32_t getCommittedDocIdLimit() const { return 10000; }
+};
+
+vespalib::GenerationHandler generation_handler;
+vespalib::GenerationHolder generation_holder;
+DummyDocIdLimitProvider limit_provider;
+SimpleIndexConfig config;
+const uint64_t hash = 0x123;
+
+TEST("require that empty posting list starts at 0.") {
+ PredicateIndex index(generation_handler, generation_holder, limit_provider, config, 8);
+ PredicateZeroConstraintPostingList posting_list(index.getZeroConstraintDocs().begin());
+ EXPECT_EQUAL(0u, posting_list.getDocId());
+ EXPECT_EQUAL(0x00010001u, posting_list.getInterval());
+ EXPECT_FALSE(posting_list.next(0));
+}
+
+TEST("require that posting list can iterate.") {
+ PredicateIndex index(generation_handler, generation_holder, limit_provider, config, 8);
+ for (uint32_t id = 1; id < 100; ++id) {
+ index.indexEmptyDocument(id);
+ }
+ index.commit();
+ ASSERT_EQUAL(99u, index.getZeroConstraintDocs().size());
+
+ PredicateZeroConstraintPostingList posting_list(index.getZeroConstraintDocs().begin());
+ EXPECT_EQUAL(0u, posting_list.getDocId());
+ EXPECT_EQUAL(0x00010001u, posting_list.getInterval());
+
+ for (size_t i = 0; i < 99; ++i) {
+ EXPECT_TRUE(posting_list.next(i));
+ EXPECT_EQUAL(i + 1, posting_list.getDocId());
+ EXPECT_EQUAL(0x00010001u, posting_list.getInterval());
+ EXPECT_FALSE(posting_list.nextInterval());
+ }
+ EXPECT_FALSE(posting_list.next(99));
+}
+
+} // namespace
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/predicate/predicate_zstar_compressed_posting_list_test.cpp b/searchlib/src/tests/predicate/predicate_zstar_compressed_posting_list_test.cpp
new file mode 100644
index 00000000000..2dff14b4417
--- /dev/null
+++ b/searchlib/src/tests/predicate/predicate_zstar_compressed_posting_list_test.cpp
@@ -0,0 +1,95 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Unit tests for predicate_zstar_compressed_posting_list.
+
+#include <vespa/log/log.h>
+LOG_SETUP("predicate_zstar_compressed_posting_list_test");
+#include <vespa/fastos/fastos.h>
+
+#include <vespa/searchlib/predicate/predicate_tree_annotator.h>
+#include <vespa/searchlib/predicate/predicate_zstar_compressed_posting_list.h>
+#include <vespa/vespalib/testkit/testapp.h>
+
+using namespace search;
+using namespace search::predicate;
+using std::vector;
+
+namespace {
+
+struct DummyDocIdLimitProvider : public DocIdLimitProvider {
+ virtual uint32_t getDocIdLimit() const { return 10000; }
+ virtual uint32_t getCommittedDocIdLimit() const { return 10000; }
+};
+
+vespalib::GenerationHandler generation_handler;
+vespalib::GenerationHolder generation_holder;
+DummyDocIdLimitProvider limit_provider;
+SimpleIndexConfig config;
+const uint64_t hash = 0x123;
+
+TEST("require that empty posting list starts at 0.") {
+ PredicateIndex index(generation_handler, generation_holder, limit_provider, config, 8);
+ btree::EntryRef ref;
+ PredicateZstarCompressedPostingList<PredicateIndex::BTreeIterator>
+ posting_list(index.getIntervalStore(), index.getIntervalIndex().getBTreePostingList(ref));
+ EXPECT_EQUAL(0u, posting_list.getDocId());
+ EXPECT_EQUAL(0u, posting_list.getInterval());
+ EXPECT_FALSE(posting_list.next(0));
+}
+
+TEST("require that posting list can iterate.") {
+ PredicateIndex index(generation_handler, generation_holder, limit_provider, config, 8);
+ const auto &interval_index = index.getIntervalIndex();
+ vector<vector<Interval>> intervals =
+ {{{0x00010000}},
+ {{0x00010000}, {0x0000ffff}},
+ {{0x00010000}, {0x00000003}, {0x00040003}, {0x00060005}}};
+ for (size_t i = 0; i < intervals.size(); ++i) {
+ PredicateTreeAnnotations annotations(1);
+ annotations.interval_map[hash] = intervals[i];
+ index.indexDocument(i + 1, annotations);
+ }
+ index.commit();
+ auto it = interval_index.lookup(hash);
+ ASSERT_TRUE(it.valid());
+ auto ref = it.getData();
+
+ PredicateZstarCompressedPostingList<PredicateIndex::BTreeIterator>
+ posting_list(index.getIntervalStore(), interval_index.getBTreePostingList(ref));
+ EXPECT_EQUAL(0u, posting_list.getDocId());
+ EXPECT_EQUAL(0u, posting_list.getInterval());
+
+ EXPECT_TRUE(posting_list.next(0));
+ EXPECT_EQUAL(1u, posting_list.getDocId());
+ EXPECT_EQUAL(0x00010000u, posting_list.getInterval());
+ ASSERT_TRUE(posting_list.nextInterval());
+ EXPECT_EQUAL(0x00020001u, posting_list.getInterval());
+ ASSERT_FALSE(posting_list.nextInterval());
+
+ EXPECT_TRUE(posting_list.next(1));
+ EXPECT_EQUAL(2u, posting_list.getDocId());
+ EXPECT_EQUAL(0x00010000u, posting_list.getInterval());
+ ASSERT_TRUE(posting_list.nextInterval());
+ EXPECT_EQUAL(0xffff0001u, posting_list.getInterval());
+ ASSERT_FALSE(posting_list.nextInterval());
+
+ ASSERT_TRUE(posting_list.next(2));
+ EXPECT_EQUAL(3u, posting_list.getDocId());
+ EXPECT_EQUAL(0x00010000u, posting_list.getInterval());
+ ASSERT_TRUE(posting_list.nextInterval());
+ EXPECT_EQUAL(0x00030001u, posting_list.getInterval());
+ ASSERT_TRUE(posting_list.nextInterval());
+ EXPECT_EQUAL(0x00040003u, posting_list.getInterval());
+ ASSERT_TRUE(posting_list.nextInterval());
+ EXPECT_EQUAL(0x00050004u, posting_list.getInterval());
+ ASSERT_TRUE(posting_list.nextInterval());
+ EXPECT_EQUAL(0x00060005u, posting_list.getInterval());
+ ASSERT_TRUE(posting_list.nextInterval());
+ EXPECT_EQUAL(0x00070006u, posting_list.getInterval());
+ ASSERT_FALSE(posting_list.nextInterval());
+
+ ASSERT_FALSE(posting_list.next(4));
+}
+
+} // namespace
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/predicate/simple_index_test.cpp b/searchlib/src/tests/predicate/simple_index_test.cpp
new file mode 100644
index 00000000000..8ba9e6182fb
--- /dev/null
+++ b/searchlib/src/tests/predicate/simple_index_test.cpp
@@ -0,0 +1,333 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Unit tests for simple_index.
+
+#include <vespa/log/log.h>
+LOG_SETUP("simple_index_test");
+#include <vespa/fastos/fastos.h>
+
+#include <vespa/searchlib/predicate/simple_index.hpp>
+#include <vespa/vespalib/testkit/testapp.h>
+#include <vespa/searchlib/attribute/predicate_attribute.h>
+
+using namespace search;
+using namespace search::predicate;
+using vespalib::GenerationHolder;
+
+namespace {
+
+struct MyData {
+ uint32_t data;
+ MyData() : data(0) {}
+ MyData(uint32_t d) : data(d) {}
+ bool valid() const {
+ return data != 0;
+ }
+};
+
+struct MyDataSerializer : PostingSerializer<MyData> {
+ void serialize(const MyData &data,
+ vespalib::MMapDataBuffer& buffer) const {
+ buffer.writeInt32(data.data);
+ }
+};
+
+struct MyDataDeserializer : PostingDeserializer<MyData> {
+ MyData deserialize(vespalib::MMapDataBuffer& buffer) {
+ return {buffer.readInt32()};
+ }
+};
+
+struct SimpleDocIdLimitProvider : public DocIdLimitProvider {
+ uint32_t _doc_id_limit = 1;
+ uint32_t _committed_doc_id_limit = 1;
+ virtual uint32_t getDocIdLimit() const { return _doc_id_limit; }
+ virtual uint32_t getCommittedDocIdLimit() const { return _committed_doc_id_limit; }
+};
+
+constexpr uint64_t key = 0x123456;
+constexpr uint32_t doc_id = 42;
+const MyData data{100};
+
+constexpr double UPPER_DOCID_FREQ_THRESHOLD = 0.5;
+constexpr double LOWER_DOCID_FREQ_THRESHOLD = 0.25;
+constexpr size_t UPPER_VECTOR_SIZE_THRESHOLD = 10;
+constexpr size_t LOWER_VECTOR_SIZE_THRESHOLD = 8;
+constexpr size_t VECTOR_PRUNE_FREQUENCY = 1;
+constexpr double FOREACH_VECTOR_THRESHOLD = 0.0;
+const auto config = SimpleIndexConfig(UPPER_DOCID_FREQ_THRESHOLD,
+ LOWER_DOCID_FREQ_THRESHOLD,
+ UPPER_VECTOR_SIZE_THRESHOLD,
+ LOWER_VECTOR_SIZE_THRESHOLD,
+ VECTOR_PRUNE_FREQUENCY,
+ FOREACH_VECTOR_THRESHOLD,
+ GrowStrategy());
+struct Fixture {
+ GenerationHolder _generation_holder;
+ SimpleDocIdLimitProvider _limit_provider;
+ SimpleIndex<MyData> _index;
+ Fixture() : _generation_holder(), _limit_provider(),
+ _index(_generation_holder, _limit_provider, config) {}
+ ~Fixture() {
+ _generation_holder.clearHoldLists();
+ }
+ SimpleIndex<MyData> &index() {
+ return _index;
+ }
+ void addPosting(uint64_t k, uint32_t id, const MyData &d) {
+ if (id >= _limit_provider._doc_id_limit) {
+ _limit_provider._doc_id_limit = id + 1;
+ }
+ _index.addPosting(k, id, d);
+ }
+ SimpleIndex<MyData>::DictionaryIterator lookup(uint64_t k) {
+ return _index.lookup(k);
+ }
+ bool hasKey(uint64_t k) {
+ return lookup(k).valid();
+ }
+ std::pair<MyData, bool> removeFromPostingList(uint64_t k, uint32_t id) {
+ return _index.removeFromPostingList(k, id);
+ }
+ bool hasVectorPostingList(uint64_t k) {
+ return _index.getVectorPostingList(k).operator bool();
+ }
+ SimpleIndex<MyData>::VectorIterator getVectorPostingList(uint64_t k) {
+ return *_index.getVectorPostingList(k);
+ }
+ SimpleIndex<MyData>::BTreeIterator getBTreePostingList(btree::EntryRef ref) {
+ return _index.getBTreePostingList(ref);
+ }
+ void commit() {
+ _index.commit();
+ _limit_provider._committed_doc_id_limit = _limit_provider._doc_id_limit;
+ }
+};
+
+TEST_F("require that SimpleIndex can insert and remove a value.", Fixture) {
+ f.addPosting(key, doc_id, data);
+ f.commit();
+ auto it = f.lookup(key);
+ ASSERT_TRUE(it.valid());
+ btree::EntryRef ref = it.getData();
+ auto posting_it = f.getBTreePostingList(ref);
+ ASSERT_TRUE(posting_it.valid());
+ EXPECT_EQUAL(doc_id, posting_it.getKey());
+ EXPECT_EQUAL(data.data, posting_it.getData().data);
+
+ auto result = f.removeFromPostingList(key, doc_id);
+ EXPECT_TRUE(result.second);
+ EXPECT_EQUAL(data.data, result.first.data);
+ f.commit();
+
+ result = f.removeFromPostingList(key, doc_id);
+ EXPECT_FALSE(result.second);
+ EXPECT_FALSE(result.first.valid());
+
+ ASSERT_FALSE(f.hasKey(key));
+}
+
+TEST_F("require that SimpleIndex can insert and remove many values.", Fixture) {
+ for (uint32_t id = 1; id < 100; ++id) {
+ f.addPosting(key, id, {id});
+ }
+ f.commit();
+ auto it = f.lookup(key);
+ ASSERT_TRUE(it.valid());
+ btree::EntryRef ref = it.getData();
+ auto posting_it = f.getBTreePostingList(ref);
+ for (size_t id = 1; id < 100; ++id) {
+ ASSERT_TRUE(posting_it.valid());
+ EXPECT_EQUAL(id, posting_it.getKey());
+ EXPECT_EQUAL(id, posting_it.getData().data);
+ ++posting_it;
+ }
+ ASSERT_FALSE(posting_it.valid());
+ for (uint32_t id = 1; id < 100; ++id) {
+ it = f.lookup(key);
+ ASSERT_TRUE(it.valid());
+ ref = it.getData();
+ auto result = f.removeFromPostingList(key, id);
+ EXPECT_TRUE(result.second);
+ EXPECT_EQUAL(id, result.first.data);
+ }
+ f.commit();
+ ASSERT_FALSE(f.hasKey(key));
+}
+
+struct MyObserver : SimpleIndexDeserializeObserver<> {
+ std::map<uint32_t, uint64_t> features;
+ void notifyInsert(uint64_t my_key, uint32_t my_doc_id, uint32_t) {
+ features[my_doc_id] = my_key;
+ }
+ bool hasSeenDoc(uint32_t doc) {
+ return features.find(doc) != features.end();
+ }
+};
+
+TEST_FF("require that SimpleIndex can be serialized and deserialized.", Fixture, Fixture) {
+ for (uint32_t id = 1; id < 100; ++id) {
+ f1.addPosting(key, id, {id});
+ }
+ f1.commit();
+ vespalib::MMapDataBuffer buffer;
+ f1.index().serialize(buffer, MyDataSerializer());
+ MyObserver observer;
+ MyDataDeserializer deserializer;
+ f2.index().deserialize(buffer, deserializer, observer, PredicateAttribute::PREDICATE_ATTRIBUTE_VERSION);
+
+ auto it = f2.lookup(key);
+ ASSERT_TRUE(it.valid());
+ btree::EntryRef ref = it.getData();
+ auto posting_it = f1.getBTreePostingList(ref);
+ for (uint32_t id = 1; id < 100; ++id) {
+ ASSERT_TRUE(posting_it.valid());
+ EXPECT_EQUAL(id, posting_it.getKey());
+ EXPECT_EQUAL(id, posting_it.getData().data);
+ EXPECT_TRUE(observer.hasSeenDoc(id));
+ ++posting_it;
+ }
+ EXPECT_FALSE(posting_it.valid());
+}
+
+TEST_F("require that SimpleIndex can update by inserting the same key twice.", Fixture) {
+ f.addPosting(key, doc_id, data);
+
+ MyData new_data{42};
+ f.addPosting(key, doc_id, new_data);
+ f.commit();
+
+ auto it = f.lookup(key);
+ ASSERT_TRUE(it.valid());
+ btree::EntryRef ref = it.getData();
+ auto posting_it = f.getBTreePostingList(ref);
+ ASSERT_TRUE(posting_it.valid());
+ EXPECT_EQUAL(doc_id, posting_it.getKey());
+ EXPECT_EQUAL(new_data.data, posting_it.getData().data);
+}
+
+TEST_F("require that only that btrees exceeding size threshold is promoted to vector", Fixture) {
+ for (uint32_t i = 1; i < 10; ++i) {
+ f.addPosting(key, i, {i});
+ }
+ f.commit();
+ ASSERT_TRUE(f.hasKey(key));
+ EXPECT_FALSE(f.hasVectorPostingList(key));
+ f.addPosting(key, 10, {10});
+ f.commit();
+ ASSERT_TRUE(f.hasVectorPostingList(key));
+}
+
+TEST_F("require that vectors below size threshold is pruned", Fixture) {
+ for (uint32_t i = 1; i <= 10; ++i) {
+ f.addPosting(key, i, {i});
+ }
+ f.commit();
+ auto it = f.lookup(key);
+ ASSERT_TRUE(it.valid());
+ for (uint32_t i = 10; i > 8; --i) {
+ f.removeFromPostingList(key, i);
+ }
+ f.commit();
+ EXPECT_TRUE(f.hasVectorPostingList(key));
+ f.removeFromPostingList(key, 8);
+ f.commit();
+ EXPECT_FALSE(f.hasVectorPostingList(key));
+}
+
+TEST_F("require that only btrees with high enough doc frequency is promoted to vector", Fixture) {
+ for (uint32_t i = 100; i > 51; --i) {
+ f.addPosting(key, i, {i});
+ }
+ f.commit();
+ auto it = f.lookup(key);
+ ASSERT_TRUE(it.valid());
+ EXPECT_FALSE(f.hasVectorPostingList(key));
+ f.addPosting(key, 51, {51});
+ f.commit();
+ ASSERT_TRUE(f.hasVectorPostingList(key));
+}
+
+TEST_F("require that vectors below doc frequency is pruned by removeFromPostingList", Fixture) {
+ for (uint32_t i = 1; i <= 100; ++i) {
+ f.addPosting(key, i, {i});
+ }
+ f.commit();
+ ASSERT_TRUE(f.hasKey(key));
+ EXPECT_TRUE(f.hasVectorPostingList(key));
+ for (uint32_t i = 100; i > 25; --i) {
+ f.removeFromPostingList(key, i);
+ }
+ f.commit();
+ EXPECT_TRUE(f.hasVectorPostingList(key));
+ f.removeFromPostingList(key, 25);
+ f.commit();
+ EXPECT_FALSE(f.hasVectorPostingList(key));
+}
+
+TEST_F("require that vectors below doc frequency is pruned by addPosting", Fixture) {
+ for (uint32_t i = 1; i <= 10; ++i) {
+ f.addPosting(key, i, {i});
+ }
+ f.commit();
+ ASSERT_TRUE(f.hasKey(key));
+ EXPECT_TRUE(f.hasVectorPostingList(key));
+ for (uint32_t i = 1; i <= 100; ++i) {
+ f.addPosting(key + 1, i, {i});
+ }
+ f.commit();
+ EXPECT_FALSE(f.hasVectorPostingList(key));
+}
+
+TEST_F("require that promoteOverThresholdVectors promotes posting lists over threshold to vectors", Fixture) {
+ f._limit_provider._doc_id_limit = 100;
+ for (uint32_t i = 1; i <= 20; ++i) {
+ f.addPosting(key + 0, i, {i});
+ f.addPosting(key + 1, i, {i});
+ f.addPosting(key + 2, i, {i});
+ }
+ for (uint32_t i = 21; i <= 40; ++i) {
+ f.addPosting(key + 0, i, {i});
+ f.addPosting(key + 2, i, {i});
+ }
+ f.commit();
+ EXPECT_FALSE(f.hasVectorPostingList(key + 0));
+ EXPECT_FALSE(f.hasVectorPostingList(key + 1));
+ EXPECT_FALSE(f.hasVectorPostingList(key + 2));
+ f._limit_provider._doc_id_limit = 50;
+ f.index().promoteOverThresholdVectors();
+ f.commit();
+ EXPECT_TRUE(f.hasVectorPostingList(key + 0));
+ EXPECT_FALSE(f.hasVectorPostingList(key + 1));
+ EXPECT_TRUE(f.hasVectorPostingList(key + 2));
+}
+
+TEST_F("require that vector contains correct postings", Fixture) {
+ for (uint32_t i = 1; i <= 100; ++i) {
+ f.addPosting(key, i, i % 5 > 0 ? MyData{i * 2} : MyData{0});
+ }
+ f.commit();
+ ASSERT_TRUE(f.hasKey(key));
+ ASSERT_TRUE(f.hasVectorPostingList(key));
+ auto v = f.getVectorPostingList(key);
+
+ EXPECT_EQUAL(1u, v.getKey());
+ EXPECT_EQUAL(2u, v.getData().data);
+
+ for (uint32_t i = 1; i < 100; ++i) {
+ v.linearSeek(i);
+ ASSERT_TRUE(v.valid());
+ if (i % 5 == 0) {
+ EXPECT_EQUAL(i + 1, v.getKey());
+ EXPECT_EQUAL((i + 1) * 2, v.getData().data);
+ } else {
+ EXPECT_EQUAL(i, v.getKey());
+ EXPECT_EQUAL(i * 2, v.getData().data);
+ }
+ }
+ v.linearSeek(100);
+ EXPECT_FALSE(v.valid());
+}
+
+} // namespace
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/predicate/tree_crumbs_test.cpp b/searchlib/src/tests/predicate/tree_crumbs_test.cpp
new file mode 100644
index 00000000000..2f38bb74507
--- /dev/null
+++ b/searchlib/src/tests/predicate/tree_crumbs_test.cpp
@@ -0,0 +1,65 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Unit tests for TreeCrumbs.
+
+#include <vespa/log/log.h>
+LOG_SETUP("TreeCrumbs_test");
+#include <vespa/fastos/fastos.h>
+
+#include <vespa/searchlib/predicate/tree_crumbs.h>
+#include <vespa/vespalib/testkit/testapp.h>
+
+using namespace search::predicate;
+
+namespace {
+
+TEST("require that crumbs can set child and resize") {
+ TreeCrumbs crumbs;
+ EXPECT_EQUAL(0u, crumbs.size());
+ EXPECT_EQUAL("", crumbs.getCrumb());
+ crumbs.setChild(2);
+ EXPECT_EQUAL(2u, crumbs.size());
+ EXPECT_EQUAL(":2", crumbs.getCrumb());
+ crumbs.setChild(12345);
+ EXPECT_EQUAL(8u, crumbs.size());
+ EXPECT_EQUAL(":2:12345", crumbs.getCrumb());
+ crumbs.resize(2);
+ EXPECT_EQUAL(2u, crumbs.size());
+ EXPECT_EQUAL(":2", crumbs.getCrumb());
+ crumbs.setChild(42);
+ EXPECT_EQUAL(5u, crumbs.size());
+ EXPECT_EQUAL(":2:42", crumbs.getCrumb());
+ crumbs.resize(2);
+ EXPECT_EQUAL(2u, crumbs.size());
+ EXPECT_EQUAL(":2", crumbs.getCrumb());
+ crumbs.resize(0);
+ EXPECT_EQUAL(0u, crumbs.size());
+ EXPECT_EQUAL("", crumbs.getCrumb());
+}
+
+TEST("require that child counts of 2^31 - 1 is ok") {
+ TreeCrumbs crumbs;
+ EXPECT_EQUAL(0u, crumbs.size());
+ EXPECT_EQUAL("", crumbs.getCrumb());
+ crumbs.setChild(0xffffffff);
+ EXPECT_EQUAL(11u, crumbs.size());
+ EXPECT_EQUAL(":4294967295", crumbs.getCrumb());
+}
+
+TEST("require that child 0 gets number") {
+ TreeCrumbs crumbs;
+ crumbs.setChild(0);
+ EXPECT_EQUAL(2u, crumbs.size());
+ EXPECT_EQUAL(":0", crumbs.getCrumb());
+}
+
+TEST("require that crumbs can set custom initial char") {
+ TreeCrumbs crumbs;
+ crumbs.setChild(0, 'a');
+ crumbs.setChild(1, 'b');
+ crumbs.setChild(2, 'c');
+ EXPECT_EQUAL("a0b1c2", crumbs.getCrumb());
+}
+
+} // namespace
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/prettyfloat/.gitignore b/searchlib/src/tests/prettyfloat/.gitignore
new file mode 100644
index 00000000000..bf0327f3372
--- /dev/null
+++ b/searchlib/src/tests/prettyfloat/.gitignore
@@ -0,0 +1,4 @@
+.depend
+Makefile
+prettyfloat_test
+searchlib_prettyfloat_test_app
diff --git a/searchlib/src/tests/prettyfloat/CMakeLists.txt b/searchlib/src/tests/prettyfloat/CMakeLists.txt
new file mode 100644
index 00000000000..74e91518030
--- /dev/null
+++ b/searchlib/src/tests/prettyfloat/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_prettyfloat_test_app
+ SOURCES
+ prettyfloat.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_prettyfloat_test_app COMMAND searchlib_prettyfloat_test_app)
diff --git a/searchlib/src/tests/prettyfloat/DESC b/searchlib/src/tests/prettyfloat/DESC
new file mode 100644
index 00000000000..fc4e85bcc09
--- /dev/null
+++ b/searchlib/src/tests/prettyfloat/DESC
@@ -0,0 +1 @@
+prettyfloat test. Take a look at prettyfloat.cpp for details.
diff --git a/searchlib/src/tests/prettyfloat/FILES b/searchlib/src/tests/prettyfloat/FILES
new file mode 100644
index 00000000000..fe3e151cf90
--- /dev/null
+++ b/searchlib/src/tests/prettyfloat/FILES
@@ -0,0 +1 @@
+prettyfloat.cpp
diff --git a/searchlib/src/tests/prettyfloat/prettyfloat.cpp b/searchlib/src/tests/prettyfloat/prettyfloat.cpp
new file mode 100644
index 00000000000..1ed9b7e1767
--- /dev/null
+++ b/searchlib/src/tests/prettyfloat/prettyfloat.cpp
@@ -0,0 +1,32 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("prettyfloat_test");
+#include <vespa/vespalib/testkit/testapp.h>
+#include <vespa/searchlib/util/rawbuf.h>
+#include <vespa/searchlib/common/hitrank.h>
+
+using namespace search;
+
+TEST_SETUP(Test);
+
+int
+Test::Main()
+{
+ TEST_INIT("prettyfloat_test");
+ {
+ RawBuf buf(5000);
+ SignedHitRank rank = 10;
+ buf.addSignedHitRank(rank);
+ *buf.GetWritableFillPos() = '\0';
+ EXPECT_EQUAL(std::string("10"), buf.GetDrainPos());
+ }
+ {
+ RawBuf buf(5000);
+ HitRank rank = 10;
+ buf.addHitRank(rank);
+ *buf.GetWritableFillPos() = '\0';
+ EXPECT_EQUAL(std::string("10"), buf.GetDrainPos());
+ }
+ TEST_DONE();
+}
diff --git a/searchlib/src/tests/query/.gitignore b/searchlib/src/tests/query/.gitignore
new file mode 100644
index 00000000000..8b9d7f9993f
--- /dev/null
+++ b/searchlib/src/tests/query/.gitignore
@@ -0,0 +1,10 @@
+*_test
+.depend
+Makefile
+searchlib_customtypevisitor_test_app
+searchlib_query-old-large_test_app
+searchlib_query-old_test_app
+searchlib_query_visitor_test_app
+searchlib_querybuilder_test_app
+searchlib_stackdumpquerycreator_test_app
+searchlib_templatetermvisitor_test_app
diff --git a/searchlib/src/tests/query/CMakeLists.txt b/searchlib/src/tests/query/CMakeLists.txt
new file mode 100644
index 00000000000..16a75b7142a
--- /dev/null
+++ b/searchlib/src/tests/query/CMakeLists.txt
@@ -0,0 +1,50 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_query_visitor_test_app
+ SOURCES
+ query_visitor_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_query_visitor_test_app COMMAND searchlib_query_visitor_test_app)
+vespa_add_executable(searchlib_customtypevisitor_test_app
+ SOURCES
+ customtypevisitor_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_customtypevisitor_test_app COMMAND searchlib_customtypevisitor_test_app)
+vespa_add_executable(searchlib_templatetermvisitor_test_app
+ SOURCES
+ templatetermvisitor_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_templatetermvisitor_test_app COMMAND searchlib_templatetermvisitor_test_app)
+vespa_add_executable(searchlib_querybuilder_test_app
+ SOURCES
+ querybuilder_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_querybuilder_test_app COMMAND searchlib_querybuilder_test_app)
+vespa_add_executable(searchlib_stackdumpquerycreator_test_app
+ SOURCES
+ stackdumpquerycreator_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_stackdumpquerycreator_test_app COMMAND searchlib_stackdumpquerycreator_test_app)
+vespa_add_executable(searchlib_query-old_test_app
+ SOURCES
+ query-old.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_query-old_test_app COMMAND searchlib_query-old_test_app)
+vespa_add_executable(searchlib_query-old-large_test_app
+ SOURCES
+ query-old-large.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_query-old-large_test_app COMMAND searchlib_query-old-large_test_app)
diff --git a/searchlib/src/tests/query/DESC b/searchlib/src/tests/query/DESC
new file mode 100644
index 00000000000..6461797f6bd
--- /dev/null
+++ b/searchlib/src/tests/query/DESC
@@ -0,0 +1 @@
+This is a test of the query interface.
diff --git a/searchlib/src/tests/query/FILES b/searchlib/src/tests/query/FILES
new file mode 100644
index 00000000000..6f11f149162
--- /dev/null
+++ b/searchlib/src/tests/query/FILES
@@ -0,0 +1,2 @@
+query.cpp
+query-old.cpp
diff --git a/searchlib/src/tests/query/customtypevisitor_test.cpp b/searchlib/src/tests/query/customtypevisitor_test.cpp
new file mode 100644
index 00000000000..c9da2757d81
--- /dev/null
+++ b/searchlib/src/tests/query/customtypevisitor_test.cpp
@@ -0,0 +1,157 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Unit tests for customtypevisitor.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("customtypevisitor_test");
+
+#include <vespa/searchlib/query/tree/customtypevisitor.h>
+#include <vespa/searchlib/query/tree/intermediatenodes.h>
+#include <vespa/searchlib/query/tree/termnodes.h>
+#include <vespa/vespalib/testkit/testapp.h>
+
+using std::string;
+
+using namespace search::query;
+
+namespace {
+
+class Test : public vespalib::TestApp {
+ const char *current_state;
+ virtual void DumpState(bool) {
+ fprintf(stderr, "%s: ERROR: in %s\n", GetName(), current_state);
+ }
+
+ template <class T> void requireThatNodeIsVisited();
+
+public:
+ int Main();
+};
+
+template <class Base>
+struct InitTerm : Base {
+ InitTerm() : Base(typename Base::Type(), "view", 0, Weight(0)) {}
+};
+
+struct MyAnd : And {};
+struct MyAndNot : AndNot {};
+struct MyEquiv : Equiv {};
+struct MyNear : Near { MyNear() : Near(1) {} };
+struct MyONear : ONear { MyONear() : ONear(1) {} };
+struct MyOr : Or {};
+struct MyPhrase : Phrase { MyPhrase() : Phrase("view", 0, Weight(42)) {} };
+struct MyRank : Rank {};
+struct MyNumberTerm : InitTerm<NumberTerm> {};
+struct MyLocationTerm : InitTerm<LocationTerm> {};
+struct MyPrefixTerm : InitTerm<PrefixTerm> {};
+struct MyRangeTerm : InitTerm<RangeTerm> {};
+struct MyStringTerm : InitTerm<StringTerm> {};
+struct MySubstrTerm : InitTerm<SubstringTerm> {};
+struct MySuffixTerm : InitTerm<SuffixTerm> {};
+struct MyWeakAnd : WeakAnd { MyWeakAnd() : WeakAnd(1234, "view") {} };
+struct MyWeightedSetTerm : WeightedSetTerm { MyWeightedSetTerm() : WeightedSetTerm("view", 0, Weight(42)) {} };
+struct MyDotProduct : DotProduct { MyDotProduct() : DotProduct("view", 0, Weight(42)) {} };
+struct MyWandTerm : WandTerm { MyWandTerm() : WandTerm("view", 0, Weight(42), 57, 67, 77.7) {} };
+struct MyPredicateQuery : InitTerm<PredicateQuery> {};
+struct MyRegExpTerm : InitTerm<RegExpTerm> {};
+
+struct MyQueryNodeTypes {
+ typedef MyAnd And;
+ typedef MyAndNot AndNot;
+ typedef MyEquiv Equiv;
+ typedef MyNumberTerm NumberTerm;
+ typedef MyLocationTerm LocationTerm;
+ typedef MyNear Near;
+ typedef MyONear ONear;
+ typedef MyOr Or;
+ typedef MyPhrase Phrase;
+ typedef MyPrefixTerm PrefixTerm;
+ typedef MyRangeTerm RangeTerm;
+ typedef MyRank Rank;
+ typedef MyStringTerm StringTerm;
+ typedef MySubstrTerm SubstringTerm;
+ typedef MySuffixTerm SuffixTerm;
+ typedef MyWeakAnd WeakAnd;
+ typedef MyWeightedSetTerm WeightedSetTerm;
+ typedef MyDotProduct DotProduct;
+ typedef MyWandTerm WandTerm;
+ typedef MyPredicateQuery PredicateQuery;
+ typedef MyRegExpTerm RegExpTerm;
+};
+
+class MyCustomVisitor : public CustomTypeVisitor<MyQueryNodeTypes>
+{
+public:
+ template <typename T>
+ bool &isVisited() {
+ static bool b;
+ return b;
+ }
+
+ template <typename T> void setVisited() { isVisited<T>() = true; }
+
+ virtual void visit(MyAnd &) { setVisited<MyAnd>(); }
+ virtual void visit(MyAndNot &) { setVisited<MyAndNot>(); }
+ virtual void visit(MyEquiv &) { setVisited<MyEquiv>(); }
+ virtual void visit(MyNumberTerm &) { setVisited<MyNumberTerm>(); }
+ virtual void visit(MyLocationTerm &) { setVisited<MyLocationTerm>(); }
+ virtual void visit(MyNear &) { setVisited<MyNear>(); }
+ virtual void visit(MyONear &) { setVisited<MyONear>(); }
+ virtual void visit(MyOr &) { setVisited<MyOr>(); }
+ virtual void visit(MyPhrase &) { setVisited<MyPhrase>(); }
+ virtual void visit(MyPrefixTerm &) { setVisited<MyPrefixTerm>(); }
+ virtual void visit(MyRangeTerm &) { setVisited<MyRangeTerm>(); }
+ virtual void visit(MyRank &) { setVisited<MyRank>(); }
+ virtual void visit(MyStringTerm &) { setVisited<MyStringTerm>(); }
+ virtual void visit(MySubstrTerm &) { setVisited<MySubstrTerm>(); }
+ virtual void visit(MySuffixTerm &) { setVisited<MySuffixTerm>(); }
+ virtual void visit(MyWeakAnd &) { setVisited<MyWeakAnd>(); }
+ virtual void visit(MyWeightedSetTerm &) { setVisited<MyWeightedSetTerm>(); }
+ virtual void visit(MyDotProduct &) { setVisited<MyDotProduct>(); }
+ virtual void visit(MyWandTerm &) { setVisited<MyWandTerm>(); }
+ virtual void visit(MyPredicateQuery &) { setVisited<MyPredicateQuery>(); }
+ virtual void visit(MyRegExpTerm &) { setVisited<MyRegExpTerm>(); }
+};
+
+template <class T>
+void Test::requireThatNodeIsVisited() {
+ MyCustomVisitor visitor;
+ Node::UP query(new T);
+ visitor.isVisited<T>() = false;
+ query->accept(visitor);
+ ASSERT_TRUE(visitor.isVisited<T>());
+}
+
+#define TEST_CALL(func) \
+ current_state = #func; \
+ func();
+
+int
+Test::Main()
+{
+ TEST_INIT("customtypevisitor_test");
+
+ TEST_CALL(requireThatNodeIsVisited<MyAnd>);
+ TEST_CALL(requireThatNodeIsVisited<MyAndNot>);
+ TEST_CALL(requireThatNodeIsVisited<MyNear>);
+ TEST_CALL(requireThatNodeIsVisited<MyONear>);
+ TEST_CALL(requireThatNodeIsVisited<MyOr>);
+ TEST_CALL(requireThatNodeIsVisited<MyPhrase>);
+ TEST_CALL(requireThatNodeIsVisited<MyRangeTerm>);
+ TEST_CALL(requireThatNodeIsVisited<MyRank>);
+ TEST_CALL(requireThatNodeIsVisited<MyNumberTerm>);
+ TEST_CALL(requireThatNodeIsVisited<MyPrefixTerm>);
+ TEST_CALL(requireThatNodeIsVisited<MyStringTerm>);
+ TEST_CALL(requireThatNodeIsVisited<MySubstrTerm>);
+ TEST_CALL(requireThatNodeIsVisited<MySuffixTerm>);
+ TEST_CALL(requireThatNodeIsVisited<MyWeightedSetTerm>);
+ TEST_CALL(requireThatNodeIsVisited<MyDotProduct>);
+ TEST_CALL(requireThatNodeIsVisited<MyWandTerm>);
+ TEST_CALL(requireThatNodeIsVisited<MyPredicateQuery>);
+ TEST_CALL(requireThatNodeIsVisited<MyRegExpTerm>);
+
+ TEST_DONE();
+}
+} // namespace
+
+TEST_APPHOOK(Test);
diff --git a/searchlib/src/tests/query/query-old-large.cpp b/searchlib/src/tests/query/query-old-large.cpp
new file mode 100644
index 00000000000..4e0d0fb85de
--- /dev/null
+++ b/searchlib/src/tests/query/query-old-large.cpp
@@ -0,0 +1,51 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/searchlib/query/query.h>
+#include <vespa/searchlib/query/tree/querybuilder.h>
+#include <vespa/searchlib/query/tree/simplequery.h>
+#include <vespa/searchlib/query/tree/stackdumpcreator.h>
+#include <vespa/vespalib/testkit/test_kit.h>
+#include <limits>
+
+using namespace search;
+using namespace search::query;
+
+namespace {
+
+void setMaxStackSize(rlim_t maxStackSize)
+{
+ struct rlimit limit;
+ getrlimit(RLIMIT_STACK, &limit);
+ limit.rlim_cur = maxStackSize;
+ setrlimit(RLIMIT_STACK, &limit);
+}
+
+}
+
+
+// NOTE: This test explicitly sets thread stack size and will fail due to
+// a stack overflow if the stack usage increases.
+TEST("testveryLongQueryResultingInBug6850778") {
+ const uint32_t NUMITEMS=20000;
+ setMaxStackSize(4 * 1024 * 1024);
+ QueryBuilder<SimpleQueryNodeTypes> builder;
+ for (uint32_t i=0; i <= NUMITEMS; i++) {
+ builder.addAnd(2);
+ builder.addStringTerm("a", "", 0, Weight(0));
+ if (i < NUMITEMS) {
+ } else {
+ builder.addStringTerm("b", "", 0, Weight(0));
+ }
+ }
+ Node::UP node = builder.build();
+ vespalib::string stackDump = StackDumpCreator::create(*node);
+
+ EmptyQueryNodeResult empty;
+ Query q(empty, stackDump);
+ QueryTermList terms;
+ QueryNodeRefList phrases;
+ q.getLeafs(terms);
+ ASSERT_EQUAL(NUMITEMS + 2, terms.size());
+}
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/query/query-old.cpp b/searchlib/src/tests/query/query-old.cpp
new file mode 100644
index 00000000000..94eeacc2b4d
--- /dev/null
+++ b/searchlib/src/tests/query/query-old.cpp
@@ -0,0 +1,650 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/searchlib/query/query.h>
+#include <vespa/searchlib/query/tree/querybuilder.h>
+#include <vespa/searchlib/query/tree/simplequery.h>
+#include <vespa/searchlib/query/tree/stackdumpcreator.h>
+#include <vespa/vespalib/testkit/test_kit.h>
+#include <limits>
+
+using namespace search;
+using namespace search::query;
+
+void assertHit(const Hit & h, size_t expWordpos, size_t expContext, int32_t weight) {
+ EXPECT_EQUAL(h.wordpos(), expWordpos);
+ EXPECT_EQUAL(h.context(), expContext);
+ EXPECT_EQUAL(h.weight(), weight);
+}
+
+TEST("testQueryLanguage") {
+ EmptyQueryNodeResult eqnr;
+ int64_t ia(0), ib(0);
+ double da(0), db(0);
+
+ QueryTerm q(eqnr, "7", "index", QueryTerm::WORD);
+ EXPECT_TRUE(q.getAsIntegerTerm(ia, ib));
+ EXPECT_EQUAL(ia, 7);
+ EXPECT_EQUAL(ib, 7);
+ EXPECT_TRUE(q.getAsDoubleTerm(da, db));
+ EXPECT_EQUAL(da, 7);
+ EXPECT_EQUAL(db, 7);
+
+ q = QueryTerm(eqnr, "-7", "index", QueryTerm::WORD);
+ EXPECT_TRUE(q.getAsIntegerTerm(ia, ib));
+ EXPECT_EQUAL(ia, -7);
+ EXPECT_EQUAL(ib, -7);
+ EXPECT_TRUE(q.getAsDoubleTerm(da, db));
+ EXPECT_EQUAL(da, -7);
+ EXPECT_EQUAL(db, -7);
+
+ q = QueryTerm(eqnr, "7.5", "index", QueryTerm::WORD);
+ EXPECT_TRUE(!q.getAsIntegerTerm(ia, ib));
+ EXPECT_TRUE(q.getAsDoubleTerm(da, db));
+ EXPECT_EQUAL(da, 7.5);
+ EXPECT_EQUAL(db, 7.5);
+
+ q = QueryTerm(eqnr, "-7.5", "index", QueryTerm::WORD);
+ EXPECT_TRUE(!q.getAsIntegerTerm(ia, ib));
+ EXPECT_TRUE(q.getAsDoubleTerm(da, db));
+ EXPECT_EQUAL(da, -7.5);
+ EXPECT_EQUAL(db, -7.5);
+
+ q = QueryTerm(eqnr, "<7", "index", QueryTerm::WORD);
+ EXPECT_TRUE(q.getAsIntegerTerm(ia, ib));
+ EXPECT_EQUAL(ia, std::numeric_limits<int64_t>::min());
+ EXPECT_EQUAL(ib, 6);
+ EXPECT_TRUE(q.getAsDoubleTerm(da, db));
+ EXPECT_EQUAL(da, -std::numeric_limits<double>::max());
+ EXPECT_LESS(db, 7);
+ EXPECT_GREATER(db, 6.99);
+
+ q = QueryTerm(eqnr, "[;7]", "index", QueryTerm::WORD);
+ EXPECT_TRUE(q.getAsIntegerTerm(ia, ib));
+ EXPECT_EQUAL(ia, std::numeric_limits<int64_t>::min());
+ EXPECT_EQUAL(ib, 7);
+ EXPECT_TRUE(q.getAsDoubleTerm(da, db));
+ EXPECT_EQUAL(da, -std::numeric_limits<double>::max());
+ EXPECT_EQUAL(db, 7);
+
+ q = QueryTerm(eqnr, ">7", "index", QueryTerm::WORD);
+ EXPECT_TRUE(q.getAsIntegerTerm(ia, ib));
+ EXPECT_EQUAL(ia, 8);
+ EXPECT_EQUAL(ib, std::numeric_limits<int64_t>::max());
+ EXPECT_TRUE(q.getAsDoubleTerm(da, db));
+ EXPECT_GREATER(da, 7);
+ EXPECT_LESS(da, 7.01);
+ EXPECT_EQUAL(db, std::numeric_limits<double>::max());
+
+ q = QueryTerm(eqnr, "[7;]", "index", QueryTerm::WORD);
+ EXPECT_TRUE(q.getAsIntegerTerm(ia, ib));
+ EXPECT_EQUAL(ia, 7);
+ EXPECT_EQUAL(ib, std::numeric_limits<int64_t>::max());
+ EXPECT_TRUE(q.getAsDoubleTerm(da, db));
+ EXPECT_EQUAL(da, 7);
+ EXPECT_EQUAL(db, std::numeric_limits<double>::max());
+
+ q = QueryTerm(eqnr, "[-7;7]", "index", QueryTerm::WORD);
+ EXPECT_TRUE(q.getAsIntegerTerm(ia, ib));
+ EXPECT_EQUAL(ia, -7);
+ EXPECT_EQUAL(ib, 7);
+ EXPECT_TRUE(q.getAsDoubleTerm(da, db));
+ EXPECT_EQUAL(da, -7);
+ EXPECT_EQUAL(db, 7);
+
+ q = QueryTerm(eqnr, "[-7.1;7.1]", "index", QueryTerm::WORD);
+ EXPECT_FALSE(q.getAsIntegerTerm(ia, ib)); // This is dubious and perhaps a regression.
+ EXPECT_EQUAL(ia, std::numeric_limits<int64_t>::min());
+ EXPECT_EQUAL(ib, std::numeric_limits<int64_t>::max());
+ EXPECT_TRUE(q.getAsDoubleTerm(da, db));
+ EXPECT_EQUAL(da, -7.1);
+ EXPECT_EQUAL(db, 7.1);
+
+ q = QueryTerm(eqnr, "[500.0;1.7976931348623157E308]", "index", QueryTerm::WORD);
+ EXPECT_FALSE(q.getAsIntegerTerm(ia, ib)); // This is dubious and perhaps a regression.
+ EXPECT_EQUAL(ia, std::numeric_limits<int64_t>::min());
+ EXPECT_EQUAL(ib, std::numeric_limits<int64_t>::max());
+ EXPECT_TRUE(q.getAsDoubleTerm(da, db));
+ EXPECT_EQUAL(da, 500.0);
+ EXPECT_EQUAL(db, std::numeric_limits<double>::max());
+
+ const double minusSeven(-7), seven(7);
+ q = QueryTerm(eqnr, "<-7;7]", "index", QueryTerm::WORD);
+ EXPECT_TRUE(q.getAsIntegerTerm(ia, ib));
+ EXPECT_EQUAL(ia, -6);
+ EXPECT_EQUAL(ib, 7);
+ EXPECT_TRUE(q.getAsDoubleTerm(da, db));
+ EXPECT_EQUAL(da, nextafterf(minusSeven, seven));
+ EXPECT_EQUAL(db, seven);
+
+ q = QueryTerm(eqnr, "<-7;7>", "index", QueryTerm::WORD);
+ EXPECT_TRUE(q.getAsIntegerTerm(ia, ib));
+ EXPECT_EQUAL(ia, -6);
+ EXPECT_EQUAL(ib, 6);
+ EXPECT_TRUE(q.getAsDoubleTerm(da, db));
+ EXPECT_EQUAL(da, nextafterf(minusSeven, seven));
+ EXPECT_EQUAL(db, nextafterf(seven, minusSeven));
+
+ q = QueryTerm(eqnr, "<1;2>", "index", QueryTerm::WORD);
+ EXPECT_TRUE(q.getAsIntegerTerm(ia, ib));
+ EXPECT_EQUAL(ia, 2);
+ EXPECT_EQUAL(ib, 1);
+
+ q = QueryTerm(eqnr, "[-7;7>", "index", QueryTerm::WORD);
+ EXPECT_TRUE(q.getAsIntegerTerm(ia, ib));
+ EXPECT_EQUAL(ia, -7);
+ EXPECT_EQUAL(ib, 6);
+ EXPECT_TRUE(q.getAsDoubleTerm(da, db));
+ EXPECT_EQUAL(da, minusSeven);
+ EXPECT_EQUAL(db, nextafterf(seven, minusSeven));
+
+ q = QueryTerm(eqnr, "<-7", "index", QueryTerm::WORD);
+ EXPECT_TRUE(q.getAsIntegerTerm(ia, ib));
+ EXPECT_EQUAL(ia, std::numeric_limits<int64_t>::min());
+ EXPECT_EQUAL(ib, -8);
+ EXPECT_TRUE(q.getAsDoubleTerm(da, db));
+ EXPECT_EQUAL(da, -std::numeric_limits<double>::max());
+ EXPECT_LESS(db, -7);
+ EXPECT_GREATER(db, -7.01);
+
+ q = QueryTerm(eqnr, "[;-7]", "index", QueryTerm::WORD);
+ EXPECT_TRUE(q.getAsIntegerTerm(ia, ib));
+ EXPECT_EQUAL(ia, std::numeric_limits<int64_t>::min());
+ EXPECT_EQUAL(ib, -7);
+ EXPECT_TRUE(q.getAsDoubleTerm(da, db));
+ EXPECT_EQUAL(da, -std::numeric_limits<double>::max());
+ EXPECT_EQUAL(db, -7);
+
+ q = QueryTerm(eqnr, "<;-7]", "index", QueryTerm::WORD);
+ EXPECT_TRUE(q.getAsIntegerTerm(ia, ib));
+ EXPECT_EQUAL(ia, std::numeric_limits<int64_t>::min());
+ EXPECT_EQUAL(ib, -7);
+ EXPECT_TRUE(q.getAsDoubleTerm(da, db));
+ EXPECT_EQUAL(da, -std::numeric_limits<double>::max());
+ EXPECT_EQUAL(db, -7);
+
+ q = QueryTerm(eqnr, ">-7", "index", QueryTerm::WORD);
+ EXPECT_TRUE(q.getAsIntegerTerm(ia, ib));
+ EXPECT_EQUAL(ia, -6);
+ EXPECT_EQUAL(ib, std::numeric_limits<int64_t>::max());
+ EXPECT_TRUE(q.getAsDoubleTerm(da, db));
+ EXPECT_GREATER(da, -7);
+ EXPECT_LESS(da, -6.99);
+ EXPECT_EQUAL(db, std::numeric_limits<double>::max());
+
+ q = QueryTerm(eqnr, "[-7;]", "index", QueryTerm::WORD);
+ EXPECT_TRUE(q.getAsIntegerTerm(ia, ib));
+ EXPECT_EQUAL(ia, -7);
+ EXPECT_EQUAL(ib, std::numeric_limits<int64_t>::max());
+ EXPECT_TRUE(q.getAsDoubleTerm(da, db));
+ EXPECT_EQUAL(da, -7);
+ EXPECT_EQUAL(db, std::numeric_limits<double>::max());
+
+ q = QueryTerm(eqnr, "[-7;>", "index", QueryTerm::WORD);
+ EXPECT_TRUE(q.getAsIntegerTerm(ia, ib));
+ EXPECT_EQUAL(ia, -7);
+ EXPECT_EQUAL(ib, std::numeric_limits<int64_t>::max());
+ EXPECT_TRUE(q.getAsDoubleTerm(da, db));
+ EXPECT_EQUAL(da, -7);
+ EXPECT_EQUAL(db, std::numeric_limits<double>::max());
+
+ q = QueryTerm(eqnr, "a", "index", QueryTerm::WORD);
+ EXPECT_TRUE(!q.getAsIntegerTerm(ia, ib));
+ EXPECT_TRUE(!q.getAsDoubleTerm(da, db));
+
+ q = QueryTerm(eqnr, "word", "index", QueryTerm::WORD);
+ EXPECT_TRUE(!q.isPrefix());
+ EXPECT_TRUE(!q.isSubstring());
+ EXPECT_TRUE(!q.isSuffix());
+
+ q = QueryTerm(eqnr, "prefix", "index", QueryTerm::PREFIXTERM);
+ EXPECT_TRUE(q.isPrefix());
+ EXPECT_TRUE(!q.isSubstring());
+ EXPECT_TRUE(!q.isSuffix());
+
+ q = QueryTerm(eqnr, "substring", "index", QueryTerm::SUBSTRINGTERM);
+ EXPECT_TRUE(!q.isPrefix());
+ EXPECT_TRUE(q.isSubstring());
+ EXPECT_TRUE(!q.isSuffix());
+
+ q = QueryTerm(eqnr, "suffix", "index", QueryTerm::SUFFIXTERM);
+ EXPECT_TRUE(!q.isPrefix());
+ EXPECT_TRUE(!q.isSubstring());
+ EXPECT_TRUE(q.isSuffix());
+
+ q = QueryTerm(eqnr, "regexp", "index", QueryTerm::REGEXP);
+ EXPECT_TRUE(!q.isPrefix());
+ EXPECT_TRUE(!q.isSubstring());
+ EXPECT_TRUE(!q.isSuffix());
+ EXPECT_TRUE(q.isRegex());
+}
+
+class AllowRewrite : public EmptyQueryNodeResult
+{
+public:
+ DUPLICATE(AllowRewrite); // create duplicate function
+
+ virtual bool getRewriteFloatTerms() const { return true; }
+};
+
+IMPLEMENT_DUPLICATE(AllowRewrite);
+
+const char TERM_UNIQ = ParseItem::ITEM_TERM | ParseItem::IF_UNIQUEID;
+
+TEST("e is not rewritten even if allowed") {
+ const char term[6] = {TERM_UNIQ, 3, 1, 'c', 1, 'e'};
+ vespalib::stringref stackDump(term, sizeof(term));
+ EXPECT_EQUAL(6u, stackDump.size());
+ AllowRewrite empty;
+ Query q(empty, stackDump);
+ EXPECT_TRUE(q.valid());
+ const QueryNode::LP & root = q.getRoot();
+ EXPECT_EQUAL(QueryTerm::classId, root->getClass().id());
+ const QueryTerm & qt = static_cast<const QueryTerm &>(*root);
+ EXPECT_EQUAL("c", qt.index());
+ EXPECT_EQUAL(vespalib::stringref("e"), qt.getTerm());
+ EXPECT_EQUAL(3u, qt.uniqueId());
+}
+
+TEST("1.0e is not rewritten by default") {
+ const char term[9] = {TERM_UNIQ, 3, 1, 'c', 4, '1', '.', '0', 'e'};
+ vespalib::stringref stackDump(term, sizeof(term));
+ EXPECT_EQUAL(9u, stackDump.size());
+ EmptyQueryNodeResult empty;
+ Query q(empty, stackDump);
+ EXPECT_TRUE(q.valid());
+ const QueryNode::LP & root = q.getRoot();
+ EXPECT_EQUAL(QueryTerm::classId, root->getClass().id());
+ const QueryTerm & qt = static_cast<const QueryTerm &>(*root);
+ EXPECT_EQUAL("c", qt.index());
+ EXPECT_EQUAL(vespalib::stringref("1.0e"), qt.getTerm());
+ EXPECT_EQUAL(3u, qt.uniqueId());
+}
+
+TEST("1.0e is rewritten if allowed too.") {
+ const char term[9] = {TERM_UNIQ, 3, 1, 'c', 4, '1', '.', '0', 'e'};
+ vespalib::stringref stackDump(term, sizeof(term));
+ EXPECT_EQUAL(9u, stackDump.size());
+ AllowRewrite empty;
+ Query q(empty, stackDump);
+ EXPECT_TRUE(q.valid());
+ const QueryNode::LP & root = q.getRoot();
+ EXPECT_EQUAL(EquivQueryNode::classId, root->getClass().id());
+ const EquivQueryNode & equiv = static_cast<const EquivQueryNode &>(*root);
+ EXPECT_EQUAL(2u, equiv.size());
+ EXPECT_EQUAL(QueryTerm::classId, equiv[0]->getClass().id());
+ {
+ const QueryTerm & qt = static_cast<const QueryTerm &>(*equiv[0]);
+ EXPECT_EQUAL("c", qt.index());
+ EXPECT_EQUAL(vespalib::stringref("1.0e"), qt.getTerm());
+ EXPECT_EQUAL(3u, qt.uniqueId());
+ }
+ EXPECT_EQUAL(PhraseQueryNode::classId, equiv[1]->getClass().id());
+ {
+ const PhraseQueryNode & phrase = static_cast<const PhraseQueryNode &>(*equiv[1]);
+ EXPECT_EQUAL(2u, phrase.size());
+ EXPECT_EQUAL(QueryTerm::classId, phrase[0]->getClass().id());
+ {
+ const QueryTerm & qt = static_cast<const QueryTerm &>(*phrase[0]);
+ EXPECT_EQUAL("c", qt.index());
+ EXPECT_EQUAL(vespalib::stringref("1"), qt.getTerm());
+ EXPECT_EQUAL(0u, qt.uniqueId());
+ }
+ EXPECT_EQUAL(QueryTerm::classId, phrase[1]->getClass().id());
+ {
+ const QueryTerm & qt = static_cast<const QueryTerm &>(*phrase[1]);
+ EXPECT_EQUAL("c", qt.index());
+ EXPECT_EQUAL(vespalib::stringref("0e"), qt.getTerm());
+ EXPECT_EQUAL(0u, qt.uniqueId());
+ }
+ }
+}
+
+TEST("testGetQueryParts") {
+ QueryBuilder<SimpleQueryNodeTypes> builder;
+ builder.addAnd(4);
+ {
+ builder.addStringTerm("a", "", 0, Weight(0));
+ builder.addPhrase(3, "", 0, Weight(0));
+ {
+ builder.addStringTerm("b", "", 0, Weight(0));
+ builder.addStringTerm("c", "", 0, Weight(0));
+ builder.addStringTerm("d", "", 0, Weight(0));
+ }
+ builder.addStringTerm("e", "", 0, Weight(0));
+ builder.addPhrase(2, "", 0, Weight(0));
+ {
+ builder.addStringTerm("f", "", 0, Weight(0));
+ builder.addStringTerm("g", "", 0, Weight(0));
+ }
+ }
+ Node::UP node = builder.build();
+ vespalib::string stackDump = StackDumpCreator::create(*node);
+
+ EmptyQueryNodeResult empty;
+ Query q(empty, stackDump);
+ QueryTermList terms;
+ QueryNodeRefList phrases;
+ q.getLeafs(terms);
+ q.getPhrases(phrases);
+ ASSERT_TRUE(terms.size() == 7);
+ ASSERT_TRUE(phrases.size() == 2);
+ {
+ QueryTermList pts;
+ phrases[0]->getLeafs(pts);
+ ASSERT_TRUE(pts.size() == 3);
+ for (size_t i = 0; i < 3; ++i) {
+ EXPECT_EQUAL(pts[i], terms[i + 1]);
+ }
+ }
+ {
+ QueryTermList pts;
+ phrases[1]->getLeafs(pts);
+ ASSERT_TRUE(pts.size() == 2);
+ for (size_t i = 0; i < 2; ++i) {
+ EXPECT_EQUAL(pts[i], terms[i + 5]);
+ }
+ }
+}
+
+TEST("testPhraseEvaluate") {
+ QueryBuilder<SimpleQueryNodeTypes> builder;
+ builder.addPhrase(3, "", 0, Weight(0));
+ {
+ builder.addStringTerm("a", "", 0, Weight(0));
+ builder.addStringTerm("b", "", 0, Weight(0));
+ builder.addStringTerm("c", "", 0, Weight(0));
+ }
+ Node::UP node = builder.build();
+ vespalib::string stackDump = StackDumpCreator::create(*node);
+ EmptyQueryNodeResult empty;
+ Query q(empty, stackDump);
+ QueryNodeRefList phrases;
+ q.getPhrases(phrases);
+ QueryTermList terms;
+ q.getLeafs(terms);
+ // field 0
+ terms[0]->add(0, 0, 1);
+ terms[1]->add(1, 0, 1);
+ terms[2]->add(2, 0, 1);
+ terms[0]->add(7, 0, 1);
+ terms[1]->add(8, 0, 1);
+ terms[2]->add(9, 0, 1);
+ // field 1
+ terms[0]->add(4, 1, 1);
+ terms[1]->add(5, 1, 1);
+ terms[2]->add(6, 1, 1);
+ // field 2 (not complete match)
+ terms[0]->add(1, 2, 1);
+ terms[1]->add(2, 2, 1);
+ terms[2]->add(4, 2, 1);
+ // field 3
+ terms[0]->add(0, 3, 1);
+ terms[1]->add(1, 3, 1);
+ terms[2]->add(2, 3, 1);
+ // field 4 (not complete match)
+ terms[0]->add(1, 4, 1);
+ terms[1]->add(2, 4, 1);
+ // field 5 (not complete match)
+ terms[0]->add(2, 5, 1);
+ terms[1]->add(1, 5, 1);
+ terms[2]->add(0, 5, 1);
+ HitList hits;
+ PhraseQueryNode * p = static_cast<PhraseQueryNode *>(phrases[0]);
+ p->evaluateHits(hits);
+ ASSERT_TRUE(hits.size() == 4);
+ EXPECT_EQUAL(hits[0].wordpos(), 2u);
+ EXPECT_EQUAL(hits[0].context(), 0u);
+ EXPECT_EQUAL(hits[1].wordpos(), 9u);
+ EXPECT_EQUAL(hits[1].context(), 0u);
+ EXPECT_EQUAL(hits[2].wordpos(), 6u);
+ EXPECT_EQUAL(hits[2].context(), 1u);
+ EXPECT_EQUAL(hits[3].wordpos(), 2u);
+ EXPECT_EQUAL(hits[3].context(), 3u);
+ ASSERT_TRUE(p->getFieldInfoSize() == 4);
+ EXPECT_EQUAL(p->getFieldInfo(0).getHitOffset(), 0u);
+ EXPECT_EQUAL(p->getFieldInfo(0).getHitCount(), 2u);
+ EXPECT_EQUAL(p->getFieldInfo(1).getHitOffset(), 2u);
+ EXPECT_EQUAL(p->getFieldInfo(1).getHitCount(), 1u);
+ EXPECT_EQUAL(p->getFieldInfo(2).getHitOffset(), 0u); // invalid, but will never be used
+ EXPECT_EQUAL(p->getFieldInfo(2).getHitCount(), 0u);
+ EXPECT_EQUAL(p->getFieldInfo(3).getHitOffset(), 3u);
+ EXPECT_EQUAL(p->getFieldInfo(3).getHitCount(), 1u);
+}
+
+TEST("testHit") {
+ // positions (0 - (2^24-1))
+ assertHit(Hit(0, 0, 0), 0, 0, 0);
+ assertHit(Hit(256, 0, 1), 256, 0, 1);
+ assertHit(Hit(16777215, 0, -1), 16777215, 0, -1);
+ assertHit(Hit(16777216, 0, 1), 0, 1, 1); // overflow
+
+ // contexts (0 - 255)
+ assertHit(Hit(0, 1, 1), 0, 1, 1);
+ assertHit(Hit(0, 255, 1), 0, 255, 1);
+ assertHit(Hit(0, 256, 1), 0, 0, 1); // overflow
+}
+
+void assertInt8Range(const std::string &term, bool expAdjusted, int64_t expLow, int64_t expHigh) {
+ QueryTermSimple q(term, QueryTermSimple::WORD);
+ QueryTermSimple::RangeResult<int8_t> res = q.getRange<int8_t>();
+ EXPECT_EQUAL(true, res.valid);
+ EXPECT_EQUAL(expAdjusted, res.adjusted);
+ EXPECT_EQUAL(expLow, (int64_t)res.low);
+ EXPECT_EQUAL(expHigh, (int64_t)res.high);
+}
+
+void assertInt32Range(const std::string &term, bool expAdjusted, int64_t expLow, int64_t expHigh) {
+ QueryTermSimple q(term, QueryTermSimple::WORD);
+ QueryTermSimple::RangeResult<int32_t> res = q.getRange<int32_t>();
+ EXPECT_EQUAL(true, res.valid);
+ EXPECT_EQUAL(expAdjusted, res.adjusted);
+ EXPECT_EQUAL(expLow, (int64_t)res.low);
+ EXPECT_EQUAL(expHigh, (int64_t)res.high);
+}
+
+void assertInt64Range(const std::string &term, bool expAdjusted, int64_t expLow, int64_t expHigh) {
+ QueryTermSimple q(term, QueryTermSimple::WORD);
+ QueryTermSimple::RangeResult<int64_t> res = q.getRange<int64_t>();
+ EXPECT_EQUAL(true, res.valid);
+ EXPECT_EQUAL(expAdjusted, res.adjusted);
+ EXPECT_EQUAL(expLow, (int64_t)res.low);
+ EXPECT_EQUAL(expHigh, (int64_t)res.high);
+}
+
+TEST("requireThatInt8LimitsAreEnforced") {
+ //std::numeric_limits<int8_t>::min() -> -128
+ //std::numeric_limits<int8_t>::max() -> 127
+
+ assertInt8Range("-129", true, -128, -128);
+ assertInt8Range("-128", false, -128, -128);
+ assertInt8Range("127", false, 127, 127);
+ assertInt8Range("128", true, 127, 127);
+ assertInt8Range("[-129;0]", true, -128, 0);
+ assertInt8Range("[-128;0]", false, -128, 0);
+ assertInt8Range("[0;127]", false, 0, 127);
+ assertInt8Range("[0;128]", true, 0, 127);
+ assertInt8Range("[-130;-129]", true, -128, -128);
+ assertInt8Range("[128;129]", true, 127, 127);
+ assertInt8Range("[-129;128]", true, -128, 127);
+}
+
+TEST("requireThatInt32LimitsAreEnforced") {
+ //std::numeric_limits<int32_t>::min() -> -2147483648
+ //std::numeric_limits<int32_t>::max() -> 2147483647
+
+ int64_t min = std::numeric_limits<int32_t>::min();
+ int64_t max = std::numeric_limits<int32_t>::max();
+
+ assertInt32Range("-2147483649", true, min, min);
+ assertInt32Range("-2147483648", false, min, min);
+ assertInt32Range("2147483647", false, max, max);
+ assertInt32Range("2147483648", true, max, max);
+ assertInt32Range("[-2147483649;0]", true, min, 0);
+ assertInt32Range("[-2147483648;0]", false, min, 0);
+ assertInt32Range("[0;2147483647]", false, 0, max);
+ assertInt32Range("[0;2147483648]", true, 0, max);
+ assertInt32Range("[-2147483650;-2147483649]", true, min, min);
+ assertInt32Range("[2147483648;2147483649]", true, max, max);
+ assertInt32Range("[-2147483649;2147483648]", true, min, max);
+}
+
+TEST("requireThatInt64LimitsAreEnforced") {
+ //std::numeric_limits<int64_t>::min() -> -9223372036854775808
+ //std::numeric_limits<int64_t>::max() -> 9223372036854775807
+
+ int64_t min = std::numeric_limits<int64_t>::min();
+ int64_t max = std::numeric_limits<int64_t>::max();
+
+ assertInt64Range("-9223372036854775809", false, min, min);
+ assertInt64Range("-9223372036854775808", false, min, min);
+ assertInt64Range("9223372036854775807", false, max, max);
+ assertInt64Range("9223372036854775808", false, max, max);
+ assertInt64Range("[-9223372036854775809;0]", false, min, 0);
+ assertInt64Range("[-9223372036854775808;0]", false, min, 0);
+ assertInt64Range("[0;9223372036854775807]", false, 0, max);
+ assertInt64Range("[0;9223372036854775808]", false, 0, max);
+ assertInt64Range("[-9223372036854775810;-9223372036854775809]", false, min, min);
+ assertInt64Range("[9223372036854775808;9223372036854775809]", false, max, max);
+ assertInt64Range("[-9223372036854775809;9223372036854775808]", false, min, max);
+}
+
+TEST("require sensible rounding when using integer attributes.") {
+ assertInt64Range("1.2", false, 1, 1);
+ assertInt64Range("1.51", false, 2, 2);
+ assertInt64Range("2.49", false, 2, 2);
+}
+
+TEST("require that we can take floating point values in range search too.") {
+ assertInt64Range("[1;2]", false, 1, 2);
+ assertInt64Range("[1.1;2.1]", false, 2, 2);
+ assertInt64Range("[1.9;3.9]", false, 2, 3);
+ assertInt64Range("[1.9;3.9]", false, 2, 3);
+ assertInt64Range("[1.0;3.0]", false, 1, 3);
+ assertInt64Range("<1.0;3.0>", false, 2, 2);
+ assertInt64Range("[500.0;1.7976931348623157E308]", false, 500, std::numeric_limits<int64_t>::max());
+ assertInt64Range("[500.0;1.6976931348623157E308]", false, 500, std::numeric_limits<int64_t>::max());
+ assertInt64Range("[-1.7976931348623157E308;500.0]", false, std::numeric_limits<int64_t>::min(), 500);
+ assertInt64Range("[-1.6976931348623157E308;500.0]", false, std::numeric_limits<int64_t>::min(), 500);
+ assertInt64Range("[10;-10]", false, 10, -10);
+ assertInt64Range("[10.0;-10.0]", false, 10, -10);
+ assertInt64Range("[1.6976931348623157E308;-1.6976931348623157E308]", false, std::numeric_limits<int64_t>::max(), std::numeric_limits<int64_t>::min());
+ assertInt64Range("[1.7976931348623157E308;-1.7976931348623157E308]", false, std::numeric_limits<int64_t>::max(), std::numeric_limits<int64_t>::min());
+}
+
+TEST("require that we handle empty range as expected") {
+ assertInt64Range("[1;1]", false, 1, 1);
+ assertInt64Range("<1;1]", false, 2, 1);
+ assertInt64Range("[0;1>", false, 0, 0);
+ assertInt64Range("[1;1>", false, 1, 0);
+ assertInt64Range("<1;1>", false, 2, 0);
+}
+
+TEST("require that ascending range can be specified with limit only") {
+ int64_t low_integer = 0;
+ int64_t high_integer = 0;
+ double low_double = 0.0;
+ double high_double = 0.0;
+
+ EmptyQueryNodeResult eqnr;
+ QueryTerm ascending_query(eqnr, "[;;500]", "index", QueryTerm::WORD);
+
+ EXPECT_TRUE(ascending_query.getAsIntegerTerm(low_integer, high_integer));
+ EXPECT_TRUE(ascending_query.getAsDoubleTerm(low_double, high_double));
+ EXPECT_EQUAL(std::numeric_limits<int64_t>::min(), low_integer);
+ EXPECT_EQUAL(std::numeric_limits<int64_t>::max(), high_integer);
+ EXPECT_EQUAL(-std::numeric_limits<double>::max(), low_double);
+ EXPECT_EQUAL(std::numeric_limits<double>::max(), high_double);
+ EXPECT_EQUAL(500, ascending_query.getRangeLimit());
+}
+
+TEST("require that descending range can be specified with limit only") {
+ int64_t low_integer = 0;
+ int64_t high_integer = 0;
+ double low_double = 0.0;
+ double high_double = 0.0;
+
+ EmptyQueryNodeResult eqnr;
+ QueryTerm descending_query(eqnr, "[;;-500]", "index", QueryTerm::WORD);
+
+ EXPECT_TRUE(descending_query.getAsIntegerTerm(low_integer, high_integer));
+ EXPECT_TRUE(descending_query.getAsDoubleTerm(low_double, high_double));
+ EXPECT_EQUAL(std::numeric_limits<int64_t>::min(), low_integer);
+ EXPECT_EQUAL(std::numeric_limits<int64_t>::max(), high_integer);
+ EXPECT_EQUAL(-std::numeric_limits<double>::max(), low_double);
+ EXPECT_EQUAL(std::numeric_limits<double>::max(), high_double);
+ EXPECT_EQUAL(-500, descending_query.getRangeLimit());
+}
+
+TEST("require that correctly specified diversity can be parsed") {
+ EmptyQueryNodeResult eqnr;
+ QueryTerm descending_query(eqnr, "[;;-500;ab56;78]", "index", QueryTerm::WORD);
+ EXPECT_TRUE(descending_query.isValid());
+ EXPECT_EQUAL(-500, descending_query.getRangeLimit());
+ EXPECT_EQUAL("ab56", descending_query.getDiversityAttribute());
+ EXPECT_EQUAL(78u, descending_query.getMaxPerGroup());
+ EXPECT_EQUAL(std::numeric_limits<uint32_t>::max(), descending_query.getDiversityCutoffGroups());
+ EXPECT_FALSE(descending_query.getDiversityCutoffStrict());
+}
+
+TEST("require that correctly specified diversity with cutoff groups can be parsed") {
+ EmptyQueryNodeResult eqnr;
+ QueryTerm descending_query(eqnr, "[;;-500;ab56;78;93]", "index", QueryTerm::WORD);
+ EXPECT_TRUE(descending_query.isValid());
+ EXPECT_EQUAL(-500, descending_query.getRangeLimit());
+ EXPECT_EQUAL("ab56", descending_query.getDiversityAttribute());
+ EXPECT_EQUAL(78u, descending_query.getMaxPerGroup());
+ EXPECT_EQUAL(93u, descending_query.getDiversityCutoffGroups());
+ EXPECT_FALSE(descending_query.getDiversityCutoffStrict());
+}
+
+TEST("require that correctly specified diversity with cutoff groups can be parsed") {
+ EmptyQueryNodeResult eqnr;
+ QueryTerm descending_query(eqnr, "[;;-500;ab56;78;13]", "index", QueryTerm::WORD);
+ EXPECT_TRUE(descending_query.isValid());
+ EXPECT_EQUAL(-500, descending_query.getRangeLimit());
+ EXPECT_EQUAL("ab56", descending_query.getDiversityAttribute());
+ EXPECT_EQUAL(78u, descending_query.getMaxPerGroup());
+ EXPECT_EQUAL(13u, descending_query.getDiversityCutoffGroups());
+ EXPECT_FALSE(descending_query.getDiversityCutoffStrict());
+}
+
+TEST("require that correctly specified diversity with incorrect cutoff groups can be parsed") {
+ EmptyQueryNodeResult eqnr;
+ QueryTerm descending_query(eqnr, "[;;-500;ab56;78;a13.9]", "index", QueryTerm::WORD);
+ EXPECT_TRUE(descending_query.isValid());
+ EXPECT_EQUAL(-500, descending_query.getRangeLimit());
+ EXPECT_EQUAL("ab56", descending_query.getDiversityAttribute());
+ EXPECT_EQUAL(78u, descending_query.getMaxPerGroup());
+ EXPECT_EQUAL(std::numeric_limits<uint32_t>::max(), descending_query.getDiversityCutoffGroups());
+ EXPECT_FALSE(descending_query.getDiversityCutoffStrict());
+}
+
+TEST("require that correctly specified diversity with cutoff strategy can be parsed") {
+ EmptyQueryNodeResult eqnr;
+ QueryTerm descending_query(eqnr, "[;;-500;ab56;78;93;anything but strict]", "index", QueryTerm::WORD);
+ EXPECT_TRUE(descending_query.isValid());
+ EXPECT_EQUAL(-500, descending_query.getRangeLimit());
+ EXPECT_EQUAL("ab56", descending_query.getDiversityAttribute());
+ EXPECT_EQUAL(78u, descending_query.getMaxPerGroup());
+ EXPECT_EQUAL(93u, descending_query.getDiversityCutoffGroups());
+ EXPECT_FALSE(descending_query.getDiversityCutoffStrict());
+}
+
+TEST("require that correctly specified diversity with strict cutoff strategy can be parsed") {
+ EmptyQueryNodeResult eqnr;
+ QueryTerm descending_query(eqnr, "[;;-500;ab56;78;93;strict]", "index", QueryTerm::WORD);
+ EXPECT_TRUE(descending_query.isValid());
+ EXPECT_EQUAL(-500, descending_query.getRangeLimit());
+ EXPECT_EQUAL("ab56", descending_query.getDiversityAttribute());
+ EXPECT_EQUAL(78u, descending_query.getMaxPerGroup());
+ EXPECT_EQUAL(93u, descending_query.getDiversityCutoffGroups());
+ EXPECT_TRUE(descending_query.getDiversityCutoffStrict());
+}
+
+TEST("require that incorrectly specified diversity can be parsed") {
+ EmptyQueryNodeResult eqnr;
+ QueryTerm descending_query(eqnr, "[;;-500;ab56]", "index", QueryTerm::WORD);
+ EXPECT_FALSE(descending_query.isValid());
+}
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/query/query_visitor_test.cpp b/searchlib/src/tests/query/query_visitor_test.cpp
new file mode 100644
index 00000000000..b98e14604e3
--- /dev/null
+++ b/searchlib/src/tests/query/query_visitor_test.cpp
@@ -0,0 +1,114 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Unit tests for query_visitor.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("query_visitor_test");
+
+#include <vespa/searchlib/query/tree/intermediatenodes.h>
+#include <vespa/searchlib/query/tree/point.h>
+#include <vespa/searchlib/query/tree/queryvisitor.h>
+#include <vespa/searchlib/query/tree/simplequery.h>
+#include <vespa/searchlib/query/tree/termnodes.h>
+#include <vespa/vespalib/testkit/testapp.h>
+
+using namespace search::query;
+
+namespace {
+
+class Test : public vespalib::TestApp {
+ void requireThatAllNodesCanBeVisited();
+
+ template <class T> void checkVisit(T *node);
+
+public:
+ int Main();
+};
+
+int
+Test::Main()
+{
+ TEST_INIT("query_visitor_test");
+
+ TEST_DO(requireThatAllNodesCanBeVisited());
+
+ TEST_DONE();
+}
+
+class MyVisitor : public QueryVisitor
+{
+public:
+ template <typename T>
+ bool &isVisited() {
+ static bool b;
+ return b;
+ }
+
+ virtual void visit(And &) { isVisited<And>() = true; }
+ virtual void visit(AndNot &) { isVisited<AndNot>() = true; }
+ virtual void visit(Equiv &) { isVisited<Equiv>() = true; }
+ virtual void visit(NumberTerm &) { isVisited<NumberTerm>() = true; }
+ virtual void visit(LocationTerm &) { isVisited<LocationTerm>() = true; }
+ virtual void visit(Near &) { isVisited<Near>() = true; }
+ virtual void visit(ONear &) { isVisited<ONear>() = true; }
+ virtual void visit(Or &) { isVisited<Or>() = true; }
+ virtual void visit(Phrase &) { isVisited<Phrase>() = true; }
+ virtual void visit(PrefixTerm &) { isVisited<PrefixTerm>() = true; }
+ virtual void visit(RangeTerm &) { isVisited<RangeTerm>() = true; }
+ virtual void visit(Rank &) { isVisited<Rank>() = true; }
+ virtual void visit(StringTerm &) { isVisited<StringTerm>() = true; }
+ virtual void visit(SubstringTerm &) { isVisited<SubstringTerm>() = true; }
+ virtual void visit(SuffixTerm &) { isVisited<SuffixTerm>() = true; }
+ virtual void visit(WeakAnd &) { isVisited<WeakAnd>() = true; }
+ virtual void visit(WeightedSetTerm &)
+ { isVisited<WeightedSetTerm>() = true; }
+ virtual void visit(DotProduct &) { isVisited<DotProduct>() = true; }
+ virtual void visit(WandTerm &) { isVisited<WandTerm>() = true; }
+ virtual void visit(PredicateQuery &)
+ { isVisited<PredicateQuery>() = true; }
+ virtual void visit(RegExpTerm &) { isVisited<RegExpTerm>() = true; }
+};
+
+template <class T>
+void Test::checkVisit(T *node) {
+ Node::UP query(node);
+ MyVisitor visitor;
+ visitor.isVisited<T>() = false;
+ query->accept(visitor);
+ ASSERT_TRUE(visitor.isVisited<T>());
+}
+
+void Test::requireThatAllNodesCanBeVisited() {
+ checkVisit<And>(new SimpleAnd);
+ checkVisit<AndNot>(new SimpleAndNot);
+ checkVisit<Near>(new SimpleNear(0));
+ checkVisit<ONear>(new SimpleONear(0));
+ checkVisit<Or>(new SimpleOr);
+ checkVisit<Phrase>(new SimplePhrase("field", 0, Weight(42)));
+ checkVisit<WeightedSetTerm>(
+ new SimpleWeightedSetTerm("field", 0, Weight(42)));
+ checkVisit<DotProduct>(new SimpleDotProduct("field", 0, Weight(42)));
+ checkVisit<WandTerm>(
+ new SimpleWandTerm("field", 0, Weight(42), 57, 67, 77.7));
+ checkVisit<Rank>(new SimpleRank);
+ checkVisit<NumberTerm>(
+ new SimpleNumberTerm("0.42", "field", 0, Weight(0)));
+ const Location location(Point(10, 10), 20, 0);
+ checkVisit<LocationTerm>(
+ new SimpleLocationTerm(location, "field", 0, Weight(0)));
+ checkVisit<PrefixTerm>(new SimplePrefixTerm("t", "field", 0, Weight(0)));
+ checkVisit<RangeTerm>(
+ new SimpleRangeTerm(Range(0, 1), "field", 0, Weight(0)));
+ checkVisit<StringTerm>(new SimpleStringTerm("t", "field", 0, Weight(0)));
+ checkVisit<SubstringTerm>(
+ new SimpleSubstringTerm("t", "field", 0, Weight(0)));
+ checkVisit<SuffixTerm>(new SimpleSuffixTerm("t", "field", 0, Weight(0)));
+ checkVisit<PredicateQuery>(
+ new SimplePredicateQuery(PredicateQueryTerm::UP(),
+ "field", 0, Weight(0)));
+ checkVisit<RegExpTerm>(new SimpleRegExpTerm("t", "field", 0, Weight(0)));
+}
+
+} // namespace
+
+TEST_APPHOOK(Test);
diff --git a/searchlib/src/tests/query/querybuilder_test.cpp b/searchlib/src/tests/query/querybuilder_test.cpp
new file mode 100644
index 00000000000..b64a46e9b18
--- /dev/null
+++ b/searchlib/src/tests/query/querybuilder_test.cpp
@@ -0,0 +1,615 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Unit tests for querybuilder.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("querybuilder_test");
+
+#include <vespa/searchlib/parsequery/parse.h>
+#include <vespa/searchlib/parsequery/simplequerystack.h>
+#include <vespa/searchlib/query/tree/customtypevisitor.h>
+#include <vespa/searchlib/query/tree/intermediatenodes.h>
+#include <vespa/searchlib/query/tree/point.h>
+#include <vespa/searchlib/query/tree/querybuilder.h>
+#include <vespa/searchlib/query/tree/querytreecreator.h>
+#include <vespa/searchlib/query/tree/simplequery.h>
+#include <vespa/searchlib/query/tree/stackdumpcreator.h>
+#include <vespa/searchlib/query/tree/termnodes.h>
+#include <vespa/searchlib/util/rawbuf.h>
+#include <vespa/vespalib/testkit/test_kit.h>
+#include <string>
+
+using std::string;
+using search::SimpleQueryStackDumpIterator;
+using namespace search::query;
+
+namespace {
+
+template <class NodeTypes> void checkQueryTreeTypes(Node *node);
+
+const string str[] = { "foo", "bar", "baz", "qux", "quux", "corge",
+ "grault", "garply", "waldo", "fred", "plugh" };
+const string (&view)[11] = str;
+const int32_t id[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 };
+const Weight weight[] = { Weight(1), Weight(2), Weight(3), Weight(4),
+ Weight(5), Weight(6), Weight(7), Weight(8),
+ Weight(9), Weight(10), Weight(11) };
+const size_t distance = 4;
+const string int1 = "42";
+const string float1 = "3.14";
+const Range range(32, 64);
+const Point position(100, 100);
+const int max_distance = 20;
+const uint32_t x_aspect = 0;
+const Location location(position, max_distance, x_aspect);
+
+PredicateQueryTerm::UP getPredicateQueryTerm() {
+ PredicateQueryTerm::UP pqt(new PredicateQueryTerm);
+ pqt->addFeature("key", "value");
+ pqt->addRangeFeature("key2", 42, 0xfff);
+ return pqt;
+}
+
+template <class NodeTypes>
+Node::UP createQueryTree() {
+ QueryBuilder<NodeTypes> builder;
+ builder.addAnd(9);
+ {
+ builder.addRank(2);
+ {
+ builder.addNear(2, distance);
+ {
+ builder.addStringTerm(str[0], view[0], id[0], weight[0]);
+ builder.addSubstringTerm(str[1], view[1], id[1], weight[1]);
+ }
+ builder.addONear(2, distance);
+ {
+ builder.addSuffixTerm(str[2], view[2], id[2], weight[2]);
+ builder.addPrefixTerm(str[3], view[3], id[3], weight[3]);
+ }
+ }
+ builder.addOr(3);
+ {
+ builder.addPhrase(3, view[4], id[4], weight[4]);
+ {
+ builder.addStringTerm(str[4], view[4], id[4], weight[5]);
+ builder.addStringTerm(str[5], view[5], id[5], weight[6]);
+ builder.addStringTerm(str[6], view[6], id[6], weight[7]);
+ }
+ builder.addPhrase(2, view[4], id[4], weight[4])
+ .setRanked(false);
+ {
+ builder.addStringTerm(str[4], view[4], id[4], weight[5]);
+ builder.addStringTerm(str[5], view[5], id[5], weight[6]);
+ }
+ builder.addAndNot(2);
+ {
+ builder.addNumberTerm(int1, view[7], id[7], weight[7]);
+ builder.addNumberTerm(float1, view[8], id[8], weight[8])
+ .setRanked(false);
+ }
+ }
+ builder.addRangeTerm(range, view[9], id[9], weight[9]);
+ builder.addLocationTerm(location, view[10], id[10], weight[10]);
+ builder.addWeakAnd(2, 123, view[0]);
+ {
+ builder.addStringTerm(str[4], view[4], id[4], weight[4]);
+ builder.addStringTerm(str[5], view[5], id[5], weight[5]);
+ }
+ builder.addPredicateQuery(getPredicateQueryTerm(),
+ view[3], id[3], weight[3]);
+ builder.addDotProduct(3, view[2], id[2], weight[2]);
+ {
+ builder.addStringTerm(str[3], view[3], id[3], weight[3]);
+ builder.addStringTerm(str[4], view[4], id[4], weight[4]);
+ builder.addStringTerm(str[5], view[5], id[5], weight[5]);
+ }
+ builder.addWandTerm(2, view[0], id[0], weight[0], 57, 67, 77.7);
+ {
+ builder.addStringTerm(str[1], view[1], id[1], weight[1]);
+ builder.addStringTerm(str[2], view[2], id[2], weight[2]);
+ }
+ builder.addRegExpTerm(str[5], view[5], id[5], weight[5]);
+ }
+ Node::UP node = builder.build();
+ ASSERT_TRUE(node.get());
+ return node;
+}
+
+template <class TermType>
+bool compareTerms(const TermType &expected, const TermType &actual) {
+ return EXPECT_TRUE(expected == actual);
+}
+template <typename T>
+bool compareTerms(const std::unique_ptr<T> &expected,
+ const std::unique_ptr<T> &actual) {
+ return EXPECT_TRUE(*expected == *actual);
+}
+
+template <class Term>
+bool checkTerm(const Term *term, const typename Term::Type &t, const string &f,
+ int32_t i, Weight w, bool ranked = true,
+ bool use_position_data = true) {
+ return EXPECT_TRUE(term != 0) &&
+ (EXPECT_TRUE(compareTerms(t, term->getTerm())) &
+ EXPECT_EQUAL(f, term->getView()) &
+ EXPECT_EQUAL(i, term->getId()) &
+ EXPECT_EQUAL(w.percent(), term->getWeight().percent()) &
+ EXPECT_EQUAL(ranked, term->isRanked()) &
+ EXPECT_EQUAL(use_position_data, term->usePositionData()));
+}
+
+template <class NodeTypes>
+void checkQueryTreeTypes(Node *node) {
+ typedef typename NodeTypes::And And;
+ typedef typename NodeTypes::AndNot AndNot;
+ typedef typename NodeTypes::NumberTerm NumberTerm;
+ //typedef typename NodeTypes::NumberTerm FloatTrm;
+ typedef typename NodeTypes::Near Near;
+ typedef typename NodeTypes::ONear ONear;
+ typedef typename NodeTypes::Or Or;
+ typedef typename NodeTypes::Phrase Phrase;
+ typedef typename NodeTypes::PrefixTerm PrefixTerm;
+ typedef typename NodeTypes::RangeTerm RangeTerm;
+ typedef typename NodeTypes::Rank Rank;
+ typedef typename NodeTypes::StringTerm StringTerm;
+ //typedef typename NodeTypes::SubstringTerm SubstrTr;
+ typedef typename NodeTypes::SuffixTerm SuffixTerm;
+ typedef typename NodeTypes::LocationTerm LocationTerm;
+ //typedef typename NodeTypes::WeightedSetTerm WeightedSetTerm;
+ typedef typename NodeTypes::DotProduct DotProduct;
+ typedef typename NodeTypes::WandTerm WandTerm;
+ typedef typename NodeTypes::WeakAnd WeakAnd;
+ typedef typename NodeTypes::PredicateQuery PredicateQuery;
+ typedef typename NodeTypes::RegExpTerm RegExpTerm;
+
+ ASSERT_TRUE(node);
+ And *and_node = dynamic_cast<And *>(node);
+ ASSERT_TRUE(and_node);
+ EXPECT_EQUAL(9u, and_node->getChildren().size());
+
+
+ Rank *rank = dynamic_cast<Rank *>(and_node->getChildren()[0]);
+ ASSERT_TRUE(rank);
+ EXPECT_EQUAL(2u, rank->getChildren().size());
+
+ Near *near = dynamic_cast<Near *>(rank->getChildren()[0]);
+ ASSERT_TRUE(near);
+ EXPECT_EQUAL(2u, near->getChildren().size());
+ EXPECT_EQUAL(distance, near->getDistance());
+ StringTerm *string_term =
+ dynamic_cast<StringTerm *>(near->getChildren()[0]);
+ EXPECT_TRUE(checkTerm(string_term, str[0], view[0], id[0], weight[0]));
+ SubstringTerm *substring_term =
+ dynamic_cast<SubstringTerm *>(near->getChildren()[1]);
+ EXPECT_TRUE(checkTerm(substring_term, str[1], view[1], id[1], weight[1]));
+
+ ONear *onear = dynamic_cast<ONear *>(rank->getChildren()[1]);
+ ASSERT_TRUE(onear);
+ EXPECT_EQUAL(2u, onear->getChildren().size());
+ EXPECT_EQUAL(distance, onear->getDistance());
+ SuffixTerm *suffix_term =
+ dynamic_cast<SuffixTerm *>(onear->getChildren()[0]);
+ EXPECT_TRUE(checkTerm(suffix_term, str[2], view[2], id[2], weight[2]));
+ PrefixTerm *prefix_term =
+ dynamic_cast<PrefixTerm *>(onear->getChildren()[1]);
+ EXPECT_TRUE(checkTerm(prefix_term, str[3], view[3], id[3], weight[3]));
+
+
+ Or *or_node = dynamic_cast<Or *>(and_node->getChildren()[1]);
+ ASSERT_TRUE(or_node);
+ EXPECT_EQUAL(3u, or_node->getChildren().size());
+
+ Phrase *phrase = dynamic_cast<Phrase *>(or_node->getChildren()[0]);
+ ASSERT_TRUE(phrase);
+ EXPECT_TRUE(phrase->isRanked());
+ EXPECT_EQUAL(weight[4].percent(), phrase->getWeight().percent());
+ EXPECT_EQUAL(3u, phrase->getChildren().size());
+ string_term = dynamic_cast<StringTerm *>(phrase->getChildren()[0]);
+ EXPECT_TRUE(checkTerm(string_term, str[4], view[4], id[4], weight[4]));
+ string_term = dynamic_cast<StringTerm *>(phrase->getChildren()[1]);
+ EXPECT_TRUE(checkTerm(string_term, str[5], view[5], id[5], weight[4]));
+ string_term = dynamic_cast<StringTerm *>(phrase->getChildren()[2]);
+ EXPECT_TRUE(checkTerm(string_term, str[6], view[6], id[6], weight[4]));
+
+ phrase = dynamic_cast<Phrase *>(or_node->getChildren()[1]);
+ ASSERT_TRUE(phrase);
+ EXPECT_TRUE(!phrase->isRanked());
+ EXPECT_EQUAL(weight[4].percent(), phrase->getWeight().percent());
+ EXPECT_EQUAL(2u, phrase->getChildren().size());
+ string_term = dynamic_cast<StringTerm *>(phrase->getChildren()[0]);
+ EXPECT_TRUE(checkTerm(string_term, str[4], view[4], id[4], weight[4]));
+ string_term = dynamic_cast<StringTerm *>(phrase->getChildren()[1]);
+ EXPECT_TRUE(checkTerm(string_term, str[5], view[5], id[5], weight[4]));
+
+ AndNot *and_not = dynamic_cast<AndNot *>(or_node->getChildren()[2]);
+ ASSERT_TRUE(and_not);
+ EXPECT_EQUAL(2u, and_not->getChildren().size());
+ NumberTerm *integer_term =
+ dynamic_cast<NumberTerm *>(and_not->getChildren()[0]);
+ EXPECT_TRUE(checkTerm(integer_term, int1, view[7], id[7], weight[7]));
+ NumberTerm *float_term =
+ dynamic_cast<NumberTerm *>(and_not->getChildren()[1]);
+ EXPECT_TRUE(checkTerm(float_term, float1, view[8], id[8], weight[8],
+ false));
+
+
+ RangeTerm *range_term =
+ dynamic_cast<RangeTerm *>(and_node->getChildren()[2]);
+ ASSERT_TRUE(range_term);
+ EXPECT_TRUE(checkTerm(range_term, range, view[9], id[9], weight[9]));
+
+ LocationTerm *loc_term =
+ dynamic_cast<LocationTerm *>(and_node->getChildren()[3]);
+ ASSERT_TRUE(loc_term);
+ EXPECT_TRUE(checkTerm(loc_term, location, view[10], id[10], weight[10]));
+
+
+ WeakAnd *wand = dynamic_cast<WeakAnd *>(and_node->getChildren()[4]);
+ ASSERT_TRUE(wand != 0);
+ EXPECT_EQUAL(123u, wand->getMinHits());
+ EXPECT_EQUAL(2u, wand->getChildren().size());
+ string_term = dynamic_cast<StringTerm *>(wand->getChildren()[0]);
+ EXPECT_TRUE(checkTerm(string_term, str[4], view[4], id[4], weight[4]));
+ string_term = dynamic_cast<StringTerm *>(wand->getChildren()[1]);
+ EXPECT_TRUE(checkTerm(string_term, str[5], view[5], id[5], weight[5]));
+
+ PredicateQuery *predicateQuery =
+ dynamic_cast<PredicateQuery *>(and_node->getChildren()[5]);
+ ASSERT_TRUE(predicateQuery);
+ PredicateQueryTerm::UP pqt(new PredicateQueryTerm);
+ EXPECT_TRUE(checkTerm(predicateQuery, getPredicateQueryTerm(),
+ view[3], id[3], weight[3]));
+
+ DotProduct *dotProduct =
+ dynamic_cast<DotProduct *>(and_node->getChildren()[6]);
+ ASSERT_TRUE(dotProduct);
+ EXPECT_EQUAL(3u, dotProduct->getChildren().size());
+ string_term = dynamic_cast<StringTerm *>(dotProduct->getChildren()[0]);
+ EXPECT_TRUE(checkTerm(string_term, str[3], view[3], id[3], weight[3]));
+ string_term = dynamic_cast<StringTerm *>(dotProduct->getChildren()[1]);
+ EXPECT_TRUE(checkTerm(string_term, str[4], view[4], id[4], weight[4]));
+ string_term = dynamic_cast<StringTerm *>(dotProduct->getChildren()[2]);
+ EXPECT_TRUE(checkTerm(string_term, str[5], view[5], id[5], weight[5]));
+
+ WandTerm *wandTerm = dynamic_cast<WandTerm *>(and_node->getChildren()[7]);
+ ASSERT_TRUE(wandTerm);
+ EXPECT_EQUAL(57u, wandTerm->getTargetNumHits());
+ EXPECT_EQUAL(67, wandTerm->getScoreThreshold());
+ EXPECT_EQUAL(77.7, wandTerm->getThresholdBoostFactor());
+ EXPECT_EQUAL(2u, wandTerm->getChildren().size());
+ string_term = dynamic_cast<StringTerm *>(wandTerm->getChildren()[0]);
+ EXPECT_TRUE(checkTerm(string_term, str[1], view[1], id[1], weight[1]));
+ string_term = dynamic_cast<StringTerm *>(wandTerm->getChildren()[1]);
+ EXPECT_TRUE(checkTerm(string_term, str[2], view[2], id[2], weight[2]));
+
+ RegExpTerm *regexp_term =
+ dynamic_cast<RegExpTerm *>(and_node->getChildren()[8]);
+ EXPECT_TRUE(checkTerm(regexp_term, str[5], view[5], id[5], weight[5]));
+}
+
+struct AbstractTypes {
+ typedef search::query::And And;
+ typedef search::query::AndNot AndNot;
+ typedef search::query::NumberTerm NumberTerm;
+ typedef search::query::LocationTerm LocationTerm;
+ typedef search::query::Near Near;
+ typedef search::query::ONear ONear;
+ typedef search::query::Or Or;
+ typedef search::query::Phrase Phrase;
+ typedef search::query::PrefixTerm PrefixTerm;
+ typedef search::query::RangeTerm RangeTerm;
+ typedef search::query::Rank Rank;
+ typedef search::query::StringTerm StringTerm;
+ typedef search::query::SubstringTerm SubstringTerm;
+ typedef search::query::SuffixTerm SuffixTerm;
+ typedef search::query::WeightedSetTerm WeightedSetTerm;
+ typedef search::query::DotProduct DotProduct;
+ typedef search::query::WandTerm WandTerm;
+ typedef search::query::WeakAnd WeakAnd;
+ typedef search::query::PredicateQuery PredicateQuery;
+ typedef search::query::RegExpTerm RegExpTerm;
+};
+
+// Builds a tree with simplequery and checks that the results have the
+// correct abstract types.
+TEST("require that Query Trees Can Be Built") {
+ Node::UP node = createQueryTree<SimpleQueryNodeTypes>();
+ checkQueryTreeTypes<AbstractTypes>(node.get());
+}
+
+// Builds a tree with simplequery and checks that the results have the
+// correct concrete types.
+TEST("require that Simple Query Trees Can Be Built") {
+ Node::UP node = createQueryTree<SimpleQueryNodeTypes>();
+ checkQueryTreeTypes<SimpleQueryNodeTypes>(node.get());
+}
+
+struct MyAnd : And {};
+struct MyAndNot : AndNot {};
+struct MyEquiv : Equiv {
+ MyEquiv(int32_t i, Weight w) : Equiv(i, w) {}
+};
+struct MyNear : Near { MyNear(size_t dist) : Near(dist) {} };
+struct MyONear : ONear { MyONear(size_t dist) : ONear(dist) {} };
+struct MyWeakAnd : WeakAnd { MyWeakAnd(uint32_t minHits, const vespalib::string & v) : WeakAnd(minHits, v) {} };
+struct MyOr : Or {};
+struct MyPhrase : Phrase {
+ MyPhrase(const string &f, int32_t i, Weight w) : Phrase(f, i, w) {}
+};
+struct MyWeightedSetTerm : WeightedSetTerm {
+ MyWeightedSetTerm(const string &f, int32_t i, Weight w) : WeightedSetTerm(f, i, w) {}
+};
+struct MyDotProduct : DotProduct {
+ MyDotProduct(const string &f, int32_t i, Weight w) : DotProduct(f, i, w) {}
+};
+struct MyWandTerm : WandTerm {
+ MyWandTerm(const string &f, int32_t i, Weight w, uint32_t targetNumHits,
+ int64_t scoreThreshold, double thresholdBoostFactor)
+ : WandTerm(f, i, w, targetNumHits, scoreThreshold, thresholdBoostFactor) {}
+};
+struct MyRank : Rank {};
+struct MyNumberTerm : NumberTerm {
+ MyNumberTerm(Type t, const string &f, int32_t i, Weight w)
+ : NumberTerm(t, f, i, w) {
+ }
+};
+struct MyLocationTerm : LocationTerm {
+ MyLocationTerm(const Type &t, const string &f, int32_t i, Weight w)
+ : LocationTerm(t, f, i, w) {
+ }
+};
+struct MyPrefixTerm : PrefixTerm {
+ MyPrefixTerm(const Type &t, const string &f, int32_t i, Weight w)
+ : PrefixTerm(t, f, i, w) {
+ }
+};
+struct MyRangeTerm : RangeTerm {
+ MyRangeTerm(const Type &t, const string &f, int32_t i, Weight w)
+ : RangeTerm(t, f, i, w) {
+ }
+};
+struct MyStringTerm : StringTerm {
+ MyStringTerm(const Type &t, const string &f, int32_t i, Weight w)
+ : StringTerm(t, f, i, w) {
+ }
+};
+struct MySubstringTerm : SubstringTerm {
+ MySubstringTerm(const Type &t, const string &f, int32_t i, Weight w)
+ : SubstringTerm(t, f, i, w) {
+ }
+};
+struct MySuffixTerm : SuffixTerm {
+ MySuffixTerm(const Type &t, const string &f, int32_t i, Weight w)
+ : SuffixTerm(t, f, i, w) {
+ }
+};
+struct MyPredicateQuery : PredicateQuery {
+ MyPredicateQuery(Type &&t, const string &f, int32_t i, Weight w)
+ : PredicateQuery(std::move(t), f, i, w) {
+ }
+};
+struct MyRegExpTerm : RegExpTerm {
+ MyRegExpTerm(const Type &t, const string &f, int32_t i, Weight w)
+ : RegExpTerm(t, f, i, w) {
+ }
+};
+
+struct MyQueryNodeTypes {
+ typedef MyAnd And;
+ typedef MyAndNot AndNot;
+ typedef MyEquiv Equiv;
+ typedef MyNumberTerm NumberTerm;
+ typedef MyLocationTerm LocationTerm;
+ typedef MyNear Near;
+ typedef MyONear ONear;
+ typedef MyOr Or;
+ typedef MyPhrase Phrase;
+ typedef MyPrefixTerm PrefixTerm;
+ typedef MyRangeTerm RangeTerm;
+ typedef MyRank Rank;
+ typedef MyStringTerm StringTerm;
+ typedef MySubstringTerm SubstringTerm;
+ typedef MySuffixTerm SuffixTerm;
+ typedef MyWeakAnd WeakAnd;
+ typedef MyWeightedSetTerm WeightedSetTerm;
+ typedef MyDotProduct DotProduct;
+ typedef MyWandTerm WandTerm;
+ typedef MyPredicateQuery PredicateQuery;
+ typedef MyRegExpTerm RegExpTerm;
+};
+
+TEST("require that Custom Query Trees Can Be Built") {
+ Node::UP node = createQueryTree<MyQueryNodeTypes>();
+ checkQueryTreeTypes<MyQueryNodeTypes>(node.get());
+}
+
+TEST("require that Invalid Trees Cannot Be Built") {
+ // Incomplete tree.
+ QueryBuilder<SimpleQueryNodeTypes> builder;
+ builder.addAnd(1);
+ ASSERT_TRUE(!builder.build().get());
+ EXPECT_EQUAL("Trying to build incomplete query tree.", builder.error());
+
+ // Adding a node after build() and before reset() is a no-op.
+ builder.addStringTerm(str[0], view[0], id[0], weight[0]);
+ ASSERT_TRUE(!builder.build().get());
+ EXPECT_EQUAL("Trying to build incomplete query tree.", builder.error());
+
+ builder.reset();
+ EXPECT_TRUE(builder.error().empty());
+
+ // Too many nodes.
+ builder.addAnd(1);
+ builder.addStringTerm(str[0], view[0], id[0], weight[0]);
+ builder.addStringTerm(str[1], view[1], id[1], weight[1]);
+ ASSERT_TRUE(!builder.build().get());
+ EXPECT_EQUAL("QueryBuilder got invalid node structure.", builder.error());
+
+ // Adding an intermediate node after build() is also a no-op.
+ builder.addAnd(1);
+ ASSERT_TRUE(!builder.build().get());
+ EXPECT_EQUAL("QueryBuilder got invalid node structure.", builder.error());
+}
+
+TEST("require that Term Index Can Be Added") {
+ const int term_index0 = 14;
+ const int term_index1 = 65;
+
+ QueryBuilder<SimpleQueryNodeTypes> builder;
+ builder.addAnd(2);
+ builder.addStringTerm(str[0], view[0], id[0], weight[0])
+ .setTermIndex(term_index0);
+ builder.addSubstringTerm(str[1], view[1], id[1], weight[1])
+ .setTermIndex(term_index1);
+
+ Node::UP node = builder.build();
+ ASSERT_TRUE(!builder.hasError());
+ Intermediate *intermediate = dynamic_cast<Intermediate *>(node.get());
+ ASSERT_TRUE(intermediate);
+ ASSERT_TRUE(intermediate->getChildren().size() == 2);
+ Term *term = dynamic_cast<Term *>(intermediate->getChildren()[0]);
+ ASSERT_TRUE(term);
+ EXPECT_EQUAL(term_index0, term->getTermIndex());
+ term = dynamic_cast<Term *>(intermediate->getChildren()[1]);
+ ASSERT_TRUE(term);
+ EXPECT_EQUAL(term_index1, term->getTermIndex());
+}
+
+TEST("require that Rank Can Be Turned Off") {
+ QueryBuilder<SimpleQueryNodeTypes> builder;
+ builder.addAnd(3);
+ builder.addStringTerm(str[0], view[0], id[0], weight[0]);
+ builder.addSubstringTerm(str[1], view[1], id[1], weight[1])
+ .setRanked(false);
+ builder.addPhrase(2, view[2], id[2], weight[2])
+ .setRanked(false);
+ {
+ builder.addStringTerm(str[2], view[2], id[3], weight[3]);
+ builder.addStringTerm(str[3], view[2], id[4], weight[4]);
+ }
+
+ Node::UP node = builder.build();
+ ASSERT_TRUE(!builder.hasError());
+ Intermediate *intermediate = dynamic_cast<Intermediate *>(node.get());
+ ASSERT_TRUE(intermediate);
+ ASSERT_TRUE(intermediate->getChildren().size() == 3);
+ Term *term = dynamic_cast<Term *>(intermediate->getChildren()[0]);
+ ASSERT_TRUE(term);
+ EXPECT_TRUE(term->isRanked());
+ term = dynamic_cast<Term *>(intermediate->getChildren()[1]);
+ ASSERT_TRUE(term);
+ EXPECT_TRUE(!term->isRanked());
+ Phrase *phrase = dynamic_cast<Phrase *>(intermediate->getChildren()[2]);
+ ASSERT_TRUE(phrase);
+ EXPECT_TRUE(!phrase->isRanked());
+}
+
+TEST("require that Using Position Data Can Be Turned Off") {
+ QueryBuilder<SimpleQueryNodeTypes> builder;
+ builder.addAnd(2);
+ builder.addStringTerm(str[0], view[0], id[0], weight[0]).setPositionData(false);
+ builder.addPhrase(2, view[1], id[1], weight[1]).setPositionData(false);
+ builder.addStringTerm(str[2], view[1], id[2], weight[2]);
+ builder.addStringTerm(str[3], view[1], id[3], weight[3]);
+
+ Node::UP node = builder.build();
+ ASSERT_TRUE(!builder.hasError());
+ Intermediate * andNode = dynamic_cast<Intermediate *>(node.get());
+ ASSERT_TRUE(andNode != NULL);
+ ASSERT_TRUE(andNode->getChildren().size() == 2);
+ Term * term = dynamic_cast<Term *>(andNode->getChildren()[0]);
+ ASSERT_TRUE(term != NULL);
+ EXPECT_TRUE(!term->usePositionData());
+ Phrase * phrase = dynamic_cast<Phrase *>(andNode->getChildren()[1]);
+ ASSERT_TRUE(phrase != NULL);
+ EXPECT_TRUE(!phrase->usePositionData());
+}
+
+TEST("require that Weight Override Works Across Multiple Levels") {
+ QueryBuilder<SimpleQueryNodeTypes> builder;
+ builder.addPhrase(2, view[0], id[0], weight[0]);
+
+ SimpleStringTerm &string_term_1 =
+ builder.addStringTerm(str[1], view[1], id[1], weight[1]);
+ EXPECT_EQUAL(weight[0].percent(), string_term_1.getWeight().percent());
+
+ builder.addAnd(2);
+ SimpleStringTerm &string_term_2 =
+ builder.addStringTerm(str[2], view[2], id[2], weight[2]);
+ EXPECT_EQUAL(weight[0].percent(), string_term_2.getWeight().percent());
+}
+
+TEST("require that Query Tree Creator Can Replicate Queries") {
+ Node::UP node = createQueryTree<SimpleQueryNodeTypes>();
+ Node::UP new_node = QueryTreeCreator<MyQueryNodeTypes>::replicate(*node);
+
+ checkQueryTreeTypes<SimpleQueryNodeTypes>(node.get());
+ checkQueryTreeTypes<MyQueryNodeTypes>(new_node.get());
+}
+
+TEST("require that Query Tree Creator Can Create Queries From Stack") {
+ Node::UP node = createQueryTree<MyQueryNodeTypes>();
+ string stackDump = StackDumpCreator::create(*node);
+ SimpleQueryStackDumpIterator iterator(stackDump);
+
+ Node::UP new_node =
+ QueryTreeCreator<SimpleQueryNodeTypes>::create(iterator);
+ checkQueryTreeTypes<SimpleQueryNodeTypes>(new_node.get());
+}
+
+TEST("require that All Range Syntaxes Work") {
+
+ Range range0("[2,42.1]");
+ Range range1(">10");
+ Range range2("<45.23");
+
+ QueryBuilder<SimpleQueryNodeTypes> builder;
+ builder.addAnd(3);
+ builder.addRangeTerm(range0, "view", 0, Weight(0));
+ builder.addRangeTerm(range1, "view", 0, Weight(0));
+ builder.addRangeTerm(range2, "view", 0, Weight(0));
+ Node::UP node = builder.build();
+
+ string stackDump = StackDumpCreator::create(*node);
+ SimpleQueryStackDumpIterator iterator(stackDump);
+
+ Node::UP new_node =
+ QueryTreeCreator<SimpleQueryNodeTypes>::create(iterator);
+ And *and_node = dynamic_cast<And *>(new_node.get());
+ ASSERT_TRUE(and_node);
+ EXPECT_EQUAL(3u, and_node->getChildren().size());
+
+ RangeTerm *range_term =
+ dynamic_cast<RangeTerm *>(and_node->getChildren()[0]);
+ ASSERT_TRUE(range_term);
+ EXPECT_TRUE(range0 == range_term->getTerm());
+
+ range_term = dynamic_cast<RangeTerm *>(and_node->getChildren()[1]);
+ ASSERT_TRUE(range_term);
+ EXPECT_TRUE(range1 == range_term->getTerm());
+
+ range_term = dynamic_cast<RangeTerm *>(and_node->getChildren()[2]);
+ ASSERT_TRUE(range_term);
+ EXPECT_TRUE(range2 == range_term->getTerm());
+}
+
+TEST("require that empty intermediate node can be added") {
+ QueryBuilder<SimpleQueryNodeTypes> builder;
+ builder.addAnd(0);
+ Node::UP node = builder.build();
+ ASSERT_TRUE(node.get());
+
+ string stackDump = StackDumpCreator::create(*node);
+ SimpleQueryStackDumpIterator iterator(stackDump);
+
+ Node::UP new_node =
+ QueryTreeCreator<SimpleQueryNodeTypes>::create(iterator);
+ And *and_node = dynamic_cast<And *>(new_node.get());
+ ASSERT_TRUE(and_node);
+ EXPECT_EQUAL(0u, and_node->getChildren().size());
+}
+
+} // namespace
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/query/stackdumpquerycreator_test.cpp b/searchlib/src/tests/query/stackdumpquerycreator_test.cpp
new file mode 100644
index 00000000000..269947b7059
--- /dev/null
+++ b/searchlib/src/tests/query/stackdumpquerycreator_test.cpp
@@ -0,0 +1,116 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Unit tests for stackdumpquerycreator.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("stackdumpquerycreator_test");
+
+#include <vespa/searchlib/parsequery/parse.h>
+#include <vespa/searchlib/parsequery/stackdumpiterator.h>
+#include <vespa/searchlib/query/tree/simplequery.h>
+#include <vespa/searchlib/query/tree/stackdumpquerycreator.h>
+#include <vespa/searchlib/util/rawbuf.h>
+#include <vespa/vespalib/testkit/testapp.h>
+
+using search::ParseItem;
+using search::RawBuf;
+using search::SimpleQueryStackDumpIterator;
+using std::string;
+using namespace search::query;
+
+namespace {
+
+template <typename T>
+void append(RawBuf &buf, T i) {
+ buf.preAlloc(sizeof(T));
+ buf.PutToInet(i);
+}
+
+void appendString(RawBuf &buf, const string &s) {
+ buf.preAlloc(sizeof(uint32_t) + s.size());
+ buf.appendCompressedPositiveNumber(s.size());
+ buf.append(s.data(), s.size());
+}
+
+void appendNumTerm(RawBuf &buf, const string &term_string) {
+ uint8_t typefield = ParseItem::ITEM_NUMTERM |
+ ParseItem::IF_WEIGHT |
+ ParseItem::IF_UNIQUEID;
+ buf.append(typefield);
+ buf.appendCompressedNumber(2); // weight
+ buf.appendCompressedPositiveNumber(42); // id
+ appendString(buf, "view_name");
+ appendString(buf, term_string);
+}
+
+TEST("requireThatTooLargeNumTermIsTreatedAsFloat") {
+ const string term_string("99999999999999999999999999999999999");
+ RawBuf buf(1024);
+ appendNumTerm(buf, term_string);
+
+ SimpleQueryStackDumpIterator query_stack(vespalib::stringref(buf.GetDrainPos(), buf.GetUsedLen()));
+ Node::UP node =
+ StackDumpQueryCreator<SimpleQueryNodeTypes>::create(query_stack);
+ ASSERT_TRUE(node.get());
+ NumberTerm *term = dynamic_cast<NumberTerm *>(node.get());
+ ASSERT_TRUE(term);
+ EXPECT_EQUAL(term_string, term->getTerm());
+}
+
+TEST("requireThatTooLargeFloatNumTermIsTreatedAsFloat") {
+ const string term_string = "1" + string(310, '0') + ".20";
+ RawBuf buf(1024);
+ appendNumTerm(buf, term_string);
+
+ SimpleQueryStackDumpIterator
+ query_stack(vespalib::stringref(buf.GetDrainPos(), buf.GetUsedLen()));
+ Node::UP node =
+ StackDumpQueryCreator<SimpleQueryNodeTypes>::create(query_stack);
+ ASSERT_TRUE(node.get());
+ NumberTerm *term = dynamic_cast<NumberTerm *>(node.get());
+ ASSERT_TRUE(term);
+ EXPECT_EQUAL(term_string, term->getTerm());
+}
+
+TEST("require that PredicateQueryItem stack dump item can be read") {
+ RawBuf buf(1024);
+ uint8_t typefield = ParseItem::ITEM_PREDICATE_QUERY;
+ buf.append(typefield);
+ appendString(buf, "view_name");
+
+ buf.appendCompressedNumber(2);
+ appendString(buf, "key1");
+ appendString(buf, "value1");
+ buf.Put64ToInet(-1ULL);
+ appendString(buf, "key2");
+ appendString(buf, "value2");
+ buf.Put64ToInet(0xffffULL);
+
+ buf.appendCompressedNumber(2);
+ appendString(buf, "key3");
+ buf.Put64ToInet(42ULL);
+ buf.Put64ToInet(-1ULL);
+ appendString(buf, "key4");
+ buf.Put64ToInet(84ULL);
+ buf.Put64ToInet(0xffffULL);
+
+ SimpleQueryStackDumpIterator
+ query_stack(vespalib::stringref(buf.GetDrainPos(), buf.GetUsedLen()));
+ Node::UP node =
+ StackDumpQueryCreator<SimpleQueryNodeTypes>::create(query_stack);
+ ASSERT_TRUE(node.get());
+ PredicateQuery *p = dynamic_cast<PredicateQuery *>(node.get());
+ ASSERT_TRUE(p);
+ const PredicateQueryTerm &term = *p->getTerm();
+ ASSERT_EQUAL(2u, term.getFeatures().size());
+ ASSERT_EQUAL(2u, term.getRangeFeatures().size());
+ ASSERT_EQUAL("value1", term.getFeatures()[0].getValue());
+ ASSERT_EQUAL(0xffffffffffffffffULL,
+ term.getFeatures()[0].getSubQueryBitmap());
+ ASSERT_EQUAL("key2", term.getFeatures()[1].getKey());
+ ASSERT_EQUAL(42u, term.getRangeFeatures()[0].getValue());
+}
+
+} // namespace
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/query/templatetermvisitor_test.cpp b/searchlib/src/tests/query/templatetermvisitor_test.cpp
new file mode 100644
index 00000000000..369266f5b2d
--- /dev/null
+++ b/searchlib/src/tests/query/templatetermvisitor_test.cpp
@@ -0,0 +1,87 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Unit tests for templatetermvisitor.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("templatetermvisitor_test");
+
+#include <vespa/searchlib/query/tree/intermediatenodes.h>
+#include <vespa/searchlib/query/tree/templatetermvisitor.h>
+#include <vespa/searchlib/query/tree/simplequery.h>
+#include <vespa/searchlib/query/tree/termnodes.h>
+#include <vespa/vespalib/testkit/testapp.h>
+
+using namespace search::query;
+
+namespace {
+
+class MyVisitor;
+
+class Test : public vespalib::TestApp {
+ void requireThatAllTermsCanBeVisited();
+
+public:
+ int Main();
+};
+
+int
+Test::Main()
+{
+ TEST_INIT("templatetermvisitor_test");
+
+ TEST_DO(requireThatAllTermsCanBeVisited());
+
+ TEST_DONE();
+}
+
+class MyVisitor : public TemplateTermVisitor<MyVisitor, SimpleQueryNodeTypes>
+{
+public:
+ template <typename T>
+ bool &isVisited() {
+ static bool b;
+ return b;
+ }
+
+ template <class TermType>
+ void visitTerm(TermType &) { isVisited<TermType>() = true; }
+};
+
+template <class T>
+bool checkVisit(T *q) {
+ Node::UP query(q);
+ MyVisitor visitor;
+ visitor.isVisited<T>() = false;
+ query->accept(visitor);
+ return visitor.isVisited<T>();
+}
+
+template <class T>
+bool checkVisit() {
+ return checkVisit(new T(typename T::Type(), "field", 0, Weight(0)));
+}
+
+void Test::requireThatAllTermsCanBeVisited() {
+ EXPECT_TRUE(checkVisit<SimpleNumberTerm>());
+ EXPECT_TRUE(checkVisit<SimpleLocationTerm>());
+ EXPECT_TRUE(checkVisit<SimplePrefixTerm>());
+ EXPECT_TRUE(checkVisit<SimpleRangeTerm>());
+ EXPECT_TRUE(checkVisit<SimpleStringTerm>());
+ EXPECT_TRUE(checkVisit<SimpleSubstringTerm>());
+ EXPECT_TRUE(checkVisit<SimpleSuffixTerm>());
+ EXPECT_TRUE(checkVisit<SimplePredicateQuery>());
+ EXPECT_TRUE(checkVisit<SimpleRegExpTerm>());
+ EXPECT_TRUE(checkVisit(new SimplePhrase("field", 0, Weight(0))));
+ EXPECT_TRUE(!checkVisit(new SimpleAnd));
+ EXPECT_TRUE(!checkVisit(new SimpleAndNot));
+ EXPECT_TRUE(!checkVisit(new SimpleEquiv(17, Weight(100))));
+ EXPECT_TRUE(!checkVisit(new SimpleNear(2)));
+ EXPECT_TRUE(!checkVisit(new SimpleONear(2)));
+ EXPECT_TRUE(!checkVisit(new SimpleOr));
+ EXPECT_TRUE(!checkVisit(new SimpleRank));
+}
+
+} // namespace
+
+TEST_APPHOOK(Test);
+#include <vespa/vespalib/testkit/testapp.h>
diff --git a/searchlib/src/tests/queryeval/.gitignore b/searchlib/src/tests/queryeval/.gitignore
new file mode 100644
index 00000000000..7039566e7c2
--- /dev/null
+++ b/searchlib/src/tests/queryeval/.gitignore
@@ -0,0 +1,5 @@
+.depend
+Makefile
+*_test
+*_bench
+searchlib_queryeval_test_app
diff --git a/searchlib/src/tests/queryeval/CMakeLists.txt b/searchlib/src/tests/queryeval/CMakeLists.txt
new file mode 100644
index 00000000000..35496b7f99a
--- /dev/null
+++ b/searchlib/src/tests/queryeval/CMakeLists.txt
@@ -0,0 +1,9 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_queryeval_test_app
+ SOURCES
+ queryeval.cpp
+ DEPENDS
+ searchlib
+ searchlib_test
+)
+vespa_add_test(NAME searchlib_queryeval_test_app COMMAND searchlib_queryeval_test_app)
diff --git a/searchlib/src/tests/queryeval/DESC b/searchlib/src/tests/queryeval/DESC
new file mode 100644
index 00000000000..15e6efd489d
--- /dev/null
+++ b/searchlib/src/tests/queryeval/DESC
@@ -0,0 +1 @@
+queryeval test. Take a look at queryeval.cpp for details.
diff --git a/searchlib/src/tests/queryeval/FILES b/searchlib/src/tests/queryeval/FILES
new file mode 100644
index 00000000000..d082d6f8725
--- /dev/null
+++ b/searchlib/src/tests/queryeval/FILES
@@ -0,0 +1 @@
+queryeval.cpp
diff --git a/searchlib/src/tests/queryeval/blueprint/.cvsignore b/searchlib/src/tests/queryeval/blueprint/.cvsignore
new file mode 100644
index 00000000000..a8da5289575
--- /dev/null
+++ b/searchlib/src/tests/queryeval/blueprint/.cvsignore
@@ -0,0 +1,3 @@
+.depend
+Makefile
+blueprint_test
diff --git a/searchlib/src/tests/queryeval/blueprint/.gitignore b/searchlib/src/tests/queryeval/blueprint/.gitignore
new file mode 100644
index 00000000000..da4bf633103
--- /dev/null
+++ b/searchlib/src/tests/queryeval/blueprint/.gitignore
@@ -0,0 +1,8 @@
+*_test
+.depend
+Makefile
+lhs.out
+rhs.out
+searchlib_blueprint_test_app
+searchlib_intermediate_blueprints_test_app
+searchlib_leaf_blueprints_test_app
diff --git a/searchlib/src/tests/queryeval/blueprint/CMakeLists.txt b/searchlib/src/tests/queryeval/blueprint/CMakeLists.txt
new file mode 100644
index 00000000000..88ba3deeb29
--- /dev/null
+++ b/searchlib/src/tests/queryeval/blueprint/CMakeLists.txt
@@ -0,0 +1,23 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_blueprint_test_app
+ SOURCES
+ blueprint_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_blueprint_test_app COMMAND searchlib_blueprint_test_app || diff -u lhs.out rhs.out)
+vespa_add_executable(searchlib_leaf_blueprints_test_app
+ SOURCES
+ leaf_blueprints_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_leaf_blueprints_test_app COMMAND searchlib_leaf_blueprints_test_app || diff -u lhs.out rhs.out)
+vespa_add_executable(searchlib_intermediate_blueprints_test_app
+ SOURCES
+ intermediate_blueprints_test.cpp
+ DEPENDS
+ searchlib
+ searchlib_test
+)
+vespa_add_test(NAME searchlib_intermediate_blueprints_test_app COMMAND searchlib_intermediate_blueprints_test_app || diff -u lhs.out rhs.out)
diff --git a/searchlib/src/tests/queryeval/blueprint/DESC b/searchlib/src/tests/queryeval/blueprint/DESC
new file mode 100644
index 00000000000..a2634c017bd
--- /dev/null
+++ b/searchlib/src/tests/queryeval/blueprint/DESC
@@ -0,0 +1 @@
+blueprint test. Take a look at blueprint_test.cpp for details.
diff --git a/searchlib/src/tests/queryeval/blueprint/FILES b/searchlib/src/tests/queryeval/blueprint/FILES
new file mode 100644
index 00000000000..89c566c5aea
--- /dev/null
+++ b/searchlib/src/tests/queryeval/blueprint/FILES
@@ -0,0 +1 @@
+blueprint_test.cpp
diff --git a/searchlib/src/tests/queryeval/blueprint/blueprint_test.cpp b/searchlib/src/tests/queryeval/blueprint/blueprint_test.cpp
new file mode 100644
index 00000000000..79fec3770b3
--- /dev/null
+++ b/searchlib/src/tests/queryeval/blueprint/blueprint_test.cpp
@@ -0,0 +1,766 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("blueprint_test");
+#include <vespa/vespalib/testkit/testapp.h>
+#include <vespa/searchlib/queryeval/blueprint.h>
+#include <vespa/searchlib/queryeval/intermediate_blueprints.h>
+#include <vespa/vespalib/objects/objectdumper.h>
+#include <vespa/vespalib/objects/visit.h>
+
+#include "mysearch.h"
+
+using namespace search::queryeval;
+using namespace search::fef;
+
+namespace {
+
+//-----------------------------------------------------------------------------
+
+class MyOr : public IntermediateBlueprint
+{
+private:
+public:
+ virtual HitEstimate combine(const std::vector<HitEstimate> &data) const {
+ return max(data);
+ }
+
+ virtual FieldSpecBaseList exposeFields() const {
+ return mixChildrenFields();
+ }
+
+ virtual void sort(std::vector<Blueprint*> &children) const {
+ std::sort(children.begin(), children.end(), GreaterEstimate());
+ }
+
+ virtual bool inheritStrict(size_t i) const {
+ (void)i;
+ return true;
+ }
+
+ virtual SearchIterator::UP
+ createIntermediateSearch(const MultiSearch::Children &subSearches,
+ bool strict, MatchData &md) const
+ {
+ return SearchIterator::UP(new MySearch("or", subSearches, &md, strict));
+ }
+
+ static MyOr& create() { return *(new MyOr()); }
+ MyOr& add(Blueprint *n) { addChild(UP(n)); return *this; }
+ MyOr& add(Blueprint &n) { addChild(UP(&n)); return *this; }
+};
+
+
+class OtherOr : public OrBlueprint
+{
+private:
+public:
+ virtual SearchIterator::UP
+ createIntermediateSearch(const MultiSearch::Children &subSearches,
+ bool strict, MatchData &md) const
+ {
+ return SearchIterator::UP(new MySearch("or", subSearches, &md, strict));
+ }
+
+ static OtherOr& create() { return *(new OtherOr()); }
+ OtherOr& add(Blueprint *n) { addChild(UP(n)); return *this; }
+ OtherOr& add(Blueprint &n) { addChild(UP(&n)); return *this; }
+};
+
+//-----------------------------------------------------------------------------
+
+class MyAnd : public AndBlueprint
+{
+private:
+public:
+ virtual HitEstimate combine(const std::vector<HitEstimate> &data) const {
+ return min(data);
+ }
+
+ virtual FieldSpecBaseList exposeFields() const {
+ return FieldSpecBaseList();
+ }
+
+ virtual bool inheritStrict(size_t i) const {
+ return (i == 0);
+ }
+
+ virtual SearchIterator::UP
+ createIntermediateSearch(const MultiSearch::Children &subSearches,
+ bool strict, MatchData &md) const
+ {
+ return SearchIterator::UP(new MySearch("and", subSearches, &md, strict));
+ }
+
+ static MyAnd& create() { return *(new MyAnd()); }
+ MyAnd& add(Blueprint *n) { addChild(UP(n)); return *this; }
+ MyAnd& add(Blueprint &n) { addChild(UP(&n)); return *this; }
+};
+
+
+class OtherAnd : public AndBlueprint
+{
+private:
+public:
+ virtual SearchIterator::UP
+ createIntermediateSearch(const MultiSearch::Children &subSearches,
+ bool strict, MatchData &md) const
+ {
+ return SearchIterator::UP(new MySearch("and", subSearches, &md, strict));
+ }
+
+ static OtherAnd& create() { return *(new OtherAnd()); }
+ OtherAnd& add(Blueprint *n) { addChild(UP(n)); return *this; }
+ OtherAnd& add(Blueprint &n) { addChild(UP(&n)); return *this; }
+};
+
+class OtherAndNot : public AndNotBlueprint
+{
+public:
+ virtual SearchIterator::UP
+ createIntermediateSearch(const MultiSearch::Children &subSearches,
+ bool strict, MatchData &md) const
+ {
+ return SearchIterator::UP(new MySearch("andnot", subSearches, &md, strict));
+ }
+
+ static OtherAndNot& create() { return *(new OtherAndNot()); }
+ OtherAndNot& add(Blueprint *n) { addChild(UP(n)); return *this; }
+ OtherAndNot& add(Blueprint &n) { addChild(UP(&n)); return *this; }
+
+};
+
+//-----------------------------------------------------------------------------
+
+struct MyTerm : SimpleLeafBlueprint {
+ MyTerm(const FieldSpecBaseList &fields, uint32_t hitEstimate) : SimpleLeafBlueprint(fields) {
+ setEstimate(HitEstimate(hitEstimate, false));
+ }
+ virtual SearchIterator::UP createLeafSearch(const search::fef::TermFieldMatchDataArray &, bool) const {
+ return SearchIterator::UP();
+ }
+};
+
+//-----------------------------------------------------------------------------
+
+} // namespace <unnamed>
+
+class Test : public vespalib::TestApp
+{
+private:
+ MatchData::UP _md;
+
+ static Blueprint::UP ap(Blueprint *b) { return Blueprint::UP(b); }
+ static Blueprint::UP ap(Blueprint &b) { return Blueprint::UP(&b); }
+
+ SearchIterator::UP create(const Blueprint &blueprint);
+ bool check_equal(const SearchIterator &a, const SearchIterator &b);
+ bool check_equal(const Blueprint &a, const Blueprint &b);
+ bool check_not_equal(const SearchIterator &a, const SearchIterator &b);
+ bool check_not_equal(const Blueprint &a, const Blueprint &b);
+
+public:
+ Test()
+ : vespalib::TestApp(),
+ _md(MatchData::makeTestInstance(0, 100, 10))
+ {
+ }
+ Blueprint::UP buildBlueprint1();
+ Blueprint::UP buildBlueprint2();
+ void testBlueprintBuilding();
+ void testHitEstimateCalculation();
+ void testHitEstimatePropagation();
+ void testMatchDataPropagation();
+ void testChildSorting();
+ void testChildAndNotCollapsing();
+ void testChildAndCollapsing();
+ void testChildOrCollapsing();
+ void testSearchCreation();
+ void testBlueprintMakeNew();
+ void requireThatAsStringWorks();
+ void requireThatVisitMembersWorks();
+ void requireThatDocIdLimitInjectionWorks();
+ int Main();
+};
+
+SearchIterator::UP
+Test::create(const Blueprint &blueprint)
+{
+ const_cast<Blueprint &>(blueprint).fetchPostings(true);
+ SearchIterator::UP search = blueprint.createSearch(*_md, true);
+ MySearch::verifyAndInfer(search.get(), *_md);
+ return search;
+}
+
+bool
+Test::check_equal(const SearchIterator &a, const SearchIterator &b)
+{
+ return EXPECT_EQUAL(a.asString(), b.asString());
+}
+
+bool
+Test::check_equal(const Blueprint &a, const Blueprint &b)
+{
+ SearchIterator::UP searchA = create(a);
+ SearchIterator::UP searchB = create(b);
+ TEST_STATE("check_equal");
+ bool ok = check_equal(*searchA, *searchB);
+ return ok;
+}
+
+bool
+Test::check_not_equal(const SearchIterator &a, const SearchIterator &b)
+{
+ return EXPECT_NOT_EQUAL(a.asString(), b.asString());
+}
+
+bool
+Test::check_not_equal(const Blueprint &a, const Blueprint &b)
+{
+ SearchIterator::UP searchA = create(a);
+ SearchIterator::UP searchB = create(b);
+ TEST_STATE("check_not_equal");
+ bool ok = check_not_equal(*searchA, *searchB);
+ return ok;
+}
+
+Blueprint::UP
+Test::buildBlueprint1()
+{
+ return ap(MyAnd::create()
+ .add(MyOr::create()
+ .add(MyLeafSpec(10).addField(1, 11).create())
+ .add(MyLeafSpec(20).addField(1, 21).create())
+ .add(MyLeafSpec(30).addField(1, 31).create())
+ )
+ .add(MyOr::create()
+ .add(MyLeafSpec(100).addField(2, 22).create())
+ .add(MyLeafSpec(200).addField(2, 42).create())
+ )
+ );
+}
+
+Blueprint::UP
+Test::buildBlueprint2()
+{
+ return ap(MyAnd::create()
+ .add(MyOr::create()
+ .add(MyLeafSpec(10).addField(1, 11).create())
+ .add(MyLeafSpec(20).addField(1, 21).create())
+ )
+ .add(MyOr::create()
+ .add(MyLeafSpec(100).addField(2, 22).create())
+ .add(MyLeafSpec(200).addField(2, 32).create())
+ .add(MyLeafSpec(300).addField(2, 42).create())
+ )
+ );
+}
+
+void
+Test::testBlueprintBuilding()
+{
+ Blueprint::UP root1 = buildBlueprint1();
+ Blueprint::UP root2 = buildBlueprint2();
+ SearchIterator::UP search1 = create(*root1);
+ SearchIterator::UP search2 = create(*root2);
+ // fprintf(stderr, "%s\n", search1->asString().c_str());
+ // fprintf(stderr, "%s\n", search2->asString().c_str());
+}
+
+void
+Test::testHitEstimateCalculation()
+{
+ {
+ Blueprint::UP leaf = ap(MyLeafSpec(37).create());
+ EXPECT_EQUAL(37u, leaf->getState().estimate().estHits);
+ EXPECT_EQUAL(0u, leaf->getState().numFields());
+ }
+ {
+ Blueprint::UP a1 = ap(MyAnd::create()
+ .add(MyLeafSpec(7).addField(1, 11).create())
+ .add(MyLeafSpec(4).addField(1, 21).create())
+ .add(MyLeafSpec(6).addField(1, 31).create()));
+ EXPECT_EQUAL(4u, a1->getState().estimate().estHits);
+ }
+ {
+ Blueprint::UP a2 = ap(MyAnd::create()
+ .add(MyLeafSpec(4).addField(1, 1).create())
+ .add(MyLeafSpec(7).addField(2, 2).create())
+ .add(MyLeafSpec(6).addField(3, 3).create()));
+ EXPECT_EQUAL(4u, a2->getState().estimate().estHits);
+ }
+ {
+ Blueprint::UP o1 = ap(MyOr::create()
+ .add(MyLeafSpec(7).addField(1, 11).create())
+ .add(MyLeafSpec(4).addField(1, 21).create())
+ .add(MyLeafSpec(6).addField(1, 31).create()));
+ EXPECT_EQUAL(7u, o1->getState().estimate().estHits);
+ }
+ {
+ Blueprint::UP o2 = ap(MyOr::create()
+ .add(MyLeafSpec(4).addField(1, 1).create())
+ .add(MyLeafSpec(7).addField(2, 2).create())
+ .add(MyLeafSpec(6).addField(3, 3).create()));
+ EXPECT_EQUAL(7u, o2->getState().estimate().estHits);
+ }
+ {
+ Blueprint::UP a = ap(MyAnd::create()
+ .add(MyLeafSpec(0).create())
+ .add(MyLeafSpec(0, true).create()));
+ EXPECT_EQUAL(0u, a->getState().estimate().estHits);
+ EXPECT_EQUAL(true, a->getState().estimate().empty);
+ }
+ {
+ Blueprint::UP o = ap(MyOr::create()
+ .add(MyLeafSpec(0).create())
+ .add(MyLeafSpec(0, true).create()));
+ EXPECT_EQUAL(0u, o->getState().estimate().estHits);
+ EXPECT_EQUAL(false, o->getState().estimate().empty);
+ }
+ {
+ Blueprint::UP tree1 = buildBlueprint1();
+ EXPECT_EQUAL(30u, tree1->getState().estimate().estHits);
+
+ Blueprint::UP tree2 = buildBlueprint2();
+ EXPECT_EQUAL(20u, tree2->getState().estimate().estHits);
+ }
+}
+
+void
+Test::testHitEstimatePropagation()
+{
+ MyLeaf *leaf1 = new MyLeaf(FieldSpecBaseList());
+ leaf1->estimate(10);
+
+ MyLeaf *leaf2 = new MyLeaf(FieldSpecBaseList());
+ leaf2->estimate(20);
+
+ MyLeaf *leaf3 = new MyLeaf(FieldSpecBaseList());
+ leaf3->estimate(30);
+
+ MyOr *parent = new MyOr();
+ MyOr *grandparent = new MyOr();
+
+ Blueprint::UP root(grandparent);
+
+ parent->addChild(ap(leaf1));
+ parent->addChild(ap(leaf3));
+ grandparent->addChild(ap(leaf2));
+ grandparent->addChild(ap(parent));
+ EXPECT_EQUAL(30u, root->getState().estimate().estHits);
+
+ // edit
+ leaf3->estimate(50);
+ EXPECT_EQUAL(50u, root->getState().estimate().estHits);
+
+ // remove
+ ASSERT_TRUE(parent->childCnt() == 2);
+ Blueprint::UP tmp = parent->removeChild(1);
+ ASSERT_TRUE(tmp.get() == leaf3);
+ EXPECT_EQUAL(1u, parent->childCnt());
+ EXPECT_EQUAL(20u, root->getState().estimate().estHits);
+
+ // add
+ leaf3->estimate(25);
+ EXPECT_EQUAL(20u, root->getState().estimate().estHits);
+ parent->addChild(std::move(tmp));
+ EXPECT_TRUE(tmp.get() == 0);
+ EXPECT_EQUAL(25u, root->getState().estimate().estHits);
+}
+
+void
+Test::testMatchDataPropagation()
+{
+ {
+ Blueprint::UP leaf = ap(MyLeafSpec(0, true).create());
+ EXPECT_EQUAL(0u, leaf->getState().numFields());
+ }
+ {
+ Blueprint::UP leaf = ap(MyLeafSpec(42)
+ .addField(1, 41)
+ .addField(2, 72).create());
+ EXPECT_EQUAL(42u, leaf->getState().estimate().estHits);
+ ASSERT_TRUE(leaf->getState().numFields() == 2);
+ EXPECT_EQUAL(1u, leaf->getState().field(0).getFieldId());
+ EXPECT_EQUAL(2u, leaf->getState().field(1).getFieldId());
+ EXPECT_EQUAL(41u, leaf->getState().field(0).getHandle());
+ EXPECT_EQUAL(72u, leaf->getState().field(1).getHandle());
+ }
+ {
+ Blueprint::UP a = ap(MyAnd::create()
+ .add(MyLeafSpec(7).addField(1, 11).create())
+ .add(MyLeafSpec(4).addField(1, 21).create())
+ .add(MyLeafSpec(6).addField(1, 31).create()));
+ EXPECT_EQUAL(0u, a->getState().numFields());
+ }
+ {
+ MyOr &o = MyOr::create()
+ .add(MyLeafSpec(1).addField(1, 1).create())
+ .add(MyLeafSpec(2).addField(2, 2).create());
+
+ Blueprint::UP a = ap(o);
+ ASSERT_TRUE(a->getState().numFields() == 2);
+ EXPECT_EQUAL(1u, a->getState().field(0).getFieldId());
+ EXPECT_EQUAL(2u, a->getState().field(1).getFieldId());
+ EXPECT_EQUAL(1u, a->getState().field(0).getHandle());
+ EXPECT_EQUAL(2u, a->getState().field(1).getHandle());
+ EXPECT_EQUAL(2u, a->getState().estimate().estHits);
+
+ o.add(MyLeafSpec(5).addField(2, 2).create());
+ ASSERT_TRUE(a->getState().numFields() == 2);
+ EXPECT_EQUAL(1u, a->getState().field(0).getFieldId());
+ EXPECT_EQUAL(2u, a->getState().field(1).getFieldId());
+ EXPECT_EQUAL(1u, a->getState().field(0).getHandle());
+ EXPECT_EQUAL(2u, a->getState().field(1).getHandle());
+ EXPECT_EQUAL(5u, a->getState().estimate().estHits);
+
+ o.add(MyLeafSpec(5).addField(2, 32).create());
+ EXPECT_EQUAL(0u, a->getState().numFields());
+ o.removeChild(3);
+ EXPECT_EQUAL(2u, a->getState().numFields());
+ o.add(MyLeafSpec(0, true).create());
+ EXPECT_EQUAL(0u, a->getState().numFields());
+ }
+}
+
+void
+Test::testChildAndNotCollapsing()
+{
+ Blueprint::UP unsorted = ap(OtherAndNot::create()
+ .add(OtherAndNot::create()
+ .add(OtherAndNot::create()
+ .add(MyLeafSpec(200).addField(1, 11).create())
+ .add(MyLeafSpec(100).addField(1, 21).create())
+ .add(MyLeafSpec(300).addField(1, 31).create())
+ )
+ .add(OtherAnd::create()
+ .add(MyLeafSpec(1).addField(2, 42).create())
+ .add(MyLeafSpec(2).addField(2, 52).create())
+ .add(MyLeafSpec(3).addField(2, 62).create())
+ )
+ )
+ .add(MyLeafSpec(30).addField(3, 73).create())
+ .add(MyLeafSpec(20).addField(3, 83).create())
+ .add(MyLeafSpec(10).addField(3, 93).create())
+ );
+
+ Blueprint::UP sorted = ap(OtherAndNot::create()
+ .add(MyLeafSpec(200).addField(1, 11).create())
+ .add(MyLeafSpec(300).addField(1, 31).create())
+ .add(MyLeafSpec(100).addField(1, 21).create())
+ .add(MyLeafSpec(30).addField(3, 73).create())
+ .add(MyLeafSpec(20).addField(3, 83).create())
+ .add(MyLeafSpec(10).addField(3, 93).create())
+ .add(OtherAnd::create()
+ .add(MyLeafSpec(1).addField(2, 42).create())
+ .add(MyLeafSpec(2).addField(2, 52).create())
+ .add(MyLeafSpec(3).addField(2, 62).create())
+ )
+ );
+ TEST_DO(check_not_equal(*sorted, *unsorted));
+ unsorted = Blueprint::optimize(std::move(unsorted));
+ TEST_DO(check_equal(*sorted, *unsorted));
+}
+
+void
+Test::testChildAndCollapsing()
+{
+ Blueprint::UP unsorted = ap(OtherAnd::create()
+ .add(OtherAnd::create()
+ .add(OtherAnd::create()
+ .add(MyLeafSpec(200).addField(1, 11).create())
+ .add(MyLeafSpec(100).addField(1, 21).create())
+ .add(MyLeafSpec(300).addField(1, 31).create())
+ )
+ .add(OtherAnd::create()
+ .add(MyLeafSpec(1).addField(2, 42).create())
+ .add(MyLeafSpec(2).addField(2, 52).create())
+ .add(MyLeafSpec(3).addField(2, 62).create())
+ )
+ )
+ .add(OtherAnd::create()
+ .add(MyLeafSpec(30).addField(3, 73).create())
+ .add(MyLeafSpec(20).addField(3, 83).create())
+ .add(MyLeafSpec(10).addField(3, 93).create())
+ )
+ );
+
+ Blueprint::UP sorted = ap(OtherAnd::create()
+ .add(MyLeafSpec(1).addField(2, 42).create())
+ .add(MyLeafSpec(2).addField(2, 52).create())
+ .add(MyLeafSpec(3).addField(2, 62).create())
+ .add(MyLeafSpec(10).addField(3, 93).create())
+ .add(MyLeafSpec(20).addField(3, 83).create())
+ .add(MyLeafSpec(30).addField(3, 73).create())
+ .add(MyLeafSpec(100).addField(1, 21).create())
+ .add(MyLeafSpec(200).addField(1, 11).create())
+ .add(MyLeafSpec(300).addField(1, 31).create())
+ );
+
+ TEST_DO(check_not_equal(*sorted, *unsorted));
+ unsorted = Blueprint::optimize(std::move(unsorted));
+ TEST_DO(check_equal(*sorted, *unsorted));
+}
+
+void
+Test::testChildOrCollapsing()
+{
+ Blueprint::UP unsorted = ap(OtherOr::create()
+ .add(OtherOr::create()
+ .add(OtherOr::create()
+ .add(MyLeafSpec(200).addField(1, 11).create())
+ .add(MyLeafSpec(100).addField(1, 21).create())
+ .add(MyLeafSpec(300).addField(1, 31).create())
+ )
+ .add(OtherOr::create()
+ .add(MyLeafSpec(1).addField(2, 42).create())
+ .add(MyLeafSpec(2).addField(2, 52).create())
+ .add(MyLeafSpec(3).addField(2, 62).create())
+ )
+ )
+ .add(OtherOr::create()
+ .add(MyLeafSpec(30).addField(3, 73).create())
+ .add(MyLeafSpec(20).addField(3, 83).create())
+ .add(MyLeafSpec(10).addField(3, 93).create())
+ )
+ );
+
+ Blueprint::UP sorted = ap(OtherOr::create()
+ .add(MyLeafSpec(300).addField(1, 31).create())
+ .add(MyLeafSpec(200).addField(1, 11).create())
+ .add(MyLeafSpec(100).addField(1, 21).create())
+ .add(MyLeafSpec(30).addField(3, 73).create())
+ .add(MyLeafSpec(20).addField(3, 83).create())
+ .add(MyLeafSpec(10).addField(3, 93).create())
+ .add(MyLeafSpec(3).addField(2, 62).create())
+ .add(MyLeafSpec(2).addField(2, 52).create())
+ .add(MyLeafSpec(1).addField(2, 42).create())
+ );
+ TEST_DO(check_not_equal(*sorted, *unsorted));
+ unsorted = Blueprint::optimize(std::move(unsorted));
+ TEST_DO(check_equal(*sorted, *unsorted));
+}
+
+void
+Test::testChildSorting()
+{
+ Blueprint::UP unsorted = ap(MyAnd::create()
+ .add(MyOr::create()
+ .add(MyLeafSpec(200).addField(1, 11).create())
+ .add(MyLeafSpec(100).addField(1, 21).create())
+ .add(MyLeafSpec(300).addField(1, 31).create())
+ )
+ .add(MyOr::create()
+ .add(MyLeafSpec(1).addField(2, 42).create())
+ .add(MyLeafSpec(2).addField(2, 52).create())
+ .add(MyLeafSpec(3).addField(2, 62).create())
+ )
+ .add(MyOr::create()
+ .add(MyLeafSpec(30).addField(3, 73).create())
+ .add(MyLeafSpec(20).addField(3, 83).create())
+ .add(MyLeafSpec(10).addField(3, 93).create())
+ )
+ );
+
+ Blueprint::UP sorted = ap(MyAnd::create()
+ .add(MyOr::create()
+ .add(MyLeafSpec(3).addField(2, 62).create())
+ .add(MyLeafSpec(2).addField(2, 52).create())
+ .add(MyLeafSpec(1).addField(2, 42).create())
+ )
+ .add(MyOr::create()
+ .add(MyLeafSpec(30).addField(3, 73).create())
+ .add(MyLeafSpec(20).addField(3, 83).create())
+ .add(MyLeafSpec(10).addField(3, 93).create())
+ )
+ .add(MyOr::create()
+ .add(MyLeafSpec(300).addField(1, 31).create())
+ .add(MyLeafSpec(200).addField(1, 11).create())
+ .add(MyLeafSpec(100).addField(1, 21).create())
+ )
+ );
+
+ TEST_DO(check_not_equal(*sorted, *unsorted));
+ unsorted = Blueprint::optimize(std::move(unsorted));
+ TEST_DO(check_equal(*sorted, *unsorted));
+}
+
+
+void
+Test::testSearchCreation()
+{
+ {
+ Blueprint::UP l = ap(MyLeafSpec(3)
+ .addField(1, 1)
+ .addField(2, 2)
+ .addField(3, 3).create());
+ SearchIterator::UP leafsearch = create(*l);
+
+ MySearch *lw = new MySearch("leaf", true, true);
+ lw->addHandle(1).addHandle(2).addHandle(3);
+ SearchIterator::UP wantleaf(lw);
+
+ TEST_DO(check_equal(*wantleaf, *leafsearch));
+ }
+ {
+ Blueprint::UP a = ap(MyAnd::create()
+ .add(MyLeafSpec(1).addField(1, 1).create())
+ .add(MyLeafSpec(2).addField(2, 2).create()));
+ SearchIterator::UP andsearch = create(*a);
+
+ MySearch *l1 = new MySearch("leaf", true, true);
+ MySearch *l2 = new MySearch("leaf", true, false);
+ l1->addHandle(1);
+ l2->addHandle(2);
+ MySearch *aw = new MySearch("and", false, true);
+ aw->add(l1);
+ aw->add(l2);
+ SearchIterator::UP wanted(aw);
+ TEST_DO(check_equal(*wanted, *andsearch));
+ }
+ {
+ Blueprint::UP o = ap(MyOr::create()
+ .add(MyLeafSpec(1).addField(1, 11).create())
+ .add(MyLeafSpec(2).addField(2, 22).create()));
+ SearchIterator::UP orsearch = create(*o);
+
+ MySearch *l1 = new MySearch("leaf", true, true);
+ MySearch *l2 = new MySearch("leaf", true, true);
+ l1->addHandle(11);
+ l2->addHandle(22);
+ MySearch *ow = new MySearch("or", false, true);
+ ow->add(l1);
+ ow->add(l2);
+ SearchIterator::UP wanted(ow);
+ TEST_DO(check_equal(*wanted, *orsearch));
+ }
+}
+
+template<typename T>
+Blueprint::UP makeNew(T *orig)
+{
+ return Blueprint::UP(new T(*orig));
+}
+
+void
+Test::testBlueprintMakeNew()
+{
+ Blueprint::UP orig = ap(MyOr::create()
+ .add(MyLeafSpec(1).addField(1, 11).create())
+ .add(MyLeafSpec(2).addField(2, 22).create()));
+ orig->setSourceId(42);
+ MyOr *myOr = dynamic_cast<MyOr*>(orig.get());
+ ASSERT_TRUE(myOr != 0);
+ Blueprint::UP copy1 = makeNew(myOr);
+ Blueprint::UP copy2 = makeNew(myOr);
+ TEST_DO(check_equal(*copy1, *copy2));
+ TEST_DO(check_not_equal(*orig, *copy1));
+ TEST_DO(check_not_equal(*orig, *copy2));
+ EXPECT_TRUE(dynamic_cast<MyOr*>(copy1.get()) != 0);
+ EXPECT_TRUE(dynamic_cast<MyOr*>(copy2.get()) != 0);
+ EXPECT_EQUAL(42u, orig->getSourceId());
+ EXPECT_EQUAL(42u, copy1->getSourceId());
+ EXPECT_EQUAL(2u, orig->getState().numFields());
+ EXPECT_EQUAL(0u, copy1->getState().numFields());
+}
+
+vespalib::string
+getExpectedBlueprint()
+{
+ return "(anonymous namespace)::MyOr {\n"
+ " isTermLike: true\n"
+ " fields: FieldList {\n"
+ " [0]: Field {\n"
+ " fieldId: 5\n"
+ " handle: 7\n"
+ " isFilter: false\n"
+ " }\n"
+ " }\n"
+ " estimate: HitEstimate {\n"
+ " empty: false\n"
+ " estHits: 9\n"
+ " tree_size: 2\n"
+ " allow_termwise_eval: 0\n"
+ " }\n"
+ " sourceId: 4294967295\n"
+ " docid_limit: 0\n"
+ " children: std::vector {\n"
+ " [0]: (anonymous namespace)::MyTerm {\n"
+ " isTermLike: true\n"
+ " fields: FieldList {\n"
+ " [0]: Field {\n"
+ " fieldId: 5\n"
+ " handle: 7\n"
+ " isFilter: false\n"
+ " }\n"
+ " }\n"
+ " estimate: HitEstimate {\n"
+ " empty: false\n"
+ " estHits: 9\n"
+ " tree_size: 1\n"
+ " allow_termwise_eval: 1\n"
+ " }\n"
+ " sourceId: 4294967295\n"
+ " docid_limit: 0\n"
+ " }\n"
+ " }\n"
+ "}\n";
+}
+
+struct BlueprintFixture
+{
+ MyOr _blueprint;
+ BlueprintFixture() : _blueprint() {
+ _blueprint.add(new MyTerm(FieldSpecBaseList().add(FieldSpecBase(5, 7)), 9));
+ }
+};
+
+void
+Test::requireThatAsStringWorks()
+{
+ BlueprintFixture f;
+ EXPECT_EQUAL(getExpectedBlueprint(), f._blueprint.asString());
+}
+
+void
+Test::requireThatVisitMembersWorks()
+{
+ BlueprintFixture f;
+ vespalib::ObjectDumper dumper;
+ visit(dumper, "", &f._blueprint);
+ EXPECT_EQUAL(getExpectedBlueprint(), dumper.toString());
+}
+
+void
+Test::requireThatDocIdLimitInjectionWorks()
+{
+ BlueprintFixture f;
+ ASSERT_GREATER(f._blueprint.childCnt(), 0u);
+ const MyTerm &term = dynamic_cast<MyTerm&>(f._blueprint.getChild(0));
+ EXPECT_EQUAL(0u, term.get_docid_limit());
+ f._blueprint.setDocIdLimit(1000);
+ EXPECT_EQUAL(1000u, term.get_docid_limit());
+}
+
+int
+Test::Main()
+{
+ TEST_DEBUG("lhs.out", "rhs.out");
+ TEST_INIT("blueprint_test");
+ testBlueprintBuilding();
+ testHitEstimateCalculation();
+ testHitEstimatePropagation();
+ testMatchDataPropagation();
+ testChildSorting();
+ testChildAndNotCollapsing();
+ testChildAndCollapsing();
+ testChildOrCollapsing();
+ testSearchCreation();
+ testBlueprintMakeNew();
+ requireThatAsStringWorks();
+ requireThatVisitMembersWorks();
+ requireThatDocIdLimitInjectionWorks();
+ TEST_DONE();
+}
+
+TEST_APPHOOK(Test);
diff --git a/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp b/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp
new file mode 100644
index 00000000000..161537104e0
--- /dev/null
+++ b/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp
@@ -0,0 +1,1332 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("blueprint_test");
+
+#include <vespa/vespalib/testkit/testapp.h>
+#include <vespa/searchlib/queryeval/blueprint.h>
+#include <vespa/searchlib/queryeval/intermediate_blueprints.h>
+#include <vespa/searchlib/queryeval/leaf_blueprints.h>
+#include <vespa/searchlib/queryeval/equiv_blueprint.h>
+#include <vespa/searchlib/queryeval/searchable.h>
+
+#include "mysearch.h"
+
+#include <vespa/searchlib/queryeval/multisearch.h>
+#include <vespa/searchlib/queryeval/andnotsearch.h>
+#include <vespa/searchlib/queryeval/andsearch.h>
+#include <vespa/searchlib/queryeval/orsearch.h>
+#include <vespa/searchlib/queryeval/nearsearch.h>
+#include <vespa/searchlib/queryeval/ranksearch.h>
+#include <vespa/searchlib/queryeval/wand/weak_and_search.h>
+#include <vespa/searchlib/queryeval/fake_requestcontext.h>
+#include <vespa/vespalib/io/fileutil.h>
+#include <vespa/searchlib/test/diskindex/testdiskindex.h>
+#include <vespa/searchlib/query/tree/simplequery.h>
+#include <vespa/searchlib/common/bitvectoriterator.h>
+#include <vespa/searchlib/diskindex/zcpostingiterators.h>
+
+#include <algorithm>
+
+using namespace search::queryeval;
+using namespace search::fef;
+using namespace search::query;
+
+struct WeightOrder {
+ bool operator()(const wand::Term &t1, const wand::Term &t2) const {
+ return (t1.weight < t2.weight);
+ }
+};
+
+Blueprint::UP ap(Blueprint *b) { return Blueprint::UP(b); }
+Blueprint::UP ap(Blueprint &b) { return Blueprint::UP(&b); }
+
+TEST("test AndNot Blueprint") {
+ AndNotBlueprint b;
+ { // combine
+ std::vector<Blueprint::HitEstimate> est;
+ EXPECT_EQUAL(true, b.combine(est).empty);
+ EXPECT_EQUAL(0u, b.combine(est).estHits);
+ est.push_back(Blueprint::HitEstimate(10, false));
+ EXPECT_EQUAL(false, b.combine(est).empty);
+ EXPECT_EQUAL(10u, b.combine(est).estHits);
+ est.push_back(Blueprint::HitEstimate(20, false));
+ EXPECT_EQUAL(false, b.combine(est).empty);
+ EXPECT_EQUAL(10u, b.combine(est).estHits);
+ est.push_back(Blueprint::HitEstimate(5, false));
+ EXPECT_EQUAL(false, b.combine(est).empty);
+ EXPECT_EQUAL(10u, b.combine(est).estHits);
+ }
+ {
+ AndNotBlueprint a;
+ a.addChild(ap(MyLeafSpec(10).addField(1, 1).create()));
+ EXPECT_EQUAL(0u, a.exposeFields().size());
+ }
+ {
+ std::vector<Blueprint *> children;
+ Blueprint::UP c1 = ap(MyLeafSpec(10).create());
+ Blueprint::UP c2 = ap(MyLeafSpec(20).create());
+ Blueprint::UP c3 = ap(MyLeafSpec(40).create());
+ Blueprint::UP c4 = ap(MyLeafSpec(30).create());
+ children.push_back(c1.get());
+ children.push_back(c2.get());
+ children.push_back(c3.get());
+ children.push_back(c4.get());
+ b.sort(children);
+ EXPECT_EQUAL(c1.get(), children[0]);
+ EXPECT_EQUAL(c3.get(), children[1]);
+ EXPECT_EQUAL(c4.get(), children[2]);
+ EXPECT_EQUAL(c2.get(), children[3]);
+ }
+ {
+ EXPECT_EQUAL(true, b.inheritStrict(0));
+ EXPECT_EQUAL(false, b.inheritStrict(1));
+ EXPECT_EQUAL(false, b.inheritStrict(2));
+ EXPECT_EQUAL(false, b.inheritStrict(-1));
+ }
+ // createSearch tested by iterator unit test
+}
+
+TEST("test And Blueprint") {
+ AndBlueprint b;
+ { // combine
+ std::vector<Blueprint::HitEstimate> est;
+ EXPECT_EQUAL(true, b.combine(est).empty);
+ EXPECT_EQUAL(0u, b.combine(est).estHits);
+ est.push_back(Blueprint::HitEstimate(10, false));
+ EXPECT_EQUAL(false, b.combine(est).empty);
+ EXPECT_EQUAL(10u, b.combine(est).estHits);
+ est.push_back(Blueprint::HitEstimate(20, false));
+ EXPECT_EQUAL(false, b.combine(est).empty);
+ EXPECT_EQUAL(10u, b.combine(est).estHits);
+ est.push_back(Blueprint::HitEstimate(5, false));
+ EXPECT_EQUAL(false, b.combine(est).empty);
+ EXPECT_EQUAL(5u, b.combine(est).estHits);
+ est.push_back(Blueprint::HitEstimate(0, true));
+ EXPECT_EQUAL(true, b.combine(est).empty);
+ EXPECT_EQUAL(0u, b.combine(est).estHits);
+ }
+ {
+ AndBlueprint a;
+ a.addChild(ap(MyLeafSpec(10).addField(1, 1).create()));
+ EXPECT_EQUAL(0u, a.exposeFields().size());
+ }
+ {
+ std::vector<Blueprint *> children;
+ Blueprint::UP c1 = ap(MyLeafSpec(20).create());
+ Blueprint::UP c2 = ap(MyLeafSpec(40).create());
+ Blueprint::UP c3 = ap(MyLeafSpec(10).create());
+ Blueprint::UP c4 = ap(MyLeafSpec(30).create());
+ children.push_back(c1.get());
+ children.push_back(c2.get());
+ children.push_back(c3.get());
+ children.push_back(c4.get());
+ b.sort(children);
+ EXPECT_EQUAL(c3.get(), children[0]);
+ EXPECT_EQUAL(c1.get(), children[1]);
+ EXPECT_EQUAL(c4.get(), children[2]);
+ EXPECT_EQUAL(c2.get(), children[3]);
+ }
+ {
+ EXPECT_EQUAL(true, b.inheritStrict(0));
+ EXPECT_EQUAL(false, b.inheritStrict(1));
+ EXPECT_EQUAL(false, b.inheritStrict(2));
+ EXPECT_EQUAL(false, b.inheritStrict(-1));
+ }
+ // createSearch tested by iterator unit test
+}
+
+TEST("test Or Blueprint") {
+ OrBlueprint b;
+ { // combine
+ std::vector<Blueprint::HitEstimate> est;
+ EXPECT_EQUAL(true, b.combine(est).empty);
+ EXPECT_EQUAL(0u, b.combine(est).estHits);
+ est.push_back(Blueprint::HitEstimate(10, false));
+ EXPECT_EQUAL(false, b.combine(est).empty);
+ EXPECT_EQUAL(10u, b.combine(est).estHits);
+ est.push_back(Blueprint::HitEstimate(20, false));
+ EXPECT_EQUAL(false, b.combine(est).empty);
+ EXPECT_EQUAL(20u, b.combine(est).estHits);
+ est.push_back(Blueprint::HitEstimate(5, false));
+ EXPECT_EQUAL(false, b.combine(est).empty);
+ EXPECT_EQUAL(20u, b.combine(est).estHits);
+ est.push_back(Blueprint::HitEstimate(0, true));
+ EXPECT_EQUAL(false, b.combine(est).empty);
+ EXPECT_EQUAL(20u, b.combine(est).estHits);
+ }
+ {
+ OrBlueprint &o = *(new OrBlueprint());
+ o.addChild(ap(MyLeafSpec(1).addField(1, 1).create()));
+ o.addChild(ap(MyLeafSpec(2).addField(2, 2).create()));
+
+ Blueprint::UP a(&o);
+ ASSERT_TRUE(a->getState().numFields() == 2);
+ EXPECT_EQUAL(1u, a->getState().field(0).getFieldId());
+ EXPECT_EQUAL(2u, a->getState().field(1).getFieldId());
+ EXPECT_EQUAL(1u, a->getState().field(0).getHandle());
+ EXPECT_EQUAL(2u, a->getState().field(1).getHandle());
+ EXPECT_EQUAL(2u, a->getState().estimate().estHits);
+
+ o.addChild(ap(MyLeafSpec(5).addField(2, 2).create()));
+ ASSERT_TRUE(a->getState().numFields() == 2);
+ EXPECT_EQUAL(1u, a->getState().field(0).getFieldId());
+ EXPECT_EQUAL(2u, a->getState().field(1).getFieldId());
+ EXPECT_EQUAL(1u, a->getState().field(0).getHandle());
+ EXPECT_EQUAL(2u, a->getState().field(1).getHandle());
+ EXPECT_EQUAL(5u, a->getState().estimate().estHits);
+
+ o.addChild(ap(MyLeafSpec(5).addField(2, 3).create()));
+ EXPECT_EQUAL(0u, a->getState().numFields());
+ o.removeChild(3);
+ EXPECT_EQUAL(2u, a->getState().numFields());
+ o.addChild(ap(MyLeafSpec(0, true).create()));
+ EXPECT_EQUAL(0u, a->getState().numFields());
+ }
+ {
+ std::vector<Blueprint *> children;
+ Blueprint::UP c1 = ap(MyLeafSpec(10).create());
+ Blueprint::UP c2 = ap(MyLeafSpec(20).create());
+ Blueprint::UP c3 = ap(MyLeafSpec(40).create());
+ Blueprint::UP c4 = ap(MyLeafSpec(30).create());
+ children.push_back(c1.get());
+ children.push_back(c2.get());
+ children.push_back(c3.get());
+ children.push_back(c4.get());
+ b.sort(children);
+ EXPECT_EQUAL(c3.get(), children[0]);
+ EXPECT_EQUAL(c4.get(), children[1]);
+ EXPECT_EQUAL(c2.get(), children[2]);
+ EXPECT_EQUAL(c1.get(), children[3]);
+ }
+ {
+ EXPECT_EQUAL(true, b.inheritStrict(0));
+ EXPECT_EQUAL(true, b.inheritStrict(1));
+ EXPECT_EQUAL(true, b.inheritStrict(2));
+ EXPECT_EQUAL(true, b.inheritStrict(-1));
+ }
+ // createSearch tested by iterator unit test
+}
+
+TEST("test Near Blueprint") {
+ NearBlueprint b(7);
+ { // combine
+ std::vector<Blueprint::HitEstimate> est;
+ EXPECT_EQUAL(true, b.combine(est).empty);
+ EXPECT_EQUAL(0u, b.combine(est).estHits);
+ est.push_back(Blueprint::HitEstimate(10, false));
+ EXPECT_EQUAL(false, b.combine(est).empty);
+ EXPECT_EQUAL(10u, b.combine(est).estHits);
+ est.push_back(Blueprint::HitEstimate(20, false));
+ EXPECT_EQUAL(false, b.combine(est).empty);
+ EXPECT_EQUAL(10u, b.combine(est).estHits);
+ est.push_back(Blueprint::HitEstimate(5, false));
+ EXPECT_EQUAL(false, b.combine(est).empty);
+ EXPECT_EQUAL(5u, b.combine(est).estHits);
+ est.push_back(Blueprint::HitEstimate(0, true));
+ EXPECT_EQUAL(true, b.combine(est).empty);
+ EXPECT_EQUAL(0u, b.combine(est).estHits);
+ }
+ {
+ NearBlueprint a(7);
+ a.addChild(ap(MyLeafSpec(10).addField(1, 1).create()));
+ EXPECT_EQUAL(0u, a.exposeFields().size());
+ }
+ {
+ std::vector<Blueprint *> children;
+ Blueprint::UP c1 = ap(MyLeafSpec(40).create());
+ Blueprint::UP c2 = ap(MyLeafSpec(10).create());
+ Blueprint::UP c3 = ap(MyLeafSpec(30).create());
+ Blueprint::UP c4 = ap(MyLeafSpec(20).create());
+ children.push_back(c1.get());
+ children.push_back(c2.get());
+ children.push_back(c3.get());
+ children.push_back(c4.get());
+ b.sort(children);
+ EXPECT_EQUAL(c2.get(), children[0]);
+ EXPECT_EQUAL(c4.get(), children[1]);
+ EXPECT_EQUAL(c3.get(), children[2]);
+ EXPECT_EQUAL(c1.get(), children[3]);
+ }
+ {
+ EXPECT_EQUAL(true, b.inheritStrict(0));
+ EXPECT_EQUAL(false, b.inheritStrict(1));
+ EXPECT_EQUAL(false, b.inheritStrict(2));
+ EXPECT_EQUAL(false, b.inheritStrict(-1));
+ }
+ // createSearch tested by iterator unit test
+}
+
+TEST("test ONear Blueprint") {
+ ONearBlueprint b(8);
+ { // combine
+ std::vector<Blueprint::HitEstimate> est;
+ EXPECT_EQUAL(true, b.combine(est).empty);
+ EXPECT_EQUAL(0u, b.combine(est).estHits);
+ est.push_back(Blueprint::HitEstimate(10, false));
+ EXPECT_EQUAL(false, b.combine(est).empty);
+ EXPECT_EQUAL(10u, b.combine(est).estHits);
+ est.push_back(Blueprint::HitEstimate(20, false));
+ EXPECT_EQUAL(false, b.combine(est).empty);
+ EXPECT_EQUAL(10u, b.combine(est).estHits);
+ est.push_back(Blueprint::HitEstimate(5, false));
+ EXPECT_EQUAL(false, b.combine(est).empty);
+ EXPECT_EQUAL(5u, b.combine(est).estHits);
+ est.push_back(Blueprint::HitEstimate(0, true));
+ EXPECT_EQUAL(true, b.combine(est).empty);
+ EXPECT_EQUAL(0u, b.combine(est).estHits);
+ }
+ {
+ ONearBlueprint a(8);
+ a.addChild(ap(MyLeafSpec(10).addField(1, 1).create()));
+ EXPECT_EQUAL(0u, a.exposeFields().size());
+ }
+ {
+ std::vector<Blueprint *> children;
+ Blueprint::UP c1 = ap(MyLeafSpec(20).create());
+ Blueprint::UP c2 = ap(MyLeafSpec(10).create());
+ Blueprint::UP c3 = ap(MyLeafSpec(40).create());
+ Blueprint::UP c4 = ap(MyLeafSpec(30).create());
+ children.push_back(c1.get());
+ children.push_back(c2.get());
+ children.push_back(c3.get());
+ children.push_back(c4.get());
+ b.sort(children);
+ EXPECT_EQUAL(c1.get(), children[0]);
+ EXPECT_EQUAL(c2.get(), children[1]);
+ EXPECT_EQUAL(c3.get(), children[2]);
+ EXPECT_EQUAL(c4.get(), children[3]);
+ }
+ {
+ EXPECT_EQUAL(true, b.inheritStrict(0));
+ EXPECT_EQUAL(false, b.inheritStrict(1));
+ EXPECT_EQUAL(false, b.inheritStrict(2));
+ EXPECT_EQUAL(false, b.inheritStrict(-1));
+ }
+ // createSearch tested by iterator unit test
+}
+
+TEST("test Rank Blueprint") {
+ RankBlueprint b;
+ { // combine
+ std::vector<Blueprint::HitEstimate> est;
+ EXPECT_EQUAL(true, b.combine(est).empty);
+ EXPECT_EQUAL(0u, b.combine(est).estHits);
+ est.push_back(Blueprint::HitEstimate(10, false));
+ EXPECT_EQUAL(false, b.combine(est).empty);
+ EXPECT_EQUAL(10u, b.combine(est).estHits);
+ est.push_back(Blueprint::HitEstimate(20, false));
+ EXPECT_EQUAL(false, b.combine(est).empty);
+ EXPECT_EQUAL(10u, b.combine(est).estHits);
+ est.push_back(Blueprint::HitEstimate(5, false));
+ EXPECT_EQUAL(false, b.combine(est).empty);
+ EXPECT_EQUAL(10u, b.combine(est).estHits);
+ est.push_back(Blueprint::HitEstimate(0, true));
+ EXPECT_EQUAL(false, b.combine(est).empty);
+ EXPECT_EQUAL(10u, b.combine(est).estHits);
+ }
+ {
+ RankBlueprint a;
+ a.addChild(ap(MyLeafSpec(10).addField(1, 1).create()));
+ EXPECT_EQUAL(0u, a.exposeFields().size());
+ }
+ {
+ std::vector<Blueprint *> children;
+ Blueprint::UP c1 = ap(MyLeafSpec(20).create());
+ Blueprint::UP c2 = ap(MyLeafSpec(10).create());
+ Blueprint::UP c3 = ap(MyLeafSpec(40).create());
+ Blueprint::UP c4 = ap(MyLeafSpec(30).create());
+ children.push_back(c1.get());
+ children.push_back(c2.get());
+ children.push_back(c3.get());
+ children.push_back(c4.get());
+ b.sort(children);
+ EXPECT_EQUAL(c1.get(), children[0]);
+ EXPECT_EQUAL(c2.get(), children[1]);
+ EXPECT_EQUAL(c3.get(), children[2]);
+ EXPECT_EQUAL(c4.get(), children[3]);
+ }
+ {
+ EXPECT_EQUAL(true, b.inheritStrict(0));
+ EXPECT_EQUAL(false, b.inheritStrict(1));
+ EXPECT_EQUAL(false, b.inheritStrict(2));
+ EXPECT_EQUAL(false, b.inheritStrict(-1));
+ }
+ // createSearch tested by iterator unit test
+}
+
+TEST("test SourceBlender Blueprint") {
+ ISourceSelector *selector = 0; // not needed here
+ SourceBlenderBlueprint b(*selector);
+ { // combine
+ std::vector<Blueprint::HitEstimate> est;
+ EXPECT_EQUAL(true, b.combine(est).empty);
+ EXPECT_EQUAL(0u, b.combine(est).estHits);
+ est.push_back(Blueprint::HitEstimate(10, false));
+ EXPECT_EQUAL(false, b.combine(est).empty);
+ EXPECT_EQUAL(10u, b.combine(est).estHits);
+ est.push_back(Blueprint::HitEstimate(20, false));
+ EXPECT_EQUAL(false, b.combine(est).empty);
+ EXPECT_EQUAL(20u, b.combine(est).estHits);
+ est.push_back(Blueprint::HitEstimate(5, false));
+ EXPECT_EQUAL(false, b.combine(est).empty);
+ EXPECT_EQUAL(20u, b.combine(est).estHits);
+ est.push_back(Blueprint::HitEstimate(0, true));
+ EXPECT_EQUAL(false, b.combine(est).empty);
+ EXPECT_EQUAL(20u, b.combine(est).estHits);
+ }
+ {
+ SourceBlenderBlueprint &o = *(new SourceBlenderBlueprint(*selector));
+ o.addChild(ap(MyLeafSpec(1).addField(1, 1).create()));
+ o.addChild(ap(MyLeafSpec(2).addField(2, 2).create()));
+
+ Blueprint::UP a(&o);
+ ASSERT_TRUE(a->getState().numFields() == 2);
+ EXPECT_EQUAL(1u, a->getState().field(0).getFieldId());
+ EXPECT_EQUAL(2u, a->getState().field(1).getFieldId());
+ EXPECT_EQUAL(1u, a->getState().field(0).getHandle());
+ EXPECT_EQUAL(2u, a->getState().field(1).getHandle());
+ EXPECT_EQUAL(2u, a->getState().estimate().estHits);
+
+ o.addChild(ap(MyLeafSpec(5).addField(2, 2).create()));
+ ASSERT_TRUE(a->getState().numFields() == 2);
+ EXPECT_EQUAL(1u, a->getState().field(0).getFieldId());
+ EXPECT_EQUAL(2u, a->getState().field(1).getFieldId());
+ EXPECT_EQUAL(1u, a->getState().field(0).getHandle());
+ EXPECT_EQUAL(2u, a->getState().field(1).getHandle());
+ EXPECT_EQUAL(5u, a->getState().estimate().estHits);
+
+ o.addChild(ap(MyLeafSpec(5).addField(2, 3).create()));
+ EXPECT_EQUAL(0u, a->getState().numFields());
+ o.removeChild(3);
+ EXPECT_EQUAL(2u, a->getState().numFields());
+ o.addChild(ap(MyLeafSpec(0, true).create()));
+ EXPECT_EQUAL(0u, a->getState().numFields());
+ }
+ {
+ std::vector<Blueprint *> children;
+ Blueprint::UP c1 = ap(MyLeafSpec(20).create());
+ Blueprint::UP c2 = ap(MyLeafSpec(10).create());
+ Blueprint::UP c3 = ap(MyLeafSpec(40).create());
+ Blueprint::UP c4 = ap(MyLeafSpec(30).create());
+ children.push_back(c1.get());
+ children.push_back(c2.get());
+ children.push_back(c3.get());
+ children.push_back(c4.get());
+ b.sort(children);
+ EXPECT_EQUAL(c1.get(), children[0]);
+ EXPECT_EQUAL(c2.get(), children[1]);
+ EXPECT_EQUAL(c3.get(), children[2]);
+ EXPECT_EQUAL(c4.get(), children[3]);
+ }
+ {
+ EXPECT_EQUAL(true, b.inheritStrict(0));
+ EXPECT_EQUAL(true, b.inheritStrict(1));
+ EXPECT_EQUAL(true, b.inheritStrict(2));
+ EXPECT_EQUAL(true, b.inheritStrict(-1));
+ }
+ // createSearch tested by iterator unit test
+}
+
+TEST("test SourceBlender below AND optimization") {
+ ISourceSelector *selector_1 = 0; // the one
+ ISourceSelector *selector_2 = reinterpret_cast<ISourceSelector*>(100); // not the one
+ //-------------------------------------------------------------------------
+ AndBlueprint *top = new AndBlueprint();
+ Blueprint::UP top_bp(top);
+ top->addChild(ap(MyLeafSpec(2).create()));
+ top->addChild(ap(MyLeafSpec(1).create()));
+ top->addChild(ap(MyLeafSpec(3).create()));
+ {
+ SourceBlenderBlueprint *blender = new SourceBlenderBlueprint(*selector_1);
+ blender->addChild(ap(MyLeafSpec(200).create()->setSourceId(2)));
+ blender->addChild(ap(MyLeafSpec(100).create()->setSourceId(1)));
+ blender->addChild(ap(MyLeafSpec(300).create()->setSourceId(3)));
+ top->addChild(ap(blender));
+ }
+ {
+ SourceBlenderBlueprint *blender = new SourceBlenderBlueprint(*selector_1);
+ blender->addChild(ap(MyLeafSpec(20).create()->setSourceId(2)));
+ blender->addChild(ap(MyLeafSpec(10).create()->setSourceId(1)));
+ blender->addChild(ap(MyLeafSpec(30).create()->setSourceId(3)));
+ top->addChild(ap(blender));
+ }
+ {
+ SourceBlenderBlueprint *blender = new SourceBlenderBlueprint(*selector_2);
+ blender->addChild(ap(MyLeafSpec(10).create()->setSourceId(1)));
+ blender->addChild(ap(MyLeafSpec(20).create()->setSourceId(2)));
+ top->addChild(ap(blender));
+ }
+ {
+ SourceBlenderBlueprint *blender = new SourceBlenderBlueprint(*selector_1);
+ blender->addChild(ap(MyLeafSpec(2000).create()->setSourceId(2)));
+ blender->addChild(ap(MyLeafSpec(1000).create()->setSourceId(1)));
+ top->addChild(ap(blender));
+ }
+ //-------------------------------------------------------------------------
+ AndBlueprint *expect = new AndBlueprint();
+ Blueprint::UP expect_bp(expect);
+ expect->addChild(ap(MyLeafSpec(1).create()));
+ expect->addChild(ap(MyLeafSpec(2).create()));
+ expect->addChild(ap(MyLeafSpec(3).create()));
+ {
+ SourceBlenderBlueprint *blender = new SourceBlenderBlueprint(*selector_2);
+ blender->addChild(ap(MyLeafSpec(10).create()->setSourceId(1)));
+ blender->addChild(ap(MyLeafSpec(20).create()->setSourceId(2)));
+ expect->addChild(ap(blender));
+ }
+ {
+ SourceBlenderBlueprint *blender(new SourceBlenderBlueprint(*selector_1));
+ {
+ AndBlueprint *sub_and = new AndBlueprint();
+ sub_and->setSourceId(3);
+ sub_and->addChild(ap(MyLeafSpec(30).create()->setSourceId(3)));
+ sub_and->addChild(ap(MyLeafSpec(300).create()->setSourceId(3)));
+ blender->addChild(ap(sub_and));
+ }
+ {
+ AndBlueprint *sub_and = new AndBlueprint();
+ sub_and->setSourceId(2);
+ sub_and->addChild(ap(MyLeafSpec(20).create()->setSourceId(2)));
+ sub_and->addChild(ap(MyLeafSpec(200).create()->setSourceId(2)));
+ sub_and->addChild(ap(MyLeafSpec(2000).create()->setSourceId(2)));
+ blender->addChild(ap(sub_and));
+ }
+ {
+ AndBlueprint *sub_and = new AndBlueprint();
+ sub_and->setSourceId(1);
+ sub_and->addChild(ap(MyLeafSpec(10).create()->setSourceId(1)));
+ sub_and->addChild(ap(MyLeafSpec(100).create()->setSourceId(1)));
+ sub_and->addChild(ap(MyLeafSpec(1000).create()->setSourceId(1)));
+ blender->addChild(ap(sub_and));
+ }
+ expect->addChild(ap(blender));
+ }
+ //-------------------------------------------------------------------------
+ EXPECT_NOT_EQUAL(expect_bp->asString(), top_bp->asString());
+ top_bp = Blueprint::optimize(std::move(top_bp));
+ EXPECT_EQUAL(expect_bp->asString(), top_bp->asString());
+ expect_bp = Blueprint::optimize(std::move(expect_bp));
+ EXPECT_EQUAL(expect_bp->asString(), top_bp->asString());
+}
+
+TEST("test SourceBlender below OR optimization") {
+ ISourceSelector *selector_1 = 0; // the one
+ ISourceSelector *selector_2 = reinterpret_cast<ISourceSelector*>(100); // not the one
+ //-------------------------------------------------------------------------
+ OrBlueprint *top = new OrBlueprint();
+ Blueprint::UP top_up(top);
+ top->addChild(ap(MyLeafSpec(2).create()));
+ top->addChild(ap(MyLeafSpec(1).create()));
+ top->addChild(ap(MyLeafSpec(3).create()));
+ {
+ SourceBlenderBlueprint *blender = new SourceBlenderBlueprint(*selector_1);
+ blender->addChild(ap(MyLeafSpec(200).create()->setSourceId(2)));
+ blender->addChild(ap(MyLeafSpec(100).create()->setSourceId(1)));
+ blender->addChild(ap(MyLeafSpec(300).create()->setSourceId(3)));
+ top->addChild(ap(blender));
+ }
+ {
+ SourceBlenderBlueprint *blender = new SourceBlenderBlueprint(*selector_1);
+ blender->addChild(ap(MyLeafSpec(20).create()->setSourceId(2)));
+ blender->addChild(ap(MyLeafSpec(10).create()->setSourceId(1)));
+ blender->addChild(ap(MyLeafSpec(30).create()->setSourceId(3)));
+ top->addChild(ap(blender));
+ }
+ {
+ SourceBlenderBlueprint *blender = new SourceBlenderBlueprint(*selector_2);
+ blender->addChild(ap(MyLeafSpec(10).create()->setSourceId(1)));
+ blender->addChild(ap(MyLeafSpec(20).create()->setSourceId(2)));
+ top->addChild(ap(blender));
+ }
+ {
+ SourceBlenderBlueprint *blender = new SourceBlenderBlueprint(*selector_1);
+ blender->addChild(ap(MyLeafSpec(2000).create()->setSourceId(2)));
+ blender->addChild(ap(MyLeafSpec(1000).create()->setSourceId(1)));
+ top->addChild(ap(blender));
+ }
+ //-------------------------------------------------------------------------
+ OrBlueprint *expect = new OrBlueprint();
+ Blueprint::UP expect_up(expect);
+ {
+ SourceBlenderBlueprint *blender(new SourceBlenderBlueprint(*selector_1));
+ {
+ OrBlueprint *sub_and = new OrBlueprint();
+ sub_and->setSourceId(3);
+ sub_and->addChild(ap(MyLeafSpec(300).create()->setSourceId(3)));
+ sub_and->addChild(ap(MyLeafSpec(30).create()->setSourceId(3)));
+ blender->addChild(ap(sub_and));
+ }
+ {
+ OrBlueprint *sub_and = new OrBlueprint();
+ sub_and->setSourceId(2);
+ sub_and->addChild(ap(MyLeafSpec(2000).create()->setSourceId(2)));
+ sub_and->addChild(ap(MyLeafSpec(200).create()->setSourceId(2)));
+ sub_and->addChild(ap(MyLeafSpec(20).create()->setSourceId(2)));
+ blender->addChild(ap(sub_and));
+ }
+ {
+ OrBlueprint *sub_and = new OrBlueprint();
+ sub_and->setSourceId(1);
+ sub_and->addChild(ap(MyLeafSpec(1000).create()->setSourceId(1)));
+ sub_and->addChild(ap(MyLeafSpec(100).create()->setSourceId(1)));
+ sub_and->addChild(ap(MyLeafSpec(10).create()->setSourceId(1)));
+ blender->addChild(ap(sub_and));
+ }
+ expect->addChild(ap(blender));
+ }
+ {
+ SourceBlenderBlueprint *blender = new SourceBlenderBlueprint(*selector_2);
+ blender->addChild(ap(MyLeafSpec(10).create()->setSourceId(1)));
+ blender->addChild(ap(MyLeafSpec(20).create()->setSourceId(2)));
+ expect->addChild(ap(blender));
+ }
+ expect->addChild(ap(MyLeafSpec(3).create()));
+ expect->addChild(ap(MyLeafSpec(2).create()));
+ expect->addChild(ap(MyLeafSpec(1).create()));
+ //-------------------------------------------------------------------------
+ EXPECT_NOT_EQUAL(expect_up->asString(), top_up->asString());
+ top_up = Blueprint::optimize(std::move(top_up));
+ EXPECT_EQUAL(expect_up->asString(), top_up->asString());
+ expect_up = Blueprint::optimize(std::move(expect_up));
+ EXPECT_EQUAL(expect_up->asString(), top_up->asString());
+}
+
+TEST("test SourceBlender below AND_NOT optimization") {
+ ISourceSelector *selector_1 = 0; // the one
+ ISourceSelector *selector_2 = reinterpret_cast<ISourceSelector*>(100); // not the one
+ //-------------------------------------------------------------------------
+ AndNotBlueprint *top = new AndNotBlueprint();
+ Blueprint::UP top_up(top);
+ {
+ SourceBlenderBlueprint *blender = new SourceBlenderBlueprint(*selector_1);
+ blender->addChild(ap(MyLeafSpec(42).create()->setSourceId(1)));
+ top->addChild(ap(blender));
+ }
+ top->addChild(ap(MyLeafSpec(2).create()));
+ top->addChild(ap(MyLeafSpec(1).create()));
+ top->addChild(ap(MyLeafSpec(3).create()));
+ {
+ SourceBlenderBlueprint *blender = new SourceBlenderBlueprint(*selector_1);
+ blender->addChild(ap(MyLeafSpec(200).create()->setSourceId(2)));
+ blender->addChild(ap(MyLeafSpec(100).create()->setSourceId(1)));
+ blender->addChild(ap(MyLeafSpec(300).create()->setSourceId(3)));
+ top->addChild(ap(blender));
+ }
+ {
+ SourceBlenderBlueprint *blender = new SourceBlenderBlueprint(*selector_1);
+ blender->addChild(ap(MyLeafSpec(20).create()->setSourceId(2)));
+ blender->addChild(ap(MyLeafSpec(10).create()->setSourceId(1)));
+ blender->addChild(ap(MyLeafSpec(30).create()->setSourceId(3)));
+ top->addChild(ap(blender));
+ }
+ {
+ SourceBlenderBlueprint *blender = new SourceBlenderBlueprint(*selector_2);
+ blender->addChild(ap(MyLeafSpec(10).create()->setSourceId(1)));
+ blender->addChild(ap(MyLeafSpec(20).create()->setSourceId(2)));
+ top->addChild(ap(blender));
+ }
+ {
+ SourceBlenderBlueprint *blender = new SourceBlenderBlueprint(*selector_1);
+ blender->addChild(ap(MyLeafSpec(2000).create()->setSourceId(2)));
+ blender->addChild(ap(MyLeafSpec(1000).create()->setSourceId(1)));
+ top->addChild(ap(blender));
+ }
+ //-------------------------------------------------------------------------
+ AndNotBlueprint *expect = new AndNotBlueprint();
+ Blueprint::UP expect_up(expect);
+ {
+ SourceBlenderBlueprint *blender = new SourceBlenderBlueprint(*selector_1);
+ blender->addChild(ap(MyLeafSpec(42).create()->setSourceId(1)));
+ expect->addChild(ap(blender));
+ }
+ {
+ SourceBlenderBlueprint *blender(new SourceBlenderBlueprint(*selector_1));
+ {
+ OrBlueprint *sub_and = new OrBlueprint();
+ sub_and->setSourceId(3);
+ sub_and->addChild(ap(MyLeafSpec(300).create()->setSourceId(3)));
+ sub_and->addChild(ap(MyLeafSpec(30).create()->setSourceId(3)));
+ blender->addChild(ap(sub_and));
+ }
+ {
+ OrBlueprint *sub_and = new OrBlueprint();
+ sub_and->setSourceId(2);
+ sub_and->addChild(ap(MyLeafSpec(2000).create()->setSourceId(2)));
+ sub_and->addChild(ap(MyLeafSpec(200).create()->setSourceId(2)));
+ sub_and->addChild(ap(MyLeafSpec(20).create()->setSourceId(2)));
+ blender->addChild(ap(sub_and));
+ }
+ {
+ OrBlueprint *sub_and = new OrBlueprint();
+ sub_and->setSourceId(1);
+ sub_and->addChild(ap(MyLeafSpec(1000).create()->setSourceId(1)));
+ sub_and->addChild(ap(MyLeafSpec(100).create()->setSourceId(1)));
+ sub_and->addChild(ap(MyLeafSpec(10).create()->setSourceId(1)));
+ blender->addChild(ap(sub_and));
+ }
+ expect->addChild(ap(blender));
+ }
+ {
+ SourceBlenderBlueprint *blender = new SourceBlenderBlueprint(*selector_2);
+ blender->addChild(ap(MyLeafSpec(10).create()->setSourceId(1)));
+ blender->addChild(ap(MyLeafSpec(20).create()->setSourceId(2)));
+ expect->addChild(ap(blender));
+ }
+ expect->addChild(ap(MyLeafSpec(3).create()));
+ expect->addChild(ap(MyLeafSpec(2).create()));
+ expect->addChild(ap(MyLeafSpec(1).create()));
+ //-------------------------------------------------------------------------
+ EXPECT_NOT_EQUAL(expect_up->asString(), top_up->asString());
+ top_up = Blueprint::optimize(std::move(top_up));
+ EXPECT_EQUAL(expect_up->asString(), top_up->asString());
+ expect_up = Blueprint::optimize(std::move(expect_up));
+ EXPECT_EQUAL(expect_up->asString(), top_up->asString());
+}
+
+TEST("test SourceBlender below RANK optimization") {
+ ISourceSelector *selector_1 = 0; // the one
+ ISourceSelector *selector_2 = reinterpret_cast<ISourceSelector*>(100); // not the one
+ //-------------------------------------------------------------------------
+ RankBlueprint *top = new RankBlueprint();
+ Blueprint::UP top_up(top);
+ {
+ SourceBlenderBlueprint *blender = new SourceBlenderBlueprint(*selector_1);
+ blender->addChild(ap(MyLeafSpec(42).create()->setSourceId(1)));
+ top->addChild(ap(blender));
+ }
+ top->addChild(ap(MyLeafSpec(2).create()));
+ top->addChild(ap(MyLeafSpec(1).create()));
+ top->addChild(ap(MyLeafSpec(3).create()));
+ {
+ SourceBlenderBlueprint *blender = new SourceBlenderBlueprint(*selector_1);
+ blender->addChild(ap(MyLeafSpec(200).create()->setSourceId(2)));
+ blender->addChild(ap(MyLeafSpec(100).create()->setSourceId(1)));
+ blender->addChild(ap(MyLeafSpec(300).create()->setSourceId(3)));
+ top->addChild(ap(blender));
+ }
+ {
+ SourceBlenderBlueprint *blender = new SourceBlenderBlueprint(*selector_1);
+ blender->addChild(ap(MyLeafSpec(20).create()->setSourceId(2)));
+ blender->addChild(ap(MyLeafSpec(10).create()->setSourceId(1)));
+ blender->addChild(ap(MyLeafSpec(30).create()->setSourceId(3)));
+ top->addChild(ap(blender));
+ }
+ {
+ SourceBlenderBlueprint *blender = new SourceBlenderBlueprint(*selector_2);
+ blender->addChild(ap(MyLeafSpec(10).create()->setSourceId(1)));
+ blender->addChild(ap(MyLeafSpec(20).create()->setSourceId(2)));
+ top->addChild(ap(blender));
+ }
+ {
+ SourceBlenderBlueprint *blender = new SourceBlenderBlueprint(*selector_1);
+ blender->addChild(ap(MyLeafSpec(2000).create()->setSourceId(2)));
+ blender->addChild(ap(MyLeafSpec(1000).create()->setSourceId(1)));
+ top->addChild(ap(blender));
+ }
+ //-------------------------------------------------------------------------
+ RankBlueprint *expect = new RankBlueprint();
+ Blueprint::UP expect_up(expect);
+ {
+ SourceBlenderBlueprint *blender = new SourceBlenderBlueprint(*selector_1);
+ blender->addChild(ap(MyLeafSpec(42).create()->setSourceId(1)));
+ expect->addChild(ap(blender));
+ }
+ expect->addChild(ap(MyLeafSpec(2).create()));
+ expect->addChild(ap(MyLeafSpec(1).create()));
+ expect->addChild(ap(MyLeafSpec(3).create()));
+ {
+ SourceBlenderBlueprint *blender = new SourceBlenderBlueprint(*selector_2);
+ blender->addChild(ap(MyLeafSpec(10).create()->setSourceId(1)));
+ blender->addChild(ap(MyLeafSpec(20).create()->setSourceId(2)));
+ expect->addChild(ap(blender));
+ }
+ {
+ SourceBlenderBlueprint *blender(new SourceBlenderBlueprint(*selector_1));
+ {
+ OrBlueprint *sub_and = new OrBlueprint();
+ sub_and->setSourceId(3);
+ sub_and->addChild(ap(MyLeafSpec(300).create()->setSourceId(3)));
+ sub_and->addChild(ap(MyLeafSpec(30).create()->setSourceId(3)));
+ blender->addChild(ap(sub_and));
+ }
+ {
+ OrBlueprint *sub_and = new OrBlueprint();
+ sub_and->setSourceId(2);
+ sub_and->addChild(ap(MyLeafSpec(2000).create()->setSourceId(2)));
+ sub_and->addChild(ap(MyLeafSpec(200).create()->setSourceId(2)));
+ sub_and->addChild(ap(MyLeafSpec(20).create()->setSourceId(2)));
+ blender->addChild(ap(sub_and));
+ }
+ {
+ OrBlueprint *sub_and = new OrBlueprint();
+ sub_and->setSourceId(1);
+ sub_and->addChild(ap(MyLeafSpec(1000).create()->setSourceId(1)));
+ sub_and->addChild(ap(MyLeafSpec(100).create()->setSourceId(1)));
+ sub_and->addChild(ap(MyLeafSpec(10).create()->setSourceId(1)));
+ blender->addChild(ap(sub_and));
+ }
+ expect->addChild(ap(blender));
+ }
+ //-------------------------------------------------------------------------
+ EXPECT_NOT_EQUAL(expect_up->asString(), top_up->asString());
+ top_up = Blueprint::optimize(std::move(top_up));
+ EXPECT_EQUAL(expect_up->asString(), top_up->asString());
+ expect_up = Blueprint::optimize(std::move(expect_up));
+ EXPECT_EQUAL(expect_up->asString(), top_up->asString());
+}
+
+TEST("test empty root node optimization and safeness") {
+ //-------------------------------------------------------------------------
+ // tests leaf node elimination
+ Blueprint::UP top1_up(ap(MyLeafSpec(0, true).create()));
+ //-------------------------------------------------------------------------
+ // tests intermediate node elimination
+ Blueprint::UP top2_up(ap((new AndBlueprint())->
+ addChild(ap(MyLeafSpec(0, true).create())).
+ addChild(ap(MyLeafSpec(10).create())).
+ addChild(ap(MyLeafSpec(20).create()))));
+ //-------------------------------------------------------------------------
+ // tests safety of empty AND_NOT child removal
+ Blueprint::UP top3_up(ap((new AndNotBlueprint())->
+ addChild(ap(MyLeafSpec(0, true).create())).
+ addChild(ap(MyLeafSpec(10).create())).
+ addChild(ap(MyLeafSpec(20).create()))));
+ //-------------------------------------------------------------------------
+ // tests safety of empty RANK child removal
+ Blueprint::UP top4_up(ap((new RankBlueprint())->
+ addChild(ap(MyLeafSpec(0, true).create())).
+ addChild(ap(MyLeafSpec(10).create())).
+ addChild(ap(MyLeafSpec(20).create()))));
+ //-------------------------------------------------------------------------
+ // tests safety of empty OR child removal
+ Blueprint::UP top5_up(ap((new OrBlueprint())->
+ addChild(ap(MyLeafSpec(0, true).create())).
+ addChild(ap(MyLeafSpec(0, true).create())).
+ addChild(ap(MyLeafSpec(0, true).create()))));
+ //-------------------------------------------------------------------------
+ Blueprint::UP expect_up(new EmptyBlueprint());
+ //-------------------------------------------------------------------------
+ top1_up = Blueprint::optimize(std::move(top1_up));
+ top2_up = Blueprint::optimize(std::move(top2_up));
+ top3_up = Blueprint::optimize(std::move(top3_up));
+ top4_up = Blueprint::optimize(std::move(top4_up));
+ top5_up = Blueprint::optimize(std::move(top5_up));
+ EXPECT_EQUAL(expect_up->asString(), top1_up->asString());
+ EXPECT_EQUAL(expect_up->asString(), top2_up->asString());
+ EXPECT_EQUAL(expect_up->asString(), top3_up->asString());
+ EXPECT_EQUAL(expect_up->asString(), top4_up->asString());
+ EXPECT_EQUAL(expect_up->asString(), top5_up->asString());
+}
+
+TEST("and with one empty child is optimized away") {
+ ISourceSelector *selector = 0;
+ Blueprint::UP top(ap((new SourceBlenderBlueprint(*selector))->
+ addChild(ap(MyLeafSpec(10).create())).
+ addChild(ap((new AndBlueprint())->
+ addChild(ap(MyLeafSpec(0, true).create())).
+ addChild(ap(MyLeafSpec(10).create())).
+ addChild(ap(MyLeafSpec(20).create()))))));
+ top = Blueprint::optimize(std::move(top));
+ Blueprint::UP expect_up(ap((new SourceBlenderBlueprint(*selector))->
+ addChild(ap(MyLeafSpec(10).create())).
+ addChild(ap(new EmptyBlueprint()))));
+ EXPECT_EQUAL(expect_up->asString(), top->asString());
+}
+
+TEST("test single child optimization") {
+ ISourceSelector *selector = 0;
+ //-------------------------------------------------------------------------
+ Blueprint::UP top_up(
+ ap((new AndNotBlueprint())->
+ addChild(ap((new AndBlueprint())->
+ addChild(ap((new OrBlueprint())->
+ addChild(ap((new SourceBlenderBlueprint(*selector))->
+ addChild(ap((new RankBlueprint())->
+ addChild(ap(MyLeafSpec(42).create()))))))))))));
+ //-------------------------------------------------------------------------
+ Blueprint::UP expect_up(
+ ap((new SourceBlenderBlueprint(*selector))->
+ addChild(ap(MyLeafSpec(42).create()))));
+ //-------------------------------------------------------------------------
+ EXPECT_NOT_EQUAL(expect_up->asString(), top_up->asString());
+ top_up = Blueprint::optimize(std::move(top_up));
+ EXPECT_EQUAL(expect_up->asString(), top_up->asString());
+ expect_up = Blueprint::optimize(std::move(expect_up));
+ EXPECT_EQUAL(expect_up->asString(), top_up->asString());
+}
+
+TEST("test empty OR child optimization") {
+ //-------------------------------------------------------------------------
+ Blueprint::UP top_up(
+ ap((new OrBlueprint())->
+ addChild(ap(MyLeafSpec(0, true).create())).
+ addChild(ap(MyLeafSpec(20).create())).
+ addChild(ap(MyLeafSpec(0, true).create())).
+ addChild(ap(MyLeafSpec(10).create())).
+ addChild(ap(MyLeafSpec(0, true).create())).
+ addChild(ap(MyLeafSpec(0).create())).
+ addChild(ap(MyLeafSpec(30).create())).
+ addChild(ap(MyLeafSpec(0, true).create()))));
+ //-------------------------------------------------------------------------
+ Blueprint::UP expect_up(
+ ap((new OrBlueprint())->
+ addChild(ap(MyLeafSpec(30).create())).
+ addChild(ap(MyLeafSpec(20).create())).
+ addChild(ap(MyLeafSpec(10).create())).
+ addChild(ap(MyLeafSpec(0).create()))));
+ //-------------------------------------------------------------------------
+ EXPECT_NOT_EQUAL(expect_up->asString(), top_up->asString());
+ top_up = Blueprint::optimize(std::move(top_up));
+ EXPECT_EQUAL(expect_up->asString(), top_up->asString());
+ expect_up = Blueprint::optimize(std::move(expect_up));
+ EXPECT_EQUAL(expect_up->asString(), top_up->asString());
+}
+
+TEST("test empty AND_NOT child optimization") {
+ //-------------------------------------------------------------------------
+ Blueprint::UP top_up(
+ ap((new AndNotBlueprint())->
+ addChild(ap(MyLeafSpec(42).create())).
+ addChild(ap(MyLeafSpec(20).create())).
+ addChild(ap(MyLeafSpec(0, true).create())).
+ addChild(ap(MyLeafSpec(10).create())).
+ addChild(ap(MyLeafSpec(0, true).create())).
+ addChild(ap(MyLeafSpec(0).create())).
+ addChild(ap(MyLeafSpec(30).create())).
+ addChild(ap(MyLeafSpec(0, true).create()))));
+ //-------------------------------------------------------------------------
+ Blueprint::UP expect_up(
+ ap((new AndNotBlueprint())->
+ addChild(ap(MyLeafSpec(42).create())).
+ addChild(ap(MyLeafSpec(30).create())).
+ addChild(ap(MyLeafSpec(20).create())).
+ addChild(ap(MyLeafSpec(10).create())).
+ addChild(ap(MyLeafSpec(0).create()))));
+ //-------------------------------------------------------------------------
+ EXPECT_NOT_EQUAL(expect_up->asString(), top_up->asString());
+ top_up = Blueprint::optimize(std::move(top_up));
+ EXPECT_EQUAL(expect_up->asString(), top_up->asString());
+ expect_up = Blueprint::optimize(std::move(expect_up));
+ EXPECT_EQUAL(expect_up->asString(), top_up->asString());
+}
+
+TEST("test empty RANK child optimization") {
+ //-------------------------------------------------------------------------
+ Blueprint::UP top_up(
+ ap((new RankBlueprint())->
+ addChild(ap(MyLeafSpec(42).create())).
+ addChild(ap(MyLeafSpec(20).create())).
+ addChild(ap(MyLeafSpec(0, true).create())).
+ addChild(ap(MyLeafSpec(10).create())).
+ addChild(ap(MyLeafSpec(0, true).create())).
+ addChild(ap(MyLeafSpec(0).create())).
+ addChild(ap(MyLeafSpec(30).create())).
+ addChild(ap(MyLeafSpec(0, true).create()))));
+ //-------------------------------------------------------------------------
+ Blueprint::UP expect_up(
+ ap((new RankBlueprint())->
+ addChild(ap(MyLeafSpec(42).create())).
+ addChild(ap(MyLeafSpec(20).create())).
+ addChild(ap(MyLeafSpec(10).create())).
+ addChild(ap(MyLeafSpec(0).create())).
+ addChild(ap(MyLeafSpec(30).create()))));
+ //-------------------------------------------------------------------------
+ EXPECT_NOT_EQUAL(expect_up->asString(), top_up->asString());
+ top_up = Blueprint::optimize(std::move(top_up));
+ EXPECT_EQUAL(expect_up->asString(), top_up->asString());
+ expect_up = Blueprint::optimize(std::move(expect_up));
+ EXPECT_EQUAL(expect_up->asString(), top_up->asString());
+}
+
+TEST("require that replaced blueprints retain source id") {
+ //-------------------------------------------------------------------------
+ // replace empty root with empty search
+ Blueprint::UP top1_up(ap(MyLeafSpec(0, true).create()->setSourceId(13)));
+ Blueprint::UP expect1_up(new EmptyBlueprint());
+ expect1_up->setSourceId(13);
+ //-------------------------------------------------------------------------
+ // replace self with single child
+ Blueprint::UP top2_up(ap(static_cast<AndBlueprint&>((new AndBlueprint())->setSourceId(42)).
+ addChild(ap(MyLeafSpec(30).create()->setSourceId(55)))));
+ Blueprint::UP expect2_up(ap(MyLeafSpec(30).create()->setSourceId(42)));
+ //-------------------------------------------------------------------------
+ top1_up = Blueprint::optimize(std::move(top1_up));
+ top2_up = Blueprint::optimize(std::move(top2_up));
+ EXPECT_EQUAL(expect1_up->asString(), top1_up->asString());
+ EXPECT_EQUAL(expect2_up->asString(), top2_up->asString());
+ EXPECT_EQUAL(13u, top1_up->getSourceId());
+ EXPECT_EQUAL(42u, top2_up->getSourceId());
+}
+
+TEST("test Equiv Blueprint") {
+ FieldSpecBaseList fields;
+ search::fef::MatchDataLayout subLayout;
+ fields.add(FieldSpecBase(1, 1));
+ fields.add(FieldSpecBase(2, 2));
+ fields.add(FieldSpecBase(3, 3));
+ EquivBlueprint b(fields, subLayout);
+ {
+ EquivBlueprint &o = *(new EquivBlueprint(fields, subLayout));
+ o.addTerm(ap(MyLeafSpec(5).addField(1, 4).create()), 1.0);
+ o.addTerm(ap(MyLeafSpec(10).addField(1, 5).create()), 1.0);
+ o.addTerm(ap(MyLeafSpec(20).addField(1, 6).create()), 1.0);
+ o.addTerm(ap(MyLeafSpec(50).addField(2, 7).create()), 1.0);
+
+ Blueprint::UP a(&o);
+ ASSERT_TRUE(a->getState().numFields() == 3);
+ EXPECT_EQUAL(1u, a->getState().field(0).getFieldId());
+ EXPECT_EQUAL(2u, a->getState().field(1).getFieldId());
+ EXPECT_EQUAL(3u, a->getState().field(2).getFieldId());
+
+ EXPECT_EQUAL(1u, a->getState().field(0).getHandle());
+ EXPECT_EQUAL(2u, a->getState().field(1).getHandle());
+ EXPECT_EQUAL(3u, a->getState().field(2).getHandle());
+
+ EXPECT_EQUAL(50u, a->getState().estimate().estHits);
+ EXPECT_EQUAL(false, a->getState().estimate().empty);
+ }
+ // createSearch tested by iterator unit test
+}
+
+
+TEST("test WeakAnd Blueprint") {
+ WeakAndBlueprint b(1000);
+ { // combine
+ std::vector<Blueprint::HitEstimate> est;
+ EXPECT_EQUAL(true, b.combine(est).empty);
+ EXPECT_EQUAL(0u, b.combine(est).estHits);
+ est.push_back(Blueprint::HitEstimate(10, false));
+ EXPECT_EQUAL(false, b.combine(est).empty);
+ EXPECT_EQUAL(10u, b.combine(est).estHits);
+ est.push_back(Blueprint::HitEstimate(20, false));
+ EXPECT_EQUAL(false, b.combine(est).empty);
+ EXPECT_EQUAL(20u, b.combine(est).estHits);
+ est.push_back(Blueprint::HitEstimate(5, false));
+ EXPECT_EQUAL(false, b.combine(est).empty);
+ EXPECT_EQUAL(20u, b.combine(est).estHits);
+ est.push_back(Blueprint::HitEstimate(0, true));
+ EXPECT_EQUAL(false, b.combine(est).empty);
+ EXPECT_EQUAL(20u, b.combine(est).estHits);
+ }
+ {
+ WeakAndBlueprint a(1000);
+ a.addChild(ap(MyLeafSpec(10).addField(1, 1).create()));
+ EXPECT_EQUAL(0u, a.exposeFields().size());
+ }
+ {
+ std::vector<Blueprint *> children;
+ Blueprint::UP c1 = ap(MyLeafSpec(10).create());
+ Blueprint::UP c2 = ap(MyLeafSpec(20).create());
+ Blueprint::UP c3 = ap(MyLeafSpec(40).create());
+ Blueprint::UP c4 = ap(MyLeafSpec(30).create());
+ children.push_back(c1.get());
+ children.push_back(c2.get());
+ children.push_back(c3.get());
+ children.push_back(c4.get());
+ b.sort(children);
+ EXPECT_EQUAL(c1.get(), children[0]);
+ EXPECT_EQUAL(c2.get(), children[1]);
+ EXPECT_EQUAL(c3.get(), children[2]);
+ EXPECT_EQUAL(c4.get(), children[3]);
+ }
+ {
+ EXPECT_EQUAL(true, b.inheritStrict(0));
+ EXPECT_EQUAL(true, b.inheritStrict(1));
+ EXPECT_EQUAL(true, b.inheritStrict(2));
+ EXPECT_EQUAL(true, b.inheritStrict(-1));
+ }
+ {
+ FieldSpec field("foo", 1, 1);
+ FakeResult x = FakeResult().doc(1).doc(2).doc(5);
+ FakeResult y = FakeResult().doc(2);
+ FakeResult z = FakeResult().doc(1).doc(4);
+ {
+ WeakAndBlueprint wa(456);
+ MatchData::UP md = MatchData::makeTestInstance(0, 100, 10);
+ wa.addTerm(Blueprint::UP(new FakeBlueprint(field, x)), 120);
+ wa.addTerm(Blueprint::UP(new FakeBlueprint(field, z)), 140);
+ wa.addTerm(Blueprint::UP(new FakeBlueprint(field, y)), 130);
+ {
+ wa.fetchPostings(true);
+ SearchIterator::UP search = wa.createSearch(*md, true);
+ EXPECT_TRUE(dynamic_cast<WeakAndSearch*>(search.get()) != 0);
+ WeakAndSearch &s = dynamic_cast<WeakAndSearch&>(*search);
+ EXPECT_EQUAL(456u, s.getN());
+ ASSERT_EQUAL(3u, s.getTerms().size());
+ EXPECT_GREATER(s.get_max_score(0), 0.0);
+ EXPECT_GREATER(s.get_max_score(1), 0.0);
+ EXPECT_GREATER(s.get_max_score(2), 0.0);
+ wand::Terms terms = s.getTerms();
+ std::sort(terms.begin(), terms.end(), WeightOrder());
+ EXPECT_EQUAL(120, terms[0].weight);
+ EXPECT_EQUAL(3u, terms[0].estHits);
+ EXPECT_EQUAL(0u, terms[0].maxScore); // NB: not set
+ EXPECT_EQUAL(130, terms[1].weight);
+ EXPECT_EQUAL(1u, terms[1].estHits);
+ EXPECT_EQUAL(0u, terms[1].maxScore); // NB: not set
+ EXPECT_EQUAL(140, terms[2].weight);
+ EXPECT_EQUAL(2u, terms[2].estHits);
+ EXPECT_EQUAL(0u, terms[2].maxScore); // NB: not set
+ }
+ {
+ wa.fetchPostings(false);
+ SearchIterator::UP search = wa.createSearch(*md, false);
+ EXPECT_TRUE(dynamic_cast<WeakAndSearch*>(search.get()) != 0);
+ EXPECT_TRUE(search->seek(1));
+ EXPECT_TRUE(search->seek(2));
+ EXPECT_FALSE(search->seek(3));
+ EXPECT_TRUE(search->seek(4));
+ EXPECT_TRUE(search->seek(5));
+ EXPECT_FALSE(search->seek(6));
+ }
+ }
+ }
+}
+
+TEST("require_that_unpack_of_or_over_multisearch_is_optimized") {
+ Blueprint::UP child1(
+ ap((new OrBlueprint())->
+ addChild(ap(MyLeafSpec(20).addField(1,1).create())).
+ addChild(ap(MyLeafSpec(20).addField(2,2).create())).
+ addChild(ap(MyLeafSpec(10).addField(3,3).create()))));
+ Blueprint::UP child2(
+ ap((new OrBlueprint())->
+ addChild(ap(MyLeafSpec(20).addField(4,4).create())).
+ addChild(ap(MyLeafSpec(20).addField(5,5).create())).
+ addChild(ap(MyLeafSpec(10).addField(6,6).create()))));
+ Blueprint::UP top_up(
+ ap((new OrBlueprint())->
+ addChild(std::move(child1)).
+ addChild(std::move(child2))));
+ MatchData::UP md = MatchData::makeTestInstance(0, 100, 10);
+ top_up->fetchPostings(false);
+ EXPECT_EQUAL("search::queryeval::OrLikeSearch<false, search::queryeval::(anonymous namespace)::FullUnpack>",
+ top_up->createSearch(*md, false)->getClassName());
+ md->resolveTermField(2)->tagAsNotNeeded();
+ EXPECT_EQUAL("search::queryeval::OrLikeSearch<false, search::queryeval::(anonymous namespace)::FullUnpack>",
+ top_up->createSearch(*md, false)->getClassName());
+ md->resolveTermField(1)->tagAsNotNeeded();
+ md->resolveTermField(3)->tagAsNotNeeded();
+ EXPECT_EQUAL("search::queryeval::OrLikeSearch<false, search::queryeval::(anonymous namespace)::SelectiveUnpack>",
+ top_up->createSearch(*md, false)->getClassName());
+ md->resolveTermField(4)->tagAsNotNeeded();
+ md->resolveTermField(6)->tagAsNotNeeded();
+ EXPECT_EQUAL("search::queryeval::OrLikeSearch<false, search::queryeval::(anonymous namespace)::SelectiveUnpack>",
+ top_up->createSearch(*md, false)->getClassName());
+ md->resolveTermField(5)->tagAsNotNeeded();
+ EXPECT_EQUAL("search::queryeval::OrLikeSearch<false, search::queryeval::NoUnpack>",
+ top_up->createSearch(*md, false)->getClassName());
+}
+
+TEST("require_that_unpack_of_or_is_optimized") {
+ Blueprint::UP top_up(
+ ap((new OrBlueprint())->
+ addChild(ap(MyLeafSpec(20).addField(1,1).create())).
+ addChild(ap(MyLeafSpec(20).addField(2,2).create())).
+ addChild(ap(MyLeafSpec(10).addField(3,3).create()))));
+ MatchData::UP md = MatchData::makeTestInstance(0, 100, 10);
+ top_up->fetchPostings(false);
+ EXPECT_EQUAL("search::queryeval::OrLikeSearch<false, search::queryeval::(anonymous namespace)::FullUnpack>",
+ top_up->createSearch(*md, false)->getClassName());
+ md->resolveTermField(2)->tagAsNotNeeded();
+ EXPECT_EQUAL("search::queryeval::OrLikeSearch<false, search::queryeval::(anonymous namespace)::SelectiveUnpack>",
+ top_up->createSearch(*md, false)->getClassName());
+ md->resolveTermField(1)->tagAsNotNeeded();
+ md->resolveTermField(3)->tagAsNotNeeded();
+ EXPECT_EQUAL("search::queryeval::OrLikeSearch<false, search::queryeval::NoUnpack>",
+ top_up->createSearch(*md, false)->getClassName());
+}
+
+TEST("require_that_unpack_of_and_is_optimized") {
+ Blueprint::UP top_up(
+ ap((new AndBlueprint())->
+ addChild(ap(MyLeafSpec(20).addField(1,1).create())).
+ addChild(ap(MyLeafSpec(20).addField(2,2).create())).
+ addChild(ap(MyLeafSpec(10).addField(3,3).create()))));
+ MatchData::UP md = MatchData::makeTestInstance(0, 100, 10);
+ top_up->fetchPostings(false);
+ EXPECT_EQUAL("search::queryeval::AndSearchNoStrict<search::queryeval::(anonymous namespace)::FullUnpack>",
+ top_up->createSearch(*md, false)->getClassName());
+ md->resolveTermField(2)->tagAsNotNeeded();
+ EXPECT_EQUAL("search::queryeval::AndSearchNoStrict<search::queryeval::(anonymous namespace)::SelectiveUnpack>",
+ top_up->createSearch(*md, false)->getClassName());
+ md->resolveTermField(1)->tagAsNotNeeded();
+ md->resolveTermField(3)->tagAsNotNeeded();
+ EXPECT_EQUAL("search::queryeval::AndSearchNoStrict<search::queryeval::NoUnpack>",
+ top_up->createSearch(*md, false)->getClassName());
+}
+
+TEST("require_that_unpack_optimization_is_honoured_by_parents") {
+ Blueprint::UP top_up(
+ ap((new AndBlueprint())->
+ addChild(ap((new OrBlueprint())->
+ addChild(ap(MyLeafSpec(20).addField(1,1).create())).
+ addChild(ap(MyLeafSpec(20).addField(2,2).create())).
+ addChild(ap(MyLeafSpec(10).addField(3,3).create()))))));
+ MatchData::UP md = MatchData::makeTestInstance(0, 100, 10);
+ top_up->fetchPostings(false);
+ EXPECT_EQUAL("search::queryeval::AndSearchNoStrict<search::queryeval::(anonymous namespace)::FullUnpack>",
+ top_up->createSearch(*md, false)->getClassName());
+ md->resolveTermField(2)->tagAsNotNeeded();
+ EXPECT_EQUAL("search::queryeval::AndSearchNoStrict<search::queryeval::(anonymous namespace)::FullUnpack>",
+ top_up->createSearch(*md, false)->getClassName());
+ md->resolveTermField(1)->tagAsNotNeeded();
+ md->resolveTermField(3)->tagAsNotNeeded();
+ EXPECT_EQUAL("search::queryeval::AndSearchNoStrict<search::queryeval::NoUnpack>",
+ top_up->createSearch(*md, false)->getClassName());
+}
+
+namespace {
+
+SimpleStringTerm
+makeTerm(const std::string & term)
+{
+ return SimpleStringTerm(term, "field", 0, search::query::Weight(0));
+}
+
+}
+
+TEST("require that children does not optimize when parents refuse them to") {
+ FakeRequestContext requestContext;
+ search::diskindex::TestDiskIndex index;
+ vespalib::mkdir("index", false);
+ index.buildSchema();
+ index.openIndex("index/1", false, true, false, false, false);
+ FieldSpecBaseList fields;
+ fields.add(FieldSpecBase(1, 11));
+ fields.add(FieldSpecBase(2, 22));
+ search::fef::MatchDataLayout subLayout;
+ search::fef::TermFieldHandle idxth21 = subLayout.allocTermField(2);
+ search::fef::TermFieldHandle idxth22 = subLayout.allocTermField(2);
+ search::fef::TermFieldHandle idxth1 = subLayout.allocTermField(1);
+ Blueprint::UP top_up(
+ ap((new EquivBlueprint(fields, subLayout))->
+ addTerm(index.getIndex().createBlueprint(requestContext,
+ FieldSpec("f2", 2, idxth22, true),
+ makeTerm("w2")),
+ 1.0).
+ addTerm(index.getIndex().createBlueprint(requestContext,
+ FieldSpec("f1", 1, idxth1),
+ makeTerm("w1")),
+ 1.0).
+ addTerm(index.getIndex().createBlueprint(requestContext,
+ FieldSpec("f2", 2, idxth21), makeTerm("w2")),
+ 1.0)));
+ MatchData::UP md = MatchData::makeTestInstance(0, 100, 10);
+ top_up->fetchPostings(false);
+ SearchIterator::UP search = top_up->createSearch(*md, true);
+ EXPECT_EQUAL("search::queryeval::EquivImpl<true>", search->getClassName());
+ {
+ const MultiSearch & e = dynamic_cast<const MultiSearch &>(*search);
+ EXPECT_EQUAL("search::BitVectorIteratorStrict", e.getChildren()[0]->getClassName());
+ EXPECT_EQUAL("search::diskindex::Zc4RareWordPosOccIterator<true>", e.getChildren()[1]->getClassName());
+ EXPECT_EQUAL("search::diskindex::Zc4RareWordPosOccIterator<true>", e.getChildren()[2]->getClassName());
+ }
+
+ md->resolveTermField(12)->tagAsNotNeeded();
+ search = top_up->createSearch(*md, true);
+ EXPECT_EQUAL("search::queryeval::EquivImpl<true>", search->getClassName());
+ {
+ const MultiSearch & e = dynamic_cast<const MultiSearch &>(*search);
+ EXPECT_EQUAL("search::BitVectorIteratorStrict", e.getChildren()[0]->getClassName());
+ EXPECT_EQUAL("search::diskindex::Zc4RareWordPosOccIterator<true>", e.getChildren()[1]->getClassName());
+ EXPECT_EQUAL("search::diskindex::Zc4RareWordPosOccIterator<true>", e.getChildren()[2]->getClassName());
+ }
+}
+
+TEST("require_that_unpack_optimization_is_overruled_by_equiv") {
+ FieldSpecBaseList fields;
+ fields.add(FieldSpecBase(1, 1));
+ fields.add(FieldSpecBase(2, 2));
+ fields.add(FieldSpecBase(3, 3));
+ search::fef::MatchDataLayout subLayout;
+ search::fef::TermFieldHandle idxth1 = subLayout.allocTermField(1);
+ search::fef::TermFieldHandle idxth2 = subLayout.allocTermField(2);
+ search::fef::TermFieldHandle idxth3 = subLayout.allocTermField(3);
+ Blueprint::UP top_up(
+ ap((new EquivBlueprint(fields, subLayout))->
+ addTerm(ap((new OrBlueprint())->
+ addChild(ap(MyLeafSpec(20).addField(1,idxth1).create())).
+ addChild(ap(MyLeafSpec(20).addField(2,idxth2).create())).
+ addChild(ap(MyLeafSpec(10).addField(3,idxth3).create()))),
+ 1.0)));
+ MatchData::UP md = MatchData::makeTestInstance(0, 100, 10);
+ top_up->fetchPostings(false);
+ SearchIterator::UP search = top_up->createSearch(*md, true);
+ EXPECT_EQUAL("search::queryeval::EquivImpl<true>", search->getClassName());
+ {
+ const MultiSearch & e = dynamic_cast<const MultiSearch &>(*search);
+ EXPECT_EQUAL("search::queryeval::OrLikeSearch<true, search::queryeval::(anonymous namespace)::FullUnpack>",
+ e.getChildren()[0]->getClassName());
+ }
+
+ md->resolveTermField(2)->tagAsNotNeeded();
+ search = top_up->createSearch(*md, true);
+ EXPECT_EQUAL("search::queryeval::EquivImpl<true>", search->getClassName());
+ {
+ const MultiSearch & e = dynamic_cast<const MultiSearch &>(*search);
+ EXPECT_EQUAL("search::queryeval::OrLikeSearch<true, search::queryeval::(anonymous namespace)::FullUnpack>",
+ e.getChildren()[0]->getClassName());
+ }
+
+ md->resolveTermField(1)->tagAsNotNeeded();
+ md->resolveTermField(3)->tagAsNotNeeded();
+ search = top_up->createSearch(*md, true);
+ EXPECT_EQUAL("search::queryeval::EquivImpl<true>", search->getClassName());
+ {
+ const MultiSearch & e = dynamic_cast<const MultiSearch &>(*search);
+ EXPECT_EQUAL("search::queryeval::OrLikeSearch<true, search::queryeval::(anonymous namespace)::FullUnpack>",
+ e.getChildren()[0]->getClassName());
+ }
+}
+
+TEST("require that children of near are not optimized") {
+ //-------------------------------------------------------------------------
+ Blueprint::UP top_up(
+ ap((new NearBlueprint(10))->
+ addChild(ap((new OrBlueprint())->
+ addChild(ap(MyLeafSpec(20).create())).
+ addChild(ap(MyLeafSpec(0, true).create())))).
+ addChild(ap((new OrBlueprint())->
+ addChild(ap(MyLeafSpec(0, true).create())).
+ addChild(ap(MyLeafSpec(30).create()))))));
+ //-------------------------------------------------------------------------
+ Blueprint::UP expect_up(
+ ap((new NearBlueprint(10))->
+ addChild(ap((new OrBlueprint())->
+ addChild(ap(MyLeafSpec(20).create())).
+ addChild(ap(MyLeafSpec(0, true).create())))).
+ addChild(ap((new OrBlueprint())->
+ addChild(ap(MyLeafSpec(0, true).create())).
+ addChild(ap(MyLeafSpec(30).create()))))));
+ //-------------------------------------------------------------------------
+ top_up = Blueprint::optimize(std::move(top_up));
+ EXPECT_EQUAL(expect_up->asString(), top_up->asString());
+}
+
+TEST("require that children of onear are not optimized") {
+ //-------------------------------------------------------------------------
+ Blueprint::UP top_up(
+ ap((new ONearBlueprint(10))->
+ addChild(ap((new OrBlueprint())->
+ addChild(ap(MyLeafSpec(20).create()->estimate(20))).
+ addChild(ap(MyLeafSpec(0, true).create()->estimate(0, true))))).
+ addChild(ap((new OrBlueprint())->
+ addChild(ap(MyLeafSpec(0, true).create()->estimate(0, true))).
+ addChild(ap(MyLeafSpec(30).create()->estimate(30)))))));
+ //-------------------------------------------------------------------------
+ Blueprint::UP expect_up(
+ ap((new ONearBlueprint(10))->
+ addChild(ap((new OrBlueprint())->
+ addChild(ap(MyLeafSpec(20).create())).
+ addChild(ap(MyLeafSpec(0, true).create())))).
+ addChild(ap((new OrBlueprint())->
+ addChild(ap(MyLeafSpec(0, true).create())).
+ addChild(ap(MyLeafSpec(30).create()))))));
+ //-------------------------------------------------------------------------
+ top_up = Blueprint::optimize(std::move(top_up));
+ EXPECT_EQUAL(expect_up->asString(), top_up->asString());
+}
+
+TEST_MAIN() { TEST_DEBUG("lhs.out", "rhs.out"); TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/queryeval/blueprint/leaf_blueprints_test.cpp b/searchlib/src/tests/queryeval/blueprint/leaf_blueprints_test.cpp
new file mode 100644
index 00000000000..a2353184c9f
--- /dev/null
+++ b/searchlib/src/tests/queryeval/blueprint/leaf_blueprints_test.cpp
@@ -0,0 +1,125 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("blueprint_test");
+#include <vespa/vespalib/testkit/testapp.h>
+#include <vespa/searchlib/queryeval/blueprint.h>
+#include <vespa/searchlib/queryeval/leaf_blueprints.h>
+#include <vespa/vespalib/objects/visit.h>
+
+using namespace search::queryeval;
+using namespace search::fef;
+
+class Test : public vespalib::TestApp
+{
+public:
+ void testEmptyBlueprint();
+ void testSimpleBlueprint();
+ void testFakeBlueprint();
+ int Main();
+};
+
+void
+Test::testEmptyBlueprint()
+{
+ MatchData::UP md(MatchData::makeTestInstance(0, 100, 10));
+ EmptyBlueprint empty(FieldSpecBase(1, 11));
+ EmptyBlueprint copy(empty);
+ ASSERT_TRUE(copy.getState().numFields() == 1u);
+ EXPECT_EQUAL(1u, copy.getState().field(0).getFieldId());
+ EXPECT_EQUAL(11u, copy.getState().field(0).getHandle());
+
+ copy.fetchPostings(true);
+ SearchIterator::UP search = copy.createSearch(*md, true);
+
+ SimpleResult res;
+ res.search(*search);
+ SimpleResult expect; // empty
+ EXPECT_EQUAL(res, expect);
+}
+
+void
+Test::testSimpleBlueprint()
+{
+ MatchData::UP md(MatchData::makeTestInstance(0, 100, 10));
+ SimpleResult a;
+ a.addHit(3).addHit(5).addHit(7);
+ SimpleBlueprint simple(a);
+ simple.tag("tag");
+ SimpleBlueprint copy(simple);
+ EXPECT_EQUAL("tag", copy.tag());
+ copy.fetchPostings(true);
+ SearchIterator::UP search = copy.createSearch(*md, true);
+
+ SimpleResult res;
+ res.search(*search);
+ SimpleResult expect;
+ expect.addHit(3).addHit(5).addHit(7);
+ EXPECT_EQUAL(res, expect);
+}
+
+void
+Test::testFakeBlueprint()
+{
+ MatchData::UP md(MatchData::makeTestInstance(0, 100, 10));
+ FakeResult fake;
+ fake.doc(10).len(50).pos(2).pos(3)
+ .doc(25).len(10).pos(5);
+
+ uint32_t fieldId = 0;
+ TermFieldHandle handle = 0;
+ FakeBlueprint orig(FieldSpec("<field>", fieldId, handle), fake);
+ FakeBlueprint copy(orig);
+
+ copy.fetchPostings(true);
+ SearchIterator::UP search = copy.createSearch(*md, true);
+ search->initFullRange();
+ EXPECT_TRUE(!search->seek(1u));
+ EXPECT_EQUAL(10u, search->getDocId());
+ {
+ search->unpack(10u);
+ TermFieldMatchData &data = *md->resolveTermField(handle);
+ EXPECT_EQUAL(fieldId, data.getFieldId());
+ EXPECT_EQUAL(10u, data.getDocId());
+ EXPECT_EQUAL(10u, data.getDocId());
+ FieldPositionsIterator itr = data.getIterator();
+ EXPECT_EQUAL(50u, itr.getFieldLength());
+ EXPECT_EQUAL(2u, itr.size());
+ ASSERT_TRUE(itr.valid());
+ EXPECT_EQUAL(2u, itr.getPosition());
+ itr.next();
+ ASSERT_TRUE(itr.valid());
+ EXPECT_EQUAL(3u, itr.getPosition());
+ itr.next();
+ EXPECT_TRUE(!itr.valid());
+ }
+ EXPECT_TRUE(search->seek(25));
+ EXPECT_EQUAL(25u, search->getDocId());
+ {
+ search->unpack(25u);
+ TermFieldMatchData &data = *md->resolveTermField(handle);
+ EXPECT_EQUAL(fieldId, data.getFieldId());
+ EXPECT_EQUAL(25u, data.getDocId());
+ FieldPositionsIterator itr = data.getIterator();
+ EXPECT_EQUAL(10u, itr.getFieldLength());
+ EXPECT_EQUAL(1u, itr.size());
+ ASSERT_TRUE(itr.valid());
+ EXPECT_EQUAL(5u, itr.getPosition());
+ itr.next();
+ EXPECT_TRUE(!itr.valid());
+ }
+ EXPECT_TRUE(!search->seek(50));
+ EXPECT_TRUE(search->isAtEnd());
+}
+
+int
+Test::Main()
+{
+ TEST_INIT("leaf_blueprints_test");
+ testEmptyBlueprint();
+ testSimpleBlueprint();
+ testFakeBlueprint();
+ TEST_DONE();
+}
+
+TEST_APPHOOK(Test);
diff --git a/searchlib/src/tests/queryeval/blueprint/mysearch.h b/searchlib/src/tests/queryeval/blueprint/mysearch.h
new file mode 100644
index 00000000000..7ab852b384f
--- /dev/null
+++ b/searchlib/src/tests/queryeval/blueprint/mysearch.h
@@ -0,0 +1,155 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/searchlib/queryeval/blueprint.h>
+#include <vespa/vespalib/objects/visit.h>
+
+namespace search {
+namespace queryeval {
+
+//-----------------------------------------------------------------------------
+
+class MySearch : public SearchIterator
+{
+public:
+ typedef MultiSearch::Children Children;
+ typedef std::vector<SearchIterator::UP> MyChildren;
+ typedef search::fef::TermFieldMatchDataArray TFMDA;
+ typedef search::fef::MatchData MatchData;
+
+private:
+ std::string _tag;
+ bool _isLeaf;
+ bool _isStrict;
+ MyChildren _children;
+ TFMDA _match;
+ MatchData *_md;
+
+ std::vector<uint32_t> _handles;
+
+protected:
+ virtual void doSeek(uint32_t) {}
+ virtual void doUnpack(uint32_t) {}
+
+public:
+ MySearch(const std::string &tag, bool leaf, bool strict)
+ : _tag(tag), _isLeaf(leaf), _isStrict(strict), _children(),
+ _match(), _md(0) {}
+
+ MySearch(const std::string &tag, const TFMDA &tfmda, bool strict)
+ : _tag(tag), _isLeaf(true), _isStrict(strict), _children(),
+ _match(tfmda), _md(0) {}
+
+ MySearch(const std::string &tag, const Children &children,
+ MatchData *md, bool strict)
+ : _tag(tag), _isLeaf(false), _isStrict(strict), _children(),
+ _match(), _md(md) {
+ for (size_t i(0); i < children.size(); i++) {
+ _children.emplace_back(children[i]);
+ }
+ }
+
+ MySearch &add(SearchIterator *search) {
+ _children.emplace_back(search);
+ return *this;
+ }
+
+ MySearch &addHandle(uint32_t handle) {
+ _handles.push_back(handle);
+ return *this;
+ }
+
+ bool verifyAndInferImpl(MatchData &md) {
+ bool ok = true;
+ if (!_isLeaf) {
+ ok &= (_md == &md);
+ }
+ for (size_t i = 0; i < _children.size(); ++i) {
+ MySearch *child = dynamic_cast<MySearch *>(_children[i].get());
+ ok &= (child != 0);
+ if (child != 0) {
+ ok &= child->verifyAndInferImpl(md);
+ }
+ }
+ for (size_t i = 0; i < _match.size(); ++i) {
+ search::fef::TermFieldMatchData *tfmd = _match[i];
+ _handles.push_back(search::fef::IllegalHandle);
+ for (search::fef::TermFieldHandle j = 0; j < md.getNumTermFields(); ++j) {
+ if (md.resolveTermField(j) == tfmd) {
+ _handles.back() = j;
+ break;
+ }
+ }
+ ok &= (_handles.back() != search::fef::IllegalHandle);
+ }
+ return ok;
+ }
+
+ static bool verifyAndInfer(SearchIterator *search, MatchData &md) {
+ MySearch *self = dynamic_cast<MySearch *>(search);
+ if (self == 0) {
+ return false;
+ } else {
+ return self->verifyAndInferImpl(md);
+ }
+ }
+
+ virtual void visitMembers(vespalib::ObjectVisitor &visitor) const {
+ visit(visitor, "_tag", _tag);
+ visit(visitor, "_isLeaf", _isLeaf);
+ visit(visitor, "_isStrict", _isStrict);
+ visit(visitor, "_children", _children);
+ visit(visitor, "_handles", _handles);
+ }
+
+ virtual ~MySearch() {}
+};
+
+//-----------------------------------------------------------------------------
+
+class MyLeaf : public SimpleLeafBlueprint
+{
+ typedef search::fef::TermFieldMatchDataArray TFMDA;
+
+public:
+ virtual SearchIterator::UP
+ createLeafSearch(const TFMDA &tfmda, bool strict) const
+ {
+ return SearchIterator::UP(new MySearch("leaf", tfmda, strict));
+ }
+
+ MyLeaf(const FieldSpecBaseList &fields)
+ : SimpleLeafBlueprint(fields)
+ {}
+
+ MyLeaf &estimate(uint32_t hits, bool empty = false) {
+ setEstimate(HitEstimate(hits, empty));
+ return *this;
+ }
+};
+
+//-----------------------------------------------------------------------------
+
+class MyLeafSpec
+{
+private:
+ FieldSpecBaseList _fields;
+ Blueprint::HitEstimate _estimate;
+
+public:
+ explicit MyLeafSpec(uint32_t estHits, bool empty = false)
+ : _fields(), _estimate(estHits, empty) {}
+
+ MyLeafSpec &addField(uint32_t fieldId, uint32_t handle) {
+ _fields.add(FieldSpecBase(fieldId, handle));
+ return *this;
+ }
+ MyLeaf *create() const {
+ MyLeaf *leaf = new MyLeaf(_fields);
+ leaf->estimate(_estimate.estHits, _estimate.empty);
+ return leaf;
+ }
+};
+
+//-----------------------------------------------------------------------------
+
+} // namespace queryeval
+} // namespace search
diff --git a/searchlib/src/tests/queryeval/booleanmatchiteratorwrapper/.cvsignore b/searchlib/src/tests/queryeval/booleanmatchiteratorwrapper/.cvsignore
new file mode 100644
index 00000000000..9e6565f9d16
--- /dev/null
+++ b/searchlib/src/tests/queryeval/booleanmatchiteratorwrapper/.cvsignore
@@ -0,0 +1,3 @@
+.depend
+Makefile
+booleanmatchiteratorwrapper_test
diff --git a/searchlib/src/tests/queryeval/booleanmatchiteratorwrapper/.gitignore b/searchlib/src/tests/queryeval/booleanmatchiteratorwrapper/.gitignore
new file mode 100644
index 00000000000..b568b87514a
--- /dev/null
+++ b/searchlib/src/tests/queryeval/booleanmatchiteratorwrapper/.gitignore
@@ -0,0 +1,4 @@
+.depend
+Makefile
+booleanmatchiteratorwrapper_test
+searchlib_booleanmatchiteratorwrapper_test_app
diff --git a/searchlib/src/tests/queryeval/booleanmatchiteratorwrapper/CMakeLists.txt b/searchlib/src/tests/queryeval/booleanmatchiteratorwrapper/CMakeLists.txt
new file mode 100644
index 00000000000..cf701c430aa
--- /dev/null
+++ b/searchlib/src/tests/queryeval/booleanmatchiteratorwrapper/CMakeLists.txt
@@ -0,0 +1,9 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_booleanmatchiteratorwrapper_test_app
+ SOURCES
+ booleanmatchiteratorwrapper_test.cpp
+ DEPENDS
+ searchlib
+ searchlib_test
+)
+vespa_add_test(NAME searchlib_booleanmatchiteratorwrapper_test_app COMMAND searchlib_booleanmatchiteratorwrapper_test_app)
diff --git a/searchlib/src/tests/queryeval/booleanmatchiteratorwrapper/DESC b/searchlib/src/tests/queryeval/booleanmatchiteratorwrapper/DESC
new file mode 100644
index 00000000000..097198d38ef
--- /dev/null
+++ b/searchlib/src/tests/queryeval/booleanmatchiteratorwrapper/DESC
@@ -0,0 +1 @@
+booleanmatchiteratorwrapper test. Take a look at booleanmatchiteratorwrapper.cpp for details.
diff --git a/searchlib/src/tests/queryeval/booleanmatchiteratorwrapper/FILES b/searchlib/src/tests/queryeval/booleanmatchiteratorwrapper/FILES
new file mode 100644
index 00000000000..a47b5b35a40
--- /dev/null
+++ b/searchlib/src/tests/queryeval/booleanmatchiteratorwrapper/FILES
@@ -0,0 +1 @@
+booleanmatchiteratorwrapper.cpp
diff --git a/searchlib/src/tests/queryeval/booleanmatchiteratorwrapper/booleanmatchiteratorwrapper_test.cpp b/searchlib/src/tests/queryeval/booleanmatchiteratorwrapper/booleanmatchiteratorwrapper_test.cpp
new file mode 100644
index 00000000000..940f825b691
--- /dev/null
+++ b/searchlib/src/tests/queryeval/booleanmatchiteratorwrapper/booleanmatchiteratorwrapper_test.cpp
@@ -0,0 +1,133 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("booleanmatchiteratorwrapper_test");
+#include <vespa/vespalib/testkit/testapp.h>
+#include <vespa/searchlib/queryeval/booleanmatchiteratorwrapper.h>
+#include <vespa/searchlib/fef/termfieldmatchdata.h>
+#include <vespa/searchlib/common/bitvectoriterator.h>
+#include <vespa/searchlib/test/initrange.h>
+
+using namespace search::fef;
+using namespace search::queryeval;
+using search::BitVector;
+using search::BitVectorIterator;
+
+struct DummyItr : public SearchIterator {
+ static uint32_t seekCnt;
+ static uint32_t unpackCnt;
+ static uint32_t dtorCnt;
+ static uint32_t _unpackedDocId;
+ TermFieldMatchData *match;
+
+ DummyItr(TermFieldMatchData *m) {
+ match = m;
+ }
+
+ ~DummyItr() {
+ ++dtorCnt;
+ }
+
+ void doSeek(uint32_t docid) {
+ ++seekCnt;
+ if (docid <= 10) {
+ setDocId(10);
+ } else if (docid <= 20) {
+ setDocId(20);
+ } else {
+ setAtEnd();
+ }
+ }
+
+ void doUnpack(uint32_t docid) {
+ ++unpackCnt;
+ if (match != 0) {
+ _unpackedDocId = docid;
+ }
+ }
+};
+uint32_t DummyItr::seekCnt = 0;
+uint32_t DummyItr::unpackCnt = 0;
+uint32_t DummyItr::dtorCnt = 0;
+uint32_t DummyItr::_unpackedDocId = 0;
+
+
+TEST("mostly everything") {
+ EXPECT_EQUAL(DummyItr::seekCnt, 0u);
+ EXPECT_EQUAL(DummyItr::unpackCnt, 0u);
+ EXPECT_EQUAL(DummyItr::dtorCnt, 0u);
+ { // without wrapper
+ TermFieldMatchData match;
+ DummyItr::_unpackedDocId = 0;
+ SearchIterator::UP search(new DummyItr(&match));
+ search->initFullRange();
+ EXPECT_EQUAL(DummyItr::_unpackedDocId, 0u);
+ EXPECT_TRUE(!search->seek(1u));
+ EXPECT_EQUAL(search->getDocId(), 10u);
+ EXPECT_TRUE(search->seek(10));
+ search->unpack(10);
+ EXPECT_EQUAL(DummyItr::_unpackedDocId, 10u);
+ EXPECT_TRUE(!search->seek(15));
+ EXPECT_EQUAL(search->getDocId(), 20u);
+ EXPECT_TRUE(search->seek(20));
+ search->unpack(20);
+ EXPECT_EQUAL(DummyItr::_unpackedDocId, 20u);
+ EXPECT_TRUE(!search->seek(25));
+ EXPECT_TRUE(search->isAtEnd());
+ }
+ EXPECT_EQUAL(DummyItr::seekCnt, 3u);
+ EXPECT_EQUAL(DummyItr::unpackCnt, 2u);
+ EXPECT_EQUAL(DummyItr::dtorCnt, 1u);
+ { // with wrapper
+ TermFieldMatchData match;
+ TermFieldMatchDataArray tfmda;
+ tfmda.add(&match);
+ DummyItr::_unpackedDocId = 0;
+ SearchIterator::UP search(new BooleanMatchIteratorWrapper(SearchIterator::UP(new DummyItr(&match)), tfmda));
+ search->initFullRange();
+ EXPECT_EQUAL(DummyItr::_unpackedDocId, 0u);
+ EXPECT_TRUE(!search->seek(1u));
+ EXPECT_EQUAL(search->getDocId(), 10u);
+ EXPECT_TRUE(search->seek(10));
+ search->unpack(10);
+ EXPECT_EQUAL(DummyItr::_unpackedDocId, 0u);
+ EXPECT_TRUE(!search->seek(15));
+ EXPECT_EQUAL(search->getDocId(), 20u);
+ EXPECT_TRUE(search->seek(20));
+ search->unpack(20);
+ EXPECT_EQUAL(DummyItr::_unpackedDocId, 0u);
+ EXPECT_TRUE(!search->seek(25));
+ EXPECT_TRUE(search->isAtEnd());
+ }
+ EXPECT_EQUAL(DummyItr::seekCnt, 6u);
+ EXPECT_EQUAL(DummyItr::unpackCnt, 2u);
+ EXPECT_EQUAL(DummyItr::dtorCnt, 2u);
+ { // with wrapper, without match data
+ SearchIterator::UP search(new BooleanMatchIteratorWrapper(SearchIterator::UP(new DummyItr(0)), TermFieldMatchDataArray()));
+ search->initFullRange();
+ EXPECT_TRUE(!search->seek(1u));
+ EXPECT_EQUAL(search->getDocId(), 10u);
+ EXPECT_TRUE(search->seek(10));
+ search->unpack(10);
+ EXPECT_TRUE(!search->seek(15));
+ EXPECT_EQUAL(search->getDocId(), 20u);
+ EXPECT_TRUE(search->seek(20));
+ search->unpack(20);
+ EXPECT_TRUE(!search->seek(25));
+ EXPECT_TRUE(search->isAtEnd());
+ }
+ EXPECT_EQUAL(DummyItr::seekCnt, 9u);
+ EXPECT_EQUAL(DummyItr::unpackCnt, 2u);
+ EXPECT_EQUAL(DummyItr::dtorCnt, 3u);
+}
+
+TEST("Test boolean wrapper iterators adheres to initRange") {
+ search::test::InitRangeVerifier ir;
+ TermFieldMatchDataArray tfmda;
+ BooleanMatchIteratorWrapper relaxed(ir.createIterator(ir.getExpectedDocIds(), false), tfmda);
+ ir.verify(relaxed);
+ BooleanMatchIteratorWrapper strict(ir.createIterator(ir.getExpectedDocIds(), true), tfmda);
+ ir.verify(strict);
+}
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/queryeval/dot_product/.gitignore b/searchlib/src/tests/queryeval/dot_product/.gitignore
new file mode 100644
index 00000000000..a22cb6c5ea0
--- /dev/null
+++ b/searchlib/src/tests/queryeval/dot_product/.gitignore
@@ -0,0 +1 @@
+searchlib_dot_product_test_app
diff --git a/searchlib/src/tests/queryeval/dot_product/CMakeLists.txt b/searchlib/src/tests/queryeval/dot_product/CMakeLists.txt
new file mode 100644
index 00000000000..91b78f2e54d
--- /dev/null
+++ b/searchlib/src/tests/queryeval/dot_product/CMakeLists.txt
@@ -0,0 +1,9 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_dot_product_test_app
+ SOURCES
+ dot_product_test.cpp
+ DEPENDS
+ searchlib
+ searchlib_test
+)
+vespa_add_test(NAME searchlib_dot_product_test_app COMMAND searchlib_dot_product_test_app)
diff --git a/searchlib/src/tests/queryeval/dot_product/FILES b/searchlib/src/tests/queryeval/dot_product/FILES
new file mode 100644
index 00000000000..cf1bcd96ec4
--- /dev/null
+++ b/searchlib/src/tests/queryeval/dot_product/FILES
@@ -0,0 +1 @@
+dot_product_test.cpp
diff --git a/searchlib/src/tests/queryeval/dot_product/dot_product_test.cpp b/searchlib/src/tests/queryeval/dot_product/dot_product_test.cpp
new file mode 100644
index 00000000000..e9dcc34219b
--- /dev/null
+++ b/searchlib/src/tests/queryeval/dot_product/dot_product_test.cpp
@@ -0,0 +1,219 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("dot_product_test");
+#include <vespa/vespalib/testkit/testapp.h>
+#include <vespa/searchlib/queryeval/dot_product_search.h>
+
+#include <vespa/searchlib/fef/fef.h>
+#include <vespa/searchlib/query/tree/simplequery.h>
+#include <vespa/searchlib/queryeval/field_spec.h>
+#include <vespa/searchlib/queryeval/searchiterator.h>
+#include <vespa/searchlib/queryeval/blueprint.h>
+#include <vespa/searchlib/queryeval/fake_result.h>
+#include <vespa/searchlib/queryeval/fake_searchable.h>
+#include <vespa/searchlib/queryeval/fake_requestcontext.h>
+#include <vespa/searchlib/queryeval/dot_product_search.h>
+#include <vespa/searchlib/test/initrange.h>
+#include <vespa/searchlib/test/document_weight_attribute_helper.h>
+#include <memory>
+#include <string>
+#include <map>
+
+using namespace search;
+using namespace search::query;
+using namespace search::fef;
+using namespace search::queryeval;
+using search::test::InitRangeVerifier;
+using search::test::DocumentWeightAttributeHelper;
+
+namespace {
+
+void setupFakeSearchable(FakeSearchable &fake) {
+ for (size_t docid = 1; docid < 10; ++docid) {
+ std::string token1 = vespalib::make_string("%zu", docid);
+ std::string token2 = vespalib::make_string("1%zu", docid);
+ std::string token3 = vespalib::make_string("2%zu", docid);
+
+ fake.addResult("field", token1, FakeResult().doc(docid).weight(docid).pos(0));
+ fake.addResult("multi-field", token1, FakeResult().doc(docid).weight(docid).pos(0));
+ fake.addResult("multi-field", token2, FakeResult().doc(docid).weight(2 * docid).pos(0));
+ fake.addResult("multi-field", token3, FakeResult().doc(docid).weight(3 * docid).pos(0));
+ }
+}
+
+struct DP {
+ static const uint32_t fieldId = 0;
+ static const TermFieldHandle handle = 0;
+ std::vector<std::pair<std::string, uint32_t> > tokens;
+
+ DP &add(const std::string &token, uint32_t weight) {
+ tokens.push_back(std::make_pair(token, weight));
+ return *this;
+ }
+
+ Node::UP createNode() const {
+ SimpleDotProduct *node = new SimpleDotProduct("view", 0, Weight(0));
+ for (size_t i = 0; i < tokens.size(); ++i) {
+ node->append(Node::UP(new SimpleStringTerm(tokens[i].first, "view", 0, Weight(tokens[i].second))));
+ }
+ return Node::UP(node);
+ }
+
+ FakeResult search(Searchable &searchable, const std::string &field, bool strict) const {
+ MatchData::UP md(MatchData::makeTestInstance(0, 1, 1));
+ FakeRequestContext requestContext;
+ Node::UP node = createNode();
+ FieldSpecList fields = FieldSpecList().add(FieldSpec(field, fieldId, handle));
+ queryeval::Blueprint::UP bp = searchable.createBlueprint(requestContext, fields, *node);
+ bp->fetchPostings(strict);
+ SearchIterator::UP sb = bp->createSearch(*md, strict);
+ EXPECT_TRUE(dynamic_cast<DotProductSearch*>(sb.get()) != 0);
+ sb->initFullRange();
+ FakeResult result;
+ for (uint32_t docId = 1; docId < 10; ++docId) {
+ if (sb->seek(docId)) {
+ sb->unpack(docId);
+ result.doc(docId);
+ double score = md->resolveTermField(handle)->getRawScore();
+ EXPECT_EQUAL((int)score, score);
+ result.score(score);
+ }
+ }
+ return result;
+ }
+};
+
+struct MockSearch : public SearchIterator {
+ int seekCnt;
+ uint32_t _initial;
+ MockSearch(uint32_t initial) : SearchIterator(), seekCnt(0), _initial(initial) { }
+ void initRange(uint32_t begin, uint32_t end) override {
+ SearchIterator::initRange(begin, end);
+ setDocId(_initial);
+ }
+ virtual void doSeek(uint32_t) {
+ ++seekCnt;
+ setAtEnd();
+ }
+ virtual void doUnpack(uint32_t) {}
+};
+
+struct MockFixture {
+ MockSearch *mock;
+ TermFieldMatchData tfmd;
+ std::unique_ptr<SearchIterator> search;
+ MockFixture(uint32_t initial) : mock(0), tfmd(), search() {
+ std::vector<SearchIterator*> children;
+ std::vector<TermFieldMatchData*> childMatch;
+ std::vector<int32_t> weights;
+ MatchData::UP md(MatchData::makeTestInstance(0, 1, 1));
+ mock = new MockSearch(initial);
+ children.push_back(mock);
+ childMatch.push_back(md->resolveTermField(0));
+ weights.push_back(1);
+ search = DotProductSearch::create(children, tfmd, childMatch, weights, std::move(md));
+ }
+};
+
+} // namespace <unnamed>
+
+TEST("test Simple") {
+ FakeSearchable index;
+ setupFakeSearchable(index);
+ FakeResult expect = FakeResult()
+ .doc(3).score(30 * 3)
+ .doc(5).score(50 * 5)
+ .doc(7).score(70 * 7);
+ DP ws = DP().add("7", 70).add("5", 50).add("3", 30).add("100", 1000);
+
+ EXPECT_EQUAL(expect, ws.search(index, "field", true));
+ EXPECT_EQUAL(expect, ws.search(index, "field", false));
+ EXPECT_EQUAL(expect, ws.search(index, "multi-field", true));
+ EXPECT_EQUAL(expect, ws.search(index, "multi-field", false));
+}
+
+TEST("test Multi") {
+ FakeSearchable index;
+ setupFakeSearchable(index);
+ FakeResult expect = FakeResult()
+ .doc(3).score(30 * 3 + 130 * 2 * 3 + 230 * 3 * 3)
+ .doc(5).score(50 * 5 + 150 * 2 * 5)
+ .doc(7).score(70 * 7);
+ DP ws = DP().add("7", 70).add("5", 50).add("3", 30)
+ .add("15", 150).add("13", 130)
+ .add("23", 230).add("100", 1000);
+
+ EXPECT_EQUAL(expect, ws.search(index, "multi-field", true));
+ EXPECT_EQUAL(expect, ws.search(index, "multi-field", false));
+}
+
+TEST_F("test Eager Empty Child", MockFixture(search::endDocId)) {
+ MockSearch *mock = f1.mock;
+ SearchIterator &search = *f1.search;
+ search.initFullRange();
+ EXPECT_EQUAL(search.beginId(), search.getDocId());
+ EXPECT_TRUE(!search.seek(1));
+ EXPECT_TRUE(search.isAtEnd());
+ EXPECT_EQUAL(0, mock->seekCnt);
+}
+
+TEST_F("test Eager Matching Child", MockFixture(5)) {
+ MockSearch *mock = f1.mock;
+ SearchIterator &search = *f1.search;
+ search.initFullRange();
+ EXPECT_EQUAL(search.beginId(), search.getDocId());
+ EXPECT_TRUE(!search.seek(3));
+ EXPECT_EQUAL(5u, search.getDocId());
+ EXPECT_EQUAL(0, mock->seekCnt);
+ EXPECT_TRUE(search.seek(5));
+ EXPECT_EQUAL(5u, search.getDocId());
+ EXPECT_EQUAL(0, mock->seekCnt);
+ EXPECT_TRUE(!search.seek(7));
+ EXPECT_TRUE(search.isAtEnd());
+ EXPECT_EQUAL(1, mock->seekCnt);
+}
+
+TEST("verify initRange with search iterator children") {
+ const size_t num_children = 7;
+ InitRangeVerifier ir;
+ using DocIds = InitRangeVerifier::DocIds;
+ std::vector<DocIds> split_lists(num_children);
+ auto full_list = ir.getExpectedDocIds();
+ for (size_t i = 0; i < full_list.size(); ++i) {
+ split_lists[i % num_children].push_back(full_list[i]);
+ }
+ bool strict = true;
+ std::vector<SearchIterator*> children;
+ for (size_t i = 0; i < num_children; ++i) {
+ children.push_back(ir.createIterator(split_lists[i], strict).release());
+ }
+ TermFieldMatchData tfmd;
+ std::vector<int32_t> weights(num_children, 1);
+ std::vector<fef::TermFieldMatchData*> no_child_match; // unpack not called
+ MatchData::UP no_match_data; // unpack not called
+ SearchIterator::UP itr = DotProductSearch::create(children, tfmd, no_child_match, weights, std::move(no_match_data));
+ ir.verify(*itr);
+}
+
+TEST("verify initRange with document weight iterator children") {
+ const size_t num_children = 7;
+ InitRangeVerifier ir;
+ DocumentWeightAttributeHelper helper;
+ helper.add_docs(ir.getDocIdLimit());
+ auto full_list = ir.getExpectedDocIds();
+ for (size_t i = 0; i < full_list.size(); ++i) {
+ helper.set_doc(full_list[i], i % num_children, 1);
+ }
+ TermFieldMatchData tfmd;
+ std::vector<int32_t> weights(num_children, 1);
+ std::vector<DocumentWeightIterator> children;
+ for (size_t i = 0; i < num_children; ++i) {
+ auto dict_entry = helper.dwa().lookup(vespalib::make_string("%zu", i).c_str());
+ helper.dwa().create(dict_entry.posting_idx, children);
+ }
+ SearchIterator::UP itr(DotProductSearch::create(tfmd, weights, std::move(children)));
+ ir.verify(*itr);
+}
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/queryeval/equiv/.cvsignore b/searchlib/src/tests/queryeval/equiv/.cvsignore
new file mode 100644
index 00000000000..1f159f55125
--- /dev/null
+++ b/searchlib/src/tests/queryeval/equiv/.cvsignore
@@ -0,0 +1,3 @@
+.depend
+Makefile
+equiv_test
diff --git a/searchlib/src/tests/queryeval/equiv/.gitignore b/searchlib/src/tests/queryeval/equiv/.gitignore
new file mode 100644
index 00000000000..d28d4650b98
--- /dev/null
+++ b/searchlib/src/tests/queryeval/equiv/.gitignore
@@ -0,0 +1,4 @@
+*_test
+.depend
+Makefile
+searchlib_equiv_test_app
diff --git a/searchlib/src/tests/queryeval/equiv/CMakeLists.txt b/searchlib/src/tests/queryeval/equiv/CMakeLists.txt
new file mode 100644
index 00000000000..695e9b87121
--- /dev/null
+++ b/searchlib/src/tests/queryeval/equiv/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_equiv_test_app
+ SOURCES
+ equiv_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_equiv_test_app COMMAND searchlib_equiv_test_app)
diff --git a/searchlib/src/tests/queryeval/equiv/DESC b/searchlib/src/tests/queryeval/equiv/DESC
new file mode 100644
index 00000000000..e294d10cb23
--- /dev/null
+++ b/searchlib/src/tests/queryeval/equiv/DESC
@@ -0,0 +1 @@
+equiv test. Take a look at equiv_test.cpp for details.
diff --git a/searchlib/src/tests/queryeval/equiv/FILES b/searchlib/src/tests/queryeval/equiv/FILES
new file mode 100644
index 00000000000..79adf32c1d5
--- /dev/null
+++ b/searchlib/src/tests/queryeval/equiv/FILES
@@ -0,0 +1 @@
+equiv_test.cpp
diff --git a/searchlib/src/tests/queryeval/equiv/equiv_test.cpp b/searchlib/src/tests/queryeval/equiv/equiv_test.cpp
new file mode 100644
index 00000000000..3d97d05995f
--- /dev/null
+++ b/searchlib/src/tests/queryeval/equiv/equiv_test.cpp
@@ -0,0 +1,130 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("equiv_test");
+#include <vespa/vespalib/testkit/testapp.h>
+#include <vespa/searchlib/queryeval/leaf_blueprints.h>
+#include <vespa/searchlib/queryeval/intermediate_blueprints.h>
+#include <vespa/searchlib/queryeval/equiv_blueprint.h>
+#include <vespa/searchlib/fef/matchdatalayout.h>
+
+using namespace search::queryeval;
+using search::fef::MatchData;
+using search::fef::MatchDataLayout;
+using search::fef::TermFieldHandle;
+using search::fef::TermFieldMatchData;
+using search::fef::FieldPositionsIterator;
+
+class Test : public vespalib::TestApp {
+public:
+ void testEquiv();
+ int Main();
+};
+
+void
+Test::testEquiv()
+{
+ FakeResult a;
+ FakeResult b;
+ FakeResult c;
+
+ a.doc(5).pos(1);
+ b.doc(5).pos(2);
+ c.doc(5).pos(3).doc(10).pos(4);
+
+ MatchDataLayout subLayout;
+ TermFieldHandle fbh11 = subLayout.allocTermField(1);
+ TermFieldHandle fbh21 = subLayout.allocTermField(2);
+ TermFieldHandle fbh22 = subLayout.allocTermField(2);
+
+ FieldSpecBaseList fields;
+ fields.add(FieldSpecBase(1, 1));
+ fields.add(FieldSpecBase(2, 2));
+ EquivBlueprint *eq_b = new EquivBlueprint(fields, subLayout);
+
+ eq_b->addTerm(Blueprint::UP(new FakeBlueprint(FieldSpec("foo", 1, fbh11), a)), 1.0);
+ eq_b->addTerm(Blueprint::UP(new FakeBlueprint(FieldSpec("bar", 2, fbh21), b)), 1.0);
+ eq_b->addTerm(Blueprint::UP(new FakeBlueprint(FieldSpec("bar", 2, fbh22), c)), 1.0);
+
+ Blueprint::UP bp(eq_b);
+ for (int i = 0; i <= 1; ++i) {
+ bool strict = (i == 0);
+ TEST_STATE(strict ? "strict" : "non-strict");
+ MatchData::UP md = MatchData::makeTestInstance(0, 100, 10);
+ bp->fetchPostings(strict);
+ SearchIterator::UP search = bp->createSearch(*md, strict);
+ search->initFullRange();
+
+ EXPECT_TRUE(!search->seek(3));
+ if (!strict) {
+ EXPECT_EQUAL(SearchIterator::beginId(), search->getDocId());
+ EXPECT_TRUE(search->seek(5u));
+ }
+ EXPECT_EQUAL(5u, search->getDocId());
+ { // test doc 5 results
+ search->unpack(5u);
+ {
+ TermFieldMatchData &data = *md->resolveTermField(1);
+ EXPECT_EQUAL(1u, data.getFieldId());
+ EXPECT_EQUAL(5u, data.getDocId());
+ FieldPositionsIterator itr = data.getIterator();
+ EXPECT_EQUAL(1u, itr.size());
+ ASSERT_TRUE(itr.valid());
+ EXPECT_EQUAL(1u, itr.getPosition());
+ itr.next();
+ EXPECT_TRUE(!itr.valid());
+ }
+ {
+ TermFieldMatchData &data = *md->resolveTermField(2);
+ EXPECT_EQUAL(2u, data.getFieldId());
+ EXPECT_EQUAL(5u, data.getDocId());
+ FieldPositionsIterator itr = data.getIterator();
+ EXPECT_EQUAL(2u, itr.size());
+ ASSERT_TRUE(itr.valid());
+ EXPECT_EQUAL(2u, itr.getPosition());
+ itr.next();
+ ASSERT_TRUE(itr.valid());
+ EXPECT_EQUAL(3u, itr.getPosition());
+ itr.next();
+ EXPECT_TRUE(!itr.valid());
+ }
+ }
+ EXPECT_TRUE(!search->seek(7));
+ if (!strict) {
+ EXPECT_EQUAL(5u, search->getDocId());
+ EXPECT_TRUE(search->seek(10u));
+ }
+ EXPECT_EQUAL(10u, search->getDocId());
+ { // test doc 10 results
+ search->unpack(10u);
+ EXPECT_EQUAL(5u, md->resolveTermField(1)->getDocId()); // no match
+ {
+ TermFieldMatchData &data = *md->resolveTermField(2);
+ EXPECT_EQUAL(2u, data.getFieldId());
+ EXPECT_EQUAL(10u, data.getDocId());
+ FieldPositionsIterator itr = data.getIterator();
+ EXPECT_EQUAL(1u, itr.size());
+ ASSERT_TRUE(itr.valid());
+ EXPECT_EQUAL(4u, itr.getPosition());
+ itr.next();
+ EXPECT_TRUE(!itr.valid());
+ }
+ }
+ EXPECT_TRUE(!search->seek(13));
+ if (strict) {
+ EXPECT_TRUE(search->isAtEnd());
+ } else {
+ EXPECT_EQUAL(10u, search->getDocId());
+ }
+ }
+}
+
+int
+Test::Main()
+{
+ TEST_INIT("equiv_test");
+ testEquiv();
+ TEST_DONE();
+}
+
+TEST_APPHOOK(Test);
diff --git a/searchlib/src/tests/queryeval/fake_searchable/.cvsignore b/searchlib/src/tests/queryeval/fake_searchable/.cvsignore
new file mode 100644
index 00000000000..56a6e2188be
--- /dev/null
+++ b/searchlib/src/tests/queryeval/fake_searchable/.cvsignore
@@ -0,0 +1,3 @@
+.depend
+Makefile
+fake_searchable_test
diff --git a/searchlib/src/tests/queryeval/fake_searchable/.gitignore b/searchlib/src/tests/queryeval/fake_searchable/.gitignore
new file mode 100644
index 00000000000..42b48509660
--- /dev/null
+++ b/searchlib/src/tests/queryeval/fake_searchable/.gitignore
@@ -0,0 +1,4 @@
+*_test
+.depend
+Makefile
+searchlib_fake_searchable_test_app
diff --git a/searchlib/src/tests/queryeval/fake_searchable/CMakeLists.txt b/searchlib/src/tests/queryeval/fake_searchable/CMakeLists.txt
new file mode 100644
index 00000000000..75eaae7d9ed
--- /dev/null
+++ b/searchlib/src/tests/queryeval/fake_searchable/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_fake_searchable_test_app
+ SOURCES
+ fake_searchable_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_fake_searchable_test_app COMMAND searchlib_fake_searchable_test_app)
diff --git a/searchlib/src/tests/queryeval/fake_searchable/DESC b/searchlib/src/tests/queryeval/fake_searchable/DESC
new file mode 100644
index 00000000000..75ce65796f0
--- /dev/null
+++ b/searchlib/src/tests/queryeval/fake_searchable/DESC
@@ -0,0 +1 @@
+fake_searchable test. Take a look at fake_searchable_test.cpp for details.
diff --git a/searchlib/src/tests/queryeval/fake_searchable/FILES b/searchlib/src/tests/queryeval/fake_searchable/FILES
new file mode 100644
index 00000000000..b02a791e332
--- /dev/null
+++ b/searchlib/src/tests/queryeval/fake_searchable/FILES
@@ -0,0 +1 @@
+fake_searchable_test.cpp
diff --git a/searchlib/src/tests/queryeval/fake_searchable/fake_searchable_test.cpp b/searchlib/src/tests/queryeval/fake_searchable/fake_searchable_test.cpp
new file mode 100644
index 00000000000..c3ff31625d3
--- /dev/null
+++ b/searchlib/src/tests/queryeval/fake_searchable/fake_searchable_test.cpp
@@ -0,0 +1,379 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("fake_searchable_test");
+#include <vespa/vespalib/testkit/testapp.h>
+
+#include <vespa/searchlib/queryeval/fake_searchable.h>
+#include <vespa/searchlib/queryeval/fake_requestcontext.h>
+#include <vespa/searchlib/query/tree/intermediatenodes.h>
+#include <vespa/searchlib/query/tree/termnodes.h>
+#include <vespa/searchlib/query/tree/simplequery.h>
+#include <vespa/searchlib/fef/matchdata.h>
+
+using namespace search::queryeval;
+using namespace search::query;
+using namespace search::fef;
+
+class Test : public vespalib::TestApp {
+public:
+ int Main();
+ void testTestFakeResult();
+ void testTerm();
+ void testPhrase();
+ void testWeightedSet();
+ void testMultiField();
+ void testPhraseWithEmptyChild();
+private:
+ FakeRequestContext _requestContext;
+};
+
+void
+Test::testTestFakeResult()
+{
+ EXPECT_EQUAL(FakeResult().doc(5).elem(5).len(15).weight(5).pos(5).pos(6).elem(6).doc(6),
+ FakeResult().doc(5).elem(5).len(15).weight(5).pos(5).pos(6).elem(6).doc(6));
+
+ EXPECT_NOT_EQUAL(FakeResult().doc(5).elem(5).len(15).weight(5).pos(5),
+ FakeResult().doc(1).elem(5).len(15).weight(5).pos(5));
+
+ EXPECT_NOT_EQUAL(FakeResult().doc(5).elem(5).len(15).weight(5).pos(5),
+ FakeResult().doc(5).elem(1).len(15).weight(5).pos(5));
+
+ EXPECT_NOT_EQUAL(FakeResult().doc(5).elem(5).len(15).weight(5).pos(5),
+ FakeResult().doc(5).elem(5).len(19).weight(5).pos(5));
+
+ EXPECT_NOT_EQUAL(FakeResult().doc(5).elem(5).len(15).weight(5).pos(5),
+ FakeResult().doc(5).elem(5).len(15).weight(1).pos(5));
+
+ EXPECT_NOT_EQUAL(FakeResult().doc(5).elem(5).len(15).weight(5).pos(5),
+ FakeResult().doc(5).elem(5).len(15).weight(5).pos(1));
+
+ EXPECT_NOT_EQUAL(FakeResult().doc(5).elem(5).len(15).weight(5).pos(5),
+ FakeResult().doc(5).elem(5).len(15).weight(5).pos(5).doc(6));
+
+ EXPECT_NOT_EQUAL(FakeResult().doc(5).elem(5).len(15).weight(5).pos(5),
+ FakeResult().doc(5).elem(5).len(15).weight(5).pos(5).elem(6));
+
+ EXPECT_NOT_EQUAL(FakeResult().doc(5).elem(5).len(15).weight(5).pos(5),
+ FakeResult().doc(5).elem(5).len(15).weight(5).pos(5).pos(6));
+}
+
+void
+Test::testTerm()
+{
+ Weight w(100);
+
+ FakeSearchable source;
+ source.addResult("fieldfoo", "word1",
+ FakeResult().doc(5).pos(3));
+
+ SimpleStringTerm termNode("word1", "viewfoo", 1, w);
+
+ FieldSpecList fields;
+ fields.add(FieldSpec("fieldfoo", 1, 1));
+ Blueprint::UP bp = source.createBlueprint(_requestContext, fields, termNode);
+ for (int i = 0; i <= 1; ++i) {
+ bool strict = (i == 0);
+ TEST_STATE(strict ? "strict" : "non-strict");
+ MatchData::UP md = MatchData::makeTestInstance(0, 100, 10);
+ bp->fetchPostings(strict);
+ SearchIterator::UP search = bp->createSearch(*md, strict);
+ search->initFullRange();
+
+ EXPECT_TRUE(!search->seek(3));
+ if (strict) {
+ EXPECT_EQUAL(5u, search->getDocId());
+ } else {
+ EXPECT_TRUE(search->seek(5u));
+ }
+ EXPECT_EQUAL(5u, search->getDocId());
+ { // test doc 5 results
+ search->unpack(5u);
+ {
+ TermFieldMatchData &data = *md->resolveTermField(1);
+ EXPECT_EQUAL(1u, data.getFieldId());
+ EXPECT_EQUAL(5u, data.getDocId());
+ FieldPositionsIterator itr = data.getIterator();
+ EXPECT_EQUAL(1u, itr.size());
+ ASSERT_TRUE(itr.valid());
+ EXPECT_EQUAL(3u, itr.getPosition());
+ itr.next();
+ EXPECT_TRUE(!itr.valid());
+ }
+ }
+ EXPECT_TRUE(!search->seek(13));
+ if (strict) {
+ EXPECT_TRUE(search->isAtEnd());
+ }
+ }
+}
+
+void
+Test::testPhrase()
+{
+ Weight w(100);
+
+ FakeSearchable source;
+ source.addResult("fieldfoo", "word1",
+ FakeResult().doc(3).pos(7).doc(5).pos(3));
+ source.addResult("fieldfoo", "word2",
+ FakeResult().doc(2).pos(1).doc(3).pos(10).doc(5).pos(4));
+
+ SimplePhrase phraseNode("viewfoo", 1, w);
+ phraseNode.append(Node::UP(new SimpleStringTerm("word1", "viewfoo", 2, w)));
+ phraseNode.append(Node::UP(new SimpleStringTerm("word2", "viewfoo", 3, w)));
+
+ FieldSpecList fields;
+ fields.add(FieldSpec("fieldfoo", 1, 1));
+ Blueprint::UP bp = source.createBlueprint(_requestContext, fields, phraseNode);
+ for (int i = 0; i <= 1; ++i) {
+ bool strict = (i == 0);
+ TEST_STATE(strict ? "strict" : "non-strict");
+ MatchData::UP md = MatchData::makeTestInstance(0, 100, 10);
+ bp->fetchPostings(strict);
+ SearchIterator::UP search = bp->createSearch(*md, strict);
+ search->initFullRange();
+
+ EXPECT_TRUE(!search->seek(3));
+ if (strict) {
+ EXPECT_EQUAL(5u, search->getDocId());
+ } else {
+ EXPECT_TRUE(search->seek(5u));
+ }
+ EXPECT_EQUAL(5u, search->getDocId());
+ { // test doc 5 results
+ search->unpack(5u);
+ {
+ TermFieldMatchData &data = *md->resolveTermField(1);
+ EXPECT_EQUAL(1u, data.getFieldId());
+ EXPECT_EQUAL(5u, data.getDocId());
+ FieldPositionsIterator itr = data.getIterator();
+ EXPECT_EQUAL(1u, itr.size());
+ ASSERT_TRUE(itr.valid());
+ EXPECT_EQUAL(3u, itr.getPosition());
+ itr.next();
+ EXPECT_TRUE(!itr.valid());
+ }
+ }
+ EXPECT_TRUE(!search->seek(13));
+ if (strict) {
+ EXPECT_TRUE(search->isAtEnd());
+ }
+ }
+}
+
+void
+Test::testWeightedSet()
+{
+ Weight w(100);
+
+ FakeSearchable source;
+ source.addResult("fieldfoo", "friend1",
+ FakeResult().doc(3).doc(5).doc(7).doc(9));
+ source.addResult("fieldfoo", "friend2",
+ FakeResult().doc(3).doc(4).doc(5).doc(6));
+ source.addResult("fieldfoo", "friend3",
+ FakeResult().doc(5));
+
+ SimpleWeightedSetTerm weightedSet("fieldfoo", 1, w);
+ weightedSet.append(Node::UP(new SimpleStringTerm("friend1", "fieldfoo", 2, Weight(1))));
+ weightedSet.append(Node::UP(new SimpleStringTerm("friend2", "fieldfoo", 3, Weight(2))));
+
+ FieldSpecList fields;
+ fields.add(FieldSpec("fieldfoo", 1, 1));
+ Blueprint::UP bp = source.createBlueprint(_requestContext, fields, weightedSet);
+ for (int i = 0; i <= 1; ++i) {
+ bool strict = (i == 0);
+ TEST_STATE(strict ? "strict" : "non-strict");
+ MatchData::UP md = MatchData::makeTestInstance(0, 100, 10);
+ bp->fetchPostings(strict);
+ SearchIterator::UP search = bp->createSearch(*md, strict);
+ search->initFullRange();
+
+ EXPECT_TRUE(!search->seek(2));
+ if (strict) {
+ EXPECT_EQUAL(3u, search->getDocId());
+ } else {
+ EXPECT_TRUE(search->seek(3u));
+ }
+ EXPECT_EQUAL(3u, search->getDocId());
+ { // test doc 3 results
+ search->unpack(3u);
+ {
+ TermFieldMatchData &data = *md->resolveTermField(1);
+ EXPECT_EQUAL(1u, data.getFieldId());
+ EXPECT_EQUAL(3u, data.getDocId());
+ FieldPositionsIterator itr = data.getIterator();
+ EXPECT_EQUAL(2u, itr.size());
+ ASSERT_TRUE(itr.valid());
+ EXPECT_EQUAL(2, itr.getElementWeight());
+ itr.next();
+ ASSERT_TRUE(itr.valid());
+ EXPECT_EQUAL(1, itr.getElementWeight());
+ itr.next();
+ EXPECT_TRUE(!itr.valid());
+ }
+ }
+ EXPECT_TRUE(search->seek(4));
+ EXPECT_TRUE(search->seek(5));
+ EXPECT_TRUE(search->seek(6));
+ EXPECT_TRUE(search->seek(7));
+ EXPECT_TRUE(!search->seek(8));
+ EXPECT_TRUE(search->seek(9));
+ { // test doc 9 results
+ search->unpack(9u);
+ {
+ TermFieldMatchData &data = *md->resolveTermField(1);
+ EXPECT_EQUAL(1u, data.getFieldId());
+ EXPECT_EQUAL(9u, data.getDocId());
+ FieldPositionsIterator itr = data.getIterator();
+ EXPECT_EQUAL(1u, itr.size());
+ ASSERT_TRUE(itr.valid());
+ EXPECT_EQUAL(1, itr.getElementWeight());
+ itr.next();
+ EXPECT_TRUE(!itr.valid());
+ }
+ }
+ EXPECT_TRUE(!search->seek(13));
+ if (strict) {
+ EXPECT_TRUE(search->isAtEnd());
+ }
+ }
+}
+
+void
+Test::testMultiField()
+{
+ Weight w(100);
+
+ FakeSearchable source;
+ source.addResult("fieldfoo", "word1",
+ FakeResult().doc(5).pos(3));
+ source.addResult("fieldbar", "word1",
+ FakeResult().doc(5).pos(7).doc(10).pos(2));
+
+ SimpleStringTerm termNode("word1", "viewfoobar", 1, w);
+
+ FieldSpecList fields;
+ fields.add(FieldSpec("fieldfoo", 1, 1));
+ fields.add(FieldSpec("fieldbar", 2, 2));
+ Blueprint::UP bp = source.createBlueprint(_requestContext, fields, termNode);
+ for (int i = 0; i <= 1; ++i) {
+ bool strict = (i == 0);
+ TEST_STATE(strict ? "strict" : "non-strict");
+ MatchData::UP md = MatchData::makeTestInstance(0, 100, 10);
+ bp->fetchPostings(strict);
+ SearchIterator::UP search = bp->createSearch(*md, strict);
+ search->initFullRange();
+
+ EXPECT_TRUE(!search->seek(3));
+ if (strict) {
+ EXPECT_EQUAL(5u, search->getDocId());
+ } else {
+ EXPECT_TRUE(search->seek(5u));
+ }
+ EXPECT_EQUAL(5u, search->getDocId());
+ { // test doc 5 results
+ search->unpack(5u);
+ {
+ TermFieldMatchData &data = *md->resolveTermField(1);
+ EXPECT_EQUAL(1u, data.getFieldId());
+ EXPECT_EQUAL(5u, data.getDocId());
+ FieldPositionsIterator itr = data.getIterator();
+ EXPECT_EQUAL(1u, itr.size());
+ ASSERT_TRUE(itr.valid());
+ EXPECT_EQUAL(3u, itr.getPosition());
+ itr.next();
+ EXPECT_TRUE(!itr.valid());
+ }
+ {
+ TermFieldMatchData &data = *md->resolveTermField(2);
+ EXPECT_EQUAL(2u, data.getFieldId());
+ EXPECT_EQUAL(5u, data.getDocId());
+ FieldPositionsIterator itr = data.getIterator();
+ EXPECT_EQUAL(1u, itr.size());
+ ASSERT_TRUE(itr.valid());
+ EXPECT_EQUAL(7u, itr.getPosition());
+ itr.next();
+ EXPECT_TRUE(!itr.valid());
+ }
+ }
+ EXPECT_TRUE(!search->seek(7));
+ if (strict) {
+ EXPECT_EQUAL(10u, search->getDocId());
+ } else {
+ EXPECT_TRUE(search->seek(10u));
+ }
+ EXPECT_EQUAL(10u, search->getDocId());
+ { // test doc 10 results
+ search->unpack(10u);
+ {
+ TermFieldMatchData &data = *md->resolveTermField(1);
+ EXPECT_EQUAL(1u, data.getFieldId());
+ EXPECT_NOT_EQUAL(10u, data.getDocId());
+ }
+ {
+ TermFieldMatchData &data = *md->resolveTermField(2);
+ EXPECT_EQUAL(2u, data.getFieldId());
+ EXPECT_EQUAL(10u, data.getDocId());
+ FieldPositionsIterator itr = data.getIterator();
+ EXPECT_EQUAL(1u, itr.size());
+ ASSERT_TRUE(itr.valid());
+ EXPECT_EQUAL(2u, itr.getPosition());
+ itr.next();
+ EXPECT_TRUE(!itr.valid());
+ }
+ }
+ EXPECT_TRUE(!search->seek(13));
+ if (strict) {
+ EXPECT_TRUE(search->isAtEnd());
+ }
+ }
+}
+
+void
+Test::testPhraseWithEmptyChild()
+{
+ Weight w(100);
+
+ FakeSearchable source;
+ source.addResult("fieldfoo", "word1",
+ FakeResult().doc(3).pos(7).doc(5).pos(3));
+
+ SimplePhrase phraseNode("viewfoo", 1, w);
+ phraseNode.append(Node::UP(new SimpleStringTerm("word1", "viewfoo", 2, w)));
+ phraseNode.append(Node::UP(new SimpleStringTerm("word2", "viewfoo", 3, w)));
+
+ FieldSpecList fields;
+ fields.add(FieldSpec("fieldfoo", 1, 1));
+ Blueprint::UP bp = source.createBlueprint(_requestContext, fields, phraseNode);
+ for (int i = 0; i <= 1; ++i) {
+ bool strict = (i == 0);
+ TEST_STATE(strict ? "strict" : "non-strict");
+ MatchData::UP md = MatchData::makeTestInstance(0, 100, 10);
+ bp->fetchPostings(strict);
+ SearchIterator::UP search = bp->createSearch(*md, strict);
+ search->initFullRange();
+
+ EXPECT_TRUE(!search->seek(3));
+ if (strict) {
+ EXPECT_TRUE(search->isAtEnd());
+ }
+ }
+}
+
+int
+Test::Main()
+{
+ TEST_INIT("fake_searchable_test");
+ testTestFakeResult();
+ testTerm();
+ testPhrase();
+ testWeightedSet();
+ testMultiField();
+ testPhraseWithEmptyChild();
+ TEST_DONE();
+}
+
+TEST_APPHOOK(Test);
diff --git a/searchlib/src/tests/queryeval/getnodeweight/.gitignore b/searchlib/src/tests/queryeval/getnodeweight/.gitignore
new file mode 100644
index 00000000000..a050aeb9215
--- /dev/null
+++ b/searchlib/src/tests/queryeval/getnodeweight/.gitignore
@@ -0,0 +1 @@
+searchlib_getnodeweight_test_app
diff --git a/searchlib/src/tests/queryeval/getnodeweight/CMakeLists.txt b/searchlib/src/tests/queryeval/getnodeweight/CMakeLists.txt
new file mode 100644
index 00000000000..5502e2c033b
--- /dev/null
+++ b/searchlib/src/tests/queryeval/getnodeweight/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_getnodeweight_test_app
+ SOURCES
+ getnodeweight_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_getnodeweight_test_app COMMAND searchlib_getnodeweight_test_app)
diff --git a/searchlib/src/tests/queryeval/getnodeweight/getnodeweight_test.cpp b/searchlib/src/tests/queryeval/getnodeweight/getnodeweight_test.cpp
new file mode 100644
index 00000000000..949a1364061
--- /dev/null
+++ b/searchlib/src/tests/queryeval/getnodeweight/getnodeweight_test.cpp
@@ -0,0 +1,49 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("getweight_test");
+#include <vespa/vespalib/testkit/testapp.h>
+
+#include <vespa/searchlib/query/tree/simplequery.h>
+#include <vespa/searchlib/queryeval/get_weight_from_node.h>
+
+using namespace search::query;
+using namespace search::queryeval;
+
+class Test : public vespalib::TestApp {
+public:
+ int32_t getWeight(const Node &node);
+ int Main();
+};
+
+int32_t
+Test::getWeight(const Node &node) {
+ return getWeightFromNode(node).percent();
+}
+
+int
+Test::Main()
+{
+ TEST_INIT("getweight_test");
+ EXPECT_EQUAL(0, getWeight(SimpleAnd()));
+ EXPECT_EQUAL(0, getWeight(SimpleAndNot()));
+ EXPECT_EQUAL(42, getWeight(SimpleEquiv(0, Weight(42))));
+ EXPECT_EQUAL(42, getWeight(SimpleNumberTerm("foo", "bar", 1, Weight(42))));
+ EXPECT_EQUAL(42, getWeight(SimpleLocationTerm(Location(), "bar", 1, Weight(42))));
+ EXPECT_EQUAL(0, getWeight(SimpleNear(5)));
+ EXPECT_EQUAL(0, getWeight(SimpleONear(5)));
+ EXPECT_EQUAL(0, getWeight(SimpleOr()));
+ EXPECT_EQUAL(42, getWeight(SimplePhrase("bar", 1, Weight(42))));
+ EXPECT_EQUAL(42, getWeight(SimplePrefixTerm("foo", "bar", 1, Weight(42))));
+ EXPECT_EQUAL(42, getWeight(SimpleRangeTerm(Range(), "bar", 1, Weight(42))));
+ EXPECT_EQUAL(0, getWeight(SimpleRank()));
+ EXPECT_EQUAL(42, getWeight(SimpleStringTerm("foo", "bar", 1, Weight(42))));
+ EXPECT_EQUAL(42, getWeight(SimpleSubstringTerm("foo", "bar", 1, Weight(42))));
+ EXPECT_EQUAL(42, getWeight(SimpleSuffixTerm("foo", "bar", 1, Weight(42))));
+ EXPECT_EQUAL(42, getWeight(SimpleWeightedSetTerm("bar", 1, Weight(42))));
+ EXPECT_EQUAL(42, getWeight(SimpleDotProduct("bar", 1, Weight(42))));
+ EXPECT_EQUAL(42, getWeight(SimpleWandTerm("bar", 1, Weight(42), 57, 67, 77.7)));
+ TEST_DONE();
+}
+
+TEST_APPHOOK(Test);
diff --git a/searchlib/src/tests/queryeval/monitoring_search_iterator/.gitignore b/searchlib/src/tests/queryeval/monitoring_search_iterator/.gitignore
new file mode 100644
index 00000000000..a3012152158
--- /dev/null
+++ b/searchlib/src/tests/queryeval/monitoring_search_iterator/.gitignore
@@ -0,0 +1 @@
+searchlib_monitoring_search_iterator_test_app
diff --git a/searchlib/src/tests/queryeval/monitoring_search_iterator/CMakeLists.txt b/searchlib/src/tests/queryeval/monitoring_search_iterator/CMakeLists.txt
new file mode 100644
index 00000000000..eebc9c8cf17
--- /dev/null
+++ b/searchlib/src/tests/queryeval/monitoring_search_iterator/CMakeLists.txt
@@ -0,0 +1,9 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_monitoring_search_iterator_test_app
+ SOURCES
+ monitoring_search_iterator_test.cpp
+ DEPENDS
+ searchlib
+ searchlib_test
+)
+vespa_add_test(NAME searchlib_monitoring_search_iterator_test_app COMMAND searchlib_monitoring_search_iterator_test_app)
diff --git a/searchlib/src/tests/queryeval/monitoring_search_iterator/DESC b/searchlib/src/tests/queryeval/monitoring_search_iterator/DESC
new file mode 100644
index 00000000000..1c126deb4ed
--- /dev/null
+++ b/searchlib/src/tests/queryeval/monitoring_search_iterator/DESC
@@ -0,0 +1 @@
+monitoring_search_iterator test. Take a look at monitoring_search_iterator_test.cpp for details.
diff --git a/searchlib/src/tests/queryeval/monitoring_search_iterator/FILES b/searchlib/src/tests/queryeval/monitoring_search_iterator/FILES
new file mode 100644
index 00000000000..b514a3cf512
--- /dev/null
+++ b/searchlib/src/tests/queryeval/monitoring_search_iterator/FILES
@@ -0,0 +1 @@
+monitoring_search_iterator_test.cpp
diff --git a/searchlib/src/tests/queryeval/monitoring_search_iterator/monitoring_search_iterator_test.cpp b/searchlib/src/tests/queryeval/monitoring_search_iterator/monitoring_search_iterator_test.cpp
new file mode 100644
index 00000000000..a559be21ea3
--- /dev/null
+++ b/searchlib/src/tests/queryeval/monitoring_search_iterator/monitoring_search_iterator_test.cpp
@@ -0,0 +1,325 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/vespalib/testkit/test_kit.h>
+#include <vespa/searchlib/queryeval/andsearch.h>
+#include <vespa/searchlib/queryeval/monitoring_search_iterator.h>
+#include <vespa/searchlib/queryeval/monitoring_dump_iterator.h>
+#include <vespa/searchlib/queryeval/simpleresult.h>
+#include <vespa/searchlib/queryeval/simplesearch.h>
+#include <vespa/searchlib/queryeval/test/searchhistory.h>
+#include <vespa/vespalib/objects/objectdumper.h>
+#include <vespa/searchlib/test/initrange.h>
+#include <vespa/searchlib/common/bitvectoriterator.h>
+#include <vespa/searchlib/fef/termfieldmatchdata.h>
+
+using namespace search::queryeval;
+using namespace search::queryeval::test;
+using namespace search::fef;
+using search::BitVector;
+using search::BitVectorIterator;
+using std::make_unique;
+
+struct HistorySearchIterator : public SearchIterator
+{
+ SearchHistory _history;
+ mutable bool _getPostingInfoCalled;
+ HistorySearchIterator() : _history(), _getPostingInfoCalled(false) {}
+ virtual void doSeek(uint32_t docId) {
+ _history.seek("x", docId);
+ setDocId(docId);
+ }
+ virtual void doUnpack(uint32_t docId) { _history.unpack("x", docId); }
+ virtual const PostingInfo *getPostingInfo() const {
+ _getPostingInfoCalled = true;
+ return NULL;
+ }
+};
+
+struct SimpleFixture
+{
+ MonitoringSearchIterator _itr;
+ SimpleResult _res;
+ SimpleFixture()
+ : _itr("SimpleIterator",
+ SearchIterator::UP(new SimpleSearch(SimpleResult().addHit(2).addHit(4).addHit(8))),
+ false),
+ _res()
+ {
+ _res.search(_itr);
+ }
+};
+
+struct AdvancedFixture
+{
+ MonitoringSearchIterator _itr;
+ AdvancedFixture()
+ : _itr("AdvancedIterator",
+ SearchIterator::UP(new SimpleSearch(SimpleResult().addHit(2).addHit(4).addHit(8).
+ addHit(16).addHit(32).addHit(64).addHit(128))),
+ true)
+ {
+ }
+};
+
+struct HistoryFixture
+{
+ MonitoringSearchIterator _itr;
+ HistoryFixture()
+ : _itr("HistoryIterator", SearchIterator::UP(new HistorySearchIterator()), false)
+ {
+ }
+};
+
+struct TreeFixture
+{
+ MonitoringSearchIterator::UP _itr;
+ SimpleResult _res;
+ TreeFixture()
+ : _itr()
+ {
+ MultiSearch::Children children;
+ children.push_back(new MonitoringSearchIterator("child1",
+ SearchIterator::UP
+ (new SimpleSearch(SimpleResult().addHit(2).addHit(4).addHit(6))),
+ false));
+ children.push_back(new MonitoringSearchIterator("child2",
+ SearchIterator::UP
+ (new SimpleSearch(SimpleResult().addHit(3).addHit(4).addHit(5))),
+ false));
+ _itr.reset(new MonitoringSearchIterator("and",
+ SearchIterator::UP(AndSearch::create(children, true)),
+ false));
+ _res.search(*_itr);
+ }
+};
+
+TEST_F("require that number of seeks is collected", SimpleFixture)
+{
+ EXPECT_EQUAL(4u, f._itr.getStats().getNumSeeks());
+ EXPECT_EQUAL(4.0 / 3.0, f._itr.getStats().getNumSeeksPerUnpack());
+}
+
+TEST_F("require that number of unpacks is collected", SimpleFixture)
+{
+ EXPECT_EQUAL(3u, f._itr.getStats().getNumUnpacks());
+}
+
+TEST_F("require that docId stepping is collected (root iterator)", SimpleFixture)
+{
+ EXPECT_EQUAL(4u, f._itr.getStats().getNumDocIdSteps());
+ EXPECT_EQUAL(1, f._itr.getStats().getAvgDocIdSteps());
+}
+
+TEST_F("require that docId stepping is collected (child iterator)", AdvancedFixture)
+{
+ f._itr.seek(1); // 2 - 1
+ EXPECT_EQUAL(1u, f._itr.getStats().getNumDocIdSteps());
+ f._itr.seek(19); // 19 - 2
+ EXPECT_EQUAL(18u, f._itr.getStats().getNumDocIdSteps());
+ f._itr.seek(64); // 64 - 32
+ EXPECT_EQUAL(50u, f._itr.getStats().getNumDocIdSteps());
+ f._itr.seek(74); // 74 - 64
+ EXPECT_EQUAL(60u, f._itr.getStats().getNumDocIdSteps());
+ EXPECT_EQUAL(60 / 4, f._itr.getStats().getAvgDocIdSteps());
+}
+
+TEST_F("require that hit skipping is collected ", AdvancedFixture)
+{
+ f._itr.seek(1);
+ EXPECT_EQUAL(0u, f._itr.getStats().getNumHitSkips());
+ f._itr.seek(4);
+ EXPECT_EQUAL(0u, f._itr.getStats().getNumHitSkips());
+ f._itr.seek(16);
+ EXPECT_EQUAL(1u, f._itr.getStats().getNumHitSkips());
+ f._itr.seek(120);
+ EXPECT_EQUAL(3u, f._itr.getStats().getNumHitSkips());
+ EXPECT_EQUAL(3.0 / 4.0, f._itr.getStats().getAvgHitSkips());
+}
+
+TEST_F("require that results from underlying iterator is exposed through monitoring iterator", SimpleFixture)
+{
+ EXPECT_EQUAL(SimpleResult().addHit(2).addHit(4).addHit(8), f._res);
+}
+
+TEST_F("require that calls are forwarded to underlying iterator", HistoryFixture)
+{
+ f._itr.seek(2);
+ EXPECT_EQUAL(2u, f._itr.getDocId());
+ f._itr.unpack(2);
+ f._itr.seek(4);
+ EXPECT_EQUAL(4u, f._itr.getDocId());
+ f._itr.unpack(4);
+ f._itr.seek(8);
+ EXPECT_EQUAL(8u, f._itr.getDocId());
+ f._itr.unpack(8);
+ f._itr.getPostingInfo();
+ const HistorySearchIterator &hsi = dynamic_cast<const HistorySearchIterator &>(f._itr.getIterator());
+ EXPECT_EQUAL(SearchHistory().seek("x", 2).unpack("x", 2).seek("x", 4).unpack("x", 4).seek("x", 8).unpack("x", 8),
+ hsi._history);
+ EXPECT_TRUE(hsi._getPostingInfoCalled);
+}
+
+void
+addIterator(MonitoringSearchIterator::Dumper &d,
+ const vespalib::string &name,
+ int64_t numSeeks,
+ double avgDocIdSteps,
+ double avgHitSkips,
+ int64_t numUnpacks,
+ double numSeeksPerUnpack)
+{
+ d.openStruct("void", "search::queryeval::MonitoringSearchIterator");
+ d.visitString("iteratorName", name);
+ {
+ d.openStruct("void", "MonitoringSearchIterator::Stats");
+ d.visitInt("numSeeks", numSeeks);
+ d.visitFloat("avgDocIdSteps", avgDocIdSteps);
+ d.visitFloat("avgHitSkips", avgHitSkips);
+ d.visitInt("numUnpacks", numUnpacks);
+ d.visitFloat("numSeeksPerUnpack", numSeeksPerUnpack);
+ d.closeStruct();
+ }
+ d.closeStruct();
+}
+
+TEST("require that dumper can handle formatting on several levels")
+{
+ MonitoringSearchIterator::Dumper d(2, 6, 6, 10, 3);
+ addIterator(d, "root", 1, 1.1, 11.22, 11, 111.3);
+ {
+ d.openStruct("children", "void");
+ addIterator(d, "c.1", 222222, 2.1111, 22.2222, 222000, 222.4444);
+ {
+ d.openStruct("children", "void");
+ addIterator(d, "c.1.1", 333333, 3.1111, 33.2222, 333000, 333333.4444);
+ addIterator(d, "c.1.2", 444, 4.22, 4.33, 440, 4.44);
+ d.closeStruct();
+ }
+ addIterator(d, "c.2", 555, 5.22, 5.33, 550, 5.44);
+ {
+ d.openStruct("children", "void");
+ addIterator(d, "c.2.1", 666666, 6.1111, 66.2222, 333000, 666666.4444);
+ addIterator(d, "c.2.2", 777, 7.22, 7.33, 770, 7.44);
+ d.closeStruct();
+ }
+ d.closeStruct();
+ }
+ EXPECT_EQUAL(
+ "root: 1 seeks, 1.100 steps/seek, 11.220 skips/seek, 11 unpacks, 111.300 seeks/unpack\n"
+ " c.1: 222222 seeks, 2.111 steps/seek, 22.222 skips/seek, 222000 unpacks, 222.444 seeks/unpack\n"
+ " c.1.1: 333333 seeks, 3.111 steps/seek, 33.222 skips/seek, 333000 unpacks, 333333.444 seeks/unpack\n"
+ " c.1.2: 444 seeks, 4.220 steps/seek, 4.330 skips/seek, 440 unpacks, 4.440 seeks/unpack\n"
+ " c.2: 555 seeks, 5.220 steps/seek, 5.330 skips/seek, 550 unpacks, 5.440 seeks/unpack\n"
+ " c.2.1: 666666 seeks, 6.111 steps/seek, 66.222 skips/seek, 333000 unpacks, 666666.444 seeks/unpack\n"
+ " c.2.2: 777 seeks, 7.220 steps/seek, 7.330 skips/seek, 770 unpacks, 7.440 seeks/unpack\n",
+ d.toString());
+}
+
+TEST_F("require that single iterator can be dumped compact", AdvancedFixture)
+{
+ f._itr.seek(6);
+ f._itr.seek(16);
+ f._itr.unpack(16);
+ MonitoringSearchIterator::Dumper dumper;
+ visit(dumper, "", f._itr);
+ EXPECT_EQUAL("AdvancedIterator: 2 seeks, 7.00 steps/seek, 1.00 skips/seek, 1 unpacks, 2.00 seeks/unpack\n",
+ dumper.toString());
+}
+
+TEST_F("require that iterator tree can be dumped compact", TreeFixture)
+{
+ MonitoringSearchIterator::Dumper dumper;
+ visit(dumper, "", f._itr.get());
+ EXPECT_EQUAL("and: 2 seeks, 1.00 steps/seek, 0.00 skips/seek, 1 unpacks, 2.00 seeks/unpack\n"
+ " child1: 3 seeks, 1.00 steps/seek, 0.00 skips/seek, 1 unpacks, 3.00 seeks/unpack\n"
+ " child2: 3 seeks, 1.67 steps/seek, 0.00 skips/seek, 1 unpacks, 3.00 seeks/unpack\n",
+ dumper.toString());
+}
+
+TEST_F("require that single iterator can be dumped verbosely", AdvancedFixture)
+{
+ f._itr.seek(6);
+ f._itr.seek(16);
+ f._itr.unpack(16);
+ vespalib::ObjectDumper dumper;
+ visit(dumper, "", &f._itr);
+ EXPECT_EQUAL("search::queryeval::MonitoringSearchIterator {\n"
+ " iteratorName: 'AdvancedIterator'\n"
+ " iteratorType: 'search::queryeval::SimpleSearch'\n"
+ " stats: MonitoringSearchIterator::Stats {\n"
+ " numSeeks: 2\n"
+ " numDocIdSteps: 14\n"
+ " avgDocIdSteps: 7\n"
+ " numHitSkips: 2\n"
+ " avgHitSkips: 1\n"
+ " numUnpacks: 1\n"
+ " numSeeksPerUnpack: 2\n"
+ " }\n"
+ " tag: '<null>'\n"
+ "}\n",
+ dumper.toString());
+}
+
+TEST_F("require that iterator tree can be dumped verbosely", TreeFixture)
+{
+ vespalib::ObjectDumper dumper;
+ visit(dumper, "", f._itr.get());
+ EXPECT_EQUAL("search::queryeval::MonitoringSearchIterator {\n"
+ " iteratorName: 'and'\n"
+ " iteratorType: 'search::queryeval::AndSearchStrict<search::queryeval::(anonymous namespace)::FullUnpack>'\n"
+ " stats: MonitoringSearchIterator::Stats {\n"
+ " numSeeks: 2\n"
+ " numDocIdSteps: 2\n"
+ " avgDocIdSteps: 1\n"
+ " numHitSkips: 0\n"
+ " avgHitSkips: 0\n"
+ " numUnpacks: 1\n"
+ " numSeeksPerUnpack: 2\n"
+ " }\n"
+ " children: std::vector {\n"
+ " [0]: search::queryeval::MonitoringSearchIterator {\n"
+ " iteratorName: 'child1'\n"
+ " iteratorType: 'search::queryeval::SimpleSearch'\n"
+ " stats: MonitoringSearchIterator::Stats {\n"
+ " numSeeks: 3\n"
+ " numDocIdSteps: 3\n"
+ " avgDocIdSteps: 1\n"
+ " numHitSkips: 0\n"
+ " avgHitSkips: 0\n"
+ " numUnpacks: 1\n"
+ " numSeeksPerUnpack: 3\n"
+ " }\n"
+ " tag: '<null>'\n"
+ " }\n"
+ " [1]: search::queryeval::MonitoringSearchIterator {\n"
+ " iteratorName: 'child2'\n"
+ " iteratorType: 'search::queryeval::SimpleSearch'\n"
+ " stats: MonitoringSearchIterator::Stats {\n"
+ " numSeeks: 3\n"
+ " numDocIdSteps: 5\n"
+ " avgDocIdSteps: 1.66667\n"
+ " numHitSkips: 0\n"
+ " avgHitSkips: 0\n"
+ " numUnpacks: 1\n"
+ " numSeeksPerUnpack: 3\n"
+ " }\n"
+ " tag: '<null>'\n"
+ " }\n"
+ " }\n"
+ "}\n",
+ dumper.toString());
+}
+
+MonitoringSearchIterator::UP
+create(SearchIterator::UP child) {
+ return make_unique<MonitoringSearchIterator>("test", std::move(child), false);
+}
+
+TEST("test monitoring search iterator handles initRange accoring to spec") {
+ search::test::InitRangeVerifier ir;
+ ir.verify(*create(ir.createIterator(ir.getExpectedDocIds(), false)));
+ ir.verify(*make_unique<MonitoringDumpIterator>(create(ir.createIterator(ir.getExpectedDocIds(), false))));
+}
+
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/queryeval/multibitvectoriterator/.gitignore b/searchlib/src/tests/queryeval/multibitvectoriterator/.gitignore
new file mode 100644
index 00000000000..415cfe14f11
--- /dev/null
+++ b/searchlib/src/tests/queryeval/multibitvectoriterator/.gitignore
@@ -0,0 +1,2 @@
+searchlib_multibitvectoriterator_test_app
+searchlib_multibitvectoriterator_bench_app
diff --git a/searchlib/src/tests/queryeval/multibitvectoriterator/CMakeLists.txt b/searchlib/src/tests/queryeval/multibitvectoriterator/CMakeLists.txt
new file mode 100644
index 00000000000..1bac095225f
--- /dev/null
+++ b/searchlib/src/tests/queryeval/multibitvectoriterator/CMakeLists.txt
@@ -0,0 +1,15 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_multibitvectoriterator_test_app
+ SOURCES
+ multibitvectoriterator_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_multibitvectoriterator_test_app COMMAND searchlib_multibitvectoriterator_test_app)
+vespa_add_executable(searchlib_multibitvectoriterator_bench_app
+ SOURCES
+ multibitvectoriterator_bench.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_multibitvectoriterator_bench_app COMMAND searchlib_multibitvectoriterator_bench_app and no no 10 100000000 50 50 50 BENCHMARK)
diff --git a/searchlib/src/tests/queryeval/multibitvectoriterator/DESC b/searchlib/src/tests/queryeval/multibitvectoriterator/DESC
new file mode 100644
index 00000000000..96fc26f5950
--- /dev/null
+++ b/searchlib/src/tests/queryeval/multibitvectoriterator/DESC
@@ -0,0 +1 @@
+multibitvectoriterator test. Take a look at multibitvectoriterator_test.cpp for details.
diff --git a/searchlib/src/tests/queryeval/multibitvectoriterator/FILES b/searchlib/src/tests/queryeval/multibitvectoriterator/FILES
new file mode 100644
index 00000000000..7ae4331d090
--- /dev/null
+++ b/searchlib/src/tests/queryeval/multibitvectoriterator/FILES
@@ -0,0 +1,2 @@
+multibitvectoriterator_test.cpp
+multibitvectoriterator_bench.cpp
diff --git a/searchlib/src/tests/queryeval/multibitvectoriterator/multibitvectoriterator_bench.cpp b/searchlib/src/tests/queryeval/multibitvectoriterator/multibitvectoriterator_bench.cpp
new file mode 100644
index 00000000000..8912be56351
--- /dev/null
+++ b/searchlib/src/tests/queryeval/multibitvectoriterator/multibitvectoriterator_bench.cpp
@@ -0,0 +1,138 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("multibitvectoriterator_test");
+#include <vespa/vespalib/testkit/testapp.h>
+#include <vespa/searchlib/queryeval/multibitvectoriterator.h>
+#include <vespa/searchlib/queryeval/emptysearch.h>
+#include <vespa/searchlib/common/bitvectoriterator.h>
+#include <vespa/searchlib/queryeval/andsearch.h>
+#include <vespa/searchlib/queryeval/orsearch.h>
+#include <vespa/searchlib/fef/termfieldmatchdata.h>
+#include <vespa/searchlib/fef/termfieldmatchdataarray.h>
+
+using namespace search::queryeval;
+using namespace search::fef;
+using namespace search;
+
+//-----------------------------------------------------------------------------
+
+class Test : public vespalib::TestApp
+{
+public:
+ void benchmark();
+ int Main();
+ template <typename T>
+ void testSearch(bool strict);
+private:
+ void searchAndCompare(SearchIterator::UP s, uint32_t docIdLimit);
+ void setup();
+ std::vector< BitVector::UP > _bvs;
+ uint32_t _numSearch;
+ uint32_t _numDocs;
+ bool _strict;
+ bool _optimize;
+ vespalib::string _type;
+ std::vector<int> _fillLimits;
+};
+
+void Test::setup()
+{
+ for(size_t i(0); i < _fillLimits.size(); i++) {
+ _bvs.push_back(BitVector::create(_numDocs));
+ BitVector & bv(*_bvs.back());
+ for (size_t j(0); j < bv.size(); j++) {
+ int r = rand();
+ if (r < _fillLimits[i]) {
+ bv.setBit(j);
+ }
+ }
+ bv.invalidateCachedCount();
+ LOG(info, "Filled bitvector %ld with %d bits", i, bv.countTrueBits());
+ }
+}
+
+typedef std::vector<uint32_t> H;
+
+H
+seek(SearchIterator & s, uint32_t docIdLimit)
+{
+ H h;
+ for (uint32_t docId(0); docId < docIdLimit; ) {
+ if (s.seek(docId)) {
+ h.push_back(docId);
+ docId++;
+ } else {
+ if (s.getDocId() > docId) {
+ docId = s.getDocId();
+ } else {
+ docId++;
+ }
+ }
+ //printf("docId = %u\n", docId);
+ }
+ return h;
+}
+
+void
+Test::benchmark()
+{
+ if (_type == "and") {
+ LOG(info, "Testing 'and'");
+ for (size_t i(0); i < _numSearch; i++) {
+ testSearch<AndSearch>(_strict);
+ }
+ } else {
+ LOG(info, "Testing 'or'");
+ for (size_t i(0); i < _numSearch; i++) {
+ testSearch<OrSearch>(_strict);
+ }
+ }
+}
+
+template <typename T>
+void
+Test::testSearch(bool strict)
+{
+ TermFieldMatchData tfmd;
+ TermFieldMatchDataArray tfmda;
+ tfmda.add(&tfmd);
+ MultiSearch::Children andd;
+ for (size_t i(0); i < _bvs.size(); i++) {
+ andd.push_back(BitVectorIterator::create(_bvs[i].get(), tfmda, strict).release());
+ }
+ SearchIterator::UP s(T::create(andd, strict));
+ if (_optimize) {
+ LOG(info, "Optimizing iterator");
+ s = MultiBitVectorIteratorBase::optimize(std::move(s));
+ }
+ H h = seek(*s, _numDocs);
+ LOG(info, "Found %ld hits", h.size());
+}
+
+int
+Test::Main()
+{
+ TEST_INIT("multibitvectoriterator_benchmark");
+ if (_argc < 6) {
+ LOG(info, "%s <'and/or'> <'strict/no-strict'> <'optimize/no-optimize> <numsearch> <numdocs> <fill 1> [<fill N>]", _argv[0]);
+ return -1;
+ }
+ _type = _argv[1];
+ _strict = _argv[2] == vespalib::string("strict");
+ _optimize = _argv[3] == vespalib::string("optimize");
+ _numSearch = strtoul(_argv[4], NULL, 0);
+ _numDocs = strtoul(_argv[5], NULL, 0);
+ for (int i(6); i < _argc; i++) {
+ _fillLimits.push_back((RAND_MAX/100) * strtoul(_argv[i], NULL, 0));
+ }
+ LOG(info, "Start setup of '%s' isearch with %ld vectors with %d documents", _type.c_str(), _fillLimits.size(), _numDocs);
+ setup();
+ LOG(info, "Start benchmark");
+ benchmark();
+ LOG(info, "Done benchmark");
+ TEST_FLUSH();
+ TEST_DONE();
+}
+
+TEST_APPHOOK(Test);
diff --git a/searchlib/src/tests/queryeval/multibitvectoriterator/multibitvectoriterator_test.cpp b/searchlib/src/tests/queryeval/multibitvectoriterator/multibitvectoriterator_test.cpp
new file mode 100644
index 00000000000..f3a25d675b2
--- /dev/null
+++ b/searchlib/src/tests/queryeval/multibitvectoriterator/multibitvectoriterator_test.cpp
@@ -0,0 +1,531 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("multibitvectoriterator_test");
+#include <vespa/vespalib/testkit/testapp.h>
+#include <vespa/searchlib/queryeval/multibitvectoriterator.h>
+#include <vespa/searchlib/queryeval/emptysearch.h>
+#include <vespa/searchlib/queryeval/truesearch.h>
+#include <vespa/searchlib/common/bitvectoriterator.h>
+#include <vespa/searchlib/queryeval/andsearch.h>
+#include <vespa/searchlib/queryeval/andnotsearch.h>
+#include <vespa/searchlib/queryeval/orsearch.h>
+#include <vespa/searchlib/fef/termfieldmatchdata.h>
+#include <vespa/searchlib/fef/termfieldmatchdataarray.h>
+
+using namespace search::queryeval;
+using namespace search::fef;
+using namespace search;
+
+//-----------------------------------------------------------------------------
+
+class Test : public vespalib::TestApp
+{
+public:
+ void testAndNot();
+ void testAnd();
+ void testBug7163266();
+ void testOr();
+ void testAndWith();
+ void testEndGuard();
+ template<typename T>
+ void testThatOptimizePreservesUnpack();
+ template <typename T>
+ void testOptimizeCommon(bool isAnd);
+ template <typename T>
+ void testOptimizeAndOr();
+ template <typename T>
+ void testSearch(bool strict);
+ int Main();
+private:
+ void verifySelectiveUnpack(SearchIterator & s, const TermFieldMatchData * tfmd);
+ void searchAndCompare(SearchIterator::UP s, uint32_t docIdLimit);
+ void setup();
+ std::vector< BitVector::UP > _bvs;
+};
+
+void Test::setup()
+{
+ srand(7);
+ for(size_t i(0); i < 3; i++) {
+ _bvs.push_back(BitVector::create(10000));
+ BitVector & bv(*_bvs.back());
+ for (size_t j(0); j < bv.size(); j++) {
+ int r = rand();
+ if (r & 0x1) {
+ bv.setBit(j);
+ }
+ }
+ }
+}
+
+typedef std::vector<uint32_t> H;
+
+H
+seekNoReset(SearchIterator & s, uint32_t start, uint32_t docIdLimit)
+{
+ H h;
+ for (uint32_t docId(start); docId < docIdLimit; ) {
+ if (s.seek(docId)) {
+ h.push_back(docId);
+ docId++;
+ } else {
+ if (s.getDocId() > docId) {
+ docId = s.getDocId();
+ } else {
+ docId++;
+ }
+ }
+ //printf("docId = %u\n", docId);
+ }
+ return h;
+}
+
+H
+seek(SearchIterator & s, uint32_t docIdLimit)
+{
+ s.resetRange();
+ s.initFullRange();
+ return seekNoReset(s, 1, docIdLimit);
+}
+
+void
+Test::testAndWith()
+{
+ TermFieldMatchData tfmd;
+ TermFieldMatchDataArray tfmda;
+ tfmda.add(&tfmd);
+ {
+ MultiSearch::Children children;
+ children.push_back(BitVectorIterator::create(_bvs[0].get(), tfmda, false).release());
+ children.push_back(BitVectorIterator::create(_bvs[1].get(), tfmda, false).release());
+
+ SearchIterator::UP s(AndSearch::create(children, false));
+ s = MultiBitVectorIteratorBase::optimize(std::move(s));
+
+ s->initFullRange();
+ H firstHits2 = seekNoReset(*s, 1, 130);
+ SearchIterator::UP filter(s->andWith(BitVectorIterator::create(_bvs[2].get(), tfmda, false), 9));
+ H lastHits2F = seekNoReset(*s, 130, _bvs[0]->size());
+
+ children.clear();
+ children.push_back(BitVectorIterator::create(_bvs[0].get(), tfmda, false).release());
+ children.push_back(BitVectorIterator::create(_bvs[1].get(), tfmda, false).release());
+ children.push_back(BitVectorIterator::create(_bvs[2].get(), tfmda, false).release());
+ s.reset(AndSearch::create(children, false));
+ s = MultiBitVectorIteratorBase::optimize(std::move(s));
+ s->initFullRange();
+ H firstHits3 = seekNoReset(*s, 1, 130);
+ H lastHits3 = seekNoReset(*s, 130, _bvs[0]->size());
+ //These constants will change if srand(7) is changed.
+ EXPECT_EQUAL(30u, firstHits2.size());
+ EXPECT_EQUAL(19u, firstHits3.size());
+ EXPECT_EQUAL(1234u, lastHits2F.size());
+ ASSERT_EQUAL(lastHits3.size(), lastHits2F.size());
+ for (size_t i(0); i < lastHits3.size(); i++) {
+ EXPECT_EQUAL(lastHits3[i], lastHits2F[i]);
+ }
+ }
+}
+
+void
+Test::testAndNot()
+{
+ testOptimizeCommon<AndNotSearch>(false);
+ testSearch<AndNotSearch>(false);
+ testSearch<AndNotSearch>(true);
+}
+
+void
+Test::testAnd()
+{
+ testOptimizeCommon<AndSearch>(true);
+ testOptimizeAndOr<AndSearch>();
+ testSearch<AndSearch>(false);
+ testSearch<AndSearch>(true);
+}
+
+void
+Test::testOr()
+{
+ testOptimizeCommon< OrSearch >(false);
+ testOptimizeAndOr< OrSearch >();
+ testSearch<OrSearch>(false);
+ testSearch<OrSearch>(true);
+}
+
+void
+Test::testBug7163266()
+{
+ TermFieldMatchData tfmd[30];
+ TermFieldMatchDataArray tfmda[30];
+ for (size_t i(0); i < 30; i++) {
+ tfmda[i].add(&tfmd[i]);
+ }
+ _bvs[0]->setBit(1);
+ _bvs[1]->setBit(1);
+ MultiSearch::Children children;
+ UnpackInfo unpackInfo;
+ for (size_t i(0); i < 28; i++) {
+ children.push_back(new TrueSearch(tfmd[2]));
+ unpackInfo.add(i);
+ }
+ children.push_back(BitVectorIterator::create(_bvs[0].get(), tfmda[0], false).release());
+ children.push_back(BitVectorIterator::create(_bvs[1].get(), tfmda[1], false).release());
+ SearchIterator::UP s(AndSearch::create(children, false, unpackInfo));
+ const MultiSearch * ms = dynamic_cast<const MultiSearch *>(s.get());
+ EXPECT_TRUE(ms != NULL);
+ EXPECT_EQUAL(30u, ms->getChildren().size());
+ EXPECT_EQUAL("search::queryeval::AndSearchNoStrict<search::queryeval::(anonymous namespace)::SelectiveUnpack>", s->getClassName());
+ for (size_t i(0); i < 28; i++) {
+ EXPECT_TRUE(ms->needUnpack(i));
+ }
+ EXPECT_FALSE(ms->needUnpack(28));
+ EXPECT_FALSE(ms->needUnpack(29));
+ s = MultiBitVectorIteratorBase::optimize(std::move(s));
+ ms = dynamic_cast<const MultiSearch *>(s.get());
+ EXPECT_TRUE(ms != NULL);
+ EXPECT_EQUAL(29u, ms->getChildren().size());
+ EXPECT_EQUAL("search::queryeval::AndSearchNoStrict<search::queryeval::(anonymous namespace)::SelectiveUnpack>", s->getClassName());
+ for (size_t i(0); i < 28; i++) {
+ EXPECT_TRUE(ms->needUnpack(i));
+ }
+ EXPECT_TRUE(ms->needUnpack(28)); // NB: force unpack all
+}
+
+template<typename T>
+void
+Test::testThatOptimizePreservesUnpack()
+{
+ TermFieldMatchData tfmd[4];
+ TermFieldMatchDataArray tfmda[4];
+ for (size_t i(0); i < 4; i++) {
+ tfmda[i].add(&tfmd[i]);
+ }
+ _bvs[0]->setBit(1);
+ _bvs[1]->setBit(1);
+ _bvs[2]->setBit(1);
+ MultiSearch::Children children;
+ children.push_back(BitVectorIterator::create(_bvs[0].get(), tfmda[0], false).release());
+ children.push_back(BitVectorIterator::create(_bvs[1].get(), tfmda[1], false).release());
+ children.push_back(new TrueSearch(tfmd[2]));
+ children.push_back(BitVectorIterator::create(_bvs[2].get(), tfmda[3], false).release());
+ UnpackInfo unpackInfo;
+ unpackInfo.add(1);
+ unpackInfo.add(2);
+ SearchIterator::UP s(T::create(children, false, unpackInfo));
+ s->initFullRange();
+ const MultiSearch * ms = dynamic_cast<const MultiSearch *>(s.get());
+ EXPECT_TRUE(ms != NULL);
+ EXPECT_EQUAL(4u, ms->getChildren().size());
+ verifySelectiveUnpack(*s, tfmd);
+ tfmd[1].resetOnlyDocId(0);
+ tfmd[2].resetOnlyDocId(0);
+ s = MultiBitVectorIteratorBase::optimize(std::move(s));
+ s->resetRange();
+ s->initFullRange();
+ ms = dynamic_cast<const MultiSearch *>(s.get());
+ EXPECT_TRUE(ms != NULL);
+ EXPECT_EQUAL(2u, ms->getChildren().size());
+ verifySelectiveUnpack(*s, tfmd);
+}
+
+void
+Test::verifySelectiveUnpack(SearchIterator & s, const TermFieldMatchData * tfmd)
+{
+ s.seek(1);
+ EXPECT_EQUAL(0u, tfmd[0].getDocId());
+ EXPECT_EQUAL(0u, tfmd[1].getDocId());
+ EXPECT_EQUAL(0u, tfmd[2].getDocId());
+ EXPECT_EQUAL(0u, tfmd[3].getDocId());
+ s.unpack(1);
+ EXPECT_EQUAL(0u, tfmd[0].getDocId());
+ EXPECT_EQUAL(1u, tfmd[1].getDocId());
+ EXPECT_EQUAL(1u, tfmd[2].getDocId());
+ EXPECT_EQUAL(0u, tfmd[3].getDocId());
+}
+
+void
+Test::searchAndCompare(SearchIterator::UP s, uint32_t docIdLimit)
+{
+ H a = seek(*s, docIdLimit);
+ SearchIterator * p = s.get();
+ s = MultiBitVectorIteratorBase::optimize(std::move(s));
+ if (s.get() != p) {
+ H b = seek(*s, docIdLimit);
+ EXPECT_FALSE(a.empty());
+ EXPECT_EQUAL(a.size(), b.size());
+ for (size_t i(0); i < a.size(); i++) {
+ EXPECT_EQUAL(a[i], b[i]);
+ }
+ }
+}
+
+template <typename T>
+void
+Test::testSearch(bool strict)
+{
+ TermFieldMatchData tfmd;
+ TermFieldMatchDataArray tfmda;
+ tfmda.add(&tfmd);
+ uint32_t docIdLimit(_bvs[0]->size());
+ {
+ MultiSearch::Children children;
+ children.push_back(BitVectorIterator::create(_bvs[0].get(), tfmda, strict).release());
+ SearchIterator::UP s(T::create(children, strict));
+ searchAndCompare(std::move(s), docIdLimit);
+ }
+ {
+ MultiSearch::Children children;
+ children.push_back(BitVectorIterator::create(_bvs[0].get(), tfmda, strict).release());
+ children.push_back(BitVectorIterator::create(_bvs[1].get(), tfmda, strict).release());
+ SearchIterator::UP s(T::create(children, strict));
+ searchAndCompare(std::move(s), docIdLimit);
+ }
+ {
+ MultiSearch::Children children;
+ children.push_back(BitVectorIterator::create(_bvs[0].get(), tfmda, strict).release());
+ children.push_back(BitVectorIterator::create(_bvs[1].get(), tfmda, strict).release());
+ children.push_back(BitVectorIterator::create(_bvs[2].get(), tfmda, strict).release());
+ SearchIterator::UP s(T::create(children, strict));
+ searchAndCompare(std::move(s), docIdLimit);
+ }
+}
+
+template <typename T>
+void
+Test::testOptimizeCommon(bool isAnd)
+{
+ TermFieldMatchData tfmd;
+ TermFieldMatchDataArray tfmda;
+ tfmda.add(&tfmd);
+
+ {
+ MultiSearch::Children children;
+ children.push_back(BitVectorIterator::create(_bvs[0].get(), tfmda, false).release());
+
+ SearchIterator::UP s(T::create(children, false));
+ s = MultiBitVectorIteratorBase::optimize(std::move(s));
+ EXPECT_TRUE(dynamic_cast<const T *>(s.get()) != NULL);
+ const MultiSearch & m(dynamic_cast<const MultiSearch &>(*s));
+ EXPECT_EQUAL(1u, m.getChildren().size());
+ EXPECT_TRUE(dynamic_cast<const BitVectorIterator *>(m.getChildren()[0]) != NULL);
+ }
+ {
+ MultiSearch::Children children;
+ children.push_back(BitVectorIterator::create(_bvs[0].get(), tfmda, false).release());
+ children.push_back(new EmptySearch());
+
+ SearchIterator::UP s(T::create(children, false));
+ s = MultiBitVectorIteratorBase::optimize(std::move(s));
+ EXPECT_TRUE(dynamic_cast<const T *>(s.get()) != NULL);
+ const MultiSearch & m(dynamic_cast<const MultiSearch &>(*s));
+ EXPECT_EQUAL(2u, m.getChildren().size());
+ EXPECT_TRUE(dynamic_cast<const BitVectorIterator *>(m.getChildren()[0]) != NULL);
+ EXPECT_TRUE(dynamic_cast<const EmptySearch *>(m.getChildren()[1]) != NULL);
+ }
+ {
+ MultiSearch::Children children;
+ children.push_back(new EmptySearch());
+ children.push_back(BitVectorIterator::create(_bvs[0].get(), tfmda, false).release());
+
+ SearchIterator::UP s(T::create(children, false));
+ s = MultiBitVectorIteratorBase::optimize(std::move(s));
+ EXPECT_TRUE(dynamic_cast<const T *>(s.get()) != NULL);
+ const MultiSearch & m(dynamic_cast<const MultiSearch &>(*s));
+ EXPECT_EQUAL(2u, m.getChildren().size());
+ EXPECT_TRUE(dynamic_cast<const EmptySearch *>(m.getChildren()[0]) != NULL);
+ EXPECT_TRUE(dynamic_cast<const BitVectorIterator *>(m.getChildren()[1]) != NULL);
+ }
+ {
+ MultiSearch::Children children;
+ children.push_back(new EmptySearch());
+ children.push_back(BitVectorIterator::create(_bvs[0].get(), tfmda, false).release());
+ children.push_back(BitVectorIterator::create(_bvs[1].get(), tfmda, false).release());
+
+ SearchIterator::UP s(T::create(children, false));
+ s = MultiBitVectorIteratorBase::optimize(std::move(s));
+ EXPECT_TRUE(s.get() != NULL);
+ EXPECT_TRUE(dynamic_cast<const T *>(s.get()) != NULL);
+ const MultiSearch & m(dynamic_cast<const MultiSearch &>(*s));
+ EXPECT_EQUAL(2u, m.getChildren().size());
+ EXPECT_TRUE(dynamic_cast<const EmptySearch *>(m.getChildren()[0]) != NULL);
+ EXPECT_TRUE(dynamic_cast<const MultiBitVectorIteratorBase *>(m.getChildren()[1]) != NULL);
+ EXPECT_FALSE(dynamic_cast<const MultiBitVectorIteratorBase *>(m.getChildren()[1])->isStrict());
+ }
+ {
+ MultiSearch::Children children;
+ children.push_back(new EmptySearch());
+ children.push_back(BitVectorIterator::create(_bvs[0].get(), tfmda, true).release());
+ children.push_back(BitVectorIterator::create(_bvs[1].get(), tfmda, false).release());
+
+ SearchIterator::UP s(T::create(children, false));
+ s = MultiBitVectorIteratorBase::optimize(std::move(s));
+ EXPECT_TRUE(s.get() != NULL);
+ EXPECT_TRUE(dynamic_cast<const T *>(s.get()) != NULL);
+ const MultiSearch & m(dynamic_cast<const MultiSearch &>(*s));
+ EXPECT_EQUAL(2u, m.getChildren().size());
+ EXPECT_TRUE(dynamic_cast<const EmptySearch *>(m.getChildren()[0]) != NULL);
+ EXPECT_TRUE(dynamic_cast<const MultiBitVectorIteratorBase *>(m.getChildren()[1]) != NULL);
+ EXPECT_TRUE(dynamic_cast<const MultiBitVectorIteratorBase *>(m.getChildren()[1])->isStrict());
+ }
+ {
+ MultiSearch::Children children;
+ children.push_back(BitVectorIterator::create(_bvs[0].get(), tfmda, false).release());
+ children.push_back(BitVectorIterator::create(_bvs[1].get(), tfmda, false).release());
+
+ SearchIterator::UP s(T::create(children, false));
+ s = MultiBitVectorIteratorBase::optimize(std::move(s));
+ SearchIterator::UP filter(s->andWith(BitVectorIterator::create(_bvs[2].get(), tfmda, false), 9));
+
+ if (isAnd) {
+ EXPECT_TRUE(nullptr == filter.get());
+ } else {
+ EXPECT_FALSE(nullptr == filter.get());
+ }
+
+ children.clear();
+ children.push_back(BitVectorIterator::create(_bvs[0].get(), tfmda, false).release());
+ children.push_back(BitVectorIterator::create(_bvs[1].get(), tfmda, false).release());
+ s.reset(T::create(children, true));
+ s = MultiBitVectorIteratorBase::optimize(std::move(s));
+ filter = s->andWith(BitVectorIterator::create(_bvs[2].get(), tfmda, false), 9);
+
+ if (isAnd) {
+ EXPECT_TRUE(nullptr == filter.get());
+ } else {
+ EXPECT_FALSE(nullptr == filter.get());
+ }
+ }
+}
+
+template <typename T>
+void
+Test::testOptimizeAndOr()
+{
+ TermFieldMatchData tfmd;
+ TermFieldMatchDataArray tfmda;
+ tfmda.add(&tfmd);
+
+ {
+ MultiSearch::Children children;
+ children.push_back(BitVectorIterator::create(_bvs[0].get(), tfmda, false).release());
+ children.push_back(BitVectorIterator::create(_bvs[1].get(), tfmda, false).release());
+
+ SearchIterator::UP s(T::create(children, false));
+ s = MultiBitVectorIteratorBase::optimize(std::move(s));
+ EXPECT_TRUE(s.get() != NULL);
+ EXPECT_TRUE(dynamic_cast<const MultiBitVectorIteratorBase *>(s.get()) != NULL);
+ EXPECT_FALSE(dynamic_cast<const MultiBitVectorIteratorBase *>(s.get())->isStrict());
+ }
+ {
+ MultiSearch::Children children;
+ children.push_back(BitVectorIterator::create(_bvs[0].get(), tfmda, false).release());
+ children.push_back(new EmptySearch());
+ children.push_back(BitVectorIterator::create(_bvs[1].get(), tfmda, false).release());
+
+ SearchIterator::UP s(T::create(children, false));
+ s = MultiBitVectorIteratorBase::optimize(std::move(s));
+ EXPECT_TRUE(s.get() != NULL);
+ EXPECT_TRUE(dynamic_cast<const T *>(s.get()) != NULL);
+ const MultiSearch & m(dynamic_cast<const MultiSearch &>(*s));
+ EXPECT_EQUAL(2u, m.getChildren().size());
+ EXPECT_TRUE(dynamic_cast<const MultiBitVectorIteratorBase *>(m.getChildren()[0]) != NULL);
+ EXPECT_FALSE(dynamic_cast<const MultiBitVectorIteratorBase *>(m.getChildren()[0])->isStrict());
+ EXPECT_TRUE(dynamic_cast<const EmptySearch *>(m.getChildren()[1]) != NULL);
+ }
+ {
+ MultiSearch::Children children;
+ children.push_back(BitVectorIterator::create(_bvs[0].get(), tfmda, false).release());
+ children.push_back(BitVectorIterator::create(_bvs[1].get(), tfmda, false).release());
+ children.push_back(new EmptySearch());
+
+ SearchIterator::UP s(T::create(children, false));
+ s = MultiBitVectorIteratorBase::optimize(std::move(s));
+ EXPECT_TRUE(s.get() != NULL);
+ EXPECT_TRUE(dynamic_cast<const T *>(s.get()) != NULL);
+ const MultiSearch & m(dynamic_cast<const MultiSearch &>(*s));
+ EXPECT_EQUAL(2u, m.getChildren().size());
+ EXPECT_TRUE(dynamic_cast<const MultiBitVectorIteratorBase *>(m.getChildren()[0]) != NULL);
+ EXPECT_FALSE(dynamic_cast<const MultiBitVectorIteratorBase *>(m.getChildren()[0])->isStrict());
+ EXPECT_TRUE(dynamic_cast<const EmptySearch *>(m.getChildren()[1]) != NULL);
+ }
+ {
+ MultiSearch::Children children;
+ children.push_back(BitVectorIterator::create(_bvs[0].get(), tfmda, true).release());
+ children.push_back(new EmptySearch());
+ children.push_back(BitVectorIterator::create(_bvs[1].get(), tfmda, false).release());
+
+ SearchIterator::UP s(T::create(children, false));
+ s = MultiBitVectorIteratorBase::optimize(std::move(s));
+ EXPECT_TRUE(s.get() != NULL);
+ EXPECT_TRUE(dynamic_cast<const T *>(s.get()) != NULL);
+ const MultiSearch & m(dynamic_cast<const MultiSearch &>(*s));
+ EXPECT_EQUAL(2u, m.getChildren().size());
+ EXPECT_TRUE(dynamic_cast<const MultiBitVectorIteratorBase *>(m.getChildren()[0]) != NULL);
+ EXPECT_TRUE(dynamic_cast<const MultiBitVectorIteratorBase *>(m.getChildren()[0])->isStrict());
+ EXPECT_TRUE(dynamic_cast<const EmptySearch *>(m.getChildren()[1]) != NULL);
+ }
+ {
+ MultiSearch::Children children;
+ children.push_back(BitVectorIterator::create(_bvs[0].get(), tfmda, true).release());
+ children.push_back(BitVectorIterator::create(_bvs[1].get(), tfmda, false).release());
+ children.push_back(new EmptySearch());
+
+ SearchIterator::UP s(T::create(children, false));
+ s = MultiBitVectorIteratorBase::optimize(std::move(s));
+ EXPECT_TRUE(s.get() != NULL);
+ EXPECT_TRUE(dynamic_cast<const T *>(s.get()) != NULL);
+ const MultiSearch & m(dynamic_cast<const MultiSearch &>(*s));
+ EXPECT_EQUAL(2u, m.getChildren().size());
+ EXPECT_TRUE(dynamic_cast<const MultiBitVectorIteratorBase *>(m.getChildren()[0]) != NULL);
+ EXPECT_TRUE(dynamic_cast<const MultiBitVectorIteratorBase *>(m.getChildren()[0])->isStrict());
+ EXPECT_TRUE(dynamic_cast<const EmptySearch *>(m.getChildren()[1]) != NULL);
+ }
+}
+
+void
+Test::testEndGuard()
+{
+ typedef AndSearch T;
+ TermFieldMatchData tfmd;
+ TermFieldMatchDataArray tfmda;
+ tfmda.add(&tfmd);
+
+ MultiSearch::Children children;
+ children.push_back(BitVectorIterator::create(_bvs[0].get(), tfmda, true).release());
+ children.push_back(BitVectorIterator::create(_bvs[1].get(), tfmda, true).release());
+ SearchIterator::UP s(T::create(children, false));
+ s = MultiBitVectorIteratorBase::optimize(std::move(s));
+ s->initFullRange();
+ EXPECT_TRUE(s.get() != NULL);
+ EXPECT_TRUE(dynamic_cast<const MultiBitVectorIteratorBase *>(s.get()) != NULL);
+ MultiSearch & m(dynamic_cast<MultiSearch &>(*s));
+ EXPECT_TRUE(m.seek(0) || !m.seek(0));
+ EXPECT_TRUE(m.seek(3) || !m.seek(3));
+ EXPECT_FALSE(m.seek(_bvs[0]->size()+987));
+}
+
+int
+Test::Main()
+{
+ TEST_INIT("multibitvectoriterator_test");
+ setup();
+ testBug7163266();
+ testThatOptimizePreservesUnpack<OrSearch>();
+ testThatOptimizePreservesUnpack<AndSearch>();
+ TEST_FLUSH();
+ testEndGuard();
+ TEST_FLUSH();
+ testAndNot();
+ TEST_FLUSH();
+ testAnd();
+ TEST_FLUSH();
+ testOr();
+ TEST_FLUSH();
+ testAndWith();
+ TEST_FLUSH();
+ TEST_DONE();
+}
+
+TEST_APPHOOK(Test);
diff --git a/searchlib/src/tests/queryeval/parallel_weak_and/.gitignore b/searchlib/src/tests/queryeval/parallel_weak_and/.gitignore
new file mode 100644
index 00000000000..0a4881f0952
--- /dev/null
+++ b/searchlib/src/tests/queryeval/parallel_weak_and/.gitignore
@@ -0,0 +1 @@
+searchlib_parallel_weak_and_test_app
diff --git a/searchlib/src/tests/queryeval/parallel_weak_and/CMakeLists.txt b/searchlib/src/tests/queryeval/parallel_weak_and/CMakeLists.txt
new file mode 100644
index 00000000000..b76286bea65
--- /dev/null
+++ b/searchlib/src/tests/queryeval/parallel_weak_and/CMakeLists.txt
@@ -0,0 +1,9 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_parallel_weak_and_test_app
+ SOURCES
+ parallel_weak_and_test.cpp
+ DEPENDS
+ searchlib
+ searchlib_test
+)
+vespa_add_test(NAME searchlib_parallel_weak_and_test_app COMMAND searchlib_parallel_weak_and_test_app)
diff --git a/searchlib/src/tests/queryeval/parallel_weak_and/DESC b/searchlib/src/tests/queryeval/parallel_weak_and/DESC
new file mode 100644
index 00000000000..f58343f384b
--- /dev/null
+++ b/searchlib/src/tests/queryeval/parallel_weak_and/DESC
@@ -0,0 +1 @@
+parallel_weak_and test. Take a look at parallel_weak_and_test.cpp for details.
diff --git a/searchlib/src/tests/queryeval/parallel_weak_and/FILES b/searchlib/src/tests/queryeval/parallel_weak_and/FILES
new file mode 100644
index 00000000000..972727bfa00
--- /dev/null
+++ b/searchlib/src/tests/queryeval/parallel_weak_and/FILES
@@ -0,0 +1,2 @@
+weak_and_test.cpp
+weak_and_bench.cpp
diff --git a/searchlib/src/tests/queryeval/parallel_weak_and/parallel_weak_and_test.cpp b/searchlib/src/tests/queryeval/parallel_weak_and/parallel_weak_and_test.cpp
new file mode 100644
index 00000000000..74aa052e486
--- /dev/null
+++ b/searchlib/src/tests/queryeval/parallel_weak_and/parallel_weak_and_test.cpp
@@ -0,0 +1,681 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/vespalib/testkit/test_kit.h>
+#include <vespa/searchlib/query/tree/simplequery.h>
+#include <vespa/searchlib/queryeval/fake_searchable.h>
+#include <vespa/searchlib/queryeval/fake_requestcontext.h>
+#include <vespa/searchlib/queryeval/wand/parallel_weak_and_blueprint.h>
+#include <vespa/searchlib/queryeval/wand/parallel_weak_and_search.h>
+#include <vespa/searchlib/queryeval/simpleresult.h>
+#include <vespa/searchlib/queryeval/test/eagerchild.h>
+#include <vespa/searchlib/queryeval/test/leafspec.h>
+#include <vespa/searchlib/queryeval/test/searchhistory.h>
+#include <vespa/searchlib/queryeval/test/wandspec.h>
+#include <vespa/searchlib/test/initrange.h>
+#include <vespa/searchlib/test/document_weight_attribute_helper.h>
+#include <vespa/searchlib/queryeval/document_weight_search_iterator.h>
+#include <vespa/searchlib/fef/fef.h>
+
+using namespace search::query;
+using namespace search::queryeval;
+using namespace search::queryeval::test;
+
+typedef search::feature_t feature_t;
+typedef wand::score_t score_t;
+typedef ParallelWeakAndSearch::MatchParams MatchParams;
+typedef ParallelWeakAndSearch::RankParams RankParams;
+using search::test::InitRangeVerifier;
+using search::test::DocumentWeightAttributeHelper;
+using search::IDocumentWeightAttribute;
+using search::fef::TermFieldMatchData;
+using search::fef::MatchData;
+using search::fef::MatchDataLayout;
+using search::fef::TermFieldHandle;
+
+
+struct Scores : public std::vector<score_t>
+{
+ Scores &add(score_t val) {
+ push_back(val);
+ return *this;
+ }
+};
+
+struct ScoresHistory : public std::vector<Scores>
+{
+ ScoresHistory &add(const Scores &s) {
+ push_back(s);
+ return *this;
+ }
+};
+
+std::ostream &operator << (std::ostream &out, const ScoresHistory &hist)
+{
+ out << "ScoresHistory:\n";
+ for (size_t i = 0; i < hist.size(); ++i) {
+ const Scores &scores = hist[i];
+ out << "[" << i << "]: ";
+ for (size_t j = 0; j < scores.size(); ++j) {
+ if (j != 0) {
+ out << ",";
+ }
+ out << scores[j];
+ }
+ out << std::endl;
+ }
+ return out;
+}
+
+struct TestHeap : public WeakAndHeap
+{
+ ScoresHistory history;
+
+ TestHeap(uint32_t scoresToTrack_) : WeakAndHeap(scoresToTrack_), history() {}
+ virtual void adjust(score_t *begin, score_t *end) {
+ Scores scores;
+ for (score_t *itr = begin; itr != end; ++itr) {
+ scores.add(*itr);
+ }
+ history.push_back(scores);
+ setMinScore(1);
+ }
+ virtual size_t size() const { return history.size(); }
+};
+
+template <typename HeapType>
+struct WandTestSpec : public WandSpec
+{
+ HeapType heap;
+ TermFieldMatchData rootMatchData;
+ MatchParams matchParams;
+
+ WandTestSpec(uint32_t scoresToTrack, uint32_t scoresAdjustFrequency = 1,
+ score_t scoreThreshold = 0, double thresholdBoostFactor = 1)
+ : WandSpec(),
+ heap(scoresToTrack),
+ rootMatchData(),
+ matchParams(heap, scoreThreshold, thresholdBoostFactor, scoresAdjustFrequency)
+ {}
+ SearchIterator *create() {
+ MatchData::UP childrenMatchData = createMatchData();
+ MatchData *tmp = childrenMatchData.get();
+ return new TrackedSearch("PWAND", getHistory(), ParallelWeakAndSearch::create(getTerms(tmp),
+ matchParams,
+ RankParams(rootMatchData,
+ std::move(childrenMatchData)),
+ true));
+ }
+};
+
+typedef WandTestSpec<TestHeap> WandSpecWithTestHeap;
+typedef WandTestSpec<SharedWeakAndPriorityQueue> WandSpecWithRealHeap;
+
+FakeResult
+doSearch(SearchIterator &sb, const TermFieldMatchData &tfmd)
+{
+ FakeResult retval;
+ sb.initFullRange();
+ for (sb.seek(1); ! sb.isAtEnd(); sb.seek(sb.getDocId() + 1)) {
+ sb.unpack(sb.getDocId());
+ retval.doc(sb.getDocId());
+ feature_t score = tfmd.getRawScore();
+ retval.score(score);
+ }
+ return retval;
+}
+
+SimpleResult
+asSimpleResult(const FakeResult &result)
+{
+ SimpleResult retval;
+ for (size_t i = 0; i < result.inspect().size(); ++i) {
+ retval.addHit(result.inspect()[i].docId);
+ }
+ return retval;
+}
+
+struct WandBlueprintSpec
+{
+ static const uint32_t fieldId = 0;
+ static const TermFieldHandle handle = 0;
+ std::vector<std::pair<std::string, int32_t> > tokens;
+ uint32_t docIdLimit = 0;
+ FakeRequestContext requestContext;
+
+ WandBlueprintSpec &add(const std::string &token, int32_t weight) {
+ tokens.push_back(std::make_pair(token, weight));
+ return *this;
+ }
+
+ Node::UP createNode(uint32_t scoresToTrack = 100,
+ score_t scoreThreshold = 0,
+ double thresholdBoostFactor = 1) const {
+ SimpleWandTerm *node = new SimpleWandTerm("view", 0, Weight(0),
+ scoresToTrack, scoreThreshold, thresholdBoostFactor);
+ for (size_t i = 0; i < tokens.size(); ++i) {
+ node->append(Node::UP(new SimpleStringTerm(tokens[i].first, "view", 0,
+ Weight(tokens[i].second))));
+ }
+ return Node::UP(node);
+ }
+
+ Blueprint::UP blueprint(Searchable &searchable, const std::string &field, const search::query::Node &term) const {
+ FieldSpecList fields = FieldSpecList().add(FieldSpec(field, fieldId, handle));
+ Blueprint::UP bp = searchable.createBlueprint(requestContext, fields, term);
+ EXPECT_TRUE(dynamic_cast<ParallelWeakAndBlueprint*>(bp.get()) != 0);
+ return bp;
+ }
+
+ SearchIterator::UP iterator(Searchable &searchable, const std::string &field) const {
+ Node::UP term = createNode();
+ Blueprint::UP bp = blueprint(searchable, field, *term);
+ MatchData::UP md(MatchData::makeTestInstance(0, 1, 1));
+ bp->fetchPostings(true);
+ bp->setDocIdLimit(docIdLimit);
+ SearchIterator::UP sb = bp->createSearch(*md, true);
+ EXPECT_TRUE(dynamic_cast<ParallelWeakAndSearch*>(sb.get()) != 0);
+ return sb;
+ }
+
+ FakeResult search(Searchable &searchable, const std::string &field) const {
+ Node::UP term = createNode();
+ return search(searchable, field, *term);
+ }
+
+ FakeResult search(Searchable &searchable, const std::string &field, const search::query::Node &term) const {
+ Blueprint::UP bp = blueprint(searchable, field, term);
+ MatchData::UP md(MatchData::makeTestInstance(0, 1, 1));
+ bp->fetchPostings(true);
+ bp->setDocIdLimit(docIdLimit);
+ SearchIterator::UP sb = bp->createSearch(*md, true);
+ EXPECT_TRUE(dynamic_cast<ParallelWeakAndSearch*>(sb.get()) != 0);
+ return doSearch(*sb, *md->resolveTermField(handle));
+ }
+};
+
+struct FixtureBase
+{
+ WandSpecWithRealHeap spec;
+ FakeResult result;
+ FixtureBase(uint32_t scoresToTrack,
+ uint32_t scoresAdjustFrequency,
+ score_t scoreThreshold = 0,
+ double boostFactor = 1.0)
+ : spec(scoresToTrack, scoresAdjustFrequency, scoreThreshold, boostFactor),
+ result() {}
+ void prepare() {
+ SearchIterator::UP si(spec.create());
+ result = doSearch(*si, spec.rootMatchData);
+ }
+};
+
+struct AlgoSimpleFixture : public FixtureBase
+{
+ AlgoSimpleFixture() : FixtureBase(2, 1) {
+ spec.leaf(LeafSpec("A", 1).doc(1, 1).doc(2, 2).doc(3, 3).doc(4, 4).doc(5, 5).doc(6, 6));
+ spec.leaf(LeafSpec("B", 4).doc(1, 1).doc(3, 3).doc(5, 5));
+ prepare();
+ }
+};
+
+struct AlgoAdvancedFixture : public FixtureBase
+{
+ AlgoAdvancedFixture() : FixtureBase(100, 1) {
+ spec.leaf(LeafSpec("1").doc(1, 1).doc(11, 1).doc(111, 1));
+ spec.leaf(LeafSpec("2").doc(2, 1).doc(12, 1).doc(112, 1));
+ spec.leaf(LeafSpec("3").doc(3, 1).doc(13, 1).doc(113, 1));
+ spec.leaf(LeafSpec("4").doc(4, 1).doc(14, 1).doc(114, 1));
+ spec.leaf(LeafSpec("5").doc(5, 1).doc(15, 1).doc(115, 1));
+ prepare();
+ }
+};
+
+struct AlgoSubsearchFixture : public FixtureBase
+{
+ AlgoSubsearchFixture() : FixtureBase(2, 1) {
+ spec.leaf(LeafSpec("A", 10).itr(new EagerChild(search::endDocId)));
+ spec.leaf(LeafSpec("B", 20).itr(new EagerChild(10)));
+ prepare();
+ }
+};
+
+struct AlgoSameScoreFixture : public FixtureBase
+{
+ AlgoSameScoreFixture() : FixtureBase(1, 1) {
+ spec.leaf(LeafSpec("A").doc(1, 1).doc(2, 1));
+ prepare();
+ }
+};
+
+struct AlgoScoreThresholdFixture : public FixtureBase
+{
+ AlgoScoreThresholdFixture(score_t scoreThreshold) : FixtureBase(3, 1, scoreThreshold) {
+ spec.leaf(LeafSpec("A", 1).doc(1, 10).doc(2, 30));
+ spec.leaf(LeafSpec("B", 2).doc(1, 20).doc(3, 40));
+ prepare();
+ }
+};
+
+struct AlgoLargeScoresFixture : public FixtureBase
+{
+ AlgoLargeScoresFixture(score_t scoreThreshold) : FixtureBase(3, 1, scoreThreshold) {
+ spec.leaf(LeafSpec("A", 60000).doc(1, 60000).doc(2, 70000));
+ spec.leaf(LeafSpec("B", 70000).doc(1, 80000).doc(3, 90000));
+ prepare();
+ }
+};
+
+struct AlgoExhaustPastFixture : public FixtureBase
+{
+ AlgoExhaustPastFixture(score_t scoreThreshold) : FixtureBase(3, 1, scoreThreshold) {
+ spec.leaf(LeafSpec("A", 1).doc(1, 20).doc(3, 40).doc(5, 10));
+ spec.leaf(LeafSpec("B", 1).doc(5, 10));
+ spec.leaf(LeafSpec("C", 1).doc(5, 10));
+ prepare();
+ }
+};
+
+
+TEST_F("require that algorithm prunes bad hits after enough good ones are obtained", AlgoSimpleFixture)
+{
+ FakeResult expect = FakeResult()
+ .doc(1).score(1 * 1 + 4 * 1)
+ .doc(2).score(1 * 2)
+ .doc(3).score(1 * 3 + 4 * 3)
+ .doc(5).score(1 * 5 + 4 * 5);
+ EXPECT_EQUAL(expect, f.result);
+}
+
+TEST_F("require that algorithm uses subsearches as expected", AlgoSimpleFixture) {
+ EXPECT_EQUAL(SearchHistory()
+ .seek("PWAND", 1).seek("B", 1).step("B", 1).unpack("B", 1).step("PWAND", 1)
+ .unpack("PWAND", 1).seek("A", 1).step("A", 1).unpack("A", 1)
+ .seek("PWAND", 2).seek("B", 2).step("B", 3).seek("A", 2).step("A", 2).unpack("A", 2).step("PWAND", 2)
+ .unpack("PWAND", 2)
+ .seek("PWAND", 3).unpack("B", 3).step("PWAND", 3)
+ .unpack("PWAND", 3).seek("A", 3).step("A", 3).unpack("A", 3)
+ .seek("PWAND", 4).seek("B", 4).step("B", 5).seek("A", 4).step("A", 4).unpack("A", 4).unpack("B", 5).step("PWAND", 5)
+ .unpack("PWAND", 5).seek("A", 5).step("A", 5).unpack("A", 5)
+ .seek("PWAND", 6).seek("B", 6).step("B", search::endDocId).step("PWAND", search::endDocId),
+ f.spec.getHistory());
+}
+
+TEST_F("require that algorithm considers documents in the right order", AlgoAdvancedFixture)
+{
+ EXPECT_EQUAL(SimpleResult()
+ .addHit(1).addHit(2).addHit(3).addHit(4).addHit(5)
+ .addHit(11).addHit(12).addHit(13).addHit(14).addHit(15)
+ .addHit(111).addHit(112).addHit(113).addHit(114).addHit(115), asSimpleResult(f.result));
+}
+
+TEST_F("require that algorithm take initial docid for subsearches into account", AlgoSubsearchFixture)
+{
+ EXPECT_EQUAL(FakeResult().doc(10).score(20), f.result);
+ EXPECT_EQUAL(SearchHistory().seek("PWAND", 1).unpack("B", 10).step("PWAND", 10).unpack("PWAND", 10)
+ .seek("PWAND", 11).seek("B", 11).step("B", search::endDocId).step("PWAND", search::endDocId),
+ f.spec.getHistory());
+}
+
+TEST_F("require that algorithm uses first match when two matches have same score", AlgoSameScoreFixture)
+{
+ EXPECT_EQUAL(FakeResult().doc(1).score(100), f.result);
+}
+
+TEST_F("require that algorithm uses initial score threshold (all hits greater)", AlgoScoreThresholdFixture(29))
+{
+ EXPECT_EQUAL(FakeResult()
+ .doc(1).score(1 * 10 + 2 * 20)
+ .doc(2).score(1 * 30)
+ .doc(3).score(2 * 40), f.result);
+}
+
+TEST_F("require that algorithm uses initial score threshold (2 hits greater)", AlgoScoreThresholdFixture(30))
+{
+ EXPECT_EQUAL(FakeResult()
+ .doc(1).score(1 * 10 + 2 * 20)
+ .doc(3).score(2 * 40), f.result);
+}
+
+TEST_F("require that algorithm uses initial score threshold (1 hit greater)", AlgoScoreThresholdFixture(50))
+{
+ EXPECT_EQUAL(FakeResult()
+ .doc(3).score(2 * 40), f.result);
+}
+
+TEST_F("require that algorithm uses initial score threshold (0 hits greater)", AlgoScoreThresholdFixture(80))
+{
+ EXPECT_EQUAL(FakeResult(), f.result);
+}
+
+TEST_F("require that algorithm handle large scores", AlgoLargeScoresFixture(60000L * 70000L))
+{
+ EXPECT_EQUAL(FakeResult()
+ .doc(1).score(60000L * 60000L + 70000L * 80000L)
+ .doc(3).score(70000L * 90000L), f.result);
+}
+
+TEST_F("require that algorithm steps all present terms when past is empty", AlgoExhaustPastFixture(25))
+{
+ EXPECT_EQUAL(FakeResult()
+ .doc(3).score(40)
+ .doc(5).score(30), f.result);
+}
+
+struct HeapFixture
+{
+ WandSpecWithTestHeap spec;
+ SimpleResult result;
+ HeapFixture() : spec(2, 2), result() {
+ spec.leaf(LeafSpec("A", 1).doc(1, 1).doc(2, 2).doc(3, 3).doc(4, 4).doc(5, 5).doc(6, 6));
+ SearchIterator::UP sb(spec.create());
+ result.search(*sb);
+ }
+};
+
+TEST_F("require that scores are collected in batches before adjusting heap", HeapFixture)
+{
+ EXPECT_EQUAL(SimpleResult().addHit(1).addHit(2).addHit(3).addHit(4).addHit(5).addHit(6),
+ f.result);
+ EXPECT_EQUAL(ScoresHistory().add(Scores().add(1).add(2))
+ .add(Scores().add(3).add(4))
+ .add(Scores().add(5).add(6)),
+ f.spec.heap.history);
+}
+
+
+struct SearchFixture : public FixtureBase
+{
+ SearchFixture() : FixtureBase(10, 1) {
+ spec.leaf(LeafSpec("A", 1).doc(1, 10).doc(2, 30));
+ spec.leaf(LeafSpec("B", 2).doc(1, 20).doc(3, 40));
+ prepare();
+ }
+};
+
+TEST_F("require that dot product score is calculated", SearchFixture)
+{
+ FakeResult expect = FakeResult()
+ .doc(1).score(1 * 10 + 2 * 20)
+ .doc(2).score(1 * 30)
+ .doc(3).score(2 * 40);
+ EXPECT_EQUAL(expect, f.result);
+}
+
+
+struct BlueprintFixtureBase
+{
+ WandBlueprintSpec spec;
+ FakeSearchable searchable;
+ BlueprintFixtureBase() : spec(), searchable() {}
+ Blueprint::UP blueprint(const search::query::Node &term) {
+ return spec.blueprint(searchable, "field", term);
+ }
+ SearchIterator::UP iterator() {
+ return spec.iterator(searchable, "field");
+ }
+ FakeResult search(const search::query::Node &term) {
+ return spec.search(searchable, "field", term);
+ }
+ FakeResult search() {
+ return spec.search(searchable, "field");
+ }
+};
+
+struct BlueprintHitsFixture : public BlueprintFixtureBase
+{
+ FakeResult createResult(size_t hits) {
+ FakeResult result;
+ for (size_t i = 0; i < hits; ++i) {
+ result.doc(i + 1);
+ }
+ result.minMax(1, 10);
+ return result;
+ }
+ BlueprintHitsFixture(size_t hits_a, size_t hits_b, size_t docs) : BlueprintFixtureBase() {
+ spec.docIdLimit = docs + 1;
+ spec.add("A", 20).add("B", 10);
+ searchable.addResult("field", "A", createResult(hits_a));
+ searchable.addResult("field", "B", createResult(hits_b));
+ }
+ bool maxScoreFirst() {
+ SearchIterator::UP itr = iterator();
+ const ParallelWeakAndSearch *wand = dynamic_cast<ParallelWeakAndSearch*>(itr.get());
+ ASSERT_EQUAL(2u, wand->get_num_terms());
+ return (wand->get_term_weight(0) == 20);
+ }
+};
+
+struct ThresholdBoostFixture : public FixtureBase
+{
+ FakeResult result;
+ ThresholdBoostFixture(double boost) : FixtureBase(1, 1, 800, boost) {
+ spec.leaf(LeafSpec("A").doc(1, 10));
+ spec.leaf(LeafSpec("B").doc(2, 20));
+ spec.leaf(LeafSpec("C").doc(3, 30));
+ spec.leaf(LeafSpec("D").doc(4, 42));
+ SearchIterator::UP si(spec.create());
+ result = doSearch(*si, spec.rootMatchData);
+ }
+};
+
+struct BlueprintFixture : public BlueprintFixtureBase
+{
+ BlueprintFixture() : BlueprintFixtureBase() {
+ searchable.addResult("field", "A", FakeResult().doc(1).weight(10).pos(0).doc(2).weight(30).pos(0).minMax(0, 30));
+ searchable.addResult("field", "B", FakeResult().doc(1).weight(20).pos(0).doc(3).weight(40).pos(0).minMax(0, 40));
+ spec.add("A", 1).add("B", 2);
+ }
+};
+
+struct BlueprintLargeScoresFixture : public BlueprintFixtureBase
+{
+ BlueprintLargeScoresFixture() : BlueprintFixtureBase() {
+ searchable.addResult("field", "A", FakeResult().doc(1).weight(60000).pos(0).doc(2).weight(70000).pos(0).minMax(0, 70000));
+ searchable.addResult("field", "B", FakeResult().doc(1).weight(80000).pos(0).doc(3).weight(90000).pos(0).minMax(0, 90000));
+ spec.add("A", 60000).add("B", 70000);
+ }
+};
+
+struct BlueprintAsStringFixture : public BlueprintFixtureBase
+{
+ BlueprintAsStringFixture() : BlueprintFixtureBase() {
+ searchable.addResult("field", "A", FakeResult().doc(1).weight(10).pos(0).doc(2).weight(30).pos(0).minMax(0, 30));
+ spec.add("A", 5);
+ }
+};
+
+
+TEST_F("require that hit estimate is calculated", BlueprintFixture)
+{
+ Node::UP term = f.spec.createNode();
+ Blueprint::UP bp = f.blueprint(*term);
+ EXPECT_EQUAL(4u, bp->getState().estimate().estHits);
+}
+
+TEST_F("require that blueprint picks up docid limit", BlueprintFixture)
+{
+ Node::UP term = f.spec.createNode(57, 67, 77.7);
+ Blueprint::UP bp = f.blueprint(*term);
+ const ParallelWeakAndBlueprint * pbp = dynamic_cast<const ParallelWeakAndBlueprint *>(bp.get());
+ EXPECT_EQUAL(0u, pbp->get_docid_limit());
+ bp->setDocIdLimit(1000);
+ EXPECT_EQUAL(1000u, pbp->get_docid_limit());
+}
+
+TEST_F("require that scores to track, score threshold and threshold boost factor is passed down from query node to blueprint", BlueprintFixture)
+{
+ Node::UP term = f.spec.createNode(57, 67, 77.7);
+ Blueprint::UP bp = f.blueprint(*term);
+ const ParallelWeakAndBlueprint * pbp = dynamic_cast<const ParallelWeakAndBlueprint *>(bp.get());
+ EXPECT_EQUAL(57u, pbp->getScores().getScoresToTrack());
+ EXPECT_EQUAL(67u, pbp->getScoreThreshold());
+ EXPECT_EQUAL(77.7, pbp->getThresholdBoostFactor());
+}
+
+TEST_F("require that search iterator is correctly setup and executed", BlueprintFixture)
+{
+ FakeResult expect = FakeResult()
+ .doc(1).score(1 * 10 + 2 * 20)
+ .doc(2).score(1 * 30)
+ .doc(3).score(2 * 40);
+ EXPECT_EQUAL(expect, f.search());
+}
+
+TEST_F("require that initial score threshold can be specified (1 hit greater)", BlueprintFixture)
+{
+ Node::UP term = f.spec.createNode(3, 50);
+ EXPECT_EQUAL(FakeResult()
+ .doc(3).score(2 * 40), f.search(*term));
+}
+
+TEST_F("require that large scores are handled", BlueprintLargeScoresFixture)
+{
+ Node::UP term = f.spec.createNode(3, 60000L * 70000L);
+ EXPECT_EQUAL(FakeResult()
+ .doc(1).score(60000L * 60000L + 70000L * 80000L)
+ .doc(3).score(70000L * 90000L), f.search(*term));
+}
+
+TEST_F("require that docid limit is propagated to search iterator", BlueprintFixture())
+{
+ f1.spec.docIdLimit = 4050;
+ SearchIterator::UP itr = f1.iterator();
+ const ParallelWeakAndSearch *wand = dynamic_cast<ParallelWeakAndSearch*>(itr.get());
+ EXPECT_EQUAL(4050u, wand->getMatchParams().docIdLimit);
+}
+
+TEST_FFF("require that terms are sorted for maximum skipping",
+ BlueprintHitsFixture(50, 50, 100),
+ BlueprintHitsFixture(60, 50, 100),
+ BlueprintHitsFixture(80, 50, 100))
+{
+ EXPECT_TRUE(f1.maxScoreFirst());
+ EXPECT_TRUE(f2.maxScoreFirst());
+ EXPECT_FALSE(f3.maxScoreFirst());
+}
+
+TEST_FF("require that threshold boosting works as expected", ThresholdBoostFixture(1.0), ThresholdBoostFixture(2.0))
+{
+ EXPECT_EQUAL(FakeResult()
+ .doc(1).score(1000)
+ .doc(2).score(2000)
+ .doc(3).score(3000)
+ .doc(4).score(4200), f1.result);
+ EXPECT_EQUAL(FakeResult()
+ .doc(2).score(2000)
+ .doc(4).score(4200), f2.result);
+}
+
+TEST_F("require that asString() on blueprint works", BlueprintAsStringFixture)
+{
+ Node::UP term = f.spec.createNode(57, 67);
+ Blueprint::UP bp = f.blueprint(*term);
+ vespalib::string expStr = "search::queryeval::ParallelWeakAndBlueprint {\n"
+ " isTermLike: true\n"
+ " fields: FieldList {\n"
+ " [0]: Field {\n"
+ " fieldId: 0\n"
+ " handle: 0\n"
+ " isFilter: false\n"
+ " }\n"
+ " }\n"
+ " estimate: HitEstimate {\n"
+ " empty: false\n"
+ " estHits: 2\n"
+ " tree_size: 2\n"
+ " allow_termwise_eval: 0\n"
+ " }\n"
+ " sourceId: 4294967295\n"
+ " docid_limit: 0\n"
+ " _weights: std::vector {\n"
+ " [0]: 5\n"
+ " }\n"
+ " _terms: std::vector {\n"
+ " [0]: search::queryeval::FakeBlueprint {\n"
+ " isTermLike: true\n"
+ " fields: FieldList {\n"
+ " [0]: Field {\n"
+ " fieldId: 0\n"
+ " handle: 0\n"
+ " isFilter: false\n"
+ " }\n"
+ " }\n"
+ " estimate: HitEstimate {\n"
+ " empty: false\n"
+ " estHits: 2\n"
+ " tree_size: 1\n"
+ " allow_termwise_eval: 1\n"
+ " }\n"
+ " sourceId: 4294967295\n"
+ " docid_limit: 0\n"
+ " }\n"
+ " }\n"
+ "}\n";
+ EXPECT_EQUAL(expStr, bp->asString());
+}
+
+using MatchParams = ParallelWeakAndSearch::MatchParams;
+using RankParams = ParallelWeakAndSearch::RankParams;
+
+struct DummyHeap : public WeakAndHeap {
+ DummyHeap() : WeakAndHeap(9001) {}
+ void adjust(score_t *, score_t *) override {}
+};
+
+SearchIterator::UP create_wand(bool use_dwa,
+ TermFieldMatchData &tfmd,
+ const MatchParams &matchParams,
+ const std::vector<int32_t> &weights,
+ const std::vector<IDocumentWeightAttribute::LookupResult> &dict_entries,
+ const IDocumentWeightAttribute &attr,
+ bool strict)
+{
+ if (use_dwa) {
+ return ParallelWeakAndSearch::create(tfmd, matchParams, weights, dict_entries, attr, strict);
+ }
+ // use search iterators as children
+ MatchDataLayout layout;
+ std::vector<TermFieldHandle> handles;
+ for (size_t i = 0; i < weights.size(); ++i) {
+ handles.push_back(layout.allocTermField(tfmd.getFieldId()));
+ }
+ MatchData::UP childrenMatchData = layout.createMatchData();
+ assert(childrenMatchData->getNumTermFields() == dict_entries.size());
+ wand::Terms terms;
+ for (size_t i = 0; i < dict_entries.size(); ++i) {
+ terms.push_back(wand::Term(new DocumentWeightSearchIterator(*(childrenMatchData->resolveTermField(handles[i])), attr, dict_entries[i]),
+ weights[i],
+ dict_entries[i].posting_size,
+ childrenMatchData->resolveTermField(handles[i])));
+ }
+ assert(terms.size() == dict_entries.size());
+ return SearchIterator::UP(ParallelWeakAndSearch::create(terms, matchParams, RankParams(tfmd, std::move(childrenMatchData)), strict));
+}
+
+TEST("verify initRange") {
+ const size_t num_children = 7;
+ InitRangeVerifier ir;
+ DocumentWeightAttributeHelper helper;
+ helper.add_docs(ir.getDocIdLimit());
+ auto full_list = ir.getExpectedDocIds();
+ for (size_t i = 0; i < full_list.size(); ++i) {
+ helper.set_doc(full_list[i], i % num_children, 1);
+ }
+ std::vector<int32_t> weights(num_children, 1);
+ for (bool use_dwa: {false, true}) {
+ for (bool strict: {false, true}) {
+ DummyHeap dummy_heap;
+ TermFieldMatchData tfmd;
+ MatchParams match_params(dummy_heap, dummy_heap.getMinScore(), 1.0, 1);
+ match_params.setDocIdLimit(ir.getDocIdLimit());
+ std::vector<IDocumentWeightAttribute::LookupResult> dict_entries;
+ for (size_t i = 0; i < num_children; ++i) {
+ dict_entries.push_back(helper.dwa().lookup(vespalib::make_string("%zu", i).c_str()));
+ }
+ auto search = create_wand(use_dwa, tfmd, match_params, weights, dict_entries, helper.dwa(), strict);
+ ir.verify(*search);
+ }
+ }
+}
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/queryeval/predicate/.gitignore b/searchlib/src/tests/queryeval/predicate/.gitignore
new file mode 100644
index 00000000000..7f94446d571
--- /dev/null
+++ b/searchlib/src/tests/queryeval/predicate/.gitignore
@@ -0,0 +1,2 @@
+searchlib_predicate_blueprint_test_app
+searchlib_predicate_search_test_app
diff --git a/searchlib/src/tests/queryeval/predicate/CMakeLists.txt b/searchlib/src/tests/queryeval/predicate/CMakeLists.txt
new file mode 100644
index 00000000000..e1c4ebf9aa8
--- /dev/null
+++ b/searchlib/src/tests/queryeval/predicate/CMakeLists.txt
@@ -0,0 +1,15 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_predicate_blueprint_test_app
+ SOURCES
+ predicate_blueprint_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_predicate_blueprint_test_app COMMAND searchlib_predicate_blueprint_test_app)
+vespa_add_executable(searchlib_predicate_search_test_app
+ SOURCES
+ predicate_search_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_predicate_search_test_app COMMAND searchlib_predicate_search_test_app)
diff --git a/searchlib/src/tests/queryeval/predicate/predicate_blueprint_test.cpp b/searchlib/src/tests/queryeval/predicate/predicate_blueprint_test.cpp
new file mode 100644
index 00000000000..3b609849141
--- /dev/null
+++ b/searchlib/src/tests/queryeval/predicate/predicate_blueprint_test.cpp
@@ -0,0 +1,241 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Unit tests for predicate_blueprint.
+
+#include <vespa/log/log.h>
+LOG_SETUP("predicate_blueprint_test");
+#include <vespa/fastos/fastos.h>
+
+#include <vespa/searchlib/attribute/attributeguard.h>
+#include <vespa/searchlib/attribute/predicate_attribute.h>
+#include <vespa/searchlib/predicate/predicate_tree_annotator.h>
+#include <vespa/searchlib/fef/termfieldmatchdataarray.h>
+#include <vespa/searchlib/query/tree/predicate_query_term.h>
+#include <vespa/searchlib/query/tree/simplequery.h>
+#include <vespa/searchlib/query/weight.h>
+#include <vespa/searchlib/queryeval/field_spec.h>
+#include <vespa/searchlib/queryeval/predicate_blueprint.h>
+#include <vespa/searchlib/predicate/predicate_hash.h>
+#include <vespa/vespalib/testkit/testapp.h>
+
+using namespace search;
+using namespace search::predicate;
+using search::fef::TermFieldMatchDataArray;
+using search::query::PredicateQueryTerm;
+using search::query::SimplePredicateQuery;
+using search::query::Weight;
+using search::queryeval::FieldSpecBase;
+using search::queryeval::PredicateBlueprint;
+using search::queryeval::SearchIterator;
+
+namespace {
+
+struct Fixture {
+ FieldSpecBase field;
+ AttributeVector::SP attribute;
+ vespalib::GenerationHandler generation_handler;
+ SimplePredicateQuery query;
+
+ using IntervalRange = PredicateAttribute::IntervalRange;
+
+ Fixture()
+ : field(42, 0),
+ attribute(new PredicateAttribute("f", attribute::Config(attribute::BasicType::PREDICATE))),
+ query(PredicateQueryTerm::UP(new PredicateQueryTerm),
+ "view", 0, Weight(1)) {
+ query.getTerm()->addFeature("key", "value");
+ query.getTerm()->addRangeFeature("range_key", 42);
+ }
+ PredicateAttribute & guard() {
+ return dynamic_cast<PredicateAttribute &>(*attribute);
+ }
+ PredicateIndex & index() {
+ return predicate().getIndex();
+ }
+ PredicateAttribute & predicate() { return static_cast<PredicateAttribute &>(*attribute); }
+ void resize(uint32_t doc_id) {
+ while (predicate().getNumDocs() <= doc_id) {
+ uint32_t tmp;
+ predicate().addDoc(tmp);
+ PredicateAttribute::MinFeatureHandle mfh = predicate().getMinFeatureVector();
+ const_cast<uint8_t *>(mfh.first)[tmp] = 0;
+ }
+ }
+ void setIntervalRange(uint32_t doc_id, IntervalRange interval_range) {
+ const_cast<IntervalRange *>(predicate().getIntervalRangeVector())[doc_id] = interval_range;
+ }
+ void indexEmptyDocument(uint32_t doc_id, IntervalRange ir = 0x1) {
+ resize(doc_id);
+ index().indexEmptyDocument(doc_id);
+ setIntervalRange(doc_id, ir);
+ predicate().updateMaxIntervalRange(ir);
+ predicate().commit(false);
+ }
+ void indexDocument(uint32_t doc_id, const PredicateTreeAnnotations &annotations, IntervalRange ir = 0xffff) {
+ resize(doc_id);
+ index().indexDocument(doc_id, annotations);
+ setIntervalRange(doc_id, ir);
+ predicate().updateMaxIntervalRange(ir);
+ predicate().commit(false);
+ }
+};
+
+TEST_F("require that blueprint with empty index estimates empty.", Fixture) {
+ PredicateBlueprint blueprint(f.field, f.guard(), f.query);
+ EXPECT_TRUE(blueprint.getState().estimate().empty);
+ EXPECT_EQUAL(0u, blueprint.getState().estimate().estHits);
+}
+
+TEST_F("require that blueprint with zero-constraint doc estimates non-empty.",
+ Fixture) {
+ f.indexEmptyDocument(42);
+ PredicateBlueprint blueprint(f.field, f.guard(), f.query);
+ EXPECT_FALSE(blueprint.getState().estimate().empty);
+ EXPECT_EQUAL(1u, blueprint.getState().estimate().estHits);
+}
+
+const int min_feature = 1;
+const uint32_t doc_id = 2;
+const uint32_t interval = 0x0001ffff;
+
+TEST_F("require that blueprint with posting list entry estimates non-empty.",
+ Fixture) {
+ PredicateTreeAnnotations annotations(min_feature);
+ annotations.interval_map[PredicateHash::hash64("key=value")] =
+ std::vector<Interval>{{interval}};
+ f.indexDocument(doc_id, annotations);
+
+ PredicateBlueprint blueprint(f.field, f.guard(), f.query);
+ EXPECT_FALSE(blueprint.getState().estimate().empty);
+ EXPECT_EQUAL(0u, blueprint.getState().estimate().estHits);
+}
+
+TEST_F("require that blueprint with 'bounds' posting list entry estimates "
+ "non-empty.", Fixture) {
+ PredicateTreeAnnotations annotations(min_feature);
+ annotations.bounds_map[PredicateHash::hash64("range_key=40")] =
+ std::vector<IntervalWithBounds>{{interval, 0x80000003}};
+ f.indexDocument(doc_id, annotations);
+
+ PredicateBlueprint blueprint(f.field, f.guard(), f.query);
+ EXPECT_FALSE(blueprint.getState().estimate().empty);
+ EXPECT_EQUAL(0u, blueprint.getState().estimate().estHits);
+}
+
+TEST_F("require that blueprint with zstar-compressed estimates non-empty.",
+ Fixture) {
+ PredicateTreeAnnotations annotations(1);
+ annotations.interval_map[PredicateIndex::z_star_compressed_hash] =
+ std::vector<Interval>{{0xfffe0000}};
+ f.indexDocument(doc_id, annotations);
+ PredicateBlueprint blueprint(f.field, f.guard(), f.query);
+ EXPECT_FALSE(blueprint.getState().estimate().empty);
+ EXPECT_EQUAL(0u, blueprint.getState().estimate().estHits);
+}
+
+TEST_F("require that blueprint can create search", Fixture) {
+ PredicateTreeAnnotations annotations(1);
+ annotations.interval_map[PredicateHash::hash64("key=value")] =
+ std::vector<Interval>{{interval}};
+ f.indexDocument(doc_id, annotations);
+
+ PredicateBlueprint blueprint(f.field, f.guard(), f.query);
+ blueprint.fetchPostings(true);
+ TermFieldMatchDataArray tfmda;
+ SearchIterator::UP it = blueprint.createLeafSearch(tfmda, true);
+ ASSERT_TRUE(it.get());
+ it->initFullRange();
+ EXPECT_EQUAL(SearchIterator::beginId(), it->getDocId());
+ EXPECT_FALSE(it->seek(doc_id - 1));
+ EXPECT_EQUAL(doc_id, it->getDocId());
+ EXPECT_TRUE(it->seek(doc_id));
+ EXPECT_EQUAL(doc_id, it->getDocId());
+ EXPECT_FALSE(it->seek(doc_id + 1));
+ EXPECT_TRUE(it->isAtEnd());
+}
+
+TEST_F("require that blueprint can create more advanced search", Fixture) {
+ PredicateTreeAnnotations annotations(2);
+ annotations.interval_map[PredicateHash::hash64("key=value")] =
+ std::vector<Interval>{{0x00010001}};
+ annotations.bounds_map[PredicateHash::hash64("range_key=40")] =
+ std::vector<IntervalWithBounds>{{0x00020010, 0x40000005}}; // [40..44]
+ f.indexDocument(doc_id, annotations, 0x10);
+ f.indexEmptyDocument(doc_id + 2);
+
+ PredicateBlueprint blueprint(f.field, f.guard(), f.query);
+ blueprint.fetchPostings(true);
+ TermFieldMatchDataArray tfmda;
+ SearchIterator::UP it = blueprint.createLeafSearch(tfmda, true);
+ ASSERT_TRUE(it.get());
+ it->initFullRange();
+ EXPECT_EQUAL(SearchIterator::beginId(), it->getDocId());
+ EXPECT_FALSE(it->seek(doc_id - 1));
+ EXPECT_EQUAL(doc_id, it->getDocId());
+ EXPECT_TRUE(it->seek(doc_id));
+ EXPECT_EQUAL(doc_id, it->getDocId());
+ EXPECT_FALSE(it->seek(doc_id + 1));
+ EXPECT_EQUAL(doc_id + 2, it->getDocId());
+ EXPECT_TRUE(it->seek(doc_id + 2));
+ EXPECT_FALSE(it->seek(doc_id + 3));
+ EXPECT_TRUE(it->isAtEnd());
+}
+
+TEST_F("require that blueprint can create NOT search", Fixture) {
+ PredicateTreeAnnotations annotations(1);
+ annotations.interval_map[PredicateIndex::z_star_hash] =
+ std::vector<Interval>{{0x00010000}, {0xffff0001}};
+ f.indexDocument(doc_id, annotations);
+
+ PredicateBlueprint blueprint(f.field, f.guard(), f.query);
+ blueprint.fetchPostings(true);
+ TermFieldMatchDataArray tfmda;
+ SearchIterator::UP it = blueprint.createLeafSearch(tfmda, true);
+ ASSERT_TRUE(it.get());
+ it->initFullRange();
+ EXPECT_TRUE(it->seek(doc_id));
+ EXPECT_EQUAL(doc_id, it->getDocId());
+ EXPECT_FALSE(it->seek(doc_id + 1));
+}
+
+TEST_F("require that blueprint can create compressed NOT search", Fixture) {
+ PredicateTreeAnnotations annotations(1);
+ annotations.interval_map[PredicateIndex::z_star_compressed_hash] =
+ std::vector<Interval>{{0xfffe0000}};
+ f.indexDocument(doc_id, annotations);
+
+ PredicateBlueprint blueprint(f.field, f.guard(), f.query);
+ blueprint.fetchPostings(true);
+ TermFieldMatchDataArray tfmda;
+ SearchIterator::UP it = blueprint.createLeafSearch(tfmda, true);
+ ASSERT_TRUE(it.get());
+ it->initFullRange();
+ EXPECT_TRUE(it->seek(doc_id));
+ EXPECT_EQUAL(doc_id, it->getDocId());
+ EXPECT_FALSE(it->seek(doc_id + 1));
+}
+
+TEST_F("require that blueprint can set up search with subqueries", Fixture) {
+ PredicateTreeAnnotations annotations(2);
+ annotations.interval_map[PredicateHash::hash64("key=value")] =
+ std::vector<Interval>{{0x00010001}};
+ annotations.interval_map[PredicateHash::hash64("key2=value")] =
+ std::vector<Interval>{{0x0002ffff}};
+ f.indexDocument(doc_id, annotations);
+
+ SimplePredicateQuery query(PredicateQueryTerm::UP(new PredicateQueryTerm),
+ "view", 0, Weight(1));
+ query.getTerm()->addFeature("key", "value", 1);
+ query.getTerm()->addFeature("key2", "value", 2);
+
+ PredicateBlueprint blueprint(f.field, f.guard(), query);
+ blueprint.fetchPostings(true);
+ TermFieldMatchDataArray tfmda;
+ SearchIterator::UP it = blueprint.createLeafSearch(tfmda, true);
+ ASSERT_TRUE(it.get());
+ it->initFullRange();
+ EXPECT_FALSE(it->seek(doc_id));
+}
+
+} // namespace
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/queryeval/predicate/predicate_search_test.cpp b/searchlib/src/tests/queryeval/predicate/predicate_search_test.cpp
new file mode 100644
index 00000000000..5954d51ec9b
--- /dev/null
+++ b/searchlib/src/tests/queryeval/predicate/predicate_search_test.cpp
@@ -0,0 +1,370 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Unit tests for predicate_search.
+
+#include <vespa/log/log.h>
+LOG_SETUP("predicate_search_test");
+#include <vespa/fastos/fastos.h>
+
+#include <vespa/searchlib/fef/termfieldmatchdata.h>
+#include <vespa/searchlib/fef/termfieldmatchdataarray.h>
+#include <vespa/searchlib/queryeval/predicate_search.h>
+#include <vespa/vespalib/testkit/testapp.h>
+#include <vespa/vespalib/util/arraysize.h>
+
+using search::fef::TermFieldMatchData;
+using search::fef::TermFieldMatchDataArray;
+using namespace search::queryeval;
+using namespace search::predicate;
+using std::pair;
+using std::vector;
+using vespalib::arraysize;
+
+namespace {
+
+class MyPostingList : public PredicatePostingList {
+ vector<pair<uint32_t, uint32_t>> _entries;
+ size_t _index;
+ uint32_t _interval;
+
+ void setInterval(uint32_t interval) { _interval = interval; }
+
+public:
+ MyPostingList(const vector<pair<uint32_t, uint32_t>> &entries)
+ : _entries(entries),
+ _index(0) {
+ }
+ MyPostingList(std::initializer_list<pair<uint32_t, uint32_t>> ilist)
+ : _entries(ilist.begin(), ilist.end()),
+ _index(0) {
+ }
+
+ bool next(uint32_t doc_id) override {
+ if (_index < _entries.size()) {
+ while (_entries[_index].first <= doc_id) {
+ ++_index;
+ if (_index == _entries.size()) {
+ setDocId(search::endDocId);
+ return false;
+ }
+ }
+ setDocId(_entries[_index].first);
+ setInterval(_entries[_index].second);
+ return true;
+ }
+ setDocId(search::endDocId);
+ return false;
+ }
+
+ bool nextInterval() override {
+ if (_index + 1 < _entries.size() &&
+ _entries[_index].first == _entries[_index + 1].first) {
+ ++_index;
+ setInterval(_entries[_index].second);
+ return true;
+ }
+ return false;
+ }
+ uint32_t getInterval() const override { return _interval; }
+};
+
+template <int N>
+vector<PredicatePostingList::UP>
+make_posting_lists_vector(MyPostingList (&plists)[N]) {
+ vector<PredicatePostingList::UP> posting_lists;
+ for (int i = 0; i < N; ++i) {
+ posting_lists.emplace_back(std::make_unique<MyPostingList>(plists[i]));
+ }
+ return posting_lists;
+}
+
+TermFieldMatchDataArray tfmda;
+typedef std::vector<uint8_t> CV;
+typedef std::vector<uint8_t> MF;
+typedef std::vector<uint16_t> IR;
+
+TEST("Require that the skipping is efficient") {
+ const uint8_t min_feature[] = { 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+ 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7};
+ const uint8_t kv[] = { 6,7,6,7,6,7,6,8,6,5,6,7,6,0,6,7,
+ 7,6,7,6,6,6,6,7,7,7,8,7,8,7,7,7,6,7};
+ SkipMinFeature::UP skip = SkipMinFeature::create(min_feature, kv, 34);
+ EXPECT_EQUAL(1u, skip->next());
+ EXPECT_EQUAL(3u, skip->next());
+ EXPECT_EQUAL(5u, skip->next());
+ EXPECT_EQUAL(7u, skip->next());
+ EXPECT_EQUAL(11u, skip->next());
+ EXPECT_EQUAL(15u, skip->next());
+ EXPECT_EQUAL(16u, skip->next());
+ EXPECT_EQUAL(18u, skip->next());
+ EXPECT_EQUAL(23u, skip->next());
+ EXPECT_EQUAL(24u, skip->next());
+ EXPECT_EQUAL(25u, skip->next());
+ EXPECT_EQUAL(26u, skip->next());
+ EXPECT_EQUAL(27u, skip->next());
+ EXPECT_EQUAL(28u, skip->next());
+ EXPECT_EQUAL(29u, skip->next());
+ EXPECT_EQUAL(30u, skip->next());
+ EXPECT_EQUAL(31u, skip->next());
+ EXPECT_EQUAL(33u, skip->next());
+}
+
+TEST("require that empty search yields no results") {
+ vector<PredicatePostingList::UP> posting_lists;
+ MF mf(3); CV cv(3); IR ir(3, 0xffff);
+ PredicateSearch search(&mf[0], &ir[0], 0xffff, cv, std::move(posting_lists), tfmda);
+ search.initFullRange();
+ EXPECT_EQUAL(SearchIterator::beginId(), search.getDocId());
+ EXPECT_FALSE(search.seek(2));
+ EXPECT_TRUE(search.isAtEnd());
+}
+
+TEST("require that simple search yields result") {
+ MyPostingList plists[] = {{{2, 0x0001ffff}}};
+ MF mf{0, 0, 0};
+ CV cv{0, 0, 1};
+ IR ir(3, 0xffff);
+ PredicateSearch search(&mf[0], &ir[0], 0xffff, cv, make_posting_lists_vector(plists), tfmda);
+ search.initFullRange();
+ EXPECT_EQUAL(SearchIterator::beginId(), search.getDocId());
+ EXPECT_FALSE(search.seek(1));
+ EXPECT_EQUAL(2u, search.getDocId());
+ EXPECT_TRUE(search.seek(2));
+ EXPECT_EQUAL(2u, search.getDocId());
+ EXPECT_FALSE(search.seek(3));
+ EXPECT_TRUE(search.isAtEnd());
+}
+
+TEST("require that minFeature (K) is used to prune results") {
+ MyPostingList plists[] = {{{2, 0x0001ffff}},
+ {{5, 0x0001ffff}}};
+ MF mf{0, 0, 3, 0, 0, 0};
+ CV cv{1, 0, 0, 0, 0, 1};
+ IR ir(6, 0xffff);
+ PredicateSearch search(&mf[0], &ir[0], 0xffff, cv, make_posting_lists_vector(plists), tfmda);
+ search.initFullRange();
+ EXPECT_FALSE(search.seek(2));
+ EXPECT_EQUAL(5u, search.getDocId());
+}
+
+TEST("require that a high K (min_feature - 1) can yield results") {
+ MyPostingList plists[] = {{{2, 0x00010001}},
+ {{2, 0x0002ffff}}};
+ MF mf{0, 0, 2};
+ CV cv{0, 0, 2};
+ IR ir(3, 0xffff);
+ PredicateSearch search(&mf[0], &ir[0], 0xffff, cv, make_posting_lists_vector(plists), tfmda);
+ search.initFullRange();
+ EXPECT_TRUE(search.seek(2));
+}
+
+TEST("require that we can skip past entries") {
+ MyPostingList plists[] = {{{2, 0x0001ffff},
+ {5, 0x0001ffff}}};
+ MF mf{0, 0, 0, 0, 0, 0};
+ CV cv{0, 0, 1, 0, 0, 1};
+ IR ir(6, 0xffff);
+ PredicateSearch search(&mf[0], &ir[0], 0xffff, cv, make_posting_lists_vector(plists), tfmda);
+ search.initFullRange();
+ EXPECT_TRUE(search.seek(5));
+}
+
+TEST("require that posting lists are sorted after advancing") {
+ MyPostingList plists[] = {{{1, 0x0001ffff},
+ {5, 0x0001ffff}},
+ {{2, 0x0001ffff},
+ {4, 0x0001ffff}}};
+ MF mf{0, 2, 0, 0, 0, 0};
+ CV cv{0, 1, 1, 0, 1, 1};
+ IR ir(6, 0xffff);
+ PredicateSearch search(&mf[0], &ir[0], 0xffff, cv, make_posting_lists_vector(plists), tfmda);
+ search.initFullRange();
+ EXPECT_FALSE(search.seek(1));
+ EXPECT_FALSE(search.seek(3));
+ EXPECT_TRUE(search.seek(4));
+}
+
+TEST("require that short interval ranges works") {
+ MyPostingList plists[] = {{{1, 0x00010001},
+ {5, 0x00010001}},
+ {{2, 0x00010001},
+ {4, 0x00010001}}};
+ MF mf{0, 2, 0, 0, 0, 0};
+ CV cv{0, 1, 1, 0, 1, 1};
+ IR ir(6, 0x0001);
+ PredicateSearch search(&mf[0], &ir[0], 0x1, cv, make_posting_lists_vector(plists), tfmda);
+ search.initFullRange();
+ EXPECT_FALSE(search.seek(1));
+ EXPECT_FALSE(search.seek(3));
+ EXPECT_TRUE(search.seek(4));
+}
+
+TEST("require that empty posting lists work") {
+ MyPostingList plists[] = {{}};
+ MF mf(3); CV cv(3); IR ir(3, 0xffff);
+ PredicateSearch search(&mf[0], &ir[0], 0xffff, cv, make_posting_lists_vector(plists), tfmda);
+ search.initFullRange();
+ EXPECT_EQUAL(SearchIterator::beginId(), search.getDocId());
+ EXPECT_FALSE(search.seek(2));
+ EXPECT_TRUE(search.isAtEnd());
+}
+
+TEST("require that shorter posting list ending is ok") {
+ MyPostingList plists[] = {{{1, 0x0001ffff},
+ {2, 0x0001ffff}},
+ {{4, 0x0001ffff}}};
+ MF mf{0, 0, 0, 0, 0};
+ CV cv{0, 1, 1, 0, 1};
+ IR ir(5, 0xffff);
+ PredicateSearch search(&mf[0], &ir[0], 0xffff, cv, make_posting_lists_vector(plists), tfmda);
+ search.initFullRange();
+ EXPECT_TRUE(search.seek(1));
+ EXPECT_TRUE(search.seek(4));
+}
+
+TEST("require that sorting works for many posting lists") {
+ MyPostingList plists[] = {{{1, 0x0001ffff},
+ {2, 0x0001ffff}},
+ {{2, 0x0001ffff},
+ {4, 0x0001ffff}},
+ {{2, 0x0001ffff},
+ {5, 0x0001ffff}},
+ {{2, 0x0001ffff},
+ {4, 0x0001ffff}},
+ {{2, 0x0001ffff},
+ {5, 0x0001ffff}}};
+ MF mf{0, 1, 5, 0, 2, 2};
+ CV cv{0, 1, 5, 0, 2, 2};
+ IR ir(6, 0xffff);
+ PredicateSearch search(&mf[0], &ir[0], 0xffff, cv, make_posting_lists_vector(plists), tfmda);
+ search.initFullRange();
+ EXPECT_TRUE(search.seek(1));
+ EXPECT_TRUE(search.seek(2));
+ EXPECT_TRUE(search.seek(4));
+ EXPECT_TRUE(search.seek(5));
+}
+
+TEST("require that insufficient interval coverage prevents match") {
+ MyPostingList plists[] = {{{2, 0x00010001},
+ {3, 0x0002ffff}}};
+ MF mf{0, 0, 0, 0};
+ CV cv{0, 0, 1, 1};
+ IR ir(4, 0xffff);
+ PredicateSearch search(&mf[0], &ir[0], 0xffff, cv, make_posting_lists_vector(plists), tfmda);
+ search.initFullRange();
+ EXPECT_FALSE(search.seek(2));
+ EXPECT_FALSE(search.seek(3));
+}
+
+TEST("require that intervals are sorted") {
+ MyPostingList plists[] = {{{2, 0x00010001}},
+ {{2, 0x0003ffff}},
+ {{2, 0x00020002}}};
+ MF mf{0, 0, 0};
+ CV cv{0, 0, 3};
+ IR ir(3, 0xffff);
+ PredicateSearch search(&mf[0], &ir[0], 0xffff, cv, make_posting_lists_vector(plists), tfmda);
+ search.initFullRange();
+ EXPECT_TRUE(search.seek(2));
+}
+
+TEST("require that NOT is supported - no match") {
+ MyPostingList plists[] = {{{2, 0x00010001}}, // [l, r]
+ {{2, 0x00010000}, // [l, r]*
+ {2, 0xffff0001}}}; // [r+1, r+1]*
+ MF mf{0, 0, 0};
+ CV cv{0, 0, 3};
+ IR ir(3, 0xffff);
+ PredicateSearch search(&mf[0], &ir[0], 0xffff, cv, make_posting_lists_vector(plists), tfmda);
+ search.initFullRange();
+ EXPECT_FALSE(search.seek(2));
+}
+
+TEST("require that NOT is supported - match") {
+ MyPostingList plists[] = {{{2, 0x00010000}, // [l, r]*
+ {2, 0xffff0001}}}; // [r+1, r+1]*
+ MF mf{0, 0, 0};
+ CV cv{0, 0, 2};
+ IR ir(3, 0xffff);
+ PredicateSearch search(&mf[0], &ir[0], 0xffff, cv, make_posting_lists_vector(plists), tfmda);
+ search.initFullRange();
+ EXPECT_TRUE(search.seek(2));
+}
+
+TEST("require that NOT is supported - no match because of previous term") {
+ MyPostingList plists[] = {{{2, 0x00020001}, // [l, r]*
+ {2, 0xffff0002}}}; // [r+1, r+1]*
+ MF mf{0, 0, 0};
+ CV cv{0, 0, 2};
+ IR ir(3, 0xffff);
+ PredicateSearch search(&mf[0], &ir[0], 0xffff, cv, make_posting_lists_vector(plists), tfmda);
+ search.initFullRange();
+ EXPECT_FALSE(search.seek(2));
+}
+
+TEST("require that NOT is supported - subqueries") {
+ MyPostingList plists[] = {{{2, 0x00010001}}, // [l, r]
+ {{2, 0x00010000}, // [l, r]*
+ {2, 0xffff0001}}}; // [r+1, r+1]*
+ plists[0].setSubquery(0xffff);
+ MF mf{0, 0, 0};
+ CV cv{0, 0, 3};
+ IR ir(3, 0xffff);
+ PredicateSearch search(&mf[0], &ir[0], 0xffff, cv, make_posting_lists_vector(plists), tfmda);
+ search.initFullRange();
+ EXPECT_TRUE(search.seek(2));
+}
+
+TEST("require that there can be many intervals") {
+ MyPostingList plists[] = {{{2, 0x00010001},
+ {2, 0x00020002},
+ {2, 0x00030003},
+ {2, 0x0001ffff},
+ {2, 0x00040004},
+ {2, 0x00050005},
+ {2, 0x00060006}}};
+ MF mf{0, 0, 0};
+ CV cv{0, 0, 7};
+ IR ir(3, 0xffff);
+ PredicateSearch search(&mf[0], &ir[0], 0xffff, cv, make_posting_lists_vector(plists), tfmda);
+ search.initFullRange();
+ EXPECT_TRUE(search.seek(2));
+}
+
+TEST("require that match can require multiple postinglists.") {
+ MyPostingList plists[] = {{{2, 0x00010001}},
+ {{2, 0x0002000b},
+ {2, 0x00030003}},
+ {{2, 0x00040003}},
+ {{2, 0x00050004}},
+ {{2, 0x00010008},
+ {2, 0x00060006}},
+ {{2, 0x00020002},
+ {2, 0x0007ffff}}};
+ MF mf{0, 0, 0};
+ CV cv{0, 0, 9};
+ IR ir(3, 0xffff);
+ PredicateSearch search(&mf[0], &ir[0], 0xffff, cv, make_posting_lists_vector(plists), tfmda);
+ search.initFullRange();
+ EXPECT_TRUE(search.seek(2));
+}
+
+TEST("require that subquery bitmap is unpacked to subqueries.") {
+ MyPostingList plists[] = {{{2, 0x0001ffff}}};
+ TermFieldMatchDataArray array;
+ TermFieldMatchData data;
+ array.add(&data);
+ MF mf{0, 0, 0};
+ CV cv{0, 0, 1};
+ IR ir(3, 0xffff);
+ PredicateSearch search(&mf[0], &ir[0], 0xffff, cv, make_posting_lists_vector(plists), array);
+ search.initFullRange();
+ EXPECT_TRUE(search.seek(2));
+ search.unpack(2);
+ EXPECT_EQUAL(0xffffffffffffffffULL,
+ static_cast<unsigned long long>(data.getSubqueries()));
+}
+
+
+} // namespace
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/queryeval/queryeval.cpp b/searchlib/src/tests/queryeval/queryeval.cpp
new file mode 100644
index 00000000000..3a2070a1fd8
--- /dev/null
+++ b/searchlib/src/tests/queryeval/queryeval.cpp
@@ -0,0 +1,691 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/vespalib/testkit/test_kit.h>
+
+#include <vespa/searchlib/test/initrange.h>
+#include <vespa/searchlib/common/bitvectoriterator.h>
+#include <vespa/searchlib/queryeval/andnotsearch.h>
+#include <vespa/searchlib/queryeval/andsearch.h>
+#include <vespa/searchlib/queryeval/booleanmatchiteratorwrapper.h>
+#include <vespa/searchlib/queryeval/nearsearch.h>
+#include <vespa/searchlib/queryeval/orsearch.h>
+#include <vespa/searchlib/queryeval/searchiterator.h>
+#include <vespa/searchlib/queryeval/simpleresult.h>
+#include <vespa/searchlib/queryeval/simplesearch.h>
+#include <vespa/searchlib/queryeval/ranksearch.h>
+#include <vespa/searchlib/queryeval/truesearch.h>
+#include <vespa/searchlib/queryeval/emptysearch.h>
+#include <vespa/searchlib/queryeval/sourceblendersearch.h>
+#include <vespa/searchlib/queryeval/leaf_blueprints.h>
+#include <vespa/searchlib/queryeval/intermediate_blueprints.h>
+#include <vespa/searchlib/attribute/singlenumericattribute.hpp>
+#include <vespa/searchlib/attribute/singlesmallnumericattribute.h>
+#include <vespa/vespalib/test/insertion_operators.h>
+
+#include <vespa/searchlib/fef/fef.h>
+
+using namespace search::queryeval;
+using search::fef::MatchData;
+using search::fef::TermFieldMatchData;
+using search::fef::TermFieldMatchDataArray;
+using search::BitVector;
+using search::BitVectorIterator;
+using search::test::InitRangeVerifier;
+
+//-----------------------------------------------------------------------------
+
+template <typename T, typename V=std::vector<T> >
+class Collect
+{
+private:
+ V _data;
+
+public:
+ Collect &add(const T &t) {
+ _data.push_back(t);
+ return *this;
+ }
+ operator const V &() const { return _data; }
+};
+
+SearchIterator *simple(const std::string &tag) {
+ return &((new SimpleSearch(SimpleResult()))->tag(tag));
+}
+
+Collect<SearchIterator*, MultiSearch::Children> search2(const std::string &t1, const std::string &t2) {
+ return Collect<SearchIterator*, MultiSearch::Children>().add(simple(t1)).add(simple(t2));
+}
+
+
+class ISourceSelectorDummy : public ISourceSelector
+{
+ static SourceStore _sourceStoreDummy;
+
+public:
+ static Iterator::UP
+ makeDummyIterator()
+ {
+ return Iterator::UP(new Iterator(_sourceStoreDummy));
+ }
+};
+
+ISourceSelector::SourceStore ISourceSelectorDummy::_sourceStoreDummy("foo");
+
+ISourceSelector::Iterator::UP selector() {
+ return ISourceSelectorDummy::makeDummyIterator();
+}
+
+//-----------------------------------------------------------------------------
+
+void testMultiSearch(SearchIterator & search) {
+ MultiSearch & ms = dynamic_cast<MultiSearch &>(search);
+ ms.initRange(3, 309);
+ EXPECT_EQUAL(2u, ms.getDocId());
+ EXPECT_EQUAL(309u, ms.getEndId());
+ for (const auto & child : ms.getChildren()) {
+ EXPECT_EQUAL(2u, child->getDocId());
+ EXPECT_EQUAL(309u, child->getEndId());
+ }
+}
+
+TEST("test that OR.andWith is a NOOP") {
+ TermFieldMatchData tfmd;
+ MultiSearch::Children ch;
+ ch.push_back(new TrueSearch(tfmd));
+ ch.push_back(new TrueSearch(tfmd));
+ SearchIterator::UP search(OrSearch::create(ch, true));
+ SearchIterator::UP filter(new TrueSearch(tfmd));
+
+ EXPECT_TRUE(nullptr != search->andWith(std::move(filter), 1).get());
+}
+
+TEST("test that non-strict AND.andWith is a NOOP") {
+ TermFieldMatchData tfmd;
+ MultiSearch::Children ch;
+ ch.push_back(new TrueSearch(tfmd));
+ ch.push_back(new TrueSearch(tfmd));
+ SearchIterator::UP search(AndSearch::create(ch, false));
+ SearchIterator::UP filter(new TrueSearch(tfmd));
+ filter = search->andWith(std::move(filter), 8);
+ EXPECT_TRUE(nullptr != filter.get());
+}
+
+TEST("test that strict AND.andWith steals filter and places it correctly based on estimate") {
+ TermFieldMatchData tfmd;
+ MultiSearch::Children ch;
+ ch.push_back(new TrueSearch(tfmd));
+ ch.push_back(new TrueSearch(tfmd));
+ SearchIterator::UP search(AndSearch::create(ch, true));
+ static_cast<AndSearch &>(*search).estimate(7);
+ SearchIterator::UP filter(new TrueSearch(tfmd));
+ SearchIterator * filterP = filter.get();
+
+ EXPECT_TRUE(nullptr == search->andWith(std::move(filter), 8).get());
+ const MultiSearch::Children & andChildren = static_cast<MultiSearch &>(*search).getChildren();
+ EXPECT_EQUAL(3u, andChildren.size());
+ EXPECT_EQUAL(ch[0], andChildren[0]);
+ EXPECT_EQUAL(filterP, andChildren[1]);
+ EXPECT_EQUAL(ch[1], andChildren[2]);
+
+ SearchIterator::UP filter2(new TrueSearch(tfmd));
+ SearchIterator * filter2P = filter2.get();
+ EXPECT_TRUE(nullptr == search->andWith(std::move(filter2), 6).get());
+ EXPECT_EQUAL(4u, andChildren.size());
+ EXPECT_EQUAL(filter2P, andChildren[0]);
+ EXPECT_EQUAL(ch[0], andChildren[1]);
+ EXPECT_EQUAL(filterP, andChildren[2]);
+ EXPECT_EQUAL(ch[1], andChildren[3]);
+}
+
+class NonStrictTrueSearch : public TrueSearch
+{
+public:
+ NonStrictTrueSearch(TermFieldMatchData & tfmd) : TrueSearch(tfmd) { }
+ Trinary is_strict() const override { return Trinary::False; }
+};
+
+TEST("test that strict AND.andWith does not place non-strict iterator first") {
+ TermFieldMatchData tfmd;
+ MultiSearch::Children ch;
+ ch.push_back(new TrueSearch(tfmd));
+ ch.push_back(new TrueSearch(tfmd));
+ SearchIterator::UP search(AndSearch::create(ch, true));
+ static_cast<AndSearch &>(*search).estimate(7);
+ SearchIterator::UP filter(new NonStrictTrueSearch(tfmd));
+ SearchIterator * filterP = filter.get();
+ EXPECT_TRUE(nullptr == search->andWith(std::move(filter), 6).get());
+ const MultiSearch::Children & andChildren = static_cast<MultiSearch &>(*search).getChildren();
+ EXPECT_EQUAL(3u, andChildren.size());
+ EXPECT_EQUAL(ch[0], andChildren[0]);
+ EXPECT_EQUAL(filterP, andChildren[1]);
+ EXPECT_EQUAL(ch[1], andChildren[2]);
+}
+
+TEST("test that strict rank search forwards to its greedy first child") {
+ TermFieldMatchData tfmd;
+ SearchIterator::UP search(
+ RankSearch::create(
+ Collect<SearchIterator*, MultiSearch::Children>()
+ .add(AndSearch::create(search2("a", "b"), true))
+ .add(new TrueSearch(tfmd)),
+ true)
+ );
+ SearchIterator::UP filter(new TrueSearch(tfmd));
+ EXPECT_TRUE(nullptr == search->andWith(std::move(filter), 8).get());
+}
+
+TEST("test that non-strict rank search does NOT forward to its greedy first child") {
+ TermFieldMatchData tfmd;
+ SearchIterator::UP search(
+ RankSearch::create(
+ Collect<SearchIterator*, MultiSearch::Children>()
+ .add(AndSearch::create(search2("a", "b"), true))
+ .add(new TrueSearch(tfmd)),
+ false)
+ );
+ SearchIterator::UP filter(new TrueSearch(tfmd));
+ EXPECT_TRUE(nullptr != search->andWith(std::move(filter), 8).get());
+}
+
+TEST("test that strict andnot search forwards to its greedy first child") {
+ TermFieldMatchData tfmd;
+ SearchIterator::UP search(
+ AndNotSearch::create(
+ Collect<SearchIterator*, MultiSearch::Children>()
+ .add(AndSearch::create(search2("a", "b"), true))
+ .add(new TrueSearch(tfmd)),
+ true)
+ );
+ SearchIterator::UP filter(new TrueSearch(tfmd));
+ EXPECT_TRUE(nullptr == search->andWith(std::move(filter), 8).get());
+}
+
+TEST("test that non-strict andnot search does NOT forward to its greedy first child") {
+ TermFieldMatchData tfmd;
+ SearchIterator::UP search(
+ AndNotSearch::create(
+ Collect<SearchIterator*, MultiSearch::Children>()
+ .add(AndSearch::create(search2("a", "b"), true))
+ .add(new TrueSearch(tfmd)),
+ false)
+ );
+ SearchIterator::UP filter(new TrueSearch(tfmd));
+ EXPECT_TRUE(nullptr != search->andWith(std::move(filter), 8).get());
+}
+
+TEST("testAnd") {
+ SimpleResult a;
+ SimpleResult b;
+ a.addHit(5).addHit(10).addHit(16).addHit(30);
+ b.addHit(3).addHit(5).addHit(17).addHit(30).addHit(52);
+
+ MatchData::UP md(MatchData::makeTestInstance(0, 100, 10));
+ AndBlueprint *and_b = new AndBlueprint();
+ and_b->addChild(Blueprint::UP(new SimpleBlueprint(a)));
+ and_b->addChild(Blueprint::UP(new SimpleBlueprint(b)));
+ Blueprint::UP bp(and_b);
+ bp->fetchPostings(true);
+ SearchIterator::UP and_ab = bp->createSearch(*md, true);
+
+ EXPECT_TRUE(dynamic_cast<const AndSearch *>(and_ab.get()) != nullptr);
+ EXPECT_EQUAL(4u, dynamic_cast<AndSearch &>(*and_ab).estimate());
+ SimpleResult res;
+ res.search(*and_ab);
+ SimpleResult expect;
+ expect.addHit(5).addHit(30);
+
+ EXPECT_EQUAL(res, expect);
+}
+
+TEST("mutisearch and initRange") {
+}
+
+TEST("testOr") {
+ {
+ SimpleResult a;
+ SimpleResult b;
+ a.addHit(5).addHit(10);
+ b.addHit(5).addHit(17).addHit(30);
+
+ MatchData::UP md(MatchData::makeTestInstance(0, 100, 10));
+ OrBlueprint *or_b = new OrBlueprint();
+ or_b->addChild(Blueprint::UP(new SimpleBlueprint(a)));
+ or_b->addChild(Blueprint::UP(new SimpleBlueprint(b)));
+ Blueprint::UP bp(or_b);
+ bp->fetchPostings(true);
+ SearchIterator::UP or_ab = bp->createSearch(*md, true);
+
+ SimpleResult res;
+ res.search(*or_ab);
+ SimpleResult expect;
+ expect.addHit(5).addHit(10).addHit(17).addHit(30);
+
+ EXPECT_EQUAL(res, expect);
+ }
+ {
+ TermFieldMatchData tfmd;
+ MultiSearch::Children ch;
+ ch.push_back(new TrueSearch(tfmd));
+ ch.push_back(new TrueSearch(tfmd));
+ ch.push_back(new TrueSearch(tfmd));
+ SearchIterator::UP orSearch(OrSearch::create(ch, true));
+ testMultiSearch(*orSearch);
+ }
+}
+
+class TestInsertRemoveSearch : public MultiSearch
+{
+public:
+ TestInsertRemoveSearch(const MultiSearch::Children & children) :
+ MultiSearch(children),
+ _accumRemove(0),
+ _accumInsert(0)
+ { }
+ virtual void onRemove(size_t index) { _accumRemove += index; }
+ virtual void onInsert(size_t index) { _accumInsert += index; }
+ size_t _accumRemove;
+ size_t _accumInsert;
+private:
+ virtual void doSeek(uint32_t docid) { (void) docid; }
+};
+
+TEST("testMultiSearch") {
+ MultiSearch::Children children;
+ children.push_back(new EmptySearch());
+ children.push_back(new EmptySearch());
+ children.push_back(new EmptySearch());
+ TestInsertRemoveSearch ms(children);
+ EXPECT_EQUAL(3u, ms.getChildren().size());
+ EXPECT_EQUAL(children[0], ms.getChildren()[0]);
+ EXPECT_EQUAL(children[1], ms.getChildren()[1]);
+ EXPECT_EQUAL(children[2], ms.getChildren()[2]);
+ EXPECT_EQUAL(0u, ms._accumInsert);
+ EXPECT_EQUAL(0u, ms._accumRemove);
+
+ EXPECT_EQUAL(children[1], ms.remove(1).get());
+ EXPECT_EQUAL(2u, ms.getChildren().size());
+ EXPECT_EQUAL(children[0], ms.getChildren()[0]);
+ EXPECT_EQUAL(children[2], ms.getChildren()[1]);
+ EXPECT_EQUAL(0u, ms._accumInsert);
+ EXPECT_EQUAL(1u, ms._accumRemove);
+
+ children.push_back(new EmptySearch());
+ ms.insert(1, SearchIterator::UP(children.back()));
+ EXPECT_EQUAL(3u, ms.getChildren().size());
+ EXPECT_EQUAL(children[0], ms.getChildren()[0]);
+ EXPECT_EQUAL(children[3], ms.getChildren()[1]);
+ EXPECT_EQUAL(children[2], ms.getChildren()[2]);
+ EXPECT_EQUAL(1u, ms._accumInsert);
+ EXPECT_EQUAL(1u, ms._accumRemove);
+}
+
+class DummySingleValueBitNumericAttributeBlueprint : public SimpleLeafBlueprint
+{
+public:
+ DummySingleValueBitNumericAttributeBlueprint(const SimpleResult & result) :
+ SimpleLeafBlueprint(FieldSpecBaseList()),
+ _a("a"),
+ _sc(),
+ _tfmd()
+ {
+ for (size_t i(0); i < result.getHitCount(); i++) {
+ size_t docId(result.getHit(i));
+ uint32_t curDoc(0);
+ for (_a.addDoc(curDoc); curDoc < docId; _a.addDoc(curDoc));
+ _a.update(docId, 1);
+ }
+ _a.commit();
+ _sc = _a.getSearch(search::QueryTermSimple::UP(new search::QueryTermSimple("1", search::QueryTermSimple::WORD)),
+ search::AttributeVector::SearchContext::Params().useBitVector(true));
+ }
+ virtual SearchIterator::UP
+ createLeafSearch(const TermFieldMatchDataArray &tfmda, bool strict) const
+ {
+ (void) tfmda;
+ return _sc->createIterator(&_tfmd, strict);
+ }
+private:
+ search::SingleValueBitNumericAttribute _a;
+ search::AttributeVector::SearchContext::UP _sc;
+ mutable TermFieldMatchData _tfmd;
+};
+
+
+TEST("testAndNot") {
+ {
+ SimpleResult a;
+ SimpleResult b;
+ a.addHit(5).addHit(10);
+ b.addHit(5).addHit(17).addHit(30);
+
+ MatchData::UP md(MatchData::makeTestInstance(0, 100, 10));
+ AndNotBlueprint *andnot_b = new AndNotBlueprint();
+ andnot_b->addChild(Blueprint::UP(new SimpleBlueprint(a)));
+ andnot_b->addChild(Blueprint::UP(new SimpleBlueprint(b)));
+ Blueprint::UP bp(andnot_b);
+ bp->fetchPostings(true);
+ SearchIterator::UP andnot_ab = bp->createSearch(*md, true);
+
+ SimpleResult res;
+ res.search(*andnot_ab);
+ SimpleResult expect;
+ expect.addHit(10);
+
+ EXPECT_EQUAL(res, expect);
+ }
+ {
+ SimpleResult a;
+ SimpleResult b;
+ a.addHit(1).addHit(5).addHit(10);
+ b.addHit(5).addHit(17).addHit(30);
+
+ MatchData::UP md(MatchData::makeTestInstance(2, 100, 10));
+ AndNotBlueprint *andnot_b = new AndNotBlueprint();
+ andnot_b->addChild(Blueprint::UP(new SimpleBlueprint(a)));
+ andnot_b->addChild(Blueprint::UP(new DummySingleValueBitNumericAttributeBlueprint(b)));
+ Blueprint::UP bp(andnot_b);
+ bp->fetchPostings(true);
+ SearchIterator::UP andnot_ab = bp->createSearch(*md, true);
+ EXPECT_TRUE(dynamic_cast<const OptimizedAndNotForBlackListing *>(andnot_ab.get()) != NULL);
+
+ SimpleResult res;
+ res.search(*andnot_ab);
+ SimpleResult expect;
+ expect.addHit(1).addHit(10);
+
+ EXPECT_EQUAL(res, expect);
+ }
+ {
+ SimpleResult a;
+ SimpleResult b;
+ SimpleResult c;
+ a.addHit(1).addHit(5).addHit(10);
+ b.addHit(5).addHit(17).addHit(30);
+ c.addHit(1).addHit(5).addHit(10).addHit(17).addHit(30);
+
+ MatchData::UP md(MatchData::makeTestInstance(0, 100, 10));
+ AndNotBlueprint *andnot_b = new AndNotBlueprint();
+ andnot_b->addChild(Blueprint::UP(new SimpleBlueprint(a)));
+ andnot_b->addChild(Blueprint::UP(new SimpleBlueprint(b)));
+
+ AndBlueprint *and_b = new AndBlueprint();
+ and_b->addChild(Blueprint::UP(new SimpleBlueprint(c)));
+ and_b->addChild(Blueprint::UP(andnot_b));
+ Blueprint::UP bp(and_b);
+ bp->fetchPostings(true);
+ SearchIterator::UP and_cab = bp->createSearch(*md, true);
+
+ SimpleResult res;
+ res.search(*and_cab);
+ SimpleResult expect;
+ expect.addHit(1).addHit(10);
+
+ EXPECT_EQUAL(res, expect);
+ }
+ {
+ }
+}
+
+TEST("testRank") {
+ {
+ SimpleResult a;
+ SimpleResult b;
+ a.addHit(5).addHit(10).addHit(16).addHit(30);
+ b.addHit(3).addHit(5).addHit(17).addHit(30).addHit(52);
+
+ MatchData::UP md(MatchData::makeTestInstance(0, 100, 10));
+ RankBlueprint *rank_b = new RankBlueprint();
+ rank_b->addChild(Blueprint::UP(new SimpleBlueprint(a)));
+ rank_b->addChild(Blueprint::UP(new SimpleBlueprint(b)));
+ Blueprint::UP bp(rank_b);
+ bp->fetchPostings(true);
+ SearchIterator::UP rank_ab = bp->createSearch(*md, true);
+
+ SimpleResult res;
+ res.search(*rank_ab);
+ SimpleResult expect;
+ expect.addHit(5).addHit(10).addHit(16).addHit(30);
+
+ EXPECT_EQUAL(res, expect);
+ }
+}
+
+TEST("testDump") {
+ typedef SourceBlenderSearch::Child Source;
+ SearchIterator::UP search(
+ AndSearch::create(
+ Collect<SearchIterator*, MultiSearch::Children>()
+ .add(AndNotSearch::create(search2("+", "-"), true))
+ .add(AndSearch::create(search2("and_a", "and_b"), true))
+ .add(new BooleanMatchIteratorWrapper(SearchIterator::UP(simple("wrapped")), TermFieldMatchDataArray()))
+ .add(new NearSearch(search2("near_a", "near_b"),
+ TermFieldMatchDataArray(),
+ 5u, true))
+ .add(new ONearSearch(search2("onear_a", "onear_b"),
+ TermFieldMatchDataArray(), 10, true))
+ .add(OrSearch::create(search2("or_a", "or_b"), false))
+ .add(RankSearch::create(search2("rank_a", "rank_b"),false))
+ .add(SourceBlenderSearch::create(selector(), Collect<Source, SourceBlenderSearch::Children>()
+ .add(Source(simple("blend_a"), 2))
+ .add(Source(simple("blend_b"), 4)), true))
+ , true));
+ vespalib::string sas = search->asString();
+ EXPECT_TRUE(sas.size() > 50);
+ // fprintf(stderr, "%s", search->asString().c_str());
+}
+
+TEST("testFieldSpec") {
+ EXPECT_EQUAL(8u, sizeof(FieldSpecBase));
+ EXPECT_EQUAL(72u, sizeof(FieldSpec));
+}
+
+
+const size_t unpack_child_cnt = 500;
+const size_t max_unpack_size = 31;
+const size_t max_unpack_index = 255;
+
+std::vector<size_t> vectorize(const UnpackInfo &unpack) {
+ std::vector<size_t> list;
+ unpack.each([&](size_t i){list.push_back(i);}, unpack_child_cnt);
+ return list;
+}
+
+std::vector<size_t> fill_vector(size_t begin, size_t end) {
+ std::vector<size_t> list;
+ for (size_t i = begin; i < end; ++i) {
+ list.push_back(i);
+ }
+ return list;
+}
+
+void verify_unpack(const UnpackInfo &unpack, const std::vector<size_t> &expect) {
+ std::vector<size_t> actual = vectorize(unpack);
+ EXPECT_EQUAL(unpack.empty(), expect.empty());
+ EXPECT_EQUAL(unpack.unpackAll(), (expect.size() == unpack_child_cnt));
+ EXPECT_EQUAL(expect, actual);
+ size_t child_idx = 0;
+ for (size_t next_unpack: expect) {
+ while (child_idx < next_unpack) {
+ EXPECT_FALSE(unpack.needUnpack(child_idx++));
+ }
+ EXPECT_TRUE(unpack.needUnpack(child_idx++));
+ }
+}
+
+TEST("require that unpack info has expected memory footprint") {
+ EXPECT_EQUAL(32u, sizeof(UnpackInfo));
+}
+
+TEST("require that unpack info starts out empty") {
+ verify_unpack(UnpackInfo(), {});
+}
+
+TEST("require that unpack info force all unpacks all children") {
+ verify_unpack(UnpackInfo().forceAll(), fill_vector(0, unpack_child_cnt));
+}
+
+TEST("require that adding a large index to unpack info forces unpack all") {
+ UnpackInfo unpack;
+ unpack.add(0);
+ unpack.add(max_unpack_index);
+ verify_unpack(unpack, {0, max_unpack_index});
+ unpack.add(max_unpack_index + 1);
+ verify_unpack(unpack, fill_vector(0, unpack_child_cnt));
+}
+
+TEST("require that adding too many children to unpack info forces unpack all") {
+ UnpackInfo unpack;
+ std::vector<size_t> expect;
+ for (size_t i = 0; i < max_unpack_size; ++i) {
+ unpack.add(i);
+ expect.push_back(i);
+ }
+ verify_unpack(unpack, expect);
+ unpack.add(100);
+ verify_unpack(unpack, fill_vector(0, unpack_child_cnt));
+}
+
+TEST("require that adding normal unpack info indexes works") {
+ UnpackInfo unpack;
+ unpack.add(3).add(5).add(7).add(14).add(50);
+ verify_unpack(unpack, {3,5,7,14,50});
+}
+
+TEST("require that adding unpack info indexes out of order works") {
+ UnpackInfo unpack;
+ unpack.add(5).add(3).add(7).add(50).add(14);
+ verify_unpack(unpack, {3,5,7,14,50});
+}
+
+TEST("require that basic insert remove of unpack info works") {
+ UnpackInfo unpack;
+ unpack.insert(1).insert(3);
+ verify_unpack(unpack, {1, 3});
+ unpack.insert(0);
+ verify_unpack(unpack, {0, 2, 4});
+ unpack.insert(3);
+ verify_unpack(unpack, {0, 2, 3, 5});
+ unpack.remove(1);
+ verify_unpack(unpack, {0, 1, 2, 4});
+ unpack.remove(1);
+ verify_unpack(unpack, {0, 1, 3});
+ unpack.remove(1);
+ verify_unpack(unpack, {0, 2});
+ unpack.remove(2);
+ verify_unpack(unpack, {0});
+ unpack.remove(0);
+ verify_unpack(unpack, {});
+}
+
+TEST("require that inserting too many indexs into unpack info forces unpack all") {
+ for (bool unpack_inserted: {true, false}) {
+ UnpackInfo unpack;
+ for (size_t i = 0; i < max_unpack_size; ++i) {
+ unpack.add(i);
+ }
+ EXPECT_FALSE(unpack.unpackAll());
+ unpack.insert(0, unpack_inserted);
+ if (unpack_inserted) {
+ verify_unpack(unpack, fill_vector(0, unpack_child_cnt));
+ } else {
+ verify_unpack(unpack, fill_vector(1, max_unpack_size + 1));
+ }
+ }
+}
+
+TEST("require that implicitly overflowing indexes during insert in unpack info forces unpack all") {
+ for (bool unpack_inserted: {true, false}) {
+ UnpackInfo unpack;
+ unpack.insert(max_unpack_index);
+ EXPECT_FALSE(unpack.unpackAll());
+ unpack.insert(5, unpack_inserted);
+ verify_unpack(unpack, fill_vector(0, unpack_child_cnt));
+ }
+}
+
+TEST("require that inserting a too high index into unpack info forces unpack all") {
+ for (bool unpack_inserted: {true, false}) {
+ UnpackInfo unpack;
+ for (size_t i = 0; i < 10; ++i) {
+ unpack.add(i);
+ }
+ EXPECT_FALSE(unpack.unpackAll());
+ unpack.insert(max_unpack_index + 1, unpack_inserted);
+ if (unpack_inserted) {
+ verify_unpack(unpack, fill_vector(0, unpack_child_cnt));
+ } else {
+ verify_unpack(unpack, fill_vector(0, 10));
+ }
+ }
+}
+
+TEST("require that we can insert indexes into unpack info that we do not unpack") {
+ UnpackInfo unpack;
+ unpack.add(10).add(20).add(30);
+ verify_unpack(unpack, {10, 20, 30});
+ unpack.insert(5, false).insert(15, false).insert(25, false).insert(35, false);
+ verify_unpack(unpack, {11, 22, 33});
+}
+
+TEST("testTrueSearch") {
+ EXPECT_EQUAL(16u, sizeof(EmptySearch));
+ EXPECT_EQUAL(24u, sizeof(TrueSearch));
+
+ TermFieldMatchData tfmd;
+ TrueSearch t(tfmd);
+ EXPECT_EQUAL(0u, t.getDocId());
+ EXPECT_EQUAL(0u, t.getEndId());
+ t.initRange(7, 10);
+ EXPECT_EQUAL(6u, t.getDocId());
+ EXPECT_EQUAL(10u, t.getEndId());
+ EXPECT_TRUE(t.seek(9));
+ EXPECT_EQUAL(9u, t.getDocId());
+ EXPECT_FALSE(t.isAtEnd());
+ EXPECT_TRUE(t.seek(10));
+ EXPECT_EQUAL(10u, t.getDocId());
+ EXPECT_TRUE(t.isAtEnd());
+ t.resetRange();
+ t.initRange(4, 14);
+ EXPECT_EQUAL(3u, t.getDocId());
+ EXPECT_EQUAL(14u, t.getEndId());
+ EXPECT_FALSE(t.isAtEnd());
+}
+
+TEST("test InitRangeVerifier") {
+ InitRangeVerifier ir;
+ EXPECT_EQUAL(207u, ir.getDocIdLimit());
+ EXPECT_EQUAL(41u, ir.getExpectedDocIds().size());
+ auto inverted = InitRangeVerifier::invert(ir.getExpectedDocIds(), 300);
+ size_t numInverted = 300 - 41 - 1;
+ EXPECT_EQUAL(numInverted, inverted.size());
+ EXPECT_EQUAL(2u, inverted[0]);
+ EXPECT_EQUAL(299u, inverted[numInverted - 1]);
+ ir.verify(*ir.createIterator(ir.getExpectedDocIds(), false));
+ ir.verify(*ir.createIterator(ir.getExpectedDocIds(), true));
+}
+
+TEST("Test multisearch and andsearchstrict iterators adheres to initRange") {
+ InitRangeVerifier ir;
+ ir.verify( AndSearch::create({ ir.createIterator(ir.getExpectedDocIds(), false).release(),
+ ir.createFullIterator().release() }, false));
+
+ ir.verify( AndSearch::create({ ir.createIterator(ir.getExpectedDocIds(), true).release(),
+ ir.createFullIterator().release() }, true));
+}
+
+TEST("Test andnotsearchstrict iterators adheres to initRange") {
+ InitRangeVerifier ir;
+
+ TEST_DO(ir.verify( AndNotSearch::create({ir.createIterator(ir.getExpectedDocIds(), false).release(),
+ ir.createEmptyIterator().release() }, false)));
+ TEST_DO(ir.verify( AndNotSearch::create({ir.createIterator(ir.getExpectedDocIds(), true).release(),
+ ir.createEmptyIterator().release() }, true)));
+
+ auto inverted = InitRangeVerifier::invert(ir.getExpectedDocIds(), ir.getDocIdLimit());
+ TEST_DO(ir.verify( AndNotSearch::create({ir.createFullIterator().release(),
+ ir.createIterator(inverted, false).release() }, false)));
+ TEST_DO(ir.verify( AndNotSearch::create({ir.createFullIterator().release(),
+ ir.createIterator(inverted, false).release() }, true)));
+}
+
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/queryeval/simple_phrase/.cvsignore b/searchlib/src/tests/queryeval/simple_phrase/.cvsignore
new file mode 100644
index 00000000000..78f4563a999
--- /dev/null
+++ b/searchlib/src/tests/queryeval/simple_phrase/.cvsignore
@@ -0,0 +1,3 @@
+.depend
+Makefile
+simple_phrase_test
diff --git a/searchlib/src/tests/queryeval/simple_phrase/.gitignore b/searchlib/src/tests/queryeval/simple_phrase/.gitignore
new file mode 100644
index 00000000000..bfdb1a61782
--- /dev/null
+++ b/searchlib/src/tests/queryeval/simple_phrase/.gitignore
@@ -0,0 +1,4 @@
+*_test
+.depend
+Makefile
+searchlib_simple_phrase_test_app
diff --git a/searchlib/src/tests/queryeval/simple_phrase/CMakeLists.txt b/searchlib/src/tests/queryeval/simple_phrase/CMakeLists.txt
new file mode 100644
index 00000000000..bc9e664a8cf
--- /dev/null
+++ b/searchlib/src/tests/queryeval/simple_phrase/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_simple_phrase_test_app
+ SOURCES
+ simple_phrase_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_simple_phrase_test_app COMMAND searchlib_simple_phrase_test_app)
diff --git a/searchlib/src/tests/queryeval/simple_phrase/DESC b/searchlib/src/tests/queryeval/simple_phrase/DESC
new file mode 100644
index 00000000000..4b1ad693a7b
--- /dev/null
+++ b/searchlib/src/tests/queryeval/simple_phrase/DESC
@@ -0,0 +1 @@
+simple_phrase test. Take a look at simple_phrase_test.cpp for details.
diff --git a/searchlib/src/tests/queryeval/simple_phrase/FILES b/searchlib/src/tests/queryeval/simple_phrase/FILES
new file mode 100644
index 00000000000..31d8e1af993
--- /dev/null
+++ b/searchlib/src/tests/queryeval/simple_phrase/FILES
@@ -0,0 +1 @@
+simple_phrase_test.cpp
diff --git a/searchlib/src/tests/queryeval/simple_phrase/simple_phrase_test.cpp b/searchlib/src/tests/queryeval/simple_phrase/simple_phrase_test.cpp
new file mode 100644
index 00000000000..f813d7203d0
--- /dev/null
+++ b/searchlib/src/tests/queryeval/simple_phrase/simple_phrase_test.cpp
@@ -0,0 +1,341 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("simple_phrase_test");
+
+#include <vespa/searchlib/queryeval/fake_result.h>
+#include <vespa/searchlib/queryeval/fake_searchable.h>
+#include <vespa/searchlib/queryeval/fake_requestcontext.h>
+#include <vespa/searchlib/queryeval/simple_phrase_blueprint.h>
+#include <vespa/searchlib/queryeval/simple_phrase_search.h>
+#include <vespa/searchlib/queryeval/blueprint.h>
+#include <vespa/searchlib/fef/matchdatalayout.h>
+#include <vespa/searchlib/fef/fef.h>
+#include <vespa/searchlib/query/tree/simplequery.h>
+#include <vespa/searchlib/query/weight.h>
+#include <vespa/vespalib/testkit/testapp.h>
+#include <algorithm>
+#include <string>
+#include <vector>
+
+using namespace search::queryeval;
+
+using search::fef::MatchData;
+using search::fef::TermFieldMatchData;
+using search::fef::TermFieldHandle;
+using search::query::SimpleStringTerm;
+using search::query::Weight;
+using std::unique_ptr;
+using std::copy;
+using std::string;
+using std::vector;
+
+namespace {
+
+struct MyTerm : public search::queryeval::SimpleLeafBlueprint {
+ MyTerm(const FieldSpec &field, uint32_t hits)
+ : search::queryeval::SimpleLeafBlueprint(field)
+ {
+ setEstimate(HitEstimate(hits, (hits == 0)));
+ }
+ virtual SearchIterator::UP createLeafSearch(
+ const search::fef::TermFieldMatchDataArray &, bool) const
+ {
+ return SearchIterator::UP();
+ }
+};
+
+class Test : public vespalib::TestApp {
+ void requireThatIteratorFindsSimplePhrase(bool useBlueprint);
+ void requireThatIteratorFindsLongPhrase(bool useBlueprint);
+ void requireThatStrictIteratorFindsNextMatch(bool useBlueprint);
+ void requireThatPhrasesAreUnpacked(bool useBlueprint);
+ void requireThatTermsCanBeEvaluatedInPriorityOrder();
+ void requireThatBlueprintExposesFieldWithEstimate();
+ void requireThatBlueprintForcesPositionDataOnChildren();
+ void requireThatIteratorHonorsFutureDoom();
+ void requireThatIteratorHonorsDoom();
+ void requireThatDoomIsPropagated();
+
+public:
+ int Main();
+};
+
+int
+Test::Main()
+{
+ TEST_INIT("phrasesearch_test");
+
+ TEST_DO(requireThatIteratorFindsSimplePhrase(false));
+ TEST_DO(requireThatIteratorFindsLongPhrase(false));
+ TEST_DO(requireThatStrictIteratorFindsNextMatch(false));
+ TEST_DO(requireThatPhrasesAreUnpacked(false));
+ TEST_DO(requireThatTermsCanBeEvaluatedInPriorityOrder());
+
+ TEST_DO(requireThatIteratorFindsSimplePhrase(true));
+ TEST_DO(requireThatIteratorFindsLongPhrase(true));
+ TEST_DO(requireThatStrictIteratorFindsNextMatch(true));
+ TEST_DO(requireThatPhrasesAreUnpacked(true));
+ TEST_DO(requireThatBlueprintExposesFieldWithEstimate());
+ TEST_DO(requireThatBlueprintForcesPositionDataOnChildren());
+ TEST_DO(requireThatIteratorHonorsFutureDoom());
+ TEST_DO(requireThatIteratorHonorsDoom());
+ TEST_DO(requireThatDoomIsPropagated());
+
+ TEST_DONE();
+}
+
+const string field = "field";
+const uint32_t fieldId = 1;
+const uint32_t doc_match = 42;
+const uint32_t doc_no_match = 43;
+const uint32_t phrase_handle = 1;
+
+class PhraseSearchTest
+{
+private:
+ FakeRequestContext _requestContext;
+ FakeSearchable _index;
+ FieldSpec _phrase_fs;
+ SimplePhraseBlueprint _phrase;
+ std::vector<Blueprint::UP> _children;
+ MatchData::UP _md;
+ vector<uint32_t> _order;
+ uint32_t _pos;
+ bool _strict;
+
+public:
+ PhraseSearchTest(bool expiredDoom=false) :
+ _requestContext(nullptr, expiredDoom ? 0 : std::numeric_limits<int64_t>::max()),
+ _index(),
+ _phrase_fs(field, fieldId, phrase_handle),
+ _phrase(_phrase_fs, _requestContext),
+ _children(),
+ _md(MatchData::makeTestInstance(0, 100, 10)),
+ _order(),
+ _pos(1),
+ _strict(false)
+ {}
+
+ TermFieldHandle childHandle(uint32_t idx) const { return (10 * idx + 11); }
+
+ void setStrict(bool strict) { _strict = strict; }
+ void setOrder(const vector<uint32_t> &order) { _order = order; }
+ const TermFieldMatchData &tmd() const { return *_md->resolveTermField(phrase_handle); }
+
+ PhraseSearchTest &addTerm(const string &term, bool last) {
+ return addTerm(term, FakeResult()
+ .doc(doc_match).pos(_pos)
+ .doc(doc_no_match).pos(_pos + last));
+ }
+
+ PhraseSearchTest &addTerm(const string &term, const FakeResult &r) {
+ _index.addResult(field, term, r);
+ ++_pos;
+ SimpleStringTerm term_node(term, field, 0, Weight(0));
+ {
+ // make one child blueprint for explicit use
+ FieldSpecList fields;
+ fields.add(FieldSpec(field, fieldId,
+ childHandle(_children.size())));
+ _children.push_back(_index.createBlueprint(_requestContext, fields, term_node));
+ }
+ {
+ // and one to be used by the phrase blueprint
+ FieldSpecList fields;
+ fields.add(_phrase.getNextChildField(_phrase_fs));
+ _phrase.addTerm(_index.createBlueprint(_requestContext, fields, term_node));
+ }
+ _order.push_back(_order.size());
+ return *this;
+ }
+
+ void
+ fetchPostings(bool useBlueprint)
+ {
+ if (useBlueprint) {
+ _phrase.fetchPostings(_strict);
+ return;
+ }
+ for (size_t i = 0; i < _children.size(); ++i) {
+ _children[i]->fetchPostings(_strict);
+ }
+ }
+
+ // NB: using blueprint will ignore eval order override
+ SearchIterator *createSearch(bool useBlueprint) {
+ SearchIterator::UP search;
+ if (useBlueprint) {
+ search = _phrase.createSearch(*_md, _strict);
+ } else {
+ search::fef::TermFieldMatchDataArray childMatch;
+ for (size_t i = 0; i < _children.size(); ++i) {
+ childMatch.add(_md->resolveTermField(childHandle(i)));
+ }
+ SimplePhraseSearch::Children children;
+ for (size_t i = 0; i < _children.size(); ++i) {
+ children.push_back(_children[i]->createSearch(*_md, _strict).release());
+ }
+ search.reset(new SimplePhraseSearch(children, MatchData::UP(),
+ childMatch, _order,
+ *_md->resolveTermField(phrase_handle),
+ _strict));
+ }
+ search->initFullRange();
+ return search.release();
+ }
+};
+
+void Test::requireThatIteratorFindsSimplePhrase(bool useBlueprint) {
+ PhraseSearchTest test;
+ test.addTerm("foo", 0).addTerm("bar", 1);
+
+ test.fetchPostings(useBlueprint);
+ unique_ptr<SearchIterator> search(test.createSearch(useBlueprint));
+ EXPECT_TRUE(!search->seek(1u));
+ EXPECT_TRUE(search->seek(doc_match));
+ EXPECT_TRUE(!search->seek(doc_no_match));
+}
+
+void Test::requireThatIteratorHonorsFutureDoom() {
+ PhraseSearchTest test;
+ test.addTerm("foo", 0).addTerm("bar", 1);
+
+ test.fetchPostings(false);
+ vespalib::Clock clock;
+ vespalib::Doom futureDoom(clock, std::numeric_limits<int64_t>::max());
+ unique_ptr<SearchIterator> search(test.createSearch(false));
+ static_cast<SimplePhraseSearch &>(*search).setDoom(&futureDoom);
+ EXPECT_TRUE(!search->seek(1u));
+ EXPECT_TRUE(search->seek(doc_match));
+ EXPECT_TRUE(!search->seek(doc_no_match));
+}
+
+void Test::requireThatIteratorHonorsDoom() {
+ PhraseSearchTest test;
+ test.addTerm("foo", 0).addTerm("bar", 1);
+
+ test.fetchPostings(false);
+ vespalib::Clock clock;
+ vespalib::Doom futureDoom(clock, 0);
+ unique_ptr<SearchIterator> search(test.createSearch(false));
+ static_cast<SimplePhraseSearch &>(*search).setDoom(&futureDoom);
+ EXPECT_TRUE(!search->seek(1u));
+ EXPECT_EQUAL(search->beginId(), search->getDocId());
+ EXPECT_TRUE(!search->seek(doc_match));
+ EXPECT_TRUE(search->isAtEnd());
+ EXPECT_TRUE(!search->seek(doc_no_match));
+ EXPECT_TRUE(search->isAtEnd());
+}
+
+void Test::requireThatDoomIsPropagated() {
+ PhraseSearchTest test(true);
+ test.addTerm("foo", 0).addTerm("bar", 1);
+
+ test.fetchPostings(true);
+ unique_ptr<SearchIterator> search(test.createSearch(true));
+ EXPECT_TRUE(!search->seek(1u));
+ EXPECT_EQUAL(search->beginId(), search->getDocId());
+ EXPECT_TRUE(!search->seek(doc_match));
+ EXPECT_TRUE(search->isAtEnd());
+ EXPECT_TRUE(!search->seek(doc_no_match));
+ EXPECT_TRUE(search->isAtEnd());
+}
+
+void Test::requireThatIteratorFindsLongPhrase(bool useBlueprint) {
+ PhraseSearchTest test;
+ test.addTerm("foo", 0).addTerm("bar", 0).addTerm("baz", 0)
+ .addTerm("qux", 1);
+
+ test.fetchPostings(useBlueprint);
+ unique_ptr<SearchIterator> search(test.createSearch(useBlueprint));
+ EXPECT_TRUE(!search->seek(1u));
+ EXPECT_TRUE(search->seek(doc_match));
+ EXPECT_TRUE(!search->seek(doc_no_match));
+}
+
+void Test::requireThatStrictIteratorFindsNextMatch(bool useBlueprint) {
+ PhraseSearchTest test;
+ test.setStrict(true);
+ test.addTerm("foo", 0).addTerm("bar", 1);
+
+ test.fetchPostings(useBlueprint);
+ unique_ptr<SearchIterator> search(test.createSearch(useBlueprint));
+ EXPECT_TRUE(!search->seek(1u));
+ EXPECT_EQUAL(doc_match, search->getDocId());
+ EXPECT_TRUE(!search->seek(doc_no_match));
+ EXPECT_TRUE(search->isAtEnd());
+}
+
+void Test::requireThatPhrasesAreUnpacked(bool useBlueprint) {
+ PhraseSearchTest test;
+ test.addTerm("foo", FakeResult()
+ .doc(doc_match).pos(1).pos(11).pos(21));
+ test.addTerm("bar", FakeResult()
+ .doc(doc_match).pos(2).pos(16).pos(22));
+ test.fetchPostings(useBlueprint);
+ unique_ptr<SearchIterator> search(test.createSearch(useBlueprint));
+ EXPECT_TRUE(search->seek(doc_match));
+ search->unpack(doc_match);
+
+ EXPECT_EQUAL(doc_match, test.tmd().getDocId());
+ EXPECT_EQUAL(2, std::distance(test.tmd().begin(), test.tmd().end()));
+ EXPECT_EQUAL(1u, test.tmd().begin()->getPosition());
+ EXPECT_EQUAL(21u, (test.tmd().begin() + 1)->getPosition());
+}
+
+void Test::requireThatTermsCanBeEvaluatedInPriorityOrder() {
+ vector<uint32_t> order;
+ order.push_back(2);
+ order.push_back(0);
+ order.push_back(1);
+ PhraseSearchTest test;
+ test.addTerm("foo", 0).addTerm("bar", 1).addTerm("baz", 1);
+ test.setOrder(order);
+
+ test.fetchPostings(false);
+ unique_ptr<SearchIterator> search(test.createSearch(false));
+ EXPECT_TRUE(!search->seek(1u));
+ EXPECT_TRUE(search->seek(doc_match));
+ EXPECT_TRUE(!search->seek(doc_no_match));
+}
+
+void
+Test::requireThatBlueprintExposesFieldWithEstimate()
+{
+ FakeRequestContext requestContext;
+ FieldSpec f("foo", 1, 1);
+ SimplePhraseBlueprint phrase(f, requestContext);
+ ASSERT_TRUE(phrase.getState().numFields() == 1);
+ EXPECT_EQUAL(f.getFieldId(), phrase.getState().field(0).getFieldId());
+ EXPECT_EQUAL(f.getHandle(), phrase.getState().field(0).getHandle());
+
+ EXPECT_EQUAL(true, phrase.getState().estimate().empty);
+ EXPECT_EQUAL(0u, phrase.getState().estimate().estHits);
+
+ phrase.addTerm(Blueprint::UP(new MyTerm(phrase.getNextChildField(f), 10)));
+ EXPECT_EQUAL(false, phrase.getState().estimate().empty);
+ EXPECT_EQUAL(10u, phrase.getState().estimate().estHits);
+
+ phrase.addTerm(Blueprint::UP(new MyTerm(phrase.getNextChildField(f), 5)));
+ EXPECT_EQUAL(false, phrase.getState().estimate().empty);
+ EXPECT_EQUAL(5u, phrase.getState().estimate().estHits);
+
+ phrase.addTerm(Blueprint::UP(new MyTerm(phrase.getNextChildField(f), 20)));
+ EXPECT_EQUAL(false, phrase.getState().estimate().empty);
+ EXPECT_EQUAL(5u, phrase.getState().estimate().estHits);
+}
+
+void
+Test::requireThatBlueprintForcesPositionDataOnChildren()
+{
+ FakeRequestContext requestContext;
+ FieldSpec f("foo", 1, 1, true);
+ SimplePhraseBlueprint phrase(f, requestContext);
+ EXPECT_TRUE(f.isFilter());
+ EXPECT_TRUE(!phrase.getNextChildField(f).isFilter());
+}
+
+} // namespace
+
+TEST_APPHOOK(Test);
diff --git a/searchlib/src/tests/queryeval/sourceblender/.gitignore b/searchlib/src/tests/queryeval/sourceblender/.gitignore
new file mode 100644
index 00000000000..e0d4b433d65
--- /dev/null
+++ b/searchlib/src/tests/queryeval/sourceblender/.gitignore
@@ -0,0 +1,4 @@
+.depend
+Makefile
+sourceblender_test
+searchlib_sourceblender_test_app
diff --git a/searchlib/src/tests/queryeval/sourceblender/CMakeLists.txt b/searchlib/src/tests/queryeval/sourceblender/CMakeLists.txt
new file mode 100644
index 00000000000..e566cb0fdf0
--- /dev/null
+++ b/searchlib/src/tests/queryeval/sourceblender/CMakeLists.txt
@@ -0,0 +1,9 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_sourceblender_test_app
+ SOURCES
+ sourceblender.cpp
+ DEPENDS
+ searchlib
+ searchlib_test
+)
+vespa_add_test(NAME searchlib_sourceblender_test_app COMMAND searchlib_sourceblender_test_app)
diff --git a/searchlib/src/tests/queryeval/sourceblender/DESC b/searchlib/src/tests/queryeval/sourceblender/DESC
new file mode 100644
index 00000000000..437dd818eb7
--- /dev/null
+++ b/searchlib/src/tests/queryeval/sourceblender/DESC
@@ -0,0 +1 @@
+sourceblender test. Take a look at sourceblender.cpp for details.
diff --git a/searchlib/src/tests/queryeval/sourceblender/FILES b/searchlib/src/tests/queryeval/sourceblender/FILES
new file mode 100644
index 00000000000..97be7bcaf53
--- /dev/null
+++ b/searchlib/src/tests/queryeval/sourceblender/FILES
@@ -0,0 +1 @@
+sourceblender.cpp
diff --git a/searchlib/src/tests/queryeval/sourceblender/sourceblender.cpp b/searchlib/src/tests/queryeval/sourceblender/sourceblender.cpp
new file mode 100644
index 00000000000..2cfcf4e3f1d
--- /dev/null
+++ b/searchlib/src/tests/queryeval/sourceblender/sourceblender.cpp
@@ -0,0 +1,169 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("sourceblender_test");
+#include <vespa/vespalib/testkit/testapp.h>
+#include <vespa/searchlib/queryeval/sourceblendersearch.h>
+#include <vespa/searchlib/queryeval/simplesearch.h>
+#include <vespa/searchlib/queryeval/simpleresult.h>
+#include <vespa/searchlib/queryeval/intermediate_blueprints.h>
+#include <vespa/searchlib/queryeval/leaf_blueprints.h>
+#include <vespa/searchlib/test/initrange.h>
+#include <vespa/searchlib/common/bitvectoriterator.h>
+#include <vespa/searchlib/attribute/fixedsourceselector.h>
+
+using namespace search::queryeval;
+using namespace search::fef;
+using namespace search;
+using std::make_unique;
+
+/**
+ * Proxy search used to verify unpack pattern
+ **/
+class UnpackChecker : public SearchIterator
+{
+private:
+ SearchIterator::UP _search;
+ SimpleResult _unpacked;
+
+protected:
+ virtual void doSeek(uint32_t docid) {
+ _search->seek(docid);
+ setDocId(_search->getDocId());
+ }
+ virtual void doUnpack(uint32_t docid) {
+ _unpacked.addHit(docid);
+ _search->unpack(docid);
+ }
+
+public:
+ UnpackChecker(SearchIterator *search) : _search(search), _unpacked() {}
+ const SimpleResult &getUnpacked() const { return _unpacked; }
+};
+
+class MySelector : public search::FixedSourceSelector
+{
+public:
+ MySelector(int defaultSource) : search::FixedSourceSelector(defaultSource, "fs") { }
+ MySelector & set(Source s, uint32_t docId) {
+ setSource(s, docId);
+ return *this;
+ }
+};
+
+//-----------------------------------------------------------------------------
+
+TEST("test strictness") {
+ MatchData::UP md(MatchData::makeTestInstance(0, 100, 10));
+ for (uint32_t i = 0; i < 2; ++i) {
+ bool strict = (i == 0);
+
+ SimpleResult a;
+ SimpleResult b;
+
+ a.addHit(2).addHit(5).addHit(6).addHit(8);
+ b.addHit(3).addHit(5).addHit(6).addHit(7);
+
+ MySelector *sel = new MySelector(5);
+ sel->set(2, 1).set(3, 2).set(5, 2).set(7, 1);
+
+ SourceBlenderBlueprint *blend_b = new SourceBlenderBlueprint(*sel);
+ Blueprint::UP a_b(new SimpleBlueprint(a));
+ Blueprint::UP b_b(new SimpleBlueprint(b));
+ a_b->setSourceId(1);
+ b_b->setSourceId(2);
+ blend_b->addChild(std::move(a_b));
+ blend_b->addChild(std::move(b_b));
+ Blueprint::UP bp(blend_b);
+ bp->fetchPostings(strict);
+ SearchIterator::UP search = bp->createSearch(*md, strict);
+ search->initFullRange();
+ SearchIterator &blend = *search;
+
+ EXPECT_TRUE(!blend.seek(1u));
+ if (strict) {
+ EXPECT_EQUAL(2u, blend.getDocId());
+ } else {
+ EXPECT_EQUAL(blend.beginId(), blend.getDocId());
+ }
+ EXPECT_TRUE(blend.seek(5));
+ EXPECT_EQUAL(5u, blend.getDocId());
+ EXPECT_TRUE(!blend.seek(6));
+ if (strict) {
+ EXPECT_TRUE(blend.isAtEnd());
+ } else {
+ EXPECT_EQUAL(5u, blend.getDocId());
+ }
+ delete sel;
+ }
+}
+
+TEST("test full sourceblender search") {
+ SimpleResult a;
+ SimpleResult b;
+ SimpleResult c;
+
+ a.addHit(2).addHit(11).addHit(21).addHit(34);
+ b.addHit(3).addHit(11).addHit(21).addHit(33);
+ c.addHit(4).addHit(11).addHit(21).addHit(32);
+
+ // these are all handed over to the blender
+ UnpackChecker *ua = new UnpackChecker(new SimpleSearch(a));
+ UnpackChecker *ub = new UnpackChecker(new SimpleSearch(b));
+ UnpackChecker *uc = new UnpackChecker(new SimpleSearch(c));
+ auto sel = make_unique<MySelector>(5);
+
+ sel->set(2, 1).set(3, 2).set(11, 2).set(21, 3).set(34, 1);
+ SourceBlenderSearch::Children abc;
+ abc.push_back(SourceBlenderSearch::Child(ua, 1));
+ abc.push_back(SourceBlenderSearch::Child(ub, 2));
+ abc.push_back(SourceBlenderSearch::Child(uc, 3));
+
+ SearchIterator::UP blend(SourceBlenderSearch::create(sel->createIterator(), abc, true));
+ SimpleResult result;
+ result.search(*blend);
+
+ SimpleResult expect_result;
+ expect_result.addHit(2).addHit(3).addHit(11).addHit(21).addHit(34);
+
+ SimpleResult expect_unpacked_a;
+ expect_unpacked_a.addHit(2).addHit(34);
+
+ SimpleResult expect_unpacked_b;
+ expect_unpacked_b.addHit(3).addHit(11);
+
+ SimpleResult expect_unpacked_c;
+ expect_unpacked_c.addHit(21);
+
+ EXPECT_EQUAL(expect_result, result);
+ EXPECT_EQUAL(expect_unpacked_a, ua->getUnpacked());
+ EXPECT_EQUAL(expect_unpacked_b, ub->getUnpacked());
+ EXPECT_EQUAL(expect_unpacked_c, uc->getUnpacked());
+}
+
+using search::test::InitRangeVerifier;
+
+SourceBlenderSearch::Children
+createChildren(const std::vector<InitRangeVerifier::DocIds> & indexes, const InitRangeVerifier & ir, bool strict) {
+ SourceBlenderSearch::Children children;
+ for (size_t index(0); index < indexes.size(); index++) {
+ children.emplace_back(ir.createIterator(indexes[index], strict).release(), index);
+ }
+ return children;
+}
+
+TEST("test init range") {
+ InitRangeVerifier ir;
+ std::vector<InitRangeVerifier::DocIds> indexes(3);
+ auto sel = make_unique<MySelector>(ir.getDocIdLimit());
+ for (uint32_t docId : ir.getExpectedDocIds()) {
+ const size_t indexId = docId%indexes.size();
+ sel->set(docId, indexId);
+ indexes[indexId].push_back(docId);
+ }
+ TermFieldMatchData tfmd;
+ TEST_DO(ir.verify(SourceBlenderSearch::create(sel->createIterator(), createChildren(indexes, ir, false), false)));
+ TEST_DO(ir.verify(SourceBlenderSearch::create(sel->createIterator(), createChildren(indexes, ir, true), true)));
+}
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/queryeval/sparse_vector_benchmark/.gitignore b/searchlib/src/tests/queryeval/sparse_vector_benchmark/.gitignore
new file mode 100644
index 00000000000..b0ce58fa658
--- /dev/null
+++ b/searchlib/src/tests/queryeval/sparse_vector_benchmark/.gitignore
@@ -0,0 +1,6 @@
+/log.txt
+/report.head
+/report.html
+/plot.*
+/*.png
+searchlib_sparse_vector_benchmark_test_app
diff --git a/searchlib/src/tests/queryeval/sparse_vector_benchmark/CMakeLists.txt b/searchlib/src/tests/queryeval/sparse_vector_benchmark/CMakeLists.txt
new file mode 100644
index 00000000000..8d4aa8c10be
--- /dev/null
+++ b/searchlib/src/tests/queryeval/sparse_vector_benchmark/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_sparse_vector_benchmark_test_app
+ SOURCES
+ sparse_vector_benchmark_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_sparse_vector_benchmark_test_app COMMAND searchlib_sparse_vector_benchmark_test_app BENCHMARK)
diff --git a/searchlib/src/tests/queryeval/sparse_vector_benchmark/FILES b/searchlib/src/tests/queryeval/sparse_vector_benchmark/FILES
new file mode 100644
index 00000000000..48eda2416c9
--- /dev/null
+++ b/searchlib/src/tests/queryeval/sparse_vector_benchmark/FILES
@@ -0,0 +1 @@
+sparse_vector_benchmark_test.cpp
diff --git a/searchlib/src/tests/queryeval/sparse_vector_benchmark/sparse_vector_benchmark_test.cpp b/searchlib/src/tests/queryeval/sparse_vector_benchmark/sparse_vector_benchmark_test.cpp
new file mode 100644
index 00000000000..68d7dec0f87
--- /dev/null
+++ b/searchlib/src/tests/queryeval/sparse_vector_benchmark/sparse_vector_benchmark_test.cpp
@@ -0,0 +1,429 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/vespalib/testkit/test_kit.h>
+
+#include "../weak_and/rise_wand.h"
+#include "../weak_and/rise_wand.hpp"
+#include <vespa/searchlib/fef/matchdatalayout.h>
+#include <vespa/searchlib/fef/termfieldmatchdata.h>
+#include <vespa/searchlib/fef/termfieldmatchdataarray.h>
+#include <vespa/searchlib/queryeval/andnotsearch.h>
+#include <vespa/searchlib/queryeval/andsearch.h>
+#include <vespa/searchlib/queryeval/dot_product_search.h>
+#include <vespa/searchlib/queryeval/fake_search.h>
+#include <vespa/searchlib/util/rand48.h>
+#include <vespa/searchlib/queryeval/orsearch.h>
+#include <vespa/searchlib/queryeval/simpleresult.h>
+#include <vespa/searchlib/queryeval/wand/weak_and_search.h>
+#include <vespa/searchlib/queryeval/weighted_set_term_search.h>
+#include <vespa/vespalib/util/box.h>
+#include <vespa/vespalib/util/stringfmt.h>
+
+using namespace search::fef;
+using namespace search::queryeval;
+using namespace vespalib;
+
+namespace {
+
+//-----------------------------------------------------------------------------
+
+struct Writer {
+ FILE *file;
+ Writer(const std::string &file_name) {
+ file = fopen(file_name.c_str(), "w");
+ assert(file != 0);
+ }
+ void write(const char *data, size_t size) const {
+ fwrite(data, 1, size, file);
+ }
+ void fmt(const char *format, ...) const
+#ifdef __GNUC__
+ __attribute__ ((format (printf,2,3)))
+#endif
+ {
+ va_list ap;
+ va_start(ap, format);
+ vfprintf(file, format, ap);
+ va_end(ap);
+ }
+ ~Writer() { fclose(file); }
+};
+
+//-----------------------------------------------------------------------------
+
+// top-level html report (global, used by plots and graphs directly)
+class Report
+{
+private:
+ Writer _html;
+
+public:
+ Report(const std::string &file) : _html(file) {
+ _html.fmt("<html>\n");
+ _html.fmt("<head><title>Sparse Vector Search Benchmark Report</title></head>\n");
+ _html.fmt("<body>\n");
+ _html.fmt("<h1>Sparse Vector Search Benchmark Report</h1>\n");
+ }
+ void addPlot(const std::string &title, const std::string &png_file) {
+ _html.fmt("<h3>%s</h3>\n", title.c_str());
+ _html.fmt("<img src=\"%s\">\n", png_file.c_str());
+ }
+ ~Report() {
+ _html.fmt("<h2>Test Log with Numbers</h2>\n");
+ _html.fmt("<pre>\n");
+ // html file needs external termination
+ }
+};
+
+Report report("report.head");
+
+//-----------------------------------------------------------------------------
+
+// a single graph within a plot
+class Graph
+{
+private:
+ Writer _writer;
+
+public:
+ typedef std::unique_ptr<Graph> UP;
+ Graph(const std::string &file) : _writer(file) {}
+ void addValue(double x, double y) { _writer.fmt("%g %g\n", x, y); }
+};
+
+// a plot possibly containing multiple graphs
+class Plot
+{
+private:
+ std::string _name;
+ int _graphs;
+ Writer _writer;
+ static int _plots;
+
+public:
+ typedef std::unique_ptr<Plot> UP;
+
+ Plot(const std::string &title) : _name(vespalib::make_string("plot.%d", _plots++)), _graphs(0),
+ _writer(vespalib::make_string("%s.gnuplot", _name.c_str())) {
+ std::string png_file = vespalib::make_string("%s.png", _name.c_str());
+ _writer.fmt("set term png size 1200,800\n");
+ _writer.fmt("set output '%s'\n", png_file.c_str());
+ _writer.fmt("set title '%s'\n", title.c_str());
+ _writer.fmt("set xlabel 'term count'\n");
+ _writer.fmt("set ylabel 'time (ms)'\n");
+ report.addPlot(title, png_file);
+ }
+
+ ~Plot() {
+ _writer.fmt("\n");
+ }
+
+ Graph::UP createGraph(const std::string &legend) {
+ std::string file = vespalib::make_string("%s.graph.%d", _name.c_str(), _graphs);
+ _writer.fmt("%s '%s' using 1:2 title '%s' w lines",
+ (_graphs == 0) ? "plot " : ",", file.c_str(), legend.c_str());
+ ++_graphs;
+ return Graph::UP(new Graph(file));
+ }
+
+ static UP createPlot(const std::string &title) { return UP(new Plot(title)); }
+};
+
+int Plot::_plots = 0;
+
+//-----------------------------------------------------------------------------
+
+uint32_t default_weight = 100;
+double max_time = 1000000.0;
+
+//-----------------------------------------------------------------------------
+
+struct ChildFactory {
+ ChildFactory() {}
+ virtual std::string name() const = 0;
+ virtual SearchIterator *createChild(uint32_t idx, uint32_t limit) const = 0;
+ virtual ~ChildFactory() {}
+};
+
+struct SparseVectorFactory {
+ virtual std::string name() const = 0;
+ virtual SearchIterator *createSparseVector(ChildFactory &childFactory, uint32_t childCnt, uint32_t limit) const = 0;
+ virtual ~SparseVectorFactory() {}
+};
+
+struct FilterStrategy {
+ virtual std::string name() const = 0;
+ virtual SearchIterator *createRoot(SparseVectorFactory &vectorFactory, ChildFactory &childFactory, uint32_t childCnt, uint32_t limit) const = 0;
+ virtual ~FilterStrategy() {}
+};
+
+//-----------------------------------------------------------------------------
+
+struct ModSearch : SearchIterator {
+ uint32_t step;
+ uint32_t limit;
+ ModSearch(uint32_t step_in, uint32_t limit_in) : step(step_in), limit(limit_in) { setDocId(step); }
+ virtual void doSeek(uint32_t docid) {
+ assert(docid > getDocId());
+ uint32_t hit = (docid / step) * step;
+ if (hit < docid) {
+ hit += step;
+ }
+ if (hit < limit) {
+ assert(hit >= docid);
+ setDocId(hit);
+ } else {
+ setAtEnd();
+ }
+ }
+ virtual void doUnpack(uint32_t) {}
+};
+
+struct ModSearchFactory : ChildFactory {
+ uint32_t bias;
+ ModSearchFactory() : bias(1) {}
+ explicit ModSearchFactory(int b) : bias(b) {}
+ virtual std::string name() const {
+ return vespalib::make_string("ModSearch(%u)", bias);
+ }
+ virtual SearchIterator *createChild(uint32_t idx, uint32_t limit) const {
+ return new ModSearch(bias + idx, limit);
+ }
+};
+
+//-----------------------------------------------------------------------------
+
+struct VespaWandFactory : SparseVectorFactory {
+ uint32_t n;
+ VespaWandFactory(uint32_t n_in) : n(n_in) {}
+ virtual std::string name() const {
+ return vespalib::make_string("VespaWand(%u)", n);
+ }
+ virtual SearchIterator *createSparseVector(ChildFactory &childFactory, uint32_t childCnt, uint32_t limit) const {
+ wand::Terms terms;
+ for (size_t i = 0; i < childCnt; ++i) {
+ terms.push_back(wand::Term(childFactory.createChild(i, limit), default_weight, limit / (i + 1)));
+ }
+ return WeakAndSearch::create(terms, n, true);
+ }
+};
+
+struct RiseWandFactory : SparseVectorFactory {
+ uint32_t n;
+ RiseWandFactory(uint32_t n_in) : n(n_in) {}
+ virtual std::string name() const {
+ return vespalib::make_string("RiseWand(%u)", n);
+ }
+ virtual SearchIterator *createSparseVector(ChildFactory &childFactory, uint32_t childCnt, uint32_t limit) const {
+ wand::Terms terms;
+ for (size_t i = 0; i < childCnt; ++i) {
+ terms.push_back(wand::Term(childFactory.createChild(i, limit), default_weight, limit / (i + 1)));
+ }
+ return new rise::TermFrequencyRiseWand(terms, n);
+ }
+};
+
+struct WeightedSetFactory : SparseVectorFactory {
+ mutable TermFieldMatchData tfmd;
+ virtual std::string name() const {
+ return vespalib::make_string("WeightedSet");
+ }
+ virtual SearchIterator *createSparseVector(ChildFactory &childFactory, uint32_t childCnt, uint32_t limit) const {
+ std::vector<SearchIterator*> terms;
+ std::vector<int32_t> weights;
+ for (size_t i = 0; i < childCnt; ++i) {
+ terms.push_back(childFactory.createChild(i, limit));
+ weights.push_back(default_weight);
+ }
+ return WeightedSetTermSearch::create(terms, tfmd, weights);
+ }
+};
+
+struct DotProductFactory : SparseVectorFactory {
+ mutable TermFieldMatchData tfmd;
+ virtual std::string name() const {
+ return vespalib::make_string("DotProduct");
+ }
+ virtual SearchIterator *createSparseVector(ChildFactory &childFactory, uint32_t childCnt, uint32_t limit) const {
+ MatchDataLayout layout;
+ std::vector<TermFieldHandle> handles;
+ for (size_t i = 0; i < childCnt; ++i) {
+ handles.push_back(layout.allocTermField(0));
+ }
+ std::vector<SearchIterator*> terms;
+ std::vector<TermFieldMatchData*> childMatch;
+ std::vector<int32_t> weights;
+ MatchData::UP md = layout.createMatchData();
+ for (size_t i = 0; i < childCnt; ++i) {
+ terms.push_back(childFactory.createChild(i, limit));
+ childMatch.push_back(md->resolveTermField(handles[i]));
+ weights.push_back(default_weight);
+ }
+ return DotProductSearch::create(terms, tfmd, childMatch, weights, std::move(md)).release();
+ }
+};
+
+struct OrFactory : SparseVectorFactory {
+ virtual std::string name() const {
+ return vespalib::make_string("Or");
+ }
+ virtual SearchIterator *createSparseVector(ChildFactory &childFactory, uint32_t childCnt, uint32_t limit) const {
+ OrSearch::Children children;
+ for (size_t i = 0; i < childCnt; ++i) {
+ children.push_back(childFactory.createChild(i, limit));
+ }
+ return OrSearch::create(children, true);
+ }
+};
+
+//-----------------------------------------------------------------------------
+
+struct NoFilterStrategy : FilterStrategy {
+ virtual std::string name() const {
+ return vespalib::make_string("NoFilter");
+ }
+ virtual SearchIterator *createRoot(SparseVectorFactory &vectorFactory, ChildFactory &childFactory, uint32_t childCnt, uint32_t limit) const {
+ return vectorFactory.createSparseVector(childFactory, childCnt, limit);
+ }
+};
+
+struct PositiveFilterBeforeStrategy : FilterStrategy {
+ virtual std::string name() const {
+ return vespalib::make_string("PositiveBefore");
+ }
+ virtual SearchIterator *createRoot(SparseVectorFactory &vectorFactory, ChildFactory &childFactory, uint32_t childCnt, uint32_t limit) const {
+ AndSearch::Children children;
+ children.push_back(new ModSearch(2, limit)); // <- 50% hits (hardcoded)
+ children.push_back(vectorFactory.createSparseVector(childFactory, childCnt, limit));
+ return AndSearch::create(children, true);
+ }
+};
+
+struct NegativeFilterAfterStrategy : FilterStrategy {
+ virtual std::string name() const {
+ return vespalib::make_string("NegativeAfter");
+ }
+ virtual SearchIterator *createRoot(SparseVectorFactory &vectorFactory, ChildFactory &childFactory, uint32_t childCnt, uint32_t limit) const {
+ AndNotSearch::Children children;
+ children.push_back(vectorFactory.createSparseVector(childFactory, childCnt, limit));
+ children.push_back(new ModSearch(2, limit)); // <- 50% hits (hardcoded)
+ return AndNotSearch::create(children, true);
+ }
+};
+
+//-----------------------------------------------------------------------------
+
+struct Result {
+ double time_ms;
+ uint32_t num_hits;
+ Result() : time_ms(max_time), num_hits(0) {}
+ Result(double t, uint32_t n) : time_ms(t), num_hits(n) {}
+ void combine(const Result &r) {
+ if (time_ms == max_time) {
+ *this = r;
+ } else {
+ assert(num_hits == r.num_hits);
+ time_ms = std::min(time_ms, r.time_ms);
+ }
+ }
+ std::string toString() const {
+ return vespalib::make_string("%u hits, %g ms", num_hits, time_ms);
+ }
+};
+
+Result run_single_benchmark(FilterStrategy &filterStrategy, SparseVectorFactory &vectorFactory, ChildFactory &childFactory, uint32_t childCnt, uint32_t limit) {
+ SearchIterator::UP search(filterStrategy.createRoot(vectorFactory, childFactory, childCnt, limit));
+ SearchIterator &sb = *search;
+ uint32_t num_hits = 0;
+ FastOS_Time timer;
+ timer.SetNow();
+ for (sb.seek(1); !sb.isAtEnd(); sb.seek(sb.getDocId() + 1)) {
+ ++num_hits;
+ sb.unpack(sb.getDocId());
+ }
+ return Result(timer.MilliSecsToNow(), num_hits);
+}
+
+//-----------------------------------------------------------------------------
+
+// one setup is used to produce all graphs in a single plot
+class Setup
+{
+private:
+ FilterStrategy &_filterStrategy;
+ ChildFactory &_childFactory;
+ uint32_t _limit;
+ Plot::UP _plot;
+
+ std::string make_title() const {
+ return vespalib::make_string("%u docs, filter:%s, terms:%s", _limit, _filterStrategy.name().c_str(), _childFactory.name().c_str());
+ }
+
+public:
+ Setup(FilterStrategy &fs, ChildFactory &cf, uint32_t lim) : _filterStrategy(fs), _childFactory(cf), _limit(lim) {
+ _plot = Plot::createPlot(make_title());
+ fprintf(stderr, "benchmark setup: %s\n", make_title().c_str());
+ }
+
+ void benchmark(SparseVectorFactory &svf, const std::vector<uint32_t> &child_counts) {
+ Graph::UP graph = _plot->createGraph(svf.name());
+ fprintf(stderr, " search operator: %s\n", svf.name().c_str());
+ for (size_t i = 0; i < child_counts.size(); ++i) {
+ uint32_t childCnt = child_counts[i];
+ Result result;
+ for (int j = 0; j < 5; ++j) {
+ result.combine(run_single_benchmark(_filterStrategy, svf, _childFactory, childCnt, _limit));
+ }
+ graph->addValue(childCnt, result.time_ms);
+ fprintf(stderr, " %u children => %s\n", childCnt, result.toString().c_str());
+ }
+ }
+};
+
+//-----------------------------------------------------------------------------
+
+void benchmark_all_operators(Setup &setup, const std::vector<uint32_t> &child_counts) {
+ VespaWandFactory vespaWand256(256);
+ RiseWandFactory riseWand256(256);
+ WeightedSetFactory weightedSet;
+ DotProductFactory dotProduct;
+ OrFactory plain_or;
+ setup.benchmark(vespaWand256, child_counts);
+ setup.benchmark(riseWand256, child_counts);
+ setup.benchmark(weightedSet, child_counts);
+ setup.benchmark(dotProduct, child_counts);
+ setup.benchmark(plain_or, child_counts);
+}
+
+//-----------------------------------------------------------------------------
+
+Box<uint32_t> make_full_child_counts() {
+ return Box<uint32_t>()
+ .add(10).add(20).add(30).add(40).add(50).add(60).add(70).add(80).add(90)
+ .add(100).add(125).add(150).add(175)
+ .add(200).add(250).add(300).add(350).add(400).add(450)
+ .add(500).add(600).add(700).add(800).add(900)
+ .add(1000).add(1200).add(1400).add(1600).add(1800)
+ .add(2000);
+}
+
+//-----------------------------------------------------------------------------
+
+} // namespace <unnamed>
+
+TEST_FFF("benchmark", NoFilterStrategy(), ModSearchFactory(), Setup(f1, f2, 5000000)) {
+ benchmark_all_operators(f3, make_full_child_counts());
+}
+
+TEST_FFF("benchmark", NoFilterStrategy(), ModSearchFactory(8), Setup(f1, f2, 5000000)) {
+ benchmark_all_operators(f3, make_full_child_counts());
+}
+
+TEST_FFF("benchmark", PositiveFilterBeforeStrategy(), ModSearchFactory(), Setup(f1, f2, 5000000)) {
+ benchmark_all_operators(f3, make_full_child_counts());
+}
+
+TEST_FFF("benchmark", NegativeFilterAfterStrategy(), ModSearchFactory(), Setup(f1, f2, 5000000)) {
+ benchmark_all_operators(f3, make_full_child_counts());
+}
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/queryeval/termwise_eval/.gitignore b/searchlib/src/tests/queryeval/termwise_eval/.gitignore
new file mode 100644
index 00000000000..b6b345775f6
--- /dev/null
+++ b/searchlib/src/tests/queryeval/termwise_eval/.gitignore
@@ -0,0 +1 @@
+searchlib_termwise_eval_test_app
diff --git a/searchlib/src/tests/queryeval/termwise_eval/CMakeLists.txt b/searchlib/src/tests/queryeval/termwise_eval/CMakeLists.txt
new file mode 100644
index 00000000000..ab9362f6e99
--- /dev/null
+++ b/searchlib/src/tests/queryeval/termwise_eval/CMakeLists.txt
@@ -0,0 +1,9 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_termwise_eval_test_app
+ SOURCES
+ termwise_eval_test.cpp
+ DEPENDS
+ searchlib
+ searchlib_test
+)
+vespa_add_test(NAME searchlib_termwise_eval_test_app COMMAND searchlib_termwise_eval_test_app)
diff --git a/searchlib/src/tests/queryeval/termwise_eval/termwise_eval_test.cpp b/searchlib/src/tests/queryeval/termwise_eval/termwise_eval_test.cpp
new file mode 100644
index 00000000000..625d9928048
--- /dev/null
+++ b/searchlib/src/tests/queryeval/termwise_eval/termwise_eval_test.cpp
@@ -0,0 +1,641 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/vespalib/testkit/test_kit.h>
+#include <vespa/vespalib/util/stringfmt.h>
+#include <vespa/searchlib/queryeval/searchiterator.h>
+#include <vespa/searchlib/queryeval/andnotsearch.h>
+#include <vespa/searchlib/queryeval/andsearch.h>
+#include <vespa/searchlib/queryeval/orsearch.h>
+#include <vespa/searchlib/queryeval/termwise_search.h>
+#include <vespa/searchlib/queryeval/intermediate_blueprints.h>
+#include <vespa/searchlib/queryeval/termwise_blueprint_helper.h>
+#include <vespa/vespalib/test/insertion_operators.h>
+#include <vespa/searchlib/test/initrange.h>
+#include <vespa/searchlib/common/bitvectoriterator.h>
+
+using namespace vespalib;
+using namespace search;
+using namespace search::fef;
+using namespace search::queryeval;
+
+//-----------------------------------------------------------------------------
+
+const uint32_t my_field = 0;
+
+//-----------------------------------------------------------------------------
+
+struct MyTerm : public SearchIterator {
+ size_t pos;
+ bool is_strict;
+ std::vector<uint32_t> hits;
+ MyTerm(const std::vector<uint32_t> &hits_in, bool is_strict_in)
+ : pos(0), is_strict(is_strict_in), hits(hits_in) {}
+ void initRange(uint32_t beginid, uint32_t endid) override {
+ SearchIterator::initRange(beginid, endid);
+ if (is_strict) {
+ doSeek(beginid);
+ }
+ }
+ void resetRange() override {
+ SearchIterator::resetRange();
+ pos = 0;
+ }
+ void doSeek(uint32_t docid) override {
+ while ((pos < hits.size()) && (hits[pos] < docid)) {
+ ++pos;
+ }
+ if (is_strict) {
+ if ((pos == hits.size()) || isAtEnd(hits[pos])) {
+ setAtEnd();
+ } else {
+ setDocId(hits[pos]);
+ }
+ } else {
+ if (isAtEnd(docid)) {
+ setAtEnd();
+ } else if ((pos < hits.size()) && (hits[pos] == docid)) {
+ setDocId(docid);
+ }
+ }
+ }
+ void doUnpack(uint32_t) override {}
+ void visitMembers(vespalib::ObjectVisitor &visitor) const {
+ visit(visitor, "hits", hits);
+ visit(visitor, "strict", is_strict);
+ }
+};
+
+struct MyBlueprint : SimpleLeafBlueprint {
+ std::vector<uint32_t> hits;
+ MyBlueprint(const std::vector<uint32_t> &hits_in)
+ : SimpleLeafBlueprint(FieldSpecBaseList()), hits(hits_in)
+ {
+ setEstimate(HitEstimate(hits.size(), hits.empty()));
+ }
+ MyBlueprint(const std::vector<uint32_t> &hits_in, bool allow_termwise_eval)
+ : SimpleLeafBlueprint(FieldSpecBaseList()), hits(hits_in)
+ {
+ setEstimate(HitEstimate(hits.size(), hits.empty()));
+ set_allow_termwise_eval(allow_termwise_eval);
+ }
+ MyBlueprint(const std::vector<uint32_t> &hits_in, bool allow_termwise_eval, TermFieldHandle handle)
+ : SimpleLeafBlueprint(FieldSpecBase(my_field, handle)), hits(hits_in)
+ {
+ setEstimate(HitEstimate(hits.size(), hits.empty()));
+ set_allow_termwise_eval(allow_termwise_eval);
+ }
+ SearchIterator::UP createLeafSearch(const fef::TermFieldMatchDataArray &,
+ bool strict) const override
+ {
+ return SearchIterator::UP(new MyTerm(hits, strict));
+ }
+};
+
+struct MyOr : OrBlueprint {
+ bool use_my_value;
+ bool my_value;
+ MyOr(bool use_my_value_in, bool my_value_in = true)
+ : use_my_value(use_my_value_in), my_value(my_value_in) {}
+ bool supports_termwise_children() const override {
+ if (use_my_value) {
+ return my_value;
+ }
+ // the default value for intermediate blueprints
+ return IntermediateBlueprint::supports_termwise_children();
+ }
+};
+
+//-----------------------------------------------------------------------------
+
+UnpackInfo no_unpack() { return UnpackInfo(); }
+
+UnpackInfo selective_unpack() {
+ UnpackInfo unpack;
+ unpack.add(0); // 'only unpack first child' => trigger selective unpack
+ return unpack;
+}
+
+SearchIterator *TERM(std::initializer_list<uint32_t> hits, bool strict) {
+ return new MyTerm(hits, strict);
+}
+
+SearchIterator *ANDNOT(std::initializer_list<SearchIterator *> children, bool strict) {
+ return AndNotSearch::create(children, strict);
+}
+
+SearchIterator *AND(std::initializer_list<SearchIterator *> children, bool strict) {
+ return AndSearch::create(children, strict);
+}
+
+SearchIterator *ANDz(std::initializer_list<SearchIterator *> children, bool strict) {
+ return AndSearch::create(children, strict, no_unpack());
+}
+
+SearchIterator *ANDs(std::initializer_list<SearchIterator *> children, bool strict) {
+ return AndSearch::create(children, strict, selective_unpack());
+}
+
+SearchIterator *OR(std::initializer_list<SearchIterator *> children, bool strict) {
+ return OrSearch::create(children, strict);
+}
+
+SearchIterator *ORz(std::initializer_list<SearchIterator *> children, bool strict) {
+ return OrSearch::create(children, strict, no_unpack());
+}
+
+SearchIterator *ORs(std::initializer_list<SearchIterator *> children, bool strict) {
+ return OrSearch::create(children, strict, selective_unpack());
+}
+
+//-----------------------------------------------------------------------------
+
+template <typename T>
+std::unique_ptr<T> UP(T *t) { return std::unique_ptr<T>(t); }
+
+//-----------------------------------------------------------------------------
+
+SearchIterator::UP make_search(bool strict) {
+ return UP(AND({OR({TERM({2,7}, true),
+ TERM({4,8}, true),
+ TERM({5,6,9}, true)}, true),
+ OR({TERM({1,4,7}, false),
+ TERM({2,5,8}, true),
+ TERM({3,6}, false)}, false),
+ OR({TERM({1,2,3}, false),
+ TERM({4,6}, false),
+ TERM({8,9}, false)}, false)}, strict));
+}
+
+SearchIterator::UP make_filter_search(bool strict) {
+ return UP(ANDNOT({TERM({1,2,3,4,5,6,7,8,9}, true),
+ TERM({1,9}, false),
+ TERM({3,7}, true),
+ TERM({5}, false)}, strict));
+}
+
+void add_if_inside(uint32_t docid, uint32_t begin, uint32_t end, std::vector<uint32_t> &expect) {
+ if (docid >= begin && docid < end) {
+ expect.push_back(docid);
+ }
+}
+
+std::vector<uint32_t> make_expect(uint32_t begin, uint32_t end) {
+ std::vector<uint32_t> expect;
+ add_if_inside(2, begin, end, expect);
+ add_if_inside(4, begin, end, expect);
+ add_if_inside(6, begin, end, expect);
+ add_if_inside(8, begin, end, expect);
+ return expect;
+}
+
+void verify(const std::vector<uint32_t> &expect, SearchIterator &search, uint32_t begin, uint32_t end) {
+ std::vector<uint32_t> actual;
+ search.initRange(begin, end);
+ for (uint32_t docid = begin; docid < end; ++docid) {
+ if (search.seek(docid)) {
+ actual.push_back(docid);
+ }
+ }
+ EXPECT_EQUAL(expect, actual);
+}
+
+//-----------------------------------------------------------------------------
+
+MatchData::UP make_match_data() {
+ uint32_t num_features = 0;
+ uint32_t num_handles = 100;
+ uint32_t num_fields = 1;
+ return MatchData::makeTestInstance(num_features, num_handles, num_fields);
+}
+
+//-----------------------------------------------------------------------------
+
+TEST("require that pseudo term produces correct results") {
+ TEST_DO(verify({1,2,3,4,5}, *UP(TERM({1,2,3,4,5}, true)), 1, 6));
+ TEST_DO(verify({1,2,3,4,5}, *UP(TERM({1,2,3,4,5}, false)), 1, 6));
+ TEST_DO(verify({3,4,5}, *UP(TERM({1,2,3,4,5}, true)), 3, 6));
+ TEST_DO(verify({3,4,5}, *UP(TERM({1,2,3,4,5}, false)), 3, 6));
+ TEST_DO(verify({1,2,3}, *UP(TERM({1,2,3,4,5}, true)), 1, 4));
+ TEST_DO(verify({1,2,3}, *UP(TERM({1,2,3,4,5}, false)), 1, 4));
+}
+
+TEST("require that normal search gives expected results") {
+ auto search = make_search(true);
+ TEST_DO(verify(make_expect(1, 10), *search, 1, 10));
+}
+
+TEST("require that filter search gives expected results") {
+ auto search = make_filter_search(true);
+ TEST_DO(verify(make_expect(1, 10), *search, 1, 10));
+}
+
+TEST("require that termwise AND/OR search produces appropriate results") {
+ for (uint32_t begin: {1, 2, 5}) {
+ for (uint32_t end: {6, 7, 10}) {
+ for (bool strict_search: {true, false}) {
+ for (bool strict_wrapper: {true, false}) {
+ TEST_STATE(make_string("begin: %u, end: %u, strict_search: %s, strict_wrapper: %s",
+ begin, end, strict_search ? "true" : "false",
+ strict_wrapper ? "true" : "false").c_str());
+ auto search = make_termwise(make_search(strict_search), strict_wrapper);
+ TEST_DO(verify(make_expect(begin, end), *search, begin, end));
+ }
+ }
+ }
+ }
+}
+
+TEST("require that termwise filter search produces appropriate results") {
+ for (uint32_t begin: {1, 2, 5}) {
+ for (uint32_t end: {6, 7, 10}) {
+ for (bool strict_search: {true, false}) {
+ for (bool strict_wrapper: {true, false}) {
+ TEST_STATE(make_string("begin: %u, end: %u, strict_search: %s, strict_wrapper: %s",
+ begin, end, strict_search ? "true" : "false",
+ strict_wrapper ? "true" : "false").c_str());
+ auto search = make_termwise(make_filter_search(strict_search), strict_wrapper);
+ TEST_DO(verify(make_expect(begin, end), *search, begin, end));
+ }
+ }
+ }
+ }
+}
+
+TEST("require that termwise ANDNOT with single term works") {
+ TEST_DO(verify({2,3,4}, *make_termwise(UP(ANDNOT({TERM({1,2,3,4,5}, true)}, true)), true), 2, 5));
+}
+
+TEST("require that pseudo term is rewindable") {
+ auto search = UP(TERM({1,2,3,4,5}, true));
+ TEST_DO(verify({3,4,5}, *search, 3, 6));
+ search->resetRange();
+ TEST_DO(verify({1,2,3,4}, *search, 1, 5));
+}
+
+TEST("require that termwise wrapper is rewindable") {
+ auto search = make_termwise(make_search(true), true);
+ TEST_DO(verify(make_expect(3, 7), *search, 3, 7));
+ search->resetRange();
+ TEST_DO(verify(make_expect(1, 5), *search, 1, 5));
+}
+
+//-----------------------------------------------------------------------------
+
+TEST("require that leaf blueprints allow termwise evaluation by default") {
+ MyBlueprint bp({});
+ EXPECT_TRUE(bp.getState().allow_termwise_eval());
+}
+
+TEST("require that leaf blueprints can enable/disable termwise evaluation") {
+ MyBlueprint enable({}, true);
+ MyBlueprint disable({}, false);
+ EXPECT_TRUE(enable.getState().allow_termwise_eval());
+ EXPECT_FALSE(disable.getState().allow_termwise_eval());
+}
+
+TEST("require that intermediate blueprints disallow termwise evaluation by default") {
+ MyOr bp(false);
+ bp.addChild(UP(new MyBlueprint({}, true)));
+ bp.addChild(UP(new MyBlueprint({}, true)));
+ EXPECT_FALSE(bp.getState().allow_termwise_eval());
+}
+
+TEST("require that intermediate blueprints can enable/disable termwise evaluation") {
+ MyOr enable(true, true);
+ enable.addChild(UP(new MyBlueprint({}, true)));
+ enable.addChild(UP(new MyBlueprint({}, true)));
+ EXPECT_TRUE(enable.getState().allow_termwise_eval());
+ MyOr disable(true, false);
+ disable.addChild(UP(new MyBlueprint({}, true)));
+ disable.addChild(UP(new MyBlueprint({}, true)));
+ EXPECT_FALSE(disable.getState().allow_termwise_eval());
+}
+
+TEST("require that intermediate blueprints cannot be termwise unless all its children are termwise") {
+ MyOr bp(true, true);
+ bp.addChild(UP(new MyBlueprint({}, true)));
+ bp.addChild(UP(new MyBlueprint({}, false)));
+ EXPECT_FALSE(bp.getState().allow_termwise_eval());
+}
+
+//-----------------------------------------------------------------------------
+
+TEST("require that leafs have tree size 1") {
+ MyBlueprint bp({});
+ EXPECT_EQUAL(1u, bp.getState().tree_size());
+}
+
+TEST("require that tree size is accumulated correctly by intermediate nodes") {
+ MyOr bp(false);
+ EXPECT_EQUAL(1u, bp.getState().tree_size());
+ bp.addChild(UP(new MyBlueprint({})));
+ bp.addChild(UP(new MyBlueprint({})));
+ EXPECT_EQUAL(3u, bp.getState().tree_size());
+ auto child = UP(new MyOr(false));
+ child->addChild(UP(new MyBlueprint({})));
+ child->addChild(UP(new MyBlueprint({})));
+ bp.addChild(std::move(child));
+ EXPECT_EQUAL(6u, bp.getState().tree_size());
+}
+
+//-----------------------------------------------------------------------------
+
+TEST("require that any blueprint node can obtain the root") {
+ MyOr bp(false);
+ bp.addChild(UP(new MyBlueprint({1,2,3})));
+ bp.addChild(UP(new MyBlueprint({1,2,3,4,5,6})));
+ EXPECT_TRUE(&bp != &bp.getChild(0));
+ EXPECT_TRUE(&bp != &bp.getChild(1));
+ EXPECT_TRUE(&bp == &bp.getChild(0).root());
+ EXPECT_TRUE(&bp == &bp.getChild(1).root());
+ EXPECT_TRUE(&bp == &bp.root());
+}
+
+//-----------------------------------------------------------------------------
+
+TEST("require that match data keeps track of the termwise limit") {
+ auto md = make_match_data();
+ EXPECT_EQUAL(1.0, md->get_termwise_limit());
+ md->set_termwise_limit(0.03);
+ EXPECT_EQUAL(0.03, md->get_termwise_limit());
+}
+
+//-----------------------------------------------------------------------------
+
+TEST("require that terwise test search string dump is detailed enough") {
+ EXPECT_EQUAL(make_termwise(UP(OR({TERM({1,2,3}, true), TERM({2,3}, true), TERM({3}, true)}, true)), true)->asString(),
+ make_termwise(UP(OR({TERM({1,2,3}, true), TERM({2,3}, true), TERM({3}, true)}, true)), true)->asString());
+
+ EXPECT_NOT_EQUAL(make_termwise(UP(OR({TERM({1,2,3}, true), TERM({2,3}, true), TERM({3}, true)}, true)), true)->asString(),
+ make_termwise(UP(OR({TERM({1,2,3}, true), TERM({2,3}, false), TERM({3}, true)}, true)), true)->asString());
+
+ EXPECT_NOT_EQUAL(make_termwise(UP(OR({TERM({1,2,3}, true), TERM({2,3}, true), TERM({3}, true)}, true)), true)->asString(),
+ make_termwise(UP(OR({TERM({1,2,3}, true), TERM({2,3}, true), TERM({3}, true)}, false)), true)->asString());
+
+ EXPECT_NOT_EQUAL(make_termwise(UP(OR({TERM({1,2,3}, true), TERM({2,3}, true), TERM({3}, true)}, true)), true)->asString(),
+ make_termwise(UP(OR({TERM({1,2,3}, true), TERM({2,3}, true), TERM({3}, true)}, true)), false)->asString());
+
+ EXPECT_NOT_EQUAL(make_termwise(UP(OR({TERM({1,2,3}, true), TERM({2,3}, true), TERM({3}, true)}, true)), true)->asString(),
+ make_termwise(UP(OR({TERM({1,2,3}, true), TERM({3}, true), TERM({2,3}, true)}, true)), true)->asString());
+}
+
+//-----------------------------------------------------------------------------
+
+TEST("require that basic termwise evaluation works") {
+ auto md = make_match_data();
+ md->set_termwise_limit(0.0);
+ md->resolveTermField(1)->tagAsNotNeeded();
+ md->resolveTermField(2)->tagAsNotNeeded();
+ OrBlueprint my_or;
+ my_or.addChild(UP(new MyBlueprint({1}, true, 1)));
+ my_or.addChild(UP(new MyBlueprint({2}, true, 2)));
+ for (bool strict: {true, false}) {
+ EXPECT_EQUAL(my_or.createSearch(*md, strict)->asString(),
+ make_termwise(UP(OR({TERM({1}, strict), TERM({2}, strict)}, strict)), strict)->asString());
+ }
+}
+
+TEST("require that the hit rate must be high enough for termwise evaluation to be activated") {
+ auto md = make_match_data();
+ md->set_termwise_limit(1.0); // <-
+ md->resolveTermField(1)->tagAsNotNeeded();
+ md->resolveTermField(2)->tagAsNotNeeded();
+ OrBlueprint my_or;
+ my_or.addChild(UP(new MyBlueprint({1}, true, 1)));
+ my_or.addChild(UP(new MyBlueprint({2}, true, 2)));
+ for (bool strict: {true, false}) {
+ EXPECT_TRUE(my_or.createSearch(*md, strict)->asString().find("TermwiseSearch") == vespalib::string::npos);
+ }
+}
+
+TEST("require that enough unranked termwise terms are present for termwise evaluation to be activated") {
+ auto md = make_match_data();
+ md->set_termwise_limit(0.0);
+ md->resolveTermField(1)->tagAsNotNeeded();
+ md->resolveTermField(2)->tagAsNotNeeded();
+ OrBlueprint my_or;
+ my_or.addChild(UP(new MyBlueprint({1}, true, 1)));
+ my_or.addChild(UP(new MyBlueprint({2}, false, 2))); // <- not termwise
+ my_or.addChild(UP(new MyBlueprint({3}, true, 3))); // <- ranked
+ for (bool strict: {true, false}) {
+ EXPECT_TRUE(my_or.createSearch(*md, strict)->asString().find("TermwiseSearch") == vespalib::string::npos);
+ }
+}
+
+TEST("require that termwise evaluation can be multi-level, but not duplicated") {
+ auto md = make_match_data();
+ md->set_termwise_limit(0.0);
+ md->resolveTermField(1)->tagAsNotNeeded();
+ md->resolveTermField(2)->tagAsNotNeeded();
+ md->resolveTermField(3)->tagAsNotNeeded();
+ OrBlueprint my_or;
+ my_or.addChild(UP(new MyBlueprint({1}, true, 1)));
+ auto child = UP(new OrBlueprint());
+ child->addChild(UP(new MyBlueprint({2}, true, 2)));
+ child->addChild(UP(new MyBlueprint({3}, true, 3)));
+ my_or.addChild(std::move(child));
+ for (bool strict: {true, false}) {
+ EXPECT_EQUAL(my_or.createSearch(*md, strict)->asString(),
+ make_termwise(UP(OR({TERM({1}, strict), ORz({TERM({2}, strict), TERM({3}, strict)}, strict)}, strict)), strict)->asString());
+ }
+}
+
+//-----------------------------------------------------------------------------
+
+TEST("require that OR can be completely termwise") {
+ auto md = make_match_data();
+ md->set_termwise_limit(0.0);
+ md->resolveTermField(1)->tagAsNotNeeded();
+ md->resolveTermField(2)->tagAsNotNeeded();
+ OrBlueprint my_or;
+ my_or.addChild(UP(new MyBlueprint({1}, true, 1)));
+ my_or.addChild(UP(new MyBlueprint({2}, true, 2)));
+ for (bool strict: {true, false}) {
+ EXPECT_EQUAL(my_or.createSearch(*md, strict)->asString(),
+ make_termwise(UP(OR({TERM({1}, strict), TERM({2}, strict)}, strict)), strict)->asString());
+ }
+}
+
+TEST("require that OR can be partially termwise") {
+ auto md = make_match_data();
+ md->set_termwise_limit(0.0);
+ md->resolveTermField(1)->tagAsNotNeeded();
+ md->resolveTermField(3)->tagAsNotNeeded();
+ OrBlueprint my_or;
+ my_or.addChild(UP(new MyBlueprint({1}, true, 1)));
+ my_or.addChild(UP(new MyBlueprint({2}, true, 2)));
+ my_or.addChild(UP(new MyBlueprint({3}, true, 3)));
+ for (bool strict: {true, false}) {
+ EXPECT_EQUAL(my_or.createSearch(*md, strict)->asString(),
+ UP(ORs({make_termwise(UP(OR({TERM({1}, strict), TERM({3}, strict)}, strict)), strict).release(), TERM({2}, strict)}, strict))->asString());
+ }
+}
+
+TEST("require that OR puts termwise subquery at the right place") {
+ auto md = make_match_data();
+ md->set_termwise_limit(0.0);
+ md->resolveTermField(2)->tagAsNotNeeded();
+ md->resolveTermField(3)->tagAsNotNeeded();
+ OrBlueprint my_or;
+ my_or.addChild(UP(new MyBlueprint({1}, true, 1)));
+ my_or.addChild(UP(new MyBlueprint({2}, true, 2)));
+ my_or.addChild(UP(new MyBlueprint({3}, true, 3)));
+ for (bool strict: {true, false}) {
+ EXPECT_EQUAL(my_or.createSearch(*md, strict)->asString(),
+ UP(ORs({TERM({1}, strict), make_termwise(UP(OR({TERM({2}, strict), TERM({3}, strict)}, strict)), strict).release()}, strict))->asString());
+ }
+}
+
+TEST("require that OR can use termwise eval also when having non-termwise children") {
+ auto md = make_match_data();
+ md->set_termwise_limit(0.0);
+ md->resolveTermField(1)->tagAsNotNeeded();
+ md->resolveTermField(2)->tagAsNotNeeded();
+ md->resolveTermField(3)->tagAsNotNeeded();
+ OrBlueprint my_or;
+ my_or.addChild(UP(new MyBlueprint({1}, false, 1)));
+ my_or.addChild(UP(new MyBlueprint({2}, true, 2)));
+ my_or.addChild(UP(new MyBlueprint({3}, true, 3)));
+ for (bool strict: {true, false}) {
+ EXPECT_EQUAL(my_or.createSearch(*md, strict)->asString(),
+ UP(ORz({TERM({1}, strict), make_termwise(UP(OR({TERM({2}, strict), TERM({3}, strict)}, strict)), strict).release()}, strict))->asString());
+ }
+}
+
+//-----------------------------------------------------------------------------
+
+TEST("require that AND can be completely termwise") {
+ auto md = make_match_data();
+ md->set_termwise_limit(0.0);
+ md->resolveTermField(1)->tagAsNotNeeded();
+ md->resolveTermField(2)->tagAsNotNeeded();
+ AndBlueprint my_and;
+ my_and.addChild(UP(new MyBlueprint({1}, true, 1)));
+ my_and.addChild(UP(new MyBlueprint({2}, true, 2)));
+ for (bool strict: {true, false}) {
+ EXPECT_EQUAL(my_and.createSearch(*md, strict)->asString(),
+ make_termwise(UP(AND({TERM({1}, strict), TERM({2}, false)}, strict)), strict)->asString());
+ }
+}
+
+TEST("require that AND can be partially termwise") {
+ auto md = make_match_data();
+ md->set_termwise_limit(0.0);
+ md->resolveTermField(1)->tagAsNotNeeded();
+ md->resolveTermField(3)->tagAsNotNeeded();
+ AndBlueprint my_and;
+ my_and.addChild(UP(new MyBlueprint({1}, true, 1)));
+ my_and.addChild(UP(new MyBlueprint({2}, true, 2)));
+ my_and.addChild(UP(new MyBlueprint({3}, true, 3)));
+ for (bool strict: {true, false}) {
+ EXPECT_EQUAL(my_and.createSearch(*md, strict)->asString(),
+ UP(ANDs({make_termwise(UP(AND({TERM({1}, strict), TERM({3}, false)}, strict)), strict).release(), TERM({2}, false)}, strict))->asString());
+ }
+}
+
+TEST("require that AND puts termwise subquery at the right place") {
+ auto md = make_match_data();
+ md->set_termwise_limit(0.0);
+ md->resolveTermField(2)->tagAsNotNeeded();
+ md->resolveTermField(3)->tagAsNotNeeded();
+ AndBlueprint my_and;
+ my_and.addChild(UP(new MyBlueprint({1}, true, 1)));
+ my_and.addChild(UP(new MyBlueprint({2}, true, 2)));
+ my_and.addChild(UP(new MyBlueprint({3}, true, 3)));
+ for (bool strict: {true, false}) {
+ EXPECT_EQUAL(my_and.createSearch(*md, strict)->asString(),
+ UP(ANDs({TERM({1}, strict), make_termwise(UP(AND({TERM({2}, false), TERM({3}, false)}, false)), false).release()}, strict))->asString());
+ }
+}
+
+TEST("require that AND can use termwise eval also when having non-termwise children") {
+ auto md = make_match_data();
+ md->set_termwise_limit(0.0);
+ md->resolveTermField(1)->tagAsNotNeeded();
+ md->resolveTermField(2)->tagAsNotNeeded();
+ md->resolveTermField(3)->tagAsNotNeeded();
+ AndBlueprint my_and;
+ my_and.addChild(UP(new MyBlueprint({1}, false, 1)));
+ my_and.addChild(UP(new MyBlueprint({2}, true, 2)));
+ my_and.addChild(UP(new MyBlueprint({3}, true, 3)));
+ for (bool strict: {true, false}) {
+ EXPECT_EQUAL(my_and.createSearch(*md, strict)->asString(),
+ UP(ANDz({TERM({1}, strict), make_termwise(UP(AND({TERM({2}, false), TERM({3}, false)}, false)), false).release()}, strict))->asString());
+ }
+}
+
+//-----------------------------------------------------------------------------
+
+TEST("require that ANDNOT can be completely termwise") {
+ auto md = make_match_data();
+ md->set_termwise_limit(0.0);
+ md->resolveTermField(1)->tagAsNotNeeded();
+ AndNotBlueprint my_andnot;
+ my_andnot.addChild(UP(new MyBlueprint({1}, true, 1)));
+ my_andnot.addChild(UP(new MyBlueprint({2}, true, 2)));
+ for (bool strict: {true, false}) {
+ EXPECT_EQUAL(my_andnot.createSearch(*md, strict)->asString(),
+ make_termwise(UP(ANDNOT({TERM({1}, strict), TERM({2}, false)}, strict)), strict)->asString());
+ }
+}
+
+TEST("require that ANDNOT can be partially termwise") {
+ auto md = make_match_data();
+ md->set_termwise_limit(0.0);
+ AndNotBlueprint my_andnot;
+ my_andnot.addChild(UP(new MyBlueprint({1}, true, 1)));
+ my_andnot.addChild(UP(new MyBlueprint({2}, true, 2)));
+ my_andnot.addChild(UP(new MyBlueprint({3}, true, 3)));
+ for (bool strict: {true, false}) {
+ EXPECT_EQUAL(my_andnot.createSearch(*md, strict)->asString(),
+ UP(ANDNOT({TERM({1}, strict), make_termwise(UP(OR({TERM({2}, false), TERM({3}, false)}, false)), false).release()}, strict))->asString());
+ }
+}
+
+TEST("require that ANDNOT can be partially termwise with first child being termwise") {
+ auto md = make_match_data();
+ md->set_termwise_limit(0.0);
+ md->resolveTermField(1)->tagAsNotNeeded();
+ AndNotBlueprint my_andnot;
+ my_andnot.addChild(UP(new MyBlueprint({1}, true, 1)));
+ my_andnot.addChild(UP(new MyBlueprint({2}, false, 2)));
+ my_andnot.addChild(UP(new MyBlueprint({3}, true, 3)));
+ for (bool strict: {true, false}) {
+ EXPECT_EQUAL(my_andnot.createSearch(*md, strict)->asString(),
+ UP(ANDNOT({make_termwise(UP(ANDNOT({TERM({1}, strict), TERM({3}, false)}, strict)), strict).release(), TERM({2}, false)}, strict))->asString());
+ }
+}
+
+//-----------------------------------------------------------------------------
+
+TEST("require that termwise blueprint helper calculates unpack info correctly") {
+ OrBlueprint my_or;
+ my_or.addChild(UP(new MyBlueprint({1}, false, 1))); // termwise not allowed
+ my_or.addChild(UP(new MyBlueprint({2}, false, 2))); // termwise not allowed and ranked
+ my_or.addChild(UP(new MyBlueprint({3}, true, 3)));
+ my_or.addChild(UP(new MyBlueprint({4}, true, 4))); // ranked
+ my_or.addChild(UP(new MyBlueprint({5}, true, 5)));
+ MultiSearch::Children dummy_searches(5, nullptr);
+ UnpackInfo unpack; // non-termwise unpack info
+ unpack.add(1);
+ unpack.add(3);
+ TermwiseBlueprintHelper helper(my_or, dummy_searches, unpack);
+ EXPECT_EQUAL(helper.children.size(), 3u);
+ EXPECT_EQUAL(helper.termwise.size(), 2u);
+ EXPECT_EQUAL(helper.first_termwise, 2u);
+ EXPECT_TRUE(!helper.termwise_unpack.needUnpack(0));
+ EXPECT_TRUE(helper.termwise_unpack.needUnpack(1));
+ EXPECT_TRUE(!helper.termwise_unpack.needUnpack(2));
+ EXPECT_TRUE(helper.termwise_unpack.needUnpack(3));
+ EXPECT_TRUE(!helper.termwise_unpack.needUnpack(4));
+ EXPECT_TRUE(!helper.termwise_unpack.needUnpack(5));
+}
+
+TEST("test that init range works for terwise too.") {
+ search::test::InitRangeVerifier ir;
+ ir.verify(*make_termwise(ir.createIterator(ir.getExpectedDocIds(), false), false));
+ ir.verify(*make_termwise(ir.createIterator(ir.getExpectedDocIds(), true), true));
+}
+
+//-----------------------------------------------------------------------------
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/queryeval/weak_and/.gitignore b/searchlib/src/tests/queryeval/weak_and/.gitignore
new file mode 100644
index 00000000000..5bbecb89249
--- /dev/null
+++ b/searchlib/src/tests/queryeval/weak_and/.gitignore
@@ -0,0 +1,7 @@
+/weak_and_bench
+/weak_and_test_expensive
+/parallel_weak_and_bench
+searchlib_weak_and_test_app
+searchlib_parallel_weak_and_bench_app
+searchlib_weak_and_bench_app
+searchlib_weak_and_test_expensive_app
diff --git a/searchlib/src/tests/queryeval/weak_and/CMakeLists.txt b/searchlib/src/tests/queryeval/weak_and/CMakeLists.txt
new file mode 100644
index 00000000000..b3839ac75f3
--- /dev/null
+++ b/searchlib/src/tests/queryeval/weak_and/CMakeLists.txt
@@ -0,0 +1,30 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_weak_and_test_app
+ SOURCES
+ weak_and_test.cpp
+ DEPENDS
+ searchlib
+ searchlib_test
+)
+vespa_add_test(NAME searchlib_weak_and_test_app COMMAND searchlib_weak_and_test_app)
+vespa_add_executable(searchlib_weak_and_test_expensive_app
+ SOURCES
+ weak_and_test_expensive.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_weak_and_test_expensive_app NO_VALGRIND COMMAND searchlib_weak_and_test_expensive_app)
+vespa_add_executable(searchlib_weak_and_bench_app
+ SOURCES
+ weak_and_bench.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_weak_and_test_expensive_app COMMAND searchlib_weak_and_bench_app BENCHMARK)
+vespa_add_executable(searchlib_parallel_weak_and_bench_app
+ SOURCES
+ parallel_weak_and_bench.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_weak_and_test_expensive_app COMMAND searchlib_parallel_weak_and_bench_app BENCHMARK)
diff --git a/searchlib/src/tests/queryeval/weak_and/FILES b/searchlib/src/tests/queryeval/weak_and/FILES
new file mode 100644
index 00000000000..972727bfa00
--- /dev/null
+++ b/searchlib/src/tests/queryeval/weak_and/FILES
@@ -0,0 +1,2 @@
+weak_and_test.cpp
+weak_and_bench.cpp
diff --git a/searchlib/src/tests/queryeval/weak_and/parallel_weak_and_bench.cpp b/searchlib/src/tests/queryeval/weak_and/parallel_weak_and_bench.cpp
new file mode 100644
index 00000000000..8e5b7fc7b85
--- /dev/null
+++ b/searchlib/src/tests/queryeval/weak_and/parallel_weak_and_bench.cpp
@@ -0,0 +1,19 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include "wand_bench_setup.hpp"
+
+TEST_FF("benchmark", VespaParallelWandFactory(1000), WandSetup(f1, 10, 10000000)) { f2.benchmark(); }
+TEST_FF("benchmark", DotProductRiseWandFactory(1000), WandSetup(f1, 10, 10000000)) { f2.benchmark(); }
+TEST_FF("benchmark", VespaParallelWandFactory(1000), WandSetup(f1, 100, 10000000)) { f2.benchmark(); }
+TEST_FF("benchmark", DotProductRiseWandFactory(1000), WandSetup(f1, 100, 10000000)) { f2.benchmark(); }
+TEST_FF("benchmark", VespaParallelWandFactory(1000), WandSetup(f1, 1000, 10000000)) { f2.benchmark(); }
+TEST_FF("benchmark", DotProductRiseWandFactory(1000), WandSetup(f1, 1000, 10000000)) { f2.benchmark(); }
+
+TEST_FFF("benchmark", VespaParallelWandFactory(1000), FilterFactory(f1, 2), WandSetup(f2, 10, 10000000)) { f3.benchmark(); }
+TEST_FFF("benchmark", DotProductRiseWandFactory(1000), FilterFactory(f1, 2), WandSetup(f2, 10, 10000000)) { f3.benchmark(); }
+TEST_FFF("benchmark", VespaParallelWandFactory(1000), FilterFactory(f1, 2), WandSetup(f2, 100, 10000000)) { f3.benchmark(); }
+TEST_FFF("benchmark", DotProductRiseWandFactory(1000), FilterFactory(f1, 2), WandSetup(f2, 100, 10000000)) { f3.benchmark(); }
+TEST_FFF("benchmark", VespaParallelWandFactory(1000), FilterFactory(f1, 2), WandSetup(f2, 1000, 10000000)) { f3.benchmark(); }
+TEST_FFF("benchmark", DotProductRiseWandFactory(1000), FilterFactory(f1, 2), WandSetup(f2, 1000, 10000000)) { f3.benchmark(); }
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/queryeval/weak_and/rise_wand.h b/searchlib/src/tests/queryeval/weak_and/rise_wand.h
new file mode 100644
index 00000000000..f130f0d1012
--- /dev/null
+++ b/searchlib/src/tests/queryeval/weak_and/rise_wand.h
@@ -0,0 +1,132 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/queryeval/wand/weak_and_search.h>
+#include <vespa/searchlib/queryeval/wand/wand_parts.h>
+#include <vespa/vespalib/util/priority_queue.h>
+#include <functional>
+
+using search::queryeval::wand::DotProductScorer;
+using search::queryeval::wand::TermFrequencyScorer;
+using namespace search::queryeval;
+
+namespace rise {
+
+struct TermFreqScorer
+{
+ static int64_t calculateMaxScore(const wand::Term &term) {
+ return TermFrequencyScorer::calculateMaxScore(term);
+ }
+ static int64_t calculateScore(const wand::Term &term, uint32_t docId) {
+ term.search->unpack(docId);
+ return term.maxScore;
+ }
+};
+
+template <typename Scorer, typename Cmp>
+class RiseWand : public search::queryeval::SearchIterator
+{
+public:
+ typedef uint32_t docid_t;
+ typedef uint64_t score_t;
+ typedef search::queryeval::wand::Terms Terms;
+ typedef search::queryeval::SearchIterator *PostingStreamPtr;
+
+private:
+ // comparator class that compares two streams. The variables a and b are
+ // logically indices into the streams vector.
+ class StreamComparator : public std::binary_function<uint16_t, uint16_t, bool>
+ {
+ private:
+ const docid_t *_streamDocIds;
+ //const addr_t *const *_streamPayloads;
+
+ public:
+ StreamComparator(const docid_t *streamDocIds);
+ //const addr_t *const *streamPayloads);
+ inline bool operator()(const uint16_t a, const uint16_t b);
+ };
+
+ // number of streams present in the query
+ uint32_t _numStreams;
+
+ // we own our substreams
+ std::vector<PostingStreamPtr> _streams;
+
+ size_t _lastPivotIdx;
+
+ // array of current doc ids for the various streams
+ docid_t *_streamDocIds;
+
+ // two arrays of indices into the _streams vector. This is used for merge.
+ // inplace_merge is not as efficient as the copy merge.
+ uint16_t *_streamIndices;
+ uint16_t *_streamIndicesAux;
+
+ // comparator that compares two streams
+ StreamComparator _streamComparator;
+
+ //-------------------------------------------------------------------------
+ // variables used for scoring and pruning
+
+ size_t _n;
+ score_t _limit;
+ score_t *_streamScores;
+ vespalib::PriorityQueue<score_t> _scores;
+ Terms _terms;
+
+ //-------------------------------------------------------------------------
+
+ /**
+ * Find the pivot feature index
+ *
+ * @param threshold score threshold
+ * @param pivotIdx pivot index
+ *
+ * @return whether a valid pivot index is found
+ */
+ bool _findPivotFeatureIdx(const score_t threshold, uint32_t &pivotIdx);
+
+ /**
+ * let the first numStreamsToMove streams in the stream
+ * vector move to the next doc, and sort them.
+ *
+ * @param numStreamsToMove the number of streams that should move
+ */
+ void _moveStreamsAndSort(const uint32_t numStreamsToMove);
+
+ /**
+ * let the first numStreamsToMove streams in the stream
+ * vector move to desiredDocId or to the first docId greater than
+ * desiredDocId if desiredDocId does not exist in this stream,
+ * and sort them.
+ *
+ * @param numStreamsToMove the number of streams that should move
+ * @param desiredDocId desired doc id
+ *
+ */
+ void _moveStreamsToDocAndSort(const uint32_t numStreamsToMove, const docid_t desiredDocId);
+
+ /**
+ * do sort and merge for WAND
+ *
+ * @param numStreamsToSort the number of streams (starting from the first one) should
+ * be sorted and then merge sort with the rest
+ *
+ */
+ void _sortMerge(const uint32_t numStreamsToSort);
+
+public:
+ RiseWand(const Terms &terms, uint32_t n);
+ virtual ~RiseWand();
+ void next();
+ virtual void doSeek(uint32_t docid);
+ virtual void doUnpack(uint32_t docid);
+};
+
+typedef RiseWand<TermFreqScorer, std::greater_equal<uint64_t> > TermFrequencyRiseWand;
+typedef RiseWand<DotProductScorer, std::greater<uint64_t> > DotProductRiseWand;
+
+} // namespacve rise
+
diff --git a/searchlib/src/tests/queryeval/weak_and/rise_wand.hpp b/searchlib/src/tests/queryeval/weak_and/rise_wand.hpp
new file mode 100644
index 00000000000..02420e6c35d
--- /dev/null
+++ b/searchlib/src/tests/queryeval/weak_and/rise_wand.hpp
@@ -0,0 +1,238 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/fastos/fastos.h>
+#include "rise_wand.h"
+#include <vespa/searchlib/queryeval/wand/wand_parts.h>
+#include <math.h>
+#include <iostream>
+
+using search::queryeval::wand::TermFrequencyScorer;
+
+namespace rise {
+
+template <typename Scorer, typename Cmp>
+RiseWand<Scorer, Cmp>::RiseWand(const Terms &terms, uint32_t n)
+ : _numStreams(0),
+ _streams(),
+ _lastPivotIdx(0),
+ _streamDocIds(new docid_t[terms.size()]),
+ _streamIndices(new uint16_t[terms.size()]),
+ _streamIndicesAux(new uint16_t[terms.size()]),
+ _streamComparator(_streamDocIds),
+ _n(n),
+ _limit(1),
+ _streamScores(new score_t[terms.size()]),
+ _scores(),
+ _terms(terms)
+{
+ for (size_t i = 0; i < terms.size(); ++i) {
+ _terms[i].maxScore = Scorer::calculateMaxScore(terms[i]);
+ _streamScores[i] = _terms[i].maxScore;
+ _streams.push_back(terms[i].search);
+ }
+ _numStreams = _streams.size();
+ if (_numStreams == 0) {
+ setAtEnd();
+ }
+ for (uint32_t i=0; i<_numStreams; ++i) {
+ _streamIndices[i] = i;
+ }
+ for (uint32_t i=0; i<_numStreams; ++i) {
+ _streamDocIds[i] = _streams[i]->getDocId();
+ }
+ std::sort(_streamIndices, _streamIndices+_numStreams, _streamComparator);
+}
+
+template <typename Scorer, typename Cmp>
+RiseWand<Scorer, Cmp>::~RiseWand()
+{
+ for (size_t i = 0; i < _streams.size(); ++i) {
+ delete _streams[i];
+ }
+ delete [] _streamScores;
+ delete [] _streamIndicesAux;
+ delete [] _streamIndices;
+ delete [] _streamDocIds;
+}
+
+template <typename Scorer, typename Cmp>
+void
+RiseWand<Scorer, Cmp>::next()
+{
+
+ // We do not check whether the stream is already at the end
+ // here based on the assumption that application won't call
+ // next() for streams that are already at the end, or atleast
+ // won't do this frequently.
+
+ uint32_t pivotIdx;
+ docid_t pivotDocId = search::endDocId;
+ score_t threshold = _limit;
+
+ while (true) {
+
+ if (!_findPivotFeatureIdx(threshold, pivotIdx)) {
+ setAtEnd();
+ return;
+ }
+
+ pivotDocId = _streamDocIds[_streamIndices[pivotIdx]];
+
+ if (_streamDocIds[_streamIndices[0]] == _streamDocIds[_streamIndices[pivotIdx]]) {
+
+ // Found candidate. All cursors before (*_streams)[pivotIdx] point to
+ // the same doc and this doc is the candidate for full evaluation.
+ setDocId(pivotDocId);
+
+ // Advance pivotIdx sufficiently so that all instances of pivotDocId are included
+ while (pivotIdx < _numStreams-1 && _streamDocIds[_streamIndices[pivotIdx+1]] == pivotDocId) {
+ ++pivotIdx;
+ }
+
+ _lastPivotIdx = pivotIdx;
+ return; // scoring and threshold adjustment is done in doUnpack
+
+ } else { // not all cursors upto the pivot are aligned at the same doc yet
+
+ // decreases pivotIdx to the first stream pointing at the pivotDocId
+ while (pivotIdx && _streamDocIds[_streamIndices[pivotIdx-1]] == pivotDocId) {
+ --pivotIdx;
+ }
+
+ _moveStreamsToDocAndSort(pivotIdx, pivotDocId);
+ }
+
+ } /* while (true) */
+}
+
+template <typename Scorer, typename Cmp>
+bool
+RiseWand<Scorer, Cmp>::_findPivotFeatureIdx(const score_t threshold, uint32_t &pivotIdx)
+{
+ uint32_t idx;
+ score_t accumUB = 0;
+ for (idx=0;
+ !Cmp()(accumUB, threshold) && idx < _numStreams;
+ ++idx) {
+ accumUB += _streamScores[_streamIndices[idx]];
+ }
+
+ if( Cmp()(accumUB, threshold) ) {
+ pivotIdx = idx - 1;
+ return true;
+ }
+ return false;
+}
+
+template <typename Scorer, typename Cmp>
+void
+RiseWand<Scorer, Cmp>::_moveStreamsAndSort(const uint32_t numStreamsToMove)
+{
+ for (uint32_t i=0; i<numStreamsToMove; ++i) {
+ _streams[_streamIndices[i]]->seek(_streams[_streamIndices[i]]->getDocId() + 1);
+ _streamDocIds[_streamIndices[i]] = _streams[_streamIndices[i]]->getDocId();
+ }
+ _sortMerge(numStreamsToMove);
+}
+
+template <typename Scorer, typename Cmp>
+void
+RiseWand<Scorer, Cmp>::_moveStreamsToDocAndSort(const uint32_t numStreamsToMove,
+ const docid_t desiredDocId)
+{
+ for (uint32_t i=0; i<numStreamsToMove; ++i) {
+ _streams[_streamIndices[i]]->seek(desiredDocId);
+ _streamDocIds[_streamIndices[i]] = _streams[_streamIndices[i]]->getDocId();
+ }
+ _sortMerge(numStreamsToMove);
+}
+
+template <typename Scorer, typename Cmp>
+inline
+void RiseWand<Scorer, Cmp>::_sortMerge(const uint32_t numStreamsToMove)
+{
+ for (uint32_t i=0; i<numStreamsToMove; ++i) {
+ _streamIndicesAux[i] = _streamIndices[i];
+ }
+ std::sort(_streamIndicesAux, _streamIndicesAux+numStreamsToMove, _streamComparator);
+
+ uint16_t j=numStreamsToMove, k=0, i=0;
+ while (i < numStreamsToMove && j < _numStreams) {
+ if (_streamComparator(_streamIndicesAux[i], _streamIndices[j])) {
+ _streamIndices[k++] = _streamIndicesAux[i++];
+ }
+ else {
+ _streamIndices[k++] = _streamIndices[j++];
+ }
+ }
+
+ if (j == _numStreams) {
+ while (i < numStreamsToMove) {
+ _streamIndices[k++] = _streamIndicesAux[i++];
+ }
+ }
+
+ while (_numStreams &&
+ _streamDocIds[_streamIndices[_numStreams-1]] == search::endDocId) {
+ --_numStreams;
+ }
+}
+
+template <typename Scorer, typename Cmp>
+void
+RiseWand<Scorer, Cmp>::doSeek(uint32_t docid)
+{
+ if (getDocId() != beginId() && (docid - 1) == getDocId()) {
+ _moveStreamsAndSort(_lastPivotIdx + 1);
+ } else {
+ _moveStreamsToDocAndSort(_numStreams, docid);
+ }
+ next();
+}
+
+template <typename Scorer, typename Cmp>
+void
+RiseWand<Scorer, Cmp>::doUnpack(uint32_t docid)
+{
+ score_t score = 0;
+ for (size_t i = 0; i <= _lastPivotIdx; ++i) {
+ score += Scorer::calculateScore(_terms[_streamIndices[i]], docid);
+ }
+ if (_scores.size() < _n || _scores.front() < score) {
+ _scores.push(score);
+ if (_scores.size() > _n) {
+ _scores.pop_front();
+ }
+ if (_scores.size() == _n) {
+ _limit = _scores.front();
+ }
+ }
+}
+
+/**
+ ************ BEGIN STREAM COMPARTOR *********************
+ */
+template <typename Scorer, typename Cmp>
+RiseWand<Scorer, Cmp>::StreamComparator::StreamComparator(
+ const docid_t *streamDocIds)
+ : _streamDocIds(streamDocIds)
+{
+}
+
+template <typename Scorer, typename Cmp>
+inline bool
+RiseWand<Scorer, Cmp>::StreamComparator::operator()(const uint16_t a,
+ const uint16_t b)
+{
+ if (_streamDocIds[a] < _streamDocIds[b]) return true;
+ return false;
+}
+
+/**
+ ************ END STREAM COMPARTOR *********************
+ */
+
+} // namespace rise
+
diff --git a/searchlib/src/tests/queryeval/weak_and/wand_bench_setup.hpp b/searchlib/src/tests/queryeval/weak_and/wand_bench_setup.hpp
new file mode 100644
index 00000000000..4c7116edfc4
--- /dev/null
+++ b/searchlib/src/tests/queryeval/weak_and/wand_bench_setup.hpp
@@ -0,0 +1,248 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/vespalib/testkit/test_kit.h>
+#include <vespa/searchlib/queryeval/andsearch.h>
+#include <vespa/searchlib/queryeval/andnotsearch.h>
+#include <vespa/searchlib/queryeval/fake_search.h>
+#include <vespa/searchlib/queryeval/orsearch.h>
+#include <vespa/searchlib/queryeval/wand/parallel_weak_and_search.h>
+#include <vespa/searchlib/queryeval/simpleresult.h>
+#include <vespa/searchlib/queryeval/wand/weak_and_search.h>
+#include <vespa/searchlib/fef/matchdatalayout.h>
+#include <vespa/searchlib/fef/termfieldmatchdata.h>
+#include <vespa/searchlib/fef/termfieldmatchdataarray.h>
+#include <vespa/vespalib/util/stringfmt.h>
+#include "rise_wand.h"
+#include "rise_wand.hpp"
+
+using namespace search::fef;
+using namespace search::queryeval;
+using namespace vespalib;
+
+typedef ParallelWeakAndSearch::MatchParams PWMatchParams;
+typedef ParallelWeakAndSearch::RankParams PWRankParams;
+
+namespace {
+
+struct Stats {
+ size_t hitCnt;
+ size_t seekCnt;
+ size_t unpackCnt;
+ size_t skippedDocs;
+ size_t skippedHits;
+ Stats() : hitCnt(0), seekCnt(0), unpackCnt(0),
+ skippedDocs(0), skippedHits(0) {}
+ void hit() {
+ ++hitCnt;
+ }
+ void seek(size_t docs, size_t hits) {
+ ++seekCnt;
+ skippedDocs += docs;
+ skippedHits += hits;
+ }
+ void unpack() {
+ ++unpackCnt;
+ }
+ void print() {
+ fprintf(stderr, "Stats: hits=%zu, seeks=%zu, unpacks=%zu, skippedDocs=%zu, skippedHits=%zu\n",
+ hitCnt, seekCnt, unpackCnt, skippedDocs, skippedHits);
+ }
+};
+
+struct ModSearch : SearchIterator {
+ Stats &stats;
+ uint32_t step;
+ uint32_t limit;
+ MinMaxPostingInfo info;
+ TermFieldMatchData *tfmd;
+ ModSearch(Stats &stats_in, uint32_t step_in, uint32_t limit_in, int32_t maxWeight, TermFieldMatchData *tfmd_in)
+ : stats(stats_in), step(step_in), limit(limit_in), info(0, maxWeight), tfmd(tfmd_in) { }
+ void initRange(uint32_t begin, uint32_t end) override {
+ SearchIterator::initRange(begin, end);
+ setDocId(step);
+ }
+ virtual void doSeek(uint32_t docid) {
+ assert(docid > getDocId());
+ uint32_t skippedDocs = (docid - getDocId() - 1);
+ uint32_t skippedHits = (skippedDocs / step);
+ stats.seek(skippedDocs, skippedHits);
+ uint32_t hit = (docid / step) * step;
+ if (hit < docid) {
+ hit += step;
+ }
+ if (hit < limit) {
+ assert(hit >= docid);
+ setDocId(hit);
+ } else {
+ setAtEnd();
+ }
+ }
+ virtual void doUnpack(uint32_t docid) {
+ if (tfmd != NULL) {
+ tfmd->reset(docid);
+ search::fef::TermFieldMatchDataPosition pos;
+ pos.setElementWeight(info.getMaxWeight());
+ tfmd->appendPosition(pos);
+ }
+ stats.unpack();
+ }
+ virtual const PostingInfo *getPostingInfo() const { return &info; }
+};
+
+struct WandFactory {
+ virtual std::string name() const = 0;
+ virtual SearchIterator::UP create(const wand::Terms &terms) = 0;
+ virtual ~WandFactory() {}
+};
+
+struct VespaWandFactory : WandFactory {
+ uint32_t n;
+ VespaWandFactory(uint32_t n_in) : n(n_in) {}
+ virtual std::string name() const { return make_string("VESPA WAND (n=%u)", n); }
+ virtual SearchIterator::UP create(const wand::Terms &terms) {
+ return SearchIterator::UP(WeakAndSearch::create(terms, n, true));
+ }
+};
+
+struct VespaArrayWandFactory : WandFactory {
+ uint32_t n;
+ VespaArrayWandFactory(uint32_t n_in) : n(n_in) {}
+ virtual std::string name() const { return make_string("VESPA ARRAY WAND (n=%u)", n); }
+ virtual SearchIterator::UP create(const wand::Terms &terms) {
+ return SearchIterator::UP(WeakAndSearch::createArrayWand(terms, n, true));
+ }
+};
+
+struct VespaHeapWandFactory : WandFactory {
+ uint32_t n;
+ VespaHeapWandFactory(uint32_t n_in) : n(n_in) {}
+ virtual std::string name() const { return make_string("VESPA HEAP WAND (n=%u)", n); }
+ virtual SearchIterator::UP create(const wand::Terms &terms) {
+ return SearchIterator::UP(WeakAndSearch::createHeapWand(terms, n, true));
+ }
+};
+
+struct VespaParallelWandFactory : public WandFactory {
+ SharedWeakAndPriorityQueue scores;
+ TermFieldMatchData rootMatchData;
+ VespaParallelWandFactory(uint32_t n) : scores(n), rootMatchData() {}
+ virtual std::string name() const { return make_string("VESPA PWAND (n=%u)", scores.getScoresToTrack()); }
+ virtual SearchIterator::UP create(const wand::Terms &terms) {
+ return SearchIterator::UP(ParallelWeakAndSearch::create(terms,
+ PWMatchParams(scores, 0, 1, 1),
+ PWRankParams(rootMatchData, MatchData::UP()), true));
+ }
+};
+
+struct VespaParallelArrayWandFactory : public VespaParallelWandFactory {
+ VespaParallelArrayWandFactory(uint32_t n) : VespaParallelWandFactory(n) {}
+ virtual std::string name() const { return make_string("VESPA ARRAY PWAND (n=%u)", scores.getScoresToTrack()); }
+ virtual SearchIterator::UP create(const wand::Terms &terms) {
+ return SearchIterator::UP(ParallelWeakAndSearch::createArrayWand(terms,
+ PWMatchParams(scores, 0, 1, 1),
+ PWRankParams(rootMatchData, MatchData::UP()), true));
+ }
+};
+
+struct VespaParallelHeapWandFactory : public VespaParallelWandFactory {
+ VespaParallelHeapWandFactory(uint32_t n) : VespaParallelWandFactory(n) {}
+ virtual std::string name() const { return make_string("VESPA HEAP PWAND (n=%u)", scores.getScoresToTrack()); }
+ virtual SearchIterator::UP create(const wand::Terms &terms) {
+ return SearchIterator::UP(ParallelWeakAndSearch::createHeapWand(terms,
+ PWMatchParams(scores, 0, 1, 1),
+ PWRankParams(rootMatchData, MatchData::UP()), true));
+ }
+};
+
+struct TermFrequencyRiseWandFactory : WandFactory {
+ uint32_t n;
+ TermFrequencyRiseWandFactory(uint32_t n_in) : n(n_in) {}
+ virtual std::string name() const { return make_string("RISE WAND TF (n=%u)", n); }
+ virtual SearchIterator::UP create(const wand::Terms &terms) {
+ return SearchIterator::UP(new rise::TermFrequencyRiseWand(terms, n));
+ }
+};
+
+struct DotProductRiseWandFactory : WandFactory {
+ uint32_t n;
+ DotProductRiseWandFactory(uint32_t n_in) : n(n_in) {}
+ virtual std::string name() const { return make_string("RISE WAND DP (n=%u)", n); }
+ virtual SearchIterator::UP create(const wand::Terms &terms) {
+ return SearchIterator::UP(new rise::DotProductRiseWand(terms, n));
+ }
+};
+
+struct FilterFactory : WandFactory {
+ WandFactory &factory;
+ Stats stats;
+ uint32_t n;
+ FilterFactory(WandFactory &f, uint32_t n_in) : factory(f), n(n_in) {}
+ virtual std::string name() const { return make_string("Filter (mod=%u) [%s]", n, factory.name().c_str()); }
+ virtual SearchIterator::UP create(const wand::Terms &terms) {
+ AndNotSearch::Children children;
+ children.push_back(factory.create(terms).release());
+ children.push_back(new ModSearch(stats, n, search::endDocId, n, NULL));
+ return SearchIterator::UP(AndNotSearch::create(children, true));
+ }
+};
+
+struct Setup {
+ Stats stats;
+ double minTimeMs;
+ Setup() : stats(), minTimeMs(10000000.0) {}
+ virtual ~Setup() {}
+ virtual std::string name() const = 0;
+ virtual SearchIterator::UP create() = 0;
+ void perform() {
+ SearchIterator::UP search = create();
+ SearchIterator &sb = *search;
+ FastOS_Time timer;
+ timer.SetNow();
+ for (sb.seek(1); !sb.isAtEnd(); sb.seek(sb.getDocId() + 1)) {
+ stats.hit();
+ sb.unpack(sb.getDocId());
+ }
+ double ms = timer.MilliSecsToNow();
+ if (ms < minTimeMs) {
+ minTimeMs = ms;
+ }
+ }
+ void benchmark() {
+ fprintf(stderr, "running benchmark for %s...\n", name().c_str());
+ for (size_t i = 0; i < 5; ++i) {
+ perform();
+ if (i == 0) {
+ stats.print();
+ }
+ }
+ fprintf(stderr, "time (ms): %g\n", minTimeMs);
+ }
+};
+
+struct WandSetup : Setup {
+ WandFactory &factory;
+ uint32_t childCnt;
+ uint32_t limit;
+ uint32_t weight;
+ MatchData::UP matchData;
+ WandSetup(WandFactory &f, uint32_t c, uint32_t l) : Setup(), factory(f), childCnt(c), limit(l), weight(100), matchData() {}
+ virtual std::string name() const {
+ return make_string("Wand Setup (terms=%u,docs=%u) [%s]", childCnt, limit, factory.name().c_str());
+ }
+ virtual SearchIterator::UP create() {
+ MatchDataLayout layout;
+ std::vector<TermFieldHandle> handles;
+ for (size_t i = 0; i < childCnt; ++i) {
+ handles.push_back(layout.allocTermField(0));
+ }
+ matchData = layout.createMatchData();
+ wand::Terms terms;
+ for (size_t i = 1; i <= childCnt; ++i) {
+ TermFieldMatchData *tfmd = matchData->resolveTermField(handles[i-1]);
+ terms.push_back(wand::Term(new ModSearch(stats, i, limit, i, tfmd), weight, limit / i, tfmd));
+ }
+ return factory.create(terms);
+ }
+};
+
+} // namespace <unnamed>
diff --git a/searchlib/src/tests/queryeval/weak_and/weak_and_bench.cpp b/searchlib/src/tests/queryeval/weak_and/weak_and_bench.cpp
new file mode 100644
index 00000000000..1eba66a524f
--- /dev/null
+++ b/searchlib/src/tests/queryeval/weak_and/weak_and_bench.cpp
@@ -0,0 +1,19 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include "wand_bench_setup.hpp"
+
+TEST_FF("benchmark", VespaWandFactory(1000), WandSetup(f1, 10, 10000000)) { f2.benchmark(); }
+TEST_FF("benchmark", TermFrequencyRiseWandFactory(1000), WandSetup(f1, 10, 10000000)) { f2.benchmark(); }
+TEST_FF("benchmark", VespaWandFactory(1000), WandSetup(f1, 100, 10000000)) { f2.benchmark(); }
+TEST_FF("benchmark", TermFrequencyRiseWandFactory(1000), WandSetup(f1, 100, 10000000)) { f2.benchmark(); }
+TEST_FF("benchmark", VespaWandFactory(1000), WandSetup(f1, 1000, 10000000)) { f2.benchmark(); }
+TEST_FF("benchmark", TermFrequencyRiseWandFactory(1000), WandSetup(f1, 1000, 10000000)) { f2.benchmark(); }
+
+TEST_FFF("benchmark", VespaWandFactory(1000), FilterFactory(f1, 2), WandSetup(f2, 10, 10000000)) { f3.benchmark(); }
+TEST_FFF("benchmark", TermFrequencyRiseWandFactory(1000), FilterFactory(f1, 2), WandSetup(f2, 10, 10000000)) { f3.benchmark(); }
+TEST_FFF("benchmark", VespaWandFactory(1000), FilterFactory(f1, 2), WandSetup(f2, 100, 10000000)) { f3.benchmark(); }
+TEST_FFF("benchmark", TermFrequencyRiseWandFactory(1000), FilterFactory(f1, 2), WandSetup(f2, 100, 10000000)) { f3.benchmark(); }
+TEST_FFF("benchmark", VespaWandFactory(1000), FilterFactory(f1, 2), WandSetup(f2, 1000, 10000000)) { f3.benchmark(); }
+TEST_FFF("benchmark", TermFrequencyRiseWandFactory(1000), FilterFactory(f1, 2), WandSetup(f2, 1000, 10000000)) { f3.benchmark(); }
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/queryeval/weak_and/weak_and_test.cpp b/searchlib/src/tests/queryeval/weak_and/weak_and_test.cpp
new file mode 100644
index 00000000000..3c64db1eb84
--- /dev/null
+++ b/searchlib/src/tests/queryeval/weak_and/weak_and_test.cpp
@@ -0,0 +1,128 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/vespalib/testkit/test_kit.h>
+#include <vespa/searchlib/queryeval/fake_search.h>
+#include <vespa/searchlib/queryeval/wand/weak_and_search.h>
+#include <vespa/searchlib/queryeval/simpleresult.h>
+#include <vespa/searchlib/queryeval/simplesearch.h>
+#include <vespa/searchlib/queryeval/test/eagerchild.h>
+#include <vespa/searchlib/queryeval/test/leafspec.h>
+#include <vespa/searchlib/queryeval/test/searchhistory.h>
+#include <vespa/searchlib/queryeval/test/trackedsearch.h>
+#include <vespa/searchlib/queryeval/test/wandspec.h>
+#include <vespa/searchlib/fef/termfieldmatchdata.h>
+#include <vespa/searchlib/fef/termfieldmatchdataarray.h>
+#include <vespa/searchlib/test/initrange.h>
+
+using namespace search::fef;
+using namespace search::queryeval;
+using namespace search::queryeval::test;
+using search::test::InitRangeVerifier;
+
+typedef SearchHistory History;
+
+namespace {
+
+struct MyWandSpec : public WandSpec
+{
+ uint32_t n;
+
+ MyWandSpec(uint32_t n_) : WandSpec(), n(n_) {}
+ SearchIterator *create() {
+ return new TrackedSearch("WAND", getHistory(), WeakAndSearch::create(getTerms(), n, true));
+ }
+};
+
+struct SimpleWandFixture {
+ MyWandSpec spec;
+ SimpleResult hits;
+ SimpleWandFixture() : spec(2), hits() {
+ spec.leaf(LeafSpec("foo").doc(1).doc(2).doc(3).doc(4).doc(5).doc(6));
+ spec.leaf(LeafSpec("bar").doc(1).doc(3).doc(5));
+ SearchIterator::UP search(spec.create());
+ hits.search(*search);
+ }
+};
+
+struct AdvancedWandFixture {
+ MyWandSpec spec;
+ SimpleResult hits;
+ AdvancedWandFixture() : spec(100), hits() {
+ spec.leaf(LeafSpec("1").doc(1).doc(11).doc(111));
+ spec.leaf(LeafSpec("2").doc(2).doc(12).doc(112));
+ spec.leaf(LeafSpec("3").doc(3).doc(13).doc(113));
+ spec.leaf(LeafSpec("4").doc(4).doc(14).doc(114));
+ spec.leaf(LeafSpec("5").doc(5).doc(15).doc(115));
+ SearchIterator::UP search(spec.create());
+ hits.search(*search);
+ }
+};
+
+struct WeightOrder {
+ bool operator()(const wand::Term &t1, const wand::Term &t2) const {
+ return (t1.weight < t2.weight);
+ }
+};
+
+} // namespace <unnamed>
+
+TEST_F("require that wand prunes bad hits after enough good ones are obtained", SimpleWandFixture) {
+ EXPECT_EQUAL(SimpleResult().addHit(1).addHit(2).addHit(3).addHit(5), f.hits);
+}
+
+TEST_F("require that wand uses subsearches as expected", SimpleWandFixture) {
+ EXPECT_EQUAL(History()
+ .seek("WAND", 1).seek("bar", 1).step("bar", 1).step("WAND", 1)
+ .unpack("WAND", 1).seek("foo", 1).step("foo", 1).unpack("bar", 1).unpack("foo", 1)
+ .seek("WAND", 2).seek("bar", 2).step("bar", 3).seek("foo", 2).step("foo", 2).step("WAND", 2)
+ .unpack("WAND", 2).unpack("foo", 2)
+ .seek("WAND", 3).step("WAND", 3)
+ .unpack("WAND", 3).seek("foo", 3).step("foo", 3).unpack("bar", 3).unpack("foo", 3)
+ .seek("WAND", 4).seek("bar", 4).step("bar", 5).seek("foo", 5).step("foo", 5).step("WAND", 5)
+ .unpack("WAND", 5).unpack("bar", 5).unpack("foo", 5)
+ .seek("WAND", 6).seek("bar", 6).step("bar", search::endDocId).step("WAND", search::endDocId),
+ f.spec.getHistory());
+}
+
+TEST_F("require that documents are considered in the right order", AdvancedWandFixture) {
+ EXPECT_EQUAL(SimpleResult()
+ .addHit(1).addHit(2).addHit(3).addHit(4).addHit(5)
+ .addHit(11).addHit(12).addHit(13).addHit(14).addHit(15)
+ .addHit(111).addHit(112).addHit(113).addHit(114).addHit(115), f.hits);
+}
+
+TEST("require that initial docid for subsearches are taken into account") {
+ History history;
+ wand::Terms terms;
+ terms.push_back(wand::Term(new TrackedSearch("foo", history, new EagerChild(search::endDocId)), 100, 1));
+ terms.push_back(wand::Term(new TrackedSearch("bar", history, new EagerChild(10)), 100, 2));
+ SearchIterator::UP search(new TrackedSearch("WAND", history, WeakAndSearch::create(terms, 2, true)));
+ SimpleResult hits;
+ hits.search(*search);
+ EXPECT_EQUAL(SimpleResult().addHit(10), hits);
+ EXPECT_EQUAL(History().seek("WAND", 1).step("WAND", 10).unpack("WAND", 10).unpack("bar", 10)
+ .seek("WAND", 11).seek("bar", 11).step("bar", search::endDocId).step("WAND", search::endDocId),
+ history);
+}
+
+TEST("verify initRange with search iterator children") {
+ const size_t num_children = 7;
+ InitRangeVerifier ir;
+ using DocIds = InitRangeVerifier::DocIds;
+ std::vector<DocIds> split_lists(num_children);
+ auto full_list = ir.getExpectedDocIds();
+ for (size_t i = 0; i < full_list.size(); ++i) {
+ split_lists[i % num_children].push_back(full_list[i]);
+ }
+ for (bool strict: {false, true}) {
+ wand::Terms terms;
+ for (size_t i = 0; i < num_children; ++i) {
+ terms.emplace_back(ir.createIterator(split_lists[i], strict).release(),
+ 100, split_lists[i].size());
+ }
+ SearchIterator::UP itr(WeakAndSearch::create(terms, -1, strict));
+ ir.verify(*itr);
+ }
+}
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/queryeval/weak_and/weak_and_test_expensive.cpp b/searchlib/src/tests/queryeval/weak_and/weak_and_test_expensive.cpp
new file mode 100644
index 00000000000..8f60b6dd8c7
--- /dev/null
+++ b/searchlib/src/tests/queryeval/weak_and/weak_and_test_expensive.cpp
@@ -0,0 +1,102 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include "wand_bench_setup.hpp"
+
+using namespace rise;
+
+namespace {
+
+template <typename WeakAndType, typename RiseType>
+void checkWandHits(WandFactory &vespa, WandFactory &rise, uint32_t step, uint32_t filter) {
+ WandSetup vespaSetup(vespa, 500, 5000000);
+ WandSetup riseSetup(rise, 500, 5000000);
+ SearchIterator::UP s1 = vespaSetup.create();
+ s1->initFullRange();
+ SearchIterator::UP s2 = riseSetup.create();
+ s2->initFullRange();
+ ASSERT_TRUE(dynamic_cast<WeakAndType*>(s1.get()) != 0);
+ ASSERT_TRUE(dynamic_cast<WeakAndType*>(s2.get()) == 0);
+ ASSERT_TRUE(dynamic_cast<RiseType*>(s2.get()) != 0);
+ ASSERT_TRUE(dynamic_cast<RiseType*>(s1.get()) == 0);
+ s1->seek(1);
+ s2->seek(1);
+ while (!s1->isAtEnd() &&
+ !s2->isAtEnd())
+ {
+ ASSERT_EQUAL(s1->getDocId(), s2->getDocId());
+ if ((filter == 0) || ((s1->getDocId() % filter) != 0)) {
+ s1->unpack(s1->getDocId());
+ s2->unpack(s2->getDocId());
+ }
+ s1->seek(s1->getDocId() + step);
+ s2->seek(s2->getDocId() + step);
+ }
+ ASSERT_TRUE(s1->isAtEnd());
+ ASSERT_TRUE(s2->isAtEnd());
+}
+
+} // namespace <unnamed>
+
+TEST("require that mod search works") {
+ Stats stats;
+ SearchIterator::UP search(new ModSearch(stats, 3, 8, 3, NULL));
+ SimpleResult hits;
+ hits.search(*search);
+ EXPECT_EQUAL(SimpleResult().addHit(3).addHit(6), hits);
+}
+
+//---- WeakAndSearch ------------------------------------------------------------------------------
+
+TEST_FF("require that (array) WAND and RISE WAND gives the same hits",
+ VespaArrayWandFactory(500), TermFrequencyRiseWandFactory(500))
+{
+ checkWandHits<WeakAndSearch, TermFrequencyRiseWand>(f1, f2, 1, 0);
+}
+
+TEST_FF("require that (heap) WAND and RISE WAND gives the same hits",
+ VespaHeapWandFactory(500), TermFrequencyRiseWandFactory(500))
+{
+ checkWandHits<WeakAndSearch, TermFrequencyRiseWand>(f1, f2, 1, 0);
+}
+
+TEST_FF("require that (array) WAND and RISE WAND gives the same hits with filtering and skipping",
+ VespaArrayWandFactory(500), TermFrequencyRiseWandFactory(500))
+{
+ checkWandHits<WeakAndSearch, TermFrequencyRiseWand>(f1, f2, 123, 5);
+}
+
+TEST_FF("require that (heap) WAND and RISE WAND gives the same hits with filtering and skipping",
+ VespaHeapWandFactory(500), TermFrequencyRiseWandFactory(500))
+{
+ checkWandHits<WeakAndSearch, TermFrequencyRiseWand>(f1, f2, 123, 5);
+}
+
+
+//---- ParallelWeakAndSearch ----------------------------------------------------------------------
+
+TEST_FF("require that (array) PWAND and RISE WAND gives the same hits",
+ VespaParallelArrayWandFactory(500), DotProductRiseWandFactory(500))
+{
+ checkWandHits<ParallelWeakAndSearch, DotProductRiseWand>(f1, f2, 1, 0);
+}
+
+TEST_FF("require that (heap) PWAND and RISE WAND gives the same hits",
+ VespaParallelHeapWandFactory(500), DotProductRiseWandFactory(500))
+{
+ checkWandHits<ParallelWeakAndSearch, DotProductRiseWand>(f1, f2, 1, 0);
+}
+
+TEST_FF("require that (array) PWAND and RISE WAND gives the same hits with filtering and skipping",
+ VespaParallelArrayWandFactory(500), DotProductRiseWandFactory(500))
+{
+ checkWandHits<ParallelWeakAndSearch, DotProductRiseWand>(f1, f2, 123, 5);
+}
+
+TEST_FF("require that (heap) PWAND and RISE WAND gives the same hits with filtering and skipping",
+ VespaParallelHeapWandFactory(500), DotProductRiseWandFactory(500))
+{
+ checkWandHits<ParallelWeakAndSearch, DotProductRiseWand>(f1, f2, 123, 5);
+}
+
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/queryeval/weak_and_heap/.gitignore b/searchlib/src/tests/queryeval/weak_and_heap/.gitignore
new file mode 100644
index 00000000000..b10f1cb370d
--- /dev/null
+++ b/searchlib/src/tests/queryeval/weak_and_heap/.gitignore
@@ -0,0 +1 @@
+searchlib_weak_and_heap_test_app
diff --git a/searchlib/src/tests/queryeval/weak_and_heap/CMakeLists.txt b/searchlib/src/tests/queryeval/weak_and_heap/CMakeLists.txt
new file mode 100644
index 00000000000..cacf4987aff
--- /dev/null
+++ b/searchlib/src/tests/queryeval/weak_and_heap/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_weak_and_heap_test_app
+ SOURCES
+ weak_and_heap_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_weak_and_heap_test_app COMMAND searchlib_weak_and_heap_test_app)
diff --git a/searchlib/src/tests/queryeval/weak_and_heap/DESC b/searchlib/src/tests/queryeval/weak_and_heap/DESC
new file mode 100644
index 00000000000..447bfc21e7c
--- /dev/null
+++ b/searchlib/src/tests/queryeval/weak_and_heap/DESC
@@ -0,0 +1 @@
+weak_and_heap test. Take a look at weak_and_heap_test.cpp for details.
diff --git a/searchlib/src/tests/queryeval/weak_and_heap/FILES b/searchlib/src/tests/queryeval/weak_and_heap/FILES
new file mode 100644
index 00000000000..05d3f4c5df0
--- /dev/null
+++ b/searchlib/src/tests/queryeval/weak_and_heap/FILES
@@ -0,0 +1 @@
+weak_and_heap_test.cpp
diff --git a/searchlib/src/tests/queryeval/weak_and_heap/weak_and_heap_test.cpp b/searchlib/src/tests/queryeval/weak_and_heap/weak_and_heap_test.cpp
new file mode 100644
index 00000000000..ee44abf2b27
--- /dev/null
+++ b/searchlib/src/tests/queryeval/weak_and_heap/weak_and_heap_test.cpp
@@ -0,0 +1,101 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/vespalib/testkit/test_kit.h>
+#include <vespa/searchlib/queryeval/wand/weak_and_heap.h>
+
+using namespace search::queryeval;
+typedef wand::score_t score_t;
+
+struct Scores : public std::vector<score_t> {
+ Scores &s(score_t val) {
+ push_back(val);
+ return *this;
+ }
+};
+
+void
+adjust(WeakAndHeap &heap, const Scores &scores)
+{
+ Scores tmp = scores;
+ heap.adjust(&tmp[0], &tmp[0] + tmp.size());
+}
+
+void
+assertScores(const Scores &exp, SharedWeakAndPriorityQueue &heap)
+{
+ ASSERT_EQUAL(exp.size(), heap.getScores().size());
+ for (size_t i = 0; i < exp.size(); ++i) {
+ score_t front = heap.getScores().front();
+ EXPECT_EQUAL(exp[i], front);
+ heap.getScores().pop_front();
+ }
+}
+
+struct NullFixture {
+ SharedWeakAndPriorityQueue h;
+ NullFixture() : h(0) {}
+};
+
+struct EmptyFixture {
+ SharedWeakAndPriorityQueue h;
+ EmptyFixture() : h(4) {}
+};
+
+struct FilledFixture {
+ SharedWeakAndPriorityQueue h;
+ FilledFixture() : h(4) {
+ adjust(h, Scores().s(3).s(5).s(7).s(9));
+ EXPECT_EQUAL(3, h.getMinScore());
+ }
+};
+
+TEST_F("require that SharedWeakAndPriorityQueue with 0 size gives max threshold", NullFixture)
+{
+ EXPECT_EQUAL(std::numeric_limits<score_t>::max(), f.h.getMinScore());
+ adjust(f.h, Scores().s(100));
+ EXPECT_EQUAL(std::numeric_limits<score_t>::max(), f.h.getMinScore());
+}
+
+TEST_F("require that SharedWeakAndPriorityQueue can be filled one-by-one", EmptyFixture)
+{
+ adjust(f.h, Scores().s(4));
+ EXPECT_EQUAL(0, f.h.getMinScore());
+ adjust(f.h, Scores().s(3));
+ EXPECT_EQUAL(0, f.h.getMinScore());
+ adjust(f.h, Scores().s(2));
+ EXPECT_EQUAL(0, f.h.getMinScore());
+ adjust(f.h, Scores().s(1));
+ EXPECT_EQUAL(1, f.h.getMinScore());
+ assertScores(Scores().s(1).s(2).s(3).s(4), f.h);
+}
+
+TEST_F("require that SharedWeakAndPriorityQueue can be filled all-at-once", EmptyFixture)
+{
+ adjust(f.h, Scores().s(4).s(3).s(2).s(1));
+ EXPECT_EQUAL(1, f.h.getMinScore());
+ assertScores(Scores().s(1).s(2).s(3).s(4), f.h);
+}
+
+TEST_F("require that SharedWeakAndPriorityQueue can be adjusted one-by-one", FilledFixture)
+{
+ adjust(f.h, Scores().s(2));
+ EXPECT_EQUAL(3, f.h.getMinScore());
+ adjust(f.h, Scores().s(3));
+ EXPECT_EQUAL(3, f.h.getMinScore());
+ adjust(f.h, Scores().s(6));
+ EXPECT_EQUAL(5, f.h.getMinScore());
+ adjust(f.h, Scores().s(8));
+ EXPECT_EQUAL(6, f.h.getMinScore());
+ adjust(f.h, Scores().s(4));
+ EXPECT_EQUAL(6, f.h.getMinScore());
+ assertScores(Scores().s(6).s(7).s(8).s(9), f.h);
+}
+
+TEST_F("require that SharedWeakAndPriorityQueue can be adjusted all-at-once", FilledFixture)
+{
+ adjust(f.h, Scores().s(2).s(3).s(6).s(8).s(4));
+ EXPECT_EQUAL(6, f.h.getMinScore());
+ assertScores(Scores().s(6).s(7).s(8).s(9), f.h);
+}
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/queryeval/weak_and_scorers/.gitignore b/searchlib/src/tests/queryeval/weak_and_scorers/.gitignore
new file mode 100644
index 00000000000..18fa7afeed4
--- /dev/null
+++ b/searchlib/src/tests/queryeval/weak_and_scorers/.gitignore
@@ -0,0 +1 @@
+searchlib_weak_and_scorers_test_app
diff --git a/searchlib/src/tests/queryeval/weak_and_scorers/CMakeLists.txt b/searchlib/src/tests/queryeval/weak_and_scorers/CMakeLists.txt
new file mode 100644
index 00000000000..74a37c8fce8
--- /dev/null
+++ b/searchlib/src/tests/queryeval/weak_and_scorers/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_weak_and_scorers_test_app
+ SOURCES
+ weak_and_scorers_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_weak_and_scorers_test_app COMMAND searchlib_weak_and_scorers_test_app)
diff --git a/searchlib/src/tests/queryeval/weak_and_scorers/DESC b/searchlib/src/tests/queryeval/weak_and_scorers/DESC
new file mode 100644
index 00000000000..ceaf1028aae
--- /dev/null
+++ b/searchlib/src/tests/queryeval/weak_and_scorers/DESC
@@ -0,0 +1 @@
+weak_and_scorers test. Take a look at weak_and_scorers_test.cpp for details.
diff --git a/searchlib/src/tests/queryeval/weak_and_scorers/FILES b/searchlib/src/tests/queryeval/weak_and_scorers/FILES
new file mode 100644
index 00000000000..7f3b71a9f34
--- /dev/null
+++ b/searchlib/src/tests/queryeval/weak_and_scorers/FILES
@@ -0,0 +1 @@
+weak_and_scorers_test.cpp
diff --git a/searchlib/src/tests/queryeval/weak_and_scorers/weak_and_scorers_test.cpp b/searchlib/src/tests/queryeval/weak_and_scorers/weak_and_scorers_test.cpp
new file mode 100644
index 00000000000..2dec1762c27
--- /dev/null
+++ b/searchlib/src/tests/queryeval/weak_and_scorers/weak_and_scorers_test.cpp
@@ -0,0 +1,67 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/vespalib/testkit/test_kit.h>
+#include <vespa/searchlib/fef/termfieldmatchdata.h>
+#include <vespa/searchlib/queryeval/searchiterator.h>
+#include <vespa/searchlib/queryeval/wand/wand_parts.h>
+
+using namespace search::queryeval;
+using search::fef::TermFieldMatchData;
+using search::fef::TermFieldMatchDataPosition;
+
+typedef wand::Term Term;
+
+struct TestIterator : public SearchIterator
+{
+ MinMaxPostingInfo _info;
+ int32_t _termWeight;
+ bool _useInfo;
+ TermFieldMatchData _tfmd;
+ uint32_t _unpackDocId;
+
+ typedef std::unique_ptr<TestIterator> UP;
+ TestIterator(int32_t maxWeight, int32_t termWeight, bool useInfo)
+ : _info(0, maxWeight),
+ _termWeight(termWeight),
+ _useInfo(useInfo),
+ _unpackDocId(0)
+ {}
+ virtual void doSeek(uint32_t docId) {
+ (void) docId;
+ }
+ virtual void doUnpack(uint32_t docId) {
+ _unpackDocId = docId;
+ _tfmd.appendPosition(TermFieldMatchDataPosition(0, 0, _termWeight, 1));
+ }
+ virtual const PostingInfo *getPostingInfo() const {
+ return (_useInfo ? &_info : NULL);
+ }
+ static UP create(int32_t maxWeight, int32_t termWeight, bool useInfo) {
+ return UP(new TestIterator(maxWeight, termWeight, useInfo));
+ }
+};
+
+TEST("require that DotProductScorer calculates max score")
+{
+ TestIterator::UP itr = TestIterator::create(10, 0, true);
+ Term term(itr.get(), 5, 0);
+ EXPECT_EQUAL(50, wand::DotProductScorer::calculateMaxScore(term));
+}
+
+TEST("require that DotProductScorer uses default max weight when not available in search iterator")
+{
+ TestIterator::UP itr = TestIterator::create(10, 0, false);
+ Term term(itr.get(), 5, 0);
+ int64_t exp = (int64_t)5 * std::numeric_limits<int32_t>::max();
+ EXPECT_EQUAL(exp, wand::DotProductScorer::calculateMaxScore(term));
+}
+
+TEST("require that DotProductScorer calculates term score")
+{
+ TestIterator::UP itr = TestIterator::create(0, 7, false);
+ Term term(itr.get(), 5, 0, &itr->_tfmd);
+ EXPECT_EQUAL(35, wand::DotProductScorer::calculateScore(term, 11));
+ EXPECT_EQUAL(11u, itr->_unpackDocId);
+}
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/queryeval/weighted_set_term/.gitignore b/searchlib/src/tests/queryeval/weighted_set_term/.gitignore
new file mode 100644
index 00000000000..ab8cbb5bd5a
--- /dev/null
+++ b/searchlib/src/tests/queryeval/weighted_set_term/.gitignore
@@ -0,0 +1 @@
+searchlib_weighted_set_term_test_app
diff --git a/searchlib/src/tests/queryeval/weighted_set_term/CMakeLists.txt b/searchlib/src/tests/queryeval/weighted_set_term/CMakeLists.txt
new file mode 100644
index 00000000000..4083762d115
--- /dev/null
+++ b/searchlib/src/tests/queryeval/weighted_set_term/CMakeLists.txt
@@ -0,0 +1,9 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_weighted_set_term_test_app
+ SOURCES
+ weighted_set_term_test.cpp
+ DEPENDS
+ searchlib
+ searchlib_test
+)
+vespa_add_test(NAME searchlib_weighted_set_term_test_app COMMAND searchlib_weighted_set_term_test_app)
diff --git a/searchlib/src/tests/queryeval/weighted_set_term/DESC b/searchlib/src/tests/queryeval/weighted_set_term/DESC
new file mode 100644
index 00000000000..040554bdd0e
--- /dev/null
+++ b/searchlib/src/tests/queryeval/weighted_set_term/DESC
@@ -0,0 +1 @@
+weighted_set_term test. Take a look at weighted_set_term_test.cpp for details.
diff --git a/searchlib/src/tests/queryeval/weighted_set_term/FILES b/searchlib/src/tests/queryeval/weighted_set_term/FILES
new file mode 100644
index 00000000000..9912bc9a4a2
--- /dev/null
+++ b/searchlib/src/tests/queryeval/weighted_set_term/FILES
@@ -0,0 +1 @@
+weighted_set_term_test.cpp
diff --git a/searchlib/src/tests/queryeval/weighted_set_term/weighted_set_term_test.cpp b/searchlib/src/tests/queryeval/weighted_set_term/weighted_set_term_test.cpp
new file mode 100644
index 00000000000..7436913b642
--- /dev/null
+++ b/searchlib/src/tests/queryeval/weighted_set_term/weighted_set_term_test.cpp
@@ -0,0 +1,240 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("weighted_set_term_test");
+#include <vespa/vespalib/testkit/testapp.h>
+#include <vespa/searchlib/queryeval/weighted_set_term_search.h>
+
+#include <vespa/searchlib/fef/fef.h>
+#include <vespa/searchlib/query/tree/simplequery.h>
+#include <vespa/searchlib/queryeval/field_spec.h>
+#include <vespa/searchlib/queryeval/searchiterator.h>
+#include <vespa/searchlib/queryeval/blueprint.h>
+#include <vespa/searchlib/queryeval/fake_result.h>
+#include <vespa/searchlib/queryeval/fake_searchable.h>
+#include <vespa/searchlib/queryeval/fake_requestcontext.h>
+#include <vespa/searchlib/queryeval/weighted_set_term_search.h>
+#include <vespa/searchlib/test/initrange.h>
+#include <vespa/searchlib/test/document_weight_attribute_helper.h>
+#include <memory>
+#include <string>
+#include <map>
+
+using namespace search;
+using namespace search::query;
+using namespace search::fef;
+using namespace search::queryeval;
+using search::test::InitRangeVerifier;
+using search::test::DocumentWeightAttributeHelper;
+
+namespace {
+
+void setupFakeSearchable(FakeSearchable &fake) {
+ for (size_t docid = 1; docid < 10; ++docid) {
+ std::string token1 = vespalib::make_string("%zu", docid);
+ std::string token2 = vespalib::make_string("1%zu", docid);
+ std::string token3 = vespalib::make_string("2%zu", docid);
+
+ fake.addResult("field", token1, FakeResult().doc(docid));
+ fake.addResult("multi-field", token1, FakeResult().doc(docid));
+ fake.addResult("multi-field", token2, FakeResult().doc(docid));
+ fake.addResult("multi-field", token3, FakeResult().doc(docid));
+ }
+}
+
+struct WS {
+ static const uint32_t fieldId = 42;
+ MatchDataLayout layout;
+ TermFieldHandle handle;
+ std::vector<std::pair<std::string, uint32_t> > tokens;
+
+ WS() : layout(), handle(layout.allocTermField(fieldId)), tokens() {
+ MatchData::UP tmp = layout.createMatchData();
+ ASSERT_TRUE(tmp->resolveTermField(handle)->getFieldId() == fieldId);
+ }
+
+ WS &add(const std::string &token, uint32_t weight) {
+ tokens.push_back(std::make_pair(token, weight));
+ return *this;
+ }
+
+ Node::UP createNode() const {
+ SimpleWeightedSetTerm *node = new SimpleWeightedSetTerm("view", 0, Weight(0));
+ for (size_t i = 0; i < tokens.size(); ++i) {
+ node->append(Node::UP(new SimpleStringTerm(tokens[i].first, "view", 0, Weight(tokens[i].second))));
+ }
+ return Node::UP(node);
+ }
+
+ bool isGenericSearch(Searchable &searchable, const std::string &field, bool strict) const {
+ FakeRequestContext requestContext;
+ MatchData::UP md = layout.createMatchData();
+ Node::UP node = createNode();
+ FieldSpecList fields = FieldSpecList().add(FieldSpec(field, fieldId, handle));
+ queryeval::Blueprint::UP bp = searchable.createBlueprint(requestContext, fields, *node);
+ bp->fetchPostings(strict);
+ SearchIterator::UP sb = bp->createSearch(*md, strict);
+ return (dynamic_cast<WeightedSetTermSearch*>(sb.get()) != 0);
+ }
+
+ FakeResult search(Searchable &searchable, const std::string &field, bool strict) const {
+ FakeRequestContext requestContext;
+ MatchData::UP md = layout.createMatchData();
+ Node::UP node = createNode();
+ FieldSpecList fields = FieldSpecList().add(FieldSpec(field, fieldId, handle));
+ queryeval::Blueprint::UP bp = searchable.createBlueprint(requestContext, fields, *node);
+ bp->fetchPostings(strict);
+ SearchIterator::UP sb = bp->createSearch(*md, strict);
+ sb->initFullRange();
+ FakeResult result;
+ for (uint32_t docId = 1; docId < 10; ++docId) {
+ if (sb->seek(docId)) {
+ sb->unpack(docId);
+ result.doc(docId);
+ TermFieldMatchData &data = *md->resolveTermField(handle);
+ FieldPositionsIterator itr = data.getIterator();
+ for (; itr.valid(); itr.next()) {
+ result.elem(itr.getElementId());
+ result.weight(itr.getElementWeight());
+ result.pos(itr.getPosition());
+ }
+ }
+ }
+ return result;
+ }
+};
+
+struct MockSearch : public SearchIterator {
+ int seekCnt;
+ int _initial;
+ MockSearch(uint32_t initial) : SearchIterator(), seekCnt(0), _initial(initial) { }
+ void initRange(uint32_t begin, uint32_t end) override {
+ SearchIterator::initRange(begin, end);
+ setDocId(_initial);
+ }
+ virtual void doSeek(uint32_t) {
+ ++seekCnt;
+ setAtEnd();
+ }
+ virtual void doUnpack(uint32_t) {}
+};
+
+struct MockFixture {
+ MockSearch *mock;
+ TermFieldMatchData tfmd;
+ std::unique_ptr<SearchIterator> search;
+ MockFixture(uint32_t initial) : mock(0), tfmd(), search() {
+ std::vector<SearchIterator*> children;
+ std::vector<int32_t> weights;
+ mock = new MockSearch(initial);
+ children.push_back(mock);
+ weights.push_back(1);
+ search.reset(WeightedSetTermSearch::create(children, tfmd, weights));
+ }
+};
+
+} // namespace <unnamed>
+
+TEST("testSimple") {
+ FakeSearchable index;
+ setupFakeSearchable(index);
+ FakeResult expect = FakeResult()
+ .doc(3).elem(0).weight(30).pos(0)
+ .doc(5).elem(0).weight(50).pos(0)
+ .doc(7).elem(0).weight(70).pos(0);
+ WS ws = WS().add("7", 70).add("5", 50).add("3", 30).add("100", 1000);
+ EXPECT_TRUE(ws.isGenericSearch(index, "field", true));
+ EXPECT_TRUE(ws.isGenericSearch(index, "field", false));
+ EXPECT_TRUE(ws.isGenericSearch(index, "multi-field", true));
+ EXPECT_TRUE(ws.isGenericSearch(index, "multi-field", false));
+
+ EXPECT_EQUAL(expect, ws.search(index, "field", true));
+ EXPECT_EQUAL(expect, ws.search(index, "field", false));
+ EXPECT_EQUAL(expect, ws.search(index, "multi-field", true));
+ EXPECT_EQUAL(expect, ws.search(index, "multi-field", false));
+}
+
+TEST("testMulti") {
+ FakeSearchable index;
+ setupFakeSearchable(index);
+ FakeResult expect = FakeResult()
+ .doc(3).elem(0).weight(230).pos(0).elem(0).weight(130).pos(0).elem(0).weight(30).pos(0)
+ .doc(5).elem(0).weight(150).pos(0).elem(0).weight(50).pos(0)
+ .doc(7).elem(0).weight(70).pos(0);
+ WS ws = WS().add("7", 70).add("5", 50).add("3", 30)
+ .add("15", 150).add("13", 130)
+ .add("23", 230).add("100", 1000);
+ EXPECT_TRUE(ws.isGenericSearch(index, "multi-field", true));
+ EXPECT_TRUE(ws.isGenericSearch(index, "multi-field", false));
+
+ EXPECT_EQUAL(expect, ws.search(index, "multi-field", true));
+ EXPECT_EQUAL(expect, ws.search(index, "multi-field", false));
+}
+
+TEST_F("test Eager Empty Child", MockFixture(search::endDocId)) {
+ MockSearch *mock = f1.mock;
+ SearchIterator &search = *f1.search;
+ search.initFullRange();
+ EXPECT_EQUAL(search.beginId(), search.getDocId());
+ EXPECT_TRUE(!search.seek(1));
+ EXPECT_TRUE(search.isAtEnd());
+ EXPECT_EQUAL(0, mock->seekCnt);
+}
+
+TEST_F("test Eager Matching Child", MockFixture(5)) {
+ MockSearch *mock = f1.mock;
+ SearchIterator &search = *f1.search;
+ search.initFullRange();
+ EXPECT_EQUAL(search.beginId(), search.getDocId());
+ EXPECT_TRUE(!search.seek(3));
+ EXPECT_EQUAL(5u, search.getDocId());
+ EXPECT_EQUAL(0, mock->seekCnt);
+ EXPECT_TRUE(search.seek(5));
+ EXPECT_EQUAL(5u, search.getDocId());
+ EXPECT_EQUAL(0, mock->seekCnt);
+ EXPECT_TRUE(!search.seek(7));
+ EXPECT_TRUE(search.isAtEnd());
+ EXPECT_EQUAL(1, mock->seekCnt);
+}
+
+TEST("verify initRange with search iterator children") {
+ const size_t num_children = 7;
+ InitRangeVerifier ir;
+ using DocIds = InitRangeVerifier::DocIds;
+ std::vector<DocIds> split_lists(num_children);
+ auto full_list = ir.getExpectedDocIds();
+ for (size_t i = 0; i < full_list.size(); ++i) {
+ split_lists[i % num_children].push_back(full_list[i]);
+ }
+ bool strict = true;
+ std::vector<SearchIterator*> children;
+ for (size_t i = 0; i < num_children; ++i) {
+ children.push_back(ir.createIterator(split_lists[i], strict).release());
+ }
+ TermFieldMatchData tfmd;
+ std::vector<int32_t> weights(num_children, 1);
+ SearchIterator::UP itr(WeightedSetTermSearch::create(children, tfmd, weights));
+ ir.verify(*itr);
+}
+
+TEST("verify initRange with document weight iterator children") {
+ const size_t num_children = 7;
+ InitRangeVerifier ir;
+ DocumentWeightAttributeHelper helper;
+ helper.add_docs(ir.getDocIdLimit());
+ auto full_list = ir.getExpectedDocIds();
+ for (size_t i = 0; i < full_list.size(); ++i) {
+ helper.set_doc(full_list[i], i % num_children, 1);
+ }
+ TermFieldMatchData tfmd;
+ std::vector<int32_t> weights(num_children, 1);
+ std::vector<DocumentWeightIterator> children;
+ for (size_t i = 0; i < num_children; ++i) {
+ auto dict_entry = helper.dwa().lookup(vespalib::make_string("%zu", i).c_str());
+ helper.dwa().create(dict_entry.posting_idx, children);
+ }
+ SearchIterator::UP itr(WeightedSetTermSearch::create(tfmd, weights, std::move(children)));
+ ir.verify(*itr);
+}
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/rankingexpression/feature_name_extractor/.gitignore b/searchlib/src/tests/rankingexpression/feature_name_extractor/.gitignore
new file mode 100644
index 00000000000..88c86c1720e
--- /dev/null
+++ b/searchlib/src/tests/rankingexpression/feature_name_extractor/.gitignore
@@ -0,0 +1 @@
+searchlib_feature_name_extractor_test_app
diff --git a/searchlib/src/tests/rankingexpression/feature_name_extractor/CMakeLists.txt b/searchlib/src/tests/rankingexpression/feature_name_extractor/CMakeLists.txt
new file mode 100644
index 00000000000..b1b81efd840
--- /dev/null
+++ b/searchlib/src/tests/rankingexpression/feature_name_extractor/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_feature_name_extractor_test_app
+ SOURCES
+ feature_name_extractor_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_feature_name_extractor_test_app COMMAND searchlib_feature_name_extractor_test_app)
diff --git a/searchlib/src/tests/rankingexpression/feature_name_extractor/FILES b/searchlib/src/tests/rankingexpression/feature_name_extractor/FILES
new file mode 100644
index 00000000000..6f6f6c1df43
--- /dev/null
+++ b/searchlib/src/tests/rankingexpression/feature_name_extractor/FILES
@@ -0,0 +1 @@
+feature_name_extractor_test.cpp
diff --git a/searchlib/src/tests/rankingexpression/feature_name_extractor/feature_name_extractor_test.cpp b/searchlib/src/tests/rankingexpression/feature_name_extractor/feature_name_extractor_test.cpp
new file mode 100644
index 00000000000..12ce67a586a
--- /dev/null
+++ b/searchlib/src/tests/rankingexpression/feature_name_extractor/feature_name_extractor_test.cpp
@@ -0,0 +1,79 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/vespalib/testkit/test_kit.h>
+#include <vespa/searchlib/features/rankingexpression/feature_name_extractor.h>
+
+using search::features::rankingexpression::FeatureNameExtractor;
+
+void verify_extract(const vespalib::string &input,
+ const vespalib::string &expect_symbol,
+ const vespalib::string &expect_after)
+{
+ FeatureNameExtractor extractor;
+ const char *pos_in = input.data();
+ const char *end_in = input.data() + input.size();
+ vespalib::string symbol_out;
+ const char *pos_out = nullptr;
+ extractor.extract_symbol(pos_in, end_in, pos_out, symbol_out);
+ ASSERT_TRUE(pos_out != nullptr);
+ vespalib::string after(pos_out, end_in);
+ EXPECT_EQUAL(expect_symbol, symbol_out);
+ EXPECT_EQUAL(expect_after, after);
+}
+
+TEST("require that basic names are extracted correctly") {
+ TEST_DO(verify_extract("foo+", "foo", "+"));
+ TEST_DO(verify_extract("foo.out+", "foo.out", "+"));
+ TEST_DO(verify_extract("foo(p1,p2)+", "foo(p1,p2)", "+"));
+ TEST_DO(verify_extract("foo(p1,p2).out+", "foo(p1,p2).out", "+"));
+}
+
+TEST("require that special characters are allowed in prefix and suffix") {
+ TEST_DO(verify_extract("_@$+", "_@$", "+"));
+ TEST_DO(verify_extract("_@$.$@_+", "_@$.$@_", "+"));
+ TEST_DO(verify_extract("_@$(p1,p2)+", "_@$(p1,p2)", "+"));
+ TEST_DO(verify_extract("_@$(p1,p2).$@_+", "_@$(p1,p2).$@_", "+"));
+}
+
+TEST("require that dot is only allowed in suffix") {
+ TEST_DO(verify_extract("foo.bar+", "foo.bar", "+"));
+ TEST_DO(verify_extract("foo.bar.out+", "foo.bar.out", "+"));
+ TEST_DO(verify_extract("foo.bar(p1,p2)+", "foo.bar", "(p1,p2)+"));
+ TEST_DO(verify_extract("foo.bar(p1,p2).out+", "foo.bar", "(p1,p2).out+"));
+ TEST_DO(verify_extract("foo(p1,p2).out.bar+", "foo(p1,p2).out.bar", "+"));
+}
+
+TEST("require that parameters can be nested") {
+ TEST_DO(verify_extract("foo(p1(a,b),p2(c,d(e,f))).out+", "foo(p1(a,b),p2(c,d(e,f))).out", "+"));
+}
+
+TEST("require that space is allowed among parameters") {
+ TEST_DO(verify_extract("foo( p1 ( a , b ) ).out+", "foo( p1 ( a , b ) ).out", "+"));
+}
+
+TEST("require that space is now allowed outside parameters") {
+ TEST_DO(verify_extract("foo +", "foo", " +"));
+ TEST_DO(verify_extract("foo . out+", "foo", " . out+"));
+ TEST_DO(verify_extract("foo. out+", "foo.", " out+"));
+ TEST_DO(verify_extract("foo (p1,p2)+", "foo", " (p1,p2)+"));
+ TEST_DO(verify_extract("foo(p1,p2) +", "foo(p1,p2)", " +"));
+ TEST_DO(verify_extract("foo(p1,p2) .out+", "foo(p1,p2)", " .out+"));
+ TEST_DO(verify_extract("foo(p1,p2).out +", "foo(p1,p2).out", " +"));
+}
+
+TEST("require that parameters can be scientific numbers") {
+ TEST_DO(verify_extract("foo(1.3E+3,-1.9e-10).out+", "foo(1.3E+3,-1.9e-10).out", "+"));
+}
+
+TEST("require that quoted parenthesis are not counted") {
+ TEST_DO(verify_extract("foo(a,b,\")\").out+", "foo(a,b,\")\").out", "+"));
+}
+
+TEST("require that escaped quotes does not unquote") {
+ TEST_DO(verify_extract("foo(a,b,\"\\\")\").out+", "foo(a,b,\"\\\")\").out", "+"));
+}
+
+TEST("require that escaped escape does not hinder unquote") {
+ TEST_DO(verify_extract("foo(a,b,\"\\\\\")\").out+", "foo(a,b,\"\\\\\")", "\").out+"));
+}
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/rankingexpression/rankingexpressionlist b/searchlib/src/tests/rankingexpression/rankingexpressionlist
new file mode 100644
index 00000000000..2ff1350025b
--- /dev/null
+++ b/searchlib/src/tests/rankingexpression/rankingexpressionlist
@@ -0,0 +1,160 @@
+# This file is a list of semicolon separated strings. The first string is the expression to be parsed, whereas all
+# following strings are allowed ways to print the parsed expression. If no alternatives are given, the expression can be
+# printed as the original. Note that all strings are trimmed before they are parsed / compared.
+ 1
+1.0; 1.0; 1
+1e1; 1e1; 10
+1e-1; 1e-1; 0.1
+1.0e1; 1.0e1; 10
+1.0e-1; 1.0e-1; 0.1
+-1; -1
+1 + -1; 1 + -1; 1 - 1
+-1 + 1; -1 + 1
+tan(10)
+1
+ 1
+ 1 + 2
+ 1 - 2
+ 1 * 2
+ 1 / 2
+ 1 + 2 - 3
+ 1 + 2 - 3 * 4
+ 1 + 2 - 3 * 4 / 5
+1+2-3*4/5; 1 + 2 - 3 * 4 / 5
+(1)
+(1)+ 2; (1) + 2
+(1)+(2); (1) + (2)
+(1)+(2)-3; (1) + (2) - 3
+(1)+(2)-(3); (1) + (2) - (3)
+(1)+(2)-(3)*4; (1) + (2) - (3) * 4
+(1)+(2)-(3)*(4); (1) + (2) - (3) * (4)
+(1)+(2)-(3)*(4)/5; (1) + (2) - (3) * (4) / 5
+(1)+(2)-(3)*(4)/(5); (1) + (2) - (3) * (4) / (5)
+ 1 +(2)-(3)*(4)/(5); 1 + (2) - (3) * (4) / (5)
+ 1 + 2 -(3)*(4)/(5); 1 + 2 - (3) * (4) / (5)
+ 1 + 2 - 3 *(4)/(5); 1 + 2 - 3 * (4) / (5)
+ 1 + 2 - 3 * 4 /(5); 1 + 2 - 3 * 4 / (5)
+ 1 + 2 - 3 * 4 / 5 ; 1 + 2 - 3 * 4 / 5
+(1 + 2)
+(1 + 2)- 3; (1 + 2) - 3
+(1 + 2 - 3)
+(1 + 2 - 3)* 4; (1 + 2 - 3) * 4
+(1 + 2 - 3 * 4)
+(1 + 2 - 3 * 4)/ 5; (1 + 2 - 3 * 4) / 5
+(1 + 2 - 3 * 4 / 5)
+ 1 +(2 - 3 * 4 / 5); 1 + (2 - 3 * 4 / 5)
+ 1 + 2 -(3 * 4 / 5); 1 + 2 - (3 * 4 / 5)
+ 1 + 2 - 3 *(4 / 5); 1 + 2 - 3 * (4 / 5)
+1+2-3*(4/5); 1 + 2 - 3 * (4 / 5)
+log(1)
+log( 1 ); log(1)
+log( 1 + 2 ); log(1 + 2)
+log( 1 + 2 - 3 ); log(1 + 2 - 3)
+log( 1 + 2 - 3 * 4 ); log(1 + 2 - 3 * 4)
+log( 1 + 2 - 3 * 4 / 5 ); log(1 + 2 - 3 * 4 / 5)
+log((1 + 2)- 3 * 4 / 5 ); log((1 + 2) - 3 * 4 / 5)
+log( 1 +(2 - 3)* 4 / 5 ); log(1 + (2 - 3) * 4 / 5)
+log( 1 + 2 -(3 * 4)/ 5 ); log(1 + 2 - (3 * 4) / 5)
+log( 1 + 2 - 3 *(4 / 5)); log(1 + 2 - 3 * (4 / 5))
+log(1+2-3*4/5); log(1 + 2 - 3 * 4 / 5)
+""; ""
+"foo"
+"foo\""
+(1+"foo"); (1 + "foo")
+if("foo" == "bar", 1, 2); if ("foo" == "bar", 1, 2)
+cosh(1); cosh(1)
+cosh (1); cosh(1)
+cosh ( 1 ); cosh(1)
+cosh ( foo ); cosh(foo)
+cosh ( foo.out ); cosh(foo.out)
+cosh ( foo ( bar ) . out ); cosh(foo(bar).out)
+sin(10)
+cos(10)
+tan(10)
+acos(10)
+asin(10)
+atan(10)
+cosh(10)
+sinh(10)
+tanh(10)
+exp(10)
+log(10)
+log10(10)
+sqrt(10)
+ceil(10)
+fabs(10)
+floor(10)
+atan2(10, 20); atan2(10,20)
+ldexp(10, 20); ldexp(10,20)
+pow(10, 20); pow(10,20)
+fmod(10, 20); fmod(10,20)
+min(0, 1); min(0,1)
+max(1, 0); max(1,0)
+if(1<2,3,4); if (1 < 2, 3, 4)
+if(1>2,3,4); if (1 > 2, 3, 4)
+if(1==2,3,4); if (1 == 2, 3, 4)
+if(1~=2,3,4); if (1 ~= 2, 3, 4)
+if(1<=2,3,4); if (1 <= 2, 3, 4)
+if(1>=2,3,4); if (1 >= 2, 3, 4)
+if(1>=2,3,4,0.3); if (1 >= 2, 3, 4, 0.3)
+if(1>=2,3,4,0.5); if (1 >= 2, 3, 4, 0.5)
+if (1 < 2, 3, 4); if (1 < 2, 3, 4)
+if (1+2 < 3, 4, 5); if (1 + 2 < 3, 4, 5)
+if (1 < 2+3, 4, 5); if (1 < 2 + 3, 4, 5)
+if (1 < 2, 3+4, 5); if (1 < 2, 3 + 4, 5)
+if (1 < 2, 3, 4+5); if (1 < 2, 3, 4 + 5)
+if (foo in [bar], 6, 9); if (foo in [bar], 6, 9)
+if (foo in [bar,baz], 6, 9); if (foo in [bar, baz], 6, 9)
+if (foo in [bar,baz,cox], 6, 9); if (foo in [bar, baz, cox], 6, 9)
+if (foo in [bar], 6, 9)
+if (foo in [bar, baz], 6, 9)
+if (foo in [bar, baz, cox], 6, 9)
+if (foo in [ bar ], 6, 9); if (foo in [bar], 6, 9)
+if (foo in [ bar, baz ], 6, 9); if (foo in [bar, baz], 6, 9)
+if (foo in [ bar, baz, cox ], 6, 9); if (foo in [bar, baz, cox], 6, 9)
+feature; feature
+fe@ture; fe@ture
+featur@; featur@
+fe$ture; fe$ture
+featur$; featur$
+feature.out; feature.out
+feature .out; feature.out
+feature . out; feature.out
+feature.out.out; feature.out.out
+feature.if
+feature.in
+feature(arg1); feature(arg1)
+feature (arg1); feature(arg1)
+feature ( arg1); feature(arg1)
+feature ( arg1 ); feature(arg1)
+feature(arg1,arg2); feature(arg1,arg2)
+feature(arg1 ,arg2); feature(arg1,arg2)
+feature(arg1 , arg2); feature(arg1,arg2)
+feature(arg1 , arg2).out; feature(arg1,arg2).out
+feature(arg1 , arg2) . out; feature(arg1,arg2).out
+feature("\",difficult","\")arguments\\").out
+feature(arg1,arg2).out; feature(arg1,arg2).out
+feature(if)
+feature(in)
+feature(cos)
+feature("cos(1,2)")
+feature(cos,sin,tan,cosh,sinh,tanh,acos,asin,atan,exp,log10,log,sqrt,ceil,fabs,floor)
+feature(cos,"sin(1,2)",3)
+rankingExpression(foo@92c9e83e1b665d2c.fe5dbbcea5ce7e29).rankingScript
+rankingExpression(foo@92c9e83e1b665d2c.2e5dbbcea5ce7e29).rankingScript
+mysum ( mysum(4, 4), value( 4 ), value(4) ); mysum(mysum(4,4),value(4),value(4))
+"\\"
+"\""
+"\f"
+"\female"
+"\n"
+"\nude"
+"\r"
+"fa\rt"
+"\t"
+"fe\tish"
+"\x10081977"
+"10\x081977"
+"1008\x1977"
+"100819\x77"
+if(1.09999~=1.1,2,3); if (1.09999 ~= 1.1, 2, 3)
diff --git a/searchlib/src/tests/ranksetup/.gitignore b/searchlib/src/tests/ranksetup/.gitignore
new file mode 100644
index 00000000000..754597f65f8
--- /dev/null
+++ b/searchlib/src/tests/ranksetup/.gitignore
@@ -0,0 +1,5 @@
+.depend
+Makefile
+ranksetup_test
+/.gdbinit
+searchlib_ranksetup_test_app
diff --git a/searchlib/src/tests/ranksetup/CMakeLists.txt b/searchlib/src/tests/ranksetup/CMakeLists.txt
new file mode 100644
index 00000000000..712f1ffefa4
--- /dev/null
+++ b/searchlib/src/tests/ranksetup/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_ranksetup_test_app
+ SOURCES
+ ranksetup_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_ranksetup_test_app COMMAND searchlib_ranksetup_test_app)
diff --git a/searchlib/src/tests/ranksetup/DESC b/searchlib/src/tests/ranksetup/DESC
new file mode 100644
index 00000000000..37f7cc6f2c5
--- /dev/null
+++ b/searchlib/src/tests/ranksetup/DESC
@@ -0,0 +1 @@
+ranksetup test. Take a look at ranksetup.cpp for details.
diff --git a/searchlib/src/tests/ranksetup/FILES b/searchlib/src/tests/ranksetup/FILES
new file mode 100644
index 00000000000..f1fce1d28ff
--- /dev/null
+++ b/searchlib/src/tests/ranksetup/FILES
@@ -0,0 +1 @@
+ranksetup.cpp
diff --git a/searchlib/src/tests/ranksetup/ranksetup_test.cpp b/searchlib/src/tests/ranksetup/ranksetup_test.cpp
new file mode 100644
index 00000000000..aee04ef4cb7
--- /dev/null
+++ b/searchlib/src/tests/ranksetup/ranksetup_test.cpp
@@ -0,0 +1,922 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("ranksetup_test");
+#include <vespa/vespalib/testkit/testapp.h>
+
+#include <map>
+#include <string>
+#include <vector>
+
+#include <vespa/searchlib/common/feature.h>
+
+#include <vespa/searchlib/attribute/attributeguard.h>
+#include <vespa/searchlib/attribute/attributefactory.h>
+#include <vespa/searchlib/attribute/attributevector.h>
+#include <vespa/searchlib/attribute/attributevector.hpp>
+
+#include <vespa/searchlib/fef/blueprint.h>
+#include <vespa/searchlib/fef/blueprintfactory.h>
+#include <vespa/searchlib/fef/featureexecutor.h>
+#include <vespa/searchlib/fef/featurenamebuilder.h>
+#include <vespa/searchlib/fef/idumpfeaturevisitor.h>
+#include <vespa/searchlib/fef/indexproperties.h>
+#include <vespa/searchlib/fef/matchdatalayout.h>
+#include <vespa/searchlib/fef/rank_program.h>
+#include <vespa/searchlib/fef/ranksetup.h>
+#include <vespa/searchlib/fef/utils.h>
+
+#include <vespa/searchlib/fef/test/indexenvironment.h>
+#include <vespa/searchlib/fef/test/queryenvironment.h>
+#include <vespa/searchlib/fef/test/rankresult.h>
+
+#include <vespa/searchlib/features/rankingexpressionfeature.h>
+#include <vespa/searchlib/features/setup.h>
+#include <vespa/searchlib/features/valuefeature.h>
+#include <vespa/searchlib/fef/test/plugin/chain.h>
+#include <vespa/searchlib/fef/test/plugin/double.h>
+#include <vespa/searchlib/fef/test/plugin/setup.h>
+#include <vespa/searchlib/fef/test/plugin/staticrank.h>
+#include <vespa/searchlib/fef/test/plugin/sum.h>
+#include <vespa/searchlib/fef/test/plugin/cfgvalue.h>
+#include <vespa/searchlib/fef/test/dummy_dependency_handler.h>
+
+using namespace search::fef;
+using namespace search::features;
+using namespace search::fef::test;
+using search::feature_t;
+
+typedef FeatureNameBuilder FNB;
+
+//-----------------------------------------------------------------------------
+// DumpFeatureVisitor
+//-----------------------------------------------------------------------------
+class DumpFeatureVisitor : public IDumpFeatureVisitor
+{
+public:
+ DumpFeatureVisitor() {}
+ virtual void visitDumpFeature(const vespalib::string & name) {
+ std::cout << "dump feature: " << name << std::endl;
+ }
+};
+
+
+//-----------------------------------------------------------------------------
+// RankEnvironment
+//-----------------------------------------------------------------------------
+class RankEnvironment
+{
+private:
+ const BlueprintFactory & _factory;
+ const IIndexEnvironment & _indexEnv;
+ const IQueryEnvironment & _queryEnv;
+
+public:
+ RankEnvironment(const BlueprintFactory & bfactory,
+ const IIndexEnvironment & indexEnv, const IQueryEnvironment & queryEnv) :
+ _factory(bfactory), _indexEnv(indexEnv), _queryEnv(queryEnv) {}
+
+ const BlueprintFactory & factory() const { return _factory; }
+ const IIndexEnvironment & indexEnvironment() const { return _indexEnv; }
+ const IQueryEnvironment & queryEnvironment() const { return _queryEnv; }
+};
+
+
+//-----------------------------------------------------------------------------
+// RankExecutor
+//-----------------------------------------------------------------------------
+class RankExecutor
+{
+private:
+ vespalib::string _initRank;
+ vespalib::string _finalRank;
+ const RankEnvironment & _rankEnv;
+ MatchDataLayout _layout;
+ std::unique_ptr<RankSetup> _rs;
+ RankProgram::UP _firstPhaseProgram;
+ RankProgram::UP _secondPhaseProgram;
+
+public:
+ RankExecutor(const vespalib::string &initRank,
+ const vespalib::string &finalRank, const RankEnvironment &rankEnv) :
+ _initRank(initRank), _finalRank(finalRank), _rankEnv(rankEnv), _layout(),
+ _rs(), _firstPhaseProgram(), _secondPhaseProgram() {}
+ bool setup();
+ RankResult execute(uint32_t docId = 0);
+};
+
+bool
+RankExecutor::setup()
+{
+ _rs = std::unique_ptr<RankSetup>(new RankSetup(_rankEnv.factory(), _rankEnv.indexEnvironment()));
+ if (_initRank.empty()) {
+ return false;
+ }
+ _rs->setFirstPhaseRank(_initRank);
+
+ if (!_finalRank.empty()) {
+ _rs->setSecondPhaseRank(_finalRank);
+ }
+
+ if (!_rs->compile()) {
+ return false;
+ }
+
+ _firstPhaseProgram = _rs->create_first_phase_program();
+ _firstPhaseProgram->setup(_layout, _rankEnv.queryEnvironment());
+ if (!_finalRank.empty()) {
+ _secondPhaseProgram = _rs->create_second_phase_program();
+ _secondPhaseProgram->setup(_layout, _rankEnv.queryEnvironment());
+ }
+ return true;
+}
+
+RankResult
+RankExecutor::execute(uint32_t docId)
+{
+ RankResult result;
+ _firstPhaseProgram->run(docId);
+ result.addScore(_initRank, *Utils::getScoreFeature(*_firstPhaseProgram));
+
+ if (_secondPhaseProgram.get() != nullptr) {
+ _secondPhaseProgram->run(docId);
+ result.addScore(_finalRank, *Utils::getScoreFeature(*_secondPhaseProgram));
+ }
+
+ return result;
+}
+
+
+//-----------------------------------------------------------------------------
+// FeatureDumper
+//-----------------------------------------------------------------------------
+class FeatureDumper
+{
+private:
+ const RankEnvironment & _rankEnv;
+ RankSetup _setup;
+ MatchDataLayout _layout;
+ RankProgram::UP _rankProgram;
+
+public:
+ FeatureDumper(const RankEnvironment & rankEnv) :
+ _rankEnv(rankEnv),
+ _setup(_rankEnv.factory(), _rankEnv.indexEnvironment()),
+ _layout(),
+ _rankProgram() {}
+ void addDumpFeature(const vespalib::string &name);
+ void configure();
+ bool setup();
+ RankResult dump();
+};
+
+void
+FeatureDumper::addDumpFeature(const vespalib::string &name)
+{
+ _setup.addDumpFeature(name);
+}
+
+void
+FeatureDumper::configure()
+{
+ _setup.configure();
+}
+
+bool
+FeatureDumper::setup()
+{
+ if (!_setup.compile()) {
+ return false;
+ }
+
+ _rankProgram = _setup.create_dump_program();
+ _rankProgram->setup(_layout, _rankEnv.queryEnvironment());
+ return true;
+}
+
+RankResult
+FeatureDumper::dump()
+{
+ _rankProgram->run(1);
+ std::map<vespalib::string, feature_t> features = Utils::getSeedFeatures(*_rankProgram);
+ RankResult retval;
+ for (auto itr = features.begin(); itr != features.end(); ++itr) {
+ retval.addScore(itr->first, itr->second);
+ }
+ return retval;
+}
+
+
+//-----------------------------------------------------------------------------
+// RankSetupTest
+//-----------------------------------------------------------------------------
+class RankSetupTest : public vespalib::TestApp
+{
+private:
+ BlueprintFactory _factory;
+ search::AttributeManager _manager;
+ IndexEnvironment _indexEnv;
+ QueryEnvironment _queryEnv;
+ RankEnvironment _rankEnv;
+ DumpFeatureVisitor _visitor;
+
+ void testValueBlueprint();
+ void testDoubleBlueprint();
+ void testSumBlueprint();
+ void testStaticRankBlueprint();
+ void testChainBlueprint();
+ void testCfgValueBlueprint();
+ void testCompilation();
+ void testRankSetup();
+ bool testExecution(const vespalib::string & initRank, feature_t initScore,
+ const vespalib::string & finalRank = "", feature_t finalScore = 0.0f, uint32_t docId = 0);
+ bool testExecution(const RankEnvironment &rankEnv,
+ const vespalib::string & initRank, feature_t initScore,
+ const vespalib::string & finalRank = "", feature_t finalScore = 0.0f, uint32_t docId = 0);
+ void testExecution();
+ void testFeatureDump();
+
+ void checkFeatures(std::map<vespalib::string, feature_t> &exp, std::map<vespalib::string, feature_t> &actual);
+ void testFeatureNormalization();
+
+public:
+ RankSetupTest();
+ int Main();
+};
+
+
+void
+RankSetupTest::testValueBlueprint()
+{
+ ValueBlueprint prototype;
+ prototype.visitDumpFeatures(_indexEnv, _visitor);
+ { // basic test
+ Blueprint::UP bp = prototype.createInstance();
+ DummyDependencyHandler deps(*bp);
+ bp->setName("value");
+ EXPECT_EQUAL(bp->getName(), "value");
+ std::vector<vespalib::string> params;
+ params.push_back("5.5");
+ params.push_back("10.5");
+ EXPECT_TRUE(bp->setup(_indexEnv, params));
+ EXPECT_EQUAL(deps.input.size(), 0u);
+ EXPECT_EQUAL(deps.output.size(), 2u);
+ EXPECT_EQUAL(deps.output[0], "0");
+ EXPECT_EQUAL(deps.output[1], "1");
+
+ FeatureExecutor::LP fe = bp->createExecutor(_queryEnv);
+ ValueExecutor * vfe = static_cast<ValueExecutor *>(fe.get());
+ EXPECT_EQUAL(vfe->getValues().size(), 2u);
+ EXPECT_EQUAL(vfe->getValues()[0], 5.5f);
+ EXPECT_EQUAL(vfe->getValues()[1], 10.5f);
+ }
+ { // invalid params
+ Blueprint::UP bp = prototype.createInstance();
+ DummyDependencyHandler deps(*bp);
+ std::vector<vespalib::string> params;
+ EXPECT_TRUE(!bp->setup(_indexEnv, params));
+ }
+}
+
+void
+RankSetupTest::testDoubleBlueprint()
+{
+ DoubleBlueprint prototype;
+ prototype.visitDumpFeatures(_indexEnv, _visitor);
+ { // basic test
+ Blueprint::UP bp = prototype.createInstance();
+ DummyDependencyHandler deps(*bp);
+ std::vector<vespalib::string> params;
+ params.push_back("value(5.5).0");
+ params.push_back("value(10.5).0");
+ EXPECT_TRUE(bp->setup(_indexEnv, params));
+ EXPECT_EQUAL(deps.input.size(), 2u);
+ EXPECT_EQUAL(deps.input[0], "value(5.5).0");
+ EXPECT_EQUAL(deps.input[1], "value(10.5).0");
+ EXPECT_EQUAL(deps.output.size(), 2u);
+ EXPECT_EQUAL(deps.output[0], "0");
+ EXPECT_EQUAL(deps.output[1], "1");
+ }
+}
+
+void
+RankSetupTest::testSumBlueprint()
+{
+ SumBlueprint prototype;
+ prototype.visitDumpFeatures(_indexEnv, _visitor);
+ { // basic test
+ Blueprint::UP bp = prototype.createInstance();
+ DummyDependencyHandler deps(*bp);
+ std::vector<vespalib::string> params;
+ params.push_back("value(5.5, 10.5).0");
+ params.push_back("value(5.5, 10.5).1");
+ EXPECT_TRUE(bp->setup(_indexEnv, params));
+ EXPECT_EQUAL(deps.input.size(), 2u);
+ EXPECT_EQUAL(deps.input[0], "value(5.5, 10.5).0");
+ EXPECT_EQUAL(deps.input[1], "value(5.5, 10.5).1");
+ EXPECT_EQUAL(deps.output.size(), 1u);
+ EXPECT_EQUAL(deps.output[0], "out");
+ }
+}
+
+void
+RankSetupTest::testStaticRankBlueprint()
+{
+ StaticRankBlueprint prototype;
+ { // basic test
+ Blueprint::UP bp = prototype.createInstance();
+ DummyDependencyHandler deps(*bp);
+ std::vector<vespalib::string> params;
+ params.push_back("sr1");
+ EXPECT_TRUE(bp->setup(_indexEnv, params));
+ EXPECT_EQUAL(deps.input.size(), 0u);
+ EXPECT_EQUAL(deps.output.size(), 1u);
+ EXPECT_EQUAL(deps.output[0], "out");
+ }
+ { // invalid params
+ Blueprint::UP bp = prototype.createInstance();
+ DummyDependencyHandler deps(*bp);
+ std::vector<vespalib::string> params;
+ EXPECT_TRUE(!bp->setup(_indexEnv, params));
+ params.push_back("sr1");
+ params.push_back("sr2");
+ EXPECT_TRUE(!bp->setup(_indexEnv, params));
+ }
+}
+
+void
+RankSetupTest::testChainBlueprint()
+{
+ ChainBlueprint prototype;
+ { // chaining
+ Blueprint::UP bp = prototype.createInstance();
+ DummyDependencyHandler deps(*bp);
+ std::vector<vespalib::string> params;
+ params.push_back("basic");
+ params.push_back("2");
+ params.push_back("4");
+ EXPECT_TRUE(bp->setup(_indexEnv, params));
+ EXPECT_EQUAL(deps.input.size(), 1u);
+ EXPECT_EQUAL(deps.input[0], "chain(basic,1,4)");
+ }
+ { // leaf node
+ Blueprint::UP bp = prototype.createInstance();
+ DummyDependencyHandler deps(*bp);
+ std::vector<vespalib::string> params;
+ params.push_back("basic");
+ params.push_back("1");
+ params.push_back("4");
+ EXPECT_TRUE(bp->setup(_indexEnv, params));
+ EXPECT_EQUAL(deps.input.size(), 1u);
+ EXPECT_EQUAL(deps.input[0], "value(4)");
+ }
+ { // cycle
+ Blueprint::UP bp = prototype.createInstance();
+ DummyDependencyHandler deps(*bp);
+ std::vector<vespalib::string> params;
+ params.push_back("cycle");
+ params.push_back("1");
+ params.push_back("4");
+ EXPECT_TRUE(bp->setup(_indexEnv, params));
+ EXPECT_EQUAL(deps.input.size(), 1u);
+ EXPECT_EQUAL(deps.input[0], "chain(cycle,4,4)");
+ }
+ { // invalid params
+ Blueprint::UP bp = prototype.createInstance();
+ DummyDependencyHandler deps(*bp);
+ std::vector<vespalib::string> params;
+ EXPECT_TRUE(!bp->setup(_indexEnv, params));
+ params.push_back("basic");
+ params.push_back("0");
+ params.push_back("4");
+ EXPECT_TRUE(!bp->setup(_indexEnv, params));
+ }
+}
+
+void
+RankSetupTest::testCfgValueBlueprint()
+{
+ CfgValueBlueprint prototype;
+ IndexEnvironment indexEnv;
+ indexEnv.getProperties().add("test_cfgvalue(foo).value", "1.0");
+ indexEnv.getProperties().add("test_cfgvalue(foo).value", "2.0");
+ indexEnv.getProperties().add("test_cfgvalue(foo).value", "3.0");
+
+ { // basic test
+ Blueprint::UP bp = prototype.createInstance();
+ DummyDependencyHandler deps(*bp);
+ bp->setName("test_cfgvalue(foo)");
+ std::vector<vespalib::string> params;
+ params.push_back("foo");
+
+ EXPECT_TRUE(bp->setup(indexEnv, params));
+ EXPECT_EQUAL(deps.input.size(), 0u);
+ EXPECT_EQUAL(deps.output.size(), 3u);
+ EXPECT_EQUAL(deps.output[0], "0");
+ EXPECT_EQUAL(deps.output[1], "1");
+ EXPECT_EQUAL(deps.output[2], "2");
+
+ FeatureExecutor::LP fe = bp->createExecutor(_queryEnv);
+ ValueExecutor *vfe = static_cast<ValueExecutor *>(fe.get());
+ EXPECT_EQUAL(vfe->getValues().size(), 3u);
+ EXPECT_EQUAL(vfe->getValues()[0], 1.0f);
+ EXPECT_EQUAL(vfe->getValues()[1], 2.0f);
+ EXPECT_EQUAL(vfe->getValues()[2], 3.0f);
+ }
+}
+
+
+void
+RankSetupTest::testCompilation()
+{
+ { // unknown blueprint
+ RankSetup rs(_factory, _indexEnv);
+ rs.setFirstPhaseRank("unknown");
+ EXPECT_TRUE(!rs.compile());
+ }
+ { // unknown output for initial rank
+ RankSetup rs(_factory, _indexEnv);
+ rs.setFirstPhaseRank("value(2).1");
+ EXPECT_TRUE(!rs.compile());
+ }
+ { // unknown output for dependency
+ RankSetup rs(_factory, _indexEnv);
+ rs.setFirstPhaseRank(FNB().baseName("mysum").parameter("value(2).1").buildName());
+ EXPECT_TRUE(!rs.compile());
+ }
+ { // illegal input parameters
+ RankSetup rs(_factory, _indexEnv);
+ rs.setFirstPhaseRank("value.0");
+ EXPECT_TRUE(!rs.compile());
+ }
+ { // illegal feature name
+ RankSetup rs(_factory, _indexEnv);
+ rs.setFirstPhaseRank("value(2).");
+ EXPECT_TRUE(!rs.compile());
+ }
+ { // almost too deep dependency graph
+ RankSetup rs(_factory, _indexEnv);
+ std::ostringstream oss;
+ oss << "chain(basic," << (BlueprintResolver::MAX_DEP_DEPTH - 1) << ",4)"; // gives tree height == MAX_DEP_DEPTH
+ rs.setFirstPhaseRank(oss.str());
+ EXPECT_TRUE(rs.compile());
+ }
+ { // too deep dependency graph
+ RankSetup rs(_factory, _indexEnv);
+ std::ostringstream oss;
+ oss << "chain(basic," << BlueprintResolver::MAX_DEP_DEPTH << ",4)"; // gives tree height == MAX_DEP_DEPTH + 1
+ rs.setFirstPhaseRank(oss.str());
+ EXPECT_TRUE(!rs.compile());
+ }
+ { // cycle
+ RankSetup rs(_factory, _indexEnv);
+ // c(c,4,2) -> c(c,3,2) -> c(c,2,2) -> c(c,1,2) -> c(c,2,2)
+ rs.setFirstPhaseRank("chain(cycle,4,2)");
+ EXPECT_TRUE(!rs.compile());
+ }
+}
+
+void RankSetupTest::testRankSetup()
+{
+ using namespace search::fef::indexproperties;
+ IndexEnvironment env;
+ env.getProperties().add(rank::FirstPhase::NAME, "firstphase");
+ env.getProperties().add(rank::SecondPhase::NAME, "secondphase");
+ env.getProperties().add(dump::Feature::NAME, "foo");
+ env.getProperties().add(dump::Feature::NAME, "bar");
+ env.getProperties().add(matching::NumThreadsPerSearch::NAME, "3");
+ env.getProperties().add(matchphase::DegradationAttribute::NAME, "mystaticrankattr");
+ env.getProperties().add(matchphase::DegradationAscendingOrder::NAME, "true");
+ env.getProperties().add(matchphase::DegradationMaxHits::NAME, "12345");
+ env.getProperties().add(matchphase::DegradationMaxFilterCoverage::NAME, "0.19");
+ env.getProperties().add(matchphase::DegradationSamplePercentage::NAME, "0.9");
+ env.getProperties().add(matchphase::DegradationPostFilterMultiplier::NAME, "0.7");
+ env.getProperties().add(matchphase::DiversityAttribute::NAME, "mycategoryattr");
+ env.getProperties().add(matchphase::DiversityMinGroups::NAME, "37");
+ env.getProperties().add(matchphase::DiversityCutoffFactor::NAME, "7.1");
+ env.getProperties().add(matchphase::DiversityCutoffStrategy::NAME, "strict");
+ env.getProperties().add(hitcollector::HeapSize::NAME, "50");
+ env.getProperties().add(hitcollector::ArraySize::NAME, "60");
+ env.getProperties().add(hitcollector::EstimatePoint::NAME, "70");
+ env.getProperties().add(hitcollector::EstimateLimit::NAME, "80");
+ env.getProperties().add(hitcollector::RankScoreDropLimit::NAME, "90.5");
+
+ RankSetup rs(_factory, env);
+ rs.configure();
+ EXPECT_EQUAL(rs.getFirstPhaseRank(), vespalib::string("firstphase"));
+ EXPECT_EQUAL(rs.getSecondPhaseRank(), vespalib::string("secondphase"));
+ ASSERT_TRUE(rs.getDumpFeatures().size() == 2);
+ EXPECT_EQUAL(rs.getDumpFeatures()[0], vespalib::string("foo"));
+ EXPECT_EQUAL(rs.getDumpFeatures()[1], vespalib::string("bar"));
+ EXPECT_EQUAL(rs.getNumThreadsPerSearch(), 3u);
+ EXPECT_EQUAL(rs.getDegradationAttribute(), "mystaticrankattr");
+ EXPECT_EQUAL(rs.isDegradationOrderAscending(), true);
+ EXPECT_EQUAL(rs.getDegradationMaxHits(), 12345u);
+ EXPECT_EQUAL(rs.getDegradationSamplePercentage(), 0.9);
+ EXPECT_EQUAL(rs.getDegradationMaxFilterCoverage(), 0.19);
+ EXPECT_EQUAL(rs.getDegradationPostFilterMultiplier(), 0.7);
+ EXPECT_EQUAL(rs.getDiversityAttribute(), "mycategoryattr");
+ EXPECT_EQUAL(rs.getDiversityMinGroups(), 37u);
+ EXPECT_EQUAL(rs.getDiversityCutoffFactor(), 7.1);
+ EXPECT_EQUAL(rs.getDiversityCutoffStrategy(), "strict");
+ EXPECT_EQUAL(rs.getHeapSize(), 50u);
+ EXPECT_EQUAL(rs.getArraySize(), 60u);
+ EXPECT_EQUAL(rs.getEstimatePoint(), 70u);
+ EXPECT_EQUAL(rs.getEstimateLimit(), 80u);
+ EXPECT_EQUAL(rs.getRankScoreDropLimit(), 90.5);
+}
+
+bool
+RankSetupTest::testExecution(const vespalib::string & initRank, feature_t initScore,
+ const vespalib::string & finalRank, feature_t finalScore, uint32_t docId)
+{
+ return testExecution(_rankEnv, initRank, initScore, finalRank, finalScore, docId);
+}
+
+bool
+RankSetupTest::testExecution(const RankEnvironment &rankEnv, const vespalib::string & initRank, feature_t initScore,
+ const vespalib::string & finalRank, feature_t finalScore, uint32_t docId)
+{
+ bool ok = true;
+ RankExecutor re(initRank, finalRank, rankEnv);
+ ok = ok && re.setup();
+ EXPECT_TRUE(ok);
+ RankResult exp;
+ exp.addScore(initRank, initScore);
+ if (finalRank != "") {
+ exp.addScore(finalRank, finalScore);
+ }
+ RankResult rs = re.execute(docId);
+ ok = ok && (exp == rs);
+ EXPECT_EQUAL(exp, rs);
+ return ok;
+}
+
+void
+RankSetupTest::testExecution()
+{
+ { // value executor
+ vespalib::string v = FNB().baseName("value").parameter("5.5").parameter("10.5").buildName();
+ EXPECT_TRUE(testExecution(v + ".0", 5.5f));
+ EXPECT_TRUE(testExecution(v + ".0", 5.5f, v + ".1", 10.5f));
+ EXPECT_TRUE(testExecution(v, 5.5f));
+ }
+ { // double executor
+ vespalib::string d1 = FNB().baseName("double").parameter("value(2).0").parameter("value(8).0").buildName();
+ vespalib::string d2 = FNB().baseName("double").parameter("value(2)").parameter("value(8)").buildName();
+ EXPECT_TRUE(testExecution(d1 + ".0", 4.0f));
+ EXPECT_TRUE(testExecution(d1 + ".0", 4.0f, d1 + ".1", 16.0f));
+ EXPECT_TRUE(testExecution(d2, 4.0f));
+ }
+ { // sum executor
+ vespalib::string s1 = FNB().baseName("mysum").parameter("value(2).0").parameter("value(4).0").output("out").buildName();
+ vespalib::string s2 = FNB().baseName("mysum").parameter("value(2)").parameter("value(4)").buildName();
+ EXPECT_TRUE(testExecution(s1, 6.0f));
+ EXPECT_TRUE(testExecution(s2, 6.0f));
+ }
+ { // static rank executor
+ vespalib::string sr1 = "staticrank(staticrank1)";
+ vespalib::string sr2 = "staticrank(staticrank2)";
+ for (uint32_t i = 0; i < 5; ++i) {
+ EXPECT_TRUE(testExecution(sr1, static_cast<feature_t>(i + 100),
+ sr2, static_cast<feature_t>(i + 200), i));
+ }
+ }
+ { // test topologic sorting
+ vespalib::string v1 = "value(2)";
+ vespalib::string d1 = FNB().baseName("double").parameter(v1).buildName();
+ vespalib::string d2 = FNB().baseName("double").parameter(d1).buildName();
+
+ {
+ vespalib::string s1 = FNB().baseName("mysum").parameter(v1).parameter(d1).parameter(d2).buildName();
+ EXPECT_TRUE(testExecution(s1, 14.0f));
+ }
+ {
+ vespalib::string s1 = FNB().baseName("mysum").parameter(d2).parameter(d1).parameter(v1).buildName();
+ EXPECT_TRUE(testExecution(s1, 14.0f));
+ }
+ }
+ { // output used by more than one
+ vespalib::string v1 = "value(2)";
+ vespalib::string d1 = FNB().baseName("double").parameter(v1).buildName();
+ vespalib::string d2 = FNB().baseName("double").parameter(v1).buildName();
+ vespalib::string s1 = FNB().baseName("mysum").parameter(d1).parameter(d2).buildName();
+ EXPECT_TRUE(testExecution(s1, 8.0f));
+ }
+ { // output not shared between phases
+ vespalib::string v1 = "value(2)";
+ vespalib::string v2 = "value(8)";
+ vespalib::string d1 = FNB().baseName("double").parameter(v1).buildName();
+ vespalib::string d2 = FNB().baseName("double").parameter(v2).buildName();
+ EXPECT_TRUE(testExecution(d1, 4.0f, d2, 16.0f));
+ }
+ { // output shared between phases
+ vespalib::string v1 = "value(2)";
+ vespalib::string v2 = "value(8)";
+ vespalib::string v3 = "value(32)";
+ vespalib::string d1 = FNB().baseName("double").parameter(v1).buildName();
+ vespalib::string d2 = FNB().baseName("double").parameter(v2).buildName();
+ vespalib::string d3 = FNB().baseName("double").parameter(v3).buildName();
+ vespalib::string s1 = FNB().baseName("mysum").parameter(d1).parameter(d2).buildName();
+ vespalib::string s2 = FNB().baseName("mysum").parameter(d2).parameter(d3).buildName();
+ EXPECT_TRUE(testExecution(s1, 20.0f, s2, 80.0f));
+ }
+ { // max dependency depth
+ uint32_t maxDepth = BlueprintResolver::MAX_DEP_DEPTH;
+ std::ostringstream oss;
+ oss << "chain(basic," << (maxDepth - 1) << ",4)"; // gives tree height == MAX_DEP_DEPTH;
+ EXPECT_TRUE(testExecution(oss.str(), 4.0f));
+ }
+ {
+ IndexEnvironment indexEnv;
+ indexEnv.getProperties().add("test_cfgvalue(foo).value", "1.0");
+ indexEnv.getProperties().add("test_cfgvalue(foo).value", "2.0");
+ indexEnv.getProperties().add("test_cfgvalue(bar).value", "5.0");
+
+ vespalib::string s = FNB().baseName("mysum")
+ .parameter("test_cfgvalue(foo).0")
+ .parameter("test_cfgvalue(foo).1")
+ .buildName();
+
+ EXPECT_TRUE(testExecution(RankEnvironment(_factory, indexEnv, _queryEnv),
+ s, 3.0f, "test_cfgvalue(bar).0", 5.0f));
+ }
+}
+
+void
+RankSetupTest::testFeatureDump()
+{
+ {
+ FeatureDumper dumper(_rankEnv);
+ dumper.addDumpFeature("value(2)");
+ dumper.addDumpFeature("value(4)");
+ dumper.addDumpFeature("double(value(4))");
+ dumper.addDumpFeature("double(value(8))");
+ dumper.addDumpFeature("mysum(value(4),value(16))");
+ dumper.addDumpFeature("mysum(double(value(8)),double(value(32)))");
+ EXPECT_TRUE(dumper.setup());
+
+ RankResult exp;
+ exp.addScore("value(2)", 2.0f);
+ exp.addScore("value(4)", 4.0f);
+ exp.addScore(FNB().baseName("double").parameter("value(4)").buildName(), 8.0f);
+ exp.addScore(FNB().baseName("double").parameter("value(8)").buildName(), 16.0f);
+ exp.addScore(FNB().baseName("mysum").parameter("value(4)").parameter("value(16)").buildName(), 20.0f);
+ exp.addScore(FNB().baseName("mysum").
+ parameter(FNB().baseName("double").parameter("value(8)").buildName()).
+ parameter(FNB().baseName("double").parameter("value(32)").buildName()).
+ buildName(), 80.0f);
+ EXPECT_EQUAL(exp, dumper.dump());
+ }
+ {
+ FeatureDumper dumper(_rankEnv);
+ dumper.addDumpFeature("value(50)");
+ dumper.addDumpFeature("value(100)");
+ EXPECT_TRUE(dumper.setup());
+ RankResult exp;
+ exp.addScore("value(50)", 50.0f);
+ exp.addScore("value(100)", 100.0f);
+ EXPECT_EQUAL(exp, dumper.dump());
+ }
+ {
+ FeatureDumper dumper(_rankEnv);
+ dumper.addDumpFeature(FNB().baseName("rankingExpression").parameter("if(4<2,3,4)").buildName());
+ EXPECT_TRUE(dumper.setup());
+ RankResult exp;
+ exp.addScore(FNB().baseName("rankingExpression").parameter("if(4<2,3,4)").buildName(), 4.0f);
+ EXPECT_EQUAL(exp, dumper.dump());
+ }
+
+ {
+ FeatureDumper dumper(_rankEnv);
+ dumper.addDumpFeature(FNB().baseName("rankingExpression").parameter("if(mysum(value(12),value(10))>2,3,4)").buildName());
+ EXPECT_TRUE(dumper.setup());
+ RankResult exp;
+ exp.addScore(FNB().baseName("rankingExpression").parameter("if(mysum(value(12),value(10))>2,3,4)").buildName(), 3.0f);
+ EXPECT_EQUAL(exp, dumper.dump());
+ }
+ { // dump features indicated by visitation
+ IndexEnvironment indexEnv;
+ indexEnv.getProperties().add("test_cfgvalue(foo).value", "1.0");
+ indexEnv.getProperties().add("test_cfgvalue(bar).value", "5.0");
+ indexEnv.getProperties().add("test_cfgvalue.dump", "test_cfgvalue(foo)");
+ indexEnv.getProperties().add("test_cfgvalue.dump", "test_cfgvalue(bar)");
+ indexEnv.getProperties().add(indexproperties::rank::FirstPhase::NAME, "");
+ indexEnv.getProperties().add(indexproperties::rank::SecondPhase::NAME, "");
+
+ RankEnvironment rankEnv(_factory, indexEnv, _queryEnv);
+ FeatureDumper dumper(rankEnv);
+ dumper.configure();
+ EXPECT_TRUE(dumper.setup());
+ RankResult exp;
+ exp.addScore("test_cfgvalue(foo)", 1.0);
+ exp.addScore("test_cfgvalue(bar)", 5.0);
+ EXPECT_EQUAL(exp, dumper.dump());
+ }
+ { // ignore features indicated by visitation
+ IndexEnvironment indexEnv;
+ indexEnv.getProperties().add("test_cfgvalue(foo).value", "1.0");
+ indexEnv.getProperties().add("test_cfgvalue(bar).value", "5.0");
+ indexEnv.getProperties().add("test_cfgvalue.dump", "test_cfgvalue(foo)");
+ indexEnv.getProperties().add("test_cfgvalue.dump", "test_cfgvalue(bar)");
+ indexEnv.getProperties().add(indexproperties::dump::IgnoreDefaultFeatures::NAME, "true");
+ indexEnv.getProperties().add(indexproperties::dump::Feature::NAME, "test_cfgvalue(foo)");
+ indexEnv.getProperties().add(indexproperties::rank::FirstPhase::NAME, "");
+ indexEnv.getProperties().add(indexproperties::rank::SecondPhase::NAME, "");
+
+ RankEnvironment rankEnv(_factory, indexEnv, _queryEnv);
+ FeatureDumper dumper(rankEnv);
+ dumper.configure();
+ EXPECT_TRUE(dumper.setup());
+ RankResult exp;
+ exp.addScore("test_cfgvalue(foo)", 1.0);
+ EXPECT_EQUAL(exp, dumper.dump());
+ }
+}
+
+void
+RankSetupTest::checkFeatures(std::map<vespalib::string, feature_t> &exp, std::map<vespalib::string, feature_t> &actual)
+{
+ typedef std::map<vespalib::string, feature_t>::const_iterator ITR;
+ if (!EXPECT_EQUAL(exp.size(), actual.size())) {
+ return;
+ }
+ ITR exp_itr = exp.begin();
+ ITR exp_end = exp.end();
+ ITR actual_itr = actual.begin();
+ ITR actual_end = actual.end();
+ for (; exp_itr != exp_end && actual_itr != actual_end; ++exp_itr, ++actual_itr) {
+ EXPECT_EQUAL(exp_itr->first, actual_itr->first);
+ EXPECT_APPROX(exp_itr->second, actual_itr->second, 0.001);
+ }
+ EXPECT_EQUAL(exp_itr == exp_end, actual_itr == actual_end);
+}
+
+void
+RankSetupTest::testFeatureNormalization()
+{
+ BlueprintFactory factory;
+ factory.addPrototype(Blueprint::SP(new ValueBlueprint()));
+ factory.addPrototype(Blueprint::SP(new SumBlueprint()));
+
+ IndexEnvironment idxEnv;
+ RankSetup rankSetup(factory, idxEnv);
+
+ rankSetup.setFirstPhaseRank(" mysum ( value ( 1 ) , value ( 1 ) ) ");
+ rankSetup.setSecondPhaseRank(" mysum ( value ( 2 ) , value ( 2 ) ) ");
+ rankSetup.addSummaryFeature(" mysum ( value ( 5 ) , value ( 5 ) ) ");
+ rankSetup.addSummaryFeature(" mysum ( \"value( 5 )\" , \"value( 5 )\" ) ");
+ rankSetup.addDumpFeature(" mysum ( value ( 10 ) , value ( 10 ) ) ");
+ rankSetup.addDumpFeature(" mysum ( \"value( 10 )\" , \"value( 10 )\" ) ");
+
+ ASSERT_TRUE(rankSetup.compile());
+
+ { // RANK context
+ MatchDataLayout layout;
+ QueryEnvironment queryEnv;
+ RankProgram::UP firstPhaseProgram = rankSetup.create_first_phase_program();
+ RankProgram::UP secondPhaseProgram = rankSetup.create_second_phase_program();
+ RankProgram::UP summaryProgram = rankSetup.create_summary_program();
+ firstPhaseProgram->setup(layout, queryEnv);
+ secondPhaseProgram->setup(layout, queryEnv);
+ summaryProgram->setup(layout, queryEnv);
+
+ firstPhaseProgram->run(1);
+ EXPECT_APPROX(2.0, *Utils::getScoreFeature(*firstPhaseProgram), 0.001);
+ secondPhaseProgram->run(1);
+ EXPECT_APPROX(4.0, *Utils::getScoreFeature(*secondPhaseProgram), 0.001);
+ summaryProgram->run(1);
+
+ { // rank seed features
+ std::map<vespalib::string, feature_t> actual = Utils::getSeedFeatures(*summaryProgram);
+ std::map<vespalib::string, feature_t> exp;
+ exp["mysum(value(5),value(5))"] = 10.0;
+ exp["mysum(\"value( 5 )\",\"value( 5 )\")"] = 10.0;
+ TEST_DO(checkFeatures(exp, actual));
+ }
+ { // all rank features (1. phase)
+ std::map<vespalib::string, feature_t> actual = Utils::getAllFeatures(*firstPhaseProgram);
+ std::map<vespalib::string, feature_t> exp;
+ exp["value(1)"] = 1.0;
+ exp["value(1).0"] = 1.0;
+ exp["mysum(value(1),value(1))"] = 2.0;
+ exp["mysum(value(1),value(1)).out"] = 2.0;
+ TEST_DO(checkFeatures(exp, actual));
+ }
+ { // all rank features (2. phase)
+ std::map<vespalib::string, feature_t> actual = Utils::getAllFeatures(*secondPhaseProgram);
+ std::map<vespalib::string, feature_t> exp;
+ exp["value(2)"] = 2.0;
+ exp["value(2).0"] = 2.0;
+ exp["mysum(value(2),value(2))"] = 4.0;
+ exp["mysum(value(2),value(2)).out"] = 4.0;
+ TEST_DO(checkFeatures(exp, actual));
+ }
+ { // all rank features (summary)
+ std::map<vespalib::string, feature_t> actual = Utils::getAllFeatures(*summaryProgram);
+ std::map<vespalib::string, feature_t> exp;
+ exp["value(5)"] = 5.0;
+ exp["value(5).0"] = 5.0;
+ exp["mysum(value(5),value(5))"] = 10.0;
+ exp["mysum(value(5),value(5)).out"] = 10.0;
+ exp["mysum(\"value( 5 )\",\"value( 5 )\")"] = 10.0;
+ exp["mysum(\"value( 5 )\",\"value( 5 )\").out"] = 10.0;
+ TEST_DO(checkFeatures(exp, actual));
+ }
+ }
+
+ { // DUMP context
+ MatchDataLayout layout;
+ QueryEnvironment queryEnv;
+ RankProgram::UP rankProgram = rankSetup.create_dump_program();
+ rankProgram->setup(layout, queryEnv);
+ rankProgram->run(1);
+
+ { // dump seed features
+ std::map<vespalib::string, feature_t> actual = Utils::getSeedFeatures(*rankProgram);
+ std::map<vespalib::string, feature_t> exp;
+ exp["mysum(value(10),value(10))"] = 20.0;
+ exp["mysum(\"value( 10 )\",\"value( 10 )\")"] = 20.0;
+ TEST_DO(checkFeatures(exp, actual));
+ }
+
+ { // all dump features
+ std::map<vespalib::string, feature_t> actual = Utils::getAllFeatures(*rankProgram);
+ std::map<vespalib::string, feature_t> exp;
+
+ exp["value(10)"] = 10.0;
+ exp["value(10).0"] = 10.0;
+
+ exp["mysum(value(10),value(10))"] = 20.0;
+ exp["mysum(value(10),value(10)).out"] = 20.0;
+
+ exp["mysum(\"value( 10 )\",\"value( 10 )\")"] = 20.0;
+ exp["mysum(\"value( 10 )\",\"value( 10 )\").out"] = 20.0;
+
+ TEST_DO(checkFeatures(exp, actual));
+ }
+ }
+}
+
+
+RankSetupTest::RankSetupTest() :
+ _factory(),
+ _manager(),
+ _indexEnv(),
+ _queryEnv(),
+ _rankEnv(_factory, _indexEnv, _queryEnv),
+ _visitor()
+{
+ // register blueprints
+ setup_fef_test_plugin(_factory);
+ _factory.addPrototype(Blueprint::SP(new ValueBlueprint()));
+ _factory.addPrototype(Blueprint::SP(new RankingExpressionBlueprint()));
+
+ // setup an original attribute manager with two attributes
+ search::attribute::Config cfg(search::attribute::BasicType::INT32,
+ search::attribute::CollectionType::SINGLE);
+ search::AttributeVector::SP av1 =
+ search::AttributeFactory::createAttribute("staticrank1", cfg);
+ search::AttributeVector::SP av2 =
+ search::AttributeFactory::createAttribute("staticrank2", cfg);
+ av1->addDocs(5);
+ av2->addDocs(5);
+ for (uint32_t i = 0; i < 5; ++i) {
+ (static_cast<search::IntegerAttribute *>(av1.get()))->update(i, i + 100);
+ (static_cast<search::IntegerAttribute *>(av2.get()))->update(i, i + 200);
+ }
+ av1->commit();
+ av2->commit();
+ _manager.add(av1);
+ _manager.add(av2);
+
+ // set the index environment
+ _queryEnv.setIndexEnv(&_indexEnv);
+
+ // set the manager
+ _queryEnv.overrideAttributeManager(&_manager);
+}
+
+
+int
+RankSetupTest::Main()
+{
+ TEST_INIT("ranksetup_test");
+
+ testValueBlueprint();
+ testDoubleBlueprint();
+ testSumBlueprint();
+ testStaticRankBlueprint();
+ testChainBlueprint();
+ testCfgValueBlueprint();
+
+ testCompilation();
+ testRankSetup();
+ testExecution();
+ testFeatureDump();
+ testFeatureNormalization();
+
+ TEST_DONE();
+}
+
+TEST_APPHOOK(RankSetupTest);
diff --git a/searchlib/src/tests/ranksetup/verify_feature/.gitignore b/searchlib/src/tests/ranksetup/verify_feature/.gitignore
new file mode 100644
index 00000000000..69a39cd13f2
--- /dev/null
+++ b/searchlib/src/tests/ranksetup/verify_feature/.gitignore
@@ -0,0 +1 @@
+searchlib_verify_feature_test_app
diff --git a/searchlib/src/tests/ranksetup/verify_feature/CMakeLists.txt b/searchlib/src/tests/ranksetup/verify_feature/CMakeLists.txt
new file mode 100644
index 00000000000..8ffd79fe327
--- /dev/null
+++ b/searchlib/src/tests/ranksetup/verify_feature/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_verify_feature_test_app
+ SOURCES
+ verify_feature_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_verify_feature_test_app COMMAND searchlib_verify_feature_test_app)
diff --git a/searchlib/src/tests/ranksetup/verify_feature/FILES b/searchlib/src/tests/ranksetup/verify_feature/FILES
new file mode 100644
index 00000000000..652373e33da
--- /dev/null
+++ b/searchlib/src/tests/ranksetup/verify_feature/FILES
@@ -0,0 +1 @@
+verify_feature_test.cpp
diff --git a/searchlib/src/tests/ranksetup/verify_feature/verify_feature_test.cpp b/searchlib/src/tests/ranksetup/verify_feature/verify_feature_test.cpp
new file mode 100644
index 00000000000..1e49cd4aae6
--- /dev/null
+++ b/searchlib/src/tests/ranksetup/verify_feature/verify_feature_test.cpp
@@ -0,0 +1,58 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/vespalib/testkit/test_kit.h>
+#include <vespa/searchlib/fef/fef.h>
+#include <vespa/searchlib/fef/test/indexenvironment.h>
+#include <vespa/searchlib/fef/test/plugin/setup.h>
+#include <vespa/searchlib/features/valuefeature.h>
+
+using namespace search::features;
+using namespace search::fef::test;
+using namespace search::fef;
+
+struct RankFixture {
+ BlueprintFactory factory;
+ IndexEnvironment indexEnv;
+
+ RankFixture() : factory(), indexEnv() {
+ setup_fef_test_plugin(factory);
+ factory.addPrototype(Blueprint::SP(new ValueBlueprint()));
+ }
+
+ bool verify(const std::string &feature) const {
+ return verifyFeature(factory, indexEnv, feature, "feature verification test");
+ }
+};
+
+TEST_F("verify valid rank feature", RankFixture) {
+ EXPECT_TRUE(f1.verify("value(1, 2, 3).0"));
+ EXPECT_TRUE(f1.verify("value(1, 2, 3).1"));
+ EXPECT_TRUE(f1.verify("value(1, 2, 3).2"));
+}
+
+TEST_F("verify unknown feature", RankFixture) {
+ EXPECT_FALSE(f1.verify("unknown"));
+}
+
+TEST_F("verify unknown output", RankFixture) {
+ EXPECT_FALSE(f1.verify("value(1, 2, 3).3"));
+}
+
+TEST_F("verify illegal input parameters", RankFixture) {
+ EXPECT_FALSE(f1.verify("value.0"));
+}
+
+TEST_F("verify illegal feature name", RankFixture) {
+ EXPECT_FALSE(f1.verify("value(2)."));
+}
+
+TEST_F("verify too deep dependency graph", RankFixture) {
+ EXPECT_TRUE(f1.verify("chain(basic, 63, 4)"));
+ EXPECT_FALSE(f1.verify("chain(basic, 64, 4)"));
+}
+
+TEST_F("verify dependency cycle", RankFixture) {
+ EXPECT_FALSE(f1.verify("chain(cycle, 4, 2)"));
+}
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/sha1/.gitignore b/searchlib/src/tests/sha1/.gitignore
new file mode 100644
index 00000000000..e69de29bb2d
--- /dev/null
+++ b/searchlib/src/tests/sha1/.gitignore
diff --git a/searchlib/src/tests/sort/.gitignore b/searchlib/src/tests/sort/.gitignore
new file mode 100644
index 00000000000..7207ff4596d
--- /dev/null
+++ b/searchlib/src/tests/sort/.gitignore
@@ -0,0 +1,8 @@
+.depend
+Makefile
+sort_test
+uca_stress
+/sortbenchmark
+searchlib_sort_test_app
+searchlib_sortbenchmark_app
+searchlib_uca_stress_app
diff --git a/searchlib/src/tests/sort/CMakeLists.txt b/searchlib/src/tests/sort/CMakeLists.txt
new file mode 100644
index 00000000000..1830952bffd
--- /dev/null
+++ b/searchlib/src/tests/sort/CMakeLists.txt
@@ -0,0 +1,22 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_sortbenchmark_app
+ SOURCES
+ sortbenchmark.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_sortbenchmark_app COMMAND searchlib_sortbenchmark_app BENCHMARK)
+vespa_add_executable(searchlib_sort_test_app
+ SOURCES
+ sort_test.cpp
+ DEPENDS
+ searchlib
+)
+#vespa_add_test(NAME searchlib_sort_test_app COMMAND searchlib_sort_test_app)
+vespa_add_executable(searchlib_uca_stress_app
+ SOURCES
+ uca.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_uca_stress_app COMMAND searchlib_uca_stress_app BENCHMARK)
diff --git a/searchlib/src/tests/sort/DESC b/searchlib/src/tests/sort/DESC
new file mode 100644
index 00000000000..ad8ab11f5ba
--- /dev/null
+++ b/searchlib/src/tests/sort/DESC
@@ -0,0 +1 @@
+Testing templatized radixsort.
diff --git a/searchlib/src/tests/sort/FILES b/searchlib/src/tests/sort/FILES
new file mode 100644
index 00000000000..e2ef9d3c1ab
--- /dev/null
+++ b/searchlib/src/tests/sort/FILES
@@ -0,0 +1 @@
+sort.cpp
diff --git a/searchlib/src/tests/sort/javaorder.zh b/searchlib/src/tests/sort/javaorder.zh
new file mode 100644
index 00000000000..0d29efc99bd
--- /dev/null
+++ b/searchlib/src/tests/sort/javaorder.zh
@@ -0,0 +1,158 @@
+
+
+
+30雜誌30雜誌
+asiatwnewsasiatwnews
+AZ時尚旅遊AZ時尚旅遊
+bobo小天才養成誌bobo小天才養成誌
+Career職場情報誌Career職場情報誌
+CheersCheers雜誌
+EMBAEMBA雜誌
+ETtodayETtoday
+FASHION QUEEN時尚女王FASHION QUEEN時尚女王
+iLOOKiLOOK電影雜誌
+men&amp;#39;s uno男人誌men&amp;#39;s uno男人誌
+Money 錢Money 錢
+NOWnewsNOWnews
+NOWnews今日新聞網
+PAR表演藝術PAR表演藝術雜誌
+Press Association ImagesPress Association Images
+Smart智富月刊Smart智富月刊
+Taipei WalkerTaipei Walker
+TSNATSNA
+TVBSTVBS
+Yahoo! Taiwan Specials without layoutYahoo! Taiwan Specials without layout
+Yahoo奇摩video.yahoo.com(勿用)
+Yahoo奇摩Yahoo奇摩(爆新聞)
+Yahoo奇摩Yahoo奇摩(新聞)
+Yahoo奇摩Yahoo奇摩(影音)
+Yahoo奇摩新聞Yahoo奇摩新聞(報氣象)
+YourNewsYourNews
+Y特別企畫Y特別企畫
+愛爾達愛爾達
+愛爾達愛爾達電視
+財訊快報財訊快報
+財訊快報財訊快報季刊
+財訊快報季刊財訊快報季刊
+財訊快報季刊財訊快報季刊
+財訊雙週刊財訊雙週刊
+常春月刊常春月刊
+朝鮮日報朝鮮日報
+達志達志
+達志達志
+達志達志
+大家健康雜誌大家健康雜誌
+大師輕鬆讀大師輕鬆讀
+大台灣旅遊網大台灣旅遊網
+東森新聞東森新聞
+東森新聞東森新聞
+東星東星
+俄羅斯新聞網俄羅斯新聞網
+法新社法新社
+非凡新聞非凡新聞
+非凡新聞節目非凡新聞節目
+富爾特消費新聞富爾特消費新聞
+公共電視公共電視
+公視公視
+古美術古美術
+管理雜誌管理雜誌
+光華雜誌台灣光華雜誌
+廣編特輯廣編特輯
+廣告雜誌廣告雜誌
+國際商情雙周刊國際商情雙周刊
+哈佛商業評論哈佛商業評論
+韓國朝鮮日報韓國朝鮮日報
+韓國中央日報韓國中央日報
+韓國中央日報韓國中央日報
+韓星網韓星網
+韓星網韓星網
+華人健康網華人健康網
+華視華視
+華視華視
+華視華視
+環境資訊中心環境資訊中心
+健康醫療網健康醫療網
+健康醫療網健康醫療網
+講義雜誌講義雜誌
+教育廣播電台國立教育廣播電台
+今藝術今藝術
+今周刊今周刊
+今周刊今周刊
+經理人經理人月刊
+鉅亨網鉅亨網
+軍聞社軍聞社
+卡優新聞網卡優新聞網
+康健雜誌康健雜誌
+科學人科學人雜誌
+客家電視客家電視台
+酷搜圖聞酷搜圖聞
+理財周刊理財周刊
+麗台運動報麗台運動報
+聯合文學聯合文學
+聯合新聞網聯合新聞網
+路透社路透社
+路透社路透社
+旅遊經旅遊經
+羅開Golf 頻道羅開Golf 頻道
+媽媽寶寶媽媽寶寶
+美麗佳人美麗佳人雜誌
+美聯社美聯社
+美通社美通社
+民視民視
+民視民視
+明報周刊明報周刊
+男人幫男人幫
+能力雜誌能力雜誌
+年代新聞年代新聞
+年代新聞年代新聞
+年代新聞年代新聞
+年代新聞年代新聞
+紐約時報中文網.紐約時報中文網.
+紐約時報中文網.紐約時報中文網.
+紐約時報中文網紐約時報中文網
+儂儂雜誌儂儂雜誌
+蓬勃網球蓬勃網球
+蘋果日報蘋果日報
+親子天下親子天下
+全國廣播全國廣播
+全球中央全球中央雜誌
+商業周刊商業周刊
+數位家庭數位家庭
+數位時代數位時代
+台灣立報台灣立報
+台灣新生報台灣新生報
+台灣醒報台灣醒報
+台灣醒報台灣醒報
+臺灣時報臺灣時報
+天下雜誌天下雜誌
+統一獅 Video統一獅 Video
+玩高爾夫玩高爾夫
+旺報旺報
+先探投資週刊先探投資週刊
+現代保險健康理財雜誌現代保險健康理財雜誌
+香港中文大學EMBA Videos香港中文大學EMBA
+新頭殼新頭殼
+新新聞周刊新新聞周刊
+兄弟象 Video兄弟象 Video
+訊息快遞Yahoo 奇摩新聞訊息快遞
+野球人野球人
+壹電視壹電視
+壹電視壹電視
+壹蘋果壹蘋果
+義大犀牛 Video義大犀牛 Video
+優活健康網優活健康網
+原視原視
+遠見雜誌遠見雜誌
+張老師月刊張老師月刊
+整形達人整形達人
+中廣中廣新聞網
+中華日報中華日報
+中華職棒中華職棒
+中時電子報中時電子報
+中央廣播電台中央廣播電台
+中央日報中央日報
+中央社中央社
+中央社中央社
+自立晚報自立晚報
+自由時報自由時報
+自由時報自由時報
diff --git a/searchlib/src/tests/sort/sort_test.cpp b/searchlib/src/tests/sort/sort_test.cpp
new file mode 100644
index 00000000000..cf5e1a1cb1f
--- /dev/null
+++ b/searchlib/src/tests/sort/sort_test.cpp
@@ -0,0 +1,295 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+#include <vespa/vespalib/testkit/testapp.h>
+#include <vespa/searchlib/common/sort.h>
+#include <vespa/searchlib/common/sortspec.h>
+#include <vespa/searchlib/common/converters.h>
+#include <vespa/vespalib/util/array.h>
+#include <vector>
+#include <fstream>
+#include <iostream>
+#include <iomanip>
+#include <stdexcept>
+#include <unicode/ustring.h>
+
+LOG_SETUP("sort_test");
+
+using vespalib::Array;
+using namespace search::common;
+using vespalib::ConstBufferRef;
+
+class Test : public vespalib::TestApp
+{
+public:
+ int Main();
+ void testUnsignedIntegerSort();
+ template <typename T>
+ void testSignedIntegerSort();
+ void testStringSort();
+ void testIcu();
+ void testStringCaseInsensitiveSort();
+ void testSortSpec();
+ void testSameAsJavaOrder();
+};
+
+struct LoadedStrings
+{
+ LoadedStrings(const char * v=NULL) : _value(v), _currRadix(_value) { }
+
+ class ValueRadix
+ {
+ public:
+ char operator () (LoadedStrings & x) const {
+ unsigned char c(*x._currRadix);
+ if (c) {
+ x._currRadix++;
+ }
+ return c;
+ }
+ };
+
+ class ValueCompare : public std::binary_function<LoadedStrings, LoadedStrings, bool> {
+ public:
+ bool operator() (const LoadedStrings & x, const LoadedStrings & y) const {
+ return strcmp(x._value, y._value) < 0;
+ }
+ };
+ const char * _value;
+ const char * _currRadix;
+};
+
+void Test::testIcu()
+{
+ {
+ const std::string src("Creation of Bob2007 this is atumated string\this is atumated string;this is atumated string;this is atumated string;this is atumated string;this is atumated string;this is atumated string;this is atumated string;this is atumated string;this is atumated string;this is atumated string;this is atumated string;this is atumated string;this is atumated string;this is atumated string;this is atumated string;this is atumated string;this is atumated string;this is atumated string;this is atumated string;this is atumated string;this is atumated string;this is atumated string;this is atumated string;this is atumated string;this is atumated string;this is atumated string;this is atumated string;this is atumated string;this is atumated string;this is atumated string;this is atumated string; _ 12345567890-=,./;'[;");
+ std::vector<uint16_t> u16Buffer(100);
+ UErrorCode status = U_ZERO_ERROR;
+ int32_t u16Wanted(0);
+ u_strFromUTF8(&u16Buffer[0], u16Buffer.size(), &u16Wanted, src.c_str(), -1, &status);
+ ASSERT_TRUE(U_SUCCESS(status) || (status == U_INVALID_CHAR_FOUND) || ((status == U_BUFFER_OVERFLOW_ERROR) && (u16Wanted > (int)u16Buffer.size())));
+ }
+}
+
+void Test::testUnsignedIntegerSort()
+{
+ search::NumericRadixSorter<uint32_t, true> S;
+ S(NULL, 0);
+
+ Array<uint32_t> array1(1);
+ array1[0] = 1567;
+ S(&array1[0], 1);
+ ASSERT_TRUE(array1[0] == 1567);
+
+ unsigned int N(0x100000);
+ Array<uint32_t> array(N);
+ unsigned seed(1);
+ for(size_t i(0); i < N; i++) {
+ array[i] = rand_r(&seed);
+ }
+ S(&array[0], N);
+ for (size_t i(1); i < N; i++) {
+ ASSERT_TRUE(array[i] >= array[i-1]);
+ }
+}
+
+template<typename T>
+class IntOrder {
+public:
+ uint64_t operator () (T v) const { return v ^ (std::numeric_limits<T>::max() + 1); }
+};
+
+template <typename T>
+void Test::testSignedIntegerSort()
+{
+ search::NumericRadixSorter<T, true> S;
+ S(NULL, 0);
+
+ Array<T> array1(1);
+ array1[0] = 1567;
+ S(&array1[0], 1);
+ ASSERT_TRUE(array1[0] == 1567);
+
+ unsigned int N(0x100000);
+ Array<T> array(N);
+ unsigned seed(1);
+ for(size_t i(0); i < N; i++) {
+ T v = rand_r(&seed);
+ array[i] = (i%2) ? v : -v;
+ }
+ S(&array[0], N);
+ for (size_t i(1); i < N; i++) {
+ ASSERT_TRUE(array[i] >= array[i-1]);
+ }
+}
+
+void Test::testStringSort()
+{
+ Array<LoadedStrings> array1(1);
+
+ unsigned int N(0x1000);
+ Array<LoadedStrings> loaded(N);
+ std::vector<uint32_t> radixScratchPad(N);
+ search::radix_sort(LoadedStrings::ValueRadix(), LoadedStrings::ValueCompare(), search::AlwaysEof<LoadedStrings>(), 1, static_cast<LoadedStrings *>(NULL), 0, &radixScratchPad[0], 0);
+
+ array1[0] = LoadedStrings("a");
+ search::radix_sort(LoadedStrings::ValueRadix(), LoadedStrings::ValueCompare(), search::AlwaysEof<LoadedStrings>(), 1, &array1[0], 1, &radixScratchPad[0], 0);
+ ASSERT_TRUE(strcmp(array1[0]._value, "a") == 0);
+
+ loaded[0] = LoadedStrings("a");
+ for(size_t i(1); i < N; i++) {
+ loaded[i] = LoadedStrings("");
+ }
+
+ search::radix_sort(LoadedStrings::ValueRadix(), LoadedStrings::ValueCompare(), search::AlwaysEof<LoadedStrings>(), 1, &loaded[0], N, &radixScratchPad[0], 0);
+ LoadedStrings::ValueCompare vc;
+ for(size_t i(1); i < N; i++) {
+ ASSERT_TRUE( ! vc(loaded[i], loaded[i-1]));
+ }
+}
+
+void Test::testStringCaseInsensitiveSort()
+{
+}
+
+void Test::testSortSpec()
+{
+ {
+ SortSpec sortspec("-name");
+ EXPECT_EQUAL(sortspec.size(), 1u);
+ EXPECT_EQUAL(sortspec[0]._field, "name");
+ EXPECT_TRUE( ! sortspec[0]._ascending);
+ EXPECT_TRUE(sortspec[0]._converter.get() == NULL);
+ }
+
+ {
+ SortSpec sortspec("-lowercase(name)");
+ EXPECT_EQUAL(sortspec.size(), 1u);
+ EXPECT_EQUAL(sortspec[0]._field, "name");
+ EXPECT_TRUE( ! sortspec[0]._ascending);
+ EXPECT_TRUE(sortspec[0]._converter.get() != NULL);
+ EXPECT_TRUE(dynamic_cast<LowercaseConverter *>(sortspec[0]._converter.get()) != NULL);
+ }
+
+ {
+ SortSpec sortspec("-uca(name,nn_no)");
+ EXPECT_EQUAL(sortspec.size(), 1u);
+ EXPECT_EQUAL(sortspec[0]._field, "name");
+ EXPECT_TRUE( ! sortspec[0]._ascending);
+ EXPECT_TRUE(sortspec[0]._converter.get() != NULL);
+ EXPECT_TRUE(dynamic_cast<UcaConverter *>(sortspec[0]._converter.get()) != NULL);
+ }
+ {
+ SortSpec sortspec("-uca(name,nn_no,PRIMARY)");
+ EXPECT_EQUAL(sortspec.size(), 1u);
+ EXPECT_EQUAL(sortspec[0]._field, "name");
+ EXPECT_TRUE( ! sortspec[0]._ascending);
+ EXPECT_TRUE(sortspec[0]._converter.get() != NULL);
+ EXPECT_TRUE(dynamic_cast<UcaConverter *>(sortspec[0]._converter.get()) != NULL);
+ }
+ {
+ SortSpec sortspec("-uca(name,nn_no,SECONDARY)");
+ EXPECT_EQUAL(sortspec.size(), 1u);
+ EXPECT_EQUAL(sortspec[0]._field, "name");
+ EXPECT_TRUE( ! sortspec[0]._ascending);
+ EXPECT_TRUE(sortspec[0]._converter.get() != NULL);
+ EXPECT_TRUE(dynamic_cast<UcaConverter *>(sortspec[0]._converter.get()) != NULL);
+ }
+ {
+ SortSpec sortspec("-uca(name,nn_no,TERTIARY)");
+ EXPECT_EQUAL(sortspec.size(), 1u);
+ EXPECT_EQUAL(sortspec[0]._field, "name");
+ EXPECT_TRUE( ! sortspec[0]._ascending);
+ EXPECT_TRUE(sortspec[0]._converter.get() != NULL);
+ EXPECT_TRUE(dynamic_cast<UcaConverter *>(sortspec[0]._converter.get()) != NULL);
+ }
+ {
+ SortSpec sortspec("-uca(name,nn_no,QUATERNARY)");
+ EXPECT_EQUAL(sortspec.size(), 1u);
+ EXPECT_EQUAL(sortspec[0]._field, "name");
+ EXPECT_TRUE( ! sortspec[0]._ascending);
+ EXPECT_TRUE(sortspec[0]._converter.get() != NULL);
+ EXPECT_TRUE(dynamic_cast<UcaConverter *>(sortspec[0]._converter.get()) != NULL);
+ }
+ {
+ SortSpec sortspec("-uca(name,nn_no,IDENTICAL)");
+ EXPECT_EQUAL(sortspec.size(), 1u);
+ EXPECT_EQUAL(sortspec[0]._field, "name");
+ EXPECT_TRUE( ! sortspec[0]._ascending);
+ EXPECT_TRUE(sortspec[0]._converter.get() != NULL);
+ EXPECT_TRUE(dynamic_cast<UcaConverter *>(sortspec[0]._converter.get()) != NULL);
+ }
+ {
+ SortSpec sortspec("-uca(name,zh)");
+ EXPECT_EQUAL(sortspec.size(), 1u);
+ EXPECT_EQUAL(sortspec[0]._field, "name");
+ EXPECT_TRUE( ! sortspec[0]._ascending);
+ EXPECT_TRUE(sortspec[0]._converter.get() != NULL);
+ EXPECT_TRUE(dynamic_cast<UcaConverter *>(sortspec[0]._converter.get()) != NULL);
+ }
+ {
+ SortSpec sortspec("-uca(name,finnes_ikke)");
+ EXPECT_EQUAL(sortspec.size(), 1u);
+ EXPECT_EQUAL(sortspec[0]._field, "name");
+ EXPECT_TRUE( ! sortspec[0]._ascending);
+ EXPECT_TRUE(sortspec[0]._converter.get() != NULL);
+ EXPECT_TRUE(dynamic_cast<UcaConverter *>(sortspec[0]._converter.get()) != NULL);
+ }
+ {
+ try {
+ SortSpec sortspec("-uca(name,nn_no,NTERTIARY)");
+ EXPECT_TRUE(false);
+ } catch (const std::runtime_error & e) {
+ EXPECT_TRUE(true);
+ EXPECT_TRUE(strcmp(e.what(), "Illegal uca collation strength : NTERTIARY") == 0);
+ }
+ }
+}
+
+void Test::testSameAsJavaOrder()
+{
+ std::vector<vespalib::string> javaOrder;
+ std::ifstream is("javaorder.zh");
+ while (!is.eof()) {
+ std::string line;
+ getline(is, line);
+ if (!is.eof()) {
+ javaOrder.push_back(line);
+ }
+ }
+ EXPECT_EQUAL(158u, javaOrder.size());
+ search::common::UcaConverter uca("zh", "PRIMARY");
+ vespalib::ConstBufferRef fkey = uca.convert(vespalib::ConstBufferRef(javaOrder[0].c_str(), javaOrder[0].size()));
+ vespalib::string prev(fkey.c_str(), fkey.size());
+ for (size_t i(1); i < javaOrder.size(); i++) {
+ vespalib::ConstBufferRef key = uca.convert(vespalib::ConstBufferRef(javaOrder[i].c_str(), javaOrder[i].size()));
+ vespalib::HexDump dump(key.c_str(), key.size());
+ vespalib::string current(key.c_str(), key.size());
+ UErrorCode status(U_ZERO_ERROR);
+ UCollationResult cr = uca.getCollator().compareUTF8(javaOrder[i-1].c_str(), javaOrder[i].c_str(), status);
+ std::cout << std::setw(3) << i << ": " << status << "(" << u_errorName(status) << ") - " << cr << " '" << dump << "' : '" << javaOrder[i] << "'" << std::endl;
+ EXPECT_TRUE(prev <= current);
+ EXPECT_TRUE(U_SUCCESS(status));
+ EXPECT_TRUE(cr == UCOL_LESS || cr == UCOL_EQUAL);
+ prev = current;
+ }
+}
+
+
+TEST_APPHOOK(Test);
+
+int Test::Main()
+{
+ TEST_INIT("sort_test");
+
+ testUnsignedIntegerSort();
+ testSignedIntegerSort<int32_t>();
+ testSignedIntegerSort<int64_t>();
+ testStringSort();
+ testStringCaseInsensitiveSort();
+ testSortSpec();
+ testIcu();
+ testSameAsJavaOrder();
+
+ TEST_DONE();
+}
diff --git a/searchlib/src/tests/sort/sortbenchmark.cpp b/searchlib/src/tests/sort/sortbenchmark.cpp
new file mode 100644
index 00000000000..1309cf57d5d
--- /dev/null
+++ b/searchlib/src/tests/sort/sortbenchmark.cpp
@@ -0,0 +1,115 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+#include <vespa/vespalib/testkit/testapp.h>
+#include <vespa/searchlib/common/sort.h>
+#include <vespa/vespalib/util/array.h>
+#include <vector>
+
+LOG_SETUP("sort_test");
+
+using vespalib::Array;
+using vespalib::ConstBufferRef;
+
+class Test : public vespalib::TestApp
+{
+public:
+ typedef std::vector<uint32_t> V;
+ std::vector< std::vector<uint32_t> > _data;
+ int Main();
+ void generateVectors(size_t numVectors, size_t values);
+ V merge();
+ void twoWayMerge();
+ V cat() const;
+};
+
+void Test::generateVectors(size_t numVectors, size_t values)
+{
+ _data.resize(numVectors);
+ for (size_t j(0); j < numVectors; j++) {
+ V & v(_data[j]);
+ v.resize(values);
+ for (size_t i(0); i < values; i++) {
+ v[i] = i;
+ }
+ }
+}
+
+Test::V Test::merge()
+{
+ twoWayMerge();
+ return _data[0];
+}
+
+void Test::twoWayMerge()
+{
+ std::vector<V> n((_data.size()+1)/2);
+
+ for ( size_t i(0), m(_data.size()/2); i < m; i++) {
+ const V & a = _data[i*2 + 0];
+ const V & b = _data[i*2 + 1];
+ n[i].resize(a.size() + b.size());
+ std::merge(a.begin(), a.end(), b.begin(), b.end(), n[i].begin());
+ }
+ if (_data.size()%2) {
+ n[n.size()-1].swap(_data[_data.size() - 1]);
+ }
+ _data.swap(n);
+ if (_data.size() > 1) {
+ twoWayMerge();
+ }
+}
+
+Test::V Test::cat() const
+{
+ size_t sum(0);
+ for (size_t i(0), m(_data.size()); i < m; i++) {
+ sum += _data[i].size();
+ }
+ V c;
+ c.reserve(sum);
+ for (size_t i(0), m(_data.size()); i < m; i++) {
+ const V & v(_data[i]);
+ c.insert(c.end(), v.begin(), v.end());
+ }
+
+ return c;
+}
+
+TEST_APPHOOK(Test);
+
+int Test::Main()
+{
+ TEST_INIT("sortbenchmark");
+ size_t numVectors(11);
+ size_t values(10000000);
+ vespalib::string type("radix");
+ if (_argc > 1) {
+ values = strtol(_argv[1], NULL, 0);
+ if (_argc > 2) {
+ numVectors = strtol(_argv[2], NULL, 0);
+ if (_argc > 2) {
+ type = _argv[3];
+ }
+ }
+ }
+
+ printf("Start with %ld vectors with %ld values and type '%s'(radix, qsort, merge)\n", numVectors, values, type.c_str());
+ generateVectors(numVectors, values);
+ printf("Start cat\n");
+ V v = cat();
+ printf("Cat %ld values\n", v.size());
+ if (type == "merge") {
+ V m = merge();
+ printf("Merged %ld values\n", m.size());
+ } else if (type == "qsort") {
+ std::sort(v.begin(), v.end());
+ printf("sorted %ld value with std::sort\n", v.size());
+ } else {
+ search::NumericRadixSorter<uint32_t, true> S;
+ S(&v[0], v.size());
+ printf("sorted %ld value with radix::sort\n", v.size());
+ }
+
+ TEST_DONE();
+}
diff --git a/searchlib/src/tests/sort/uca.cpp b/searchlib/src/tests/sort/uca.cpp
new file mode 100644
index 00000000000..b9225c94a66
--- /dev/null
+++ b/searchlib/src/tests/sort/uca.cpp
@@ -0,0 +1,121 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+#include <vespa/vespalib/testkit/testapp.h>
+#include <vespa/searchlib/common/sort.h>
+#include <vespa/searchlib/common/sortspec.h>
+#include <vespa/searchlib/common/converters.h>
+#include <vespa/vespalib/util/array.h>
+#include <memory>
+#include <string>
+#include <vector>
+#include <stdexcept>
+#include <unicode/ustring.h>
+
+LOG_SETUP("uca_stress");
+
+using icu::Collator;
+
+class Test : public vespalib::TestApp
+{
+public:
+ int Main();
+ void testFromDat();
+};
+
+
+void Test::testFromDat()
+{
+ size_t badnesses = 0;
+
+ std::string startMark("abc");
+ std::string midMark("def");
+ std::string endMark("ghi");
+
+ UErrorCode status = U_ZERO_ERROR;
+ auto coll = std::unique_ptr<Collator>(Collator::createInstance(icu::Locale("en"), status));
+
+ coll->setStrength(Collator::PRIMARY);
+
+ std::vector<uint16_t> u16buffer(100);
+ std::vector<uint8_t> u8buffer(10);
+
+ int fd = open("sort-blobs.dat", O_RDONLY);
+ char sbuf[4];
+
+ int num=0;
+
+ uint32_t atleast = 0;
+
+ while (read(fd, sbuf, 4) == 4) {
+ if (startMark == sbuf) {
+ uint32_t len = 0;
+ int r = read(fd, &len, 4);
+
+ EXPECT_EQUAL(4, r);
+ r = read(fd, sbuf, 4);
+ EXPECT_EQUAL(4, r);
+ EXPECT_EQUAL(midMark, sbuf);
+
+ if (u16buffer.size() < len) {
+ u16buffer.resize(len);
+ }
+ r = read(fd, &u16buffer[0], len*2);
+ EXPECT_EQUAL((int)len*2, r);
+
+ r = read(fd, sbuf, 4);
+ EXPECT_EQUAL(4, r);
+ EXPECT_EQUAL(endMark, sbuf);
+
+ uint32_t wanted = coll->getSortKey(&u16buffer[0], len, NULL, 0);
+
+ EXPECT_TRUE(wanted > 0);
+ EXPECT_TRUE(wanted >= len);
+ EXPECT_TRUE(wanted < len*6);
+
+ if (wanted + 20 > u8buffer.size()) {
+ u8buffer.resize(wanted+20);
+ }
+
+ for (uint32_t pretend = 1; pretend < wanted+8; ++pretend) {
+ memset(&u8buffer[0], 0x99, u8buffer.size());
+ uint32_t got = coll->getSortKey(&u16buffer[0], len, &u8buffer[0], pretend);
+ EXPECT_EQUAL(wanted, got);
+
+ if (u8buffer[pretend+1] != 0x99) {
+ printf("wrote 2 bytes too far: wanted space %d, pretend allocated %d, last good=%02x, bad=%02x %02x\n",
+ wanted, pretend, u8buffer[pretend-1],
+ u8buffer[pretend], u8buffer[pretend+1]);
+ } else if (u8buffer[pretend] != 0x99) {
+ ++badnesses;
+ if (wanted > atleast) {
+ atleast = wanted;
+ printf("wrote 1 byte too far: wanted space %d, pretend allocated %d, last good=%02x, bad=%02x\n",
+ wanted, pretend, u8buffer[pretend-1], u8buffer[pretend]);
+ }
+ }
+ }
+
+ memset(&u8buffer[0], 0x99, u8buffer.size());
+ uint32_t got = coll->getSortKey(&u16buffer[0], len, &u8buffer[0], u8buffer.size());
+ EXPECT_EQUAL(wanted, got);
+
+ EXPECT_EQUAL('\0', u8buffer[got-1]);
+ EXPECT_EQUAL((uint8_t)0x99, u8buffer[got]);
+ }
+ if (++num >= 10000) {
+ TEST_FLUSH();
+ num=0;
+ }
+ }
+ EXPECT_EQUAL(0u, badnesses);
+}
+
+TEST_APPHOOK(Test);
+
+int Test::Main()
+{
+ TEST_INIT("uca_stress");
+ testFromDat();
+ TEST_DONE();
+}
diff --git a/searchlib/src/tests/sortresults/.gitignore b/searchlib/src/tests/sortresults/.gitignore
new file mode 100644
index 00000000000..1b191671559
--- /dev/null
+++ b/searchlib/src/tests/sortresults/.gitignore
@@ -0,0 +1,7 @@
+*.core
+.depend
+Makefile
+core
+core.*
+sortresults
+searchlib_sortresults_app
diff --git a/searchlib/src/tests/sortresults/CMakeLists.txt b/searchlib/src/tests/sortresults/CMakeLists.txt
new file mode 100644
index 00000000000..6889e4c517d
--- /dev/null
+++ b/searchlib/src/tests/sortresults/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_sortresults_app
+ SOURCES
+ sorttest.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_sortresults_app COMMAND searchlib_sortresults_app)
diff --git a/searchlib/src/tests/sortresults/sorttest.cpp b/searchlib/src/tests/sortresults/sorttest.cpp
new file mode 100644
index 00000000000..4c3326f4cbc
--- /dev/null
+++ b/searchlib/src/tests/sortresults/sorttest.cpp
@@ -0,0 +1,99 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright (C) 2001-2003 Fast Search & Transfer ASA
+// Copyright (C) 2003 Overture Services Norway AS
+
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/searchlib/common/bitvector.h>
+#include <vespa/searchlib/common/sortresults.h>
+
+using search::RankedHit;
+
+unsigned int
+myrandom()
+{
+ unsigned int r;
+ r = random() & 0xffff;
+ r <<= 16;
+ r += random() & 0xffff;
+ return r;
+}
+
+
+bool
+test_sort(unsigned int caseNum, unsigned int n, unsigned int ntop)
+{
+ bool ok = true;
+ double minmax;
+ unsigned int i;
+ RankedHit *array;
+
+ if (ntop == 0) {
+ printf("CASE %03d: [%d/%d] PASS\n", caseNum, ntop, n);
+ return true;
+ }
+ if (ntop > n)
+ ntop = n;
+
+ array = new RankedHit[n];
+ assert(array != NULL);
+
+ for (i = 0; i < n; i++) {
+ array[i]._docId = i;
+ array[i]._rankValue = myrandom();
+ }
+ FastS_SortResults(array, n, ntop);
+
+ minmax = array[ntop - 1]._rankValue;
+ for(i = 0; i < n; i++) {
+ if (i < ntop && i > 0
+ && array[i]._rankValue > array[i - 1]._rankValue) {
+ printf("ERROR: rank(%d) > rank(%d)\n",
+ i, i - 1);
+ ok = false;
+ break;
+ }
+ if (i >= ntop &&
+ array[i]._rankValue > minmax) {
+ printf("ERROR: rank(%d) > rank(%d)\n",
+ i, ntop - 1);
+ ok = false;
+ break;
+ }
+ }
+ delete [] array;
+ printf("CASE %03d: [%d/%d] %s\n", caseNum, ntop, n,
+ (ok)? "PASS" : "FAIL");
+ return ok;
+}
+
+
+int
+main(int argc, char **argv)
+{
+ (void) argc;
+ (void) argv;
+
+ bool ok = true;
+ unsigned int caseNum = 0;
+ unsigned int i;
+
+ ok &= test_sort(++caseNum, 1, 1);
+ for (i = 0; i < 5; i++) {
+ ok &= test_sort(++caseNum, 2, 2);
+ }
+ for (i = 0; i < 5; i++) {
+ ok &= test_sort(++caseNum, 50, 50);
+ }
+ for (i = 0; i < 5; i++) {
+ ok &= test_sort(++caseNum, 50000, 1);
+ ok &= test_sort(++caseNum, 50000, 500);
+ ok &= test_sort(++caseNum, 50000, 1000);
+ ok &= test_sort(++caseNum, 50000, 2000);
+ ok &= test_sort(++caseNum, 50000, 5000);
+ ok &= test_sort(++caseNum, 50000, 10000);
+ ok &= test_sort(++caseNum, 50000, 50000);
+ }
+ printf("CONCLUSION: TEST %s\n", (ok)? "PASSED" : "FAILED");
+ return (ok)? 0 : 1;
+}
diff --git a/searchlib/src/tests/sortspec/.gitignore b/searchlib/src/tests/sortspec/.gitignore
new file mode 100644
index 00000000000..dff7336208a
--- /dev/null
+++ b/searchlib/src/tests/sortspec/.gitignore
@@ -0,0 +1,4 @@
+.depend
+Makefile
+multilevelsort_test
+searchlib_multilevelsort_test_app
diff --git a/searchlib/src/tests/sortspec/CMakeLists.txt b/searchlib/src/tests/sortspec/CMakeLists.txt
new file mode 100644
index 00000000000..a436bb744c5
--- /dev/null
+++ b/searchlib/src/tests/sortspec/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_multilevelsort_test_app
+ SOURCES
+ multilevelsort.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_multilevelsort_test_app COMMAND searchlib_multilevelsort_test_app)
diff --git a/searchlib/src/tests/sortspec/multilevelsort.cpp b/searchlib/src/tests/sortspec/multilevelsort.cpp
new file mode 100644
index 00000000000..f151bfaf132
--- /dev/null
+++ b/searchlib/src/tests/sortspec/multilevelsort.cpp
@@ -0,0 +1,413 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("multilevelsort_test");
+#include <vespa/searchlib/common/sortresults.h>
+#include <vespa/searchlib/attribute/attribute.h>
+#include <vespa/searchlib/attribute/attributeguard.h>
+#include <vespa/searchlib/attribute/integerbase.h>
+#include <vespa/searchlib/attribute/floatbase.h>
+#include <vespa/searchlib/attribute/stringbase.h>
+#include <vespa/searchlib/attribute/attributefactory.h>
+#include <vespa/searchlib/attribute/attributevector.hpp>
+#include <vespa/vespalib/testkit/testapp.h>
+#include <map>
+#include <sstream>
+
+using namespace search;
+
+typedef FastS_SortSpec::VectorRef VectorRef;
+typedef IntegerAttributeTemplate<uint8_t> Uint8;
+typedef IntegerAttributeTemplate<int8_t> Int8;
+typedef IntegerAttributeTemplate<uint16_t> Uint16;
+typedef IntegerAttributeTemplate<int16_t> Int16;
+typedef IntegerAttributeTemplate<uint32_t> Uint32;
+typedef IntegerAttributeTemplate<int32_t> Int32;
+typedef IntegerAttributeTemplate<uint64_t> Uint64;
+typedef IntegerAttributeTemplate<int64_t> Int64;
+typedef FloatingPointAttributeTemplate<float> Float;
+typedef FloatingPointAttributeTemplate<double> Double;
+typedef std::map<std::string, AttributeVector::SP > VectorMap;
+typedef AttributeVector::SP AttributePtr;
+using search::attribute::Config;
+using search::attribute::BasicType;
+using search::attribute::CollectionType;
+
+class MultilevelSortTest : public vespalib::TestApp
+{
+public:
+ enum AttrType {
+ INT8,
+ INT16,
+ INT32,
+ INT64,
+ FLOAT,
+ DOUBLE,
+ STRING,
+ RANK,
+ DOCID,
+ NONE
+ };
+ struct Spec {
+ Spec() : _name("unknown"), _type(NONE), _asc(true) {}
+ Spec(const std::string &name, AttrType type) : _name(name), _type(type), _asc(true) {}
+ Spec(const std::string &name, AttrType type, bool asc) : _name(name), _type(type), _asc(asc) {}
+ std::string _name;
+ AttrType _type;
+ bool _asc;
+ };
+private:
+ int _sortMethod;
+ template<typename T>
+ T getRandomValue() {
+ T min = std::numeric_limits<T>::min();
+ T max = std::numeric_limits<T>::max();
+ return min + static_cast<T>((max - min) * (((float)rand() / (float)RAND_MAX)));
+ }
+ template<typename T>
+ void fill(IntegerAttribute *attr, uint32_t size, uint32_t unique = 0);
+ template<typename T>
+ void fill(FloatingPointAttribute *attr, uint32_t size, uint32_t unique = 0);
+ void fill(StringAttribute *attr, uint32_t size, const std::vector<std::string> &values);
+ template<typename T, typename V>
+ int compareTemplate(T *vector, uint32_t a, uint32_t b);
+ int compare(AttributeVector *vector, AttrType type,
+ uint32_t a, uint32_t b);
+ void sortAndCheck(const std::vector<Spec> &spec, uint32_t num,
+ uint32_t unique, const std::vector<std::string> &strValues);
+ void testSortMethod(int method);
+public:
+ MultilevelSortTest() : _sortMethod(0) { srand(time(NULL)); }
+ int Main();
+};
+
+template<typename T>
+void MultilevelSortTest::fill(IntegerAttribute *attr, uint32_t size,
+ uint32_t unique)
+{
+ ASSERT_TRUE(attr->addDocs(size));
+ std::vector<T> values;
+ for (uint32_t j = 0; j < unique; ++j) {
+ if (j % 2 == 0) {
+ values.push_back(std::numeric_limits<T>::min() + static_cast<T>(j));
+ } else {
+ values.push_back(std::numeric_limits<T>::max() - static_cast<T>(j));
+ }
+ }
+ for (uint32_t i = 0; i < size; ++i) {
+ if (unique == 0) {
+ attr->update(i, getRandomValue<T>());
+ } else {
+ uint32_t idx = rand() % values.size();
+ attr->update(i, values[idx]);
+ }
+ }
+}
+
+template<typename T>
+void MultilevelSortTest::fill(FloatingPointAttribute *attr, uint32_t size,
+ uint32_t unique)
+{
+ ASSERT_TRUE(attr->addDocs(size));
+ std::vector<T> values;
+ for (uint32_t j = 0; j < unique; ++j) {
+ if (j % 2 == 0) {
+ values.push_back(std::numeric_limits<T>::min() + static_cast<T>(j));
+ } else {
+ values.push_back(std::numeric_limits<T>::max() - static_cast<T>(j));
+ }
+ }
+ for (uint32_t i = 0; i < size; ++i) {
+ if (unique == 0) {
+ attr->update(i, getRandomValue<T>());
+ } else {
+ uint32_t idx = rand() % values.size();
+ //LOG(info, "fill vector<%s>::doc<%d> = %f (idx=%d)", attr->getName().c_str(), i, values[idx], idx);
+ attr->update(i, values[idx]);
+ }
+ }
+}
+
+void
+MultilevelSortTest::fill(StringAttribute *attr, uint32_t size,
+ const std::vector<std::string> &values)
+{
+ ASSERT_TRUE(attr->addDocs(size));
+ for (uint32_t i = 0; i < size; ++i) {
+ if (values.empty()) {
+ uint32_t len = 1 + static_cast<uint32_t>(127 * (((float)rand() / (float)RAND_MAX)));
+ std::string value;
+ for (uint32_t j = 0; j < len; ++j) {
+ char c = 'a' + static_cast<char>(('Z' - 'a') * (((float)rand() / (float)RAND_MAX)));
+ value.append(1, c);
+ }
+ attr->update(i, value.c_str());
+ } else {
+ uint32_t idx = rand() % values.size();
+ //LOG(info, "fill vector<%s>::doc<%d> = %s (idx=%d)", attr->getName().c_str(),
+ // i, values[idx].c_str(), idx);
+ attr->update(i, values[idx].c_str());
+ }
+ }
+}
+
+template<typename T, typename V>
+int
+MultilevelSortTest::compareTemplate(T *vector, uint32_t a, uint32_t b)
+{
+ V va;
+ V vb;
+ vector->getAll(a, &va, 1);
+ vector->getAll(b, &vb, 1);
+ if (va == vb) {
+ return 0;
+ } else if (va < vb) {
+ return -1;
+ }
+ return 1;
+}
+
+int
+MultilevelSortTest::compare(AttributeVector *vector, AttrType type,
+ uint32_t a, uint32_t b)
+{
+ if (type == INT8) {
+ return compareTemplate<Int8, int8_t>(static_cast<Int8*>(vector), a, b);
+ } else if (type == INT16) {
+ return compareTemplate<Int16, int16_t>(static_cast<Int16*>(vector), a, b);
+ } else if (type == INT32) {
+ return compareTemplate<Int32, int32_t>(static_cast<Int32*>(vector), a, b);
+ } else if (type == INT64) {
+ return compareTemplate<Int64, int64_t>(static_cast<Int64*>(vector), a, b);
+ } else if (type == FLOAT) {
+ return compareTemplate<Float, float>(static_cast<Float*>(vector), a, b);
+ } else if (type == DOUBLE) {
+ return compareTemplate<Double, double>(static_cast<Double*>(vector), a, b);
+ } else if (type == STRING) {
+ StringAttribute *vString = static_cast<StringAttribute*>(vector);
+ const char *va = vString->get(a);
+ const char *vb = vString->get(b);
+ std::string sa(va);
+ std::string sb(vb);
+ if (sa == sb) {
+ return 0;
+ } else if (sa < sb) {
+ return -1;
+ }
+ return 1;
+ } else {
+ ASSERT_TRUE(false);
+ return 0;
+ }
+}
+
+void
+MultilevelSortTest::sortAndCheck(const std::vector<Spec> &spec, uint32_t num,
+ uint32_t unique, const std::vector<std::string> &strValues)
+{
+ VectorMap vec;
+ // generate attribute vectors
+ for (uint32_t i = 0; i < spec.size(); ++i) {
+ std::string name = spec[i]._name;
+ AttrType type = spec[i]._type;
+ if (type == INT8) {
+ Config cfg(BasicType::INT8, CollectionType::SINGLE);
+ vec[name] = AttributeFactory::createAttribute(name, cfg);
+ fill<int8_t>(static_cast<IntegerAttribute *>(vec[name].get()), num, unique);
+ } else if (type == INT16) {
+ Config cfg(BasicType::INT16, CollectionType::SINGLE);
+ vec[name] = AttributeFactory::createAttribute(name, cfg);
+ fill<int16_t>(static_cast<IntegerAttribute *>(vec[name].get()), num, unique);
+ } else if (type == INT32) {
+ Config cfg(BasicType::INT32, CollectionType::SINGLE);
+ vec[name] = AttributeFactory::createAttribute(name, cfg);
+ fill<int32_t>(static_cast<IntegerAttribute *>(vec[name].get()), num, unique);
+ } else if (type == INT64) {
+ Config cfg(BasicType::INT64, CollectionType::SINGLE);
+ vec[name] = AttributeFactory::createAttribute(name, cfg);
+ fill<int64_t>(static_cast<IntegerAttribute *>(vec[name].get()), num, unique);
+ } else if (type == FLOAT) {
+ Config cfg(BasicType::FLOAT, CollectionType::SINGLE);
+ vec[name] = AttributeFactory::createAttribute(name, cfg);
+ fill<float>(static_cast<FloatingPointAttribute *>(vec[name].get()), num, unique);
+ } else if (type == DOUBLE) {
+ Config cfg(BasicType::DOUBLE, CollectionType::SINGLE);
+ vec[name] = AttributeFactory::createAttribute(name, cfg);
+ fill<double>(static_cast<FloatingPointAttribute *>(vec[name].get()), num, unique);
+ } else if (type == STRING) {
+ Config cfg(BasicType::STRING, CollectionType::SINGLE);
+ vec[name] = AttributeFactory::createAttribute(name, cfg);
+ fill(static_cast<StringAttribute *>(vec[name].get()), num, strValues);
+ }
+ if (vec[name].get() != NULL)
+ vec[name]->commit();
+ }
+
+ RankedHit *hits = new RankedHit[num];
+ for (uint32_t i = 0; i < num; ++i) {
+ hits[i]._docId = i;
+ hits[i]._rankValue = getRandomValue<uint32_t>();
+ }
+
+ vespalib::Clock clock;
+ vespalib::Doom doom(clock, std::numeric_limits<long>::max());
+ FastS_SortSpec sorter(doom, _sortMethod);
+ // init sorter with sort data
+ for(uint32_t i = 0; i < spec.size(); ++i) {
+ AttributeGuard ag;
+ if (spec[i]._type == RANK) {
+ sorter._vectors.push_back
+ (VectorRef(spec[i]._asc ? FastS_SortSpec::ASC_RANK :
+ FastS_SortSpec::DESC_RANK, NULL, NULL));
+ } else if (spec[i]._type == DOCID) {
+ sorter._vectors.push_back
+ (VectorRef(spec[i]._asc ? FastS_SortSpec::ASC_DOCID :
+ FastS_SortSpec::DESC_DOCID, NULL, NULL));
+ } else {
+ const search::attribute::IAttributeVector * v = vec[spec[i]._name].get();
+ sorter._vectors.push_back
+ (VectorRef(spec[i]._asc ? FastS_SortSpec::ASC_VECTOR :
+ FastS_SortSpec::DESC_VECTOR, v, NULL));
+ }
+ }
+
+ FastOS_Time timer;
+ timer.SetNow();
+ sorter.sortResults(hits, num, num);
+ LOG(info, "sort time = %f ms", timer.MilliSecsToNow());
+
+ uint32_t *offsets = new uint32_t[num + 1];
+ char *buf = new char[sorter.getSortDataSize(0, num)];
+ sorter.copySortData(0, num, offsets, buf);
+
+ // check results
+ for (uint32_t i = 0; i < num - 1; ++i) {
+ for (uint32_t j = 0; j < spec.size(); ++j) {
+ int cmp = 0;
+ if (spec[j]._type == RANK) {
+ if (hits[i]._rankValue < hits[i+1]._rankValue) {
+ cmp = -1;
+ } else if (hits[i]._rankValue > hits[i+1]._rankValue) {
+ cmp = 1;
+ }
+ } else if (spec[j]._type == DOCID) {
+ if (hits[i]._docId < hits[i+1]._docId) {
+ cmp = -1;
+ } else if (hits[i]._docId > hits[i+1]._docId) {
+ cmp = 1;
+ }
+ } else {
+ AttributeVector *av = vec[spec[j]._name].get();
+ cmp = compare(av, spec[j]._type,
+ hits[i]._docId, hits[i+1]._docId);
+ }
+ if (spec[j]._asc) {
+ EXPECT_TRUE(cmp <= 0);
+ if (cmp < 0) {
+ break;
+ }
+ } else {
+ EXPECT_TRUE(cmp >= 0);
+ if (cmp > 0) {
+ break;
+ }
+ }
+ }
+ // check binary sort data
+ uint32_t minLen = std::min(sorter._sortDataArray[i]._len,
+ sorter._sortDataArray[i+1]._len);
+ int cmp = memcmp(&sorter._binarySortData[0] + sorter._sortDataArray[i]._idx,
+ &sorter._binarySortData[0] + sorter._sortDataArray[i+1]._idx,
+ minLen);
+ EXPECT_TRUE(cmp <= 0);
+ EXPECT_TRUE(sorter._sortDataArray[i]._len == (offsets[i+1] - offsets[i]));
+ cmp = memcmp(&sorter._binarySortData[0] + sorter._sortDataArray[i]._idx,
+ buf + offsets[i], sorter._sortDataArray[i]._len);
+ EXPECT_TRUE(cmp == 0);
+ }
+ EXPECT_TRUE(sorter._sortDataArray[num-1]._len == (offsets[num] - offsets[num-1]));
+ int cmp = memcmp(&sorter._binarySortData[0] + sorter._sortDataArray[num-1]._idx,
+ buf + offsets[num-1], sorter._sortDataArray[num-1]._len);
+ EXPECT_TRUE(cmp == 0);
+
+ delete [] hits;
+ delete [] offsets;
+ delete [] buf;
+}
+
+int
+MultilevelSortTest::Main()
+{
+ TEST_INIT("multilevelsort_test");
+
+ testSortMethod(0);
+ testSortMethod(1);
+ testSortMethod(2);
+
+ TEST_DONE();
+}
+
+void MultilevelSortTest::testSortMethod(int method)
+{
+ _sortMethod = method;
+ {
+ std::vector<Spec> spec;
+ spec.push_back(Spec("int8", INT8));
+ spec.push_back(Spec("int16", INT16));
+ spec.push_back(Spec("int32", INT32));
+ spec.push_back(Spec("int64", INT64));
+ spec.push_back(Spec("float", FLOAT));
+ spec.push_back(Spec("double", DOUBLE));
+ spec.push_back(Spec("string", STRING));
+ spec.push_back(Spec("rank", RANK));
+ spec.push_back(Spec("docid", DOCID));
+
+ std::vector<std::string> strValues;
+ strValues.push_back("applications");
+ strValues.push_back("places");
+ strValues.push_back("system");
+ strValues.push_back("vespa search core");
+
+ srand(12345);
+ sortAndCheck(spec, 5000, 4, strValues);
+ srand(time(NULL));
+ sortAndCheck(spec, 5000, 4, strValues);
+
+ strValues.push_back("multilevelsort");
+ strValues.push_back("trondheim");
+ strValues.push_back("ubuntu");
+ strValues.push_back("fastserver4");
+
+ srand(56789);
+ sortAndCheck(spec, 5000, 8, strValues);
+ srand(time(NULL));
+ sortAndCheck(spec, 5000, 8, strValues);
+ }
+ {
+ std::vector<std::string> none;
+ uint32_t num = 50;
+ sortAndCheck(std::vector<Spec>(1, Spec("int8", INT8, true)), num, 0, none);
+ sortAndCheck(std::vector<Spec>(1, Spec("int16", INT16, true)), num, 0, none);
+ sortAndCheck(std::vector<Spec>(1, Spec("int32", INT32, true)), num, 0, none);
+ sortAndCheck(std::vector<Spec>(1, Spec("int64", INT64, true)), num, 0, none);
+ sortAndCheck(std::vector<Spec>(1, Spec("float", FLOAT, true)), num, 0, none);
+ sortAndCheck(std::vector<Spec>(1, Spec("double", DOUBLE, true)), num, 0, none);
+ sortAndCheck(std::vector<Spec>(1, Spec("string", STRING, true)), num, 0, none);
+ sortAndCheck(std::vector<Spec>(1, Spec("rank", RANK, true)), num, 0, none);
+ sortAndCheck(std::vector<Spec>(1, Spec("docid", DOCID, true)), num, 0, none);
+
+ sortAndCheck(std::vector<Spec>(1, Spec("int8", INT8, false)), num, 0, none);
+ sortAndCheck(std::vector<Spec>(1, Spec("int16", INT16, false)), num, 0, none);
+ sortAndCheck(std::vector<Spec>(1, Spec("int32", INT32, false)), num, 0, none);
+ sortAndCheck(std::vector<Spec>(1, Spec("int64", INT64, false)), num, 0, none);
+ sortAndCheck(std::vector<Spec>(1, Spec("float", FLOAT, false)), num, 0, none);
+ sortAndCheck(std::vector<Spec>(1, Spec("double", DOUBLE, false)), num, 0, none);
+ sortAndCheck(std::vector<Spec>(1, Spec("string", STRING, false)), num, 0, none);
+ sortAndCheck(std::vector<Spec>(1, Spec("rank", RANK, false)), num, 0, none);
+ sortAndCheck(std::vector<Spec>(1, Spec("docid", DOCID, false)), num, 0, none);
+ }
+
+}
+
+
+TEST_APPHOOK(MultilevelSortTest);
diff --git a/searchlib/src/tests/stackdumpiterator/.gitignore b/searchlib/src/tests/stackdumpiterator/.gitignore
new file mode 100644
index 00000000000..6f8239c6cc1
--- /dev/null
+++ b/searchlib/src/tests/stackdumpiterator/.gitignore
@@ -0,0 +1,7 @@
+*.core
+.depend
+Makefile
+core
+core.*
+stackdumpiteratortest
+searchlib_stackdumpiteratortest_app
diff --git a/searchlib/src/tests/stackdumpiterator/CMakeLists.txt b/searchlib/src/tests/stackdumpiterator/CMakeLists.txt
new file mode 100644
index 00000000000..dceeaad3f26
--- /dev/null
+++ b/searchlib/src/tests/stackdumpiterator/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_stackdumpiteratortest_app
+ SOURCES
+ stackdumpiteratortest.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_stackdumpiteratortest_app COMMAND searchlib_stackdumpiteratortest_app - *1000)
diff --git a/searchlib/src/tests/stackdumpiterator/stackdumpiteratortest.cpp b/searchlib/src/tests/stackdumpiterator/stackdumpiteratortest.cpp
new file mode 100644
index 00000000000..19ce69550f7
--- /dev/null
+++ b/searchlib/src/tests/stackdumpiterator/stackdumpiteratortest.cpp
@@ -0,0 +1,316 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright (C) 2001-2003 Fast Search & Transfer ASA
+// Copyright (C) 2003 Overture Services Norway AS
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/searchlib/parsequery/simplequerystack.h>
+#include <vespa/searchlib/parsequery/stackdumpiterator.h>
+#include "stackdumpiteratortest.h"
+
+#define NUMTESTS 5
+
+int
+StackDumpIteratorTest::Main()
+{
+ int doTest[NUMTESTS];
+ int low, high, accnum, num;
+ int indicator;
+ bool verify = false;
+ int multiplier = 1;
+ bool failed = false;
+
+ if (_argc == 1) {
+ Usage(_argv[0]);
+ return 1;
+ }
+
+ // default initialize to not run any tests.
+ for (int n = 0; n < NUMTESTS; n++)
+ doTest[n] = 0;
+
+ // parse the command line arguments
+ for (int i = 1; i < _argc; i++) {
+ low = 0;
+ high = NUMTESTS - 1;
+ char *p = _argv[i];
+
+ // Check if a multiplier is specified
+ if (*p == '*') {
+ p++;
+ accnum = 0;
+ while (*p != '\0') {
+ num = *p - '0';
+ accnum = accnum * 10 + num;
+ p++;
+ }
+ multiplier = accnum;
+ continue;
+ }
+
+ // Default is to run the tests specified, unless the first char is '/'
+ indicator = 1;
+ if (*p == '/') {
+ p++;
+ indicator = 0;
+ }
+
+ // Find the first number
+ accnum = 0;
+ while (*p != '-' && *p != '\0') {
+ num = *p - '0';
+ accnum = accnum * 10 + num;
+ p++;
+ }
+ if (accnum >= NUMTESTS)
+ continue;
+ low = accnum;
+ // Check for range operator
+ if (*p == '-') {
+ p++;
+ // Find the second number
+ accnum = 0;
+ while (*p != '\0') {
+ num = *p - '0';
+ accnum = accnum * 10 + num;
+ p++;
+ }
+ if (accnum > 0)
+ high = accnum < NUMTESTS ? accnum : NUMTESTS-1;
+ } else
+ high = low;
+
+ // Indicate the runrequest for the desired range.
+ for (int j = low; j <= high; j++)
+ doTest[j] = indicator;
+ }
+
+ // Remove unused tests.
+ // doTest[1] = 0;
+
+ // Remember time
+ if (multiplier > 1) {
+ printf("Running all tests %d times.\n", multiplier);
+ verify = false;
+ } else {
+ verify = true;
+ }
+
+ int testCnt = 0;
+
+ FastOS_Time timer;
+ timer.SetNow();
+
+ // Actually run the tests that we wanted.
+ for (int j = 0; j < multiplier; j++)
+ for (int k = 0; k < NUMTESTS; k++)
+ if (doTest[k] == 1) {
+ if (!RunTest(k, verify))
+ failed = true;
+ testCnt++;
+ }
+
+ // Print time taken
+ double timeTaken = timer.MilliSecsToNow();
+
+ printf("Time taken : %f ms\n", timeTaken);
+ printf("Number of tests run: %d\n", testCnt);
+ double avgTestPrMSec = static_cast<double>(testCnt) / timeTaken;
+ printf("Tests pr Sec: %f\n", avgTestPrMSec * 1000.0);
+
+ return failed ? 1 : 0;
+}
+
+#define ITERATOR_NOERROR 0x0
+#define ITERATOR_ERROR_WRONG_NUM 0x1
+#define ITERATOR_ERROR_WRONG_TYPE 0x2
+#define ITERATOR_ERROR_WRONG_ARITY 0x4
+#define ITERATOR_ERROR_WRONG_INDEX 0x10
+#define ITERATOR_ERROR_WRONG_TERM 0x20
+#define ITERATOR_ERROR_WRONG_GETINDEX 0x40
+#define ITERATOR_ERROR_WRONG_GETTERM 0x80
+#define ITERATOR_ERROR_WRONG_SIZE 0x100
+
+bool
+StackDumpIteratorTest::ShowResult(int testNo,
+ search::SimpleQueryStackDumpIterator &actual,
+ search::SimpleQueryStack &correct,
+ unsigned int expected)
+{
+ unsigned int results = 0;
+ const char *idx_ptr;
+ const char *term_ptr;
+ size_t idx_len;
+ size_t term_len;
+
+ int num = 0;
+
+ search::ParseItem *item;
+
+ printf("%03d: ", testNo);
+
+ while (actual.next()) {
+ actual.getIndexName(&idx_ptr, &idx_len);
+ actual.getTerm(&term_ptr, &term_len);
+
+#if 0
+ printf("StackItem #%d: %d %d '%.*s:%.*s'\n",
+ actual.getNum(),
+ actual.getType(),
+ actual.getArity(),
+ idx_len, idx_ptr,
+ term_len, term_ptr);
+#endif
+
+ item = correct.Pop();
+
+ if (num++ != actual.getNum()) {
+ results |= ITERATOR_ERROR_WRONG_NUM;
+ delete item;
+ break;
+ }
+ if (item->Type() != actual.getType()) {
+ results |= ITERATOR_ERROR_WRONG_TYPE;
+ delete item;
+ break;
+ }
+ if (item->_arity != actual.getArity()) {
+ results |= ITERATOR_ERROR_WRONG_ARITY;
+ delete item;
+ break;
+ }
+ if (strncmp(item->_indexName.c_str(), idx_ptr, idx_len) != 0) {
+ results |= ITERATOR_ERROR_WRONG_INDEX;
+ delete item;
+ break;
+ }
+ if (strncmp(item->_term.c_str(), term_ptr, term_len) != 0) {
+ results |= ITERATOR_ERROR_WRONG_TERM;
+ delete item;
+ break;
+ }
+
+ delete item;
+ }
+ if (correct.GetSize() != 0) results |= ITERATOR_ERROR_WRONG_SIZE;
+
+ if (results == expected)
+ printf("ok\n");
+ else
+ printf("fail. exp: %x, result: %x\n", expected, results);
+
+ return results == expected;
+}
+
+/**
+ *
+ * @param testno The test to run.
+ * @param verify Verify the result of the test.
+ */
+bool
+StackDumpIteratorTest::RunTest(int testno, bool verify)
+{
+ search::SimpleQueryStack stack;
+ search::RawBuf buf(32768);
+
+ switch (testno) {
+ case 0:
+ {
+ // Simple term query
+ stack.Push(new search::ParseItem(search::ParseItem::ITEM_TERM, "foobar"));
+
+ stack.AppendBuffer(&buf);
+ search::SimpleQueryStackDumpIterator si(vespalib::stringref(buf.GetDrainPos(), buf.GetUsedLen()));
+
+ if (verify)
+ return ShowResult(testno, si, stack, ITERATOR_NOERROR);
+ break;
+ }
+
+ case 1:
+ {
+ // multi term query
+ stack.Push(new search::ParseItem(search::ParseItem::ITEM_TERM, "foo", "foobar"));
+ stack.Push(new search::ParseItem(search::ParseItem::ITEM_TERM, "foo", "xyzzy"));
+ stack.Push(new search::ParseItem(search::ParseItem::ITEM_TERM, "bar", "baz"));
+ stack.Push(new search::ParseItem(search::ParseItem::ITEM_OR, 2));
+ stack.Push(new search::ParseItem(search::ParseItem::ITEM_AND, 2));
+
+ stack.AppendBuffer(&buf);
+ search::SimpleQueryStackDumpIterator si(vespalib::stringref(buf.GetDrainPos(), buf.GetUsedLen()));
+
+ if (verify)
+ return ShowResult(testno, si, stack, ITERATOR_NOERROR);
+ break;
+ }
+
+ case 2:
+ {
+ // all stack items
+ stack.Push(new search::ParseItem(search::ParseItem::ITEM_TERM, "foo", "foobar"));
+ stack.Push(new search::ParseItem(search::ParseItem::ITEM_NUMTERM, "foo", "[0;22]"));
+ stack.Push(new search::ParseItem(search::ParseItem::ITEM_PREFIXTERM, "bar", "baz"));
+ stack.Push(new search::ParseItem(search::ParseItem::ITEM_PHRASE, 3, "bar"));
+ stack.Push(new search::ParseItem(search::ParseItem::ITEM_OR, 2));
+ stack.Push(new search::ParseItem(search::ParseItem::ITEM_AND, 3));
+ stack.Push(new search::ParseItem(search::ParseItem::ITEM_RANK, 5));
+ stack.Push(new search::ParseItem(search::ParseItem::ITEM_NOT, 3));
+
+ stack.AppendBuffer(&buf);
+ search::SimpleQueryStackDumpIterator si(vespalib::stringref(buf.GetDrainPos(), buf.GetUsedLen()));
+
+ if (verify)
+ return ShowResult(testno, si, stack, ITERATOR_NOERROR);
+ break;
+ }
+
+ case 3:
+ {
+ // malicous type in buffer
+ stack.Push(new search::ParseItem(search::ParseItem::ITEM_TERM, "foo", "foobar"));
+ stack.AppendBuffer(&buf);
+ *buf.GetWritableDrainPos(0) = 0x1e;
+ search::SimpleQueryStackDumpIterator si(vespalib::stringref(buf.GetDrainPos(), buf.GetUsedLen()));
+ if (verify)
+ return ShowResult(testno, si, stack, ITERATOR_ERROR_WRONG_SIZE);
+ break;
+ }
+
+ case 4:
+ {
+ // malicous length in buffer
+ stack.Push(new search::ParseItem(search::ParseItem::ITEM_TERM, "foo", "foobar"));
+ stack.AppendBuffer(&buf);
+ *buf.GetWritableDrainPos(1) = 0xba;
+ search::SimpleQueryStackDumpIterator si(vespalib::stringref(buf.GetDrainPos(), buf.GetUsedLen()));
+ if (verify)
+ return ShowResult(testno, si, stack, ITERATOR_ERROR_WRONG_SIZE);
+ break;
+ }
+
+
+ default:
+ {
+ printf("%03d: no such test\n", testno);
+ }
+ }
+
+ return true;
+}
+
+void
+StackDumpIteratorTest::Usage(char *progname)
+{
+ printf("%s {testnospec}+\n\
+ Where testnospec is:\n\
+ num: single test\n\
+ num-num: inclusive range (open range permitted)\n",progname);
+ printf("There are tests from %d to %d\n\n", 0, NUMTESTS-1);
+}
+
+int
+main(int argc, char** argv)
+{
+ StackDumpIteratorTest tester;
+ return tester.Entry(argc, argv);
+}
+
diff --git a/searchlib/src/tests/stackdumpiterator/stackdumpiteratortest.h b/searchlib/src/tests/stackdumpiterator/stackdumpiteratortest.h
new file mode 100644
index 00000000000..63d41005932
--- /dev/null
+++ b/searchlib/src/tests/stackdumpiterator/stackdumpiteratortest.h
@@ -0,0 +1,17 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright (C) 2001-2003 Fast Search & Transfer ASA
+// Copyright (C) 2003 Overture Services Norway AS
+
+#pragma once
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/searchlib/parsequery/stackdumpiterator.h>
+
+class StackDumpIteratorTest : public FastOS_Application
+{
+ int Main();
+ void Usage(char *progname);
+ bool ShowResult(int testNo, search::SimpleQueryStackDumpIterator &actual, search::SimpleQueryStack &correct, unsigned int expected);
+ bool RunTest(int i, bool verify);
+};
+
diff --git a/searchlib/src/tests/stackdumpiterator/testowner.ATS b/searchlib/src/tests/stackdumpiterator/testowner.ATS
new file mode 100644
index 00000000000..6d03b0836a4
--- /dev/null
+++ b/searchlib/src/tests/stackdumpiterator/testowner.ATS
@@ -0,0 +1 @@
+vlarsen
diff --git a/searchlib/src/tests/stringenum/.gitignore b/searchlib/src/tests/stringenum/.gitignore
new file mode 100644
index 00000000000..7a2f1dd659f
--- /dev/null
+++ b/searchlib/src/tests/stringenum/.gitignore
@@ -0,0 +1,8 @@
+*.core
+.depend
+Makefile
+core
+core.*
+stringenum
+tmp.enum
+searchlib_stringenum_test_app
diff --git a/searchlib/src/tests/stringenum/CMakeLists.txt b/searchlib/src/tests/stringenum/CMakeLists.txt
new file mode 100644
index 00000000000..b59e739be47
--- /dev/null
+++ b/searchlib/src/tests/stringenum/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_stringenum_test_app
+ SOURCES
+ stringenum_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_stringenum_test_app COMMAND searchlib_stringenum_test_app)
diff --git a/searchlib/src/tests/stringenum/stringenum_test.cpp b/searchlib/src/tests/stringenum/stringenum_test.cpp
new file mode 100644
index 00000000000..8c6ef64dbe3
--- /dev/null
+++ b/searchlib/src/tests/stringenum/stringenum_test.cpp
@@ -0,0 +1,147 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright (C) 2001-2003 Fast Search & Transfer ASA
+// Copyright (C) 2003 Overture Services Norway AS
+
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("stringenum");
+#include <vespa/fastlib/io/bufferedfile.h>
+#include <vespa/searchlib/util/stringenum.h>
+
+
+#include <vespa/vespalib/testkit/testapp.h>
+
+using namespace vespalib;
+
+class MyApp : public vespalib::TestApp
+{
+public:
+ void CheckLookup( search::util::StringEnum *strEnum, const char *str, int value);
+ int Main();
+
+ MyApp(void) {}
+};
+
+
+void
+MyApp::CheckLookup( search::util::StringEnum *strEnum, const char *str, int value)
+{
+ EXPECT_EQUAL(0, strcmp(str, strEnum->Lookup(value)));
+ EXPECT_EQUAL(value, strEnum->Lookup(str));
+}
+
+
+int
+MyApp::Main()
+{
+ TEST_INIT("stringenum_test");
+
+ search::util::StringEnum enum1;
+ search::util::StringEnum enum2;
+
+ // check number of entries
+ EXPECT_EQUAL(enum1.GetNumEntries(), 0u);
+ EXPECT_EQUAL(enum2.GetNumEntries(), 0u);
+
+ // check add non-duplicates
+ EXPECT_EQUAL(enum1.Add("zero"), 0);
+ EXPECT_EQUAL(enum1.Add("one"), 1);
+ EXPECT_EQUAL(enum1.Add("two"), 2);
+ EXPECT_EQUAL(enum1.Add("three"), 3);
+ EXPECT_EQUAL(enum1.Add("four"), 4);
+ EXPECT_EQUAL(enum1.Add("five"), 5);
+ EXPECT_EQUAL(enum1.Add("six"), 6);
+ EXPECT_EQUAL(enum1.Add("seven"), 7);
+ EXPECT_EQUAL(enum1.Add("eight"), 8);
+ EXPECT_EQUAL(enum1.Add("nine"), 9);
+
+ // check add duplicates
+ EXPECT_EQUAL(enum1.Add("four"), 4);
+ EXPECT_EQUAL(enum1.Add("eight"), 8);
+ EXPECT_EQUAL(enum1.Add("six"), 6);
+ EXPECT_EQUAL(enum1.Add("seven"), 7);
+ EXPECT_EQUAL(enum1.Add("one"), 1);
+ EXPECT_EQUAL(enum1.Add("nine"), 9);
+ EXPECT_EQUAL(enum1.Add("five"), 5);
+ EXPECT_EQUAL(enum1.Add("zero"), 0);
+ EXPECT_EQUAL(enum1.Add("two"), 2);
+ EXPECT_EQUAL(enum1.Add("three"), 3);
+
+ // check add non-duplicate
+ EXPECT_EQUAL(enum1.Add("ten"), 10);
+
+ // check mapping and reverse mapping
+ EXPECT_EQUAL(enum1.GetNumEntries(), 11u);
+ TEST_DO(CheckLookup(&enum1, "zero", 0));
+ TEST_DO(CheckLookup(&enum1, "one", 1));
+ TEST_DO(CheckLookup(&enum1, "two", 2));
+ TEST_DO(CheckLookup(&enum1, "three", 3));
+ TEST_DO(CheckLookup(&enum1, "four", 4));
+ TEST_DO(CheckLookup(&enum1, "five", 5));
+ TEST_DO(CheckLookup(&enum1, "six", 6));
+ TEST_DO(CheckLookup(&enum1, "seven", 7));
+ TEST_DO(CheckLookup(&enum1, "eight", 8));
+ TEST_DO(CheckLookup(&enum1, "nine", 9));
+ TEST_DO(CheckLookup(&enum1, "ten", 10));
+
+ TEST_FLUSH();
+
+ // save/load
+ EXPECT_TRUE(enum1.Save("tmp.enum"));
+ EXPECT_TRUE(enum2.Load("tmp.enum"));
+
+ // check mapping and reverse mapping
+ EXPECT_EQUAL(enum2.GetNumEntries(), 11u);
+ TEST_DO(CheckLookup(&enum2, "zero", 0));
+ TEST_DO(CheckLookup(&enum2, "one", 1));
+ TEST_DO(CheckLookup(&enum2, "two", 2));
+ TEST_DO(CheckLookup(&enum2, "three", 3));
+ TEST_DO(CheckLookup(&enum2, "four", 4));
+ TEST_DO(CheckLookup(&enum2, "five", 5));
+ TEST_DO(CheckLookup(&enum2, "six", 6));
+ TEST_DO(CheckLookup(&enum2, "seven", 7));
+ TEST_DO(CheckLookup(&enum2, "eight", 8));
+ TEST_DO(CheckLookup(&enum2, "nine", 9));
+ TEST_DO(CheckLookup(&enum2, "ten", 10));
+
+ // add garbage
+ enum2.Add("sfsdffgdfh");
+ enum2.Add("sf24dfsgg3");
+ enum2.Add("sfwertfgdh");
+ enum2.Add("sfewrgtsfh");
+ enum2.Add("sfgdsdgdfh");
+
+ TEST_FLUSH();
+
+ // reload
+ EXPECT_TRUE(enum2.Load("tmp.enum"));
+
+ // check garbage lost
+ EXPECT_EQUAL(enum2.GetNumEntries(), 11u);
+ EXPECT_EQUAL(-1, enum2.Lookup("sfewrgtsfh"));
+ // check mapping and reverse mapping
+ TEST_DO(CheckLookup(&enum2, "zero", 0));
+ TEST_DO(CheckLookup(&enum2, "one", 1));
+ TEST_DO(CheckLookup(&enum2, "two", 2));
+ TEST_DO(CheckLookup(&enum2, "three", 3));
+ TEST_DO(CheckLookup(&enum2, "four", 4));
+ TEST_DO(CheckLookup(&enum2, "five", 5));
+ TEST_DO(CheckLookup(&enum2, "six", 6));
+ TEST_DO(CheckLookup(&enum2, "seven", 7));
+ TEST_DO(CheckLookup(&enum2, "eight", 8));
+ TEST_DO(CheckLookup(&enum2, "nine", 9));
+ TEST_DO(CheckLookup(&enum2, "ten", 10));
+
+ // clear
+ enum1.Clear();
+ enum2.Clear();
+
+ // check number of entries
+ EXPECT_EQUAL(enum1.GetNumEntries(), 0u);
+ EXPECT_EQUAL(enum2.GetNumEntries(), 0u);
+
+ TEST_DONE();
+}
+
+TEST_APPHOOK(MyApp);
diff --git a/searchlib/src/tests/transactionlog/.gitignore b/searchlib/src/tests/transactionlog/.gitignore
new file mode 100644
index 00000000000..a7bdcf0397d
--- /dev/null
+++ b/searchlib/src/tests/transactionlog/.gitignore
@@ -0,0 +1,7 @@
+.depend
+Makefile
+translogserver_test
+translogclient_test
+
+searchlib_translogclient_test_app
+searchlib_translogserver_test_app
diff --git a/searchlib/src/tests/transactionlog/CMakeLists.txt b/searchlib/src/tests/transactionlog/CMakeLists.txt
new file mode 100644
index 00000000000..545c81ba45f
--- /dev/null
+++ b/searchlib/src/tests/transactionlog/CMakeLists.txt
@@ -0,0 +1,15 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_translogserver_test_app
+ SOURCES
+ translogserver_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_translogserver_test_app COMMAND searchlib_translogserver_test_app)
+vespa_add_executable(searchlib_translogclient_test_app
+ SOURCES
+ translogclient_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_translogclient_test_app COMMAND sh translogclient_test.sh)
diff --git a/searchlib/src/tests/transactionlog/DESC b/searchlib/src/tests/transactionlog/DESC
new file mode 100644
index 00000000000..db53d59fb6c
--- /dev/null
+++ b/searchlib/src/tests/transactionlog/DESC
@@ -0,0 +1 @@
+Thios is a test of the tls/tlc interface.
diff --git a/searchlib/src/tests/transactionlog/FILES b/searchlib/src/tests/transactionlog/FILES
new file mode 100644
index 00000000000..babcf181807
--- /dev/null
+++ b/searchlib/src/tests/transactionlog/FILES
@@ -0,0 +1,2 @@
+translogclient.cpp
+translogserver.cpp
diff --git a/searchlib/src/tests/transactionlog/translogclient_test.cpp b/searchlib/src/tests/transactionlog/translogclient_test.cpp
new file mode 100644
index 00000000000..775654d23fc
--- /dev/null
+++ b/searchlib/src/tests/transactionlog/translogclient_test.cpp
@@ -0,0 +1,926 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/searchlib/transactionlog/translogclient.h>
+#include <vespa/searchlib/transactionlog/translogserver.h>
+#include <vespa/vespalib/testkit/testapp.h>
+#include <vespa/vespalib/objects/identifiable.h>
+#include <vespa/searchlib/index/dummyfileheadercontext.h>
+#include <vespa/log/log.h>
+#include <map>
+LOG_SETUP("translogclient_test");
+
+using namespace search;
+using namespace transactionlog;
+using namespace document;
+using namespace vespalib;
+using search::index::DummyFileHeaderContext;
+
+vespalib::string myhex(const void * b, size_t sz)
+{
+ static const char * hextab="0123456789ABCDEF";
+ const unsigned char * c = static_cast<const unsigned char *>(b);
+ vespalib::string s;
+ s.reserve(sz*2);
+ for (size_t i=0; i < sz; i++) {
+ s += hextab[c[i] >> 4];
+ s += hextab[c[i] & 0x0f];
+ }
+ return s;
+}
+
+class Test : public vespalib::TestApp
+{
+public:
+ int Main();
+private:
+ bool createDomainTest(TransLogClient & tls, const vespalib::string & name, size_t preExistingDomains=0);
+ TransLogClient::Session::UP openDomainTest(TransLogClient & tls, const vespalib::string & name);
+ bool fillDomainTest(TransLogClient::Session * s1, const vespalib::string & name);
+ void fillDomainTest(TransLogClient::Session * s1, size_t numPackets, size_t numEntries);
+ void fillDomainTest(TransLogClient::Session * s1, size_t numPackets, size_t numEntries, size_t entrySize);
+ uint32_t countFiles(const vespalib::string &dir);
+ void checkFilledDomainTest(const TransLogClient::Session::UP &s1, size_t numEntries);
+ bool visitDomainTest(TransLogClient & tls, TransLogClient::Session * s1, const vespalib::string & name);
+ bool subscribeDomainTest(TransLogClient & tls, const vespalib::string & name);
+ bool partialUpdateTest();
+ bool test1();
+ bool testRemove();
+ void createAndFillDomain(const vespalib::string & name, DomainPart::Crc crcMethod, size_t preExistingDomains);
+ void verifyDomain(const vespalib::string & name);
+ void testCrcVersions();
+ bool test2();
+ void testMany();
+ void testErase();
+ void testSync();
+ void testTruncateOnShortRead();
+ void testTruncateOnVersionMismatch();
+};
+
+TEST_APPHOOK(Test);
+
+class CallBackTest : public TransLogClient::Subscriber::Callback
+{
+private:
+ virtual RPC::Result receive(const Packet & packet);
+ virtual void inSync() { _inSync = true; }
+ virtual void eof() { _eof = true; }
+ typedef std::map<SerialNum, ByteBuffer> PacketMap;
+ PacketMap _packetMap;
+public:
+ CallBackTest() : _inSync(false), _eof(false) { }
+ size_t size() const { return _packetMap.size(); }
+ bool hasSerial(SerialNum n) const { return (_packetMap.find(n) != _packetMap.end()); }
+ void clear() { _inSync = false; _eof = false; _packetMap.clear(); }
+ const ByteBuffer & packet(SerialNum n) { return (_packetMap.find(n)->second); }
+
+ bool _inSync;
+ bool _eof;
+};
+
+RPC::Result CallBackTest::receive(const Packet & p)
+{
+ vespalib::nbostream h(p.getHandle().c_str(), p.getHandle().size(), true);
+ LOG(info,"CallBackTest::receive (%zu, %zu, %zu)(%s)", h.rp(), h.size(), h.capacity(), myhex(h.peek(), h.size()).c_str());
+ while(h.size() > 0) {
+ Packet::Entry e;
+ e.deserialize(h);
+ LOG(info,"CallBackTest::receive (%zu, %zu, %zu)(%s)", h.rp(), h.size(), h.capacity(), myhex(e.data().c_str(), e.data().size()).c_str());
+ _packetMap[e.serial()] = ByteBuffer(e.data().c_str(), e.data().size());
+ }
+ return RPC::OK;
+}
+
+class CallBackManyTest : public TransLogClient::Subscriber::Callback
+{
+private:
+ virtual RPC::Result receive(const Packet & packet);
+ virtual void inSync() { _inSync = true; }
+ virtual void eof() { _eof = true; }
+public:
+ CallBackManyTest(size_t start) : _inSync(false), _eof(false), _count(start), _value(start) { }
+ void clear() { _inSync = false; _eof = false; _count = 0; _value = 0; }
+ bool _inSync;
+ bool _eof;
+ size_t _count;
+ size_t _value;
+};
+
+RPC::Result CallBackManyTest::receive(const Packet & p)
+{
+ nbostream h(p.getHandle().c_str(), p.getHandle().size(), true);
+ for(;h.size() > 0; _count++, _value++) {
+ Packet::Entry e;
+ e.deserialize(h);
+ assert(e.data().size() == 8);
+ size_t v = *(const size_t*) e.data().c_str();
+ assert(_count+1 == e.serial());
+ assert(v == _value);
+ (void) v;
+ }
+ return RPC::OK;
+}
+
+class CallBackUpdate : public TransLogClient::Subscriber::Callback
+{
+public:
+ typedef std::map<SerialNum, Identifiable *> PacketMap;
+private:
+ virtual RPC::Result receive(const Packet & packet);
+ virtual void inSync() { _inSync = true; }
+ virtual void eof() { _eof = true; }
+ PacketMap _packetMap;
+public:
+ CallBackUpdate() : _inSync(false), _eof(false) { }
+ virtual ~CallBackUpdate() { while (_packetMap.begin() != _packetMap.end()) { delete _packetMap.begin()->second; _packetMap.erase(_packetMap.begin()); } }
+ bool hasSerial(SerialNum n) const { return (_packetMap.find(n) != _packetMap.end()); }
+ const PacketMap & map() const { return _packetMap; }
+ bool _inSync;
+ bool _eof;
+};
+
+
+RPC::Result CallBackUpdate::receive(const Packet & packet)
+{
+ nbostream h(packet.getHandle().c_str(), packet.getHandle().size(), true);
+ while (h.size() > 0) {
+ Packet::Entry e;
+ e.deserialize(h);
+ const vespalib::Identifiable::RuntimeClass * cl(vespalib::Identifiable::classFromId(e.type()));
+ if (cl) {
+ vespalib::Identifiable * obj(cl->create());
+ if (obj->inherits(Identifiable::classId)) {
+ Identifiable * ser = static_cast<Identifiable *>(obj);
+ nbostream is(e.data().c_str(), e.data().size());
+ try {
+ is >> *ser;
+ } catch (std::exception & ex) {
+ LOG(warning, "Failed deserializing (%" PRId64 ", %s) bb(%zu, %zu, %zu)=%s what=%s", e.serial(), cl->name(), is.rp(), is.size(), is.capacity(), myhex(is.peek(), is.size()).c_str(), ex.what());
+ assert(false);
+ return RPC::ERROR;
+ }
+ assert(is.state() == nbostream::ok);
+ assert(is.size() == 0);
+ _packetMap[e.serial()] = ser;
+ } else {
+ LOG(warning, "Packet::Entry(%" PRId64 ", %s) is not a Identifiable", e.serial(), cl->name());
+ }
+ } else {
+ LOG(warning, "Packet::Entry(%" PRId64 ", %d) is not recognized by vespalib::Identifiable", e.serial(), e.type());
+ }
+ }
+ return RPC::OK;
+}
+
+class CallBackStatsTest : public TransLogClient::Session::Callback
+{
+private:
+ virtual RPC::Result receive(const Packet & packet);
+ virtual void inSync() { _inSync = true; }
+ virtual void eof() { _eof = true; }
+public:
+ CallBackStatsTest() : _inSync(false), _eof(false),
+ _count(0), _inOrder(0),
+ _firstSerial(0), _lastSerial(0),
+ _prevSerial(0) { }
+ void clear() { _inSync = false; _eof = false; _count = 0; _inOrder = 0;
+ _firstSerial = 0; _lastSerial = 0; _inOrder = 0; }
+ bool _inSync;
+ bool _eof;
+ uint64_t _count;
+ uint64_t _inOrder; // increase when next entry is one above previous
+ SerialNum _firstSerial;
+ SerialNum _lastSerial;
+ SerialNum _prevSerial;
+};
+
+RPC::Result CallBackStatsTest::receive(const Packet & p)
+{
+ nbostream h(p.getHandle().c_str(), p.getHandle().size(), true);
+ for(;h.size() > 0; ++_count) {
+ Packet::Entry e;
+ e.deserialize(h);
+ SerialNum s = e.serial();
+ if (_count == 0) {
+ _firstSerial = s;
+ _lastSerial = s;
+ }
+ if (s == _prevSerial + 1) {
+ ++_inOrder;
+ }
+ _prevSerial = s;
+ if (_firstSerial > s) {
+ _firstSerial = s;
+ }
+ if (_lastSerial < s) {
+ _lastSerial = s;
+ }
+ }
+ return RPC::OK;
+}
+
+#define CID_TestIdentifiable 0x5762314
+
+class TestIdentifiable : public Identifiable
+{
+public:
+ DECLARE_IDENTIFIABLE(TestIdentifiable);
+ TestIdentifiable() { }
+};
+
+IMPLEMENT_IDENTIFIABLE(TestIdentifiable, Identifiable);
+
+bool Test::partialUpdateTest()
+{
+ bool retval(false);
+ DummyFileHeaderContext fileHeaderContext;
+ TransLogServer tlss("test7", 18377, ".", fileHeaderContext, 0x10000);
+ TransLogClient tls("tcp/localhost:18377");
+
+ TransLogClient::Session::UP s1 = openDomainTest(tls, "test1");
+ TransLogClient::Session & session = *s1;
+
+ TestIdentifiable du;
+
+ nbostream os;
+ os << du;
+
+ vespalib::ConstBufferRef bb(os.c_str(), os.size());
+ LOG(info, "DU : %s", myhex(bb.c_str(), bb.size()).c_str());
+ Packet::Entry e(7, du.getClass().id(), bb);
+ Packet pa;
+ pa.add(e);
+ pa.close();
+ ASSERT_TRUE(session.commit(vespalib::ConstBufferRef(pa.getHandle().c_str(), pa.getHandle().size())));
+
+ CallBackUpdate ca;
+ TransLogClient::Visitor::UP visitor = tls.createVisitor("test1", ca);
+ ASSERT_TRUE(visitor.get());
+ ASSERT_TRUE( visitor->visit(5, 7) );
+ for (size_t i(0); ! ca._eof && (i < 1000); i++ ) { FastOS_Thread::Sleep(10); }
+ ASSERT_TRUE( ! ca._inSync );
+ ASSERT_TRUE( ca._eof );
+ ASSERT_TRUE( ca.map().size() == 1);
+ ASSERT_TRUE( ca.hasSerial(7) );
+
+ CallBackUpdate ca1;
+ TransLogClient::Visitor::UP visitor1 = tls.createVisitor("test1", ca1);
+ ASSERT_TRUE(visitor1.get());
+ ASSERT_TRUE( visitor1->visit(4, 5) );
+ for (size_t i(0); ! ca1._eof && (i < 1000); i++ ) { FastOS_Thread::Sleep(10); }
+ ASSERT_TRUE( ! ca1._inSync );
+ ASSERT_TRUE( ca1._eof );
+ ASSERT_TRUE( ca1.map().size() == 0);
+
+ CallBackUpdate ca2;
+ TransLogClient::Visitor::UP visitor2 = tls.createVisitor("test1", ca2);
+ ASSERT_TRUE(visitor2.get());
+ ASSERT_TRUE( visitor2->visit(5, 6) );
+ for (size_t i(0); ! ca2._eof && (i < 1000); i++ ) { FastOS_Thread::Sleep(10); }
+ ASSERT_TRUE( ! ca2._inSync );
+ ASSERT_TRUE( ca2._eof );
+ ASSERT_TRUE( ca2.map().size() == 0);
+
+ CallBackUpdate ca3;
+ TransLogClient::Visitor::UP visitor3 = tls.createVisitor("test1", ca3);
+ ASSERT_TRUE(visitor3.get());
+ ASSERT_TRUE( visitor3->visit(5, 1000) );
+ for (size_t i(0); ! ca3._eof && (i < 1000); i++ ) { FastOS_Thread::Sleep(10); }
+ ASSERT_TRUE( ! ca3._inSync );
+ ASSERT_TRUE( ca3._eof );
+ ASSERT_TRUE( ca3.map().size() == 1);
+ ASSERT_TRUE( ca3.hasSerial(7) );
+
+ return retval;
+}
+
+bool Test::createDomainTest(TransLogClient & tls, const vespalib::string & name, size_t preExistingDomains)
+{
+ bool retval(true);
+ std::vector<vespalib::string> dir;
+ tls.listDomains(dir);
+ EXPECT_EQUAL (dir.size(), preExistingDomains);
+ TransLogClient::Session::UP s1 = tls.open(name);
+ ASSERT_TRUE (s1.get() == NULL);
+ retval = tls.create(name);
+ ASSERT_TRUE (retval);
+ dir.clear();
+ tls.listDomains(dir);
+ EXPECT_EQUAL (dir.size(), preExistingDomains+1);
+// ASSERT_TRUE (dir[0] == name);
+ return retval;
+}
+
+TransLogClient::Session::UP Test::openDomainTest(TransLogClient & tls, const vespalib::string & name)
+{
+ TransLogClient::Session::UP s1 = tls.open(name);
+ ASSERT_TRUE (s1.get() != NULL);
+ return s1;
+}
+
+bool Test::fillDomainTest(TransLogClient::Session * s1, const vespalib::string & name)
+{
+ bool retval(true);
+ Packet::Entry e1(1, 1, vespalib::ConstBufferRef("Content in buffer A", 20));
+ Packet::Entry e2(2, 2, vespalib::ConstBufferRef("Content in buffer B", 20));
+ Packet::Entry e3(3, 1, vespalib::ConstBufferRef("Content in buffer C", 20));
+
+ Packet a;
+ ASSERT_TRUE (a.add(e1));
+ Packet b;
+ ASSERT_TRUE (b.add(e2));
+ ASSERT_TRUE (b.add(e3));
+ ASSERT_TRUE (!b.add(e1));
+ a.close();
+ b.close();
+ ASSERT_TRUE (s1->commit(vespalib::ConstBufferRef(a.getHandle().c_str(), a.getHandle().size())));
+ ASSERT_TRUE (s1->commit(vespalib::ConstBufferRef(b.getHandle().c_str(), b.getHandle().size())));
+ try {
+ s1->commit(vespalib::ConstBufferRef(a.getHandle().c_str(), a.getHandle().size()));
+ ASSERT_TRUE(false);
+ } catch (const std::exception & e) {
+ EXPECT_EQUAL(vespalib::string("commit failed with code -2. server says: Exception during commit on " + name + " : Incomming serial number(1) must be bigger than the last one (3)."), e.what());
+ }
+ EXPECT_EQUAL(a.size(), 1u);
+ EXPECT_EQUAL(a.range().from(), 1u);
+ EXPECT_EQUAL(a.range().to(), 1u);
+ EXPECT_EQUAL(b.size(), 2u);
+ EXPECT_EQUAL(b.range().from(), 2u);
+ EXPECT_EQUAL(b.range().to(), 3u);
+ EXPECT_TRUE(a.merge(b));
+ EXPECT_EQUAL(a.size(), 3u);
+ EXPECT_EQUAL(a.range().from(), 1u);
+ EXPECT_EQUAL(a.range().to(), 3u);
+
+ Packet::Entry e;
+ vespalib::nbostream h(a.getHandle().c_str(), a.getHandle().size());
+ e.deserialize(h);
+ e.deserialize(h);
+ e.deserialize(h);
+ EXPECT_EQUAL(h.size(), 0u);
+
+ return retval;
+}
+
+void Test::fillDomainTest(TransLogClient::Session * s1, size_t numPackets, size_t numEntries)
+{
+ size_t value(0);
+ for(size_t i=0; i < numPackets; i++) {
+ std::unique_ptr<Packet> p(new Packet());
+ for(size_t j=0; j < numEntries; j++, value++) {
+ Packet::Entry e(value+1, j+1, vespalib::ConstBufferRef((const char *)&value, sizeof(value)));
+ if ( ! p->add(e) ) {
+ p->close();
+ ASSERT_TRUE(s1->commit(vespalib::ConstBufferRef(p->getHandle().c_str(), p->getHandle().size())));
+ p.reset(new Packet());
+ ASSERT_TRUE(p->add(e));
+ }
+ }
+ p->close();
+ ASSERT_TRUE(s1->commit(vespalib::ConstBufferRef(p->getHandle().c_str(), p->getHandle().size())));
+ }
+}
+
+
+void
+Test::fillDomainTest(TransLogClient::Session * s1,
+ size_t numPackets, size_t numEntries,
+ size_t entrySize)
+{
+ size_t value(0);
+ std::vector<char> entryBuffer(entrySize);
+ for(size_t i=0; i < numPackets; i++) {
+ std::unique_ptr<Packet> p(new Packet());
+ for(size_t j=0; j < numEntries; j++, value++) {
+ Packet::Entry e(value+1, j+1, vespalib::ConstBufferRef((const char *)&entryBuffer[0], entryBuffer.size()));
+ if ( ! p->add(e) ) {
+ p->close();
+ ASSERT_TRUE(s1->commit(vespalib::ConstBufferRef(p->getHandle().c_str(), p->getHandle().size())));
+ p.reset(new Packet());
+ ASSERT_TRUE(p->add(e));
+ }
+ }
+ p->close();
+ ASSERT_TRUE(s1->commit(vespalib::ConstBufferRef(p->getHandle().c_str(), p->getHandle().size())));
+ }
+}
+
+
+uint32_t
+Test::countFiles(const vespalib::string &dir)
+{
+ uint32_t res = 0;
+ FastOS_DirectoryScan dirScan(dir.c_str());
+ while (dirScan.ReadNext()) {
+ const char *ename = dirScan.GetName();
+ if (strcmp(ename, ".") == 0 ||
+ strcmp(ename, "..") == 0)
+ continue;
+ ++res;
+ }
+ return res;
+}
+
+
+void
+Test::checkFilledDomainTest(const TransLogClient::Session::UP &s1,
+ size_t numEntries)
+{
+ SerialNum b(0), e(0);
+ size_t c(0);
+ EXPECT_TRUE(s1->status(b, e, c));
+ EXPECT_EQUAL(b, 1u);
+ EXPECT_EQUAL(e, numEntries);
+ EXPECT_EQUAL(c, numEntries);
+}
+
+
+bool Test::visitDomainTest(TransLogClient & tls, TransLogClient::Session * s1, const vespalib::string & name)
+{
+ bool retval(true);
+
+ SerialNum b(0), e(0);
+ size_t c(0);
+ EXPECT_TRUE(s1->status(b, e, c));
+ EXPECT_EQUAL(b, 1u);
+ EXPECT_EQUAL(e, 3u);
+ EXPECT_EQUAL(c, 3u);
+
+ CallBackTest ca;
+ TransLogClient::Visitor::UP visitor = tls.createVisitor(name, ca);
+ ASSERT_TRUE(visitor.get());
+ EXPECT_TRUE( visitor->visit(0, 1) );
+ for (size_t i(0); ! ca._eof && (i < 60000); i++ ) { FastOS_Thread::Sleep(10); }
+ EXPECT_TRUE( ! ca._inSync );
+ EXPECT_TRUE( ca._eof );
+ EXPECT_TRUE( ! ca.hasSerial(0) );
+ EXPECT_TRUE( ca.hasSerial(1) );
+ EXPECT_TRUE( ! ca.hasSerial(2) );
+ ca.clear();
+
+ visitor = tls.createVisitor(name, ca);
+ ASSERT_TRUE(visitor.get());
+ EXPECT_TRUE( visitor->visit(1, 2) );
+ for (size_t i(0); ! ca._eof && (i < 60000); i++ ) { FastOS_Thread::Sleep(10); }
+ EXPECT_TRUE( ! ca._inSync );
+ EXPECT_TRUE( ca._eof );
+ EXPECT_TRUE( ! ca.hasSerial(0) );
+ EXPECT_TRUE( ! ca.hasSerial(1) );
+ EXPECT_TRUE( ca.hasSerial(2) );
+ EXPECT_TRUE( ! ca.hasSerial(3) );
+ ca.clear();
+
+ visitor = tls.createVisitor(name, ca);
+ EXPECT_TRUE(visitor.get());
+ EXPECT_TRUE( visitor->visit(0, 3) );
+ for (size_t i(0); ! ca._eof && (i < 60000); i++ ) { FastOS_Thread::Sleep(10); }
+ EXPECT_TRUE( ! ca._inSync );
+ EXPECT_TRUE( ca._eof );
+ EXPECT_TRUE( ! ca.hasSerial(0) );
+ EXPECT_TRUE( ca.hasSerial(1) );
+ EXPECT_TRUE( ca.hasSerial(2) );
+ EXPECT_TRUE( ca.hasSerial(3) );
+ ca.clear();
+
+ visitor = tls.createVisitor(name, ca);
+ ASSERT_TRUE(visitor.get());
+ EXPECT_TRUE( visitor->visit(2, 3) );
+ for (size_t i(0); ! ca._eof && (i < 60000); i++ ) { FastOS_Thread::Sleep(10); }
+ EXPECT_TRUE( ! ca._inSync );
+ EXPECT_TRUE( ca._eof );
+ EXPECT_TRUE( ! ca.hasSerial(0) );
+ EXPECT_TRUE( !ca.hasSerial(1) );
+ EXPECT_TRUE( !ca.hasSerial(2) );
+ EXPECT_TRUE( ca.hasSerial(3) );
+ ca.clear();
+
+ return retval;
+}
+
+bool Test::subscribeDomainTest(TransLogClient & tls, const vespalib::string & name)
+{
+ bool retval(true);
+ CallBackTest ca;
+ TransLogClient::Subscriber::UP subscriber = tls.createSubscriber(name, ca);
+ ASSERT_TRUE(subscriber.get());
+ ASSERT_TRUE( subscriber->subscribe(0) );
+ for (size_t i(0); ! ca._inSync && (i < 60000); i++ ) { FastOS_Thread::Sleep(10); }
+ ASSERT_TRUE( ca._inSync );
+ ASSERT_TRUE( ! ca.hasSerial(0) );
+ ASSERT_TRUE( ! ca._eof );
+ ASSERT_TRUE( ca.hasSerial(1) );
+ ASSERT_TRUE( ca.hasSerial(2) );
+ ASSERT_TRUE( ca.hasSerial(3) );
+ return retval;
+}
+
+bool Test::test1()
+{
+ DummyFileHeaderContext fileHeaderContext;
+ TransLogServer tlss("test7", 18377, ".", fileHeaderContext, 0x10000);
+ TransLogClient tls("tcp/localhost:18377");
+
+ vespalib::string name("test1");
+ createDomainTest(tls, name);
+ TransLogClient::Session::UP s1 = openDomainTest(tls, name);
+ fillDomainTest(s1.get(), name);
+ visitDomainTest(tls, s1.get(), name);
+ subscribeDomainTest(tls, name);
+ return true;
+}
+
+void Test::createAndFillDomain(const vespalib::string & name, DomainPart::Crc crcMethod, size_t preExistingDomains)
+{
+ DummyFileHeaderContext fileHeaderContext;
+ TransLogServer tlss("test13", 18377, ".", fileHeaderContext, 0x10000, false, 4, crcMethod);
+ TransLogClient tls("tcp/localhost:18377");
+
+ createDomainTest(tls, name, preExistingDomains);
+ TransLogClient::Session::UP s1 = openDomainTest(tls, name);
+ fillDomainTest(s1.get(), name);
+}
+
+void Test::verifyDomain(const vespalib::string & name)
+{
+ DummyFileHeaderContext fileHeaderContext;
+ TransLogServer tlss("test13", 18377, ".", fileHeaderContext, 0x10000);
+ TransLogClient tls("tcp/localhost:18377");
+ TransLogClient::Session::UP s1 = openDomainTest(tls, name);
+ visitDomainTest(tls, s1.get(), name);
+}
+
+void Test::testCrcVersions()
+{
+ createAndFillDomain("ccitt_crc32", DomainPart::ccitt_crc32, 0);
+ createAndFillDomain("xxh64", DomainPart::xxh64, 1);
+
+ verifyDomain("ccitt_crc32");
+ verifyDomain("xxh64");
+}
+
+bool Test::testRemove()
+{
+ DummyFileHeaderContext fileHeaderContext;
+ TransLogServer tlss("testremove", 18377, ".", fileHeaderContext, 0x10000);
+ TransLogClient tls("tcp/localhost:18377");
+
+ vespalib::string name("test-delete");
+ createDomainTest(tls, name);
+ TransLogClient::Session::UP s1 = openDomainTest(tls, name);
+ fillDomainTest(s1.get(), name);
+ visitDomainTest(tls, s1.get(), name);
+ subscribeDomainTest(tls, name);
+ ASSERT_TRUE(tls.remove(name));
+
+ return true;
+}
+
+bool Test::test2()
+{
+ DummyFileHeaderContext fileHeaderContext;
+ TransLogServer tlss("test7", 18377, ".", fileHeaderContext, 0x10000);
+ TransLogClient tls("tcp/localhost:18377");
+
+ vespalib::string name("test1");
+ TransLogClient::Session::UP s1 = openDomainTest(tls, name);
+ visitDomainTest(tls, s1.get(), name);
+ subscribeDomainTest(tls, name);
+ return true;
+}
+
+namespace {
+
+void
+assertVisitStats(TransLogClient &tls, const vespalib::string &domain,
+ SerialNum visitStart, SerialNum visitEnd,
+ SerialNum expFirstSerial, SerialNum expLastSerial,
+ uint64_t expCount, uint64_t expInOrder)
+{
+ CallBackStatsTest ca;
+ TransLogClient::Visitor::UP visitor = tls.createVisitor(domain, ca);
+ ASSERT_TRUE(visitor.get());
+ ASSERT_TRUE( visitor->visit(visitStart, visitEnd) );
+ for (size_t i(0); ! ca._eof && (i < 60000); i++ ) {
+ FastOS_Thread::Sleep(10);
+ }
+ ASSERT_TRUE(!ca._inSync);
+ ASSERT_TRUE(ca._eof);
+ EXPECT_EQUAL(expFirstSerial, ca._firstSerial);
+ EXPECT_EQUAL(expLastSerial, ca._lastSerial);
+ EXPECT_EQUAL(expCount, ca._count);
+ EXPECT_EQUAL(expInOrder, ca._inOrder);
+}
+
+void
+assertStatus(TransLogClient::Session &s,
+ SerialNum expFirstSerial, SerialNum expLastSerial,
+ uint64_t expCount)
+{
+ SerialNum b(0), e(0);
+ size_t c(0);
+ EXPECT_TRUE(s.status(b, e, c));
+ EXPECT_EQUAL(expFirstSerial, b);
+ EXPECT_EQUAL(expLastSerial, e);
+ EXPECT_EQUAL(expCount, c);
+}
+
+}
+
+
+void Test::testMany()
+{
+ const unsigned int NUM_PACKETS = 1000;
+ const unsigned int NUM_ENTRIES = 100;
+ const unsigned int TOTAL_NUM_ENTRIES = NUM_PACKETS * NUM_ENTRIES;
+ {
+ DummyFileHeaderContext fileHeaderContext;
+ TransLogServer tlss("test8", 18377, ".", fileHeaderContext, 0x80000);
+ TransLogClient tls("tcp/localhost:18377");
+
+ createDomainTest(tls, "many", 0);
+ TransLogClient::Session::UP s1 = openDomainTest(tls, "many");
+ fillDomainTest(s1.get(), NUM_PACKETS, NUM_ENTRIES);
+ SerialNum b(0), e(0);
+ size_t c(0);
+ EXPECT_TRUE(s1->status(b, e, c));
+ EXPECT_EQUAL(b, 1u);
+ EXPECT_EQUAL(e, TOTAL_NUM_ENTRIES);
+ EXPECT_EQUAL(c, TOTAL_NUM_ENTRIES);
+ CallBackManyTest ca(2);
+ TransLogClient::Visitor::UP visitor = tls.createVisitor("many", ca);
+ ASSERT_TRUE(visitor.get());
+ ASSERT_TRUE( visitor->visit(2, TOTAL_NUM_ENTRIES) );
+ for (size_t i(0); ! ca._eof && (i < 60000); i++ ) { FastOS_Thread::Sleep(10); }
+ ASSERT_TRUE( ! ca._inSync );
+ ASSERT_TRUE( ca._eof );
+ EXPECT_EQUAL(ca._count, TOTAL_NUM_ENTRIES);
+ EXPECT_EQUAL(ca._value, TOTAL_NUM_ENTRIES);
+ }
+ {
+ DummyFileHeaderContext fileHeaderContext;
+ TransLogServer tlss("test8", 18377, ".", fileHeaderContext, 0x1000000);
+ TransLogClient tls("tcp/localhost:18377");
+
+ TransLogClient::Session::UP s1 = openDomainTest(tls, "many");
+ SerialNum b(0), e(0);
+ size_t c(0);
+ EXPECT_TRUE(s1->status(b, e, c));
+ EXPECT_EQUAL(b, 1u);
+ EXPECT_EQUAL(e, TOTAL_NUM_ENTRIES);
+ EXPECT_EQUAL(c, TOTAL_NUM_ENTRIES);
+ CallBackManyTest ca(2);
+ TransLogClient::Visitor::UP visitor = tls.createVisitor("many", ca);
+ ASSERT_TRUE(visitor.get());
+ ASSERT_TRUE( visitor->visit(2, TOTAL_NUM_ENTRIES) );
+ for (size_t i(0); ! ca._eof && (i < 60000); i++ ) { FastOS_Thread::Sleep(10); }
+ ASSERT_TRUE( ! ca._inSync );
+ ASSERT_TRUE( ca._eof );
+ EXPECT_EQUAL(ca._count, TOTAL_NUM_ENTRIES);
+ EXPECT_EQUAL(ca._value, TOTAL_NUM_ENTRIES);
+ }
+}
+
+void Test::testErase()
+{
+ const unsigned int NUM_PACKETS = 1000;
+ const unsigned int NUM_ENTRIES = 100;
+ const unsigned int TOTAL_NUM_ENTRIES = NUM_PACKETS * NUM_ENTRIES;
+ {
+ DummyFileHeaderContext fileHeaderContext;
+ TransLogServer tlss("test12", 18377, ".", fileHeaderContext, 0x80000);
+ TransLogClient tls("tcp/localhost:18377");
+
+ createDomainTest(tls, "erase", 0);
+ TransLogClient::Session::UP s1 = openDomainTest(tls, "erase");
+ fillDomainTest(s1.get(), NUM_PACKETS, NUM_ENTRIES);
+ }
+ {
+ DummyFileHeaderContext fileHeaderContext;
+ TransLogServer tlss("test12", 18377, ".", fileHeaderContext, 0x1000000);
+ TransLogClient tls("tcp/localhost:18377");
+
+ TransLogClient::Session::UP s1 = openDomainTest(tls, "erase");
+
+ // Before erase
+ TEST_DO(assertVisitStats(tls, "erase", 2, TOTAL_NUM_ENTRIES,
+ 3, TOTAL_NUM_ENTRIES,
+ TOTAL_NUM_ENTRIES -2, TOTAL_NUM_ENTRIES - 3));
+ DomainStats domainStats = tlss.getDomainStats();
+ DomainInfo domainInfo = domainStats["erase"];
+ size_t numParts = domainInfo.parts.size();
+ LOG(info, "%zu parts", numParts);
+ for (uint32_t partId = 0; partId < numParts; ++partId) {
+ const PartInfo &part = domainInfo.parts[partId];
+ LOG(info,
+ "part %u from %" PRIu64 " to %" PRIu64 ", "
+ "count %zu, numBytes %zu",
+ partId,
+ (uint64_t) part.range.from(), (uint64_t) part.range.to(),
+ part.count, part.byteSize);
+ }
+ ASSERT_LESS_EQUAL(2u, numParts);
+ // Erase everything before second to last domainpart file
+ SerialNum eraseSerial = domainInfo.parts[numParts - 2].range.from();
+ s1->erase(eraseSerial);
+ TEST_DO(assertVisitStats(tls, "erase", 2, TOTAL_NUM_ENTRIES,
+ eraseSerial, TOTAL_NUM_ENTRIES,
+ TOTAL_NUM_ENTRIES + 1 - eraseSerial,
+ TOTAL_NUM_ENTRIES - eraseSerial));
+ TEST_DO(assertStatus(*s1, eraseSerial, TOTAL_NUM_ENTRIES,
+ domainInfo.parts[numParts - 2].count +
+ domainInfo.parts[numParts - 1].count));
+ // No apparent effect of erasing just first entry in 2nd to last part
+ s1->erase(eraseSerial + 1);
+ TEST_DO(assertVisitStats(tls, "erase", 2, TOTAL_NUM_ENTRIES,
+ eraseSerial, TOTAL_NUM_ENTRIES,
+ TOTAL_NUM_ENTRIES + 1 - eraseSerial,
+ TOTAL_NUM_ENTRIES - eraseSerial));
+ TEST_DO(assertStatus(*s1, eraseSerial + 1, TOTAL_NUM_ENTRIES,
+ domainInfo.parts[numParts - 2].count +
+ domainInfo.parts[numParts - 1].count));
+ // No apparent effect of erasing almost all of 2nd to last part
+ SerialNum eraseSerial2 = domainInfo.parts[numParts - 2].range.to();
+ s1->erase(eraseSerial2);
+ TEST_DO(assertVisitStats(tls, "erase", 2, TOTAL_NUM_ENTRIES,
+ eraseSerial, TOTAL_NUM_ENTRIES,
+ TOTAL_NUM_ENTRIES + 1 - eraseSerial,
+ TOTAL_NUM_ENTRIES - eraseSerial));
+ TEST_DO(assertStatus(*s1, eraseSerial2, TOTAL_NUM_ENTRIES,
+ domainInfo.parts[numParts - 2].count +
+ domainInfo.parts[numParts - 1].count));
+ // Erase everything before last domainpart file
+ eraseSerial = domainInfo.parts[numParts - 1].range.from();
+ s1->erase(eraseSerial);
+ TEST_DO(assertVisitStats(tls, "erase", 2, TOTAL_NUM_ENTRIES,
+ eraseSerial, TOTAL_NUM_ENTRIES,
+ TOTAL_NUM_ENTRIES + 1 - eraseSerial,
+ TOTAL_NUM_ENTRIES - eraseSerial));
+ TEST_DO(assertStatus(*s1, eraseSerial, TOTAL_NUM_ENTRIES,
+ domainInfo.parts[numParts - 1].count));
+ // No apparent effect of erasing just first entry in last part
+ s1->erase(eraseSerial + 1);
+ TEST_DO(assertVisitStats(tls, "erase", 2, TOTAL_NUM_ENTRIES,
+ eraseSerial, TOTAL_NUM_ENTRIES,
+ TOTAL_NUM_ENTRIES + 1 - eraseSerial,
+ TOTAL_NUM_ENTRIES - eraseSerial));
+ TEST_DO(assertStatus(*s1, eraseSerial + 1, TOTAL_NUM_ENTRIES,
+ domainInfo.parts[numParts - 1].count));
+ // No apparent effect of erasing almost all of last part
+ eraseSerial2 = domainInfo.parts[numParts - 1].range.to();
+ s1->erase(eraseSerial2);
+ TEST_DO(assertVisitStats(tls, "erase", 2, TOTAL_NUM_ENTRIES,
+ eraseSerial, TOTAL_NUM_ENTRIES,
+ TOTAL_NUM_ENTRIES + 1 - eraseSerial,
+ TOTAL_NUM_ENTRIES - eraseSerial));
+ TEST_DO(assertStatus(*s1, eraseSerial2, TOTAL_NUM_ENTRIES,
+ domainInfo.parts[numParts - 1].count));
+ }
+}
+
+
+void
+Test::testSync()
+{
+ const unsigned int NUM_PACKETS = 3;
+ const unsigned int NUM_ENTRIES = 4;
+ const unsigned int TOTAL_NUM_ENTRIES = NUM_PACKETS * NUM_ENTRIES;
+
+ DummyFileHeaderContext fileHeaderContext;
+ TransLogServer tlss("test9", 18377, ".", fileHeaderContext, 0x1000000);
+ TransLogClient tls("tcp/localhost:18377");
+
+ createDomainTest(tls, "sync", 0);
+ TransLogClient::Session::UP s1 = openDomainTest(tls, "sync");
+ fillDomainTest(s1.get(), NUM_PACKETS, NUM_ENTRIES);
+
+ SerialNum syncedTo(0);
+
+ EXPECT_TRUE(s1->sync(2, syncedTo));
+ EXPECT_EQUAL(syncedTo, TOTAL_NUM_ENTRIES);
+}
+
+
+void
+Test::testTruncateOnVersionMismatch()
+{
+ const unsigned int NUM_PACKETS = 3;
+ const unsigned int NUM_ENTRIES = 4;
+ const unsigned int TOTAL_NUM_ENTRIES = NUM_PACKETS * NUM_ENTRIES;
+
+ uint64_t fromOld(0), toOld(0);
+ size_t countOld(0);
+ DummyFileHeaderContext fileHeaderContext;
+ {
+ TransLogServer tlss("test11", 18377, ".", fileHeaderContext, 0x1000000);
+ TransLogClient tls("tcp/localhost:18377");
+
+ createDomainTest(tls, "sync", 0);
+ TransLogClient::Session::UP s1 = openDomainTest(tls, "sync");
+ fillDomainTest(s1.get(), NUM_PACKETS, NUM_ENTRIES);
+ EXPECT_TRUE(s1->status(fromOld, toOld, countOld));
+ SerialNum syncedTo(0);
+
+ EXPECT_TRUE(s1->sync(2, syncedTo));
+ EXPECT_EQUAL(syncedTo, TOTAL_NUM_ENTRIES);
+ }
+ FastOS_File f("test11/sync/sync-0000000000000000");
+ EXPECT_TRUE(f.OpenWriteOnlyExisting());
+ EXPECT_TRUE(f.SetPosition(f.GetSize()));
+
+ char tmp[100];
+ memset(tmp, 0, sizeof(tmp));
+ EXPECT_EQUAL(static_cast<ssize_t>(sizeof(tmp)), f.Write2(tmp, sizeof(tmp)));
+ EXPECT_TRUE(f.Close());
+ {
+ TransLogServer tlss("test11", 18377, ".", fileHeaderContext, 0x1000000);
+ TransLogClient tls("tcp/localhost:18377");
+ TransLogClient::Session::UP s1 = openDomainTest(tls, "sync");
+ uint64_t from(0), to(0);
+ size_t count(0);
+ EXPECT_TRUE(s1->status(from, to, count));
+ ASSERT_EQUAL(fromOld, from);
+ ASSERT_EQUAL(toOld, to);
+ ASSERT_EQUAL(countOld, count);
+ }
+}
+
+void
+Test::testTruncateOnShortRead()
+{
+ const unsigned int NUM_PACKETS = 17;
+ const unsigned int NUM_ENTRIES = 1;
+ const unsigned int TOTAL_NUM_ENTRIES = NUM_PACKETS * NUM_ENTRIES;
+ const unsigned int ENTRYSIZE = 4080;
+ vespalib::string topdir("test10");
+ vespalib::string domain("truncate");
+ vespalib::string dir(topdir + "/" + domain);
+ vespalib::string tlsspec("tcp/localhost:18377");
+
+ DummyFileHeaderContext fileHeaderContext;
+ {
+ TransLogServer tlss(topdir, 18377, ".", fileHeaderContext, 0x10000);
+ TransLogClient tls(tlsspec);
+
+ createDomainTest(tls, domain, 0);
+ TransLogClient::Session::UP s1 = openDomainTest(tls, domain);
+ fillDomainTest(s1.get(), NUM_PACKETS, NUM_ENTRIES, ENTRYSIZE);
+
+ SerialNum syncedTo(0);
+
+ EXPECT_TRUE(s1->sync(TOTAL_NUM_ENTRIES, syncedTo));
+ EXPECT_EQUAL(syncedTo, TOTAL_NUM_ENTRIES);
+ }
+ {
+ EXPECT_EQUAL(2u, countFiles(dir));
+ }
+ {
+ TransLogServer tlss(topdir, 18377, ".", fileHeaderContext, 0x10000);
+ TransLogClient tls(tlsspec);
+ TransLogClient::Session::UP s1 = openDomainTest(tls, domain);
+ checkFilledDomainTest(s1, TOTAL_NUM_ENTRIES);
+ }
+ {
+ EXPECT_EQUAL(2u, countFiles(dir));
+ }
+ {
+ vespalib::string filename(dir + "/truncate-0000000000000017");
+ FastOS_File trfile(filename.c_str());
+ EXPECT_TRUE(trfile.OpenReadWrite(NULL));
+ trfile.SetSize(trfile.getSize() - 1);
+ trfile.Close();
+ }
+ {
+ TransLogServer tlss(topdir, 18377, ".", fileHeaderContext, 0x10000);
+ TransLogClient tls(tlsspec);
+ TransLogClient::Session::UP s1 = openDomainTest(tls, domain);
+ checkFilledDomainTest(s1, TOTAL_NUM_ENTRIES - 1);
+ }
+ {
+ EXPECT_EQUAL(2u, countFiles(dir));
+ }
+}
+
+
+int Test::Main()
+{
+ TEST_INIT("translogclient_test");
+
+ if (_argc > 0) {
+ DummyFileHeaderContext::setCreator(_argv[0]);
+ }
+ test1();
+ test2();
+ testMany();
+ testErase();
+ partialUpdateTest();
+
+ testRemove();
+
+ testSync();
+
+ testTruncateOnShortRead();
+ testTruncateOnVersionMismatch();
+
+ testCrcVersions();
+
+ TEST_DONE();
+}
diff --git a/searchlib/src/tests/transactionlog/translogclient_test.sh b/searchlib/src/tests/transactionlog/translogclient_test.sh
new file mode 100755
index 00000000000..bf7ddab1fcf
--- /dev/null
+++ b/searchlib/src/tests/transactionlog/translogclient_test.sh
@@ -0,0 +1,4 @@
+#!/bin/bash
+rm -rf test7 test8 test9 test10 test11 test12 test13 testremove
+$VALGRIND ./searchlib_translogclient_test_app
+rm -rf test7 test8 test9 test10 test11 test12 test13 testremove
diff --git a/searchlib/src/tests/transactionlog/translogserver_test.cpp b/searchlib/src/tests/transactionlog/translogserver_test.cpp
new file mode 100644
index 00000000000..fa03e4f5291
--- /dev/null
+++ b/searchlib/src/tests/transactionlog/translogserver_test.cpp
@@ -0,0 +1,19 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/searchlib/transactionlog/translogserver.h>
+#include <vespa/searchlib/index/dummyfileheadercontext.h>
+
+using search::index::DummyFileHeaderContext;
+using search::transactionlog::TransLogServer;
+
+
+int main(int argc, char *argv[])
+{
+ if ((argc > 1) && (argv[0] != NULL)) {
+ DummyFileHeaderContext::setCreator(argv[0]);
+ }
+ DummyFileHeaderContext fileHeaderContext;
+ TransLogServer tls("test7", 18377, ".", fileHeaderContext, 0x10000);
+ sleep(60);
+ return 0;
+}
diff --git a/searchlib/src/tests/transactionlogstress/.gitignore b/searchlib/src/tests/transactionlogstress/.gitignore
new file mode 100644
index 00000000000..5913613b455
--- /dev/null
+++ b/searchlib/src/tests/transactionlogstress/.gitignore
@@ -0,0 +1,4 @@
+.depend
+Makefile
+server
+translogstress
diff --git a/searchlib/src/tests/transactionlogstress/CMakeLists.txt b/searchlib/src/tests/transactionlogstress/CMakeLists.txt
new file mode 100644
index 00000000000..c91b0e34b40
--- /dev/null
+++ b/searchlib/src/tests/transactionlogstress/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_translogstress_app
+ SOURCES
+ translogstress.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_translogstress_app COMMAND searchlib_translogstress_app BENCHMARK)
diff --git a/searchlib/src/tests/transactionlogstress/DESC b/searchlib/src/tests/transactionlogstress/DESC
new file mode 100644
index 00000000000..47dac6a9342
--- /dev/null
+++ b/searchlib/src/tests/transactionlogstress/DESC
@@ -0,0 +1 @@
+This is a stress test of the transaction log server.
diff --git a/searchlib/src/tests/transactionlogstress/FILES b/searchlib/src/tests/transactionlogstress/FILES
new file mode 100644
index 00000000000..68cc8402652
--- /dev/null
+++ b/searchlib/src/tests/transactionlogstress/FILES
@@ -0,0 +1 @@
+translogstress.cpp
diff --git a/searchlib/src/tests/transactionlogstress/translogstress.cpp b/searchlib/src/tests/transactionlogstress/translogstress.cpp
new file mode 100644
index 00000000000..1c51c81e633
--- /dev/null
+++ b/searchlib/src/tests/transactionlogstress/translogstress.cpp
@@ -0,0 +1,875 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+#include <vespa/vespalib/util/buffer.h>
+#include <vespa/searchlib/transactionlog/translogserver.h>
+#include <vespa/searchlib/transactionlog/translogclient.h>
+#include <vespa/searchlib/util/rand48.h>
+#include <vespa/searchlib/util/runnable.h>
+#include <vespa/searchlib/index/dummyfileheadercontext.h>
+#include <iostream>
+#include <stdexcept>
+
+LOG_SETUP("translogstress");
+
+using document::ByteBuffer;
+using search::Runnable;
+using vespalib::Monitor;
+using vespalib::MonitorGuard;
+using std::shared_ptr;
+using vespalib::make_string;
+using vespalib::ConstBufferRef;
+using search::index::DummyFileHeaderContext;
+
+namespace search {
+namespace transactionlog {
+
+using ClientSession = TransLogClient::Session;
+using Subscriber = TransLogClient::Subscriber;
+using Visitor = TransLogClient::Visitor;
+
+//-----------------------------------------------------------------------------
+// BufferGenerator
+//-----------------------------------------------------------------------------
+class BufferGenerator
+{
+private:
+ Rand48 _rnd;
+ uint32_t _minStrLen;
+ uint32_t _maxStrLen;
+
+public:
+ BufferGenerator() :
+ _rnd(), _minStrLen(0), _maxStrLen(0) {}
+ BufferGenerator(uint32_t minStrLen, uint32_t maxStrLen) :
+ _rnd(), _minStrLen(minStrLen), _maxStrLen(maxStrLen) {}
+ void setSeed(long seed) { _rnd.srand48(seed); }
+ ByteBuffer getRandomBuffer();
+};
+
+ByteBuffer
+BufferGenerator::getRandomBuffer()
+{
+ size_t len = _minStrLen + _rnd.lrand48() % (_maxStrLen - _minStrLen);
+ std::string str;
+ for (size_t i = 0; i < len; ++i) {
+ char c = 'a' + _rnd.lrand48() % ('z' - 'a' + 1);
+ str.push_back(c);
+ }
+ ByteBuffer buf(str.size() + 1);
+ buf.putBytes(str.c_str(), str.size() + 1);
+ buf.flip();
+ return buf;
+}
+
+
+//-----------------------------------------------------------------------------
+// EntryGenerator
+//-----------------------------------------------------------------------------
+class EntryGenerator
+{
+private:
+ Rand48 _rnd;
+ long _baseSeed;
+ BufferGenerator _bufferGenerator;
+ const std::vector<document::ByteBuffer> * _buffers;
+ ByteBuffer _lastGeneratedBuffer;
+
+public:
+ EntryGenerator(long baseSeed, const BufferGenerator & bufferGenerator) :
+ _rnd(), _baseSeed(baseSeed), _bufferGenerator(bufferGenerator), _buffers(NULL),
+ _lastGeneratedBuffer() {}
+ EntryGenerator(const EntryGenerator & rhs) :
+ _rnd(), _baseSeed(rhs._baseSeed), _bufferGenerator(rhs._bufferGenerator),
+ _buffers(rhs._buffers), _lastGeneratedBuffer(rhs._lastGeneratedBuffer) {}
+ EntryGenerator & operator=(const EntryGenerator & rhs) {
+ _rnd = rhs._rnd;
+ _baseSeed = rhs._baseSeed;
+ _bufferGenerator = rhs._bufferGenerator;
+ _buffers = rhs._buffers;
+ return *this;
+ };
+ SerialNum getRandomSerialNum(SerialNum begin, SerialNum end);
+ Packet::Entry getRandomEntry(SerialNum num);
+ Rand48 & getRnd() { return _rnd; }
+ void setBuffers(const std::vector<ByteBuffer> & buffers) {
+ _buffers = &buffers;
+ }
+};
+
+SerialNum
+EntryGenerator::getRandomSerialNum(SerialNum begin, SerialNum end)
+{
+ // return random number in range [begin, end]
+ assert(begin <= end);
+ if (begin == end) {
+ return SerialNum(begin);
+ } else {
+ return SerialNum(begin + _rnd.lrand48() % (end - begin + 1));
+ }
+}
+
+Packet::Entry
+EntryGenerator::getRandomEntry(SerialNum num)
+{
+ _rnd.srand48(_baseSeed + num);
+ if (_buffers != NULL) {
+ size_t i = _rnd.lrand48() % _buffers->size();
+ const ByteBuffer& buffer = (*_buffers)[i];
+ return Packet::Entry(num, 1024, ConstBufferRef(buffer.getBuffer(), buffer.getLength()));
+ } else {
+ _bufferGenerator.setSeed(_baseSeed + num);
+ _lastGeneratedBuffer = _bufferGenerator.getRandomBuffer();
+ return Packet::Entry(num, 1024, ConstBufferRef(_lastGeneratedBuffer.getBuffer(), _lastGeneratedBuffer.getLength()));
+ }
+}
+
+
+//-----------------------------------------------------------------------------
+// EntryComparator
+//-----------------------------------------------------------------------------
+class EntryComparator
+{
+public:
+ static bool cmp(const Packet::Entry & lhs, const Packet::Entry & rhs) {
+ if (lhs.serial() != rhs.serial()) {
+ return false;
+ }
+ if (lhs.type() != rhs.type()) {
+ return false;
+ }
+ if (lhs.data().size() != rhs.data().size()) {
+ return false;
+ }
+ if (memcmp(lhs.data().c_str(), rhs.data().c_str(), lhs.data().size()) != 0) {
+ return false;
+ }
+ return true;
+ }
+};
+
+
+//-----------------------------------------------------------------------------
+// EntryPrinter
+//-----------------------------------------------------------------------------
+class EntryPrinter
+{
+public:
+ static std::string toStr(const Packet::Entry & e) {
+ std::stringstream ss;
+ ss << "Entry(serial(" << e.serial() << "), type(" << e.type() << "), bufferSize(" <<
+ e.data().size() << "), buffer(";
+ for (size_t i = 0; i < e.data().size() - 1; ++i) {
+ ss << e.data().c_str()[i];
+ }
+ ss << ")";
+ return ss.str();
+ }
+};
+
+
+//-----------------------------------------------------------------------------
+// PacketPrinter
+//-----------------------------------------------------------------------------
+class PacketPrinter
+{
+public:
+ static std::string toStr(const Packet & p) {
+ std::stringstream ss;
+ ss << "Packet(entries(" << p.size() << "), range([" << p.range().from() << ", " << p.range().to()
+ << "]), bytes(" << p.getHandle().size() << "))";
+ return ss.str();
+ }
+};
+
+
+//-----------------------------------------------------------------------------
+// FeederThread
+//-----------------------------------------------------------------------------
+class FeederThread : public Runnable
+{
+private:
+ std::string _tlsSpec;
+ std::string _domain;
+ TransLogClient _client;
+ std::unique_ptr<ClientSession> _session;
+ EntryGenerator _generator;
+ uint32_t _feedRate;
+ Packet _packet;
+ SerialNum _current;
+ SerialNum _lastCommited;
+ FastOS_Time _timer;
+
+ void commitPacket();
+ bool addEntry(const Packet::Entry & e);
+
+public:
+ FeederThread(const std::string & tlsSpec, const std::string & domain,
+ const EntryGenerator & generator, uint32_t feedRate, size_t packetSize) :
+ _tlsSpec(tlsSpec), _domain(domain), _client(tlsSpec), _session(),
+ _generator(generator), _feedRate(feedRate), _packet(packetSize), _current(1), _lastCommited(1), _timer() {}
+ virtual void doRun();
+ SerialNumRange getRange() const { return SerialNumRange(1, _lastCommited); }
+};
+
+void
+FeederThread::commitPacket()
+{
+ _packet.close();
+ const vespalib::nbostream& stream = _packet.getHandle();
+ if (!_session->commit(ConstBufferRef(stream.c_str(), stream.size()))) {
+ throw std::runtime_error(vespalib::make_string
+ ("FeederThread: Failed commiting %s", PacketPrinter::toStr(_packet).c_str()));
+ } else {
+ LOG(info, "FeederThread: commited %s", PacketPrinter::toStr(_packet).c_str());
+ }
+ _packet.clear();
+ _lastCommited = _current - 1;
+}
+
+bool
+FeederThread::addEntry(const Packet::Entry & e)
+{
+ //LOG(info, "FeederThread: add %s", EntryPrinter::toStr(e).c_str());
+ return _packet.add(e);
+}
+
+void
+FeederThread::doRun()
+{
+ _session = _client.open(_domain);
+ if (_session.get() == NULL) {
+ throw std::runtime_error(vespalib::make_string("FeederThread: Could not open session to %s", _tlsSpec.c_str()));
+ }
+
+ while (!_done) {
+ if (_feedRate != 0) {
+ _timer.SetNow();
+ for (uint32_t i = 0; i < _feedRate; ++i) {
+ Packet::Entry entry = _generator.getRandomEntry(_current++);
+ if (!addEntry(entry)) {
+ commitPacket();
+ if (!addEntry(entry)) {
+ throw std::runtime_error(vespalib::make_string
+ ("FeederThread: Could not add %s", EntryPrinter::toStr(entry).c_str()));
+ }
+ }
+ }
+ commitPacket();
+
+ uint64_t milliSecsUsed = static_cast<uint64_t>(_timer.MilliSecsToNow());
+ if (milliSecsUsed < 1000) {
+ //LOG(info, "FeederThread: sleep %u ms", 1000 - milliSecsUsed);
+ FastOS_Thread::Sleep(1000 - milliSecsUsed);
+ } else {
+ LOG(info, "FeederThread: max throughput");
+ }
+ } else {
+ Packet::Entry entry = _generator.getRandomEntry(_current++);
+ if (!addEntry(entry)) {
+ commitPacket();
+ if (!addEntry(entry)) {
+ throw std::runtime_error(vespalib::make_string
+ ("FeederThread: Could not add %s", EntryPrinter::toStr(entry).c_str()));
+ }
+ }
+ }
+ }
+}
+
+
+//-----------------------------------------------------------------------------
+// Agent
+//-----------------------------------------------------------------------------
+class Agent : public ClientSession::Callback
+{
+protected:
+ std::string _tlsSpec;
+ std::string _domain;
+ TransLogClient _client;
+ EntryGenerator _generator;
+ std::string _name;
+ uint32_t _id;
+ bool _validate;
+
+public:
+ Agent(const std::string & tlsSpec, const std::string & domain,
+ const EntryGenerator & generator, const std::string & name, uint32_t id, bool validate) :
+ ClientSession::Callback(),
+ _tlsSpec(tlsSpec), _domain(domain), _client(tlsSpec),
+ _generator(generator), _name(name), _id(id), _validate(validate) {}
+ virtual ~Agent() {}
+ virtual RPC::Result receive(const Packet & packet) = 0;
+ virtual void inSync() {}
+ virtual void eof() {}
+ virtual void failed() {}
+};
+
+
+//-----------------------------------------------------------------------------
+// SubscriberAgent
+//-----------------------------------------------------------------------------
+class SubscriberAgent : public Agent
+{
+private:
+ std::unique_ptr<Subscriber> _subscriber;
+ SerialNum _from;
+ SerialNum _next;
+ Monitor _monitor;
+
+ SerialNum getNext() {
+ MonitorGuard guard(_monitor);
+ return _next++;
+ }
+
+public:
+ SubscriberAgent(const std::string & tlsSpec, const std::string & domain,
+ const EntryGenerator & generator, SerialNum from, uint32_t id, bool validate) :
+ Agent(tlsSpec, domain, generator, "SubscriberAgent", id, validate),
+ _subscriber(), _from(from), _next(from + 1) {}
+ virtual ~SubscriberAgent() {}
+ void start();
+ void stop();
+ SerialNum getExpectedNext() const {
+ MonitorGuard guard(_monitor);
+ return _next;
+ }
+ SerialNumRange getRange() const { return SerialNumRange(_from, _next - 1); }
+ virtual RPC::Result receive(const Packet & packet);
+};
+
+void
+SubscriberAgent::start()
+{
+ _subscriber = _client.createSubscriber(_domain, *this);
+ if (_subscriber.get() == NULL) {
+ throw std::runtime_error(vespalib::make_string
+ ("SubscriberAgent[%u]: Could not open subscriber to %s", _id, _tlsSpec.c_str()));
+ }
+ if (!_subscriber->subscribe(_from)) {
+ throw std::runtime_error(vespalib::make_string
+ ("SubscriberAgent[%u]: Could not subscribe to %s from serialnumber %" PRIu64,
+ _id, _tlsSpec.c_str(), _from));
+ }
+}
+
+void
+SubscriberAgent::stop()
+{
+ _subscriber.reset();
+}
+
+RPC::Result
+SubscriberAgent::receive(const Packet & packet)
+{
+ auto handle = packet.getHandle();
+ while (handle.size() > 0) {
+ Packet::Entry entry;
+ entry.deserialize(handle);
+ Packet::Entry expected = _generator.getRandomEntry(getNext());
+ if (_validate) {
+ if (!EntryComparator::cmp(entry, expected)) {
+ throw std::runtime_error(vespalib::make_string
+ ("SubscriberAgent[%u]: Got %s, expected %s", _id,
+ EntryPrinter::toStr(entry).c_str(),
+ EntryPrinter::toStr(expected).c_str()));
+ }
+ }
+ }
+ LOG(info, "SubscriberAgent[%u]: received %s", _id, PacketPrinter::toStr(packet).c_str());
+
+ return RPC::OK;
+}
+
+
+//-----------------------------------------------------------------------------
+// VisitorAgent
+//-----------------------------------------------------------------------------
+class VisitorAgent : public Agent
+{
+private:
+ enum State {
+ IDLE, RUNNING, FINISHED
+ };
+ std::unique_ptr<Visitor> _visitor;
+ SerialNum _from;
+ SerialNum _to;
+ SerialNum _next;
+ bool _running;
+ State _state;
+ Monitor _monitor;
+
+ void setState(State newState) {
+ MonitorGuard guard(_monitor);
+ //LOG(info, "VisitorAgent[%u]: setState(%s)", _id, newState == IDLE ? "idle" :
+ // (newState == RUNNING ? "running" : "finished"));
+ _state = newState;
+ }
+ SerialNum getNext();
+
+public:
+ VisitorAgent(const std::string & tlsSpec, const std::string & domain,
+ const EntryGenerator & generator, uint32_t id, bool validate) :
+ Agent(tlsSpec, domain, generator, "VisitorAgent", id, validate),
+ _visitor(), _from(0), _to(0), _next(0), _state(IDLE) {}
+ virtual ~VisitorAgent() {}
+ void start(SerialNum from, SerialNum to);
+ void setIdle();
+ bool idle() {
+ MonitorGuard guard(_monitor);
+ return _state == IDLE;
+ }
+ bool running() {
+ MonitorGuard guard(_monitor);
+ return _state == RUNNING;
+ }
+ bool finished() {
+ MonitorGuard guard(_monitor);
+ return _state == FINISHED;
+ }
+ std::string getState() {
+ MonitorGuard guard(_monitor);
+ if (_state == IDLE) {
+ return std::string("idle");
+ } else if (_state == FINISHED) {
+ return std::string("finished");
+ } else {
+ return std::string("running");
+ }
+ }
+ SerialNum getFrom() { return _from; }
+ virtual RPC::Result receive(const Packet & packet);
+ virtual void eof() {
+ LOG(info, "VisitorAgent[%u]: eof", _id);
+ setState(FINISHED);
+ }
+};
+
+SerialNum
+VisitorAgent::getNext()
+{
+ SerialNum retval = _next++;
+ if (retval > _to) {
+ throw std::runtime_error(make_string("VisitorAgent[%u]: SerialNum (%" PRIu64 ") outside "
+ "expected range <%" PRIu64 ", %" PRIu64 "]", _id,
+ retval, _from, _to));
+ }
+ return retval;
+}
+
+void
+VisitorAgent::start(SerialNum from, SerialNum to)
+{
+ assert(idle());
+ LOG(info, "VisitorAgent[%u]: start<%" PRIu64 ", %" PRIu64 "]", _id, from, to);
+ _from = from;
+ _to = to;
+ _next = from + 1;
+ _visitor = _client.createVisitor(_domain, *this);
+ if (_visitor.get() == NULL) {
+ throw std::runtime_error(vespalib::make_string
+ ("VisitorAgent[%u]: Could not open visitor to %s", _id, _tlsSpec.c_str()));
+ }
+ setState(RUNNING);
+ if (!_visitor->visit(_from, _to)) {
+ throw std::runtime_error(vespalib::make_string
+ ("VisitorAgent[%u]: Could not visit from %s with range <%" PRIu64 ", %" PRIu64 "]",
+ _id, _tlsSpec.c_str(), _from, _to));
+ }
+}
+
+void
+VisitorAgent::setIdle()
+{
+ assert(finished());
+ _visitor.reset();
+ setState(IDLE);
+}
+
+RPC::Result
+VisitorAgent::receive(const Packet & packet)
+{
+ auto handle = packet.getHandle();
+ while (handle.size() > 0) {
+ Packet::Entry entry;
+ entry.deserialize(handle);
+ Packet::Entry expected = _generator.getRandomEntry(getNext());
+ if (_validate) {
+ if (!EntryComparator::cmp(entry, expected)) {
+ throw std::runtime_error(vespalib::make_string
+ ("VisitorAgent[%u]: Got %s, expected %s", _id,
+ EntryPrinter::toStr(entry).c_str(),
+ EntryPrinter::toStr(expected).c_str()));
+ }
+ }
+ }
+
+ if (_next > _to + 1) {
+ throw std::runtime_error(vespalib::make_string
+ ("VisitorAgent[%u]: Visited range <%" PRIu64 ", %" PRIu64 "], expected "
+ "range <%" PRIu64 "u, %" PRIu64 "]", _id,
+ _from, _next - 1, _from, _to));
+ }
+
+ return RPC::OK;
+}
+
+
+//-----------------------------------------------------------------------------
+// ControllerThread
+//-----------------------------------------------------------------------------
+class ControllerThread : public Runnable
+{
+private:
+ std::string _tlsSpec;
+ std::string _domain;
+ TransLogClient _client;
+ std::unique_ptr<ClientSession> _session;
+ EntryGenerator _generator;
+ std::vector<std::shared_ptr<SubscriberAgent> > _subscribers;
+ std::vector<std::shared_ptr<VisitorAgent> > _visitors;
+ std::vector<std::shared_ptr<VisitorAgent> > _rndVisitors;
+ uint64_t _visitorInterval; // in milliseconds
+ uint64_t _pruneInterval; // in milliseconds
+ FastOS_Time _pruneTimer;
+ SerialNum _begin;
+ SerialNum _end;
+ size_t _count;
+
+ void getStatus();
+ void makeRandomVisitorVector();
+
+public:
+ ControllerThread(const std::string & tlsSpec, const std::string & domain,
+ const EntryGenerator & generator, uint32_t numSubscribers, uint32_t numVisitors,
+ uint64_t visitorInterval, uint64_t pruneInterval) :
+ _tlsSpec(tlsSpec), _domain(domain), _client(tlsSpec.c_str()), _session(),
+ _generator(generator), _subscribers(), _visitors(), _rndVisitors(), _visitorInterval(visitorInterval),
+ _pruneInterval(pruneInterval), _pruneTimer(), _begin(0), _end(0), _count(0)
+ {
+ for (uint32_t i = 0; i < numSubscribers; ++i) {
+ _subscribers.push_back(std::shared_ptr<SubscriberAgent>
+ (new SubscriberAgent(tlsSpec, domain, generator, 0, i, true)));
+ }
+
+ for (uint32_t i = 0; i < numVisitors; ++i) {
+ _visitors.push_back(std::shared_ptr<VisitorAgent>(new VisitorAgent(tlsSpec, domain, generator, i, true)));
+ }
+ }
+ void startSubscribers();
+ uint32_t runningVisitors();
+ std::vector<std::shared_ptr<SubscriberAgent> > & getSubscribers() { return _subscribers; }
+ std::vector<std::shared_ptr<VisitorAgent> > & getVisitors() { return _visitors; }
+ virtual void doRun();
+
+};
+
+void
+ControllerThread::getStatus()
+{
+ if (!_session->status(_begin, _end, _count)) {
+ throw std::runtime_error(vespalib::make_string("ControllerThread: Could not get status from %s", _tlsSpec.c_str()));
+ }
+}
+
+void
+ControllerThread::makeRandomVisitorVector()
+{
+ std::vector<std::shared_ptr<VisitorAgent> > tmp(_visitors);
+ _rndVisitors.clear();
+ while (tmp.size() > 0) {
+ size_t i = _generator.getRnd().lrand48() % tmp.size();
+ _rndVisitors.push_back(tmp[i]);
+ tmp.erase(tmp.begin() + i);
+ }
+}
+
+void
+ControllerThread::startSubscribers()
+{
+ for (size_t i = 0; i < _subscribers.size(); ++i) {
+ _subscribers[i]->start();
+ }
+}
+
+void
+ControllerThread::doRun()
+{
+ _session = _client.open(_domain);
+ if (_session.get() == NULL) {
+ throw std::runtime_error(vespalib::make_string("ControllerThread: Could not open session to %s", _tlsSpec.c_str()));
+ }
+
+ _pruneTimer.SetNow();
+ while (!_done) {
+ // set finished visitors as idle
+ for (size_t i = 0; i < _visitors.size(); ++i) {
+ if (_visitors[i]->finished()) {
+ _visitors[i]->setIdle();
+ }
+ }
+ // find idle visitor
+ makeRandomVisitorVector();
+ for (size_t i = 0; i < _rndVisitors.size(); ++i) {
+ if (_rndVisitors[i]->idle()) {
+ getStatus();
+ SerialNum from = _generator.getRandomSerialNum(_begin, _end) - 1;
+ SerialNum to = _generator.getRandomSerialNum(from + 1, _end);
+ _rndVisitors[i]->start(from, to);
+ break;
+ }
+ }
+ // prune transaction log server
+ if (_pruneTimer.MilliSecsToNow() > _pruneInterval) {
+ getStatus();
+ SerialNum safePrune = _end;
+ for (size_t i = 0; i < _visitors.size(); ++i) {
+ if (_visitors[i]->running() && _visitors[i]->getFrom() < safePrune) {
+ safePrune = _visitors[i]->getFrom();
+ }
+ }
+ for (size_t i = 0; i < _subscribers.size(); ++i) {
+ SerialNum next = _subscribers[i]->getExpectedNext();
+ if (next < safePrune) {
+ safePrune = next;
+ }
+ }
+ LOG(info, "ControllerThread: status: begin(%" PRIu64 "), end(%" PRIu64 "), count(%zu)", _begin, _end, _count);
+ LOG(info, "ControllerThread: prune [%" PRIu64 ", %" PRIu64 ">", _begin, safePrune);
+ if (!_session->erase(safePrune)) {
+ throw std::runtime_error(vespalib::make_string("ControllerThread: Could not erase up to %" PRIu64, safePrune));
+ }
+ _pruneTimer.SetNow();
+ }
+ FastOS_Thread::Sleep(_visitorInterval);
+ }
+}
+
+
+//-----------------------------------------------------------------------------
+// TransLogStress
+//-----------------------------------------------------------------------------
+class TransLogStress : public FastOS_Application
+{
+private:
+ class Config {
+ public:
+ uint64_t domainPartSize;
+ size_t packetSize;
+
+ uint64_t stressTime;
+ uint32_t feedRate;
+ uint32_t numSubscribers;
+ uint32_t numVisitors;
+ uint64_t visitorInterval;
+ uint64_t pruneInterval;
+
+ uint32_t numPreGeneratedBuffers;
+ uint32_t minStrLen;
+ uint32_t maxStrLen;
+ long baseSeed;
+
+ Config() :
+ domainPartSize(0), packetSize(0), stressTime(0), feedRate(0), numSubscribers(0),
+ numVisitors(0), visitorInterval(0), pruneInterval(0), minStrLen(0), maxStrLen(0), baseSeed(0) {}
+ };
+
+ Config _cfg;
+
+ void printConfig();
+ void usage();
+
+public:
+ int Main();
+};
+
+void
+TransLogStress::printConfig()
+{
+ std::cout << "######## Config ########" << std::endl;
+ std::cout << "stressTime: " << _cfg.stressTime / 1000 << " s" << std::endl;
+ std::cout << "feedRate: " << _cfg.feedRate << " per/sec" << std::endl;
+ std::cout << "numSubscribers: " << _cfg.numSubscribers << std::endl;
+ std::cout << "numVisitors: " << _cfg.numVisitors << std::endl;
+ std::cout << "visitorInterval: " << _cfg.visitorInterval << " ms" << std::endl;
+ std::cout << "pruneInterval: " << _cfg.pruneInterval / 1000 << " s" << std::endl;
+ std::cout << "numPreGeneratedBuffers: " << _cfg.numPreGeneratedBuffers << std::endl;
+ std::cout << "minStrLen: " << _cfg.minStrLen << std::endl;
+ std::cout << "maxStrLen: " << _cfg.maxStrLen << std::endl;
+ std::cout << "baseSeed: " << _cfg.baseSeed << std::endl;
+ std::cout << "domainPartSize: " << _cfg.domainPartSize << " bytes" << std::endl;
+ std::cout << "packetSize: " << _cfg.packetSize << " bytes" << std::endl;
+}
+
+void
+TransLogStress::usage()
+{
+ std::cout << "usage: translogstress [-t stressTime(s)] [-f feedRate] [-s numSubscribers]" << std::endl;
+ std::cout << " [-v numVisitors] [-c visitorInterval(ms)] [-e pruneInterval(s)]" << std::endl;
+ std::cout << " [-g numPreGeneratedBuffers] [-i minStrLen] [-a maxStrLen] [-b baseSeed]" << std::endl;
+ std::cout << " [-d domainPartSize] [-p packetSize]" << std::endl;
+}
+
+int
+TransLogStress::Main()
+{
+ std::string tlsSpec("tcp/localhost:17897");
+ std::string domain("translogstress");
+ _cfg.domainPartSize = 8000000; // ~8MB
+ _cfg.packetSize = 0x10000;
+
+ _cfg.stressTime = 1000 * 60;
+ _cfg.feedRate = 10000;
+ _cfg.numSubscribers = 1;
+ _cfg.numVisitors = 1;
+ _cfg.visitorInterval = 1000 * 1;
+ _cfg.pruneInterval = 1000 * 12;
+
+ _cfg.numPreGeneratedBuffers = 0;
+ _cfg.minStrLen = 40;
+ _cfg.maxStrLen = 80;
+ _cfg.baseSeed = 100;
+
+ uint64_t sleepTime = 4000;
+
+ int idx = 1;
+ char opt;
+ const char * arg;
+ bool optError = false;
+ while ((opt = GetOpt("d:p:t:f:s:v:c:e:g:i:a:b:h", arg, idx)) != -1) {
+ switch (opt) {
+ case 'd':
+ _cfg.domainPartSize = atol(arg);
+ break;
+ case 'p':
+ _cfg.packetSize = atol(arg);
+ break;
+ case 't':
+ _cfg.stressTime = 1000 * atol(arg);
+ break;
+ case 'f':
+ _cfg.feedRate = atoi(arg);
+ break;
+ case 's':
+ _cfg.numSubscribers = atoi(arg);
+ break;
+ case 'v':
+ _cfg.numVisitors = atoi(arg);
+ break;
+ case 'c':
+ _cfg.visitorInterval = atol(arg);
+ break;
+ case 'e':
+ _cfg.pruneInterval = 1000 * atol(arg);
+ break;
+ case 'g':
+ _cfg.numPreGeneratedBuffers = atoi(arg);
+ break;
+ case 'i':
+ _cfg.minStrLen = atoi(arg);
+ break;
+ case 'a':
+ _cfg.maxStrLen = atoi(arg);
+ break;
+ case 'b':
+ _cfg.baseSeed = atol(arg);
+ break;
+ case 'h':
+ usage();
+ return -1;
+ default:
+ optError = true;
+ break;
+ }
+ }
+
+ printConfig();
+ FastOS_Thread::Sleep(sleepTime);
+
+ if (_argc != idx || optError) {
+ usage();
+ return -1;
+ }
+
+ // start transaction log server
+ DummyFileHeaderContext fileHeaderContext;
+ TransLogServer tls("server", 17897, ".", fileHeaderContext, _cfg.domainPartSize);
+ TransLogClient client(tlsSpec);
+ client.create(domain);
+
+ FastOS_ThreadPool threadPool(256000);
+
+ BufferGenerator bufferGenerator(_cfg.minStrLen, _cfg.maxStrLen);
+ bufferGenerator.setSeed(_cfg.baseSeed);
+ std::vector<ByteBuffer> buffers;
+ for (uint32_t i = 0; i < _cfg.numPreGeneratedBuffers; ++i) {
+ buffers.push_back(bufferGenerator.getRandomBuffer());
+ }
+ EntryGenerator generator(_cfg.baseSeed, bufferGenerator);
+ if (buffers.size() > 0) {
+ generator.setBuffers(buffers);
+ }
+
+
+ // start feeder and controller
+ FeederThread feeder(tlsSpec, domain, generator, _cfg.feedRate, _cfg.packetSize);
+ threadPool.NewThread(&feeder);
+
+ FastOS_Thread::Sleep(sleepTime);
+
+ ControllerThread controller(tlsSpec, domain, generator, _cfg.numSubscribers, _cfg.numVisitors,
+ _cfg.visitorInterval, _cfg.pruneInterval);
+ threadPool.NewThread(&controller);
+
+ // start subscribers
+ controller.startSubscribers();
+
+ // stop feeder and controller
+ FastOS_Thread::Sleep(_cfg.stressTime);
+ printConfig();
+ LOG(info, "Stop feeder...");
+ feeder.stop();
+ feeder.join();
+ std::cout << "<feeder>" << std::endl;
+ std::cout << " <from>" << feeder.getRange().from() << "</from>" << std::endl;
+ std::cout << " <to>" << feeder.getRange().to() << "</to>" << std::endl;
+ std::cout << " <rate>" << 1000 * (feeder.getRange().to() - feeder.getRange().from()) / (sleepTime + _cfg.stressTime)
+ << "</rate>" << std::endl;
+ std::cout << "</feeder>" << std::endl;
+
+ LOG(info, "Stop controller...");
+ controller.stop();
+ controller.join();
+
+ FastOS_Thread::Sleep(sleepTime);
+ std::vector<std::shared_ptr<VisitorAgent> > & visitors = controller.getVisitors();
+ for (size_t i = 0; i < visitors.size(); ++i) {
+ std::cout << "<visitor id='" << i << "'>" << std::endl;
+ std::cout << "<state>" << visitors[i]->getState() << "</state>" << std::endl;
+ std::cout << "</visitor>" << std::endl;
+ }
+
+ // stop subscribers
+ LOG(info, "Stop subscribers...");
+ std::vector<std::shared_ptr<SubscriberAgent> > & subscribers = controller.getSubscribers();
+ for (size_t i = 0; i < subscribers.size(); ++i) {
+ subscribers[i]->stop();
+ std::cout << "<subscriber id='" << i << "'>" << std::endl;
+ std::cout << " <from>" << subscribers[i]->getRange().from() << "</from>" << std::endl;
+ std::cout << " <to>" << subscribers[i]->getRange().to() << "</to>" << std::endl;
+ std::cout << "</subscriber>" << std::endl;
+ }
+
+ threadPool.Close();
+
+ return 0;
+}
+
+}
+}
+
+int main(int argc, char ** argv)
+{
+ search::transactionlog::TransLogStress myApp;
+ return myApp.Entry(argc, argv);
+}
diff --git a/searchlib/src/tests/true/.gitignore b/searchlib/src/tests/true/.gitignore
new file mode 100644
index 00000000000..9ab22a26cdc
--- /dev/null
+++ b/searchlib/src/tests/true/.gitignore
@@ -0,0 +1,4 @@
+.depend
+Makefile
+true_test
+searchlib_true_test_app
diff --git a/searchlib/src/tests/true/CMakeLists.txt b/searchlib/src/tests/true/CMakeLists.txt
new file mode 100644
index 00000000000..f7c80db8f92
--- /dev/null
+++ b/searchlib/src/tests/true/CMakeLists.txt
@@ -0,0 +1,7 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_true_test_app
+ SOURCES
+ true.cpp
+ DEPENDS
+)
+vespa_add_test(NAME searchlib_true_test_app COMMAND searchlib_true_test_app)
diff --git a/searchlib/src/tests/true/DESC b/searchlib/src/tests/true/DESC
new file mode 100644
index 00000000000..55b708ce96a
--- /dev/null
+++ b/searchlib/src/tests/true/DESC
@@ -0,0 +1 @@
+A very simple test. A good starting point for writing new tests.
diff --git a/searchlib/src/tests/true/FILES b/searchlib/src/tests/true/FILES
new file mode 100644
index 00000000000..7d324c5824d
--- /dev/null
+++ b/searchlib/src/tests/true/FILES
@@ -0,0 +1 @@
+true.cpp
diff --git a/searchlib/src/tests/true/true.cpp b/searchlib/src/tests/true/true.cpp
new file mode 100644
index 00000000000..d052f3ca4e4
--- /dev/null
+++ b/searchlib/src/tests/true/true.cpp
@@ -0,0 +1,15 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("true_test");
+#include <vespa/vespalib/testkit/testapp.h>
+
+TEST_SETUP(Test)
+
+int
+Test::Main()
+{
+ TEST_INIT("true_test");
+ EXPECT_TRUE(true);
+ TEST_DONE();
+}
diff --git a/searchlib/src/tests/url/.gitignore b/searchlib/src/tests/url/.gitignore
new file mode 100644
index 00000000000..6d6a20d8270
--- /dev/null
+++ b/searchlib/src/tests/url/.gitignore
@@ -0,0 +1,7 @@
+*.core
+.depend
+Makefile
+core
+core.*
+testurl
+searchlib_testurl_app
diff --git a/searchlib/src/tests/url/CMakeLists.txt b/searchlib/src/tests/url/CMakeLists.txt
new file mode 100644
index 00000000000..aa52f3d8374
--- /dev/null
+++ b/searchlib/src/tests/url/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_testurl_app
+ SOURCES
+ testurl.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_testurl_app COMMAND sh dotest.sh)
diff --git a/searchlib/src/tests/url/dotest.sh b/searchlib/src/tests/url/dotest.sh
new file mode 100755
index 00000000000..f7ac1fd1e69
--- /dev/null
+++ b/searchlib/src/tests/url/dotest.sh
@@ -0,0 +1,13 @@
+#!/bin/sh
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+# Run test
+echo "Testing the FastS_URL class..."
+$VALGRIND ./searchlib_testurl_app
+if [ $? -eq 0 ]; then
+ echo "SUCCESS: Test on FastS_URL passed!"
+else
+ echo "FAILURE: Test on FastS_URL failed!"
+ exit 1
+fi
+exit 0
diff --git a/searchlib/src/tests/url/testurl.cpp b/searchlib/src/tests/url/testurl.cpp
new file mode 100644
index 00000000000..4ed28453890
--- /dev/null
+++ b/searchlib/src/tests/url/testurl.cpp
@@ -0,0 +1,750 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright (C) 2000-2003 Fast Search & Transfer ASA
+// Copyright (C) 2003 Overture Services Norway AS
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/searchlib/util/url.h>
+
+
+// needed to resolve external symbol from httpd.h on AIX
+void FastS_block_usr2() {}
+
+
+static search::util::URL GlobalURL;
+
+static bool
+CheckString(const char *name,
+ const unsigned char *test1,
+ const unsigned char *test2)
+{
+ assert(test1 != NULL);
+ assert(test2 != NULL);
+
+ if (strcmp((const char*)test1, (const char*)test2)!=0) {
+ printf("FAILED: %s: '%s' != '%s'!\n", name, test1, test2);
+ GlobalURL.Dump();
+
+ return false;
+ }
+ return true;
+}
+
+static bool
+CheckInt(const char *name,
+ int test1,
+ int test2)
+{
+ if (test1 != test2) {
+ printf("FAILED: %s: %d != %d!\n", name, test1, test2);
+ GlobalURL.Dump();
+
+ return false;
+ }
+ return true;
+}
+
+const char *
+GetTokenString(search::util::URL &url)
+{
+ static char tokenbuffer[1000];
+
+ const unsigned char *token;
+ search::util::URL::URL_CONTEXT ctx;
+
+ tokenbuffer[0] = '\0';
+
+ while ((token = url.GetToken(ctx)) != NULL) {
+ if (tokenbuffer[0] != '\0')
+ strcat(tokenbuffer, ",");
+ strcat(tokenbuffer, url.ContextName(ctx));
+ strcat(tokenbuffer, ":");
+ strcat(tokenbuffer, (const char*)token);
+ }
+
+ return tokenbuffer;
+}
+
+
+static bool
+CheckURL(const char *url,
+ const char *scheme,
+ const char *host,
+ const char *domain,
+ const char *siteowner,
+ const char *tld,
+ const char *maintld,
+ const char */* tldregion */,
+ const char *port,
+ const char *path,
+ int pathdepth,
+ const char *filename,
+ const char *extension,
+ const char *params,
+ const char *query,
+ const char *fragment,
+ const char *address,
+ const char *tokens,
+ int verbose=0)
+{
+ if (verbose>0)
+ printf("Checking with URL: '%s'\n", url);
+
+ GlobalURL.SetURL((const unsigned char *)url);
+
+ if (verbose>0)
+ GlobalURL.Dump();
+ // GlobalURL.Dump();
+
+ return
+ CheckString("URL", (const unsigned char *)url, GlobalURL.GetURL()) &&
+ CheckString("urltype", (const unsigned char *)scheme,
+ GlobalURL.GetScheme()) &&
+ CheckString("host", (const unsigned char *)host,
+ GlobalURL.GetHost()) &&
+ CheckString("domain", (const unsigned char *)domain,
+ GlobalURL.GetDomain()) &&
+ CheckString("siteowner", (const unsigned char *)siteowner,
+ GlobalURL.GetSiteOwner()) &&
+ CheckString("tld", (const unsigned char *)tld,
+ GlobalURL.GetTLD()) &&
+ CheckString("maintld", (const unsigned char *)maintld,
+ GlobalURL.GetMainTLD()) &&
+#if 0
+ CheckString("tldregion", (const unsigned char *)tldregion,
+ GlobalURL.GetTLDRegion()) &&
+#endif
+ CheckString("port", (const unsigned char *)port,
+ GlobalURL.GetPort()) &&
+ CheckString("path", (const unsigned char *)path,
+ GlobalURL.GetPath()) &&
+ CheckInt("pathdepth", pathdepth,
+ GlobalURL.GetPathDepth()) &&
+ CheckString("filename", (const unsigned char *)filename,
+ GlobalURL.GetFilename()) &&
+ CheckString("extension", (const unsigned char *)extension,
+ GlobalURL.GetExtension()) &&
+ CheckString("params", (const unsigned char *)params,
+ GlobalURL.GetParams()) &&
+ CheckString("query", (const unsigned char *)query,
+ GlobalURL.GetQuery()) &&
+ CheckString("fragment", (const unsigned char *)fragment,
+ GlobalURL.GetFragment()) &&
+ CheckString("address", (const unsigned char *)address,
+ GlobalURL.GetAddress()) &&
+ CheckString("TOKENS", (const unsigned char *)tokens,
+ (const unsigned char*)GetTokenString(GlobalURL));
+}
+
+
+int main(int, char **)
+{
+ bool success = true;
+
+ success = success &&
+ CheckURL("", // URL
+ "", // scheme
+ "", // host
+ "", // domain
+ "", // siteowner
+ "", // tld
+ "", // maintld
+ "", // tldregion
+ "", // port
+ "", // path
+ 0, // pathdepth
+ "", // filename
+ "", // extension
+ "", // params
+ "", // query
+ "", // fragment
+ "", // address
+ "");// Tokenstring
+ success = success &&
+ CheckURL(".", // URL
+ "", // scheme
+ "", // host
+ "", // domain
+ "", // siteowner
+ "", // tld
+ "", // maintld
+ "", // tldregion
+ "", // port
+ ".", // path
+ 1, // pathdepth
+ ".", // filename
+ "", // extension
+ "", // params
+ "", // query
+ "", // fragment
+ "", // address
+ "");// Tokenstring
+ success = success &&
+ CheckURL("..", // URL
+ "", // scheme
+ "", // host
+ "", // domain
+ "", // siteowner
+ "", // tld
+ "", // maintld
+ "", // tldregion
+ "", // port
+ "..", // path
+ 1, // pathdepth
+ "..", // filename
+ "", // extension
+ "", // params
+ "", // query
+ "", // fragment
+ "", // address
+ "");// Tokenstring
+ success = success &&
+ CheckURL("CHANGES_2.0a", // URL
+ "", // scheme
+ "", // host
+ "", // domain
+ "", // siteowner
+ "", // tld
+ "", // maintld
+ "", // tldregion
+ "", // port
+ "CHANGES_2.0a", // path
+ 1, // pathdepth
+ "CHANGES_2.0a", // filename
+ "0a", // extension
+ "", // params
+ "", // query
+ "", // fragment
+ "", // address
+ "FILENAME:CHANGES_2,EXTENSION:0a");// Tokenstring
+ success = success &&
+ CheckURL("patches/patch-cvs-1.9.10", // URL
+ "", // scheme
+ "", // host
+ "", // domain
+ "", // siteowner
+ "", // tld
+ "", // maintld
+ "", // tldregion
+ "", // port
+ "patches/patch-cvs-1.9.10", // path
+ 2, // pathdepth
+ "patch-cvs-1.9.10", // filename
+ "10", // extension
+ "", // params
+ "", // query
+ "", // fragment
+ "", // address
+ "PATH:patches,FILENAME:patch-cvs-1,FILENAME:9,EXTENSION:10");// Tokenstring
+ success = success &&
+ CheckURL("http:patches/patch-ssh-1.2.14", // URL
+ "http", // scheme
+ "", // host
+ "", // domain
+ "", // siteowner
+ "", // tld
+ "", // maintld
+ "", // tldregion
+ "", // port
+ "patches/patch-ssh-1.2.14", // path
+ 2, // pathdepth
+ "patch-ssh-1.2.14", // filename
+ "14", // extension
+ "", // params
+ "", // query
+ "", // fragment
+ "", // address
+ "SCHEME:http,PATH:patches,FILENAME:patch-ssh-1,FILENAME:2,EXTENSION:14");// Tokenstring
+ success = success &&
+ CheckURL("http://180.uninett.no/servlet/online.Bransje", // URL
+ "http", // scheme
+ "180.uninett.no", // host
+ "uninett.no", // domain
+ "uninett", // siteowner
+ "no", // tld
+ "no", // maintld
+ "europe", // tldregion
+ "", // port
+ "/servlet/online.Bransje", // path
+ 2, // pathdepth
+ "online.Bransje", // filename
+ "Bransje", // extension
+ "", // query
+ "", // params
+ "", // fragment
+ "", // address
+ "SCHEME:http,HOST:180,DOMAIN:uninett,MAINTLD:no,PATH:servlet,FILENAME:online,EXTENSION:Bransje");// Tokenstring
+ success = success &&
+ CheckURL("Bilder.gif/rule11.GIF", // URL
+ "", // scheme
+ "", // host
+ "", // domain
+ "", // siteowner
+ "", // tld
+ "", // maintld
+ "", // tldregion
+ "", // port
+ "Bilder.gif/rule11.GIF", // path
+ 2, // pathdepth
+ "rule11.GIF", // filename
+ "GIF", // extension
+ "", // params
+ "", // query
+ "", // fragment
+ "", // address
+ "PATH:Bilder,PATH:gif,FILENAME:rule11,EXTENSION:GIF");// Tokenstring
+ success = success &&
+ CheckURL("bilder/meny/Buer/bue_o.GIF", // URL
+ "", // scheme
+ "", // host
+ "", // domain
+ "", // siteowner
+ "", // tld
+ "", // maintld
+ "", // tldregion
+ "", // port
+ "bilder/meny/Buer/bue_o.GIF", // path
+ 4, // pathdepth
+ "bue_o.GIF", // filename
+ "GIF", // extension
+ "", // params
+ "", // query
+ "", // fragment
+ "", // address
+ "PATH:bilder,PATH:meny,PATH:Buer,FILENAME:bue_o,EXTENSION:GIF");// Tokenstring
+ success = success &&
+ CheckURL("./fakadm/grafikk/indus_bilde.JPG", // URL
+ "", // scheme
+ "", // host
+ "", // domain
+ "", // siteowner
+ "", // tld
+ "", // maintld
+ "", // tldregion
+ "", // port
+ "./fakadm/grafikk/indus_bilde.JPG", // path
+ 4, // pathdepth
+ "indus_bilde.JPG", // filename
+ "JPG", // extension
+ "", // params
+ "", // query
+ "", // fragment
+ "", // address
+ "PATH:fakadm,PATH:grafikk,FILENAME:indus_bilde,EXTENSION:JPG");// Tokenstring
+ success = success &&
+ CheckURL("linux-2.0.35.tar.bz2", // URL
+ "", // scheme
+ "", // host
+ "", // domain
+ "", // siteowner
+ "", // tld
+ "", // maintld
+ "", // tldregion
+ "", // port
+ "linux-2.0.35.tar.bz2", // path
+ 1, // pathdepth
+ "linux-2.0.35.tar.bz2", // filename
+ "bz2", // extension
+ "", // params
+ "", // query
+ "", // fragment
+ "", // address
+ "FILENAME:linux-2,FILENAME:0,FILENAME:35,FILENAME:tar,EXTENSION:bz2");// Tokenstring
+ success = success &&
+ CheckURL("http://www.underdusken.no", // URL
+ "http", // scheme
+ "www.underdusken.no", // host
+ "underdusken.no", // domain
+ "underdusken", // siteowner
+ "no", // tld
+ "no", // maintld
+ "europe", // tldregion
+ "", // port
+ "", // path
+ 0, // pathdepth
+ "", // filename
+ "", // extension
+ "", // params
+ "", // query
+ "", // fragment
+ "", // address
+ "SCHEME:http,HOST:www,DOMAIN:underdusken,MAINTLD:no");// Tokenstring
+ success = success &&
+ CheckURL("http://www.underdusken.no/?page=dusker/html/0008/Uholdbar.html", // URL
+ "http", // scheme
+ "www.underdusken.no", // host
+ "underdusken.no", // domain
+ "underdusken", // siteowner
+ "no", // tld
+ "no", // maintld
+ "europe", // tldregion
+ "", // port
+ "/", // path
+ 0, // pathdepth
+ "", // filename
+ "", // extension
+ "", // params
+ "page=dusker/html/0008/Uholdbar.html", // query
+ "", // fragment
+ "", // address
+ "SCHEME:http,HOST:www,DOMAIN:underdusken,MAINTLD:no,QUERY:page,QUERY:dusker,QUERY:html,QUERY:0008,QUERY:Uholdbar,QUERY:html");// Tokenstring
+ success = success &&
+ CheckURL("http://www.uni-karlsruhe.de/~ig25/ssh-faq/", // URL
+ "http", // scheme
+ "www.uni-karlsruhe.de", // host
+ "uni-karlsruhe.de", // domain
+ "uni-karlsruhe", // siteowner
+ "de", // tld
+ "de", // maintld
+ "", // tldregion
+ "", // port
+ "/~ig25/ssh-faq/", // path
+ 2, // pathdepth
+ "", // filename
+ "", // extension
+ "", // params
+ "", // query
+ "", // fragment
+ "", // address
+ "SCHEME:http,HOST:www,DOMAIN:uni-karlsruhe,MAINTLD:de,PATH:ig25,PATH:ssh-faq");// Tokenstring
+ success = success &&
+ CheckURL("java/", // URL
+ "", // scheme
+ "", // host
+ "", // domain
+ "", // siteowner
+ "", // tld
+ "", // maintld
+ "", // tldregion
+ "", // port
+ "java/", // path
+ 1, // pathdepth
+ "", // filename
+ "", // extension
+ "", // params
+ "", // query
+ "", // fragment
+ "", // address
+ "PATH:java");// Tokenstring
+ success = success &&
+ CheckURL("javascript:OpenWindow('/survey/faq.html', 'Issues', 'width=635,height=400,toolbars=no,location=no,menubar=yes,status=no,resizable=yes,scrollbars=yes", // URL
+ "javascript", // scheme
+ "", // host
+ "", // domain
+ "", // siteowner
+ "", // tld
+ "", // maintld
+ "", // tldregion
+ "", // port
+ "", // path
+ 0, // pathdepth
+ "", // filename
+ "", // extension
+ "", // params
+ "", // query
+ "", // fragment
+ "OpenWindow('/survey/faq.html', 'Issues', 'width=635,height=400,toolbars=no,location=no,menubar=yes,status=no,resizable=yes,scrollbars=yes", // address
+ "SCHEME:javascript,ADDRESS:OpenWindow,ADDRESS:survey,ADDRESS:faq,ADDRESS:html,ADDRESS:Issues,ADDRESS:width,ADDRESS:635,ADDRESS:height,ADDRESS:400,ADDRESS:toolbars,ADDRESS:no,ADDRESS:location,ADDRESS:no,ADDRESS:menubar,ADDRESS:yes,ADDRESS:status,ADDRESS:no,ADDRESS:resizable,ADDRESS:yes,ADDRESS:scrollbars,ADDRESS:yes");// Tokenstring
+ success = success &&
+ CheckURL("mailto: dmf-post@medisin.ntnu.no", // URL
+ "mailto", // scheme
+ "", // host
+ "", // domain
+ "", // siteowner
+ "", // tld
+ "", // maintld
+ "", // tldregion
+ "", // port
+ "", // path
+ 0, // pathdepth
+ "", // filename
+ "", // extension
+ "", // params
+ "", // query
+ "", // fragment
+ " dmf-post@medisin.ntnu.no", // address
+ "SCHEME:mailto,ADDRESS:dmf-post,ADDRESS:medisin,ADDRESS:ntnu,ADDRESS:no");// Tokenstring
+ success = success &&
+ CheckURL("mailto:%20Harald%20Danielsen@energy.sintef.no", // URL
+ "mailto", // scheme
+ "", // host
+ "", // domain
+ "", // siteowner
+ "", // tld
+ "", // maintld
+ "", // tldregion
+ "", // port
+ "", // path
+ 0, // pathdepth
+ "", // filename
+ "", // extension
+ "", // params
+ "", // query
+ "", // fragment
+ "%20Harald%20Danielsen@energy.sintef.no", // address
+ "SCHEME:mailto,ADDRESS:20Harald,ADDRESS:20Danielsen,ADDRESS:energy,ADDRESS:sintef,ADDRESS:no");// Tokenstring
+ success = success &&
+ CheckURL("www.underdusken.no", // URL
+ "", // scheme
+ "www.underdusken.no", // host
+ "underdusken.no", // domain
+ "underdusken", // siteowner
+ "no", // tld
+ "no", // maintld
+ "europe", // tldregion
+ "", // port
+ "", // path
+ 0, // pathdepth
+ "", // filename
+ "", // extension
+ "", // params
+ "", // query
+ "", // fragment
+ "", // address
+ "HOST:www,DOMAIN:underdusken,MAINTLD:no");// Tokenstring
+ success = success &&
+ CheckURL("~janie/", // URL
+ "", // scheme
+ "", // host
+ "", // domain
+ "", // siteowner
+ "", // tld
+ "", // maintld
+ "", // tldregion
+ "", // port
+ "~janie/", // path
+ 1, // pathdepth
+ "", // filename
+ "", // extension
+ "", // params
+ "", // query
+ "", // fragment
+ "", // address
+ "PATH:janie");// Tokenstring
+ success = success &&
+ CheckURL("https://dette.er.en:2020/~janie/index.htm?param1=q&param2=r", // URL
+ "https", // scheme
+ "dette.er.en", // host
+ "er.en", // domain
+ "er", // siteowner
+ "en", // tld
+ "en", // maintld
+ "", // tldregion
+ "2020", // port
+ "/~janie/index.htm", // path
+ 2, // pathdepth
+ "index.htm", // filename
+ "htm", // extension
+ "", // params
+ "param1=q&param2=r", // query
+ "", // fragment
+ "", // address
+ "SCHEME:https,HOST:dette,DOMAIN:er,MAINTLD:en,PORT:2020,PATH:janie,FILENAME:index,EXTENSION:htm,QUERY:param1,QUERY:q,QUERY:param2,QUERY:r");// Tokenstring
+#if 0
+ success = success &&
+ CheckURL("http://www.sony.co.uk/", // URL
+ "http", // scheme
+ "www.sony.co.uk", // host
+ "sony.co.uk", // domain
+ "sony", // siteowner
+ "co.uk", // tld
+ "uk", // maintld
+ "unitedkingdom", // tldregion
+ "", // port
+ "/", // path
+ 0, // pathdepth
+ "", // filename
+ "", // extension
+ "", // params
+ "", // query
+ "", // fragment
+ "", // address
+ "SCHEME:http,HOST:www,DOMAIN:sony,DOMAIN:co,MAINTLD:uk");// Tokenstring
+ success = success &&
+ CheckURL("http://sony.co.uk/", // URL
+ "http", // scheme
+ "sony.co.uk", // host
+ "sony.co.uk", // domain
+ "sony", // siteowner
+ "co.uk", // tld
+ "uk", // maintld
+ "unitedkingdom", // tldregion
+ "", // port
+ "/", // path
+ 0, // pathdepth
+ "", // filename
+ "", // extension
+ "", // params
+ "", // query
+ "", // fragment
+ "", // address
+ "SCHEME:http,DOMAIN:sony,DOMAIN:co,MAINTLD:uk");// Tokenstring
+#endif
+ // Test fixes for bugs reported in cvs commit:
+ // tegge 2000/10/27 22:42:59 CEST
+ success = success &&
+ CheckURL("http://somehost.somedomain/this!is!it/boom", // URL
+ "http", // scheme
+ "somehost.somedomain", // host
+ "somehost.somedomain", // domain
+ "somehost", // siteowner
+ "somedomain", // tld
+ "somedomain", // maintld
+ "", // tldregion
+ "", // port
+ "/this!is!it/boom", // path
+ 2, // pathdepth
+ "boom", // filename
+ "", // extension
+ "", // params
+ "", // query
+ "", // fragment
+ "", // address
+ "SCHEME:http,DOMAIN:somehost,MAINTLD:somedomain,PATH:this,PATH:is,PATH:it,FILENAME:boom");// Tokenstring
+ success = success &&
+ CheckURL("http://test.com/index.htm?p1=q%20test&p2=r%10d", // URL
+ "http", // scheme
+ "test.com", // host
+ "test.com", // domain
+ "test", // siteowner
+ "com", // tld
+ "com", // maintld
+ "northamerica", // tldregion
+ "", // port
+ "/index.htm", // path
+ 1, // pathdepth
+ "index.htm", // filename
+ "htm", // extension
+ "", // params
+ "p1=q%20test&p2=r%10d", // query
+ "", // fragment
+ "", // address
+ "SCHEME:http,DOMAIN:test,MAINTLD:com,FILENAME:index,EXTENSION:htm,QUERY:p1,QUERY:q,QUERY:20test,QUERY:p2,QUERY:r,QUERY:10d");// Tokenstring
+
+ // Test bugs found 2001/06/25
+ success = success &&
+ CheckURL("http://arthur/qm/images/qm1.gif", // URL
+ "http", // scheme
+ "arthur", // host
+ "arthur", // domain
+ "", // siteowner
+ "", // tld
+ "", // maintld
+ "", // tldregion
+ "", // port
+ "/qm/images/qm1.gif", // path
+ 3, // pathdepth
+ "qm1.gif", // filename
+ "gif", // extension
+ "", // params
+ "", // query
+ "", // address
+ "", // fragment
+ "SCHEME:http,MAINTLD:arthur,PATH:qm,PATH:images,FILENAME:qm1,EXTENSION:gif");// Tokenstring
+
+ // Test Orjan's hypothesis 2003/02/17
+ success = success &&
+ CheckURL("http://foo.com/ui;.gif", // URL
+ "http", // scheme
+ "foo.com", // host
+ "foo.com", // domain
+ "foo", // siteowner
+ "com", // tld
+ "com", // maintld
+ "northamerica", // tldregion
+ "", // port
+ "/ui;.gif", // path
+ 1, // pathdepth
+ "ui", // filename
+ "", // extension
+ ".gif", // params
+ "", // query
+ "", // address
+ "", // fragment
+ "SCHEME:http,DOMAIN:foo,MAINTLD:com,FILENAME:ui,PARAMS:gif");// Tokenstring
+
+ // Test Orjan's hypothesis 2003/02/17
+ success = success &&
+ CheckURL("http://foo.com/ui;.gif", // URL
+ "http", // scheme
+ "foo.com", // host
+ "foo.com", // domain
+ "foo", // siteowner
+ "com", // tld
+ "com", // maintld
+ "northamerica", // tldregion
+ "", // port
+ "/ui;.gif", // path
+ 1, // pathdepth
+ "ui", // filename
+ "", // extension
+ ".gif", // params
+ "", // query
+ "", // address
+ "", // fragment
+ "SCHEME:http,DOMAIN:foo,MAINTLD:com,FILENAME:ui,PARAMS:gif");// Tokenstring
+
+ // Verify params handling
+ success = success &&
+ CheckURL("http://foo.com/ui;par1=1/par2=2", // URL
+ "http", // scheme
+ "foo.com", // host
+ "foo.com", // domain
+ "foo", // siteowner
+ "com", // tld
+ "com", // maintld
+ "northamerica", // tldregion
+ "", // port
+ "/ui;par1=1/par2=2", // path
+ 1, // pathdepth
+ "ui", // filename
+ "", // extension
+ "par1=1/par2=2", // params
+ "", // query
+ "", // fragment
+ "", // address
+ "SCHEME:http,DOMAIN:foo,MAINTLD:com,FILENAME:ui,PARAMS:par1,PARAMS:1,PARAMS:par2,PARAMS:2");// Tokenstring
+
+ // Verify synthetic url
+ success = success &&
+ CheckURL("http://www.foo.no:8080/path/filename.ext;par1=hello/par2=world?query=test#fragment", // URL
+ "http", // scheme
+ "www.foo.no", // host
+ "foo.no", // domain
+ "foo", // siteowner
+ "no", // tld
+ "no", // maintld
+ "europe", // tldregion
+ "8080", // port
+ "/path/filename.ext;par1=hello/par2=world", // path
+ 2, // pathdepth
+ "filename.ext", // filename
+ "ext", // extension
+ "par1=hello/par2=world", // params
+ "query=test", // query
+ "fragment", // fragment
+ "", // address
+ "SCHEME:http,HOST:www,DOMAIN:foo,MAINTLD:no,PORT:8080,PATH:path,FILENAME:filename,EXTENSION:ext,PARAMS:par1,PARAMS:hello,PARAMS:par2,PARAMS:world,QUERY:query,QUERY:test,FRAGMENT:fragment");// Tokenstring
+
+ // '&' should be allowed in path according to RFC 1738, 2068 og 2396
+ success = success &&
+ CheckURL("http://canonsarang.com/zboard/data/gallery04/HU&BANG.jpg", // URL
+ "http", // scheme
+ "canonsarang.com", // host
+ "canonsarang.com", // domain
+ "canonsarang", // siteowner
+ "com", // tld
+ "com", // maintld
+ "northamerica", // tldregion
+ "", // port
+ "/zboard/data/gallery04/HU&BANG.jpg", // path
+ 4, // pathdepth
+ "HU&BANG.jpg", // filename
+ "jpg", // extension
+ "", // params
+ "", // query
+ "", // address
+ "", // fragment
+ "SCHEME:http,DOMAIN:canonsarang,MAINTLD:com,PATH:zboard,PATH:data,PATH:gallery04,FILENAME:HU,FILENAME:BANG,EXTENSION:jpg");// Tokenstring
+
+ return !success;
+}
diff --git a/searchlib/src/tests/util/.gitignore b/searchlib/src/tests/util/.gitignore
new file mode 100644
index 00000000000..a0da2dd3333
--- /dev/null
+++ b/searchlib/src/tests/util/.gitignore
@@ -0,0 +1,4 @@
+.depend
+Makefile
+*_test
+searchlib_rawbuf_test_app
diff --git a/searchlib/src/tests/util/CMakeLists.txt b/searchlib/src/tests/util/CMakeLists.txt
new file mode 100644
index 00000000000..43fec57c182
--- /dev/null
+++ b/searchlib/src/tests/util/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_rawbuf_test_app
+ SOURCES
+ rawbuf_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_rawbuf_test_app COMMAND searchlib_rawbuf_test_app)
diff --git a/searchlib/src/tests/util/bufferwriter/.gitignore b/searchlib/src/tests/util/bufferwriter/.gitignore
new file mode 100644
index 00000000000..171db45593c
--- /dev/null
+++ b/searchlib/src/tests/util/bufferwriter/.gitignore
@@ -0,0 +1,3 @@
+bufferwriter_bm
+searchlib_bufferwriter_test_app
+searchlib_bufferwriter_bm_app
diff --git a/searchlib/src/tests/util/bufferwriter/CMakeLists.txt b/searchlib/src/tests/util/bufferwriter/CMakeLists.txt
new file mode 100644
index 00000000000..a57749ff853
--- /dev/null
+++ b/searchlib/src/tests/util/bufferwriter/CMakeLists.txt
@@ -0,0 +1,16 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_bufferwriter_test_app
+ SOURCES
+ bufferwriter_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_bufferwriter_test_app COMMAND searchlib_bufferwriter_test_app)
+vespa_add_executable(searchlib_bufferwriter_bm_app
+ SOURCES
+ work.cpp
+ bm.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_bufferwriter_bm_app COMMAND searchlib_bufferwriter_bm_app BENCHMARK)
diff --git a/searchlib/src/tests/util/bufferwriter/bm.cpp b/searchlib/src/tests/util/bufferwriter/bm.cpp
new file mode 100644
index 00000000000..228ce4adcdc
--- /dev/null
+++ b/searchlib/src/tests/util/bufferwriter/bm.cpp
@@ -0,0 +1,95 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("bufferwriter_bm");
+#include <vespa/vespalib/testkit/testapp.h>
+#include <iostream>
+#include "work.h"
+#include <vespa/searchlib/util/drainingbufferwriter.h>
+
+using search::DrainingBufferWriter;
+
+double getTime() { return fastos::TimeStamp(fastos::ClockSystem::now()).sec(); }
+
+constexpr size_t million = 1000000;
+
+enum class WorkFuncDispatch
+{
+ DIRECT,
+ LAMBDA,
+ FUNCTOR,
+ FUNCTOR2
+};
+
+
+template <typename T>
+void
+callWork(size_t size, WorkFuncDispatch dispatch)
+{
+ std::vector<T> foo;
+ DrainingBufferWriter writer;
+ foo.resize(size);
+ std::cout << "will write " << size << " elements of size " << sizeof(T) <<
+ std::endl;
+ double before = getTime();
+ switch (dispatch) {
+ case WorkFuncDispatch::DIRECT:
+ work(foo, writer);
+ break;
+ case WorkFuncDispatch::LAMBDA:
+ workLambda(foo, writer);
+ break;
+ case WorkFuncDispatch::FUNCTOR:
+ workFunctor(foo, writer);
+ break;
+ case WorkFuncDispatch::FUNCTOR2:
+ workFunctor2(foo, writer);
+ break;
+ default:
+ abort();
+ }
+ double after = getTime();
+ double delta = (after - before);
+ double writeSpeed = writer.getBytesWritten() / delta;
+ EXPECT_GREATER(writeSpeed, 1000);
+ std::cout << "written is " << writer.getBytesWritten() << std::endl;
+ std::cout << "time used is " << (delta * 1000.0) << " ms" << std::endl;
+ std::cout << "write speed is " << writeSpeed << std::endl;
+}
+
+
+void
+callWorks(WorkFuncDispatch dispatch)
+{
+ callWork<char>(million * 1000, dispatch);
+ callWork<short>(million * 500, dispatch);
+ callWork<int>(million * 250, dispatch);
+ callWork<long>(million * 125, dispatch);
+}
+
+TEST("simple bufferwriter speed test")
+{
+ callWorks(WorkFuncDispatch::DIRECT);
+}
+
+TEST("lambda func bufferwriter speed test")
+{
+ callWorks(WorkFuncDispatch::LAMBDA);
+}
+
+TEST("functor bufferwriter speed test")
+{
+ callWorks(WorkFuncDispatch::FUNCTOR);
+}
+
+TEST("functor2 bufferwriter speed test")
+{
+ callWorks(WorkFuncDispatch::FUNCTOR2);
+}
+
+
+TEST_MAIN()
+{
+ TEST_RUN_ALL();
+}
diff --git a/searchlib/src/tests/util/bufferwriter/bufferwriter_test.cpp b/searchlib/src/tests/util/bufferwriter/bufferwriter_test.cpp
new file mode 100644
index 00000000000..95e4db95e03
--- /dev/null
+++ b/searchlib/src/tests/util/bufferwriter/bufferwriter_test.cpp
@@ -0,0 +1,158 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("bufferwriter_test");
+#include <vespa/vespalib/testkit/testapp.h>
+#include <vespa/vespalib/stllike/string.h>
+#include <vespa/searchlib/util/bufferwriter.h>
+#include <vespa/searchlib/util/drainingbufferwriter.h>
+#include <vespa/searchlib/util/rand48.h>
+
+namespace search
+{
+
+namespace
+{
+
+class StoreBufferWriter : public BufferWriter
+{
+ std::vector<char> _buf;
+ std::vector<std::unique_ptr<std::vector<char> > > _bufs;
+ size_t _bytesWritten;
+ uint32_t _incompleteBuffers;
+public:
+ static constexpr size_t BUFFER_SIZE = 262144;
+
+ StoreBufferWriter();
+
+ virtual ~StoreBufferWriter() = default;
+
+ virtual void flush() override;
+
+ size_t getBytesWritten() const { return _bytesWritten; }
+
+ std::vector<char> getSingleBuffer() const;
+
+};
+
+
+StoreBufferWriter::StoreBufferWriter()
+ : BufferWriter(),
+ _buf(),
+ _bytesWritten(0),
+ _incompleteBuffers(0)
+{
+ _buf.resize(BUFFER_SIZE);
+ setup(&_buf[0], _buf.size());
+}
+
+
+void
+StoreBufferWriter::flush() {
+ assert(_incompleteBuffers == 0); // all previous buffers must have been full
+ size_t nowLen = usedLen();
+ if (nowLen != _buf.size()) {
+ // buffer is not full, only allowed for last buffer
+ ++_incompleteBuffers;
+ }
+ if (nowLen == 0) {
+ return; // empty buffer
+ }
+ _bufs.emplace_back(std::make_unique<std::vector<char>>());
+ _bufs.back()->resize(BUFFER_SIZE);
+ _buf.resize(nowLen);
+ _bufs.back()->swap(_buf);
+ _bytesWritten += nowLen;
+ setup(&_buf[0], _buf.size());
+}
+
+
+std::vector<char>
+StoreBufferWriter::getSingleBuffer() const
+{
+ std::vector<char> res;
+ size_t needSize = 0;
+ for (const auto &buf : _bufs) {
+ needSize += buf->size();
+ }
+ res.reserve(needSize);
+ for (const auto &buf : _bufs) {
+ res.insert(res.end(), buf->cbegin(), buf->cend());
+ }
+ return std::move(res);
+}
+
+}
+
+
+TEST("Test that bufferwriter works with no writes")
+{
+ DrainingBufferWriter writer;
+ writer.flush();
+ EXPECT_EQUAL(0u, writer.getBytesWritten());
+}
+
+TEST("Test that bufferwriter works with single byte write")
+{
+ DrainingBufferWriter writer;
+ char a = 4;
+ writer.write(&a, sizeof(a));
+ writer.flush();
+ EXPECT_EQUAL(1u, writer.getBytesWritten());
+}
+
+TEST("Test that bufferwriter works with multiple writes")
+{
+ DrainingBufferWriter writer;
+ char a = 4;
+ int16_t b = 5;
+ int32_t c = 6;
+ writer.write(&a, sizeof(a));
+ writer.write(&b, sizeof(b));
+ writer.write(&c, sizeof(c));
+ writer.flush();
+ EXPECT_EQUAL(7u, writer.getBytesWritten());
+}
+
+
+TEST("Test that bufferwriter works with long writes")
+{
+ std::vector<char> a;
+ const size_t mysize = 10000000;
+ const size_t drainerBufferSize = DrainingBufferWriter::BUFFER_SIZE;
+ EXPECT_GREATER(mysize, drainerBufferSize);
+ a.resize(mysize);
+ DrainingBufferWriter writer;
+ writer.write(&a[0], a.size());
+ writer.flush();
+ EXPECT_EQUAL(a.size(), writer.getBytesWritten());
+}
+
+
+TEST("Test that bufferwriter passes on written data")
+{
+ std::vector<int> a;
+ const size_t mysize = 25000000;
+ const size_t drainerBufferSize = DrainingBufferWriter::BUFFER_SIZE;
+ EXPECT_GREATER(mysize * sizeof(int), drainerBufferSize);
+ a.reserve(mysize);
+ search::Rand48 rnd;
+ for (uint32_t i = 0; i < mysize; ++i) {
+ a.emplace_back(rnd.lrand48());
+ }
+ StoreBufferWriter writer;
+ writer.write(&a[0], a.size() * sizeof(int));
+ writer.flush();
+ EXPECT_EQUAL(a.size() * sizeof(int), writer.getBytesWritten());
+ std::vector<char> written = writer.getSingleBuffer();
+ EXPECT_EQUAL(a.size() * sizeof(int), written.size());
+ EXPECT_TRUE(memcmp(&a[0], &written[0], written.size()) == 0);
+}
+
+}
+
+TEST_MAIN()
+{
+ TEST_RUN_ALL();
+}
diff --git a/searchlib/src/tests/util/bufferwriter/work.cpp b/searchlib/src/tests/util/bufferwriter/work.cpp
new file mode 100644
index 00000000000..9835b28970e
--- /dev/null
+++ b/searchlib/src/tests/util/bufferwriter/work.cpp
@@ -0,0 +1,93 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "work.h"
+#include <vespa/searchlib/util/bufferwriter.h>
+
+namespace search
+{
+
+template <class T>
+class WriteFunctor
+{
+ BufferWriter &_writer;
+public:
+ WriteFunctor(BufferWriter &writer)
+ : _writer(writer)
+ {
+ }
+
+ void operator()(const T &val) { _writer.write(&val, sizeof(val)); }
+};
+
+template <class T>
+class WriteFunctor2
+{
+ BufferWriter &_writer;
+public:
+ WriteFunctor2(BufferWriter &writer)
+ : _writer(writer)
+ {
+ }
+
+ void operator()(const T &val) __attribute((noinline))
+ { _writer.write(&val, sizeof(val)); }
+};
+
+template <class T, class Func>
+void workLoop(const std::vector<T> &v, Func &&func)
+{
+ for (const auto &val : v) {
+ func(val);
+ }
+}
+
+template <class T>
+void work(const std::vector<T> &v, BufferWriter &writer)
+{
+ for (const auto &val : v) {
+ writer.write(&val, sizeof(val));
+ }
+ writer.flush();
+}
+
+template <class T>
+void workLambda(const std::vector<T> &v, BufferWriter &writer)
+{
+ workLoop<T>(v,
+ [&writer](const T &val) { writer.write(&val, sizeof(val)); });
+ writer.flush();
+}
+
+template <class T>
+void workFunctor(const std::vector<T> &v, BufferWriter &writer)
+{
+ workLoop<T>(v, WriteFunctor<T>(writer));
+ writer.flush();
+}
+
+template <class T>
+void workFunctor2(const std::vector<T> &v, BufferWriter &writer)
+{
+ workLoop<T>(v, WriteFunctor2<T>(writer));
+ writer.flush();
+}
+
+template void work(const std::vector<char> &v, BufferWriter &writer);
+template void work(const std::vector<short> &v, BufferWriter &writer);
+template void work(const std::vector<int> &v, BufferWriter &writer);
+template void work(const std::vector<long> &v, BufferWriter &writer);
+template void workLambda(const std::vector<char> &v, BufferWriter &writer);
+template void workLambda(const std::vector<short> &v, BufferWriter &writer);
+template void workLambda(const std::vector<int> &v, BufferWriter &writer);
+template void workLambda(const std::vector<long> &v, BufferWriter &writer);
+template void workFunctor(const std::vector<char> &v, BufferWriter &writer);
+template void workFunctor(const std::vector<short> &v, BufferWriter &writer);
+template void workFunctor(const std::vector<int> &v, BufferWriter &writer);
+template void workFunctor(const std::vector<long> &v, BufferWriter &writer);
+template void workFunctor2(const std::vector<char> &v, BufferWriter &writer);
+template void workFunctor2(const std::vector<short> &v, BufferWriter &writer);
+template void workFunctor2(const std::vector<int> &v, BufferWriter &writer);
+template void workFunctor2(const std::vector<long> &v, BufferWriter &writer);
+
+} // namespace search
diff --git a/searchlib/src/tests/util/bufferwriter/work.h b/searchlib/src/tests/util/bufferwriter/work.h
new file mode 100644
index 00000000000..49a1cb8017d
--- /dev/null
+++ b/searchlib/src/tests/util/bufferwriter/work.h
@@ -0,0 +1,19 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+namespace search {
+
+class BufferWriter;
+
+template <class T>
+using WorkFunc = void (*)(const std::vector<T> &v, BufferWriter &writer);
+template <class T>
+void work(const std::vector<T> &v, BufferWriter &writer);
+template <class T>
+void workLambda(const std::vector<T> &v, BufferWriter &writer);
+template <class T>
+void workFunctor(const std::vector<T> &v, BufferWriter &writer);
+template <class T>
+void workFunctor2(const std::vector<T> &v, BufferWriter &writer);
+
+} // namespace search
+
diff --git a/searchlib/src/tests/util/ioerrorhandler/.gitignore b/searchlib/src/tests/util/ioerrorhandler/.gitignore
new file mode 100644
index 00000000000..2f5c2e77191
--- /dev/null
+++ b/searchlib/src/tests/util/ioerrorhandler/.gitignore
@@ -0,0 +1 @@
+searchlib_ioerrorhandler_test_app
diff --git a/searchlib/src/tests/util/ioerrorhandler/CMakeLists.txt b/searchlib/src/tests/util/ioerrorhandler/CMakeLists.txt
new file mode 100644
index 00000000000..92d6ab30d72
--- /dev/null
+++ b/searchlib/src/tests/util/ioerrorhandler/CMakeLists.txt
@@ -0,0 +1,9 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_ioerrorhandler_test_app
+ SOURCES
+ ioerrorhandler_test.cpp
+ DEPENDS
+ searchlib_test
+ searchlib
+)
+vespa_add_test(NAME searchlib_ioerrorhandler_test_app COMMAND searchlib_ioerrorhandler_test_app)
diff --git a/searchlib/src/tests/util/ioerrorhandler/DESC b/searchlib/src/tests/util/ioerrorhandler/DESC
new file mode 100644
index 00000000000..3328798b936
--- /dev/null
+++ b/searchlib/src/tests/util/ioerrorhandler/DESC
@@ -0,0 +1 @@
+IOErrorHandler test. Take a look at ioerrorhandler_test.cpp for details.
diff --git a/searchlib/src/tests/util/ioerrorhandler/FILES b/searchlib/src/tests/util/ioerrorhandler/FILES
new file mode 100644
index 00000000000..2447a10e991
--- /dev/null
+++ b/searchlib/src/tests/util/ioerrorhandler/FILES
@@ -0,0 +1 @@
+ioerrorhandler_test.cpp
diff --git a/searchlib/src/tests/util/ioerrorhandler/ioerrorhandler_test.cpp b/searchlib/src/tests/util/ioerrorhandler/ioerrorhandler_test.cpp
new file mode 100644
index 00000000000..bcfaa5f8ac8
--- /dev/null
+++ b/searchlib/src/tests/util/ioerrorhandler/ioerrorhandler_test.cpp
@@ -0,0 +1,358 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("ioerrorhandler_test");
+#include <vespa/vespalib/testkit/testapp.h>
+#include <vespa/vespalib/stllike/string.h>
+#include <vespa/searchlib/util/statefile.h>
+#include <vespa/searchlib/util/ioerrorhandler.h>
+#include <atomic>
+#include <iostream>
+#include <fstream>
+#include <string>
+#include <setjmp.h>
+#include <dlfcn.h>
+#include <vespa/searchlib/test/statefile.h>
+#include <vespa/searchlib/test/statestring.h>
+
+extern "C"
+{
+
+ssize_t read(int fd, void *buf, size_t count);
+ssize_t write(int fd, const void *buf, size_t count);
+ssize_t pread(int fd, void *buf, size_t count, off_t offset);
+ssize_t pwrite(int fd, const void *buf, size_t count, off_t offset);
+
+
+}
+
+using ReadFunc = ssize_t (*)(int fd, void *buf, size_t count);
+using WriteFunc = ssize_t (*)(int fd, const void *buf, size_t count);
+using PreadFunc = ssize_t (*)(int fd, void *buf, size_t count, off_t offset);
+using PwriteFunc = ssize_t (*)(int fd, const void *buf, size_t count,
+ off_t offset);
+
+using namespace search::test::statefile;
+using namespace search::test::statestring;
+
+namespace
+{
+
+ReadFunc libc_read;
+WriteFunc libc_write;
+PreadFunc libc_pread;
+PwriteFunc libc_pwrite;
+
+}
+
+int injectErrno;
+std::atomic<int> injectreadErrnoTrigger;
+std::atomic<int> injectpreadErrnoTrigger;
+std::atomic<int> injectwriteErrnoTrigger;
+std::atomic<int> injectpwriteErrnoTrigger;
+
+ssize_t read(int fd, void *buf, size_t count)
+{
+ if (--injectreadErrnoTrigger == 0) {
+ errno = injectErrno;
+ return -1;
+ }
+ if (!libc_read) {
+ libc_read = reinterpret_cast<ReadFunc>(dlsym(RTLD_NEXT, "read"));
+ }
+ return libc_read(fd, buf, count);
+}
+
+ssize_t write(int fd, const void *buf, size_t count)
+{
+ if (--injectwriteErrnoTrigger == 0) {
+ errno = injectErrno;
+ return -1;
+ }
+ if (!libc_write) {
+ libc_write = reinterpret_cast<WriteFunc>(dlsym(RTLD_NEXT, "write"));
+ }
+ return libc_write(fd, buf, count);
+}
+
+ssize_t pread(int fd, void *buf, size_t count, off_t offset)
+{
+ if (--injectpreadErrnoTrigger == 0) {
+ errno = injectErrno;
+ return -1;
+ }
+ if (!libc_pread) {
+ libc_pread = reinterpret_cast<PreadFunc>(dlsym(RTLD_NEXT, "pread"));
+ }
+ return libc_pread(fd, buf, count, offset);
+}
+
+
+ssize_t pwrite(int fd, const void *buf, size_t count, off_t offset)
+{
+ if (--injectpwriteErrnoTrigger == 0) {
+ errno = injectErrno;
+ return -1;
+ }
+ if (!libc_pwrite) {
+ libc_pwrite = reinterpret_cast<PwriteFunc>(dlsym(RTLD_NEXT, "pwrite"));
+ }
+ return libc_pwrite(fd, buf, count, offset);
+}
+
+
+
+namespace search
+{
+
+const char *testStringBase = "This is a test\n";
+
+using strvec = std::vector<vespalib::string>;
+
+namespace
+{
+
+bool
+assertHistory(std::vector<vespalib::string> &exp,
+ std::vector<vespalib::string> &act)
+{
+ if (!EXPECT_EQUAL(exp.size(), act.size())) {
+ return false;
+ }
+ for (size_t i = 0; i < exp.size(); ++i) {
+ if (!EXPECT_EQUAL(exp[i], act[i])) {
+ return false;
+ }
+ }
+ return true;
+}
+
+
+class Fixture
+{
+public:
+
+ std::unique_ptr<StateFile> sf;
+ std::unique_ptr<FastOS_File> file;
+ char buf[8192];
+ char *testString;
+
+ Fixture();
+
+ void openFile();
+
+ void openFileDIO();
+
+ void writeTestString();
+};
+
+
+Fixture::Fixture()
+ : sf(),
+ file()
+{
+ unlink("testfile");
+ StateFile::erase("state");
+ sf.reset(new StateFile("state"));
+ testString = &buf[0];
+ int off = reinterpret_cast<unsigned long>(testString) & 4095;
+ if (off != 0) {
+ testString += 4096 - off;
+ }
+ assert(testString + strlen(testStringBase) < &buf[0] + sizeof(buf));
+ strcpy(testString, testStringBase);
+}
+
+
+void
+Fixture::openFile()
+{
+ file.reset(new FastOS_File);
+ file->OpenReadWrite("testfile");
+}
+
+void
+Fixture::openFileDIO()
+{
+ file.reset(new FastOS_File);
+ file->EnableDirectIO();
+ file->OpenReadWrite("testfile");
+}
+
+void
+Fixture::writeTestString()
+{
+ file->WriteBuf(testString, strlen(testString));
+ file->SetPosition(0);
+}
+
+
+}
+
+
+TEST("Test that ioerror handler can be instantated")
+{
+ StateFile::erase("state");
+ StateFile sf("state");
+ IOErrorHandler ioeh(&sf);
+ EXPECT_FALSE(ioeh.fired());
+}
+
+
+TEST_F("Test that ioerror handler can process read error", Fixture)
+{
+ IOErrorHandler ioeh(f.sf.get());
+ EXPECT_FALSE(ioeh.fired());
+ f.openFile();
+ f.writeTestString();
+ uint64_t fileSize = f.file->GetSize();
+ EXPECT_EQUAL(strlen(f.testString), fileSize);
+ char buf[1024];
+ assert(fileSize <= sizeof(buf));
+ try {
+ f.file->SetPosition(0);
+ injectErrno = EIO;
+ injectreadErrnoTrigger = 1;
+ f.file->ReadBuf(buf, fileSize);
+ LOG(error, "Should never get here");
+ abort();
+ } catch (std::runtime_error &e) {
+ LOG(info, "Caught std::runtime_error exception: %s", e.what());
+ EXPECT_TRUE(strstr(e.what(), "Input/output error") != nullptr);
+ }
+ EXPECT_TRUE(ioeh.fired());
+ {
+ vespalib::string act = readState(*f.sf);
+ normalizeTimestamp(act);
+ vespalib::string exp = "state=down ts=0.0 operation=read "
+ "file=testfile error=5 offset=0 len=15 "
+ "rlen=-1\n";
+ EXPECT_EQUAL(exp, act);
+ }
+ {
+ strvec exp({ "state=down ts=0.0 operation=read "
+ "file=testfile error=5 offset=0 len=15 "
+ "rlen=-1\n"});
+ std::vector<vespalib::string> act(readHistory("state.history"));
+ normalizeTimestamps(act);
+ TEST_DO(assertHistory(exp, act));
+ }
+}
+
+TEST_F("Test that ioerror handler can process pread error", Fixture)
+{
+ IOErrorHandler ioeh(f.sf.get());
+ EXPECT_FALSE(ioeh.fired());
+ f.openFile();
+ f.writeTestString();
+ uint64_t fileSize = f.file->GetSize();
+ EXPECT_EQUAL(strlen(f.testString), fileSize);
+ char buf[1024];
+ assert(fileSize <= sizeof(buf));
+ try {
+ f.file->SetPosition(0);
+ injectErrno = EIO;
+ injectpreadErrnoTrigger = 1;
+ f.file->ReadBuf(buf, fileSize, 0);
+ LOG(error, "Should never get here");
+ abort();
+ } catch (std::runtime_error &e) {
+ LOG(info, "Caught std::runtime_error exception: %s", e.what());
+ EXPECT_TRUE(strstr(e.what(), "Input/output error") != nullptr);
+ }
+ EXPECT_TRUE(ioeh.fired());
+ {
+ vespalib::string act = readState(*f.sf);
+ normalizeTimestamp(act);
+ vespalib::string exp = "state=down ts=0.0 operation=read "
+ "file=testfile error=5 offset=0 len=15 "
+ "rlen=-1\n";
+ EXPECT_EQUAL(exp, act);
+ }
+ {
+ strvec exp({ "state=down ts=0.0 operation=read "
+ "file=testfile error=5 offset=0 len=15 "
+ "rlen=-1\n"});
+ std::vector<vespalib::string> act(readHistory("state.history"));
+ normalizeTimestamps(act);
+ TEST_DO(assertHistory(exp, act));
+ }
+}
+
+TEST_F("Test that ioerror handler can process write error", Fixture)
+{
+ IOErrorHandler ioeh(f.sf.get());
+ EXPECT_FALSE(ioeh.fired());
+ f.openFile();
+ try {
+ injectErrno = EIO;
+ injectwriteErrnoTrigger = 1;
+ f.writeTestString();
+ LOG(error, "Should never get here");
+ abort();
+ } catch (std::runtime_error &e) {
+ LOG(info, "Caught std::runtime_error exception: %s", e.what());
+ EXPECT_TRUE(strstr(e.what(), "Input/output error") != nullptr);
+ }
+ EXPECT_TRUE(ioeh.fired());
+ {
+ vespalib::string act = readState(*f.sf);
+ normalizeTimestamp(act);
+ vespalib::string exp = "state=down ts=0.0 operation=write "
+ "file=testfile error=5 offset=0 len=15 "
+ "rlen=-1\n";
+ EXPECT_EQUAL(exp, act);
+ }
+ {
+ strvec exp({ "state=down ts=0.0 operation=write "
+ "file=testfile error=5 offset=0 len=15 "
+ "rlen=-1\n"});
+ std::vector<vespalib::string> act(readHistory("state.history"));
+ normalizeTimestamps(act);
+ TEST_DO(assertHistory(exp, act));
+ }
+}
+
+
+TEST_F("Test that ioerror handler can process pwrite error", Fixture)
+{
+ IOErrorHandler ioeh(f.sf.get());
+ EXPECT_FALSE(ioeh.fired());
+ f.openFileDIO();
+ try {
+ injectErrno = EIO;
+ injectpwriteErrnoTrigger = 1;
+ f.writeTestString();
+ LOG(error, "Should never get here");
+ abort();
+ } catch (std::runtime_error &e) {
+ LOG(info, "Caught std::runtime_error exception: %s", e.what());
+ EXPECT_TRUE(strstr(e.what(), "Input/output error") != nullptr);
+ }
+ EXPECT_TRUE(ioeh.fired());
+ {
+ vespalib::string act = readState(*f.sf);
+ normalizeTimestamp(act);
+ vespalib::string exp = "state=down ts=0.0 operation=write "
+ "file=testfile error=5 offset=0 len=15 "
+ "rlen=-1\n";
+ EXPECT_EQUAL(exp, act);
+ }
+ {
+ strvec exp({ "state=down ts=0.0 operation=write "
+ "file=testfile error=5 offset=0 len=15 "
+ "rlen=-1\n"});
+ std::vector<vespalib::string> act(readHistory("state.history"));
+ normalizeTimestamps(act);
+ TEST_DO(assertHistory(exp, act));
+ }
+}
+
+}
+
+TEST_MAIN()
+{
+ TEST_RUN_ALL();
+ search::StateFile::erase("state");
+ unlink("testfile");
+}
diff --git a/searchlib/src/tests/util/rawbuf_test.cpp b/searchlib/src/tests/util/rawbuf_test.cpp
new file mode 100644
index 00000000000..e9dc139bda5
--- /dev/null
+++ b/searchlib/src/tests/util/rawbuf_test.cpp
@@ -0,0 +1,198 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Unit tests for rawbuf.
+
+#include <vespa/log/log.h>
+LOG_SETUP("rawbuf_test");
+#include <vespa/fastos/fastos.h>
+
+#include <vespa/searchlib/util/rawbuf.h>
+#include <vespa/vespalib/stllike/string.h>
+#include <vespa/vespalib/testkit/testapp.h>
+
+using vespalib::string;
+using namespace search;
+
+namespace {
+
+string getString(const RawBuf &buf) {
+ return string(buf.GetDrainPos(), buf.GetUsedLen());
+}
+
+TEST("require that rawbuf can append text") {
+ RawBuf buf(10);
+ buf += "foo";
+ buf += "bar";
+ EXPECT_EQUAL("foobar", getString(buf));
+}
+
+TEST("require that rawbuf expands when appended beyond size") {
+ RawBuf buf(4);
+ buf += "foo";
+ EXPECT_EQUAL(1u, buf.GetFreeLen());
+ buf += "bar";
+ EXPECT_EQUAL(2u, buf.GetFreeLen());
+ EXPECT_EQUAL("foobar", getString(buf));
+}
+
+TEST("require that a rawbuf can be appended to another") {
+ RawBuf buf1(10);
+ RawBuf buf2(10);
+ buf1 += "foo";
+ buf2 += "bar";
+ buf1 += buf2;
+ EXPECT_EQUAL("foobar", getString(buf1));
+}
+
+TEST("require that rawbufs can be tested for equality") {
+ RawBuf buf1(10);
+ RawBuf buf2(10);
+ buf1 += "foo";
+ buf2 += "bar";
+ EXPECT_TRUE(buf1 == buf1);
+ EXPECT_FALSE(buf1 == buf2);
+}
+
+template <typename T>
+void checkAddNum(void (RawBuf::*addNum)(T, size_t, char), size_t num,
+ size_t fieldw, char fill, const string &expected) {
+ RawBuf buf(10);
+ (buf.*addNum)(num, fieldw, fill);
+ EXPECT_EQUAL(expected, getString(buf));
+}
+
+TEST("require that rawbuf can add numbers in decimal") {
+ checkAddNum(&RawBuf::addNum, 0, 4, 'x', "xxx0");
+ checkAddNum(&RawBuf::addNum, 42, 4, '0', "0042");
+ checkAddNum(&RawBuf::addNum, 12345678901234, 4, '0', "12345678901234");
+ checkAddNum(&RawBuf::addNum, -1, 4, '0', "18446744073709551615");
+
+ checkAddNum(&RawBuf::addNum32, 0, 4, 'x', "xxx0");
+ checkAddNum(&RawBuf::addNum32, 42, 4, '0', "0042");
+ checkAddNum(&RawBuf::addNum32, 1234567890, 4, '0', "1234567890");
+ checkAddNum(&RawBuf::addNum32, -1, 0, '0', "-1");
+ checkAddNum(&RawBuf::addNum32, -1, 4, '0', "00-1");
+
+ checkAddNum(&RawBuf::addNum64, 0, 4, 'x', "xxx0");
+ checkAddNum(&RawBuf::addNum64, 42, 4, '0', "0042");
+ checkAddNum(&RawBuf::addNum64, 12345678901234, 4, '0', "12345678901234");
+ checkAddNum(&RawBuf::addNum64, -1, 0, '0', "-1");
+ checkAddNum(&RawBuf::addNum64, -1, 4, '0', "00-1");
+}
+
+TEST("require that rawbuf can add hitrank") {
+ RawBuf buf(10);
+ buf.addHitRank(HitRank(4.2));
+ EXPECT_EQUAL("4.2", getString(buf));
+}
+
+TEST("require that rawbuf can add signedhitrank") {
+ RawBuf buf(10);
+ buf.addHitRank(SignedHitRank(-4.213));
+ EXPECT_EQUAL("-4.213", getString(buf));
+}
+
+TEST("require that rawbuf can append data of known length") {
+ RawBuf buf(10);
+ const string data("foo bar baz qux quux");
+ buf.append(data.data(), data.size());
+ EXPECT_EQUAL(data, getString(buf));
+}
+
+TEST("require that rawbuf can be truncated shorter and longer") {
+ RawBuf buf(10);
+ buf += "foobarbaz";
+ buf.truncate(3);
+ buf += "qux";
+ buf.truncate(9);
+ EXPECT_EQUAL("fooquxbaz", getString(buf));
+}
+
+TEST("require that prealloc makes enough room") {
+ RawBuf buf(10);
+ buf += "foo";
+ EXPECT_EQUAL(7u, buf.GetFreeLen());
+ buf.preAlloc(100);
+ EXPECT_EQUAL("foo", getString(buf));
+ EXPECT_LESS_EQUAL(100u, buf.GetFreeLen());
+}
+
+TEST("require that rawbuf can read from file") {
+ FastOS_File file("mytemporaryfile");
+ file.OpenReadWrite();
+ file.Write2("barbaz", 6);
+ file.SetPosition(0);
+
+ RawBuf buf(10);
+ buf += "foo";
+ buf.readFile(file, 3);
+ EXPECT_EQUAL("foobar", getString(buf));
+ buf.readFile(file, 100);
+ EXPECT_EQUAL("foobarbaz", getString(buf));
+
+ file.Close();
+ file.Delete();
+}
+
+TEST("require that compact discards drained data") {
+ RawBuf buf(10);
+ buf += "foobar";
+ buf.Drain(3);
+ buf.Compact();
+ buf.Fill(3);
+ EXPECT_EQUAL("barbar", getString(buf));
+}
+
+TEST("require that reusing a buffer that has grown 4x will alloc new buffer") {
+ RawBuf buf(10);
+ buf.preAlloc(100);
+ EXPECT_LESS_EQUAL(100u, buf.GetFreeLen());
+ buf.Reuse();
+ EXPECT_EQUAL(10u, buf.GetFreeLen());
+}
+
+TEST("require that various length and position information can be found.") {
+ RawBuf buf(30);
+ buf += "foo bar baz qux quux corge";
+ buf.Drain(7);
+ EXPECT_EQUAL(7u, buf.GetDrainLen());
+ EXPECT_EQUAL(19u, buf.GetUsedLen());
+ EXPECT_EQUAL(26u, buf.GetUsedAndDrainLen());
+ EXPECT_EQUAL(4u, buf.GetFreeLen());
+}
+
+TEST("require that rawbuf can 'putToInet' 16-bit numbers") {
+ RawBuf buf(1);
+ buf.Put16ToInet(0x1234);
+ EXPECT_EQUAL(2, buf.GetFillPos() - buf.GetDrainPos());
+ EXPECT_EQUAL(0x12, (int) buf.GetDrainPos()[0] & 0xff);
+ EXPECT_EQUAL(0x34, (int) buf.GetDrainPos()[1] & 0xff);
+}
+
+TEST("require that rawbuf can 'putToInet' 32-bit numbers") {
+ RawBuf buf(1);
+ buf.PutToInet(0x12345678);
+ EXPECT_EQUAL(4, buf.GetFillPos() - buf.GetDrainPos());
+ EXPECT_EQUAL(0x12, (int) buf.GetDrainPos()[0] & 0xff);
+ EXPECT_EQUAL(0x34, (int) buf.GetDrainPos()[1] & 0xff);
+ EXPECT_EQUAL(0x56, (int) buf.GetDrainPos()[2] & 0xff);
+ EXPECT_EQUAL(0x78, (int) buf.GetDrainPos()[3] & 0xff);
+}
+
+TEST("require that rawbuf can 'putToInet' 64-bit numbers") {
+ RawBuf buf(1);
+ buf.Put64ToInet(0x123456789abcdef0ULL);
+ EXPECT_EQUAL(8, buf.GetFillPos() - buf.GetDrainPos());
+ EXPECT_EQUAL(0x12, (int) buf.GetDrainPos()[0] & 0xff);
+ EXPECT_EQUAL(0x34, (int) buf.GetDrainPos()[1] & 0xff);
+ EXPECT_EQUAL(0x56, (int) buf.GetDrainPos()[2] & 0xff);
+ EXPECT_EQUAL(0x78, (int) buf.GetDrainPos()[3] & 0xff);
+ EXPECT_EQUAL(0x9a, (int) buf.GetDrainPos()[4] & 0xff);
+ EXPECT_EQUAL(0xbc, (int) buf.GetDrainPos()[5] & 0xff);
+ EXPECT_EQUAL(0xde, (int) buf.GetDrainPos()[6] & 0xff);
+ EXPECT_EQUAL(0xf0, (int) buf.GetDrainPos()[7] & 0xff);
+}
+
+
+} // namespace
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/util/searchable_stats/.gitignore b/searchlib/src/tests/util/searchable_stats/.gitignore
new file mode 100644
index 00000000000..08720bdefb5
--- /dev/null
+++ b/searchlib/src/tests/util/searchable_stats/.gitignore
@@ -0,0 +1,4 @@
+/.depend
+/Makefile
+/*_test
+searchlib_searchable_stats_test_app
diff --git a/searchlib/src/tests/util/searchable_stats/CMakeLists.txt b/searchlib/src/tests/util/searchable_stats/CMakeLists.txt
new file mode 100644
index 00000000000..3bc0fbe9c6b
--- /dev/null
+++ b/searchlib/src/tests/util/searchable_stats/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_searchable_stats_test_app
+ SOURCES
+ searchable_stats_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_searchable_stats_test_app COMMAND searchlib_searchable_stats_test_app)
diff --git a/searchlib/src/tests/util/searchable_stats/DESC b/searchlib/src/tests/util/searchable_stats/DESC
new file mode 100644
index 00000000000..b8127b278f8
--- /dev/null
+++ b/searchlib/src/tests/util/searchable_stats/DESC
@@ -0,0 +1 @@
+searchable_stats test. Take a look at searchable_stats_test.cpp for details.
diff --git a/searchlib/src/tests/util/searchable_stats/FILES b/searchlib/src/tests/util/searchable_stats/FILES
new file mode 100644
index 00000000000..94e6ce7f4df
--- /dev/null
+++ b/searchlib/src/tests/util/searchable_stats/FILES
@@ -0,0 +1 @@
+searchable_stats_test.cpp
diff --git a/searchlib/src/tests/util/searchable_stats/searchable_stats_test.cpp b/searchlib/src/tests/util/searchable_stats/searchable_stats_test.cpp
new file mode 100644
index 00000000000..83aba794824
--- /dev/null
+++ b/searchlib/src/tests/util/searchable_stats/searchable_stats_test.cpp
@@ -0,0 +1,42 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("searchable_stats_test");
+#include <vespa/vespalib/testkit/testapp.h>
+#include <vespa/searchlib/util/searchable_stats.h>
+
+using namespace search;
+
+class Test : public vespalib::TestApp {
+public:
+ int Main();
+};
+
+int
+Test::Main()
+{
+ TEST_INIT("searchable_stats_test");
+ {
+ SearchableStats stats;
+ EXPECT_EQUAL(0u, stats.memoryUsage());
+ EXPECT_EQUAL(0u, stats.docsInMemory());
+ EXPECT_EQUAL(0u, stats.sizeOnDisk());
+ {
+ SearchableStats rhs;
+ EXPECT_EQUAL(&rhs.memoryUsage(100), &rhs);
+ EXPECT_EQUAL(&rhs.docsInMemory(10), &rhs);
+ EXPECT_EQUAL(&rhs.sizeOnDisk(1000), &rhs);
+ EXPECT_EQUAL(&stats.add(rhs), &stats);
+ }
+ EXPECT_EQUAL(100u, stats.memoryUsage());
+ EXPECT_EQUAL(10u, stats.docsInMemory());
+ EXPECT_EQUAL(1000u, stats.sizeOnDisk());
+ EXPECT_EQUAL(&stats.add(SearchableStats().memoryUsage(100).docsInMemory(10).sizeOnDisk(1000)), &stats);
+ EXPECT_EQUAL(200u, stats.memoryUsage());
+ EXPECT_EQUAL(20u, stats.docsInMemory());
+ EXPECT_EQUAL(2000u, stats.sizeOnDisk());
+ }
+ TEST_DONE();
+}
+
+TEST_APPHOOK(Test);
diff --git a/searchlib/src/tests/util/sigbushandler/.gitignore b/searchlib/src/tests/util/sigbushandler/.gitignore
new file mode 100644
index 00000000000..ab5a59f3296
--- /dev/null
+++ b/searchlib/src/tests/util/sigbushandler/.gitignore
@@ -0,0 +1 @@
+searchlib_sigbushandler_test_app
diff --git a/searchlib/src/tests/util/sigbushandler/CMakeLists.txt b/searchlib/src/tests/util/sigbushandler/CMakeLists.txt
new file mode 100644
index 00000000000..a55e1e354c7
--- /dev/null
+++ b/searchlib/src/tests/util/sigbushandler/CMakeLists.txt
@@ -0,0 +1,9 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_sigbushandler_test_app
+ SOURCES
+ sigbushandler_test.cpp
+ DEPENDS
+ searchlib_test
+ searchlib
+)
+vespa_add_test(NAME searchlib_sigbushandler_test_app COMMAND searchlib_sigbushandler_test_app)
diff --git a/searchlib/src/tests/util/sigbushandler/DESC b/searchlib/src/tests/util/sigbushandler/DESC
new file mode 100644
index 00000000000..03ee94c85ba
--- /dev/null
+++ b/searchlib/src/tests/util/sigbushandler/DESC
@@ -0,0 +1 @@
+SigBusHandler test. Take a look at sigbushandler_test.cpp for details.
diff --git a/searchlib/src/tests/util/sigbushandler/FILES b/searchlib/src/tests/util/sigbushandler/FILES
new file mode 100644
index 00000000000..0998cd8a784
--- /dev/null
+++ b/searchlib/src/tests/util/sigbushandler/FILES
@@ -0,0 +1 @@
+sigbushandler_test.cpp
diff --git a/searchlib/src/tests/util/sigbushandler/sigbushandler_test.cpp b/searchlib/src/tests/util/sigbushandler/sigbushandler_test.cpp
new file mode 100644
index 00000000000..af657420575
--- /dev/null
+++ b/searchlib/src/tests/util/sigbushandler/sigbushandler_test.cpp
@@ -0,0 +1,131 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("sigbushandler_test");
+#include <vespa/vespalib/testkit/testapp.h>
+#include <vespa/vespalib/stllike/string.h>
+#include <vespa/searchlib/util/statefile.h>
+#include <vespa/searchlib/util/sigbushandler.h>
+#include <atomic>
+#include <iostream>
+#include <fstream>
+#include <string>
+#include <vespa/searchlib/test/statefile.h>
+#include <vespa/searchlib/test/statestring.h>
+
+using namespace search::test::statefile;
+using namespace search::test::statestring;
+
+namespace search
+{
+
+using strvec = std::vector<vespalib::string>;
+
+namespace
+{
+
+bool
+assertHistory(std::vector<vespalib::string> &exp,
+ std::vector<vespalib::string> &act)
+{
+ if (!EXPECT_EQUAL(exp.size(), act.size())) {
+ return false;
+ }
+ for (size_t i = 0; i < exp.size(); ++i) {
+ if (!EXPECT_EQUAL(exp[i], act[i])) {
+ return false;
+ }
+ }
+ return true;
+}
+
+}
+
+
+TEST("Test that sigbus handler can be instantated")
+{
+ StateFile::erase("state");
+ StateFile sf("state");
+ SigBusHandler sbh(&sf);
+ EXPECT_FALSE(sbh.fired());
+}
+
+
+TEST("Test that sigbus handler can trap synthetic sigbus")
+{
+ StateFile::erase("state");
+ StateFile sf("state");
+ SigBusHandler sbh(&sf);
+ EXPECT_FALSE(sbh.fired());
+ sigjmp_buf sjb;
+ if (sigsetjmp(sjb, 1) == 0) {
+ sbh.setUnwind(&sjb);
+ kill(getpid(), SIGBUS);
+ LOG(error, "Should never get here");
+ abort();
+ }
+ EXPECT_TRUE(sbh.fired());
+ {
+ vespalib::string act = readState(sf);
+ normalizeTimestamp(act);
+ EXPECT_EQUAL("state=down ts=0.0 operation=sigbus errno=0 code=0\n",
+ act);
+ }
+ {
+ strvec exp({"state=down ts=0.0 operation=sigbus errno=0 code=0\n" });
+ std::vector<vespalib::string> act(readHistory("state.history"));
+ normalizeTimestamps(act);
+ TEST_DO(assertHistory(exp, act));
+ }
+}
+
+TEST("Test that sigbus handler can trap normal sigbus")
+{
+ StateFile::erase("state");
+ StateFile sf("state");
+ SigBusHandler sbh(&sf);
+ EXPECT_FALSE(sbh.fired());
+
+ int fd = open("mmapfile", O_CREAT | O_TRUNC | O_RDWR, 0644);
+ assert(fd >= 0);
+ void *mmapres = mmap(nullptr, 4096, PROT_READ | PROT_WRITE,
+ MAP_SHARED, fd, 0);
+ assert(mmapres != nullptr);
+ assert(mmapres != reinterpret_cast<void *>(-1l));
+ char *p = reinterpret_cast<char *>(mmapres) + 42;
+ volatile char r = 0;
+ sigjmp_buf sjb;
+ if (sigsetjmp(sjb, 1) == 0) {
+ sbh.setUnwind(&sjb);
+ r = *p;
+ LOG(error, "Should never get here");
+ abort();
+ }
+ EXPECT_TRUE(sbh.fired());
+ EXPECT_TRUE(r == '\0');
+ {
+ vespalib::string act = readState(sf);
+ vespalib::string exp ="state=down ts=0.0 operation=sigbus errno=0 "
+ "code=2 addr=0x0000000000000000\n";
+ normalizeAddr(exp, p);
+ normalizeTimestamp(act);
+ EXPECT_EQUAL(exp, act);
+ }
+ {
+ strvec exp({"state=down ts=0.0 operation=sigbus errno=0 code=2 "
+ "addr=0x0000000000000000\n" });
+ normalizeAddrs(exp, p);
+ std::vector<vespalib::string> act(readHistory("state.history"));
+ normalizeTimestamps(act);
+ TEST_DO(assertHistory(exp, act));
+ }
+}
+
+}
+
+TEST_MAIN()
+{
+ TEST_RUN_ALL();
+ search::StateFile::erase("state");
+ unlink("mmapfile");
+}
diff --git a/searchlib/src/tests/util/slime_output_raw_buf_adapter/.gitignore b/searchlib/src/tests/util/slime_output_raw_buf_adapter/.gitignore
new file mode 100644
index 00000000000..51a916d8333
--- /dev/null
+++ b/searchlib/src/tests/util/slime_output_raw_buf_adapter/.gitignore
@@ -0,0 +1 @@
+searchlib_slime_output_raw_buf_adapter_test_app
diff --git a/searchlib/src/tests/util/slime_output_raw_buf_adapter/CMakeLists.txt b/searchlib/src/tests/util/slime_output_raw_buf_adapter/CMakeLists.txt
new file mode 100644
index 00000000000..0735511fe7a
--- /dev/null
+++ b/searchlib/src/tests/util/slime_output_raw_buf_adapter/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_slime_output_raw_buf_adapter_test_app
+ SOURCES
+ slime_output_raw_buf_adapter_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_slime_output_raw_buf_adapter_test_app COMMAND searchlib_slime_output_raw_buf_adapter_test_app)
diff --git a/searchlib/src/tests/util/slime_output_raw_buf_adapter/FILES b/searchlib/src/tests/util/slime_output_raw_buf_adapter/FILES
new file mode 100644
index 00000000000..5870aa61349
--- /dev/null
+++ b/searchlib/src/tests/util/slime_output_raw_buf_adapter/FILES
@@ -0,0 +1 @@
+slime_output_raw_buf_adapter_test.cpp
diff --git a/searchlib/src/tests/util/slime_output_raw_buf_adapter/slime_output_raw_buf_adapter_test.cpp b/searchlib/src/tests/util/slime_output_raw_buf_adapter/slime_output_raw_buf_adapter_test.cpp
new file mode 100644
index 00000000000..5d48520a92c
--- /dev/null
+++ b/searchlib/src/tests/util/slime_output_raw_buf_adapter/slime_output_raw_buf_adapter_test.cpp
@@ -0,0 +1,25 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/vespalib/testkit/test_kit.h>
+#include <vespa/searchlib/util/slime_output_raw_buf_adapter.h>
+#include <vespa/vespalib/data/slime/slime.h>
+
+using namespace search;
+using namespace vespalib::slime::convenience;
+
+TEST("use slime with rawbuf") {
+ RawBuf buffer(4096);
+ Slime src;
+ Slime dst;
+ {
+ Cursor &c = src.setObject();
+ c.setLong("foo", 5);
+ c.setString("bar", "text");
+ }
+ EXPECT_NOT_EQUAL(src, dst);
+ SlimeOutputRawBufAdapter adapter(buffer);
+ vespalib::slime::BinaryFormat::encode(src, adapter);
+ vespalib::slime::BinaryFormat::decode(Memory(buffer.GetDrainPos(), buffer.GetUsedLen()), dst);
+ EXPECT_EQUAL(src, dst);
+}
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/util/statebuf/.gitignore b/searchlib/src/tests/util/statebuf/.gitignore
new file mode 100644
index 00000000000..270347c1d6b
--- /dev/null
+++ b/searchlib/src/tests/util/statebuf/.gitignore
@@ -0,0 +1 @@
+searchlib_statebuf_test_app
diff --git a/searchlib/src/tests/util/statebuf/CMakeLists.txt b/searchlib/src/tests/util/statebuf/CMakeLists.txt
new file mode 100644
index 00000000000..0b24cd1552b
--- /dev/null
+++ b/searchlib/src/tests/util/statebuf/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_statebuf_test_app
+ SOURCES
+ statebuf_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_statebuf_test_app COMMAND searchlib_statebuf_test_app)
diff --git a/searchlib/src/tests/util/statebuf/DESC b/searchlib/src/tests/util/statebuf/DESC
new file mode 100644
index 00000000000..6368f32bcc6
--- /dev/null
+++ b/searchlib/src/tests/util/statebuf/DESC
@@ -0,0 +1 @@
+statefile test. Take a look at statefile_test.cpp for details.
diff --git a/searchlib/src/tests/util/statebuf/FILES b/searchlib/src/tests/util/statebuf/FILES
new file mode 100644
index 00000000000..c651bb2758f
--- /dev/null
+++ b/searchlib/src/tests/util/statebuf/FILES
@@ -0,0 +1 @@
+statefile_test.cpp
diff --git a/searchlib/src/tests/util/statebuf/statebuf_test.cpp b/searchlib/src/tests/util/statebuf/statebuf_test.cpp
new file mode 100644
index 00000000000..952412b9eb6
--- /dev/null
+++ b/searchlib/src/tests/util/statebuf/statebuf_test.cpp
@@ -0,0 +1,109 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("statebuf_test");
+#include <vespa/vespalib/testkit/testapp.h>
+#include <vespa/vespalib/stllike/string.h>
+#include <vespa/searchlib/util/statebuf.h>
+#include <string>
+
+namespace search
+{
+
+namespace
+{
+
+
+}
+
+class Fixture : public StateBuf
+{
+ char _buf[1024];
+
+public:
+ Fixture()
+ : StateBuf(_buf, sizeof(_buf))
+ {
+ }
+};
+
+TEST_F("single character can be appended to stream", Fixture)
+{
+ f << 'H' << 'e' << 'l' << 'l' << 'o';
+ EXPECT_EQUAL("Hello", f.str());
+}
+
+
+TEST_F("strings can be appended to stream", Fixture)
+{
+ f << "Hello world";
+ EXPECT_EQUAL("Hello world", f.str());
+}
+
+TEST_F("quoted strings can be appended to stream", Fixture)
+{
+ f.appendQuoted("This is a quoting test, \\ \" \n oops");
+ EXPECT_EQUAL("\"This is a quoting test, \\\\ \\\" \\n oops\"", f.str());
+}
+
+TEST_F("keys can be appended to stream", Fixture)
+{
+ (f.appendKey("foo") << "fooval").appendKey("bar") << "barval";
+ EXPECT_EQUAL("foo=fooval bar=barval", f.str());
+}
+
+
+TEST_F("integers can be appended to stream", Fixture)
+{
+ f << (UINT64_C(1) << 63) << " " << -42l << " " << 0l;
+ EXPECT_EQUAL("9223372036854775808 -42 0", f.str());
+}
+
+TEST_F("struct timespec can be appended to stream", Fixture)
+{
+ struct timespec ts;
+ ts.tv_sec = 15;
+ ts.tv_nsec = 256;
+ f << ts;
+ EXPECT_EQUAL("15.000000256", f.str());
+}
+
+TEST_F("timestamp can be appended to stream", Fixture)
+{
+ struct timespec ts;
+ ts.tv_sec = 16;
+ ts.tv_nsec = 257;
+ f.appendTimestamp(ts);
+ EXPECT_EQUAL("ts=16.000000257", f.str());
+}
+
+
+TEST_F("hexadecimal numbers can be appended to stream", Fixture)
+{
+ (f.appendHex(0xdeadbeefcafebabeul) << " ").appendHex(0x123456789abcdef0ul);
+ EXPECT_EQUAL("0xdeadbeefcafebabe 0x123456789abcdef0", f.str());
+
+}
+
+TEST_F("pointer address can be appended to stream", Fixture)
+{
+ f.appendAddr(nullptr);
+ f.appendAddr(reinterpret_cast<void *>(0x12345ul));
+ EXPECT_EQUAL("addr=0x0000000000000000 addr=0x0000000000012345", f.str());
+}
+
+
+TEST_F("base and size methods can be called on stream", Fixture)
+{
+ f << "Hello world\n";
+ std::string s(f.base(), f.base() + f.size());
+ EXPECT_EQUAL("Hello world\n", s);
+}
+
+}
+
+
+TEST_MAIN()
+{
+ TEST_RUN_ALL();
+}
diff --git a/searchlib/src/tests/util/statefile/.gitignore b/searchlib/src/tests/util/statefile/.gitignore
new file mode 100644
index 00000000000..504b7431a7a
--- /dev/null
+++ b/searchlib/src/tests/util/statefile/.gitignore
@@ -0,0 +1 @@
+searchlib_statefile_test_app
diff --git a/searchlib/src/tests/util/statefile/CMakeLists.txt b/searchlib/src/tests/util/statefile/CMakeLists.txt
new file mode 100644
index 00000000000..b90b87fce7d
--- /dev/null
+++ b/searchlib/src/tests/util/statefile/CMakeLists.txt
@@ -0,0 +1,9 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_statefile_test_app
+ SOURCES
+ statefile_test.cpp
+ DEPENDS
+ searchlib_test
+ searchlib
+)
+vespa_add_test(NAME searchlib_statefile_test_app COMMAND searchlib_statefile_test_app)
diff --git a/searchlib/src/tests/util/statefile/DESC b/searchlib/src/tests/util/statefile/DESC
new file mode 100644
index 00000000000..6368f32bcc6
--- /dev/null
+++ b/searchlib/src/tests/util/statefile/DESC
@@ -0,0 +1 @@
+statefile test. Take a look at statefile_test.cpp for details.
diff --git a/searchlib/src/tests/util/statefile/FILES b/searchlib/src/tests/util/statefile/FILES
new file mode 100644
index 00000000000..c651bb2758f
--- /dev/null
+++ b/searchlib/src/tests/util/statefile/FILES
@@ -0,0 +1 @@
+statefile_test.cpp
diff --git a/searchlib/src/tests/util/statefile/statefile_test.cpp b/searchlib/src/tests/util/statefile/statefile_test.cpp
new file mode 100644
index 00000000000..583d21e1cec
--- /dev/null
+++ b/searchlib/src/tests/util/statefile/statefile_test.cpp
@@ -0,0 +1,294 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("statefile_test");
+#include <vespa/vespalib/testkit/testapp.h>
+#include <vespa/vespalib/stllike/string.h>
+#include <vespa/searchlib/util/statefile.h>
+#include <atomic>
+#include <iostream>
+#include <fstream>
+#include <string>
+#include <vespa/searchlib/test/statefile.h>
+
+
+using namespace search::test::statefile;
+
+namespace search
+{
+
+namespace
+{
+
+bool
+hasFile(const char *name)
+{
+ return access(name, R_OK | W_OK) == 0;
+}
+
+
+void
+addState(StateFile &sf, const char *buf)
+{
+ size_t bufLen = strlen(buf);
+ sf.addState(buf, bufLen, false);
+}
+
+void
+addSignalState(StateFile &sf, const char *buf)
+{
+ size_t bufLen = strlen(buf);
+ sf.addState(buf, bufLen, true);
+}
+
+
+bool
+assertHistory(std::vector<vespalib::string> &exp,
+ std::vector<vespalib::string> &act)
+{
+ if (!EXPECT_EQUAL(exp.size(), act.size())) {
+ return false;
+ }
+ for (size_t i = 0; i < exp.size(); ++i) {
+ if (!EXPECT_EQUAL(exp[i], act[i])) {
+ return false;
+ }
+ }
+ return true;
+}
+
+
+int64_t
+getSize(const char *name)
+{
+ struct stat stbuf;
+ if (stat(name, &stbuf) != 0)
+ return 0;
+ return stbuf.st_size;
+}
+
+
+void
+setSize(const char *name, int64_t newSize)
+{
+ int truncRes = truncate(name, newSize);
+ assert(truncRes == 0);
+}
+
+
+}
+
+
+TEST("Test lock free atomic int used by async signal safe lock primitive")
+{
+ std::atomic<int> f;
+ ASSERT_TRUE(f.is_lock_free());
+}
+
+
+TEST("Test that statefile can be created")
+{
+ StateFile::erase("state");
+ EXPECT_FALSE(hasFile("state"));
+ EXPECT_FALSE(hasFile("state.history"));
+ StateFile sf("state");
+ EXPECT_TRUE(hasFile("state"));
+ EXPECT_TRUE(hasFile("state.history"));
+ EXPECT_EQUAL(0, sf.getGen());
+ StateFile::erase("state");
+ EXPECT_FALSE(hasFile("state"));
+ EXPECT_FALSE(hasFile("state.history"));
+ StateFile::erase("state");
+ EXPECT_FALSE(hasFile("state"));
+ EXPECT_FALSE(hasFile("state.history"));
+}
+
+
+TEST("Test that statefile can add event")
+{
+ StateFile::erase("state");
+ StateFile sf("state");
+
+ addState(sf, "Hello world\n");
+ vespalib::string check = readState(sf);
+ EXPECT_EQUAL("Hello world\n", check);
+ EXPECT_EQUAL(1, sf.getGen());
+}
+
+TEST("Test that history is appended to")
+{
+ StateFile::erase("state");
+ StateFile sf("state");
+
+ addState(sf, "Hello world\n");
+ addState(sf, "Foo bar\n");
+ vespalib::string check = readState(sf);
+ EXPECT_EQUAL("Foo bar\n", check);
+ EXPECT_EQUAL(2, sf.getGen());
+ {
+ std::vector<vespalib::string> exp({ "Hello world\n", "Foo bar\n" });
+ std::vector<vespalib::string> act(readHistory("state.history"));
+ TEST_DO(assertHistory(exp, act));
+ }
+}
+
+
+TEST("Test that truncated history is truncated at event boundary")
+{
+ StateFile::erase("state");
+ int64_t histSize = 1;
+ {
+ StateFile sf("state");
+ addState(sf, "Hello world\n");
+ addState(sf, "Foo bar\n");
+ EXPECT_EQUAL(2, sf.getGen());
+ histSize = getSize("state.history");
+ EXPECT_EQUAL(20, histSize);
+ addState(sf, "zap\n");
+ EXPECT_EQUAL(3, sf.getGen());
+ }
+ // Lose 2 last events in history
+ setSize("state.history", histSize - 1);
+ // Last event is restored to history from main state file
+ StateFile sf("state");
+ vespalib::string check = readState(sf);
+ EXPECT_EQUAL("zap\n", check);
+ EXPECT_EQUAL(0, sf.getGen());
+ {
+ std::vector<vespalib::string> exp({ "Hello world\n", "zap\n" });
+ std::vector<vespalib::string> act(readHistory("state.history"));
+ TEST_DO(assertHistory(exp, act));
+ }
+}
+
+
+TEST("Test that async signal safe path adds event")
+{
+ StateFile::erase("state");
+ StateFile sf("state");
+
+ addSignalState(sf, "Hello world\n");
+ addSignalState(sf, "Foo bar\n");
+ vespalib::string check = readState(sf);
+ EXPECT_EQUAL("Foo bar\n", check);
+ EXPECT_EQUAL(2, sf.getGen());
+ {
+ std::vector<vespalib::string> exp({ "Hello world\n", "Foo bar\n" });
+ std::vector<vespalib::string> act(readHistory("state.history"));
+ TEST_DO(assertHistory(exp, act));
+ }
+}
+
+
+TEST("Test that state file can be restored from history")
+{
+ StateFile::erase("state");
+ {
+ StateFile sf("state");
+ addState(sf, "Hello world\n");
+ addState(sf, "Foo bar\n");
+ EXPECT_EQUAL(2, sf.getGen());
+ }
+ // Lose event in main state file
+ setSize("state", 0);
+ EXPECT_EQUAL(0, getSize("state"));
+ // Last event is restored to history from main state file
+ StateFile sf("state");
+ EXPECT_NOT_EQUAL(0, getSize("state"));
+ vespalib::string check = readState(sf);
+ EXPECT_EQUAL("Foo bar\n", check);
+ {
+ std::vector<vespalib::string> exp({ "Hello world\n", "Foo bar\n" });
+ std::vector<vespalib::string> act(readHistory("state.history"));
+ TEST_DO(assertHistory(exp, act));
+ }
+}
+
+
+TEST("Test that different entry is added to history")
+{
+ StateFile::erase("state");
+ {
+ StateFile sf("state");
+ addState(sf, "Hello world\n");
+ EXPECT_EQUAL(1, sf.getGen());
+ }
+ // Write changed entry to main state file
+ {
+ std::ofstream of("state");
+ of << "zap\n";
+ }
+ // Add changed event to history
+ StateFile sf("state");
+ EXPECT_NOT_EQUAL(0, getSize("state"));
+ vespalib::string check = readState(sf);
+ EXPECT_EQUAL("zap\n", check);
+ {
+ std::vector<vespalib::string> exp({ "Hello world\n", "zap\n" });
+ std::vector<vespalib::string> act(readHistory("state.history"));
+ TEST_DO(assertHistory(exp, act));
+ }
+}
+
+
+TEST("Test that state history stops at NUL byte")
+{
+ StateFile::erase("state");
+ {
+ StateFile sf("state");
+ addState(sf, "Hello world\n");
+ addState(sf, "Foo bar\n");
+ EXPECT_EQUAL(2, sf.getGen());
+ }
+ // Corrupt history state file
+ {
+ char buf[1];
+ buf[0] = '\0';
+ std::ofstream of("state.history");
+ of.write(&buf[0], 1);
+ }
+ StateFile sf("state");
+ vespalib::string check = readState(sf);
+ EXPECT_EQUAL("Foo bar\n", check);
+ {
+ std::vector<vespalib::string> exp({ "Foo bar\n" });
+ std::vector<vespalib::string> act(readHistory("state.history"));
+ TEST_DO(assertHistory(exp, act));
+ }
+
+}
+
+TEST("Test that main state stops at NUL byte")
+{
+ StateFile::erase("state");
+ {
+ StateFile sf("state");
+ addState(sf, "Hello world\n");
+ addState(sf, "Foo bar\n");
+ EXPECT_EQUAL(2, sf.getGen());
+ }
+ // Corrupt history state file
+ {
+ char buf[10];
+ strcpy(buf, "zap");
+ std::ofstream of("state");
+ of.write(&buf[0], strlen(buf) + 1);
+ }
+ StateFile sf("state");
+ vespalib::string check = readState(sf);
+ EXPECT_EQUAL("Foo bar\n", check);
+ {
+ std::vector<vespalib::string> exp({ "Hello world\n", "Foo bar\n" });
+ std::vector<vespalib::string> act(readHistory("state.history"));
+ TEST_DO(assertHistory(exp, act));
+ }
+
+}
+
+}
+
+TEST_MAIN()
+{
+ TEST_RUN_ALL();
+ search::StateFile::erase("state");
+}
diff --git a/searchlib/src/vespa/searchlib/.gitignore b/searchlib/src/vespa/searchlib/.gitignore
new file mode 100644
index 00000000000..42f98e8f86d
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/.gitignore
@@ -0,0 +1,4 @@
+.depend
+Makefile
+config.h
+/libsearchlib.so.5.1
diff --git a/searchlib/src/vespa/searchlib/CMakeLists.txt b/searchlib/src/vespa/searchlib/CMakeLists.txt
new file mode 100644
index 00000000000..6b94631cdeb
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/CMakeLists.txt
@@ -0,0 +1,31 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_library(searchlib
+ SOURCES
+ $<TARGET_OBJECTS:searchlib_aggregation>
+ $<TARGET_OBJECTS:searchlib_grouping>
+ $<TARGET_OBJECTS:searchlib_attribute>
+ $<TARGET_OBJECTS:searchlib_btree>
+ $<TARGET_OBJECTS:searchlib_common>
+ $<TARGET_OBJECTS:searchlib_docstore>
+ $<TARGET_OBJECTS:searchlib_engine>
+ $<TARGET_OBJECTS:searchlib_expression>
+ $<TARGET_OBJECTS:searchlib_fef>
+ $<TARGET_OBJECTS:searchlib_fef_test>
+ $<TARGET_OBJECTS:searchlib_fef_test_plugin>
+ $<TARGET_OBJECTS:searchlib_parsequery>
+ $<TARGET_OBJECTS:searchlib_predicate>
+ $<TARGET_OBJECTS:searchlib_sconfig>
+ $<TARGET_OBJECTS:searchlib_searchlib_bitcompression>
+ $<TARGET_OBJECTS:searchlib_searchlib_diskindex>
+ $<TARGET_OBJECTS:searchlib_searchlib_index>
+ $<TARGET_OBJECTS:searchlib_searchlib_memoryindex>
+ $<TARGET_OBJECTS:searchlib_translog>
+ $<TARGET_OBJECTS:searchlib_util>
+ INSTALL lib64
+ DEPENDS
+ searchlib_features
+ searchlib_query
+ searchlib_queryeval
+ searchlib_queryeval_test
+ staging_vespalib
+)
diff --git a/searchlib/src/vespa/searchlib/aggregation/.gitignore b/searchlib/src/vespa/searchlib/aggregation/.gitignore
new file mode 100644
index 00000000000..ee8938b6bf4
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/aggregation/.gitignore
@@ -0,0 +1,6 @@
+*.So
+*.exe
+*.ilk
+*.pdb
+.depend*
+Makefile
diff --git a/searchlib/src/vespa/searchlib/aggregation/CMakeLists.txt b/searchlib/src/vespa/searchlib/aggregation/CMakeLists.txt
new file mode 100644
index 00000000000..921a20bb466
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/aggregation/CMakeLists.txt
@@ -0,0 +1,16 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_library(searchlib_aggregation OBJECT
+ SOURCES
+ aggregation.cpp
+ fs4hit.cpp
+ group.cpp
+ grouping.cpp
+ groupinglevel.cpp
+ hit.cpp
+ hitlist.cpp
+ hitsaggregationresult.cpp
+ modifiers.cpp
+ rawrank.cpp
+ vdshit.cpp
+ DEPENDS
+)
diff --git a/searchlib/src/vespa/searchlib/aggregation/OWNERS b/searchlib/src/vespa/searchlib/aggregation/OWNERS
new file mode 100644
index 00000000000..1037590124e
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/aggregation/OWNERS
@@ -0,0 +1 @@
+balder
diff --git a/searchlib/src/vespa/searchlib/aggregation/aggregation.cpp b/searchlib/src/vespa/searchlib/aggregation/aggregation.cpp
new file mode 100644
index 00000000000..07c21137b1d
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/aggregation/aggregation.cpp
@@ -0,0 +1,448 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include "expressioncountaggregationresult.h"
+#include <vespa/searchlib/aggregation/aggregation.h>
+#include <stdexcept>
+#include <vespa/vespalib/objects/visit.h>
+#include <vespa/vespalib/xxhash/xxhash.h>
+
+namespace search {
+
+using namespace expression;
+
+namespace aggregation {
+
+namespace {
+
+bool isReady(const ResultNode *myRes, const ResultNode &ref) {
+ return (myRes != 0 && myRes->getClass().id() == ref.getClass().id());
+}
+
+} // namespace search::aggregation::<unnamed>
+
+
+using vespalib::FieldBase;
+using vespalib::Serializer;
+using vespalib::Deserializer;
+
+#define IMPLEMENT_ABSTRACT_AGGREGATIONRESULT(cclass, base) IMPLEMENT_IDENTIFIABLE_ABSTRACT_NS2(search, aggregation, cclass, base)
+#define IMPLEMENT_AGGREGATIONRESULT(cclass, base) IMPLEMENT_IDENTIFIABLE_NS2(search, aggregation, cclass, base)
+
+IMPLEMENT_ABSTRACT_AGGREGATIONRESULT(AggregationResult, ExpressionNode);
+IMPLEMENT_AGGREGATIONRESULT(CountAggregationResult, AggregationResult);
+IMPLEMENT_AGGREGATIONRESULT(SumAggregationResult, AggregationResult);
+IMPLEMENT_AGGREGATIONRESULT(MaxAggregationResult, AggregationResult);
+IMPLEMENT_AGGREGATIONRESULT(MinAggregationResult, AggregationResult);
+IMPLEMENT_AGGREGATIONRESULT(AverageAggregationResult, AggregationResult);
+IMPLEMENT_AGGREGATIONRESULT(XorAggregationResult, AggregationResult);
+IMPLEMENT_AGGREGATIONRESULT(ExpressionCountAggregationResult,
+ AggregationResult);
+
+bool AggregationResult::Configure::check(const vespalib::Identifiable &obj) const
+{
+ return obj.inherits(AggregationResult::classId);
+}
+
+void AggregationResult::Configure::execute(vespalib::Identifiable &obj)
+{
+ AggregationResult & a(static_cast<AggregationResult &>(obj));
+ a.prepare();
+}
+
+AggregationResult &
+AggregationResult::setExpression(const ExpressionNode::CP &expr)
+{
+ _expressionTree.reset(new ExpressionTree(expr));
+ prepare(&_expressionTree->getResult(), false);
+ return *this;
+}
+
+void CountAggregationResult::onPrepare(const ResultNode & result, bool useForInit)
+{
+ (void) result;
+ (void) useForInit;
+}
+
+void SumAggregationResult::onPrepare(const ResultNode & result, bool useForInit)
+{
+ if (isReady(_sum.get(), result)) {
+ return;
+ }
+ _sum.reset(dynamic_cast<SingleResultNode *>(result.createBaseType().release()));
+ if ( useForInit ) {
+ _sum->set(result);
+ }
+}
+
+void MinAggregationResult::onPrepare(const ResultNode & result, bool useForInit)
+{
+ if (isReady(_min.get(), result)) {
+ return;
+ }
+ _min.reset(dynamic_cast<SingleResultNode *>(result.createBaseType().release()));
+ if ( !useForInit ) {
+ _min->setMax();
+ } else {
+ _min->set(result);
+ }
+}
+
+void MaxAggregationResult::onPrepare(const ResultNode & result, bool useForInit)
+{
+ if (isReady(_max.get(), result)) {
+ return;
+ }
+ _max.reset(dynamic_cast<SingleResultNode *>(result.createBaseType().release()));
+ if ( !useForInit ) {
+ _max->setMin(); ///Should figure out how to set min too for float.
+ } else {
+ _max->set(result);
+ }
+}
+
+void AverageAggregationResult::onPrepare(const ResultNode & result, bool useForInit)
+{
+ if (isReady(_sum.get(), result)) {
+ return;
+ }
+ _sum.reset(dynamic_cast<NumericResultNode *>(result.createBaseType().release()));
+ if ( useForInit ) {
+ _sum->set(result);
+ }
+}
+
+void XorAggregationResult::onPrepare(const ResultNode & result, bool useForInit)
+{
+ (void) result;
+ (void) useForInit;
+}
+
+void SumAggregationResult::onMerge(const AggregationResult & b)
+{
+ _sum->add(*static_cast<const SumAggregationResult &>(b)._sum);
+}
+
+void SumAggregationResult::onAggregate(const ResultNode & result)
+{
+ if (result.isMultiValue()) {
+ static_cast<const ResultNodeVector &>(result).flattenSum(*_sum);
+ } else {
+ _sum->add(result);
+ }
+}
+
+void SumAggregationResult::onReset()
+{
+ _sum.reset(static_cast<SingleResultNode *>(_sum->getClass().create()));
+}
+
+void CountAggregationResult::onMerge(const AggregationResult & b)
+{
+ _count.add(static_cast<const CountAggregationResult &>(b)._count);
+}
+
+void CountAggregationResult::onAggregate(const ResultNode & result)
+{
+ if (result.isMultiValue()) {
+ _count += static_cast<const ResultNodeVector &>(result).size();
+ } else {
+ ++_count;
+ }
+}
+
+void CountAggregationResult::onReset()
+{
+ setCount(0);
+}
+
+void MaxAggregationResult::onMerge(const AggregationResult & b)
+{
+ _max->max(*static_cast<const MaxAggregationResult &>(b)._max);
+}
+
+void MaxAggregationResult::onAggregate(const ResultNode & result)
+{
+ if (result.isMultiValue()) {
+ static_cast<const ResultNodeVector &>(result).flattenMax(*_max);
+ } else {
+ _max->max(result);
+ }
+}
+
+void MaxAggregationResult::onReset()
+{
+ _max.reset(static_cast<SingleResultNode *>(_max->getClass().create()));
+ _max->setMin();
+}
+
+void MinAggregationResult::onMerge(const AggregationResult & b)
+{
+ _min->min(*static_cast<const MinAggregationResult &>(b)._min);
+}
+
+void MinAggregationResult::onAggregate(const ResultNode & result)
+{
+ if (result.isMultiValue()) {
+ static_cast<const ResultNodeVector &>(result).flattenMin(*_min);
+ } else {
+ _min->min(result);
+ }
+}
+
+void MinAggregationResult::onReset()
+{
+ _min.reset(static_cast<SingleResultNode *>(_min->getClass().create()));
+ _min->setMax();
+}
+
+void AverageAggregationResult::onMerge(const AggregationResult & b)
+{
+ const AverageAggregationResult & avg(static_cast<const AverageAggregationResult &>(b));
+ _sum->add(*avg._sum);
+ _count += avg._count;
+}
+
+void AverageAggregationResult::onAggregate(const ResultNode & result)
+{
+ if (result.isMultiValue()) {
+ static_cast<const ResultNodeVector &>(result).flattenSum(*_sum);
+ _count += static_cast<const ResultNodeVector &>(result).size();
+ } else {
+ _sum->add(result);
+ _count++;
+ }
+}
+
+void AverageAggregationResult::onReset()
+{
+ _count = 0;
+ _sum.reset(static_cast<NumericResultNode *>(_sum->getClass().create()));
+}
+
+const NumericResultNode & AverageAggregationResult::getAverage() const
+{
+ _averageScratchPad = _sum;
+ if ( _count > 0 ) {
+ _averageScratchPad->divide(Int64ResultNode(_count));
+ } else {
+ _averageScratchPad->set(Int64ResultNode(0));
+ }
+ return *_averageScratchPad;
+}
+
+void XorAggregationResult::onMerge(const AggregationResult & b)
+{
+ _xor.xorOp(static_cast<const XorAggregationResult &>(b)._xor);
+}
+
+void XorAggregationResult::onAggregate(const ResultNode & result)
+{
+ if (result.isMultiValue()) {
+ for (size_t i(0), m(static_cast<const ResultNodeVector &>(result).size()); i < m; i++) {
+ _xor.xorOp(static_cast<const ResultNodeVector &>(result).get(i));
+ }
+ } else {
+ _xor.xorOp(result);
+ }
+}
+
+void XorAggregationResult::onReset()
+{
+ _xor = 0;
+}
+
+static FieldBase _G_tagField("tag");
+
+Serializer & AggregationResult::onSerialize(Serializer & os) const
+{
+ return (os << *_expressionTree).put(_G_tagField, _tag);
+}
+
+Deserializer & AggregationResult::onDeserialize(Deserializer & is)
+{
+ _expressionTree.reset(new ExpressionTree());
+ return (is >> *_expressionTree).get(_G_tagField, _tag);
+}
+
+void
+AggregationResult::visitMembers(vespalib::ObjectVisitor &visitor) const
+{
+ visit(visitor, "expression", _expressionTree);
+}
+
+void AggregationResult::selectMembers(const vespalib::ObjectPredicate & predicate, vespalib::ObjectOperation & operation)
+{
+ _expressionTree->select(predicate,operation);
+}
+
+Serializer & CountAggregationResult::onSerialize(Serializer & os) const
+{
+ AggregationResult::onSerialize(os);
+ return _count.serialize(os);
+}
+
+Deserializer & CountAggregationResult::onDeserialize(Deserializer & is)
+{
+ AggregationResult::onDeserialize(is);
+ return _count.deserialize(is);
+}
+
+void
+CountAggregationResult::visitMembers(vespalib::ObjectVisitor &visitor) const
+{
+ AggregationResult::visitMembers(visitor);
+ visit(visitor, "count", _count);
+}
+
+Serializer & SumAggregationResult::onSerialize(Serializer & os) const
+{
+ AggregationResult::onSerialize(os);
+ return os << _sum;
+}
+
+Deserializer & SumAggregationResult::onDeserialize(Deserializer & is)
+{
+ AggregationResult::onDeserialize(is);
+ return is >> _sum;
+}
+
+void
+SumAggregationResult::visitMembers(vespalib::ObjectVisitor &visitor) const
+{
+ AggregationResult::visitMembers(visitor);
+ visit(visitor, "sum", _sum);
+}
+
+Serializer & MinAggregationResult::onSerialize(Serializer & os) const
+{
+ AggregationResult::onSerialize(os);
+ return os << _min;
+}
+
+Deserializer & MinAggregationResult::onDeserialize(Deserializer & is)
+{
+ AggregationResult::onDeserialize(is);
+ return is >> _min;
+}
+
+void
+MinAggregationResult::visitMembers(vespalib::ObjectVisitor &visitor) const
+{
+ AggregationResult::visitMembers(visitor);
+ visit(visitor, "min", _min);
+}
+
+Serializer & MaxAggregationResult::onSerialize(Serializer & os) const
+{
+ AggregationResult::onSerialize(os);
+ return os << _max;
+}
+
+Deserializer & MaxAggregationResult::onDeserialize(Deserializer & is)
+{
+ AggregationResult::onDeserialize(is);
+ return is >> _max;
+}
+
+void
+MaxAggregationResult::visitMembers(vespalib::ObjectVisitor &visitor) const
+{
+ AggregationResult::visitMembers(visitor);
+ visit(visitor, "max", _max);
+}
+
+static FieldBase _G_countField("count");
+static FieldBase _G_sumField("sum");
+
+Serializer & AverageAggregationResult::onSerialize(Serializer & os) const
+{
+ AggregationResult::onSerialize(os);
+ return os.put(_G_countField, _count) << _sum;
+}
+
+Deserializer & AverageAggregationResult::onDeserialize(Deserializer & is)
+{
+ AggregationResult::onDeserialize(is);
+ return is.get(_G_countField, _count) >> _sum;
+}
+
+void
+AverageAggregationResult::visitMembers(vespalib::ObjectVisitor &visitor) const
+{
+ AggregationResult::visitMembers(visitor);
+ visit(visitor, "count", _count);
+ visit(visitor, "sum", _sum);
+}
+
+Serializer & XorAggregationResult::onSerialize(Serializer & os) const
+{
+ AggregationResult::onSerialize(os);
+ return _xor.serialize(os);
+}
+
+Deserializer & XorAggregationResult::onDeserialize(Deserializer & is)
+{
+ AggregationResult::onDeserialize(is);
+ return _xor.deserialize(is);
+}
+
+void
+XorAggregationResult::visitMembers(vespalib::ObjectVisitor &visitor) const
+{
+ AggregationResult::visitMembers(visitor);
+ visit(visitor, "xor", _xor);
+}
+
+namespace {
+// Calculates the sum of all buckets.
+template <int BucketBits, typename HashT>
+int calculateRank(const Sketch<BucketBits, HashT> &sketch) {
+ if (sketch.getClassId() == SparseSketch<BucketBits, HashT>::classId) {
+ return static_cast<const SparseSketch<BucketBits, HashT>&>(sketch)
+ .getSize();
+ }
+ auto normal = static_cast<const NormalSketch<BucketBits, HashT>&>(sketch);
+ int rank = 0;
+ for (size_t i = 0; i < sketch.BUCKET_COUNT; ++i) {
+ rank += normal.bucket[i];
+ }
+ return rank;
+}
+} // namespace
+
+void ExpressionCountAggregationResult::onMerge(const AggregationResult &r) {
+ const ExpressionCountAggregationResult &result =
+ Identifiable::cast<const ExpressionCountAggregationResult &>(r);
+ _hll.merge(result._hll);
+ _rank.set(calculateRank(_hll.getSketch()));
+}
+void ExpressionCountAggregationResult::onAggregate(const ResultNode &result) {
+ size_t hash = result.hash();
+ const unsigned int seed = 42;
+ hash = XXH32(&hash, sizeof(hash), seed);
+ // The rank is a maintained sum of all buckets. This should give
+ // almost the same ordering as the actual estimates.
+ _rank += _hll.aggregate(hash);
+}
+void ExpressionCountAggregationResult::onReset() {
+ _hll = HyperLogLog<PRECISION>();
+ _rank.set(0);
+}
+Serializer &ExpressionCountAggregationResult::onSerialize(
+ Serializer &os) const {
+ AggregationResult::onSerialize(os);
+ _hll.serialize(os);
+ return os;
+}
+Deserializer &ExpressionCountAggregationResult::onDeserialize(
+ Deserializer &is) {
+ AggregationResult::onDeserialize(is);
+ _hll.deserialize(is);
+ _rank.set(calculateRank(_hll.getSketch()));
+ return is;
+}
+} // namespace aggregation
+} // namespace search
+
+// this function was added by ../../forcelink.sh
+void forcelink_file_searchlib_aggregation_aggregation() {}
diff --git a/searchlib/src/vespa/searchlib/aggregation/aggregation.h b/searchlib/src/vespa/searchlib/aggregation/aggregation.h
new file mode 100644
index 00000000000..cedb5571c6e
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/aggregation/aggregation.h
@@ -0,0 +1,22 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/searchlib/common/identifiable.h>
+#include <vespa/searchlib/common/rankedhit.h>
+#include <vespa/searchlib/attribute/attributeguard.h>
+#include <vespa/searchlib/aggregation/countaggregationresult.h>
+#include <vespa/searchlib/aggregation/sumaggregationresult.h>
+#include <vespa/searchlib/aggregation/minaggregationresult.h>
+#include <vespa/searchlib/aggregation/maxaggregationresult.h>
+#include <vespa/searchlib/aggregation/averageaggregationresult.h>
+#include <vespa/searchlib/aggregation/xoraggregationresult.h>
+#include <vespa/searchlib/aggregation/hitsaggregationresult.h>
+#include <vespa/searchlib/aggregation/grouping.h>
+
+namespace search {
+namespace aggregation {
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/aggregation/aggregationresult.h b/searchlib/src/vespa/searchlib/aggregation/aggregationresult.h
new file mode 100644
index 00000000000..69e0fbd2145
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/aggregation/aggregationresult.h
@@ -0,0 +1,116 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/searchlib/expression/expressiontree.h>
+#include <vespa/searchlib/expression/numericresultnode.h>
+#include <vespa/searchlib/expression/integerresultnode.h>
+#include <stdexcept>
+
+namespace search {
+namespace aggregation {
+
+using search::expression::ExpressionTree;
+using search::expression::ExpressionNode;
+using search::expression::ResultNode;
+using search::expression::DocId;
+using search::expression::NumericResultNode;
+using search::expression::SingleResultNode;
+using search::expression::IntegerResultNode;
+using search::expression::Int64ResultNode;
+using search::expression::ConfigureStaticParams;
+
+#define DECLARE_ABSTRACT_AGGREGATIONRESULT(cclass) \
+ DECLARE_IDENTIFIABLE_ABSTRACT_NS2(search, aggregation, cclass); \
+ private: \
+ public:
+
+#define DECLARE_AGGREGATIONRESULT(cclass) \
+ DECLARE_IDENTIFIABLE_NS2(search, aggregation, cclass); \
+ DECLARE_NBO_SERIALIZE; \
+ virtual cclass *clone() const { return new cclass(*this); } \
+ private: \
+ virtual void onMerge(const AggregationResult & b); \
+ virtual void onAggregate(const ResultNode &result); \
+ virtual void onReset(); \
+ public:
+
+// resultNodePrimitive : countHits | hits(INTEGER) | groups(INTEGER) | xor | sum | min | max |avg
+
+
+class AggregationResult : public expression::ExpressionNode
+{
+public:
+ DECLARE_NBO_SERIALIZE;
+ DECLARE_ABSTRACT_AGGREGATIONRESULT(AggregationResult);
+ class Configure : public vespalib::ObjectOperation, public vespalib::ObjectPredicate
+ {
+ private:
+ virtual void execute(vespalib::Identifiable &obj);
+ virtual bool check(const vespalib::Identifiable &obj) const;
+ };
+
+ virtual void visitMembers(vespalib::ObjectVisitor & visitor) const;
+ virtual void selectMembers(const vespalib::ObjectPredicate & predicate, vespalib::ObjectOperation & operation);
+
+ void reset() { onReset(); }
+ void merge(const AggregationResult & b) { onMerge(b); }
+ virtual void postMerge() {}
+ void aggregate(const document::Document & doc, HitRank rank) {
+ bool ok(_expressionTree->execute(doc, rank));
+ if (ok) {
+ onAggregate(_expressionTree->getResult(), doc, rank);
+ } else {
+ throw std::runtime_error(vespalib::make_string("aggregate(%s, %f) failed ", doc.getId().toString().c_str(), rank));
+ }
+ }
+ void aggregate(DocId docId, HitRank rank) {
+ bool ok(_expressionTree->execute(docId, rank));
+ if (ok) {
+ onAggregate(_expressionTree->getResult(), docId, rank);
+ } else {
+ throw std::runtime_error(vespalib::make_string("aggregate(%u, %f) failed ", docId, rank));
+ }
+ }
+ AggregationResult &setExpression(const ExpressionNode::CP &expr);
+ AggregationResult &setResult(const ResultNode::CP &result) {
+ prepare(result.get(), true);
+ return *this;
+ }
+
+ const ResultNode & getRank() const { return onGetRank(); }
+ const ResultNode & getResult() const { return onGetRank(); }
+ virtual ResultNode & getResult() { return const_cast<ResultNode &>(onGetRank()); }
+ virtual AggregationResult * clone() const = 0;
+ const ExpressionNode * getExpression() const { return _expressionTree->getRoot().get(); }
+ ExpressionNode * getExpression() { return _expressionTree->getRoot().get(); }
+protected:
+ AggregationResult() : _expressionTree(new ExpressionTree()), _tag(-1) { }
+private:
+ /// from expressionnode
+ virtual void onPrepare(bool preserveAccurateTypes) { (void) preserveAccurateTypes; }
+ /// from expressionnode
+ virtual bool onExecute() const { return true; }
+
+ void prepare() { if (getExpression() != NULL) { prepare(&getExpression()->getResult(), false); } }
+ void prepare(const ResultNode * result, bool useForInit) { if (result) { onPrepare(*result, useForInit); } }
+ virtual void onPrepare(const ResultNode & result, bool useForInit) = 0;
+ virtual void onMerge(const AggregationResult & b) = 0;
+ virtual void onReset() = 0;
+ virtual void onAggregate(const ResultNode &result) = 0;
+ virtual const ResultNode & onGetRank() const = 0;
+ virtual void onAggregate(const ResultNode &result, const document::Document & doc, HitRank rank) {
+ (void) doc;
+ (void) rank;
+ onAggregate(result);
+ }
+ virtual void onAggregate(const ResultNode &result, DocId docId, HitRank rank) {
+ (void) docId;
+ (void) rank;
+ onAggregate(result);
+ }
+ search::expression::ExpressionTree::LP _expressionTree;
+ uint32_t _tag;
+};
+
+}
+}
diff --git a/searchlib/src/vespa/searchlib/aggregation/averageaggregationresult.h b/searchlib/src/vespa/searchlib/aggregation/averageaggregationresult.h
new file mode 100644
index 00000000000..928594acf0a
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/aggregation/averageaggregationresult.h
@@ -0,0 +1,27 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/searchlib/aggregation/aggregationresult.h>
+
+namespace search {
+namespace aggregation {
+
+class AverageAggregationResult : public AggregationResult
+{
+public:
+ DECLARE_AGGREGATIONRESULT(AverageAggregationResult);
+ AverageAggregationResult() : _sum(), _count(0) {}
+ virtual void visitMembers(vespalib::ObjectVisitor &visitor) const;
+ const NumericResultNode & getAverage() const;
+ const NumericResultNode & getSum() const { return *_sum; }
+ uint64_t getCount() const { return _count; }
+private:
+ virtual const ResultNode & onGetRank() const { return getAverage(); }
+ virtual void onPrepare(const ResultNode & result, bool useForInit);
+ NumericResultNode::CP _sum;
+ uint64_t _count;
+ mutable NumericResultNode::CP _averageScratchPad;
+};
+
+}
+}
diff --git a/searchlib/src/vespa/searchlib/aggregation/countaggregationresult.h b/searchlib/src/vespa/searchlib/aggregation/countaggregationresult.h
new file mode 100644
index 00000000000..a00e14f1e78
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/aggregation/countaggregationresult.h
@@ -0,0 +1,27 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/searchlib/aggregation/aggregationresult.h>
+
+namespace search {
+namespace aggregation {
+
+class CountAggregationResult : public AggregationResult
+{
+public:
+ DECLARE_AGGREGATIONRESULT(CountAggregationResult);
+ CountAggregationResult() : AggregationResult(), _count(0) { }
+ virtual void visitMembers(vespalib::ObjectVisitor &visitor) const;
+ uint64_t getCount() const { return _count.get(); }
+ CountAggregationResult &setCount(uint64_t c) {
+ _count = c;
+ return *this;
+ }
+private:
+ virtual const ResultNode & onGetRank() const { return _count; }
+ virtual void onPrepare(const ResultNode & result, bool useForInit);
+ Int64ResultNode _count;
+};
+
+}
+}
diff --git a/searchlib/src/vespa/searchlib/aggregation/expressioncountaggregationresult.h b/searchlib/src/vespa/searchlib/aggregation/expressioncountaggregationresult.h
new file mode 100644
index 00000000000..182a28f2ec6
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/aggregation/expressioncountaggregationresult.h
@@ -0,0 +1,36 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "aggregationresult.h"
+#include <vespa/searchlib/grouping/hyperloglog.h>
+
+namespace search {
+namespace aggregation {
+
+/**
+ * Estimates the number of unique values of an expression that has
+ * been observed. This class keeps track of the raw data needed for
+ * estimation (the sketch). Actual estimation is done on the QR
+ * server.
+ */
+class ExpressionCountAggregationResult : public AggregationResult {
+ static const int PRECISION = 10;
+
+ HyperLogLog<PRECISION> _hll;
+ Int64ResultNode _rank;
+
+ virtual const ResultNode & onGetRank() const { return _rank; }
+ virtual void onPrepare(const ResultNode &, bool) {}
+public:
+ DECLARE_AGGREGATIONRESULT(ExpressionCountAggregationResult);
+ ExpressionCountAggregationResult() : AggregationResult(), _hll() { }
+
+ virtual void visitMembers(vespalib::ObjectVisitor &) const {}
+ const Sketch<PRECISION, uint32_t> &getSketch() const
+ { return _hll.getSketch(); }
+};
+
+} // namespace aggregation
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/aggregation/forcelink.hpp b/searchlib/src/vespa/searchlib/aggregation/forcelink.hpp
new file mode 100644
index 00000000000..4696d6f5c88
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/aggregation/forcelink.hpp
@@ -0,0 +1,29 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+void forcelink_file_searchlib_aggregation_grouping();
+void forcelink_file_searchlib_aggregation_modifiers();
+void forcelink_file_searchlib_aggregation_aggregation();
+void forcelink_file_searchlib_aggregation_hitlist();
+void forcelink_file_searchlib_aggregation_fs4hit();
+void forcelink_file_searchlib_aggregation_group();
+void forcelink_file_searchlib_aggregation_rawrank();
+void forcelink_file_searchlib_aggregation_hit();
+void forcelink_file_searchlib_aggregation_vdshit();
+void forcelink_file_searchlib_aggregation_hitsaggregationresult();
+void forcelink_file_searchlib_aggregation_groupinglevel();
+
+void forcelink_searchlib_aggregation() {
+ forcelink_file_searchlib_aggregation_grouping();
+ forcelink_file_searchlib_aggregation_modifiers();
+ forcelink_file_searchlib_aggregation_aggregation();
+ forcelink_file_searchlib_aggregation_hitlist();
+ forcelink_file_searchlib_aggregation_fs4hit();
+ forcelink_file_searchlib_aggregation_group();
+ forcelink_file_searchlib_aggregation_rawrank();
+ forcelink_file_searchlib_aggregation_hit();
+ forcelink_file_searchlib_aggregation_vdshit();
+ forcelink_file_searchlib_aggregation_hitsaggregationresult();
+ forcelink_file_searchlib_aggregation_groupinglevel();
+}
+
diff --git a/searchlib/src/vespa/searchlib/aggregation/fs4hit.cpp b/searchlib/src/vespa/searchlib/aggregation/fs4hit.cpp
new file mode 100644
index 00000000000..f63f44d3a12
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/aggregation/fs4hit.cpp
@@ -0,0 +1,61 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include "fs4hit.h"
+#include <vespa/vespalib/objects/visit.h>
+
+namespace search {
+namespace aggregation {
+
+using vespalib::FieldBase;
+using vespalib::Serializer;
+using vespalib::Deserializer;
+
+static FieldBase _G_pathField("path");
+static FieldBase _G_docIdField("docId");
+static FieldBase _G_globalIdField("globalId");
+static FieldBase _G_distributionKeyField("distributionKey");
+
+IMPLEMENT_IDENTIFIABLE_NS2(search, aggregation, FS4Hit, Hit);
+
+Serializer &
+FS4Hit::onSerialize(Serializer &os) const
+{
+ Hit::onSerialize(os);
+ os.put(_G_pathField, _path);
+ const unsigned char * rawGid = _globalId.get();
+ for (size_t i = 0; i < document::GlobalId::LENGTH; ++i) {
+ os.put(_G_globalIdField, rawGid[i]);
+ }
+ os.put(_G_distributionKeyField, _distributionKey);
+ return os;
+}
+
+Deserializer &
+FS4Hit::onDeserialize(Deserializer &is)
+{
+ Hit::onDeserialize(is);
+ is.get(_G_pathField, _path);
+ unsigned char rawGid[document::GlobalId::LENGTH];
+ for (size_t i = 0; i < document::GlobalId::LENGTH; ++i) {
+ is.get(_G_globalIdField, rawGid[i]);
+ }
+ _globalId.set(rawGid);
+ is.get(_G_distributionKeyField, _distributionKey);
+ return is;
+}
+
+void
+FS4Hit::visitMembers(vespalib::ObjectVisitor &visitor) const
+{
+ Hit::visitMembers(visitor);
+ visit(visitor, _G_pathField.getName(), _path);
+ visit(visitor, _G_docIdField.getName(), _docId);
+ visit(visitor, _G_globalIdField.getName(), _globalId.toString());
+ visit(visitor, _G_distributionKeyField.getName(), _distributionKey);
+}
+
+}
+}
+
+// this function was added by ../../forcelink.sh
+void forcelink_file_searchlib_aggregation_fs4hit() {}
diff --git a/searchlib/src/vespa/searchlib/aggregation/fs4hit.h b/searchlib/src/vespa/searchlib/aggregation/fs4hit.h
new file mode 100644
index 00000000000..b5392b25e72
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/aggregation/fs4hit.h
@@ -0,0 +1,39 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include "hit.h"
+#include "aggregationresult.h"
+#include <vespa/document/base/globalid.h>
+#include <vespa/searchlib/common/docstamp.h>
+
+namespace search {
+namespace aggregation {
+
+class FS4Hit : public Hit
+{
+private:
+ uint32_t _path;
+ uint32_t _docId;
+ document::GlobalId _globalId;
+ uint32_t _distributionKey;
+
+public:
+ DECLARE_IDENTIFIABLE_NS2(search, aggregation, FS4Hit);
+ DECLARE_NBO_SERIALIZE;
+ FS4Hit() : Hit(), _path(0), _docId(0), _globalId(), _distributionKey(-1) {}
+ FS4Hit(DocId docId, HitRank rank)
+ : Hit(rank), _path(0), _docId(docId), _globalId(), _distributionKey(-1) {}
+ virtual FS4Hit *clone() const { return new FS4Hit(*this); }
+ virtual void visitMembers(vespalib::ObjectVisitor &visitor) const;
+ uint32_t getPath() const { return _path; }
+ FS4Hit &setPath(uint32_t val) { _path = val; return *this; }
+ uint32_t getDocId() const { return _docId; }
+ const document::GlobalId & getGlobalId() const { return _globalId; }
+ FS4Hit &setGlobalId(const document::GlobalId & globalId) { _globalId = globalId; return *this; }
+ FS4Hit &setDistributionKey(uint32_t val) { _distributionKey = val; return *this; }
+ bool operator < (const FS4Hit &b) const { return cmp(b) < 0; }
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/aggregation/group.cpp b/searchlib/src/vespa/searchlib/aggregation/group.cpp
new file mode 100644
index 00000000000..84f5504ccf4
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/aggregation/group.cpp
@@ -0,0 +1,671 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+#include <vespa/searchlib/aggregation/group.h>
+#include <vespa/searchlib/aggregation/maxaggregationresult.h>
+#include <vespa/searchlib/aggregation/groupinglevel.h>
+#include <vespa/searchlib/aggregation/grouping.h>
+#include <vespa/searchlib/expression/aggregationrefnode.h>
+#include <vespa/vespalib/objects/visit.h>
+#include <vespa/vespalib/objects/objectpredicate.h>
+#include <vespa/vespalib/objects/objectoperation.h>
+#include <vespa/vespalib/objects/objectdumper.h>
+#include <vespa/vespalib/util/optimized.h>
+#include <vespa/vespalib/util/vstringfmt.h>
+
+LOG_SETUP(".searchlib.aggregation.group");
+
+namespace search {
+namespace aggregation {
+
+using search::expression::FloatResultNode;
+using search::expression::AggregationRefNode;
+using vespalib::FieldBase;
+using vespalib::Serializer;
+using vespalib::Deserializer;
+
+namespace {
+
+struct SortByGroupId {
+ bool operator()(const Group::ChildP & a, const Group::ChildP & b) {
+ return (a->cmpId(*b) < 0);
+ }
+};
+
+struct SortByGroupRank {
+ bool operator()(const Group::ChildP & a, const Group::ChildP & b) {
+ return (a->cmpRank(*b) < 0);
+ }
+};
+
+} // namespace search::aggregation::<unnamed>
+
+
+IMPLEMENT_IDENTIFIABLE_NS2(search, aggregation, Group, vespalib::Identifiable);
+
+void Group::destruct(GroupList & l, size_t m)
+{
+ for (size_t i(0); i < m; i++) {
+ destruct(l[i]);
+ }
+ delete [] l;
+ l = NULL;
+}
+
+int Group::cmpRank(const Group &rhs) const
+{
+ int diff(0);
+ for(size_t i(0), m(getOrderBySize()); (diff == 0) && (i < m); i++) {
+ uint32_t index = std::abs(getOrderBy(i)) - 1;
+ diff = expr(index).getResult().cmp(rhs.expr(index).getResult())*getOrderBy(i);
+ }
+ return diff
+ ? diff
+ : ((_rank > rhs._rank)
+ ? -1
+ : ((_rank < rhs._rank) ? 1 : 0));
+}
+
+Group & Group::addOrderBy(const ExpressionNode::CP & orderBy, bool ascending)
+{
+ assert(getOrderBySize() < sizeof(_orderBy)*2-1);
+ assert(getExprSize() < 15);
+ addExpressionResult(orderBy);
+ setOrderBy(getOrderBySize(), (ascending ? getExprSize() : -getExprSize()));
+ setOrderBySize(getOrderBySize() + 1);
+ setupAggregationReferences();
+ return *this;
+}
+
+Group & Group::addAggregationResult(const ExpressionNode::CP & aggr)
+{
+ assert(getAggrSize() < 15);
+ size_t newSize = getAggrSize() + 1 + getExprSize();
+ ExpressionVector n = new ExpressionNode::CP[newSize];
+ for (size_t i(0), m(getAggrSize()); i < m; i++) {
+ n[i] = _aggregationResults[i];
+ }
+ n[getAggrSize()] = aggr;
+ // Copy expressions after aggregationresults
+ for (size_t i(getAggrSize()); i < newSize - 1; i++) {
+ n[i + 1] = _aggregationResults[i];
+ }
+ delete [] _aggregationResults;
+ _aggregationResults = n;
+ setAggrSize(getAggrSize() + 1);
+ return *this;
+}
+
+Group & Group::addExpressionResult(const ExpressionNode::CP & expressionNode)
+{
+ uint32_t newSize = getAggrSize() + getExprSize() + 1;
+ ExpressionVector n = new ExpressionNode::CP[newSize];
+ for (uint32_t i(0); i < (newSize - 1); i++) {
+ n[i] = _aggregationResults[i];
+ }
+ n[newSize - 1] = expressionNode;
+ delete [] _aggregationResults;
+ _aggregationResults = n;
+ setExprSize(getExprSize()+1);
+ return *this;
+}
+
+void Group::setupAggregationReferences()
+{
+ AggregationRefNode::Configure exprRefSetup(_aggregationResults);
+ select(exprRefSetup, exprRefSetup);
+}
+
+Group & Group::addResult(const ExpressionNode::CP & aggr)
+{
+ assert(getExprSize() < 15);
+ addAggregationResult(aggr);
+ addExpressionResult(ExpressionNode::CP(new AggregationRefNode(getAggrSize() - 1)));
+ setupAggregationReferences();
+ return *this;
+}
+
+void Group::addChild(Group * child)
+{
+ const size_t sz(getChildrenSize());
+ assert(sz < 0xffffff);
+ if (_children == 0) {
+ _children = new ChildP[4];
+ } else if ((sz >=4) && vespalib::Optimized::msbIdx(sz) == vespalib::Optimized::lsbIdx(sz)) {
+ GroupList n = new ChildP[sz*2];
+ for (size_t i(0), m(getChildrenSize()); i < m; i++) {
+ n[i] = _children[i];
+ }
+ delete [] _children;
+ _children = n;
+ }
+ _children[sz] = child;
+ setChildrenSize(sz + 1);
+}
+
+void
+Group::selectMembers(const vespalib::ObjectPredicate &predicate,
+ vespalib::ObjectOperation &operation)
+{
+ if (_id.get()) {
+ _id->select(predicate, operation);
+ }
+ uint32_t totalSize = getAggrSize() + getExprSize();
+ for (uint32_t i(0); i < totalSize; i++) {
+ _aggregationResults[i]->select(predicate, operation);
+ }
+}
+
+void
+Group::preAggregate()
+{
+ assert(_childInfo._childMap == NULL);
+ _childInfo._childMap = new GroupHash(getChildrenSize()*2, GroupHasher(&_children), GroupEqual(&_children));
+ GroupHash & childMap = *_childInfo._childMap;
+ for (ChildP *it(_children), *mt(_children + getChildrenSize()); it != mt; ++it) {
+ (*it)->preAggregate();
+ childMap.insert(it - _children);
+ }
+}
+
+template <typename Doc>
+void Group::collect(const Doc & doc, HitRank rank)
+{
+ for(size_t i(0), m(getAggrSize()); i < m; i++) {
+ getAggr(i)->aggregate(doc, rank);
+ }
+}
+
+template <typename Doc>
+void
+Group::aggregate(const Grouping & grouping, uint32_t currentLevel, const Doc & doc, HitRank rank)
+{
+ if (currentLevel >= grouping.getFirstLevel()) {
+ collect(doc, rank);
+ }
+ if (currentLevel < grouping.getLevels().size()) {
+ groupNext(grouping.getLevels()[currentLevel], doc, rank);
+ }
+}
+
+template <typename Doc>
+void
+Group::groupNext(const GroupingLevel & level, const Doc & doc, HitRank rank)
+{
+ const ExpressionTree &selector = level.getExpression();
+ if (!selector.execute(doc, rank)) {
+ throw std::runtime_error("Does not know how to handle failed select statements");
+ }
+ const ResultNode &selectResult = selector.getResult();
+ level.group(*this, selectResult, doc, rank);
+}
+
+Group * Group::groupSingle(const ResultNode & selectResult, HitRank rank, const GroupingLevel & level)
+{
+ if (_childInfo._childMap == NULL) {
+ assert(getChildrenSize() == 0);
+ _childInfo._childMap = new GroupHash(1, GroupHasher(&_children), GroupEqual(&_children));
+ }
+ GroupHash & childMap = *_childInfo._childMap;
+ Group * group(NULL);
+ GroupHash::iterator found = childMap.find<ResultNode, GroupResult, ResultHash, ResultEqual>(selectResult, GroupResult(&_children));
+ if (found == childMap.end()) { // group not present in child map
+ if (level.allowMoreGroups(childMap.size())) {
+ group = new Group(level.getGroupPrototype());
+ group->setId(selectResult);
+ group->setRank(rank);
+ addChild(group);
+ childMap.insert(getChildrenSize() - 1);
+ }
+ } else {
+ group = _children[(*found)];
+ if ( ! level.isFrozen()) {
+ group->updateRank(rank);
+ }
+ }
+ return group;
+}
+
+void
+Group::postAggregate()
+{
+ delete _childInfo._childMap;
+ _childInfo._childMap = NULL;
+ for (ChildP *it(_children), *mt(_children + getChildrenSize()); it != mt; ++it) {
+ (*it)->postAggregate();
+ }
+}
+
+void
+Group::executeOrderBy()
+{
+ for (size_t i(0), m(getExprSize()); i < m; i++) {
+ ExpressionNode & e(expr(i));
+ e.prepare(false); // TODO: What should we do about this flag?
+ e.execute();
+ }
+}
+
+void Group::sortById()
+{
+ std::sort(_children, _children + getChildrenSize(), SortByGroupId());
+ for (ChildP *it(_children), *mt(_children + getChildrenSize()); it != mt; ++it) {
+ (*it)->sortById();
+ }
+}
+
+void
+Group::merge(const std::vector<GroupingLevel> &levels,
+ uint32_t firstLevel, uint32_t currentLevel, Group &b)
+{
+ bool frozen = (currentLevel < firstLevel); // is this level frozen ?
+ _rank = std::max(_rank, b._rank);
+
+ if (!frozen) { // should we merge collectors for this level ?
+ for(size_t i(0), m(getAggrSize()); i < m; i++) {
+ getAggr(i)->merge(*b.getAggr(i));
+ }
+ }
+ GroupList z = new ChildP[getChildrenSize() + b.getChildrenSize()];
+ size_t kept(0);
+ ChildP * px = _children;
+ ChildP * ex = _children + getChildrenSize();
+ ChildP * py = b._children;
+ ChildP * ey = b._children + b.getChildrenSize();
+ while (px != ex && py != ey) {
+ int c = (*px)->cmpId(**py);
+ if (c == 0) {
+ (*px)->merge(levels, firstLevel, currentLevel + 1, **py);
+ z[kept++] = *px;
+ reset(*px);
+ ++px;
+ ++py;
+ } else if (c < 0) {
+ z[kept++] = *px;
+ reset(*px);
+ ++px;
+ } else {
+ z[kept++] = *py;
+ reset(*py);
+ ++py;
+ }
+ }
+ for (; px != ex; ++px) {
+ z[kept++] = *px;
+ reset(*px);
+ }
+ for (; py != ey; ++py) {
+ z[kept++] = *py;
+ reset(*py);
+ }
+ std::swap(_children, z);
+ destruct(z, getAllChildrenSize());
+ setChildrenSize(kept);
+ _childInfo._allChildren = 0;
+}
+
+void
+Group::prune(const Group & b, uint32_t lastLevel, uint32_t currentLevel)
+{
+ if (currentLevel >= lastLevel) {
+ return;
+ }
+
+ GroupList keep = new ChildP[b.getChildrenSize()];
+ size_t kept(0);
+ ChildP * px = _children;
+ ChildP * ex = _children + getAllChildrenSize();
+ const ChildP * py = b._children;
+ const ChildP * ey = b._children + b.getChildrenSize();
+ // Assumes that both lists are ordered by group id
+ while (py != ey && px != ex) {
+ if ((*py)->cmpId(**px) > 0) {
+ px++;
+ } else if ((*py)->cmpId(**px) == 0) {
+ keep[kept++] = (*px);
+ (*px)->prune((**py), lastLevel, currentLevel + 1);
+ reset(*px);
+ px++;
+ py++;
+ } else if ((*py)->cmpId(**px) < 0) {
+ py++;
+ }
+ }
+ std::swap(_children, keep);
+ destruct(keep, getAllChildrenSize());
+ setChildrenSize(kept);
+ _childInfo._allChildren = 0;
+}
+
+void
+Group::mergePartial(const std::vector<GroupingLevel> &levels,
+ uint32_t firstLevel,
+ uint32_t lastLevel,
+ uint32_t currentLevel,
+ const Group & b)
+{
+ bool frozen = (currentLevel < firstLevel);
+
+ if (!frozen) {
+ for(size_t i(0), m(getAggrSize()); i < m; i++) {
+ getAggr(i)->merge(b.getAggr(i));
+ }
+ for(size_t i(0), m(getExprSize()); i < m; i++) {
+ expr(i).execute();
+ }
+
+
+ // At this level, we must create a copy of the other nodes children.
+ if (currentLevel >= lastLevel) {
+ for (ChildP *it(b._children), *mt(b._children + b.getChildrenSize()); it != mt; ++it) {
+ ChildP g(new Group(levels[currentLevel].getGroupPrototype()));
+ g->partialCopy(**it);
+ addChild(g);
+ }
+ return;
+ }
+ }
+
+ ChildP * px = _children;
+ ChildP * ex = _children + getChildrenSize();
+ const ChildP * py = b._children;
+ const ChildP * ey = b._children + b.getChildrenSize();
+ // Assumes that both lists are ordered by group id
+ while (py != ey && px != ex) {
+ if ((*py)->cmpId(**px) > 0) {
+ px++;
+ } else if ((*py)->cmpId(**px) == 0) {
+ (*px)->mergePartial(levels, firstLevel, lastLevel, currentLevel + 1, **py);
+ px++;
+ py++;
+ } else if ((*py)->cmpId(**px) < 0) {
+ py++;
+ }
+ }
+}
+
+void
+Group::postMerge(const std::vector<GroupingLevel> &levels,
+ uint32_t firstLevel,
+ uint32_t currentLevel)
+{
+ bool frozen = (currentLevel < firstLevel); // is this level frozen ?
+
+ if (!frozen) {
+ for(size_t i(0), m(getAggrSize()); i < m; i++) {
+ getAggr(i)->postMerge();
+ }
+ }
+ bool hasNext = (currentLevel < levels.size()); // is there a next level ?
+ if (!hasNext) { // we have reached the bottom of the tree
+ return;
+ }
+ for (ChildP *it(_children), *mt(_children + getChildrenSize()); it != mt; ++it) {
+ (*it)->executeOrderBy();
+ }
+ int64_t maxGroups = levels[currentLevel].getPrecision();
+ for (size_t i(getChildrenSize()); i < _childInfo._allChildren; i++) {
+ destruct(_children[i]);
+ reset(_children[i]);
+ }
+ _childInfo._allChildren = getChildrenSize();
+ if (getChildrenSize() > (uint64_t)maxGroups) { // prune groups
+ std::sort(_children, _children + getChildrenSize(), SortByGroupRank());
+ setChildrenSize(maxGroups);
+ }
+ for (ChildP *it(_children), *mt(_children + getChildrenSize()); it != mt; ++it) {
+ (*it)->postMerge(levels, firstLevel, currentLevel + 1);
+ }
+}
+
+Group & Group::setRank(RawRank r)
+{
+ _rank = isnan(r) ? -HUGE_VAL : r;
+ return *this;
+}
+
+Group & Group::updateRank(RawRank r)
+{
+ return setRank(std::max(_rank, r));
+}
+
+bool Group::needResort() const
+{
+ bool resort(needFullRank());
+ for (const ChildP *it(_children), *mt(_children + getChildrenSize()); !resort && (it != mt); ++it) {
+ resort = (*it)->needResort();
+ }
+ return resort;
+}
+
+Serializer & Group::onSerialize(Serializer & os) const
+{
+ if (getChildrenSize() > 1) {
+ for (size_t i(1), m(getChildrenSize()); i < m; i++) {
+ assert(_children[i]->cmpId(*_children[i-1]) > 0);
+ }
+ }
+ LOG(debug, "%s", _id->asString().c_str());
+ os << _id << _rank;
+ os << uint32_t(getOrderBySize());
+ for (size_t i(0), m(getOrderBySize()); i < m; i++) {
+ os << int32_t(getOrderBy(i));
+ }
+ os << uint32_t(getAggrSize());
+ for(size_t i(0), m(getAggrSize()); i < m; i++) {
+ os << getAggrCP(i);
+ }
+ os << uint32_t(getExprSize());
+ for(size_t i(0), m(getExprSize()); i < m; i++) {
+ os << getExprCP(i);
+ }
+ os << uint32_t(getChildrenSize());
+ for (size_t i(0), m(getChildrenSize()); i < m; i++) {
+ os << *_children[i];
+ }
+ return os << _tag;
+}
+
+Deserializer & Group::onDeserialize(Deserializer & is)
+{
+ uint32_t count(0);
+ is >> _id >> _rank >> count;
+ assert(count < sizeof(_orderBy)*2);
+ setOrderBySize(count);
+ for(uint32_t i(0); i < count; i++) {
+ int32_t tmp(0);
+ is >> tmp;
+ assert((-7<= tmp) && (tmp <= 7));
+ setOrderBy(i, tmp);
+ }
+ uint32_t aggrSize(0);
+ is >> aggrSize;
+ assert(aggrSize < 16);
+ // To avoid protocol changes, we must first deserialize the aggregation
+ // results into a temporary buffer, and then reallocate the actual
+ // vector when we know the total size. Then we copy the temp buffer and
+ // deserialize the rest to the end of the vector.
+ ExpressionVector tmpAggregationResults = new ExpressionNode::CP[aggrSize];
+ setAggrSize(aggrSize);
+ for(uint32_t i(0); i < aggrSize; i++) {
+ is >> tmpAggregationResults[i];
+ }
+ uint32_t exprSize(0);
+ is >> exprSize;
+ delete [] _aggregationResults;
+
+ _aggregationResults = new ExpressionNode::CP[aggrSize + exprSize];
+ for (uint32_t i(0); i < aggrSize; i++) {
+ _aggregationResults[i] = tmpAggregationResults[i];
+ }
+ delete [] tmpAggregationResults;
+
+ assert(exprSize < 16);
+ setExprSize(exprSize);
+ for (uint32_t i(aggrSize); i < aggrSize + exprSize; i++) {
+ is >> _aggregationResults[i];
+ }
+ setupAggregationReferences();
+ is >> count;
+ destruct(_children, getAllChildrenSize());
+ _childInfo._allChildren = 0;
+ _children = new ChildP[std::max(4ul, 2ul << vespalib::Optimized::msbIdx(count))];
+ setChildrenSize(count);
+ for(uint32_t i(0); i < count; i++) {
+ ChildP group(new Group);
+ is >> *group;
+ _children[i] = group;
+ }
+ is >> _tag;
+ LOG(debug, "%s", _id->asString().c_str());
+ if (getChildrenSize() > 1) {
+ for (size_t i(1), m(getChildrenSize()); i < m; i++) {
+ assert(_children[i]->cmpId(*_children[i-1]) > 0);
+ }
+ }
+ return is;
+}
+
+void
+Group::visitMembers(vespalib::ObjectVisitor &visitor) const
+{
+ visit(visitor, "id", _id);
+ visit(visitor, "rank", _rank);
+// visit(visitor, "orderBy", _orderBy);
+ visitor.openStruct("orderBy", "[]");
+ visit(visitor, "size", getOrderBySize());
+ for (size_t i(0), m(getOrderBySize()); i < m; i++) {
+ visit(visitor, vespalib::make_vespa_string("[%lu]", i), getOrderBy(i));
+ }
+ visitor.closeStruct();
+// visit(visitor, "aggregationResults", _aggregationResults);
+ visitor.openStruct("aggregationresults", "[]");
+ visit(visitor, "size", getAggrSize());
+ for (size_t i(0), m(getAggrSize()); i < m; i++) {
+ visit(visitor, vespalib::make_vespa_string("[%lu]", i), getAggrCP(i));
+ }
+ visitor.closeStruct();
+// visit(visitor, "expressionResults", _expressionResults);
+ visitor.openStruct("expressionResults", "[]");
+ visit(visitor, "size", getExprSize());
+ for (size_t i(0), m(getExprSize()); i < m; i++) {
+ visit(visitor, vespalib::make_vespa_string("[%lu]", i), getExprCP(i));
+ }
+ visitor.closeStruct();
+ //visit(visitor, "children", _children);
+ visitor.openStruct("children", "[]");
+ visit(visitor, "size", getChildrenSize());
+ for (size_t i(0), m(getChildrenSize()); i < m; i++) {
+ visit(visitor, vespalib::make_vespa_string("[%lu]", i), getChild(i));
+ }
+ visitor.closeStruct();
+ visit(visitor, "tag", _tag);
+}
+
+Group::Group() :
+ _id(),
+ _rank(0),
+ _packedLength(0),
+ _tag(-1),
+ _aggregationResults(NULL),
+ _orderBy(),
+ _children(NULL),
+ _childInfo()
+{
+ memset(_orderBy, 0, sizeof(_orderBy));
+ _childInfo._childMap = NULL;
+}
+
+Group::Group(const Group & rhs) :
+ Identifiable(rhs),
+ _id(rhs._id),
+ _rank(rhs._rank),
+ _packedLength(rhs._packedLength),
+ _tag(rhs._tag),
+ _aggregationResults(NULL),
+ _orderBy(),
+ _children(NULL),
+ _childInfo()
+{
+ _childInfo._childMap = NULL;
+ memcpy(_orderBy, rhs._orderBy, sizeof(_orderBy));
+ uint32_t totalAggrSize = rhs.getAggrSize() + rhs.getExprSize();
+ if (totalAggrSize > 0) {
+ _aggregationResults = new ExpressionNode::CP[totalAggrSize];
+ for (size_t i(0), m(totalAggrSize); i < m; i++) {
+ _aggregationResults[i] = rhs._aggregationResults[i];
+ }
+ setupAggregationReferences();
+ }
+
+ if ( rhs.getChildrenSize() > 0 ) {
+ _children = new ChildP[std::max(4ul, 2ul << vespalib::Optimized::msbIdx(rhs.getChildrenSize()))];
+ size_t i(0);
+ for (const ChildP *it(rhs._children), *mt(rhs._children + rhs.getChildrenSize()); it != mt; ++it, i++) {
+ _children[i] = ChildP(new Group(**it));
+ }
+ }
+}
+
+Group::~Group()
+{
+ destruct(_children, getAllChildrenSize());
+ setChildrenSize(0);
+ _childInfo._allChildren = 0;
+ delete [] _aggregationResults;
+}
+
+Group & Group::operator =(const Group & rhs)
+{
+ if (&rhs != this) {
+ Group g(rhs);
+ swap(g);
+ }
+ return *this;
+}
+
+Group &
+Group::partialCopy(const Group & rhs)
+{
+ setId(*rhs._id);
+ _rank = rhs._rank;
+ uint32_t totalAggrSize = getAggrSize() + getExprSize();
+ for(size_t i(0), m(totalAggrSize); i < m; i++) {
+ _aggregationResults[i] = rhs._aggregationResults[i];
+ }
+ for(size_t i(0), m(getAggrSize()); i < m; i++) {
+ getAggr(i)->reset();
+ }
+ setAggrSize(rhs.getAggrSize());
+ setOrderBySize(rhs.getOrderBySize());
+ setExprSize(rhs.getExprSize());
+ setupAggregationReferences();
+ memcpy(_orderBy, rhs._orderBy, sizeof(_orderBy));
+ return *this;
+}
+
+void Group::swap(Group & rhs)
+{
+ _id.swap(rhs._id);
+ std::swap(_rank, rhs._rank);
+ std::swap(_aggregationResults, rhs._aggregationResults);
+ std::swap(_children, rhs._children);
+ std::swap(_childInfo._childMap, rhs._childInfo._childMap);
+ {
+ int8_t tmp[sizeof(_orderBy)];
+ memcpy(tmp, _orderBy, sizeof(_orderBy));
+ memcpy(_orderBy, rhs._orderBy, sizeof(_orderBy));
+ memcpy(rhs._orderBy, tmp, sizeof(_orderBy));
+ }
+ std::swap(_tag, rhs._tag);
+ std::swap(_packedLength, rhs._packedLength);
+}
+
+template void Group::aggregate(const Grouping & grouping, uint32_t currentLevel, const DocId & doc, HitRank rank);
+template void Group::aggregate(const Grouping & grouping, uint32_t currentLevel, const document::Document & doc, HitRank rank);
+
+}
+}
+
+// this function was added by ../../forcelink.sh
+void forcelink_file_searchlib_aggregation_group() {}
diff --git a/searchlib/src/vespa/searchlib/aggregation/group.h b/searchlib/src/vespa/searchlib/aggregation/group.h
new file mode 100644
index 00000000000..1559f53cd9b
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/aggregation/group.h
@@ -0,0 +1,201 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/searchlib/aggregation/aggregationresult.h>
+#include <vespa/vespalib/stllike/hash_set.h>
+#include <vector>
+#include "rawrank.h"
+
+namespace search {
+namespace aggregation {
+
+class GroupingLevel;
+class Grouping;
+
+/**
+ * Represents a Group instance. To make grouping fast, the serialization format and the group instance itself is very compact. The format is as follows:
+ *
+ * +-------------------------------------+-----------------+
+ * | what | number of bytes |
+ * +-------------------------------------+-----------------+
+ * | result node id ptr | 8 |
+ * | group rank | 8 |
+ * | serialized length | 4 |
+ * | group tag | 4 |
+ * | aggregator vector | 8 |
+ * | orderby vector | 2 |
+ * | sub group vector | 8 |
+ * | sub group vector size/temp hash map | 8 |
+ * +-------------------------------------+-----------------+
+ *
+ * Total: 50 bytes
+ */
+class Group : public vespalib::Identifiable
+{
+public:
+ typedef Group * ChildP;
+ typedef std::unique_ptr<Group> UP;
+ typedef ChildP * GroupList;
+ struct GroupEqual : public std::binary_function<ChildP, ChildP, bool> {
+ GroupEqual(const GroupList * v) : _v(v) { }
+ bool operator()(uint32_t a, uint32_t b) { return (*_v)[a]->getId().cmpFast((*_v)[b]->getId()) == 0; }
+ const GroupList *_v;
+ };
+ struct GroupHasher {
+ GroupHasher(const GroupList * v) : _v(v) { }
+ size_t operator() (uint32_t arg) const { return (*_v)[arg]->getId().hash(); }
+ const GroupList *_v;
+ };
+ struct GroupResult {
+ GroupResult(const GroupList * v) : _v(v) { }
+ const ResultNode & operator() (uint32_t arg) const { return (*_v)[arg]->getId(); }
+ const GroupList *_v;
+ };
+ struct ResultLess : public std::binary_function<ResultNode::CP, ResultNode::CP, bool> {
+ bool operator()(const ResultNode::CP & a, const ResultNode::CP & b) { return a->cmpFast(*b) < 0; }
+ };
+ struct ResultEqual : public std::binary_function<ResultNode, ResultNode, bool> {
+ bool operator()(const ResultNode & a, const ResultNode & b) { return a.cmpFast(b) == 0; }
+ };
+ struct ResultHash {
+ size_t operator() (const ResultNode & arg) const { return arg.hash(); }
+ };
+
+ typedef ExpressionNode::CP * ExpressionVector;
+ typedef vespalib::hash_set<uint32_t, GroupHasher, GroupEqual > GroupHash;
+ typedef std::vector<GroupingLevel> GroupingLevelList;
+
+private:
+ ResultNode::CP _id; // the label of this group, separating it from other groups
+ RawRank _rank; // The default rank taken from the highest hit relevance.
+ uint32_t _packedLength; // Length of the 3 vectors below
+ uint32_t _tag; // Opaque tag used to identify the group by the client.
+
+ // The collectors and expressions stored by this group. Currently, both aggregation results and expressions used by orderby() are stored in this
+ // array to save 8 bytes in the Group size. This makes it important to use the getAggr() and expr() methods for accessing elements,
+ // as they will correctly offset the index to the correct place in the array.
+ ExpressionVector _aggregationResults;
+
+ uint8_t _orderBy[2]; // How this group is ranked, negative means reverse rank.
+ ChildP *_children; // the sub-groups of this group. Great care must be taken to ensure proper destruct.
+ union ChildInfo {
+ GroupHash *_childMap; // child map used during aggregation
+ size_t _allChildren; // Keep real number of children.
+ } _childInfo;
+
+ bool needFullRank() const { return getOrderBySize() != 0; }
+ Group & partialCopy(const Group & rhs);
+ void setOrderBy(uint32_t i, int32_t v) {
+ if (v < 0) {
+ v = -v;
+ v = v | 0x8;
+ }
+ _orderBy[i/2] = (_orderBy[i/2] & (0xf0 >> (4*(i%2)))) | (v << (4*(i%2)));
+ }
+ uint32_t getExprSize() const { return (_packedLength >> 4) & 0x03; }
+ void setAggrSize(uint32_t v) { _packedLength = (_packedLength & ~0x0f) | v; }
+ void setExprSize(uint32_t v) { _packedLength = (_packedLength & ~0x30) | (v << 4); }
+ void setOrderBySize(uint32_t v) { _packedLength = (_packedLength & ~0xc0) | (v << 6); }
+ void setChildrenSize(uint32_t v) { _packedLength = (_packedLength & ~0xffffff00) | (v << 8); }
+ AggregationResult * getAggr(size_t i) { return static_cast<AggregationResult *>(_aggregationResults[i].get()); }
+ const AggregationResult & getAggr(size_t i) const { return static_cast<const AggregationResult &>(*_aggregationResults[i]); }
+ const ExpressionNode::CP & getAggrCP(size_t i) const { return _aggregationResults[i]; }
+ const ExpressionNode::CP & getExprCP(size_t i) const { return _aggregationResults[getExpr(i)]; }
+ ExpressionNode & expr(size_t i) { return *_aggregationResults[getExpr(i)]; }
+ const ExpressionNode & expr(size_t i) const { return *_aggregationResults[getExpr(i)]; }
+ static void reset(Group * & v) { v = NULL; }
+ static void destruct(Group * v) { if (v) { delete v; } }
+ static void destruct(GroupList & l, size_t sz);
+ void addChild(Group * child);
+ void setupAggregationReferences();
+ size_t getAllChildrenSize() const { return std::max(static_cast<size_t>(getChildrenSize()), _childInfo._allChildren); }
+ template <typename Doc>
+ VESPA_DLL_LOCAL void groupNext(const GroupingLevel & level, const Doc & docId, HitRank rank);
+public:
+ DECLARE_IDENTIFIABLE_NS2(search, aggregation, Group);
+ DECLARE_NBO_SERIALIZE;
+ virtual void visitMembers(vespalib::ObjectVisitor &visitor) const;
+ Group();
+ Group(const Group & rhs);
+ Group & operator =(const Group & rhs);
+ virtual ~Group();
+ void swap(Group & rhs);
+
+ int cmpId(const Group &rhs) const { return _id->cmpFast(*rhs._id); }
+ int cmpRank(const Group &rhs) const;
+ Group & setRank(RawRank r);
+ Group & updateRank(RawRank r);
+ RawRank getRank() const { return _rank; }
+
+ VESPA_DLL_LOCAL Group * groupSingle(const ResultNode & result, HitRank rank, const GroupingLevel & level);
+
+ bool hasId() const { return (_id.get() != NULL); }
+ const ResultNode &getId() const { return *_id; }
+
+ Group unchain() const { return *this; }
+
+ Group &setId(const ResultNode &id) { _id.reset(static_cast<ResultNode *>(id.clone())); return *this; }
+ Group &addAggregationResult(const ExpressionNode::CP &result);
+ Group &addResult(const ExpressionNode::CP &aggr);
+ Group &addExpressionResult(const ExpressionNode::CP &expressionNode);
+ Group &addOrderBy(const ExpressionNode::CP & orderBy, bool ascending);
+ Group &addChild(const Group &child) { addChild(new Group(child)); return *this; }
+ Group &addChild(Group::UP child) { addChild(child.release()); return *this; }
+
+ /**
+ * Prunes this tree, keeping only the nodes found in another
+ * tree.
+ *
+ * @param b The tree containing the nodes that should be kept.
+ * @param lastLevel The last level on which to perform pruning.
+ * @param currentLevel The current level on which to perform pruning.
+ **/
+ void prune(const Group & b, uint32_t lastLevel, uint32_t currentLevel);
+
+ /**
+ * Recursively checks if any itself or any children needs a full resort.
+ * Then all hits must be processed and should be doen before any hit sorting.
+ */
+ bool needResort() const;
+
+ virtual void selectMembers(const vespalib::ObjectPredicate &predicate,
+ vespalib::ObjectOperation &operation);
+
+ void preAggregate();
+ template <typename Doc>
+ VESPA_DLL_LOCAL void aggregate(const Grouping & grouping, uint32_t currentLevel, const Doc & docId, HitRank rank);
+
+ template <typename Doc>
+ void collect(const Doc & docId, HitRank rank);
+ void postAggregate();
+ void merge(const std::vector<GroupingLevel> &levels, uint32_t firstLevel, uint32_t currentLevel, Group &b);
+ void executeOrderBy();
+
+ /**
+ * Merge children and results of another tree within the unfrozen parts of
+ * this tree.
+ *
+ * @param b The tree to pick children and results from.
+ * @param firstLevel The first level to merge.
+ * @param lastLevel The last level to merge.
+ * @param currentLevel The current level on which merging should be done.
+ **/
+ void mergePartial(const std::vector<GroupingLevel> &levels, uint32_t firstLevel, uint32_t lastLevel, uint32_t currentLevel, const Group & b);
+ void postMerge(const std::vector<GroupingLevel> &levels, uint32_t firstLevel, uint32_t currentLevel);
+ void sortById();
+ uint32_t getChildrenSize() const { return (_packedLength >> 8); }
+ const Group & getChild(size_t i) const { return *_children[i]; }
+ GroupList groups() const { return _children; }
+ const AggregationResult & getAggregationResult(size_t i) const { return static_cast<const AggregationResult &>(*_aggregationResults[i]); }
+ AggregationResult & getAggregationResult(size_t i) { return static_cast<AggregationResult &>(*_aggregationResults[i]); }
+ uint32_t getAggrSize() const { return _packedLength & 0x0f; }
+ uint32_t getOrderBySize() const { return (_packedLength >> 6) & 0x03; }
+ uint32_t getExpr(uint32_t i) const { return getAggrSize() + i; }
+ int32_t getOrderBy(uint32_t i) const {
+ int32_t v((_orderBy[i/2] >> (4*(i%2))) & 0x0f);
+ return (v & 0x8) ? -(v&0x7) : v;
+ }
+};
+
+}
+}
diff --git a/searchlib/src/vespa/searchlib/aggregation/grouping.cpp b/searchlib/src/vespa/searchlib/aggregation/grouping.cpp
new file mode 100644
index 00000000000..22b438cd539
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/aggregation/grouping.cpp
@@ -0,0 +1,357 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".searchlib.aggregation.grouping");
+#include <vespa/searchlib/aggregation/grouping.h>
+#include <vespa/searchlib/aggregation/hitsaggregationresult.h>
+#include <stdexcept>
+#include <vespa/vespalib/objects/visit.h>
+#include <vespa/vespalib/objects/objectpredicate.h>
+#include <vespa/vespalib/objects/objectoperation.h>
+#include <vespa/searchlib/attribute/stringbase.h>
+
+using namespace search::expression;
+using vespalib::FieldBase;
+using vespalib::Serializer;
+using vespalib::Deserializer;
+
+namespace search {
+namespace aggregation {
+
+namespace {
+
+void selectGroups(const vespalib::ObjectPredicate &p, vespalib::ObjectOperation &op,
+ Group &group, uint32_t first, uint32_t last, uint32_t curr)
+{
+ if (curr > last) {
+ return;
+ }
+ if (curr >= first) {
+ group.select(p, op);
+ }
+ Group::GroupList list = group.groups();
+ for (uint32_t i(0), m(group.getChildrenSize()); i < m; ++i) {
+ selectGroups(p, op, *list[i], first, last, curr + 1);
+ }
+}
+
+using search::aggregation::Grouping;
+using search::aggregation::GroupingLevel;
+using search::aggregation::Group;
+using search::expression::ExpressionTree;
+using search::expression::ExpressionNode;
+using search::expression::AttributeNode;
+using search::expression::EnumResultNode;
+using search::expression::EnumResultNodeVector;
+using search::expression::StringResultNode;
+using search::expression::ResultNode;
+using search::StringAttribute;
+
+class EnumConverter : public vespalib::ObjectOperation, public vespalib::ObjectPredicate
+{
+private:
+ Grouping &_grouping;
+ uint32_t _level;
+public:
+ EnumConverter(Grouping & g, uint32_t level) : _grouping(g), _level(level) { }
+ virtual void execute(vespalib::Identifiable &obj) {
+ Group &group = static_cast<Group &>(obj);
+ uint32_t tmplevel = _level;
+ if (group.hasId()) {
+ if (group.getId().inherits(EnumResultNode::classId)) {
+ const EnumResultNode & er = static_cast<const EnumResultNode &>(group.getId());
+ const Grouping::GroupingLevelList &gll = _grouping.getLevels();
+ const GroupingLevel & gl = gll[_level];
+ const ExpressionNode::LP & en = gl.getExpression().getRoot();
+ const AttributeNode & an = static_cast<const AttributeNode &>(*en);
+ StringResultNode srn((static_cast<const StringAttribute *>(an.getAttribute()))->getFromEnum(er.getEnum()));
+ group.setId(srn);
+ }
+ tmplevel++;
+ }
+ EnumConverter enumConverter(_grouping, tmplevel);
+ Group::GroupList list = group.groups();
+ for (uint32_t i(0), m(group.getChildrenSize()); i < m; ++i) {
+ list[i]->select(enumConverter, enumConverter);
+ }
+ }
+ virtual bool check(const vespalib::Identifiable &obj) const { return obj.inherits(Group::classId); }
+};
+
+class GlobalIdConverter : public vespalib::ObjectOperation, public vespalib::ObjectPredicate
+{
+private:
+ const IDocumentMetaStore &_metaStore;
+public:
+ GlobalIdConverter(const IDocumentMetaStore &metaStore) : _metaStore(metaStore) {}
+ virtual void execute(vespalib::Identifiable & obj) {
+ FS4Hit & hit = static_cast<FS4Hit &>(obj);
+ document::GlobalId gid;
+ _metaStore.getGid(hit.getDocId(), gid);
+ hit.setGlobalId(gid);
+ LOG(debug, "GlobalIdConverter: lid(%u) -> gid(%s)", hit.getDocId(), hit.getGlobalId().toString().c_str());
+ }
+ virtual bool check(const vespalib::Identifiable & obj) const {
+ return obj.inherits(FS4Hit::classId);
+ }
+};
+
+
+} // namespace search::aggregation::<unnamed>
+
+IMPLEMENT_IDENTIFIABLE_NS2(search, aggregation, Grouping, vespalib::Identifiable);
+
+Grouping::Grouping()
+ : _id(0),
+ _valid(true),
+ _all(false),
+ _topN(-1),
+ _firstLevel(0),
+ _lastLevel(0),
+ _levels(),
+ _root(),
+ _clock(NULL),
+ _timeOfDoom(0)
+{
+}
+
+void
+Grouping::selectMembers(const vespalib::ObjectPredicate &predicate,
+ vespalib::ObjectOperation &operation)
+{
+ for (size_t i(0), m(_levels.size()); i < m; i++) {
+ _levels[i].select(predicate, operation);
+ }
+ selectGroups(predicate, operation, _root, _firstLevel, _lastLevel, 0);
+}
+
+void
+Grouping::prune(const Grouping & b)
+{
+ _root.prune(b._root, b._lastLevel, 0);
+}
+
+void
+Grouping::mergePartial(const Grouping & b)
+{
+ _root.mergePartial(_levels, _firstLevel, _lastLevel, 0, b._root);
+}
+
+
+void
+Grouping::merge(Grouping & b)
+{
+ _root.merge(_levels, _firstLevel, 0, b._root);
+}
+
+void
+Grouping::postMerge()
+{
+ _root.postMerge(_levels, _firstLevel, 0);
+}
+
+void
+Grouping::preAggregate(bool isOrdered)
+{
+ for (size_t i(0), m(_levels.size()); i < m; i++) {
+ _levels[i].prepare(this, i, isOrdered);
+ }
+ _root.preAggregate();
+}
+
+void Grouping::aggregate(DocId from, DocId to)
+{
+ preAggregate(false);
+ if (to > from) {
+ for(DocId i(from), m(i + getMaxN(to-from)); i < m; i++) {
+ aggregate(i, 0.0);
+ }
+ }
+ postProcess();
+}
+
+void Grouping::postProcess()
+{
+ postAggregate();
+ postMerge();
+ bool hasEnums(false);
+ for (size_t i(0), m(_levels.size()); !hasEnums && (i < m); i++) {
+ const GroupingLevel & l = _levels[i];
+ const ResultNode & id(l.getExpression().getResult());
+ hasEnums = id.inherits(EnumResultNode::classId) ||
+ id.inherits(EnumResultNodeVector::classId);
+ const Group & g(l.getGroupPrototype());
+ for (size_t j(0), n(g.getAggrSize()); !hasEnums && (j < n); j++) {
+ const ResultNode & r(g.getAggregationResult(j).getResult());
+ hasEnums = r.inherits(EnumResultNode::classId) ||
+ r.inherits(EnumResultNodeVector::classId);
+ }
+ }
+ if (hasEnums) {
+ EnumConverter enumConverter(*this, 0);
+ _root.select(enumConverter, enumConverter);
+ }
+ sortById();
+}
+
+void Grouping::aggregateWithoutClock(const RankedHit * rankedHit, unsigned int len) {
+ for(unsigned int i(0); i < len; i++) {
+ aggregate(rankedHit[i]._docId, rankedHit[i]._rankValue);
+ }
+}
+
+void Grouping::aggregateWithClock(const RankedHit * rankedHit, unsigned int len) {
+ for(unsigned int i(0); (i < len) && !hasExpired(); i++) {
+ aggregate(rankedHit[i]._docId, rankedHit[i]._rankValue);
+ }
+}
+
+void Grouping::aggregate(const RankedHit * rankedHit, unsigned int len)
+{
+ bool isOrdered(! needResort());
+ preAggregate(isOrdered);
+ HitsAggregationResult::SetOrdered pred;
+ select(pred, pred);
+ if (_clock == NULL) {
+ aggregateWithoutClock(rankedHit, getMaxN(len));
+ } else {
+ aggregateWithClock(rankedHit, getMaxN(len));
+ }
+ postProcess();
+}
+
+void Grouping::aggregate(const RankedHit * rankedHit, unsigned int len, const BitVector * bVec)
+{
+ preAggregate(false);
+ if (_clock == NULL) {
+ aggregateWithoutClock(rankedHit, getMaxN(len));
+ } else {
+ aggregateWithClock(rankedHit, getMaxN(len));
+ }
+ if (bVec != NULL) {
+ unsigned int sz(bVec->size());
+ if (_clock == NULL) {
+ if (getTopN() > 0) {
+ for(DocId d(bVec->getFirstTrueBit()), i(0), m(getMaxN(sz)); (d < sz) && (i < m); d = bVec->getNextTrueBit(d+1), i++) {
+ aggregate(d, 0.0);
+ }
+ } else {
+ for(DocId d(bVec->getFirstTrueBit()); d < sz; d = bVec->getNextTrueBit(d+1)) {
+ aggregate(d, 0.0);
+ }
+ }
+ } else {
+ if (getTopN() > 0) {
+ for(DocId d(bVec->getFirstTrueBit()), i(0), m(getMaxN(sz)); (d < sz) && (i < m) && !hasExpired(); d = bVec->getNextTrueBit(d+1), i++) {
+ aggregate(d, 0.0);
+ }
+ } else {
+ for(DocId d(bVec->getFirstTrueBit()); (d < sz) && !hasExpired(); d = bVec->getNextTrueBit(d+1)) {
+ aggregate(d, 0.0);
+ }
+ }
+ }
+ }
+ postProcess();
+}
+
+void Grouping::aggregate(DocId docId, HitRank rank)
+{
+ _root.aggregate(*this, 0, docId, rank);
+}
+
+void Grouping::aggregate(const document::Document & doc, HitRank rank)
+{
+ _root.aggregate(*this, 0, doc, rank);
+}
+
+void Grouping::convertToGlobalId(const search::IDocumentMetaStore &metaStore)
+{
+ GlobalIdConverter conv(metaStore);
+ select(conv, conv);
+}
+
+void Grouping::postAggregate()
+{
+ _root.postAggregate();
+}
+
+void Grouping::sortById()
+{
+ _root.sortById();
+}
+
+void Grouping::configureStaticStuff(const ConfigureStaticParams & params)
+{
+ if (params._attrCtx != NULL) {
+ AttributeNode::Configure confAttr(*params._attrCtx);
+ select(confAttr, confAttr);
+ }
+
+ if (params._docType != NULL) {
+ DocumentAccessorNode::Configure confDoc(*params._docType);
+ select(confDoc, confDoc);
+ }
+ ExpressionTree::Configure treeConf;
+ select(treeConf, treeConf);
+
+ AggregationResult::Configure aggrConf;
+ select(aggrConf, aggrConf);
+}
+
+void Grouping::cleanupAttributeReferences()
+{
+ AttributeNode::CleanupAttributeReferences cleanupAttr;
+ select(cleanupAttr, cleanupAttr);
+}
+
+void Grouping::cleanTemporary()
+{
+ for (GroupingLevelList::iterator it(_levels.begin()), mt(_levels.end()); it != mt; ++it) {
+ if (it->getExpression().getRoot()->inherits(FunctionNode::classId)) {
+ static_cast<FunctionNode &>(*it->getExpression().getRoot()).reset();
+ }
+ }
+}
+
+bool Grouping::needResort() const
+{
+ bool resort(_root.needResort());
+ for (GroupingLevelList::const_iterator it(_levels.begin()), mt(_levels.end()); !resort && (it != mt); ++it) {
+ resort = it->needResort();
+ }
+ return (resort && getTopN() <= 0);
+}
+
+
+Serializer & Grouping::onSerialize(Serializer & os) const
+{
+ LOG(spam, "Grouping = %s", asString().c_str());
+ return os << _id << _valid << _all << _topN << _firstLevel << _lastLevel << _levels << _root;
+}
+
+Deserializer & Grouping::onDeserialize(Deserializer & is)
+{
+ is >> _id >> _valid >> _all >> _topN >> _firstLevel >> _lastLevel >> _levels >> _root;
+ LOG(spam, "Grouping = %s", asString().c_str());
+ return is;
+}
+
+void
+Grouping::visitMembers(vespalib::ObjectVisitor &visitor) const
+{
+ visit(visitor, "id", _id);
+ visit(visitor, "valid", _valid);
+ visit(visitor, "all", _all);
+ visit(visitor, "topN", _topN);
+ visit(visitor, "firstLevel", _firstLevel);
+ visit(visitor, "lastLevel", _lastLevel);
+ visit(visitor, "levels", _levels);
+ visit(visitor, "root", _root);
+}
+
+}
+}
+
+// this function was added by ../../forcelink.sh
+void forcelink_file_searchlib_aggregation_grouping() {}
diff --git a/searchlib/src/vespa/searchlib/aggregation/grouping.h b/searchlib/src/vespa/searchlib/aggregation/grouping.h
new file mode 100644
index 00000000000..b9024c384bf
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/aggregation/grouping.h
@@ -0,0 +1,93 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include "groupinglevel.h"
+#include <vespa/searchlib/common/bitvector.h>
+#include <vespa/searchlib/common/idocumentmetastore.h>
+#include <vespa/searchlib/common/rankedhit.h>
+#include <vespa/vespalib/util/clock.h>
+
+namespace search {
+namespace aggregation {
+
+/**
+ * This class represents a top-level grouping request.
+ **/
+class Grouping : public vespalib::Identifiable
+{
+public:
+ typedef std::vector<GroupingLevel> GroupingLevelList;
+ typedef std::unique_ptr<Grouping> UP;
+
+private:
+ uint32_t _id; // client id for this grouping
+ bool _valid; // is this grouping object valid?
+ bool _all; // if true, group all document, not just hits (streaming only)
+ int64_t _topN; // hits to process per search node
+ uint32_t _firstLevel; // first processing level this iteration (levels before considered frozen)
+ uint32_t _lastLevel; // last processing level this iteration
+ GroupingLevelList _levels; // grouping parameters per level
+ Group _root; // the grouping tree
+ const vespalib::Clock *_clock; // An optional clock to be used for timeout handling.
+ fastos::TimeStamp _timeOfDoom; // Used if clock is specified. This is time when request expires.
+
+ bool hasExpired() const { return _clock->getTimeNS() >= _timeOfDoom; }
+ void aggregateWithoutClock(const RankedHit * rankedHit, unsigned int len);
+ void aggregateWithClock(const RankedHit * rankedHit, unsigned int len);
+ void postProcess();
+public:
+ DECLARE_IDENTIFIABLE_NS2(search, aggregation, Grouping);
+ DECLARE_NBO_SERIALIZE;
+ virtual void visitMembers(vespalib::ObjectVisitor &visitor) const;
+ Grouping();
+
+ Grouping unchain() const { return *this; }
+
+ Grouping &setId(unsigned int i) { _id = i; return *this; }
+ Grouping &invalidate() { _valid = false; return *this; }
+ Grouping &setAll(bool v) { _all = v; return *this; }
+ Grouping &setTopN(int64_t v) { _topN = v; return *this; }
+ Grouping &setFirstLevel(unsigned int level) { _firstLevel = level; return *this; }
+ Grouping &setLastLevel(unsigned int level) { _lastLevel = level; return *this; }
+ Grouping &addLevel(const GroupingLevel &level) { _levels.push_back(level); return *this; }
+ Grouping &setRoot(const Group &root_) { _root = root_; return *this; }
+ Grouping &setClock(const vespalib::Clock * clock) { _clock = clock; return *this; }
+ Grouping &setTimeOfDoom(fastos::TimeStamp timeOfDoom) { _timeOfDoom = timeOfDoom; return *this; }
+
+ unsigned int getId() const { return _id; }
+ bool valid() const { return _valid; }
+ bool getAll() const { return _all; }
+ int64_t getTopN() const { return _topN; }
+ size_t getMaxN(size_t n) const { return std::min(n, static_cast<size_t>(getTopN())); }
+ uint32_t getFirstLevel() const { return _firstLevel; }
+ uint32_t getLastLevel() const { return _lastLevel; }
+ const GroupingLevelList &getLevels() const { return _levels; }
+ const Group &getRoot() const { return _root; }
+ bool needResort() const;
+
+ GroupingLevelList &levels() { return _levels; }
+ Group &root() { return _root; }
+
+ virtual void selectMembers(const vespalib::ObjectPredicate &predicate,
+ vespalib::ObjectOperation &operation);
+
+ void merge(Grouping & b);
+ void mergePartial(const Grouping & b);
+ void postMerge();
+ void preAggregate(bool isOrdered);
+ void prune(const Grouping & b);
+ void aggregate(DocId from, DocId to);
+ void aggregate(const RankedHit * rankedHit, unsigned int len);
+ void aggregate(const RankedHit * rankedHit, unsigned int len, const BitVector * bVec);
+ void aggregate(DocId docId, HitRank rank = 0);
+ void aggregate(const document::Document & doc, HitRank rank = 0);
+ void convertToGlobalId(const search::IDocumentMetaStore &metaStore);
+ void postAggregate();
+ void sortById();
+ void cleanTemporary();
+ void configureStaticStuff(const ConfigureStaticParams & params);
+ void cleanupAttributeReferences();
+};
+
+}
+}
diff --git a/searchlib/src/vespa/searchlib/aggregation/groupinglevel.cpp b/searchlib/src/vespa/searchlib/aggregation/groupinglevel.cpp
new file mode 100644
index 00000000000..bb801633b26
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/aggregation/groupinglevel.cpp
@@ -0,0 +1,109 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/searchlib/aggregation/groupinglevel.h>
+#include <vespa/searchlib/aggregation/grouping.h>
+#include <vespa/vespalib/objects/visit.h>
+
+namespace search {
+namespace aggregation {
+
+using expression::ResultNodeVector;
+using vespalib::FieldBase;
+using vespalib::Serializer;
+using vespalib::Deserializer;
+
+IMPLEMENT_IDENTIFIABLE_NS2(search, aggregation, GroupingLevel, vespalib::Identifiable);
+
+GroupingLevel::GroupingLevel() :
+ _maxGroups(-1),
+ _precision(-1),
+ _isOrdered(false),
+ _frozen(false),
+ _classify(),
+ _collect(),
+ _grouper(NULL)
+{
+}
+
+Serializer & GroupingLevel::onSerialize(Serializer & os) const
+{
+ return os << _maxGroups << _precision << _classify << _collect;
+}
+
+Deserializer & GroupingLevel::onDeserialize(Deserializer & is)
+{
+ return is >> _maxGroups >> _precision >> _classify >> _collect;
+}
+
+void
+GroupingLevel::visitMembers(vespalib::ObjectVisitor &visitor) const
+{
+ visit(visitor, "maxGroups", _maxGroups);
+ visit(visitor, "precision", _precision);
+ visit(visitor, "classify", _classify);
+ visit(visitor, "collect", _collect);
+}
+
+void GroupingLevel::selectMembers(const vespalib::ObjectPredicate & predicate, vespalib::ObjectOperation & operation)
+{
+ _classify.select(predicate, operation);
+ _collect.select(predicate, operation);
+}
+
+GroupingLevel::Grouper::Grouper(const Grouping * grouping, uint32_t level) :
+ _grouping(grouping),
+ _level(level),
+ _frozen(_level < _grouping->getFirstLevel()),
+ _hasNext(_level < _grouping->getLevels().size()),
+ _doNext(_level < _grouping->getLastLevel())
+{
+}
+
+bool GroupingLevel::Grouper::isFrosen(size_t level) const
+{
+ return level < _grouping->getFirstLevel();
+}
+
+bool GroupingLevel::Grouper::hasNext(size_t level) const
+{
+ return level < _grouping->getLevels().size();
+}
+
+template<typename Doc>
+void GroupingLevel::SingleValueGrouper::groupDoc(Group & g, const ResultNode & result, const Doc & doc, HitRank rank) const
+{
+ Group * next = g.groupSingle(result, rank, _grouping->getLevels()[_level]);
+ if ((next != NULL) && doNext()) { // do next level ?
+ next->aggregate(*_grouping, _level + 1, doc, rank);
+ }
+}
+
+template<typename Doc>
+void GroupingLevel::MultiValueGrouper::groupDoc(Group & g, const ResultNode & result, const Doc & doc, HitRank rank) const
+{
+ const ResultNodeVector & rv(static_cast<const ResultNodeVector &>(result));
+ for (size_t i(0), m(rv.size()); i < m; i++) {
+ const ResultNode & sr(rv.get(i));
+ SingleValueGrouper::groupDoc(g, sr, doc, rank);
+ }
+}
+
+void GroupingLevel::prepare(const Grouping * grouping, uint32_t level, bool isOrdered_)
+{
+ _isOrdered = isOrdered_;
+ _frozen = level < grouping->getFirstLevel();
+ if (_classify.getResult().inherits(ResultNodeVector::classId)) {
+ _grouper.reset(new MultiValueGrouper(grouping, level));
+ } else {
+ _grouper.reset(new SingleValueGrouper(grouping, level));
+ }
+}
+
+// template<> void GroupingLevel::MultiValueGrouper::groupDoc(Group & g, const ResultNode::CP & result, const document::Document & doc, HitRank rank, bool isOrdered) const;
+// template<> void GroupingLevel::MultiValueGrouper::groupDoc(Group & g, const ResultNode::CP & result, DocId doc, HitRank rank, bool isOrdered) const;
+
+}
+}
+
+// this function was added by ../../forcelink.sh
+void forcelink_file_searchlib_aggregation_groupinglevel() {}
diff --git a/searchlib/src/vespa/searchlib/aggregation/groupinglevel.h b/searchlib/src/vespa/searchlib/aggregation/groupinglevel.h
new file mode 100644
index 00000000000..c4ce462b0ce
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/aggregation/groupinglevel.h
@@ -0,0 +1,121 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include "group.h"
+#include <vespa/searchlib/expression/aggregationrefnode.h>
+
+namespace search {
+namespace aggregation {
+
+class Grouping;
+
+/**
+ * This struct contains information about how grouping should be
+ * performed on a given level in the grouping tree. The Grouping class
+ * holds an array of these, one for each level in the tree below the
+ * root.
+ **/
+class GroupingLevel : public vespalib::Identifiable
+{
+private:
+ class Grouper {
+ public:
+ virtual ~Grouper() { }
+ virtual void group(Group & group, const ResultNode & result, DocId doc, HitRank rank) const = 0;
+ virtual void group(Group & group, const ResultNode & result, const document::Document & doc, HitRank rank) const = 0;
+ virtual Grouper * clone() const = 0;
+ protected:
+ Grouper(const Grouping * grouping, uint32_t level);
+ bool isFrozen() const { return _frozen; }
+ bool hasNext() const { return _hasNext; }
+ bool doNext() const { return _doNext; }
+ bool isFrosen(size_t level) const;
+ bool hasNext(size_t level) const;
+ const Grouping * _grouping;
+ uint32_t _level;
+ bool _frozen;
+ bool _hasNext;
+ bool _doNext;
+ };
+ class SingleValueGrouper : public Grouper {
+ public:
+ SingleValueGrouper(const Grouping * grouping, uint32_t level) : Grouper(grouping, level) { }
+ protected:
+ template<typename Doc>
+ void groupDoc(Group & group, const ResultNode & result, const Doc & doc, HitRank rank) const;
+ virtual void group(Group & g, const ResultNode & result, DocId doc, HitRank rank) const {
+ groupDoc(g, result, doc, rank);
+ }
+ virtual void group(Group & g, const ResultNode & result, const document::Document & doc, HitRank rank) const {
+ groupDoc(g, result, doc, rank);
+ }
+ virtual SingleValueGrouper * clone() const { return new SingleValueGrouper(*this); }
+ };
+ class MultiValueGrouper : public SingleValueGrouper {
+ public:
+ MultiValueGrouper(const Grouping * grouping, uint32_t level) : SingleValueGrouper(grouping, level) { }
+ private:
+ template<typename Doc>
+ void groupDoc(Group & group, const ResultNode & result, const Doc & doc, HitRank rank) const;
+ virtual void group(Group & g, const ResultNode & result, DocId doc, HitRank rank) const {
+ groupDoc(g, result, doc, rank);
+ }
+ virtual void group(Group & g, const ResultNode & result, const document::Document & doc, HitRank rank) const {
+ groupDoc(g, result, doc, rank);
+ }
+ virtual MultiValueGrouper * clone() const { return new MultiValueGrouper(*this); }
+ };
+ int64_t _maxGroups;
+ int64_t _precision;
+ bool _isOrdered;
+ bool _frozen;
+ search::expression::ExpressionTree _classify;
+ Group _collect;
+
+ vespalib::CloneablePtr<Grouper> _grouper;
+public:
+
+ GroupingLevel();
+ DECLARE_IDENTIFIABLE_NS2(search, aggregation, GroupingLevel);
+ DECLARE_NBO_SERIALIZE;
+
+ GroupingLevel unchain() const { return *this; }
+
+ GroupingLevel &setMaxGroups(int64_t maxGroups) {
+ _maxGroups = maxGroups;
+ if ((maxGroups == -1) || (maxGroups > _precision)) {
+ _precision = maxGroups;
+ }
+ return *this;
+ }
+ GroupingLevel & freeze() { _frozen = true; return *this; }
+ GroupingLevel &setPresicion(int64_t precision) { _precision = precision; return *this; }
+ GroupingLevel &setExpression(const ExpressionNode::CP &root) { _classify = root; return *this; }
+ GroupingLevel &addResult(const ExpressionNode::CP &result) { _collect.addResult(result); return *this; }
+ GroupingLevel &addAggregationResult(const ExpressionNode::CP &aggr) { _collect.addAggregationResult(aggr); return *this; }
+ GroupingLevel &addOrderBy(const ExpressionNode::CP & orderBy, bool ascending) { _collect.addOrderBy(orderBy, ascending); return *this; }
+ bool needResort() const { return _collect.needResort(); }
+
+ int64_t getMaxGroups() const { return _maxGroups; }
+ int64_t getPrecision() const { return _precision; }
+ bool isFrozen() const { return _frozen; }
+ bool allowMoreGroups(size_t sz) const { return (!_frozen && (!_isOrdered || (sz < (uint64_t)_precision))); }
+ const ExpressionTree & getExpression() const { return _classify; }
+ const Group &getGroupPrototype() const { return _collect; }
+ void prepare(const Grouping * grouping, uint32_t level, bool isOrdered_);
+
+ Group &groupPrototype() { return _collect; }
+ const Group & groupPrototype() const { return _collect; }
+
+ template<typename Doc>
+ void group(Group & g, const ResultNode & result, const Doc & doc, HitRank rank) const {
+ _grouper->group(g, result, doc, rank);
+ }
+
+ virtual void visitMembers(vespalib::ObjectVisitor &visitor) const;
+ virtual void selectMembers(const vespalib::ObjectPredicate &predicate, vespalib::ObjectOperation &operation);
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/aggregation/hit.cpp b/searchlib/src/vespa/searchlib/aggregation/hit.cpp
new file mode 100644
index 00000000000..9bb7cf95757
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/aggregation/hit.cpp
@@ -0,0 +1,46 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include "hit.h"
+#include <vespa/vespalib/objects/visit.h>
+
+namespace search {
+namespace aggregation {
+
+using vespalib::FieldBase;
+using vespalib::Serializer;
+using vespalib::Deserializer;
+
+IMPLEMENT_IDENTIFIABLE_ABSTRACT_NS2(search, aggregation, Hit, vespalib::Identifiable);
+
+static FieldBase _G_rankField("rank");
+
+Serializer &
+Hit::onSerialize(Serializer &os) const
+{
+ return os.put(_G_rankField, _rank);
+}
+
+Deserializer &
+Hit::onDeserialize(Deserializer &is)
+{
+ return is.get(_G_rankField, _rank);
+}
+
+int
+Hit::onCmp(const Identifiable &b) const
+{
+ const Hit &h = (const Hit &)b;
+ return (_rank > h._rank) ? -1 : ((_rank < h._rank) ? 1 : 0);
+}
+
+void
+Hit::visitMembers(vespalib::ObjectVisitor &visitor) const
+{
+ visit(visitor, _G_rankField.getName(), _rank);
+}
+
+}
+}
+
+// this function was added by ../../forcelink.sh
+void forcelink_file_searchlib_aggregation_hit() {}
diff --git a/searchlib/src/vespa/searchlib/aggregation/hit.h b/searchlib/src/vespa/searchlib/aggregation/hit.h
new file mode 100644
index 00000000000..2fbed2510f2
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/aggregation/hit.h
@@ -0,0 +1,34 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/searchlib/common/identifiable.h>
+#include <vespa/searchlib/common/hitrank.h>
+#include "rawrank.h"
+
+
+namespace search {
+namespace aggregation {
+
+class Hit : public vespalib::Identifiable
+{
+private:
+ RawRank _rank;
+
+public:
+ DECLARE_IDENTIFIABLE_ABSTRACT_NS2(search, aggregation, Hit);
+ DECLARE_NBO_SERIALIZE;
+ typedef vespalib::IdentifiablePtr<Hit> CP;
+ typedef std::unique_ptr<Hit> UP;
+
+ Hit() : _rank() {}
+ Hit(RawRank rank) : _rank(rank) {}
+ RawRank getRank() const { return _rank; }
+ virtual Hit *clone() const = 0;
+ virtual int onCmp(const Identifiable &b) const;
+ virtual void visitMembers(vespalib::ObjectVisitor &visitor) const;
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/aggregation/hitlist.cpp b/searchlib/src/vespa/searchlib/aggregation/hitlist.cpp
new file mode 100644
index 00000000000..7994856ec46
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/aggregation/hitlist.cpp
@@ -0,0 +1,152 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+#include "hitsaggregationresult.h"
+#include <vespa/vespalib/objects/visit.h>
+#include "fs4hit.h"
+#include "vdshit.h"
+#include <algorithm>
+#include <vespa/vespalib/objects/objectpredicate.h>
+#include <vespa/vespalib/objects/objectoperation.h>
+
+namespace search {
+namespace aggregation {
+
+using vespalib::FieldBase;
+using vespalib::Serializer;
+using vespalib::Deserializer;
+
+IMPLEMENT_IDENTIFIABLE_NS2(search, aggregation, HitList, ResultNode);
+
+HitList & HitList::addHit(const FS4Hit & hit, uint32_t maxHits)
+{
+ if (_fs4hits.size() < maxHits) {
+ _fs4hits.push_back(hit);
+ if (_fs4hits.size() == maxHits) {
+ std::make_heap(_fs4hits.begin(), _fs4hits.end());
+ }
+ } else {
+ if (hit.cmp(_fs4hits.front()) < 0) {
+ std::pop_heap(_fs4hits.begin(), _fs4hits.end());
+ _fs4hits.push_back(hit);
+ std::push_heap(_fs4hits.begin(), _fs4hits.end());
+ }
+ }
+ return *this;
+}
+
+HitList & HitList::addHit(const VdsHit & hit, uint32_t maxHits)
+{
+ if (_vdshits.size() < maxHits) {
+ _vdshits.push_back(hit);
+ if (_vdshits.size() == maxHits) {
+ std::make_heap(_vdshits.begin(), _vdshits.end());
+ }
+ } else {
+ if (hit.cmp(_vdshits.front()) < 0) {
+ std::pop_heap(_vdshits.begin(), _vdshits.end());
+ _vdshits.push_back(hit);
+ std::push_heap(_vdshits.begin(), _vdshits.end());
+ }
+ }
+ return *this;
+}
+
+void
+HitList::onMerge(const HitList & b)
+{
+ _fs4hits.insert(_fs4hits.end(), b._fs4hits.begin(), b._fs4hits.end());
+ _vdshits.insert(_vdshits.end(), b._vdshits.begin(), b._vdshits.end());
+}
+
+void
+HitList::sort()
+{
+ std::sort(_fs4hits.begin(), _fs4hits.end());
+ std::sort(_vdshits.begin(), _vdshits.end());
+}
+
+void
+HitList::postMerge(uint32_t maxHits)
+{
+ sort();
+ if (_fs4hits.size() > maxHits) {
+ _fs4hits.resize(maxHits);
+ }
+ if (_vdshits.size() > maxHits) {
+ _vdshits.resize(maxHits);
+ }
+}
+
+Serializer &
+HitList::onSerialize(Serializer & os) const
+{
+ os << (uint32_t)(_fs4hits.size() + _vdshits.size());
+ for (uint32_t i(0); i < _fs4hits.size(); i++) {
+ Hit::CP hit(const_cast<FS4Hit *>(&_fs4hits[i]));
+ os << hit;
+ hit.release();
+ }
+ for (uint32_t i(0); i < _vdshits.size(); i++) {
+ Hit::CP hit(const_cast<VdsHit *>(&_vdshits[i]));
+ os << hit;
+ hit.release();
+ }
+ return os;
+}
+
+Deserializer &
+HitList::onDeserialize(Deserializer & is)
+{
+ uint32_t count(0);
+
+ is >> count;
+ for (uint32_t i(0); i < count; i++) {
+ Hit::CP hit;
+ is >> hit;
+ if (hit->inherits(FS4Hit::classId)) {
+ _fs4hits.push_back(static_cast<const FS4Hit &>(*hit));
+ } else {
+ _vdshits.push_back(static_cast<const VdsHit &>(*hit));
+ }
+ }
+ return is;
+}
+
+void
+HitList::clear()
+{
+ _fs4hits.clear();
+ _vdshits.clear();
+}
+
+void
+HitList::visitMembers(vespalib::ObjectVisitor & visitor) const
+{
+ visit(visitor, "fs4hits", _fs4hits);
+ visit(visitor, "vdshits", _vdshits);
+}
+
+void
+HitList::selectMembers(const vespalib::ObjectPredicate & predicate, vespalib::ObjectOperation & operation)
+{
+ for (uint32_t i(0); i < _fs4hits.size(); ++i) {
+ _fs4hits[i].select(predicate, operation);
+ }
+ for (uint32_t i(0); i < _vdshits.size(); ++i) {
+ _vdshits[i].select(predicate, operation);
+ }
+}
+
+void
+HitList::set(const ResultNode & rhs)
+{
+ (void) rhs;
+ throw std::runtime_error("HitList::set(const ResultNode & rhs) not implemented.");
+}
+
+}
+}
+
+// this function was added by ../../forcelink.sh
+void forcelink_file_searchlib_aggregation_hitlist() {}
diff --git a/searchlib/src/vespa/searchlib/aggregation/hitlist.h b/searchlib/src/vespa/searchlib/aggregation/hitlist.h
new file mode 100644
index 00000000000..a63e923d05e
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/aggregation/hitlist.h
@@ -0,0 +1,74 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/searchlib/common/identifiable.h>
+#include "fs4hit.h"
+#include "vdshit.h"
+
+namespace search {
+namespace aggregation {
+
+
+class HitList : public ResultNode
+{
+public:
+private:
+ typedef std::vector<FS4Hit> Fs4V;
+ typedef std::vector<VdsHit> VdsV;
+ std::vector<FS4Hit> _fs4hits;
+ std::vector<VdsHit> _vdshits;
+
+ virtual int64_t onGetInteger(size_t index) const { (void) index; return 0; }
+ virtual double onGetFloat(size_t index) const { (void) index; return 0.0; }
+ virtual ConstBufferRef onGetString(size_t index, BufferRef buf) const { (void) index; return buf; }
+ virtual size_t hash() const { return 0; }
+ virtual void set(const ResultNode & rhs);
+ virtual void decode(const void * buf) {
+ _fs4hits = *static_cast<const Fs4V *>(buf);
+ _vdshits = *static_cast<const VdsV *>(static_cast<const void *>(static_cast<const uint8_t *>(buf)+sizeof(_fs4hits)));
+ }
+ virtual void swap(void * buf) {
+ static_cast<Fs4V *>(buf)->swap(_fs4hits);
+ static_cast<VdsV *>(static_cast<void *>(static_cast<uint8_t *>(buf)+sizeof(_fs4hits)))->swap(_vdshits);
+ }
+ virtual void encode(void * buf) const {
+ *static_cast<Fs4V *>(buf) = _fs4hits;
+ *static_cast<VdsV *>(static_cast<void *>(static_cast<uint8_t *>(buf)+sizeof(_fs4hits))) = _vdshits;
+ }
+ virtual void create(void * buf) const {
+ new (buf) Fs4V();
+ new (static_cast<uint8_t *>(buf)+sizeof(_fs4hits)) VdsV();
+ }
+ virtual void destroy(void * buf) const {
+ static_cast<Fs4V *>(buf)->Fs4V::~Fs4V();
+ static_cast<VdsV *>(static_cast<void *>(static_cast<uint8_t *>(buf)+sizeof(_fs4hits)))->VdsV::~VdsV();
+ }
+ virtual size_t getRawByteSize() const { return sizeof(_fs4hits) + sizeof(_vdshits); }
+public:
+ DECLARE_IDENTIFIABLE_NS2(search, aggregation, HitList);
+ HitList * clone() const { return new HitList(*this); }
+ DECLARE_NBO_SERIALIZE;
+ HitList() :
+ _fs4hits(),
+ _vdshits()
+ {}
+ uint32_t size() const { return (_fs4hits.size() + _vdshits.size()); }
+ bool empty() const { return (_vdshits.empty() && _fs4hits.empty()); }
+ const Hit & front() const { return ((_fs4hits.size() > 0) ? (static_cast<const Hit &>(_fs4hits[0])) : (static_cast<const Hit &>(_vdshits[0]))); }
+
+ void postMerge(uint32_t maxHits);
+ void onMerge(const HitList & b);
+ void clear();
+
+ HitList & addHit(const FS4Hit & hit, uint32_t maxHits);
+ HitList & addHit(const VdsHit & hit, uint32_t maxHits);
+ virtual void visitMembers(vespalib::ObjectVisitor &visitor) const;
+ virtual void selectMembers(const vespalib::ObjectPredicate &predicate,
+ vespalib::ObjectOperation &operation);
+ void sort();
+ HitList & sort2() { sort(); return *this; }
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/aggregation/hitsaggregationresult.cpp b/searchlib/src/vespa/searchlib/aggregation/hitsaggregationresult.cpp
new file mode 100644
index 00000000000..bdc705a9a7c
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/aggregation/hitsaggregationresult.cpp
@@ -0,0 +1,119 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+#include "hitsaggregationresult.h"
+#include <vespa/vespalib/objects/visit.h>
+#include "fs4hit.h"
+#include "vdshit.h"
+#include <vespa/vespalib/objects/objectpredicate.h>
+#include <vespa/vespalib/objects/objectoperation.h>
+LOG_SETUP(".searchlib.aggregation.hitsaggregationresult");
+
+namespace search {
+namespace aggregation {
+
+using vespalib::FieldBase;
+using vespalib::Serializer;
+using vespalib::Deserializer;
+
+IMPLEMENT_IDENTIFIABLE_NS2(search, aggregation, HitsAggregationResult, AggregationResult);
+
+void HitsAggregationResult::onPrepare(const ResultNode & result, bool useForInit)
+{
+ (void) result;
+ (void) useForInit;
+}
+
+void
+HitsAggregationResult::onMerge(const AggregationResult &b)
+{
+ const HitsAggregationResult &rhs = (const HitsAggregationResult &)b;
+ _hits.onMerge(rhs._hits);
+}
+
+void
+HitsAggregationResult::onAggregate(const ResultNode &result, DocId docId, HitRank rank)
+{
+ (void) result;
+ if ( ! _isOrdered || (_hits.size() < _maxHits)) {
+ _hits.addHit(FS4Hit(docId, rank), _maxHits);
+ }
+}
+
+void
+HitsAggregationResult::onAggregate(const ResultNode & result, const document::Document & doc, HitRank rank)
+{
+ (void) result;
+ LOG(spam, "Filling vdshit for %s hits=%lu, maxHits=%u", doc.getId().toString().c_str(), (unsigned long)_hits.size(), _maxHits);
+ if (!_isOrdered || (_hits.size() < _maxHits)) {
+ VdsHit hit(doc.getId().toString(), rank);
+ vespalib::ConstBufferRef docsum(_summaryGenerator->fillSummary(0, _summaryClass));
+ hit.setSummary(docsum.c_str(), docsum.size());
+ LOG(spam, "actually filled %s with summary %s with blob of size %lu", doc.getId().toString().c_str(),_summaryClass.c_str(), docsum.size() );
+ _hits.addHit(hit, _maxHits);
+ }
+}
+
+void
+HitsAggregationResult::onAggregate(const ResultNode & result)
+{
+ (void) result;
+ assert(false);
+}
+
+void
+HitsAggregationResult::onReset()
+{
+ _hits.clear();
+}
+
+Serializer &
+HitsAggregationResult::onSerialize(Serializer & os) const
+{
+ AggregationResult::onSerialize(os);
+ os << _summaryClass << _maxHits;
+ _hits.serialize(os);
+ return os;
+}
+
+Deserializer &
+HitsAggregationResult::onDeserialize(Deserializer & is)
+{
+ AggregationResult::onDeserialize(is);
+ is >> _summaryClass >> _maxHits;
+ _hits.deserialize(is);
+ if (_maxHits == 0) {
+ _maxHits = std::numeric_limits<uint32_t>::max();
+ }
+ return is;
+}
+
+void
+HitsAggregationResult::visitMembers(vespalib::ObjectVisitor & visitor) const
+{
+ AggregationResult::visitMembers(visitor);
+ visit(visitor, "summaryClass", _summaryClass);
+ visit(visitor, "maxHits", _maxHits);
+ _hits.visitMembers(visitor);
+}
+
+void
+HitsAggregationResult::selectMembers(const vespalib::ObjectPredicate & predicate, vespalib::ObjectOperation & operation)
+{
+ AggregationResult::selectMembers(predicate, operation);
+ _hits.selectMembers(predicate, operation);
+}
+
+const ResultNode & HitsAggregationResult::onGetRank() const
+{
+ if ( ! _hits.empty() ) {
+ _bestHitRank = _hits.front().getRank();
+ }
+ return _bestHitRank;
+}
+
+}
+}
+
+// this function was added by ../../forcelink.sh
+void forcelink_file_searchlib_aggregation_hitsaggregationresult() {}
diff --git a/searchlib/src/vespa/searchlib/aggregation/hitsaggregationresult.h b/searchlib/src/vespa/searchlib/aggregation/hitsaggregationresult.h
new file mode 100644
index 00000000000..5b533360d01
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/aggregation/hitsaggregationresult.h
@@ -0,0 +1,76 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include "aggregationresult.h"
+#include "hitlist.h"
+
+namespace search {
+namespace aggregation {
+
+using search::expression::FloatResultNode;
+
+class HitsAggregationResult : public AggregationResult
+{
+public:
+ typedef vespalib::string SummaryClassType;
+
+ class SummaryGenerator
+ {
+ public:
+ virtual ~SummaryGenerator() { }
+ virtual vespalib::ConstBufferRef fillSummary(DocId lid, const SummaryClassType & summaryClass) = 0;
+ };
+
+private:
+ virtual void onPrepare(const ResultNode & result, bool useForInit);
+
+ virtual void onAggregate(const ResultNode &result, DocId docId, HitRank rank);
+ virtual void onAggregate(const ResultNode &result, const document::Document & doc, HitRank rank);
+ virtual const ResultNode & onGetRank() const;
+
+ SummaryClassType _summaryClass;
+ uint32_t _maxHits;
+ HitList _hits;
+ bool _isOrdered;
+ mutable FloatResultNode _bestHitRank;
+ SummaryGenerator *_summaryGenerator;
+
+public:
+ class SetOrdered : public vespalib::ObjectOperation, public vespalib::ObjectPredicate
+ {
+ private:
+ virtual void execute(vespalib::Identifiable &obj) { static_cast<HitsAggregationResult &>(obj)._isOrdered = true; }
+ virtual bool check(const vespalib::Identifiable &obj) const { return obj.getClass().inherits(HitsAggregationResult::classId); }
+ };
+
+ DECLARE_AGGREGATIONRESULT(HitsAggregationResult);
+ HitsAggregationResult() :
+ AggregationResult(),
+ _summaryClass("default"),
+ _maxHits(std::numeric_limits<uint32_t>::max()),
+ _hits(),
+ _isOrdered(false),
+ _bestHitRank(),
+ _summaryGenerator(0)
+ {}
+ virtual void postMerge() { _hits.postMerge(_maxHits); }
+ void setSummaryGenerator(SummaryGenerator & summaryGenerator) { _summaryGenerator = &summaryGenerator; }
+ const SummaryClassType & getSummaryClass() const { return _summaryClass; }
+ HitsAggregationResult setSummaryClass(const SummaryClassType & summaryClass) { _summaryClass = summaryClass; return *this; }
+ HitsAggregationResult &setMaxHits(uint32_t maxHits) {
+ _maxHits = (maxHits == 0) ? std::numeric_limits<uint32_t>::max() : maxHits;
+ return *this;
+ }
+ HitsAggregationResult & addHit(const FS4Hit &hit) { _hits.addHit(hit, _maxHits); return *this; }
+ HitsAggregationResult & addHit(const VdsHit &hit) { _hits.addHit(hit, _maxHits); return *this; }
+ virtual void visitMembers(vespalib::ObjectVisitor &visitor) const;
+ virtual void selectMembers(const vespalib::ObjectPredicate &predicate,
+ vespalib::ObjectOperation &operation);
+ HitsAggregationResult & sort() { _hits.sort(); return *this; }
+ virtual const ResultNode & getResult() const { return _hits; }
+ virtual ResultNode & getResult() { return _hits; }
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/aggregation/maxaggregationresult.h b/searchlib/src/vespa/searchlib/aggregation/maxaggregationresult.h
new file mode 100644
index 00000000000..4f63b9decec
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/aggregation/maxaggregationresult.h
@@ -0,0 +1,24 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/searchlib/aggregation/aggregationresult.h>
+
+namespace search {
+namespace aggregation {
+
+class MaxAggregationResult : public AggregationResult
+{
+public:
+ DECLARE_AGGREGATIONRESULT(MaxAggregationResult);
+ MaxAggregationResult() : AggregationResult(), _max() { }
+ MaxAggregationResult(const SingleResultNode & max) : AggregationResult(), _max(max) { }
+ virtual void visitMembers(vespalib::ObjectVisitor &visitor) const;
+ const SingleResultNode & getMax() const { return *_max; }
+private:
+ virtual const ResultNode & onGetRank() const { return getMax(); }
+ virtual void onPrepare(const ResultNode & result, bool useForInit);
+ SingleResultNode::CP _max;
+};
+
+}
+}
diff --git a/searchlib/src/vespa/searchlib/aggregation/minaggregationresult.h b/searchlib/src/vespa/searchlib/aggregation/minaggregationresult.h
new file mode 100644
index 00000000000..db909cce3f7
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/aggregation/minaggregationresult.h
@@ -0,0 +1,22 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/searchlib/aggregation/aggregationresult.h>
+
+namespace search {
+namespace aggregation {
+
+class MinAggregationResult : public AggregationResult
+{
+public:
+ DECLARE_AGGREGATIONRESULT(MinAggregationResult);
+ virtual void visitMembers(vespalib::ObjectVisitor &visitor) const;
+ const SingleResultNode & getMin() const { return *_min; }
+private:
+ virtual const ResultNode & onGetRank() const { return getMin(); }
+ virtual void onPrepare(const ResultNode & result, bool useForInit);
+ SingleResultNode::CP _min;
+};
+
+}
+}
diff --git a/searchlib/src/vespa/searchlib/aggregation/modifiers.cpp b/searchlib/src/vespa/searchlib/aggregation/modifiers.cpp
new file mode 100644
index 00000000000..9ce8da41ef3
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/aggregation/modifiers.cpp
@@ -0,0 +1,54 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/searchlib/aggregation/modifiers.h>
+#include <vespa/searchlib/aggregation/grouping.h>
+#include <vespa/searchlib/expression/multiargfunctionnode.h>
+
+using namespace search::expression;
+
+namespace search {
+namespace aggregation {
+
+bool Attribute2DocumentAccessor::check(const vespalib::Identifiable &obj) const
+{
+ return obj.getClass().inherits(GroupingLevel::classId) || obj.getClass().inherits(AggregationResult::classId) || obj.getClass().inherits(MultiArgFunctionNode::classId);
+}
+
+void Attribute2DocumentAccessor::execute(vespalib::Identifiable &obj)
+{
+ if (obj.getClass().inherits(GroupingLevel::classId)) {
+ GroupingLevel & g(static_cast<GroupingLevel &>(obj));
+ if (g.getExpression().getRoot()->inherits(AttributeNode::classId)) {
+ g.setExpression(new DocumentFieldNode(static_cast<const AttributeNode &>(*g.getExpression().getRoot()).getAttributeName()));
+ } else {
+ g.getExpression().getRoot()->select(*this, *this);
+ }
+ g.groupPrototype().select(*this, *this);
+ } else if(obj.getClass().inherits(AggregationResult::classId)) {
+ AggregationResult & a(static_cast<AggregationResult &>(obj));
+ ExpressionNode * e(a.getExpression());
+ if (e) {
+ if (e->inherits(AttributeNode::classId)) {
+ a.setExpression(new DocumentFieldNode(static_cast<const AttributeNode &>(*e).getAttributeName()));
+ } else {
+ e->select(*this, *this);
+ }
+ }
+ } else if(obj.getClass().inherits(MultiArgFunctionNode::classId)) {
+ MultiArgFunctionNode::ExpressionNodeVector & v(static_cast<MultiArgFunctionNode &>(obj).expressionNodeVector());
+ for(size_t i(0), m(v.size()); i < m; i++) {
+ ExpressionNode::CP & e(v[i]);
+ if (e->inherits(AttributeNode::classId)) {
+ e.reset(new DocumentFieldNode(static_cast<const AttributeNode &>(*e).getAttributeName()));
+ } else {
+ e->select(*this, *this);
+ }
+ }
+ }
+}
+
+}
+}
+
+// this function was added by ../../forcelink.sh
+void forcelink_file_searchlib_aggregation_modifiers() {}
diff --git a/searchlib/src/vespa/searchlib/aggregation/modifiers.h b/searchlib/src/vespa/searchlib/aggregation/modifiers.h
new file mode 100644
index 00000000000..048a8db43b7
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/aggregation/modifiers.h
@@ -0,0 +1,19 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/vespalib/objects/objectoperation.h>
+#include <vespa/vespalib/objects/objectpredicate.h>
+
+namespace search {
+namespace aggregation {
+
+class Attribute2DocumentAccessor : public vespalib::ObjectOperation, public vespalib::ObjectPredicate
+{
+private:
+ virtual void execute(vespalib::Identifiable &obj);
+ virtual bool check(const vespalib::Identifiable &obj) const;
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/aggregation/perdocexpression.h b/searchlib/src/vespa/searchlib/aggregation/perdocexpression.h
new file mode 100644
index 00000000000..fcea0918af7
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/aggregation/perdocexpression.h
@@ -0,0 +1,46 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/searchlib/expression/integerresultnode.h>
+#include <vespa/searchlib/expression/floatresultnode.h>
+#include <vespa/searchlib/expression/stringresultnode.h>
+#include <vespa/searchlib/expression/nullresultnode.h>
+#include <vespa/searchlib/expression/rawresultnode.h>
+#include <vespa/searchlib/expression/attributenode.h>
+#include <vespa/searchlib/expression/constantnode.h>
+#include <vespa/searchlib/expression/addfunctionnode.h>
+#include <vespa/searchlib/expression/dividefunctionnode.h>
+#include <vespa/searchlib/expression/multiplyfunctionnode.h>
+#include <vespa/searchlib/expression/modulofunctionnode.h>
+#include <vespa/searchlib/expression/minfunctionnode.h>
+#include <vespa/searchlib/expression/maxfunctionnode.h>
+#include <vespa/searchlib/expression/andfunctionnode.h>
+#include <vespa/searchlib/expression/orfunctionnode.h>
+#include <vespa/searchlib/expression/xorfunctionnode.h>
+#include <vespa/searchlib/expression/negatefunctionnode.h>
+#include <vespa/searchlib/expression/sortfunctionnode.h>
+#include <vespa/searchlib/expression/reversefunctionnode.h>
+#include <vespa/searchlib/expression/strlenfunctionnode.h>
+#include <vespa/searchlib/expression/normalizesubjectfunctionnode.h>
+#include <vespa/searchlib/expression/strcatfunctionnode.h>
+#include <vespa/searchlib/expression/numelemfunctionnode.h>
+#include <vespa/searchlib/expression/tostringfunctionnode.h>
+#include <vespa/searchlib/expression/torawfunctionnode.h>
+#include <vespa/searchlib/expression/catfunctionnode.h>
+#include <vespa/searchlib/expression/xorbitfunctionnode.h>
+#include <vespa/searchlib/expression/md5bitfunctionnode.h>
+#include <vespa/searchlib/expression/resultvector.h>
+#include <vespa/searchlib/expression/fixedwidthbucketfunctionnode.h>
+#include <vespa/searchlib/expression/rangebucketpredef.h>
+#include <vespa/searchlib/expression/timestamp.h>
+#include <vespa/searchlib/expression/relevancenode.h>
+#include <vespa/searchlib/expression/zcurve.h>
+#include <vespa/searchlib/expression/debugwaitfunctionnode.h>
+#include <vespa/searchlib/expression/aggregationrefnode.h>
+
+namespace search {
+namespace aggregation {
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/aggregation/predicates.h b/searchlib/src/vespa/searchlib/aggregation/predicates.h
new file mode 100644
index 00000000000..c6ef6bc554a
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/aggregation/predicates.h
@@ -0,0 +1,47 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include "fs4hit.h"
+#include <vespa/vespalib/objects/objectpredicate.h>
+#include <vespa/vespalib/objects/objectoperation.h>
+
+namespace search {
+namespace aggregation {
+
+class CountFS4Hits : public vespalib::ObjectPredicate,
+ public vespalib::ObjectOperation
+{
+private:
+ uint32_t _hitCnt;
+
+public:
+ CountFS4Hits() : _hitCnt(0) {}
+ uint32_t getHitCount() const { return _hitCnt; }
+ virtual bool check(const vespalib::Identifiable &obj) const {
+ return (obj.getClass().id() == FS4Hit::classId);
+ }
+ virtual void execute(vespalib::Identifiable &obj) {
+ (void) obj;
+ ++_hitCnt;
+ }
+};
+
+class FS4HitSetDistributionKey : public vespalib::ObjectPredicate,
+ public vespalib::ObjectOperation
+{
+private:
+ uint32_t _distributionKey;
+
+public:
+ FS4HitSetDistributionKey(uint32_t distributionKey) : _distributionKey(distributionKey) {}
+ virtual bool check(const vespalib::Identifiable &obj) const {
+ return (obj.getClass().id() == FS4Hit::classId);
+ }
+ virtual void execute(vespalib::Identifiable &obj) {
+ static_cast<FS4Hit &>(obj).setDistributionKey(_distributionKey);
+ }
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/aggregation/rawrank.cpp b/searchlib/src/vespa/searchlib/aggregation/rawrank.cpp
new file mode 100644
index 00000000000..b570c7795c0
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/aggregation/rawrank.cpp
@@ -0,0 +1,51 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include "rawrank.h"
+#include <vespa/vespalib/util/sort.h>
+#include <algorithm>
+
+#if 0
+namespace search {
+namespace aggregation {
+
+RawRank::RawRank(double rank)
+ : _rank()
+{
+ _rank.resize(sizeof(double));
+ vespalib::serializeForSort<vespalib::convertForSort<double, false> >(rank, &_rank[0]);
+}
+
+RawRank::RawRank(const char *buf, uint32_t len)
+ : _rank(buf, buf + len)
+{
+}
+
+int
+RawRank::cmp(const RawRank &rhs) const
+{
+ uint32_t l = std::min(_rank.size(), rhs._rank.size());
+ int diff = memcmp(&_rank[0], &rhs._rank[0], l);
+ if (diff == 0) {
+ diff = (_rank.size() - rhs._rank.size());
+ }
+ return diff;
+}
+
+vespalib::nbostream &
+operator << (vespalib::nbostream &os, const RawRank &rr)
+{
+ return os << rr._rank;
+}
+
+vespalib::nbostream &
+operator >> (vespalib::nbostream &is, RawRank &rr)
+{
+ return is >> rr._rank;
+}
+
+} // namespace search::aggregation
+} // namespace search
+
+#endif
+// this function was added by ../../forcelink.sh
+void forcelink_file_searchlib_aggregation_rawrank() {}
diff --git a/searchlib/src/vespa/searchlib/aggregation/rawrank.h b/searchlib/src/vespa/searchlib/aggregation/rawrank.h
new file mode 100644
index 00000000000..f9ca265d84d
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/aggregation/rawrank.h
@@ -0,0 +1,35 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/vespalib/objects/nbostream.h>
+#include <vector>
+
+namespace search {
+namespace aggregation {
+
+#if 0
+/**
+ * Thin wrapper around a rank value represented as a sequence of
+ * bytes.
+ **/
+class RawRank
+{
+private:
+ std::vector<uint8_t> _rank;
+
+public:
+ RawRank() : _rank() {}
+ RawRank(double rank);
+ RawRank(const char *buf, uint32_t len);
+ int cmp(const RawRank &rhs) const;
+ const std::vector<uint8_t> &getRank() const { return _rank; }
+ friend vespalib::nbostream &operator << (vespalib::nbostream &os, const RawRank &rr);
+ friend vespalib::nbostream &operator >> (vespalib::nbostream &is, RawRank &rr);
+};
+#else
+typedef double RawRank;
+#endif
+
+} // namespace search::aggregation
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/aggregation/sumaggregationresult.h b/searchlib/src/vespa/searchlib/aggregation/sumaggregationresult.h
new file mode 100644
index 00000000000..51a6091b794
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/aggregation/sumaggregationresult.h
@@ -0,0 +1,24 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/searchlib/aggregation/aggregationresult.h>
+
+namespace search {
+namespace aggregation {
+
+class SumAggregationResult : public AggregationResult
+{
+public:
+ DECLARE_AGGREGATIONRESULT(SumAggregationResult);
+ SumAggregationResult() : AggregationResult(), _sum() { }
+ SumAggregationResult(const SingleResultNode & sum) : AggregationResult(), _sum(sum) { }
+ virtual void visitMembers(vespalib::ObjectVisitor &visitor) const;
+ const SingleResultNode & getSum() const { return *_sum; }
+private:
+ virtual const ResultNode & onGetRank() const { return getSum(); }
+ virtual void onPrepare(const ResultNode & result, bool useForInit);
+ SingleResultNode::CP _sum;
+};
+
+}
+}
diff --git a/searchlib/src/vespa/searchlib/aggregation/vdshit.cpp b/searchlib/src/vespa/searchlib/aggregation/vdshit.cpp
new file mode 100644
index 00000000000..a1df9646ebd
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/aggregation/vdshit.cpp
@@ -0,0 +1,45 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include "vdshit.h"
+#include <vespa/vespalib/objects/visit.h>
+
+namespace search {
+namespace aggregation {
+
+using vespalib::FieldBase;
+using vespalib::Serializer;
+using vespalib::Deserializer;
+
+static FieldBase _G_docIdField("docId");
+static FieldBase _G_summaryField("summary");
+
+
+IMPLEMENT_IDENTIFIABLE_NS2(search, aggregation, VdsHit, Hit);
+
+Serializer &
+VdsHit::onSerialize(Serializer &os) const
+{
+ Hit::onSerialize(os);
+ return os.put(_G_docIdField, _docId) << _summary;
+}
+
+Deserializer &
+VdsHit::onDeserialize(Deserializer &is)
+{
+ Hit::onDeserialize(is);
+ return is.get(_G_docIdField, _docId) >> _summary;
+}
+
+void
+VdsHit::visitMembers(vespalib::ObjectVisitor &visitor) const
+{
+ Hit::visitMembers(visitor);
+ visit(visitor, _G_docIdField.getName(), _docId);
+ visit(visitor, _G_summaryField.getName(), _summary);
+}
+
+}
+}
+
+// this function was added by ../../forcelink.sh
+void forcelink_file_searchlib_aggregation_vdshit() {}
diff --git a/searchlib/src/vespa/searchlib/aggregation/vdshit.h b/searchlib/src/vespa/searchlib/aggregation/vdshit.h
new file mode 100644
index 00000000000..89fb1acbae6
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/aggregation/vdshit.h
@@ -0,0 +1,40 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include "hit.h"
+#include "aggregationresult.h"
+#include <vespa/vespalib/util/array.h>
+
+namespace search {
+namespace aggregation {
+
+class VdsHit : public Hit
+{
+public:
+ typedef vespalib::Array<uint8_t> Summary;
+ typedef vespalib::string DocId;
+ DECLARE_IDENTIFIABLE_NS2(search, aggregation, VdsHit);
+ DECLARE_NBO_SERIALIZE;
+ VdsHit() : Hit(), _docId(), _summary() {}
+ VdsHit(DocId docId, HitRank rank) : Hit(rank), _docId(docId), _summary() {}
+ virtual VdsHit *clone() const { return new VdsHit(*this); }
+ virtual void visitMembers(vespalib::ObjectVisitor &visitor) const;
+ const DocId & getDocId() const { return _docId; }
+ const Summary & getSummary() const { return _summary; }
+ VdsHit & setDocId(DocId & docId) { _docId = docId; return *this; }
+ VdsHit & setSummary(const void * buf, size_t sz) {
+ const uint8_t * v(static_cast<const uint8_t *>(buf));
+ Summary n(v, v+sz);
+ _summary.swap(n);
+ return *this;
+ }
+ bool operator < (const VdsHit &b) const { return cmp(b) < 0; }
+
+private:
+ DocId _docId;
+ Summary _summary;
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/aggregation/xoraggregationresult.h b/searchlib/src/vespa/searchlib/aggregation/xoraggregationresult.h
new file mode 100644
index 00000000000..9afc196ac33
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/aggregation/xoraggregationresult.h
@@ -0,0 +1,26 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/searchlib/aggregation/aggregationresult.h>
+
+namespace search {
+namespace aggregation {
+
+class XorAggregationResult : public AggregationResult
+{
+public:
+ DECLARE_AGGREGATIONRESULT(XorAggregationResult);
+ virtual void visitMembers(vespalib::ObjectVisitor &visitor) const;
+ const Int64ResultNode & getXor() const { return _xor; }
+ XorAggregationResult &setXor(const Int64ResultNode &i) {
+ _xor = i;
+ return *this;
+ }
+private:
+ virtual const ResultNode & onGetRank() const { return getXor(); }
+ virtual void onPrepare(const ResultNode & result, bool useForInit);
+ Int64ResultNode _xor;
+};
+
+}
+}
diff --git a/searchlib/src/vespa/searchlib/attribute/.gitignore b/searchlib/src/vespa/searchlib/attribute/.gitignore
new file mode 100644
index 00000000000..ee8938b6bf4
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/.gitignore
@@ -0,0 +1,6 @@
+*.So
+*.exe
+*.ilk
+*.pdb
+.depend*
+Makefile
diff --git a/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt b/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt
new file mode 100644
index 00000000000..b949943a42b
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt
@@ -0,0 +1,88 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_library(searchlib_attribute OBJECT
+ SOURCES
+ address_space.cpp
+ address_space_usage.cpp
+ attribute.cpp
+ attribute_blueprint_factory.cpp
+ attribute_weighted_set_blueprint.cpp
+ attributecontext.cpp
+ attributefactory.cpp
+ attributefile.cpp
+ attributefilebufferwriter.cpp
+ attributefilesavetarget.cpp
+ attributefilewriter.cpp
+ attributeguard.cpp
+ attributeiterators.cpp
+ attributemanager.cpp
+ attributememoryfilebufferwriter.cpp
+ attributememoryfilewriter.cpp
+ attributememorysavetarget.cpp
+ attributesaver.cpp
+ attributevector.cpp
+ attrvector.cpp
+ changevector.cpp
+ configconverter.cpp
+ createarrayfastsearch.cpp
+ createarraystd.cpp
+ createsetfastsearch.cpp
+ createsetstd.cpp
+ createsinglefastsearch.cpp
+ createsinglestd.cpp
+ defines.cpp
+ dociditerator.cpp
+ enumattribute.cpp
+ enumattributesaver.cpp
+ enumcomparator.cpp
+ enumhintsearchcontext.cpp
+ enumstore.cpp
+ enumstorebase.cpp
+ extendableattributes.cpp
+ fixedsourceselector.cpp
+ flagattribute.cpp
+ floatbase.cpp
+ i_document_weight_attribute.cpp
+ iattributemanager.cpp
+ iattributesavetarget.cpp
+ integerbase.cpp
+ ipostinglistsearchcontext.cpp
+ iterator_pack.cpp
+ loadedenumvalue.cpp
+ loadednumericvalue.cpp
+ loadedstringvalue.cpp
+ loadedvalue.cpp
+ multienumattribute.cpp
+ multienumattributesaver.cpp
+ multinumericattribute.cpp
+ multinumericattributesaver.cpp
+ multinumericenumattribute.cpp
+ multinumericpostattribute.cpp
+ multistringattribute.cpp
+ multistringpostattribute.cpp
+ multivalueattribute.cpp
+ multivalueattributesaver.cpp
+ multivaluemapping.cpp
+ numericbase.cpp
+ postingchange.cpp
+ postinglistattribute.cpp
+ postinglistsearchcontext.cpp
+ postinglisttraits.cpp
+ postingstore.cpp
+ predicate_attribute.cpp
+ singleenumattribute.cpp
+ singleenumattributesaver.cpp
+ singlenumericattribute.cpp
+ singlenumericattributesaver.cpp
+ singlenumericenumattribute.cpp
+ singlenumericpostattribute.cpp
+ singlesmallnumericattribute.cpp
+ singlestringattribute.cpp
+ singlestringpostattribute.cpp
+ sourceselector.cpp
+ stringattribute.cpp
+ stringbase.cpp
+ tensorattribute.cpp
+ tensorattributesaver.cpp
+ tensorstore.cpp
+ DEPENDS
+)
diff --git a/searchlib/src/vespa/searchlib/attribute/OWNERS b/searchlib/src/vespa/searchlib/attribute/OWNERS
new file mode 100644
index 00000000000..7066165775a
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/OWNERS
@@ -0,0 +1,3 @@
+tegge
+geirst
+balder
diff --git a/searchlib/src/vespa/searchlib/attribute/address_space.cpp b/searchlib/src/vespa/searchlib/attribute/address_space.cpp
new file mode 100644
index 00000000000..c953be81020
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/address_space.cpp
@@ -0,0 +1,20 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "address_space.h"
+#include <iostream>
+
+namespace search {
+
+AddressSpace::AddressSpace(size_t used_, size_t limit_)
+ : _used(used_),
+ _limit(limit_)
+{
+}
+
+std::ostream &operator << (std::ostream &out, const AddressSpace &rhs)
+{
+ return out << "used=" << rhs.used() << ", limit=" << rhs.limit();
+}
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/attribute/address_space.h b/searchlib/src/vespa/searchlib/attribute/address_space.h
new file mode 100644
index 00000000000..f0c116785ea
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/address_space.h
@@ -0,0 +1,36 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <iosfwd>
+
+namespace search {
+
+/**
+ * Represents an address space with number of bytes/entries used
+ * and the limit number of bytes/entries this address space can represent.
+ */
+class AddressSpace
+{
+private:
+ size_t _used;
+ size_t _limit;
+
+public:
+ AddressSpace(size_t used_, size_t limit_);
+ size_t used() const { return _used; }
+ size_t limit() const { return _limit; }
+ double usage() const {
+ if (_limit > 0) {
+ return (double)_used / (double)_limit;
+ }
+ return 0;
+ }
+ bool operator==(const AddressSpace &rhs) const {
+ return _used == rhs._used && _limit == rhs._limit;
+ }
+};
+
+std::ostream &operator << (std::ostream &out, const AddressSpace &rhs);
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/attribute/address_space_usage.cpp b/searchlib/src/vespa/searchlib/attribute/address_space_usage.cpp
new file mode 100644
index 00000000000..c127b71de6b
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/address_space_usage.cpp
@@ -0,0 +1,32 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "address_space_usage.h"
+#include "enumstorebase.h"
+#include "multivaluemapping.h"
+
+namespace search {
+
+AddressSpaceUsage::AddressSpaceUsage()
+ : _enumStoreUsage(defaultEnumStoreUsage()),
+ _multiValueUsage(defaultMultiValueUsage()) {
+}
+
+AddressSpaceUsage::AddressSpaceUsage(const AddressSpace &enumStoreUsage_,
+ const AddressSpace &multiValueUsage_)
+ : _enumStoreUsage(enumStoreUsage_),
+ _multiValueUsage(multiValueUsage_) {
+}
+
+AddressSpace
+AddressSpaceUsage::defaultEnumStoreUsage()
+{
+ return AddressSpace(0, EnumStoreBase::DataStoreType::RefType::offsetSize());
+}
+
+AddressSpace
+AddressSpaceUsage::defaultMultiValueUsage()
+{
+ return AddressSpace(0, multivalue::Index32::offsetSize());
+}
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/attribute/address_space_usage.h b/searchlib/src/vespa/searchlib/attribute/address_space_usage.h
new file mode 100644
index 00000000000..e1c32d9fc48
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/address_space_usage.h
@@ -0,0 +1,29 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "address_space.h"
+
+namespace search {
+
+/**
+ * Represents the address space usage for enum store and multi value mapping.
+ */
+class AddressSpaceUsage
+{
+private:
+ AddressSpace _enumStoreUsage;
+ AddressSpace _multiValueUsage;
+
+public:
+ AddressSpaceUsage();
+ AddressSpaceUsage(const AddressSpace &enumStoreUsage_,
+ const AddressSpace &multiValueUsage_);
+ static AddressSpace defaultEnumStoreUsage();
+ static AddressSpace defaultMultiValueUsage();
+ const AddressSpace &enumStoreUsage() const { return _enumStoreUsage; }
+ const AddressSpace &multiValueUsage() const { return _multiValueUsage; }
+
+};
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/attribute/attribute.cpp b/searchlib/src/vespa/searchlib/attribute/attribute.cpp
new file mode 100644
index 00000000000..649ac1c9786
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/attribute.cpp
@@ -0,0 +1,11 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "attribute.h"
+#include <vespa/log/log.h>
+
+LOG_SETUP(".searchlib.attribute.attribute");
+namespace search {
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/attribute/attribute.h b/searchlib/src/vespa/searchlib/attribute/attribute.h
new file mode 100644
index 00000000000..ca14034c4bf
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/attribute.h
@@ -0,0 +1,8 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/searchlib/attribute/attributevector.h>
+#include <vespa/searchlib/attribute/integerbase.h>
+#include <vespa/searchlib/attribute/floatbase.h>
+#include <vespa/searchlib/attribute/stringbase.h>
+
diff --git a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp
new file mode 100644
index 00000000000..517998af50f
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp
@@ -0,0 +1,636 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".searchlib.attribute.attribute_blueprint_factory");
+
+#include "attribute_blueprint_factory.h"
+#include "attribute_weighted_set_blueprint.h"
+#include "i_document_weight_attribute.h"
+#include "iterator_pack.h"
+#include "predicate_attribute.h"
+#include <vespa/searchlib/attribute/attributeguard.h>
+#include <vespa/searchlib/attribute/attributevector.h>
+#include <vespa/searchlib/attribute/iattributemanager.h>
+#include <vespa/searchlib/common/location.h>
+#include <vespa/searchlib/common/locationiterators.h>
+#include <vespa/searchlib/fef/termfieldmatchdata.h>
+#include <vespa/searchlib/query/tree/stackdumpcreator.h>
+#include <vespa/searchlib/queryeval/andsearchstrict.h>
+#include <vespa/searchlib/queryeval/create_blueprint_visitor_helper.h>
+#include <vespa/searchlib/queryeval/document_weight_search_iterator.h>
+#include <vespa/searchlib/queryeval/dot_product_blueprint.h>
+#include <vespa/searchlib/queryeval/dot_product_search.h>
+#include <vespa/searchlib/queryeval/emptysearch.h>
+#include <vespa/searchlib/queryeval/get_weight_from_node.h>
+#include <vespa/searchlib/queryeval/intermediate_blueprints.h>
+#include <vespa/searchlib/queryeval/leaf_blueprints.h>
+#include <vespa/searchlib/queryeval/orlikesearch.h>
+#include <vespa/searchlib/queryeval/orsearch.h>
+#include <vespa/searchlib/queryeval/predicate_blueprint.h>
+#include <vespa/searchlib/queryeval/searchiterator.h>
+#include <vespa/searchlib/queryeval/termasstring.h>
+#include <vespa/searchlib/queryeval/wand/parallel_weak_and_blueprint.h>
+#include <vespa/searchlib/queryeval/wand/parallel_weak_and_search.h>
+#include <vespa/searchlib/queryeval/wand/weak_and_heap.h>
+#include <vespa/searchlib/queryeval/weighted_set_term_search.h>
+#include <sstream>
+#include <utility>
+#include <vespa/vespalib/util/regexp.h>
+
+using search::AttributeVector;
+using search::fef::TermFieldMatchData;
+using search::fef::TermFieldMatchDataArray;
+using search::fef::TermFieldMatchDataPosition;
+using search::query::Location;
+using search::query::LocationTerm;
+using search::query::Node;
+using search::query::NumberTerm;
+using search::query::PredicateQuery;
+using search::query::PrefixTerm;
+using search::query::RangeTerm;
+using search::query::StackDumpCreator;
+using search::query::StringTerm;
+using search::query::SubstringTerm;
+using search::query::SuffixTerm;
+using search::query::RegExpTerm;
+using search::queryeval::AndBlueprint;
+using search::queryeval::AndSearchStrict;
+using search::queryeval::Blueprint;
+using search::queryeval::CreateBlueprintVisitorHelper;
+using search::queryeval::DotProductBlueprint;
+using search::queryeval::FieldSpec;
+using search::queryeval::FieldSpecBaseList;
+using search::queryeval::MultiSearch;
+using search::queryeval::OrLikeSearch;
+using search::queryeval::OrSearch;
+using search::queryeval::ParallelWeakAndBlueprint;
+using search::queryeval::PredicateBlueprint;
+using search::queryeval::SearchIterator;
+using search::queryeval::Searchable;
+using search::queryeval::NoUnpack;
+using search::queryeval::IRequestContext;
+using search::queryeval::WeightedSetTermBlueprint;
+using vespalib::geo::ZCurve;
+using vespalib::string;
+
+namespace search {
+namespace {
+
+//-----------------------------------------------------------------------------
+
+/**
+ * Blueprint for creating regular, stack-based attribute iterators.
+ **/
+class AttributeFieldBlueprint :
+ public search::queryeval::SimpleLeafBlueprint
+{
+private:
+ AttributeVector::SearchContext::UP _search_context;
+
+ AttributeFieldBlueprint(const FieldSpec &field,
+ const AttributeVector &attribute,
+ const string &query_stack,
+ const AttributeVector::SearchContext::Params &params)
+ : SimpleLeafBlueprint(field),
+ _search_context(attribute.getSearch(query_stack, params).release())
+ {
+ uint32_t estHits = _search_context->approximateHits();
+ HitEstimate estimate(estHits, estHits == 0);
+ setEstimate(estimate);
+ }
+
+public:
+ AttributeFieldBlueprint(const FieldSpec &field,
+ const AttributeVector &attribute,
+ const string &query_stack)
+ : AttributeFieldBlueprint(field,
+ attribute,
+ query_stack,
+ AttributeVector::SearchContext::Params()
+ .useBitVector(field.isFilter()))
+ {
+ }
+
+ AttributeFieldBlueprint(const FieldSpec &field,
+ const AttributeVector &attribute,
+ const AttributeVector &diversity,
+ const string &query_stack,
+ size_t diversityCutoffGroups,
+ bool diversityCutoffStrict)
+ : AttributeFieldBlueprint(field,
+ attribute,
+ query_stack,
+ AttributeVector::SearchContext::Params()
+ .diversityAttribute(&diversity)
+ .useBitVector(field.isFilter())
+ .diversityCutoffGroups(diversityCutoffGroups)
+ .diversityCutoffStrict(diversityCutoffStrict))
+ {
+ }
+
+ virtual SearchIterator::UP
+ createLeafSearch(const TermFieldMatchDataArray &tfmda, bool strict) const
+ {
+ assert(tfmda.size() == 1);
+ return _search_context->createIterator(tfmda[0], strict);
+ }
+
+ virtual void
+ fetchPostings(bool strict)
+ {
+ _search_context->fetchPostings(strict);
+ }
+
+ virtual void visitMembers(vespalib::ObjectVisitor &visitor) const;
+};
+
+void
+AttributeFieldBlueprint::visitMembers(vespalib::ObjectVisitor &visitor) const
+{
+ search::queryeval::LeafBlueprint::visitMembers(visitor);
+ visit(visitor, "attribute", _search_context->attribute().getName());
+}
+
+//-----------------------------------------------------------------------------
+
+template <bool is_strict>
+struct LocationPreFilterIterator : public OrLikeSearch<is_strict, NoUnpack>
+{
+ LocationPreFilterIterator(const MultiSearch::Children &children) : OrLikeSearch<is_strict, NoUnpack>(children, NoUnpack()) {}
+ virtual void doUnpack(uint32_t) override {}
+};
+
+class LocationPreFilterBlueprint :
+ public search::queryeval::ComplexLeafBlueprint
+{
+private:
+ const AttributeVector & _attribute;
+ std::vector<AttributeVector::SearchContext::UP> _rangeSearches;
+ bool _should_use;
+
+public:
+ LocationPreFilterBlueprint(const FieldSpec &field, const AttributeVector &attribute, const ZCurve::RangeVector &rangeVector)
+ : ComplexLeafBlueprint(field),
+ _attribute(attribute),
+ _rangeSearches(),
+ _should_use(false)
+ {
+ uint64_t estHits(0);
+ const AttributeVector & attr(_attribute);
+ for (auto it(rangeVector.begin()), mt(rangeVector.end()); it != mt; it++) {
+ const ZCurve::Range &r(*it);
+ search::query::Range qr(r.min(), r.max());
+ search::query::SimpleRangeTerm rt(qr, "", 0, search::query::Weight(0));
+ string stack(StackDumpCreator::create(rt));
+ _rangeSearches.push_back(attr.getSearch(stack, AttributeVector::SearchContext::Params()));
+ estHits += _rangeSearches.back()->approximateHits();
+ LOG(debug, "Range '%s' estHits %ld", qr.getRangeString().c_str(), estHits);
+ }
+ if (estHits > attr.getNumDocs()) {
+ estHits = attr.getNumDocs();
+ }
+ if (estHits * 10 < attr.getNumDocs()) {
+ _should_use = true;
+ }
+ HitEstimate estimate(estHits, estHits == 0);
+ setEstimate(estimate);
+ }
+
+ bool should_use() const { return _should_use; }
+
+ virtual SearchIterator::UP
+ createLeafSearch(const TermFieldMatchDataArray &tfmda, bool strict) const override
+ {
+ MultiSearch::Children children;
+ for (auto it(_rangeSearches.begin()), mt(_rangeSearches.end()); it != mt; it++) {
+ children.push_back((*it)->createIterator(tfmda[0],
+ strict).release());
+ }
+ if (strict) {
+ return SearchIterator::UP(new LocationPreFilterIterator<true>(children));
+ } else {
+ return SearchIterator::UP(new LocationPreFilterIterator<false>(children));
+ }
+ }
+
+ virtual void fetchPostings(bool strict) override {
+ for (size_t i(0); i < _rangeSearches.size(); i++) {
+ _rangeSearches[i]->fetchPostings(strict);
+ }
+ }
+};
+
+//-----------------------------------------------------------------------------
+
+class LocationPostFilterBlueprint :
+ public search::queryeval::ComplexLeafBlueprint
+{
+private:
+ const AttributeVector & _attribute;
+ search::common::Location _location;
+
+public:
+ LocationPostFilterBlueprint(const FieldSpec &field, const AttributeVector &attribute, const Location &loc)
+ : ComplexLeafBlueprint(field),
+ _attribute(attribute),
+ _location()
+ {
+ _location.setVec(attribute);
+ _location.parse(loc.getLocationString());
+ uint32_t estHits = _attribute.getNumDocs();
+ HitEstimate estimate(estHits, estHits == 0);
+ setEstimate(estimate);
+ }
+
+ const search::common::Location &location() const { return _location; }
+
+ virtual SearchIterator::UP
+ createLeafSearch(const TermFieldMatchDataArray &, bool strict) const override
+ {
+ unsigned int num_docs = _attribute.getNumDocs();
+ return SearchIterator::UP(FastS_AllocLocationIterator(num_docs, strict, _location));
+ }
+};
+
+//-----------------------------------------------------------------------------
+
+Blueprint::UP make_location_blueprint(const FieldSpec &field, const AttributeVector &attribute, const Location &loc) {
+ LocationPostFilterBlueprint *post_filter = new LocationPostFilterBlueprint(field, attribute, loc);
+ Blueprint::UP post_filter_bp(post_filter);
+ const search::common::Location &location = post_filter->location();
+ if (location.getMinX() > location.getMaxX() ||
+ location.getMinY() > location.getMaxY())
+ {
+ return Blueprint::UP(new queryeval::EmptyBlueprint(field));
+ }
+ ZCurve::RangeVector rangeVector = ZCurve::find_ranges(
+ location.getMinX(), location.getMinY(),
+ location.getMaxX(), location.getMaxY());
+ LocationPreFilterBlueprint *pre_filter = new LocationPreFilterBlueprint(field, attribute, rangeVector);
+ Blueprint::UP pre_filter_bp(pre_filter);
+ if (!pre_filter->should_use()) {
+ return post_filter_bp;
+ }
+ AndBlueprint *root = new AndBlueprint();
+ Blueprint::UP root_bp(root);
+ root->addChild(std::move(pre_filter_bp));
+ root->addChild(std::move(post_filter_bp));
+ return root_bp;
+}
+
+//-----------------------------------------------------------------------------
+
+template <typename SearchType>
+class DirectWeightedSetBlueprint : public search::queryeval::ComplexLeafBlueprint
+{
+private:
+ HitEstimate _estimate;
+ std::vector<int32_t> _weights;
+ std::vector<IDocumentWeightAttribute::LookupResult> _terms;
+ const IDocumentWeightAttribute &_attr;
+
+public:
+ DirectWeightedSetBlueprint(const FieldSpec &field,
+ const IDocumentWeightAttribute &attr, size_t size_hint)
+ : ComplexLeafBlueprint(field),
+ _estimate(),
+ _weights(),
+ _terms(),
+ _attr(attr)
+ {
+ _weights.reserve(size_hint);
+ _terms.reserve(size_hint);
+ }
+
+ void addTerm(const vespalib::string &term, int32_t weight) {
+ IDocumentWeightAttribute::LookupResult result = _attr.lookup(term);
+ HitEstimate childEst(result.posting_size, (result.posting_size == 0));
+ if (!childEst.empty) {
+ if (_estimate.empty) {
+ _estimate = childEst;
+ } else {
+ _estimate.estHits += childEst.estHits;
+ }
+ setEstimate(_estimate);
+ _weights.push_back(weight);
+ _terms.push_back(result);
+ }
+ }
+
+ SearchIterator::UP createLeafSearch(const TermFieldMatchDataArray &tfmda, bool) const
+ {
+ assert(tfmda.size() == 1);
+ if (_terms.size() == 0) {
+ return SearchIterator::UP(new search::queryeval::EmptySearch());
+ }
+ std::vector<DocumentWeightIterator> iterators;
+ const size_t numChildren = _terms.size();
+ iterators.reserve(numChildren);
+ for (const IDocumentWeightAttribute::LookupResult &r : _terms) {
+ _attr.create(r.posting_idx, iterators);
+ }
+ return SearchType::create(*tfmda[0], _weights, std::move(iterators));
+ }
+};
+
+//-----------------------------------------------------------------------------
+
+class DirectWandBlueprint : public search::queryeval::ComplexLeafBlueprint
+{
+private:
+ HitEstimate _estimate;
+ mutable queryeval::SharedWeakAndPriorityQueue _scores;
+ const queryeval::wand::score_t _scoreThreshold;
+ double _thresholdBoostFactor;
+ const uint32_t _scoresAdjustFrequency;
+ std::vector<int32_t> _weights;
+ std::vector<IDocumentWeightAttribute::LookupResult> _terms;
+ const IDocumentWeightAttribute &_attr;
+
+public:
+ DirectWandBlueprint(const FieldSpec &field,
+ const IDocumentWeightAttribute &attr,
+ uint32_t scoresToTrack,
+ queryeval::wand::score_t scoreThreshold,
+ double thresholdBoostFactor,
+ size_t size_hint)
+ : ComplexLeafBlueprint(field),
+ _estimate(),
+ _scores(scoresToTrack),
+ _scoreThreshold(scoreThreshold),
+ _thresholdBoostFactor(thresholdBoostFactor),
+ _scoresAdjustFrequency(queryeval::DEFAULT_PARALLEL_WAND_SCORES_ADJUST_FREQUENCY),
+ _weights(),
+ _terms(),
+ _attr(attr)
+ {
+ _weights.reserve(size_hint);
+ _terms.reserve(size_hint);
+ }
+
+ void addTerm(const vespalib::string &term, int32_t weight) {
+ IDocumentWeightAttribute::LookupResult result = _attr.lookup(term);
+ HitEstimate childEst(result.posting_size, (result.posting_size == 0));
+ if (!childEst.empty) {
+ if (_estimate.empty) {
+ _estimate = childEst;
+ } else {
+ _estimate.estHits += childEst.estHits;
+ }
+ setEstimate(_estimate);
+ _weights.push_back(weight);
+ _terms.push_back(result);
+ }
+ }
+
+ SearchIterator::UP createLeafSearch(const TermFieldMatchDataArray &tfmda, bool strict) const
+ {
+ assert(tfmda.size() == 1);
+ if (_terms.size() == 0) {
+ return SearchIterator::UP(new search::queryeval::EmptySearch());
+ }
+ return search::queryeval::ParallelWeakAndSearch::create(*tfmda[0],
+ queryeval::ParallelWeakAndSearch::MatchParams(_scores,
+ _scoreThreshold,
+ _thresholdBoostFactor,
+ _scoresAdjustFrequency).setDocIdLimit(get_docid_limit()),
+ _weights, _terms, _attr, strict);
+ }
+};
+
+//-----------------------------------------------------------------------------
+
+class DirectAttributeBlueprint : public search::queryeval::SimpleLeafBlueprint
+{
+private:
+ vespalib::string _attrName;
+ const IDocumentWeightAttribute &_attr;
+ IDocumentWeightAttribute::LookupResult _dict_entry;
+
+public:
+ DirectAttributeBlueprint(const FieldSpec &field,
+ const vespalib::string & name,
+ const IDocumentWeightAttribute &attr, const vespalib::string &term)
+ : SimpleLeafBlueprint(field),
+ _attrName(name),
+ _attr(attr),
+ _dict_entry(_attr.lookup(term))
+ {
+ setEstimate(HitEstimate(_dict_entry.posting_size, (_dict_entry.posting_size == 0)));
+ }
+
+ SearchIterator::UP createLeafSearch(const TermFieldMatchDataArray &tfmda, bool) const
+ {
+ assert(tfmda.size() == 1);
+ if (_dict_entry.posting_size == 0) {
+ return SearchIterator::UP(new search::queryeval::EmptySearch());
+ }
+ return SearchIterator::UP(new queryeval::DocumentWeightSearchIterator(*tfmda[0], _attr, _dict_entry));
+ }
+
+ virtual void visitMembers(vespalib::ObjectVisitor &visitor) const
+ {
+ search::queryeval::LeafBlueprint::visitMembers(visitor);
+ visit(visitor, "attribute", _attrName);
+ }
+};
+
+//-----------------------------------------------------------------------------
+
+bool check_valid_diversity_attr(const AttributeVector *attr) {
+ if (attr == nullptr) {
+ return false;
+ }
+ if (attr->hasMultiValue()) {
+ return false;
+ }
+ return (attr->hasEnum() || attr->isIntegerType() || attr->isFloatingPointType());
+}
+
+//-----------------------------------------------------------------------------
+
+
+/**
+ * Determines the correct Blueprint to use.
+ **/
+class CreateBlueprintVisitor : public CreateBlueprintVisitorHelper
+{
+private:
+ const FieldSpec &_field;
+ const AttributeVector & _attr;
+ const IDocumentWeightAttribute *_dwa;
+
+public:
+ CreateBlueprintVisitor(Searchable &searchable,
+ const IRequestContext &requestContext,
+ const FieldSpec &field,
+ const AttributeVector &attr)
+ : CreateBlueprintVisitorHelper(searchable, field, requestContext),
+ _field(field),
+ _attr(attr),
+ _dwa(attr.asDocumentWeightAttribute()) {}
+
+ template <class TermNode>
+ void visitTerm(TermNode &n, bool simple = false) {
+ if (simple && (_dwa != nullptr) && !_field.isFilter() && n.isRanked()) {
+ vespalib::string term = search::queryeval::termAsString(n);
+ setResult(make_UP(new DirectAttributeBlueprint(_field, _attr.getName(), *_dwa, term)));
+ } else {
+ const string stack = StackDumpCreator::create(n);
+ setResult(make_UP(new AttributeFieldBlueprint(_field, _attr, stack)));
+ }
+ }
+
+ void visitLocation(LocationTerm &node) {
+ Location loc(node.getTerm());
+ setResult(make_location_blueprint(_field, _attr, loc));
+ }
+
+ void visitPredicate(PredicateQuery &query) {
+ const PredicateAttribute *attr =
+ dynamic_cast<const PredicateAttribute *>(&_attr);
+ if (!attr) {
+ LOG(warning, "Trying to apply a PredicateQuery node to a "
+ "non-predicate attribute.");
+ setResult(Blueprint::UP(new queryeval::EmptyBlueprint(_field)));
+ } else {
+ setResult(Blueprint::UP(new PredicateBlueprint( _field, *attr, query)));
+ }
+ }
+
+ virtual void visit(NumberTerm & n) { visitTerm(n, true); }
+ virtual void visit(LocationTerm &n) { visitLocation(n); }
+ virtual void visit(PrefixTerm & n) { visitTerm(n); }
+
+ virtual void visit(RangeTerm &n) {
+ const string stack = StackDumpCreator::create(n);
+ const string term = search::queryeval::termAsString(n);
+ search::QueryTermSimple parsed_term(term, search::QueryTermSimple::WORD);
+ if (parsed_term.getMaxPerGroup() > 0) {
+ const AttributeVector * diversity(getRequestContext().getAttribute(parsed_term.getDiversityAttribute()));
+ if (check_valid_diversity_attr(diversity)) {
+ setResult(make_UP(new AttributeFieldBlueprint(_field, _attr, *diversity, stack,
+ parsed_term.getDiversityCutoffGroups(),
+ parsed_term.getDiversityCutoffStrict())));
+ } else {
+ setResult(Blueprint::UP(new queryeval::EmptyBlueprint(_field)));
+ }
+ } else {
+ setResult(make_UP(new AttributeFieldBlueprint(_field, _attr, stack)));
+ }
+ }
+
+ virtual void visit(StringTerm & n) { visitTerm(n, true); }
+ virtual void visit(SubstringTerm & n) {
+ search::query::SimpleRegExpTerm re(vespalib::Regexp::make_from_substring(n.getTerm()),
+ n.getView(), n.getId(), n.getWeight());
+ visitTerm(re);
+ }
+ virtual void visit(SuffixTerm & n) {
+ search::query::SimpleRegExpTerm re(vespalib::Regexp::make_from_suffix(n.getTerm()),
+ n.getView(), n.getId(), n.getWeight());
+ visitTerm(re);
+ }
+ virtual void visit(PredicateQuery &n) { visitPredicate(n); }
+ virtual void visit(RegExpTerm & n) { visitTerm(n); }
+
+ template <typename WS, typename NODE>
+ void createDirectWeightedSet(WS *bp, NODE &n) {
+ Blueprint::UP result(bp);
+ for (size_t i = 0; i < n.getChildren().size(); ++i) {
+ const search::query::Node &node = *n.getChildren()[i];
+ vespalib::string term = search::queryeval::termAsString(node);
+ uint32_t weight = search::queryeval::getWeightFromNode(node).percent();
+ bp->addTerm(term, weight);
+ }
+ setResult(std::move(result));
+ }
+
+ template <typename WS, typename NODE>
+ void createShallowWeightedSet(WS *bp, NODE &n, const FieldSpec &fs) {
+ Blueprint::UP result(bp);
+ for (size_t i = 0; i < n.getChildren().size(); ++i) {
+ const search::query::Node &node = *n.getChildren()[i];
+ uint32_t weight = search::queryeval::getWeightFromNode(node).percent();
+ const string stack = StackDumpCreator::create(node);
+ FieldSpec childfs = bp->getNextChildField(fs);
+ bp->addTerm(make_UP(new AttributeFieldBlueprint(childfs, _attr, stack)), weight);
+ }
+ setResult(std::move(result));
+ }
+
+ virtual void visit(search::query::WeightedSetTerm &n) {
+ bool isSingleValue = !_attr.hasMultiValue();
+ bool isString = (_attr.isStringType() && _attr.hasEnum());
+ bool isInteger = _attr.isIntegerType();
+ if (isSingleValue && (isString || isInteger)) {
+ AttributeWeightedSetBlueprint *ws
+ = new AttributeWeightedSetBlueprint(_field, _attr);
+ Blueprint::UP result(ws);
+ for (size_t i = 0; i < n.getChildren().size(); ++i) {
+ const search::query::Node &node = *n.getChildren()[i];
+ uint32_t weight = search::queryeval::getWeightFromNode(node).percent();
+ vespalib::string term = search::queryeval::termAsString(node);
+ search::QueryTermSimple::UP qt;
+ if (isInteger) {
+ qt.reset(new search::QueryTermSimple(term, search::QueryTermSimple::WORD));
+ } else {
+ qt.reset(new search::QueryTermBase(term, search::QueryTermSimple::WORD));
+ }
+ ws->addToken(_attr.getSearch(std::move(qt), AttributeVector::SearchContext::Params()), weight);
+ }
+ setResult(std::move(result));
+ } else {
+ if (_dwa != nullptr) {
+ auto *bp = new DirectWeightedSetBlueprint<queryeval::WeightedSetTermSearch>(_field, *_dwa, n.getChildren().size());
+ createDirectWeightedSet(bp, n);
+ } else {
+ auto *bp = new WeightedSetTermBlueprint(_field);
+ createShallowWeightedSet(bp, n, _field);
+ }
+ }
+ }
+
+ virtual void visit(search::query::DotProduct &n) {
+ if (_dwa != nullptr) {
+ auto *bp = new DirectWeightedSetBlueprint<queryeval::DotProductSearch>(_field, *_dwa, n.getChildren().size());
+ createDirectWeightedSet(bp, n);
+ } else {
+ auto *bp = new DotProductBlueprint(_field);
+ createShallowWeightedSet(bp, n, _field);
+ }
+ }
+
+ virtual void visit(search::query::WandTerm &n) {
+ if (_dwa != nullptr) {
+ auto *bp = new DirectWandBlueprint(_field, *_dwa,
+ n.getTargetNumHits(), n.getScoreThreshold(), n.getThresholdBoostFactor(),
+ n.getChildren().size());
+ createDirectWeightedSet(bp, n);
+ } else {
+ auto *bp = new ParallelWeakAndBlueprint(_field,
+ n.getTargetNumHits(),
+ n.getScoreThreshold(),
+ n.getThresholdBoostFactor());
+ createShallowWeightedSet(bp, n, _field);
+ }
+ }
+};
+
+} // namespace
+
+//-----------------------------------------------------------------------------
+
+Blueprint::UP
+AttributeBlueprintFactory::createBlueprint(const IRequestContext & requestContext,
+ const FieldSpec &field,
+ const search::query::Node &term)
+{
+ const AttributeVector * attr(requestContext.getAttribute(field.getName()));
+ CreateBlueprintVisitor visitor(*this, requestContext, field, *attr);
+ const_cast<Node &>(term).accept(visitor);
+ return visitor.getResult();
+}
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.h b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.h
new file mode 100644
index 00000000000..a8fb10da02b
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.h
@@ -0,0 +1,21 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/queryeval/searchable.h>
+#include <vespa/searchlib/attribute/iattributemanager.h>
+#include <vespa/searchlib/queryeval/irequestcontext.h>
+
+namespace search {
+
+class AttributeBlueprintFactory : public queryeval::Searchable
+{
+public:
+ // implements Searchable
+ queryeval::Blueprint::UP
+ createBlueprint(const queryeval::IRequestContext & requestContext,
+ const queryeval::FieldSpec &field,
+ const query::Node &term) override;
+};
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/attribute/attribute_weighted_set_blueprint.cpp b/searchlib/src/vespa/searchlib/attribute/attribute_weighted_set_blueprint.cpp
new file mode 100644
index 00000000000..501c78f75b6
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/attribute_weighted_set_blueprint.cpp
@@ -0,0 +1,187 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "attribute_weighted_set_blueprint.h"
+#include <vespa/searchlib/attribute/attributevector.h>
+#include <vespa/searchlib/attribute/attributeguard.h>
+#include <vespa/searchlib/queryeval/weighted_set_term_search.h>
+#include <vespa/vespalib/stllike/hash_map.h>
+
+namespace search {
+
+namespace {
+
+//-----------------------------------------------------------------------------
+
+class UseAttr
+{
+private:
+ const attribute::IAttributeVector &_attr;
+
+protected:
+ const attribute::IAttributeVector &attribute() const { return _attr; }
+
+public:
+ UseAttr(const attribute::IAttributeVector & attr)
+ : _attr(attr) {}
+};
+
+//-----------------------------------------------------------------------------
+
+class UseStringEnum : public UseAttr
+{
+public:
+ UseStringEnum(const AttributeVector & attr)
+ : UseAttr(attr) {}
+ bool mapToken(const AttributeVector::SearchContext &context,
+ int64_t &token) const
+ {
+ attribute::IAttributeVector::EnumHandle handle;
+ if (attribute().findEnum(context.queryTerm().getTerm(), handle)) {
+ token = handle;
+ return true;
+ }
+ return false;
+ }
+ int64_t getToken(uint32_t docId) const {
+ return attribute().getEnum(docId);
+ }
+};
+
+//-----------------------------------------------------------------------------
+
+class UseInteger : public UseAttr
+{
+public:
+ UseInteger(const AttributeVector & attr) : UseAttr(attr) {}
+ bool mapToken(const AttributeVector::SearchContext &context,
+ int64_t &token) const
+ {
+ Int64Range range(context.getAsIntegerTerm());
+ if (range.isPoint()) {
+ token = range.lower();
+ return true;
+ }
+ return false;
+ }
+ int64_t getToken(uint32_t docId) const {
+ return attribute().getInt(docId);
+ }
+};
+
+//-----------------------------------------------------------------------------
+
+template <typename T>
+class AttributeFilter : public queryeval::SearchIterator
+{
+private:
+ typedef vespalib::hash_map<int64_t, int32_t> Map;
+ typedef fef::TermFieldMatchData TFMD;
+
+ TFMD &_tfmd;
+ T _attr;
+ Map _map;
+ int32_t _weight;
+
+public:
+ AttributeFilter(fef::TermFieldMatchData &tfmd,
+ const AttributeVector & attr,
+ const std::vector<int32_t> weights,
+ const std::vector<AttributeVector::SearchContext*> contexts)
+ : _tfmd(tfmd), _attr(attr), _map(), _weight(0)
+ {
+ for (size_t i = 0; i < contexts.size(); ++i) {
+ int64_t token(0);
+ if (_attr.mapToken(*contexts[i], token)) {
+ _map[token] = weights[i];
+ }
+ }
+ }
+ virtual void doSeek(uint32_t docId) {
+ Map::const_iterator pos = _map.find(_attr.getToken(docId));
+ if (pos != _map.end()) {
+ _weight = pos->second;
+ setDocId(docId);
+ }
+ }
+ virtual void doUnpack(uint32_t docId) {
+ _tfmd.reset(docId);
+ fef::TermFieldMatchDataPosition pos;
+ pos.setElementWeight(_weight);
+ _tfmd.appendPosition(pos);
+ }
+ virtual void visitMembers(vespalib::ObjectVisitor &) const {}
+};
+
+//-----------------------------------------------------------------------------
+
+} // namespace search::<unnamed>
+
+AttributeWeightedSetBlueprint::AttributeWeightedSetBlueprint(const queryeval::FieldSpec &field, const AttributeVector & attr)
+ : queryeval::ComplexLeafBlueprint(field),
+ _numDocs(attr.getNumDocs()),
+ _estHits(0),
+ _weights(),
+ _attr(attr),
+ _contexts()
+{
+}
+
+AttributeWeightedSetBlueprint::~AttributeWeightedSetBlueprint()
+{
+ while (!_contexts.empty()) {
+ delete _contexts.back();
+ _contexts.pop_back();
+ }
+}
+
+void
+AttributeWeightedSetBlueprint::addToken(AttributeVector::SearchContext::UP context, int32_t weight)
+{
+ _estHits = std::min(_estHits + context->approximateHits(), _numDocs);
+ setEstimate(HitEstimate(_estHits, (_estHits == 0)));
+ _weights.push_back(weight);
+ _contexts.push_back(context.get());
+ context.release();
+}
+
+queryeval::SearchIterator::UP
+AttributeWeightedSetBlueprint::createLeafSearch(const fef::TermFieldMatchDataArray &tfmda,
+ bool strict) const
+{
+ assert(tfmda.size() == 1);
+ fef::TermFieldMatchData &tfmd = *tfmda[0];
+ if (strict) { // use generic weighted set search
+ std::vector<queryeval::SearchIterator*> children(_contexts.size());
+ for (size_t i = 0; i < _contexts.size(); ++i) {
+ children[i] = _contexts[i]->createIterator(&tfmd,
+ true).release();
+ }
+ return queryeval::SearchIterator::UP(queryeval::WeightedSetTermSearch::create(children, tfmd, _weights));
+ } else { // use attribute filter optimization
+ bool isSingleValue = !_attr.hasMultiValue();
+ bool isString = (_attr.isStringType() && _attr.hasEnum());
+ bool isInteger = _attr.isIntegerType();
+ assert(isSingleValue);
+ (void) isSingleValue;
+ if (isString) {
+ return queryeval::SearchIterator::UP(new AttributeFilter<UseStringEnum>(tfmd, _attr, _weights, _contexts));
+ } else {
+ assert(isInteger);
+ (void) isInteger;
+ return queryeval::SearchIterator::UP(new AttributeFilter<UseInteger>(tfmd, _attr, _weights, _contexts));
+ }
+ }
+}
+
+void
+AttributeWeightedSetBlueprint::fetchPostings(bool strict)
+{
+ if (strict) {
+ for (size_t i = 0; i < _contexts.size(); ++i) {
+ _contexts[i]->fetchPostings(true);
+ }
+ }
+}
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/attribute/attribute_weighted_set_blueprint.h b/searchlib/src/vespa/searchlib/attribute/attribute_weighted_set_blueprint.h
new file mode 100644
index 00000000000..dadb0d1a0ec
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/attribute_weighted_set_blueprint.h
@@ -0,0 +1,36 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/queryeval/blueprint.h>
+#include <vespa/searchlib/attribute/attributeguard.h>
+#include <vespa/searchlib/queryeval/searchiterator.h>
+#include <memory>
+#include <vector>
+
+namespace search {
+
+class AttributeWeightedSetBlueprint : public queryeval::ComplexLeafBlueprint
+{
+private:
+ size_t _numDocs;
+ size_t _estHits;
+ std::vector<int32_t> _weights;
+ const AttributeVector & _attr;
+ std::vector<AttributeVector::SearchContext*> _contexts;
+
+ AttributeWeightedSetBlueprint(const AttributeWeightedSetBlueprint &); // disabled
+ AttributeWeightedSetBlueprint &operator=(const AttributeWeightedSetBlueprint &); // disabled
+
+public:
+ AttributeWeightedSetBlueprint(const queryeval::FieldSpec &field, const AttributeVector & attr);
+ virtual ~AttributeWeightedSetBlueprint();
+ void addToken(AttributeVector::SearchContext::UP context, int32_t weight);
+ virtual queryeval::SearchIterator::UP createLeafSearch(const fef::TermFieldMatchDataArray &tfmda, bool strict) const;
+
+ virtual void
+ fetchPostings(bool strict);
+};
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/attribute/attributecontext.cpp b/searchlib/src/vespa/searchlib/attribute/attributecontext.cpp
new file mode 100644
index 00000000000..3c1ada3108d
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/attributecontext.cpp
@@ -0,0 +1,72 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".attributecontext");
+#include "attributecontext.h"
+
+using namespace search;
+using namespace search::attribute;
+
+namespace search {
+
+const IAttributeVector *
+AttributeContext::getAttribute(AttributeMap & map, const string & name, bool stableEnum) const
+{
+ AttributeMap::const_iterator itr = map.find(name);
+ if (itr != map.end()) {
+ return itr->second->operator->();
+ } else {
+ AttributeGuard::UP ret;
+ if (stableEnum) {
+ ret = _manager.getAttributeStableEnum(name);
+ } else {
+ ret = _manager.getAttribute(name);
+ }
+ if (ret) {
+ const AttributeGuard & guard = *ret;
+ map[name] = std::move(ret);
+ return guard.operator->();
+ }
+ return nullptr;
+ }
+}
+
+AttributeContext::AttributeContext(const IAttributeManager & manager) :
+ _manager(manager),
+ _attributes(),
+ _enumAttributes(),
+ _cacheLock()
+{
+}
+
+const IAttributeVector *
+AttributeContext::getAttribute(const string & name) const
+{
+ vespalib::LockGuard guard(_cacheLock);
+ return getAttribute(_attributes, name, false);
+}
+
+const IAttributeVector *
+AttributeContext::getAttributeStableEnum(const string & name) const
+{
+ vespalib::LockGuard guard(_cacheLock);
+ return getAttribute(_enumAttributes, name, true);
+}
+
+void AttributeContext::releaseEnumGuards() {
+ vespalib::LockGuard guard(_cacheLock);
+ _enumAttributes.clear();
+}
+
+void
+AttributeContext::getAttributeList(std::vector<const IAttributeVector *> & list) const
+{
+ std::vector<AttributeGuard> attributes;
+ _manager.getAttributeList(attributes);
+ for (size_t i = 0; i < attributes.size(); ++i) {
+ list.push_back(getAttribute(attributes[i]->getName()));
+ }
+}
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/attribute/attributecontext.h b/searchlib/src/vespa/searchlib/attribute/attributecontext.h
new file mode 100644
index 00000000000..5d70c06d16a
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/attributecontext.h
@@ -0,0 +1,43 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchcommon/attribute/iattributecontext.h>
+#include <vespa/vespalib/util/sync.h>
+#include <vespa/vespalib/stllike/hash_map.h>
+#include "iattributemanager.h"
+
+namespace search {
+
+/**
+ * This class is wrapping an attribute manager and
+ * implements the IAttributeContext interface to provide read access to attribute vectors.
+ **/
+class AttributeContext : public attribute::IAttributeContext
+{
+private:
+ typedef vespalib::hash_map<string, AttributeGuard::UP> AttributeMap;
+
+ const search::IAttributeManager & _manager;
+ mutable AttributeMap _attributes;
+ mutable AttributeMap _enumAttributes;
+ mutable vespalib::Lock _cacheLock;
+
+ const attribute::IAttributeVector *
+ getAttribute(AttributeMap & map, const string & name, bool stableEnum) const;
+
+public:
+ AttributeContext(const search::IAttributeManager & manager);
+
+ // Implements IAttributeContext
+ const attribute::IAttributeVector * getAttribute(const string & name) const override;
+ const attribute::IAttributeVector * getAttributeStableEnum(const string & name) const override;
+ void getAttributeList(std::vector<const attribute::IAttributeVector *> & list) const override;
+ void releaseEnumGuards() override;
+
+ // Give acces to the underlying manager
+ const search::IAttributeManager & getManager() const { return _manager; }
+};
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/attribute/attributefactory.cpp b/searchlib/src/vespa/searchlib/attribute/attributefactory.cpp
new file mode 100644
index 00000000000..f7d5adbe049
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/attributefactory.cpp
@@ -0,0 +1,58 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".searchlib.attributefactory");
+
+#include "attributefactory.h"
+
+namespace search {
+
+using attribute::CollectionType;
+
+AttributeVector::SP
+AttributeFactory::createAttribute(const vespalib::string & baseFileName, const Config & cfg)
+{
+ AttributeVector::SP ret;
+ if (cfg.collectionType().type() == CollectionType::ARRAY) {
+ if (cfg.fastSearch()) {
+ ret = createArrayFastSearch(baseFileName, cfg);
+ if (ret.get() == NULL) {
+ LOG(warning, "Cannot apply fastsearch hint on attribute %s of type array<%s>. "
+ "Falling back to normal. You should correct your .sd file.",
+ baseFileName.c_str(), cfg.basicType().asString());
+ ret = createArrayStd(baseFileName, cfg);
+ }
+ } else {
+ ret = createArrayStd(baseFileName, cfg);
+ }
+ } else if (cfg.collectionType().type() == CollectionType::WSET) {
+ // Ignore if noupdate has been set.
+ if (cfg.fastSearch()) {
+ ret = createSetFastSearch(baseFileName, cfg);
+ if (ret.get() == NULL) {
+ LOG(warning, "Cannot apply fastsearch hint on attribute %s of type set<%s>. "
+ "Falling back to normal. You should correct your .sd file.",
+ baseFileName.c_str(), cfg.basicType().asString());
+ ret = createSetStd(baseFileName, cfg);
+ }
+ } else {
+ ret = createSetStd(baseFileName, cfg);
+ }
+ } else {
+ if (cfg.fastSearch()) {
+ ret = createSingleFastSearch(baseFileName, cfg);
+ if (ret.get() == NULL) {
+ LOG(warning, "Cannot apply fastsearch hint on attribute %s of type %s. "
+ "Falling back to normal. You should correct your .sd file.",
+ baseFileName.c_str(), cfg.basicType().asString());
+ ret = createSingleStd(baseFileName, cfg);
+ }
+ } else {
+ ret = createSingleStd(baseFileName, cfg);
+ }
+ }
+ return ret;
+}
+
+}
diff --git a/searchlib/src/vespa/searchlib/attribute/attributefactory.h b/searchlib/src/vespa/searchlib/attribute/attributefactory.h
new file mode 100644
index 00000000000..d2a1f567620
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/attributefactory.h
@@ -0,0 +1,33 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/attribute/attributevector.h>
+
+namespace search {
+
+/**
+ * Factory for creating attribute vector instances.
+ **/
+class AttributeFactory {
+private:
+ typedef attribute::Config Config;
+ static AttributeVector::SP createArrayStd(const vespalib::string & baseFileName, const Config & cfg);
+ static AttributeVector::SP createArrayFastSearch(const vespalib::string & baseFileName, const Config & cfg);
+ static AttributeVector::SP createSetStd(const vespalib::string & baseFileName, const Config & cfg);
+ static AttributeVector::SP createSetFastSearch(const vespalib::string & baseFileName, const Config & cfg);
+ static AttributeVector::SP createSingleStd(const vespalib::string & baseFileName, const Config & cfg);
+ static AttributeVector::SP createSingleFastSearch(const vespalib::string & baseFileName, const Config & cfg);
+ static AttributeVector::SP createSingleFastAggregate(const vespalib::string & baseFileName, const Config & cfg);
+ static AttributeVector::SP createArrayFastAggregate(const vespalib::string & baseFileName, const Config & cfg);
+ static AttributeVector::SP createSetFastAggregate(const vespalib::string & baseFileName, const Config & cfg);
+
+public:
+ /**
+ * Create an attribute vector with the given name based on the given config.
+ **/
+ static AttributeVector::SP createAttribute(const vespalib::string & baseFileName, const Config & cfg);
+};
+
+}
+
diff --git a/searchlib/src/vespa/searchlib/attribute/attributefile.cpp b/searchlib/src/vespa/searchlib/attribute/attributefile.cpp
new file mode 100644
index 00000000000..c4384459f9d
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/attributefile.cpp
@@ -0,0 +1,457 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "attributefile.h"
+#include <stdexcept>
+#include <vespa/vespalib/util/error.h>
+#include <vespa/vespalib/util/exceptions.h>
+#include <vespa/vespalib/data/fileheader.h>
+#include <vespa/searchlib/util/filesizecalculator.h>
+
+#include <vespa/log/log.h>
+LOG_SETUP(".attributefile");
+
+using vespalib::IllegalStateException;
+using search::common::FileHeaderContext;
+using vespalib::getLastErrorString;
+
+namespace search
+{
+
+using attribute::BasicType;
+
+namespace
+{
+
+void
+updateHeader(const vespalib::string &name)
+{
+ vespalib::FileHeader h;
+ FastOS_File f;
+ f.OpenReadWrite(name.c_str());
+ h.readFile(f);
+ FileHeaderContext::setFreezeTime(h);
+ typedef vespalib::GenericHeader::Tag Tag;
+ h.putTag(Tag("frozen", 1));
+ h.rewriteFile(f);
+ f.Close();
+}
+
+}
+
+ReadAttributeFile::ReadAttributeFile(const vespalib::string & fileName,
+ const Config & config)
+ : AttributeFile(fileName, config)
+{
+ OpenReadOnly();
+ seekIdxPos(0);
+}
+
+
+WriteAttributeFile::WriteAttributeFile(const vespalib::string &fileName,
+ const Config &config,
+ const FileHeaderContext &
+ fileHeaderContext,
+ uint32_t docIdLimit)
+ : AttributeFile(fileName, config)
+{
+ OpenWriteOnly(fileHeaderContext, docIdLimit);
+}
+
+
+void
+AttributeFile::OpenReadOnly()
+{
+ if ( ! _datFile->OpenReadOnly() ) {
+ LOG(error, "could not open %s: %s",
+ _datFile->GetFileName(), getLastErrorString().c_str());
+ throw IllegalStateException(
+ vespalib::make_string(
+ "Failed opening attribute data file '%s' for reading",
+ _datFile->GetFileName()));
+ }
+ vespalib::FileHeader datHeader;
+ _datHeaderLen = datHeader.readFile(*_datFile);
+ _datFile->SetPosition(_datHeaderLen);
+ _datFileSize = _datFile->GetSize();
+ if (!FileSizeCalculator::extractFileSize(datHeader, _datHeaderLen,
+ _datFile->GetFileName(),
+ _datFileSize)) {
+ abort();
+ }
+ if (_idxFile.get()) {
+ if ( ! _idxFile->OpenReadOnly()) {
+ LOG(error, "could not open %s: %s",
+ _idxFile->GetFileName(), getLastErrorString().c_str());
+ throw IllegalStateException(
+ vespalib::make_string(
+ "Failed opening attribute idx file '%s'"
+ " for reading",
+ _idxFile->GetFileName()));
+ }
+ vespalib::FileHeader idxHeader;
+ _idxHeaderLen = idxHeader.readFile(*_idxFile);
+ _idxFile->SetPosition(_idxHeaderLen);
+ _idxFileSize = _idxFile->GetSize();
+ if (!FileSizeCalculator::extractFileSize(idxHeader, _idxHeaderLen,
+ _idxFile->GetFileName(),
+ _idxFileSize)) {
+ abort();
+ }
+ if (_weightFile.get()) {
+ if ( ! _weightFile->OpenReadOnly()) {
+ LOG(error, "could not open %s: %s",
+ _weightFile->GetFileName(), getLastErrorString().c_str());
+ throw IllegalStateException(
+ vespalib::make_string(
+ "Failed opening attribute weight file '%s'"
+ " for reading",
+ _weightFile->GetFileName()));
+ }
+ vespalib::FileHeader weightHeader;
+ _weightHeaderLen = weightHeader.readFile(*_weightFile);
+ _weightFile->SetPosition(_weightHeaderLen);
+ }
+ }
+}
+
+
+void
+AttributeFile::OpenWriteOnly(const FileHeaderContext &fileHeaderContext,
+ uint32_t docIdLimit)
+{
+ if ( ! _datFile->OpenWriteOnlyTruncate() ) {
+ LOG(error, "could not open %s: %s",
+ _datFile->GetFileName(), getLastErrorString().c_str());
+ throw IllegalStateException(
+ vespalib::make_string(
+ "Failed opening attribute data file '%s' for writing",
+ _datFile->GetFileName()));
+ }
+ vespalib::FileHeader datHeader;
+ typedef vespalib::GenericHeader::Tag Tag;
+ fileHeaderContext.addTags(datHeader, _datFile->GetFileName());
+ datHeader.putTag(Tag("desc", "Attribute vector data file"));
+
+ datHeader.putTag(Tag("datatype", _config.basicType().asString()));
+ datHeader.putTag(Tag("collectiontype",
+ _config.collectionType().asString()));
+ datHeader.putTag(Tag("docIdLimit", docIdLimit));
+ datHeader.putTag(Tag("frozen", 0));
+ _datHeaderLen = datHeader.writeFile(*_datFile);
+ if (_idxFile.get()) {
+ if ( ! _idxFile->OpenWriteOnlyTruncate()) {
+ LOG(error, "could not open %s: %s",
+ _idxFile->GetFileName(), getLastErrorString().c_str());
+ throw IllegalStateException(
+ vespalib::make_string(
+ "Failed opening attribute idx file '%s'"
+ " for writing",
+ _idxFile->GetFileName()));
+ }
+ vespalib::FileHeader idxHeader;
+ fileHeaderContext.addTags(idxHeader, _idxFile->GetFileName());
+ idxHeader.putTag(Tag("desc", "Attribute vector idx file"));
+ idxHeader.putTag(Tag("datatype",
+ _config.basicType().asString()));
+ idxHeader.putTag(Tag("collectiontype",
+ _config.collectionType().asString()));
+ idxHeader.putTag(Tag("docIdLimit", docIdLimit));
+ idxHeader.putTag(Tag("frozen", 0));
+ _idxHeaderLen = idxHeader.writeFile(*_idxFile);
+ if ( ! _idxFile->CheckedWrite(&_currIdx, sizeof(_currIdx))) {
+ LOG(error, "could not write to %s: %s",
+ _idxFile->GetFileName(), getLastErrorString().c_str());
+ throw IllegalStateException(
+ vespalib::make_string(
+ "Failed writing first idx"
+ " to attribute idx file '%s'",
+ _weightFile->GetFileName()));
+ }
+ if (_weightFile.get()) {
+ if ( ! _weightFile->OpenWriteOnlyTruncate()) {
+ LOG(error, "could not open %s: %s",
+ _weightFile->GetFileName(), getLastErrorString().c_str());
+ throw IllegalStateException(
+ vespalib::make_string(
+ "Failed opening attribute weight file '%s'"
+ " for writing",
+ _weightFile->GetFileName()));
+ }
+ vespalib::FileHeader weightHeader;
+ fileHeaderContext.addTags(weightHeader,
+ _weightFile->GetFileName());
+ weightHeader.putTag(Tag("desc", "Attribute vector weight file"));
+ weightHeader.putTag(Tag("datatype",
+ _config.basicType().asString()));
+ weightHeader.putTag(Tag("collectiontype",
+ _config.collectionType().asString()));
+ weightHeader.putTag(Tag("docIdLimit", docIdLimit));
+ weightHeader.putTag(Tag("frozen", 0));
+ _weightHeaderLen = weightHeader.writeFile(*_weightFile);
+ }
+ }
+}
+
+
+void
+AttributeFile::enableDirectIO()
+{
+ _datFile->EnableDirectIO();
+ if (_idxFile.get()) {
+ _idxFile->EnableDirectIO();
+ if (_weightFile.get()) {
+ _weightFile->EnableDirectIO();
+ }
+ }
+}
+
+
+void
+AttributeFile::Close()
+{
+ if (_datFile->IsOpened()) {
+ bool writeMode = _datFile->IsWriteMode();
+ _datFile->Flush();
+ _datFile->Close();
+ if (writeMode) {
+ updateHeader(_datFile->GetFileName());
+ }
+ }
+ if (_idxFile.get() != NULL && _idxFile->IsOpened()) {
+ bool writeMode = _idxFile->IsWriteMode();
+ _idxFile->Flush();
+ _idxFile->Close();
+ if (writeMode) {
+ updateHeader(_idxFile->GetFileName());
+ }
+ }
+ if (_weightFile.get() != NULL && _weightFile->IsOpened()) {
+ bool writeMode = _weightFile->IsWriteMode();
+ _weightFile->Flush();
+ _weightFile->Close();
+ if (writeMode) {
+ updateHeader(_weightFile->GetFileName());
+ }
+ }
+}
+
+
+AttributeFile::AttributeFile(const vespalib::string &fileName,
+ const Config &config)
+ : _currIdx(0),
+ _datFile(new Fast_BufferedFile( new FastOS_File((fileName + ".dat").c_str()))),
+ _idxFile(config.collectionType().isMultiValue() ?
+ new Fast_BufferedFile(new FastOS_File((fileName + ".idx").c_str())) :
+ NULL),
+ _weightFile(config.collectionType().isWeightedSet() ?
+ new Fast_BufferedFile( new FastOS_File((fileName + ".weight").c_str())) :
+ NULL),
+ _fileName(fileName),
+ _config(config),
+ _datHeaderLen(0u),
+ _idxHeaderLen(0u),
+ _weightHeaderLen(0u),
+ _datFileSize(0),
+ _idxFileSize(0)
+{
+}
+
+
+AttributeFile::~AttributeFile(void)
+{
+ Close();
+}
+
+
+bool
+AttributeFile::seekIdxPos(size_t idxPos)
+{
+ bool retval(false);
+ if (_idxFile.get()) {
+ _idxFile->SetPosition(_idxHeaderLen + idxPos * sizeof(uint32_t));
+ retval = (_idxFile->Read(&_currIdx, sizeof(_currIdx)) ==
+ sizeof(_currIdx));
+ }
+ return retval;
+}
+
+
+bool
+AttributeFile::read(Record &record)
+{
+ bool retval(true);
+ uint32_t nextIdx(_currIdx + 1);
+ if (_idxFile.get()) {
+ if (static_cast<uint64_t>(_idxFile->GetPosition()) >= _idxFileSize) {
+ retval = false;
+ } else {
+ retval = (_idxFile->Read(&nextIdx, sizeof(nextIdx))
+ == sizeof(nextIdx));
+ assert(nextIdx >= _currIdx);
+ }
+ } else {
+ if (static_cast<uint64_t>(_datFile->GetPosition()) >= _datFileSize) {
+ retval = false;
+ }
+ }
+ if (retval) {
+ retval = record.read(*this, nextIdx - _currIdx);
+ _currIdx = nextIdx;
+ }
+
+ return retval;
+}
+
+
+bool
+AttributeFile::write(const Record & record)
+{
+ bool retval(record.write(*this));
+ if (retval && _idxFile.get()) {
+ _currIdx += record.getValueCount();
+ retval = _idxFile->CheckedWrite(&_currIdx, sizeof(_currIdx));
+ }
+
+ return retval;
+}
+
+
+std::unique_ptr<AttributeFile::Record>
+AttributeFile::getRecord()
+{
+ std::unique_ptr<Record> record;
+ switch (_config.basicType().type()) {
+ case BasicType::UINT1:
+ case BasicType::UINT2:
+ case BasicType::UINT4:
+ case BasicType::INT8:
+ record.reset(new FixedRecord<int8_t>());
+ break;
+ case BasicType::INT16:
+ record.reset(new FixedRecord<int16_t>());
+ break;
+ case BasicType::INT32:
+ record.reset(new FixedRecord<int32_t>());
+ break;
+ case BasicType::INT64:
+ record.reset(new FixedRecord<int64_t>());
+ break;
+ case BasicType::FLOAT:
+ record.reset(new FixedRecord<float>());
+ break;
+ case BasicType::DOUBLE:
+ record.reset(new FixedRecord<double>());
+ break;
+ case BasicType::STRING:
+ record.reset(new VariableRecord());
+ break;
+ default:
+ break;
+ }
+ return record;
+}
+
+
+template <typename T>
+bool
+AttributeFile::FixedRecord<T>::onWrite(AttributeFile & dest) const
+{
+ bool retval(dest._datFile->CheckedWrite(&_data[0],
+ _data.size() * sizeof(T)));
+ if (retval && dest._weightFile.get()) {
+ retval = dest._weightFile->CheckedWrite(&_weight[0],
+ _weight.size() * sizeof(int32_t));
+ }
+ return retval;
+}
+
+
+bool
+AttributeFile::VariableRecord::onWrite(AttributeFile & dest) const
+{
+ bool retval(dest._datFile->CheckedWrite(&_data[0], _data.size()));
+ if (retval && dest._weightFile.get()) {
+ retval = dest._weightFile->CheckedWrite(&_weight[0],
+ _weight.size() * sizeof(int32_t));
+ }
+ return retval;
+}
+
+
+void
+AttributeFile::VariableRecord::setValue(const void * v, size_t len)
+{
+ _data.resize(len);
+ memcpy(&_data[0], v, len);
+ _weight.clear();
+}
+
+
+size_t
+AttributeFile::VariableRecord::getValueCount() const
+{
+ size_t numValues(_weight.size());
+ if ( numValues == 0) {
+ for(size_t i(0), m(_data.size()); i < m; i++) {
+ if (_data[i] == 0) {
+ numValues++;
+ }
+ }
+ }
+ return numValues;
+}
+
+
+template <typename T>
+bool
+AttributeFile::FixedRecord<T>::onRead(AttributeFile &src, size_t numValues)
+{
+ bool retval(true);
+ _data.resize(numValues);
+ if (numValues) {
+ const int bytesRead = src._datFile->Read(&_data[0],
+ _data.size() * sizeof(T));
+ retval = (bytesRead == int(_data.size() * sizeof(T)));
+ }
+ if (src._weightFile.get()) {
+ _weight.resize(numValues);
+ if (numValues && retval) {
+ const int bytesRead = src._weightFile->Read(&_weight[0],
+ _weight.size() * sizeof(uint32_t));
+ retval = (bytesRead == int(_weight.size() * sizeof(uint32_t)));
+ }
+ }
+ return retval;
+}
+
+
+bool
+AttributeFile::VariableRecord::onRead(AttributeFile &src, size_t numValues)
+{
+ bool retval(true);
+ _data.resize(0);
+ if (numValues) {
+ size_t stringsRead(0);
+ for (int c; (stringsRead < numValues) &&
+ ((c = src._datFile->GetByte()) >= 0); ) {
+ _data.push_back(c);
+ if (c == 0) {
+ stringsRead++;
+ }
+ }
+ retval = (stringsRead == numValues);
+ }
+ if (src._weightFile.get()) {
+ _weight.resize(numValues);
+ if (numValues && retval) {
+ const int bytesRead = src._weightFile->Read(&_weight[0],
+ _weight.size() * sizeof(uint32_t));
+ retval = (bytesRead == int(_weight.size() * sizeof(uint32_t)));
+ }
+ }
+ return retval;
+}
+
+
+}
diff --git a/searchlib/src/vespa/searchlib/attribute/attributefile.h b/searchlib/src/vespa/searchlib/attribute/attributefile.h
new file mode 100644
index 00000000000..895b2472dfb
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/attributefile.h
@@ -0,0 +1,113 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/searchlib/attribute/attributevector.h>
+#include <vespa/searchlib/common/fileheadercontext.h>
+#include <vespa/fastlib/io/bufferedfile.h>
+
+namespace search {
+
+namespace common {
+ class FileHeaderContext;
+}
+
+class AttributeFile
+{
+public:
+ class Record {
+ public:
+ virtual ~Record() { }
+ virtual size_t getValueCount() const = 0;
+ virtual void setValue(const void * v, size_t len) = 0;
+ protected:
+ std::vector<int32_t> _weight;
+ private:
+ bool write(AttributeFile & dest) const { return onWrite(dest); }
+ bool read(AttributeFile & src, size_t numValues) { return onRead(src, numValues); }
+ virtual bool onWrite(AttributeFile & dest) const = 0;
+ virtual bool onRead(AttributeFile & src, size_t numValues) = 0;
+
+ friend class AttributeFile;
+ };
+ template <typename T>
+ class FixedRecord : public Record
+ {
+ public:
+ virtual size_t getValueCount() const { return _data.size(); }
+ private:
+ virtual void
+ setValue(const void * v, size_t len) {
+ assert(len == sizeof(T));
+ (void) len;
+ _data.resize(1);
+ _weight.clear();
+ _data[0] = * static_cast<const T *>(v);
+ }
+
+ virtual bool onWrite(AttributeFile & dest) const;
+ virtual bool onRead(AttributeFile & src, size_t numValues);
+
+ std::vector<T> _data;
+ };
+
+ class VariableRecord : public Record
+ {
+ public:
+ virtual size_t
+ getValueCount() const;
+ private:
+ virtual void setValue(const void * v, size_t len);
+ virtual bool onWrite(AttributeFile & dest) const;
+ virtual bool onRead(AttributeFile & src, size_t numValues);
+ std::vector<char> _data;
+ };
+protected:
+ typedef attribute::Config Config;
+public:
+ AttributeFile(const vespalib::string & fileName, const Config & config);
+
+ ~AttributeFile(void);
+
+ std::unique_ptr<Record> getRecord();
+ bool read(Record & record);
+ bool write(const Record & toWrite);
+ void enableDirectIO();
+protected:
+ void OpenReadOnly();
+ void OpenWriteOnly(const search::common::FileHeaderContext &
+ fileHeaderContext,
+ uint32_t docIdLimit);
+ void Close(void);
+ bool seekIdxPos(size_t idxPos);
+private:
+ uint32_t _currIdx;
+ std::unique_ptr<Fast_BufferedFile> _datFile;
+ std::unique_ptr<Fast_BufferedFile> _idxFile;
+ std::unique_ptr<Fast_BufferedFile> _weightFile;
+ vespalib::string _fileName;
+ Config _config;
+ uint32_t _datHeaderLen;
+ uint32_t _idxHeaderLen;
+ uint32_t _weightHeaderLen;
+ uint64_t _datFileSize;
+ uint64_t _idxFileSize;
+};
+
+class ReadAttributeFile : public AttributeFile
+{
+public:
+ ReadAttributeFile(const vespalib::string &fileName, const Config &config);
+};
+
+class WriteAttributeFile : public AttributeFile
+{
+public:
+ WriteAttributeFile(const vespalib::string &fileName,
+ const Config &config,
+ const search::common::FileHeaderContext &
+ fileHeaderContext,
+ uint32_t docIdLimit);
+};
+
+}
+
diff --git a/searchlib/src/vespa/searchlib/attribute/attributefilebufferwriter.cpp b/searchlib/src/vespa/searchlib/attribute/attributefilebufferwriter.cpp
new file mode 100644
index 00000000000..15eceae889d
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/attributefilebufferwriter.cpp
@@ -0,0 +1,48 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "attributefilebufferwriter.h"
+
+namespace search
+{
+
+AttributeFileBufferWriter::
+AttributeFileBufferWriter(IAttributeFileWriter &fileWriter)
+ : BufferWriter(),
+ _buf(),
+ _bytesWritten(0),
+ _incompleteBuffers(0),
+ _fileWriter(fileWriter)
+{
+ _buf = _fileWriter.allocBuf(BUFFER_SIZE);
+ assert(_buf->getFreeLen() >= BUFFER_SIZE);
+ setup(_buf->getFree(), BUFFER_SIZE);
+}
+
+
+AttributeFileBufferWriter::~AttributeFileBufferWriter()
+{
+ assert(usedLen() == 0);
+}
+
+
+void
+AttributeFileBufferWriter::flush()
+{
+ assert(_incompleteBuffers == 0); // all previous buffers must have been full
+ size_t nowLen = usedLen();
+ if (nowLen != BUFFER_SIZE) {
+ // buffer is not full, only allowed for last buffer
+ ++_incompleteBuffers;
+ }
+ if (nowLen == 0) {
+ return; // empty buffer
+ }
+ assert(_buf->getDataLen() == 0);
+ onFlush(nowLen);
+ assert(_buf->getFreeLen() >= BUFFER_SIZE);
+ setup(_buf->getFree(), BUFFER_SIZE);
+ _bytesWritten += nowLen;
+}
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/attribute/attributefilebufferwriter.h b/searchlib/src/vespa/searchlib/attribute/attributefilebufferwriter.h
new file mode 100644
index 00000000000..580e4ed9f96
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/attributefilebufferwriter.h
@@ -0,0 +1,39 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "iattributefilewriter.h"
+#include <vespa/searchlib/util/bufferwriter.h>
+
+namespace search
+{
+
+/*
+ * BufferWriter implementation that passes full buffers on to
+ * IAttributeFileWriter.
+ */
+class AttributeFileBufferWriter : public BufferWriter
+{
+protected:
+ using BufferBuf = IAttributeFileWriter::BufferBuf;
+ using Buffer = IAttributeFileWriter::Buffer;
+ Buffer _buf;
+ size_t _bytesWritten;
+ uint32_t _incompleteBuffers;
+ IAttributeFileWriter &_fileWriter;
+
+ virtual void onFlush(size_t nowLen) = 0;
+public:
+ static constexpr size_t BUFFER_SIZE = 4 * 1024 * 1024;
+
+ AttributeFileBufferWriter(IAttributeFileWriter &fileWriter);
+
+ virtual ~AttributeFileBufferWriter();
+
+ virtual void flush() override;
+
+ size_t getBytesWritten() const { return _bytesWritten; }
+};
+
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/attribute/attributefilesavetarget.cpp b/searchlib/src/vespa/searchlib/attribute/attributefilesavetarget.cpp
new file mode 100644
index 00000000000..e5b5b6567f8
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/attributefilesavetarget.cpp
@@ -0,0 +1,105 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".searchlib.attribute.attributefilesavetarget");
+
+#include "attributefilesavetarget.h"
+#include "attributevector.h"
+#include <vespa/vespalib/data/fileheader.h>
+#include <vespa/vespalib/data/databuffer.h>
+#include <vespa/searchlib/common/fileheadercontext.h>
+#include <vespa/vespalib/util/error.h>
+
+using vespalib::getLastErrorString;
+
+namespace search
+{
+
+using common::FileHeaderContext;
+
+
+AttributeFileSaveTarget::
+AttributeFileSaveTarget(const TuneFileAttributes &tuneFileAttributes,
+ const FileHeaderContext &fileHeaderContext)
+ : IAttributeSaveTarget(),
+ _datWriter(tuneFileAttributes, fileHeaderContext, _cfg,
+ "Attribute vector data file"),
+ _idxWriter(tuneFileAttributes, fileHeaderContext, _cfg,
+ "Attribute vector idx file"),
+ _weightWriter(tuneFileAttributes, fileHeaderContext, _cfg,
+ "Attribute vector weight file"),
+ _udatWriter(tuneFileAttributes, fileHeaderContext, _cfg,
+ "Attribute vector unique data file")
+{
+}
+
+
+bool
+AttributeFileSaveTarget::setup()
+{
+ const vespalib::string & baseFileName = _cfg.getFileName();
+ vespalib::string datFileName(baseFileName + ".dat");
+ if (!_datWriter.open(datFileName)) {
+ return false;
+ }
+ if (_cfg.getEnumerated()) {
+ vespalib::string udatFileName(baseFileName + ".udat");
+ if (!_udatWriter.open(udatFileName)) {
+ return false;
+ }
+ }
+ if (_cfg.hasMultiValue()) {
+ vespalib::string idxFileName(baseFileName + ".idx");
+ if (!_idxWriter.open(idxFileName)) {
+ return false;
+ }
+ if (_cfg.hasWeightedSetType()) {
+ vespalib::string weightFileName(baseFileName + ".weight");
+ if (!_weightWriter.open(weightFileName)) {
+ return false;
+ }
+ }
+ }
+ return true;
+}
+
+void
+AttributeFileSaveTarget::close()
+{
+ _datWriter.close();
+ _udatWriter.close();
+ _idxWriter.close();
+ _weightWriter.close();
+}
+
+
+IAttributeFileWriter &
+AttributeFileSaveTarget::datWriter()
+{
+ return _datWriter;
+}
+
+
+IAttributeFileWriter &
+AttributeFileSaveTarget::idxWriter()
+{
+ return _idxWriter;
+}
+
+
+IAttributeFileWriter &
+AttributeFileSaveTarget::weightWriter()
+{
+ return _weightWriter;
+}
+
+IAttributeFileWriter &
+AttributeFileSaveTarget::udatWriter()
+{
+ return _udatWriter;
+}
+
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/attribute/attributefilesavetarget.h b/searchlib/src/vespa/searchlib/attribute/attributefilesavetarget.h
new file mode 100644
index 00000000000..9b931be4fb8
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/attributefilesavetarget.h
@@ -0,0 +1,41 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "iattributesavetarget.h"
+#include "attributefilewriter.h"
+
+namespace search
+{
+
+/**
+ * Class used to save an attribute vector to file(s).
+ **/
+class AttributeFileSaveTarget : public IAttributeSaveTarget
+{
+private:
+ AttributeFileWriter _datWriter;
+ AttributeFileWriter _idxWriter;
+ AttributeFileWriter _weightWriter;
+ AttributeFileWriter _udatWriter;
+
+public:
+ AttributeFileSaveTarget(const TuneFileAttributes &tuneFileAttributes,
+ const search::common::FileHeaderContext &
+ fileHeaderContext);
+
+ // Implements IAttributeSaveTarget
+ /** Setups this saveTarget by opening the relevant files **/
+ virtual bool setup() override;
+
+ /** Closes the files used **/
+ virtual void close() override;
+
+ virtual IAttributeFileWriter &datWriter() override;
+ virtual IAttributeFileWriter &idxWriter() override;
+ virtual IAttributeFileWriter &weightWriter() override;
+ virtual IAttributeFileWriter &udatWriter() override;
+};
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/attribute/attributefilewriter.cpp b/searchlib/src/vespa/searchlib/attribute/attributefilewriter.cpp
new file mode 100644
index 00000000000..d74b7b09c4e
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/attributefilewriter.cpp
@@ -0,0 +1,213 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".searchlib.attribute.attributefilewriter");
+
+#include "attributefilewriter.h"
+#include <vespa/vespalib/data/databuffer.h>
+#include <vespa/vespalib/data/fileheader.h>
+#include <vespa/searchlib/common/fileheadercontext.h>
+#include <vespa/searchlib/common/tunefileinfo.h>
+#include "attributefilebufferwriter.h"
+
+using search::common::FileHeaderContext;
+using vespalib::getLastErrorString;
+
+
+namespace search
+{
+
+namespace
+{
+
+const uint32_t headerAlign = 4096;
+const uint32_t MIN_ALIGNMENT = 4096;
+
+void
+writeDirectIOAligned(FastOS_FileInterface &file, const void *buf,
+ size_t length)
+{
+ const char * data(static_cast<const char *>(buf));
+ size_t remaining(length);
+ for (size_t maxChunk(2048*1024); maxChunk >= MIN_ALIGNMENT; maxChunk >>= 1) {
+ for ( ; remaining > maxChunk; remaining -= maxChunk, data += maxChunk) {
+ file.WriteBuf(data, maxChunk);
+ }
+ }
+ if (remaining > 0) {
+ file.WriteBuf(data, remaining);
+ }
+}
+
+
+void
+updateHeader(const vespalib::string &name, uint64_t fileBitSize)
+{
+ vespalib::FileHeader h(headerAlign);
+ FastOS_File f;
+ f.OpenReadWrite(name.c_str());
+ h.readFile(f);
+ FileHeaderContext::setFreezeTime(h);
+ typedef vespalib::GenericHeader::Tag Tag;
+ h.putTag(Tag("frozen", 1));
+ h.putTag(Tag("fileBitSize", fileBitSize));
+ h.rewriteFile(f);
+ f.Sync();
+ f.Close();
+}
+
+/*
+ * BufferWriter implementation that passes full buffers on to
+ * AttributeFileWriter.
+ */
+class FileBackedBufferWriter : public AttributeFileBufferWriter
+{
+public:
+ FileBackedBufferWriter(AttributeFileWriter &fileWriter);
+
+ virtual ~FileBackedBufferWriter();
+
+ virtual void onFlush(size_t nowLen) override;
+};
+
+
+FileBackedBufferWriter::FileBackedBufferWriter(AttributeFileWriter &fileWriter)
+ : AttributeFileBufferWriter(fileWriter)
+{
+}
+
+
+FileBackedBufferWriter::~FileBackedBufferWriter()
+{
+}
+
+
+void
+FileBackedBufferWriter::onFlush(size_t nowLen) {
+ // Note: Must use const ptr to indicate that buffer is pre-filled.
+ Buffer buf(std::make_unique<BufferBuf>
+ ((const char *) _buf->getFree(), nowLen));
+ assert(buf->getDataLen() == nowLen);
+ assert(buf->getData() == _buf->getFree());
+ _fileWriter.writeBuf(std::move(buf));
+}
+
+}
+
+
+AttributeFileWriter::
+AttributeFileWriter(const TuneFileAttributes &tuneFileAttributes,
+ const FileHeaderContext &fileHeaderContext,
+ const IAttributeSaveTarget::Config &cfg,
+ const vespalib::string &desc)
+ : _tuneFileAttributes(tuneFileAttributes),
+ _fileHeaderContext(fileHeaderContext),
+ _cfg(cfg),
+ _desc(desc),
+ _fileBitSize(0)
+{
+}
+
+
+AttributeFileWriter::~AttributeFileWriter()
+{
+}
+
+
+bool
+AttributeFileWriter::open(const vespalib::string &fileName)
+{
+ if (_tuneFileAttributes._write.getWantSyncWrites()) {
+ _file.EnableSyncWrites();
+ }
+ if (_tuneFileAttributes._write.getWantDirectIO()) {
+ _file.EnableDirectIO();
+ }
+ _file.OpenWriteOnlyTruncate(fileName.c_str());
+ if (!_file.IsOpened()) {
+ LOG(error, "Could not open attribute vector '%s' for writing: %s",
+ fileName.c_str(), getLastErrorString().c_str());
+ return false;
+ }
+ writeHeader();
+ return true;
+}
+
+
+void
+AttributeFileWriter::writeHeader()
+{
+ vespalib::FileHeader header(headerAlign);
+ _fileHeaderContext.addTags(header, _file.GetFileName());
+ addTags(header);
+ size_t headerLen = header.writeFile(_file);
+ assert((headerLen % MIN_ALIGNMENT) == 0);
+ _fileBitSize = headerLen * 8;
+}
+
+
+void
+AttributeFileWriter::addTags(vespalib::GenericHeader &header)
+{
+ typedef vespalib::GenericHeader::Tag Tag;
+ header.putTag(Tag("datatype", _cfg.getBasicType()));
+ header.putTag(Tag("collectiontype", _cfg.getCollectionType()));
+ header.putTag(Tag("uniqueValueCount", _cfg.getUniqueValueCount()));
+ header.putTag(Tag("totalValueCount", _cfg.getTotalValueCount()));
+ header.putTag(Tag("docIdLimit", _cfg.getNumDocs()));
+ header.putTag(Tag("frozen", 0));
+ header.putTag(Tag("fileBitSize", 0));
+ header.putTag(Tag("version", _cfg.getVersion()));
+ if (_cfg.getEnumerated()) {
+ header.putTag(Tag("enumerated", 1));
+ }
+ uint64_t createSerialNum = _cfg.getCreateSerialNum();
+ if (createSerialNum != 0u) {
+ header.putTag(Tag("createSerialNum", createSerialNum));
+ }
+ const vespalib::string &tensorType = _cfg.getTensorType();
+ if (!tensorType.empty()) {
+ header.putTag(Tag("tensortype", tensorType));;
+ }
+ header.putTag(Tag("desc", _desc));
+}
+
+
+AttributeFileWriter::Buffer
+AttributeFileWriter::allocBuf(size_t size)
+{
+ return std::make_unique<BufferBuf>(size, MIN_ALIGNMENT);
+}
+
+
+void
+AttributeFileWriter::writeBuf(Buffer buf)
+{
+ size_t bufLen = buf->getDataLen();
+ // TODO: pad to DirectIO boundary when burning bridges
+ writeDirectIOAligned(_file, buf->getData(), bufLen);
+ _fileBitSize += bufLen * 8;
+}
+
+
+void
+AttributeFileWriter::close()
+{
+ if (_file.IsOpened()) {
+ _file.Sync();
+ _file.Close();
+ updateHeader(_file.GetFileName(), _fileBitSize);
+ }
+}
+
+
+std::unique_ptr<BufferWriter>
+AttributeFileWriter::allocBufferWriter()
+{
+ return std::make_unique<FileBackedBufferWriter>(*this);
+}
+
+
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/attribute/attributefilewriter.h b/searchlib/src/vespa/searchlib/attribute/attributefilewriter.h
new file mode 100644
index 00000000000..fa0fd22b837
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/attributefilewriter.h
@@ -0,0 +1,58 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "iattributesavetarget.h"
+#include "iattributefilewriter.h"
+
+namespace vespalib
+{
+
+class GenericHeader;
+
+}
+
+namespace search
+{
+
+namespace common
+{
+
+class FileHeaderContext;
+
+}
+
+class TuneFileAttributes;
+
+/*
+ * Class to write to a single attribute vector file. Used by
+ * AttributeFileSaveTarget.
+ */
+class AttributeFileWriter : public IAttributeFileWriter
+{
+ FastOS_File _file;
+ const TuneFileAttributes &_tuneFileAttributes;
+ const search::common::FileHeaderContext &_fileHeaderContext;
+ const IAttributeSaveTarget::Config &_cfg;
+ vespalib::string _desc;
+ uint64_t _fileBitSize;
+
+ void addTags(vespalib::GenericHeader &header);
+
+ void writeHeader();
+public:
+ AttributeFileWriter(const TuneFileAttributes &tuneFileAttributes,
+ const search::common::FileHeaderContext &
+ fileHeaderContext,
+ const IAttributeSaveTarget::Config &cfg,
+ const vespalib::string &desc);
+ ~AttributeFileWriter();
+ virtual Buffer allocBuf(size_t size) override;
+ virtual void writeBuf(Buffer buf) override;
+ virtual std::unique_ptr<BufferWriter> allocBufferWriter() override;
+ bool open(const vespalib::string &fileName);
+ void close();
+};
+
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/attribute/attributeguard.cpp b/searchlib/src/vespa/searchlib/attribute/attributeguard.cpp
new file mode 100644
index 00000000000..11c473ed53f
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/attributeguard.cpp
@@ -0,0 +1,41 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "attributeguard.h"
+
+namespace search {
+
+AttributeGuard::AttributeGuard() :
+ ComponentGuard<AttributeVector>()
+{
+}
+
+AttributeGuard::AttributeGuard(const AttributeVector::SP & attr) :
+ ComponentGuard<AttributeVector>(attr)
+{
+}
+
+AttributeEnumGuard::AttributeEnumGuard(const AttributeVector::SP & attr) :
+ AttributeGuard(attr),
+ _lock()
+{
+ takeLock();
+}
+
+AttributeEnumGuard::AttributeEnumGuard(const AttributeGuard & attr) :
+ AttributeGuard(attr),
+ _lock()
+{
+ takeLock();
+}
+
+void AttributeEnumGuard::takeLock() {
+ if (valid()) {
+ std::shared_lock<std::shared_timed_mutex> take(get().getEnumLock(),
+ std::defer_lock);
+ _lock = std::move(take);
+ _lock.lock();
+ }
+}
+
+}
diff --git a/searchlib/src/vespa/searchlib/attribute/attributeguard.h b/searchlib/src/vespa/searchlib/attribute/attributeguard.h
new file mode 100644
index 00000000000..42eb381a9d5
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/attributeguard.h
@@ -0,0 +1,78 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/searchlib/attribute/attributevector.h>
+#include <boost/noncopyable.hpp>
+
+namespace search {
+
+/**
+ * General class for guarding a component that is using an underlying generation handler.
+ **/
+template <typename T>
+class ComponentGuard
+{
+private:
+ typename T::SP _component;
+ typedef vespalib::GenerationHandler::Guard Guard;
+ Guard _generationGuard;
+public:
+ ComponentGuard();
+ virtual ~ComponentGuard() { }
+ /**
+ * Creates a guard for the shared pointer of the given component.
+ **/
+ ComponentGuard(const typename T::SP & component);
+ const T & get() const { return *_component; }
+
+ const typename T::SP & getSP(void) const { return _component; }
+ const T * operator -> () const { return _component.get(); }
+ const T & operator * () const { return *_component.get(); }
+ T & get() { return *_component; }
+ T * operator -> () { return _component.get(); }
+ T & operator * () { return *_component.get(); }
+ bool valid() const { return _component.get() != NULL; }
+};
+
+template <typename T>
+ComponentGuard<T>::ComponentGuard() :
+ _component(),
+ _generationGuard()
+{
+}
+
+template <typename T>
+ComponentGuard<T>::ComponentGuard(const typename T::SP & component) :
+ _component(component),
+ _generationGuard(valid() ? _component->takeGenerationGuard() : Guard())
+{
+}
+
+/**
+ * This class makes sure that you will have a consistent view per document in the attribute vector
+ * while the guard is held.
+ **/
+class AttributeGuard : public ComponentGuard<AttributeVector>
+{
+public:
+ typedef std::unique_ptr<AttributeGuard> UP;
+ typedef std::shared_ptr<AttributeGuard> SP;
+ AttributeGuard();
+ AttributeGuard(const AttributeVector::SP & attribute);
+};
+
+/**
+ * This class makes sure that the attribute vector is not updated with enum changes while the guard is held.
+ **/
+class AttributeEnumGuard : public AttributeGuard, public boost::noncopyable
+{
+public:
+ explicit AttributeEnumGuard(const AttributeVector::SP & attribute);
+ explicit AttributeEnumGuard(const AttributeGuard & attribute);
+private:
+ mutable std::shared_lock<std::shared_timed_mutex> _lock;
+ void takeLock();
+};
+
+}
+
diff --git a/searchlib/src/vespa/searchlib/attribute/attributeiterators.cpp b/searchlib/src/vespa/searchlib/attribute/attributeiterators.cpp
new file mode 100644
index 00000000000..ff0b5d4514a
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/attributeiterators.cpp
@@ -0,0 +1,237 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+#include "attributeiterators.h"
+#include "attributeiterators.hpp"
+#include "postinglistattribute.h"
+
+namespace search {
+
+using queryeval::MinMaxPostingInfo;
+using fef::TermFieldMatchData;
+
+AttributeIteratorBase::AttributeIteratorBase(TermFieldMatchData * matchData) :
+ _matchData(matchData),
+ _matchPosition(NULL)
+{
+ fef::TermFieldMatchDataPosition pos;
+ _matchData->appendPosition(pos);
+ _matchPosition = _matchData->getPositions();
+}
+
+void
+AttributeIteratorBase::visitMembers(vespalib::ObjectVisitor &visitor) const
+{
+ SearchIterator::visitMembers(visitor);
+ visit(visitor, "tfmd.fieldId", _matchData->getFieldId());
+ visit(visitor, "tfmd.docId", _matchData->getDocId());
+}
+
+void
+FilterAttributeIterator::visitMembers(vespalib::ObjectVisitor &visitor) const
+{
+ AttributeIteratorBase::visitMembers(visitor);
+ visit(visitor, "docIdLimit", _docIdLimit);
+}
+
+void
+AttributeIterator::visitMembers(vespalib::ObjectVisitor &visitor) const
+{
+ AttributeIteratorBase::visitMembers(visitor);
+ visit(visitor, "docIdLimit", _docIdLimit);
+ visit(visitor, "weight", _weight);
+}
+
+
+void
+FlagAttributeIterator::doUnpack(uint32_t docId)
+{
+ _matchData->resetOnlyDocId(docId);
+}
+
+AttributePostingListIterator::
+ AttributePostingListIterator(bool hasWeight,
+ TermFieldMatchData *matchData)
+ : AttributeIteratorBase(matchData),
+ _hasWeight(hasWeight)
+ // _hasWeight(_searchContext.attribute().hasWeightedSetType())
+{
+}
+
+FilterAttributePostingListIterator::
+FilterAttributePostingListIterator(TermFieldMatchData *matchData)
+ : AttributeIteratorBase(matchData)
+{
+}
+
+void
+AttributeIterator::doUnpack(uint32_t docId)
+{
+ _matchData->resetOnlyDocId(docId);
+ _matchPosition->setElementWeight(_weight);
+}
+
+
+void
+FilterAttributeIterator::doUnpack(uint32_t docId)
+{
+ _matchData->resetOnlyDocId(docId);
+}
+
+template <>
+void
+AttributePostingListIteratorT<btree::
+BTreeConstIterator<uint32_t,
+ btree::BTreeNoLeafData,
+ btree::NoAggregated,
+ std::less<uint32_t>,
+ btree::BTreeDefaultTraits> >::
+doUnpack(uint32_t docId)
+{
+ _matchData->resetOnlyDocId(docId);
+ _matchPosition->setElementWeight(getWeight());
+}
+
+
+template <>
+void
+AttributePostingListIteratorT<btree::
+BTreeConstIterator<uint32_t,
+ int32_t,
+ btree::MinMaxAggregated,
+ std::less<uint32_t>,
+ btree::BTreeDefaultTraits> >::
+doUnpack(uint32_t docId)
+{
+ _matchData->resetOnlyDocId(docId);
+ _matchPosition->setElementWeight(getWeight());
+}
+
+
+template <>
+void
+FilterAttributePostingListIteratorT<btree::
+BTreeConstIterator<uint32_t,
+ btree::BTreeNoLeafData,
+ btree::NoAggregated,
+ std::less<uint32_t>,
+ btree::BTreeDefaultTraits> >::
+doUnpack(uint32_t docId)
+{
+ _matchData->resetOnlyDocId(docId);
+}
+
+
+template <>
+void
+FilterAttributePostingListIteratorT<btree::
+BTreeConstIterator<uint32_t,
+ int32_t,
+ btree::MinMaxAggregated,
+ std::less<uint32_t>,
+ btree::BTreeDefaultTraits> >::
+doUnpack(uint32_t docId)
+{
+ _matchData->resetOnlyDocId(docId);
+}
+
+
+template <>
+void
+AttributePostingListIteratorT<InnerAttributePostingListIterator>::
+setupPostingInfo(void)
+{
+ if (_iterator.valid()) {
+ _postingInfo = MinMaxPostingInfo(1, 1);
+ _postingInfoValid = true;
+ }
+}
+
+
+template <>
+void
+AttributePostingListIteratorT<WeightedInnerAttributePostingListIterator>::
+setupPostingInfo(void)
+{
+ if (_iterator.valid()) {
+ const btree::MinMaxAggregated &a(_iterator.getAggregated());
+ _postingInfo = MinMaxPostingInfo(a.getMin(), a.getMax());
+ _postingInfoValid = true;
+ }
+}
+
+
+template <>
+void
+AttributePostingListIteratorT<DocIdMinMaxIterator<AttributePosting> >::
+setupPostingInfo(void)
+{
+ if (_iterator.valid()) {
+ _postingInfo = MinMaxPostingInfo(1, 1);
+ _postingInfoValid = true;
+ }
+}
+
+
+template <>
+void
+AttributePostingListIteratorT<DocIdMinMaxIterator<AttributeWeightPosting> >::
+setupPostingInfo(void)
+{
+ if (_iterator.valid()) {
+ const btree::MinMaxAggregated a(_iterator.getAggregated());
+ _postingInfo = MinMaxPostingInfo(a.getMin(), a.getMax());
+ _postingInfoValid = true;
+ }
+}
+
+template <>
+void
+FilterAttributePostingListIteratorT<InnerAttributePostingListIterator>::
+setupPostingInfo(void)
+{
+ if (_iterator.valid()) {
+ _postingInfo = MinMaxPostingInfo(1, 1);
+ _postingInfoValid = true;
+ }
+}
+
+
+template <>
+void
+FilterAttributePostingListIteratorT<WeightedInnerAttributePostingListIterator>::
+setupPostingInfo(void)
+{
+ if (_iterator.valid()) {
+ _postingInfo = MinMaxPostingInfo(1, 1);
+ _postingInfoValid = true;
+ }
+}
+
+
+template <>
+void
+FilterAttributePostingListIteratorT<DocIdMinMaxIterator<AttributePosting> >::
+setupPostingInfo(void)
+{
+ if (_iterator.valid()) {
+ _postingInfo = MinMaxPostingInfo(1, 1);
+ _postingInfoValid = true;
+ }
+}
+
+
+template <>
+void
+FilterAttributePostingListIteratorT<DocIdMinMaxIterator<AttributeWeightPosting> >::
+setupPostingInfo(void)
+{
+ if (_iterator.valid()) {
+ _postingInfo = MinMaxPostingInfo(1, 1);
+ _postingInfoValid = true;
+ }
+}
+
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/attribute/attributeiterators.h b/searchlib/src/vespa/searchlib/attribute/attributeiterators.h
new file mode 100644
index 00000000000..e2cf6a96e49
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/attributeiterators.h
@@ -0,0 +1,567 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/common/bitvector.h>
+#include <vespa/searchlib/fef/termfieldmatchdata.h>
+#include <vespa/searchlib/fef/termfieldmatchdataposition.h>
+#include <vespa/searchlib/queryeval/searchiterator.h>
+#include "attributevector.h"
+#include <vespa/searchlib/btree/btreenode.h>
+#include <vespa/searchlib/btree/btreeiterator.h>
+#include <vespa/vespalib/objects/visit.h>
+#include "dociditerator.h"
+
+namespace search {
+
+/**
+ * Abstract superclass for all attribute iterators with convenience function
+ * for getting the type of the iterator (used for testing).
+ **/
+class AttributeIteratorBase : public queryeval::SearchIterator
+{
+protected:
+ void visitMembers(vespalib::ObjectVisitor &visitor) const override;
+ fef::TermFieldMatchData * _matchData;
+ fef::TermFieldMatchDataPosition * _matchPosition;
+
+public:
+ AttributeIteratorBase(fef::TermFieldMatchData * matchData);
+ Trinary is_strict() const override { return Trinary::False; }
+};
+
+
+/**
+ * This class acts as an iterator over documents that are results for
+ * the subquery represented by the search context object associated
+ * with this iterator. The search context object contains an
+ * attribute vector that does not use posting lists.
+ *
+ * @param SC the specialized search context type associated with this iterator
+ */
+
+class AttributeIterator : public AttributeIteratorBase
+{
+public:
+ AttributeIterator(fef::TermFieldMatchData * matchData, uint32_t docIdLimit)
+ : AttributeIteratorBase(matchData),
+ _docIdLimit(docIdLimit),
+ _weight(1)
+ {
+ }
+protected:
+ void visitMembers(vespalib::ObjectVisitor &visitor) const override;
+ void doUnpack(uint32_t docId) override;
+ uint32_t _docIdLimit;
+ int32_t _weight;
+};
+
+class FilterAttributeIterator : public AttributeIteratorBase
+{
+public:
+ FilterAttributeIterator(fef::TermFieldMatchData * matchData, uint32_t docIdLimit)
+ : AttributeIteratorBase(matchData),
+ _docIdLimit(docIdLimit)
+ {
+ _matchPosition->setElementWeight(1);
+ }
+protected:
+ void visitMembers(vespalib::ObjectVisitor &visitor) const override;
+ void doUnpack(uint32_t docId) override;
+ uint32_t _docIdLimit;
+};
+
+template <typename SC>
+class AttributeIteratorT : public AttributeIterator
+{
+private:
+ void doSeek(uint32_t docId) override;
+ void visitMembers(vespalib::ObjectVisitor &visitor) const override;
+
+protected:
+ const SC & _searchContext;
+
+public:
+ AttributeIteratorT(const SC &searchContext, fef::TermFieldMatchData *matchData);
+ bool seekFast(uint32_t docId) const { return _searchContext.cmp(docId); }
+};
+
+
+template <typename SC>
+class FilterAttributeIteratorT : public FilterAttributeIterator
+{
+private:
+ void doSeek(uint32_t docId) override;
+ void visitMembers(vespalib::ObjectVisitor &visitor) const override;
+
+protected:
+ const SC & _searchContext;
+
+public:
+ FilterAttributeIteratorT(const SC &searchContext,
+ fef::TermFieldMatchData *matchData);
+ bool seekFast(uint32_t docId) const { return _searchContext.cmp(docId); }
+};
+
+
+/**
+ * This class acts as a strict iterator over documents that are
+ * results for the subquery represented by the search context object
+ * associated with this iterator. The search context object contains
+ * an attribute vector that does not use posting lists.
+ *
+ * @param SC the specialized search context type associated with this iterator
+ */
+template <typename SC>
+class AttributeIteratorStrict : public AttributeIteratorT<SC>
+{
+private:
+ using AttributeIteratorT<SC>::_docIdLimit;
+ using AttributeIteratorT<SC>::_searchContext;
+ using AttributeIteratorT<SC>::setDocId;
+ using AttributeIteratorT<SC>::setAtEnd;
+ using AttributeIteratorT<SC>::_weight;
+ using Trinary=vespalib::Trinary;
+ void doSeek(uint32_t docId) override;
+ Trinary is_strict() const override { return Trinary::True; }
+public:
+ AttributeIteratorStrict(const SC &searchContext, fef::TermFieldMatchData * matchData)
+ : AttributeIteratorT<SC>(searchContext, matchData)
+ {
+ }
+};
+
+
+template <typename SC>
+class FilterAttributeIteratorStrict : public FilterAttributeIteratorT<SC>
+{
+private:
+ using FilterAttributeIteratorT<SC>::_docIdLimit;
+ using FilterAttributeIteratorT<SC>::_searchContext;
+ using FilterAttributeIteratorT<SC>::setDocId;
+ using FilterAttributeIteratorT<SC>::setAtEnd;
+ using Trinary=vespalib::Trinary;
+ void doSeek(uint32_t docId) override;
+ Trinary is_strict() const override { return Trinary::True; }
+public:
+ FilterAttributeIteratorStrict(const SC &searchContext, fef::TermFieldMatchData * matchData)
+ : FilterAttributeIteratorT<SC>(searchContext, matchData)
+ {
+ }
+};
+
+
+template <typename SC>
+void
+AttributeIteratorT<SC>::doSeek(uint32_t docId)
+{
+ if (__builtin_expect(docId >= _docIdLimit, false)) {
+ setAtEnd();
+ } else if (_searchContext.cmp(docId, _weight)) {
+ setDocId(docId);
+ }
+}
+
+template <typename SC>
+void
+FilterAttributeIteratorT<SC>::doSeek(uint32_t docId)
+{
+ if (__builtin_expect(docId >= _docIdLimit, false)) {
+ setAtEnd();
+ } else if (_searchContext.cmp(docId)) {
+ setDocId(docId);
+ }
+}
+
+template <typename SC>
+void
+AttributeIteratorStrict<SC>::doSeek(uint32_t docId)
+{
+ for (uint32_t nextId = docId; nextId < _docIdLimit; ++nextId) {
+ if (_searchContext.cmp(nextId, _weight)) {
+ setDocId(nextId);
+ return;
+ }
+ }
+ setAtEnd();
+}
+
+template <typename SC>
+void
+FilterAttributeIteratorStrict<SC>::doSeek(uint32_t docId)
+{
+ for (uint32_t nextId = docId; nextId < _docIdLimit; ++nextId) {
+ if (_searchContext.cmp(nextId)) {
+ setDocId(nextId);
+ return;
+ }
+ }
+ setAtEnd();
+}
+
+template <typename SC>
+void
+AttributeIteratorT<SC>::visitMembers(vespalib::ObjectVisitor &visitor) const
+{
+ AttributeIterator::visitMembers(visitor);
+ visit(visitor, "searchcontext.attribute", _searchContext.attribute().getName());
+ visit(visitor, "searchcontext.queryterm", _searchContext.queryTerm());
+}
+
+template <typename SC>
+void
+FilterAttributeIteratorT<SC>::visitMembers(vespalib::ObjectVisitor &visitor) const
+{
+ FilterAttributeIterator::visitMembers(visitor);
+ visit(visitor, "searchcontext.attribute", _searchContext.attribute().getName());
+ visit(visitor, "searchcontext.queryterm", _searchContext.queryTerm());
+}
+
+template <typename SC>
+AttributeIteratorT<SC>::AttributeIteratorT(const SC &searchContext, fef::TermFieldMatchData *matchData)
+ : AttributeIterator(matchData, searchContext._attr.getCommittedDocIdLimit()),
+ _searchContext(searchContext)
+{
+}
+
+
+template <typename SC>
+FilterAttributeIteratorT<SC>::FilterAttributeIteratorT(const SC &searchContext, fef::TermFieldMatchData *matchData)
+ : FilterAttributeIterator(matchData, searchContext._attr.getCommittedDocIdLimit()),
+ _searchContext(searchContext)
+{
+}
+
+
+/**
+ * This class acts as an iterator over documents that are results for
+ * the subquery represented by the search context object associated
+ * with this iterator. The search context object contains an
+ * attribute vector that uses underlying posting lists, and the search
+ * context will setup a posting list iterator which is used by this
+ * class. This iterator is always strict.
+ *
+ * @param PL the posting list iterator type to work as an iterator over
+ */
+class AttributePostingListIterator : public AttributeIteratorBase
+{
+public:
+ AttributePostingListIterator(bool hasWeight, fef::TermFieldMatchData *matchData);
+ Trinary is_strict() const override { return Trinary::True; }
+protected:
+ bool _hasWeight;
+};
+
+
+class FilterAttributePostingListIterator : public AttributeIteratorBase
+{
+public:
+ FilterAttributePostingListIterator(fef::TermFieldMatchData *matchData);
+ Trinary is_strict() const override { return Trinary::True; }
+};
+
+
+typedef btree::BTreeConstIterator<uint32_t,
+ btree::BTreeNoLeafData,
+ btree::NoAggregated,
+ std::less<uint32_t>,
+ btree::BTreeDefaultTraits>
+InnerAttributePostingListIterator;
+
+typedef btree::BTreeConstIterator<uint32_t,
+ int32_t,
+ btree::MinMaxAggregated,
+ std::less<uint32_t>,
+ btree::BTreeDefaultTraits>
+WeightedInnerAttributePostingListIterator;
+
+template <typename PL>
+class AttributePostingListIteratorT : public AttributePostingListIterator
+{
+private:
+ PL _iterator;
+ queryeval::MinMaxPostingInfo _postingInfo;
+ bool _postingInfoValid;
+
+ void doSeek(uint32_t docId) override;
+ void doUnpack(uint32_t docId) override;
+ void setupPostingInfo() { }
+ int32_t getWeight() { return _iterator.getData(); }
+
+ const queryeval::PostingInfo * getPostingInfo() const override {
+ return _postingInfoValid ? &_postingInfo : NULL;
+ }
+
+ void initRange(uint32_t begin, uint32_t end) override {
+ AttributePostingListIterator::initRange(begin, end);
+ _iterator.lower_bound(begin);
+ if (!_iterator.valid() || isAtEnd(_iterator.getKey())) {
+ setAtEnd();
+ } else {
+ setDocId(_iterator.getKey());
+ }
+ }
+
+public:
+ // Note: iterator constructor argument is destroyed
+ AttributePostingListIteratorT(PL &iterator,
+ bool hasWeight,
+ fef::TermFieldMatchData *matchData);
+};
+
+template <typename PL>
+class FilterAttributePostingListIteratorT
+ : public FilterAttributePostingListIterator
+{
+private:
+ PL _iterator;
+ queryeval::MinMaxPostingInfo _postingInfo;
+ bool _postingInfoValid;
+
+ void doSeek(uint32_t docId) override;
+ void doUnpack(uint32_t docId) override;
+ void setupPostingInfo() { }
+
+ const queryeval::PostingInfo * getPostingInfo() const override {
+ return _postingInfoValid ? &_postingInfo : NULL;
+ }
+
+ void initRange(uint32_t begin, uint32_t end) override {
+ FilterAttributePostingListIterator::initRange(begin, end);
+ _iterator.lower_bound(begin);
+ if (!_iterator.valid() || isAtEnd(_iterator.getKey())) {
+ setAtEnd();
+ } else {
+ setDocId(_iterator.getKey());
+ }
+ }
+
+public:
+ // Note: iterator constructor argument is destroyed
+ FilterAttributePostingListIteratorT(PL &iterator,
+ fef::TermFieldMatchData *matchData);
+};
+
+
+template <>
+inline int32_t
+AttributePostingListIteratorT<
+ btree::BTreeConstIterator<uint32_t,
+ btree::BTreeNoLeafData,
+ btree::NoAggregated,
+ std::less<uint32_t>,
+ btree::BTreeDefaultTraits> >::
+getWeight()
+{
+ return 1; // default weight 1 for single value attributes
+}
+
+template <>
+void
+AttributePostingListIteratorT<btree::
+BTreeConstIterator<uint32_t,
+ btree::BTreeNoLeafData,
+ btree::NoAggregated,
+ std::less<uint32_t>,
+ btree::BTreeDefaultTraits> >::
+doUnpack(uint32_t docId);
+
+
+template <>
+void
+AttributePostingListIteratorT<btree::
+BTreeConstIterator<uint32_t,
+ int32_t,
+ btree::MinMaxAggregated,
+ std::less<uint32_t>,
+ btree::BTreeDefaultTraits> >::
+doUnpack(uint32_t docId);
+
+
+template <>
+void
+AttributePostingListIteratorT<InnerAttributePostingListIterator>::
+setupPostingInfo();
+
+
+template <>
+void
+AttributePostingListIteratorT<WeightedInnerAttributePostingListIterator>::
+setupPostingInfo();
+
+
+template <>
+void
+AttributePostingListIteratorT<DocIdMinMaxIterator<AttributePosting> >::
+setupPostingInfo();
+
+
+template <>
+void
+AttributePostingListIteratorT<DocIdMinMaxIterator<AttributeWeightPosting> >::
+setupPostingInfo();
+
+
+template <>
+void
+FilterAttributePostingListIteratorT<InnerAttributePostingListIterator>::
+setupPostingInfo();
+
+
+template <>
+void
+FilterAttributePostingListIteratorT<WeightedInnerAttributePostingListIterator>::
+setupPostingInfo();
+
+
+template <>
+void
+FilterAttributePostingListIteratorT<DocIdMinMaxIterator<AttributePosting> >::
+setupPostingInfo();
+
+
+template <>
+void
+FilterAttributePostingListIteratorT<DocIdMinMaxIterator<AttributeWeightPosting> >::
+setupPostingInfo();
+
+
+template <typename PL>
+AttributePostingListIteratorT<PL>::
+AttributePostingListIteratorT(PL &iterator,
+ bool hasWeight,
+ fef::TermFieldMatchData *matchData)
+ : AttributePostingListIterator(hasWeight, matchData),
+ _iterator(),
+ _postingInfo(1, 1),
+ _postingInfoValid(false)
+{
+ _iterator.swap(iterator);
+ setupPostingInfo();
+}
+
+
+template <typename PL>
+FilterAttributePostingListIteratorT<PL>::
+FilterAttributePostingListIteratorT(PL &iterator,
+ fef::TermFieldMatchData *matchData)
+ : FilterAttributePostingListIterator(matchData),
+ _iterator(),
+ _postingInfo(1, 1),
+ _postingInfoValid(false)
+{
+ _iterator.swap(iterator);
+ setupPostingInfo();
+ _matchPosition->setElementWeight(1);
+}
+
+/**
+ * This class acts as an iterator over a flag attribute.
+ */
+class FlagAttributeIterator : public AttributeIteratorBase
+{
+public:
+ FlagAttributeIterator(fef::TermFieldMatchData * matchData)
+ : AttributeIteratorBase(matchData)
+ {
+ }
+protected:
+ void doUnpack(uint32_t docId) override;
+};
+
+template <typename SC>
+class FlagAttributeIteratorT : public FlagAttributeIterator
+{
+private:
+ void doSeek(uint32_t docId) override;
+
+protected:
+ const SC & _sc;
+ uint32_t _docIdLimit;
+
+public:
+ FlagAttributeIteratorT(const SC &sc, fef::TermFieldMatchData * matchData)
+ : FlagAttributeIterator(matchData),
+ _sc(sc),
+ _docIdLimit(static_cast<const typename SC::Attribute &>
+ (sc.attribute()).getCommittedDocIdLimit())
+ {
+ }
+
+ void initRange(uint32_t begin, uint32_t end) override {
+ FlagAttributeIterator::initRange(begin, end);
+ if ( _sc._zeroHits ) {
+ setAtEnd();
+ }
+ }
+
+};
+
+template <typename SC>
+class FlagAttributeIteratorStrict : public FlagAttributeIteratorT<SC>
+{
+private:
+ using FlagAttributeIteratorT<SC>::_docIdLimit;
+ using FlagAttributeIteratorT<SC>::_sc;
+ using FlagAttributeIteratorT<SC>::setDocId;
+ using FlagAttributeIteratorT<SC>::setAtEnd;
+ using Trinary=vespalib::Trinary;
+ void doSeek(uint32_t docId) override;
+ Trinary is_strict() const override { return Trinary::True; }
+
+public:
+ FlagAttributeIteratorStrict(const SC &sc,
+ fef::TermFieldMatchData *matchData)
+ : FlagAttributeIteratorT<SC>(sc, matchData)
+ {
+ }
+};
+
+template <typename SC>
+void
+FlagAttributeIteratorStrict<SC>::doSeek(uint32_t docId)
+{
+ const SC & sc(_sc);
+ const typename SC::Attribute &attr =
+ static_cast<const typename SC::Attribute &>(sc.attribute());
+ for (int i = sc._low; (i <= sc._high); ++i) {
+ const BitVector * bv = attr.getBitVector(i);
+ if ((bv != NULL) && docId < _docIdLimit && bv->testBit(docId)) {
+ setDocId(docId);
+ return;
+ }
+ }
+
+ uint32_t minNextBit(search::endDocId);
+ for (int i = sc._low; (i <= sc._high); ++i) {
+ const BitVector * bv = attr.getBitVector(i);
+ if (bv != NULL && docId < _docIdLimit) {
+ uint32_t nextBit = bv->getNextTrueBit(docId);
+ minNextBit = std::min(nextBit, minNextBit);
+ }
+ }
+ if (minNextBit < _docIdLimit) {
+ setDocId(minNextBit);
+ } else {
+ setAtEnd();
+ }
+}
+
+template <typename SC>
+void
+FlagAttributeIteratorT<SC>::doSeek(uint32_t docId)
+{
+ const SC & sc(_sc);
+ const typename SC::Attribute &attr =
+ static_cast<const typename SC::Attribute &>(sc.attribute());
+ for (int i = sc._low; (i <= sc._high); ++i) {
+ const BitVector * bv = attr.getBitVector(i);
+ if ((bv != NULL) && docId < _docIdLimit && bv->testBit(docId)) {
+ setDocId(docId);
+ return;
+ }
+ }
+}
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/attribute/attributeiterators.hpp b/searchlib/src/vespa/searchlib/attribute/attributeiterators.hpp
new file mode 100644
index 00000000000..1d7448d04b1
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/attributeiterators.hpp
@@ -0,0 +1,62 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/btree/btreenode.hpp>
+#include <vespa/searchlib/btree/btreeiterator.hpp>
+
+namespace search
+{
+
+
+template <typename PL>
+void
+AttributePostingListIteratorT<PL>::doSeek(uint32_t docId)
+{
+ _iterator.linearSeek(docId);
+ if (_iterator.valid()) {
+ setDocId(_iterator.getKey());
+ } else {
+ setAtEnd();
+ }
+}
+
+
+template <typename PL>
+void
+FilterAttributePostingListIteratorT<PL>::doSeek(uint32_t docId)
+{
+ _iterator.linearSeek(docId);
+ if (_iterator.valid()) {
+ setDocId(_iterator.getKey());
+ } else {
+ setAtEnd();
+ }
+}
+
+
+template <typename PL>
+void
+AttributePostingListIteratorT<PL>::doUnpack(uint32_t docId)
+{
+ _matchData->resetOnlyDocId(docId);
+
+ if (_hasWeight) {
+ _matchPosition->setElementWeight(getWeight());
+ } else {
+ uint32_t numOccs(0);
+ for(; _iterator.valid() && (_iterator.getKey() == docId); numOccs += getWeight(), ++_iterator);
+ _matchPosition->setElementWeight(numOccs);
+ }
+}
+
+
+template <typename PL>
+void
+FilterAttributePostingListIteratorT<PL>::doUnpack(uint32_t docId)
+{
+ _matchData->resetOnlyDocId(docId);
+}
+
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/attribute/attributemanager.cpp b/searchlib/src/vespa/searchlib/attribute/attributemanager.cpp
new file mode 100644
index 00000000000..43bf6946feb
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/attributemanager.cpp
@@ -0,0 +1,279 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "attributemanager.h"
+#include <vespa/searchlib/attribute/attributecontext.h>
+#include <vespa/searchlib/attribute/attributefactory.h>
+#include <vespa/searchlib/attribute/attributemanager.h>
+#include <vespa/searchlib/attribute/attrvector.h>
+#include <vespa/searchlib/attribute/attributefile.h>
+#include <stdexcept>
+#include <vespa/searchlib/attribute/attrvector.hpp>
+#include <vespa/searchlib/attribute/multistringattribute.h>
+#include <vespa/vespalib/util/exceptions.h>
+#include "interlock.h"
+
+#include <vespa/log/log.h>
+LOG_SETUP(".searchlib.attributemanager");
+
+using vespalib::LockGuard;
+using vespalib::string;
+using vespalib::IllegalStateException;
+using search::attribute::IAttributeContext;
+
+namespace
+{
+
+vespalib::Monitor baseDirMonitor("attributemanagerbasedir", false);
+typedef std::set<string> BaseDirSet;
+BaseDirSet baseDirSet;
+
+static void
+waitBaseDir(const string &baseDir)
+{
+ if (baseDir.empty())
+ return;
+ vespalib::MonitorGuard guard(baseDirMonitor);
+ bool waited = false;
+
+ BaseDirSet::iterator it = baseDirSet.find(baseDir);
+ while (it != baseDirSet.end()) {
+ if (!waited) {
+ waited = true;
+ LOG(debug,
+ "AttributeManager: Waiting for basedir %s to be available",
+ baseDir.c_str());
+ }
+ guard.wait();
+ it = baseDirSet.find(baseDir);
+ }
+
+ baseDirSet.insert(baseDir);
+ if (waited)
+ LOG(debug,
+ "AttributeManager: basedir %s available",
+ baseDir.c_str());
+}
+
+
+static void
+dropBaseDir(const string &baseDir)
+{
+ if (baseDir.empty())
+ return;
+ vespalib::MonitorGuard guard(baseDirMonitor);
+
+ BaseDirSet::iterator it = baseDirSet.find(baseDir);
+ if (it == baseDirSet.end()) {
+ LOG(error,
+ "AttributeManager: Cannot drop basedir %s, already dropped",
+ baseDir.c_str());
+ } else
+ baseDirSet.erase(it);
+ guard.broadcast();
+}
+
+
+}
+
+namespace search {
+
+AttributeManager::AttributeManager()
+ : _attributes(),
+ _loadLock(),
+ _baseDir(),
+ _snapShot(),
+ _interlock(std::make_shared<attribute::Interlock>())
+{
+ LOG(debug,
+ "New attributeManager %p",
+ static_cast<const void *>(this));
+}
+
+
+AttributeManager::AttributeManager(const string & baseDir)
+ : _attributes(),
+ _loadLock(),
+ _baseDir(baseDir),
+ _snapShot(),
+ _interlock(std::make_shared<attribute::Interlock>())
+{
+ LOG(debug,
+ "New attributeManager %p, baseDir %s",
+ static_cast<const void *>(this),
+ baseDir.c_str());
+ waitBaseDir(baseDir);
+}
+
+
+void
+AttributeManager::setBaseDir(const string & base)
+{
+ dropBaseDir(_baseDir);
+ _baseDir = base;
+ LOG(debug,
+ "attributeManager %p new baseDir %s",
+ static_cast<const void *>(this),
+ _baseDir.c_str());
+ waitBaseDir(base);
+}
+
+
+AttributeManager::~AttributeManager(void)
+{
+ _attributes.clear();
+ LOG(debug,
+ "delete attributeManager %p baseDir %s",
+ static_cast<const void *>(this),
+ _baseDir.c_str());
+ dropBaseDir(_baseDir);
+}
+
+
+uint64_t AttributeManager::getMemoryFootprint() const
+{
+ uint64_t sum(0);
+ for(AttributeMap::const_iterator it(_attributes.begin()), mt(_attributes.end()); it != mt; it++) {
+ sum += it->second->getStatus().getAllocated();
+ }
+
+ return sum;
+}
+
+bool AttributeManager::hasReaders() const
+{
+ for(AttributeMap::const_iterator it(_attributes.begin()), mt(_attributes.end()); it != mt; it++) {
+ if (it->second->hasReaders())
+ return true;
+ }
+
+ return false;
+}
+
+const AttributeManager::VectorHolder *
+AttributeManager::findAndLoadAttribute(const string & name) const
+{
+ const VectorHolder * loadedVector(NULL);
+ AttributeMap::const_iterator found = _attributes.find(name);
+ if (found != _attributes.end()) {
+ AttributeVector & vec = *found->second;
+ if ( ! vec.isLoaded() ) {
+ vespalib::LockGuard loadGuard(_loadLock);
+ if ( ! vec.isLoaded() ) {
+ vec.load();
+ } else {
+ LOG(debug, "Multi load of %s prevented by double checked locking.", vec.getBaseFileName().c_str());
+ }
+ }
+ loadedVector = & found->second;
+ }
+ return loadedVector;
+}
+
+
+const AttributeManager::VectorHolder *
+AttributeManager::getAttributeRef(const string & name) const
+{
+ return findAndLoadAttribute(name);
+}
+
+AttributeGuard::UP
+AttributeManager::getAttribute(const string & name) const
+{
+ AttributeGuard::UP attrGuard(new AttributeGuard(VectorHolder()));
+ const VectorHolder * vh = findAndLoadAttribute(name);
+ if ( vh != NULL ) {
+ attrGuard.reset(new AttributeGuard(*vh));
+ }
+ return attrGuard;
+}
+
+AttributeGuard::UP
+AttributeManager::getAttributeStableEnum(const string & name) const
+{
+ AttributeGuard::UP attrGuard(new AttributeEnumGuard(VectorHolder()));
+ const VectorHolder * vh = findAndLoadAttribute(name);
+ if ( vh != NULL ) {
+ attrGuard.reset(new AttributeEnumGuard(*vh));
+ }
+ return attrGuard;
+}
+
+bool
+AttributeManager::add(const AttributeManager::VectorHolder & vector)
+{
+ bool retval(true);
+ AttributeMap::iterator found = _attributes.find(vector->getName());
+ if (found == _attributes.end()) {
+ vector->setInterlock(_interlock);
+ _attributes[vector->getName()] = vector;
+ retval = true;
+ }
+ return retval;
+}
+
+void
+AttributeManager::getAttributeList(AttributeList & list) const
+{
+ list.reserve(_attributes.size());
+ for(AttributeMap::const_iterator it(_attributes.begin()), mt(_attributes.end()); it != mt; it++) {
+ list.push_back(AttributeGuard(it->second));
+ }
+}
+
+IAttributeContext::UP
+AttributeManager::createContext() const
+{
+ return IAttributeContext::UP(new AttributeContext(*this));
+}
+
+string
+AttributeManager::createBaseFileName(const string & name, bool useSnapshot) const
+{
+ return AttributeVector::BaseName(getBaseDir(), useSnapshot ? getSnapshot().dirName : "", name);
+}
+
+bool
+AttributeManager::addVector(const string & name, const Config & config)
+{
+ bool retval = false;
+ AttributeGuard::UP vector_owner(getAttribute(name));
+ AttributeGuard &vector(*vector_owner);
+
+ if (vector.valid()) {
+ if ((vector->getInternalBasicType() == config.basicType()) &&
+ (vector->getInternalCollectionType() == config.collectionType()))
+ {
+ retval = true;
+ } else {
+ LOG(error, "Attribute Vector '%s' has type conflict", name.c_str());
+ }
+ } else {
+ AttributeMap::iterator found = _attributes.find(name);
+ if (found != _attributes.end()) {
+ const VectorHolder & vh(found->second);
+ if ( vh.get() &&
+ (vh->getInternalBasicType() == config.basicType()) &&
+ (vh->getInternalCollectionType() == config.collectionType()))
+ {
+ retval = true;
+ }
+ }
+ if (! retval ) {
+ string baseFileName = createBaseFileName(name, true);
+ VectorHolder vh(AttributeFactory::createAttribute(baseFileName, config));
+ assert(vh.get());
+ if (vh->load()) {
+ assert(vh->getInternalBasicType() == config.basicType());
+ assert(vh->getInternalCollectionType() == config.collectionType());
+ retval = add(vh);
+ } else {
+ retval = add(vh);
+ }
+ }
+ }
+ return retval;
+}
+
+
+}
diff --git a/searchlib/src/vespa/searchlib/attribute/attributemanager.h b/searchlib/src/vespa/searchlib/attribute/attributemanager.h
new file mode 100644
index 00000000000..6e166fe0835
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/attributemanager.h
@@ -0,0 +1,73 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/searchlib/attribute/attributeguard.h>
+#include <vespa/searchlib/attribute/iattributemanager.h>
+#include <vespa/searchlib/common/indexmetainfo.h>
+#include <vespa/vespalib/stllike/hash_map.h>
+
+namespace search {
+
+/**
+ * You use the attribute manager to get access to attributes. You must specify what kind
+ * of access you want to have.
+ **/
+class AttributeManager : public IAttributeManager
+{
+private:
+ typedef attribute::Config Config;
+public:
+ typedef std::vector<string> StringVector;
+ typedef search::IndexMetaInfo::Snapshot Snapshot;
+ typedef std::vector<AttributeGuard> AttributeList;
+ typedef AttributeVector::SP VectorHolder;
+ AttributeManager();
+ AttributeManager(const string & base);
+ ~AttributeManager(void);
+
+ /**
+ * This will give you a handle to an attributevector. It
+ * guarantees that backed attribute is valid. But no guarantees
+ * about the content of the attribute. If that is required some of
+ * the other getAttributeXX methods must be used.
+ **/
+ const VectorHolder * getAttributeRef(const string & name) const;
+
+ // Implements IAttributeManager
+ virtual AttributeGuard::UP getAttribute(const string & name) const;
+
+ // Implements IAttributeManager
+ virtual AttributeGuard::UP getAttributeStableEnum(const string & name) const;
+ /**
+ * This will load attributes in the most memory economical way by loading largest first.
+ */
+ bool addVector(const string & name, const Config & config);
+
+ bool add(const VectorHolder & vector);
+
+ // Implements IAttributeManager
+ virtual void getAttributeList(AttributeList & list) const;
+
+ // Implements IAttributeManager
+ virtual attribute::IAttributeContext::UP createContext() const;
+
+ const Snapshot & getSnapshot() const { return _snapShot; }
+ const string & getBaseDir() const { return _baseDir; }
+ void setSnapshot(const Snapshot &snap) { _snapShot = snap; }
+ void setBaseDir(const string & base);
+ bool hasReaders(void) const;
+ uint64_t getMemoryFootprint() const;
+protected:
+ typedef vespalib::hash_map<string, VectorHolder> AttributeMap;
+ AttributeMap _attributes;
+ vespalib::Lock _loadLock;
+private:
+ const VectorHolder * findAndLoadAttribute(const string & name) const;
+ string createBaseFileName(const string & name, bool useSnapshot) const;
+ string _baseDir;
+ Snapshot _snapShot;
+ std::shared_ptr<attribute::Interlock> _interlock;
+};
+
+}
+
diff --git a/searchlib/src/vespa/searchlib/attribute/attributememoryfilebufferwriter.cpp b/searchlib/src/vespa/searchlib/attribute/attributememoryfilebufferwriter.cpp
new file mode 100644
index 00000000000..534a7d6ff2f
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/attributememoryfilebufferwriter.cpp
@@ -0,0 +1,31 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "attributememoryfilebufferwriter.h"
+
+namespace search
+{
+
+
+AttributeMemoryFileBufferWriter::
+AttributeMemoryFileBufferWriter(IAttributeFileWriter &memoryFileWriter)
+ : AttributeFileBufferWriter(memoryFileWriter)
+{
+}
+
+
+AttributeMemoryFileBufferWriter::~AttributeMemoryFileBufferWriter()
+{
+}
+
+
+void
+AttributeMemoryFileBufferWriter::onFlush(size_t nowLen)
+{
+ _buf->moveFreeToData(nowLen);
+ assert(_buf->getDataLen() == nowLen);
+ _fileWriter.writeBuf(std::move(_buf));
+ _buf = _fileWriter.allocBuf(BUFFER_SIZE);
+}
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/attribute/attributememoryfilebufferwriter.h b/searchlib/src/vespa/searchlib/attribute/attributememoryfilebufferwriter.h
new file mode 100644
index 00000000000..c0ef15f571d
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/attributememoryfilebufferwriter.h
@@ -0,0 +1,25 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "attributefilebufferwriter.h"
+
+namespace search
+{
+
+/*
+ * BufferWriter implementation that passes full buffers on to
+ * memory variant of IAttributeFileWriter.
+ */
+class AttributeMemoryFileBufferWriter : public AttributeFileBufferWriter
+{
+public:
+ AttributeMemoryFileBufferWriter(IAttributeFileWriter &memoryFileWriter);
+
+ virtual ~AttributeMemoryFileBufferWriter();
+
+ virtual void onFlush(size_t nowSize) override;
+};
+
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/attribute/attributememoryfilewriter.cpp b/searchlib/src/vespa/searchlib/attribute/attributememoryfilewriter.cpp
new file mode 100644
index 00000000000..a9b72350c7e
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/attributememoryfilewriter.cpp
@@ -0,0 +1,60 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "attributememoryfilewriter.h"
+#include "attributememoryfilebufferwriter.h"
+
+namespace search
+{
+
+namespace
+{
+
+const uint32_t MIN_ALIGNMENT = 4096;
+
+}
+
+AttributeMemoryFileWriter::AttributeMemoryFileWriter()
+ : IAttributeFileWriter(),
+ _bufs()
+{
+}
+
+
+AttributeMemoryFileWriter::~AttributeMemoryFileWriter()
+{
+}
+
+
+AttributeMemoryFileWriter::Buffer
+AttributeMemoryFileWriter::allocBuf(size_t size)
+{
+ return std::make_unique<BufferBuf>(size, MIN_ALIGNMENT);
+}
+
+
+void
+AttributeMemoryFileWriter::writeBuf(Buffer buf)
+{
+ _bufs.emplace_back(std::move(buf));
+}
+
+
+void
+AttributeMemoryFileWriter::writeTo(IAttributeFileWriter &writer)
+{
+ for (auto &buf : _bufs) {
+ writer.writeBuf(std::move(buf));
+ }
+ _bufs.clear();
+}
+
+
+std::unique_ptr<BufferWriter>
+AttributeMemoryFileWriter::allocBufferWriter()
+{
+ return std::make_unique<AttributeMemoryFileBufferWriter>(*this);
+}
+
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/attribute/attributememoryfilewriter.h b/searchlib/src/vespa/searchlib/attribute/attributememoryfilewriter.h
new file mode 100644
index 00000000000..7afd6c92606
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/attributememoryfilewriter.h
@@ -0,0 +1,27 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "iattributefilewriter.h"
+
+namespace search
+{
+
+/*
+ * Class to write to a memory buffer representation of a single
+ * attribute vector file (without header). Used by AttributeMemorySaveTarget.
+ */
+class AttributeMemoryFileWriter : public IAttributeFileWriter
+{
+ std::vector<Buffer> _bufs;
+public:
+ AttributeMemoryFileWriter();
+ ~AttributeMemoryFileWriter();
+ virtual Buffer allocBuf(size_t size) override;
+ virtual void writeBuf(Buffer buf) override;
+ virtual std::unique_ptr<BufferWriter> allocBufferWriter() override;
+ void writeTo(IAttributeFileWriter &writer);
+};
+
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/attribute/attributememorysavetarget.cpp b/searchlib/src/vespa/searchlib/attribute/attributememorysavetarget.cpp
new file mode 100644
index 00000000000..f1fbfef43aa
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/attributememorysavetarget.cpp
@@ -0,0 +1,78 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".searchlib.attribute.attributememorysavetarget");
+
+#include "attributememorysavetarget.h"
+#include "attributefilesavetarget.h"
+#include "attributevector.h"
+
+namespace search
+{
+
+using search::common::FileHeaderContext;
+
+AttributeMemorySaveTarget::AttributeMemorySaveTarget()
+ : _datWriter(),
+ _idxWriter(),
+ _weightWriter(),
+ _udatWriter()
+{
+}
+
+
+IAttributeFileWriter &
+AttributeMemorySaveTarget::datWriter()
+{
+ return _datWriter;
+}
+
+
+IAttributeFileWriter &
+AttributeMemorySaveTarget::idxWriter()
+{
+ return _idxWriter;
+}
+
+
+IAttributeFileWriter &
+AttributeMemorySaveTarget::weightWriter()
+{
+ return _weightWriter;
+}
+
+
+IAttributeFileWriter &
+AttributeMemorySaveTarget::udatWriter()
+{
+ return _udatWriter;
+}
+
+
+bool
+AttributeMemorySaveTarget::
+writeToFile(const TuneFileAttributes &tuneFileAttributes,
+ const FileHeaderContext &fileHeaderContext)
+{
+ AttributeFileSaveTarget saveTarget(tuneFileAttributes, fileHeaderContext);
+ saveTarget.setConfig(_cfg);
+ if (!saveTarget.setup()) {
+ return false;
+ }
+ _datWriter.writeTo(saveTarget.datWriter());
+ if (_cfg.getEnumerated()) {
+ _udatWriter.writeTo(saveTarget.udatWriter());
+ }
+ if (_cfg.hasMultiValue()) {
+ _idxWriter.writeTo(saveTarget.idxWriter());
+ if (_cfg.hasWeightedSetType()) {
+ _weightWriter.writeTo(saveTarget.weightWriter());
+ }
+ }
+ saveTarget.close();
+ return true;
+}
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/attribute/attributememorysavetarget.h b/searchlib/src/vespa/searchlib/attribute/attributememorysavetarget.h
new file mode 100644
index 00000000000..48828039d9e
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/attributememorysavetarget.h
@@ -0,0 +1,54 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "iattributesavetarget.h"
+#include <vespa/searchlib/util/rawbuf.h>
+#include <memory>
+#include <vespa/searchlib/common/tunefileinfo.h>
+#include "attributememoryfilewriter.h"
+
+namespace search
+{
+
+namespace common
+{
+
+class FileHeaderContext;
+
+}
+
+class AttributeVector;
+
+/**
+ * Class used to save an attribute vector to memory buffer(s).
+ **/
+class AttributeMemorySaveTarget : public IAttributeSaveTarget
+{
+private:
+ AttributeMemoryFileWriter _datWriter;
+ AttributeMemoryFileWriter _idxWriter;
+ AttributeMemoryFileWriter _weightWriter;
+ AttributeMemoryFileWriter _udatWriter;
+
+public:
+ AttributeMemorySaveTarget();
+
+ /**
+ * Write the underlying buffer(s) to file(s).
+ **/
+ bool
+ writeToFile(const TuneFileAttributes &tuneFileAttributes,
+ const search::common::FileHeaderContext &fileHeaderContext);
+
+ // Implements IAttributeSaveTarget
+ virtual bool setup() override { return true; }
+ virtual void close() override {}
+ virtual IAttributeFileWriter &datWriter() override;
+ virtual IAttributeFileWriter &idxWriter() override;
+ virtual IAttributeFileWriter &weightWriter() override;
+ virtual IAttributeFileWriter &udatWriter() override;
+};
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/attribute/attributesaver.cpp b/searchlib/src/vespa/searchlib/attribute/attributesaver.cpp
new file mode 100644
index 00000000000..cf8b9cdf1a2
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/attributesaver.cpp
@@ -0,0 +1,40 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "attributesaver.h"
+
+
+using vespalib::GenerationHandler;
+
+namespace search
+{
+
+AttributeSaver::AttributeSaver(GenerationHandler::Guard &&guard,
+ const IAttributeSaveTarget::Config &cfg)
+ : _guard(std::move(guard)),
+ _cfg(cfg)
+{
+}
+
+
+AttributeSaver::~AttributeSaver()
+{
+}
+
+
+bool
+AttributeSaver::save(IAttributeSaveTarget &saveTarget)
+{
+ saveTarget.setConfig(_cfg);
+ if (!saveTarget.setup()) {
+ return false;
+ }
+ if (!onSave(saveTarget)) {
+ return false;
+ }
+ saveTarget.close();
+ return true;
+}
+
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/attribute/attributesaver.h b/searchlib/src/vespa/searchlib/attribute/attributesaver.h
new file mode 100644
index 00000000000..c398e0726ec
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/attributesaver.h
@@ -0,0 +1,35 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/vespalib/util/generationhandler.h>
+#include "iattributesavetarget.h"
+
+namespace search
+{
+
+/*
+ * Abstract class used to hold data outside attribute vector needed
+ * during a save operation, e.g. copy of data structure without
+ * snapshot property, and guards to protect frozen views on structures
+ * with snapshot properties.
+ */
+class AttributeSaver
+{
+private:
+ vespalib::GenerationHandler::Guard _guard;
+ IAttributeSaveTarget::Config _cfg;
+
+protected:
+ AttributeSaver(vespalib::GenerationHandler::Guard &&guard,
+ const IAttributeSaveTarget::Config &cfg);
+
+ virtual bool onSave(IAttributeSaveTarget &saveTarget) = 0;
+
+public:
+ virtual ~AttributeSaver();
+
+ bool save(IAttributeSaveTarget &saveTarget);
+};
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/attribute/attributevector.cpp b/searchlib/src/vespa/searchlib/attribute/attributevector.cpp
new file mode 100644
index 00000000000..cc223a4fada
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/attributevector.cpp
@@ -0,0 +1,1110 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "attributevector.h"
+#include "attributevector.hpp"
+#include <vespa/searchlib/attribute/attributefilesavetarget.h>
+#include <vespa/searchlib/attribute/stringbase.h>
+#include <vespa/searchlib/attribute/integerbase.h>
+#include <vespa/searchlib/attribute/floatbase.h>
+#include <vespa/searchlib/attribute/attributeiterators.h>
+#include <vespa/searchlib/common/tunefileinfo.h>
+#include <vespa/searchlib/parsequery/stackdumpiterator.h>
+#include <vespa/searchlib/util/filekit.h>
+#include <vespa/searchlib/util/filesizecalculator.h>
+#include <vespa/searchlib/index/dummyfileheadercontext.h>
+#include <vespa/vespalib/util/exceptions.h>
+#include <vespa/vespalib/util/error.h>
+#include <vespa/vespalib/data/fileheader.h>
+#include <functional>
+#include <stdexcept>
+#include <vespa/log/log.h>
+#include "ipostinglistsearchcontext.h"
+#include "ipostinglistattributebase.h"
+#include <vespa/searchlib/queryeval/emptysearch.h>
+#include "interlock.h"
+#include "attributesaver.h"
+LOG_SETUP(".searchlib.attribute.attributevector");
+
+using vespalib::getLastErrorString;
+
+using document::ValueUpdate;
+using vespalib::make_string;
+using vespalib::Array;
+using vespalib::IllegalStateException;
+using search::common::FileHeaderContext;
+using search::index::DummyFileHeaderContext;
+using search::queryeval::SearchIterator;
+
+namespace {
+
+const vespalib::string enumeratedTag = "enumerated";
+const vespalib::string dataTypeTag = "datatype";
+const vespalib::string collectionTypeTag = "collectiontype";
+const vespalib::string createSerialNumTag = "createSerialNum";
+const vespalib::string versionTag = "version";
+const vespalib::string docIdLimitTag = "docIdLimit";
+
+bool allowEnumeratedLoad = true;
+const size_t DIRECTIO_ALIGNMENT(4096);
+
+bool
+isEnumerated(const vespalib::GenericHeader &header)
+{
+ return header.hasTag(enumeratedTag) &&
+ header.getTag(enumeratedTag).asInteger() != 0;
+}
+
+uint64_t
+extractCreateSerialNum(const vespalib::GenericHeader &header)
+{
+ if (header.hasTag(createSerialNumTag))
+ return header.getTag(createSerialNumTag).asInteger();
+ else
+ return 0u;
+}
+
+template <typename T>
+struct FuncMax : public std::binary_function<T, T, T> {
+ T operator() (const T & x, const T & y) const {
+ return std::max(x, y);
+ }
+};
+
+}
+
+namespace search {
+
+IMPLEMENT_IDENTIFIABLE_ABSTRACT(AttributeVector, vespalib::Identifiable);
+
+AttributeVector::BaseName::BaseName(const vespalib::stringref &base,
+ const vespalib::stringref &snap,
+ const vespalib::stringref &name)
+ : string(base),
+ _name(name)
+{
+ if (!empty()) {
+ push_back('/');
+ }
+ if ( ! snap.empty() ) {
+ append(snap);
+ push_back('/');
+ }
+ append(name);
+}
+
+
+AttributeVector::BaseName::string
+AttributeVector::BaseName::getIndexName() const
+{
+ // "$VESPA_HOME/var/db/vespa/search/cluster.search/r0/c0/typetest_search/1.ready/attribute/stringfield/snapshot-0/stringfield"
+ string index;
+ size_t snapshotPos(rfind("/snapshot-"));
+ if (snapshotPos == string::npos)
+ return index;
+ size_t attrNamePos(rfind('/', snapshotPos - 1));
+ if (attrNamePos == string::npos || attrNamePos == 0)
+ return index;
+ size_t attrStrPos(rfind('/', attrNamePos - 1));
+ if (attrStrPos == string::npos || attrStrPos == 0)
+ return index;
+ size_t subDBPos(rfind('/', attrStrPos - 1));
+ if (subDBPos == string::npos || subDBPos == 0)
+ return index;
+ size_t indexNamePos(rfind('/', subDBPos - 1));
+ if (indexNamePos == string::npos)
+ return substr(0, subDBPos);
+ return substr(indexNamePos + 1, subDBPos - indexNamePos - 1);
+}
+
+
+AttributeVector::BaseName::string
+AttributeVector::BaseName::getSnapshotName() const
+{
+ string snapShot;
+ size_t p(rfind("snapshot-"));
+ if (p != string::npos) {
+ string fullSnapshot(substr(p));
+ p = fullSnapshot.find('/');
+ if (p != string::npos) {
+ snapShot = fullSnapshot.substr(0, p);
+ }
+ }
+ return snapShot;
+}
+
+
+AttributeVector::BaseName::string
+AttributeVector::BaseName::createAttributeName(const vespalib::stringref & s)
+{
+ size_t p(s.rfind('/'));
+ if (p == string::npos) {
+ return s;
+ } else {
+ return s.substr(p+1);
+ }
+}
+
+
+AttributeVector::BaseName::string
+AttributeVector::BaseName::getDirName() const
+{
+ size_t p = rfind('/');
+ if (p == string::npos) {
+ return "";
+ } else {
+ return substr(0, p);
+ }
+}
+
+
+AttributeVector::ValueModifier::ValueModifier(AttributeVector &attr)
+ : _attr(&attr)
+{
+}
+
+
+AttributeVector::ValueModifier::ValueModifier(const ValueModifier &rhs)
+ : _attr(rhs.stealAttr())
+{
+}
+
+
+AttributeVector::ValueModifier::~ValueModifier()
+{
+ if (_attr) {
+ _attr->incGeneration();
+ }
+}
+
+
+AttributeVector::AttributeVector(const vespalib::stringref &baseFileName,
+ const Config &c)
+ : _baseFileName(baseFileName),
+ _config(c),
+ _interlock(std::make_shared<attribute::Interlock>()),
+ _enumLock(),
+ _genHandler(),
+ _genHolder(),
+ _status(Status::createName((_baseFileName.getIndexName() +
+ (_baseFileName.getSnapshotName().empty() ?
+ "" :
+ ".") +
+ _baseFileName.getSnapshotName()),
+ _baseFileName.getAttributeName())),
+ _highestValueCount(1),
+ _enumMax(0),
+ _committedDocIdLimit(0u),
+ _uncommittedDocIdLimit(0u),
+ _createSerialNum(0u),
+ _compactLidSpaceGeneration(0u),
+ _hasEnum(false),
+ _hasSortedEnum(false),
+ _loaded(false),
+ _enableEnumeratedSave(false)
+{
+}
+
+
+AttributeVector::~AttributeVector()
+{
+}
+
+void AttributeVector::updateStat(bool force)
+{
+ if (force) {
+ onUpdateStat();
+ } else if (_nextStatUpdateTime < fastos::ClockSystem::now()) {
+ onUpdateStat();
+ _nextStatUpdateTime = fastos::ClockSystem::now() +
+ fastos::TimeStamp::SEC;
+ }
+}
+
+
+void
+AttributeVector::commit(bool forceUpdateStat)
+{
+ onCommit();
+ updateCommittedDocIdLimit();
+ updateStat(forceUpdateStat);
+ _loaded = true;
+}
+
+
+void
+AttributeVector::commit(uint64_t firstSyncToken, uint64_t lastSyncToken)
+{
+ if (firstSyncToken < getStatus().getLastSyncToken()) {
+ LOG(error,
+ "Expected first token to be >= %" PRIu64 ", got %" PRIu64 ".",
+ getStatus().getLastSyncToken(), firstSyncToken);
+ abort();
+ }
+ commit();
+ _status.setLastSyncToken(lastSyncToken);
+}
+
+
+bool
+AttributeVector::addDocs(DocId &startDoc, DocId &lastDoc, uint32_t numDocs)
+{
+ if (numDocs != 0) {
+ if (!addDoc(startDoc)) {
+ return false;
+ }
+ lastDoc = startDoc;
+ for (uint32_t i = 1; i < numDocs; ++i) {
+ if (!addDoc(lastDoc)) {
+ return false;
+ }
+ }
+ }
+ return true;
+}
+
+
+bool
+AttributeVector::addDocs(uint32_t numDocs)
+{
+ DocId doc;
+ return addDocs(doc, doc, numDocs);
+}
+
+
+void
+AttributeVector::incGeneration()
+{
+ // Freeze trees etc, to stop new readers from accessing currently held data
+ onGenerationChange(_genHandler.getNextGeneration());
+ _genHandler.incGeneration();
+ // Remove old data on hold lists that can no longer be reached by readers
+ removeAllOldGenerations();
+}
+
+
+void
+AttributeVector::updateStatistics(uint64_t numValues,
+ uint64_t numUniqueValue,
+ uint64_t allocated,
+ uint64_t used,
+ uint64_t dead,
+ uint64_t onHold)
+{
+ _status.updateStatistics(numValues,
+ numUniqueValue,
+ allocated,
+ used,
+ dead,
+ onHold);
+}
+
+AddressSpace
+AttributeVector::getEnumStoreAddressSpaceUsage() const
+{
+ return AddressSpaceUsage::defaultEnumStoreUsage();
+}
+
+AddressSpace
+AttributeVector::getMultiValueAddressSpaceUsage() const
+{
+ return AddressSpaceUsage::defaultMultiValueUsage();
+}
+
+AddressSpaceUsage
+AttributeVector::getAddressSpaceUsage() const
+{
+ return AddressSpaceUsage(getEnumStoreAddressSpaceUsage(),
+ getMultiValueAddressSpaceUsage());
+}
+
+bool
+AttributeVector::headerTypeOK(const vespalib::GenericHeader &header) const
+{
+ return header.hasTag(dataTypeTag) &&
+ header.hasTag(collectionTypeTag) &&
+ header.hasTag(docIdLimitTag) &&
+ header.getTag(dataTypeTag).asString() ==
+ getConfig().basicType().asString() &&
+ header.getTag(collectionTypeTag).asString() ==
+ getConfig().collectionType().asString();
+}
+
+
+std::unique_ptr<Fast_BufferedFile>
+AttributeVector::openFile(const char *suffix)
+{
+ BaseName::string fileName(getBaseFileName());
+ fileName += suffix;
+ return FileUtil::openFile(fileName);
+}
+
+
+std::unique_ptr<Fast_BufferedFile>
+AttributeVector::openDAT()
+{
+ return openFile(".dat");
+}
+
+
+std::unique_ptr<Fast_BufferedFile>
+AttributeVector::openIDX()
+{
+ return openFile(".idx");
+}
+
+
+std::unique_ptr<Fast_BufferedFile>
+AttributeVector::openWeight()
+{
+ return openFile(".weight");
+}
+
+
+std::unique_ptr<Fast_BufferedFile>
+AttributeVector::openUDAT()
+{
+ return openFile(".dat");
+}
+
+
+AttributeVector::ReaderBase::ReaderBase(AttributeVector &attr)
+ : _datFile(attr.openDAT()),
+ _weightFile(attr.hasWeightedSetType() ?
+ attr.openWeight() : std::unique_ptr<Fast_BufferedFile>()),
+ _idxFile(attr.hasMultiValue() ?
+ attr.openIDX() : std::unique_ptr<Fast_BufferedFile>()),
+ _udatFile(),
+ _weightReader(*_weightFile),
+ _idxReader(*_idxFile),
+ _enumReader(*_datFile),
+ _currIdx(0),
+ _datHeaderLen(0u),
+ _idxHeaderLen(0u),
+ _weightHeaderLen(0u),
+ _udatHeaderLen(0u),
+ _createSerialNum(0u),
+ _fixedWidth(attr.getFixedWidth()),
+ _enumerated(false),
+ _hasLoadData(false),
+ _version(0),
+ _docIdLimit(0),
+ _datHeader(DIRECTIO_ALIGNMENT),
+ _datFileSize(0),
+ _idxFileSize(0)
+{
+ _datHeaderLen = _datHeader.readFile(*_datFile);
+ _datFile->SetPosition(_datHeaderLen);
+ if (!attr.headerTypeOK(_datHeader) ||
+ !extractFileSize(_datHeader, *_datFile, _datFileSize)) {
+ _datFile->Close();
+ }
+ _createSerialNum = extractCreateSerialNum(_datHeader);
+ if (_datHeader.hasTag(versionTag)) {
+ _version = _datHeader.getTag(versionTag).asInteger();
+ }
+ _docIdLimit = _datHeader.getTag(docIdLimitTag).asInteger();
+ if (hasIdx()) {
+ vespalib::FileHeader idxHeader(DIRECTIO_ALIGNMENT);
+ _idxHeaderLen = idxHeader.readFile(*_idxFile);
+ _idxFile->SetPosition(_idxHeaderLen);
+ if (!attr.headerTypeOK(idxHeader) ||
+ !extractFileSize(idxHeader, *_idxFile, _idxFileSize)) {
+ _idxFile->Close();
+ } else {
+ _currIdx = _idxReader.readHostOrder();
+ }
+ }
+ if (hasWeight()) {
+ vespalib::FileHeader weightHeader(DIRECTIO_ALIGNMENT);
+ _weightHeaderLen = weightHeader.readFile(*_weightFile);
+ _weightFile->SetPosition(_weightHeaderLen);
+ if (!attr.headerTypeOK(weightHeader))
+ _weightFile->Close();
+ }
+ if (hasData() && isEnumerated(_datHeader)) {
+#if 1
+ if (!allowEnumeratedLoad) {
+ /*
+ * Block loading of enumerated attribute vector files until we have
+ * working unit tests in place.
+ */
+ vespalib::string s;
+ s = vespalib::make_string("Attribute vector file '%s' is"
+ " enumerated."
+ " Install a newer version of vespa that"
+ " supports enumerated"
+ " attribute vector files, or ask"
+ " vespa team to help "
+ " converting attribute vector to "
+ " non-enumerated form.",
+ _datFile->GetFileName());
+ LOG(error, "%s", s.c_str());
+ throw IllegalStateException(s);
+ }
+#endif
+ _enumerated = true;
+ _udatFile = attr.openUDAT();
+ vespalib::FileHeader udatHeader(DIRECTIO_ALIGNMENT);
+ _udatHeaderLen = udatHeader.readFile(*_udatFile);
+ _udatFile->SetPosition(_udatHeaderLen);
+ if (!attr.headerTypeOK(udatHeader))
+ _udatFile->Close();
+ }
+ _hasLoadData = hasData() &&
+ (!attr.hasMultiValue() || hasIdx()) &&
+ (!attr.hasWeightedSetType() || hasWeight()) &&
+ (!getEnumerated() || hasUData());
+}
+
+
+AttributeVector::ReaderBase::~ReaderBase()
+{
+}
+
+
+bool
+AttributeVector::ReaderBase::
+extractFileSize(const vespalib::GenericHeader &header,
+ FastOS_FileInterface &file, uint64_t &fileSize)
+{
+ fileSize = file.GetSize();
+ return FileSizeCalculator::extractFileSize(header, header.getSize(),
+ file.GetFileName(), fileSize);
+}
+
+
+void
+AttributeVector::ReaderBase::rewind()
+{
+ _datFile->SetPosition(_datHeaderLen);
+ _currIdx = 0;
+ if (hasIdx()) {
+ _idxFile->SetPosition(_idxHeaderLen);
+ _currIdx = _idxReader.readHostOrder();
+ }
+ if (hasWeight()) {
+ _weightFile->SetPosition(_weightHeaderLen);
+ }
+ if (getEnumerated()) {
+ _udatFile->SetPosition(_udatHeaderLen);
+ }
+}
+
+
+size_t
+AttributeVector::ReaderBase::getNumValues()
+{
+ if (getEnumerated()) {
+ return getEnumCount();
+ } else {
+ if (_fixedWidth > 0) {
+ size_t dataSize(_datFileSize - _datHeaderLen);
+ assert((dataSize % _fixedWidth) == 0);
+ return dataSize / _fixedWidth;
+ } else {
+ // TODO. This limits the number of multivalues to 2^32-1
+ // This is assert during write, so this should never be a problem here.
+ _idxFile->SetPosition(_idxFileSize - 4);
+ size_t numValues = _idxReader.readHostOrder();
+ rewind();
+ return numValues;
+ }
+ }
+}
+
+
+uint32_t
+ AttributeVector::ReaderBase::getNextValueCount()
+{
+ uint32_t nextIdx = _idxReader.readHostOrder();
+ uint32_t numValues = nextIdx - _currIdx;
+ _currIdx = nextIdx;
+ return numValues;
+}
+
+
+FileUtil::LoadedBuffer::UP
+AttributeVector::loadDAT()
+{
+ return loadFile(".dat");
+}
+
+
+FileUtil::LoadedBuffer::UP
+AttributeVector::loadIDX()
+{
+ return loadFile(".idx");
+}
+
+
+FileUtil::LoadedBuffer::UP
+AttributeVector::loadWeight()
+{
+ return loadFile(".weight");
+}
+
+
+FileUtil::LoadedBuffer::UP
+AttributeVector::loadUDAT()
+{
+ return loadFile(".udat");
+}
+
+
+FileUtil::LoadedBuffer::UP
+AttributeVector::loadFile(const char *suffix)
+{
+ BaseName::string fileName(getBaseFileName());
+ fileName += suffix;
+ return FileUtil::loadFile(fileName);
+}
+
+
+bool
+AttributeVector::saveAs(const vespalib::stringref &baseFileName)
+{
+ _baseFileName = baseFileName;
+ return save();
+}
+
+bool
+AttributeVector::saveAs(const vespalib::stringref &baseFileName,
+ IAttributeSaveTarget & saveTarget)
+{
+ _baseFileName = baseFileName;
+ return save(saveTarget);
+}
+
+
+bool
+AttributeVector::save()
+{
+ TuneFileAttributes tune;
+ DummyFileHeaderContext fileHeaderContext;
+ AttributeFileSaveTarget saveTarget(tune, fileHeaderContext);
+ return save(saveTarget);
+}
+
+
+bool
+AttributeVector::save(IAttributeSaveTarget &saveTarget)
+{
+ commit();
+ // First check if new style save is available.
+ std::unique_ptr<AttributeSaver> saver(onInitSave());
+ if (saver) {
+ // Normally, new style save happens in background, but here it
+ // will occur in the foreground.
+ return saver->save(saveTarget);
+ }
+ // New style save not available, use old style save
+ saveTarget.setConfig(createSaveTargetConfig());
+ if (!saveTarget.setup()) {
+ return false;
+ }
+ onSave(saveTarget);
+ saveTarget.close();
+ return true;
+}
+
+
+IAttributeSaveTarget::Config
+AttributeVector::createSaveTargetConfig() const
+{
+ return IAttributeSaveTarget::Config(getBaseFileName(),
+ getConfig().basicType().asString(),
+ getConfig().collectionType().asString(),
+ getConfig().basicType().type() ==
+ BasicType::Type::TENSOR ?
+ getConfig().tensorType().toSpec() :
+ "",
+ hasMultiValue(),
+ hasWeightedSetType(),
+ getEnumeratedSave(),
+ getCommittedDocIdLimit(),
+ getFixedWidth(),
+ getUniqueValueCount(),
+ getTotalValueCount(),
+ getCreateSerialNum(),
+ getVersion());
+}
+
+
+void
+AttributeVector::onSave(IAttributeSaveTarget & saveTarget)
+{
+ (void) saveTarget;
+ assert(false);
+}
+
+
+bool
+AttributeVector::hasLoadData() const
+{
+ FastOS_StatInfo statInfo;
+ if (!FastOS_File::Stat(vespalib::make_string("%s.dat",
+ getBaseFileName().c_str()).c_str(),
+ &statInfo)) {
+ return false;
+ }
+ if (hasMultiValue() &&
+ !FastOS_File::Stat(vespalib::make_string("%s.idx",
+ getBaseFileName().c_str()).c_str(),
+ &statInfo))
+ {
+ return false;
+ }
+ if (hasWeightedSetType() &&
+ !FastOS_File::Stat(vespalib::make_string("%s.weight",
+ getBaseFileName().c_str()).c_str(),
+ &statInfo))
+ {
+ return false;
+ }
+ if (isEnumeratedSaveFormat() &&
+ !FastOS_File::Stat(vespalib::make_string("%s.udat",
+ getBaseFileName().c_str()).c_str(),
+ &statInfo))
+ {
+ return false;
+ }
+ return true;
+}
+
+
+bool
+AttributeVector::isEnumeratedSaveFormat(void) const
+{
+ vespalib::string datName(vespalib::make_string("%s.dat",
+ getBaseFileName().c_str()));
+ Fast_BufferedFile datFile;
+ vespalib::FileHeader datHeader(DIRECTIO_ALIGNMENT);
+ if ( ! datFile.OpenReadOnly(datName.c_str()) ) {
+ LOG(error, "could not open %s: %s",
+ datFile.GetFileName(), getLastErrorString().c_str());
+ throw IllegalStateException(
+ vespalib::make_string(
+ "Failed opening attribute data file '%s' for reading",
+ datFile.GetFileName()));
+ }
+ datHeader.readFile(datFile);
+
+ return isEnumerated(datHeader);
+}
+
+
+bool
+AttributeVector::load() {
+ bool loaded = onLoad();
+ if (loaded) {
+ commit();
+ }
+ _loaded = loaded;
+ return _loaded;
+}
+
+
+bool
+AttributeVector::onLoad()
+{
+ return false;
+}
+
+
+int32_t
+AttributeVector::getWeight(DocId doc, uint32_t idx) const
+{
+ (void) doc;
+ (void) idx;
+ return 1;
+}
+
+AttributeVector::SearchContext::Params::Params() :
+ _diversityAttribute(nullptr),
+ _diversityCutoffGroups(std::numeric_limits<uint32_t>::max()),
+ _useBitVector(false),
+ _diversityCutoffStrict(false)
+{
+}
+
+AttributeVector::SearchContext::SearchContext(const AttributeVector &attr) :
+ _attr(attr),
+ _plsc(NULL)
+{
+}
+
+AttributeVector::SearchContext::UP
+AttributeVector::getSearch(const QueryPacketT & searchSpec,
+ const SearchContext::Params & params) const
+{
+ return getSearch(SearchContext::decodeQuery(searchSpec), params);
+}
+
+AttributeVector::SearchContext::~SearchContext()
+{
+}
+
+
+unsigned int
+AttributeVector::SearchContext::approximateHits() const
+{
+ if (_plsc != NULL) {
+ return _plsc->approximateHits();
+ }
+ return std::max(uint64_t(_attr.getNumDocs()),
+ _attr.getStatus().getNumValues());
+}
+
+
+QueryTermSimple::UP
+AttributeVector::SearchContext::decodeQuery(const QueryPacketT &searchSpec)
+{
+ QueryTermSimple::UP qt;
+ EmptyQueryNodeResult qnb;
+ Query q(qnb, searchSpec);
+ if (q.valid() && (dynamic_cast<QueryTerm *>(q.getRoot().get()))) {
+ qt.reset(static_cast<QueryTerm *>(q.getRoot().release()));
+ } else {
+ throw IllegalStateException("Failed decoding query");
+ }
+ return qt;
+}
+
+
+SearchIterator::UP
+AttributeVector::SearchContext::
+createIterator(fef::TermFieldMatchData *matchData, bool strict)
+{
+ if (_plsc != NULL) {
+ SearchIterator::UP res =
+ _plsc->createPostingIterator(matchData, strict);
+ if (res.get() != NULL)
+ return res;
+ }
+ return createFilterIterator(matchData, strict);
+}
+
+
+SearchIterator::UP
+AttributeVector::SearchContext::
+createFilterIterator(fef::TermFieldMatchData *matchData, bool strict)
+{
+ if (!valid())
+ return SearchIterator::UP(new queryeval::EmptySearch());
+ if (getIsFilter()) {
+ return SearchIterator::UP(strict ?
+ new FilterAttributeIteratorStrict<AttributeVector::SearchContext>
+ (*this, matchData) :
+ new FilterAttributeIteratorT<AttributeVector::SearchContext>
+ (*this, matchData));
+ }
+ return SearchIterator::UP(strict ?
+ new AttributeIteratorStrict<AttributeVector::SearchContext>
+ (*this, matchData) :
+ new AttributeIteratorT<AttributeVector::SearchContext>
+ (*this, matchData));
+}
+
+
+void
+AttributeVector::SearchContext::fetchPostings(bool strict)
+{
+ if (_plsc != NULL)
+ _plsc->fetchPostings(strict);
+}
+
+
+bool
+AttributeVector::apply(DocId doc, const MapValueUpdate &map)
+{
+ bool retval(doc < getNumDocs());
+ if (retval) {
+ const ValueUpdate & vu(map.getUpdate());
+ if (vu.inherits(ArithmeticValueUpdate::classId)) {
+ const ArithmeticValueUpdate &
+ au(static_cast<const ArithmeticValueUpdate &>(vu));
+ retval = applyWeight(doc, map.getKey(), au);
+ } else {
+ retval = false;
+ }
+ }
+ return retval;
+}
+
+
+bool
+AttributeVector::applyWeight(DocId, const FieldValue &,
+ const ArithmeticValueUpdate &)
+{
+ return false;
+}
+
+
+void
+AttributeVector::removeAllOldGenerations()
+{
+ _genHandler.updateFirstUsedGeneration();
+ removeOldGenerations(_genHandler.getFirstUsedGeneration());
+}
+
+
+void
+AttributeVector::divideByZeroWarning()
+{
+ LOG(warning,
+ "applyArithmetic(): "
+ "Divide by zero is an illegal operation on integer attributes "
+ "or weighted sets. Ignoring operation.");
+}
+
+
+void
+AttributeVector::performCompactionWarning()
+{
+ LOG(warning,
+ "Could not perform compaction on MultiValueMapping "
+ "with current generation = %" PRIu64,
+ _genHandler.getCurrentGeneration());
+}
+
+
+void
+AttributeVector::addReservedDoc(void)
+{
+ uint32_t docId = 42;
+ addDoc(docId); // Reserved
+ assert(docId == 0u);
+ assert(docId < getNumDocs());
+ clearDoc(docId);
+ commit();
+ const vespalib::Identifiable::RuntimeClass &info = getClass();
+ if (info.inherits(search::FloatingPointAttribute::classId)) {
+ FloatingPointAttribute &vec =
+ static_cast<FloatingPointAttribute &>(*this);
+ if (hasMultiValue()) {
+ bool appendedUndefined = vec.append(0, attribute::getUndefined<double>(), 1);
+ assert(appendedUndefined);
+ (void) appendedUndefined;
+ } else {
+ bool updatedUndefined = vec.update(0, attribute::getUndefined<double>());
+ assert(updatedUndefined);
+ (void) updatedUndefined;
+ }
+ commit();
+ }
+}
+
+
+void
+AttributeVector::enableEnumeratedSave(bool enable)
+{
+ if (hasEnum() || !enable)
+ _enableEnumeratedSave = enable;
+}
+
+
+void
+AttributeVector::enableEnumeratedLoad(void)
+{
+ allowEnumeratedLoad = true;
+}
+
+
+attribute::IPostingListAttributeBase *
+AttributeVector::getIPostingListAttributeBase(void)
+{
+ return NULL;
+}
+
+
+bool
+AttributeVector::hasPostings(void)
+{
+ return getIPostingListAttributeBase() != NULL;
+}
+
+
+uint64_t
+AttributeVector::getUniqueValueCount(void) const
+{
+ return getTotalValueCount();
+}
+
+
+uint64_t
+AttributeVector::getTotalValueCount(void) const
+{
+ return getNumDocs();
+}
+
+
+void
+AttributeVector::setCreateSerialNum(uint64_t createSerialNum)
+{
+ _createSerialNum = createSerialNum;
+}
+
+
+uint64_t
+AttributeVector::getCreateSerialNum(void) const
+{
+ return _createSerialNum;
+}
+
+uint32_t
+AttributeVector::getVersion() const {
+ return 0;
+}
+
+void
+AttributeVector::compactLidSpace(uint32_t wantedLidLimit)
+{
+ commit();
+ assert(_uncommittedDocIdLimit <= wantedLidLimit);
+ if (wantedLidLimit < _committedDocIdLimit) {
+ clearDocs(wantedLidLimit, _committedDocIdLimit);
+ }
+ commit();
+ _committedDocIdLimit = wantedLidLimit;
+ _compactLidSpaceGeneration = _genHandler.getCurrentGeneration();
+ incGeneration();
+}
+
+
+bool
+AttributeVector::canShrinkLidSpace(void) const
+{
+ return wantShrinkLidSpace() &&
+ _compactLidSpaceGeneration < getFirstUsedGeneration();
+}
+
+
+void
+AttributeVector::shrinkLidSpace(void)
+{
+ commit();
+ assert(canShrinkLidSpace());
+ uint32_t committedDocIdLimit = _committedDocIdLimit;
+ clearDocs(committedDocIdLimit, getNumDocs());
+ commit();
+ _committedDocIdLimit = committedDocIdLimit;
+ onShrinkLidSpace();
+ attribute::IPostingListAttributeBase *pab = getIPostingListAttributeBase();
+ if (pab != NULL) {
+ pab->forwardedShrinkLidSpace(_committedDocIdLimit);
+ }
+ incGeneration();
+ updateStat(true);
+}
+
+
+void
+AttributeVector::onShrinkLidSpace(void)
+{
+}
+
+
+void
+AttributeVector::clearDocs(DocId lidLow, DocId lidLimit)
+{
+ assert(lidLow <= lidLimit);
+ assert(lidLimit <= getNumDocs());
+ for (DocId lid = lidLow; lid < lidLimit; ++lid) {
+ clearDoc(lid);
+ }
+}
+
+AttributeVector::EnumModifier
+AttributeVector::getEnumModifier()
+{
+ attribute::InterlockGuard interlockGuard(*_interlock);
+ return EnumModifier(_enumLock, interlockGuard);
+}
+
+
+void
+AttributeVector::setInterlock(const std::shared_ptr<attribute::Interlock> &
+ interlock)
+{
+ _interlock = interlock;
+}
+
+
+std::unique_ptr<AttributeSaver>
+AttributeVector::initSave()
+{
+ commit();
+ return onInitSave();
+}
+
+std::unique_ptr<AttributeSaver>
+AttributeVector::onInitSave()
+{
+ return std::unique_ptr<AttributeSaver>();
+}
+
+
+IExtendAttribute *
+AttributeVector::getExtendInterface()
+{
+ return nullptr;
+}
+
+uint64_t
+AttributeVector::getEstimatedSaveByteSize() const
+{
+ uint64_t headerSize = 4096;
+ uint64_t totalValueCount = getTotalValueCount();
+ uint64_t uniqueValueCount = getUniqueValueCount();
+ uint64_t docIdLimit = getCommittedDocIdLimit();
+ uint64_t datFileSize = 0;
+ uint64_t weightFileSize = 0;
+ uint64_t idxFileSize = 0;
+ uint64_t udatFileSize = 0;
+ AddressSpace enumAddressSpace(getEnumStoreAddressSpaceUsage());
+
+ if (hasMultiValue()) {
+ idxFileSize = headerSize + sizeof(uint32_t) * (docIdLimit + 1);
+ }
+ if (hasWeightedSetType()) {
+ weightFileSize = headerSize + sizeof(int32_t) * totalValueCount;
+ }
+ if (hasEnum() && getEnumeratedSave()) {
+ datFileSize = headerSize + 4 * totalValueCount;
+ udatFileSize = headerSize + enumAddressSpace.used()
+ - 8 * uniqueValueCount;
+ } else {
+ BasicType::Type basicType(getBasicType());
+ const Status &status = getStatus();
+ int64_t memorySize = status.getUsed() - status.getDead();
+ if (memorySize < 0) {
+ memorySize = 0;
+ }
+ switch (basicType) {
+ case BasicType::Type::PREDICATE:
+ case BasicType::Type::TENSOR:
+ datFileSize = headerSize + memorySize;
+ break;
+ case BasicType::Type::STRING:
+ assert(hasEnum());
+ datFileSize = headerSize;
+ if (uniqueValueCount > 0) {
+ double avgEntrySize = (static_cast<double>(enumAddressSpace.used()) / uniqueValueCount) - 8;
+ datFileSize += avgEntrySize * totalValueCount;
+ }
+ break;
+ default:
+ datFileSize = headerSize + getFixedWidth() * totalValueCount;
+ break;
+ }
+ }
+ return datFileSize + weightFileSize + idxFileSize + udatFileSize;
+}
+
+
+template bool AttributeVector::append<StringChangeData>(ChangeVectorT< ChangeTemplate<StringChangeData> > &changes, uint32_t , const StringChangeData &, int32_t, bool);
+template bool AttributeVector::update<StringChangeData>(ChangeVectorT< ChangeTemplate<StringChangeData> > &changes, uint32_t , const StringChangeData &);
+template bool AttributeVector::remove<StringChangeData>(ChangeVectorT< ChangeTemplate<StringChangeData> > &changes, uint32_t , const StringChangeData &, int32_t);
+
+}
diff --git a/searchlib/src/vespa/searchlib/attribute/attributevector.h b/searchlib/src/vespa/searchlib/attribute/attributevector.h
new file mode 100644
index 00000000000..aab20a5136d
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/attributevector.h
@@ -0,0 +1,845 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "address_space.h"
+#include "address_space_usage.h"
+#include "iattributesavetarget.h"
+#include <vespa/document/update/arithmeticvalueupdate.h>
+#include <vespa/document/update/mapvalueupdate.h>
+#include <vespa/fastlib/io/bufferedfile.h>
+#include <vespa/fastlib/text/normwordfolder.h>
+#include <vespa/fastos/fastos.h>
+#include <vespa/searchcommon/attribute/config.h>
+#include <vespa/searchcommon/attribute/iattributevector.h>
+#include <vespa/searchcommon/attribute/status.h>
+#include <vespa/searchcommon/common/undefinedvalues.h>
+#include <vespa/searchlib/attribute/changevector.h>
+#include <vespa/searchlib/common/bitvector.h>
+#include <vespa/searchlib/common/range.h>
+#include <vespa/searchlib/common/rcuvector.h>
+#include <vespa/searchlib/query/query.h>
+#include <vespa/searchlib/queryeval/searchiterator.h>
+#include <vespa/searchlib/util/fileutil.h>
+#include <vespa/vespalib/objects/identifiable.h>
+#include <vespa/vespalib/stllike/asciistream.h>
+#include <vespa/vespalib/util/rwlock.h>
+#include <vespa/vespalib/util/sync.h>
+#include <math.h>
+#include <mutex>
+#include <shared_mutex>
+#include <string>
+
+using document::ArithmeticValueUpdate;
+using document::MapValueUpdate;
+using document::FieldValue;
+
+namespace vespalib
+{
+
+class GenericHeader;
+
+}
+
+
+namespace search {
+
+template <typename T> class ComponentGuard;
+class AttributeReadGuard;
+class AttributeWriteGuard;
+class AttributeSaver;
+class EnumStoreBase;
+class MultiValueMappingBaseBase;
+
+class IDocumentWeightAttribute;
+
+namespace fef {
+class TermFieldMatchData;
+}
+
+namespace attribute
+{
+
+class IPostingListSearchContext;
+
+class IPostingListAttributeBase;
+
+class Interlock;
+class InterlockGuard;
+
+}
+
+using search::attribute::WeightedType;
+using search::attribute::Status;
+
+template <typename T>
+class UnWeightedType
+{
+public:
+ UnWeightedType() : _value(T()) { }
+ UnWeightedType(T v) : _value(v) { }
+ const T & getValue() const { return _value; }
+ void setValue(const T & v) { _value = v; }
+ int32_t getWeight() const { return 1; }
+ void setWeight(int32_t w) { (void) w; }
+
+ bool operator==(const UnWeightedType<T> & rhs) const {
+ return _value == rhs._value;
+ }
+
+ friend vespalib::asciistream &
+ operator << (vespalib::asciistream & os, const UnWeightedType & v) {
+ return os << "(" << v._value << ", 1)";
+ }
+private:
+ T _value;
+};
+
+class IExtendAttribute
+{
+public:
+ virtual bool add(int64_t, int32_t = 1) { return false; }
+ virtual bool add(double, int32_t = 1) { return false; }
+ virtual bool add(const char *, int32_t = 1) { return false; }
+
+ virtual ~IExtendAttribute() {}
+};
+
+class AttributeVector : public vespalib::Identifiable,
+ public attribute::IAttributeVector
+{
+protected:
+ typedef search::attribute::Config Config;
+ typedef search::attribute::CollectionType CollectionType;
+ typedef search::attribute::BasicType BasicType;
+public:
+ typedef std::shared_ptr<AttributeVector> SP;
+ class BaseName : public vespalib::string
+ {
+ public:
+ typedef vespalib::string string;
+ BaseName(const vespalib::stringref &s)
+ : string(s),
+ _name(createAttributeName(s))
+ {
+ }
+ BaseName & operator = (const vespalib::stringref & s) {
+ BaseName n(s);
+ std::swap(*this, n);
+ return *this;
+ }
+
+ BaseName(const vespalib::stringref &base,
+ const vespalib::stringref &snap,
+ const vespalib::stringref &name);
+
+ string getIndexName() const;
+ string getSnapshotName() const;
+ const string & getAttributeName() const { return _name; }
+ string getDirName() const;
+ private:
+ static string createAttributeName(const vespalib::stringref & s);
+ string _name;
+ };
+
+ class ReaderBase
+ {
+ public:
+ ReaderBase(AttributeVector & attr);
+
+ virtual ~ReaderBase();
+
+ void rewind();
+
+ bool hasWeight() const {
+ return _weightFile.get() && _weightFile->IsOpened();
+ }
+
+ bool hasIdx() const {
+ return _idxFile.get() && _idxFile->IsOpened();
+ }
+
+ bool hasData() const {
+ return _datFile.get() && _datFile->IsOpened();
+ }
+
+ bool hasUData() const {
+ return _udatFile.get() && _udatFile->IsOpened();
+ }
+
+ uint32_t getNumIdx() const {
+ return (_idxFileSize - _idxHeaderLen) /sizeof(uint32_t);
+ }
+
+ size_t getEnumCount(void) const {
+ size_t dataSize(_datFileSize - _datHeaderLen);
+ assert((dataSize % sizeof(uint32_t)) == 0);
+ return dataSize / sizeof(uint32_t);
+ }
+
+ static bool
+ extractFileSize(const vespalib::GenericHeader &header,
+ FastOS_FileInterface &file, uint64_t &fileSize);
+
+ size_t getNumValues();
+ int32_t getNextWeight() { return _weightReader.readHostOrder(); }
+ uint32_t getNextEnum(void) { return _enumReader.readHostOrder(); }
+ bool getEnumerated(void) const { return _enumerated; }
+ uint32_t getNextValueCount();
+ int64_t getCreateSerialNum(void) const { return _createSerialNum; }
+ bool getHasLoadData(void) const { return _hasLoadData; }
+ uint32_t getVersion() const { return _version; }
+ uint32_t getDocIdLimit() const { return _docIdLimit; }
+ const vespalib::GenericHeader &getDatHeader() const {
+ return _datHeader;
+ }
+ protected:
+ std::unique_ptr<Fast_BufferedFile> _datFile;
+ private:
+ std::unique_ptr<Fast_BufferedFile> _weightFile;
+ std::unique_ptr<Fast_BufferedFile> _idxFile;
+ std::unique_ptr<Fast_BufferedFile> _udatFile;
+ FileReader<int32_t> _weightReader;
+ FileReader<uint32_t> _idxReader;
+ FileReader<uint32_t> _enumReader;
+ uint32_t _currIdx;
+ uint32_t _datHeaderLen;
+ uint32_t _idxHeaderLen;
+ uint32_t _weightHeaderLen;
+ uint32_t _udatHeaderLen;
+ uint64_t _createSerialNum;
+ size_t _fixedWidth;
+ bool _enumerated;
+ bool _hasLoadData;
+ uint32_t _version;
+ uint32_t _docIdLimit;
+ vespalib::FileHeader _datHeader;
+ uint64_t _datFileSize;
+ uint64_t _idxFileSize;
+ protected:
+ size_t getDataCountHelper(size_t elemSize) const {
+ size_t dataSize(_datFileSize - _datHeaderLen);
+ return dataSize / elemSize;
+ }
+ };
+
+ template <typename T>
+ class PrimitiveReader : public ReaderBase
+ {
+ public:
+ PrimitiveReader(AttributeVector &attr)
+ : ReaderBase(attr),
+ _datReader(*_datFile)
+ {
+ }
+
+ virtual ~PrimitiveReader() { }
+ T getNextData() { return _datReader.readHostOrder(); }
+ size_t getDataCount() const { return getDataCountHelper(sizeof(T)); }
+ private:
+ FileReader<T> _datReader;
+ };
+
+ using GenerationHandler = vespalib::GenerationHandler;
+ using GenerationHolder = vespalib::GenerationHolder;
+ typedef GenerationHandler::generation_t generation_t;
+
+ virtual ~AttributeVector();
+protected:
+ /**
+ * Will update statistics by calling onUpdateStat if necessary.
+ */
+ void updateStat(bool forceUpdate);
+
+ void
+ updateStatistics(uint64_t numValues,
+ uint64_t numUniqueValue,
+ uint64_t allocated,
+ uint64_t used,
+ uint64_t dead,
+ uint64_t onHold);
+
+ void performCompactionWarning();
+
+ void getByType(DocId doc, const char *&v) const {
+ char tmp[1024]; v = getString(doc, tmp, sizeof(tmp));
+ }
+
+ void getByType(DocId doc, vespalib::string &v) const {
+ char tmp[1024]; v = getString(doc, tmp, sizeof(tmp));
+ }
+
+ void getByType(DocId doc, largeint_t & v) const {
+ v = getInt(doc);
+ }
+
+ void getByType(DocId doc, double &v) const {
+ v = getFloat(doc);
+ }
+
+ uint32_t getByType(DocId doc, const char **v, uint32_t sz) const {
+ return get(doc, v, sz);
+ }
+
+ uint32_t getByType(DocId doc, vespalib::string *v, uint32_t sz) const {
+ return get(doc, v, sz);
+ }
+
+ uint32_t getByType(DocId doc, largeint_t * v, uint32_t sz) const {
+ return get(doc, v, sz);
+ }
+
+ uint32_t getByType(DocId doc, double *v, uint32_t sz) const {
+ return get(doc, v, sz);
+ }
+
+
+ AttributeVector(const vespalib::stringref &baseFileName, const Config & c);
+
+ void checkSetMaxValueCount(int index) {
+ _highestValueCount = std::max(index, _highestValueCount);
+ }
+
+ void setEnumMax(uint32_t e) { _enumMax = e; setEnum(); }
+ void setEnum(bool hasEnum_=true) { _hasEnum = hasEnum_; }
+ void setSortedEnum(bool sorted=true) { _hasSortedEnum = sorted; }
+ void setNumDocs(uint32_t n) { _status.setNumDocs(n); }
+ void incNumDocs() { _status.incNumDocs(); }
+
+ std::unique_ptr<Fast_BufferedFile> openDAT();
+
+ std::unique_ptr<Fast_BufferedFile> openIDX();
+
+ std::unique_ptr<Fast_BufferedFile> openWeight();
+
+ std::unique_ptr<Fast_BufferedFile> openUDAT();
+
+ FileUtil::LoadedBuffer::UP loadDAT();
+
+ FileUtil::LoadedBuffer::UP loadIDX();
+
+ FileUtil::LoadedBuffer::UP loadWeight();
+
+ FileUtil::LoadedBuffer::UP loadUDAT();
+
+ class ValueModifier
+ {
+ public:
+ ValueModifier(AttributeVector &attr);
+ ValueModifier(const ValueModifier &rhs);
+ ~ValueModifier();
+ private:
+ AttributeVector * stealAttr() const {
+ AttributeVector * ret(_attr);
+ _attr = NULL;
+ return ret;
+ }
+
+ mutable AttributeVector * _attr;
+ };
+
+ class EnumModifier
+ {
+ std::unique_lock<std::shared_timed_mutex> _enumLock;
+ public:
+ EnumModifier(std::shared_timed_mutex &lock,
+ attribute::InterlockGuard &interlockGuard)
+ : _enumLock(lock)
+ {
+ (void) interlockGuard;
+ }
+ EnumModifier(EnumModifier &&rhs)
+ : _enumLock(std::move(rhs._enumLock))
+ {
+ }
+ EnumModifier &operator=(EnumModifier &&rhs)
+ {
+ _enumLock = std::move(rhs._enumLock);
+ return *this;
+ }
+ virtual ~EnumModifier()
+ {
+ }
+ };
+
+ EnumModifier getEnumModifier();
+ ValueModifier getValueModifier() { return ValueModifier(*this); }
+
+ void updateUncommittedDocIdLimit(DocId doc) {
+ if (_uncommittedDocIdLimit <= doc) {
+ _uncommittedDocIdLimit = doc + 1;
+ }
+ }
+
+ void updateCommittedDocIdLimit(void) {
+ if (_uncommittedDocIdLimit != 0) {
+ if (_uncommittedDocIdLimit > _committedDocIdLimit) {
+ std::atomic_thread_fence(std::memory_order_release);
+ _committedDocIdLimit = _uncommittedDocIdLimit;
+ }
+ _uncommittedDocIdLimit = 0;
+ }
+ }
+
+public:
+ void incGeneration();
+ void removeAllOldGenerations();
+
+ generation_t getFirstUsedGeneration() const {
+ return _genHandler.getFirstUsedGeneration();
+ }
+
+ generation_t getCurrentGeneration() const {
+ return _genHandler.getCurrentGeneration();
+ }
+
+ virtual IExtendAttribute * getExtendInterface();
+
+protected:
+ /**
+ * Returns the number of readers holding a generation guard.
+ * Should be called by the writer thread.
+ */
+ uint32_t getGenerationRefCount(generation_t gen) const {
+ return _genHandler.getGenerationRefCount(gen);
+ }
+
+ const GenerationHandler & getGenerationHandler() const {
+ return _genHandler;
+ }
+
+ GenerationHandler & getGenerationHandler() {
+ return _genHandler;
+ }
+
+ GenerationHolder & getGenerationHolder() {
+ return _genHolder;
+ }
+
+ template<typename T>
+ bool clearDoc(ChangeVectorT< ChangeTemplate<T> > &changes, DocId doc);
+
+ template<typename T>
+ bool update(ChangeVectorT< ChangeTemplate<T> > &changes, DocId doc, const T & v) __attribute__((noinline));
+
+ template<typename T>
+ bool append(ChangeVectorT< ChangeTemplate<T> > &changes, DocId doc, const T &v, int32_t w, bool doCount = true) __attribute__((noinline));
+ template<typename T, typename Accessor>
+ bool append(ChangeVectorT< ChangeTemplate<T> > &changes, DocId doc, Accessor & ac) __attribute__((noinline));
+
+ template<typename T>
+ bool remove(ChangeVectorT< ChangeTemplate<T> > & changes, DocId doc, const T &v, int32_t w);
+
+ template<typename T>
+ bool adjustWeight(ChangeVectorT< ChangeTemplate<T> > &changes, DocId doc, const T &v, const ArithmeticValueUpdate &wd);
+
+ template <typename T>
+ static int32_t
+ applyWeightChange(int32_t weight, const ChangeTemplate<T> &weightChange) {
+ if (weightChange._type == ChangeBase::INCREASEWEIGHT) {
+ return weight + weightChange._weight;
+ } else if (weightChange._type == ChangeBase::MULWEIGHT) {
+ return weight * weightChange._weight;
+ } else if (weightChange._type == ChangeBase::DIVWEIGHT) {
+ return weight / weightChange._weight;
+ }
+ return weight;
+ }
+
+ template<typename T>
+ bool applyArithmetic(ChangeVectorT< ChangeTemplate<T> > &changes, DocId doc, const T &v, const ArithmeticValueUpdate & arithm);
+
+ static double round(double v, double & r) { return r = v; }
+ static largeint_t round(double v, largeint_t &r) { return r = static_cast<largeint_t>(::floor(v+0.5)); }
+
+ template <typename BaseType, typename ChangeData>
+ static BaseType
+ applyArithmetic(const BaseType &value,
+ const ChangeTemplate<ChangeData> & arithmetic)
+ {
+ typedef typename ChangeData::DataType LargeType;
+ if (attribute::isUndefined(value)) {
+ return value;
+ } else if (arithmetic._type == ChangeBase::ADD) {
+ return value + static_cast<LargeType>(arithmetic._arithOperand);
+ } else if (arithmetic._type == ChangeBase::SUB) {
+ return value - static_cast<LargeType>(arithmetic._arithOperand);
+ } else if (arithmetic._type == ChangeBase::MUL) {
+ LargeType r;
+ return round((static_cast<double>(value) *
+ arithmetic._arithOperand), r);
+ } else if (arithmetic._type == ChangeBase::DIV) {
+ LargeType r;
+ return round(static_cast<double>(value) /
+ arithmetic._arithOperand, r);
+ }
+ return value;
+ }
+
+ virtual AddressSpace getEnumStoreAddressSpaceUsage() const;
+
+ virtual AddressSpace getMultiValueAddressSpaceUsage() const;
+
+public:
+ DECLARE_IDENTIFIABLE_ABSTRACT(AttributeVector);
+ bool isLoaded() const { return _loaded; }
+
+ /** Return the fixed length of the attribute. If 0 then you must inquire each document. */
+ virtual size_t getFixedWidth() const override { return _config.basicType().fixedSize(); }
+ const Config &getConfig() const { return _config; }
+ BasicType getInternalBasicType() const { return _config.basicType(); }
+ CollectionType getInternalCollectionType() const { return _config.collectionType(); }
+ const BaseName & getBaseFileName() const { return _baseFileName; }
+ void setBaseFileName(const vespalib::stringref & name) { _baseFileName = name; }
+
+ // Implements IAttributeVector
+ virtual const vespalib::string & getName(void) const {
+ return _baseFileName.getAttributeName();
+ }
+
+ virtual bool hasMultiValue() const {
+ return _config.collectionType().isMultiValue();
+ }
+
+ virtual bool hasWeightedSetType() const {
+ return _config.collectionType().isWeightedSet();
+ }
+
+ bool hasArrayType() const { return _config.collectionType().isArray(); }
+ virtual bool hasEnum() const { return _hasEnum; }
+ bool hasSortedEnum() const { return _hasSortedEnum; }
+ virtual bool hasEnum2Value() const { return false; }
+ virtual uint32_t getMaxValueCount() const { return _highestValueCount; }
+ uint32_t getEnumMax() const { return _enumMax; }
+
+ // Implements IAttributeVector
+ virtual uint32_t getNumDocs(void) const { return _status.getNumDocs(); }
+ uint32_t getCommittedDocIdLimit(void) const { return _committedDocIdLimit; }
+ uint32_t & getCommittedDocIdLimitRef(void) { return _committedDocIdLimit; }
+ void setCommittedDocIdLimit(uint32_t committedDocIdLimit) {
+ _committedDocIdLimit = committedDocIdLimit;
+ }
+
+ const Status & getStatus() const { return _status; }
+ Status & getStatus() { return _status; }
+
+ AddressSpaceUsage getAddressSpaceUsage() const;
+
+ // Implements IAttributeVector
+ virtual BasicType::Type getBasicType() const {
+ return getInternalBasicType().type();
+ }
+
+ virtual CollectionType::Type getCollectionType() const {
+ return getInternalCollectionType().type();
+ }
+
+ /**
+ * Updates the base file name of this attribute vector and saves
+ * it to file(s)
+ */
+ bool saveAs(const vespalib::stringref &baseFileName);
+
+ /**
+ * Updates the base file name of this attribute vector and saves
+ * it using the given saveTarget
+ */
+ bool saveAs(const vespalib::stringref &baseFileName,
+ IAttributeSaveTarget &saveTarget);
+
+ /** Saves this attribute vector to file(s) **/
+ bool save();
+
+ /** Saves this attribute vector using the given saveTarget **/
+ bool save(IAttributeSaveTarget & saveTarget);
+
+ IAttributeSaveTarget::Config createSaveTargetConfig() const;
+
+ /** Returns whether this attribute has load data files on disk **/
+ bool hasLoadData() const;
+
+ bool isEnumeratedSaveFormat(void) const;
+ bool load();
+ void commit(bool forceStatUpdate = false);
+ void commit(uint64_t firstSyncToken, uint64_t lastSyncToken);
+ void setCreateSerialNum(uint64_t createSerialNum);
+ uint64_t getCreateSerialNum(void) const;
+ virtual uint32_t getVersion() const;
+
+////// Interface to access single documents.
+ /**
+ * Interface to access the individual elements both for update and
+ * retrival are type specific. They are accessed by their proper
+ * type.
+ */
+ /** Get number of values per document. */
+ virtual uint32_t getValueCount(DocId doc) const = 0;
+
+ virtual uint32_t clearDoc(DocId doc) = 0;
+ virtual largeint_t getDefaultValue() const = 0;
+ virtual EnumHandle getEnum(DocId doc) const = 0;
+ virtual const char * getString(DocId doc, char * v, size_t sz) const = 0;
+ virtual largeint_t getInt(DocId doc) const = 0;
+ virtual double getFloat(DocId doc) const = 0;
+ virtual void getEnumValue(const EnumHandle *v, uint32_t *e, uint32_t sz) const = 0;
+
+ uint32_t getEnumValue(EnumHandle eh) const {
+ uint32_t e(0);
+ getEnumValue(&eh, &e, 1);
+ return e;
+ }
+
+ // Implements IAttributeVector
+ virtual uint32_t get(DocId doc, EnumHandle *v, uint32_t sz) const = 0;
+ virtual uint32_t get(DocId doc, vespalib::string *v, uint32_t sz) const = 0;
+ virtual uint32_t get(DocId doc, const char **v, uint32_t sz) const = 0;
+ virtual uint32_t get(DocId doc, largeint_t *v, uint32_t sz) const = 0;
+ virtual uint32_t get(DocId doc, double *v, uint32_t sz) const = 0;
+
+ // Implements IAttributeVector
+ virtual uint32_t get(DocId doc, WeightedEnum *v, uint32_t sz) const = 0;
+ virtual uint32_t get(DocId doc, WeightedString *v, uint32_t sz) const = 0;
+ virtual uint32_t get(DocId doc, WeightedConstChar *v, uint32_t sz) const = 0;
+ virtual uint32_t get(DocId doc, WeightedInt *v, uint32_t sz) const = 0;
+ virtual uint32_t get(DocId doc, WeightedFloat *v, uint32_t sz) const = 0;
+ virtual int32_t getWeight(DocId doc, uint32_t idx) const;
+
+ // Implements IAttributeVector
+ virtual bool findEnum(const char *value, EnumHandle &e) const {
+ (void) value;
+ (void) e;
+ return false;
+ }
+
+///// Modify API
+ virtual void onCommit() = 0;
+ virtual bool addDoc(DocId &doc) = 0;
+ virtual bool addDocs(DocId & startDoc, DocId & lastDoc, uint32_t numDocs);
+ virtual bool addDocs(uint32_t numDocs);
+ bool apply(DocId doc, const MapValueUpdate &map);
+
+////// Search API
+
+ // type-safe down-cast to attribute supporting direct document weight iterators
+ virtual const IDocumentWeightAttribute *asDocumentWeightAttribute() const { return nullptr; }
+
+ /**
+ - Search for equality
+ - Range search
+ */
+
+ class SearchContext : public vespalib::noncopyable
+ {
+ template <class SC> friend class AttributeIteratorT;
+ template <class SC> friend class FilterAttributeIteratorT;
+ template <class PL> friend class AttributePostingListIteratorT;
+ template <class PL> friend class FilterAttributePostingListIteratorT;
+ public:
+ class Params {
+ using IAttributeVector = attribute::IAttributeVector;
+ public:
+ Params();
+ bool useBitVector() const { return _useBitVector; }
+ const IAttributeVector * diversityAttribute() const { return _diversityAttribute; }
+ size_t diversityCutoffGroups() const { return _diversityCutoffGroups; }
+ bool diversityCutoffStrict() const { return _diversityCutoffStrict; }
+
+ Params & useBitVector(bool value) {
+ _useBitVector = value;
+ return *this;
+ }
+ Params & diversityAttribute(const IAttributeVector * value) {
+ _diversityAttribute = value;
+ return *this;
+ }
+ Params & diversityCutoffGroups(size_t groups) {
+ _diversityCutoffGroups = groups;
+ return *this;
+ }
+ Params & diversityCutoffStrict(bool strict) {
+ _diversityCutoffStrict = strict;
+ return *this;
+ }
+ private:
+ const IAttributeVector * _diversityAttribute;
+ size_t _diversityCutoffGroups;
+ bool _useBitVector;
+ bool _diversityCutoffStrict;
+ };
+ typedef std::unique_ptr<SearchContext> UP;
+ virtual ~SearchContext();
+ virtual unsigned int approximateHits() const;
+ static QueryTermSimple::UP decodeQuery(const QueryPacketT & searchSpec);
+
+ /**
+ * Creates an attribute search iterator associated with this
+ * search context.
+ *
+ * @return attribute search iterator
+ *
+ * @param matchData the attribute match data used when
+ * unpacking data for a hit
+ *
+ * @param strict whether the iterator should be strict or not
+ *
+ * @param useBitVector whether bitvectors should be used when available
+ **/
+ virtual queryeval::SearchIterator::UP
+ createIterator(fef::TermFieldMatchData *matchData, bool strict);
+
+ /**
+ * Creates an attribute search iterator associated with this
+ * search context. Postings lists are not used.
+ *
+ * @return attribute search iterator
+ *
+ * @param matchData the attribute match data used when
+ * unpacking data for a hit
+ *
+ * @param strict whether the iterator should be strict or not
+ **/
+ virtual queryeval::SearchIterator::UP
+ createFilterIterator(fef::TermFieldMatchData *matchData, bool strict);
+
+ /*
+ * Create temporary posting lists. Should be called before
+ * createIterator is called.
+ */
+ virtual void fetchPostings(bool strict);
+ bool cmp(DocId docId, int32_t &weight) const { return onCmp(docId, weight); }
+ bool cmp(DocId docId) const { return onCmp(docId); }
+ const AttributeVector & attribute() const { return _attr; }
+ virtual bool valid() const { return false; }
+ virtual Int64Range getAsIntegerTerm() const { return Int64Range(); }
+
+ virtual const QueryTermBase & queryTerm() const {
+ return *static_cast<const QueryTermBase *>(NULL);
+ }
+
+ protected:
+ SearchContext(const AttributeVector &attr);
+ private:
+ virtual bool onCmp(DocId docId, int32_t &weight) const = 0;
+ virtual bool onCmp(DocId docId) const = 0;
+
+ const AttributeVector & _attr;
+ protected:
+ attribute::IPostingListSearchContext *_plsc;
+
+ bool getIsFilter(void) const { return _attr.getConfig().getIsFilter(); }
+ };
+
+ SearchContext::UP getSearch(const QueryPacketT &searchSpec, const SearchContext::Params & params) const;
+ virtual SearchContext::UP getSearch(QueryTermSimple::UP term, const SearchContext::Params & params) const = 0;
+ virtual const EnumStoreBase *getEnumStoreBase() const { return nullptr; }
+ virtual const MultiValueMappingBaseBase *getMultiValueBase() const { return nullptr; }
+private:
+ void divideByZeroWarning();
+ virtual bool applyWeight(DocId doc, const FieldValue &fv, const ArithmeticValueUpdate &wAdjust);
+ virtual void onSave(IAttributeSaveTarget & saveTarget);
+ virtual bool onLoad();
+ bool headerTypeOK(const vespalib::GenericHeader &header) const;
+ std::unique_ptr<Fast_BufferedFile> openFile(const char *suffix);
+ FileUtil::LoadedBuffer::UP loadFile(const char *suffix);
+
+
+ BaseName _baseFileName;
+ Config _config;
+ std::shared_ptr<attribute::Interlock> _interlock;
+ std::shared_timed_mutex _enumLock;
+ GenerationHandler _genHandler;
+ GenerationHolder _genHolder;
+ Status _status;
+ int _highestValueCount;
+ uint32_t _enumMax;
+ uint32_t _committedDocIdLimit; // docid limit for search
+ uint32_t _uncommittedDocIdLimit; // based on queued changes
+ uint64_t _createSerialNum;
+ uint64_t _compactLidSpaceGeneration;
+ bool _hasEnum;
+ bool _hasSortedEnum;
+ bool _loaded;
+ bool _enableEnumeratedSave;
+ fastos::TimeStamp _nextStatUpdateTime;
+
+////// Locking strategy interface. only available from the Guards.
+ /**
+ * Used to guard that a value you reference will always reference
+ * a value. It might not be the same value, but at least it will
+ * be a value for that document. The guarantee holds as long as
+ * the guard is alive.
+ */
+ GenerationHandler::Guard takeGenerationGuard() { return _genHandler.takeGuard(); }
+
+ /// Clean up [0, firstUsed>
+ virtual void removeOldGenerations(generation_t firstUsed) { (void) firstUsed; }
+ virtual void onGenerationChange(generation_t generation) { (void) generation; }
+ virtual void onUpdateStat() = 0;
+ /**
+ * Used to regulate access to critical resources. Apply the
+ * reader/writer guards.
+ */
+ std::shared_timed_mutex & getEnumLock() { return _enumLock; }
+
+ friend class ComponentGuard<AttributeVector>;
+ friend class AttributeEnumGuard;
+ friend class AttributeValueGuard;
+ friend class AttributeTest;
+ friend class AttributeManagerTest;
+public:
+ /**
+ * Should be called by the writer thread.
+ */
+ void updateFirstUsedGeneration(void) {
+ _genHandler.updateFirstUsedGeneration();
+ }
+
+ /**
+ * Returns true if we might still have readers. False positives
+ * are possible if writer hasn't updated first used generation
+ * after last reader left.
+ */
+ bool hasReaders(void) const { return _genHandler.hasReaders(); }
+
+ /**
+ * Add reserved initial document with docId 0 and undefined value.
+ */
+ void addReservedDoc(void);
+
+ void enableEnumeratedSave(bool enable = true);
+
+ /*
+ * Temporary method, used by unit tests to enable enumerated load
+ * until it can be enabled by default.
+ */
+ static void enableEnumeratedLoad(void);
+
+ bool getEnumeratedSave(void) const { return _hasEnum && _enableEnumeratedSave; }
+
+ virtual attribute::IPostingListAttributeBase * getIPostingListAttributeBase();
+ bool hasPostings(void);
+ virtual uint64_t getUniqueValueCount(void) const;
+ virtual uint64_t getTotalValueCount(void) const;
+ virtual void compactLidSpace(uint32_t wantedLidLimit);
+ virtual void clearDocs(DocId lidLow, DocId lidLimit);
+ bool wantShrinkLidSpace(void) const { return _committedDocIdLimit < getNumDocs(); }
+ virtual bool canShrinkLidSpace(void) const;
+ void shrinkLidSpace(void);
+ virtual void onShrinkLidSpace(void);
+
+ void setInterlock(const std::shared_ptr<attribute::Interlock> &interlock);
+
+ const std::shared_ptr<attribute::Interlock> &getInterlock() const
+ {
+ return _interlock;
+ }
+
+ std::unique_ptr<AttributeSaver> initSave();
+
+ virtual std::unique_ptr<AttributeSaver> onInitSave();
+
+ virtual uint64_t getEstimatedSaveByteSize() const;
+};
+
+}
+
diff --git a/searchlib/src/vespa/searchlib/attribute/attributevector.hpp b/searchlib/src/vespa/searchlib/attribute/attributevector.hpp
new file mode 100644
index 00000000000..20edc0826ad
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/attributevector.hpp
@@ -0,0 +1,169 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/searchlib/attribute/attributevector.h>
+#include <vespa/searchlib/attribute/integerbase.h>
+#include <cmath>
+
+namespace search {
+
+template <typename T>
+inline bool myIsNan(T v) { (void)v; return false; }
+
+template <>
+inline bool
+myIsNan<float>(float v)
+{
+ return std::isnan(v);
+}
+
+template <>
+inline bool
+myIsNan<double>(double v)
+{
+ return std::isnan(v);
+}
+
+template<typename T>
+bool
+AttributeVector::adjustWeight(ChangeVectorT< ChangeTemplate<T> > & changes, DocId doc, const T & v,
+ const ArithmeticValueUpdate & wd)
+{
+ bool retval(hasWeightedSetType() && (doc < getNumDocs()));
+ if (retval) {
+ size_t oldSz(changes.size());
+ ArithmeticValueUpdate::Operator op(wd.getOperator());
+ int32_t w(static_cast<int32_t>(wd.getOperand()));
+ if (op == ArithmeticValueUpdate::Add) {
+ changes.push_back(ChangeTemplate<T>(ChangeBase::INCREASEWEIGHT, doc, v, w));
+ } else if (op == ArithmeticValueUpdate::Sub) {
+ changes.push_back(ChangeTemplate<T>(ChangeBase::INCREASEWEIGHT, doc, v, -w));
+ } else if (op == ArithmeticValueUpdate::Mul) {
+ changes.push_back(ChangeTemplate<T>(ChangeBase::MULWEIGHT, doc, v, w));
+ } else if (op == ArithmeticValueUpdate::Div) {
+ if (w != 0) {
+ changes.push_back(ChangeTemplate<T>(ChangeBase::DIVWEIGHT, doc, v, w));
+ } else {
+ divideByZeroWarning();
+ }
+ } else {
+ retval = false;
+ }
+ if (retval) {
+ const size_t diff = changes.size() - oldSz;
+ _status.incNonIdempotentUpdates(diff);
+ _status.incUpdates(diff);
+ }
+ }
+ return retval;
+}
+
+template<typename T>
+bool
+AttributeVector::applyArithmetic(ChangeVectorT< ChangeTemplate<T> > & changes, DocId doc, const T & v,
+ const ArithmeticValueUpdate & arithm)
+{
+ (void) v;
+ bool retval(!hasMultiValue() && (doc < getNumDocs()));
+ if (retval) {
+ size_t oldSz(changes.size());
+ ArithmeticValueUpdate::Operator op(arithm.getOperator());
+ double aop = arithm.getOperand();
+ if (op == ArithmeticValueUpdate::Add) {
+ changes.push_back(ChangeTemplate<T>(ChangeBase::ADD, doc, 0, 0));
+ } else if (op == ArithmeticValueUpdate::Sub) {
+ changes.push_back(ChangeTemplate<T>(ChangeBase::SUB, doc, 0, 0));
+ } else if (op == ArithmeticValueUpdate::Mul) {
+ changes.push_back(ChangeTemplate<T>(ChangeBase::MUL, doc, 0, 0));
+ } else if (op == ArithmeticValueUpdate::Div) {
+ if (this->getClass().inherits(IntegerAttribute::classId) && aop == 0) {
+ divideByZeroWarning();
+ } else {
+ changes.push_back(ChangeTemplate<T>(ChangeBase::DIV, doc, 0, 0));
+ }
+ } else {
+ retval = false;
+ }
+ if (retval) {
+ const size_t diff = changes.size() - oldSz;
+ _status.incNonIdempotentUpdates(diff);
+ _status.incUpdates(diff);
+ if (diff > 0) {
+ changes.back()._arithOperand = aop;
+ }
+ }
+ }
+ return retval;
+}
+
+template<typename T>
+bool AttributeVector::clearDoc(ChangeVectorT< ChangeTemplate<T> > & changes, DocId doc) {
+ bool retval(doc < getNumDocs());
+ if (retval) {
+ changes.push_back(ChangeTemplate<T>(ChangeBase::CLEARDOC, doc, T()));
+ _status.incUpdates();
+ updateUncommittedDocIdLimit(doc);
+ }
+ return retval;
+}
+
+template<typename T>
+bool AttributeVector::update(ChangeVectorT< ChangeTemplate<T> > & changes, DocId doc, const T & v) {
+ bool retval(doc < getNumDocs());
+ if (retval) {
+ if (hasMultiValue()) {
+ clearDoc(doc);
+ retval = append(changes, doc, v, 1, false);
+ } else {
+ changes.push_back(ChangeTemplate<T>(ChangeBase::UPDATE, doc, v));
+ _status.incUpdates();
+ updateUncommittedDocIdLimit(doc);
+ }
+ }
+ return retval;
+}
+
+template<typename T>
+bool AttributeVector::append(ChangeVectorT< ChangeTemplate<T> > & changes, DocId doc, const T & v, int32_t w, bool doCount) {
+ bool retval(hasMultiValue() && (doc < getNumDocs()));
+ if (retval) {
+ changes.push_back(ChangeTemplate<T>(ChangeBase::APPEND, doc, v, w));
+ _status.incUpdates();
+ updateUncommittedDocIdLimit(doc);
+ if ( hasArrayType() && doCount) {
+ _status.incNonIdempotentUpdates();
+ }
+ }
+ return retval;
+}
+
+template<typename T, typename Accessor>
+bool AttributeVector::append(ChangeVectorT< ChangeTemplate<T> > & changes, DocId doc, Accessor & ac) {
+ bool retval(hasMultiValue() && (doc < getNumDocs()));
+ if (retval) {
+ changes.push_back(doc, ac);
+ _status.incUpdates(ac.size());
+ updateUncommittedDocIdLimit(doc);
+ if ( hasArrayType() ) {
+ _status.incNonIdempotentUpdates(ac.size());
+ }
+ }
+ return retval;
+}
+
+template<typename T>
+bool AttributeVector::remove(ChangeVectorT< ChangeTemplate<T> > & changes, DocId doc, const T & v, int32_t w) {
+ bool retval(hasMultiValue() && (doc < getNumDocs()));
+ if (retval) {
+ changes.push_back(ChangeTemplate<T>(ChangeBase::REMOVE, doc, v, w));
+ _status.incUpdates();
+ updateUncommittedDocIdLimit(doc);
+ if ( hasArrayType() ) {
+ _status.incNonIdempotentUpdates();
+ }
+ }
+ return retval;
+}
+
+}
+
diff --git a/searchlib/src/vespa/searchlib/attribute/attrvector.cpp b/searchlib/src/vespa/searchlib/attribute/attrvector.cpp
new file mode 100644
index 00000000000..727cfcc3efe
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/attrvector.cpp
@@ -0,0 +1,188 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+#include "attrvector.h"
+#include "attrvector.hpp"
+#include <vespa/fastlib/io/bufferedfile.h>
+#include <vespa/searchlib/attribute/attrvector.h>
+#include <vespa/searchlib/util/filekit.h>
+
+LOG_SETUP(".attrvector");
+
+namespace search {
+
+StringDirectAttribute::
+StringDirectAttribute(const vespalib::string & baseFileName, const Config & c)
+ : search::StringAttribute(baseFileName, c),
+ _buffer(),
+ _offsets(),
+ _idx()
+{
+}
+
+bool StringDirectAttribute::findEnum(const char * key, EnumHandle & e) const
+{
+ if (_offsets.size() < 1) {
+ e = 0;
+ return false;
+ }
+ int delta;
+ const int eMax = getEnumMax();
+ for (delta = 1; delta <= eMax; delta <<= 1) { }
+ delta >>= 1;
+ int pos = delta - 1;
+ int cmpres(0);
+
+ while (delta != 0) {
+ delta >>= 1;
+ if (pos >= eMax) {
+ pos -= delta;
+ } else {
+ const char *name = &_buffer[_offsets[pos]];
+ cmpres = strcmp(key, name);
+ if (cmpres == 0) {
+ e = pos;
+ return true;
+ }
+ pos += (cmpres < 0) ? -delta : +delta;
+ }
+ }
+ e = ((cmpres > 0) && (pos < eMax)) ? pos + 1 : pos;
+ return false;
+}
+
+void StringDirectAttribute::onSave(IAttributeSaveTarget & saveTarget)
+{
+ assert(!saveTarget.getEnumerated());
+ using Buffer = IAttributeSaveTarget::Buffer;
+ if ( hasEnum() ) {
+ uint32_t sz(getMaxValueCount());
+ Buffer dat(saveTarget.datWriter().allocBuf(sz*getNumDocs()*11));
+ const char * * v = new const char *[sz];
+ for(size_t i(0), m(getNumDocs()); i < m; i++) {
+ for(size_t j(0), k(static_cast<const AttributeVector &>(*this).get(i, v, sz)); j < k; j++) {
+ dat->writeBytes(v[j], strlen(v[j]) + 1);
+ }
+ }
+ delete [] v;
+ } else if ( ! _buffer.empty() ) {
+ Buffer dat(saveTarget.datWriter().allocBuf(_buffer.size()));
+ dat->writeBytes(&_buffer[0], _buffer.size());
+ saveTarget.datWriter().writeBuf(std::move(dat));
+ }
+
+ if (hasMultiValue()) {
+ Buffer buf(saveTarget.idxWriter().allocBuf(sizeof(uint32_t) *
+ _idx.size()));
+ buf->writeBytes(&_idx[0], sizeof(uint32_t) * _idx.size());
+ saveTarget.idxWriter().writeBuf(std::move(buf));
+ }
+}
+
+class stringComp : public std::binary_function<uint32_t, uint32_t, bool> {
+public:
+ stringComp(const char * buffer) : _buffer(buffer) { }
+ bool operator()(uint32_t x, uint32_t y) const { return strcmp(_buffer+x, _buffer+y) < 0; }
+private:
+ const char * _buffer;
+};
+
+void addString(const char * v, StringAttribute::OffsetVector & offsets, std::vector<char> & buffer)
+{
+ offsets.push_back(buffer.size());
+ for(const char *p(v); *p; p++) {
+ buffer.push_back(*p);
+ }
+ buffer.push_back('\0');
+}
+
+bool StringDirectAttribute::onLoad()
+{
+ {
+ std::vector<char> empty;
+ std::vector<uint32_t> empty1;
+ std::vector<uint32_t> empty2;
+ OffsetVector empty3;
+ std::swap(empty, _buffer);
+ std::swap(empty2, _idx);
+ std::swap(empty3, _offsets);
+ setNumDocs(0);
+ setCommittedDocIdLimit(0);
+ }
+
+ FileUtil::LoadedBuffer::UP tmpBuffer(loadDAT());
+ bool rc(tmpBuffer.get());
+ if (rc) {
+ if ( ! tmpBuffer->empty()) {
+ OffsetVector tmpOffsets;
+ tmpOffsets.reserve(countZero(tmpBuffer->c_str(), tmpBuffer->size()) + 1);
+ generateOffsets(tmpBuffer->c_str(), tmpBuffer->size(), tmpOffsets);
+
+ if ( hasEnum() ) {
+ std::sort(tmpOffsets.begin(), tmpOffsets.end(), stringComp(tmpBuffer->c_str()));
+ _offsets.clear();
+ _buffer.clear();
+ if (!tmpOffsets.empty()) {
+ const char * prev(tmpBuffer->c_str() + tmpOffsets[0]);
+ addString(prev, _offsets, _buffer);
+ for(OffsetVector::const_iterator it(tmpOffsets.begin()+1), mt(tmpOffsets.end()); it != mt; it++) {
+ if (strcmp(tmpBuffer->c_str() + *it, prev) != 0) {
+ prev = tmpBuffer->c_str() + *it;
+ addString(prev, _offsets, _buffer);
+ }
+ }
+ }
+ setEnumMax(_offsets.size());
+ generateOffsets(tmpBuffer->c_str(), tmpBuffer->size(), tmpOffsets);
+ } else {
+ _buffer.clear();
+ _buffer.reserve(tmpBuffer->size());
+ for (size_t i=0; i < tmpBuffer->size(); i++) {
+ _buffer.push_back(tmpBuffer->c_str()[i]);
+ }
+ std::swap(tmpOffsets, _offsets);
+ }
+ }
+
+ if (hasMultiValue()) {
+ FileUtil::LoadedBuffer::UP tmpIdx(loadIDX());
+ size_t tmpIdxLen(tmpIdx->size(sizeof(uint32_t)));
+ _idx.clear();
+ _idx.reserve(tmpIdxLen);
+ uint32_t prev(0);
+ const uint32_t * idxPtr(static_cast<const uint32_t *>(tmpIdx->buffer()));
+ for (size_t i=0; i < tmpIdxLen; i++) {
+ checkSetMaxValueCount(idxPtr[i] - prev);
+ prev = idxPtr[i];
+ _idx.push_back(prev);
+ }
+ rc = tmpIdx.get();
+ tmpIdx.reset();
+ }
+ uint32_t numDocs(hasMultiValue() ? (_idx.size()-1) : _offsets.size());
+ setNumDocs(numDocs);
+ setCommittedDocIdLimit(numDocs);
+ }
+
+ // update statistics
+ uint64_t numValues = _offsets.size();
+ uint64_t numUniqueValues = _offsets.size();
+ uint64_t allocated = _buffer.size() * sizeof(char) + _offsets.size() * sizeof(uint32_t) +
+ + _idx.size() * sizeof(uint32_t);
+ this->updateStatistics(numValues, numUniqueValues, allocated, allocated, 0, 0);
+ return rc;
+}
+
+void StringDirectAttribute::onCommit()
+{
+ abort();
+}
+
+bool StringDirectAttribute::addDoc(DocId & doc)
+{
+ (void) doc;
+ return false;
+}
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/attribute/attrvector.h b/searchlib/src/vespa/searchlib/attribute/attrvector.h
new file mode 100644
index 00000000000..a13c2a98002
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/attrvector.h
@@ -0,0 +1,235 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/searchlib/common/rankedhit.h>
+#include <vespa/searchlib/attribute/stringbase.h>
+#include <vespa/searchlib/attribute/integerbase.h>
+#include <vespa/searchlib/attribute/floatbase.h>
+
+//TODO: This one should go.
+//
+using search::AttributeVector;
+
+//-----------------------------------------------------------------------------
+
+class AttrVector
+{
+public:
+ template <bool MULTI>
+ struct Features
+ {
+ typedef uint32_t EnumType;
+ static bool IsMultiValue() { return MULTI; }
+ };
+};
+
+namespace search {
+
+template <typename B>
+class NumericDirectAttribute : public B
+{
+private:
+ typedef typename B::EnumHandle EnumHandle;
+ NumericDirectAttribute(const NumericDirectAttribute &);
+ NumericDirectAttribute & operator=(const NumericDirectAttribute &);
+ virtual bool onLoad();
+ virtual typename B::BaseType getFromEnum(EnumHandle e) const { return _data[e]; }
+ virtual void getEnumValue(const EnumHandle * v, uint32_t *e, uint32_t sz) const {
+ for (size_t i(0); i < sz; i++) {
+ e[i] = v[i];
+ }
+ }
+protected:
+ typedef typename B::BaseType BaseType;
+ typedef typename B::DocId DocId;
+ typedef typename B::Change Change;
+ typedef typename B::largeint_t largeint_t;
+ typedef typename B::Config Config;
+
+ NumericDirectAttribute(const vespalib::string & baseFileName, const Config & c);
+
+ virtual bool findEnum(BaseType value, EnumHandle & e) const;
+ virtual void onCommit();
+ virtual void onUpdateStat() { }
+ virtual bool addDoc(DocId & );
+
+ std::vector<BaseType> _data;
+ std::vector<uint32_t> _idx;
+};
+
+}
+
+template <typename F, typename B>
+class NumericDirectAttrVector : public search::NumericDirectAttribute<B>
+{
+protected:
+ typedef typename B::DocId DocId;
+ typedef NumericDirectAttrVector<F, B> NumDirectAttrVec;
+private:
+ typedef typename B::largeint_t largeint_t;
+public:
+ NumericDirectAttrVector(const vespalib::string & baseFileName);
+ NumericDirectAttrVector(const vespalib::string & baseFileName, const AttributeVector::Config & c);
+ virtual largeint_t getInt(DocId doc) const { return static_cast<largeint_t>(getHelper(doc, 0)); }
+ virtual double getFloat(DocId doc) const { return getHelper(doc, 0); }
+ virtual uint32_t get(DocId doc, largeint_t * v, uint32_t sz) const { return getAllHelper<largeint_t, largeint_t>(doc, v, sz); }
+ virtual uint32_t get(DocId doc, double * v, uint32_t sz) const { return getAllHelper<double, double>(doc, v, sz); }
+private:
+ typedef typename B::EnumHandle EnumHandle;
+ typedef typename B::BaseType BaseType;
+ typedef typename B::Weighted Weighted;
+ typedef typename B::WeightedEnum WeightedEnum;
+ typedef typename B::WeightedInt WeightedInt;
+ typedef typename B::WeightedFloat WeightedFloat;
+ virtual BaseType get(DocId doc) const { return getHelper(doc, 0); }
+ virtual EnumHandle getEnum(DocId doc) const { return getEnumHelper(doc, 0); }
+ virtual uint32_t getAll(DocId doc, BaseType * v, uint32_t sz) const { return getAllHelper<BaseType, BaseType>(doc, v, sz); }
+ virtual uint32_t get(DocId doc, EnumHandle * e, uint32_t sz) const { return getAllEnumHelper(doc, e, sz); }
+
+ virtual uint32_t getValueCount(DocId doc) const { return getValueCountHelper(doc); }
+ virtual bool hasEnum2Value() const { return false; }
+
+ uint32_t getValueCountHelper(DocId doc) const {
+ if (F::IsMultiValue()) {
+ return this->_idx[doc+1] - this->_idx[doc];
+ } else {
+ return 1;
+ }
+ }
+
+ EnumHandle getEnumHelper(DocId doc, int idx) const {
+ (void) doc;
+ (void) idx;
+ return uint32_t(-1);
+ }
+
+ BaseType getHelper(DocId doc, int idx) const {
+ if (F::IsMultiValue()) {
+ return this->_data[this->_idx[doc] + idx];
+ } else {
+ return this->_data[doc];
+ }
+ }
+ template <typename T, typename C>
+ uint32_t getAllHelper(DocId doc, T * v, uint32_t sz) const {
+ uint32_t available(getValueCountHelper(doc));
+ uint32_t num2Read(std::min(available, sz));
+ for (uint32_t i(0); i < num2Read; i++) {
+ v[i] = T(static_cast<C>(getHelper(doc, i)));
+ }
+ return available;
+ }
+ template <typename T>
+ uint32_t getAllEnumHelper(DocId doc, T * v, uint32_t sz) const {
+ uint32_t available(getValueCountHelper(doc));
+ uint32_t num2Read(std::min(available, sz));
+ for (uint32_t i(0); i < num2Read; i++) {
+ v[i] = T(getEnumHelper(doc, i));
+ }
+ return available;
+ }
+
+ virtual uint32_t get(DocId doc, WeightedEnum * v, uint32_t sz) const { return getAllEnumHelper(doc, v, sz); }
+ virtual uint32_t getAll(DocId doc, Weighted * v, uint32_t sz) const { return getAllHelper<Weighted, BaseType>(doc, v, sz); }
+ virtual uint32_t get(DocId doc, WeightedInt * v, uint32_t sz) const { return getAllHelper<WeightedInt, largeint_t>(doc, v, sz); }
+ virtual uint32_t get(DocId doc, WeightedFloat * v, uint32_t sz) const { return getAllHelper<WeightedFloat, double>(doc, v, sz); }
+};
+
+//-----------------------------------------------------------------------------
+
+namespace search {
+class StringDirectAttribute : public StringAttribute
+{
+private:
+ StringDirectAttribute(const StringDirectAttribute &);
+ StringDirectAttribute & operator=(const StringDirectAttribute &);
+ virtual void onSave(IAttributeSaveTarget & saveTarget);
+ virtual bool onLoad();
+ virtual const char * getFromEnum(EnumHandle e) const { return &_buffer[e]; }
+protected:
+ StringDirectAttribute(const vespalib::string & baseFileName, const Config & c);
+ virtual bool findEnum(const char * value, EnumHandle & e) const;
+ virtual void getEnumValue(const EnumHandle * v, uint32_t *e, uint32_t sz) const {
+ for (size_t i(0); i < sz; i++) {
+ e[i] = v[i];
+ }
+ }
+ virtual void onCommit();
+ virtual void onUpdateStat() { }
+ virtual bool addDoc(DocId & );
+
+protected:
+ std::vector<char> _buffer;
+ OffsetVector _offsets;
+ std::vector<uint32_t> _idx;
+};
+
+}
+
+template <typename F>
+class StringDirectAttrVector : public search::StringDirectAttribute
+{
+
+public:
+ StringDirectAttrVector(const vespalib::string & baseFileName);
+ StringDirectAttrVector(const vespalib::string & baseFileName, const Config & c);
+ virtual const char * getString(DocId doc, char * v, size_t sz) const { (void) v; (void) sz; return getHelper(doc, 0); }
+ virtual uint32_t get(DocId doc, const char ** v, uint32_t sz) const { return getAllHelper(doc, v, sz); }
+private:
+ virtual uint32_t get(DocId doc, vespalib::string * v, uint32_t sz) const { return getAllHelper(doc, v, sz); }
+ virtual uint32_t get(DocId doc, EnumHandle * e, uint32_t sz) const { return getAllEnumHelper(doc, e, sz); }
+ virtual const char * get(DocId doc) const { return getHelper(doc, 0); }
+ virtual EnumHandle getEnum(DocId doc) const { return getEnumHelper(doc, 0); }
+ virtual uint32_t getValueCount(DocId doc) const { return getValueCountHelper(doc); }
+ virtual uint32_t get(DocId doc, WeightedEnum * e, uint32_t sz) const { return getAllEnumHelper(doc, e, sz); }
+ virtual uint32_t get(DocId doc, WeightedString * v, uint32_t sz) const { return getAllHelper(doc, v, sz); }
+ virtual uint32_t get(DocId doc, WeightedConstChar * v, uint32_t sz) const { return getAllHelper(doc, v, sz); }
+ virtual bool hasEnum2Value() const { return true; }
+
+ uint32_t getValueCountHelper(DocId doc) const {
+ if (F::IsMultiValue()) {
+ return this->_idx[doc+1] - this->_idx[doc];
+ } else {
+ return 1;
+ }
+ }
+
+ EnumHandle getEnumHelper(DocId doc, int idx) const {
+ if (F::IsMultiValue()) {
+ return this->_offsets[this->_idx[doc] + idx];
+ } else {
+ return this->_offsets[doc];
+ }
+ return uint32_t(-1);
+ }
+
+ const char *getHelper(DocId doc, int idx) const {
+ if (F::IsMultiValue()) {
+ return & this->_buffer[this->_offsets[this->_idx[doc] + idx]];
+ } else if (idx == 0) {
+ return & this->_buffer[this->_offsets[doc]];
+ }
+ return NULL;
+ }
+ template <typename T>
+ uint32_t getAllHelper(DocId doc, T * v, uint32_t sz) const
+ {
+ uint32_t available(getValueCountHelper(doc));
+ uint32_t num2Read(std::min(available, sz));
+ for (uint32_t i(0); i < num2Read; i++) {
+ v[i] = T(getHelper(doc, i));
+ }
+ return available;
+ }
+ template <typename T>
+ uint32_t getAllEnumHelper(DocId doc, T * v, uint32_t sz) const
+ {
+ uint32_t available(getValueCountHelper(doc));
+ uint32_t num2Read(std::min(available, sz));
+ for (uint32_t i(0); i < num2Read; i++) {
+ v[i] = T(getEnumHelper(doc, i));
+ }
+ return available;
+ }
+};
+
diff --git a/searchlib/src/vespa/searchlib/attribute/attrvector.hpp b/searchlib/src/vespa/searchlib/attribute/attrvector.hpp
new file mode 100644
index 00000000000..592ed6fe737
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/attrvector.hpp
@@ -0,0 +1,185 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/log/log.h>
+#include <vespa/fastlib/io/bufferedfile.h>
+#include <vespa/searchlib/attribute/attrvector.h>
+#include <vespa/searchlib/util/filekit.h>
+
+namespace search {
+
+template <typename B>
+NumericDirectAttribute<B>::
+NumericDirectAttribute(const vespalib::string & baseFileName, const Config & c)
+ : B(baseFileName, c),
+ _data(),
+ _idx()
+{
+}
+
+template <typename B>
+bool NumericDirectAttribute<B>::onLoad()
+{
+ FileUtil::LoadedBuffer::UP dataBuffer(B::loadDAT());
+ bool rc(dataBuffer.get());
+ if (rc) {
+ const BaseType * tmpData(static_cast <const BaseType *>(dataBuffer->buffer()));
+ size_t tmpDataLen(dataBuffer->size(sizeof(BaseType)));
+ if (this->hasEnum() ) {
+ std::vector<BaseType> tmp;
+ tmp.reserve(tmpDataLen);
+ for(size_t i(0); i < tmpDataLen; i++) {
+ tmp.push_back(tmpData[i]);
+ }
+ std::sort(tmp.begin(), tmp.end());
+ _data.clear();
+ if (!tmp.empty()) {
+ BaseType prev = tmp[0];
+ _data.push_back(prev);
+ for(typename std::vector<BaseType>::const_iterator it(tmp.begin()+1), mt(tmp.end()); it != mt; it++) {
+ if (*it != prev) {
+ prev = *it;
+ _data.push_back(prev);
+ }
+ }
+ }
+ this->setEnumMax(_data.size());
+ } else {
+ _data.clear();
+ _data.reserve(tmpDataLen);
+ for (size_t i=0; i < tmpDataLen; i++) {
+ _data.push_back(tmpData[i]);
+ }
+ }
+ dataBuffer.reset();
+ if (this->hasMultiValue()) {
+ FileUtil::LoadedBuffer::UP idxBuffer(B::loadIDX());
+ rc = idxBuffer.get();
+ if (rc) {
+ const uint32_t * tmpIdx(static_cast<const uint32_t *>(idxBuffer->buffer()));
+ size_t tmpIdxLen(idxBuffer->size(sizeof(uint32_t)));
+ _idx.clear();
+ _idx.reserve(tmpIdxLen);
+ uint32_t prev(0);
+ for (size_t i=0; i < tmpIdxLen; i++) {
+ this->checkSetMaxValueCount(tmpIdx[i] - prev);
+ prev = tmpIdx[i];
+ _idx.push_back(prev);
+ }
+ }
+ }
+ }
+ if (rc) {
+ uint32_t numDocs(this->hasMultiValue() ? (_idx.size() - 1) : _data.size());
+ this->setNumDocs(numDocs);
+ this->setCommittedDocIdLimit(numDocs);
+ } else {
+ std::vector<BaseType> emptyData;
+ std::vector<uint32_t> empty1;
+ std::vector<uint32_t> empty2;
+ std::swap(emptyData, _data);
+ std::swap(empty2, _idx);
+ }
+
+ // update statistics
+ uint64_t numValues = _data.size();
+ uint64_t numUniqueValues = _data.size();
+ uint64_t allocated = _data.size() * sizeof(BaseType) + _idx.size() * sizeof(uint32_t);
+ this->updateStatistics(numValues, numUniqueValues, allocated, allocated, 0, 0);
+
+ return rc;
+}
+
+template <typename B>
+bool NumericDirectAttribute<B>::findEnum(typename B::BaseType key, EnumHandle & e) const
+{
+ if (_data.empty()) {
+ e = 0;
+ return false;
+ }
+ int delta;
+ const int eMax = B::getEnumMax();
+ for (delta = 1; delta <= eMax; delta <<= 1) { }
+ delta >>= 1;
+ int pos = delta - 1;
+ typename B::BaseType value = key;
+
+ while (delta != 0) {
+ delta >>= 1;
+ if (pos >= eMax) {
+ pos -= delta;
+ } else {
+ value = _data[pos];
+ if (value == key) {
+ e = pos;
+ return true;
+ } else if (value < key) {
+ pos += delta;
+ } else {
+ pos -= delta;
+ }
+ }
+ }
+ e = ((key > value) && (pos < eMax)) ? pos + 1 : pos;
+ return false;
+}
+
+template <typename B>
+void NumericDirectAttribute<B>::onCommit()
+{
+ B::_changes.clear();
+ abort();
+}
+
+template <typename B>
+bool NumericDirectAttribute<B>::addDoc(DocId & )
+{
+ return false;
+}
+
+}
+
+template <typename F, typename B>
+NumericDirectAttrVector<F, B>::
+NumericDirectAttrVector(const vespalib::string & baseFileName, const AttributeVector::Config & c)
+ : search::NumericDirectAttribute<B>(baseFileName, c)
+{
+ if (F::IsMultiValue()) {
+ this->_idx.push_back(0);
+ }
+}
+
+template <typename F, typename B>
+NumericDirectAttrVector<F, B>::
+NumericDirectAttrVector(const vespalib::string & baseFileName)
+ : search::NumericDirectAttribute<B>(baseFileName, AttributeVector::Config(AttributeVector::BasicType::fromType(BaseType()), F::IsMultiValue() ? search::attribute::CollectionType::ARRAY : search::attribute::CollectionType::SINGLE))
+{
+ if (F::IsMultiValue()) {
+ this->_idx.push_back(0);
+ }
+}
+
+template <typename F>
+StringDirectAttrVector<F>::
+StringDirectAttrVector(const vespalib::string & baseFileName, const Config & c) :
+ search::StringDirectAttribute(baseFileName, c)
+{
+ if (F::IsMultiValue()) {
+ _idx.push_back(0);
+ }
+ setEnum();
+ setSortedEnum(true);
+}
+
+template <typename F>
+StringDirectAttrVector<F>::
+StringDirectAttrVector(const vespalib::string & baseFileName) :
+ search::StringDirectAttribute(baseFileName, Config(BasicType::STRING, F::IsMultiValue() ? search::attribute::CollectionType::ARRAY : search::attribute::CollectionType::SINGLE))
+{
+ if (F::IsMultiValue()) {
+ _idx.push_back(0);
+ }
+ setEnum();
+ setSortedEnum(true);
+}
+
diff --git a/searchlib/src/vespa/searchlib/attribute/changevector.cpp b/searchlib/src/vespa/searchlib/attribute/changevector.cpp
new file mode 100644
index 00000000000..028ae98d985
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/changevector.cpp
@@ -0,0 +1,21 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/searchlib/attribute/changevector.h>
+#include <vespa/searchlib/attribute/stringbase.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".searchlib.attribute.changevector");
+
+namespace search {
+
+StringChangeData::StringChangeData(const vespalib::string & s)
+ : _s(s)
+{
+ if (StringAttribute::countZero(s.data(), s.size()) > 0) {
+ LOG(warning,
+ "StringChangeData(): "
+ "Input string contains <null> byte(s); "
+ "truncating. (ticket #3079131)");
+ _s.assign(s.data()); // keep data up to (not including) first '\0' byte
+ }
+}
+
+}
diff --git a/searchlib/src/vespa/searchlib/attribute/changevector.h b/searchlib/src/vespa/searchlib/attribute/changevector.h
new file mode 100644
index 00000000000..a714e502588
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/changevector.h
@@ -0,0 +1,230 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vector>
+#include <vespa/vespalib/stllike/hash_map.h>
+#include <vespa/searchcommon/common/undefinedvalues.h>
+
+namespace search {
+
+struct ChangeBase {
+ enum Type {
+ NOOP,
+ UPDATE,
+ APPEND,
+ REMOVE,
+ INCREASEWEIGHT,
+ MULWEIGHT,
+ DIVWEIGHT,
+ ADD,
+ SUB,
+ MUL,
+ DIV,
+ CLEARDOC
+ };
+ enum {TAIL=0, UNSET_ENUM = 0xffffffffu};
+
+ ChangeBase() :
+ _type(NOOP),
+ _next(TAIL),
+ _doc(0),
+ _weight(1),
+ _enumScratchPad(UNSET_ENUM),
+ _arithOperand(0)
+ { }
+
+ ChangeBase(Type type, uint32_t d, int32_t w = 1) :
+ _type(type),
+ _next(TAIL),
+ _doc(d),
+ _weight(w),
+ _enumScratchPad(UNSET_ENUM),
+ _arithOperand(0)
+ { }
+
+ int cmp(const ChangeBase &b) const { int diff(_doc - b._doc); return diff; }
+ bool operator <(const ChangeBase & b) const { return cmp(b) < 0; }
+ bool isAtEnd() const { return _next == TAIL; }
+ uint32_t getNext() const { return _next; }
+ void setNext(uint32_t next) { _next = next; }
+ uint32_t getEnum() const { return _enumScratchPad; }
+ void setEnum(uint32_t value) const { _enumScratchPad = value; }
+ bool isEnumValid() const { return _enumScratchPad != UNSET_ENUM; }
+ void invalidateEnum() const { _enumScratchPad = UNSET_ENUM; }
+
+ Type _type;
+private:
+ uint32_t _next;
+public:
+ uint32_t _doc;
+ int32_t _weight;
+ mutable uint32_t _enumScratchPad;
+ double _arithOperand;
+};
+
+template <typename T>
+class NumericChangeData {
+private:
+ T _v;
+public:
+ typedef T DataType;
+
+ NumericChangeData(T v) : _v(v) { }
+ NumericChangeData() : _v(T()) { }
+
+ T get() const { return _v; }
+ T raw() const { return _v; }
+ operator T() const { return _v; }
+ operator T&() { return _v; }
+ bool operator<(const NumericChangeData<T> &rhs) const { return _v < rhs._v; }
+};
+
+class StringChangeData {
+public:
+ typedef vespalib::string DataType;
+
+ StringChangeData(const DataType & s);
+ StringChangeData() : _s() { }
+
+ const DataType & get() const { return _s; }
+ const char * raw() const { return _s.c_str(); }
+ operator const DataType&() const { return _s; }
+ operator DataType&() { return _s; }
+ bool operator <(const StringChangeData & rhs) const { return _s < rhs._s; }
+private:
+ DataType _s;
+};
+
+template<typename T>
+struct ChangeTemplate : public ChangeBase {
+ typedef T DataType;
+
+ ChangeTemplate() : ChangeBase() { }
+ ChangeTemplate(Type type, uint32_t d, const T & v, int32_t w = 1) :
+ ChangeBase(type, d, w), _data(v)
+ { }
+
+ T _data;
+};
+
+template <>
+inline
+NumericChangeData<double>::NumericChangeData(double v) :
+ _v(attribute::isUndefined<double>(v) ? attribute::getUndefined<double>() : v)
+{
+}
+
+template <>
+inline bool
+NumericChangeData<double>::operator<(const NumericChangeData<double> &rhs) const
+{
+ if (std::isnan(_v)) {
+ return !std::isnan(rhs._v);
+ }
+ if (std::isnan(rhs._v)) {
+ return false;
+ }
+ return _v < rhs._v;
+}
+
+class ChangeVectorBase {
+protected:
+};
+
+/**
+ * Maintains a list of changes where changes to the same docid are adjacent, but ordered by insertion order.
+ * Apart from that no ordering by docid.
+ */
+template <typename T>
+class ChangeVectorT : public ChangeVectorBase {
+private:
+ typedef vespalib::hash_map<uint32_t, uint32_t> Map;
+ typedef vespalib::Array<T, vespalib::DefaultAlloc> Vector;
+public:
+ ChangeVectorT() : _tail(0) { }
+ class const_iterator {
+ public:
+ const_iterator(const Vector & vector, uint32_t next) : _v(&vector), _next(next) { }
+ bool operator == (const const_iterator & rhs) const { return _v == rhs._v && _next == rhs._next; }
+ bool operator != (const const_iterator & rhs) const { return _v != rhs._v || _next != rhs._next; }
+ const_iterator& operator++() { advance(); return *this; }
+ const_iterator operator++(int) { const_iterator other(*this); advance(); return other; }
+ const T & operator * () const { return v()[_next]; }
+ const T * operator -> () const { return &v()[_next]; }
+ private:
+ void advance() { _next = v()[_next].getNext(); }
+ const Vector & v() const { return *_v; }
+ const Vector * _v;
+ uint32_t _next;
+ };
+
+ void push_back(const T & c);
+ template <typename Accessor>
+ void push_back(uint32_t doc, Accessor & ac);
+ const T & back() const { return _v.back(); }
+ T & back() { return _v.back(); }
+ size_t size() const { return _v.size(); }
+ void clear() { _v.clear(); _docs.clear(); }
+ const_iterator begin() const { return const_iterator(_v, 0); }
+ const_iterator end() const { return const_iterator(_v, size()); }
+private:
+ void linkIn(uint32_t doc, size_t index, size_t last);
+ Vector _v;
+ Map _docs;
+ uint32_t _tail;
+};
+
+template <typename T>
+void
+ChangeVectorT<T>::push_back(const T & c)
+{
+ size_t index(size());
+ _v.push_back(c);
+ linkIn(c._doc, index, index);
+}
+
+template <typename T>
+template <typename Accessor>
+void
+ChangeVectorT<T>::push_back(uint32_t doc, Accessor & ac)
+{
+ if (ac.size() <= 0) { return; }
+
+ size_t index(size());
+ _v.reserve(vespalib::roundUp2inN(index + ac.size()));
+ for (size_t i(0), m(ac.size()); i < m; i++, ac.next()) {
+ _v.push_back(T(ChangeBase::APPEND, doc, typename T::DataType(ac.value()), ac.weight()));
+ _v.back().setNext(index + i + 1);
+ }
+ linkIn(doc, index, size() - 1);
+}
+
+template <typename T>
+void
+ChangeVectorT<T>::linkIn(uint32_t doc, size_t first, size_t last)
+{
+ if (first != 0 && (_v[_tail]._doc == doc)) {
+ _v[_tail].setNext(first);
+ _tail = last;
+ } else {
+ Map::iterator found(_docs.find(doc));
+ if (found == _docs.end()) {
+ _docs[doc] = last;
+ if (_tail != first) {
+ _v[_tail].setNext(first);
+ }
+ _tail = last;
+ } else {
+ uint32_t prev(found->second);
+ for (; _v[_v[prev].getNext()]._doc == doc; prev = _v[prev].getNext());
+ _v[last].setNext(_v[prev].getNext());
+ _v[prev].setNext(first);
+ found->second = last;
+ }
+ }
+ _v[_tail].setNext(size());
+}
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/attribute/configconverter.cpp b/searchlib/src/vespa/searchlib/attribute/configconverter.cpp
new file mode 100644
index 00000000000..250cad79357
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/configconverter.cpp
@@ -0,0 +1,84 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".searchlib.attribute.configconverter");
+#include "configconverter.h"
+
+using namespace vespa::config::search;
+using namespace search;
+
+
+namespace {
+
+using search::attribute::CollectionType;
+using search::attribute::BasicType;
+using vespalib::tensor::TensorType;
+
+typedef std::map<AttributesConfig::Attribute::Datatype, BasicType::Type> DataTypeMap;
+typedef std::map<AttributesConfig::Attribute::Collectiontype, CollectionType::Type> CollectionTypeMap;
+
+DataTypeMap
+getDataTypeMap()
+{
+ DataTypeMap map;
+ map[AttributesConfig::Attribute::STRING] = BasicType::STRING;
+ map[AttributesConfig::Attribute::UINT1] = BasicType::UINT1;
+ map[AttributesConfig::Attribute::UINT2] = BasicType::UINT2;
+ map[AttributesConfig::Attribute::UINT4] = BasicType::UINT4;
+ map[AttributesConfig::Attribute::INT8] = BasicType::INT8;
+ map[AttributesConfig::Attribute::INT16] = BasicType::INT16;
+ map[AttributesConfig::Attribute::INT32] = BasicType::INT32;
+ map[AttributesConfig::Attribute::INT64] = BasicType::INT64;
+ map[AttributesConfig::Attribute::FLOAT] = BasicType::FLOAT;
+ map[AttributesConfig::Attribute::DOUBLE] = BasicType::DOUBLE;
+ map[AttributesConfig::Attribute::PREDICATE] = BasicType::PREDICATE;
+ map[AttributesConfig::Attribute::TENSOR] = BasicType::TENSOR;
+ map[AttributesConfig::Attribute::NONE] = BasicType::NONE;
+ return map;
+}
+
+CollectionTypeMap
+getCollectionTypeMap()
+{
+ CollectionTypeMap map;
+ map[AttributesConfig::Attribute::SINGLE] = CollectionType::SINGLE;
+ map[AttributesConfig::Attribute::ARRAY] = CollectionType::ARRAY;
+ map[AttributesConfig::Attribute::WEIGHTEDSET] = CollectionType::WSET;
+ return map;
+}
+
+static DataTypeMap _dataTypeMap = getDataTypeMap();
+static CollectionTypeMap _collectionTypeMap = getCollectionTypeMap();
+
+}
+
+namespace search {
+namespace attribute {
+
+Config
+ConfigConverter::convert(const AttributesConfig::Attribute & cfg)
+{
+ BasicType bType(_dataTypeMap[cfg.datatype]);
+ CollectionType cType(_collectionTypeMap[cfg.collectiontype]);
+ cType.removeIfZero(cfg.removeifzero);
+ cType.createIfNonExistant(cfg.createifnonexistent);
+ Config retval(bType, cType);
+ retval.setFastSearch(cfg.fastsearch);
+ retval.setHuge(cfg.huge);
+ retval.setEnableBitVectors(cfg.enablebitvectors);
+ retval.setEnableOnlyBitVector(cfg.enableonlybitvector);
+ retval.setIsFilter(cfg.enableonlybitvector);
+ retval.setFastAccess(cfg.fastaccess);
+ retval.setArity(cfg.arity);
+ retval.setBounds(cfg.lowerbound, cfg.upperbound);
+ retval.setDensePostingListThreshold(cfg.densepostinglistthreshold);
+ if (retval.basicType().type() == BasicType::Type::TENSOR) {
+ retval.setTensorType(TensorType::fromSpec(cfg.tensortype));
+ }
+ return retval;
+}
+
+
+} // namespace attribute
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/attribute/configconverter.h b/searchlib/src/vespa/searchlib/attribute/configconverter.h
new file mode 100644
index 00000000000..796c40c2eb1
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/configconverter.h
@@ -0,0 +1,21 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "attributevector.h"
+#include <vespa/config-attributes.h>
+
+namespace search {
+namespace attribute {
+
+/**
+ * Class used to convert from attributes config to the config used by the AttributeVector implementation.
+ **/
+class ConfigConverter {
+public:
+ static Config convert(const vespa::config::search::AttributesConfig::Attribute & cfg);
+};
+
+} // namespace attribute
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/attribute/createarrayfastsearch.cpp b/searchlib/src/vespa/searchlib/attribute/createarrayfastsearch.cpp
new file mode 100644
index 00000000000..1312fd2b331
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/createarrayfastsearch.cpp
@@ -0,0 +1,69 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/searchlib/attribute/attributefactory.h>
+#include <vespa/searchlib/attribute/integerbase.h>
+#include <vespa/searchlib/attribute/floatbase.h>
+#include <vespa/searchlib/attribute/flagattribute.h>
+#include "defines.h"
+
+#include <vespa/log/log.h>
+LOG_SETUP(".createarrayfastsearch");
+
+#include <vespa/searchlib/attribute/attributevector.hpp>
+#include <vespa/searchlib/attribute/enumstore.hpp>
+#include <vespa/searchlib/attribute/enumattribute.hpp>
+#include <vespa/searchlib/attribute/multivalueattribute.hpp>
+#include <vespa/searchlib/attribute/multienumattribute.hpp>
+#include <vespa/searchlib/attribute/multinumericenumattribute.hpp>
+#include <vespa/searchlib/attribute/multinumericpostattribute.hpp>
+#include <vespa/searchlib/attribute/multistringpostattribute.hpp>
+
+namespace search {
+
+using attribute::BasicType;
+
+#define INTARRAY(T, I) MultiValueNumericPostingAttribute< ENUM_ATTRIBUTE(IntegerAttributeTemplate<T>), MULTIVALUE_ENUM_ARG(I) >
+#define FLOATARRAY(T, I) MultiValueNumericPostingAttribute< ENUM_ATTRIBUTE(FloatingPointAttributeTemplate<T>), MULTIVALUE_ENUM_ARG(I) >
+#define CREATEINTARRAY(T, H, fname, info) H ? static_cast<AttributeVector *>(new INTARRAY(T, multivalue::Index64)(fname, info)) : static_cast<AttributeVector *>(new INTARRAY(T, multivalue::Index32)(fname, info))
+#define CREATEFLOATARRAY(T, H, fname, info) H ? static_cast<AttributeVector *>(new FLOATARRAY(T, multivalue::Index64)(fname, info)) : static_cast<AttributeVector *>(new FLOATARRAY(T, multivalue::Index32)(fname, info))
+
+AttributeVector::SP
+AttributeFactory::createArrayFastSearch(const vespalib::string & baseFileName, const Config & info)
+{
+ assert(info.collectionType().type() == attribute::CollectionType::ARRAY);
+ assert(info.fastSearch());
+ AttributeVector::SP ret;
+ switch(info.basicType().type()) {
+ case BasicType::UINT1:
+ case BasicType::UINT2:
+ case BasicType::UINT4:
+ break;
+ case BasicType::INT8:
+ ret.reset(info.huge() ? static_cast<AttributeVector *>(new HugeFlagAttribute(baseFileName, info)) : static_cast<AttributeVector *>(new FlagAttribute(baseFileName, info)));
+ break;
+ case BasicType::INT16:
+ ret.reset(CREATEINTARRAY(int16_t, info.huge(), baseFileName, info));
+ break;
+ case BasicType::INT32:
+ ret.reset(CREATEINTARRAY(int32_t, info.huge(), baseFileName, info));
+ break;
+ case BasicType::INT64:
+ ret.reset(CREATEINTARRAY(int64_t, info.huge(), baseFileName, info));
+ break;
+ case BasicType::FLOAT:
+ ret.reset(CREATEFLOATARRAY(float, info.huge(), baseFileName, info));
+ break;
+ case BasicType::DOUBLE:
+ ret.reset(CREATEFLOATARRAY(double, info.huge(), baseFileName, info));
+ break;
+ case BasicType::STRING:
+ ret.reset(info.huge() ? static_cast<AttributeVector *>(new HugeArrayStringPostingAttribute(baseFileName, info)) : static_cast<AttributeVector *>(new ArrayStringPostingAttribute(baseFileName, info)));
+ break;
+ default:
+ break;
+ }
+ return ret;
+}
+
+}
diff --git a/searchlib/src/vespa/searchlib/attribute/createarraystd.cpp b/searchlib/src/vespa/searchlib/attribute/createarraystd.cpp
new file mode 100644
index 00000000000..9a1bb04cbac
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/createarraystd.cpp
@@ -0,0 +1,63 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/searchlib/attribute/attributefactory.h>
+#include "defines.h"
+
+#include <vespa/log/log.h>
+LOG_SETUP(".createarraystd");
+
+#include <vespa/searchlib/attribute/attributevector.hpp>
+#include <vespa/searchlib/attribute/multivalueattribute.hpp>
+#include <vespa/searchlib/attribute/multinumericattribute.hpp>
+#include <vespa/searchlib/attribute/multistringattribute.h>
+
+namespace search {
+
+using attribute::BasicType;
+
+#define INTARRAY(T, I) MultiValueNumericAttribute< IntegerAttributeTemplate<T>, MULTIVALUE_ARG(T, I) >
+#define FLOATARRAY(T, I) MultiValueNumericAttribute< FloatingPointAttributeTemplate<T>, MULTIVALUE_ARG(T, I) >
+
+#define CREATEINTARRAY(T, H, fname, info) H ? static_cast<AttributeVector *>(new INTARRAY(T, multivalue::Index64)(fname, info)) : static_cast<AttributeVector *>(new INTARRAY(T, multivalue::Index32)(fname, info))
+#define CREATEFLOATARRAY(T, H, fname, info) H ? static_cast<AttributeVector *>(new FLOATARRAY(T, multivalue::Index64)(fname, info)) : static_cast<AttributeVector *>(new FLOATARRAY(T, multivalue::Index32)(fname, info))
+
+
+AttributeVector::SP
+AttributeFactory::createArrayStd(const vespalib::string & baseFileName, const Config & info)
+{
+ assert(info.collectionType().type() == attribute::CollectionType::ARRAY);
+ AttributeVector::SP ret;
+ switch(info.basicType().type()) {
+ case BasicType::UINT1:
+ case BasicType::UINT2:
+ case BasicType::UINT4:
+ break;
+ case BasicType::INT8:
+ ret.reset(CREATEINTARRAY(int8_t, info.huge(), baseFileName, info));
+ break;
+ case BasicType::INT16:
+ ret.reset(CREATEINTARRAY(int16_t, info.huge(), baseFileName, info));
+ break;
+ case BasicType::INT32:
+ ret.reset(CREATEINTARRAY(int32_t, info.huge(), baseFileName, info));
+ break;
+ case BasicType::INT64:
+ ret.reset(CREATEINTARRAY(int64_t, info.huge(), baseFileName, info));
+ break;
+ case BasicType::FLOAT:
+ ret.reset(CREATEFLOATARRAY(float, info.huge(), baseFileName, info));
+ break;
+ case BasicType::DOUBLE:
+ ret.reset(CREATEFLOATARRAY(double, info.huge(), baseFileName, info));
+ break;
+ case BasicType::STRING:
+ ret.reset(info.huge() ? static_cast<AttributeVector *>(new HugeArrayStringAttribute(baseFileName, info)) : static_cast<AttributeVector *>(new ArrayStringAttribute(baseFileName, info)));
+ break;
+ default:
+ break;
+ }
+ return ret;
+}
+
+}
diff --git a/searchlib/src/vespa/searchlib/attribute/createsetfastsearch.cpp b/searchlib/src/vespa/searchlib/attribute/createsetfastsearch.cpp
new file mode 100644
index 00000000000..0a15b252adc
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/createsetfastsearch.cpp
@@ -0,0 +1,71 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/searchlib/attribute/attributefactory.h>
+#include <vespa/searchlib/attribute/integerbase.h>
+#include <vespa/searchlib/attribute/floatbase.h>
+#include "defines.h"
+
+#include <vespa/log/log.h>
+LOG_SETUP(".createsetfastsearch");
+
+
+#include <vespa/searchlib/attribute/attributevector.hpp>
+#include <vespa/searchlib/attribute/enumstore.hpp>
+#include <vespa/searchlib/attribute/enumattribute.hpp>
+#include <vespa/searchlib/attribute/multivalueattribute.hpp>
+#include <vespa/searchlib/attribute/multienumattribute.hpp>
+#include <vespa/searchlib/attribute/multinumericenumattribute.hpp>
+#include <vespa/searchlib/attribute/multinumericpostattribute.hpp>
+#include <vespa/searchlib/attribute/multistringpostattribute.hpp>
+
+namespace search {
+
+using attribute::BasicType;
+
+#define INTSET(T, I) MultiValueNumericPostingAttribute< ENUM_ATTRIBUTE(IntegerAttributeTemplate<T>), WEIGHTED_MULTIVALUE_ENUM_ARG(I) >
+#define FLOATSET(T, I) MultiValueNumericPostingAttribute< ENUM_ATTRIBUTE(FloatingPointAttributeTemplate<T>), WEIGHTED_MULTIVALUE_ENUM_ARG(I) >
+
+#define CREATEINTSET(T, H, fname, info) H ? static_cast<AttributeVector *>(new INTSET(T, multivalue::Index64)(fname, info)) : static_cast<AttributeVector *>(new INTSET(T, multivalue::Index32)(fname, info))
+#define CREATEFLOATSET(T, H, fname, info) H ? static_cast<AttributeVector *>(new FLOATSET(T, multivalue::Index64)(fname, info)) : static_cast<AttributeVector *>(new FLOATSET(T, multivalue::Index32)(fname, info))
+
+
+AttributeVector::SP
+AttributeFactory::createSetFastSearch(const vespalib::string & baseFileName, const Config & info)
+{
+ assert(info.collectionType().type() == attribute::CollectionType::WSET);
+ assert(info.fastSearch());
+ AttributeVector::SP ret;
+ switch(info.basicType().type()) {
+ case BasicType::UINT1:
+ case BasicType::UINT2:
+ case BasicType::UINT4:
+ break;
+ case BasicType::INT8:
+ ret.reset(CREATEINTSET(int8_t, info.huge(), baseFileName, info));
+ break;
+ case BasicType::INT16:
+ ret.reset(CREATEINTSET(int16_t, info.huge(), baseFileName, info));
+ break;
+ case BasicType::INT32:
+ ret.reset(CREATEINTSET(int32_t, info.huge(), baseFileName, info));
+ break;
+ case BasicType::INT64:
+ ret.reset(CREATEINTSET(int64_t, info.huge(), baseFileName, info));
+ break;
+ case BasicType::FLOAT:
+ ret.reset(CREATEFLOATSET(float, info.huge(), baseFileName, info));
+ break;
+ case BasicType::DOUBLE:
+ ret.reset(CREATEFLOATSET(double, info.huge(), baseFileName, info));
+ break;
+ case BasicType::STRING:
+ ret.reset(info.huge() ? static_cast<AttributeVector *>(new HugeWeightedSetStringPostingAttribute(baseFileName, info)) : static_cast<AttributeVector *>(new WeightedSetStringPostingAttribute(baseFileName, info)));
+ break;
+ default:
+ break;
+ }
+ return ret;
+}
+
+}
diff --git a/searchlib/src/vespa/searchlib/attribute/createsetstd.cpp b/searchlib/src/vespa/searchlib/attribute/createsetstd.cpp
new file mode 100644
index 00000000000..b52593228b8
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/createsetstd.cpp
@@ -0,0 +1,62 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/searchlib/attribute/attributefactory.h>
+#include "defines.h"
+
+#include <vespa/log/log.h>
+LOG_SETUP(".createsetstd");
+
+#include <vespa/searchlib/attribute/attributevector.hpp>
+#include <vespa/searchlib/attribute/multivalueattribute.hpp>
+#include <vespa/searchlib/attribute/multinumericattribute.hpp>
+#include <vespa/searchlib/attribute/multistringattribute.h>
+
+namespace search {
+
+using attribute::BasicType;
+
+#define INTSET(T, I) MultiValueNumericAttribute< IntegerAttributeTemplate<T>, WEIGHTED_MULTIVALUE_ARG(T, I) >
+#define FLOATSET(T, I) MultiValueNumericAttribute< FloatingPointAttributeTemplate<T>, WEIGHTED_MULTIVALUE_ARG(T, I) >
+#define CREATEINTSET(T, H, fname, info) H ? static_cast<AttributeVector *>(new INTSET(T, multivalue::Index64)(fname, info)) : static_cast<AttributeVector *>(new INTSET(T, multivalue::Index32)(fname, info))
+#define CREATEFLOATSET(T, H, fname, info) H ? static_cast<AttributeVector *>(new FLOATSET(T, multivalue::Index64)(fname, info)) : static_cast<AttributeVector *>(new FLOATSET(T, multivalue::Index32)(fname, info))
+
+
+AttributeVector::SP
+AttributeFactory::createSetStd(const vespalib::string & baseFileName, const Config & info)
+{
+ assert(info.collectionType().type() == attribute::CollectionType::WSET);
+ AttributeVector::SP ret;
+ switch(info.basicType().type()) {
+ case BasicType::UINT1:
+ case BasicType::UINT2:
+ case BasicType::UINT4:
+ break;
+ case BasicType::INT8:
+ ret.reset(CREATEINTSET(int8_t, info.huge(), baseFileName, info));
+ break;
+ case BasicType::INT16:
+ ret.reset(CREATEINTSET(int16_t, info.huge(), baseFileName, info));
+ break;
+ case BasicType::INT32:
+ ret.reset(CREATEINTSET(int32_t, info.huge(), baseFileName, info));
+ break;
+ case BasicType::INT64:
+ ret.reset(CREATEINTSET(int64_t, info.huge(), baseFileName, info));
+ break;
+ case BasicType::FLOAT:
+ ret.reset(CREATEFLOATSET(float, info.huge(), baseFileName, info));
+ break;
+ case BasicType::DOUBLE:
+ ret.reset(CREATEFLOATSET(double, info.huge(), baseFileName, info));
+ break;
+ case BasicType::STRING:
+ ret.reset(info.huge() ? static_cast<AttributeVector *>(new HugeWeightedSetStringAttribute(baseFileName, info)) : static_cast<AttributeVector *>(new WeightedSetStringAttribute(baseFileName, info)));
+ break;
+ default:
+ break;
+ }
+ return ret;
+}
+
+}
diff --git a/searchlib/src/vespa/searchlib/attribute/createsinglefastsearch.cpp b/searchlib/src/vespa/searchlib/attribute/createsinglefastsearch.cpp
new file mode 100644
index 00000000000..02890eb8789
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/createsinglefastsearch.cpp
@@ -0,0 +1,65 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/searchlib/attribute/attributefactory.h>
+#include <vespa/searchlib/attribute/integerbase.h>
+#include <vespa/searchlib/attribute/floatbase.h>
+#include "defines.h"
+
+#include <vespa/log/log.h>
+LOG_SETUP(".createsinglefastsearch");
+
+#include <vespa/searchlib/attribute/singlestringattribute.h>
+#include <vespa/searchlib/attribute/singlestringpostattribute.hpp>
+#include <vespa/searchlib/attribute/singlenumericenumattribute.hpp>
+#include <vespa/searchlib/attribute/singlenumericpostattribute.hpp>
+#include <vespa/searchlib/attribute/enumstore.hpp>
+#include <vespa/searchlib/attribute/enumattribute.hpp>
+#include <vespa/searchlib/attribute/singleenumattribute.hpp>
+
+#define INTPOSTING(T) SingleValueNumericPostingAttribute< ENUM_ATTRIBUTE(IntegerAttributeTemplate<T>) >
+#define FLOATPOSTING(T) SingleValueNumericPostingAttribute< ENUM_ATTRIBUTE(FloatingPointAttributeTemplate<T>) >
+
+namespace search {
+
+using attribute::BasicType;
+
+AttributeVector::SP
+AttributeFactory::createSingleFastSearch(const vespalib::string & baseFileName, const Config & info)
+{
+ assert(info.collectionType().type() == attribute::CollectionType::SINGLE);
+ assert(info.fastSearch());
+ AttributeVector::SP ret;
+ switch(info.basicType().type()) {
+ case BasicType::UINT1:
+ case BasicType::UINT2:
+ case BasicType::UINT4:
+ break;
+ case BasicType::INT8:
+ ret.reset(new INTPOSTING(int8_t)(baseFileName, info));
+ break;
+ case BasicType::INT16:
+ ret.reset(new INTPOSTING(int16_t)(baseFileName, info));
+ break;
+ case BasicType::INT32:
+ ret.reset(new INTPOSTING(int32_t)(baseFileName, info));
+ break;
+ case BasicType::INT64:
+ ret.reset(new INTPOSTING(int64_t)(baseFileName, info));
+ break;
+ case BasicType::FLOAT:
+ ret.reset(new FLOATPOSTING(float)(baseFileName, info));
+ break;
+ case BasicType::DOUBLE:
+ ret.reset(new FLOATPOSTING(double)(baseFileName, info));
+ break;
+ case BasicType::STRING:
+ ret.reset(new SingleValueStringPostingAttribute(baseFileName, info));
+ break;
+ default:
+ break;
+ }
+ return ret;
+}
+
+}
diff --git a/searchlib/src/vespa/searchlib/attribute/createsinglestd.cpp b/searchlib/src/vespa/searchlib/attribute/createsinglestd.cpp
new file mode 100644
index 00000000000..3d4ebaabc92
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/createsinglestd.cpp
@@ -0,0 +1,68 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/searchlib/attribute/attributefactory.h>
+
+#include <vespa/log/log.h>
+LOG_SETUP(".createsinglestd");
+
+#include "predicate_attribute.h"
+#include "singlesmallnumericattribute.h"
+#include <vespa/searchlib/attribute/attributevector.hpp>
+#include <vespa/searchlib/attribute/singlenumericattribute.hpp>
+#include <vespa/searchlib/attribute/singlestringattribute.h>
+#include <vespa/searchlib/attribute/tensorattribute.h>
+
+namespace search {
+
+using attribute::BasicType;
+
+AttributeVector::SP
+AttributeFactory::createSingleStd(const vespalib::string & baseFileName, const Config & info)
+{
+ assert(info.collectionType().type() == attribute::CollectionType::SINGLE);
+ AttributeVector::SP ret;
+ switch(info.basicType().type()) {
+ case BasicType::UINT1:
+ ret.reset(new SingleValueBitNumericAttribute(baseFileName));
+ break;
+ case BasicType::UINT2:
+ ret.reset(new SingleValueSemiNibbleNumericAttribute(baseFileName));
+ break;
+ case BasicType::UINT4:
+ ret.reset(new SingleValueNibbleNumericAttribute(baseFileName));
+ break;
+ case BasicType::INT8:
+ ret.reset(new SingleValueNumericAttribute<IntegerAttributeTemplate<int8_t> >(baseFileName, info));
+ break;
+ case BasicType::INT16:
+ // XXX: Unneeded since we don't have short document fields in java.
+ ret.reset(new SingleValueNumericAttribute<IntegerAttributeTemplate<int16_t> >(baseFileName, info));
+ break;
+ case BasicType::INT32:
+ ret.reset(new SingleValueNumericAttribute<IntegerAttributeTemplate<int32_t> >(baseFileName, info));
+ break;
+ case BasicType::INT64:
+ ret.reset(new SingleValueNumericAttribute<IntegerAttributeTemplate<int64_t> >(baseFileName, info));
+ break;
+ case BasicType::FLOAT:
+ ret.reset(new SingleValueNumericAttribute<FloatingPointAttributeTemplate<float> >(baseFileName, info));
+ break;
+ case BasicType::DOUBLE:
+ ret.reset(new SingleValueNumericAttribute<FloatingPointAttributeTemplate<double> >(baseFileName, info));
+ break;
+ case BasicType::STRING:
+ ret.reset(new SingleValueStringAttribute(baseFileName, info));
+ break;
+ case BasicType::PREDICATE:
+ ret.reset(new PredicateAttribute(baseFileName, info));
+ break;
+ case BasicType::TENSOR:
+ ret.reset(new attribute::TensorAttribute(baseFileName, info));
+ break;
+ default:
+ break;
+ }
+ return ret;
+}
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/attribute/defines.cpp b/searchlib/src/vespa/searchlib/attribute/defines.cpp
new file mode 100644
index 00000000000..cbe69920ac3
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/defines.cpp
@@ -0,0 +1,11 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "defines.h"
+#include <vespa/log/log.h>
+
+LOG_SETUP(".searchlib.attribute.defines");
+namespace search {
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/attribute/defines.h b/searchlib/src/vespa/searchlib/attribute/defines.h
new file mode 100644
index 00000000000..355a841c0b9
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/defines.h
@@ -0,0 +1,11 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#define ENUM_ATTRIBUTE(B) EnumAttribute<B>
+
+#define MULTIVALUE_ARG(T, I) multivalue::MVMTemplateArg<multivalue::Value<T>, I>
+#define MULTIVALUE_ENUM_ARG(I) multivalue::MVMTemplateArg<multivalue::Value<EnumStoreBase::Index>, I>
+#define WEIGHTED_MULTIVALUE_ARG(T, I) multivalue::MVMTemplateArg<multivalue::WeightedValue<T>, I>
+#define WEIGHTED_MULTIVALUE_ENUM_ARG(I) multivalue::MVMTemplateArg<multivalue::WeightedValue<EnumStoreBase::Index>, I>
+
diff --git a/searchlib/src/vespa/searchlib/attribute/diversity.h b/searchlib/src/vespa/searchlib/attribute/diversity.h
new file mode 100644
index 00000000000..68440d05cea
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/diversity.h
@@ -0,0 +1,226 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/vespalib/stllike/hash_map.h>
+#include "singleenumattribute.h"
+#include "singlenumericattribute.h"
+
+/**
+ * This file contains low-level code used to implement diversified
+ * limited attribute range searches. Terms on the form [;;100;foo;3]
+ * are used to specify unbound range searches in an attribute that
+ * produces a limited number of results while also ensuring
+ * diversified results based on a secondary attribute.
+ **/
+
+namespace search {
+namespace attribute {
+namespace diversity {
+
+template <typename ITR>
+class ForwardRange
+{
+private:
+ ITR _lower;
+ ITR _upper;
+public:
+ class Next {
+ private:
+ ITR &_lower;
+ public:
+ Next(const Next &) = delete;
+ explicit Next(ForwardRange &range) : _lower(range._lower) {}
+ const ITR &get() const { return _lower; }
+ ~Next() { ++_lower; }
+ };
+ ForwardRange(const ITR &lower, const ITR &upper) : _lower(lower), _upper(upper) {}
+ bool has_next() const { return _lower != _upper; }
+};
+
+template <typename ITR>
+class ReverseRange
+{
+private:
+ ITR _lower;
+ ITR _upper;
+public:
+ class Next {
+ private:
+ ITR &_upper;
+ public:
+ Next(const Next &) = delete;
+ explicit Next(ReverseRange &range) : _upper(range._upper) { --_upper; }
+ const ITR &get() const { return _upper; }
+ };
+ ReverseRange(const ITR &lower, const ITR &upper) : _lower(lower), _upper(upper) {}
+ bool has_next() const { return _lower != _upper; }
+};
+
+template <typename T>
+struct FetchNumberFast {
+ const T * const attr;
+ typedef typename T::LoadedValueType ValueType;
+ FetchNumberFast(const IAttributeVector &attr_in) : attr(dynamic_cast<const T *>(&attr_in)) {}
+ ValueType get(uint32_t docid) const { return attr->getFast(docid); }
+ bool valid() const { return (attr != nullptr); }
+};
+
+struct FetchEnumFast {
+ const SingleValueEnumAttributeBase * const attr;
+ typedef uint32_t ValueType;
+ FetchEnumFast(const IAttributeVector &attr_in) : attr(dynamic_cast<const SingleValueEnumAttributeBase *>(&attr_in)) {}
+ ValueType get(uint32_t docid) const { return attr->getE(docid); }
+ bool valid() const { return (attr != nullptr); }
+};
+
+struct FetchEnum {
+ const IAttributeVector &attr;
+ typedef uint32_t ValueType;
+ FetchEnum(const IAttributeVector &attr_in) : attr(attr_in) {}
+ ValueType get(uint32_t docid) const { return attr.getEnum(docid); }
+};
+
+struct FetchInteger {
+ const IAttributeVector &attr;
+ typedef int64_t ValueType;
+ FetchInteger(const IAttributeVector &attr_in) : attr(attr_in) {}
+ ValueType get(uint32_t docid) const { return attr.getInt(docid); }
+};
+
+struct FetchFloat {
+ const IAttributeVector &attr;
+ typedef double ValueType;
+ FetchFloat(const IAttributeVector &attr_in) : attr(attr_in) {}
+ ValueType get(uint32_t docid) const { return attr.getFloat(docid); }
+};
+
+template <typename Fetcher, typename Result>
+class DiversityFilter {
+private:
+ size_t _total_count;
+ size_t _max_total;
+ const Fetcher &_diversity;
+ size_t _max_per_group;
+ size_t _cutoff_max_groups;
+ bool _cutoff_strict;
+
+ typedef vespalib::hash_map<typename Fetcher::ValueType, uint32_t> Diversity;
+ Diversity _seen;
+ Result &_result;
+public:
+ DiversityFilter(const Fetcher &diversity, size_t max_per_group,
+ size_t cutoff_max_groups, bool cutoff_strict,
+ Result &result, size_t max_total)
+ : _total_count(0), _max_total(max_total), _diversity(diversity), _max_per_group(max_per_group),
+ _cutoff_max_groups(cutoff_max_groups), _cutoff_strict(cutoff_strict), _seen(std::min(cutoff_max_groups, 10000ul)*3), _result(result) {}
+ template <typename Item>
+ void push_back(Item item) {
+ if (_total_count < _max_total) {
+ if ((_seen.size() < _cutoff_max_groups) || _cutoff_strict) {
+ typename Fetcher::ValueType group = _diversity.get(item._key);
+ if (_seen.size() < _cutoff_max_groups) {
+ conditional_add(_seen[group], item);
+ } else {
+ auto found = _seen.find(group);
+ if (found == _seen.end()) {
+ add(item);
+ } else {
+ conditional_add(found->second, item);
+ }
+ }
+ } else if ( !_cutoff_strict) {
+ add(item);
+ }
+ }
+ }
+private:
+ template <typename Item>
+ void add(Item item) {
+ ++_total_count;
+ _result.push_back(item);
+ }
+ template <typename Item>
+ void conditional_add(uint32_t & group_count, Item item) {
+ if (group_count < _max_per_group) {
+ ++group_count;
+ add(item);
+ }
+ }
+};
+
+template <typename DictRange, typename PostingStore, typename Fetcher, typename Result>
+void diversify_3(const DictRange &range_in, const PostingStore &posting, size_t wanted_hits,
+ const Fetcher &diversity, size_t max_per_group,
+ size_t cutoff_max_groups, bool cutoff_strict,
+ Result &result, std::vector<size_t> &fragments)
+{
+ DictRange range(range_in);
+ using DataType = typename PostingStore::DataType;
+ using KeyDataType = typename PostingStore::KeyDataType;
+ DiversityFilter<Fetcher, Result> filter(diversity, max_per_group, cutoff_max_groups, cutoff_strict, result, wanted_hits);
+ while (range.has_next() && (result.size() < wanted_hits)) {
+ typename DictRange::Next dict_entry(range);
+ posting.foreach_frozen(dict_entry.get().getData(),
+ [&](uint32_t key, const DataType &data)
+ { filter.push_back(KeyDataType(key, data)); });
+ if (fragments.back() < result.size()) {
+ fragments.push_back(result.size());
+ }
+ }
+}
+
+template <typename DictRange, typename PostingStore, typename Result>
+void diversify_2(const DictRange &range_in, const PostingStore &posting, size_t wanted_hits,
+ const IAttributeVector &diversity_attr, size_t max_per_group,
+ size_t cutoff_max_groups, bool cutoff_strict,
+ Result &result, std::vector<size_t> &fragments)
+{
+ if (diversity_attr.hasEnum()) { // must handle enum first
+ FetchEnumFast fastEnum(diversity_attr);
+ if (fastEnum.valid()) {
+ diversify_3(range_in, posting, wanted_hits, fastEnum, max_per_group, cutoff_max_groups, cutoff_strict, result, fragments);
+ } else {
+ diversify_3(range_in, posting, wanted_hits, FetchEnum(diversity_attr), max_per_group, cutoff_max_groups, cutoff_strict, result, fragments);
+ }
+ } else if (diversity_attr.isIntegerType()) {
+ FetchNumberFast<SingleValueNumericAttribute<IntegerAttributeTemplate<int32_t> > > fastInt32(diversity_attr);
+ FetchNumberFast<SingleValueNumericAttribute<IntegerAttributeTemplate<int64_t> > > fastInt64(diversity_attr);
+ if (fastInt32.valid()) {
+ diversify_3(range_in, posting, wanted_hits, fastInt32, max_per_group, cutoff_max_groups, cutoff_strict, result, fragments);
+ } else if (fastInt64.valid()) {
+ diversify_3(range_in, posting, wanted_hits, fastInt64, max_per_group, cutoff_max_groups, cutoff_strict, result, fragments);
+ } else {
+ diversify_3(range_in, posting, wanted_hits, FetchInteger(diversity_attr), max_per_group, cutoff_max_groups, cutoff_strict, result, fragments);
+ }
+ } else if (diversity_attr.isFloatingPointType()) {
+ FetchNumberFast<SingleValueNumericAttribute<FloatingPointAttributeTemplate<float> > > fastFloat(diversity_attr);
+ FetchNumberFast<SingleValueNumericAttribute<FloatingPointAttributeTemplate<double> > > fastDouble(diversity_attr);
+ if (fastFloat.valid()) {
+ diversify_3(range_in, posting, wanted_hits, fastFloat, max_per_group, cutoff_max_groups, cutoff_strict, result, fragments);
+ } else if (fastDouble.valid()) {
+ diversify_3(range_in, posting, wanted_hits, fastDouble, max_per_group, cutoff_max_groups, cutoff_strict, result, fragments);
+ } else {
+ diversify_3(range_in, posting, wanted_hits, FetchFloat(diversity_attr), max_per_group, cutoff_max_groups, cutoff_strict, result, fragments);
+ }
+ }
+}
+
+template <typename DictItr, typename PostingStore, typename Result>
+void diversify(bool forward, const DictItr &lower, const DictItr &upper, const PostingStore &posting, size_t wanted_hits,
+ const IAttributeVector &diversity_attr, size_t max_per_group,
+ size_t cutoff_max_groups, bool cutoff_strict,
+ Result &array, std::vector<size_t> &fragments)
+{
+ if (forward) {
+ diversify_2(ForwardRange<DictItr>(lower, upper), posting, wanted_hits,
+ diversity_attr, max_per_group, cutoff_max_groups, cutoff_strict, array, fragments);
+ } else {
+ diversify_2(ReverseRange<DictItr>(lower, upper), posting, wanted_hits,
+ diversity_attr, max_per_group, cutoff_max_groups, cutoff_strict, array, fragments);
+ }
+}
+
+} // namespace search::attribute::diversity
+} // namespace search::attribute
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/attribute/dociditerator.cpp b/searchlib/src/vespa/searchlib/attribute/dociditerator.cpp
new file mode 100644
index 00000000000..f7429070f34
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/dociditerator.cpp
@@ -0,0 +1,11 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "dociditerator.h"
+#include <vespa/log/log.h>
+
+LOG_SETUP(".searchlib.attribute.dociditerator");
+namespace search {
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/attribute/dociditerator.h b/searchlib/src/vespa/searchlib/attribute/dociditerator.h
new file mode 100644
index 00000000000..c1d33d5769e
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/dociditerator.h
@@ -0,0 +1,105 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "postinglisttraits.h"
+
+namespace search
+{
+
+/**
+ * Inner attribute iterator used for temporary posting lists (range
+ * searches).
+ */
+
+template <typename P>
+class DocIdIterator
+{
+public:
+ DocIdIterator() : _cur(nullptr), _end(nullptr), _begin(nullptr) { }
+
+ const P * operator->() const { return _cur; }
+
+ DocIdIterator & operator++() {
+ ++_cur;
+ return *this;
+ }
+
+ bool valid() const { return _cur != _end; }
+
+ void linearSeek(uint32_t docId) {
+ while (_cur != _end && _cur->_key < docId) {
+ ++_cur;
+ }
+ }
+
+ uint32_t getKey(void) const { return _cur->_key; }
+ inline int32_t getData(void) const { return _cur->getData(); }
+
+ void set(const P *begin, const P *end) {
+ _cur = begin;
+ _end = end;
+ _begin = begin;
+ }
+
+ void lower_bound(uint32_t docId) {
+ if (valid() && (docId > getKey())) {
+ linearSeek(docId);
+ } else {
+ _cur = _begin;
+ linearSeek(docId);
+ }
+ }
+
+ void swap(DocIdIterator &rhs) {
+ std::swap(_cur, rhs._cur);
+ std::swap(_end, rhs._end);
+ std::swap(_begin, rhs._begin);
+ }
+protected:
+ const P *_cur;
+ const P *_end;
+ const P *_begin;
+};
+
+template <>
+inline int32_t
+DocIdIterator<AttributePosting>::getData(void) const
+{
+ return 1; // default weight 1 for single value attributes
+}
+
+
+/**
+ * Inner attribute iterator used for short posting lists (8 or less
+ * documents).
+ */
+
+template <typename P>
+class DocIdMinMaxIterator : public DocIdIterator<P>
+{
+public:
+ DocIdMinMaxIterator(void)
+ : DocIdIterator<P>()
+ { }
+
+ inline btree::MinMaxAggregated
+ getAggregated(void) const {
+ return btree::MinMaxAggregated(1, 1);
+ }
+};
+
+
+template<>
+inline btree::MinMaxAggregated
+DocIdMinMaxIterator<AttributeWeightPosting>::getAggregated(void) const
+{
+ btree::MinMaxAggregated a;
+ for (const AttributeWeightPosting *cur = _cur, *end = _end; cur != end; ++cur) {
+ a.add(cur->getData());
+ }
+ return a;
+};
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/attribute/enumattribute.cpp b/searchlib/src/vespa/searchlib/attribute/enumattribute.cpp
new file mode 100644
index 00000000000..396c50eba65
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/enumattribute.cpp
@@ -0,0 +1,12 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "enumattribute.h"
+#include "enumattribute.hpp"
+#include <vespa/log/log.h>
+
+LOG_SETUP(".searchlib.attribute.enumattribute");
+namespace search {
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/attribute/enumattribute.h b/searchlib/src/vespa/searchlib/attribute/enumattribute.h
new file mode 100644
index 00000000000..75d9e44b43f
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/enumattribute.h
@@ -0,0 +1,98 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/attribute/enumstore.h>
+#include "attributevector.h"
+#include "loadedenumvalue.h"
+#include <set>
+
+namespace search {
+
+namespace attribute
+{
+
+template <typename, typename, typename > class PostingSearchContext;
+
+}
+
+template <typename B>
+class EnumAttribute : public B
+{
+ template <typename, typename, typename>
+ friend class attribute::PostingSearchContext; // getEnumStore()
+protected:
+ typedef B BaseClass;
+ typedef typename B::DocId DocId;
+ typedef typename B::EnumHandle EnumHandle;
+ typedef typename B::EnumEntryType EnumEntryType; // Template argument for enum store
+ typedef typename B::EnumEntryType::Type EnumType; // Type stored in enum store (integer, float, string)
+ typedef typename B::Change Change;
+ typedef typename B::Change::DataType ChangeDataType;
+ typedef typename B::ChangeVector ChangeVector;
+ typedef typename B::ChangeVector::const_iterator ChangeVectorIterator;
+ typedef typename B::EnumModifier EnumModifier;
+ typedef typename B::ValueModifier ValueModifier;
+public:
+ typedef typename B::LoadedVector LoadedVector;
+ typedef typename B::EnumIndexVector EnumIndexVector;
+ typedef typename B::EnumVector EnumVector;
+ typedef typename B::LoadedValueType LoadedValueType;
+protected:
+ typedef typename B::generation_t generation_t;
+ typedef std::set<ChangeDataType> UniqueSet;
+ typedef attribute::LoadedEnumAttributeVector
+ LoadedEnumAttributeVector;
+ using B::getGenerationHolder;
+ using B::getStatus;
+
+public:
+ typedef EnumStoreT<EnumEntryType> EnumStore;
+protected:
+ typedef EnumStoreBase::Index EnumIndex;
+
+ EnumStore _enumStore;
+
+ EnumStore & getEnumStore() { return _enumStore; }
+ const EnumStore & getEnumStore() const { return _enumStore; }
+
+ virtual const EnumStoreBase * getEnumStoreBase() const { return &_enumStore; }
+ virtual void getEnumValue(const EnumHandle * v, uint32_t *e, uint32_t sz) const { _enumStore.getEnumValue(v, e, sz); }
+ virtual EnumType getFromEnum(EnumHandle e) const { return _enumStore.getValue(e); }
+
+ virtual void fillPostings(LoadedVector & loaded) { (void) loaded; }
+ virtual void fillEnum(LoadedVector & loaded);
+
+ virtual void
+ fillEnum0(const void *src,
+ size_t srcLen,
+ EnumIndexVector &eidxs);
+
+ virtual void
+ fixupEnumRefCounts(const EnumVector &enumHist);
+
+ virtual uint64_t
+ getUniqueValueCount(void) const;
+
+ static EnumType getDefaultEnumTypeValue() { return B::defaultValue(); }
+
+ /*
+ * Iterate through the change vector and find new unique values.
+ * Perform compaction if necessary and insert the new unique values into the EnumStore.
+ */
+ void insertNewUniqueValues(EnumStoreBase::IndexVector & newIndexes);
+ virtual void considerAttributeChange(const Change & c, UniqueSet & newUniques) = 0;
+ virtual void reEnumerate() = 0;
+ virtual bool hasEnum2Value() const { return true; }
+ virtual AddressSpace getEnumStoreAddressSpaceUsage() const override;
+
+public:
+ EnumAttribute(const vespalib::string & baseFileName,
+ const AttributeVector::Config & cfg);
+
+ virtual ~EnumAttribute();
+ virtual bool findEnum(EnumType v, EnumHandle & e) const { return _enumStore.findEnum(v, e); }
+};
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/attribute/enumattribute.hpp b/searchlib/src/vespa/searchlib/attribute/enumattribute.hpp
new file mode 100644
index 00000000000..7159dc42860
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/enumattribute.hpp
@@ -0,0 +1,147 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/attribute/enumattribute.h>
+#include <vespa/searchlib/attribute/enumstore.hpp>
+
+namespace search {
+
+template <typename B>
+EnumAttribute<B>::
+EnumAttribute(const vespalib::string &baseFileName,
+ const AttributeVector::Config &cfg)
+ : B(baseFileName, cfg),
+ _enumStore(0, cfg.fastSearch())
+{
+ this->setEnum(true);
+}
+
+template <typename B>
+EnumAttribute<B>::~EnumAttribute()
+{
+}
+
+template <typename B>
+void EnumAttribute<B>::fillEnum(LoadedVector & loaded)
+{
+ typename EnumStore::Builder builder;
+ if (!loaded.empty()) {
+ typename LoadedVector::Type v = loaded.read();
+ LoadedValueType prev = v.getValue();
+ uint32_t prevRefCount(0);
+ EnumIndex index = builder.insert(v.getValue(), v._pidx.ref());
+ for(size_t i(0), m(loaded.size()); i < m; ++i, loaded.next()) {
+ v = loaded.read();
+ if (EnumStore::ComparatorType::compare(prev, v.getValue()) != 0) {
+ builder.updateRefCount(prevRefCount);
+ index = builder.insert(v.getValue(), v._pidx.ref());
+ prev = v.getValue();
+ prevRefCount = 1;
+ } else {
+ prevRefCount++;
+ }
+ v.setEidx(index);
+ loaded.write(v);
+ }
+ builder.updateRefCount(prevRefCount);
+ }
+ _enumStore.reset(builder);
+ this->setEnumMax(_enumStore.getLastEnum());
+}
+
+
+template <typename B>
+void
+EnumAttribute<B>::fillEnum0(const void *src,
+ size_t srcLen,
+ EnumIndexVector &eidxs)
+{
+ ssize_t sz = _enumStore.deserialize(src, srcLen, eidxs);
+ assert(static_cast<size_t>(sz) == srcLen);
+ (void) sz;
+ this->setEnumMax(_enumStore.getLastEnum());
+}
+
+
+template <typename B>
+void
+EnumAttribute<B>::fixupEnumRefCounts(
+ const EnumVector &enumHist)
+{
+ _enumStore.fixupRefCounts(enumHist);
+}
+
+
+template <typename B>
+uint64_t
+EnumAttribute<B>::getUniqueValueCount(void) const
+{
+ return _enumStore.getNumUniques();
+}
+
+
+
+template <typename B>
+void
+EnumAttribute<B>::insertNewUniqueValues(EnumStoreBase::IndexVector & newIndexes)
+{
+ UniqueSet newUniques;
+
+ // find new unique strings
+ for (const auto & data : this->_changes) {
+ considerAttributeChange(data, newUniques);
+ }
+
+ uint64_t extraBytesNeeded = 0;
+ for (const auto & data : newUniques) {
+ extraBytesNeeded += _enumStore.getEntrySize(data.raw());
+ }
+
+ do {
+ // perform compaction on EnumStore if necessary
+ if (extraBytesNeeded > this->_enumStore.getRemaining() ||
+ this->_enumStore.getPendingCompact()) {
+ this->_enumStore.clearPendingCompact();
+ this->removeAllOldGenerations();
+ if (!this->_enumStore.performCompaction(extraBytesNeeded)) {
+ // fallback to resize strategy
+ this->_enumStore.fallbackResize(extraBytesNeeded);
+ if (extraBytesNeeded > this->_enumStore.getRemaining()) {
+ fprintf(stderr, "Cannot fallbackResize enumStore\n");
+ abort();
+ }
+ break; // fallback resize performed instead of compaction.
+ }
+
+ // update underlying structure with new EnumIndex values.
+ reEnumerate();
+ // Clear scratch enumeration
+ for (auto & data : this->_changes) {
+ data._enumScratchPad = ChangeBase::UNSET_ENUM;
+ }
+
+ // clear mapping from old enum value to new index
+ _enumStore.clearIndexMap();
+ }
+ } while (0);
+
+ // insert new unique values in EnumStore
+ for (const auto & data : newUniques) {
+ EnumIndex idx;
+ _enumStore.addEnum(data.raw(), idx);
+ newIndexes.push_back(idx);
+ }
+}
+
+
+template <typename B>
+AddressSpace
+EnumAttribute<B>::getEnumStoreAddressSpaceUsage() const
+{
+ return _enumStore.getAddressSpaceUsage();
+}
+
+} // namespace search
+
+
diff --git a/searchlib/src/vespa/searchlib/attribute/enumattributesaver.cpp b/searchlib/src/vespa/searchlib/attribute/enumattributesaver.cpp
new file mode 100644
index 00000000000..37c66213134
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/enumattributesaver.cpp
@@ -0,0 +1,51 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "enumattributesaver.h"
+#include "iattributesavetarget.h"
+#include <vespa/searchlib/util/bufferwriter.h>
+
+namespace search {
+
+EnumAttributeSaver::
+EnumAttributeSaver(const EnumStoreBase &enumStore, bool disableReEnumerate)
+ : _enumStore(enumStore),
+ _disableReEnumerate(disableReEnumerate),
+ _rootRef()
+{
+ if (_disableReEnumerate) {
+ // Prevent enum store from re-enumerating enum values during compaction
+ _enumStore.disableReEnumerate();
+ }
+ const EnumStoreDictBase &enumDict = enumStore.getEnumStoreDict();
+ _rootRef = enumDict.getFrozenRootRef();
+}
+
+EnumAttributeSaver::~EnumAttributeSaver()
+{
+ enableReEnumerate();
+}
+
+void
+EnumAttributeSaver::enableReEnumerate()
+{
+ if (_disableReEnumerate) {
+ // compaction of enumstore can now re-enumerate enum values
+ _enumStore.enableReEnumerate();
+ _disableReEnumerate = false;
+ }
+}
+
+void
+EnumAttributeSaver::writeUdat(IAttributeSaveTarget &saveTarget)
+{
+ if (saveTarget.getEnumerated()) {
+ std::unique_ptr<BufferWriter>
+ udatWriter(saveTarget.udatWriter().allocBufferWriter());
+ const EnumStoreDictBase &enumDict = _enumStore.getEnumStoreDict();
+ enumDict.writeAllValues(*udatWriter, _rootRef);
+ udatWriter->flush();
+ }
+}
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/attribute/enumattributesaver.h b/searchlib/src/vespa/searchlib/attribute/enumattributesaver.h
new file mode 100644
index 00000000000..c30a416c9fe
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/enumattributesaver.h
@@ -0,0 +1,34 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "enumstorebase.h"
+
+namespace search {
+
+class IAttributeSaveTarget;
+
+/*
+ * Helper class for saving an enumerated multivalue attribute.
+ *
+ * It handles writing to the udat file.
+ */
+class EnumAttributeSaver
+{
+ const EnumStoreBase &_enumStore;
+ bool _disableReEnumerate;
+ btree::BTreeNode::Ref _rootRef;
+
+public:
+ EnumAttributeSaver(const EnumStoreBase &enumStore, bool disableReEnumerate);
+
+ ~EnumAttributeSaver();
+
+ void enableReEnumerate();
+
+ void writeUdat(IAttributeSaveTarget &saveTarget);
+
+ const EnumStoreBase &getEnumStore() const { return _enumStore; }
+};
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/attribute/enumcomparator.cpp b/searchlib/src/vespa/searchlib/attribute/enumcomparator.cpp
new file mode 100644
index 00000000000..fde5666371b
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/enumcomparator.cpp
@@ -0,0 +1,83 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "enumcomparator.h"
+#include <vespa/searchlib/util/foldedstringcompare.h>
+#include "enumstore.hpp"
+
+namespace search {
+
+namespace {
+
+FoldedStringCompare _strCmp;
+
+}
+
+template <>
+int
+EnumStoreComparatorT<NumericEntryType<float> >::compare(EntryValue lhs, EntryValue rhs)
+{
+ return FloatingPointCompareHelper::compare(lhs, rhs);
+}
+
+template <>
+int
+EnumStoreComparatorT<NumericEntryType<double> >::compare(EntryValue lhs, EntryValue rhs)
+{
+ return FloatingPointCompareHelper::compare(lhs, rhs);
+}
+
+template <>
+EnumStoreFoldedComparatorT<StringEntryType>::
+EnumStoreFoldedComparatorT(const EnumStoreType & enumStore,
+ EntryValue value, bool prefix)
+ : ParentType(enumStore, value),
+ _prefix(prefix),
+ _prefixLen(0u)
+{
+ if (getUsePrefix())
+ _prefixLen = _strCmp.size(value);
+}
+
+template <>
+int
+EnumStoreComparatorT<StringEntryType>::compare(EntryValue lhs, EntryValue rhs)
+{
+ return _strCmp.compare(lhs, rhs);
+}
+
+template <>
+int
+EnumStoreFoldedComparatorT<StringEntryType>::compareFolded(EntryValue lhs,
+ EntryValue rhs)
+{
+ return _strCmp.compareFolded(lhs, rhs);
+}
+
+template <>
+int
+EnumStoreFoldedComparatorT<StringEntryType>::
+compareFoldedPrefix(EntryValue lhs,
+ EntryValue rhs,
+ size_t prefixLen)
+{
+ return _strCmp.compareFoldedPrefix(lhs, rhs, prefixLen);
+}
+
+template class EnumStoreComparatorT<StringEntryType>;
+template class EnumStoreComparatorT<NumericEntryType<int8_t> >;
+template class EnumStoreComparatorT<NumericEntryType<int16_t> >;
+template class EnumStoreComparatorT<NumericEntryType<int32_t> >;
+template class EnumStoreComparatorT<NumericEntryType<int64_t> >;
+template class EnumStoreComparatorT<NumericEntryType<float> >;
+template class EnumStoreComparatorT<NumericEntryType<double> >;
+template class EnumStoreFoldedComparatorT<StringEntryType>;
+template class EnumStoreFoldedComparatorT<NumericEntryType<int8_t> >;
+template class EnumStoreFoldedComparatorT<NumericEntryType<int16_t> >;
+template class EnumStoreFoldedComparatorT<NumericEntryType<int32_t> >;
+template class EnumStoreFoldedComparatorT<NumericEntryType<int64_t> >;
+template class EnumStoreFoldedComparatorT<NumericEntryType<float> >;
+template class EnumStoreFoldedComparatorT<NumericEntryType<double> >;
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/attribute/enumcomparator.h b/searchlib/src/vespa/searchlib/attribute/enumcomparator.h
new file mode 100644
index 00000000000..1a6dcb4c4e1
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/enumcomparator.h
@@ -0,0 +1,195 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "enumstore.h"
+
+namespace search {
+
+/**
+ * Template comparator class for the various entry types.
+ **/
+template <typename EntryType>
+class EnumStoreComparatorT : public EnumStoreComparator {
+public:
+ typedef EnumStoreT<EntryType> EnumStoreType;
+protected:
+ typedef typename EntryType::Type EntryValue;
+ const EnumStoreType & _enumStore;
+ EntryValue _value;
+ EntryValue getValue(const EnumIndex & idx) const {
+ if (idx.valid()) {
+ return _enumStore.getValue(idx);
+ }
+ return _value;
+ }
+public:
+ /**
+ * Creates a comparator using the given enum store.
+ **/
+ EnumStoreComparatorT(const EnumStoreType & enumStore);
+ /**
+ * Creates a comparator using the given enum store and that uses the
+ * given value during compare if the enum index is invalid.
+ **/
+ EnumStoreComparatorT(const EnumStoreType & enumStore,
+ EntryValue value);
+
+ static int compare(EntryValue lhs, EntryValue rhs) {
+ if (lhs < rhs) {
+ return -1;
+ } else if (lhs == rhs) {
+ return 0;
+ }
+ return 1;
+ }
+ virtual bool operator() (const EnumIndex & lhs, const EnumIndex & rhs) const {
+ return compare(getValue(lhs), getValue(rhs)) < 0;
+ }
+};
+
+
+/**
+ * Template comparator class for the various entry types that uses folded compare.
+ **/
+template <typename EntryType>
+class EnumStoreFoldedComparatorT : public EnumStoreComparatorT<EntryType> {
+private:
+ typedef EnumStoreComparatorT<EntryType> ParentType;
+ typedef typename ParentType::EnumStoreType EnumStoreType;
+ typedef typename ParentType::EnumIndex EnumIndex;
+ typedef typename ParentType::EntryValue EntryValue;
+ using ParentType::getValue;
+ bool _prefix;
+ size_t _prefixLen;
+public:
+ /**
+ * Creates a comparator using the given enum store.
+ * @param prefix whether we should perform prefix compare.
+ **/
+ EnumStoreFoldedComparatorT(const EnumStoreType & enumStore, bool prefix = false);
+ /**
+ * Creates a comparator using the given enum store and that uses the
+ * given value during compare if the enum index is invalid.
+ * @param prefix whether we should perform prefix compare.
+ **/
+ EnumStoreFoldedComparatorT(const EnumStoreType & enumStore,
+ EntryValue value, bool prefix = false);
+
+ inline bool
+ getUsePrefix(void) const
+ {
+ return false;
+ }
+
+ static int
+ compareFolded(EntryValue lhs, EntryValue rhs)
+ {
+ return ParentType::compare(lhs, rhs);
+ }
+
+ static int
+ compareFoldedPrefix(EntryValue lhs, EntryValue rhs, size_t prefixLen)
+ {
+ (void) prefixLen;
+ return ParentType::compare(lhs, rhs);
+ }
+
+ virtual bool
+ operator() (const EnumIndex & lhs, const EnumIndex & rhs) const
+ {
+ if (getUsePrefix())
+ return compareFoldedPrefix(getValue(lhs),
+ getValue(rhs), _prefixLen) < 0;
+ return compareFolded(getValue(lhs), getValue(rhs)) < 0;
+ }
+};
+
+
+template <typename EntryType>
+EnumStoreComparatorT<EntryType>::EnumStoreComparatorT(const EnumStoreType & enumStore) :
+ _enumStore(enumStore),
+ _value()
+{
+}
+
+template <typename EntryType>
+EnumStoreComparatorT<EntryType>::EnumStoreComparatorT(const EnumStoreType & enumStore,
+ EntryValue value) :
+ _enumStore(enumStore),
+ _value(value)
+{
+}
+
+template <>
+int
+EnumStoreComparatorT<NumericEntryType<float> >::compare(EntryValue lhs, EntryValue rhs);
+
+template <>
+int
+EnumStoreComparatorT<NumericEntryType<double> >::compare(EntryValue lhs, EntryValue rhs);
+
+template <>
+int
+EnumStoreComparatorT<StringEntryType>::compare(EntryValue lhs, EntryValue rhs);
+
+
+template <typename EntryType>
+EnumStoreFoldedComparatorT<EntryType>::
+EnumStoreFoldedComparatorT(const EnumStoreType & enumStore, bool prefix)
+ : ParentType(enumStore),
+ _prefix(prefix),
+ _prefixLen(0u)
+{
+}
+
+template <typename EntryType>
+EnumStoreFoldedComparatorT<EntryType>::
+EnumStoreFoldedComparatorT(const EnumStoreType & enumStore,
+ EntryValue value, bool prefix)
+ : ParentType(enumStore, value),
+ _prefix(prefix),
+ _prefixLen(0u)
+{
+}
+
+template <>
+EnumStoreFoldedComparatorT<StringEntryType>::
+EnumStoreFoldedComparatorT(const EnumStoreType & enumStore,
+ EntryValue value, bool prefix);
+
+template <>
+int
+EnumStoreFoldedComparatorT<StringEntryType>::compareFolded(EntryValue lhs,
+ EntryValue rhs);
+
+template <>
+int
+EnumStoreFoldedComparatorT<StringEntryType>::
+compareFoldedPrefix(EntryValue lhs, EntryValue rhs, size_t prefixLen);
+
+template <>
+inline bool
+EnumStoreFoldedComparatorT<StringEntryType>::getUsePrefix(void) const
+{
+ return _prefix;
+}
+
+
+extern template class EnumStoreComparatorT<StringEntryType>;
+extern template class EnumStoreComparatorT<NumericEntryType<int8_t> >;
+extern template class EnumStoreComparatorT<NumericEntryType<int16_t> >;
+extern template class EnumStoreComparatorT<NumericEntryType<int32_t> >;
+extern template class EnumStoreComparatorT<NumericEntryType<int64_t> >;
+extern template class EnumStoreComparatorT<NumericEntryType<float> >;
+extern template class EnumStoreComparatorT<NumericEntryType<double> >;
+extern template class EnumStoreFoldedComparatorT<StringEntryType>;
+extern template class EnumStoreFoldedComparatorT<NumericEntryType<int8_t> >;
+extern template class EnumStoreFoldedComparatorT<NumericEntryType<int16_t> >;
+extern template class EnumStoreFoldedComparatorT<NumericEntryType<int32_t> >;
+extern template class EnumStoreFoldedComparatorT<NumericEntryType<int64_t> >;
+extern template class EnumStoreFoldedComparatorT<NumericEntryType<float> >;
+extern template class EnumStoreFoldedComparatorT<NumericEntryType<double> >;
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/attribute/enumhintsearchcontext.cpp b/searchlib/src/vespa/searchlib/attribute/enumhintsearchcontext.cpp
new file mode 100644
index 00000000000..3b01d02b166
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/enumhintsearchcontext.cpp
@@ -0,0 +1,79 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "enumhintsearchcontext.h"
+#include <vespa/searchlib/queryeval/emptysearch.h>
+
+namespace search
+{
+
+using queryeval::SearchIterator;
+
+namespace attribute
+{
+
+using btree::BTreeNode;
+using fef::TermFieldMatchData;
+
+EnumHintSearchContext::
+EnumHintSearchContext(const EnumStoreDictBase &dictionary,
+ uint32_t docIdLimit,
+ uint64_t numValues)
+ : _dictionary(dictionary),
+ _frozenRootRef(dictionary.getFrozenRootRef()),
+ _uniqueValues(0u),
+ _docIdLimit(docIdLimit),
+ _numValues(numValues)
+{
+}
+
+
+EnumHintSearchContext::~EnumHintSearchContext(void)
+{
+}
+
+
+void
+EnumHintSearchContext::lookupTerm(const EnumStoreComparator &comp)
+{
+ _uniqueValues = _dictionary.lookupFrozenTerm(_frozenRootRef, comp);
+}
+
+
+void
+EnumHintSearchContext::lookupRange(const EnumStoreComparator &low,
+ const EnumStoreComparator &high)
+{
+ _uniqueValues = _dictionary.lookupFrozenRange(_frozenRootRef, low, high);
+}
+
+void
+EnumHintSearchContext::fetchPostings(bool strict)
+{
+ (void) strict;
+}
+
+SearchIterator::UP
+EnumHintSearchContext::createPostingIterator(TermFieldMatchData *matchData,
+ bool strict)
+{
+ (void) matchData;
+ (void) strict;
+
+ return (_uniqueValues == 0u)
+ ? SearchIterator::UP(new queryeval::EmptySearch())
+ : SearchIterator::UP();
+}
+
+
+unsigned int
+EnumHintSearchContext::approximateHits(void) const
+{
+ return (_uniqueValues == 0u)
+ ? 0u
+ : std::max(uint64_t(_docIdLimit), _numValues);
+}
+
+} // namespace attribute
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/attribute/enumhintsearchcontext.h b/searchlib/src/vespa/searchlib/attribute/enumhintsearchcontext.h
new file mode 100644
index 00000000000..b77db84b520
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/enumhintsearchcontext.h
@@ -0,0 +1,49 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "enumstore.h"
+#include "postinglisttraits.h"
+#include "ipostinglistsearchcontext.h"
+#include <vespa/searchlib/queryeval/searchiterator.h>
+
+namespace search
+{
+
+namespace attribute
+{
+
+/**
+ * Search context helper for enumerated attributes, used to eliminate
+ * searches for values that are not present at all.
+ */
+
+class EnumHintSearchContext : public IPostingListSearchContext
+{
+ const EnumStoreDictBase &_dictionary;
+ const btree::BTreeNode::Ref _frozenRootRef;
+ uint32_t _uniqueValues;
+ uint32_t _docIdLimit;
+ uint64_t _numValues; // attr.getStatus().getNumValues();
+
+protected:
+ EnumHintSearchContext(const EnumStoreDictBase &dictionary,
+ uint32_t docIdLimit,
+ uint64_t numValues);
+ ~EnumHintSearchContext(void);
+
+ void lookupTerm(const EnumStoreComparator &comp);
+ void lookupRange(const EnumStoreComparator &low, const EnumStoreComparator &high);
+
+ queryeval::SearchIterator::UP
+ createPostingIterator(fef::TermFieldMatchData *matchData, bool strict) override;
+
+ void fetchPostings(bool strict) override;
+ unsigned int approximateHits(void) const override;
+};
+
+
+} // namespace attribute
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/attribute/enumstore.cpp b/searchlib/src/vespa/searchlib/attribute/enumstore.cpp
new file mode 100644
index 00000000000..10a8b46ce80
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/enumstore.cpp
@@ -0,0 +1,361 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "enumstore.h"
+#include "enumstore.hpp"
+#include <iomanip>
+
+namespace search {
+
+template <>
+void
+EnumStoreT<StringEntryType>::
+insertEntryValue(char * dst, Type value)
+{
+ strcpy(dst, value);
+}
+
+template <>
+void
+EnumStoreT<StringEntryType>::
+printEntry(vespalib::asciistream & os, const Entry & e) const
+{
+ os << "Entry: {";
+ os << "enum: " << e.getEnum();
+ os << ", refcount: " << e.getRefCount();
+ os << ", value: " << vespalib::string(e.getValue());
+ os << "}";
+}
+
+
+template <>
+void
+EnumStoreT<NumericEntryType<float> >::
+printEntry(vespalib::asciistream & os, const Entry & e) const
+{
+ os << "Entry: {";
+ os << "enum: " << e.getEnum();
+ os << ", refcount: " << e.getRefCount();
+ os << ", value: " << e.getValue();
+ union
+ {
+ unsigned int _asInt;
+ float _asFloat;
+ } u;
+ u._asFloat = e.getValue();
+ os << ", bvalue: 0x" << std::hex << u._asInt;
+ os << "}";
+}
+
+
+template <>
+void
+EnumStoreT<NumericEntryType<double> >::
+printEntry(vespalib::asciistream & os, const Entry & e) const
+{
+ os << "Entry: {";
+ os << "enum: " << e.getEnum();
+ os << ", refcount: " << e.getRefCount();
+ os << ", value: " << e.getValue();
+ union
+ {
+ unsigned long _asLong;
+ double _asDouble;
+ } u;
+ u._asDouble = e.getValue();
+ os << ", bvalue: 0x" << std::hex << u._asLong;
+ os << "}";
+}
+
+
+template <>
+void
+EnumStoreT<StringEntryType>::printValue(vespalib::asciistream & os, Index idx) const
+{
+ os << vespalib::string(getValue(idx));
+}
+
+template <>
+void
+EnumStoreT<StringEntryType>::printValue(vespalib::asciistream & os, Type value) const
+{
+ os << vespalib::string(value);
+}
+
+
+template <>
+void
+EnumStoreT<StringEntryType>::writeValues(BufferWriter &writer,
+ const Index *idxs,
+ size_t count) const
+{
+ for (uint32_t i = 0; i < count; ++i) {
+ Index idx = idxs[i];
+ const char *src(_store.getBufferEntry<char>(idx.bufferId(),
+ idx.offset()) +
+ EntryBase::size());
+ size_t sz = strlen(src) + 1;
+ writer.write(src, sz);
+ }
+}
+
+
+template <>
+ssize_t
+EnumStoreT<StringEntryType>::deserialize(const void *src,
+ size_t available,
+ size_t &initSpace)
+{
+ size_t slen = strlen(static_cast<const char *>(src));
+ size_t sz(StringEntryType::fixedSize() + slen);
+ if (available < sz)
+ return -1;
+ uint32_t entrySize(alignEntrySize(EntryBase::size() + sz));
+ initSpace += entrySize;
+ return sz;
+}
+
+
+template <>
+ssize_t
+EnumStoreT<StringEntryType>::deserialize(const void *src,
+ size_t available,
+ Index &idx)
+{
+ size_t slen = strlen(static_cast<const char *>(src));
+ size_t sz(StringEntryType::fixedSize() + slen);
+ if (available < sz)
+ return -1;
+ uint32_t activeBufferId = _store.getActiveBufferId(TYPE_ID);
+ btree::BufferState & buffer = _store.getBufferState(activeBufferId);
+ uint32_t entrySize(alignEntrySize(EntryBase::size() + sz));
+ if (buffer.remaining() < entrySize) {
+ fprintf(stderr, "Out of enumstore bufferspace\n");
+ abort(); // not enough space
+ }
+ uint64_t offset = buffer.size();
+ char *dst(_store.getBufferEntry<char>(activeBufferId, offset));
+ memcpy(dst, &_nextEnum, sizeof(uint32_t));
+ uint32_t pos = sizeof(uint32_t);
+ uint32_t refCount(0);
+ memcpy(dst + pos, &refCount, sizeof(uint32_t));
+ pos += sizeof(uint32_t);
+ memcpy(dst + pos, src, sz);
+ buffer.pushed_back(entrySize);
+ ++_nextEnum;
+
+ if (idx.valid()) {
+ assert(ComparatorType::compare(getValue(idx),
+ Entry(dst).getValue()) < 0);
+ }
+ idx = Index(offset, activeBufferId);
+ return sz;
+}
+
+
+template
+class btree::BTreeNodeDataWrap<btree::BTreeNoLeafData,
+ EnumTreeTraits::LEAF_SLOTS>;
+
+template
+class btree::BTreeNodeDataWrap<btree::EntryRef,
+ EnumTreeTraits::LEAF_SLOTS>;
+
+#if 0
+template
+class btree::BTreeKeyData<EnumStoreBase::Index,
+ btree::BTreeNoLeafData>;
+
+template
+class btree::BTreeKeyData<EnumStoreBase::Index,
+ btree::EntryRef>;
+#endif
+
+template
+class btree::BTreeNodeT<EnumStoreBase::Index,
+ EnumTreeTraits::INTERNAL_SLOTS>;
+
+#if 0
+template
+class btree::BTreeNodeT<EnumStoreBase::Index,
+ EnumTreeTraits::LEAF_SLOTS>;
+#endif
+
+template
+class btree::BTreeNodeTT<EnumStoreBase::Index,
+ btree::EntryRef,
+ btree::NoAggregated,
+ EnumTreeTraits::INTERNAL_SLOTS>;
+
+template
+class btree::BTreeNodeTT<EnumStoreBase::Index,
+ btree::BTreeNoLeafData,
+ btree::NoAggregated,
+ EnumTreeTraits::LEAF_SLOTS>;
+
+#if 0
+template
+class btree::BTreeNodeTT<EnumStoreBase::Index,
+ btree::EntryRef,
+ btree::NoAggregated,
+ EnumTreeTraits::LEAF_SLOTS>;
+#endif
+
+template
+class btree::BTreeInternalNode<EnumStoreBase::Index,
+ btree::NoAggregated,
+ EnumTreeTraits::INTERNAL_SLOTS>;
+
+template
+class btree::BTreeLeafNode<EnumStoreBase::Index,
+ btree::BTreeNoLeafData,
+ btree::NoAggregated,
+ EnumTreeTraits::LEAF_SLOTS>;
+
+template
+class btree::BTreeLeafNode<EnumStoreBase::Index,
+ btree::EntryRef,
+ btree::NoAggregated,
+ EnumTreeTraits::LEAF_SLOTS>;
+
+template
+class btree::BTreeLeafNodeTemp<EnumStoreBase::Index,
+ btree::BTreeNoLeafData,
+ btree::NoAggregated,
+ EnumTreeTraits::LEAF_SLOTS>;
+
+template
+class btree::BTreeLeafNodeTemp<EnumStoreBase::Index,
+ btree::EntryRef,
+ btree::NoAggregated,
+ EnumTreeTraits::LEAF_SLOTS>;
+
+template
+class btree::BTreeNodeStore<EnumStoreBase::Index,
+ btree::BTreeNoLeafData,
+ btree::NoAggregated,
+ EnumTreeTraits::INTERNAL_SLOTS,
+ EnumTreeTraits::LEAF_SLOTS>;
+
+template
+class btree::BTreeNodeStore<EnumStoreBase::Index,
+ btree::EntryRef,
+ btree::NoAggregated,
+ EnumTreeTraits::INTERNAL_SLOTS,
+ EnumTreeTraits::LEAF_SLOTS>;
+
+template
+class btree::BTreeIteratorBase<EnumStoreBase::Index,
+ btree::BTreeNoLeafData,
+ btree::NoAggregated,
+ EnumTreeTraits::INTERNAL_SLOTS,
+ EnumTreeTraits::LEAF_SLOTS,
+ EnumTreeTraits::PATH_SIZE>;
+template
+class btree::BTreeIteratorBase<EnumStoreBase::Index,
+ btree::EntryRef,
+ btree::NoAggregated,
+ EnumTreeTraits::INTERNAL_SLOTS,
+ EnumTreeTraits::LEAF_SLOTS,
+ EnumTreeTraits::PATH_SIZE>;
+template
+class btree::BTreeIterator<EnumStoreBase::Index,
+ btree::BTreeNoLeafData,
+ btree::NoAggregated,
+ const EnumStoreComparatorWrapper,
+ EnumTreeTraits>;
+template
+class btree::BTreeIterator<EnumStoreBase::Index,
+ btree::EntryRef,
+ btree::NoAggregated,
+ const EnumStoreComparatorWrapper,
+ EnumTreeTraits>;
+template
+class btree::BTree<EnumStoreBase::Index,
+ btree::BTreeNoLeafData,
+ btree::NoAggregated,
+ const EnumStoreComparatorWrapper,
+ EnumTreeTraits>;
+template
+class btree::BTree<EnumStoreBase::Index,
+ btree::EntryRef,
+ btree::NoAggregated,
+ const EnumStoreComparatorWrapper,
+ EnumTreeTraits>;
+template
+class btree::BTreeRoot<EnumStoreBase::Index,
+ btree::BTreeNoLeafData,
+ btree::NoAggregated,
+ const EnumStoreComparatorWrapper,
+ EnumTreeTraits>;
+
+template
+class btree::BTreeRoot<EnumStoreBase::Index,
+ btree::EntryRef,
+ btree::NoAggregated,
+ const EnumStoreComparatorWrapper,
+ EnumTreeTraits>;
+template
+class btree::BTreeRootT<EnumStoreBase::Index,
+ btree::BTreeNoLeafData,
+ btree::NoAggregated,
+ const EnumStoreComparatorWrapper,
+ EnumTreeTraits>;
+
+template
+class btree::BTreeRootT<EnumStoreBase::Index,
+ btree::EntryRef,
+ btree::NoAggregated,
+ const EnumStoreComparatorWrapper,
+ EnumTreeTraits>;
+template
+class btree::BTreeRootBase<EnumStoreBase::Index,
+ btree::BTreeNoLeafData,
+ btree::NoAggregated,
+ EnumTreeTraits::INTERNAL_SLOTS,
+ EnumTreeTraits::LEAF_SLOTS>;
+
+template
+class btree::BTreeRootBase<EnumStoreBase::Index,
+ btree::EntryRef,
+ btree::NoAggregated,
+ EnumTreeTraits::INTERNAL_SLOTS,
+ EnumTreeTraits::LEAF_SLOTS>;
+
+template
+class btree::BTreeNodeAllocator<EnumStoreBase::Index,
+ btree::BTreeNoLeafData,
+ btree::NoAggregated,
+ EnumTreeTraits::INTERNAL_SLOTS,
+ EnumTreeTraits::LEAF_SLOTS>;
+
+template
+class btree::BTreeNodeAllocator<EnumStoreBase::Index,
+ btree::EntryRef,
+ btree::NoAggregated,
+ EnumTreeTraits::INTERNAL_SLOTS,
+ EnumTreeTraits::LEAF_SLOTS>;
+
+template
+class btree::BTreeBuilder<EnumStoreBase::Index,
+ btree::BTreeNoLeafData,
+ btree::NoAggregated,
+ EnumTreeTraits::INTERNAL_SLOTS,
+ EnumTreeTraits::LEAF_SLOTS>;
+
+template
+class btree::BTreeBuilder<EnumStoreBase::Index,
+ btree::EntryRef,
+ btree::NoAggregated,
+ EnumTreeTraits::INTERNAL_SLOTS,
+ EnumTreeTraits::LEAF_SLOTS>;
+
+template class EnumStoreT< StringEntryType >;
+template class EnumStoreT<NumericEntryType<int8_t> >;
+template class EnumStoreT<NumericEntryType<int16_t> >;
+template class EnumStoreT<NumericEntryType<int32_t> >;
+template class EnumStoreT<NumericEntryType<int64_t> >;
+template class EnumStoreT<NumericEntryType<float> >;
+template class EnumStoreT<NumericEntryType<double> >;
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/attribute/enumstore.h b/searchlib/src/vespa/searchlib/attribute/enumstore.h
new file mode 100644
index 00000000000..0f0675248a7
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/enumstore.h
@@ -0,0 +1,501 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/fastos/fastos.h>
+#include "enumstorebase.h"
+#include <vespa/searchlib/util/foldedstringcompare.h>
+#include <vespa/vespalib/util/buffer.h>
+#include <vespa/vespalib/util/array.h>
+#include <vespa/vespalib/util/stringfmt.h>
+#include <cmath>
+#include <vespa/searchlib/btree/entryref.h>
+#include <vespa/searchlib/btree/btreenode.h>
+#include <vespa/searchlib/btree/btreenodeallocator.h>
+#include <vespa/searchlib/btree/btree.h>
+#include <vespa/searchlib/btree/btreebuilder.h>
+
+namespace search {
+
+template <typename> class EnumStoreComparatorT;
+template <typename> class EnumStoreFoldedComparatorT;
+
+/**
+ * Class representing a numeric entry type in a enum store.
+ * Used as template argument for EnumStoreT.
+ **/
+
+template <typename T>
+class NumericEntryType {
+public:
+ typedef T Type;
+ static uint32_t size(Type) { return fixedSize(); }
+ static uint32_t fixedSize() { return sizeof(T); }
+
+ static bool
+ hasFold(void)
+ {
+ return false;
+ }
+};
+
+/**
+ * Class representing a string entry type in a enum store.
+ * Used as template argument for EnumStoreT.
+ **/
+class StringEntryType {
+public:
+ typedef const char * Type;
+ static uint32_t size(Type value) { return strlen(value) + fixedSize(); }
+ static uint32_t fixedSize() { return 1; }
+
+ static bool
+ hasFold(void)
+ {
+ return true;
+ }
+};
+
+
+/**
+ * Used to determine the ordering between two floating point values that can be NAN.
+ **/
+struct FloatingPointCompareHelper
+{
+ template <typename T>
+ static int compare(T a, T b) {
+ if (std::isnan(a) && std::isnan(b)) {
+ return 0;
+ } else if (std::isnan(a)) {
+ return -1;
+ } else if (std::isnan(b)) {
+ return 1;
+ } else if (a < b) {
+ return -1;
+ } else if (a == b) {
+ return 0;
+ }
+ return 1;
+ }
+};
+
+
+//-----------------------------------------------------------------------------
+// EnumStoreT
+//-----------------------------------------------------------------------------
+template <class EntryType>
+class EnumStoreT : public EnumStoreBase
+{
+ friend class EnumStoreTest;
+public:
+ typedef typename EntryType::Type Type;
+ typedef EnumStoreComparatorT<EntryType> ComparatorType;
+ typedef EnumStoreFoldedComparatorT<EntryType> FoldedComparatorType;
+ using EnumStoreBase::deserialize;
+ using EnumStoreBase::fixupRefCounts;
+ using EnumStoreBase::reset;
+
+ class Entry : public EntryBase {
+ public:
+ Entry(void * data) : EntryBase(data) {}
+ Type getValue() const;
+ static uint32_t fixedSize() { return EntryBase::size() + EntryType::fixedSize(); }
+ };
+ static void insertEntry(char * dst, uint32_t enumValue, uint32_t refCount, Type value);
+
+private:
+ EnumStoreT(const EnumStoreT & rhs) = delete;
+ EnumStoreT & operator=(const EnumStoreT & rhs) = delete;
+
+ static void insertEntryValue(char * dst, Type value) {
+ memcpy(dst, &value, sizeof(Type));
+ }
+
+protected:
+ typedef EnumStoreBase::IndexSet IndexSet;
+ using EnumStoreBase::_store;
+ using EnumStoreBase::TYPE_ID;
+
+ Entry getEntry(Index idx) const {
+ return Entry(const_cast<DataStoreType &>(_store).getBufferEntry<char>(idx.bufferId(), idx.offset()));
+ }
+ void printEntry(vespalib::asciistream & os, const Entry & e) const;
+
+ virtual void
+ freeUnusedEnum(Index idx, IndexSet & unused);
+
+public:
+ EnumStoreT(uint64_t initBufferSize, bool hasPostings)
+ : EnumStoreBase(initBufferSize, hasPostings)
+ {
+ }
+
+ bool getValue(Index idx, Type & value) const;
+ Type getValue(uint32_t idx) const { return getValue(Index(btree::EntryRef(idx))); }
+ Type getValue(Index idx) const { return getEntry(idx).getValue(); }
+ virtual uint32_t getFixedSize() const { return Entry::fixedSize(); }
+
+ static uint32_t
+ getEntrySize(Type value)
+ {
+ return alignEntrySize(EntryBase::size() + EntryType::size(value));
+ }
+ void printBuffer(vespalib::asciistream & os, uint32_t bufferIdx) const;
+ void printValue(vespalib::asciistream & os, Index idx) const;
+ void printValue(vespalib::asciistream & os, Type value) const;
+
+ class Builder {
+ public:
+ struct UniqueEntry {
+ UniqueEntry(const Type & val, size_t sz, uint32_t pidx = 0) : _value(val), _sz(sz), _pidx(pidx), _refCount(1) { }
+ Type _value;
+ size_t _sz;
+ size_t _pidx;
+ uint32_t _refCount;
+ };
+
+ typedef vespalib::Array<UniqueEntry, vespalib::DefaultAlloc> Uniques;
+ private:
+ Uniques _uniques;
+ uint64_t _bufferSize;
+ public:
+ Builder() : _uniques(), _bufferSize(Index::align(1)) {}
+ Index insert(Type value, uint32_t pidx = 0) {
+ uint32_t entrySize = getEntrySize(value);
+ _uniques.push_back(UniqueEntry(value, entrySize, pidx));
+ Index index(_bufferSize, 0); // bufferId 0 should be used when resetting with a builder
+ _bufferSize += entrySize;
+ return index;
+ }
+ void updateRefCount(uint32_t refCount) { _uniques.rbegin()->_refCount = refCount; }
+ const Uniques & getUniques() const { return _uniques; }
+ uint64_t getBufferSize() const { return _bufferSize; }
+ };
+
+ virtual void
+ writeValues(BufferWriter &writer,
+ const Index *idxs, size_t count) const override;
+
+ virtual ssize_t
+ deserialize(const void *src, size_t available, size_t &initSpace);
+
+ virtual ssize_t
+ deserialize(const void *src, size_t available, Index &idx);
+
+ virtual bool
+ foldedChange(const Index &idx1, const Index &idx2);
+
+ virtual bool
+ findEnum(Type value, EnumStoreBase::EnumHandle &e) const;
+
+ void
+ addEnum(Type value, Index &newIdx);
+
+ virtual bool
+ findIndex(Type value, Index &idx) const;
+
+ virtual void
+ freeUnusedEnums(bool movePostingidx);
+
+ virtual void
+ freeUnusedEnums(const IndexVector &toRemove);
+
+ void
+ reset(Builder &builder);
+
+ virtual bool
+ performCompaction(uint64_t bytesNeeded);
+
+ void
+ printCurrentContent(vespalib::asciistream &os) const;
+
+private:
+ template <typename Dictionary>
+ void
+ reset(Builder &builder, Dictionary &dict);
+
+ template <typename Dictionary>
+ void
+ addEnum(Type value, Index &newIdx, Dictionary &dict);
+
+ template <typename Dictionary>
+ void
+ performCompaction(Dictionary &dict);
+
+ template <typename Dictionary>
+ void
+ printCurrentContent(vespalib::asciistream &os,
+ const Dictionary &dict) const;
+};
+
+template <typename EntryType>
+inline typename EntryType::Type
+EnumStoreT<EntryType>::Entry::getValue() const // implementation for numeric
+{
+ Type dst;
+ const char * src = _data + EntryBase::size();
+ memcpy(&dst, src, sizeof(Type));
+ return dst;
+}
+
+template <>
+inline StringEntryType::Type
+EnumStoreT<StringEntryType>::Entry::getValue() const
+{
+ return (_data + EntryBase::size());
+}
+
+
+template <>
+void
+EnumStoreT<StringEntryType>::writeValues(BufferWriter &writer,
+ const Index *idxs,
+ size_t count) const;
+
+template <>
+ssize_t
+EnumStoreT<StringEntryType>::deserialize(const void *src,
+ size_t available,
+ size_t &initSpace);
+
+template <>
+ssize_t
+EnumStoreT<StringEntryType>::deserialize(const void *src,
+ size_t available,
+ Index &idx);
+
+
+//-----------------------------------------------------------------------------
+// EnumStore
+//-----------------------------------------------------------------------------
+
+template <>
+void
+EnumStoreT<StringEntryType>::
+insertEntryValue(char * dst, Type value);
+
+template <>
+void
+EnumStoreT<StringEntryType>::
+printEntry(vespalib::asciistream & os, const Entry & e) const;
+
+template <>
+void
+EnumStoreT<NumericEntryType<float> >::
+printEntry(vespalib::asciistream & os, const Entry & e) const;
+
+template <>
+void
+EnumStoreT<NumericEntryType<double> >::
+printEntry(vespalib::asciistream & os, const Entry & e) const;
+
+template <>
+void
+EnumStoreT<StringEntryType>::printValue(vespalib::asciistream & os, Index idx) const;
+
+template <>
+void
+EnumStoreT<StringEntryType>::printValue(vespalib::asciistream & os, Type value) const;
+
+extern template
+class btree::BTreeNodeDataWrap<btree::BTreeNoLeafData,
+ EnumTreeTraits::LEAF_SLOTS>;
+
+extern template
+class btree::BTreeNodeDataWrap<btree::EntryRef,
+ EnumTreeTraits::LEAF_SLOTS>;
+
+#if 0
+extern template
+class btree::BTreeKeyData<EnumStoreBase::Index,
+ btree::BTreeNoLeafData>;
+
+extern template
+class btree::BTreeKeyData<EnumStoreBase::Index,
+ btree::EntryRef>;
+#endif
+
+extern template
+class btree::BTreeNodeT<EnumStoreBase::Index,
+ EnumTreeTraits::INTERNAL_SLOTS>;
+
+#if 0
+extern template
+class btree::BTreeNodeT<EnumStoreBase::Index,
+ EnumTreeTraits::LEAF_SLOTS>;
+#endif
+
+extern template
+class btree::BTreeNodeTT<EnumStoreBase::Index,
+ btree::EntryRef,
+ btree::NoAggregated,
+ EnumTreeTraits::INTERNAL_SLOTS>;
+
+extern template
+class btree::BTreeNodeTT<EnumStoreBase::Index,
+ btree::BTreeNoLeafData,
+ btree::NoAggregated,
+ EnumTreeTraits::LEAF_SLOTS>;
+
+#if 0
+extern template
+class btree::BTreeNodeTT<EnumStoreBase::Index,
+ btree::EntryRef,
+ btree::NoAggregated,
+ EnumTreeTraits::LEAF_SLOTS>;
+#endif
+
+extern template
+class btree::BTreeInternalNode<EnumStoreBase::Index,
+ btree::NoAggregated,
+ EnumTreeTraits::INTERNAL_SLOTS>;
+
+extern template
+class btree::BTreeLeafNode<EnumStoreBase::Index,
+ btree::BTreeNoLeafData,
+ btree::NoAggregated,
+ EnumTreeTraits::LEAF_SLOTS>;
+
+extern template
+class btree::BTreeLeafNode<EnumStoreBase::Index,
+ btree::EntryRef,
+ btree::NoAggregated,
+ EnumTreeTraits::LEAF_SLOTS>;
+
+extern template
+class btree::BTreeLeafNodeTemp<EnumStoreBase::Index,
+ btree::BTreeNoLeafData,
+ btree::NoAggregated,
+ EnumTreeTraits::LEAF_SLOTS>;
+
+extern template
+class btree::BTreeLeafNodeTemp<EnumStoreBase::Index,
+ btree::EntryRef,
+ btree::NoAggregated,
+ EnumTreeTraits::LEAF_SLOTS>;
+
+extern template
+class btree::BTreeNodeStore<EnumStoreBase::Index,
+ btree::BTreeNoLeafData,
+ btree::NoAggregated,
+ EnumTreeTraits::INTERNAL_SLOTS,
+ EnumTreeTraits::LEAF_SLOTS>;
+
+extern template
+class btree::BTreeNodeStore<EnumStoreBase::Index,
+ btree::EntryRef,
+ btree::NoAggregated,
+ EnumTreeTraits::INTERNAL_SLOTS,
+ EnumTreeTraits::LEAF_SLOTS>;
+
+extern template
+class btree::BTreeIteratorBase<EnumStoreBase::Index,
+ btree::BTreeNoLeafData,
+ btree::NoAggregated,
+ EnumTreeTraits::INTERNAL_SLOTS,
+ EnumTreeTraits::LEAF_SLOTS,
+ EnumTreeTraits::PATH_SIZE>;
+extern template
+class btree::BTreeIteratorBase<EnumStoreBase::Index,
+ btree::EntryRef,
+ btree::NoAggregated,
+ EnumTreeTraits::INTERNAL_SLOTS,
+ EnumTreeTraits::LEAF_SLOTS,
+ EnumTreeTraits::PATH_SIZE>;
+extern template
+class btree::BTreeIterator<EnumStoreBase::Index,
+ btree::BTreeNoLeafData,
+ btree::NoAggregated,
+ const EnumStoreComparatorWrapper,
+ EnumTreeTraits>;
+extern template
+class btree::BTreeIterator<EnumStoreBase::Index,
+ btree::EntryRef,
+ btree::NoAggregated,
+ const EnumStoreComparatorWrapper,
+ EnumTreeTraits>;
+extern template
+class btree::BTree<EnumStoreBase::Index,
+ btree::BTreeNoLeafData,
+ btree::NoAggregated,
+ const EnumStoreComparatorWrapper,
+ EnumTreeTraits>;
+extern template
+class btree::BTree<EnumStoreBase::Index,
+ btree::EntryRef,
+ btree::NoAggregated,
+ const EnumStoreComparatorWrapper,
+ EnumTreeTraits>;
+extern template
+class btree::BTreeRoot<EnumStoreBase::Index,
+ btree::BTreeNoLeafData,
+ btree::NoAggregated,
+ const EnumStoreComparatorWrapper,
+ EnumTreeTraits>;
+extern template
+class btree::BTreeRoot<EnumStoreBase::Index,
+ btree::EntryRef,
+ btree::NoAggregated,
+ const EnumStoreComparatorWrapper,
+ EnumTreeTraits>;
+extern template
+class btree::BTreeRootT<EnumStoreBase::Index,
+ btree::BTreeNoLeafData,
+ btree::NoAggregated,
+ const EnumStoreComparatorWrapper,
+ EnumTreeTraits>;
+extern template
+class btree::BTreeRootT<EnumStoreBase::Index,
+ btree::EntryRef,
+ btree::NoAggregated,
+ const EnumStoreComparatorWrapper,
+ EnumTreeTraits>;
+extern template
+class btree::BTreeRootBase<EnumStoreBase::Index,
+ btree::BTreeNoLeafData,
+ btree::NoAggregated,
+ EnumTreeTraits::INTERNAL_SLOTS,
+ EnumTreeTraits::LEAF_SLOTS>;
+extern template
+class btree::BTreeRootBase<EnumStoreBase::Index,
+ btree::EntryRef,
+ btree::NoAggregated,
+ EnumTreeTraits::INTERNAL_SLOTS,
+ EnumTreeTraits::LEAF_SLOTS>;
+
+extern template
+class btree::BTreeNodeAllocator<EnumStoreBase::Index,
+ btree::BTreeNoLeafData,
+ btree::NoAggregated,
+ EnumTreeTraits::INTERNAL_SLOTS,
+ EnumTreeTraits::LEAF_SLOTS>;
+extern template
+class btree::BTreeNodeAllocator<EnumStoreBase::Index,
+ btree::EntryRef,
+ btree::NoAggregated,
+ EnumTreeTraits::INTERNAL_SLOTS,
+ EnumTreeTraits::LEAF_SLOTS>;
+
+extern template
+class btree::BTreeBuilder<EnumStoreBase::Index,
+ btree::BTreeNoLeafData,
+ btree::NoAggregated,
+ EnumTreeTraits::INTERNAL_SLOTS,
+ EnumTreeTraits::LEAF_SLOTS>;
+extern template
+class btree::BTreeBuilder<EnumStoreBase::Index,
+ btree::EntryRef,
+ btree::NoAggregated,
+ EnumTreeTraits::INTERNAL_SLOTS,
+ EnumTreeTraits::LEAF_SLOTS>;
+
+extern template class EnumStoreT< StringEntryType >;
+extern template class EnumStoreT<NumericEntryType<int8_t> >;
+extern template class EnumStoreT<NumericEntryType<int16_t> >;
+extern template class EnumStoreT<NumericEntryType<int32_t> >;
+extern template class EnumStoreT<NumericEntryType<int64_t> >;
+extern template class EnumStoreT<NumericEntryType<float> >;
+extern template class EnumStoreT<NumericEntryType<double> >;
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/attribute/enumstore.hpp b/searchlib/src/vespa/searchlib/attribute/enumstore.hpp
new file mode 100644
index 00000000000..aa0de0e8845
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/enumstore.hpp
@@ -0,0 +1,502 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/attribute/enumstore.h>
+#include <vespa/searchlib/attribute/enumcomparator.h>
+
+#include <vespa/searchlib/btree/btreenode.hpp>
+#include <vespa/searchlib/btree/btreenodestore.hpp>
+#include <vespa/searchlib/btree/btreenodeallocator.hpp>
+#include <vespa/searchlib/btree/btreeiterator.hpp>
+#include <vespa/searchlib/btree/btreeroot.hpp>
+#include <vespa/searchlib/btree/btreebuilder.hpp>
+#include <vespa/searchlib/btree/btree.hpp>
+#include <vespa/searchlib/util/bufferwriter.h>
+
+namespace search {
+
+template <typename EntryType>
+void EnumStoreT<EntryType>::freeUnusedEnum(Index idx, IndexSet & unused)
+{
+ Entry e = getEntry(idx);
+ if (e.getRefCount() == 0) {
+ Type value = e.getValue();
+ if (unused.insert(idx).second) {
+ _store.incDead(idx.bufferId(), getEntrySize(value));
+ }
+ }
+}
+
+template <typename EntryType>
+void
+EnumStoreT<EntryType>::
+insertEntry(char * dst, uint32_t enumValue, uint32_t refCount, Type value)
+{
+ memcpy(dst, &enumValue, sizeof(uint32_t));
+ uint32_t pos = sizeof(uint32_t);
+ memcpy(dst + pos, &refCount, sizeof(uint32_t));
+ pos += sizeof(uint32_t);
+ insertEntryValue(dst + pos, value);
+}
+
+template <>
+void
+EnumStoreT<StringEntryType>::
+insertEntryValue(char * dst, Type value);
+
+template <typename EntryType>
+void
+EnumStoreT<EntryType>::printEntry(vespalib::asciistream & os, const Entry & e) const
+{
+ os << "Entry: {";
+ os << "enum: " << e.getEnum();
+ os << ", refcount: " << e.getRefCount();
+ os << ", value: " << e.getValue();
+ os << "}";
+}
+
+template <typename EntryType>
+bool
+EnumStoreT<EntryType>::getValue(Index idx, Type & value) const
+{
+ if (!validIndex(idx)) {
+ return false;
+ }
+ value = getEntry(idx).getValue();
+ return true;
+}
+
+template <typename EntryType>
+void
+EnumStoreT<EntryType>::printBuffer(vespalib::asciistream & os, uint32_t bufferIdx) const
+{
+ uint64_t i = 0;
+ while (i < _store.getBufferState(bufferIdx).size()) {
+ Index idx(i, bufferIdx);
+
+ Entry e = this->getEntry(idx);
+ this->printEntry(os, e);
+ os << ", " << idx << '\n';
+ i += this->getEntrySize(e.getValue());
+ }
+}
+
+template <typename EntryType>
+void
+EnumStoreT<EntryType>::printValue(vespalib::asciistream & os, Index idx) const
+{
+ os << getValue(idx);
+}
+
+template <typename EntryType>
+void
+EnumStoreT<EntryType>::printValue(vespalib::asciistream & os, Type value) const
+{
+ os << value;
+}
+
+
+template <class EntryType>
+void
+EnumStoreT<EntryType>::writeValues(BufferWriter &writer,
+ const Index *idxs, size_t count) const
+{
+ size_t sz(EntryType::fixedSize());
+ for (uint32_t i = 0; i < count; ++i) {
+ Index idx = idxs[i];
+ const char *src(_store.getBufferEntry<char>(idx.bufferId(),
+ idx.offset()) +
+ EntryBase::size());
+ writer.write(src, sz);
+ }
+}
+
+
+template <class EntryType>
+ssize_t
+EnumStoreT<EntryType>::deserialize(const void *src,
+ size_t available,
+ size_t &initSpace)
+{
+ (void) src;
+ size_t sz(EntryType::fixedSize());
+ if (available < sz)
+ return -1;
+ uint32_t entrySize(alignEntrySize(EntryBase::size() + sz));
+ initSpace += entrySize;
+ return sz;
+}
+
+template <class EntryType>
+ssize_t
+EnumStoreT<EntryType>::deserialize(const void *src,
+ size_t available,
+ Index &idx)
+{
+ size_t sz(EntryType::fixedSize());
+ if (available < sz)
+ return -1;
+ uint32_t activeBufferId = _store.getActiveBufferId(TYPE_ID);
+ btree::BufferState & buffer = _store.getBufferState(activeBufferId);
+ uint32_t entrySize(alignEntrySize(EntryBase::size() + sz));
+ if (buffer.remaining() < entrySize) {
+ abort(); // not enough space
+ }
+ uint64_t offset = buffer.size();
+ char *dst(_store.getBufferEntry<char>(activeBufferId, offset));
+ memcpy(dst, &_nextEnum, sizeof(uint32_t));
+ uint32_t pos = sizeof(uint32_t);
+ uint32_t refCount(0);
+ memcpy(dst + pos, &refCount, sizeof(uint32_t));
+ pos += sizeof(uint32_t);
+ memcpy(dst + pos, src, sz);
+ buffer.pushed_back(entrySize);
+ ++_nextEnum;
+
+ if (idx.valid()) {
+ assert(ComparatorType::compare(getValue(idx),
+ Entry(dst).getValue()) < 0);
+ }
+ idx = Index(offset, activeBufferId);
+ return sz;
+}
+
+
+template <class EntryType>
+bool
+EnumStoreT<EntryType>::foldedChange(const Index &idx1, const Index &idx2)
+{
+ int cmpres = FoldedComparatorType::compareFolded(getValue(idx1),
+ getValue(idx2));
+ assert(cmpres <= 0);
+ return cmpres < 0;
+}
+
+
+template <typename EntryType>
+bool
+EnumStoreT<EntryType>::findEnum(Type value,
+ EnumStoreBase::EnumHandle &e) const
+{
+ ComparatorType cmp(*this, value);
+ Index idx;
+ if (_enumDict->findFrozenIndex(cmp, idx)) {
+ e = idx.ref();
+ return true;
+ }
+ return false;
+}
+
+template <typename EntryType>
+bool
+EnumStoreT<EntryType>::findIndex(Type value, Index &idx) const
+{
+ ComparatorType cmp(*this, value);
+ return _enumDict->findIndex(cmp, idx);
+}
+
+
+template <typename EntryType>
+void
+EnumStoreT<EntryType>::freeUnusedEnums(bool movePostingIdx)
+{
+ ComparatorType cmp(*this);
+ if (EntryType::hasFold() && movePostingIdx) {
+ FoldedComparatorType fcmp(*this);
+ _enumDict->freeUnusedEnums(cmp, &fcmp);
+ } else {
+ _enumDict->freeUnusedEnums(cmp, NULL);
+ }
+}
+
+
+template <typename EntryType>
+void
+EnumStoreT<EntryType>::freeUnusedEnums(const IndexVector &toRemove)
+{
+ ComparatorType cmp(*this);
+ if (EntryType::hasFold()) {
+ FoldedComparatorType fcmp(*this);
+ _enumDict->freeUnusedEnums(toRemove, cmp, &fcmp);
+ } else {
+ _enumDict->freeUnusedEnums(toRemove, cmp, NULL);
+ }
+}
+
+
+template <typename EntryType>
+template <typename Dictionary>
+void
+EnumStoreT<EntryType>::addEnum(Type value,
+ Index &newIdx,
+ Dictionary &dict)
+{
+ typedef typename Dictionary::Iterator DictionaryIterator;
+ uint32_t entrySize = this->getEntrySize(value);
+ uint32_t activeBufferId = _store.getActiveBufferId(TYPE_ID);
+ btree::BufferState & buffer = _store.getBufferState(activeBufferId);
+#ifdef LOG_ENUM_STORE
+ LOG(info,
+ "addEnum(): buffer[%u]: capacity = %" PRIu64
+ ", size = %" PRIu64 ", remaining = %" PRIu64
+ ", dead = %" PRIu64 ", entrySize = %u",
+ activeBufferId, buffer.capacity(),
+ buffer.size(), buffer.remaining(),
+ buffer._deadElems, entrySize);
+#endif
+ if (buffer.remaining() < entrySize) {
+ abort(); // not enough space
+ }
+
+ // check if already present
+ ComparatorType cmp(*this, value);
+ DictionaryIterator it(btree::BTreeNode::Ref(), dict.getAllocator());
+ it.lower_bound(dict.getRoot(), Index(), cmp);
+ if (it.valid() && !cmp(Index(), it.getKey())) {
+ newIdx = it.getKey();
+ return;
+ }
+
+ uint64_t offset = buffer.size();
+ char * dst = _store.template getBufferEntry<char>(activeBufferId, offset);
+ this->insertEntry(dst, this->_nextEnum++, 0, value);
+ buffer.pushed_back(entrySize);
+ assert(Index::pad(offset) == 0);
+ newIdx = Index(offset, activeBufferId);
+
+ // update tree with new index
+ dict.insert(it, newIdx, typename Dictionary::DataType());
+
+ // Copy posting list idx from next entry if same
+ // folded value.
+ // Only for string posting list attributes, i.e. dictionary has
+ // data and entry type has folded compare.
+ if (DictionaryIterator::hasData() && EntryType::hasFold()) {
+ FoldedComparatorType foldCmp(*this);
+ ++it;
+ if (!it.valid() || foldCmp(newIdx, it.getKey()))
+ return; // Next entry does not use same posting list
+ --it;
+ --it;
+ if (it.valid() && !foldCmp(it.getKey(), newIdx))
+ return; // Previous entry uses same posting list
+ if (it.valid())
+ ++it;
+ else
+ it.begin();
+ assert(it.valid() && it.getKey() == newIdx);
+ ++it;
+ typename Dictionary::DataType pidx(it.getData());
+ dict.thaw(it);
+ it.writeData(typename Dictionary::DataType());
+ --it;
+ assert(it.valid() && it.getKey() == newIdx);
+ it.writeData(pidx);
+ }
+}
+
+
+template <typename EntryType>
+void
+EnumStoreT<EntryType>::addEnum(Type value, Index & newIdx)
+{
+ if (_enumDict->hasData())
+ addEnum(value, newIdx,
+ static_cast<EnumStoreDict<EnumPostingTree> *>(_enumDict)->
+ getDictionary());
+ else
+ addEnum(value, newIdx,
+ static_cast<EnumStoreDict<EnumTree> *>(_enumDict)->
+ getDictionary());
+}
+
+
+template <typename DictionaryType>
+struct TreeBuilderInserter {
+ static void insert(typename DictionaryType::Builder & builder,
+ EnumStoreBase::Index enumIdx,
+ btree::EntryRef postingIdx)
+ {
+ (void) postingIdx;
+ builder.insert(enumIdx, typename DictionaryType::DataType());
+ }
+};
+
+template <>
+struct TreeBuilderInserter<EnumPostingTree> {
+ static void insert(EnumPostingTree::Builder & builder,
+ EnumStoreBase::Index enumIdx,
+ btree::EntryRef postingIdx)
+ {
+ builder.insert(enumIdx, postingIdx);
+ }
+};
+
+
+template <typename EntryType>
+template <typename Dictionary>
+void
+EnumStoreT<EntryType>::reset(Builder &builder, Dictionary &dict)
+{
+ typedef typename Dictionary::Builder DictionaryBuilder;
+ EnumStoreBase::reset(builder.getBufferSize());
+
+ DictionaryBuilder treeBuilder(dict.getAllocator());
+ uint32_t activeBufferId = _store.getActiveBufferId(TYPE_ID);
+ btree::BufferState & state = _store.getBufferState(activeBufferId);
+
+ // insert entries and update DictionaryBuilder
+ const typename Builder::Uniques & uniques = builder.getUniques();
+ for (typename Builder::Uniques::const_iterator iter = uniques.begin();
+ iter != uniques.end(); ++iter)
+ {
+ uint64_t offset = state.size();
+ Index idx(offset, activeBufferId);
+ char * dst = _store.template getBufferEntry<char>(activeBufferId, offset);
+ this->insertEntry(dst, this->_nextEnum++, iter->_refCount, iter->_value);
+ state.pushed_back(iter->_sz);
+
+ // update DictionaryBuilder with enum index and posting index
+ TreeBuilderInserter<Dictionary>::insert(treeBuilder, idx, btree::EntryRef(iter->_pidx));
+ }
+
+ // reset Dictionary
+ dict.assign(treeBuilder); // destructive copy of treeBuilder
+}
+
+
+template <typename EntryType>
+void
+EnumStoreT<EntryType>::reset(Builder &builder)
+{
+ if (_enumDict->hasData())
+ reset(builder,
+ static_cast<EnumStoreDict<EnumPostingTree> *>(_enumDict)->
+ getDictionary());
+ else
+ reset(builder,
+ static_cast<EnumStoreDict<EnumTree> *>(_enumDict)->
+ getDictionary());
+}
+
+
+template <typename EntryType>
+template <typename Dictionary>
+void
+EnumStoreT<EntryType>::performCompaction(Dictionary &dict)
+{
+ typedef typename Dictionary::Iterator DictionaryIterator;
+ uint32_t freeBufferIdx = _store.getActiveBufferId(TYPE_ID);
+ btree::BufferState & freeBuf = _store.getBufferState(freeBufferIdx);
+ bool disabledReEnumerate = _disabledReEnumerate;
+
+ uint32_t newEnum = 0;
+ // copy entries from active buffer to free buffer
+ for (DictionaryIterator iter = dict.begin(); iter.valid(); ++iter) {
+ Index activeIdx = iter.getKey();
+
+ Entry e = this->getEntry(activeIdx);
+
+ // At this point the tree shal never reference any empy stuff.
+ assert(e.getRefCount() > 0);
+#ifdef LOG_ENUM_STORE
+ LOG(info, "performCompaction(): copy entry: enum = %u, refCount = %u, value = %s",
+ e.getEnum(), e.getRefCount(), e.getValue());
+#endif
+ Type value = e.getValue();
+ uint32_t refCount = e.getRefCount();
+ uint32_t oldEnum = e.getEnum();
+ uint32_t entrySize = this->getEntrySize(value);
+ if (disabledReEnumerate) {
+ newEnum = oldEnum; // use old enum value
+ }
+
+ uint64_t offset = freeBuf.size();
+ char * dst = _store.template getBufferEntry<char>(freeBufferIdx, offset);
+ // insert entry into free buffer
+ this->insertEntry(dst, newEnum, refCount, value);
+#ifdef LOG_ENUM_STORE
+ LOG(info, "performCompaction(): new entry: enum = %u, refCount = %u, value = %s", newEnum, 0, value);
+#endif
+ if (!disabledReEnumerate) {
+ ++newEnum;
+ }
+ freeBuf.pushed_back(entrySize);
+ assert(Index::pad(offset) == 0);
+ Index newIdx = Index(offset, freeBufferIdx);
+#ifdef LOG_ENUM_STORE
+ LOG(info,
+ "performCompaction(): new index: offset = %" PRIu64
+ ", bufferIdx = %u",
+ offset, freeBufferIdx);
+#endif
+
+ // update tree with new index
+ std::atomic_thread_fence(std::memory_order_release);
+ iter.writeKey(newIdx);
+
+ // update index map with new index
+ this->_indexMap[oldEnum] = newIdx;
+ }
+ if (disabledReEnumerate) {
+ newEnum = this->_nextEnum; // use old range of enum values
+ }
+ this->postCompact(newEnum);
+}
+
+
+template <typename EntryType>
+bool
+EnumStoreT<EntryType>::performCompaction(uint64_t bytesNeeded)
+{
+ if ( ! this->preCompact(bytesNeeded) ) {
+ return false;
+ }
+ if (_enumDict->hasData())
+ performCompaction(static_cast<EnumStoreDict<EnumPostingTree> *>
+ (_enumDict)->getDictionary());
+ else
+ performCompaction(static_cast<EnumStoreDict<EnumTree> *>
+ (_enumDict)->getDictionary());
+ return true;
+}
+
+
+template <typename EntryType>
+template <typename Dictionary>
+void
+EnumStoreT<EntryType>::printCurrentContent(vespalib::asciistream &os,
+ const Dictionary &dict) const
+{
+ typedef typename Dictionary::ConstIterator DictionaryConstIterator;
+
+ for (DictionaryConstIterator iter = dict.begin(); iter.valid(); ++iter) {
+ Index idx = iter.getKey();
+ if (!this->validIndex(idx)) {
+ os << "Bad entry: " << idx << '\n';
+ } else {
+ Entry e = this->getEntry(idx);
+ this->printEntry(os, e);
+ os << ", " << idx << '\n';
+ }
+ }
+}
+
+
+template <typename EntryType>
+void
+EnumStoreT<EntryType>::printCurrentContent(vespalib::asciistream &os) const
+{
+ if (_enumDict->hasData())
+ printCurrentContent(os,
+ static_cast<EnumStoreDict<EnumPostingTree> *>
+ (_enumDict)->getDictionary());
+ else
+ printCurrentContent(os,
+ static_cast<EnumStoreDict<EnumTree> *>
+ (_enumDict)->getDictionary());
+}
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/attribute/enumstorebase.cpp b/searchlib/src/vespa/searchlib/attribute/enumstorebase.cpp
new file mode 100644
index 00000000000..3d5744b115b
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/enumstorebase.cpp
@@ -0,0 +1,657 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "enumstorebase.h"
+#include <vespa/searchlib/btree/datastore.hpp>
+#include <vespa/vespalib/util/exceptions.h>
+#include <vespa/vespalib/util/stringfmt.h>
+#include <stdexcept>
+#include "enumstore.h"
+#include <vespa/searchlib/btree/btreeiterator.hpp>
+#include <vespa/searchlib/btree/btreenode.hpp>
+#include <vespa/searchlib/util/bufferwriter.h>
+
+namespace search
+{
+
+using btree::BTreeNode;
+
+void
+EnumStoreBase::verifyBufferSize(uint64_t initBufferSize)
+{
+ uint64_t alignedInitBufferSize = alignBufferSize(initBufferSize);
+ if (alignedInitBufferSize > Index::offsetSize()) {
+ failNewSize(alignedInitBufferSize, Index::offsetSize());
+ }
+}
+
+EnumStoreBase::EnumStoreBase(uint64_t initBufferSize,
+ bool hasPostings)
+ : _enumDict(NULL),
+ _store(),
+ _type(alignBufferSize(initBufferSize)),
+ _nextEnum(0),
+ _indexMap(),
+ _toHoldBuffers(),
+ _disabledReEnumerate(false)
+{
+ if (hasPostings)
+ _enumDict = new EnumStoreDict<EnumPostingTree>(*this);
+ else
+ _enumDict = new EnumStoreDict<EnumTree>(*this);
+ verifyBufferSize(initBufferSize);
+ _store.addType(&_type);
+ _store.initActiveBuffers();
+}
+
+EnumStoreBase::~EnumStoreBase()
+{
+ _store.clearHoldLists();
+ _store.dropBuffers();
+ delete _enumDict;
+}
+
+void
+EnumStoreBase::reset(uint64_t initBufferSize)
+{
+ verifyBufferSize(initBufferSize);
+ _store.clearHoldLists();
+ _store.dropBuffers();
+ _type.setInitBufferSize(alignBufferSize(initBufferSize));
+ _store.initActiveBuffers();
+ clearIndexMap();
+ _enumDict->onReset();
+ _nextEnum = 0;
+}
+
+uint32_t
+EnumStoreBase::getBufferIndex(btree::BufferState::State status)
+{
+ for (uint32_t i = 0; i < _store.getNumBuffers(); ++i) {
+ if (_store.getBufferState(i)._state == status) {
+ return i;
+ }
+ }
+ return Index::numBuffers();
+}
+
+bool
+EnumStoreBase::getCurrentIndex(Index oldIdx, Index & newIdx) const
+{
+ uint32_t oldEnum = getEnum(oldIdx);
+ if (oldEnum >= _indexMap.size()) {
+ return false;
+ }
+ newIdx = _indexMap[oldEnum];
+ return true;
+}
+
+MemoryUsage
+EnumStoreBase::getMemoryUsage() const
+{
+ return _store.getMemoryUsage();
+}
+
+AddressSpace
+EnumStoreBase::getAddressSpaceUsage() const
+{
+ const btree::BufferState &activeState =
+ _store.getBufferState(_store.getActiveBufferId(TYPE_ID));
+ return AddressSpace(activeState.size() - activeState.getDeadElems(),
+ DataStoreType::RefType::offsetSize());
+}
+
+void
+EnumStoreBase::getEnumValue(const EnumHandle * v, uint32_t *e, uint32_t sz) const
+{
+ for(size_t i(0); i < sz; i++) {
+ e[i] = getEnum(Index(v[i]));
+ }
+}
+
+void
+EnumStoreBase::transferHoldLists(generation_t generation)
+{
+ _enumDict->onTransferHoldLists(generation);
+ _store.transferHoldLists(generation);
+}
+
+void
+EnumStoreBase::trimHoldLists(generation_t firstUsed)
+{
+ // remove generations in the range [0, firstUsed>
+ _enumDict->onTrimHoldLists(firstUsed);
+ _store.trimHoldLists(firstUsed);
+}
+
+bool
+EnumStoreBase::preCompact(uint64_t bytesNeeded)
+{
+ if (getBufferIndex(btree::BufferState::FREE) == Index::numBuffers()) {
+ return false;
+ }
+ btree::BufferState & activeBuf = _store.getBufferState(_store.getActiveBufferId(TYPE_ID));
+
+ // allocate enough space in free buffer
+ uint64_t newSize = computeNewSize(activeBuf.size(), activeBuf._deadElems, bytesNeeded);
+ _type.setInitBufferSize(newSize);
+ _toHoldBuffers = _store.startCompact(TYPE_ID);
+
+ _indexMap.resize(_nextEnum);
+ return true;
+}
+
+
+void
+EnumStoreBase::fallbackResize(uint64_t bytesNeeded)
+{
+ uint32_t activeBufId = _store.getActiveBufferId(TYPE_ID);
+ btree::BufferState &activeBuf = _store.getBufferState(activeBufId);
+
+ // allocate enough space in free buffer
+ uint64_t newSize = computeNewSize(activeBuf.size(),
+ activeBuf._deadElems,
+ bytesNeeded);
+
+ uint64_t maxSize = Index::offsetSize();
+
+ uint64_t fallbackNewSize = newSize + activeBuf._deadElems + 16384;
+ fallbackNewSize = alignBufferSize(fallbackNewSize);
+ if (fallbackNewSize > maxSize)
+ fallbackNewSize = maxSize;
+ if (fallbackNewSize <= activeBuf._allocElems ||
+ fallbackNewSize < activeBuf._usedElems + bytesNeeded)
+ failNewSize(activeBuf._usedElems + bytesNeeded, maxSize);
+
+ _type.setInitBufferSize(alignBufferSize(fallbackNewSize));
+ _type.setWantCompact();
+ _store.fallbackResize(activeBufId, fallbackNewSize);
+}
+
+
+void
+EnumStoreBase::disableReEnumerate() const
+{
+ assert(!_disabledReEnumerate);
+ _disabledReEnumerate = true;
+}
+
+
+void
+EnumStoreBase::enableReEnumerate() const
+{
+ assert(_disabledReEnumerate);
+ _disabledReEnumerate = false;
+}
+
+
+void
+EnumStoreBase::postCompact(uint32_t newEnum)
+{
+ _store.finishCompact(_toHoldBuffers);
+ _nextEnum = newEnum;
+}
+
+void
+EnumStoreBase::failNewSize(uint64_t minNewSize, uint64_t maxSize)
+{
+ throw vespalib::IllegalStateException(vespalib::make_string("EnumStoreBase::failNewSize: Minimum new size (%" PRIu64 ") exceeds max size (%" PRIu64 ")", minNewSize, maxSize));
+}
+
+uint64_t
+EnumStoreBase::computeNewSize(uint64_t used, uint64_t dead, uint64_t needed)
+{
+ double growRatio = 1.5f;
+ uint64_t maxSize = Index::offsetSize();
+ uint64_t newSize = static_cast<uint64_t>
+ ((used - dead + needed) * growRatio);
+ newSize = alignBufferSize(newSize);
+ if (newSize <= maxSize)
+ return newSize;
+ newSize = used - dead + needed + 1000000;
+ newSize = alignBufferSize(newSize);
+ if (newSize <= maxSize)
+ return maxSize;
+ failNewSize(newSize, maxSize);
+ return 0;
+}
+
+
+template <class Tree>
+void
+EnumStoreBase::reEnumerate(const Tree &tree)
+{
+ typedef typename Tree::Iterator Iterator;
+ Iterator it(tree.begin());
+ uint32_t enumValue = 0;
+ while (it.valid()) {
+ EntryBase eb(getEntryBase(it.getKey()));
+ eb.setEnum(enumValue);
+ ++enumValue;
+ ++it;
+ }
+ _nextEnum = enumValue;
+ std::atomic_thread_fence(std::memory_order_release);
+}
+
+
+ssize_t
+EnumStoreBase::deserialize0(const void *src,
+ size_t available,
+ IndexVector &idx)
+{
+ size_t left = available;
+ size_t initSpace = Index::align(1);
+ const char * p = static_cast<const char *>(src);
+ while (left > 0) {
+ ssize_t sz = deserialize(p, left, initSpace);
+ if (sz < 0)
+ return sz;
+ p += sz;
+ left -= sz;
+ }
+ reset(initSpace);
+ left = available;
+ p = static_cast<const char *>(src);
+ Index idx1;
+ while (left > 0) {
+ ssize_t sz = deserialize(p, left, idx1);
+ if (sz < 0)
+ return sz;
+ p += sz;
+ left -= sz;
+ idx.push_back(idx1);
+ }
+ return available - left;
+}
+
+
+template <typename Tree>
+ssize_t
+EnumStoreBase::deserialize(const void *src,
+ size_t available,
+ IndexVector &idx,
+ Tree &tree)
+{
+ ssize_t sz(deserialize0(src, available, idx));
+ if (sz >= 0) {
+ typename Tree::Builder builder(tree.getAllocator());
+ typedef IndexVector::const_iterator IT;
+ for (IT i(idx.begin()), ie(idx.end()); i != ie; ++i) {
+ builder.insert(*i, typename Tree::DataType());
+ }
+ tree.assign(builder);
+ }
+ return sz;
+}
+
+
+template <typename Tree>
+void
+EnumStoreBase::fixupRefCounts(const EnumVector &hist, Tree &tree)
+{
+ if ( hist.empty() )
+ return;
+ typename Tree::Iterator ti(tree.begin());
+ typedef EnumVector::const_iterator HistIT;
+
+ for (HistIT hi(hist.begin()), hie(hist.end()); hi != hie; ++hi, ++ti) {
+ assert(ti.valid());
+ fixupRefCount(ti.getKey(), *hi);
+ }
+ assert(!ti.valid());
+ freeUnusedEnums(false);
+}
+
+
+void
+EnumStoreBase::writeEnumValues(BufferWriter &writer,
+ const Index *idxs, size_t count) const
+{
+ for (uint32_t i = 0; i < count; ++i) {
+ uint32_t enumValue = getEnum(idxs[i]);
+ writer.write(&enumValue, sizeof(uint32_t));
+ }
+}
+
+
+vespalib::asciistream & operator << (vespalib::asciistream & os, const EnumStoreBase::Index & idx) {
+ return os << "offset(" << idx.offset() << "), bufferId(" << idx.bufferId() << "), idx(" << idx.ref() << ")";
+}
+
+
+EnumStoreDictBase::EnumStoreDictBase(EnumStoreBase &enumStore)
+ : _enumStore(enumStore)
+{
+}
+
+
+EnumStoreDictBase::~EnumStoreDictBase(void)
+{
+}
+
+
+template <typename Dictionary>
+EnumStoreDict<Dictionary>::EnumStoreDict(EnumStoreBase &enumStore)
+ : EnumStoreDictBase(enumStore),
+ _dict()
+{
+}
+
+template <typename Dictionary>
+EnumStoreDict<Dictionary>::~EnumStoreDict(void)
+{
+}
+
+
+template <typename Dictionary>
+void
+EnumStoreDict<Dictionary>::freezeTree(void)
+{
+ _dict.getAllocator().freeze();
+}
+
+template <typename Dictionary>
+uint32_t
+EnumStoreDict<Dictionary>::getNumUniques() const
+{
+ return _dict.size();
+}
+
+
+template <typename Dictionary>
+MemoryUsage
+EnumStoreDict<Dictionary>::getTreeMemoryUsage() const
+{
+ return _dict.getMemoryUsage();
+}
+
+template <typename Dictionary>
+void
+EnumStoreDict<Dictionary>::reEnumerate(void)
+{
+ _enumStore.reEnumerate(_dict);
+}
+
+
+template <typename Dictionary>
+void
+EnumStoreDict<Dictionary>::
+writeAllValues(BufferWriter &writer,
+ btree::BTreeNode::Ref rootRef) const
+{
+ constexpr size_t BATCHSIZE = 1000;
+ std::vector<Index> idxs;
+ idxs.reserve(BATCHSIZE);
+ typename Dictionary::Iterator it(rootRef, _dict.getAllocator());
+ while (it.valid()) {
+ if (idxs.size() >= idxs.capacity()) {
+ _enumStore.writeValues(writer, &idxs[0], idxs.size());
+ idxs.clear();
+ }
+ idxs.push_back(it.getKey());
+ ++it;
+ }
+ if (!idxs.empty()) {
+ _enumStore.writeValues(writer, &idxs[0], idxs.size());
+ }
+}
+
+
+template <typename Dictionary>
+ssize_t
+EnumStoreDict<Dictionary>::deserialize(const void *src,
+ size_t available,
+ IndexVector &idx)
+{
+ return _enumStore.deserialize(src, available, idx, _dict);
+}
+
+
+template <typename Dictionary>
+void
+EnumStoreDict<Dictionary>::fixupRefCounts(const EnumVector & hist)
+{
+ _enumStore.fixupRefCounts(hist, _dict);
+}
+
+
+template <typename Dictionary>
+void
+EnumStoreDict<Dictionary>::removeUnusedEnums(const IndexSet &unused,
+ const EnumStoreComparator &cmp,
+ const EnumStoreComparator *fcmp)
+{
+ typedef typename Dictionary::Iterator Iterator;
+ if (unused.empty())
+ return;
+ Iterator it(BTreeNode::Ref(), _dict.getAllocator());
+ for (IndexSet::const_iterator iter(unused.begin()), mt(unused.end());
+ iter != mt; ++iter) {
+ it.lower_bound(_dict.getRoot(), *iter, cmp);
+ assert(it.valid() && !cmp(*iter, it.getKey()));
+ if (Iterator::hasData() && fcmp != NULL) {
+ typename Dictionary::DataType pidx(it.getData());
+ _dict.remove(it);
+ if (!it.valid() || (*fcmp)(*iter, it.getKey()))
+ continue; // Next entry does not use same posting list
+ --it;
+ if (it.valid() && !(*fcmp)(it.getKey(), *iter))
+ continue; // Previous entry uses same posting list
+ if (it.valid())
+ ++it;
+ else
+ it.begin();
+ _dict.thaw(it);
+ it.writeData(pidx);
+ } else {
+ _dict.remove(it);
+ }
+ }
+}
+
+template <typename Dictionary>
+void
+EnumStoreDict<Dictionary>::freeUnusedEnums(const EnumStoreComparator &cmp,
+ const EnumStoreComparator *fcmp)
+{
+ IndexSet unused;
+
+ // find unused enums
+ for (typename Dictionary::Iterator iter(_dict.begin()); iter.valid();
+ ++iter) {
+ _enumStore.freeUnusedEnum(iter.getKey(), unused);
+ }
+ removeUnusedEnums(unused, cmp, fcmp);
+}
+
+template <typename Dictionary>
+void
+EnumStoreDict<Dictionary>::freeUnusedEnums(const IndexVector &toRemove,
+ const EnumStoreComparator &cmp,
+ const EnumStoreComparator *fcmp)
+{
+ IndexSet unused;
+ for(IndexVector::const_iterator it(toRemove.begin()), mt(toRemove.end());
+ it != mt; it++) {
+ _enumStore.freeUnusedEnum(*it, unused);
+ }
+
+ removeUnusedEnums(unused, cmp, fcmp);
+}
+
+
+template <typename Dictionary>
+bool
+EnumStoreDict<Dictionary>::findIndex(const EnumStoreComparator &cmp,
+ Index &idx) const
+{
+ typename Dictionary::Iterator itr = _dict.find(Index(), cmp);
+ if (!itr.valid()) {
+ return false;
+ }
+ idx = itr.getKey();
+ return true;
+}
+
+
+template <typename Dictionary>
+bool
+EnumStoreDict<Dictionary>::findFrozenIndex(const EnumStoreComparator &cmp,
+ Index &idx) const
+{
+ typename Dictionary::ConstIterator itr =
+ _dict.getFrozenView().find(Index(), cmp);
+ if (!itr.valid()) {
+ return false;
+ }
+ idx = itr.getKey();
+ return true;
+}
+
+
+template <typename Dictionary>
+void
+EnumStoreDict<Dictionary>::onReset(void)
+{
+ _dict.clear();
+}
+
+
+template <typename Dictionary>
+void
+EnumStoreDict<Dictionary>::onTransferHoldLists(generation_t generation)
+{
+ _dict.getAllocator().transferHoldLists(generation);
+}
+
+
+template <typename Dictionary>
+void
+EnumStoreDict<Dictionary>::onTrimHoldLists(generation_t firstUsed)
+{
+ _dict.getAllocator().trimHoldLists(firstUsed);
+}
+
+
+template <typename Dictionary>
+BTreeNode::Ref
+EnumStoreDict<Dictionary>::getFrozenRootRef(void) const
+{
+ return _dict.getFrozenView().getRoot();
+}
+
+
+template <typename Dictionary>
+uint32_t
+EnumStoreDict<Dictionary>::
+lookupFrozenTerm(BTreeNode::Ref frozenRootRef,
+ const EnumStoreComparator &comp) const
+{
+ typename Dictionary::ConstIterator itr(BTreeNode::Ref(),
+ _dict.getAllocator());
+ itr.lower_bound(frozenRootRef, Index(), comp);
+ if (itr.valid() && !comp(Index(), itr.getKey())) {
+ return 1u;
+ }
+ return 0u;
+}
+
+
+template <typename Dictionary>
+uint32_t
+EnumStoreDict<Dictionary>::
+lookupFrozenRange(BTreeNode::Ref frozenRootRef,
+ const EnumStoreComparator &low,
+ const EnumStoreComparator &high) const
+{
+ typename Dictionary::ConstIterator lowerDictItr(BTreeNode::Ref(),
+ _dict.getAllocator());
+ lowerDictItr.lower_bound(frozenRootRef, Index(), low);
+ typename Dictionary::ConstIterator upperDictItr = lowerDictItr;
+ if (upperDictItr.valid() && !high(Index(), upperDictItr.getKey()))
+ upperDictItr.seekPast(Index(), high);
+ return upperDictItr - lowerDictItr;
+}
+
+
+template <>
+EnumPostingTree &
+EnumStoreDict<EnumTree>::getPostingDictionary(void)
+{
+ abort();
+}
+
+
+template <>
+EnumPostingTree &
+EnumStoreDict<EnumPostingTree>::getPostingDictionary(void)
+{
+ return _dict;
+}
+
+
+template <>
+const EnumPostingTree &
+EnumStoreDict<EnumTree>::getPostingDictionary(void) const
+{
+ abort();
+}
+
+
+template <>
+const EnumPostingTree &
+EnumStoreDict<EnumPostingTree>::getPostingDictionary(void) const
+{
+ return _dict;
+}
+
+
+template <typename Dictionary>
+bool
+EnumStoreDict<Dictionary>::hasData(void) const
+{
+ return Dictionary::LeafNodeType::hasData();
+}
+
+
+template class btree::DataStoreT<btree::AlignedEntryRefT<31, 4> >;
+
+template
+void
+EnumStoreBase::reEnumerate<EnumTree>(const EnumTree &tree);
+
+template
+void
+EnumStoreBase::reEnumerate<EnumPostingTree>(const EnumPostingTree &tree);
+
+template
+ssize_t
+EnumStoreBase::deserialize<EnumTree>(const void *src,
+ size_t available,
+ IndexVector &idx,
+ EnumTree &tree);
+
+template
+ssize_t
+EnumStoreBase::deserialize<EnumPostingTree>(const void *src,
+ size_t available,
+ IndexVector &idx,
+ EnumPostingTree &tree);
+
+template
+void
+EnumStoreBase::fixupRefCounts<EnumTree>(const EnumVector &hist,
+ EnumTree &tree);
+
+template
+void
+EnumStoreBase::fixupRefCounts<EnumPostingTree>(
+ const EnumVector &hist,
+ EnumPostingTree &tree);
+
+template class EnumStoreDict<EnumTree>;
+
+template class EnumStoreDict<EnumPostingTree>;
+
+}
diff --git a/searchlib/src/vespa/searchlib/attribute/enumstorebase.h b/searchlib/src/vespa/searchlib/attribute/enumstorebase.h
new file mode 100644
index 00000000000..3b8b9823d87
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/enumstorebase.h
@@ -0,0 +1,622 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "address_space.h"
+#include <vespa/searchcommon/attribute/iattributevector.h>
+#include <vespa/searchlib/btree/datastore.h>
+#include <vespa/searchlib/util/memoryusage.h>
+#include <vespa/vespalib/stllike/asciistream.h>
+#include <vespa/vespalib/util/array.h>
+#include <list>
+#include <set>
+#include <vespa/searchlib/btree/btree.h>
+#include <vespa/searchlib/common/bitvector.h>
+#include <atomic>
+
+namespace search
+{
+
+class BufferWriter;
+
+namespace attribute
+{
+
+class Status;
+
+}
+
+class EnumStoreBase;
+
+class EnumStoreComparator;
+class EnumStoreComparatorWrapper;
+
+typedef btree::DataStoreT<btree::AlignedEntryRefT<31, 4> >
+EnumStoreDataStoreType;
+typedef EnumStoreDataStoreType::RefType EnumStoreIndex;
+typedef vespalib::Array<EnumStoreIndex, vespalib::DefaultAlloc> EnumStoreIndexVector;
+typedef vespalib::Array<uint32_t, vespalib::DefaultAlloc> EnumStoreEnumVector;
+
+typedef btree::BTreeTraits<32, 32, 7, true> EnumTreeTraits;
+
+typedef btree::BTree<EnumStoreIndex, btree::BTreeNoLeafData,
+ btree::NoAggregated,
+ const EnumStoreComparatorWrapper,
+ EnumTreeTraits> EnumTree;
+typedef btree::BTree<EnumStoreIndex, btree::EntryRef,
+ btree::NoAggregated,
+ const EnumStoreComparatorWrapper,
+ EnumTreeTraits> EnumPostingTree;
+
+struct CompareEnumIndex
+{
+ typedef EnumStoreIndex Index;
+
+ inline bool
+ operator()(const Index &lhs, const Index &rhs) const
+ {
+ return lhs.ref() < rhs.ref();
+ }
+};
+
+class EnumStoreDictBase
+{
+public:
+ typedef EnumStoreIndex Index;
+ typedef EnumStoreIndexVector IndexVector;
+ typedef EnumStoreEnumVector EnumVector;
+ typedef std::set<Index, CompareEnumIndex> IndexSet;
+ typedef vespalib::GenerationHandler::generation_t generation_t;
+
+protected:
+ EnumStoreBase &_enumStore;
+
+public:
+ EnumStoreDictBase(EnumStoreBase &enumStore);
+
+ virtual
+ ~EnumStoreDictBase(void);
+
+ virtual void
+ freezeTree(void) = 0;
+
+ virtual uint32_t
+ getNumUniques(void) const = 0;
+
+ virtual MemoryUsage
+ getTreeMemoryUsage(void) const = 0;
+
+ virtual void
+ reEnumerate(void) = 0;
+
+ virtual void
+ writeAllValues(BufferWriter &writer,
+ btree::BTreeNode::Ref rootRef) const = 0;
+
+ virtual ssize_t
+ deserialize(const void *src, size_t available,
+ IndexVector &idx) = 0;
+
+ virtual void
+ fixupRefCounts(const EnumVector &hist) = 0;
+
+ virtual void
+ freeUnusedEnums(const EnumStoreComparator &cmp,
+ const EnumStoreComparator *fcmp) = 0;
+
+ virtual void
+ freeUnusedEnums(const IndexVector &toRemove,
+ const EnumStoreComparator &cmp,
+ const EnumStoreComparator *fcmp) = 0;
+
+ virtual bool
+ findIndex(const EnumStoreComparator &cmp, Index &idx) const = 0;
+
+ virtual bool
+ findFrozenIndex(const EnumStoreComparator &cmp, Index &idx) const = 0;
+
+ virtual void
+ onReset(void) = 0;
+
+ virtual void
+ onTransferHoldLists(generation_t generation) = 0;
+
+ virtual void
+ onTrimHoldLists(generation_t firstUsed) = 0;
+
+ virtual btree::BTreeNode::Ref
+ getFrozenRootRef(void) const = 0;
+
+ virtual uint32_t
+ lookupFrozenTerm(btree::BTreeNode::Ref frozenRootRef,
+ const EnumStoreComparator &comp) const = 0;
+
+ virtual uint32_t
+ lookupFrozenRange(btree::BTreeNode::Ref frozenRootRef,
+ const EnumStoreComparator &low,
+ const EnumStoreComparator &high) const = 0;
+
+ virtual EnumPostingTree &
+ getPostingDictionary(void) = 0;
+
+ virtual const EnumPostingTree &
+ getPostingDictionary(void) const = 0;
+
+ virtual bool
+ hasData(void) const = 0;
+};
+
+
+template <typename Dictionary>
+class EnumStoreDict : public EnumStoreDictBase
+{
+protected:
+ Dictionary _dict;
+
+public:
+ EnumStoreDict(EnumStoreBase &enumStore);
+
+ virtual
+ ~EnumStoreDict(void);
+
+ const Dictionary &
+ getDictionary() const
+ {
+ return _dict;
+ }
+
+ Dictionary &
+ getDictionary()
+ {
+ return _dict;
+ }
+
+ virtual void
+ freezeTree(void);
+
+ virtual uint32_t
+ getNumUniques(void) const;
+
+ virtual MemoryUsage
+ getTreeMemoryUsage(void) const;
+
+ virtual void
+ reEnumerate(void);
+
+ virtual void
+ writeAllValues(BufferWriter &writer,
+ btree::BTreeNode::Ref rootRef) const override;
+
+ virtual ssize_t
+ deserialize(const void *src, size_t available,
+ IndexVector &idx);
+
+ virtual void
+ fixupRefCounts(const EnumVector &hist);
+
+ void
+ removeUnusedEnums(const IndexSet &unused,
+ const EnumStoreComparator &cmp,
+ const EnumStoreComparator *fcmp);
+
+ virtual void
+ freeUnusedEnums(const EnumStoreComparator &cmp,
+ const EnumStoreComparator *fcmp);
+
+ virtual void
+ freeUnusedEnums(const IndexVector &toRemove,
+ const EnumStoreComparator &cmp,
+ const EnumStoreComparator *fcmp);
+
+ virtual bool
+ findIndex(const EnumStoreComparator &cmp, Index &idx) const;
+
+ virtual bool
+ findFrozenIndex(const EnumStoreComparator &cmp, Index &idx) const;
+
+ virtual void
+ onReset(void);
+
+ virtual void
+ onTransferHoldLists(generation_t generation);
+
+ virtual void
+ onTrimHoldLists(generation_t firstUsed);
+
+ virtual btree::BTreeNode::Ref
+ getFrozenRootRef(void) const;
+
+ virtual uint32_t
+ lookupFrozenTerm(btree::BTreeNode::Ref frozenRootRef,
+ const EnumStoreComparator &comp) const;
+
+ virtual uint32_t
+ lookupFrozenRange(btree::BTreeNode::Ref frozenRootRef,
+ const EnumStoreComparator &low,
+ const EnumStoreComparator &high) const;
+
+ virtual EnumPostingTree &
+ getPostingDictionary(void);
+
+ virtual const EnumPostingTree &
+ getPostingDictionary(void) const;
+
+ virtual bool
+ hasData(void) const;
+};
+
+
+class EnumStoreBase
+{
+public:
+ typedef vespalib::GenerationHandler::generation_t generation_t;
+ typedef attribute::IAttributeVector::EnumHandle EnumHandle;
+ typedef EnumStoreDataStoreType DataStoreType;
+ typedef EnumStoreIndex Index;
+ typedef EnumStoreIndexVector IndexVector;
+ typedef EnumStoreEnumVector EnumVector;
+
+ class EntryBase {
+ protected:
+ char * _data;
+ public:
+ EntryBase(void * data)
+ :
+ _data(static_cast<char *>(data))
+ {
+ }
+
+ uint32_t
+ getEnum() const
+ {
+ uint32_t *src = reinterpret_cast<uint32_t *>(_data);
+ return *src;
+ }
+
+ uint32_t
+ getRefCount(void) const
+ {
+ uint32_t *src = reinterpret_cast<uint32_t *>(_data) + 1;
+ return *src;
+ }
+
+ void
+ incRefCount(void)
+ {
+ uint32_t *dst = reinterpret_cast<uint32_t *>(_data) + 1;
+ ++(*dst);
+ }
+
+ void
+ decRefCount(void)
+ {
+ uint32_t *dst = reinterpret_cast<uint32_t *>(_data) + 1;
+ --(*dst);
+ }
+
+ void
+ setEnum(uint32_t enumValue)
+ {
+ uint32_t *dst = reinterpret_cast<uint32_t *>(_data);
+ *dst = enumValue;
+ }
+
+ void
+ setRefCount(uint32_t refCount)
+ {
+ uint32_t *dst = reinterpret_cast<uint32_t *>(_data) + 1;
+ *dst = refCount;
+ }
+
+ static uint32_t size() { return 2*sizeof(uint32_t); }
+ };
+
+ typedef std::set<Index, CompareEnumIndex> IndexSet;
+
+private:
+ void verifyBufferSize(uint64_t initBufferSize);
+
+protected:
+
+ class EnumBufferType : public btree::BufferType<char> {
+ private:
+ uint64_t _initBufferSize; // in bytes
+ bool _pendingCompact;
+ bool _wantCompact;
+ public:
+ EnumBufferType(uint64_t initBufferSize)
+ : btree::BufferType<char>(Index::align(1),
+ Index::offsetSize() / Index::align(1),
+ Index::offsetSize() / Index::align(1)),
+ _initBufferSize(initBufferSize),
+ _pendingCompact(false),
+ _wantCompact(false)
+ {
+ }
+ virtual size_t calcClustersToAlloc(size_t sizeNeeded,
+ uint64_t clusterRefSize) const {
+ (void) sizeNeeded;
+ uint64_t clusterSize = elementSize() * getClusterSize();
+ uint64_t wantedClustersToAlloc = _initBufferSize / clusterSize;
+ assert(_initBufferSize % clusterSize == 0);
+ ++wantedClustersToAlloc; // Index(0,0) is illegal
+ return std::min(wantedClustersToAlloc, clusterRefSize);
+ }
+ void setInitBufferSize(uint64_t newSize) { _initBufferSize = newSize; }
+
+ virtual void
+ onFree(size_t usedElems)
+ {
+ btree::BufferType<char>::onFree(usedElems);
+ _pendingCompact = _wantCompact;
+ _wantCompact = false;
+ }
+
+ void
+ setWantCompact(void)
+ {
+ _wantCompact = true;
+ }
+
+ bool
+ getPendingCompact(void) const
+ {
+ return _pendingCompact;
+ }
+
+ void
+ clearPendingCompact(void)
+ {
+ _pendingCompact = false;
+ }
+ };
+
+ EnumStoreDictBase *_enumDict;
+ DataStoreType _store;
+ EnumBufferType _type;
+ uint32_t _nextEnum;
+ IndexVector _indexMap;
+ std::vector<uint32_t> _toHoldBuffers; // used during compaction
+ // set before backgound flush, cleared during background flush
+ mutable std::atomic<bool> _disabledReEnumerate;
+
+ static const uint32_t TYPE_ID = 0;
+
+ EnumStoreBase(uint64_t initBufferSize,
+ bool hasPostings);
+
+ virtual ~EnumStoreBase();
+
+ EntryBase getEntryBase(Index idx) const {
+ return EntryBase(const_cast<DataStoreType &>(_store).getBufferEntry<char>(idx.bufferId(), idx.offset()));
+ }
+ btree::BufferState & getBuffer(uint32_t bufferIdx) {
+ return _store.getBufferState(bufferIdx);
+ }
+ const btree::BufferState & getBuffer(uint32_t bufferIdx) const {
+ return _store.getBufferState(bufferIdx);
+ }
+ bool validIndex(Index idx) const {
+ return (idx.valid() && idx.offset() < _store.getBufferState(idx.bufferId()).size());
+ }
+
+ uint32_t getBufferIndex(btree::BufferState::State status);
+ void postCompact(uint32_t newEnum);
+ bool preCompact(uint64_t bytesNeeded);
+
+public:
+ void
+ reset(uint64_t initBufferSize);
+
+ virtual uint32_t getFixedSize() const = 0;
+ size_t getMaxEnumOffset() const {
+ return _store.getBufferState(_store.getActiveBufferId(TYPE_ID)).size();
+ }
+ void getEnumValue(const EnumHandle * v, uint32_t *e, uint32_t sz) const;
+ uint32_t getRefCount(Index idx) const { return getEntryBase(idx).getRefCount(); }
+ uint32_t getEnum(Index idx) const { return getEntryBase(idx).getEnum(); }
+ void incRefCount(Index idx) { getEntryBase(idx).incRefCount(); }
+ void decRefCount(Index idx) { getEntryBase(idx).decRefCount(); }
+
+ // Only use when reading from enumerated attribute save files
+ void
+ fixupRefCount(Index idx, uint32_t refCount)
+ {
+ getEntryBase(idx).setRefCount(refCount);
+ }
+
+ template <typename Tree>
+ void
+ fixupRefCounts(const EnumVector &hist,
+ Tree &tree);
+
+ void clearIndexMap() { IndexVector().swap(_indexMap); }
+ uint32_t getLastEnum() const { return _nextEnum ? _nextEnum - 1 : _nextEnum; }
+
+ inline uint32_t
+ getNumUniques() const
+ {
+ return _enumDict->getNumUniques();
+ }
+
+ uint32_t getRemaining() const {
+ return _store.getBufferState(_store.getActiveBufferId(TYPE_ID)).remaining();
+ }
+ MemoryUsage getMemoryUsage() const;
+
+ inline MemoryUsage
+ getTreeMemoryUsage() const
+ {
+ return _enumDict->getTreeMemoryUsage();
+ }
+
+ AddressSpace getAddressSpaceUsage() const;
+
+ bool getCurrentIndex(Index oldIdx, Index & newIdx) const;
+
+ void
+ transferHoldLists(generation_t generation);
+
+ void trimHoldLists(generation_t firstUsed);
+
+ static void failNewSize(uint64_t minNewSize, uint64_t maxSize);
+ static uint64_t computeNewSize(uint64_t used, uint64_t dead, uint64_t needed);
+
+ // Align buffers and entries to 4 bytes boundary.
+ static uint64_t alignBufferSize(uint64_t val) {
+ return Index::align(val);
+ }
+ static uint32_t alignEntrySize(uint32_t val) {
+ return Index::align(val);
+ }
+
+ void
+ fallbackResize(uint64_t bytesNeeded);
+
+ bool
+ getPendingCompact(void) const
+ {
+ return _type.getPendingCompact();
+ }
+
+ void
+ clearPendingCompact(void)
+ {
+ _type.clearPendingCompact();
+ }
+
+ template <typename Tree>
+ void
+ reEnumerate(const Tree &tree);
+
+ inline void
+ reEnumerate(void)
+ {
+ _enumDict->reEnumerate();
+ }
+
+ // Disable reenumeration during compaction.
+ void disableReEnumerate() const;
+
+ // Allow reenumeration during compaction.
+ void enableReEnumerate() const;
+
+ virtual void writeValues(BufferWriter &writer,
+ const Index *idxs, size_t count) const = 0;
+
+ void writeEnumValues(BufferWriter &writer,
+ const Index *idxs, size_t count) const;
+
+ virtual ssize_t
+ deserialize(const void *src, size_t available, size_t &initSpace) = 0;
+
+ virtual ssize_t
+ deserialize(const void *src, size_t available, Index &idx) = 0;
+
+ virtual bool
+ foldedChange(const Index &idx1, const Index &idx2) = 0;
+
+ ssize_t
+ deserialize0(const void *src, size_t available, IndexVector &idx);
+
+ template <typename Tree>
+ ssize_t
+ deserialize(const void *src, size_t available, IndexVector &idx,
+ Tree &tree);
+
+ inline ssize_t
+ deserialize(const void *src, size_t available,
+ IndexVector &idx)
+ {
+ return _enumDict->deserialize(src, available, idx);
+ }
+
+ virtual void
+ freeUnusedEnum(Index idx, IndexSet &unused) = 0;
+
+ virtual void
+ freeUnusedEnums(bool movePostingIdx) = 0;
+
+ virtual void
+ freeUnusedEnums(const IndexVector &toRemove) = 0;
+
+ inline void
+ fixupRefCounts(const EnumVector &hist)
+ {
+ _enumDict->fixupRefCounts(hist);
+ }
+
+ inline void
+ freezeTree(void)
+ {
+ _enumDict->freezeTree();
+ }
+
+ virtual bool
+ performCompaction(uint64_t bytesNeeded) = 0;
+
+ EnumStoreDictBase &
+ getEnumStoreDict(void)
+ {
+ return *_enumDict;
+ }
+
+ const EnumStoreDictBase &
+ getEnumStoreDict(void) const
+ {
+ return *_enumDict;
+ }
+
+ EnumPostingTree &
+ getPostingDictionary(void)
+ {
+ return _enumDict->getPostingDictionary();
+ }
+
+ const EnumPostingTree &
+ getPostingDictionary(void) const
+ {
+ return _enumDict->getPostingDictionary();
+ }
+};
+
+
+vespalib::asciistream & operator << (vespalib::asciistream & os, const EnumStoreBase::Index & idx);
+
+
+/**
+ * Base comparator class needed by the btree.
+ **/
+class EnumStoreComparator {
+public:
+ typedef EnumStoreBase::Index EnumIndex;
+ virtual ~EnumStoreComparator() {}
+ /**
+ * Compare the values represented by the given enum indexes.
+ * Uses the enum store to map from enum index to actual value.
+ **/
+ virtual bool operator() (const EnumIndex & lhs, const EnumIndex & rhs) const = 0;
+};
+
+
+class EnumStoreComparatorWrapper
+{
+ const EnumStoreComparator &_comp;
+public:
+ typedef EnumStoreBase::Index EnumIndex;
+ EnumStoreComparatorWrapper(const EnumStoreComparator &comp)
+ : _comp(comp)
+ {
+ }
+
+ inline bool operator()(const EnumIndex &lhs, const EnumIndex &rhs) const
+ {
+ return _comp(lhs, rhs);
+ }
+};
+
+extern template class
+btree::DataStoreT<btree::AlignedEntryRefT<31, 4> >;
+
+}
+
diff --git a/searchlib/src/vespa/searchlib/attribute/extendableattributes.cpp b/searchlib/src/vespa/searchlib/attribute/extendableattributes.cpp
new file mode 100644
index 00000000000..ae011bbcb0a
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/extendableattributes.cpp
@@ -0,0 +1,162 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "extendableattributes.h"
+#include <vespa/searchlib/attribute/attrvector.hpp>
+
+namespace search {
+
+//******************** CollectionType::SINGLE ********************//
+
+
+SingleStringExtAttribute::SingleStringExtAttribute(const vespalib::string & name) :
+ StringDirectAttrVector< AttrVector::Features<false> >(name, Config(BasicType::STRING, CollectionType::SINGLE))
+{
+ setEnum(false);
+ setSortedEnum(false);
+}
+
+bool SingleStringExtAttribute::addDoc(DocId & docId)
+{
+ size_t offset(_buffer.size());
+ _buffer.push_back('\0');
+ _buffer.push_back(0);
+ docId = _offsets.size();
+ _offsets.push_back(offset);
+ incNumDocs();
+ setCommittedDocIdLimit(getNumDocs());
+ return true;
+}
+
+bool SingleStringExtAttribute::add(const char * v, int32_t)
+{
+ const size_t start(_offsets.back());
+ const size_t sz(strlen(v) + 1);
+ _buffer.resize(start+sz);
+ strcpy(&_buffer[start], v);
+ return true;
+}
+
+
+//******************** CollectionType::ARRAY ********************//
+
+
+MultiStringExtAttribute::MultiStringExtAttribute(const vespalib::string & name, const CollectionType & ctype) :
+ StringDirectAttrVector< AttrVector::Features<true> >
+ (name, Config(BasicType::STRING, ctype))
+{
+ setEnum(false);
+ setSortedEnum(false);
+}
+
+MultiStringExtAttribute::MultiStringExtAttribute(const vespalib::string & name) :
+ StringDirectAttrVector< AttrVector::Features<true> >
+ (name, Config(BasicType::STRING, CollectionType::ARRAY))
+{
+ setEnum(false);
+ setSortedEnum(false);
+}
+
+bool MultiStringExtAttribute::addDoc(DocId & docId)
+{
+ docId = _idx.size() - 1;
+ _idx.push_back(_idx.back());
+ incNumDocs();
+ setCommittedDocIdLimit(getNumDocs());
+ return true;
+}
+
+bool MultiStringExtAttribute::add(const char * v, int32_t)
+{
+ const size_t start(_buffer.size());
+ const size_t sz(strlen(v) + 1);
+ _buffer.resize(start+sz);
+ strcpy(&_buffer[start], v);
+
+ _offsets.push_back(start);
+
+ _idx.back()++;
+ checkSetMaxValueCount(_idx.back() - _idx[_idx.size() - 2]);
+ return true;
+}
+
+
+//******************** CollectionType::WSET ********************//
+
+WeightedSetIntegerExtAttribute::WeightedSetIntegerExtAttribute(const vespalib::string & name) :
+ WeightedSetExtAttributeBase<MultiIntegerExtAttribute>(name)
+{
+}
+
+bool
+WeightedSetIntegerExtAttribute::add(int64_t v, int32_t w)
+{
+ addWeight(w);
+ MultiIntegerExtAttribute::add(v);
+ return true;
+}
+
+uint32_t
+WeightedSetIntegerExtAttribute::get(DocId doc, AttributeVector::WeightedInt * v, uint32_t sz) const
+{
+ uint32_t valueCount = _idx[doc + 1] - _idx[doc];
+ uint32_t num2Read = std::min(valueCount, sz);
+ for (uint32_t i = 0; i < num2Read; ++i) {
+ v[i] = AttributeVector::WeightedInt(_data[_idx[doc] + i], getWeightHelper(doc, i));
+ }
+ return valueCount;
+}
+
+WeightedSetFloatExtAttribute::WeightedSetFloatExtAttribute(const vespalib::string & name) :
+ WeightedSetExtAttributeBase<MultiFloatExtAttribute>(name)
+{
+}
+
+bool
+WeightedSetFloatExtAttribute::add(double v, int32_t w)
+{
+ addWeight(w);
+ MultiFloatExtAttribute::add(v);
+ return true;
+}
+
+uint32_t
+WeightedSetFloatExtAttribute::get(DocId doc, AttributeVector::WeightedFloat * v, uint32_t sz) const
+{
+ uint32_t valueCount = _idx[doc + 1] - _idx[doc];
+ uint32_t num2Read = std::min(valueCount, sz);
+ for (uint32_t i = 0; i < num2Read; ++i) {
+ v[i] = AttributeVector::WeightedFloat(_data[_idx[doc] + i], getWeightHelper(doc, i));
+ }
+ return valueCount;
+}
+
+WeightedSetStringExtAttribute::WeightedSetStringExtAttribute(const vespalib::string & name) :
+ WeightedSetExtAttributeBase<MultiStringExtAttribute>(name)
+{
+ setEnum(false);
+ setSortedEnum(false);
+}
+
+bool
+WeightedSetStringExtAttribute::add(const char * v, int32_t w)
+{
+ addWeight(w);
+ MultiStringExtAttribute::add(v);
+ return true;
+}
+
+uint32_t
+WeightedSetStringExtAttribute::get(DocId doc, AttributeVector::WeightedString * v, uint32_t sz) const
+{
+ return getAllHelper(doc, v, sz);
+}
+
+uint32_t
+WeightedSetStringExtAttribute::get(DocId doc, AttributeVector::WeightedConstChar * v, uint32_t sz) const
+{
+ return getAllHelper(doc, v, sz);
+}
+
+
+}
diff --git a/searchlib/src/vespa/searchlib/attribute/extendableattributes.h b/searchlib/src/vespa/searchlib/attribute/extendableattributes.h
new file mode 100644
index 00000000000..ba5776d6b0c
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/extendableattributes.h
@@ -0,0 +1,245 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/**
+ * @class search::SearchVisitor
+ *
+ * @brief Visitor that applies a search query to visitor data and converts them to a SearchResultCommand
+ */
+#pragma once
+
+#include <vespa/searchlib/attribute/attrvector.h>
+#include <vespa/searchlib/attribute/attrvector.hpp>
+
+namespace search {
+
+// Translates the actual value type to the type required by IExtendAttribute.
+template <typename T> struct AddValueType {
+ typedef int64_t Type;
+};
+template <> struct AddValueType<double> {
+ typedef double Type;
+};
+
+//******************** CollectionType::SINGLE ********************//
+
+template <typename T> struct AttributeTemplate {
+ typedef search::IntegerAttributeTemplate<T> Type;
+};
+template <> struct AttributeTemplate<double> {
+ typedef search::FloatingPointAttributeTemplate<double> Type;
+};
+
+template <typename T>
+class SingleExtAttribute
+ : public NumericDirectAttrVector<AttrVector::Features<false>,
+ typename AttributeTemplate<T>::Type>,
+ public IExtendAttribute
+{
+ typedef typename SingleExtAttribute<T>::NumDirectAttrVec Super;
+ typedef typename Super::Config Config;
+ typedef typename Super::BasicType BasicType;
+
+ AttributeVector::SearchContext::UP
+ getSearch(QueryTermSimple::UP term, const AttributeVector::SearchContext::Params & params) const override
+ {
+ (void) term;
+ (void) params;
+ return AttributeVector::SearchContext::UP();
+ }
+ IExtendAttribute * getExtendInterface() override { return this; }
+public:
+ SingleExtAttribute(const vespalib::string &name)
+ : Super(name, Config(BasicType::fromType(T()),
+ attribute::CollectionType::SINGLE)) {}
+
+ virtual bool addDoc(typename Super::DocId &docId) {
+ docId = this->_data.size();
+ this->_data.push_back(attribute::getUndefined<T>());
+ this->incNumDocs();
+ this->setCommittedDocIdLimit(this->getNumDocs());
+ return true;
+ }
+ virtual bool add(typename AddValueType<T>::Type v, int32_t = 1) {
+ this->_data.back() = v;
+ return true;
+ }
+ virtual bool onLoad() override {
+ return false; // Emulate that this attribute is never loaded
+ }
+};
+
+typedef SingleExtAttribute<int8_t> SingleInt8ExtAttribute;
+typedef SingleExtAttribute<int16_t> SingleInt16ExtAttribute;
+typedef SingleExtAttribute<int32_t> SingleInt32ExtAttribute;
+typedef SingleExtAttribute<int64_t> SingleInt64ExtAttribute;
+typedef SingleExtAttribute<double> SingleFloatExtAttribute;
+
+typedef SingleInt64ExtAttribute SingleIntegerExtAttribute;
+
+class SingleStringExtAttribute
+ : public StringDirectAttrVector< AttrVector::Features<false> >,
+ public IExtendAttribute
+{
+ IExtendAttribute * getExtendInterface() override { return this; }
+public:
+ SingleStringExtAttribute(const vespalib::string & name);
+ virtual bool addDoc(DocId & docId);
+ virtual bool add(const char * v, int32_t w = 1);
+ virtual bool onLoad() override {
+ return false; // Emulate that this attribute is never loaded
+ }
+};
+
+//******************** CollectionType::ARRAY ********************//
+
+template <typename T>
+class MultiExtAttribute
+ : public NumericDirectAttrVector<AttrVector::Features<true>,
+ typename AttributeTemplate<T>::Type>,
+ public IExtendAttribute
+{
+ typedef typename MultiExtAttribute<T>::NumDirectAttrVec Super;
+ typedef typename Super::Config Config;
+ typedef typename Super::BasicType BasicType;
+
+ AttributeVector::SearchContext::UP
+ getSearch(QueryTermSimple::UP term, const AttributeVector::SearchContext::Params & params) const override
+ {
+ (void) term;
+ (void) params;
+ return AttributeVector::SearchContext::UP();
+ }
+ IExtendAttribute * getExtendInterface() override { return this; }
+protected:
+ MultiExtAttribute(const vespalib::string &name, const attribute::CollectionType &ctype)
+ : Super(name, Config(BasicType::fromType(T()), ctype)) {}
+public:
+ MultiExtAttribute(const vespalib::string &name)
+ : Super(name, Config(BasicType::fromType(static_cast<T>(0)),
+ attribute::CollectionType::ARRAY)) {}
+
+ virtual bool addDoc(typename Super::DocId &docId) {
+ docId = this->_idx.size() - 1;
+ this->_idx.push_back(this->_idx.back());
+ this->incNumDocs();
+ this->setCommittedDocIdLimit(this->getNumDocs());
+ return true;
+ }
+ virtual bool add(typename AddValueType<T>::Type v, int32_t = 1) {
+ this->_data.push_back(v);
+ std::vector<uint32_t> &idx = this->_idx;
+ idx.back()++;
+ this->checkSetMaxValueCount(idx.back() - idx[idx.size() - 2]);
+ return true;
+ }
+ virtual bool onLoad() override {
+ return false; // Emulate that this attribute is never loaded
+ }
+};
+
+typedef MultiExtAttribute<int8_t> MultiInt8ExtAttribute;
+typedef MultiExtAttribute<int16_t> MultiInt16ExtAttribute;
+typedef MultiExtAttribute<int32_t> MultiInt32ExtAttribute;
+typedef MultiExtAttribute<int64_t> MultiInt64ExtAttribute;
+typedef MultiExtAttribute<double> MultiFloatExtAttribute;
+
+typedef MultiInt64ExtAttribute MultiIntegerExtAttribute;
+
+class MultiStringExtAttribute :
+ public StringDirectAttrVector< AttrVector::Features<true> >,
+ public IExtendAttribute
+{
+ IExtendAttribute * getExtendInterface() override { return this; }
+protected:
+ MultiStringExtAttribute(const vespalib::string & name, const attribute::CollectionType & ctype);
+
+public:
+ MultiStringExtAttribute(const vespalib::string & name);
+ virtual bool addDoc(DocId & docId);
+ virtual bool add(const char * v, int32_t w = 1);
+ virtual bool onLoad() override {
+ return false; // Emulate that this attribute is never loaded
+ }
+};
+
+
+//******************** CollectionType::WSET ********************//
+
+template <typename B>
+class WeightedSetExtAttributeBase : public B
+{
+private:
+ std::vector<int32_t> _weights;
+
+protected:
+ void addWeight(int32_t w) {
+ _weights.push_back(w);
+ }
+ int32_t getWeightHelper(AttributeVector::DocId docId, uint32_t idx) const {
+ return _weights[this->_idx[docId] + idx];
+ }
+ WeightedSetExtAttributeBase(const vespalib::string & name) :
+ B(name, attribute::CollectionType::WSET),
+ _weights()
+ {
+ }
+};
+
+class WeightedSetIntegerExtAttribute
+ : public WeightedSetExtAttributeBase<MultiIntegerExtAttribute>
+{
+ AttributeVector::SearchContext::UP
+ getSearch(QueryTermSimple::UP term, const AttributeVector::SearchContext::Params & params) const override
+ {
+ (void) term;
+ (void) params;
+ return AttributeVector::SearchContext::UP();
+ }
+public:
+ WeightedSetIntegerExtAttribute(const vespalib::string & name);
+ virtual bool add(int64_t v, int32_t w = 1);
+ virtual uint32_t get(DocId doc, AttributeVector::WeightedInt * v, uint32_t sz) const;
+};
+
+class WeightedSetFloatExtAttribute
+ : public WeightedSetExtAttributeBase<MultiFloatExtAttribute>
+{
+ AttributeVector::SearchContext::UP
+ getSearch(QueryTermSimple::UP term, const AttributeVector::SearchContext::Params & params) const override
+ {
+ (void) term;
+ (void) params;
+ return AttributeVector::SearchContext::UP();
+ }
+public:
+ WeightedSetFloatExtAttribute(const vespalib::string & name);
+ virtual bool add(double v, int32_t w = 1);
+ virtual uint32_t get(DocId doc, AttributeVector::WeightedFloat * v, uint32_t sz) const;
+};
+
+class WeightedSetStringExtAttribute
+ : public WeightedSetExtAttributeBase<MultiStringExtAttribute>
+{
+private:
+ const char * getHelper(DocId doc, int idx) const {
+ return &_buffer[_offsets[_idx[doc] + idx]];
+ }
+ template <typename T>
+ uint32_t getAllHelper(DocId doc, T * v, uint32_t sz) const
+ {
+ uint32_t valueCount = _idx[doc + 1] - _idx[doc];
+ uint32_t num2Read = std::min(valueCount, sz);
+ for (uint32_t i = 0; i < num2Read; ++i) {
+ v[i] = T(getHelper(doc, i), getWeightHelper(doc, i));
+ }
+ return valueCount;
+ }
+
+public:
+ WeightedSetStringExtAttribute(const vespalib::string & name);
+ virtual bool add(const char * v, int32_t w = 1);
+ virtual uint32_t get(DocId doc, AttributeVector::WeightedString * v, uint32_t sz) const;
+ virtual uint32_t get(DocId doc, AttributeVector::WeightedConstChar * v, uint32_t sz) const;
+};
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/attribute/fixedsourceselector.cpp b/searchlib/src/vespa/searchlib/attribute/fixedsourceselector.cpp
new file mode 100644
index 00000000000..ba36774bf29
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/fixedsourceselector.cpp
@@ -0,0 +1,90 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "fixedsourceselector.h"
+#include "singlenumericattribute.hpp"
+
+namespace search {
+
+namespace {
+ attribute::Config getConfig() { return attribute::Config(attribute::BasicType::INT8); }
+}
+
+FixedSourceSelector::Iterator::Iterator(const FixedSourceSelector & sourceSelector) :
+ ISourceSelector::Iterator(sourceSelector._source),
+ _attributeGuard(sourceSelector._realSource)
+{
+}
+
+FixedSourceSelector::FixedSourceSelector(queryeval::Source defaultSource,
+ const vespalib::string & attrBaseFileName,
+ uint32_t initialNumDocs) :
+ SourceSelector(defaultSource, AttributeVector::SP(new SourceStore(attrBaseFileName, getConfig()))),
+ _source(static_cast<SourceStore &>(*_realSource))
+{
+ reserve(initialNumDocs);
+ _source.commit();
+}
+
+FixedSourceSelector::~FixedSourceSelector()
+{
+}
+
+FixedSourceSelector::UP
+FixedSourceSelector::cloneAndSubtract(const vespalib::string & attrBaseFileName,
+ uint32_t diff)
+{
+ queryeval::Source newDefault = getNewSource(getDefaultSource(), diff);
+ FixedSourceSelector::UP selector(new FixedSourceSelector(newDefault, attrBaseFileName, _source.getNumDocs()-1));
+ for (uint32_t docId = 0; docId < _source.getNumDocs(); ++docId) {
+ queryeval::Source src = _source.get(docId);
+ src = getNewSource(src, diff);
+ assert(src < SOURCE_LIMIT);
+ selector->_source.set(docId, src);
+ }
+ selector->_source.commit();
+ selector->setBaseId(getBaseId() + diff);
+ return selector;
+}
+
+FixedSourceSelector::UP
+FixedSourceSelector::load(const vespalib::string & baseFileName)
+{
+ LoadInfo::UP info = extractLoadInfo(baseFileName);
+ info->load();
+ FixedSourceSelector::UP selector(new FixedSourceSelector(
+ info->header()._defaultSource,
+ info->header()._baseFileName,
+ 0));
+ selector->setBaseId(info->header()._baseId);
+ selector->_source.load();
+ return selector;
+}
+
+void FixedSourceSelector::reserve(uint32_t numDocs)
+{
+ const uint32_t maxDoc(_source.getNumDocs());
+ const uint32_t newMaxDocIdPlussOne(numDocs + 1);
+ if (newMaxDocIdPlussOne > maxDoc) {
+ uint32_t newDocId(0);
+ for (_source.addDoc(newDocId); newDocId < numDocs; _source.addDoc(newDocId));
+ for (uint32_t i = maxDoc; i < newMaxDocIdPlussOne; ++i) {
+ _source.set(i, getDefaultSource());
+ }
+ }
+}
+
+void
+FixedSourceSelector::setSource(uint32_t docId, queryeval::Source source)
+{
+ assert(source < SOURCE_LIMIT);
+ /**
+ * Due to matchingloop advancing 1 past end, we need to initialize data that
+ * far too.
+ **/
+ reserve(docId+1);
+ _source.update(docId, source);
+ _source.commit();
+}
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/attribute/fixedsourceselector.h b/searchlib/src/vespa/searchlib/attribute/fixedsourceselector.h
new file mode 100644
index 00000000000..3e787523909
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/fixedsourceselector.h
@@ -0,0 +1,48 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "attributeguard.h"
+#include "sourceselector.h"
+
+namespace search {
+
+class FixedSourceSelector : public SourceSelector
+{
+private:
+ SourceStore & _source;
+ queryeval::Source getSource(uint32_t docId) const {
+ return _source.getFast(docId);
+ }
+ void reserve(uint32_t numDocs);
+
+public:
+ typedef std::unique_ptr<FixedSourceSelector> UP;
+ class Iterator : public ISourceSelector::Iterator {
+ private:
+ AttributeGuard _attributeGuard;
+ public:
+ Iterator(const FixedSourceSelector & sourceSelector);
+ };
+
+public:
+ FixedSourceSelector(queryeval::Source defaultSource,
+ const vespalib::string & attrBaseFileName,
+ uint32_t initialNumDocs = 0);
+ virtual ~FixedSourceSelector();
+
+ FixedSourceSelector::UP cloneAndSubtract(const vespalib::string & attrBaseFileName, uint32_t diff);
+ static FixedSourceSelector::UP load(const vespalib::string & baseFileName);
+
+ // Inherit doc from ISourceSelector
+ virtual void setSource(uint32_t docId, queryeval::Source source);
+ virtual uint32_t getDocIdLimit() const {
+ return _source.getNumDocs() - 1;
+ }
+ virtual ISourceSelector::Iterator::UP createIterator() const {
+ return ISourceSelector::Iterator::UP(new Iterator(*this));
+ }
+};
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/attribute/flagattribute.cpp b/searchlib/src/vespa/searchlib/attribute/flagattribute.cpp
new file mode 100644
index 00000000000..069a936c8ba
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/flagattribute.cpp
@@ -0,0 +1,283 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "flagattribute.h"
+#include <vespa/searchlib/attribute/attributeiterators.h>
+
+#include <vespa/log/log.h>
+LOG_SETUP(".searchlib.attribute.flagattribute");
+
+#include <vespa/searchlib/attribute/multinumericattribute.hpp>
+#include <vespa/searchlib/queryeval/emptysearch.h>
+#include "multivaluemapping.hpp"
+#include <vespa/searchlib/common/bitvectoriterator.h>
+
+namespace search {
+
+using queryeval::SearchIterator;
+namespace
+{
+
+
+template <class FA, typename T>
+class SaveBits
+{
+ const T *_map;
+ const size_t _mapSize;
+ FA &_fa;
+
+public:
+ SaveBits(const T *map,
+ const size_t mapSize,
+ FA &fa)
+ : _map(map),
+ _mapSize(mapSize),
+ _fa(fa)
+ {
+ }
+
+ void
+ save(uint32_t e, uint32_t docId, uint32_t vci, int32_t weight)
+ {
+ (void) vci;
+ (void) weight;
+ assert(e < _mapSize);
+ _fa.setNewBVValue(docId, _map[e]);
+ }
+};
+
+}
+
+
+template <typename B>
+FlagAttributeT<B>::FlagAttributeT(const vespalib::string & baseFileName, const AttributeVector::Config & cfg) :
+ B(baseFileName, cfg),
+ _bitVectorHolder(),
+ _bitVectorStore(256),
+ _bitVectors(256),
+ _bitVectorSize(cfg.getGrowStrategy().getDocsInitialCapacity())
+{
+}
+
+template <typename B>
+AttributeVector::SearchContext::UP
+FlagAttributeT<B>::getSearch(QueryTermSimple::UP qTerm,
+ const AttributeVector::SearchContext::Params & params) const
+{
+ (void) params;
+ return AttributeVector::SearchContext::UP (new SearchContext(std::move(qTerm), *this));
+}
+
+template <typename B>
+void FlagAttributeT<B>::clearOldValues(DocId doc)
+{
+ const typename B::WType * values(NULL);
+ for (uint32_t i(0), m(this->get(doc, values)); i < m; i++) {
+ BitVector * bv = _bitVectors[getOffset(values[i].value())];
+ if (bv != NULL) {
+ bv->clearBit(doc);
+ }
+ }
+}
+
+template <typename B>
+bool
+FlagAttributeT<B>::onLoadEnumerated(typename B::ReaderBase &attrReader)
+{
+ typedef typename B::WType::ValueType TT;
+
+ uint32_t numDocs = attrReader.getNumIdx() - 1;
+ uint64_t numValues = attrReader.getNumValues();
+ uint64_t enumCount = attrReader.getEnumCount();
+ assert(numValues == enumCount);
+ (void) enumCount;
+
+ this->setNumDocs(numDocs);
+ this->setCommittedDocIdLimit(numDocs);
+
+ if (numValues > 0)
+ _bitVectorSize = numDocs;
+
+ FileUtil::LoadedBuffer::UP udatBuffer(this->loadUDAT());
+ const TT *map = reinterpret_cast<const TT *>(udatBuffer->buffer());
+ assert((udatBuffer->size() % sizeof(TT)) == 0);
+ size_t mapSize = udatBuffer->size() / sizeof(TT);
+ SaveBits<FlagAttributeT<B>, TT> saver(map, mapSize, *this);
+ uint32_t maxvc = this->_mvMapping.fillMapped(attrReader,
+ numValues,
+ map,
+ mapSize,
+ saver,
+ this->getNumDocs(),
+ this->hasWeightedSetType());
+ this->checkSetMaxValueCount(maxvc);
+
+ return true;
+}
+
+template <typename B>
+bool FlagAttributeT<B>::onLoad()
+{
+ for (size_t i(0), m(_bitVectors.size()); i < m; i++) {
+ _bitVectorStore[i].reset();
+ _bitVectors[i] = NULL;
+ }
+ _bitVectorSize = 0;
+ return B::onLoad();
+}
+
+template <typename B>
+void FlagAttributeT<B>::setNewValues(DocId doc, const std::vector<typename B::WType> & values)
+{
+ B::setNewValues(doc, values);
+ if (_bitVectorSize == 0) { // attribute being loaded
+ _bitVectorSize = this->getNumDocs();
+ }
+ for (uint32_t i(0), m(values.size()); i < m; i++) {
+ typename B::WType value = values[i];
+ uint32_t offset = getOffset(value);
+ BitVector * bv = _bitVectors[offset];
+ if (bv == NULL) {
+ assert(_bitVectorSize >= this->getNumDocs());
+ _bitVectorStore[offset] = BitVector::create(_bitVectorSize);
+ _bitVectors[offset] = _bitVectorStore[offset].get();
+ bv = _bitVectors[offset];
+ bv->invalidateCachedCount();
+ ensureGuardBit(*bv);
+ }
+ bv->setBit(doc);
+ }
+}
+
+template <typename B>
+void
+FlagAttributeT<B>::setNewBVValue(DocId doc, typename B::WType::ValueType value)
+{
+ uint32_t offset = getOffset(value);
+ BitVector * bv = _bitVectors[offset];
+ if (bv == NULL) {
+ assert(_bitVectorSize >= this->getNumDocs());
+ _bitVectorStore[offset] = BitVector::create(_bitVectorSize);
+ _bitVectors[offset] = _bitVectorStore[offset].get();
+ bv = _bitVectors[offset];
+ bv->invalidateCachedCount();
+ ensureGuardBit(*bv);
+ }
+ bv->setBit(doc);
+}
+
+
+template <typename B>
+bool
+FlagAttributeT<B>::onAddDoc(DocId doc)
+{
+ bool retval = false;
+ if (doc >= _bitVectorSize) {
+ resizeBitVectors(this->getNumDocs());
+ retval = true;
+ } else {
+ ensureGuardBit();
+ }
+ std::atomic_thread_fence(std::memory_order_release);
+ clearGuardBit(doc);
+ return retval;
+}
+
+template <typename B>
+void
+FlagAttributeT<B>::ensureGuardBit(BitVector & bv)
+{
+ if (this->getNumDocs() < bv.size()) {
+ bv.setBit(this->getNumDocs()); // add guard bit to avoid scanning to the end during search
+ }
+}
+
+template <typename B>
+void
+FlagAttributeT<B>::ensureGuardBit()
+{
+ for (uint32_t i = 0; i < _bitVectors.size(); ++i) {
+ BitVector * bv = _bitVectors[i];
+ if (bv != NULL) {
+ ensureGuardBit(*bv);
+ }
+ }
+}
+
+template <typename B>
+void
+FlagAttributeT<B>::clearGuardBit(DocId doc)
+{
+ for (uint32_t i = 0; i < _bitVectors.size(); ++i) {
+ BitVector * bv = _bitVectors[i];
+ if (bv != NULL) {
+ bv->clearBit(doc); // clear guard bit and start using this doc id
+ }
+ }
+}
+
+template <typename B>
+void
+FlagAttributeT<B>::resizeBitVectors(uint32_t neededSize)
+{
+ const GrowStrategy & gs = this->getConfig().getGrowStrategy();
+ uint32_t newSize = neededSize + (neededSize * gs.getDocsGrowPercent() / 100) + gs.getDocsGrowDelta();
+ for (uint32_t i = 0; i < _bitVectors.size(); ++i) {
+ BitVector * bv = _bitVectors[i];
+ if (bv != NULL) {
+ vespalib::GenerationHeldBase::UP hold(bv->grow(newSize));
+ ensureGuardBit(*bv);
+ _bitVectorHolder.hold(std::move(hold));
+ }
+ }
+ _bitVectorSize = newSize;
+ _bitVectorHolder.transferHoldLists(this->getCurrentGeneration());
+}
+
+
+template <typename B>
+void
+FlagAttributeT<B>::removeOldGenerations(vespalib::GenerationHandler::generation_t firstUsed)
+{
+ B::removeOldGenerations(firstUsed);
+ _bitVectorHolder.trimHoldLists(firstUsed);
+}
+
+template <typename B>
+FlagAttributeT<B>::SearchContext::SearchContext(QueryTermSimple::UP qTerm, const FlagAttributeT<B> & toBeSearched) :
+ BaseSC(std::move(qTerm), toBeSearched),
+ _zeroHits(false)
+{
+}
+
+template <typename B>
+SearchIterator::UP
+FlagAttributeT<B>::SearchContext::createIterator(fef::TermFieldMatchData *
+ matchData,
+ bool strict)
+{
+ if (valid()) {
+ if (_low == _high) {
+ const Attribute & attr(static_cast<const Attribute &>(attribute()));
+ const BitVector * bv(attr.getBitVector(_low));
+ if (bv != NULL) {
+ return BitVectorIterator::create(bv, attr.getCommittedDocIdLimit(), *matchData, strict);
+ } else {
+ return SearchIterator::UP(new queryeval::EmptySearch());
+ }
+ } else {
+ SearchIterator::UP flagIterator(
+ strict
+ ? new FlagAttributeIteratorStrict<typename FlagAttributeT<B>::SearchContext>(*this, matchData)
+ : new FlagAttributeIteratorT<typename FlagAttributeT<B>::SearchContext>(*this, matchData));
+ return flagIterator;
+ }
+ } else {
+ return SearchIterator::UP(new queryeval::EmptySearch());
+ }
+}
+
+template class FlagAttributeT<FlagBaseImpl>;
+template class FlagAttributeT<HugeFlagBaseImpl>;
+
+}
diff --git a/searchlib/src/vespa/searchlib/attribute/flagattribute.h b/searchlib/src/vespa/searchlib/attribute/flagattribute.h
new file mode 100644
index 00000000000..998cc54a44b
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/flagattribute.h
@@ -0,0 +1,73 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/searchlib/attribute/multinumericattribute.h>
+#include <vespa/searchlib/queryeval/searchiterator.h>
+#include <vespa/searchlib/common/rcuvector.h>
+
+namespace search {
+
+typedef MultiValueNumericAttribute< IntegerAttributeTemplate<int8_t>, multivalue::MVMTemplateArg<multivalue::Value<int8_t>, multivalue::Index32> > FlagBaseImpl;
+typedef MultiValueNumericAttribute< IntegerAttributeTemplate<int8_t>, multivalue::MVMTemplateArg<multivalue::Value<int8_t>, multivalue::Index64> > HugeFlagBaseImpl;
+
+template <typename B>
+class FlagAttributeT : public B {
+public:
+ FlagAttributeT(const vespalib::string & baseFileName, const AttributeVector::Config & cfg);
+private:
+ typedef AttributeVector::DocId DocId;
+ typedef FlagBaseImpl::ArraySearchContext BaseSC;
+ class SearchContext : public BaseSC {
+ public:
+ typedef FlagAttributeT<B> Attribute;
+ SearchContext(QueryTermSimple::UP qTerm, const FlagAttributeT<B> & toBeSearched);
+
+ virtual std::unique_ptr<queryeval::SearchIterator>
+ createIterator(fef::TermFieldMatchData * matchData,
+ bool strict);
+
+ private:
+ bool _zeroHits;
+
+ template <class SC> friend class FlagAttributeIteratorT;
+ template <class SC> friend class FlagAttributeIteratorStrict;
+ };
+ virtual bool onLoad();
+
+ virtual bool
+ onLoadEnumerated(typename B::ReaderBase &attrReader);
+
+ AttributeVector::SearchContext::UP
+ getSearch(QueryTermSimple::UP term, const AttributeVector::SearchContext::Params & params) const override;
+ virtual void clearOldValues(DocId doc);
+ virtual void setNewValues(DocId doc, const std::vector<typename B::WType> & values);
+
+public:
+ void
+ setNewBVValue(DocId doc, typename B::WType::ValueType value);
+
+private:
+ virtual bool onAddDoc(DocId doc);
+ void ensureGuardBit(BitVector & bv);
+ void ensureGuardBit();
+ void clearGuardBit(DocId doc);
+ void resizeBitVectors(uint32_t neededSize);
+ void removeOldGenerations(vespalib::GenerationHandler::generation_t firstUsed);
+ uint32_t getOffset(int8_t value) const { return value + 128; }
+ BitVector * getBitVector(typename B::BaseType value) const {
+ return _bitVectors[value + 128];
+ }
+
+ vespalib::GenerationHolder _bitVectorHolder;
+ std::vector<std::shared_ptr<BitVector> > _bitVectorStore;
+ std::vector<BitVector *> _bitVectors;
+ uint32_t _bitVectorSize;
+ template <class SC> friend class FlagAttributeIteratorT;
+ template <class SC> friend class FlagAttributeIteratorStrict;
+};
+
+typedef FlagAttributeT<FlagBaseImpl> FlagAttribute;
+typedef FlagAttributeT<HugeFlagBaseImpl> HugeFlagAttribute;
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/attribute/floatbase.cpp b/searchlib/src/vespa/searchlib/attribute/floatbase.cpp
new file mode 100644
index 00000000000..8532302a70c
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/floatbase.cpp
@@ -0,0 +1,91 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+#include "floatbase.h"
+#include <vespa/searchlib/common/sort.h>
+
+LOG_SETUP(".searchlib.attribute.floatbase");
+
+#include <vespa/searchlib/attribute/attributevector.hpp>
+
+
+namespace search {
+
+IMPLEMENT_IDENTIFIABLE_ABSTRACT(FloatingPointAttribute, NumericAttribute);
+
+FloatingPointAttribute::FloatingPointAttribute(const vespalib::string & name, const Config & c) :
+ NumericAttribute(name, c),
+ _changes()
+{
+}
+
+uint32_t FloatingPointAttribute::clearDoc(DocId doc)
+{
+ uint32_t removed(0);
+ if (hasMultiValue() && (doc < getNumDocs())) {
+ removed = getValueCount(doc);
+ }
+ AttributeVector::clearDoc(_changes, doc);
+
+ return removed;
+}
+
+uint32_t FloatingPointAttribute::get(DocId doc, WeightedString * s, uint32_t sz) const
+{
+ WeightedFloat * v = new WeightedFloat[sz];
+ unsigned num(static_cast<const AttributeVector *>(this)->get(doc, v, sz));
+ for(unsigned i(0); i < num; i++) {
+ char tmp[32];
+ snprintf(tmp, sizeof(tmp), "%g", v[i].getValue());
+ s[i] = WeightedString(tmp, v[i].getWeight());
+ }
+ delete [] v;
+ return num;
+}
+
+uint32_t FloatingPointAttribute::get(DocId doc, WeightedConstChar * v, uint32_t sz) const
+{
+ (void) doc;
+ (void) v;
+ (void) sz;
+ return 0;
+}
+
+uint32_t FloatingPointAttribute::get(DocId doc, vespalib::string * s, uint32_t sz) const
+{
+ double * v = new double[sz];
+ unsigned num(static_cast<const AttributeVector *>(this)->get(doc, v, sz));
+ for(unsigned i(0); i < num; i++) {
+ char tmp[32];
+ snprintf(tmp, sizeof(tmp), "%g", v[i]);
+ s[i] = tmp;
+ }
+ delete [] v;
+ return num;
+}
+
+uint32_t FloatingPointAttribute::get(DocId doc, const char ** v, uint32_t sz) const
+{
+ (void) doc;
+ (void) v;
+ (void) sz;
+ return 0;
+}
+
+bool FloatingPointAttribute::applyWeight(DocId doc, const FieldValue & fv, const ArithmeticValueUpdate & wAdjust)
+{
+ double v = fv.getAsDouble();
+ return AttributeVector::adjustWeight(_changes, doc, NumericChangeData<double>(v), wAdjust);
+}
+
+bool FloatingPointAttribute::apply(DocId doc, const ArithmeticValueUpdate & op)
+{
+ bool retval(doc < getNumDocs());
+ if (retval) {
+ retval = AttributeVector::applyArithmetic(_changes, doc, NumericChangeData<double>(0), op);
+ }
+ return retval;
+}
+
+}
diff --git a/searchlib/src/vespa/searchlib/attribute/floatbase.h b/searchlib/src/vespa/searchlib/attribute/floatbase.h
new file mode 100644
index 00000000000..e802fd9675a
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/floatbase.h
@@ -0,0 +1,123 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/searchlib/attribute/numericbase.h>
+#include <vespa/searchlib/attribute/multivalue.h>
+#include <vespa/searchlib/attribute/loadednumericvalue.h>
+#include <vespa/searchlib/attribute/changevector.h>
+
+namespace search {
+
+// forward declaration of class in enumstore.h
+template <typename T>
+class NumericEntryType;
+
+class FloatingPointAttribute : public NumericAttribute
+{
+public:
+ DECLARE_IDENTIFIABLE_ABSTRACT(FloatingPointAttribute);
+ template<typename Accessor>
+ bool append(DocId doc, Accessor & ac) {
+ return AttributeVector::append(_changes, doc, ac);
+ }
+ bool append(DocId doc, double v, int32_t weight) {
+ return AttributeVector::append(_changes, doc, NumericChangeData<double>(v), weight);
+ }
+ bool remove(DocId doc, double v, int32_t weight) {
+ return AttributeVector::remove(_changes, doc, NumericChangeData<double>(v), weight);
+ }
+ bool update(DocId doc, double v) {
+ return AttributeVector::update(_changes, doc, NumericChangeData<double>(v));
+ }
+ bool apply(DocId doc, const ArithmeticValueUpdate & op);
+ virtual bool applyWeight(DocId doc, const FieldValue & fv, const ArithmeticValueUpdate & wAdjust);
+ virtual uint32_t clearDoc(DocId doc);
+protected:
+ virtual const char * getString(DocId doc, char * s, size_t sz) const { double v = getFloat(doc); snprintf(s, sz, "%g", v); return s; }
+ FloatingPointAttribute(const vespalib::string & name, const Config & c);
+ typedef ChangeTemplate<NumericChangeData<double> > Change;
+ typedef ChangeVectorT< Change > ChangeVector;
+ ChangeVector _changes;
+
+private:
+ virtual uint32_t get(DocId doc, vespalib::string * v, uint32_t sz) const;
+ virtual uint32_t get(DocId doc, const char ** v, uint32_t sz) const;
+ virtual uint32_t get(DocId doc, WeightedString * v, uint32_t sz) const;
+ virtual uint32_t get(DocId doc, WeightedConstChar * v, uint32_t sz) const;
+ virtual double getFloatFromEnum(EnumHandle e) const = 0;
+};
+
+template<typename T>
+class FloatingPointAttributeTemplate : public FloatingPointAttribute
+{
+public:
+ typedef WeightedType<T> Weighted;
+ virtual uint32_t getAll(DocId doc, T * v, uint32_t sz) const = 0;
+ virtual uint32_t getAll(DocId doc, Weighted * v, uint32_t sz) const = 0;
+protected:
+ typedef NumericEntryType<T> EnumEntryType;
+
+ typedef attribute::LoadedNumericValue<T> LoadedNumericValueT;
+
+public:
+ typedef T BaseType;
+ typedef T LoadedValueType;
+ typedef SequentialReadModifyWriteInterface<LoadedNumericValueT> LoadedVector;
+ virtual uint32_t getRawValues(DocId doc, const multivalue::Value<T> * & values) const {
+ (void) doc;
+ (void) values;
+ throw std::runtime_error(getNativeClassName() + "::getRawValues() not implemented.");
+ }
+
+protected:
+ FloatingPointAttributeTemplate(const vespalib::string & name) :
+ FloatingPointAttribute(name, BasicType::fromType(T())),
+ _defaultValue(ChangeBase::UPDATE, 0, attribute::getUndefined<T>())
+ { }
+ FloatingPointAttributeTemplate(const vespalib::string & name, const Config & c) :
+ FloatingPointAttribute(name, c),
+ _defaultValue(ChangeBase::UPDATE, 0, attribute::getUndefined<T>())
+ { assert(c.basicType() == BasicType::fromType(T())); }
+ static T defaultValue() { return attribute::getUndefined<T>(); }
+ virtual bool findEnum(T v, EnumHandle & e) const = 0;
+ virtual largeint_t getDefaultValue() const { return static_cast<largeint_t>(-std::numeric_limits<T>::max()); }
+ Change _defaultValue;
+private:
+ virtual bool findEnum(const char *value, EnumHandle &e) const {
+ vespalib::asciistream iss(value);
+ T fvalue = 0;
+ try {
+ iss >> fvalue;
+ } catch (const vespalib::IllegalArgumentException &) {
+ }
+ return findEnum(fvalue, e);
+ }
+ virtual bool isUndefined(DocId doc) const { return attribute::isUndefined(get(doc)); }
+ virtual T get(DocId doc) const = 0;
+ virtual T getFromEnum(EnumHandle e) const = 0;
+
+ virtual double getFloatFromEnum(EnumHandle e) const { return getFromEnum(e); }
+ virtual long onSerializeForAscendingSort(DocId doc, void * serTo, long available, const common::BlobConverter * bc) const {
+ (void) bc;
+ if (available >= long(sizeof(T))) {
+ T origValue(get(doc));
+ vespalib::serializeForSort< vespalib::convertForSort<T, true> >(origValue, serTo);
+ } else {
+ return -1;
+ }
+ return sizeof(T);
+ }
+ virtual long onSerializeForDescendingSort(DocId doc, void * serTo, long available, const common::BlobConverter * bc) const {
+ (void) bc;
+ if (available >= long(sizeof(T))) {
+ T origValue(get(doc));
+ vespalib::serializeForSort< vespalib::convertForSort<T, false> >(origValue, serTo);
+ } else {
+ return -1;
+ }
+ return sizeof(T);
+ }
+};
+
+}
+
diff --git a/searchlib/src/vespa/searchlib/attribute/i_document_weight_attribute.cpp b/searchlib/src/vespa/searchlib/attribute/i_document_weight_attribute.cpp
new file mode 100644
index 00000000000..2b1d4a01838
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/i_document_weight_attribute.cpp
@@ -0,0 +1,4 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "i_document_weight_attribute.h"
diff --git a/searchlib/src/vespa/searchlib/attribute/i_document_weight_attribute.h b/searchlib/src/vespa/searchlib/attribute/i_document_weight_attribute.h
new file mode 100644
index 00000000000..71d6e94bb87
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/i_document_weight_attribute.h
@@ -0,0 +1,32 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/btree/entryref.h>
+#include <vespa/searchlib/btree/btreeiterator.h>
+
+namespace search {
+
+namespace query { class Node; }
+
+typedef btree::BTreeConstIterator<uint32_t, int32_t, btree::MinMaxAggregated, std::less<uint32_t>, btree::BTreeDefaultTraits> DocumentWeightIterator;
+
+struct IDocumentWeightAttribute
+{
+ struct LookupResult {
+ const btree::EntryRef posting_idx;
+ const uint32_t posting_size;
+ const int32_t min_weight;
+ const int32_t max_weight;
+ LookupResult() : posting_idx(), posting_size(0), min_weight(0), max_weight(0) {}
+ LookupResult(btree::EntryRef posting_idx_in, uint32_t posting_size_in, int32_t min_weight_in, int32_t max_weight_in)
+ : posting_idx(posting_idx_in), posting_size(posting_size_in), min_weight(min_weight_in), max_weight(max_weight_in) {}
+ };
+ virtual LookupResult lookup(const vespalib::string &term) const = 0;
+ virtual void create(btree::EntryRef idx, std::vector<DocumentWeightIterator> &dst) const = 0;
+ virtual DocumentWeightIterator create(btree::EntryRef idx) const = 0;
+ virtual ~IDocumentWeightAttribute() {}
+};
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/attribute/iattributefilewriter.h b/searchlib/src/vespa/searchlib/attribute/iattributefilewriter.h
new file mode 100644
index 00000000000..ec0fbf8e6a4
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/iattributefilewriter.h
@@ -0,0 +1,38 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/vespalib/data/databuffer.h>
+
+namespace search
+{
+
+class BufferWriter;
+
+/*
+ * Interface class to write to a single attribute vector file. Used by
+ * IAttributSaver.
+ */
+class IAttributeFileWriter
+{
+public:
+ using BufferBuf = vespalib::MMapDataBuffer;
+ using Buffer = std::unique_ptr<BufferBuf>;
+
+ virtual ~IAttributeFileWriter() = default;
+
+ /*
+ * Allocate a buffer that can later be passed on to writeBuf.
+ */
+ virtual Buffer allocBuf(size_t size) = 0;
+
+ /**
+ * Writes the given data. Multiple calls are allowed, but only the
+ * last call can provide an unaligned buffer.
+ **/
+ virtual void writeBuf(Buffer buf) = 0;
+
+ virtual std::unique_ptr<BufferWriter> allocBufferWriter() = 0;
+};
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/attribute/iattributemanager.cpp b/searchlib/src/vespa/searchlib/attribute/iattributemanager.cpp
new file mode 100644
index 00000000000..0141534b1ba
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/iattributemanager.cpp
@@ -0,0 +1,11 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "iattributemanager.h"
+#include <vespa/log/log.h>
+
+LOG_SETUP(".searchlib.attribute.iattributemanager");
+namespace search {
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/attribute/iattributemanager.h b/searchlib/src/vespa/searchlib/attribute/iattributemanager.h
new file mode 100644
index 00000000000..7e3ab9c164c
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/iattributemanager.h
@@ -0,0 +1,57 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "attributeguard.h"
+#include <vespa/searchcommon/attribute/iattributecontext.h>
+#include <vector>
+
+namespace search {
+
+/**
+ * This is an interface used to access all registered attribute vectors.
+ **/
+class IAttributeManager {
+public:
+ typedef std::shared_ptr<IAttributeManager> SP;
+ typedef vespalib::string string;
+
+ /**
+ * Returns a view of the attribute vector with the given name.
+ *
+ * @param name name of the attribute vector.
+ * @return view of the attribute vector or empty view if the attribute vector does not exists.
+ **/
+ virtual AttributeGuard::UP getAttribute(const string & name) const = 0;
+
+ /**
+ * Returns a view of the attribute vector with the given name.
+ * Makes sure that the underlying enum values are stable during the use of this attribute vector.
+ *
+ * @param name name of the attribute vector.
+ * @return view of the attribute vector or empty view if the attribute vector does not exists.
+ **/
+ virtual AttributeGuard::UP getAttributeStableEnum(const string & name) const = 0;
+
+ /**
+ * Fill the given list with all attribute vectors registered in this manager.
+ *
+ * @param list the list to fill in attribute vectors.
+ **/
+ virtual void getAttributeList(std::vector<AttributeGuard> & list) const = 0;
+
+ /**
+ * Creates a per thread attribute context used to provide read access to attributes.
+ *
+ * @return the attribute context
+ **/
+ virtual attribute::IAttributeContext::UP createContext() const = 0;
+
+ /**
+ * Virtual destructor to allow safe subclassing.
+ **/
+ virtual ~IAttributeManager() {}
+};
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/attribute/iattributesavetarget.cpp b/searchlib/src/vespa/searchlib/attribute/iattributesavetarget.cpp
new file mode 100644
index 00000000000..e0f240dc15f
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/iattributesavetarget.cpp
@@ -0,0 +1,11 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "iattributesavetarget.h"
+#include <vespa/log/log.h>
+
+LOG_SETUP(".searchlib.attribute.iattributesavetarget");
+namespace search {
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/attribute/iattributesavetarget.h b/searchlib/src/vespa/searchlib/attribute/iattributesavetarget.h
new file mode 100644
index 00000000000..d3e74ef4885
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/iattributesavetarget.h
@@ -0,0 +1,161 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/vespalib/stllike/string.h>
+#include <vespa/vespalib/data/databuffer.h>
+#include <stdint.h>
+#include "iattributefilewriter.h"
+
+namespace search {
+
+/**
+ * Interface used for saving an attribute vector.
+ **/
+class IAttributeSaveTarget {
+public:
+ /**
+ * Config class used by actual saveTarget implementations.
+ **/
+ class Config {
+ private:
+ vespalib::string _fileName;
+ vespalib::string _basicType;
+ vespalib::string _collectionType;
+ vespalib::string _tensorType;
+ bool _hasMultiValue;
+ bool _hasWeightedSetType;
+ bool _enumerated;
+ uint32_t _numDocs;
+ uint32_t _fixedWidth;
+ uint64_t _uniqueValueCount;
+ uint64_t _totalValueCount;
+ uint64_t _createSerialNum;
+ uint32_t _version;
+ public:
+ Config()
+ : _fileName(""),
+ _basicType(""),
+ _collectionType(""),
+ _hasMultiValue(false),
+ _hasWeightedSetType(false),
+ _enumerated(false),
+ _numDocs(0),
+ _fixedWidth(0),
+ _uniqueValueCount(0),
+ _totalValueCount(0),
+ _createSerialNum(0u),
+ _version(0)
+ {
+ }
+
+ Config(const vespalib::string &fileName,
+ const vespalib::string &basicType,
+ const vespalib::string &collectionType,
+ const vespalib::string &tensorType,
+ bool multiValue, bool weightedSetType,
+ bool enumerated,
+ uint32_t numDocs,
+ uint32_t fixedWidth,
+ uint64_t uniqueValueCount,
+ uint64_t totalValueCount,
+ uint64_t createSerialNum,
+ uint32_t version
+ )
+ : _fileName(fileName),
+ _basicType(basicType),
+ _collectionType(collectionType),
+ _tensorType(tensorType),
+ _hasMultiValue(multiValue),
+ _hasWeightedSetType(weightedSetType),
+ _enumerated(enumerated),
+ _numDocs(numDocs),
+ _fixedWidth(fixedWidth),
+ _uniqueValueCount(uniqueValueCount),
+ _totalValueCount(totalValueCount),
+ _createSerialNum(createSerialNum),
+ _version(version)
+ {
+ }
+ const vespalib::string & getFileName() const { return _fileName; }
+
+ const vespalib::string &
+ getBasicType() const
+ {
+ return _basicType;
+ }
+
+ const vespalib::string &
+ getCollectionType() const
+ {
+ return _collectionType;
+ }
+
+ const vespalib::string &getTensorType() const {
+ return _tensorType;
+ }
+
+ bool hasMultiValue() const { return _hasMultiValue; }
+ bool hasWeightedSetType() const { return _hasWeightedSetType; }
+ uint32_t getNumDocs() const { return _numDocs; }
+ size_t getFixedWidth() const { return _fixedWidth; }
+
+ uint64_t
+ getUniqueValueCount(void) const
+ {
+ return _uniqueValueCount;
+ }
+
+ uint64_t
+ getTotalValueCount(void) const
+ {
+ return _totalValueCount;
+ }
+
+ bool
+ getEnumerated(void) const
+ {
+ return _enumerated;
+ }
+
+ uint64_t
+ getCreateSerialNum(void) const
+ {
+ return _createSerialNum;
+ }
+
+ uint32_t getVersion() const { return _version; }
+ };
+ using Buffer = IAttributeFileWriter::Buffer;
+protected:
+ Config _cfg;
+public:
+ IAttributeSaveTarget() : _cfg() {}
+ void setConfig(const Config & cfg) { _cfg = cfg; }
+
+ bool
+ getEnumerated(void) const
+ {
+ return _cfg.getEnumerated();
+ }
+
+ /**
+ * Setups this saveTarget before any data is written. Returns true
+ * on success.
+ **/
+ virtual bool setup() = 0;
+ /**
+ * Closes this saveTarget when all data is written.
+ **/
+ virtual void close() = 0;
+
+ virtual IAttributeFileWriter &datWriter() = 0;
+ virtual IAttributeFileWriter &idxWriter() = 0;
+ virtual IAttributeFileWriter &weightWriter() = 0;
+ virtual IAttributeFileWriter &udatWriter() = 0;
+
+ virtual ~IAttributeSaveTarget() {}
+};
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/attribute/integerbase.cpp b/searchlib/src/vespa/searchlib/attribute/integerbase.cpp
new file mode 100644
index 00000000000..af49b222b04
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/integerbase.cpp
@@ -0,0 +1,90 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "integerbase.h"
+#include <vespa/searchlib/common/sort.h>
+#include <vespa/log/log.h>
+
+LOG_SETUP(".searchlib.attribute.integerbase");
+
+#include <vespa/searchlib/attribute/attributevector.hpp>
+
+namespace search {
+
+IMPLEMENT_IDENTIFIABLE_ABSTRACT(IntegerAttribute, NumericAttribute);
+
+IntegerAttribute::IntegerAttribute(const vespalib::string & name, const Config & c) :
+ NumericAttribute(name, c),
+ _changes()
+{
+}
+
+uint32_t IntegerAttribute::clearDoc(DocId doc)
+{
+ uint32_t removed(0);
+ if (hasMultiValue() && (doc < getNumDocs())) {
+ removed = getValueCount(doc);
+ }
+ AttributeVector::clearDoc(_changes, doc);
+
+ return removed;
+}
+
+uint32_t IntegerAttribute::get(DocId doc, WeightedString * s, uint32_t sz) const
+{
+ WeightedInt * v = new WeightedInt[sz];
+ unsigned num(static_cast<const AttributeVector *>(this)->get(doc, v, sz));
+ for(unsigned i(0); i < num; i++) {
+ char tmp[32];
+ snprintf(tmp, sizeof(tmp), "%" PRId64, v[i].getValue());
+ s[i] = WeightedString(tmp, v[i].getWeight());
+ }
+ delete [] v;
+ return num;
+}
+
+uint32_t IntegerAttribute::get(DocId doc, WeightedConstChar * v, uint32_t sz) const
+{
+ (void) doc;
+ (void) v;
+ (void) sz;
+ return 0;
+}
+
+uint32_t IntegerAttribute::get(DocId doc, vespalib::string * s, uint32_t sz) const
+{
+ largeint_t * v = new largeint_t[sz];
+ unsigned num(static_cast<const AttributeVector *>(this)->get(doc, v, sz));
+ for(unsigned i(0); i < num; i++) {
+ char tmp[32];
+ snprintf(tmp, sizeof(tmp), "%" PRId64, v[i]);
+ s[i] = tmp;
+ }
+ delete [] v;
+ return num;
+}
+
+uint32_t IntegerAttribute::get(DocId doc, const char ** v, uint32_t sz) const
+{
+ (void) doc;
+ (void) v;
+ (void) sz;
+ return 0;
+}
+
+bool IntegerAttribute::applyWeight(DocId doc, const FieldValue & fv, const ArithmeticValueUpdate & wAdjust)
+{
+ largeint_t v = fv.getAsLong();
+ return AttributeVector::adjustWeight(_changes, doc, NumericChangeData<largeint_t>(v), wAdjust);
+}
+
+bool IntegerAttribute::apply(DocId doc, const ArithmeticValueUpdate & op)
+{
+ bool retval(doc < getNumDocs());
+ if (retval) {
+ retval = AttributeVector::applyArithmetic(_changes, doc, NumericChangeData<largeint_t>(0), op);
+ }
+ return retval;
+}
+
+}
diff --git a/searchlib/src/vespa/searchlib/attribute/integerbase.h b/searchlib/src/vespa/searchlib/attribute/integerbase.h
new file mode 100644
index 00000000000..702bab0772c
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/integerbase.h
@@ -0,0 +1,136 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/searchlib/attribute/numericbase.h>
+#include <vespa/searchlib/attribute/multivalue.h>
+#include <vespa/searchlib/attribute/loadednumericvalue.h>
+#include <vespa/searchlib/attribute/changevector.h>
+
+namespace search {
+
+// forward declaration of class in enumstore.h
+template <typename T>
+class NumericEntryType;
+
+class IntegerAttribute : public NumericAttribute
+{
+public:
+ DECLARE_IDENTIFIABLE_ABSTRACT(IntegerAttribute);
+ bool update(DocId doc, largeint_t v) {
+ return AttributeVector::update(_changes, doc, NumericChangeData<largeint_t>(v));
+ }
+ template<typename Accessor>
+ bool append(DocId doc, Accessor & ac) {
+ return AttributeVector::append(_changes, doc, ac);
+ }
+ bool append(DocId doc, largeint_t v, int32_t weight) {
+ return AttributeVector::append(_changes, doc, NumericChangeData<largeint_t>(v), weight);
+ }
+ bool remove(DocId doc, largeint_t v, int32_t weight) {
+ return AttributeVector::remove(_changes, doc, NumericChangeData<largeint_t>(v), weight);
+ }
+ bool apply(DocId doc, const ArithmeticValueUpdate & op);
+ virtual bool applyWeight(DocId doc, const FieldValue & fv, const ArithmeticValueUpdate & wAdjust);
+ virtual uint32_t clearDoc(DocId doc);
+protected:
+ IntegerAttribute(const vespalib::string & name, const Config & c);
+ typedef ChangeTemplate<NumericChangeData<largeint_t> > Change;
+ typedef ChangeVectorT< Change > ChangeVector;
+ ChangeVector _changes;
+
+private:
+ virtual const char * getString(DocId doc, char * s, size_t sz) const { largeint_t v = getInt(doc); snprintf(s, sz, "%" PRId64, v); return s; }
+ virtual uint32_t get(DocId doc, vespalib::string * v, uint32_t sz) const;
+ virtual uint32_t get(DocId doc, const char ** v, uint32_t sz) const;
+ virtual uint32_t get(DocId doc, WeightedString * v, uint32_t sz) const;
+ virtual uint32_t get(DocId doc, WeightedConstChar * v, uint32_t sz) const;
+ virtual largeint_t getIntFromEnum(EnumHandle e) const = 0;
+};
+
+template<typename T>
+class IntegerAttributeTemplate : public IntegerAttribute
+{
+public:
+ typedef WeightedType<T> Weighted;
+ virtual uint32_t getAll(DocId doc, T * v, uint32_t sz) const = 0;
+ virtual uint32_t getAll(DocId doc, Weighted * v, uint32_t sz) const = 0;
+protected:
+ typedef NumericEntryType<T> EnumEntryType;
+ typedef attribute::LoadedNumericValue<T> LoadedNumericValueT;
+public:
+ typedef T BaseType;
+ typedef T LoadedValueType;
+ typedef SequentialReadModifyWriteInterface<LoadedNumericValueT> LoadedVector;
+ virtual uint32_t getRawValues(DocId doc, const multivalue::Value<T> * & values) const {
+ (void) doc;
+ (void) values;
+ throw std::runtime_error(getNativeClassName() + "::getRawValues() not implemented.");
+ }
+
+protected:
+ IntegerAttributeTemplate(const vespalib::string & name) :
+ IntegerAttribute(name, BasicType::fromType(T())),
+ _defaultValue(ChangeBase::UPDATE, 0, defaultValue())
+ { }
+ IntegerAttributeTemplate(const vespalib::string & name, const Config & c) :
+ IntegerAttribute(name, c),
+ _defaultValue(ChangeBase::UPDATE, 0, defaultValue())
+ { assert(c.basicType() == BasicType::fromType(T())); }
+ IntegerAttributeTemplate(const vespalib::string & name,
+ const Config & c,
+ const BasicType &realType)
+ : IntegerAttribute(name, c),
+ _defaultValue(ChangeBase::UPDATE, 0, 0u)
+ {
+ assert(c.basicType() == realType);
+ (void) realType;
+ assert(BasicType::fromType(T()) == BasicType::INT8);
+ }
+ static T defaultValue() { return attribute::getUndefined<T>(); }
+ virtual bool findEnum(T v, EnumHandle & e) const = 0;
+ virtual largeint_t getDefaultValue() const { return defaultValue(); }
+ virtual bool isUndefined(DocId doc) const { return get(doc) == defaultValue(); }
+ Change _defaultValue;
+private:
+ virtual bool findEnum(const char *value, EnumHandle &e) const {
+ vespalib::asciistream iss(value);
+ int64_t ivalue = 0;
+ try {
+ iss >> ivalue;
+ } catch (const vespalib::IllegalArgumentException &) {
+ }
+ return findEnum(ivalue, e);
+ }
+ virtual T get(DocId doc) const = 0;
+ virtual T getFromEnum(EnumHandle e) const = 0;
+ virtual largeint_t getIntFromEnum(EnumHandle e) const {
+ T v(getFromEnum(e));
+ if (attribute::isUndefined<T>(v)) {
+ return attribute::getUndefined<largeint_t>();
+ }
+ return v;
+ }
+ virtual long onSerializeForAscendingSort(DocId doc, void * serTo, long available, const common::BlobConverter * bc) const {
+ (void) bc;
+ if (available >= long(sizeof(T))) {
+ T origValue(get(doc));
+ vespalib::serializeForSort< vespalib::convertForSort<T, true> >(origValue, serTo);
+ } else {
+ return -1;
+ }
+ return sizeof(T);
+ }
+ virtual long onSerializeForDescendingSort(DocId doc, void * serTo, long available, const common::BlobConverter * bc) const {
+ (void) bc;
+ if (available >= long(sizeof(T))) {
+ T origValue(get(doc));
+ vespalib::serializeForSort< vespalib::convertForSort<T, false> >(origValue, serTo);
+ } else {
+ return -1;
+ }
+ return sizeof(T);
+ }
+};
+
+}
+
diff --git a/searchlib/src/vespa/searchlib/attribute/interlock.h b/searchlib/src/vespa/searchlib/attribute/interlock.h
new file mode 100644
index 00000000000..3600fe70bce
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/interlock.h
@@ -0,0 +1,65 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <mutex>
+
+namespace search
+{
+
+namespace attribute
+{
+
+class InterlockGuard;
+
+/**
+ * Class used to serialize getting enum change exclusive lock. This
+ * eliminates the need for defining a locking order when getting enum
+ * change shared locks. Scenario avoided is:
+ *
+ * Threads T1, T2: Grouping queries
+ * Threads T3, T4: Attribute writer threads
+ *
+ * Thread T1 gets shared lock on A1
+ * Thread T2 gets shared lock on A2
+ * Theead T3 tries to get exclusive lock on A1
+ * Theead T4 tries to get exclusive lock on A2
+ * Thread T1 tries to get shared lock on A2
+ * Thread T2 tries to get shared lock on A1
+ *
+ * With the interlock properly used, thread T3 will hold the
+ * interlock, preventing thread T4 from registering intent to get
+ * write lock on A2, thus thread T1 can get a shared lock on A2 and complete.
+ */
+class Interlock {
+ std::mutex _mutex;
+ friend class InterlockGuard;
+public:
+ Interlock()
+ : _mutex()
+ {
+ }
+
+ virtual ~Interlock() { }
+};
+
+/**
+ * Class used to serialize getting enum change exclusive lock. The guard
+ * is passed to EnumModifier constructor to signal that interlock is held.
+ */
+class InterlockGuard
+{
+ std::lock_guard<std::mutex> _guard;
+public:
+ InterlockGuard(Interlock &interlock)
+ : _guard(interlock._mutex)
+ {
+ }
+
+ ~InterlockGuard() { }
+};
+
+
+}
+
+}
diff --git a/searchlib/src/vespa/searchlib/attribute/ipostinglistattributebase.h b/searchlib/src/vespa/searchlib/attribute/ipostinglistattributebase.h
new file mode 100644
index 00000000000..b4dbf78f64c
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/ipostinglistattributebase.h
@@ -0,0 +1,33 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+
+namespace search
+{
+
+namespace attribute
+{
+
+class IPostingListAttributeBase
+{
+public:
+ virtual
+ ~IPostingListAttributeBase()
+ {
+ }
+
+ virtual void
+ clearPostings(IAttributeVector::EnumHandle eidx,
+ uint32_t fromLid,
+ uint32_t toLid) = 0;
+
+ virtual void
+ forwardedShrinkLidSpace(uint32_t newSize) = 0;
+};
+
+
+} // namespace attribute
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/attribute/ipostinglistsearchcontext.cpp b/searchlib/src/vespa/searchlib/attribute/ipostinglistsearchcontext.cpp
new file mode 100644
index 00000000000..89927def124
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/ipostinglistsearchcontext.cpp
@@ -0,0 +1,11 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "ipostinglistsearchcontext.h"
+#include <vespa/log/log.h>
+
+LOG_SETUP(".searchlib.attribute.ipostinglistsearchcontext");
+namespace search {
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/attribute/ipostinglistsearchcontext.h b/searchlib/src/vespa/searchlib/attribute/ipostinglistsearchcontext.h
new file mode 100644
index 00000000000..48f1d1f509a
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/ipostinglistsearchcontext.h
@@ -0,0 +1,62 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+
+namespace search
+{
+
+namespace queryeval
+{
+
+class SearchIterator;
+}
+
+namespace fef
+{
+
+class TermFieldMatchData;
+}
+
+
+namespace attribute
+{
+
+
+/**
+ * Interface for search context helper classes to create attribute
+ * search iterators based on posting lists and using dictionary
+ * information to better estimate number of hits. Also used for
+ * enumerated attributes without posting lists to eliminate brute
+ * force searches for nonexisting values.
+ */
+
+class IPostingListSearchContext
+{
+protected:
+
+ IPostingListSearchContext(void)
+ {
+ }
+
+ virtual
+ ~IPostingListSearchContext(void)
+ {
+ }
+
+public:
+ virtual void
+ fetchPostings(bool strict) = 0;
+
+ virtual std::unique_ptr<queryeval::SearchIterator>
+ createPostingIterator(fef::TermFieldMatchData *matchData, bool strict) = 0;
+
+ virtual unsigned int
+ approximateHits(void) const = 0;
+};
+
+
+} // namespace attribute
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/attribute/iterator_pack.cpp b/searchlib/src/vespa/searchlib/attribute/iterator_pack.cpp
new file mode 100644
index 00000000000..31844ec3a02
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/iterator_pack.cpp
@@ -0,0 +1,10 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "iterator_pack.h"
+#include <vespa/log/log.h>
+
+LOG_SETUP(".searchlib.attribute.iterator_pack");
+namespace search {
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/attribute/iterator_pack.h b/searchlib/src/vespa/searchlib/attribute/iterator_pack.h
new file mode 100644
index 00000000000..fa4b64599f6
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/iterator_pack.h
@@ -0,0 +1,56 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "i_document_weight_attribute.h"
+#include <vespa/searchlib/queryeval/begin_and_end_id.h>
+#include <assert.h>
+
+namespace search {
+
+class AttributeIteratorPack
+{
+private:
+ std::vector<DocumentWeightIterator> _children;
+
+public:
+ AttributeIteratorPack() : _children() {}
+ AttributeIteratorPack(AttributeIteratorPack &&rhs)
+ : _children(std::move(rhs._children)) {}
+
+ AttributeIteratorPack &operator=(AttributeIteratorPack &&rhs) {
+ _children = std::move(rhs._children);
+ return *this;
+ }
+
+ explicit AttributeIteratorPack(std::vector<DocumentWeightIterator> &&children)
+ : _children(std::move(children)) {}
+
+ uint32_t get_docid(uint16_t ref) const {
+ return _children[ref].valid() ? _children[ref].getKey() : endDocId;
+ }
+
+ uint32_t seek(uint16_t ref, uint32_t docid) {
+ _children[ref].linearSeek(docid);
+ if (__builtin_expect(_children[ref].valid(), true)) {
+ return _children[ref].getKey();
+ }
+ return endDocId;
+ }
+
+ int32_t get_weight(uint16_t ref, uint32_t) {
+ return _children[ref].getData();
+ }
+
+ size_t size() const { return _children.size(); }
+ void initRange(uint32_t begin, uint32_t end) {
+ (void) end;
+ for (auto &child: _children) {
+ child.lower_bound(begin);
+ }
+ }
+};
+
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/attribute/loadedenumvalue.cpp b/searchlib/src/vespa/searchlib/attribute/loadedenumvalue.cpp
new file mode 100644
index 00000000000..d86e41d332e
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/loadedenumvalue.cpp
@@ -0,0 +1,28 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "loadedenumvalue.h"
+#include <vespa/searchlib/common/sort.h>
+
+
+namespace search
+{
+
+namespace attribute
+{
+
+void
+sortLoadedByEnum(LoadedEnumAttributeVector &loaded)
+{
+ ShiftBasedRadixSorter<LoadedEnumAttribute,
+ LoadedEnumAttribute::EnumRadix,
+ LoadedEnumAttribute::EnumCompare, 56>::
+ radix_sort(LoadedEnumAttribute::EnumRadix(),
+ LoadedEnumAttribute::EnumCompare(),
+ &loaded[0], loaded.size(), 16);
+}
+
+} // namespace attribute
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/attribute/loadedenumvalue.h b/searchlib/src/vespa/searchlib/attribute/loadedenumvalue.h
new file mode 100644
index 00000000000..47b7eab1b83
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/loadedenumvalue.h
@@ -0,0 +1,177 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/vespalib/util/array.h>
+#include <vespa/searchlib/attribute/enumstorebase.h>
+
+namespace search
+{
+
+namespace attribute
+{
+
+/**
+ * Temporary representation of enumerated attribute loaded from enumerated
+ * save file.
+ */
+
+class LoadedEnumAttribute
+{
+private:
+ uint32_t _enum;
+ uint32_t _docId;
+ uint32_t _idx;
+ int32_t _weight;
+
+public:
+ class EnumRadix
+ {
+ public:
+ uint64_t
+ operator()(const LoadedEnumAttribute &v)
+ {
+ return (static_cast<uint64_t>(v._enum) << 32) | v._docId;
+ }
+ };
+
+ class EnumCompare : public std::binary_function<LoadedEnumAttribute,
+ LoadedEnumAttribute,
+ bool>
+ {
+ public:
+ bool
+ operator()(const LoadedEnumAttribute &x,
+ const LoadedEnumAttribute &y) const
+ {
+ if (x.getEnum() != y.getEnum())
+ return x.getEnum() < y.getEnum();
+ return x.getDocId() < y.getDocId();
+ }
+ };
+
+ LoadedEnumAttribute(void)
+ : _enum(0),
+ _docId(0),
+ _idx(0),
+ _weight(1)
+ {
+ }
+
+ LoadedEnumAttribute(uint32_t e,
+ uint32_t docId,
+ uint32_t idx,
+ int32_t weight)
+ : _enum(e),
+ _docId(docId),
+ _idx(idx),
+ _weight(weight)
+ {
+ }
+
+ uint32_t
+ getEnum(void) const
+ {
+ return _enum;
+ }
+
+ uint32_t
+ getDocId(void) const
+ {
+ return _docId;
+ }
+
+ uint32_t
+ getIdx(void) const
+ {
+ return _idx;
+ }
+
+ int32_t
+ getWeight(void) const
+ {
+ return _weight;
+ }
+};
+
+typedef vespalib::Array<LoadedEnumAttribute, vespalib::DefaultAlloc> LoadedEnumAttributeVector;
+
+
+/**
+ * Helper class used to populate temporary vector representing loaded
+ * enumerated attribute with posting lists loaded from enumerated save
+ * file.
+ */
+
+class SaveLoadedEnum
+{
+private:
+ LoadedEnumAttributeVector &_loaded;
+
+public:
+ SaveLoadedEnum(LoadedEnumAttributeVector &loaded)
+ : _loaded(loaded)
+ {
+ }
+
+ void
+ save(uint32_t e, uint32_t docId, uint32_t vci, int32_t weight)
+ {
+ _loaded.push_back(LoadedEnumAttribute(e, docId, vci, weight));
+ }
+};
+
+/**
+ * Helper class used when loading non-enumerated attribute from
+ * enumerated save file.
+ */
+
+class NoSaveLoadedEnum
+{
+public:
+ static void
+ save(uint32_t e, uint32_t docId, uint32_t vci, int32_t weight)
+ {
+ (void) e;
+ (void) docId;
+ (void) vci;
+ (void) weight;
+ }
+};
+
+/**
+ * Helper class used to populate temporary vector representing loaded
+ * enumerated attribute without posting lists loaded from enumerated
+ * save file.
+ */
+
+class SaveEnumHist
+{
+ uint32_t *const _hist;
+ const size_t _histSize;
+
+public:
+ SaveEnumHist(EnumStoreBase::EnumVector &enumHist)
+ : _hist(&enumHist[0]),
+ _histSize(enumHist.size())
+ {
+ }
+
+ void
+ save(uint32_t e, uint32_t docId, uint32_t vci, int32_t weight)
+ {
+ (void) docId;
+ (void) vci;
+ (void) weight;
+ assert(e < _histSize);
+ ++_hist[e];
+ }
+};
+
+void
+sortLoadedByEnum(LoadedEnumAttributeVector &loaded);
+
+} // namespace attribute
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/attribute/loadednumericvalue.cpp b/searchlib/src/vespa/searchlib/attribute/loadednumericvalue.cpp
new file mode 100644
index 00000000000..bea214ed8bd
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/loadednumericvalue.cpp
@@ -0,0 +1,124 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "loadednumericvalue.h"
+
+
+namespace search
+{
+
+namespace attribute
+{
+
+template <typename T>
+void
+sortLoadedByValue(SequentialReadModifyWriteVector<LoadedNumericValue<T>,
+ vespalib::DefaultAlloc> &
+ loaded)
+{
+ ShiftBasedRadixSorter<LoadedNumericValue<T>,
+ typename LoadedNumericValue<T>::ValueRadix,
+ typename LoadedNumericValue<T>::ValueCompare, 56>::
+ radix_sort(typename LoadedNumericValue<T>::ValueRadix(),
+ typename LoadedNumericValue<T>::ValueCompare(),
+ &loaded[0],
+ loaded.size(),
+ 16);
+}
+
+
+template <typename T>
+void
+sortLoadedByDocId(SequentialReadModifyWriteVector<LoadedNumericValue<T>,
+ vespalib::DefaultAlloc> &
+ loaded)
+{
+ ShiftBasedRadixSorter<LoadedNumericValue<T>,
+ typename LoadedNumericValue<T>::DocRadix,
+ typename LoadedNumericValue<T>::DocOrderCompare, 56>::
+ radix_sort(typename LoadedNumericValue<T>::DocRadix(),
+ typename LoadedNumericValue<T>::DocOrderCompare(),
+ &loaded[0],
+ loaded.size(),
+ 16);
+}
+
+
+template
+void
+sortLoadedByValue(SequentialReadModifyWriteVector<LoadedNumericValue<int8_t>,
+ vespalib::DefaultAlloc> &
+ loaded);
+
+template
+void
+sortLoadedByValue(SequentialReadModifyWriteVector<LoadedNumericValue<int16_t>,
+ vespalib::DefaultAlloc> &
+ loaded);
+
+template
+void
+sortLoadedByValue(SequentialReadModifyWriteVector<LoadedNumericValue<int32_t>,
+ vespalib::DefaultAlloc> &
+ loaded);
+
+template
+void
+sortLoadedByValue(SequentialReadModifyWriteVector<LoadedNumericValue<int64_t>,
+ vespalib::DefaultAlloc> &
+ loaded);
+
+template
+void
+sortLoadedByValue(SequentialReadModifyWriteVector<LoadedNumericValue<float>,
+ vespalib::DefaultAlloc> &
+ loaded);
+
+template
+void
+sortLoadedByValue(SequentialReadModifyWriteVector<LoadedNumericValue<double>,
+ vespalib::DefaultAlloc> &
+ loaded);
+
+template
+void
+sortLoadedByDocId(SequentialReadModifyWriteVector<LoadedNumericValue<int8_t>,
+ vespalib::DefaultAlloc> &
+ loaded);
+
+template
+void
+sortLoadedByDocId(SequentialReadModifyWriteVector<LoadedNumericValue<int16_t>,
+ vespalib::DefaultAlloc> &
+ loaded);
+
+template
+void
+sortLoadedByDocId(SequentialReadModifyWriteVector<LoadedNumericValue<int32_t>,
+ vespalib::DefaultAlloc> &
+ loaded);
+
+template
+void
+sortLoadedByDocId(SequentialReadModifyWriteVector<LoadedNumericValue<int64_t>,
+ vespalib::DefaultAlloc> &
+ loaded);
+
+template
+void
+sortLoadedByDocId(SequentialReadModifyWriteVector<LoadedNumericValue<float>,
+ vespalib::DefaultAlloc> &
+ loaded);
+
+template
+void
+sortLoadedByDocId(SequentialReadModifyWriteVector<LoadedNumericValue<double>,
+ vespalib::DefaultAlloc> &
+ loaded);
+
+
+
+} // namespace attribute
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/attribute/loadednumericvalue.h b/searchlib/src/vespa/searchlib/attribute/loadednumericvalue.h
new file mode 100644
index 00000000000..a8855ba5f15
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/loadednumericvalue.h
@@ -0,0 +1,69 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/common/sort.h>
+#include <vespa/searchlib/util/fileutil.h>
+#include "loadedvalue.h"
+
+
+namespace search
+{
+
+namespace attribute
+{
+
+/**
+ * Temporary representation of enumerated attribute loaded from non-enumerated
+ * save file (i.e. old save format). For numeric data types.
+ */
+
+template <typename T>
+struct LoadedNumericValue : public LoadedValue<T>
+{
+ LoadedNumericValue()
+ : LoadedValue<T>()
+ {
+ }
+
+ class ValueCompare : public std::binary_function<LoadedNumericValue<T>,
+ LoadedNumericValue<T>,
+ bool>
+ {
+ public:
+ bool
+ operator()(const LoadedNumericValue<T> &x,
+ const LoadedNumericValue<T> &y) const
+ {
+ return x < y;
+ }
+ };
+
+ class ValueRadix
+ {
+ public:
+ uint64_t
+ operator()(const LoadedValue<T> &v) const
+ {
+ return vespalib::convertForSort<T, true>::convert(v.getValue());
+ }
+ };
+};
+
+
+template <typename T>
+void
+sortLoadedByValue(SequentialReadModifyWriteVector<LoadedNumericValue<T>,
+ vespalib::DefaultAlloc> &
+ loaded);
+
+template <typename T>
+void
+sortLoadedByDocId(SequentialReadModifyWriteVector<LoadedNumericValue<T>,
+ vespalib::DefaultAlloc> &
+ loaded);
+
+} // namespace attribute
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/attribute/loadedstringvalue.cpp b/searchlib/src/vespa/searchlib/attribute/loadedstringvalue.cpp
new file mode 100644
index 00000000000..335abb799b0
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/loadedstringvalue.cpp
@@ -0,0 +1,49 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "loadedstringvalue.h"
+
+
+namespace search
+{
+
+namespace attribute
+{
+
+void
+sortLoadedByValue(LoadedStringVectorReal &loaded)
+{
+ vespalib::Array<unsigned, vespalib::MMapAlloc>
+ radixScratchPad(loaded.size());
+ for(size_t i(0), m(loaded.size()); i < m; i++) {
+ loaded[i].prepareRadixSort();
+ }
+ radix_sort(LoadedStringValue::ValueRadix(),
+ LoadedStringValue::ValueCompare(),
+ AlwaysEof<LoadedStringValue>(),
+ 1,
+ &loaded[0],
+ loaded.size(),
+ &radixScratchPad[0],
+ 0,
+ 96);
+}
+
+void
+sortLoadedByDocId(LoadedStringVectorReal &loaded)
+{
+ ShiftBasedRadixSorter<LoadedStringValue,
+ LoadedStringValue::DocRadix,
+ LoadedStringValue::DocOrderCompare, 56>::
+ radix_sort(LoadedStringValue::DocRadix(),
+ LoadedStringValue::DocOrderCompare(),
+ &loaded[0],
+ loaded.size(),
+ 16);
+}
+
+
+} // namespace attribute
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/attribute/loadedstringvalue.h b/searchlib/src/vespa/searchlib/attribute/loadedstringvalue.h
new file mode 100644
index 00000000000..87e2574bdb6
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/loadedstringvalue.h
@@ -0,0 +1,95 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/common/sort.h>
+#include <vespa/searchlib/util/fileutil.h>
+#include <vespa/searchlib/util/foldedstringcompare.h>
+#include <vespa/vespalib/text/utf8.h>
+#include <vespa/vespalib/text/lowercase.h>
+#include "loadedvalue.h"
+
+namespace search
+{
+
+namespace attribute
+{
+
+/**
+ * Temporary representation of enumerated attribute loaded from non-enumerated
+ * save file (i.e. old save format). For string data types.
+ */
+
+template <typename B>
+struct RadixSortable : public B
+{
+ RadixSortable()
+ : B(),
+ _currRadix(NULL),
+ _currRadixFolding(false)
+ {
+ }
+
+ class ValueRadix
+ {
+ public:
+ uint32_t
+ operator ()(RadixSortable &x) const
+ {
+ vespalib::Utf8ReaderForZTS u8reader(x._currRadix);
+ uint32_t val = u8reader.getChar();
+ if (x._currRadixFolding) {
+ if (val != 0) {
+ val = vespalib::LowerCase::convert(val);
+ } else {
+ // switch to returning unfolded values
+ x._currRadix = x.getValue();
+ x._currRadixFolding = false;
+ val = 1;
+ }
+ }
+ return val;
+ }
+ };
+
+ class ValueCompare : public std::binary_function<B, B, bool>
+ {
+ FoldedStringCompare _compareHelper;
+ public:
+ bool
+ operator()(const B &x, const B &y) const
+ {
+ return _compareHelper.compare(x.getValue(), y.getValue()) < 0;
+ }
+ };
+
+ void
+ prepareRadixSort()
+ {
+ _currRadix = this->getValue();
+ _currRadixFolding = true;
+ }
+private:
+ const char * _currRadix;
+ bool _currRadixFolding;
+};
+
+typedef RadixSortable<LoadedValue<const char *> > LoadedStringValue;
+
+typedef SequentialReadModifyWriteInterface<LoadedStringValue> LoadedStringVector;
+
+typedef SequentialReadModifyWriteVector<LoadedStringValue, vespalib::DefaultAlloc>
+LoadedStringVectorReal;
+
+
+void
+sortLoadedByValue(LoadedStringVectorReal &loaded);
+
+void
+sortLoadedByDocId(LoadedStringVectorReal &loaded);
+
+
+} // namespace attribute
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/attribute/loadedvalue.cpp b/searchlib/src/vespa/searchlib/attribute/loadedvalue.cpp
new file mode 100644
index 00000000000..cf2e611459b
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/loadedvalue.cpp
@@ -0,0 +1,11 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "loadedvalue.h"
+#include <vespa/log/log.h>
+
+LOG_SETUP(".searchlib.attribute.loadedvalue");
+namespace search {
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/attribute/loadedvalue.h b/searchlib/src/vespa/searchlib/attribute/loadedvalue.h
new file mode 100644
index 00000000000..08c7833f394
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/loadedvalue.h
@@ -0,0 +1,163 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchcommon/common/undefinedvalues.h>
+#include <vespa/searchlib/attribute/enumstorebase.h>
+#include <vespa/searchlib/btree/entryref.h>
+
+namespace search
+{
+
+namespace attribute
+{
+
+/*
+ * Temporary representation of enumerated attribute loaded from non-enumerated
+ * save file (i.e. old save format).
+ */
+template <typename T>
+class LoadedValue
+{
+public:
+ LoadedValue()
+ : _docId(0),
+ _idx(0),
+ _pidx(0),
+ _weight(1)
+ {
+ memset(&_value, 0, sizeof(_value));
+ }
+
+ class DocRadix
+ {
+ public:
+ uint64_t
+ operator() (const LoadedValue<T> & v) const
+ {
+ uint64_t tmp(v._docId);
+ return tmp << 32 | v._idx;
+ }
+ };
+
+ class DocOrderCompare : public std::binary_function<LoadedValue<T>,
+ LoadedValue<T>,
+ bool>
+ {
+ public:
+ bool
+ operator()(const LoadedValue<T> &x,
+ const LoadedValue<T> &y) const
+ {
+ int32_t diff(x._docId - y._docId);
+ if (diff == 0) {
+ diff = x._idx - y._idx;
+ }
+ return diff < 0;
+ }
+ };
+
+ EnumStoreBase::Index
+ getEidx() const
+ {
+ return EnumStoreBase::Index(btree::EntryRef(_value._eidx));
+ }
+
+ void
+ setEidx(EnumStoreBase::Index v)
+ {
+ _value._eidx = v.ref();
+ }
+
+ T
+ getValue() const
+ {
+ return _value._value;
+ }
+
+ inline void
+ setValue(T v)
+ {
+ _value._value = v;
+ }
+
+ int32_t
+ getWeight() const
+ {
+ return _weight;
+ }
+
+ void
+ setWeight(int32_t v)
+ {
+ _weight = v;
+ }
+
+ inline bool
+ operator<(const LoadedValue<T> &rhs) const
+ {
+ return _value._value < rhs._value._value;
+ }
+
+ union Value {
+ T _value;
+ uint32_t _eidx;
+ };
+ uint32_t _docId;
+ uint32_t _idx;
+ btree::EntryRef _pidx;
+private:
+ int32_t _weight;
+ Value _value;
+};
+
+
+template <>
+inline void
+LoadedValue<float>::setValue(float v)
+{
+ // Consolidate nans during load to avoid sort order issues
+ _value._value = isUndefined<float>(v) ? getUndefined<float>() : v;
+}
+
+template <>
+inline void
+LoadedValue<double>::setValue(double v)
+{
+ // Consolidate nans during load to avoid sort order issues
+ _value._value = isUndefined<double>(v) ? getUndefined<double>() : v;
+}
+
+
+template <>
+inline bool
+LoadedValue<float>::operator<(const LoadedValue<float> &rhs) const
+{
+ if (std::isnan(_value._value)) {
+ return !std::isnan(rhs._value._value);
+ }
+ if (std::isnan(rhs._value._value)) {
+ return false;
+ }
+ return _value._value < rhs._value._value;
+}
+
+
+template <>
+inline bool
+LoadedValue<double>::operator<(const LoadedValue<double> &rhs) const
+{
+ if (std::isnan(_value._value)) {
+ return !std::isnan(rhs._value._value);
+ }
+ if (std::isnan(rhs._value._value)) {
+ return false;
+ }
+ return _value._value < rhs._value._value;
+}
+
+
+} // namespace attribute
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/attribute/multienumattribute.cpp b/searchlib/src/vespa/searchlib/attribute/multienumattribute.cpp
new file mode 100644
index 00000000000..33588c94694
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/multienumattribute.cpp
@@ -0,0 +1,12 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "multienumattribute.h"
+#include "multienumattribute.hpp"
+#include <vespa/log/log.h>
+
+LOG_SETUP(".searchlib.attribute.multienumattribute");
+namespace search {
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/attribute/multienumattribute.h b/searchlib/src/vespa/searchlib/attribute/multienumattribute.h
new file mode 100644
index 00000000000..b4da7d495f9
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/multienumattribute.h
@@ -0,0 +1,120 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/attribute/multivalueattribute.h>
+#include <vespa/searchlib/attribute/enumstorebase.h>
+#include <vespa/searchlib/attribute/loadedenumvalue.h>
+
+namespace search {
+
+/*
+ * Implementation of multi value enum attribute that uses an underlying enum store
+ * to store unique values and a multi value mapping to store enum indices for each document.
+ *
+ * B: EnumAttribute<BaseClass>
+ * M: MultiValueType (MultiValueMapping template argument)
+ */
+template <typename B, typename M>
+class MultiValueEnumAttribute : public MultiValueAttribute<B, M>
+{
+protected:
+ typedef typename B::UniqueSet UniqueSet;
+
+ typedef typename B::BaseClass::Change Change;
+ typedef typename B::BaseClass::DocId DocId;
+ typedef typename B::BaseClass::EnumHandle EnumHandle;
+ typedef typename B::BaseClass::EnumModifier EnumModifier;
+ typedef typename B::BaseClass::generation_t generation_t;
+ typedef typename B::BaseClass::LoadedVector LoadedVector;
+ typedef typename B::BaseClass::ValueModifier ValueModifier;
+ typedef typename B::BaseClass::WeightedEnum WeightedEnum;
+
+ typedef typename EnumStoreBase::Index EnumIndex;
+ typedef typename EnumStoreBase::IndexVector EnumIndexVector;
+ typedef typename EnumStoreBase::EnumVector EnumVector;
+ typedef typename MultiValueAttribute<B, M>::MultiValueType WeightedIndex;
+ typedef typename MultiValueAttribute<B, M>::ValueVector WeightedIndexVector;
+ typedef typename MultiValueAttribute<B, M>::Histogram Histogram;
+ typedef typename MultiValueAttribute<B, M>::DocumentValues DocIndices;
+ typedef AttributeVector::ReaderBase ReaderBase;
+ typedef attribute::LoadedEnumAttributeVector LoadedEnumAttributeVector;
+ typedef attribute::LoadedEnumAttribute LoadedEnumAttribute;
+
+ // from MultiValueAttribute
+ virtual bool extractChangeData(const Change & c, EnumIndex & idx); // EnumIndex is ValueType. Use EnumStore
+
+ // from EnumAttribute
+ virtual void considerAttributeChange(const Change & c, UniqueSet & newUniques); // same for both string and numeric
+ virtual void reEnumerate(); // same for both string and numeric
+
+ virtual void applyValueChanges(const DocIndices & docIndices, EnumStoreBase::IndexVector & unused);
+
+ void incRefCount(const WeightedIndex & idx) { this->_enumStore.incRefCount(idx); }
+ void decRefCount(const WeightedIndex & idx) { this->_enumStore.decRefCount(idx); }
+
+ virtual void
+ freezeEnumDictionary()
+ {
+ this->getEnumStore().freezeTree();
+ }
+
+ virtual void fillValues(LoadedVector & loaded);
+
+ virtual void
+ fillEnumIdx(ReaderBase &attrReader,
+ uint64_t numValues,
+ const EnumIndexVector &eidxs,
+ LoadedEnumAttributeVector &loaded);
+
+ virtual void
+ fillEnumIdx(ReaderBase &attrReader,
+ uint64_t numValues,
+ const EnumIndexVector &eidxs,
+ EnumVector &enumHist);
+
+ virtual void mergeMemoryStats(MemoryUsage & total) { (void) total; }
+
+public:
+ MultiValueEnumAttribute(const vespalib::string & baseFileName, const AttributeVector::Config & cfg);
+
+ virtual void onCommit();
+ virtual void onUpdateStat();
+
+ virtual void removeOldGenerations(generation_t firstUsed);
+ virtual void onGenerationChange(generation_t generation);
+
+ //-----------------------------------------------------------------------------------------------------------------
+ // Attribute read API
+ //-----------------------------------------------------------------------------------------------------------------
+ virtual EnumHandle getEnum(DocId doc) const {
+ if (this->getValueCount(doc) == 0) {
+ return std::numeric_limits<uint32_t>::max();
+ } else {
+ WeightedIndex idx;
+ this->_mvMapping.get(doc, 0, idx);
+ return idx.value().ref();
+ }
+ }
+ virtual uint32_t get(DocId doc, EnumHandle * e, uint32_t sz) const {
+ const WeightedIndex * indices;
+ uint32_t valueCount = this->_mvMapping.get(doc, indices);
+ for (uint32_t i = 0, m = std::min(sz, valueCount); i < m; ++i) {
+ e[i] = indices[i].value().ref();
+ }
+ return valueCount;
+ }
+ virtual uint32_t get(DocId doc, WeightedEnum * e, uint32_t sz) const {
+ const WeightedIndex * indices;
+ uint32_t valueCount = this->_mvMapping.get(doc, indices);
+ for (uint32_t i = 0, m = std::min(sz, valueCount); i < m; ++i) {
+ e[i] = WeightedEnum(indices[i].value().ref(), indices[i].weight());
+ }
+ return valueCount;
+ }
+
+ virtual std::unique_ptr<AttributeSaver> onInitSave() override;
+};
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/attribute/multienumattribute.hpp b/searchlib/src/vespa/searchlib/attribute/multienumattribute.hpp
new file mode 100644
index 00000000000..961fc3b1ee9
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/multienumattribute.hpp
@@ -0,0 +1,238 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/attribute/multienumattribute.h>
+#include <vespa/searchlib/attribute/multivalueattribute.hpp>
+#include "multienumattributesaver.h"
+
+#include <stdexcept>
+
+namespace search {
+
+template <typename B, typename M>
+bool
+MultiValueEnumAttribute<B, M>::extractChangeData(const Change & c, EnumIndex & idx)
+{
+ if (c._enumScratchPad == Change::UNSET_ENUM) {
+ return this->_enumStore.findIndex(c._data.raw(), idx);
+ }
+ idx = EnumIndex(c._enumScratchPad);
+ return true;
+}
+
+template <typename B, typename M>
+void
+MultiValueEnumAttribute<B, M>::considerAttributeChange(const Change & c, UniqueSet & newUniques)
+{
+ if (c._type == ChangeBase::APPEND ||
+ (this->getInternalCollectionType().createIfNonExistant() &&
+ (c._type >= ChangeBase::INCREASEWEIGHT && c._type <= ChangeBase::DIVWEIGHT)))
+ {
+ EnumIndex idx;
+ if (!this->_enumStore.findIndex(c._data.raw(), idx)) {
+ newUniques.insert(c._data);
+ } else {
+ c._enumScratchPad = idx.ref();
+ }
+ }
+}
+
+template <typename B, typename M>
+void
+MultiValueEnumAttribute<B, M>::reEnumerate()
+{
+ // update MultiValueMapping with new EnumIndex values.
+ EnumModifier enumGuard(this->getEnumModifier());
+ for (DocId doc = 0; doc < this->getNumDocs(); ++doc) {
+ uint32_t valueCount = this->_mvMapping.getValueCount(doc);
+ WeightedIndexVector indices(valueCount);
+ this->_mvMapping.get(doc, &indices[0], valueCount);
+
+ for (uint32_t i = 0; i < indices.size(); ++i) {
+ EnumIndex oldIndex = indices[i].value();
+ EnumIndex newIndex;
+ this->_enumStore.getCurrentIndex(oldIndex, newIndex);
+ std::atomic_thread_fence(std::memory_order_release);
+ indices[i] = WeightedIndex(newIndex, indices[i].weight());
+ }
+
+ std::atomic_thread_fence(std::memory_order_release);
+ this->_mvMapping.replace(doc, indices);
+ }
+}
+
+template <typename B, typename M>
+void
+MultiValueEnumAttribute<B, M>::applyValueChanges(const DocIndices & docIndices, EnumStoreBase::IndexVector & unused)
+{
+ // set new set of indices for documents with changes
+ ValueModifier valueGuard(this->getValueModifier());
+ for (typename DocIndices::const_iterator iter = docIndices.begin(); iter != docIndices.end(); ++iter) {
+ const WeightedIndex * oldIndices = NULL;
+ uint32_t valueCount(this->_mvMapping.get(iter->first, oldIndices));
+ this->_mvMapping.set(iter->first, iter->second);
+ for (uint32_t i = 0; i < iter->second.size(); ++i) {
+ incRefCount(iter->second[i]);
+ }
+ for (uint32_t i = 0; i < valueCount; ++i) {
+ decRefCount(oldIndices[i]);
+ if (this->_enumStore.getRefCount(oldIndices[i]) == 0) {
+ unused.push_back(oldIndices[i].value());
+ }
+ }
+ }
+}
+
+template <typename B, typename M>
+void
+MultiValueEnumAttribute<B, M>::fillValues(LoadedVector & loaded)
+{
+ Histogram capacityNeeded = this->_mvMapping.getEmptyHistogram();
+ uint32_t numDocs(this->getNumDocs());
+ size_t numValues = loaded.size();
+ size_t count = 0;
+ for (DocId doc = 0; doc < numDocs; ++doc) {
+ uint32_t valueCount(0);
+ for(;(count < numValues) && (loaded.read()._docId == doc); count++, loaded.next()) {
+ valueCount++;
+ }
+ if (valueCount < this->_mvMapping.maxValues()) {
+ capacityNeeded[valueCount] += 1;
+ } else {
+ capacityNeeded[this->_mvMapping.maxValues()] += 1;
+ }
+ }
+
+ this->_mvMapping.reset(numDocs, capacityNeeded);
+
+ loaded.rewind();
+ count = 0;
+ WeightedIndexVector indices;
+ for (DocId doc = 0; doc < numDocs; ++doc) {
+ for(const typename LoadedVector::Type * v = & loaded.read();(count < numValues) && (v->_docId == doc); count++, loaded.next(), v = & loaded.read()) {
+ indices.push_back(WeightedIndex(v->getEidx(), v->getWeight()));
+ }
+ this->checkSetMaxValueCount(indices.size());
+ this->_mvMapping.set(doc, indices);
+ indices.clear();
+ }
+}
+
+
+template <typename B, typename M>
+void
+MultiValueEnumAttribute<B, M>::fillEnumIdx(ReaderBase &attrReader,
+ uint64_t numValues,
+ const EnumIndexVector &eidxs,
+ LoadedEnumAttributeVector &loaded)
+{
+ attribute::SaveLoadedEnum saver(loaded);
+ uint32_t maxvc = this->_mvMapping.fillMapped(attrReader,
+ numValues,
+ &eidxs[0],
+ eidxs.size(),
+ saver,
+ this->getNumDocs(),
+ this->hasWeightedSetType());
+ this->checkSetMaxValueCount(maxvc);
+}
+
+template <typename B, typename M>
+void
+MultiValueEnumAttribute<B, M>::fillEnumIdx(ReaderBase &attrReader,
+ uint64_t numValues,
+ const EnumIndexVector &eidxs,
+ EnumVector &enumHist)
+{
+ attribute::SaveEnumHist saver(enumHist);
+ uint32_t maxvc = this->_mvMapping.fillMapped(attrReader,
+ numValues,
+ &eidxs[0],
+ eidxs.size(),
+ saver,
+ this->getNumDocs(),
+ this->hasWeightedSetType());
+ this->checkSetMaxValueCount(maxvc);
+}
+
+template <typename B, typename M>
+MultiValueEnumAttribute<B, M>::
+MultiValueEnumAttribute(const vespalib::string &baseFileName,
+ const AttributeVector::Config & cfg)
+ : MultiValueAttribute<B, M>(baseFileName, cfg)
+{
+}
+
+template <typename B, typename M>
+void
+MultiValueEnumAttribute<B, M>::onCommit()
+{
+ // update enum store
+ EnumStoreBase::IndexVector possiblyUnused;
+ this->insertNewUniqueValues(possiblyUnused);
+ DocIndices docIndices;
+ this->applyAttributeChanges(docIndices);
+ applyValueChanges(docIndices, possiblyUnused);
+ this->_changes.clear();
+ this->_enumStore.freeUnusedEnums(possiblyUnused);
+ this->freezeEnumDictionary();
+ this->setEnumMax(this->_enumStore.getLastEnum());
+ std::atomic_thread_fence(std::memory_order_release);
+ this->removeAllOldGenerations();
+}
+template <typename B, typename M>
+void
+MultiValueEnumAttribute<B, M>::onUpdateStat()
+{
+ // update statistics
+ MemoryUsage total;
+ total.merge(this->_enumStore.getMemoryUsage());
+ total.merge(this->_enumStore.getTreeMemoryUsage());
+ total.merge(this->_mvMapping.getMemoryUsage());
+ mergeMemoryStats(total);
+ this->updateStatistics(this->_mvMapping.getTotalValueCnt(), this->_enumStore.getNumUniques(), total.allocatedBytes(),
+ total.usedBytes(), total.deadBytes(), total.allocatedBytesOnHold());
+}
+
+template <typename B, typename M>
+void
+MultiValueEnumAttribute<B, M>::removeOldGenerations(generation_t firstUsed)
+{
+ this->_enumStore.trimHoldLists(firstUsed);
+ this->_mvMapping.trimHoldLists(firstUsed);
+}
+
+template <typename B, typename M>
+void
+MultiValueEnumAttribute<B, M>::onGenerationChange(generation_t generation)
+{
+ /*
+ * Freeze tree before generation is increased in attribute vector
+ * but after generation is increased in tree. This ensures that
+ * unlocked readers accessing a frozen tree will access a
+ * sufficiently new frozen tree.
+ */
+ freezeEnumDictionary();
+ this->_mvMapping.transferHoldLists(generation - 1);
+ this->_enumStore.transferHoldLists(generation - 1);
+}
+
+template <typename B, typename M>
+std::unique_ptr<AttributeSaver>
+MultiValueEnumAttribute<B, M>::onInitSave()
+{
+ {
+ EnumModifier enumGuard(this->getEnumModifier());
+ this->_enumStore.reEnumerate();
+ }
+ vespalib::GenerationHandler::Guard guard(this->getGenerationHandler().
+ takeGuard());
+ return std::make_unique<MultiValueEnumAttributeSaver<WeightedIndex,
+ typename M::Index>>
+ (std::move(guard), this->createSaveTargetConfig(), this->_mvMapping,
+ this->_enumStore);
+}
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/attribute/multienumattributesaver.cpp b/searchlib/src/vespa/searchlib/attribute/multienumattributesaver.cpp
new file mode 100644
index 00000000000..0b2d8cc00b9
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/multienumattributesaver.cpp
@@ -0,0 +1,122 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "multienumattributesaver.h"
+#include <vespa/searchlib/util/bufferwriter.h>
+#include "multivalueattributesaverutils.h"
+
+using vespalib::GenerationHandler;
+using search::multivalueattributesaver::CountWriter;
+using search::multivalueattributesaver::WeightWriter;
+
+namespace search {
+
+namespace
+{
+
+/*
+ * Class to write enum indexes mapped over to either enum values
+ * or values, depending on the requirements of the save target.
+ */
+class DatWriter
+{
+ std::vector<EnumStoreIndex> _indexes;
+ const EnumStoreBase &_enumStore;
+ std::unique_ptr<search::BufferWriter> _datWriter;
+ bool _enumerated;
+public:
+ DatWriter(IAttributeSaveTarget &saveTarget,
+ const EnumStoreBase &enumStore)
+ : _indexes(),
+ _enumStore(enumStore),
+ _datWriter(saveTarget.datWriter().allocBufferWriter()),
+ _enumerated(saveTarget.getEnumerated())
+ {
+ _indexes.reserve(1000);
+ }
+
+ ~DatWriter()
+ {
+ assert(_indexes.empty());
+ _datWriter->flush();
+ }
+
+ void flush()
+ {
+ if (!_indexes.empty()) {
+ if (_enumerated) {
+ _enumStore.writeEnumValues(*_datWriter,
+ &_indexes[0], _indexes.size());
+ } else {
+ _enumStore.writeValues(*_datWriter,
+ &_indexes[0], _indexes.size());
+ }
+ _indexes.clear();
+ }
+ }
+
+ template <typename MultiValueT>
+ void
+ writeValues(const MultiValueT *values, uint32_t count) {
+ for (uint32_t i = 0; i < count; ++i) {
+ if (_indexes.size() >= _indexes.capacity()) {
+ flush();
+ }
+ _indexes.push_back(values[i].value());
+ }
+ }
+};
+
+}
+
+template <typename MultiValueT, typename IndexT>
+MultiValueEnumAttributeSaver<MultiValueT, IndexT>::
+MultiValueEnumAttributeSaver(GenerationHandler::Guard &&guard,
+ const IAttributeSaveTarget::Config &cfg,
+ const MultiValueMapping &mvMapping,
+ const EnumStoreBase &enumStore)
+ : Parent(std::move(guard), cfg, mvMapping),
+ _mvMapping(mvMapping),
+ _enumSaver(enumStore, true)
+{
+}
+
+
+
+template <typename MultiValueT, typename IndexT>
+MultiValueEnumAttributeSaver<MultiValueT, IndexT>::
+~MultiValueEnumAttributeSaver()
+{
+}
+
+template <typename MultiValueT, typename IndexT>
+bool
+MultiValueEnumAttributeSaver<MultiValueT, IndexT>::
+onSave(IAttributeSaveTarget &saveTarget)
+{
+ CountWriter countWriter(saveTarget);
+ WeightWriter<MultiValueType::_hasWeight> weightWriter(saveTarget);
+ DatWriter datWriter(saveTarget, _enumSaver.getEnumStore());
+ _enumSaver.writeUdat(saveTarget);
+ for (uint32_t docId = 0; docId < _frozenIndices.size(); ++docId) {
+ Index idx = _frozenIndices[docId];
+ const MultiValueType *handle;
+ uint32_t count = _mvMapping.getDataForIdx(idx, handle);
+ countWriter.writeCount(count);
+ weightWriter.writeWeights(handle, count);
+ datWriter.writeValues(handle, count);
+ }
+ datWriter.flush();
+ _enumSaver.enableReEnumerate();
+ return true;
+}
+
+using EnumIdxArray = multivalue::Value<EnumStoreIndex>;
+using EnumIdxWset = multivalue::WeightedValue<EnumStoreIndex>;
+
+template class MultiValueEnumAttributeSaver<EnumIdxArray, multivalue::Index32>;
+template class MultiValueEnumAttributeSaver<EnumIdxWset, multivalue::Index32>;
+template class MultiValueEnumAttributeSaver<EnumIdxArray, multivalue::Index64>;
+template class MultiValueEnumAttributeSaver<EnumIdxWset, multivalue::Index64>;
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/attribute/multienumattributesaver.h b/searchlib/src/vespa/searchlib/attribute/multienumattributesaver.h
new file mode 100644
index 00000000000..7d7b4f581ed
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/multienumattributesaver.h
@@ -0,0 +1,40 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "multivalueattributesaver.h"
+#include "enumattributesaver.h"
+
+namespace search {
+
+/*
+ * Class for saving an enumerated multivalue attribute.
+ *
+ * Template argument MultiValueT is either multivalue::Value<ValueType> or
+ * multivalue::WeightedValue<ValueType>
+ * Template argument IndexT is either multivalue::Index32 or multivalue::Index64
+ */
+template <typename MultiValueT, typename IndexT>
+class MultiValueEnumAttributeSaver : public MultiValueAttributeSaver<IndexT>
+{
+ using Parent = MultiValueAttributeSaver<IndexT>;
+ using Index = IndexT;
+ using MultiValueType = MultiValueT;
+ using ValueType = typename MultiValueType::ValueType;
+ using GenerationHandler = vespalib::GenerationHandler;
+ using Parent::_frozenIndices;
+ using MultiValueMapping = MultiValueMappingT<MultiValueType, Index>;
+
+ const MultiValueMapping &_mvMapping;
+ EnumAttributeSaver _enumSaver;
+public:
+ virtual bool onSave(IAttributeSaveTarget &saveTarget) override;
+ MultiValueEnumAttributeSaver(GenerationHandler::Guard &&guard,
+ const IAttributeSaveTarget::Config &cfg,
+ const MultiValueMapping &mvMapping,
+ const EnumStoreBase &enumStore);
+ virtual ~MultiValueEnumAttributeSaver();
+};
+
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/attribute/multinumericattribute.cpp b/searchlib/src/vespa/searchlib/attribute/multinumericattribute.cpp
new file mode 100644
index 00000000000..b6021881ee6
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/multinumericattribute.cpp
@@ -0,0 +1,12 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "multinumericattribute.h"
+#include "multinumericattribute.hpp"
+#include <vespa/log/log.h>
+
+LOG_SETUP(".searchlib.attribute.multinumericattribute");
+namespace search {
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/attribute/multinumericattribute.h b/searchlib/src/vespa/searchlib/attribute/multinumericattribute.h
new file mode 100644
index 00000000000..63cf52a42bd
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/multinumericattribute.h
@@ -0,0 +1,333 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/attribute/integerbase.h>
+#include <vespa/searchlib/attribute/floatbase.h>
+#include <vespa/searchlib/attribute/multivaluemapping.h>
+#include <vespa/searchlib/attribute/multivalueattribute.h>
+#include <vespa/searchlib/attribute/attributeiterators.h>
+#include <vespa/searchlib/query/query.h>
+#include <vespa/searchlib/queryeval/emptysearch.h>
+#include <limits>
+#include <string>
+
+namespace search {
+
+/*
+ * Implementation of multi value numeric attribute that uses an underlying
+ * multi value mapping from MultiValueAttribute.
+ *
+ * B: Base class
+ * M: MultiValueType (MultiValueMapping template argument)
+ */
+template <typename B, typename M>
+class MultiValueNumericAttribute : public MultiValueAttribute<B, M>
+{
+private:
+ typedef typename B::BaseType T;
+ typedef typename B::DocId DocId;
+ typedef typename B::EnumHandle EnumHandle;
+ typedef typename B::largeint_t largeint_t;
+ typedef typename B::Weighted Weighted;
+ typedef typename B::WeightedInt WeightedInt;
+ typedef typename B::WeightedFloat WeightedFloat;
+ typedef typename B::WeightedEnum WeightedEnum;
+
+ typedef typename MultiValueAttribute<B, M>::MultiValueMapping MultiValueMapping;
+ typedef typename MultiValueAttribute<B, M>::DocumentValues DocumentValues;
+ typedef typename MultiValueAttribute<B, M>::Change Change;
+ typedef typename MultiValueAttribute<B, M>::ValueType MValueType; // = B::BaseType
+ typedef typename MultiValueAttribute<B, M>::MultiValueType MultiValueType; // = B::BaseType
+
+ virtual bool extractChangeData(const Change & c, MValueType & data) {
+ data = static_cast<MValueType>(c._data.get());
+ return true;
+ }
+
+ virtual T getFromEnum(EnumHandle e) const;
+ virtual bool findEnum(T value, EnumHandle & e) const;
+ virtual void getEnumValue(const EnumHandle * v, uint32_t *e, uint32_t sz) const {
+ (void) v;
+ (void) e;
+ (void) sz;
+ }
+
+
+protected:
+ typedef typename B::generation_t generation_t;
+ typedef MultiValueType WType;
+ uint32_t get(DocId doc, const WType * & values) const { return this->_mvMapping.get(doc, values); }
+
+public:
+ virtual uint32_t getRawValues(DocId doc, const WType * & values) const { return get(doc, values); }
+ /*
+ * Specialization of SearchContext for weighted set type
+ */
+ class SetSearchContext : public NumericAttribute::Range<T>, public AttributeVector::SearchContext
+ {
+ private:
+ const MultiValueNumericAttribute<B, M> & _toBeSearched;
+
+ virtual bool
+ onCmp(DocId docId, int32_t & weight) const
+ {
+ return cmp(docId, weight);
+ }
+
+ virtual bool
+ onCmp(DocId docId) const
+ {
+ return cmp(docId);
+ }
+
+ virtual bool valid() const { return this->isValid(); }
+
+ public:
+ SetSearchContext(QueryTermSimple::UP qTerm, const NumericAttribute & toBeSearched) :
+ NumericAttribute::Range<T>(*qTerm),
+ AttributeVector::SearchContext(toBeSearched),
+ _toBeSearched(static_cast<const MultiValueNumericAttribute<B, M> &>(toBeSearched))
+ {
+ }
+
+ virtual Int64Range getAsIntegerTerm() const {
+ return this->getRange();
+ }
+
+ bool
+ cmp(DocId doc, int32_t & weight) const
+ {
+ const MultiValueType * buffer;
+ for (uint32_t i = 0, m = _toBeSearched._mvMapping.get(doc, buffer);
+ i < m; i++) {
+ T v(buffer[i].value());
+ if (this->match(v)) {
+ weight = buffer[i].weight();
+ return true;
+ }
+ }
+ return false;
+ }
+
+ bool
+ cmp(DocId doc) const
+ {
+ const MultiValueType * buffer;
+ for (uint32_t i = 0, m = _toBeSearched._mvMapping.get(doc, buffer);
+ i < m; i++) {
+ T v(buffer[i].value());
+ if (this->match(v)) {
+ return true;
+ }
+ }
+ return false;
+ }
+
+ virtual std::unique_ptr<queryeval::SearchIterator>
+ createFilterIterator(fef::TermFieldMatchData * matchData, bool strict)
+ {
+ if (!valid()) {
+ return queryeval::SearchIterator::UP(
+ new queryeval::EmptySearch());
+ }
+ if (getIsFilter()) {
+ return queryeval::SearchIterator::UP
+ (strict
+ ? new FilterAttributeIteratorStrict<SetSearchContext>(*this, matchData)
+ : new FilterAttributeIteratorT<SetSearchContext>(*this, matchData));
+ }
+ return queryeval::SearchIterator::UP
+ (strict
+ ? new AttributeIteratorStrict<SetSearchContext>(*this, matchData)
+ : new AttributeIteratorT<SetSearchContext>(*this, matchData));
+ }
+ };
+
+ /*
+ * Specialization of SearchContext for array type
+ */
+ class ArraySearchContext : public NumericAttribute::Range<T>, public AttributeVector::SearchContext
+ {
+ private:
+ const MultiValueNumericAttribute<B, M> & _toBeSearched;
+
+ virtual bool
+ onCmp(DocId docId, int32_t & weight) const
+ {
+ return cmp(docId, weight);
+ }
+
+ virtual bool
+ onCmp(DocId docId) const
+ {
+ return cmp(docId);
+ }
+
+ protected:
+ virtual bool valid() const { return this->isValid(); }
+
+ public:
+ ArraySearchContext(QueryTermSimple::UP qTerm, const NumericAttribute & toBeSearched) :
+ NumericAttribute::Range<T>(*qTerm),
+ AttributeVector::SearchContext(toBeSearched),
+ _toBeSearched(static_cast<const MultiValueNumericAttribute<B, M> &>(toBeSearched))
+ {
+ }
+
+ bool
+ cmp(DocId doc, int32_t & weight) const
+ {
+ uint32_t hitCount = 0;
+ const MultiValueType * buffer;
+ for (uint32_t i = 0, m = _toBeSearched._mvMapping.get(doc, buffer);
+ i < m; i++) {
+ T v = buffer[i].value();
+ if (this->match(v)) {
+ hitCount++;
+ }
+ }
+ weight = hitCount;
+
+ return hitCount != 0;
+ }
+
+ bool
+ cmp(DocId doc) const
+ {
+ const MultiValueType * buffer;
+ for (uint32_t i = 0, m = _toBeSearched._mvMapping.get(doc, buffer);
+ i < m; i++) {
+ T v = buffer[i].value();
+ if (this->match(v)) {
+ return true;
+ }
+ }
+ return false;
+ }
+
+ virtual Int64Range getAsIntegerTerm() const {
+ return this->getRange();
+ }
+
+ virtual std::unique_ptr<queryeval::SearchIterator>
+ createFilterIterator(fef::TermFieldMatchData * matchData, bool strict)
+ {
+ if (!valid()) {
+ return queryeval::SearchIterator::UP(
+ new queryeval::EmptySearch());
+ }
+ if (getIsFilter()) {
+ return queryeval::SearchIterator::UP
+ (strict
+ ? new FilterAttributeIteratorStrict<ArraySearchContext>(*this, matchData)
+ : new FilterAttributeIteratorT<ArraySearchContext>(*this, matchData));
+ }
+ return queryeval::SearchIterator::UP
+ (strict
+ ? new AttributeIteratorStrict<ArraySearchContext>(*this, matchData)
+ : new AttributeIteratorT<ArraySearchContext>(*this, matchData));
+ }
+ };
+
+ MultiValueNumericAttribute(const vespalib::string & baseFileName, const AttributeVector::Config & c =
+ AttributeVector::Config(AttributeVector::BasicType::fromType(T()),
+ attribute::CollectionType::ARRAY));
+ virtual uint32_t getValueCount(DocId doc) const;
+ virtual void onCommit();
+ virtual void onUpdateStat();
+ virtual void removeOldGenerations(generation_t firstUsed);
+
+ virtual void onGenerationChange(generation_t generation);
+
+ virtual bool onLoad();
+
+ virtual bool
+ onLoadEnumerated(typename B::ReaderBase &attrReader);
+
+ AttributeVector::SearchContext::UP
+ getSearch(QueryTermSimple::UP term, const AttributeVector::SearchContext::Params & params) const override;
+
+ virtual void clearOldValues(DocId doc);
+ virtual void setNewValues(DocId doc, const std::vector<WType> & values);
+
+ //-------------------------------------------------------------------------
+ // new read api
+ //-------------------------------------------------------------------------
+ virtual T get(DocId doc) const {
+ MultiValueType value;
+ this->_mvMapping.get(doc, 0, value);
+ return value;
+ }
+ virtual largeint_t getInt(DocId doc) const {
+ MultiValueType value;
+ this->_mvMapping.get(doc, 0, value);
+ return static_cast<largeint_t>(value.value());
+ }
+ virtual double getFloat(DocId doc) const {
+ MultiValueType value;
+ this->_mvMapping.get(doc, 0, value);
+ return static_cast<double>(value.value());
+ }
+ virtual EnumHandle getEnum(DocId doc) const {
+ (void) doc;
+ return std::numeric_limits<uint32_t>::max(); // does not have enum
+ }
+ virtual uint32_t getAll(DocId doc, T * v, uint32_t sz) const {
+ return getHelper(doc, v, sz);
+ }
+ virtual uint32_t get(DocId doc, largeint_t * v, uint32_t sz) const {
+ return getHelper(doc, v, sz);
+ }
+ virtual uint32_t get(DocId doc, double * v, uint32_t sz) const {
+ return getHelper(doc, v, sz);
+ }
+ template <typename BufferType>
+ uint32_t getHelper(DocId doc, BufferType * buffer, uint32_t sz) const {
+ const MultiValueType * handle;
+ uint32_t ret = this->_mvMapping.get(doc, handle);
+ for(size_t i(0), m(std::min(sz, ret)); i < m; i++) {
+ buffer[i] = static_cast<BufferType>(handle[i].value());
+ }
+ return ret;
+ }
+ virtual uint32_t get(DocId doc, EnumHandle * e, uint32_t sz) const {
+ return getEnumHelper(doc, e, sz);
+ }
+ virtual uint32_t get(DocId doc, WeightedEnum * e, uint32_t sz) const {
+ return getEnumHelper(doc, e, sz);
+ }
+ template <typename E>
+ uint32_t getEnumHelper(DocId doc, E * e, uint32_t sz) const {
+ uint32_t available = getValueCount(doc);
+ uint32_t num2Read = std::min(available, sz);
+ for (uint32_t i = 0; i < num2Read; ++i) {
+ e[i] = E(std::numeric_limits<uint32_t>::max()); // does not have enum
+ }
+ return available;
+ }
+ virtual uint32_t getAll(DocId doc, Weighted * v, uint32_t sz) const {
+ return getWeightedHelper<Weighted, T>(doc, v, sz);
+ }
+ virtual uint32_t get(DocId doc, WeightedInt * v, uint32_t sz) const {
+ return getWeightedHelper<WeightedInt, largeint_t>(doc, v, sz);
+ }
+ virtual uint32_t get(DocId doc, WeightedFloat * v, uint32_t sz) const {
+ return getWeightedHelper<WeightedFloat, double>(doc, v, sz);
+ }
+ template <typename WeightedType, typename ValueType>
+ uint32_t getWeightedHelper(DocId doc, WeightedType * buffer, uint32_t sz) const {
+ const MultiValueType * handle;
+ uint32_t ret = this->_mvMapping.get(doc, handle);
+ for(size_t i(0), m(std::min(sz, ret)); i < m; i++) {
+ buffer[i] = WeightedType(static_cast<ValueType>(handle[i].value()),
+ handle[i].weight());
+ }
+ return ret;
+ }
+
+ virtual std::unique_ptr<AttributeSaver> onInitSave() override;
+};
+
+}
+
diff --git a/searchlib/src/vespa/searchlib/attribute/multinumericattribute.hpp b/searchlib/src/vespa/searchlib/attribute/multinumericattribute.hpp
new file mode 100644
index 00000000000..25f0ed88e20
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/multinumericattribute.hpp
@@ -0,0 +1,197 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/searchlib/attribute/multinumericattribute.h>
+#include <vespa/searchlib/attribute/multivalueattribute.hpp>
+#include <vespa/searchlib/attribute/attributevector.hpp>
+#include <vespa/searchlib/util/fileutil.h>
+#include <vespa/fastlib/io/bufferedfile.h>
+#include "multinumericattributesaver.h"
+
+namespace search {
+
+template <typename B, typename M>
+typename MultiValueNumericAttribute<B, M>::T
+MultiValueNumericAttribute<B, M>::getFromEnum(EnumHandle e) const
+{
+ (void) e;
+ return 0;
+}
+
+template <typename B, typename M>
+bool MultiValueNumericAttribute<B, M>::findEnum(T value, EnumHandle & e) const
+{
+ (void) value; (void) e;
+ return false;
+}
+
+template <typename B, typename M>
+MultiValueNumericAttribute<B, M>::
+MultiValueNumericAttribute(const vespalib::string & baseFileName, const AttributeVector::Config & c) :
+ MultiValueAttribute<B, M>(baseFileName, c)
+{
+}
+
+template <typename B, typename M>
+uint32_t MultiValueNumericAttribute<B, M>::getValueCount(DocId doc) const
+{
+ if (doc >= B::getNumDocs()) {
+ return 0;
+ }
+ return this->_mvMapping.getValueCount(doc);
+}
+
+template <typename B, typename M>
+void
+MultiValueNumericAttribute<B, M>::onCommit()
+{
+ DocumentValues docValues;
+ this->applyAttributeChanges(docValues);
+ {
+ typename B::ValueModifier valueGuard(this->getValueModifier());
+ for (const auto & value : docValues) {
+ clearOldValues(value.first);
+ setNewValues(value.first, value.second);
+ }
+ }
+
+ std::atomic_thread_fence(std::memory_order_release);
+ this->removeAllOldGenerations();
+
+ this->_changes.clear();
+}
+
+template <typename B, typename M>
+void MultiValueNumericAttribute<B, M>::onUpdateStat()
+{
+ MemoryUsage usage = this->_mvMapping.getMemoryUsage();
+ this->updateStatistics(this->_mvMapping.getTotalValueCnt(), this->_mvMapping.getTotalValueCnt(), usage.allocatedBytes(),
+ usage.usedBytes(), usage.deadBytes(), usage.allocatedBytesOnHold());
+}
+
+
+template <typename B, typename M>
+void MultiValueNumericAttribute<B, M>::clearOldValues(DocId doc)
+{
+ (void) doc;
+}
+
+template <typename B, typename M>
+void MultiValueNumericAttribute<B, M>::setNewValues(DocId doc, const std::vector<WType> & values)
+{
+ this->_mvMapping.set(doc, values);
+}
+
+template <typename B, typename M>
+void MultiValueNumericAttribute<B, M>::removeOldGenerations(generation_t firstUsed)
+{
+ this->_mvMapping.trimHoldLists(firstUsed);
+}
+
+
+template <typename B, typename M>
+void MultiValueNumericAttribute<B, M>::onGenerationChange(generation_t generation)
+{
+ this->_mvMapping.transferHoldLists(generation - 1);
+}
+
+template <typename B, typename M>
+bool
+MultiValueNumericAttribute<B, M>::onLoadEnumerated(typename B::ReaderBase &
+ attrReader)
+{
+ uint32_t numDocs = attrReader.getNumIdx() - 1;
+ uint64_t numValues = attrReader.getNumValues();
+ uint64_t enumCount = attrReader.getEnumCount();
+ assert(numValues == enumCount);
+ (void) enumCount;
+
+ this->setNumDocs(numDocs);
+ this->setCommittedDocIdLimit(numDocs);
+
+ FileUtil::LoadedBuffer::UP udatBuffer(this->loadUDAT());
+ const T *map = reinterpret_cast<const T *>(udatBuffer->buffer());
+ assert((udatBuffer->size() % sizeof(T)) == 0);
+ size_t mapSize = udatBuffer->size() / sizeof(T);
+ attribute::NoSaveLoadedEnum saver;
+ uint32_t maxvc = this->_mvMapping.fillMapped(attrReader,
+ numValues,
+ map,
+ mapSize,
+ saver,
+ this->getNumDocs(),
+ this->hasWeightedSetType());
+ this->checkSetMaxValueCount(maxvc);
+
+ return true;
+}
+
+template <typename B, typename M>
+bool
+MultiValueNumericAttribute<B, M>::onLoad()
+{
+ typename B::template PrimitiveReader<MValueType> attrReader(*this);
+ bool ok(attrReader.getHasLoadData());
+
+ if (!ok)
+ return false;
+
+ this->setCreateSerialNum(attrReader.getCreateSerialNum());
+
+ if (attrReader.getEnumerated())
+ return onLoadEnumerated(attrReader);
+
+ bool hasWeight(attrReader.hasWeight());
+ size_t numDocs = attrReader.getNumIdx() - 1;
+
+ typename MultiValueMappingBaseBase::Histogram capacityNeeded =
+ this->_mvMapping.getHistogram(attrReader);
+ this->_mvMapping.reset(numDocs, capacityNeeded);
+ // set values
+ std::vector<MultiValueType> values;
+ B::setNumDocs(numDocs);
+ B::setCommittedDocIdLimit(numDocs);
+ attrReader.rewind();
+ for (DocId doc = 0; doc < numDocs; ++doc) {
+ const uint32_t valueCount(attrReader.getNextValueCount());
+ for (uint32_t i(0); i < valueCount; i++) {
+ MValueType currData = attrReader.getNextData();
+ values.push_back(MultiValueType(currData,
+ hasWeight ?
+ attrReader.getNextWeight() : 1));
+ }
+ this->checkSetMaxValueCount(valueCount);
+ setNewValues(doc, values);
+ values.clear();
+ }
+ return true;
+}
+
+template <typename B, typename M>
+AttributeVector::SearchContext::UP
+MultiValueNumericAttribute<B, M>::getSearch(QueryTermSimple::UP qTerm,
+ const AttributeVector::SearchContext::Params & params) const
+{
+ (void) params;
+ if (this->hasArrayType()) {
+ return std::unique_ptr<AttributeVector::SearchContext>
+ (new ArraySearchContext(std::move(qTerm), *this));
+ } else {
+ return std::unique_ptr<AttributeVector::SearchContext>
+ (new SetSearchContext(std::move(qTerm), *this));
+ }
+}
+
+
+template <typename B, typename M>
+std::unique_ptr<AttributeSaver>
+MultiValueNumericAttribute<B, M>::onInitSave()
+{
+ vespalib::GenerationHandler::Guard guard(this->getGenerationHandler().
+ takeGuard());
+ return std::make_unique<MultiValueNumericAttributeSaver<MultiValueType,
+ typename M::Index>>
+ (std::move(guard), this->createSaveTargetConfig(), this->_mvMapping);
+}
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/attribute/multinumericattributesaver.cpp b/searchlib/src/vespa/searchlib/attribute/multinumericattributesaver.cpp
new file mode 100644
index 00000000000..fc45530594b
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/multinumericattributesaver.cpp
@@ -0,0 +1,130 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "multinumericattributesaver.h"
+#include <vespa/searchlib/util/bufferwriter.h>
+#include "multivalueattributesaverutils.h"
+
+using vespalib::GenerationHandler;
+using search::multivalueattributesaver::CountWriter;
+using search::multivalueattributesaver::WeightWriter;
+
+namespace search {
+
+namespace
+{
+
+class DatWriter
+{
+ std::unique_ptr<search::BufferWriter> _datWriter;
+public:
+ DatWriter(IAttributeSaveTarget &saveTarget)
+ : _datWriter(saveTarget.datWriter().allocBufferWriter())
+ {
+ }
+
+ ~DatWriter()
+ {
+ _datWriter->flush();
+ }
+
+ template <typename MultiValueT>
+ void
+ writeValues(const MultiValueT *values, uint32_t count) {
+ for (uint32_t i = 0; i < count; ++i) {
+ typename MultiValueT::ValueType value(values[i]);
+ _datWriter->write(&value, sizeof(typename MultiValueT::ValueType));
+ }
+ }
+};
+
+}
+
+template <typename MultiValueT, typename IndexT>
+MultiValueNumericAttributeSaver<MultiValueT, IndexT>::
+MultiValueNumericAttributeSaver(GenerationHandler::Guard &&guard,
+ const IAttributeSaveTarget::Config &cfg,
+ const MultiValueMapping &mvMapping)
+ : Parent(std::move(guard), cfg, mvMapping),
+ _mvMapping(mvMapping)
+{
+}
+
+
+
+template <typename MultiValueT, typename IndexT>
+MultiValueNumericAttributeSaver<MultiValueT, IndexT>::
+~MultiValueNumericAttributeSaver()
+{
+}
+
+template <typename MultiValueT, typename IndexT>
+bool
+MultiValueNumericAttributeSaver<MultiValueT, IndexT>::
+onSave(IAttributeSaveTarget &saveTarget)
+{
+ CountWriter countWriter(saveTarget);
+ WeightWriter<MultiValueType::_hasWeight> weightWriter(saveTarget);
+ DatWriter datWriter(saveTarget);
+
+ for (uint32_t docId = 0; docId < _frozenIndices.size(); ++docId) {
+ Index idx = _frozenIndices[docId];
+ const MultiValueType *handle;
+ uint32_t count = _mvMapping.getDataForIdx(idx, handle);
+ countWriter.writeCount(count);
+ weightWriter.writeWeights(handle, count);
+ datWriter.writeValues(handle, count);
+ }
+ return true;
+}
+
+template class MultiValueNumericAttributeSaver<multivalue::Value<int8_t>,
+ multivalue::Index32>;
+template class MultiValueNumericAttributeSaver<multivalue::Value<int16_t>,
+ multivalue::Index32>;
+template class MultiValueNumericAttributeSaver<multivalue::Value<int32_t>,
+ multivalue::Index32>;
+template class MultiValueNumericAttributeSaver<multivalue::Value<int64_t>,
+ multivalue::Index32>;
+template class MultiValueNumericAttributeSaver<multivalue::Value<float>,
+ multivalue::Index32>;
+template class MultiValueNumericAttributeSaver<multivalue::Value<double>,
+ multivalue::Index32>;
+template class MultiValueNumericAttributeSaver<multivalue::WeightedValue<int8_t>,
+ multivalue::Index32>;
+template class MultiValueNumericAttributeSaver<multivalue::WeightedValue<int16_t>,
+ multivalue::Index32>;
+template class MultiValueNumericAttributeSaver<multivalue::WeightedValue<int32_t>,
+ multivalue::Index32>;
+template class MultiValueNumericAttributeSaver<multivalue::WeightedValue<int64_t>,
+ multivalue::Index32>;
+template class MultiValueNumericAttributeSaver<multivalue::WeightedValue<float>,
+ multivalue::Index32>;
+template class MultiValueNumericAttributeSaver<multivalue::WeightedValue<double>,
+ multivalue::Index32>;
+template class MultiValueNumericAttributeSaver<multivalue::Value<int8_t>,
+ multivalue::Index64>;
+template class MultiValueNumericAttributeSaver<multivalue::Value<int16_t>,
+ multivalue::Index64>;
+template class MultiValueNumericAttributeSaver<multivalue::Value<int32_t>,
+ multivalue::Index64>;
+template class MultiValueNumericAttributeSaver<multivalue::Value<int64_t>,
+ multivalue::Index64>;
+template class MultiValueNumericAttributeSaver<multivalue::Value<float>,
+ multivalue::Index64>;
+template class MultiValueNumericAttributeSaver<multivalue::Value<double>,
+ multivalue::Index64>;
+template class MultiValueNumericAttributeSaver<multivalue::WeightedValue<int8_t>,
+ multivalue::Index64>;
+template class MultiValueNumericAttributeSaver<multivalue::WeightedValue<int16_t>,
+ multivalue::Index64>;
+template class MultiValueNumericAttributeSaver<multivalue::WeightedValue<int32_t>,
+ multivalue::Index64>;
+template class MultiValueNumericAttributeSaver<multivalue::WeightedValue<int64_t>,
+ multivalue::Index64>;
+template class MultiValueNumericAttributeSaver<multivalue::WeightedValue<float>,
+ multivalue::Index64>;
+template class MultiValueNumericAttributeSaver<multivalue::WeightedValue<double>,
+ multivalue::Index64>;
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/attribute/multinumericattributesaver.h b/searchlib/src/vespa/searchlib/attribute/multinumericattributesaver.h
new file mode 100644
index 00000000000..4b20f94cad0
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/multinumericattributesaver.h
@@ -0,0 +1,38 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "multivalueattributesaver.h"
+
+namespace search {
+
+/*
+ * Class for saving a multivalue attribute.
+ *
+ * Template argument MultiValueT is either multivalue::Value<ValueType> or
+ * multivalue::WeightedValue<ValueType>
+ * Template argument IndexT is either multivalue::Index32 or multivalue::Index64
+ */
+template <typename MultiValueT, typename IndexT>
+class MultiValueNumericAttributeSaver : public MultiValueAttributeSaver<IndexT>
+{
+ using Parent = MultiValueAttributeSaver<IndexT>;
+ using Index = IndexT;
+ using MultiValueType = MultiValueT;
+ using ValueType = typename MultiValueType::ValueType;
+ using GenerationHandler = vespalib::GenerationHandler;
+ using Parent::_frozenIndices;
+ using MultiValueMapping = MultiValueMappingT<MultiValueType, Index>;
+
+ const MultiValueMapping &_mvMapping;
+public:
+ virtual bool onSave(IAttributeSaveTarget &saveTarget) override;
+ MultiValueNumericAttributeSaver(GenerationHandler::Guard &&guard,
+ const IAttributeSaveTarget::Config &cfg,
+ const MultiValueMapping &mvMapping);
+
+ virtual ~MultiValueNumericAttributeSaver();
+};
+
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/attribute/multinumericenumattribute.cpp b/searchlib/src/vespa/searchlib/attribute/multinumericenumattribute.cpp
new file mode 100644
index 00000000000..1f62445a2b2
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/multinumericenumattribute.cpp
@@ -0,0 +1,12 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "multinumericenumattribute.h"
+#include "multinumericenumattribute.hpp"
+#include <vespa/log/log.h>
+
+LOG_SETUP(".searchlib.attribute.multinumericenumattribute");
+namespace search {
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/attribute/multinumericenumattribute.h b/searchlib/src/vespa/searchlib/attribute/multinumericenumattribute.h
new file mode 100644
index 00000000000..8ef3d28549b
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/multinumericenumattribute.h
@@ -0,0 +1,289 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/attribute/multienumattribute.h>
+#include <vespa/searchlib/attribute/attributeiterators.h>
+#include <vespa/searchlib/queryeval/emptysearch.h>
+#include <vespa/searchlib/attribute/numericbase.h>
+
+namespace search {
+
+/*
+ * Implementation of multi value numeric attribute that uses an underlying enum store
+ * to store unique numeric values and a multi value mapping to store enum indices for each document.
+ * This class is used for both array and weighted set types.
+ *
+ * B: EnumAttribute<BaseClass>
+ * M: MultiValueType (MultiValueMapping template argument)
+ */
+template <typename B, typename M>
+class MultiValueNumericEnumAttribute : public MultiValueEnumAttribute<B, M>
+{
+protected:
+ typedef typename B::BaseClass::DocId DocId;
+ typedef typename B::BaseClass::EnumHandle EnumHandle;
+public:
+ typedef typename B::BaseClass::BaseType T;
+protected:
+ typedef typename B::BaseClass::largeint_t largeint_t;
+ typedef typename B::BaseClass::LoadedNumericValueT LoadedNumericValueT;
+ typedef typename B::BaseClass::LoadedVector LoadedVector;
+ typedef SequentialReadModifyWriteVector<LoadedNumericValueT, vespalib::DefaultAlloc> LoadedVectorR;
+ typedef typename B::BaseClass::Weighted Weighted;
+ typedef typename B::BaseClass::WeightedInt WeightedInt;
+ typedef typename B::BaseClass::WeightedFloat WeightedFloat;
+ typedef typename B::BaseClass::WeightedEnum WeightedEnum;
+
+ typedef typename MultiValueEnumAttribute<B, M>::MultiValueType WeightedIndex;
+ typedef attribute::LoadedEnumAttribute LoadedEnumAttribute;
+ typedef attribute::LoadedEnumAttributeVector LoadedEnumAttributeVector;
+ typedef EnumStoreBase::IndexVector EnumIndexVector;
+ typedef EnumStoreBase::EnumVector EnumVector;
+ typedef EnumStoreBase::Index EnumIndex;
+
+protected:
+ /*
+ * Specialization of SearchContext for weighted set type
+ */
+ class SetSearchContext : public NumericAttribute::Range<T>, public AttributeVector::SearchContext
+ {
+ protected:
+ const MultiValueNumericEnumAttribute<B, M> & _toBeSearched;
+
+ virtual bool
+ onCmp(DocId docId, int32_t & weight) const
+ {
+ return cmp(docId, weight);
+ }
+
+ virtual bool
+ onCmp(DocId docId) const
+ {
+ return cmp(docId);
+ }
+
+ virtual bool valid() const { return this->isValid(); }
+
+ public:
+ SetSearchContext(QueryTermSimple::UP qTerm, const NumericAttribute & toBeSearched) :
+ NumericAttribute::Range<T>(*qTerm),
+ SearchContext(toBeSearched),
+ _toBeSearched(static_cast<const MultiValueNumericEnumAttribute<B, M> &>(toBeSearched))
+ {
+ }
+
+ bool
+ cmp(DocId doc, int32_t & weight) const
+ {
+ const WeightedIndex * indices;
+ uint32_t valueCount = _toBeSearched._mvMapping.get(doc, indices);
+ for (uint32_t i = 0; i < valueCount; ++i) {
+ T v = _toBeSearched._enumStore.getValue(indices[i].value());
+ if (this->match(v)) {
+ weight = indices[i].weight();
+ return true;
+ }
+ }
+ return false;
+ }
+
+ bool
+ cmp(DocId doc) const
+ {
+ const WeightedIndex * indices;
+ uint32_t valueCount = _toBeSearched._mvMapping.get(doc, indices);
+ for (uint32_t i = 0; i < valueCount; ++i) {
+ T v = _toBeSearched._enumStore.getValue(indices[i].value());
+ if (this->match(v)) {
+ return true;
+ }
+ }
+ return false;
+ }
+ virtual Int64Range getAsIntegerTerm() const {
+ return this->getRange();
+ }
+
+ virtual std::unique_ptr<queryeval::SearchIterator>
+ createFilterIterator(fef::TermFieldMatchData * matchData, bool strict)
+ {
+ if (!valid()) {
+ return queryeval::SearchIterator::UP(
+ new queryeval::EmptySearch());
+ }
+ if (getIsFilter()) {
+ return queryeval::SearchIterator::UP
+ (strict
+ ? new FilterAttributeIteratorStrict<SetSearchContext>(*this, matchData)
+ : new FilterAttributeIteratorT<SetSearchContext>(*this, matchData));
+ }
+ return queryeval::SearchIterator::UP
+ (strict
+ ? new AttributeIteratorStrict<SetSearchContext>(*this, matchData)
+ : new AttributeIteratorT<SetSearchContext>(*this, matchData));
+ }
+ };
+
+ /*
+ * Specialization of SearchContext for array type
+ */
+ class ArraySearchContext : public NumericAttribute::Range<T>, public AttributeVector::SearchContext
+ {
+ protected:
+ const MultiValueNumericEnumAttribute<B, M> & _toBeSearched;
+
+ virtual bool
+ onCmp(DocId docId, int32_t & weight) const
+ {
+ return cmp(docId, weight);
+ }
+
+ virtual bool
+ onCmp(DocId docId) const
+ {
+ return cmp(docId);
+ }
+
+ virtual bool valid() const { return this->isValid(); }
+
+ public:
+ ArraySearchContext(QueryTermSimple::UP qTerm, const NumericAttribute & toBeSearched) :
+ NumericAttribute::Range<T>(*qTerm),
+ SearchContext(toBeSearched),
+ _toBeSearched(static_cast<const MultiValueNumericEnumAttribute<B, M> &>(toBeSearched))
+ {
+ }
+
+ virtual Int64Range getAsIntegerTerm() const {
+ return this->getRange();
+ }
+
+ bool
+ cmp(DocId doc, int32_t & weight) const
+ {
+ uint32_t hitCount = 0;
+ const WeightedIndex * indices;
+ uint32_t valueCount = _toBeSearched._mvMapping.get(doc, indices);
+ for (uint32_t i = 0; i < valueCount; ++i) {
+ T v = _toBeSearched._enumStore.getValue(indices[i].value());
+ if (this->match(v)) {
+ hitCount++;
+ }
+ }
+ weight = hitCount;
+
+ return hitCount != 0;
+ }
+
+ bool
+ cmp(DocId doc) const
+ {
+ const WeightedIndex * indices;
+ uint32_t valueCount = _toBeSearched._mvMapping.get(doc, indices);
+ for (uint32_t i = 0; i < valueCount; ++i) {
+ T v = _toBeSearched._enumStore.getValue(indices[i].value());
+ if (this->match(v)) {
+ return true;
+ }
+ }
+
+ return false;
+ }
+
+ virtual std::unique_ptr<queryeval::SearchIterator>
+ createFilterIterator(fef::TermFieldMatchData * matchData, bool strict)
+ {
+ if (!valid()) {
+ return queryeval::SearchIterator::UP(
+ new queryeval::EmptySearch());
+ }
+ if (getIsFilter()) {
+ return queryeval::SearchIterator::UP
+ (strict
+ ? new FilterAttributeIteratorStrict<ArraySearchContext>(*this, matchData)
+ : new FilterAttributeIteratorT<ArraySearchContext>(*this, matchData));
+ }
+ return queryeval::SearchIterator::UP
+ (strict
+ ? new AttributeIteratorStrict<ArraySearchContext>(*this, matchData)
+ : new AttributeIteratorT<ArraySearchContext>(*this, matchData));
+ }
+ };
+
+
+public:
+ MultiValueNumericEnumAttribute(const vespalib::string & baseFileName, const AttributeVector::Config & cfg);
+
+ virtual bool onLoad();
+
+ bool
+ onLoadEnumerated(typename B::ReaderBase &attrReader);
+
+ AttributeVector::SearchContext::UP
+ getSearch(QueryTermSimple::UP term, const AttributeVector::SearchContext::Params & params) const override;
+
+ //-------------------------------------------------------------------------
+ // Attribute read API
+ //-------------------------------------------------------------------------
+ virtual T get(DocId doc) const {
+ if (this->getValueCount(doc) == 0) {
+ return T();
+ } else {
+ WeightedIndex idx;
+ this->_mvMapping.get(doc, 0, idx);
+ return this->_enumStore.getValue(idx.value());
+ }
+ }
+ virtual largeint_t getInt(DocId doc) const {
+ return static_cast<largeint_t>(get(doc));
+ }
+ virtual double getFloat(DocId doc) const {
+ return static_cast<double>(get(doc));
+ }
+
+ template <typename BufferType>
+ uint32_t getHelper(DocId doc, BufferType * buffer, uint32_t sz) const {
+ const WeightedIndex * indices;
+ uint32_t valueCount = this->_mvMapping.get(doc, indices);
+ for(uint32_t i = 0, m = std::min(sz, valueCount); i < m; i++) {
+ buffer[i] = static_cast<BufferType>(this->_enumStore.getValue(indices[i].value()));
+ }
+ return valueCount;
+ }
+ virtual uint32_t getAll(DocId doc, T * v, uint32_t sz) const {
+ return getHelper(doc, v, sz);
+ }
+ virtual uint32_t get(DocId doc, largeint_t * v, uint32_t sz) const {
+ return getHelper(doc, v, sz);
+ }
+ virtual uint32_t get(DocId doc, double * v, uint32_t sz) const {
+ return getHelper(doc, v, sz);
+ }
+
+ template <typename WeightedType, typename ValueType>
+ uint32_t getWeightedHelper(DocId doc, WeightedType * buffer, uint32_t sz) const {
+ const WeightedIndex * indices;
+ uint32_t valueCount = this->_mvMapping.get(doc, indices);
+ for (uint32_t i = 0, m = std::min(sz, valueCount); i < m; ++i) {
+ buffer[i] = WeightedType(static_cast<ValueType>(this->_enumStore.getValue(indices[i].value())), indices[i].weight());
+ }
+ return valueCount;
+ }
+ virtual uint32_t getAll(DocId doc, Weighted * v, uint32_t sz) const {
+ return getWeightedHelper<Weighted, T>(doc, v, sz);
+ }
+ virtual uint32_t get(DocId doc, WeightedInt * v, uint32_t sz) const {
+ return getWeightedHelper<WeightedInt, largeint_t>(doc, v, sz);
+ }
+ virtual uint32_t get(DocId doc, WeightedFloat * v, uint32_t sz) const {
+ return getWeightedHelper<WeightedFloat, double>(doc, v, sz);
+ }
+
+private:
+ typedef typename B::template PrimitiveReader<typename B::LoadedValueType> AttributeReader;
+ void loadAllAtOnce(AttributeReader & attrReader, size_t numDocs, size_t numValues);
+};
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/attribute/multinumericenumattribute.hpp b/searchlib/src/vespa/searchlib/attribute/multinumericenumattribute.hpp
new file mode 100644
index 00000000000..52aef8bf54a
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/multinumericenumattribute.hpp
@@ -0,0 +1,145 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/attribute/multinumericenumattribute.h>
+#include <vespa/fastlib/io/bufferedfile.h>
+#include <vespa/searchlib/attribute/loadednumericvalue.h>
+
+namespace search {
+
+template <typename B, typename M>
+MultiValueNumericEnumAttribute<B, M>::
+MultiValueNumericEnumAttribute(const vespalib::string & baseFileName,
+ const AttributeVector::Config & cfg)
+ : MultiValueEnumAttribute<B, M>(baseFileName, cfg)
+{
+}
+
+template <typename B, typename M>
+void
+MultiValueNumericEnumAttribute<B, M>::loadAllAtOnce(AttributeReader & attrReader, size_t numDocs, size_t numValues)
+{
+ LoadedVectorR loaded(numValues);
+
+ bool hasWeight(attrReader.hasWeight());
+ for (uint32_t docIdx(0), valueIdx(0); docIdx < numDocs; ++docIdx) {
+ const uint32_t currValueCount = attrReader.getNextValueCount();
+ for (uint32_t subIdx = 0; subIdx < currValueCount; ++subIdx) {
+ loaded[valueIdx]._docId = docIdx;
+ loaded[valueIdx]._idx = subIdx;
+ loaded[valueIdx].setValue(attrReader.getNextData());
+ loaded[valueIdx].setWeight(hasWeight ? attrReader.getNextWeight() : 1);
+ valueIdx++;
+ }
+ }
+
+ attribute::sortLoadedByValue(loaded);
+ this->fillPostings(loaded);
+ loaded.rewind();
+ this->fillEnum(loaded);
+ attribute::sortLoadedByDocId(loaded);
+
+ loaded.rewind();
+ this->fillValues(loaded);
+}
+
+template <typename B, typename M>
+bool
+MultiValueNumericEnumAttribute<B, M>::onLoadEnumerated(typename B::ReaderBase &
+ attrReader)
+{
+ FileUtil::LoadedBuffer::UP udatBuffer(this->loadUDAT());
+
+ uint32_t numDocs = attrReader.getNumIdx() - 1;
+ uint64_t numValues = attrReader.getNumValues();
+ uint64_t enumCount = attrReader.getEnumCount();
+ assert(numValues == enumCount);
+ (void) enumCount;
+
+ EnumIndexVector eidxs;
+ this->fillEnum0(udatBuffer->buffer(), udatBuffer->size(), eidxs);
+ this->setNumDocs(numDocs);
+ this->setCommittedDocIdLimit(numDocs);
+ LoadedEnumAttributeVector loaded;
+ EnumVector enumHist;
+ if (this->hasPostings()) {
+ loaded.reserve(numValues);
+ this->fillEnumIdx(attrReader,
+ numValues,
+ eidxs,
+ loaded);
+ } else {
+ EnumVector(eidxs.size(), 0).swap(enumHist);
+ this->fillEnumIdx(attrReader,
+ numValues,
+ eidxs,
+ enumHist);
+ }
+ EnumIndexVector().swap(eidxs);
+ if (this->hasPostings()) {
+ if (numDocs > 0) {
+ this->onAddDoc(numDocs - 1);
+ }
+ attribute::sortLoadedByEnum(loaded);
+ this->fillPostingsFixupEnum(loaded);
+ } else {
+ this->fixupEnumRefCounts(enumHist);
+ }
+ return true;
+}
+
+
+template <typename B, typename M>
+bool
+MultiValueNumericEnumAttribute<B, M>::onLoad()
+{
+ AttributeReader attrReader(*this);
+ bool ok(attrReader.getHasLoadData());
+
+ if (!ok)
+ return false;
+
+ this->setCreateSerialNum(attrReader.getCreateSerialNum());
+
+ if (attrReader.getEnumerated())
+ return onLoadEnumerated(attrReader);
+
+ size_t numDocs = attrReader.getNumIdx() - 1;
+ uint32_t numValues = attrReader.getNumValues();
+
+ this->setNumDocs(numDocs);
+ this->setCommittedDocIdLimit(numDocs);
+ if (numDocs > 0) {
+ this->onAddDoc(numDocs - 1);
+ }
+ loadAllAtOnce(attrReader, numDocs, numValues);
+
+ return true;
+}
+
+template <typename B, typename M>
+AttributeVector::SearchContext::UP
+MultiValueNumericEnumAttribute<B, M>::getSearch(QueryTermSimple::UP qTerm,
+ const AttributeVector::SearchContext::Params & params) const
+{
+ (void) params;
+ QueryTermSimple::RangeResult<T> res = qTerm->getRange<T>();
+ if (this->hasArrayType()) {
+ if (res.isEqual()) {
+ return AttributeVector::SearchContext::UP(new ArraySearchContext(std::move(qTerm), *this));
+ } else {
+ return AttributeVector::SearchContext::UP(new ArraySearchContext(std::move(qTerm), *this));
+ }
+ } else {
+ if (res.isEqual()) {
+ return AttributeVector::SearchContext::UP(new SetSearchContext(std::move(qTerm), *this));
+ } else {
+ return AttributeVector::SearchContext::UP(new SetSearchContext(std::move(qTerm), *this));
+ }
+ }
+}
+
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/attribute/multinumericpostattribute.cpp b/searchlib/src/vespa/searchlib/attribute/multinumericpostattribute.cpp
new file mode 100644
index 00000000000..277fdafbf42
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/multinumericpostattribute.cpp
@@ -0,0 +1,12 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "multinumericpostattribute.h"
+#include "multinumericpostattribute.hpp"
+#include <vespa/log/log.h>
+
+LOG_SETUP(".searchlib.attribute.multinumericpostattribute");
+namespace search {
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/attribute/multinumericpostattribute.h b/searchlib/src/vespa/searchlib/attribute/multinumericpostattribute.h
new file mode 100644
index 00000000000..cc42f1e4445
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/multinumericpostattribute.h
@@ -0,0 +1,133 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/attribute/multinumericenumattribute.h>
+#include "postinglistattribute.h"
+#include "i_document_weight_attribute.h"
+
+namespace search {
+
+/*
+ * Implementation of multi value numeric attribute that in addition to enum store and
+ * multi value mapping uses an underlying posting list to provide faster search.
+ * This class is used for both array and weighted set types.
+ *
+ * B: EnumAttribute<P, BaseClass>
+ * M: multivalue::Value<EnumStoreBase::Index> (array) or
+ * multivalue::WeightedValue<EnumStoreBase::Index> (weighted set)
+ * M specifies the type stored in the MultiValueMapping
+ */
+template <typename B, typename M>
+class MultiValueNumericPostingAttribute
+ : public MultiValueNumericEnumAttribute<B, M>,
+ protected PostingListAttributeSubBase<AttributeWeightPosting,
+ typename B::LoadedVector,
+ typename B::LoadedValueType,
+ typename B::EnumStore>
+{
+private:
+ struct DocumentWeightAttributeAdapter : IDocumentWeightAttribute
+ {
+ const MultiValueNumericPostingAttribute &self;
+ DocumentWeightAttributeAdapter(const MultiValueNumericPostingAttribute &self_in) : self(self_in) {}
+ virtual LookupResult lookup(const vespalib::string &term) const override final;
+ virtual void create(btree::EntryRef idx, std::vector<DocumentWeightIterator> &dst) const override final;
+ virtual DocumentWeightIterator create(btree::EntryRef idx) const override final;
+ };
+ DocumentWeightAttributeAdapter _document_weight_attribute_adapter;
+
+ friend class PostingListAttributeTest;
+ template <typename, typename, typename>
+ friend class attribute::PostingSearchContext; // getEnumStore()
+ typedef MultiValueNumericPostingAttribute<B, M> SelfType;
+public:
+ typedef typename B::EnumStore EnumStore;
+ typedef typename EnumStore::Index EnumIndex;
+private:
+ typedef typename B::DocId DocId;
+ typedef typename B::LoadedVector LoadedVector;
+ typedef attribute::LoadedEnumAttributeVector LoadedEnumAttributeVector;
+ typedef PostingListAttributeSubBase<AttributeWeightPosting,
+ LoadedVector,
+ typename B::LoadedValueType,
+ EnumStore> PostingParent;
+ typedef typename PostingParent::PostingList PostingList;
+ typedef typename PostingParent::PostingMap PostingMap;
+ typedef typename PostingParent::Posting Posting;
+ typedef EnumPostingTree Dictionary;
+ typedef typename Dictionary::Iterator DictionaryIterator;
+ typedef typename Dictionary::ConstIterator DictionaryConstIterator;
+ typedef typename Dictionary::FrozenView FrozenDictionary;
+ typedef typename EnumStore::ComparatorType ComparatorType;
+
+ typedef typename MultiValueNumericEnumAttribute<B, M>::DocIndices DocIndices;
+ typedef typename MultiValueNumericEnumAttribute<B, M>::generation_t generation_t;
+ typedef typename MultiValueNumericEnumAttribute<B, M>::WeightedIndex WeightedIndex;
+
+ typedef typename MultiValueNumericEnumAttribute<B, M>::ArraySearchContext ArraySearchContext;
+ typedef typename MultiValueNumericEnumAttribute<B, M>::SetSearchContext SetSearchContext;
+ typedef ArraySearchContext ArrayNumericSearchContext;
+ typedef SetSearchContext SetNumericSearchContext;
+ typedef attribute::NumericPostingSearchContext<ArrayNumericSearchContext,
+ SelfType,
+ int32_t>
+ ArrayPostingSearchContext;
+ typedef attribute::NumericPostingSearchContext<SetNumericSearchContext,
+ SelfType,
+ int32_t>
+ SetPostingSearchContext;
+ using PostingParent::_postingList;
+ using PostingParent::clearAllPostings;
+ using PostingParent::handleFillPostings;
+ using PostingParent::fillPostingsFixupEnumBase;
+ using PostingParent::forwardedOnAddDoc;
+
+ virtual void freezeEnumDictionary();
+ virtual void mergeMemoryStats(MemoryUsage & total);
+ virtual void applyValueChanges(const DocIndices & docIndices, EnumStoreBase::IndexVector & unused);
+
+public:
+ MultiValueNumericPostingAttribute(const vespalib::string & name, const AttributeVector::Config & cfg);
+
+ virtual
+ ~MultiValueNumericPostingAttribute();
+
+ virtual void removeOldGenerations(generation_t firstUsed);
+ virtual void onGenerationChange(generation_t generation);
+
+ AttributeVector::SearchContext::UP
+ getSearch(QueryTermSimple::UP term, const AttributeVector::SearchContext::Params & params) const override;
+
+ virtual const IDocumentWeightAttribute *asDocumentWeightAttribute() const override;
+
+ virtual bool
+ onAddDoc(DocId doc)
+ {
+ return forwardedOnAddDoc(doc,
+ this->_mvMapping.getNumKeys(),
+ this->_mvMapping.getCapacityKeys());
+ }
+
+ virtual void
+ fillPostings(LoadedVector & loaded)
+ {
+ handleFillPostings(loaded);
+ }
+
+ virtual attribute::IPostingListAttributeBase *
+ getIPostingListAttributeBase(void)
+ {
+ return this;
+ }
+
+ virtual void
+ fillPostingsFixupEnum(const LoadedEnumAttributeVector &loaded)
+ {
+ fillPostingsFixupEnumBase(loaded);
+ }
+};
+
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/attribute/multinumericpostattribute.hpp b/searchlib/src/vespa/searchlib/attribute/multinumericpostattribute.hpp
new file mode 100644
index 00000000000..c3e06976316
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/multinumericpostattribute.hpp
@@ -0,0 +1,143 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/attribute/multinumericpostattribute.h>
+
+namespace search {
+
+template <typename B, typename M>
+void
+MultiValueNumericPostingAttribute<B, M>::freezeEnumDictionary()
+{
+ this->getEnumStore().freezeTree();
+}
+
+template <typename B, typename M>
+void
+MultiValueNumericPostingAttribute<B, M>::mergeMemoryStats(MemoryUsage & total)
+{
+ total.merge(this->getPostingList().getMemoryUsage());
+}
+
+template <typename B, typename M>
+void
+MultiValueNumericPostingAttribute<B, M>::applyValueChanges(const DocIndices & docIndices, EnumStoreBase::IndexVector & unused)
+{
+ typedef PostingChangeComputerT<WeightedIndex, PostingMap> PostingChangeComputer;
+ EnumStore & enumStore = this->getEnumStore();
+ ComparatorType compare(enumStore);
+
+ EnumIndexMapper mapper;
+ PostingMap changePost(PostingChangeComputer::compute(this->getMultiValueMapping(), docIndices, compare, mapper));
+ this->updatePostings(changePost);
+ MultiValueNumericEnumAttribute<B, M>::applyValueChanges(docIndices, unused);
+}
+
+
+template <typename B, typename M>
+MultiValueNumericPostingAttribute<B, M>::MultiValueNumericPostingAttribute(const vespalib::string & name,
+ const AttributeVector::Config & cfg)
+ : MultiValueNumericEnumAttribute<B, M>(name, cfg),
+ PostingParent(*this, this->getEnumStore()),
+ _document_weight_attribute_adapter(*this)
+{
+}
+
+template <typename B, typename M>
+MultiValueNumericPostingAttribute<B, M>::
+~MultiValueNumericPostingAttribute(void)
+{
+ this->disableFreeLists();
+ this->disableElemHoldList();
+ clearAllPostings();
+}
+
+
+template <typename B, typename M>
+void
+MultiValueNumericPostingAttribute<B, M>::removeOldGenerations(generation_t firstUsed)
+{
+ MultiValueNumericEnumAttribute<B, M>::removeOldGenerations(firstUsed);
+ _postingList.trimHoldLists(firstUsed);
+}
+
+template <typename B, typename M>
+void
+MultiValueNumericPostingAttribute<B, M>::onGenerationChange(generation_t generation)
+{
+ _postingList.freeze();
+ MultiValueNumericEnumAttribute<B, M>::onGenerationChange(generation);
+ _postingList.transferHoldLists(generation - 1);
+}
+
+template <typename B, typename M>
+AttributeVector::SearchContext::UP
+MultiValueNumericPostingAttribute<B, M>::getSearch(QueryTermSimple::UP qTerm,
+ const AttributeVector::SearchContext::Params & params) const
+{
+ std::unique_ptr<search::AttributeVector::SearchContext> sc;
+ sc.reset(new typename std::conditional<M::Value::_hasWeight,
+ SetPostingSearchContext,
+ ArrayPostingSearchContext>::
+ type(std::move(qTerm), params, *this));
+ return sc;
+}
+
+
+template <typename B, typename M>
+IDocumentWeightAttribute::LookupResult
+MultiValueNumericPostingAttribute<B, M>::DocumentWeightAttributeAdapter::lookup(const vespalib::string &term) const
+{
+ const Dictionary &dictionary = self._enumStore.getPostingDictionary();
+ const FrozenDictionary frozenDictionary(dictionary.getFrozenView());
+ DictionaryConstIterator dictItr(btree::BTreeNode::Ref(), dictionary.getAllocator());
+
+ char *end = nullptr;
+ int64_t int_term = strtoll(term.c_str(), &end, 10);
+ if (*end == '\0') {
+ ComparatorType comp(self._enumStore, int_term);
+
+ dictItr.lower_bound(frozenDictionary.getRoot(), EnumIndex(), comp);
+ if (dictItr.valid() && !comp(EnumIndex(), dictItr.getKey())) {
+ btree::EntryRef pidx = dictItr.getData();
+ if (pidx.valid()) {
+ const PostingList &plist = self.getPostingList();
+ auto minmax = plist.getAggregated(pidx);
+ return LookupResult(pidx, plist.frozenSize(pidx), minmax.getMin(), minmax.getMax());
+ }
+ }
+ }
+ return LookupResult();
+}
+
+template <typename B, typename M>
+void
+MultiValueNumericPostingAttribute<B, M>::DocumentWeightAttributeAdapter::create(btree::EntryRef idx, std::vector<DocumentWeightIterator> &dst) const
+{
+ assert(idx.valid());
+ self.getPostingList().beginFrozen(idx, dst);
+}
+
+template <typename B, typename M>
+DocumentWeightIterator
+MultiValueNumericPostingAttribute<B, M>::DocumentWeightAttributeAdapter::create(btree::EntryRef idx) const
+{
+ assert(idx.valid());
+ return self.getPostingList().beginFrozen(idx);
+}
+
+template <typename B, typename M>
+const IDocumentWeightAttribute *
+MultiValueNumericPostingAttribute<B, M>::asDocumentWeightAttribute() const
+{
+ if (this->hasWeightedSetType() &&
+ this->getBasicType() == AttributeVector::BasicType::INT64 &&
+ !this->getConfig().getIsFilter()) {
+ return &_document_weight_attribute_adapter;
+ }
+ return nullptr;
+}
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/attribute/multistringattribute.cpp b/searchlib/src/vespa/searchlib/attribute/multistringattribute.cpp
new file mode 100644
index 00000000000..9ffc62f219d
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/multistringattribute.cpp
@@ -0,0 +1,17 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "multistringattribute.h"
+#include "multistringattribute.hpp"
+#include <vespa/log/log.h>
+
+LOG_SETUP(".searchlib.attribute.multistringattribute");
+namespace search {
+
+template class MultiValueStringAttributeT<EnumAttribute<StringAttribute>, multivalue::MVMTemplateArg<multivalue::Value<EnumStoreBase::Index>, multivalue::Index32>>;
+template class MultiValueStringAttributeT<EnumAttribute<StringAttribute>, multivalue::MVMTemplateArg<multivalue::WeightedValue<EnumStoreBase::Index>, multivalue::Index32>>;
+template class MultiValueStringAttributeT<EnumAttribute<StringAttribute>, multivalue::MVMTemplateArg<multivalue::Value<EnumStoreBase::Index>, multivalue::Index64>>;
+template class MultiValueStringAttributeT<EnumAttribute<StringAttribute>, multivalue::MVMTemplateArg<multivalue::WeightedValue<EnumStoreBase::Index>, multivalue::Index64>>;
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/attribute/multistringattribute.h b/searchlib/src/vespa/searchlib/attribute/multistringattribute.h
new file mode 100644
index 00000000000..2f740cd6b30
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/multistringattribute.h
@@ -0,0 +1,173 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/attribute/stringbase.h>
+#include <vespa/searchlib/attribute/enumattribute.h>
+#include <vespa/searchlib/attribute/enumstore.h>
+#include <vespa/searchlib/attribute/multienumattribute.h>
+#include <vespa/searchlib/attribute/multivaluemapping.h>
+#include "enumhintsearchcontext.h"
+
+namespace search {
+
+/*
+ * Implementation of multi value string attribute that uses an underlying enum store
+ * to store unique string values and a multi value mapping to store the enum store indices
+ * for each document.
+ * This class is used for both array and weighted set types.
+ *
+ * B: Base class: EnumAttribute<StringAttribute>
+ * M: multivalue::Value<EnumStoreBase::Index> (array) or
+ * multivalue::WeightedValue<EnumStoreBase::Index> (weighted set)
+ * M specifies the type stored in the MultiValueMapping
+ */
+template <typename B, typename M>
+class MultiValueStringAttributeT : public MultiValueEnumAttribute<B, M>
+{
+protected:
+ typedef typename B::EnumStore EnumStore;
+ typedef typename MultiValueAttribute<B, M>::MultiValueType WeightedIndex;
+ typedef typename MultiValueAttribute<B, M>::ValueType EnumIndex;
+ typedef typename MultiValueAttribute<B, M>::MultiValueMapping MultiValueMapping;
+ typedef typename MultiValueAttribute<B, M>::ValueVector WeightedIndexVector;
+ typedef typename MultiValueAttribute<B, M>::DocumentValues DocIndices;
+
+ typedef StringAttribute::DocId DocId;
+ typedef StringAttribute::EnumHandle EnumHandle;
+ typedef StringAttribute::LoadedVector LoadedVector;
+ typedef StringAttribute::generation_t generation_t;
+ typedef StringAttribute::WeightedString WeightedString;
+ typedef StringAttribute::WeightedConstChar WeightedConstChar;
+ typedef StringAttribute::SearchContext SearchContext;
+ typedef StringAttribute::Change Change;
+ typedef StringAttribute::ChangeVector ChangeVector;
+ typedef StringAttribute::ValueModifier ValueModifier;
+ typedef StringAttribute::EnumModifier EnumModifier;
+ typedef StringAttribute::WeightedEnum WeightedEnum;
+ typedef attribute::EnumHintSearchContext EnumHintSearchContext;
+
+private:
+ friend class StringAttributeTest;
+
+public:
+ typedef typename MultiValueMappingBaseBase::Histogram Histogram;
+
+ MultiValueStringAttributeT(const vespalib::string & name, const AttributeVector::Config & c =
+ AttributeVector::Config(AttributeVector::BasicType::STRING,
+ attribute::CollectionType::ARRAY));
+ ~MultiValueStringAttributeT();
+
+ virtual void
+ freezeEnumDictionary(void);
+
+ //-------------------------------------------------------------------------
+ // new read api
+ //-------------------------------------------------------------------------
+ virtual const char * get(DocId doc) const {
+ if (this->getValueCount(doc) == 0) {
+ return NULL;
+ } else {
+ WeightedIndex idx;
+ this->_mvMapping.get(doc, 0, idx);
+ return this->_enumStore.getValue(idx.value());
+ }
+ }
+ template <typename BufferType>
+ uint32_t getHelper(DocId doc, BufferType * buffer, uint32_t sz) const {
+ const WeightedIndex * indices;
+ uint32_t valueCount = this->_mvMapping.get(doc, indices);
+ for(uint32_t i = 0, m = std::min(sz, valueCount); i < m; i++) {
+ buffer[i] = this->_enumStore.getValue(indices[i].value());
+ }
+ return valueCount;
+ }
+ virtual uint32_t get(DocId doc, vespalib::string * v, uint32_t sz) const {
+ return getHelper(doc, v, sz);
+ }
+ virtual uint32_t get(DocId doc, const char ** v, uint32_t sz) const {
+ return getHelper(doc, v, sz);
+ }
+
+ /// Weighted interface
+ template <typename WeightedType>
+ uint32_t getWeightedHelper(DocId doc, WeightedType * buffer, uint32_t sz) const {
+ const WeightedIndex * indices;
+ uint32_t valueCount = this->_mvMapping.get(doc, indices);
+ for (uint32_t i = 0, m = std::min(sz, valueCount); i < m; ++i) {
+ buffer[i] = WeightedType(this->_enumStore.getValue(indices[i].value()), indices[i].weight());
+ }
+ return valueCount;
+ }
+ virtual uint32_t get(DocId doc, WeightedString * v, uint32_t sz) const {
+ return getWeightedHelper(doc, v, sz);
+ }
+ virtual uint32_t get(DocId doc, WeightedConstChar * v, uint32_t sz) const {
+ return getWeightedHelper(doc, v, sz);
+ }
+
+ /*
+ * Specialization of SearchContext for weighted set type
+ */
+ class StringImplSearchContext : public StringAttribute::StringSearchContext {
+ public:
+ StringImplSearchContext(QueryTermSimple::UP qTerm, const StringAttribute & toBeSearched) :
+ StringAttribute::StringSearchContext(std::move(qTerm), toBeSearched)
+ { }
+ protected:
+ const MultiValueStringAttributeT<B, M> & myAttribute() const {
+ return static_cast< const MultiValueStringAttributeT<B, M> & > (attribute());
+ }
+ bool onCmp(DocId docId) const override;
+
+ template <typename Collector>
+ bool collectWeight(DocId doc, int32_t & weight, Collector & collector) const;
+ };
+
+ /*
+ * Specialization of SearchContext for weighted set type
+ */
+ class StringSetImplSearchContext : public StringImplSearchContext {
+ public:
+ StringSetImplSearchContext(QueryTermSimple::UP qTerm, const StringAttribute & toBeSearched) :
+ StringImplSearchContext(std::move(qTerm), toBeSearched)
+ { }
+ protected:
+ bool onCmp(DocId docId, int32_t & weight) const override;
+ };
+
+ /*
+ * Specialization of SearchContext for array type
+ */
+ class StringArrayImplSearchContext : public StringImplSearchContext {
+ public:
+ StringArrayImplSearchContext(QueryTermSimple::UP qTerm, const StringAttribute & toBeSearched) :
+ StringImplSearchContext(std::move(qTerm), toBeSearched)
+ { }
+ protected:
+ bool onCmp(DocId docId, int32_t & weight) const override;
+ };
+
+ template <typename BT>
+ class StringTemplSearchContext : public BT,
+ public EnumHintSearchContext
+ {
+ using BT::queryTerm;
+ typedef MultiValueStringAttributeT<B, M> AttrType;
+ typedef typename EnumStore::FoldedComparatorType FoldedComparatorType;
+ public:
+ StringTemplSearchContext(QueryTermSimple::UP qTerm, const AttrType & toBeSearched);
+ };
+
+ SearchContext::UP
+ getSearch(QueryTermSimple::UP term, const AttributeVector::SearchContext::Params & params) const override;
+};
+
+
+typedef MultiValueStringAttributeT<EnumAttribute<StringAttribute>, multivalue::MVMTemplateArg<multivalue::Value<EnumStoreBase::Index>, multivalue::Index32> > ArrayStringAttribute;
+typedef MultiValueStringAttributeT<EnumAttribute<StringAttribute>, multivalue::MVMTemplateArg<multivalue::WeightedValue<EnumStoreBase::Index>, multivalue::Index32> > WeightedSetStringAttribute;
+typedef MultiValueStringAttributeT<EnumAttribute<StringAttribute>, multivalue::MVMTemplateArg<multivalue::Value<EnumStoreBase::Index>, multivalue::Index64> > HugeArrayStringAttribute;
+typedef MultiValueStringAttributeT<EnumAttribute<StringAttribute>, multivalue::MVMTemplateArg<multivalue::WeightedValue<EnumStoreBase::Index>, multivalue::Index64> > HugeWeightedSetStringAttribute;
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/attribute/multistringattribute.hpp b/searchlib/src/vespa/searchlib/attribute/multistringattribute.hpp
new file mode 100644
index 00000000000..e791adb3231
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/multistringattribute.hpp
@@ -0,0 +1,146 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/attribute/stringattribute.h>
+#include <vespa/searchlib/attribute/multistringattribute.h>
+#include <vespa/searchlib/attribute/enumattribute.hpp>
+#include <vespa/searchlib/attribute/multienumattribute.hpp>
+#include <vespa/searchlib/util/bufferwriter.h>
+#include <vespa/fastlib/io/bufferedfile.h>
+#include <vespa/vespalib/text/utf8.h>
+#include <vespa/vespalib/text/lowercase.h>
+#include <set>
+#include <string>
+
+namespace search {
+
+//-----------------------------------------------------------------------------
+// MultiValueStringAttributeT public
+//-----------------------------------------------------------------------------
+template <typename B, typename M>
+MultiValueStringAttributeT<B, M>::
+MultiValueStringAttributeT(const vespalib::string &name,
+ const AttributeVector::Config &c)
+ : MultiValueEnumAttribute<B, M>(name, c)
+{
+}
+
+template <typename B, typename M>
+MultiValueStringAttributeT<B, M>::~MultiValueStringAttributeT()
+{
+}
+
+
+template <typename B, typename M>
+void
+MultiValueStringAttributeT<B, M>::freezeEnumDictionary(void)
+{
+ this->getEnumStore().freezeTree();
+}
+
+
+template <typename B, typename M>
+AttributeVector::SearchContext::UP
+MultiValueStringAttributeT<B, M>::getSearch(QueryTermSimple::UP qTerm,
+ const AttributeVector::SearchContext::Params & params) const
+{
+ (void) params;
+ std::unique_ptr<search::AttributeVector::SearchContext> sc;
+ if (this->getCollectionType() == attribute::CollectionType::WSET) {
+ sc.reset(new StringTemplSearchContext<StringSetImplSearchContext>(std::move(qTerm), *this));
+ } else {
+ sc.reset(new StringTemplSearchContext<StringArrayImplSearchContext>(std::move(qTerm), *this));
+ }
+ return sc;
+}
+
+namespace {
+
+template <typename E>
+class EnumAccessor {
+public:
+ EnumAccessor(const E & enumStore) : _enumStore(enumStore) { }
+ const char * get(typename E::Index index) const { return _enumStore.getValue(index); }
+private:
+ const E & _enumStore;
+};
+
+}
+
+template <typename B, typename M>
+bool
+MultiValueStringAttributeT<B, M>::StringSetImplSearchContext::onCmp(DocId doc, int32_t & weight) const
+{
+ StringAttribute::StringSearchContext::CollectWeight collector;
+ return this->collectWeight(doc, weight, collector);
+}
+
+template <typename B, typename M>
+bool
+MultiValueStringAttributeT<B, M>::StringArrayImplSearchContext::onCmp(DocId doc, int32_t & weight) const
+{
+ StringAttribute::StringSearchContext::CollectHitCount collector;
+ return this->collectWeight(doc, weight, collector);
+}
+
+template <typename B, typename M>
+template <typename Collector>
+bool
+MultiValueStringAttributeT<B, M>::StringImplSearchContext::collectWeight(DocId doc, int32_t & weight, Collector & collector) const
+{
+ const WeightedIndex * indices;
+ uint32_t valueCount = myAttribute()._mvMapping.get(doc, indices);
+
+ EnumAccessor<typename B::EnumStore> accessor(myAttribute()._enumStore);
+ collectMatches(indices, valueCount, accessor, collector);
+ weight = collector.getWeight();
+ return collector.hasMatch();
+}
+
+template <typename B, typename M>
+bool
+MultiValueStringAttributeT<B, M>::StringImplSearchContext::onCmp(DocId doc) const
+{
+ const MultiValueStringAttributeT<B, M> & attr(static_cast< const MultiValueStringAttributeT<B, M> & > (attribute()));
+ const WeightedIndex * indices;
+ uint32_t valueCount = attr._mvMapping.get(doc, indices);
+
+ for (uint32_t i(0); (i < valueCount); i++) {
+ if (isMatch(attr._enumStore.getValue(indices[i].value()))) {
+ return true;
+ }
+ }
+
+ return false;
+}
+
+template <typename B, typename M>
+template <typename BT>
+MultiValueStringAttributeT<B, M>::StringTemplSearchContext<BT>::
+StringTemplSearchContext(QueryTermSimple::UP qTerm, const AttrType & toBeSearched) :
+ BT(std::move(qTerm), toBeSearched),
+ EnumHintSearchContext(toBeSearched.getEnumStore().getEnumStoreDict(),
+ toBeSearched.getCommittedDocIdLimit(),
+ toBeSearched.getStatus().getNumValues())
+{
+ const EnumStore &enumStore(toBeSearched.getEnumStore());
+
+ this->_plsc = static_cast<attribute::IPostingListSearchContext *>(this);
+ if (this->valid()) {
+ if (this->isPrefix()) {
+ FoldedComparatorType comp(enumStore, queryTerm().getTerm(), true);
+ lookupRange(comp, comp);
+ } else if (this->isRegex()) {
+ vespalib::string prefix(vespalib::Regexp::get_prefix(this->queryTerm().getTerm()));
+ FoldedComparatorType comp(enumStore, prefix.c_str(), true);
+ lookupRange(comp, comp);
+ } else {
+ FoldedComparatorType comp(enumStore, queryTerm().getTerm());
+ lookupTerm(comp);
+ }
+ }
+}
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.cpp b/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.cpp
new file mode 100644
index 00000000000..a447ad1bd8d
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.cpp
@@ -0,0 +1,18 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "multistringpostattribute.h"
+#include "multistringpostattribute.hpp"
+#include <vespa/log/log.h>
+
+LOG_SETUP(".searchlib.attribute.multistringpostattribute");
+namespace search {
+
+EnumStoreBase::Index
+StringEnumIndexMapper::map(EnumStoreBase::Index original, const EnumStoreComparator & compare) const
+{
+ return _dictionary.find(original, compare).getKey();
+}
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.h b/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.h
new file mode 100644
index 00000000000..638e9b870e5
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.h
@@ -0,0 +1,123 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/attribute/multistringattribute.h>
+#include <vespa/searchlib/attribute/postinglistattribute.h>
+#include "i_document_weight_attribute.h"
+
+namespace search {
+
+/*
+ * Implementation of multi value string attribute that in addition to enum store and
+ * multi value mapping uses an underlying posting list to provide faster search.
+ * This class is used for both array and weighted set types.
+ *
+ * B: EnumAttribute<StringAttribute>
+ * T: multivalue::Value<EnumStoreBase::Index> (array) or
+ * multivalue::WeightedValue<EnumStoreBase::Index> (weighted set)
+ * T specifies the type stored in the MultiValueMapping
+ */
+template <typename B, typename T>
+class MultiValueStringPostingAttributeT
+ : public MultiValueStringAttributeT<B, T>,
+ protected PostingListAttributeSubBase<AttributeWeightPosting,
+ typename B::LoadedVector,
+ typename B::LoadedValueType,
+ typename B::EnumStore>
+{
+private:
+ struct DocumentWeightAttributeAdapter : IDocumentWeightAttribute
+ {
+ const MultiValueStringPostingAttributeT &self;
+ DocumentWeightAttributeAdapter(const MultiValueStringPostingAttributeT &self_in) : self(self_in) {}
+ virtual LookupResult lookup(const vespalib::string &term) const override final;
+ virtual void create(btree::EntryRef idx, std::vector<DocumentWeightIterator> &dst) const override final;
+ virtual DocumentWeightIterator create(btree::EntryRef idx) const override final;
+ };
+ DocumentWeightAttributeAdapter _document_weight_attribute_adapter;
+
+ friend class PostingListAttributeTest;
+ template <typename, typename, typename>
+ friend class attribute::PostingSearchContext; // getEnumStore()
+ friend class StringAttributeTest;
+ typedef MultiValueStringPostingAttributeT<B, T> SelfType;
+ typedef typename B::LoadedVector LoadedVector;
+ typedef attribute::LoadedEnumAttributeVector LoadedEnumAttributeVector;
+ typedef PostingListAttributeSubBase<AttributeWeightPosting,
+ LoadedVector,
+ typename B::LoadedValueType,
+ typename B::EnumStore> PostingParent;
+ typedef typename MultiValueStringAttributeT<B, T>::DocId DocId;
+public:
+ typedef typename MultiValueStringAttributeT<B, T>::EnumStore EnumStore;
+private:
+ typedef typename MultiValueStringAttributeT<B, T>::WeightedIndex WeightedIndex;
+ typedef typename MultiValueStringAttributeT<B, T>::DocIndices DocIndices;
+ typedef typename MultiValueStringAttributeT<B, T>::generation_t generation_t;
+ typedef typename PostingParent::PostingList PostingList;
+ typedef typename PostingParent::PostingMap PostingMap;
+ typedef typename PostingParent::Posting Posting;
+
+ typedef typename MultiValueStringAttributeT<B, T>::StringSetImplSearchContext StringSetImplSearchContext;
+ typedef typename MultiValueStringAttributeT<B, T>::StringArrayImplSearchContext StringArrayImplSearchContext;
+ typedef attribute::StringPostingSearchContext<StringSetImplSearchContext, SelfType, int32_t> StringSetPostingSearchContext;
+ typedef attribute::StringPostingSearchContext<StringArrayImplSearchContext, SelfType, int32_t> StringArrayPostingSearchContext;
+
+ typedef EnumPostingTree Dictionary;
+ typedef typename EnumStore::Index EnumIndex;
+ typedef typename EnumStore::ComparatorType ComparatorType;
+ typedef typename EnumStore::FoldedComparatorType FoldedComparatorType;
+ typedef typename Dictionary::Iterator DictionaryIterator;
+ typedef typename Dictionary::ConstIterator DictionaryConstIterator;
+ typedef typename Dictionary::FrozenView FrozenDictionary;
+ using PostingParent::_postingList;
+ using PostingParent::clearAllPostings;
+ using PostingParent::handleFillPostings;
+ using PostingParent::fillPostingsFixupEnumBase;
+ using PostingParent::forwardedOnAddDoc;
+
+ virtual void freezeEnumDictionary();
+ virtual void mergeMemoryStats(MemoryUsage & total);
+ virtual void applyValueChanges(const DocIndices & docIndices, EnumStoreBase::IndexVector & unused);
+
+public:
+ typedef MultiValueMappingBaseBase::Histogram Histogram;
+
+ MultiValueStringPostingAttributeT(const vespalib::string & name, const AttributeVector::Config & c =
+ AttributeVector::Config(AttributeVector::BasicType::STRING,
+ attribute::CollectionType::ARRAY));
+ ~MultiValueStringPostingAttributeT();
+
+ void removeOldGenerations(generation_t firstUsed) override;
+ void onGenerationChange(generation_t generation) override;
+
+ AttributeVector::SearchContext::UP
+ getSearch(QueryTermSimple::UP term, const AttributeVector::SearchContext::Params & params) const override;
+
+ const IDocumentWeightAttribute *asDocumentWeightAttribute() const override;
+
+ bool onAddDoc(DocId doc) override {
+ return forwardedOnAddDoc(doc, this->_mvMapping.getNumKeys(), this->_mvMapping.getCapacityKeys());
+ }
+
+ void fillPostings(LoadedVector & loaded) override {
+ handleFillPostings(loaded);
+ }
+
+ attribute::IPostingListAttributeBase * getIPostingListAttributeBase(void) override {
+ return this;
+ }
+
+ void fillPostingsFixupEnum(const LoadedEnumAttributeVector &loaded) override {
+ fillPostingsFixupEnumBase(loaded);
+ }
+};
+
+typedef MultiValueStringPostingAttributeT<EnumAttribute<StringAttribute>, multivalue::MVMTemplateArg<multivalue::Value<EnumStoreBase::Index>, multivalue::Index32> > ArrayStringPostingAttribute;
+typedef MultiValueStringPostingAttributeT<EnumAttribute<StringAttribute>, multivalue::MVMTemplateArg<multivalue::WeightedValue<EnumStoreBase::Index>, multivalue::Index32> > WeightedSetStringPostingAttribute;
+typedef MultiValueStringPostingAttributeT<EnumAttribute<StringAttribute>, multivalue::MVMTemplateArg<multivalue::Value<EnumStoreBase::Index>, multivalue::Index64> > HugeArrayStringPostingAttribute;
+typedef MultiValueStringPostingAttributeT<EnumAttribute<StringAttribute>, multivalue::MVMTemplateArg<multivalue::WeightedValue<EnumStoreBase::Index>, multivalue::Index64> > HugeWeightedSetStringPostingAttribute;
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.hpp b/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.hpp
new file mode 100644
index 00000000000..4db2ce1da2c
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.hpp
@@ -0,0 +1,152 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/attribute/stringattribute.h>
+#include <vespa/searchlib/attribute/multistringpostattribute.h>
+#include <vespa/searchlib/attribute/multistringattribute.hpp>
+#include <vespa/fastlib/io/bufferedfile.h>
+#include <set>
+#include <string>
+
+namespace search {
+
+template <typename B, typename T>
+MultiValueStringPostingAttributeT<B, T>::MultiValueStringPostingAttributeT(const vespalib::string & name, const AttributeVector::Config & c)
+ : MultiValueStringAttributeT<B, T>(name, c),
+ PostingParent(*this, this->getEnumStore()),
+ _document_weight_attribute_adapter(*this)
+{
+}
+
+template <typename B, typename T>
+MultiValueStringPostingAttributeT<B, T>::~MultiValueStringPostingAttributeT()
+{
+ this->disableFreeLists();
+ this->disableElemHoldList();
+ clearAllPostings();
+}
+
+class StringEnumIndexMapper : public EnumIndexMapper
+{
+public:
+ StringEnumIndexMapper(const EnumPostingTree & dictionary) : _dictionary(dictionary) { }
+ EnumStoreBase::Index map(EnumStoreBase::Index original, const EnumStoreComparator & compare) const override;
+private:
+ const EnumPostingTree & _dictionary;
+};
+
+template <typename B, typename T>
+void
+MultiValueStringPostingAttributeT<B, T>::
+applyValueChanges(const DocIndices &docIndices, EnumStoreBase::IndexVector &unused)
+{
+ typedef PostingChangeComputerT<WeightedIndex, PostingMap> PostingChangeComputer;
+ EnumStore &enumStore(this->getEnumStore());
+ Dictionary &dict(enumStore.getPostingDictionary());
+ FoldedComparatorType compare(enumStore);
+
+ StringEnumIndexMapper mapper(dict);
+ PostingMap changePost(PostingChangeComputer::compute(this->getMultiValueMapping(), docIndices, compare, mapper));
+ this->updatePostings(changePost);
+ MultiValueStringAttributeT<B, T>::applyValueChanges(docIndices, unused);
+}
+
+template <typename B, typename T>
+void
+MultiValueStringPostingAttributeT<B, T>::freezeEnumDictionary()
+{
+ this->getEnumStore().freezeTree();
+}
+
+template <typename B, typename T>
+void
+MultiValueStringPostingAttributeT<B, T>::mergeMemoryStats(MemoryUsage &total)
+{
+ total.merge(this->_postingList.getMemoryUsage());
+}
+
+
+template <typename B, typename T>
+void
+MultiValueStringPostingAttributeT<B, T>::removeOldGenerations(generation_t firstUsed)
+{
+ MultiValueStringAttributeT<B, T>::removeOldGenerations(firstUsed);
+ _postingList.trimHoldLists(firstUsed);
+}
+
+template <typename B, typename T>
+void
+MultiValueStringPostingAttributeT<B, T>::onGenerationChange(generation_t generation)
+{
+ _postingList.freeze();
+ MultiValueStringAttributeT<B, T>::onGenerationChange(generation);
+ _postingList.transferHoldLists(generation - 1);
+}
+
+
+template <typename B, typename T>
+AttributeVector::SearchContext::UP
+MultiValueStringPostingAttributeT<B, T>::getSearch(QueryTermSimple::UP qTerm,
+ const AttributeVector::SearchContext::Params & params) const
+{
+ std::unique_ptr<search::AttributeVector::SearchContext> sc;
+ sc.reset(new typename std::conditional<T::Value::_hasWeight,
+ StringSetPostingSearchContext,
+ StringArrayPostingSearchContext>::
+ type(std::move(qTerm), params.useBitVector(), *this));
+ return sc;
+}
+
+
+template <typename B, typename T>
+IDocumentWeightAttribute::LookupResult
+MultiValueStringPostingAttributeT<B, T>::DocumentWeightAttributeAdapter::lookup(const vespalib::string &term) const
+{
+ const Dictionary &dictionary = self._enumStore.getPostingDictionary();
+ const FrozenDictionary frozenDictionary(dictionary.getFrozenView());
+ DictionaryConstIterator dictItr(btree::BTreeNode::Ref(), dictionary.getAllocator());
+ FoldedComparatorType comp(self._enumStore, term.c_str());
+
+ dictItr.lower_bound(frozenDictionary.getRoot(), EnumIndex(), comp);
+ if (dictItr.valid() && !comp(EnumIndex(), dictItr.getKey())) {
+ btree::EntryRef pidx = dictItr.getData();
+ if (pidx.valid()) {
+ const PostingList &plist = self.getPostingList();
+ auto minmax = plist.getAggregated(pidx);
+ return LookupResult(pidx, plist.frozenSize(pidx), minmax.getMin(), minmax.getMax());
+ }
+ }
+ return LookupResult();
+}
+
+template <typename B, typename T>
+void
+MultiValueStringPostingAttributeT<B, T>::DocumentWeightAttributeAdapter::create(btree::EntryRef idx, std::vector<DocumentWeightIterator> &dst) const
+{
+ assert(idx.valid());
+ self.getPostingList().beginFrozen(idx, dst);
+}
+
+template <typename B, typename M>
+DocumentWeightIterator
+MultiValueStringPostingAttributeT<B, M>::DocumentWeightAttributeAdapter::create(btree::EntryRef idx) const
+{
+ assert(idx.valid());
+ return self.getPostingList().beginFrozen(idx);
+}
+
+template <typename B, typename T>
+const IDocumentWeightAttribute *
+MultiValueStringPostingAttributeT<B, T>::asDocumentWeightAttribute() const
+{
+ if (this->hasWeightedSetType() &&
+ this->getBasicType() == AttributeVector::BasicType::STRING &&
+ !this->getConfig().getIsFilter()) {
+ return &_document_weight_attribute_adapter;
+ }
+ return nullptr;
+}
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/attribute/multivalue.h b/searchlib/src/vespa/searchlib/attribute/multivalue.h
new file mode 100644
index 00000000000..ae8a96ff6dc
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/multivalue.h
@@ -0,0 +1,63 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <stdint.h>
+
+namespace search {
+
+namespace multivalue {
+
+template <typename T>
+class Value {
+public:
+ typedef T ValueType;
+ Value()
+ : _v()
+ {
+ }
+ Value(T v) : _v(v) { }
+ Value(T v, int32_t w) : _v(v) { (void) w; }
+ T value() const { return _v; }
+ operator T () const { return _v; }
+ operator T & () { return _v; }
+ int32_t weight() const { return 1; }
+ void setWeight(int32_t w) { (void) w; }
+ void incWeight(int32_t w) { (void) w; }
+ bool operator ==(const Value<T> & rhs) const { return _v == rhs._v; }
+ bool operator <(const Value<T> & rhs) const { return _v < rhs._v; }
+ bool operator >(const Value<T> & rhs) const { return _v > rhs._v; }
+ static bool hasWeight(void) { return false; }
+
+ static const bool _hasWeight = false;
+private:
+ T _v;
+};
+
+template <typename T>
+class WeightedValue {
+public:
+ typedef T ValueType;
+ WeightedValue() : _v(), _w(1) { }
+ WeightedValue(T v, int32_t w) : _v(v), _w(w) { }
+ T value() const { return _v; }
+ operator T () const { return _v; }
+ operator T & () { return _v; }
+ int32_t weight() const { return _w; }
+ void setWeight(int32_t w) { _w = w; }
+ void incWeight(int32_t w) { _w += w; }
+
+ bool operator==(const WeightedValue<T> & rhs) const { return _v == rhs._v; }
+ bool operator <(const WeightedValue<T> & rhs) const { return _v < rhs._v; }
+ bool operator >(const WeightedValue<T> & rhs) const { return _v > rhs._v; }
+ static bool hasWeight(void) { return true; }
+
+ static const bool _hasWeight = true;
+private:
+ T _v;
+ int32_t _w;
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/attribute/multivalueattribute.cpp b/searchlib/src/vespa/searchlib/attribute/multivalueattribute.cpp
new file mode 100644
index 00000000000..9db883f7ad2
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/multivalueattribute.cpp
@@ -0,0 +1,12 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "multivalueattribute.h"
+#include "multivalueattribute.hpp"
+#include <vespa/log/log.h>
+
+LOG_SETUP(".searchlib.attribute.multivalueattribute");
+namespace search {
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/attribute/multivalueattribute.h b/searchlib/src/vespa/searchlib/attribute/multivalueattribute.h
new file mode 100644
index 00000000000..4d94e74d37e
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/multivalueattribute.h
@@ -0,0 +1,78 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/attribute/multivaluemapping.h>
+
+namespace search {
+
+/*
+ * Implementation of multi value attribute using an underlying multi value mapping
+ *
+ * B: Base class
+ * M: MultiValueType (MultiValueMapping template argument)
+ */
+template <typename B, typename M>
+class MultiValueAttribute : public B
+{
+protected:
+ typedef typename B::DocId DocId;
+ typedef typename B::Change Change;
+ typedef typename B::ChangeVector ChangeVector;
+ typedef typename B::ChangeVector::const_iterator ChangeVectorIterator;
+
+ typedef typename M::Value MultiValueType;
+ typedef MultiValueMappingT<MultiValueType, typename M::Index> MultiValueMapping;
+ typedef typename MultiValueMappingBaseBase::Histogram Histogram;
+ typedef typename MultiValueType::ValueType ValueType;
+ typedef std::vector<MultiValueType> ValueVector;
+ typedef typename ValueVector::iterator ValueVectorIterator;
+ typedef std::vector<std::pair<DocId, ValueVector> > DocumentValues;
+
+ MultiValueMapping _mvMapping;
+
+ MultiValueMapping & getMultiValueMapping() { return _mvMapping; }
+ const MultiValueMapping & getMultiValueMapping() const { return _mvMapping; }
+
+ /*
+ * Iterate through the change vector and calculate new values for documents with changes
+ */
+ void applyAttributeChanges(DocumentValues & docValues);
+
+ virtual bool extractChangeData(const Change & c, ValueType & data) = 0;
+
+ /**
+ * Called when a new document has been added.
+ * Can be overridden by subclasses that need to resize structures as a result of this.
+ * Should return true if underlying structures were resized.
+ **/
+ virtual bool onAddDoc(DocId doc) { (void) doc; return false; }
+
+ virtual AddressSpace getMultiValueAddressSpaceUsage() const override;
+
+public:
+ MultiValueAttribute(const vespalib::string & baseFileName, const AttributeVector::Config & cfg);
+ virtual ~MultiValueAttribute();
+
+ virtual bool addDoc(DocId & doc);
+ virtual uint32_t getValueCount(DocId doc) const;
+ virtual const MultiValueMappingBaseBase *getMultiValueBase() const override {
+ return &getMultiValueMapping();
+ }
+
+private:
+ virtual int32_t getWeight(DocId doc, uint32_t idx) const;
+
+ virtual uint64_t
+ getTotalValueCount(void) const;
+
+public:
+ virtual void
+ clearDocs(DocId lidLow, DocId lidLimit);
+
+ virtual void
+ onShrinkLidSpace();
+};
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/attribute/multivalueattribute.hpp b/searchlib/src/vespa/searchlib/attribute/multivalueattribute.hpp
new file mode 100644
index 00000000000..759364b4f93
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/multivalueattribute.hpp
@@ -0,0 +1,203 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/attribute/multivalueattribute.h>
+
+namespace search {
+
+template <typename B, typename M>
+MultiValueAttribute<B, M>::
+MultiValueAttribute(const vespalib::string &baseFileName,
+ const AttributeVector::Config &cfg)
+ : B(baseFileName, cfg),
+ _mvMapping(this->getCommittedDocIdLimitRef(), cfg.getGrowStrategy())
+{
+}
+
+template <typename B, typename M>
+MultiValueAttribute<B, M>::~MultiValueAttribute()
+{
+}
+
+template <typename B, typename M>
+int32_t MultiValueAttribute<B, M>::getWeight(DocId doc, uint32_t idx) const
+{
+ MultiValueType value;
+ this->_mvMapping.get(doc, idx, value);
+ return (value.weight());
+}
+
+
+template <typename B, typename M>
+void
+MultiValueAttribute<B, M>::applyAttributeChanges(DocumentValues & docValues)
+{
+ Histogram capacityNeeded = _mvMapping.getEmptyHistogram();
+
+ // compute new values for each document with changes
+ for (ChangeVectorIterator current(this->_changes.begin()), end(this->_changes.end()); (current != end); ) {
+ DocId doc = current->_doc;
+
+ ValueVector newValues(_mvMapping.getValueCount(doc));
+ _mvMapping.get(doc, newValues);
+
+ // find last clear doc
+ ChangeVectorIterator lastClearDoc = end;
+ for (ChangeVectorIterator iter = current; (iter != end) && (iter->_doc == doc); ++iter) {
+ if (iter->_type == ChangeBase::CLEARDOC) {
+ lastClearDoc = iter;
+ }
+ }
+
+ // use last clear doc if found
+ if (lastClearDoc != end) {
+ current = lastClearDoc;
+ }
+
+ // iterate through all changes for this document
+ for (; (current != end) && (current->_doc == doc); ++current) {
+
+ if (current->_type == ChangeBase::CLEARDOC) {
+ newValues.clear();
+ continue;
+ }
+
+ ValueType data;
+ bool hasData = extractChangeData(*current, data);
+
+ if (current->_type == ChangeBase::APPEND) {
+ if (hasData) {
+ if (this->hasArrayType()) {
+ newValues.push_back(MultiValueType(data, current->_weight));
+ } else if (this->hasWeightedSetType()) {
+ ValueVectorIterator witer;
+ for (witer = newValues.begin(); witer != newValues.end(); ++witer) {
+ if (witer->value() == data) {
+ break;
+ }
+ }
+ if (witer != newValues.end()) {
+ witer->setWeight(current->_weight);
+ } else {
+ newValues.push_back(MultiValueType(data, current->_weight));
+ }
+ }
+ }
+ } else if (current->_type == ChangeBase::REMOVE) {
+ if (hasData) {
+ for (ValueVectorIterator witer = newValues.begin(); witer != newValues.end(); ) {
+ if (witer->value() == data) {
+ witer = newValues.erase(witer);
+ } else {
+ ++witer;
+ }
+ }
+ }
+ } else if ((current->_type >= ChangeBase::INCREASEWEIGHT) && (current->_type <= ChangeBase::DIVWEIGHT)) {
+ if (this->hasWeightedSetType() && hasData) {
+ ValueVectorIterator witer;
+ for (witer = newValues.begin(); witer != newValues.end(); ++witer) {
+ if (witer->value() == data) {
+ break;
+ }
+ }
+ if (witer != newValues.end()) {
+ witer->setWeight(this->applyWeightChange(witer->weight(), *current));
+ if (witer->weight() == 0 && this->getInternalCollectionType().removeIfZero()) {
+ newValues.erase(witer);
+ }
+ } else if (this->getInternalCollectionType().createIfNonExistant()) {
+ int32_t weight = this->applyWeightChange(0, *current);
+ if (weight != 0 || !this->getInternalCollectionType().removeIfZero()) {
+ newValues.push_back(MultiValueType(data, weight));
+ }
+ }
+ }
+ }
+ }
+
+ // update histogram
+ uint32_t maxValues = MultiValueMapping::maxValues();
+ if (newValues.size() < maxValues) {
+ capacityNeeded[newValues.size()] += 1;
+ } else {
+ capacityNeeded[maxValues] += 1;
+ }
+
+ this->checkSetMaxValueCount(newValues.size());
+
+ docValues.push_back(std::make_pair(doc, ValueVector()));
+ docValues.back().second.swap(newValues);
+ }
+
+ if (!_mvMapping.enoughCapacity(capacityNeeded)) {
+ this->removeAllOldGenerations();
+ _mvMapping.performCompaction(capacityNeeded);
+ }
+}
+
+
+template <typename B, typename M>
+AddressSpace
+MultiValueAttribute<B, M>::getMultiValueAddressSpaceUsage() const
+{
+ return _mvMapping.getAddressSpaceUsage();
+}
+
+
+template <typename B, typename M>
+bool
+MultiValueAttribute<B, M>::addDoc(DocId & doc)
+{
+ bool incGen = this->_mvMapping.isFull();
+ this->_mvMapping.addKey(doc);
+ this->incNumDocs();
+ this->updateUncommittedDocIdLimit(doc);
+ incGen |= onAddDoc(doc);
+ if (incGen) {
+ this->incGeneration();
+ } else
+ this->removeAllOldGenerations();
+ return true;
+}
+
+template <typename B, typename M>
+uint32_t
+MultiValueAttribute<B, M>::getValueCount(DocId doc) const
+{
+ if (doc >= this->getNumDocs()) {
+ return 0;
+ }
+ return this->_mvMapping.getValueCount(doc);
+}
+
+
+template <typename B, typename M>
+uint64_t
+MultiValueAttribute<B, M>::getTotalValueCount(void) const
+{
+ return _mvMapping.getTotalValueCnt();
+}
+
+
+template <typename B, typename M>
+void
+MultiValueAttribute<B, M>::clearDocs(DocId lidLow, DocId lidLimit)
+{
+ _mvMapping.clearDocs(lidLow, lidLimit, *this);
+}
+
+
+template <typename B, typename M>
+void
+MultiValueAttribute<B, M>::onShrinkLidSpace()
+{
+ uint32_t committedDocIdLimit = this->getCommittedDocIdLimit();
+ _mvMapping.shrinkKeys(committedDocIdLimit);
+ this->setNumDocs(committedDocIdLimit);
+}
+
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/attribute/multivalueattributesaver.cpp b/searchlib/src/vespa/searchlib/attribute/multivalueattributesaver.cpp
new file mode 100644
index 00000000000..b3d38f60546
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/multivalueattributesaver.cpp
@@ -0,0 +1,32 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "multivalueattributesaver.h"
+
+using vespalib::GenerationHandler;
+using search::IAttributeSaveTarget;
+
+namespace search {
+
+template <typename IndexT>
+MultiValueAttributeSaver<IndexT>::
+MultiValueAttributeSaver(GenerationHandler::Guard &&guard,
+ const IAttributeSaveTarget::Config &cfg,
+ const MultiValueMappingBase<IndexT> &mvMapping)
+ : AttributeSaver(std::move(guard), cfg),
+ _frozenIndices(mvMapping.getIndicesCopy())
+{
+}
+
+
+template <typename IndexT>
+MultiValueAttributeSaver<IndexT>::~MultiValueAttributeSaver()
+{
+}
+
+template class MultiValueAttributeSaver<multivalue::Index32>;
+
+template class MultiValueAttributeSaver<multivalue::Index64>;
+
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/attribute/multivalueattributesaver.h b/searchlib/src/vespa/searchlib/attribute/multivalueattributesaver.h
new file mode 100644
index 00000000000..5332d01c980
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/multivalueattributesaver.h
@@ -0,0 +1,36 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "attributesaver.h"
+#include "iattributesavetarget.h"
+#include "multivaluemapping.h"
+
+namespace search {
+
+/*
+ * Base class for saving a multivalue attribute (e.g. weighted set of int).
+ *
+ * Template argument IndexT is either multivalue::Index32 or
+ * multivalue::Index64
+ */
+template <typename IndexT>
+class MultiValueAttributeSaver : public AttributeSaver
+{
+protected:
+ using Index = IndexT;
+ using GenerationHandler = vespalib::GenerationHandler;
+ using IndexCopyVector =
+ typename MultiValueMappingBase<Index>::IndexCopyVector;
+ IndexCopyVector _frozenIndices;
+
+public:
+ MultiValueAttributeSaver(GenerationHandler::Guard &&guard,
+ const IAttributeSaveTarget::Config &cfg,
+ const MultiValueMappingBase<IndexT> &mvMapping);
+
+ virtual ~MultiValueAttributeSaver();
+};
+
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/attribute/multivalueattributesaverutils.h b/searchlib/src/vespa/searchlib/attribute/multivalueattributesaverutils.h
new file mode 100644
index 00000000000..92c9a169404
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/multivalueattributesaverutils.h
@@ -0,0 +1,97 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+namespace search {
+
+namespace multivalueattributesaver {
+
+/*
+ * Class to write to count files for multivalue attributes (.idx suffix).
+ */
+class CountWriter
+{
+ std::unique_ptr<search::BufferWriter> _countWriter;
+ uint64_t _cnt;
+
+public:
+ CountWriter(IAttributeSaveTarget &saveTarget)
+ : _countWriter(saveTarget.idxWriter().allocBufferWriter()),
+ _cnt(0)
+ {
+ uint32_t initialCount = 0;
+ _countWriter->write(&initialCount, sizeof(uint32_t));
+ }
+
+ ~CountWriter()
+ {
+ _countWriter->flush();
+ }
+
+ void
+ writeCount(uint32_t count) {
+ _cnt += count;
+ uint32_t cnt32 = static_cast<uint32_t>(_cnt);
+ _countWriter->write(&cnt32, sizeof(uint32_t));
+ }
+};
+
+/*
+ * Class to write to weight files (or not) for multivalue attributes.
+ */
+template <bool hasWeight>
+class WeightWriter;
+
+/*
+ * Class to write to weight files for multivalue attributes (.weight suffix).
+ */
+template <>
+class WeightWriter<true>
+{
+ std::unique_ptr<search::BufferWriter> _weightWriter;
+
+public:
+ WeightWriter(IAttributeSaveTarget &saveTarget)
+ : _weightWriter(saveTarget.weightWriter().allocBufferWriter())
+ {
+ }
+
+ ~WeightWriter()
+ {
+ _weightWriter->flush();
+ }
+
+ template <typename MultiValueT>
+ void
+ writeWeights(const MultiValueT *values, uint32_t count) {
+ for (uint32_t i = 0; i < count; ++i) {
+ int32_t weight = values[i].weight();
+ _weightWriter->write(&weight, sizeof(int32_t));
+ }
+ }
+};
+
+/*
+ * Class to not write to weight files for multivalue attributes.
+ */
+template <>
+class WeightWriter<false>
+{
+public:
+ WeightWriter(IAttributeSaveTarget &)
+ {
+ }
+
+ ~WeightWriter()
+ {
+ }
+
+ template <typename MultiValueT>
+ void
+ writeWeights(const MultiValueT *, uint32_t) {
+ }
+};
+
+} // namespace search::multivalueattributesaver
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/attribute/multivaluemapping.cpp b/searchlib/src/vespa/searchlib/attribute/multivaluemapping.cpp
new file mode 100644
index 00000000000..e8e21073323
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/multivaluemapping.cpp
@@ -0,0 +1,858 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".searchlib.attribute.multivaluemapping");
+#include "multivaluemapping.h"
+#include "multivaluemapping.hpp"
+#include "attributevector.h"
+#include "loadedenumvalue.h"
+
+namespace search {
+
+using vespalib::GenerationHeldBase;
+
+MultiValueMappingBaseBase::MultiValueMappingBaseBase(size_t maxValues,
+ size_t maxAlternatives)
+ : _singleVectorsStatus(maxValues * maxAlternatives),
+ _vectorVectorsStatus(maxAlternatives),
+ _genHolder(),
+ _pendingCompactSingleVector(),
+ _pendingCompactVectorVector(false),
+ _pendingCompact(false),
+ _totalValueCnt(0)
+{
+}
+
+MultiValueMappingBaseBase::~MultiValueMappingBaseBase()
+{
+}
+
+void
+MultiValueMappingBaseBase::failNewSize(uint64_t minNewSize, uint64_t maxSize)
+{
+ LOG(fatal,
+ "MultiValueMappingBase::failNewSize: "
+ "Minimum new size (%" PRIu64 ") exceeds max size (%" PRIu64 ")",
+ minNewSize, maxSize);
+ abort();
+}
+
+size_t
+MultiValueMappingBaseBase::
+computeNewSize(size_t used, size_t dead, size_t needed, size_t maxSize)
+{
+ float growRatio = 1.5f;
+ size_t newSize = static_cast<size_t>
+ ((used - dead + needed) * growRatio);
+ if (newSize <= maxSize)
+ return newSize;
+ newSize = (used - dead + needed) + 1000000;
+ if (newSize <= maxSize)
+ return maxSize;
+ failNewSize(newSize, maxSize);
+ return 0;
+}
+
+MultiValueMappingBaseBase::Histogram::Histogram(size_t maxValues) :
+ _maxValues(maxValues),
+ _histogram()
+{
+}
+
+MultiValueMappingBaseBase::Histogram
+MultiValueMappingBaseBase::getEmptyHistogram(size_t maxValues) const
+{
+ return Histogram(maxValues);
+}
+
+MultiValueMappingBaseBase::Histogram
+MultiValueMappingBaseBase::getHistogram(AttributeVector::ReaderBase &reader)
+ const
+{
+ Histogram capacityNeeded = getEmptyHistogram();
+ uint32_t numDocs(reader.getNumIdx() - 1);
+ for (AttributeVector::DocId doc = 0; doc < numDocs; ++doc) {
+ const uint32_t valueCount(reader.getNextValueCount());
+ capacityNeeded[valueCount] += 1;
+ }
+ return capacityNeeded;
+}
+
+
+void
+MultiValueMappingBaseBase::clearPendingCompact(void)
+{
+ if (!_pendingCompact || _pendingCompactVectorVector ||
+ !_pendingCompactSingleVector.empty())
+ return;
+ _pendingCompact = false;
+}
+
+
+template <typename I>
+class MultiValueMappingHeldVector : public GenerationHeldBase
+{
+ typedef I Index;
+
+ MultiValueMappingBase<I> &_mvmb;
+ Index _idx;
+
+public:
+ MultiValueMappingHeldVector(size_t size,
+ MultiValueMappingBase<I> &mvmb,
+ Index &idx)
+ : GenerationHeldBase(size),
+ _mvmb(mvmb),
+ _idx(idx)
+ {
+ }
+
+ virtual
+ ~MultiValueMappingHeldVector(void)
+ {
+ _mvmb.doneHoldVector(_idx);
+ }
+};
+
+
+template <typename I>
+void MultiValueMappingBase<I>::doneHoldVector(Index idx)
+{
+#ifdef LOG_MULTIVALUE_MAPPING
+ LOG(info,
+ "free vector: idx.values() = %u, idx.alternative() = %u",
+ idx.values(), idx.alternative());
+#endif
+ clearVector(idx);
+ if (idx.values() < Index::maxValues()) {
+ _singleVectorsStatus[idx.vectorIdx()] = FREE;
+ } else if (idx.values() == Index::maxValues()) {
+ _vectorVectorsStatus[idx.alternative()] = FREE;
+ }
+}
+
+
+template <typename I>
+MemoryUsage
+MultiValueMappingBase<I>::getMemoryUsage() const
+{
+ MemoryUsage retval = _indices.getMemoryUsage();
+
+ for (uint32_t i = 0; i < _singleVectorsStatus.size(); ++i) {
+ if (_singleVectorsStatus[i] == HOLD)
+ continue;
+ const MemoryUsage & memUsage(getSingleVectorUsage(i));
+ retval.merge(memUsage);
+ }
+ for (uint32_t i = 0; i < _vectorVectorsStatus.size(); ++i) {
+ if (_vectorVectorsStatus[i] == HOLD)
+ continue;
+ const MemoryUsage & memUsage(getVectorVectorUsage(i));
+ retval.merge(memUsage);
+ }
+ retval.incAllocatedBytesOnHold(_genHolder.getHeldBytes());
+ return retval;
+}
+
+template <typename I>
+AddressSpace
+MultiValueMappingBase<I>::getAddressSpaceUsage() const
+{
+ size_t addressSpaceUsed = 0;
+ for (uint32_t i = 0; i < _singleVectorsStatus.size(); ++i) {
+ if (_singleVectorsStatus[i] == ACTIVE) {
+ addressSpaceUsed = std::max(addressSpaceUsed, getSingleVectorAddressSpaceUsed(i));
+ }
+ }
+ for (uint32_t i = 0; i < _vectorVectorsStatus.size(); ++i) {
+ if (_vectorVectorsStatus[i] == ACTIVE) {
+ addressSpaceUsed = std::max(addressSpaceUsed, getVectorVectorAddressSpaceUsed(i));
+ }
+ }
+ return AddressSpace(addressSpaceUsed, Index::offsetSize());
+}
+
+template <typename I>
+MultiValueMappingBase<I>::MultiValueMappingBase(uint32_t &committedDocIdLimit,
+ uint32_t numKeys,
+ const GrowStrategy & gs)
+ : MultiValueMappingBaseBase(Index::maxValues(), Index::alternativeSize()),
+ _indices(gs.getDocsInitialCapacity(),
+ gs.getDocsGrowPercent(),
+ gs.getDocsGrowDelta(),
+ _genHolder),
+ _committedDocIdLimit(committedDocIdLimit)
+{
+ _indices.unsafe_reserve(numKeys);
+ _indices.unsafe_resize(numKeys);
+}
+
+template <typename I>
+MultiValueMappingBase<I>::~MultiValueMappingBase()
+{
+}
+
+template <typename I>
+void MultiValueMappingBase<I>::insertIntoHoldList(Index idx)
+{
+ size_t holdBytes = 0u;
+ if (idx.values() < Index::maxValues()) {
+ _singleVectorsStatus[idx.vectorIdx()] = HOLD;
+ holdBytes = getSingleVectorUsage(idx.vectorIdx()).allocatedBytes();
+ } else {
+ _vectorVectorsStatus[idx.alternative()] = HOLD;
+ holdBytes = getVectorVectorUsage(idx.alternative()).allocatedBytes();
+ }
+ GenerationHeldBase::UP hold(new MultiValueMappingHeldVector<I>(holdBytes,
+ *this,
+ idx));
+ _genHolder.hold(std::move(hold));
+}
+
+
+template <typename I>
+void MultiValueMappingBase<I>::setActiveVector(Index idx)
+{
+ if (idx.values() < Index::maxValues()) {
+ _singleVectorsStatus[idx.vectorIdx()] = ACTIVE;
+ } else {
+ _vectorVectorsStatus[idx.alternative()] = ACTIVE;
+ }
+}
+
+template <typename I>
+void
+MultiValueMappingBase<I>::reset(uint32_t numKeys)
+{
+ _genHolder.clearHoldLists();
+ _indices.reset();
+ _indices.unsafe_reserve(numKeys);
+ for (size_t i = 0; i < numKeys; ++i) {
+ _indices.push_back(Index());
+ }
+}
+
+
+template <typename I>
+void
+MultiValueMappingBase<I>::addKey(uint32_t & key)
+{
+ uint32_t retval = _indices.size();
+ _indices.push_back(Index());
+ key = retval;
+}
+
+
+template <typename I>
+void
+MultiValueMappingBase<I>::shrinkKeys(uint32_t newSize)
+{
+ assert(newSize >= _committedDocIdLimit);
+ assert(newSize < _indices.size());
+ _indices.shrink(newSize);
+}
+
+
+template <typename I>
+void
+MultiValueMappingBase<I>::clearDocs(uint32_t lidLow, uint32_t lidLimit,
+ AttributeVector &v)
+{
+ assert(lidLow <= lidLimit);
+ assert(lidLimit <= v.getNumDocs());
+ assert(lidLimit <= _indices.size());
+ for (uint32_t lid = lidLow; lid < lidLimit; ++lid) {
+ if (_indices[lid].idx() != 0) {
+ v.clearDoc(lid);
+ }
+ }
+}
+
+template <typename I>
+class MultiValueMappingHoldElem : public GenerationHeldBase
+{
+ typedef I Index;
+
+ MultiValueMappingBase<I> &_mvmb;
+ Index _idx;
+public:
+ MultiValueMappingHoldElem(size_t size,
+ MultiValueMappingBase<I> &mvmb,
+ Index idx)
+ : GenerationHeldBase(size),
+ _mvmb(mvmb),
+ _idx(idx)
+ {
+ }
+
+ virtual ~MultiValueMappingHoldElem() {
+ _mvmb.doneHoldElem(_idx);
+ }
+};
+
+
+template <typename I>
+void
+MultiValueMappingBase<I>::holdElem(Index idx, size_t size)
+{
+ GenerationHeldBase::UP hold(new MultiValueMappingHoldElem<I>(size, *this,
+ idx));
+ _genHolder.hold(std::move(hold));
+}
+
+
+template class MultiValueMappingBase<multivalue::Index32>;
+template class MultiValueMappingBase<multivalue::Index64>;
+
+template class MultiValueMappingFallbackVectorHold<
+ MultiValueMappingVector<multivalue::Value<int8_t> >::VectorBase >;
+template class MultiValueMappingFallbackVectorHold<
+ MultiValueMappingVector<multivalue::Value<int16_t> >::VectorBase >;
+template class MultiValueMappingFallbackVectorHold<
+ MultiValueMappingVector<multivalue::Value<int32_t> >::VectorBase >;
+template class MultiValueMappingFallbackVectorHold<
+ MultiValueMappingVector<multivalue::Value<int64_t> >::VectorBase >;
+template class MultiValueMappingFallbackVectorHold<
+ MultiValueMappingVector<multivalue::Value<float> >::VectorBase >;
+template class MultiValueMappingFallbackVectorHold<
+ MultiValueMappingVector<multivalue::Value<double> >::VectorBase >;
+template class MultiValueMappingFallbackVectorHold<
+ MultiValueMappingVector<multivalue::Value<EnumStoreBase::Index> >::VectorBase >;
+template class MultiValueMappingFallbackVectorHold<
+ MultiValueMappingVector<multivalue::WeightedValue<int8_t> >::VectorBase >;
+template class MultiValueMappingFallbackVectorHold<
+ MultiValueMappingVector<multivalue::WeightedValue<int16_t> >::VectorBase >;
+template class MultiValueMappingFallbackVectorHold<
+ MultiValueMappingVector<multivalue::WeightedValue<int32_t> >::VectorBase >;
+template class MultiValueMappingFallbackVectorHold<
+ MultiValueMappingVector<multivalue::WeightedValue<int64_t> >::VectorBase >;
+template class MultiValueMappingFallbackVectorHold<
+ MultiValueMappingVector<multivalue::WeightedValue<float> >::VectorBase >;
+template class MultiValueMappingFallbackVectorHold<
+ MultiValueMappingVector<multivalue::WeightedValue<double> >::VectorBase >;
+template class MultiValueMappingFallbackVectorHold<
+ MultiValueMappingVector<multivalue::WeightedValue<EnumStoreBase::Index> >::VectorBase >;
+template class MultiValueMappingFallbackVectorHold<
+ MultiValueMappingVector<vespalib::Array<multivalue::Value<int8_t> > >::VectorBase >;
+template class MultiValueMappingFallbackVectorHold<
+ MultiValueMappingVector<vespalib::Array<multivalue::Value<int16_t> > >::VectorBase >;
+template class MultiValueMappingFallbackVectorHold<
+ MultiValueMappingVector<vespalib::Array<multivalue::Value<int32_t> > >::VectorBase >;
+template class MultiValueMappingFallbackVectorHold<
+ MultiValueMappingVector<vespalib::Array<multivalue::Value<int64_t> > >::VectorBase >;
+template class MultiValueMappingFallbackVectorHold<
+ MultiValueMappingVector<vespalib::Array<multivalue::Value<float> > >::VectorBase >;
+template class MultiValueMappingFallbackVectorHold<
+ MultiValueMappingVector<vespalib::Array<multivalue::Value<double> > >::VectorBase >;
+template class MultiValueMappingFallbackVectorHold<
+ MultiValueMappingVector<vespalib::Array<multivalue::Value<EnumStoreBase::Index> > >::VectorBase >;
+template class MultiValueMappingFallbackVectorHold<
+ MultiValueMappingVector<vespalib::Array<multivalue::WeightedValue<int8_t> > >::VectorBase >;
+template class MultiValueMappingFallbackVectorHold<
+ MultiValueMappingVector<vespalib::Array<multivalue::WeightedValue<int16_t> > >::VectorBase >;
+template class MultiValueMappingFallbackVectorHold<
+ MultiValueMappingVector<vespalib::Array<multivalue::WeightedValue<int32_t> > >::VectorBase >;
+template class MultiValueMappingFallbackVectorHold<
+ MultiValueMappingVector<vespalib::Array<multivalue::WeightedValue<int64_t> > >::VectorBase >;
+template class MultiValueMappingFallbackVectorHold<
+ MultiValueMappingVector<vespalib::Array<multivalue::WeightedValue<float> > >::VectorBase >;
+template class MultiValueMappingFallbackVectorHold<
+ MultiValueMappingVector<vespalib::Array<multivalue::WeightedValue<double> > >::VectorBase >;
+template class MultiValueMappingFallbackVectorHold<
+ MultiValueMappingVector<vespalib::Array<multivalue::WeightedValue<EnumStoreBase::Index> > >::VectorBase >;
+
+template class MultiValueMappingVector<
+ multivalue::Value<int8_t> >;
+template class MultiValueMappingVector<
+ multivalue::Value<int16_t> >;
+template class MultiValueMappingVector<
+ multivalue::Value<int32_t> >;
+template class MultiValueMappingVector<
+ multivalue::Value<int64_t> >;
+template class MultiValueMappingVector<
+ multivalue::Value<float> >;
+template class MultiValueMappingVector<
+ multivalue::Value<double> >;
+template class MultiValueMappingVector<
+ multivalue::Value<EnumStoreBase::Index> >;
+template class MultiValueMappingVector<
+ multivalue::WeightedValue<int8_t> >;
+template class MultiValueMappingVector<
+ multivalue::WeightedValue<int16_t> >;
+template class MultiValueMappingVector<
+ multivalue::WeightedValue<int32_t> >;
+template class MultiValueMappingVector<
+ multivalue::WeightedValue<int64_t> >;
+template class MultiValueMappingVector<
+ multivalue::WeightedValue<float> >;
+template class MultiValueMappingVector<
+ multivalue::WeightedValue<double> >;
+template class MultiValueMappingVector<
+ multivalue::WeightedValue<EnumStoreBase::Index> >;
+template class MultiValueMappingVector<
+ vespalib::Array<multivalue::Value<int8_t> > >;
+template class MultiValueMappingVector<
+ vespalib::Array<multivalue::Value<int16_t> > >;
+template class MultiValueMappingVector<
+ vespalib::Array<multivalue::Value<int32_t> > >;
+template class MultiValueMappingVector<
+ vespalib::Array<multivalue::Value<int64_t> > >;
+template class MultiValueMappingVector<
+ vespalib::Array<multivalue::Value<float> > >;
+template class MultiValueMappingVector<
+ vespalib::Array<multivalue::Value<double> > >;
+template class MultiValueMappingVector<
+ vespalib::Array<multivalue::Value<EnumStoreBase::Index> > >;
+template class MultiValueMappingVector<
+ vespalib::Array<multivalue::WeightedValue<int8_t> > >;
+template class MultiValueMappingVector<
+ vespalib::Array<multivalue::WeightedValue<int16_t> > >;
+template class MultiValueMappingVector<
+ vespalib::Array<multivalue::WeightedValue<int32_t> > >;
+template class MultiValueMappingVector<
+ vespalib::Array<multivalue::WeightedValue<int64_t> > >;
+template class MultiValueMappingVector<
+ vespalib::Array<multivalue::WeightedValue<float> > >;
+template class MultiValueMappingVector<
+ vespalib::Array<multivalue::WeightedValue<double> > >;
+template class MultiValueMappingVector<
+ vespalib::Array<multivalue::WeightedValue<EnumStoreBase::Index> > >;
+
+template class MultiValueMappingT<multivalue::Value<int8_t> >;
+template class MultiValueMappingT<multivalue::Value<int16_t> >;
+template class MultiValueMappingT<multivalue::Value<int32_t> >;
+template class MultiValueMappingT<multivalue::Value<int64_t> >;
+template class MultiValueMappingT<multivalue::Value<float> >;
+template class MultiValueMappingT<multivalue::Value<double> >;
+template class MultiValueMappingT<
+ multivalue::Value<EnumStoreBase::Index> >;
+template class MultiValueMappingT<multivalue::WeightedValue<int8_t> >;
+template class MultiValueMappingT<multivalue::WeightedValue<int16_t> >;
+template class MultiValueMappingT<multivalue::WeightedValue<int32_t> >;
+template class MultiValueMappingT<multivalue::WeightedValue<int64_t> >;
+template class MultiValueMappingT<multivalue::WeightedValue<float> >;
+template class MultiValueMappingT<multivalue::WeightedValue<double> >;
+template class MultiValueMappingT<
+ multivalue::WeightedValue<EnumStoreBase::Index> >;
+template class MultiValueMappingT<multivalue::Value<int8_t>,
+ multivalue::Index64>;
+template class MultiValueMappingT<multivalue::Value<int16_t>,
+ multivalue::Index64>;
+template class MultiValueMappingT<multivalue::Value<int32_t>,
+ multivalue::Index64>;
+template class MultiValueMappingT<multivalue::Value<int64_t>,
+ multivalue::Index64>;
+template class MultiValueMappingT<multivalue::Value<float>,
+ multivalue::Index64>;
+template class MultiValueMappingT<multivalue::Value<double>,
+ multivalue::Index64>;
+template class MultiValueMappingT<
+ multivalue::Value<EnumStoreBase::Index>,
+ multivalue::Index64>;
+template class MultiValueMappingT<multivalue::WeightedValue<int8_t>,
+ multivalue::Index64>;
+template class MultiValueMappingT<multivalue::WeightedValue<int16_t>,
+ multivalue::Index64>;
+template class MultiValueMappingT<multivalue::WeightedValue<int32_t>,
+ multivalue::Index64>;
+template class MultiValueMappingT<multivalue::WeightedValue<int64_t>,
+ multivalue::Index64>;
+template class MultiValueMappingT<multivalue::WeightedValue<float>,
+ multivalue::Index64>;
+template class MultiValueMappingT<multivalue::WeightedValue<double>,
+ multivalue::Index64>;
+template class MultiValueMappingT<
+ multivalue::WeightedValue<EnumStoreBase::Index>,
+ multivalue::Index64>;
+
+using attribute::SaveLoadedEnum;
+using attribute::NoSaveLoadedEnum;
+using attribute::SaveEnumHist;
+typedef EnumStoreBase::Index EnumIndex;
+
+template
+uint32_t
+MultiValueMappingT<multivalue::Value<EnumIndex>,
+ multivalue::Index32>::
+fillMapped<EnumIndex, SaveLoadedEnum>(AttributeVector::ReaderBase &attrReader,
+ uint64_t numValues,
+ const EnumIndex *map,
+ size_t mapSize,
+ SaveLoadedEnum &saver,
+ uint32_t numDocs,
+ bool hasWeights);
+
+template
+uint32_t
+MultiValueMappingT<multivalue::WeightedValue<EnumIndex>,
+ multivalue::Index32>::
+fillMapped<EnumIndex, SaveLoadedEnum>(AttributeVector::ReaderBase &attrReader,
+ uint64_t numValues,
+ const EnumIndex *map,
+ size_t mapSize,
+ SaveLoadedEnum &saver,
+ uint32_t numDocs,
+ bool hasWeights);
+
+template
+uint32_t
+MultiValueMappingT<multivalue::Value<EnumIndex>,
+ multivalue::Index64>::
+fillMapped<EnumIndex, SaveLoadedEnum>(AttributeVector::ReaderBase &attrReader,
+ uint64_t numValues,
+ const EnumIndex *map,
+ size_t mapSize,
+ SaveLoadedEnum &saver,
+ uint32_t numDocs,
+ bool hasWeights);
+
+template
+uint32_t
+MultiValueMappingT<multivalue::WeightedValue<EnumIndex>,
+ multivalue::Index64>::
+fillMapped<EnumIndex, SaveLoadedEnum>(AttributeVector::ReaderBase &attrReader,
+ uint64_t numValues,
+ const EnumIndex *map,
+ size_t mapSize,
+ SaveLoadedEnum &saver,
+ uint32_t numDocs,
+ bool hasWeights);
+
+template
+uint32_t
+MultiValueMappingT<multivalue::Value<EnumIndex>,
+ multivalue::Index32>::
+fillMapped<EnumIndex, SaveEnumHist>(AttributeVector::ReaderBase &attrReader,
+ uint64_t numValues,
+ const EnumIndex *map,
+ size_t mapSize,
+ SaveEnumHist &saver,
+ uint32_t numDocs,
+ bool hasWeights);
+
+template
+uint32_t
+MultiValueMappingT<multivalue::WeightedValue<EnumIndex>,
+ multivalue::Index32>::
+fillMapped<EnumIndex, SaveEnumHist>(AttributeVector::ReaderBase &attrReader,
+ uint64_t numValues,
+ const EnumIndex *map,
+ size_t mapSize,
+ SaveEnumHist &saver,
+ uint32_t numDocs,
+ bool hasWeights);
+
+template
+uint32_t
+MultiValueMappingT<multivalue::Value<EnumIndex>,
+ multivalue::Index64>::
+fillMapped<EnumIndex, SaveEnumHist>(AttributeVector::ReaderBase &attrReader,
+ uint64_t numValues,
+ const EnumIndex *map,
+ size_t mapSize,
+ SaveEnumHist &saver,
+ uint32_t numDocs,
+ bool hasWeights);
+
+template
+uint32_t
+MultiValueMappingT<multivalue::WeightedValue<EnumIndex>,
+ multivalue::Index64>::
+fillMapped<EnumIndex, SaveEnumHist>(AttributeVector::ReaderBase &attrReader,
+ uint64_t numValues,
+ const EnumIndex *map,
+ size_t mapSize,
+ SaveEnumHist &saver,
+ uint32_t numDocs,
+ bool hasWeights);
+
+template
+uint32_t
+MultiValueMappingT<multivalue::Value<int8_t>,
+ multivalue::Index32>::
+fillMapped<int8_t, NoSaveLoadedEnum>(AttributeVector::ReaderBase &attrReader,
+ uint64_t numValues,
+ const int8_t *map,
+ size_t mapSize,
+ NoSaveLoadedEnum &saver,
+ uint32_t numDocs,
+ bool hasWeights);
+
+template
+uint32_t
+MultiValueMappingT<multivalue::Value<int16_t>,
+ multivalue::Index32>::
+fillMapped<int16_t, NoSaveLoadedEnum>(AttributeVector::ReaderBase &attrReader,
+ uint64_t numValues,
+ const int16_t *map,
+ size_t mapSize,
+ NoSaveLoadedEnum &saver,
+ uint32_t numDocs,
+ bool hasWeights);
+
+template
+uint32_t
+MultiValueMappingT<multivalue::Value<int32_t>,
+ multivalue::Index32>::
+fillMapped<int32_t, NoSaveLoadedEnum>(AttributeVector::ReaderBase &attrReader,
+ uint64_t numValues,
+ const int32_t *map,
+ size_t mapSize,
+ NoSaveLoadedEnum &saver,
+ uint32_t numDocs,
+ bool hasWeights);
+
+template
+uint32_t
+MultiValueMappingT<multivalue::Value<int64_t>,
+ multivalue::Index32>::
+fillMapped<int64_t, NoSaveLoadedEnum>(AttributeVector::ReaderBase &attrReader,
+ uint64_t numValues,
+ const int64_t *map,
+ size_t mapSize,
+ NoSaveLoadedEnum &saver,
+ uint32_t numDocs,
+ bool hasWeights);
+
+template
+uint32_t
+MultiValueMappingT<multivalue::Value<float>,
+ multivalue::Index32>::
+fillMapped<float, NoSaveLoadedEnum>(AttributeVector::ReaderBase &attrReader,
+ uint64_t numValues,
+ const float *map,
+ size_t mapSize,
+ NoSaveLoadedEnum &saver,
+ uint32_t numDocs,
+ bool hasWeights);
+
+template
+uint32_t
+MultiValueMappingT<multivalue::Value<double>,
+ multivalue::Index32>::
+fillMapped<double, NoSaveLoadedEnum>(AttributeVector::ReaderBase &attrReader,
+ uint64_t numValues,
+ const double *map,
+ size_t mapSize,
+ NoSaveLoadedEnum &saver,
+ uint32_t numDocs,
+ bool hasWeights);
+
+template
+uint32_t
+MultiValueMappingT<multivalue::WeightedValue<int8_t>,
+ multivalue::Index32>::
+fillMapped<int8_t, NoSaveLoadedEnum>(AttributeVector::ReaderBase &attrReader,
+ uint64_t numValues,
+ const int8_t *map,
+ size_t mapSize,
+ NoSaveLoadedEnum &saver,
+ uint32_t numDocs,
+ bool hasWeights);
+
+template
+uint32_t
+MultiValueMappingT<multivalue::WeightedValue<int16_t>,
+ multivalue::Index32>::
+fillMapped<int16_t, NoSaveLoadedEnum>(AttributeVector::ReaderBase &attrReader,
+ uint64_t numValues,
+ const int16_t *map,
+ size_t mapSize,
+ NoSaveLoadedEnum &saver,
+ uint32_t numDocs,
+ bool hasWeights);
+
+template
+uint32_t
+MultiValueMappingT<multivalue::WeightedValue<int32_t>,
+ multivalue::Index32>::
+fillMapped<int32_t, NoSaveLoadedEnum>(AttributeVector::ReaderBase &attrReader,
+ uint64_t numValues,
+ const int32_t *map,
+ size_t mapSize,
+ NoSaveLoadedEnum &saver,
+ uint32_t numDocs,
+ bool hasWeights);
+
+template
+uint32_t
+MultiValueMappingT<multivalue::WeightedValue<int64_t>,
+ multivalue::Index32>::
+fillMapped<int64_t, NoSaveLoadedEnum>(AttributeVector::ReaderBase &attrReader,
+ uint64_t numValues,
+ const int64_t *map,
+ size_t mapSize,
+ NoSaveLoadedEnum &saver,
+ uint32_t numDocs,
+ bool hasWeights);
+
+template
+uint32_t
+MultiValueMappingT<multivalue::WeightedValue<float>,
+ multivalue::Index32>::
+fillMapped<float, NoSaveLoadedEnum>(AttributeVector::ReaderBase &attrReader,
+ uint64_t numValues,
+ const float *map,
+ size_t mapSize,
+ NoSaveLoadedEnum &saver,
+ uint32_t numDocs,
+ bool hasWeights);
+
+template
+uint32_t
+MultiValueMappingT<multivalue::WeightedValue<double>,
+ multivalue::Index32>::
+fillMapped<double, NoSaveLoadedEnum>(AttributeVector::ReaderBase &attrReader,
+ uint64_t numValues,
+ const double *map,
+ size_t mapSize,
+ NoSaveLoadedEnum &saver,
+ uint32_t numDocs,
+ bool hasWeights);
+
+template
+uint32_t
+MultiValueMappingT<multivalue::Value<int8_t>,
+ multivalue::Index64>::
+fillMapped<int8_t, NoSaveLoadedEnum>(AttributeVector::ReaderBase &attrReader,
+ uint64_t numValues,
+ const int8_t *map,
+ size_t mapSize,
+ NoSaveLoadedEnum &saver,
+ uint32_t numDocs,
+ bool hasWeights);
+
+template
+uint32_t
+MultiValueMappingT<multivalue::Value<int16_t>,
+ multivalue::Index64>::
+fillMapped<int16_t, NoSaveLoadedEnum>(AttributeVector::ReaderBase &attrReader,
+ uint64_t numValues,
+ const int16_t *map,
+ size_t mapSize,
+ NoSaveLoadedEnum &saver,
+ uint32_t numDocs,
+ bool hasWeights);
+
+template
+uint32_t
+MultiValueMappingT<multivalue::Value<int32_t>,
+ multivalue::Index64>::
+fillMapped<int32_t, NoSaveLoadedEnum>(AttributeVector::ReaderBase &attrReader,
+ uint64_t numValues,
+ const int32_t *map,
+ size_t mapSize,
+ NoSaveLoadedEnum &saver,
+ uint32_t numDocs,
+ bool hasWeights);
+
+template
+uint32_t
+MultiValueMappingT<multivalue::Value<int64_t>,
+ multivalue::Index64>::
+fillMapped<int64_t, NoSaveLoadedEnum>(AttributeVector::ReaderBase &attrReader,
+ uint64_t numValues,
+ const int64_t *map,
+ size_t mapSize,
+ NoSaveLoadedEnum &saver,
+ uint32_t numDocs,
+ bool hasWeights);
+
+template
+uint32_t
+MultiValueMappingT<multivalue::Value<float>,
+ multivalue::Index64>::
+fillMapped<float, NoSaveLoadedEnum>(AttributeVector::ReaderBase &attrReader,
+ uint64_t numValues,
+ const float *map,
+ size_t mapSize,
+ NoSaveLoadedEnum &saver,
+ uint32_t numDocs,
+ bool hasWeights);
+
+template
+uint32_t
+MultiValueMappingT<multivalue::Value<double>,
+ multivalue::Index64>::
+fillMapped<double, NoSaveLoadedEnum>(AttributeVector::ReaderBase &attrReader,
+ uint64_t numValues,
+ const double *map,
+ size_t mapSize,
+ NoSaveLoadedEnum &saver,
+ uint32_t numDocs,
+ bool hasWeights);
+
+template
+uint32_t
+MultiValueMappingT<multivalue::WeightedValue<int8_t>,
+ multivalue::Index64>::
+fillMapped<int8_t, NoSaveLoadedEnum>(AttributeVector::ReaderBase &attrReader,
+ uint64_t numValues,
+ const int8_t *map,
+ size_t mapSize,
+ NoSaveLoadedEnum &saver,
+ uint32_t numDocs,
+ bool hasWeights);
+
+template
+uint32_t
+MultiValueMappingT<multivalue::WeightedValue<int16_t>,
+ multivalue::Index64>::
+fillMapped<int16_t, NoSaveLoadedEnum>(AttributeVector::ReaderBase &attrReader,
+ uint64_t numValues,
+ const int16_t *map,
+ size_t mapSize,
+ NoSaveLoadedEnum &saver,
+ uint32_t numDocs,
+ bool hasWeights);
+
+template
+uint32_t
+MultiValueMappingT<multivalue::WeightedValue<int32_t>,
+ multivalue::Index64>::
+fillMapped<int32_t, NoSaveLoadedEnum>(AttributeVector::ReaderBase &attrReader,
+ uint64_t numValues,
+ const int32_t *map,
+ size_t mapSize,
+ NoSaveLoadedEnum &saver,
+ uint32_t numDocs,
+ bool hasWeights);
+
+template
+uint32_t
+MultiValueMappingT<multivalue::WeightedValue<int64_t>,
+ multivalue::Index64>::
+fillMapped<int64_t, NoSaveLoadedEnum>(AttributeVector::ReaderBase &attrReader,
+ uint64_t numValues,
+ const int64_t *map,
+ size_t mapSize,
+ NoSaveLoadedEnum &saver,
+ uint32_t numDocs,
+ bool hasWeights);
+
+template
+uint32_t
+MultiValueMappingT<multivalue::WeightedValue<float>,
+ multivalue::Index64>::
+fillMapped<float, NoSaveLoadedEnum>(AttributeVector::ReaderBase &attrReader,
+ uint64_t numValues,
+ const float *map,
+ size_t mapSize,
+ NoSaveLoadedEnum &saver,
+ uint32_t numDocs,
+ bool hasWeights);
+
+template
+uint32_t
+MultiValueMappingT<multivalue::WeightedValue<double>,
+ multivalue::Index64>::
+fillMapped<double, NoSaveLoadedEnum>(AttributeVector::ReaderBase &attrReader,
+ uint64_t numValues,
+ const double *map,
+ size_t mapSize,
+ NoSaveLoadedEnum &saver,
+ uint32_t numDocs,
+ bool hasWeights);
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/attribute/multivaluemapping.h b/searchlib/src/vespa/searchlib/attribute/multivaluemapping.h
new file mode 100644
index 00000000000..3134f826774
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/multivaluemapping.h
@@ -0,0 +1,1498 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/fastos/fastos.h>
+#include <vector>
+#include <set>
+#include <vespa/searchlib/attribute/attributevector.h>
+#include <vespa/searchlib/common/rcuvector.h>
+#include <vespa/searchlib/attribute/multivalue.h>
+#include <vespa/searchlib/util/memoryusage.h>
+#include <vespa/vespalib/util/array.h>
+#include <vespa/vespalib/stllike/hash_map.h>
+#include "address_space.h"
+#include "enumstorebase.h"
+#include <iostream>
+
+namespace search {
+
+namespace multivalue {
+
+template <typename T,
+ uint8_t NUM_OFFSET_BITS,
+ uint8_t NUM_VALUE_BITS,
+ uint8_t NUM_ALT_BITS>
+class Index {
+private:
+ // unused X | values (NUM_VALUE_BITS bit) |
+ // alternative (NUM_ALT_BITS bit) | offset (NUM_OFFSET_BITS bit)
+ T _idx;
+public:
+ Index() : _idx(0) {}
+ Index(uint32_t values_, uint32_t alternative_, uint32_t offset_)
+ : _idx(0)
+ {
+ _idx += static_cast<T>(values_) << (NUM_ALT_BITS+NUM_OFFSET_BITS);
+ _idx += static_cast<T>((alternative_) &
+ ((1<<NUM_ALT_BITS) - 1)) << NUM_OFFSET_BITS;
+ _idx += offset_;
+ }
+
+ uint32_t
+ values(void) const
+ {
+ return _idx >> (NUM_ALT_BITS+NUM_OFFSET_BITS);
+ }
+
+ uint32_t
+ alternative(void) const
+ {
+ return (_idx >> NUM_OFFSET_BITS) & ((1<<NUM_ALT_BITS) - 1);
+ }
+
+ // values and alternative combined
+ uint32_t
+ vectorIdx(void) const
+ {
+ return _idx >> NUM_OFFSET_BITS;
+ }
+
+ uint32_t offset(void) const
+ {
+ return (_idx & ((1u << NUM_OFFSET_BITS) - 1));
+ }
+
+ T idx() const { return _idx; }
+
+ static uint32_t
+ maxValues(void)
+ {
+ return (1 << NUM_VALUE_BITS) - 1;
+ }
+
+ static uint32_t
+ alternativeSize(void)
+ {
+ return 1 << NUM_ALT_BITS;
+ }
+
+ static T
+ offsetSize(void)
+ {
+ return 1 << (NUM_OFFSET_BITS);
+ }
+};
+
+typedef Index<uint32_t, 27,4,1> Index32;
+typedef Index<uint64_t, 31,10,1> Index64;
+
+template <typename T, typename I>
+struct MVMTemplateArg {
+ typedef T Value;
+ typedef I Index;
+};
+
+}
+
+class MultiValueMappingVectorBaseBase
+{
+public:
+ MultiValueMappingVectorBaseBase()
+ : _used(0),
+ _dead(0),
+ _wantCompact(false),
+ _usage()
+ {
+ }
+
+ uint32_t used() const { return _used; }
+ uint32_t dead() const { return _dead; }
+ void incUsed(uint32_t inc) { _used += inc; }
+ void incDead(uint32_t inc) { _dead += inc; }
+
+ void
+ setWantCompact(void)
+ {
+ _wantCompact = true;
+ }
+
+ bool
+ getWantCompact(void) const
+ {
+ return _wantCompact;
+ }
+
+ MemoryUsage & getUsage() { return _usage; }
+ const MemoryUsage & getUsage() const { return _usage; }
+protected:
+ void reset() { _used = 0; _dead = 0; }
+private:
+ uint32_t _used;
+ uint32_t _dead;
+ bool _wantCompact;
+ MemoryUsage _usage;
+};
+
+
+class MultiValueMappingBaseBase
+{
+public:
+ class Histogram
+ {
+ private:
+ typedef vespalib::hash_map<uint32_t, uint32_t> HistogramM;
+ public:
+ typedef HistogramM::const_iterator const_iterator;
+ Histogram(size_t maxValues);
+ uint32_t & operator [] (uint32_t i) { return _histogram[std::min(i, _maxValues)]; }
+ const_iterator begin() const { return _histogram.begin(); }
+ const_iterator end() const { return _histogram.end(); }
+ private:
+ uint32_t _maxValues;
+ HistogramM _histogram;
+ };
+protected:
+ MultiValueMappingBaseBase(size_t maxValues, size_t maxAlternatives);
+ virtual ~MultiValueMappingBaseBase();
+ //-------------------------------------------------------------------------
+ // private inner classes
+ //-------------------------------------------------------------------------
+
+ enum VectorStatus {
+ ACTIVE, FREE, HOLD
+ };
+
+ typedef AttributeVector::generation_t generation_t;
+ typedef vespalib::Array<VectorStatus> StatusVector;
+ typedef vespalib::GenerationHolder GenerationHolder;
+
+ // active -> hold
+ void incValueCnt(uint32_t cnt) { _totalValueCnt += cnt; }
+ void decValueCnt(uint32_t cnt) { _totalValueCnt -= cnt; }
+
+ StatusVector _singleVectorsStatus;
+ StatusVector _vectorVectorsStatus;
+ GenerationHolder _genHolder;
+ std::set<uint32_t> _pendingCompactSingleVector;
+ bool _pendingCompactVectorVector;
+ bool _pendingCompact;
+ Histogram getEmptyHistogram(size_t maxValues) const;
+ virtual const MemoryUsage & getSingleVectorUsage(size_t i) const = 0;
+ virtual const MemoryUsage & getVectorVectorUsage(size_t i) const = 0;
+ virtual size_t getSingleVectorAddressSpaceUsed(size_t i) const = 0;
+ virtual size_t getVectorVectorAddressSpaceUsed(size_t i) const = 0;
+
+private:
+ size_t _totalValueCnt;
+
+public:
+ virtual Histogram getEmptyHistogram() const = 0;
+ virtual MemoryUsage getMemoryUsage() const = 0;
+ Histogram getHistogram(AttributeVector::ReaderBase & reader) const;
+ size_t getTotalValueCnt() const { return _totalValueCnt; }
+ static void failNewSize(uint64_t minNewSize, uint64_t maxSize);
+
+ void
+ clearPendingCompact(void);
+
+ static size_t
+ computeNewSize(size_t used, size_t dead, size_t needed, size_t maxSize);
+
+ void
+ transferHoldLists(generation_t generation)
+ {
+ _genHolder.transferHoldLists(generation);
+ }
+
+ void
+ trimHoldLists(generation_t firstUsed)
+ {
+ _genHolder.trimHoldLists(firstUsed);
+ }
+};
+
+
+template <typename I>
+class MultiValueMappingBase : public MultiValueMappingBaseBase
+{
+protected:
+ typedef I Index;
+ MultiValueMappingBase(uint32_t &committedDocIdLimit,
+ uint32_t numKeys = 0,
+ const GrowStrategy &gs = GrowStrategy());
+ virtual ~MultiValueMappingBase();
+
+ typedef search::attribute::RcuVectorBase<Index> IndexVector;
+ IndexVector _indices;
+ uint32_t &_committedDocIdLimit;
+
+ // active -> hold
+ void insertIntoHoldList(Index idx);
+ void setActiveVector(Index idx);
+
+ void reset(uint32_t numKeys=0);
+private:
+ virtual void clearVector(Index idx) = 0;
+
+public:
+ using IndexCopyVector = vespalib::Array<Index, vespalib::DefaultAlloc>;
+
+ void
+ doneHoldVector(Index idx);
+
+ virtual Histogram getEmptyHistogram() const override {
+ return MultiValueMappingBaseBase::getEmptyHistogram(Index::maxValues());
+ }
+
+ virtual MemoryUsage getMemoryUsage() const override;
+
+ AddressSpace getAddressSpaceUsage() const;
+
+ size_t getNumKeys(void) const
+ {
+ return _indices.size();
+ }
+
+ size_t getCapacityKeys(void) const
+ {
+ return _indices.capacity();
+ }
+
+ IndexCopyVector
+ getIndicesCopy() const
+ {
+ uint32_t size = _committedDocIdLimit;
+ assert(size <= _indices.size());
+ return std::move(IndexCopyVector(&_indices[0], &_indices[0] + size));
+ }
+
+ bool
+ hasKey(uint32_t key) const
+ {
+ return key < _indices.size();
+ }
+
+ bool
+ hasReaderKey(uint32_t key) const
+ {
+ return key < _committedDocIdLimit && key < _indices.size();
+ }
+
+ bool
+ isFull(void) const
+ {
+ return _indices.isFull();
+ }
+
+ static size_t
+ maxValues(void)
+ {
+ return Index::maxValues();
+ }
+
+ void
+ addKey(uint32_t & key);
+
+ void
+ shrinkKeys(uint32_t newSize);
+
+ void
+ clearDocs(uint32_t lidLow, uint32_t lidLimit, AttributeVector &v);
+
+ void holdElem(Index idx, size_t size);
+
+ virtual void doneHoldElem(Index idx) = 0;
+};
+
+extern template class MultiValueMappingBase<multivalue::Index32>;
+extern template class MultiValueMappingBase<multivalue::Index64>;
+
+template <typename V>
+class MultiValueMappingFallbackVectorHold
+ : public vespalib::GenerationHeldBase
+{
+ V _hold;
+public:
+ MultiValueMappingFallbackVectorHold(size_t size,
+ V &rhs)
+ : vespalib::GenerationHeldBase(size),
+ _hold()
+ {
+ _hold.swap(rhs);
+ }
+
+ virtual
+ ~MultiValueMappingFallbackVectorHold(void)
+ {
+ }
+};
+
+
+template <typename VT>
+class MultiValueMappingVector : public vespalib::Array<VT, vespalib::DefaultAlloc>,
+ public MultiValueMappingVectorBaseBase
+{
+public:
+ typedef vespalib::Array<VT, vespalib::DefaultAlloc> VectorBase;
+ typedef MultiValueMappingFallbackVectorHold<VectorBase> FallBackHold;
+ MultiValueMappingVector();
+ MultiValueMappingVector(uint32_t n);
+ MultiValueMappingVector(const MultiValueMappingVector & rhs);
+ MultiValueMappingVector &
+ operator=(const MultiValueMappingVector & rhs);
+
+ ~MultiValueMappingVector();
+ void reset(uint32_t n);
+ uint32_t remaining() const { return this->size() - used(); }
+ void swapVector(MultiValueMappingVector & rhs);
+
+ vespalib::GenerationHeldBase::UP
+ fallbackResize(uint64_t newSize);
+};
+
+
+template <typename T, typename I=multivalue::Index32>
+class MultiValueMappingT : public MultiValueMappingBase<I>
+{
+public:
+ friend class MultiValueMappingTest;
+ typedef MultiValueMappingVectorBaseBase VectorBaseBase;
+ typedef MultiValueMappingBaseBase::Histogram Histogram;
+ typedef MultiValueMappingBaseBase::VectorStatus VectorStatus;
+ typedef typename MultiValueMappingBase<I>::Index Index;
+
+private:
+ using MultiValueMappingBase<I>::_pendingCompactSingleVector;
+ using MultiValueMappingBaseBase::_pendingCompactVectorVector;
+ using MultiValueMappingBaseBase::_pendingCompact;
+ using MultiValueMappingBaseBase::clearPendingCompact;
+ using MultiValueMappingBaseBase::failNewSize;
+ using MultiValueMappingBase<I>::_genHolder;
+ using MultiValueMappingBase<I>::holdElem;
+
+ typedef MultiValueMappingVector<T> SingleVector;
+ typedef std::pair<SingleVector*, Index> SingleVectorPtr;
+ typedef typename SingleVector::VectorBase VectorBase;
+ typedef MultiValueMappingVector<VectorBase > VectorVector;
+ typedef std::pair<VectorVector*, Index> VectorVectorPtr;
+
+ //-------------------------------------------------------------------------
+ // private variables
+ //-------------------------------------------------------------------------
+ std::vector<SingleVector> _singleVectors;
+ std::vector<VectorVector> _vectorVectors;
+
+ //-------------------------------------------------------------------------
+ // private methods
+ //-------------------------------------------------------------------------
+ virtual void clearVector(Index idx);
+ virtual const MemoryUsage & getSingleVectorUsage(size_t i) const override;
+ virtual const MemoryUsage & getVectorVectorUsage(size_t i) const override;
+ virtual size_t getSingleVectorAddressSpaceUsed(size_t i) const override;
+ virtual size_t getVectorVectorAddressSpaceUsed(size_t i) const override;
+ void initVectors(uint32_t initSize);
+ void initVectors(const Histogram & initCapacity);
+ bool getValidIndex(Index & newIdx, uint32_t numValues);
+
+ void
+ compactSingleVector(SingleVectorPtr &activeVector,
+ uint32_t valueCnt,
+ uint64_t newSize,
+ uint64_t neededEntries,
+ uint64_t maxSize);
+
+ void
+ compactVectorVector(VectorVectorPtr &activeVector,
+ uint64_t newSize,
+ uint64_t neededEntries,
+ uint64_t maxSize);
+
+ SingleVectorPtr getSingleVector(uint32_t numValues, VectorStatus status);
+ VectorVectorPtr getVectorVector(VectorStatus status);
+ Index getIndex(uint32_t numValues, VectorStatus status);
+
+ void incUsed(SingleVector & vec, uint32_t numValues) {
+ vec.incUsed(numValues);
+ vec.getUsage().incUsedBytes(numValues * sizeof(T));
+ }
+ void incDead(SingleVector & vec, uint32_t numValues) {
+ vec.incDead(numValues);
+ vec.getUsage().incDeadBytes(numValues * sizeof(T));
+ }
+ void swapVector(SingleVector & vec, uint32_t initSize) {
+ SingleVector(initSize).swapVector(vec);
+ vec.getUsage().setAllocatedBytes(initSize * sizeof(T));
+ }
+ void incUsed(VectorVector & vec, uint32_t numValues) {
+ vec.incUsed(1);
+ vec.getUsage().incUsedBytes(numValues * sizeof(T) +
+ sizeof(VectorBase));
+ vec.getUsage().incAllocatedBytes(numValues * sizeof(T));
+ }
+ void incDead(VectorVector & vec) {
+ vec.incDead(1);
+ }
+ void swapVector(VectorVector & vec, uint32_t initSize) {
+ VectorVector(initSize).swapVector(vec);
+ vec.getUsage().setAllocatedBytes(initSize * sizeof(VectorBase));
+ }
+
+
+public:
+ MultiValueMappingT(uint32_t &committedDocIdLimit,
+ const GrowStrategy & gs = GrowStrategy());
+ MultiValueMappingT(uint32_t &committedDocIdLimit,
+ uint32_t numKeys, uint32_t initSize = 0,
+ const GrowStrategy & gs = GrowStrategy());
+ MultiValueMappingT(uint32_t &committedDocIdLimit,
+ uint32_t numKeys, const Histogram & initCapacity,
+ const GrowStrategy & gs = GrowStrategy());
+ ~MultiValueMappingT();
+ void reset(uint32_t numKeys, uint32_t initSize = 0);
+ void reset(uint32_t numKeys, const Histogram & initCapacity);
+ uint32_t get(uint32_t key, std::vector<T> & buffer) const;
+ template <typename BufferType>
+ uint32_t get(uint32_t key, BufferType * buffer, uint32_t sz) const;
+ bool get(uint32_t key, uint32_t index, T & value) const;
+ uint32_t getDataForIdx(Index idx, const T * & handle) const {
+ if (__builtin_expect(idx.values() < Index::maxValues(), true)) {
+ // We do not need to specialcase 0 as _singleVectors will refer to valid stuff
+ // and handle SHALL not be used as the number of values returned shall be obeyed.
+ const SingleVector & vec = _singleVectors[idx.vectorIdx()];
+ handle = &vec[idx.offset() * idx.values()];
+ __builtin_prefetch(handle, 0, 0);
+ return idx.values();
+ } else {
+ const VectorBase & vec =
+ _vectorVectors[idx.alternative()][idx.offset()];
+ handle = &vec[0];
+ return vec.size();
+ }
+ }
+ uint32_t get(uint32_t key, const T * & handle) const {
+ return getDataForIdx(this->_indices[key], handle);
+ }
+ inline uint32_t getValueCount(uint32_t key) const;
+ void set(uint32_t key, const std::vector<T> & values);
+ void set(uint32_t key, const T * values, uint32_t numValues);
+
+ /* XXX: Unsafe operation, reader gets inconsistent view */
+ void replace(uint32_t key, const std::vector<T> & values);
+
+ /* XXX: Unsafe operation, reader gets inconsistent view */
+ void replace(uint32_t key, const T * values, uint32_t numValues);
+
+ Histogram getRemaining();
+ bool enoughCapacity(const Histogram & capacityNeeded);
+ void performCompaction(Histogram & capacityNeeded);
+
+ template <typename V, class Saver>
+ uint32_t
+ fillMapped(AttributeVector::ReaderBase &attrReader,
+ uint64_t numValues,
+ const V *map,
+ size_t mapSize,
+ Saver &saver,
+ uint32_t numDocs,
+ bool hasWeights);
+
+ virtual void doneHoldElem(Index idx) override;
+
+#ifdef DEBUG_MULTIVALUE_MAPPING
+ void printContent() const;
+ void printVectorVectors() const;
+#endif
+};
+
+//-----------------------------------------------------------------------------
+// implementation of private methods
+//-----------------------------------------------------------------------------
+template <typename VT>
+MultiValueMappingVector<VT>::MultiValueMappingVector()
+ : VectorBase(),
+ MultiValueMappingVectorBaseBase()
+{
+}
+
+template <typename VT>
+MultiValueMappingVector<VT>::~MultiValueMappingVector()
+{
+}
+
+template <typename VT>
+MultiValueMappingVector<VT>::MultiValueMappingVector(uint32_t n)
+ : VectorBase(),
+ MultiValueMappingVectorBaseBase()
+{
+ reset(n);
+}
+
+template <typename VT>
+MultiValueMappingVector<VT>::MultiValueMappingVector(
+ const MultiValueMappingVector & rhs)
+ : VectorBase(rhs),
+ MultiValueMappingVectorBaseBase(rhs)
+{
+}
+
+template <typename VT>
+MultiValueMappingVector<VT> &
+MultiValueMappingVector<VT>::operator=(const MultiValueMappingVector & rhs)
+{
+ if (this != & rhs) {
+ VectorBase::operator=(rhs);
+ MultiValueMappingVectorBaseBase::operator=(rhs);
+ }
+ return *this;
+}
+
+template <typename VT>
+void
+MultiValueMappingVector<VT>::reset(uint32_t n)
+{
+ this->resize(n);
+ MultiValueMappingVectorBaseBase::reset();
+}
+
+template <typename VT>
+void
+MultiValueMappingVector<VT>::swapVector(MultiValueMappingVector & rhs)
+{
+ MultiValueMappingVectorBaseBase tmp(rhs);
+ rhs.MultiValueMappingVectorBaseBase::operator=(*this);
+ MultiValueMappingVectorBaseBase::operator=(tmp);
+ this->swap(rhs);
+}
+
+template <typename VT>
+vespalib::GenerationHeldBase::UP
+MultiValueMappingVector<VT>::fallbackResize(uint64_t newSize)
+{
+ VectorBase tmp(newSize);
+ VectorBase &old(*this);
+ size_t oldSize = old.size();
+ size_t oldCapacity = old.capacity();
+ for (size_t i = 0; i < oldSize; ++i) {
+ tmp[i] = old[i];
+ }
+ std::atomic_thread_fence(std::memory_order_release);
+ this->swap(tmp);
+ return vespalib::GenerationHeldBase::UP(
+ new MultiValueMappingFallbackVectorHold<VectorBase>
+ (oldCapacity * sizeof(VT),
+ tmp));
+}
+
+template <typename T, typename I>
+void
+MultiValueMappingT<T, I>::initVectors(uint32_t initSize)
+{
+ for (uint32_t i = 0; i < this->_singleVectorsStatus.size(); ++i) {
+ if (i % Index::alternativeSize() == 0) {
+ swapVector(_singleVectors[i], initSize);
+ this->_singleVectorsStatus[i] = MultiValueMappingBaseBase::ACTIVE;
+ } else {
+ swapVector(_singleVectors[i], 0);
+ this->_singleVectorsStatus[i] = MultiValueMappingBaseBase::FREE;
+ }
+ }
+ for (uint32_t i = 0; i < this->_vectorVectorsStatus.size(); ++i) {
+ if (i % Index::alternativeSize() == 0) {
+ swapVector(_vectorVectors[i], initSize);
+ this->_vectorVectorsStatus[i] = MultiValueMappingBaseBase::ACTIVE;
+ } else {
+ swapVector(_vectorVectors[i], 0);
+ this->_vectorVectorsStatus[i] = MultiValueMappingBaseBase::FREE;
+ }
+ }
+}
+
+template <typename T, typename I>
+void
+MultiValueMappingT<T, I>::initVectors(const Histogram &initCapacity)
+{
+ for (typename Histogram::const_iterator it(initCapacity.begin()), mt(initCapacity.end()); it != mt; ++it) {
+ uint32_t valueCnt = it->first;
+ uint64_t numEntries = it->second;
+ if (valueCnt != 0 && valueCnt < Index::maxValues()) {
+ uint64_t maxSize = Index::offsetSize() * valueCnt;
+ if (maxSize > std::numeric_limits<uint32_t>::max()) {
+ maxSize = std::numeric_limits<uint32_t>::max();
+ maxSize -= (maxSize % valueCnt);
+ }
+ if (numEntries * valueCnt > maxSize) {
+ failNewSize(numEntries * valueCnt, maxSize);
+ }
+ swapVector(_singleVectors[valueCnt * 2], valueCnt * numEntries);
+ } else if (valueCnt == Index::maxValues()) {
+ uint64_t maxSize = Index::offsetSize();
+ if (maxSize > std::numeric_limits<uint32_t>::max())
+ maxSize = std::numeric_limits<uint32_t>::max();
+ if (numEntries > maxSize) {
+ failNewSize(numEntries, maxSize);
+ }
+ swapVector(_vectorVectors[0], numEntries);
+ }
+ }
+}
+
+template <typename T, typename I>
+bool
+MultiValueMappingT<T, I>::getValidIndex(Index &newIdx, uint32_t numValues)
+{
+ if (numValues == 0) {
+ newIdx = Index();
+ } else if (numValues < Index::maxValues()) {
+ SingleVectorPtr active =
+ getSingleVector(numValues, MultiValueMappingBaseBase::ACTIVE);
+
+ if (active.first->remaining() < numValues) {
+ return false;
+ }
+
+ uint32_t used = active.first->used();
+ assert(used % numValues == 0);
+ incUsed(*active.first, numValues);
+ newIdx = Index(active.second.values(), active.second.alternative(),
+ used / numValues);
+ } else {
+ VectorVectorPtr active =
+ getVectorVector(MultiValueMappingBaseBase::ACTIVE);
+
+ if (active.first->remaining() == 0) {
+ return false;
+ }
+
+ uint32_t used = active.first->used();
+ incUsed(*active.first, numValues);
+ (*active.first)[used].resize(numValues);
+ newIdx = Index(active.second.values(), active.second.alternative(),
+ used);
+ }
+ return true;
+}
+
+template <typename T, typename I>
+void
+MultiValueMappingT<T, I>::
+compactSingleVector(SingleVectorPtr &activeVector,
+ uint32_t valueCnt,
+ uint64_t newSize,
+ uint64_t neededEntries,
+ uint64_t maxSize)
+{
+ _pendingCompactSingleVector.erase(activeVector.second.values());
+ clearPendingCompact();
+ SingleVectorPtr freeVector =
+ getSingleVector(valueCnt, MultiValueMappingBaseBase::FREE);
+ if (freeVector.first == NULL) {
+#ifdef LOG_MULTIVALUE_MAPPING
+ LOG(warning, "did not find any free '%u-vector'", valueCnt);
+#endif
+ uint64_t dead = activeVector.first->dead();
+ uint64_t fallbackNewSize = newSize + dead * valueCnt + 1024 * valueCnt;
+ if (fallbackNewSize > maxSize)
+ fallbackNewSize = maxSize;
+ if (fallbackNewSize <= activeVector.first->size() ||
+ fallbackNewSize < activeVector.first->used() +
+ neededEntries * valueCnt) {
+ fprintf(stderr, "did not find any free '%u-vector'\n", valueCnt);
+ abort();
+ }
+ _genHolder.hold(activeVector.first->fallbackResize(fallbackNewSize));
+ // When held buffer is freed then pending compact should be set
+ SingleVectorPtr holdVector =
+ getSingleVector(valueCnt, MultiValueMappingBaseBase::HOLD);
+ assert(holdVector.first != NULL);
+ holdVector.first->setWantCompact();
+ return;
+ }
+ swapVector(*freeVector.first, newSize);
+#ifdef LOG_MULTIVALUE_MAPPING
+ LOG(info,
+ "compacting from '%u-vector(%u)' "
+ "(s = %u, u = %u, d = %u) to "
+ "'%u-vector(%u)' (s = %u)",
+ valueCnt, activeVector.second.alternative(),
+ activeVector.first->size(),
+ activeVector.first->used() , activeVector.first->dead(),
+ valueCnt, freeVector.second.alternative(), newSize);
+#endif
+ uint32_t activeVectorIdx = activeVector.second.vectorIdx();
+ for (uint32_t i = 0; i < this->_indices.size(); ++i) {
+ Index & idx = this->_indices[i];
+ if (activeVectorIdx == idx.vectorIdx()) {
+ for (uint32_t j = idx.offset() * idx.values(),
+ k = freeVector.first->used();
+ j < (idx.offset() + 1) * idx.values() &&
+ k < freeVector.first->used() + valueCnt; ++j, ++k)
+ {
+ (*freeVector.first)[k] = (*activeVector.first)[j];
+ }
+ assert(freeVector.first->used() % valueCnt == 0);
+ std::atomic_thread_fence(std::memory_order_release);
+ this->_indices[i] = Index(freeVector.second.values(),
+ freeVector.second.alternative(),
+ freeVector.first->used() / valueCnt);
+ incUsed(*freeVector.first, valueCnt);
+ }
+ }
+ // active -> hold
+ this->insertIntoHoldList(activeVector.second);
+ // free -> active
+ this->setActiveVector(freeVector.second);
+ activeVector = freeVector;
+}
+
+
+template <typename T, typename I>
+void
+MultiValueMappingT<T, I>::
+compactVectorVector(VectorVectorPtr &activeVector,
+ uint64_t newSize,
+ uint64_t neededEntries,
+ uint64_t maxSize)
+{
+ _pendingCompactVectorVector = false;
+ clearPendingCompact();
+ VectorVectorPtr freeVector =
+ getVectorVector(MultiValueMappingBaseBase::FREE);
+ if (freeVector.first == NULL) {
+#ifdef LOG_MULTIVALUE_MAPPING
+ LOG(error, "did not find any free vectorvector");
+#endif
+ uint64_t dead = activeVector.first->dead();
+ uint64_t fallbackNewSize = newSize + dead + 1024;
+ if (fallbackNewSize > maxSize)
+ fallbackNewSize = maxSize;
+ if (fallbackNewSize <= activeVector.first->size() ||
+ fallbackNewSize < activeVector.first->used() + neededEntries) {
+ fprintf(stderr, "did not find any free vectorvector\n");
+ abort();
+ }
+ _genHolder.hold(activeVector.first->fallbackResize(fallbackNewSize));
+ // When held buffer is freed then pending compact should be set
+ VectorVectorPtr holdVector =
+ getVectorVector(MultiValueMappingBaseBase::HOLD);
+ assert(holdVector.first != NULL);
+ holdVector.first->setWantCompact();
+ return;
+ }
+ swapVector(*freeVector.first, newSize);
+#ifdef LOG_MULTIVALUE_MAPPING
+ LOG(info,
+ "compacting from 'vectorvector(%u)' "
+ "(s = %u, u = %u, d = %u) to "
+ "'vectorvector(%u)' (s = %u)",
+ activeVector.second.alternative(), activeVector.first->size(),
+ activeVector.first->used(), activeVector.first->dead(),
+ freeVector.second.alternative(), newSize);
+#endif
+ uint32_t activeVectorIdx = activeVector.second.vectorIdx();
+ for (uint32_t i = 0; i < this->_indices.size(); ++i) {
+ Index & idx = this->_indices[i];
+ if (activeVectorIdx == idx.vectorIdx()) {
+ uint32_t activeOffset = idx.offset();
+ uint32_t vecSize = (*activeVector.first)[activeOffset].size();
+ uint32_t freeOffset = freeVector.first->used();
+ (*freeVector.first)[freeOffset].resize(vecSize);
+ for (uint32_t j = 0; j < vecSize; ++j) {
+ (*freeVector.first)[freeOffset][j] =
+ (*activeVector.first)[activeOffset][j];
+ }
+ std::atomic_thread_fence(std::memory_order_release);
+ this->_indices[i] = Index(freeVector.second.values(),
+ freeVector.second.alternative(),
+ freeVector.first->used());
+ incUsed(*freeVector.first, vecSize);
+ }
+ }
+ // active -> hold
+ this->insertIntoHoldList(activeVector.second);
+ // free -> active
+ this->setActiveVector(freeVector.second);
+ activeVector = freeVector;
+}
+
+template <typename T, typename I>
+typename MultiValueMappingT<T, I>::SingleVectorPtr
+MultiValueMappingT<T, I>::getSingleVector(uint32_t numValues,
+ VectorStatus status)
+{
+ for (uint32_t i = numValues * Index::alternativeSize();
+ i < (numValues + 1) * Index::alternativeSize(); ++i)
+ {
+ if (this->_singleVectorsStatus[i] == status) {
+ return SingleVectorPtr(&_singleVectors[i],
+ Index(numValues,
+ i % Index::alternativeSize(),
+ 0));
+ }
+ }
+ return SingleVectorPtr(static_cast<SingleVector *>(NULL), Index());
+}
+
+template <typename T, typename I>
+typename MultiValueMappingT<T, I>::VectorVectorPtr
+MultiValueMappingT<T, I>::getVectorVector(VectorStatus status)
+{
+ for (uint32_t i = 0; i < _vectorVectors.size(); ++i) {
+ if (this->_vectorVectorsStatus[i] == status) {
+ return VectorVectorPtr(&_vectorVectors[i],
+ Index(Index::maxValues(), i, 0));
+ }
+ }
+ return VectorVectorPtr(static_cast<VectorVector *>(NULL), Index());
+}
+
+template <typename T, typename I>
+typename MultiValueMappingT<T, I>::Index
+MultiValueMappingT<T, I>::getIndex(uint32_t numValues, VectorStatus status)
+{
+ if (numValues < Index::maxValues()) {
+ return getSingleVector(numValues, status).second;
+ } else {
+ return getVectorVector(status).second;
+ }
+}
+
+
+//-----------------------------------------------------------------------------
+// implementation of public methods
+//-----------------------------------------------------------------------------
+
+template <typename T, typename I>
+MultiValueMappingT<T, I>::MultiValueMappingT(uint32_t &committedDocIdLimit,
+ const GrowStrategy & gs)
+ : MultiValueMappingBase<I>(committedDocIdLimit, 0, gs),
+ _singleVectors((Index::maxValues()) * Index::alternativeSize()),
+ _vectorVectors(Index::alternativeSize())
+{
+ initVectors(0);
+}
+
+template <typename T, typename I>
+MultiValueMappingT<T, I>::MultiValueMappingT(uint32_t &committedDocIdLimit,
+ uint32_t numKeys,
+ uint32_t initSize,
+ const GrowStrategy & gs)
+ : MultiValueMappingBase<I>(committedDocIdLimit, numKeys, gs),
+ _singleVectors((Index::maxValues()) * Index::alternativeSize()),
+ _vectorVectors(Index::alternativeSize())
+{
+ initVectors(initSize);
+}
+
+template <typename T, typename I>
+MultiValueMappingT<T, I>::MultiValueMappingT(uint32_t &committedDocIdLimit,
+ uint32_t numKeys,
+ const Histogram & initCapacity,
+ const GrowStrategy & gs)
+ : MultiValueMappingBase<I>(committedDocIdLimit, numKeys, gs),
+ _singleVectors((Index::maxValues()) * Index::alternativeSize()),
+ _vectorVectors(Index::alternativeSize())
+{
+ initVectors(0);
+ initVectors(initCapacity);
+}
+
+template <typename T, typename I>
+MultiValueMappingT<T, I>::~MultiValueMappingT()
+{
+ _genHolder.clearHoldLists();
+}
+
+template <typename T, typename I>
+void
+MultiValueMappingT<T, I>::reset(uint32_t numKeys, uint32_t initSize)
+{
+ MultiValueMappingBase<I>::reset(numKeys);
+ initVectors(initSize);
+}
+
+template <typename T, typename I>
+void
+MultiValueMappingT<T, I>::reset(uint32_t numKeys,
+ const Histogram &initCapacity)
+{
+ MultiValueMappingBase<I>::reset(numKeys);
+ initVectors(0);
+ initVectors(initCapacity);
+}
+
+
+template <typename T, typename I>
+uint32_t
+MultiValueMappingT<T, I>::get(uint32_t key, std::vector<T> & buffer) const
+{
+ return get(key, &buffer[0], buffer.size());
+}
+
+template <typename T, typename I>
+template <typename BufferType>
+uint32_t
+MultiValueMappingT<T, I>::get(uint32_t key,
+ BufferType * buffer,
+ uint32_t sz) const
+{
+ Index idx = this->_indices[key];
+ if (idx.values() < Index::maxValues()) {
+ uint32_t available = idx.values();
+ uint32_t num2Read = std::min(available, sz);
+ const SingleVector & vec = _singleVectors[idx.vectorIdx()];
+ for (uint32_t i = 0, j = idx.offset() * idx.values();
+ i < num2Read && j < (idx.offset() + 1) * idx.values(); ++i, ++j) {
+ buffer[i] = static_cast<BufferType>(vec[j]);
+ }
+ return available;
+ } else {
+ const VectorBase & vec =
+ _vectorVectors[idx.alternative()][idx.offset()];
+ uint32_t available = vec.size();
+ uint32_t num2Read = std::min(available, sz);
+ for (uint32_t i = 0; i < num2Read; ++i) {
+ buffer[i] = static_cast<BufferType>(vec[i]);
+ }
+ return available;
+ }
+}
+
+template <typename T, typename I>
+bool
+MultiValueMappingT<T, I>::get(uint32_t key, uint32_t index, T & value) const
+{
+ if (!this->hasReaderKey(key)) {
+ return false;
+ }
+ Index idx = this->_indices[key];
+ if (idx.values() < Index::maxValues()) {
+ if (index >= idx.values()) {
+ return false;
+ }
+ uint32_t offset = idx.offset() * idx.values() + index;
+ value = _singleVectors[idx.vectorIdx()][offset];
+ return true;
+ } else {
+ if (index >= _vectorVectors[idx.alternative()][idx.offset()].size()) {
+ return false;
+ }
+ value = _vectorVectors[idx.alternative()][idx.offset()][index];
+ return true;
+ }
+ return false;
+}
+
+template <typename T, typename I>
+inline uint32_t
+MultiValueMappingT<T, I>::getValueCount(uint32_t key) const
+{
+ if (!this->hasReaderKey(key)) {
+ return 0;
+ }
+ Index idx = this->_indices[key];
+ if (idx.values() < Index::maxValues()) {
+ return idx.values();
+ } else {
+ return _vectorVectors[idx.alternative()][idx.offset()].size();
+ }
+}
+
+template <typename T, typename I>
+void
+MultiValueMappingT<T, I>::set(uint32_t key, const std::vector<T> & values)
+{
+ set(key, &values[0], values.size());
+}
+
+template <typename T, typename I>
+void
+MultiValueMappingT<T, I>::set(uint32_t key,
+ const T * values,
+ uint32_t numValues)
+{
+ if (!this->hasKey(key)) {
+ abort();
+ }
+
+ Index oldIdx = this->_indices[key];
+ Index newIdx;
+ if (!getValidIndex(newIdx, numValues)) {
+ abort();
+ }
+#ifdef LOG_MULTIVALUE_MAPPING
+ LOG(info,
+ "newIdx: values = %u, alternative = %u, offset = %u",
+ newIdx.values(), newIdx.alternative(), newIdx.offset());
+#endif
+
+ if (newIdx.values() != 0 && newIdx.values() < Index::maxValues()) {
+ SingleVector & vec = _singleVectors[newIdx.vectorIdx()];
+ for (uint32_t i = newIdx.offset() * newIdx.values(), j = 0;
+ i < (newIdx.offset() + 1) * newIdx.values() && j < numValues;
+ ++i, ++j)
+ {
+ vec[i] = values[j];
+ }
+#ifdef LOG_MULTIVALUE_MAPPING
+ LOG(info,
+ "inserted in '%u-vector(%u)': "
+ "key = %u, size = %u, used = %u, dead = %u, offset = %u",
+ newIdx.values(), newIdx.alternative(),
+ key, vec.size(),
+ vec.used(), vec.dead(), newIdx.offset() * newIdx.values());
+#endif
+ } else if (newIdx.values() == Index::maxValues()) {
+ VectorVector & vec = _vectorVectors[newIdx.alternative()];
+ for (uint32_t i = 0; i < numValues; ++i) {
+ vec[newIdx.offset()][i] = values[i];
+ }
+#ifdef LOG_MULTIVALUE_MAPPING
+ LOG(info,
+ "inserted %u values in 'vector-vector(%u)': "
+ "key = %u, size = %u, used = %u, dead = %u, offset = %u",
+ numValues, newIdx.alternative(),
+ key, vec.size(), vec.used(), vec.dead(), newIdx.offset());
+#endif
+ }
+
+ std::atomic_thread_fence(std::memory_order_release);
+ this->_indices[key] = newIdx;
+ this->incValueCnt(numValues);
+
+ // mark space in oldIdx as dead;
+ if (oldIdx.values() != 0 && oldIdx.values() < Index::maxValues()) {
+ SingleVector & vec = _singleVectors[oldIdx.vectorIdx()];
+ incDead(vec, oldIdx.values());
+ this->decValueCnt(oldIdx.values());
+#ifdef LOG_MULTIVALUE_MAPPING
+ LOG(info,
+ "mark space dead in '%u-vector(%u)': "
+ "size = %u, used = %u, dead = %u",
+ oldIdx.values(), oldIdx.alternative(),
+ vec.size(), vec.used(), vec.dead());
+#endif
+ } else if (oldIdx.values() == Index::maxValues()) {
+ VectorVector & vec = _vectorVectors[oldIdx.alternative()];
+ uint32_t oldNumValues = vec[oldIdx.offset()].size();
+ incDead(vec);
+ this->decValueCnt(oldNumValues);
+ holdElem(oldIdx, sizeof(VectorBase) + sizeof(T) * oldNumValues);
+#ifdef LOG_MULTIVALUE_MAPPING
+ LOG(info,
+ "mark space dead in 'vector-vector(%u)': "
+ "size = %u, used = %u, dead = %u",
+ oldIdx.alternative(), vec.size(), vec.used(), vec.dead());
+#endif
+ }
+}
+
+template <typename T, typename I>
+void
+MultiValueMappingT<T, I>::replace(uint32_t key, const std::vector<T> & values)
+{
+ /* XXX: Unsafe operation, reader gets inconsistent view */
+ replace(key, &values[0], values.size());
+}
+
+template <typename T, typename I>
+void
+MultiValueMappingT<T, I>::replace(uint32_t key,
+ const T * values, uint32_t numValues)
+{
+ /* XXX: Unsafe operation, reader gets inconsistent view */
+ if (!this->hasKey(key)) {
+ abort();
+ }
+
+ Index currIdx = this->_indices[key];
+
+ if (currIdx.values() != 0 && currIdx.values() < Index::maxValues()) {
+ SingleVector & vec = _singleVectors[currIdx.vectorIdx()];
+ for (uint32_t i = currIdx.offset() * currIdx.values(), j = 0;
+ i < (currIdx.offset() + 1) * currIdx.values() && j < numValues;
+ ++i, ++j)
+ {
+ vec[i] = values[j];
+ }
+ } else if (currIdx.values() == Index::maxValues()) {
+ VectorBase & vec =
+ _vectorVectors[currIdx.alternative()][currIdx.offset()];
+ for (uint32_t i = 0; i < vec.size() && i < numValues; ++i) {
+ vec[i] = values[i];
+ }
+ }
+}
+
+
+template <typename T, typename I>
+void MultiValueMappingT<T, I>::clearVector(Index idx)
+{
+ if (idx.values() < Index::maxValues()) {
+ SingleVector &vec = _singleVectors[idx.vectorIdx()];
+ if (vec.getWantCompact()) {
+ _pendingCompactSingleVector.insert(idx.values());
+ _pendingCompact = true;
+ }
+ vec = SingleVector();
+ } else {
+ VectorVector &vec = _vectorVectors[idx.alternative()];
+ if (vec.getWantCompact()) {
+ _pendingCompactVectorVector = true;
+ _pendingCompact = true;
+ }
+ vec = VectorVector();
+ }
+}
+
+
+template <typename T, typename I>
+void
+MultiValueMappingT<T, I>::doneHoldElem(Index idx)
+{
+ assert(idx.values() == Index::maxValues());
+ VectorVector &vv = _vectorVectors[idx.alternative()];
+ VectorBase &v = vv[idx.offset()];
+ uint32_t numValues = v.size();
+ VectorBase().swap(v);
+ vv.getUsage().decAllocatedBytes(numValues * sizeof(T));
+ vv.getUsage().incDeadBytes(sizeof(VectorBase));
+}
+
+
+template <typename T, typename I>
+const MemoryUsage &
+MultiValueMappingT<T, I>::getSingleVectorUsage(size_t i) const
+{
+ return _singleVectors[i].getUsage();
+}
+
+template <typename T, typename I>
+const MemoryUsage &
+MultiValueMappingT<T, I>::getVectorVectorUsage(size_t i) const
+{
+ return _vectorVectors[i].getUsage();
+}
+
+template <typename T, typename I>
+size_t
+MultiValueMappingT<T, I>::getSingleVectorAddressSpaceUsed(size_t i) const
+{
+ if (i < Index::alternativeSize()) {
+ return 0;
+ }
+ size_t numValues = i / Index::alternativeSize();
+ size_t actualUsed = _singleVectors[i].used() - _singleVectors[i].dead();
+ return (actualUsed / numValues);
+}
+
+template <typename T, typename I>
+size_t
+MultiValueMappingT<T, I>::getVectorVectorAddressSpaceUsed(size_t i) const
+{
+ return _vectorVectors[i].used() - _vectorVectors[i].dead();
+}
+
+template <typename T, typename I>
+typename MultiValueMappingT<T, I>::Histogram
+MultiValueMappingT<T, I>::getRemaining()
+{
+ Histogram result(Index::maxValues());
+ result[0] = 0;
+ for (uint32_t key = 1; key < Index::maxValues(); ++key) {
+ SingleVectorPtr active =
+ getSingleVector(key, MultiValueMappingBaseBase::ACTIVE);
+ result[key] = active.first->remaining() / key;
+ }
+ VectorVectorPtr active =
+ getVectorVector(MultiValueMappingBaseBase::ACTIVE);
+ result[Index::maxValues()] = active.first->remaining();
+ return result;
+}
+
+template <typename T, typename I>
+bool
+MultiValueMappingT<T, I>::enoughCapacity(const Histogram & capacityNeeded)
+{
+ if (_pendingCompact)
+ return false;
+ for (typename Histogram::const_iterator it(capacityNeeded.begin()), mt(capacityNeeded.end()); it != mt; ++it) {
+ uint32_t valueCnt = it->first;
+ uint64_t numEntries = it->second;
+ if (valueCnt < Index::maxValues()) {
+ SingleVectorPtr active =
+ getSingleVector(valueCnt, MultiValueMappingBaseBase::ACTIVE);
+ if (active.first->remaining() < numEntries * valueCnt) {
+ return false;
+ }
+ } else if (valueCnt == Index::maxValues()) {
+ VectorVectorPtr active =
+ getVectorVector(MultiValueMappingBaseBase::ACTIVE);
+ if (active.first->remaining() < numEntries) {
+ return false;
+ }
+ }
+ }
+ return true;
+}
+
+template <typename T, typename I>
+void
+MultiValueMappingT<T, I>::performCompaction(Histogram & capacityNeeded)
+{
+#ifdef LOG_MULTIVALUE_MAPPING
+ LOG(info, "performCompaction()");
+#endif
+ if (_pendingCompact) {
+ // Further populate histogram to ensure pending compaction being done.
+ for (std::set<uint32_t>::const_iterator
+ pit(_pendingCompactSingleVector.begin()),
+ pmt(_pendingCompactSingleVector.end());
+ pit != pmt; ++pit) {
+ (void) capacityNeeded[*pit];
+ }
+ if (_pendingCompactVectorVector) {
+ (void) capacityNeeded[Index::maxValues()];
+ }
+ }
+ for (typename Histogram::const_iterator it(capacityNeeded.begin()), mt(capacityNeeded.end()); it != mt; ++it) {
+ uint32_t valueCnt = it->first;
+ uint64_t numEntries = it->second;
+ if (valueCnt != 0 && valueCnt < Index::maxValues()) {
+ SingleVectorPtr active =
+ getSingleVector(valueCnt, MultiValueMappingBaseBase::ACTIVE);
+
+ if (active.first->remaining() < valueCnt * numEntries ||
+ _pendingCompactSingleVector.find(valueCnt) !=
+ _pendingCompactSingleVector.end()) {
+ uint64_t maxSize = Index::offsetSize() * valueCnt;
+ if (maxSize > std::numeric_limits<uint32_t>::max()) {
+ maxSize = std::numeric_limits<uint32_t>::max();
+ maxSize -= (maxSize % valueCnt);
+ }
+ uint64_t newSize = this->computeNewSize(active.first->used(),
+ active.first->dead(),
+ valueCnt * numEntries,
+ maxSize);
+ compactSingleVector(active, valueCnt, newSize,
+ numEntries, maxSize);
+ }
+ } else if (valueCnt == Index::maxValues()) {
+ VectorVectorPtr active =
+ getVectorVector(MultiValueMappingBaseBase::ACTIVE);
+
+ if (active.first->remaining() < numEntries ||
+ _pendingCompactVectorVector) {
+ uint64_t maxSize = Index::offsetSize();
+ if (maxSize > std::numeric_limits<uint32_t>::max())
+ maxSize = std::numeric_limits<uint32_t>::max();
+ uint64_t newSize = this->computeNewSize(active.first->used(),
+ active.first->dead(),
+ numEntries,
+ maxSize);
+ compactVectorVector(active, newSize,
+ numEntries, maxSize);
+ }
+ }
+ }
+ assert(!_pendingCompact);
+}
+
+#ifdef DEBUG_MULTIVALUE_MAPPING
+template <typename T, typename I>
+void
+MultiValueMappingT<T, I>::printContent() const
+{
+ for (uint32_t key = 0; key < this->_indices.size(); ++key) {
+ std::vector<T> buffer(getValueCount(key));
+ get(key, buffer);
+ std::cout << "key = " << key << ", count = " <<
+ getValueCount(key) << ": ";
+ for (uint32_t i = 0; i < buffer.size(); ++i) {
+ std::cout << buffer[i] << ", ";
+ }
+ std::cout << '\n';
+ }
+}
+
+template <typename T, typename I>
+void
+MultiValueMappingT<T, I>::printVectorVectors() const
+{
+ for (uint32_t i = 0; i < _vectorVectors.size(); ++i) {
+ std::cout << "Alternative " << i << '\n';
+ for (uint32_t j = 0; j < _vectorVectors[i].size(); ++j) {
+ std::cout << "Vector " << j << ": [";
+ uint32_t size = _vectorVectors[i][j].size();
+ for (uint32_t k = 0; k < size; ++k) {
+ std::cout << _vectorVectors[i][j][k] << ", ";
+ }
+ std::cout << "]\n";
+ }
+ }
+}
+#endif
+
+extern template class MultiValueMappingFallbackVectorHold<
+ MultiValueMappingVector<multivalue::Value<int8_t> >::VectorBase >;
+extern template class MultiValueMappingFallbackVectorHold<
+ MultiValueMappingVector<multivalue::Value<int16_t> >::VectorBase >;
+extern template class MultiValueMappingFallbackVectorHold<
+ MultiValueMappingVector<multivalue::Value<int32_t> >::VectorBase >;
+extern template class MultiValueMappingFallbackVectorHold<
+ MultiValueMappingVector<multivalue::Value<int64_t> >::VectorBase >;
+extern template class MultiValueMappingFallbackVectorHold<
+ MultiValueMappingVector<multivalue::Value<float> >::VectorBase >;
+extern template class MultiValueMappingFallbackVectorHold<
+ MultiValueMappingVector<multivalue::Value<double> >::VectorBase >;
+extern template class MultiValueMappingFallbackVectorHold<
+ MultiValueMappingVector<multivalue::Value<EnumStoreBase::Index> >::VectorBase >;
+extern template class MultiValueMappingFallbackVectorHold<
+ MultiValueMappingVector<multivalue::WeightedValue<int8_t> >::VectorBase >;
+extern template class MultiValueMappingFallbackVectorHold<
+ MultiValueMappingVector<multivalue::WeightedValue<int16_t> >::VectorBase >;
+extern template class MultiValueMappingFallbackVectorHold<
+ MultiValueMappingVector<multivalue::WeightedValue<int32_t> >::VectorBase >;
+extern template class MultiValueMappingFallbackVectorHold<
+ MultiValueMappingVector<multivalue::WeightedValue<int64_t> >::VectorBase >;
+extern template class MultiValueMappingFallbackVectorHold<
+ MultiValueMappingVector<multivalue::WeightedValue<float> >::VectorBase >;
+extern template class MultiValueMappingFallbackVectorHold<
+ MultiValueMappingVector<multivalue::WeightedValue<double> >::VectorBase >;
+extern template class MultiValueMappingFallbackVectorHold<
+ MultiValueMappingVector<multivalue::WeightedValue<EnumStoreBase::Index> >::VectorBase >;
+extern template class MultiValueMappingFallbackVectorHold<
+ MultiValueMappingVector<vespalib::Array<multivalue::Value<int8_t> > >::VectorBase >;
+extern template class MultiValueMappingFallbackVectorHold<
+ MultiValueMappingVector<vespalib::Array<multivalue::Value<int16_t> > >::VectorBase >;
+extern template class MultiValueMappingFallbackVectorHold<
+ MultiValueMappingVector<vespalib::Array<multivalue::Value<int32_t> > >::VectorBase >;
+extern template class MultiValueMappingFallbackVectorHold<
+ MultiValueMappingVector<vespalib::Array<multivalue::Value<int64_t> > >::VectorBase >;
+extern template class MultiValueMappingFallbackVectorHold<
+ MultiValueMappingVector<vespalib::Array<multivalue::Value<float> > >::VectorBase >;
+extern template class MultiValueMappingFallbackVectorHold<
+ MultiValueMappingVector<vespalib::Array<multivalue::Value<double> > >::VectorBase >;
+extern template class MultiValueMappingFallbackVectorHold<
+ MultiValueMappingVector<vespalib::Array<multivalue::Value<EnumStoreBase::Index> > >::VectorBase >;
+extern template class MultiValueMappingFallbackVectorHold<
+ MultiValueMappingVector<vespalib::Array<multivalue::WeightedValue<int8_t> > >::VectorBase >;
+extern template class MultiValueMappingFallbackVectorHold<
+ MultiValueMappingVector<vespalib::Array<multivalue::WeightedValue<int16_t> > >::VectorBase >;
+extern template class MultiValueMappingFallbackVectorHold<
+ MultiValueMappingVector<vespalib::Array<multivalue::WeightedValue<int32_t> > >::VectorBase >;
+extern template class MultiValueMappingFallbackVectorHold<
+ MultiValueMappingVector<vespalib::Array<multivalue::WeightedValue<int64_t> > >::VectorBase >;
+extern template class MultiValueMappingFallbackVectorHold<
+ MultiValueMappingVector<vespalib::Array<multivalue::WeightedValue<float> > >::VectorBase >;
+extern template class MultiValueMappingFallbackVectorHold<
+ MultiValueMappingVector<vespalib::Array<multivalue::WeightedValue<double> > >::VectorBase >;
+extern template class MultiValueMappingFallbackVectorHold<
+ MultiValueMappingVector<vespalib::Array<multivalue::WeightedValue<EnumStoreBase::Index> > >::VectorBase >;
+
+extern template class MultiValueMappingVector<
+ multivalue::Value<int8_t> >;
+extern template class MultiValueMappingVector<
+ multivalue::Value<int16_t> >;
+extern template class MultiValueMappingVector<
+ multivalue::Value<int32_t> >;
+extern template class MultiValueMappingVector<
+ multivalue::Value<int64_t> >;
+extern template class MultiValueMappingVector<
+ multivalue::Value<float> >;
+extern template class MultiValueMappingVector<
+ multivalue::Value<double> >;
+extern template class MultiValueMappingVector<
+ multivalue::Value<EnumStoreBase::Index> >;
+extern template class MultiValueMappingVector<
+ multivalue::WeightedValue<int8_t> >;
+extern template class MultiValueMappingVector<
+ multivalue::WeightedValue<int16_t> >;
+extern template class MultiValueMappingVector<
+ multivalue::WeightedValue<int32_t> >;
+extern template class MultiValueMappingVector<
+ multivalue::WeightedValue<int64_t> >;
+extern template class MultiValueMappingVector<
+ multivalue::WeightedValue<float> >;
+extern template class MultiValueMappingVector<
+ multivalue::WeightedValue<double> >;
+extern template class MultiValueMappingVector<
+ multivalue::WeightedValue<EnumStoreBase::Index> >;
+extern template class MultiValueMappingVector<
+ vespalib::Array<multivalue::Value<int8_t> > >;
+extern template class MultiValueMappingVector<
+ vespalib::Array<multivalue::Value<int16_t> > >;
+extern template class MultiValueMappingVector<
+ vespalib::Array<multivalue::Value<int32_t> > >;
+extern template class MultiValueMappingVector<
+ vespalib::Array<multivalue::Value<int64_t> > >;
+extern template class MultiValueMappingVector<
+ vespalib::Array<multivalue::Value<float> > >;
+extern template class MultiValueMappingVector<
+ vespalib::Array<multivalue::Value<double> > >;
+extern template class MultiValueMappingVector<
+ vespalib::Array<multivalue::Value<EnumStoreBase::Index> > >;
+extern template class MultiValueMappingVector<
+ vespalib::Array<multivalue::WeightedValue<int8_t> > >;
+extern template class MultiValueMappingVector<
+ vespalib::Array<multivalue::WeightedValue<int16_t> > >;
+extern template class MultiValueMappingVector<
+ vespalib::Array<multivalue::WeightedValue<int32_t> > >;
+extern template class MultiValueMappingVector<
+ vespalib::Array<multivalue::WeightedValue<int64_t> > >;
+extern template class MultiValueMappingVector<
+ vespalib::Array<multivalue::WeightedValue<float> > >;
+extern template class MultiValueMappingVector<
+ vespalib::Array<multivalue::WeightedValue<double> > >;
+extern template class MultiValueMappingVector<
+ vespalib::Array<multivalue::WeightedValue<EnumStoreBase::Index> > >;
+
+extern template class MultiValueMappingT<multivalue::Value<int8_t> >;
+extern template class MultiValueMappingT<multivalue::Value<int16_t> >;
+extern template class MultiValueMappingT<multivalue::Value<int32_t> >;
+extern template class MultiValueMappingT<multivalue::Value<int64_t> >;
+extern template class MultiValueMappingT<multivalue::Value<float> >;
+extern template class MultiValueMappingT<multivalue::Value<double> >;
+extern template class MultiValueMappingT<
+ multivalue::Value<EnumStoreBase::Index> >;
+extern template class MultiValueMappingT<multivalue::WeightedValue<int8_t> >;
+extern template class MultiValueMappingT<multivalue::WeightedValue<int16_t> >;
+extern template class MultiValueMappingT<multivalue::WeightedValue<int32_t> >;
+extern template class MultiValueMappingT<multivalue::WeightedValue<int64_t> >;
+extern template class MultiValueMappingT<multivalue::WeightedValue<float> >;
+extern template class MultiValueMappingT<multivalue::WeightedValue<double> >;
+extern template class MultiValueMappingT<
+ multivalue::WeightedValue<EnumStoreBase::Index> >;
+extern template class MultiValueMappingT<multivalue::Value<int8_t>,
+ multivalue::Index64>;
+extern template class MultiValueMappingT<multivalue::Value<int16_t>,
+ multivalue::Index64>;
+extern template class MultiValueMappingT<multivalue::Value<int32_t>,
+ multivalue::Index64>;
+extern template class MultiValueMappingT<multivalue::Value<int64_t>,
+ multivalue::Index64>;
+extern template class MultiValueMappingT<multivalue::Value<float>,
+ multivalue::Index64>;
+extern template class MultiValueMappingT<multivalue::Value<double>,
+ multivalue::Index64>;
+extern template class MultiValueMappingT<
+ multivalue::Value<EnumStoreBase::Index>,
+ multivalue::Index64>;
+extern template class MultiValueMappingT<multivalue::WeightedValue<int8_t>,
+ multivalue::Index64>;
+extern template class MultiValueMappingT<multivalue::WeightedValue<int16_t>,
+ multivalue::Index64>;
+extern template class MultiValueMappingT<multivalue::WeightedValue<int32_t>,
+ multivalue::Index64>;
+extern template class MultiValueMappingT<multivalue::WeightedValue<int64_t>,
+ multivalue::Index64>;
+extern template class MultiValueMappingT<multivalue::WeightedValue<float>,
+ multivalue::Index64>;
+extern template class MultiValueMappingT<multivalue::WeightedValue<double>,
+ multivalue::Index64>;
+extern template class MultiValueMappingT<
+ multivalue::WeightedValue<EnumStoreBase::Index>,
+ multivalue::Index64>;
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/attribute/multivaluemapping.hpp b/searchlib/src/vespa/searchlib/attribute/multivaluemapping.hpp
new file mode 100644
index 00000000000..a1e06ee4759
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/multivaluemapping.hpp
@@ -0,0 +1,50 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+namespace search
+{
+
+
+template <typename T, typename I>
+template <typename V, class Saver>
+uint32_t
+MultiValueMappingT<T, I>::fillMapped(AttributeVector::ReaderBase &attrReader,
+ uint64_t numValues,
+ const V *map,
+ size_t mapSize,
+ Saver &saver,
+ uint32_t numDocs,
+ bool hasWeights)
+{
+ typedef AttributeVector::DocId DocId;
+ Histogram capacityNeeded = this->getHistogram(attrReader);
+ reset(numDocs, capacityNeeded);
+ attrReader.rewind();
+ std::vector<T> indices;
+ uint64_t di = 0;
+ uint32_t maxvc = 0;
+ for (DocId doc = 0; doc < numDocs; ++doc) {
+ indices.clear();
+ uint32_t vc = attrReader.getNextValueCount();
+ indices.reserve(vc);
+ for (uint32_t vci = 0; vci < vc; ++vci, ++di) {
+ uint32_t e = attrReader.getNextEnum();
+ assert(e < mapSize);
+ (void) mapSize;
+ int32_t weight = hasWeights ? attrReader.getNextWeight() : 1;
+ indices.push_back(T(map[e], weight));
+ saver.save(e, doc, vci, weight);
+ }
+ if (maxvc < indices.size())
+ maxvc = indices.size();
+ set(doc, indices);
+ }
+ assert(di == numValues);
+ (void) numValues;
+ return maxvc;
+}
+
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/attribute/not_implemented_attribute.h b/searchlib/src/vespa/searchlib/attribute/not_implemented_attribute.h
new file mode 100644
index 00000000000..73f0491d808
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/not_implemented_attribute.h
@@ -0,0 +1,182 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "attributevector.h"
+
+namespace search {
+
+struct NotImplementedAttribute : AttributeVector {
+ using AttributeVector::AttributeVector;
+
+ virtual void notImplemented() const __attribute__((noinline)) {
+ assert(false);
+ throw vespalib::IllegalStateException(
+ "The function is not implemented.");
+ }
+
+ virtual uint32_t
+ getValueCount(DocId) const
+ {
+ notImplemented();
+ return 0;
+ }
+
+ virtual largeint_t
+ getInt(DocId) const
+ {
+ notImplemented();
+ return 0;
+ }
+
+ virtual double
+ getFloat(DocId) const
+ {
+ notImplemented();
+ return 0;
+ }
+
+ virtual const char *
+ getString(DocId, char *, size_t) const
+ {
+ notImplemented();
+ return NULL;
+ }
+
+ virtual uint32_t
+ get(DocId, largeint_t *, uint32_t) const
+ {
+ notImplemented();
+ return 0;
+ }
+
+ virtual uint32_t
+ get(DocId, double *, uint32_t) const
+ {
+ notImplemented();
+ return 0;
+ }
+
+ virtual uint32_t
+ get(DocId, vespalib::string *, uint32_t) const
+ {
+ notImplemented();
+ return 0;
+ }
+
+ virtual uint32_t
+ get(DocId, const char **, uint32_t) const
+ {
+ notImplemented();
+ return 0;
+ }
+
+ virtual uint32_t
+ get(DocId, EnumHandle *, uint32_t) const
+ {
+ notImplemented();
+ return 0;
+ }
+
+ virtual uint32_t
+ get(DocId, WeightedInt *, uint32_t) const
+ {
+ notImplemented();
+ return 0;
+ }
+
+ virtual uint32_t
+ get(DocId, WeightedFloat *, uint32_t) const
+ {
+ notImplemented();
+ return 0;
+ }
+
+ virtual uint32_t
+ get(DocId, WeightedString *, uint32_t) const
+ {
+ notImplemented();
+ return 0;
+ }
+
+ virtual uint32_t
+ get(DocId, WeightedConstChar *, uint32_t) const
+ {
+ notImplemented();
+ return 0;
+ }
+
+ virtual uint32_t
+ get(DocId, WeightedEnum *, uint32_t) const
+ {
+ notImplemented();
+ return 0;
+ }
+
+ virtual bool
+ findEnum(const char *, EnumHandle &) const
+ {
+ notImplemented();
+ return false;
+ }
+
+ virtual long
+ onSerializeForAscendingSort(DocId, void *, long,
+ const common::BlobConverter *) const
+ {
+ notImplemented();
+ return 0;
+ }
+
+ virtual long
+ onSerializeForDescendingSort(DocId, void *, long,
+ const common::BlobConverter *) const
+ {
+ notImplemented();
+ return 0;
+ }
+
+ virtual uint32_t
+ clearDoc(DocId)
+ {
+ notImplemented();
+ return 0;
+ }
+
+ virtual int64_t
+ getDefaultValue() const
+ {
+ notImplemented();
+ return 0;
+ }
+
+ virtual uint32_t
+ getEnum(DocId) const
+ {
+ notImplemented();
+ return 0;
+ }
+
+ virtual void
+ getEnumValue(const EnumHandle *, uint32_t *, uint32_t) const
+ {
+ notImplemented();
+ }
+
+ virtual bool
+ addDoc(DocId &)
+ {
+ notImplemented();
+ return false;
+ }
+
+ SearchContext::UP
+ getSearch(QueryTermSimple::UP, const SearchContext::Params &) const override
+ {
+ notImplemented();
+ return SearchContext::UP();
+ }
+};
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/attribute/numericbase.cpp b/searchlib/src/vespa/searchlib/attribute/numericbase.cpp
new file mode 100644
index 00000000000..cbc8bde655e
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/numericbase.cpp
@@ -0,0 +1,74 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "numericbase.h"
+#include <vespa/fastlib/io/bufferedfile.h>
+#include "enumstorebase.h"
+#include <vespa/searchlib/util/bufferwriter.h>
+#include <vespa/log/log.h>
+
+LOG_SETUP(".searchlib.attribute.numericbase");
+
+namespace search
+{
+
+IMPLEMENT_IDENTIFIABLE_ABSTRACT(NumericAttribute, AttributeVector);
+
+using attribute::LoadedEnumAttributeVector;
+
+void
+NumericAttribute::fillEnum0(const void *src,
+ size_t srcLen,
+ EnumIndexVector &eidxs)
+{
+ (void) src;
+ (void) srcLen;
+ (void) eidxs;
+ fprintf(stderr, "NumericAttribute::fillEnum0\n");
+}
+
+
+void
+NumericAttribute::fillEnumIdx(ReaderBase &attrReader,
+ uint64_t numValues,
+ const EnumIndexVector &eidxs,
+ LoadedEnumAttributeVector &loaded)
+{
+ (void) attrReader;
+ (void) numValues;
+ (void) eidxs;
+ (void) loaded;
+ fprintf(stderr, "NumericAttribute::fillEnumIdx (loaded)\n");
+}
+
+
+void
+NumericAttribute::fillEnumIdx(ReaderBase &attrReader,
+ uint64_t numValues,
+ const EnumIndexVector &eidxs,
+ EnumVector &enumHist)
+{
+ (void) attrReader;
+ (void) numValues;
+ (void) eidxs;
+ (void) enumHist;
+ fprintf(stderr, "NumericAttribute::fillEnumIdx (enumHist)\n");
+}
+
+
+void
+NumericAttribute::fillPostingsFixupEnum(const LoadedEnumAttributeVector &
+ loaded)
+{
+ (void) loaded;
+ fprintf(stderr, "NumericAttribute::fillPostingsFixupEnum\n");
+}
+
+void
+NumericAttribute::fixupEnumRefCounts(const EnumVector &enumHist)
+{
+ (void) enumHist;
+ fprintf(stderr, "NumericAttribute::fixupEnumRefCounts\n");
+}
+
+}
diff --git a/searchlib/src/vespa/searchlib/attribute/numericbase.h b/searchlib/src/vespa/searchlib/attribute/numericbase.h
new file mode 100644
index 00000000000..d7b45b73574
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/numericbase.h
@@ -0,0 +1,147 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "attributevector.h"
+#include <vespa/searchlib/common/sort.h>
+#include <vespa/searchlib/attribute/enumstorebase.h>
+#include "loadedenumvalue.h"
+
+namespace search {
+
+class NumericAttribute : public AttributeVector
+{
+protected:
+ typedef EnumStoreBase::Index EnumIndex;
+ typedef EnumStoreBase::IndexVector EnumIndexVector;
+ typedef EnumStoreBase::EnumVector EnumVector;
+
+ NumericAttribute(const vespalib::string & name,
+ const AttributeVector::Config & cfg)
+ : AttributeVector(name, cfg)
+ {
+ }
+
+ virtual void
+ fillEnum0(const void *src,
+ size_t srcLen,
+ EnumIndexVector &eidxs);
+
+ virtual void
+ fillEnumIdx(ReaderBase &attrReader,
+ uint64_t numValues,
+ const EnumIndexVector &eidxs,
+ attribute::LoadedEnumAttributeVector &loaded);
+
+ virtual void
+ fillEnumIdx(ReaderBase &attrReader,
+ uint64_t numValues,
+ const EnumIndexVector &eidxs,
+ EnumVector &enumHist);
+
+ virtual void
+ fillPostingsFixupEnum(const attribute::LoadedEnumAttributeVector &loaded);
+
+ virtual void
+ fixupEnumRefCounts(const EnumVector &enumHist);
+
+ template<typename T>
+ class Equal
+ {
+ private:
+ T _value;
+ bool _valid;
+ protected:
+ Equal(QueryTermSimple &queryTerm, bool avoidUndefinedInRange = false)
+ : _value(0),
+ _valid(false)
+ {
+ (void) avoidUndefinedInRange;
+ QueryTermSimple::RangeResult<T> res = queryTerm.getRange<T>();
+ _valid = res.valid && res.isEqual() && !res.adjusted;
+ _value = res.high;
+ }
+ bool isValid() const { return _valid; }
+ bool match(T v) const { return v == _value; }
+ Int64Range getRange() const {
+ return Int64Range(static_cast<int64_t>(_value));
+ }
+ };
+
+ template<typename T>
+ class Range
+ {
+ protected:
+ T _low;
+ T _high;
+ private:
+ bool _valid;
+ int _limit;
+ size_t _max_per_group;
+ protected:
+ Range(const QueryTermSimple & queryTerm,
+ bool avoidUndefinedInRange=false)
+ : _low(0),
+ _high(0),
+ _valid(false)
+ {
+ QueryTermSimple::RangeResult<T> res = queryTerm.getRange<T>();
+ _valid = res.isEqual() ? (res.valid && !res.adjusted) : res.valid;
+ _low = res.low;
+ _high = res.high;
+ _limit = queryTerm.getRangeLimit();
+ _max_per_group = queryTerm.getMaxPerGroup();
+ if (_valid && avoidUndefinedInRange &&
+ _low == std::numeric_limits<T>::min()) {
+ _low += 1;
+ }
+ }
+ Int64Range
+ getRange() const
+ {
+ return Int64Range(static_cast<int64_t>(_low),
+ static_cast<int64_t>(_high));
+ }
+ bool isValid() const { return _valid; }
+ bool match(T v) const { return (_low <= v) && (v <= _high); }
+ int getRangeLimit() const { return _limit; }
+ size_t getMaxPerGroup() const { return _max_per_group; }
+
+ template <typename BaseType>
+ search::Range<BaseType>
+ cappedRange(bool isFloat, bool isUnsigned)
+ {
+ BaseType low = static_cast<BaseType>(_low);
+ BaseType high = static_cast<BaseType>(_high);
+
+ BaseType numMin = std::numeric_limits<BaseType>::min();
+ BaseType numMax = std::numeric_limits<BaseType>::max();
+
+ if (isFloat)
+ {
+ if (_low <= (-numMax)) {
+ low = -numMax;
+ }
+ } else {
+ if (_low <= (numMin)) {
+ if (isUnsigned) {
+ low = numMin;
+ } else {
+ low = numMin + 1; // we must avoid the undefined value
+ }
+ }
+ }
+
+ if (_high >= (numMax)) {
+ high = numMax;
+ }
+ return search::Range<BaseType>(low, high);
+ }
+
+ };
+public:
+ DECLARE_IDENTIFIABLE_ABSTRACT(NumericAttribute);
+};
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/attribute/postingchange.cpp b/searchlib/src/vespa/searchlib/attribute/postingchange.cpp
new file mode 100644
index 00000000000..2731fb0157d
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/postingchange.cpp
@@ -0,0 +1,275 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "postingchange.h"
+#include "multivaluemapping.h"
+#include "postinglistattribute.h"
+#include <vespa/searchlib/common/bitvector.h>
+#include <map>
+
+namespace search {
+
+namespace
+{
+
+void
+removeDupAdditions(PostingChange<AttributePosting>::A &additions)
+{
+ typedef PostingChange<AttributePosting>::A::iterator Iterator;
+ if (additions.empty())
+ return;
+ if (additions.size() == 1)
+ return;
+ std::sort(additions.begin(), additions.end());
+ Iterator i = additions.begin();
+ Iterator ie = additions.end();
+ Iterator d = i;
+ for (++i; i != ie; ++i, ++d) {
+ if (d->_key == i->_key)
+ break;
+ }
+ if (i == ie)
+ return; // no dups found
+ for (++i; i != ie; ++i) {
+ if (d->_key != i->_key) {
+ ++d;
+ *d = *i;
+ }
+ }
+ additions.resize(d - additions.begin() + 1);
+}
+
+
+void
+removeDupAdditions(PostingChange<AttributeWeightPosting>::A &additions)
+{
+ typedef PostingChange<AttributeWeightPosting>::A::iterator Iterator;
+ if (additions.empty())
+ return;
+ if (additions.size() == 1u)
+ return;
+ std::sort(additions.begin(), additions.end());
+ Iterator i = additions.begin();
+ Iterator ie = additions.end();
+ Iterator d = i;
+ for (++i; i != ie; ++i, ++d) {
+ if (d->_key == i->_key)
+ break;
+ }
+ if (i == ie)
+ return; // no dups found
+ // sum weights together
+ d->setData(d->getData() + i->getData());
+ for (++i; i != ie; ++i) {
+ if (d->_key != i->_key) {
+ ++d;
+ *d = *i;
+ } else {
+ // sum weights together
+ d->setData(d->getData() + i->getData());
+ }
+ }
+ additions.resize(d - additions.begin() + 1);
+}
+
+void
+removeDupRemovals(std::vector<uint32_t> &removals)
+{
+ typedef std::vector<uint32_t>::iterator Iterator;
+ if (removals.empty())
+ return;
+ if (removals.size() == 1u)
+ return;
+ std::sort(removals.begin(), removals.end());
+ Iterator i = removals.begin();
+ Iterator ie = removals.end();
+ Iterator d = i;
+ for (++i; i != ie; ++i, ++d) {
+ if (*d == *i)
+ break;
+ }
+ if (i == ie)
+ return; // no dups found
+ for (++i; i != ie; ++i) {
+ if (*d != *i) {
+ ++d;
+ *d = *i;
+ }
+ }
+ removals.resize(d - removals.begin() + 1);
+}
+
+}
+
+EnumStoreBase::Index
+EnumIndexMapper::map(EnumStoreBase::Index original, const EnumStoreComparator & compare) const
+{
+ (void) compare;
+ return original;
+}
+
+template <>
+void
+PostingChange<AttributePosting>::removeDups(void)
+{
+ removeDupAdditions(_additions);
+ removeDupRemovals(_removals);
+}
+
+
+template <>
+void
+PostingChange<AttributeWeightPosting>::removeDups(void)
+{
+ removeDupAdditions(_additions);
+ removeDupRemovals(_removals);
+}
+
+
+template <typename P>
+void
+PostingChange<P>::apply(GrowableBitVector &bv)
+{
+ P *a = &_additions[0];
+ P *ae = &_additions[0] + _additions.size();
+ uint32_t *r = &_removals[0];
+ uint32_t *re = &_removals[0] + _removals.size();
+
+ while (a != ae || r != re) {
+ if (r != re && (a == ae || *r < a->_key)) {
+ // remove
+ assert(*r < bv.size());
+ bv.slowClearBit(*r);
+ ++r;
+ } else {
+ if (r != re && !(a->_key < *r)) {
+ // update or add
+ assert(a->_key < bv.size());
+ bv.slowSetBit(a->_key);
+ ++r;
+ } else {
+ assert(a->_key < bv.size());
+ bv.slowSetBit(a->_key);
+ }
+ ++a;
+ }
+ }
+}
+
+template <typename WeightedIndex>
+class ActualChangeComputer {
+public:
+ typedef std::vector<WeightedIndex> V;
+ void compute(const WeightedIndex * entriesNew, size_t szNew,
+ const WeightedIndex * entriesOld, size_t szOld,
+ V & added, V & changed, V & removed) const;
+private:
+ mutable V _oldEntries;
+ mutable V _newEntries;
+};
+
+template <typename WeightedIndex>
+void
+ActualChangeComputer<WeightedIndex>::compute(const WeightedIndex * entriesNew, size_t szNew,
+ const WeightedIndex * entriesOld, size_t szOld,
+ V & added, V & changed, V & removed) const
+{
+ _newEntries.reserve(szNew);
+ _oldEntries.reserve(szOld);
+ _newEntries.clear();
+ _oldEntries.clear();
+ _newEntries.insert(_newEntries.begin(), entriesNew, entriesNew + szNew);
+ _oldEntries.insert(_oldEntries.begin(), entriesOld, entriesOld + szOld);
+ std::sort(_newEntries.begin(), _newEntries.end());
+ std::sort(_oldEntries.begin(), _oldEntries.end());
+ auto newIt(_newEntries.begin()), oldIt(_oldEntries.begin());
+ while (newIt != _newEntries.end() && oldIt != _oldEntries.end()) {
+ if (newIt->value() == oldIt->value()) {
+ if (newIt->weight() != oldIt->weight()) {
+ changed.push_back(*newIt);
+ }
+ newIt++, oldIt++;
+ } else if (newIt->value() < oldIt->value()) {
+ added.push_back(*newIt++);
+ } else {
+ removed.push_back(*oldIt++);
+ }
+ }
+ added.insert(added.end(), newIt, _newEntries.end());
+ removed.insert(removed.end(), oldIt, _oldEntries.end());
+}
+
+template <typename WeightedIndex, typename PostingMap>
+template <typename MultivalueMapping>
+PostingMap
+PostingChangeComputerT<WeightedIndex, PostingMap>::
+compute(const MultivalueMapping & mvm, const DocIndices & docIndices,
+ const EnumStoreComparator & compare, const EnumIndexMapper & mapper)
+{
+ typedef ActualChangeComputer<WeightedIndex> AC;
+ AC actualChange;
+ typename AC::V added, changed, removed;
+ PostingMap changePost;
+
+ // generate add postings and remove postings
+ for (const auto & docIndex : docIndices) {
+ const WeightedIndex * oldIndices = NULL;
+ uint32_t valueCount = mvm.get(docIndex.first, oldIndices);
+ added.clear(), changed.clear(), removed.clear();
+ actualChange.compute(&docIndex.second[0], docIndex.second.size(), oldIndices, valueCount,
+ added, changed, removed);
+ for (const WeightedIndex & wi : added) {
+ changePost[EnumPostingPair(mapper.map(wi.value(), compare), &compare)].add(docIndex.first, wi.weight());
+ }
+ for (const WeightedIndex & wi : removed) {
+ changePost[EnumPostingPair(mapper.map(wi.value(), compare), &compare)].remove(docIndex.first);
+ }
+ for (const WeightedIndex & wi : changed) {
+ changePost[EnumPostingPair(mapper.map(wi.value(), compare), &compare)].remove(docIndex.first).add(docIndex.first, wi.weight());
+ }
+ }
+ return changePost;
+}
+
+template class PostingChange<AttributePosting>;
+
+template class PostingChange<AttributeWeightPosting>;
+
+typedef PostingChange<btree::BTreeKeyData<unsigned int, int> > WeightedPostingChange;
+typedef std::map<EnumPostingPair, WeightedPostingChange> WeightedPostingChangeMap;
+typedef EnumStoreBase::Index EnumIndex;
+typedef multivalue::WeightedValue<EnumIndex> WeightedIndex;
+typedef multivalue::Value<EnumIndex> ValueIndex;
+
+typedef MultiValueMappingT<WeightedIndex, multivalue::Index32> NormalWeightedMultiValueMapping;
+typedef MultiValueMappingT<WeightedIndex, multivalue::Index64> HugeWeightedMultiValueMapping;
+typedef MultiValueMappingT<ValueIndex, multivalue::Index32> NormalValueMultiValueMapping;
+typedef MultiValueMappingT<ValueIndex, multivalue::Index64> HugeValueMultiValueMapping;
+typedef std::vector<std::pair<uint32_t, std::vector<WeightedIndex>>> DocIndicesWeighted;
+typedef std::vector<std::pair<uint32_t, std::vector<ValueIndex>>> DocIndicesValue;
+
+template WeightedPostingChangeMap PostingChangeComputerT<WeightedIndex, WeightedPostingChangeMap>
+ ::compute<NormalWeightedMultiValueMapping>(const NormalWeightedMultiValueMapping &,
+ const DocIndicesWeighted &,
+ const EnumStoreComparator &,
+ const EnumIndexMapper &);
+
+template WeightedPostingChangeMap PostingChangeComputerT<WeightedIndex, WeightedPostingChangeMap>
+ ::compute<HugeWeightedMultiValueMapping>(const HugeWeightedMultiValueMapping &,
+ const DocIndicesWeighted &,
+ const EnumStoreComparator &,
+ const EnumIndexMapper &);
+
+template WeightedPostingChangeMap PostingChangeComputerT<ValueIndex, WeightedPostingChangeMap>
+ ::compute<NormalValueMultiValueMapping>(const NormalValueMultiValueMapping &,
+ const DocIndicesValue &,
+ const EnumStoreComparator &,
+ const EnumIndexMapper &);
+
+template WeightedPostingChangeMap PostingChangeComputerT<ValueIndex, WeightedPostingChangeMap>
+ ::compute<HugeValueMultiValueMapping>(const HugeValueMultiValueMapping &,
+ const DocIndicesValue &,
+ const EnumStoreComparator &,
+ const EnumIndexMapper &);
+
+}
diff --git a/searchlib/src/vespa/searchlib/attribute/postingchange.h b/searchlib/src/vespa/searchlib/attribute/postingchange.h
new file mode 100644
index 00000000000..8309cf91516
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/postingchange.h
@@ -0,0 +1,86 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/vespalib/util/array.h>
+#include "postinglisttraits.h"
+#include "enumstorebase.h"
+
+namespace search
+{
+
+class GrowableBitVector;
+
+/**
+ * Class representing changes to a posting list for a single value.
+ */
+template <typename P>
+class PostingChange
+{
+public:
+ typedef vespalib::Array<P, vespalib::DefaultAlloc> A;
+ typedef std::vector<uint32_t> R;
+ A _additions;
+ R _removals;
+
+ inline void add(uint32_t docId, int32_t weight);
+
+ PostingChange & remove(uint32_t docId) {
+ _removals.push_back(docId);
+ return *this;
+ }
+
+ void clear(void) {
+ _additions.clear();
+ _removals.clear();
+ }
+
+ /*
+ * Remove duplicates in additions and removals vectors, since new
+ * posting list tree doesn't support duplicate entries.
+ */
+ void removeDups(void);
+ void apply(GrowableBitVector &bv);
+};
+
+class EnumIndexMapper
+{
+public:
+ virtual ~EnumIndexMapper() { }
+ virtual EnumStoreBase::Index map(EnumStoreBase::Index original, const EnumStoreComparator & compare) const;
+};
+
+template <typename WeightedIndex, typename PostingMap>
+class PostingChangeComputerT
+{
+private:
+ typedef std::vector<std::pair<uint32_t, std::vector<WeightedIndex>>> DocIndices;
+public:
+ template <typename MultivalueMapping>
+ static PostingMap compute(const MultivalueMapping & mvm, const DocIndices & docIndices,
+ const EnumStoreComparator & compare, const EnumIndexMapper & mapper);
+};
+
+template <>
+inline void
+PostingChange<AttributePosting>::add(uint32_t docId, int32_t weight)
+{
+ (void) weight;
+ _additions.push_back(AttributePosting(docId,
+ btree::BTreeNoLeafData()));
+}
+
+
+template <>
+inline void
+PostingChange<AttributeWeightPosting>::add(uint32_t docId, int32_t weight)
+{
+ _additions.push_back(AttributeWeightPosting(docId, weight));
+}
+
+
+} // namespace search
+
+
+
+
diff --git a/searchlib/src/vespa/searchlib/attribute/postinglistattribute.cpp b/searchlib/src/vespa/searchlib/attribute/postinglistattribute.cpp
new file mode 100644
index 00000000000..2d79e80142a
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/postinglistattribute.cpp
@@ -0,0 +1,451 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "postinglistattribute.h"
+#include "loadednumericvalue.h"
+#include "loadedstringvalue.h"
+#include "enumcomparator.h"
+
+namespace search
+{
+
+using attribute::LoadedNumericValue;
+
+template <typename P>
+PostingListAttributeBase<P>::
+PostingListAttributeBase(AttributeVector &attr,
+ EnumStoreBase &enumStore)
+ : attribute::IPostingListAttributeBase(),
+ _postingList(enumStore.getPostingDictionary(), attr.getStatus(),
+ attr.getConfig()),
+ _attr(attr),
+ _dict(enumStore.getPostingDictionary()),
+ _esb(enumStore)
+{
+}
+
+
+template <typename P>
+PostingListAttributeBase<P>::~PostingListAttributeBase()
+{
+}
+
+
+template <typename P>
+void
+PostingListAttributeBase<P>::clearAllPostings()
+{
+ _postingList.clearBuilder();
+ _attr.incGeneration(); // Force freeze
+ EnumPostingTree::Iterator itr(_dict.begin());
+ EntryRef prev;
+ while (itr.valid()) {
+ EntryRef ref = itr.getData();
+ if (ref.ref() != prev.ref()) {
+ if (ref.valid()) {
+ _postingList.clear(ref);
+ }
+ prev = ref;
+ }
+ itr.writeData(EntryRef());
+ ++itr;
+ }
+ _attr.incGeneration(); // Force freeze
+}
+
+
+template <typename P>
+void
+PostingListAttributeBase<P>::fillPostingsFixupEnumBase(
+ const LoadedEnumAttributeVector &loaded)
+{
+ clearAllPostings();
+ uint32_t docIdLimit = _attr.getNumDocs();
+ EnumStoreBase &enumStore = _esb;
+ EntryRef newIndex;
+ PostingChange<P> postings;
+ if ( loaded.empty() )
+ return;
+ typedef LoadedEnumAttributeVector::const_iterator LoadedIT;
+ uint32_t preve = 0;
+ uint32_t refCount = 0;
+ typedef EnumPostingTree::Iterator DictIT;
+
+ DictIT di(_dict.begin());
+ DictIT pdi(di);
+ assert(di.valid());
+ for(LoadedIT i(loaded.begin()), ie(loaded.end()); i != ie; ++i) {
+ if (preve != i->getEnum()) {
+ assert(preve < i->getEnum());
+ enumStore.fixupRefCount(di.getKey(), refCount);
+ refCount = 0;
+ while (preve != i->getEnum()) {
+ ++di;
+ assert(di.valid());
+ ++preve;
+ }
+ assert(di.valid());
+ if (enumStore.foldedChange(pdi.getKey(), di.getKey())) {
+ postings.removeDups();
+ newIndex = EntryRef();
+ _postingList.apply(newIndex,
+ &postings._additions[0],
+ &postings._additions[0] +
+ postings._additions.size(),
+ &postings._removals[0],
+ &postings._removals[0] +
+ postings._removals.size());
+ pdi.writeData(newIndex);
+ while (pdi != di) {
+ ++pdi;
+ }
+ postings.clear();
+ }
+ }
+ ++refCount;
+ assert(i->getDocId() < docIdLimit);
+ (void) docIdLimit;
+ postings.add(i->getDocId(), i->getWeight());
+ }
+ assert(refCount != 0);
+ enumStore.fixupRefCount(di.getKey(), refCount);
+ postings.removeDups();
+ newIndex = EntryRef();
+ _postingList.apply(newIndex,
+ &postings._additions[0],
+ &postings._additions[0] + postings._additions.size(),
+ &postings._removals[0],
+ &postings._removals[0] + postings._removals.size());
+ pdi.writeData(newIndex);
+ enumStore.freeUnusedEnums(false);
+}
+
+
+template <typename P>
+void
+PostingListAttributeBase<P>::updatePostings(PostingMap &changePost,
+ EnumStoreComparator &cmp)
+{
+ for (typename PostingMap::iterator
+ it(changePost.begin()), mt(changePost.end()); it != mt; it++) {
+ PostingChange<P> &change(it->second);
+ EnumIndex idx(it->first.getEnumIdx());
+ typename EnumPostingTree::Iterator dictItr =
+ _dict.lowerBound(idx, cmp);
+ assert(dictItr.valid() && dictItr.getKey() == idx);
+ EntryRef newPosting = dictItr.getData();
+
+ change.removeDups();
+ _postingList.apply(newPosting,
+ &change._additions[0],
+ &change._additions[0] + change._additions.size(),
+ &change._removals[0],
+ &change._removals[0] + change._removals.size());
+
+ _dict.thaw(dictItr);
+ dictItr.writeData(newPosting);
+ }
+}
+
+
+template <typename P>
+bool
+PostingListAttributeBase<P>::forwardedOnAddDoc(DocId doc,
+ size_t wantSize,
+ size_t wantCapacity)
+{
+ if (!_postingList._enableBitVectors)
+ return false;
+ if (doc >= wantSize)
+ wantSize = doc + 1;
+ if (doc >= wantCapacity)
+ wantCapacity = doc + 1;
+ return _postingList.resizeBitVectors(wantSize, wantCapacity);
+}
+
+
+template <typename P>
+void
+PostingListAttributeBase<P>::
+clearPostings(attribute::IAttributeVector::EnumHandle eidx,
+ uint32_t fromLid,
+ uint32_t toLid,
+ EnumStoreComparator &cmp)
+{
+ PostingChange<P> postings;
+
+ for (uint32_t lid = fromLid; lid < toLid; ++lid) {
+ postings.remove(lid);
+ }
+
+ typedef EnumPostingTree::Iterator DictIT;
+ EntryRef er(eidx);
+ DictIT di(_dict.lowerBound(er, cmp));
+ assert(di.valid());
+
+ EntryRef newPosting = di.getData();
+ assert(newPosting.valid());
+
+ _postingList.apply(newPosting,
+ &postings._additions[0],
+ &postings._additions[0] +
+ postings._additions.size(),
+ &postings._removals[0],
+ &postings._removals[0] +
+ postings._removals.size());
+ _dict.thaw(di);
+ di.writeData(newPosting);
+}
+
+
+template <typename P>
+void
+PostingListAttributeBase<P>::forwardedShrinkLidSpace(uint32_t newSize)
+{
+ (void) _postingList.resizeBitVectors(newSize, newSize);
+}
+
+
+template <typename P, typename LoadedVector, typename LoadedValueType,
+ typename EnumStoreType>
+PostingListAttributeSubBase<P, LoadedVector, LoadedValueType, EnumStoreType>::
+PostingListAttributeSubBase(AttributeVector &attr,
+ EnumStore &enumStore)
+ : Parent(attr, enumStore),
+ _es(enumStore)
+{
+}
+
+
+template <typename P, typename LoadedVector, typename LoadedValueType,
+ typename EnumStoreType>
+PostingListAttributeSubBase<P, LoadedVector, LoadedValueType, EnumStoreType>::
+~PostingListAttributeSubBase(void)
+{
+}
+
+
+template <typename P, typename LoadedVector, typename LoadedValueType,
+ typename EnumStoreType>
+void
+PostingListAttributeSubBase<P, LoadedVector, LoadedValueType, EnumStoreType>::
+handleFillPostings(LoadedVector &loaded)
+{
+ clearAllPostings();
+ EntryRef newIndex;
+ PostingChange<P> postings;
+ uint32_t docIdLimit = _attr.getNumDocs();
+ _postingList.resizeBitVectors(docIdLimit, docIdLimit);
+ if ( ! loaded.empty() ) {
+ vespalib::Array<typename LoadedVector::Type,
+ vespalib::DefaultAlloc> similarValues;
+ typename LoadedVector::Type v = loaded.read();
+ LoadedValueType prev = v.getValue();
+ for(size_t i(0), m(loaded.size()); i < m; i++, loaded.next()) {
+ v = loaded.read();
+ if (FoldedComparatorType::compareFolded(prev, v.getValue()) == 0) {
+ // for single value attributes loaded[numDocs] is used
+ // for default value but we don't want to add an
+ // invalid docId to the posting list.
+ if (v._docId < docIdLimit) {
+ postings.add(v._docId, v.getWeight());
+ similarValues.push_back(v);
+ }
+ } else {
+ postings.removeDups();
+
+ newIndex = EntryRef();
+ _postingList.apply(newIndex,
+ &postings._additions[0],
+ &postings._additions[0] +
+ postings._additions.size(),
+ &postings._removals[0],
+ &postings._removals[0] +
+ postings._removals.size());
+ postings.clear();
+ if (v._docId < docIdLimit) {
+ postings.add(v._docId, v.getWeight());
+ }
+ similarValues[0]._pidx = newIndex;
+ for(size_t j(0), k(similarValues.size()); j < k; j++) {
+ loaded.write(similarValues[j]);
+ }
+ similarValues.clear();
+ similarValues.push_back(v);
+ prev = v.getValue();
+ }
+ }
+
+ postings.removeDups();
+ newIndex = EntryRef();
+ _postingList.apply(newIndex,
+ &postings._additions[0],
+ &postings._additions[0] +
+ postings._additions.size(),
+ &postings._removals[0],
+ &postings._removals[0] + postings._removals.size());
+ similarValues[0]._pidx = newIndex;
+ for(size_t i(0), m(similarValues.size()); i < m; i++) {
+ loaded.write(similarValues[i]);
+ }
+ }
+}
+
+
+template <typename P, typename LoadedVector, typename LoadedValueType,
+ typename EnumStoreType>
+void
+PostingListAttributeSubBase<P, LoadedVector, LoadedValueType, EnumStoreType>::
+updatePostings(PostingMap &changePost)
+{
+ FoldedComparatorType cmpa(_es);
+
+ updatePostings(changePost, cmpa);
+}
+
+
+template <typename P, typename LoadedVector, typename LoadedValueType,
+ typename EnumStoreType>
+void
+PostingListAttributeSubBase<P, LoadedVector, LoadedValueType, EnumStoreType>::
+printPostingListContent(vespalib::asciistream & os) const
+{
+ for (DictionaryIterator itr = _es.getPostingDictionary().begin();
+ itr.valid(); ++itr) {
+ EnumIndex enumIdx = itr.getKey();
+ os << "PostingList[";
+ _es.printValue(os, enumIdx);
+ os << "]: {";
+
+ EntryRef postIdx = itr.getData();
+ PostingIterator postings = _postingList.begin(postIdx);
+ for (; postings.valid(); ++postings) {
+ os << postings.getKey() << ", ";
+ }
+ os << "}\n";
+ }
+}
+
+
+template <typename P, typename LoadedVector, typename LoadedValueType,
+ typename EnumStoreType>
+void
+PostingListAttributeSubBase<P, LoadedVector, LoadedValueType, EnumStoreType>::
+clearPostings(attribute::IAttributeVector::EnumHandle eidx,
+ uint32_t fromLid,
+ uint32_t toLid)
+{
+ FoldedComparatorType cmp(_es);
+ clearPostings(eidx, fromLid, toLid, cmp);
+}
+
+
+
+template class PostingListAttributeBase<AttributePosting>;
+template class PostingListAttributeBase<AttributeWeightPosting>;
+
+typedef SequentialReadModifyWriteInterface<LoadedNumericValue<int8_t> >
+LoadedInt8Vector;
+
+typedef SequentialReadModifyWriteInterface<LoadedNumericValue<int16_t> >
+LoadedInt16Vector;
+
+typedef SequentialReadModifyWriteInterface<LoadedNumericValue<int32_t> >
+LoadedInt32Vector;
+
+typedef SequentialReadModifyWriteInterface<LoadedNumericValue<int64_t> >
+LoadedInt64Vector;
+
+typedef SequentialReadModifyWriteInterface<LoadedNumericValue<float> >
+LoadedFloatVector;
+
+typedef SequentialReadModifyWriteInterface<LoadedNumericValue<double> >
+LoadedDoubleVector;
+
+
+template class
+PostingListAttributeSubBase<AttributePosting,
+ LoadedInt8Vector,
+ int8_t,
+ EnumStoreT<NumericEntryType<int8_t> > >;
+
+template class
+PostingListAttributeSubBase<AttributePosting,
+ LoadedInt16Vector,
+ int16_t,
+ EnumStoreT<NumericEntryType<int16_t> > >;
+
+template class
+PostingListAttributeSubBase<AttributePosting,
+ LoadedInt32Vector,
+ int32_t,
+ EnumStoreT<NumericEntryType<int32_t> > >;
+
+template class
+PostingListAttributeSubBase<AttributePosting,
+ LoadedInt64Vector,
+ int64_t,
+ EnumStoreT<NumericEntryType<int64_t> > >;
+
+template class
+PostingListAttributeSubBase<AttributePosting,
+ LoadedFloatVector,
+ float,
+ EnumStoreT<NumericEntryType<float> > >;
+
+template class
+PostingListAttributeSubBase<AttributePosting,
+ LoadedDoubleVector,
+ double,
+ EnumStoreT<NumericEntryType<double> > >;
+
+template class
+PostingListAttributeSubBase<AttributePosting,
+ attribute::LoadedStringVector,
+ const char *,
+ EnumStoreT<StringEntryType > >;
+
+template class
+PostingListAttributeSubBase<AttributeWeightPosting,
+ LoadedInt8Vector,
+ int8_t,
+ EnumStoreT<NumericEntryType<int8_t> > >;
+
+template class
+PostingListAttributeSubBase<AttributeWeightPosting,
+ LoadedInt16Vector,
+ int16_t,
+ EnumStoreT<NumericEntryType<int16_t> > >;
+
+template class
+PostingListAttributeSubBase<AttributeWeightPosting,
+ LoadedInt32Vector,
+ int32_t,
+ EnumStoreT<NumericEntryType<int32_t> > >;
+
+template class
+PostingListAttributeSubBase<AttributeWeightPosting,
+ LoadedInt64Vector,
+ int64_t,
+ EnumStoreT<NumericEntryType<int64_t> > >;
+
+template class
+PostingListAttributeSubBase<AttributeWeightPosting,
+ LoadedFloatVector,
+ float,
+ EnumStoreT<NumericEntryType<float> > >;
+
+template class
+PostingListAttributeSubBase<AttributeWeightPosting,
+ LoadedDoubleVector,
+ double,
+ EnumStoreT<NumericEntryType<double> > >;
+
+template class
+PostingListAttributeSubBase<AttributeWeightPosting,
+ attribute::LoadedStringVector,
+ const char *,
+ EnumStoreT<StringEntryType > >;
+
+
+}
diff --git a/searchlib/src/vespa/searchlib/attribute/postinglistattribute.h b/searchlib/src/vespa/searchlib/attribute/postinglistattribute.h
new file mode 100644
index 00000000000..3632e6b5f35
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/postinglistattribute.h
@@ -0,0 +1,165 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/attribute/numericbase.h>
+#include <vespa/searchlib/attribute/stringbase.h>
+#include <vespa/searchlib/attribute/enumattribute.h>
+#include <vespa/searchlib/queryeval/searchiterator.h>
+#include <vespa/searchlib/btree/entryref.h>
+#include <vespa/searchlib/btree/btreestore.h>
+#include "dociditerator.h"
+#include "postinglistsearchcontext.h"
+#include "postingchange.h"
+#include "ipostinglistattributebase.h"
+
+namespace search {
+
+class EnumPostingPair
+{
+private:
+ EnumStoreBase::Index _idx;
+ const EnumStoreComparator *_cmp;
+public:
+ // EnumPostingPair() : _itr() {}
+ EnumPostingPair(EnumStoreBase::Index idx,
+ const EnumStoreComparator *cmp)
+ : _idx(idx),
+ _cmp(cmp)
+ {
+ }
+
+ bool
+ operator<(const EnumPostingPair &rhs) const
+ {
+ return (*_cmp)(_idx, rhs._idx);
+ }
+
+ EnumStoreBase::Index
+ getEnumIdx() const
+ {
+ return _idx;
+ }
+};
+
+
+template <typename P>
+class PostingListAttributeBase : public attribute::IPostingListAttributeBase
+{
+protected:
+ typedef P Posting;
+ typedef typename Posting::DataType DataType;
+ typedef attribute::PostingListTraits<DataType> AggregationTraits;
+ typedef typename AggregationTraits::PostingList PostingList;
+ typedef AttributeVector::DocId DocId;
+ typedef std::map<EnumPostingPair, PostingChange<P> > PostingMap;
+ typedef btree::EntryRef EntryRef;
+ typedef attribute::LoadedEnumAttributeVector LoadedEnumAttributeVector;
+ typedef EnumStoreBase::Index EnumIndex;
+ PostingList _postingList;
+ AttributeVector &_attr;
+ EnumPostingTree &_dict;
+ EnumStoreBase &_esb;
+
+ PostingListAttributeBase(AttributeVector &attr,
+ EnumStoreBase &enumStore);
+ virtual ~PostingListAttributeBase();
+
+ virtual void
+ updatePostings(PostingMap & changePost) = 0;
+
+ void
+ updatePostings(PostingMap &changePost,
+ EnumStoreComparator &cmp);
+
+ void
+ clearAllPostings(void);
+
+ void disableFreeLists() { _postingList.disableFreeLists(); }
+
+ void
+ disableElemHoldList()
+ {
+ _postingList.disableElemHoldList();
+ }
+
+ void
+ fillPostingsFixupEnumBase(const LoadedEnumAttributeVector &loaded);
+
+ bool
+ forwardedOnAddDoc(DocId doc,
+ size_t wantSize,
+ size_t wantCapacity);
+
+ void
+ clearPostings(attribute::IAttributeVector::EnumHandle eidx,
+ uint32_t fromLid,
+ uint32_t toLid,
+ EnumStoreComparator &cmp);
+
+ virtual void
+ forwardedShrinkLidSpace(uint32_t newSize);
+
+public:
+ const PostingList & getPostingList() const { return _postingList; }
+ PostingList & getPostingList() { return _postingList; }
+};
+
+
+template <typename P, typename LoadedVector, typename LoadedValueType,
+ typename EnumStoreType>
+class PostingListAttributeSubBase : public PostingListAttributeBase<P>
+{
+public:
+ typedef PostingListAttributeBase<P> Parent;
+ typedef EnumStoreType EnumStore;
+ typedef EnumPostingTree Dictionary;
+ typedef typename Dictionary::Iterator DictionaryIterator;
+ typedef EnumStoreBase::Index EnumIndex;
+ typedef typename EnumStore::FoldedComparatorType FoldedComparatorType;
+ typedef btree::EntryRef EntryRef;
+ typedef typename Parent::PostingMap PostingMap;
+ typedef typename Parent::PostingList PostingList;
+ typedef typename PostingList::Iterator PostingIterator;
+ typedef attribute::LoadedEnumAttributeVector LoadedEnumAttributeVector;
+
+ using Parent::clearAllPostings;
+ using Parent::updatePostings;
+ using Parent::fillPostingsFixupEnumBase;
+ using Parent::clearPostings;
+ using Parent::_postingList;
+ using Parent::_attr;
+ using Parent::_dict;
+
+private:
+ EnumStore &_es;
+
+
+public:
+ PostingListAttributeSubBase(AttributeVector &attr,
+ EnumStore &enumStore);
+
+ virtual
+ ~PostingListAttributeSubBase(void);
+
+ void
+ handleFillPostings(LoadedVector &loaded);
+
+ virtual void
+ updatePostings(PostingMap &changePost);
+
+ void
+ printPostingListContent(vespalib::asciistream & os) const;
+
+ virtual void
+ clearPostings(attribute::IAttributeVector::EnumHandle eidx,
+ uint32_t fromLid,
+ uint32_t toLid);
+};
+
+
+extern template class PostingListAttributeBase<AttributePosting>;
+extern template class PostingListAttributeBase<AttributeWeightPosting>;
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.cpp b/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.cpp
new file mode 100644
index 00000000000..2a64a4d2bdc
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.cpp
@@ -0,0 +1,93 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "postinglistsearchcontext.h"
+#include "postinglistsearchcontext.hpp"
+#include <vespa/searchlib/btree/btreeiterator.hpp>
+#include <vespa/searchlib/btree/btreenode.hpp>
+#include "attributeiterators.hpp"
+
+
+namespace search
+{
+
+namespace attribute
+{
+
+using btree::BTreeNode;
+
+PostingListSearchContext::
+PostingListSearchContext(const Dictionary &dictionary,
+ uint32_t docIdLimit,
+ uint64_t numValues,
+ bool hasWeight,
+ const EnumStoreBase &esb,
+ uint32_t minBvDocFreq,
+ bool useBitVector)
+ : _frozenDictionary(dictionary.getFrozenView()),
+ _lowerDictItr(BTreeNode::Ref(), dictionary.getAllocator()),
+ _upperDictItr(BTreeNode::Ref(), dictionary.getAllocator()),
+ _uniqueValues(0u),
+ _docIdLimit(docIdLimit),
+ _dictSize(_frozenDictionary.size()),
+ _numValues(numValues),
+ _hasWeight(hasWeight),
+ _useBitVector(useBitVector),
+ _pidx(),
+ _frozenRoot(),
+ _FSTC(0.0),
+ _PLSTC(0.0),
+ _esb(esb),
+ _minBvDocFreq(minBvDocFreq),
+ _gbv(nullptr)
+{
+}
+
+
+PostingListSearchContext::~PostingListSearchContext(void)
+{
+}
+
+
+void
+PostingListSearchContext::lookupTerm(const EnumStoreComparator &comp)
+{
+ _lowerDictItr.lower_bound(_frozenDictionary.getRoot(), EnumIndex(), comp);
+ _upperDictItr = _lowerDictItr;
+ if (_upperDictItr.valid() && !comp(EnumIndex(), _upperDictItr.getKey())) {
+ ++_upperDictItr;
+ _uniqueValues = 1u;
+ }
+}
+
+
+void
+PostingListSearchContext::lookupRange(const EnumStoreComparator &low,
+ const EnumStoreComparator &high)
+{
+ _lowerDictItr.lower_bound(_frozenDictionary.getRoot(), EnumIndex(), low);
+ _upperDictItr = _lowerDictItr;
+ if (_upperDictItr.valid() && !high(EnumIndex(), _upperDictItr.getKey())) {
+ _upperDictItr.seekPast(EnumIndex(), high);
+ }
+ _uniqueValues = _upperDictItr - _lowerDictItr;
+}
+
+
+void
+PostingListSearchContext::lookupSingle(void)
+{
+ if (_lowerDictItr.valid()) {
+ _pidx = _lowerDictItr.getData();
+ }
+}
+
+template class PostingListSearchContextT<btree::BTreeNoLeafData>;
+template class PostingListSearchContextT<int32_t>;
+template class PostingListFoldedSearchContextT<btree::BTreeNoLeafData>;
+template class PostingListFoldedSearchContextT<int32_t>;
+
+
+} // namespace attribute
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.h b/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.h
new file mode 100644
index 00000000000..f857c93049b
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.h
@@ -0,0 +1,388 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "enumstore.h"
+#include "postinglisttraits.h"
+#include "postingstore.h"
+#include "ipostinglistsearchcontext.h"
+#include <vespa/searchlib/common/bitvector.h>
+#include "attributevector.h"
+#include <vespa/vespalib/util/regexp.h>
+
+namespace search
+{
+
+namespace attribute
+{
+
+
+/**
+ * Search context helper for posting list attributes, used to instantiate
+ * iterators based on posting lists instead of brute force filtering search.
+ */
+
+class PostingListSearchContext : public IPostingListSearchContext
+{
+protected:
+ typedef EnumPostingTree Dictionary;
+ typedef Dictionary::ConstIterator DictionaryConstIterator;
+ typedef Dictionary::FrozenView FrozenDictionary;
+ typedef EnumStoreBase::Index EnumIndex;
+
+ const FrozenDictionary _frozenDictionary;
+ DictionaryConstIterator _lowerDictItr;
+ DictionaryConstIterator _upperDictItr;
+ uint32_t _uniqueValues;
+ uint32_t _docIdLimit;
+ uint32_t _dictSize;
+ uint64_t _numValues; // attr.getStatus().getNumValues();
+ bool _hasWeight;
+ bool _useBitVector;
+ search::btree::EntryRef _pidx;
+ search::btree::EntryRef _frozenRoot; // Posting list in tree form
+ float _FSTC; // Filtering Search Time Constant
+ float _PLSTC; // Posting List Search Time Constant
+ const EnumStoreBase &_esb;
+ uint32_t _minBvDocFreq;
+ const GrowableBitVector *_gbv; // bitvector if _useBitVector has been set
+
+
+ PostingListSearchContext(const Dictionary &dictionary,
+ uint32_t docIdLimit,
+ uint64_t numValues,
+ bool hasWeight,
+ const EnumStoreBase &esb,
+ uint32_t minBvDocFreq,
+ bool useBitVector);
+
+ ~PostingListSearchContext(void);
+
+ void lookupTerm(const EnumStoreComparator &comp);
+ void lookupRange(const EnumStoreComparator &low, const EnumStoreComparator &high);
+ void lookupSingle(void);
+ virtual bool useThis(const DictionaryConstIterator & it) const {
+ (void) it;
+ return true;
+ }
+
+ float calculateFilteringCost(void) const {
+ // filtering search time (ms) ~ FSTC * numValues; (FSTC =
+ // Filtering Search Time Constant)
+ return _FSTC * _numValues;
+ }
+
+ float calculatePostingListCost(uint32_t approxNumHits) const {
+ // search time (ms) ~ PLSTC * numHits * log(numHits); (PLSTC =
+ // Posting List Search Time Constant)
+ return _PLSTC * approxNumHits;
+ }
+
+ uint32_t calculateApproxNumHits(void) const {
+ float docsPerUniqueValue = static_cast<float>(_docIdLimit) /
+ static_cast<float>(_dictSize);
+ return static_cast<uint32_t>(docsPerUniqueValue * _uniqueValues);
+ }
+
+ virtual bool fallbackToFiltering(void) const {
+ uint32_t numHits = calculateApproxNumHits();
+ // numHits > 1000: make sure that posting lists are unit tested.
+ return (numHits > 1000) &&
+ (calculateFilteringCost() < calculatePostingListCost(numHits));
+ }
+
+public:
+};
+
+
+template <class DataT>
+class PostingListSearchContextT : public PostingListSearchContext
+{
+protected:
+ typedef DataT DataType;
+ typedef PostingListTraits<DataType> Traits;
+ typedef typename Traits::PostingList PostingList;
+ typedef typename Traits::Posting Posting;
+ typedef std::vector<Posting> PostingVector;
+ typedef btree::EntryRef EntryRef;
+ typedef typename PostingList::ConstIterator PostingConstIterator;
+
+ const PostingList &_postingList;
+ /*
+ * Synthetic posting lists for range search, in array or bitvector form
+ */
+ PostingVector _array;
+ BitVector::UP _bitVector;
+ bool _fetchPostingsDone;
+ bool _arrayValid;
+
+ static const long MIN_UNIQUE_VALUES_BEFORE_APPROXIMATION = 100;
+ static const long MIN_UNIQUE_VALUES_TO_NUMDOCS_RATIO_BEFORE_APPROXIMATION = 20;
+ static const long MIN_APPROXHITS_TO_NUMDOCS_RATIO_BEFORE_APPROXIMATION = 10;
+
+ PostingListSearchContextT(const Dictionary &dictionary,
+ uint32_t docIdLimit,
+ uint64_t numValues,
+ bool hasWeight,
+ const PostingList &postingList,
+ const EnumStoreBase &esb,
+ uint32_t minBvCocFreq,
+ bool useBitVector);
+
+ void lookupSingle(void);
+ size_t countHits(void) const;
+ void fillArray(size_t numDocs);
+ void fillBitVector(void);
+
+ PostingVector &
+ merge(PostingVector &v, PostingVector &temp,
+ const std::vector<size_t> & startPos) __attribute__((noinline));
+
+ void fetchPostings(bool strict) override;
+ // this will be called instead of the fetchPostings function in some cases
+ void diversify(bool forward, size_t wanted_hits,
+ const IAttributeVector &diversity_attr, size_t max_per_group,
+ size_t cutoff_groups, bool cutoff_strict);
+
+ queryeval::SearchIterator::UP
+ createPostingIterator(fef::TermFieldMatchData *matchData, bool strict) override;
+
+ unsigned int singleHits(void) const;
+ unsigned int approximateHits(void) const override;
+ void applyRangeLimit(int rangeLimit);
+};
+
+
+template <class DataT>
+class PostingListFoldedSearchContextT : public PostingListSearchContextT<DataT>
+{
+protected:
+ typedef PostingListSearchContextT<DataT> Parent;
+ typedef typename Parent::Dictionary Dictionary;
+ typedef typename Parent::PostingList PostingList;
+ using Parent::_lowerDictItr;
+ using Parent::_uniqueValues;
+ using Parent::_postingList;
+ using Parent::_docIdLimit;
+ using Parent::countHits;
+ using Parent::singleHits;
+
+ PostingListFoldedSearchContextT(const Dictionary &dictionary,
+ uint32_t docIdLimit,
+ uint64_t numValues,
+ bool hasWeight,
+ const PostingList &postingList,
+ const EnumStoreBase &esb,
+ uint32_t minBvCocFreq,
+ bool useBitVector);
+
+ unsigned int approximateHits(void) const override;
+};
+
+
+template <typename BaseSC, typename BaseSC2, typename AttrT>
+class PostingSearchContext: public BaseSC,
+ public BaseSC2
+{
+public:
+ typedef typename AttrT::EnumStore EnumStore;
+protected:
+ const AttrT &_toBeSearched;
+ const EnumStore &_enumStore;
+
+ PostingSearchContext(QueryTermSimple::UP qTerm, bool useBitVector, const AttrT &toBeSearched);
+};
+
+template <typename BaseSC, typename AttrT, typename DataT>
+class StringPostingSearchContext
+ : public PostingSearchContext<BaseSC, PostingListFoldedSearchContextT<DataT>, AttrT>
+{
+private:
+ typedef PostingListTraits<DataT> AggregationTraits;
+ typedef typename AggregationTraits::PostingList PostingList;
+ typedef typename PostingList::Iterator PostingIterator;
+ typedef typename PostingList::ConstIterator PostingConstIterator;
+ typedef PostingSearchContext<BaseSC, PostingListFoldedSearchContextT<DataT>, AttrT>
+ Parent;
+ typedef typename Parent::EnumStore EnumStore;
+ typedef typename EnumStore::FoldedComparatorType FoldedComparatorType;
+ typedef vespalib::Regexp Regexp;
+ using Parent::_toBeSearched;
+ using Parent::_enumStore;
+ using Parent::getRegex;
+ bool useThis(const PostingListSearchContext::DictionaryConstIterator & it) const override {
+ return getRegex() ? getRegex()->match(_enumStore.getValue(it.getKey())) : true;
+ }
+public:
+ StringPostingSearchContext(QueryTermSimple::UP qTerm, bool useBitVector, const AttrT &toBeSearched);
+};
+
+template <typename BaseSC, typename AttrT, typename DataT>
+class NumericPostingSearchContext
+ : public PostingSearchContext<BaseSC, PostingListSearchContextT<DataT>, AttrT>
+{
+private:
+ typedef PostingSearchContext<BaseSC, PostingListSearchContextT<DataT>, AttrT> Parent;
+ typedef PostingListTraits<DataT> AggregationTraits;
+ typedef typename AggregationTraits::PostingList PostingList;
+ typedef typename PostingList::Iterator PostingIterator;
+ typedef typename PostingList::ConstIterator PostingConstIterator;
+ typedef typename Parent::EnumStore EnumStore;
+ typedef typename EnumStore::ComparatorType ComparatorType;
+ typedef typename AttrT::T BaseType;
+ typedef typename Parent::Params Params;
+ using Parent::_low;
+ using Parent::_high;
+ using Parent::_toBeSearched;
+ using Parent::_enumStore;
+ Params _params;
+
+ void getIterators(bool shouldApplyRangeLimit);
+ bool valid() const override { return this->isValid(); }
+
+ bool fallbackToFiltering(void) const override {
+ return (this->getRangeLimit() != 0)
+ ? false
+ : Parent::fallbackToFiltering();
+ }
+ unsigned int approximateHits(void) const override {
+ const unsigned int estimate = PostingListSearchContextT<DataT>::approximateHits();
+ const unsigned int limit = std::abs(this->getRangeLimit());
+ return ((limit > 0) && (limit < estimate))
+ ? limit
+ : estimate;
+ }
+ void fetchPostings(bool strict) override {
+ if (params().diversityAttribute() != nullptr) {
+ bool forward = (this->getRangeLimit() > 0);
+ size_t wanted_hits = std::abs(this->getRangeLimit());
+ PostingListSearchContextT<DataT>::diversify(forward, wanted_hits,
+ *(params().diversityAttribute()), this->getMaxPerGroup(),
+ params().diversityCutoffGroups(), params().diversityCutoffStrict());
+ } else {
+ PostingListSearchContextT<DataT>::fetchPostings(strict);
+ }
+ }
+
+public:
+ NumericPostingSearchContext(QueryTermSimple::UP qTerm, const Params & params, const AttrT &toBeSearched);
+ const Params &params() const { return _params; }
+};
+
+
+template <typename BaseSC, typename BaseSC2, typename AttrT>
+PostingSearchContext<BaseSC, BaseSC2, AttrT>::
+PostingSearchContext(QueryTermSimple::UP qTerm, bool useBitVector, const AttrT &toBeSearched)
+ : BaseSC(std::move(qTerm), toBeSearched),
+ BaseSC2(toBeSearched.getEnumStore().getPostingDictionary(),
+ toBeSearched.getCommittedDocIdLimit(),
+ toBeSearched.getStatus().getNumValues(),
+ toBeSearched.hasWeightedSetType(),
+ toBeSearched.getPostingList(),
+ toBeSearched.getEnumStore(),
+ toBeSearched._postingList._minBvDocFreq,
+ useBitVector),
+ _toBeSearched(toBeSearched),
+ _enumStore(_toBeSearched.getEnumStore())
+{
+ this->_plsc = static_cast<attribute::IPostingListSearchContext *>(this);
+}
+
+
+template <typename BaseSC, typename AttrT, typename DataT>
+StringPostingSearchContext<BaseSC, AttrT, DataT>::
+StringPostingSearchContext(QueryTermSimple::UP qTerm, bool useBitVector, const AttrT &toBeSearched)
+ : Parent(std::move(qTerm), useBitVector, toBeSearched)
+{
+ // after benchmarking prefix search performance on single, array, and weighted set fast-aggregate string attributes
+ // with 1M values the following constant has been derived:
+ this->_FSTC = 0.000028;
+
+ // after benchmarking prefix search performance on single, array, and weighted set fast-search string attributes
+ // with 1M values the following constant has been derived:
+ this->_PLSTC = 0.000000;
+
+ if (this->valid()) {
+ if (this->isPrefix()) {
+ FoldedComparatorType comp(_enumStore, this->queryTerm().getTerm(), true);
+ this->lookupRange(comp, comp);
+ } else if (this->isRegex()) {
+ vespalib::string prefix(Regexp::get_prefix(this->queryTerm().getTerm()));
+ FoldedComparatorType comp(_enumStore, prefix.c_str(), true);
+ this->lookupRange(comp, comp);
+ } else {
+ FoldedComparatorType comp(_enumStore, this->queryTerm().getTerm());
+ this->lookupTerm(comp);
+ }
+ if (this->_uniqueValues == 1u) {
+ this->lookupSingle();
+ }
+ }
+}
+
+
+template <typename BaseSC, typename AttrT, typename DataT>
+NumericPostingSearchContext<BaseSC, AttrT, DataT>::
+NumericPostingSearchContext(QueryTermSimple::UP qTerm, const Params & params_in, const AttrT &toBeSearched)
+ : Parent(std::move(qTerm), params_in.useBitVector(), toBeSearched),
+ _params(params_in)
+{
+ // after simplyfying the formula and simple benchmarking and thumbs in the air
+ // a ratio of 8 between numvalues and estimated number of hits has been found.
+ this->_FSTC = 1;
+
+ this->_PLSTC = 8;
+ if (valid()) {
+ if (_low == _high) {
+ ComparatorType comp(_enumStore, _low);
+ this->lookupTerm(comp);
+ } else if (_low < _high) {
+ bool shouldApplyRangeLimit = (params().diversityAttribute() == nullptr) &&
+ (this->getRangeLimit() != 0);
+ getIterators( shouldApplyRangeLimit );
+ }
+ if (this->_uniqueValues == 1u) {
+ this->lookupSingle();
+ }
+ }
+}
+
+
+template <typename BaseSC, typename AttrT, typename DataT>
+void
+NumericPostingSearchContext<BaseSC, AttrT, DataT>::
+getIterators(bool shouldApplyRangeLimit)
+{
+ bool isFloat =
+ _toBeSearched.getBasicType() == BasicType::FLOAT ||
+ _toBeSearched.getBasicType() == BasicType::DOUBLE;
+ bool isUnsigned = _toBeSearched.getInternalBasicType().isUnsigned();
+ search::Range<BaseType> capped = this->template cappedRange<BaseType>(isFloat, isUnsigned);
+
+ ComparatorType compLow(_enumStore, capped.lower());
+ ComparatorType compHigh(_enumStore, capped.upper());
+
+ this->lookupRange(compLow, compHigh);
+ if (shouldApplyRangeLimit) {
+ this->applyRangeLimit(this->getRangeLimit());
+ }
+
+ if (this->_lowerDictItr != this->_upperDictItr) {
+ _low = _enumStore.getValue(this->_lowerDictItr.getKey());
+ auto last = this->_upperDictItr;
+ --last;
+ _high = _enumStore.getValue(last.getKey());
+ }
+}
+
+
+
+extern template class PostingListSearchContextT<btree::BTreeNoLeafData>;
+extern template class PostingListSearchContextT<int32_t>;
+extern template class PostingListFoldedSearchContextT<btree::BTreeNoLeafData>;
+extern template class PostingListFoldedSearchContextT<int32_t>;
+
+} // namespace attribute
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.hpp b/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.hpp
new file mode 100644
index 00000000000..295feae75a6
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.hpp
@@ -0,0 +1,388 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/queryeval/emptysearch.h>
+#include "dociditerator.h"
+#include "attributeiterators.h"
+#include <vespa/searchlib/common/bitvectoriterator.h>
+#include "diversity.h"
+
+namespace search
+{
+
+using queryeval::EmptySearch;
+using queryeval::SearchIterator;
+
+namespace attribute
+{
+
+template <typename DataT>
+PostingListSearchContextT<DataT>::
+PostingListSearchContextT(const Dictionary &dictionary,
+ uint32_t docIdLimit,
+ uint64_t numValues,
+ bool hasWeight,
+ const PostingList &postingList,
+ const EnumStoreBase &esb,
+ uint32_t minBvDocFreq,
+ bool useBitVector)
+ : PostingListSearchContext(dictionary, docIdLimit, numValues, hasWeight,
+ esb, minBvDocFreq, useBitVector),
+ _postingList(postingList),
+ _array(),
+ _bitVector(),
+ _fetchPostingsDone(false),
+ _arrayValid(false)
+{
+}
+
+
+template <typename DataT>
+void
+PostingListSearchContextT<DataT>::lookupSingle(void)
+{
+ PostingListSearchContext::lookupSingle();
+ if (!_pidx.valid())
+ return;
+ uint32_t typeId = _postingList.getTypeId(_pidx);
+ if (!_postingList.isSmallArray(typeId)) {
+ if (_postingList.isBitVector(typeId)) {
+ const BitVectorEntry *bve = _postingList.getBitVectorEntry(_pidx);
+ const GrowableBitVector *bv = bve->_bv.get();
+ if (_useBitVector) {
+ _gbv = bv;
+ } else {
+ _pidx = bve->_tree;
+ if (_pidx.valid()) {
+ typename PostingList::BTreeType::FrozenView
+ frozenView(_postingList.getTreeEntry(_pidx)->
+ getFrozenView(_postingList.getAllocator()));
+ _frozenRoot = frozenView.getRoot();
+ if (!_frozenRoot.valid()) {
+ _pidx = btree::EntryRef();
+ }
+ } else {
+ _gbv = bv;
+ }
+ }
+ } else {
+ typename PostingList::BTreeType::FrozenView
+ frozenView(_postingList.getTreeEntry(_pidx)->
+ getFrozenView(_postingList.getAllocator()));
+ _frozenRoot = frozenView.getRoot();
+ if (!_frozenRoot.valid()) {
+ _pidx = btree::EntryRef();
+ }
+ }
+ }
+}
+
+
+template <typename DataT>
+size_t
+PostingListSearchContextT<DataT>::countHits(void) const
+{
+ size_t sum(0);
+ for (auto it(_lowerDictItr); it != _upperDictItr; ++it) {
+ if (useThis(it)) {
+ sum += _postingList.frozenSize(it.getData());
+ }
+ }
+ return sum;
+}
+
+
+template <typename DataT>
+void
+PostingListSearchContextT<DataT>::fillArray(size_t numDocs)
+{
+ _array.clear();
+ _array.reserve(numDocs);
+ std::vector<size_t> startPos;
+ startPos.reserve(_uniqueValues + 1);
+ startPos.push_back(0);
+ for (auto it(_lowerDictItr); it != _upperDictItr; ++it) {
+ if (useThis(it)) {
+ _postingList.foreach_frozen(it.getData(),
+ [&](uint32_t key, const DataT &data)
+ { _array.push_back(Posting(key, data));
+ });
+ startPos.push_back(_array.size());
+ }
+ }
+ if (startPos.size() > 2) {
+ PostingVector temp(_array.size());
+ _array.swap(merge(_array, temp, startPos));
+ }
+ _arrayValid = true;
+}
+
+
+template <typename DataT>
+void
+PostingListSearchContextT<DataT>::fillBitVector(void)
+{
+ _bitVector = BitVector::create(_docIdLimit);
+ BitVector &bv(*_bitVector);
+ uint32_t limit = bv.size();
+ for (auto it(_lowerDictItr); it != _upperDictItr; ++it) {
+ if (useThis(it)) {
+ _postingList.foreach_frozen_key(it.getData(),
+ [&](uint32_t key)
+ { if (key < limit) {
+ bv.setBit(key);
+ }
+ });
+ }
+ }
+ bv.invalidateCachedCount();
+}
+
+
+template <typename DataT>
+typename PostingListSearchContextT<DataT>::PostingVector &
+PostingListSearchContextT<DataT>::
+merge(PostingVector &v, PostingVector &temp,
+ const std::vector<size_t> &startPos)
+{
+ std::vector<size_t> nextStartPos;
+ nextStartPos.reserve((startPos.size() + 1) / 2);
+ nextStartPos.push_back(0);
+ for (size_t i(0), m((startPos.size() - 1) / 2); i < m; i++) {
+ size_t aStart = startPos[i * 2 + 0];
+ size_t aEnd = startPos[i * 2 + 1];
+ size_t bStart = aEnd;
+ size_t bEnd = startPos[i * 2 + 2];
+ typename PostingVector::const_iterator it = v.begin();
+ std::merge(it + aStart, it + aEnd,
+ it + bStart, it + bEnd,
+ temp.begin() + aStart);
+ nextStartPos.push_back(bEnd);
+ }
+ if ((startPos.size() - 1) % 2) {
+ for (size_t i(startPos[startPos.size() - 2]), m(v.size()); i < m; i++) {
+ temp[i] = v[i];
+ }
+ nextStartPos.push_back(temp.size());
+ }
+ return (nextStartPos.size() > 2) ? merge(temp, v, nextStartPos) : temp;
+}
+
+
+template <typename DataT>
+void
+PostingListSearchContextT<DataT>::fetchPostings(bool strict)
+{
+ assert(!_fetchPostingsDone);
+ _fetchPostingsDone = true;
+ if (_uniqueValues < 2u) {
+ return;
+ }
+ if (strict && !fallbackToFiltering()) {
+ size_t sum(countHits());
+ if (sum < _docIdLimit / 64) {
+ fillArray(sum);
+ } else {
+ fillBitVector();
+ }
+ }
+}
+
+
+template <typename DataT>
+void
+PostingListSearchContextT<DataT>::diversify(bool forward, size_t wanted_hits,
+ const IAttributeVector &diversity_attr, size_t max_per_group,
+ size_t cutoff_groups, bool cutoff_strict)
+{
+ assert(!_fetchPostingsDone);
+ _fetchPostingsDone = true;
+ _array.clear();
+ _array.reserve(wanted_hits);
+ std::vector<size_t> fragments;
+ fragments.push_back(0);
+ diversity::diversify(forward, _lowerDictItr, _upperDictItr, _postingList, wanted_hits,
+ diversity_attr, max_per_group, cutoff_groups, cutoff_strict,
+ _array, fragments);
+ if (fragments.size() > 2) {
+ PostingVector temp(_array.size());
+ _array.swap(merge(_array, temp, fragments));
+ }
+ _arrayValid = true;
+}
+
+
+template <typename DataT>
+SearchIterator::UP
+PostingListSearchContextT<DataT>::
+createPostingIterator(fef::TermFieldMatchData *matchData, bool strict)
+{
+ assert(_fetchPostingsDone);
+ if (_uniqueValues == 0u) {
+ return SearchIterator::UP(new EmptySearch());
+ }
+ if (_arrayValid || (_bitVector.get() != nullptr)) { // synthetic results are available
+ if (!_array.empty()) {
+ assert(_arrayValid);
+ typedef DocIdIterator<Posting> DocIt;
+ DocIt postings;
+ postings.set(&_array[0], &_array[_array.size()]);
+ return (_postingList._isFilter)
+ ? SearchIterator::UP(new FilterAttributePostingListIteratorT<DocIt>(postings, matchData))
+ : SearchIterator::UP(new AttributePostingListIteratorT<DocIt>(postings, _hasWeight, matchData));
+ }
+ if (_arrayValid) {
+ return SearchIterator::UP(new EmptySearch());
+ }
+ BitVector *bv(_bitVector.get());
+ assert(bv != nullptr);
+ return search::BitVectorIterator::create(bv, bv->size(), *matchData, strict);
+ }
+ if (_uniqueValues == 1) {
+ if (_gbv != nullptr) {
+ return BitVectorIterator::create(_gbv, std::min(_gbv->size(), _docIdLimit), *matchData, strict);
+ }
+ if (!_pidx.valid()) {
+ return SearchIterator::UP(new EmptySearch());
+ }
+ const PostingList &postingList = _postingList;
+ if (!_frozenRoot.valid()) {
+ uint32_t clusterSize = _postingList.getClusterSize(_pidx);
+ assert(clusterSize != 0);
+ typedef DocIdMinMaxIterator<Posting> DocIt;
+ DocIt postings;
+ const Posting *array = postingList.getKeyDataEntry(_pidx, clusterSize);
+ postings.set(array, array + clusterSize);
+ return (postingList._isFilter)
+ ? SearchIterator::UP(new FilterAttributePostingListIteratorT<DocIt>(postings, matchData))
+ : SearchIterator::UP(new AttributePostingListIteratorT<DocIt>(postings, _hasWeight, matchData));
+ }
+ typename PostingList::BTreeType::FrozenView frozenView(_frozenRoot, postingList.getAllocator());
+ PostingConstIterator postings = frozenView.begin();
+ return (_postingList._isFilter)
+ ? SearchIterator::UP(new FilterAttributePostingListIteratorT<PostingConstIterator> (postings, matchData))
+ : SearchIterator::UP(new AttributePostingListIteratorT<PostingConstIterator> (postings, _hasWeight, matchData));
+ }
+ // returning nullptr will trigger fallback to filter iterator
+ return SearchIterator::UP();
+}
+
+
+template <typename DataT>
+unsigned int
+PostingListSearchContextT<DataT>::singleHits(void) const
+{
+ if (_gbv) {
+ const GrowableBitVector *bv = _gbv;
+ uint32_t extraGuards = bv->capacity() - bv->size();
+ // Some inaccuracy is expected, data changes underfeet
+ int32_t res = bv->countTrueBits() - extraGuards;
+ if (res < 1)
+ res = 1;
+ return res;
+ }
+ if (!_pidx.valid())
+ return 0u;
+ if (!_frozenRoot.valid())
+ return _postingList.getClusterSize(_pidx);
+ typename PostingList::BTreeType::FrozenView
+ frozenView(_frozenRoot, _postingList.getAllocator());
+ return frozenView.size();
+}
+
+template <typename DataT>
+unsigned int
+PostingListSearchContextT<DataT>::approximateHits(void) const
+{
+ unsigned int numHits = 0;
+ if (_uniqueValues == 0u) {
+ } else if (_uniqueValues == 1u) {
+ numHits = singleHits();
+ } else {
+ if (this->fallbackToFiltering()) {
+ numHits = _docIdLimit;
+ } else if (_uniqueValues > MIN_UNIQUE_VALUES_BEFORE_APPROXIMATION) {
+ if ((_uniqueValues *
+ MIN_UNIQUE_VALUES_TO_NUMDOCS_RATIO_BEFORE_APPROXIMATION >
+ static_cast<int>(_docIdLimit)) ||
+ (this->calculateApproxNumHits() *
+ MIN_APPROXHITS_TO_NUMDOCS_RATIO_BEFORE_APPROXIMATION >
+ _docIdLimit)) {
+ numHits = this->calculateApproxNumHits();
+ } else {
+ // XXX: Unsafe
+ numHits = countHits();
+ }
+ } else {
+ // XXX: Unsafe
+ numHits = countHits();
+ }
+ }
+ return numHits;
+}
+
+
+template <typename DataT>
+void
+PostingListSearchContextT<DataT>::applyRangeLimit(int rangeLimit)
+{
+ if (rangeLimit > 0) {
+ DictionaryConstIterator middle = _lowerDictItr;
+ for (int n(0); (n < rangeLimit) && (middle != _upperDictItr); ++middle) {
+ n += _postingList.frozenSize(middle.getData());
+ }
+ _upperDictItr = middle;
+ _uniqueValues = _upperDictItr - _lowerDictItr;
+ } else if ((rangeLimit < 0) && (_lowerDictItr != _upperDictItr)) {
+ rangeLimit = -rangeLimit;
+ DictionaryConstIterator middle = _upperDictItr;
+ for (int n(0); (n < rangeLimit) && (middle != _lowerDictItr); ) {
+ --middle;
+ n += _postingList.frozenSize(middle.getData());
+ }
+ _lowerDictItr = middle;
+ _uniqueValues = _upperDictItr - _lowerDictItr;
+ }
+}
+
+
+template <typename DataT>
+PostingListFoldedSearchContextT<DataT>::
+PostingListFoldedSearchContextT(const Dictionary &dictionary,
+ uint32_t docIdLimit,
+ uint64_t numValues,
+ bool hasWeight,
+ const PostingList &postingList,
+ const EnumStoreBase &esb,
+ uint32_t minBvDocFreq,
+ bool useBitVector)
+ : Parent(dictionary, docIdLimit, numValues, hasWeight, postingList,
+ esb, minBvDocFreq, useBitVector)
+{
+}
+
+
+template <typename DataT>
+unsigned int
+PostingListFoldedSearchContextT<DataT>::approximateHits(void) const
+{
+ unsigned int numHits = 0;
+ if (_uniqueValues == 0u) {
+ } else if (_uniqueValues == 1u) {
+ numHits = singleHits();
+ } else {
+ if (this->fallbackToFiltering()) {
+ numHits = _docIdLimit;
+ } else {
+ // XXX: Unsafe
+ numHits = countHits();
+ }
+ }
+ return numHits;
+}
+
+} // namespace attribute
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/attribute/postinglisttraits.cpp b/searchlib/src/vespa/searchlib/attribute/postinglisttraits.cpp
new file mode 100644
index 00000000000..38bf41d4c49
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/postinglisttraits.cpp
@@ -0,0 +1,11 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "postinglisttraits.h"
+#include <vespa/log/log.h>
+
+LOG_SETUP(".searchlib.attribute.postinglisttraits");
+namespace search {
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/attribute/postinglisttraits.h b/searchlib/src/vespa/searchlib/attribute/postinglisttraits.h
new file mode 100644
index 00000000000..c2504f779a1
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/postinglisttraits.h
@@ -0,0 +1,56 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/btree/btreestore.h>
+
+namespace search
+{
+
+namespace attribute
+{
+
+template <typename DataT> class PostingListTraits;
+template <typename DataT> class PostingStore;
+
+template <>
+class PostingListTraits<btree::BTreeNoLeafData>
+{
+public:
+ typedef btree::NoAggregated AggregatedType;
+ typedef btree::NoAggrCalc AggrCalcType;
+ typedef btree::BTreeStore<uint32_t, btree::BTreeNoLeafData,
+ AggregatedType,
+ std::less<uint32_t>,
+ btree::BTreeDefaultTraits,
+ AggrCalcType> PostingStoreBase;
+ typedef PostingStore<btree::BTreeNoLeafData> PostingList;
+ typedef PostingStoreBase::KeyDataType Posting;
+};
+
+
+template <>
+class PostingListTraits<int32_t>
+{
+public:
+ typedef btree::MinMaxAggregated AggregatedType;
+ typedef btree::MinMaxAggrCalc AggrCalcType;
+ typedef btree::BTreeStore<uint32_t, int32_t,
+ AggregatedType,
+ std::less<uint32_t>,
+ btree::BTreeDefaultTraits,
+ AggrCalcType> PostingStoreBase;
+ typedef PostingStore<int32_t> PostingList;
+ typedef PostingStoreBase::KeyDataType Posting;
+};
+
+
+} // namespace attribute
+
+typedef btree::BTreeKeyData<uint32_t, btree::BTreeNoLeafData> AttributePosting;
+
+typedef btree::BTreeKeyData<uint32_t, int32_t> AttributeWeightPosting;
+
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/attribute/postingstore.cpp b/searchlib/src/vespa/searchlib/attribute/postingstore.cpp
new file mode 100644
index 00000000000..7902d3f8d87
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/postingstore.cpp
@@ -0,0 +1,638 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "postingstore.h"
+#include <vespa/searchlib/btree/datastore.hpp>
+#include <vespa/searchlib/btree/btreeiterator.hpp>
+#include <vespa/searchcommon/attribute/config.h>
+#include <vespa/searchcommon/attribute/status.h>
+
+namespace search
+{
+
+namespace attribute
+{
+
+using btree::BTreeNoLeafData;
+
+// #define FORCE_BITVECTORS
+
+
+PostingStoreBase2::PostingStoreBase2(EnumPostingTree &dict, Status &status,
+ const Config &config)
+ :
+#ifdef FORCE_BITVECTORS
+ _enableBitVectors(true),
+#else
+ _enableBitVectors(config.getEnableBitVectors()),
+#endif
+ _enableOnlyBitVector(config.getEnableOnlyBitVector()),
+ _isFilter(config.getIsFilter()),
+ _bvSize(64u),
+ _bvCapacity(128u),
+ _minBvDocFreq(64),
+ _maxBvDocFreq(std::numeric_limits<uint32_t>::max()),
+ _bvs(),
+ _dict(dict),
+ _status(status),
+ _bvExtraBytes(0)
+{
+}
+
+
+PostingStoreBase2::~PostingStoreBase2()
+{
+}
+
+
+bool
+PostingStoreBase2::resizeBitVectors(uint32_t newSize, uint32_t newCapacity)
+{
+ assert(newCapacity >= newSize);
+ newSize = (newSize + 63) & ~63;
+ if (newSize >= newCapacity)
+ newSize = newCapacity;
+ if (newSize == _bvSize && newCapacity == _bvCapacity)
+ return false;
+ _minBvDocFreq = std::max(newSize >> 6, 64u);
+ _maxBvDocFreq = std::max(newSize >> 5, 128u);
+ if (_bvs.empty()) {
+ _bvSize = newSize;
+ _bvCapacity = newCapacity;
+ return false;
+ }
+ _bvSize = newSize;
+ _bvCapacity = newCapacity;
+ return removeSparseBitVectors();
+}
+
+
+template <typename DataT>
+PostingStore<DataT>::PostingStore(EnumPostingTree &dict, Status &status,
+ const Config &config)
+ : Parent(false),
+ PostingStoreBase2(dict, status, config),
+ _bvType(1, 1024u, RefType::offsetSize())
+{
+ // TODO: Add type for bitvector
+ _store.addType(&_bvType);
+ _store.initActiveBuffers();
+ _store.enableFreeLists();
+}
+
+
+template <typename DataT>
+PostingStore<DataT>::~PostingStore()
+{
+ _builder.clear();
+ _store.dropBuffers(); // Drop buffers before type handlers are dropped
+}
+
+
+template <typename DataT>
+bool
+PostingStore<DataT>::removeSparseBitVectors()
+{
+ bool res = false;
+ bool needscan = false;
+ for (auto &i : _bvs) {
+ RefType iRef(i);
+ uint32_t typeId = getTypeId(iRef);
+ assert(isBitVector(typeId));
+ BitVectorEntry *bve = getWBitVectorEntry(iRef);
+ GrowableBitVector &bv = *bve->_bv.get();
+ uint32_t docFreq = bv.countTrueBits();
+ if (bve->_tree.valid()) {
+ RefType iRef2(bve->_tree);
+ assert(isBTree(iRef2));
+ const BTreeType *tree = getTreeEntry(iRef2);
+ assert(tree->size(_allocator) == docFreq);
+ }
+ if (docFreq < _minBvDocFreq)
+ needscan = true;
+ unsigned int oldExtraSize = bv.extraByteSize();
+ if (bv.size() > _bvSize) {
+ bv.shrink(_bvSize);
+ res = true;
+ }
+ if (bv.capacity() < _bvCapacity) {
+ bv.reserve(_bvCapacity);
+ res = true;
+ }
+ if (bv.size() < _bvSize) {
+ bv.extend(_bvSize);
+ }
+ unsigned int newExtraSize = bv.extraByteSize();
+ if (oldExtraSize != newExtraSize) {
+ _bvExtraBytes = _bvExtraBytes + newExtraSize - oldExtraSize;
+ }
+ }
+ if (needscan) {
+ typedef EnumPostingTree::Iterator EnumIterator;
+ for (EnumIterator dictItr = _dict.begin(); dictItr.valid(); ++dictItr) {
+ if (!isBitVector(getTypeId(dictItr.getData())))
+ continue;
+ EntryRef ref(dictItr.getData());
+ RefType iRef(ref);
+ uint32_t typeId = getTypeId(iRef);
+ assert(isBitVector(typeId));
+ assert(_bvs.find(ref.ref() )!= _bvs.end());
+ BitVectorEntry *bve = getWBitVectorEntry(iRef);
+ BitVector &bv = *bve->_bv.get();
+ uint32_t docFreq = bv.countTrueBits();
+ if (bve->_tree.valid()) {
+ RefType iRef2(bve->_tree);
+ assert(isBTree(iRef2));
+ const BTreeType *tree = getTreeEntry(iRef2);
+ assert(tree->size(_allocator) == docFreq);
+ }
+ if (docFreq < _minBvDocFreq) {
+ dropBitVector(ref);
+ if (ref.valid()) {
+ iRef = ref;
+ typeId = getTypeId(iRef);
+ if (isBTree(typeId)) {
+ BTreeType *tree = getWTreeEntry(iRef);
+ normalizeTree(ref, tree, false);
+ }
+ }
+ _dict.thaw(dictItr);
+ dictItr.writeData(ref);
+ res = true;
+ }
+ }
+ }
+ return res;
+}
+
+
+template <typename DataT>
+void
+PostingStore<DataT>::applyNew(EntryRef &ref,
+ AddIter a,
+ AddIter ae)
+{
+ // No old data
+ assert(!ref.valid());
+ size_t additionSize(ae - a);
+ uint32_t clusterSize = additionSize;
+ if (clusterSize <= clusterLimit) {
+ applyNewArray(ref, a, ae);
+ } else if (_enableBitVectors && clusterSize >= _maxBvDocFreq) {
+ applyNewBitVector(ref, a, ae);
+ } else {
+ applyNewTree(ref, a, ae, CompareT());
+ }
+}
+
+
+template <typename DataT>
+void
+PostingStore<DataT>::makeDegradedTree(EntryRef &ref,
+ const BitVector &bv)
+{
+ assert(!ref.valid());
+ BTreeTypeRefPair tPair(allocBTree());
+ BTreeType *tree = tPair.second;
+ Builder &builder = _builder;
+ builder.reuse();
+ uint32_t docIdLimit = _bvSize;
+ assert(_bvSize == bv.size());
+ uint32_t docId = bv.getFirstTrueBit();
+ while (docId < docIdLimit) {
+ builder.insert(docId, bitVectorWeight());
+ docId = bv.getNextTrueBit(docId + 1);
+ }
+ tree->assign(builder, _allocator);
+ assert(tree->size(_allocator) == bv.countTrueBits());
+ // barrier ?
+ ref = tPair.first;
+}
+
+
+template <typename DataT>
+void
+PostingStore<DataT>::dropBitVector(EntryRef &ref)
+{
+ assert(ref.valid());
+ RefType iRef(ref);
+ uint32_t typeId = getTypeId(iRef);
+ assert(isBitVector(typeId));
+ BitVectorEntry *bve = getWBitVectorEntry(iRef);
+ AllocatedBitVector *bv = bve->_bv.get();
+ assert(bv);
+ uint32_t docFreq = bv->countTrueBits();
+ EntryRef ref2(bve->_tree);
+ if (!ref2.valid()) {
+ makeDegradedTree(ref2, *bv);
+ }
+ assert(ref2.valid());
+ assert(isBTree(ref2));
+ const BTreeType *tree = getTreeEntry(ref2);
+ assert(tree->size(_allocator) == docFreq);
+ _bvs.erase(ref.ref());
+ _store.holdElem(iRef, 1);
+ _status.decBitVectors();
+ _bvExtraBytes -= bv->extraByteSize();
+ ref = ref2;
+}
+
+
+template <typename DataT>
+void
+PostingStore<DataT>::makeBitVector(EntryRef &ref)
+{
+ assert(ref.valid());
+ RefType iRef(ref);
+ uint32_t typeId = getTypeId(iRef);
+ assert(isBTree(typeId));
+ std::shared_ptr<GrowableBitVector> bvsp;
+ vespalib::GenerationHolder &genHolder = _store.getGenerationHolder();
+ bvsp.reset(new GrowableBitVector(_bvSize, _bvCapacity, genHolder));
+ AllocatedBitVector &bv = *bvsp.get();
+ uint32_t docIdLimit = _bvSize;
+ Iterator it = begin(ref);
+ uint32_t expDocFreq = it.size();
+ (void) expDocFreq;
+ for (; it.valid(); ++it) {
+ uint32_t docId = it.getKey();
+ assert(docId < docIdLimit);
+ bv.setBit(docId);
+ }
+ bv.invalidateCachedCount();
+ assert(bv.countTrueBits() == expDocFreq);
+ BitVectorRefPair bPair(allocBitVector());
+ BitVectorEntry *bve = bPair.second;
+ if (_enableOnlyBitVector) {
+ BTreeType *tree = getWTreeEntry(iRef);
+ tree->clear(_allocator);
+ _store.holdElem(ref, 1);
+ } else {
+ bve->_tree = ref;
+ }
+ bve->_bv = bvsp;
+ _bvs.insert(bPair.first.ref());
+ _status.incBitVectors();
+ _bvExtraBytes += bv.extraByteSize();
+ // barrier ?
+ ref = bPair.first;
+}
+
+
+template <typename DataT>
+void
+PostingStore<DataT>::applyNewBitVector(EntryRef &ref,
+ AddIter aOrg,
+ AddIter ae)
+{
+ assert(!ref.valid());
+ RefType iRef(ref);
+ std::shared_ptr<GrowableBitVector> bvsp;
+ vespalib::GenerationHolder &genHolder = _store.getGenerationHolder();
+ bvsp.reset(new GrowableBitVector(_bvSize, _bvCapacity, genHolder));
+ AllocatedBitVector &bv = *bvsp.get();
+ uint32_t docIdLimit = _bvSize;
+ uint32_t expDocFreq = ae - aOrg;
+ (void) expDocFreq;
+ for (AddIter a = aOrg; a != ae; ++a) {
+ uint32_t docId = a->_key;
+ assert(docId < docIdLimit);
+ bv.setBit(docId);
+ }
+ bv.invalidateCachedCount();
+ assert(bv.countTrueBits() == expDocFreq);
+ BitVectorRefPair bPair(allocBitVector());
+ BitVectorEntry *bve = bPair.second;
+ if (!_enableOnlyBitVector) {
+ applyNewTree(bve->_tree, aOrg, ae, CompareT());
+ }
+ bve->_bv = bvsp;
+ _bvs.insert(bPair.first.ref());
+ _status.incBitVectors();
+ _bvExtraBytes += bv.extraByteSize();
+ // barrier ?
+ ref = bPair.first;
+}
+
+
+template <typename DataT>
+void
+PostingStore<DataT>::apply(BitVector &bv,
+ AddIter a,
+ AddIter ae,
+ RemoveIter r,
+ RemoveIter re)
+{
+ while (a != ae || r != re) {
+ if (r != re && (a == ae || *r < a->_key)) {
+ // remove
+ assert(*r < bv.size());
+ bv.slowClearBit(*r);
+ ++r;
+ } else {
+ if (r != re && !(a->_key < *r)) {
+ // update or add
+ assert(a->_key < bv.size());
+ bv.slowSetBit(a->_key);
+ ++r;
+ } else {
+ assert(a->_key < bv.size());
+ bv.slowSetBit(a->_key);
+ }
+ ++a;
+ }
+ }
+}
+
+
+template <typename DataT>
+void
+PostingStore<DataT>::apply(EntryRef &ref,
+ AddIter a,
+ AddIter ae,
+ RemoveIter r,
+ RemoveIter re)
+{
+ if (!ref.valid()) {
+ // No old data
+ applyNew(ref, a, ae);
+ return;
+ }
+ RefType iRef(ref);
+ bool wasArray = false;
+ uint32_t typeId = getTypeId(iRef);
+ uint32_t clusterSize = getClusterSize(typeId);
+ if (clusterSize != 0) {
+ wasArray = true;
+ if (applyCluster(ref, clusterSize, a, ae, r, re, CompareT()))
+ return;
+ iRef = ref;
+ typeId = getTypeId(iRef);
+ }
+ // Old data was tree or has been converted to a tree
+ // ... or old data was bitvector
+ if (isBitVector(typeId)) {
+ BitVectorEntry *bve = getWBitVectorEntry(iRef);
+ EntryRef ref2(bve->_tree);
+ RefType iRef2(ref2);
+ if (iRef2.valid()) {
+ assert(isBTree(iRef2));
+ BTreeType *tree = getWTreeEntry(iRef2);
+ applyTree(tree, a, ae, r, re, CompareT());
+ }
+ BitVector *bv = bve->_bv.get();
+ assert(bv);
+ apply(*bv, a, ae, r, re);
+ uint32_t docFreq = bv->countTrueBits();
+ if (docFreq < _minBvDocFreq) {
+ dropBitVector(ref);
+ if (ref.valid()) {
+ iRef = ref;
+ typeId = getTypeId(iRef);
+ if (isBTree(typeId)) {
+ BTreeType *tree = getWTreeEntry(iRef);
+ assert(tree->size(_allocator) == docFreq);
+ normalizeTree(ref, tree, wasArray);
+ }
+ }
+ }
+ } else {
+ BTreeType *tree = getWTreeEntry(iRef);
+ applyTree(tree, a, ae, r, re, CompareT());
+ if (_enableBitVectors) {
+ uint32_t docFreq = tree->size(_allocator);
+ if (docFreq >= _maxBvDocFreq) {
+ makeBitVector(ref);
+ return;
+ }
+ }
+ normalizeTree(ref, tree, wasArray);
+ }
+}
+
+
+template <typename DataT>
+size_t
+PostingStore<DataT>::internalSize(uint32_t typeId, const RefType & iRef) const
+{
+ if (isBitVector(typeId)) {
+ const BitVectorEntry *bve = getBitVectorEntry(iRef);
+ RefType iRef2(bve->_tree);
+ if (iRef2.valid()) {
+ assert(isBTree(iRef2));
+ const BTreeType *tree = getTreeEntry(iRef2);
+ return tree->size(_allocator);
+ } else {
+ const BitVector *bv = bve->_bv.get();
+ return bv->countTrueBits();
+ }
+ } else {
+ const BTreeType *tree = getTreeEntry(iRef);
+ return tree->size(_allocator);
+ }
+}
+
+
+template <typename DataT>
+size_t
+PostingStore<DataT>::internalFrozenSize(uint32_t typeId, const RefType & iRef) const
+{
+ if (isBitVector(typeId)) {
+ const BitVectorEntry *bve = getBitVectorEntry(iRef);
+ RefType iRef2(bve->_tree);
+ if (iRef2.valid()) {
+ assert(isBTree(iRef2));
+ const BTreeType *tree = getTreeEntry(iRef2);
+ return tree->frozenSize(_allocator);
+ } else {
+ const BitVector *bv = bve->_bv.get();
+ // Some inaccuracy is expected, data changes underfeet
+ int32_t res = bv->countTrueBits();
+ if (res < 1)
+ res = 1;
+ return res;
+ }
+ } else {
+ const BTreeType *tree = getTreeEntry(iRef);
+ return tree->frozenSize(_allocator);
+ }
+}
+
+
+template <typename DataT>
+typename PostingStore<DataT>::Iterator
+PostingStore<DataT>::begin(const EntryRef ref) const
+{
+ if (!ref.valid())
+ return Iterator();
+ RefType iRef(ref);
+ uint32_t typeId = getTypeId(iRef);
+ uint32_t clusterSize = getClusterSize(typeId);
+ if (clusterSize == 0) {
+ if (isBitVector(typeId)) {
+ const BitVectorEntry *bve = getBitVectorEntry(iRef);
+ RefType iRef2(bve->_tree);
+ if (iRef2.valid()) {
+ assert(isBTree(iRef2));
+ const BTreeType *tree = getTreeEntry(iRef2);
+ return tree->begin(_allocator);
+ }
+ return Iterator();
+ }
+ const BTreeType *tree = getTreeEntry(iRef);
+ return tree->begin(_allocator);
+ }
+ const KeyDataType *shortArray = getKeyDataEntry(iRef, clusterSize);
+ return Iterator(shortArray, clusterSize, _allocator, _aggrCalc);
+}
+
+
+template <typename DataT>
+typename PostingStore<DataT>::ConstIterator
+PostingStore<DataT>::beginFrozen(const EntryRef ref) const
+{
+ if (!ref.valid())
+ return ConstIterator();
+ RefType iRef(ref);
+ uint32_t typeId = getTypeId(iRef);
+ uint32_t clusterSize = getClusterSize(typeId);
+ if (clusterSize == 0) {
+ if (isBitVector(typeId)) {
+ const BitVectorEntry *bve = getBitVectorEntry(iRef);
+ RefType iRef2(bve->_tree);
+ if (iRef2.valid()) {
+ assert(isBTree(iRef2));
+ const BTreeType *tree = getTreeEntry(iRef2);
+ return tree->getFrozenView(_allocator).begin();
+ }
+ return ConstIterator();
+ }
+ const BTreeType *tree = getTreeEntry(iRef);
+ return tree->getFrozenView(_allocator).begin();
+ }
+ const KeyDataType *shortArray = getKeyDataEntry(iRef, clusterSize);
+ return ConstIterator(shortArray, clusterSize, _allocator, _aggrCalc);
+}
+
+
+template <typename DataT>
+void
+PostingStore<DataT>::beginFrozen(const EntryRef ref,
+ std::vector<ConstIterator> &where) const
+{
+ if (!ref.valid()) {
+ where.emplace_back();
+ return;
+ }
+ RefType iRef(ref);
+ uint32_t typeId = getTypeId(iRef);
+ uint32_t clusterSize = getClusterSize(typeId);
+ if (clusterSize == 0) {
+ if (isBitVector(typeId)) {
+ const BitVectorEntry *bve = getBitVectorEntry(iRef);
+ RefType iRef2(bve->_tree);
+ if (iRef2.valid()) {
+ assert(isBTree(iRef2));
+ const BTreeType *tree = getTreeEntry(iRef2);
+ tree->getFrozenView(_allocator).begin(where);
+ return;
+ }
+ where.emplace_back();
+ return;
+ }
+ const BTreeType *tree = getTreeEntry(iRef);
+ tree->getFrozenView(_allocator).begin(where);
+ return;
+ }
+ const KeyDataType *shortArray = getKeyDataEntry(iRef, clusterSize);
+ where.emplace_back(shortArray, clusterSize, _allocator, _aggrCalc);
+}
+
+
+template <typename DataT>
+typename PostingStore<DataT>::AggregatedType
+PostingStore<DataT>::getAggregated(const EntryRef ref) const
+{
+ if (!ref.valid())
+ return AggregatedType();
+ RefType iRef(ref);
+ uint32_t typeId = getTypeId(iRef);
+ uint32_t clusterSize = getClusterSize(typeId);
+ if (clusterSize == 0) {
+ if (isBitVector(typeId)) {
+ const BitVectorEntry *bve = getBitVectorEntry(iRef);
+ RefType iRef2(bve->_tree);
+ if (iRef2.valid()) {
+ assert(isBTree(iRef2));
+ const BTreeType *tree = getTreeEntry(iRef2);
+ return tree->getAggregated(_allocator);
+ }
+ return AggregatedType();
+ }
+ const BTreeType *tree = getTreeEntry(iRef);
+ return tree->getAggregated(_allocator);
+ }
+ const KeyDataType *shortArray = getKeyDataEntry(iRef, clusterSize);
+ AggregatedType a;
+ for (uint32_t i = 0; i < clusterSize; ++i) {
+ _aggrCalc.add(a, _aggrCalc.getVal(shortArray[i].getData()));
+ }
+ return a;
+}
+
+
+template <typename DataT>
+void
+PostingStore<DataT>::clear(const EntryRef ref)
+{
+ if (!ref.valid())
+ return;
+ RefType iRef(ref);
+ uint32_t typeId = getTypeId(iRef);
+ uint32_t clusterSize = getClusterSize(typeId);
+ if (clusterSize == 0) {
+ if (isBitVector(typeId)) {
+ const BitVectorEntry *bve = getBitVectorEntry(iRef);
+ RefType iRef2(bve->_tree);
+ if (iRef2.valid()) {
+ assert(isBTree(iRef2));
+ BTreeType *tree = getWTreeEntry(iRef2);
+ tree->clear(_allocator);
+ _store.holdElem(iRef2, 1);
+ }
+ _bvs.erase(ref.ref());
+ _status.decBitVectors();
+ _bvExtraBytes -= bve->_bv->extraByteSize();
+ _store.holdElem(ref, 1);
+ } else {
+ BTreeType *tree = getWTreeEntry(iRef);
+ tree->clear(_allocator);
+ _store.holdElem(ref, 1);
+ }
+ } else {
+ _store.holdElem(ref, clusterSize);
+ }
+}
+
+
+template <typename DataT>
+MemoryUsage
+PostingStore<DataT>::getMemoryUsage() const
+{
+ MemoryUsage usage;
+ usage.merge(_allocator.getMemoryUsage());
+ usage.merge(_store.getMemoryUsage());
+ uint64_t bvExtraBytes = _bvExtraBytes;
+ usage.incUsedBytes(bvExtraBytes);
+ usage.incAllocatedBytes(bvExtraBytes);
+ return usage;
+}
+
+
+template class PostingStore<BTreeNoLeafData>;
+
+template class PostingStore<int32_t>;
+
+} // namespace btree
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/attribute/postingstore.h b/searchlib/src/vespa/searchlib/attribute/postingstore.h
new file mode 100644
index 00000000000..cc379731bf9
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/postingstore.h
@@ -0,0 +1,361 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "postinglisttraits.h"
+#include <set>
+#include <vespa/searchlib/common/bitvector.h>
+#include <vespa/searchlib/common/growablebitvector.h>
+#include "enumstorebase.h"
+
+namespace search
+{
+
+namespace attribute
+{
+
+class Status;
+class Config;
+
+class BitVectorEntry
+{
+public:
+ btree::EntryRef _tree; // Daisy chained reference to tree based posting list
+ std::shared_ptr<GrowableBitVector> _bv; // bitvector
+
+public:
+ BitVectorEntry()
+ : _tree(),
+ _bv()
+ {
+ }
+};
+
+
+class PostingStoreBase2
+{
+public:
+ bool _enableBitVectors;
+ bool _enableOnlyBitVector;
+ bool _isFilter;
+protected:
+ uint32_t _bvSize;
+ uint32_t _bvCapacity;
+public:
+ uint32_t _minBvDocFreq; // Less than this ==> destroy bv
+ uint32_t _maxBvDocFreq; // Greater than or equal to this ==> create bv
+protected:
+ std::set<uint32_t> _bvs; // Current bitvectors
+ EnumPostingTree &_dict;
+ Status &_status;
+ uint64_t _bvExtraBytes;
+
+ static constexpr uint32_t BUFFERTYPE_BITVECTOR = 9u;
+
+public:
+ PostingStoreBase2(EnumPostingTree &dict, Status &status,
+ const Config &config);
+
+ virtual
+ ~PostingStoreBase2();
+
+ bool
+ resizeBitVectors(uint32_t newSize, uint32_t newCapacity);
+
+ virtual bool
+ removeSparseBitVectors() = 0;
+};
+
+template <typename DataT>
+class PostingStore : public PostingListTraits<DataT>::PostingStoreBase,
+ public PostingStoreBase2
+{
+ btree::BufferType<BitVectorEntry> _bvType;
+public:
+ typedef DataT DataType;
+ typedef typename PostingListTraits<DataT>::PostingStoreBase Parent;
+ typedef typename Parent::AddIter AddIter;
+ typedef typename Parent::RemoveIter RemoveIter;
+ typedef typename Parent::RefType RefType;
+ typedef typename Parent::BTreeType BTreeType;
+ typedef typename Parent::Iterator Iterator;
+ typedef typename Parent::ConstIterator ConstIterator;
+ typedef typename Parent::KeyDataType KeyDataType;
+ typedef typename Parent::AggregatedType AggregatedType;
+ typedef typename Parent::BTreeTypeRefPair BTreeTypeRefPair;
+ typedef typename Parent::Builder Builder;
+ typedef btree::EntryRef EntryRef;
+ typedef std::less<uint32_t> CompareT;
+ using Parent::applyNewArray;
+ using Parent::applyNewTree;
+ using Parent::applyCluster;
+ using Parent::applyTree;
+ using Parent::normalizeTree;
+ using Parent::getTypeId;
+ using Parent::getClusterSize;
+ using Parent::getWTreeEntry;
+ using Parent::getTreeEntry;
+ using Parent::getKeyDataEntry;
+ using Parent::clusterLimit;
+ using Parent::allocBTree;
+ using Parent::_builder;
+ using Parent::_store;
+ using Parent::_allocator;
+ using Parent::_aggrCalc;
+ using Parent::BUFFERTYPE_BTREE;
+ typedef std::pair<EntryRef, BitVectorEntry *> BitVectorRefPair;
+
+
+ PostingStore(EnumPostingTree &dict, Status &status, const Config &config);
+
+ virtual
+ ~PostingStore();
+
+ virtual bool
+ removeSparseBitVectors();
+
+ static bool
+ isBitVector(uint32_t typeId)
+ {
+ return typeId == BUFFERTYPE_BITVECTOR;
+ }
+
+ static bool
+ isBTree(uint32_t typeId)
+ {
+ return typeId == BUFFERTYPE_BTREE;
+ }
+
+ bool
+ isBTree(RefType ref) const
+ {
+ return isBTree(getTypeId(ref));
+ }
+
+ void
+ applyNew(EntryRef &ref,
+ AddIter a,
+ AddIter ae);
+
+ BitVectorRefPair
+ allocBitVector(void)
+ {
+ return _store.template allocEntry<BitVectorEntry,
+ btree::DefaultReclaimer<BitVectorEntry> >(BUFFERTYPE_BITVECTOR);
+ }
+
+ /*
+ * Recreate btree from bitvector. Weight information is not recreated.
+ */
+ void
+ makeDegradedTree(EntryRef &ref, const BitVector &bv);
+
+ void
+ dropBitVector(EntryRef &ref);
+
+ void
+ makeBitVector(EntryRef &ref);
+
+ void
+ applyNewBitVector(EntryRef &ref,
+ AddIter aOrg,
+ AddIter ae);
+
+ void
+ apply(BitVector &bv,
+ AddIter a,
+ AddIter ae,
+ RemoveIter r,
+ RemoveIter re);
+
+ /**
+ * Apply multiple changes at once.
+ *
+ * additions and removals should be sorted on key without duplicates.
+ * Overlap between additions and removals indicates updates.
+ */
+ void
+ apply(EntryRef &ref,
+ AddIter a,
+ AddIter ae,
+ RemoveIter r,
+ RemoveIter re);
+
+ void
+ clear(const EntryRef ref);
+
+ size_t
+ size(const EntryRef ref) const {
+ if (!ref.valid())
+ return 0;
+ RefType iRef(ref);
+ uint32_t typeId = getTypeId(iRef);
+ uint32_t clusterSize = getClusterSize(typeId);
+ if (clusterSize == 0) {
+ return internalSize(typeId, iRef);
+ }
+ return clusterSize;
+ }
+
+ size_t
+ frozenSize(const EntryRef ref) const {
+ if (!ref.valid())
+ return 0;
+ RefType iRef(ref);
+ uint32_t typeId = getTypeId(iRef);
+ uint32_t clusterSize = getClusterSize(typeId);
+ if (clusterSize == 0) {
+ return internalFrozenSize(typeId, iRef);
+ }
+ return clusterSize;
+ }
+
+ Iterator
+ begin(const EntryRef ref) const;
+
+ ConstIterator
+ beginFrozen(const EntryRef ref) const;
+
+ void
+ beginFrozen(const EntryRef ref, std::vector<ConstIterator> &where) const;
+
+ template <typename FunctionType>
+ VESPA_DLL_LOCAL void
+ foreach_frozen_key(EntryRef ref, FunctionType func) const;
+
+ template <typename FunctionType>
+ VESPA_DLL_LOCAL void
+ foreach_frozen(EntryRef ref, FunctionType func) const;
+
+ AggregatedType
+ getAggregated(const EntryRef ref) const;
+
+ const BitVectorEntry *
+ getBitVectorEntry(RefType ref) const
+ {
+ return _store.template getBufferEntry<BitVectorEntry>(ref.bufferId(),
+ ref.offset());
+ }
+
+ BitVectorEntry *
+ getWBitVectorEntry(RefType ref)
+ {
+ return _store.template getBufferEntry<BitVectorEntry>(ref.bufferId(),
+ ref.offset());
+ }
+
+ static inline DataT
+ bitVectorWeight();
+
+ MemoryUsage
+ getMemoryUsage() const;
+
+private:
+ size_t internalSize(uint32_t typeId, const RefType & iRef) const;
+ size_t internalFrozenSize(uint32_t typeId, const RefType & iRef) const;
+};
+
+template <>
+inline btree::BTreeNoLeafData
+PostingStore<btree::BTreeNoLeafData>::bitVectorWeight()
+{
+ return btree::BTreeNoLeafData();
+}
+
+template <>
+inline int32_t
+PostingStore<int32_t>::bitVectorWeight()
+{
+ return 1;
+}
+
+template <typename DataT>
+template <typename FunctionType>
+void
+PostingStore<DataT>::foreach_frozen_key(EntryRef ref, FunctionType func) const
+{
+ if (!ref.valid())
+ return;
+ RefType iRef(ref);
+ uint32_t typeId = getTypeId(iRef);
+ uint32_t clusterSize = getClusterSize(typeId);
+ if (clusterSize == 0) {
+ if (isBitVector(typeId)) {
+ const BitVectorEntry *bve = getBitVectorEntry(iRef);
+ EntryRef ref2(bve->_tree);
+ RefType iRef2(ref2);
+ if (iRef2.valid()) {
+ assert(isBTree(iRef2));
+ const BTreeType *tree = getTreeEntry(iRef2);
+ _allocator.getNodeStore().foreach_key(tree->getFrozenRoot(), func);
+ } else {
+ const BitVector *bv = bve->_bv.get();
+ uint32_t docIdLimit = bv->size();
+ uint32_t docId = bv->getFirstTrueBit(1);
+ while (docId < docIdLimit) {
+ func(docId);
+ docId = bv->getNextTrueBit(docId + 1);
+ }
+ }
+ } else {
+ assert(isBTree(typeId));
+ const BTreeType *tree = getTreeEntry(iRef);
+ _allocator.getNodeStore().foreach_key(tree->getFrozenRoot(), func);
+ }
+ } else {
+ const KeyDataType *p = getKeyDataEntry(iRef, clusterSize);
+ const KeyDataType *pe = p + clusterSize;
+ for (; p != pe; ++p) {
+ func(p->_key);
+ }
+ }
+}
+
+
+template <typename DataT>
+template <typename FunctionType>
+void
+PostingStore<DataT>::foreach_frozen(EntryRef ref, FunctionType func) const
+{
+ if (!ref.valid())
+ return;
+ RefType iRef(ref);
+ uint32_t typeId = getTypeId(iRef);
+ uint32_t clusterSize = getClusterSize(typeId);
+ if (clusterSize == 0) {
+ if (isBitVector(typeId)) {
+ const BitVectorEntry *bve = getBitVectorEntry(iRef);
+ EntryRef ref2(bve->_tree);
+ RefType iRef2(ref2);
+ if (iRef2.valid()) {
+ assert(isBTree(iRef2));
+ const BTreeType *tree = getTreeEntry(iRef2);
+ _allocator.getNodeStore().foreach(tree->getFrozenRoot(), func);
+ } else {
+ const BitVector *bv = bve->_bv.get();
+ uint32_t docIdLimit = bv->size();
+ uint32_t docId = bv->getFirstTrueBit(1);
+ while (docId < docIdLimit) {
+ func(docId, bitVectorWeight());
+ docId = bv->getNextTrueBit(docId + 1);
+ }
+ }
+ } else {
+ const BTreeType *tree = getTreeEntry(iRef);
+ _allocator.getNodeStore().foreach(tree->getFrozenRoot(), func);
+ }
+ } else {
+ const KeyDataType *p = getKeyDataEntry(iRef, clusterSize);
+ const KeyDataType *pe = p + clusterSize;
+ for (; p != pe; ++p) {
+ func(p->_key, p->getData());
+ }
+ }
+}
+
+
+
+} // namespace attribute
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/attribute/predicate_attribute.cpp b/searchlib/src/vespa/searchlib/attribute/predicate_attribute.cpp
new file mode 100644
index 00000000000..a0693755666
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/predicate_attribute.cpp
@@ -0,0 +1,277 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/log/log.h>
+LOG_SETUP(".predicate_attribute");
+#include <vespa/fastos/fastos.h>
+
+#include "predicate_attribute.h"
+#include <vespa/searchlib/predicate/predicate_tree_annotator.h>
+#include <vespa/document/fieldvalue/predicatefieldvalue.h>
+#include <vespa/document/predicate/predicate.h>
+#include <vespa/vespalib/data/databuffer.h>
+#include <vespa/vespalib/data/slime/slime.h>
+
+using document::Predicate;
+using document::PredicateFieldValue;
+using vespalib::MMapDataBuffer;
+using namespace search::predicate;
+
+namespace search {
+
+namespace {
+constexpr uint8_t MAX_MIN_FEATURE = 255;
+constexpr uint16_t MAX_INTERVAL_RANGE = static_cast<uint16_t>(predicate::MAX_INTERVAL);
+
+
+int64_t adjustBound(int32_t arity, int64_t bound) {
+ int64_t adjusted = arity;
+ int64_t value = bound;
+ int64_t max = LLONG_MAX / arity;
+ while ((value /= arity) > 0) {
+ if (adjusted > max) {
+ return bound;
+ }
+ adjusted *= arity;
+ }
+ return adjusted - 1;
+}
+
+int64_t adjustLowerBound(int32_t arity, int64_t lower_bound) {
+ if (lower_bound == LLONG_MIN) {
+ return lower_bound;
+ } else if (lower_bound > 0) {
+ return 0ll;
+ } else {
+ return -adjustBound(arity, -lower_bound);
+ }
+}
+
+int64_t adjustUpperBound(int32_t arity, int64_t upper_bound) {
+ if (upper_bound == LLONG_MAX) {
+ return upper_bound;
+ } else if (upper_bound < 0) {
+ return -1ll; // 0 belongs to the positive range.
+ } else {
+ return adjustBound(arity, upper_bound);
+ }
+}
+
+SimpleIndexConfig createSimpleIndexConfig(const search::attribute::Config &config) {
+ return SimpleIndexConfig(config.dense_posting_list_threshold(), config.getGrowStrategy());
+}
+
+} // namespace
+
+PredicateAttribute::PredicateAttribute(const vespalib::string &base_file_name,
+ const Config &config)
+ : NotImplementedAttribute(base_file_name, config),
+ _base_file_name(base_file_name),
+ _limit_provider(*this),
+ _index(new PredicateIndex(getGenerationHandler(), getGenerationHolder(),
+ _limit_provider, createSimpleIndexConfig(config), config.arity())),
+ _lower_bound(adjustLowerBound(config.arity(), config.lower_bound())),
+ _upper_bound(adjustUpperBound(config.arity(), config.upper_bound())),
+ _min_feature(config.getGrowStrategy(), getGenerationHolder()),
+ _interval_range_vector(config.getGrowStrategy(), getGenerationHolder()),
+ _max_interval_range(1)
+{
+}
+
+PredicateAttribute::~PredicateAttribute()
+{
+ getGenerationHolder().clearHoldLists();
+}
+
+uint32_t
+PredicateAttribute::getValueCount(DocId) const
+{
+ return 1;
+}
+
+void
+PredicateAttribute::onCommit()
+{
+ populateIfNeeded();
+ _index->commit();
+ incGeneration();
+}
+
+void
+PredicateAttribute::onUpdateStat()
+{
+ // update statistics
+ MemoryUsage combined;
+ combined.merge(_min_feature.getMemoryUsage());
+ combined.merge(_interval_range_vector.getMemoryUsage());
+ combined.merge(_index->getMemoryUsage());
+ combined.incAllocatedBytesOnHold(getGenerationHolder().getHeldBytes());
+ this->updateStatistics(_min_feature.size(), _min_feature.size(),
+ combined.allocatedBytes(), combined.usedBytes(),
+ combined.deadBytes(), combined.allocatedBytesOnHold());
+}
+
+void
+PredicateAttribute::removeOldGenerations(generation_t firstUsed)
+{
+ getGenerationHolder().trimHoldLists(firstUsed);
+ _index->trimHoldLists(firstUsed);
+}
+
+void
+PredicateAttribute::onGenerationChange(generation_t generation)
+{
+ getGenerationHolder().transferHoldLists(generation - 1);
+ _index->transferHoldLists(generation - 1);
+}
+
+void
+PredicateAttribute::onSave(IAttributeSaveTarget &saveTarget) {
+ LOG(info, "Saving predicate attribute version %d", getVersion());
+ IAttributeSaveTarget::Buffer buffer(saveTarget.datWriter().allocBuf(4096));
+ _index->serialize(*buffer);
+ uint32_t highest_doc_id = static_cast<uint32_t>(_min_feature.size() - 1);
+ buffer->writeInt32(highest_doc_id);
+ for (size_t i = 1; i <= highest_doc_id; ++i) {
+ buffer->writeInt8(_min_feature[i]);
+ }
+ for (size_t i = 1; i <= highest_doc_id; ++i) {
+ buffer->writeInt16(_interval_range_vector[i]);
+ }
+ buffer->writeInt16(_max_interval_range);
+ saveTarget.datWriter().writeBuf(std::move(buffer));
+}
+
+
+uint32_t
+PredicateAttribute::getVersion() const {
+ return PREDICATE_ATTRIBUTE_VERSION;
+}
+
+namespace {
+
+template <typename V>
+struct DocIdLimitFinderAndMinFeatureFiller : SimpleIndexDeserializeObserver<> {
+ uint32_t _highest_doc_id;
+ V & _min_feature;
+ PredicateIndex &_index;
+ DocIdLimitFinderAndMinFeatureFiller(V & min_feature,
+ PredicateIndex &index) :
+ _highest_doc_id(0),
+ _min_feature(min_feature),
+ _index(index)
+ {}
+ void notifyInsert(uint64_t, uint32_t doc_id, uint32_t min_feature) override {
+ if (doc_id > _highest_doc_id) {
+ _highest_doc_id = doc_id;
+ _min_feature.ensure_size(doc_id + 1, PredicateAttribute::MIN_FEATURE_FILL);
+ }
+ _min_feature[doc_id] = min_feature;
+ }
+};
+
+struct DummyObserver : SimpleIndexDeserializeObserver<> {
+ DummyObserver() {}
+ void notifyInsert(uint64_t, uint32_t, uint32_t) override {}
+};
+
+}
+
+bool PredicateAttribute::onLoad()
+{
+ FileUtil::LoadedBuffer::UP loaded_buffer = loadDAT();
+ char *rawBuffer = const_cast<char *>(static_cast<const char *>(loaded_buffer->buffer()));
+ size_t size = loaded_buffer->size();
+ MMapDataBuffer buffer(rawBuffer, size);
+ buffer.moveFreeToData(size);
+
+ const GenericHeader &header = loaded_buffer->getHeader();
+ uint32_t version = static_cast<uint32_t>(
+ header.hasTag("version") ? header.getTag("version").asInteger() : 0);
+ LOG(info, "Loading predicate attribute version %d. getVersion() = %d", version, getVersion());
+
+ DocId highest_doc_id;
+ if (version == 0) {
+ DocIdLimitFinderAndMinFeatureFiller<MinFeatureVector> observer(_min_feature, *_index);
+ _index.reset(new PredicateIndex(getGenerationHandler(), getGenerationHolder(),
+ _limit_provider, createSimpleIndexConfig(getConfig()),
+ buffer, observer, 0));
+ highest_doc_id = observer._highest_doc_id;
+ } else {
+ DummyObserver observer;
+ _index.reset(
+ new PredicateIndex(getGenerationHandler(), getGenerationHolder(), _limit_provider,
+ createSimpleIndexConfig(getConfig()), buffer, observer, version));
+ highest_doc_id = buffer.readInt32();
+ // Deserialize min feature vector
+ _min_feature.ensure_size(highest_doc_id + 1, PredicateAttribute::MIN_FEATURE_FILL);
+ for (uint32_t docId = 1; docId <= highest_doc_id; ++docId) {
+ _min_feature[docId] = buffer.readInt8();
+ }
+ }
+ _interval_range_vector.ensure_size(highest_doc_id + 1);
+ // Interval ranges are only stored in version >= 2
+ for (uint32_t docId = 1; docId <= highest_doc_id; ++docId) {
+ _interval_range_vector[docId] = version < 2 ? MAX_INTERVAL_RANGE : buffer.readInt16();
+ }
+ _max_interval_range = version < 2 ? MAX_INTERVAL_RANGE : buffer.readInt16();
+ _index->adjustDocIdLimit(highest_doc_id);
+ setNumDocs(highest_doc_id + 1);
+ setCommittedDocIdLimit(highest_doc_id + 1);
+ _index->onDeserializationCompleted();
+ return true;
+}
+
+bool
+PredicateAttribute::addDoc(DocId &doc_id)
+{
+ doc_id = getNumDocs();
+ incNumDocs();
+ updateUncommittedDocIdLimit(doc_id);
+ _index->adjustDocIdLimit(doc_id);
+ _interval_range_vector.ensure_size(doc_id + 1);
+ _min_feature.ensure_size(doc_id + 1);
+ return true;
+}
+uint32_t
+PredicateAttribute::clearDoc(DocId doc_id)
+{
+ _index->removeDocument(doc_id);
+ _min_feature[doc_id] = MIN_FEATURE_FILL;
+ _interval_range_vector[doc_id] = 0;
+ return 0;
+}
+
+void
+PredicateAttribute::updateValue(uint32_t doc_id, const PredicateFieldValue &value)
+{
+ const auto &inspector = value.getSlime().get();
+
+ _index->removeDocument(doc_id);
+ updateUncommittedDocIdLimit(doc_id);
+
+ long root_type = inspector[Predicate::NODE_TYPE].asLong();
+ if (root_type == Predicate::TYPE_FALSE) { // never match
+ _min_feature[doc_id] = MIN_FEATURE_FILL;
+ _interval_range_vector[doc_id] = 0;
+ return;
+ } else if (root_type == Predicate::TYPE_TRUE) {
+ _min_feature[doc_id] = 0;
+ _interval_range_vector[doc_id] = 0x1;
+ _index->indexEmptyDocument(doc_id);
+ return;
+ }
+ PredicateTreeAnnotations result;
+ PredicateTreeAnnotator::annotate(inspector, result,
+ _lower_bound, _upper_bound);
+ _index->indexDocument(doc_id, result);
+ assert(result.min_feature <= MAX_MIN_FEATURE);
+ uint8_t minFeature = static_cast<uint8_t>(result.min_feature);
+ _min_feature[doc_id] = minFeature;
+ _interval_range_vector[doc_id] = result.interval_range;
+ _max_interval_range = std::max(result.interval_range, _max_interval_range);
+ assert(result.interval_range > 0);
+}
+
+IMPLEMENT_IDENTIFIABLE_ABSTRACT(PredicateAttribute, AttributeVector);
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/attribute/predicate_attribute.h b/searchlib/src/vespa/searchlib/attribute/predicate_attribute.h
new file mode 100644
index 00000000000..c5af5893305
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/predicate_attribute.h
@@ -0,0 +1,104 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "not_implemented_attribute.h"
+#include <vespa/searchlib/predicate/predicate_index.h>
+#include <vespa/searchlib/common/rcuvector.h>
+
+namespace document { class PredicateFieldValue; }
+
+namespace search {
+
+struct AttributeVectorDocIdLimitProvider : public predicate::DocIdLimitProvider {
+ AttributeVectorDocIdLimitProvider(const AttributeVector &attribute_vector) :
+ _attribute_vector(attribute_vector) {}
+
+ virtual uint32_t getDocIdLimit() const { return _attribute_vector.getNumDocs(); };
+ virtual uint32_t getCommittedDocIdLimit() const {
+ return _attribute_vector.getCommittedDocIdLimit();
+ }
+private:
+ const AttributeVector &_attribute_vector;
+};
+
+/**
+ * Attribute that manages a predicate index. It is not a traditional
+ * attribute in that it doesn't store values for each document, but
+ * rather keeps an index for boolean search. Summaries are not fetched
+ * from the attribute, but rather using the summary store like a
+ * non-index field.
+ */
+class PredicateAttribute : public NotImplementedAttribute {
+public:
+ typedef uint8_t MinFeature;
+ typedef std::pair<const MinFeature *, size_t> MinFeatureHandle;
+ using IntervalRange = uint16_t;
+ using IntervalRangeVector = attribute::RcuVectorBase<IntervalRange>;
+
+ DECLARE_IDENTIFIABLE_ABSTRACT(PredicateAttribute);
+
+ PredicateAttribute(const vespalib::string &base_file_name,
+ const Config &config);
+
+ virtual ~PredicateAttribute();
+
+ predicate::PredicateIndex &getIndex() { return *_index; }
+
+ void onSave(IAttributeSaveTarget & saveTarget) override;
+ bool onLoad() override;
+ void onCommit() override;
+ void removeOldGenerations(generation_t firstUsed) override;
+ void onGenerationChange(generation_t generation) override;
+ void onUpdateStat() override;
+ bool addDoc(DocId &doc_id) override;
+ uint32_t clearDoc(DocId doc_id) override;
+ uint32_t getValueCount(DocId doc) const override;
+
+ void updateValue(uint32_t doc_id,
+ const document::PredicateFieldValue &value);
+
+ /**
+ * Will return a handle with a pointer to the min_features and how many there are.
+ * The pointer is only guaranteed to be valid for as long as you hold the attribute guard.
+ **/
+ MinFeatureHandle getMinFeatureVector() const {
+ return MinFeatureHandle(&_min_feature[0], getNumDocs());
+ }
+
+ const IntervalRange * getIntervalRangeVector() const {
+ return &_interval_range_vector[0];
+ }
+
+ IntervalRange getMaxIntervalRange() const {
+ return _max_interval_range;
+ }
+
+ void updateMaxIntervalRange(IntervalRange intervalRange) {
+ _max_interval_range = std::max(intervalRange, _max_interval_range);
+ }
+
+ void populateIfNeeded() {
+ _index->populateIfNeeded(getNumDocs());
+ }
+private:
+ vespalib::string _base_file_name;
+ const AttributeVectorDocIdLimitProvider _limit_provider;
+ predicate::PredicateIndex::UP _index;
+ int64_t _lower_bound;
+ int64_t _upper_bound;
+
+ typedef attribute::RcuVectorBase<uint8_t> MinFeatureVector;
+ MinFeatureVector _min_feature;
+
+ IntervalRangeVector _interval_range_vector;
+ IntervalRange _max_interval_range;
+public:
+ static constexpr uint8_t MIN_FEATURE_FILL = 255;
+ static constexpr uint32_t PREDICATE_ATTRIBUTE_VERSION = 2;
+
+ virtual uint32_t getVersion() const override;
+
+};
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/attribute/singleenumattribute.cpp b/searchlib/src/vespa/searchlib/attribute/singleenumattribute.cpp
new file mode 100644
index 00000000000..fe987a35d60
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/singleenumattribute.cpp
@@ -0,0 +1,43 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "singleenumattribute.h"
+#include "singleenumattribute.hpp"
+
+namespace search
+{
+
+using attribute::Config;
+
+SingleValueEnumAttributeBase::
+SingleValueEnumAttributeBase(const Config & c, GenerationHolder &genHolder)
+ : _enumIndices(c.getGrowStrategy().getDocsInitialCapacity(),
+ c.getGrowStrategy().getDocsGrowPercent(),
+ c.getGrowStrategy().getDocsGrowDelta(),
+ genHolder)
+{
+}
+
+
+SingleValueEnumAttributeBase::~SingleValueEnumAttributeBase()
+{
+}
+
+
+AttributeVector::DocId
+SingleValueEnumAttributeBase::addDoc(bool &incGeneration)
+{
+ incGeneration = _enumIndices.isFull();
+ _enumIndices.push_back(EnumStoreBase::Index());
+ return _enumIndices.size() - 1;
+}
+
+
+SingleValueEnumAttributeBase::EnumIndexCopyVector
+SingleValueEnumAttributeBase::getIndicesCopy(uint32_t size) const
+{
+ assert(size <= _enumIndices.size());
+ return EnumIndexCopyVector(&_enumIndices[0], &_enumIndices[0] + size);
+}
+
+}
diff --git a/searchlib/src/vespa/searchlib/attribute/singleenumattribute.h b/searchlib/src/vespa/searchlib/attribute/singleenumattribute.h
new file mode 100644
index 00000000000..ad0cc2a98a3
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/singleenumattribute.h
@@ -0,0 +1,152 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/attribute/enumattribute.h>
+#include <vespa/searchlib/common/rcuvector.h>
+
+namespace search {
+
+/*
+ * Implementation of single value enum attribute that uses an underlying enum store
+ * to store unique values.
+ *
+ * B: EnumAttribute<BaseClass>
+ */
+
+class SingleValueEnumAttributeBase
+{
+protected:
+ typedef EnumStoreBase::Index EnumIndex;
+ typedef search::attribute::RcuVectorBase<EnumIndex> EnumIndexVector;
+ typedef AttributeVector::DocId DocId;
+ typedef AttributeVector::EnumHandle EnumHandle;
+ typedef vespalib::GenerationHolder GenerationHolder;
+
+public:
+ using EnumIndexCopyVector = vespalib::Array<EnumIndex,
+ vespalib::DefaultAlloc>;
+
+ EnumStoreBase::Index getEnumIndex(DocId docId) const { return _enumIndices[docId]; }
+ EnumHandle getE(DocId doc) const { return _enumIndices[doc].ref(); }
+protected:
+ SingleValueEnumAttributeBase(const attribute::Config & c,
+ GenerationHolder &genHolder);
+ ~SingleValueEnumAttributeBase();
+ AttributeVector::DocId addDoc(bool & incGeneration);
+
+ EnumIndexVector _enumIndices;
+
+ EnumIndexCopyVector
+ getIndicesCopy(uint32_t size) const;
+};
+
+template <typename B>
+class SingleValueEnumAttribute : public B, public SingleValueEnumAttributeBase
+{
+protected:
+ typedef typename B::DocId DocId;
+ typedef typename B::WeightedEnum WeightedEnum;
+ typedef typename B::Change Change;
+ typedef typename B::ChangeVector ChangeVector;
+ typedef typename B::ChangeVector::const_iterator ChangeVectorIterator;
+ typedef typename B::generation_t generation_t;
+ typedef typename B::EnumModifier EnumModifier;
+ typedef typename B::ValueModifier ValueModifier;
+ typedef typename B::EnumStore EnumStore;
+ typedef typename B::LoadedVector LoadedVector;
+ typedef typename B::UniqueSet UniqueSet;
+ typedef AttributeVector::ReaderBase ReaderBase;
+ typedef attribute::LoadedEnumAttributeVector LoadedEnumAttributeVector;
+ typedef attribute::LoadedEnumAttribute LoadedEnumAttribute;
+ using B::getGenerationHolder;
+
+private:
+ void considerUpdateAttributeChange(const Change & c, UniqueSet & newUniques);
+ void applyUpdateValueChange(const Change & c, EnumStoreBase::IndexVector & unused);
+
+protected:
+ // from EnumAttribute
+ virtual void considerAttributeChange(const Change & c, UniqueSet & newUniques);
+ virtual void reEnumerate();
+
+ // implemented by single value numeric enum attribute.
+ virtual void considerUpdateAttributeChange(const Change & c) { (void) c; }
+ virtual void considerArithmeticAttributeChange(const Change & c, UniqueSet & newUniques) { (void) c; (void) newUniques; }
+
+ // update enum index vector with new values according to change vector
+ virtual void applyValueChanges(EnumStoreBase::IndexVector & unused);
+ virtual void applyArithmeticValueChange(const Change & c, EnumStoreBase::IndexVector & unused) {
+ (void) c; (void) unused;
+ }
+ void updateEnumRefCounts(const Change & c, EnumIndex newIdx, EnumIndex oldIdx, EnumStoreBase::IndexVector & unused);
+
+ virtual void
+ freezeEnumDictionary()
+ {
+ this->getEnumStore().freezeTree();
+ }
+
+ virtual void mergeMemoryStats(MemoryUsage & total) { (void) total; }
+
+ virtual void fillValues(LoadedVector & loaded);
+
+ virtual void
+ fillEnumIdx(ReaderBase &attrReader,
+ uint64_t numValues,
+ const EnumStoreBase::IndexVector &eidxs,
+ LoadedEnumAttributeVector &loaded);
+
+ virtual void
+ fillEnumIdx(ReaderBase &attrReader,
+ uint64_t numValues,
+ const EnumStoreBase::IndexVector &eidxs,
+ EnumStoreBase::EnumVector &enumHist);
+
+ /**
+ * Called when a new document has been added.
+ *
+ * Can be overridden by subclasses that need to resize structures
+ * as a result of this.
+ *
+ * Should return true if underlying structures were resized.
+ **/
+ virtual bool onAddDoc(DocId doc) { (void) doc; return false; }
+
+public:
+ SingleValueEnumAttribute(const vespalib::string & baseFileName, const AttributeVector::Config & cfg);
+ virtual ~SingleValueEnumAttribute();
+
+ virtual bool addDoc(DocId & doc);
+ virtual uint32_t getValueCount(DocId doc) const;
+ virtual void onCommit();
+ virtual void onUpdateStat();
+ virtual void removeOldGenerations(generation_t firstUsed);
+ virtual void onGenerationChange(generation_t generation);
+ virtual EnumHandle getEnum(DocId doc) const {
+ return getE(doc);
+ }
+ virtual uint32_t get(DocId doc, EnumHandle * e, uint32_t sz) const {
+ if (sz > 0) {
+ e[0] = getE(doc);
+ }
+ return 1;
+ }
+ virtual uint32_t get(DocId doc, WeightedEnum * e, uint32_t sz) const {
+ if (sz > 0) {
+ e[0] = WeightedEnum(getE(doc), 1);
+ }
+ return 1;
+ }
+
+ virtual void
+ clearDocs(DocId lidLow, DocId lidLimit);
+
+ virtual void
+ onShrinkLidSpace();
+
+ virtual std::unique_ptr<AttributeSaver> onInitSave() override;
+};
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/attribute/singleenumattribute.hpp b/searchlib/src/vespa/searchlib/attribute/singleenumattribute.hpp
new file mode 100644
index 00000000000..df48fe949fe
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/singleenumattribute.hpp
@@ -0,0 +1,310 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/attribute/singleenumattribute.h>
+#include <vespa/searchlib/attribute/enumattribute.hpp>
+#include "ipostinglistattributebase.h"
+#include "singleenumattributesaver.h"
+
+namespace search {
+
+template <typename B>
+SingleValueEnumAttribute<B>::
+SingleValueEnumAttribute(const vespalib::string &baseFileName,
+ const AttributeVector::Config &cfg)
+ : B(baseFileName, cfg),
+ SingleValueEnumAttributeBase(cfg, getGenerationHolder())
+{
+}
+
+template <typename B>
+SingleValueEnumAttribute<B>::~SingleValueEnumAttribute()
+{
+}
+
+template <typename B>
+bool
+SingleValueEnumAttribute<B>::addDoc(DocId & doc)
+{
+ bool incGen = false;
+ doc = SingleValueEnumAttributeBase::addDoc(incGen);
+ if (doc > 0u) {
+ // Make sure that a valid value(magic default) is referenced,
+ // even between addDoc and commit().
+ if (_enumIndices[0].valid()) {
+ _enumIndices[doc] = _enumIndices[0];
+ this->_enumStore.incRefCount(_enumIndices[0]);
+ }
+ }
+ this->incNumDocs();
+ this->updateUncommittedDocIdLimit(doc);
+ incGen |= onAddDoc(doc);
+ if (incGen) {
+ this->incGeneration();
+ } else
+ this->removeAllOldGenerations();
+ return true;
+}
+
+template <typename B>
+uint32_t
+SingleValueEnumAttribute<B>::getValueCount(DocId doc) const
+{
+ if (doc >= this->getNumDocs()) {
+ return 0;
+ }
+ return 1;
+}
+
+template <typename B>
+void
+SingleValueEnumAttribute<B>::onCommit()
+{
+ this->checkSetMaxValueCount(1);
+
+ // update enum store
+ EnumStoreBase::IndexVector possiblyUnused;
+ this->insertNewUniqueValues(possiblyUnused);
+ // apply updates
+ applyValueChanges(possiblyUnused);
+ this->_changes.clear();
+ this->_enumStore.freeUnusedEnums(possiblyUnused);
+ freezeEnumDictionary();
+ this->setEnumMax(this->_enumStore.getLastEnum());
+ std::atomic_thread_fence(std::memory_order_release);
+ this->removeAllOldGenerations();
+}
+
+template <typename B>
+void
+SingleValueEnumAttribute<B>::onUpdateStat()
+{
+ // update statistics
+ MemoryUsage total = _enumIndices.getMemoryUsage();
+ total.incAllocatedBytesOnHold(getGenerationHolder().getHeldBytes());
+ total.merge(this->_enumStore.getMemoryUsage());
+ total.merge(this->_enumStore.getTreeMemoryUsage());
+ mergeMemoryStats(total);
+ this->updateStatistics(_enumIndices.size(), this->_enumStore.getNumUniques(), total.allocatedBytes(),
+ total.usedBytes(), total.deadBytes(), total.allocatedBytesOnHold());
+}
+
+template <typename B>
+void
+SingleValueEnumAttribute<B>::considerUpdateAttributeChange(const Change & c, UniqueSet & newUniques)
+{
+ EnumIndex idx;
+ if (!this->_enumStore.findIndex(c._data.raw(), idx)) {
+ newUniques.insert(c._data);
+ }
+ considerUpdateAttributeChange(c); // for numeric
+}
+
+template <typename B>
+void
+SingleValueEnumAttribute<B>::considerAttributeChange(const Change & c, UniqueSet & newUniques)
+{
+ if (c._type == ChangeBase::UPDATE) {
+ considerUpdateAttributeChange(c, newUniques);
+ } else if (c._type >= ChangeBase::ADD && c._type <= ChangeBase::DIV) {
+ considerArithmeticAttributeChange(c, newUniques); // for numeric
+ } else if (c._type == ChangeBase::CLEARDOC) {
+ this->_defaultValue._doc = c._doc;
+ considerUpdateAttributeChange(this->_defaultValue, newUniques);
+ }
+}
+
+template <typename B>
+void
+SingleValueEnumAttribute<B>::reEnumerate()
+{
+ EnumModifier enumGuard(this->getEnumModifier());
+ for (uint32_t i = 0; i < _enumIndices.size(); ++i) {
+ EnumIndex oldIdx = _enumIndices[i];
+ if (oldIdx.valid()) {
+ EnumIndex newIdx;
+ this->_enumStore.getCurrentIndex(oldIdx, newIdx);
+ std::atomic_thread_fence(std::memory_order_release);
+ _enumIndices[i] = newIdx;
+ }
+ }
+}
+
+template <typename B>
+void
+SingleValueEnumAttribute<B>::applyUpdateValueChange(const Change & c, EnumStoreBase::IndexVector & unused)
+{
+ EnumIndex oldIdx = _enumIndices[c._doc];
+ EnumIndex newIdx;
+ this->_enumStore.findIndex(c._data.raw(), newIdx);
+ updateEnumRefCounts(c, newIdx, oldIdx, unused);
+}
+
+template <typename B>
+void
+SingleValueEnumAttribute<B>::applyValueChanges(EnumStoreBase::IndexVector & unused)
+{
+ ValueModifier valueGuard(this->getValueModifier());
+ for (ChangeVectorIterator iter = this->_changes.begin(), end = this->_changes.end(); iter != end; ++iter) {
+ if (iter->_type == ChangeBase::UPDATE) {
+ applyUpdateValueChange(*iter, unused);
+ } else if (iter->_type >= ChangeBase::ADD && iter->_type <= ChangeBase::DIV) {
+ applyArithmeticValueChange(*iter, unused);
+ } else if (iter->_type == ChangeBase::CLEARDOC) {
+ this->_defaultValue._doc = iter->_doc;
+ applyUpdateValueChange(this->_defaultValue, unused);
+ }
+ }
+}
+
+template <typename B>
+void
+SingleValueEnumAttribute<B>::updateEnumRefCounts(const Change & c, EnumIndex newIdx, EnumIndex oldIdx,
+ EnumStoreBase::IndexVector & unused)
+{
+ // increase and decrease refcount
+ this->_enumStore.incRefCount(newIdx);
+
+ _enumIndices[c._doc] = newIdx;
+
+ if (oldIdx.valid()) {
+ this->_enumStore.decRefCount(oldIdx);
+ if (this->_enumStore.getRefCount(oldIdx) == 0) {
+ unused.push_back(oldIdx);
+ }
+ }
+}
+
+template <typename B>
+void
+SingleValueEnumAttribute<B>::fillValues(LoadedVector & loaded)
+{
+ uint32_t numDocs = this->getNumDocs();
+ getGenerationHolder().clearHoldLists();
+ _enumIndices.reset();
+ _enumIndices.unsafe_reserve(numDocs);
+ for (DocId doc = 0; doc < numDocs; ++doc, loaded.next()) {
+ _enumIndices.push_back(loaded.read().getEidx());
+ }
+}
+
+
+template <typename B>
+void
+SingleValueEnumAttribute<B>::fillEnumIdx(ReaderBase &attrReader,
+ uint64_t numValues,
+ const EnumStoreBase::IndexVector &eidxs,
+ LoadedEnumAttributeVector &loaded)
+{
+ attribute::SaveLoadedEnum saver(loaded);
+ _enumIndices.fillMapped(getGenerationHolder(),
+ attrReader,
+ numValues,
+ &eidxs[0],
+ eidxs.size(),
+ saver,
+ this->getNumDocs());
+}
+
+
+template <typename B>
+void
+SingleValueEnumAttribute<B>::fillEnumIdx(ReaderBase &attrReader,
+ uint64_t numValues,
+ const EnumStoreBase::IndexVector &eidxs,
+ EnumStoreBase::EnumVector &enumHist)
+{
+ attribute::SaveEnumHist saver(enumHist);
+ _enumIndices.fillMapped(getGenerationHolder(),
+ attrReader,
+ numValues,
+ &eidxs[0],
+ eidxs.size(),
+ saver,
+ this->getNumDocs());
+}
+
+
+
+template <typename B>
+void
+SingleValueEnumAttribute<B>::removeOldGenerations(generation_t firstUsed)
+{
+ this->_enumStore.trimHoldLists(firstUsed);
+ getGenerationHolder().trimHoldLists(firstUsed);
+}
+
+template <typename B>
+void
+SingleValueEnumAttribute<B>::onGenerationChange(generation_t generation)
+{
+ /*
+ * Freeze tree before generation is increased in attribute vector
+ * but after generation is increased in tree. This ensures that
+ * unlocked readers accessing a frozen tree will access a
+ * sufficiently new frozen tree.
+ */
+ freezeEnumDictionary();
+ getGenerationHolder().transferHoldLists(generation - 1);
+ this->_enumStore.transferHoldLists(generation - 1);
+}
+
+
+template <typename B>
+void
+SingleValueEnumAttribute<B>::clearDocs(DocId lidLow, DocId lidLimit)
+{
+ EnumHandle e;
+ bool findDefaultEnumRes(this->findEnum(this->getDefaultEnumTypeValue(), e));
+ if (!findDefaultEnumRes) {
+ e = EnumHandle();
+ }
+ assert(lidLow <= lidLimit);
+ assert(lidLimit <= this->getNumDocs());
+ for (DocId lid = lidLow; lid < lidLimit; ++lid) {
+ if (_enumIndices[lid] != e) {
+ this->clearDoc(lid);
+ }
+ }
+}
+
+
+template <typename B>
+void
+SingleValueEnumAttribute<B>::onShrinkLidSpace(void)
+{
+ EnumHandle e;
+ bool findDefaultEnumRes(this->findEnum(this->getDefaultEnumTypeValue(), e));
+ assert(findDefaultEnumRes);
+ uint32_t committedDocIdLimit = this->getCommittedDocIdLimit();
+ assert(_enumIndices.size() >= committedDocIdLimit);
+ attribute::IPostingListAttributeBase *pab =
+ this->getIPostingListAttributeBase();
+ if (pab != NULL) {
+ pab->clearPostings(e, committedDocIdLimit, _enumIndices.size());
+ }
+ _enumIndices.shrink(committedDocIdLimit);
+ this->setNumDocs(committedDocIdLimit);
+}
+
+template <typename B>
+std::unique_ptr<AttributeSaver>
+SingleValueEnumAttribute<B>::onInitSave()
+{
+ {
+ EnumModifier enumGuard(this->getEnumModifier());
+ this->_enumStore.reEnumerate();
+ }
+ vespalib::GenerationHandler::Guard guard(this->getGenerationHandler().
+ takeGuard());
+ return std::make_unique<SingleValueEnumAttributeSaver>
+ (std::move(guard),
+ this->createSaveTargetConfig(),
+ getIndicesCopy(this->getCommittedDocIdLimit()),
+ this->_enumStore);
+}
+
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/attribute/singleenumattributesaver.cpp b/searchlib/src/vespa/searchlib/attribute/singleenumattributesaver.cpp
new file mode 100644
index 00000000000..d8185a0b614
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/singleenumattributesaver.cpp
@@ -0,0 +1,48 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "singleenumattributesaver.h"
+#include <vespa/searchlib/util/bufferwriter.h>
+
+using vespalib::GenerationHandler;
+
+namespace search {
+
+SingleValueEnumAttributeSaver::
+SingleValueEnumAttributeSaver(GenerationHandler::Guard &&guard,
+ const IAttributeSaveTarget::Config &cfg,
+ EnumIndexCopyVector &&indices,
+ const EnumStoreBase &enumStore)
+ : AttributeSaver(std::move(guard), cfg),
+ _indices(std::move(indices)),
+ _enumSaver(enumStore, false)
+{
+}
+
+
+SingleValueEnumAttributeSaver::~SingleValueEnumAttributeSaver()
+{
+}
+
+
+bool
+SingleValueEnumAttributeSaver::onSave(IAttributeSaveTarget &saveTarget)
+{
+ _enumSaver.writeUdat(saveTarget);
+ const EnumStoreBase &enumStore = _enumSaver.getEnumStore();
+ std::unique_ptr<search::BufferWriter> datWriter(saveTarget.datWriter().
+ allocBufferWriter());
+ if (saveTarget.getEnumerated()) {
+ enumStore.writeEnumValues(*datWriter,
+ &_indices[0], _indices.size());
+ } else {
+ enumStore.writeValues(*datWriter,
+ &_indices[0], _indices.size());
+ }
+ datWriter->flush();
+ _enumSaver.enableReEnumerate();
+ return true;
+}
+
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/attribute/singleenumattributesaver.h b/searchlib/src/vespa/searchlib/attribute/singleenumattributesaver.h
new file mode 100644
index 00000000000..7e7de3ef84e
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/singleenumattributesaver.h
@@ -0,0 +1,33 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "attributesaver.h"
+#include "iattributesavetarget.h"
+#include "singleenumattribute.h"
+#include "enumattributesaver.h"
+
+namespace search {
+
+/*
+ * Class for saving a single value enumerated attribute.
+ */
+class SingleValueEnumAttributeSaver : public AttributeSaver
+{
+private:
+ using EnumIndexCopyVector =
+ SingleValueEnumAttributeBase::EnumIndexCopyVector;
+ EnumIndexCopyVector _indices;
+ EnumAttributeSaver _enumSaver;
+
+ virtual bool onSave(IAttributeSaveTarget &saveTarget) override;
+public:
+ SingleValueEnumAttributeSaver(vespalib::GenerationHandler::Guard &&guard,
+ const IAttributeSaveTarget::Config &cfg,
+ EnumIndexCopyVector &&indices,
+ const EnumStoreBase &enumStore);
+
+ virtual ~SingleValueEnumAttributeSaver();
+};
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/attribute/singlenumericattribute.cpp b/searchlib/src/vespa/searchlib/attribute/singlenumericattribute.cpp
new file mode 100644
index 00000000000..b08931f36fc
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/singlenumericattribute.cpp
@@ -0,0 +1,12 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "singlenumericattribute.h"
+#include "singlenumericattribute.hpp"
+#include <vespa/log/log.h>
+
+LOG_SETUP(".searchlib.attribute.singlenumericattribute");
+namespace search {
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/attribute/singlenumericattribute.h b/searchlib/src/vespa/searchlib/attribute/singlenumericattribute.h
new file mode 100644
index 00000000000..9cc2a90da32
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/singlenumericattribute.h
@@ -0,0 +1,235 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/attribute/integerbase.h>
+#include <vespa/searchlib/attribute/floatbase.h>
+#include <vespa/searchlib/attribute/attributeiterators.h>
+#include <vespa/searchlib/common/rcuvector.h>
+#include <vespa/searchlib/query/query.h>
+#include <vespa/searchlib/queryeval/emptysearch.h>
+#include <limits>
+
+namespace search {
+
+template <typename B>
+class SingleValueNumericAttribute : public B
+{
+private:
+ typedef typename B::BaseType T;
+ typedef typename B::DocId DocId;
+ typedef typename B::EnumHandle EnumHandle;
+ typedef typename B::largeint_t largeint_t;
+ typedef typename B::Weighted Weighted;
+ typedef typename B::WeightedInt WeightedInt;
+ typedef typename B::WeightedFloat WeightedFloat;
+ typedef typename B::WeightedEnum WeightedEnum;
+ typedef typename B::generation_t generation_t;
+ using B::getGenerationHolder;
+
+ typedef attribute::RcuVectorBase<T> DataVector;
+ DataVector _data;
+
+ virtual T getFromEnum(EnumHandle e) const {
+ (void) e;
+ return T();
+ }
+
+ /*
+ * Specialization of SearchContext
+ */
+ template <typename M>
+ class SingleSearchContext : public M, public AttributeVector::SearchContext
+ {
+ private:
+ const T * _data;
+
+ virtual bool
+ onCmp(DocId docId, int32_t & weight) const
+ {
+ return cmp(docId, weight);
+ }
+
+ virtual bool
+ onCmp(DocId docId) const
+ {
+ return cmp(docId);
+ }
+
+ virtual bool valid() const { return M::isValid(); }
+
+ public:
+ SingleSearchContext(QueryTermSimple::UP qTerm, const NumericAttribute & toBeSearched) :
+ M(*qTerm, true),
+ AttributeVector::SearchContext(toBeSearched),
+ _data(&static_cast<const SingleValueNumericAttribute<B> &>(toBeSearched)._data[0])
+ {
+ }
+
+ bool
+ cmp(DocId docId, int32_t & weight) const
+ {
+ const T v = _data[docId];
+ weight = 1;
+ return this->match(v);
+ }
+
+ bool
+ cmp(DocId docId) const
+ {
+ const T v = _data[docId];
+ return this->match(v);
+ }
+
+ virtual Int64Range getAsIntegerTerm() const {
+ return M::getRange();
+ }
+
+ virtual std::unique_ptr<queryeval::SearchIterator>
+ createFilterIterator(fef::TermFieldMatchData * matchData, bool strict)
+ {
+ if (!valid()) {
+ return queryeval::SearchIterator::UP(
+ new queryeval::EmptySearch());
+ }
+ if (getIsFilter()) {
+ return queryeval::SearchIterator::UP
+ (strict
+ ? new FilterAttributeIteratorStrict<SingleSearchContext<M> >(*this, matchData)
+ : new FilterAttributeIteratorT<SingleSearchContext<M> >(*this, matchData));
+ }
+ return queryeval::SearchIterator::UP
+ (strict
+ ? new AttributeIteratorStrict<SingleSearchContext<M> >(*this, matchData)
+ : new AttributeIteratorT<SingleSearchContext<M> >(*this, matchData));
+ }
+ };
+
+
+protected:
+ virtual bool findEnum(T value, EnumHandle & e) const {
+ (void) value; (void) e;
+ return false;
+ }
+
+public:
+ SingleValueNumericAttribute(const vespalib::string & baseFileName,
+ const AttributeVector::Config & c =
+ AttributeVector::Config(AttributeVector::
+ BasicType::fromType(T()),
+ attribute::CollectionType::SINGLE));
+
+
+ virtual
+ ~SingleValueNumericAttribute(void);
+
+ virtual uint32_t getValueCount(DocId doc) const {
+ if (doc >= B::getNumDocs()) {
+ return 0;
+ }
+ return 1;
+ }
+ virtual void onCommit();
+ virtual void onUpdateStat();
+ virtual void removeOldGenerations(generation_t firstUsed);
+ virtual void onGenerationChange(generation_t generation);
+ virtual bool addDoc(DocId & doc) {
+ bool incGen = _data.isFull();
+ _data.push_back(attribute::getUndefined<T>());
+ std::atomic_thread_fence(std::memory_order_release);
+ B::incNumDocs();
+ doc = B::getNumDocs() - 1;
+ this->updateUncommittedDocIdLimit(doc);
+ if (incGen) {
+ this->incGeneration();
+ } else
+ this->removeAllOldGenerations();
+ return true;
+ }
+ virtual bool onLoad();
+
+ bool
+ onLoadEnumerated(typename B::ReaderBase &attrReader);
+
+ AttributeVector::SearchContext::UP
+ getSearch(QueryTermSimple::UP term, const AttributeVector::SearchContext::Params & params) const override;
+
+ void set(DocId doc, T v) {
+ _data[doc] = v;
+ }
+
+ T getFast(DocId doc) const {
+ return _data[doc];
+ }
+
+ //-------------------------------------------------------------------------
+ // new read api
+ //-------------------------------------------------------------------------
+ virtual T get(DocId doc) const {
+ return getFast(doc);
+ }
+ virtual largeint_t getInt(DocId doc) const {
+ return static_cast<largeint_t>(getFast(doc));
+ }
+ virtual void getEnumValue(const EnumHandle * v, uint32_t *e, uint32_t sz) const {
+ (void) v;
+ (void) e;
+ (void) sz;
+ }
+ virtual double getFloat(DocId doc) const {
+ return static_cast<double>(_data[doc]);
+ }
+ virtual uint32_t getEnum(DocId doc) const {
+ (void) doc;
+ return std::numeric_limits<uint32_t>::max(); // does not have enum
+ }
+ virtual uint32_t getAll(DocId doc, T * v, uint32_t sz) const {
+ (void) sz;
+ v[0] = _data[doc];
+ return 1;
+ }
+ virtual uint32_t get(DocId doc, largeint_t * v, uint32_t sz) const {
+ (void) sz;
+ v[0] = static_cast<largeint_t>(_data[doc]);
+ return 1;
+ }
+ virtual uint32_t get(DocId doc, double * v, uint32_t sz) const {
+ (void) sz;
+ v[0] = static_cast<double>(_data[doc]);
+ return 1;
+ }
+ virtual uint32_t get(DocId doc, EnumHandle * e, uint32_t sz) const {
+ (void) sz;
+ e[0] = getEnum(doc);
+ return 1;
+ }
+ virtual uint32_t getAll(DocId doc, Weighted * v, uint32_t sz) const {
+ (void) doc; (void) v; (void) sz;
+ return 0;
+ }
+ virtual uint32_t get(DocId doc, WeightedInt * v, uint32_t sz) const {
+ (void) sz;
+ v[0] = WeightedInt(static_cast<largeint_t>(_data[doc]));
+ return 1;
+ }
+ virtual uint32_t get(DocId doc, WeightedFloat * v, uint32_t sz) const {
+ (void) sz;
+ v[0] = WeightedFloat(static_cast<double>(_data[doc]));
+ return 1;
+ }
+ virtual uint32_t get(DocId doc, WeightedEnum * e, uint32_t sz) const {
+ (void) doc; (void) e; (void) sz;
+ return 0;
+ }
+
+ virtual void
+ clearDocs(DocId lidLow, DocId lidLimit);
+
+ virtual void
+ onShrinkLidSpace();
+
+ virtual std::unique_ptr<AttributeSaver> onInitSave() override;
+};
+
+}
+
diff --git a/searchlib/src/vespa/searchlib/attribute/singlenumericattribute.hpp b/searchlib/src/vespa/searchlib/attribute/singlenumericattribute.hpp
new file mode 100644
index 00000000000..5c04375c31f
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/singlenumericattribute.hpp
@@ -0,0 +1,188 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/searchlib/attribute/singlenumericattribute.h>
+#include <vespa/searchlib/attribute/attributevector.hpp>
+#include "singlenumericattributesaver.h"
+
+namespace search {
+
+template <typename B>
+SingleValueNumericAttribute<B>::
+SingleValueNumericAttribute(const vespalib::string & baseFileName, const AttributeVector::Config & c) :
+ B(baseFileName, c),
+ _data(c.getGrowStrategy().getDocsInitialCapacity(),
+ c.getGrowStrategy().getDocsGrowPercent(),
+ c.getGrowStrategy().getDocsGrowDelta(),
+ getGenerationHolder())
+{
+}
+
+
+template <typename B>
+SingleValueNumericAttribute<B>::~SingleValueNumericAttribute(void)
+{
+ getGenerationHolder().clearHoldLists();
+}
+
+
+template <typename B>
+void
+SingleValueNumericAttribute<B>::onCommit()
+{
+ this->checkSetMaxValueCount(1);
+
+ {
+ // apply updates
+ typename B::ValueModifier valueGuard(this->getValueModifier());
+ for (const auto & change : this->_changes) {
+ if (change._type == ChangeBase::UPDATE) {
+ std::atomic_thread_fence(std::memory_order_release);
+ _data[change._doc] = change._data;
+ } else if (change._type >= ChangeBase::ADD && change._type <= ChangeBase::DIV) {
+ std::atomic_thread_fence(std::memory_order_release);
+ _data[change._doc] = this->applyArithmetic(_data[change._doc], change);
+ } else if (change._type == ChangeBase::CLEARDOC) {
+ std::atomic_thread_fence(std::memory_order_release);
+ _data[change._doc] = this->_defaultValue._data;
+ }
+ }
+ }
+
+ std::atomic_thread_fence(std::memory_order_release);
+ this->removeAllOldGenerations();
+
+ this->_changes.clear();
+}
+
+template <typename B>
+void
+SingleValueNumericAttribute<B>::onUpdateStat()
+{
+ MemoryUsage usage = _data.getMemoryUsage();
+ usage.incAllocatedBytesOnHold(getGenerationHolder().getHeldBytes());
+ this->updateStatistics(_data.size(), _data.size(),
+ usage.allocatedBytes(), usage.usedBytes(), usage.deadBytes(), usage.allocatedBytesOnHold());
+}
+
+template <typename B>
+void
+SingleValueNumericAttribute<B>::removeOldGenerations(generation_t firstUsed)
+{
+ getGenerationHolder().trimHoldLists(firstUsed);
+}
+
+template <typename B>
+void
+SingleValueNumericAttribute<B>::onGenerationChange(generation_t generation)
+{
+ getGenerationHolder().transferHoldLists(generation - 1);
+}
+
+template <typename B>
+bool
+SingleValueNumericAttribute<B>::onLoadEnumerated(typename B::ReaderBase &
+ attrReader)
+{
+ uint64_t numValues = attrReader.getEnumCount();
+ uint32_t numDocs = numValues;
+
+ this->setNumDocs(numDocs);
+ this->setCommittedDocIdLimit(numDocs);
+
+ FileUtil::LoadedBuffer::UP udatBuffer(this->loadUDAT());
+ const T *map = reinterpret_cast<const T *>(udatBuffer->buffer());
+ assert((udatBuffer->size() % sizeof(T)) == 0);
+ size_t mapSize = udatBuffer->size() / sizeof(T);
+ attribute::NoSaveLoadedEnum saver;
+ _data.fillMapped(getGenerationHolder(),
+ attrReader,
+ numValues,
+ map,
+ mapSize,
+ saver,
+ numDocs);
+ return true;
+}
+
+
+template <typename B>
+bool
+SingleValueNumericAttribute<B>::onLoad()
+{
+ typename B::template PrimitiveReader<T> attrReader(*this);
+ bool ok(attrReader.getHasLoadData());
+
+ if (!ok)
+ return false;
+
+ this->setCreateSerialNum(attrReader.getCreateSerialNum());
+
+ if (attrReader.getEnumerated())
+ return onLoadEnumerated(attrReader);
+
+ const size_t sz(attrReader.getDataCount());
+ getGenerationHolder().clearHoldLists();
+ _data.reset();
+ _data.unsafe_reserve(sz);
+ for (uint32_t i = 0; i < sz; ++i) {
+ _data.push_back(attrReader.getNextData());
+ }
+
+ B::setNumDocs(sz);
+ B::setCommittedDocIdLimit(sz);
+
+ return true;
+}
+
+template <typename B>
+AttributeVector::SearchContext::UP
+SingleValueNumericAttribute<B>::getSearch(QueryTermSimple::UP qTerm,
+ const AttributeVector::SearchContext::Params & params) const
+{
+ (void) params;
+ QueryTermSimple::RangeResult<T> res = qTerm->getRange<T>();
+ if (res.isEqual()) {
+ return AttributeVector::SearchContext::UP(new SingleSearchContext< NumericAttribute::Equal<T> >(std::move(qTerm), *this));
+ } else {
+ return AttributeVector::SearchContext::UP(new SingleSearchContext< NumericAttribute::Range<T> >(std::move(qTerm), *this));
+ }
+}
+
+
+template <typename B>
+void
+SingleValueNumericAttribute<B>::clearDocs(DocId lidLow, DocId lidLimit)
+{
+ assert(lidLow <= lidLimit);
+ assert(lidLimit <= this->getNumDocs());
+ for (DocId lid = lidLow; lid < lidLimit; ++lid) {
+ if (!attribute::isUndefined(_data[lid])) {
+ this->clearDoc(lid);
+ }
+ }
+}
+
+template <typename B>
+void
+SingleValueNumericAttribute<B>::onShrinkLidSpace()
+{
+ uint32_t committedDocIdLimit = this->getCommittedDocIdLimit();
+ assert(_data.size() >= committedDocIdLimit);
+ _data.shrink(committedDocIdLimit);
+ this->setNumDocs(committedDocIdLimit);
+}
+
+template <typename B>
+std::unique_ptr<AttributeSaver>
+SingleValueNumericAttribute<B>::onInitSave()
+{
+ const uint32_t numDocs(this->getCommittedDocIdLimit());
+ assert(numDocs <= _data.size());
+ return std::make_unique<SingleValueNumericAttributeSaver>
+ (this->createSaveTargetConfig(), &_data[0], numDocs * sizeof(T));
+}
+
+
+}
+
diff --git a/searchlib/src/vespa/searchlib/attribute/singlenumericattributesaver.cpp b/searchlib/src/vespa/searchlib/attribute/singlenumericattributesaver.cpp
new file mode 100644
index 00000000000..3320dd977d2
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/singlenumericattributesaver.cpp
@@ -0,0 +1,48 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "singlenumericattributesaver.h"
+
+using vespalib::GenerationHandler;
+using search::IAttributeSaveTarget;
+
+namespace search {
+
+namespace
+{
+
+const uint32_t MIN_ALIGNMENT = 4096;
+
+}
+
+
+SingleValueNumericAttributeSaver::
+SingleValueNumericAttributeSaver(const IAttributeSaveTarget::Config &cfg,
+ const void *data, size_t size)
+ : AttributeSaver(vespalib::GenerationHandler::Guard(), cfg),
+ _buf()
+{
+ _buf = std::make_unique<BufferBuf>(size, MIN_ALIGNMENT);
+ assert(_buf->getFreeLen() >= size);
+ if (size > 0) {
+ memcpy(_buf->getFree(), data, size);
+ _buf->moveFreeToData(size);
+ }
+ assert(_buf->getDataLen() == size);
+}
+
+
+SingleValueNumericAttributeSaver::~SingleValueNumericAttributeSaver()
+{
+}
+
+
+bool
+SingleValueNumericAttributeSaver::onSave(IAttributeSaveTarget &saveTarget)
+{
+ saveTarget.datWriter().writeBuf(std::move(_buf));
+ return true;
+}
+
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/attribute/singlenumericattributesaver.h b/searchlib/src/vespa/searchlib/attribute/singlenumericattributesaver.h
new file mode 100644
index 00000000000..585e5c49dab
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/singlenumericattributesaver.h
@@ -0,0 +1,31 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "attributesaver.h"
+#include "iattributefilewriter.h"
+
+namespace search {
+
+/*
+ * Class for saving a plain attribute (i.e. single value numeric
+ * atttribute).
+ */
+class SingleValueNumericAttributeSaver : public AttributeSaver
+{
+public:
+ using Buffer = IAttributeFileWriter::Buffer;
+
+private:
+ Buffer _buf;
+ using BufferBuf = IAttributeFileWriter::BufferBuf;
+
+ virtual bool onSave(IAttributeSaveTarget &saveTarget) override;
+public:
+ SingleValueNumericAttributeSaver(const IAttributeSaveTarget::Config &cfg,
+ const void *data, size_t size);
+
+ virtual ~SingleValueNumericAttributeSaver();
+};
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/attribute/singlenumericenumattribute.cpp b/searchlib/src/vespa/searchlib/attribute/singlenumericenumattribute.cpp
new file mode 100644
index 00000000000..df86159833d
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/singlenumericenumattribute.cpp
@@ -0,0 +1,12 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "singlenumericenumattribute.h"
+#include "singlenumericenumattribute.hpp"
+#include <vespa/log/log.h>
+
+LOG_SETUP(".searchlib.attribute.singlenumericenumattribute");
+namespace search {
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/attribute/singlenumericenumattribute.h b/searchlib/src/vespa/searchlib/attribute/singlenumericenumattribute.h
new file mode 100644
index 00000000000..3793431f75b
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/singlenumericenumattribute.h
@@ -0,0 +1,191 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/attribute/singleenumattribute.h>
+#include <vespa/searchlib/attribute/numericbase.h>
+#include <vespa/searchlib/attribute/attributeiterators.h>
+#include <vespa/searchlib/queryeval/emptysearch.h>
+
+namespace search {
+
+/*
+ * Implementation of single value numeric enum attribute that uses an underlying enum store
+ * to store unique numeric values.
+ *
+ * B: EnumAttribute<NumericBaseClass>
+ */
+template <typename B>
+class SingleValueNumericEnumAttribute : public SingleValueEnumAttribute<B>
+{
+protected:
+ typedef typename B::BaseClass::BaseType T;
+ typedef typename B::BaseClass::Change Change;
+ typedef typename B::BaseClass::DocId DocId;
+ typedef typename B::BaseClass::EnumHandle EnumHandle;
+ typedef typename B::BaseClass::largeint_t largeint_t;
+ typedef typename B::BaseClass::Weighted Weighted;
+ typedef typename B::BaseClass::WeightedInt WeightedInt;
+ typedef typename B::BaseClass::WeightedFloat WeightedFloat;
+ typedef typename B::BaseClass::generation_t generation_t;
+ typedef typename B::BaseClass::LoadedNumericValueT LoadedNumericValueT;
+ typedef typename B::BaseClass::LoadedVector LoadedVector;
+ typedef SequentialReadModifyWriteVector<LoadedNumericValueT, vespalib::DefaultAlloc> LoadedVectorR;
+
+ typedef typename SingleValueEnumAttribute<B>::EnumStore EnumStore;
+ typedef typename SingleValueEnumAttributeBase::EnumIndex EnumIndex;
+ typedef typename SingleValueEnumAttribute<B>::UniqueSet UniqueSet;
+ typedef EnumStoreBase::IndexVector EnumIndexVector;
+ typedef EnumStoreBase::EnumVector EnumVector;
+ typedef attribute::LoadedEnumAttributeVector LoadedEnumAttributeVector;
+ typedef attribute::LoadedEnumAttribute LoadedEnumAttribute;
+
+private:
+ // used to make sure several arithmetic operations on the same document in a single commit works
+ std::map<DocId, T> _currDocValues;
+
+protected:
+
+ // from SingleValueEnumAttribute
+ virtual void considerUpdateAttributeChange(const Change & c);
+ virtual void considerArithmeticAttributeChange(const Change & c, UniqueSet & newUniques);
+ virtual void applyArithmeticValueChange(const Change & c, EnumStoreBase::IndexVector & unused);
+
+ /*
+ * Specialization of SearchContext
+ */
+ class SingleSearchContext : public NumericAttribute::Range<T>, public AttributeVector::SearchContext
+ {
+ protected:
+ const SingleValueNumericEnumAttribute<B> & _toBeSearched;
+
+ virtual bool
+ onCmp(DocId docId, int32_t & weight) const
+ {
+ return cmp(docId, weight);
+ }
+
+ virtual bool
+ onCmp(DocId docId) const
+ {
+ return cmp(docId);
+ }
+ virtual bool valid() const { return this->isValid(); }
+
+ public:
+ SingleSearchContext(QueryTermSimple::UP qTerm, const NumericAttribute & toBeSearched) :
+ NumericAttribute::Range<T>(*qTerm, true),
+ AttributeVector::SearchContext(toBeSearched),
+ _toBeSearched(static_cast<const SingleValueNumericEnumAttribute<B> &>(toBeSearched))
+ {
+ }
+
+ virtual Int64Range getAsIntegerTerm() const {
+ return this->getRange();
+ }
+
+ bool
+ cmp(DocId docId, int32_t & weight) const
+ {
+ T v = _toBeSearched._enumStore.getValue(
+ _toBeSearched.getEnumIndex(docId));
+ weight = 1;
+ return this->match(v);
+ }
+
+ bool
+ cmp(DocId docId) const
+ {
+ T v = _toBeSearched._enumStore.getValue(
+ _toBeSearched.getEnumIndex(docId));
+ return this->match(v);
+ }
+
+ virtual std::unique_ptr<queryeval::SearchIterator>
+ createFilterIterator(fef::TermFieldMatchData * matchData, bool strict)
+ {
+ if (!valid()) {
+ return queryeval::SearchIterator::UP(
+ new queryeval::EmptySearch());
+ }
+ if (getIsFilter()) {
+ return queryeval::SearchIterator::UP
+ (strict
+ ? new FilterAttributeIteratorStrict<SingleSearchContext>(*this, matchData)
+ : new FilterAttributeIteratorT<SingleSearchContext>(*this, matchData));
+ }
+ return queryeval::SearchIterator::UP
+ (strict
+ ? new AttributeIteratorStrict<SingleSearchContext>(*this, matchData)
+ : new AttributeIteratorT<SingleSearchContext>(*this, matchData));
+ }
+ };
+
+
+public:
+ SingleValueNumericEnumAttribute(const vespalib::string & baseFileName,
+ const AttributeVector::Config & c =
+ AttributeVector::Config(AttributeVector::BasicType::fromType(T()),
+ attribute::CollectionType::SINGLE));
+
+ virtual void onCommit();
+ virtual bool onLoad();
+
+ bool
+ onLoadEnumerated(typename B::ReaderBase &attrReader);
+
+ AttributeVector::SearchContext::UP
+ getSearch(QueryTermSimple::UP term, const AttributeVector::SearchContext::Params & params) const override;
+
+ //-------------------------------------------------------------------------
+ // Attribute read API
+ //-------------------------------------------------------------------------
+ virtual T get(DocId doc) const {
+ return this->_enumStore.getValue(this->_enumIndices[doc]);
+ }
+ virtual largeint_t getInt(DocId doc) const {
+ return static_cast<largeint_t>(get(doc));
+ }
+ virtual double getFloat(DocId doc) const {
+ return static_cast<double>(get(doc));
+ }
+ virtual uint32_t getAll(DocId doc, T * v, uint32_t sz) const {
+ if (sz > 0) {
+ v[0] = get(doc);
+ }
+ return 1;
+ }
+ virtual uint32_t get(DocId doc, largeint_t * v, uint32_t sz) const {
+ if (sz > 0) {
+ v[0] = getInt(doc);
+ }
+ return 1;
+ }
+ virtual uint32_t get(DocId doc, double * v, uint32_t sz) const {
+ if (sz > 0) {
+ v[0] = getFloat(doc);
+ }
+ return 1;
+ }
+ virtual uint32_t getAll(DocId doc, Weighted * v, uint32_t sz) const {
+ if (sz > 0) {
+ v[0] = Weighted(get(doc));
+ }
+ return 1;
+ }
+ virtual uint32_t get(DocId doc, WeightedInt * v, uint32_t sz) const {
+ if (sz > 0) {
+ v[0] = WeightedInt(getInt(doc));
+ }
+ return 1;
+ }
+ virtual uint32_t get(DocId doc, WeightedFloat * v, uint32_t sz) const {
+ if (sz > 0) {
+ v[0] = WeightedFloat(getFloat(doc));
+ }
+ return 1;
+ }
+};
+
+}
+
diff --git a/searchlib/src/vespa/searchlib/attribute/singlenumericenumattribute.hpp b/searchlib/src/vespa/searchlib/attribute/singlenumericenumattribute.hpp
new file mode 100644
index 00000000000..f4447e7c6b7
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/singlenumericenumattribute.hpp
@@ -0,0 +1,172 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/attribute/singlenumericenumattribute.h>
+#include <vespa/searchlib/common/sort.h>
+#include <vespa/searchlib/attribute/singleenumattribute.hpp>
+#include <vespa/searchlib/attribute/loadednumericvalue.h>
+
+namespace search {
+
+template <typename B>
+void
+SingleValueNumericEnumAttribute<B>::considerUpdateAttributeChange(const Change & c)
+{
+ _currDocValues[c._doc] = c._data.get();
+}
+
+template <typename B>
+void
+SingleValueNumericEnumAttribute<B>::considerArithmeticAttributeChange(const Change & c, UniqueSet & newUniques)
+{
+ T oldValue;
+ typename std::map<DocId, T>::const_iterator iter = _currDocValues.find(c._doc);
+ if (iter != _currDocValues.end()) {
+ oldValue = iter->second;
+ } else {
+ oldValue = get(c._doc);
+ }
+
+ T newValue = this->applyArithmetic(oldValue, c);
+
+ EnumIndex idx;
+ if (!this->_enumStore.findIndex(newValue, idx)) {
+ newUniques.insert(newValue);
+ }
+
+ _currDocValues[c._doc] = newValue;
+}
+
+template <typename B>
+void
+SingleValueNumericEnumAttribute<B>::applyArithmeticValueChange(const Change & c, EnumStoreBase::IndexVector & unused)
+{
+ EnumIndex oldIdx = this->_enumIndices[c._doc];
+ EnumIndex newIdx;
+ T newValue = this->applyArithmetic(get(c._doc), c);
+ this->_enumStore.findIndex(newValue, newIdx);
+
+ this->updateEnumRefCounts(c, newIdx, oldIdx, unused);
+}
+
+template <typename B>
+SingleValueNumericEnumAttribute<B>::
+SingleValueNumericEnumAttribute(const vespalib::string & baseFileName,
+ const AttributeVector::Config & c)
+ : SingleValueEnumAttribute<B>(baseFileName, c),
+ _currDocValues()
+{
+}
+
+
+template <typename B>
+void
+SingleValueNumericEnumAttribute<B>::onCommit()
+{
+ SingleValueEnumAttribute<B>::onCommit();
+ _currDocValues.clear();
+}
+
+
+template <typename B>
+bool
+SingleValueNumericEnumAttribute<B>::onLoadEnumerated(typename B::ReaderBase &
+ attrReader)
+{
+ FileUtil::LoadedBuffer::UP udatBuffer(this->loadUDAT());
+
+ uint64_t numValues = attrReader.getEnumCount();
+ uint32_t numDocs = numValues;
+
+ EnumIndexVector eidxs;
+ this->fillEnum0(udatBuffer->buffer(), udatBuffer->size(), eidxs);
+ this->setNumDocs(numDocs);
+ this->setCommittedDocIdLimit(numDocs);
+ LoadedEnumAttributeVector loaded;
+ EnumVector enumHist;
+ if (this->hasPostings()) {
+ loaded.reserve(numValues);
+ this->fillEnumIdx(attrReader,
+ numValues,
+ eidxs,
+ loaded);
+ } else {
+ EnumVector(eidxs.size(), 0).swap(enumHist);
+ this->fillEnumIdx(attrReader,
+ numValues,
+ eidxs,
+ enumHist);
+ }
+ EnumIndexVector().swap(eidxs);
+ if (this->hasPostings()) {
+ if (numDocs > 0) {
+ this->onAddDoc(numDocs - 1);
+ }
+ attribute::sortLoadedByEnum(loaded);
+ this->fillPostingsFixupEnum(loaded);
+ } else {
+ this->fixupEnumRefCounts(enumHist);
+ }
+ return true;
+}
+
+
+template <typename B>
+bool
+SingleValueNumericEnumAttribute<B>::onLoad()
+{
+ typename B::template PrimitiveReader<T> attrReader(*this);
+ bool ok(attrReader.getHasLoadData());
+
+ if (!ok)
+ return false;
+
+ this->setCreateSerialNum(attrReader.getCreateSerialNum());
+
+ if (attrReader.getEnumerated())
+ return onLoadEnumerated(attrReader);
+
+ const uint32_t numDocs(attrReader.getDataCount());
+ LoadedVectorR loaded(numDocs);
+
+ this->setNumDocs(numDocs);
+ this->setCommittedDocIdLimit(numDocs);
+ if (numDocs > 0) {
+ this->onAddDoc(numDocs - 1);
+ }
+ for (uint32_t docIdx = 0; docIdx < numDocs; ++docIdx) {
+ loaded[docIdx]._docId = docIdx;
+ loaded[docIdx]._idx = 0;
+ loaded[docIdx].setValue(attrReader.getNextData());
+ }
+
+ attribute::sortLoadedByValue(loaded);
+ this->fillPostings(loaded);
+ loaded.rewind();
+ this->fillEnum(loaded);
+ attribute::sortLoadedByDocId(loaded);
+ loaded.rewind();
+ this->fillValues(loaded);
+
+ return true;
+}
+
+
+template <typename B>
+AttributeVector::SearchContext::UP
+SingleValueNumericEnumAttribute<B>::getSearch(QueryTermSimple::UP qTerm,
+ const AttributeVector::SearchContext::Params & params) const
+{
+ (void) params;
+ QueryTermSimple::RangeResult<T> res = qTerm->getRange<T>();
+ if (res.isEqual()) {
+ return AttributeVector::SearchContext::UP (new SingleSearchContext(std::move(qTerm), *this));
+ } else {
+ return AttributeVector::SearchContext::UP (new SingleSearchContext(std::move(qTerm), *this));
+ }
+}
+
+
+}
+
diff --git a/searchlib/src/vespa/searchlib/attribute/singlenumericpostattribute.cpp b/searchlib/src/vespa/searchlib/attribute/singlenumericpostattribute.cpp
new file mode 100644
index 00000000000..3eb6f61101d
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/singlenumericpostattribute.cpp
@@ -0,0 +1,12 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "singlenumericpostattribute.h"
+#include "singlenumericpostattribute.hpp"
+#include <vespa/log/log.h>
+
+LOG_SETUP(".searchlib.attribute.singlenumericpostattribute");
+namespace search {
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/attribute/singlenumericpostattribute.h b/searchlib/src/vespa/searchlib/attribute/singlenumericpostattribute.h
new file mode 100644
index 00000000000..55072b62d5a
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/singlenumericpostattribute.h
@@ -0,0 +1,121 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/attribute/singlenumericenumattribute.h>
+#include <vespa/searchlib/attribute/postinglistattribute.h>
+#include "postinglistsearchcontext.h"
+
+namespace search {
+
+/*
+ * Implementation of single value numeric attribute that in addition to enum store
+ * uses an underlying posting list to provide faster search.
+ *
+ * B: EnumAttribute<BaseClass>
+ */
+template <typename B>
+class SingleValueNumericPostingAttribute
+ : public SingleValueNumericEnumAttribute<B>,
+ protected PostingListAttributeSubBase<AttributePosting,
+ typename B::LoadedVector,
+ typename B::LoadedValueType,
+ typename B::EnumStore>
+{
+private:
+ friend class PostingListAttributeTest;
+ template <typename, typename, typename>
+ friend class attribute::PostingSearchContext; // getEnumStore()
+ typedef SingleValueNumericPostingAttribute<B> SelfType;
+ typedef typename B::LoadedVector LoadedVector;
+ typedef attribute::LoadedEnumAttributeVector LoadedEnumAttributeVector;
+ typedef PostingListAttributeSubBase<AttributePosting,
+ LoadedVector,
+ typename B::LoadedValueType,
+ typename B::EnumStore> PostingParent;
+public:
+ typedef typename SingleValueNumericEnumAttribute<B>::EnumStore EnumStore;
+private:
+ typedef typename SingleValueEnumAttributeBase::EnumIndex EnumIndex;
+ typedef typename SingleValueNumericEnumAttribute<B>::generation_t generation_t;
+public:
+ typedef typename SingleValueNumericEnumAttribute<B>::T T;
+private:
+
+ typedef typename SingleValueNumericEnumAttribute<B>::SingleSearchContext SingleSearchContext;
+ typedef SingleSearchContext SingleNumericSearchContext;
+ typedef attribute::NumericPostingSearchContext<SingleNumericSearchContext,
+ SelfType,
+ btree::BTreeNoLeafData>
+ SinglePostingSearchContext;
+
+ typedef typename PostingParent::PostingMap PostingMap;
+ typedef typename B::BaseClass::Change Change;
+ typedef typename B::BaseClass::ChangeVector ChangeVector;
+ typedef typename B::BaseClass::ChangeVector::const_iterator ChangeVectorIterator;
+ typedef typename B::BaseClass::DocId DocId;
+ typedef typename B::BaseClass::ValueModifier ValueModifier;
+
+public:
+ typedef EnumPostingTree Dictionary;
+private:
+ typedef typename Dictionary::Iterator DictionaryIterator;
+ typedef typename Dictionary::ConstIterator DictionaryConstIterator;
+ typedef typename EnumStore::ComparatorType ComparatorType;
+ using PostingParent::_postingList;
+ using PostingParent::clearAllPostings;
+ using PostingParent::handleFillPostings;
+ using PostingParent::fillPostingsFixupEnumBase;
+ using PostingParent::forwardedOnAddDoc;
+
+ virtual void freezeEnumDictionary();
+ virtual void mergeMemoryStats(MemoryUsage & total);
+ void applyUpdateValueChange(const Change & c,
+ EnumStore & enumStore,
+ std::map<DocId, EnumIndex> & currEnumIndices);
+ void
+ makePostingChange(const EnumStoreComparator *cmp,
+ const std::map<DocId, EnumIndex> &currEnumIndices,
+ PostingMap &changePost);
+
+ virtual void applyValueChanges(EnumStoreBase::IndexVector & unused);
+
+public:
+ SingleValueNumericPostingAttribute(const vespalib::string & name, const AttributeVector::Config & cfg);
+ virtual ~SingleValueNumericPostingAttribute();
+
+ virtual void removeOldGenerations(generation_t firstUsed);
+ virtual void onGenerationChange(generation_t generation);
+
+ AttributeVector::SearchContext::UP
+ getSearch(QueryTermSimple::UP term, const AttributeVector::SearchContext::Params & params) const override;
+
+ virtual bool
+ onAddDoc(DocId doc)
+ {
+ return forwardedOnAddDoc(doc,
+ this->_enumIndices.size(),
+ this->_enumIndices.capacity());
+ }
+
+ virtual void
+ fillPostings(LoadedVector & loaded)
+ {
+ handleFillPostings(loaded);
+ }
+
+ virtual attribute::IPostingListAttributeBase *
+ getIPostingListAttributeBase(void)
+ {
+ return this;
+ }
+
+ virtual void
+ fillPostingsFixupEnum(const LoadedEnumAttributeVector &loaded)
+ {
+ fillPostingsFixupEnumBase(loaded);
+ }
+};
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/attribute/singlenumericpostattribute.hpp b/searchlib/src/vespa/searchlib/attribute/singlenumericpostattribute.hpp
new file mode 100644
index 00000000000..ebfdbe9b066
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/singlenumericpostattribute.hpp
@@ -0,0 +1,153 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/attribute/singlenumericpostattribute.h>
+#include <vespa/searchlib/attribute/enumstore.h>
+#include <vespa/searchlib/attribute/enumcomparator.h>
+#include <vespa/searchlib/attribute/singlenumericenumattribute.hpp>
+
+namespace search {
+
+template <typename B>
+SingleValueNumericPostingAttribute<B>::~SingleValueNumericPostingAttribute()
+{
+ this->disableFreeLists();
+ this->disableElemHoldList();
+ clearAllPostings();
+}
+
+template <typename B>
+SingleValueNumericPostingAttribute<B>::SingleValueNumericPostingAttribute(const vespalib::string & name,
+ const AttributeVector::Config & c) :
+ SingleValueNumericEnumAttribute<B>(name, c),
+ PostingParent(*this, this->getEnumStore())
+{
+}
+
+template <typename B>
+void
+SingleValueNumericPostingAttribute<B>::freezeEnumDictionary()
+{
+ this->getEnumStore().freezeTree();
+}
+
+template <typename B>
+void
+SingleValueNumericPostingAttribute<B>::mergeMemoryStats(MemoryUsage & total)
+{
+ total.merge(this->_postingList.getMemoryUsage());
+}
+
+template <typename B>
+void
+SingleValueNumericPostingAttribute<B>::applyUpdateValueChange(const Change & c,
+ EnumStore & enumStore,
+ std::map<DocId, EnumIndex> & currEnumIndices)
+{
+ EnumIndex newIdx;
+ enumStore.findIndex(c._data.raw(), newIdx);
+ currEnumIndices[c._doc] = newIdx;
+}
+
+template <typename B>
+void
+SingleValueNumericPostingAttribute<B>::
+makePostingChange(const EnumStoreComparator *cmpa,
+ const std::map<DocId, EnumIndex> &currEnumIndices,
+ PostingMap &changePost)
+{
+ typedef typename std::map<DocId, EnumIndex>::const_iterator EnumIter;
+ for (EnumIter iter = currEnumIndices.begin(), end = currEnumIndices.end();
+ iter != end; ++iter) {
+ uint32_t docId = iter->first;
+ EnumIndex oldIdx = this->_enumIndices[docId];
+ EnumIndex newIdx = iter->second;
+
+ // add new posting
+ changePost[EnumPostingPair(newIdx, cmpa)].add(docId, 1);
+
+ // remove old posting
+ if ( oldIdx.valid()) {
+ changePost[EnumPostingPair(oldIdx, cmpa)].remove(docId);
+ }
+ }
+}
+
+
+template <typename B>
+void
+SingleValueNumericPostingAttribute<B>::applyValueChanges(EnumStoreBase::IndexVector & unused)
+{
+ EnumStore & enumStore = this->getEnumStore();
+ Dictionary & dict = enumStore.getPostingDictionary();
+ ComparatorType cmpa(enumStore);
+ PostingMap changePost;
+
+ // used to make sure several arithmetic operations on the same document in a single commit works
+ std::map<DocId, EnumIndex> currEnumIndices;
+
+ for (ChangeVectorIterator iter = this->_changes.begin(), end = this->_changes.end(); iter != end; ++iter) {
+ typename std::map<DocId, EnumIndex>::const_iterator enumIter = currEnumIndices.find(iter->_doc);
+ EnumIndex oldIdx;
+ if (enumIter != currEnumIndices.end()) {
+ oldIdx = enumIter->second;
+ } else {
+ oldIdx = this->_enumIndices[iter->_doc];
+ }
+
+ if (iter->_type == ChangeBase::UPDATE) {
+ applyUpdateValueChange(*iter, enumStore,
+ currEnumIndices);
+ } else if (iter->_type >= ChangeBase::ADD && iter->_type <= ChangeBase::DIV) {
+ if (oldIdx.valid()) {
+ T oldValue = enumStore.getValue(oldIdx);
+ T newValue = this->applyArithmetic(oldValue, *iter);
+
+ DictionaryIterator addItr = dict.find(EnumIndex(), ComparatorType(enumStore, newValue));
+ EnumIndex newIdx = addItr.getKey();
+ currEnumIndices[iter->_doc] = newIdx;
+ }
+ } else if(iter->_type == ChangeBase::CLEARDOC) {
+ this->_defaultValue._doc = iter->_doc;
+ applyUpdateValueChange(this->_defaultValue, enumStore,
+ currEnumIndices);
+ }
+ }
+
+ makePostingChange(&cmpa, currEnumIndices, changePost);
+
+ this->updatePostings(changePost);
+ SingleValueNumericEnumAttribute<B>::applyValueChanges(unused);
+}
+
+template <typename B>
+void
+SingleValueNumericPostingAttribute<B>::removeOldGenerations(generation_t firstUsed)
+{
+ SingleValueNumericEnumAttribute<B>::removeOldGenerations(firstUsed);
+ _postingList.trimHoldLists(firstUsed);
+}
+
+template <typename B>
+void
+SingleValueNumericPostingAttribute<B>::onGenerationChange(generation_t generation)
+{
+ _postingList.freeze();
+ SingleValueNumericEnumAttribute<B>::onGenerationChange(generation);
+ _postingList.transferHoldLists(generation - 1);
+}
+
+template <typename B>
+AttributeVector::SearchContext::UP
+SingleValueNumericPostingAttribute<B>::getSearch(QueryTermSimple::UP qTerm,
+ const AttributeVector::SearchContext::Params & params) const
+{
+ return std::unique_ptr<AttributeVector::SearchContext>
+ (new SinglePostingSearchContext(std::move(qTerm),
+ params,
+ *this));
+}
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/attribute/singlesmallnumericattribute.cpp b/searchlib/src/vespa/searchlib/attribute/singlesmallnumericattribute.cpp
new file mode 100644
index 00000000000..a855adfdbf9
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/singlesmallnumericattribute.cpp
@@ -0,0 +1,242 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "singlesmallnumericattribute.h"
+#include <vespa/searchlib/attribute/attributevector.hpp>
+
+namespace search
+{
+
+SingleValueSmallNumericAttribute::
+SingleValueSmallNumericAttribute(const vespalib::string & baseFileName,
+ const Config & c,
+ Word valueMask,
+ uint32_t valueShiftShift,
+ uint32_t valueShiftMask,
+ uint32_t wordShift)
+ : B(baseFileName, c, c.basicType()),
+ _valueMask(valueMask),
+ _valueShiftShift(valueShiftShift),
+ _valueShiftMask(valueShiftMask),
+ _wordShift(wordShift),
+ _wordData(c.getGrowStrategy().getDocsInitialCapacity(),
+ c.getGrowStrategy().getDocsGrowPercent(),
+ c.getGrowStrategy().getDocsGrowDelta(),
+ getGenerationHolder())
+{
+ assert(_valueMask + 1 == (1u << (1u << valueShiftShift)));
+ assert((_valueShiftMask + 1) * (1u << valueShiftShift) ==
+ 8 * sizeof(Word));
+ assert(_valueShiftMask + 1 == (1u << wordShift));
+}
+
+
+SingleValueSmallNumericAttribute::~SingleValueSmallNumericAttribute(void)
+{
+ getGenerationHolder().clearHoldLists();
+}
+
+
+void
+SingleValueSmallNumericAttribute::onCommit()
+{
+ checkSetMaxValueCount(1);
+
+ {
+ // apply updates
+ B::ValueModifier valueGuard(getValueModifier());
+ for (const auto & change : _changes) {
+ if (change._type == ChangeBase::UPDATE) {
+ std::atomic_thread_fence(std::memory_order_release);
+ set(change._doc, change._data);
+ } else if (change._type >= ChangeBase::ADD &&
+ change._type <= ChangeBase::DIV) {
+ std::atomic_thread_fence(std::memory_order_release);
+ set(change._doc, applyArithmetic(getFast(change._doc), change));
+ } else if (change._type == ChangeBase::CLEARDOC) {
+ std::atomic_thread_fence(std::memory_order_release);
+ set(change._doc, 0u);
+ }
+ }
+ }
+
+ std::atomic_thread_fence(std::memory_order_release);
+ removeAllOldGenerations();
+
+ _changes.clear();
+}
+
+
+void
+SingleValueSmallNumericAttribute::onUpdateStat()
+{
+ MemoryUsage usage = _wordData.getMemoryUsage();
+ usage.incAllocatedBytesOnHold(getGenerationHolder().getHeldBytes());
+ uint32_t numDocs = B::getNumDocs();
+ updateStatistics(numDocs, numDocs,
+ usage.allocatedBytes(), usage.usedBytes(),
+ usage.deadBytes(), usage.allocatedBytesOnHold());
+}
+
+
+void
+SingleValueSmallNumericAttribute::removeOldGenerations(generation_t firstUsed)
+{
+ getGenerationHolder().trimHoldLists(firstUsed);
+}
+
+
+void
+SingleValueSmallNumericAttribute::onGenerationChange(generation_t generation)
+{
+ getGenerationHolder().transferHoldLists(generation - 1);
+}
+
+
+bool
+SingleValueSmallNumericAttribute::onLoad()
+{
+ B::PrimitiveReader<Word> attrReader(*this);
+ bool ok(attrReader.hasData());
+ if (ok) {
+ setCreateSerialNum(attrReader.getCreateSerialNum());
+ const size_t sz(attrReader.getDataCount());
+ getGenerationHolder().clearHoldLists();
+ _wordData.reset();
+ _wordData.unsafe_reserve(sz - 1);
+ Word numDocs = attrReader.getNextData();
+ for (uint32_t i = 1; i < sz; ++i) {
+ _wordData.push_back(attrReader.getNextData());
+ }
+ assert(((numDocs + _valueShiftMask) >> _wordShift) + 1 == sz);
+ B::setNumDocs(numDocs);
+ B::setCommittedDocIdLimit(numDocs);
+ }
+
+ return ok;
+}
+
+
+void
+SingleValueSmallNumericAttribute::onSave(IAttributeSaveTarget &saveTarget)
+{
+ assert(!saveTarget.getEnumerated());
+ const size_t numDocs(getCommittedDocIdLimit());
+ const size_t numDataWords((numDocs + _valueShiftMask) >> _wordShift);
+ const size_t sz((numDataWords + 1) * sizeof(Word));
+ IAttributeSaveTarget::Buffer buf(saveTarget.datWriter().allocBuf(sz));
+
+ char *p = buf->getFree();
+ const char *e = p + sz;
+ Word numDocs2 = numDocs;
+ memcpy(p, &numDocs2, sizeof(Word));
+ p += sizeof(Word);
+ memcpy(p, &_wordData[0], numDataWords * sizeof(Word));
+ p += numDataWords * sizeof(Word);
+ assert(p == e);
+ (void) e;
+ buf->moveFreeToData(sz);
+ saveTarget.datWriter().writeBuf(std::move(buf));
+ assert(numDocs == getCommittedDocIdLimit());
+}
+
+
+AttributeVector::SearchContext::UP
+SingleValueSmallNumericAttribute::getSearch(QueryTermSimple::UP qTerm,
+ const SearchContext::Params & params) const
+{
+ (void) params;
+ return SearchContext::UP(new SingleSearchContext(std::move(qTerm), *this));
+}
+
+
+void
+SingleValueSmallNumericAttribute::clearDocs(DocId lidLow, DocId lidLimit)
+{
+ assert(lidLow <= lidLimit);
+ assert(lidLimit <= getNumDocs());
+ for (DocId lid = lidLow; lid < lidLimit; ++lid) {
+ if (getFast(lid) != 0) {
+ clearDoc(lid);
+ }
+ }
+}
+
+
+void
+SingleValueSmallNumericAttribute::onShrinkLidSpace()
+{
+ uint32_t committedDocIdLimit = getCommittedDocIdLimit();
+ assert(committedDocIdLimit < getNumDocs());
+ const size_t numDocs(committedDocIdLimit);
+ const size_t numDataWords((numDocs + _valueShiftMask) >> _wordShift);
+ _wordData.shrink(numDataWords);
+ setNumDocs(committedDocIdLimit);
+}
+
+
+uint64_t
+SingleValueSmallNumericAttribute::getEstimatedSaveByteSize() const
+{
+ uint64_t headerSize = 4096;
+ const size_t numDocs(getCommittedDocIdLimit());
+ const size_t numDataWords((numDocs + _valueShiftMask) >> _wordShift);
+ const size_t sz((numDataWords + 1) * sizeof(Word));
+ return headerSize + sz;
+}
+
+
+namespace
+{
+
+template <typename TT>
+uint32_t
+log2bits(void);
+
+template <>
+uint32_t
+log2bits<uint32_t>(void)
+{
+ return 0x05u;
+}
+
+}
+
+
+SingleValueBitNumericAttribute::
+SingleValueBitNumericAttribute(const vespalib::string &baseFileName)
+ : SingleValueSmallNumericAttribute(baseFileName,
+ Config(BasicType::UINT1, CollectionType::SINGLE),
+ 0x01u /* valueMask */,
+ 0x00u /* valueShiftShift */,
+ 8 * sizeof(Word) - 1 /* valueShiftMask */,
+ log2bits<Word>() /* wordShift */)
+{
+}
+
+
+SingleValueSemiNibbleNumericAttribute::
+SingleValueSemiNibbleNumericAttribute(const vespalib::string &baseFileName)
+ : SingleValueSmallNumericAttribute(baseFileName,
+ Config(BasicType::UINT2, CollectionType::SINGLE),
+ 0x03u /* valueMask */,
+ 0x01u /* valueShiftShift */,
+ 4 * sizeof(Word) - 1 /* valueShiftMask */,
+ log2bits<Word>() - 1/* wordShift */)
+{
+}
+
+
+SingleValueNibbleNumericAttribute::
+SingleValueNibbleNumericAttribute(const vespalib::string &baseFileName)
+ : SingleValueSmallNumericAttribute(baseFileName,
+ Config(BasicType::UINT1, CollectionType::SINGLE),
+ 0x0fu /* valueMask */,
+ 0x02u /* valueShiftShift */,
+ 2 * sizeof(Word) - 1 /* valueShiftMask */,
+ log2bits<Word>() - 2/* wordShift */)
+{
+}
+
+
+}
diff --git a/searchlib/src/vespa/searchlib/attribute/singlesmallnumericattribute.h b/searchlib/src/vespa/searchlib/attribute/singlesmallnumericattribute.h
new file mode 100644
index 00000000000..548f612a6f6
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/singlesmallnumericattribute.h
@@ -0,0 +1,313 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/attribute/integerbase.h>
+#include <vespa/searchlib/attribute/floatbase.h>
+#include <vespa/searchlib/attribute/attributeiterators.h>
+#include <vespa/searchlib/common/rcuvector.h>
+#include <vespa/searchlib/query/query.h>
+#include <vespa/searchlib/queryeval/emptysearch.h>
+#include <limits>
+#include <string>
+
+namespace search {
+
+class SingleValueSmallNumericAttribute :
+ public IntegerAttributeTemplate<int8_t>
+{
+private:
+// friend class AttributeVector::SearchContext;
+ typedef IntegerAttributeTemplate<int8_t> B;
+ typedef B::BaseType T;
+ typedef B::DocId DocId;
+ typedef B::EnumHandle EnumHandle;
+ typedef B::largeint_t largeint_t;
+ typedef B::Weighted Weighted;
+ typedef B::WeightedInt WeightedInt;
+ typedef B::WeightedFloat WeightedFloat;
+ typedef B::WeightedEnum WeightedEnum;
+ typedef B::generation_t generation_t;
+
+protected:
+ typedef uint32_t Word; // Large enough to contain numDocs.
+private:
+ Word _valueMask; // 0x01, 0x03 or 0x0f
+ uint32_t _valueShiftShift; // 0x00, 0x01 or 0x02
+ uint32_t _valueShiftMask; // 0x1f, 0x0f or 0x07
+ uint32_t _wordShift; // 0x05, 0x04 or 0x03
+
+ typedef search::attribute::RcuVectorBase<Word> DataVector;
+ DataVector _wordData;
+
+ virtual T getFromEnum(EnumHandle e) const {
+ (void) e;
+ return T();
+ }
+
+protected:
+ virtual bool
+ findEnum(T value, EnumHandle & e) const
+ {
+ (void) value; (void) e;
+ return false;
+ }
+
+ void
+ set(DocId doc, T v)
+ {
+ Word &word = _wordData[doc >> _wordShift];
+ uint32_t valueShift = (doc & _valueShiftMask) << _valueShiftShift;
+ word = (word & ~(_valueMask << valueShift)) |
+ ((v & _valueMask) << valueShift);
+ }
+
+
+public:
+ /*
+ * Specialization of SearchContext
+ */
+ class SingleSearchContext : public NumericAttribute::Range<T>, public SearchContext
+ {
+ private:
+ const Word *_wordData;
+ Word _valueMask;
+ uint32_t _valueShiftShift;
+ uint32_t _valueShiftMask;
+ uint32_t _wordShift;
+
+ virtual bool
+ onCmp(DocId docId, int32_t & weight) const
+ {
+ return cmp(docId, weight);
+ }
+
+ virtual bool
+ onCmp(DocId docId) const
+ {
+ return cmp(docId);
+ }
+
+ virtual bool valid() const { return this->isValid(); }
+
+ public:
+ SingleSearchContext(QueryTermSimple::UP qTerm, const NumericAttribute & toBeSearched)
+ : NumericAttribute::Range<T>(*qTerm),
+ SearchContext(toBeSearched),
+ _wordData(&static_cast<const SingleValueSmallNumericAttribute &>
+ (toBeSearched)._wordData[0]),
+ _valueMask(static_cast<const SingleValueSmallNumericAttribute &>
+ (toBeSearched)._valueMask),
+ _valueShiftShift(
+ static_cast<const SingleValueSmallNumericAttribute &>
+ (toBeSearched)._valueShiftShift),
+ _valueShiftMask(
+ static_cast<const SingleValueSmallNumericAttribute &>
+ (toBeSearched)._valueShiftMask),
+ _wordShift(static_cast<const SingleValueSmallNumericAttribute &>
+ (toBeSearched)._wordShift)
+ {
+ }
+
+ bool
+ cmp(DocId docId, int32_t & weight) const
+ {
+ const Word &word = _wordData[docId >> _wordShift];
+ uint32_t valueShift =
+ (docId & _valueShiftMask) << _valueShiftShift;
+ T v = (word >> valueShift) & _valueMask;
+ weight = 1;
+ return match(v);
+ }
+
+ bool
+ cmp(DocId docId) const
+ {
+ const Word &word = _wordData[docId >> _wordShift];
+ uint32_t valueShift =
+ (docId & _valueShiftMask) << _valueShiftShift;
+ T v = (word >> valueShift) & _valueMask;
+ return match(v);
+ }
+
+ virtual Int64Range getAsIntegerTerm() const {
+ return this->getRange();
+ }
+
+ virtual std::unique_ptr<queryeval::SearchIterator>
+ createFilterIterator(fef::TermFieldMatchData * matchData, bool strict)
+ {
+ if (!valid()) {
+ return queryeval::SearchIterator::UP(
+ new queryeval::EmptySearch());
+ }
+ if (getIsFilter()) {
+ return queryeval::SearchIterator::UP
+ (strict
+ ? new FilterAttributeIteratorStrict<SingleSearchContext>(*this, matchData)
+ : new FilterAttributeIteratorT<SingleSearchContext>(*this, matchData));
+ }
+ return queryeval::SearchIterator::UP
+ (strict
+ ? new AttributeIteratorStrict<SingleSearchContext>(*this, matchData)
+ : new AttributeIteratorT<SingleSearchContext>(*this, matchData));
+ }
+ };
+
+ SingleValueSmallNumericAttribute(const vespalib::string & baseFileName,
+ const Config &c,
+ Word valueMask,
+ uint32_t valueShiftShift,
+ uint32_t valueShiftMask,
+ uint32_t wordShift);
+
+ virtual
+ ~SingleValueSmallNumericAttribute(void);
+
+ virtual uint32_t
+ getValueCount(DocId doc) const
+ {
+ if (doc >= B::getNumDocs()) {
+ return 0;
+ }
+ return 1;
+ }
+ virtual void onCommit();
+ virtual void onUpdateStat();
+ virtual void removeOldGenerations(generation_t firstUsed);
+ virtual void onGenerationChange(generation_t generation);
+ virtual bool addDoc(DocId & doc) {
+ if ((B::getNumDocs() & _valueShiftMask) == 0) {
+ bool incGen = _wordData.isFull();
+ _wordData.push_back(Word());
+ std::atomic_thread_fence(std::memory_order_release);
+ B::incNumDocs();
+ doc = B::getNumDocs() - 1;
+ updateUncommittedDocIdLimit(doc);
+ if (incGen) {
+ this->incGeneration();
+ } else
+ this->removeAllOldGenerations();
+ } else {
+ B::incNumDocs();
+ doc = B::getNumDocs() - 1;
+ updateUncommittedDocIdLimit(doc);
+ }
+ return true;
+ }
+ virtual bool onLoad();
+
+ virtual void
+ onSave(IAttributeSaveTarget &saveTarget);
+
+ SearchContext::UP
+ getSearch(QueryTermSimple::UP term, const SearchContext::Params & params) const override;
+
+ T getFast(DocId doc) const {
+ const Word &word = _wordData[doc >> _wordShift];
+ uint32_t valueShift = (doc & _valueShiftMask) << _valueShiftShift;
+ return (word >> valueShift) & _valueMask;
+ }
+
+ //-------------------------------------------------------------------------
+ // new read api
+ //-------------------------------------------------------------------------
+ virtual T get(DocId doc) const {
+ return getFast(doc);
+ }
+ virtual largeint_t getInt(DocId doc) const {
+ return static_cast<largeint_t>(getFast(doc));
+ }
+ virtual void
+ getEnumValue(const EnumHandle * v, uint32_t *e, uint32_t sz) const {
+ (void) v;
+ (void) e;
+ (void) sz;
+ }
+ virtual double getFloat(DocId doc) const {
+ return static_cast<double>(getFast(doc));
+ }
+ virtual uint32_t getEnum(DocId doc) const {
+ (void) doc;
+ return std::numeric_limits<uint32_t>::max(); // does not have enum
+ }
+ virtual uint32_t getAll(DocId doc, T * v, uint32_t sz) const {
+ if (sz > 0) {
+ v[0] = getFast(doc);
+ }
+ return 1;
+ }
+ virtual uint32_t get(DocId doc, largeint_t * v, uint32_t sz) const {
+ if (sz > 0) {
+ v[0] = static_cast<largeint_t>(getFast(doc));
+ }
+ return 1;
+ }
+ virtual uint32_t get(DocId doc, double * v, uint32_t sz) const {
+ if (sz > 0) {
+ v[0] = static_cast<double>(getFast(doc));
+ }
+ return 1;
+ }
+ virtual uint32_t get(DocId doc, EnumHandle * e, uint32_t sz) const {
+ if (sz > 0) {
+ e[0] = getEnum(doc);
+ }
+ return 1;
+ }
+ virtual uint32_t getAll(DocId doc, Weighted * v, uint32_t sz) const {
+ (void) doc; (void) v; (void) sz;
+ return 0;
+ }
+ virtual uint32_t get(DocId doc, WeightedInt * v, uint32_t sz) const {
+ if (sz > 0) {
+ v[0] = WeightedInt(static_cast<largeint_t>(getFast(doc)));
+ }
+ return 1;
+ }
+ virtual uint32_t get(DocId doc, WeightedFloat * v, uint32_t sz) const {
+ if (sz > 0) {
+ v[0] = WeightedFloat(static_cast<double>(getFast(doc)));
+ }
+ return 1;
+ }
+ virtual uint32_t get(DocId doc, WeightedEnum * e, uint32_t sz) const {
+ (void) doc; (void) e; (void) sz;
+ return 0;
+ }
+
+ virtual void
+ clearDocs(DocId lidLow, DocId lidLimit);
+
+ virtual void
+ onShrinkLidSpace();
+
+ virtual uint64_t getEstimatedSaveByteSize() const override;
+};
+
+
+class SingleValueBitNumericAttribute : public SingleValueSmallNumericAttribute
+{
+public:
+ SingleValueBitNumericAttribute(const vespalib::string & baseFileName);
+};
+
+
+class SingleValueSemiNibbleNumericAttribute :
+ public SingleValueSmallNumericAttribute
+{
+public:
+ SingleValueSemiNibbleNumericAttribute(const vespalib::string &
+ baseFileName);
+};
+
+class SingleValueNibbleNumericAttribute :
+ public SingleValueSmallNumericAttribute
+{
+public:
+ SingleValueNibbleNumericAttribute(const vespalib::string &
+ baseFileName);
+};
+
+}
+
diff --git a/searchlib/src/vespa/searchlib/attribute/singlestringattribute.cpp b/searchlib/src/vespa/searchlib/attribute/singlestringattribute.cpp
new file mode 100644
index 00000000000..0c6c6d8ee06
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/singlestringattribute.cpp
@@ -0,0 +1,14 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "singlestringattribute.h"
+#include "singlestringattribute.hpp"
+#include <vespa/log/log.h>
+
+LOG_SETUP(".searchlib.attribute.singlestringattribute");
+namespace search {
+
+template class SingleValueStringAttributeT<EnumAttribute<StringAttribute>>;
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/attribute/singlestringattribute.h b/searchlib/src/vespa/searchlib/attribute/singlestringattribute.h
new file mode 100644
index 00000000000..805850839a6
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/singlestringattribute.h
@@ -0,0 +1,115 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/attribute/stringbase.h>
+#include <vespa/searchlib/attribute/enumattribute.h>
+#include <vespa/searchlib/attribute/singleenumattribute.h>
+#include "enumhintsearchcontext.h"
+
+namespace search {
+
+/*
+ * Implementation of single value string attribute that uses an underlying enum store
+ * to store unique string values.
+ *
+ * B: EnumAttribute<StringAttribute>
+ */
+template <typename B>
+class SingleValueStringAttributeT : public SingleValueEnumAttribute<B>
+{
+protected:
+ typedef StringAttribute::DocId DocId;
+ typedef StringAttribute::EnumHandle EnumHandle;
+ typedef StringAttribute::generation_t generation_t;
+ typedef StringAttribute::WeightedString WeightedString;
+ typedef StringAttribute::WeightedConstChar WeightedConstChar;
+ typedef StringAttribute::WeightedEnum WeightedEnum;
+ typedef StringAttribute::SearchContext SearchContext;
+ typedef StringAttribute::ChangeVector ChangeVector;
+ typedef StringAttribute::Change Change;
+ typedef StringAttribute::ValueModifier ValueModifier;
+ typedef StringAttribute::EnumModifier EnumModifier;
+ typedef StringAttribute::LoadedVector LoadedVector;
+
+ typedef typename SingleValueEnumAttribute<B>::EnumStore EnumStore;
+ typedef typename SingleValueEnumAttributeBase::EnumIndex EnumIndex;
+ typedef typename SingleValueEnumAttributeBase::EnumIndexVector EnumIndexVector;
+ typedef attribute::EnumHintSearchContext EnumHintSearchContext;
+
+public:
+ SingleValueStringAttributeT(const vespalib::string & name, const AttributeVector::Config & c =
+ AttributeVector::Config(AttributeVector::BasicType::STRING));
+ ~SingleValueStringAttributeT();
+
+ virtual void
+ freezeEnumDictionary(void);
+
+ //-------------------------------------------------------------------------
+ // Attribute read API
+ //-------------------------------------------------------------------------
+ virtual bool isUndefined(DocId doc) const { return get(doc)[0] == '\0'; }
+ virtual const char * get(DocId doc) const {
+ return this->_enumStore.getValue(this->_enumIndices[doc]);
+ }
+ virtual uint32_t get(DocId doc, vespalib::string * v, uint32_t sz) const {
+ if (sz > 0) {
+ v[0] = get(doc);
+ }
+ return 1;
+ }
+ virtual uint32_t get(DocId doc, const char ** v, uint32_t sz) const {
+ if (sz > 0) {
+ v[0] = get(doc);
+ }
+ return 1;
+ }
+ virtual uint32_t get(DocId doc, WeightedString * v, uint32_t sz) const {
+ if (sz > 0) {
+ v[0] = WeightedString(get(doc), 1);
+ }
+ return 1;
+ }
+ virtual uint32_t get(DocId doc, WeightedConstChar * v, uint32_t sz) const {
+ if (sz > 0) {
+ v[0] = WeightedConstChar(get(doc), 1);
+ }
+ return 1;
+ }
+
+ AttributeVector::SearchContext::UP
+ getSearch(QueryTermSimple::UP term, const AttributeVector::SearchContext::Params & params) const override;
+
+ class StringSingleImplSearchContext : public StringAttribute::StringSearchContext {
+ public:
+ StringSingleImplSearchContext(QueryTermSimple::UP qTerm, const StringAttribute & toBeSearched) :
+ StringSearchContext(std::move(qTerm), toBeSearched)
+ { }
+ protected:
+ bool onCmp(DocId doc, int32_t & weight) const override {
+ weight = 1;
+ return onCmp(doc);
+ }
+
+ bool onCmp(DocId doc) const override {
+ const SingleValueStringAttributeT<B> & attr(static_cast<const SingleValueStringAttributeT<B> &>(attribute()));
+ return isMatch(attr._enumStore.getValue(attr._enumIndices[doc]));
+ }
+
+ };
+
+ class StringTemplSearchContext : public StringSingleImplSearchContext,
+ public EnumHintSearchContext
+ {
+ using StringSingleImplSearchContext::queryTerm;
+ typedef SingleValueStringAttributeT<B> AttrType;
+ typedef typename EnumStore::FoldedComparatorType FoldedComparatorType;
+ public:
+ StringTemplSearchContext(QueryTermSimple::UP qTerm, const AttrType & toBeSearched);
+ };
+};
+
+typedef SingleValueStringAttributeT<EnumAttribute<StringAttribute> > SingleValueStringAttribute;
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/attribute/singlestringattribute.hpp b/searchlib/src/vespa/searchlib/attribute/singlestringattribute.hpp
new file mode 100644
index 00000000000..42859d0d862
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/singlestringattribute.hpp
@@ -0,0 +1,80 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/log/log.h>
+#include <vespa/searchlib/attribute/singlestringattribute.h>
+#include <vespa/searchlib/attribute/stringattribute.h>
+#include <vespa/searchlib/attribute/singleenumattribute.hpp>
+#include <vespa/searchlib/attribute/attributevector.hpp>
+#include <vespa/searchlib/util/bufferwriter.h>
+#include <vespa/fastlib/io/bufferedfile.h>
+#include <vespa/vespalib/text/utf8.h>
+#include <vespa/vespalib/text/lowercase.h>
+#include <set>
+#include <string>
+
+namespace search {
+
+//-----------------------------------------------------------------------------
+// SingleValueStringAttributeT public
+//-----------------------------------------------------------------------------
+template <typename B>
+SingleValueStringAttributeT<B>::
+SingleValueStringAttributeT(const vespalib::string &name,
+ const AttributeVector::Config & c)
+ : SingleValueEnumAttribute<B>(name, c)
+{
+}
+
+template <typename B>
+SingleValueStringAttributeT<B>::~SingleValueStringAttributeT()
+{
+}
+
+template <typename B>
+void
+SingleValueStringAttributeT<B>::freezeEnumDictionary(void)
+{
+ this->getEnumStore().freezeTree();
+}
+
+
+template <typename B>
+AttributeVector::SearchContext::UP
+SingleValueStringAttributeT<B>::getSearch(QueryTermSimple::UP qTerm,
+ const AttributeVector::SearchContext::Params & params) const
+{
+ (void) params;
+ return std::unique_ptr<search::AttributeVector::SearchContext>
+ (new StringTemplSearchContext(std::move(qTerm), *this));
+}
+
+template <typename B>
+SingleValueStringAttributeT<B>::StringTemplSearchContext::StringTemplSearchContext(QueryTermSimple::UP qTerm, const AttrType & toBeSearched) :
+ StringSingleImplSearchContext(std::move(qTerm), toBeSearched),
+ EnumHintSearchContext(toBeSearched.getEnumStore().getEnumStoreDict(),
+ toBeSearched.getCommittedDocIdLimit(),
+ toBeSearched.getStatus().getNumValues())
+{
+ const EnumStore &enumStore(toBeSearched.getEnumStore());
+
+ this->_plsc = static_cast<attribute::IPostingListSearchContext *>(this);
+ if (this->valid()) {
+ if (this->isPrefix()) {
+ FoldedComparatorType comp(enumStore, queryTerm().getTerm(), true);
+ lookupRange(comp, comp);
+ } else if (this->isRegex()) {
+ vespalib::string prefix(vespalib::Regexp::get_prefix(this->queryTerm().getTerm()));
+ FoldedComparatorType comp(enumStore, prefix.c_str(), true);
+ lookupRange(comp, comp);
+ } else {
+ FoldedComparatorType comp(enumStore, queryTerm().getTerm());
+ lookupTerm(comp);
+ }
+ }
+}
+
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/attribute/singlestringpostattribute.cpp b/searchlib/src/vespa/searchlib/attribute/singlestringpostattribute.cpp
new file mode 100644
index 00000000000..e0ac10c10af
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/singlestringpostattribute.cpp
@@ -0,0 +1,12 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "singlestringpostattribute.h"
+#include "singlestringpostattribute.hpp"
+#include <vespa/log/log.h>
+
+LOG_SETUP(".searchlib.attribute.singlestringpostattribute");
+namespace search {
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/attribute/singlestringpostattribute.h b/searchlib/src/vespa/searchlib/attribute/singlestringpostattribute.h
new file mode 100644
index 00000000000..449c75cadc4
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/singlestringpostattribute.h
@@ -0,0 +1,127 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/attribute/singlestringattribute.h>
+#include <vespa/searchlib/attribute/postinglistattribute.h>
+
+namespace search {
+
+/*
+ * Implementation of single value string attribute that in addition to enum store
+ * uses an underlying posting list to provide faster search.
+ *
+ * B: EnumAttribute<StringAttribute>
+ */
+template <typename B>
+class SingleValueStringPostingAttributeT
+ : public SingleValueStringAttributeT<B>,
+ protected PostingListAttributeSubBase<AttributePosting,
+ typename B::LoadedVector,
+ typename B::LoadedValueType,
+ typename B::EnumStore>
+{
+private:
+ friend class PostingListAttributeTest;
+ template <typename, typename, typename>
+ friend class attribute::PostingSearchContext; // getEnumStore()
+ friend class StringAttributeTest;
+ typedef SingleValueStringPostingAttributeT<B> SelfType;
+ typedef typename B::LoadedVector LoadedVector;
+ typedef attribute::LoadedEnumAttributeVector LoadedEnumAttributeVector;
+ typedef PostingListAttributeSubBase<AttributePosting,
+ LoadedVector,
+ typename B::LoadedValueType,
+ typename B::EnumStore> PostingParent;
+ typedef typename SingleValueStringAttributeT<B>::DocId DocId;
+public:
+ typedef typename SingleValueStringAttributeT<B>::EnumStore EnumStore;
+private:
+ typedef typename SingleValueStringAttributeT<B>::EnumIndex EnumIndex;
+ typedef typename SingleValueStringAttributeT<B>::generation_t generation_t;
+ typedef typename SingleValueStringAttributeT<B>::ValueModifier ValueModifier;
+
+ typedef typename SingleValueStringAttributeT<B>::StringSingleImplSearchContext StringSingleImplSearchContext;
+ typedef attribute::StringPostingSearchContext<StringSingleImplSearchContext,
+ SelfType,
+ btree::BTreeNoLeafData>
+ StringSinglePostingSearchContext;
+
+ typedef StringAttribute::Change Change;
+ typedef StringAttribute::ChangeVector ChangeVector;
+
+ typedef typename PostingParent::PostingList PostingList;
+ typedef typename PostingParent::PostingMap PostingMap;
+ // typedef typename PostingParent::Posting Posting;
+
+ typedef EnumPostingTree Dictionary;
+ typedef typename EnumStore::ComparatorType ComparatorType;
+ typedef typename EnumStore::FoldedComparatorType FoldedComparatorType;
+ typedef typename Dictionary::Iterator DictionaryIterator;
+ typedef typename Dictionary::ConstIterator DictionaryConstIterator;
+ typedef typename Dictionary::FrozenView FrozenDictionary;
+ using PostingParent::_postingList;
+ using PostingParent::clearAllPostings;
+ using PostingParent::handleFillPostings;
+ using PostingParent::fillPostingsFixupEnumBase;
+ using PostingParent::forwardedOnAddDoc;
+public:
+ using PostingParent::getPostingList;
+
+private:
+ virtual void freezeEnumDictionary();
+ virtual void mergeMemoryStats(MemoryUsage & total);
+ void applyUpdateValueChange(const Change & c,
+ EnumStore & enumStore,
+ std::map<DocId, EnumIndex> &currEnumIndices);
+
+ void
+ makePostingChange(const EnumStoreComparator *cmp,
+ Dictionary &dict,
+ const std::map<DocId, EnumIndex> &currEnumIndices,
+ PostingMap &changePost);
+
+ virtual void applyValueChanges(EnumStoreBase::IndexVector & unused);
+public:
+ SingleValueStringPostingAttributeT(const vespalib::string & name, const AttributeVector::Config & c =
+ AttributeVector::Config(AttributeVector::BasicType::STRING));
+ ~SingleValueStringPostingAttributeT();
+
+ virtual void removeOldGenerations(generation_t firstUsed);
+ virtual void onGenerationChange(generation_t generation);
+
+ AttributeVector::SearchContext::UP
+ getSearch(QueryTermSimple::UP term, const AttributeVector::SearchContext::Params & params) const override;
+
+ virtual bool
+ onAddDoc(DocId doc)
+ {
+ return forwardedOnAddDoc(doc,
+ this->_enumIndices.size(),
+ this->_enumIndices.capacity());
+ }
+
+ virtual void
+ fillPostings(LoadedVector & loaded)
+ {
+ handleFillPostings(loaded);
+ }
+
+ virtual attribute::IPostingListAttributeBase *
+ getIPostingListAttributeBase(void)
+ {
+ return this;
+ }
+
+ virtual void
+ fillPostingsFixupEnum(const LoadedEnumAttributeVector &loaded)
+ {
+ fillPostingsFixupEnumBase(loaded);
+ }
+};
+
+typedef SingleValueStringPostingAttributeT<EnumAttribute<StringAttribute> > SingleValueStringPostingAttribute;
+
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/attribute/singlestringpostattribute.hpp b/searchlib/src/vespa/searchlib/attribute/singlestringpostattribute.hpp
new file mode 100644
index 00000000000..a2fe36b2b16
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/singlestringpostattribute.hpp
@@ -0,0 +1,150 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/attribute/singlestringpostattribute.h>
+
+namespace search {
+
+template <typename B>
+SingleValueStringPostingAttributeT<B>::SingleValueStringPostingAttributeT(const vespalib::string & name,
+ const AttributeVector::Config & c) :
+ SingleValueStringAttributeT<B>(name, c),
+ PostingParent(*this, this->getEnumStore())
+{
+}
+
+template <typename B>
+SingleValueStringPostingAttributeT<B>::~SingleValueStringPostingAttributeT()
+{
+ this->disableFreeLists();
+ this->disableElemHoldList();
+ clearAllPostings();
+}
+
+template <typename B>
+void
+SingleValueStringPostingAttributeT<B>::freezeEnumDictionary()
+{
+ this->getEnumStore().freezeTree();
+}
+
+template <typename B>
+void
+SingleValueStringPostingAttributeT<B>::mergeMemoryStats(MemoryUsage & total)
+{
+ total.merge(this->_postingList.getMemoryUsage());
+}
+
+template <typename B>
+void
+SingleValueStringPostingAttributeT<B>::applyUpdateValueChange(const Change & c,
+ EnumStore & enumStore,
+ std::map<DocId, EnumIndex> &currEnumIndices)
+{
+ EnumIndex newIdx;
+ enumStore.findIndex(c._data.raw(), newIdx);
+
+ currEnumIndices[c._doc] = newIdx;
+
+}
+
+
+template <typename B>
+void
+SingleValueStringPostingAttributeT<B>::
+makePostingChange(const EnumStoreComparator *cmpa,
+ Dictionary &dict,
+ const std::map<DocId, EnumIndex> &currEnumIndices,
+ PostingMap &changePost)
+{
+ typedef typename std::map<DocId, EnumIndex>::const_iterator EnumIter;
+ for (EnumIter iter = currEnumIndices.begin(), end = currEnumIndices.end();
+ iter != end; ++iter) {
+
+ uint32_t docId = iter->first;
+ EnumIndex oldIdx = this->_enumIndices[docId];
+ EnumIndex newIdx = iter->second;
+
+ // add new posting
+ DictionaryIterator addItr = dict.find(newIdx, *cmpa);
+ changePost[EnumPostingPair(addItr.getKey(), cmpa)].add(docId, 1);
+
+ // remove old posting
+ if ( oldIdx.valid()) {
+ DictionaryIterator rmItr = dict.find(oldIdx, *cmpa);
+ changePost[EnumPostingPair(rmItr.getKey(), cmpa)].remove(docId);
+ }
+ }
+}
+
+
+template <typename B>
+void
+SingleValueStringPostingAttributeT<B>::applyValueChanges(EnumStoreBase::IndexVector & unused)
+{
+ EnumStore & enumStore = this->getEnumStore();
+ Dictionary & dict = enumStore.getPostingDictionary();
+ FoldedComparatorType cmpa(enumStore);
+ PostingMap changePost;
+
+ // used to make sure several arithmetic operations on the same document in a single commit works
+ std::map<DocId, EnumIndex> currEnumIndices;
+
+ typedef ChangeVector::const_iterator CVIterator;
+ for (CVIterator iter = this->_changes.begin(), end = this->_changes.end(); iter != end; ++iter) {
+ typename std::map<DocId, EnumIndex>::const_iterator enumIter = currEnumIndices.find(iter->_doc);
+ EnumIndex oldIdx;
+ if (enumIter != currEnumIndices.end()) {
+ oldIdx = enumIter->second;
+ } else {
+ oldIdx = this->_enumIndices[iter->_doc];
+ }
+ if (iter->_type == ChangeBase::UPDATE) {
+ applyUpdateValueChange(*iter, enumStore,
+ currEnumIndices);
+ } else if (iter->_type == ChangeBase::CLEARDOC) {
+ this->_defaultValue._doc = iter->_doc;
+ applyUpdateValueChange(this->_defaultValue, enumStore,
+ currEnumIndices);
+ }
+ }
+
+ makePostingChange(&cmpa, dict, currEnumIndices, changePost);
+
+ this->updatePostings(changePost);
+
+ SingleValueStringAttributeT<B>::applyValueChanges(unused);
+}
+
+template <typename B>
+void
+SingleValueStringPostingAttributeT<B>::removeOldGenerations(generation_t firstUsed)
+{
+ SingleValueStringAttributeT<B>::removeOldGenerations(firstUsed);
+ _postingList.trimHoldLists(firstUsed);
+}
+
+template <typename B>
+void
+SingleValueStringPostingAttributeT<B>::onGenerationChange(generation_t generation)
+{
+ _postingList.freeze();
+ SingleValueStringAttributeT<B>::onGenerationChange(generation);
+ _postingList.transferHoldLists(generation - 1);
+}
+
+template <typename B>
+AttributeVector::SearchContext::UP
+SingleValueStringPostingAttributeT<B>::getSearch(QueryTermSimple::UP qTerm,
+ const AttributeVector::SearchContext::Params & params) const
+{
+ return std::unique_ptr<search::AttributeVector::SearchContext>
+ (new StringSinglePostingSearchContext(std::move(qTerm),
+ params.useBitVector(),
+ *this));
+}
+
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/attribute/sourceselector.cpp b/searchlib/src/vespa/searchlib/attribute/sourceselector.cpp
new file mode 100644
index 00000000000..bac7dcfa7f7
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/sourceselector.cpp
@@ -0,0 +1,136 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "sourceselector.h"
+#include <vespa/vespalib/data/fileheader.h>
+#include <vespa/vespalib/util/sync.h>
+#include <memory>
+#include <vespa/searchlib/common/fileheadercontext.h>
+
+using search::queryeval::Source;
+using vespalib::FileHeader;
+using vespalib::GenericHeader;
+using search::common::FileHeaderContext;
+
+namespace search {
+
+namespace {
+
+const vespalib::string defaultSourceTag = "Default source";
+const vespalib::string baseIdTag = "Base id";
+const vespalib::string docIdLimitTag = "Doc id limit";
+
+class AddMyHeaderTags : public FileHeaderContext
+{
+ const SourceSelector::HeaderInfo &_hi;
+ const FileHeaderContext &_parent;
+
+public:
+ AddMyHeaderTags(const SourceSelector::HeaderInfo &hi,
+ const FileHeaderContext &parent)
+ : _hi(hi),
+ _parent(parent)
+ {
+ }
+
+ virtual void
+ addTags(GenericHeader &header, const vespalib::string &name) const
+ {
+ typedef GenericHeader::Tag Tag;
+ _parent.addTags(header, name);
+ header.putTag(Tag(defaultSourceTag, _hi._defaultSource));
+ header.putTag(Tag(baseIdTag, _hi._baseId));
+ header.putTag(Tag(docIdLimitTag, _hi._docIdLimit));
+ }
+};
+
+} // namespace
+
+SourceSelector::HeaderInfo::HeaderInfo(const vespalib::string & baseFileName,
+ Source defaultSource,
+ uint32_t baseId,
+ uint32_t docIdLimit) :
+ _baseFileName(baseFileName),
+ _defaultSource(defaultSource),
+ _baseId(baseId),
+ _docIdLimit(docIdLimit)
+{
+}
+
+SourceSelector::SaveInfo::SaveInfo(const vespalib::string & baseFileName,
+ Source defaultSource,
+ uint32_t baseId,
+ uint32_t docIdLimit,
+ AttributeVector & sourceStore)
+ : _header(baseFileName, defaultSource, baseId, docIdLimit),
+ _memSaver()
+{
+ vespalib::string attrName = sourceStore.getBaseFileName();
+ sourceStore.saveAs(_header._baseFileName, _memSaver);
+ sourceStore.setBaseFileName(attrName);
+}
+
+bool
+SourceSelector::SaveInfo::save(const TuneFileAttributes &tuneFileAttributes,
+ const FileHeaderContext &fileHeaderContext)
+{
+ AddMyHeaderTags fh(_header, fileHeaderContext);
+ return _memSaver.writeToFile(tuneFileAttributes, fh);
+}
+
+SourceSelector::LoadInfo::LoadInfo(const vespalib::string &baseFileName)
+ : _header(baseFileName, 0, 0, 0)
+{
+}
+
+void
+SourceSelector::LoadInfo::load()
+{
+ const vespalib::string fileName = _header._baseFileName + ".dat";
+ Fast_BufferedFile file;
+ // XXX no checking for success
+ file.ReadOpen(fileName.c_str());
+
+ FileHeader fileHeader(4096);
+ fileHeader.readFile(file);
+ if (fileHeader.hasTag(defaultSourceTag)) {
+ _header._defaultSource = fileHeader.getTag(defaultSourceTag).asInteger();
+ }
+ if (fileHeader.hasTag(baseIdTag)) {
+ _header._baseId = fileHeader.getTag(baseIdTag).asInteger();
+ }
+ if (fileHeader.hasTag(docIdLimitTag)) {
+ _header._docIdLimit = fileHeader.getTag(docIdLimitTag).asInteger();
+ }
+ file.Close();
+}
+
+SourceSelector::SourceSelector(Source defaultSource, AttributeVector::SP realSource) :
+ ISourceSelector(defaultSource),
+ _realSource(realSource)
+{
+}
+
+SourceSelector::SaveInfo::UP
+SourceSelector::extractSaveInfo(const vespalib::string & baseFileName)
+{
+ return SaveInfo::UP(new SaveInfo(baseFileName, getDefaultSource(), getBaseId(),
+ getDocIdLimit(), *_realSource));
+}
+
+SourceSelector::LoadInfo::UP
+SourceSelector::extractLoadInfo(const vespalib::string & baseFileName)
+{
+ return LoadInfo::UP(new LoadInfo(baseFileName));
+}
+
+SourceSelector::Histogram SourceSelector::getDistribution() const
+{
+ Histogram h;
+ ISourceSelector::Iterator::UP it = createIterator();
+ for (size_t i(0), m(getDocIdLimit()); i < m; i++) {
+ h.inc(it->getSource(i));
+ }
+ return h;
+}
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/attribute/sourceselector.h b/searchlib/src/vespa/searchlib/attribute/sourceselector.h
new file mode 100644
index 00000000000..424839c7495
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/sourceselector.h
@@ -0,0 +1,85 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "attributememorysavetarget.h"
+#include "attributevector.h"
+#include <vespa/searchlib/queryeval/isourceselector.h>
+
+namespace search {
+
+class SourceSelector : public queryeval::ISourceSelector
+{
+private:
+protected:
+ AttributeVector::SP _realSource;
+
+ queryeval::Source getNewSource(queryeval::Source src, uint32_t diff) {
+ return src > diff ? src - diff : 0;
+ }
+
+public:
+ struct HeaderInfo {
+ vespalib::string _baseFileName;
+ queryeval::Source _defaultSource;
+ uint32_t _baseId;
+ uint32_t _docIdLimit;
+ HeaderInfo(const vespalib::string & baseFileName,
+ queryeval::Source defaultSource,
+ uint32_t baseId,
+ uint32_t docIdLimit);
+ };
+
+ class SaveInfo {
+ private:
+ HeaderInfo _header;
+ AttributeMemorySaveTarget _memSaver;
+ public:
+ typedef std::unique_ptr<SaveInfo> UP;
+ typedef std::shared_ptr<SaveInfo> SP;
+ SaveInfo(const vespalib::string & baseFileName,
+ queryeval::Source defaultSource,
+ uint32_t baseId,
+ uint32_t docIdLimit,
+ AttributeVector & sourceStore);
+ const HeaderInfo & getHeader() const { return _header; }
+ bool save(const TuneFileAttributes &tuneFileAttributes,
+ const search::common::FileHeaderContext &fileHeaderContext);
+ };
+
+ class LoadInfo {
+ private:
+ HeaderInfo _header;
+ public:
+ typedef std::unique_ptr<LoadInfo> UP;
+ LoadInfo(const vespalib::string & baseFileName);
+ void load();
+ const HeaderInfo & header() const { return _header; }
+ };
+
+ class Histogram {
+ public:
+ Histogram() { memset(_h, 0, sizeof(_h)); }
+ uint32_t operator [] (queryeval::Source s) const { return _h[s]; }
+ void inc(queryeval::Source s) { _h[s]++; }
+ private:
+ uint32_t _h[256];
+ };
+
+public:
+ typedef std::unique_ptr<SourceSelector> UP;
+ SourceSelector(queryeval::Source defaultSource, AttributeVector::SP realSource);
+ /**
+ * This will compute the distribution of the sources used over the whole lid space.
+ */
+ Histogram getDistribution() const;
+ SaveInfo::UP extractSaveInfo(const vespalib::string & baseFileName);
+ static LoadInfo::UP extractLoadInfo(const vespalib::string & baseFileName);
+
+ // Inherit doc from ISourceSelector
+ virtual void setSource(uint32_t docId, queryeval::Source source) = 0;
+ virtual ISourceSelector::Iterator::UP createIterator() const = 0;
+};
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/attribute/stringattribute.cpp b/searchlib/src/vespa/searchlib/attribute/stringattribute.cpp
new file mode 100644
index 00000000000..95f38484fae
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/stringattribute.cpp
@@ -0,0 +1,11 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "stringattribute.h"
+#include <vespa/log/log.h>
+
+LOG_SETUP(".searchlib.attribute.stringattribute");
+
+namespace search {
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/attribute/stringattribute.h b/searchlib/src/vespa/searchlib/attribute/stringattribute.h
new file mode 100644
index 00000000000..8d38f5f1910
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/stringattribute.h
@@ -0,0 +1,12 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/attribute/stringbase.h>
+#include <vespa/searchlib/attribute/enumstore.h>
+#include <vespa/searchlib/util/foldedstringcompare.h>
+
+namespace search {
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/attribute/stringbase.cpp b/searchlib/src/vespa/searchlib/attribute/stringbase.cpp
new file mode 100644
index 00000000000..911905aaf83
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/stringbase.cpp
@@ -0,0 +1,542 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "stringbase.h"
+#include <vespa/log/log.h>
+#include <vespa/vespalib/util/array.h>
+#include <vespa/vespalib/text/utf8.h>
+#include <vespa/vespalib/text/lowercase.h>
+#include <vespa/searchlib/common/sort.h>
+
+LOG_SETUP(".searchlib.attribute.stringbase");
+
+#include <vespa/searchlib/attribute/attributevector.hpp>
+
+namespace search
+{
+
+IMPLEMENT_IDENTIFIABLE_ABSTRACT(StringAttribute, AttributeVector);
+
+using attribute::LoadedEnumAttribute;
+using attribute::LoadedEnumAttributeVector;
+using vespalib::Regexp;
+
+AttributeVector::SearchContext::UP
+StringAttribute::getSearch(QueryTermSimple::UP term, const SearchContext::Params & params) const
+{
+ (void) params;
+ return SearchContext::UP(new StringSearchContext(std::move(term), *this));
+}
+
+class SortDataChar {
+public:
+ SortDataChar() { }
+ SortDataChar(const char *s) : _data(s), _pos(0) { }
+ operator const char * () const { return _data; }
+ bool operator != (const vespalib::string & b) const { return b != _data; }
+ const char * _data;
+ uint32_t _pos;
+};
+
+class SortDataCharRadix
+{
+public:
+ uint32_t operator () (SortDataChar & a) const {
+ uint32_t r(0);
+ const uint8_t *u((const uint8_t *)(a._data));
+ if (u[a._pos]) {
+ r |= u[a._pos + 0] << 24;
+ if (u[a._pos + 1]) {
+ r |= u[a._pos + 1] << 16;
+ if (u[a._pos + 2]) {
+ r |= u[a._pos + 2] << 8;
+ if (u[a._pos + 3]) {
+ r |= u[a._pos + 3];
+ a._pos += 4;
+ } else {
+ a._pos += 3;
+ }
+ } else {
+ a._pos += 2;
+ }
+ } else {
+ a._pos += 1;
+ }
+ }
+ return r;
+ }
+};
+
+class StdSortDataCharCompare : public std::binary_function<SortDataChar, SortDataChar, bool>
+{
+public:
+ bool operator() (const SortDataChar & x, const SortDataChar & y) const {
+ return cmp(x, y) < 0;
+ }
+ int cmp(const SortDataChar & a, const SortDataChar & b) const {
+ int retval = strcmp(a._data, b._data);
+ return retval;
+ }
+};
+
+
+class SortDataCharEof
+{
+public:
+ bool operator () (const SortDataChar & a) const { return a._data[a._pos] == 0; }
+ static bool alwaysEofOnCheck() { return false; }
+};
+
+class StringSorter {
+public:
+ typedef const char * constcharp;
+ void operator() (SortDataChar * start, size_t sz) const {
+ vespalib::Array<uint32_t> radixScratchPad(sz);
+ search::radix_sort(SortDataCharRadix(), StdSortDataCharCompare(), SortDataCharEof(), 1, start, sz, &radixScratchPad[0], 0, 32);
+ }
+};
+
+size_t StringAttribute::countZero(const char * bt, size_t sz)
+{
+ size_t size(0);
+ for(size_t i(0); i < sz; i++) {
+ if (bt[i] == '\0') {
+ size++;
+ }
+ }
+ return size;
+}
+
+void StringAttribute::generateOffsets(const char * bt, size_t sz, OffsetVector & offsets)
+{
+ offsets.clear();
+ uint32_t start(0);
+ for (size_t i(0); i < sz; i++) {
+ if (bt[i] == '\0') {
+ offsets.push_back(start);
+ start = i + 1;
+ }
+ }
+}
+
+StringAttribute::StringAttribute(const vespalib::string & name) :
+ AttributeVector(name, Config(BasicType::STRING)),
+ _changes(),
+ _defaultValue(ChangeBase::UPDATE, 0, vespalib::string(""))
+{
+}
+
+StringAttribute::StringAttribute(const vespalib::string & name, const Config & c) :
+ AttributeVector(name, c),
+ _changes(),
+ _defaultValue(ChangeBase::UPDATE, 0, vespalib::string(""))
+{
+}
+
+uint32_t StringAttribute::get(DocId doc, WeightedInt * v, uint32_t sz) const
+{
+ WeightedConstChar * s = new WeightedConstChar[sz];
+ uint32_t n = static_cast<const AttributeVector *>(this)->get(doc, s, sz);
+ for(uint32_t i(0),m(std::min(n,sz)); i<m; i++) {
+ v[i] = WeightedInt(strtoll(s[i].getValue(), NULL, 0), s[i].getWeight());
+ }
+ delete [] s;
+ return n;
+}
+
+uint32_t StringAttribute::get(DocId doc, WeightedFloat * v, uint32_t sz) const
+{
+ WeightedConstChar * s = new WeightedConstChar[sz];
+ uint32_t n = static_cast<const AttributeVector *>(this)->get(doc, s, sz);
+ for(uint32_t i(0),m(std::min(n,sz)); i<m; i++) {
+ v[i] = WeightedFloat(strtod(s[i].getValue(), NULL), s[i].getWeight());
+ }
+ delete [] s;
+ return n;
+}
+
+uint32_t StringAttribute::get(DocId doc, double * v, uint32_t sz) const
+{
+ const char ** s = new const char *[sz];
+ uint32_t n = static_cast<const AttributeVector *>(this)->get(doc, s, sz);
+ for(uint32_t i(0),m(std::min(n,sz)); i<m; i++) {
+ v[i] = strtod(s[i], NULL);
+ }
+ delete [] s;
+ return n;
+}
+
+uint32_t StringAttribute::get(DocId doc, largeint_t * v, uint32_t sz) const
+{
+ const char ** s = new const char *[sz];
+ uint32_t n = static_cast<const AttributeVector *>(this)->get(doc, s, sz);
+ for(uint32_t i(0),m(std::min(n,sz)); i<m; i++) {
+ v[i] = strtoll(s[i], NULL, 0);
+ }
+ delete [] s;
+ return n;
+}
+
+long StringAttribute::onSerializeForAscendingSort(DocId doc, void * serTo, long available, const common::BlobConverter * bc) const
+{
+ unsigned char *dst = static_cast<unsigned char *>(serTo);
+ const char *value(get(doc));
+ int size = strlen(value) + 1;
+ vespalib::ConstBufferRef buf(value, size);
+ if (bc != 0) {
+ buf = bc->convert(buf);
+ }
+ if (available >= (long)buf.size()) {
+ memcpy(dst, buf.data(), buf.size());
+ } else {
+ return -1;
+ }
+ return buf.size();
+}
+
+long StringAttribute::onSerializeForDescendingSort(DocId doc, void * serTo, long available, const common::BlobConverter * bc) const
+{
+ (void) bc;
+ unsigned char *dst = static_cast<unsigned char *>(serTo);
+ const char *value(get(doc));
+ int size = strlen(value) + 1;
+ vespalib::ConstBufferRef buf(value, size);
+ if (bc != 0) {
+ buf = bc->convert(buf);
+ }
+ if (available >= (long)buf.size()) {
+ const uint8_t * src(static_cast<const uint8_t *>(buf.data()));
+ for (size_t i(0), m(buf.size()); i < m; ++i) {
+ dst[i] = 0xff - src[i];
+ }
+ } else {
+ return -1;
+ }
+ return buf.size();
+}
+
+StringAttribute::StringSearchContext::StringSearchContext(QueryTermSimple::UP qTerm,
+ const StringAttribute & toBeSearched) :
+ SearchContext(toBeSearched),
+ _isPrefix(qTerm->isPrefix()),
+ _isRegex(qTerm->isRegex()),
+ _queryTerm(std::move(qTerm)),
+ _bufferLen(toBeSearched.getMaxValueCount()),
+ _buffer()
+{
+ queryTerm().term(_termUCS4);
+ if (isRegex()) {
+ _regex.reset(new Regexp(_queryTerm->getTerm(), Regexp::Flags().enableICASE()));
+ }
+}
+
+StringAttribute::StringSearchContext::~StringSearchContext()
+{
+ if (_buffer != NULL) {
+ delete [] _buffer;
+ }
+}
+
+
+uint32_t StringAttribute::clearDoc(DocId doc)
+{
+ uint32_t removed(0);
+ if (hasMultiValue() && (doc < getNumDocs())) {
+ removed = getValueCount(doc);
+ }
+ AttributeVector::clearDoc(_changes, doc);
+
+ return removed;
+}
+
+namespace {
+
+class DirectAccessor {
+public:
+ DirectAccessor() { }
+ const char * get(const char * v) const { return v; }
+};
+
+}
+
+bool
+StringAttribute::StringSearchContext::onCmp(DocId docId, int32_t & weight) const
+{
+ WeightedConstChar * buffer = getBuffer();
+ uint32_t valueCount = attribute().get(docId, buffer, _bufferLen);
+
+ CollectWeight collector;
+ DirectAccessor accessor;
+ collectMatches(buffer, std::min(valueCount, _bufferLen), accessor, collector);
+ weight = collector.getWeight();
+ return collector.hasMatch();
+}
+
+bool
+StringAttribute::StringSearchContext::onCmp(DocId docId) const
+{
+ WeightedConstChar * buffer = getBuffer();
+ uint32_t valueCount = attribute().get(docId, buffer, _bufferLen);
+ for (uint32_t i = 0, m = std::min(valueCount, _bufferLen); (i < m); i++) {
+ if (isMatch(buffer[i].getValue())) {
+ return true;
+ }
+ }
+
+ return false;
+}
+
+bool StringAttribute::applyWeight(DocId doc, const FieldValue & fv, const ArithmeticValueUpdate & wAdjust)
+{
+ vespalib::string v = fv.getAsString();
+ return AttributeVector::adjustWeight(_changes, doc, StringChangeData(v), wAdjust);
+}
+
+bool StringAttribute::apply(DocId, const ArithmeticValueUpdate & )
+{
+ return false;
+}
+
+template <typename T>
+void StringAttribute::loadAllAtOnce(T & loaded, FileUtil::LoadedBuffer::UP dataBuffer, uint32_t numDocs, ReaderBase & attrReader, bool hasWeight, bool hasIdx)
+{
+ if (dataBuffer->c_str()) {
+ const char *value = dataBuffer->c_str();
+ for(uint32_t docIdx(0), valueIdx(0); docIdx < numDocs; docIdx++) {
+ uint32_t currValueCount(hasIdx ? attrReader.getNextValueCount() : 1);
+ for(uint32_t subIdx(0); subIdx < currValueCount; subIdx++) {
+ loaded[valueIdx]._docId = docIdx;
+ loaded[valueIdx]._idx = subIdx;
+ loaded[valueIdx].setValue(value);
+ loaded[valueIdx].setWeight(hasWeight ? attrReader.getNextWeight() : 1);
+ valueIdx++;
+ while(*value++) { }
+ }
+ }
+ }
+
+ attribute::sortLoadedByValue(loaded);
+ fillPostings(loaded);
+ loaded.rewind();
+ fillEnum(loaded);
+
+ dataBuffer.reset();
+
+ attribute::sortLoadedByDocId(loaded);
+ loaded.rewind();
+ fillValues(loaded);
+}
+
+bool
+StringAttribute::onLoadEnumerated(ReaderBase &attrReader)
+{
+ FileUtil::LoadedBuffer::UP udatBuffer(loadUDAT());
+
+ bool hasIdx(attrReader.hasIdx());
+ size_t numDocs(0);
+ uint64_t numValues(0);
+ if (hasIdx) {
+ numDocs = attrReader.getNumIdx() - 1;
+ numValues = attrReader.getNumValues();
+ uint64_t enumCount = attrReader.getEnumCount();
+ assert(numValues == enumCount);
+ (void) enumCount;
+ } else {
+ numValues = attrReader.getEnumCount();
+ numDocs = numValues;
+ }
+
+ LOG(debug,
+ "StringAttribute::onLoadEnumerated: attribute '%s' %u docs, %u values",
+ getBaseFileName().c_str(),
+ (unsigned int) numDocs,
+ (unsigned int) numValues);
+ EnumIndexVector eidxs;
+ FastOS_Time timer;
+ FastOS_Time timer0;
+ timer0.SetNow();
+ LOG(debug, "start fillEnum0");
+ timer.SetNow();
+ fillEnum0(udatBuffer->buffer(), udatBuffer->size(), eidxs);
+ LOG(debug, "done fillEnum0, %u unique values, %8.3f s elapsed",
+ (unsigned int) eidxs.size(), timer.MilliSecsToNow() / 1000);
+ setNumDocs(numDocs);
+ setCommittedDocIdLimit(numDocs);
+ LoadedEnumAttributeVector loaded;
+ EnumVector enumHist;
+ if (hasPostings()) {
+ loaded.reserve(numValues);
+ } else {
+ EnumVector(eidxs.size(), 0).swap(enumHist);
+ }
+ timer.SetNow();
+ LOG(debug, "start fillEnumIdx");
+ if(hasPostings()) {
+ fillEnumIdx(attrReader,
+ numValues,
+ eidxs,
+ loaded);
+ } else {
+ fillEnumIdx(attrReader,
+ numValues,
+ eidxs,
+ enumHist);
+ }
+ LOG(debug, "done fillEnumIdx, %8.3f s elapsed",
+ timer.MilliSecsToNow() / 1000);
+
+ EnumIndexVector().swap(eidxs);
+
+ if (hasPostings()) {
+ LOG(debug, "start sort loaded");
+ timer.SetNow();
+
+ attribute::sortLoadedByEnum(loaded);
+
+ LOG(debug, "done sort loaded, %8.3f s elapsed",
+ timer.MilliSecsToNow() / 1000);
+
+ LOG(debug, "start fillPostingsFixupEnum");
+ timer.SetNow();
+
+ if (numDocs > 0) {
+ onAddDoc(numDocs - 1);
+ }
+ fillPostingsFixupEnum(loaded);
+
+ LOG(debug, "done fillPostingsFixupEnum, %8.3f s elapsed",
+ timer.MilliSecsToNow() / 1000);
+ } else {
+ LOG(debug, "start fixupEnumRefCounts");
+ timer.SetNow();
+
+ fixupEnumRefCounts(enumHist);
+
+ LOG(debug, "done fixupEnumRefCounts, %8.3f s elapsed",
+ timer.MilliSecsToNow() / 1000);
+ }
+
+ LOG(debug, "attribute '%s', loaded, %8.3f s elapsed",
+ getBaseFileName().c_str(),
+ timer0.MilliSecsToNow() / 1000);
+ return true;
+}
+
+bool StringAttribute::onLoad()
+{
+ ReaderBase attrReader(*this);
+ bool ok(attrReader.getHasLoadData());
+
+ if (!ok)
+ return false;
+
+ setCreateSerialNum(attrReader.getCreateSerialNum());
+
+ if (attrReader.getEnumerated())
+ return onLoadEnumerated(attrReader);
+
+ FileUtil::LoadedBuffer::UP dataBuffer(loadDAT());
+
+ bool hasIdx(attrReader.hasIdx());
+ size_t numDocs(0);
+ uint32_t numValues(0);
+ if (hasIdx) {
+ numDocs = attrReader.getNumIdx() - 1;
+ numValues = attrReader.getNumValues();
+ } else if (dataBuffer->c_str()) {
+ numValues = countZero(dataBuffer->c_str(), dataBuffer->size());
+ numDocs = numValues;
+ }
+
+ setNumDocs(numDocs);
+ setCommittedDocIdLimit(numDocs);
+ if (numDocs > 0) {
+ onAddDoc(numDocs - 1);
+ }
+
+ LoadedVectorR loaded(numValues);
+ loadAllAtOnce(loaded, std::move(dataBuffer), numDocs, attrReader,
+ hasWeightedSetType(), hasIdx);
+
+ return true;
+}
+
+
+bool
+StringAttribute::onAddDoc(DocId doc)
+{
+ (void) doc;
+ return false;
+}
+
+
+void StringAttribute::fillPostings(LoadedVector & loaded)
+{
+ (void) loaded;
+}
+
+void StringAttribute::fillEnum(LoadedVector & loaded)
+{
+ (void) loaded;
+}
+
+void StringAttribute::fillValues(LoadedVector & loaded)
+{
+ (void) loaded;
+}
+
+void
+StringAttribute::fillEnum0(const void *src,
+ size_t srcLen,
+ EnumIndexVector &eidxs)
+{
+ (void) src;
+ (void) srcLen;
+ (void) eidxs;
+ fprintf(stderr, "StringAttribute::fillEnum0\n");
+}
+
+
+void
+StringAttribute::fillEnumIdx(ReaderBase &attrReader,
+ uint64_t numValues,
+ const EnumIndexVector &eidxs,
+ LoadedEnumAttributeVector &loaded)
+{
+ (void) attrReader;
+ (void) numValues;
+ (void) eidxs;
+ (void) loaded;
+ fprintf(stderr, "StringAttribute::fillEnumIdx (loaded)\n");
+}
+
+
+void
+StringAttribute::fillEnumIdx(ReaderBase &attrReader,
+ uint64_t numValues,
+ const EnumIndexVector &eidxs,
+ EnumVector &enumHist)
+{
+ (void) attrReader;
+ (void) numValues;
+ (void) eidxs;
+ (void) enumHist;
+ fprintf(stderr, "StringAttribute::fillEnumIdx (enumHist)\n");
+}
+
+
+void
+StringAttribute::fillPostingsFixupEnum(const LoadedEnumAttributeVector &loaded)
+{
+ (void) loaded;
+ fprintf(stderr, "StringAttribute::fillPostingsFixupEnum\n");
+}
+
+void
+StringAttribute::fixupEnumRefCounts(const EnumVector &enumHist)
+{
+ (void) enumHist;
+ fprintf(stderr, "StringAttribute::fixupEnumRefCounts\n");
+}
+
+}
diff --git a/searchlib/src/vespa/searchlib/attribute/stringbase.h b/searchlib/src/vespa/searchlib/attribute/stringbase.h
new file mode 100644
index 00000000000..a70cc6ecfab
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/stringbase.h
@@ -0,0 +1,201 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/attribute/attributevector.h>
+#include <vespa/searchlib/util/foldedstringcompare.h>
+#include <vespa/vespalib/text/utf8.h>
+#include <vespa/vespalib/util/regexp.h>
+#include <vespa/vespalib/text/lowercase.h>
+#include <vespa/searchlib/attribute/enumstorebase.h>
+#include <vespa/searchlib/attribute/loadedenumvalue.h>
+#include <vespa/searchlib/attribute/loadedstringvalue.h>
+#include <vespa/searchlib/attribute/changevector.h>
+
+namespace search {
+
+class StringEntryType;
+
+class StringAttribute : public AttributeVector
+{
+public:
+ typedef vespalib::Array<uint32_t, vespalib::DefaultAlloc> OffsetVector;
+ typedef const char * LoadedValueType;
+ typedef EnumStoreBase::Index EnumIndex;
+ typedef EnumStoreBase::IndexVector EnumIndexVector;
+ typedef EnumStoreBase::EnumVector EnumVector;
+ typedef attribute::LoadedStringVector LoadedVector;
+public:
+ DECLARE_IDENTIFIABLE_ABSTRACT(StringAttribute);
+ bool append(DocId doc, const vespalib::string & v, int32_t weight) {
+ return AttributeVector::append(_changes, doc, StringChangeData(v), weight);
+ }
+ template<typename Accessor>
+ bool append(DocId doc, Accessor & ac) {
+ return AttributeVector::append(_changes, doc, ac);
+ }
+ bool remove(DocId doc, const vespalib::string & v, int32_t weight) {
+ return AttributeVector::remove(_changes, doc, StringChangeData(v), weight);
+ }
+ bool update(DocId doc, const vespalib::string & v) {
+ return AttributeVector::update(_changes, doc, StringChangeData(v));
+ }
+ bool apply(DocId doc, const ArithmeticValueUpdate & op);
+ virtual bool applyWeight(DocId doc, const FieldValue & fv, const ArithmeticValueUpdate & wAdjust);
+ virtual bool findEnum(const char * value, EnumHandle & e) const = 0;
+ virtual uint32_t get(DocId doc, largeint_t * v, uint32_t sz) const;
+ virtual uint32_t get(DocId doc, double * v, uint32_t sz) const;
+ virtual uint32_t get(DocId doc, WeightedInt * v, uint32_t sz) const;
+ virtual uint32_t get(DocId doc, WeightedFloat * v, uint32_t sz) const;
+ virtual const char *get(DocId doc) const = 0;
+ virtual uint32_t clearDoc(DocId doc);
+ virtual largeint_t getDefaultValue() const { return 0; }
+ static size_t countZero(const char * bt, size_t sz);
+ static void generateOffsets(const char * bt, size_t sz, OffsetVector & offsets);
+ virtual const char * getFromEnum(EnumHandle e) const = 0;
+
+protected:
+ StringAttribute(const vespalib::string & name);
+ StringAttribute(const vespalib::string & name, const Config & c);
+ static const char * defaultValue() { return ""; }
+ typedef ChangeTemplate<StringChangeData> Change;
+ typedef ChangeVectorT< Change > ChangeVector;
+ typedef StringEntryType EnumEntryType;
+ ChangeVector _changes;
+ Change _defaultValue;
+ virtual bool onLoad();
+
+ bool onLoadEnumerated(ReaderBase &attrReader);
+
+ virtual bool
+ onAddDoc(DocId doc);
+private:
+ typedef attribute::LoadedStringVectorReal LoadedVectorR;
+ virtual void fillPostings(LoadedVector & loaded);
+ virtual void fillEnum(LoadedVector & loaded);
+ virtual void fillValues(LoadedVector & loaded);
+
+ virtual void
+ fillEnum0(const void *src,
+ size_t srcLen,
+ EnumIndexVector &eidxs);
+
+ virtual void
+ fillEnumIdx(ReaderBase &attrReader,
+ uint64_t numValues,
+ const EnumIndexVector &eidxs,
+ attribute::LoadedEnumAttributeVector &loaded);
+
+ virtual void
+ fillEnumIdx(ReaderBase &attrReader,
+ uint64_t numValues,
+ const EnumIndexVector &eidxs,
+ EnumVector &enumHist);
+
+ virtual void
+ fillPostingsFixupEnum(const attribute::LoadedEnumAttributeVector &loaded);
+
+ virtual void
+ fixupEnumRefCounts(const EnumVector &enumHist);
+
+ virtual largeint_t getInt(DocId doc) const { return strtoll(get(doc), NULL, 0); }
+ virtual double getFloat(DocId doc) const { return strtod(get(doc), NULL); }
+ virtual const char * getString(DocId doc, char * v, size_t sz) const { (void) v; (void) sz; return get(doc); }
+
+ virtual long onSerializeForAscendingSort(DocId doc, void * serTo, long available, const common::BlobConverter * bc) const;
+ virtual long onSerializeForDescendingSort(DocId doc, void * serTo, long available, const common::BlobConverter * bc) const;
+
+ template <typename T>
+ void loadAllAtOnce(T & loaded, FileUtil::LoadedBuffer::UP dataBuffer, uint32_t numDocs, ReaderBase & attrReader, bool hasWeight, bool hasIdx);
+
+ class StringSearchContext : public SearchContext {
+ public:
+ StringSearchContext(QueryTermSimple::UP qTerm, const StringAttribute & toBeSearched);
+ virtual ~StringSearchContext();
+ private:
+ bool _isPrefix;
+ bool _isRegex;
+ protected:
+ bool valid() const override {
+ return (_queryTerm.get() && (!_queryTerm->empty()));
+ }
+
+ const QueryTermBase & queryTerm() const override {
+ return static_cast<const QueryTermBase &>(*_queryTerm);
+ }
+ bool isMatch(const char *src) const {
+ if (__builtin_expect(isRegex(), false)) {
+ return getRegex()->match(src);
+ }
+ vespalib::Utf8ReaderForZTS u8reader(src);
+ uint32_t j = 0;
+ uint32_t val;
+ for (;; ++j) {
+ val = u8reader.getChar();
+ val = vespalib::LowerCase::convert(val);
+ if (_termUCS4[j] == 0 || _termUCS4[j] != val) {
+ break;
+ }
+ }
+ return (_termUCS4[j] == 0 && (val == 0 || isPrefix()));
+ }
+ class CollectHitCount {
+ public:
+ CollectHitCount() : _hitCount(0) { }
+ void addWeight(int32_t w) {
+ (void) w;
+ _hitCount++;
+ }
+ int32_t getWeight() const { return _hitCount; }
+ bool hasMatch() const { return _hitCount != 0; }
+ private:
+ uint32_t _hitCount;
+ };
+ class CollectWeight {
+ public:
+ CollectWeight() : _hitCount(0), _weight(0) { }
+ void addWeight(int32_t w) {
+ _weight += w;
+ _hitCount++;
+ }
+ int32_t getWeight() const { return _weight; }
+ bool hasMatch() const { return _hitCount != 0; }
+ private:
+ uint32_t _hitCount;
+ int32_t _weight;
+ };
+
+ template<typename WeightedT, typename Accessor, typename Collector>
+ void collectMatches(const WeightedT * w, size_t sz, const Accessor & ac, Collector & collector) const {
+ for (uint32_t i(0); i < sz; i++) {
+ if (isMatch(ac.get(w[i].value()))) {
+ collector.addWeight(w[i].weight());
+ }
+ }
+ }
+
+
+ bool onCmp(DocId docId, int32_t & weight) const override;
+ bool onCmp(DocId docId) const override;
+
+ bool isPrefix() const { return _isPrefix; }
+ bool isRegex() const { return _isRegex; }
+ QueryTermSimple::UP _queryTerm;
+ const ucs4_t * _termUCS4;
+ const vespalib::Regexp * getRegex() const { return _regex.get(); }
+ private:
+ WeightedConstChar * getBuffer() const {
+ if (_buffer == NULL) {
+ _buffer = new WeightedConstChar[_bufferLen];
+ }
+ return _buffer;
+ }
+ unsigned _bufferLen;
+ mutable WeightedConstChar * _buffer;
+ std::unique_ptr<vespalib::Regexp> _regex;
+ };
+ SearchContext::UP getSearch(QueryTermSimple::UP term, const SearchContext::Params & params) const override;
+};
+
+}
+
diff --git a/searchlib/src/vespa/searchlib/attribute/tensorattribute.cpp b/searchlib/src/vespa/searchlib/attribute/tensorattribute.cpp
new file mode 100644
index 00000000000..0e83749847f
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/tensorattribute.cpp
@@ -0,0 +1,270 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "tensorattribute.h"
+#include <vespa/vespalib/tensor/tensor.h>
+#include "tensorattributesaver.h"
+
+using vespalib::tensor::Tensor;
+
+namespace search {
+
+namespace attribute {
+
+namespace {
+
+constexpr uint32_t TENSOR_ATTRIBUTE_VERSION = 0;
+
+// minimum dead bytes in tensor attribute before consider compaction
+constexpr size_t DEAD_SLACK = 0x10000u;
+
+
+class TensorReader : public AttributeVector::ReaderBase
+{
+private:
+ FileReader<uint32_t> _tensorSizeReader;
+public:
+ TensorReader(AttributeVector &attr)
+ : AttributeVector::ReaderBase(attr),
+ _tensorSizeReader(*_datFile)
+ {
+ }
+ uint32_t getNextTensorSize() { return _tensorSizeReader.readHostOrder(); }
+ void readTensor(void *buf, size_t len) { _datFile->ReadBuf(buf, len); }
+};
+
+}
+
+TensorAttribute::TensorAttribute(const vespalib::stringref &baseFileName,
+ const Config &cfg)
+ : NotImplementedAttribute(baseFileName, cfg),
+ _refVector(cfg.getGrowStrategy().getDocsInitialCapacity(),
+ cfg.getGrowStrategy().getDocsGrowPercent(),
+ cfg.getGrowStrategy().getDocsGrowDelta(),
+ getGenerationHolder()),
+ _tensorStore(),
+ _tensorMapper(cfg.tensorType()),
+ _compactGeneration(0)
+{
+}
+
+
+TensorAttribute::~TensorAttribute()
+{
+ getGenerationHolder().clearHoldLists();
+ _tensorStore.clearHoldLists();
+}
+
+
+
+uint32_t
+TensorAttribute::clearDoc(DocId docId)
+{
+ RefType oldRef(_refVector[docId]);
+ updateUncommittedDocIdLimit(docId);
+ _refVector[docId] = RefType();
+ if (oldRef.valid()) {
+ _tensorStore.holdTensor(oldRef);
+ return 1u;
+ }
+ return 0u;
+}
+
+
+void
+TensorAttribute::compactWorst()
+{
+ uint32_t bufferId = _tensorStore.startCompactWorstBuffer();
+ size_t lidLimit = _refVector.size();
+ for (uint32_t lid = 0; lid < lidLimit; ++lid) {
+ RefType ref = _refVector[lid];
+ if (ref.valid() && ref.bufferId() == bufferId) {
+ RefType newRef = _tensorStore.move(ref);
+ // TODO: validate if following fence is sufficient.
+ std::atomic_thread_fence(std::memory_order_release);
+ _refVector[lid] = newRef;
+ }
+ }
+ _tensorStore.finishCompactWorstBuffer(bufferId);
+ _compactGeneration = getCurrentGeneration();
+ incGeneration();
+ updateStat(true);
+}
+
+void
+TensorAttribute::onCommit()
+{
+ // Note: Cost can be reduced if unneeded generation increments are dropped
+ incGeneration();
+ if (getFirstUsedGeneration() > _compactGeneration) {
+ // No data held from previous compact operation
+ Status &status = getStatus();
+ size_t used = status.getUsed();
+ size_t dead = status.getDead();
+ if ((dead >= DEAD_SLACK) && (dead * 5 > used)) {
+ compactWorst();
+ }
+ }
+}
+
+
+void
+TensorAttribute::onUpdateStat()
+{
+ // update statistics
+ MemoryUsage total = _refVector.getMemoryUsage();
+ total.merge(_tensorStore.getMemoryUsage());
+ total.incAllocatedBytesOnHold(getGenerationHolder().getHeldBytes());
+ this->updateStatistics(_refVector.size(),
+ _refVector.size(),
+ total.allocatedBytes(),
+ total.usedBytes(),
+ total.deadBytes(),
+ total.allocatedBytesOnHold());
+}
+
+
+void
+TensorAttribute::removeOldGenerations(generation_t firstUsed)
+{
+ _tensorStore.trimHoldLists(firstUsed);
+ getGenerationHolder().trimHoldLists(firstUsed);
+}
+
+void
+TensorAttribute::onGenerationChange(generation_t generation)
+{
+ getGenerationHolder().transferHoldLists(generation - 1);
+ _tensorStore.transferHoldLists(generation - 1);
+}
+
+
+bool
+TensorAttribute::addDoc(DocId &docId)
+{
+ bool incGen = _refVector.isFull();
+ _refVector.push_back(RefType());
+ AttributeVector::incNumDocs();
+ docId = AttributeVector::getNumDocs() - 1;
+ updateUncommittedDocIdLimit(docId);
+ if (incGen) {
+ incGeneration();
+ } else {
+ removeAllOldGenerations();
+ }
+ return true;
+}
+
+
+void
+TensorAttribute::setTensor(DocId docId, const Tensor &tensor)
+{
+ assert(docId < _refVector.size());
+ updateUncommittedDocIdLimit(docId);
+ // TODO: Handle generic tensor attribute in a better way ?
+ RefType ref = _tensorStore.setTensor(
+ getConfig().tensorType().is_tensor() ?
+ *_tensorMapper.map(tensor) : tensor);
+ // TODO: validate if following fence is sufficient.
+ std::atomic_thread_fence(std::memory_order_release);
+ // TODO: Check if refVector must consist of std::atomic<RefType>
+ _refVector[docId] = ref;
+}
+
+
+std::unique_ptr<Tensor>
+TensorAttribute::getTensor(DocId docId) const
+{
+ RefType ref;
+ if (docId < getCommittedDocIdLimit()) {
+ ref = _refVector[docId];
+ }
+ if (!ref.valid()) {
+ return std::unique_ptr<Tensor>();
+ }
+ return _tensorStore.getTensor(ref);
+}
+
+
+void
+TensorAttribute::clearDocs(DocId lidLow, DocId lidLimit)
+{
+ assert(lidLow <= lidLimit);
+ assert(lidLimit <= this->getNumDocs());
+ for (DocId lid = lidLow; lid < lidLimit; ++lid) {
+ RefType &ref = _refVector[lid];
+ if (ref.valid()) {
+ _tensorStore.holdTensor(ref);
+ ref = RefType();
+ }
+ }
+}
+
+
+void
+TensorAttribute::onShrinkLidSpace()
+{
+ // Tensors for lids > committedDocIdLimit have been cleared.
+ uint32_t committedDocIdLimit = getCommittedDocIdLimit();
+ assert(_refVector.size() >= committedDocIdLimit);
+ _refVector.shrink(committedDocIdLimit);
+ setNumDocs(committedDocIdLimit);
+}
+
+
+bool
+TensorAttribute::onLoad()
+{
+ TensorReader tensorReader(*this);
+ if (!tensorReader.hasData()) {
+ return false;
+ }
+ setCreateSerialNum(tensorReader.getCreateSerialNum());
+ assert(tensorReader.getVersion() == TENSOR_ATTRIBUTE_VERSION);
+ uint32_t numDocs(tensorReader.getDocIdLimit());
+ _refVector.reset();
+ _refVector.unsafe_reserve(numDocs);
+ for (uint32_t lid = 0; lid < numDocs; ++lid) {
+ uint32_t tensorSize = tensorReader.getNextTensorSize();
+ auto raw = _tensorStore.allocRawBuffer(tensorSize);
+ if (tensorSize != 0) {
+ tensorReader.readTensor(raw.first, tensorSize);
+ }
+ _refVector.push_back(raw.second);
+ }
+ setNumDocs(numDocs);
+ setCommittedDocIdLimit(numDocs);
+ return true;
+}
+
+
+uint32_t
+TensorAttribute::getVersion() const
+{
+ return TENSOR_ATTRIBUTE_VERSION;
+}
+
+
+TensorAttribute::RefCopyVector
+TensorAttribute::getRefCopy() const
+{
+ uint32_t size = getCommittedDocIdLimit();
+ assert(size <= _refVector.size());
+ return RefCopyVector(&_refVector[0], &_refVector[0] + size);
+}
+
+std::unique_ptr<AttributeSaver>
+TensorAttribute::onInitSave()
+{
+ vespalib::GenerationHandler::Guard guard(getGenerationHandler().
+ takeGuard());
+ return std::make_unique<TensorAttributeSaver>
+ (std::move(guard),
+ this->createSaveTargetConfig(),
+ getRefCopy(),
+ _tensorStore);
+}
+
+} // namespace search::attribute
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/attribute/tensorattribute.h b/searchlib/src/vespa/searchlib/attribute/tensorattribute.h
new file mode 100644
index 00000000000..954d211d13f
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/tensorattribute.h
@@ -0,0 +1,55 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "not_implemented_attribute.h"
+#include "tensorstore.h"
+#include <vespa/searchlib/common/rcuvector.h>
+#include <vespa/vespalib/tensor/tensor_mapper.h>
+
+namespace vespalib { namespace tensor { class Tensor; } }
+
+namespace search {
+
+namespace attribute {
+
+/**
+ * Attribute vector class used to store tensors for all documents in memory.
+ */
+class TensorAttribute : public NotImplementedAttribute
+{
+private:
+ using RefType = TensorStore::RefType;
+ using RefVector = RcuVectorBase<RefType>;
+
+ RefVector _refVector; // docId -> ref in data store for serialized tensor
+ TensorStore _tensorStore; // data store for serialized tensors
+ vespalib::tensor::TensorMapper _tensorMapper; // mapper to our tensor type
+ uint64_t _compactGeneration; // Generation when last compact occurred
+
+ void compactWorst();
+public:
+ using RefCopyVector = vespalib::Array<RefType, vespalib::DefaultAlloc>;
+ using Tensor = vespalib::tensor::Tensor;
+ TensorAttribute(const vespalib::stringref &baseFileName, const Config &cfg);
+ ~TensorAttribute();
+ virtual uint32_t clearDoc(DocId docId) override;
+ virtual void onCommit() override;
+ virtual void onUpdateStat() override;
+ virtual void removeOldGenerations(generation_t firstUsed) override;
+ virtual void onGenerationChange(generation_t generation) override;
+ virtual bool addDoc(DocId &docId) override;
+ void setTensor(DocId docId, const Tensor &tensor);
+ std::unique_ptr<Tensor> getTensor(DocId docId) const;
+ virtual void clearDocs(DocId lidLow, DocId lidLimit) override;
+ virtual void onShrinkLidSpace() override;
+ virtual bool onLoad() override;
+ virtual uint32_t getVersion() const override;
+ RefCopyVector getRefCopy() const;
+ virtual std::unique_ptr<AttributeSaver> onInitSave() override;
+};
+
+
+} // namespace search::attribute
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/attribute/tensorattributesaver.cpp b/searchlib/src/vespa/searchlib/attribute/tensorattributesaver.cpp
new file mode 100644
index 00000000000..6c27689a0c9
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/tensorattributesaver.cpp
@@ -0,0 +1,51 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "tensorattributesaver.h"
+#include <vespa/searchlib/util/bufferwriter.h>
+
+using vespalib::GenerationHandler;
+using search::IAttributeSaveTarget;
+
+namespace search {
+
+namespace attribute {
+
+TensorAttributeSaver::
+TensorAttributeSaver(GenerationHandler::Guard &&guard,
+ const IAttributeSaveTarget::Config &cfg,
+ RefCopyVector &&refs,
+ const TensorStore &tensorStore)
+ : AttributeSaver(std::move(guard), cfg),
+ _refs(std::move(refs)),
+ _tensorStore(tensorStore)
+{
+}
+
+
+TensorAttributeSaver::~TensorAttributeSaver()
+{
+}
+
+
+bool
+TensorAttributeSaver::onSave(IAttributeSaveTarget &saveTarget)
+{
+ std::unique_ptr<BufferWriter>
+ datWriter(saveTarget.datWriter().allocBufferWriter());
+ const uint32_t docIdLimit(_refs.size());
+ for (uint32_t lid = 0; lid < docIdLimit; ++lid) {
+ auto raw = _tensorStore.getRawBuffer(_refs[lid]);
+ datWriter->write(&raw.second, sizeof(raw.second));
+ if (raw.second != 0) {
+ datWriter->write(raw.first, raw.second);
+ }
+ }
+ datWriter->flush();
+ return true;
+}
+
+
+} // namespace search::attribute
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/attribute/tensorattributesaver.h b/searchlib/src/vespa/searchlib/attribute/tensorattributesaver.h
new file mode 100644
index 00000000000..e988e1b05ec
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/tensorattributesaver.h
@@ -0,0 +1,37 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "attributesaver.h"
+#include "iattributesavetarget.h"
+#include "tensorattribute.h"
+
+namespace search {
+
+namespace attribute {
+
+/*
+ * Class for saving a tensor attribute.
+ */
+class TensorAttributeSaver : public AttributeSaver
+{
+public:
+ using RefCopyVector = TensorAttribute::RefCopyVector;
+private:
+ RefCopyVector _refs;
+ const TensorStore &_tensorStore;
+ using GenerationHandler = vespalib::GenerationHandler;
+
+ virtual bool onSave(IAttributeSaveTarget &saveTarget) override;
+public:
+ TensorAttributeSaver(GenerationHandler::Guard &&guard,
+ const IAttributeSaveTarget::Config &cfg,
+ RefCopyVector &&refs,
+ const TensorStore &tensorStore);
+
+ virtual ~TensorAttributeSaver();
+};
+
+} // namespace search::attribute
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/attribute/tensorstore.cpp b/searchlib/src/vespa/searchlib/attribute/tensorstore.cpp
new file mode 100644
index 00000000000..83b870cfaeb
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/tensorstore.cpp
@@ -0,0 +1,133 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "tensorstore.h"
+#include <vespa/vespalib/tensor/tensor.h>
+#include <vespa/vespalib/tensor/serialization/typed_binary_format.h>
+#include <vespa/vespalib/objects/nbostream.h>
+#include <vespa/vespalib/util/stringfmt.h>
+#include <vespa/vespalib/util/macro.h>
+#include <vespa/document/util/serializable.h>
+#include <vespa/searchlib/btree/datastore.hpp>
+
+using vespalib::tensor::Tensor;
+using vespalib::tensor::TypedBinaryFormat;
+using document::DeserializeException;
+
+namespace search {
+
+namespace attribute {
+
+constexpr size_t MIN_BUFFER_CLUSTERS = 1024;
+
+TensorStore::TensorStore()
+ : _store(),
+ _type(RefType::align(1),
+ MIN_BUFFER_CLUSTERS,
+ RefType::offsetSize() / RefType::align(1)),
+ _typeId(0)
+{
+ _store.addType(&_type);
+ _store.initActiveBuffers();
+}
+
+
+TensorStore::~TensorStore()
+{
+ _store.dropBuffers();
+}
+
+
+std::pair<const void *, uint32_t>
+TensorStore::getRawBuffer(RefType ref) const
+{
+ if (!ref.valid()) {
+ return std::make_pair(nullptr, 0u);
+ }
+ const char *buf = _store.getBufferEntry<char>(ref.bufferId(),
+ ref.offset());
+ uint32_t len = *reinterpret_cast<const uint32_t *>(buf);
+ return std::make_pair(buf + sizeof(uint32_t), len);
+}
+
+
+std::pair<void *, TensorStore::RefType>
+TensorStore::allocRawBuffer(uint32_t size)
+{
+ if (size == 0) {
+ return std::make_pair(nullptr, RefType());
+ }
+ size_t extSize = size + sizeof(uint32_t);
+ size_t bufSize = RefType::align(extSize);
+ _store.ensureBufferCapacity(_typeId, bufSize);
+ uint32_t activeBufferId = _store.getActiveBufferId(_typeId);
+ btree::BufferState &state = _store.getBufferState(activeBufferId);
+ size_t oldSize = state.size();
+ char *bufferEntryWritePtr =
+ _store.getBufferEntry<char>(activeBufferId, oldSize);
+ *reinterpret_cast<uint32_t *>(bufferEntryWritePtr) = size;
+ char *padWritePtr = bufferEntryWritePtr + extSize;
+ for (size_t i = extSize; i < bufSize; ++i) {
+ *padWritePtr++ = 0;
+ }
+ state.pushed_back(bufSize);
+ return std::make_pair(bufferEntryWritePtr + sizeof(uint32_t),
+ RefType(oldSize, activeBufferId));
+}
+
+void
+TensorStore::hold(RefType ref)
+{
+ if (!ref.valid()) {
+ return;
+ }
+ const char *buf = _store.getBufferEntry<char>(ref.bufferId(),
+ ref.offset());
+ uint32_t len = *reinterpret_cast<const uint32_t *>(buf);
+ _store.holdElem(ref, len + sizeof(uint32_t));
+}
+
+
+TensorStore::RefType
+TensorStore::move(RefType ref) {
+ if (!ref.valid()) {
+ return RefType();
+ }
+ auto oldraw = getRawBuffer(ref);
+ auto newraw = allocRawBuffer(oldraw.second);
+ memcpy(newraw.first, oldraw.first, oldraw.second);
+ _store.holdElem(ref, oldraw.second + sizeof(uint32_t));
+ return newraw.second;
+}
+
+std::unique_ptr<Tensor>
+TensorStore::getTensor(RefType ref) const
+{
+ auto raw = getRawBuffer(ref);
+ if (raw.second == 0u) {
+ return std::unique_ptr<Tensor>();
+ }
+ vespalib::nbostream wrapStream(raw.first, raw.second);
+ auto tensor = TypedBinaryFormat::deserialize(wrapStream);
+ if (wrapStream.size() != 0) {
+ throw DeserializeException("Leftover bytes deserializing "
+ "tensor attribute value.",
+ VESPA_STRLOC);
+ }
+ return std::move(tensor);
+}
+
+
+TensorStore::RefType
+TensorStore::setTensor(const Tensor &tensor)
+{
+ vespalib::nbostream stream;
+ TypedBinaryFormat::serialize(stream, tensor);
+ auto raw = allocRawBuffer(stream.size());
+ memcpy(raw.first, stream.peek(), stream.size());
+ return raw.second;
+}
+
+} // namespace search::attribute
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/attribute/tensorstore.h b/searchlib/src/vespa/searchlib/attribute/tensorstore.h
new file mode 100644
index 00000000000..669362ea57f
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/tensorstore.h
@@ -0,0 +1,93 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/btree/entryref.h>
+#include <vespa/searchlib/btree/datastore.h>
+#include <vespa/vespalib/util/generationhandler.h>
+
+namespace vespalib { namespace tensor { class Tensor; } }
+
+namespace search {
+
+namespace attribute {
+
+/**
+ * Class for storing serialized tensors in memory, used by TensorAttribute.
+ *
+ * Serialization format is subject to change. Changes to serialization format
+ * might also require corresponding changes to implemented optimized tensor
+ * operations that use the serialized tensor as argument.
+ */
+class TensorStore
+{
+public:
+ using RefType = btree::AlignedEntryRefT<22, 2>;
+ using DataStoreType = btree::DataStoreT<RefType>;
+ typedef vespalib::GenerationHandler::generation_t generation_t;
+ using Tensor = vespalib::tensor::Tensor;
+
+private:
+ DataStoreType _store;
+ btree::BufferType<char> _type;
+ const uint32_t _typeId;
+
+public:
+ TensorStore();
+
+ ~TensorStore();
+
+ // Inherit doc from DataStoreBase
+ void
+ trimHoldLists(generation_t usedGen)
+ {
+ _store.trimHoldLists(usedGen);
+ }
+
+ // Inherit doc from DataStoreBase
+ void
+ transferHoldLists(generation_t generation)
+ {
+ _store.transferHoldLists(generation);
+ }
+
+ void
+ clearHoldLists(void)
+ {
+ _store.clearHoldLists();
+ }
+
+ MemoryUsage
+ getMemoryUsage() const
+ {
+ return _store.getMemoryUsage();
+ }
+
+
+ std::pair<const void *, uint32_t> getRawBuffer(RefType ref) const;
+
+ std::pair<void *, RefType> allocRawBuffer(uint32_t size);
+
+ void hold(RefType ref);
+
+ RefType move(RefType ref);
+
+ std::unique_ptr<Tensor> getTensor(RefType ref) const;
+
+ void holdTensor(RefType ref) { hold(ref); }
+
+ RefType setTensor(const Tensor &tensor);
+
+ uint32_t startCompactWorstBuffer() {
+ return _store.startCompactWorstBuffer(_typeId);
+ }
+
+ void finishCompactWorstBuffer(uint32_t bufferId) {
+ _store.holdBuffer(bufferId);
+ }
+};
+
+
+} // namespace search::attribute
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/bitcompression/.gitignore b/searchlib/src/vespa/searchlib/bitcompression/.gitignore
new file mode 100644
index 00000000000..0b3af54ee50
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/bitcompression/.gitignore
@@ -0,0 +1,3 @@
+*.So
+.depend*
+Makefile
diff --git a/searchlib/src/vespa/searchlib/bitcompression/CMakeLists.txt b/searchlib/src/vespa/searchlib/bitcompression/CMakeLists.txt
new file mode 100644
index 00000000000..51d299bacfa
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/bitcompression/CMakeLists.txt
@@ -0,0 +1,9 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_library(searchlib_searchlib_bitcompression OBJECT
+ SOURCES
+ compression.cpp
+ countcompression.cpp
+ pagedict4.cpp
+ posocccompression.cpp
+ DEPENDS
+)
diff --git a/searchlib/src/vespa/searchlib/bitcompression/OWNERS b/searchlib/src/vespa/searchlib/bitcompression/OWNERS
new file mode 100644
index 00000000000..64735d11d93
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/bitcompression/OWNERS
@@ -0,0 +1 @@
+tegge
diff --git a/searchlib/src/vespa/searchlib/bitcompression/compression.cpp b/searchlib/src/vespa/searchlib/bitcompression/compression.cpp
new file mode 100644
index 00000000000..06c96dc96ee
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/bitcompression/compression.cpp
@@ -0,0 +1,450 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright (C) 1998-2003 Fast Search & Transfer ASA
+// Copyright (C) 2003 Overture Services Norway AS
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".compression");
+#include "compression.h"
+#include <vespa/vespalib/objects/nbostream.h>
+#include <vespa/searchlib/fef/termfieldmatchdata.h>
+#include <vespa/searchlib/fef/termfieldmatchdataarray.h>
+#include <vespa/vespalib/data/fileheader.h>
+
+namespace search
+{
+
+namespace bitcompression
+{
+
+using vespalib::nbostream;
+
+uint8_t CodingTables::_log2Table[65536];
+
+CodingTables tables; // Static initializer
+
+CodingTables::CodingTables()
+{
+ unsigned int x;
+ uint8_t log2Val;
+
+ for(x=0; x<65536; x++) {
+ unsigned int val = x;
+ for (log2Val = 0; (val >>= 1) != 0; log2Val++);
+ _log2Table[x] = log2Val;
+ }
+}
+
+uint64_t CodingTables::_intMask64[65] =
+{
+ (UINT64_C(1) << 0) - 1, (UINT64_C(1) << 1) - 1,
+ (UINT64_C(1) << 2) - 1, (UINT64_C(1) << 3) - 1,
+ (UINT64_C(1) << 4) - 1, (UINT64_C(1) << 5) - 1,
+ (UINT64_C(1) << 6) - 1, (UINT64_C(1) << 7) - 1,
+ (UINT64_C(1) << 8) - 1, (UINT64_C(1) << 9) - 1,
+ (UINT64_C(1) << 10) - 1, (UINT64_C(1) << 11) - 1,
+ (UINT64_C(1) << 12) - 1, (UINT64_C(1) << 13) - 1,
+ (UINT64_C(1) << 14) - 1, (UINT64_C(1) << 15) - 1,
+ (UINT64_C(1) << 16) - 1, (UINT64_C(1) << 17) - 1,
+ (UINT64_C(1) << 18) - 1, (UINT64_C(1) << 19) - 1,
+ (UINT64_C(1) << 20) - 1, (UINT64_C(1) << 21) - 1,
+ (UINT64_C(1) << 22) - 1, (UINT64_C(1) << 23) - 1,
+ (UINT64_C(1) << 24) - 1, (UINT64_C(1) << 25) - 1,
+ (UINT64_C(1) << 26) - 1, (UINT64_C(1) << 27) - 1,
+ (UINT64_C(1) << 28) - 1, (UINT64_C(1) << 29) - 1,
+ (UINT64_C(1) << 30) - 1, (UINT64_C(1) << 31) - 1,
+ (UINT64_C(1) << 32) - 1, (UINT64_C(1) << 33) - 1,
+ (UINT64_C(1) << 34) - 1, (UINT64_C(1) << 35) - 1,
+ (UINT64_C(1) << 36) - 1, (UINT64_C(1) << 37) - 1,
+ (UINT64_C(1) << 38) - 1, (UINT64_C(1) << 39) - 1,
+ (UINT64_C(1) << 40) - 1, (UINT64_C(1) << 41) - 1,
+ (UINT64_C(1) << 42) - 1, (UINT64_C(1) << 43) - 1,
+ (UINT64_C(1) << 44) - 1, (UINT64_C(1) << 45) - 1,
+ (UINT64_C(1) << 46) - 1, (UINT64_C(1) << 47) - 1,
+ (UINT64_C(1) << 48) - 1, (UINT64_C(1) << 49) - 1,
+ (UINT64_C(1) << 50) - 1, (UINT64_C(1) << 51) - 1,
+ (UINT64_C(1) << 52) - 1, (UINT64_C(1) << 53) - 1,
+ (UINT64_C(1) << 54) - 1, (UINT64_C(1) << 55) - 1,
+ (UINT64_C(1) << 56) - 1, (UINT64_C(1) << 57) - 1,
+ (UINT64_C(1) << 58) - 1, (UINT64_C(1) << 59) - 1,
+ (UINT64_C(1) << 60) - 1, (UINT64_C(1) << 61) - 1,
+ (UINT64_C(1) << 62) - 1, (UINT64_C(1) << 63) - 1,
+ static_cast<uint64_t>(-1),
+};
+
+
+uint64_t
+CodingTables::_intMask64le[65] =
+{
+ /**/ 0, -(UINT64_C(1) << 63),
+ -(UINT64_C(1) << 62), -(UINT64_C(1) << 61),
+ -(UINT64_C(1) << 60), -(UINT64_C(1) << 59),
+ -(UINT64_C(1) << 58), -(UINT64_C(1) << 57),
+ -(UINT64_C(1) << 56), -(UINT64_C(1) << 55),
+ -(UINT64_C(1) << 54), -(UINT64_C(1) << 53),
+ -(UINT64_C(1) << 52), -(UINT64_C(1) << 51),
+ -(UINT64_C(1) << 50), -(UINT64_C(1) << 49),
+ -(UINT64_C(1) << 48), -(UINT64_C(1) << 47),
+ -(UINT64_C(1) << 46), -(UINT64_C(1) << 45),
+ -(UINT64_C(1) << 44), -(UINT64_C(1) << 43),
+ -(UINT64_C(1) << 42), -(UINT64_C(1) << 41),
+ -(UINT64_C(1) << 40), -(UINT64_C(1) << 39),
+ -(UINT64_C(1) << 38), -(UINT64_C(1) << 37),
+ -(UINT64_C(1) << 36), -(UINT64_C(1) << 35),
+ -(UINT64_C(1) << 34), -(UINT64_C(1) << 33),
+ -(UINT64_C(1) << 32), -(UINT64_C(1) << 31),
+ -(UINT64_C(1) << 30), -(UINT64_C(1) << 29),
+ -(UINT64_C(1) << 28), -(UINT64_C(1) << 27),
+ -(UINT64_C(1) << 26), -(UINT64_C(1) << 25),
+ -(UINT64_C(1) << 24), -(UINT64_C(1) << 23),
+ -(UINT64_C(1) << 22), -(UINT64_C(1) << 21),
+ -(UINT64_C(1) << 20), -(UINT64_C(1) << 19),
+ -(UINT64_C(1) << 18), -(UINT64_C(1) << 17),
+ -(UINT64_C(1) << 16), -(UINT64_C(1) << 15),
+ -(UINT64_C(1) << 14), -(UINT64_C(1) << 13),
+ -(UINT64_C(1) << 12), -(UINT64_C(1) << 11),
+ -(UINT64_C(1) << 10), -(UINT64_C(1) << 9),
+ -(UINT64_C(1) << 8), -(UINT64_C(1) << 7),
+ -(UINT64_C(1) << 6), -(UINT64_C(1) << 5),
+ -(UINT64_C(1) << 4), -(UINT64_C(1) << 3),
+ -(UINT64_C(1) << 2), -(UINT64_C(1) << 1),
+ static_cast<uint64_t>(-1),
+};
+
+
+void
+EncodeContext64Base::checkPointWrite(nbostream &out)
+{
+ out << _cacheInt << _cacheFree;
+}
+
+
+void
+EncodeContext64Base::checkPointRead(nbostream &in)
+{
+ in >> _cacheInt >> _cacheFree;
+}
+
+
+void
+DecodeContext64Base::checkPointWrite(nbostream &out)
+{
+ (void) out;
+}
+
+
+void
+DecodeContext64Base::checkPointRead(nbostream &in)
+{
+ (void) in;
+}
+
+} // namespace bitcompression
+
+
+namespace
+{
+
+vespalib::string noFeatures = "NoFeatures";
+
+}
+
+namespace bitcompression
+{
+
+template <bool bigEndian>
+void
+FeatureDecodeContext<bigEndian>::
+readBytes(uint8_t *buf, size_t len)
+{
+ while (len > 0) {
+ // Ensure that buffer to read from isn't empty
+ if (__builtin_expect(_valI >= _valE, false))
+ _readContext->readComprBuffer();
+ uint64_t readOffset = getReadOffset();
+ // Validate that read offset is byte aligned
+ assert((readOffset & 7) == 0);
+ // Get start and end of buffer to read from, then calculate size
+ const uint8_t *rbuf = reinterpret_cast<const uint8_t *>(getCompr()) +
+ (getBitOffset() >> 3);
+ const uint8_t *rbufE = reinterpret_cast<const uint8_t *>(_realValE);
+ size_t rbufSize = rbufE - rbuf; // Size of buffer to read from
+ // How much to copy in this iteration of the loop
+ size_t copySize = std::min(rbufSize, len);
+ // Something must be copied during each iteration
+ assert(copySize > 0);
+ memcpy(buf, rbuf, copySize);
+ buf += copySize;
+ len -= copySize;
+ // Adjust read position to account for bytes read
+ _readContext->setPosition(readOffset + copySize * 8);
+ }
+ if (__builtin_expect(_valI >= _valE, false))
+ _readContext->readComprBuffer();
+}
+
+
+template <bool bigEndian>
+uint32_t
+FeatureDecodeContext<bigEndian>::
+readHeader(vespalib::GenericHeader &header, int64_t fileSize)
+{
+ size_t hhSize = vespalib::GenericHeader::getMinSize();
+ assert(static_cast<int64_t>(hhSize) <= fileSize);
+ vespalib::DataBuffer dataBuffer(32768u);
+ dataBuffer.ensureFree(hhSize);
+ readBytes(reinterpret_cast<uint8_t *>(dataBuffer.getFree()),
+ hhSize);
+ dataBuffer.moveFreeToData(hhSize);
+ vespalib::GenericHeader::BufferReader bufferReader(dataBuffer);
+ uint32_t headerLen = vespalib::GenericHeader::readSize(bufferReader);
+ // Undo read from buffer
+ dataBuffer.moveDeadToData(hhSize - dataBuffer.getDataLen());
+ assert(headerLen <= fileSize);
+ (void) fileSize;
+ if (headerLen > hhSize) {
+ // Read remaining header into buffer
+ dataBuffer.ensureFree(headerLen - hhSize);
+ readBytes(reinterpret_cast<uint8_t *>(dataBuffer.getFree()),
+ headerLen - hhSize);
+ dataBuffer.moveFreeToData(headerLen - hhSize);
+ }
+ uint32_t len = header.read(bufferReader);
+ assert(len >= header.getSize());
+ assert(len == headerLen);
+ return headerLen;
+}
+
+
+template <bool bigEndian>
+void
+FeatureEncodeContext<bigEndian>::
+writeBits(const uint64_t *bits, uint32_t bitOffset, uint32_t bitLength)
+{
+ typedef FeatureEncodeContext<bigEndian> EC;
+ UC64_ENCODECONTEXT_CONSTRUCTOR(o, _);
+
+ if (bitOffset + bitLength < 64) {
+ uint32_t length = bitLength;
+ if (bigEndian) {
+ uint64_t data = (EC::bswap(*bits) >>
+ (64 - bitOffset - length)) &
+ CodingTables::_intMask64[length];
+ UC64BE_WRITEBITS_NS(o, EC);
+ } else {
+ uint64_t data = (EC::bswap(*bits) >> bitOffset) &
+ CodingTables::_intMask64[length];
+ UC64LE_WRITEBITS_NS(o, EC);
+ }
+ } else {
+ uint32_t bitsLeft = bitLength;
+ do {
+ uint32_t length = 64 - bitOffset;
+ bitsLeft -= length;
+ if (bigEndian) {
+ uint64_t data = EC::bswap(*bits) &
+ CodingTables::_intMask64[length];
+ UC64BE_WRITEBITS_NS(o, EC);
+ } else {
+ uint64_t data = (EC::bswap(*bits) >> bitOffset) &
+ CodingTables::_intMask64[length];
+ UC64LE_WRITEBITS_NS(o, EC);
+ }
+ ++bits;
+ } while (0);
+ while (bitsLeft >= 64) {
+ uint32_t length = 64;
+ uint64_t data = EC::bswap(*bits);
+ UC64_WRITEBITS_NS(o, EC);
+ ++bits;
+ bitsLeft -= 64;
+ if (__builtin_expect(oBufI >= _valE, false)) {
+ UC64_ENCODECONTEXT_STORE(o, _);
+ _writeContext->writeComprBuffer(false);
+ UC64_ENCODECONTEXT_LOAD(o, _);
+ }
+ }
+ if (bitsLeft > 0) {
+ uint32_t length = bitsLeft;
+ if (bigEndian) {
+ uint64_t data = EC::bswap(*bits) >> (64 - length);
+ UC64BE_WRITEBITS_NS(o, EC);
+ } else {
+ uint64_t data = EC::bswap(*bits) &
+ CodingTables::_intMask64[length];
+ UC64LE_WRITEBITS_NS(o, EC);
+ }
+ }
+ }
+ UC64_ENCODECONTEXT_STORE(o, _);
+ if (__builtin_expect(oBufI >= _valE, false)) {
+ _writeContext->writeComprBuffer(false);
+ }
+}
+
+
+template <bool bigEndian>
+void
+FeatureEncodeContext<bigEndian>::
+writeString(const vespalib::stringref &buf)
+{
+ size_t len = buf.size();
+ for (unsigned int i = 0; i < len; ++i) {
+ writeBits(static_cast<unsigned char>(buf[i]), 8);
+ if (__builtin_expect(_valI >= _valE, false))
+ _writeContext->writeComprBuffer(false);
+ }
+ writeBits(0, 8);
+}
+
+
+template <bool bigEndian>
+void
+FeatureEncodeContext<bigEndian>::
+writeHeader(const vespalib::GenericHeader &header)
+{
+ vespalib::DataBuffer dataBuffer(32768u);
+ vespalib::GenericHeader::BufferWriter bufferWriter(dataBuffer);
+ dataBuffer.ensureFree(header.getSize());
+ header.write(bufferWriter);
+ const uint8_t *data = reinterpret_cast<const uint8_t *>
+ (dataBuffer.getData());
+ uint32_t offset = (reinterpret_cast<unsigned long>(data) & 7);
+ data -= offset;
+ uint32_t bitOffset = offset * 8;
+ uint32_t bitLen = dataBuffer.getDataLen() * 8;
+ writeBits(reinterpret_cast<const uint64_t *>(data), bitOffset, bitLen);
+}
+
+
+template <bool bigEndian>
+void
+FeatureDecodeContext<bigEndian>::
+readHeader(const vespalib::GenericHeader &header,
+ const vespalib::string &prefix)
+{
+ (void) header;
+ (void) prefix;
+}
+
+
+template <bool bigEndian>
+const vespalib::string &
+FeatureDecodeContext<bigEndian>::getIdentifier(void) const
+{
+ return noFeatures;
+}
+
+
+template <bool bigEndian>
+void
+FeatureDecodeContext<bigEndian>::readFeatures(DocIdAndFeatures &features)
+{
+ (void) features;
+}
+
+
+template <bool bigEndian>
+void
+FeatureDecodeContext<bigEndian>::skipFeatures(unsigned int count)
+{
+ (void) count;
+}
+
+
+template <bool bigEndian>
+void
+FeatureDecodeContext<bigEndian>::
+unpackFeatures(const search::fef::TermFieldMatchDataArray &matchData,
+ uint32_t docId)
+{
+ if (matchData.size() == 1) {
+ matchData[0]->reset(docId);
+ }
+}
+
+
+template <bool bigEndian>
+void
+FeatureDecodeContext<bigEndian>::
+setParams(const PostingListParams &params)
+{
+ (void) params;
+}
+
+
+template <bool bigEndian>
+void
+FeatureDecodeContext<bigEndian>::
+getParams(PostingListParams &params) const
+{
+ params.clear();
+}
+
+
+template <bool bigEndian>
+void
+FeatureEncodeContext<bigEndian>::
+readHeader(const vespalib::GenericHeader &header,
+ const vespalib::string &prefix)
+{
+ (void) header;
+ (void) prefix;
+}
+
+
+template <bool bigEndian>
+void
+FeatureEncodeContext<bigEndian>::
+writeHeader(vespalib::GenericHeader &header,
+ const vespalib::string &prefix) const
+{
+ (void) header;
+ (void) prefix;
+}
+
+
+template <bool bigEndian>
+const vespalib::string &
+FeatureEncodeContext<bigEndian>::getIdentifier(void) const
+{
+ return noFeatures;
+}
+
+
+template <bool bigEndian>
+void
+FeatureEncodeContext<bigEndian>::writeFeatures(const DocIdAndFeatures &features)
+{
+ (void) features;
+}
+
+
+template <bool bigEndian>
+void
+FeatureEncodeContext<bigEndian>::
+setParams(const PostingListParams &params)
+{
+ (void) params;
+}
+
+
+template <bool bigEndian>
+void
+FeatureEncodeContext<bigEndian>::
+getParams(PostingListParams &params) const
+{
+ params.clear();
+}
+
+
+template class FeatureDecodeContext<true>;
+template class FeatureDecodeContext<false>;
+
+template class FeatureEncodeContext<true>;
+template class FeatureEncodeContext<false>;
+
+
+} // namespace bitcompression
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/bitcompression/compression.h b/searchlib/src/vespa/searchlib/bitcompression/compression.h
new file mode 100644
index 00000000000..a2d1eaff93e
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/bitcompression/compression.h
@@ -0,0 +1,1933 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright (C) 1998-2003 Fast Search & Transfer ASA
+// Copyright (C) 2003 Overture Services Norway AS
+
+#pragma once
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/searchlib/util/comprfile.h>
+#include <vespa/searchlib/index/postinglistparams.h>
+#include <vespa/vespalib/stllike/string.h>
+#include <map>
+
+namespace vespalib
+{
+
+class nbostream;
+class GenericHeader;
+
+}
+
+namespace search
+{
+
+namespace index
+{
+
+class DocIdAndFeatures;
+
+} // namespace index
+
+namespace fef
+{
+
+class TermFieldMatchDataArray;
+
+} // namespace fef
+
+
+namespace bitcompression
+{
+
+class Position {
+public:
+ Position(const uint64_t * occurences, int bitOffset) : _occurences(occurences), _bitOffset(bitOffset) { }
+ const uint64_t * getOccurences() const { return _occurences; }
+ int getBitOffset() const { return _bitOffset; }
+private:
+ const uint64_t * _occurences;
+ int _bitOffset;
+};
+
+// Use inline assembly for log2 calculations
+#define DO_ASMLOG
+
+/*
+ * The so-called rice2 code is very similar to the well known exp
+ * golomb code. One difference is that the first bits are inverted.
+ * rice code is a special case of golomb code, with M being a power of
+ * two (2^k). rice coding uses unary coding for quotient, while remainder
+ * bits are just written as they are.
+ *
+ * Rice2 (k=0) starts with: 0, 100, 101, 11000, 11001, 11010, 11011
+ * Rice2 (k=1) starts with: 00, 01, 1000, 1001, 1010, 1011, 110000
+ * Exp golomb (k=0) starts with: 1, 010, 011, 00100, 00101, 00101, 00111
+ * Exp golomb (k=1) starts with: 10, 11, 0100, 0101, 0110, 0111, 001000
+ * unary coding: 0, 10, 110, 1110, 11110, 111110, 1111110
+ * rice coding (k=0) 0, 10, 110, 1110, 11110, 111110, 1111110
+ * rice coding (k=1) 00, 01, 100, 101, 1100, 1101, 11100
+ *
+ * For k=0, exp golomb coding is the same as elias gamma coding.
+ * For k=0, rice coding is the same as unary coding.
+ *
+ * k values up to and including 63 is supported for exp golomb coding
+ * and decoding.
+
+ * The *SMALL* macros only supports k values up to and including 62
+ * (trading flexibility for a minor speed improvement) and numbers
+ * that can be encoded within 64 bits.
+ */
+
+#define TOP_BIT64 UINT64_C(0x8000000000000000)
+#define TOP_2_BITS64 UINT64_C(0xC000000000000000)
+#define TOP_4_BITS64 UINT64_C(0xF000000000000000)
+
+// Compression parameters for zcposting file word headers.
+#define K_VALUE_ZCPOSTING_NUMDOCS 0
+#define K_VALUE_ZCPOSTING_LASTDOCID 22
+#define K_VALUE_ZCPOSTING_DOCIDSSIZE 22
+#define K_VALUE_ZCPOSTING_L1SKIPSIZE 12
+#define K_VALUE_ZCPOSTING_L2SKIPSIZE 10
+#define K_VALUE_ZCPOSTING_L3SKIPSIZE 8
+#define K_VALUE_ZCPOSTING_L4SKIPSIZE 6
+#define K_VALUE_ZCPOSTING_FEATURESSIZE 25
+#define K_VALUE_ZCPOSTING_DELTA_DOCID 22
+
+/**
+ * Lookup tables used for compression / decompression.
+ */
+class CodingTables
+{
+public:
+ static uint8_t _log2Table[65536];
+ static uint64_t _intMask64[65];
+ static uint64_t _intMask64le[65];
+
+ CodingTables(void);
+};
+
+#define UC64_DECODECONTEXT(prefix) \
+ const uint64_t * prefix ## Compr; \
+ uint64_t prefix ## Val; \
+ uint64_t prefix ## CacheInt; \
+ uint32_t prefix ## PreRead;
+
+#define UC64_DECODECONTEXT_CONSTRUCTOR(prefix, ctx) \
+ const uint64_t * prefix ## Compr = ctx ## valI; \
+ uint64_t prefix ## Val = ctx ## val; \
+ uint64_t prefix ## CacheInt = ctx ## cacheInt; \
+ uint32_t prefix ## PreRead = ctx ## preRead;
+
+#define UC64_DECODECONTEXT_LOAD(prefix, ctx) \
+ prefix ## Compr = ctx ## valI; \
+ prefix ## Val = ctx ## val; \
+ prefix ## CacheInt = ctx ## cacheInt; \
+ prefix ## PreRead = ctx ## preRead;
+
+#define UC64_DECODECONTEXT_LOAD_PARTIAL(prefix, ctx) \
+ prefix ## Compr = ctx ## valI;
+
+#define UC64_DECODECONTEXT_STORE(prefix, ctx) \
+ ctx ## valI = prefix ## Compr; \
+ ctx ## val = prefix ## Val; \
+ ctx ## cacheInt = prefix ## CacheInt; \
+ ctx ## preRead = prefix ## PreRead;
+
+
+#define UC64_DECODECONTEXT_STORE_PARTIAL(prefix, ctx) \
+ ctx ## valI = prefix ## Compr;
+
+#define UC64BE_READBITS(val, valI, preRead, cacheInt, EC) \
+ do { \
+ if (__builtin_expect(length <= preRead, true)) { \
+ val |= ((cacheInt >> (preRead - length)) & \
+ ::search::bitcompression::CodingTables::_intMask64[length]); \
+ preRead -= length; \
+ } else { \
+ if (__builtin_expect(preRead > 0, true)) { \
+ length -= preRead; \
+ val |= ((cacheInt & \
+ ::search::bitcompression::CodingTables:: \
+ _intMask64[preRead]) << length); \
+ } \
+ cacheInt = EC::bswap(*valI++); \
+ preRead = 64 - length; \
+ val |= cacheInt >> preRead; \
+ } \
+ } while (0)
+
+#define UC64BE_READBITS_NS(prefix, EC) \
+ UC64BE_READBITS(prefix ## Val, prefix ## Compr, \
+ prefix ## PreRead, prefix ## CacheInt, EC)
+
+#define UC64BE_READBITS_CTX(ctx, EC) \
+ UC64BE_READBITS(ctx._val, ctx._valI, \
+ ctx._preRead, ctx._cacheInt, EC);
+
+
+#define UC64BE_SETUPBITS(bitOffset, val, valI, preRead, cacheInt, EC) \
+ do { \
+ cacheInt = EC::bswap(*valI++); \
+ preRead = 64 - bitOffset; \
+ val = 0; \
+ length = 64; \
+ UC64BE_READBITS(val, valI, preRead, cacheInt, EC); \
+ } while (0)
+
+#define UC64BE_SETUPBITS_NS(ns, comprData, bitOffset, EC) \
+ ns ## Compr = comprData; \
+ UC64BE_SETUPBITS((bitOffset), ns ## Val, ns ## Compr, \
+ ns ## PreRead, ns ## CacheInt, EC);
+
+#define UC64BE_SETUPBITS_CTX(ctx, comprData, bitOffset, EC) \
+ ctx._valI = comprData; \
+ UC64BE_SETUPBITS((bitOffset), ctx._val, ctx._valI, \
+ ctx._preRead, ctx._cacheInt, EC);
+
+#define UC64BE_DECODEEXPGOLOMB(val, valI, preRead, cacheInt, k, EC) \
+ do { \
+ length = \
+ 63 - ::search::bitcompression::EncodeContext64BE::log2(val); \
+ unsigned int olength = length; \
+ val <<= length; \
+ if (__builtin_expect(length * 2 + 1 + (k) > 64, false)) { \
+ UC64BE_READBITS(val, valI, preRead, cacheInt, EC); \
+ length = 0; \
+ } \
+ val64 = (val >> (63 - olength - (k))) - (UINT64_C(1) << (k)); \
+ val <<= olength + 1 + (k); \
+ if (__builtin_expect(olength + 1 + (k) == 64, false)) \
+ val = 0; \
+ length += olength + 1 + (k); \
+ UC64BE_READBITS(val, valI, preRead, cacheInt, EC); \
+ } while (0)
+
+
+#define UC64BE_DECODEEXPGOLOMB_NS(prefix, k, EC) \
+ do { \
+ UC64BE_DECODEEXPGOLOMB(prefix ## Val, prefix ## Compr, \
+ prefix ## PreRead, prefix ## CacheInt, \
+ k, EC); \
+ } while (0)
+
+#define UC64BE_DECODEEXPGOLOMB_SMALL(val, valI, preRead, cacheInt, k, \
+ EC) \
+ do { \
+ length = \
+ 63 - ::search::bitcompression::EncodeContext64BE::log2(val); \
+ val <<= length; \
+ val64 = (val >> (63 - length - (k))) - (UINT64_C(1) << (k)); \
+ val <<= length + 1 + (k); \
+ length += length + 1 + (k); \
+ UC64BE_READBITS(val, valI, preRead, cacheInt, EC); \
+ } while (0)
+
+#define UC64BE_DECODEEXPGOLOMB_SMALL_NS(prefix, k, EC) \
+ do { \
+ UC64BE_DECODEEXPGOLOMB_SMALL(prefix ## Val, prefix ## Compr, \
+ prefix ## PreRead, prefix ## CacheInt, \
+ k, EC); \
+ } while (0)
+
+#define UC64BE_DECODEEXPGOLOMB_SMALL_CTX(ctx, k, EC) \
+ do { \
+ UC64BE_DECODEEXPGOLOMB_SMALL(ctx._val, ctx._valI, \
+ ctx._preRead, ctx._cacheInt, \
+ k, EC); \
+ } while (0)
+
+#define UC64BE_DECODEEXPGOLOMB_SMALL_APPLY(val, valI, preRead, cacheInt,\
+ k, EC, resop) \
+ do { \
+ length = \
+ 63 - ::search::bitcompression::EncodeContext64BE::log2(val); \
+ val <<= length; \
+ resop (val >> (63 - length - (k))) - (UINT64_C(1) << (k)); \
+ val <<= length + 1 + (k); \
+ length += length + 1 + (k); \
+ UC64BE_READBITS(val, valI, preRead, cacheInt, EC); \
+ } while (0)
+
+
+#define UC64BE_SKIPEXPGOLOMB(val, valI, preRead, cacheInt, k, EC) \
+ do { \
+ length = \
+ 63 - ::search::bitcompression::EncodeContext64BE::log2(val); \
+ unsigned int olength = length; \
+ val <<= length; \
+ if (__builtin_expect(length * 2 + 1 + (k) > 64, false)) { \
+ UC64BE_READBITS(val, valI, preRead, cacheInt, EC); \
+ length = 0; \
+ } \
+ val <<= olength + 1 + (k); \
+ if (__builtin_expect(olength + 1 + (k) == 64, false)) \
+ val = 0; \
+ length += olength + 1 + (k); \
+ UC64BE_READBITS(val, valI, preRead, cacheInt, EC); \
+ } while (0)
+
+
+#define UC64BE_SKIPEXPGOLOMB_NS(prefix, k, EC) \
+ do { \
+ UC64BE_SKIPEXPGOLOMB(prefix ## Val, prefix ## Compr, \
+ prefix ## PreRead, prefix ## CacheInt, \
+ k, EC); \
+ } while (0)
+
+#define UC64BE_SKIPEXPGOLOMB_SMALL(val, valI, preRead, cacheInt, k, \
+ EC) \
+ do { \
+ length = \
+ 63 - ::search::bitcompression::EncodeContext64BE::log2(val); \
+ val <<= length; \
+ val <<= length + 1 + (k); \
+ length += length + 1 + (k); \
+ UC64BE_READBITS(val, valI, preRead, cacheInt, EC); \
+ } while (0)
+
+#define UC64BE_SKIPEXPGOLOMB_SMALL_NS(prefix, k, EC) \
+ do { \
+ UC64BE_SKIPEXPGOLOMB_SMALL(prefix ## Val, prefix ## Compr, \
+ prefix ## PreRead, prefix ## CacheInt, \
+ k, EC); \
+ } while (0)
+
+#define UC64BE_WRITEBITS(cacheInt, cacheFree, bufI, EC) \
+ do { \
+ if (length >= cacheFree) { \
+ cacheInt |= ((data >> (length - cacheFree)) & \
+ ::search::bitcompression::CodingTables:: \
+ _intMask64[cacheFree]); \
+ *bufI++ = EC::bswap(cacheInt); \
+ length -= cacheFree; \
+ cacheInt = 0; \
+ cacheFree = 64; \
+ } \
+ if (length > 0) { \
+ uint64_t dataFragment = \
+ (data & ::search::bitcompression::CodingTables:: \
+ _intMask64[length]); \
+ cacheInt |= (dataFragment << (cacheFree - length)); \
+ cacheFree -= length; \
+ } \
+ } while (0)
+
+
+#define UC64BE_WRITEBITS_NS(prefix, EC) \
+ do { \
+ UC64BE_WRITEBITS(prefix ## CacheInt, prefix ## CacheFree, \
+ prefix ## BufI, EC); \
+ } while (0)
+
+#define UC64BE_WRITEBITS_CTX(ctx, EC) \
+ do { \
+ UC64BE_WRITEBITS(ctx ## cacheInt, ctx ## cacheFree, \
+ ctx ## valI, EC); \
+ } while (0)
+
+#define UC64BE_DECODEDEXPGOLOMB_NS(prefix, k, EC) \
+ do { \
+ if ((prefix ## Val & TOP_BIT64) == 0) { \
+ length = 1; \
+ prefix ## Val <<= 1; \
+ val64 = 0; \
+ UC64BE_READBITS_NS(prefix, EC); \
+ } else { \
+ if ((prefix ## Val & TOP_2_BITS64) != TOP_2_BITS64) { \
+ length = 2; \
+ prefix ## Val <<= 2; \
+ val64 = 1; \
+ UC64BE_READBITS_NS(prefix, EC); \
+ } else { \
+ length = 2; \
+ prefix ## Val <<= 2; \
+ UC64BE_READBITS_NS(prefix, EC); \
+ UC64BE_DECODEEXPGOLOMB_NS(prefix, k, EC); \
+ val64 += 2; \
+ } \
+ } \
+ } while (0)
+
+#define UC64BE_DECODED0EXPGOLOMB_NS(prefix, k, EC) \
+ do { \
+ if ((prefix ## Val & TOP_BIT64) == 0) { \
+ length = 1; \
+ prefix ## Val <<= 1; \
+ val64 = 0; \
+ UC64BE_READBITS_NS(prefix, EC); \
+ } else { \
+ length = 1; \
+ prefix ## Val <<= 1; \
+ UC64BE_READBITS_NS(prefix, EC); \
+ UC64BE_DECODEEXPGOLOMB_NS(prefix, k, EC); \
+ val64 += 1; \
+ } \
+ } while (0)
+
+#define UC64LE_READBITS(val, valI, preRead, cacheInt, EC) \
+ do { \
+ if (__builtin_expect(length <= preRead, true)) { \
+ val |= ((cacheInt << (preRead - length)) & \
+ ::search::bitcompression::CodingTables::_intMask64le[length]); \
+ preRead -= length; \
+ } else { \
+ if (__builtin_expect(preRead > 0, true)) { \
+ length -= preRead; \
+ val |= ((cacheInt & \
+ ::search::bitcompression::CodingTables:: \
+ _intMask64le[preRead]) >> length); \
+ } \
+ cacheInt = EC::bswap(*valI++); \
+ preRead = 64 - length; \
+ val |= cacheInt << preRead; \
+ } \
+ } while (0)
+
+#define UC64LE_READBITS_NS(prefix, EC) \
+ UC64LE_READBITS(prefix ## Val, prefix ## Compr, \
+ prefix ## PreRead, prefix ## CacheInt, EC)
+
+#define UC64LE_READBITS_CTX(ctx, EC) \
+ UC64LE_READBITS(ctx._val, ctx._valI, \
+ ctx._preRead, ctx._cacheInt, EC);
+
+
+#define UC64LE_SETUPBITS(bitOffset, val, valI, preRead, cacheInt, EC) \
+ do { \
+ cacheInt = EC::bswap(*valI++); \
+ preRead = 64 - bitOffset; \
+ val = 0; \
+ length = 64; \
+ UC64LE_READBITS(val, valI, preRead, cacheInt, EC); \
+ } while (0)
+
+#define UC64LE_SETUPBITS_NS(ns, comprData, bitOffset, EC) \
+ ns ## Compr = comprData; \
+ UC64LE_SETUPBITS((bitOffset), ns ## Val, ns ## Compr, \
+ ns ## PreRead, ns ## CacheInt, EC);
+
+#define UC64LE_SETUPBITS_CTX(ctx, comprData, bitOffset, EC) \
+ ctx._valI = comprData; \
+ UC64LE_SETUPBITS((bitOffset), ctx._val, ctx._valI, \
+ ctx._preRead, ctx._cacheInt, EC);
+
+#define UC64LE_DECODEEXPGOLOMB(val, valI, preRead, cacheInt, k, EC) \
+ do { \
+ unsigned int olength = \
+ ::search::bitcompression::EncodeContext64LE::ffsl(val); \
+ length = olength + 1; \
+ val >>= length; \
+ if (__builtin_expect(length == 64, false)) \
+ val = 0; \
+ if (__builtin_expect(olength * 2 + 1 + (k) > 64, false)) { \
+ UC64LE_READBITS(val, valI, preRead, cacheInt, EC); \
+ length = 0; \
+ } \
+ val64 = (val & ((UINT64_C(1) << (olength + (k))) - 1)) + \
+ (UINT64_C(1) << (olength + (k))) - (UINT64_C(1) << (k)); \
+ val >>= olength + (k); \
+ length += olength + (k); \
+ UC64LE_READBITS(val, valI, preRead, cacheInt, EC); \
+ } while (0)
+
+
+#define UC64LE_DECODEEXPGOLOMB_NS(prefix, k, EC) \
+ do { \
+ UC64LE_DECODEEXPGOLOMB(prefix ## Val, prefix ## Compr, \
+ prefix ## PreRead, prefix ## CacheInt, \
+ k, EC); \
+ } while (0)
+
+#define UC64LE_DECODEEXPGOLOMB_SMALL(val, valI, preRead, cacheInt, k, \
+ EC) \
+ do { \
+ length = ::search::bitcompression::EncodeContext64LE::ffsl(val); \
+ val >>= length + 1; \
+ val64 = (val & ((UINT64_C(1) << (length + (k))) - 1)) + \
+ (UINT64_C(1) << (length + (k))) - (UINT64_C(1) << (k)); \
+ val >>= length + (k); \
+ length += length + 1 + (k); \
+ UC64LE_READBITS(val, valI, preRead, cacheInt, EC); \
+ } while (0)
+
+#define UC64LE_DECODEEXPGOLOMB_SMALL_NS(prefix, k, EC) \
+ do { \
+ UC64LE_DECODEEXPGOLOMB_SMALL(prefix ## Val, prefix ## Compr, \
+ prefix ## PreRead, prefix ## CacheInt, \
+ k, EC); \
+ } while (0)
+
+#define UC64LE_DECODEEXPGOLOMB_SMALL_CTX(ctx, k, EC) \
+ do { \
+ UC64LE_DECODEEXPGOLOMB_SMALL(ctx._val, ctx._valI, \
+ ctx._preRead, ctx._cacheInt, \
+ k, EC); \
+ } while (0)
+
+#define UC64LE_DECODEEXPGOLOMB_SMALL_APPLY(val, valI, preRead, cacheInt,\
+ k, EC, resop) \
+ do { \
+ length = ::search::bitcompression::EncodeContext64LE::ffsl(val); \
+ val >>= length + 1; \
+ resop (val & ((UINT64_C(1) << (length + (k))) - 1)) + \
+ (UINT64_C(1) << (length + (k))) - (UINT64_C(1) << (k)); \
+ val >>= length + (k); \
+ length += length + 1 + (k); \
+ UC64LE_READBITS(val, valI, preRead, cacheInt, EC); \
+ } while (0)
+
+
+#define UC64LE_SKIPEXPGOLOMB(val, valI, preRead, cacheInt, k, EC) \
+ do { \
+ unsigned int olength = \
+ ::search::bitcompression::EncodeContext64LE::ffsl(val); \
+ length = olength + 1; \
+ val >>= length; \
+ if (__builtin_expect(length == 64, false)) \
+ val = 0; \
+ if (__builtin_expect(olength * 2 + 1 + (k) > 64, false)) { \
+ UC64LE_READBITS(val, valI, preRead, cacheInt, EC); \
+ length = 0; \
+ } \
+ val >>= olength + (k); \
+ length += olength + (k); \
+ UC64LE_READBITS(val, valI, preRead, cacheInt, EC); \
+ } while (0)
+
+
+#define UC64LE_SKIPEXPGOLOMB_NS(prefix, k, EC) \
+ do { \
+ UC64LE_SKIPEXPGOLOMB(prefix ## Val, prefix ## Compr, \
+ prefix ## PreRead, prefix ## CacheInt, \
+ k, EC); \
+ } while (0)
+
+#define UC64LE_SKIPEXPGOLOMB_SMALL(val, valI, preRead, cacheInt, k, \
+ EC) \
+ do { \
+ length = ::search::bitcompression::EncodeContext64LE::ffsl(val); \
+ val >>= length + 1; \
+ val >>= length + (k); \
+ length += length + 1 + (k); \
+ UC64LE_READBITS(val, valI, preRead, cacheInt, EC); \
+ } while (0)
+
+#define UC64LE_SKIPEXPGOLOMB_SMALL_NS(prefix, k, EC) \
+ do { \
+ UC64LE_SKIPEXPGOLOMB_SMALL(prefix ## Val, prefix ## Compr, \
+ prefix ## PreRead, prefix ## CacheInt, \
+ k, EC); \
+ } while (0)
+
+#define UC64LE_WRITEBITS(cacheInt, cacheFree, bufI, EC) \
+ do { \
+ if (length >= cacheFree) { \
+ cacheInt |= (data << (64 - cacheFree)); \
+ *bufI++ = EC::bswap(cacheInt); \
+ data >>= cacheFree; \
+ length -= cacheFree; \
+ cacheInt = 0; \
+ cacheFree = 64; \
+ } \
+ if (length > 0) { \
+ uint64_t dataFragment = \
+ (data & ::search::bitcompression::CodingTables:: \
+ _intMask64[length]); \
+ cacheInt |= (dataFragment << (64 - cacheFree)); \
+ cacheFree -= length; \
+ } \
+ } while (0)
+
+
+#define UC64LE_WRITEBITS_NS(prefix, EC) \
+ do { \
+ UC64LE_WRITEBITS(prefix ## CacheInt, prefix ## CacheFree, \
+ prefix ## BufI, EC); \
+ } while (0)
+
+#define UC64LE_WRITEBITS_CTX(ctx, EC) \
+ do { \
+ UC64LE_WRITEBITS(ctx ## cacheInt, ctx ## cacheFree, \
+ ctx ## valI, EC); \
+ } while (0)
+
+#define UC64_READBITS(val, valI, preRead, cacheInt, EC) \
+ do { \
+ if (bigEndian) { \
+ UC64BE_READBITS(val, valI, preRead, cacheInt, EC); \
+ } else { \
+ UC64LE_READBITS(val, valI, preRead, cacheInt, EC); \
+ } \
+ } while (0)
+
+#define UC64_READBITS_NS(prefix, EC) \
+ UC64_READBITS(prefix ## Val, prefix ## Compr, \
+ prefix ## PreRead, prefix ## CacheInt, EC)
+
+#define UC64_READBITS_CTX(ctx, EC) \
+ UC64_READBITS(ctx._val, ctx._valI, \
+ ctx._preRead, ctx._cacheInt, EC)
+
+
+#define UC64_SETUPBITS(bitOffset, val, valI, preRead, cacheInt, EC) \
+ do { \
+ if (bigEndian) { \
+ UC64BE_SETUPBITS(bitOffset, val, valI, preRead, cacheInt, EC); \
+ } else { \
+ UC64LE_SETUPBITS(bitOffset, val, valI, preRead, cacheInt, EC); \
+ } \
+ } while (0)
+
+#define UC64_SETUPBITS_NS(ns, comprData, bitOffset, EC) \
+ ns ## Compr = comprData; \
+ UC64_SETUPBITS((bitOffset), ns ## Val, ns ## Compr, \
+ ns ## PreRead, ns ## CacheInt, EC);
+
+#define UC64_SETUPBITS_CTX(ctx, comprData, bitOffset, EC) \
+ ctx._valI = comprData; \
+ UC64_SETUPBITS((bitOffset), ctx._val, ctx._valI, \
+ ctx._preRead, ctx._cacheInt, EC);
+
+#define UC64_DECODEEXPGOLOMB(val, valI, preRead, cacheInt, k, EC) \
+ do { \
+ if (bigEndian) { \
+ UC64BE_DECODEEXPGOLOMB(val, valI, preRead, cacheInt, k, EC); \
+ } else { \
+ UC64LE_DECODEEXPGOLOMB(val, valI, preRead, cacheInt, k, EC); \
+ } \
+ } while (0)
+
+#define UC64_DECODEEXPGOLOMB_NS(prefix, k, EC) \
+ do { \
+ UC64_DECODEEXPGOLOMB(prefix ## Val, prefix ## Compr, \
+ prefix ## PreRead, prefix ## CacheInt, \
+ k, EC); \
+ } while (0)
+
+#define UC64_DECODEEXPGOLOMB_SMALL(val, valI, preRead, cacheInt, k, EC) \
+ do { \
+ if (bigEndian) { \
+ UC64BE_DECODEEXPGOLOMB_SMALL(val, valI, preRead, cacheInt, k, EC);\
+ } else { \
+ UC64LE_DECODEEXPGOLOMB_SMALL(val, valI, preRead, cacheInt, k, EC);\
+ } \
+ } while (0)
+
+#define UC64_DECODEEXPGOLOMB_SMALL_NS(prefix, k, EC) \
+ do { \
+ UC64_DECODEEXPGOLOMB_SMALL(prefix ## Val, prefix ## Compr, \
+ prefix ## PreRead, prefix ## CacheInt, \
+ k, EC); \
+ } while (0)
+
+#define UC64_DECODEEXPGOLOMB_SMALL_CTX(ctx, k, EC) \
+ do { \
+ UC64_DECODEEXPGOLOMB_SMALL(ctx._val, ctx._valI, \
+ ctx._preRead, ctx._cacheInt, \
+ k, EC); \
+ } while (0)
+
+#define UC64_DECODEEXPGOLOMB_SMALL_APPLY(val, valI, preRead, cacheInt, \
+ k, EC, resop) \
+ do { \
+ if (bigEndian) { \
+ UC64BE_DECODEEXPGOLOMB_SMALL_APPLY(val, valI, preRead, cacheInt, \
+ k, EC, resop); \
+ } else { \
+ UC64LE_DECODEEXPGOLOMB_SMALL_APPLY(val, valI, preRead, cacheInt, \
+ k, EC, resop); \
+ } \
+ } while (0)
+
+#define UC64_SKIPEXPGOLOMB(val, valI, preRead, cacheInt, k, EC) \
+ do { \
+ if (bigEndian) { \
+ UC64BE_SKIPEXPGOLOMB(val, valI, preRead, cacheInt, k, EC); \
+ } else { \
+ UC64LE_SKIPEXPGOLOMB(val, valI, preRead, cacheInt, k, EC); \
+ } \
+ } while (0)
+
+#define UC64_SKIPEXPGOLOMB_NS(prefix, k, EC) \
+ do { \
+ UC64_SKIPEXPGOLOMB(prefix ## Val, prefix ## Compr, \
+ prefix ## PreRead, prefix ## CacheInt, \
+ k, EC); \
+ } while (0)
+
+#define UC64_SKIPEXPGOLOMB_SMALL(val, valI, preRead, cacheInt, k, \
+ EC) \
+ do { \
+ if (bigEndian) { \
+ UC64BE_SKIPEXPGOLOMB_SMALL(val, valI, preRead, cacheInt, k, EC); \
+ } else { \
+ UC64LE_SKIPEXPGOLOMB_SMALL(val, valI, preRead, cacheInt, k, EC); \
+ } \
+ } while (0)
+
+#define UC64_SKIPEXPGOLOMB_SMALL_NS(prefix, k, EC) \
+ do { \
+ UC64_SKIPEXPGOLOMB_SMALL(prefix ## Val, prefix ## Compr, \
+ prefix ## PreRead, prefix ## CacheInt, \
+ k, EC); \
+ } while (0)
+
+#define UC64_WRITEBITS(cacheInt, cacheFree, bufI, EC) \
+ do { \
+ if (bigEndian) { \
+ UC64BE_WRITEBITS(cacheInt, cacheFree, bufI, EC); \
+ } else { \
+ UC64LE_WRITEBITS(cacheInt, cacheFree, bufI, EC); \
+ } \
+ } while (0)
+
+
+#define UC64_WRITEBITS_NS(prefix, EC) \
+ do { \
+ UC64_WRITEBITS(prefix ## CacheInt, prefix ## CacheFree, \
+ prefix ## BufI, EC); \
+ } while (0)
+
+#define UC64_WRITEBITS_CTX(ctx, EC) \
+ do { \
+ UC64_WRITEBITS(ctx ## cacheInt, ctx ## cacheFree, \
+ ctx ## valI, EC); \
+ } while (0)
+
+#define UC64_ENCODECONTEXT(prefix) \
+ uint64_t *prefix ## BufI; \
+ uint64_t prefix ## CacheInt; \
+ uint32_t prefix ## CacheFree;
+
+#define UC64_ENCODECONTEXT_CONSTRUCTOR(prefix, ctx) \
+ uint64_t *prefix ## BufI = ctx ## valI; \
+ uint64_t prefix ## CacheInt = ctx ## cacheInt; \
+ uint32_t prefix ## CacheFree = ctx ## cacheFree;
+
+#define UC64_ENCODECONTEXT_LOAD(prefix, ctx) \
+ prefix ## BufI = ctx ## valI; \
+ prefix ## CacheInt = ctx ## cacheInt; \
+ prefix ## CacheFree = ctx ## cacheFree;
+
+#define UC64_ENCODECONTEXT_LOAD_PARTIAL(prefix, ctx) \
+ prefix ## BufI = ctx ## valI;
+
+#define UC64_ENCODECONTEXT_STORE(prefix, ctx) \
+ ctx ## valI = prefix ## BufI; \
+ ctx ## cacheInt = prefix ## CacheInt; \
+ ctx ## cacheFree = prefix ## CacheFree;
+
+#define UC64_ENCODECONTEXT_STORE_PARTIAL(prefix, ctx) \
+ ctx ## valI = prefix ## BufI;
+
+
+class EncodeContext64Base : public search::ComprFileEncodeContext
+{
+public:
+ enum Constants {
+ END_BUFFER_SAFETY = 4
+ };
+
+ typedef uint64_t UnitType;
+
+ // Pointers to compressed data
+ uint64_t *_valI;
+ const uint64_t *_valE;
+
+ // Cached integers
+
+ // _cacheInt is the second level of integer cache. It holds the
+ // next bits (_cacheFree bits of this integer is free)
+ uint64_t _cacheInt;
+ uint32_t _cacheFree;
+
+ // File position for start of buffer minus byte address of start of buffer
+ // plus sizeof uint64_t. Then shifted left by 3 to represent bits.
+ uint64_t _fileWriteBias;
+
+ EncodeContext64Base(void)
+ : search::ComprFileEncodeContext(),
+ _valI(NULL),
+ _valE(NULL),
+ _cacheInt(0),
+ _cacheFree(64),
+ _fileWriteBias(64)
+ {
+ }
+
+ EncodeContext64Base(const EncodeContext64Base &other)
+ : search::ComprFileEncodeContext(other),
+ _valI(other._valI),
+ _valE(other._valE),
+ _cacheInt(other._cacheInt),
+ _cacheFree(other._cacheFree),
+ _fileWriteBias(other._fileWriteBias)
+ {
+ }
+
+ virtual
+ ~EncodeContext64Base(void)
+ {
+ }
+
+ EncodeContext64Base &
+ operator=(const EncodeContext64Base &rhs)
+ {
+ search::ComprFileEncodeContext::operator=(rhs);
+ _valI = rhs._valI;
+ _valE = rhs._valE;
+ _cacheInt = rhs._cacheInt;
+ _cacheFree = rhs._cacheFree;
+ _fileWriteBias = rhs._fileWriteBias;
+ return *this;
+ }
+
+ /**
+ * Get number of used units (e.g. _valI - start)
+ */
+ virtual int
+ getUsedUnits(void *start)
+ {
+ return _valI - static_cast<uint64_t *>(start);
+ }
+
+ /**
+ * Get normal full buffer size (e.g. _valE - start)
+ */
+ virtual int
+ getNormalMaxUnits(void *start)
+ {
+ return _valE - static_cast<uint64_t *>(start);
+ }
+
+ /**
+ * Adjust buffer after write (e.g. _valI, _fileWriteBias)
+ */
+ virtual void
+ afterWrite(search::ComprBuffer &cbuf,
+ uint32_t remainingUnits,
+ uint64_t bufferStartFilePos)
+ {
+ _valI = static_cast<uint64_t *>(cbuf._comprBuf) + remainingUnits;
+ _fileWriteBias = (bufferStartFilePos -
+ reinterpret_cast<unsigned long>(cbuf._comprBuf) +
+ sizeof(uint64_t)) << 3;
+ adjustBufSize(cbuf);
+ }
+
+ /**
+ * Adjust buffer size to align end of buffer.
+ */
+ virtual void
+ adjustBufSize(search::ComprBuffer &cbuf)
+ {
+ uint64_t fileWriteOffset =
+ (_fileWriteBias +
+ ((reinterpret_cast<unsigned long>(cbuf._comprBuf) -
+ sizeof(uint64_t)) << 3)) >> 3;
+ _valE = static_cast<uint64_t *>(cbuf._comprBuf) +
+ cbuf._aligner.adjustElements(
+ fileWriteOffset / sizeof(uint64_t),
+ cbuf._comprBufSize);
+ }
+
+ virtual uint32_t
+ getUnitByteSize(void) const
+ {
+ return sizeof(uint64_t);
+ }
+
+ void
+ setupWrite(search::ComprBuffer &cbuf)
+ {
+ _valI = static_cast<uint64_t *>(cbuf._comprBuf);
+
+ _fileWriteBias =
+ (sizeof(uint64_t) -
+ reinterpret_cast<unsigned long>(cbuf._comprBuf)) << 3;
+ // Buffer for compressed data now has padding after it
+ adjustBufSize(cbuf);
+ _cacheInt = 0;
+ _cacheFree = 64;
+ }
+
+ void
+ reload(const EncodeContext64Base &other)
+ {
+ _valI = other._valI;
+ _valE = other._valE;
+ _cacheInt = other._cacheInt;
+ _cacheFree = other._cacheFree;
+ _fileWriteBias = other._fileWriteBias;
+ }
+
+ void
+ pushBack(EncodeContext64Base &other) const
+ {
+ other._valI = _valI;
+ other._cacheInt = _cacheInt;
+ other._cacheFree = _cacheFree;
+ }
+
+ virtual void
+ checkPointWrite(vespalib::nbostream &out);
+
+ virtual void
+ checkPointRead(vespalib::nbostream &in);
+
+ uint64_t
+ getWriteOffset(void) const
+ {
+ return _fileWriteBias +
+ (reinterpret_cast<unsigned long>(_valI) << 3) - _cacheFree;
+ }
+
+ void
+ defineWriteOffset(uint64_t writeOffset)
+ {
+ _fileWriteBias = writeOffset -
+ (reinterpret_cast<unsigned long>(_valI) << 3) +
+ _cacheFree;
+ }
+
+ virtual uint64_t
+ getBitPosV(void) const
+ {
+ return getWriteOffset();
+ }
+
+ /*
+ * Return max value that can be exp golomb encoded with our implementation
+ * ot the encoding method. Handling of larger numbers would require changes
+ * to both decode macros (making them slower) and encoding method (making
+ * it slower).
+ */
+ static uint64_t
+ maxExpGolombVal(uint32_t kValue)
+ {
+ return static_cast<uint64_t>
+ (- (UINT64_C(1) << kValue) - 1);
+ }
+
+ /*
+ * Return max value that can be exp golomb encoded within maxBits
+ * using kValue encoding parameter.
+ *
+ * maxBits must be larger than kValue
+ */
+ static uint64_t
+ maxExpGolombVal(uint32_t kValue, uint32_t maxBits)
+ {
+ if ((maxBits + kValue + 1) / 2 > 64) {
+ return static_cast<uint64_t>(-1);
+ }
+ if ((maxBits + kValue + 1) / 2 == 64) {
+ return static_cast<uint64_t>
+ (- (UINT64_C(1) << kValue) - 1);
+ }
+ return static_cast<uint64_t>
+ ((UINT64_C(1) << ((maxBits + kValue + 1) / 2)) -
+ (UINT64_C(1) << kValue) - 1);
+ }
+
+};
+
+
+template <bool bigEndian>
+class EncodeContext64EBase : public EncodeContext64Base
+{
+public:
+ static inline uint64_t
+ bswap(uint64_t val);
+
+ /**
+ * Write bits
+ *
+ * @param data The bits to be written to file.
+ * @param length The number of bits to be written to file.
+ */
+ void inline
+ writeBits(uint64_t data, uint32_t length);
+
+ /**
+ * Flushes the last integer to disk if there are remaining bits left in
+ * the _cacheInt. Padding of trailing 0-bits is automatically added.
+ */
+ void
+ flush(void)
+ {
+ if (_cacheFree < 64) {
+ *_valI++ = bswap(_cacheInt);
+ _cacheInt = 0;
+ _cacheFree = 64;
+ }
+ }
+
+ void
+ smallPadBits(uint32_t length)
+ {
+ if (length > 0)
+ writeBits(0, length);
+ }
+
+ virtual void
+ padBits(uint32_t length)
+ {
+ while (length > 64) {
+ writeBits(0, 64);
+ length -= 64;
+ }
+ smallPadBits(length);
+ }
+
+ void
+ align(uint32_t alignment)
+ {
+ uint64_t length = (- getWriteOffset()) & (alignment - 1);
+ padBits(length);
+ }
+
+ void
+ alignDirectIO()
+ {
+ align(4096*8);
+ }
+
+ /*
+ * Small alignment (max 64 bits alignment)
+ */
+ void
+ smallAlign(uint32_t alignment)
+ {
+ uint64_t length = _cacheFree & (alignment - 1);
+ smallPadBits(length);
+ }
+};
+
+
+template <>
+inline uint64_t
+EncodeContext64EBase<true>::bswap(uint64_t val)
+{
+ __asm__("bswap %0" : "=r" (val) : "0" (val));
+ return val;
+}
+
+
+template <>
+inline void
+EncodeContext64EBase<true>::writeBits(uint64_t data, uint32_t length)
+{
+ // While there are enough bits remaining in "data",
+ // fill the cacheInt and flush it to vector
+ if (length >= _cacheFree) {
+ // Shift new bits into cacheInt
+ _cacheInt |= ((data >> (length - _cacheFree)) &
+ CodingTables::_intMask64[_cacheFree]);
+ *_valI++ = bswap(_cacheInt);
+
+ // Initialize variables for receiving new bits
+ length -= _cacheFree;
+ _cacheInt = 0;
+ _cacheFree = 64;
+ }
+
+ if (length > 0) {
+ uint64_t dataFragment = (data & CodingTables::_intMask64[length]);
+ _cacheInt |= (dataFragment << (_cacheFree - length));
+ _cacheFree -= length;
+ }
+}
+
+
+template <>
+inline uint64_t
+EncodeContext64EBase<false>::bswap(uint64_t val)
+{
+ return val;
+}
+
+
+template <>
+inline void
+EncodeContext64EBase<false>::writeBits(uint64_t data, uint32_t length)
+{
+ // While there are enough bits remaining in "data",
+ // fill the cacheInt and flush it to vector
+ if (length >= _cacheFree) {
+ // Shift new bits into cacheInt
+ _cacheInt |= (data << (64 - _cacheFree));
+ *_valI++ = bswap(_cacheInt);
+
+ data >>= _cacheFree;
+ // Initialize variables for receiving new bits
+ length -= _cacheFree;
+ _cacheInt = 0;
+ _cacheFree = 64;
+ }
+
+ if (length > 0) {
+ uint64_t dataFragment = (data & CodingTables::_intMask64[length]);
+ _cacheInt |= (dataFragment << (64 - _cacheFree));
+ _cacheFree -= length;
+ }
+}
+
+typedef EncodeContext64EBase<true> EncodeContext64BEBase;
+
+typedef EncodeContext64EBase<false> EncodeContext64LEBase;
+
+
+template<bool bigEndian>
+class EncodeContext64 : public EncodeContext64EBase<bigEndian>
+{
+public:
+ typedef EncodeContext64EBase<bigEndian> BaseClass;
+ using BaseClass::writeBits;
+
+ /**
+ * Calculate floor(log2(x))
+ */
+ static inline uint32_t
+ log2(uint64_t x)
+ {
+ uint64_t retVal;
+
+#if (defined(__x86_64__)) && defined(DO_ASMLOG)
+ __asm("bsrq %1,%0" : "=r" (retVal) : "r" (x));
+
+#else
+ register uint64_t lower = x;
+ uint32_t upper32 = lower >> 32;
+ if (upper32 != 0) {
+ uint32_t upper16 = upper32 >> 16;
+ if (upper16 != 0)
+ retVal = 48 + CodingTables::_log2Table[upper16];
+ else
+ retVal = 32 + CodingTables::_log2Table[upper32];
+ } else {
+ uint32_t lower32 = static_cast<uint32_t>(x);
+ uint32_t upper16 = lower32 >> 16;
+
+ if (upper16 != 0)
+ retVal = 16 + CodingTables::_log2Table[upper16];
+ else
+ retVal = CodingTables::_log2Table[lower32];
+ }
+#endif
+
+ return retVal;
+ }
+
+ static inline uint64_t
+ ffsl(uint64_t x)
+ {
+ uint64_t retVal;
+ __asm("bsfq %1,%0" : "=r" (retVal) : "r" (x));
+ return retVal;
+ }
+
+ /**
+ * ExpGolomb-encode an integer
+ * @param x integer to be encoded (lowest value is 0).
+ * @param k k parameter
+ *
+ * Note: This method doesn't work when x > maxExpGolombVal(k).
+ */
+ void
+ encodeExpGolomb(uint64_t x, uint32_t k)
+ {
+ if (bigEndian) {
+ uint32_t log2qx2 = log2((x >> k) + 1) * 2;
+ uint64_t expGolomb = x + (UINT64_C(1) << k);
+
+ if (log2qx2 < 64 - k)
+ writeBits(expGolomb, k + log2qx2 + 1);
+ else {
+ writeBits(0, k + log2qx2 + 1 - 64);
+ writeBits(expGolomb, 64);
+ }
+ } else {
+ uint32_t log2q = log2((x >> k) + 1);
+ uint32_t log2qx2 = log2q * 2;
+ uint64_t expGolomb = x + (UINT64_C(1) << k) -
+ (UINT64_C(1) << (k + log2q));
+
+ if (log2qx2 < 64 - k)
+ writeBits(((expGolomb << 1) | 1) << log2q, k + log2qx2 + 1);
+ else {
+ writeBits(0, log2q);
+ writeBits((expGolomb << 1) | 1, log2q + k + 1);
+ }
+ }
+ }
+
+ static uint32_t
+ encodeExpGolombSpace(uint64_t x, uint32_t k)
+ {
+ return k + log2((x >> k) + 1) * 2 + 1;
+ }
+
+ void
+ encodeDExpGolomb(uint64_t x, uint32_t k)
+ {
+ if (x == 0) {
+ writeBits(0, 1);
+ return;
+ }
+ if (x == 1) {
+ writeBits(bigEndian ? 2 : 1, 2);
+ return;
+ }
+ writeBits(3, 2);
+ encodeExpGolomb(x - 2, k);
+ }
+
+ static uint32_t
+ encodeDExpGolombSpace(uint64_t x, uint32_t k)
+ {
+ if (x == 0)
+ return 1;
+ if (x == 1)
+ return 2;
+ return 2 + encodeExpGolombSpace(x, k);
+ }
+
+ void
+ encodeD0ExpGolomb(uint64_t x, uint32_t k)
+ {
+ if (x == 0) {
+ writeBits(0, 1);
+ return;
+ }
+ writeBits(1, 1);
+ encodeExpGolomb(x - 1, k);
+ }
+
+ static uint32_t
+ encodeD0ExpGolombSpace(uint64_t x, uint32_t k)
+ {
+ if (x == 0)
+ return 1;
+ return 1 + encodeExpGolombSpace(x, k);
+ }
+
+ static uint64_t
+ convertToUnsigned(int64_t val)
+ {
+ if (val < 0)
+ return ((- val) << 1) - 1;
+ else
+ return (val << 1);
+ }
+};
+
+
+typedef EncodeContext64<true> EncodeContext64BE;
+
+typedef EncodeContext64<false> EncodeContext64LE;
+
+class DecodeContext64Base : public search::ComprFileDecodeContext
+{
+private:
+ DecodeContext64Base(const DecodeContext64Base &);
+
+public:
+ enum Constants {
+ END_BUFFER_SAFETY = 4
+ };
+
+ // Pointers to compressed data
+ const uint64_t *_valI;
+ const uint64_t *_valE;
+ const uint64_t *_realValE;
+
+ // Cached integers
+
+ // _val is the work-integer which is by convention always filled
+ // with the next 64 bits (the first bit is #31)
+ uint64_t _val;
+
+ // _cacheInt is the second level of integer cache. It holds the
+ // next bits (_preRead bits of this integer is valid)
+ uint64_t _cacheInt;
+ uint32_t _preRead;
+
+ // File position for end of buffer minus byte address of end of buffer
+ // minus sizeof uint64_t. Then shifted left by 3 to represent bits.
+ uint64_t _fileReadBias;
+
+ DecodeContext64Base(void)
+ : search::ComprFileDecodeContext(),
+ _valI(NULL),
+ _valE(NULL),
+ _realValE(NULL),
+ _val(0),
+ _cacheInt(0),
+ _preRead(0),
+ _fileReadBias(0)
+ {
+ }
+
+
+ DecodeContext64Base(const uint64_t *valI,
+ const uint64_t *valE,
+ const uint64_t *realValE,
+ uint64_t val,
+ uint64_t cacheInt,
+ uint32_t preRead)
+ : search::ComprFileDecodeContext(),
+ _valI(valI),
+ _valE(valE),
+ _realValE(realValE),
+ _val(val),
+ _cacheInt(cacheInt),
+ _preRead(preRead),
+ _fileReadBias(0)
+ {
+ }
+
+ virtual
+ ~DecodeContext64Base(void)
+ {
+ }
+
+ DecodeContext64Base &
+ operator=(const DecodeContext64Base &rhs)
+ {
+ search::ComprFileDecodeContext::operator=(rhs);
+ _valI = rhs._valI;
+ _valE = rhs._valE;
+ _realValE = rhs._realValE;
+ _val = rhs._val;
+ _cacheInt = rhs._cacheInt;
+ _preRead = rhs._preRead;
+ _fileReadBias = rhs._fileReadBias;
+ return *this;
+ }
+
+ /**
+ *
+ * Check if the chunk referenced by the decode context was the
+ * last chunk in the file (e.g. _valE > _realValE)
+ */
+ virtual bool
+ lastChunk(void) const
+ {
+ return _valE > _realValE;
+ }
+
+ /**
+ * Check if we're at the end of the current chunk (e.g. _valI >= _valE)
+ */
+ virtual bool
+ endOfChunk(void) const
+ {
+ return _valI >= _valE;
+ }
+
+ /**
+ * Get remaining units in buffer (e.g. _realValE - _valI)
+ */
+
+ virtual int32_t
+ remainingUnits(void) const
+ {
+ return _realValE - _valI;
+ }
+
+ /**
+ * Get unit ptr (e.g. _valI) from decode context.
+ */
+ virtual const void *
+ getUnitPtr(void) const
+ {
+ return _valI;
+ }
+
+ virtual void
+ afterRead(const void *start,
+ size_t bufferUnits,
+ uint64_t bufferEndFilePos,
+ bool isMore)
+ {
+ _valI = static_cast<const uint64_t *>(start);
+ setEnd(bufferUnits, isMore);
+ _fileReadBias = (bufferEndFilePos -
+ reinterpret_cast<unsigned long>(_realValE + 1)) << 3;
+ }
+
+ virtual uint64_t
+ getBitPos(int bitOffset,
+ uint64_t bufferEndFilePos) const
+ {
+ int intOffset = _realValE - _valI;
+ if (bitOffset == -1)
+ bitOffset = -64 - _preRead;
+
+ return (bufferEndFilePos << 3) -
+ (static_cast<uint64_t>(intOffset) << 6) + bitOffset;
+ }
+
+ uint64_t
+ getReadOffset(void) const
+ {
+ return _fileReadBias +
+ (reinterpret_cast<unsigned long>(_valI) << 3) - _preRead;
+ }
+
+ void
+ defineReadOffset(uint64_t readOffset)
+ {
+ _fileReadBias = readOffset -
+ (reinterpret_cast<unsigned long>(_valI) << 3) +
+ _preRead;
+ }
+
+ virtual uint64_t
+ getBitPosV(void) const
+ {
+ return getReadOffset();
+ }
+
+ virtual void
+ adjUnitPtr(int newRemainingUnits)
+ {
+ _valI = _realValE - newRemainingUnits;
+ }
+
+ virtual void
+ emptyBuffer(uint64_t newBitPosition)
+ {
+ _fileReadBias = newBitPosition;
+ _valI = NULL;
+ _valE = NULL;
+ _realValE = NULL;
+ _preRead = 0;
+ }
+
+ virtual uint32_t
+ getUnitByteSize(void) const
+ {
+ return sizeof(uint64_t);
+ }
+
+ /**
+ * Set the end of the buffer
+ * @param unitCount Number of bytes in buffer
+ * @param moreData Set if there is more data available
+ */
+ void
+ setEnd(unsigned int unitCount, bool moreData)
+ {
+ _valE = _realValE = _valI + unitCount;
+ if (moreData)
+ _valE -= END_BUFFER_SAFETY;
+ else
+ _valE += END_BUFFER_SAFETY;
+ }
+
+ const uint64_t *
+ getCompr(void) const
+ {
+ return (_preRead == 0) ? (_valI - 1) : (_valI - 2);
+ }
+
+ int
+ getBitOffset(void) const
+ {
+ return (_preRead == 0) ? 0 : 64 - _preRead;
+ }
+
+ virtual void
+ checkPointWrite(vespalib::nbostream &out);
+
+ virtual void
+ checkPointRead(vespalib::nbostream &in);
+
+ static int64_t
+ convertToSigned(uint64_t val)
+ {
+ if ((val & 1) != 0)
+ return - (val >> 1) - 1;
+ else
+ return (val >> 1);
+ }
+};
+
+
+template <bool bigEndian>
+class DecodeContext64 : public DecodeContext64Base
+{
+private:
+ DecodeContext64(const DecodeContext64 &);
+
+public:
+ typedef EncodeContext64<bigEndian> EC;
+
+ DecodeContext64(void)
+ : DecodeContext64Base()
+ {
+ }
+
+
+ DecodeContext64(const uint64_t *compr,
+ int bitOffset)
+ : DecodeContext64Base(compr + 1,
+ NULL,
+ NULL,
+ 0,
+ EC::bswap(*compr),
+ 64 - bitOffset)
+ {
+ uint32_t length = 64;
+ UC64_READBITS(_val, _valI, _preRead, _cacheInt, EC);
+ }
+
+ /*
+ * Setup decode context without read context, all data is in memory.
+ * Assumes that last word is fully readable, and that some extra
+ * data beyond is available, to avoid issues when prefetching bits
+ * into two registers (_val and _cacheInt).
+ */
+ DecodeContext64(const uint64_t *compr,
+ int bitOffset,
+ uint64_t bitLength)
+ : DecodeContext64Base(compr + 1,
+ NULL,
+ NULL,
+ 0,
+ EC::bswap(*compr),
+ 64 - bitOffset)
+ {
+ uint32_t length = 64;
+ UC64_READBITS(_val, _valI, _preRead, _cacheInt, EC);
+ _realValE = compr + (bitOffset + bitLength + 63) / 64;
+ _valE = _realValE + END_BUFFER_SAFETY;
+ }
+
+ DecodeContext64 &
+ operator=(const DecodeContext64 &rhs)
+ {
+ DecodeContext64Base::operator=(rhs);
+ return *this;
+ }
+
+ /**
+ * Read [length] bits from a bitstream
+ *
+ * @param length Number of bits to read (0 < length < 64)
+ * @param val Current integer holding bits
+ * @param cacheInt 2nd level integer cache
+ * @param preRead Number of valid bits in cacheInt
+ * @param valI Pointer to next integer in bitstream
+ */
+ static void
+ ReadBits(unsigned int length, uint64_t &val,
+ uint64_t &cacheInt, unsigned int &preRead,
+ const uint64_t * &valI)
+ {
+ if (length <= preRead) {
+ if (bigEndian) {
+ val |= ((cacheInt >> (preRead - length)) &
+ CodingTables::_intMask64[length]);
+ } else {
+ val |= ((cacheInt << (preRead - length)) &
+ CodingTables::_intMask64le[length]);
+ }
+ preRead -= length;
+ return;
+ }
+
+ if (preRead > 0) {
+ length -= preRead;
+ if (bigEndian) {
+ val |= ((cacheInt &
+ CodingTables::_intMask64[preRead]) << length);
+ } else {
+ val |= ((cacheInt &
+ CodingTables::_intMask64le[preRead]) >> length);
+ }
+ }
+
+ cacheInt = EC::bswap(*valI++);
+ preRead = 64 - length;
+ if (bigEndian)
+ val |= (cacheInt >> preRead);
+ else
+ val |= (cacheInt << preRead);
+ };
+
+ virtual void
+ skipBits(int bits)
+ {
+ while (bits >= 64) {
+ _val = 0;
+ ReadBits(64, _val, _cacheInt, _preRead, _valI);
+ bits -= 64;
+ }
+ if (bits > 0) {
+ if (bigEndian)
+ _val <<= bits;
+ else
+ _val >>= bits;
+ ReadBits(bits, _val, _cacheInt, _preRead, _valI);
+ }
+ }
+
+ /**
+ * Setup for bitwise reading.
+ */
+ virtual void
+ setupBits(int bitOffset)
+ {
+ unsigned int length;
+ UC64_SETUPBITS(bitOffset, _val, _valI, _preRead, _cacheInt, EC);
+ }
+
+ void setPosition(Position pos) {
+ _valI = pos.getOccurences();
+ setupBits(pos.getBitOffset());
+ }
+
+ /**
+ * Used by iterators when switching from bitwise to bytewise decoding.
+ */
+ const uint8_t *
+ getByteCompr(void) const
+ {
+ assert((_preRead & 7) == 0);
+ return reinterpret_cast<const uint8_t *>(getCompr()) +
+ (getBitOffset() >> 3);
+ }
+
+ /**
+ * Used by iterators when switching from bytewise to bitwise decoding.
+ */
+ void
+ setByteCompr(const uint8_t *bCompr)
+ {
+ int byteOffset = reinterpret_cast<unsigned long>(bCompr) & 7;
+ _valI = reinterpret_cast<const uint64_t *>(bCompr - byteOffset);
+ setupBits(byteOffset * 8);
+ }
+
+ uint64_t
+ readBits(uint32_t length)
+ {
+ uint64_t res;
+ if (length < 64) {
+ if (bigEndian) {
+ res = _val >> (64 - length);
+ _val <<= length;
+ } else {
+ res = _val & CodingTables::_intMask64[length];
+ _val >>= length;
+ }
+ } else {
+ res = _val;
+ _val = 0;
+ }
+ UC64_READBITS(_val, _valI, _preRead, _cacheInt, EC);
+ return res;
+ }
+
+ void
+ align(uint32_t alignment)
+ {
+ uint64_t pad = (- getReadOffset()) & (alignment - 1);
+ while (pad > 64) {
+ (void) readBits(64);
+ pad -= 64;
+ }
+ if (pad > 0)
+ (void) readBits(pad);
+ }
+
+ /*
+ * Small alignment (max 64 bits alignment)
+ */
+ void
+ smallAlign(uint32_t alignment)
+ {
+ uint64_t pad = _preRead & (alignment - 1);
+ if (pad > 0)
+ (void) readBits(pad);
+ }
+};
+
+typedef DecodeContext64<true> DecodeContext64BE;
+
+typedef DecodeContext64<false> DecodeContext64LE;
+
+template <bool bigEndian>
+class FeatureDecodeContext : public DecodeContext64<bigEndian>
+{
+public:
+ search::ComprFileReadContext *_readContext;
+ typedef DecodeContext64<bigEndian> ParentClass;
+ typedef index::DocIdAndFeatures DocIdAndFeatures;
+ typedef index::PostingListParams PostingListParams;
+ using ParentClass::_val;
+ using ParentClass::_valI;
+ using ParentClass::_valE;
+ using ParentClass::_realValE;
+ using ParentClass::_cacheInt;
+ using ParentClass::_preRead;
+ using ParentClass::getReadOffset;
+ using ParentClass::getCompr;
+ using ParentClass::getBitOffset;
+ using ParentClass::readBits;
+ using ParentClass::ReadBits;
+
+ FeatureDecodeContext(void)
+ : ParentClass(),
+ _readContext(NULL)
+ {
+ }
+
+ FeatureDecodeContext(const uint64_t *compr,
+ int bitOffset)
+ : ParentClass(compr, bitOffset),
+ _readContext(NULL)
+ {
+ }
+
+ FeatureDecodeContext(const uint64_t *compr,
+ int bitOffset,
+ uint64_t bitLength)
+ : ParentClass(compr, bitOffset, bitLength),
+ _readContext(NULL)
+ {
+ }
+
+ FeatureDecodeContext &
+ operator=(const FeatureDecodeContext &rhs)
+ {
+ ParentClass::operator=(rhs);
+ _readContext = rhs._readContext;
+ return *this;
+ }
+
+ void
+ setReadContext(search::ComprFileReadContext *readContext)
+ {
+ _readContext = readContext;
+ }
+
+ search::ComprFileReadContext *
+ getReadContext(void) const
+ {
+ return _readContext;
+ }
+
+ void
+ readComprBuffer(void)
+ {
+ _readContext->readComprBuffer();
+ }
+
+ void
+ readComprBufferIfNeeded(void)
+ {
+ if (__builtin_expect(_valI >= _valE, false))
+ readComprBuffer();
+ }
+
+ void
+ readBytes(uint8_t *buf, size_t len);
+
+ virtual uint32_t
+ readHeader(vespalib::GenericHeader &header, int64_t fileSize);
+
+ virtual void
+ readHeader(const vespalib::GenericHeader &header,
+ const vespalib::string &prefix);
+
+ virtual const vespalib::string &
+ getIdentifier(void) const;
+
+ virtual void
+ readFeatures(DocIdAndFeatures &features);
+
+ virtual void
+ skipFeatures(unsigned int count);
+
+ virtual void
+ unpackFeatures(const search::fef::TermFieldMatchDataArray &matchData,
+ uint32_t docId);
+
+ /*
+ * Set parameters.
+ */
+ virtual void
+ setParams(const PostingListParams &params);
+
+ /*
+ * Get current parameters.
+ */
+ virtual void
+ getParams(PostingListParams &params) const;
+
+ virtual void
+ skipBits(int bits)
+ {
+ readComprBufferIfNeeded();
+ while (bits >= 64) {
+ _val = 0;
+ ReadBits(64, _val, _cacheInt, _preRead, _valI);
+ bits -= 64;
+ readComprBufferIfNeeded();
+ }
+ if (bits > 0) {
+ if (bigEndian)
+ _val <<= bits;
+ else
+ _val >>= bits;
+ ReadBits(bits, _val, _cacheInt, _preRead, _valI);
+ readComprBufferIfNeeded();
+ }
+ }
+
+ void
+ align(uint32_t alignment)
+ {
+ readComprBufferIfNeeded();
+ uint64_t pad = (- getReadOffset()) & (alignment - 1);
+ while (pad > 64) {
+ (void) readBits(64);
+ pad -= 64;
+ readComprBufferIfNeeded();
+ }
+ if (pad > 0)
+ (void) readBits(pad);
+ readComprBufferIfNeeded();
+ }
+};
+
+typedef FeatureDecodeContext<true> FeatureDecodeContextBE;
+
+typedef FeatureDecodeContext<false> FeatureDecodeContextLE;
+
+template <bool bigEndian>
+class FeatureEncodeContext : public EncodeContext64<bigEndian>
+{
+public:
+ search::ComprFileWriteContext *_writeContext;
+ typedef EncodeContext64<bigEndian> ParentClass;
+ typedef index::DocIdAndFeatures DocIdAndFeatures;
+ typedef index::PostingListParams PostingListParams;
+ using ParentClass::_cacheInt;
+ using ParentClass::_cacheFree;
+ using ParentClass::smallPadBits;
+
+public:
+ FeatureEncodeContext(void)
+ : ParentClass(),
+ _writeContext(NULL)
+ {
+ }
+
+ FeatureEncodeContext &
+ operator=(const FeatureEncodeContext &rhs)
+ {
+ ParentClass::operator=(rhs);
+ _writeContext = rhs._writeContext;
+ return *this;
+ }
+
+ void
+ setWriteContext(search::ComprFileWriteContext *writeContext)
+ {
+ _writeContext = writeContext;
+ }
+
+ using ParentClass::log2;
+ using ParentClass::_valI;
+ using ParentClass::_valE;
+
+ static int
+ calcDocIdK(uint32_t numDocs, uint32_t docIdLimit)
+ {
+ uint32_t avgDelta = docIdLimit / (numDocs + 1);
+ uint32_t docIdK = (avgDelta < 4) ? 1 : (log2(avgDelta));
+ return docIdK;
+ }
+
+ using ParentClass::writeBits;
+
+ void
+ writeBits(const uint64_t *bits, uint32_t bitOffset, uint32_t bitLength);
+
+ void
+ writeString(const vespalib::stringref &buf);
+
+ virtual void
+ writeHeader(const vespalib::GenericHeader &header);
+
+ void
+ writeComprBufferIfNeeded(void)
+ {
+ if (_valI >= _valE)
+ _writeContext->writeComprBuffer(false);
+ }
+
+ void
+ writeComprBuffer(void)
+ {
+ _writeContext->writeComprBuffer(true);
+ }
+
+ virtual void
+ padBits(uint32_t length)
+ {
+ while (length > 64) {
+ writeBits(0, 64);
+ length -= 64;
+ writeComprBufferIfNeeded();
+ }
+ smallPadBits(length);
+ writeComprBufferIfNeeded();
+ }
+
+ virtual void
+ readHeader(const vespalib::GenericHeader &header,
+ const vespalib::string &prefix);
+
+ virtual void
+ writeHeader(vespalib::GenericHeader &header,
+ const vespalib::string &prefix) const;
+
+ virtual const vespalib::string &
+ getIdentifier(void) const;
+
+ virtual void
+ writeFeatures(const DocIdAndFeatures &features);
+
+ /*
+ * Set parameters.
+ */
+ virtual void
+ setParams(const PostingListParams &params);
+
+ /*
+ * Get current parameters.
+ */
+ virtual void
+ getParams(PostingListParams &params) const;
+};
+
+typedef FeatureEncodeContext<true> FeatureEncodeContextBE;
+
+typedef FeatureEncodeContext<false> FeatureEncodeContextLE;
+
+extern template class FeatureDecodeContext<true>;
+extern template class FeatureDecodeContext<false>;
+
+extern template class FeatureEncodeContext<true>;
+extern template class FeatureEncodeContext<false>;
+
+} // namespace bitcompression
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/bitcompression/countcompression.cpp b/searchlib/src/vespa/searchlib/bitcompression/countcompression.cpp
new file mode 100644
index 00000000000..9c0b7115a72
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/bitcompression/countcompression.cpp
@@ -0,0 +1,241 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright (C) 1998-2003 Fast Search & Transfer ASA
+// Copyright (C) 2003 Overture Services Norway AS
+
+#include <vespa/fastos/fastos.h>
+#include "compression.h"
+#include "countcompression.h"
+#include <vespa/searchlib/index/postinglistcounts.h>
+#include <vespa/vespalib/objects/nbostream.h>
+
+namespace search
+{
+
+namespace bitcompression
+{
+
+using vespalib::nbostream;
+
+#define K_VALUE_COUNTFILE_LASTDOCID 22
+#define K_VALUE_COUNTFILE_NUMCHUNKS 1
+#define K_VALUE_COUNTFILE_CHUNKNUMDOCS 18
+#define K_VALUE_COUNTFILE_WORDNUMDELTA 0
+#define K_VALUE_COUNTFILE_SPNUMDOCS 0
+
+
+void
+PostingListCountFileDecodeContext::checkPointWrite(nbostream &out)
+{
+ ParentClass::checkPointWrite(out);
+ out << _avgBitsPerDoc << _minChunkDocs << _docIdLimit << _numWordIds;
+ out << _minWordNum;
+}
+
+
+void
+PostingListCountFileDecodeContext::checkPointRead(nbostream &in)
+{
+ ParentClass::checkPointRead(in);
+ in >> _avgBitsPerDoc >> _minChunkDocs >> _docIdLimit >> _numWordIds;
+ in >> _minWordNum;
+}
+
+
+void
+PostingListCountFileDecodeContext::
+readCounts(PostingListCounts &counts)
+{
+ UC64_DECODECONTEXT_CONSTRUCTOR(o, _);
+ uint32_t length;
+ uint64_t val64;
+ const uint64_t *valE = _valE;
+ uint32_t numDocs;
+
+ counts._segments.clear();
+ UC64BE_DECODEEXPGOLOMB_NS(o,
+ K_VALUE_COUNTFILE_SPNUMDOCS,
+ EC);
+ numDocs = static_cast<uint32_t>(val64) + 1;
+ counts._numDocs = numDocs;
+ if (numDocs != 0) {
+ uint64_t expVal = numDocs * static_cast<uint64_t>(_avgBitsPerDoc);
+ uint32_t kVal = (expVal < 4) ? 1 : EC::log2(expVal);
+ UC64BE_DECODEEXPGOLOMB_NS(o,
+ kVal,
+ EC);
+ counts._bitLength = val64;
+ } else
+ counts._bitLength = 0;
+ if (__builtin_expect(oCompr >= valE, false)) {
+ UC64_DECODECONTEXT_STORE(o, _);
+ _readContext->readComprBuffer();
+ valE = _valE;
+ UC64_DECODECONTEXT_LOAD(o, _);
+ }
+ uint32_t numChunks = 0;
+ if (numDocs >= _minChunkDocs) {
+ UC64BE_DECODEEXPGOLOMB_NS(o,
+ K_VALUE_COUNTFILE_NUMCHUNKS,
+ EC);
+ numChunks = static_cast<uint32_t>(val64);
+ if (__builtin_expect(oCompr >= valE, false)) {
+ UC64_DECODECONTEXT_STORE(o, _);
+ _readContext->readComprBuffer();
+ valE = _valE;
+ UC64_DECODECONTEXT_LOAD(o, _);
+ }
+ }
+ if (numChunks != 0) {
+ uint32_t prevLastDoc = 0u;
+ for (uint32_t chunk = 0; chunk < numChunks; ++chunk) {
+ if (__builtin_expect(oCompr >= valE, false)) {
+ UC64_DECODECONTEXT_STORE(o, _);
+ _readContext->readComprBuffer();
+ valE = _valE;
+ UC64_DECODECONTEXT_LOAD(o, _);
+ }
+ PostingListCounts::Segment seg;
+ UC64BE_DECODEEXPGOLOMB_NS(o,
+ K_VALUE_COUNTFILE_CHUNKNUMDOCS,
+ EC);
+ seg._numDocs = static_cast<uint32_t>(val64) + 1;
+ UC64BE_DECODEEXPGOLOMB_NS(o,
+ K_VALUE_COUNTFILE_POSOCCBITS,
+ EC);
+ seg._bitLength = val64;
+ UC64BE_DECODEEXPGOLOMB_NS(o,
+ K_VALUE_COUNTFILE_LASTDOCID,
+ EC);
+ seg._lastDoc =
+ static_cast<uint32_t>(val64) + seg._numDocs + prevLastDoc;
+ prevLastDoc = seg._lastDoc;
+ counts._segments.push_back(seg);
+ }
+ }
+ UC64_DECODECONTEXT_STORE(o, _);
+ if (__builtin_expect(oCompr >= valE, false))
+ _readContext->readComprBuffer();
+}
+
+
+void
+PostingListCountFileDecodeContext::
+readWordNum(uint64_t &wordNum)
+{
+ UC64_DECODECONTEXT_CONSTRUCTOR(o, _);
+ uint32_t length;
+ uint64_t val64;
+ const uint64_t *valE = _valE;
+
+ UC64BE_DECODEEXPGOLOMB_NS(o,
+ K_VALUE_COUNTFILE_WORDNUMDELTA,
+ EC);
+ wordNum = _minWordNum + val64;
+ UC64_DECODECONTEXT_STORE(o, _);
+ if (__builtin_expect(oCompr >= valE, false))
+ _readContext->readComprBuffer();
+ _minWordNum = wordNum + 1;
+}
+
+
+void
+PostingListCountFileDecodeContext::
+copyParams(const PostingListCountFileDecodeContext &rhs)
+{
+ _avgBitsPerDoc = rhs._avgBitsPerDoc;
+ _minChunkDocs = rhs._minChunkDocs;
+ _docIdLimit = rhs._docIdLimit;
+ _numWordIds = rhs._numWordIds;
+}
+
+
+void
+PostingListCountFileEncodeContext::checkPointWrite(nbostream &out)
+{
+ ParentClass::checkPointWrite(out);
+ out << _avgBitsPerDoc << _minChunkDocs << _docIdLimit << _numWordIds;
+ out << _minWordNum;
+}
+
+
+void
+PostingListCountFileEncodeContext::checkPointRead(nbostream &in)
+{
+ ParentClass::checkPointRead(in);
+ in >> _avgBitsPerDoc >> _minChunkDocs >> _docIdLimit >> _numWordIds;
+ in >> _minWordNum;
+}
+
+
+void
+PostingListCountFileEncodeContext::
+writeCounts(const PostingListCounts &counts)
+{
+ assert(counts._segments.empty() ||
+ counts._numDocs >= _minChunkDocs);
+ uint32_t numDocs = counts._numDocs;
+ assert(numDocs > 0);
+ encodeExpGolomb(numDocs - 1, K_VALUE_COUNTFILE_SPNUMDOCS);
+ if (numDocs == 0) {
+ if (__builtin_expect(_valI >= _valE, false))
+ _writeContext->writeComprBuffer(false);
+ return;
+ }
+ uint64_t encodeVal = counts._bitLength;
+ uint64_t expVal = numDocs * static_cast<uint64_t>(_avgBitsPerDoc);
+ uint32_t kVal = (expVal < 4) ? 1 : log2(expVal);
+ encodeExpGolomb(encodeVal, kVal);
+ uint32_t numChunks = counts._segments.size();
+ if (numDocs >= _minChunkDocs)
+ encodeExpGolomb(numChunks, K_VALUE_COUNTFILE_NUMCHUNKS);
+ if (numChunks != 0) {
+ typedef std::vector<PostingListCounts::Segment>::const_iterator segit;
+
+ segit ite = counts._segments.end();
+
+ uint32_t prevLastDoc = 0u;
+ for (segit it = counts._segments.begin(); it != ite; ++it) {
+ if (__builtin_expect(_valI >= _valE, false))
+ _writeContext->writeComprBuffer(false);
+ encodeExpGolomb(it->_numDocs - 1,
+ K_VALUE_COUNTFILE_CHUNKNUMDOCS);
+ encodeExpGolomb(it->_bitLength,
+ K_VALUE_COUNTFILE_POSOCCBITS);
+ encodeExpGolomb(it->_lastDoc - prevLastDoc - it->_numDocs,
+ K_VALUE_COUNTFILE_LASTDOCID);
+ prevLastDoc = it->_lastDoc;
+ }
+ }
+ if (__builtin_expect(_valI >= _valE, false))
+ _writeContext->writeComprBuffer(false);
+}
+
+
+void
+PostingListCountFileEncodeContext::
+writeWordNum(uint64_t wordNum)
+{
+ assert(wordNum >= _minWordNum);
+ assert(wordNum <= _numWordIds);
+ encodeExpGolomb(wordNum - _minWordNum,
+ K_VALUE_COUNTFILE_WORDNUMDELTA);
+ if (__builtin_expect(_valI >= _valE, false))
+ _writeContext->writeComprBuffer(false);
+ _minWordNum = wordNum + 1;
+}
+
+
+void
+PostingListCountFileEncodeContext::
+copyParams(const PostingListCountFileEncodeContext &rhs)
+{
+ _avgBitsPerDoc = rhs._avgBitsPerDoc;
+ _minChunkDocs = rhs._minChunkDocs;
+ _docIdLimit = rhs._docIdLimit;
+ _numWordIds = rhs._numWordIds;
+}
+
+
+} // namespace bitcompression
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/bitcompression/countcompression.h b/searchlib/src/vespa/searchlib/bitcompression/countcompression.h
new file mode 100644
index 00000000000..284b441aa3b
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/bitcompression/countcompression.h
@@ -0,0 +1,110 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright (C) 2002-2003 Fast Search & Transfer ASA
+// Copyright (C) 2003 Overture Services Norway AS
+
+#pragma once
+
+#include <limits>
+#include <vespa/searchlib/index/postinglistcounts.h>
+
+#define K_VALUE_COUNTFILE_POSOCCBITS 6
+
+namespace search
+{
+
+namespace bitcompression
+{
+
+class PostingListCountFileDecodeContext : public FeatureDecodeContext<true>
+{
+public:
+ typedef FeatureDecodeContext<true> ParentClass;
+ typedef index::PostingListCounts PostingListCounts;
+ uint32_t _avgBitsPerDoc; // Average number of bits per document
+ uint32_t _minChunkDocs; // Minimum number of documents for chunking
+ uint32_t _docIdLimit; // Limit for document ids (docId < docIdLimit)
+ uint64_t _numWordIds; // Number of words in dictionary
+ uint64_t _minWordNum; // Minimum word number
+
+ PostingListCountFileDecodeContext(void)
+ : ParentClass(),
+ _avgBitsPerDoc(10),
+ _minChunkDocs(262144),
+ _docIdLimit(10000000),
+ _numWordIds(0),
+ _minWordNum(0u)
+ {
+ }
+
+ virtual void
+ checkPointWrite(vespalib::nbostream &out);
+
+ virtual void
+ checkPointRead(vespalib::nbostream &in);
+
+ void
+ readCounts(PostingListCounts &counts);
+
+ void
+ readWordNum(uint64_t &wordNum);
+
+ static uint64_t
+ noWordNum(void)
+ {
+ return std::numeric_limits<uint64_t>::max();
+ }
+
+ void
+ copyParams(const PostingListCountFileDecodeContext &rhs);
+};
+
+
+class PostingListCountFileEncodeContext : public FeatureEncodeContext<true>
+{
+public:
+ typedef FeatureEncodeContext<true> ParentClass;
+ typedef index::PostingListCounts PostingListCounts;
+ uint32_t _avgBitsPerDoc; // Average number of bits per document
+ uint32_t _minChunkDocs; // Minimum number of documents for chunking
+ uint32_t _docIdLimit; // Limit for document ids (docId < docIdLimit)
+ uint64_t _numWordIds; // Number of words in dictionary
+ uint64_t _minWordNum; // Mininum word number
+
+ PostingListCountFileEncodeContext(void)
+ : ParentClass(),
+ _avgBitsPerDoc(10),
+ _minChunkDocs(262144),
+ _docIdLimit(10000000),
+ _numWordIds(0),
+ _minWordNum(0u)
+ {
+ }
+
+ virtual void
+ checkPointWrite(vespalib::nbostream &out);
+
+ virtual void
+ checkPointRead(vespalib::nbostream &in);
+
+ void
+ writeCounts(const PostingListCounts &counts);
+
+ void
+ writeWordNum(uint64_t wordNum);
+
+ static uint64_t
+ noWordNum(void)
+ {
+ return std::numeric_limits<uint64_t>::max();
+ }
+
+ void
+ copyParams(const PostingListCountFileEncodeContext &rhs);
+};
+
+
+} // namespace bitcompression
+
+} // namespace search
+
+
diff --git a/searchlib/src/vespa/searchlib/bitcompression/pagedict4.cpp b/searchlib/src/vespa/searchlib/bitcompression/pagedict4.cpp
new file mode 100644
index 00000000000..7a9875c1316
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/bitcompression/pagedict4.cpp
@@ -0,0 +1,2586 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+#include "compression.h"
+#include "countcompression.h"
+#include <vespa/searchlib/index/postinglistcounts.h>
+#include <vespa/searchlib/index/dictionaryfile.h>
+#include "pagedict4.h"
+#include <map>
+#include <string>
+#include <iostream>
+#include <sstream>
+#include <vespa/vespalib/objects/nbostream.h>
+
+LOG_SETUP(".pagedict4");
+
+namespace search
+{
+
+namespace bitcompression
+{
+
+namespace
+{
+
+void
+setDecoderPositionHelper(PostingListCountFileDecodeContext &ctx,
+ const void *buffer,
+ uint64_t offset)
+{
+ const uint64_t *p = static_cast<const uint64_t *>(buffer);
+ ctx._valI = p + offset / 64;
+ ctx.setupBits(offset & 63);
+ ctx.defineReadOffset(offset);
+}
+
+void
+setDecoderPositionInPage(PostingListCountFileDecodeContext &ctx,
+ const void *buffer,
+ uint64_t offset)
+{
+ ctx.afterRead(buffer,
+ PageDict4PageParams::getPageBitSize() / 64,
+ PageDict4PageParams::getPageBitSize() / 8,
+ false);
+ setDecoderPositionHelper(ctx, buffer, offset);
+}
+
+void
+setDecoderPosition(PostingListCountFileDecodeContext &ctx,
+ const ComprBuffer &cb,
+ uint64_t offset)
+{
+ ctx.afterRead(cb._comprBuf,
+ cb._comprBufSize,
+ cb._comprBufSize * sizeof(uint64_t),
+ false);
+ setDecoderPositionHelper(ctx, cb._comprBuf, offset);
+}
+
+
+}
+
+
+uint32_t
+PageDict4PageParams::getFileHeaderPad(uint32_t offset)
+{
+ uint32_t pad = (- offset & getPageBitSize());
+ return pad > getMaxFileHeaderPad() ? 0u : pad;
+}
+
+
+std::ostream &
+operator<<(std::ostream &stream, const index::PostingListCounts &counts)
+{
+ stream << "(d=" << counts._numDocs << ",b=" << counts._bitLength << ")";
+ return stream;
+}
+
+vespalib::nbostream &
+operator<<(vespalib::nbostream &stream,
+ const PageDict4StartOffset &startOffset)
+{
+ stream << startOffset._fileOffset << startOffset._accNumDocs;
+ return stream;
+}
+
+vespalib::nbostream &
+operator>>(vespalib::nbostream &stream, PageDict4StartOffset &startOffset)
+{
+ stream >> startOffset._fileOffset >> startOffset._accNumDocs;
+ return stream;
+}
+
+
+vespalib::nbostream &
+operator<<(vespalib::nbostream &stream,
+ const PageDict4SSReader::L7Entry &l7Entry)
+{
+ stream << l7Entry._l7Word << l7Entry._l7StartOffset << l7Entry._l7WordNum;
+ stream << l7Entry._l6Offset << l7Entry._sparsePageNum << l7Entry._pageNum;
+ stream << l7Entry._l7Ref;
+ return stream;
+}
+
+
+vespalib::nbostream &
+operator>>(vespalib::nbostream &stream,
+ PageDict4SSReader::L7Entry &l7Entry)
+{
+ stream >> l7Entry._l7Word >> l7Entry._l7StartOffset >> l7Entry._l7WordNum;
+ stream >> l7Entry._l6Offset >> l7Entry._sparsePageNum >> l7Entry._pageNum;
+ stream >> l7Entry._l7Ref;
+ return stream;
+}
+
+
+vespalib::nbostream &
+operator<<(vespalib::nbostream &stream,
+ const PageDict4SSReader::OverflowRef &oref)
+{
+ stream << oref._wordNum << oref._l7Ref;
+ return stream;
+}
+
+
+vespalib::nbostream &
+operator>>(vespalib::nbostream &stream, PageDict4SSReader::OverflowRef &oref)
+{
+ stream >> oref._wordNum >> oref._l7Ref;
+ return stream;
+}
+
+typedef index::PostingListCounts Counts;
+typedef PageDict4StartOffset StartOffset;
+
+#define K_VALUE_COUNTFILE_L1_FILEOFFSET 7
+#define K_VALUE_COUNTFILE_L2_FILEOFFSET 11
+#define K_VALUE_COUNTFILE_L3_FILEOFFSET 13
+#define K_VALUE_COUNTFILE_L4_FILEOFFSET 15
+#define K_VALUE_COUNTFILE_L5_FILEOFFSET 17
+#define K_VALUE_COUNTFILE_L6_FILEOFFSET 19
+
+#define K_VALUE_COUNTFILE_L1_WORDOFFSET 7
+#define K_VALUE_COUNTFILE_L2_WORDOFFSET 10
+#define K_VALUE_COUNTFILE_L4_WORDOFFSET 7
+#define K_VALUE_COUNTFILE_L5_WORDOFFSET 10
+
+#define K_VALUE_COUNTFILE_L1_COUNTOFFSET 8
+#define K_VALUE_COUNTFILE_L2_COUNTOFFSET 11
+#define K_VALUE_COUNTFILE_L2_L1OFFSET 8
+
+#define K_VALUE_COUNTFILE_L4_L3OFFSET 8
+#define K_VALUE_COUNTFILE_L5_L3OFFSET 11
+#define K_VALUE_COUNTFILE_L5_L4OFFSET 8
+
+#define K_VALUE_COUNTFILE_L6_PAGENUM 7
+
+#define K_VALUE_COUNTFILE_L3_WORDNUM 7
+#define K_VALUE_COUNTFILE_L4_WORDNUM 11
+#define K_VALUE_COUNTFILE_L5_WORDNUM 14
+#define K_VALUE_COUNTFILE_L6_WORDNUM 17
+
+#define K_VALUE_COUNTFILE_L1_ACCNUMDOCS 4
+#define K_VALUE_COUNTFILE_L2_ACCNUMDOCS 8
+#define K_VALUE_COUNTFILE_L3_ACCNUMDOCS 10
+#define K_VALUE_COUNTFILE_L4_ACCNUMDOCS 12
+#define K_VALUE_COUNTFILE_L5_ACCNUMDOCS 14
+#define K_VALUE_COUNTFILE_L6_ACCNUMDOCS 16
+
+static uint32_t
+getLCP(const vespalib::stringref &word,
+ const vespalib::stringref &prevWord)
+{
+ size_t len1 = word.size();
+ size_t len2 = prevWord.size();
+
+ size_t res = 0;
+ while (res < len1 &&
+ res < len2 &&
+ res < 254u &&
+ word[res] == prevWord[res])
+ ++res;
+ return res;
+}
+
+
+static void
+addLCPWord(const vespalib::stringref &word, size_t lcp, std::vector<char> &v)
+{
+ v.push_back(lcp);
+ size_t pos = lcp;
+ size_t len = word.size();
+ while (pos < len) {
+ v.push_back(word[pos]);
+ ++pos;
+ }
+ v.push_back(0);
+}
+
+
+static void
+writeStartOffset(PostingListCountFileEncodeContext &e,
+ const StartOffset &startOffset,
+ const StartOffset &prevStartOffset,
+ uint32_t fileOffsetK,
+ uint32_t accNumDocsK)
+{
+ e.encodeExpGolomb(startOffset._fileOffset -
+ prevStartOffset._fileOffset,
+ fileOffsetK);
+ e.encodeExpGolomb(startOffset._accNumDocs -
+ prevStartOffset._accNumDocs,
+ accNumDocsK);
+ e.writeComprBufferIfNeeded();
+}
+
+
+static void
+readStartOffset(PostingListCountFileDecodeContext &d,
+ StartOffset &startOffset,
+ uint32_t fileOffsetK,
+ uint32_t accNumDocsK)
+{
+ typedef PostingListCountFileEncodeContext EC;
+
+ UC64_DECODECONTEXT(o);
+ uint32_t length;
+ uint64_t val64;
+ const bool bigEndian = true;
+ UC64_DECODECONTEXT_LOAD(o, d._);
+ UC64_DECODEEXPGOLOMB_NS(o,
+ fileOffsetK,
+ EC);
+ startOffset._fileOffset += val64;
+ UC64_DECODEEXPGOLOMB_NS(o,
+ accNumDocsK,
+ EC);
+ startOffset._accNumDocs += val64;
+ UC64_DECODECONTEXT_STORE(o, d._);
+ d.readComprBufferIfNeeded();
+}
+
+
+PageDict4SSWriter::PageDict4SSWriter(SSEC &sse)
+ : _eL6(sse),
+ _l6Word(),
+ _l6StartOffset(),
+ _l6PageNum(0u),
+ _l6SparsePageNum(0u),
+ _l6WordNum(1u)
+{
+}
+
+PageDict4SSWriter::~PageDict4SSWriter(void)
+{
+}
+
+void
+PageDict4SSWriter::addL6Skip(const vespalib::stringref &word,
+ const StartOffset &startOffset,
+ uint64_t wordNum,
+ uint64_t pageNum,
+ uint32_t sparsePageNum)
+{
+#if 0
+ LOG(info,
+ "addL6SKip, \"%s\" -> wordnum %d, page (%d,%d) startOffset %" PRId64
+ ", SS bitOffset %" PRIu64,
+ word.c_str(),
+ (int) wordNum,
+ (int) pageNum,
+ (int) sparsePageNum,
+ startOffset.empty() ?
+ static_cast<int64_t>(0) :
+ startOffset[0]._fileOffset,
+ _eL6.getWriteOffset());
+#endif
+ _eL6.writeBits(0, 1); // Selector bit
+ writeStartOffset(_eL6,
+ startOffset,
+ _l6StartOffset,
+ K_VALUE_COUNTFILE_L6_FILEOFFSET,
+ K_VALUE_COUNTFILE_L6_ACCNUMDOCS);
+ _eL6.encodeExpGolomb(wordNum - _l6WordNum,
+ K_VALUE_COUNTFILE_L6_WORDNUM);
+ _eL6.writeComprBufferIfNeeded();
+ size_t lcp = getLCP(word, _l6Word);
+ vespalib::stringref wordSuffix = word.substr(lcp);
+ _eL6.smallAlign(8);
+#if 0
+ LOG(info,
+ "lcp=%d, at offset %" PRIu64 ,
+ (int) lcp,
+ _eL6.getWriteOffset());
+#endif
+ _eL6.writeBits(lcp, 8);
+ _eL6.writeComprBufferIfNeeded();
+ _eL6.writeString(wordSuffix);
+ assert(pageNum >= _l6PageNum);
+ _eL6.encodeExpGolomb(pageNum - _l6PageNum,
+ K_VALUE_COUNTFILE_L6_PAGENUM);
+ _eL6.writeComprBufferIfNeeded();
+ assert(_l6PageNum < pageNum);
+ assert(_l6SparsePageNum + 1 == sparsePageNum);
+ _l6SparsePageNum = sparsePageNum;
+ _l6PageNum = pageNum;
+ _l6StartOffset = startOffset;
+ _l6Word = word;
+ _l6WordNum = wordNum;
+#if 0
+ LOG(info, "after .. SS bit Offset %" PRId64,
+ _eL6.getWriteOffset());
+#endif
+}
+
+
+void
+PageDict4SSWriter::
+addOverflowCounts(const vespalib::stringref &word,
+ const Counts &counts,
+ const StartOffset &startOffset,
+ uint64_t wordNum)
+{
+#if 0
+ std::ostringstream txtCounts;
+ std::ostringstream txtStartOffset;
+ std::ostringstream txtL6StartOffset;
+ txtCounts << counts;
+ txtStartOffset << startOffset;
+ txtL6StartOffset << _l6StartOffset;
+ LOG(info,
+ "addL6Overflow, \"%s\" wordNum %d, counts %s fileoffset %s l6startOffset %s",
+ word.c_str(),
+ (int) wordNum,
+ txtCounts.str().c_str(),
+ txtStartOffset.str().c_str(),
+ txtL6StartOffset.str().c_str());
+#endif
+ _eL6.writeBits(1, 1); // Selector bit
+ writeStartOffset(_eL6,
+ startOffset,
+ _l6StartOffset,
+ K_VALUE_COUNTFILE_L6_FILEOFFSET,
+ K_VALUE_COUNTFILE_L6_ACCNUMDOCS);
+ _eL6.encodeExpGolomb(wordNum - _l6WordNum,
+ K_VALUE_COUNTFILE_L6_WORDNUM);
+ _eL6.writeComprBufferIfNeeded();
+ _eL6.smallAlign(8);
+ size_t lcp = getLCP(word, _l6Word);
+ vespalib::stringref wordSuffix = word.substr(lcp);
+ _eL6.writeBits(lcp, 8);
+ _eL6.writeComprBufferIfNeeded();
+ _eL6.writeString(wordSuffix);
+ _eL6.writeCounts(counts);
+ _l6StartOffset = startOffset;
+ _l6StartOffset.adjust(counts);
+ _l6Word = word;
+ _l6WordNum = wordNum;
+}
+
+
+void
+PageDict4SSWriter::flush(void)
+{
+}
+
+
+void
+PageDict4SSWriter::checkPointWrite(vespalib::nbostream &out)
+{
+ out << _l6Word;
+ out << _l6StartOffset;
+ out << _l6PageNum;
+ out << _l6SparsePageNum;
+ out << _l6WordNum;
+}
+
+
+void
+PageDict4SSWriter::checkPointRead(vespalib::nbostream &in)
+{
+ in >> _l6Word;
+ in >> _l6StartOffset;
+ in >> _l6PageNum;
+ in >> _l6SparsePageNum;
+ in >> _l6WordNum;
+}
+
+
+PageDict4SPWriter::PageDict4SPWriter(SSWriter &ssWriter,
+ EC &spe)
+ : _eL3(),
+ _wcL3(_eL3),
+ _eL4(),
+ _wcL4(_eL4),
+ _eL5(),
+ _wcL5(_eL5),
+ _l3Word(),
+ _l4Word(),
+ _l5Word(),
+ _l6Word(),
+ _l3WordOffset(0u),
+ _l4WordOffset(0u),
+ _l5WordOffset(0u),
+ _l3StartOffset(),
+ _l4StartOffset(),
+ _l5StartOffset(),
+ _l6StartOffset(),
+ _l3WordNum(1u),
+ _l4WordNum(1u),
+ _l5WordNum(1u),
+ _l6WordNum(1u),
+ _curL3OffsetL4(0u),
+ _curL3OffsetL5(0u),
+ _curL4OffsetL5(0u),
+ _headerSize(getPageHeaderBitSize()),
+ _l3Entries(0u),
+ _l4StrideCheck(0u),
+ _l5StrideCheck(0u),
+ _l3Size(0u),
+ _l4Size(0u),
+ _l5Size(0u),
+ _prevL3Size(0u),
+ _prevL4Size(0u),
+ _prevL5Size(0u),
+ _prevWordsSize(0u),
+ _sparsePageNum(0u),
+ _l3PageNum(0u),
+ _ssWriter(ssWriter),
+ _spe(spe)
+{
+}
+
+
+void
+PageDict4SPWriter::setup()
+{
+ _eL3.copyParams(_spe);
+ _eL4.copyParams(_spe);
+ _eL5.copyParams(_spe);
+ _l6Word.clear();
+ _wcL3.allocComprBuf(getPageByteSize() * 2, getPageByteSize() * 2);
+ _wcL4.allocComprBuf(getPageByteSize() * 2, getPageByteSize() * 2);
+ _wcL5.allocComprBuf(getPageByteSize() * 2, getPageByteSize() * 2);
+ _eL3.setWriteContext(&_wcL3);
+ _eL4.setWriteContext(&_wcL4);
+ _eL5.setWriteContext(&_wcL5);
+ _l3Word = _l6Word;
+ _l4Word = _l6Word;
+ _l5Word = _l6Word;
+ _l3WordOffset = 0u;
+ _l4WordOffset = 0u;
+ _l5WordOffset = 0u;
+ _l3StartOffset = _l6StartOffset;
+ // Handle extra padding after file header
+ _spe.padBits(getFileHeaderPad(_spe.getWriteOffset()));
+ resetPage();
+ _headerSize += _spe.getWriteOffset() & (getPageBitSize() - 1);
+}
+
+
+PageDict4SPWriter::~PageDict4SPWriter(void)
+{
+}
+
+
+void
+PageDict4SPWriter::flushPage(void)
+{
+ assert(_l3Entries > 0);
+ assert(_l3Size > 0);
+ assert(_headerSize >= getPageHeaderBitSize());
+ uint32_t wordsSize = _prevWordsSize;
+ assert(_prevL3Size + _prevL4Size + _prevL5Size + _headerSize +
+ wordsSize * 8 <= getPageBitSize());
+ assert(_prevL5Size < (1u << 15));
+ assert(_prevL4Size < (1u << 15));
+ assert(_prevL3Size < (1u << 15));
+ assert(_l3Entries < (1u << 15));
+ assert(wordsSize < (1u << 12));
+ assert(wordsSize <= _words.size());
+
+ uint32_t l4Residue = getL4Entries(_l3Entries);
+ uint32_t l5Residue = getL5Entries(l4Residue);
+
+ assert((l4Residue == 0) == (_prevL4Size == 0));
+ assert((l5Residue == 0) == (_prevL5Size == 0));
+ (void) l5Residue;
+
+ EC &e = _spe;
+ e.writeBits(_prevL5Size, 15);
+ e.writeBits(_prevL4Size, 15);
+ e.writeBits(_l3Entries, 15);
+ e.writeBits(wordsSize, 12);
+ e.writeComprBufferIfNeeded();
+ if (_prevL5Size > 0) {
+ _eL5.flush();
+ const uint64_t *l5Buf = static_cast<const uint64_t *>(_wcL5._comprBuf);
+ e.writeBits(l5Buf, 0, _prevL5Size);
+ }
+ if (_prevL4Size > 0) {
+ _eL4.flush();
+ const uint64_t *l4Buf = static_cast<const uint64_t *>(_wcL4._comprBuf);
+ e.writeBits(l4Buf, 0, _prevL4Size);
+ }
+ _eL3.flush();
+ const uint64_t *l3Buf = static_cast<const uint64_t *>(_wcL3._comprBuf);
+ e.writeBits(l3Buf, 0, _prevL3Size);
+ uint32_t padding = getPageBitSize() - _headerSize - _prevL5Size - _prevL4Size -
+ _prevL3Size - wordsSize * 8;
+ e.padBits(padding);
+ if (wordsSize > 0) {
+ // Pad with 7 NUL bytes to silence testing tools.
+ _words.reserve(_words.size() + 7);
+ memset(&*_words.end(), '\0', 7);
+ const char *wordsBufX = static_cast<const char *>(&_words[0]);
+ size_t wordsBufXOff = reinterpret_cast<unsigned long>(wordsBufX) & 7;
+ const uint64_t *wordsBuf = reinterpret_cast<const uint64_t *>
+ (wordsBufX - wordsBufXOff);
+ e.writeBits(wordsBuf, 8 * wordsBufXOff, wordsSize * 8);
+ }
+ assert((e.getWriteOffset() & (getPageBitSize() - 1)) == 0);
+ _l6Word = _l3Word;
+ _l6StartOffset = _l3StartOffset;
+ _l6WordNum = _l3WordNum;
+ ++_sparsePageNum;
+}
+
+
+void
+PageDict4SPWriter::flush(void)
+{
+ if (!empty()) {
+ flushPage();
+ _ssWriter.addL6Skip(_l6Word,
+ _l6StartOffset,
+ _l6WordNum,
+ _l3PageNum, getSparsePageNum());
+ }
+ _ssWriter.flush();
+}
+
+
+void
+PageDict4SPWriter::resetPage(void)
+{
+ _eL3.setupWrite(_wcL3);
+ _eL4.setupWrite(_wcL4);
+ _eL5.setupWrite(_wcL5);
+ assert(_eL3.getWriteOffset() == 0);
+ assert(_eL4.getWriteOffset() == 0);
+ assert(_eL5.getWriteOffset() == 0);
+ _l3Word = _l6Word;
+ _l4Word = _l6Word;
+ _l5Word = _l6Word;
+ _l3WordOffset = 0u;
+ _l4WordOffset = 0u;
+ _l5WordOffset = 0u;
+ _l3StartOffset = _l6StartOffset;
+ _l4StartOffset = _l6StartOffset;
+ _l5StartOffset = _l6StartOffset;
+ _l3WordNum = _l6WordNum;
+ _l4WordNum = _l6WordNum;
+ _l5WordNum = _l6WordNum;
+ _curL3OffsetL4 = 0u;
+ _curL3OffsetL5 = 0u;
+ _curL4OffsetL5 = 0u;
+ _l3Entries = 0u;
+ _l4StrideCheck = 0u;
+ _l5StrideCheck = 0u;
+ _l3Size = 0u;
+ _l4Size = 0u;
+ _l5Size = 0u;
+ _prevL3Size = 0u;
+ _prevL4Size = 0u;
+ _prevL5Size = 0u;
+ _prevWordsSize = 0u;
+ _words.clear();
+ _headerSize = getPageHeaderBitSize();
+}
+
+
+void
+PageDict4SPWriter::addL3Skip(const vespalib::stringref &word,
+ const StartOffset &startOffset,
+ uint64_t wordNum,
+ uint64_t pageNum)
+{
+#if 0
+ LOG(info,
+ "addL3Skip(\"%s\"), wordNum=%d pageNum=%d",
+ word.c_str(), (int) wordNum, (int) pageNum);
+#endif
+ assert(_l3WordOffset == _words.size());
+ /*
+ * Update notion of previous size, converting tentative writes to
+ * full writes. This is used when flushing page, since last entry
+ * on each page (possibly overflowing page) is elided, in practice
+ * promoted to an L6 entry at SS level.
+ */
+ _prevL3Size = _l3Size;
+ _prevL4Size = _l4Size;
+ _prevL5Size = _l5Size;
+ _prevWordsSize = _l3WordOffset;
+
+ /*
+ * Tentative write of counts, word and skip info. Converted to full
+ * write when new entry is tentatively added to same page.
+ */
+ writeStartOffset(_eL3,
+ startOffset,
+ _l3StartOffset,
+ K_VALUE_COUNTFILE_L3_FILEOFFSET,
+ K_VALUE_COUNTFILE_L3_ACCNUMDOCS);
+#if 0
+ LOG(info,
+ "Adding l3 delta %d", (int) (wordNum - _l3WordNum));
+#endif
+ _eL3.encodeExpGolomb(wordNum - _l3WordNum,
+ K_VALUE_COUNTFILE_L3_WORDNUM);
+ _eL3.writeComprBufferIfNeeded();
+ _l3Size = static_cast<uint32_t>(_eL3.getWriteOffset());
+ size_t lcp = getLCP(word, _l3Word);
+ _l3Word = word;
+ _l3StartOffset = startOffset;
+ _l3WordNum = wordNum;
+ ++_l3Entries;
+ ++_l4StrideCheck;
+ if (_l4StrideCheck >= getL4SkipStride())
+ addL4Skip(lcp);
+ addLCPWord(word, lcp, _words);
+ _l3WordOffset = _words.size();
+ _l3PageNum = pageNum;
+ if (_l3Size + _l4Size + _l5Size + _headerSize + 8 * _l3WordOffset >
+ getPageBitSize()) {
+ // Cannot convert tentative writes to full writes due to overflow.
+ // Flush existing full writes.
+ flushPage();
+
+ // Compensate for elided entry.
+ _l6Word = word;
+ _l6StartOffset = startOffset;
+ _l6WordNum = wordNum;
+
+ _ssWriter.addL6Skip(_l6Word,
+ _l6StartOffset,
+ _l6WordNum,
+ _l3PageNum, getSparsePageNum());
+ resetPage();
+ }
+}
+
+
+void
+PageDict4SPWriter::addL4Skip(size_t &lcp)
+{
+#if 0
+ LOG(info,
+ "addL4Skip(\"%s\")",
+ _l3Word.c_str());
+#endif
+ size_t tlcp = getLCP(_l3Word, _l4Word);
+ assert(tlcp <= lcp);
+ if (tlcp < lcp)
+ lcp = tlcp;
+ _l4StrideCheck = 0u;
+ _eL4.encodeExpGolomb(_l3WordOffset - _l4WordOffset,
+ K_VALUE_COUNTFILE_L4_WORDOFFSET);
+ _eL4.writeComprBufferIfNeeded();
+ writeStartOffset(_eL4,
+ _l3StartOffset,
+ _l4StartOffset,
+ K_VALUE_COUNTFILE_L4_FILEOFFSET,
+ K_VALUE_COUNTFILE_L4_ACCNUMDOCS);
+ _eL4.encodeExpGolomb(_l3WordNum - _l4WordNum,
+ K_VALUE_COUNTFILE_L4_WORDNUM);
+ _eL4.writeComprBufferIfNeeded();
+ _eL4.encodeExpGolomb(_l3Size - _curL3OffsetL4,
+ K_VALUE_COUNTFILE_L4_L3OFFSET);
+ _eL4.writeComprBufferIfNeeded();
+ _l4StartOffset = _l3StartOffset;
+ _l4WordNum = _l3WordNum;
+ _curL3OffsetL4 = _l3Size;
+ _l4Size = _eL4.getWriteOffset();
+ _l4Word = _l3Word;
+ ++_l5StrideCheck;
+ if (_l5StrideCheck >= getL5SkipStride()) {
+ addL5Skip(lcp);
+ _l5StrideCheck = 0;
+ }
+ _l4WordOffset = _l3WordOffset + 2 + _l3Word.size() - lcp;
+}
+
+
+void
+PageDict4SPWriter::addL5Skip(size_t &lcp)
+{
+#if 0
+ LOG(info,
+ "addL5Skip(\"%s\")",
+ _l3Word.c_str());
+#endif
+ size_t tlcp = getLCP(_l3Word, _l5Word);
+ assert(tlcp <= lcp);
+ if (tlcp < lcp)
+ lcp = tlcp;
+ _eL5.encodeExpGolomb(_l3WordOffset - _l5WordOffset,
+ K_VALUE_COUNTFILE_L5_WORDOFFSET);
+ _eL5.writeComprBufferIfNeeded();
+ writeStartOffset(_eL5,
+ _l3StartOffset,
+ _l5StartOffset,
+ K_VALUE_COUNTFILE_L5_FILEOFFSET,
+ K_VALUE_COUNTFILE_L5_ACCNUMDOCS);
+ _eL5.encodeExpGolomb(_l3WordNum - _l5WordNum,
+ K_VALUE_COUNTFILE_L5_WORDNUM);
+ _eL5.writeComprBufferIfNeeded();
+ _eL5.encodeExpGolomb(_l3Size - _curL3OffsetL5,
+ K_VALUE_COUNTFILE_L5_L3OFFSET);
+ _eL5.encodeExpGolomb(_l4Size - _curL4OffsetL5,
+ K_VALUE_COUNTFILE_L5_L4OFFSET);
+ _eL5.writeComprBufferIfNeeded();
+ _l5StartOffset = _l3StartOffset;
+ _l5WordNum = _l3WordNum;
+ _curL3OffsetL5 = _l3Size;
+ _curL4OffsetL5 = _l4Size;
+ _l5Size = _eL5.getWriteOffset();
+ _l5Word = _l3Word;
+ _l5WordOffset = _l3WordOffset + 2 + _l3Word.size() - lcp;
+}
+
+
+void
+PageDict4SPWriter::checkPointWrite(vespalib::nbostream &out)
+{
+ _wcL3.checkPointWrite(out);
+ _wcL4.checkPointWrite(out);
+ _wcL5.checkPointWrite(out);
+ out << _l3Word << _l4Word << _l5Word << _l6Word;
+ out << _l3WordOffset << _l4WordOffset << _l5WordOffset;
+ out << _l3StartOffset << _l4StartOffset << _l5StartOffset << _l6StartOffset;
+ out << _l3WordNum << _l4WordNum << _l5WordNum << _l6WordNum;
+ out << _curL3OffsetL4 << _curL3OffsetL5 << _curL4OffsetL5;
+ out << _headerSize;
+ out << _l3Entries;
+ out << _l4StrideCheck << _l5StrideCheck;
+ out << _l3Size << _l4Size << _l5Size;
+ out << _prevL3Size << _prevL4Size << _prevL5Size << _prevWordsSize;
+ out << _sparsePageNum << _l3PageNum;
+ out << _words;
+}
+
+
+void
+PageDict4SPWriter::checkPointRead(vespalib::nbostream &in)
+{
+ _wcL3.checkPointRead(in);
+ _wcL4.checkPointRead(in);
+ _wcL5.checkPointRead(in);
+ in >> _l3Word >> _l4Word >> _l5Word >> _l6Word;
+ in >> _l3WordOffset >> _l4WordOffset >> _l5WordOffset;
+ in >> _l3StartOffset >> _l4StartOffset >> _l5StartOffset >> _l6StartOffset;
+ in >> _l3WordNum >> _l4WordNum >> _l5WordNum >> _l6WordNum;
+ in >> _curL3OffsetL4 >> _curL3OffsetL5 >> _curL4OffsetL5;
+ in >> _headerSize;
+ in >> _l3Entries;
+ in >> _l4StrideCheck >> _l5StrideCheck;
+ in >> _l3Size >> _l4Size >> _l5Size;
+ in >> _prevL3Size >> _prevL4Size >> _prevL5Size >> _prevWordsSize;
+ in >> _sparsePageNum >> _l3PageNum;
+ in >> _words;
+}
+
+
+PageDict4PWriter::PageDict4PWriter(SPWriter &spWriter,
+ EC &pe)
+ : _eCounts(),
+ _wcCounts(_eCounts),
+ _eL1(),
+ _wcL1(_eL1),
+ _eL2(),
+ _wcL2(_eL2),
+ _countsWord(),
+ _l1Word(),
+ _l2Word(),
+ _l3Word(),
+ _pendingCountsWord(),
+ _countsWordOffset(0u),
+ _l1WordOffset(0u),
+ _l2WordOffset(0u),
+ _countsStartOffset(),
+ _l1StartOffset(),
+ _l2StartOffset(),
+ _l3StartOffset(),
+ _curCountOffsetL1(0u),
+ _curCountOffsetL2(0u),
+ _curL1OffsetL2(0u),
+ _headerSize(getPageHeaderBitSize()),
+ _countsEntries(0u),
+ _l1StrideCheck(0u),
+ _l2StrideCheck(0u),
+ _countsSize(0u),
+ _l1Size(0u),
+ _l2Size(0u),
+ _prevL1Size(0u),
+ _prevL2Size(0u),
+ _pageNum(0u),
+ _l3WordNum(1u),
+ _wordNum(1u),
+ _words(),
+ _spWriter(spWriter),
+ _pe(pe)
+{
+}
+
+
+void
+PageDict4PWriter::setup()
+{
+ _eCounts.copyParams(_pe);
+ _eL1.copyParams(_pe);
+ _eL2.copyParams(_pe);
+ _l3Word.clear();
+ _wcCounts.allocComprBuf(getPageByteSize() * 2, getPageByteSize() * 2);
+ _wcL1.allocComprBuf(getPageByteSize() * 2, getPageByteSize() * 2);
+ _wcL2.allocComprBuf(getPageByteSize() * 2, getPageByteSize() * 2);
+ _eCounts.setWriteContext(&_wcCounts);
+ _eL1.setWriteContext(&_wcL1);
+ _eL2.setWriteContext(&_wcL2);
+ _countsWord = _l3Word;
+ _l1Word = _l3Word;
+ _l2Word = _l3Word;
+ _pendingCountsWord.clear();
+ _countsWordOffset = 0u;
+ _l1WordOffset = 0u;
+ _l2WordOffset = 0u;
+ _countsStartOffset = _l3StartOffset;
+ // Handle extra padding after file header
+ _pe.padBits(getFileHeaderPad(_pe.getWriteOffset()));
+ resetPage();
+ _headerSize += _pe.getWriteOffset() & (getPageBitSize() - 1);
+}
+
+
+PageDict4PWriter::~PageDict4PWriter(void)
+{
+}
+
+
+void
+PageDict4PWriter::flushPage(void)
+{
+ assert(_countsEntries > 0);
+ assert(_countsSize > 0);
+ assert(_headerSize >= getPageHeaderBitSize());
+ assert(_countsSize + _l1Size + _l2Size + _headerSize +
+ 8 * _countsWordOffset <= getPageBitSize());
+ assert(_l2Size < (1u << 15));
+ assert(_l1Size < (1u << 15));
+ assert(_countsEntries < (1u << 15));
+ assert(_countsWordOffset < (1u << 12));
+
+ uint32_t l1Residue = getL1Entries(_countsEntries);
+ uint32_t l2Residue = getL2Entries(l1Residue);
+
+ assert((l1Residue == 0) == (_l1Size == 0));
+ assert((l2Residue == 0) == (_l2Size == 0));
+ (void) l2Residue;
+
+ EC &e = _pe;
+ e.writeBits(_l2Size, 15);
+ e.writeBits(_l1Size, 15);
+ e.writeBits(_countsEntries, 15);
+ e.writeBits(_countsWordOffset, 12);
+ e.writeComprBufferIfNeeded();
+ if (_l2Size > 0) {
+ _eL2.flush();
+ const uint64_t *l2Buf = static_cast<const uint64_t *>(_wcL2._comprBuf);
+ e.writeBits(l2Buf, 0, _l2Size);
+ }
+ if (_l1Size > 0) {
+ _eL1.flush();
+ const uint64_t *l1Buf = static_cast<const uint64_t *>(_wcL1._comprBuf);
+ e.writeBits(l1Buf, 0, _l1Size);
+ }
+ _eCounts.flush();
+ const uint64_t *countsBuf = static_cast<const uint64_t *>
+ (_wcCounts._comprBuf);
+ e.writeBits(countsBuf, 0, _countsSize);
+ uint32_t padding = getPageBitSize() - _headerSize - _l2Size - _l1Size -
+ _countsSize - _countsWordOffset * 8;
+ e.padBits(padding);
+ if (_countsWordOffset > 0) {
+ // Pad with 7 NUL bytes to silence testing tools.
+ _words.reserve(_words.size() + 7);
+ memset(&*_words.end(), '\0', 7);
+ const char *wordsBufX = static_cast<const char *>(&_words[0]);
+ size_t wordsBufXOff = reinterpret_cast<unsigned long>(wordsBufX) & 7;
+ const uint64_t *wordsBuf = reinterpret_cast<const uint64_t *>
+ (wordsBufX - wordsBufXOff);
+ e.writeBits(wordsBuf, 8 * wordsBufXOff, _countsWordOffset * 8);
+ }
+ assert((e.getWriteOffset() & (getPageBitSize() - 1)) == 0);
+ _l3Word = _pendingCountsWord;
+ _l3StartOffset = _countsStartOffset;
+ _l3WordNum = _wordNum;
+ ++_pageNum;
+}
+
+
+void
+PageDict4PWriter::flush(void)
+{
+ if (!empty()) {
+ flushPage();
+ _spWriter.addL3Skip(_l3Word,
+ _l3StartOffset,
+ _l3WordNum,
+ getPageNum());
+ }
+ _spWriter.flush();
+}
+
+
+void
+PageDict4PWriter::resetPage(void)
+{
+ _eCounts.setupWrite(_wcCounts);
+ _eL1.setupWrite(_wcL1);
+ _eL2.setupWrite(_wcL2);
+ assert(_eCounts.getWriteOffset() == 0);
+ assert(_eL1.getWriteOffset() == 0);
+ assert(_eL2.getWriteOffset() == 0);
+ _countsWord = _l3Word;
+ _l1Word = _l3Word;
+ _l2Word = _l3Word;
+ _pendingCountsWord.clear();
+ _countsWordOffset = 0u;
+ _l1WordOffset = 0u;
+ _l2WordOffset = 0u;
+ _countsStartOffset = _l3StartOffset;
+ _l1StartOffset = _l3StartOffset;
+ _l2StartOffset = _l3StartOffset;
+ _curCountOffsetL1 = 0u;
+ _curCountOffsetL2 = 0u;
+ _curL1OffsetL2 = 0u;
+ _countsEntries = 0u;
+ _l1StrideCheck = 0u;
+ _l2StrideCheck = 0u;
+ _countsSize = 0u;
+ _l1Size = 0u;
+ _l2Size = 0u;
+ _prevL1Size = 0u;
+ _prevL2Size = 0u;
+ _words.clear();
+ _headerSize = getPageHeaderBitSize();
+}
+
+
+void
+PageDict4PWriter::
+addCounts(const vespalib::stringref &word,
+ const Counts &counts)
+{
+#if 0
+ std::ostringstream txtcounts;
+ txtcounts << counts;
+ LOG(info,
+ "addCounts(\"%s\", %s), wordNum=%d",
+ word.c_str(),
+ txtcounts.str().c_str(),
+ (int) _wordNum);
+#endif
+ assert(_countsWordOffset == _words.size());
+ size_t lcp = getLCP(_pendingCountsWord, _countsWord);
+ if (_l1StrideCheck >= getL1SkipStride())
+ addL1Skip(lcp);
+ if (_countsEntries > 0)
+ addLCPWord(_pendingCountsWord, lcp, _words);
+ _eCounts.writeCounts(counts);
+ uint32_t eCountsOffset = static_cast<uint32_t>(_eCounts.getWriteOffset());
+ if (eCountsOffset + _l1Size + _l2Size + _headerSize +
+ 8 * (_countsWordOffset + 2 + _pendingCountsWord.size() - lcp) >
+ getPageBitSize()) {
+#if 0
+ LOG(info,
+ "Backtrack: eCountsOffset=%d, l1size=%d, l2size=%d, hdrsize=%d",
+ (int) eCountsOffset,
+ (int) _l1Size,
+ (int) _l2Size,
+ (int) _headerSize);
+#endif
+ if (_l1StrideCheck == 0u) {
+ _l1Size = _prevL1Size; // Undo L1
+ _l2Size = _prevL2Size; // Undo L2
+ }
+ if (_countsEntries > 0) {
+ flushPage();
+ _spWriter.addL3Skip(_l3Word,
+ _l3StartOffset,
+ _l3WordNum,
+ getPageNum());
+ resetPage();
+ _eCounts.writeCounts(counts);
+ eCountsOffset = static_cast<uint32_t>(_eCounts.getWriteOffset());
+ }
+ if (eCountsOffset + _headerSize > getPageBitSize()) {
+ // overflow page.
+ addOverflowCounts(word, counts);
+ _spWriter.addOverflowCounts(word, counts, _countsStartOffset,
+ _l3WordNum);
+ _spWriter.addL3Skip(_l3Word,
+ _l3StartOffset,
+ _l3WordNum,
+ getPageNum());
+ resetPage();
+#if 0
+ std::ostringstream txtoffsets;
+ txtoffsets << _countsStartOffset;
+ LOG(info, "countsStartOffsets=%s", txtoffsets.str().c_str());
+#endif
+ return;
+ }
+ }
+ _countsSize = eCountsOffset;
+ ++_countsEntries;
+ ++_l1StrideCheck;
+ _countsStartOffset.adjust(counts);
+#if 0
+ std::ostringstream txtoffsets;
+ txtoffsets << _countsStartOffset;
+ LOG(info, "countsStartOffsets=%s", txtoffsets.str().c_str());
+#endif
+ _countsWord = _pendingCountsWord;
+ _countsWordOffset = _words.size();
+ _pendingCountsWord = word;
+ _wordNum++;
+}
+
+
+/* Private use */
+void
+PageDict4PWriter::addOverflowCounts(const vespalib::stringref &word,
+ const Counts &counts)
+{
+ assert(_countsEntries == 0);
+ assert(_countsSize == 0);
+ assert(_headerSize >= getPageHeaderBitSize());
+ assert(_countsSize + _l1Size + _l2Size + _headerSize <= getPageBitSize());
+ assert(_l2Size == 0);
+ assert(_l1Size == 0);
+ assert(_countsSize == 0);
+ assert(_countsWordOffset == 0);
+
+ EC &e = _pe;
+ e.writeBits(0, 15);
+ e.writeBits(0, 15);
+ e.writeBits(0, 15);
+ e.writeBits(0, 12);
+ e.smallAlign(64);
+ e.writeComprBufferIfNeeded();
+ e.writeBits(_wordNum, 64); // Identifies overflow for later read
+#if 0
+ LOG(info,
+ "AddOverflowCounts wordnum %d", (int) _wordNum);
+#endif
+ uint32_t alignedHeaderSize = (_headerSize + 63) & -64;
+ uint32_t padding = getPageBitSize() - alignedHeaderSize - 64;
+ e.padBits(padding);
+ assert((e.getWriteOffset() & (getPageBitSize() - 1)) == 0);
+ _l3Word = word;
+ _l3StartOffset = _countsStartOffset;
+ _l3StartOffset.adjust(counts);
+ ++_pageNum;
+ ++_wordNum;
+ _l3WordNum = _wordNum;
+}
+
+
+void
+PageDict4PWriter::addL1Skip(size_t &lcp)
+{
+ _prevL1Size = _l1Size; // Prepare for undo
+ _prevL2Size = _l2Size; // Prepare for undo
+ size_t tlcp = getLCP(_pendingCountsWord, _l1Word);
+ assert(tlcp <= lcp);
+ if (tlcp < lcp)
+ lcp = tlcp;
+ _l1StrideCheck = 0u;
+#if 0
+ LOG(info,
+ "addL1SKip(\"%s\"), lcp=%d, offset=%d -> %d",
+ _pendingCountsWord.c_str(),
+ (int) lcp,
+ (int) _l1WordOffset,
+ (int) _countsWordOffset);
+#endif
+ _eL1.encodeExpGolomb(_countsWordOffset - _l1WordOffset,
+ K_VALUE_COUNTFILE_L1_WORDOFFSET);
+ _eL1.writeComprBufferIfNeeded();
+ writeStartOffset(_eL1,
+ _countsStartOffset,
+ _l1StartOffset,
+ K_VALUE_COUNTFILE_L1_FILEOFFSET,
+ K_VALUE_COUNTFILE_L1_ACCNUMDOCS);
+ _eL1.encodeExpGolomb(_countsSize - _curCountOffsetL1,
+ K_VALUE_COUNTFILE_L1_COUNTOFFSET);
+ _eL1.writeComprBufferIfNeeded();
+ _l1StartOffset = _countsStartOffset;
+ _curCountOffsetL1 = _countsSize;
+ _l1Size = _eL1.getWriteOffset();
+ ++_l2StrideCheck;
+ if (_l2StrideCheck >= getL2SkipStride())
+ addL2Skip(lcp);
+ _l1WordOffset = _countsWordOffset + 2 + _pendingCountsWord.size() - lcp;
+}
+
+
+void
+PageDict4PWriter::addL2Skip(size_t &lcp)
+{
+ size_t tlcp = getLCP(_pendingCountsWord, _l2Word);
+ assert(tlcp <= lcp);
+ if (tlcp < lcp)
+ lcp = tlcp;
+ _l2StrideCheck = 0;
+#if 0
+ LOG(info,
+ "addL2SKip(\"%s\"), lcp=%d, offset=%d -> %d",
+ _pendingCountsWord.c_str(),
+ (int) lcp,
+ (int) _l2WordOffset,
+ (int) _countsWordOffset);
+#endif
+ _eL2.encodeExpGolomb(_countsWordOffset - _l2WordOffset,
+ K_VALUE_COUNTFILE_L2_WORDOFFSET);
+ _eL2.writeComprBufferIfNeeded();
+ writeStartOffset(_eL2,
+ _countsStartOffset,
+ _l2StartOffset,
+ K_VALUE_COUNTFILE_L2_FILEOFFSET,
+ K_VALUE_COUNTFILE_L2_ACCNUMDOCS);
+ _eL2.encodeExpGolomb(_countsSize - _curCountOffsetL2,
+ K_VALUE_COUNTFILE_L2_COUNTOFFSET);
+ _eL2.encodeExpGolomb(_l1Size - _curL1OffsetL2,
+ K_VALUE_COUNTFILE_L2_L1OFFSET);
+ _eL2.writeComprBufferIfNeeded();
+ _l2StartOffset = _countsStartOffset;
+ _curCountOffsetL2 = _countsSize;
+ _curL1OffsetL2 = _l1Size;
+ _l2Size = _eL2.getWriteOffset();
+ _l2WordOffset = _countsWordOffset + 2 + _pendingCountsWord.size() - lcp;
+}
+
+
+void
+PageDict4PWriter::checkPointWrite(vespalib::nbostream &out)
+{
+ _wcCounts.checkPointWrite(out);
+ _wcL1.checkPointWrite(out);
+ _wcL2.checkPointWrite(out);
+ out << _countsWord << _l1Word << _l2Word << _l3Word;
+ out << _pendingCountsWord;
+ out << _countsWordOffset << _l1WordOffset << _l2WordOffset;
+ out << _countsStartOffset << _l1StartOffset << _l2StartOffset;
+ out << _l3StartOffset;
+ out << _curCountOffsetL1 << _curCountOffsetL2 << _curL1OffsetL2;
+ out << _headerSize;
+ out << _countsEntries;
+ out << _l1StrideCheck << _l2StrideCheck;
+ out << _countsSize << _l1Size << _l2Size;
+ out << _prevL1Size << _prevL2Size;
+ out << _pageNum;
+ out << _l3WordNum << _wordNum;
+ out << _words;
+}
+
+
+void
+PageDict4PWriter::checkPointRead(vespalib::nbostream &in)
+{
+ _wcCounts.checkPointRead(in);
+ _wcL1.checkPointRead(in);
+ _wcL2.checkPointRead(in);
+ in >> _countsWord >> _l1Word >> _l2Word >> _l3Word;
+ in >> _pendingCountsWord;
+ in >> _countsWordOffset >> _l1WordOffset >> _l2WordOffset;
+ in >> _countsStartOffset >> _l1StartOffset >> _l2StartOffset;
+ in >> _l3StartOffset;
+ in >> _curCountOffsetL1 >> _curCountOffsetL2 >> _curL1OffsetL2;
+ in >> _headerSize;
+ in >> _countsEntries;
+ in >> _l1StrideCheck >> _l2StrideCheck;
+ in >> _countsSize >> _l1Size >> _l2Size;
+ in >> _prevL1Size >> _prevL2Size;
+ in >> _pageNum;
+ in >> _l3WordNum >> _wordNum;
+ in >> _words;
+}
+
+
+PageDict4SSLookupRes::
+PageDict4SSLookupRes(void)
+ : _l6Word(),
+ _lastWord(),
+ _l6StartOffset(),
+ _counts(),
+ _pageNum(0u),
+ _sparsePageNum(0u),
+ _l6WordNum(1u),
+ _startOffset(),
+ _res(false),
+ _overflow(false)
+{
+}
+
+
+PageDict4SSLookupRes::
+~PageDict4SSLookupRes(void)
+{
+}
+
+
+PageDict4SSReader::
+PageDict4SSReader(ComprBuffer &cb,
+ uint32_t ssFileHeaderSize,
+ uint64_t ssFileBitLen,
+ uint32_t spFileHeaderSize,
+ uint64_t spFileBitLen,
+ uint32_t pFileHeaderSize,
+ uint64_t pFileBitLen)
+ : _cb(sizeof(uint64_t)),
+ _ssFileBitLen(ssFileBitLen),
+ _ssStartOffset(ssFileHeaderSize * 8),
+ _l7(),
+ _ssd(),
+ _spFileBitLen(spFileBitLen),
+ _pFileBitLen(pFileBitLen),
+ _spStartOffset(spFileHeaderSize * 8),
+ _pStartOffset(pFileHeaderSize * 8),
+ _spFirstPageNum(0u),
+ _spFirstPageOffset(0u),
+ _pFirstPageNum(0u),
+ _pFirstPageOffset(0u),
+ _overflows()
+{
+ // Reference existing compressed buffer
+ _cb._comprBuf = cb._comprBuf;
+ _cb._comprBufSize = cb._comprBufSize;
+}
+
+
+PageDict4SSReader::
+~PageDict4SSReader(void)
+{
+}
+
+
+void
+PageDict4SSReader::setup(DC &ssd)
+{
+ _ssd.copyParams(ssd);
+ // Handle extra padding after file header
+ uint32_t offset = _spStartOffset + getFileHeaderPad(_spStartOffset);
+ _spFirstPageNum = offset / getPageBitSize();
+ _spFirstPageOffset = offset & (getPageBitSize() - 1);
+ offset = _pStartOffset + getFileHeaderPad(_pStartOffset);
+ _pFirstPageNum = offset / getPageBitSize();
+ _pFirstPageOffset = offset & (getPageBitSize() - 1);
+ // setup();
+
+ DC dL6;
+
+#if 0
+ LOG(info,
+ "comprBuf=%p, comprBufSize=%d",
+ static_cast<const void *>(_cb._comprBuf),
+ (int) _cb._comprBufSize);
+#endif
+ setDecoderPosition(dL6, _cb, _ssStartOffset);
+
+ dL6.copyParams(_ssd);
+
+ _l7.clear();
+
+ vespalib::string word;
+ Counts counts;
+ StartOffset startOffset;
+ uint64_t pageNum = _pFirstPageNum;
+ uint32_t sparsePageNum = _spFirstPageNum;
+ uint32_t l7StrideCheck = 0;
+ uint32_t l7Ref = noL7Ref(); // Last L6 entry not after this L7 entry
+
+ uint32_t l6Offset = dL6.getReadOffset();
+ uint64_t l6WordNum = 1;
+ bool forceL7Entry = false;
+ bool overflow = false;
+
+ while (l6Offset < _ssFileBitLen) {
+#if 0
+ LOG(info,
+ "L6Offset=%" PRIu32 ", bitLen=%" PRIu64,
+ l6Offset,
+ _ssFileBitLen);
+#endif
+ UC64_DECODECONTEXT(o);
+ uint32_t length;
+ uint64_t val64;
+ const bool bigEndian = true;
+ UC64_DECODECONTEXT_LOAD(o, dL6._);
+ overflow = ((oVal & TOP_BIT64) != 0);
+ oVal <<= 1;
+ length = 1;
+ UC64_READBITS_NS(o, EC);
+ UC64_DECODECONTEXT_STORE(o, dL6._);
+
+ /*
+ * L7 entry for each 16th L6 entry and right before and after any
+ * overflow entry.
+ */
+ if (l7StrideCheck >= getL7SkipStride() ||
+ (l7StrideCheck > 0 && (overflow || forceL7Entry))) {
+ // Don't update l7Ref if this L7 entry points to an overflow entry
+ if (!forceL7Entry)
+ l7Ref = _l7.size(); // Self-ref if referencing L6 entry
+ _l7.push_back(L7Entry(word, startOffset, l6WordNum,
+ l6Offset, sparsePageNum, pageNum, l7Ref));
+ l7StrideCheck = 0;
+ forceL7Entry = false;
+ }
+ readStartOffset(dL6,
+ startOffset,
+ K_VALUE_COUNTFILE_L6_FILEOFFSET,
+ K_VALUE_COUNTFILE_L6_ACCNUMDOCS);
+ UC64_DECODECONTEXT_LOAD(o, dL6._);
+ UC64_DECODEEXPGOLOMB_NS(o,
+ K_VALUE_COUNTFILE_L6_WORDNUM,
+ EC);
+#if 0
+ LOG(info,
+ "Bumping l6wordnum from %d to %d (delta %d)",
+ (int) l6WordNum, (int) (l6WordNum + val64) , (int) val64);
+#endif
+ l6WordNum += val64;
+ UC64_DECODECONTEXT_STORE(o, dL6._);
+ dL6.smallAlign(8);
+ const uint8_t *bytes = dL6.getByteCompr();
+ size_t lcp = *bytes;
+ ++bytes;
+ assert(lcp <= word.size());
+ word.resize(lcp);
+ word += reinterpret_cast<const char *>(bytes);
+ dL6.setByteCompr(bytes + word.size() + 1 - lcp);
+ if (overflow) {
+#if 0
+ LOG(info,
+ "AddOverflowRef2 wordnum %d", (int) (l6WordNum - 1));
+#endif
+ _overflows.push_back(OverflowRef(l6WordNum - 1, _l7.size()));
+ dL6.readCounts(counts);
+ startOffset.adjust(counts);
+ forceL7Entry = true; // Add new L7 entry as soon as possible
+ } else {
+ UC64_DECODECONTEXT_LOAD(o, dL6._);
+ UC64_DECODEEXPGOLOMB_NS(o,
+ K_VALUE_COUNTFILE_L6_PAGENUM,
+ EC);
+ pageNum += val64;
+ ++sparsePageNum;
+ UC64_DECODECONTEXT_STORE(o, dL6._);
+ }
+#if 0
+ std::ostringstream txtfileoffset;
+ txtfileoffset << startOffset;
+ LOG(info,
+ "ssreader::setup "
+ "word=%s, l6offset=%d->%d, startOffsets=%s overflow=%s",
+ word.c_str(),
+ (int) l6Offset,
+ (int) dL6.getReadOffset(),
+ txtfileoffset.str().c_str(),
+ overflow ? "true" : "false");
+#endif
+ ++l7StrideCheck;
+ l6Offset = dL6.getReadOffset();
+ }
+ if (l7StrideCheck > 0) {
+ if (!forceL7Entry)
+ l7Ref = _l7.size(); // Self-ref if referencing L6 entry
+ _l7.push_back(L7Entry(word, startOffset, l6WordNum,
+ l6Offset, sparsePageNum, pageNum, l7Ref));
+ }
+ assert(l6Offset == _ssFileBitLen);
+}
+
+
+PageDict4SSLookupRes
+PageDict4SSReader::
+lookup(const vespalib::stringref &key)
+{
+ PageDict4SSLookupRes res;
+
+ DC dL6;
+
+ dL6.copyParams(_ssd);
+
+ uint32_t l7Pos = 0;
+ uint32_t l7Ref = noL7Ref();
+
+ L7Vector::const_iterator l7lb;
+ l7lb = std::lower_bound(_l7.begin(), _l7.end(), key);
+
+ l7Pos = &*l7lb - &_l7[0];
+ StartOffset startOffset;
+ uint64_t pageNum = _pFirstPageNum;
+ uint32_t sparsePageNum = _spFirstPageNum;
+ uint32_t l6Offset = _ssStartOffset;
+ uint64_t l6WordNum = 1;
+ uint64_t wordNum = l6WordNum;
+
+ vespalib::string l6Word; // Last L6 entry word
+ vespalib::string word;
+ StartOffset l6StartOffset; // Last L6 entry file offset
+
+ // Setup for decoding of L6+overflow stream
+ if (l7Pos > 0) {
+ L7Entry &l7e = _l7[l7Pos - 1];
+ l7Ref = l7e._l7Ref;
+ startOffset = l7e._l7StartOffset;
+ word = l7e._l7Word;
+ l6Offset = l7e._l6Offset;
+ wordNum = l7e._l7WordNum;
+ }
+
+ /*
+ * Setup L6 only variables, used when no overflow matches.
+ *
+ * l7Ref == l7Pos - 1, when _l7[l7Pos -1] references end of L6
+ * entry in L6+overflow stream.
+ *
+ * l7Ref != l7Pos - 1, when _l7[l7Pos -1] references end of overflow
+ * entry in L6+overflow stream, and is used for backtracking to end
+ * of previous L6 entry in L6+overflow stream.
+ */
+ if (l7Ref != noL7Ref()) {
+ L7Entry &l7e = _l7[l7Ref];
+ sparsePageNum = l7e._sparsePageNum;
+ pageNum = l7e._pageNum;
+ l6Word = l7e._l7Word;
+ l6StartOffset = l7e._l7StartOffset;
+ l6WordNum = l7e._l7WordNum;
+ }
+
+#if 0
+ LOG(info,
+ "sslookup1: l6WordNum=%d, l6Word=\"%s\", key=\"%s\", l6Offset=%d",
+ (int) l6WordNum,
+ l6Word.c_str(),
+ key.c_str(),
+ (int) l6Offset);
+#endif
+
+ setDecoderPosition(dL6, _cb, l6Offset);
+
+ Counts counts;
+
+ while (l6Offset < _ssFileBitLen) {
+ UC64_DECODECONTEXT(o);
+ uint32_t length;
+ uint64_t val64;
+ const bool bigEndian = true;
+ UC64_DECODECONTEXT_LOAD(o, dL6._);
+ bool overflow = ((oVal & TOP_BIT64) != 0);
+ oVal <<= 1;
+ length = 1;
+ UC64_READBITS_NS(o, EC);
+ UC64_DECODECONTEXT_STORE(o, dL6._);
+
+ readStartOffset(dL6,
+ startOffset,
+ K_VALUE_COUNTFILE_L6_FILEOFFSET,
+ K_VALUE_COUNTFILE_L6_ACCNUMDOCS);
+ UC64_DECODECONTEXT_LOAD(o, dL6._);
+ UC64_DECODEEXPGOLOMB_NS(o,
+ K_VALUE_COUNTFILE_L6_WORDNUM,
+ EC);
+ wordNum += val64;
+ UC64_DECODECONTEXT_STORE(o, dL6._);
+ dL6.smallAlign(8);
+ const uint8_t *bytes = dL6.getByteCompr();
+ size_t lcp = *bytes;
+ ++bytes;
+ assert(lcp <= word.size());
+ word.resize(lcp);
+ word += reinterpret_cast<const char *>(bytes);
+ dL6.setByteCompr(bytes + word.size() + 1 - lcp);
+ if (overflow) {
+#if 0
+ LOG(info,
+ "sslookup: wordNum=%d, word=\"%s\", key=\"%s\"",
+ (int) wordNum,
+ word.c_str(),
+ key.c_str());
+#endif
+ bool l6NotLessThanKey = !(word < key);
+ if (l6NotLessThanKey) {
+ if (key == word) {
+ dL6.readCounts(counts);
+ res._overflow = true;
+ res._counts = counts;
+ res._startOffset = startOffset;
+ l6WordNum = wordNum - 1; // overloaded meaning
+ }
+ break; // key < counts
+ }
+ LOG(error, "FATAL: Missing L7 entry for overflow entry");
+ abort(); // counts < key, should not happen (missing L7 entry)
+ } else {
+ bool l6NotLessThanKey = !(word < key);
+ if (l6NotLessThanKey)
+ break; // key <= counts
+ UC64_DECODECONTEXT_LOAD(o, dL6._);
+ UC64_DECODEEXPGOLOMB_NS(o,
+ K_VALUE_COUNTFILE_L6_PAGENUM,
+ EC);
+ pageNum += val64;
+ ++sparsePageNum;
+ UC64_DECODECONTEXT_STORE(o, dL6._);
+ l6Word = word;
+ l6StartOffset = startOffset;
+ l6WordNum = wordNum;
+ }
+ l6Offset = dL6.getReadOffset();
+ }
+ assert(l6Offset <= _ssFileBitLen);
+ res._l6Word = l6Word;
+ if (l6Offset >= _ssFileBitLen)
+ res._lastWord.clear(); // Mark that word is beyond end of dictionary
+ else
+ res._lastWord = word;
+ res._l6StartOffset = l6StartOffset;
+ res._pageNum = pageNum;
+ res._sparsePageNum = sparsePageNum;
+ res._l6WordNum = l6WordNum;
+ // Lookup succeeded if not run to end of L6 info or if overflow was found
+ // Failed lookup means we want keys larger than the highest present.
+ res._res = l6Offset < _ssFileBitLen || res._overflow;
+ return res;
+}
+
+
+PageDict4SSLookupRes
+PageDict4SSReader::
+lookupOverflow(uint64_t wordNum) const
+{
+ PageDict4SSLookupRes res;
+
+ assert(!_overflows.empty());
+
+ OverflowVector::const_iterator lb =
+ std::lower_bound(_overflows.begin(),
+ _overflows.end(),
+ wordNum);
+
+ assert(lb != _overflows.end());
+ assert(lb->_wordNum == wordNum);
+ uint32_t l7Ref = lb->_l7Ref;
+ assert(l7Ref < _l7.size());
+
+ const vespalib::string &word = _l7[l7Ref]._l7Word;
+#if 0
+ LOG(info,
+ "lookupOverflow: wordNum %d -> word %s, next l7 Pos is %d",
+ (int) wordNum,
+ word.c_str(),
+ (int) l7Ref);
+#endif
+ uint64_t l6Offset = _ssStartOffset;
+ StartOffset startOffset;
+ if (l7Ref > 0) {
+ l6Offset = _l7[l7Ref - 1]._l6Offset;
+ startOffset = _l7[l7Ref - 1]._l7StartOffset;
+ }
+
+ StartOffset l6StartOffset;
+ vespalib::string l6Word;
+
+ uint32_t l7Ref2 = _l7[l7Ref]._l7Ref;
+ if (l7Ref2 != noL7Ref()) {
+ // last L6 entry before overflow entry
+ const L7Entry &l6Ref = _l7[l7Ref2];
+ l6Word = l6Ref._l7Word;
+ l6StartOffset = l6Ref._l7StartOffset;
+ }
+
+ DC dL6;
+
+ dL6.copyParams(_ssd);
+ setDecoderPosition(dL6, _cb, l6Offset);
+
+#if 0
+ std::ostringstream txtStartOffset;
+ std::ostringstream txtL6StartOffset;
+ txtStartOffset << startOffset;
+ txtL6StartOffset << l6StartOffset;
+ LOG(info,
+ "Lookupoverflow l6Offset=%d, l6fileoffset=%s, fileoffset=%s",
+ (int) l6Offset,
+ txtL6StartOffset.str().c_str(),
+ txtStartOffset.str().c_str());
+#endif
+
+ UC64_DECODECONTEXT(o);
+ uint32_t length;
+ const bool bigEndian = true;
+ UC64_DECODECONTEXT_LOAD(o, dL6._);
+ bool overflow = ((oVal & TOP_BIT64) != 0);
+ oVal <<= 1;
+ length = 1;
+ UC64_READBITS_NS(o, EC);
+ assert(overflow);
+ (void) overflow;
+ UC64_DECODECONTEXT_STORE(o, dL6._);
+
+ readStartOffset(dL6,
+ startOffset,
+ K_VALUE_COUNTFILE_L6_FILEOFFSET,
+ K_VALUE_COUNTFILE_L6_ACCNUMDOCS);
+ UC64_DECODECONTEXT_LOAD(o, dL6._);
+ UC64_SKIPEXPGOLOMB_NS(o,
+ K_VALUE_COUNTFILE_L6_WORDNUM,
+ EC);
+ UC64_DECODECONTEXT_STORE(o, dL6._);
+
+ dL6.smallAlign(8);
+ const uint8_t *bytes = dL6.getByteCompr();
+ size_t lcp = *bytes;
+ ++bytes;
+ assert(lcp <= word.size());
+ vespalib::stringref suffix = reinterpret_cast<const char *>(bytes);
+ dL6.setByteCompr(bytes + suffix.size() + 1);
+ assert(lcp + suffix.size() == word.size());
+ assert(suffix == word.substr(lcp));
+ (void) lcp;
+ Counts counts;
+ dL6.readCounts(counts);
+#if 0
+ std::ostringstream txtCounts;
+ txtStartOffset.str("");
+ txtStartOffset << startOffset;
+ txtCounts << counts;
+ LOG(info,
+ "Lookupoverflow fileoffset=%s, counts=%s",
+ txtStartOffset.str().c_str(),
+ txtCounts.str().c_str());
+#endif
+ res._overflow = true;
+ res._counts = counts;
+ res._startOffset = startOffset;
+ res._l6StartOffset = l6StartOffset;
+ res._l6Word = l6Word;
+ res._lastWord = word;
+ res._res = true;
+ return res;
+}
+
+
+void
+PageDict4SSReader::checkPointWrite(vespalib::nbostream &out)
+{
+ out << _ssFileBitLen << _ssStartOffset;
+ out << _l7;
+ _ssd.checkPointWrite(out);
+ out << _spFileBitLen << _pFileBitLen;
+ out << _spStartOffset << _pStartOffset;
+ out << _spFirstPageNum << _spFirstPageOffset;
+ out << _pFirstPageNum << _pFirstPageOffset;
+ out << _overflows;
+}
+
+
+void
+PageDict4SSReader::checkPointRead(vespalib::nbostream &in)
+{
+ in >> _ssFileBitLen >> _ssStartOffset;
+ in >> _l7;
+ _ssd.checkPointRead(in);
+ in >> _spFileBitLen >> _pFileBitLen;
+ in >> _spStartOffset >> _pStartOffset;
+ in >> _spFirstPageNum >> _spFirstPageOffset;
+ in >> _pFirstPageNum >> _pFirstPageOffset;
+ in >> _overflows;
+}
+
+
+PageDict4SPLookupRes::
+PageDict4SPLookupRes(void)
+ : _l3Word(),
+ _lastWord(),
+ _l3StartOffset(),
+ _pageNum(0u),
+ _l3WordNum(1u)
+{
+}
+
+
+PageDict4SPLookupRes::
+~PageDict4SPLookupRes(void)
+{
+}
+
+
+void
+PageDict4SPLookupRes::
+lookup(const SSReader &ssReader,
+ const void *sparsePage,
+ const vespalib::stringref &key,
+ const vespalib::stringref &l6Word,
+ const vespalib::stringref &lastSPWord,
+ const StartOffset &l6StartOffset,
+ uint64_t l6WordNum,
+ uint64_t lowestPageNum)
+{
+// const uint64_t *p = static_cast<const uint64_t *>(sparsePage);
+
+ DC dL3; // L3 stream
+ DC dL4; // L4 stream
+ DC dL5; // L5 stream
+
+ dL3.copyParams(ssReader.getSSD());
+ dL4.copyParams(ssReader.getSSD());
+ dL5.copyParams(ssReader.getSSD());
+ uint32_t spStartOffset = 0;
+ if (l6WordNum == 1)
+ spStartOffset = ssReader._spFirstPageOffset;
+ setDecoderPositionInPage(dL5, sparsePage, spStartOffset);
+
+ uint32_t l5Size = dL5.readBits(15);
+ uint32_t l4Size = dL5.readBits(15);
+ uint32_t l3Entries = dL5.readBits(15);
+ uint32_t wordsSize = dL5.readBits(12);
+ uint32_t l3Residue = l3Entries;
+
+ assert(l3Entries > 0);
+ uint32_t l4Residue = getL4Entries(l3Entries);
+ uint32_t l5Residue = getL5Entries(l4Residue);
+
+ assert((l4Residue == 0) == (l4Size == 0));
+ assert((l5Residue == 0) == (l5Size == 0));
+
+ uint32_t l5Offset = getPageHeaderBitSize() + spStartOffset;
+ uint32_t l4Offset = l5Offset + l5Size;
+ uint32_t l3Offset = l4Offset + l4Size;
+
+ assert(l5Offset == dL5.getReadOffset());
+
+ uint32_t wordOffset = getPageByteSize() - wordsSize;
+ const char *wordBuf = static_cast<const char *>(sparsePage) + wordOffset;
+
+ _l3Word = l6Word;
+ _l3StartOffset = l6StartOffset;
+ vespalib::string word;
+ uint32_t l3WordOffset = 0;
+ uint32_t l5WordOffset = l3WordOffset;
+ uint64_t l3WordNum = l6WordNum;
+
+ while (l5Residue > 0) {
+ UC64_DECODECONTEXT(o);
+ uint32_t length;
+ uint64_t val64;
+ const bool bigEndian = true;
+ UC64_DECODECONTEXT_LOAD(o, dL5._);
+ UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_COUNTFILE_L5_WORDOFFSET, EC);
+ l5WordOffset += val64;
+ UC64_DECODECONTEXT_STORE(o, dL5._);
+ const char *l5WordBuf = wordBuf + l5WordOffset;
+ size_t lcp = *reinterpret_cast<const unsigned char *>(l5WordBuf);
+ ++l5WordBuf;
+ assert(lcp <= _l3Word.size());
+ word = _l3Word.substr(0, lcp) + l5WordBuf;
+ bool l3NotLessThanKey = !(word < key);
+ if (l3NotLessThanKey)
+ break;
+ _l3Word = word;
+ l3WordOffset = l5WordOffset + 2 + word.size() - lcp;
+ l5WordOffset = l3WordOffset;
+ readStartOffset(dL5,
+ _l3StartOffset,
+ K_VALUE_COUNTFILE_L5_FILEOFFSET,
+ K_VALUE_COUNTFILE_L5_ACCNUMDOCS);
+ UC64_DECODECONTEXT_LOAD(o, dL5._);
+ UC64_DECODEEXPGOLOMB_NS(o,
+ K_VALUE_COUNTFILE_L5_WORDNUM,
+ EC);
+ l3WordNum += val64;
+ UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_COUNTFILE_L5_L3OFFSET, EC);
+ l3Offset += val64;
+ UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_COUNTFILE_L5_L4OFFSET, EC);
+ l4Offset += val64;
+ UC64_DECODECONTEXT_STORE(o, dL5._);
+ --l5Residue;
+ assert(l4Residue >= getL5SkipStride());
+ l4Residue -= getL5SkipStride();
+ assert(l3Residue > getL5SkipStride() * getL4SkipStride());
+ l3Residue -= getL5SkipStride() * getL4SkipStride();
+ }
+ setDecoderPositionInPage(dL4, sparsePage, l4Offset);
+ uint32_t l4WordOffset = l3WordOffset;
+ while (l4Residue > 0) {
+ UC64_DECODECONTEXT(o);
+ uint32_t length;
+ uint64_t val64;
+ const bool bigEndian = true;
+ UC64_DECODECONTEXT_LOAD(o, dL4._);
+ UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_COUNTFILE_L4_WORDOFFSET, EC);
+ l4WordOffset += val64;
+ UC64_DECODECONTEXT_STORE(o, dL4._);
+ const char *l4WordBuf = wordBuf + l4WordOffset;
+ size_t lcp = *reinterpret_cast<const unsigned char *>(l4WordBuf);
+ ++l4WordBuf;
+ assert(lcp <= _l3Word.size());
+ word = _l3Word.substr(0, lcp) + l4WordBuf;
+ bool l3NotLessThanKey = !(word < key);
+ if (l3NotLessThanKey)
+ break;
+ _l3Word = word;
+ l3WordOffset = l4WordOffset + 2 + word.size() - lcp;
+ l4WordOffset = l3WordOffset;
+ readStartOffset(dL4,
+ _l3StartOffset,
+ K_VALUE_COUNTFILE_L4_FILEOFFSET,
+ K_VALUE_COUNTFILE_L4_ACCNUMDOCS);
+ UC64_DECODECONTEXT_LOAD(o, dL4._);
+ UC64_DECODEEXPGOLOMB_NS(o,
+ K_VALUE_COUNTFILE_L4_WORDNUM,
+ EC);
+ l3WordNum += val64;
+ UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_COUNTFILE_L4_L3OFFSET, EC);
+ l3Offset += val64;
+ UC64_DECODECONTEXT_STORE(o, dL4._);
+ --l4Residue;
+ assert(l3Residue > getL4SkipStride());
+ l3Residue -= getL4SkipStride();
+ }
+
+ setDecoderPositionInPage(dL3, sparsePage, l3Offset);
+ assert(l3Residue > 0);
+ while (l3Residue > 0) {
+ if (l3Residue > 1) {
+ const char *l3WordBuf = wordBuf + l3WordOffset;
+ size_t lcp = *reinterpret_cast<const unsigned char *>(l3WordBuf);
+ ++l3WordBuf;
+ assert(lcp <= _l3Word.size());
+ word = _l3Word.substr(0, lcp) + l3WordBuf;
+ bool l3NotLessThanKey = !(word < key);
+ if (l3NotLessThanKey)
+ break;
+ _l3Word = word;
+ l3WordOffset += 2 + word.size() - lcp;
+ } else {
+ word = lastSPWord;
+ assert(!word.empty()); // Should've stopped at SS level
+ bool l3NotLessThanKey = !(word < key);
+ if (l3NotLessThanKey)
+ break;
+ abort();
+ _l3Word = word;
+ }
+ readStartOffset(dL3,
+ _l3StartOffset,
+ K_VALUE_COUNTFILE_L3_FILEOFFSET,
+ K_VALUE_COUNTFILE_L3_ACCNUMDOCS);
+ UC64_DECODECONTEXT(o);
+ uint32_t length;
+ uint64_t val64;
+ const bool bigEndian = true;
+ UC64_DECODECONTEXT_LOAD(o, dL3._);
+ UC64_DECODEEXPGOLOMB_NS(o,
+ K_VALUE_COUNTFILE_L3_WORDNUM,
+ EC);
+ UC64_DECODECONTEXT_STORE(o, dL3._);
+ l3WordNum += val64;
+ --l3Residue;
+ }
+ _lastWord = word;
+ _pageNum = lowestPageNum + l3Entries - l3Residue;
+ _l3WordNum = l3WordNum;
+ // Lookup succeded if not run to end of L3 info.
+ // Shoudn't have tried to look at page if word < key, i.e. lookup at this
+ // level should always succeed.
+ assert(l3Residue > 0);
+}
+
+
+PageDict4PLookupRes::
+PageDict4PLookupRes(void)
+ : _counts(),
+ _startOffset(),
+ _wordNum(1u),
+ _res(false),
+ _nextWord(NULL)
+{
+}
+
+
+PageDict4PLookupRes::
+~PageDict4PLookupRes(void)
+{
+}
+
+bool
+PageDict4PLookupRes::
+lookup(const SSReader &ssReader,
+ const void *page,
+ const vespalib::stringref &key,
+ const vespalib::stringref &l3Word,
+ const vespalib::stringref &lastPWord,
+ const StartOffset &l3StartOffset,
+ uint64_t l3WordNum)
+{
+ DC dCounts; // counts stream (sparse counts)
+ DC dL1; // L1 stream
+ DC dL2; // L2 stream
+
+ dCounts.copyParams(ssReader.getSSD());
+ dL1.copyParams(ssReader.getSSD());
+ dL2.copyParams(ssReader.getSSD());
+
+ uint32_t pStartOffset = 0;
+ if (l3WordNum == 1)
+ pStartOffset = ssReader._pFirstPageOffset;
+ setDecoderPositionInPage(dL2, page, pStartOffset);
+
+ uint32_t l2Size = dL2.readBits(15);
+ uint32_t l1Size = dL2.readBits(15);
+ uint32_t countsEntries = dL2.readBits(15);
+ uint32_t wordsSize = dL2.readBits(12);
+ uint32_t countsResidue = countsEntries;
+
+ if (countsEntries == 0) {
+ /*
+ * Tried to lookup word that is between an overflow word and
+ * the previous word in the dictionary.
+ */
+ _startOffset = l3StartOffset;
+ _wordNum = l3WordNum;
+ return false;
+ }
+
+ uint32_t l1Residue = getL1Entries(countsEntries);
+ uint32_t l2Residue = getL2Entries(l1Residue);
+
+ assert((l1Residue == 0) == (l1Size == 0));
+ assert((l2Residue == 0) == (l2Size == 0));
+
+ uint32_t l2Offset = getPageHeaderBitSize() + pStartOffset;
+ uint32_t l1Offset = l2Offset + l2Size;
+ uint32_t countsOffset = l1Offset + l1Size;
+
+ assert(l2Offset == dL2.getReadOffset());
+
+ uint32_t wordOffset = getPageByteSize() - wordsSize;
+ const char *wordBuf = static_cast<const char *>(page) + wordOffset;
+
+ vespalib::string countsWord = l3Word;
+ StartOffset countsStartOffset = l3StartOffset;
+ vespalib::string word;
+ Counts counts;
+
+ uint32_t countsWordOffset = 0;
+ uint32_t l2WordOffset = countsWordOffset;
+ uint64_t wordNum = l3WordNum;
+ while (l2Residue > 0) {
+ UC64_DECODECONTEXT(o);
+ uint32_t length;
+ uint64_t val64;
+ const bool bigEndian = true;
+ UC64_DECODECONTEXT_LOAD(o, dL2._);
+ UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_COUNTFILE_L2_WORDOFFSET, EC);
+ l2WordOffset += val64;
+ UC64_DECODECONTEXT_STORE(o, dL2._);
+ const char *l2WordBuf = wordBuf + l2WordOffset;
+ size_t lcp = *reinterpret_cast<const unsigned char *>(l2WordBuf);
+ ++l2WordBuf;
+ assert(lcp <= countsWord.size());
+ word = countsWord.substr(0, lcp) + l2WordBuf;
+ bool countsNotLessThanKey = !(word < key);
+ if (countsNotLessThanKey)
+ break;
+ countsWord = word;
+ countsWordOffset = l2WordOffset + 2 + word.size() - lcp;
+ l2WordOffset = countsWordOffset;
+
+ readStartOffset(dL2,
+ countsStartOffset,
+ K_VALUE_COUNTFILE_L2_FILEOFFSET,
+ K_VALUE_COUNTFILE_L2_ACCNUMDOCS);
+ UC64_DECODECONTEXT_LOAD(o, dL2._);
+ UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_COUNTFILE_L2_COUNTOFFSET, EC);
+ countsOffset += val64;
+ UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_COUNTFILE_L2_L1OFFSET, EC);
+ l1Offset += val64;
+ UC64_DECODECONTEXT_STORE(o, dL2._);
+ --l2Residue;
+ assert(l1Residue >= getL2SkipStride());
+ l1Residue -= getL2SkipStride();
+ assert(countsResidue > getL2SkipStride() * getL1SkipStride());
+ countsResidue -= getL2SkipStride() * getL1SkipStride();
+ wordNum += getL2SkipStride() * getL1SkipStride();
+ }
+ setDecoderPositionInPage(dL1, page, l1Offset);
+ uint32_t l1WordOffset = countsWordOffset;
+ while (l1Residue > 0) {
+ UC64_DECODECONTEXT(o);
+ uint32_t length;
+ uint64_t val64;
+ const bool bigEndian = true;
+ UC64_DECODECONTEXT_LOAD(o, dL1._);
+ UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_COUNTFILE_L1_WORDOFFSET, EC);
+ l1WordOffset += val64;
+ UC64_DECODECONTEXT_STORE(o, dL1._);
+ const char *l1WordBuf = wordBuf + l1WordOffset;
+ size_t lcp = *reinterpret_cast<const unsigned char *>(l1WordBuf);
+ ++l1WordBuf;
+ assert(lcp <= countsWord.size());
+ word = countsWord.substr(0, lcp) + l1WordBuf;
+ bool countsNotLessThanKey = !(word < key);
+ if (countsNotLessThanKey)
+ break;
+ countsWord = word;
+ countsWordOffset = l1WordOffset + 2 + word.size() - lcp;
+ l1WordOffset = countsWordOffset;
+
+ readStartOffset(dL1,
+ countsStartOffset,
+ K_VALUE_COUNTFILE_L1_FILEOFFSET,
+ K_VALUE_COUNTFILE_L1_ACCNUMDOCS);
+ UC64_DECODECONTEXT_LOAD(o, dL1._);
+ UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_COUNTFILE_L1_COUNTOFFSET, EC);
+ countsOffset += val64;
+ UC64_DECODECONTEXT_STORE(o, dL1._);
+ --l1Residue;
+ assert(countsResidue > getL1SkipStride());
+ countsResidue -= getL1SkipStride();
+ wordNum += getL1SkipStride();
+ }
+
+ setDecoderPositionInPage(dCounts, page, countsOffset);
+ assert(countsResidue > 0);
+ while (countsResidue > 0) {
+ dCounts.readCounts(counts);
+ if (countsResidue > 1) {
+ const char *countsWordBuf = wordBuf + countsWordOffset;
+ size_t lcp =
+ *reinterpret_cast<const unsigned char *>(countsWordBuf);
+ ++countsWordBuf;
+ assert(lcp <= countsWord.size());
+ word = countsWord.substr(0, lcp) + countsWordBuf;
+ bool countsNotLessThanKey = !(word < key);
+ if (countsNotLessThanKey)
+ break;
+ countsWordOffset += 2 + word.size() - lcp;
+ countsWord = word;
+ } else {
+ word = lastPWord;
+ assert(!word.empty()); // Should've stopped at SS level
+ bool countsNotLessThanKey = !(word < key);
+ if (countsNotLessThanKey)
+ break;
+ }
+ countsStartOffset.adjust(counts);
+ ++wordNum;
+ --countsResidue;
+ }
+ _startOffset = countsStartOffset;
+ _wordNum = wordNum;
+ // Lookup succeded if word found.
+ if (key == word) {
+ _counts = counts;
+ _res = true;
+ } else {
+ // Shouldn't have tried to look at page if word < key, and we know
+ // that key != word. Thus we can assert that key < word.
+ assert(key < word);
+ }
+ return _res;
+}
+
+PageDict4Reader::PageDict4Reader(const SSReader &ssReader,
+ DC &spd,
+ DC &pd)
+ : _pd(pd),
+ _countsResidue(0),
+ _ssReader(ssReader),
+ _pFileBitLen(ssReader._pFileBitLen),
+ _startOffset(),
+ _overflowPage(false),
+ _counts(),
+ _cc(),
+ _ce(),
+ _words(),
+ _wc(),
+ _we(),
+ _lastWord(),
+ _lastSSWord(),
+ _spd(spd),
+ _l3Residue(0u),
+ _spwords(),
+ _spwc(),
+ _spwe(),
+ _ssd(),
+ _wordNum(1u)
+{
+}
+
+
+void
+PageDict4Reader::setup()
+{
+ _ssd.copyParams(_ssReader.getSSD());
+ _spd.copyParams(_ssReader.getSSD());
+ _pd.copyParams(_ssReader.getSSD());
+ assert(_pd.getReadOffset() == _ssReader._pStartOffset);
+ assert(_spd.getReadOffset() == _ssReader._spStartOffset);
+ // Handle extra padding after file header
+ _pd.skipBits(getFileHeaderPad(_ssReader._pStartOffset));
+ _spd.skipBits(getFileHeaderPad(_ssReader._spStartOffset));
+ assert(_pFileBitLen >= _pd.getReadOffset());
+ if (_pFileBitLen > _pd.getReadOffset()) {
+ setupPage();
+ setupSPage();
+ }
+
+ const ComprBuffer &sscb = _ssReader._cb;
+ uint32_t ssStartOffset = _ssReader._ssStartOffset;
+ setDecoderPosition(_ssd, sscb, ssStartOffset);
+}
+
+
+PageDict4Reader::~PageDict4Reader(void)
+{
+}
+
+
+void
+PageDict4Reader::setupPage(void)
+{
+#if 0
+ LOG(info,
+ "setupPage(%ld), "
+ (long int) _pd.getReadOffset());
+#endif
+ uint32_t l2Size = _pd.readBits(15);
+ uint32_t l1Size = _pd.readBits(15);
+ uint32_t countsEntries = _pd.readBits(15);
+ uint32_t wordsSize = _pd.readBits(12);
+ _countsResidue = countsEntries;
+
+#if 0
+ _pd.skipBits(l2Size + l1Size);
+ Counts counts;
+#else
+ if (countsEntries == 0 && l1Size == 0 && l2Size == 0) {
+ _pd.smallAlign(64);
+ _overflowPage = true;
+ return;
+ }
+ _overflowPage = false;
+ assert(countsEntries > 0);
+ uint32_t l1Residue = getL1Entries(countsEntries);
+ uint32_t l2Residue = getL2Entries(l1Residue);
+
+ uint64_t beforePos = _pd.getReadOffset();
+ Counts counts;
+ StartOffset startOffset;
+ while (l2Residue > 0) {
+ UC64_DECODECONTEXT(o);
+ uint32_t length;
+ const bool bigEndian = true;
+ UC64_DECODECONTEXT_LOAD(o, _pd._);
+ UC64_SKIPEXPGOLOMB_NS(o, K_VALUE_COUNTFILE_L2_WORDOFFSET, EC);
+ UC64_DECODECONTEXT_STORE(o, _pd._);
+ readStartOffset(_pd,
+ startOffset,
+ K_VALUE_COUNTFILE_L2_FILEOFFSET,
+ K_VALUE_COUNTFILE_L2_ACCNUMDOCS);
+ UC64_DECODECONTEXT_LOAD(o, _pd._);
+ UC64_SKIPEXPGOLOMB_NS(o, K_VALUE_COUNTFILE_L2_COUNTOFFSET, EC);
+ UC64_SKIPEXPGOLOMB_NS(o, K_VALUE_COUNTFILE_L2_L1OFFSET, EC);
+ UC64_DECODECONTEXT_STORE(o, _pd._);
+ --l2Residue;
+ }
+ assert(_pd.getReadOffset() == beforePos + l2Size);
+ while (l1Residue > 0) {
+ UC64_DECODECONTEXT(o);
+ uint32_t length;
+ const bool bigEndian = true;
+ UC64_DECODECONTEXT_LOAD(o, _pd._);
+ UC64_SKIPEXPGOLOMB_NS(o, K_VALUE_COUNTFILE_L1_WORDOFFSET, EC);
+ UC64_DECODECONTEXT_STORE(o, _pd._);
+ readStartOffset(_pd,
+ startOffset,
+ K_VALUE_COUNTFILE_L1_FILEOFFSET,
+ K_VALUE_COUNTFILE_L1_ACCNUMDOCS);
+ UC64_DECODECONTEXT_LOAD(o, _pd._);
+ UC64_SKIPEXPGOLOMB_NS(o, K_VALUE_COUNTFILE_L1_COUNTOFFSET, EC);
+ UC64_DECODECONTEXT_STORE(o, _pd._);
+ --l1Residue;
+ }
+ assert(_pd.getReadOffset() == beforePos + l2Size + l1Size);
+ (void) beforePos;
+#endif
+ _counts.clear();
+ while (countsEntries > 0) {
+ _pd.readCounts(counts);
+ _counts.push_back(counts);
+ --countsEntries;
+ }
+ _cc = _counts.begin();
+ _ce = _counts.end();
+ uint32_t pageOffset = _pd.getReadOffset() & (getPageBitSize() - 1);
+ uint32_t padding = getPageBitSize() - wordsSize * 8 - pageOffset;
+ _pd.skipBits(padding);
+ _words.resize(wordsSize);
+ _pd.readBytes(reinterpret_cast<uint8_t *>(&_words[0]), wordsSize);
+ _wc = _words.begin();
+ _we = _words.end();
+}
+
+
+void
+PageDict4Reader::setupSPage(void)
+{
+#if 0
+ LOG(info, "setupSPage(%d),", (int) _spd.getReadOffset());
+#endif
+ uint32_t l5Size = _spd.readBits(15);
+ uint32_t l4Size = _spd.readBits(15);
+ uint32_t l3Entries = _spd.readBits(15);
+ uint32_t wordsSize = _spd.readBits(12);
+ _l3Residue = l3Entries;
+
+#if 0
+ _spd.skipBits(l5Size + l4Size);
+#else
+
+ assert(l3Entries > 0);
+ uint32_t l4Residue = getL4Entries(l3Entries);
+ uint32_t l5Residue = getL5Entries(l4Residue);
+
+ uint64_t beforePos = _spd.getReadOffset();
+ StartOffset startOffset;
+ while (l5Residue > 0) {
+ UC64_DECODECONTEXT(o);
+ uint32_t length;
+ const bool bigEndian = true;
+ UC64_DECODECONTEXT_LOAD(o, _spd._);
+ UC64_SKIPEXPGOLOMB_NS(o, K_VALUE_COUNTFILE_L5_WORDOFFSET, EC);
+ UC64_DECODECONTEXT_STORE(o, _spd._);
+ readStartOffset(_spd,
+ startOffset,
+ K_VALUE_COUNTFILE_L5_FILEOFFSET,
+ K_VALUE_COUNTFILE_L5_ACCNUMDOCS);
+ UC64_DECODECONTEXT_LOAD(o, _spd._);
+ UC64_SKIPEXPGOLOMB_NS(o, K_VALUE_COUNTFILE_L5_WORDNUM, EC);
+ UC64_SKIPEXPGOLOMB_NS(o, K_VALUE_COUNTFILE_L5_L3OFFSET, EC);
+ UC64_SKIPEXPGOLOMB_NS(o, K_VALUE_COUNTFILE_L5_L4OFFSET, EC);
+ UC64_DECODECONTEXT_STORE(o, _spd._);
+ --l5Residue;
+ }
+ assert(_spd.getReadOffset() == beforePos + l5Size);
+ while (l4Residue > 0) {
+ UC64_DECODECONTEXT(o);
+ uint32_t length;
+ const bool bigEndian = true;
+ UC64_DECODECONTEXT_LOAD(o, _spd._);
+ UC64_SKIPEXPGOLOMB_NS(o, K_VALUE_COUNTFILE_L4_WORDOFFSET, EC);
+ UC64_DECODECONTEXT_STORE(o, _spd._);
+ readStartOffset(_spd,
+ startOffset,
+ K_VALUE_COUNTFILE_L4_FILEOFFSET,
+ K_VALUE_COUNTFILE_L4_ACCNUMDOCS);
+ UC64_DECODECONTEXT_LOAD(o, _spd._);
+ UC64_SKIPEXPGOLOMB_NS(o, K_VALUE_COUNTFILE_L4_WORDNUM, EC);
+ UC64_SKIPEXPGOLOMB_NS(o, K_VALUE_COUNTFILE_L4_L3OFFSET, EC);
+ UC64_DECODECONTEXT_STORE(o, _spd._);
+ --l4Residue;
+ }
+ assert(_spd.getReadOffset() == beforePos + l5Size + l4Size);
+ (void) l4Size;
+ (void) l5Size;
+ (void) beforePos;
+#endif
+ while (l3Entries > 1) {
+ readStartOffset(_spd,
+ startOffset,
+ K_VALUE_COUNTFILE_L3_FILEOFFSET,
+ K_VALUE_COUNTFILE_L3_ACCNUMDOCS);
+ UC64_DECODECONTEXT(o);
+ uint32_t length;
+ const bool bigEndian = true;
+ UC64_DECODECONTEXT_LOAD(o, _spd._);
+ UC64_SKIPEXPGOLOMB_NS(o, K_VALUE_COUNTFILE_L3_WORDNUM, EC);
+ UC64_DECODECONTEXT_STORE(o, _spd._);
+ --l3Entries;
+ }
+ uint32_t pageOffset = _spd.getReadOffset() & (getPageBitSize() - 1);
+ uint32_t padding = getPageBitSize() - wordsSize * 8 - pageOffset;
+ _spd.skipBits(padding);
+ _spwords.resize(wordsSize);
+ _spd.readBytes(reinterpret_cast<uint8_t *>(&_spwords[0]), wordsSize);
+ _spwc = _spwords.begin();
+ _spwe = _spwords.end();
+}
+
+
+void
+PageDict4Reader::decodePWord(vespalib::string &word)
+{
+ assert(_wc != _we);
+ size_t lcp = static_cast<unsigned char>(*_wc);
+ ++_wc;
+ assert(lcp <= _lastWord.size());
+ assert(_wc != _we);
+ word = _lastWord.substr(0, lcp);
+ while (*_wc != 0) {
+ word += *_wc;
+ assert(_wc != _we);
+ ++_wc;
+ }
+ assert(_wc != _we);
+ ++_wc;
+}
+
+
+void
+PageDict4Reader::decodeSPWord(vespalib::string &word)
+{
+ assert(_spwc != _spwe);
+ size_t lcp = static_cast<unsigned char>(*_spwc);
+ ++_spwc;
+ assert(lcp <= _lastWord.size());
+ assert(_spwc != _spwe);
+ word = _lastWord.substr(0, lcp);
+ while (*_spwc != 0) {
+ word += *_spwc;
+ assert(_spwc != _spwe);
+ ++_spwc;
+ }
+ assert(_spwc != _spwe);
+ ++_spwc;
+}
+
+
+void
+PageDict4Reader::decodeSSWord(vespalib::string &word)
+{
+ uint32_t l6Offset = _ssd.getReadOffset();
+
+ while (l6Offset < _ssReader._ssFileBitLen) {
+ UC64_DECODECONTEXT(o);
+ uint32_t length;
+ const bool bigEndian = true;
+ UC64_DECODECONTEXT_LOAD(o, _ssd._);
+ bool overflow = ((oVal & TOP_BIT64) != 0);
+ oVal <<= 1;
+ length = 1;
+ UC64_READBITS_NS(o, EC);
+ UC64_DECODECONTEXT_STORE(o, _ssd._);
+
+ StartOffset startOffset;
+ readStartOffset(_ssd,
+ startOffset,
+ K_VALUE_COUNTFILE_L6_FILEOFFSET,
+ K_VALUE_COUNTFILE_L6_ACCNUMDOCS);
+ UC64_DECODECONTEXT_LOAD(o, _ssd._);
+ UC64_SKIPEXPGOLOMB_NS(o, K_VALUE_COUNTFILE_L6_WORDNUM, EC);
+ UC64_DECODECONTEXT_STORE(o, _ssd._);
+
+ _ssd.smallAlign(8);
+ const uint8_t *bytes = _ssd.getByteCompr();
+ size_t lcp = *bytes;
+ ++bytes;
+ assert(lcp <= _lastSSWord.size());
+ word = _lastSSWord.substr(0, lcp);
+ word += reinterpret_cast<const char *>(bytes);
+ _ssd.setByteCompr(bytes + word.size() + 1 - lcp);
+ _lastSSWord = word;
+#if 0
+ LOG(info,
+ "word is %s LCP %d, overflow=%s",
+ word.c_str(),
+ (int) lcp,
+ overflow ? "true" : "false");
+#endif
+ if (overflow) {
+ Counts counts;
+ _ssd.readCounts(counts);
+ } else {
+ UC64_DECODECONTEXT_LOAD(o, _ssd._);
+ UC64_SKIPEXPGOLOMB_NS(o,
+ K_VALUE_COUNTFILE_L6_PAGENUM,
+ EC);
+ UC64_DECODECONTEXT_STORE(o, _ssd._);
+ break;
+ }
+ l6Offset = _ssd.getReadOffset();
+ }
+}
+
+void
+PageDict4Reader::readCounts(vespalib::string &word,
+ uint64_t &wordNum,
+ Counts &counts)
+{
+ if (_countsResidue > 0) {
+ assert(_cc != _ce);
+ counts = *_cc;
+ ++_cc;
+ if (_countsResidue > 1) {
+ assert(_cc != _ce);
+ } else {
+ assert(_cc == _ce);
+ }
+ _startOffset.adjust(counts);
+ if (_countsResidue > 1) {
+ decodePWord(word);
+ _lastWord = word;
+ if (_countsResidue == 2) {
+ assert(_wc == _we);
+ } else {
+ assert(_wc != _we);
+ }
+ } else {
+ assert(_l3Residue > 0);
+ if (_l3Residue > 1)
+ decodeSPWord(word);
+ else
+ decodeSSWord(word);
+ _lastWord = word;
+ --_l3Residue;
+ }
+ --_countsResidue;
+ if (_countsResidue == 0) {
+ assert((_pd.getReadOffset() & (getPageBitSize() - 1)) == 0);
+ if (_pd.getReadOffset() < _pFileBitLen) {
+ setupPage();
+ if (_l3Residue == 0)
+ setupSPage();
+ } else {
+ assert(_pd.getReadOffset() == _pFileBitLen);
+ }
+ }
+ wordNum = _wordNum++;
+ } else if (_overflowPage) {
+ readOverflowCounts(word, counts);
+ _overflowPage = false;
+ assert(_l3Residue > 0);
+ vespalib::string tword;
+ if (_l3Residue > 1)
+ decodeSPWord(tword);
+ else
+ decodeSSWord(tword);
+ assert(tword == word);
+ --_l3Residue;
+ _lastWord = word;
+ _pd.align(getPageBitSize());
+ if (_pd.getReadOffset() < _pFileBitLen) {
+ setupPage();
+ if (_l3Residue == 0)
+ setupSPage();
+ } else {
+ assert(_pd.getReadOffset() == _pFileBitLen);
+ }
+ wordNum = _wordNum++;
+ } else {
+ // Mark end of file.
+ word.clear();
+ counts.clear();
+ wordNum = search::index::DictionaryFileSeqRead::noWordNumHigh();
+ }
+}
+
+
+void
+PageDict4Reader::readOverflowCounts(vespalib::string &word,
+ Counts &counts)
+{
+ uint64_t wordNum = _pd.readBits(64);
+
+ PageDict4SSLookupRes wtsslr;
+ wtsslr = _ssReader.lookupOverflow(wordNum);
+ assert(wtsslr._overflow);
+ assert(wtsslr._res);
+
+ word = wtsslr._lastWord;
+ counts = wtsslr._counts;
+
+#if 0
+ std::ostringstream txtCounts;
+ std::ostringstream txtStartOffset;
+ std::ostringstream txtLRStartOffset;
+
+ txtCounts << counts;
+ txtStartOffset << _startOffset;
+ txtLRStartOffset << wtsslr._startOffset;
+ LOG(info,
+ "readOverflowCounts _wordNum=%" PRIu64
+ ", counts=%s, startOffset=%s (should be %s)",
+ _wordNum,
+ txtCounts.str().c_str(),
+ txtLRStartOffset.str().c_str(),
+ txtStartOffset.str().c_str());
+#endif
+
+ assert(wtsslr._startOffset == _startOffset);
+ _startOffset.adjust(counts);
+}
+
+void
+PageDict4Reader::checkPointWrite(vespalib::nbostream &out)
+{
+ out << _countsResidue;
+ out << _overflowPage;
+ out << _counts;
+ size_t ccOff = _cc - _counts.begin();
+ size_t ceOff = _ce - _counts.begin();
+ assert(ceOff == _counts.size());
+ out << ccOff << ceOff;
+ out << _words;
+ size_t wcOff = _wc - _words.begin();
+ size_t weOff = _we - _words.begin();
+ assert(weOff = _words.size());
+ out << wcOff << weOff;
+ out << _lastWord;
+ out << _lastSSWord;
+ out << _l3Residue;
+ out << _spwords;
+ size_t spwcOff = _spwc - _spwords.begin();
+ size_t spweOff = _spwe - _spwords.begin();
+ assert(spweOff == _spwords.size());
+ out << spwcOff << spweOff;
+ _ssd.checkPointWrite(out);
+ out << _ssd.getReadOffset();
+ out << _wordNum;
+}
+
+void
+PageDict4Reader::checkPointRead(vespalib::nbostream &in)
+{
+ in >> _countsResidue;
+ in >> _overflowPage;
+ in >> _counts;
+ size_t ccOff;
+ size_t ceOff;
+ in >> ccOff >> ceOff;
+ _cc = _counts.begin() + ccOff;
+ _ce = _counts.begin() + ceOff;
+ in >> _words;
+ size_t wcOff;
+ size_t weOff;
+ in >> wcOff >> weOff;
+ _wc = _words.begin() + wcOff;
+ _we = _words.begin() + weOff;
+ in >> _lastWord;
+ in >> _lastSSWord;
+ in >> _l3Residue;
+ in >> _spwords;
+ size_t spwcOff;
+ size_t spweOff;
+ in >> spwcOff >> spweOff;
+ _spwc = _spwords.begin() + spwcOff;
+ _spwe = _spwords.begin() + spweOff;
+ _ssd.checkPointRead(in);
+ int64_t ssReadOffset;
+ in >> ssReadOffset;
+ const ComprBuffer &sscb = _ssReader._cb;
+ setDecoderPosition(_ssd, sscb, ssReadOffset);
+ in >> _wordNum;
+}
+
+} // namespace bitcompression
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/bitcompression/pagedict4.h b/searchlib/src/vespa/searchlib/bitcompression/pagedict4.h
new file mode 100644
index 00000000000..895a15aa9f3
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/bitcompression/pagedict4.h
@@ -0,0 +1,836 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <limits>
+#include <vespa/vespalib/stllike/string.h>
+#include "countcompression.h"
+
+namespace search
+{
+
+namespace bitcompression
+{
+
+
+class PageDict4StartOffset
+{
+public:
+ uint64_t _fileOffset;
+ uint64_t _accNumDocs;
+
+ PageDict4StartOffset(void)
+ : _fileOffset(0u),
+ _accNumDocs(0u)
+ {
+ }
+
+ PageDict4StartOffset(uint64_t fileOffset, uint64_t accNumDocs)
+ : _fileOffset(fileOffset),
+ _accNumDocs(accNumDocs)
+ {
+ }
+
+ bool
+ operator>=(const PageDict4StartOffset &rhs) const
+ {
+ if (_fileOffset >= rhs._fileOffset) {
+ assert(_accNumDocs >= rhs._accNumDocs);
+ return true;
+ }
+ assert(_accNumDocs < rhs._accNumDocs);
+ return false;
+ }
+
+ bool
+ operator>(const PageDict4StartOffset &rhs) const
+ {
+ if (_fileOffset > rhs._fileOffset) {
+ assert(_accNumDocs > rhs._accNumDocs);
+ return true;
+ }
+ assert(_accNumDocs <= rhs._accNumDocs);
+ return false;
+ }
+
+ bool
+ operator==(const PageDict4StartOffset &rhs) const
+ {
+ if (_fileOffset == rhs._fileOffset) {
+ assert(_accNumDocs == rhs._accNumDocs);
+ return true;
+ }
+ assert(_accNumDocs != rhs._accNumDocs);
+ if (_fileOffset < rhs._fileOffset) {
+ assert(_accNumDocs < rhs._accNumDocs);
+ } else {
+ assert(_accNumDocs > rhs._accNumDocs);
+ }
+ return false;
+ }
+
+ void
+ adjust(const index::PostingListCounts &counts)
+ {
+ _fileOffset += counts._bitLength;
+ _accNumDocs += counts._numDocs;
+ }
+};
+
+std::ostream &
+operator<<(std::ostream &stream, const index::PostingListCounts &counts);
+
+class PageDict4PageParams
+{
+public:
+ using Counts = index::PostingListCounts;
+ typedef PageDict4StartOffset StartOffset;
+
+ static uint32_t
+ getPageByteSize(void)
+ {
+ return 4096;
+ }
+
+ static uint32_t
+ getPageBitSize(void)
+ {
+ return getPageByteSize() * 8;
+ }
+
+ static uint32_t
+ getPageHeaderBitSize(void)
+ {
+ return 15u + 15u + 15u + 12u;
+ }
+
+ static uint32_t
+ getMaxFileHeaderPad(void)
+ {
+ return 999u;
+ }
+
+ static uint32_t
+ getFileHeaderPad(uint32_t offset);
+
+ static uint32_t
+ getL1SkipStride(void)
+ {
+ return 16;
+ }
+
+ static uint32_t
+ getL2SkipStride(void)
+ {
+ return 8;
+ }
+
+ static uint32_t
+ getL4SkipStride(void)
+ {
+ return 16;
+ }
+
+ static uint32_t
+ getL5SkipStride(void)
+ {
+ return 8;
+ }
+
+ static uint32_t
+ getL7SkipStride(void)
+ {
+ return 8;
+ }
+
+ static uint32_t
+ noL7Ref(void)
+ {
+ return std::numeric_limits<uint32_t>::max();
+ }
+
+ static uint32_t
+ getL1Entries(uint32_t countsEntries)
+ {
+ return (countsEntries - 1) / getL1SkipStride();
+ }
+
+ static uint32_t
+ getL2Entries(uint32_t l1Entries)
+ {
+ return l1Entries / getL2SkipStride();
+ }
+
+ static uint32_t
+ getL4Entries(uint32_t l3Entries)
+ {
+ return (l3Entries - 1) / getL4SkipStride();
+ }
+
+ static uint32_t
+ getL5Entries(uint32_t l4Entries)
+ {
+ return l4Entries / getL5SkipStride();
+ }
+};
+/*
+ * Sparse sparse layout for random access word counts:
+ *
+ * selector bit
+ * 0 => L6 entry, with word, data file deltas
+ * 1 => overflow entry, with word, data file deltas, sparse counts
+ *
+ * Read from file to memory (compressed mix of L6 entries and overflow entries)
+ *
+ * Uncompressed L7 array in memory, usable for binary search.
+ *
+ * File header should contain number of entries
+ */
+
+class PageDict4SSWriter : public PageDict4PageParams
+{
+ typedef PostingListCountFileEncodeContext EC;
+ typedef EC SSEC;
+
+private:
+ EC &_eL6; // L6 stream
+ vespalib::string _l6Word; // L6 word
+ StartOffset _l6StartOffset; // file offsets + accnum
+ uint64_t _l6PageNum; // Page number for last L6 entry
+ uint32_t _l6SparsePageNum; // Sparse page number for last L6 entry
+ uint64_t _l6WordNum;
+
+public:
+ PageDict4SSWriter(SSEC &sse);
+
+ ~PageDict4SSWriter(void);
+
+ /*
+ * Add L6 skip entry.
+ *
+ * startOffset represents file position / accNumDocs after word.
+ */
+ void
+ addL6Skip(const vespalib::stringref &word,
+ const StartOffset &startOffset,
+ uint64_t wordNum,
+ uint64_t pageNum,
+ uint32_t sparsePageNum);
+
+ /*
+ * Add overflow counts entry.
+ *
+ * startOffset represents file position / accNumDocs at start of entry.
+ */
+ void
+ addOverflowCounts(const vespalib::stringref &word,
+ const Counts &counts,
+ const StartOffset &startOffset,
+ uint64_t wordNum);
+
+ void
+ flush(void);
+
+
+ void
+ checkPointWrite(vespalib::nbostream &out);
+
+ void
+ checkPointRead(vespalib::nbostream &in);
+};
+
+
+/*
+ * Sparse page layout for random access word counts:
+ *
+ * 15 bits L5 size
+ * 15 bits L4 size
+ * 15 bits number of L3 entries in page
+ * this can be used to derive number of L4 and L5 entries, using
+ * skip stride info.
+ * 12 bits word string size
+ * L5 data (word ref delta, offset to L4 and L3 data, data file delta)
+ * L4 data (word ref delta, offset to L3 data, data file delta)
+ * L3 data (word ref delta, offset to full page file is implicit, data file delta)
+ * padding
+ * word strings (LCP + suffix + NUL)
+ *
+ * File header should be defined
+ */
+
+class PageDict4SPWriter : public PageDict4PageParams
+{
+ typedef PostingListCountFileEncodeContext EC;
+ typedef PageDict4SSWriter SSWriter;
+
+private:
+ EC _eL3; // L3 stream
+ ComprFileWriteContext _wcL3;// L3 buffer
+ EC _eL4; // L4 stream
+ ComprFileWriteContext _wcL4;// L4 buffer
+ EC _eL5; // L5 stream
+ ComprFileWriteContext _wcL5;// L5 buffer
+ vespalib::string _l3Word; // last L3 word written
+ vespalib::string _l4Word; // last L4 word written
+ vespalib::string _l5Word; // last L5 word written
+ vespalib::string _l6Word; // word before this sparse page
+ uint32_t _l3WordOffset; // Offset for next L3 word to write
+ uint32_t _l4WordOffset; // Offset for last L4 word written
+ uint32_t _l5WordOffset; // Offset for last L5 word written
+
+ // file offsets
+ StartOffset _l3StartOffset;
+
+ // Offsets in data files for last L4 entry
+ StartOffset _l4StartOffset;
+
+ // Offsets in data files for last L5 entry
+ StartOffset _l5StartOffset;
+
+ // Offsets in data files for last L6 entry
+ StartOffset _l6StartOffset;
+
+ uint64_t _l3WordNum; // word number next L3 entry to write
+ uint64_t _l4WordNum; // word number last L4 entry
+ uint64_t _l5WordNum; // word number last L5 entry
+ uint64_t _l6WordNum; // word number last L6 entry
+
+ uint32_t _curL3OffsetL4; // Offset in L3 for last L4 entry
+ uint32_t _curL3OffsetL5; // Offset in L3 for last L5 entry
+ uint32_t _curL4OffsetL5; // Offset in L4 for last L5 entry
+
+ uint32_t _headerSize; // Size of page header
+
+ uint32_t _l3Entries; // Number of L3 entries on page
+ uint32_t _l4StrideCheck; // L3 entries since last L4 entry
+ uint32_t _l5StrideCheck; // L4 entries since last L5 entry
+
+ uint32_t _l3Size; // Size of L3 entries
+ uint32_t _l4Size; // Size of L4 entries
+ uint32_t _l5Size; // Size of L5 entries
+ uint32_t _prevL3Size; // Previous size of L3 entries
+ uint32_t _prevL4Size; // Previous size of L4 entries
+ uint32_t _prevL5Size; // Previous size of L5 entries
+ uint32_t _prevWordsSize; // previous size of words
+ uint32_t _sparsePageNum;
+ uint32_t _l3PageNum; // Page number for last L3 entry
+ std::vector<char> _words; // Word buffer
+
+ // Sparse sparse entries and counts that don't fit in a page
+ SSWriter &_ssWriter;
+ // Encode context where paged sparse counts go
+ EC &_spe;
+
+public:
+ PageDict4SPWriter(SSWriter &sparseSparsewriter,
+ EC &spe);
+
+ ~PageDict4SPWriter(void);
+
+ void
+ setup();
+
+ void
+ flushPage(void);
+
+ void
+ flush(void);
+
+ void
+ resetPage(void);
+
+ void
+ addL3Skip(const vespalib::stringref &word,
+ const StartOffset &startOffset,
+ uint64_t wordNum,
+ uint64_t pageNum);
+
+
+ void
+ addL4Skip(size_t &lcp);
+
+ void
+ addL5Skip(size_t &lcp);
+
+ bool
+ empty(void) const
+ {
+ return _l3Entries == 0;
+ }
+
+ uint32_t
+ getSparsePageNum(void) const
+ {
+ return _sparsePageNum;
+ }
+
+ /*
+ * Add overflow counts entry.
+ *
+ * startOffset represents file position / accNumDocs at start of entry.
+ */
+ void
+ addOverflowCounts(const vespalib::stringref &word,
+ const Counts &counts,
+ const StartOffset &startOffset,
+ uint64_t wordNum)
+ {
+ _ssWriter.addOverflowCounts(word, counts, startOffset, wordNum);
+ }
+
+ void
+ checkPointWrite(vespalib::nbostream &out);
+
+ void
+ checkPointRead(vespalib::nbostream &in);
+};
+
+/*
+ * Page layout for random access word counts:
+ *
+ * 15 bits L2 size
+ * 15 bits L1 size
+ * 15 bits number of words in page
+ * this can be used to derive number of L1 and L2 entries, using
+ * skip stride info.
+ * 12 bits word string size
+ * L2 data (word ref delta, offset to L1 and counts data, data file delta)
+ * L1 data (word ref delta, offset to counts, data file delta)
+ * counts (sparse count)
+ * padding
+ * word strings (LCP + suffix + NULL)
+ *
+ * Alternate layout for overflow page:
+ *
+ * 15 bits L2 size hardcoded to 0
+ * 15 bits L1 size hardcoded to 0
+ * 15 bits number of words in page, hardcoded to 0
+ * 12 bits word string size, hardcoded to 0
+ * More info in sparse sparse file.
+ *
+ * File header should be defined
+ */
+
+class PageDict4PWriter : public PageDict4PageParams
+{
+public:
+ typedef PageDict4SPWriter SPWriter;
+ typedef PostingListCountFileEncodeContext EC;
+
+private:
+ EC _eCounts; // counts stream (sparse counts)
+ ComprFileWriteContext _wcCounts;// counts buffer
+ EC _eL1; // L1 stream
+ ComprFileWriteContext _wcL1;// L1 buffer
+ EC _eL2; // L2 stream
+ ComprFileWriteContext _wcL2;// L2 buffer
+ vespalib::string _countsWord; // last counts on page
+ vespalib::string _l1Word; // Last L1 word written
+ vespalib::string _l2Word; // Last L2 word written
+ vespalib::string _l3Word; // word before this page
+ vespalib::string _pendingCountsWord; // pending counts word (counts written)
+ uint32_t _countsWordOffset; // Offset for next counts word to write
+ uint32_t _l1WordOffset; // Offset of last L1 word written
+ uint32_t _l2WordOffset; // Offset of last L2 word written
+
+ // file offsets
+ StartOffset _countsStartOffset;
+
+ // Offsets in data files for last L1 entry
+ StartOffset _l1StartOffset;
+
+ // Offsets in data files for last L2 entry
+ StartOffset _l2StartOffset;
+
+ // Offsets in data files for last L3 entry
+ StartOffset _l3StartOffset;
+
+ uint32_t _curCountOffsetL1; // Offset in eCounts for last L1 entry
+ uint32_t _curCountOffsetL2; // Offset in eCounts for last L2 entry
+ uint32_t _curL1OffsetL2; // Offset in eL1 for last L2 entry
+
+ uint32_t _headerSize; // Size of page header
+
+ uint32_t _countsEntries; // Number of count entries on page
+ uint32_t _l1StrideCheck; // Count entries since last L1 entry
+ uint32_t _l2StrideCheck; // L1 entries since last L2 entry
+
+ uint32_t _countsSize; // Size of counts
+ uint32_t _l1Size; // Size of L1 entries
+ uint32_t _l2Size; // Size of L2 entries
+ uint32_t _prevL1Size; // Previous size of L1 entries
+ uint32_t _prevL2Size; // Previous size of L2 entries
+ uint64_t _pageNum; // Page number.
+ uint64_t _l3WordNum; // last L3 word num written
+ uint64_t _wordNum; // current word number
+ std::vector<char> _words; // Word buffer
+ SPWriter &_spWriter;
+ // Encode context where paged counts go
+ EC &_pe;
+
+ void
+ addOverflowCounts(const vespalib::stringref &word,
+ const Counts &counts);
+
+public:
+ PageDict4PWriter(SPWriter &spWriter,
+ EC &pe);
+
+ ~PageDict4PWriter(void);
+
+ void
+ setup();
+
+ void
+ flushPage(void);
+
+ void
+ flush(void);
+
+ void
+ resetPage(void);
+
+ void
+ addCounts(const vespalib::stringref &word,
+ const Counts &counts);
+
+ void
+ addL1Skip(size_t &lcp);
+
+ void
+ addL2Skip(size_t &lcp);
+
+ bool
+ empty(void) const
+ {
+ return _countsEntries == 0;
+ }
+
+ uint64_t
+ getPageNum(void) const
+ {
+ return _pageNum;
+ }
+
+ uint64_t
+ getWordNum() const
+ {
+ return _wordNum - 1;
+ }
+
+ void
+ checkPointWrite(vespalib::nbostream &out);
+
+ void
+ checkPointRead(vespalib::nbostream &in);
+};
+
+
+class PageDict4SSLookupRes
+{
+public:
+ using Counts = index::PostingListCounts;
+ typedef PageDict4StartOffset StartOffset;
+
+ vespalib::string _l6Word; // last L6 word before key
+ vespalib::string _lastWord; // L6 or overflow word >= key
+ StartOffset _l6StartOffset; // File offsets
+ Counts _counts; // Counts valid if overflow
+ uint64_t _pageNum;
+ uint64_t _sparsePageNum;
+ uint64_t _l6WordNum; // wordnum if overflow
+ StartOffset _startOffset; // valid if overflow
+ bool _res;
+ bool _overflow;
+
+ PageDict4SSLookupRes(void);
+
+ ~PageDict4SSLookupRes(void);
+};
+
+/* Reader for sparse sparse file.
+ *
+ * Read from file to memory (compressed mix of L6 entries and overflow entries)
+ *
+ * Uncompressed L7 array in memory, usable for binary search.
+ */
+
+class PageDict4SSReader : public PageDict4PageParams
+{
+ typedef PostingListCountFileEncodeContext EC;
+ typedef PostingListCountFileDecodeContext DC;
+public:
+ class L7Entry
+ {
+ public:
+ vespalib::string _l7Word;
+ StartOffset _l7StartOffset; // Offsets in data files
+ uint64_t _l7WordNum;
+ uint32_t _l6Offset; // Offset in L6+overflow stream
+ uint32_t _sparsePageNum;// page number for sparse file
+ uint64_t _pageNum; // page number in full file
+ uint32_t _l7Ref; // L7 entry before overflow, or self-ref if L6
+
+ L7Entry()
+ : _l7Word(0),
+ _l7StartOffset(),
+ _l7WordNum(0),
+ _l6Offset(0),
+ _sparsePageNum(0),
+ _pageNum(0),
+ _l7Ref(0)
+ {
+ }
+
+ L7Entry(const vespalib::stringref &l7Word,
+ const StartOffset &l7StartOffset,
+ uint64_t l7WordNum,
+ uint32_t l6Offset,
+ uint32_t sparsePageNum,
+ uint64_t pageNum,
+ uint32_t l7Ref)
+ : _l7Word(l7Word),
+ _l7StartOffset(l7StartOffset),
+ _l7WordNum(l7WordNum),
+ _l6Offset(l6Offset),
+ _sparsePageNum(sparsePageNum),
+ _pageNum(pageNum),
+ _l7Ref(l7Ref)
+ {
+ }
+
+ bool
+ operator<(const vespalib::stringref &word) const
+ {
+ return _l7Word < word;
+ }
+
+ friend vespalib::nbostream &
+ operator<<(vespalib::nbostream &stream, const L7Entry &l7Entry);
+
+ friend vespalib::nbostream &
+ operator>>(vespalib::nbostream &stream, L7Entry &l7Entry);
+ };
+
+ class OverflowRef
+ {
+ public:
+ uint64_t _wordNum;
+ uint32_t _l7Ref; // overflow entry in L7 table
+
+ OverflowRef()
+ : _wordNum(0),
+ _l7Ref(0)
+ {
+ }
+
+ OverflowRef(uint64_t wordNum, uint32_t l7Ref)
+ : _wordNum(wordNum),
+ _l7Ref(l7Ref)
+ {
+ }
+
+ bool
+ operator<(uint64_t wordNum) const
+ {
+ return _wordNum < wordNum;
+ }
+
+ friend vespalib::nbostream &
+ operator<<(vespalib::nbostream &stream, const OverflowRef &oref);
+
+ friend vespalib::nbostream &
+ operator>>(vespalib::nbostream &stream, OverflowRef &oref);
+ };
+
+ ComprBuffer _cb;
+ uint64_t _ssFileBitLen; // File size in bits
+ uint32_t _ssStartOffset; // Header size in bits
+
+ typedef std::vector<L7Entry> L7Vector;
+ L7Vector _l7;// Uncompressed skip list for sparse sparse file
+
+ DC _ssd; // used to store compression parameters
+ uint64_t _spFileBitLen;
+ uint64_t _pFileBitLen;
+ uint32_t _spStartOffset;
+ uint32_t _pStartOffset;
+ uint32_t _spFirstPageNum;
+ uint32_t _spFirstPageOffset;
+ uint32_t _pFirstPageNum;
+ uint32_t _pFirstPageOffset;
+
+ typedef std::vector<OverflowRef> OverflowVector;
+ OverflowVector _overflows;
+
+ PageDict4SSReader(ComprBuffer &cb,
+ uint32_t ssFileHeaderSize,
+ uint64_t ssFileBitLen,
+ uint32_t spFileHeaderSize,
+ uint64_t spFileBitLen,
+ uint32_t pFileHeaderSize,
+ uint64_t pFileBitLen);
+
+ ~PageDict4SSReader(void);
+
+ void
+ setup(DC &ssd);
+
+ PageDict4SSLookupRes
+ lookup(const vespalib::stringref &key);
+
+ PageDict4SSLookupRes
+ lookupOverflow(uint64_t wordNum) const;
+
+ const DC &
+ getSSD(void) const
+ {
+ return _ssd;
+ }
+
+ void
+ checkPointWrite(vespalib::nbostream &out);
+
+ void
+ checkPointRead(vespalib::nbostream &in);
+};
+
+
+class PageDict4SPLookupRes : public PageDict4PageParams
+{
+ typedef PostingListCountFileEncodeContext EC;
+ typedef PostingListCountFileDecodeContext DC;
+ typedef PageDict4SSReader SSReader;
+
+public:
+ vespalib::string _l3Word;
+ vespalib::string _lastWord; // L3 word >= key
+ StartOffset _l3StartOffset;
+ uint64_t _pageNum;
+ uint64_t _l3WordNum;
+
+public:
+ PageDict4SPLookupRes(void);
+
+ ~PageDict4SPLookupRes(void);
+
+ void
+ lookup(const SSReader &ssReader,
+ const void *sparsePage,
+ const vespalib::stringref &key,
+ const vespalib::stringref &l6Word,
+ const vespalib::stringref &lastSPWord,
+ const StartOffset &l6StartOffset,
+ uint64_t l6WordNum,
+ uint64_t lowestPageNum);
+};
+
+
+class PageDict4PLookupRes : public PageDict4PageParams
+{
+public:
+ typedef PostingListCountFileEncodeContext EC;
+ typedef PostingListCountFileDecodeContext DC;
+ typedef PageDict4SSReader SSReader;
+
+public:
+ Counts _counts;
+ StartOffset _startOffset;
+ uint64_t _wordNum;
+ bool _res;
+ vespalib::string *_nextWord;
+
+public:
+ PageDict4PLookupRes(void);
+
+ ~PageDict4PLookupRes(void);
+
+ bool
+ lookup(const SSReader &ssReader,
+ const void *page,
+ const vespalib::stringref &key,
+ const vespalib::stringref &l3Word,
+ const vespalib::stringref &lastPWord,
+ const StartOffset &l3StartOffset,
+ uint64_t l3WordNum);
+};
+
+
+class PageDict4Reader : public PageDict4PageParams
+{
+public:
+ typedef PostingListCountFileDecodeContext DC;
+ typedef PostingListCountFileEncodeContext EC;
+ typedef PageDict4SSReader SSReader;
+
+ DC &_pd;
+ uint32_t _countsResidue;
+ const SSReader &_ssReader;
+ uint64_t _pFileBitLen;
+ StartOffset _startOffset;
+ bool _overflowPage;
+ typedef std::vector<Counts> PCV;
+ PCV _counts;
+ PCV::const_iterator _cc;
+ PCV::const_iterator _ce;
+ typedef std::vector<char> WV;
+ WV _words;
+ WV::const_iterator _wc;
+ WV::const_iterator _we;
+ vespalib::string _lastWord;
+ vespalib::string _lastSSWord;
+
+ DC &_spd;
+ uint32_t _l3Residue;
+ WV _spwords;
+ WV::const_iterator _spwc;
+ WV::const_iterator _spwe;
+
+ DC _ssd;
+ uint64_t _wordNum;
+
+
+ PageDict4Reader(const SSReader &ssReader,
+ DC &spd,
+ DC &pd);
+
+ ~PageDict4Reader(void);
+
+ void
+ setup();
+
+ void
+ setupPage();
+
+ void
+ setupSPage();
+
+ void
+ decodePWord(vespalib::string &word);
+
+ void
+ decodeSPWord(vespalib::string &word);
+
+ void
+ decodeSSWord(vespalib::string &word);
+
+ void
+ readCounts(vespalib::string &word,
+ uint64_t &wordNum,
+ Counts &counts);
+
+ void
+ readOverflowCounts(vespalib::string &word,
+ Counts &counts);
+
+ void
+ checkPointWrite(vespalib::nbostream &out);
+
+ void
+ checkPointRead(vespalib::nbostream &in);
+};
+
+} // namespace bitcompression
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/bitcompression/posocccompression.cpp b/searchlib/src/vespa/searchlib/bitcompression/posocccompression.cpp
new file mode 100644
index 00000000000..ebf4ff59889
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/bitcompression/posocccompression.cpp
@@ -0,0 +1,1355 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright (C) 2002-2003 Fast Search & Transfer ASA
+// Copyright (C) 2003 Overture Services Norway AS
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".posocccompression");
+#include "compression.h"
+#include "posocccompression.h"
+#include <vespa/searchlib/index/schemautil.h>
+#include <vespa/searchlib/fef/termfieldmatchdata.h>
+#include <vespa/searchlib/fef/termfieldmatchdataarray.h>
+#include <vespa/vespalib/stllike/asciistream.h>
+#include <vespa/vespalib/data/fileheader.h>
+
+using search::index::DocIdAndFeatures;
+using search::index::WordDocElementFeatures;
+using search::index::WordDocElementWordPosFeatures;
+using search::index::PostingListParams;
+using search::index::SchemaUtil;
+using search::index::Schema;
+using search::fef::TermFieldMatchData;
+using vespalib::GenericHeader;
+
+namespace
+{
+
+vespalib::string PosOccId = "PosOcc.3";
+
+vespalib::string PosOccIdCooked = "PosOcc.3.Cooked";
+
+}
+
+namespace
+{
+
+vespalib::string EG64PosOccId = "EG64PosOcc.3"; // Dynamic k values
+vespalib::string EG64PosOccId2 = "EG64PosOcc.2"; // Fixed k values
+
+}
+
+namespace search
+{
+
+namespace bitcompression
+{
+
+
+PosOccFieldParams::PosOccFieldParams(void)
+ : _elemLenK(0),
+ _hasElements(false),
+ _hasElementWeights(false),
+ _avgElemLen(512),
+ _collectionType(SINGLE),
+ _name()
+{
+}
+
+
+bool
+PosOccFieldParams::operator==(const PosOccFieldParams &rhs) const
+{
+ return _collectionType == rhs._collectionType &&
+ _avgElemLen == rhs._avgElemLen &&
+ _name == rhs._name;
+}
+
+
+vespalib::string
+PosOccFieldParams::getParamsPrefix(uint32_t idx)
+{
+ vespalib::asciistream paramsPrefix;
+ paramsPrefix << "fieldParams.";
+ paramsPrefix << idx;
+ return paramsPrefix.str();
+}
+
+
+void
+PosOccFieldParams::getParams(PostingListParams &params, uint32_t idx) const
+{
+ vespalib::string paramsPrefix = getParamsPrefix(idx);
+ vespalib::string collStr = paramsPrefix + ".collectionType";
+ vespalib::string avgElemLenStr = paramsPrefix + ".avgElemLen";
+ vespalib::string nameStr = paramsPrefix + ".name";
+
+ switch (_collectionType) {
+ case SINGLE:
+ params.setStr(collStr, "single");
+ break;
+ case ARRAY:
+ params.setStr(collStr, "array");
+ break;
+ case WEIGHTEDSET:
+ params.setStr(collStr, "weightedSet");
+ break;
+ }
+ params.set(avgElemLenStr, _avgElemLen);
+ params.setStr(nameStr, _name);
+}
+
+
+void
+PosOccFieldParams::setParams(const PostingListParams &params, uint32_t idx)
+{
+ vespalib::string paramsPrefix = getParamsPrefix(idx);
+ vespalib::string collStr = paramsPrefix + ".collectionType";
+ vespalib::string avgElemLenStr = paramsPrefix + ".avgElemLen";
+ vespalib::string nameStr = paramsPrefix + ".name";
+
+ if (params.isSet(collStr)) {
+ vespalib::string collVal = params.getStr(collStr);
+ if (collVal == "single") {
+ _collectionType = SINGLE;
+ _hasElements = false;
+ _hasElementWeights = false;
+ } else if (collVal == "array") {
+ _collectionType = ARRAY;
+ _hasElements = true;
+ _hasElementWeights = false;
+ } else if (collVal == "weightedSet") {
+ _collectionType = WEIGHTEDSET;
+ _hasElements = true;
+ _hasElementWeights = true;
+ }
+ }
+ params.get(avgElemLenStr, _avgElemLen);
+ if (params.isSet(nameStr))
+ _name = params.getStr(nameStr);
+}
+
+
+void
+PosOccFieldParams::setSchemaParams(const Schema &schema, uint32_t fieldId)
+{
+ assert(fieldId < schema.getNumIndexFields());
+ const Schema::IndexField &field = schema.getIndexField(fieldId);
+ switch (field.getCollectionType()) {
+ case Schema::SINGLE:
+ _collectionType = SINGLE;
+ _hasElements = false;
+ _hasElementWeights = false;
+ break;
+ case Schema::ARRAY:
+ _collectionType = ARRAY;
+ _hasElements = true;
+ _hasElementWeights = false;
+ break;
+ case Schema::WEIGHTEDSET:
+ _collectionType = WEIGHTEDSET;
+ _hasElements = true;
+ _hasElementWeights = true;
+ break;
+ default:
+ LOG(error,
+ "Bad collection type");
+ abort();
+ }
+ _avgElemLen = field.getAvgElemLen();
+ _name = field.getName();
+}
+
+
+void
+PosOccFieldParams::readHeader(const vespalib::GenericHeader &header,
+ const vespalib::string &prefix)
+{
+ vespalib::string nameKey(prefix + "fieldName");
+ vespalib::string collKey(prefix + "collectionType");
+ vespalib::string avgElemLenKey(prefix + "avgElemLen");
+ _name = header.getTag(nameKey).asString();
+ Schema::CollectionType ct =
+ Schema::collectionTypeFromName(header.getTag(collKey).asString());
+ switch (ct) {
+ case Schema::SINGLE:
+ _collectionType = SINGLE;
+ _hasElements = false;
+ _hasElementWeights = false;
+ break;
+ case Schema::ARRAY:
+ _collectionType = ARRAY;
+ _hasElements = true;
+ _hasElementWeights = false;
+ break;
+ case Schema::WEIGHTEDSET:
+ _collectionType = WEIGHTEDSET;
+ _hasElements = true;
+ _hasElementWeights = true;
+ break;
+ default:
+ LOG(error,
+ "Bad collection type when reading field param in header");
+ abort();
+ }
+ _avgElemLen = header.getTag(avgElemLenKey).asInteger();
+}
+
+
+void
+PosOccFieldParams::writeHeader(vespalib::GenericHeader &header,
+ const vespalib::string &prefix) const
+{
+ vespalib::string nameKey(prefix + "fieldName");
+ vespalib::string collKey(prefix + "collectionType");
+ vespalib::string avgElemLenKey(prefix + "avgElemLen");
+ header.putTag(GenericHeader::Tag(nameKey, _name));
+ Schema::CollectionType ct(Schema::SINGLE);
+ switch (_collectionType) {
+ case SINGLE:
+ ct = Schema::SINGLE;
+ break;
+ case ARRAY:
+ ct = Schema::ARRAY;
+ break;
+ case WEIGHTEDSET:
+ ct = Schema::WEIGHTEDSET;
+ break;
+ default:
+ LOG(error,
+ "Bad collection type when writing field param in header");
+ abort();
+ }
+ header.putTag(GenericHeader::Tag(collKey, Schema::getTypeName(ct)));
+ header.putTag(GenericHeader::Tag(avgElemLenKey, _avgElemLen));
+}
+
+
+PosOccFieldsParams::PosOccFieldsParams(void)
+ : _numFields(0u),
+ _fieldParams(NULL),
+ _params()
+{
+}
+
+PosOccFieldsParams::PosOccFieldsParams(const PosOccFieldsParams &rhs)
+ : _numFields(0u),
+ _fieldParams(NULL),
+ _params(rhs._params)
+{
+ cacheParamsRef();
+}
+
+PosOccFieldsParams &
+PosOccFieldsParams::operator=(const PosOccFieldsParams &rhs)
+{
+ assertCachedParamsRef();
+ _params = rhs._params;
+ cacheParamsRef();
+ return *this;
+}
+
+
+bool
+PosOccFieldsParams::operator==(const PosOccFieldsParams &rhs) const
+{
+ return _params == rhs._params;
+}
+
+
+void
+PosOccFieldsParams::getParams(PostingListParams &params) const
+{
+ assertCachedParamsRef();
+ assert(_numFields == 1u); // Only single field for now
+ params.set("numFields", _numFields);
+ // Single posting file index format will have multiple fields in file
+ for (uint32_t field = 0; field < _numFields; ++field)
+ _fieldParams[field].getParams(params, field);
+}
+
+
+void
+PosOccFieldsParams::setParams(const PostingListParams &params)
+{
+ assertCachedParamsRef();
+ uint32_t numFields = _numFields;
+ params.get("numFields", numFields);
+ assert(numFields == 1u);
+ _params.resize(numFields);
+ cacheParamsRef();
+ // Single posting file index format will have multiple fields in file
+ for (uint32_t field = 0; field < numFields; ++field)
+ _params[field].setParams(params, field);
+}
+
+
+void
+PosOccFieldsParams::setSchemaParams(const Schema &schema,
+ const uint32_t indexId)
+{
+ assertCachedParamsRef();
+ SchemaUtil::IndexIterator i(schema, indexId);
+ assert(i.isValid());
+ _params.resize(1u);
+ cacheParamsRef();
+ const Schema::IndexField &field = schema.getIndexField(indexId);
+ if (!SchemaUtil::validateIndexField(field))
+ abort();
+ _params[0].setSchemaParams(schema, indexId);
+}
+
+
+void
+PosOccFieldsParams::readHeader(const vespalib::GenericHeader &header,
+ const vespalib::string &prefix)
+{
+ vespalib::string numFieldsKey(prefix + "numFields");
+ assertCachedParamsRef();
+ uint32_t numFields = header.getTag(numFieldsKey).asInteger();
+ assert(numFields == 1u);
+ _params.resize(numFields);
+ cacheParamsRef();
+ // Single posting file index format will have multiple fields in file
+ for (uint32_t field = 0; field < numFields; ++field) {
+ vespalib::asciistream as;
+ as << prefix << "field[" << field << "].";
+ vespalib::string subPrefix(as.str());
+ _params[field].readHeader(header, subPrefix);
+ }
+}
+
+
+void
+PosOccFieldsParams::writeHeader(vespalib::GenericHeader &header,
+ const vespalib::string &prefix) const
+{
+ vespalib::string numFieldsKey(prefix + "numFields");
+ assertCachedParamsRef();
+ assert(_numFields == 1u);
+ header.putTag(GenericHeader::Tag(numFieldsKey, _numFields));
+ // Single posting file index format will have multiple fields in file
+ for (uint32_t field = 0; field < _numFields; ++field) {
+ vespalib::asciistream as;
+ as << prefix << "field[" << field << "].";
+ vespalib::string subPrefix(as.str());
+ _params[field].writeHeader(header, subPrefix);
+ }
+}
+
+
+template <bool bigEndian>
+void
+EG2PosOccDecodeContext<bigEndian>::
+readHeader(const vespalib::GenericHeader &header,
+ const vespalib::string &prefix)
+{
+ const_cast<PosOccFieldsParams *>(_fieldsParams)->readHeader(header,
+ prefix);
+}
+
+
+template <bool bigEndian>
+void
+EG2PosOccDecodeContext<bigEndian>::
+readFeatures(search::index::DocIdAndFeatures &features)
+{
+ UC64_DECODECONTEXT_CONSTRUCTOR(o, _);
+ uint32_t length;
+ uint64_t val64;
+ const uint64_t *valE = _valE;
+
+ features.clearFeatures((oPreRead == 0) ? 0 : 64 - oPreRead);
+ features.setRaw(true);
+ const uint64_t *rawFeatures =
+ (oPreRead == 0) ? (oCompr - 1) : (oCompr - 2);
+ uint64_t rawFeaturesStartBitPos =
+ _fileReadBias + (reinterpret_cast<unsigned long>(oCompr) << 3) -
+ oPreRead;
+
+ const PosOccFieldParams &fieldParams =
+ _fieldsParams->getFieldParams()[0];
+ uint32_t numElements = 1;
+ if (fieldParams._hasElements) {
+ UC64_DECODEEXPGOLOMB_SMALL_NS(o,
+ K_VALUE_POSOCC_NUMELEMENTS,
+ EC);
+ numElements = static_cast<uint32_t>(val64) + 1;
+ }
+ for (uint32_t elementDone = 0; elementDone < numElements;
+ ++elementDone) {
+ if (fieldParams._hasElements) {
+ UC64_SKIPEXPGOLOMB_SMALL_NS(o,
+ K_VALUE_POSOCC_ELEMENTID,
+ EC);
+ if (fieldParams._hasElementWeights) {
+ UC64_SKIPEXPGOLOMB_SMALL_NS(o,
+ K_VALUE_POSOCC_ELEMENTWEIGHT,
+ EC);
+ }
+ if (__builtin_expect(oCompr >= valE, false)) {
+ while (rawFeatures < oCompr) {
+ features._blob.push_back(*rawFeatures);
+ ++rawFeatures;
+ }
+ UC64_DECODECONTEXT_STORE(o, _);
+ _readContext->readComprBuffer();
+ valE = _valE;
+ UC64_DECODECONTEXT_LOAD(o, _);
+ rawFeatures = oCompr;
+ }
+ }
+ UC64_SKIPEXPGOLOMB_SMALL_NS(o,
+ K_VALUE_POSOCC_ELEMENTLEN,
+ EC);
+ UC64_DECODEEXPGOLOMB_SMALL_NS(o,
+ K_VALUE_POSOCC_NUMPOSITIONS,
+ EC);
+ uint32_t numPositions = static_cast<uint32_t>(val64) + 1;
+
+ do {
+ if (__builtin_expect(oCompr >= valE, false)) {
+ while (rawFeatures < oCompr) {
+ features._blob.push_back(*rawFeatures);
+ ++rawFeatures;
+ }
+ UC64_DECODECONTEXT_STORE(o, _);
+ _readContext->readComprBuffer();
+ valE = _valE;
+ UC64_DECODECONTEXT_LOAD(o, _);
+ rawFeatures = oCompr;
+ }
+ UC64_SKIPEXPGOLOMB_SMALL_NS(o,
+ K_VALUE_POSOCC_FIRST_WORDPOS,
+ EC);
+ } while (0);
+ for (uint32_t pos = 1; pos < numPositions; ++pos) {
+ if (__builtin_expect(oCompr >= valE, false)) {
+ while (rawFeatures < oCompr) {
+ features._blob.push_back(*rawFeatures);
+ ++rawFeatures;
+ }
+ UC64_DECODECONTEXT_STORE(o, _);
+ _readContext->readComprBuffer();
+ valE = _valE;
+ UC64_DECODECONTEXT_LOAD(o, _);
+ rawFeatures = oCompr;
+ }
+ UC64_SKIPEXPGOLOMB_SMALL_NS(o,
+ K_VALUE_POSOCC_DELTA_WORDPOS,
+ EC);
+ }
+ }
+ UC64_DECODECONTEXT_STORE(o, _);
+ uint64_t rawFeaturesEndBitPos =
+ _fileReadBias +
+ (reinterpret_cast<unsigned long>(oCompr) << 3) -
+ oPreRead;
+ features._bitLength = rawFeaturesEndBitPos - rawFeaturesStartBitPos;
+ while (rawFeatures < oCompr) {
+ features._blob.push_back(*rawFeatures);
+ ++rawFeatures;
+ }
+ if (__builtin_expect(oCompr >= valE, false)) {
+ _readContext->readComprBuffer();
+ }
+}
+
+
+template <bool bigEndian>
+void
+EG2PosOccDecodeContextCooked<bigEndian>::
+readFeatures(search::index::DocIdAndFeatures &features)
+{
+ UC64_DECODECONTEXT_CONSTRUCTOR(o, _);
+ uint32_t length;
+ uint64_t val64;
+ const uint64_t *valE = _valE;
+
+ features.clearFeatures();
+ features.setRaw(false);
+
+ const PosOccFieldParams &fieldParams = _fieldsParams->getFieldParams()[0];
+ uint32_t numElements = 1;
+ if (fieldParams._hasElements) {
+ UC64_DECODEEXPGOLOMB_SMALL_NS(o,
+ K_VALUE_POSOCC_NUMELEMENTS,
+ EC);
+ numElements = static_cast<uint32_t>(val64) + 1;
+ }
+ uint32_t elementId = 0;
+ for (uint32_t elementDone = 0; elementDone < numElements;
+ ++elementDone, ++elementId) {
+ if (fieldParams._hasElements) {
+ UC64_DECODEEXPGOLOMB_SMALL_NS(o,
+ K_VALUE_POSOCC_ELEMENTID,
+ EC);
+ elementId += static_cast<uint32_t>(val64);
+ }
+ features._elements.
+ push_back(WordDocElementFeatures(elementId));
+ if (fieldParams._hasElementWeights) {
+ UC64_DECODEEXPGOLOMB_SMALL_NS(o,
+ K_VALUE_POSOCC_ELEMENTWEIGHT,
+ EC);
+ int32_t elementWeight = this->convertToSigned(val64);
+ features._elements.back().setWeight(elementWeight);
+ }
+ if (__builtin_expect(oCompr >= valE, false)) {
+ UC64_DECODECONTEXT_STORE(o, _);
+ _readContext->readComprBuffer();
+ valE = _valE;
+ UC64_DECODECONTEXT_LOAD(o, _);
+ }
+ UC64_DECODEEXPGOLOMB_SMALL_NS(o,
+ K_VALUE_POSOCC_ELEMENTLEN,
+ EC);
+ uint32_t elementLen = static_cast<uint32_t>(val64) + 1;
+ features._elements.back().setElementLen(elementLen);
+ UC64_DECODEEXPGOLOMB_SMALL_NS(o,
+ K_VALUE_POSOCC_NUMPOSITIONS,
+ EC);
+ uint32_t numPositions = static_cast<uint32_t>(val64) + 1;
+
+ uint32_t wordPos = static_cast<uint32_t>(-1);
+ do {
+ if (__builtin_expect(oCompr >= valE, false)) {
+ UC64_DECODECONTEXT_STORE(o, _);
+ _readContext->readComprBuffer();
+ valE = _valE;
+ UC64_DECODECONTEXT_LOAD(o, _);
+ }
+ UC64_DECODEEXPGOLOMB_SMALL_NS(o,
+ K_VALUE_POSOCC_FIRST_WORDPOS,
+ EC);
+ wordPos = static_cast<uint32_t>(val64);
+ features._elements.back().incNumOccs();
+ features._wordPositions.push_back(
+ WordDocElementWordPosFeatures(wordPos));
+ } while (0);
+ for (uint32_t pos = 1; pos < numPositions; ++pos) {
+ if (__builtin_expect(oCompr >= valE, false)) {
+ UC64_DECODECONTEXT_STORE(o, _);
+ _readContext->readComprBuffer();
+ valE = _valE;
+ UC64_DECODECONTEXT_LOAD(o, _);
+ }
+ UC64_DECODEEXPGOLOMB_SMALL_NS(o,
+ K_VALUE_POSOCC_DELTA_WORDPOS,
+ EC);
+ wordPos += 1 + static_cast<uint32_t>(val64);
+ features._elements.back().incNumOccs();
+ features._wordPositions.push_back(
+ WordDocElementWordPosFeatures(wordPos));
+ }
+ }
+ UC64_DECODECONTEXT_STORE(o, _);
+ if (__builtin_expect(oCompr >= valE, false))
+ _readContext->readComprBuffer();
+}
+
+
+template <bool bigEndian>
+void
+EG2PosOccDecodeContext<bigEndian>::
+skipFeatures(unsigned int count)
+{
+ UC64_DECODECONTEXT_CONSTRUCTOR(o, _);
+ uint32_t length;
+ uint64_t val64;
+
+ for (unsigned int i = count; i > 0; --i) {
+ const PosOccFieldParams &fieldParams =
+ _fieldsParams->getFieldParams()[0];
+ uint32_t numElements = 1;
+ if (fieldParams._hasElements) {
+ UC64_DECODEEXPGOLOMB_SMALL_NS(o,
+ K_VALUE_POSOCC_NUMELEMENTS,
+ EC);
+ numElements = static_cast<uint32_t>(val64) + 1;
+ }
+ for (uint32_t elementDone = 0; elementDone < numElements;
+ ++elementDone) {
+ if (fieldParams._hasElements) {
+ UC64_SKIPEXPGOLOMB_SMALL_NS(o,
+ K_VALUE_POSOCC_ELEMENTID,
+ EC);
+ if (fieldParams._hasElementWeights) {
+ UC64_SKIPEXPGOLOMB_SMALL_NS(o,
+ K_VALUE_POSOCC_ELEMENTWEIGHT,
+ EC);
+ }
+ }
+ UC64_SKIPEXPGOLOMB_SMALL_NS(o,
+ K_VALUE_POSOCC_ELEMENTLEN,
+ EC);
+ UC64_DECODEEXPGOLOMB_SMALL_NS(o,
+ K_VALUE_POSOCC_NUMPOSITIONS,
+ EC);
+ uint32_t numPositions = static_cast<uint32_t>(val64) + 1;
+
+ UC64_SKIPEXPGOLOMB_SMALL_NS(o,
+ K_VALUE_POSOCC_FIRST_WORDPOS,
+ EC);
+ for (uint32_t pos = 1; pos < numPositions; ++pos) {
+ UC64_SKIPEXPGOLOMB_SMALL_NS(o,
+ K_VALUE_POSOCC_DELTA_WORDPOS,
+ EC);
+ }
+ }
+ }
+ UC64_DECODECONTEXT_STORE(o, _);
+}
+
+
+template <bool bigEndian>
+void
+EG2PosOccDecodeContext<bigEndian>::
+unpackFeatures(const search::fef::TermFieldMatchDataArray &matchData,
+ uint32_t docId)
+{
+ UC64_DECODECONTEXT_CONSTRUCTOR(o, _);
+ uint32_t length;
+ uint64_t val64;
+
+ const PosOccFieldParams &fieldParams =
+ _fieldsParams->getFieldParams()[0];
+ uint32_t numElements = 1;
+ if (fieldParams._hasElements) {
+ UC64_DECODEEXPGOLOMB_SMALL_NS(o,
+ K_VALUE_POSOCC_NUMELEMENTS,
+ EC);
+ numElements = static_cast<uint32_t>(val64) + 1;
+ }
+ TermFieldMatchData *tfmd = matchData[0];
+ tfmd->reset(docId);
+ uint32_t elementId = 0;
+ for (uint32_t elementDone = 0; elementDone < numElements;
+ ++elementDone, ++elementId) {
+ int32_t elementWeight = 1;
+ if (fieldParams._hasElements) {
+ UC64_DECODEEXPGOLOMB_SMALL_NS(o,
+ K_VALUE_POSOCC_ELEMENTID,
+ EC);
+ elementId += static_cast<uint32_t>(val64);
+ if (fieldParams._hasElementWeights) {
+ UC64_DECODEEXPGOLOMB_SMALL_NS(o,
+ K_VALUE_POSOCC_ELEMENTWEIGHT,
+ EC);
+ elementWeight = this->convertToSigned(val64);
+ }
+ }
+
+ UC64_DECODEEXPGOLOMB_SMALL_NS(o,
+ K_VALUE_POSOCC_ELEMENTLEN,
+ EC);
+ uint32_t elementLen = static_cast<uint32_t>(val64) + 1;
+ UC64_DECODEEXPGOLOMB_SMALL_NS(o,
+ K_VALUE_POSOCC_NUMPOSITIONS,
+ EC);
+ uint32_t numPositions = static_cast<uint32_t>(val64) + 1;
+
+ UC64_DECODEEXPGOLOMB_SMALL_NS(o,
+ K_VALUE_POSOCC_FIRST_WORDPOS,
+ EC);
+ uint32_t wordPos = static_cast<uint32_t>(val64);
+ {
+ search::fef::TermFieldMatchDataPosition
+ pos(elementId, wordPos, elementWeight, elementLen);
+ tfmd->appendPosition(pos);
+ }
+ for (uint32_t wi = 1; wi < numPositions; ++wi) {
+ UC64_DECODEEXPGOLOMB_SMALL_NS(o,
+ K_VALUE_POSOCC_DELTA_WORDPOS,
+ EC);
+ wordPos += 1 + static_cast<uint32_t>(val64);
+ {
+ search::fef::TermFieldMatchDataPosition
+ pos(elementId, wordPos, elementWeight,
+ elementLen);
+ tfmd->appendPosition(pos);
+ }
+ }
+ }
+ UC64_DECODECONTEXT_STORE(o, _);
+}
+
+
+template <bool bigEndian>
+void
+EG2PosOccDecodeContext<bigEndian>::
+setParams(const PostingListParams &params)
+{
+ const_cast<PosOccFieldsParams *>(_fieldsParams)->setParams(params);
+}
+
+
+template <bool bigEndian>
+void
+EG2PosOccDecodeContext<bigEndian>::
+getParams(PostingListParams &params) const
+{
+ params.clear();
+ params.setStr("encoding", EG64PosOccId2);
+ _fieldsParams->getParams(params);
+}
+
+
+template <bool bigEndian>
+void
+EG2PosOccDecodeContextCooked<bigEndian>::
+getParams(PostingListParams &params) const
+{
+ ParentClass::getParams(params);
+ params.setStr("cookedEncoding", PosOccIdCooked);
+}
+
+
+template <bool bigEndian>
+void
+EG2PosOccEncodeContext<bigEndian>::
+readHeader(const vespalib::GenericHeader &header,
+ const vespalib::string &prefix)
+{
+ const_cast<PosOccFieldsParams *>(_fieldsParams)->readHeader(header,
+ prefix);
+}
+
+
+template <bool bigEndian>
+const vespalib::string &
+EG2PosOccDecodeContext<bigEndian>::getIdentifier(void) const
+{
+ return EG64PosOccId2;
+}
+
+
+template <bool bigEndian>
+void
+EG2PosOccEncodeContext<bigEndian>::
+writeHeader(vespalib::GenericHeader &header,
+ const vespalib::string &prefix) const
+{
+ _fieldsParams->writeHeader(header, prefix);
+}
+
+
+template <bool bigEndian>
+const vespalib::string &
+EG2PosOccEncodeContext<bigEndian>::getIdentifier(void) const
+{
+ return EG64PosOccId2;
+}
+
+
+template <bool bigEndian>
+void
+EG2PosOccEncodeContext<bigEndian>::
+writeFeatures(const search::index::DocIdAndFeatures &features)
+{
+ if (features.getRaw()) {
+ writeBits(&features._blob[0],
+ features._bitOffset, features._bitLength);
+ return;
+ }
+ typedef WordDocElementFeatures Elements;
+ typedef WordDocElementWordPosFeatures Positions;
+
+ std::vector<Elements>::const_iterator element = features._elements.begin();
+
+ std::vector<Positions>::const_iterator position =
+ features._wordPositions.begin();
+
+ const PosOccFieldParams &fieldParams =
+ _fieldsParams->getFieldParams()[0];
+
+ uint32_t numElements = features._elements.size();
+ if (fieldParams._hasElements) {
+ assert(numElements > 0u);
+ encodeExpGolomb(numElements - 1,
+ K_VALUE_POSOCC_NUMELEMENTS);
+ } else {
+ assert(numElements == 1);
+ }
+ uint32_t minElementId = 0;
+ for (uint32_t elementDone = 0; elementDone < numElements;
+ ++elementDone, ++element) {
+ if (fieldParams._hasElements) {
+ uint32_t elementId = element->getElementId();
+ assert(elementId >= minElementId);
+ encodeExpGolomb(elementId - minElementId,
+ K_VALUE_POSOCC_ELEMENTID);
+ minElementId = elementId + 1;
+ if (fieldParams._hasElementWeights) {
+ int32_t elementWeight = element->getWeight();
+ encodeExpGolomb(this->convertToUnsigned(elementWeight),
+ K_VALUE_POSOCC_ELEMENTWEIGHT);
+ }
+ if (__builtin_expect(_valI >= _valE, false))
+ _writeContext->writeComprBuffer(false);
+ } else {
+ uint32_t elementId = element->getElementId();
+ assert(elementId == 0);
+ (void) elementId;
+ }
+
+ encodeExpGolomb(element->getElementLen() - 1,
+ K_VALUE_POSOCC_ELEMENTLEN);
+ uint32_t numPositions = element->getNumOccs();
+ assert(numPositions > 0);
+ encodeExpGolomb(numPositions - 1,
+ K_VALUE_POSOCC_NUMPOSITIONS);
+
+ uint32_t wordPos = static_cast<uint32_t>(-1);
+ do {
+ uint32_t lastWordPos = wordPos;
+ wordPos = position->getWordPos();
+ encodeExpGolomb(wordPos - lastWordPos - 1,
+ K_VALUE_POSOCC_FIRST_WORDPOS);
+ if (__builtin_expect(_valI >= _valE, false))
+ _writeContext->writeComprBuffer(false);
+ ++position;
+ } while (0);
+ uint32_t positionResidue = numPositions - 1;
+ while (positionResidue > 0) {
+ uint32_t lastWordPos = wordPos;
+ wordPos = position->getWordPos();
+ encodeExpGolomb(wordPos - lastWordPos - 1,
+ K_VALUE_POSOCC_DELTA_WORDPOS);
+ if (__builtin_expect(_valI >= _valE, false))
+ _writeContext->writeComprBuffer(false);
+ ++position;
+ --positionResidue;
+ }
+ }
+}
+
+
+template <bool bigEndian>
+void
+EG2PosOccEncodeContext<bigEndian>::
+setParams(const PostingListParams &params)
+{
+ const_cast<PosOccFieldsParams *>(_fieldsParams)->setParams(params);
+}
+
+
+template <bool bigEndian>
+void
+EG2PosOccEncodeContext<bigEndian>::
+getParams(PostingListParams &params) const
+{
+ params.clear();
+ params.setStr("encoding", EG64PosOccId2);
+ params.setStr("cookedEncoding", PosOccIdCooked);
+ _fieldsParams->getParams(params);
+}
+
+
+template <bool bigEndian>
+void
+EGPosOccDecodeContext<bigEndian>::
+readHeader(const vespalib::GenericHeader &header,
+ const vespalib::string &prefix)
+{
+ ParentClass::readHeader(header, prefix);
+}
+
+
+template <bool bigEndian>
+void
+EGPosOccDecodeContext<bigEndian>::
+readFeatures(search::index::DocIdAndFeatures &features)
+{
+ UC64_DECODECONTEXT_CONSTRUCTOR(o, _);
+ uint32_t length;
+ uint64_t val64;
+ const uint64_t *valE = _valE;
+
+ features.clearFeatures((oPreRead == 0) ? 0 : 64 - oPreRead);
+ features.setRaw(true);
+ const uint64_t *rawFeatures =
+ (oPreRead == 0) ? (oCompr - 1) : (oCompr - 2);
+ uint64_t rawFeaturesStartBitPos =
+ _fileReadBias + (reinterpret_cast<unsigned long>(oCompr) << 3) -
+ oPreRead;
+
+ const PosOccFieldParams &fieldParams =
+ _fieldsParams->getFieldParams()[0];
+ uint32_t elementLenK = EGPosOccEncodeContext<bigEndian>::
+ calcElementLenK(fieldParams._avgElemLen);
+ uint32_t numElements = 1;
+ if (fieldParams._hasElements) {
+ UC64_DECODEEXPGOLOMB_SMALL_NS(o,
+ K_VALUE_POSOCC_NUMELEMENTS,
+ EC);
+ numElements = static_cast<uint32_t>(val64) + 1;
+ }
+ for (uint32_t elementDone = 0; elementDone < numElements;
+ ++elementDone) {
+ if (fieldParams._hasElements) {
+ UC64_SKIPEXPGOLOMB_SMALL_NS(o,
+ K_VALUE_POSOCC_ELEMENTID,
+ EC);
+ if (fieldParams._hasElementWeights) {
+ UC64_SKIPEXPGOLOMB_SMALL_NS(o,
+ K_VALUE_POSOCC_ELEMENTWEIGHT,
+ EC);
+ }
+ if (__builtin_expect(oCompr >= valE, false)) {
+ while (rawFeatures < oCompr) {
+ features._blob.push_back(*rawFeatures);
+ ++rawFeatures;
+ }
+ UC64_DECODECONTEXT_STORE(o, _);
+ _readContext->readComprBuffer();
+ valE = _valE;
+ UC64_DECODECONTEXT_LOAD(o, _);
+ rawFeatures = oCompr;
+ }
+ }
+ UC64_DECODEEXPGOLOMB_SMALL_NS(o,
+ elementLenK,
+ EC);
+ uint32_t elementLen = static_cast<uint32_t>(val64) + 1;
+ UC64_DECODEEXPGOLOMB_SMALL_NS(o,
+ K_VALUE_POSOCC_NUMPOSITIONS,
+ EC);
+ uint32_t numPositions = static_cast<uint32_t>(val64) + 1;
+
+ uint32_t wordPosK = EGPosOccEncodeContext<bigEndian>::
+ calcWordPosK(numPositions, elementLen);
+
+ for (uint32_t pos = 0; pos < numPositions; ++pos) {
+ if (__builtin_expect(oCompr >= valE, false)) {
+ while (rawFeatures < oCompr) {
+ features._blob.push_back(*rawFeatures);
+ ++rawFeatures;
+ }
+ UC64_DECODECONTEXT_STORE(o, _);
+ _readContext->readComprBuffer();
+ valE = _valE;
+ UC64_DECODECONTEXT_LOAD(o, _);
+ rawFeatures = oCompr;
+ }
+ UC64_SKIPEXPGOLOMB_SMALL_NS(o,
+ wordPosK,
+ EC);
+ }
+ }
+ UC64_DECODECONTEXT_STORE(o, _);
+ uint64_t rawFeaturesEndBitPos =
+ _fileReadBias +
+ (reinterpret_cast<unsigned long>(oCompr) << 3) -
+ oPreRead;
+ features._bitLength = rawFeaturesEndBitPos - rawFeaturesStartBitPos;
+ while (rawFeatures < oCompr) {
+ features._blob.push_back(*rawFeatures);
+ ++rawFeatures;
+ }
+ if (__builtin_expect(oCompr >= valE, false)) {
+ _readContext->readComprBuffer();
+ }
+}
+
+
+template <bool bigEndian>
+void
+EGPosOccDecodeContextCooked<bigEndian>::
+readFeatures(search::index::DocIdAndFeatures &features)
+{
+ UC64_DECODECONTEXT_CONSTRUCTOR(o, _);
+ uint32_t length;
+ uint64_t val64;
+ const uint64_t *valE = _valE;
+
+ features.clearFeatures();
+ features.setRaw(false);
+
+ const PosOccFieldParams &fieldParams =
+ _fieldsParams->getFieldParams()[0];
+ uint32_t elementLenK = EGPosOccEncodeContext<bigEndian>::
+ calcElementLenK(fieldParams._avgElemLen);
+ uint32_t numElements = 1;
+ if (fieldParams._hasElements) {
+ UC64_DECODEEXPGOLOMB_SMALL_NS(o,
+ K_VALUE_POSOCC_NUMELEMENTS,
+ EC);
+ numElements = static_cast<uint32_t>(val64) + 1;
+ }
+ uint32_t elementId = 0;
+ for (uint32_t elementDone = 0; elementDone < numElements;
+ ++elementDone, ++elementId) {
+ if (fieldParams._hasElements) {
+ UC64_DECODEEXPGOLOMB_SMALL_NS(o,
+ K_VALUE_POSOCC_ELEMENTID,
+ EC);
+ elementId += static_cast<uint32_t>(val64);
+ }
+ features._elements.
+ push_back(WordDocElementFeatures(elementId));
+ if (fieldParams._hasElementWeights) {
+ UC64_DECODEEXPGOLOMB_SMALL_NS(o,
+ K_VALUE_POSOCC_ELEMENTWEIGHT,
+ EC);
+ int32_t elementWeight = this->convertToSigned(val64);
+ features._elements.back().setWeight(elementWeight);
+ }
+ if (__builtin_expect(oCompr >= valE, false)) {
+ UC64_DECODECONTEXT_STORE(o, _);
+ _readContext->readComprBuffer();
+ valE = _valE;
+ UC64_DECODECONTEXT_LOAD(o, _);
+ }
+ UC64_DECODEEXPGOLOMB_SMALL_NS(o,
+ elementLenK,
+ EC);
+ uint32_t elementLen = static_cast<uint32_t>(val64) + 1;
+ features._elements.back().setElementLen(elementLen);
+ UC64_DECODEEXPGOLOMB_SMALL_NS(o,
+ K_VALUE_POSOCC_NUMPOSITIONS,
+ EC);
+ uint32_t numPositions = static_cast<uint32_t>(val64) + 1;
+
+ features._bitLength = numPositions * 64;
+
+ uint32_t wordPosK = EGPosOccEncodeContext<bigEndian>::
+ calcWordPosK(numPositions, elementLen);
+
+ uint32_t wordPos = static_cast<uint32_t>(-1);
+ for (uint32_t pos = 0; pos < numPositions; ++pos) {
+ if (__builtin_expect(oCompr >= valE, false)) {
+ UC64_DECODECONTEXT_STORE(o, _);
+ _readContext->readComprBuffer();
+ valE = _valE;
+ UC64_DECODECONTEXT_LOAD(o, _);
+ }
+ UC64_DECODEEXPGOLOMB_SMALL_NS(o,
+ wordPosK,
+ EC);
+ wordPos += 1 + static_cast<uint32_t>(val64);
+ features._elements.back().incNumOccs();
+ features._wordPositions.push_back(
+ WordDocElementWordPosFeatures(wordPos));
+ }
+ }
+ UC64_DECODECONTEXT_STORE(o, _);
+ if (__builtin_expect(oCompr >= valE, false))
+ _readContext->readComprBuffer();
+}
+
+
+template <bool bigEndian>
+void
+EGPosOccDecodeContext<bigEndian>::
+skipFeatures(unsigned int count)
+{
+ UC64_DECODECONTEXT_CONSTRUCTOR(o, _);
+ uint32_t length;
+ uint64_t val64;
+
+ for (unsigned int i = count; i > 0; --i) {
+ const PosOccFieldParams &fieldParams =
+ _fieldsParams->getFieldParams()[0];
+ uint32_t elementLenK = EGPosOccEncodeContext<bigEndian>::
+ calcElementLenK(fieldParams._avgElemLen);
+ uint32_t numElements = 1;
+ if (fieldParams._hasElements) {
+ UC64_DECODEEXPGOLOMB_SMALL_NS(o,
+ K_VALUE_POSOCC_NUMELEMENTS,
+ EC);
+ numElements = static_cast<uint32_t>(val64) + 1;
+ }
+ for (uint32_t elementDone = 0; elementDone < numElements;
+ ++elementDone) {
+ if (fieldParams._hasElements) {
+ UC64_SKIPEXPGOLOMB_SMALL_NS(o,
+ K_VALUE_POSOCC_ELEMENTID,
+ EC);
+ if (fieldParams._hasElementWeights) {
+ UC64_SKIPEXPGOLOMB_SMALL_NS(o,
+ K_VALUE_POSOCC_ELEMENTWEIGHT,
+ EC);
+ }
+ }
+ UC64_DECODEEXPGOLOMB_SMALL_NS(o,
+ elementLenK,
+ EC);
+ uint32_t elementLen = static_cast<uint32_t>(val64) + 1;
+ UC64_DECODEEXPGOLOMB_SMALL_NS(o,
+ K_VALUE_POSOCC_NUMPOSITIONS,
+ EC);
+ uint32_t numPositions = static_cast<uint32_t>(val64) + 1;
+
+ uint32_t wordPosK = EGPosOccEncodeContext<bigEndian>::
+ calcWordPosK(numPositions, elementLen);
+
+ for (uint32_t pos = 0; pos < numPositions; ++pos) {
+ UC64_SKIPEXPGOLOMB_SMALL_NS(o,
+ wordPosK,
+ EC);
+ }
+ }
+ }
+ UC64_DECODECONTEXT_STORE(o, _);
+}
+
+
+template <bool bigEndian>
+void
+EGPosOccDecodeContext<bigEndian>::
+unpackFeatures(const search::fef::TermFieldMatchDataArray &matchData,
+ uint32_t docId)
+{
+ UC64_DECODECONTEXT_CONSTRUCTOR(o, _);
+ uint32_t length;
+ uint64_t val64;
+
+ const PosOccFieldParams &fieldParams =
+ _fieldsParams->getFieldParams()[0];
+ uint32_t elementLenK = EGPosOccEncodeContext<bigEndian>::
+ calcElementLenK(fieldParams._avgElemLen);
+ uint32_t numElements = 1;
+ if (fieldParams._hasElements) {
+ UC64_DECODEEXPGOLOMB_SMALL_NS(o,
+ K_VALUE_POSOCC_NUMELEMENTS,
+ EC);
+ numElements = static_cast<uint32_t>(val64) + 1;
+ }
+ TermFieldMatchData *tfmd = matchData[0];
+ tfmd->reset(docId);
+ uint32_t elementId = 0;
+ for (uint32_t elementDone = 0; elementDone < numElements;
+ ++elementDone, ++elementId) {
+ int32_t elementWeight = 1;
+ if (fieldParams._hasElements) {
+ UC64_DECODEEXPGOLOMB_SMALL_NS(o,
+ K_VALUE_POSOCC_ELEMENTID,
+ EC);
+ elementId += static_cast<uint32_t>(val64);
+ if (fieldParams._hasElementWeights) {
+ UC64_DECODEEXPGOLOMB_SMALL_NS(o,
+ K_VALUE_POSOCC_ELEMENTWEIGHT,
+ EC);
+ elementWeight = this->convertToSigned(val64);
+ }
+ }
+ UC64_DECODEEXPGOLOMB_SMALL_NS(o,
+ elementLenK,
+ EC);
+ uint32_t elementLen = static_cast<uint32_t>(val64) + 1;
+ UC64_DECODEEXPGOLOMB_SMALL_NS(o,
+ K_VALUE_POSOCC_NUMPOSITIONS,
+ EC);
+ uint32_t numPositions = static_cast<uint32_t>(val64) + 1;
+
+ uint32_t wordPosK = EGPosOccEncodeContext<bigEndian>::
+ calcWordPosK(numPositions, elementLen);
+
+ UC64_DECODEEXPGOLOMB_SMALL_NS(o,
+ wordPosK,
+ EC);
+ uint32_t wordPos = static_cast<uint32_t>(val64);
+ {
+ search::fef::TermFieldMatchDataPosition
+ pos(elementId, wordPos, elementWeight, elementLen);
+ tfmd->appendPosition(pos);
+ }
+ for (uint32_t wi = 1; wi < numPositions; ++wi) {
+ UC64_DECODEEXPGOLOMB_SMALL_NS(o,
+ wordPosK,
+ EC);
+ wordPos += 1 + static_cast<uint32_t>(val64);
+ {
+ search::fef::TermFieldMatchDataPosition
+ pos(elementId, wordPos, elementWeight,
+ elementLen);
+ tfmd->appendPosition(pos);
+ }
+ }
+ }
+ UC64_DECODECONTEXT_STORE(o, _);
+}
+
+
+template <bool bigEndian>
+void
+EGPosOccDecodeContext<bigEndian>::
+setParams(const PostingListParams &params)
+{
+ ParentClass::setParams(params);
+}
+
+
+template <bool bigEndian>
+void
+EGPosOccDecodeContext<bigEndian>::
+getParams(PostingListParams &params) const
+{
+ ParentClass::getParams(params);
+ params.setStr("encoding", EG64PosOccId);
+}
+
+
+template <bool bigEndian>
+void
+EGPosOccDecodeContextCooked<bigEndian>::
+getParams(PostingListParams &params) const
+{
+ ParentClass::getParams(params);
+ params.setStr("cookedEncoding", PosOccIdCooked);
+}
+
+
+template <bool bigEndian>
+void
+EGPosOccEncodeContext<bigEndian>::
+readHeader(const vespalib::GenericHeader &header,
+ const vespalib::string &prefix)
+{
+ ParentClass::readHeader(header, prefix);
+}
+
+
+template <bool bigEndian>
+const vespalib::string &
+EGPosOccDecodeContext<bigEndian>::getIdentifier(void) const
+{
+ return EG64PosOccId;
+}
+
+
+template <bool bigEndian>
+void
+EGPosOccEncodeContext<bigEndian>::
+writeHeader(vespalib::GenericHeader &header,
+ const vespalib::string &prefix) const
+{
+ ParentClass::writeHeader(header, prefix);
+}
+
+
+template <bool bigEndian>
+const vespalib::string &
+EGPosOccEncodeContext<bigEndian>::getIdentifier(void) const
+{
+ return EG64PosOccId;
+}
+
+
+template <bool bigEndian>
+void
+EGPosOccEncodeContext<bigEndian>::
+writeFeatures(const search::index::DocIdAndFeatures &features)
+{
+ if (features.getRaw()) {
+ writeBits(&features._blob[0],
+ features._bitOffset, features._bitLength);
+ return;
+ }
+ typedef WordDocElementFeatures Elements;
+ typedef WordDocElementWordPosFeatures Positions;
+
+ std::vector<Elements>::const_iterator element = features._elements.begin();
+
+ std::vector<Positions>::const_iterator position =
+ features._wordPositions.begin();
+ const PosOccFieldParams &fieldParams =
+ _fieldsParams->getFieldParams()[0];
+ uint32_t elementLenK = calcElementLenK(fieldParams._avgElemLen);
+
+ uint32_t numElements = features._elements.size();
+ if (fieldParams._hasElements) {
+ assert(numElements > 0u);
+ encodeExpGolomb(numElements - 1,
+ K_VALUE_POSOCC_NUMELEMENTS);
+ } else {
+ assert(numElements == 1);
+ }
+ uint32_t minElementId = 0;
+ for (uint32_t elementDone = 0; elementDone < numElements;
+ ++elementDone, ++element) {
+ if (fieldParams._hasElements) {
+ uint32_t elementId = element->getElementId();
+ assert(elementId >= minElementId);
+ encodeExpGolomb(elementId - minElementId,
+ K_VALUE_POSOCC_ELEMENTID);
+ minElementId = elementId + 1;
+ if (fieldParams._hasElementWeights) {
+ int32_t elementWeight = element->getWeight();
+ encodeExpGolomb(this->convertToUnsigned(elementWeight),
+ K_VALUE_POSOCC_ELEMENTWEIGHT);
+ }
+ if (__builtin_expect(_valI >= _valE, false))
+ _writeContext->writeComprBuffer(false);
+ } else {
+ uint32_t elementId = element->getElementId();
+ assert(elementId == 0);
+ (void) elementId;
+ }
+ uint32_t elementLen = element->getElementLen();
+ encodeExpGolomb(elementLen - 1, elementLenK);
+ uint32_t numPositions = element->getNumOccs();
+ assert(numPositions > 0);
+ encodeExpGolomb(numPositions - 1,
+ K_VALUE_POSOCC_NUMPOSITIONS);
+
+ uint32_t wordPosK = calcWordPosK(numPositions, elementLen);
+ uint32_t wordPos = static_cast<uint32_t>(-1);
+ uint32_t positionResidue = numPositions;
+ while (positionResidue > 0) {
+ uint32_t lastWordPos = wordPos;
+ wordPos = position->getWordPos();
+ encodeExpGolomb(wordPos - lastWordPos - 1,
+ wordPosK);
+ if (__builtin_expect(_valI >= _valE, false))
+ _writeContext->writeComprBuffer(false);
+ ++position;
+ --positionResidue;
+ }
+ }
+}
+
+
+
+template <bool bigEndian>
+void
+EGPosOccEncodeContext<bigEndian>::
+setParams(const PostingListParams &params)
+{
+ ParentClass::setParams(params);
+}
+
+
+template <bool bigEndian>
+void
+EGPosOccEncodeContext<bigEndian>::
+getParams(PostingListParams &params) const
+{
+ ParentClass::getParams(params);
+ params.setStr("encoding", EG64PosOccId);
+ params.setStr("cookedEncoding", PosOccIdCooked);
+}
+
+
+template class EG2PosOccDecodeContext<true>;
+template class EG2PosOccDecodeContext<false>;
+
+template class EG2PosOccDecodeContextCooked<true>;
+template class EG2PosOccDecodeContextCooked<false>;
+
+template class EG2PosOccEncodeContext<true>;
+template class EG2PosOccEncodeContext<false>;
+
+template class EGPosOccDecodeContext<true>;
+template class EGPosOccDecodeContext<false>;
+
+template class EGPosOccDecodeContextCooked<true>;
+template class EGPosOccDecodeContextCooked<false>;
+
+template class EGPosOccEncodeContext<true>;
+template class EGPosOccEncodeContext<false>;
+
+} // namespace index
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/bitcompression/posocccompression.h b/searchlib/src/vespa/searchlib/bitcompression/posocccompression.h
new file mode 100644
index 00000000000..cdf9c73fdc0
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/bitcompression/posocccompression.h
@@ -0,0 +1,616 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright (C) 2002-2003 Fast Search & Transfer ASA
+// Copyright (C) 2003 Overture Services Norway AS
+
+#pragma once
+#include <vespa/searchlib/index/docidandfeatures.h>
+#include <vespa/searchcommon/common/schema.h>
+
+
+#define K_VALUE_POSOCC_FIRST_DOCID 22
+#define MAXRICE2_POSOCC_FIRST_DOCID MAX_RICE2VAL_L32_K22
+
+#define K_VALUE_POSOCC_DELTA_DOCID 7
+#define MAXRICE2_POSOCC_DELTA_DOCID MAX_RICE2VAL_L30_K7
+
+#define K_VALUE_POSOCC_FIRST_WORDPOS 8
+#define MAXRICE2_POSOCC_FIRST_WORDPOS MAX_RICE2VAL_L32_K8
+
+#define K_VALUE_POSOCC_DELTA_WORDPOS 4
+#define MAXRICE2_POSOCC_DELTA_WORDPOS MAX_RICE2VAL_L31_K4
+
+// Compression parameters for EGPosOcc encode/decode context
+#define K_VALUE_POSOCC_ELEMENTLEN 9
+#define K_VALUE_POSOCC_NUMPOSITIONS 0
+#define K_VALUE_POSOCC_NUMFIELDS 0
+#define K_VALUE_POSOCC_FIELDID 0
+
+#define K_VALUE_POSOCC_NUMELEMENTS 0
+#define K_VALUE_POSOCC_ELEMENTID 0
+#define K_VALUE_POSOCC_ELEMENTWEIGHT 9
+
+namespace search
+{
+
+namespace index
+{
+
+class DocIdAndPosOccFeatures : public DocIdAndFeatures
+{
+public:
+
+ void
+ addNextOcc(uint32_t elementId,
+ uint32_t wordPos,
+ int32_t elementWeight,
+ uint32_t elementLen)
+ {
+ assert(wordPos < elementLen);
+ if (_elements.empty() ||
+ elementId > _elements.back().getElementId()) {
+ _elements.emplace_back(elementId, elementWeight, elementLen);
+ } else {
+ assert(elementId == _elements.back().getElementId());
+ assert(elementWeight == _elements.back().getWeight());
+ assert(elementLen == _elements.back().getElementLen());
+ }
+ assert(_elements.back().getNumOccs() == 0 ||
+ wordPos > _wordPositions.back().getWordPos());
+ _elements.back().incNumOccs();
+ _wordPositions.emplace_back(wordPos);
+ }
+};
+
+} // namespace search::index
+
+} // namespace search
+
+
+namespace search
+{
+
+namespace bitcompression
+{
+
+class PosOccFieldParams
+{
+public:
+ typedef index::PostingListParams PostingListParams;
+ typedef index::Schema Schema;
+
+ enum CollectionType
+ {
+ SINGLE,
+ ARRAY,
+ WEIGHTEDSET
+ };
+
+ uint8_t _elemLenK;
+ bool _hasElements;
+ bool _hasElementWeights;
+ uint32_t _avgElemLen;
+ CollectionType _collectionType;
+ vespalib::string _name;
+
+ PosOccFieldParams(void);
+
+ bool
+ operator==(const PosOccFieldParams &rhs) const;
+
+ static vespalib::string
+ getParamsPrefix(uint32_t idx);
+
+ void
+ getParams(PostingListParams &params, uint32_t idx) const;
+
+ void
+ setParams(const PostingListParams &params, uint32_t idx);
+
+ void
+ setSchemaParams(const Schema &schema, uint32_t fieldId);
+
+ void
+ readHeader(const vespalib::GenericHeader &header,
+ const vespalib::string &prefix);
+
+ void
+ writeHeader(vespalib::GenericHeader &header,
+ const vespalib::string &prefix) const;
+};
+
+
+class PosOccFieldsParams
+{
+ // Cache pointers.
+ uint32_t _numFields;
+ const PosOccFieldParams *_fieldParams;
+
+ // Storage
+ std::vector<PosOccFieldParams> _params;
+
+public:
+ typedef index::PostingListParams PostingListParams;
+ typedef index::Schema Schema;
+
+ PosOccFieldsParams(void);
+
+ PosOccFieldsParams(const PosOccFieldsParams &rhs);
+
+ PosOccFieldsParams &
+ operator=(const PosOccFieldsParams &rhs);
+
+ bool
+ operator==(const PosOccFieldsParams &rhs) const;
+
+ void
+ cacheParamsRef(void)
+ {
+ _numFields = _params.size();
+ _fieldParams = _params.empty() ? NULL : &_params[0];
+ }
+
+ void
+ assertCachedParamsRef(void) const
+ {
+ assert(_numFields == _params.size());
+ assert(_fieldParams == (_params.empty() ? NULL : &_params[0]));
+ }
+
+ uint32_t
+ getNumFields(void) const
+ {
+ return _numFields;
+ }
+
+ const PosOccFieldParams *
+ getFieldParams(void) const
+ {
+ return _fieldParams;
+ }
+
+ void
+ getParams(PostingListParams &params) const;
+
+ void
+ setParams(const PostingListParams &params);
+
+ void
+ setSchemaParams(const Schema &schema, const uint32_t indexId);
+
+ void
+ readHeader(const vespalib::GenericHeader &header,
+ const vespalib::string &prefix);
+
+ void
+ writeHeader(vespalib::GenericHeader &header,
+ const vespalib::string &prefix) const;
+};
+
+template <bool bigEndian>
+class EG2PosOccDecodeContext : public FeatureDecodeContext<bigEndian>
+{
+public:
+ typedef FeatureDecodeContext<bigEndian> ParentClass;
+ using ParentClass::smallAlign;
+ using ParentClass::readBits;
+ using ParentClass::_valI;
+ using ParentClass::_val;
+ using ParentClass::_cacheInt;
+ using ParentClass::_preRead;
+ using ParentClass::_valE;
+ using ParentClass::_fileReadBias;
+ using ParentClass::_readContext;
+ using ParentClass::readHeader;
+ typedef EncodeContext64<bigEndian> EC;
+ typedef index::PostingListParams PostingListParams;
+
+ const PosOccFieldsParams *_fieldsParams;
+
+ EG2PosOccDecodeContext(const PosOccFieldsParams *fieldsParams)
+ : FeatureDecodeContext<bigEndian>(),
+ _fieldsParams(fieldsParams)
+ {
+ }
+
+ EG2PosOccDecodeContext(const uint64_t *compr, int bitOffset,
+ const PosOccFieldsParams *fieldsParams)
+ : FeatureDecodeContext<bigEndian>(compr, bitOffset),
+ _fieldsParams(fieldsParams)
+ {
+ }
+
+
+ EG2PosOccDecodeContext(const uint64_t *compr,
+ int bitOffset,
+ uint64_t bitLength,
+ const PosOccFieldsParams *fieldsParams)
+ : FeatureDecodeContext<bigEndian>(compr, bitOffset, bitLength),
+ _fieldsParams(fieldsParams)
+ {
+ }
+
+
+ EG2PosOccDecodeContext &
+ operator=(const EG2PosOccDecodeContext &rhs)
+ {
+ FeatureDecodeContext<bigEndian>::operator=(rhs);
+ _fieldsParams = rhs._fieldsParams;
+ return *this;
+ }
+
+ virtual void
+ readHeader(const vespalib::GenericHeader &header,
+ const vespalib::string &prefix);
+
+ virtual const vespalib::string &
+ getIdentifier(void) const;
+
+ virtual void
+ readFeatures(search::index::DocIdAndFeatures &features);
+
+ virtual void
+ skipFeatures(unsigned int count);
+
+ virtual void
+ unpackFeatures(const search::fef::TermFieldMatchDataArray &matchData,
+ uint32_t docId);
+
+ /*
+ * Set parameters.
+ */
+ virtual void
+ setParams(const PostingListParams &params);
+
+ /*
+ * Get current parameters.
+ */
+ virtual void
+ getParams(PostingListParams &params) const;
+};
+
+
+template <bool bigEndian>
+class EG2PosOccDecodeContextCooked : public EG2PosOccDecodeContext<bigEndian>
+{
+public:
+ typedef EG2PosOccDecodeContext<bigEndian> ParentClass;
+ using ParentClass::smallAlign;
+ using ParentClass::readBits;
+ using ParentClass::_valI;
+ using ParentClass::_val;
+ using ParentClass::_cacheInt;
+ using ParentClass::_preRead;
+ using ParentClass::_valE;
+ using ParentClass::_fileReadBias;
+ using ParentClass::_readContext;
+ using ParentClass::_fieldsParams;
+ typedef EncodeContext64<bigEndian> EC;
+ typedef index::PostingListParams PostingListParams;
+
+ EG2PosOccDecodeContextCooked(const PosOccFieldsParams *fieldsParams)
+ : EG2PosOccDecodeContext<bigEndian>(fieldsParams)
+ {
+ }
+
+ EG2PosOccDecodeContextCooked(const uint64_t *compr, int bitOffset,
+ const PosOccFieldsParams *fieldsParams)
+ : EG2PosOccDecodeContext<bigEndian>(compr, bitOffset, fieldsParams)
+ {
+ }
+
+
+ EG2PosOccDecodeContextCooked(const uint64_t *compr,
+ int bitOffset,
+ uint64_t bitLength,
+ const PosOccFieldsParams *fieldsParams)
+ : EG2PosOccDecodeContext<bigEndian>(compr, bitOffset, bitLength,
+ fieldsParams)
+ {
+ }
+
+
+ EG2PosOccDecodeContextCooked &
+ operator=(const EG2PosOccDecodeContext<bigEndian> &rhs)
+ {
+ EG2PosOccDecodeContext<bigEndian>::operator=(rhs);
+ return *this;
+ }
+
+ virtual void
+ readFeatures(search::index::DocIdAndFeatures &features);
+
+ virtual void
+ getParams(PostingListParams &params) const;
+};
+
+
+template <bool bigEndian>
+class EG2PosOccEncodeContext : public FeatureEncodeContext<bigEndian>
+{
+public:
+ typedef FeatureEncodeContext<bigEndian> ParentClass;
+ typedef index::DocIdAndFeatures DocIdAndFeatures;
+ typedef index::PostingListParams PostingListParams;
+ using ParentClass::smallAlign;
+ using ParentClass::writeBits;
+ using ParentClass::_valI;
+ using ParentClass::_valE;
+ using ParentClass::_writeContext;
+ using ParentClass::encodeExpGolomb;
+ using ParentClass::readHeader;
+ using ParentClass::writeHeader;
+
+ const PosOccFieldsParams *_fieldsParams;
+
+ EG2PosOccEncodeContext(const PosOccFieldsParams *fieldsParams)
+ : FeatureEncodeContext<bigEndian>(),
+ _fieldsParams(fieldsParams)
+ {
+ }
+
+ EG2PosOccEncodeContext &
+ operator=(const EG2PosOccEncodeContext &rhs)
+ {
+ FeatureEncodeContext<bigEndian>::operator=(rhs);
+ _fieldsParams = rhs._fieldsParams;
+ return *this;
+ }
+
+ virtual void
+ readHeader(const vespalib::GenericHeader &header,
+ const vespalib::string &prefix);
+
+ virtual void
+ writeHeader(vespalib::GenericHeader &header,
+ const vespalib::string &prefix) const;
+
+ virtual const vespalib::string &
+ getIdentifier(void) const;
+
+ virtual void
+ writeFeatures(const DocIdAndFeatures &features);
+
+ /*
+ * Set parameters.
+ */
+ virtual void
+ setParams(const PostingListParams &params);
+
+ /*
+ * Get current parameters.
+ */
+ virtual void
+ getParams(PostingListParams &params) const;
+};
+
+
+template <bool bigEndian>
+class EGPosOccDecodeContext : public EG2PosOccDecodeContext<bigEndian>
+{
+public:
+ typedef EG2PosOccDecodeContext<bigEndian> ParentClass;
+ typedef index::DocIdAndFeatures DocIdAndFeatures;
+ typedef index::PostingListParams PostingListParams;
+ using ParentClass::smallAlign;
+ using ParentClass::readBits;
+ using ParentClass::_valI;
+ using ParentClass::_val;
+ using ParentClass::_cacheInt;
+ using ParentClass::_preRead;
+ using ParentClass::_valE;
+ using ParentClass::_fileReadBias;
+ using ParentClass::_readContext;
+ using ParentClass::_fieldsParams;
+ using ParentClass::readHeader;
+ typedef EncodeContext64<bigEndian> EC;
+
+ EGPosOccDecodeContext(const PosOccFieldsParams *fieldsParams)
+ : EG2PosOccDecodeContext<bigEndian>(fieldsParams)
+ {
+ }
+
+ EGPosOccDecodeContext(const uint64_t *compr, int bitOffset,
+ const PosOccFieldsParams *fieldsParams)
+ : EG2PosOccDecodeContext<bigEndian>(compr, bitOffset, fieldsParams)
+ {
+ }
+
+
+ EGPosOccDecodeContext(const uint64_t *compr,
+ int bitOffset,
+ uint64_t bitLength,
+ const PosOccFieldsParams *fieldsParams)
+ : EG2PosOccDecodeContext<bigEndian>(compr, bitOffset, bitLength,
+ fieldsParams)
+ {
+ }
+
+
+ EGPosOccDecodeContext &
+ operator=(const EGPosOccDecodeContext &rhs)
+ {
+ EG2PosOccDecodeContext<bigEndian>::operator=(rhs);
+ return *this;
+ }
+
+ virtual void
+ readHeader(const vespalib::GenericHeader &header,
+ const vespalib::string &prefix);
+
+ virtual const vespalib::string &
+ getIdentifier(void) const;
+
+ virtual void
+ readFeatures(search::index::DocIdAndFeatures &features);
+
+ virtual void
+ skipFeatures(unsigned int count);
+
+ virtual void
+ unpackFeatures(const search::fef::TermFieldMatchDataArray &matchData,
+ uint32_t docId);
+
+ /*
+ * Set parameters.
+ */
+ virtual void
+ setParams(const PostingListParams &params);
+
+ /*
+ * Get current parameters.
+ */
+ virtual void
+ getParams(PostingListParams &params) const;
+};
+
+
+template <bool bigEndian>
+class EGPosOccDecodeContextCooked : public EGPosOccDecodeContext<bigEndian>
+{
+public:
+ typedef EGPosOccDecodeContext<bigEndian> ParentClass;
+ typedef index::DocIdAndFeatures DocIdAndFeatures;
+ typedef index::PostingListParams PostingListParams;
+ using ParentClass::smallAlign;
+ using ParentClass::readBits;
+ using ParentClass::_valI;
+ using ParentClass::_val;
+ using ParentClass::_cacheInt;
+ using ParentClass::_preRead;
+ using ParentClass::_valE;
+ using ParentClass::_fileReadBias;
+ using ParentClass::_readContext;
+ using ParentClass::_fieldsParams;
+ typedef EncodeContext64<bigEndian> EC;
+
+ EGPosOccDecodeContextCooked(const PosOccFieldsParams *fieldsParams)
+ : EGPosOccDecodeContext<bigEndian>(fieldsParams)
+ {
+ }
+
+ EGPosOccDecodeContextCooked(const uint64_t *compr, int bitOffset,
+ const PosOccFieldsParams *fieldsParams)
+ : EGPosOccDecodeContext<bigEndian>(compr, bitOffset, fieldsParams)
+ {
+ }
+
+
+ EGPosOccDecodeContextCooked(const uint64_t *compr,
+ int bitOffset,
+ uint64_t bitLength,
+ const PosOccFieldsParams *fieldsParams)
+ : EGPosOccDecodeContext<bigEndian>(compr, bitOffset, bitLength,
+ fieldsParams)
+ {
+ }
+
+
+ EGPosOccDecodeContextCooked &
+ operator=(const EGPosOccDecodeContext<bigEndian> &rhs)
+ {
+ EGPosOccDecodeContext<bigEndian>::operator=(rhs);
+ return *this;
+ }
+
+ virtual void
+ readFeatures(search::index::DocIdAndFeatures &features);
+
+ virtual void
+ getParams(PostingListParams &params) const;
+};
+
+
+template <bool bigEndian>
+class EGPosOccEncodeContext : public EG2PosOccEncodeContext<bigEndian>
+{
+public:
+ typedef EG2PosOccEncodeContext<bigEndian> ParentClass;
+ typedef index::DocIdAndFeatures DocIdAndFeatures;
+ typedef index::PostingListParams PostingListParams;
+ using ParentClass::smallAlign;
+ using ParentClass::writeBits;
+ using ParentClass::_valI;
+ using ParentClass::_valE;
+ using ParentClass::_writeContext;
+ using ParentClass::log2;
+ using ParentClass::encodeExpGolomb;
+ using ParentClass::_fieldsParams;
+ using ParentClass::readHeader;
+ using ParentClass::writeHeader;
+
+ EGPosOccEncodeContext(const PosOccFieldsParams *fieldsParams)
+ : EG2PosOccEncodeContext<bigEndian>(fieldsParams)
+ {
+ }
+
+ EGPosOccEncodeContext &
+ operator=(const EGPosOccEncodeContext &rhs)
+ {
+ EG2PosOccEncodeContext<bigEndian>::operator=(rhs);
+ return *this;
+ }
+
+ virtual void
+ readHeader(const vespalib::GenericHeader &header,
+ const vespalib::string &prefix);
+
+ virtual void
+ writeHeader(vespalib::GenericHeader &header,
+ const vespalib::string &prefix) const;
+
+ virtual const vespalib::string &
+ getIdentifier(void) const;
+
+ virtual void
+ writeFeatures(const DocIdAndFeatures &features);
+
+ /*
+ * Set parameters.
+ */
+ virtual void
+ setParams(const PostingListParams &params);
+
+ /*
+ * Get current parameters.
+ */
+ virtual void
+ getParams(PostingListParams &params) const;
+
+ static uint32_t
+ calcElementLenK(uint32_t avgElementLen)
+ {
+ return (avgElementLen < 4) ? 1u : (log2(avgElementLen));
+ }
+
+ static uint32_t
+ calcWordPosK(uint32_t numPositions, uint32_t elementLen)
+ {
+ uint32_t avgDelta = elementLen / (numPositions + 1);
+ uint32_t wordPosK = (avgDelta < 4) ? 1 : (log2(avgDelta));
+ return wordPosK;
+ }
+};
+
+
+extern template class EG2PosOccDecodeContext<true>;
+extern template class EG2PosOccDecodeContext<false>;
+
+extern template class EG2PosOccDecodeContextCooked<true>;
+extern template class EG2PosOccDecodeContextCooked<false>;
+
+extern template class EG2PosOccEncodeContext<true>;
+extern template class EG2PosOccEncodeContext<false>;
+
+extern template class EGPosOccDecodeContext<true>;
+extern template class EGPosOccDecodeContext<false>;
+
+extern template class EGPosOccDecodeContextCooked<true>;
+extern template class EGPosOccDecodeContextCooked<false>;
+
+extern template class EGPosOccEncodeContext<true>;
+extern template class EGPosOccEncodeContext<false>;
+
+} // namespace bitcompression
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/btree/CMakeLists.txt b/searchlib/src/vespa/searchlib/btree/CMakeLists.txt
new file mode 100644
index 00000000000..ce4e71729b3
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/btree/CMakeLists.txt
@@ -0,0 +1,19 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_library(searchlib_btree OBJECT
+ SOURCES
+ btreeaggregator.cpp
+ btreebuilder.cpp
+ btreeinserter.cpp
+ btreeiterator.cpp
+ btreenode.cpp
+ btreenodeallocator.cpp
+ btreenodestore.cpp
+ btreeremover.cpp
+ btreeroot.cpp
+ btreerootbase.cpp
+ btreestore.cpp
+ bufferstate.cpp
+ datastore.cpp
+ datastorebase.cpp
+ DEPENDS
+)
diff --git a/searchlib/src/vespa/searchlib/btree/OWNERS b/searchlib/src/vespa/searchlib/btree/OWNERS
new file mode 100644
index 00000000000..e6340232840
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/btree/OWNERS
@@ -0,0 +1,2 @@
+tegge
+geirst
diff --git a/searchlib/src/vespa/searchlib/btree/btree.h b/searchlib/src/vespa/searchlib/btree/btree.h
new file mode 100644
index 00000000000..7f96e3647e2
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/btree/btree.h
@@ -0,0 +1,170 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "btreeroot.h"
+#include "noaggrcalc.h"
+#include <vespa/vespalib/util/generationhandler.h>
+
+namespace search {
+namespace btree {
+
+/**
+ * Class that wraps a btree root and an allocator and that provides the same API as
+ * a standalone btree root without needing to pass the allocator to all functions.
+ **/
+template <typename KeyT,
+ typename DataT,
+ typename AggrT = NoAggregated,
+ typename CompareT = std::less<KeyT>,
+ typename TraitsT = BTreeDefaultTraits,
+ class AggrCalcT = NoAggrCalc>
+class BTree
+{
+public:
+ typedef BTreeRoot<KeyT, DataT, AggrT, CompareT, TraitsT,
+ AggrCalcT> TreeType;
+ typedef BTreeNodeAllocator<KeyT, DataT, AggrT,
+ TraitsT::INTERNAL_SLOTS,
+ TraitsT::LEAF_SLOTS> NodeAllocatorType;
+ typedef BTreeBuilder<KeyT, DataT, AggrT,
+ TraitsT::INTERNAL_SLOTS,
+ TraitsT::LEAF_SLOTS,
+ AggrCalcT> Builder;
+ typedef typename TreeType::InternalNodeType InternalNodeType;
+ typedef typename TreeType::LeafNodeType LeafNodeType;
+ typedef typename TreeType::KeyType KeyType;
+ typedef typename TreeType::DataType DataType;
+ typedef typename TreeType::Iterator Iterator;
+ typedef typename TreeType::ConstIterator ConstIterator;
+ typedef typename TreeType::FrozenView FrozenView;
+ typedef typename TreeType::AggrCalcType AggrCalcType;
+private:
+ NodeAllocatorType _alloc;
+ TreeType _tree;
+
+ BTree(const BTree &rhs);
+
+ BTree &
+ operator=(BTree &rhs);
+
+public:
+ BTree();
+ ~BTree();
+
+ const NodeAllocatorType &getAllocator() const { return _alloc; }
+ NodeAllocatorType &getAllocator() { return _alloc; }
+
+ void
+ disableFreeLists() {
+ _alloc.disableFreeLists();
+ }
+
+ void
+ disableElemHoldList()
+ {
+ _alloc.disableElemHoldList();
+ }
+
+ // Inherit doc from BTreeRoot
+ void clear() {
+ _tree.clear(_alloc);
+ }
+ void assign(Builder & rhs) {
+ _tree.assign(rhs, _alloc);
+ }
+ bool insert(const KeyType & key, const DataType & data, CompareT comp = CompareT()) {
+ return _tree.insert(key, data, _alloc, comp);
+ }
+
+ void
+ insert(Iterator &itr,
+ const KeyType &key, const DataType &data)
+ {
+ _tree.insert(itr, key, data);
+ }
+
+ Iterator find(const KeyType & key, CompareT comp = CompareT()) const {
+ return _tree.find(key, _alloc, comp);
+ }
+ Iterator lowerBound(const KeyType & key, CompareT comp = CompareT()) const {
+ return _tree.lowerBound(key, _alloc, comp);
+ }
+ Iterator upperBound(const KeyType & key, CompareT comp = CompareT()) const {
+ return _tree.upperBound(key, _alloc, comp);
+ }
+ bool remove(const KeyType & key, CompareT comp = CompareT()) {
+ return _tree.remove(key, _alloc, comp);
+ }
+
+ void
+ remove(Iterator &itr)
+ {
+ _tree.remove(itr);
+ }
+
+ Iterator begin() const {
+ return _tree.begin(_alloc);
+ }
+ FrozenView getFrozenView() const {
+ return _tree.getFrozenView(_alloc);
+ }
+ size_t size() const {
+ return _tree.size(_alloc);
+ }
+ vespalib::string toString() const {
+ return _tree.toString(_alloc);
+ }
+ bool isValid(CompareT comp = CompareT()) const {
+ return _tree.isValid(_alloc, comp);
+ }
+ bool isValidFrozen(CompareT comp = CompareT()) const {
+ return _tree.isValidFrozen(_alloc, comp);
+ }
+ size_t bitSize() const {
+ return _tree.bitSize(_alloc);
+ }
+ size_t bitSize(BTreeNode::Ref node) const {
+ return _tree.bitSize(node, _alloc);
+ }
+ void setRoot(BTreeNode::Ref newRoot) {
+ _tree.setRoot(newRoot, _alloc);
+ }
+ BTreeNode::Ref getRoot() const {
+ return _tree.getRoot();
+ }
+ MemoryUsage getMemoryUsage() const {
+ return _alloc.getMemoryUsage();
+ }
+
+ const AggrT &
+ getAggregated(void) const
+ {
+ return _tree.getAggregated(_alloc);
+ }
+
+ void
+ thaw(Iterator &itr)
+ {
+ assert(&itr.getAllocator() == &getAllocator());
+ _tree.thaw(itr);
+ }
+
+ template <typename FunctionType>
+ void
+ foreach_key(FunctionType func) const
+ {
+ _alloc.getNodeStore().foreach_key(_tree.getRoot(), func);
+ }
+
+ template <typename FunctionType>
+ void
+ foreach(FunctionType func) const
+ {
+ _alloc.getNodeStore().foreach(_tree.getRoot(), func);
+ }
+};
+
+} // namespace search::btree
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/btree/btree.hpp b/searchlib/src/vespa/searchlib/btree/btree.hpp
new file mode 100644
index 00000000000..71a05a1d832
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/btree/btree.hpp
@@ -0,0 +1,30 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "btree.h"
+
+namespace search {
+namespace btree {
+
+template <typename KeyT, typename DataT, typename AggrT, typename CompareT,
+ typename TraitsT, class AggrCalcT>
+BTree<KeyT, DataT, AggrT, CompareT, TraitsT, AggrCalcT>::BTree()
+ : _alloc(),
+ _tree()
+{
+}
+
+template <typename KeyT, typename DataT, typename AggrT, typename CompareT,
+ typename TraitsT, class AggrCalcT>
+BTree<KeyT, DataT, AggrT, CompareT, TraitsT, AggrCalcT>::~BTree()
+{
+ clear();
+ _alloc.freeze();
+ _alloc.clearHoldLists();
+}
+
+
+} // namespace search::btree
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/btree/btreeaggregator.cpp b/searchlib/src/vespa/searchlib/btree/btreeaggregator.cpp
new file mode 100644
index 00000000000..75e07cd7514
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/btree/btreeaggregator.cpp
@@ -0,0 +1,25 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "btreetraits.h"
+#include "btreeaggregator.hpp"
+#include "noaggrcalc.h"
+#include "minmaxaggrcalc.h"
+
+namespace search
+{
+
+namespace btree
+{
+
+template class BTreeAggregator<uint32_t, uint32_t>;
+template class BTreeAggregator<uint32_t, BTreeNoLeafData>;
+template class BTreeAggregator<uint32_t, int32_t, MinMaxAggregated,
+ BTreeDefaultTraits::INTERNAL_SLOTS,
+ BTreeDefaultTraits::LEAF_SLOTS,
+ MinMaxAggrCalc>;
+
+
+} // namespace btree
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/btree/btreeaggregator.h b/searchlib/src/vespa/searchlib/btree/btreeaggregator.h
new file mode 100644
index 00000000000..8ba42aba42a
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/btree/btreeaggregator.h
@@ -0,0 +1,65 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "btreenode.h"
+#include "btreenodeallocator.h"
+#include "btreetraits.h"
+#include "noaggrcalc.h"
+
+namespace search
+{
+
+namespace btree
+{
+
+template <typename KeyT,
+ typename DataT,
+ typename AggrT = NoAggregated,
+ size_t INTERNAL_SLOTS = BTreeDefaultTraits::INTERNAL_SLOTS,
+ size_t LEAF_SLOTS = BTreeDefaultTraits::LEAF_SLOTS,
+ class AggrCalcT = NoAggrCalc>
+class BTreeAggregator
+{
+public:
+ typedef BTreeNodeAllocator<KeyT, DataT, AggrT,
+ INTERNAL_SLOTS,
+ LEAF_SLOTS> NodeAllocatorType;
+ typedef BTreeInternalNode<KeyT, AggrT, INTERNAL_SLOTS>
+ InternalNodeType;
+ typedef BTreeLeafNode<KeyT, DataT, AggrT, LEAF_SLOTS>
+ LeafNodeType;
+ typedef AggrT AggregatedType;
+
+ static void
+ recalc(LeafNodeType &node, const AggrCalcT &aggrCalc);
+
+ static void
+ recalc(LeafNodeType &node,
+ const NodeAllocatorType &allocator,
+ const AggrCalcT &aggrCalc)
+ {
+ (void) allocator;
+ recalc(node, aggrCalc);
+ }
+
+ static void
+ recalc(InternalNodeType &node,
+ const NodeAllocatorType &allocator,
+ const AggrCalcT &aggrCalc);
+
+ static AggregatedType
+ recalc(LeafNodeType &node,
+ LeafNodeType &splitNode,
+ const AggrCalcT &aggrCalc);
+
+ static AggregatedType
+ recalc(InternalNodeType &node,
+ InternalNodeType &splitNode,
+ const NodeAllocatorType &allocator,
+ const AggrCalcT &aggrCalc);
+};
+
+} // namespace search::btree
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/btree/btreeaggregator.hpp b/searchlib/src/vespa/searchlib/btree/btreeaggregator.hpp
new file mode 100644
index 00000000000..9f9183b72cb
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/btree/btreeaggregator.hpp
@@ -0,0 +1,84 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "btreeaggregator.h"
+
+namespace search
+{
+
+namespace btree
+{
+
+template <typename KeyT, typename DataT, typename AggrT,
+ size_t INTERNAL_SLOTS, size_t LEAF_SLOTS, class AggrCalcT>
+void
+BTreeAggregator<KeyT, DataT, AggrT, INTERNAL_SLOTS, LEAF_SLOTS, AggrCalcT>::
+recalc(LeafNodeType &node, const AggrCalcT &aggrCalc)
+{
+ AggrT a;
+ for (uint32_t i = 0, ie = node.validSlots(); i < ie; ++i) {
+ aggrCalc.add(a, aggrCalc.getVal(node.getData(i)));
+ }
+ node.getAggregated() = a;
+}
+
+
+template <typename KeyT, typename DataT, typename AggrT,
+ size_t INTERNAL_SLOTS, size_t LEAF_SLOTS, class AggrCalcT>
+void
+BTreeAggregator<KeyT, DataT, AggrT, INTERNAL_SLOTS, LEAF_SLOTS, AggrCalcT>::
+recalc(InternalNodeType &node,
+ const NodeAllocatorType &allocator,
+ const AggrCalcT &aggrCalc)
+{
+ AggrT a;
+ for (uint32_t i = 0, ie = node.validSlots(); i < ie; ++i) {
+ const BTreeNode::Ref childRef = node.getChild(i);
+ const AggrT &ca(allocator.getAggregated(childRef));
+ aggrCalc.add(a, ca);
+ }
+ node.getAggregated() = a;
+}
+
+
+template <typename KeyT, typename DataT, typename AggrT,
+ size_t INTERNAL_SLOTS, size_t LEAF_SLOTS, class AggrCalcT>
+typename BTreeAggregator<KeyT, DataT, AggrT, INTERNAL_SLOTS, LEAF_SLOTS,
+ AggrCalcT>::AggregatedType
+BTreeAggregator<KeyT, DataT, AggrT, INTERNAL_SLOTS, LEAF_SLOTS, AggrCalcT>::
+recalc(LeafNodeType &node,
+ LeafNodeType &splitNode,
+ const AggrCalcT &aggrCalc)
+{
+ AggrT a;
+ recalc(node, aggrCalc);
+ recalc(splitNode, aggrCalc);
+ a = node.getAggregated();
+ aggrCalc.add(a, splitNode.getAggregated());
+ return a;
+}
+
+
+template <typename KeyT, typename DataT, typename AggrT,
+ size_t INTERNAL_SLOTS, size_t LEAF_SLOTS, class AggrCalcT>
+typename BTreeAggregator<KeyT, DataT, AggrT, INTERNAL_SLOTS, LEAF_SLOTS,
+ AggrCalcT>::AggregatedType
+BTreeAggregator<KeyT, DataT, AggrT, INTERNAL_SLOTS, LEAF_SLOTS, AggrCalcT>::
+ recalc(InternalNodeType &node,
+ InternalNodeType &splitNode,
+ const NodeAllocatorType &allocator,
+ const AggrCalcT &aggrCalc)
+{
+ AggrT a;
+ recalc(node, allocator, aggrCalc);
+ recalc(splitNode, allocator, aggrCalc);
+ a = node.getAggregated();
+ aggrCalc.add(a, splitNode.getAggregated());
+ return a;
+}
+
+
+} // namespace search::btree
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/btree/btreebuilder.cpp b/searchlib/src/vespa/searchlib/btree/btreebuilder.cpp
new file mode 100644
index 00000000000..418f2d8665c
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/btree/btreebuilder.cpp
@@ -0,0 +1,29 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "btreebuilder.h"
+#include "btreenode.hpp"
+#include "btreebuilder.hpp"
+
+namespace search
+{
+
+namespace btree
+{
+
+template class BTreeBuilder<uint32_t, uint32_t,
+ NoAggregated,
+ BTreeDefaultTraits::INTERNAL_SLOTS,
+ BTreeDefaultTraits::LEAF_SLOTS>;
+template class BTreeBuilder<uint32_t, BTreeNoLeafData,
+ NoAggregated,
+ BTreeDefaultTraits::INTERNAL_SLOTS,
+ BTreeDefaultTraits::LEAF_SLOTS>;
+template class BTreeBuilder<uint32_t, int32_t,
+ MinMaxAggregated,
+ BTreeDefaultTraits::INTERNAL_SLOTS,
+ BTreeDefaultTraits::LEAF_SLOTS,
+ MinMaxAggrCalc>;
+
+} // namespace btree
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/btree/btreebuilder.h b/searchlib/src/vespa/searchlib/btree/btreebuilder.h
new file mode 100644
index 00000000000..b68a4e440d5
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/btree/btreebuilder.h
@@ -0,0 +1,100 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "btreenode.h"
+#include "btreerootbase.h"
+#include "btreenodeallocator.h"
+#include "noaggrcalc.h"
+#include "minmaxaggrcalc.h"
+#include "btreeaggregator.h"
+
+namespace search
+{
+
+namespace btree
+{
+
+template <typename KeyT,
+ typename DataT,
+ typename AggrT,
+ size_t INTERNAL_SLOTS,
+ size_t LEAF_SLOTS,
+ class AggrCalcT = NoAggrCalc>
+class BTreeBuilder
+{
+public:
+ typedef BTreeNodeAllocator<KeyT, DataT, AggrT,
+ INTERNAL_SLOTS, LEAF_SLOTS> NodeAllocatorType;
+ typedef typename NodeAllocatorType::BTreeRootBaseType BTreeRootBaseType;
+ typedef typename NodeAllocatorType::InternalNodeType InternalNodeType;
+ typedef typename NodeAllocatorType::LeafNodeType LeafNodeType;
+ typedef BTreeAggregator<KeyT, DataT, AggrT,
+ INTERNAL_SLOTS,
+ LEAF_SLOTS,
+ AggrCalcT> Aggregator;
+private:
+ typedef KeyT KeyType;
+ typedef DataT DataType;
+ typedef typename InternalNodeType::RefPair InternalNodeTypeRefPair;
+ typedef typename LeafNodeType::RefPair LeafNodeTypeRefPair;
+ typedef BTreeNode::Ref NodeRef;
+
+ NodeAllocatorType &_allocator;
+ int _numInternalNodes;
+ int _numLeafNodes;
+ uint32_t _numInserts;
+ std::vector<InternalNodeTypeRefPair> _inodes;
+ LeafNodeTypeRefPair _leaf;
+ AggrCalcT _defaultAggrCalc;
+ const AggrCalcT &_aggrCalc;
+
+ void
+ normalize(void);
+
+ void
+ allocNewLeafNode(void);
+
+ InternalNodeType *
+ createInternalNode(void);
+public:
+ BTreeBuilder(NodeAllocatorType &allocator);
+
+ BTreeBuilder(NodeAllocatorType &allocator, const AggrCalcT &aggrCalc);
+
+ ~BTreeBuilder(void);
+
+ void
+ recursiveDelete(NodeRef node);
+
+ void
+ insert(const KeyT &key, const DataT &data);
+
+ NodeRef
+ handover(void);
+
+ void
+ reuse(void);
+
+ void
+ clear(void);
+};
+
+extern template class BTreeBuilder<uint32_t, uint32_t,
+ NoAggregated,
+ BTreeDefaultTraits::INTERNAL_SLOTS,
+ BTreeDefaultTraits::LEAF_SLOTS>;
+extern template class BTreeBuilder<uint32_t, BTreeNoLeafData,
+ NoAggregated,
+ BTreeDefaultTraits::INTERNAL_SLOTS,
+ BTreeDefaultTraits::LEAF_SLOTS>;
+extern template class BTreeBuilder<uint32_t, int32_t,
+ MinMaxAggregated,
+ BTreeDefaultTraits::INTERNAL_SLOTS,
+ BTreeDefaultTraits::LEAF_SLOTS,
+ MinMaxAggrCalc>;
+
+} // namespace btree
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/btree/btreebuilder.hpp b/searchlib/src/vespa/searchlib/btree/btreebuilder.hpp
new file mode 100644
index 00000000000..25c24a75561
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/btree/btreebuilder.hpp
@@ -0,0 +1,459 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "btreebuilder.h"
+
+namespace search
+{
+
+namespace btree
+{
+
+template <typename KeyT, typename DataT, typename AggrT,
+ size_t INTERNAL_SLOTS, size_t LEAF_SLOTS, class AggrCalcT>
+BTreeBuilder<KeyT, DataT, AggrT, INTERNAL_SLOTS, LEAF_SLOTS, AggrCalcT>::
+BTreeBuilder(NodeAllocatorType &allocator)
+ : _allocator(allocator),
+ _numInternalNodes(0),
+ _numLeafNodes(0),
+ _numInserts(0),
+ _inodes(),
+ _leaf(),
+ _defaultAggrCalc(),
+ _aggrCalc(_defaultAggrCalc)
+{
+ _leaf = _allocator.allocLeafNode();
+ ++_numLeafNodes;
+}
+
+
+template <typename KeyT, typename DataT, typename AggrT,
+ size_t INTERNAL_SLOTS, size_t LEAF_SLOTS, class AggrCalcT>
+BTreeBuilder<KeyT, DataT, AggrT, INTERNAL_SLOTS, LEAF_SLOTS, AggrCalcT>::
+BTreeBuilder(NodeAllocatorType &allocator, const AggrCalcT &aggrCalc)
+ : _allocator(allocator),
+ _numInternalNodes(0),
+ _numLeafNodes(0),
+ _numInserts(0),
+ _inodes(),
+ _leaf(),
+ _defaultAggrCalc(),
+ _aggrCalc(aggrCalc)
+{
+ _leaf = _allocator.allocLeafNode();
+ ++_numLeafNodes;
+}
+
+
+template <typename KeyT, typename DataT, typename AggrT,
+ size_t INTERNAL_SLOTS, size_t LEAF_SLOTS, class AggrCalcT>
+BTreeBuilder<KeyT, DataT, AggrT, INTERNAL_SLOTS, LEAF_SLOTS, AggrCalcT>::
+~BTreeBuilder(void)
+{
+ clear();
+}
+
+
+template <typename KeyT, typename DataT, typename AggrT,
+ size_t INTERNAL_SLOTS, size_t LEAF_SLOTS, class AggrCalcT>
+void
+BTreeBuilder<KeyT, DataT, AggrT, INTERNAL_SLOTS, LEAF_SLOTS, AggrCalcT>::
+recursiveDelete(NodeRef node)
+{
+ assert(_allocator.isValidRef(node));
+ if (_allocator.isLeafRef(node)) {
+ _allocator.holdNode(node, _allocator.mapLeafRef(node));
+ _numLeafNodes--;
+ return;
+ }
+ InternalNodeType *inode = _allocator.mapInternalRef(node);
+ for (unsigned int i = 0; i < inode->validSlots(); ++i) {
+ recursiveDelete(inode->getChild(i));
+ }
+ _allocator.holdNode(node, inode);
+ _numInternalNodes--;
+}
+
+
+template <typename KeyT, typename DataT, typename AggrT,
+ size_t INTERNAL_SLOTS, size_t LEAF_SLOTS, class AggrCalcT>
+void
+BTreeBuilder<KeyT, DataT, AggrT, INTERNAL_SLOTS, LEAF_SLOTS, AggrCalcT>::
+normalize(void)
+{
+ std::vector<NodeRef> leftInodes; // left to rightmost nodes in tree
+ LeafNodeType *leftLeaf;
+ NodeRef child;
+ unsigned int level;
+ LeafNodeType *leafNode = _leaf.second;
+
+ if (_inodes.size() == 0) {
+ if (leafNode->validSlots() == 0) {
+ assert(_numLeafNodes == 1);
+ assert(_numInserts == 0);
+ _allocator.holdNode(_leaf.first, _leaf.second);
+ _numLeafNodes--;
+ _leaf = std::make_pair(NodeRef(),
+ static_cast<LeafNodeType *>(NULL));
+ }
+ if (AggrCalcT::hasAggregated()) {
+ Aggregator::recalc(*leafNode, _aggrCalc);
+ }
+ assert(_numInserts == leafNode->validSlots());
+ return;
+ }
+
+ if (AggrCalcT::hasAggregated()) {
+ Aggregator::recalc(*leafNode, _aggrCalc);
+ }
+ /* Adjust validLeaves for rightmost nodes */
+ for (level = 0; level < _inodes.size(); level++) {
+ InternalNodeType *inode = _inodes[level].second;
+ NodeRef lcRef(inode->getLastChild());
+ assert(NodeAllocatorType::isValidRef(lcRef));
+ assert((level == 0) == _allocator.isLeafRef(lcRef));
+ inode->incValidLeaves(_allocator.validLeaves(inode->getLastChild()));
+ inode->update(inode->validSlots() - 1,
+ level == 0 ?
+ _allocator.mapLeafRef(lcRef)->getLastKey() :
+ _allocator.mapInternalRef(lcRef)->getLastKey(),
+ lcRef);
+ if (AggrCalcT::hasAggregated()) {
+ Aggregator::recalc(*inode, _allocator, _aggrCalc);
+ }
+ }
+ for (level = 0; level + 1 < _inodes.size(); level++) {
+ leftInodes.push_back(NodeRef());
+ }
+ /* Build vector of left to rightmost internal nodes (except root level) */
+ level = _inodes.size() - 1;
+ for (;;) {
+ NodeRef iRef = _inodes[level].first;
+ InternalNodeType *inode = _inodes[level].second;
+ if (inode->validSlots() < 2) {
+ /* Use last child of left to rightmost node on level */
+ assert(level + 1 < _inodes.size());
+ iRef = leftInodes[level];
+ inode = _allocator.mapInternalRef(iRef);
+ assert(inode != NULL);
+ assert(inode->validSlots() >= 1);
+ child = inode->getLastChild();
+ } else {
+ /* Use next to last child of rightmost node on level */
+ child = inode->getChild(inode->validSlots() - 2);
+ }
+ if (level == 0)
+ break;
+ level--;
+ assert(!_allocator.isLeafRef(child));
+ leftInodes[level] = child;
+ }
+ /* Remember left to rightmost leaf node */
+ assert(_allocator.isLeafRef(child));
+ leftLeaf = _allocator.mapLeafRef(child);
+
+ /* Check fanout on rightmost leaf node */
+ if (leafNode->validSlots() < LeafNodeType::minSlots()) {
+ InternalNodeType *pnode = _inodes[0].second;
+ if (leftLeaf->validSlots() + leafNode->validSlots() <
+ 2 * LeafNodeType::minSlots()) {
+ leftLeaf->stealAllFromRightNode(leafNode);
+ if (pnode->validSlots() == 1) {
+ InternalNodeType *lpnode =
+ _allocator.mapInternalRef(leftInodes[0]);
+ lpnode->incValidLeaves(pnode->validLeaves());
+ pnode->setValidLeaves(0);
+ }
+ /* Unlink from parent node */
+ pnode->remove(pnode->validSlots() - 1);
+ _allocator.holdNode(_leaf.first, leafNode);
+ _numLeafNodes--;
+ _leaf = std::make_pair(child, leftLeaf);
+ if (AggrCalcT::hasAggregated()) {
+ Aggregator::recalc(*leftLeaf, _aggrCalc);
+ }
+ } else {
+ leafNode->stealSomeFromLeftNode(leftLeaf);
+ if (AggrCalcT::hasAggregated()) {
+ Aggregator::recalc(*leftLeaf, _aggrCalc);
+ Aggregator::recalc(*leafNode, _aggrCalc);
+ }
+ if (pnode->validSlots() == 1) {
+ InternalNodeType *lpnode =
+ _allocator.mapInternalRef(leftInodes[0]);
+ uint32_t steal = leafNode->validLeaves() -
+ pnode->validLeaves();
+ pnode->incValidLeaves(steal);
+ lpnode->decValidLeaves(steal);
+ if (AggrCalcT::hasAggregated()) {
+ Aggregator::recalc(*lpnode, _allocator, _aggrCalc);
+ Aggregator::recalc(*pnode, _allocator, _aggrCalc);
+ }
+ }
+ }
+ if (pnode->validSlots() > 0) {
+ uint32_t s = pnode->validSlots() - 1;
+ LeafNodeType *l = _allocator.mapLeafRef(pnode->getChild(s));
+ pnode->writeKey(s, l->getLastKey());
+ if (s > 0) {
+ --s;
+ l = _allocator.mapLeafRef(pnode->getChild(s));
+ pnode->writeKey(s, l->getLastKey());
+ }
+ }
+ if (!leftInodes.empty() && _allocator.isValidRef(leftInodes[0])) {
+ InternalNodeType *lpnode =
+ _allocator.mapInternalRef(leftInodes[0]);
+ uint32_t s = lpnode->validSlots() - 1;
+ LeafNodeType *l = _allocator.mapLeafRef(lpnode->getChild(s));
+ lpnode->writeKey(s, l->getLastKey());
+ }
+ }
+
+ /* Check fanout on rightmost internal nodes except root node */
+ for (level = 0; level + 1 < _inodes.size(); level++) {
+ InternalNodeType *inode = _inodes[level].second;
+ NodeRef leftInodeRef = leftInodes[level];
+ assert(NodeAllocatorType::isValidRef(leftInodeRef));
+ InternalNodeType *leftInode = _allocator.mapInternalRef(leftInodeRef);
+
+ InternalNodeType *pnode = _inodes[level + 1].second;
+ if (inode->validSlots() < InternalNodeType::minSlots()) {
+ if (leftInode->validSlots() + inode->validSlots() <
+ 2 * InternalNodeType::minSlots()) {
+ leftInode->stealAllFromRightNode(inode);
+ if (pnode->validSlots() == 1) {
+ InternalNodeType *lpnode =
+ _allocator.mapInternalRef(leftInodes[level + 1]);
+ lpnode->incValidLeaves(pnode->validLeaves());
+ pnode->setValidLeaves(0);
+ }
+ /* Unlink from parent node */
+ pnode->remove(pnode->validSlots() - 1);
+ _allocator.holdNode(_inodes[level].first, inode);
+ _numInternalNodes--;
+ _inodes[level] = std::make_pair(leftInodeRef, leftInode);
+ if (AggrCalcT::hasAggregated()) {
+ Aggregator::recalc(*leftInode, _allocator, _aggrCalc);
+ }
+ } else {
+ inode->stealSomeFromLeftNode(leftInode, _allocator);
+ if (AggrCalcT::hasAggregated()) {
+ Aggregator::recalc(*leftInode, _allocator, _aggrCalc);
+ Aggregator::recalc(*inode, _allocator, _aggrCalc);
+ }
+ if (pnode->validSlots() == 1) {
+ InternalNodeType *lpnode =
+ _allocator.mapInternalRef(leftInodes[level + 1]);
+ uint32_t steal = inode->validLeaves() -
+ pnode->validLeaves();
+ pnode->incValidLeaves(steal);
+ lpnode->decValidLeaves(steal);
+ if (AggrCalcT::hasAggregated()) {
+ Aggregator::recalc(*lpnode, _allocator, _aggrCalc);
+ Aggregator::recalc(*pnode, _allocator, _aggrCalc);
+ }
+ }
+ }
+ }
+ if (pnode->validSlots() > 0) {
+ uint32_t s = pnode->validSlots() - 1;
+ InternalNodeType *n =
+ _allocator.mapInternalRef(pnode->getChild(s));
+ pnode->writeKey(s, n->getLastKey());
+ if (s > 0) {
+ --s;
+ n = _allocator.mapInternalRef(pnode->getChild(s));
+ pnode->writeKey(s, n->getLastKey());
+ }
+ }
+ if (level + 1 < leftInodes.size() &&
+ _allocator.isValidRef(leftInodes[level + 1])) {
+ InternalNodeType *lpnode =
+ _allocator.mapInternalRef(leftInodes[level + 1]);
+ uint32_t s = lpnode->validSlots() - 1;
+ InternalNodeType *n =
+ _allocator.mapInternalRef(lpnode->getChild(s));
+ lpnode->writeKey(s, n->getLastKey());
+ }
+ }
+ /* Check fanout on root node */
+ assert(level < _inodes.size());
+ InternalNodeType *inode = _inodes[level].second;
+ assert(inode != NULL);
+ assert(inode->validSlots() >= 1);
+ if (inode->validSlots() == 1) {
+ /* Remove top level from proposed tree since fanout is 1 */
+ NodeRef iRef = _inodes[level].first;
+ _inodes.pop_back();
+ _allocator.holdNode(iRef, inode);
+ _numInternalNodes--;
+ }
+ if (!_inodes.empty()) {
+ assert(_numInserts == _inodes.back().second->validLeaves());
+ } else {
+ assert(_numInserts == _leaf.second->validLeaves());
+ }
+}
+
+
+template <typename KeyT, typename DataT, typename AggrT,
+ size_t INTERNAL_SLOTS, size_t LEAF_SLOTS, class AggrCalcT>
+void
+BTreeBuilder<KeyT, DataT, AggrT, INTERNAL_SLOTS, LEAF_SLOTS, AggrCalcT>::
+allocNewLeafNode(void)
+{
+ InternalNodeType *inode;
+ NodeRef child;
+
+ if (AggrCalcT::hasAggregated()) {
+ Aggregator::recalc(*_leaf.second, _aggrCalc);
+ }
+ LeafNodeTypeRefPair lPair(_allocator.allocLeafNode());
+ _numLeafNodes++;
+
+ child = lPair.first;
+
+ unsigned int level = 0;
+ for (;;) {
+ if (level >= _inodes.size()) {
+ InternalNodeTypeRefPair iPair(
+ _allocator.allocInternalNode(level + 1));
+ inode = iPair.second;
+ _numInternalNodes++;
+ if (level > 0) {
+ InternalNodeType *cnode = _inodes[level - 1].second;
+ inode->insert(0, cnode->getLastKey(),
+ _inodes[level - 1].first);
+ inode->setValidLeaves(cnode->validLeaves());
+ } else {
+ inode->insert(0, _leaf.second->getLastKey(), _leaf.first);
+ inode->setValidLeaves(_leaf.second->validLeaves());
+ }
+ inode->insert(1, KeyType(), child);
+ _inodes.push_back(iPair);
+ break;
+ }
+ inode = _inodes[level].second;
+ assert(inode->validSlots() > 0);
+ NodeRef lcRef(inode->getLastChild());
+ inode->incValidLeaves(_allocator.validLeaves(lcRef));
+ inode->update(inode->validSlots() - 1,
+ level == 0 ?
+ _allocator.mapLeafRef(lcRef)->getLastKey() :
+ _allocator.mapInternalRef(lcRef)->getLastKey(),
+ lcRef);
+ if (inode->validSlots() >= InternalNodeType::maxSlots()) {
+ if (AggrCalcT::hasAggregated()) {
+ Aggregator::recalc(*inode, _allocator, _aggrCalc);
+ }
+ InternalNodeTypeRefPair iPair(
+ _allocator.allocInternalNode(level + 1));
+ inode = iPair.second;
+ _numInternalNodes++;
+ inode->insert(0, KeyType(), child);
+ child = iPair.first;
+ level++;
+ continue;
+ }
+ inode->insert(inode->validSlots(), KeyType(), child);
+ break;
+ }
+ while (level > 0) {
+ assert(inode->validSlots() > 0);
+ child = inode->getLastChild();
+ assert(!_allocator.isLeafRef(child));
+ inode = _allocator.mapInternalRef(child);
+ level--;
+ _inodes[level] = std::make_pair(child, inode);
+ }
+ _leaf = lPair;
+}
+
+
+template <typename KeyT, typename DataT, typename AggrT,
+ size_t INTERNAL_SLOTS, size_t LEAF_SLOTS, class AggrCalcT>
+void
+BTreeBuilder<KeyT, DataT, AggrT, INTERNAL_SLOTS, LEAF_SLOTS, AggrCalcT>::
+insert(const KeyT &key,
+ const DataT &data)
+{
+ if (_leaf.second->validSlots() >= LeafNodeType::maxSlots())
+ allocNewLeafNode();
+ LeafNodeType *leaf = _leaf.second;
+ leaf->insert(leaf->validSlots(), key, data);
+ ++_numInserts;
+}
+
+
+template <typename KeyT, typename DataT, typename AggrT,
+ size_t INTERNAL_SLOTS, size_t LEAF_SLOTS, class AggrCalcT>
+typename BTreeBuilder<KeyT, DataT, AggrT, INTERNAL_SLOTS, LEAF_SLOTS,
+ AggrCalcT>::NodeRef
+BTreeBuilder<KeyT, DataT, AggrT, INTERNAL_SLOTS, LEAF_SLOTS, AggrCalcT>::
+handover(void)
+{
+ NodeRef ret;
+
+ normalize();
+
+ if (!_inodes.empty())
+ ret = _inodes.back().first;
+ else
+ ret = _leaf.first;
+
+ _leaf = std::make_pair(NodeRef(),
+ static_cast<LeafNodeType *>(NULL));
+
+ _inodes.clear();
+ _numInternalNodes = 0;
+ _numLeafNodes = 0;
+ return ret;
+}
+
+
+template <typename KeyT, typename DataT, typename AggrT,
+ size_t INTERNAL_SLOTS, size_t LEAF_SLOTS, class AggrCalcT>
+void
+BTreeBuilder<KeyT, DataT, AggrT, INTERNAL_SLOTS, LEAF_SLOTS, AggrCalcT>::
+reuse(void)
+{
+ clear();
+ _leaf = _allocator.allocLeafNode();
+ ++_numLeafNodes;
+ _numInserts = 0u;
+}
+
+
+template <typename KeyT, typename DataT, typename AggrT,
+ size_t INTERNAL_SLOTS, size_t LEAF_SLOTS, class AggrCalcT>
+void
+BTreeBuilder<KeyT, DataT, AggrT, INTERNAL_SLOTS, LEAF_SLOTS, AggrCalcT>::
+clear(void)
+{
+ if (!_inodes.empty()) {
+ recursiveDelete(_inodes.back().first);
+ _leaf = std::make_pair(NodeRef(),
+ static_cast<LeafNodeType *>(NULL));
+ _inodes.clear();
+ }
+ if (NodeAllocatorType::isValidRef(_leaf.first)) {
+ assert(_leaf.second != NULL);
+ assert(_numLeafNodes == 1);
+ _allocator.holdNode(_leaf.first, _leaf.second);
+ --_numLeafNodes;
+ _leaf = std::make_pair(NodeRef(),
+ static_cast<LeafNodeType *>(NULL));
+ } else {
+ assert(_leaf.second == NULL);
+ }
+ assert(_numLeafNodes == 0);
+ assert(_numInternalNodes == 0);
+}
+
+
+} // namespace btree
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/btree/btreeinserter.cpp b/searchlib/src/vespa/searchlib/btree/btreeinserter.cpp
new file mode 100644
index 00000000000..cf67831fde1
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/btree/btreeinserter.cpp
@@ -0,0 +1,24 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "btreeinserter.h"
+#include "btreenodeallocator.h"
+#include "btreerootbase.hpp"
+#include "btreeinserter.hpp"
+#include "btreenode.hpp"
+
+namespace search
+{
+
+namespace btree
+{
+
+template class BTreeInserter<uint32_t, uint32_t, NoAggregated>;
+template class BTreeInserter<uint32_t, BTreeNoLeafData, NoAggregated>;
+template class BTreeInserter<uint32_t, int32_t, MinMaxAggregated,
+ std::less<uint32_t>,
+ BTreeDefaultTraits,
+ MinMaxAggrCalc>;
+
+} // namespace btree
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/btree/btreeinserter.h b/searchlib/src/vespa/searchlib/btree/btreeinserter.h
new file mode 100644
index 00000000000..d0d01892500
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/btree/btreeinserter.h
@@ -0,0 +1,62 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "btreenode.h"
+#include "btreenodeallocator.h"
+#include "btreerootbase.h"
+#include "btreeaggregator.h"
+#include "noaggrcalc.h"
+#include "minmaxaggrcalc.h"
+#include "btreeiterator.h"
+
+namespace search
+{
+
+namespace btree
+{
+
+template <typename KeyT,
+ typename DataT,
+ typename AggrT = NoAggregated,
+ typename CompareT = std::less<KeyT>,
+ typename TraitsT = BTreeDefaultTraits,
+ class AggrCalcT = NoAggrCalc>
+class BTreeInserter
+{
+public:
+ typedef BTreeNodeAllocator<KeyT, DataT, AggrT,
+ TraitsT::INTERNAL_SLOTS,
+ TraitsT::LEAF_SLOTS> NodeAllocatorType;
+ typedef BTreeAggregator<KeyT, DataT, AggrT,
+ TraitsT::INTERNAL_SLOTS,
+ TraitsT::LEAF_SLOTS,
+ AggrCalcT> Aggregator;
+ typedef BTreeIterator<KeyT, DataT, AggrT,
+ CompareT, TraitsT> Iterator;
+ typedef BTreeInternalNode<KeyT, AggrT, TraitsT::INTERNAL_SLOTS>
+ InternalNodeType;
+ typedef BTreeLeafNode<KeyT, DataT, AggrT, TraitsT::LEAF_SLOTS>
+ LeafNodeType;
+ typedef KeyT KeyType;
+ typedef DataT DataType;
+ typedef typename InternalNodeType::RefPair InternalNodeTypeRefPair;
+ typedef typename LeafNodeType::RefPair LeafNodeTypeRefPair;
+
+ static void
+ insert(BTreeNode::Ref &root,
+ Iterator &itr,
+ const KeyType &key, const DataType &data,
+ const AggrCalcT &aggrCalc);
+};
+
+extern template class BTreeInserter<uint32_t, uint32_t, NoAggregated>;
+extern template class BTreeInserter<uint32_t, BTreeNoLeafData, NoAggregated>;
+extern template class BTreeInserter<uint32_t, int32_t, MinMaxAggregated,
+ std::less<uint32_t>,
+ BTreeDefaultTraits,
+ MinMaxAggrCalc>;
+
+} // namespace search::btree
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/btree/btreeinserter.hpp b/searchlib/src/vespa/searchlib/btree/btreeinserter.hpp
new file mode 100644
index 00000000000..597f75aa5ef
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/btree/btreeinserter.hpp
@@ -0,0 +1,113 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "btreeinserter.h"
+#include "btreerootbase.hpp"
+#include "btreeiterator.hpp"
+#include <vespa/vespalib/stllike/asciistream.h>
+
+namespace search {
+namespace btree {
+
+
+template <typename KeyT, typename DataT, typename AggrT, typename CompareT,
+ typename TraitsT, class AggrCalcT>
+void
+BTreeInserter<KeyT, DataT, AggrT, CompareT, TraitsT, AggrCalcT>::
+insert(BTreeNode::Ref &root,
+ Iterator &itr,
+ const KeyType &key, const DataType &data,
+ const AggrCalcT &aggrCalc)
+{
+ if (!NodeAllocatorType::isValidRef(root)) {
+ root = itr.insertFirst(key, data, aggrCalc);
+ return;
+ }
+ NodeAllocatorType &allocator(itr.getAllocator());
+ bool inRange = itr.valid();
+ if (!inRange) {
+ --itr;
+ }
+ root = itr.thaw(root);
+ uint32_t idx = itr.getLeafNodeIdx() + (inRange ? 0 : 1);
+ LeafNodeType * lnode = itr.getLeafNode();
+ BTreeNode::Ref splitNodeRef;
+ const KeyT *splitLastKey = NULL;
+ bool inRightSplit = false;
+ AggrT oldca(AggrCalcT::hasAggregated() ? lnode->getAggregated() : AggrT());
+ AggrT ca;
+ if (lnode->isFull()) {
+ LeafNodeTypeRefPair splitNode = allocator.allocLeafNode();
+ lnode->splitInsert(splitNode.second, idx, key, data);
+ if (AggrCalcT::hasAggregated()) {
+ ca = Aggregator::recalc(*lnode, *splitNode.second, aggrCalc);
+ }
+ splitNodeRef = splitNode.first; // to signal that a split occured
+ splitLastKey = &splitNode.second->getLastKey();
+ inRightSplit = itr.setLeafNodeIdx(idx, splitNode.second);
+ } else {
+ lnode->insert(idx, key, data);
+ itr.setLeafNodeIdx(idx);
+ if (AggrCalcT::hasAggregated()) {
+ aggrCalc.add(lnode->getAggregated(), aggrCalc.getVal(data));
+ ca = lnode->getAggregated();
+ }
+ }
+ const KeyT *lastKey = &lnode->getLastKey();
+ uint32_t level = 0;
+ uint32_t levels = itr.getPathSize();
+ for (; level < levels; ++level) {
+ typename Iterator::PathElement &pe = itr.getPath(level);
+ InternalNodeType *node(pe.getWNode());
+ idx = pe.getIdx();
+ AggrT olda(AggrCalcT::hasAggregated() ?
+ node->getAggregated() : AggrT());
+ BTreeNode::Ref subNode = node->getChild(idx);
+ node->update(idx, *lastKey, subNode);
+ node->incValidLeaves(1);
+ if (NodeAllocatorType::isValidRef(splitNodeRef)) {
+ idx++; // the extra node is inserted in the next slot
+ if (node->isFull()) {
+ InternalNodeTypeRefPair splitNode =
+ allocator.allocInternalNode(level + 1);
+ node->splitInsert(splitNode.second, idx,
+ *splitLastKey, splitNodeRef, allocator);
+ inRightSplit = pe.adjustSplit(inRightSplit, splitNode.second);
+ if (AggrCalcT::hasAggregated()) {
+ ca = Aggregator::recalc(*node, *splitNode.second,
+ allocator, aggrCalc);
+ }
+ splitNodeRef = splitNode.first;
+ splitLastKey = &splitNode.second->getLastKey();
+ } else {
+ node->insert(idx, *splitLastKey, splitNodeRef);
+ pe.adjustSplit(inRightSplit);
+ inRightSplit = false;
+ if (AggrCalcT::hasAggregated()) {
+ aggrCalc.add(node->getAggregated(), oldca, ca);
+ ca = node->getAggregated();
+ }
+ splitNodeRef = BTreeNode::Ref();
+ splitLastKey = NULL;
+ }
+ } else {
+ if (AggrCalcT::hasAggregated()) {
+ aggrCalc.add(node->getAggregated(), oldca, ca);
+ ca = node->getAggregated();
+ }
+ }
+ if (AggrCalcT::hasAggregated()) {
+ oldca = olda;
+ }
+ lastKey = &node->getLastKey();
+ }
+ if (NodeAllocatorType::isValidRef(splitNodeRef)) {
+ root = itr.addLevel(root, splitNodeRef, inRightSplit, aggrCalc);
+ }
+}
+
+
+} // namespace search::btree
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/btree/btreeiterator.cpp b/searchlib/src/vespa/searchlib/btree/btreeiterator.cpp
new file mode 100644
index 00000000000..cdc9895d8aa
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/btree/btreeiterator.cpp
@@ -0,0 +1,26 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "btreeroot.h"
+#include "btreenodeallocator.h"
+#include "btreeiterator.hpp"
+#include "btreenode.hpp"
+
+namespace search
+{
+
+namespace btree
+{
+
+template class BTreeIteratorBase<uint32_t, uint32_t, NoAggregated>;
+template class BTreeIteratorBase<uint32_t, BTreeNoLeafData, NoAggregated>;
+template class BTreeIteratorBase<uint32_t, int32_t, MinMaxAggregated>;
+template class BTreeConstIterator<uint32_t, uint32_t, NoAggregated>;
+template class BTreeConstIterator<uint32_t, BTreeNoLeafData, NoAggregated>;
+template class BTreeConstIterator<uint32_t, int32_t, MinMaxAggregated>;
+template class BTreeIterator<uint32_t, uint32_t, NoAggregated>;
+template class BTreeIterator<uint32_t, BTreeNoLeafData, NoAggregated>;
+template class BTreeIterator<uint32_t, int32_t, MinMaxAggregated>;
+
+} // namespace btree
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/btree/btreeiterator.h b/searchlib/src/vespa/searchlib/btree/btreeiterator.h
new file mode 100644
index 00000000000..ef8c3babb25
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/btree/btreeiterator.h
@@ -0,0 +1,885 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "btreenode.h"
+#include "btreenodeallocator.h"
+#include "btreetraits.h"
+
+namespace search {
+namespace btree {
+
+template <typename, typename, typename, typename, typename, class>
+class BTreeInserter;
+template <typename, typename, typename, size_t, size_t, class>
+class BTreeRemoverBase;
+template <typename, typename, typename, typename, typename, class>
+class BTreeRemover;
+template <typename, typename, typename, typename, typename>
+class BTreeIterator;
+
+/**
+ * Helper class to provide internal or leaf node and position within node.
+ */
+template <class NodeT>
+class NodeElement
+{
+ template <typename, typename, typename, typename, typename, class>
+ friend class BTreeInserter;
+ template <typename, typename, typename, size_t, size_t, class>
+ friend class BTreeRemoverBase;
+ template <typename, typename, typename, typename, typename, class>
+ friend class BTreeRemover;
+ template <typename, typename, typename, typename, typename>
+ friend class BTreeIterator;
+
+ typedef NodeT NodeType;
+ typedef typename NodeType::KeyType KeyType;
+ typedef typename NodeType::DataType DataType;
+ const NodeType *_node;
+ uint32_t _idx;
+
+ NodeType *
+ getWNode(void) const
+ {
+ return const_cast<NodeType *>(_node);
+ }
+
+public:
+ NodeElement(void)
+ : _node(NULL),
+ _idx(0u)
+ {
+ }
+
+ NodeElement(const NodeType *node, uint32_t idx)
+ : _node(node),
+ _idx(idx)
+ {
+ }
+
+ void
+ setNode(const NodeType *node)
+ {
+ _node = node;
+ }
+
+ const NodeType *
+ getNode(void) const
+ {
+ return _node;
+ }
+
+ void
+ setIdx(uint32_t idx)
+ {
+ _idx = idx;
+ }
+
+ uint32_t
+ getIdx(void) const
+ {
+ return _idx;
+ }
+
+ void
+ incIdx(void)
+ {
+ ++_idx;
+ }
+
+ void
+ decIdx(void)
+ {
+ --_idx;
+ }
+
+ void
+ setNodeAndIdx(const NodeType *node, uint32_t idx)
+ {
+ _node = node;
+ _idx = idx;
+ }
+
+ const KeyType &
+ getKey() const
+ {
+ return _node->getKey(_idx);
+ }
+
+ const DataType &
+ getData() const
+ {
+ return _node->getData(_idx);
+ }
+
+ bool
+ valid() const
+ {
+ return _node != NULL;
+ }
+
+ void
+ adjustLeftVictimKilled(void)
+ {
+ assert(_idx > 0);
+ --_idx;
+ }
+
+ void
+ adjustSteal(uint32_t stolen)
+ {
+ assert(_idx + stolen < _node->validSlots());
+ _idx += stolen;
+ }
+
+ void
+ adjustSplit(bool inRightSplit)
+ {
+ if (inRightSplit)
+ ++_idx;
+ }
+
+ bool
+ adjustSplit(bool inRightSplit, const NodeType *splitNode)
+ {
+ adjustSplit(inRightSplit);
+ if (_idx >= _node->validSlots()) {
+ _idx -= _node->validSlots();
+ _node = splitNode;
+ return true;
+ }
+ return false;
+ }
+
+ void
+ swap(NodeElement &rhs)
+ {
+ std::swap(_node, rhs._node);
+ std::swap(_idx, rhs._idx);
+ }
+
+ bool
+ operator!=(const NodeElement &rhs) const
+ {
+ return _node != rhs._node ||
+ _idx != rhs._idx;
+ }
+};
+
+
+/**
+ * Base class for B-tree iterators. It defines all members needed
+ * for the iterator and methods that don't depend on tree ordering.
+ */
+template <typename KeyT,
+ typename DataT,
+ typename AggrT,
+ uint32_t INTERNAL_SLOTS = BTreeDefaultTraits::INTERNAL_SLOTS,
+ uint32_t LEAF_SLOTS = BTreeDefaultTraits::LEAF_SLOTS,
+ uint32_t PATH_SIZE = BTreeDefaultTraits::PATH_SIZE>
+class BTreeIteratorBase
+{
+protected:
+ typedef BTreeNodeAllocator<KeyT, DataT, AggrT,
+ INTERNAL_SLOTS,
+ LEAF_SLOTS> NodeAllocatorType;
+ typedef BTreeInternalNode<KeyT, AggrT, INTERNAL_SLOTS> InternalNodeType;
+ typedef BTreeLeafNode<KeyT, DataT, AggrT, LEAF_SLOTS> LeafNodeType;
+ typedef typename InternalNodeType::RefPair InternalNodeTypeRefPair;
+ typedef typename LeafNodeType::RefPair LeafNodeTypeRefPair;
+ typedef BTreeLeafNodeTemp<KeyT, DataT, AggrT, LEAF_SLOTS> LeafNodeTempType;
+ typedef BTreeKeyData<KeyT, DataT> KeyDataType;
+ typedef KeyT KeyType;
+ typedef DataT DataType;
+ template <typename, typename, typename, typename, typename, class>
+ friend class BTreeInserter;
+ template <typename, typename, typename, size_t, size_t, class>
+ friend class BTreeRemoverBase;
+ template <typename, typename, typename, typename, typename, class>
+ friend class BTreeRemover;
+
+ typedef NodeElement<LeafNodeType> LeafElement;
+
+ /**
+ * Current leaf node and current index within it.
+ */
+ LeafElement _leaf;
+ /**
+ * Pointer to internal node and index to the child used to
+ * traverse down the tree
+ */
+ typedef NodeElement<InternalNodeType> PathElement;
+ /**
+ * Path from current leaf node up to the root (path[0] is the
+ * parent of the leaf node)
+ */
+ PathElement _path[PATH_SIZE];
+ size_t _pathSize;
+
+ const NodeAllocatorType *_allocator;
+
+ const LeafNodeType *_leafRoot; // Root node for small tree/array
+
+ // Temporary leaf node when iterating over short arrays
+ std::unique_ptr<LeafNodeTempType> _compatLeafNode;
+
+private:
+ /*
+ * Find the next leaf node, called by operator++() as needed.
+ */
+ void findNextLeafNode();
+
+ /*
+ * Find the previous leaf node, called by operator--() as needed.
+ */
+ VESPA_DLL_LOCAL void findPrevLeafNode();
+
+protected:
+ /*
+ * Report current position in tree.
+ *
+ * @param pidx Number of levels above leaf nodes to take into account.
+ */
+ size_t
+ position(uint32_t pidx) const;
+
+ /**
+ * Create iterator pointing to first element in the tree referenced
+ * by root.
+ *
+ * @param root Reference to root of tree
+ * @param allocator B-tree node allocator helper class.
+ */
+ BTreeIteratorBase(BTreeNode::Ref root, const NodeAllocatorType &allocator);
+
+ /**
+ * Compability constructor, creating a temporary tree with only a
+ * temporary leaf node owned by the iterator.
+ */
+ template <class AggrCalcT>
+ BTreeIteratorBase(const KeyDataType *shortArray,
+ uint32_t arraySize,
+ const NodeAllocatorType &allocator,
+ const AggrCalcT &aggrCalc);
+
+ /**
+ * Default constructor. Iterator is not associated with a tree.
+ */
+ BTreeIteratorBase();
+
+ /**
+ * Step iterator forwards. If at end then leave it at end.
+ */
+ BTreeIteratorBase &
+ operator++() {
+ if (_leaf.getNode() == NULL) {
+ return *this;
+ }
+ _leaf.incIdx();
+ if (_leaf.getIdx() < _leaf.getNode()->validSlots()) {
+ return *this;
+ }
+ findNextLeafNode();
+ return *this;
+ }
+
+ /**
+ * Step iterator backwards. If at end then place it at last valid
+ * position in tree (cf. rbegin())
+ */
+ BTreeIteratorBase &
+ operator--();
+
+ ~BTreeIteratorBase();
+ BTreeIteratorBase(const BTreeIteratorBase &other);
+ BTreeIteratorBase &operator=(const BTreeIteratorBase &other);
+
+
+ /**
+ * Set new tree height and clear portions of path that are now
+ * beyond new tree height. For internal use only.
+ *
+ * @param pathSize New tree height (number of levels of internal nodes)
+ */
+ void
+ clearPath(uint32_t pathSize);
+public:
+
+ bool
+ operator==(const BTreeIteratorBase & rhs) const {
+ if (_leaf.getNode() != rhs._leaf.getNode() ||
+ _leaf.getIdx() != rhs._leaf.getIdx()) {
+ return false;
+ }
+ return true;
+ }
+
+ bool
+ operator!=(const BTreeIteratorBase & rhs) const
+ {
+ return !operator==(rhs);
+ }
+
+ /**
+ * Swap iterator with the other.
+ *
+ * @param rhs Other iterator.
+ */
+ void
+ swap(BTreeIteratorBase & rhs);
+
+ /**
+ * Get key at current iterator location.
+ */
+ const KeyType &
+ getKey() const
+ {
+ return _leaf.getKey();
+ }
+
+ /**
+ * Get data at current iterator location.
+ */
+ const DataType &
+ getData() const
+ {
+ return _leaf.getData();
+ }
+
+ /**
+ * Check if iterator is at a valid element, i.e. not at end.
+ */
+ bool
+ valid() const
+ {
+ return _leaf.valid();
+ }
+
+ /**
+ * Return the number of elements in the tree.
+ */
+ size_t
+ size() const;
+
+
+ /**
+ * Return the current position in the tree.
+ */
+ size_t
+ position() const
+ {
+ return position(_pathSize);
+ }
+
+ /**
+ * Return the distance between two positions in the tree.
+ */
+ ssize_t
+ operator-(const BTreeIteratorBase &rhs) const;
+
+ /**
+ * Return if the tree has data or not (e.g. keys and data or only keys).
+ */
+ static bool
+ hasData(void)
+ {
+ return LeafNodeType::hasData();
+ }
+
+ /**
+ * Move the iterator directly to end. Used by findHelper method in BTree.
+ */
+ void
+ setupEnd(void);
+
+ /**
+ * Setup iterator to be empty and not be associated with any tree.
+ */
+ void
+ setupEmpty(void);
+
+ /**
+ * Move iterator to beyond last element in the current tree.
+ */
+ void
+ end(void) __attribute__((noinline));
+
+ /**
+ * Move iterator to beyond last element in the given tree.
+ *
+ * @param rootRef Reference to root of tree.
+ */
+ void
+ end(BTreeNode::Ref rootRef);
+
+ /**
+ * Move iterator to first element in the current tree.
+ */
+ void
+ begin(void);
+
+ /**
+ * Move iterator to first element in the given tree.
+ *
+ * @param rootRef Reference to root of tree.
+ */
+ void
+ begin(BTreeNode::Ref rootRef);
+
+ /**
+ * Move iterator to last element in the current tree.
+ */
+ void
+ rbegin(void);
+
+ /*
+ * Get aggregated values for the current tree.
+ */
+ const AggrT &
+ getAggregated(void) const;
+
+ bool
+ identical(const BTreeIteratorBase &rhs) const;
+
+ template <typename FunctionType>
+ void
+ foreach_key(FunctionType func) const
+ {
+ if (_pathSize > 0) {
+ _path[_pathSize - 1].getNode()->
+ foreach_key(_allocator->getNodeStore(), func);
+ } else if (_leafRoot != nullptr) {
+ _leafRoot->foreach_key(func);
+ }
+ }
+};
+
+
+/**
+ * Iterator class for read access to B-trees. It defines methods to
+ * navigate in the tree, useable for implementing search iterators and
+ * for positioning in preparation for tree changes (cf. BTreeInserter and
+ * BTreeRemover).
+ */
+template <typename KeyT,
+ typename DataT,
+ typename AggrT = NoAggregated,
+ typename CompareT = std::less<KeyT>,
+ typename TraitsT = BTreeDefaultTraits>
+class BTreeConstIterator : public BTreeIteratorBase<KeyT, DataT, AggrT,
+ TraitsT::INTERNAL_SLOTS,
+ TraitsT::LEAF_SLOTS,
+ TraitsT::PATH_SIZE>
+{
+protected:
+ typedef BTreeIteratorBase<KeyT,
+ DataT,
+ AggrT,
+ TraitsT::INTERNAL_SLOTS,
+ TraitsT::LEAF_SLOTS,
+ TraitsT::PATH_SIZE> ParentType;
+ typedef typename ParentType::NodeAllocatorType NodeAllocatorType;
+ typedef typename ParentType::InternalNodeType InternalNodeType;
+ typedef typename ParentType::LeafNodeType LeafNodeType;
+ typedef typename ParentType::InternalNodeTypeRefPair
+ InternalNodeTypeRefPair;
+ typedef typename ParentType::LeafNodeTypeRefPair LeafNodeTypeRefPair;
+ typedef typename ParentType::LeafNodeTempType LeafNodeTempType;
+ typedef typename ParentType::KeyDataType KeyDataType;
+ typedef typename ParentType::KeyType KeyType;
+ typedef typename ParentType::DataType DataType;
+ typedef typename ParentType::PathElement PathElement;
+
+ using ParentType::_leaf;
+ using ParentType::_path;
+ using ParentType::_pathSize;
+ using ParentType::_allocator;
+ using ParentType::_leafRoot;
+ using ParentType::_compatLeafNode;
+ using ParentType::clearPath;
+ using ParentType::setupEmpty;
+public:
+ using ParentType::end;
+
+protected:
+ /** Pointer to seek node and path index to the parent node **/
+ typedef std::pair<const BTreeNode *, uint32_t> SeekNode;
+
+public:
+ /**
+ * Create iterator pointing to first element in the tree referenced
+ * by root.
+ *
+ * @param root Reference to root of tree
+ * @param allocator B-tree node allocator helper class.
+ */
+ BTreeConstIterator(BTreeNode::Ref root, const NodeAllocatorType &allocator)
+ : ParentType(root, allocator)
+ {
+ }
+
+ /**
+ * Compability constructor, creating a temporary tree with only a
+ * temporary leaf node owned by the iterator.
+ */
+ template <class AggrCalcT>
+ BTreeConstIterator(const KeyDataType *shortArray,
+ uint32_t arraySize,
+ const NodeAllocatorType &allocator,
+ const AggrCalcT &aggrCalc)
+ : ParentType(shortArray, arraySize, allocator, aggrCalc)
+ {
+ }
+
+ /**
+ * Default constructor. Iterator is not associated with a tree.
+ */
+ BTreeConstIterator()
+ : ParentType()
+ {
+ }
+
+ /**
+ * Step iterator forwards. If at end then leave it at end.
+ */
+ BTreeConstIterator &
+ operator++()
+ {
+ ParentType::operator++();
+ return *this;
+ }
+
+ /**
+ * Step iterator backwards. If at end then place it at last valid
+ * position in tree (cf. rbegin())
+ */
+ BTreeConstIterator &
+ operator--()
+ {
+ ParentType::operator--();
+ return *this;
+ }
+
+ /**
+ * Position iterator at first position with a key that is greater
+ * than or equal to the key argument. The iterator must be set up
+ * for the same tree before this method is called.
+ *
+ * @param key Key to search for
+ * @param comp Comparator for the tree ordering.
+ */
+ void
+ lower_bound(const KeyType & key, CompareT comp = CompareT());
+
+ /**
+ * Position iterator at first position with a key that is greater
+ * than or equal to the key argument in the tree referenced by rootRef.
+ *
+ * @param key Key to search for
+ * @param comp Comparator for the tree ordering.
+ */
+ void
+ lower_bound(BTreeNode::Ref rootRef,
+ const KeyType & key, CompareT comp = CompareT());
+
+ /**
+ * Step iterator forwards until it is at a position with a key
+ * that is greater than or equal to the key argument. Original
+ * position must be valid with a key that is less than the key argument.
+ *
+ * Tree traits determine if binary or linear search is performed within
+ * each tree node.
+ *
+ * @param key Key to search for
+ * @param comp Comparator for the tree ordering.
+ */
+ void
+ seek(const KeyType &key, CompareT comp = CompareT());
+
+ /**
+ * Step iterator forwards until it is at a position with a key
+ * that is greater than or equal to the key argument. Original
+ * position must be valid with a key that is less than the key argument.
+ *
+ * Binary search is performed within each tree node.
+ *
+ * @param key Key to search for
+ * @param comp Comparator for the tree ordering.
+ */
+ void
+ binarySeek(const KeyType &key, CompareT comp = CompareT());
+
+ /**
+ * Step iterator forwards until it is at a position with a key
+ * that is greater than or equal to the key argument. Original
+ * position must be valid with a key that is less than the key argument.
+ *
+ * Linear search is performed within each tree node.
+ *
+ * @param key Key to search for
+ * @param comp Comparator for the tree ordering.
+ */
+ void
+ linearSeek(const KeyType &key, CompareT comp = CompareT());
+
+ /**
+ * Step iterator forwards until it is at a position with a key
+ * that is greater than the key argument. Original position must
+ * be valid with a key that is less than or equal to the key argument.
+ *
+ * Tree traits determine if binary or linear search is performed within
+ * each tree node.
+ *
+ * @param key Key to search for
+ * @param comp Comparator for the tree ordering.
+ */
+ void
+ seekPast(const KeyType &key, CompareT comp = CompareT());
+
+ /**
+ * Step iterator forwards until it is at a position with a key
+ * that is greater than the key argument. Original position must
+ * be valid with a key that is less than or equal to the key argument.
+ *
+ * Binary search is performed within each tree node.
+ *
+ * @param key Key to search for
+ * @param comp Comparator for the tree ordering.
+ */
+ void
+ binarySeekPast(const KeyType &key, CompareT comp = CompareT());
+
+ /**
+ * Step iterator forwards until it is at a position with a key
+ * that is greater than the key argument. Original position must
+ * be valid with a key that is less than or equal to the key argument.
+ *
+ * Linear search is performed within each tree node.
+ *
+ * @param key Key to search for
+ * @param comp Comparator for the tree ordering.
+ */
+ void
+ linearSeekPast(const KeyType &key, CompareT comp = CompareT());
+
+ /**
+ * Validate the iterator as a valid iterator or positioned at
+ * end in the tree referenced by rootRef. Validation failure
+ * triggers asserts. This method is for internal debugging use only.
+ *
+ * @param rootRef Reference to root of tree to operate on
+ * @param comp Comparator for the tree ordering.
+ */
+ void
+ validate(BTreeNode::Ref rootRef, CompareT comp = CompareT());
+};
+
+
+/**
+ * Iterator class for write access to B-trees. It contains some helper
+ * methods used by BTreeInserter and BTreeRemover when modifying a tree.
+ */
+template <typename KeyT,
+ typename DataT,
+ typename AggrT = NoAggregated,
+ typename CompareT = std::less<KeyT>,
+ typename TraitsT = BTreeDefaultTraits>
+class BTreeIterator : public BTreeConstIterator<KeyT, DataT, AggrT,
+ CompareT, TraitsT>
+{
+public:
+ typedef BTreeConstIterator<KeyT,
+ DataT,
+ AggrT,
+ CompareT,
+ TraitsT> ParentType;
+ typedef typename ParentType::NodeAllocatorType NodeAllocatorType;
+ typedef typename ParentType::InternalNodeType InternalNodeType;
+ typedef typename ParentType::LeafNodeType LeafNodeType;
+ typedef typename ParentType::InternalNodeTypeRefPair
+ InternalNodeTypeRefPair;
+ typedef typename ParentType::LeafNodeTypeRefPair LeafNodeTypeRefPair;
+ typedef typename ParentType::LeafNodeTempType LeafNodeTempType;
+ typedef typename ParentType::KeyDataType KeyDataType;
+ typedef typename ParentType::KeyType KeyType;
+ typedef typename ParentType::DataType DataType;
+ typedef typename ParentType::PathElement PathElement;
+ template <typename, typename, typename, typename, typename, class>
+ friend class BTreeInserter;
+ template <typename, typename, typename, size_t, size_t, class>
+ friend class BTreeRemoverBase;
+ template <typename, typename, typename, typename, typename, class>
+ friend class BTreeRemover;
+
+ using ParentType::_leaf;
+ using ParentType::_path;
+ using ParentType::_pathSize;
+ using ParentType::_allocator;
+ using ParentType::_leafRoot;
+ using ParentType::_compatLeafNode;
+ using ParentType::end;
+
+ BTreeIterator(BTreeNode::Ref root, const NodeAllocatorType &allocator)
+ : ParentType(root, allocator)
+ {
+ }
+
+ template <class AggrCalcT>
+ BTreeIterator(const KeyDataType *shortArray,
+ uint32_t arraySize,
+ const NodeAllocatorType &allocator,
+ const AggrCalcT &aggrCalc)
+ : ParentType(shortArray, arraySize, allocator, aggrCalc)
+ {
+ }
+
+ BTreeIterator()
+ : ParentType()
+ {
+ }
+
+ BTreeIterator &
+ operator++()
+ {
+ ParentType::operator++();
+ return *this;
+ }
+
+ BTreeIterator &
+ operator--()
+ {
+ ParentType::operator--();
+ return *this;
+ }
+
+ NodeAllocatorType &
+ getAllocator(void) const
+ {
+ return const_cast<NodeAllocatorType &>(*_allocator);
+ }
+
+ BTreeNode::Ref
+ moveFirstLeafNode(BTreeNode::Ref rootRef);
+
+ void
+ moveNextLeafNode();
+
+ void
+ writeData(const DataType &data)
+ {
+ _leaf.getWNode()->writeData(_leaf.getIdx(), data);
+ }
+
+ /**
+ * Set a new key for the current iterator position.
+ * The new key must have the same semantic meaning as the old key.
+ * Typically used when compacting data store containing keys.
+ */
+ void
+ writeKey(const KeyType &key);
+
+ /**
+ * Updata data at the current iterator position. The tree should
+ * have been thawed.
+ *
+ * @param data New data value
+ * @param aggrCalc Calculator for updating aggregated information.
+ */
+ template <class AggrCalcT>
+ void
+ updateData(const DataType &data, const AggrCalcT &aggrCalc);
+
+ /**
+ * Thaw a path from the root node down the the current leaf node in
+ * the current tree, allowing for updates to be performed without
+ * disturbing the frozen version of the tree.
+ */
+ BTreeNode::Ref
+ thaw(BTreeNode::Ref rootRef);
+
+private:
+ /* Insert into empty tree */
+ template <class AggrCalcT>
+ BTreeNode::Ref
+ insertFirst(const KeyType &key, const DataType &data,
+ const AggrCalcT &aggrCalc);
+
+ LeafNodeType *
+ getLeafNode(void) const
+ {
+ return _leaf.getWNode();
+ }
+
+ bool
+ setLeafNodeIdx(uint32_t idx, const LeafNodeType *splitLeafNode);
+
+ void
+ setLeafNodeIdx(uint32_t idx)
+ {
+ _leaf.setIdx(idx);
+ }
+
+ uint32_t
+ getLeafNodeIdx(void) const
+ {
+ return _leaf.getIdx();
+ }
+
+ uint32_t
+ getPathSize(void) const
+ {
+ return _pathSize;
+ }
+
+ PathElement &
+ getPath(uint32_t pidx)
+ {
+ return _path[pidx];
+ }
+
+ template <class AggrCalcT>
+ BTreeNode::Ref
+ addLevel(BTreeNode::Ref rootRef, BTreeNode::Ref splitNodeRef,
+ bool inRightSplit, const AggrCalcT &aggrCalc);
+
+ BTreeNode::Ref
+ removeLevel(BTreeNode::Ref rootRef, InternalNodeType *rootNode);
+
+ void
+ removeLast(BTreeNode::Ref rootRef);
+
+ void
+ adjustSteal(uint32_t level, bool leftVictimKilled, uint32_t stolen)
+ {
+ assert(_pathSize > level);
+ if (leftVictimKilled) {
+ _path[level].adjustLeftVictimKilled();
+ }
+ if (stolen != 0) {
+ if (level > 0)
+ _path[level - 1].adjustSteal(stolen);
+ else
+ _leaf.adjustSteal(stolen);
+ }
+ }
+};
+
+
+extern template class BTreeIteratorBase<uint32_t, uint32_t, NoAggregated>;
+extern template class BTreeIteratorBase<uint32_t, BTreeNoLeafData,
+ NoAggregated>;
+extern template class BTreeIteratorBase<uint32_t, int32_t,
+ MinMaxAggregated>;
+extern template class BTreeConstIterator<uint32_t, uint32_t, NoAggregated>;
+extern template class BTreeConstIterator<uint32_t, BTreeNoLeafData,
+ NoAggregated>;
+extern template class BTreeConstIterator<uint32_t, int32_t,
+ MinMaxAggregated>;
+extern template class BTreeIterator<uint32_t, uint32_t, NoAggregated>;
+extern template class BTreeIterator<uint32_t, BTreeNoLeafData, NoAggregated>;
+extern template class BTreeIterator<uint32_t, int32_t, MinMaxAggregated>;
+
+} // namespace search::btree
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/btree/btreeiterator.hpp b/searchlib/src/vespa/searchlib/btree/btreeiterator.hpp
new file mode 100644
index 00000000000..d9439f94bfb
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/btree/btreeiterator.hpp
@@ -0,0 +1,1330 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "btreeiterator.h"
+#include "btreeaggregator.h"
+#include <vespa/vespalib/stllike/asciistream.h>
+
+namespace search {
+namespace btree {
+
+#define STRICT_BTREE_ITERATOR_SEEK
+
+namespace {
+
+template <typename KeyT>
+vespalib::string
+keyToStr(const KeyT & key)
+{
+ vespalib::asciistream ss;
+ ss << key;
+ return ss.str();
+}
+
+}
+
+template <typename KeyT, typename DataT, typename AggrT,
+ uint32_t INTERNAL_SLOTS, uint32_t LEAF_SLOTS, uint32_t PATH_SIZE>
+BTreeIteratorBase<KeyT, DataT, AggrT, INTERNAL_SLOTS, LEAF_SLOTS, PATH_SIZE>::
+BTreeIteratorBase(const BTreeIteratorBase &other)
+ : _leaf(other._leaf),
+ _pathSize(other._pathSize),
+ _allocator(other._allocator),
+ _leafRoot(other._leafRoot),
+ _compatLeafNode()
+{
+ for (size_t i = 0; i < _pathSize; ++i) {
+ _path[i] = other._path[i];
+ }
+ if (other._compatLeafNode.get()) {
+ _compatLeafNode.reset( new LeafNodeTempType(*other._compatLeafNode));
+ }
+ if (other._leaf.getNode() == other._compatLeafNode.get()) {
+ _leaf.setNode(_compatLeafNode.get());
+ }
+ if (other._leafRoot == other._compatLeafNode.get()) {
+ _leafRoot = _compatLeafNode.get();
+ }
+}
+
+template <typename KeyT, typename DataT, typename AggrT,
+ uint32_t INTERNAL_SLOTS, uint32_t LEAF_SLOTS, uint32_t PATH_SIZE>
+void
+BTreeIteratorBase<KeyT, DataT, AggrT, INTERNAL_SLOTS, LEAF_SLOTS, PATH_SIZE>::
+swap(BTreeIteratorBase & other)
+{
+ std::swap(_leaf, other._leaf);
+ std::swap(_pathSize, other._pathSize);
+ std::swap(_path, other._path);
+ std::swap(_allocator, other._allocator);
+ std::swap(_leafRoot, other._leafRoot);
+ std::swap(_compatLeafNode, other._compatLeafNode);
+}
+
+
+template <typename KeyT, typename DataT, typename AggrT,
+ uint32_t INTERNAL_SLOTS, uint32_t LEAF_SLOTS, uint32_t PATH_SIZE>
+void
+BTreeIteratorBase<KeyT, DataT, AggrT, INTERNAL_SLOTS, LEAF_SLOTS, PATH_SIZE>::
+clearPath(uint32_t pathSize)
+{
+ uint32_t level = _pathSize;
+ while (level > pathSize) {
+ --level;
+ _path[level].setNodeAndIdx(NULL, 0u);
+ }
+ _pathSize = pathSize;
+}
+
+
+template <typename KeyT, typename DataT, typename AggrT,
+ uint32_t INTERNAL_SLOTS, uint32_t LEAF_SLOTS, uint32_t PATH_SIZE>
+BTreeIteratorBase<KeyT, DataT, AggrT, INTERNAL_SLOTS, LEAF_SLOTS, PATH_SIZE> &
+BTreeIteratorBase<KeyT, DataT, AggrT, INTERNAL_SLOTS, LEAF_SLOTS, PATH_SIZE>::
+operator=(const BTreeIteratorBase &other)
+{
+ if (&other == this) {
+ return *this;
+ }
+ BTreeIteratorBase tmp(other);
+ swap(tmp);
+ return *this;
+}
+
+template <typename KeyT, typename DataT, typename AggrT,
+ uint32_t INTERNAL_SLOTS, uint32_t LEAF_SLOTS, uint32_t PATH_SIZE>
+BTreeIteratorBase<KeyT, DataT, AggrT, INTERNAL_SLOTS, LEAF_SLOTS, PATH_SIZE>::
+~BTreeIteratorBase()
+{
+}
+
+template <typename KeyT, typename DataT, typename AggrT,
+ uint32_t INTERNAL_SLOTS, uint32_t LEAF_SLOTS, uint32_t PATH_SIZE>
+void
+BTreeIteratorBase<KeyT, DataT, AggrT, INTERNAL_SLOTS, LEAF_SLOTS, PATH_SIZE>::
+setupEnd(void)
+{
+ _leaf.setNodeAndIdx(NULL, 0u);
+}
+
+
+template <typename KeyT, typename DataT, typename AggrT,
+ uint32_t INTERNAL_SLOTS, uint32_t LEAF_SLOTS, uint32_t PATH_SIZE>
+void
+BTreeIteratorBase<KeyT, DataT, AggrT, INTERNAL_SLOTS, LEAF_SLOTS, PATH_SIZE>::
+setupEmpty(void)
+{
+ clearPath(0u);
+ _leaf.setNodeAndIdx(NULL, 0u);
+ _leafRoot = NULL;
+}
+
+
+template <typename KeyT, typename DataT, typename AggrT,
+ uint32_t INTERNAL_SLOTS, uint32_t LEAF_SLOTS, uint32_t PATH_SIZE>
+void
+BTreeIteratorBase<KeyT, DataT, AggrT, INTERNAL_SLOTS, LEAF_SLOTS, PATH_SIZE>::
+end(void)
+{
+ if (_pathSize == 0) {
+ if (_leafRoot == NULL)
+ return;
+ _leaf.setNodeAndIdx(NULL, 0u);
+ return;
+ }
+ uint32_t level = _pathSize - 1;
+ PathElement &pe = _path[level];
+ const InternalNodeType *inode = pe.getNode();
+ uint32_t idx = inode->validSlots();
+ pe.setIdx(idx);
+ BTreeNode::Ref childRef = inode->getChild(idx - 1);
+ while (level > 0) {
+ --level;
+ assert(!_allocator->isLeafRef(childRef));
+ inode = _allocator->mapInternalRef(childRef);
+ idx = inode->validSlots();
+ _path[level].setNodeAndIdx(inode, idx);
+ childRef = inode->getChild(idx - 1);
+ assert(childRef.valid());
+ }
+ assert(_allocator->isLeafRef(childRef));
+ _leaf.setNodeAndIdx(NULL, 0u);
+}
+
+
+template <typename KeyT, typename DataT, typename AggrT,
+ uint32_t INTERNAL_SLOTS, uint32_t LEAF_SLOTS, uint32_t PATH_SIZE>
+void
+BTreeIteratorBase<KeyT, DataT, AggrT, INTERNAL_SLOTS, LEAF_SLOTS, PATH_SIZE>::
+end(BTreeNode::Ref rootRef)
+{
+ if (!rootRef.valid()) {
+ setupEmpty();
+ return;
+ }
+ if (_allocator->isLeafRef(rootRef)) {
+ clearPath(0u);
+ const LeafNodeType *lnode = _allocator->mapLeafRef(rootRef);
+ _leafRoot = lnode;
+ _leaf.setNodeAndIdx(NULL, 0u);
+ return;
+ }
+ _leafRoot = NULL;
+ const InternalNodeType *inode = _allocator->mapInternalRef(rootRef);
+ uint32_t idx = inode->validSlots();
+ uint32_t pidx = inode->getLevel();
+ clearPath(pidx);
+ --pidx;
+ assert(pidx < PATH_SIZE);
+ _path[pidx].setNodeAndIdx(inode, idx);
+ BTreeNode::Ref childRef = inode->getChild(idx - 1);
+ assert(childRef.valid());
+ while (pidx != 0) {
+ --pidx;
+ inode = _allocator->mapInternalRef(childRef);
+ idx = inode->validSlots();
+ assert(idx > 0u);
+ _path[pidx].setNodeAndIdx(inode, idx);
+ childRef = inode->getChild(idx - 1);
+ assert(childRef.valid());
+ }
+ _leaf.setNodeAndIdx(NULL, 0u);
+}
+
+
+template <typename KeyT, typename DataT, typename AggrT,
+ uint32_t INTERNAL_SLOTS, uint32_t LEAF_SLOTS, uint32_t PATH_SIZE>
+void
+BTreeIteratorBase<KeyT, DataT, AggrT, INTERNAL_SLOTS, LEAF_SLOTS, PATH_SIZE>::
+findNextLeafNode()
+{
+ uint32_t pidx;
+ for (pidx = 0; pidx < _pathSize; ++pidx) {
+ PathElement & elem = _path[pidx];
+ const InternalNodeType * inode = elem.getNode();
+ elem.incIdx(); // advance to the next child
+ if (elem.getIdx() < inode->validSlots()) {
+ BTreeNode::Ref node = inode->getChild(elem.getIdx());
+ while (pidx > 0) {
+ // find the first leaf node under this child and update path
+ inode = _allocator->mapInternalRef(node);
+ pidx--;
+ _path[pidx].setNodeAndIdx(inode, 0u);
+ node = inode->getChild(0);
+ }
+ _leaf.setNodeAndIdx(_allocator->mapLeafRef(node), 0u);
+ return;
+ }
+ }
+ _leaf.setNodeAndIdx(NULL, 0u);
+}
+
+
+template <typename KeyT, typename DataT, typename AggrT,
+ uint32_t INTERNAL_SLOTS, uint32_t LEAF_SLOTS, uint32_t PATH_SIZE>
+void
+BTreeIteratorBase<KeyT, DataT, AggrT, INTERNAL_SLOTS, LEAF_SLOTS, PATH_SIZE>::
+findPrevLeafNode()
+{
+ uint32_t pidx;
+ for (pidx = 0; pidx < _pathSize; ++pidx) {
+ PathElement & elem = _path[pidx];
+ const InternalNodeType * inode = elem.getNode();
+ if (elem.getIdx() > 0u) {
+ elem.decIdx(); // advance to the previous child
+ BTreeNode::Ref node = inode->getChild(elem.getIdx());
+ while (pidx > 0) {
+ // find the last leaf node under this child and update path
+ inode = _allocator->mapInternalRef(node);
+ uint16_t slot = inode->validSlots() - 1;
+ pidx--;
+ _path[pidx].setNodeAndIdx(inode, slot);
+ node = inode->getChild(slot);
+ }
+ const LeafNodeType *lnode(_allocator->mapLeafRef(node));
+ _leaf.setNodeAndIdx(lnode, lnode->validSlots() - 1);
+ return;
+ }
+ }
+ // XXX: position wraps around for now, to end of list.
+ end();
+}
+
+
+template <typename KeyT, typename DataT, typename AggrT,
+ uint32_t INTERNAL_SLOTS, uint32_t LEAF_SLOTS, uint32_t PATH_SIZE>
+void
+BTreeIteratorBase<KeyT, DataT, AggrT, INTERNAL_SLOTS, LEAF_SLOTS, PATH_SIZE>::
+begin(void)
+{
+ uint32_t pidx = _pathSize;
+ if (pidx > 0u) {
+ --pidx;
+ PathElement &elem = _path[pidx];
+ elem.setIdx(0);
+ BTreeNode::Ref node = elem.getNode()->getChild(0);
+ while (pidx > 0) {
+ // find the first leaf node under this child and update path
+ const InternalNodeType * inode = _allocator->mapInternalRef(node);
+ pidx--;
+ _path[pidx].setNodeAndIdx(inode, 0u);
+ node = inode->getChild(0);
+ }
+ _leaf.setNodeAndIdx(_allocator->mapLeafRef(node), 0u);
+ } else {
+ _leaf.setNodeAndIdx(_leafRoot, 0u);
+ }
+}
+
+
+template <typename KeyT, typename DataT, typename AggrT,
+ uint32_t INTERNAL_SLOTS, uint32_t LEAF_SLOTS, uint32_t PATH_SIZE>
+void
+BTreeIteratorBase<KeyT, DataT, AggrT, INTERNAL_SLOTS, LEAF_SLOTS, PATH_SIZE>::
+begin(BTreeNode::Ref rootRef)
+{
+ if (!rootRef.valid()) {
+ setupEmpty();
+ return;
+ }
+ if (_allocator->isLeafRef(rootRef)) {
+ clearPath(0u);
+ const LeafNodeType *lnode = _allocator->mapLeafRef(rootRef);
+ _leafRoot = lnode;
+ _leaf.setNodeAndIdx(lnode, 0u);
+ return;
+ }
+ _leafRoot = NULL;
+ const InternalNodeType *inode = _allocator->mapInternalRef(rootRef);
+ uint32_t pidx = inode->getLevel();
+ clearPath(pidx);
+ --pidx;
+ assert(pidx < PATH_SIZE);
+ _path[pidx].setNodeAndIdx(inode, 0);
+ BTreeNode::Ref childRef = inode->getChild(0);
+ assert(childRef.valid());
+ while (pidx != 0) {
+ --pidx;
+ inode = _allocator->mapInternalRef(childRef);
+ _path[pidx].setNodeAndIdx(inode, 0);
+ childRef = inode->getChild(0);
+ assert(childRef.valid());
+ }
+ _leaf.setNodeAndIdx(_allocator->mapLeafRef(childRef), 0u);
+}
+
+
+template <typename KeyT, typename DataT, typename AggrT,
+ uint32_t INTERNAL_SLOTS, uint32_t LEAF_SLOTS, uint32_t PATH_SIZE>
+void
+BTreeIteratorBase<KeyT, DataT, AggrT, INTERNAL_SLOTS, LEAF_SLOTS, PATH_SIZE>::
+rbegin(void)
+{
+ uint32_t pidx = _pathSize;
+ if (pidx > 0u) {
+ --pidx;
+ PathElement &elem = _path[pidx];
+ const InternalNodeType * inode = elem.getNode();
+ uint16_t slot = inode->validSlots() - 1;
+ elem.setIdx(slot);
+ BTreeNode::Ref node = inode->getChild(slot);
+ while (pidx > 0) {
+ // find the last leaf node under this child and update path
+ inode = _allocator->mapInternalRef(node);
+ slot = inode->validSlots() - 1;
+ pidx--;
+ _path[pidx].setNodeAndIdx(inode, slot);
+ node = inode->getChild(slot);
+ }
+ const LeafNodeType *lnode(_allocator->mapLeafRef(node));
+ _leaf.setNodeAndIdx(lnode, lnode->validSlots() - 1);
+ } else {
+ _leaf.setNodeAndIdx(_leafRoot,
+ (_leafRoot != NULL) ?
+ _leafRoot->validSlots() - 1 :
+ 0u);
+ }
+}
+
+
+template <typename KeyT, typename DataT, typename AggrT,
+ uint32_t INTERNAL_SLOTS, uint32_t LEAF_SLOTS, uint32_t PATH_SIZE>
+const AggrT &
+BTreeIteratorBase<KeyT, DataT, AggrT, INTERNAL_SLOTS, LEAF_SLOTS, PATH_SIZE>::
+getAggregated(void) const
+{
+ // XXX: Undefined behavior if tree is empty.
+ uint32_t pidx = _pathSize;
+ if (pidx > 0u) {
+ return _path[pidx - 1].getNode()->getAggregated();
+ } else if (_leafRoot != NULL) {
+ return _leafRoot->getAggregated();
+ } else {
+ return LeafNodeType::getEmptyAggregated();
+ }
+}
+
+
+template <typename KeyT, typename DataT, typename AggrT,
+ uint32_t INTERNAL_SLOTS, uint32_t LEAF_SLOTS, uint32_t PATH_SIZE>
+size_t
+BTreeIteratorBase<KeyT, DataT, AggrT, INTERNAL_SLOTS, LEAF_SLOTS, PATH_SIZE>::
+position(uint32_t levels) const
+{
+ assert(_pathSize >= levels);
+ if (_leaf.getNode() == NULL)
+ return size();
+ size_t res = _leaf.getIdx();
+ if (levels == 0)
+ return res;
+ {
+ const PathElement & elem = _path[0];
+ const InternalNodeType * inode = elem.getNode();
+ uint32_t slots = inode->validSlots();
+ if (elem.getIdx() * 2 > slots) {
+ res += inode->validLeaves();
+ for (uint32_t c = elem.getIdx(); c < slots; ++c) {
+ BTreeNode::Ref node = inode->getChild(c);
+ const LeafNodeType *lnode = _allocator->mapLeafRef(node);
+ res -= lnode->validSlots();
+ }
+ } else {
+ for (uint32_t c = 0; c < elem.getIdx(); ++c) {
+ BTreeNode::Ref node = inode->getChild(c);
+ const LeafNodeType *lnode = _allocator->mapLeafRef(node);
+ res += lnode->validSlots();
+ }
+ }
+ }
+ for (uint32_t pidx = 1; pidx < levels; ++pidx) {
+ const PathElement & elem = _path[pidx];
+ const InternalNodeType * inode = elem.getNode();
+ uint32_t slots = inode->validSlots();
+ if (elem.getIdx() * 2 > slots) {
+ res += inode->validLeaves();
+ for (uint32_t c = elem.getIdx(); c < slots; ++c) {
+ BTreeNode::Ref node = inode->getChild(c);
+ const InternalNodeType *jnode =
+ _allocator->mapInternalRef(node);
+ res -= jnode->validLeaves();
+ }
+ } else {
+ for (uint32_t c = 0; c < elem.getIdx(); ++c) {
+ BTreeNode::Ref node = inode->getChild(c);
+ const InternalNodeType *jnode =
+ _allocator->mapInternalRef(node);
+ res += jnode->validLeaves();
+ }
+ }
+ }
+ return res;
+}
+
+
+template <typename KeyT, typename DataT, typename AggrT,
+ uint32_t INTERNAL_SLOTS, uint32_t LEAF_SLOTS, uint32_t PATH_SIZE>
+BTreeIteratorBase<KeyT, DataT, AggrT, INTERNAL_SLOTS, LEAF_SLOTS, PATH_SIZE>::
+BTreeIteratorBase(BTreeNode::Ref root,
+ const NodeAllocatorType &allocator)
+ : _leaf(NULL, 0u),
+ _path(),
+ _pathSize(0),
+ _allocator(&allocator),
+ _leafRoot(NULL),
+ _compatLeafNode()
+{
+ begin(root);
+}
+
+
+template <typename KeyT, typename DataT, typename AggrT,
+ uint32_t INTERNAL_SLOTS, uint32_t LEAF_SLOTS, uint32_t PATH_SIZE>
+template <class AggrCalcT>
+BTreeIteratorBase<KeyT, DataT, AggrT, INTERNAL_SLOTS, LEAF_SLOTS, PATH_SIZE>::
+BTreeIteratorBase(const KeyDataType *shortArray,
+ uint32_t arraySize,
+ const NodeAllocatorType &allocator,
+ const AggrCalcT &aggrCalc)
+ : _leaf(NULL, 0u),
+ _path(),
+ _pathSize(0),
+ _allocator(&allocator),
+ _leafRoot(NULL),
+ _compatLeafNode()
+{
+ if(arraySize > 0) {
+ _compatLeafNode.reset(new LeafNodeTempType(shortArray, arraySize));
+ _leaf.setNode(_compatLeafNode.get());
+ _leafRoot = _leaf.getNode();
+ typedef BTreeAggregator<KeyT, DataT, AggrT,
+ INTERNAL_SLOTS, LEAF_SLOTS, AggrCalcT> Aggregator;
+ if (AggrCalcT::hasAggregated()) {
+ Aggregator::recalc(const_cast<LeafNodeType &>(*_leaf.getNode()),
+ aggrCalc);
+ }
+ }
+}
+
+
+template <typename KeyT, typename DataT, typename AggrT,
+ uint32_t INTERNAL_SLOTS, uint32_t LEAF_SLOTS, uint32_t PATH_SIZE>
+BTreeIteratorBase<KeyT, DataT, AggrT, INTERNAL_SLOTS, LEAF_SLOTS, PATH_SIZE>::
+BTreeIteratorBase()
+ : _leaf(NULL, 0u),
+ _path(),
+ _pathSize(0),
+ _allocator(NULL),
+ _leafRoot(NULL),
+ _compatLeafNode()
+{
+}
+
+
+template <typename KeyT, typename DataT, typename AggrT,
+ uint32_t INTERNAL_SLOTS, uint32_t LEAF_SLOTS, uint32_t PATH_SIZE>
+BTreeIteratorBase<KeyT, DataT, AggrT, INTERNAL_SLOTS, LEAF_SLOTS, PATH_SIZE> &
+BTreeIteratorBase<KeyT, DataT, AggrT, INTERNAL_SLOTS, LEAF_SLOTS, PATH_SIZE>::
+operator--()
+{
+ if (_leaf.getNode() == NULL) {
+ rbegin();
+ return *this;
+ }
+ if (_leaf.getIdx() > 0u) {
+ _leaf.decIdx();
+ return *this;
+ }
+ findPrevLeafNode();
+ return *this;
+}
+
+
+template <typename KeyT, typename DataT, typename AggrT,
+ uint32_t INTERNAL_SLOTS, uint32_t LEAF_SLOTS, uint32_t PATH_SIZE>
+size_t
+BTreeIteratorBase<KeyT, DataT, AggrT, INTERNAL_SLOTS, LEAF_SLOTS, PATH_SIZE>::
+size() const
+{
+ if (_pathSize > 0) {
+ return _path[_pathSize - 1].getNode()->validLeaves();
+ }
+ if (_leafRoot != nullptr) {
+ return _leafRoot->validSlots();
+ }
+ return 0u;
+}
+
+
+template <typename KeyT, typename DataT, typename AggrT,
+ uint32_t INTERNAL_SLOTS, uint32_t LEAF_SLOTS, uint32_t PATH_SIZE>
+ssize_t
+BTreeIteratorBase<KeyT, DataT, AggrT, INTERNAL_SLOTS, LEAF_SLOTS, PATH_SIZE>::
+operator-(const BTreeIteratorBase &rhs) const
+{
+ if (_leaf.getNode() == NULL) {
+ if (rhs._leaf.getNode() == NULL)
+ return 0;
+ // *this might not be normalized (i.e. default constructor)
+ return rhs.size() - rhs.position(rhs._pathSize);
+ } else if (rhs._leaf.getNode() == NULL) {
+ // rhs might not be normalized (i.e. default constructor)
+ return position(_pathSize) - size();
+ }
+ assert(_pathSize == rhs._pathSize);
+ if (_pathSize != 0) {
+ uint32_t pidx = _pathSize;
+ while (pidx > 0) {
+ assert(_path[pidx - 1].getNode() == rhs._path[pidx - 1].getNode());
+ if (_path[pidx - 1].getIdx() != rhs._path[pidx - 1].getIdx())
+ break;
+ --pidx;
+ }
+ return position(pidx) - rhs.position(pidx);
+ } else {
+ assert(_leaf.getNode() == NULL || rhs._leaf.getNode() == NULL ||
+ _leaf.getNode() == rhs._leaf.getNode());
+ return position(0) - rhs.position(0);
+ }
+}
+
+
+template <typename KeyT, typename DataT, typename AggrT,
+ uint32_t INTERNAL_SLOTS, uint32_t LEAF_SLOTS, uint32_t PATH_SIZE>
+bool
+BTreeIteratorBase<KeyT, DataT, AggrT, INTERNAL_SLOTS, LEAF_SLOTS, PATH_SIZE>::
+identical(const BTreeIteratorBase &rhs) const
+{
+ if (_pathSize != rhs._pathSize || _leaf != rhs._leaf) {
+ abort();
+ return false;
+ }
+ for (uint32_t level = 0; level < _pathSize; ++level) {
+ if (_path[level] != rhs._path[level]) {
+ abort();
+ return false;
+ }
+ }
+ if (_leafRoot != rhs._leafRoot) {
+ abort();
+ return false;
+ }
+ return true;
+}
+
+
+template <typename KeyT, typename DataT, typename AggrT, typename CompareT,
+ typename TraitsT>
+void
+BTreeConstIterator<KeyT, DataT, AggrT, CompareT, TraitsT>::
+lower_bound(const KeyType & key, CompareT comp)
+{
+ if (_pathSize == 0) {
+ if (_leafRoot == NULL)
+ return;
+ uint32_t idx = _leafRoot->template lower_bound<CompareT>(key, comp);
+ if (idx >= _leafRoot->validSlots()) {
+ _leaf.setNodeAndIdx(NULL, 0u);
+ } else {
+ _leaf.setNodeAndIdx(_leafRoot, idx);
+ }
+ return;
+ }
+ uint32_t level = _pathSize - 1;
+ PathElement &pe = _path[level];
+ const InternalNodeType *inode = pe.getNode();
+ uint32_t idx = inode->template lower_bound<CompareT>(key, comp);
+ if (__builtin_expect(idx >= inode->validSlots(), false)) {
+ end();
+ return;
+ }
+ pe.setIdx(idx);
+ BTreeNode::Ref childRef = inode->getChild(idx);
+ while (level > 0) {
+ --level;
+ assert(!_allocator->isLeafRef(childRef));
+ inode = _allocator->mapInternalRef(childRef);
+ idx = inode->template lower_bound<CompareT>(key, comp);
+ assert(idx < inode->validSlots());
+ _path[level].setNodeAndIdx(inode, idx);
+ childRef = inode->getChild(idx);
+ assert(childRef.valid());
+ }
+ assert(_allocator->isLeafRef(childRef));
+ const LeafNodeType *lnode = _allocator->mapLeafRef(childRef);
+ idx = lnode->template lower_bound<CompareT>(key, comp);
+ assert(idx < lnode->validSlots());
+ _leaf.setNodeAndIdx(lnode, idx);
+}
+
+
+template <typename KeyT, typename DataT, typename AggrT, typename CompareT,
+ typename TraitsT>
+void
+BTreeConstIterator<KeyT, DataT, AggrT, CompareT, TraitsT>::
+lower_bound(BTreeNode::Ref rootRef, const KeyType & key, CompareT comp)
+{
+ if (!rootRef.valid()) {
+ setupEmpty();
+ return;
+ }
+ if (_allocator->isLeafRef(rootRef)) {
+ clearPath(0u);
+ const LeafNodeType *lnode = _allocator->mapLeafRef(rootRef);
+ _leafRoot = lnode;
+ uint32_t idx = lnode->template lower_bound<CompareT>(key, comp);
+ if (idx >= lnode->validSlots()) {
+ _leaf.setNodeAndIdx(NULL, 0u);
+ } else {
+ _leaf.setNodeAndIdx(lnode, idx);
+ }
+ return;
+ }
+ _leafRoot = NULL;
+ const InternalNodeType *inode = _allocator->mapInternalRef(rootRef);
+ uint32_t idx = inode->template lower_bound<CompareT>(key, comp);
+ if (idx >= inode->validSlots()) {
+ end(rootRef);
+ return;
+ }
+ uint32_t pidx = inode->getLevel();
+ clearPath(pidx);
+ --pidx;
+ assert(pidx < TraitsT::PATH_SIZE);
+ _path[pidx].setNodeAndIdx(inode, idx);
+ BTreeNode::Ref childRef = inode->getChild(idx);
+ assert(childRef.valid());
+ while (pidx != 0) {
+ --pidx;
+ inode = _allocator->mapInternalRef(childRef);
+ idx = inode->template lower_bound<CompareT>(key, comp);
+ assert(idx < inode->validSlots());
+ _path[pidx].setNodeAndIdx(inode, idx);
+ childRef = inode->getChild(idx);
+ assert(childRef.valid());
+ }
+ const LeafNodeType *lnode = _allocator->mapLeafRef(childRef);
+ idx = lnode->template lower_bound<CompareT>(key, comp);
+ assert(idx < lnode->validSlots());
+ _leaf.setNodeAndIdx(lnode, idx);
+}
+
+
+template <typename KeyT, typename DataT, typename AggrT, typename CompareT,
+ typename TraitsT>
+void
+BTreeConstIterator<KeyT, DataT, AggrT, CompareT, TraitsT>::
+seek(const KeyType & key, CompareT comp)
+{
+ if (TraitsT::BINARY_SEEK) {
+ binarySeek(key, comp);
+ } else {
+ linearSeek(key, comp);
+ }
+}
+
+template <typename KeyT, typename DataT, typename AggrT, typename CompareT,
+ typename TraitsT>
+void
+BTreeConstIterator<KeyT, DataT, AggrT, CompareT, TraitsT>::
+binarySeek(const KeyType & key, CompareT comp)
+{
+ const LeafNodeType *lnode = _leaf.getNode();
+ uint32_t lidx = _leaf.getIdx();
+#ifdef STRICT_BTREE_ITERATOR_SEEK
+ assert(_leaf.valid() && comp(lnode->getKey(lidx), key));
+#endif
+ ++lidx;
+ if (lidx < lnode->validSlots()) {
+ if (!comp(lnode->getKey(lidx), key)) {
+ _leaf.setIdx(lidx);
+ return;
+ } else {
+ ++lidx;
+ }
+ }
+ if (comp(lnode->getLastKey(), key)) {
+ uint32_t level = 0;
+ uint32_t levels = _pathSize;
+ while (level < levels &&
+ comp(_path[level].getNode()->getLastKey(), key))
+ ++level;
+ if (__builtin_expect(level >= levels, false)) {
+ end();
+ return;
+ } else {
+ const InternalNodeType *node = _path[level].getNode();
+ uint32_t idx = _path[level].getIdx();
+ idx = node->template lower_bound<CompareT>(idx + 1, key, comp);
+ _path[level].setIdx(idx);
+ while (level > 0) {
+ --level;
+ node = _allocator->mapInternalRef(node->getChild(idx));
+ idx = node->template lower_bound<CompareT>(0, key, comp);
+ _path[level].setNodeAndIdx(node, idx);
+ }
+ lnode = _allocator->mapLeafRef(node->getChild(idx));
+ _leaf.setNode(lnode);
+ lidx = 0;
+ }
+ }
+ lidx = lnode->template lower_bound<CompareT>(lidx, key, comp);
+ _leaf.setIdx(lidx);
+}
+
+template <typename KeyT, typename DataT, typename AggrT, typename CompareT,
+ typename TraitsT>
+void
+BTreeConstIterator<KeyT, DataT, AggrT, CompareT, TraitsT>::
+linearSeek(const KeyType & key, CompareT comp)
+{
+ const LeafNodeType *lnode = _leaf.getNode();
+ uint32_t lidx = _leaf.getIdx();
+#ifdef STRICT_BTREE_ITERATOR_SEEK
+ assert(_leaf.valid() && comp(lnode->getKey(lidx), key));
+#endif
+ ++lidx;
+ if (lidx < lnode->validSlots()) {
+ if (!comp(lnode->getKey(lidx), key)) {
+ _leaf.setIdx(lidx);
+ return;
+ } else {
+ ++lidx;
+ }
+ }
+ if (comp(lnode->getLastKey(), key)) {
+ uint32_t level = 0;
+ uint32_t levels = _pathSize;
+ while (level < levels &&
+ comp(_path[level].getNode()->getLastKey(), key))
+ ++level;
+ if (__builtin_expect(level >= levels, false)) {
+ end();
+ return;
+ } else {
+ const InternalNodeType *node = _path[level].getNode();
+ uint32_t idx = _path[level].getIdx();
+ do {
+ ++idx;
+ } while (comp(node->getKey(idx), key));
+ _path[level].setIdx(idx);
+ while (level > 0) {
+ --level;
+ node = _allocator->mapInternalRef(node->getChild(idx));
+ idx = 0;
+ while (comp(node->getKey(idx), key)) {
+ ++idx;
+ }
+ _path[level].setNodeAndIdx(node, idx);
+ }
+ lnode = _allocator->mapLeafRef(node->getChild(idx));
+ _leaf.setNode(lnode);
+ lidx = 0;
+ }
+ }
+ while (comp(lnode->getKey(lidx), key)) {
+ ++lidx;
+ }
+ _leaf.setIdx(lidx);
+}
+
+template <typename KeyT, typename DataT, typename AggrT, typename CompareT,
+ typename TraitsT>
+void
+BTreeConstIterator<KeyT, DataT, AggrT, CompareT, TraitsT>::
+seekPast(const KeyType & key, CompareT comp)
+{
+ if (TraitsT::BINARY_SEEK) {
+ binarySeekPast(key, comp);
+ } else {
+ linearSeekPast(key, comp);
+ }
+}
+
+template <typename KeyT, typename DataT, typename AggrT, typename CompareT,
+ typename TraitsT>
+void
+BTreeConstIterator<KeyT, DataT, AggrT, CompareT, TraitsT>::
+binarySeekPast(const KeyType & key, CompareT comp)
+{
+ const LeafNodeType *lnode = _leaf.getNode();
+ uint32_t lidx = _leaf.getIdx();
+#ifdef STRICT_BTREE_ITERATOR_SEEK
+ assert(_leaf.valid() && !comp(key, lnode->getKey(lidx)));
+#endif
+ ++lidx;
+ if (lidx < lnode->validSlots()) {
+ if (comp(key, lnode->getKey(lidx))) {
+ _leaf.setIdx(lidx);
+ return;
+ } else {
+ ++lidx;
+ }
+ }
+ if (!comp(key, lnode->getLastKey())) {
+ uint32_t level = 0;
+ uint32_t levels = _pathSize;
+ while (level < levels &&
+ !comp(key, _path[level].getNode()->getLastKey()))
+ ++level;
+ if (__builtin_expect(level >= levels, false)) {
+ end();
+ return;
+ } else {
+ const InternalNodeType *node = _path[level].getNode();
+ uint32_t idx = _path[level].getIdx();
+ idx = node->template upper_bound<CompareT>(idx + 1, key, comp);
+ _path[level].setIdx(idx);
+ while (level > 0) {
+ --level;
+ node = _allocator->mapInternalRef(node->getChild(idx));
+ idx = node->template upper_bound<CompareT>(0, key, comp);
+ _path[level].setNodeAndIdx(node, idx);
+ }
+ lnode = _allocator->mapLeafRef(node->getChild(idx));
+ _leaf.setNode(lnode);
+ lidx = 0;
+ }
+ }
+ lidx = lnode->template upper_bound<CompareT>(lidx, key, comp);
+ _leaf.setIdx(lidx);
+}
+
+template <typename KeyT, typename DataT, typename AggrT, typename CompareT,
+ typename TraitsT>
+void
+BTreeConstIterator<KeyT, DataT, AggrT, CompareT, TraitsT>::
+linearSeekPast(const KeyType & key, CompareT comp)
+{
+ const LeafNodeType *lnode = _leaf.getNode();
+ uint32_t lidx = _leaf.getIdx();
+#ifdef STRICT_BTREE_ITERATOR_SEEK
+ assert(_leaf.valid() && !comp(key, lnode->getKey(lidx)));
+#endif
+ ++lidx;
+ if (lidx < lnode->validSlots()) {
+ if (comp(key, lnode->getKey(lidx))) {
+ _leaf.setIdx(lidx);
+ return;
+ } else {
+ ++lidx;
+ }
+ }
+ if (!comp(key, lnode->getLastKey())) {
+ uint32_t level = 0;
+ uint32_t levels = _pathSize;
+ while (level < levels &&
+ !comp(key, _path[level].getNode()->getLastKey()))
+ ++level;
+ if (__builtin_expect(level >= levels, false)) {
+ end();
+ return;
+ } else {
+ const InternalNodeType *node = _path[level].getNode();
+ uint32_t idx = _path[level].getIdx();
+ do {
+ ++idx;
+ } while (!comp(key, node->getKey(idx)));
+ _path[level].setIdx(idx);
+ while (level > 0) {
+ --level;
+ node = _allocator->mapInternalRef(node->getChild(idx));
+ idx = 0;
+ while (!comp(key, node->getKey(idx))) {
+ ++idx;
+ }
+ _path[level].setNodeAndIdx(node, idx);
+ }
+ lnode = _allocator->mapLeafRef(node->getChild(idx));
+ _leaf.setNode(lnode);
+ lidx = 0;
+ }
+ }
+ while (!comp(key, lnode->getKey(lidx))) {
+ ++lidx;
+ }
+ _leaf.setIdx(lidx);
+}
+
+
+template <typename KeyT, typename DataT, typename AggrT, typename CompareT,
+ typename TraitsT>
+void
+BTreeConstIterator<KeyT, DataT, AggrT, CompareT, TraitsT>::
+validate(BTreeNode::Ref rootRef, CompareT comp)
+{
+ bool frozen = false;
+ if (!rootRef.valid()) {
+ assert(_pathSize == 0u);
+ assert(_leafRoot == NULL);
+ assert(_leaf.getNode() == NULL);
+ return;
+ }
+ uint32_t level = _pathSize;
+ BTreeNode::Ref nodeRef = rootRef;
+ const KeyT *parentKey = NULL;
+ const KeyT *leafKey = NULL;
+ if (_leaf.getNode() != NULL) {
+ leafKey = &_leaf.getNode()->getKey(_leaf.getIdx());
+ }
+ while (level > 0) {
+ --level;
+ assert(!_allocator->isLeafRef(nodeRef));
+ const PathElement &pe = _path[level];
+ assert(pe.getNode() == _allocator->mapInternalRef(nodeRef));
+ uint32_t idx = pe.getIdx();
+ if (leafKey == NULL) {
+ assert(idx == 0 ||
+ idx == pe.getNode()->validSlots());
+ if (idx == pe.getNode()->validSlots())
+ --idx;
+ }
+ assert(idx < pe.getNode()->validSlots());
+ assert(!frozen || pe.getNode()->getFrozen());
+ (void) frozen;
+ frozen = pe.getNode()->getFrozen();
+ if (parentKey != NULL) {
+ assert(idx + 1 == pe.getNode()->validSlots() ||
+ comp(pe.getNode()->getKey(idx), *parentKey));
+ assert(!comp(*parentKey, pe.getNode()->getKey(idx)));
+ (void) comp;
+ }
+ if (leafKey != NULL) {
+ assert(idx == 0 ||
+ comp(pe.getNode()->getKey(idx - 1), *leafKey));
+ assert(idx + 1 == pe.getNode()->validSlots() ||
+ comp(*leafKey, pe.getNode()->getKey(idx + 1)));
+ assert(!comp(pe.getNode()->getKey(idx), *leafKey));
+ (void) comp;
+ }
+ parentKey = &pe.getNode()->getKey(idx);
+ nodeRef = pe.getNode()->getChild(idx);
+ assert(nodeRef.valid());
+ }
+ assert(_allocator->isLeafRef(nodeRef));
+ if (_pathSize == 0) {
+ assert(_leafRoot == _allocator->mapLeafRef(nodeRef));
+ assert(_leaf.getNode() == NULL || _leaf.getNode() == _leafRoot);
+ } else {
+ assert(_leafRoot == NULL);
+ assert(_leaf.getNode() == _allocator->mapLeafRef(nodeRef) ||
+ _leaf.getNode() == NULL);
+ }
+}
+
+
+template <typename KeyT, typename DataT, typename AggrT, typename CompareT,
+ typename TraitsT>
+BTreeNode::Ref
+BTreeIterator<KeyT, DataT, AggrT, CompareT, TraitsT>::
+moveFirstLeafNode(BTreeNode::Ref rootRef)
+{
+ if (!NodeAllocatorType::isValidRef(rootRef)) {
+ assert(_pathSize == 0);
+ assert(_leaf.getNode() == NULL);
+ return rootRef;
+ }
+
+ assert(_leaf.getNode() != NULL);
+ NodeAllocatorType &allocator = getAllocator();
+
+ if (_pathSize == 0) {
+ BTreeNode::Ref newRootRef = rootRef;
+ assert(_leaf.getNode() == allocator.mapLeafRef(rootRef));
+ if (allocator.getCompacting(rootRef)) {
+ LeafNodeTypeRefPair lPair(allocator.moveLeafNode(_leaf.getNode()));
+ _leaf.setNode(lPair.second);
+ // Before updating root
+ std::atomic_thread_fence(std::memory_order_release);
+ newRootRef = lPair.first;
+ }
+ _leaf.setIdx(_leaf.getNode()->validSlots() - 1);
+ return newRootRef;
+ }
+
+ uint32_t level = _pathSize;
+ BTreeNode::Ref newRootRef = rootRef;
+
+ --level;
+ InternalNodeType *node = _path[level].getWNode();
+ assert(node == allocator.mapInternalRef(rootRef));
+ bool moved = allocator.getCompacting(rootRef);
+ if (moved) {
+ InternalNodeTypeRefPair iPair(allocator.moveInternalNode(node));
+ newRootRef = iPair.first;
+ node = iPair.second;
+ }
+ _path[level].setNodeAndIdx(node, 0u);
+ while (level > 0) {
+ --level;
+ EntryRef nodeRef = node->getChild(0);
+ InternalNodeType *pnode = node;
+ node = allocator.mapInternalRef(nodeRef);
+ if (allocator.getCompacting(nodeRef)) {
+ InternalNodeTypeRefPair iPair = allocator.moveInternalNode(node);
+ nodeRef = iPair.first;
+ node = iPair.second;
+ pnode->setChild(0, nodeRef);
+ moved = true;
+ }
+ _path[level].setNodeAndIdx(node, 0u);
+ }
+ EntryRef nodeRef = node->getChild(0);
+ _leaf.setNode(allocator.mapLeafRef(nodeRef));
+ if (allocator.getCompacting(nodeRef)) {
+ LeafNodeTypeRefPair
+ lPair(allocator.moveLeafNode(_leaf.getNode()));
+ _leaf.setNode(lPair.second);
+ node->setChild(0, lPair.first);
+ moved = true;
+ }
+ if (moved) {
+ // Before updating root
+ std::atomic_thread_fence(std::memory_order_release);
+ }
+ _leaf.setIdx(_leaf.getNode()->validSlots() - 1);
+ return newRootRef;
+}
+
+
+template <typename KeyT, typename DataT, typename AggrT, typename CompareT,
+ typename TraitsT>
+void
+BTreeIterator<KeyT, DataT, AggrT, CompareT, TraitsT>::
+moveNextLeafNode()
+{
+ uint32_t level = 0;
+ uint32_t levels = _pathSize;
+ while (level < levels &&
+ _path[level].getNode()->validSlots() <= _path[level].getIdx() + 1)
+ ++level;
+ if (__builtin_expect(level >= levels, false)) {
+ end();
+ return;
+ } else {
+ NodeAllocatorType &allocator = getAllocator();
+ InternalNodeType *node = _path[level].getWNode();
+ uint32_t idx = _path[level].getIdx();
+ ++idx;
+ _path[level].setIdx(idx);
+ while (level > 0) {
+ --level;
+ EntryRef nodeRef = node->getChild(idx);
+ InternalNodeType *pnode = node;
+ node = allocator.mapInternalRef(nodeRef);
+ if (allocator.getCompacting(nodeRef)) {
+ InternalNodeTypeRefPair iPair(allocator.moveInternalNode(node));
+ nodeRef = iPair.first;
+ node = iPair.second;
+ std::atomic_thread_fence(std::memory_order_release);
+ pnode->setChild(idx, nodeRef);
+ }
+ idx = 0;
+ _path[level].setNodeAndIdx(node, idx);
+ }
+ EntryRef nodeRef = node->getChild(idx);
+ _leaf.setNode(allocator.mapLeafRef(nodeRef));
+ if (allocator.getCompacting(nodeRef)) {
+ LeafNodeTypeRefPair lPair(allocator.moveLeafNode(_leaf.getNode()));
+ _leaf.setNode(lPair.second);
+ std::atomic_thread_fence(std::memory_order_release);
+ node->setChild(idx, lPair.first);
+ }
+ _leaf.setIdx(_leaf.getNode()->validSlots() - 1);
+ }
+}
+
+
+template <typename KeyT, typename DataT, typename AggrT, typename CompareT,
+ typename TraitsT>
+void
+BTreeIterator<KeyT, DataT, AggrT, CompareT, TraitsT>::
+writeKey(const KeyType & key)
+{
+ LeafNodeType * lnode = getLeafNode();
+ lnode->writeKey(_leaf.getIdx(), key);
+ // must also update the key towards the root as long as the key is
+ // the last one in the current node
+ if (_leaf.getIdx() + 1 == lnode->validSlots()) {
+ for (uint32_t i = 0; i < _pathSize; ++i) {
+ const PathElement & pe = _path[i];
+ InternalNodeType *inode = pe.getWNode();
+ uint32_t childIdx = pe.getIdx();
+ inode->writeKey(childIdx, key);
+ if (childIdx + 1 != inode->validSlots()) {
+ break;
+ }
+ }
+ }
+}
+
+
+template <typename KeyT, typename DataT, typename AggrT, typename CompareT,
+ typename TraitsT>
+template <class AggrCalcT>
+void
+BTreeIterator<KeyT, DataT, AggrT, CompareT, TraitsT>::
+updateData(const DataType & data, const AggrCalcT &aggrCalc)
+{
+ LeafNodeType * lnode = getLeafNode();
+ if (AggrCalcT::hasAggregated()) {
+ AggrT oldca(lnode->getAggregated());
+ typedef BTreeAggregator<KeyT, DataT, AggrT,
+ TraitsT::INTERNAL_SLOTS,
+ TraitsT::LEAF_SLOTS,
+ AggrCalcT> Aggregator;
+ if (aggrCalc.update(lnode->getAggregated(),
+ aggrCalc.getVal(lnode->getData(_leaf.getIdx())),
+ aggrCalc.getVal(data))) {
+ lnode->writeData(_leaf.getIdx(), data);
+ Aggregator::recalc(*lnode, aggrCalc);
+ } else {
+ lnode->writeData(_leaf.getIdx(), data);
+ }
+ AggrT ca(lnode->getAggregated());
+ // must also update aggregated values towards the root.
+ for (uint32_t i = 0; i < _pathSize; ++i) {
+ const PathElement & pe = _path[i];
+ InternalNodeType * inode = pe.getWNode();
+ AggrT oldpa(inode->getAggregated());
+ if (aggrCalc.update(inode->getAggregated(),
+ oldca, ca)) {
+ Aggregator::recalc(*inode, *_allocator, aggrCalc);
+ }
+ AggrT pa(inode->getAggregated());
+ oldca = oldpa;
+ ca = pa;
+ }
+ } else {
+ lnode->writeData(_leaf.getIdx(), data);
+ }
+}
+
+
+template <typename KeyT, typename DataT, typename AggrT, typename CompareT,
+ typename TraitsT>
+BTreeNode::Ref
+BTreeIterator<KeyT, DataT, AggrT, CompareT, TraitsT>::
+thaw(BTreeNode::Ref rootRef)
+{
+ assert(_leaf.getNode() != NULL && _compatLeafNode.get() == NULL);
+ if (!_leaf.getNode()->getFrozen())
+ return rootRef;
+ NodeAllocatorType &allocator = getAllocator();
+ if (_pathSize == 0) {
+ LeafNodeType *leafNode = allocator.mapLeafRef(rootRef);
+ assert(leafNode == _leaf.getNode());
+ assert(leafNode == _leafRoot);
+ LeafNodeTypeRefPair thawedLeaf = allocator.thawNode(rootRef,
+ leafNode);
+ _leaf.setNode(thawedLeaf.second);
+ _leafRoot = thawedLeaf.second;
+ return thawedLeaf.first;
+ }
+ assert(_leafRoot == NULL);
+ assert(_path[_pathSize - 1].getNode() ==
+ allocator.mapInternalRef(rootRef));
+ BTreeNode::Ref childRef(_path[0].getNode()->getChild(_path[0].getIdx()));
+ LeafNodeType *leafNode = allocator.mapLeafRef(childRef);
+ assert(leafNode == _leaf.getNode());
+ LeafNodeTypeRefPair thawedLeaf = allocator.thawNode(childRef,
+ leafNode);
+ _leaf.setNode(thawedLeaf.second);
+ childRef = thawedLeaf.first;
+ uint32_t level = 0;
+ uint32_t levels = _pathSize;
+ while (level < levels) {
+ PathElement &pe = _path[level];
+ InternalNodeType *node(pe.getWNode());
+ BTreeNode::Ref nodeRef = level + 1 < levels ?
+ _path[level + 1].getNode()->
+ getChild(_path[level + 1].getIdx()) :
+ rootRef;
+ assert(node == allocator.mapInternalRef(nodeRef));
+ if (!node->getFrozen()) {
+ node->setChild(pe.getIdx(), childRef);
+ return rootRef;
+ }
+ InternalNodeTypeRefPair thawed = allocator.thawNode(nodeRef, node);
+ node = thawed.second;
+ pe.setNode(node);
+ node->setChild(pe.getIdx(), childRef);
+ childRef = thawed.first;
+ ++level;
+ }
+ return childRef; // Root node was thawed
+}
+
+
+template <typename KeyT, typename DataT, typename AggrT, typename CompareT,
+ typename TraitsT>
+template <class AggrCalcT>
+BTreeNode::Ref
+BTreeIterator<KeyT, DataT, AggrT, CompareT, TraitsT>::
+insertFirst(const KeyType &key, const DataType &data,
+ const AggrCalcT &aggrCalc)
+{
+ assert(_pathSize == 0);
+ assert(_leafRoot == NULL);
+ NodeAllocatorType &allocator = getAllocator();
+ LeafNodeTypeRefPair lnode = allocator.allocLeafNode();
+ lnode.second->insert(0, key, data);
+ if (AggrCalcT::hasAggregated()) {
+ AggrT a;
+ aggrCalc.add(a, aggrCalc.getVal(data));
+ lnode.second->getAggregated() = a;
+ }
+ _leafRoot = lnode.second;
+ _leaf.setNodeAndIdx(lnode.second, 0u);
+ return lnode.first;
+}
+
+
+template <typename KeyT, typename DataT, typename AggrT, typename CompareT,
+ typename TraitsT>
+bool
+BTreeIterator<KeyT, DataT, AggrT, CompareT, TraitsT>::
+setLeafNodeIdx(uint32_t idx, const LeafNodeType *splitLeafNode)
+{
+ uint32_t leafSlots = _leaf.getNode()->validSlots();
+ if (idx >= leafSlots) {
+ _leaf.setNodeAndIdx(splitLeafNode,
+ idx - leafSlots);
+ if (_pathSize == 0) {
+ _leafRoot = splitLeafNode;
+ }
+ return true;
+ } else {
+ _leaf.setIdx(idx);
+ return false;
+ }
+}
+
+
+template <typename KeyT, typename DataT, typename AggrT, typename CompareT,
+ typename TraitsT>
+template <class AggrCalcT>
+BTreeNode::Ref
+BTreeIterator<KeyT, DataT, AggrT, CompareT, TraitsT>::
+addLevel(BTreeNode::Ref rootRef, BTreeNode::Ref splitNodeRef,
+ bool inRightSplit, const AggrCalcT &aggrCalc)
+{
+ typedef BTreeAggregator<KeyT, DataT, AggrT,
+ TraitsT::INTERNAL_SLOTS,
+ TraitsT::LEAF_SLOTS,
+ AggrCalcT> Aggregator;
+
+ NodeAllocatorType &allocator(getAllocator());
+
+ InternalNodeTypeRefPair inodePair(allocator.allocInternalNode(_pathSize + 1));
+ InternalNodeType *inode = inodePair.second;
+ inode->setValidLeaves(allocator.validLeaves(rootRef) +
+ allocator.validLeaves(splitNodeRef));
+ inode->insert(0, allocator.getLastKey(rootRef), rootRef);
+ inode->insert(1, allocator.getLastKey(splitNodeRef), splitNodeRef);
+ if (AggrCalcT::hasAggregated()) {
+ Aggregator::recalc(*inode, allocator, aggrCalc);
+ }
+ _path[_pathSize].setNodeAndIdx(inode, inRightSplit ? 1u : 0u);
+ if (_pathSize == 0) {
+ _leafRoot = NULL;
+ }
+ ++_pathSize;
+ return inodePair.first;
+}
+
+
+template <typename KeyT, typename DataT, typename AggrT, typename CompareT,
+ typename TraitsT>
+BTreeNode::Ref
+BTreeIterator<KeyT, DataT, AggrT, CompareT, TraitsT>::
+removeLevel(BTreeNode::Ref rootRef, InternalNodeType *rootNode)
+{
+ BTreeNode::Ref newRoot = rootNode->getChild(0);
+ NodeAllocatorType &allocator(getAllocator());
+ allocator.holdNode(rootRef, rootNode);
+ --_pathSize;
+ _path[_pathSize].setNodeAndIdx(NULL, 0u);
+ if (_pathSize == 0) {
+ _leafRoot = _leaf.getNode();
+ }
+ return newRoot;
+}
+
+
+template <typename KeyT, typename DataT, typename AggrT, typename CompareT,
+ typename TraitsT>
+void
+BTreeIterator<KeyT, DataT, AggrT, CompareT, TraitsT>::
+removeLast(BTreeNode::Ref rootRef)
+{
+ NodeAllocatorType &allocator(getAllocator());
+ allocator.holdNode(rootRef, getLeafNode());
+ _leafRoot = NULL;
+ _leaf.setNode(NULL);
+}
+
+
+} // namespace search::btree
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/btree/btreenode.cpp b/searchlib/src/vespa/searchlib/btree/btreenode.cpp
new file mode 100644
index 00000000000..9d8f6d686d5
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/btree/btreenode.cpp
@@ -0,0 +1,36 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "btreenode.h"
+#include "btreenode.hpp"
+
+namespace search {
+namespace btree {
+
+BTreeNoLeafData BTreeNoLeafData::_instance;
+
+NoAggregated BTreeNodeAggregatedWrap<NoAggregated>::_instance;
+template <>
+MinMaxAggregated BTreeNodeAggregatedWrap<MinMaxAggregated>::_instance =
+ MinMaxAggregated();
+
+template class BTreeNodeDataWrap<uint32_t, 16>;
+template class BTreeNodeDataWrap<BTreeNoLeafData, 16>;
+template class BTreeKeyData<uint32_t, uint32_t>;
+template class BTreeKeyData<uint32_t, int32_t>;
+template class BTreeNodeT<uint32_t, 16>;
+template class BTreeNodeTT<uint32_t, uint32_t, NoAggregated, 16>;
+template class BTreeNodeTT<uint32_t, BTreeNoLeafData, NoAggregated, 16>;
+template class BTreeNodeTT<uint32_t, EntryRef, NoAggregated, 16>;
+template class BTreeNodeTT<uint32_t, int32_t, MinMaxAggregated, 16>;
+template class BTreeInternalNode<uint32_t, NoAggregated, 16>;
+template class BTreeInternalNode<uint32_t, MinMaxAggregated, 16>;
+template class BTreeLeafNode<uint32_t, uint32_t, NoAggregated, 16>;
+template class BTreeLeafNode<uint32_t, BTreeNoLeafData, NoAggregated, 16>;
+template class BTreeLeafNode<uint32_t, int32_t, MinMaxAggregated, 16>;
+template class BTreeLeafNodeTemp<uint32_t, uint32_t, NoAggregated, 16>;
+template class BTreeLeafNodeTemp<uint32_t, int32_t, MinMaxAggregated,
+ 16>;
+template class BTreeLeafNodeTemp<uint32_t, BTreeNoLeafData, NoAggregated, 16>;
+
+} // namespace search::btree
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/btree/btreenode.h b/searchlib/src/vespa/searchlib/btree/btreenode.h
new file mode 100644
index 00000000000..c44743fceb0
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/btree/btreenode.h
@@ -0,0 +1,784 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "entryref.h"
+#include <sys/types.h>
+#include <assert.h>
+#include <utility>
+#include "noaggregated.h"
+#include "minmaxaggregated.h"
+
+namespace search {
+namespace btree {
+
+template <typename, typename, typename, size_t, size_t>
+class BTreeNodeAllocator;
+template <typename, typename, typename, size_t, size_t> class BTreeNodeStore;
+template <typename> class BufferType;
+template <typename> class DataStoreT;
+
+class NoAggregated;
+
+class BTreeNode {
+private:
+ uint8_t _level;
+ bool _isFrozen;
+public:
+ static constexpr uint8_t EMPTY_LEVEL = 255;
+ static constexpr uint8_t LEAF_LEVEL = 0;
+protected:
+ uint16_t _validSlots;
+ BTreeNode(uint8_t level)
+ : _level(level),
+ _isFrozen(false),
+ _validSlots(0)
+ {
+ }
+
+ BTreeNode(const BTreeNode &rhs)
+ : _level(rhs._level),
+ _isFrozen(rhs._isFrozen),
+ _validSlots(rhs._validSlots)
+ {
+ }
+
+ BTreeNode &
+ operator=(const BTreeNode &rhs)
+ {
+ assert(!_isFrozen);
+ _level = rhs._level;
+ _isFrozen = rhs._isFrozen;
+ _validSlots = rhs._validSlots;
+ return *this;
+ }
+
+ ~BTreeNode(void)
+ {
+ assert(_isFrozen);
+ }
+
+public:
+ typedef EntryRef Ref;
+
+ bool isLeaf() const { return _level == 0u; }
+
+ bool
+ getFrozen(void) const
+ {
+ return _isFrozen;
+ }
+
+ void
+ freeze(void)
+ {
+ _isFrozen = true;
+ }
+
+ void
+ unFreeze(void)
+ {
+ _isFrozen = false;
+ }
+
+
+ void
+ setLevel(uint8_t level)
+ {
+ _level = level;
+ }
+
+ uint32_t getLevel() const { return _level; }
+
+ uint32_t validSlots() const { return _validSlots; }
+
+ void
+ setValidSlots(uint16_t validSlots_)
+ {
+ _validSlots = validSlots_;
+ }
+};
+
+
+template <class DataT, uint32_t NumSlots>
+class BTreeNodeDataWrap
+{
+public:
+ DataT _data[NumSlots];
+
+ BTreeNodeDataWrap(void)
+ : _data()
+ {
+ }
+
+ void
+ copyData(const BTreeNodeDataWrap &rhs, uint32_t validSlots)
+ {
+ const DataT *rdata = rhs._data;
+ DataT *ldata = _data;
+ DataT *ldatae = _data + validSlots;
+ for (; ldata != ldatae; ++ldata, ++rdata)
+ *ldata = *rdata;
+ }
+
+ const DataT &
+ getData(uint32_t idx) const
+ {
+ return _data[idx];
+ }
+
+ void
+ setData(uint32_t idx, const DataT &data)
+ {
+ _data[idx] = data;
+ }
+
+ static bool
+ hasData(void)
+ {
+ return true;
+ }
+};
+
+
+/**
+ * Empty class to use as DataT template parameter for BTree classes to
+ * indicate that leaf nodes have no data (similar to std::set having less
+ * information than std::map). Use of this class triggers the below
+ * partial specialization of BTreeNodeDataWrap to prevent unneeded
+ * storage overhead.
+ */
+class BTreeNoLeafData
+{
+public:
+ static BTreeNoLeafData _instance;
+};
+
+
+template <uint32_t NumSlots>
+class BTreeNodeDataWrap<BTreeNoLeafData, NumSlots>
+{
+public:
+ BTreeNodeDataWrap(void)
+ {
+ }
+
+ void
+ copyData(const BTreeNodeDataWrap &rhs, uint32_t validSlots)
+ {
+ (void) rhs;
+ (void) validSlots;
+ }
+
+ const BTreeNoLeafData &
+ getData(uint32_t idx) const
+ {
+ (void) idx;
+ return BTreeNoLeafData::_instance;
+ }
+
+ void
+ setData(uint32_t idx, const BTreeNoLeafData &data)
+ {
+ (void) idx;
+ (void) data;
+ }
+
+ static bool
+ hasData(void)
+ {
+ return false;
+ }
+};
+
+
+template <typename KeyT, typename DataT>
+class BTreeKeyData
+{
+public:
+ typedef KeyT KeyType;
+ typedef DataT DataType;
+
+ KeyT _key;
+ DataT _data;
+
+ BTreeKeyData(void)
+ : _key(),
+ _data()
+ {
+ }
+
+ BTreeKeyData(const KeyT &key, const DataT &data)
+ : _key(key),
+ _data(data)
+ {
+ }
+
+ void
+ setData(const DataT &data)
+ {
+ _data = data;
+ }
+
+ const DataT &
+ getData(void) const
+ {
+ return _data;
+ }
+
+ /**
+ * This operator only works when using direct keys. References to
+ * externally stored keys will not be properly sorted.
+ */
+ bool
+ operator<(const BTreeKeyData &rhs) const
+ {
+ return _key < rhs._key;
+ }
+};
+
+
+template <typename KeyT>
+class BTreeKeyData<KeyT, BTreeNoLeafData>
+{
+public:
+ typedef KeyT KeyType;
+ typedef BTreeNoLeafData DataType;
+
+ KeyT _key;
+
+ BTreeKeyData(void)
+ : _key()
+ {
+ }
+
+ BTreeKeyData(const KeyT &key, const BTreeNoLeafData &data)
+ : _key(key)
+ {
+ (void) data;
+ }
+
+ void
+ setData(const BTreeNoLeafData &data)
+ {
+ (void) data;
+ }
+
+ const BTreeNoLeafData &
+ getData(void) const
+ {
+ return BTreeNoLeafData::_instance;
+ }
+
+ /**
+ * This operator only works when using direct keys. References to
+ * externally stored keys will not be properly sorted.
+ */
+ bool
+ operator<(const BTreeKeyData &rhs) const
+ {
+ return _key < rhs._key;
+ }
+};
+
+
+template <typename AggrT>
+class BTreeNodeAggregatedWrap
+{
+ typedef AggrT AggregatedType;
+
+ AggrT _aggr;
+ static AggrT _instance;
+
+public:
+ BTreeNodeAggregatedWrap(void)
+ : _aggr()
+ {
+ }
+
+ AggrT &
+ getAggregated(void)
+ {
+ return _aggr;
+ }
+
+ const AggrT &
+ getAggregated(void) const
+ {
+ return _aggr;
+ }
+
+ static const AggrT &
+ getEmptyAggregated(void)
+ {
+ return _instance;
+ }
+};
+
+
+template <>
+class BTreeNodeAggregatedWrap<NoAggregated>
+{
+ typedef NoAggregated AggregatedType;
+
+ static NoAggregated _instance;
+public:
+ BTreeNodeAggregatedWrap(void)
+ {
+ }
+
+ NoAggregated &
+ getAggregated(void)
+ {
+ return _instance;
+ }
+
+ const NoAggregated &
+ getAggregated(void) const
+ {
+ return _instance;
+ }
+
+ static const NoAggregated &
+ getEmptyAggregated(void)
+ {
+ return _instance;
+ }
+};
+
+
+template <typename KeyT, uint32_t NumSlots>
+class BTreeNodeT : public BTreeNode {
+protected:
+ KeyT _keys[NumSlots];
+ BTreeNodeT(uint8_t level)
+ : BTreeNode(level),
+ _keys()
+ {
+ }
+
+ ~BTreeNodeT(void)
+ {
+ }
+
+ BTreeNodeT(const BTreeNodeT &rhs)
+ : BTreeNode(rhs)
+ {
+ const KeyT *rkeys = rhs._keys;
+ KeyT *lkeys = _keys;
+ KeyT *lkeyse = _keys + _validSlots;
+ for (; lkeys != lkeyse; ++lkeys, ++rkeys)
+ *lkeys = *rkeys;
+ }
+
+ BTreeNodeT &
+ operator=(const BTreeNodeT &rhs)
+ {
+ BTreeNode::operator=(rhs);
+ const KeyT *rkeys = rhs._keys;
+ KeyT *lkeys = _keys;
+ KeyT *lkeyse = _keys + _validSlots;
+ for (; lkeys != lkeyse; ++lkeys, ++rkeys)
+ *lkeys = *rkeys;
+ return *this;
+ }
+
+public:
+ const KeyT & getKey(uint32_t idx) const { return _keys[idx]; }
+ const KeyT & getLastKey() const { return _keys[validSlots() - 1]; }
+ void writeKey(uint32_t idx, const KeyT & key) { _keys[idx] = key; }
+
+ template <typename CompareT>
+ uint32_t
+ lower_bound(uint32_t sidx, const KeyT & key, CompareT comp) const;
+
+ template <typename CompareT>
+ uint32_t
+ lower_bound(const KeyT & key, CompareT comp) const;
+
+ template <typename CompareT>
+ uint32_t
+ upper_bound(uint32_t sidx, const KeyT & key, CompareT comp) const;
+
+ bool isFull() const { return validSlots() == NumSlots; }
+ bool isAtLeastHalfFull() const { return validSlots() >= minSlots(); }
+ static uint32_t maxSlots() { return NumSlots; }
+ static uint32_t minSlots() { return NumSlots / 2; }
+};
+
+template <typename KeyT, typename DataT, typename AggrT, uint32_t NumSlots>
+class BTreeNodeTT : public BTreeNodeT<KeyT, NumSlots>,
+ public BTreeNodeDataWrap<DataT, NumSlots>,
+ public BTreeNodeAggregatedWrap<AggrT>
+{
+public:
+ typedef BTreeNodeT<KeyT, NumSlots> ParentType;
+ typedef BTreeNodeDataWrap<DataT, NumSlots> DataWrapType;
+ typedef BTreeNodeAggregatedWrap<AggrT> AggrWrapType;
+ using ParentType::_validSlots;
+ using ParentType::validSlots;
+ using ParentType::getFrozen;
+ using ParentType::_keys;
+ using DataWrapType::getData;
+ using DataWrapType::setData;
+ using DataWrapType::copyData;
+protected:
+ BTreeNodeTT(uint8_t level)
+ : ParentType(level),
+ DataWrapType()
+ {
+ }
+
+ ~BTreeNodeTT(void)
+ {
+ }
+
+ BTreeNodeTT(const BTreeNodeTT &rhs)
+ : ParentType(rhs),
+ DataWrapType(rhs),
+ AggrWrapType(rhs)
+ {
+ copyData(rhs, _validSlots);
+ }
+
+ BTreeNodeTT &
+ operator=(const BTreeNodeTT &rhs)
+ {
+ ParentType::operator=(rhs);
+ AggrWrapType::operator=(rhs);
+ copyData(rhs, _validSlots);
+ return *this;
+ }
+
+public:
+ typedef BTreeNodeTT<KeyT, DataT, AggrT, NumSlots> NodeType;
+ void insert(uint32_t idx, const KeyT & key, const DataT & data);
+ void update(uint32_t idx, const KeyT & key, const DataT & data) {
+ // assert(idx < NodeType::maxSlots());
+ // assert(!getFrozen());
+ _keys[idx] = key;
+ setData(idx, data);
+ }
+ void splitInsert(NodeType * splitNode, uint32_t idx, const KeyT & key, const DataT & data);
+ void remove(uint32_t idx);
+ void stealAllFromLeftNode(const NodeType * victim);
+ void stealAllFromRightNode(const NodeType * victim);
+ void stealSomeFromLeftNode(NodeType * victim);
+ void stealSomeFromRightNode(NodeType * victim);
+ void cleanRange(uint32_t from, uint32_t to);
+ void clean(void);
+
+ void cleanFrozen(void);
+};
+
+template <typename KeyT, typename AggrT, uint32_t NumSlots = 16>
+class BTreeInternalNode : public BTreeNodeTT<KeyT, BTreeNode::Ref, AggrT,
+ NumSlots>
+{
+public:
+ typedef BTreeNodeTT<KeyT, BTreeNode::Ref, AggrT, NumSlots> ParentType;
+ typedef BTreeInternalNode<KeyT, AggrT, NumSlots> InternalNodeType;
+ template <typename, typename, typename, size_t, size_t>
+ friend class BTreeNodeAllocator;
+ template <typename, typename, typename, size_t, size_t>
+ friend class BTreeNodeStore;
+ template <typename, uint32_t>
+ friend class BTreeNodeDataWrap;
+ template <typename>
+ friend class BufferType;
+ template <typename>
+ friend class DataStoreT;
+ typedef BTreeNode::Ref Ref;
+ typedef std::pair<Ref, InternalNodeType *> RefPair;
+ using ParentType::_keys;
+ using ParentType::validSlots;
+ using ParentType::_validSlots;
+ using ParentType::getFrozen;
+ using ParentType::getData;
+ using ParentType::setData;
+ using ParentType::setLevel;
+ using ParentType::EMPTY_LEVEL;
+ typedef KeyT KeyType;
+ typedef Ref DataType;
+private:
+ uint32_t _validLeaves;
+
+ BTreeInternalNode()
+ : ParentType(EMPTY_LEVEL),
+ _validLeaves(0u)
+ {
+ }
+
+
+ BTreeInternalNode(const BTreeInternalNode &rhs)
+ : ParentType(rhs),
+ _validLeaves(rhs._validLeaves)
+ {
+ }
+
+ ~BTreeInternalNode(void)
+ {
+ }
+
+ BTreeInternalNode &
+ operator=(const BTreeInternalNode &rhs)
+ {
+ ParentType::operator=(rhs);
+ _validLeaves = rhs._validLeaves;
+ return *this;
+ }
+public:
+ BTreeNode::Ref
+ getChild(uint32_t idx) const
+ {
+ return getData(idx);
+ }
+
+ void
+ setChild(uint32_t idx, BTreeNode::Ref child)
+ {
+ setData(idx, child);
+ }
+
+ BTreeNode::Ref getLastChild() const { return getChild(validSlots() - 1); }
+
+ uint32_t
+ validLeaves(void) const
+ {
+ return _validLeaves;
+ }
+
+ void
+ setValidLeaves(uint32_t newValidLeaves)
+ {
+ _validLeaves = newValidLeaves;
+ }
+
+ void
+ incValidLeaves(uint32_t delta)
+ {
+ _validLeaves += delta;
+ }
+
+ void
+ decValidLeaves(uint32_t delta)
+ {
+ _validLeaves -= delta;
+ }
+
+ template <typename NodeAllocatorType>
+ void
+ splitInsert(BTreeInternalNode *splitNode, uint32_t idx, const KeyT &key,
+ const BTreeNode::Ref &data,
+ NodeAllocatorType &allocator);
+
+ void
+ stealAllFromLeftNode(const BTreeInternalNode *victim);
+
+ void
+ stealAllFromRightNode(const BTreeInternalNode *victim);
+
+ template <typename NodeAllocatorType>
+ void
+ stealSomeFromLeftNode(BTreeInternalNode *victim,
+ NodeAllocatorType &allocator);
+
+ template <typename NodeAllocatorType>
+ void
+ stealSomeFromRightNode(BTreeInternalNode *victim,
+ NodeAllocatorType &allocator);
+
+ void
+ clean(void);
+
+ void
+ cleanFrozen(void);
+
+ template <typename NodeStoreType, typename FunctionType>
+ void
+ foreach_key(NodeStoreType &store, FunctionType func) const
+ {
+ const BTreeNode::Ref *it = this->_data;
+ const BTreeNode::Ref *ite = it + _validSlots;
+ if (this->getLevel() > 1u) {
+ for (; it != ite; ++it) {
+ store.mapInternalRef(*it)->foreach_key(store, func);
+ }
+ } else {
+ for (; it != ite; ++it) {
+ store.mapLeafRef(*it)->foreach_key(func);
+ }
+ }
+ }
+
+ template <typename NodeStoreType, typename FunctionType>
+ void
+ foreach(NodeStoreType &store, FunctionType func) const
+ {
+ const BTreeNode::Ref *it = this->_data;
+ const BTreeNode::Ref *ite = it + _validSlots;
+ if (this->getLevel() > 1u) {
+ for (; it != ite; ++it) {
+ store.mapInternalRef(*it)->foreach(store, func);
+ }
+ } else {
+ for (; it != ite; ++it) {
+ store.mapLeafRef(*it)->foreach(func);
+ }
+ }
+ }
+};
+
+template <typename KeyT, typename DataT, typename AggrT,
+ uint32_t NumSlots = 16>
+class BTreeLeafNode : public BTreeNodeTT<KeyT, DataT, AggrT, NumSlots>
+{
+public:
+ typedef BTreeNodeTT<KeyT, DataT, AggrT, NumSlots> ParentType;
+ typedef BTreeLeafNode<KeyT, DataT, AggrT, NumSlots> LeafNodeType;
+ template <typename, typename, typename, size_t, size_t>
+ friend class BTreeNodeAllocator;
+ template <typename, typename, typename, size_t, size_t>
+ friend class BTreeNodeStore;
+ template <typename>
+ friend class BufferType;
+ template <typename>
+ friend class DataStoreT;
+ typedef BTreeNode::Ref Ref;
+ typedef std::pair<Ref, LeafNodeType *> RefPair;
+ using ParentType::validSlots;
+ using ParentType::_validSlots;
+ using ParentType::_keys;
+ using ParentType::freeze;
+ using ParentType::stealSomeFromLeftNode;
+ using ParentType::stealSomeFromRightNode;
+ using ParentType::LEAF_LEVEL;
+ typedef BTreeKeyData<KeyT, DataT> KeyDataType;
+ typedef KeyT KeyType;
+ typedef DataT DataType;
+private:
+ BTreeLeafNode() : ParentType(LEAF_LEVEL) {}
+
+protected:
+ BTreeLeafNode(const BTreeLeafNode &rhs)
+ : ParentType(rhs)
+ {
+ }
+
+ BTreeLeafNode(const KeyDataType *smallArray, uint32_t arraySize);
+
+ ~BTreeLeafNode(void)
+ {
+ }
+
+ BTreeLeafNode &
+ operator=(const BTreeLeafNode &rhs)
+ {
+ ParentType::operator=(rhs);
+ return *this;
+ }
+public:
+ template <typename NodeAllocatorType>
+ void
+ stealSomeFromLeftNode(BTreeLeafNode *victim,
+ NodeAllocatorType &allocator)
+ {
+ (void) allocator;
+ stealSomeFromLeftNode(victim);
+ }
+
+ template <typename NodeAllocatorType>
+ void
+ stealSomeFromRightNode(BTreeLeafNode *victim,
+ NodeAllocatorType &allocator)
+ {
+ (void) allocator;
+ stealSomeFromRightNode(victim);
+ }
+
+ const DataT &
+ getLastData() const
+ {
+ return this->getData(validSlots() - 1);
+ }
+
+ void
+ writeData(uint32_t idx, const DataT &data)
+ {
+ this->setData(idx, data);
+ }
+
+ uint32_t
+ validLeaves(void) const
+ {
+ return validSlots();
+ }
+
+ template <typename FunctionType>
+ void
+ foreach_key(FunctionType func) const
+ {
+ const KeyT *it = _keys;
+ const KeyT *ite = it + _validSlots;
+ for (; it != ite; ++it) {
+ func(*it);
+ }
+ }
+
+ template <typename FunctionType>
+ void
+ foreach(FunctionType func) const
+ {
+ const KeyT *it = _keys;
+ const KeyT *ite = it + _validSlots;
+ uint32_t idx = 0;
+ for (; it != ite; ++it) {
+ func(*it, this->getData(idx++));
+ }
+ }
+};
+
+
+template <typename KeyT, typename DataT, typename AggrT,
+ uint32_t NumSlots = 16>
+class BTreeLeafNodeTemp : public BTreeLeafNode<KeyT, DataT, AggrT, NumSlots>
+{
+public:
+ typedef BTreeLeafNode<KeyT, DataT, AggrT, NumSlots> ParentType;
+ typedef typename ParentType::KeyDataType KeyDataType;
+
+ BTreeLeafNodeTemp(const KeyDataType *smallArray,
+ uint32_t arraySize)
+ : ParentType(smallArray, arraySize)
+ {
+ }
+
+ ~BTreeLeafNodeTemp(void)
+ {
+ }
+};
+
+extern template class BTreeNodeDataWrap<uint32_t, 16>;
+extern template class BTreeNodeDataWrap<BTreeNoLeafData, 16>;
+extern template class BTreeKeyData<uint32_t, uint32_t>;
+extern template class BTreeKeyData<uint32_t, int32_t>;
+extern template class BTreeNodeT<uint32_t, 16>;
+extern template class BTreeNodeTT<uint32_t, uint32_t, NoAggregated, 16>;
+extern template class BTreeNodeTT<uint32_t, BTreeNoLeafData, NoAggregated, 16>;
+extern template class BTreeNodeTT<uint32_t, EntryRef, NoAggregated, 16>;
+extern template class BTreeNodeTT<uint32_t, int32_t, MinMaxAggregated, 16>;
+extern template class BTreeInternalNode<uint32_t, NoAggregated, 16>;
+extern template class BTreeInternalNode<uint32_t, MinMaxAggregated, 16>;
+extern template class BTreeLeafNode<uint32_t, uint32_t, NoAggregated, 16>;
+extern template class BTreeLeafNode<uint32_t, BTreeNoLeafData, NoAggregated,
+ 16>;
+extern template class BTreeLeafNode<uint32_t, int32_t, MinMaxAggregated, 16>;
+extern template class BTreeLeafNodeTemp<uint32_t, uint32_t, NoAggregated, 16>;
+extern template class BTreeLeafNodeTemp<uint32_t, int32_t, MinMaxAggregated,
+ 16>;
+extern template class BTreeLeafNodeTemp<uint32_t, BTreeNoLeafData,
+ NoAggregated, 16>;
+
+} // namespace search::btree
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/btree/btreenode.hpp b/searchlib/src/vespa/searchlib/btree/btreenode.hpp
new file mode 100644
index 00000000000..958b9e5fa5d
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/btree/btreenode.hpp
@@ -0,0 +1,402 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "btreenode.h"
+#include <algorithm>
+
+namespace search {
+namespace btree {
+
+namespace {
+
+class SplitInsertHelper {
+private:
+ uint32_t _idx;
+ uint32_t _median;
+ bool _medianBumped;
+public:
+ SplitInsertHelper(uint32_t idx, uint32_t validSlots) :
+ _idx(idx),
+ _median(validSlots / 2),
+ _medianBumped(false)
+ {
+ if (idx > _median) {
+ _median++;
+ _medianBumped = true;
+ }
+ }
+ uint32_t getMedian() const { return _median; }
+ bool insertInSplitNode() const {
+ if (_median >= _idx && !_medianBumped) {
+ return false;
+ }
+ return true;
+ }
+};
+
+
+}
+
+template <typename KeyT, uint32_t NumSlots>
+template <typename CompareT>
+uint32_t
+BTreeNodeT<KeyT, NumSlots>::
+lower_bound(uint32_t sidx, const KeyT & key, CompareT comp) const
+{
+ const KeyT * itr = std::lower_bound<const KeyT *, KeyT, CompareT>
+ (_keys + sidx, _keys + validSlots(), key, comp);
+ return itr - _keys;
+}
+
+template <typename KeyT, uint32_t NumSlots>
+template <typename CompareT>
+uint32_t
+BTreeNodeT<KeyT, NumSlots>::lower_bound(const KeyT & key, CompareT comp) const
+{
+
+ const KeyT * itr = std::lower_bound<const KeyT *, KeyT, CompareT>
+ (_keys, _keys + validSlots(), key, comp);
+ return itr - _keys;
+}
+
+
+template <typename KeyT, uint32_t NumSlots>
+template <typename CompareT>
+uint32_t
+BTreeNodeT<KeyT, NumSlots>::
+upper_bound(uint32_t sidx, const KeyT & key, CompareT comp) const
+{
+ const KeyT * itr = std::upper_bound<const KeyT *, KeyT, CompareT>
+ (_keys + sidx, _keys + validSlots(), key, comp);
+ return itr - _keys;
+}
+
+
+template <typename KeyT, typename DataT, typename AggrT, uint32_t NumSlots>
+void
+BTreeNodeTT<KeyT, DataT, AggrT, NumSlots>::insert(uint32_t idx,
+ const KeyT &key,
+ const DataT &data)
+{
+ assert(validSlots() < NodeType::maxSlots());
+ assert(!getFrozen());
+ for (uint32_t i = validSlots(); i > idx; --i) {
+ _keys[i] = _keys[i - 1];
+ setData(i, getData(i - 1));
+ }
+ _keys[idx] = key;
+ setData(idx, data);
+ _validSlots++;
+}
+
+template <typename KeyT, typename DataT, typename AggrT, uint32_t NumSlots>
+void
+BTreeNodeTT<KeyT, DataT, AggrT, NumSlots>::splitInsert(NodeType *splitNode,
+ uint32_t idx,
+ const KeyT &key,
+ const DataT &data)
+{
+ assert(!getFrozen());
+ assert(!splitNode->getFrozen());
+ SplitInsertHelper sih(idx, validSlots());
+ splitNode->_validSlots = validSlots() - sih.getMedian();
+ for (uint32_t i = sih.getMedian(); i < validSlots(); ++i) {
+ splitNode->_keys[i - sih.getMedian()] = _keys[i];
+ splitNode->setData(i - sih.getMedian(), getData(i));
+ }
+ cleanRange(sih.getMedian(), validSlots());
+ _validSlots = sih.getMedian();
+ if (sih.insertInSplitNode()) {
+ splitNode->insert(idx - sih.getMedian(), key, data);
+ } else {
+ insert(idx, key, data);
+ }
+}
+
+template <typename KeyT, typename DataT, typename AggrT, uint32_t NumSlots>
+void
+BTreeNodeTT<KeyT, DataT, AggrT, NumSlots>::remove(uint32_t idx)
+{
+ assert(!getFrozen());
+ for (uint32_t i = idx + 1; i < validSlots(); ++i) {
+ _keys[i - 1] = _keys[i];
+ setData(i - 1, getData(i));
+ }
+ _validSlots--;
+ _keys[validSlots()] = KeyT();
+ setData(validSlots(), DataT());
+}
+
+template <typename KeyT, typename DataT, typename AggrT, uint32_t NumSlots>
+void
+BTreeNodeTT<KeyT, DataT, AggrT, NumSlots>::
+stealAllFromLeftNode(const NodeType *victim)
+{
+ assert(validSlots() + victim->validSlots() <= NodeType::maxSlots());
+ assert(!getFrozen());
+ for (int i = validSlots() - 1; i >= 0; --i) {
+ _keys[i + victim->validSlots()] = _keys[i];
+ setData(i + victim->validSlots(), getData(i));
+ }
+ for (uint32_t i = 0; i < victim->validSlots(); ++i) {
+ _keys[i] = victim->_keys[i];
+ setData(i, victim->getData(i));
+ }
+ _validSlots += victim->validSlots();
+}
+
+template <typename KeyT, typename DataT, typename AggrT, uint32_t NumSlots>
+void
+BTreeNodeTT<KeyT, DataT, AggrT, NumSlots>::
+stealAllFromRightNode(const NodeType *victim)
+{
+ assert(validSlots() + victim->validSlots() <= NodeType::maxSlots());
+ assert(!getFrozen());
+ for (uint32_t i = 0; i < victim->validSlots(); ++i) {
+ _keys[validSlots() + i] = victim->_keys[i];
+ setData(validSlots() + i, victim->getData(i));
+ }
+ _validSlots += victim->validSlots();
+}
+
+template <typename KeyT, typename DataT, typename AggrT, uint32_t NumSlots>
+void
+BTreeNodeTT<KeyT, DataT, AggrT, NumSlots>::
+stealSomeFromLeftNode(NodeType *victim)
+{
+ assert(validSlots() + victim->validSlots() >= NodeType::minSlots());
+ assert(!getFrozen());
+ assert(!victim->getFrozen());
+ uint32_t median = (validSlots() + victim->validSlots()) / 2;
+ uint32_t steal = median - validSlots();
+ _validSlots += steal;
+ for (int32_t i = validSlots() - 1; i >= static_cast<int32_t>(steal); --i) {
+ _keys[i] = _keys[i - steal];
+ setData(i, getData(i - steal));
+ }
+ for (uint32_t i = 0; i < steal; ++i) {
+ _keys[i] = victim->_keys[victim->validSlots() - steal + i];
+ setData(i, victim->getData(victim->validSlots() - steal + i));
+ }
+ victim->cleanRange(victim->validSlots() - steal, victim->validSlots());
+ victim->_validSlots -= steal;
+}
+
+template <typename KeyT, typename DataT, typename AggrT, uint32_t NumSlots>
+void
+BTreeNodeTT<KeyT, DataT, AggrT, NumSlots>::
+stealSomeFromRightNode(NodeType *victim)
+{
+ assert(validSlots() + victim->validSlots() >= NodeType::minSlots());
+ assert(!getFrozen());
+ assert(!victim->getFrozen());
+ uint32_t median = (validSlots() + victim->validSlots()) / 2;
+ uint32_t steal = median - validSlots();
+ for (uint32_t i = 0; i < steal; ++i) {
+ _keys[validSlots() + i] = victim->_keys[i];
+ setData(validSlots() + i, victim->getData(i));
+ }
+ _validSlots += steal;
+ for (uint32_t i = steal; i < victim->validSlots(); ++i) {
+ victim->_keys[i - steal] = victim->_keys[i];
+ victim->setData(i - steal, victim->getData(i));
+ }
+ victim->cleanRange(victim->validSlots() - steal, victim->validSlots());
+ victim->_validSlots -= steal;
+}
+
+
+template <typename KeyT, typename DataT, typename AggrT, uint32_t NumSlots>
+void
+BTreeNodeTT<KeyT, DataT, AggrT, NumSlots>::cleanRange(uint32_t from,
+ uint32_t to)
+{
+ assert(from < to);
+ assert(to <= validSlots());
+ assert(validSlots() <= NodeType::maxSlots());
+ assert(!getFrozen());
+ KeyT emptyKey = KeyT();
+ for (KeyT *k = _keys + from, *ke = _keys + to; k != ke; ++k)
+ *k = emptyKey;
+ DataT emptyData = DataT();
+ for (uint32_t i = from; i != to; ++i)
+ setData(i, emptyData);
+}
+
+
+template <typename KeyT, typename DataT, typename AggrT, uint32_t NumSlots>
+void
+BTreeNodeTT<KeyT, DataT, AggrT, NumSlots>::clean(void)
+{
+ if (validSlots() == 0)
+ return;
+ cleanRange(0, validSlots());
+ _validSlots = 0;
+}
+
+
+template <typename KeyT, typename DataT, typename AggrT, uint32_t NumSlots>
+void
+BTreeNodeTT<KeyT, DataT, AggrT, NumSlots>::cleanFrozen(void)
+{
+ assert(validSlots() <= NodeType::maxSlots());
+ assert(getFrozen());
+ if (validSlots() == 0)
+ return;
+ KeyT emptyKey = KeyT();
+ for (KeyT *k = _keys, *ke = _keys + validSlots(); k != ke; ++k)
+ *k = emptyKey;
+ DataT emptyData = DataT();
+ for (uint32_t i = 0, ie = validSlots(); i != ie; ++i)
+ setData(i, emptyData);
+ _validSlots = 0;
+}
+
+
+template <typename KeyT, typename AggrT, uint32_t NumSlots>
+template <typename NodeAllocatorType>
+void
+BTreeInternalNode<KeyT, AggrT, NumSlots>::
+splitInsert(BTreeInternalNode *splitNode, uint32_t idx, const KeyT &key,
+ const BTreeNode::Ref &data,
+ NodeAllocatorType &allocator)
+{
+ assert(!getFrozen());
+ assert(!splitNode->getFrozen());
+ SplitInsertHelper sih(idx, validSlots());
+ splitNode->_validSlots = validSlots() - sih.getMedian();
+ uint32_t splitLeaves = 0;
+ uint32_t newLeaves = allocator.validLeaves(data);
+ for (uint32_t i = sih.getMedian(); i < validSlots(); ++i) {
+ splitNode->_keys[i - sih.getMedian()] = _keys[i];
+ splitNode->setData(i - sih.getMedian(), getData(i));
+ splitLeaves += allocator.validLeaves(getData(i));
+ }
+ splitNode->_validLeaves = splitLeaves;
+ this->cleanRange(sih.getMedian(), validSlots());
+ _validLeaves -= splitLeaves + newLeaves;
+ _validSlots = sih.getMedian();
+ if (sih.insertInSplitNode()) {
+ splitNode->insert(idx - sih.getMedian(), key, data);
+ splitNode->_validLeaves += newLeaves;
+ } else {
+ this->insert(idx, key, data);
+ _validLeaves += newLeaves;
+ }
+}
+
+
+template <typename KeyT, typename AggrT, uint32_t NumSlots>
+void
+BTreeInternalNode<KeyT, AggrT, NumSlots>::
+stealAllFromLeftNode(const BTreeInternalNode *victim)
+{
+ ParentType::stealAllFromLeftNode(victim);
+ _validLeaves += victim->_validLeaves;
+}
+
+template <typename KeyT, typename AggrT, uint32_t NumSlots>
+void
+BTreeInternalNode<KeyT, AggrT, NumSlots>::
+stealAllFromRightNode(const BTreeInternalNode *victim)
+{
+ ParentType::stealAllFromRightNode(victim);
+ _validLeaves += victim->_validLeaves;
+}
+
+
+template <typename KeyT, typename AggrT, uint32_t NumSlots>
+template <typename NodeAllocatorType>
+void
+BTreeInternalNode<KeyT, AggrT, NumSlots>::
+stealSomeFromLeftNode(BTreeInternalNode *victim, NodeAllocatorType &allocator)
+{
+ assert(validSlots() + victim->validSlots() >= BTreeInternalNode::minSlots());
+ assert(!getFrozen());
+ assert(!victim->getFrozen());
+ uint32_t median = (validSlots() + victim->validSlots()) / 2;
+ uint32_t steal = median - validSlots();
+ _validSlots += steal;
+ for (int32_t i = validSlots() - 1; i >= static_cast<int32_t>(steal); --i) {
+ _keys[i] = _keys[i - steal];
+ setData(i, getData(i - steal));
+ }
+ uint32_t stolenLeaves = 0;
+ for (uint32_t i = 0; i < steal; ++i) {
+ _keys[i] = victim->_keys[victim->validSlots() - steal + i];
+ setData(i, victim->getData(victim->validSlots() - steal + i));
+ stolenLeaves += allocator.validLeaves(getData(i));
+ }
+ _validLeaves += stolenLeaves;
+ victim->_validLeaves -= stolenLeaves;
+ victim->cleanRange(victim->validSlots() - steal, victim->validSlots());
+ victim->_validSlots -= steal;
+}
+
+
+template <typename KeyT, typename AggrT, uint32_t NumSlots>
+template <typename NodeAllocatorType>
+void
+BTreeInternalNode<KeyT, AggrT, NumSlots>::
+stealSomeFromRightNode(BTreeInternalNode *victim, NodeAllocatorType &allocator)
+{
+ assert(validSlots() + victim->validSlots() >= BTreeInternalNode::minSlots());
+ assert(!getFrozen());
+ assert(!victim->getFrozen());
+ uint32_t median = (validSlots() + victim->validSlots()) / 2;
+ uint32_t steal = median - validSlots();
+ uint32_t stolenLeaves = 0;
+ for (uint32_t i = 0; i < steal; ++i) {
+ _keys[validSlots() + i] = victim->_keys[i];
+ setData(validSlots() + i, victim->getData(i));
+ stolenLeaves += allocator.validLeaves(victim->getData(i));
+ }
+ _validSlots += steal;
+ _validLeaves += stolenLeaves;
+ victim->_validLeaves -= stolenLeaves;
+ for (uint32_t i = steal; i < victim->validSlots(); ++i) {
+ victim->_keys[i - steal] = victim->_keys[i];
+ victim->setData(i - steal, victim->getData(i));
+ }
+ victim->cleanRange(victim->validSlots() - steal, victim->validSlots());
+ victim->_validSlots -= steal;
+}
+
+
+template <typename KeyT, typename AggrT, uint32_t NumSlots>
+void
+BTreeInternalNode<KeyT, AggrT, NumSlots>::clean(void)
+{
+ ParentType::clean();
+ _validLeaves = 0;
+}
+
+
+template <typename KeyT, typename AggrT, uint32_t NumSlots>
+void
+BTreeInternalNode<KeyT, AggrT, NumSlots>::cleanFrozen(void)
+{
+ ParentType::cleanFrozen();
+ _validLeaves = 0;
+}
+
+
+template <typename KeyT, typename DataT, typename AggrT, uint32_t NumSlots>
+BTreeLeafNode<KeyT, DataT, AggrT, NumSlots>::
+BTreeLeafNode(const KeyDataType *smallArray, uint32_t arraySize)
+ : ParentType(LEAF_LEVEL)
+{
+ assert(arraySize <= BTreeLeafNode::maxSlots());
+ _validSlots = arraySize;
+ for (uint32_t idx = 0; idx < arraySize; ++idx) {
+ _keys[idx] = smallArray[idx]._key;
+ this->setData(idx, smallArray[idx].getData());
+ }
+ freeze();
+}
+
+
+} // namespace search::btree
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/btree/btreenodeallocator.cpp b/searchlib/src/vespa/searchlib/btree/btreenodeallocator.cpp
new file mode 100644
index 00000000000..11f371360e4
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/btree/btreenodeallocator.cpp
@@ -0,0 +1,27 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "btreenodeallocator.h"
+#include "btreerootbase.h"
+#include "btreenodeallocator.hpp"
+
+namespace search
+{
+namespace btree
+{
+
+template class BTreeNodeAllocator<uint32_t, uint32_t,
+ NoAggregated,
+ BTreeDefaultTraits::INTERNAL_SLOTS,
+ BTreeDefaultTraits::LEAF_SLOTS>;
+template class BTreeNodeAllocator<uint32_t, BTreeNoLeafData,
+ NoAggregated,
+ BTreeDefaultTraits::INTERNAL_SLOTS,
+ BTreeDefaultTraits::LEAF_SLOTS>;
+template class BTreeNodeAllocator<uint32_t, int32_t,
+ MinMaxAggregated,
+ BTreeDefaultTraits::INTERNAL_SLOTS,
+ BTreeDefaultTraits::LEAF_SLOTS>;
+
+} // namespace btree
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/btree/btreenodeallocator.h b/searchlib/src/vespa/searchlib/btree/btreenodeallocator.h
new file mode 100644
index 00000000000..53e949bacbe
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/btree/btreenodeallocator.h
@@ -0,0 +1,271 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vector>
+#include <deque>
+
+#include "btreenode.h"
+#include <vespa/vespalib/util/generationhandler.h>
+#include <vespa/searchlib/util/memoryusage.h>
+#include "btreenodestore.h"
+#include <vespa/vespalib/stllike/string.h>
+#include <vespa/vespalib/util/array.h>
+
+namespace search
+{
+
+namespace btree
+{
+
+template <typename, typename, typename, size_t, size_t> class BTreeRootBase;
+
+template <typename KeyT,
+ typename DataT,
+ typename AggrT,
+ size_t INTERNAL_SLOTS,
+ size_t LEAF_SLOTS>
+class BTreeNodeAllocator
+{
+public:
+ typedef BTreeInternalNode<KeyT, AggrT, INTERNAL_SLOTS> InternalNodeType;
+ typedef BTreeLeafNode<KeyT, DataT, AggrT, LEAF_SLOTS> LeafNodeType;
+ typedef typename InternalNodeType::RefPair InternalNodeTypeRefPair;
+ typedef typename LeafNodeType::RefPair LeafNodeTypeRefPair;
+ typedef BTreeRootBase<KeyT, DataT, AggrT, INTERNAL_SLOTS, LEAF_SLOTS>
+ BTreeRootBaseType;
+ typedef vespalib::GenerationHandler::generation_t generation_t;
+ typedef BTreeNodeStore<KeyT, DataT, AggrT, INTERNAL_SLOTS, LEAF_SLOTS>
+ NodeStore;
+
+private:
+ BTreeNodeAllocator(const BTreeNodeAllocator &rhs);
+
+ BTreeNodeAllocator &
+ operator=(const BTreeNodeAllocator &rhs);
+
+ NodeStore _nodeStore;
+
+ typedef vespalib::Array<BTreeNode::Ref, vespalib::DefaultAlloc> RefVector;
+ typedef vespalib::Array<BTreeRootBaseType *, vespalib::DefaultAlloc> BTreeRootBaseTypeVector;
+
+ // Nodes that might not be frozen.
+ RefVector _internalToFreeze;
+ RefVector _leafToFreeze;
+ BTreeRootBaseTypeVector _treeToFreeze;
+
+ // Nodes held until freeze is performed
+ RefVector _internalHoldUntilFreeze;
+ RefVector _leafHoldUntilFreeze;
+
+public:
+ BTreeNodeAllocator(void);
+
+ ~BTreeNodeAllocator(void);
+
+ void
+ disableFreeLists() {
+ _nodeStore.disableFreeLists();
+ }
+
+ void
+ disableElemHoldList()
+ {
+ _nodeStore.disableElemHoldList();
+ }
+
+ /**
+ * Allocate internal node.
+ */
+ InternalNodeTypeRefPair
+ allocInternalNode(uint8_t level);
+
+ /*
+ * Allocate leaf node.
+ */
+ LeafNodeTypeRefPair
+ allocLeafNode(void);
+
+ InternalNodeTypeRefPair
+ thawNode(BTreeNode::Ref nodeRef, InternalNodeType *node);
+
+ LeafNodeTypeRefPair
+ thawNode(BTreeNode::Ref nodeRef, LeafNodeType *node);
+
+ BTreeNode::Ref
+ thawNode(BTreeNode::Ref node);
+
+ /**
+ * hold internal node until freeze/generation constraint is satisfied.
+ */
+ void
+ holdNode(BTreeNode::Ref nodeRef, InternalNodeType *node);
+
+ /**
+ * hold leaf node until freeze/generation constraint is satisfied.
+ */
+ void
+ holdNode(BTreeNode::Ref nodeRef, LeafNodeType *node);
+
+ /**
+ * Mark that tree needs to be frozen. Tree must be kept alive until
+ * freeze operation has completed.
+ */
+ void
+ needFreeze(BTreeRootBaseType *tree);
+
+ /**
+ * Freeze all nodes that are not already frozen.
+ */
+ void
+ freeze(void);
+
+ /**
+ * Try to free held nodes if nobody can be referencing them.
+ */
+ void
+ trimHoldLists(generation_t usedGen);
+
+ /**
+ * Transfer nodes from hold1 lists to hold2 lists, they are no
+ * longer referenced by new frozen structures, but readers accessing
+ * older versions of the frozen structure must leave before elements
+ * can be unheld.
+ */
+ void
+ transferHoldLists(generation_t generation);
+
+ void
+ clearHoldLists(void);
+
+ static bool
+ isValidRef(BTreeNode::Ref ref)
+ {
+ return NodeStore::isValidRef(ref);
+ }
+
+ bool
+ isLeafRef(BTreeNode::Ref ref) const
+ {
+ if (!isValidRef(ref))
+ return false;
+ return _nodeStore.isLeafRef(ref);
+ }
+
+ const InternalNodeType *
+ mapInternalRef(BTreeNode::Ref ref) const
+ {
+ return _nodeStore.mapInternalRef(ref);
+ }
+
+ InternalNodeType *
+ mapInternalRef(BTreeNode::Ref ref)
+ {
+ return _nodeStore.mapInternalRef(ref);
+ }
+
+ const LeafNodeType *
+ mapLeafRef(BTreeNode::Ref ref) const
+ {
+ return _nodeStore.mapLeafRef(ref);
+ }
+
+ LeafNodeType *
+ mapLeafRef(BTreeNode::Ref ref)
+ {
+ return _nodeStore.mapLeafRef(ref);
+ }
+
+ template <typename NodeType>
+ const NodeType *
+ mapRef(BTreeNode::Ref ref) const
+ {
+ return _nodeStore.template mapRef<NodeType>(ref);
+ }
+
+ template <typename NodeType>
+ NodeType *
+ mapRef(BTreeNode::Ref ref)
+ {
+ return _nodeStore.template mapRef<NodeType>(ref);
+ }
+
+ InternalNodeTypeRefPair
+ moveInternalNode(const InternalNodeType *node);
+
+ LeafNodeTypeRefPair
+ moveLeafNode(const LeafNodeType *node);
+
+ uint32_t
+ validLeaves(BTreeNode::Ref ref) const;
+
+ /*
+ * Extract level from ref.
+ */
+ uint32_t
+ getLevel(BTreeNode::Ref ref) const;
+
+ const KeyT &
+ getLastKey(BTreeNode::Ref node) const;
+
+ const AggrT &
+ getAggregated(BTreeNode::Ref node) const;
+
+ MemoryUsage getMemoryUsage() const;
+
+ vespalib::string toString(BTreeNode::Ref ref) const;
+
+ vespalib::string toString(const BTreeNode * node) const;
+
+ bool
+ getCompacting(EntryRef ref) const
+ {
+ return _nodeStore.getCompacting(ref);
+ }
+
+ std::vector<uint32_t>
+ startCompact(void)
+ {
+ return _nodeStore.startCompact();
+ }
+
+ void
+ finishCompact(const std::vector<uint32_t> &toHold)
+ {
+ return _nodeStore.finishCompact(toHold);
+ }
+
+ template <typename FunctionType>
+ void
+ foreach_key(EntryRef ref, FunctionType func) const
+ {
+ _nodeStore.foreach_key(ref, func);
+ }
+
+ template <typename FunctionType>
+ void
+ foreach(EntryRef ref, FunctionType func) const
+ {
+ _nodeStore.foreach(ref, func);
+ }
+
+ const NodeStore &getNodeStore() const { return _nodeStore; }
+};
+
+extern template class BTreeNodeAllocator<uint32_t, uint32_t,
+ NoAggregated,
+ BTreeDefaultTraits::INTERNAL_SLOTS,
+ BTreeDefaultTraits::LEAF_SLOTS>;
+extern template class BTreeNodeAllocator<uint32_t, BTreeNoLeafData,
+ NoAggregated,
+ BTreeDefaultTraits::INTERNAL_SLOTS,
+ BTreeDefaultTraits::LEAF_SLOTS>;
+extern template class BTreeNodeAllocator<uint32_t, int32_t,
+ MinMaxAggregated,
+ BTreeDefaultTraits::INTERNAL_SLOTS,
+ BTreeDefaultTraits::LEAF_SLOTS>;
+
+} // namespace btree
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/btree/btreenodeallocator.hpp b/searchlib/src/vespa/searchlib/btree/btreenodeallocator.hpp
new file mode 100644
index 00000000000..2b189058544
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/btree/btreenodeallocator.hpp
@@ -0,0 +1,437 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "btreenodeallocator.h"
+#include "btreerootbase.h"
+#include <vespa/vespalib/stllike/asciistream.h>
+#include "btreenodestore.hpp"
+
+namespace search {
+namespace btree {
+
+template <typename KeyT, typename DataT, typename AggrT,
+ size_t INTERNAL_SLOTS, size_t LEAF_SLOTS>
+BTreeNodeAllocator<KeyT, DataT, AggrT, INTERNAL_SLOTS, LEAF_SLOTS>::
+BTreeNodeAllocator(void)
+ : _nodeStore(),
+ _internalToFreeze(),
+ _leafToFreeze(),
+ _treeToFreeze(),
+ _internalHoldUntilFreeze(),
+ _leafHoldUntilFreeze()
+{
+}
+
+
+template <typename KeyT, typename DataT, typename AggrT,
+ size_t INTERNAL_SLOTS, size_t LEAF_SLOTS>
+BTreeNodeAllocator<KeyT, DataT, AggrT, INTERNAL_SLOTS, LEAF_SLOTS>::
+~BTreeNodeAllocator(void)
+{
+ assert(_internalToFreeze.empty());
+ assert(_leafToFreeze.empty());
+ assert(_treeToFreeze.empty());
+ assert(_internalHoldUntilFreeze.empty());
+ assert(_leafHoldUntilFreeze.empty());
+ DataStoreBase::MemStats stats = _nodeStore.getMemStats();
+ assert(stats._usedBytes == stats._deadBytes);
+ assert(stats._holdBytes == 0);
+ (void) stats;
+}
+
+
+template <typename KeyT, typename DataT, typename AggrT,
+ size_t INTERNAL_SLOTS, size_t LEAF_SLOTS>
+typename BTreeNodeAllocator<KeyT, DataT, AggrT, INTERNAL_SLOTS, LEAF_SLOTS>::
+InternalNodeTypeRefPair
+BTreeNodeAllocator<KeyT, DataT, AggrT, INTERNAL_SLOTS, LEAF_SLOTS>::
+allocInternalNode(uint8_t level)
+{
+ if (_internalHoldUntilFreeze.empty()) {
+ InternalNodeTypeRefPair nodeRef = _nodeStore.allocInternalNode();
+ assert(nodeRef.first.valid());
+ _internalToFreeze.push_back(nodeRef.first);
+ nodeRef.second->setLevel(level);
+ return nodeRef;
+ }
+ BTreeNode::Ref nodeRef = _internalHoldUntilFreeze.back();
+ _internalHoldUntilFreeze.pop_back();
+ InternalNodeType *node = mapInternalRef(nodeRef);
+ assert(!node->getFrozen());
+ node->setLevel(level);
+ return std::make_pair(nodeRef, node);
+}
+
+
+template <typename KeyT, typename DataT, typename AggrT,
+ size_t INTERNAL_SLOTS, size_t LEAF_SLOTS>
+typename BTreeNodeAllocator<KeyT, DataT, AggrT, INTERNAL_SLOTS, LEAF_SLOTS>::
+LeafNodeTypeRefPair
+BTreeNodeAllocator<KeyT, DataT, AggrT, INTERNAL_SLOTS, LEAF_SLOTS>::
+allocLeafNode(void)
+{
+ if (_leafHoldUntilFreeze.empty()) {
+ LeafNodeTypeRefPair nodeRef = _nodeStore.allocLeafNode();
+ _leafToFreeze.push_back(nodeRef.first);
+ return nodeRef;
+ }
+ BTreeNode::Ref nodeRef = _leafHoldUntilFreeze.back();
+ _leafHoldUntilFreeze.pop_back();
+ LeafNodeType *node = mapLeafRef(nodeRef);
+ assert(!node->getFrozen());
+ return std::make_pair(nodeRef, node);
+}
+
+
+
+template <typename KeyT, typename DataT, typename AggrT,
+ size_t INTERNAL_SLOTS, size_t LEAF_SLOTS>
+typename BTreeNodeAllocator<KeyT, DataT, AggrT, INTERNAL_SLOTS, LEAF_SLOTS>::
+InternalNodeTypeRefPair
+BTreeNodeAllocator<KeyT, DataT, AggrT, INTERNAL_SLOTS, LEAF_SLOTS>::
+thawNode(BTreeNode::Ref nodeRef, InternalNodeType *node)
+{
+ if (_internalHoldUntilFreeze.empty()) {
+ InternalNodeTypeRefPair retNodeRef =
+ _nodeStore.allocInternalNodeCopy(*node);
+ assert(retNodeRef.second->getFrozen());
+ retNodeRef.second->unFreeze();
+ assert(retNodeRef.first.valid());
+ _internalToFreeze.push_back(retNodeRef.first);
+ holdNode(nodeRef, node);
+ return retNodeRef;
+ }
+ BTreeNode::Ref retNodeRef = _internalHoldUntilFreeze.back();
+ InternalNodeType *retNode = mapInternalRef(retNodeRef);
+ _internalHoldUntilFreeze.pop_back();
+ assert(!retNode->getFrozen());
+ *retNode = static_cast<const InternalNodeType &>(*node);
+ assert(retNode->getFrozen());
+ retNode->unFreeze();
+ holdNode(nodeRef, node);
+ return std::make_pair(retNodeRef, retNode);
+}
+
+
+template <typename KeyT, typename DataT, typename AggrT,
+ size_t INTERNAL_SLOTS, size_t LEAF_SLOTS>
+typename BTreeNodeAllocator<KeyT, DataT, AggrT, INTERNAL_SLOTS, LEAF_SLOTS>::
+LeafNodeTypeRefPair
+BTreeNodeAllocator<KeyT, DataT, AggrT, INTERNAL_SLOTS, LEAF_SLOTS>::
+thawNode(BTreeNode::Ref nodeRef, LeafNodeType *node)
+{
+ if (_leafHoldUntilFreeze.empty()) {
+ LeafNodeTypeRefPair retNodeRef =
+ _nodeStore.allocLeafNodeCopy(*node);
+ assert(retNodeRef.second->getFrozen());
+ retNodeRef.second->unFreeze();
+ _leafToFreeze.push_back(retNodeRef.first);
+ holdNode(nodeRef, node);
+ return retNodeRef;
+ }
+ BTreeNode::Ref retNodeRef = _leafHoldUntilFreeze.back();
+ LeafNodeType *retNode = mapLeafRef(retNodeRef);
+ _leafHoldUntilFreeze.pop_back();
+ assert(!retNode->getFrozen());
+ *retNode = static_cast<const LeafNodeType &>(*node);
+ assert(retNode->getFrozen());
+ retNode->unFreeze();
+ holdNode(nodeRef, node);
+ return std::make_pair(retNodeRef, retNode);
+}
+
+template <typename KeyT, typename DataT, typename AggrT,
+ size_t INTERNAL_SLOTS, size_t LEAF_SLOTS>
+BTreeNode::Ref
+BTreeNodeAllocator<KeyT, DataT, AggrT, INTERNAL_SLOTS, LEAF_SLOTS>::
+thawNode(BTreeNode::Ref node)
+{
+ if (isLeafRef(node))
+ return thawNode(node, mapLeafRef(node)).first;
+ else
+ return thawNode(node, mapInternalRef(node)).first;
+}
+
+
+template <typename KeyT, typename DataT, typename AggrT,
+ size_t INTERNAL_SLOTS, size_t LEAF_SLOTS>
+void
+BTreeNodeAllocator<KeyT, DataT, AggrT, INTERNAL_SLOTS, LEAF_SLOTS>::
+holdNode(BTreeNode::Ref nodeRef,
+ InternalNodeType *node)
+{
+ if (node->getFrozen()) {
+ _nodeStore.holdElem(nodeRef);
+ } else {
+ node->clean();
+ _internalHoldUntilFreeze.push_back(nodeRef);
+ }
+}
+
+
+template <typename KeyT, typename DataT, typename AggrT,
+ size_t INTERNAL_SLOTS, size_t LEAF_SLOTS>
+void
+BTreeNodeAllocator<KeyT, DataT, AggrT, INTERNAL_SLOTS, LEAF_SLOTS>::
+holdNode(BTreeNode::Ref nodeRef,
+ LeafNodeType *node)
+{
+ if (node->getFrozen()) {
+ _nodeStore.holdElem(nodeRef);
+ } else {
+ node->clean();
+ _leafHoldUntilFreeze.push_back(nodeRef);
+ }
+}
+
+
+template <typename KeyT, typename DataT, typename AggrT,
+ size_t INTERNAL_SLOTS, size_t LEAF_SLOTS>
+void
+BTreeNodeAllocator<KeyT, DataT, AggrT, INTERNAL_SLOTS, LEAF_SLOTS>::
+freeze(void)
+{
+ // Freeze nodes.
+
+ if (!_internalToFreeze.empty() || !_leafToFreeze.empty()) {
+ {
+ for (auto &i : _internalToFreeze) {
+ assert(i.valid());
+ mapInternalRef(i)->freeze();
+ }
+ _internalToFreeze.clear();
+ }
+ {
+ for (auto &i : _leafToFreeze) {
+ assert(i.valid());
+ mapLeafRef(i)->freeze();
+ }
+ _leafToFreeze.clear();
+ }
+
+ // Tree node freezes must be visible before tree freezes to
+ // ensure that readers see a frozen world
+ std::atomic_thread_fence(std::memory_order_release);
+ }
+
+ // Freeze trees.
+
+ if (!_treeToFreeze.empty()) {
+ for (auto &i : _treeToFreeze) {
+ i->freeze(*this);
+ }
+ _treeToFreeze.clear();
+ // Tree freezes must be visible before held nodes are freed
+ std::atomic_thread_fence(std::memory_order_release);
+ }
+
+
+ // Free nodes that were only held due to freezing.
+
+ {
+ for (auto &i : _internalHoldUntilFreeze) {
+ assert(!isLeafRef(i));
+ InternalNodeType *inode = mapInternalRef(i);
+ (void) inode;
+ assert(inode->getFrozen());
+ _nodeStore.freeElem(i);
+ }
+ _internalHoldUntilFreeze.clear();
+ }
+ {
+ for (auto &i : _leafHoldUntilFreeze) {
+ assert(isLeafRef(i));
+ LeafNodeType *lnode = mapLeafRef(i);
+ (void) lnode;
+ assert(lnode->getFrozen());
+ _nodeStore.freeElem(i);
+ }
+ _leafHoldUntilFreeze.clear();
+ }
+}
+
+
+template <typename KeyT, typename DataT, typename AggrT,
+ size_t INTERNAL_SLOTS, size_t LEAF_SLOTS>
+void
+BTreeNodeAllocator<KeyT, DataT, AggrT, INTERNAL_SLOTS, LEAF_SLOTS>::
+needFreeze(BTreeRootBaseType *tree)
+{
+ _treeToFreeze.push_back(tree);
+}
+
+
+template <typename KeyT, typename DataT, typename AggrT,
+ size_t INTERNAL_SLOTS, size_t LEAF_SLOTS>
+void
+BTreeNodeAllocator<KeyT, DataT, AggrT, INTERNAL_SLOTS, LEAF_SLOTS>::
+trimHoldLists(generation_t usedGen)
+{
+ _nodeStore.trimHoldLists(usedGen);
+}
+
+template <typename KeyT, typename DataT, typename AggrT,
+ size_t INTERNAL_SLOTS, size_t LEAF_SLOTS>
+void
+BTreeNodeAllocator<KeyT, DataT, AggrT, INTERNAL_SLOTS, LEAF_SLOTS>::
+transferHoldLists(generation_t generation)
+{
+ _nodeStore.transferHoldLists(generation);
+}
+
+
+template <typename KeyT, typename DataT, typename AggrT,
+ size_t INTERNAL_SLOTS, size_t LEAF_SLOTS>
+void
+BTreeNodeAllocator<KeyT, DataT, AggrT, INTERNAL_SLOTS, LEAF_SLOTS>::
+clearHoldLists(void)
+{
+ _nodeStore.clearHoldLists();
+}
+
+
+template <typename KeyT, typename DataT, typename AggrT,
+ size_t INTERNAL_SLOTS, size_t LEAF_SLOTS>
+typename BTreeNodeAllocator<KeyT, DataT, AggrT, INTERNAL_SLOTS, LEAF_SLOTS>::
+InternalNodeTypeRefPair
+BTreeNodeAllocator<KeyT, DataT, AggrT, INTERNAL_SLOTS, LEAF_SLOTS>::
+moveInternalNode(const InternalNodeType *node)
+{
+ InternalNodeTypeRefPair iPair;
+ iPair = _nodeStore.allocNewInternalNodeCopy(*node);
+ assert(iPair.first.valid());
+ _internalToFreeze.push_back(iPair.first);
+ return iPair;
+}
+
+
+template <typename KeyT, typename DataT, typename AggrT,
+ size_t INTERNAL_SLOTS, size_t LEAF_SLOTS>
+typename BTreeNodeAllocator<KeyT, DataT, AggrT, INTERNAL_SLOTS, LEAF_SLOTS>::
+LeafNodeTypeRefPair
+BTreeNodeAllocator<KeyT, DataT, AggrT, INTERNAL_SLOTS, LEAF_SLOTS>::
+moveLeafNode(const LeafNodeType *node)
+{
+ LeafNodeTypeRefPair lPair;
+ lPair = _nodeStore.allocNewLeafNodeCopy(*node);
+ _leafToFreeze.push_back(lPair.first);
+ return lPair;
+}
+
+
+template <typename KeyT, typename DataT, typename AggrT,
+ size_t INTERNAL_SLOTS, size_t LEAF_SLOTS>
+uint32_t
+BTreeNodeAllocator<KeyT, DataT, AggrT, INTERNAL_SLOTS, LEAF_SLOTS>::
+validLeaves(BTreeNode::Ref ref) const
+{
+ if (isLeafRef(ref))
+ return mapLeafRef(ref)->validSlots();
+ else
+ return mapInternalRef(ref)->validLeaves();
+}
+
+
+template <typename KeyT, typename DataT, typename AggrT,
+ size_t INTERNAL_SLOTS, size_t LEAF_SLOTS>
+uint32_t
+BTreeNodeAllocator<KeyT, DataT, AggrT, INTERNAL_SLOTS, LEAF_SLOTS>::
+getLevel(BTreeNode::Ref ref) const
+{
+ if (isLeafRef(ref))
+ return BTreeNode::LEAF_LEVEL;
+ else
+ return mapInternalRef(ref)->getLevel();
+}
+
+
+template <typename KeyT, typename DataT, typename AggrT,
+ size_t INTERNAL_SLOTS, size_t LEAF_SLOTS>
+const KeyT &
+BTreeNodeAllocator<KeyT, DataT, AggrT, INTERNAL_SLOTS, LEAF_SLOTS>::
+getLastKey(BTreeNode::Ref node) const
+{
+ if (isLeafRef(node))
+ return mapLeafRef(node)->getLastKey();
+ else
+ return mapInternalRef(node)->getLastKey();
+}
+
+
+template <typename KeyT, typename DataT, typename AggrT,
+ size_t INTERNAL_SLOTS, size_t LEAF_SLOTS>
+const AggrT &
+BTreeNodeAllocator<KeyT, DataT, AggrT, INTERNAL_SLOTS, LEAF_SLOTS>::
+getAggregated(BTreeNode::Ref node) const
+{
+ if (!node.valid())
+ return LeafNodeType::getEmptyAggregated();
+ if (isLeafRef(node))
+ return mapLeafRef(node)->getAggregated();
+ else
+ return mapInternalRef(node)->getAggregated();
+}
+
+
+template <typename KeyT, typename DataT, typename AggrT,
+ size_t INTERNAL_SLOTS, size_t LEAF_SLOTS>
+MemoryUsage
+BTreeNodeAllocator<KeyT, DataT, AggrT, INTERNAL_SLOTS, LEAF_SLOTS>::
+getMemoryUsage() const
+{
+ MemoryUsage usage = _nodeStore.getMemoryUsage();
+ return usage;
+}
+
+template <typename KeyT, typename DataT, typename AggrT,
+ size_t INTERNAL_SLOTS, size_t LEAF_SLOTS>
+vespalib::string
+BTreeNodeAllocator<KeyT, DataT, AggrT, INTERNAL_SLOTS, LEAF_SLOTS>::
+toString(BTreeNode::Ref ref) const
+{
+ if (!isValidRef(ref)) {
+ return "NULL";
+ }
+ if (isLeafRef(ref))
+ return toString(mapLeafRef(ref));
+ else
+ return toString(mapInternalRef(ref));
+}
+
+template <typename KeyT, typename DataT, typename AggrT,
+ size_t INTERNAL_SLOTS, size_t LEAF_SLOTS>
+vespalib::string
+BTreeNodeAllocator<KeyT, DataT, AggrT, INTERNAL_SLOTS, LEAF_SLOTS>::
+toString(const BTreeNode * node) const
+{
+ if (node == NULL) {
+ return "NULL";
+ }
+ vespalib::asciistream ss;
+ if (node->isLeaf()) {
+ const LeafNodeType * lnode = static_cast<const LeafNodeType *>(node);
+ ss << "L: keys(" << lnode->validSlots() << ")[";
+ for (uint32_t i = 0; i < lnode->validSlots(); ++i) {
+ if (i > 0) ss << ",";
+ ss << lnode->getKey(i);
+ }
+ ss << "]";
+ } else {
+ const InternalNodeType * inode =
+ static_cast<const InternalNodeType *>(node);
+ ss << "I: validLeaves(" << inode->validLeaves() <<
+ "), keys(" << inode->validSlots() << ")[";
+ for (uint32_t i = 0; i < inode->validSlots(); ++i) {
+ if (i > 0) ss << ",";
+ ss << inode->getKey(i);
+ }
+ ss << "]";
+ }
+ return ss.str();
+}
+
+
+} // namespace btree
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/btree/btreenodestore.cpp b/searchlib/src/vespa/searchlib/btree/btreenodestore.cpp
new file mode 100644
index 00000000000..07a2f60fe54
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/btree/btreenodestore.cpp
@@ -0,0 +1,117 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "btreenodestore.hpp"
+#include "datastore.h"
+#include "btreenode.h"
+#include "btreerootbase.h"
+#include "btreeroot.h"
+#include "btreenodeallocator.h"
+
+namespace search
+{
+
+namespace btree
+{
+
+template class BTreeNodeStore<uint32_t, uint32_t,
+ NoAggregated,
+ BTreeDefaultTraits::INTERNAL_SLOTS,
+ BTreeDefaultTraits::LEAF_SLOTS>;
+template class BTreeNodeStore<uint32_t, BTreeNoLeafData,
+ NoAggregated,
+ BTreeDefaultTraits::INTERNAL_SLOTS,
+ BTreeDefaultTraits::LEAF_SLOTS>;
+template class BTreeNodeStore<uint32_t, int32_t,
+ MinMaxAggregated,
+ BTreeDefaultTraits::INTERNAL_SLOTS,
+ BTreeDefaultTraits::LEAF_SLOTS>;
+
+typedef EntryRefT<22> MyRef;
+
+typedef BTreeNodeStore<uint32_t, uint32_t, NoAggregated,
+ BTreeDefaultTraits::INTERNAL_SLOTS,
+ BTreeDefaultTraits::LEAF_SLOTS> MyNodeStore1;
+typedef BTreeNodeStore<uint32_t, BTreeNoLeafData, NoAggregated,
+ BTreeDefaultTraits::INTERNAL_SLOTS,
+ BTreeDefaultTraits::LEAF_SLOTS> MyNodeStore2;
+typedef BTreeNodeStore<uint32_t, int32_t, MinMaxAggregated,
+ BTreeDefaultTraits::INTERNAL_SLOTS,
+ BTreeDefaultTraits::LEAF_SLOTS> MyNodeStore3;
+
+typedef BTreeLeafNode<uint32_t, uint32_t, NoAggregated> MyEntry1;
+typedef BTreeLeafNode<uint32_t, BTreeNoLeafData, NoAggregated> MyEntry2;
+typedef BTreeInternalNode<uint32_t, NoAggregated> MyEntry4;
+typedef BTreeLeafNode<uint32_t, int32_t, MinMaxAggregated> MyEntry5;
+typedef BTreeInternalNode<uint32_t, MinMaxAggregated> MyEntry6;
+
+template
+std::pair<MyRef, MyEntry1 *>
+DataStoreT<MyRef>::allocNewEntryCopy<MyEntry1>(uint32_t, const MyEntry1 &);
+
+template
+std::pair<MyRef, MyEntry2 *>
+DataStoreT<MyRef>::allocNewEntryCopy<MyEntry2>(uint32_t, const MyEntry2 &);
+
+template
+std::pair<MyRef, MyEntry4 *>
+DataStoreT<MyRef>::allocNewEntryCopy<MyEntry4>(uint32_t, const MyEntry4 &);
+
+template
+std::pair<MyRef, MyEntry5 *>
+DataStoreT<MyRef>::allocNewEntryCopy<MyEntry5>(uint32_t, const MyEntry5 &);
+
+template
+std::pair<MyRef, MyEntry6 *>
+DataStoreT<MyRef>::allocNewEntryCopy<MyEntry6>(uint32_t, const MyEntry6 &);
+
+template
+std::pair<MyRef, MyEntry1 *>
+DataStoreT<MyRef>::allocEntry<MyEntry1, BTreeNodeReclaimer>(uint32_t);
+
+template
+std::pair<MyRef, MyEntry2 *>
+DataStoreT<MyRef>::allocEntry<MyEntry2, BTreeNodeReclaimer>(uint32_t);
+
+template
+std::pair<MyRef, MyEntry4 *>
+DataStoreT<MyRef>::allocEntry<MyEntry4, BTreeNodeReclaimer>(uint32_t);
+
+template
+std::pair<MyRef, MyEntry5 *>
+DataStoreT<MyRef>::allocEntry<MyEntry5, BTreeNodeReclaimer>(uint32_t);
+
+template
+std::pair<MyRef, MyEntry6 *>
+DataStoreT<MyRef>::allocEntry<MyEntry6, BTreeNodeReclaimer>(uint32_t);
+
+template
+std::pair<MyRef, MyEntry1 *>
+DataStoreT<MyRef>::allocEntryCopy<MyEntry1, BTreeNodeReclaimer>(
+ uint32_t, const MyEntry1 &);
+
+template
+std::pair<MyRef, MyEntry2 *>
+DataStoreT<MyRef>::allocEntryCopy<MyEntry2, BTreeNodeReclaimer>(
+ uint32_t, const MyEntry2 &);
+
+template
+std::pair<MyRef, MyEntry4 *>
+DataStoreT<MyRef>::allocEntryCopy<MyEntry4, BTreeNodeReclaimer>(
+ uint32_t, const MyEntry4 &);
+
+template
+std::pair<MyRef, MyEntry5 *>
+DataStoreT<MyRef>::allocEntryCopy<MyEntry5, BTreeNodeReclaimer>(
+ uint32_t, const MyEntry5 &);
+
+
+template
+std::pair<MyRef, MyEntry6 *>
+DataStoreT<MyRef>::allocEntryCopy<MyEntry6, BTreeNodeReclaimer>(
+ uint32_t, const MyEntry6 &);
+
+
+
+} // namespace btree
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/btree/btreenodestore.h b/searchlib/src/vespa/searchlib/btree/btreenodestore.h
new file mode 100644
index 00000000000..1786f3996d7
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/btree/btreenodestore.h
@@ -0,0 +1,399 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "datastore.h"
+#include "btreetraits.h"
+
+namespace search
+{
+
+namespace btree
+{
+
+class BTreeNodeReclaimer
+{
+public:
+ static void reclaim(BTreeNode * node)
+ {
+ node->unFreeze();
+ }
+};
+
+
+template <typename EntryType>
+class BTreeNodeBufferType : public BufferType<EntryType>
+{
+ typedef BufferType<EntryType> ParentType;
+ using ParentType::_emptyEntry;
+ using ParentType::_clusterSize;
+public:
+ BTreeNodeBufferType(uint32_t minClusters,
+ uint32_t maxClusters)
+ : ParentType(1, minClusters, maxClusters)
+ {
+ _emptyEntry.freeze();
+ }
+
+ virtual void
+ cleanInitialElements(void *buffer);
+
+ virtual void
+ cleanHold(void *buffer, uint64_t offset, uint64_t len);
+};
+
+
+template <typename KeyT,
+ typename DataT,
+ typename AggrT,
+ size_t INTERNAL_SLOTS,
+ size_t LEAF_SLOTS>
+class BTreeNodeStore
+{
+public:
+ typedef DataStoreT<EntryRefT<22> > DataStoreType;
+ typedef DataStoreType::RefType RefType;
+ typedef BTreeInternalNode<KeyT, AggrT, INTERNAL_SLOTS> InternalNodeType;
+ typedef BTreeLeafNode<KeyT, DataT, AggrT, LEAF_SLOTS> LeafNodeType;
+ typedef typename InternalNodeType::RefPair InternalNodeTypeRefPair;
+ typedef typename LeafNodeType::RefPair LeafNodeTypeRefPair;
+ typedef vespalib::GenerationHandler::generation_t generation_t;
+
+ enum NodeTypes
+ {
+ NODETYPE_INTERNAL = 0,
+ NODETYPE_LEAF = 1
+ };
+
+
+private:
+ static constexpr size_t MIN_CLUSTERS = 128u;
+ DataStoreType _store;
+ BTreeNodeBufferType<InternalNodeType> _internalNodeType;
+ BTreeNodeBufferType<LeafNodeType> _leafNodeType;
+
+public:
+ BTreeNodeStore(void);
+
+ ~BTreeNodeStore(void);
+
+ void
+ disableFreeLists() {
+ _store.disableFreeLists();
+ }
+
+ void
+ disableElemHoldList()
+ {
+ _store.disableElemHoldList();
+ }
+
+ static bool
+ isValidRef(EntryRef ref)
+ {
+ return ref.valid();
+ }
+
+ bool
+ isLeafRef(EntryRef ref) const
+ {
+ RefType iRef(ref);
+ return _store.getTypeId(iRef.bufferId()) == NODETYPE_LEAF;
+ }
+
+ const InternalNodeType *
+ mapInternalRef(EntryRef ref) const
+ {
+ RefType iRef(ref);
+ return _store.getBufferEntry<InternalNodeType>(iRef.bufferId(),
+ iRef.offset());
+ }
+
+ InternalNodeType *
+ mapInternalRef(EntryRef ref)
+ {
+ RefType iRef(ref);
+ return _store.getBufferEntry<InternalNodeType>(iRef.bufferId(),
+ iRef.offset());
+ }
+
+ const LeafNodeType *
+ mapLeafRef(EntryRef ref) const
+ {
+ RefType iRef(ref);
+ return _store.getBufferEntry<LeafNodeType>(iRef.bufferId(),
+ iRef.offset());
+ }
+
+ LeafNodeType *
+ mapLeafRef(EntryRef ref)
+ {
+ RefType iRef(ref);
+ return _store.getBufferEntry<LeafNodeType>(iRef.bufferId(),
+ iRef.offset());
+ }
+
+ template <typename NodeType>
+ const NodeType *
+ mapRef(EntryRef ref) const
+ {
+ RefType iRef(ref);
+ return _store.getBufferEntry<NodeType>(iRef.bufferId(),
+ iRef.offset());
+ }
+
+ template <typename NodeType>
+ NodeType *
+ mapRef(EntryRef ref)
+ {
+ RefType iRef(ref);
+ return _store.getBufferEntry<NodeType>(iRef.bufferId(),
+ iRef.offset());
+ }
+
+ LeafNodeTypeRefPair
+ allocNewLeafNode(void) {
+ return _store.allocNewEntry<LeafNodeType>(NODETYPE_LEAF);
+ }
+
+ LeafNodeTypeRefPair
+ allocLeafNode(void) {
+ return _store.allocEntry<LeafNodeType, BTreeNodeReclaimer>(NODETYPE_LEAF);
+ }
+
+ LeafNodeTypeRefPair
+ allocNewLeafNodeCopy(const LeafNodeType &rhs) {
+ return _store.allocNewEntryCopy<LeafNodeType>(NODETYPE_LEAF, rhs);
+ }
+
+ LeafNodeTypeRefPair
+ allocLeafNodeCopy(const LeafNodeType &rhs) {
+ return _store.allocEntryCopy<LeafNodeType, BTreeNodeReclaimer>(NODETYPE_LEAF, rhs);
+ }
+
+ InternalNodeTypeRefPair
+ allocNewInternalNode(void) {
+ return _store.allocNewEntry<InternalNodeType>(NODETYPE_INTERNAL);
+ }
+
+ InternalNodeTypeRefPair
+ allocInternalNode(void) {
+ return _store.allocEntry<InternalNodeType, BTreeNodeReclaimer>(NODETYPE_INTERNAL);
+ }
+
+ InternalNodeTypeRefPair
+ allocNewInternalNodeCopy(const InternalNodeType &rhs) {
+ return _store.allocNewEntryCopy<InternalNodeType>(NODETYPE_INTERNAL, rhs);
+ }
+
+ InternalNodeTypeRefPair
+ allocInternalNodeCopy(const InternalNodeType &rhs) {
+ return _store.allocEntryCopy<InternalNodeType, BTreeNodeReclaimer>(NODETYPE_INTERNAL, rhs);
+ }
+
+ void
+ holdElem(EntryRef ref)
+ {
+ _store.holdElem(ref, 1);
+ }
+
+ void
+ freeElem(EntryRef ref)
+ {
+ _store.freeElem(ref, 1);
+ }
+
+ std::vector<uint32_t>
+ startCompact(void);
+
+ void
+ finishCompact(const std::vector<uint32_t> &toHold);
+
+ void
+ transferHoldLists(generation_t generation)
+ {
+ _store.transferHoldLists(generation);
+ }
+
+ // Inherit doc from DataStoreBase
+ DataStoreBase::MemStats getMemStats() const {
+ return _store.getMemStats();
+ }
+
+ // Inherit doc from DataStoreBase
+ void
+ trimHoldLists(generation_t usedGen)
+ {
+ _store.trimHoldLists(usedGen);
+ }
+
+ void
+ clearHoldLists(void)
+ {
+ _store.clearHoldLists();
+ }
+
+ // Inherit doc from DataStoreBase
+ MemoryUsage getMemoryUsage() const {
+ return _store.getMemoryUsage();
+ }
+
+ // Inherit doc from DataStoreT
+ bool getCompacting(EntryRef ref) const {
+ return _store.getCompacting(ref);
+ }
+
+ template <typename FunctionType>
+ void
+ foreach_key(EntryRef ref, FunctionType func) const
+ {
+ if (!ref.valid())
+ return;
+ if (isLeafRef(ref)) {
+ mapLeafRef(ref)->foreach_key(func);
+ } else {
+ mapInternalRef(ref)->foreach_key(*this, func);
+ }
+ }
+
+ template <typename FunctionType>
+ void
+ foreach(EntryRef ref, FunctionType func) const
+ {
+ if (!ref.valid())
+ return;
+ if (isLeafRef(ref)) {
+ mapLeafRef(ref)->foreach(func);
+ } else {
+ mapInternalRef(ref)->foreach(*this, func);
+ }
+ }
+};
+
+extern template class BTreeNodeStore<uint32_t, uint32_t,
+ NoAggregated,
+ BTreeDefaultTraits::INTERNAL_SLOTS,
+ BTreeDefaultTraits::LEAF_SLOTS>;
+extern template class BTreeNodeStore<uint32_t, BTreeNoLeafData,
+ NoAggregated,
+ BTreeDefaultTraits::INTERNAL_SLOTS,
+ BTreeDefaultTraits::LEAF_SLOTS>;
+extern template class BTreeNodeStore<uint32_t, int32_t,
+ MinMaxAggregated,
+ BTreeDefaultTraits::INTERNAL_SLOTS,
+ BTreeDefaultTraits::LEAF_SLOTS>;
+
+extern template
+std::pair<EntryRefT<22>, BTreeLeafNode<uint32_t, uint32_t, NoAggregated> *>
+DataStoreT<EntryRefT<22> >::
+allocNewEntryCopy<BTreeLeafNode<uint32_t,
+ uint32_t,
+ NoAggregated> >
+(uint32_t, const BTreeLeafNode<uint32_t, uint32_t, NoAggregated> &);
+
+extern template
+std::pair<EntryRefT<22>, BTreeLeafNode<uint32_t, BTreeNoLeafData,
+ NoAggregated> *>
+DataStoreT<EntryRefT<22> >::
+allocNewEntryCopy<BTreeLeafNode<uint32_t, BTreeNoLeafData, NoAggregated> >(
+ uint32_t,
+ const BTreeLeafNode<uint32_t, BTreeNoLeafData, NoAggregated> &);
+
+extern template
+std::pair<EntryRefT<22>, BTreeInternalNode<uint32_t, NoAggregated> *>
+DataStoreT<EntryRefT<22> >::
+allocNewEntryCopy<BTreeInternalNode<uint32_t, NoAggregated> >(
+ uint32_t, const BTreeInternalNode<uint32_t, NoAggregated> &);
+
+extern template
+std::pair<EntryRefT<22>, BTreeLeafNode<uint32_t, int32_t, MinMaxAggregated> *>
+DataStoreT<EntryRefT<22> >::
+allocNewEntryCopy<BTreeLeafNode<uint32_t,
+ int32_t,
+ MinMaxAggregated> >
+(uint32_t, const BTreeLeafNode<uint32_t, int32_t, MinMaxAggregated> &);
+
+extern template
+std::pair<EntryRefT<22>, BTreeInternalNode<uint32_t, MinMaxAggregated> *>
+DataStoreT<EntryRefT<22> >::
+allocNewEntryCopy<BTreeInternalNode<uint32_t, MinMaxAggregated> >(
+ uint32_t, const BTreeInternalNode<uint32_t, MinMaxAggregated> &);
+
+extern template
+std::pair<EntryRefT<22>, BTreeLeafNode<uint32_t, uint32_t, NoAggregated> *>
+DataStoreT<EntryRefT<22> >::
+allocEntry<BTreeLeafNode<uint32_t, uint32_t, NoAggregated>,
+ BTreeNodeReclaimer>(uint32_t);
+
+extern template
+std::pair<EntryRefT<22>, BTreeLeafNode<uint32_t, BTreeNoLeafData,
+ NoAggregated> *>
+DataStoreT<EntryRefT<22> >::
+allocEntry<BTreeLeafNode<uint32_t, BTreeNoLeafData, NoAggregated>,
+ BTreeNodeReclaimer>(uint32_t);
+
+extern template
+std::pair<EntryRefT<22>, BTreeInternalNode<uint32_t, NoAggregated> *>
+DataStoreT<EntryRefT<22> >::
+allocEntry<BTreeInternalNode<uint32_t, NoAggregated>,
+ BTreeNodeReclaimer>(uint32_t);
+
+extern template
+std::pair<EntryRefT<22>, BTreeLeafNode<uint32_t, int32_t, MinMaxAggregated> *>
+DataStoreT<EntryRefT<22> >::
+allocEntry<BTreeLeafNode<uint32_t, int32_t, MinMaxAggregated>,
+ BTreeNodeReclaimer>(uint32_t);
+
+extern template
+std::pair<EntryRefT<22>, BTreeInternalNode<uint32_t, MinMaxAggregated> *>
+DataStoreT<EntryRefT<22> >::
+allocEntry<BTreeInternalNode<uint32_t, MinMaxAggregated>,
+ BTreeNodeReclaimer>(uint32_t);
+
+extern template
+std::pair<EntryRefT<22>, BTreeLeafNode<uint32_t, uint32_t, NoAggregated> *>
+DataStoreT<EntryRefT<22> >::
+allocEntryCopy<BTreeLeafNode<uint32_t, uint32_t, NoAggregated>,
+ BTreeNodeReclaimer>(
+ uint32_t,
+ const BTreeLeafNode<uint32_t, uint32_t,
+ NoAggregated> &);
+
+extern template
+std::pair<EntryRefT<22>, BTreeLeafNode<uint32_t, BTreeNoLeafData,
+ NoAggregated> *>
+DataStoreT<EntryRefT<22> >::
+allocEntryCopy<BTreeLeafNode<uint32_t, BTreeNoLeafData, NoAggregated>,
+ BTreeNodeReclaimer>(
+ uint32_t,
+ const BTreeLeafNode<uint32_t, BTreeNoLeafData,
+ NoAggregated> &);
+
+extern template
+std::pair<EntryRefT<22>, BTreeInternalNode<uint32_t, NoAggregated> *>
+DataStoreT<EntryRefT<22> >::
+allocEntryCopy<BTreeInternalNode<uint32_t, NoAggregated>, BTreeNodeReclaimer>(
+ uint32_t, const BTreeInternalNode<uint32_t, NoAggregated> &);
+
+extern template
+std::pair<EntryRefT<22>, BTreeLeafNode<uint32_t, int32_t, MinMaxAggregated> *>
+DataStoreT<EntryRefT<22> >::
+allocEntryCopy<BTreeLeafNode<uint32_t, int32_t, MinMaxAggregated>,
+ BTreeNodeReclaimer>(
+ uint32_t,
+ const BTreeLeafNode<uint32_t, int32_t,
+ MinMaxAggregated> &);
+
+extern template
+std::pair<EntryRefT<22>, BTreeInternalNode<uint32_t, MinMaxAggregated> *>
+DataStoreT<EntryRefT<22> >::
+allocEntryCopy<BTreeInternalNode<uint32_t, MinMaxAggregated>,
+ BTreeNodeReclaimer>(
+ uint32_t, const BTreeInternalNode<uint32_t, MinMaxAggregated> &);
+
+
+} // namespace btree
+
+} // namespace search
+
+
diff --git a/searchlib/src/vespa/searchlib/btree/btreenodestore.hpp b/searchlib/src/vespa/searchlib/btree/btreenodestore.hpp
new file mode 100644
index 00000000000..8ba828aa07f
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/btree/btreenodestore.hpp
@@ -0,0 +1,98 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "btreenodestore.h"
+#include "datastore.hpp"
+
+namespace search
+{
+
+namespace btree
+{
+
+template <typename EntryType>
+void
+BTreeNodeBufferType<EntryType>::cleanInitialElements(void *buffer)
+{
+ ParentType::cleanInitialElements(buffer);
+ EntryType *e = static_cast<EntryType *>(buffer);
+ for (size_t j = _clusterSize; j != 0; --j) {
+ e->freeze();
+ ++e;
+ }
+}
+
+
+template <typename EntryType>
+void
+BTreeNodeBufferType<EntryType>::cleanHold(void *buffer,
+ uint64_t offset,
+ uint64_t len)
+{
+ EntryType *e = static_cast<EntryType *>(buffer) + offset;
+ for (size_t j = len; j != 0; --j) {
+ e->cleanFrozen();
+ ++e;
+ }
+}
+
+
+
+
+template <typename KeyT, typename DataT, typename AggrT,
+ size_t INTERNAL_SLOTS, size_t LEAF_SLOTS>
+BTreeNodeStore<KeyT, DataT, AggrT, INTERNAL_SLOTS, LEAF_SLOTS>::
+BTreeNodeStore(void)
+ : _store(),
+ _internalNodeType(MIN_CLUSTERS, RefType::offsetSize()),
+ _leafNodeType(MIN_CLUSTERS, RefType::offsetSize())
+{
+ _store.addType(&_internalNodeType);
+ _store.addType(&_leafNodeType);
+ _store.initActiveBuffers();
+ _store.enableFreeLists();
+}
+
+template <typename KeyT, typename DataT, typename AggrT,
+ size_t INTERNAL_SLOTS, size_t LEAF_SLOTS>
+BTreeNodeStore<KeyT, DataT, AggrT, INTERNAL_SLOTS, LEAF_SLOTS>::
+~BTreeNodeStore(void)
+{
+ _store.dropBuffers(); // Drop buffers before type handlers are dropped
+}
+
+
+template <typename KeyT, typename DataT, typename AggrT,
+ size_t INTERNAL_SLOTS, size_t LEAF_SLOTS>
+std::vector<uint32_t>
+BTreeNodeStore<KeyT, DataT, AggrT, INTERNAL_SLOTS, LEAF_SLOTS>::
+startCompact(void)
+{
+ std::vector<uint32_t> iToHold =
+ _store.startCompact(NODETYPE_INTERNAL);
+ std::vector<uint32_t> lToHold =
+ _store.startCompact(NODETYPE_LEAF);
+ std::vector<uint32_t> ret = iToHold;
+ for (std::vector<uint32_t>::const_iterator
+ i = lToHold.begin(), ie = lToHold.end(); i != ie; ++i)
+ ret.push_back(*i);
+ return ret;
+}
+
+
+template <typename KeyT, typename DataT, typename AggrT,
+ size_t INTERNAL_SLOTS, size_t LEAF_SLOTS>
+void
+BTreeNodeStore<KeyT, DataT, AggrT, INTERNAL_SLOTS, LEAF_SLOTS>::
+finishCompact(const std::vector<uint32_t> &toHold)
+{
+ _store.finishCompact(toHold);
+}
+
+
+} // namespace btree
+
+} // namespace search
+
+
diff --git a/searchlib/src/vespa/searchlib/btree/btreeremover.cpp b/searchlib/src/vespa/searchlib/btree/btreeremover.cpp
new file mode 100644
index 00000000000..a8dceb35dac
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/btree/btreeremover.cpp
@@ -0,0 +1,24 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "btreeremover.h"
+#include "btreenodeallocator.h"
+#include "btreerootbase.hpp"
+#include "btreeremover.hpp"
+#include "btreenode.hpp"
+
+namespace search
+{
+
+namespace btree
+{
+
+template class BTreeRemover<uint32_t, uint32_t, NoAggregated>;
+template class BTreeRemover<uint32_t, BTreeNoLeafData, NoAggregated>;
+template class BTreeRemover<uint32_t, int32_t, MinMaxAggregated,
+ std::less<uint32_t>,
+ BTreeDefaultTraits,
+ MinMaxAggrCalc>;
+
+} // namespace btree
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/btree/btreeremover.h b/searchlib/src/vespa/searchlib/btree/btreeremover.h
new file mode 100644
index 00000000000..bc78a6be3a9
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/btree/btreeremover.h
@@ -0,0 +1,104 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "btreenode.h"
+#include "btreenodeallocator.h"
+#include "btreerootbase.h"
+#include "btreeaggregator.h"
+#include "noaggrcalc.h"
+#include "minmaxaggrcalc.h"
+#include "btreeiterator.h"
+
+namespace search
+{
+
+namespace btree
+{
+
+template <typename KeyT,
+ typename DataT,
+ typename AggrT,
+ size_t INTERNAL_SLOTS,
+ size_t LEAF_SLOTS,
+ class AggrCalcT>
+class BTreeRemoverBase
+{
+public:
+ typedef BTreeNodeAllocator<KeyT, DataT, AggrT,
+ INTERNAL_SLOTS,
+ LEAF_SLOTS> NodeAllocatorType;
+ typedef BTreeAggregator<KeyT, DataT, AggrT,
+ INTERNAL_SLOTS,
+ LEAF_SLOTS,
+ AggrCalcT> Aggregator;
+ typedef BTreeInternalNode<KeyT, AggrT, INTERNAL_SLOTS> InternalNodeType;
+ typedef BTreeLeafNode<KeyT, DataT, AggrT, LEAF_SLOTS> LeafNodeType;
+ typedef typename InternalNodeType::RefPair InternalNodeTypeRefPair;
+ typedef typename LeafNodeType::RefPair LeafNodeTypeRefPair;
+
+ template <typename NodeType, typename NodeTypeRefPair,
+ class Iterator>
+ static void
+ steal(InternalNodeType *pNode,
+ BTreeNode::Ref sNodeRef,
+ NodeType *sNode,
+ uint32_t idx,
+ NodeAllocatorType &allocator,
+ const AggrCalcT &aggrCalc,
+ Iterator &itr,
+ uint32_t level);
+};
+
+template <typename KeyT,
+ typename DataT,
+ typename AggrT,
+ typename CompareT = std::less<KeyT>,
+ typename TraitsT = BTreeDefaultTraits,
+ class AggrCalcT = NoAggrCalc>
+class BTreeRemover : public BTreeRemoverBase<KeyT, DataT, AggrT,
+ TraitsT::INTERNAL_SLOTS,
+ TraitsT::LEAF_SLOTS,
+ AggrCalcT>
+
+{
+public:
+ typedef BTreeRemoverBase<KeyT, DataT, AggrT,
+ TraitsT::INTERNAL_SLOTS,
+ TraitsT::LEAF_SLOTS,
+ AggrCalcT> ParentType;
+ typedef BTreeNodeAllocator<KeyT, DataT, AggrT,
+ TraitsT::INTERNAL_SLOTS,
+ TraitsT::LEAF_SLOTS> NodeAllocatorType;
+ typedef BTreeAggregator<KeyT, DataT, AggrT,
+ TraitsT::INTERNAL_SLOTS,
+ TraitsT::LEAF_SLOTS,
+ AggrCalcT> Aggregator;
+ typedef BTreeInternalNode<KeyT, AggrT, TraitsT::INTERNAL_SLOTS>
+ InternalNodeType;
+ typedef BTreeLeafNode<KeyT, DataT, AggrT, TraitsT::LEAF_SLOTS>
+ LeafNodeType;
+ typedef KeyT KeyType;
+ typedef DataT DataType;
+ typedef typename InternalNodeType::RefPair InternalNodeTypeRefPair;
+ typedef typename LeafNodeType::RefPair LeafNodeTypeRefPair;
+ typedef BTreeIterator<KeyT, DataT, AggrT,
+ CompareT, TraitsT> Iterator;
+
+ static void
+ remove(BTreeNode::Ref &root,
+ Iterator &itr,
+ const AggrCalcT &aggrCalc);
+};
+
+extern template class BTreeRemover<uint32_t, uint32_t, NoAggregated>;
+extern template class BTreeRemover<uint32_t, BTreeNoLeafData, NoAggregated>;
+extern template class BTreeRemover<uint32_t, int32_t,
+ MinMaxAggregated,
+ std::less<uint32_t>,
+ BTreeDefaultTraits,
+ MinMaxAggrCalc>;
+
+} // namespace search::btree
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/btree/btreeremover.hpp b/searchlib/src/vespa/searchlib/btree/btreeremover.hpp
new file mode 100644
index 00000000000..864a2833993
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/btree/btreeremover.hpp
@@ -0,0 +1,185 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "btreeremover.h"
+#include "btreerootbase.hpp"
+#include <vespa/vespalib/stllike/asciistream.h>
+
+namespace search
+{
+
+namespace btree
+{
+
+template <typename KeyT, typename DataT, typename AggrT, size_t INTERNAL_SLOTS,
+ size_t LEAF_SLOTS, class AggrCalcT>
+template <typename NodeType, typename NodeTypeRefPair, class Iterator>
+void
+BTreeRemoverBase<KeyT, DataT, AggrT, INTERNAL_SLOTS, LEAF_SLOTS, AggrCalcT>::
+steal(InternalNodeType *pNode,
+ BTreeNode::Ref sNodeRef,
+ NodeType * sNode, uint32_t idx, NodeAllocatorType &allocator,
+ const AggrCalcT &aggrCalc,
+ Iterator &itr,
+ uint32_t level)
+{
+ BTreeNode::Ref leftVictimRef = BTreeNode::Ref();
+ NodeType * leftVictim = NULL;
+ BTreeNode::Ref rightVictimRef = BTreeNode::Ref();
+ NodeType * rightVictim = NULL;
+ if (idx > 0) {
+ leftVictimRef = pNode->getChild(idx - 1);
+ leftVictim = allocator.template mapRef<NodeType>(leftVictimRef);
+ }
+ if (idx + 1 < pNode->validSlots()) {
+ rightVictimRef = pNode->getChild(idx + 1);
+ rightVictim = allocator.template mapRef<NodeType>(rightVictimRef);
+ }
+ if (leftVictim != NULL &&
+ leftVictim->validSlots() + sNode->validSlots() <=
+ NodeType::maxSlots())
+ {
+ uint32_t stolen = leftVictim->validSlots();
+ sNode->stealAllFromLeftNode(leftVictim);
+ pNode->update(idx, sNode->getLastKey(), sNodeRef);
+ pNode->remove(idx - 1);
+ allocator.holdNode(leftVictimRef, leftVictim);
+ itr.adjustSteal(level, true, stolen);
+ } else if (rightVictim != NULL &&
+ rightVictim->validSlots() + sNode->validSlots() <=
+ NodeType::maxSlots())
+ {
+ sNode->stealAllFromRightNode(rightVictim);
+ pNode->update(idx, sNode->getLastKey(), sNodeRef);
+ pNode->remove(idx + 1);
+ allocator.holdNode(rightVictimRef, rightVictim);
+ } else if (leftVictim != NULL &&
+ (rightVictim == NULL ||
+ leftVictim->validSlots() > rightVictim->validSlots()))
+ {
+ if (leftVictim->getFrozen()) {
+ NodeTypeRefPair thawed =
+ allocator.thawNode(leftVictimRef, leftVictim);
+ leftVictimRef = thawed.first;
+ leftVictim = thawed.second;
+ }
+ uint32_t oldLeftValid = leftVictim->validSlots();
+ sNode->stealSomeFromLeftNode(leftVictim, allocator);
+ uint32_t stolen = oldLeftValid - leftVictim->validSlots();
+ pNode->update(idx, sNode->getLastKey(), sNodeRef);
+ pNode->update(idx - 1, leftVictim->getLastKey(), leftVictimRef);
+ if (AggrCalcT::hasAggregated()) {
+ Aggregator::recalc(*leftVictim, allocator, aggrCalc);
+ }
+ itr.adjustSteal(level, false, stolen);
+ } else if (rightVictim != NULL) {
+ if (rightVictim->getFrozen()) {
+ NodeTypeRefPair thawed =
+ allocator.thawNode(rightVictimRef, rightVictim);
+ rightVictimRef = thawed.first;
+ rightVictim = thawed.second;
+ }
+ sNode->stealSomeFromRightNode(rightVictim, allocator);
+ pNode->update(idx, sNode->getLastKey(), sNodeRef);
+ pNode->update(idx + 1, rightVictim->getLastKey(), rightVictimRef);
+ if (AggrCalcT::hasAggregated()) {
+ Aggregator::recalc(*rightVictim, allocator, aggrCalc);
+ }
+ }
+ if (AggrCalcT::hasAggregated()) {
+ Aggregator::recalc(*sNode, allocator, aggrCalc);
+ }
+}
+
+
+template <typename KeyT, typename DataT, typename AggrT, typename CompareT,
+ typename TraitsT, class AggrCalcT>
+void
+BTreeRemover<KeyT, DataT, AggrT, CompareT, TraitsT, AggrCalcT>::
+remove(BTreeNode::Ref &root,
+ Iterator &itr,
+ const AggrCalcT &aggrCalc)
+{
+ assert(itr.valid());
+ root = itr.thaw(root);
+
+ uint32_t idx = itr.getLeafNodeIdx();
+ LeafNodeType * lnode = itr.getLeafNode();
+ if (lnode->validSlots() == 1u) {
+ itr.removeLast(root);
+ root = BTreeNode::Ref();
+ return;
+ }
+ NodeAllocatorType &allocator(itr.getAllocator());
+ AggrT oldca(AggrCalcT::hasAggregated() ? lnode->getAggregated() : AggrT());
+ AggrT ca;
+ if (AggrCalcT::hasAggregated() &&
+ aggrCalc.remove(lnode->getAggregated(),
+ aggrCalc.getVal(lnode->getData(idx)))) {
+ lnode->remove(idx);
+ Aggregator::recalc(*lnode, aggrCalc);
+ } else {
+ lnode->remove(idx);
+ }
+ if (AggrCalcT::hasAggregated()) {
+ ca = lnode->getAggregated();
+ }
+ bool steppedBack = idx >= lnode->validSlots();
+ if (steppedBack) {
+ itr.setLeafNodeIdx(itr.getLeafNodeIdx() - 1);
+ --idx;
+ }
+ uint32_t level = 0;
+ uint32_t levels = itr.getPathSize();
+ InternalNodeType *node;
+ for (; level < levels; ++level) {
+ typename Iterator::PathElement &pe = itr.getPath(level);
+ node = pe.getWNode();
+ idx = pe.getIdx();
+ AggrT olda(AggrCalcT::hasAggregated() ?
+ node->getAggregated() : AggrT());
+ BTreeNode::Ref subNode = node->getChild(idx);
+ node->update(idx, allocator.getLastKey(subNode), subNode);
+ node->decValidLeaves(1);
+ if (level == 0) {
+ LeafNodeType * sNode = allocator.mapLeafRef(subNode);
+ assert(sNode == lnode);
+ if (!sNode->isAtLeastHalfFull()) {
+ // too few elements in sub node, steal from left or
+ // right sibling
+ ParentType::template steal<LeafNodeType,
+ LeafNodeTypeRefPair>
+ (node, subNode, sNode, idx, allocator, aggrCalc,
+ itr, level);
+ }
+ } else {
+ InternalNodeType * sNode = allocator.mapInternalRef(subNode);
+ if (!sNode->isAtLeastHalfFull()) {
+ // too few elements in sub node, steal from left or
+ // right sibling
+ ParentType::template steal<InternalNodeType,
+ InternalNodeTypeRefPair>
+ (node, subNode, sNode, idx, allocator, aggrCalc,
+ itr, level);
+ }
+ }
+ if (AggrCalcT::hasAggregated()) {
+ if (aggrCalc.remove(node->getAggregated(), oldca, ca)) {
+ Aggregator::recalc(*node, allocator, aggrCalc);
+ }
+ ca = node->getAggregated();
+ oldca = olda;
+ }
+ }
+ if (level > 0 && node->validSlots() == 1) {
+ root = itr.removeLevel(root, node);
+ }
+ if (steppedBack)
+ ++itr;
+}
+
+
+} // namespace search::btree
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/btree/btreeroot.cpp b/searchlib/src/vespa/searchlib/btree/btreeroot.cpp
new file mode 100644
index 00000000000..fdfa0834b95
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/btree/btreeroot.cpp
@@ -0,0 +1,26 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "btreeroot.h"
+#include "btreenodeallocator.h"
+#include "btreeiterator.hpp"
+#include "btreeroot.hpp"
+#include "btreenode.hpp"
+
+namespace search
+{
+
+namespace btree
+{
+
+template class BTreeRootT<uint32_t, uint32_t, NoAggregated>;
+template class BTreeRootT<uint32_t, BTreeNoLeafData, NoAggregated>;
+template class BTreeRootT<uint32_t, int32_t, MinMaxAggregated>;
+template class BTreeRoot<uint32_t, uint32_t, NoAggregated>;
+template class BTreeRoot<uint32_t, BTreeNoLeafData, NoAggregated>;
+template class BTreeRoot<uint32_t, int32_t, MinMaxAggregated,
+ std::less<uint32_t>,
+ BTreeDefaultTraits, MinMaxAggrCalc>;
+
+} // namespace btree
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/btree/btreeroot.h b/searchlib/src/vespa/searchlib/btree/btreeroot.h
new file mode 100644
index 00000000000..b06050904f1
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/btree/btreeroot.h
@@ -0,0 +1,253 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "btreeiterator.h"
+#include "btreenode.h"
+#include "btreenodeallocator.h"
+#include "btreerootbase.h"
+#include "noaggrcalc.h"
+#include "minmaxaggrcalc.h"
+
+namespace search {
+namespace btree {
+
+template <typename, typename, typename, size_t, size_t>
+class BTreeNodeAllocator;
+template <typename, typename, typename, size_t, size_t, class> class
+BTreeBuilder;
+template <typename, typename, typename, size_t, size_t, class> class
+BTreeAggregator;
+
+template <typename KeyT,
+ typename DataT,
+ typename AggrT = NoAggregated,
+ typename CompareT = std::less<KeyT>,
+ typename TraitsT = BTreeDefaultTraits>
+class BTreeRootT : public BTreeRootBase<KeyT, DataT, AggrT,
+ TraitsT::INTERNAL_SLOTS,
+ TraitsT::LEAF_SLOTS>
+{
+public:
+ typedef BTreeRootBase<KeyT, DataT, AggrT,
+ TraitsT::INTERNAL_SLOTS, TraitsT::LEAF_SLOTS> ParentType;
+ typedef typename ParentType::NodeAllocatorType NodeAllocatorType;
+ typedef BTreeKeyData<KeyT, DataT> KeyDataType;
+ typedef typename ParentType::InternalNodeType InternalNodeType;
+ typedef typename ParentType::LeafNodeType LeafNodeType;
+ typedef BTreeLeafNodeTemp<KeyT, DataT, AggrT, TraitsT::LEAF_SLOTS>
+ LeafNodeTempType;
+ typedef BTreeIterator<KeyT, DataT, AggrT, CompareT, TraitsT> Iterator;
+ typedef BTreeConstIterator<KeyT, DataT, AggrT, CompareT, TraitsT>
+ ConstIterator;
+
+ typedef typename ParentType::KeyType KeyType;
+ typedef typename ParentType::DataType DataType;
+protected:
+ typedef typename ParentType::BTreeRootBaseType BTreeRootBaseType;
+ typedef BTreeRootT<KeyT, DataT, AggrT, CompareT, TraitsT> BTreeRootTType;
+ typedef typename InternalNodeType::RefPair InternalNodeTypeRefPair;
+ typedef typename LeafNodeType::RefPair LeafNodeTypeRefPair;
+ using ParentType::_root;
+ using ParentType::getFrozenRoot;
+ using ParentType::getFrozenRootRelaxed;
+ using ParentType::isFrozen;
+
+ vespalib::string toString(BTreeNode::Ref node, const NodeAllocatorType &allocator) const;
+ bool isValid(BTreeNode::Ref node, bool ignoreMinSlots, uint32_t level,
+ const NodeAllocatorType &allocator, CompareT comp) const;
+
+public:
+ /**
+ * Read view of the frozen version of the tree.
+ * Should be used by reader threads.
+ **/
+ class FrozenView {
+ private:
+ BTreeNode::Ref _frozenRoot;
+ const NodeAllocatorType & _allocator;
+ public:
+ typedef ConstIterator Iterator;
+ FrozenView(BTreeNode::Ref frozenRoot,
+ const NodeAllocatorType & allocator);
+ ConstIterator find(const KeyType& key,
+ CompareT comp = CompareT()) const;
+ ConstIterator lowerBound(const KeyType &key,
+ CompareT comp = CompareT()) const;
+ ConstIterator upperBound(const KeyType &key,
+ CompareT comp = CompareT()) const;
+ ConstIterator begin() const {
+ return ConstIterator(_frozenRoot, _allocator);
+ }
+ void begin(std::vector<ConstIterator> &where) const {
+ where.emplace_back(_frozenRoot, _allocator);
+ }
+
+ BTreeNode::Ref
+ getRoot(void) const
+ {
+ return _frozenRoot;
+ }
+
+ size_t
+ size(void) const;
+
+ const NodeAllocatorType &
+ getAllocator(void) const
+ {
+ return _allocator;
+ }
+
+ template <typename FunctionType>
+ void
+ foreach_key(FunctionType func) const {
+ _allocator.getNodeStore().foreach_key(_frozenRoot, func);
+ }
+
+ template <typename FunctionType>
+ void
+ foreach(FunctionType func) const {
+ _allocator.getNodeStore().foreach(_frozenRoot, func);
+ }
+ };
+
+private:
+
+ static Iterator findHelper(BTreeNode::Ref root, const KeyType & key,
+ const NodeAllocatorType & allocator, CompareT comp = CompareT());
+
+ static Iterator lowerBoundHelper(BTreeNode::Ref root, const KeyType & key,
+ const NodeAllocatorType & allocator, CompareT comp = CompareT());
+
+ static Iterator upperBoundHelper(BTreeNode::Ref root, const KeyType & key,
+ const NodeAllocatorType & allocator, CompareT comp = CompareT());
+
+public:
+ BTreeRootT();
+ ~BTreeRootT();
+
+ void
+ clear(NodeAllocatorType &allocator);
+
+ Iterator
+ find(const KeyType & key, const NodeAllocatorType &allocator,
+ CompareT comp = CompareT()) const;
+
+ Iterator
+ lowerBound(const KeyType & key, const NodeAllocatorType & allocator,
+ CompareT comp = CompareT()) const;
+
+ Iterator
+ upperBound(const KeyType & key, const NodeAllocatorType & allocator,
+ CompareT comp = CompareT()) const;
+
+ Iterator begin(const NodeAllocatorType &allocator) const {
+ return Iterator(_root, allocator);
+ }
+
+ FrozenView getFrozenView(const NodeAllocatorType & allocator) const {
+ return FrozenView(getFrozenRoot(), allocator);
+ }
+
+ size_t
+ size(const NodeAllocatorType &allocator) const;
+
+ size_t
+ frozenSize(const NodeAllocatorType &allocator) const;
+
+ vespalib::string toString(const NodeAllocatorType &allocator) const;
+
+ bool
+ isValid(const NodeAllocatorType &allocator, CompareT comp = CompareT()) const;
+
+ bool
+ isValidFrozen(const NodeAllocatorType &allocator, CompareT comp = CompareT()) const;
+
+ size_t
+ bitSize(const NodeAllocatorType &allocator) const;
+
+ size_t
+ bitSize(BTreeNode::Ref node, const NodeAllocatorType &allocator) const;
+
+ void
+ thaw(Iterator &itr);
+};
+
+
+template <typename KeyT,
+ typename DataT,
+ typename AggrT = NoAggregated,
+ typename CompareT = std::less<KeyT>,
+ typename TraitsT = BTreeDefaultTraits,
+ class AggrCalcT = NoAggrCalc>
+class BTreeRoot : public BTreeRootT<KeyT, DataT, AggrT,
+ CompareT, TraitsT>
+{
+public:
+ typedef BTreeRootT<KeyT, DataT, AggrT, CompareT, TraitsT> ParentType;
+ typedef typename ParentType::ParentType Parent2Type;
+ typedef typename ParentType::NodeAllocatorType NodeAllocatorType;
+ typedef typename ParentType::KeyType KeyType;
+ typedef typename ParentType::DataType DataType;
+ typedef typename ParentType::LeafNodeType LeafNodeType;
+ typedef typename ParentType::InternalNodeType InternalNodeType;
+ typedef typename ParentType::LeafNodeTypeRefPair LeafNodeTypeRefPair;
+ typedef typename ParentType::InternalNodeTypeRefPair
+ InternalNodeTypeRefPair;
+ typedef typename ParentType::Iterator Iterator;
+ typedef BTreeBuilder<KeyT, DataT, AggrT,
+ TraitsT::INTERNAL_SLOTS, TraitsT::LEAF_SLOTS,
+ AggrCalcT> Builder;
+ typedef BTreeAggregator<KeyT, DataT, AggrT,
+ TraitsT::INTERNAL_SLOTS,
+ TraitsT::LEAF_SLOTS,
+ AggrCalcT> Aggregator;
+ typedef AggrCalcT AggrCalcType;
+ using Parent2Type::_root;
+ using Parent2Type::getFrozenRoot;
+ using Parent2Type::getFrozenRootRelaxed;
+ using Parent2Type::isFrozen;
+
+public:
+ /**
+ * Create a tree from a tree builder. This is a destructive
+ * assignment, old content of tree is destroyed and tree
+ * builder is emptied when tree grabs ownership of nodes.
+ */
+ void
+ assign(Builder &rhs, NodeAllocatorType &allocator);
+
+ bool
+ insert(const KeyType & key, const DataType & data,
+ NodeAllocatorType &allocator, CompareT comp = CompareT(),
+ const AggrCalcT &aggrCalc = AggrCalcT());
+
+ void
+ insert(Iterator &itr,
+ const KeyType &key, const DataType &data,
+ const AggrCalcT &aggrCalc = AggrCalcT());
+
+ bool
+ remove(const KeyType & key,
+ NodeAllocatorType &allocator, CompareT comp = CompareT(),
+ const AggrCalcT &aggrCalc = AggrCalcT());
+
+ void
+ remove(Iterator &itr,
+ const AggrCalcT &aggrCalc = AggrCalcT());
+};
+
+
+
+extern template class BTreeRootT<uint32_t, uint32_t, NoAggregated>;
+extern template class BTreeRootT<uint32_t, BTreeNoLeafData, NoAggregated>;
+extern template class BTreeRootT<uint32_t, int32_t, MinMaxAggregated>;
+extern template class BTreeRoot<uint32_t, uint32_t, NoAggregated>;
+extern template class BTreeRoot<uint32_t, BTreeNoLeafData, NoAggregated>;
+extern template class BTreeRoot<uint32_t, int32_t, MinMaxAggregated,
+ std::less<uint32_t>,
+ BTreeDefaultTraits, MinMaxAggrCalc>;
+
+} // namespace search::btree
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/btree/btreeroot.hpp b/searchlib/src/vespa/searchlib/btree/btreeroot.hpp
new file mode 100644
index 00000000000..6b39e142d28
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/btree/btreeroot.hpp
@@ -0,0 +1,486 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "btreeroot.h"
+#include "btreebuilder.h"
+#include "btreerootbase.hpp"
+#include "btreeinserter.hpp"
+#include "btreeremover.hpp"
+#include "btreeaggregator.hpp"
+#include <vespa/vespalib/stllike/asciistream.h>
+
+namespace search {
+namespace btree {
+
+//----------------------- BTreeRoot ------------------------------------------//
+
+template <typename KeyT, typename DataT, typename AggrT, typename CompareT,
+ typename TraitsT>
+vespalib::string
+BTreeRootT<KeyT, DataT, AggrT, CompareT, TraitsT>::
+toString(BTreeNode::Ref node,
+ const NodeAllocatorType &allocator) const
+{
+ if (allocator.isLeafRef(node)) {
+ vespalib::asciistream ss;
+ ss << "{" << allocator.toString(node) << "}";
+ return ss.str();
+ } else {
+ const InternalNodeType * inode = allocator.mapInternalRef(node);
+ vespalib::asciistream ss;
+ ss << "{" << allocator.toString(inode) << ",children(" << inode->validSlots() << ")[";
+ for (size_t i = 0; i < inode->validSlots(); ++i) {
+ if (i > 0) ss << ",";
+ ss << "c[" << i << "]" << toString(inode->getChild(i), allocator);
+ }
+ ss << "]}";
+ return ss.str();
+ }
+}
+
+template <typename KeyT, typename DataT, typename AggrT, typename CompareT,
+ typename TraitsT>
+bool
+BTreeRootT<KeyT, DataT, AggrT, CompareT, TraitsT>::
+isValid(BTreeNode::Ref node,
+ bool ignoreMinSlots, uint32_t level, const NodeAllocatorType &allocator,
+ CompareT comp) const
+{
+ if (allocator.isLeafRef(node)) {
+ if (level != 0) {
+ return false;
+ }
+ const LeafNodeType * lnode = allocator.mapLeafRef(node);
+ if (level != lnode->getLevel()) {
+ return false;
+ }
+ if (lnode->validSlots() > LeafNodeType::maxSlots())
+ return false;
+ if (lnode->validSlots() < LeafNodeType::minSlots() && !ignoreMinSlots)
+ return false;
+ for (size_t i = 1; i < lnode->validSlots(); ++i) {
+ if (!comp(lnode->getKey(i - 1), lnode->getKey(i))) {
+ return false;
+ }
+ }
+ } else {
+ if (level == 0) {
+ return false;
+ }
+ const InternalNodeType * inode = allocator.mapInternalRef(node);
+ if (level != inode->getLevel()) {
+ return false;
+ }
+ if (inode->validSlots() > InternalNodeType::maxSlots())
+ return false;
+ if (inode->validSlots() < InternalNodeType::minSlots() &&
+ !ignoreMinSlots)
+ return false;
+ size_t lChildren = 0;
+ size_t iChildren = 0;
+ uint32_t validLeaves = 0;
+ for (size_t i = 0; i < inode->validSlots(); ++i) {
+ if (i > 0 && !comp(inode->getKey(i - 1), inode->getKey(i))) {
+ return false;
+ }
+ const BTreeNode::Ref childRef = inode->getChild(i);
+ if (!allocator.isValidRef(childRef))
+ return false;
+ validLeaves += allocator.validLeaves(childRef);
+ if (allocator.isLeafRef(childRef))
+ lChildren++;
+ else
+ iChildren++;
+ if (comp(inode->getKey(i), allocator.getLastKey(childRef))) {
+ return false;
+ }
+ if (comp(allocator.getLastKey(childRef), inode->getKey(i))) {
+ return false;
+ }
+ if (!isValid(childRef, false, level - 1, allocator, comp)) {
+ return false;
+ }
+ }
+ if (validLeaves != inode->validLeaves()) {
+ return false;
+ }
+ if (lChildren < inode->validSlots() && iChildren < inode->validSlots()) {
+ return false;
+ }
+ }
+ return true;
+}
+
+template <typename KeyT, typename DataT, typename AggrT, typename CompareT,
+ typename TraitsT>
+typename BTreeRootT<KeyT, DataT, AggrT, CompareT, TraitsT>::Iterator
+BTreeRootT<KeyT, DataT, AggrT, CompareT, TraitsT>::
+findHelper(BTreeNode::Ref root, const KeyType & key,
+ const NodeAllocatorType & allocator, CompareT comp)
+{
+ Iterator itr(BTreeNode::Ref(), allocator);
+ itr.lower_bound(root, key, comp);
+ if (itr.valid() && comp(key, itr.getKey())) {
+ itr.setupEnd();
+ }
+ return itr;
+}
+
+template <typename KeyT, typename DataT, typename AggrT, typename CompareT,
+ typename TraitsT>
+typename BTreeRootT<KeyT, DataT, AggrT, CompareT, TraitsT>::Iterator
+BTreeRootT<KeyT, DataT, AggrT, CompareT, TraitsT>::
+lowerBoundHelper(BTreeNode::Ref root, const KeyType & key,
+ const NodeAllocatorType & allocator, CompareT comp)
+{
+ Iterator itr(BTreeNode::Ref(), allocator);
+ itr.lower_bound(root, key, comp);
+ return itr;
+}
+
+template <typename KeyT, typename DataT, typename AggrT, typename CompareT,
+ typename TraitsT>
+typename BTreeRootT<KeyT, DataT, AggrT, CompareT, TraitsT>::Iterator
+BTreeRootT<KeyT, DataT, AggrT, CompareT, TraitsT>::
+upperBoundHelper(BTreeNode::Ref root, const KeyType & key,
+ const NodeAllocatorType & allocator, CompareT comp)
+{
+ Iterator itr(root, allocator);
+ if (itr.valid() && !comp(key, itr.getKey())) {
+ itr.seekPast(key, comp);
+ }
+ return itr;
+}
+
+
+//----------------------- BTreeRoot::FrozenView ----------------------------------//
+
+template <typename KeyT, typename DataT, typename AggrT, typename CompareT,
+ typename TraitsT>
+BTreeRootT<KeyT, DataT, AggrT, CompareT, TraitsT>::
+FrozenView::FrozenView(BTreeNode::Ref frozenRoot,
+ const NodeAllocatorType & allocator) :
+ _frozenRoot(frozenRoot),
+ _allocator(allocator)
+{
+}
+
+template <typename KeyT, typename DataT, typename AggrT, typename CompareT,
+ typename TraitsT>
+typename BTreeRootT<KeyT, DataT, AggrT, CompareT, TraitsT>::ConstIterator
+BTreeRootT<KeyT, DataT, AggrT, CompareT, TraitsT>::
+FrozenView::find(const KeyType & key,
+ CompareT comp) const
+{
+ ConstIterator itr(BTreeNode::Ref(), _allocator);
+ itr.lower_bound(_frozenRoot, key, comp);
+ if (itr.valid() && comp(key, itr.getKey())) {
+ itr.setupEnd();
+ }
+ return itr;
+}
+
+template <typename KeyT, typename DataT, typename AggrT, typename CompareT,
+ typename TraitsT>
+typename BTreeRootT<KeyT, DataT, AggrT, CompareT, TraitsT>::ConstIterator
+BTreeRootT<KeyT, DataT, AggrT, CompareT, TraitsT>::
+FrozenView::lowerBound(const KeyType & key,
+ CompareT comp) const
+{
+ ConstIterator itr(BTreeNode::Ref(), _allocator);
+ itr.lower_bound(_frozenRoot, key, comp);
+ return itr;
+}
+
+template <typename KeyT, typename DataT, typename AggrT, typename CompareT,
+ typename TraitsT>
+typename BTreeRootT<KeyT, DataT, AggrT, CompareT, TraitsT>::ConstIterator
+BTreeRootT<KeyT, DataT, AggrT, CompareT, TraitsT>::
+FrozenView::upperBound(const KeyType & key,
+ CompareT comp) const
+{
+ ConstIterator itr(_frozenRoot, _allocator);
+ if (itr.valid() && !comp(key, itr.getKey())) {
+ itr.seekPast(key, comp);
+ }
+ return itr;
+}
+
+template <typename KeyT, typename DataT, typename AggrT, typename CompareT,
+ typename TraitsT>
+size_t
+BTreeRootT<KeyT, DataT, AggrT, CompareT, TraitsT>::
+FrozenView::size(void) const
+{
+ if (NodeAllocatorType::isValidRef(_frozenRoot)) {
+ return _allocator.validLeaves(_frozenRoot);
+ }
+ return 0u;
+}
+
+//----------------------- BTreeRoot ----------------------------------------------//
+
+template <typename KeyT, typename DataT, typename AggrT, typename CompareT,
+ typename TraitsT>
+BTreeRootT<KeyT, DataT, AggrT, CompareT, TraitsT>::BTreeRootT()
+ : ParentType()
+{
+}
+
+template <typename KeyT, typename DataT, typename AggrT, typename CompareT,
+ typename TraitsT>
+BTreeRootT<KeyT, DataT, AggrT, CompareT, TraitsT>::~BTreeRootT()
+{
+}
+
+template <typename KeyT, typename DataT, typename AggrT, typename CompareT,
+ typename TraitsT>
+void
+BTreeRootT<KeyT, DataT, AggrT, CompareT, TraitsT>::
+clear(NodeAllocatorType &allocator)
+{
+ if (NodeAllocatorType::isValidRef(_root)) {
+ this->recursiveDelete(_root, allocator);
+ _root = BTreeNode::Ref();
+ if (NodeAllocatorType::isValidRef(getFrozenRootRelaxed()))
+ allocator.needFreeze(this);
+ }
+}
+
+template <typename KeyT, typename DataT, typename AggrT, typename CompareT,
+ typename TraitsT>
+typename BTreeRootT<KeyT, DataT, AggrT, CompareT, TraitsT>::Iterator
+BTreeRootT<KeyT, DataT, AggrT, CompareT, TraitsT>::
+find(const KeyType & key, const NodeAllocatorType & allocator,
+ CompareT comp) const
+{
+ return findHelper(_root, key, allocator, comp);
+}
+
+template <typename KeyT, typename DataT, typename AggrT, typename CompareT,
+ typename TraitsT>
+typename BTreeRootT<KeyT, DataT, AggrT, CompareT, TraitsT>::Iterator
+BTreeRootT<KeyT, DataT, AggrT, CompareT, TraitsT>::
+lowerBound(const KeyType & key, const NodeAllocatorType & allocator,
+ CompareT comp) const
+{
+ return lowerBoundHelper(_root, key, allocator, comp);
+}
+
+template <typename KeyT, typename DataT, typename AggrT, typename CompareT,
+ typename TraitsT>
+typename BTreeRootT<KeyT, DataT, AggrT, CompareT, TraitsT>::Iterator
+BTreeRootT<KeyT, DataT, AggrT, CompareT, TraitsT>::
+upperBound(const KeyType & key, const NodeAllocatorType & allocator,
+ CompareT comp) const
+{
+ return upperBoundHelper(_root, key, allocator, comp);
+}
+
+
+template <typename KeyT, typename DataT, typename AggrT, typename CompareT,
+ typename TraitsT>
+size_t
+BTreeRootT<KeyT, DataT, AggrT, CompareT, TraitsT>::
+size(const NodeAllocatorType &allocator) const
+{
+ if (NodeAllocatorType::isValidRef(_root)) {
+ return allocator.validLeaves(_root);
+ }
+ return 0u;
+}
+
+
+template <typename KeyT, typename DataT, typename AggrT, typename CompareT,
+ typename TraitsT>
+size_t
+BTreeRootT<KeyT, DataT, AggrT, CompareT, TraitsT>::
+frozenSize(const NodeAllocatorType &allocator) const
+{
+ BTreeNode::Ref frozenRoot = getFrozenRoot();
+ if (NodeAllocatorType::isValidRef(frozenRoot)) {
+ return allocator.validLeaves(frozenRoot);
+ }
+ return 0u;
+}
+
+
+template <typename KeyT, typename DataT, typename AggrT, typename CompareT,
+ typename TraitsT>
+vespalib::string
+BTreeRootT<KeyT, DataT, AggrT, CompareT, TraitsT>::
+toString(const NodeAllocatorType &allocator) const
+{
+ vespalib::asciistream ss;
+ if (NodeAllocatorType::isValidRef(_root)) {
+ ss << "root(" << toString(_root, allocator) << ")";
+ }
+ return ss.str();
+}
+
+template <typename KeyT, typename DataT, typename AggrT, typename CompareT,
+ typename TraitsT>
+bool
+BTreeRootT<KeyT, DataT, AggrT, CompareT, TraitsT>::
+isValid(const NodeAllocatorType &allocator,
+ CompareT comp) const
+{
+ if (NodeAllocatorType::isValidRef(_root)) {
+ uint32_t level = allocator.getLevel(_root);
+ return isValid(_root, true, level, allocator, comp);
+ }
+ return true;
+}
+
+
+template <typename KeyT, typename DataT, typename AggrT, typename CompareT,
+ typename TraitsT>
+bool
+BTreeRootT<KeyT, DataT, AggrT, CompareT, TraitsT>::
+isValidFrozen(const NodeAllocatorType &allocator,
+ CompareT comp) const
+{
+ BTreeNode::Ref frozenRoot = getFrozenRoot();
+ if (NodeAllocatorType::isValidRef(frozenRoot)) {
+ uint32_t level = allocator.getLevel(frozenRoot);
+ return isValid(frozenRoot, true, level, allocator, comp);
+ }
+ return true;
+}
+
+
+template <typename KeyT, typename DataT, typename AggrT, typename CompareT,
+ typename TraitsT>
+size_t
+BTreeRootT<KeyT, DataT, AggrT, CompareT, TraitsT>::
+bitSize(const NodeAllocatorType &allocator) const
+{
+ size_t ret = sizeof(BTreeRootT) * 8;
+ if (NodeAllocatorType::isValidRef(_root))
+ ret += bitSize(_root, allocator);
+ return ret;
+}
+
+
+template <typename KeyT, typename DataT, typename AggrT, typename CompareT,
+ typename TraitsT>
+size_t
+BTreeRootT<KeyT, DataT, AggrT, CompareT, TraitsT>::
+bitSize(BTreeNode::Ref node,
+ const NodeAllocatorType &allocator) const
+{
+ if (allocator.isLeafRef(node)) {
+ return sizeof(LeafNodeType) * 8;
+ } else {
+ size_t ret = sizeof(InternalNodeType) * 8;
+ const InternalNodeType * inode = allocator.mapInternalRef(node);
+ size_t slots = inode->validSlots();
+ for (size_t i = 0; i < slots; ++i) {
+ ret += bitSize(inode->getChild(i), allocator);
+ }
+ return ret;
+ }
+}
+
+
+template <typename KeyT, typename DataT, typename AggrT, typename CompareT,
+ typename TraitsT>
+void
+BTreeRootT<KeyT, DataT, AggrT, CompareT, TraitsT>::
+thaw(Iterator &itr)
+{
+ bool oldFrozen = isFrozen();
+ _root = itr.thaw(_root);
+ if (oldFrozen && !isFrozen())
+ itr.getAllocator().needFreeze(this);
+}
+
+
+template <typename KeyT, typename DataT, typename AggrT, typename CompareT,
+ typename TraitsT, class AggrCalcT>
+void
+BTreeRoot<KeyT, DataT, AggrT, CompareT, TraitsT, AggrCalcT>::
+assign(Builder &rhs,
+ NodeAllocatorType &allocator)
+{
+ this->clear(allocator);
+
+ bool oldFrozen = isFrozen();
+ _root = rhs.handover();
+ if (oldFrozen && !isFrozen())
+ allocator.needFreeze(this);
+}
+
+
+template <typename KeyT, typename DataT, typename AggrT, typename CompareT,
+ typename TraitsT, class AggrCalcT>
+bool
+BTreeRoot<KeyT, DataT, AggrT, CompareT, TraitsT, AggrCalcT>::
+insert(const KeyType & key, const DataType & data,
+ NodeAllocatorType &allocator, CompareT comp,
+ const AggrCalcT &aggrCalc)
+{
+ Iterator itr(BTreeNode::Ref(), allocator);
+ itr.lower_bound(_root, key, comp);
+ if (itr.valid() && !comp(key, itr.getKey()))
+ return false; // Element already exists
+ insert(itr, key, data, aggrCalc);
+ return true;
+}
+
+
+template <typename KeyT, typename DataT, typename AggrT, typename CompareT,
+ typename TraitsT, class AggrCalcT>
+void
+BTreeRoot<KeyT, DataT, AggrT, CompareT, TraitsT, AggrCalcT>::
+insert(Iterator &itr,
+ const KeyType &key, const DataType &data,
+ const AggrCalcT &aggrCalc)
+{
+ typedef BTreeInserter<KeyT, DataT, AggrT, CompareT, TraitsT,
+ AggrCalcT> Inserter;
+ bool oldFrozen = isFrozen();
+ Inserter::insert(_root, itr, key, data,
+ aggrCalc);
+ if (oldFrozen && !isFrozen())
+ itr.getAllocator().needFreeze(this);
+}
+
+
+template <typename KeyT, typename DataT, typename AggrT, typename CompareT,
+ typename TraitsT, class AggrCalcT>
+bool
+BTreeRoot<KeyT, DataT, AggrT, CompareT, TraitsT, AggrCalcT>::
+remove(const KeyType & key,
+ NodeAllocatorType &allocator, CompareT comp,
+ const AggrCalcT &aggrCalc)
+{
+ Iterator itr(BTreeNode::Ref(), allocator);
+ itr.lower_bound(_root, key, comp);
+ if (!itr.valid() || comp(key, itr.getKey()))
+ return false;
+ remove(itr, aggrCalc);
+ return true;
+}
+
+
+template <typename KeyT, typename DataT, typename AggrT, typename CompareT,
+ typename TraitsT, class AggrCalcT>
+void
+BTreeRoot<KeyT, DataT, AggrT, CompareT, TraitsT, AggrCalcT>::
+remove(Iterator &itr,
+ const AggrCalcT &aggrCalc)
+{
+ typedef BTreeRemover<KeyT, DataT, AggrT, CompareT, TraitsT, AggrCalcT>
+ Remover;
+ bool oldFrozen = isFrozen();
+ Remover::remove(_root, itr, aggrCalc);
+ if (oldFrozen && !isFrozen())
+ itr.getAllocator().needFreeze(this);
+}
+
+
+} // namespace search::btree
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/btree/btreerootbase.cpp b/searchlib/src/vespa/searchlib/btree/btreerootbase.cpp
new file mode 100644
index 00000000000..a31ea1206c9
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/btree/btreerootbase.cpp
@@ -0,0 +1,26 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "btreerootbase.h"
+#include "btreerootbase.hpp"
+
+namespace search
+{
+
+namespace btree
+{
+
+template class BTreeRootBase<uint32_t, uint32_t,
+ NoAggregated,
+ BTreeDefaultTraits::INTERNAL_SLOTS,
+ BTreeDefaultTraits::LEAF_SLOTS>;
+template class BTreeRootBase<uint32_t, BTreeNoLeafData,
+ NoAggregated,
+ BTreeDefaultTraits::INTERNAL_SLOTS,
+ BTreeDefaultTraits::LEAF_SLOTS>;
+template class BTreeRootBase<uint32_t, int32_t,
+ MinMaxAggregated,
+ BTreeDefaultTraits::INTERNAL_SLOTS,
+ BTreeDefaultTraits::LEAF_SLOTS>;
+
+} // namespace btree
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/btree/btreerootbase.h b/searchlib/src/vespa/searchlib/btree/btreerootbase.h
new file mode 100644
index 00000000000..8d16402a030
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/btree/btreerootbase.h
@@ -0,0 +1,121 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "btreetraits.h"
+#include "btreenode.h"
+#include "btreenodeallocator.h"
+#include <atomic>
+
+namespace search {
+namespace btree {
+
+template <typename KeyT,
+ typename DataT,
+ typename AggrT,
+ size_t INTERNAL_SLOTS,
+ size_t LEAF_SLOTS>
+class BTreeRootBase
+{
+protected:
+ typedef KeyT KeyType;
+ typedef DataT DataType;
+ typedef AggrT AggregatedType;
+ typedef BTreeRootBase<KeyT, DataT, AggrT, INTERNAL_SLOTS, LEAF_SLOTS>
+ BTreeRootBaseType;
+ typedef BTreeInternalNode<KeyT, AggrT, INTERNAL_SLOTS> InternalNodeType;
+ typedef BTreeLeafNode<KeyT, DataT, AggrT, LEAF_SLOTS> LeafNodeType;
+ typedef BTreeNodeAllocator<KeyT, DataT, AggrT,
+ INTERNAL_SLOTS, LEAF_SLOTS> NodeAllocatorType;
+
+ BTreeNode::Ref _root;
+ std::atomic<uint32_t> _frozenRoot;
+
+ static_assert(sizeof(_root) == sizeof(_frozenRoot),
+ "BTree root reference size mismatch");
+
+ BTreeRootBase(void);
+
+ BTreeRootBase(const BTreeRootBase &rhs);
+
+ BTreeRootBase &operator=(const BTreeRootBase &rhs);
+
+ ~BTreeRootBase(void);
+
+public:
+ void
+ freeze(NodeAllocatorType &allocator);
+
+ bool isFrozen() const {
+ return (_root.ref() == _frozenRoot.load(std::memory_order_relaxed));
+ }
+
+ void
+ setRoot(BTreeNode::Ref newRoot, NodeAllocatorType &allocator)
+ {
+ bool oldFrozen = isFrozen();
+ _root = newRoot;
+ if (oldFrozen && !isFrozen())
+ allocator.needFreeze(this);
+ }
+
+ void
+ setRoots(BTreeNode::Ref newRoot)
+ {
+ _root = newRoot;
+ _frozenRoot = newRoot.ref();
+ }
+
+ BTreeNode::Ref
+ getRoot(void) const
+ {
+ return _root;
+ }
+
+ BTreeNode::Ref
+ getFrozenRoot(void) const
+ {
+ return BTreeNode::Ref(_frozenRoot.load(std::memory_order_acquire));
+ }
+
+ BTreeNode::Ref
+ getFrozenRootRelaxed(void) const
+ {
+ return BTreeNode::Ref(_frozenRoot.load(std::memory_order_relaxed));
+ }
+
+ const AggrT &
+ getAggregated(const NodeAllocatorType &allocator) const
+ {
+ return allocator.getAggregated(_root);
+ }
+
+ void
+ recycle(void)
+ {
+ _root = BTreeNode::Ref();
+ _frozenRoot = BTreeNode::Ref().ref();
+ }
+
+protected:
+ void
+ recursiveDelete(BTreeNode::Ref node, NodeAllocatorType &allocator);
+};
+
+extern template class BTreeRootBase<uint32_t, uint32_t,
+ NoAggregated,
+ BTreeDefaultTraits::INTERNAL_SLOTS,
+ BTreeDefaultTraits::LEAF_SLOTS>;
+extern template class BTreeRootBase<uint32_t, BTreeNoLeafData,
+ NoAggregated,
+ BTreeDefaultTraits::INTERNAL_SLOTS,
+ BTreeDefaultTraits::LEAF_SLOTS>;
+extern template class BTreeRootBase<uint32_t, int32_t,
+ MinMaxAggregated,
+ BTreeDefaultTraits::INTERNAL_SLOTS,
+ BTreeDefaultTraits::LEAF_SLOTS>;
+
+} // namespace btree
+} // namespace search
+
+
diff --git a/searchlib/src/vespa/searchlib/btree/btreerootbase.hpp b/searchlib/src/vespa/searchlib/btree/btreerootbase.hpp
new file mode 100644
index 00000000000..4641bc6dad7
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/btree/btreerootbase.hpp
@@ -0,0 +1,90 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "btreerootbase.h"
+
+namespace search {
+namespace btree {
+
+
+template <typename KeyT, typename DataT, typename AggrT,
+ size_t INTERNAL_SLOTS, size_t LEAF_SLOTS>
+BTreeRootBase<KeyT, DataT, AggrT, INTERNAL_SLOTS, LEAF_SLOTS>::BTreeRootBase()
+ : _root(BTreeNode::Ref()),
+ _frozenRoot(BTreeNode::Ref().ref())
+{
+}
+
+
+template <typename KeyT, typename DataT, typename AggrT,
+ size_t INTERNAL_SLOTS, size_t LEAF_SLOTS>
+BTreeRootBase<KeyT, DataT, AggrT, INTERNAL_SLOTS, LEAF_SLOTS>::
+BTreeRootBase(const BTreeRootBase &rhs)
+ : _root(rhs._root),
+ _frozenRoot(rhs._frozenRoot.load())
+{
+}
+
+
+template <typename KeyT, typename DataT, typename AggrT,
+ size_t INTERNAL_SLOTS, size_t LEAF_SLOTS>
+BTreeRootBase<KeyT, DataT, AggrT, INTERNAL_SLOTS, LEAF_SLOTS>::~BTreeRootBase()
+{
+ assert(!_root.valid());
+#if 0
+ assert(!_frozenRoot.valid());
+#endif
+}
+
+
+template <typename KeyT, typename DataT, typename AggrT,
+ size_t INTERNAL_SLOTS, size_t LEAF_SLOTS>
+BTreeRootBase<KeyT, DataT, AggrT, INTERNAL_SLOTS, LEAF_SLOTS> &
+BTreeRootBase<KeyT, DataT, AggrT, INTERNAL_SLOTS, LEAF_SLOTS>::
+operator=(const BTreeRootBase &rhs)
+{
+ _root = rhs._root;
+ _frozenRoot.store(rhs._frozenRoot.load(), std::memory_order_release);
+ return *this;
+}
+
+
+template <typename KeyT, typename DataT, typename AggrT,
+ size_t INTERNAL_SLOTS, size_t LEAF_SLOTS>
+void
+BTreeRootBase<KeyT, DataT, AggrT, INTERNAL_SLOTS, LEAF_SLOTS>::
+freeze(NodeAllocatorType &allocator)
+{
+ if (NodeAllocatorType::isValidRef(_root)) {
+ if (allocator.isLeafRef(_root))
+ assert(allocator.mapLeafRef(_root)->getFrozen());
+ else
+ assert(allocator.mapInternalRef(_root)->getFrozen());
+ }
+ _frozenRoot.store(_root.ref(), std::memory_order_release);
+}
+
+
+template <typename KeyT, typename DataT, typename AggrT,
+ size_t INTERNAL_SLOTS, size_t LEAF_SLOTS>
+void
+BTreeRootBase<KeyT, DataT, AggrT, INTERNAL_SLOTS, LEAF_SLOTS>::
+recursiveDelete(BTreeNode::Ref node,
+ NodeAllocatorType &allocator)
+{
+ assert(allocator.isValidRef(node));
+ if (!allocator.isLeafRef(node)) {
+ InternalNodeType * inode = allocator.mapInternalRef(node);
+ for (size_t i = 0; i < inode->validSlots(); ++i) {
+ recursiveDelete(inode->getChild(i), allocator);
+ }
+ allocator.holdNode(node, inode);
+ } else {
+ allocator.holdNode(node, allocator.mapLeafRef(node));
+ }
+}
+
+} // namespace btree
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/btree/btreestore.cpp b/searchlib/src/vespa/searchlib/btree/btreestore.cpp
new file mode 100644
index 00000000000..862d05baf55
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/btree/btreestore.cpp
@@ -0,0 +1,36 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "btreestore.h"
+#include "datastore.h"
+#include "btreenode.h"
+#include "btreerootbase.h"
+#include "btreeroot.h"
+#include "btreenodeallocator.h"
+#include "btreeiterator.hpp"
+#include "btreestore.hpp"
+
+namespace search
+{
+
+namespace btree
+{
+
+template class BTreeStore<uint32_t, uint32_t,
+ NoAggregated,
+ std::less<uint32_t>,
+ BTreeDefaultTraits>;
+
+template class BTreeStore<uint32_t, BTreeNoLeafData,
+ NoAggregated,
+ std::less<uint32_t>,
+ BTreeDefaultTraits>;
+
+template class BTreeStore<uint32_t, int32_t,
+ MinMaxAggregated,
+ std::less<uint32_t>,
+ BTreeDefaultTraits,
+ MinMaxAggrCalc>;
+
+} // namespace btree
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/btree/btreestore.h b/searchlib/src/vespa/searchlib/btree/btreestore.h
new file mode 100644
index 00000000000..143a491a725
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/btree/btreestore.h
@@ -0,0 +1,511 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "datastore.h"
+#include "btreenode.h"
+#include "btreebuilder.h"
+#include "btreeroot.h"
+#include "noaggrcalc.h"
+#include "minmaxaggrcalc.h"
+
+namespace search
+{
+
+namespace btree
+{
+
+template <typename KeyT,
+ typename DataT,
+ typename AggrT,
+ typename CompareT,
+ typename TraitsT,
+ typename AggrCalcT = NoAggrCalc>
+class BTreeStore
+{
+public:
+ typedef KeyT KeyType;
+ typedef DataT DataType;
+ typedef AggrT AggregatedType;
+ typedef DataStoreT<EntryRefT<22> > DataStoreType;
+ typedef DataStoreType::RefType RefType;
+ typedef BTreeKeyData<KeyT, DataT> KeyDataType;
+
+ typedef BTreeRoot<KeyT, DataT, AggrT, CompareT, TraitsT,
+ AggrCalcT> BTreeType;
+ typedef BTreeInternalNode<KeyT, AggrT,
+ TraitsT::INTERNAL_SLOTS> InternalNodeType;
+ typedef BTreeLeafNode<KeyT, DataT, AggrT, TraitsT::LEAF_SLOTS>
+ LeafNodeType;
+ typedef std::pair<EntryRef, BTreeType *> BTreeTypeRefPair;
+ typedef std::pair<EntryRef, KeyDataType *> KeyDataTypeRefPair;
+ typedef typename InternalNodeType::RefPair InternalNodeTypeRefPair;
+ typedef typename LeafNodeType::RefPair LeafNodeTypeRefPair;
+ typedef vespalib::GenerationHandler::generation_t generation_t;
+ typedef BTreeNodeAllocator<KeyT, DataT, AggrT,
+ TraitsT::INTERNAL_SLOTS,
+ TraitsT::LEAF_SLOTS> NodeAllocatorType;
+ typedef typename BTreeType::Iterator Iterator;
+ typedef typename BTreeType::ConstIterator ConstIterator;
+ typedef const KeyDataType * AddIter;
+ typedef const KeyType * RemoveIter;
+ typedef BTreeBuilder<KeyT, DataT, AggrT,
+ TraitsT::INTERNAL_SLOTS,
+ TraitsT::LEAF_SLOTS,
+ AggrCalcT> Builder;
+
+ static constexpr uint32_t clusterLimit = 8;
+
+ enum BufferTypes
+ {
+ BUFFERTYPE_ARRAY1 = 0,
+ BUFFERTYPE_ARRAY2 = 1,
+ BUFFERTYPE_ARRAY3 = 2,
+ BUFFERTYPE_ARRAY4 = 3,
+ BUFFERTYPE_ARRAY5 = 4,
+ BUFFERTYPE_ARRAY6 = 5,
+ BUFFERTYPE_ARRAY7 = 6,
+ BUFFERTYPE_ARRAY8 = 7,
+ BUFFERTYPE_BTREE = 8
+ };
+protected:
+ struct TreeReclaimer {
+ static void reclaim(BTreeType * tree) {
+ tree->recycle();
+ }
+ };
+
+ DataStoreType _store;
+ BufferType<BTreeType> _treeType;
+ BufferType<KeyDataType> _small1Type;
+ BufferType<KeyDataType> _small2Type;
+ BufferType<KeyDataType> _small3Type;
+ BufferType<KeyDataType> _small4Type;
+ BufferType<KeyDataType> _small5Type;
+ BufferType<KeyDataType> _small6Type;
+ BufferType<KeyDataType> _small7Type;
+ BufferType<KeyDataType> _small8Type;
+ NodeAllocatorType _allocator;
+ AggrCalcT _aggrCalc;
+ Builder _builder;
+
+ BTreeType * getWTreeEntry(RefType ref) {
+ return _store.getBufferEntry<BTreeType>(ref.bufferId(), ref.offset());
+ }
+
+public:
+ BTreeStore();
+
+ BTreeStore(bool init);
+
+ ~BTreeStore(void);
+
+ const NodeAllocatorType &getAllocator() const { return _allocator; }
+
+ void
+ disableFreeLists() {
+ _store.disableFreeLists();
+ _allocator.disableFreeLists();
+ }
+
+ void
+ disableElemHoldList()
+ {
+ _store.disableElemHoldList();
+ _allocator.disableElemHoldList();
+ }
+
+ BTreeTypeRefPair
+ allocNewBTree(void) {
+ return _store.allocNewEntry<BTreeType>(BUFFERTYPE_BTREE);
+ }
+
+ BTreeTypeRefPair
+ allocBTree(void) {
+ return _store.allocEntry<BTreeType, TreeReclaimer>(BUFFERTYPE_BTREE);
+ }
+
+ BTreeTypeRefPair
+ allocNewBTreeCopy(const BTreeType &rhs) {
+ return _store.allocNewEntryCopy<BTreeType>(BUFFERTYPE_BTREE, rhs);
+ }
+
+ BTreeTypeRefPair
+ allocBTreeCopy(const BTreeType &rhs) {
+ return _store.allocEntryCopy<BTreeType, DefaultReclaimer<BTreeType> >(
+ BUFFERTYPE_BTREE, rhs);
+ }
+
+ KeyDataTypeRefPair
+ allocNewKeyData(uint32_t clusterSize);
+
+ KeyDataTypeRefPair
+ allocKeyData(uint32_t clusterSize);
+
+ KeyDataTypeRefPair
+ allocNewKeyDataCopy(const KeyDataType *rhs, uint32_t clusterSize);
+
+ KeyDataTypeRefPair
+ allocKeyDataCopy(const KeyDataType *rhs, uint32_t clusterSize);
+
+ std::vector<uint32_t>
+ startCompact(void);
+
+ void
+ finishCompact(const std::vector<uint32_t> &toHold);
+
+
+ const KeyDataType *
+ lower_bound(const KeyDataType *b, const KeyDataType *e,
+ const KeyType &key, CompareT comp);
+
+ void
+ makeTree(EntryRef &ref,
+ const KeyDataType *array, uint32_t clusterSize);
+
+ void
+ makeArray(EntryRef &ref, EntryRef leafRef, LeafNodeType *leafNode);
+
+ bool
+ insert(EntryRef &ref,
+ const KeyType &key, const DataType &data,
+ CompareT comp = CompareT());
+
+ bool
+ remove(EntryRef &ref,
+ const KeyType &key,
+ CompareT comp = CompareT());
+
+ uint32_t
+ getNewClusterSize(const KeyDataType *o,
+ const KeyDataType *oe,
+ AddIter a,
+ AddIter ae,
+ RemoveIter r,
+ RemoveIter re,
+ CompareT comp);
+
+ void
+ applyCluster(const KeyDataType *o,
+ const KeyDataType *oe,
+ KeyDataType *d,
+ const KeyDataType *de,
+ AddIter a,
+ AddIter ae,
+ RemoveIter r,
+ RemoveIter re,
+ CompareT comp);
+
+
+ void
+ applyModifyTree(BTreeType *tree,
+ AddIter a,
+ AddIter ae,
+ RemoveIter r,
+ RemoveIter re,
+ CompareT comp);
+
+ void
+ applyBuildTree(BTreeType *tree,
+ AddIter a,
+ AddIter ae,
+ RemoveIter r,
+ RemoveIter re,
+ CompareT comp);
+
+ void
+ applyNewArray(EntryRef &ref,
+ AddIter aOrg,
+ AddIter ae);
+
+ void
+ applyNewTree(EntryRef &ref,
+ AddIter a,
+ AddIter ae,
+ CompareT comp);
+
+ void
+ applyNew(EntryRef &ref,
+ AddIter a,
+ AddIter ae,
+ CompareT comp);
+
+
+ bool
+ applyCluster(EntryRef &ref,
+ uint32_t clusterSize,
+ AddIter a,
+ AddIter ae,
+ RemoveIter r,
+ RemoveIter re,
+ CompareT comp);
+
+ void
+ applyTree(BTreeType *tree,
+ AddIter a,
+ AddIter ae,
+ RemoveIter r,
+ RemoveIter re,
+ CompareT comp);
+
+ void
+ normalizeTree(EntryRef &ref,
+ BTreeType *tree,
+ bool wasArray);
+ /**
+ * Apply multiple changes at once.
+ *
+ * additions and removals should be sorted on key without duplicates.
+ * Overlap between additions and removals indicates updates.
+ */
+ void
+ apply(EntryRef &ref,
+ AddIter a,
+ AddIter ae,
+ RemoveIter r,
+ RemoveIter re,
+ CompareT comp = CompareT());
+
+ void
+ clear(const EntryRef ref);
+
+ size_t
+ size(const EntryRef ref) const;
+
+ size_t
+ frozenSize(const EntryRef ref) const;
+
+ Iterator
+ begin(const EntryRef ref) const;
+
+ ConstIterator
+ beginFrozen(const EntryRef ref) const;
+
+ void
+ beginFrozen(const EntryRef ref, std::vector<ConstIterator> &where) const;
+
+ uint32_t
+ getTypeId(RefType ref) const
+ {
+ return _store.getBufferState(ref.bufferId()).getTypeId();
+ }
+
+ static bool
+ isSmallArray(uint32_t typeId)
+ {
+ return typeId < clusterLimit;
+ }
+
+ bool
+ isSmallArray(const EntryRef ref) const;
+
+ /**
+ * Returns the cluster size for the type id.
+ * Cluster size == 0 means we have a tree for the given reference.
+ * The reference must be valid.
+ **/
+ static uint32_t
+ getClusterSize(uint32_t typeId)
+ {
+ return (typeId < clusterLimit) ? typeId + 1 : 0;
+ }
+
+ /**
+ * Returns the cluster size for the entry pointed to by the given reference.
+ * Cluster size == 0 means we have a tree for the given reference.
+ * The reference must be valid.
+ **/
+ uint32_t
+ getClusterSize(RefType ref) const
+ {
+ return getClusterSize(getTypeId(ref));
+ }
+
+ const BTreeType * getTreeEntry(RefType ref) const {
+ return _store.getBufferEntry<BTreeType>(ref.bufferId(), ref.offset());
+ }
+
+ const KeyDataType * getKeyDataEntry(RefType ref, uint32_t clusterSize) const {
+ return _store.getBufferEntry<KeyDataType>(ref.bufferId(), ref.offset() * clusterSize);
+ }
+
+ void freeze() {
+ _allocator.freeze();
+ }
+
+ // Inherit doc from DataStoreBase
+ void
+ trimHoldLists(generation_t usedGen)
+ {
+ _allocator.trimHoldLists(usedGen);
+ _store.trimHoldLists(usedGen);
+ }
+
+ // Inherit doc from DataStoreBase
+ void
+ transferHoldLists(generation_t generation)
+ {
+ _allocator.transferHoldLists(generation);
+ _store.transferHoldLists(generation);
+ }
+
+ void
+ clearHoldLists(void)
+ {
+ _allocator.clearHoldLists();
+ _store.clearHoldLists();
+ }
+
+
+ // Inherit doc from DataStoreBase
+ MemoryUsage getMemoryUsage() const {
+ MemoryUsage usage;
+ usage.merge(_allocator.getMemoryUsage());
+ usage.merge(_store.getMemoryUsage());
+ return usage;
+ }
+
+ void
+ clearBuilder(void)
+ {
+ _builder.clear();
+ }
+
+ AggregatedType
+ getAggregated(const EntryRef ref) const;
+
+ template <typename FunctionType>
+ void
+ foreach_unfrozen_key(EntryRef ref, FunctionType func) const;
+
+ template <typename FunctionType>
+ void
+ foreach_frozen_key(EntryRef ref, FunctionType func) const;
+
+ template <typename FunctionType>
+ void
+ foreach_unfrozen(EntryRef ref, FunctionType func) const;
+
+ template <typename FunctionType>
+ void
+ foreach_frozen(EntryRef ref, FunctionType func) const;
+
+private:
+ static constexpr size_t MIN_CLUSTERS = 128u;
+ template <typename FunctionType, bool Frozen>
+ void
+ foreach_key(EntryRef ref, FunctionType func) const;
+
+ template <typename FunctionType, bool Frozen>
+ void
+ foreach(EntryRef ref, FunctionType func) const;
+};
+
+template <typename KeyT, typename DataT, typename AggrT, typename CompareT,
+ typename TraitsT, typename AggrCalcT>
+template <typename FunctionType>
+void
+BTreeStore<KeyT, DataT, AggrT, CompareT, TraitsT, AggrCalcT>::
+foreach_unfrozen_key(EntryRef ref, FunctionType func) const {
+ foreach_key<FunctionType, false>(ref, func);
+}
+
+template <typename KeyT, typename DataT, typename AggrT, typename CompareT,
+ typename TraitsT, typename AggrCalcT>
+template <typename FunctionType>
+void
+BTreeStore<KeyT, DataT, AggrT, CompareT, TraitsT, AggrCalcT>::
+foreach_frozen_key(EntryRef ref, FunctionType func) const
+{
+ foreach_key<FunctionType, true>(ref, func);
+}
+
+template <typename KeyT, typename DataT, typename AggrT, typename CompareT,
+ typename TraitsT, typename AggrCalcT>
+template <typename FunctionType>
+void
+BTreeStore<KeyT, DataT, AggrT, CompareT, TraitsT, AggrCalcT>::
+foreach_unfrozen(EntryRef ref, FunctionType func) const
+{
+ foreach<FunctionType, false>(ref, func);
+}
+
+
+template <typename KeyT, typename DataT, typename AggrT, typename CompareT,
+ typename TraitsT, typename AggrCalcT>
+template <typename FunctionType>
+void
+BTreeStore<KeyT, DataT, AggrT, CompareT, TraitsT, AggrCalcT>::
+foreach_frozen(EntryRef ref, FunctionType func) const
+{
+ foreach<FunctionType, true>(ref, func);
+}
+
+template <typename KeyT, typename DataT, typename AggrT, typename CompareT,
+ typename TraitsT, typename AggrCalcT>
+template <typename FunctionType, bool Frozen>
+void
+BTreeStore<KeyT, DataT, AggrT, CompareT, TraitsT, AggrCalcT>::
+foreach_key(EntryRef ref, FunctionType func) const
+{
+ if (!ref.valid())
+ return;
+ RefType iRef(ref);
+ uint32_t clusterSize = getClusterSize(iRef);
+ if (clusterSize == 0) {
+ const BTreeType *tree = getTreeEntry(iRef);
+ _allocator.getNodeStore().foreach_key(Frozen ? tree->getFrozenRoot() : tree->getRoot(), func);
+ } else {
+ const KeyDataType *p = getKeyDataEntry(iRef, clusterSize);
+ const KeyDataType *pe = p + clusterSize;
+ for (; p != pe; ++p) {
+ func(p->_key);
+ }
+ }
+}
+
+template <typename KeyT, typename DataT, typename AggrT, typename CompareT,
+ typename TraitsT, typename AggrCalcT>
+template <typename FunctionType, bool Frozen>
+void
+BTreeStore<KeyT, DataT, AggrT, CompareT, TraitsT, AggrCalcT>::
+foreach(EntryRef ref, FunctionType func) const
+{
+ if (!ref.valid())
+ return;
+ RefType iRef(ref);
+ uint32_t clusterSize = getClusterSize(iRef);
+ if (clusterSize == 0) {
+ const BTreeType *tree = getTreeEntry(iRef);
+ _allocator.getNodeStore().foreach(Frozen ? tree->getFrozenRoot() : tree->getRoot(), func);
+ } else {
+ const KeyDataType *p = getKeyDataEntry(iRef, clusterSize);
+ const KeyDataType *pe = p + clusterSize;
+ for (; p != pe; ++p) {
+ func(p->_key, p->getData());
+ }
+ }
+}
+
+
+extern template class BTreeStore<uint32_t, uint32_t,
+ NoAggregated,
+ std::less<uint32_t>,
+ BTreeDefaultTraits>;
+
+extern template class BTreeStore<uint32_t, BTreeNoLeafData,
+ NoAggregated,
+ std::less<uint32_t>,
+ BTreeDefaultTraits>;
+
+extern template class BTreeStore<uint32_t, int32_t,
+ MinMaxAggregated,
+ std::less<uint32_t>,
+ BTreeDefaultTraits,
+ MinMaxAggrCalc>;
+
+} // namespace btree
+
+} // namespace search
+
+
diff --git a/searchlib/src/vespa/searchlib/btree/btreestore.hpp b/searchlib/src/vespa/searchlib/btree/btreestore.hpp
new file mode 100644
index 00000000000..f6223d93731
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/btree/btreestore.hpp
@@ -0,0 +1,1005 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "btreestore.h"
+#include <vespa/searchlib/bitcompression/compression.h>
+#include "btreebuilder.h"
+#include "btreebuilder.hpp"
+#include "datastore.hpp"
+
+namespace search
+{
+
+namespace btree
+{
+
+template <typename KeyT, typename DataT, typename AggrT, typename CompareT,
+ typename TraitsT, typename AggrCalcT>
+BTreeStore<KeyT, DataT, AggrT, CompareT, TraitsT, AggrCalcT>::
+BTreeStore()
+ : BTreeStore(true)
+{
+}
+
+template <typename KeyT, typename DataT, typename AggrT, typename CompareT,
+ typename TraitsT, typename AggrCalcT>
+BTreeStore<KeyT, DataT, AggrT, CompareT, TraitsT, AggrCalcT>::
+BTreeStore(bool init)
+ : _store(),
+ _treeType(1, MIN_CLUSTERS, RefType::offsetSize()),
+ _small1Type(1, MIN_CLUSTERS, RefType::offsetSize()),
+ _small2Type(2, MIN_CLUSTERS, RefType::offsetSize()),
+ _small3Type(3, MIN_CLUSTERS, RefType::offsetSize()),
+ _small4Type(4, MIN_CLUSTERS, RefType::offsetSize()),
+ _small5Type(5, MIN_CLUSTERS, RefType::offsetSize()),
+ _small6Type(6, MIN_CLUSTERS, RefType::offsetSize()),
+ _small7Type(7, MIN_CLUSTERS, RefType::offsetSize()),
+ _small8Type(8, MIN_CLUSTERS, RefType::offsetSize()),
+ _allocator(),
+ _aggrCalc(),
+ _builder(_allocator, _aggrCalc)
+{
+ // XXX: order here makes typeId + 1 == clusterSize for small arrays,
+ // code elsewhere depends on it.
+ _store.addType(&_small1Type);
+ _store.addType(&_small2Type);
+ _store.addType(&_small3Type);
+ _store.addType(&_small4Type);
+ _store.addType(&_small5Type);
+ _store.addType(&_small6Type);
+ _store.addType(&_small7Type);
+ _store.addType(&_small8Type);
+ _store.addType(&_treeType);
+ if (init) {
+ _store.initActiveBuffers();
+ _store.enableFreeLists();
+ }
+}
+
+
+template <typename KeyT, typename DataT, typename AggrT, typename CompareT,
+ typename TraitsT, typename AggrCalcT>
+BTreeStore<KeyT, DataT, AggrT, CompareT,TraitsT, AggrCalcT>::~BTreeStore(void)
+{
+ _builder.clear();
+ _store.dropBuffers(); // Drop buffers before type handlers are dropped
+}
+
+
+template <typename KeyT, typename DataT, typename AggrT, typename CompareT,
+ typename TraitsT, typename AggrCalcT>
+typename BTreeStore<KeyT, DataT, AggrT, CompareT, TraitsT, AggrCalcT>::
+KeyDataTypeRefPair
+BTreeStore<KeyT, DataT, AggrT, CompareT, TraitsT, AggrCalcT>::
+allocNewKeyData(uint32_t clusterSize)
+{
+ assert(clusterSize >= 1 && clusterSize <= clusterLimit);
+ uint32_t typeId = clusterSize - 1;
+ _store.ensureBufferCapacity(typeId, clusterSize);
+ uint32_t activeBufferId = _store.getActiveBufferId(typeId);
+ BufferState &state = _store.getBufferState(activeBufferId);
+ assert(state._state == BufferState::ACTIVE);
+ size_t oldSize = state.size();
+ KeyDataType *node =
+ _store.getBufferEntry<KeyDataType>(activeBufferId, oldSize);
+ for (uint32_t i = 0; i < clusterSize; ++i)
+ new (static_cast<void *>(node + i)) KeyDataType();
+ state.pushed_back(clusterSize);
+ return std::make_pair(RefType(oldSize / clusterSize, activeBufferId),
+ node);
+}
+
+
+template <typename KeyT, typename DataT, typename AggrT, typename CompareT,
+ typename TraitsT, typename AggrCalcT>
+typename BTreeStore<KeyT, DataT, AggrT, CompareT, TraitsT, AggrCalcT>::
+KeyDataTypeRefPair
+BTreeStore<KeyT, DataT, AggrT, CompareT, TraitsT, AggrCalcT>::
+allocKeyData(uint32_t clusterSize)
+{
+ assert(clusterSize >= 1 && clusterSize <= clusterLimit);
+ uint32_t typeId = clusterSize - 1;
+ BufferState::FreeListList &freeListList = _store.getFreeList(typeId);
+ if (freeListList._head == NULL)
+ return allocNewKeyData(clusterSize);
+ BufferState &state = *freeListList._head;
+ assert(state._state == BufferState::ACTIVE);
+ RefType ref(state.popFreeList());
+ KeyDataType *node =
+ _store.getBufferEntry<KeyDataType>(ref.bufferId(),
+ ref.offset() * clusterSize);
+ return std::make_pair(ref, node);
+}
+
+
+template <typename KeyT, typename DataT, typename AggrT, typename CompareT,
+ typename TraitsT, typename AggrCalcT>
+typename BTreeStore<KeyT, DataT, AggrT, CompareT, TraitsT, AggrCalcT>::
+KeyDataTypeRefPair
+BTreeStore<KeyT, DataT, AggrT, CompareT, TraitsT, AggrCalcT>::
+allocNewKeyDataCopy(const KeyDataType *rhs, uint32_t clusterSize)
+{
+ assert(clusterSize >= 1 && clusterSize <= clusterLimit);
+ uint32_t typeId = clusterSize - 1;
+ _store.ensureBufferCapacity(typeId, clusterSize);
+ uint32_t activeBufferId = _store.getActiveBufferId(typeId);
+ BufferState &state = _store.getBufferState(activeBufferId);
+ assert(state._state == BufferState::ACTIVE);
+ size_t oldSize = state.size();
+ KeyDataType *node =
+ _store.getBufferEntry<KeyDataType>(activeBufferId, oldSize);
+ for (uint32_t i = 0; i < clusterSize; ++i)
+ new (static_cast<void *>(node + i)) KeyDataType(*(rhs + i));
+ state.pushed_back(clusterSize);
+ return std::make_pair(RefType(oldSize / clusterSize, activeBufferId),
+ node);
+}
+
+
+template <typename KeyT, typename DataT, typename AggrT, typename CompareT,
+ typename TraitsT, typename AggrCalcT>
+typename BTreeStore<KeyT, DataT, AggrT, CompareT, TraitsT, AggrCalcT>::
+KeyDataTypeRefPair
+BTreeStore<KeyT, DataT, AggrT, CompareT, TraitsT, AggrCalcT>::
+allocKeyDataCopy(const KeyDataType *rhs, uint32_t clusterSize)
+{
+ assert(clusterSize >= 1 && clusterSize <= clusterLimit);
+ uint32_t typeId = clusterSize - 1;
+ BufferState::FreeListList &freeListList = _store.getFreeList(typeId);
+ if (freeListList._head == NULL)
+ return allocNewKeyDataCopy(rhs, clusterSize);
+ BufferState &state = *freeListList._head;
+ assert(state._state == BufferState::ACTIVE);
+ RefType ref(state.popFreeList());
+ KeyDataType *node =
+ _store.getBufferEntry<KeyDataType>(ref.bufferId(),
+ ref.offset() * clusterSize);
+ for (uint32_t i = 0; i < clusterSize; ++i)
+ *(node + i) = *(rhs + i);
+ return std::make_pair(ref, node);
+}
+
+
+template <typename KeyT, typename DataT, typename AggrT, typename CompareT,
+ typename TraitsT, typename AggrCalcT>
+std::vector<uint32_t>
+BTreeStore<KeyT, DataT, AggrT, CompareT, TraitsT, AggrCalcT>::startCompact(void)
+{
+ std::vector<uint32_t> ret = _store.startCompact(clusterLimit);
+ for (uint32_t clusterSize = 1; clusterSize <= clusterLimit; ++clusterSize) {
+ uint32_t typeId = clusterSize - 1;
+ std::vector<uint32_t> toHold = _store.startCompact(typeId);
+ for (auto i : toHold) {
+ ret.push_back(i);
+ }
+ }
+ return ret;
+}
+
+
+template <typename KeyT, typename DataT, typename AggrT, typename CompareT,
+ typename TraitsT, typename AggrCalcT>
+void
+BTreeStore<KeyT, DataT, AggrT, CompareT, TraitsT, AggrCalcT>::
+finishCompact(const std::vector<uint32_t> &toHold)
+{
+ _store.finishCompact(toHold);
+}
+
+
+template <typename KeyT, typename DataT, typename AggrT, typename CompareT,
+ typename TraitsT, typename AggrCalcT>
+const typename BTreeStore<KeyT, DataT, AggrT, CompareT, TraitsT, AggrCalcT>::
+KeyDataType *
+BTreeStore<KeyT, DataT, AggrT, CompareT, TraitsT, AggrCalcT>::
+lower_bound(const KeyDataType *b, const KeyDataType *e,
+ const KeyType &key, CompareT comp)
+{
+ const KeyDataType *i = b;
+ for (; i != e; ++i) {
+ if (!comp(i->_key, key))
+ break;
+ }
+ return i;
+}
+
+
+template <typename KeyT, typename DataT, typename AggrT, typename CompareT,
+ typename TraitsT, typename AggrCalcT>
+void
+BTreeStore<KeyT, DataT, AggrT, CompareT, TraitsT, AggrCalcT>::
+makeTree(EntryRef &ref,
+ const KeyDataType *array, uint32_t clusterSize)
+{
+ LeafNodeTypeRefPair lPair(_allocator.allocLeafNode());
+ LeafNodeType *lNode = lPair.second;
+ lNode->setValidSlots(clusterSize);
+ const KeyDataType *o = array;
+ for (uint32_t idx = 0; idx < clusterSize; ++idx, ++o) {
+ lNode->update(idx, o->_key, o->getData());
+ }
+ typedef BTreeAggregator<KeyT, DataT, AggrT,
+ TraitsT::INTERNAL_SLOTS, TraitsT::LEAF_SLOTS, AggrCalcT> Aggregator;
+ if (AggrCalcT::hasAggregated()) {
+ Aggregator::recalc(*lNode, _aggrCalc);
+ }
+ lNode->freeze();
+ BTreeTypeRefPair tPair(allocBTree());
+ tPair.second->setRoots(lPair.first);
+ _store.holdElem(ref, clusterSize);
+ ref = tPair.first;
+}
+
+
+template <typename KeyT, typename DataT, typename AggrT, typename CompareT,
+ typename TraitsT, typename AggrCalcT>
+void
+BTreeStore<KeyT, DataT, AggrT, CompareT, TraitsT, AggrCalcT>::
+makeArray(EntryRef &ref, EntryRef root, LeafNodeType *leafNode)
+{
+ uint32_t clusterSize = leafNode->validSlots();
+ KeyDataTypeRefPair kPair(allocKeyData(clusterSize));
+ KeyDataType *kd = kPair.second;
+ // Copy whole leaf node
+ for (uint32_t idx = 0; idx < clusterSize; ++idx, ++kd) {
+ kd->_key = leafNode->getKey(idx);
+ kd->setData(leafNode->getData(idx));
+ }
+ assert(kd == kPair.second + clusterSize);
+ _store.holdElem(ref, 1);
+ if (!leafNode->getFrozen()) {
+ leafNode->freeze();
+ }
+ _allocator.holdNode(root, leafNode);
+ ref = kPair.first;
+}
+
+
+template <typename KeyT, typename DataT, typename AggrT, typename CompareT,
+ typename TraitsT, typename AggrCalcT>
+bool
+BTreeStore<KeyT, DataT, AggrT, CompareT, TraitsT, AggrCalcT>::
+insert(EntryRef &ref,
+ const KeyType &key, const DataType &data,
+ CompareT comp)
+{
+#ifdef FORCE_APPLY
+ bool retVal = true;
+ if (ref.valid()) {
+ RefType iRef(ref);
+ uint32_t clusterSize = getClusterSize(iRef);
+ if (clusterSize == 0) {
+ const BTreeType *tree = getTreeEntry(iRef);
+ Iterator itr = tree->find(key, _allocator, comp);
+ if (itr.valid())
+ retVal = false;
+ } else {
+ const KeyDataType *old = getKeyDataEntry(iRef, clusterSize);
+ const KeyDataType *olde = old + clusterSize;
+ const KeyDataType *oldi = lower_bound(old, olde, key, comp);
+ if (oldi < olde && !comp(key, oldi->_key))
+ retVal = false; // key already present
+ }
+ }
+ KeyDataType addition(key, data);
+ if (retVal) {
+ apply(ref, &addition, &addition+1, NULL, NULL, comp);
+ }
+ return retVal;
+#else
+ if (!ref.valid()) {
+ KeyDataTypeRefPair kPair(allocKeyData(1));
+ KeyDataType *kd = kPair.second;
+ kd->_key = key;
+ kd->setData(data);
+ ref = kPair.first;
+ return true;
+ }
+ RefType iRef(ref);
+ uint32_t clusterSize = getClusterSize(iRef);
+ if (clusterSize == 0) {
+ BTreeType *tree = getWTreeEntry(iRef);
+ return tree->insert(key, data, _allocator, comp, _aggrCalc);
+ }
+ const KeyDataType *old = getKeyDataEntry(iRef, clusterSize);
+ const KeyDataType *olde = old + clusterSize;
+ const KeyDataType *oldi = lower_bound(old, olde, key, comp);
+ if (oldi < olde && !comp(key, oldi->_key))
+ return false; // key already present
+ if (clusterSize < clusterLimit) {
+ // Grow array
+ KeyDataTypeRefPair kPair(allocKeyData(clusterSize + 1));
+ KeyDataType *kd = kPair.second;
+ // Copy data before key
+ for (const KeyDataType *i = old; i != oldi; ++i, ++kd) {
+ kd->_key = i->_key;
+ kd->setData(i->getData());
+ }
+ // Copy key
+ kd->_key = key;
+ kd->setData(data);
+ ++kd;
+ // Copy data after key
+ for (const KeyDataType *i = oldi; i != olde; ++i, ++kd) {
+ kd->_key = i->_key;
+ kd->setData(i->getData());
+ }
+ assert(kd == kPair.second + clusterSize + 1);
+ _store.holdElem(ref, clusterSize);
+ ref = kPair.first;
+ return true;
+ }
+ // Convert from short array to tree
+ LeafNodeTypeRefPair lPair(_allocator.allocLeafNode());
+ LeafNodeType *lNode = lPair.second;
+ uint32_t idx = 0;
+ lNode->setValidSlots(clusterSize + 1);
+ // Copy data before key
+ for (const KeyDataType *i = old; i != oldi; ++i, ++idx) {
+ lNode->update(idx, i->_key, i->getData());
+ }
+ // Copy key
+ lNode->update(idx, key, data);
+ ++idx;
+ // Copy data after key
+ for (const KeyDataType *i = oldi; i != olde; ++i, ++idx) {
+ lNode->update(idx, i->_key, i->getData());
+ }
+ assert(idx == clusterSize + 1);
+ typedef BTreeAggregator<KeyT, DataT, AggrT,
+ TraitsT::INTERNAL_SLOTS, TraitsT::LEAF_SLOTS, AggrCalcT> Aggregator;
+ if (AggrCalcT::hasAggregated()) {
+ Aggregator::recalc(*lNode, _aggrCalc);
+ }
+ lNode->freeze();
+ BTreeTypeRefPair tPair(allocBTree());
+ tPair.second->setRoots(lPair.first); // allow immediate access to readers
+ _store.holdElem(ref, clusterSize);
+ ref = tPair.first;
+ return true;
+#endif
+}
+
+
+template <typename KeyT, typename DataT, typename AggrT, typename CompareT,
+ typename TraitsT, typename AggrCalcT>
+bool
+BTreeStore<KeyT, DataT, AggrT, CompareT, TraitsT, AggrCalcT>::
+remove(EntryRef &ref,
+ const KeyType &key,
+ CompareT comp)
+{
+#ifdef FORCE_APPLY
+ bool retVal = true;
+ if (!ref.valid())
+ retVal = false; // not found
+ else {
+ RefType iRef(ref);
+ uint32_t clusterSize = getClusterSize(iRef);
+ if (clusterSize == 0) {
+ const BTreeType *tree = getTreeEntry(iRef);
+ Iterator itr = tree->find(key, _allocator, comp);
+ if (!itr.valid())
+ retVal = false;
+ } else {
+ const KeyDataType *old = getKeyDataEntry(iRef, clusterSize);
+ const KeyDataType *olde = old + clusterSize;
+ const KeyDataType *oldi = lower_bound(old, olde, key, comp);
+ if (oldi == olde || comp(key, oldi->_key))
+ retVal = false; // not found
+ }
+ }
+ std::vector<KeyDataType> additions;
+ std::vector<KeyType> removals;
+ removals.push_back(key);
+ apply(ref,
+ &additions[0], &additions[additions.size()],
+ &removals[0], &removals[removals.size()],
+ comp);
+ return retVal;
+#else
+ if (!ref.valid())
+ return false; // not found
+ RefType iRef(ref);
+ uint32_t clusterSize = getClusterSize(iRef);
+ if (clusterSize != 0) {
+ const KeyDataType *old = getKeyDataEntry(iRef, clusterSize);
+ const KeyDataType *olde = old + clusterSize;
+ const KeyDataType *oldi = lower_bound(old, olde, key, comp);
+ if (oldi == olde || comp(key, oldi->_key))
+ return false; // not found
+ if (clusterSize == 1) {
+ _store.holdElem(ref, 1);
+ ref = EntryRef();
+ return true;
+ }
+ // Copy to smaller array
+ KeyDataTypeRefPair kPair(allocKeyData(clusterSize - 1));
+ KeyDataType *kd = kPair.second;
+ // Copy data before key
+ for (const KeyDataType *i = old; i != oldi; ++i, ++kd) {
+ kd->_key = i->_key;
+ kd->setData(i->getData());
+ }
+ // Copy data after key
+ for (const KeyDataType *i = oldi + 1; i != olde; ++i, ++kd) {
+ kd->_key = i->_key;
+ kd->setData(i->getData());
+ }
+ assert(kd == kPair.second + clusterSize - 1);
+ _store.holdElem(ref, clusterSize);
+ ref = kPair.first;
+ return true;
+ }
+ BTreeType *tree = getWTreeEntry(iRef);
+ if (!tree->remove(key, _allocator, comp, _aggrCalc))
+ return false; // not found
+ EntryRef root = tree->getRoot();
+ assert(NodeAllocatorType::isValidRef(root));
+ if (!_allocator.isLeafRef(root))
+ return true;
+ LeafNodeType *lNode = _allocator.mapLeafRef(root);
+ clusterSize = lNode->validSlots();
+ assert(clusterSize > 0);
+ if (clusterSize > clusterLimit)
+ return true;
+ // Convert from tree to short array
+ makeArray(ref, root, lNode);
+ return true;
+#endif
+}
+
+
+template <typename KeyT, typename DataT, typename AggrT, typename CompareT,
+ typename TraitsT, typename AggrCalcT>
+uint32_t
+BTreeStore<KeyT, DataT, AggrT, CompareT, TraitsT, AggrCalcT>::
+getNewClusterSize(const KeyDataType *o,
+ const KeyDataType *oe,
+ AddIter a,
+ AddIter ae,
+ RemoveIter r,
+ RemoveIter re,
+ CompareT comp)
+{
+ uint32_t d = 0u;
+ if (o == oe && a == ae)
+ return 0u;
+ while (a != ae || r != re) {
+ if (r != re && (a == ae || comp(*r, a->_key))) {
+ // remove
+ while (o != oe && comp(o->_key, *r)) {
+ ++d;
+ ++o;
+ }
+ if (o != oe && !comp(*r, o->_key))
+ ++o;
+ ++r;
+ } else {
+ // add or update
+ while (o != oe && comp(o->_key, a->_key)) {
+ ++d;
+ ++o;
+ }
+ if (o != oe && !comp(a->_key, o->_key))
+ ++o;
+ ++d;
+ if (r != re && !comp(a->_key, *r))
+ ++r;
+ ++a;
+ }
+ }
+ while (o != oe) {
+ ++d;
+ ++o;
+ }
+ return d;
+}
+
+
+template <typename KeyT, typename DataT, typename AggrT, typename CompareT,
+ typename TraitsT, typename AggrCalcT>
+void
+BTreeStore<KeyT, DataT, AggrT, CompareT, TraitsT, AggrCalcT>::
+applyCluster(const KeyDataType *o,
+ const KeyDataType *oe,
+ KeyDataType *d,
+ const KeyDataType *de,
+ AddIter a,
+ AddIter ae,
+ RemoveIter r,
+ RemoveIter re,
+ CompareT comp)
+{
+ while (a != ae || r != re) {
+ if (r != re && (a == ae || comp(*r, a->_key))) {
+ // remove
+ while (o != oe && comp(o->_key, *r)) {
+ d->_key = o->_key;
+ d->setData(o->getData());
+ ++d;
+ ++o;
+ }
+ if (o != oe && !comp(*r, o->_key))
+ ++o;
+ ++r;
+ } else {
+ // add or update
+ while (o != oe && comp(o->_key, a->_key)) {
+ d->_key = o->_key;
+ d->setData(o->getData());
+ ++d;
+ ++o;
+ }
+ if (o != oe && !comp(a->_key, o->_key))
+ ++o;
+ d->_key = a->_key;
+ d->setData(a->getData());
+ ++d;
+ if (r != re && !comp(a->_key, *r))
+ ++r;
+ ++a;
+ }
+ }
+ while (o != oe) {
+ d->_key = o->_key;
+ d->setData(o->getData());
+ ++d;
+ ++o;
+ }
+ assert(d == de);
+ (void) de;
+}
+
+
+template <typename KeyT, typename DataT, typename AggrT, typename CompareT,
+ typename TraitsT, typename AggrCalcT>
+void
+BTreeStore<KeyT, DataT, AggrT, CompareT, TraitsT, AggrCalcT>::
+applyModifyTree(BTreeType *tree,
+ AddIter a,
+ AddIter ae,
+ RemoveIter r,
+ RemoveIter re,
+ CompareT comp)
+{
+ if (a == ae && r == re)
+ return;
+ Iterator itr(BTreeNode::Ref(), _allocator);
+ itr.lower_bound(tree->getRoot(),
+ (a != ae && r != re) ? (comp(a->_key, *r) ? a->_key : *r) :
+ ((a != ae) ? a->_key : *r),
+ comp);
+ while (a != ae || r != re) {
+ if (r != re && (a == ae || comp(*r, a->_key))) {
+ // remove
+ if (itr.valid() && comp(itr.getKey(), *r)) {
+ itr.binarySeek(*r, comp);
+ }
+ if (itr.valid() && !comp(*r, itr.getKey())) {
+ tree->remove(itr, _aggrCalc);
+ }
+ ++r;
+ } else {
+ // update or add
+ if (itr.valid() && comp(itr.getKey(), a->_key)) {
+ itr.binarySeek(a->_key, comp);
+ }
+ if (itr.valid() && !comp(a->_key, itr.getKey())) {
+ tree->thaw(itr);
+ itr.updateData(a->getData(), _aggrCalc);
+ } else {
+ tree->insert(itr, a->_key, a->getData(), _aggrCalc);
+ }
+ if (r != re && !comp(a->_key, *r)) {
+ ++r;
+ }
+ ++a;
+ }
+ }
+}
+
+
+template <typename KeyT, typename DataT, typename AggrT, typename CompareT,
+ typename TraitsT, typename AggrCalcT>
+void
+BTreeStore<KeyT, DataT, AggrT, CompareT, TraitsT, AggrCalcT>::
+applyBuildTree(BTreeType *tree,
+ AddIter a,
+ AddIter ae,
+ RemoveIter r,
+ RemoveIter re,
+ CompareT comp)
+{
+ Iterator itr = tree->begin(_allocator);
+ Builder &builder = _builder;
+ builder.reuse();
+ while (a != ae || r != re) {
+ if (r != re && (a == ae || comp(*r, a->_key))) {
+ // remove
+ while (itr.valid() && comp(itr.getKey(), *r)) {
+ builder.insert(itr.getKey(), itr.getData());
+ ++itr;
+ }
+ if (itr.valid() && !comp(*r, itr.getKey()))
+ ++itr;
+ ++r;
+ } else {
+ // add or update
+ while (itr.valid() && comp(itr.getKey(), a->_key)) {
+ builder.insert(itr.getKey(), itr.getData());
+ ++itr;
+ }
+ if (itr.valid() && !comp(a->_key, itr.getKey()))
+ ++itr;
+ builder.insert(a->_key, a->getData());
+ if (r != re && !comp(a->_key, *r))
+ ++r;
+ ++a;
+ }
+ }
+ while (itr.valid()) {
+ builder.insert(itr.getKey(), itr.getData());
+ ++itr;
+ }
+ tree->assign(builder, _allocator);
+}
+
+
+template <typename KeyT, typename DataT, typename AggrT, typename CompareT,
+ typename TraitsT, typename AggrCalcT>
+void
+BTreeStore<KeyT, DataT, AggrT, CompareT, TraitsT, AggrCalcT>::
+applyNewArray(EntryRef &ref,
+ AddIter aOrg,
+ AddIter ae)
+{
+ assert(!ref.valid());
+ if (aOrg == ae) {
+ // No new data
+ return;
+ }
+ size_t additionSize(ae - aOrg);
+ uint32_t clusterSize = additionSize;
+ assert(clusterSize <= clusterLimit);
+ KeyDataTypeRefPair kPair(allocKeyData(clusterSize));
+ KeyDataType *kd = kPair.second;
+ AddIter a = aOrg;
+ for (;a != ae; ++a, ++kd) {
+ kd->_key = a->_key;
+ kd->setData(a->getData());
+ }
+ assert(kd == kPair.second + clusterSize);
+ assert(a == ae);
+ ref = kPair.first;
+ }
+
+
+template <typename KeyT, typename DataT, typename AggrT, typename CompareT,
+ typename TraitsT, typename AggrCalcT>
+void
+BTreeStore<KeyT, DataT, AggrT, CompareT, TraitsT, AggrCalcT>::
+applyNewTree(EntryRef &ref,
+ AddIter a,
+ AddIter ae,
+ CompareT comp)
+{
+ assert(!ref.valid());
+ size_t additionSize(ae - a);
+ BTreeTypeRefPair tPair(allocBTree());
+ BTreeType *tree = tPair.second;
+ applyBuildTree(tree, a, ae, nullptr, nullptr, comp);
+ assert(tree->size(_allocator) == additionSize);
+ ref = tPair.first;
+}
+
+
+template <typename KeyT, typename DataT, typename AggrT, typename CompareT,
+ typename TraitsT, typename AggrCalcT>
+void
+BTreeStore<KeyT, DataT, AggrT, CompareT, TraitsT, AggrCalcT>::
+applyNew(EntryRef &ref,
+ AddIter a,
+ AddIter ae,
+ CompareT comp)
+{
+ // No old data
+ assert(!ref.valid());
+ size_t additionSize(ae - a);
+ uint32_t clusterSize = additionSize;
+ if (clusterSize <= clusterLimit) {
+ applyNewArray(ref, a, ae);
+ } else {
+ applyNewTree(ref, a, ae, comp);
+ }
+}
+
+
+template <typename KeyT, typename DataT, typename AggrT, typename CompareT,
+ typename TraitsT, typename AggrCalcT>
+bool
+BTreeStore<KeyT, DataT, AggrT, CompareT, TraitsT, AggrCalcT>::
+applyCluster(EntryRef &ref,
+ uint32_t clusterSize,
+ AddIter a,
+ AddIter ae,
+ RemoveIter r,
+ RemoveIter re,
+ CompareT comp)
+{
+ size_t additionSize(ae - a);
+ size_t removeSize(re - r);
+ uint32_t newSizeMin =
+ std::max(clusterSize,
+ static_cast<uint32_t>(additionSize)) -
+ std::min(clusterSize, static_cast<uint32_t>(removeSize));
+ RefType iRef(ref);
+ const KeyDataType *ob = getKeyDataEntry(iRef, clusterSize);
+ const KeyDataType *oe = ob + clusterSize;
+ if (newSizeMin <= clusterLimit) {
+ uint32_t newSize = getNewClusterSize(ob, oe, a, ae, r, re, comp);
+ if (newSize == 0) {
+ _store.holdElem(ref, clusterSize);
+ ref = EntryRef();
+ return true;
+ }
+ if (newSize <= clusterLimit) {
+ KeyDataTypeRefPair kPair(allocKeyData(newSize));
+ applyCluster(ob, oe, kPair.second, kPair.second + newSize,
+ a, ae, r, re, comp);
+ _store.holdElem(ref, clusterSize);
+ ref = kPair.first;
+ return true;
+ }
+ }
+ // Convert from short array to tree
+ makeTree(ref, ob, clusterSize);
+ return false;
+}
+
+
+template <typename KeyT, typename DataT, typename AggrT, typename CompareT,
+ typename TraitsT, typename AggrCalcT>
+void
+BTreeStore<KeyT, DataT, AggrT, CompareT, TraitsT, AggrCalcT>::
+applyTree(BTreeType *tree,
+ AddIter a,
+ AddIter ae,
+ RemoveIter r,
+ RemoveIter re,
+ CompareT comp)
+{
+ // Old data was tree or has been converted to a tree
+ uint32_t treeSize = tree->size(_allocator);
+ size_t additionSize(ae - a);
+ size_t removeSize(re - r);
+ uint64_t buildCost = treeSize * 2 + additionSize;
+ typedef bitcompression::EncodeContext64BE EC;
+ uint64_t modifyCost = (EC::log2(treeSize + additionSize) + 1) *
+ (additionSize + removeSize);
+ if (modifyCost < buildCost)
+ applyModifyTree(tree, a, ae, r, re, comp);
+ else
+ applyBuildTree(tree, a, ae, r, re, comp);
+}
+
+
+template <typename KeyT, typename DataT, typename AggrT, typename CompareT,
+ typename TraitsT, typename AggrCalcT>
+void
+BTreeStore<KeyT, DataT, AggrT, CompareT, TraitsT, AggrCalcT>::
+normalizeTree(EntryRef &ref,
+ BTreeType *tree,
+ bool wasArray)
+{
+ EntryRef root = tree->getRoot();
+ if (!NodeAllocatorType::isValidRef(root)) {
+ _store.holdElem(ref, 1);
+ ref = EntryRef();
+ return;
+ }
+ if (!_allocator.isLeafRef(root))
+ return;
+ LeafNodeType *lNode = _allocator.mapLeafRef(root);
+ uint32_t treeSize = lNode->validSlots();
+ assert(treeSize > 0);
+ if (treeSize > clusterLimit)
+ return;
+ assert(!wasArray); // Should never have used tree
+ (void) wasArray;
+ // Convert from tree to short array
+ makeArray(ref, root, lNode);
+}
+
+
+template <typename KeyT, typename DataT, typename AggrT, typename CompareT,
+ typename TraitsT, typename AggrCalcT>
+void
+BTreeStore<KeyT, DataT, AggrT, CompareT, TraitsT, AggrCalcT>::
+apply(EntryRef &ref,
+ AddIter a,
+ AddIter ae,
+ RemoveIter r,
+ RemoveIter re,
+ CompareT comp)
+{
+ if (!ref.valid()) {
+ // No old data
+ applyNew(ref, a, ae, comp);
+ return;
+ }
+ RefType iRef(ref);
+ bool wasArray = false;
+ uint32_t clusterSize = getClusterSize(iRef);
+ if (clusterSize != 0) {
+ wasArray = true;
+ if (applyCluster(ref, clusterSize, a, ae, r, re, comp))
+ return;
+ iRef = ref;
+ }
+ // Old data was tree or has been converted to a tree
+ BTreeType *tree = getWTreeEntry(iRef);
+ applyTree(tree, a, ae, r, re, comp);
+ normalizeTree(ref, tree, wasArray);
+}
+
+
+template <typename KeyT, typename DataT, typename AggrT, typename CompareT,
+ typename TraitsT, typename AggrCalcT>
+void
+BTreeStore<KeyT, DataT, AggrT, CompareT, TraitsT, AggrCalcT>::
+clear(const EntryRef ref)
+{
+ if (!ref.valid())
+ return;
+ RefType iRef(ref);
+ uint32_t clusterSize = getClusterSize(iRef);
+ if (clusterSize == 0) {
+ BTreeType *tree = getWTreeEntry(iRef);
+ tree->clear(_allocator);
+ _store.holdElem(ref, 1);
+ } else {
+ _store.holdElem(ref, clusterSize);
+ }
+}
+
+
+template <typename KeyT, typename DataT, typename AggrT, typename CompareT,
+ typename TraitsT, typename AggrCalcT>
+size_t
+BTreeStore<KeyT, DataT, AggrT, CompareT, TraitsT, AggrCalcT>::
+size(const EntryRef ref) const
+{
+ if (!ref.valid())
+ return 0;
+ RefType iRef(ref);
+ uint32_t clusterSize = getClusterSize(iRef);
+ if (clusterSize == 0) {
+ const BTreeType *tree = getTreeEntry(iRef);
+ return tree->size(_allocator);
+ }
+ return clusterSize;
+}
+
+
+template <typename KeyT, typename DataT, typename AggrT, typename CompareT,
+ typename TraitsT, typename AggrCalcT>
+size_t
+BTreeStore<KeyT, DataT, AggrT, CompareT, TraitsT, AggrCalcT>::
+frozenSize(const EntryRef ref) const
+{
+ if (!ref.valid())
+ return 0;
+ RefType iRef(ref);
+ uint32_t clusterSize = getClusterSize(iRef);
+ if (clusterSize == 0) {
+ const BTreeType *tree = getTreeEntry(iRef);
+ return tree->frozenSize(_allocator);
+ }
+ return clusterSize;
+}
+
+
+template <typename KeyT, typename DataT, typename AggrT, typename CompareT,
+ typename TraitsT, typename AggrCalcT>
+bool
+BTreeStore<KeyT, DataT, AggrT, CompareT, TraitsT, AggrCalcT>::
+isSmallArray(const EntryRef ref) const
+{
+ if (!ref.valid())
+ return true;
+ RefType iRef(ref);
+ uint32_t typeId(_store.getBufferState(iRef.bufferId()).getTypeId());
+ return typeId < clusterLimit;
+}
+
+
+template <typename KeyT, typename DataT, typename AggrT, typename CompareT,
+ typename TraitsT, typename AggrCalcT>
+typename BTreeStore<KeyT, DataT, AggrT, CompareT, TraitsT, AggrCalcT>::
+Iterator
+BTreeStore<KeyT, DataT, AggrT, CompareT, TraitsT, AggrCalcT>::
+begin(const EntryRef ref) const
+{
+ if (!ref.valid())
+ return Iterator();
+ RefType iRef(ref);
+ uint32_t clusterSize = getClusterSize(iRef);
+ if (clusterSize == 0) {
+ const BTreeType *tree = getTreeEntry(iRef);
+ return tree->begin(_allocator);
+ }
+ const KeyDataType *shortArray = getKeyDataEntry(iRef, clusterSize);
+ return Iterator(shortArray, clusterSize, _allocator, _aggrCalc);
+}
+
+
+template <typename KeyT, typename DataT, typename AggrT, typename CompareT,
+ typename TraitsT, typename AggrCalcT>
+typename BTreeStore<KeyT, DataT, AggrT, CompareT, TraitsT, AggrCalcT>::
+ConstIterator
+BTreeStore<KeyT, DataT, AggrT, CompareT, TraitsT, AggrCalcT>::
+beginFrozen(const EntryRef ref) const
+{
+ if (!ref.valid())
+ return ConstIterator();
+ RefType iRef(ref);
+ uint32_t clusterSize = getClusterSize(iRef);
+ if (clusterSize == 0) {
+ const BTreeType *tree = getTreeEntry(iRef);
+ return tree->getFrozenView(_allocator).begin();
+ }
+ const KeyDataType *shortArray = getKeyDataEntry(iRef, clusterSize);
+ return ConstIterator(shortArray, clusterSize, _allocator, _aggrCalc);
+}
+
+template <typename KeyT, typename DataT, typename AggrT, typename CompareT,
+ typename TraitsT, typename AggrCalcT>
+void
+BTreeStore<KeyT, DataT, AggrT, CompareT, TraitsT, AggrCalcT>::
+beginFrozen(const EntryRef ref, std::vector<ConstIterator> &where) const
+{
+ if (!ref.valid()) {
+ where.emplace_back();
+ return;
+ }
+ RefType iRef(ref);
+ uint32_t clusterSize = getClusterSize(iRef);
+ if (clusterSize == 0) {
+ const BTreeType *tree = getTreeEntry(iRef);
+ tree->getFrozenView(_allocator).begin(where);
+ return;
+ }
+ const KeyDataType *shortArray = getKeyDataEntry(iRef, clusterSize);
+ where.emplace_back(shortArray, clusterSize, _allocator, _aggrCalc);
+}
+
+template <typename KeyT, typename DataT, typename AggrT, typename CompareT,
+ typename TraitsT, typename AggrCalcT>
+typename BTreeStore<KeyT, DataT, AggrT, CompareT, TraitsT, AggrCalcT>::
+AggregatedType
+BTreeStore<KeyT, DataT, AggrT, CompareT, TraitsT, AggrCalcT>::
+getAggregated(const EntryRef ref) const
+{
+ if (!ref.valid())
+ return AggregatedType();
+ RefType iRef(ref);
+ uint32_t clusterSize = getClusterSize(iRef);
+ if (clusterSize == 0) {
+ const BTreeType *tree = getTreeEntry(iRef);
+ return tree->getAggregated(_allocator);
+ }
+ const KeyDataType *shortArray = getKeyDataEntry(iRef, clusterSize);
+ AggregatedType a;
+ for (uint32_t i = 0; i < clusterSize; ++i) {
+ _aggrCalc.add(a, _aggrCalc.getVal(shortArray[i].getData()));
+ }
+ return a;
+}
+
+} // namespace btree
+
+} // namespace search
+
+
diff --git a/searchlib/src/vespa/searchlib/btree/btreetraits.h b/searchlib/src/vespa/searchlib/btree/btreetraits.h
new file mode 100644
index 00000000000..3b3962fb7ba
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/btree/btreetraits.h
@@ -0,0 +1,25 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <sys/types.h>
+
+namespace search
+{
+
+namespace btree
+{
+
+template <size_t LS, size_t IS, size_t PS, bool BS>
+struct BTreeTraits {
+ static const size_t LEAF_SLOTS = LS;
+ static const size_t INTERNAL_SLOTS = IS;
+ static const size_t PATH_SIZE = PS;
+ static const bool BINARY_SEEK = BS;
+};
+
+typedef BTreeTraits<16, 16, 10, true> BTreeDefaultTraits;
+
+} // namespace search::btree
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/btree/bufferstate.cpp b/searchlib/src/vespa/searchlib/btree/bufferstate.cpp
new file mode 100644
index 00000000000..21f548187ee
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/btree/bufferstate.cpp
@@ -0,0 +1,351 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "bufferstate.h"
+#include <limits>
+
+namespace search
+{
+
+namespace btree
+{
+
+
+BufferTypeBase::BufferTypeBase(uint32_t clusterSize,
+ uint32_t minClusters,
+ uint32_t maxClusters)
+ : _clusterSize(clusterSize),
+ _minClusters(std::min(minClusters, maxClusters)),
+ _maxClusters(maxClusters),
+ _activeBuffers(0),
+ _holdBuffers(0),
+ _activeUsedElems(0),
+ _holdUsedElems(0),
+ _lastUsedElems(NULL)
+{
+}
+
+
+BufferTypeBase::~BufferTypeBase(void)
+{
+ assert(_activeBuffers == 0);
+ assert(_holdBuffers == 0);
+ assert(_activeUsedElems == 0);
+ assert(_holdUsedElems == 0);
+ assert(_lastUsedElems == NULL);
+}
+
+
+void
+BufferTypeBase::flushLastUsed(void)
+{
+ if (_lastUsedElems != NULL) {
+ _activeUsedElems += *_lastUsedElems;
+ _lastUsedElems = NULL;
+ }
+}
+
+
+void
+BufferTypeBase::onActive(const size_t *usedElems)
+{
+ flushLastUsed();
+ ++_activeBuffers;
+ _lastUsedElems = usedElems;
+}
+
+
+void
+BufferTypeBase::onHold(const size_t *usedElems)
+{
+ if (usedElems == _lastUsedElems)
+ flushLastUsed();
+ --_activeBuffers;
+ ++_holdBuffers;
+ assert(_activeUsedElems >= *usedElems);
+ _activeUsedElems -= *usedElems;
+ _holdUsedElems += *usedElems;
+}
+
+
+void
+BufferTypeBase::onFree(size_t usedElems)
+{
+ --_holdBuffers;
+ assert(_holdUsedElems >= usedElems);
+ _holdUsedElems -= usedElems;
+}
+
+
+size_t
+BufferTypeBase::calcClustersToAlloc(size_t sizeNeeded,
+ uint64_t clusterRefSize) const
+{
+ size_t usedElems = _activeUsedElems;
+ if (_lastUsedElems != NULL)
+ usedElems += *_lastUsedElems;
+ assert((usedElems % _clusterSize) == 0);
+ uint64_t maxClusters = std::numeric_limits<size_t>::max() / _clusterSize;
+ uint64_t maxClusters2 = clusterRefSize;
+ if (maxClusters > maxClusters2)
+ maxClusters = maxClusters2;
+ if (maxClusters > _maxClusters)
+ maxClusters = _maxClusters;
+ uint32_t minClusters = _minClusters;
+ if (minClusters > maxClusters)
+ minClusters = maxClusters;
+ size_t usedClusters = usedElems / _clusterSize;
+ size_t needClusters = (sizeNeeded + _clusterSize - 1) / _clusterSize;
+ uint64_t wantClusters = usedClusters + minClusters;
+ if (wantClusters < needClusters)
+ wantClusters = needClusters;
+ if (wantClusters > maxClusters)
+ wantClusters = maxClusters;
+ return wantClusters;
+}
+
+
+BufferState::FreeListList::~FreeListList(void)
+{
+ assert(_head == NULL); // Owner should have disabled free lists
+}
+
+
+BufferState::BufferState(void)
+ : _usedElems(0),
+ _allocElems(0),
+ _deadElems(0u),
+ _state(FREE),
+ _disableElemHoldList(false),
+ _holdElems(0u),
+ _freeList(),
+ _freeListList(NULL),
+ _nextHasFree(NULL),
+ _prevHasFree(NULL),
+ _typeHandler(NULL),
+ _typeId(0),
+ _clusterSize(0),
+ _compacting(false),
+ _buffer()
+{
+ _buffer.reset(new Alloc());
+}
+
+
+BufferState::~BufferState(void)
+{
+ assert(_state == FREE);
+ assert(_freeListList == NULL);
+ assert(_nextHasFree == NULL);
+ assert(_prevHasFree == NULL);
+ assert(_holdElems == 0);
+ assert(_freeList.empty());
+}
+
+
+void
+BufferState::onActive(uint32_t bufferId, uint32_t typeId,
+ BufferTypeBase *typeHandler,
+ size_t sizeNeeded,
+ size_t maxClusters,
+ void *&buffer)
+{
+ assert(buffer == NULL);
+ assert(_buffer->get() == NULL);
+ assert(_state == FREE);
+ assert(_typeHandler == NULL);
+ assert(_allocElems == 0);
+ assert(_usedElems == 0);
+ assert(_deadElems == 0u);
+ assert(_holdElems == 0);
+ assert(_freeList.empty());
+ assert(_nextHasFree == NULL);
+ assert(_prevHasFree == NULL);
+ assert(_freeListList == NULL || _freeListList->_head != this);
+
+ size_t initialSizeNeeded = 0;
+ if (bufferId == 0)
+ initialSizeNeeded = typeHandler->getClusterSize();
+ size_t allocClusters =
+ typeHandler->calcClustersToAlloc(initialSizeNeeded + sizeNeeded,
+ maxClusters);
+ size_t allocSize = allocClusters * typeHandler->getClusterSize();
+ assert(allocSize >= initialSizeNeeded + sizeNeeded);
+ _buffer.reset(new Alloc(allocSize * typeHandler->elementSize()));
+ buffer = _buffer->get();
+ typeHandler->onActive(&_usedElems);
+ assert(buffer != NULL);
+ _allocElems = allocSize;
+ _state = ACTIVE;
+ _typeHandler = typeHandler;
+ _typeId = typeId;
+ _clusterSize = _typeHandler->getClusterSize();
+ if (bufferId == 0) {
+ typeHandler->cleanInitialElements(buffer);
+ pushed_back(_clusterSize);
+ _deadElems = _clusterSize;
+ }
+}
+
+
+void
+BufferState::onHold(void)
+{
+ assert(_state == ACTIVE);
+ assert(_typeHandler != NULL);
+ _state = HOLD;
+ _compacting = false;
+ assert(_deadElems <= _usedElems);
+ assert(_holdElems <= (_usedElems - _deadElems));
+ _holdElems = _usedElems - _deadElems; // Put everyting not dead on hold
+ _typeHandler->onHold(&_usedElems);
+ if (!_freeList.empty()) {
+ removeFromFreeListList();
+ FreeList().swap(_freeList);
+ }
+ assert(_nextHasFree == NULL);
+ assert(_prevHasFree == NULL);
+ assert(_freeListList == NULL || _freeListList->_head != this);
+ setFreeListList(NULL);
+}
+
+
+void
+BufferState::onFree(void *&buffer)
+{
+ assert(buffer == _buffer->get());
+ assert(_state == HOLD);
+ assert(_typeHandler != NULL);
+ assert(_deadElems <= _usedElems);
+ assert(_holdElems == _usedElems - _deadElems);
+ _typeHandler->destroyElements(buffer, _usedElems);
+ Alloc().swap(*_buffer);
+ _typeHandler->onFree(_usedElems);
+ buffer = NULL;
+ _usedElems = 0;
+ _allocElems = 0;
+ _deadElems = 0u;
+ _holdElems = 0u;
+ _state = FREE;
+ _typeHandler = NULL;
+ _clusterSize = 0;
+ assert(_freeList.empty());
+ assert(_nextHasFree == NULL);
+ assert(_prevHasFree == NULL);
+ assert(_freeListList == NULL || _freeListList->_head != this);
+ setFreeListList(NULL);
+ _disableElemHoldList = false;
+}
+
+
+void
+BufferState::dropBuffer(void *&buffer)
+{
+ if (_state == FREE) {
+ assert(buffer == NULL);
+ return;
+ }
+ assert(buffer != NULL);
+ if (_state == ACTIVE)
+ onHold();
+ if (_state == HOLD)
+ onFree(buffer);
+ assert(_state == FREE);
+ assert(buffer == NULL);
+}
+
+
+void
+BufferState::setFreeListList(FreeListList *freeListList)
+{
+ if (_state == FREE && freeListList != NULL)
+ return;
+ if (freeListList == _freeListList)
+ return; // No change
+ if (_freeListList != NULL && !_freeList.empty())
+ removeFromFreeListList(); // Remove from old free list
+ _freeListList = freeListList;
+ if (!_freeList.empty()) {
+ if (freeListList != NULL)
+ addToFreeListList(); // Changed free list list
+ else
+ FreeList().swap(_freeList); // Free lists have been disabled
+ }
+}
+
+
+void
+BufferState::addToFreeListList(void)
+{
+ assert(_freeListList != NULL && _freeListList->_head != this);
+ assert(_nextHasFree == NULL);
+ assert(_prevHasFree == NULL);
+ if (_freeListList->_head != NULL) {
+ _nextHasFree = _freeListList->_head;
+ _prevHasFree = _nextHasFree->_prevHasFree;
+ _nextHasFree->_prevHasFree = this;
+ _prevHasFree->_nextHasFree = this;
+ } else {
+ _nextHasFree = this;
+ _prevHasFree = this;
+ }
+ _freeListList->_head = this;
+}
+
+
+void
+BufferState::removeFromFreeListList(void)
+{
+ assert(_freeListList != NULL);
+ assert(_nextHasFree != NULL);
+ assert(_prevHasFree != NULL);
+ if (_nextHasFree == this) {
+ assert(_prevHasFree == this);
+ assert(_freeListList->_head == this);
+ _freeListList->_head = NULL;
+ } else {
+ assert(_prevHasFree != this);
+ _freeListList->_head = _nextHasFree;
+ _nextHasFree->_prevHasFree = _prevHasFree;
+ _prevHasFree->_nextHasFree = _nextHasFree;
+ }
+ _nextHasFree = NULL;
+ _prevHasFree = NULL;
+}
+
+
+void
+BufferState::disableElemHoldList(void)
+{
+ _disableElemHoldList = true;
+}
+
+
+void
+BufferState::fallbackResize(uint64_t newSize,
+ size_t maxClusters,
+ void *&buffer,
+ Alloc &holdBuffer)
+{
+ assert(_state == ACTIVE);
+ assert(_typeHandler != NULL);
+ assert(holdBuffer.get() == NULL);
+ size_t allocClusters = _typeHandler->calcClustersToAlloc(newSize,
+ maxClusters);
+ size_t allocSize = allocClusters * _typeHandler->getClusterSize();
+ assert(allocSize >= newSize);
+ assert(allocSize > _allocElems);
+ Alloc::UP newBuffer(std::make_unique<Alloc>
+ (allocSize * _typeHandler->elementSize()));
+ _typeHandler->fallbackCopy(newBuffer->get(), buffer, _usedElems);
+ holdBuffer.swap(*_buffer);
+ std::atomic_thread_fence(std::memory_order_release);
+ _buffer = std::move(newBuffer);
+ buffer = _buffer->get();
+ _allocElems = allocSize;
+ std::atomic_thread_fence(std::memory_order_release);
+}
+
+} // namespace btree
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/btree/bufferstate.h b/searchlib/src/vespa/searchlib/btree/bufferstate.h
new file mode 100644
index 00000000000..3c7a3557952
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/btree/bufferstate.h
@@ -0,0 +1,389 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vector>
+#include <deque>
+#include <vespa/vespalib/util/alloc.h>
+#include <vespa/vespalib/util/array.h>
+
+#include "entryref.h"
+#include <vespa/vespalib/util/generationhandler.h>
+
+namespace search {
+namespace btree {
+
+
+class BufferTypeBase
+{
+private:
+ BufferTypeBase(const BufferTypeBase &rhs);
+
+ BufferTypeBase &
+ operator=(const BufferTypeBase &rhs);
+protected:
+ uint32_t _clusterSize; // Number of elements in an allocation unit
+ uint32_t _minClusters; // Minimum number of clusters to allocate
+ uint32_t _maxClusters; // Maximum number of clusters to allocate
+ uint32_t _activeBuffers;
+ uint32_t _holdBuffers;
+ size_t _activeUsedElems; // used elements in all but last active buffer
+ size_t _holdUsedElems; // used elements in all held buffers
+ const size_t *_lastUsedElems; // used elements in last active buffer
+
+public:
+ BufferTypeBase(uint32_t clusterSize,
+ uint32_t minClusters,
+ uint32_t maxClusters);
+
+ virtual
+ ~BufferTypeBase(void);
+
+ virtual void
+ destroyElements(void *buffer, size_t numElements) = 0;
+
+ virtual void
+ fallbackCopy(void *newBuffer,
+ const void *oldBuffer,
+ size_t numElements) = 0;
+
+ virtual void
+ cleanInitialElements(void *buffer) = 0;
+
+ virtual size_t
+ elementSize(void) const = 0;
+
+ virtual void
+ cleanHold(void *buffer, uint64_t offset, uint64_t len) = 0;
+
+ uint32_t
+ getClusterSize(void) const
+ {
+ return _clusterSize;
+ }
+
+ void
+ flushLastUsed(void);
+
+ void
+ onActive(const size_t *usedElems);
+
+ void
+ onHold(const size_t *usedElems);
+
+ virtual void
+ onFree(size_t usedElems);
+
+ /**
+ * Calculate number of clusters to allocate for new buffer.
+ *
+ * @param sizeNeeded number of elements needed now
+ * @param clusterRefSize number of clusters expressable via reference type
+ *
+ * @return number of clusters to allocate for new buffer
+ */
+ virtual size_t
+ calcClustersToAlloc(size_t sizeNeeded,
+ uint64_t clusterRefSize) const;
+
+ uint32_t getActiveBuffers() const { return _activeBuffers; }
+};
+
+
+template <typename EntryType>
+class BufferType : public BufferTypeBase
+{
+private:
+ BufferType(const BufferType &rhs);
+
+ BufferType &
+ operator=(const BufferType &rhs);
+public:
+ EntryType _emptyEntry;
+
+ BufferType(uint32_t clusterSize,
+ uint32_t minClusters,
+ uint32_t maxClusters)
+ : BufferTypeBase(clusterSize, minClusters, maxClusters),
+ _emptyEntry()
+ {
+ }
+
+ virtual void
+ destroyElements(void *buffer, size_t numElements);
+
+ virtual void
+ fallbackCopy(void *newBuffer,
+ const void *oldBuffer,
+ size_t numElements);
+
+ virtual void
+ cleanInitialElements(void *buffer);
+
+ virtual void
+ cleanHold(void *buffer, uint64_t offset, uint64_t len);
+
+ virtual size_t
+ elementSize(void) const
+ {
+ return sizeof(EntryType);
+ }
+};
+
+
+template <typename EntryType>
+void
+BufferType<EntryType>::destroyElements(void *buffer, size_t numElements)
+{
+ EntryType *e = static_cast<EntryType *>(buffer);
+ for (size_t j = numElements; j != 0; --j) {
+ e->~EntryType();
+ ++e;
+ }
+}
+
+
+template <typename EntryType>
+void
+BufferType<EntryType>::fallbackCopy(void *newBuffer,
+ const void *oldBuffer,
+ size_t numElements)
+{
+ EntryType *d = static_cast<EntryType *>(newBuffer);
+ const EntryType *s = static_cast<const EntryType *>(oldBuffer);
+ for (size_t j = numElements; j != 0; --j) {
+ new (static_cast<void *>(d)) EntryType(*s);
+ ++s;
+ ++d;
+ }
+}
+
+
+template <typename EntryType>
+void
+BufferType<EntryType>::cleanInitialElements(void *buffer)
+{
+ EntryType *e = static_cast<EntryType *>(buffer);
+ for (size_t j = _clusterSize; j != 0; --j) {
+ new (static_cast<void *>(e)) EntryType(_emptyEntry);
+ ++e;
+ }
+}
+
+
+template <typename EntryType>
+void
+BufferType<EntryType>::cleanHold(void *buffer, uint64_t offset, uint64_t len)
+{
+ EntryType *e = static_cast<EntryType *>(buffer) + offset;
+ for (size_t j = len; j != 0; --j) {
+ *e = _emptyEntry;
+ ++e;
+ }
+}
+
+
+class BufferState
+{
+public:
+ typedef vespalib::DefaultAlloc Alloc;
+
+ class FreeListList
+ {
+ public:
+ BufferState *_head;
+
+ FreeListList(void)
+ : _head(NULL)
+ {
+ }
+
+ ~FreeListList(void);
+ };
+
+ typedef vespalib::Array<EntryRef, vespalib::DefaultAlloc> FreeList;
+
+ enum State
+ {
+ FREE,
+ ACTIVE,
+ HOLD
+ };
+
+ size_t _usedElems;
+ size_t _allocElems;
+ uint64_t _deadElems;
+ State _state;
+ bool _disableElemHoldList;
+ uint64_t _holdElems;
+ FreeList _freeList;
+ FreeListList *_freeListList; // non-NULL if free lists are enabled
+
+ // NULL pointers if not on circular list of buffer states with free elems
+ BufferState *_nextHasFree;
+ BufferState *_prevHasFree;
+
+ BufferTypeBase *_typeHandler;
+ uint32_t _typeId;
+ uint32_t _clusterSize;
+ bool _compacting;
+
+ /*
+ * TODO: Check if per-buffer free lists are useful, or if
+ *compaction should always be used to free up whole buffers.
+ */
+
+ BufferState(void);
+
+ ~BufferState(void);
+
+ /**
+ * Transition from FREE to ACTIVE state.
+ *
+ * @param bufferId Id of buffer to be active.
+ * @param typeId registered data type for buffer.
+ * @param typeHandler type handler for registered data type.
+ * @param sizeNeeded Number of elements needed to be free
+ * @param maxSize number of clusters expressable via reference
+ * type
+ * @param buffer start of buffer.
+ */
+ void
+ onActive(uint32_t bufferId, uint32_t typeId, BufferTypeBase *typeHandler,
+ size_t sizeNeeded,
+ size_t maxSize,
+ void *&buffer);
+
+ /**
+ * Transition from ACTIVE to HOLD state.
+ */
+ void
+ onHold(void);
+
+ /**
+ * Transition from HOLD to FREE state.
+ */
+ void
+ onFree(void *&buffer);
+
+ /**
+ * Set list of buffer states with nonempty free lists.
+ *
+ * @param freeListList List of buffer states. If NULL then free lists
+ * are disabled.
+ */
+ void
+ setFreeListList(FreeListList *freeListList);
+
+ /**
+ * Add buffer state to list of buffer states with nonempty free lists.
+ */
+ void
+ addToFreeListList(void);
+
+ /**
+ * Remove buffer state from list of buffer states with nonempty free lists.
+ */
+ void
+ removeFromFreeListList(void);
+
+ /**
+ * Disable hold of elements, just mark then as dead without
+ * cleanup. Typically used when tearing down data structure in a
+ * controlled manner.
+ */
+ void
+ disableElemHoldList(void);
+
+ /**
+ * Pop element from free list.
+ */
+ EntryRef
+ popFreeList(void)
+ {
+ EntryRef ret = _freeList.back();
+ _freeList.pop_back();
+ if (_freeList.empty())
+ removeFromFreeListList();
+ _deadElems -= _clusterSize;
+ return ret;
+ }
+
+
+ size_t
+ size(void) const
+ {
+ return _usedElems;
+ }
+
+ size_t
+ capacity(void) const
+ {
+ return _allocElems;
+ }
+
+ size_t
+ remaining(void) const
+ {
+ return _allocElems - _usedElems;
+ }
+
+ void
+ pushed_back(uint64_t len)
+ {
+ _usedElems += len;
+ }
+
+ void
+ cleanHold(void *buffer, uint64_t offset, uint64_t len)
+ {
+ _typeHandler->cleanHold(buffer, offset, len);
+ }
+
+ void
+ dropBuffer(void *&buffer);
+
+ uint32_t
+ getTypeId(void) const
+ {
+ return _typeId;
+ }
+
+ uint32_t
+ getClusterSize(void) const
+ {
+ return _clusterSize;
+ }
+
+ uint64_t getDeadElems() const { return _deadElems; }
+
+ bool
+ getCompacting(void) const
+ {
+ return _compacting;
+ }
+
+ void
+ setCompacting(void)
+ {
+ _compacting = true;
+ }
+
+ void
+ fallbackResize(uint64_t newSize,
+ size_t maxClusters,
+ void *&buffer,
+ Alloc &holdBuffer);
+
+ bool isActive(uint32_t typeId) const {
+ return ((_state == ACTIVE) && (_typeId == typeId));
+ }
+
+private:
+ Alloc::UP _buffer;
+};
+
+
+} // namespace search::btree
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/btree/datastore.cpp b/searchlib/src/vespa/searchlib/btree/datastore.cpp
new file mode 100644
index 00000000000..4af74d6f861
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/btree/datastore.cpp
@@ -0,0 +1,16 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "datastore.h"
+#include "datastore.hpp"
+
+namespace search
+{
+
+namespace btree
+{
+
+template class DataStoreT<EntryRefT<22> >;
+
+} // namespace btree
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/btree/datastore.h b/searchlib/src/vespa/searchlib/btree/datastore.h
new file mode 100644
index 00000000000..b709052f4ac
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/btree/datastore.h
@@ -0,0 +1,139 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "datastorebase.h"
+#include "btreenode.h"
+
+namespace search
+{
+
+namespace btree
+{
+
+template <typename EntryType>
+struct DefaultReclaimer {
+ static void reclaim(EntryType * entry) {
+ (void) entry;
+ }
+};
+
+template <typename RefT = EntryRefT<22> >
+class DataStoreT : public DataStoreBase
+{
+private:
+ DataStoreT(const DataStoreT &rhs);
+
+ DataStoreT &
+ operator=(const DataStoreT &rhs);
+public:
+ typedef RefT RefType;
+
+ DataStoreT(void);
+
+ ~DataStoreT(void);
+
+ /**
+ * Increase number of dead elements in buffer.
+ *
+ * @param ref Reference to dead stored features
+ * @param dead Number of newly dead elements
+ */
+ void
+ incDead(EntryRef ref, uint64_t dead)
+ {
+ RefType intRef(ref);
+ DataStoreBase::incDead(intRef.bufferId(), dead);
+ }
+
+ /**
+ * Free element.
+ */
+ void
+ freeElem(EntryRef ref, uint64_t len);
+
+ /**
+ * Hold element.
+ */
+ void
+ holdElem(EntryRef ref, uint64_t len);
+
+ /**
+ * Trim elem hold list, freeing elements that no longer needs to be held.
+ *
+ * @param usedGen lowest generation that is still used.
+ */
+ virtual void
+ trimElemHoldList(generation_t usedGen);
+
+ virtual void
+ clearElemHoldList(void);
+
+ bool
+ getCompacting(EntryRef ref) const
+ {
+ return getBufferState(RefType(ref).bufferId()).getCompacting();
+ }
+
+ template <typename EntryType>
+ std::pair<RefType, EntryType *>
+ allocNewEntry(uint32_t typeId);
+
+ template <typename EntryType, typename Reclaimer>
+ std::pair<RefType, EntryType *>
+ allocEntry(uint32_t typeId);
+
+ template <typename EntryType>
+ std::pair<RefType, EntryType *>
+ allocNewEntryCopy(uint32_t typeId, const EntryType &rhs);
+
+ template <typename EntryType, typename Reclaimer>
+ std::pair<RefType, EntryType *>
+ allocEntryCopy(uint32_t typeId, const EntryType &rhs);
+
+};
+
+
+template <typename EntryType, typename RefT = EntryRefT<22> >
+class DataStore : public DataStoreT<RefT>
+{
+private:
+ DataStore(const DataStore &rhs);
+
+ DataStore &
+ operator=(const DataStore &rhs);
+protected:
+ typedef DataStoreT<RefT> ParentType;
+ using ParentType::ensureBufferCapacity;
+ // using ParentType::activeBuffer;
+ using ParentType::_activeBufferIds;
+ using ParentType::_buffers;
+ using ParentType::_states;
+ using ParentType::_freeListLists;
+ using ParentType::getBufferEntry;
+ using ParentType::dropBuffers;
+ using ParentType::initActiveBuffers;
+ using ParentType::addType;
+
+ BufferType<EntryType> _type;
+public:
+ typedef typename ParentType::RefType RefType;
+ DataStore();
+
+ ~DataStore(void);
+
+ EntryRef
+ addEntry(const EntryType &e);
+
+ EntryRef
+ addEntry2(const EntryType &e);
+
+ const EntryType &
+ getEntry(EntryRef ref) const;
+};
+
+extern template class DataStoreT<EntryRefT<22> >;
+
+} // namespace search::btree
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/btree/datastore.hpp b/searchlib/src/vespa/searchlib/btree/datastore.hpp
new file mode 100644
index 00000000000..64e55b840fa
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/btree/datastore.hpp
@@ -0,0 +1,248 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "datastore.h"
+
+namespace search
+{
+
+namespace btree
+{
+
+
+template <typename RefT>
+DataStoreT<RefT>::DataStoreT(void)
+ : DataStoreBase(RefType::numBuffers(),
+ RefType::offsetSize() / RefType::align(1))
+{
+}
+
+
+template <typename RefT>
+DataStoreT<RefT>::~DataStoreT(void)
+{
+}
+
+
+template <typename RefT>
+void
+DataStoreT<RefT>::freeElem(EntryRef ref, uint64_t len)
+{
+ RefType intRef(ref);
+ BufferState &state = _states[intRef.bufferId()];
+ if (state._state == BufferState::ACTIVE) {
+ if (state._freeListList != NULL && len == state.getClusterSize()) {
+ if (state._freeList.empty())
+ state.addToFreeListList();
+ state._freeList.push_back(ref);
+ }
+ } else {
+ assert(state._state == BufferState::HOLD);
+ }
+ state._deadElems += len;
+ state.cleanHold(_buffers[intRef.bufferId()],
+ (intRef.offset() / RefType::align(1)) *
+ state.getClusterSize(), len);
+}
+
+
+template <typename RefT>
+void
+DataStoreT<RefT>::holdElem(EntryRef ref, uint64_t len)
+{
+ RefType intRef(ref);
+ uint64_t alignedLen = RefType::align(len);
+ BufferState &state = _states[intRef.bufferId()];
+ assert(state._state == BufferState::ACTIVE);
+ if (state._disableElemHoldList) {
+ state._deadElems += alignedLen;
+ return;
+ }
+ _elemHold1List.push_back(ElemHold1ListElem(ref, alignedLen));
+ state._holdElems += alignedLen;
+}
+
+
+template <typename RefT>
+void
+DataStoreT<RefT>::trimElemHoldList(generation_t usedGen)
+{
+ ElemHold2List &elemHold2List = _elemHold2List;
+
+ ElemHold2List::iterator it(elemHold2List.begin());
+ ElemHold2List::iterator ite(elemHold2List.end());
+ uint32_t freed = 0;
+ for (; it != ite; ++it) {
+ if (static_cast<sgeneration_t>(it->_generation - usedGen) >= 0)
+ break;
+ RefType intRef(it->_ref);
+ BufferState &state = _states[intRef.bufferId()];
+ freeElem(it->_ref, it->_len);
+ assert(state._holdElems >= it->_len);
+ state._holdElems -= it->_len;
+ ++freed;
+ }
+ if (freed != 0) {
+ elemHold2List.erase(elemHold2List.begin(), it);
+ }
+}
+
+
+template <typename RefT>
+void
+DataStoreT<RefT>::clearElemHoldList(void)
+{
+ ElemHold2List &elemHold2List = _elemHold2List;
+
+ ElemHold2List::iterator it(elemHold2List.begin());
+ ElemHold2List::iterator ite(elemHold2List.end());
+ for (; it != ite; ++it) {
+ RefType intRef(it->_ref);
+ BufferState &state = _states[intRef.bufferId()];
+ freeElem(it->_ref, it->_len);
+ assert(state._holdElems >= it->_len);
+ state._holdElems -= it->_len;
+ }
+ elemHold2List.clear();
+}
+
+
+template <typename RefT>
+template <typename EntryType>
+std::pair<RefT, EntryType *>
+DataStoreT<RefT>::allocNewEntry(uint32_t typeId)
+{
+ ensureBufferCapacity(typeId, 1);
+ uint32_t activeBufferId = getActiveBufferId(typeId);
+ BufferState &state = getBufferState(activeBufferId);
+ assert(state._state == BufferState::ACTIVE);
+ size_t oldSize = state.size();
+ EntryType *entry = getBufferEntry<EntryType>(activeBufferId, oldSize);
+ new (static_cast<void *>(entry)) EntryType();
+ state.pushed_back(1);
+ return std::make_pair(RefType(oldSize, activeBufferId), entry);
+}
+
+
+template <typename RefT>
+template <typename EntryType, typename Reclaimer>
+std::pair<RefT, EntryType *>
+DataStoreT<RefT>::allocEntry(uint32_t typeId)
+{
+ BufferState::FreeListList &freeListList = getFreeList(typeId);
+ if (freeListList._head == NULL) {
+ return allocNewEntry<EntryType>(typeId);
+ }
+ BufferState &state = *freeListList._head;
+ assert(state._state == BufferState::ACTIVE);
+ RefType ref(state.popFreeList());
+ EntryType *entry =
+ getBufferEntry<EntryType>(ref.bufferId(), ref.offset());
+ Reclaimer::reclaim(entry);
+ return std::make_pair(ref, entry);
+}
+
+
+template <typename RefT>
+template <typename EntryType>
+std::pair<RefT, EntryType *>
+DataStoreT<RefT>::allocNewEntryCopy(uint32_t typeId, const EntryType &rhs)
+{
+ ensureBufferCapacity(typeId, 1);
+ uint32_t activeBufferId = getActiveBufferId(typeId);
+ BufferState &state = getBufferState(activeBufferId);
+ assert(state._state == BufferState::ACTIVE);
+ size_t oldSize = state.size();
+ EntryType *entry = getBufferEntry<EntryType>(activeBufferId, oldSize);
+ new (static_cast<void *>(entry)) EntryType(rhs);
+ state.pushed_back(1);
+ return std::make_pair(RefType(oldSize, activeBufferId), entry);
+}
+
+
+template <typename RefT>
+template <typename EntryType, typename Reclaimer>
+std::pair<RefT, EntryType *>
+DataStoreT<RefT>::allocEntryCopy(uint32_t typeId, const EntryType &rhs)
+{
+ BufferState::FreeListList &freeListList = getFreeList(typeId);
+ if (freeListList._head == NULL) {
+ return allocNewEntryCopy<EntryType>(typeId, rhs);
+ }
+ BufferState &state = *freeListList._head;
+ assert(state._state == BufferState::ACTIVE);
+ RefType ref(state.popFreeList());
+ EntryType *entry =
+ getBufferEntry<EntryType>(ref.bufferId(), ref.offset());
+ Reclaimer::reclaim(entry);
+ *entry = rhs;
+ return std::make_pair(ref, entry);
+}
+
+
+
+template <typename EntryType, typename RefT>
+DataStore<EntryType, RefT>::DataStore(void)
+ : ParentType(),
+ _type(1, RefType::offsetSize(), RefType::offsetSize())
+{
+ addType(&_type);
+ initActiveBuffers();
+}
+
+template <typename EntryType, typename RefT>
+DataStore<EntryType, RefT>::~DataStore(void)
+{
+ dropBuffers(); // Drop buffers before type handlers are dropped
+}
+
+template <typename EntryType, typename RefT>
+EntryRef
+DataStore<EntryType, RefT>::addEntry(const EntryType &e)
+{
+ ensureBufferCapacity(0, 1);
+ uint32_t activeBufferId = _activeBufferIds[0];
+ BufferState &state = _states[activeBufferId];
+ size_t oldSize = state.size();
+ EntryType *be = static_cast<EntryType *>(_buffers[activeBufferId]) +
+ oldSize;
+ new (static_cast<void *>(be)) EntryType(e);
+ RefType ref(oldSize, activeBufferId);
+ state.pushed_back(1);
+ return ref;
+}
+
+template <typename EntryType, typename RefT>
+EntryRef
+DataStore<EntryType, RefT>::addEntry2(const EntryType &e)
+{
+ BufferState::FreeListList &freeListList = _freeListLists[0];
+ if (freeListList._head == NULL)
+ return addEntry(e);
+ BufferState &state = *freeListList._head;
+ assert(state._state == BufferState::ACTIVE);
+ RefType ref(state.popFreeList());
+ EntryType *be =
+ this->template
+ getBufferEntry<EntryType>(ref.bufferId(), ref.offset());
+ *be = e;
+ return ref;
+}
+
+template <typename EntryType, typename RefT>
+const EntryType &
+DataStore<EntryType, RefT>::getEntry(EntryRef ref) const
+{
+ RefType intRef(ref);
+ const EntryType *be =
+ this->template
+ getBufferEntry<EntryType>(intRef.bufferId(), intRef.offset());
+ return *be;
+}
+
+extern template class DataStoreT<EntryRefT<22> >;
+
+} // namespace search::btree
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/btree/datastorebase.cpp b/searchlib/src/vespa/searchlib/btree/datastorebase.cpp
new file mode 100644
index 00000000000..45c68630773
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/btree/datastorebase.cpp
@@ -0,0 +1,426 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "datastore.h"
+
+using vespalib::GenerationHeldBase;
+
+namespace search
+{
+
+namespace btree
+{
+
+namespace
+{
+
+/*
+ * Minimum dead bytes in active write buffer before switching to new
+ * active write buffer even if another active buffer has more dead
+ * bytes due to considering the active write buffer as too dead.
+ */
+constexpr size_t TOODEAD_SLACK = 0x4000u;
+
+/*
+ * Check if active write buffer is too dead for further use, i.e. if it
+ * is likely to be the worst buffer at next compaction. If so, filling it
+ * up completely will be wasted work, as data will have to be moved again
+ * rather soon.
+ */
+bool activeWriteBufferTooDead(const BufferState &state)
+{
+ size_t deadElems = state.getDeadElems();
+ size_t deadBytes = deadElems * state.getClusterSize();
+ return ((deadBytes >= TOODEAD_SLACK) && (deadElems * 2 >= state.size()));
+}
+
+}
+
+DataStoreBase::FallbackHold::FallbackHold(size_t size,
+ BufferState::Alloc &&buffer,
+ size_t usedElems,
+ BufferTypeBase *typeHandler,
+ uint32_t typeId)
+ : GenerationHeldBase(size),
+ _buffer(std::move(buffer)),
+ _usedElems(usedElems),
+ _typeHandler(typeHandler),
+ _typeId(typeId)
+{
+}
+
+
+DataStoreBase::FallbackHold::~FallbackHold(void)
+{
+ _typeHandler->destroyElements(_buffer.get(), _usedElems);
+}
+
+
+class DataStoreBase::BufferHold : public GenerationHeldBase
+{
+ DataStoreBase &_dsb;
+ uint32_t _bufferId;
+
+public:
+ BufferHold(size_t size,
+ DataStoreBase &dsb,
+ uint32_t bufferId)
+ : GenerationHeldBase(size),
+ _dsb(dsb),
+ _bufferId(bufferId)
+ {
+ }
+
+ virtual
+ ~BufferHold(void)
+ {
+ _dsb.doneHoldBuffer(_bufferId);
+ }
+};
+
+
+DataStoreBase::DataStoreBase(uint32_t numBuffers,
+ size_t maxClusters)
+ : _buffers(numBuffers),
+ _activeBufferIds(),
+ _states(numBuffers),
+ _typeHandlers(),
+ _freeListLists(),
+ _freeListsEnabled(false),
+ _elemHold1List(),
+ _elemHold2List(),
+ _numBuffers(numBuffers),
+ _maxClusters(maxClusters),
+ _genHolder()
+{
+}
+
+
+DataStoreBase::~DataStoreBase(void)
+{
+ disableFreeLists();
+
+ assert(_elemHold1List.empty());
+ assert(_elemHold2List.empty());
+}
+
+
+void
+DataStoreBase::switchActiveBuffer(uint32_t typeId, size_t sizeNeeded)
+{
+ size_t activeBufferId = _activeBufferIds[typeId];
+ do {
+ // start using next buffer
+ activeBufferId = nextBufferId(activeBufferId);
+ } while (_states[activeBufferId]._state != BufferState::FREE);
+ onActive(activeBufferId, typeId, sizeNeeded, _maxClusters);
+ _activeBufferIds[typeId] = activeBufferId;
+}
+
+
+void
+DataStoreBase::initActiveBuffers(void)
+{
+ uint32_t numTypes = _activeBufferIds.size();
+ for (uint32_t typeId = 0; typeId < numTypes; ++typeId) {
+ size_t activeBufferId = 0;
+ while (_states[activeBufferId]._state != BufferState::FREE) {
+ // start using next buffer
+ activeBufferId = nextBufferId(activeBufferId);
+ }
+ onActive(activeBufferId, typeId, 0u, _maxClusters);
+ _activeBufferIds[typeId] = activeBufferId;
+ }
+}
+
+
+void
+DataStoreBase::addType(BufferTypeBase *typeHandler)
+{
+ _activeBufferIds.push_back(0);
+ _typeHandlers.push_back(typeHandler);
+ _freeListLists.push_back(BufferState::FreeListList());
+}
+
+
+void
+DataStoreBase::transferElemHoldList(generation_t generation)
+{
+ ElemHold2List &elemHold2List = _elemHold2List;
+ for (const ElemHold1ListElem & elemHold1 : _elemHold1List) {
+ elemHold2List.push_back(ElemHold2ListElem(elemHold1, generation));
+ }
+ _elemHold1List.clear();
+}
+
+
+void
+DataStoreBase::transferHoldLists(generation_t generation)
+{
+ _genHolder.transferHoldLists(generation);
+ if (hasElemHold1())
+ transferElemHoldList(generation);
+}
+
+
+void
+DataStoreBase::doneHoldBuffer(uint32_t bufferId)
+{
+ _states[bufferId].onFree(_buffers[bufferId]);
+}
+
+
+void
+DataStoreBase::trimHoldLists(generation_t usedGen)
+{
+ trimElemHoldList(usedGen); // Trim entries before trimming buffers
+
+ _genHolder.trimHoldLists(usedGen);
+}
+
+
+void
+DataStoreBase::clearHoldLists(void)
+{
+ transferElemHoldList(0);
+ clearElemHoldList();
+ _genHolder.clearHoldLists();
+}
+
+
+void
+DataStoreBase::dropBuffers(void)
+{
+ uint32_t numBuffers = _buffers.size();
+ for (uint32_t bufferId = 0; bufferId < numBuffers; ++bufferId) {
+ _states[bufferId].dropBuffer(_buffers[bufferId]);
+ }
+ _genHolder.clearHoldLists();
+}
+
+
+MemoryUsage
+DataStoreBase::getMemoryUsage(void) const
+{
+ MemStats stats = getMemStats();
+ MemoryUsage usage;
+ usage.setAllocatedBytes(stats._allocBytes);
+ usage.setUsedBytes(stats._usedBytes);
+ usage.setDeadBytes(stats._deadBytes);
+ usage.setAllocatedBytesOnHold(stats._holdBytes);
+ return usage;
+}
+
+
+void
+DataStoreBase::holdBuffer(uint32_t bufferId)
+{
+ _states[bufferId].onHold();
+ size_t holdBytes = 0u; // getMemStats() still accounts held buffers
+ GenerationHeldBase::UP hold(new BufferHold(holdBytes, *this, bufferId));
+ _genHolder.hold(std::move(hold));
+}
+
+
+void
+DataStoreBase::enableFreeLists(void)
+{
+ for (BufferState & bState : _states) {
+ if (bState._state != BufferState::ACTIVE || bState.getCompacting())
+ continue;
+ bState.setFreeListList(&_freeListLists[bState._typeId]);
+ }
+ _freeListsEnabled = true;
+}
+
+
+void
+DataStoreBase::disableFreeLists(void)
+{
+ for (BufferState & bState : _states) {
+ bState.setFreeListList(nullptr);
+ }
+ _freeListsEnabled = false;
+}
+
+
+void
+DataStoreBase::enableFreeList(uint32_t bufferId)
+{
+ BufferState &state = _states[bufferId];
+ if (_freeListsEnabled &&
+ state._state == BufferState::ACTIVE &&
+ !state.getCompacting())
+ state.setFreeListList(&_freeListLists[state._typeId]);
+}
+
+
+void
+DataStoreBase::disableFreeList(uint32_t bufferId)
+{
+ _states[bufferId].setFreeListList(NULL);
+}
+
+
+void
+DataStoreBase::disableElemHoldList(void)
+{
+ for (auto &state : _states) {
+ if (state._state != BufferState::FREE)
+ state.disableElemHoldList();
+ }
+}
+
+
+DataStoreBase::MemStats
+DataStoreBase::getMemStats(void) const
+{
+ MemStats stats;
+
+ for (const BufferState & bState: _states) {
+ auto typeHandler = bState._typeHandler;
+ BufferState::State state = bState._state;
+ if ((state == BufferState::FREE) || (typeHandler == nullptr)) {
+ ++stats._freeBuffers;
+ } else if (state == BufferState::ACTIVE) {
+ size_t elementSize = typeHandler->elementSize();
+ ++stats._activeBuffers;
+ stats._allocElems += bState._allocElems;
+ stats._usedElems += bState._usedElems;
+ stats._deadElems += bState._deadElems;
+ stats._holdElems += bState._holdElems;
+ stats._allocBytes += bState._allocElems * elementSize;
+ stats._usedBytes += bState._usedElems * elementSize;
+ stats._deadBytes += bState._deadElems * elementSize;
+ stats._holdBytes += bState._holdElems * elementSize;
+ } else if (state == BufferState::HOLD) {
+ size_t elementSize = typeHandler->elementSize();
+ ++stats._holdBuffers;
+ stats._allocElems += bState._allocElems;
+ stats._usedElems += bState._usedElems;
+ stats._deadElems += bState._deadElems;
+ stats._holdElems += bState._holdElems;
+ stats._allocBytes += bState._allocElems * elementSize;
+ stats._usedBytes += bState._usedElems * elementSize;
+ stats._deadBytes += bState._deadElems * elementSize;
+ stats._holdBytes += bState._holdElems * elementSize;
+ } else {
+ abort();
+ }
+ }
+ return stats;
+}
+
+
+void
+DataStoreBase::onActive(uint32_t bufferId, uint32_t typeId,
+ size_t sizeNeeded,
+ size_t maxClusters)
+{
+ assert(typeId < _typeHandlers.size());
+ assert(bufferId < _numBuffers);
+ BufferState &state = _states[bufferId];
+ state.onActive(bufferId, typeId,
+ _typeHandlers[typeId],
+ sizeNeeded,
+ maxClusters,
+ _buffers[bufferId]);
+ enableFreeList(bufferId);
+}
+
+std::vector<uint32_t>
+DataStoreBase::startCompact(uint32_t typeId)
+{
+ std::vector<uint32_t> toHold;
+
+ for (uint32_t bufferId = 0; bufferId < _numBuffers; ++bufferId) {
+ BufferState &state = getBufferState(bufferId);
+ if (state._state == BufferState::ACTIVE &&
+ state.getTypeId() == typeId &&
+ !state.getCompacting()) {
+ state.setCompacting();
+ toHold.push_back(bufferId);
+ disableFreeList(bufferId);
+ }
+ }
+ switchActiveBuffer(typeId, 0u);
+ return toHold;
+}
+
+void
+DataStoreBase::finishCompact(const std::vector<uint32_t> &toHold)
+{
+ for (uint32_t bufferId : toHold) {
+ holdBuffer(bufferId);
+ }
+}
+
+
+void
+DataStoreBase::fallbackResize(uint32_t bufferId,
+ uint64_t newSize)
+{
+ BufferState &state = getBufferState(bufferId);
+ BufferState::Alloc toHoldBuffer;
+ size_t oldUsedElems = state._usedElems;
+ size_t oldAllocElems = state._allocElems;
+ size_t elementSize = state._typeHandler->elementSize();
+ state.fallbackResize(newSize,
+ _maxClusters,
+ _buffers[bufferId],
+ toHoldBuffer);
+ GenerationHeldBase::UP
+ hold(new FallbackHold(oldAllocElems * elementSize,
+ std::move(toHoldBuffer),
+ oldUsedElems,
+ state._typeHandler,
+ state._typeId));
+ _genHolder.hold(std::move(hold));
+}
+
+
+uint32_t
+DataStoreBase::startCompactWorstBuffer(uint32_t typeId)
+{
+ uint32_t activeBufferId = getActiveBufferId(typeId);
+ const BufferTypeBase *typeHandler = _typeHandlers[typeId];
+ assert(typeHandler->getActiveBuffers() >= 1u);
+ if (typeHandler->getActiveBuffers() == 1u) {
+ // Single active buffer for type, no need for scan
+ _states[activeBufferId].setCompacting();
+ _states[activeBufferId].disableElemHoldList();
+ disableFreeList(activeBufferId);
+ switchActiveBuffer(typeId, 0u);
+ return activeBufferId;
+ }
+ // Multiple active buffers for type, must perform full scan
+ uint32_t worstBufferId = activeBufferId;
+ uint32_t worstDead = 0;
+ for (uint32_t bufferId = 0; bufferId < _numBuffers; ++bufferId) {
+ const auto &state = _states[bufferId];
+ if (state.isActive(typeId)) {
+ size_t dead = state.getDeadElems();
+ if (bufferId == 0u) {
+ // buffer 0 is special due to invalid ref -> (buf 0, offset 0)
+ dead -= state.getClusterSize();
+ }
+ if (dead > worstDead) {
+ worstBufferId = bufferId;
+ worstDead = dead;
+ }
+ }
+ }
+ if ((worstBufferId == activeBufferId) ||
+ activeWriteBufferTooDead(_states[activeBufferId]))
+ {
+ switchActiveBuffer(typeId, 0u);
+ }
+ _states[worstBufferId].setCompacting();
+ _states[worstBufferId].disableElemHoldList();
+ disableFreeList(worstBufferId);
+ return worstBufferId;
+}
+
+
+} // namespace btree
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/btree/datastorebase.h b/searchlib/src/vespa/searchlib/btree/datastorebase.h
new file mode 100644
index 00000000000..0c44b485d18
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/btree/datastorebase.h
@@ -0,0 +1,404 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vector>
+
+#include "bufferstate.h"
+#include <vespa/vespalib/util/generationholder.h>
+#include <vespa/searchlib/util/memoryusage.h>
+
+
+namespace search
+{
+
+namespace btree
+{
+
+class DataStoreBase
+{
+private:
+ DataStoreBase(const DataStoreBase &rhs);
+
+ DataStoreBase &
+ operator=(const DataStoreBase &rhs);
+protected:
+ typedef vespalib::GenerationHandler::generation_t generation_t;
+ typedef vespalib::GenerationHandler::sgeneration_t sgeneration_t;
+
+ std::vector<void *> _buffers; // For fast mapping with known types
+ std::vector<uint32_t> _activeBufferIds; // typeId -> active buffer
+
+ // Hold list before freeze, before knowing how long elements must be held
+ class ElemHold1ListElem
+ {
+ public:
+ EntryRef _ref;
+ uint64_t _len; // Aligned length
+
+ ElemHold1ListElem(EntryRef ref, uint64_t len)
+ : _ref(ref),
+ _len(len)
+ {
+ }
+ };
+
+ // Hold list at freeze, when knowing how long elements must be held
+ class ElemHold2ListElem : public ElemHold1ListElem
+ {
+ public:
+ generation_t _generation;
+
+ ElemHold2ListElem(const ElemHold1ListElem &hold1,
+ generation_t generation)
+ : ElemHold1ListElem(hold1),
+ _generation(generation)
+ {
+ }
+ };
+
+ typedef vespalib::Array<ElemHold1ListElem, vespalib::DefaultAlloc> ElemHold1List;
+ typedef std::deque<ElemHold2ListElem> ElemHold2List;
+
+ class FallbackHold : public vespalib::GenerationHeldBase
+ {
+ public:
+ BufferState::Alloc _buffer;
+ size_t _usedElems;
+ BufferTypeBase *_typeHandler;
+ uint32_t _typeId;
+
+ FallbackHold(size_t size,
+ BufferState::Alloc &&buffer,
+ size_t usedElems,
+ BufferTypeBase *typeHandler,
+ uint32_t typeId);
+
+ virtual
+ ~FallbackHold(void);
+ };
+
+ class BufferHold;
+
+public:
+ class MemStats
+ {
+ public:
+ uint64_t _allocElems;
+ uint64_t _usedElems;
+ uint64_t _deadElems;
+ uint64_t _holdElems;
+ uint64_t _allocBytes;
+ uint64_t _usedBytes;
+ uint64_t _deadBytes;
+ uint64_t _holdBytes;
+ uint32_t _freeBuffers;
+ uint32_t _activeBuffers;
+ uint32_t _holdBuffers;
+
+ MemStats(void)
+ : _allocElems(0),
+ _usedElems(0),
+ _deadElems(0),
+ _holdElems(0),
+ _allocBytes(0),
+ _usedBytes(0),
+ _deadBytes(0),
+ _holdBytes(0),
+ _freeBuffers(0),
+ _activeBuffers(0),
+ _holdBuffers(0)
+ {
+ }
+
+ MemStats &
+ operator+=(const MemStats &rhs)
+ {
+ _allocElems += rhs._allocElems;
+ _usedElems += rhs._usedElems;
+ _deadElems += rhs._deadElems;
+ _holdElems += rhs._holdElems;
+ _allocBytes += rhs._allocBytes;
+ _usedBytes += rhs._usedBytes;
+ _deadBytes += rhs._deadBytes;
+ _holdBytes += rhs._holdBytes;
+ _freeBuffers += rhs._freeBuffers;
+ _activeBuffers += rhs._activeBuffers;
+ _holdBuffers += rhs._holdBuffers;
+ return *this;
+ }
+ };
+
+protected:
+ std::vector<BufferState> _states;
+ std::vector<BufferTypeBase *> _typeHandlers; // TypeId -> handler
+
+ std::vector<BufferState::FreeListList> _freeListLists;
+ bool _freeListsEnabled;
+
+ ElemHold1List _elemHold1List;
+ ElemHold2List _elemHold2List;
+
+ uint32_t _numBuffers;
+ size_t _maxClusters;
+
+ vespalib::GenerationHolder _genHolder;
+
+ DataStoreBase(uint32_t numBuffers, size_t maxClusters);
+
+ virtual
+ ~DataStoreBase(void);
+
+ /**
+ * Get next buffer id
+ *
+ * @param bufferId current buffer id
+ * @return next buffer id
+ */
+ uint32_t
+ nextBufferId(uint32_t bufferId)
+ {
+ uint32_t ret = bufferId + 1;
+ if (ret == _numBuffers)
+ ret = 0;
+ return ret;
+ }
+
+ /**
+ * Get active buffer
+ *
+ * @return active buffer
+ */
+ void *
+ activeBuffer(uint32_t typeId)
+ {
+ return _buffers[_activeBufferIds[typeId]];
+ }
+
+ /**
+ * Trim elem hold list, freeing elements that no longer needs to be held.
+ *
+ * @param usedGen lowest generation that is still used.
+ */
+ virtual void
+ trimElemHoldList(generation_t usedGen) = 0;
+
+ virtual void
+ clearElemHoldList(void) = 0;
+
+public:
+ void
+ addType(BufferTypeBase *typeHandler);
+
+ void
+ initActiveBuffers(void);
+
+ /**
+ * Ensure that active buffer has a given number of elements free at end.
+ * Switch to new buffer if current buffer is too full.
+ *
+ * @param typeId registered data type for buffer.
+ * @param sizeNeeded Number of elements needed to be free
+ */
+ void
+ ensureBufferCapacity(uint32_t typeId, size_t sizeNeeded)
+ {
+ if (__builtin_expect(sizeNeeded >
+ _states[_activeBufferIds[typeId]].remaining(),
+ false)) {
+ switchActiveBuffer(typeId, sizeNeeded);
+ }
+ }
+
+ /**
+ * Put buffer on hold list, as part of compaction.
+ *
+ * @param bufferId Id of buffer to be held.
+ */
+ void
+ holdBuffer(uint32_t bufferId);
+
+ /**
+ * Switch to new active buffer, typically in preparation for compaction
+ * or when current active buffer no longer has free space.
+ *
+ * @param typeId registered data type for buffer.
+ * @param sizeNeeded Number of elements needed to be free
+ */
+ void
+ switchActiveBuffer(uint32_t typeId, size_t sizeNeeded);
+
+ MemoryUsage getMemoryUsage() const;
+
+ /**
+ * Get active buffer id for the given type id.
+ */
+ uint32_t
+ getActiveBufferId(uint32_t typeId) const
+ {
+ return _activeBufferIds[typeId];
+ }
+
+ const BufferState &
+ getBufferState(uint32_t bufferId) const
+ {
+ return _states[bufferId];
+ }
+
+ BufferState &
+ getBufferState(uint32_t bufferId)
+ {
+ return _states[bufferId];
+ }
+
+ uint32_t
+ getNumBuffers(void) const
+ {
+ return _numBuffers;
+ }
+
+ bool
+ hasElemHold1(void) const
+ {
+ return !_elemHold1List.empty();
+ }
+
+ /**
+ * Transfer element holds from hold1 list to hold2 list.
+ */
+ void
+ transferElemHoldList(generation_t generation);
+
+ /**
+ * Transfer holds from hold1 to hold2 lists, assigning generation.
+ */
+ void
+ transferHoldLists(generation_t generation);
+
+ /**
+ * Hold of buffer has ended.
+ */
+ void
+ doneHoldBuffer(uint32_t bufferId);
+
+ /**
+ * Trim hold lists, freeing buffers that no longer needs to be held.
+ *
+ * @param usedGen lowest generation that is still used.
+ */
+ void
+ trimHoldLists(generation_t usedGen);
+
+ void
+ clearHoldLists(void);
+
+ template <typename EntryType>
+ EntryType *
+ getBufferEntry(uint32_t bufferId, uint64_t offset)
+ {
+ return static_cast<EntryType *>(_buffers[bufferId]) +
+ offset;
+ }
+
+ template <typename EntryType>
+ const EntryType *
+ getBufferEntry(uint32_t bufferId, uint64_t offset) const
+ {
+ return static_cast<const EntryType *>(_buffers[bufferId]) +
+ offset;
+ }
+
+ void
+ dropBuffers(void);
+
+
+ void
+ incDead(uint32_t bufferId, uint64_t dead)
+ {
+ BufferState &state = _states[bufferId];
+ state._deadElems += dead;
+ }
+
+ /**
+ * Enable free list management. This only works for fixed size elements.
+ */
+ void
+ enableFreeLists(void);
+
+ /**
+ * Disable free list management.
+ */
+ void
+ disableFreeLists(void);
+
+ /**
+ * Enable free list management. This only works for fixed size elements.
+ */
+ void
+ enableFreeList(uint32_t bufferId);
+
+ /**
+ * Disable free list management.
+ */
+ void
+ disableFreeList(uint32_t bufferId);
+
+ void
+ disableElemHoldList(void);
+
+ /**
+ * Returns the free list for the given type id.
+ */
+ BufferState::FreeListList &
+ getFreeList(uint32_t typeId)
+ {
+ return _freeListLists[typeId];
+ }
+
+ MemStats
+ getMemStats(void) const;
+
+ /**
+ * Switch buffer state to active.
+ *
+ * @param bufferId Id of buffer to be active.
+ * @param typeId registered data type for buffer.
+ * @param sizeNeeded Number of elements needed to be free
+ * @param maxSize number of clusters expressable via reference
+ * type
+ */
+ void
+ onActive(uint32_t bufferId, uint32_t typeId,
+ size_t sizeNeeded,
+ size_t maxSize);
+
+ uint32_t
+ getTypeId(uint32_t bufferId) const
+ {
+ return _states[bufferId].getTypeId();
+ }
+
+ std::vector<uint32_t>
+ startCompact(uint32_t typeId);
+
+ void
+ finishCompact(const std::vector<uint32_t> &toHold);
+
+ void
+ fallbackResize(uint32_t bufferId,
+ uint64_t newSize);
+
+ vespalib::GenerationHolder &
+ getGenerationHolder(void)
+ {
+ return _genHolder;
+ }
+
+ uint32_t startCompactWorstBuffer(uint32_t typeId);
+};
+
+
+} // namespace btree
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/btree/entryref.h b/searchlib/src/vespa/searchlib/btree/entryref.h
new file mode 100644
index 00000000000..e2d2a8d89ba
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/btree/entryref.h
@@ -0,0 +1,64 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <stdint.h>
+
+namespace search {
+namespace btree {
+
+class EntryRef {
+protected:
+ uint32_t _ref;
+public:
+ EntryRef(void) : _ref(0u) { }
+ EntryRef(uint32_t ref_) : _ref(ref_) { }
+ uint32_t ref(void) const { return _ref; }
+ bool valid(void) const { return _ref != 0u; }
+ bool operator==(const EntryRef &rhs) const { return _ref == rhs._ref; }
+ bool operator!=(const EntryRef &rhs) const { return _ref != rhs._ref; }
+ bool operator <(const EntryRef &rhs) const { return _ref < rhs._ref; }
+};
+
+/**
+ * Class for entry reference where we use OffsetBits bits for offset into buffer,
+ * and (32 - OffsetBits) bits for buffer id.
+ **/
+template <uint32_t OffsetBits, uint32_t BufferBits = 32u - OffsetBits>
+class EntryRefT : public EntryRef {
+public:
+ EntryRefT() : EntryRef() {}
+ EntryRefT(uint64_t offset_, uint32_t bufferId_) :
+ EntryRef((offset_ << BufferBits) + bufferId_) {}
+ EntryRefT(const EntryRef & ref_) : EntryRef(ref_.ref()) {}
+ uint64_t offset() const { return _ref >> BufferBits; }
+ uint32_t bufferId() const { return _ref & (numBuffers() - 1); }
+ static uint64_t offsetSize() { return 1ul << OffsetBits; }
+ static uint32_t numBuffers() { return 1 << BufferBits; }
+ static uint64_t align(uint64_t val) { return val; }
+ static uint64_t pad(uint64_t val) { (void) val; return 0ul; }
+};
+
+/**
+ * Class for entry reference that is similar to EntryRefT,
+ * except that we use (2^OffsetAlign) byte alignment on the offset.
+ **/
+template <uint32_t OffsetBits, uint32_t OffsetAlign>
+class AlignedEntryRefT : public EntryRefT<OffsetBits> {
+private:
+ typedef EntryRefT<OffsetBits> ParentType;
+ static const uint32_t PadConstant = ((1 << OffsetAlign) - 1);
+public:
+ AlignedEntryRefT() : ParentType() {}
+ AlignedEntryRefT(uint64_t offset_, uint32_t bufferId_) :
+ ParentType(align(offset_) >> OffsetAlign, bufferId_) {}
+ AlignedEntryRefT(const EntryRef & ref_) : ParentType(ref_) {}
+ uint64_t offset() const { return ParentType::offset() << OffsetAlign; }
+ static uint64_t offsetSize() { return ParentType::offsetSize() << OffsetAlign; }
+ static uint64_t align(uint64_t val) { return val + pad(val); }
+ static uint64_t pad(uint64_t val) { return (-val & PadConstant); }
+};
+
+} // namespace search::btree
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/btree/minmaxaggrcalc.h b/searchlib/src/vespa/searchlib/btree/minmaxaggrcalc.h
new file mode 100644
index 00000000000..09cd8ea7d23
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/btree/minmaxaggrcalc.h
@@ -0,0 +1,82 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+namespace search
+{
+namespace btree
+{
+
+class MinMaxAggrCalc
+{
+public:
+ MinMaxAggrCalc(void)
+ {
+ }
+
+ static bool
+ hasAggregated(void)
+ {
+ return true;
+ }
+
+ static int32_t
+ getVal(int32_t val)
+ {
+ return val;
+ }
+
+ static void
+ add(MinMaxAggregated &a, int32_t val)
+ {
+ a.add(val);
+ }
+
+ static void
+ add(MinMaxAggregated &a, const MinMaxAggregated &ca)
+ {
+ a.add(ca);
+ }
+
+ static void
+ add(MinMaxAggregated &a, const MinMaxAggregated &oldca,
+ const MinMaxAggregated &ca)
+ {
+ a.add(oldca, ca);
+ }
+
+ /* Returns true if recalculation is needed */
+ static bool
+ remove(MinMaxAggregated &a, int32_t val)
+ {
+ return a.remove(val);
+ }
+
+ /* Returns true if recalculation is needed */
+ static bool
+ remove(MinMaxAggregated &a, const MinMaxAggregated &oldca,
+ const MinMaxAggregated &ca)
+ {
+ return a.remove(oldca, ca);
+ }
+
+ /* Returns true if recalculation is needed */
+ static bool
+ update(MinMaxAggregated &a, int32_t oldVal, int32_t val)
+ {
+ return a.update(oldVal, val);
+ }
+
+ /* Returns true if recalculation is needed */
+ static bool
+ update(MinMaxAggregated &a, const MinMaxAggregated &oldca,
+ const MinMaxAggregated &ca)
+ {
+ return a.update(oldca, ca);
+ }
+};
+
+
+} // namespace search::btree
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/btree/minmaxaggregated.h b/searchlib/src/vespa/searchlib/btree/minmaxaggregated.h
new file mode 100644
index 00000000000..1b876918d1a
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/btree/minmaxaggregated.h
@@ -0,0 +1,113 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <limits>
+
+namespace search
+{
+namespace btree
+{
+
+class MinMaxAggregated
+{
+ int32_t _min;
+ int32_t _max;
+
+public:
+ MinMaxAggregated(void)
+ : _min(std::numeric_limits<int32_t>::max()),
+ _max(std::numeric_limits<int32_t>::min())
+ {
+ }
+
+ MinMaxAggregated(int32_t min, int32_t max)
+ : _min(min),
+ _max(max)
+ {
+ }
+
+ int32_t
+ getMin(void) const
+ {
+ return _min;
+ }
+
+ int32_t
+ getMax(void) const
+ {
+ return _max;
+ }
+
+ void
+ add(int32_t val)
+ {
+ if (_min > val)
+ _min = val;
+ if (_max < val)
+ _max = val;
+ }
+
+ void
+ add(const MinMaxAggregated &ca)
+ {
+ if (_min > ca._min)
+ _min = ca._min;
+ if (_max < ca._max)
+ _max = ca._max;
+ }
+
+ void
+ add(const MinMaxAggregated &oldca,
+ const MinMaxAggregated &ca)
+ {
+ (void) oldca;
+ add(ca);
+ }
+
+ /* Returns true if recalculation is needed */
+ bool
+ remove(int32_t val)
+ {
+ return (_min == val || _max == val);
+ }
+
+ /* Returns true if recalculation is needed */
+ bool
+ remove(const MinMaxAggregated &oldca,
+ const MinMaxAggregated &ca)
+ {
+ return (_min == oldca._min && _min != ca._min) ||
+ (_max == oldca._max && _max != ca._max);
+ }
+
+ /* Returns true if recalculation is needed */
+ bool
+ update(int32_t oldVal, int32_t val)
+ {
+ if ((_min == oldVal && _min < val) ||
+ (_max == oldVal && _max > val)) {
+ return true;
+ }
+ add(val);
+ return false;
+ }
+
+ /* Returns true if recalculation is needed */
+ bool
+ update(const MinMaxAggregated &oldca,
+ const MinMaxAggregated &ca)
+ {
+ if ((_min == oldca._min && _min < ca._min) ||
+ (_max == oldca._max && _max > ca._max)) {
+ return true;
+ }
+ add(ca);
+ return false;
+ }
+};
+
+
+} // namespace search::btree
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/btree/noaggrcalc.h b/searchlib/src/vespa/searchlib/btree/noaggrcalc.h
new file mode 100644
index 00000000000..57d7ccd2f45
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/btree/noaggrcalc.h
@@ -0,0 +1,98 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+namespace search
+{
+namespace btree
+{
+
+class NoAggrCalc
+{
+public:
+ NoAggrCalc(void)
+ {
+ }
+
+ static bool
+ hasAggregated(void)
+ {
+ return false;
+ }
+
+ template <typename DataT>
+ static inline int32_t
+ getVal(const DataT &val)
+ {
+ (void) val;
+ return 0;
+ }
+
+ static void
+ add(NoAggregated &a, int32_t val)
+ {
+ (void) a;
+ (void) val;
+ }
+
+ static void
+ add(NoAggregated &a, const NoAggregated &ca)
+ {
+ (void) a;
+ (void) ca;
+ }
+
+ static void
+ add(NoAggregated &a,
+ const NoAggregated &oldca,
+ const NoAggregated &ca)
+ {
+ (void) a;
+ (void) oldca;
+ (void) ca;
+ }
+
+ /* Returns true if recalculation is needed */
+ static bool
+ remove(NoAggregated &a, int32_t val)
+ {
+ (void) a;
+ (void) val;
+ return false;
+ }
+
+ /* Returns true if recalculation is needed */
+ static bool
+ remove(NoAggregated &a, const NoAggregated &oldca, const NoAggregated &ca)
+ {
+ (void) a;
+ (void) oldca;
+ (void) ca;
+ return false;
+ }
+
+ /* Returns true if recalculation is needed */
+ static bool
+ update(NoAggregated &a, int32_t oldVal, int32_t val)
+ {
+ (void) a;
+ (void) oldVal;
+ (void) val;
+ return false;
+ }
+
+ /* Returns true if recalculation is needed */
+ static bool
+ update(NoAggregated &a, const NoAggregated &oldca, const NoAggregated &ca)
+ {
+ (void) a;
+ (void) oldca;
+ (void) ca;
+ return false;
+ }
+};
+
+
+} // namespace search::btree
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/btree/noaggregated.h b/searchlib/src/vespa/searchlib/btree/noaggregated.h
new file mode 100644
index 00000000000..9f710840d5a
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/btree/noaggregated.h
@@ -0,0 +1,21 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+namespace search
+{
+namespace btree
+{
+
+class NoAggregated
+{
+public:
+ NoAggregated(void)
+ {
+ }
+};
+
+
+} // namespace search::btree
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/common/.gitignore b/searchlib/src/vespa/searchlib/common/.gitignore
new file mode 100644
index 00000000000..ee8938b6bf4
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/common/.gitignore
@@ -0,0 +1,6 @@
+*.So
+*.exe
+*.ilk
+*.pdb
+.depend*
+Makefile
diff --git a/searchlib/src/vespa/searchlib/common/CMakeLists.txt b/searchlib/src/vespa/searchlib/common/CMakeLists.txt
new file mode 100644
index 00000000000..0bad6ad9381
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/common/CMakeLists.txt
@@ -0,0 +1,30 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_library(searchlib_common OBJECT
+ SOURCES
+ bitvector.cpp
+ allocatedbitvector.cpp
+ partialbitvector.cpp
+ growablebitvector.cpp
+ bitvectorcache.cpp
+ bitvectoriterator.cpp
+ condensedbitvectors.cpp
+ documentlocations.cpp
+ documentsummary.cpp
+ featureset.cpp
+ fileheadercontext.cpp
+ foregroundtaskexecutor.cpp
+ indexmetainfo.cpp
+ location.cpp
+ locationiterators.cpp
+ mapnames.cpp
+ packets.cpp
+ resultset.cpp
+ sequencedtaskexecutor.cpp
+ serialnumfileheadercontext.cpp
+ sort.cpp
+ sortdata.cpp
+ sortresults.cpp
+ sortspec.cpp
+ tunefileinfo.cpp
+ DEPENDS
+)
diff --git a/searchlib/src/vespa/searchlib/common/allocatedbitvector.cpp b/searchlib/src/vespa/searchlib/common/allocatedbitvector.cpp
new file mode 100644
index 00000000000..59d190b2b50
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/common/allocatedbitvector.cpp
@@ -0,0 +1,156 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright (C) 1998-2003 Fast Search & Transfer ASA
+// Copyright (C) 2003 Overture Services Norway AS
+
+#include <vespa/fastos/fastos.h>
+#include "allocatedbitvector.h"
+
+namespace search
+{
+
+using vespalib::nbostream;
+using vespalib::GenerationHeldBase;
+using vespalib::GenerationHeldAlloc;
+using vespalib::GenerationHolder;
+
+void AllocatedBitVector::alloc()
+{
+ uint32_t words = capacityWords();
+ words += (-words & 15); // Pad to 64 byte alignment
+ const size_t sz(words * sizeof(Word));
+ Alloc(sz).swap(_alloc);
+ assert(_alloc.size()/sizeof(Word) >= words);
+ // Clear padding
+ memset(static_cast<char *>(_alloc.get()) + sizeBytes(), 0, sz - sizeBytes());
+}
+
+//////////////////////////////////////////////////////////////////////
+// Parameterized Constructor
+//////////////////////////////////////////////////////////////////////
+AllocatedBitVector::AllocatedBitVector(Index numberOfElements) :
+ BitVector(),
+ _capacityBits(numberOfElements),
+ _alloc()
+{
+ alloc();
+ init(_alloc.get(), 0, numberOfElements);
+ clear();
+}
+
+AllocatedBitVector::AllocatedBitVector(Index numberOfElements, Alloc buffer, size_t offset) :
+ BitVector(static_cast<char *>(buffer.get()) + offset, numberOfElements),
+ _capacityBits(numberOfElements),
+ _alloc(std::move(buffer))
+{
+}
+
+AllocatedBitVector::AllocatedBitVector(Index numberOfElements, Index capacityBits, const void * rhsBuf, size_t rhsSize) :
+ BitVector(),
+ _capacityBits(capacityBits),
+ _alloc()
+{
+ alloc();
+ init(_alloc.get(), 0, numberOfElements);
+ clear();
+ if (rhsSize > 0) {
+ size_t minCount = std::min(static_cast<size_t>(numberOfElements), rhsSize);
+ memcpy(getStart(), rhsBuf, numBytes(minCount));
+ if (minCount/8 == numberOfElements/8) {
+ static_cast<Word *>(getStart())[numWords()-1] &= ~endBits(minCount);
+ }
+ setBit(size()); // Guard bit
+ }
+}
+
+AllocatedBitVector::AllocatedBitVector(const AllocatedBitVector & rhs) :
+ AllocatedBitVector(rhs, rhs.capacity())
+{
+}
+
+AllocatedBitVector::AllocatedBitVector(const BitVector & rhs) :
+ AllocatedBitVector(rhs, rhs.size())
+{
+}
+
+AllocatedBitVector::AllocatedBitVector(const BitVector & rhs, Index capacity_) :
+ BitVector(),
+ _capacityBits(capacity_),
+ _alloc()
+{
+ alloc();
+ memcpy(_alloc.get(), rhs.getStart(), rhs.sizeBytes());
+ init(_alloc.get(), 0, rhs.size());
+}
+
+//////////////////////////////////////////////////////////////////////
+// Destructor
+//////////////////////////////////////////////////////////////////////
+AllocatedBitVector::~AllocatedBitVector(void)
+{
+}
+
+void
+AllocatedBitVector::cleanup(void)
+{
+ init(nullptr, 0, 0);
+ Alloc().swap(_alloc);
+ _capacityBits = 0;
+}
+
+void
+AllocatedBitVector::resize(Index newLength)
+{
+ _capacityBits = newLength;
+ alloc();
+ init(_alloc.get(), 0, newLength);
+ clear();
+}
+
+AllocatedBitVector &
+AllocatedBitVector::operator=(const AllocatedBitVector & rhs)
+{
+ AllocatedBitVector tmp(rhs);
+ swap(tmp);
+ assert(testBit(size()));
+
+ return *this;
+}
+AllocatedBitVector &
+AllocatedBitVector::operator=(const BitVector & rhs)
+{
+ AllocatedBitVector tmp(rhs);
+ swap(tmp);
+ assert(testBit(size()));
+
+ return *this;
+}
+
+GenerationHeldBase::UP
+AllocatedBitVector::grow(Index newSize, Index newCapacity)
+{
+ assert(newCapacity >= newSize);
+ GenerationHeldBase::UP ret;
+ if (newCapacity != capacity()) {
+ AllocatedBitVector tbv(newSize, newCapacity, _alloc.get(), size());
+ if (newSize > size()) {
+ tbv.clearBit(size()); // Clear old guard bit.
+ }
+ ret.reset(new GenerationHeldAlloc<Alloc>(_alloc));
+ if (( newSize >= size()) && isValidCount()) {
+ tbv.setTrueBits(countTrueBits());
+ }
+ swap(tbv);
+ } else {
+ if (newSize > size()) {
+ Index oldSz(size());
+ setSize(newSize);
+ clearIntervalNoInvalidation(oldSz, newSize);
+ } else {
+ clearInterval(newSize, size());
+ setSize(newSize);
+ }
+ }
+ return ret;
+}
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/common/allocatedbitvector.h b/searchlib/src/vespa/searchlib/common/allocatedbitvector.h
new file mode 100644
index 00000000000..8a52a07e29b
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/common/allocatedbitvector.h
@@ -0,0 +1,92 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright (C) 1998-2003 Fast Search & Transfer ASA
+// Copyright (C) 2003 Overture Services Norway AS
+
+#pragma once
+
+#include <vespa/searchlib/common/bitvector.h>
+
+namespace search {
+
+class BitVectorTest;
+
+/**
+ * search::AllocatedBitVector provides an interface to a bit vector
+ * internally implemented as an array of words.
+ */
+class AllocatedBitVector : public BitVector
+{
+public:
+ typedef vespalib::AutoAlloc<0x800000, 0x1000> Alloc;
+
+ /**
+ * Class constructor specifying size but not content. New bitvector
+ * is cleared.
+ *
+ * @param numberOfElements The size of the bit vector in bits.
+ *
+ */
+ explicit AllocatedBitVector(Index numberOfElements);
+ /**
+ *
+ * @param numberOfElements The size of the bit vector in bits.
+ * @param buffer The buffer backing the bit vector.
+ * @param offset Where bitvector image is located in the buffer.
+ */
+ AllocatedBitVector(Index numberOfElements, Alloc buffer, size_t offset);
+
+ /**
+ * Creates a new bitvector with room for numberOfElements bits.
+ * Copies what it can from the original vector. This is used for extending vector.
+ */
+ AllocatedBitVector(Index numberOfElements, Index capacity, const void * rhsBuf, size_t rhsSize);
+
+ AllocatedBitVector(const BitVector &other);
+ AllocatedBitVector(const AllocatedBitVector &other);
+ virtual ~AllocatedBitVector(void);
+ AllocatedBitVector &operator=(const AllocatedBitVector &other);
+ AllocatedBitVector &operator=(const BitVector &other);
+
+ /**
+ * Query the size of the bit vector.
+ *
+ * @return number of legal index positions (bits).
+ */
+ Index capacity() const { return _capacityBits; }
+
+ Index extraByteSize(void) const { return _alloc.size(); }
+
+ /**
+ * Set new length of bit vector, possibly destroying content.
+ *
+ * @param newLength the new length of the bit vector (in bits)
+ */
+ void resize(Index newLength) override;
+
+ GenerationHeldBase::UP grow(Index newLength, Index newCapacity) override;
+
+protected:
+ Index _capacityBits;
+ Alloc _alloc;
+
+private:
+ friend class BitVectorTest;
+ void alloc();
+ void swap(AllocatedBitVector & rhs) {
+ std::swap(_capacityBits, rhs._capacityBits);
+ _alloc.swap(rhs._alloc);
+ BitVector::swap(rhs);
+ }
+
+ AllocatedBitVector(const BitVector &other, Index capacity);
+
+ /**
+ * Prepare for potential reuse where new value might be filled in by
+ * Read method.
+ */
+ void cleanup(void);
+ Index capacityWords() const { return numWords(capacity()); }
+};
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/common/base.h b/searchlib/src/vespa/searchlib/common/base.h
new file mode 100644
index 00000000000..d91acbd29f6
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/common/base.h
@@ -0,0 +1,16 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright (C) 1999-2003 Fast Search & Transfer ASA
+// Copyright (C) 2003 Overture Services Norway AS
+
+#pragma once
+
+namespace search {
+
+// This constant defines the illegal/undefined value for unsigned 32-bit
+// integer ids. Use this instead of the function below to get less
+// overhead with not-so-smart compilers.
+
+const uint32_t NoId32 = static_cast<uint32_t>(-1);
+
+}
+
diff --git a/searchlib/src/vespa/searchlib/common/bitvector.cpp b/searchlib/src/vespa/searchlib/common/bitvector.cpp
new file mode 100644
index 00000000000..25edae290de
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/common/bitvector.cpp
@@ -0,0 +1,421 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright (C) 1998-2003 Fast Search & Transfer ASA
+// Copyright (C) 2003 Overture Services Norway AS
+
+#include <vespa/fastos/fastos.h>
+#include "bitvector.h"
+#include "allocatedbitvector.h"
+#include "growablebitvector.h"
+#include "partialbitvector.h"
+#include <vespa/vespalib/hwaccelrated/iaccelrated.h>
+#include <vespa/vespalib/util/exceptions.h>
+
+using vespalib::make_string;
+using vespalib::IllegalArgumentException;
+using vespalib::hwaccelrated::IAccelrated;
+using vespalib::Optimized;
+
+namespace {
+
+void verifyContains(const search::BitVector & a, const search::BitVector & b) __attribute__((noinline));
+
+void verifyContains(const search::BitVector & a, const search::BitVector & b)
+{
+ if ((a.getStartIndex() < b.getStartIndex()) || (a.size() > b.size())) {
+ throw IllegalArgumentException(make_string("[%d, %d] is not contained in [%d, %d]",
+ a.getStartIndex(), a.size(), b.getStartIndex(), b.size()),
+ VESPA_STRLOC);
+ }
+}
+
+}
+
+/////////////////////////////////
+namespace search
+{
+
+using vespalib::nbostream;
+using vespalib::GenerationHeldBase;
+using vespalib::GenerationHeldAlloc;
+using vespalib::GenerationHolder;
+
+namespace {
+
+template <typename T>
+void fillUp(T * v, T startVal) {
+ for (size_t i(0); i < (sizeof(T)*8); i++) {
+ v[i] = startVal << i;
+ }
+}
+
+}
+
+BitWord::Init BitWord::_initializer;
+
+BitWord::Init::Init()
+{
+ fillUp(BitWord::_checkTab, std::numeric_limits<BitWord::Word>::max());
+}
+
+BitWord::Word BitWord::_checkTab[BitWord::WordLen];
+
+
+BitVector::BitVector(void * buf, Index start, Index end) :
+ _words(static_cast<Word *>(buf) - wordNum(start)),
+ _startOffset(start),
+ _sz(end),
+ _numTrueBits(invalidCount())
+{
+ assert((reinterpret_cast<size_t>(_words) & (sizeof(Word) - 1ul)) == 0);
+}
+
+void
+BitVector::init(void * buf, Index start, Index end)
+{
+ _words = static_cast<Word *>(buf) - wordNum(start);
+ _startOffset = start;
+ _sz = end;
+ _numTrueBits = invalidCount();
+}
+
+void
+BitVector::clear(void)
+{
+ memset(getActiveStart(), '\0', getActiveBytes());
+ setBit(size()); // Guard bit
+ setTrueBits(0);
+}
+
+void
+BitVector::clearInterval(Index start, Index end)
+{
+ clearIntervalNoInvalidation(start, end);
+
+ invalidateCachedCount();
+}
+
+void
+BitVector::clearIntervalNoInvalidation(Index start, Index end)
+{
+ if (start >= end) { return; }
+
+ Index last = std::min(end, size()) - 1;
+ Index startw = wordNum(start);
+ Index endw = wordNum(last);
+
+ if (endw > startw) {
+ _words[startw++] &= startBits(start);
+ memset(_words+startw, 0, sizeof(*_words)*(endw-startw));
+ _words[endw] &= endBits(last);
+ } else {
+ _words[startw] &= (startBits(start) | endBits(last));
+ }
+}
+
+void
+BitVector::setInterval(Index start, Index end)
+{
+ if (start >= end) { return; }
+
+ Index last = std::min(end, size()) - 1;
+ Index startw = wordNum(start);
+ Index endw = wordNum(last);
+
+ if (endw > startw) {
+ _words[startw++] |= checkTab(start);
+ memset(_words + startw, 0xff, sizeof(*_words)*(endw-startw));
+ _words[endw] |= ~endBits(last);
+ } else {
+ _words[startw] |= ~(startBits(start) | endBits(last));
+ }
+
+ invalidateCachedCount();
+}
+
+BitVector::Index
+BitVector::count(void) const
+{
+ // Subtract by one to compensate for guard bit
+ return internalCount(getActiveStart(), numActiveWords()) - 1;
+}
+
+BitVector::Index
+BitVector::internalCount(const Word *tarr, size_t sz)
+{
+ Index count(0);
+ for (size_t i(0); i < sz; i++) {
+ count += Optimized::popCount(tarr[i]);
+ }
+ return count;
+}
+
+BitVector::Index
+BitVector::countInterval(Index start, Index end) const
+{
+ if (start >= end) return 0;
+
+ Index last = std::min(end, size()) - 1;
+ // Count bits in range [start..end>
+ Index startw = wordNum(start);
+ Index endw = wordNum(last);
+ Word *bitValues = _words;
+
+ if (startw == endw) {
+ return Optimized::popCount(bitValues[startw] & ~(startBits(start) | endBits(last)));
+ }
+ Index res = 0;
+ // Limit to full words
+ if ((start & (WordLen - 1)) != 0) {
+ res += Optimized::popCount(bitValues[startw] & ~startBits(start));
+ ++startw;
+ }
+ // Align start to 16 bytes
+ while (startw < endw && (startw & 3) != 0) {
+ res += Optimized::popCount(bitValues[startw]);
+ ++startw;
+ }
+ bool partialEnd = (last & (WordLen - 1)) != (WordLen - 1);
+ if (!partialEnd) {
+ ++endw;
+ }
+ if (startw < endw) {
+ res += internalCount(bitValues + startw, endw - startw);
+ }
+ if (partialEnd) {
+ res += Optimized::popCount(bitValues[endw] & ~endBits(last));
+ }
+
+ return res;
+}
+
+void
+BitVector::orWith(const BitVector & right)
+{
+ verifyContains(*this, right);
+ IAccelrated::getAccelrator()->orBit(getActiveStart(), right.getWordIndex(getStartIndex()), getActiveBytes());
+
+ repairEnds();
+ invalidateCachedCount();
+}
+
+void
+BitVector::repairEnds()
+{
+ if (size() == 0) return;
+ Index start(getStartIndex());
+ Index last(size() - 1);
+ getWordIndex(start)[0] &= ~startBits(start);
+ getWordIndex(last)[0] &= ~endBits(last);
+ setGuardBit();
+}
+
+
+void
+BitVector::andWith(const BitVector & right)
+{
+ verifyContains(*this, right);
+
+ IAccelrated::getAccelrator()->andBit(getActiveStart(), right.getWordIndex(getStartIndex()), getActiveBytes());
+
+ setGuardBit();
+ invalidateCachedCount();
+}
+
+
+void
+BitVector::andNotWith(const BitVector& right)
+{
+ verifyContains(*this, right);
+
+ IAccelrated::getAccelrator()->andNotBit(getActiveStart(), right.getWordIndex(getStartIndex()), getActiveBytes());
+
+ setGuardBit();
+ invalidateCachedCount();
+}
+
+bool
+BitVector::operator==(const BitVector &rhs) const
+{
+ if ((size() != rhs.size()) || (getStartIndex() != rhs.getStartIndex())) {
+ return false;
+ }
+
+ Index bitVectorSize = numActiveWords();
+ const Word *words = getActiveStart();
+ const Word *oWords = rhs.getActiveStart();
+ for (Index i = 0; i < bitVectorSize; i++) {
+ if (words[i] != oWords[i]) {
+ return false;
+ }
+ }
+ return true;
+}
+
+bool
+BitVector::hasTrueBitsInternal(void) const
+{
+ Index bitVectorSizeL1(numActiveWords() - 1);
+ const Word *words(getActiveStart());
+ for (Index i = 0; i < bitVectorSizeL1; i++) {
+ if (words[i] != 0) {
+ return true;
+ }
+ }
+
+ // Ignore guard bit.
+ if ((words[bitVectorSizeL1] & ~mask(size())) != 0)
+ return true;
+
+ return false;
+}
+
+//////////////////////////////////////////////////////////////////////
+// Set new length. Destruction of content
+//////////////////////////////////////////////////////////////////////
+void
+BitVector::resize(Index newLength)
+{
+ (void) newLength;
+ assert(false);
+}
+GenerationHeldBase::UP
+BitVector::grow(Index newSize, Index newCapacity)
+{
+ (void) newSize;
+ (void) newCapacity;
+ assert(false);
+}
+
+size_t
+BitVector::getFileBytes(Index bits)
+{
+ Index bytes = numBytes(bits);
+ bytes += (-bytes & (getAlignment() - 1));
+ return bytes;
+}
+
+class MMappedBitVector : public BitVector
+{
+public:
+ MMappedBitVector(Index numberOfElements,
+ FastOS_FileInterface &file,
+ int64_t offset,
+ Index doccount);
+
+private:
+ void read(Index numberOfElements,
+ FastOS_FileInterface &file,
+ int64_t offset,
+ Index doccount);
+};
+
+BitVector::UP
+BitVector::create(Index numberOfElements,
+ FastOS_FileInterface &file,
+ int64_t offset,
+ Index doccount)
+{
+ UP bv;
+ if (file.IsMemoryMapped()) {
+ bv.reset(new MMappedBitVector(numberOfElements, file, offset, doccount));
+ } else {
+ size_t padbefore, padafter;
+ size_t vectorsize = getFileBytes(numberOfElements);
+ file.DirectIOPadding(offset, vectorsize, padbefore, padafter);
+ assert((padbefore & (getAlignment() - 1)) == 0);
+ AllocatedBitVector::Alloc alloc(padbefore + vectorsize + padafter);
+ void * alignedBuffer = alloc.get();
+ file.ReadBuf(alignedBuffer, alloc.size(), offset - padbefore);
+ bv.reset(new AllocatedBitVector(numberOfElements, std::move(alloc), padbefore));
+ bv->setTrueBits(doccount);
+ // Check guard bit for getNextTrueBit()
+ assert(bv->testBit(bv->size()));
+ }
+ return bv;
+}
+
+BitVector::UP
+BitVector::create(Index start, Index end)
+{
+ return (start == 0)
+ ? create(end)
+ : UP(new PartialBitVector(start, end));
+}
+
+BitVector::UP
+BitVector::create(Index numberOfElements)
+{
+ return UP(new AllocatedBitVector(numberOfElements));
+}
+
+BitVector::UP
+BitVector::create(const BitVector & rhs)
+{
+ return UP(new AllocatedBitVector(rhs));
+}
+
+BitVector::UP
+BitVector::create(Index numberOfElements, Index newCapacity, GenerationHolder &generationHolder)
+{
+ return UP(new GrowableBitVector(numberOfElements, newCapacity, generationHolder));
+}
+
+MMappedBitVector::MMappedBitVector(Index numberOfElements,
+ FastOS_FileInterface &file,
+ int64_t offset,
+ Index doccount) :
+ BitVector()
+{
+ read(numberOfElements, file, offset, doccount);
+}
+
+void
+MMappedBitVector::read(Index numberOfElements,
+ FastOS_FileInterface &file,
+ int64_t offset,
+ Index doccount)
+{
+ assert((offset & (getAlignment() - 1)) == 0);
+ void *mapptr = file.MemoryMapPtr(offset);
+ assert(mapptr != NULL);
+ if (mapptr != NULL) {
+ init(mapptr, 0, numberOfElements);
+ }
+ setTrueBits(doccount);
+}
+
+nbostream &
+operator<<(nbostream &out, const BitVector &bv)
+{
+ size_t size = bv.size();
+ size_t cachedHits = bv.countTrueBits();
+ size_t fileBytes = bv.getFileBytes();
+ assert(size <= std::numeric_limits<BitVector::Index>::max());
+ assert(cachedHits <= size || ! bv.isValidCount(cachedHits));
+ assert(bv.testBit(size));
+ out << size << cachedHits << fileBytes;
+ out.write(bv.getStart(), bv.getFileBytes());
+ return out;
+}
+
+
+nbostream &
+operator>>(nbostream &in, BitVector &bv)
+{
+ size_t size;
+ size_t cachedHits;
+ size_t fileBytes;
+ in >> size >> cachedHits >> fileBytes;
+ assert(size <= std::numeric_limits<BitVector::Index>::max());
+ assert(cachedHits <= size || ! bv.isValidCount(cachedHits));
+ if (bv.size() != size)
+ bv.resize(size);
+ assert(bv.getFileBytes() == fileBytes);
+ in.read(bv.getStart(), bv.getFileBytes());
+ assert(bv.testBit(size));
+ bv.setTrueBits(cachedHits);
+ return in;
+}
+
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/common/bitvector.h b/searchlib/src/vespa/searchlib/common/bitvector.h
new file mode 100644
index 00000000000..70864c938d3
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/common/bitvector.h
@@ -0,0 +1,354 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright (C) 1998-2003 Fast Search & Transfer ASA
+// Copyright (C) 2003 Overture Services Norway AS
+
+#pragma once
+
+#include <memory>
+#include <vespa/vespalib/util/alloc.h>
+#include <vespa/vespalib/util/generationholder.h>
+#include <vespa/vespalib/objects/nbostream.h>
+#include <vespa/fastos/file.h>
+#include <bits/stl_algo.h>
+#include <bits/stl_function.h>
+
+namespace search {
+
+class BitWord {
+public:
+ typedef uint64_t Word;
+ typedef uint32_t Index;
+ static Word checkTab(Index index) { return _checkTab[bitNum(index)]; }
+ static Word startBits(Index index) { return (std::numeric_limits<Word>::max() >> 1) >> (WordLen - 1 - bitNum(index)); }
+ static constexpr size_t WordLen = sizeof(Word)*8;
+ static uint8_t bitNum(Index idx) { return (idx % WordLen); }
+ static Word endBits(Index index) { return (std::numeric_limits<Word>::max() - 1) << bitNum(index); }
+ static Index wordNum(Index idx) { return idx >> numWordBits(); }
+ static Word mask(Index idx) { return Word(1) << bitNum(idx); }
+ static constexpr uint8_t size_bits(uint8_t n) { return (n > 1) ? (1 + size_bits(n >> 1)) : 0; }
+ static uint8_t numWordBits() { return size_bits(WordLen); }
+private:
+
+ static Word _checkTab[WordLen];
+ struct Init {
+ Init();
+ };
+ static Init _initializer;
+};
+
+class BitVector : protected BitWord, private vespalib::noncopyable
+{
+public:
+ typedef BitWord::Index Index;
+ typedef vespalib::GenerationHolder GenerationHolder;
+ typedef vespalib::GenerationHeldBase GenerationHeldBase;
+ typedef std::unique_ptr<BitVector> UP;
+ virtual ~BitVector() { }
+ bool operator == (const BitVector &right) const;
+ const void * getStart() const { return _words; }
+ void * getStart() { return _words; }
+ Index size() const { return _sz; }
+ Index sizeBytes() const { return numBytes(getActiveSize()); }
+ bool testBit(Index idx) const {
+ return ((_words[wordNum(idx)] & mask(idx)) != 0);
+ }
+ bool hasTrueBits() const {
+ return isValidCount()
+ ? (countTrueBits() != 0)
+ : hasTrueBitsInternal();
+ }
+ Index countTrueBits() const {
+ if ( ! isValidCount()) {
+ _numTrueBits = count();
+ }
+ return _numTrueBits;
+ }
+
+ /**
+ * Will provide the first valid bit of the bitvector.
+ *
+ * @return The Index of the first valid bit of the bitvector.
+ */
+ Index getStartIndex() const { return _startOffset; }
+
+ /**
+ * Get next bit set in the bitvector (inclusive start).
+ * It assumes that bitvector is non-zero terminated.
+ *
+ * @param start first bit to check
+ * @return next bit set in the bitvector.
+ */
+ Index getNextTrueBit(Index start) const {
+ Index index(wordNum(start));
+ const Word *words(_words);
+ Word t(words[index] & checkTab(start));
+
+ // In order to avoid a test an extra guard bit is added
+ // after the bitvector as a termination.
+ // Also bitvector will normally at least 1 bit set per 32 bits.
+ // So that is what we should expect.
+ while (__builtin_expect(t == 0, false)) {
+ t = words[++index];
+ }
+
+ return (index << numWordBits()) + vespalib::Optimized::lsbIdx(t);
+ }
+
+ /**
+ * Iterate over all true bits in th einclusive range.
+ *
+ * @param func callback
+ * @param start first bit
+ * @param last bit
+ */
+ template <typename FunctionType>
+ void
+ foreach_truebit(FunctionType func, Index start=0, Index end=std::numeric_limits<Index>::max()) const
+ {
+ foreach(func, [&](Word w) { return w; }, start, end);
+ }
+
+ /**
+ * Iterate over all true bits in th einclusive range.
+ *
+ * @param func callback
+ * @param start first bit
+ * @param last bit
+ */
+ template <typename FunctionType>
+ void
+ foreach_falsebit(FunctionType func, Index start=0, Index end=std::numeric_limits<Index>::max()) const
+ {
+ foreach(func, [&](Word w) { return ~w; }, start, end);
+ }
+
+ Index getFirstTrueBit(Index start=0) const {
+ return getNextTrueBit(std::max(start, getStartIndex()));
+ }
+
+ Index getPrevTrueBit(Index start) const {
+ Index index(wordNum(start));
+ const Word *words(_words);
+ Word t(words[index] & ~endBits(start));
+
+ while(t == 0 && index > getStartWordNum()) {
+ t = words[--index];
+ }
+
+ return (t != 0)
+ ? (index << numWordBits()) + vespalib::Optimized::msbIdx(t)
+ : getStartIndex();
+ }
+
+ void setSize(Index sz) {
+ clearBit(size());
+ _sz = sz;
+ setBit(size());
+ }
+ void setBit(Index idx) {
+ _words[wordNum(idx)] |= mask(idx);
+ }
+ void clearBit(Index idx) {
+ _words[wordNum(idx)] &= ~ mask(idx);
+ }
+ void flip(Index idx) {
+ _words[wordNum(idx)] ^= mask(idx);
+ }
+ void slowSetBit(Index idx) {
+ if ( ! testBit(idx) ) {
+ setBit(idx);
+ if ( isValidCount() ) {
+ _numTrueBits++;
+ }
+ }
+ }
+
+ void andWith(const BitVector &right);
+ void orWith(const BitVector &right);
+ void andNotWith(const BitVector &right);
+
+ /**
+ * Clear all bits in the bit vector.
+ */
+ void clear();
+
+ /**
+ * Clear a sequence of bits [..>.
+ *
+ * @param start first bit to be cleared
+ * @param end limit
+ */
+ void clearInterval(Index start, Index end);
+ /**
+ * Set a sequence of bits.
+ *
+ * @param start first bit to be set [..>
+ * @param end limit
+ */
+ void setInterval(Index start, Index end);
+
+ void slowClearBit(Index idx) {
+ if (testBit(idx)) {
+ clearBit(idx);
+ if ( isValidCount() ) {
+ _numTrueBits--;
+ }
+ }
+ }
+
+ /**
+ * Invalidate cached count of bits set in bit vector. This method
+ * should be called before calling Test/Clear/Flip methods.
+ */
+ void invalidateCachedCount() const {
+ _numTrueBits = invalidCount();
+ }
+
+ void swap(BitVector & rhs) {
+ std::swap(_words, rhs._words);
+ std::swap(_startOffset, rhs._startOffset);
+ std::swap(_sz, rhs._sz);
+ std::swap(_numTrueBits, rhs._numTrueBits);
+ }
+
+ /**
+ * Count bits in partial bitvector [..>.
+ *
+ * @param start first bit to be counted
+ * @param end limit
+ */
+ Index countInterval(Index start, Index end) const;
+
+ /**
+ * Perform an andnot with an internal array representation.
+ *
+ * @param other internal array representation
+ * @param otherCount number of elements in array
+ */
+ template <typename T>
+ void andNotWithT(T it);
+
+ /*
+ * Calculate the size of a bitmap when performing file io.
+ */
+ static size_t getFileBytes(Index bits);
+
+ /*
+ * Calculate the size of a bitmap when performing file io.
+ */
+ size_t getFileBytes(void) const {
+ return getFileBytes(size());
+ }
+
+ virtual void resize(Index newLength);
+
+ virtual GenerationHeldBase::UP grow(Index newLength, Index newCapacity);
+ GenerationHeldBase::UP grow(Index newLength) { return grow(newLength, newLength); }
+
+ /**
+ * This will create the appropriate vector.
+ *
+ * @param numberOfElements The size of the bit vector in bits.
+ * @param file The file from which to read the bit vector.
+ * @param offset Where bitvector image is located in the file.
+ * @param doccount Number of bits set in bitvector
+ */
+ static UP create(Index numberOfElements,
+ FastOS_FileInterface &file,
+ int64_t offset,
+ Index doccount);
+ static UP create(Index start, Index end);
+ static UP create(Index numberOfElements);
+ static UP create(const BitVector & rhs);
+ static UP create(Index newSize, Index newCapacity, GenerationHolder &generationHolder);
+protected:
+ VESPA_DLL_LOCAL BitVector(void * buf, Index start, Index end);
+ BitVector(void * buf, Index sz) : BitVector(buf, 0, sz) { }
+ BitVector() : BitVector(nullptr, 0) { }
+ void init(void * buf, Index start, Index end);
+ void setTrueBits(Index numTrueBits) { _numTrueBits = numTrueBits; }
+ VESPA_DLL_LOCAL void clearIntervalNoInvalidation(Index start, Index end);
+ bool isValidCount() const { return isValidCount(_numTrueBits); }
+ static bool isValidCount(Index v) { return v != invalidCount(); }
+ static Index numWords(Index bits) { return wordNum(bits + 1 + (WordLen - 1)); }
+ static Index numBytes(Index bits) { return numWords(bits) * sizeof(Word); }
+ size_t numWords() const { return numWords(size()); }
+ static size_t getAlignment() { return 0x40u; }
+ static size_t numActiveBytes(Index start, Index end) { return numActiveWords(start, end) * sizeof(Word); }
+
+private:
+ const Word * getWordIndex(Index index) const { return static_cast<const Word *>(getStart()) + wordNum(index); }
+ Word * getWordIndex(Index index) { return static_cast<Word *>(getStart()) + wordNum(index); }
+ const Word * getActiveStart() const { return getWordIndex(getStartIndex()); }
+ Word * getActiveStart() { return getWordIndex(getStartIndex()); }
+ Index getStartWordNum() const { return wordNum(getStartIndex()); }
+ Index getActiveSize() const { return size() - getStartIndex(); }
+ size_t getActiveBytes() const { return numActiveBytes(getStartIndex(), size()); }
+ size_t numActiveWords() const { return numActiveWords(getStartIndex(), size()); }
+ static size_t numActiveWords(Index start, Index end) { return (numWords(end) - wordNum(start)); }
+ static Index invalidCount() { return std::numeric_limits<Index>::max(); }
+ void setGuardBit() { setBit(size()); }
+ VESPA_DLL_LOCAL void repairEnds();
+ VESPA_DLL_LOCAL static Index internalCount(const Word *tarr, size_t sz);
+ Index count(void) const;
+ bool hasTrueBitsInternal() const;
+ template <typename FunctionType, typename WordConverter>
+ void
+ foreach(FunctionType func, WordConverter conv, Index start, Index end) const
+ {
+ if ((end <= start) || (size() == 0)) return;
+ Index last = std::min(end, size()) - 1;
+ if (start < getStartIndex()) start = getStartIndex();
+
+ Index index(wordNum(start));
+ Index lastIndex(wordNum(last));
+ Word word(conv(_words[index]) & checkTab(start));
+ for ( ; index < lastIndex; word = conv(_words[++index])) {
+ foreach_bit(func, word, index << numWordBits());
+ }
+ foreach_bit(func, word & ~endBits(last), lastIndex << numWordBits());
+ }
+ template <typename FunctionType>
+ static void
+ foreach_bit(FunctionType func, Word word, Index start)
+ {
+ while (word) {
+ uint32_t pos = vespalib::Optimized::lsbIdx(word);
+ func(start+pos);
+ start += pos + 1;
+ word >>= pos;
+ word >>= 1;
+ }
+ }
+
+
+ Word *_words; // This is the buffer staring at Index 0
+ Index _startOffset; // This is the official start
+ Index _sz; // This is the official end.
+ mutable Index _numTrueBits;
+
+protected:
+ friend vespalib::nbostream &
+ operator<<(vespalib::nbostream &out, const BitVector &bv);
+ friend vespalib::nbostream &
+ operator>>(vespalib::nbostream &in, BitVector &bv);
+};
+
+typedef BitVector ConstBitVectorReference;
+
+vespalib::nbostream &
+operator<<(vespalib::nbostream &out, const BitVector &bv);
+
+vespalib::nbostream &
+operator>>(vespalib::nbostream &in, BitVector &bv);
+
+template <typename T>
+void BitVector::andNotWithT(T it) {
+ while (it.hasNext()) {
+ clearBit(it.next());
+ }
+ invalidateCachedCount();
+}
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/common/bitvectorcache.cpp b/searchlib/src/vespa/searchlib/common/bitvectorcache.cpp
new file mode 100644
index 00000000000..cf2011b1f2a
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/common/bitvectorcache.cpp
@@ -0,0 +1,218 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/searchlib/common/bitvectorcache.h>
+#include <vespa/log/log.h>
+
+LOG_SETUP(".searchlib.common.bitvectorcache");
+
+namespace search {
+
+BitVectorCache::BitVectorCache(GenerationHolder &genHolder) :
+ _lookupCount(0),
+ _needPopulation(false),
+ _lock(),
+ _keys(),
+ _chunks(),
+ _genHolder(genHolder)
+{
+}
+
+BitVectorCache::~BitVectorCache()
+{
+}
+
+void
+BitVectorCache::computeCountVector(KeySet & keys, CountVector & v) const
+{
+ std::vector<Key> notFound;
+ std::vector<CondensedBitVector::KeySet> keySets;
+ ChunkV chunks;
+ {
+ vespalib::LockGuard guard(_lock);
+ keySets.resize(_chunks.size());
+ Key2Index::const_iterator end(_keys.end());
+ for (Key k : keys) {
+ Key2Index::const_iterator found = _keys.find(k);
+ if (found != end) {
+ const KeyMeta & m = found->second;
+ keySets[m.chunkId()].insert(m.chunkIndex());
+ }
+ }
+ chunks = _chunks;
+ }
+ for (Key k : notFound) {
+ keys.erase(k);
+ }
+ size_t index(0);
+ if (chunks.empty()) {
+ memset(&v[0], 0, v.size());
+ }
+ for (const auto & chunk : chunks) {
+ if (index == 0) {
+ chunk->initializeCountVector(keySets[index++], v);
+ } else {
+ chunk->addCountVector(keySets[index++], v);
+ }
+ }
+}
+
+BitVectorCache::KeySet
+BitVectorCache::lookupCachedSet(const KeyAndCountSet & keys)
+{
+ KeySet cached(keys.size()*3);
+ vespalib::LockGuard guard(_lock);
+ _lookupCount++;
+ if (_lookupCount == 2000) {
+ _needPopulation = true;
+ } else if ((_lookupCount & 0x1fffff) == 0x100000) {
+ if (hasCostChanged(guard)) {
+ _needPopulation = true;
+ }
+ }
+ for (const auto & e : keys) {
+ auto found = _keys.find(e.first);
+ if (found != _keys.end()) {
+ KeyMeta & m = found->second;
+ m.lookup();
+ if (m.isCached()) {
+ cached.insert(e.first);
+ }
+ } else {
+ _keys[e.first] = KeyMeta().lookup().bitCount(e.second);
+ }
+ }
+ return cached;
+}
+
+BitVectorCache::SortedKeyMeta
+BitVectorCache::getSorted(Key2Index & keys)
+{
+ std::vector<std::pair<Key, KeyMeta *>> sorted;
+ sorted.reserve(keys.size());
+ for (auto & e : keys) {
+ sorted.push_back({e.first, &e.second});
+ }
+ std::sort(sorted.begin(), sorted.end(),
+ [&] (const auto & a, const auto & b) {
+ return a.second->cost() > b.second->cost();
+ });
+ return sorted;
+}
+
+bool
+BitVectorCache::hasCostChanged(const vespalib::LockGuard & guard)
+{
+ (void) guard;
+ if ( ! _chunks.empty()) {
+ SortedKeyMeta sorted(getSorted(_keys));
+ double oldCached(0);
+ for (auto & e : sorted) {
+ const KeyMeta & m = *e.second;
+ if ( m.isCached() ) {
+ oldCached += m.cost();
+ }
+ }
+ double newCached(0);
+ for (size_t i(0); i < sorted.size() && i < _chunks[0]->getKeyCapacity(); i++) {
+ const KeyMeta & m = *sorted[i].second;
+ newCached += m.cost();
+ }
+ if (newCached > oldCached * 1.01) { // 1% change needed.
+ return true;
+ }
+ }
+ return false;
+}
+
+void
+BitVectorCache::populate(Key2Index & newKeys, CondensedBitVector & chunk, const PopulateInterface & lookup)
+{
+ SortedKeyMeta sorted(getSorted(newKeys));
+
+ double sum(0);
+ for (auto & e : sorted) {
+ e.second->unCache();
+ sum += e.second->cost();
+ }
+ double accum(0.0);
+ uint32_t index(0);
+ for (const auto & e : sorted) {
+ KeyMeta & m = *e.second;
+ if (index >= chunk.getKeyCapacity()) {
+ assert( ! m.isCached());
+ } else {
+ double percentage(m.cost()*100.0/sum);
+ accum += percentage;
+ m.chunkId(0);
+ m.chunkIndex(index);
+ LOG(info, "Populating bitvector %2d with feature %ld and %ld bits set. Cost is %8f = %2.2f%%, accumulated cost is %2.2f%%",
+ index, e.first, m.bitCount(), m.cost(), percentage, accum);
+ index++;
+ assert(m.isCached());
+ assert(newKeys[e.first].isCached());
+ assert(&m == &newKeys[e.first]);
+ PopulateInterface::Iterator::UP iterator = lookup.lookup(e.first);
+ for (int32_t docId(iterator->getNext()); docId >= 0; docId = iterator->getNext()) {
+ chunk.set(m.chunkIndex(), docId, true);
+ }
+ }
+ }
+}
+
+void
+BitVectorCache::populate(uint32_t sz, const PopulateInterface & lookup)
+{
+ vespalib::LockGuard guard1(_lock);
+ if (! _needPopulation) {
+ return;
+ }
+ Key2Index newKeys(_keys);
+ guard1.unlock();
+
+ CondensedBitVector::UP chunk(CondensedBitVector::create(sz, _genHolder));
+ populate(newKeys, *chunk, lookup);
+
+ vespalib::LockGuard guard2(_lock);
+ _chunks.push_back(std::move(chunk));
+ _keys.swap(newKeys);
+ _needPopulation = false;
+}
+
+void
+BitVectorCache::set(Key key, uint32_t index, bool v)
+{
+ vespalib::LockGuard guard(_lock);
+ auto found = _keys.find(key);
+ if (found != _keys.end()) {
+ const KeyMeta & m(found->second);
+ if (m.isCached()) {
+ _chunks[m.chunkId()]->set(m.chunkIndex(), index, v);
+ }
+ }
+}
+
+bool
+BitVectorCache::get(Key key, uint32_t index) const
+{
+ (void) key; (void) index;
+ return false;
+}
+
+void
+BitVectorCache::removeIndex(uint32_t index)
+{
+ vespalib::LockGuard guard(_lock);
+ for (auto & chunk : _chunks) {
+ chunk->clearIndex(index);
+ }
+}
+
+
+void
+BitVectorCache::adjustDocIdLimit(uint32_t docId)
+{
+ for (auto &chunk : _chunks) {
+ chunk->adjustDocIdLimit(docId);
+ }
+}
+
+}
diff --git a/searchlib/src/vespa/searchlib/common/bitvectorcache.h b/searchlib/src/vespa/searchlib/common/bitvectorcache.h
new file mode 100644
index 00000000000..3405adaf1fd
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/common/bitvectorcache.h
@@ -0,0 +1,86 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/searchlib/common/condensedbitvectors.h>
+#include <vespa/vespalib/util/sync.h>
+#include <vespa/vespalib/stllike/hash_set.h>
+
+namespace search {
+
+class PopulateInterface
+{
+public:
+ class Iterator {
+ public:
+ typedef std::unique_ptr<Iterator> UP;
+ virtual ~Iterator() { }
+ virtual int32_t getNext() = 0;
+ };
+ virtual ~PopulateInterface() { }
+ virtual Iterator::UP lookup(uint64_t key) const = 0;
+};
+
+class BitVectorCache
+{
+public:
+ typedef uint64_t Key;
+ typedef vespalib::hash_set<Key> KeySet;
+ typedef std::vector<std::pair<Key, size_t>> KeyAndCountSet;
+ typedef CondensedBitVector::CountVector CountVector;
+ typedef vespalib::GenerationHolder GenerationHolder;
+
+ BitVectorCache(GenerationHolder &genHolder);
+ ~BitVectorCache();
+ void computeCountVector(KeySet & keys, CountVector & v) const;
+ KeySet lookupCachedSet(const KeyAndCountSet & keys);
+ void set(Key key, uint32_t index, bool v);
+ bool get(Key key, uint32_t index) const;
+ void removeIndex(uint32_t index);
+ void adjustDocIdLimit(uint32_t docId);
+ void populate(uint32_t count, const PopulateInterface &);
+ bool needPopulation() const { return _needPopulation; }
+private:
+ class KeyMeta {
+ public:
+ KeyMeta() :
+ _lookupCount(0),
+ _bitCount(0),
+ _chunkId(-1),
+ _chunkIndex(0)
+ { }
+ double cost() const { return _bitCount * _lookupCount; }
+ bool isCached() const { return _chunkId >= 0; }
+ size_t bitCount() const { return _bitCount; }
+ size_t chunkIndex() const { return _chunkIndex; }
+ size_t chunkId() const { return _chunkId; }
+ size_t lookupCount() const { return _lookupCount; }
+ KeyMeta & incBits() { _bitCount++; return *this; }
+ KeyMeta & decBits() { _bitCount--; return *this; }
+ KeyMeta & lookup() { _lookupCount++; return *this; }
+ KeyMeta & bitCount(uint32_t v) { _bitCount = v; return *this; }
+ KeyMeta & chunkId(uint32_t v) { _chunkId = v; return *this; }
+ KeyMeta & chunkIndex(uint32_t v) { _chunkIndex = v; return *this; }
+ KeyMeta & unCache() { _chunkId = -1; return *this; }
+ private:
+ size_t _lookupCount;
+ uint32_t _bitCount;
+ int32_t _chunkId;
+ uint32_t _chunkIndex;
+ };
+ typedef vespalib::hash_map<Key, KeyMeta> Key2Index;
+ typedef std::vector<std::pair<Key, KeyMeta *>> SortedKeyMeta;
+ typedef std::vector<CondensedBitVector::SP> ChunkV;
+
+ VESPA_DLL_LOCAL static SortedKeyMeta getSorted(Key2Index & keys);
+ VESPA_DLL_LOCAL static void populate(Key2Index & newKeys, CondensedBitVector & chunk, const PopulateInterface & lookup);
+ VESPA_DLL_LOCAL bool hasCostChanged(const vespalib::LockGuard &);
+
+ uint64_t _lookupCount;
+ bool _needPopulation;
+ vespalib::Lock _lock;
+ Key2Index _keys;
+ ChunkV _chunks;
+ GenerationHolder &_genHolder;
+};
+
+}
diff --git a/searchlib/src/vespa/searchlib/common/bitvectoriterator.cpp b/searchlib/src/vespa/searchlib/common/bitvectoriterator.cpp
new file mode 100644
index 00000000000..012fab70f51
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/common/bitvectoriterator.cpp
@@ -0,0 +1,116 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".bitvectoriterators");
+
+#include "bitvectoriterator.h"
+#include <vespa/searchlib/queryeval/emptysearch.h>
+#include <vespa/searchlib/fef/termfieldmatchdata.h>
+#include <vespa/searchlib/fef/termfieldmatchdataarray.h>
+#include <vespa/vespalib/objects/visit.h>
+
+namespace search
+{
+
+using fef::TermFieldMatchDataArray;
+using fef::TermFieldMatchData;
+
+BitVectorIterator::BitVectorIterator(const BitVector & bv, uint32_t docIdLimit, TermFieldMatchData & matchData) :
+ _docIdLimit(std::min(docIdLimit, bv.size())),
+ _bv(bv),
+ _tfmd(matchData)
+{
+ assert(docIdLimit <= bv.size());
+ _tfmd.reset(0);
+}
+
+void
+BitVectorIterator::initRange(uint32_t begin, uint32_t end)
+{
+ SearchIterator::initRange(begin, end);
+ if (begin >= _docIdLimit) {
+ setAtEnd();
+ } else {
+ uint32_t docId = _bv.getFirstTrueBit(begin);
+ if (docId >= _docIdLimit) {
+ setAtEnd();
+ } else {
+ setDocId(docId);
+ }
+ }
+}
+
+void
+BitVectorIterator::doSeek(uint32_t docId)
+{
+ if (__builtin_expect(docId >= _docIdLimit, false)) {
+ setAtEnd();
+ } else if (_bv.testBit(docId)) {
+ setDocId(docId);
+ }
+}
+
+void
+BitVectorIterator::visitMembers(vespalib::ObjectVisitor &visitor) const
+{
+ SearchIterator::visitMembers(visitor);
+ visit(visitor, "docIdLimit", _docIdLimit);
+ visit(visitor, "termfieldmatchdata.fieldId", _tfmd.getFieldId());
+ visit(visitor, "termfieldmatchdata.docid", _tfmd.getDocId());
+}
+
+void
+BitVectorIterator::doUnpack(uint32_t docId)
+{
+ _tfmd.resetOnlyDocId(docId);
+}
+
+class BitVectorIteratorStrict : public BitVectorIterator
+{
+public:
+ BitVectorIteratorStrict(const BitVector & bv, uint32_t docIdLimit, TermFieldMatchData & matchData);
+private:
+ void doSeek(uint32_t docId) override;
+ Trinary is_strict() const override { return Trinary::True; }
+};
+
+BitVectorIteratorStrict::BitVectorIteratorStrict(const BitVector & bv, uint32_t docIdLimit, TermFieldMatchData & matchData) :
+ BitVectorIterator(bv, docIdLimit, matchData)
+{
+}
+
+void
+BitVectorIteratorStrict::doSeek(uint32_t docId)
+{
+ if (__builtin_expect(docId >= _docIdLimit, false)) {
+ setAtEnd();
+ return;
+ }
+
+ docId = _bv.getNextTrueBit(docId);
+ if (__builtin_expect(docId >= _docIdLimit, false)) {
+ setAtEnd();
+ } else {
+ setDocId(docId);
+ }
+}
+
+queryeval::SearchIterator::UP BitVectorIterator::create(const BitVector *const bv, const TermFieldMatchDataArray &matchData, bool strict)
+{
+ assert(matchData.size() == 1);
+ return create(bv, bv->size(), *matchData[0], strict);
+}
+queryeval::SearchIterator::UP BitVectorIterator::create(const BitVector *const bv, uint32_t docIdLimit, TermFieldMatchData &matchData, bool strict)
+{
+ if (bv == NULL) {
+ return UP(new queryeval::EmptySearch());
+ } else if (strict) {
+ return UP(new BitVectorIteratorStrict(*bv, docIdLimit, matchData));
+ } else {
+ return UP(new BitVectorIterator(*bv, docIdLimit, matchData));
+ }
+}
+
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/common/bitvectoriterator.h b/searchlib/src/vespa/searchlib/common/bitvectoriterator.h
new file mode 100644
index 00000000000..ab8b506792b
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/common/bitvectoriterator.h
@@ -0,0 +1,42 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/queryeval/searchiterator.h>
+#include "bitvector.h"
+
+
+namespace search
+{
+
+namespace fef { class TermFieldMatchDataArray; }
+namespace fef { class TermFieldMatchData; }
+
+class BitVectorIterator : public queryeval::SearchIterator
+{
+protected:
+ BitVectorIterator(const BitVector & other, uint32_t docIdLimit, fef::TermFieldMatchData &matchData);
+
+ uint32_t _docIdLimit;
+ const BitVector & _bv;
+private:
+ void initRange(uint32_t begin, uint32_t end) override;
+ void visitMembers(vespalib::ObjectVisitor &visitor) const override;
+ void doSeek(uint32_t docId) override;
+ void doUnpack(uint32_t docId) override;
+ bool isBitVector() const override { return true; }
+ fef::TermFieldMatchData &_tfmd;
+public:
+ const void * getBitValues() const { return _bv.getStart(); }
+
+ Trinary is_strict() const override { return Trinary::False; }
+ virtual bool isStrict() const { return (is_strict() == Trinary::True); }
+ uint32_t getDocIdLimit() const { return _docIdLimit; }
+ static UP create(const BitVector *const other, const fef::TermFieldMatchDataArray &matchData, bool strict);
+ static UP create(const BitVector *const other, uint32_t docIdLimit, fef::TermFieldMatchData &matchData, bool strict);
+};
+
+
+} // namespace search
+
+
diff --git a/searchlib/src/vespa/searchlib/common/condensedbitvectors.cpp b/searchlib/src/vespa/searchlib/common/condensedbitvectors.cpp
new file mode 100644
index 00000000000..fa94eb5dca4
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/common/condensedbitvectors.cpp
@@ -0,0 +1,148 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/searchlib/common/condensedbitvectors.h>
+#include <vespa/searchlib/common/rcuvector.h>
+#include <vespa/vespalib/util/exceptions.h>
+
+using vespalib::IllegalArgumentException;
+using vespalib::make_string;
+using vespalib::GenerationHolder;
+
+namespace search {
+
+namespace {
+
+template <typename T>
+class CondensedBitVectorT : public CondensedBitVector
+{
+public:
+ CondensedBitVectorT(size_t sz, GenerationHolder &genHolder) :
+ _v(sz, 30, 1000, genHolder)
+ {
+ for (size_t i = 0; i < sz; ++i) {
+ _v.push_back(0);
+ }
+ }
+private:
+ static uint8_t countBits(T v) {
+ return ((sizeof(T)) <= 4)
+ ? __builtin_popcount(v)
+ : __builtin_popcountl(v);
+ }
+ T computeMask(const KeySet & keys) const __attribute__ ((noinline)) {
+ T mask(0);
+ for (size_t i : keys) {
+ assert(i < getKeyCapacity());
+ mask |= (B << i);
+ }
+ return mask;
+ }
+ static const uint64_t B = 1ul;
+ void initializeCountVector(const KeySet & keys, CountVector & cv) const override {
+ struct S {
+ void operator () (uint8_t & cv, uint8_t v) { cv = v; }
+ };
+ computeCountVector(computeMask(keys), cv, S());
+ }
+ void addCountVector(const KeySet & keys, CountVector & cv) const override {
+ struct S {
+ void operator () (uint8_t & cv, uint8_t v) { cv += v; }
+ };
+ computeCountVector(computeMask(keys), cv, S());
+ }
+
+ void clearIndex(uint32_t index) override {
+ _v[index] = 0;
+ }
+
+ template <typename F>
+ VESPA_DLL_LOCAL void computeCountVector(T mask, CountVector & cv, F func) const __attribute__((noinline));
+
+ template <typename F>
+ VESPA_DLL_LOCAL void computeTail(T mask, CountVector & cv, F func, size_t i) const __attribute__((noinline));
+
+ void set(Key key, uint32_t index, bool v) override {
+ assert(key < getKeyCapacity());
+ if (v) {
+ _v[index] |= B << key;
+ } else {
+ _v[index] &= ~(B << key);
+ }
+ }
+ bool get(Key key, uint32_t index) const override {
+ assert(key < getKeyCapacity());
+ return (_v[index] & (B << key)) != 0;
+ }
+
+ size_t getKeyCapacity() const override { return sizeof(T)*8; }
+ size_t getCapacity() const override { return _v.capacity(); }
+ size_t getSize() const { return _v.size(); }
+ void adjustDocIdLimit(uint32_t docId);
+ attribute::RcuVectorBase<T> _v;
+};
+
+template <typename T>
+template <typename F>
+VESPA_DLL_LOCAL void
+CondensedBitVectorT<T>::computeCountVector(T mask, CountVector & cv, F func) const
+{
+ size_t i(0);
+ const size_t UNROLL = 2;
+ uint8_t *d = &cv[0];
+ const T *v = &_v[0];
+ for (const size_t m(cv.size() - (UNROLL - 1)); i < m; i+=UNROLL) {
+ for (size_t j(0); j < UNROLL; j++) {
+ func(d[i+j], countBits(v[i+j] & mask));
+ }
+ }
+ computeTail(mask, cv, func, i);
+}
+
+template <typename T>
+template <typename F>
+VESPA_DLL_LOCAL void
+CondensedBitVectorT<T>::computeTail(T mask, CountVector & cv, F func, size_t i) const
+{
+ for (; i < cv.size(); i++) {
+ func(cv[i], countBits(_v[i] & mask));
+ }
+}
+
+
+template <typename T>
+void
+CondensedBitVectorT<T>:: adjustDocIdLimit(uint32_t docId)
+{
+ while (_v.size() <= docId) {
+ _v.push_back(0);
+ }
+}
+
+
+void throwIllegalKey(size_t numKeys, size_t key) __attribute__((noinline));
+
+void throwIllegalKey(size_t numKeys, size_t key)
+{
+ throw IllegalArgumentException(make_string("All %ld possible keys are used. Key %ld is not added", numKeys, key), VESPA_STRLOC);
+}
+
+}
+
+CondensedBitVector::~CondensedBitVector()
+{
+}
+
+void
+CondensedBitVector::addKey(Key key) const
+{
+ if ( ! hasKey(key)) {
+ throwIllegalKey(getKeyCapacity(), key);
+ }
+}
+
+CondensedBitVector::UP
+CondensedBitVector::create(size_t size, GenerationHolder &genHolder)
+{
+ return UP(new CondensedBitVectorT<uint32_t>(size, genHolder));
+}
+
+}
diff --git a/searchlib/src/vespa/searchlib/common/condensedbitvectors.h b/searchlib/src/vespa/searchlib/common/condensedbitvectors.h
new file mode 100644
index 00000000000..b8d97cbcb07
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/common/condensedbitvectors.h
@@ -0,0 +1,39 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/vespalib/stllike/hash_map.h>
+#include <vespa/vespalib/util/generationholder.h>
+#include <vector>
+#include <set>
+
+namespace search {
+
+class CondensedBitVector
+{
+public:
+ typedef std::unique_ptr<CondensedBitVector> UP;
+ typedef std::shared_ptr<CondensedBitVector> SP;
+ typedef uint32_t Key;
+ typedef std::set<Key> KeySet;
+ typedef vespalib::ArrayRef<uint8_t> CountVector;
+
+ virtual ~CondensedBitVector();
+
+ virtual void initializeCountVector(const KeySet & keys, CountVector & v) const = 0;
+ virtual void addCountVector(const KeySet & keys, CountVector & v) const = 0;
+ virtual void set(Key key, uint32_t index, bool v) = 0;
+ virtual bool get(Key key, uint32_t index) const = 0;
+ virtual void clearIndex(uint32_t index) = 0;
+ virtual size_t getKeyCapacity() const = 0;
+ virtual size_t getCapacity() const = 0;
+ virtual size_t getSize() const = 0;
+ virtual void adjustDocIdLimit(uint32_t docId) = 0;
+ bool hasKey(Key key) const { return key < getKeyCapacity(); }
+ void addKey(Key key) const;
+ static CondensedBitVector::UP create(size_t size, vespalib::GenerationHolder &genHolder);
+private:
+ typedef vespalib::hash_map<Key, uint32_t> Key2Index;
+ Key2Index _keys;
+};
+
+}
diff --git a/searchlib/src/vespa/searchlib/common/converters.h b/searchlib/src/vespa/searchlib/common/converters.h
new file mode 100644
index 00000000000..ccd15c6105c
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/common/converters.h
@@ -0,0 +1,69 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchcommon/common/iblobconverter.h>
+#include <unicode/coll.h>
+#include <vector>
+#include <vespa/vespalib/stllike/string.h>
+
+namespace search {
+namespace common {
+
+class PassThroughConverter : public BlobConverter
+{
+private:
+ virtual vespalib::ConstBufferRef onConvert(const vespalib::ConstBufferRef & src) const;
+};
+
+class LowercaseConverter : public BlobConverter
+{
+public:
+ LowercaseConverter();
+private:
+ virtual vespalib::ConstBufferRef onConvert(const vespalib::ConstBufferRef & src) const;
+ mutable vespalib::string _buffer;
+};
+
+class UcaConverter : public BlobConverter
+{
+public:
+ typedef icu::Collator Collator;
+ UcaConverter(const vespalib::string & locale, const vespalib::string & strength);
+ const Collator & getCollator() const { return *_collator; }
+private:
+ struct Buffer {
+ vespalib::string _data;
+ uint8_t *ptr() { return (uint8_t *)_data.begin(); }
+ int32_t siz() { return _data.size(); }
+ Buffer() : _data() {
+ reserve(_data.capacity()-8); // do not cause extra malloc() by default
+ }
+ void reserve(size_t size) {
+ _data.reserve(size+8);
+ _data.resize(size);
+ _data[size+1] = '\0';
+ _data[size+2] = '\0';
+ _data[size+3] = 'd';
+ _data[size+4] = 'e';
+ _data[size+5] = 'a';
+ _data[size+6] = 'd';
+ _data[size+7] = '\0';
+ }
+ void check() {
+ assert(_data[siz()+3] == 'd');
+ assert(_data[siz()+4] == 'e');
+ assert(_data[siz()+5] == 'a');
+ assert(_data[siz()+6] == 'd');
+ }
+ };
+ int utf8ToUtf16(const vespalib::ConstBufferRef & src) const;
+ virtual vespalib::ConstBufferRef onConvert(const vespalib::ConstBufferRef & src) const;
+ mutable Buffer _buffer;
+ mutable std::vector<UChar> _u16Buffer;
+ std::unique_ptr<Collator> _collator;
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/common/docstamp.h b/searchlib/src/vespa/searchlib/common/docstamp.h
new file mode 100644
index 00000000000..d1f22eb5abb
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/common/docstamp.h
@@ -0,0 +1,18 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+namespace search {
+
+// since everything is real-time, the docstamp does no longer change
+// as before. The value 0 still means invalid in fdispatch, and the
+// value 42 was selected randomly to reflect a valid value. Defined
+// here for a single source of truth.
+
+struct DocStamp {
+ static uint32_t good() { return 42; }
+ static uint32_t bad() { return 0; }
+};
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/common/documentlocations.cpp b/searchlib/src/vespa/searchlib/common/documentlocations.cpp
new file mode 100644
index 00000000000..e5811045e9f
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/common/documentlocations.cpp
@@ -0,0 +1,14 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include "documentlocations.h"
+
+namespace search {
+namespace common {
+
+DocumentLocations::DocumentLocations(void)
+ : _vec_guard(new AttributeGuard),
+ _vec(NULL) {
+}
+
+} // namespace common
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/common/documentlocations.h b/searchlib/src/vespa/searchlib/common/documentlocations.h
new file mode 100644
index 00000000000..8b5372b601f
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/common/documentlocations.h
@@ -0,0 +1,43 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/attribute/attributeguard.h>
+
+namespace search {
+namespace common {
+
+
+/**
+ * This class contains meta-information about document locations (positions)
+ * for all documents in the index, and references to the attributes
+ * containing the actual document locations.
+ */
+class DocumentLocations
+{
+
+private:
+ search::AttributeGuard::UP _vec_guard;
+ const search::attribute::IAttributeVector *_vec;
+
+public:
+ DocumentLocations(void);
+
+ void setVecGuard(search::AttributeGuard::UP guard) {
+ _vec_guard = std::move(guard);
+ setVec(_vec_guard.get()->get());
+ }
+
+ void setVec(const search::attribute::IAttributeVector &vec) {
+ _vec = &vec;
+ }
+
+ const search::attribute::IAttributeVector *getVec() const {
+ return _vec;
+ }
+};
+
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/common/documentsummary.cpp b/searchlib/src/vespa/searchlib/common/documentsummary.cpp
new file mode 100644
index 00000000000..03faf45c1a9
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/common/documentsummary.cpp
@@ -0,0 +1,63 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright (C) 1998-2003 Fast Search & Transfer ASA
+// Copyright (C) 2003 Overture Services Norway AS
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+#include <vespa/fastlib/io/bufferedfile.h>
+#include <vespa/searchlib/util/filekit.h>
+#include <vespa/searchlib/common/documentsummary.h>
+#include <vespa/vespalib/util/error.h>
+
+LOG_SETUP(".searchlib.docsummary.documentsummary");
+
+using vespalib::getLastErrorString;
+
+namespace search {
+namespace docsummary {
+
+bool
+DocumentSummary::readDocIdLimit(const vespalib::string &dir, uint32_t &count)
+{
+ char numbuf[20];
+ Fast_BufferedFile qcntfile(4096);
+ unsigned int qcnt;
+ vespalib::string qcntname;
+ const char *p;
+
+ qcntname = dir + "/docsum.qcnt";
+
+ count = qcnt = 0;
+ // XXX no checking for success
+ qcntfile.ReadOpen(qcntname.c_str());
+ if (!qcntfile.IsOpened() || qcntfile.Eof())
+ return false;
+ p = qcntfile.ReadLine(numbuf, sizeof(numbuf));
+ while (*p >= '0' && *p <= '9')
+ qcnt = qcnt * 10 + *p++ - '0';
+ qcntfile.Close();
+ count = qcnt;
+ return true;
+}
+
+
+bool
+DocumentSummary::writeDocIdLimit(const vespalib::string &dir, uint32_t count)
+{
+ vespalib::string qcntname = dir + "/docsum.qcnt";
+ Fast_BufferedFile qcntfile(new FastOS_File);
+
+ qcntfile.WriteOpen(qcntname.c_str());
+ if (!qcntfile.IsOpened()) {
+ LOG(error, "Could not open %s: %s", qcntname.c_str(), getLastErrorString().c_str());
+ return false;
+ }
+ qcntfile.addNum(count, 0, ' ');
+ qcntfile.WriteByte('\n');
+ qcntfile.Sync();
+ qcntfile.Close();
+ return true;
+}
+
+}
+}
diff --git a/searchlib/src/vespa/searchlib/common/documentsummary.h b/searchlib/src/vespa/searchlib/common/documentsummary.h
new file mode 100644
index 00000000000..86e45ca921d
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/common/documentsummary.h
@@ -0,0 +1,24 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright (C) 1998-2003 Fast Search & Transfer ASA
+// Copyright (C) 2003 Overture Services Norway AS
+
+#pragma once
+
+#include <vespa/vespalib/stllike/string.h>
+
+namespace search {
+namespace docsummary {
+
+class DocumentSummary
+{
+public:
+ static bool
+ readDocIdLimit(const vespalib::string &dir, uint32_t &docIdLimit);
+
+ static bool
+ writeDocIdLimit(const vespalib::string &dir, uint32_t docIdLimit);
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/common/feature.h b/searchlib/src/vespa/searchlib/common/feature.h
new file mode 100644
index 00000000000..b0eb8d3e463
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/common/feature.h
@@ -0,0 +1,10 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+namespace search {
+
+typedef double feature_t;
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/common/featureset.cpp b/searchlib/src/vespa/searchlib/common/featureset.cpp
new file mode 100644
index 00000000000..826382f0e8e
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/common/featureset.cpp
@@ -0,0 +1,90 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/searchlib/common/featureset.h>
+
+namespace search {
+
+FeatureSet::FeatureSet()
+ : _names(),
+ _docIds(),
+ _values()
+{
+}
+
+FeatureSet::FeatureSet(const StringVector &names,
+ uint32_t expectDocs)
+ : _names(names),
+ _docIds(),
+ _values()
+{
+ _docIds.reserve(expectDocs);
+ _values.reserve(expectDocs * names.size());
+}
+
+bool
+FeatureSet::equals(const FeatureSet &rhs) const
+{
+ return ((_docIds == rhs._docIds) &&
+ (_values == rhs._values) &&
+ (_names == rhs._names)); // do names last, as they are most likely to match
+}
+
+uint32_t
+FeatureSet::addDocId(uint32_t docId)
+{
+ _docIds.push_back(docId);
+ _values.resize(_names.size() * _docIds.size());
+ return (_docIds.size() - 1);
+}
+
+bool
+FeatureSet::contains(const std::vector<uint32_t> &docIds) const
+{
+ typedef std::vector<uint32_t>::const_iterator ITR;
+ ITR myPos = _docIds.begin();
+ ITR myEnd = _docIds.end();
+ ITR pos = docIds.begin();
+ ITR end = docIds.end();
+
+ for (; pos != end; ++pos) {
+ while (myPos != myEnd && *myPos < *pos) {
+ ++myPos;
+ }
+ if (myPos == myEnd || *myPos != *pos) {
+ return false;
+ }
+ ++myPos;
+ }
+ return true;
+}
+
+feature_t *
+FeatureSet::getFeaturesByIndex(uint32_t idx)
+{
+ if (idx >= _docIds.size()) {
+ return 0;
+ }
+ return &(_values[idx * _names.size()]);
+}
+
+const feature_t *
+FeatureSet::getFeaturesByDocId(uint32_t docId) const
+{
+ uint32_t low = 0;
+ uint32_t hi = _docIds.size();
+ while (low < hi) {
+ uint32_t pos = (low + hi) >> 1;
+ uint32_t val = _docIds[pos];
+ if (val < docId) {
+ low = pos + 1;
+ } else if (val > docId) {
+ hi = pos;
+ } else {
+ return &(_values[pos * _names.size()]);
+ }
+ }
+ return 0;
+}
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/common/featureset.h b/searchlib/src/vespa/searchlib/common/featureset.h
new file mode 100644
index 00000000000..cd1f0595da7
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/common/featureset.h
@@ -0,0 +1,128 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <map>
+#include <vespa/vespalib/stllike/string.h>
+#include <vector>
+#include "feature.h"
+
+namespace search {
+
+/**
+ * This class holds information about a set of features for a set of
+ * documents.
+ **/
+class FeatureSet
+{
+public:
+ typedef vespalib::string string;
+ typedef std::vector<string> StringVector;
+private:
+ StringVector _names;
+ std::vector<uint32_t> _docIds;
+ std::vector<feature_t> _values;
+
+ FeatureSet(const FeatureSet &);
+ FeatureSet & operator=(const FeatureSet &);
+
+public:
+ /**
+ * Convenience typedef for a shared pointer to an object of this
+ * class.
+ **/
+ typedef std::shared_ptr<FeatureSet> SP;
+
+ /**
+ * Create a new object without any feature information.
+ **/
+ FeatureSet();
+
+ /**
+ * Create a new object that will contain information about the
+ * given features.
+ *
+ * @param names names of all features
+ * @param expectDocs the number of documents we expect to store information about
+ **/
+ FeatureSet(const StringVector &names, uint32_t expectDocs);
+
+ /**
+ * Check whether this object is equal to the given object.
+ *
+ * @return true if the objects are equal.
+ **/
+ bool equals(const FeatureSet &rhs) const;
+
+ /**
+ * Obtain the names of all the features tracked by this object.
+ *
+ * @return feature names
+ **/
+ const StringVector &getNames() const { return _names; }
+
+ /**
+ * Obtain the number of features this object contains information
+ * about.
+ *
+ * @return number of features
+ **/
+ uint32_t numFeatures() const { return _names.size(); }
+
+ /**
+ * Obtain the number of documents this object contains information
+ * about.
+ *
+ * @return number of documents.
+ **/
+ uint32_t numDocs() const { return _docIds.size(); }
+
+ /**
+ * Add a document to the set of documents this object contains
+ * information about. Documents must be added in ascending
+ * order. When a new document is added, all features are
+ * initialized to 0.0. The return value from this method can be
+ * used together with the @ref getFeaturesByIndex method to set
+ * the actual feature values. The ordering among features are
+ * assumed to be the same as in the name vector passed to the
+ * constructor.
+ *
+ * @return the index of the document just added
+ * @param docid the id of the document to add
+ **/
+ uint32_t addDocId(uint32_t docid);
+
+ /**
+ * Check whether this object contains information about the given
+ * set of documents. The given set of documents must be sorted on
+ * document id; lowest first.
+ *
+ * @return true if this object contains information about all the given documents
+ * @param docIds the documents we want information about
+ **/
+ bool contains(const std::vector<uint32_t> &docIds) const;
+
+ /**
+ * Obtain the feature values belonging to a document based on the
+ * index into the internal docid array. This method is intended
+ * for use only when filling in the feature values during object
+ * initialization.
+ *
+ * @return pointer to features
+ * @param idx index into docid array
+ **/
+ feature_t *getFeaturesByIndex(uint32_t idx);
+
+ /**
+ * Obtain the feature values belonging to a document based on the
+ * docid value. This method is intended for lookup when generating
+ * the summary features or rank features docsum field.
+ *
+ * @return pointer to features
+ * @param docId docid value
+ **/
+ const feature_t *getFeaturesByDocId(uint32_t docId) const;
+};
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/common/fileheadercontext.cpp b/searchlib/src/vespa/searchlib/common/fileheadercontext.cpp
new file mode 100644
index 00000000000..c5d1399c47d
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/common/fileheadercontext.cpp
@@ -0,0 +1,51 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".index.fileheadercontext");
+#include "fileheadercontext.h"
+#include <vespa/vespalib/data/fileheader.h>
+
+namespace search
+{
+
+namespace common
+{
+
+using vespalib::GenericHeader;
+
+FileHeaderContext::FileHeaderContext(void)
+{
+}
+
+
+FileHeaderContext::~FileHeaderContext(void)
+{
+}
+
+
+void
+FileHeaderContext::addCreateAndFreezeTime(GenericHeader &header)
+{
+ typedef GenericHeader::Tag Tag;
+ fastos::TimeStamp ts(fastos::ClockSystem::now());
+ header.putTag(Tag("createTime", ts.us()));
+ header.putTag(Tag("freezeTime", 0));
+}
+
+
+void
+FileHeaderContext::setFreezeTime(GenericHeader &header)
+{
+ typedef GenericHeader::Tag Tag;
+ if (header.hasTag("freezeTime") &&
+ header.getTag("freezeTime").getType() == Tag::TYPE_INTEGER) {
+ fastos::TimeStamp ts(fastos::ClockSystem::now());
+ header.putTag(Tag("freezeTime", ts.us()));
+ }
+}
+
+
+} // namespace common
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/common/fileheadercontext.h b/searchlib/src/vespa/searchlib/common/fileheadercontext.h
new file mode 100644
index 00000000000..cb0f31edc38
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/common/fileheadercontext.h
@@ -0,0 +1,42 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/vespalib/stllike/string.h>
+
+namespace vespalib
+{
+
+class GenericHeader;
+
+}
+
+namespace search
+{
+
+namespace common
+{
+
+class FileHeaderContext
+{
+public:
+ FileHeaderContext(void);
+
+ virtual
+ ~FileHeaderContext(void);
+
+ virtual void
+ addTags(vespalib::GenericHeader &header,
+ const vespalib::string &name) const = 0;
+
+ static void
+ addCreateAndFreezeTime(vespalib::GenericHeader &header);
+
+ static void
+ setFreezeTime(vespalib::GenericHeader &header);
+};
+
+
+} // namespace common
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/common/foregroundtaskexecutor.cpp b/searchlib/src/vespa/searchlib/common/foregroundtaskexecutor.cpp
new file mode 100644
index 00000000000..2c50f20df30
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/common/foregroundtaskexecutor.cpp
@@ -0,0 +1,47 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".common.foregroundtaskexecutor");
+
+#include "foregroundtaskexecutor.h"
+#include <vespa/vespalib/util/threadstackexecutor.h>
+
+using vespalib::ThreadStackExecutor;
+
+namespace search
+{
+
+namespace
+{
+
+constexpr uint32_t stackSize = 128 * 1024;
+
+}
+
+
+ForegroundTaskExecutor::ForegroundTaskExecutor()
+{
+}
+
+ForegroundTaskExecutor::~ForegroundTaskExecutor()
+{
+}
+
+
+void
+ForegroundTaskExecutor::executeTask(uint64_t id,
+ vespalib::Executor::Task::UP task)
+{
+ (void) id;
+ task->run();
+}
+
+
+void
+ForegroundTaskExecutor::sync()
+{
+}
+
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/common/foregroundtaskexecutor.h b/searchlib/src/vespa/searchlib/common/foregroundtaskexecutor.h
new file mode 100644
index 00000000000..ee481f5e496
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/common/foregroundtaskexecutor.h
@@ -0,0 +1,35 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include "isequencedtaskexecutor.h"
+
+namespace vespalib
+{
+
+class ThreadStackExecutorBase;
+
+}
+
+namespace search
+{
+
+/**
+ * Class to run multiple tasks in parallel, but tasks with same
+ * id has to be run in sequence.
+ *
+ * Currently, this is a dummy version that runs everything in the foreground.
+ */
+class ForegroundTaskExecutor : public ISequencedTaskExecutor
+{
+public:
+ ForegroundTaskExecutor();
+
+ ~ForegroundTaskExecutor();
+
+ virtual void executeTask(uint64_t id,
+ vespalib::Executor::Task::UP task) override;
+
+ virtual void sync() override;
+};
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/common/fslimits.h b/searchlib/src/vespa/searchlib/common/fslimits.h
new file mode 100644
index 00000000000..64de5e3be37
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/common/fslimits.h
@@ -0,0 +1,37 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright (C) 1999-2003 Fast Search & Transfer ASA
+// Copyright (C) 2003 Overture Services Norway AS
+
+#pragma once
+
+// define min/max number of bits that may be used to
+// encode partid/rowid into the partition path field.
+// NB: MIN_ROWBITS == 0
+// Constraint: MIN_PARTBITS >= 1
+// Constraint: MIN_PARTBITS <= 6 <= MAX_PARTBITS
+
+#define MIN_PARTBITS 1
+#define MAX_PARTBITS 8
+
+#define MAX_ROWBITS 8
+
+// Currently, max word length and max number of indexes are limited by
+// the layout of binary dictionaries; see class FastS_Pagedict.
+
+#define MAX_WORD_LEN 1000
+#define MAX_INDEXES 64
+
+// max number of tiers in a multi-tier dataset.
+// may currently not be greater than 16, due to the
+// partition path encoding algorithm used.
+
+#define MAX_TIERS 16
+
+// max number of explicitly defined term rank limits
+#define MAX_TERMRANKLIMITS 32
+
+// Max number of fallthrough classes in Multi-tier fallthrough selector, just set a limit..
+#define MAX_FALLTHROUGH_SELECTORS 32
+
+#define SEARCHLIB_FEF_UNKNOWN_FIELD_LENGTH 1000000u
+
diff --git a/searchlib/src/vespa/searchlib/common/gid.h b/searchlib/src/vespa/searchlib/common/gid.h
new file mode 100644
index 00000000000..ac76c72fe7e
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/common/gid.h
@@ -0,0 +1,54 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright (C) 1998-2003 Fast Search & Transfer ASA
+// Copyright (C) 2003 Overture Services Norway AS
+
+#pragma once
+
+#include <vespa/fastos/fastos.h>
+
+namespace search {
+
+class Lid {
+public:
+ Lid() { memset(_lid, 0, sizeof(_lid)); }
+ Lid(uint32_t l) { lid(l);}
+ uint32_t lid() const
+ {
+ return (_lid[0] << 24) +
+ (_lid[1] << 16) +
+ (_lid[2] << 8) +
+ _lid[3];
+ }
+ void lid(uint32_t v)
+ {
+ _lid[0] = (v >> 24) & 0xff;
+ _lid[1] = (v >> 16) & 0xff;
+ _lid[2] = (v >> 8) & 0xff;
+ _lid[3] = v & 0xff;
+ }
+ int cmp(const Lid & b) const { return lid() - b.lid(); }
+private:
+ typedef unsigned char LidT[4];
+ LidT _lid;
+};
+
+class Gid {
+public:
+ Gid() { memset(_gid, 0, sizeof(_gid)); }
+ Gid(const char *g) { memcpy(_gid, g, sizeof(_gid)); }
+ const char * gid() const { return _gid; }
+ int cmp(const Gid & b) const { return memcmp(_gid, b._gid, sizeof(_gid)); }
+private:
+ typedef char GidT[12];
+ GidT _gid;
+};
+
+inline bool operator < (const Gid & a, const Gid & b) { return a.cmp(b) < 0; }
+inline bool operator <= (const Gid & a, const Gid & b) { return a.cmp(b) <= 0; }
+inline bool operator == (const Gid & a, const Gid & b) { return a.cmp(b) == 0; }
+inline bool operator != (const Gid & a, const Gid & b) { return a.cmp(b) != 0; }
+inline bool operator > (const Gid & a, const Gid & b) { return a.cmp(b) > 0; }
+inline bool operator >= (const Gid & a, const Gid & b) { return a.cmp(b) >= 0; }
+
+}
+
diff --git a/searchlib/src/vespa/searchlib/common/growablebitvector.cpp b/searchlib/src/vespa/searchlib/common/growablebitvector.cpp
new file mode 100644
index 00000000000..f3a4e87257c
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/common/growablebitvector.cpp
@@ -0,0 +1,55 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright (C) 1998-2003 Fast Search & Transfer ASA
+// Copyright (C) 2003 Overture Services Norway AS
+
+#include <vespa/fastos/fastos.h>
+#include "growablebitvector.h"
+
+/////////////////////////////////
+namespace search
+{
+
+using vespalib::GenerationHeldBase;
+using vespalib::GenerationHolder;
+
+GrowableBitVector::GrowableBitVector(Index newSize,
+ Index newCapacity,
+ GenerationHolder &generationHolder)
+ : AllocatedBitVector(newSize, newCapacity, nullptr, 0),
+ _generationHolder(generationHolder)
+{
+ assert(newSize <= newCapacity);
+}
+
+void
+GrowableBitVector::reserve(Index newCapacity)
+{
+ Index oldCapacity = capacity();
+ assert(newCapacity >= oldCapacity);
+ if (newCapacity == oldCapacity)
+ return;
+ hold(grow(size(), newCapacity));
+}
+
+void GrowableBitVector::hold(GenerationHeldBase::UP v)
+{
+ if (v) {
+ _generationHolder.hold(std::move(v));
+ }
+}
+
+void
+GrowableBitVector::shrink(Index newCapacity)
+{
+ Index oldCapacity = capacity();
+ assert(newCapacity <= oldCapacity);
+ hold(grow(newCapacity, std::max(capacity(), newCapacity)));
+}
+
+void
+GrowableBitVector::extend(Index newCapacity)
+{
+ hold(grow(newCapacity, std::max(capacity(), newCapacity)));
+}
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/common/growablebitvector.h b/searchlib/src/vespa/searchlib/common/growablebitvector.h
new file mode 100644
index 00000000000..799babaa78d
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/common/growablebitvector.h
@@ -0,0 +1,27 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright (C) 1998-2003 Fast Search & Transfer ASA
+// Copyright (C) 2003 Overture Services Norway AS
+
+#pragma once
+
+#include <vespa/searchlib/common/allocatedbitvector.h>
+
+namespace search {
+
+class GrowableBitVector : public AllocatedBitVector
+{
+public:
+ GrowableBitVector(Index newSize,
+ Index newCapacity,
+ GenerationHolder &generationHolder);
+
+ void reserve(Index newCapacity);
+ void shrink(Index newCapacity);
+ void extend(Index newCapacity);
+private:
+ VESPA_DLL_LOCAL void hold(GenerationHeldBase::UP v);
+ GenerationHolder &_generationHolder;
+};
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/common/hitrank.h b/searchlib/src/vespa/searchlib/common/hitrank.h
new file mode 100644
index 00000000000..38ef0dc8858
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/common/hitrank.h
@@ -0,0 +1,12 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+namespace search
+{
+
+typedef double HitRank;
+typedef double SignedHitRank;
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/common/identifiable.h b/searchlib/src/vespa/searchlib/common/identifiable.h
new file mode 100644
index 00000000000..12c04b7d86a
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/common/identifiable.h
@@ -0,0 +1,167 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/vespalib/objects/identifiable.h>
+
+#define CID_AttributeVector SEARCHLIB_CID(1)
+#define CID_NumericAttribute SEARCHLIB_CID(2)
+#define CID_IntegerAttribute SEARCHLIB_CID(3)
+#define CID_FloatingPointAttribute SEARCHLIB_CID(4)
+#define CID_StringAttribute SEARCHLIB_CID(5)
+#define CID_Int8Attribute SEARCHLIB_CID(6)
+#define CID_Int16Attribute SEARCHLIB_CID(7)
+#define CID_Int32Attribute SEARCHLIB_CID(8)
+#define CID_Int64Attribute SEARCHLIB_CID(9)
+#define CID_UInt8Attribute SEARCHLIB_CID(10)
+#define CID_UInt16Attribute SEARCHLIB_CID(11)
+#define CID_UInt32Attribute SEARCHLIB_CID(12)
+#define CID_UInt64Attribute SEARCHLIB_CID(13)
+#define CID_FloatAttribute SEARCHLIB_CID(14)
+#define CID_DoubleAttribute SEARCHLIB_CID(15)
+#define CID_AttributeVectorBase SEARCHLIB_CID(16)
+
+#define CID_Aggregator SEARCHLIB_CID(20)
+#define CID_MultiScalarIntegerAggregator SEARCHLIB_CID(21)
+#define CID_MultiScalarFloatingPointAggregator SEARCHLIB_CID(22)
+#define CID_BucketAggregator SEARCHLIB_CID(23)
+#define CID_UniqueAggregator SEARCHLIB_CID(24)
+#define CID_FixedWidthAggregator SEARCHLIB_CID(25)
+
+#define CID_BucketList SEARCHLIB_CID(30)
+#define CID_EnumBucketList SEARCHLIB_CID(31)
+#define CID_IntBucketList SEARCHLIB_CID(32)
+#define CID_FloatBucketList SEARCHLIB_CID(33)
+#define CID_StringBucketList SEARCHLIB_CID(34)
+
+#define CID_search_expression_ExpressionNode SEARCHLIB_CID(40)
+#define CID_search_expression_ResultNode SEARCHLIB_CID(41)
+#define CID_search_expression_FunctionNode SEARCHLIB_CID(42)
+#define CID_search_expression_UnaryFunctionNode SEARCHLIB_CID(43)
+#define CID_search_expression_BinaryFunctionNode SEARCHLIB_CID(44)
+#define CID_search_expression_MultiArgFunctionNode SEARCHLIB_CID(45)
+#define CID_search_expression_UnaryBitFunctionNode SEARCHLIB_CID(46)
+#define CID_search_expression_BitFunctionNode SEARCHLIB_CID(47)
+#define CID_search_expression_DocumentAccessorNode SEARCHLIB_CID(48)
+#define CID_search_expression_ConstantNode SEARCHLIB_CID(49)
+
+#define CID_search_expression_NumericResultNode SEARCHLIB_CID(50)
+#define CID_search_expression_IntegerResultNode SEARCHLIB_CID(51)
+#define CID_search_expression_FloatResultNode SEARCHLIB_CID(52)
+#define CID_search_expression_StringResultNode SEARCHLIB_CID(53)
+#define CID_search_expression_RawResultNode SEARCHLIB_CID(54)
+#define CID_search_expression_AttributeNode SEARCHLIB_CID(55)
+#define CID_search_expression_DocumentFieldNode SEARCHLIB_CID(56)
+#define CID_search_expression_NullResultNode SEARCHLIB_CID(57)
+#define CID_search_expression_FieldValue2ResultNode \
+ SEARCHLIB_CID(58)
+#define CID_search_expression_RelevanceNode SEARCHLIB_CID(59)
+#define CID_search_expression_InterpolatedLookup SEARCHLIB_CID(39)
+#define CID_search_expression_ArrayAtLookup SEARCHLIB_CID(38)
+#define CID_search_expression_ArrayOperationNode SEARCHLIB_CID(37)
+
+#define CID_search_expression_NegateFunctionNode SEARCHLIB_CID(60)
+#define CID_search_expression_AddFunctionNode SEARCHLIB_CID(61)
+#define CID_search_expression_MultiplyFunctionNode SEARCHLIB_CID(62)
+#define CID_search_expression_DivideFunctionNode SEARCHLIB_CID(63)
+#define CID_search_expression_ModuloFunctionNode SEARCHLIB_CID(64)
+#define CID_search_expression_MinFunctionNode SEARCHLIB_CID(65)
+#define CID_search_expression_MaxFunctionNode SEARCHLIB_CID(66)
+#define CID_search_expression_AndFunctionNode SEARCHLIB_CID(67)
+#define CID_search_expression_OrFunctionNode SEARCHLIB_CID(68)
+#define CID_search_expression_XorFunctionNode SEARCHLIB_CID(69)
+#define CID_search_expression_MD5BitFunctionNode SEARCHLIB_CID(70)
+#define CID_search_expression_XorBitFunctionNode SEARCHLIB_CID(71)
+#define CID_search_expression_CatFunctionNode SEARCHLIB_CID(72)
+#define CID_search_expression_GetDocIdNamespaceSpecificFunctionNode SEARCHLIB_CID(73)
+#define CID_search_expression_GetYMUMChecksumFunctionNode SEARCHLIB_CID(74)
+#define CID_search_expression_TimeStampFunctionNode SEARCHLIB_CID(75)
+#define CID_search_expression_RangeBucketPreDefFunctionNode SEARCHLIB_CID(76)
+#define CID_search_expression_FixedWidthBucketFunctionNode SEARCHLIB_CID(77)
+#define CID_search_expression_NumericFunctionNode SEARCHLIB_CID(78)
+#define CID_search_expression_ExpressionTree SEARCHLIB_CID(79)
+
+#define CID_search_aggregation_AggregationResult SEARCHLIB_CID(80)
+#define CID_search_aggregation_CountAggregationResult SEARCHLIB_CID(81)
+#define CID_search_aggregation_SumAggregationResult SEARCHLIB_CID(82)
+#define CID_search_aggregation_MaxAggregationResult SEARCHLIB_CID(83)
+#define CID_search_aggregation_MinAggregationResult SEARCHLIB_CID(84)
+#define CID_search_aggregation_AverageAggregationResult SEARCHLIB_CID(85)
+#define CID_search_aggregation_XorAggregationResult SEARCHLIB_CID(86)
+#define CID_search_aggregation_HitsAggregationResult SEARCHLIB_CID(87)
+#define CID_search_aggregation_ExpressionCountAggregationResult \
+ SEARCHLIB_CID(88)
+
+#define CID_search_aggregation_Group SEARCHLIB_CID(90)
+#define CID_search_aggregation_Grouping SEARCHLIB_CID(91)
+#define CID_search_aggregation_GroupingLevel SEARCHLIB_CID(93)
+#define CID_search_aggregation_Hit SEARCHLIB_CID(94)
+#define CID_search_aggregation_FS4Hit SEARCHLIB_CID(95)
+#define CID_search_aggregation_VdsHit SEARCHLIB_CID(96)
+#define CID_search_aggregation_HitList SEARCHLIB_CID(97)
+
+#define CID_search_expression_BucketResultNode SEARCHLIB_CID(100)
+#define CID_search_expression_IntegerBucketResultNode SEARCHLIB_CID(101)
+#define CID_search_expression_FloatBucketResultNode SEARCHLIB_CID(102)
+#define CID_search_expression_StringBucketResultNode SEARCHLIB_CID(103)
+#define CID_search_expression_Int8ResultNode SEARCHLIB_CID(104)
+#define CID_search_expression_Int16ResultNode SEARCHLIB_CID(105)
+#define CID_search_expression_Int32ResultNode SEARCHLIB_CID(106)
+#define CID_search_expression_Int64ResultNode SEARCHLIB_CID(107)
+
+#define CID_search_expression_ResultNodeVector SEARCHLIB_CID(108)
+#define CID_search_expression_IntegerResultNodeVector SEARCHLIB_CID(109)
+#define CID_search_expression_FloatResultNodeVector SEARCHLIB_CID(110)
+#define CID_search_expression_StringResultNodeVector SEARCHLIB_CID(111)
+#define CID_search_expression_IntegerBucketResultNodeVector SEARCHLIB_CID(112)
+#define CID_search_expression_FloatBucketResultNodeVector SEARCHLIB_CID(113)
+#define CID_search_expression_StringBucketResultNodeVector SEARCHLIB_CID(114)
+#define CID_search_expression_RawResultNodeVector SEARCHLIB_CID(115)
+#define CID_search_expression_Int8ResultNodeVector SEARCHLIB_CID(116)
+#define CID_search_expression_Int16ResultNodeVector SEARCHLIB_CID(117)
+#define CID_search_expression_Int32ResultNodeVector SEARCHLIB_CID(118)
+#define CID_search_expression_Int64ResultNodeVector SEARCHLIB_CID(119)
+#define CID_search_expression_DefaultValue SEARCHLIB_CID(120)
+#define CID_search_expression_SingleResultNode SEARCHLIB_CID(121)
+#define CID_search_expression_EnumResultNode SEARCHLIB_CID(122)
+#define CID_search_expression_EnumResultNodeVector SEARCHLIB_CID(123)
+#define CID_search_expression_PositiveInfinityResultNode SEARCHLIB_CID(124)
+#define CID_search_expression_RawBucketResultNode SEARCHLIB_CID(125)
+#define CID_search_expression_RawBucketResultNodeVector SEARCHLIB_CID(126)
+#define CID_search_expression_AttributeResult SEARCHLIB_CID(127)
+#define CID_search_expression_GeneralResultNodeVector SEARCHLIB_CID(128)
+#define CID_search_expression_EnumAttributeResult SEARCHLIB_CID(129)
+
+#define CID_search_expression_StrLenFunctionNode SEARCHLIB_CID(130)
+#define CID_search_expression_ToStringFunctionNode SEARCHLIB_CID(131)
+#define CID_search_expression_NumElemFunctionNode SEARCHLIB_CID(132)
+#define CID_search_expression_StrCatFunctionNode SEARCHLIB_CID(133)
+#define CID_search_expression_ToFloatFunctionNode SEARCHLIB_CID(134)
+#define CID_search_expression_ToIntFunctionNode SEARCHLIB_CID(135)
+#define CID_search_expression_MathFunctionNode SEARCHLIB_CID(136)
+#define CID_search_expression_SortFunctionNode SEARCHLIB_CID(137)
+#define CID_search_expression_ReverseFunctionNode SEARCHLIB_CID(138)
+#define CID_search_expression_ZCurveFunctionNode SEARCHLIB_CID(139)
+#define CID_search_expression_UcaFunctionNode SEARCHLIB_CID(140)
+#define CID_search_expression_ToRawFunctionNode SEARCHLIB_CID(141)
+#define CID_search_expression_AggregationRefNode SEARCHLIB_CID(142)
+#define CID_search_expression_NormalizeSubjectFunctionNode SEARCHLIB_CID(143)
+#define CID_search_expression_DebugWaitFunctionNode SEARCHLIB_CID(144)
+
+#define CID_search_QueryNode SEARCHLIB_CID(150)
+#define CID_search_Query SEARCHLIB_CID(151)
+#define CID_search_QueryTerm SEARCHLIB_CID(152)
+#define CID_search_QueryConnector SEARCHLIB_CID(153)
+#define CID_search_AndQueryNode SEARCHLIB_CID(154)
+#define CID_search_AndNotQueryNode SEARCHLIB_CID(155)
+#define CID_search_OrQueryNode SEARCHLIB_CID(156)
+#define CID_search_EquivQueryNode SEARCHLIB_CID(157)
+#define CID_search_PhraseQueryNode SEARCHLIB_CID(158)
+#define CID_search_NotQueryNode SEARCHLIB_CID(159)
+#define CID_search_NearQueryNode SEARCHLIB_CID(160)
+#define CID_search_ONearQueryNode SEARCHLIB_CID(161)
+#define CID_search_TrueNode SEARCHLIB_CID(162)
+
+#define CID_PredicateAttribute SEARCHLIB_CID(163)
+
+#define CID_search_NormalSketch SEARCHLIB_CID(170)
+#define CID_search_SparseSketch SEARCHLIB_CID(171)
diff --git a/searchlib/src/vespa/searchlib/common/idestructorcallback.h b/searchlib/src/vespa/searchlib/common/idestructorcallback.h
new file mode 100644
index 00000000000..bcc6a5a78bb
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/common/idestructorcallback.h
@@ -0,0 +1,20 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+namespace search
+{
+
+/**
+ * Interface for class that performs a callback when instance is
+ * destroyed. Typically a shared pointer to an instance is passed
+ * around to multiple worker threads that performs portions of a
+ * larger task before dropping the shared pointer, triggering the
+ * callback when all worker threads have completed.
+ */
+class IDestructorCallback
+{
+public:
+ virtual ~IDestructorCallback() { }
+};
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/common/idocumentmetastore.h b/searchlib/src/vespa/searchlib/common/idocumentmetastore.h
new file mode 100644
index 00000000000..82db8df4947
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/common/idocumentmetastore.h
@@ -0,0 +1,152 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "lid_usage_stats.h"
+#include <vespa/document/base/globalid.h>
+#include <vespa/document/bucket/bucketid.h>
+#include <persistence/spi/types.h>
+
+namespace search {
+
+/**
+ * Meta data for a single document.
+ **/
+struct DocumentMetaData {
+ typedef uint32_t DocId;
+ DocId lid;
+ storage::spi::Timestamp timestamp;
+ document::BucketId bucketId;
+ document::GlobalId gid;
+ bool removed;
+
+ typedef std::vector<DocumentMetaData> Vector;
+
+ DocumentMetaData()
+ : lid(0),
+ timestamp(0),
+ bucketId(),
+ gid(),
+ removed(false)
+ {
+ }
+
+ DocumentMetaData(DocId lid_,
+ storage::spi::Timestamp timestamp_,
+ document::BucketId bucketId_,
+ const document::GlobalId &gid_)
+ : lid(lid_),
+ timestamp(timestamp_),
+ bucketId(bucketId_),
+ gid(gid_),
+ removed(false)
+ {
+ }
+
+ DocumentMetaData(DocId lid_,
+ storage::spi::Timestamp timestamp_,
+ document::BucketId bucketId_,
+ const document::GlobalId &gid_,
+ bool removed_)
+ : lid(lid_),
+ timestamp(timestamp_),
+ bucketId(bucketId_),
+ gid(gid_),
+ removed(removed_)
+ {
+ }
+
+ bool valid() const {
+ return lid != 0 && timestamp != 0 && bucketId.isSet();
+ }
+};
+
+namespace queryeval {
+
+class Blueprint;
+
+}
+
+
+/**
+ * Read interface for a document meta store that provides mapping between
+ * global document id (gid) and local document id (lid) with additional
+ * meta data per document.
+ **/
+struct IDocumentMetaStore {
+ typedef uint32_t DocId;
+ typedef document::GlobalId GlobalId;
+ typedef document::BucketId BucketId;
+ typedef storage::spi::Timestamp Timestamp;
+
+ virtual ~IDocumentMetaStore() {}
+
+ /**
+ * Retrieves the gid associated with the given lid.
+ * Returns true if found, false otherwise.
+ **/
+ virtual bool getGid(DocId lid, GlobalId &gid) const = 0;
+
+ /**
+ * Retrieves the lid associated with the given gid.
+ * Returns true if found, false otherwise.
+ **/
+ virtual bool getLid(const GlobalId &gid, DocId &lid) const = 0;
+
+ /**
+ * Retrieves the meta data for the document with the given gid.
+ **/
+ virtual DocumentMetaData getMetaData(const GlobalId &gid) const = 0;
+
+ /**
+ * Retrieves meta data for all documents contained in the given bucket.
+ **/
+ virtual void getMetaData(const BucketId &bucketId, DocumentMetaData::Vector &result) const = 0;
+
+ /**
+ * Returns the lid following the largest lid used in the store.
+ *
+ * As long as the reader holds a read guard on the document meta
+ * store, we guarantee that the meta store info for lids that were
+ * valid when calling this method will remain valid while the
+ * guard is held, i.e. lids for newly removed documents are not
+ * reused while the read guard is held.
+ *
+ * Access to lids beyond the returned limit is not safe.
+ *
+ * The return value can be used as lid range for queries when
+ * attribute writer threads are synced, and is propagated as such
+ * when visibility delay is nonzero and forceCommit() method is
+ * called regularly on feed views, cf. proton::FastAccessFeedView.
+ *
+ * In the future, this method might be renamed to getReaderDocIdLimit().
+ **/
+ virtual DocId getCommittedDocIdLimit() const = 0;
+
+ /**
+ * Returns the number of used lids in this store.
+ */
+ virtual DocId getNumUsedLids() const = 0;
+
+ /**
+ * Returns the number of active lids in this store.
+ * This should be <= getNumUsedLids().
+ * Active lids correspond to documents in active buckets.
+ */
+ virtual DocId getNumActiveLids() const = 0;
+
+ /**
+ * Returns stats on the usage and availability of lids in this store.
+ */
+ virtual LidUsageStats getLidUsageStats() const = 0;
+
+ /**
+ * Creates a black list blueprint that returns a search iterator
+ * that gives hits for all documents that should not be visible.
+ **/
+ virtual std::unique_ptr<queryeval::Blueprint> createBlackListBlueprint() const = 0;
+};
+
+
+}
+
diff --git a/searchlib/src/vespa/searchlib/common/indexmetainfo.cpp b/searchlib/src/vespa/searchlib/common/indexmetainfo.cpp
new file mode 100644
index 00000000000..45d7f0ac82d
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/common/indexmetainfo.cpp
@@ -0,0 +1,354 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".indexmetainfo");
+#include <vespa/vespalib/util/stringfmt.h>
+#include <vespa/vespalib/util/guard.h>
+#include <vespa/searchlib/common/indexmetainfo.h>
+#include <algorithm>
+
+namespace {
+
+class Parser {
+private:
+ vespalib::string _name;
+ vespalib::FilePointer _file;
+ uint32_t _line;
+ char _buf[2048];
+ bool _error;
+ vespalib::string _lastKey;
+ vespalib::string _lastValue;
+ uint32_t _lastIdx;
+ bool _matched;
+
+public:
+ Parser(const vespalib::string &name)
+ : _name(name),
+ _file(fopen(name.c_str(), "r")),
+ _line(0),
+ _buf(),
+ _error(false),
+ _lastKey(),
+ _lastValue(),
+ _lastIdx(0),
+ _matched(true)
+ {
+ _error = !_file.valid();
+ }
+ bool openFailed() {
+ LOG(warning, "could not open file for reading: %s", _name.c_str());
+ _error = true;
+ return false;
+ }
+ bool illegalLine() {
+ LOG(warning, "%s:%d: illegal line: %s",
+ _name.c_str(), _line, _buf);
+ _error = true;
+ return false;
+ }
+ bool illegalArrayKey() {
+ LOG(warning, "%s:%d: illegal array key '%s'(value='%s')",
+ _name.c_str(), _line, _lastKey.c_str(), _lastValue.c_str());
+ _error = true;
+ return false;
+ }
+ bool illegalValue() {
+ LOG(warning, "%s:%d: illegal value for '%s': %s",
+ _name.c_str(), _line, _lastKey.c_str(),
+ _lastValue.c_str());
+ _error = true;
+ return false;
+ }
+ bool unknown() {
+ LOG(warning, "%s:%d: unknown key '%s'(value='%s')",
+ _name.c_str(), _line, _lastKey.c_str(), _lastValue.c_str());
+ _error = true;
+ return false;
+ }
+ bool status() const { return !_error; }
+ bool next() {
+ if (_error) {
+ return false;
+ }
+ if (!_matched) {
+ return unknown();
+ }
+ if (!_file.valid()) {
+ return openFailed();
+ }
+ if (fgets(_buf, sizeof(_buf), _file) == NULL) {
+ return false; // EOF
+ }
+ ++_line;
+ uint32_t len = strlen(_buf);
+ if (len > 0 && _buf[len - 1] == '\n') {
+ _buf[--len] = '\0';
+ }
+ char *split = strchr(_buf, '=');
+ if (split == NULL || (split - _buf) == 0) {
+ return illegalLine();
+ }
+ _lastKey = vespalib::string(_buf, split - _buf);
+ _lastValue = vespalib::string(split + 1, (_buf + len) - (split + 1));
+ _matched = false;
+ return true;
+ }
+ const vespalib::string key() const { return _lastKey; }
+ const vespalib::string value() const { return _lastValue; }
+ void parseBool(const vespalib::string &k, bool &v) {
+ if (!_matched && !_error && _lastKey == k) {
+ _matched = true;
+ if (_lastValue == "true") {
+ v = true;
+ } else if (_lastValue == "false") {
+ v = false;
+ } else {
+ illegalValue();
+ }
+ }
+ }
+ void parseString(const vespalib::string &k, vespalib::string &v) {
+ if (!_matched && !_error && _lastKey == k) {
+ _matched = true;
+ v = _lastValue;
+ }
+ }
+ void parseInt64(const vespalib::string &k, uint64_t &v) {
+ if (!_matched && !_error && _lastKey == k) {
+ _matched = true;
+ char *end = NULL;
+ uint64_t val = strtoull(_lastValue.c_str(), &end, 10);
+ if (end == NULL || *end != '\0' ||
+ val == static_cast<uint64_t>(-1)) {
+ illegalValue();
+ return;
+ }
+ v = val;
+ }
+ }
+ bool parseArray(const vespalib::string &name, uint32_t size) {
+ if (_matched || _error
+ || _lastKey.length() < name.length() + 1
+ || strncmp(_lastKey.c_str(), name.c_str(), name.length()) != 0
+ || _lastKey[name.length()] != '.')
+ {
+ return false;
+ }
+ vespalib::string::size_type dot2 = _lastKey.find('.', name.length() + 1);
+ if (dot2 == vespalib::string::npos) {
+ return illegalArrayKey();
+ }
+ char *end = NULL;
+ const char *pt = _lastKey.c_str() + name.length() + 1;
+ uint32_t val = strtoul(pt, &end, 10);
+ if (end == NULL || end == pt || *end != '.'
+ || val > size || size > val + 1)
+ {
+ return illegalArrayKey();
+ }
+ _lastIdx = val;
+ _lastKey = _lastKey.substr(dot2 + 1);
+ return true;
+ }
+ uint32_t idx() const { return _lastIdx; }
+};
+
+} // namespace <unnamed>
+
+namespace search {
+
+vespalib::string
+IndexMetaInfo::makeFileName(const vespalib::string &baseName)
+{
+ if (_path.length() == 0 || _path == ".") {
+ return baseName;
+ } else if (_path[_path.length() - 1] == '/') {
+ return vespalib::make_string("%s%s", _path.c_str(), baseName.c_str());
+ }
+ return vespalib::make_string("%s/%s", _path.c_str(), baseName.c_str());
+}
+
+
+IndexMetaInfo::Snapshot &
+IndexMetaInfo::getCreateSnapshot(uint32_t idx)
+{
+ while (idx >= _snapshots.size()) {
+ _snapshots.push_back(Snapshot());
+ }
+ return _snapshots[idx];
+}
+
+
+IndexMetaInfo::SnapshotList::iterator
+IndexMetaInfo::findSnapshot(uint64_t syncToken)
+{
+ for (SnapItr it = _snapshots.begin(); it != _snapshots.end(); ++it) {
+ if (it->syncToken == syncToken) {
+ return it;
+ }
+ }
+ return _snapshots.end();
+}
+
+
+IndexMetaInfo::IndexMetaInfo(const vespalib::string &path)
+ : _path(path),
+ _snapshots()
+{
+}
+
+
+IndexMetaInfo::Snapshot
+IndexMetaInfo::getBestSnapshot() const
+{
+ int idx = _snapshots.size() - 1;
+ while (idx >= 0 && !_snapshots[idx].valid) {
+ --idx;
+ }
+ if (idx >= 0) {
+ return _snapshots[idx];
+ } else {
+ return Snapshot();
+ }
+}
+
+
+IndexMetaInfo::Snapshot
+IndexMetaInfo::getSnapshot(uint64_t syncToken) const
+{
+ IndexMetaInfo *self = const_cast<IndexMetaInfo *>(this);
+ SnapItr itr = self->findSnapshot(syncToken);
+ if (itr == _snapshots.end()) {
+ return Snapshot();
+ }
+ return *itr;
+}
+
+
+bool
+IndexMetaInfo::addSnapshot(const Snapshot &snap)
+{
+ if (snap.dirName.empty()
+ || findSnapshot(snap.syncToken) != _snapshots.end())
+ {
+ return false;
+ }
+ assert(snap.syncToken != uint64_t(-1));
+ _snapshots.push_back(snap);
+ std::sort(_snapshots.begin(), _snapshots.end());
+ return true;
+}
+
+
+bool
+IndexMetaInfo::removeSnapshot(uint64_t syncToken)
+{
+ SnapItr itr = findSnapshot(syncToken);
+ if (itr == _snapshots.end()) {
+ return false;
+ }
+ _snapshots.erase(itr);
+ return true;
+}
+
+
+bool
+IndexMetaInfo::validateSnapshot(uint64_t syncToken)
+{
+ SnapItr itr = findSnapshot(syncToken);
+ if (itr == _snapshots.end()) {
+ return false;
+ }
+ itr->valid = true;
+ return true;
+}
+
+
+bool
+IndexMetaInfo::invalidateSnapshot(uint64_t syncToken)
+{
+ SnapItr itr = findSnapshot(syncToken);
+ if (itr == _snapshots.end()) {
+ return false;
+ }
+ itr->valid = false;
+ return true;
+}
+
+
+void
+IndexMetaInfo::clear()
+{
+ _snapshots.resize(0);
+}
+
+
+bool
+IndexMetaInfo::load(const vespalib::string &baseName)
+{
+ clear();
+ Parser parser(makeFileName(baseName));
+ while (parser.status() && parser.next()) {
+ if (parser.parseArray("snapshot", _snapshots.size())) {
+ Snapshot &snap = getCreateSnapshot(parser.idx());
+ parser.parseBool("valid", snap.valid);
+ parser.parseInt64("syncToken", snap.syncToken);
+ parser.parseString("dirName", snap.dirName);
+ assert(snap.syncToken != static_cast<uint64_t>(-1));
+ }
+ }
+ std::sort(_snapshots.begin(), _snapshots.end());
+ return parser.status();
+}
+
+
+bool
+IndexMetaInfo::save(const vespalib::string &baseName)
+{
+ vespalib::string fileName = makeFileName(baseName);
+ vespalib::string newName = fileName + ".new";
+ vespalib::FilePointer f(fopen(newName.c_str(), "w"));
+ if (!f.valid()) {
+ LOG(warning, "could not open file for writing: %s", newName.c_str());
+ return false;
+ }
+ for (uint32_t i = 0; i < _snapshots.size(); ++i) {
+ Snapshot &snap = _snapshots[i];
+ fprintf(f, "snapshot.%d.valid=%s\n", i, snap.valid? "true" : "false");
+ fprintf(f, "snapshot.%d.syncToken=%" PRIu64 "\n", i, snap.syncToken);
+ fprintf(f, "snapshot.%d.dirName=%s\n", i, snap.dirName.c_str());
+ }
+ if (ferror(f) != 0) {
+ LOG(error,
+ "Could not write to file %s",
+ newName.c_str());
+ return false;
+ }
+ if (fflush(f) != 0) {
+ LOG(error,
+ "Could not flush file %s",
+ newName.c_str());
+ return false;
+ }
+ if (fsync(fileno(f)) != 0) {
+ LOG(error,
+ "Could not fsync file %s",
+ newName.c_str());
+ return false;
+ }
+ if (fclose(f.release()) != 0) {
+ LOG(error,
+ "Could not close file %s",
+ newName.c_str());
+ return false;
+ }
+ if (rename(newName.c_str(), fileName.c_str()) != 0) {
+ LOG(warning, "could not rename: %s->%s",
+ newName.c_str(), fileName.c_str());
+ return false;
+ }
+ return true;
+}
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/common/indexmetainfo.h b/searchlib/src/vespa/searchlib/common/indexmetainfo.h
new file mode 100644
index 00000000000..823f69e7a94
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/common/indexmetainfo.h
@@ -0,0 +1,63 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/vespalib/stllike/string.h>
+#include <vector>
+
+namespace search {
+
+class IndexMetaInfo
+{
+public:
+ struct Snapshot
+ {
+ bool valid;
+ uint64_t syncToken;
+ vespalib::string dirName;
+ Snapshot() : valid(false), syncToken(0), dirName() {}
+ Snapshot(bool valid_, uint64_t syncToken_, const vespalib::string &dirName_)
+ : valid(valid_), syncToken(syncToken_), dirName(dirName_) {}
+ bool operator==(const Snapshot &rhs) const {
+ return (valid == rhs.valid
+ && syncToken == rhs.syncToken
+ && dirName == rhs.dirName);
+ }
+ bool operator<(const Snapshot &rhs) const {
+ return syncToken < rhs.syncToken;
+ }
+ };
+ typedef std::vector<Snapshot> SnapshotList;
+ typedef SnapshotList::iterator SnapItr;
+
+private:
+ vespalib::string _path;
+ SnapshotList _snapshots;
+
+ vespalib::string makeFileName(const vespalib::string &baseName);
+ Snapshot &getCreateSnapshot(uint32_t idx);
+
+ SnapItr findSnapshot(uint64_t syncToken);
+
+public:
+ IndexMetaInfo(const vespalib::string &path);
+ vespalib::string getPath() const { return _path; }
+ void setPath(const vespalib::string &path) { _path = path; }
+
+ const SnapshotList &snapshots() const { return _snapshots; }
+
+ Snapshot getSnapshot(uint64_t syncToken) const;
+ Snapshot getBestSnapshot() const;
+ bool addSnapshot(const Snapshot &snap);
+ bool removeSnapshot(uint64_t syncToken);
+ bool validateSnapshot(uint64_t syncToken);
+ bool invalidateSnapshot(uint64_t syncToken);
+
+ void clear();
+ bool load(const vespalib::string &baseName = "meta-info.txt");
+ bool save(const vespalib::string &baseName = "meta-info.txt");
+};
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/common/isequencedtaskexecutor.h b/searchlib/src/vespa/searchlib/common/isequencedtaskexecutor.h
new file mode 100644
index 00000000000..f978cb30ff5
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/common/isequencedtaskexecutor.h
@@ -0,0 +1,66 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/vespalib/util/executor.h>
+#include <vespa/vespalib/stllike/hash_fun.h>
+#include "lambdatask.h"
+
+namespace search
+{
+
+/**
+ * Interface class to run multiple tasks in parallel, but tasks with same
+ * id has to be run in sequence.
+ */
+class ISequencedTaskExecutor
+{
+public:
+ virtual ~ISequencedTaskExecutor() { }
+
+ /**
+ * Schedule a task to run after all previously scheduled tasks with
+ * same id. All tasks must be scheduled from same thread.
+ *
+ * @param id task id.
+ * @param task unique pointer to the task to be executed
+ */
+ virtual void executeTask(uint64_t id,
+ vespalib::Executor::Task::UP task) = 0;
+
+ /**
+ * Wait for all scheduled tasks to complete.
+ */
+ virtual void sync() = 0;
+
+ /**
+ * Wrap lambda function into a task and schedule it to be run.
+ * Caller must ensure that pointers and references are valid and
+ * call sync before tearing down pointed to/referenced data.
+ * All tasks must be scheduled from same thread.
+ *
+ * @param id task id.
+ * @param function function to be wrapped in a task and later executed
+ */
+ template <class FunctionType>
+ inline void execute(uint64_t id, FunctionType &&function) {
+ executeTask(id, makeLambdaTask(std::forward<FunctionType>(function)));
+ }
+
+ /**
+ * Wrap lambda function into a task and schedule it to be run.
+ * Caller must ensure that pointers and references are valid and
+ * call sync before tearing down pointed to/referenced data.
+ * All tasks must be scheduled from same thread.
+ *
+ * @param id task id.
+ * @param function function to be wrapped in a task and later executed
+ */
+ template <class FunctionType>
+ inline void execute(const vespalib::stringref id, FunctionType &&function) {
+ vespalib::hash<vespalib::stringref> hashfun;
+ executeTask(hashfun(id),
+ makeLambdaTask(std::forward<FunctionType>(function)));
+ }
+};
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/common/lambdatask.h b/searchlib/src/vespa/searchlib/common/lambdatask.h
new file mode 100644
index 00000000000..d03d23ba3dd
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/common/lambdatask.h
@@ -0,0 +1,28 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/vespalib/util/executor.h>
+
+namespace search
+{
+
+template <class FunctionType>
+class LambdaTask : public vespalib::Executor::Task {
+ FunctionType _func;
+
+public:
+ LambdaTask(const FunctionType &func) : _func(func) {}
+ LambdaTask(FunctionType &&func) : _func(std::move(func)) {}
+ virtual void run() { _func(); }
+};
+
+template <class FunctionType>
+inline vespalib::Executor::Task::UP
+makeLambdaTask(FunctionType &&function)
+{
+ return std::make_unique<LambdaTask<std::decay_t<FunctionType>>>
+ (std::forward<FunctionType>(function));
+}
+
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/common/lid_usage_stats.h b/searchlib/src/vespa/searchlib/common/lid_usage_stats.h
new file mode 100644
index 00000000000..ced4bc36c8d
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/common/lid_usage_stats.h
@@ -0,0 +1,66 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/document/bucket/bucketid.h>
+#include <persistence/spi/types.h>
+
+namespace search {
+
+/**
+ * Stats on the usage and availability of lids in a document meta store.
+ */
+class LidUsageStats
+{
+private:
+ uint32_t _lidLimit;
+ uint32_t _usedLids;
+ uint32_t _lowestFreeLid;
+ uint32_t _highestUsedLid;
+
+public:
+ LidUsageStats()
+ : _lidLimit(0),
+ _usedLids(0),
+ _lowestFreeLid(0),
+ _highestUsedLid(0)
+ {
+ }
+ LidUsageStats(uint32_t lidLimit,
+ uint32_t usedLids,
+ uint32_t lowestFreeLid,
+ uint32_t highestUsedLid)
+ : _lidLimit(lidLimit),
+ _usedLids(usedLids),
+ _lowestFreeLid(lowestFreeLid),
+ _highestUsedLid(highestUsedLid)
+ {
+ }
+ uint32_t getLidLimit() const { return _lidLimit; }
+ uint32_t getUsedLids() const { return _usedLids; }
+ uint32_t getLowestFreeLid() const { return _lowestFreeLid; }
+ uint32_t getHighestUsedLid() const { return _highestUsedLid; }
+ uint32_t getLidBloat() const {
+ // Account for reserved lid 0
+ int32_t lidBloat = getLidLimit() - getUsedLids() - 1;
+ if (lidBloat < 0) {
+ return 0u;
+ }
+ return lidBloat;
+ }
+ double getLidBloatFactor() const {
+ return (double)getLidBloat() / (double)getLidLimit();
+ }
+ double getLidFragmentationFactor() const {
+ int32_t freeLids = getHighestUsedLid() - getUsedLids();
+ if (freeLids < 0) {
+ return 0;
+ }
+ if (getHighestUsedLid() == 0) {
+ return 0;
+ }
+ return (double)freeLids / (double)getHighestUsedLid();
+ }
+};
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/common/location.cpp b/searchlib/src/vespa/searchlib/common/location.cpp
new file mode 100644
index 00000000000..5374870773e
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/common/location.cpp
@@ -0,0 +1,205 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright (C) 1999-2003 Fast Search & Transfer ASA
+// Copyright (C) 2003 Overture Services Norway AS
+#include <vespa/fastos/fastos.h>
+#include <vespa/searchlib/common/location.h>
+
+namespace search {
+namespace common {
+
+Location::Location(void) :
+ _zBoundingBox(0,0,0,0),
+ _x(0),
+ _y(0),
+ _xAspect(0u),
+ _radius(std::numeric_limits<uint32_t>::max()),
+ _minx(std::numeric_limits<int32_t>::min()),
+ _maxx(std::numeric_limits<int32_t>::max()),
+ _miny(std::numeric_limits<int32_t>::min()),
+ _maxy(std::numeric_limits<int32_t>::max()),
+ _rankOnDistance(false),
+ _pruneOnDistance(false),
+ _parseError(NULL)
+{
+}
+
+
+bool
+Location::getDimensionality(const char **pp)
+{
+ if (**pp == '2') {
+ (*pp)++;
+ if (**pp != ',') {
+ _parseError = "Missing comma after 2D dimensionality";
+ return false;
+ }
+ (*pp)++;
+ return true;
+ }
+ _parseError = "Bad dimensionality spec, not 2D";
+ return false;
+}
+
+
+int
+Location::getInt(const char **pp)
+{
+ const char *p = *pp;
+ int val;
+ bool isminus;
+
+ val = 0;
+ isminus = false;
+ if (*p == '-') {
+ isminus = true;
+ p++;
+ }
+ while (*p >= '0' && *p <= '9')
+ val = val * 10 + *p++ - '0';
+ *pp = p;
+ return isminus ? - val : val;
+}
+
+bool Location::parse(const vespalib::string &locStr)
+{
+ bool hadCutoff = false;
+ bool hadLoc = false;
+ const char *p = locStr.c_str();
+ while (*p != '\0') {
+ if (*p == '[') {
+ p++;
+ if (hadCutoff) {
+ _parseError = "Duplicate square cutoff";
+ return false;
+ }
+ hadCutoff = true;
+ if (!getDimensionality(&p))
+ return false;
+ _minx = getInt(&p);
+ if (*p != ',') {
+ _parseError = "Missing ',' after minx";
+ return false;
+ }
+ p++;
+ _miny = getInt(&p);
+ if (*p != ',') {
+ _parseError = "Missing ',' after miny";
+ return false;
+ }
+ p++;
+ _maxx = getInt(&p);
+ if (*p != ',') {
+ _parseError = "Missing ',' after maxx";
+ return false;
+ }
+ p++;
+ _maxy = getInt(&p);
+ if (*p != ']') {
+ _parseError = "Missing ']' after maxy";
+ return false;
+ }
+ p++;
+ } else if (*p == '(') {
+ p++;
+ if (hadLoc) {
+ _parseError = "Duplicate location";
+ return false;
+ }
+ hadLoc = true;
+ if (!getDimensionality(&p))
+ return false;
+ _x = getInt(&p);
+ if (*p != ',') {
+ _parseError = "Missing ',' after x position";
+ return false;
+ }
+ p++;
+ _y = getInt(&p);
+ if (*p != ',') {
+ _parseError = "Missing ',' after y position";
+ return false;
+ }
+ p++;
+ _radius = getInt(&p);
+ if (*p != ',') {
+ _parseError = "Missing ',' after radius";
+ return false;
+ }
+ p++;
+ /* _tableID = */ (void) getInt(&p);
+ if (*p != ',') {
+ _parseError = "Missing ',' after tableID";
+ return false;
+ }
+ p++;
+ /* _rankMultiplier = */ (void) getInt(&p);
+ if (*p != ',') {
+ _parseError = "Missing ',' after rank multiplier";
+ return false;
+ }
+ p++;
+ /* _rankOnlyOnDistance = */ (void) (getInt(&p) != 0);
+ if (*p == ',') {
+ p++;
+ _xAspect = getInt(&p);
+ if (*p != ')') {
+ _parseError = "Missing ')' after xAspect";
+ return false;
+ }
+ } else {
+ if (*p != ')') {
+ _parseError = "Missing ')' after rankOnlyOnDistance flag";
+ return false;
+ }
+ }
+ p++;
+ } else if (*p == ' ')
+ p++;
+ else {
+ _parseError = "Unexpected char in location spec";
+ return false;
+ }
+ }
+
+ if (hadLoc) {
+ _rankOnDistance = true;
+ uint32_t maxdx = _radius;
+ if (_xAspect != 0) {
+ uint64_t maxdx2 = ((static_cast<uint64_t>(_radius) << 32) + 0xffffffffu) /
+ _xAspect;
+ if (maxdx2 >= 0xffffffffu)
+ maxdx = 0xffffffffu;
+ else
+ maxdx = static_cast<uint32_t>(maxdx2);
+ }
+ if (static_cast<int32_t>(_x - maxdx) > _minx &&
+ static_cast<int64_t>(_x) - static_cast<int64_t>(maxdx) >
+ static_cast<int64_t>(_minx))
+ _minx = _x - maxdx;
+ if (static_cast<int32_t>(_x + maxdx) < _maxx &&
+ static_cast<int64_t>(_x) + static_cast<int64_t>(maxdx) <
+ static_cast<int64_t>(_maxx))
+ _maxx = _x + maxdx;
+ if (static_cast<int32_t>(_y - _radius) > _miny &&
+ static_cast<int64_t>(_y) - static_cast<int64_t>(_radius) >
+ static_cast<int64_t>(_miny))
+ _miny = _y - _radius;
+ if (static_cast<int32_t>(_y + _radius) < _maxy &&
+ static_cast<int64_t>(_y) + static_cast<int64_t>(_radius) <
+ static_cast<int64_t>(_maxy))
+ _maxy = _y + _radius;
+ }
+ if (_minx != std::numeric_limits<int32_t>::min() ||
+ _maxx != std::numeric_limits<int32_t>::max() ||
+ _miny != std::numeric_limits<int32_t>::min() ||
+ _maxy != std::numeric_limits<int32_t>::max())
+ {
+ _pruneOnDistance = true;
+ }
+ _zBoundingBox = vespalib::geo::ZCurve::BoundingBox(_minx, _maxx, _miny, _maxy);
+
+ return true;
+}
+
+}
+}
diff --git a/searchlib/src/vespa/searchlib/common/location.h b/searchlib/src/vespa/searchlib/common/location.h
new file mode 100644
index 00000000000..9faa42d0ca2
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/common/location.h
@@ -0,0 +1,56 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright (C) 2004 Overture Services Norway AS
+
+#pragma once
+
+#include <vespa/searchlib/attribute/attributeguard.h>
+#include <vespa/vespalib/geo/zcurve.h>
+#include "documentlocations.h"
+
+namespace search {
+namespace common {
+
+class Location : public DocumentLocations
+{
+private:
+ static int getInt(const char **pp);
+ bool getDimensionality(const char **pp);
+
+public:
+ Location(void);
+ bool getRankOnDistance() const { return _rankOnDistance; }
+ bool getPruneOnDistance() const { return _pruneOnDistance; }
+ uint32_t getXAspect() const { return _xAspect; }
+ int32_t getX() const { return _x; }
+ int32_t getY() const { return _y; }
+ uint32_t getRadius() const { return _radius; }
+ const char * getParseError() const { return _parseError; }
+ int32_t getMinX() const { return _minx; }
+ int32_t getMinY() const { return _miny; }
+ int32_t getMaxX() const { return _maxx; }
+ int32_t getMaxY() const { return _maxy; }
+ bool getzFailBoundingBoxTest(int64_t docxy) const {
+ return _zBoundingBox.getzFailBoundingBoxTest(docxy);
+ }
+
+ bool parse(const vespalib::string &locStr);
+
+private:
+ vespalib::geo::ZCurve::BoundingBox _zBoundingBox;
+ int32_t _x; /* Query X position */
+ int32_t _y; /* Query Y position */
+ uint32_t _xAspect; /* X distance multiplier fraction */
+ uint32_t _radius; /* Radius for euclidian distance */
+ int32_t _minx; /* Min X coordinate */
+ int32_t _maxx; /* Max X coordinate */
+ int32_t _miny; /* Min Y coordinate */
+ int32_t _maxy; /* Max Y coordinate */
+
+ bool _rankOnDistance;
+ bool _pruneOnDistance;
+ const char *_parseError;
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/common/locationiterators.cpp b/searchlib/src/vespa/searchlib/common/locationiterators.cpp
new file mode 100644
index 00000000000..ba959114c77
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/common/locationiterators.cpp
@@ -0,0 +1,121 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".locationiterators");
+#include <vespa/vespalib/geo/zcurve.h>
+
+#include <vespa/searchlib/bitcompression/compression.h>
+
+#include "locationiterators.h"
+
+using namespace search::common;
+
+class FastS_2DZLocationIterator : public search::queryeval::SearchIterator, public vespalib::noncopyable
+{
+private:
+ const unsigned int _numDocs;
+ const bool _strict;
+ const uint64_t _radius2;
+ const Location & _location;
+ std::vector<search::AttributeVector::largeint_t> _pos;
+
+ virtual void doSeek(uint32_t docId);
+ virtual void doUnpack(uint32_t docId);
+public:
+ FastS_2DZLocationIterator(unsigned int numDocs,
+ bool strict,
+ const Location & location);
+
+ virtual ~FastS_2DZLocationIterator(void);
+};
+
+
+FastS_2DZLocationIterator::
+FastS_2DZLocationIterator(unsigned int numDocs,
+ bool strict,
+ const Location & location)
+ : SearchIterator(),
+ _numDocs(numDocs),
+ _strict(strict),
+ _radius2(static_cast<uint64_t>(location.getRadius()) * location.getRadius()),
+ _location(location),
+ _pos()
+{
+ _pos.resize(1); //Need at least 1 entry as the singlevalue attributes does not honour given size.
+};
+
+
+FastS_2DZLocationIterator::~FastS_2DZLocationIterator(void)
+{
+};
+
+
+void
+FastS_2DZLocationIterator::doSeek(uint32_t docId)
+{
+ if (__builtin_expect(docId >= _numDocs, false)) {
+ setAtEnd();
+ return;
+ }
+
+ const Location &location = _location;
+ std::vector<search::AttributeVector::largeint_t> &pos = _pos;
+
+ for (;;) {
+ uint32_t numValues =
+ location.getVec()->get(docId, &pos[0], pos.size());
+ if (numValues > pos.size()) {
+ pos.resize(numValues);
+ numValues = location.getVec()->get(docId, &pos[0], pos.size());
+ }
+ for (uint32_t i = 0; i < numValues; i++) {
+ int64_t docxy(pos[i]);
+ if ( ! location.getzFailBoundingBoxTest(docxy)) {
+ int32_t docx = 0;
+ int32_t docy = 0;
+ vespalib::geo::ZCurve::decode(docxy, &docx, &docy);
+ uint32_t dx = (location.getX() > docx)
+ ? location.getX() - docx
+ : docx - location.getX();
+ if (location.getXAspect() != 0)
+ dx = ((uint64_t) dx * location.getXAspect()) >> 32;
+
+ uint32_t dy = (location.getY() > docy)
+ ? location.getY() - docy
+ : docy - location.getY();
+ uint64_t dist2 = (uint64_t) dx * dx + (uint64_t) dy * dy;
+ if (dist2 <= _radius2) {
+ setDocId(docId);
+ return;
+ }
+ }
+ }
+
+ if (__builtin_expect(docId + 1 >= _numDocs, false)) {
+ setAtEnd();
+ return;
+ }
+
+ if (!_strict) {
+ return;
+ }
+ docId++;
+ }
+}
+
+
+void
+FastS_2DZLocationIterator::doUnpack(uint32_t docId)
+{
+ (void) docId;
+}
+
+
+search::queryeval::SearchIterator *
+FastS_AllocLocationIterator(unsigned int numDocs,
+ bool strict,
+ const Location & location)
+{
+ return new FastS_2DZLocationIterator(numDocs, strict, location);
+}
diff --git a/searchlib/src/vespa/searchlib/common/locationiterators.h b/searchlib/src/vespa/searchlib/common/locationiterators.h
new file mode 100644
index 00000000000..d55cc2ff16e
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/common/locationiterators.h
@@ -0,0 +1,12 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/queryeval/iterators.h>
+#include <vespa/searchlib/common/location.h>
+
+search::queryeval::SearchIterator *
+FastS_AllocLocationIterator(unsigned int numDocs,
+ bool strict,
+ const search::common::Location & location);
+
diff --git a/searchlib/src/vespa/searchlib/common/mapnames.cpp b/searchlib/src/vespa/searchlib/common/mapnames.cpp
new file mode 100644
index 00000000000..2597ae1c6fb
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/common/mapnames.cpp
@@ -0,0 +1,14 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "mapnames.h"
+
+namespace search {
+
+const vespalib::string MapNames::RANK("rank");
+const vespalib::string MapNames::FEATURE("feature");
+const vespalib::string MapNames::HIGHLIGHTTERMS("highlightterms");
+const vespalib::string MapNames::MATCH("match");
+const vespalib::string MapNames::CACHES("caches");
+const vespalib::string MapNames::MODEL("model");
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/common/mapnames.h b/searchlib/src/vespa/searchlib/common/mapnames.h
new file mode 100644
index 00000000000..fa8d7f97578
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/common/mapnames.h
@@ -0,0 +1,33 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/vespalib/stllike/string.h>
+
+namespace search {
+
+/**
+ * A simple wrapper class for the named maps of properties.
+ **/
+struct MapNames
+{
+ /** name of rank feature property collection **/
+ static const vespalib::string RANK;
+
+ /** name of feature override property collection **/
+ static const vespalib::string FEATURE;
+
+ /** name of highlightterms property collection **/
+ static const vespalib::string HIGHLIGHTTERMS;
+
+ /** name of match property collection **/
+ static const vespalib::string MATCH;
+
+ /** name of cache property collection **/
+ static const vespalib::string CACHES;
+
+ /** name of model property collection **/
+ static const vespalib::string MODEL;
+};
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/common/packets.cpp b/searchlib/src/vespa/searchlib/common/packets.cpp
new file mode 100644
index 00000000000..769cbbeeed4
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/common/packets.cpp
@@ -0,0 +1,2198 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright (C) 2002-2003 Fast Search & Transfer ASA
+// Copyright (C) 2003 Overture Services Norway AS
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+
+#include <vespa/document/util/compressionconfig.h>
+#include <vespa/document/util/compressor.h>
+#include <vespa/searchlib/common/mapnames.h>
+#include <vespa/searchlib/common/packets.h>
+#include <vespa/searchlib/common/sortdata.h>
+#include <vespa/searchlib/util/rawbuf.h>
+#include <vespa/vespalib/data/databuffer.h>
+#include <vespa/vespalib/util/buffer.h>
+#include <vespa/vespalib/util/exceptions.h>
+#include <vespa/vespalib/util/stringfmt.h>
+#include <vespa/vespalib/data/slime/slime.h>
+
+LOG_SETUP(".searchlib.common.fs4packets");
+
+using document::CompressionConfig;
+using vespalib::ConstBufferRef;
+using vespalib::make_string;
+using vespalib::stringref;
+
+namespace search {
+namespace fs4transport {
+
+/**
+ * Persistent packet streamer.
+ **/
+FS4PersistentPacketStreamer FS4PersistentPacketStreamer::
+Instance(FS4PacketFactory::CreateFS4Packet);
+
+//============================================================
+
+bool
+FS4PersistentPacketStreamer::HasChannelID(uint32_t pcode)
+{
+ switch(pcode & PCODE_MASK) {
+ case search::fs4transport::PCODE_EOL:
+ case search::fs4transport::PCODE_QUERYRESULT:
+ case search::fs4transport::PCODE_ERROR:
+ case search::fs4transport::PCODE_GETDOCSUMS:
+ case search::fs4transport::PCODE_DOCSUM:
+ case search::fs4transport::PCODE_MLD_QUERYRESULT:
+ case search::fs4transport::PCODE_MLD_GETDOCSUMS:
+ case search::fs4transport::PCODE_PARSEDQUERY2:
+ case search::fs4transport::PCODE_QUERYRESULTX:
+ case search::fs4transport::PCODE_QUERYX:
+ case search::fs4transport::PCODE_GETDOCSUMSX:
+ case search::fs4transport::PCODE_TRACEREPLY:
+ return true;
+ default:
+ return false;
+ }
+}
+
+FS4PersistentPacketStreamer::
+FS4PersistentPacketStreamer(FS4PacketFactory::CreatePacket_t cp)
+ : _compressionLimit(0),
+ _compressionLevel(9),
+ _compressionType(CompressionConfig::LZ4),
+ _conservative(false),
+ _createPacket(cp) {
+}
+
+
+bool
+FS4PersistentPacketStreamer::GetPacketInfo(FNET_DataBuffer *src,
+ uint32_t *plen, uint32_t *pcode,
+ uint32_t *chid, bool *broken)
+{
+ uint32_t tmpVal;
+ bool hasCHID;
+
+ if (src->GetDataLen() < 2 * sizeof(uint32_t) ||
+ ((hasCHID = HasChannelID(src->PeekInt32(sizeof(uint32_t)))) &&
+ src->GetDataLen() < 3 * sizeof(uint32_t)))
+ return false;
+
+ if (hasCHID) {
+ tmpVal = src->ReadInt32();
+ if (tmpVal < 2 * sizeof(uint32_t)) {
+ // This is not a valid packet length. We might
+ // be out of sync.
+ *broken = _conservative;
+ if (*broken) {
+ LOG(warning, "Out of sync! Invalid packet length %u\n", tmpVal);
+ }
+ return false;
+ } else {
+ *plen = tmpVal - 2 * sizeof(uint32_t);
+ }
+ tmpVal = src->ReadInt32();
+ if (!ValidPCode(tmpVal)) {
+ // Out of sync?
+ *broken = _conservative;
+ if (*broken) {
+ LOG(warning, "Out of sync! Invalid pcode %u (%u)\n", tmpVal, *plen);
+ }
+ return false;
+ } else {
+ *pcode = tmpVal;
+ }
+ *chid = src->ReadInt32();
+ } else {
+ tmpVal = src->ReadInt32();
+ if (tmpVal < sizeof(uint32_t)) {
+ // This is not a valid packet length. We might
+ // be out of sync.
+ *broken = _conservative;
+ if (*broken) {
+ LOG(warning, "Out of sync! Invalid length (noch) %u\n", tmpVal);
+ }
+ return false;
+ } else {
+ *plen = tmpVal - sizeof(uint32_t);
+ }
+ tmpVal = src->ReadInt32();
+ if (!ValidPCode(tmpVal)) {
+ // Out of sync?
+ *broken = _conservative;
+ if (*broken) {
+ LOG(warning, "Out of sync! Invalid pcode (noch) %u (%u)\n", tmpVal, *plen);
+ }
+ return false;
+ } else {
+ *pcode = tmpVal;
+ }
+ *chid = FNET_NOID;
+ }
+ return true;
+}
+
+namespace {
+void decodePacket(FNET_Packet *&packet, FNET_DataBuffer &buf, uint32_t size,
+ uint32_t pcode) {
+ try {
+ if (!packet->Decode(&buf, size)) {
+ LOG(error, "could not decode packet (pcode=%u); "
+ "this could be caused by a protocol and/or "
+ "version incompatibility\n", pcode);
+ packet->Free();
+ packet = NULL;
+ }
+ } catch (const vespalib::Exception & e) {
+ packet->Free();
+ packet = NULL;
+ LOG(error, "%s", e.toString().c_str());
+ }
+}
+} // namespace
+
+FNET_Packet*
+FS4PersistentPacketStreamer::Decode(FNET_DataBuffer *src, uint32_t plen, uint32_t pcode, FNET_Context)
+{
+ FNET_Packet *packet;
+
+ packet = _createPacket(pcode & PCODE_MASK);
+ if (packet != NULL) {
+ uint32_t compressionByte = (pcode & ~PCODE_MASK) >> 24;
+ CompressionConfig::Type compressionType(CompressionConfig::toType(compressionByte));
+ if (compressionType != 0) {
+ uint32_t uncompressed_size = src->ReadInt32();
+ ConstBufferRef org(src->GetData(), plen - sizeof(uint32_t));
+ vespalib::DataBuffer uncompressed(uncompressed_size);
+ document::decompress(compressionType, uncompressed_size, org, uncompressed, false);
+ FNET_DataBuffer buf(uncompressed.getData(), uncompressed.getDataLen());
+ decodePacket(packet, buf, uncompressed_size, pcode);
+ src->DataToDead(plen - sizeof(uint32_t));
+ } else {
+ decodePacket(packet, *src, plen, pcode);
+ }
+ } else {
+ src->DataToDead(plen);
+ }
+ return packet;
+}
+
+
+void
+FS4PersistentPacketStreamer::Encode(FNET_Packet *packet, uint32_t chid, FNET_DataBuffer *dst)
+{
+ uint32_t len = packet->GetLength();
+ uint32_t pcode = packet->GetPCODE();
+
+ uint32_t packet_start = dst->GetDataLen();
+ if (HasChannelID(pcode)) {
+ dst->EnsureFree(len + 3 * sizeof(uint32_t));
+ dst->WriteInt32Fast(len + 2 * sizeof(uint32_t));
+ dst->WriteInt32Fast(pcode);
+ dst->WriteInt32Fast(chid);
+ } else {
+ dst->EnsureFree(len + 2 * sizeof(uint32_t));
+ dst->WriteInt32Fast(len + sizeof(uint32_t));
+ dst->WriteInt32Fast(pcode);
+ }
+ uint32_t header_len = dst->GetDataLen() - packet_start;
+ packet->Encode(dst);
+ dst->AssertValid();
+ uint32_t body_len = dst->GetDataLen() - packet_start - header_len;
+ bool isCompressable((pcode & ~PCODE_MASK) == 0);
+
+ if (isCompressable && _compressionLimit && (body_len > _compressionLimit)) {
+ CompressionConfig config(_compressionType, _compressionLevel, 90);
+ ConstBufferRef org(dst->GetData() + packet_start + header_len, body_len);
+ vespalib::DataBuffer compressed(org.size());
+ CompressionConfig::Type r = document::compress(config, org, compressed, false);
+ if (r != CompressionConfig::NONE) {
+ dst->DataToFree(body_len + header_len);
+ // sizeof(data + header + uncompressed_size) - sizeof(uint32_t)
+ dst->WriteInt32Fast(compressed.getDataLen() + header_len);
+ dst->WriteInt32Fast(pcode | (_compressionType << 24));
+ if (HasChannelID(pcode)) {
+ dst->FreeToData(sizeof(uint32_t)); // channel
+ }
+ dst->WriteInt32Fast(body_len);
+ dst->WriteBytes(compressed.getData(), compressed.getDataLen());
+ dst->AssertValid();
+ }
+ }
+}
+
+//============================================================
+
+FS4Properties::FS4Properties()
+ : _entries(),
+ _name(0),
+ _backing()
+{
+}
+
+FS4Properties::FS4Properties(FS4Properties && rhs)
+ : _entries(std::move(rhs._entries)),
+ _name(std::move(rhs._name)),
+ _backing(std::move(rhs._backing))
+{
+}
+
+FS4Properties &
+FS4Properties::operator=(FS4Properties && rhs)
+{
+ _entries = std::move(rhs._entries);
+ _name = std::move(rhs._name);
+ _backing = std::move(rhs._backing);
+ return *this;
+}
+
+FS4Properties::~FS4Properties()
+{
+}
+
+void
+FS4Properties::allocEntries(uint32_t cnt)
+{
+ _entries.resize(cnt);
+ _backing.reserve(cnt*2*40); // Assume strings are average 40 bytes
+}
+
+void FS4Properties::set(StringRef & e, const vespalib::stringref & s)
+{
+ e.first = _backing.size();
+ e.second = s.size();
+ _backing.append(s.c_str(), s.size());
+}
+
+void
+FS4Properties::setKey(uint32_t entry, const char *key, uint32_t keySize)
+{
+ set(_entries[entry].first, vespalib::stringref(key, keySize));
+}
+
+void
+FS4Properties::setValue(uint32_t entry, const char *value, uint32_t valueSize)
+{
+ set(_entries[entry].second, vespalib::stringref(value, valueSize));
+}
+
+uint32_t
+FS4Properties::getLength()
+{
+ uint32_t len = sizeof(uint32_t) * 2 + getNameLen();
+ len += _backing.size();
+ len += _entries.size() * sizeof(uint32_t) * 2;
+ return len;
+}
+
+void
+FS4Properties::encode(FNET_DataBuffer &dst)
+{
+ dst.WriteInt32Fast(_name.size());
+ dst.WriteBytesFast(_name.c_str(), _name.size());
+ dst.WriteInt32Fast(size());
+ for (uint32_t i = 0; i < size(); ++i) {
+ dst.WriteInt32Fast(getKeyLen(i));
+ dst.WriteBytesFast(getKey(i), getKeyLen(i));
+ dst.WriteInt32Fast(getValueLen(i));
+ dst.WriteBytesFast(getValue(i), getValueLen(i));
+ }
+}
+
+bool
+FS4Properties::decode(FNET_DataBuffer &src, uint32_t &len)
+{
+ uint32_t strLen;
+ if (len < sizeof(uint32_t)) return false;
+ strLen = src.ReadInt32();
+ len -= sizeof(uint32_t);
+ if (len < strLen) return false;
+ setName(src.GetData(), strLen);
+ src.DataToDead(strLen);
+ len -= strLen;
+ if (len < sizeof(uint32_t)) return false;
+ uint32_t cnt = src.ReadInt32();
+ len -= sizeof(uint32_t);
+ allocEntries(cnt);
+ for (uint32_t i = 0; i < cnt; ++i) {
+ if (len < sizeof(uint32_t)) return false;
+ strLen = src.ReadInt32();
+ len -= sizeof(uint32_t);
+ if (len < strLen) return false;
+ setKey(i, src.GetData(), strLen);
+ src.DataToDead(strLen);
+ len -= strLen;
+ if (len < sizeof(uint32_t)) return false;
+ strLen = src.ReadInt32();
+ len -= sizeof(uint32_t);
+ if (len < strLen) return false;
+ setValue(i, src.GetData(), strLen);
+ src.DataToDead(strLen);
+ len -= strLen;
+ }
+ return true;
+}
+
+vespalib::string
+FS4Properties::toString(uint32_t indent) const
+{
+ vespalib::string s;
+ s += make_string("%*sProperties {\n", indent, "");
+ s += make_string("%*s name: ", indent, "");
+ s += _name;
+ s += "\n";
+ for (uint32_t i = 0; i < size(); ++i) {
+ s += make_string("%*s Entry[%d] {\n", indent, "", i);
+ s += make_string("%*s key : %s\n", indent, "", vespalib::string(getKey(i), getKeyLen(i)).c_str());
+ s += make_string("%*s value: %s\n", indent, "", vespalib::string(getValue(i), getValueLen(i)).c_str());
+ s += make_string("%*s }\n", indent, "");
+ }
+ s += make_string("%*s}\n", indent, "");
+ return s;
+}
+
+//============================================================
+
+
+/**
+ * Write a string in usual format to a buffer. Usual format is first
+ * a 32-bit integer holding the string length, then the bytes that the
+ * string contained. Skip checking for free space.
+ *
+ * @param buf buffer to write to
+ * @param str string to write, of any type that has c_str() and size()
+ **/
+template<typename STR>
+void
+writeLenString(FNET_DataBuffer *buf, const STR &str)
+{
+ buf->WriteInt32Fast(str.size());
+ buf->WriteBytesFast(str.c_str(), str.size());
+}
+
+
+//============================================================
+
+FS4Packet::FS4Packet()
+ : FNET_Packet()
+{
+}
+
+
+FS4Packet::~FS4Packet()
+{
+}
+
+
+void
+FS4Packet::Free()
+{
+ delete this;
+}
+
+vespalib::string
+FS4Packet::Print(uint32_t indent)
+{
+ return toString(indent);
+}
+
+//============================================================
+
+FS4Packet_EOL::FS4Packet_EOL()
+ : FS4Packet()
+{
+}
+
+
+FS4Packet_EOL::~FS4Packet_EOL()
+{
+}
+
+
+uint32_t
+FS4Packet_EOL::GetLength()
+{
+ return 0;
+}
+
+
+void
+FS4Packet_EOL::Encode(FNET_DataBuffer *dst)
+{
+ (void) dst;
+}
+
+
+bool
+FS4Packet_EOL::Decode(FNET_DataBuffer *src, uint32_t len)
+{
+ src->DataToDead(len);
+ return (len == 0);
+}
+
+
+vespalib::string
+FS4Packet_EOL::toString(uint32_t indent) const
+{
+ return make_string("%*sFS4Packet_EOL {}\n", indent, "");
+}
+
+//============================================================
+
+FS4Packet_Shared::FS4Packet_Shared(FNET_Packet::SP packet)
+ : FS4Packet(),
+ _packet(std::move(packet))
+{
+}
+
+FS4Packet_Shared::~FS4Packet_Shared()
+{
+}
+
+uint32_t
+FS4Packet_Shared::GetPCODE()
+{
+ return _packet->GetPCODE();
+}
+
+uint32_t
+FS4Packet_Shared::GetLength()
+{
+ return _packet->GetLength();
+}
+
+void
+FS4Packet_Shared::Encode(FNET_DataBuffer *dst)
+{
+ _packet->Encode(dst);
+}
+
+bool
+FS4Packet_Shared::Decode(FNET_DataBuffer *, uint32_t )
+{
+ assert(false);
+}
+
+vespalib::string
+FS4Packet_Shared::toString(uint32_t indent) const
+{
+ return _packet->Print(indent);
+}
+
+//============================================================
+
+FS4Packet_PreSerialized::FS4Packet_PreSerialized(FNET_Packet & packet)
+ : FS4Packet(),
+ _pcode(packet.GetPCODE()),
+ _compressionType(CompressionConfig::NONE),
+ _data(packet.GetLength() + 1*sizeof(uint32_t))
+{
+ const uint32_t body_len(packet.GetLength());
+ const uint32_t compressionLimit=FS4PersistentPacketStreamer::Instance.getCompressionLimit();
+ if (compressionLimit && (body_len > compressionLimit)) {
+ FNET_DataBuffer tmp(packet.GetLength());
+ packet.Encode(&tmp);
+ tmp.AssertValid();
+ CompressionConfig config(FS4PersistentPacketStreamer::Instance.getCompressionType(),
+ FS4PersistentPacketStreamer::Instance.getCompressionLevel(),
+ 90);
+ ConstBufferRef org(tmp.GetData(), tmp.GetDataLen());
+ vespalib::DataBuffer compressed(org.size());
+ _compressionType = document::compress(config, org, compressed, false);
+ if (_compressionType != CompressionConfig::NONE) {
+ _data.WriteInt32Fast(body_len);
+ _data.WriteBytes(compressed.getData(), compressed.getDataLen());
+ _data.AssertValid();
+ } else {
+ packet.Encode(&_data);
+ }
+ } else {
+ packet.Encode(&_data);
+ }
+}
+
+FS4Packet_PreSerialized::~FS4Packet_PreSerialized()
+{
+}
+
+uint32_t
+FS4Packet_PreSerialized::GetPCODE()
+{
+ return ((_compressionType == CompressionConfig::NONE)
+ ? _pcode
+ : (_pcode | (_compressionType << 24)));
+}
+
+uint32_t
+FS4Packet_PreSerialized::GetLength()
+{
+ return _data.GetDataLen();
+}
+
+void
+FS4Packet_PreSerialized::Encode(FNET_DataBuffer *dst)
+{
+ dst->WriteBytes(_data.GetData(), _data.GetDataLen());
+}
+
+bool
+FS4Packet_PreSerialized::Decode(FNET_DataBuffer *, uint32_t)
+{
+ assert(false);
+}
+
+vespalib::string
+FS4Packet_PreSerialized::toString(uint32_t indent) const
+{
+ vespalib::string s;
+ s += make_string("%*sFS4Packet_PreSerialized {\n", indent, "");
+ s += make_string("%*s length : %d\n", indent, "", _data.GetDataLen());
+ s += make_string("%*s}\n", indent, "");
+ return s;
+}
+
+//============================================================
+
+FS4Packet_ERROR::FS4Packet_ERROR()
+ : FS4Packet(),
+ _errorCode(0),
+ _message()
+{
+}
+
+
+FS4Packet_ERROR::~FS4Packet_ERROR()
+{
+}
+
+
+uint32_t
+FS4Packet_ERROR::GetLength()
+{
+ return 2 * sizeof(uint32_t) + _message.size();
+}
+
+
+void
+FS4Packet_ERROR::Encode(FNET_DataBuffer *dst)
+{
+ dst->WriteInt32Fast(_errorCode);
+ writeLenString(dst, _message);
+}
+
+
+bool
+FS4Packet_ERROR::Decode(FNET_DataBuffer *src, uint32_t len)
+{
+ if (len < sizeof(uint32_t) * 2) {
+ src->DataToDead(len);
+ return false;
+ }
+ _errorCode = src->ReadInt32();
+ uint32_t messageLen = src->ReadInt32();
+ len -= 2 * sizeof(uint32_t);
+ if (len != messageLen) {
+ src->DataToDead(len);
+ return false;
+ }
+ setErrorMessage(stringref(src->GetData(), messageLen));
+ src->DataToDead(messageLen);
+ return true;
+}
+
+
+vespalib::string
+FS4Packet_ERROR::toString(uint32_t indent) const
+{
+ vespalib::string s;
+ s += make_string("%*sFS4Packet_ERROR {\n", indent, "");
+ s += make_string("%*s errorCode : %d\n", indent, "", _errorCode);
+ s += make_string("%*s message : %s\n", indent, "", _message.c_str());
+ s += make_string("%*s}\n", indent, "");
+ return s;
+}
+
+//============================================================
+
+void
+FS4Packet_DOCSUM::SetBuf(const char *buf, uint32_t len)
+{
+ _buf.resize(len);
+ memcpy(_buf.str(), buf, len);
+}
+
+
+FS4Packet_DOCSUM::FS4Packet_DOCSUM()
+ : FS4Packet(),
+ _gid(),
+ _buf()
+{
+}
+
+
+FS4Packet_DOCSUM::~FS4Packet_DOCSUM()
+{
+}
+
+void
+FS4Packet_DOCSUM::Encode(FNET_DataBuffer *dst)
+{
+ dst->WriteBytesFast(_gid.get(), document::GlobalId::LENGTH);
+ dst->WriteBytesFast(_buf.c_str(), _buf.size());
+}
+
+
+bool
+FS4Packet_DOCSUM::Decode(FNET_DataBuffer *src, uint32_t len)
+{
+ if (len < document::GlobalId::LENGTH) {
+ src->DataToDead(len);
+ return false;
+ }
+ unsigned char rawGid[document::GlobalId::LENGTH];
+ src->ReadBytes(rawGid, document::GlobalId::LENGTH);
+ _gid.set(rawGid);
+ len -= document::GlobalId::LENGTH;
+ SetBuf(src->GetData(), len);
+ src->DataToDead(len);
+ return true;
+}
+
+
+vespalib::string
+FS4Packet_DOCSUM::toString(uint32_t indent) const
+{
+ vespalib::string s;
+ s += make_string("%*sFS4Packet_DOCSUM {\n", indent, "");
+ s += make_string("%*s gid : %s\n", indent, "", _gid.toString().c_str());
+
+ uint32_t magic = ::search::fs4transport::SLIME_MAGIC_ID;
+ if (_buf.size() >= sizeof(magic) &&
+ memcmp(_buf.c_str(), &magic, sizeof(magic)) == 0) {
+ vespalib::Slime slime;
+ vespalib::slime::Memory input(_buf.c_str() + sizeof(magic),
+ _buf.size() - sizeof(magic));
+ vespalib::slime::SimpleBuffer buf;
+ vespalib::slime::BinaryFormat::decode(input, slime);
+ vespalib::slime::JsonFormat::encode(slime, buf, false);
+ s += make_string("%*s json dump : ", indent, "");
+ s += buf.get().make_string();
+ } else {
+ s += make_string("%*s data dump :\n", indent, "");
+ const char *pt = _buf.c_str();
+ uint32_t i = 0;
+ if ( ! _buf.empty())
+ s += make_string("%*s ", indent, "");
+ while (i < _buf.size()) {
+ s += make_string("%x ", (unsigned char) pt[i]);
+ if ((++i % 16) == 0)
+ s += make_string("\n%*s ", indent, "");
+ }
+ if ((i % 16) != 0)
+ s += make_string("\n");
+ }
+ s += make_string("%*s}\n", indent, "");
+ return s;
+}
+
+//============================================================
+
+FS4Packet_MONITORQUERYX::FS4Packet_MONITORQUERYX(uint32_t pcode)
+ : FS4Packet(),
+ _pcode(pcode),
+ _features(0),
+ _qflags(0u)
+{
+ UpdateCompatFeatures();
+}
+
+
+FS4Packet_MONITORQUERYX::~FS4Packet_MONITORQUERYX()
+{
+}
+
+
+void
+FS4Packet_MONITORQUERYX::UpdateCompatPCODE(void)
+{
+ if (_features == search::fs4transport::MQF_MONITORQUERY_MASK)
+ _pcode = search::fs4transport::PCODE_MONITORQUERY;
+ else
+ _pcode = search::fs4transport::PCODE_MONITORQUERYX;
+}
+
+
+void
+FS4Packet_MONITORQUERYX::UpdateCompatFeatures(void)
+{
+ if (_pcode == search::fs4transport::PCODE_MONITORQUERY)
+ _features = search::fs4transport::MQF_MONITORQUERY_MASK;
+}
+
+
+uint32_t
+FS4Packet_MONITORQUERYX::GetLength()
+{
+ uint32_t plen = 0;
+
+ if (_pcode == search::fs4transport::PCODE_MONITORQUERYX)
+ plen += sizeof(uint32_t);
+ if (_features & search::fs4transport::MQF_QFLAGS)
+ plen += sizeof(uint32_t);
+ return plen;
+}
+
+
+void
+FS4Packet_MONITORQUERYX::Encode(FNET_DataBuffer *dst)
+{
+ if (_pcode == search::fs4transport::PCODE_MONITORQUERYX)
+ dst->WriteInt32Fast(_features);
+
+ if ((_features & search::fs4transport::MQF_QFLAGS) != 0)
+ dst->WriteInt32Fast(_qflags);
+}
+
+
+bool
+FS4Packet_MONITORQUERYX::Decode(FNET_DataBuffer *src, uint32_t len)
+{
+ if (_pcode == search::fs4transport::PCODE_MONITORQUERYX) {
+ if (len < sizeof(uint32_t))
+ goto error;
+ _features = src->ReadInt32();
+ len -= sizeof(uint32_t);
+ }
+ if ((_features & ~search::fs4transport::FNET_MQF_SUPPORTED_MASK) != 0)
+ goto error;
+
+ if ((_features & search::fs4transport::MQF_QFLAGS) != 0) {
+ if (len < sizeof(uint32_t))
+ goto error;
+ _qflags = src->ReadInt32();
+ len -= sizeof(uint32_t);
+ }
+
+ if (len != 0)
+ goto error;
+
+ SetRealPCODE();
+ return true; // OK
+ error:
+ src->DataToDead(len);
+ return false; // FAIL
+}
+
+
+vespalib::string
+FS4Packet_MONITORQUERYX::toString(uint32_t indent) const
+{
+ vespalib::string s;
+ s += make_string("%*sFS4Packet_MONITORQUERYX {\n", indent, "");
+ s += make_string("%*s pcode : %d\n", indent, "", _pcode);
+ s += make_string("%*s features : 0x%x\n", indent, "", _features);
+ s += make_string("%*s qflags : %d\n", indent, "", _qflags);
+ s += make_string("%*s}\n", indent, "");
+ return s;
+}
+
+//============================================================
+
+FS4Packet_MONITORRESULTX::
+FS4Packet_MONITORRESULTX(uint32_t pcode)
+ : FS4Packet(),
+ _pcode(pcode),
+ _features(0),
+ _partid(0),
+ _timestamp(0),
+ _totalNodes(0),
+ _activeNodes(0),
+ _totalParts(0),
+ _activeParts(0),
+ _rflags(0u),
+ _activeDocs(0)
+{
+ UpdateCompatFeatures();
+}
+
+
+FS4Packet_MONITORRESULTX::~FS4Packet_MONITORRESULTX(void)
+{
+}
+
+
+void
+FS4Packet_MONITORRESULTX::UpdateCompatPCODE(void)
+{
+ if (_features == search::fs4transport::MRF_MONITORRESULT_MASK)
+ _pcode = search::fs4transport::PCODE_MONITORRESULT;
+ else if (_features == search::fs4transport::MRF_MLD_MONITORRESULT_MASK)
+ _pcode = search::fs4transport::PCODE_MLD_MONITORRESULT;
+ else
+ _pcode = search::fs4transport::PCODE_MONITORRESULTX;
+}
+
+
+void
+FS4Packet_MONITORRESULTX::UpdateCompatFeatures(void)
+{
+ if (_pcode == search::fs4transport::PCODE_MONITORRESULT)
+ _features = search::fs4transport::MRF_MONITORRESULT_MASK;
+ else if (_pcode == search::fs4transport::PCODE_MLD_MONITORRESULT)
+ _features = search::fs4transport::MRF_MLD_MONITORRESULT_MASK;
+}
+
+
+uint32_t
+FS4Packet_MONITORRESULTX::GetLength(void)
+{
+ uint32_t plen = 2 * sizeof(uint32_t);
+
+ if (_pcode == search::fs4transport::PCODE_MONITORRESULTX)
+ plen += sizeof(uint32_t);
+ if ((_features & search::fs4transport::MRF_MLD) != 0)
+ plen += 4 * sizeof(uint32_t);
+ if ((_features & search::fs4transport::MRF_RFLAGS) != 0)
+ plen += sizeof(uint32_t);
+ if ((_features & search::fs4transport::MRF_ACTIVEDOCS) != 0)
+ plen += sizeof(uint64_t);
+
+ return plen;
+}
+
+
+void
+FS4Packet_MONITORRESULTX::Encode(FNET_DataBuffer *dst)
+{
+ if (_pcode == search::fs4transport::PCODE_MONITORRESULTX)
+ dst->WriteInt32Fast(_features);
+
+ dst->WriteInt32Fast(_partid);
+ dst->WriteInt32Fast(_timestamp);
+ if ((_features & search::fs4transport::MRF_MLD) != 0) {
+ dst->WriteInt32Fast(_totalNodes);
+ dst->WriteInt32Fast(_activeNodes);
+ dst->WriteInt32Fast(_totalParts);
+ dst->WriteInt32Fast(_activeParts);
+ }
+ if ((_features & search::fs4transport::MRF_RFLAGS) != 0) {
+ dst->WriteInt32Fast(_rflags);
+ }
+ if ((_features & search::fs4transport::MRF_ACTIVEDOCS) != 0) {
+ dst->WriteInt64Fast(_activeDocs);
+ }
+}
+
+
+bool
+FS4Packet_MONITORRESULTX::Decode(FNET_DataBuffer *src, uint32_t len)
+{
+ if (_pcode == search::fs4transport::PCODE_MONITORRESULTX) {
+ if (len < sizeof(uint32_t)) goto error;
+ _features = src->ReadInt32();
+ len -= sizeof(uint32_t);
+ }
+ if ((_features & ~search::fs4transport::FNET_MRF_SUPPORTED_MASK) != 0)
+ goto error;
+
+ if (len < 2 * sizeof(uint32_t))
+ goto error;
+ _partid = src->ReadInt32();
+ _timestamp = src->ReadInt32();
+ len -= 2 * sizeof(uint32_t);
+
+ if ((_features & search::fs4transport::MRF_MLD) != 0) {
+ if (len < 4 * sizeof(uint32_t))
+ goto error;
+ _totalNodes = src->ReadInt32();
+ _activeNodes = src->ReadInt32();
+ _totalParts = src->ReadInt32();
+ _activeParts = src->ReadInt32();
+ len -= 4 * sizeof(uint32_t);
+ }
+
+ if ((_features & search::fs4transport::MRF_RFLAGS) != 0) {
+ if (len < sizeof(uint32_t))
+ goto error;
+ _rflags = src->ReadInt32();
+ len -= sizeof(uint32_t);
+ }
+
+ if ((_features & search::fs4transport::MRF_ACTIVEDOCS) != 0) {
+ if (len < sizeof(uint64_t))
+ goto error;
+ _activeDocs = src->ReadInt64();
+ len -= sizeof(uint64_t);
+ }
+
+ if (len != 0)
+ goto error;
+
+ SetRealPCODE();
+ return true; // OK
+ error:
+ src->DataToDead(len);
+ return false; // FAIL
+}
+
+
+vespalib::string
+FS4Packet_MONITORRESULTX::toString(uint32_t indent) const
+{
+ vespalib::string s;
+ s += make_string("%*sFS4Packet_MONITORRESULTX {\n", indent, "");
+ s += make_string("%*s pcode : %d\n", indent, "", _pcode);
+ s += make_string("%*s features : 0x%x\n", indent, "", _features);
+ s += make_string("%*s partid : %d\n", indent, "", _partid);
+ s += make_string("%*s timestamp : %d\n", indent, "", _timestamp);
+ s += make_string("%*s totalnodes : %d\n", indent, "", _totalNodes);
+ s += make_string("%*s activenodes : %d\n", indent, "", _activeNodes);
+ s += make_string("%*s totalparts : %d\n", indent, "", _totalParts);
+ s += make_string("%*s activeparts : %d\n", indent, "", _activeParts);
+ s += make_string("%*s}\n", indent, "");
+ return s;
+}
+
+//============================================================
+
+FS4Packet_CLEARCACHES::FS4Packet_CLEARCACHES()
+ : FS4Packet()
+{
+}
+
+
+FS4Packet_CLEARCACHES::~FS4Packet_CLEARCACHES()
+{
+}
+
+
+uint32_t
+FS4Packet_CLEARCACHES::GetLength()
+{
+ return 0;
+}
+
+
+void
+FS4Packet_CLEARCACHES::Encode(FNET_DataBuffer *dst)
+{
+ (void) dst;
+}
+
+
+bool
+FS4Packet_CLEARCACHES::Decode(FNET_DataBuffer *src, uint32_t len)
+{
+ src->DataToDead(len);
+ return (len == 0);
+}
+
+
+vespalib::string
+FS4Packet_CLEARCACHES::toString(uint32_t indent) const
+{
+ return make_string("%*sFS4Packet_CLEARCACHES {}\n", indent, "");
+}
+
+//============================================================
+
+FS4Packet_QUEUELEN::FS4Packet_QUEUELEN()
+ : FS4Packet(),
+ _queueLen(0),
+ _dispatchers(0)
+{
+}
+
+
+FS4Packet_QUEUELEN::~FS4Packet_QUEUELEN()
+{
+}
+
+
+uint32_t
+FS4Packet_QUEUELEN::GetLength()
+{
+ return 2 * sizeof(uint32_t);
+}
+
+
+void
+FS4Packet_QUEUELEN::Encode(FNET_DataBuffer *dst)
+{
+ dst->WriteInt32Fast(_queueLen);
+ dst->WriteInt32Fast(_dispatchers);
+}
+
+
+bool
+FS4Packet_QUEUELEN::Decode(FNET_DataBuffer *src, uint32_t len)
+{
+ if (len != 2 * sizeof(uint32_t)) {
+ src->DataToDead(len);
+ return false;
+ }
+ _queueLen = src->ReadInt32();
+ _dispatchers = src->ReadInt32();
+ return true;
+}
+
+
+vespalib::string
+FS4Packet_QUEUELEN::toString(uint32_t indent) const
+{
+ vespalib::string s;
+ s += make_string("%*sFS4Packet_QUEUELEN {\n", indent, "");
+ s += make_string("%*s queue len : %d\n", indent, "", _queueLen);
+ s += make_string("%*s dispatchers : %d\n", indent, "", _dispatchers);
+ s += make_string("%*s}\n", indent, "");
+ return s;
+}
+
+//============================================================
+
+void
+FS4Packet_QUERYRESULTX::AllocateSortIndex(uint32_t cnt)
+{
+ if (cnt == 0)
+ return;
+
+ cnt++; // end of data index entry
+ _sortIndex = new uint32_t[cnt];
+}
+
+
+void
+FS4Packet_QUERYRESULTX::AllocateSortData(uint32_t len)
+{
+ if (len == 0)
+ return;
+
+ _sortData = (char *) malloc(len);
+}
+
+
+void
+FS4Packet_QUERYRESULTX::SetSortDataRef(uint32_t cnt,
+ uint32_t *sortIndex,
+ const char *sortData)
+{
+ if (cnt == 0)
+ return;
+
+ AllocateSortIndex(cnt);
+ AllocateSortData(sortIndex[cnt] - sortIndex[0]);
+ _sortIndex[0] = 0;
+ search::common::SortData::Copy(cnt, _sortIndex, _sortData, sortIndex, sortData);
+}
+
+
+void
+FS4Packet_QUERYRESULTX::AllocateAggrData(uint32_t len)
+{
+ if (len == 0)
+ return;
+
+ _aggrData = (char *) malloc(len);
+ _aggrDataLen = len;
+}
+
+
+void
+FS4Packet_QUERYRESULTX::SetAggrDataRef(const char *aggrData,
+ uint32_t len)
+{
+ if (len == 0)
+ return;
+
+ AllocateAggrData(len);
+ memcpy(_aggrData, aggrData, len);
+}
+
+
+void
+FS4Packet_QUERYRESULTX::AllocateGroupData(uint32_t len)
+{
+ if (len == 0)
+ return;
+
+ _groupData = (char *) malloc(len);
+ _groupDataLen = len;
+}
+
+
+void
+FS4Packet_QUERYRESULTX::SetGroupDataRef(const char *groupData,
+ uint32_t len)
+{
+ if (len == 0)
+ return;
+
+ AllocateGroupData(len);
+ memcpy(_groupData, groupData, len);
+}
+
+
+void
+FS4Packet_QUERYRESULTX::AllocateHits(uint32_t cnt)
+{
+ if (cnt == 0)
+ return;
+
+ _hits = new FS4_hit[cnt];
+ _numDocs = cnt;
+}
+
+
+FS4Packet_QUERYRESULTX::FS4Packet_QUERYRESULTX(uint32_t pcode)
+ : FS4Packet(),
+ _pcode(pcode),
+ _distributionKey(0),
+ _features(0),
+ _offset(0),
+ _numDocs(0),
+ _totNumDocs(0),
+ _maxRank(0),
+ _sortIndex(NULL),
+ _sortData(NULL),
+ _aggrDataLen(0),
+ _aggrData(NULL),
+ _groupDataLen(0),
+ _groupData(NULL),
+ _coverageDocs(0),
+ _activeDocs(0),
+ _hits(NULL),
+ _propsVector()
+{
+ UpdateCompatFeatures();
+}
+
+
+FS4Packet_QUERYRESULTX::~FS4Packet_QUERYRESULTX()
+{
+ if (_sortIndex) { delete [] _sortIndex; }
+ if (_sortData) { free(_sortData); }
+ if (_aggrData) { free(_aggrData); }
+ if (_groupData) { free(_groupData); }
+ if (_hits) { delete [] _hits; }
+}
+
+
+void
+FS4Packet_QUERYRESULTX::UpdateCompatPCODE()
+{
+ if (_features == search::fs4transport::QRF_QUERYRESULT_MASK)
+ _pcode = search::fs4transport::PCODE_QUERYRESULT;
+ else if (_features == search::fs4transport::QRF_MLD_QUERYRESULT_MASK)
+ _pcode = search::fs4transport::PCODE_MLD_QUERYRESULT;
+ else
+ _pcode = search::fs4transport::PCODE_QUERYRESULTX;
+}
+
+
+void
+FS4Packet_QUERYRESULTX::UpdateCompatFeatures()
+{
+ if (_pcode == search::fs4transport::PCODE_QUERYRESULT)
+ _features = search::fs4transport::QRF_QUERYRESULT_MASK;
+ else if (_pcode == search::fs4transport::PCODE_MLD_QUERYRESULT)
+ _features = search::fs4transport::QRF_MLD_QUERYRESULT_MASK;
+}
+
+
+uint32_t
+FS4Packet_QUERYRESULTX::GetLength()
+{
+ uint32_t plen = 3 * sizeof(uint32_t) +
+ sizeof(uint64_t) + // hit count is now 64-bit
+ sizeof(search::HitRank) +
+ _numDocs * (sizeof(document::GlobalId) + sizeof(search::HitRank));
+
+ if (_pcode == search::fs4transport::PCODE_QUERYRESULTX)
+ plen += sizeof(uint32_t);
+
+ if ((_features & search::fs4transport::QRF_MLD) != 0)
+ plen += _numDocs * 2 * sizeof(uint32_t);
+
+ if (((_features & search::fs4transport::QRF_SORTDATA) != 0) &&
+ (_numDocs > 0))
+ plen += _numDocs * sizeof(uint32_t)
+ + (_sortIndex[_numDocs] - _sortIndex[0]);
+
+ if ((_features & search::fs4transport::QRF_AGGRDATA) != 0)
+ plen += sizeof(uint32_t)
+ + _aggrDataLen;
+
+ if ((_features & search::fs4transport::QRF_GROUPDATA) != 0)
+ plen += sizeof(uint32_t)
+ + _groupDataLen;
+
+ if ((_features & search::fs4transport::QRF_COVERAGE) != 0)
+ plen += sizeof(uint64_t)
+ + 2 * sizeof(uint32_t);
+
+ if ((_features & search::fs4transport::QRF_PROPERTIES) != 0) {
+ plen += sizeof(uint32_t);
+ for (uint32_t i = 0; i < _propsVector.size(); ++i) {
+ plen += _propsVector[i].getLength();
+ }
+ }
+
+ return plen;
+}
+
+
+void
+FS4Packet_QUERYRESULTX::Encode(FNET_DataBuffer *dst)
+{
+ if (_pcode == search::fs4transport::PCODE_QUERYRESULTX) {
+ // Never provide QF_WARMUP downwards
+ dst->WriteInt32Fast(_features & ~QF_WARMUP);
+ }
+ dst->WriteInt32Fast(_offset);
+ dst->WriteInt32Fast(_numDocs);
+ dst->WriteInt64Fast(_totNumDocs);
+ union { uint64_t INT64; double DOUBLE; } mrval;
+ mrval.DOUBLE = _maxRank;
+ dst->WriteInt64Fast(mrval.INT64);
+ dst->WriteInt32Fast(_distributionKey);
+
+ if (((_features & search::fs4transport::QRF_SORTDATA) != 0) &&
+ (_numDocs > 0))
+ {
+ uint32_t idx0 = _sortIndex[0];
+ // implicit: first index entry always 0
+ for (uint32_t i = 1; i <= _numDocs; i++) {
+ dst->WriteInt32Fast(_sortIndex[i] - idx0);
+ }
+ dst->WriteBytesFast(_sortData + idx0,
+ _sortIndex[_numDocs] - idx0);
+ }
+
+ if ((_features & search::fs4transport::QRF_AGGRDATA) != 0) {
+ dst->WriteInt32Fast(_aggrDataLen);
+ dst->WriteBytesFast(_aggrData, _aggrDataLen);
+ }
+
+ if ((_features & search::fs4transport::QRF_GROUPDATA) != 0) {
+ dst->WriteInt32Fast(_groupDataLen);
+ dst->WriteBytesFast(_groupData, _groupDataLen);
+ }
+
+ if ((_features & search::fs4transport::QRF_COVERAGE) != 0) {
+ dst->WriteInt64Fast(_coverageDocs);
+ dst->WriteInt64Fast(_activeDocs);
+ }
+
+ for (uint32_t i = 0; i < _numDocs; i++) {
+ dst->WriteBytesFast(_hits[i]._gid.get(), document::GlobalId::LENGTH);
+ union { uint64_t INT64; double DOUBLE; } val;
+ val.DOUBLE = _hits[i]._metric;
+ dst->WriteInt64Fast(val.INT64);
+ if ((_features & search::fs4transport::QRF_MLD) != 0) {
+ dst->WriteInt32Fast(_hits[i]._partid);
+ dst->WriteInt32Fast(_hits[i].getDistributionKey());
+ }
+ }
+
+ if ((_features & search::fs4transport::QRF_PROPERTIES) != 0) {
+ dst->WriteInt32Fast(_propsVector.size());
+ for (uint32_t i = 0; i < _propsVector.size(); ++i) {
+ _propsVector[i].encode(*dst);
+ }
+ }
+
+}
+
+
+bool
+FS4Packet_QUERYRESULTX::Decode(FNET_DataBuffer *src, uint32_t len)
+{
+ uint32_t i;
+ uint32_t hitSize = sizeof(document::GlobalId);
+
+ if (_pcode == search::fs4transport::PCODE_QUERYRESULTX) {
+ if (len < sizeof(uint32_t)) goto error;
+ _features = src->ReadInt32();
+ len -= sizeof(uint32_t);
+ }
+
+ if ((_features & ~search::fs4transport::FNET_QRF_SUPPORTED_MASK) != 0) {
+ throwUnsupportedFeatures(_features, search::fs4transport::FNET_QRF_SUPPORTED_MASK);
+ }
+ hitSize += sizeof(uint64_t);
+
+ if (len < 3 * sizeof(uint32_t) + sizeof(uint64_t) + sizeof(search::HitRank)) goto error;
+ _offset = src->ReadInt32();
+ _numDocs = src->ReadInt32();
+ _totNumDocs = src->ReadInt64();
+ union { uint64_t INT64; double DOUBLE; } mrval;
+ mrval.INT64 = src->ReadInt64();
+ _maxRank = mrval.DOUBLE;
+ _distributionKey = src->ReadInt32();
+ len -= 3 * sizeof(uint32_t) + sizeof(uint64_t) + sizeof(search::HitRank);
+
+ if (((_features & search::fs4transport::QRF_SORTDATA) != 0) &&
+ (_numDocs > 0)) {
+ if (len < _numDocs * sizeof(uint32_t)) goto error;
+ AllocateSortIndex(_numDocs);
+ _sortIndex[0] = 0; // implicit
+ for (i = 1; i <= _numDocs; i++)
+ _sortIndex[i] = src->ReadInt32();
+ len -= _numDocs * sizeof(uint32_t);
+ uint32_t sortDataLen = _sortIndex[_numDocs];
+
+ if (len < sortDataLen) goto error;
+ AllocateSortData(sortDataLen);
+ src->ReadBytes(_sortData, sortDataLen);
+ len -= sortDataLen;
+ }
+
+ if ((_features & search::fs4transport::QRF_AGGRDATA) != 0) {
+ if (len < sizeof(uint32_t)) goto error;
+ _aggrDataLen = src->ReadInt32();
+ len -= sizeof(uint32_t);
+
+ if (len < _aggrDataLen) goto error;
+ AllocateAggrData(_aggrDataLen);
+ src->ReadBytes(_aggrData, _aggrDataLen);
+ len -= _aggrDataLen;
+ }
+
+ if ((_features & search::fs4transport::QRF_GROUPDATA) != 0) {
+ if (len < sizeof(uint32_t)) goto error;
+ _groupDataLen = src->ReadInt32();
+ len -= sizeof(uint32_t);
+
+ if (len < _groupDataLen) goto error;
+ AllocateGroupData(_groupDataLen);
+ src->ReadBytes(_groupData, _groupDataLen);
+ len -= _groupDataLen;
+ }
+
+ if ((_features & search::fs4transport::QRF_COVERAGE) != 0) {
+ if (len < 2 * sizeof(uint64_t)) goto error;
+ _coverageDocs = src->ReadInt64();
+ _activeDocs = src->ReadInt64();
+ len -= 2 * sizeof(uint64_t);
+ }
+
+ if ((_features & search::fs4transport::QRF_MLD) != 0)
+ hitSize += 2 * sizeof(uint32_t);
+
+ if (len < _numDocs * hitSize) goto error;
+ AllocateHits(_numDocs);
+ unsigned char rawGid[document::GlobalId::LENGTH];
+ for (i = 0; i < _numDocs; i++) {
+ src->ReadBytes(rawGid, document::GlobalId::LENGTH);
+ _hits[i]._gid.set(rawGid);
+ union { uint64_t INT64; double DOUBLE; } val;
+ val.INT64 = src->ReadInt64();
+ _hits[i]._metric = val.DOUBLE;
+ if ((_features & search::fs4transport::QRF_MLD) != 0) {
+ _hits[i]._partid = src->ReadInt32();
+ _hits[i].setDistributionKey(src->ReadInt32());
+ } else {
+ _hits[i]._partid = 0; // partid not available
+ _hits[i].setDistributionKey(getDistributionKey());
+ }
+ }
+ len -= _numDocs * hitSize;
+
+ if ((_features & search::fs4transport::QRF_PROPERTIES) != 0) {
+ uint32_t sz = src->ReadInt32();
+ _propsVector.resize(sz);
+ len -= sizeof(uint32_t);
+ for (i = 0; i < sz; ++i) {
+ if (! _propsVector[i].decode(*src, len)) goto error;
+ }
+ }
+
+ if (len != 0) goto error;
+
+ SetRealPCODE();
+ return true; // OK
+
+ error:
+ src->DataToDead(len);
+ return false; // FAIL
+}
+
+
+vespalib::string
+FS4Packet_QUERYRESULTX::toString(uint32_t indent) const
+{
+ vespalib::string s;
+ uint32_t i;
+
+ s += make_string("%*sFS4Packet_QUERYRESULTX {\n", indent, "");
+ s += make_string("%*s pcode : %d\n", indent, "", _pcode);
+ s += make_string("%*s features : 0x%x\n", indent, "", _features);
+ s += make_string("%*s offset : %d\n", indent, "", _offset);
+ s += make_string("%*s numDocs : %d\n", indent, "", _numDocs);
+ s += make_string("%*s totNumDocs : %" PRIu64 "\n", indent, "", _totNumDocs);
+ s += make_string("%*s maxRank : %f\n", indent, "", _maxRank);
+ s += make_string("%*s distrib key : %d\n", indent, "", getDistributionKey());
+ if (_numDocs > 0 && _sortIndex != NULL) {
+ uint32_t offset = _sortIndex[0];
+ for (i = 0; i < _numDocs; i++) {
+ uint32_t end = _sortIndex[i + 1];
+ s += make_string("%*s sort[%d] = { 0x", indent, "", i);
+ for (; offset < end; offset++)
+ s += make_string("%02x", (unsigned char)*(_sortData + offset));
+ s += make_string(" }\n");
+ }
+ }
+ s += make_string("%*s aggrData : %d bytes\n", indent, "", _aggrDataLen);
+ s += make_string("%*s groupData : %d bytes\n", indent, "", _groupDataLen);
+ s += make_string("%*s coverageDocs : %" PRIu64 "\n", indent, "", _coverageDocs);
+ s += make_string("%*s activeDocs : %" PRIu64 "\n", indent, "", _activeDocs);
+ for (i = 0; i < _numDocs; i++) {
+ s += make_string("%*s hit {", indent, "");
+ s += make_string("gid=%s, ", _hits[i]._gid.toString().c_str());
+ s += make_string("metric=%f, ", _hits[i]._metric);
+ s += make_string("partid=%d, ", _hits[i]._partid);
+ s += make_string("distribkey=%d, ", _hits[i].getDistributionKey());
+ }
+ s += make_string("%*s}\n", indent, "");
+ return s;
+}
+
+//============================================================
+
+
+FS4Packet_QUERYX::FS4Packet_QUERYX(uint32_t pcode)
+ : FS4Packet(),
+ _pcode(pcode),
+ _timeout(0),
+ _features(0),
+ _offset(0),
+ _maxhits(0),
+ _qflags(0),
+ _ranking(),
+ _propsVector(),
+ _sortSpec(),
+ _aggrSpec(),
+ _groupSpec(),
+ _sessionId(),
+ _location(),
+ _numStackItems(0),
+ _stackDump()
+{
+ UpdateCompatFeatures();
+}
+
+
+FS4Packet_QUERYX::~FS4Packet_QUERYX()
+{
+}
+
+
+void
+FS4Packet_QUERYX::UpdateCompatPCODE()
+{
+ if (_features == search::fs4transport::QF_PARSEDQUERY2_MASK)
+ _pcode = search::fs4transport::PCODE_PARSEDQUERY2;
+ else
+ _pcode = search::fs4transport::PCODE_QUERYX;
+}
+
+
+void
+FS4Packet_QUERYX::UpdateCompatFeatures()
+{
+ if (_pcode == search::fs4transport::PCODE_PARSEDQUERY2)
+ _features = search::fs4transport::QF_PARSEDQUERY2_MASK;
+}
+
+
+uint32_t
+FS4Packet_QUERYX::GetLength()
+{
+ uint32_t plen = 2 * sizeof(uint32_t);
+ plen += FNET_DataBuffer::getCompressedPositiveLength(_offset);
+ plen += FNET_DataBuffer::getCompressedPositiveLength(_maxhits);
+ if (_pcode == search::fs4transport::PCODE_QUERYX)
+ plen += sizeof(uint32_t);
+
+ if ((_features & search::fs4transport::QF_PARSEDQUERY) != 0) {
+ plen += sizeof(uint32_t)*2;
+ plen += _stackDump.size();
+ }
+ if ((_features & search::fs4transport::QF_RANKP) != 0) {
+ plen += FNET_DataBuffer::getCompressedPositiveLength(_ranking.size());
+ plen += _ranking.size();
+ }
+ if ((_features & search::fs4transport::QF_PROPERTIES) != 0) {
+ plen += sizeof(uint32_t);
+ for (uint32_t i = 0; i < _propsVector.size(); ++i) {
+ plen += _propsVector[i].getLength();
+ }
+ }
+
+ if ((_features & search::fs4transport::QF_SORTSPEC) != 0)
+ plen += sizeof(uint32_t)
+ + _sortSpec.size();
+
+ if ((_features & search::fs4transport::QF_AGGRSPEC) != 0)
+ plen += sizeof(uint32_t)
+ + _aggrSpec.size();
+
+ if ((_features & search::fs4transport::QF_GROUPSPEC) != 0)
+ plen += sizeof(uint32_t)
+ + _groupSpec.size();
+
+ if ((_features & search::fs4transport::QF_SESSIONID) != 0)
+ plen += sizeof(uint32_t)
+ + _sessionId.size();
+
+ if ((_features & search::fs4transport::QF_LOCATION) != 0)
+ plen += sizeof(uint32_t)
+ + _location.size();
+
+ return plen;
+}
+
+
+void
+FS4Packet_QUERYX::Encode(FNET_DataBuffer *dst)
+{
+ if (_pcode == search::fs4transport::PCODE_QUERYX)
+ dst->WriteInt32Fast(_features);
+
+ dst->writeCompressedPositive(_offset);
+ dst->writeCompressedPositive(_maxhits);
+ dst->WriteInt32Fast(_timeout);
+ dst->WriteInt32Fast(_qflags);
+
+ if ((_features & search::fs4transport::QF_RANKP) != 0) {
+ dst->writeCompressedPositive(_ranking.size());
+ dst->WriteBytesFast(_ranking.c_str(), _ranking.size());
+ }
+
+ if ((_features & search::fs4transport::QF_PROPERTIES) != 0) {
+ dst->WriteInt32Fast(_propsVector.size());
+ for (uint32_t i = 0; i < _propsVector.size(); ++i) {
+ _propsVector[i].encode(*dst);
+ }
+ }
+
+ if ((_features & search::fs4transport::QF_SORTSPEC) != 0) {
+ dst->WriteInt32Fast(_sortSpec.size());
+ dst->WriteBytesFast(_sortSpec.c_str(), _sortSpec.size());
+ }
+
+ if ((_features & search::fs4transport::QF_AGGRSPEC) != 0) {
+ dst->WriteInt32Fast(_aggrSpec.size());
+ dst->WriteBytesFast(_aggrSpec.c_str(), _aggrSpec.size());
+ }
+
+ if ((_features & search::fs4transport::QF_GROUPSPEC) != 0) {
+ dst->WriteInt32Fast(_groupSpec.size());
+ dst->WriteBytesFast(_groupSpec.c_str(), _groupSpec.size());
+ }
+
+ if ((_features & search::fs4transport::QF_SESSIONID) != 0) {
+ dst->WriteInt32Fast(_sessionId.size());
+ dst->WriteBytesFast(_sessionId.c_str(), _sessionId.size());
+ }
+
+ if ((_features & search::fs4transport::QF_LOCATION) != 0) {
+ dst->WriteInt32Fast(_location.size());
+ dst->WriteBytesFast(_location.c_str(), _location.size());
+ }
+
+ if ((_features & search::fs4transport::QF_PARSEDQUERY) != 0) {
+ dst->WriteInt32Fast(_numStackItems);
+ dst->WriteInt32Fast(_stackDump.size());
+ dst->WriteBytesFast(_stackDump.c_str(), _stackDump.size());
+ }
+}
+
+void FS4Packet::throwPropertieDecodeError(size_t i)
+{
+ throw vespalib::IllegalArgumentException(vespalib::make_string("Failed decoding properties[%ld]", i));
+}
+
+void FS4Packet::throwUnsupportedFeatures(uint32_t features, uint32_t set)
+{
+ throw vespalib::UnderflowException(vespalib::make_string("Unsupported features(%x), supported set(%x)", features, set));
+}
+
+void FS4Packet::throwNotEnoughData(FNET_DataBuffer & buf, uint32_t left, uint32_t needed, const char * text)
+{
+ (void) buf;
+ throw vespalib::UnderflowException(vespalib::make_string("Failed decoding packet of type %d. Only %d bytes left, needed %d from '%s'", GetPCODE(), left, needed, text));
+}
+
+#define VERIFY_LEN(needed, text) \
+ { \
+ if (len < needed) { \
+ throwNotEnoughData(*src, len, needed, text); \
+ } \
+ len -= needed; \
+ }
+
+uint32_t FS4Packet::readUInt32(FNET_DataBuffer & buf, uint32_t & len, const char *text)
+{
+ if (len < sizeof(uint32_t)) {
+ throwNotEnoughData(buf, len, sizeof(uint32_t), text); \
+ }
+ len -= sizeof(uint32_t);
+ return buf.ReadInt32();
+}
+
+void
+FS4Packet_GETDOCSUMSX::setTimeout(const fastos::TimeStamp & timeout)
+{
+ _timeout = std::max(0l, timeout.ms());
+}
+
+fastos::TimeStamp
+FS4Packet_GETDOCSUMSX::getTimeout() const
+{
+ return fastos::TimeStamp(_timeout*fastos::TimeStamp::MS);
+}
+
+void
+FS4Packet_QUERYX::setTimeout(const fastos::TimeStamp & timeout)
+{
+ _timeout = std::max(0l, timeout.ms());
+}
+
+fastos::TimeStamp
+FS4Packet_QUERYX::getTimeout() const
+{
+ return fastos::TimeStamp(_timeout*fastos::TimeStamp::MS);
+}
+
+bool
+FS4Packet_QUERYX::Decode(FNET_DataBuffer *src, uint32_t len)
+{
+ if (_pcode == search::fs4transport::PCODE_QUERYX) {
+ _features = readUInt32(*src, len, "features");
+ }
+
+ if (((_features & ~search::fs4transport::FNET_QF_SUPPORTED_MASK) != 0)) {
+ throwUnsupportedFeatures(_features, search::fs4transport::FNET_QF_SUPPORTED_MASK);
+ }
+ _offset = src->readCompressedPositiveInteger();
+ len -= FNET_DataBuffer::getCompressedPositiveLength(_offset);
+ _maxhits = src->readCompressedPositiveInteger();
+ len -= FNET_DataBuffer::getCompressedPositiveLength(_maxhits);
+ VERIFY_LEN(2 * sizeof(uint32_t), "offset, maxhits, timeout and qflags");
+ _timeout = src->ReadInt32();
+ _qflags = src->ReadInt32();
+
+ if ((_features & search::fs4transport::QF_RANKP) != 0) {
+ uint32_t rankingLen = src->readCompressedPositiveInteger();
+ len -= FNET_DataBuffer::getCompressedPositiveLength(rankingLen);
+ VERIFY_LEN(rankingLen, "ranking blob");
+ setRanking(stringref(src->GetData(), rankingLen));
+ src->DataToDead(rankingLen);
+ }
+
+ if ((_features & search::fs4transport::QF_PROPERTIES) != 0) {
+ uint32_t cnt = readUInt32(*src, len, "#properties");
+ _propsVector.resize(cnt);
+ for (uint32_t i = 0; i < cnt; ++i) {
+ if (!_propsVector[i].decode(*src, len)) {
+ throwPropertieDecodeError(i);
+ }
+ }
+ }
+
+ if ((_features & search::fs4transport::QF_SORTSPEC) != 0) {
+ uint32_t sortSpecLen = readUInt32(*src, len, "sortspec length");
+
+ VERIFY_LEN(sortSpecLen, "sortspec string");
+ setSortSpec(stringref(src->GetData(), sortSpecLen));
+ src->DataToDead(sortSpecLen);
+ }
+
+ if ((_features & search::fs4transport::QF_AGGRSPEC) != 0) {
+ uint32_t aggrSpecLen = readUInt32(*src, len, "aggrspec length");
+
+ VERIFY_LEN(aggrSpecLen, "aggrspec string");
+ setAggrSpec(stringref(src->GetData(), aggrSpecLen));
+ src->DataToDead(aggrSpecLen);
+ }
+
+ if ((_features & search::fs4transport::QF_GROUPSPEC) != 0) {
+ uint32_t groupSpecLen = readUInt32(*src, len, "groupspec length");
+
+ VERIFY_LEN(groupSpecLen, "groupspec string");
+ setGroupSpec(stringref(src->GetData(), groupSpecLen));
+ src->DataToDead(groupSpecLen);
+ }
+
+ if ((_features & search::fs4transport::QF_SESSIONID) != 0) {
+ uint32_t sessionIdLen = readUInt32(*src, len, "sessionid length");
+ VERIFY_LEN(sessionIdLen, "sessionid string");
+ setSessionId(stringref(src->GetData(), sessionIdLen));
+ src->DataToDead(sessionIdLen);
+ }
+
+ if ((_features & search::fs4transport::QF_LOCATION) != 0) {
+ uint32_t locationLen = readUInt32(*src, len, "location length");
+
+ VERIFY_LEN(locationLen, "location string");
+ setLocation(stringref(src->GetData(), locationLen));
+ src->DataToDead(locationLen);
+ }
+
+ if ((_features & search::fs4transport::QF_WARMUP) != 0) {
+ (void) readUInt32(*src, len, "warmup");
+ }
+
+ if ((_features & search::fs4transport::QF_PARSEDQUERY) != 0) {
+ _numStackItems = readUInt32(*src, len, "# querystack items");
+
+ uint32_t stackDumpLen = readUInt32(*src, len, "stackdump length");
+ VERIFY_LEN(stackDumpLen, "stackdump");
+ setStackDump(stringref(src->GetData(), stackDumpLen));
+ src->DataToDead(stackDumpLen);
+ }
+ if (len != 0) {
+ throwNotEnoughData(*src, len, 0, "eof");
+ }
+
+ SetRealPCODE();
+ return true; // OK
+}
+
+
+vespalib::string
+FS4Packet_QUERYX::toString(uint32_t indent) const
+{
+ vespalib::string s;
+ s += make_string("%*sFS4Packet_QUERYX {\n", indent, "");
+ s += make_string("%*s pcode : %d\n", indent, "", _pcode);
+ s += make_string("%*s features : 0x%x\n", indent, "", _features);
+ s += make_string("%*s offset : %d\n", indent, "", _offset);
+ s += make_string("%*s maxhits : %d\n", indent, "", _maxhits);
+ s += make_string("%*s qflags : %x\n", indent, "", _qflags);
+ s += make_string("%*s ranking : %s\n", indent, "", _ranking.c_str());
+ for (uint32_t i = 0; i < _propsVector.size(); ++i) {
+ s += _propsVector[i].toString(indent + 2);
+ }
+ s += make_string("%*s sortspec : %s\n", indent, "", _sortSpec.c_str());
+ s += make_string("%*s aggrspec : %s\n", indent, "", _aggrSpec.c_str());
+ s += make_string("%*s groupspec : (%d bytes)\n", indent, "", (int)_groupSpec.size());
+ s += make_string("%*s sessionId : (%d bytes)\n", indent, "", (int)_sessionId.size());
+ s += make_string("%*s location : %s\n", indent, "", _location.c_str());
+ s += make_string("%*s timeout : %d\n", indent, "", _timeout);
+ s += make_string("%*s stackitems : %d\n", indent, "", _numStackItems);
+ s += make_string("%*s stack dump :\n", indent, "");
+ if (_stackDump.size() > 0) {
+ const char *pt = _stackDump.c_str();
+ s += make_string("%*s ", indent, "");
+ uint32_t i = 0;
+ while (i < _stackDump.size()) {
+ s += make_string("%x ", (unsigned char) pt[i]);
+ if ((++i % 16) == 0 && i < _stackDump.size()) {
+ s += make_string("\n%*s ", indent, "");
+ }
+ }
+ if ((i % 16) != 0) s += make_string("\n");
+ }
+ s += make_string("%*s}\n", indent, "");
+ return s;
+}
+
+//============================================================
+
+
+void
+FS4Packet_GETDOCSUMSX::AllocateDocIDs(uint32_t cnt)
+{
+ if (cnt == 0)
+ return;
+
+ _docid = new FS4_docid[cnt];
+ _docidCnt = cnt;
+}
+
+
+FS4Packet_GETDOCSUMSX::FS4Packet_GETDOCSUMSX(uint32_t pcode)
+ : FS4Packet(),
+ _pcode(pcode),
+ _timeout(0),
+ _features(0),
+ _ranking(),
+ _qflags(0),
+ _resultClassName(),
+ _propsVector(),
+ _stackItems(0),
+ _stackDump(),
+ _location(),
+ _flags(0u),
+ _docid(NULL),
+ _docidCnt(0)
+{
+ UpdateCompatFeatures();
+}
+
+
+FS4Packet_GETDOCSUMSX::~FS4Packet_GETDOCSUMSX()
+{
+ delete [] _docid;
+}
+
+
+void
+FS4Packet_GETDOCSUMSX::UpdateCompatPCODE()
+{
+ if (_features == search::fs4transport::GDF_GETDOCSUMS_MASK)
+ _pcode = search::fs4transport::PCODE_GETDOCSUMS;
+ else if (_features == search::fs4transport::GDF_MLD_GETDOCSUMS_MASK)
+ _pcode = search::fs4transport::PCODE_MLD_GETDOCSUMS;
+ else
+ _pcode = search::fs4transport::PCODE_GETDOCSUMSX;
+}
+
+
+void
+FS4Packet_GETDOCSUMSX::UpdateCompatFeatures()
+{
+ if (_pcode == search::fs4transport::PCODE_GETDOCSUMS)
+ _features = search::fs4transport::GDF_GETDOCSUMS_MASK;
+ else if (_pcode == search::fs4transport::PCODE_MLD_GETDOCSUMS)
+ _features = search::fs4transport::GDF_MLD_GETDOCSUMS_MASK;
+}
+
+
+uint32_t
+FS4Packet_GETDOCSUMSX::GetLength()
+{
+ uint32_t plen = 2 * sizeof(uint32_t) +
+ + _docidCnt * (sizeof(document::GlobalId));
+
+ if (_pcode == search::fs4transport::PCODE_GETDOCSUMSX)
+ plen += sizeof(uint32_t);
+
+ if ((_features & search::fs4transport::GDF_MLD) != 0)
+ plen += 2 * _docidCnt * sizeof(uint32_t);
+
+ if ((_features & search::fs4transport::GDF_QUERYSTACK) != 0)
+ plen += 2 * sizeof(uint32_t) + _stackDump.size();
+
+ if ((_features & search::fs4transport::GDF_RESCLASSNAME) != 0)
+ plen += sizeof(uint32_t) + _resultClassName.size();
+
+ if ((_features & search::fs4transport::GDF_PROPERTIES) != 0) {
+ plen += sizeof(uint32_t);
+ for (uint32_t i = 0; i < _propsVector.size(); ++i) {
+ plen += _propsVector[i].getLength();
+ }
+ }
+
+ if ((_features & search::fs4transport::GDF_RANKP_QFLAGS) != 0) {
+ plen += FNET_DataBuffer::getCompressedPositiveLength(_ranking.size());
+ plen += _ranking.size();
+ plen += sizeof(uint32_t);
+ }
+
+ if ((_features & search::fs4transport::GDF_LOCATION) != 0)
+ plen += sizeof(uint32_t)
+ + _location.size();
+
+ if ((_features & search::fs4transport::GDF_FLAGS) != 0)
+ plen += sizeof(uint32_t);
+
+ return plen;
+}
+
+
+void
+FS4Packet_GETDOCSUMSX::Encode(FNET_DataBuffer *dst)
+{
+ if (_pcode == search::fs4transport::PCODE_GETDOCSUMSX)
+ dst->WriteInt32Fast(_features);
+
+ dst->WriteInt32Fast(0);
+ dst->WriteInt32Fast(_timeout);
+
+ if ((_features & search::fs4transport::GDF_RANKP_QFLAGS) != 0) {
+ dst->writeCompressedPositive(_ranking.size());
+ dst->WriteBytesFast(_ranking.c_str(), _ranking.size());
+ dst->WriteInt32Fast(_qflags);
+ }
+
+ if ((_features & search::fs4transport::GDF_RESCLASSNAME) != 0) {
+ writeLenString(dst, _resultClassName);
+ }
+
+ if ((_features & search::fs4transport::GDF_PROPERTIES) != 0) {
+ dst->WriteInt32Fast(_propsVector.size());
+ for (uint32_t i = 0; i < _propsVector.size(); ++i) {
+ _propsVector[i].encode(*dst);
+ }
+ }
+
+ if ((_features & search::fs4transport::GDF_QUERYSTACK) != 0) {
+ dst->WriteInt32Fast(_stackItems);
+ writeLenString(dst, _stackDump);
+ }
+
+ if ((_features & search::fs4transport::GDF_LOCATION) != 0) {
+ writeLenString(dst, _location);
+ }
+
+ if ((_features & search::fs4transport::GDF_FLAGS) != 0) {
+ dst->WriteInt32Fast(_flags);
+ }
+
+ for (uint32_t i = 0; i < _docidCnt; i++) {
+ dst->WriteBytesFast(_docid[i]._gid.get(), document::GlobalId::LENGTH);
+
+ if ((_features & search::fs4transport::GDF_MLD) != 0) {
+ dst->WriteInt32Fast(_docid[i]._partid);
+ dst->WriteInt32Fast(0);
+ }
+ }
+}
+
+
+bool
+FS4Packet_GETDOCSUMSX::Decode(FNET_DataBuffer *src, uint32_t len)
+{
+ uint32_t docidSize = sizeof(document::GlobalId);
+
+ if (_pcode == search::fs4transport::PCODE_GETDOCSUMSX) {
+ _features = readUInt32(*src, len, "features");
+ }
+
+ if ((_features & ~search::fs4transport::FNET_GDF_SUPPORTED_MASK) != 0) {
+ throwUnsupportedFeatures(_features, search::fs4transport::FNET_GDF_SUPPORTED_MASK);
+ }
+
+ VERIFY_LEN(2*sizeof(uint32_t), "unused and timeout");
+ src->ReadInt32(); // unused
+ _timeout = src->ReadInt32();
+
+ if ((_features & search::fs4transport::GDF_RANKP_QFLAGS) != 0) {
+ uint32_t rankingLen = src->readCompressedPositiveInteger();
+ len -= FNET_DataBuffer::getCompressedPositiveLength(rankingLen);
+
+ VERIFY_LEN(rankingLen, "ranking blob");
+ setRanking(vespalib::stringref(src->GetData(), rankingLen));
+ src->DataToDead(rankingLen);
+
+ _qflags = readUInt32(*src, len, "qflags");
+ }
+
+ if ((_features & search::fs4transport::GDF_RESCLASSNAME) != 0) {
+ uint32_t resultClassNameLen = readUInt32(*src, len, "result class name length");
+
+ VERIFY_LEN(resultClassNameLen, "result class");
+ setResultClassName(stringref(src->GetData(), resultClassNameLen));
+ src->DataToDead(resultClassNameLen);
+ }
+
+ if ((_features & search::fs4transport::GDF_PROPERTIES) != 0) {
+ uint32_t cnt = readUInt32(*src, len, "#properties");
+ _propsVector.resize(cnt);
+ for (uint32_t i = 0; i < cnt; ++i) {
+ if (!_propsVector[i].decode(*src, len)) {
+ throwPropertieDecodeError(i);
+ }
+ }
+ }
+
+ if ((_features & search::fs4transport::GDF_QUERYSTACK) != 0) {
+ _stackItems = readUInt32(*src, len, "num stack items");
+ uint32_t stackDumpLen = readUInt32(*src, len, "stackdump length");
+ VERIFY_LEN(stackDumpLen, "stackdump");
+ setStackDump(stringref(src->GetData(), stackDumpLen));
+ src->DataToDead(stackDumpLen);
+ }
+
+ if ((_features & search::fs4transport::GDF_LOCATION) != 0) {
+ uint32_t locationLen = readUInt32(*src, len, "location length");
+ VERIFY_LEN(locationLen, "location string");
+ setLocation(stringref(src->GetData(), locationLen));
+ src->DataToDead(locationLen);
+ }
+
+ if ((_features & search::fs4transport::GDF_FLAGS) != 0) {
+ _flags = readUInt32(*src, len, "flags");
+ }
+
+ if ((_features & search::fs4transport::GDF_MLD) != 0)
+ docidSize += 2 * sizeof(uint32_t);
+
+ _docidCnt = len / docidSize;
+ AllocateDocIDs(_docidCnt);
+
+ unsigned char rawGid[document::GlobalId::LENGTH];
+ for (uint32_t i = 0; i < _docidCnt; i++) {
+ src->ReadBytes(rawGid, document::GlobalId::LENGTH);
+ _docid[i]._gid.set(rawGid);
+
+ if ((_features & search::fs4transport::GDF_MLD) != 0) {
+ _docid[i]._partid = src->ReadInt32();
+ src->ReadInt32(); // unused
+ } else {
+ _docid[i]._partid = 0; // partid not available
+ }
+ }
+ len -= _docidCnt * docidSize;
+
+ if (len != 0) {
+ throwNotEnoughData(*src, len, 0, "eof");
+ }
+
+ SetRealPCODE();
+ return true; // OK
+}
+
+
+vespalib::string
+FS4Packet_GETDOCSUMSX::toString(uint32_t indent) const
+{
+ vespalib::string s;
+ s += make_string("%*sFS4Packet_GETDOCSUMSX {\n", indent, "");
+ s += make_string("%*s features : %d\n", indent, "", _features);
+ s += make_string("%*s ranking : %s\n", indent, "", _ranking.c_str());
+ s += make_string("%*s qflags : %x\n", indent, "", _qflags);
+ s += make_string("%*s resClassName: %s\n", indent, "", _resultClassName.c_str());
+ for (uint32_t i = 0; i < _propsVector.size(); ++i) {
+ s += _propsVector[i].toString(indent + 2);
+ }
+ s += make_string("%*s stackItems : %d\n", indent, "", _stackItems);
+ s += make_string("%*s stackDumpLen : %d\n", indent, "", (int)_stackDump.size());
+ s += make_string("%*s stackDump :\n", indent, "");
+
+ uint32_t i = 0;
+ if (_stackDump.size() > 0) {
+ const char *pt = _stackDump.c_str();
+ s += make_string("%*s ", indent, "");
+ while (i < _stackDump.size()) {
+ s += make_string("%x ", (unsigned char) pt[i]);
+ if ((++i % 16) == 0)
+ s += make_string("\n%*s ", indent, "");
+ }
+ if ((i % 16) != 0) s += make_string("\n");
+ }
+ for (i = 0; i < _docidCnt; i++) {
+ s += make_string("%*s gid=%s, partid=%d\n", indent, "",
+ _docid[i]._gid.toString().c_str(), _docid[i]._partid);
+ }
+ s += make_string("%*s location : %s\n", indent, "", _location.c_str());
+ s += make_string("%*s timeout : %d\n", indent, "", _timeout);
+ s += make_string("%*s flags : %d\n", indent, "", _flags);
+ s += make_string("%*s}\n", indent, "");
+ return s;
+}
+
+//============================================================
+
+uint32_t
+FS4Packet_TRACEREPLY::GetLength()
+{
+ uint32_t plen = sizeof(uint32_t);
+ for (uint32_t i = 0; i < _propsVector.size(); ++i) {
+ plen += _propsVector[i].getLength();
+ }
+ return plen;
+}
+
+void
+FS4Packet_TRACEREPLY::Encode(FNET_DataBuffer *dst)
+{
+ dst->WriteInt32Fast(_propsVector.size());
+ for (uint32_t i = 0; i < _propsVector.size(); ++i) {
+ _propsVector[i].encode(*dst);
+ }
+}
+
+bool
+FS4Packet_TRACEREPLY::Decode(FNET_DataBuffer *src, uint32_t len)
+{
+ uint32_t cnt = readUInt32(*src, len, "#properties");
+ _propsVector.resize(cnt);
+ for (uint32_t i = 0; i < cnt; ++i) {
+ if (!_propsVector[i].decode(*src, len)) {
+ throwPropertieDecodeError(i);
+ }
+ }
+ if (len != 0) goto error;
+ return true; // OK
+ error:
+ src->DataToDead(len);
+ return false; // FAIL
+}
+
+vespalib::string
+FS4Packet_TRACEREPLY::toString(uint32_t indent) const
+{
+ vespalib::string s;
+ s += make_string("%*sFS4Packet_TRACEREPLY {\n", indent, "");
+ for (uint32_t i = 0; i < _propsVector.size(); ++i) {
+ s += _propsVector[i].toString(indent + 2);
+ }
+ s += make_string("%*s}\n", indent, "");
+ return s;
+}
+
+
+//============================================================
+
+FNET_Packet*
+FS4PacketFactory::CreateFS4Packet(uint32_t pcode)
+{
+ switch(pcode) {
+ case search::fs4transport::PCODE_EOL:
+ return new FS4Packet_EOL;
+ case search::fs4transport::PCODE_QUERYRESULT:
+ return new FS4Packet_QUERYRESULTX(search::fs4transport::
+ PCODE_QUERYRESULT);
+ case search::fs4transport::PCODE_ERROR:
+ return new FS4Packet_ERROR;
+ case search::fs4transport::PCODE_GETDOCSUMS:
+ return new FS4Packet_GETDOCSUMSX(search::fs4transport::
+ PCODE_GETDOCSUMS);
+ case search::fs4transport::PCODE_DOCSUM:
+ return new FS4Packet_DOCSUM;
+ case search::fs4transport::PCODE_MONITORQUERY:
+ return new FS4Packet_MONITORQUERYX(search::fs4transport::
+ PCODE_MONITORQUERY);
+ case search::fs4transport::PCODE_MONITORRESULT:
+ return new FS4Packet_MONITORRESULTX(search::fs4transport::
+ PCODE_MONITORRESULT);
+ case search::fs4transport::PCODE_MLD_QUERYRESULT:
+ return new FS4Packet_QUERYRESULTX(search::fs4transport::
+ PCODE_MLD_QUERYRESULT);
+ case search::fs4transport::PCODE_MLD_GETDOCSUMS:
+ return new FS4Packet_GETDOCSUMSX(search::fs4transport::
+ PCODE_MLD_GETDOCSUMS);
+ case search::fs4transport::PCODE_MLD_MONITORRESULT:
+ return new FS4Packet_MONITORRESULTX(search::fs4transport::
+ PCODE_MLD_MONITORRESULT);
+ case search::fs4transport::PCODE_CLEARCACHES:
+ return new FS4Packet_CLEARCACHES;
+ case search::fs4transport::PCODE_PARSEDQUERY2:
+ return new FS4Packet_QUERYX(search::fs4transport::PCODE_PARSEDQUERY2);
+ case search::fs4transport::PCODE_QUEUELEN:
+ return new FS4Packet_QUEUELEN;
+ case search::fs4transport::PCODE_QUERYRESULTX:
+ return new FS4Packet_QUERYRESULTX;
+ case search::fs4transport::PCODE_QUERYX:
+ return new FS4Packet_QUERYX;
+ case search::fs4transport::PCODE_GETDOCSUMSX:
+ return new FS4Packet_GETDOCSUMSX;
+ case search::fs4transport::PCODE_MONITORQUERYX:
+ return new FS4Packet_MONITORQUERYX;
+ case search::fs4transport::PCODE_MONITORRESULTX:
+ return new FS4Packet_MONITORRESULTX;
+ case search::fs4transport::PCODE_TRACEREPLY:
+ return new FS4Packet_TRACEREPLY;
+ default:
+ return NULL;
+ }
+}
+
+}
+}
diff --git a/searchlib/src/vespa/searchlib/common/packets.h b/searchlib/src/vespa/searchlib/common/packets.h
new file mode 100644
index 00000000000..f3ea8e5b225
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/common/packets.h
@@ -0,0 +1,593 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright (C) 2002-2003 Fast Search & Transfer ASA
+// Copyright (C) 2003 Overture Services Norway AS
+
+#pragma once
+
+#include <vespa/fnet/fnet.h>
+#include <vespa/vespalib/util/memory.h>
+#include <vespa/document/base/globalid.h>
+#include <vespa/document/util/compressionconfig.h>
+#include <vespa/searchlib/common/transport.h>
+#include <vespa/searchlib/common/hitrank.h>
+
+namespace search
+{
+
+namespace fs4transport
+{
+using vespalib::string;
+
+enum fnet_feature_masks {
+ FNET_QRF_SUPPORTED_MASK = (QRF_MLD |
+ QRF_SORTDATA |
+ QRF_AGGRDATA |
+ QRF_COVERAGE |
+ QRF_GROUPDATA |
+ QRF_PROPERTIES),
+
+ FNET_QF_SUPPORTED_MASK = (QF_PARSEDQUERY |
+ QF_RANKP |
+ QF_SORTSPEC |
+ QF_AGGRSPEC |
+ QF_LOCATION |
+ QF_PROPERTIES |
+ QF_GROUPSPEC |
+ QF_SESSIONID),
+
+ FNET_GDF_SUPPORTED_MASK = (GDF_MLD |
+ GDF_QUERYSTACK |
+ GDF_RANKP_QFLAGS |
+ GDF_LOCATION |
+ GDF_RESCLASSNAME |
+ GDF_PROPERTIES |
+ GDF_FLAGS),
+
+ FNET_MQF_SUPPORTED_MASK = (MQF_QFLAGS),
+
+ FNET_MRF_SUPPORTED_MASK = (MRF_MLD | MRF_RFLAGS | MRF_ACTIVEDOCS)
+};
+
+enum pcode_mask {
+ PCODE_MASK = 0x00ffffff
+};
+
+//==========================================================================
+
+class PacketArray
+{
+private:
+ PacketArray(const PacketArray &);
+ PacketArray& operator=(const PacketArray &);
+
+ FNET_Packet **_extArray;
+ FNET_Packet **_array;
+ uint32_t _size;
+ uint32_t _used;
+
+public:
+ PacketArray(FNET_Packet **arr = NULL,
+ uint32_t size = 0)
+ : _extArray(arr),
+ _array(arr),
+ _size(size),
+ _used(0) {}
+ ~PacketArray()
+ {
+ if (_array != _extArray)
+ delete [] _array;
+ }
+ void Add(FNET_Packet *packet)
+ {
+ if (_used == _size) {
+ _size *= 2;
+ if (_size < 16)
+ _size = 16;
+ FNET_Packet **newArray = new FNET_Packet*[_size];
+ for (uint32_t i = 0; i < _used; i++)
+ newArray[i] = _array[i];
+ if (_array != _extArray)
+ delete [] _array;
+ _array = newArray;
+ }
+ _array[_used++] = packet;
+ }
+ FNET_Packet **Array() const { return _array; }
+ uint32_t Length() const { return _used; }
+};
+
+//==========================================================================
+
+class FS4PacketFactory
+{
+public:
+ typedef FNET_Packet *(* CreatePacket_t)(uint32_t pcode);
+
+ static FNET_Packet *CreateFS4Packet(uint32_t pcode);
+};
+
+//==========================================================================
+
+class FS4PersistentPacketStreamer : public FNET_IPacketStreamer {
+ FS4PersistentPacketStreamer(const FS4PersistentPacketStreamer &);
+ FS4PersistentPacketStreamer& operator=(const FS4PersistentPacketStreamer &);
+
+ unsigned int _compressionLimit;
+ unsigned int _compressionLevel;
+ document::CompressionConfig::Type _compressionType;
+protected:
+ bool _conservative; // Set to true if out of sync should mark the
+ // stream as broken.
+ FS4PacketFactory::CreatePacket_t _createPacket;
+
+ bool HasChannelID(uint32_t pcode);
+ bool ValidPCode(uint32_t pcode) const {
+ return ((pcode & PCODE_MASK) >= PCODE_EOL)
+ && ((pcode & PCODE_MASK) < PCODE_LastCode);
+ }
+
+public:
+ static FS4PersistentPacketStreamer Instance;
+
+ FS4PersistentPacketStreamer(FS4PacketFactory::CreatePacket_t cp);
+
+ bool GetPacketInfo(FNET_DataBuffer *src, uint32_t *plen,
+ uint32_t *pcode, uint32_t *chid, bool *broken);
+ FNET_Packet *Decode(FNET_DataBuffer *src, uint32_t plen,
+ uint32_t pcode, FNET_Context context);
+ void Encode(FNET_Packet *packet, uint32_t chid, FNET_DataBuffer *dst);
+
+ void SetConservativeMode(bool cons) { _conservative = cons; }
+ void SetCompressionLimit(unsigned int limit) { _compressionLimit = limit; }
+ void SetCompressionLevel(unsigned int level) { _compressionLevel = level; }
+ void SetCompressionType(document::CompressionConfig::Type compressionType) { _compressionType = compressionType; }
+ document::CompressionConfig::Type getCompressionType() const { return _compressionType; }
+ uint32_t getCompressionLimit() const { return _compressionLimit; }
+ uint32_t getCompressionLevel() const { return _compressionLevel; }
+};
+
+//==========================================================================
+
+class FS4Properties
+{
+private:
+ typedef std::pair<uint32_t, uint32_t> StringRef;
+ typedef std::pair<StringRef, StringRef> Entry;
+ typedef std::vector<Entry> KeyValueVector;
+
+ KeyValueVector _entries;
+ vespalib::string _name;
+ vespalib::string _backing;
+ const char * c_str(size_t sz) const { return _backing.c_str() + sz; }
+ void set(StringRef & e, const vespalib::stringref & s);
+public:
+ FS4Properties(FS4Properties &&);
+ FS4Properties &operator=(FS4Properties &&);
+
+ FS4Properties();
+ ~FS4Properties();
+ void allocEntries(uint32_t cnt);
+ void setName(const char *name, uint32_t nameSize) { _name.assign(name, nameSize); }
+ void setName(const vespalib::stringref &val) {
+ setName(val.data(), val.size());
+ }
+ void setKey(uint32_t entry, const char *key, uint32_t keySize);
+ void setKey(uint32_t entry, const vespalib::stringref &val) {
+ setKey(entry, val.data(), val.size());
+ }
+ void setValue(uint32_t entry, const char *value, uint32_t valueSize);
+ void setValue(uint32_t entry, const vespalib::stringref &val) {
+ setValue(entry, val.data(), val.size());
+ }
+ uint32_t size() const { return _entries.size(); }
+ const char *getName() const { return _name.c_str(); }
+ uint32_t getNameLen() const { return _name.size(); }
+ const char *getKey(uint32_t entry) const { return c_str(_entries[entry].first.first); }
+ uint32_t getKeyLen(uint32_t entry) const { return _entries[entry].first.second; }
+ const char *getValue(uint32_t entry) const { return c_str(_entries[entry].second.first); }
+ uint32_t getValueLen(uint32_t entry) const { return _entries[entry].second.second; }
+
+ // sub-packet methods below
+ uint32_t getLength();
+ void encode(FNET_DataBuffer &dst);
+ bool decode(FNET_DataBuffer &src, uint32_t &len);
+ vespalib::string toString(uint32_t indent = 0) const;
+};
+
+//==========================================================================
+
+typedef std::vector<FS4Properties> PropsVector;
+
+//==========================================================================
+
+class FS4Packet : public FNET_Packet
+{
+private:
+ FS4Packet(const FS4Packet &);
+ FS4Packet& operator=(const FS4Packet &);
+
+public:
+ FS4Packet();
+ ~FS4Packet();
+ vespalib::string Print(uint32_t indent) override;
+ void Free() override;
+ virtual vespalib::string toString(uint32_t indent) const = 0;
+protected:
+ uint32_t readUInt32(FNET_DataBuffer & buf, uint32_t & len, const char *text) __attribute__((noinline));
+ void throwNotEnoughData(FNET_DataBuffer & buf, uint32_t left, uint32_t needed, const char * text) __attribute__((noinline));
+ void throwUnsupportedFeatures(uint32_t features, uint32_t set) __attribute__((noinline));
+ void throwPropertieDecodeError(size_t i) __attribute__((noinline));
+};
+
+//==========================================================================
+
+class FS4Packet_EOL : public FS4Packet
+{
+public:
+ FS4Packet_EOL();
+ ~FS4Packet_EOL();
+ uint32_t GetPCODE() override { return PCODE_EOL; }
+ uint32_t GetLength() override;
+ void Encode(FNET_DataBuffer *dst) override;
+ bool Decode(FNET_DataBuffer *src, uint32_t len) override;
+ vespalib::string toString(uint32_t indent) const override;
+};
+
+class FS4Packet_PreSerialized : public FS4Packet
+{
+public:
+ FS4Packet_PreSerialized(FNET_Packet & packet);
+ ~FS4Packet_PreSerialized();
+ uint32_t GetPCODE() override;
+ uint32_t GetLength() override;
+ void Encode(FNET_DataBuffer *dst) override;
+ bool Decode(FNET_DataBuffer *src, uint32_t len) override;
+ vespalib::string toString(uint32_t indent) const override;
+private:
+ uint32_t _pcode;
+ document::CompressionConfig::Type _compressionType;
+ FNET_DataBuffer _data;
+};
+
+class FS4Packet_Shared : public FS4Packet
+{
+public:
+ FS4Packet_Shared(FNET_Packet::SP packet);
+ ~FS4Packet_Shared();
+ uint32_t GetPCODE() override;
+ uint32_t GetLength() override;
+ void Encode(FNET_DataBuffer *dst) override;
+ bool Decode(FNET_DataBuffer *, uint32_t ) override;
+ vespalib::string toString(uint32_t indent) const override;
+private:
+ FNET_Packet::SP _packet;
+};
+
+//==========================================================================
+
+class FS4Packet_ERROR : public FS4Packet
+{
+private:
+ FS4Packet_ERROR(const FS4Packet_ERROR &);
+ FS4Packet_ERROR& operator=(const FS4Packet_ERROR &);
+
+public:
+ uint32_t _errorCode;
+ string _message;
+
+ void setErrorMessage(const vespalib::stringref &msg) { _message = msg; }
+
+ FS4Packet_ERROR();
+ ~FS4Packet_ERROR();
+ uint32_t GetPCODE() override { return PCODE_ERROR; }
+ uint32_t GetLength() override;
+ void Encode(FNET_DataBuffer *dst) override;
+ bool Decode(FNET_DataBuffer *src, uint32_t len) override;
+ vespalib::string toString(uint32_t indent) const override;
+};
+
+//==========================================================================
+
+class FS4Packet_DOCSUM : public FS4Packet
+{
+public:
+ typedef vespalib::MallocPtr Buf;
+private:
+ FS4Packet_DOCSUM(const FS4Packet_DOCSUM &);
+ FS4Packet_DOCSUM& operator=(const FS4Packet_DOCSUM &);
+
+ document::GlobalId _gid;
+ Buf _buf;
+public:
+ FS4Packet_DOCSUM();
+ ~FS4Packet_DOCSUM();
+ const Buf & getBuf() const { return _buf; }
+ void swapBuf(Buf & other) { _buf.swap(other); }
+ void setGid(const document::GlobalId & gid) { _gid = gid; }
+ const document::GlobalId & getGid() const { return _gid; }
+ bool empty() const { return _buf.empty(); }
+ void SetBuf(const char *buf, uint32_t len);
+ uint32_t GetPCODE() override { return PCODE_DOCSUM; }
+ uint32_t GetLength() override { return sizeof(_gid) + _buf.size(); }
+ void Encode(FNET_DataBuffer *dst) override;
+ bool Decode(FNET_DataBuffer *src, uint32_t len) override;
+ vespalib::string toString(uint32_t indent) const override;
+};
+
+//==========================================================================
+
+class FS4Packet_MONITORQUERYX : public FS4Packet
+{
+ FS4Packet_MONITORQUERYX(const FS4Packet_MONITORQUERYX &);
+ FS4Packet_MONITORQUERYX& operator=(const FS4Packet_MONITORQUERYX &);
+
+ uint32_t _pcode;
+public:
+ uint32_t _features; // see monitorquery_features
+ uint32_t _qflags; // if MQF_QFLAGS
+
+ FS4Packet_MONITORQUERYX(uint32_t pcode = PCODE_MONITORQUERYX);
+ ~FS4Packet_MONITORQUERYX();
+ void UpdateCompatPCODE();
+ void UpdateCompatFeatures();
+ void SetRealPCODE(void) { _pcode = PCODE_MONITORQUERYX; }
+ uint32_t GetPCODE() override { return _pcode; }
+ uint32_t GetLength() override;
+ void Encode(FNET_DataBuffer *dst) override;
+ bool Decode(FNET_DataBuffer *src, uint32_t len) override;
+ vespalib::string toString(uint32_t indent) const override;
+};
+
+//==========================================================================
+
+class FS4Packet_MONITORRESULTX : public FS4Packet
+{
+private:
+ FS4Packet_MONITORRESULTX(const FS4Packet_MONITORRESULTX &);
+ FS4Packet_MONITORRESULTX& operator=(const FS4Packet_MONITORRESULTX &);
+
+ uint32_t _pcode;
+public:
+ uint32_t _features; // see monitor
+ uint32_t _partid;
+ uint32_t _timestamp;
+
+ uint32_t _totalNodes; // if MRF_MLD
+ uint32_t _activeNodes; // if MRF_MLD
+ uint32_t _totalParts; // if MRF_MLD
+ uint32_t _activeParts; // if MRF_MLD
+
+ uint32_t _rflags; // if MRF_RFLAGS
+ uint64_t _activeDocs; // if MRF_ACTIVEDOCS
+
+ FS4Packet_MONITORRESULTX(uint32_t pcode = PCODE_MONITORRESULTX);
+ ~FS4Packet_MONITORRESULTX();
+ void UpdateCompatPCODE();
+ void UpdateCompatFeatures();
+ void SetRealPCODE(void) { _pcode = PCODE_MONITORRESULTX; }
+ uint32_t GetPCODE() override { return _pcode; }
+ uint32_t GetLength() override;
+ void Encode(FNET_DataBuffer *dst) override;
+ bool Decode(FNET_DataBuffer *src, uint32_t len) override;
+ vespalib::string toString(uint32_t indent) const override;
+};
+
+//==========================================================================
+
+class FS4Packet_CLEARCACHES : public FS4Packet
+{
+public:
+ FS4Packet_CLEARCACHES();
+ ~FS4Packet_CLEARCACHES();
+ uint32_t GetPCODE() override { return PCODE_CLEARCACHES; }
+ uint32_t GetLength() override;
+ void Encode(FNET_DataBuffer *dst) override;
+ bool Decode(FNET_DataBuffer *src, uint32_t len) override;
+ vespalib::string toString(uint32_t indent) const override;
+};
+
+//==========================================================================
+
+class FS4Packet_QUEUELEN : public FS4Packet
+{
+public:
+ uint32_t _queueLen;
+ uint32_t _dispatchers;
+
+ FS4Packet_QUEUELEN();
+ ~FS4Packet_QUEUELEN();
+ uint32_t GetPCODE() override { return PCODE_QUEUELEN; }
+ uint32_t GetLength() override;
+ void Encode(FNET_DataBuffer *dst) override;
+ bool Decode(FNET_DataBuffer *src, uint32_t len) override;
+ vespalib::string toString(uint32_t indent) const override;
+};
+
+//==========================================================================
+
+class FS4Packet_QUERYRESULTX : public FS4Packet
+{
+private:
+ FS4Packet_QUERYRESULTX(const FS4Packet_QUERYRESULTX &);
+ FS4Packet_QUERYRESULTX& operator=(const FS4Packet_QUERYRESULTX &);
+
+ uint32_t _pcode;
+ uint32_t _distributionKey;
+
+public:
+ uint32_t _features; // see queryresult_features
+ uint32_t _offset;
+ uint32_t _numDocs;
+ uint64_t _totNumDocs;
+ search::HitRank _maxRank;
+ uint32_t *_sortIndex; // if QRF_SORTDATA
+ char *_sortData; // if QRF_SORTDATA
+ uint32_t _aggrDataLen; // if QRF_AGGRDATA
+ char *_aggrData; // if QRF_AGGRDATA
+ uint32_t _groupDataLen; // if QRF_GROUPDATA
+ char *_groupData; // if QRF_GROUPDATA
+ uint64_t _coverageDocs; // if QRF_COVERAGE
+ uint64_t _activeDocs; // if QRF_COVERAGE
+ class FS4_hit {
+ public:
+ FS4_hit() : _gid(), _metric(0), _partid(0), _distributionKey(0) { }
+ uint32_t getDistributionKey() const { return _distributionKey; }
+ void setDistributionKey(uint32_t key) { _distributionKey = key; }
+ const document::GlobalId & HT_GetGlobalID() const { return _gid; }
+ search::HitRank HT_GetMetric() const { return _metric; }
+ uint32_t HT_GetPartID() const { return _partid; }
+
+ void HT_SetGlobalID(const document::GlobalId & val) { _gid = val; }
+ void HT_SetMetric(search::HitRank val) { _metric = val; }
+ void HT_SetPartID(uint32_t val) { _partid = val; }
+ document::GlobalId _gid;
+ search::HitRank _metric;
+ uint32_t _partid; // if QRF_MLD
+ private:
+ uint32_t _distributionKey; // if QRF_MLD
+ } *_hits;
+ PropsVector _propsVector; // if QRF_PROPERTIES
+
+ void AllocateSortIndex(uint32_t cnt);
+ void AllocateSortData(uint32_t len);
+ void SetSortDataRef(uint32_t cnt, uint32_t *sortIndex, const char *sortData);
+ void AllocateAggrData(uint32_t len);
+ void SetAggrDataRef(const char *aggrData, uint32_t len);
+ void AllocateGroupData(uint32_t len);
+ void SetGroupDataRef(const char *groupData, uint32_t len);
+ void AllocateHits(uint32_t cnt);
+
+ FS4Packet_QUERYRESULTX(uint32_t pcode = PCODE_QUERYRESULTX);
+ ~FS4Packet_QUERYRESULTX();
+ void UpdateCompatPCODE();
+ void UpdateCompatFeatures();
+ void SetRealPCODE() { _pcode = PCODE_QUERYRESULTX; }
+ uint32_t GetPCODE() override { return _pcode; }
+ uint32_t GetLength() override;
+ void Encode(FNET_DataBuffer *dst) override;
+ bool Decode(FNET_DataBuffer *src, uint32_t len) override ;
+ vespalib::string toString(uint32_t indent) const override ;
+ uint32_t getDistributionKey() const { return _distributionKey; }
+ void setDistributionKey(uint32_t key) { _distributionKey = key; }
+};
+
+//==========================================================================
+
+class FS4Packet_QUERYX : public FS4Packet
+{
+private:
+ FS4Packet_QUERYX(const FS4Packet_QUERYX &);
+ FS4Packet_QUERYX& operator=(const FS4Packet_QUERYX &);
+
+ uint32_t _pcode;
+ uint32_t _timeout;
+
+public:
+ uint32_t _features; // see query_features
+ uint32_t _offset;
+ uint32_t _maxhits;
+ uint32_t _qflags;
+ string _ranking; // if QF_RANKP
+ PropsVector _propsVector; // if QF_PROPERTIES
+ string _sortSpec; // if QF_SORTSPEC
+ string _aggrSpec; // if QF_AGGRSPEC
+ string _groupSpec; // if QF_GROUPSPEC
+ string _sessionId; // if QF_SESSIONID
+ string _location; // if QF_LOCATION
+
+ uint32_t _numStackItems; // if QF_PARSEDQUERY
+ string _stackDump; // if QF_PARSEDQUERY
+
+ void setRanking(const vespalib::stringref &ranking) { _ranking = ranking; }
+ void setSortSpec(const vespalib::stringref &spec) { _sortSpec = spec; }
+ void setAggrSpec(const vespalib::stringref &spec) { _aggrSpec = spec; }
+ void setGroupSpec(const vespalib::stringref &spec) { _groupSpec = spec; }
+ void setSessionId(const vespalib::stringref &sid) { _sessionId = sid; }
+ void setLocation(const vespalib::stringref &loc) { _location = loc; }
+ void setStackDump(const vespalib::stringref &buf) { _stackDump = buf; }
+ void setTimeout(const fastos::TimeStamp & timeout);
+ fastos::TimeStamp getTimeout() const;
+
+ explicit FS4Packet_QUERYX(uint32_t pcode = PCODE_QUERYX);
+ ~FS4Packet_QUERYX();
+ void UpdateCompatPCODE();
+ void UpdateCompatFeatures();
+ void SetRealPCODE() { _pcode = PCODE_QUERYX; }
+ uint32_t GetPCODE() override { return _pcode; }
+ uint32_t GetLength() override;
+ void Encode(FNET_DataBuffer *dst) override;
+ bool Decode(FNET_DataBuffer *src, uint32_t len) override;
+ vespalib::string toString(uint32_t indent) const override;
+};
+
+//==========================================================================
+
+class FS4Packet_GETDOCSUMSX : public FS4Packet
+{
+private:
+ FS4Packet_GETDOCSUMSX(const FS4Packet_GETDOCSUMSX &);
+ FS4Packet_GETDOCSUMSX& operator=(const FS4Packet_GETDOCSUMSX &);
+
+ uint32_t _pcode;
+ uint32_t _timeout;
+public:
+ uint32_t _features; // see getdocsums_features
+ string _ranking; // if GDF_RANKP_QFLAGS
+ uint32_t _qflags; // if GDF_RANKP_QFLAGS
+ string _resultClassName; // if GDF_RESCLASSNAME
+ PropsVector _propsVector; // if GDF_PROPERTIES
+ uint32_t _stackItems; // if GDF_QUERYSTACK
+ string _stackDump; // if GDF_QUERYSTACK
+ string _location; // if GDF_LOCATION
+ uint32_t _flags; // if GDF_FLAGS
+ class FS4_docid {
+ public:
+ FS4_docid() : _gid(), _partid(0) { }
+ document::GlobalId _gid;
+ uint32_t _partid; // if GDF_MLD
+ } *_docid;
+
+ uint32_t _docidCnt;
+
+ void AllocateDocIDs(uint32_t cnt);
+
+ void setResultClassName(const vespalib::stringref &name) { _resultClassName = name; }
+ void setStackDump(const vespalib::stringref &buf) { _stackDump = buf; }
+ void setRanking(const vespalib::stringref &ranking) { _ranking = ranking; }
+ void setLocation(const vespalib::stringref &loc) { _location = loc; }
+ void setTimeout(const fastos::TimeStamp & timeout);
+ fastos::TimeStamp getTimeout() const;
+
+ FS4Packet_GETDOCSUMSX(uint32_t pcode = PCODE_GETDOCSUMSX);
+ ~FS4Packet_GETDOCSUMSX();
+ void UpdateCompatPCODE();
+ void UpdateCompatFeatures();
+ void SetRealPCODE() { _pcode = PCODE_GETDOCSUMSX; }
+ uint32_t GetPCODE() override { return _pcode; }
+ uint32_t GetLength() override;
+ void Encode(FNET_DataBuffer *dst) override;
+ bool Decode(FNET_DataBuffer *src, uint32_t len) override;
+ vespalib::string toString(uint32_t indent) const override;
+};
+
+//==========================================================================
+
+class FS4Packet_TRACEREPLY : public FS4Packet
+{
+public:
+ FS4Packet_TRACEREPLY() {}
+ ~FS4Packet_TRACEREPLY() {}
+ uint32_t GetPCODE() override { return PCODE_TRACEREPLY; }
+ uint32_t GetLength() override;
+ void Encode(FNET_DataBuffer *dst) override;
+ bool Decode(FNET_DataBuffer *src, uint32_t len) override;
+ vespalib::string toString(uint32_t indent) const override;
+
+ PropsVector _propsVector;
+};
+
+//==========================================================================
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/common/partialbitvector.cpp b/searchlib/src/vespa/searchlib/common/partialbitvector.cpp
new file mode 100644
index 00000000000..b3472abe89a
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/common/partialbitvector.cpp
@@ -0,0 +1,24 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright (C) 1998-2003 Fast Search & Transfer ASA
+// Copyright (C) 2003 Overture Services Norway AS
+
+#include <vespa/fastos/fastos.h>
+#include "partialbitvector.h"
+
+/////////////////////////////////
+namespace search
+{
+
+PartialBitVector::PartialBitVector(Index start, Index end) :
+ BitVector(),
+ _alloc(numActiveBytes(start, end))
+{
+ init(_alloc.get(), start, end);
+ clear();
+}
+
+PartialBitVector::~PartialBitVector()
+{
+}
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/common/partialbitvector.h b/searchlib/src/vespa/searchlib/common/partialbitvector.h
new file mode 100644
index 00000000000..94facc9512a
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/common/partialbitvector.h
@@ -0,0 +1,38 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright (C) 1998-2003 Fast Search & Transfer ASA
+// Copyright (C) 2003 Overture Services Norway AS
+
+#pragma once
+
+#include <vespa/searchlib/common/bitvector.h>
+
+namespace search {
+
+/**
+ * search::PartialBitVector is a bitvector that is only represents 1 part
+ * of the full space. All operations concerning the whole vector while only
+ * be conducted on this smaller area.
+ */
+class PartialBitVector : public BitVector
+{
+public:
+ typedef vespalib::AutoAlloc<0x800000, 0x1000> Alloc;
+
+ /**
+ * Class constructor specifying startindex and endindex.
+ * Allocated area is zeroed.
+ *
+ * @param start is the beginning.
+ * @end is the end.
+ *
+ */
+ PartialBitVector(Index start, Index end);
+
+ virtual ~PartialBitVector(void);
+
+private:
+ Alloc _alloc;
+};
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/common/range.h b/searchlib/src/vespa/searchlib/common/range.h
new file mode 100644
index 00000000000..3fd53b43a97
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/common/range.h
@@ -0,0 +1,30 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright (C) 1998-2003 Fast Search & Transfer ASA
+// Copyright (C) 2003 Overture Services Norway AS
+
+#pragma once
+
+namespace search
+{
+
+template <typename T>
+class Range {
+public:
+ Range() :
+ _lower(std::numeric_limits<T>::max()),
+ _upper(std::numeric_limits<T>::min()) { }
+ Range(T v) : _lower(v), _upper(v) { }
+ Range(T low, T high) : _lower(low), _upper(high) { }
+ T lower() const { return _lower; }
+ T upper() const { return _upper; }
+ bool valid() const { return _lower <= _upper; }
+ bool isPoint() const { return _lower == _upper; }
+private:
+ T _lower;
+ T _upper;
+};
+
+typedef Range<int64_t> Int64Range;
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/common/rankedhit.h b/searchlib/src/vespa/searchlib/common/rankedhit.h
new file mode 100644
index 00000000000..8776f997d38
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/common/rankedhit.h
@@ -0,0 +1,35 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright (C) 1998-2003 Fast Search & Transfer ASA
+// Copyright (C) 2003 Overture Services Norway AS
+
+#pragma once
+
+#include "hitrank.h"
+
+namespace search
+{
+
+struct RankedHit {
+ RankedHit() : _docId(0), _rankValue(0) { }
+ RankedHit(unsigned int docId, HitRank rank=0.0) : _docId(docId), _rankValue(rank) { }
+ unsigned int getDocId() const { return _docId & 0x7fffffff; }
+ bool hasMore() const { return _docId & 0x80000000; }
+ HitRank getRank() const { return _rankValue; }
+//:private
+ unsigned int _docId;
+ HitRank _rankValue;
+};
+
+class RankedHitIterator {
+public:
+ RankedHitIterator(const RankedHit * h, size_t sz) : _h(h), _sz(sz), _pos(0) { }
+ bool hasNext() const { return _pos < _sz; }
+ uint32_t next() { return _h[_pos++].getDocId(); }
+private:
+ const RankedHit *_h;
+ const size_t _sz;
+ size_t _pos;
+};
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/common/rcuvector.h b/searchlib/src/vespa/searchlib/common/rcuvector.h
new file mode 100644
index 00000000000..9c5954848c4
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/common/rcuvector.h
@@ -0,0 +1,354 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/vespalib/util/generationholder.h>
+#include <vespa/searchlib/util/memoryusage.h>
+#include <vespa/searchcommon/common/growstrategy.h>
+#include <vespa/vespalib/util/array.h>
+
+namespace search {
+namespace attribute {
+
+template <typename T>
+class RcuVectorHeld : public vespalib::GenerationHeldBase
+{
+ std::unique_ptr<T> _data;
+
+public:
+ RcuVectorHeld(size_t size, std::unique_ptr<T> data)
+ : vespalib::GenerationHeldBase(size),
+ _data(std::move(data))
+ {
+ }
+
+ virtual
+ ~RcuVectorHeld(void)
+ {
+ }
+};
+
+
+/**
+ * Vector class for elements of type T using the read-copy-update
+ * mechanism to ensure that reader threads will have a consistent view
+ * of the vector while the update thread is inserting new elements.
+ * The update thread is also responsible for updating the current
+ * generation of the vector, and initiating removing of old underlying
+ * data vectors.
+ **/
+template <typename T>
+class RcuVectorBase
+{
+ static_assert(std::is_trivially_destructible<T>::value,
+ "Value type must be trivially destructible");
+
+protected:
+ typedef vespalib::Array<T, vespalib::DefaultAlloc> Array;
+ typedef vespalib::GenerationHandler::generation_t generation_t;
+ typedef vespalib::GenerationHolder GenerationHolder;
+ Array _data;
+ size_t _growPercent;
+ size_t _growDelta;
+ GenerationHolder &_genHolder;
+
+ size_t
+ calcSize(size_t baseSize) const
+ {
+ size_t delta = (baseSize * _growPercent / 100) + _growDelta;
+ return baseSize + std::max(delta, static_cast<size_t>(1));
+ }
+ size_t
+ calcSize() const
+ {
+ return calcSize(_data.capacity());
+ }
+ void expand(size_t newCapacity);
+ void expandAndInsert(const T & v);
+
+public:
+ RcuVectorBase(GenerationHolder &genHolder);
+
+ /**
+ * Construct a new vector with the given initial capacity and grow
+ * parameters.
+ *
+ * New capacity is calculated based on old capacity and grow parameters:
+ * nc = oc + (oc * growPercent / 100) + growDelta.
+ **/
+ RcuVectorBase(size_t initialCapacity, size_t growPercent, size_t growDelta,
+ GenerationHolder &genHolder);
+
+ RcuVectorBase(GrowStrategy growStrategy, GenerationHolder &genHolder)
+ : RcuVectorBase(growStrategy.getDocsInitialCapacity(),
+ growStrategy.getDocsGrowPercent(),
+ growStrategy.getDocsGrowDelta(),
+ genHolder) {}
+
+ /**
+ * Return whether all capacity has been used. If true the next
+ * call to push_back() will cause an expand of the underlying
+ * data.
+ **/
+ bool isFull() const { return _data.size() == _data.capacity(); }
+
+ /**
+ * Return the combined memory usage for this instance.
+ **/
+ MemoryUsage getMemoryUsage() const;
+
+ // vector interface
+ // no swap method, use reset() to forget old capacity and holds
+ // NOTE: Unsafe resize/reserve may invalidate data references held by readers!
+ void unsafe_resize(size_t n) { _data.resize(n); }
+ void unsafe_reserve(size_t n) { _data.reserve(n); }
+ void ensure_size(size_t n, T fill = T()) {
+ if (n > capacity()) {
+ expand(calcSize(n));
+ }
+ while (size() < n) {
+ _data.push_back(fill);
+ }
+ }
+ void push_back(const T & v) {
+ if (_data.size() < _data.capacity()) {
+ _data.push_back(v);
+ } else {
+ expandAndInsert(v);
+ }
+ }
+
+ bool
+ empty(void) const
+ {
+ return _data.empty();
+ }
+
+ size_t size() const { return _data.size(); }
+ size_t capacity() const { return _data.capacity(); }
+ void clear() { _data.clear(); }
+ T & operator[](size_t i) { return _data[i]; }
+ const T & operator[](size_t i) const { return _data[i]; }
+
+ void
+ reset(void)
+ {
+ // Assumes no readers at this moment
+ Array().swap(_data);
+ _data.reserve(16);
+ }
+
+ void
+ shrink(size_t newSize) __attribute__((noinline));
+
+ template <class Reader, class Saver>
+ void
+ fillMapped(GenerationHolder &genHolder,
+ Reader &reader,
+ uint64_t numValues,
+ const T *map,
+ size_t mapSize,
+ Saver &saver,
+ uint32_t numDocs);
+};
+
+template <typename T>
+void
+RcuVectorBase<T>::expand(size_t newCapacity) {
+ std::unique_ptr<Array> tmpData(new Array());
+ tmpData->reserve(newCapacity);
+ tmpData->resize(_data.size());
+ memcpy(tmpData->begin(), _data.begin(), _data.size() * sizeof(T));
+ tmpData->swap(_data); // atomic switch of underlying data
+ size_t holdSize = tmpData->size() * sizeof(T);
+ vespalib::GenerationHeldBase::UP hold(new RcuVectorHeld<Array>(holdSize, std::move(tmpData)));
+ _genHolder.hold(std::move(hold));
+}
+
+template <typename T>
+void
+RcuVectorBase<T>::expandAndInsert(const T & v)
+{
+ expand(calcSize());
+ assert(_data.size() < _data.capacity());
+ _data.push_back(v);
+}
+
+
+template <typename T>
+void
+RcuVectorBase<T>::shrink(size_t newSize)
+{
+ // TODO: Extend Array class to support more optimial shrink when
+ // backing store is memory mapped.
+ assert(newSize <= _data.size());
+ std::unique_ptr<Array> tmpData(new Array());
+ tmpData->reserve(newSize);
+ tmpData->resize(newSize);
+ for (uint32_t i = 0; i < newSize; ++i) {
+ (*tmpData)[i] = _data[i];
+ }
+ // Users of RCU vector must ensure that no readers use old size
+ // after swap. Attribute vectors uses _committedDocIdLimit for this.
+ tmpData->swap(_data); // atomic switch of underlying data
+ // Use capacity() instead of size() ?
+ size_t holdSize = tmpData->size() * sizeof(T);
+ vespalib::GenerationHeldBase::UP hold(new RcuVectorHeld<Array>(holdSize, std::move(tmpData)));
+ _genHolder.hold(std::move(hold));
+}
+
+
+template <typename T>
+RcuVectorBase<T>::RcuVectorBase(GenerationHolder &genHolder)
+ : _data(),
+ _growPercent(100),
+ _growDelta(0),
+ _genHolder(genHolder)
+{
+ _data.reserve(16);
+}
+
+template <typename T>
+RcuVectorBase<T>::RcuVectorBase(size_t initialCapacity,
+ size_t growPercent,
+ size_t growDelta,
+ GenerationHolder &genHolder)
+ : _data(),
+ _growPercent(growPercent),
+ _growDelta(growDelta),
+ _genHolder(genHolder)
+{
+ _data.reserve(initialCapacity);
+}
+
+template <typename T>
+MemoryUsage
+RcuVectorBase<T>::getMemoryUsage() const
+{
+ MemoryUsage retval;
+ retval.incAllocatedBytes(_data.capacity() * sizeof(T));
+ retval.incUsedBytes(_data.size() * sizeof(T));
+ return retval;
+}
+
+
+template <class T>
+template <class Reader, class Saver>
+void
+RcuVectorBase<T>::fillMapped(GenerationHolder &genHolder,
+ Reader &reader,
+ uint64_t numValues,
+ const T *map,
+ size_t mapSize,
+ Saver &saver,
+ uint32_t numDocs)
+{
+ assert(numDocs == numValues);
+ (void) numValues;
+ genHolder.clearHoldLists();
+ reset();
+ unsafe_reserve(numDocs);
+ for (uint32_t doc = 0; doc < numDocs; ++doc) {
+ uint32_t e = reader.getNextEnum();
+ assert(e < mapSize);
+ (void) mapSize;
+ push_back(map[e]);
+ saver.save(e, doc, 0, 1);
+ }
+}
+
+
+template <typename T>
+class RcuVector : public RcuVectorBase<T>
+{
+private:
+ typedef typename RcuVectorBase<T>::generation_t generation_t;
+ typedef typename RcuVectorBase<T>::GenerationHolder GenerationHolder;
+ using RcuVectorBase<T>::_data;
+ generation_t _generation;
+ GenerationHolder _genHolderStore;
+
+ void
+ expandAndInsert(const T & v)
+ {
+ RcuVectorBase<T>::expandAndInsert(v);
+ _genHolderStore.transferHoldLists(_generation);
+ }
+
+public:
+ RcuVector()
+ : RcuVectorBase<T>(_genHolderStore),
+ _generation(0),
+ _genHolderStore()
+ {
+ }
+
+ /**
+ * Construct a new vector with the given initial capacity and grow
+ * parameters.
+ *
+ * New capacity is calculated based on old capacity and grow parameters:
+ * nc = oc + (oc * growPercent / 100) + growDelta.
+ **/
+ RcuVector(size_t initialCapacity, size_t growPercent, size_t growDelta)
+ : RcuVectorBase<T>(initialCapacity, growPercent, growDelta,
+ _genHolderStore),
+ _generation(0),
+ _genHolderStore()
+ {
+ }
+
+ RcuVector(GrowStrategy growStrategy)
+ : RcuVectorBase<T>(growStrategy, _genHolderStore), _generation(0), _genHolderStore()
+ {
+ }
+
+ ~RcuVector()
+ {
+ _genHolderStore.clearHoldLists();
+ }
+
+ generation_t
+ getGeneration() const
+ {
+ return _generation;
+ }
+
+ void
+ setGeneration(generation_t generation)
+ {
+ _generation = generation;
+ }
+
+ /**
+ * Remove all old data vectors where generation < firstUsed.
+ **/
+ void
+ removeOldGenerations(generation_t firstUsed)
+ {
+ _genHolderStore.trimHoldLists(firstUsed);
+ }
+
+ void
+ push_back(const T & v)
+ {
+ if (_data.size() < _data.capacity()) {
+ _data.push_back(v);
+ } else {
+ expandAndInsert(v);
+ }
+ }
+
+ MemoryUsage
+ getMemoryUsage() const
+ {
+ MemoryUsage retval(RcuVectorBase<T>::getMemoryUsage());
+ retval.incAllocatedBytesOnHold(_genHolderStore.getHeldBytes());
+ return retval;
+ }
+};
+
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/common/reserved.h b/searchlib/src/vespa/searchlib/common/reserved.h
new file mode 100644
index 00000000000..d97cb2216df
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/common/reserved.h
@@ -0,0 +1,19 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright (C) 2002-2003 Fast Search & Transfer ASA
+// Copyright (C) 2003 Overture Services Norway AS
+
+#pragma once
+
+// These are used by FAST Web Search for host name anchoring.
+
+// NB! Should be changed to uppercase once the functionality is implemented!!
+
+static const char *ANCHOR_START_OF_HOST = "StArThOsT";
+static const char *ANCHOR_END_OF_HOST = "EnDhOsT";
+
+// These are used in the query parser when parsing fields with parsemode
+// 'boundaries'. Not used otherwise. Lowercased for performance reasons.
+
+#define ANCHOR_LEFT_BOUNDARY "fastpbfast"
+#define ANCHOR_RIGHT_BOUNDARY "fastpbfast"
+
diff --git a/searchlib/src/vespa/searchlib/common/resultset.cpp b/searchlib/src/vespa/searchlib/common/resultset.cpp
new file mode 100644
index 00000000000..ac69680b427
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/common/resultset.cpp
@@ -0,0 +1,149 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright (C) 1998-2003 Fast Search & Transfer ASA
+// Copyright (C) 2003 Overture Services Norway AS
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("");
+
+#include <vespa/searchlib/common/resultset.h>
+#include <vespa/searchlib/common/bitvector.h>
+
+namespace search
+{
+
+
+ResultSet::ResultSet(void)
+ : _elemsUsedInRankedHitsArray(0u),
+ _rankedHitsArrayAllocElements(0u),
+ _bitOverflow(),
+ _rankedHitsArray()
+{
+}
+
+
+ResultSet::ResultSet(const ResultSet &other)
+ : _elemsUsedInRankedHitsArray(0),
+ _rankedHitsArrayAllocElements(0),
+ _bitOverflow(),
+ _rankedHitsArray()
+{
+ allocArray(other._elemsUsedInRankedHitsArray);
+ _elemsUsedInRankedHitsArray = other._elemsUsedInRankedHitsArray;
+ if (_elemsUsedInRankedHitsArray > 0)
+ memcpy(_rankedHitsArray.get(),
+ other._rankedHitsArray.get(),
+ _elemsUsedInRankedHitsArray * sizeof(RankedHit));
+
+ if (other._bitOverflow) {
+ _bitOverflow = BitVector::create(*other._bitOverflow);
+ }
+}
+
+
+ResultSet::~ResultSet(void)
+{
+}
+
+
+void
+ResultSet::allocArray(unsigned int arrayAllocated)
+{
+ if (arrayAllocated > 0) {
+ ArrayAlloc n(arrayAllocated * sizeof(RankedHit));
+ _rankedHitsArray.swap(n);
+ } else {
+ ArrayAlloc n;
+ _rankedHitsArray.swap(n);
+ }
+ _rankedHitsArrayAllocElements = arrayAllocated;
+ _elemsUsedInRankedHitsArray = 0;
+}
+
+
+void
+ResultSet::setArrayUsed(unsigned int arrayUsed)
+{
+ assert(arrayUsed <= _rankedHitsArrayAllocElements);
+ _elemsUsedInRankedHitsArray = arrayUsed;
+}
+
+
+void
+ResultSet::setBitOverflow(BitVector::UP newBitOverflow)
+{
+ _bitOverflow = std::move(newBitOverflow);
+}
+
+
+//////////////////////////////////////////////////////////////////////
+// Find number of hits
+//////////////////////////////////////////////////////////////////////
+unsigned int
+ResultSet::getNumHits(void) const
+{
+ return (_bitOverflow) ? _bitOverflow->countTrueBits() : _elemsUsedInRankedHitsArray;
+}
+
+
+void
+ResultSet::mergeWithBitOverflow(void)
+{
+ if ( ! _bitOverflow) {
+ return;
+ }
+
+ const BitVector *bitVector = _bitOverflow.get();
+
+ const RankedHit *oldA = getArray();
+ const RankedHit *oldAEnd = oldA + _elemsUsedInRankedHitsArray;
+ uint32_t bidx = bitVector->getFirstTrueBit();
+
+ uint32_t actualHits = getNumHits();
+ ArrayAlloc newHitsAlloc(actualHits*sizeof(RankedHit));
+ RankedHit *newHitsArray = static_cast<RankedHit *>(newHitsAlloc.get());
+
+ RankedHit * tgtA = newHitsArray;
+ RankedHit * tgtAEnd = newHitsArray + actualHits;
+
+ if (oldAEnd > oldA) { // we have array hits
+ uint32_t firstArrayHit = oldA->_docId;
+ uint32_t lastArrayHit = (oldAEnd - 1)->_docId;
+
+ // bitvector hits before array hits
+ while (bidx < firstArrayHit) {
+ tgtA->_docId = bidx;
+ tgtA->_rankValue = 0;
+ tgtA++;
+ bidx = bitVector->getNextTrueBit(bidx + 1);
+ }
+
+ // merge bitvector and array hits
+ while (bidx <= lastArrayHit) {
+ tgtA->_docId = bidx;
+ if (bidx == oldA->_docId) {
+ tgtA->_rankValue = oldA->_rankValue;
+ oldA++;
+ } else {
+ tgtA->_rankValue = 0;
+ }
+ tgtA++;
+ bidx = bitVector->getNextTrueBit(bidx + 1);
+ }
+ }
+ assert(oldA == oldAEnd);
+
+ // bitvector hits after array hits
+ while (tgtA < tgtAEnd) {
+ tgtA->_docId = bidx;
+ tgtA->_rankValue = 0;
+ tgtA++;
+ bidx = bitVector->getNextTrueBit(bidx + 1);
+ }
+ _rankedHitsArrayAllocElements = actualHits;
+ _elemsUsedInRankedHitsArray = actualHits;
+ _rankedHitsArray.swap(newHitsAlloc);
+ setBitOverflow(NULL);
+}
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/common/resultset.h b/searchlib/src/vespa/searchlib/common/resultset.h
new file mode 100644
index 00000000000..4489654d0a5
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/common/resultset.h
@@ -0,0 +1,51 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright (C) 1998-2003 Fast Search & Transfer ASA
+// Copyright (C) 2003 Overture Services Norway AS
+
+#pragma once
+
+#include <vespa/searchlib/common/rankedhit.h>
+#include <vespa/searchlib/common/bitvector.h>
+
+namespace search
+{
+
+class ResultSet
+{
+private:
+ // Everything above 8m we return to OS.
+ typedef vespalib::AutoAlloc<0x800000> ArrayAlloc;
+ ResultSet& operator=(const ResultSet &);
+
+ unsigned int _elemsUsedInRankedHitsArray;
+ unsigned int _rankedHitsArrayAllocElements;
+ BitVector::UP _bitOverflow;
+ ArrayAlloc _rankedHitsArray;
+
+public:
+ typedef std::unique_ptr<ResultSet> UP;
+ typedef std::shared_ptr<ResultSet> SP;
+ ResultSet(void);
+ ResultSet(const ResultSet &); // Used only for testing .....
+ virtual ~ResultSet(void);
+
+ void allocArray(unsigned int arrayAllocated);
+
+ void setArrayUsed(unsigned int arrayUsed);
+ void setBitOverflow(BitVector::UP newBitOverflow);
+ const RankedHit * getArray(void) const { return static_cast<const RankedHit *>(_rankedHitsArray.get()); }
+ RankedHit * getArray(void) { return static_cast<RankedHit *>(_rankedHitsArray.get()); }
+ unsigned int getArrayUsed(void) const { return _elemsUsedInRankedHitsArray; }
+ unsigned int getArrayAllocated(void) const { return _rankedHitsArrayAllocElements; }
+
+ const BitVector * getBitOverflow(void) const { return _bitOverflow.get(); }
+ BitVector * getBitOverflow(void) { return _bitOverflow.get(); }
+ unsigned int getNumHits(void) const;
+ void mergeWithBitOverflow(void);
+
+ /* isEmpty() is allowed to return false even if bitmap has no hits */
+ bool isEmpty(void) const { return (_bitOverflow == NULL && _elemsUsedInRankedHitsArray == 0); }
+};
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/common/scheduletaskcallback.h b/searchlib/src/vespa/searchlib/common/scheduletaskcallback.h
new file mode 100644
index 00000000000..d6c6f29abaf
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/common/scheduletaskcallback.h
@@ -0,0 +1,32 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include "idestructorcallback.h"
+
+namespace search
+{
+
+/**
+ * Class that schedules a task when instance is destroyed. Typically a
+ * shared pointer to an instance is passed around to multiple worker
+ * threads that performs portions of a larger task before dropping the
+ * shared pointer, triggering the callback when all worker threads
+ * have completed.
+ */
+class ScheduleTaskCallback : public IDestructorCallback
+{
+ vespalib::Executor &_executor;
+ vespalib::Executor::Task::UP _task;
+public:
+ ScheduleTaskCallback(vespalib::Executor &executor,
+ vespalib::Executor::Task::UP task)
+ : _executor(executor),
+ _task(std::move(task))
+ {
+ }
+ virtual ~ScheduleTaskCallback() {
+ _executor.execute(std::move(_task));
+ }
+};
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/common/sequencedtaskexecutor.cpp b/searchlib/src/vespa/searchlib/common/sequencedtaskexecutor.cpp
new file mode 100644
index 00000000000..2fe4a23e3ae
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/common/sequencedtaskexecutor.cpp
@@ -0,0 +1,65 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".common.sequencedtaskexecutor");
+
+#include "sequencedtaskexecutor.h"
+#include <vespa/vespalib/util/threadstackexecutor.h>
+
+using vespalib::ThreadStackExecutor;
+
+namespace search
+{
+
+namespace
+{
+
+constexpr uint32_t stackSize = 128 * 1024;
+
+}
+
+
+SequencedTaskExecutor::SequencedTaskExecutor(uint32_t threads)
+ : _executors()
+{
+ for (uint32_t id = 0; id < threads; ++id) {
+ auto executor = std::make_unique<ThreadStackExecutor>(1, stackSize);
+ _executors.push_back(std::move(executor));
+ }
+}
+
+SequencedTaskExecutor::~SequencedTaskExecutor()
+{
+ sync();
+}
+
+
+void
+SequencedTaskExecutor::executeTask(uint64_t id,
+ vespalib::Executor::Task::UP task)
+{
+ auto itr = _ids.find(id);
+ if (itr == _ids.end()) {
+ auto insarg = std::make_pair(id, _ids.size() % _executors.size());
+ auto insres = _ids.insert(insarg);
+ assert(insres.second);
+ itr = insres.first;
+ }
+ size_t executorId = itr->second;
+ vespalib::ThreadStackExecutorBase &executor(*_executors[executorId]);
+ auto rejectedTask = executor.execute(std::move(task));
+ assert(!rejectedTask);
+}
+
+
+void
+SequencedTaskExecutor::sync()
+{
+ for (auto &executor : _executors) {
+ executor->sync();
+ }
+}
+
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/common/sequencedtaskexecutor.h b/searchlib/src/vespa/searchlib/common/sequencedtaskexecutor.h
new file mode 100644
index 00000000000..c3b4a778cf2
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/common/sequencedtaskexecutor.h
@@ -0,0 +1,36 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include "isequencedtaskexecutor.h"
+#include <vespa/vespalib/stllike/hash_map.h>
+
+namespace vespalib
+{
+
+class ThreadStackExecutorBase;
+
+}
+
+namespace search
+{
+
+/**
+ * Class to run multiple tasks in parallel, but tasks with same
+ * id has to be run in sequence.
+ */
+class SequencedTaskExecutor : public ISequencedTaskExecutor
+{
+ std::vector<std::shared_ptr<vespalib::ThreadStackExecutorBase>> _executors;
+ vespalib::hash_map<size_t, size_t> _ids;
+public:
+ SequencedTaskExecutor(uint32_t threads);
+
+ ~SequencedTaskExecutor();
+
+ virtual void executeTask(uint64_t id,
+ vespalib::Executor::Task::UP task) override;
+
+ virtual void sync() override;
+};
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/common/sequencedtaskexecutorobserver.h b/searchlib/src/vespa/searchlib/common/sequencedtaskexecutorobserver.h
new file mode 100644
index 00000000000..ffc6ba7f55b
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/common/sequencedtaskexecutorobserver.h
@@ -0,0 +1,44 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include "isequencedtaskexecutor.h"
+#include <atomic>
+
+namespace search
+{
+
+/**
+ * Observer class to observe class to run multiple tasks in parallel,
+ * but tasks with same id has to be run in sequence.
+ */
+class SequencedTaskExecutorObserver : public ISequencedTaskExecutor
+{
+ ISequencedTaskExecutor &_executor;
+ std::atomic<uint32_t> _executeCnt;
+ std::atomic<uint32_t> _syncCnt;
+public:
+ SequencedTaskExecutorObserver(ISequencedTaskExecutor &executor)
+ : _executor(executor),
+ _executeCnt(0u),
+ _syncCnt(0u)
+ {
+ }
+
+ virtual ~SequencedTaskExecutorObserver() { }
+
+ virtual void executeTask(uint64_t id,
+ vespalib::Executor::Task::UP task) override {
+ ++_executeCnt;
+ _executor.executeTask(id, std::move(task));
+ }
+
+ virtual void sync() override {
+ ++_syncCnt;
+ _executor.sync();
+ }
+
+ uint32_t getExecuteCnt() const { return _executeCnt; }
+ uint32_t getSyncCnt() const { return _syncCnt; }
+};
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/common/serialnum.h b/searchlib/src/vespa/searchlib/common/serialnum.h
new file mode 100644
index 00000000000..f71f10719d2
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/common/serialnum.h
@@ -0,0 +1,13 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <stdint.h>
+
+namespace search {
+
+// This is a unique identification number.
+typedef uint64_t SerialNum;
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/common/serialnumfileheadercontext.cpp b/searchlib/src/vespa/searchlib/common/serialnumfileheadercontext.cpp
new file mode 100644
index 00000000000..d9b575678d9
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/common/serialnumfileheadercontext.cpp
@@ -0,0 +1,36 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include "serialnumfileheadercontext.h"
+#include <vespa/vespalib/data/fileheader.h>
+
+
+namespace search
+{
+
+namespace common
+{
+
+
+SerialNumFileHeaderContext::SerialNumFileHeaderContext(
+ const FileHeaderContext &parentFileHeaderContext,
+ SerialNum serialNum)
+ : FileHeaderContext(),
+ _parentFileHeaderContext(parentFileHeaderContext),
+ _serialNum(serialNum)
+{
+}
+
+
+void
+SerialNumFileHeaderContext::addTags(vespalib::GenericHeader &header,
+ const vespalib::string &name) const
+{
+ _parentFileHeaderContext.addTags(header, name);
+ typedef vespalib::GenericHeader::Tag Tag;
+ if (_serialNum != 0u)
+ header.putTag(Tag("serialNum", _serialNum));
+}
+
+} // namespace common
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/common/serialnumfileheadercontext.h b/searchlib/src/vespa/searchlib/common/serialnumfileheadercontext.h
new file mode 100644
index 00000000000..24969193347
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/common/serialnumfileheadercontext.h
@@ -0,0 +1,31 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include "fileheadercontext.h"
+#include "serialnum.h"
+
+namespace search
+{
+
+namespace common
+{
+
+class SerialNumFileHeaderContext : public FileHeaderContext
+{
+ const FileHeaderContext &_parentFileHeaderContext;
+ SerialNum _serialNum;
+
+public:
+ SerialNumFileHeaderContext(const FileHeaderContext &
+ parentFileHeaderContext,
+ SerialNum serialNum);
+
+ virtual void
+ addTags(vespalib::GenericHeader &header,
+ const vespalib::string &name) const;
+};
+
+} // namespace common
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/common/sort.cpp b/searchlib/src/vespa/searchlib/common/sort.cpp
new file mode 100644
index 00000000000..9d43e98457d
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/common/sort.cpp
@@ -0,0 +1,21 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/searchlib/common/sort.h>
+
+namespace search {
+
+bool radix_prepare(unsigned int n, unsigned int last[257], unsigned int ptr[256], unsigned int cnt[256])
+{
+ // Accumulate cnt positions
+ bool sorted = (cnt[0]==n);
+ ptr[0] = 0;
+ for(unsigned int i(1); i<256; i++) {
+ ptr[i] = ptr[i-1] + cnt[i-1];
+ sorted |= (cnt[i]==n);
+ }
+ memcpy(last, ptr, 256*sizeof(unsigned int));
+ last[256] = last[255] + cnt[255];
+ return sorted;
+}
+
+}
diff --git a/searchlib/src/vespa/searchlib/common/sort.h b/searchlib/src/vespa/searchlib/common/sort.h
new file mode 100644
index 00000000000..231865321c0
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/common/sort.h
@@ -0,0 +1,537 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/vespalib/util/optimized.h>
+#include <vespa/vespalib/util/sort.h>
+#include <functional>
+#include <limits>
+#include <algorithm>
+
+
+namespace search
+{
+
+bool radix_prepare(unsigned int n, unsigned int last[257], unsigned int ptr[256], unsigned int cnt[256]);
+
+template<typename T>
+void radix_sort_core(const unsigned int * last, T * a, unsigned int n, uint32_t * radixScratch, unsigned int shiftWidth) __attribute__ ((noinline));
+
+template<typename T>
+void radix_sort_core(const unsigned int * last, T * a, unsigned int n, uint32_t * radixScratch, unsigned int shiftWidth)
+{
+ T temp, swap;
+ // Go through all permutation cycles until all
+ // elements are moved or found to be already in place
+ unsigned int ptr[256];
+ unsigned int i, j, k;
+ memcpy(ptr, last, sizeof(ptr));
+ i = 0;
+ unsigned int remain = n;
+
+ while (remain > 0) {
+ // Find first uncompleted class
+ while (ptr[i] == last[i+1]) {
+ i++;
+ }
+
+ // Grab first element to move
+ j = ptr[i];
+ uint32_t swapK = radixScratch[j];
+ k = (swapK >> shiftWidth) & 0xFF;
+
+ // Swap into correct class until cycle completed
+ if (i != k) {
+ swap = a[j];
+ do {
+ unsigned int t(ptr[k]);
+ temp = a[t];
+ uint32_t tempK(radixScratch[t]);
+ radixScratch[t] = swapK;
+ a[t] = swap;
+ ptr[k]++;
+ swapK = tempK;
+ swap = temp;
+ k = (tempK >> shiftWidth) & 0xFF;
+ remain--;
+ } while (i!=k);
+ // Place last element in cycle
+ a[j] = swap;
+ radixScratch[j] = swapK;
+ }
+ ptr[k]++;
+ remain--;
+ }
+}
+
+template<typename T, typename GR>
+unsigned int radix_fetch(T *a, unsigned int n, uint32_t * radixScratch, GR R) __attribute__ ((noinline));
+
+template<typename T, typename GR>
+unsigned int radix_fetch(T *a, unsigned int n, uint32_t * radixScratch, GR R)
+{
+ unsigned int i = 0;
+ uint32_t usedBits = 0;
+ if (n > 3) {
+ for(; i < n - 3; i += 4) {
+ radixScratch[i + 0] = R(a[i + 0]);
+ radixScratch[i + 1] = R(a[i + 1]);
+ radixScratch[i + 2] = R(a[i + 2]);
+ radixScratch[i + 3] = R(a[i + 3]);
+ usedBits |= radixScratch[i + 0];
+ usedBits |= radixScratch[i + 1];
+ usedBits |= radixScratch[i + 2];
+ usedBits |= radixScratch[i + 3];
+ }
+ }
+ for(; i < n; i++) {
+ radixScratch[i] = R(a[i]);
+ usedBits |= radixScratch[i];
+ }
+ if (usedBits != 0) {
+ int msb = vespalib::Optimized::msbIdx(usedBits);
+ return (msb+8) & ~0x7;
+ }
+ return 0;
+}
+
+template <typename T>
+class AlwaysEof
+{
+public:
+ bool operator () (const T &) const { return true; }
+ static bool alwaysEofOnCheck() { return true; }
+};
+
+template<typename T, typename ER>
+bool radix_eof(const T *a, unsigned int n, ER E) __attribute__ ((noinline));
+
+template<typename T, typename ER>
+bool radix_eof(const T *a, unsigned int n, ER E)
+{
+ unsigned int i = 0;
+ bool eof(true);
+ if (n > 3) {
+ for(; eof && (i < n - 3); i += 4) {
+ eof = E(a[i + 0]) &&
+ E(a[i + 1]) &&
+ E(a[i + 2]) &&
+ E(a[i + 3]);
+ }
+ }
+ for(; eof && (i < n); i++) {
+ eof = E(a[i]);
+ }
+ return eof;
+}
+
+/**
+ * radix sort implementation.
+ *
+ * @param stackDepth recursion level reached; since radix_sort uses
+ * lots of stack we try another algorithm if this
+ * becomes too high.
+ * @param a Pointer to the start of the array to sort
+ * @param n number of data elements to sort
+ * @param radixScratch scratch area for upto 32bits of sorting data
+ * @param radixBits how many bits of sorting data radixScratch contains
+ * @param insertSortLevel when to fall back to simple insertion sort
+ **/
+template<typename T, typename GR, typename GE, typename GRE>
+void radix_sort(GR R, GE E, GRE EE, int stackDepth,
+ T * a, unsigned int n,
+ uint32_t *radixScratch,
+ int radixBits,
+ unsigned insertSortLevel=10,
+ unsigned int topn=std::numeric_limits<unsigned int>::max())
+{
+ if (((stackDepth > 20) && (radixBits == 0)) || (n < insertSortLevel)) {
+ // switch to simpler sort if few elements
+ if (n > 1) {
+ std::sort(a, a+n, E);
+ }
+ return;
+ }
+
+ unsigned int last[257];
+ unsigned int cnt[256];
+ int shiftWidth = radixBits - 8;
+ for (bool allInOneBucket(true); allInOneBucket;) {
+ while ( radixBits == 0 ) {
+ // no data left in scratch buffer; fill up with upto 32 new bits
+ radixBits = radix_fetch(a, n, radixScratch, R);
+ if (radixBits == 0) {
+ if (EE.alwaysEofOnCheck() || radix_eof(a, n, EE)) {
+ // everything has reached end-of-string terminating zero,
+ // so we are done sorting.
+ return;
+ }
+ }
+ }
+
+ shiftWidth = radixBits - 8;
+ memset(cnt, 0, sizeof(cnt));
+ unsigned int i = 0;
+ if (n > 3) {
+ for(; i < n - 3; i += 4) {
+ cnt[(radixScratch[i + 0] >> shiftWidth) & 0xFF]++;
+ cnt[(radixScratch[i + 1] >> shiftWidth) & 0xFF]++;
+ cnt[(radixScratch[i + 2] >> shiftWidth) & 0xFF]++;
+ cnt[(radixScratch[i + 3] >> shiftWidth) & 0xFF]++;
+ }
+ }
+ for(; i < n; i++) {
+ cnt[(radixScratch[i] >> shiftWidth) & 0xFF]++;
+ }
+
+ // Accumulate cnt positions
+ allInOneBucket = false;
+ last[0] = 0;
+ for(i = 1; (i < 257) && !allInOneBucket; i++) {
+ last[i] = last[i-1] + cnt[i-1];
+ allInOneBucket = (cnt[i-1] == n);
+ }
+
+ radixBits -= 8;
+ }
+
+ radix_sort_core(last, a, n, radixScratch, shiftWidth);
+
+ // Sort on next 8 bits of key
+ for(unsigned i(0), sum(0); (i<256) && (sum < topn); i++) {
+ const unsigned l(last[i]);
+ const unsigned c(cnt[i]);
+ if (c) {
+ if (c > insertSortLevel) {
+ radix_sort(R, E, EE, stackDepth + 1, &a[l], c, &radixScratch[l], radixBits, insertSortLevel, topn-sum);
+ } else {
+ std::sort(&a[l], &a[l]+c, E);
+ }
+ sum += c;
+ }
+ }
+}
+
+
+template<typename GR, typename T, int SHIFT>
+class ShiftBasedRadixSorterBase
+{
+protected:
+ static void radix_fetch(GR R, unsigned int cnt[256], const T * a, unsigned int n) __attribute__((noinline));
+ static void radix_sort_core(GR R, unsigned int ptr[256], unsigned int last[257], T * a, unsigned int n) __attribute__((noinline));
+};
+
+template<typename GR, typename T, int SHIFT>
+void ShiftBasedRadixSorterBase<GR, T, SHIFT>::radix_fetch(GR R, unsigned int cnt[256], const T * a, unsigned int n)
+{
+ memset(cnt, 0, 256*sizeof(unsigned int));
+ unsigned int p(0);
+ if (n > 3) {
+ for(; p < n - 3; p += 4) {
+ cnt[(R(a[p]) >> SHIFT) & 0xFF]++;
+ cnt[(R(a[p + 1]) >> SHIFT) & 0xFF]++;
+ cnt[(R(a[p + 2]) >> SHIFT) & 0xFF]++;
+ cnt[(R(a[p + 3]) >> SHIFT) & 0xFF]++;
+ }
+ }
+ for(; p < n; p++) {
+ cnt[(R(a[p]) >> SHIFT) & 0xFF]++;
+ }
+}
+
+
+template<typename GR, typename T, int SHIFT>
+void ShiftBasedRadixSorterBase<GR, T, SHIFT>::radix_sort_core(GR R, unsigned int ptr[256], unsigned int last[257], T * a, unsigned int n)
+{
+ // Go through all permutation cycles until all
+ // elements are moved or found to be already in place
+ unsigned int i(0), remain(n);
+ unsigned int j, k;
+ T temp, swap;
+
+ while(remain>0) {
+ // Find first uncompleted class
+ while(ptr[i]==last[i+1]) {
+ i++;
+ }
+
+ // Grab first element to move
+ j = ptr[i];
+ k = (R(a[j]) >> SHIFT) & 0xFF;
+
+ // Swap into correct class until cycle completed
+ if (i!=k) {
+ swap = a[j];
+ do {
+ temp = a[ptr[k]];
+ a[ptr[k]++] = swap;
+ k = (R(swap=temp) >> SHIFT) & 0xFF;
+ remain--;
+ } while (i!=k);
+ // Place last element in cycle
+ a[j] = swap;
+ }
+ ptr[k]++;
+ remain--;
+ }
+}
+
+/**
+ * @param T the type of the object being sorted
+ * @param GR the functor used to fetch the number used for radix sorting. It must enure same sorting as GE.
+ * @param GE the functor used for testing if one object is orderers ahead of another.
+ * @param SHIFT is the number of significant bits in the radix - 8. Must a multiple of 8.
+ * @param continueAfterRadixEnds indicates if the radix only represents a prefix of the objects. If it is true we
+ * will continue using std::sort to order objects that have equal radix representation.
+ */
+template<typename T, typename GR, typename GE, int SHIFT, bool continueAfterRadixEnds=false>
+class ShiftBasedRadixSorter : private ShiftBasedRadixSorterBase<GR, T, SHIFT>
+{
+public:
+ static size_t radix_sort(GR R, GE E, T * a, unsigned int n, unsigned int insertSortLevel=10, unsigned int topn=std::numeric_limits<unsigned int>::max());
+ static size_t radix_sort_internal(GR R, GE E, T * a, unsigned int n, unsigned int insertSortLevel, unsigned int topn);
+private:
+ typedef ShiftBasedRadixSorterBase<GR, T, SHIFT> Base;
+};
+
+template<typename T, typename GR, typename GE, int SHIFT, bool continueAfterRadixEnds>
+size_t ShiftBasedRadixSorter<T, GR, GE, SHIFT, continueAfterRadixEnds>::radix_sort_internal(GR R, GE E, T * a, unsigned int n, unsigned int insertSortLevel, unsigned int topn)
+{
+ unsigned int last[257], ptr[256], cnt[256];
+ unsigned int sum(n);
+
+ Base::radix_fetch(R, cnt, a, n);
+
+ bool sorted = radix_prepare(n, last, ptr, cnt);
+
+ if (!sorted) {
+ Base::radix_sort_core(R, ptr, last, a, n);
+ } else {
+ return ShiftBasedRadixSorter<T, GR, GE, SHIFT - 8, continueAfterRadixEnds>::radix_sort_internal(R, E, a, n, insertSortLevel, topn);
+ }
+
+ if (SHIFT>0 || continueAfterRadixEnds) {
+ // Sort on next key
+ sum = 0;
+ for(unsigned i(0); (i<256) && (sum < topn); i++) {
+ const unsigned int c(cnt[i]);
+ const unsigned int l(last[i]);
+ if (c) {
+ if (c>insertSortLevel) {
+ sum += ShiftBasedRadixSorter<T, GR, GE, SHIFT - 8, continueAfterRadixEnds>::radix_sort_internal(R, E, &a[l], c, insertSortLevel, topn-sum);
+ } else {
+ std::sort(a+l, a+l+c, E);
+ sum += c;
+ }
+ }
+ }
+ }
+ return sum;
+}
+
+
+template<typename T, typename GR, typename GE, int SHIFT, bool continueAfterRadixEnds>
+size_t ShiftBasedRadixSorter<T, GR, GE, SHIFT, continueAfterRadixEnds>::radix_sort(GR R, GE E, T * a, unsigned int n, unsigned int insertSortLevel, unsigned int topn)
+{
+ if (n > insertSortLevel) {
+ return radix_sort_internal(R, E, a, n, insertSortLevel, topn);
+ } else if (n > 1) {
+ std::sort(a, a + n, E);
+ }
+ return n;
+}
+
+template<typename A, typename B, typename C>
+class ShiftBasedRadixSorter<A, B, C, -8, false> {
+public:
+ static size_t radix_sort_internal(B, C, A *, unsigned int, unsigned int, unsigned int) {
+ return 0;
+ }
+};
+
+template<typename A, typename B, typename C>
+class ShiftBasedRadixSorter<A, B, C, -8, true> {
+public:
+ static size_t radix_sort_internal(B, C E, A * v, unsigned int sz, unsigned int, unsigned int) {
+ std::sort(v, v + sz, E);
+ return sz;
+ }
+};
+
+template<typename T, bool asc=true>
+class NumericRadixSorter
+{
+public:
+ typedef vespalib::convertForSort<T, asc> C;
+ class RadixSortable {
+ public:
+ typename C::UIntType operator () (typename C::InputType v) const { return C::convert(v); }
+ };
+ void operator() (T * start, size_t sz, unsigned topn = std::numeric_limits<uint32_t>::max()) const {
+ if (sz > 16) {
+ ShiftBasedRadixSorter<typename C::InputType, RadixSortable, typename C::Compare, 8*(sizeof(typename C::UIntType) -1)>::radix_sort_internal(RadixSortable(), typename C::Compare(), start, sz, 16, topn);
+ } else {
+ std::sort(start, start + sz, typename C::Compare());
+ }
+ }
+};
+
+template<typename GR, typename T, int IDX>
+void radix_fetch2(GR R, unsigned int cnt[256], const T * a, unsigned int n) __attribute__ ((noinline));
+
+template<typename GR, typename T, int IDX>
+void radix_fetch2(GR R, unsigned int cnt[256], const T * a, unsigned int n)
+{
+ memset(cnt, 0, 256*sizeof(unsigned int));
+ unsigned int p(0);
+ if (n > 3) {
+ for(; p < n - 3; p += 4) {
+ cnt[R(a[p + 0], IDX)]++;
+ cnt[R(a[p + 1], IDX)]++;
+ cnt[R(a[p + 2], IDX)]++;
+ cnt[R(a[p + 3], IDX)]++;
+ }
+ }
+ for(; p < n; p++) {
+ cnt[R(a[p], IDX)]++;
+ }
+}
+
+template<typename T, typename GR, typename GE, int LEN, int POS>
+void radix_sort_internal(GR R, GE E, T * a, unsigned int n, unsigned int insertSortLevel, unsigned int topn)
+{
+ unsigned int last[257], ptr[256], cnt[256];
+
+ radix_fetch2<GR, T, LEN-POS>(R, cnt, a, n);
+
+ bool sorted = radix_prepare(n, last, ptr, cnt);
+
+ if (!sorted) {
+ // Go through all permutation cycles until all
+ // elements are moved or found to be already in place
+ unsigned int i(0), remain(n);
+ unsigned int j, k;
+ T temp, swap;
+
+ while(remain>0) {
+ // Find first uncompleted class
+ while(ptr[i]==last[i+1]) {
+ i++;
+ }
+
+ // Grab first element to move
+ j = ptr[i];
+ k = R(a[j], LEN-POS);
+
+ // Swap into correct class until cycle completed
+ if (i!=k) {
+ swap = a[j];
+ do {
+ temp = a[ptr[k]];
+ a[ptr[k]++] = swap;
+ k = R(swap=temp, LEN-POS);
+ remain--;
+ } while (i!=k);
+ // Place last element in cycle
+ a[j] = swap;
+ }
+ ptr[k]++;
+ remain--;
+ }
+ } else {
+ radix_sort_internal<T, GR, GE, LEN, POS - 1>(R, E, a, n, insertSortLevel, topn);
+ return;
+ }
+
+ if (LEN>0) {
+ // Sort on next key
+ for(unsigned i(0), sum(0); (i<256) && (sum < topn); i++) {
+ const unsigned int c(cnt[i]);
+ const unsigned int l(last[i]);
+ if (c) {
+ if (c>insertSortLevel) {
+ radix_sort_internal<T, GR, GE, LEN, POS - 1>(R, E, &a[l], c, insertSortLevel, topn-sum);
+ } else {
+ std::sort(a+l, a+l+c, E);
+ }
+ sum += c;
+ }
+ }
+ }
+}
+
+
+template<typename T, typename GR, typename GE, int LEN, int POS>
+void radix_sort(GR R, GE E, T * a, unsigned int n, unsigned int insertSortLevel=10, unsigned int topn=std::numeric_limits<unsigned int>::max())
+{
+ if (n > insertSortLevel) {
+ radix_sort_internal<T, GR, GE, LEN, POS>(R, E, a, n, insertSortLevel,
+ topn);
+ } else if (n > 1) {
+ std::sort(a, a + n, E);
+ }
+}
+
+
+template<typename T, typename GR, int SHIFT>
+void radix_stable_core(GR R, unsigned int ptr[256], const T * a, T * b, unsigned int n) __attribute__ ((noinline));
+
+template<typename T, typename GR, int SHIFT>
+void radix_stable_core(GR R, unsigned int ptr[256], const T * a, T * b, unsigned int n)
+{
+ unsigned int k;
+ for (unsigned int i(0); i < n; i++) {
+ k = (R(a[i]) >> SHIFT) & 0xFF;
+ b[ptr[k]] = a[i];
+ ptr[k]++;
+ }
+}
+
+template<typename T, typename GR, typename GE, int SHIFT>
+T * radix_stable_sort_internal(GR R, GE E, T * a, T * b, unsigned int n, unsigned int insertSortLevel=10)
+{
+ unsigned int last[257], ptr[256], cnt[256];
+
+ radix_fetch<GR, T, SHIFT>(R, cnt, a, n);
+
+ bool sorted = radix_prepare(n, last, ptr, cnt);
+
+ if (!sorted) {
+ radix_stable_core<T, R, SHIFT>(R, ptr, a, b, n);
+ } else {
+ return radix_stable_sort_internal<T, GR, GE, SHIFT - 8>(R, E, a, b, n, insertSortLevel);
+ }
+
+ if (SHIFT>0) {
+ // Sort on next key
+ for(unsigned i(0); i<256 ; i++) {
+ const unsigned int c(cnt[i]);
+ const unsigned int l(last[i]);
+ if (c>insertSortLevel) {
+ const T * r = radix_stable_sort_internal<T, GR, GE, SHIFT - 8>(R, E, &b[l], &a[l], c, insertSortLevel);
+ if (r != &b[l]) {
+ memcpy(&b[l], &a[l], c*sizeof(*r));
+ }
+ } else {
+ if (c>1) {
+ std::stable_sort(b+l, b+l+c, E);
+ }
+ }
+ }
+ }
+ return b;
+}
+
+template<typename T, typename GR, typename GE, int SHIFT>
+T* radix_stable_sort(GR R, GE E, T * a, T * b, unsigned int n, unsigned int insertSortLevel=10)
+{
+ if (n > insertSortLevel) {
+ return radix_stable_sort_internal<T, GR, GE, SHIFT>(R, E, a, b, n,
+ insertSortLevel);
+ } else if (n > 1) {
+ std::stable_sort(a, a + n, E);
+ }
+ return a;
+}
+
+}
+
diff --git a/searchlib/src/vespa/searchlib/common/sortdata.cpp b/searchlib/src/vespa/searchlib/common/sortdata.cpp
new file mode 100644
index 00000000000..a0923611b4e
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/common/sortdata.cpp
@@ -0,0 +1,65 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright (C) 1998-2003 Fast Search & Transfer ASA
+// Copyright (C) 2003 Overture Services Norway AS
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/searchlib/common/sortdata.h>
+
+namespace search {
+namespace common {
+
+uint32_t
+SortData::GetSize(uint32_t hitcnt,
+ const uint32_t *sortIndex)
+{
+ if (hitcnt == 0)
+ return 0;
+ return ((hitcnt + 1) * sizeof(uint32_t)
+ + (sortIndex[hitcnt] - sortIndex[0]));
+}
+
+
+bool
+SortData::Equals(uint32_t hitcnt,
+ const uint32_t *sortIndex_1,
+ const char *sortData_1,
+ const uint32_t *sortIndex_2,
+ const char *sortData_2)
+{
+ if (hitcnt == 0)
+ return true;
+ uint32_t diff = sortIndex_2[0] - sortIndex_1[0];
+ for (uint32_t i = 1; i <= hitcnt; i++) {
+ if (diff != (sortIndex_2[i] - sortIndex_1[i]))
+ return false;
+ }
+ assert((sortIndex_1[hitcnt] - sortIndex_1[0]) ==
+ (sortIndex_2[hitcnt] - sortIndex_2[0]));
+ return (memcmp(sortData_1 + sortIndex_1[0],
+ sortData_2 + sortIndex_2[0],
+ sortIndex_1[hitcnt] - sortIndex_1[0]) == 0);
+}
+
+
+void
+SortData::Copy(uint32_t hitcnt,
+ uint32_t *sortIndex_dst,
+ char *sortData_dst,
+ const uint32_t *sortIndex_src,
+ const char *sortData_src)
+{
+ if (hitcnt == 0)
+ return;
+ uint32_t diff = sortIndex_dst[0] - sortIndex_src[0];
+ for (uint32_t i = 1; i <= hitcnt; i++) {
+ sortIndex_dst[i] = sortIndex_src[i] + diff;
+ }
+ assert((sortIndex_dst[hitcnt] - sortIndex_dst[0]) ==
+ (sortIndex_src[hitcnt] - sortIndex_src[0]));
+ memcpy(sortData_dst + sortIndex_dst[0],
+ sortData_src + sortIndex_src[0],
+ sortIndex_dst[hitcnt] - sortIndex_dst[0]);
+}
+
+}
+}
diff --git a/searchlib/src/vespa/searchlib/common/sortdata.h b/searchlib/src/vespa/searchlib/common/sortdata.h
new file mode 100644
index 00000000000..186e534ad5b
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/common/sortdata.h
@@ -0,0 +1,99 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright (C) 1998-2003 Fast Search & Transfer ASA
+// Copyright (C) 2003 Overture Services Norway AS
+
+#pragma once
+
+#include <vespa/fastos/fastos.h>
+
+namespace search {
+namespace common {
+
+class SortData
+{
+public:
+ struct Ref
+ {
+ const char *_buf;
+ uint32_t _len;
+ };
+
+ static uint32_t GetSize(uint32_t hitcnt,
+ const uint32_t *sortIndex);
+
+ static bool Equals(uint32_t hitcnt,
+ const uint32_t *sortIndex_1,
+ const char *sortData_1,
+ const uint32_t *sortIndex_2,
+ const char *sortData_2);
+
+ // NB: first element of sortIndex_dst must be set
+ static void Copy(uint32_t hitcnt,
+ uint32_t *sortIndex_dst,
+ char *sortData_dst,
+ const uint32_t *sortIndex_src,
+ const char *sortData_src);
+};
+
+
+class SortDataIterator
+{
+private:
+ const uint32_t *_ofs;
+ const uint32_t *_ofs_end;
+ const char *_data;
+ const char *_buf;
+ uint32_t _len;
+
+public:
+ SortDataIterator()
+ : _ofs(NULL), _ofs_end(NULL), _data(NULL),
+ _buf(NULL), _len(0) {}
+
+ void Next()
+ {
+ if (_ofs >= _ofs_end) {
+ _buf = NULL;
+ _len = 0;
+ return;
+ }
+ uint32_t tmp = *_ofs++;
+ _buf = _data + tmp;
+ // NB: *_ofs_end is a valid index entry
+ _len = *_ofs - tmp;
+ }
+
+ void Init(uint32_t cnt,
+ const uint32_t *idx,
+ const char *data)
+ {
+ _ofs = idx;
+ _ofs_end = idx + cnt;
+ _data = data;
+ _buf = NULL;
+ _len = 0;
+ Next();
+ }
+
+ uint32_t GetLen() const { return _len; }
+ const char *GetBuf() const { return _buf; }
+ bool Before(SortDataIterator *other, bool beforeOnMatch = false)
+ {
+ uint32_t tlen = GetLen();
+ uint32_t olen = other->GetLen();
+ uint32_t mlen = (tlen <= olen) ? tlen : olen;
+
+ if (mlen == 0)
+ return (tlen != 0 || beforeOnMatch);
+
+ int res = memcmp(GetBuf(), other->GetBuf(), mlen);
+
+ if (res != 0)
+ return (res < 0);
+ return (tlen < olen || (tlen == olen && beforeOnMatch));
+ }
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/common/sortresults.cpp b/searchlib/src/vespa/searchlib/common/sortresults.cpp
new file mode 100644
index 00000000000..c58f15a8372
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/common/sortresults.cpp
@@ -0,0 +1,507 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright (C) 2001-2003 Fast Search & Transfer ASA
+// Copyright (C) 2003 Overture Services Norway AS
+
+#include <vespa/fastos/fastos.h>
+#include "sortresults.h"
+#include <vespa/searchlib/util/sort.h>
+#include <vespa/searchlib/common/sort.h>
+#include <vespa/searchlib/common/bitvector.h>
+#include <vespa/vespalib/util/array.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".search.attribute.sortresults");
+
+using search::RankedHit;
+using search::common::SortSpec;
+using search::common::SortInfo;
+using search::attribute::IAttributeContext;
+using search::attribute::IAttributeVector;
+
+namespace {
+
+template<typename T>
+class RadixHelper
+{
+public:
+ typedef vespalib::convertForSort<T, true> C;
+ inline typename C::UIntType
+ operator()(typename C::InputType v) const
+ {
+ return C::convert(v);
+ }
+};
+
+} // namespace <unnamed>
+
+
+inline void
+FastS_insertion_sort(RankedHit a[], uint32_t n)
+{
+ uint32_t i, j;
+ RankedHit swap;
+ typedef RadixHelper<search::HitRank> RT;
+ RT R;
+
+ for (i=1; i<n ; i++)
+ {
+ swap = a[i];
+ j = i;
+ while (R(swap._rankValue) > R(a[j-1]._rankValue))
+ {
+ a[j] = a[j-1];
+ if (!(--j)) break;;
+ }
+ a[j] = swap;
+ }
+}
+
+
+template<int SHIFT>
+void
+FastS_radixsort(RankedHit a[], uint32_t n, uint32_t ntop)
+{
+ uint32_t last[256], ptr[256], cnt[256];
+ uint32_t sorted, remain;
+ uint32_t i, j, k;
+ RankedHit temp, swap;
+ typedef RadixHelper<search::HitRank> RT;
+ RT R;
+
+ memset(cnt, 0, 256*sizeof(uint32_t));
+ // Count occurrences [NB: will fail with n < 3]
+ for(i = 0; i < n - 3; i += 4) {
+ FastOS_Prefetch::NT(((char *)(&a[i])) + PREFETCH);
+ cnt[(R(a[i]._rankValue) >> SHIFT) & 0xFF]++;
+ cnt[(R(a[i + 1]._rankValue) >> SHIFT) & 0xFF]++;
+ cnt[(R(a[i + 2]._rankValue) >> SHIFT) & 0xFF]++;
+ cnt[(R(a[i + 3]._rankValue) >> SHIFT) & 0xFF]++;
+ }
+ for(; i < n; i++)
+ cnt[(R(a[i]._rankValue) >> SHIFT) & 0xFF]++;
+
+ // Accumulate cnt positions
+ sorted = (cnt[0]==n);
+ ptr[0] = n-cnt[0];
+ last[0] = n;
+ for(i=1; i<256; i++)
+ {
+ ptr[i] = (last[i]=ptr[i-1]) - cnt[i];
+ sorted |= (cnt[i]==n);
+ }
+
+ if (!sorted)
+ {
+ // Go through all permutation cycles until all
+ // elements are moved or found to be already in place
+ i = 255;
+ remain = n;
+
+ while(remain>0)
+ {
+ // Find first uncompleted class
+ while(ptr[i]==last[i])
+ {
+ i--;
+ }
+
+ // Stop if top candidates in place
+ if (last[i]-cnt[i]>=ntop) break;
+
+ // Grab first element to move
+ j = ptr[i];
+ swap = a[j];
+ k = (R(swap._rankValue) >> SHIFT) & 0xFF;
+
+ // Swap into correct class until cycle completed
+ if (i!=k)
+ {
+ do
+ {
+ temp = a[ptr[k]];
+ a[ptr[k]++] = swap;
+ k = (R((swap = temp)._rankValue) >> SHIFT) & 0xFF;
+ remain--;
+ } while (i!=k);
+ // Place last element in cycle
+ a[j] = swap;
+ }
+ ptr[k]++;
+ remain--;
+ }
+ } else {
+ FastS_radixsort<SHIFT - 8>(a, n, ntop);
+ return;
+ }
+
+ if (SHIFT>0)
+ {
+ // Sort on next key
+ for(i=0; i<256 ; i++)
+ if ((last[i]-cnt[i])<ntop) {
+ if (cnt[i]>INSERT_SORT_LEVEL) {
+ if (last[i]<ntop) {
+ FastS_radixsort<SHIFT - 8>(&a[last[i]-cnt[i]], cnt[i],
+ cnt[i]);
+ } else {
+ FastS_radixsort<SHIFT - 8>(&a[last[i]-cnt[i]], cnt[i],
+ cnt[i]+ntop-last[i]);
+ }
+ } else if (cnt[i]>1) {
+ FastS_insertion_sort(&a[last[i]-cnt[i]], cnt[i]);
+ }
+ }
+ }
+}
+template<>
+void
+FastS_radixsort<-8>(RankedHit *, uint32_t, uint32_t) {}
+
+void
+FastS_SortResults(RankedHit a[], uint32_t n, uint32_t ntop)
+{
+ if (n > INSERT_SORT_LEVEL) {
+ FastS_radixsort<sizeof(search::HitRank)*8 - 8>(a, n, ntop);
+ } else {
+ FastS_insertion_sort(a, n);
+ }
+}
+
+//-----------------------------------------------------------------------------
+
+FastS_DefaultResultSorter FastS_DefaultResultSorter::__instance;
+
+//-----------------------------------------------------------------------------
+
+FastS_DocIdResultSorter FastS_DocIdResultSorter::__instance;
+
+//-----------------------------------------------------------------------------
+
+bool
+FastS_SortSpec::Add(IAttributeContext & vecMan, const SortInfo & sInfo)
+{
+ if (sInfo._field.empty())
+ return false;
+
+ uint32_t type = ASC_VECTOR;
+ const IAttributeVector * vector(NULL);
+
+ if ((sInfo._field.size() == 6) && (sInfo._field == "[rank]")) {
+ type = (sInfo._ascending) ? ASC_RANK : DESC_RANK;
+ } else if ((sInfo._field.size() == 7) && (sInfo._field == "[docid]")) {
+ type = (sInfo._ascending) ? ASC_DOCID : DESC_DOCID;
+ } else {
+ type = (sInfo._ascending) ? ASC_VECTOR : DESC_VECTOR;
+ vector = vecMan.getAttribute(sInfo._field);
+ if ( !vector || vector->hasMultiValue()) {
+ const char * err = "OK";
+ if ( !vector ) {
+ err = "not valid";
+ } else if ( vector->hasMultiValue()) {
+ err = "multivalued";
+ }
+ LOG(warning, "Attribute vector '%s' is %s. Skipped in sorting", sInfo._field.c_str(), err);
+ return false;
+ }
+ }
+
+ LOG(spam, "SortSpec: adding vector (%s)'%s'",
+ (sInfo._ascending) ? "+" : "-", sInfo._field.c_str());
+
+ _vectors.push_back(VectorRef(type, vector, sInfo._converter.get()));
+
+ return true;
+}
+
+uint8_t *
+FastS_SortSpec::realloc(uint32_t n, size_t & variableWidth, uint32_t & available, uint32_t & dataSize, uint8_t *mySortData)
+{
+ // realloc
+ variableWidth *= 2;
+ available += variableWidth * n;
+ dataSize += variableWidth * n;
+ uint32_t byteUsed = mySortData - &_binarySortData[0];
+ _binarySortData.resize(dataSize);
+ return &_binarySortData[0] + byteUsed;
+}
+
+void
+FastS_SortSpec::initSortData(const RankedHit *hits, uint32_t n)
+{
+ freeSortData();
+ size_t fixedWidth = 0;
+ size_t variableWidth = 0;
+ for (auto iter = _vectors.begin(); iter != _vectors.end(); ++iter) {
+ if (iter->_type >= ASC_DOCID) { // doc id
+ fixedWidth += 4;
+ }else if (iter->_type >= ASC_RANK) { // rank value
+ fixedWidth += sizeof(search::HitRank);
+ } else {
+ size_t numBytes = iter->_vector->getFixedWidth();
+ if (numBytes == 0) { // string
+ variableWidth += 11;
+ } else if (!iter->_vector->hasMultiValue()) {
+ fixedWidth += numBytes;
+ }
+ }
+ }
+ uint32_t dataSize = (fixedWidth + variableWidth) * n;
+ uint32_t available = dataSize;
+ _binarySortData.resize(dataSize);
+ uint8_t *mySortData = &_binarySortData[0];
+
+ _sortDataArray.resize(n);
+
+ for (uint32_t i(0), idx(0); (i < n) && !_doom.doom(); ++i) {
+ uint32_t len = 0;
+ for (auto iter = _vectors.begin(); iter != _vectors.end(); ++iter) {
+ int written(0);
+ if (available < std::max(sizeof(hits->_docId), sizeof(hits->_rankValue))) {
+ mySortData = realloc(n, variableWidth, available, dataSize, mySortData);
+ }
+ do {
+ switch (iter->_type) {
+ case ASC_DOCID:
+ vespalib::serializeForSort<vespalib::convertForSort<uint32_t, true> >(hits[i].getDocId(), mySortData);
+ written = sizeof(hits->_docId);
+ break;
+ case DESC_DOCID:
+ vespalib::serializeForSort<vespalib::convertForSort<uint32_t, false> >(hits[i].getDocId(), mySortData);
+ written = sizeof(hits->_docId);
+ break;
+ case ASC_RANK:
+ vespalib::serializeForSort<vespalib::convertForSort<search::HitRank, true> >(hits[i]._rankValue, mySortData);
+ written = sizeof(hits->_rankValue);
+ break;
+ case DESC_RANK:
+ vespalib::serializeForSort<vespalib::convertForSort<search::HitRank, false> >(hits[i]._rankValue, mySortData);
+ written = sizeof(hits->_rankValue);
+ break;
+ case ASC_VECTOR:
+ written = iter->_vector->serializeForAscendingSort(hits[i].getDocId(), mySortData, available, iter->_converter);
+ break;
+ case DESC_VECTOR:
+ written = iter->_vector->serializeForDescendingSort(hits[i].getDocId(), mySortData, available, iter->_converter);
+ break;
+ }
+ if (written == -1) {
+ mySortData = realloc(n, variableWidth, available, dataSize, mySortData);
+ }
+ } while(written == -1);
+ available -= written;
+ mySortData += written;
+ len += written;
+ }
+ SortData & sd = _sortDataArray[i];
+ sd._docId = hits[i]._docId;
+ sd._rankValue = hits[i]._rankValue;
+ sd._idx = idx;
+ sd._len = len;
+ sd._pos = 0;
+ idx += len;
+ }
+}
+
+
+FastS_SortSpec::FastS_SortSpec(const vespalib::Doom & doom, int method) :
+ _doom(doom),
+ _method(method),
+ _sortSpec(),
+ _vectors()
+{
+}
+
+
+FastS_SortSpec::~FastS_SortSpec()
+{
+ freeSortData();
+}
+
+
+bool
+FastS_SortSpec::Init(const vespalib::string & sortStr, IAttributeContext & vecMan)
+{
+ LOG(spam, "sortStr = %s", sortStr.c_str());
+ bool retval(true);
+ try {
+ _sortSpec = SortSpec(sortStr);
+ for (SortSpec::const_iterator it(_sortSpec.begin()), mt(_sortSpec.end()); retval && (it < mt); it++) {
+ retval = Add(vecMan, *it);
+ }
+ } catch (const std::exception & e) {
+ LOG(warning, "Failed parsing sortspec: %s", sortStr.c_str());
+ return retval;
+ }
+
+ return retval;
+}
+
+
+uint32_t
+FastS_SortSpec::getSortDataSize(uint32_t offset, uint32_t n)
+{
+ uint32_t size = 0;
+ for (uint32_t i = offset; i < (offset + n); ++i) {
+ size += _sortDataArray[i]._len;
+ }
+ return size;
+}
+
+void
+FastS_SortSpec::copySortData(uint32_t offset, uint32_t n,
+ uint32_t *idx, char *buf)
+{
+ const uint8_t * sortData = &_binarySortData[0];
+ uint32_t totalLen = 0;
+ for (uint32_t i = offset; i < (offset + n); ++i, ++idx) {
+ const uint8_t * src = sortData + _sortDataArray[i]._idx;
+ uint32_t len = _sortDataArray[i]._len;
+ memcpy(buf, src, len);
+ buf += len;
+ *idx = totalLen;
+ totalLen += len;
+ }
+ *idx = totalLen; // end of data index entry
+}
+
+void
+FastS_SortSpec::freeSortData()
+{
+ {
+ BinarySortData tmp;
+ _binarySortData.swap(tmp);
+ }
+ {
+ SortDataArray tmp;
+ _sortDataArray.swap(tmp);
+ }
+}
+
+bool
+FastS_SortSpec::hasSortData() const
+{
+ return ! _binarySortData.empty() && ! _sortDataArray.empty();
+}
+
+void
+FastS_SortSpec::initWithoutSorting(const RankedHit * hits, uint32_t hitCnt)
+{
+ initSortData(hits, hitCnt);
+}
+
+inline int
+FastS_SortSpec::Compare(const FastS_SortSpec *self, const SortData &a,
+ const SortData &b)
+{
+ const uint8_t * ref = &(self->_binarySortData[0]);
+ uint32_t len = a._len < b._len ? a._len : b._len;
+ int retval = memcmp(ref + a._idx,
+ ref + b._idx, len);
+ if (retval < 0) {
+ return -1;
+ } else if (retval > 0) {
+ return 1;
+ }
+ return 0;
+}
+
+template <typename T, typename Compare>
+inline T *
+FastS_median3(T *a, T *b, T *c, Compare *compobj)
+{
+ return Compare::Compare(compobj, *a, *b) < 0 ?
+ (Compare::Compare(compobj, *b, *c) < 0 ? b : Compare::Compare(compobj,
+ *a, *c) < 0 ? c : a) :
+ (Compare::Compare(compobj, *b, *c) > 0 ? b : Compare::Compare(compobj,
+ *a, *c) > 0 ? c : a);
+}
+
+
+template <typename T, typename Compare>
+void
+FastS_insertion_sort(T a[], uint32_t n, Compare *compobj)
+{
+ uint32_t i, j;
+ T swap;
+
+ for (i=1; i<n ; i++)
+ {
+ swap = a[i];
+ j = i;
+ while (Compare::Compare(compobj, swap, a[j-1]) < 0)
+ {
+ a[j] = a[j-1];
+ if (!(--j)) break;;
+ }
+ a[j] = swap;
+ }
+}
+
+class StdSortDataCompare : public std::binary_function<FastS_SortSpec::SortData, FastS_SortSpec::SortData, bool>
+{
+public:
+ StdSortDataCompare(const uint8_t * s) : _sortSpec(s) { }
+ bool operator() (const FastS_SortSpec::SortData & x, const FastS_SortSpec::SortData & y) const {
+ return cmp(x, y) < 0;
+ }
+ int cmp(const FastS_SortSpec::SortData & a, const FastS_SortSpec::SortData & b) const {
+ uint32_t len = std::min(a._len, b._len);
+ int retval = memcmp(_sortSpec + a._idx, _sortSpec + b._idx, len);
+ return retval ? retval : a._len - b._len;
+ }
+private:
+ const uint8_t * _sortSpec;
+};
+
+class SortDataRadix
+{
+public:
+ SortDataRadix(const uint8_t * s) : _data(s) { }
+ uint32_t operator () (FastS_SortSpec::SortData & a) const {
+ uint32_t r(0);
+ uint32_t left(a._len - a._pos);
+ switch (left) {
+ default:
+ case 4:
+ r |= _data[a._idx + a._pos + 3] << 0;
+ case 3:
+ r |= _data[a._idx + a._pos + 2] << 8;
+ case 2:
+ r |= _data[a._idx + a._pos + 1] << 16;
+ case 1:
+ r |= _data[a._idx + a._pos + 0] << 24;
+ case 0:
+ ;
+ }
+ a._pos += std::min(4u, left);
+ return r;
+ }
+private:
+ const uint8_t * _data;
+};
+
+class SortDataEof
+{
+public:
+ bool operator () (const FastS_SortSpec::SortData & a) const { return a._pos >= a._len; }
+ static bool alwaysEofOnCheck() { return false; }
+};
+
+
+void
+FastS_SortSpec::sortResults(RankedHit a[], uint32_t n, uint32_t topn)
+{
+ initSortData(a, n);
+ SortData * sortData = &_sortDataArray[0];
+ if (_method == 0) {
+ search::qsort<7, 40, SortData, FastS_SortSpec>(sortData, n, this);
+ } else if (_method == 1) {
+ std::sort(sortData, sortData + n, StdSortDataCompare(&_binarySortData[0]));
+ } else {
+ vespalib::Array<uint32_t, Alloc> radixScratchPad(n);
+ search::radix_sort(SortDataRadix(&_binarySortData[0]), StdSortDataCompare(&_binarySortData[0]), SortDataEof(), 1, sortData, n, &radixScratchPad[0], 0, 96, topn);
+ }
+ for (uint32_t i(0), m(_sortDataArray.size()); i < m; ++i) {
+ a[i]._rankValue = _sortDataArray[i]._rankValue;
+ a[i]._docId = _sortDataArray[i]._docId;
+ }
+}
diff --git a/searchlib/src/vespa/searchlib/common/sortresults.h b/searchlib/src/vespa/searchlib/common/sortresults.h
new file mode 100644
index 00000000000..8da643411a0
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/common/sortresults.h
@@ -0,0 +1,157 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright (C) 2001-2003 Fast Search & Transfer ASA
+// Copyright (C) 2003 Overture Services Norway AS
+
+#pragma once
+
+#include <vespa/searchlib/common/rankedhit.h>
+#include <vespa/searchlib/common/sortspec.h>
+#include <vespa/searchcommon/attribute/iattributecontext.h>
+#include <algorithm>
+#include <vector>
+#include <vespa/vespalib/util/array.h>
+#include <vespa/vespalib/util/doom.h>
+
+#define PREFETCH 64
+#define INSERT_SORT_LEVEL 80
+
+/**
+ * Sort the given array of results.
+ *
+ * @param a the array of hits
+ * @param n the number of hits
+ * @param ntop the number of hits needed in correct order
+ **/
+void FastS_SortResults(search::RankedHit a[],
+ unsigned int n, unsigned int ntop);
+
+//-----------------------------------------------------------------------------
+
+struct FastS_IResultSorter {
+ /**
+ * Destructor. No cleanup needed for base class.
+ */
+ virtual ~FastS_IResultSorter(void) {}
+
+ /**
+ * @return should bitvector hits also be sorted?
+ **/
+ virtual bool completeSort() const = 0;
+
+ /**
+ * Sort the given array of results.
+ *
+ * @param a the array of hits
+ * @param n the number of hits
+ * @param ntop the number of hits needed in correct order
+ **/
+ virtual void sortResults(search::RankedHit a[], uint32_t n,
+ uint32_t ntop) = 0;
+};
+
+//-----------------------------------------------------------------------------
+
+class FastS_DefaultResultSorter : public FastS_IResultSorter
+{
+private:
+ static FastS_DefaultResultSorter __instance;
+
+public:
+ static FastS_DefaultResultSorter *instance() { return &__instance; }
+ virtual bool completeSort() const { return false; }
+ virtual void sortResults(search::RankedHit a[], uint32_t n,
+ uint32_t ntop)
+ {
+ return FastS_SortResults(a, n, ntop);
+ }
+};
+
+//-----------------------------------------------------------------------------
+
+class FastS_DocIdResultSorter : public FastS_IResultSorter
+{
+private:
+ static FastS_DocIdResultSorter __instance;
+
+public:
+ static FastS_DocIdResultSorter *Instance() { return &__instance; }
+ virtual bool completeSort() const { return true; }
+ virtual void sortResults(search::RankedHit[], uint32_t, uint32_t) {
+ // already sorted on docid
+ }
+};
+
+//-----------------------------------------------------------------------------
+
+class FastS_SortSpec : public FastS_IResultSorter, public vespalib::noncopyable
+{
+private:
+ friend class MultilevelSortTest;
+public:
+ enum {
+ ASC_VECTOR = 0,
+ DESC_VECTOR = 1,
+ ASC_RANK = 2,
+ DESC_RANK = 3,
+ ASC_DOCID = 4,
+ DESC_DOCID = 5
+ };
+
+ struct VectorRef
+ {
+ VectorRef(uint32_t type, const search::attribute::IAttributeVector * vector, const search::common::BlobConverter *converter)
+ : _type(type),
+ _vector(vector),
+ _converter(converter)
+ {
+ }
+ uint32_t _type;
+ const search::attribute::IAttributeVector *_vector;
+ const search::common::BlobConverter *_converter;
+ };
+
+ struct SortData : public search::RankedHit
+ {
+ uint32_t _idx;
+ uint32_t _len;
+ uint32_t _pos;
+ };
+
+private:
+ typedef std::vector<VectorRef> VectorRefList;
+ typedef vespalib::AutoAlloc<0x800000> Alloc;
+ typedef vespalib::Array<uint8_t, Alloc> BinarySortData;
+ typedef vespalib::Array<SortData, Alloc> SortDataArray;
+ vespalib::Doom _doom;
+ int _method;
+ search::common::SortSpec _sortSpec;
+ VectorRefList _vectors;
+ BinarySortData _binarySortData;
+ SortDataArray _sortDataArray;
+
+ bool Add(search::attribute::IAttributeContext & vecMan, const search::common::SortInfo & sInfo);
+ void initSortData(const search::RankedHit *a, uint32_t n);
+ uint8_t * realloc(uint32_t n, size_t & variableWidth, uint32_t & available, uint32_t & dataSize, uint8_t *mySortData);
+
+public:
+ FastS_SortSpec(const vespalib::Doom & doom, int method=2);
+ virtual ~FastS_SortSpec();
+
+ std::pair<const char *, size_t> getSortRef(size_t i) const {
+ return std::pair<const char *, size_t>((const char*)(&_binarySortData[0] + _sortDataArray[i]._idx),
+ _sortDataArray[i]._len);
+ }
+ bool Init(const vespalib::string & sortSpec, search::attribute::IAttributeContext & vecMan);
+ virtual bool completeSort() const { return true; }
+ virtual void sortResults(search::RankedHit a[], uint32_t n, uint32_t topn);
+ uint32_t getSortDataSize(uint32_t offset, uint32_t n);
+ void copySortData(uint32_t offset, uint32_t n, uint32_t *idx, char *buf);
+ void freeSortData();
+ bool hasSortData() const;
+ void initWithoutSorting(const search::RankedHit * hits,
+ uint32_t hitCnt);
+ static int Compare(const FastS_SortSpec *self, const SortData &a, const SortData &b);
+};
+
+//-----------------------------------------------------------------------------
+
diff --git a/searchlib/src/vespa/searchlib/common/sortspec.cpp b/searchlib/src/vespa/searchlib/common/sortspec.cpp
new file mode 100644
index 00000000000..b522d76ebaa
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/common/sortspec.cpp
@@ -0,0 +1,180 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/searchlib/common/sortspec.h>
+#include <vespa/searchlib/common/converters.h>
+#include <vespa/vespalib/util/stringfmt.h>
+#include <vespa/vespalib/util/sync.h>
+#include <unicode/ustring.h>
+#include <stdexcept>
+#include <vespa/fastlib/text/normwordfolder.h>
+#include <vespa/vespalib/text/utf8.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".search.common.sortspec");
+
+namespace search {
+namespace common {
+
+using vespalib::ConstBufferRef;
+using vespalib::make_string;
+
+ConstBufferRef PassThroughConverter::onConvert(const ConstBufferRef & src) const
+{
+ return src;
+}
+
+LowercaseConverter::LowercaseConverter() :
+ _buffer()
+{
+}
+
+ConstBufferRef LowercaseConverter::onConvert(const ConstBufferRef & src) const
+{
+ _buffer.clear();
+ vespalib::stringref input((const char *)src.data(), src.size());
+ vespalib::Utf8Reader r(input);
+ vespalib::Utf8Writer w(_buffer);
+ while (r.hasMore()) {
+ ucs4_t c = r.getChar(0xFFFD);
+ c = Fast_NormalizeWordFolder::ToFold(c);
+ w.putChar(c);
+ }
+ return ConstBufferRef(_buffer.begin(), _buffer.size());
+}
+
+namespace {
+ vespalib::Lock _GlobalDirtyICUThreadSafeLock;
+}
+
+UcaConverter::UcaConverter(const vespalib::string & locale, const vespalib::string & strength) :
+ _buffer(),
+ _u16Buffer(128),
+ _collator()
+{
+ UErrorCode status = U_ZERO_ERROR;
+ Collator *coll(NULL);
+ {
+ vespalib::LockGuard guard(_GlobalDirtyICUThreadSafeLock);
+ coll = Collator::createInstance(icu::Locale(locale.c_str()), status);
+ }
+ if(U_SUCCESS(status)) {
+ _collator.reset(coll);
+ if (strength.empty()) {
+ _collator->setStrength(Collator::PRIMARY);
+ } else if (strength == "PRIMARY") {
+ _collator->setStrength(Collator::PRIMARY);
+ } else if (strength == "SECONDARY") {
+ _collator->setStrength(Collator::SECONDARY);
+ } else if (strength == "TERTIARY") {
+ _collator->setStrength(Collator::TERTIARY);
+ } else if (strength == "QUATERNARY") {
+ _collator->setStrength(Collator::QUATERNARY);
+ } else if (strength == "IDENTICAL") {
+ _collator->setStrength(Collator::IDENTICAL);
+ } else {
+ throw std::runtime_error("Illegal uca collation strength : " + strength);
+ }
+ } else {
+ delete coll;
+ throw std::runtime_error("Failed Collator::createInstance(Locale(locale.c_str()), status) with locale : " + locale);
+ }
+}
+
+int UcaConverter::utf8ToUtf16(const ConstBufferRef & src) const
+{
+ UErrorCode status = U_ZERO_ERROR;
+ int32_t u16Wanted(0);
+ u_strFromUTF8(&_u16Buffer[0], _u16Buffer.size(), &u16Wanted, static_cast<const char *>(src.data()), -1, &status);
+ if (U_SUCCESS(status)) {
+ } else if (status == U_INVALID_CHAR_FOUND) {
+ LOG(warning, "ICU was not able to convert the %ld alleged utf8 characters'%s' to utf16", src.size(), src.c_str());
+ } else if (status == U_BUFFER_OVERFLOW_ERROR) {
+ //Ignore as this is handled on the outside.
+ } else {
+ LOG(warning, "ICU made a undefined complaint(%d) about the %ld alleged utf8 characters'%s' to utf16", status, src.size(), src.c_str());
+ }
+ return u16Wanted;
+}
+
+ConstBufferRef UcaConverter::onConvert(const ConstBufferRef & src) const
+{
+ int32_t u16Wanted(utf8ToUtf16(src));
+ if (u16Wanted > (int)_u16Buffer.size()) {
+ _u16Buffer.resize(u16Wanted);
+ u16Wanted = utf8ToUtf16(src);
+ }
+ int wanted = _collator->getSortKey(&_u16Buffer[0], u16Wanted, _buffer.ptr(), _buffer.siz());
+ _buffer.check();
+ if (wanted > _buffer.siz()) {
+ _buffer.reserve(wanted);
+ wanted = _collator->getSortKey(&_u16Buffer[0], u16Wanted, _buffer.ptr(), _buffer.siz());
+ _buffer.check();
+ }
+ return ConstBufferRef(_buffer.ptr(), wanted);
+}
+
+SortSpec::SortSpec(const vespalib::string & spec) :
+ _spec(spec)
+{
+ for (const char *pt(spec.c_str()), *mt(spec.c_str() + spec.size()); pt < mt;) {
+ for (; pt < mt && *pt != '+' && *pt != '-'; pt++);
+ if (pt != mt) {
+ bool ascending = (*pt++ == '+');
+ const char *vectorName = pt;
+ for (;pt < mt && *pt != ' '; pt++);
+ vespalib::string funcSpec(vectorName, pt - vectorName);
+ const char * func = funcSpec.c_str();
+ const char *p = func;
+ const char *e = func+funcSpec.size();
+ for(; (p < e) && (*p != '('); p++);
+ if (*p == '(') {
+ if (strncmp(func, "uca", std::min(3l, p-func)) == 0) {
+ p++;
+ const char * attrName = p;
+ for(; (p < e) && (*p != ','); p++);
+ if (*p == ',') {
+ vespalib::string attr(attrName, p-attrName);
+ p++;
+ const char *localeName = p;
+ for(; (p < e) && (*p != ')') && (*p != ','); p++);
+ if (*p == ',') {
+ vespalib::string locale(localeName, p-localeName);
+ p++;
+ const char *strengthName = p;
+ for(; (p < e) && (*p != ')'); p++);
+ if (*p == ')') {
+ vespalib::string strength(strengthName, p - strengthName);
+ push_back(SortInfo(attr, ascending, BlobConverter::SP(new UcaConverter(locale, strength))));
+ } else {
+ throw std::runtime_error(make_string("Missing ')' at %s attr=%s locale=%s strength=%s", p, attr.c_str(), localeName, strengthName));
+ }
+ } else if (*p == ')') {
+ vespalib::string locale(localeName, p-localeName);
+ push_back(SortInfo(attr, ascending, BlobConverter::SP(new UcaConverter(locale, ""))));
+ } else {
+ throw std::runtime_error(make_string("Missing ')' or ',' at %s attr=%s locale=%s", p, attr.c_str(), localeName));
+ }
+ } else {
+ throw std::runtime_error(make_string("Missing ',' at %s", p));
+ }
+ } else if (strncmp(func, "lowercase", std::min(9l, p-func)) == 0) {
+ p++;
+ const char * attrName = p;
+ for(; (p < e) && (*p != ')'); p++);
+ if (*p == ')') {
+ vespalib::string attr(attrName, p-attrName);
+ push_back(SortInfo(attr, ascending, BlobConverter::SP(new LowercaseConverter())));
+ } else {
+ throw std::runtime_error("Missing ')'");
+ }
+ } else {
+ throw std::runtime_error("Unknown func " + vespalib::string(func, p-func));
+ }
+ } else {
+ push_back(SortInfo(funcSpec, ascending, BlobConverter::SP(NULL)));
+ }
+ }
+ }
+}
+
+}
+}
diff --git a/searchlib/src/vespa/searchlib/common/sortspec.h b/searchlib/src/vespa/searchlib/common/sortspec.h
new file mode 100644
index 00000000000..bfa6a064105
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/common/sortspec.h
@@ -0,0 +1,35 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright (C) 1998-2003 Fast Search & Transfer ASA
+// Copyright (C) 2003 Overture Services Norway AS
+
+#pragma once
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/vespalib/util/buffer.h>
+#include <vector>
+#include <vespa/vespalib/stllike/string.h>
+#include <vespa/searchcommon/common/iblobconverter.h>
+
+namespace search {
+namespace common {
+
+struct SortInfo {
+ SortInfo(const vespalib::string & field, bool ascending, const BlobConverter::SP & converter) : _field(field), _ascending(ascending), _converter(converter) { }
+ vespalib::string _field;
+ bool _ascending;
+ BlobConverter::SP _converter;
+};
+
+class SortSpec : public std::vector<SortInfo>
+{
+public:
+ SortSpec() : _spec() { }
+ SortSpec(const vespalib::string & spec);
+ const vespalib::string & getSpec() const { return _spec; }
+private:
+ vespalib::string _spec;
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/common/transport.h b/searchlib/src/vespa/searchlib/common/transport.h
new file mode 100644
index 00000000000..9b4f2ecb5c2
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/common/transport.h
@@ -0,0 +1,401 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright (C) 1998-2003 Fast Search & Transfer ASA
+// Copyright (C) 2003 Overture Services Norway AS
+
+
+#pragma once
+
+
+#include <vespa/fastos/fastos.h>
+
+namespace search
+{
+
+namespace fs4transport
+{
+
+/**
+ * Instead of using a 32-bit number to send the 'usehardware' flag, we
+ * now use this 32-bit number to send 32 flags. The currently defined flags
+ * are as follows:
+ * <ul>
+ * <li><b>QFLAG_ALLOW_ERRORPACKET</b>: Allow an error packet to be sent as
+ * response to this query packet.</li>
+ * <li><b>QFLAG_REPORT_QUEUELEN</b>: Send an extra queue length packet before
+ * query result packets.</li>
+ * <li><b>QFLAG_ESTIMATE</b>: Indicates that the query is performed to get
+ * an estimate of the total number of hits</li>
+ * <li><b>QFLAG_DUMP_FEATURES</b>: Dump detailed ranking information. Note that
+ * this flag will only be considered when sent in a
+ * GETDOCSUMSX packet. Is is put here to avoid having
+ * 2 separate query related flag spaces</li>
+ * <li><b>QFLAG_DROP_SORTDATA</b>: Don't return any sort data even if sortspec
+ * is used.</li>
+ * <li><b>QFLAG_NO_RESULTCACHE</b>: Do not use any result cache. Perform query no matter what.</li>
+ * </ul>
+ **/
+enum queryflags {
+ QFLAG_ALLOW_ERRORPACKET = 0x00000004,
+ QFLAG_REPORT_QUEUELEN = 0x00000008,
+ QFLAG_ESTIMATE = 0x00000080,
+ QFLAG_DROP_SORTDATA = 0x00004000,
+ QFLAG_REPORT_COVERAGE = 0x00008000,
+ QFLAG_NO_RESULTCACHE = 0x00010000,
+ QFLAG_DUMP_FEATURES = 0x00040000,
+
+ QFLAG_CACHE_MASK = (// which flags affect the cache
+ QFLAG_ESTIMATE |
+ QFLAG_DROP_SORTDATA |
+ QFLAG_REPORT_COVERAGE)
+};
+
+
+/**
+ * The new PCODE_QUERYRESULTX packet contains a 32-bit field called
+ * 'featureflags'. Each bit in that field denotes a separate feature
+ * that may be present in the query result packet or not. The comment
+ * describing the packet format indicates what data fields depend on
+ * what features. Note that after removing the query id and the
+ * feature flags from a PCODE_QUERYRESULTX packet it is binary
+ * compatible with the PCODE_QUERYRESULT, PCODE_MLD_QUERYRESULT and
+ * PCODE_MLD_QUERYRESULT2 packets given the correct set of
+ * features. The features present in the 'old' query result packets
+ * are defined in this enum along with the Query Result Features
+ * themselves. The value called QRF_SUPPORTED_MASK denotes which
+ * features are supported by the current version. If a packet with
+ * unknown features is received on the network is is discarded (as it
+ * would be if it had an illegal PCODE).
+ **/
+enum queryresult_features {
+ QRF_MLD = 0x00000001,
+ QRF_SORTDATA = 0x00000010,
+ QRF_AGGRDATA = 0x00000020,
+ QRF_COVERAGE = 0x00000040,
+ QRF_GROUPDATA = 0x00000200,
+ QRF_PROPERTIES = 0x00000400,
+
+ QRF_QUERYRESULT_MASK = 0,
+ QRF_MLD_QUERYRESULT_MASK = QRF_MLD
+};
+
+
+/**
+ * The new PCODE_QUERYX packet contains a 32-bit field called
+ * 'featureflags'. Each bit in that field denotes a separate feature
+ * that may be present in the query packet or not. The comment
+ * describing the packet format indicates what data fields depend on
+ * what features. Note that after removing the query id and the
+ * feature flags from a PCODE_QUERYX packet it is binary compatible
+ * with the PCODE_PARSEDQUERY2 packets
+ * given the correct set of features. The features present in the
+ * 'old' query packets are defined in this enum along with the Query
+ * Features themselves. The values called
+ * QF_SUPPORTED_[FSEARCH/FDISPATCH]_MASK denotes which features are
+ * supported by the current version. If a packet with unknown features
+ * is received on the network is is discarded (as it would be if it
+ * had an illegal PCODE).
+ **/
+enum query_features {
+ QF_PARSEDQUERY = 0x00000002,
+ QF_RANKP = 0x00000004,
+ QF_SORTSPEC = 0x00000080,
+ QF_AGGRSPEC = 0x00000100,
+ QF_LOCATION = 0x00000800,
+ QF_PROPERTIES = 0x00100000,
+ QF_WARMUP = 0x00200000, // Deprecated, do not use!
+ QF_GROUPSPEC = 0x00400000,
+ QF_SESSIONID = 0x00800000,
+
+ QF_PARSEDQUERY2_MASK = (QF_PARSEDQUERY | QF_RANKP)
+};
+
+
+/**
+ * The new PCODE_GETDOCSUMSX packet contains a 32-bit field called
+ * 'featureflags'. Each bit in that field denotes a separate feature
+ * that may be present in the getdocsums packet or not. The comment
+ * describing the packet format indicates what data fields depend on
+ * what features. Note that after removing the query id and the
+ * feature flags from a PCODE_GETDOCSUMSX packet it is binary
+ * compatible with the PCODE_GETDOCSUMS, PCODE_MLD_GETDOCSUMS and
+ * PCODE_MLD_GETDOCSUMS2 packets given the correct set of
+ * features. The features present in the 'old' getdocsums packets are
+ * defined in this enum along with the GetDocsums Features
+ * themselves. The values called
+ * GDF_SUPPORTED_[FSEARCH/FDISPATCH]_MASK denotes which features are
+ * supported by the current version. If a packet with unknown features
+ * is received on the network is is discarded (as it would be if it
+ * had an illegal PCODE).
+ **/
+enum getdocsums_features {
+ GDF_MLD = 0x00000001,
+ GDF_QUERYSTACK = 0x00000004,
+ GDF_RANKP_QFLAGS = 0x00000010,
+ GDF_LOCATION = 0x00000080,
+ GDF_RESCLASSNAME = 0x00000800,
+ GDF_PROPERTIES = 0x00001000,
+ GDF_FLAGS = 0x00002000,
+
+ GDF_GETDOCSUMS_MASK = 0,
+ GDF_MLD_GETDOCSUMS_MASK = (GDF_MLD)
+};
+
+
+enum getdocsums_flags
+{
+ GDFLAG_IGNORE_ROW = 0x00000001,
+ GDFLAG_ALLOW_SLIME = 0x00000002
+};
+
+// docsum class for slime tunneling
+const uint32_t SLIME_MAGIC_ID = 0x55555555;
+
+enum monitorquery_features
+{
+ MQF_QFLAGS = 0x00000002,
+
+ MQF_MONITORQUERY_MASK = 0
+};
+
+
+enum monitorquery_flags
+{
+ // NOT_USED MQFLAG_REPORT_SOFTOFFLINE = 0x00000010,
+ MQFLAG_REPORT_ACTIVEDOCS = 0x00000020
+};
+
+
+enum monitorresult_features
+{
+ MRF_MLD = 0x00000001,
+ MRF_RFLAGS = 0x00000008,
+ MRF_ACTIVEDOCS = 0x00000010,
+
+ MRF_MONITORRESULT_MASK = 0,
+ MRF_MLD_MONITORRESULT_MASK = (MRF_MLD)
+};
+
+
+enum monitorresult_flags
+{
+ // NOT_USED MRFLAG_SOFTOFFLINE = 0x00000001
+};
+
+
+/**
+ * Codes for packets between dispatch nodes and search nodes.
+ * general packet (i.e. message) format:
+ * uint32_t packetLength- length in bytes, EXCLUDING this length field
+ * packetcode pCode - see the enum below; same length as uint32_t
+ * packetData - variable length
+ */
+enum packetcode {
+ PCODE_EOL = 200, /* ..fdispatch <-> ..fsearch. PacketData:
+ *0 {uint32_t queryId,} - only in new format!*/
+ PCODE_QUERY_NOTUSED = 201,
+ PCODE_QUERYRESULT = 202, /* ..fdispatch <- ..fsearch. PacketData:
+ *0 {uint32_t queryId,} - only in new format!
+ *1 uint32_t offset,
+ *2 uint32_t numDocs,
+ *3 uint32_t totNumDocs,
+ *4 search::HitRank maxRank,
+ *5 time_t docstamp, - sent as Uint32
+ *6 struct FastS_connhitresult {
+ * uint32_t docid;
+ * search::HitRank metric
+ * }[] hits */
+ PCODE_ERROR = 203, /* ..fdispatch <- ..fsearch/..fdispatch
+ * {uint32_t queryId,} - only in new format!
+ * uint32_t error_code [see common/errorcodes.h]
+ * uint32_t message_len
+ * char[] message (UTF-8) */
+ PCODE_GETDOCSUMS = 204, /* ..fdispatch -> ..fsearch. PacketData:
+ *0 {uint32_t queryId,} - only in new format!
+ * time_t docstamp - header
+ * uint32_t[] docid - body */
+ PCODE_DOCSUM = 205, /* ..fdispatch <- ..fsearch.
+ *0 {uint32_t queryId,} - only in new format!
+ *1 uint32_t location
+ *2 char[] <title, incipit, URL, ...>
+ */
+ PCODE_MONITORQUERY = 206, /* ..fdispatch -> ..fsearch. No packet data.
+ */
+ PCODE_MONITORRESULT = 207, /* ..fdispatch <- ..fsearch. PacketData:
+ * int partitionId,
+ * time_t timeStamp */
+ PCODE_MLD_QUERYRESULT = 208,/* ..fdispatch <- ..fdispatch.
+ * header: {queryId,} offset, numdocs, tnumdocs,
+ * maxRank, docstamp
+ * body: (docid, metric, partition, docstamp)*
+ */
+ PCODE_MLD_GETDOCSUMS = 209, /* ..fdispatch -> ..fdispatch.
+ * header: {queryId,} docstamp
+ * body: (docid, partition, docstamp)*
+ */
+ PCODE_MLD_MONITORRESULT = 210 ,/* ..fdispatch <- ..fdispatch NB: no queryId!
+ * lowest partition id,
+ * timestamp,
+ * total number of nodes,
+ * active nodes,
+ * total number of partitions,
+ * active partitions
+ */
+ PCODE_CLEARCACHES = 211, /* ..fdispatch -> ..fdispatch. No packet data/ NotUsed
+ */
+ PCODE_QUERY2_NOTUSED = 212,
+ PCODE_PARSEDQUERY2 = 213, /* ..fdispatch -> ..fsearch. PacketData:
+ *0 {uint32_t queryId,} - only in new format!
+ *1 ..query::querytypes searchType, - all/any/exact
+ *2 uint32_t offset,
+ *3 uint32_t maxhits,
+ *4 uint32_t qflags, (including usehardware)
+ *5 uint32_t rankprofile, - enum
+ *6 uint32_t numStackItems,
+ *7 multiple encoded stackitems:
+ - uint32_t OR|AND|NOT|RANK
+ uint32_t arity
+ - uint32_t PHRASE
+ uint32_t arity
+ uint32_t indexNameLen
+ char[] indexName
+ - uint32_t TERM
+ uint32_t indexNameLen
+ char[] indexName
+ uint32_t termLen
+ char[] term
+ */
+ PCODE_MLD_QUERYRESULT2_NOTUSED = 214,
+ PCODE_MLD_GETDOCSUMS2_NOTUSED = 215,
+
+ PCODE_QUEUELEN = 216, /* fdispatch <- fsearch.
+ * header: queueLen, dispatchers
+ */
+ PCODE_QUERYRESULTX = 217, /*
+ * {uint32_t queryId,} - only if persistent
+ * uint32_t featureflags, - see 'queryresult_features'
+ * uint32_t offset,
+ * uint32_t numDocs,
+ * uint32_t totNumDocs,
+ * search::HitRank maxRank,
+ * uint32_t docstamp,
+ * uint32_t[numDocs] sortIndex - if QRF_SORTDATA
+ * char[sidx[n - 1]] sortData - if QRF_SORTDATA
+ * uint32_t aggrDataLen - if QRF_AGGRDATA
+ * char[aggrDataLen] aggrData - if QRF_AGGRDATA
+ * uint32_t groupDataLen - if QRF_GROUPDATA
+ * char[groupDataLen] groupData - if QRF_GROUPDATA
+ * uint64_t coverageDocs - if QRF_COVERAGE
+ * uint32_t coverageNodes - if QRF_COVERAGE
+ * uint32_t coverageFull - if QRF_COVERAGE
+ * numDocs * hit {
+ * uint32_t docid,
+ * search::HitRank metric,
+ * uint32_t partid, - if QRF_MLD
+ * uint32_t docstamp, - if QRF_MLD
+ * } */
+ PCODE_QUERYX = 218, /*
+ * {uint32_t queryId,} - only if persistent
+ * uint32_t featureflags, - see 'query_features'
+ * uint32_t querytype
+ * uint32_t offset,
+ * uint32_t maxhits,
+ * uint32_t qflags,
+ * uint32_t minhits, - if QF_MINHITS
+ * uint32_t numProperties - if QF_PROPERTIES
+ * numProperties * props { - if QF_PROPERTIES
+ * uint32_t nameLen
+ * char[nameLen] name
+ * uint32_t numEntries
+ * numentries * entry {
+ * uint32_t keyLen
+ * char[keyLen] key
+ * uint32_t valueLen
+ * char[valueLen] value
+ * }
+ * }
+ * uint32_t sortSpecLen - if QF_SORTSPEC
+ * char[sortSpecLen] sortSpec - if QF_SORTSPEC
+ * uint32_t aggrSpecLen - if QF_AGGRSPEC
+ * char[aggrSpecLen] aggrSpec - if QF_AGGRSPEC
+ * uint32_t groupSpecLen - if QF_GROUPSPEC
+ * char[groupSpecLen] groupSpec - if QF_GROUPSPEC
+ * uint32_t locationLen - if QF_LOCATION
+ * char[locationLen] location - if QF_LOCATION
+ * uint32_t numStackItems, - if QF_PARSEDQUERY
+ * multiple encoded stackitems: - if QF_PARSEDQUERY
+ - uint32_t OR|AND|NOT|RANK
+ uint32_t arity
+ - uint32_t PHRASE
+ uint32_t arity
+ uint32_t indexNameLen
+ char[] indexName
+ - uint32_t TERM
+ uint32_t indexNameLen
+ char[] indexName
+ uint32_t termLen
+ char[] term
+ */
+ PCODE_GETDOCSUMSX = 219, /*
+ * {uint32_t queryId,} - only if persistent
+ * uint32_t featureflags, - see 'getdocsums_features'
+ * uint32_t docstamp,
+ * uint32_t rankprofile, - if GDF_RANKP_QFLAGS
+ * uint32_t qflags, - if GDF_RANKP_QFLAGS
+ * uint32_t resClassNameLen - if GDF_RESCLASSNAME
+ * char [] resClassName - if GDF_RESCLASSNAME
+ * uint32_t numProperties - if GDF_PROPERTIES
+ * numProperties * props { - if GDF_PROPERTIES
+ * uint32_t nameLen
+ * char[nameLen] name
+ * uint32_t numEntries
+ * numentries * entry {
+ * uint32_t keyLen
+ * char[keyLen] key
+ * uint32_t valueLen
+ * char[valueLen] value
+ * }
+ * }
+ * uint32_t stackItems, - if GDF_STACKDUMP
+ * uint32_t stackDumpLen, - if GDF_STACKDUMP
+ * char[stackDumpLen] stackDump, - if GDF_STACKDUMP
+ * uint32_t locationLen - if GDF_LOCATION
+ * char[locationLen] location - if GDF_LOCATION
+ * N * doc {
+ * uint32_t docid,
+ * uint32_t partid, - if GDF_MLD
+ * uint32_t docstamp, - if GDF_MLD
+ * }
+ */
+ PCODE_MONITORQUERYX = 220, /*
+ * uint32_t featureFlags;
+ * - see monitorquery_features
+ */
+ PCODE_MONITORRESULTX = 221, /*
+ * uint32_t featureFlags;
+ * - see monitorresult_features
+ * uint32_t partitionId;
+ * uint32_t timestamp;
+ * uint32_t totalNodes; - if MRF_MLD
+ * uint32_t activeNodes; - if MRF_MLD
+ * uint32_t totalParts; - if MRF_MLD
+ * uint32_t activeParts; - if MRF_MLD
+ */
+ PCODE_TRACEREPLY = 222, /*
+ * numProperties * props {
+ * uint32_t nameLen
+ * char[nameLen] name
+ * uint32_t numEntries
+ * numentries * entry {
+ * uint32_t keyLen
+ * char[keyLen] key
+ * uint32_t valueLen
+ * char[valueLen] value
+ * }
+ * }
+ */
+ PCODE_LastCode = 223 // Used for consistency checking only, must be last.
+};
+
+} // namespace fs4transport
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/common/tunefileinfo.cpp b/searchlib/src/vespa/searchlib/common/tunefileinfo.cpp
new file mode 100644
index 00000000000..231407fadca
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/common/tunefileinfo.cpp
@@ -0,0 +1,11 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "tunefileinfo.h"
+
+
+namespace search
+{
+
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/common/tunefileinfo.h b/searchlib/src/vespa/searchlib/common/tunefileinfo.h
new file mode 100644
index 00000000000..d2281cd112c
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/common/tunefileinfo.h
@@ -0,0 +1,431 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <stdexcept>
+#include <vespa/vespalib/util/stringfmt.h>
+#include <sys/mman.h>
+#include <linux/mman.h>
+
+namespace search
+{
+
+class TuneFileSeqRead
+{
+public:
+ enum TuneControl
+ {
+ NORMAL,
+ DIRECTIO
+ };
+
+private:
+
+ TuneControl _tuneControl;
+
+public:
+ TuneFileSeqRead(void)
+ : _tuneControl(NORMAL)
+ {
+ }
+
+ void
+ setWantNormal(void)
+ {
+ _tuneControl = NORMAL;
+ }
+
+ void
+ setWantDirectIO(void)
+ {
+ _tuneControl = DIRECTIO;
+ }
+
+ bool
+ getWantDirectIO(void) const
+ {
+ return _tuneControl == DIRECTIO;
+ }
+
+ template <typename Config>
+ void
+ setFromConfig(const enum Config::Io &config)
+ {
+ switch (config) {
+ case Config::NORMAL:
+ _tuneControl = NORMAL;
+ break;
+ case Config::DIRECTIO:
+ _tuneControl = DIRECTIO;
+ break;
+ default:
+ _tuneControl = NORMAL;
+ break;
+ }
+ }
+
+ bool
+ operator==(const TuneFileSeqRead &rhs) const
+ {
+ return _tuneControl == rhs._tuneControl;
+ }
+
+ bool
+ operator!=(const TuneFileSeqRead &rhs) const
+ {
+ return _tuneControl != rhs._tuneControl;
+ }
+};
+
+
+class TuneFileSeqWrite
+{
+public:
+ enum TuneControl
+ {
+ NORMAL,
+ OSYNC,
+ DIRECTIO
+ };
+
+private:
+
+ TuneControl _tuneControl;
+
+public:
+ TuneFileSeqWrite(void)
+ : _tuneControl(NORMAL)
+ {
+ }
+
+ void
+ setWantNormal(void)
+ {
+ _tuneControl = NORMAL;
+ }
+
+ void
+ setWantSyncWrites(void)
+ {
+ _tuneControl = OSYNC;
+ }
+
+ void
+ setWantDirectIO(void)
+ {
+ _tuneControl = DIRECTIO;
+ }
+
+ bool
+ getWantDirectIO(void) const
+ {
+ return _tuneControl == DIRECTIO;
+ }
+
+ bool
+ getWantSyncWrites(void) const
+ {
+ return _tuneControl == OSYNC;
+ }
+
+ template <typename Config>
+ void
+ setFromConfig(const enum Config::Io &config)
+ {
+ switch (config) {
+ case Config::NORMAL:
+ _tuneControl = NORMAL;
+ break;
+ case Config::OSYNC:
+ _tuneControl = OSYNC;
+ break;
+ case Config::DIRECTIO:
+ _tuneControl = DIRECTIO;
+ break;
+ default:
+ _tuneControl = NORMAL;
+ break;
+ }
+ }
+
+ bool
+ operator==(const TuneFileSeqWrite &rhs) const
+ {
+ return _tuneControl == rhs._tuneControl;
+ }
+
+ bool
+ operator!=(const TuneFileSeqWrite &rhs) const
+ {
+ return _tuneControl != rhs._tuneControl;
+ }
+};
+
+
+class TuneFileRandRead
+{
+public:
+ enum TuneControl { NORMAL, DIRECTIO, MMAP };
+private:
+ TuneControl _tuneControl;
+ int _mmapFlags;
+ int _advise;
+public:
+ TuneFileRandRead(void)
+ : _tuneControl(NORMAL),
+ _mmapFlags(0),
+ _advise(0)
+ {
+ }
+
+ void setMemoryMapFlags(int flags) { _mmapFlags = flags; }
+ void setAdvise(int advise) { _advise = advise; }
+ void setWantMemoryMap() { _tuneControl = MMAP; }
+ void setWantDirectIO() { _tuneControl = DIRECTIO; }
+ void setWantNormal() { _tuneControl = NORMAL; }
+ bool getWantDirectIO() const { return _tuneControl == DIRECTIO; }
+ bool getWantMemoryMap() const { return _tuneControl == MMAP; }
+ int getMemoryMapFlags() const { return _mmapFlags; }
+ int getAdvise() const { return _advise; }
+
+ template <typename TuneControlConfig, typename MMapConfig>
+ void
+ setFromConfig(const enum TuneControlConfig::Io & tuneControlConfig, const MMapConfig & mmapFlags)
+ {
+ switch ( tuneControlConfig) {
+ case TuneControlConfig::NORMAL: _tuneControl = NORMAL; break;
+ case TuneControlConfig::DIRECTIO: _tuneControl = DIRECTIO; break;
+ case TuneControlConfig::MMAP: _tuneControl = MMAP; break;
+ default: _tuneControl = NORMAL; break;
+ }
+ for (size_t i(0), m(mmapFlags.options.size()); i < m; i++) {
+ switch (mmapFlags.options[i]) {
+ case MMapConfig::MLOCK: _mmapFlags |= MAP_LOCKED; break;
+ case MMapConfig::POPULATE: _mmapFlags |= MAP_POPULATE; break;
+ case MMapConfig::HUGETLB: _mmapFlags |= MAP_HUGETLB; break;
+ }
+ }
+ switch (mmapFlags.advise) {
+ case MMapConfig::NORMAL: setAdvise(POSIX_FADV_NORMAL); break;
+ case MMapConfig::RANDOM: setAdvise(POSIX_FADV_RANDOM); break;
+ case MMapConfig::SEQUENTIAL: setAdvise(POSIX_FADV_SEQUENTIAL); break;
+ }
+ }
+
+ bool
+ operator==(const TuneFileRandRead &rhs) const {
+ return (_tuneControl == rhs._tuneControl) && (_mmapFlags == rhs._mmapFlags);
+ }
+
+ bool
+ operator!=(const TuneFileRandRead &rhs) const {
+ return (_tuneControl != rhs._tuneControl) && (_mmapFlags == rhs._mmapFlags);
+ }
+};
+
+
+/**
+ * Controls file access for indexed fields, word list and dictionary
+ * during memory dump and fusion.
+ */
+class TuneFileIndexing
+{
+public:
+ TuneFileSeqRead _read;
+ TuneFileSeqWrite _write;
+
+ TuneFileIndexing(void)
+ : _read(),
+ _write()
+ {
+ }
+
+ TuneFileIndexing(const TuneFileSeqRead &r,
+ const TuneFileSeqWrite &w)
+ : _read(r),
+ _write(w)
+ {
+ }
+
+ bool
+ operator==(const TuneFileIndexing &rhs) const
+ {
+ return _read == rhs._read &&
+ _write == rhs._write;
+ }
+
+ bool
+ operator!=(const TuneFileIndexing &rhs) const
+ {
+ return _read != rhs._read ||
+ _write != rhs._write;
+ }
+};
+
+
+/**
+ * Controls file access for indexed fields and dictionary during
+ * search.
+ */
+class TuneFileSearch
+{
+public:
+ TuneFileRandRead _read;
+
+ TuneFileSearch(void)
+ : _read()
+ {
+ }
+
+ TuneFileSearch(const TuneFileRandRead &r)
+ : _read(r)
+ {
+ }
+
+ bool
+ operator==(const TuneFileSearch &rhs) const
+ {
+ return _read == rhs._read;
+ }
+
+ bool
+ operator!=(const TuneFileSearch &rhs) const
+ {
+ return _read != rhs._read;
+ }
+};
+
+
+/**
+ * Controls file access for indexed fields and dictionary during
+ * memory dump, fusion and search.
+ */
+class TuneFileIndexManager
+{
+public:
+ TuneFileIndexing _indexing;
+ TuneFileSearch _search;
+
+ TuneFileIndexManager(void)
+ : _indexing(),
+ _search()
+ {
+ }
+
+ bool
+ operator==(const TuneFileIndexManager &rhs) const
+ {
+ return _indexing == rhs._indexing &&
+ _search == rhs._search;
+ }
+
+ bool
+ operator!=(const TuneFileIndexManager &rhs) const
+ {
+ return _indexing != rhs._indexing ||
+ _search != rhs._search;
+ }
+};
+
+
+/**
+ * Controls file access for writing attributes to disk.
+ */
+class TuneFileAttributes
+{
+public:
+ TuneFileSeqWrite _write;
+
+ TuneFileAttributes(void)
+ : _write()
+ {
+ }
+
+ bool
+ operator==(const TuneFileAttributes &rhs) const
+ {
+ return _write == rhs._write;
+ }
+
+ bool
+ operator!=(const TuneFileAttributes &rhs) const
+ {
+ return _write != rhs._write;
+ }
+};
+
+
+/**
+ * Controls file access for summaries (docstore).
+ */
+class TuneFileSummary
+{
+public:
+ TuneFileSeqRead _seqRead;
+ TuneFileSeqWrite _write;
+ TuneFileRandRead _randRead;
+
+ TuneFileSummary(void)
+ : _seqRead(),
+ _write(),
+ _randRead()
+ {
+ }
+
+ bool
+ operator==(const TuneFileSummary &rhs) const
+ {
+ return _seqRead == rhs._seqRead &&
+ _write == rhs._write &&
+ _randRead == rhs._randRead;
+ }
+
+ bool
+ operator!=(const TuneFileSummary &rhs) const
+ {
+ return _seqRead != rhs._seqRead ||
+ _write != rhs._write ||
+ _randRead != rhs._randRead;
+ }
+};
+
+
+/**
+ * Controls file access for document db, i.e. "everything".
+ */
+class TuneFileDocumentDB
+{
+public:
+ typedef std::shared_ptr<TuneFileDocumentDB> SP;
+
+ TuneFileIndexManager _index;
+ TuneFileAttributes _attr;
+ TuneFileSummary _summary;
+
+ TuneFileDocumentDB(void)
+ : _index(),
+ _attr(),
+ _summary()
+ {
+ }
+
+ bool
+ operator==(const TuneFileDocumentDB &rhs) const
+ {
+ return _index == rhs._index &&
+ _attr == rhs._attr &&
+ _summary == rhs._summary;
+ }
+
+ bool
+ operator!=(const TuneFileDocumentDB &rhs) const
+ {
+ return _index != rhs._index ||
+ _attr != rhs._attr ||
+ _summary != rhs._summary;
+ }
+};
+
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/config/.gitignore b/searchlib/src/vespa/searchlib/config/.gitignore
new file mode 100644
index 00000000000..0d614ad8ec7
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/config/.gitignore
@@ -0,0 +1,5 @@
+*.So
+.depend
+Makefile
+config-*.cpp
+config-*.h
diff --git a/searchlib/src/vespa/searchlib/config/CMakeLists.txt b/searchlib/src/vespa/searchlib/config/CMakeLists.txt
new file mode 100644
index 00000000000..ad1a75f8b84
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/config/CMakeLists.txt
@@ -0,0 +1,7 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_library(searchlib_sconfig OBJECT
+ SOURCES
+ DEPENDS
+)
+vespa_generate_config(searchlib_sconfig translogserver.def)
+install(FILES translogserver.def DESTINATION var/db/vespa/config_server/serverdb/classes)
diff --git a/searchlib/src/vespa/searchlib/config/translogserver.def b/searchlib/src/vespa/searchlib/config/translogserver.def
new file mode 100644
index 00000000000..b617e6e2783
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/config/translogserver.def
@@ -0,0 +1,24 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+namespace=searchlib
+
+## Port number to use for listening.
+listenport int default=13700 restart
+
+## Max file size (50M)
+filesizemax int default=50000000 restart
+
+## Server name to identify server.
+servername string default="tls" restart
+
+## Base directory. The default is not used as it is decided by the model.
+basedir string default="tmp" restart
+
+## Use fsync after each commit.
+## If not the below interval is used.
+usefsync bool default=false restart
+
+##Number of threads available for visiting/subscription.
+maxthreads int default=4 restart
+
+##Default crc method used
+crcmethod enum {ccitt_crc32, xxh64} default=xxh64
diff --git a/searchlib/src/vespa/searchlib/diskindex/.gitignore b/searchlib/src/vespa/searchlib/diskindex/.gitignore
new file mode 100644
index 00000000000..0b3af54ee50
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/diskindex/.gitignore
@@ -0,0 +1,3 @@
+*.So
+.depend*
+Makefile
diff --git a/searchlib/src/vespa/searchlib/diskindex/CMakeLists.txt b/searchlib/src/vespa/searchlib/diskindex/CMakeLists.txt
new file mode 100644
index 00000000000..1cde63458ec
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/diskindex/CMakeLists.txt
@@ -0,0 +1,28 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_library(searchlib_searchlib_diskindex OBJECT
+ SOURCES
+ bitvectordictionary.cpp
+ bitvectorfile.cpp
+ bitvectoridxfile.cpp
+ bitvectorkeyscope.cpp
+ checkpointfile.cpp
+ dictionarywordreader.cpp
+ diskindex.cpp
+ disktermblueprint.cpp
+ docidmapper.cpp
+ extposocc.cpp
+ fieldreader.cpp
+ fieldwriter.cpp
+ fileheader.cpp
+ fusion.cpp
+ indexbuilder.cpp
+ pagedict4file.cpp
+ pagedict4randread.cpp
+ wordnummapper.cpp
+ zcposocc.cpp
+ zcposocciterators.cpp
+ zcposoccrandread.cpp
+ zcposting.cpp
+ zcpostingiterators.cpp
+ DEPENDS
+)
diff --git a/searchlib/src/vespa/searchlib/diskindex/OWNERS b/searchlib/src/vespa/searchlib/diskindex/OWNERS
new file mode 100644
index 00000000000..64735d11d93
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/diskindex/OWNERS
@@ -0,0 +1 @@
+tegge
diff --git a/searchlib/src/vespa/searchlib/diskindex/bitvectordictionary.cpp b/searchlib/src/vespa/searchlib/diskindex/bitvectordictionary.cpp
new file mode 100644
index 00000000000..799b02dd071
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/diskindex/bitvectordictionary.cpp
@@ -0,0 +1,108 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".diskindex.bitvectordictionary");
+#include "bitvectordictionary.h"
+#include <vespa/vespalib/data/fileheader.h>
+
+namespace search
+{
+
+namespace diskindex
+{
+
+
+BitVectorDictionary::BitVectorDictionary()
+ : _docIdLimit(),
+ _entries(),
+ _vectorSize(),
+ _datFile(),
+ _datHeaderLen(0u)
+{
+}
+
+
+BitVectorDictionary::~BitVectorDictionary()
+{
+ if (_datFile.get() != NULL) {
+ _datFile->Close();
+ }
+}
+
+
+bool
+BitVectorDictionary::open(const vespalib::string &pathPrefix,
+ const TuneFileRandRead &tuneFileRead,
+ BitVectorKeyScope scope)
+{
+ vespalib::string booloccIdxName = pathPrefix + "boolocc" +
+ getBitVectorKeyScopeSuffix(scope);
+ vespalib::string booloccDatName = pathPrefix + "boolocc.bdat";
+ FastOS_File idxFile;
+ idxFile.OpenReadOnly(booloccIdxName.c_str());
+ if (!idxFile.IsOpened()) {
+ LOG(warning, "Could not open bitvector idx file '%s'",
+ booloccIdxName.c_str());
+ return false;
+ }
+
+ vespalib::FileHeader idxHeader;
+ uint32_t idxHeaderLen = idxHeader.readFile(idxFile);
+ idxFile.SetPosition(idxHeaderLen);
+ assert(idxHeader.hasTag("frozen"));
+ assert(idxHeader.hasTag("docIdLimit"));
+ assert(idxHeader.hasTag("numKeys"));
+ assert(idxHeader.getTag("frozen").asInteger() != 0);
+ _docIdLimit = idxHeader.getTag("docIdLimit").asInteger();
+ uint32_t numEntries = idxHeader.getTag("numKeys").asInteger();
+
+ _entries.resize(numEntries);
+ size_t bufSize = sizeof(WordSingleKey) * numEntries;
+ assert(idxFile.GetSize() >= static_cast<int64_t>(idxHeaderLen + bufSize));
+ if (bufSize > 0) {
+ idxFile.Read(&_entries[0], bufSize);
+ }
+ idxFile.Close();
+
+ _vectorSize = BitVector::getFileBytes(_docIdLimit);
+ _datFile.reset(new FastOS_File());
+ if (tuneFileRead.getWantMemoryMap()) {
+ _datFile->enableMemoryMap(tuneFileRead.getMemoryMapFlags());
+ } else if (tuneFileRead.getWantDirectIO()) {
+ _datFile->EnableDirectIO();
+ }
+ _datFile->OpenReadOnly(booloccDatName.c_str());
+ if (!_datFile->IsOpened()) {
+ LOG(warning, "Could not open bitvector dat file '%s'",
+ booloccDatName.c_str());
+ return false;
+ }
+ vespalib::FileHeader datHeader(64);
+ _datHeaderLen = datHeader.readFile(*_datFile);
+ assert(_datFile->GetSize() >=
+ static_cast<int64_t>(_vectorSize) * numEntries + _datHeaderLen);
+ return true;
+}
+
+
+BitVector::UP
+BitVectorDictionary::lookup(uint64_t wordNum)
+{
+ WordSingleKey key;
+ key._wordNum = wordNum;
+ std::vector<WordSingleKey>::const_iterator itr =
+ std::lower_bound(_entries.begin(), _entries.end(), key);
+ if (itr == _entries.end() || key < *itr) {
+ return BitVector::UP();
+ }
+ int64_t pos = &*itr - &_entries[0];
+ return BitVector::create(_docIdLimit, *_datFile,
+ ((int64_t) _vectorSize) * pos + _datHeaderLen,
+ itr->_numDocs);
+}
+
+
+} // namespace diskindex
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/diskindex/bitvectordictionary.h b/searchlib/src/vespa/searchlib/diskindex/bitvectordictionary.h
new file mode 100644
index 00000000000..75b88de1a75
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/diskindex/bitvectordictionary.h
@@ -0,0 +1,81 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/searchlib/common/bitvector.h>
+#include <vespa/searchlib/index/bitvectorkeys.h>
+#include <vespa/searchlib/common/tunefileinfo.h>
+#include <vespa/vespalib/stllike/string.h>
+#include "bitvectorkeyscope.h"
+
+namespace search {
+
+namespace diskindex {
+
+/**
+ * This dictionary provides a sparse mapping from word number -> BitVector.
+ * The dictionary is constructed based on the boolocc idx file and
+ * the actual bit vectors are stored in the boolocc dat file.
+ **/
+class BitVectorDictionary
+{
+private:
+ BitVectorDictionary(const BitVectorDictionary &rhs);
+
+ BitVectorDictionary &
+ operator=(const BitVectorDictionary &rhs);
+
+ typedef search::index::BitVectorWordSingleKey WordSingleKey;
+
+ uint32_t _docIdLimit;
+ std::vector<WordSingleKey> _entries;
+ size_t _vectorSize;
+ std::unique_ptr<FastOS_File> _datFile;
+ uint32_t _datHeaderLen;
+
+public:
+ typedef std::shared_ptr<BitVectorDictionary> SP;
+
+ BitVectorDictionary();
+ ~BitVectorDictionary();
+
+ /**
+ * Open this dictionary using the following path prefix to where
+ * the files are located. The boolocc idx file is loaded into
+ * memory while the dat file is just opened.
+ *
+ * @param pathPrefix the path prefix to where the boolocc files
+ * are located.
+ * @return true if the files could be opened.
+ **/
+ bool
+ open(const vespalib::string &pathPrefix,
+ const TuneFileRandRead &tuneFileRead,
+ BitVectorKeyScope scope);
+
+ /**
+ * Lookup the given word number and load and return the associated
+ * bit vector if found.
+ *
+ * @param wordNum the word number to lookup a bit vector for.
+ * @return the loaded bit vector or NULL if not found.
+ **/
+ BitVector::UP
+ lookup(uint64_t wordNum);
+
+ uint32_t
+ getDocIdLimit() const
+ {
+ return _docIdLimit;
+ }
+
+ const std::vector<WordSingleKey> &
+ getEntries() const
+ {
+ return _entries;
+ }
+};
+
+} // namespace diskindex
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/diskindex/bitvectorfile.cpp b/searchlib/src/vespa/searchlib/diskindex/bitvectorfile.cpp
new file mode 100644
index 00000000000..0a2c9cbc955
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/diskindex/bitvectorfile.cpp
@@ -0,0 +1,238 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright (C) 1998-2003 Fast Search & Transfer ASA
+// Copyright (C) 2003 Overture Services Norway AS
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".diskindex.bitvectorfile");
+#include <vespa/searchlib/common/bitvector.h>
+#include "bitvectorfile.h"
+#include <vespa/searchlib/index/bitvectorkeys.h>
+#include <vespa/vespalib/objects/nbostream.h>
+#include <vespa/vespalib/data/fileheader.h>
+#include <vespa/searchlib/common/fileheadercontext.h>
+
+namespace search
+{
+
+namespace diskindex
+{
+
+using vespalib::nbostream;
+using search::index::BitVectorWordSingleKey;
+using search::common::FileHeaderContext;
+
+namespace {
+
+void
+readHeader(vespalib::FileHeader &h,
+ const vespalib::string &name)
+{
+ Fast_BufferedFile file(32768u);
+ file.OpenReadOnly(name.c_str());
+ h.readFile(file);
+ file.Close();
+}
+
+const size_t FILE_HEADERSIZE_ALIGNMENT = 4096;
+
+}
+
+BitVectorFileWrite::BitVectorFileWrite(BitVectorKeyScope scope)
+ : BitVectorIdxFileWrite(scope),
+ _datFile(NULL),
+ _datHeaderLen(0)
+{
+}
+
+
+BitVectorFileWrite::~BitVectorFileWrite(void)
+{
+ // No implicit close() call, but cleanup memory allocations.
+ delete _datFile;
+}
+
+
+void
+BitVectorFileWrite::checkPointWrite(nbostream &out)
+{
+ flush();
+ Parent::checkPointWriteCommon(out);
+ out << _datHeaderLen;
+ sync();
+}
+
+
+void
+BitVectorFileWrite::checkPointRead(nbostream &in)
+{
+ Parent::checkPointRead(in);
+ in >> _datHeaderLen;
+}
+
+
+void
+BitVectorFileWrite::open(const vespalib::string &name,
+ uint32_t docIdLimit,
+ const TuneFileSeqWrite &tuneFileWrite,
+ const FileHeaderContext &fileHeaderContext)
+{
+ vespalib::string datname = name + ".bdat";
+
+ assert(_datFile == NULL);
+
+ Parent::open(name, docIdLimit, tuneFileWrite, fileHeaderContext);
+
+ FastOS_FileInterface *datfile = new FastOS_File;
+ _datFile = new Fast_BufferedFile(datfile);
+ if (tuneFileWrite.getWantSyncWrites())
+ _datFile->EnableSyncWrites();
+ if (tuneFileWrite.getWantDirectIO())
+ _datFile->EnableDirectIO();
+ // XXX no checking for success:
+ _datFile->OpenWriteOnly(datname.c_str());
+
+ if (_datHeaderLen == 0) {
+ assert(_numKeys == 0);
+ makeDatHeader(fileHeaderContext);
+ }
+
+ int64_t pos;
+ size_t bitmapbytes;
+
+ bitmapbytes = BitVector::getFileBytes(_docIdLimit);
+
+ pos = static_cast<int64_t>(_numKeys) *
+ static_cast<int64_t>(bitmapbytes) + _datHeaderLen;
+
+ int64_t olddatsize = _datFile->GetSize();
+ assert(olddatsize >= pos);
+ (void) olddatsize;
+
+ _datFile->SetSize(pos);
+
+ assert(pos == _datFile->GetPosition());
+}
+
+
+void
+BitVectorFileWrite::makeDatHeader(const FileHeaderContext &fileHeaderContext)
+{
+ vespalib::FileHeader h(FILE_HEADERSIZE_ALIGNMENT); // 64 byte alignment on bitvector.dat header
+ typedef vespalib::GenericHeader::Tag Tag;
+ fileHeaderContext.addTags(h, _datFile->GetFileName());
+ h.putTag(Tag("docIdLimit", _docIdLimit));
+ h.putTag(Tag("numKeys", _numKeys));
+ h.putTag(Tag("frozen", 0));
+ h.putTag(Tag("fileBitSize", 0));
+ h.putTag(Tag("desc", "Bitvector data file"));
+ _datFile->SetPosition(0);
+ _datHeaderLen = h.writeFile(*_datFile);
+ _datFile->Flush();
+}
+
+
+void
+BitVectorFileWrite::updateDatHeader(uint64_t fileBitSize)
+{
+ vespalib::FileHeader h(FILE_HEADERSIZE_ALIGNMENT);
+ typedef vespalib::GenericHeader::Tag Tag;
+ readHeader(h, _datFile->GetFileName());
+ FileHeaderContext::setFreezeTime(h);
+ h.putTag(Tag("numKeys", _numKeys));
+ h.putTag(Tag("frozen", 1));
+ h.putTag(Tag("fileBitSize", fileBitSize));
+ _datFile->Flush();
+ _datFile->Sync();
+ assert(h.getSize() == _datHeaderLen);
+ _datFile->SetPosition(0);
+ h.writeFile(*_datFile);
+ _datFile->Flush();
+ _datFile->Sync();
+}
+
+
+void
+BitVectorFileWrite::addWordSingle(uint64_t wordNum,
+ const BitVector &bitVector)
+{
+ assert(bitVector.size() == _docIdLimit);
+ bitVector.invalidateCachedCount();
+ Parent::addWordSingle(wordNum, bitVector.countTrueBits());
+ _datFile->WriteBuf(bitVector.getStart(),
+ bitVector.getFileBytes());
+}
+
+
+void
+BitVectorFileWrite::flush(void)
+{
+ Parent::flush();
+ _datFile->Flush();
+}
+
+
+void
+BitVectorFileWrite::sync(void)
+{
+ flush();
+ Parent::syncCommon();
+ _datFile->Sync();
+}
+
+
+void
+BitVectorFileWrite::close(void)
+{
+ size_t bitmapbytes = BitVector::getFileBytes(_docIdLimit);
+
+ if (_datFile != NULL) {
+ if (_datFile->IsOpened()) {
+ uint64_t pos = _datFile->GetPosition();
+ assert(pos == static_cast<uint64_t>(_numKeys) *
+ static_cast<uint64_t>(bitmapbytes) + _datHeaderLen);
+ (void) bitmapbytes;
+ _datFile->alignEndForDirectIO();
+ updateDatHeader(pos * 8);
+ _datFile->Close();
+ }
+ delete _datFile;
+ _datFile = NULL;
+ }
+ Parent::close();
+}
+
+
+void
+BitVectorCandidate::checkPointWrite(nbostream &out)
+{
+ uint32_t docIdLimit = _bv->size();
+ out << docIdLimit << _numDocs << _bitVectorLimit;
+ out.saveVector(_array);
+ if (getCrossedBitVectorLimit())
+ out << *_bv;
+}
+
+
+void
+BitVectorCandidate::checkPointRead(nbostream &in)
+{
+ uint32_t docIdLimit = _bv->size();
+ uint32_t checkDocIdLimit;
+ uint32_t checkBitVectorLimit;
+ in >> checkDocIdLimit >> _numDocs >> checkBitVectorLimit;
+ assert(checkDocIdLimit == docIdLimit);
+ (void) docIdLimit;
+ assert(checkBitVectorLimit == _bitVectorLimit);
+ in.restoreVector(_array);
+ if (getCrossedBitVectorLimit()) {
+ in >> *_bv;
+ } else {
+ _bv->clear();
+ }
+}
+
+
+} // namespace diskindex
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/diskindex/bitvectorfile.h b/searchlib/src/vespa/searchlib/diskindex/bitvectorfile.h
new file mode 100644
index 00000000000..a33bd8e6c0f
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/diskindex/bitvectorfile.h
@@ -0,0 +1,204 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright (C) 1998-2003 Fast Search & Transfer ASA
+// Copyright (C) 2003 Overture Services Norway AS
+#pragma once
+
+#include <vespa/fastlib/io/bufferedfile.h>
+#include <vespa/searchlib/common/bitvector.h>
+#include <vespa/searchlib/common/tunefileinfo.h>
+#include <vespa/vespalib/stllike/string.h>
+#include "bitvectoridxfile.h"
+
+namespace search
+{
+
+
+namespace diskindex
+{
+
+
+class BitVectorFileWrite : public BitVectorIdxFileWrite
+{
+private:
+ BitVectorFileWrite(const BitVectorFileWrite &) = delete;
+ BitVectorFileWrite(const BitVectorFileWrite &&) = delete;
+ BitVectorFileWrite& operator=(const BitVectorFileWrite &) = delete;
+ BitVectorFileWrite& operator=(const BitVectorFileWrite &&) = delete;
+
+ using Parent = BitVectorIdxFileWrite;
+
+ Fast_BufferedFile *_datFile;
+public:
+
+private:
+ uint32_t _datHeaderLen;
+
+public:
+ BitVectorFileWrite(BitVectorKeyScope scope);
+
+ ~BitVectorFileWrite(void);
+
+ /**
+ * Checkpoint write. Used at semi-regular intervals during indexing
+ * to allow for continued indexing after an interrupt. Implies
+ * flush from memory to disk, and possibly also sync to permanent
+ * storage media.
+ */
+ void
+ checkPointWrite(vespalib::nbostream &out);
+
+ /**
+ * Checkpoint read. Used when resuming indexing after an interrupt.
+ */
+ void
+ checkPointRead(vespalib::nbostream &in);
+
+ void
+ open(const vespalib::string &name, uint32_t docIdLimit,
+ const TuneFileSeqWrite &tuneFileWrite,
+ const common::FileHeaderContext &fileHeaderContext);
+
+
+ void
+ addWordSingle(uint64_t wordNum, const BitVector &bitVector);
+
+ void
+ flush(void);
+
+ void
+ sync(void);
+
+ void
+ close(void);
+
+ void
+ makeDatHeader(const common::FileHeaderContext &fileHeaderContext);
+
+ void
+ updateDatHeader(uint64_t fileBitSize);
+};
+
+
+/*
+ * Buffer document ids for a candidate bitvector.
+ */
+class BitVectorCandidate
+{
+private:
+ std::vector<uint32_t> _array;
+ uint64_t _numDocs;
+ uint32_t _bitVectorLimit;
+ BitVector::UP _bv;
+
+public:
+ BitVectorCandidate(uint32_t docIdLimit, uint32_t bitVectorLimit)
+ : _array(),
+ _numDocs(0u),
+ _bitVectorLimit(bitVectorLimit),
+ _bv(BitVector::create(docIdLimit))
+ {
+ _array.reserve(_bitVectorLimit);
+ }
+
+
+ BitVectorCandidate(uint32_t docIdLimit)
+ : _array(),
+ _numDocs(0u),
+ _bitVectorLimit(BitVectorFileWrite::getBitVectorLimit(docIdLimit)),
+ _bv(BitVector::create(docIdLimit))
+ {
+ _array.reserve(_bitVectorLimit);
+ }
+
+ void
+ clear(void)
+ {
+ if (__builtin_expect(_numDocs > _bitVectorLimit, false)) {
+ _bv->clear();
+ }
+ _numDocs = 0;
+ _array.clear();
+ }
+
+ void
+ flush(BitVector &obv)
+ {
+ if (__builtin_expect(_numDocs > _bitVectorLimit, false)) {
+ obv.orWith(*_bv);
+ } else {
+ for (uint32_t i : _array) {
+ obv.setBit(i);
+ }
+ }
+ clear();
+ }
+
+ void
+ add(uint32_t docId)
+ {
+ if (_numDocs < _bitVectorLimit) {
+ _array.push_back(docId);
+ } else {
+ if (__builtin_expect(_numDocs == _bitVectorLimit, false)) {
+ for (uint32_t i : _array) {
+ _bv->setBit(i);
+ }
+ _array.clear();
+ }
+ _bv->setBit(docId);
+ }
+ ++_numDocs;
+ }
+
+ /*
+ * Get number of documents buffered. This might include duplicates.
+ */
+ uint64_t
+ getNumDocs(void) const
+ {
+ return _numDocs;
+ }
+
+ bool
+ empty(void) const
+ {
+ return _numDocs == 0;
+ }
+
+ /*
+ * Return true if array limit has been exceeded and bitvector has been
+ * populated.
+ */
+ bool
+ getCrossedBitVectorLimit(void) const
+ {
+ return _numDocs > _bitVectorLimit;
+ }
+
+ BitVector &
+ getBitVector(void)
+ {
+ return *_bv;
+ }
+
+ /**
+ * Checkpoint write. Used at semi-regular intervals during indexing
+ * to allow for continued indexing after an interrupt. Implies
+ * flush from memory to disk, and possibly also sync to permanent
+ * storage media.
+ */
+ void
+ checkPointWrite(vespalib::nbostream &out);
+
+ /**
+ * Checkpoint read. Used when resuming indexing after an interrupt.
+ */
+ void
+ checkPointRead(vespalib::nbostream &in);
+};
+
+
+} // namespace diskindex
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/diskindex/bitvectoridxfile.cpp b/searchlib/src/vespa/searchlib/diskindex/bitvectoridxfile.cpp
new file mode 100644
index 00000000000..82c46d2172f
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/diskindex/bitvectoridxfile.cpp
@@ -0,0 +1,233 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright (C) 1998-2003 Fast Search & Transfer ASA
+// Copyright (C) 2003 Overture Services Norway AS
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".diskindex.bitvectoridxfile");
+#include <vespa/searchlib/common/bitvector.h>
+#include "bitvectoridxfile.h"
+#include <vespa/searchlib/index/bitvectorkeys.h>
+#include <vespa/vespalib/objects/nbostream.h>
+#include <vespa/vespalib/data/fileheader.h>
+#include <vespa/searchlib/common/fileheadercontext.h>
+
+namespace search
+{
+
+namespace diskindex
+{
+
+using vespalib::nbostream;
+using search::index::BitVectorWordSingleKey;
+using search::common::FileHeaderContext;
+
+namespace {
+
+void
+readHeader(vespalib::FileHeader &h,
+ const vespalib::string &name)
+{
+ Fast_BufferedFile file(32768u);
+ file.OpenReadOnly(name.c_str());
+ h.readFile(file);
+ file.Close();
+}
+
+const size_t FILE_HEADERSIZE_ALIGNMENT = 4096;
+
+}
+
+BitVectorIdxFileWrite::BitVectorIdxFileWrite(BitVectorKeyScope scope)
+ : _idxFile(NULL),
+ _numKeys(0),
+ _docIdLimit(0),
+ _idxHeaderLen(0),
+ _scope(scope)
+{
+}
+
+
+BitVectorIdxFileWrite::~BitVectorIdxFileWrite(void)
+{
+ // No implicit close() call, but cleanup memory allocations.
+ delete _idxFile;
+}
+
+
+uint64_t
+BitVectorIdxFileWrite::idxSize(void) const
+{
+ return _idxHeaderLen +
+ static_cast<int64_t>(_numKeys) * sizeof(BitVectorWordSingleKey);
+}
+
+
+void
+BitVectorIdxFileWrite::checkPointWriteCommon(nbostream &out)
+{
+ out << _scope;
+ out << _docIdLimit << _numKeys;
+ out << _idxHeaderLen;
+}
+
+
+void
+BitVectorIdxFileWrite::checkPointWrite(nbostream &out)
+{
+ flush();
+ checkPointWriteCommon(out);
+ sync();
+}
+
+
+void
+BitVectorIdxFileWrite::checkPointRead(nbostream &in)
+{
+ BitVectorKeyScope checkScope;
+ in >> checkScope;
+ assert(checkScope == _scope);
+ in >> _docIdLimit >> _numKeys;
+ in >> _idxHeaderLen;
+}
+
+
+void
+BitVectorIdxFileWrite::open(const vespalib::string &name,
+ uint32_t docIdLimit,
+ const TuneFileSeqWrite &tuneFileWrite,
+ const FileHeaderContext &fileHeaderContext)
+{
+ if (_numKeys != 0) {
+ assert(docIdLimit == _docIdLimit);
+ } else {
+ _docIdLimit = docIdLimit;
+ }
+ vespalib::string idxname = name + getBitVectorKeyScopeSuffix(_scope);
+
+ assert(_idxFile == NULL);
+ FastOS_FileInterface *idxfile = new FastOS_File;
+ _idxFile = new Fast_BufferedFile(idxfile);
+ if (tuneFileWrite.getWantSyncWrites())
+ _idxFile->EnableSyncWrites();
+ if (tuneFileWrite.getWantDirectIO())
+ _idxFile->EnableDirectIO();
+
+ // XXX no checking for success:
+ _idxFile->OpenWriteOnly(idxname.c_str());
+
+ if (_idxHeaderLen == 0) {
+ assert(_numKeys == 0);
+ makeIdxHeader(fileHeaderContext);
+ }
+
+ int64_t pos = idxSize();
+
+ int64_t oldidxsize = _idxFile->GetSize();
+ assert(oldidxsize >= pos);
+ (void) oldidxsize;
+
+ _idxFile->SetSize(pos);
+
+ assert(pos == _idxFile->GetPosition());
+}
+
+
+void
+BitVectorIdxFileWrite::makeIdxHeader(const FileHeaderContext &fileHeaderContext)
+{
+ vespalib::FileHeader h(FILE_HEADERSIZE_ALIGNMENT);
+ typedef vespalib::GenericHeader::Tag Tag;
+ fileHeaderContext.addTags(h, _idxFile->GetFileName());
+ h.putTag(Tag("docIdLimit", _docIdLimit));
+ h.putTag(Tag("numKeys", _numKeys));
+ h.putTag(Tag("frozen", 0));
+ if (_scope != BitVectorKeyScope::SHARED_WORDS) {
+ h.putTag(Tag("fileBitSize", 0));
+ }
+ h.putTag(Tag("desc", "Bitvector dictionary file, single words"));
+ _idxFile->SetPosition(0);
+ _idxHeaderLen = h.writeFile(*_idxFile);
+ _idxFile->Flush();
+}
+
+
+void
+BitVectorIdxFileWrite::updateIdxHeader(uint64_t fileBitSize)
+{
+ vespalib::FileHeader h(FILE_HEADERSIZE_ALIGNMENT);
+ typedef vespalib::GenericHeader::Tag Tag;
+ readHeader(h, _idxFile->GetFileName());
+ FileHeaderContext::setFreezeTime(h);
+ h.putTag(Tag("numKeys", _numKeys));
+ h.putTag(Tag("frozen", 1));
+ if (_scope != BitVectorKeyScope::SHARED_WORDS) {
+ h.putTag(Tag("fileBitSize", fileBitSize));
+ }
+ _idxFile->Flush();
+ _idxFile->Sync();
+ assert(h.getSize() == _idxHeaderLen);
+ _idxFile->SetPosition(0);
+ h.writeFile(*_idxFile);
+ _idxFile->Flush();
+ _idxFile->Sync();
+}
+
+
+void
+BitVectorIdxFileWrite::addWordSingle(uint64_t wordNum, uint32_t numDocs)
+{
+ BitVectorWordSingleKey key;
+ key._wordNum = wordNum;
+ key._numDocs = numDocs;
+ _idxFile->WriteBuf(&key, sizeof(key));
+ ++_numKeys;
+}
+
+
+void
+BitVectorIdxFileWrite::flush(void)
+{
+ _idxFile->Flush();
+
+ uint64_t pos = _idxFile->GetPosition();
+ assert(pos == idxSize());
+ (void) pos;
+}
+
+
+void
+BitVectorIdxFileWrite::syncCommon()
+{
+ _idxFile->Sync();
+}
+
+
+void
+BitVectorIdxFileWrite::sync(void)
+{
+ flush();
+ syncCommon();
+}
+
+
+void
+BitVectorIdxFileWrite::close(void)
+{
+ if (_idxFile != NULL) {
+ if (_idxFile->IsOpened()) {
+ uint64_t pos = _idxFile->GetPosition();
+ assert(pos == idxSize());
+ _idxFile->alignEndForDirectIO();
+ updateIdxHeader(pos * 8);
+ _idxFile->Close();
+ }
+ delete _idxFile;
+ _idxFile = NULL;
+ }
+}
+
+
+} // namespace diskindex
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/diskindex/bitvectoridxfile.h b/searchlib/src/vespa/searchlib/diskindex/bitvectoridxfile.h
new file mode 100644
index 00000000000..269b6e659af
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/diskindex/bitvectoridxfile.h
@@ -0,0 +1,122 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright (C) 1998-2003 Fast Search & Transfer ASA
+// Copyright (C) 2003 Overture Services Norway AS
+#pragma once
+
+#include <vespa/fastlib/io/bufferedfile.h>
+#include <vespa/searchlib/common/bitvector.h>
+#include <vespa/searchlib/common/tunefileinfo.h>
+#include <vespa/vespalib/stllike/string.h>
+#include "bitvectorkeyscope.h"
+
+namespace vespalib
+{
+
+class nbostream;
+
+}
+
+namespace search
+{
+
+
+namespace common
+{
+
+class FileHeaderContext;
+
+}
+
+
+namespace diskindex
+{
+
+class BitVectorIdxFileWrite
+{
+private:
+ BitVectorIdxFileWrite(const BitVectorIdxFileWrite &) = delete;
+ BitVectorIdxFileWrite(const BitVectorIdxFileWrite &&) = delete;
+ BitVectorIdxFileWrite& operator=(const BitVectorIdxFileWrite &) = delete;
+ BitVectorIdxFileWrite& operator=(const BitVectorIdxFileWrite &&) = delete;
+
+ Fast_BufferedFile *_idxFile;
+
+public:
+
+protected:
+ uint32_t _numKeys; // Number of bitvectors and keys
+ uint32_t _docIdLimit; // Limit for document ids (docId < docIdLimit)
+ uint32_t _idxHeaderLen;
+ BitVectorKeyScope _scope;
+
+ uint64_t
+ idxSize(void) const;
+
+ void
+ checkPointWriteCommon(vespalib::nbostream &out);
+
+ void syncCommon();
+
+public:
+ BitVectorIdxFileWrite(BitVectorKeyScope scope);
+
+ ~BitVectorIdxFileWrite(void);
+
+ /**
+ * Checkpoint write. Used at semi-regular intervals during indexing
+ * to allow for continued indexing after an interrupt. Implies
+ * flush from memory to disk, and possibly also sync to permanent
+ * storage media.
+ */
+ void
+ checkPointWrite(vespalib::nbostream &out);
+
+ /**
+ * Checkpoint read. Used when resuming indexing after an interrupt.
+ */
+ void
+ checkPointRead(vespalib::nbostream &in);
+
+ void
+ open(const vespalib::string &name, uint32_t docIdLimit,
+ const TuneFileSeqWrite &tuneFileWrite,
+ const search::common::FileHeaderContext &fileHeaderContext);
+
+
+
+ void
+ addWordSingle(uint64_t wordNum, uint32_t numDocs);
+
+ void
+ flush(void);
+
+ void
+ sync(void);
+
+ void
+ close(void);
+
+ static uint32_t
+ getBitVectorLimit(uint32_t docIdLimit)
+ {
+ // Must match FastS_BinSizeParams::CalcMaxBinSize()
+ uint32_t ret = (docIdLimit + 63) / 64;
+ if (ret < 16)
+ ret = 16;
+ if (ret > docIdLimit)
+ ret = docIdLimit;
+ return ret;
+ }
+
+ void
+ makeIdxHeader(const search::common::FileHeaderContext &fileHeaderContext);
+
+ void
+ updateIdxHeader(uint64_t fileBitSize);
+};
+
+
+} // namespace diskindex
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/diskindex/bitvectorkeyscope.cpp b/searchlib/src/vespa/searchlib/diskindex/bitvectorkeyscope.cpp
new file mode 100644
index 00000000000..5c783035236
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/diskindex/bitvectorkeyscope.cpp
@@ -0,0 +1,73 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+#include "bitvectorkeyscope.h"
+#include <vespa/vespalib/objects/nbostream.h>
+
+
+using search::diskindex::BitVectorKeyScope;
+
+namespace search
+{
+
+namespace diskindex
+{
+
+const char *getBitVectorKeyScopeSuffix(BitVectorKeyScope scope)
+{
+ switch (scope) {
+ case BitVectorKeyScope::SHARED_WORDS:
+ return ".bidx";
+ default:
+ return ".idx";
+ }
+}
+
+}
+
+}
+
+
+namespace {
+
+uint8_t
+getVal(BitVectorKeyScope scope)
+{
+ switch (scope) {
+ case BitVectorKeyScope::SHARED_WORDS:
+ return 0u;
+ default:
+ return 1u;
+ }
+}
+
+
+const BitVectorKeyScope scopes[] = { BitVectorKeyScope::SHARED_WORDS,
+ BitVectorKeyScope::PERFIELD_WORDS };
+
+}
+
+
+namespace vespalib
+{
+
+nbostream &
+operator<<(nbostream &stream, const BitVectorKeyScope &scope)
+{
+ uint8_t val = getVal(scope);
+ stream << val;
+ return stream;
+}
+
+nbostream &
+operator>>(nbostream &stream, BitVectorKeyScope &scope)
+{
+ uint8_t val;
+ stream >> val;
+ assert(val < sizeof(scopes) / sizeof(scopes[0]));
+ scope = scopes[val];
+ return stream;
+}
+
+}
diff --git a/searchlib/src/vespa/searchlib/diskindex/bitvectorkeyscope.h b/searchlib/src/vespa/searchlib/diskindex/bitvectorkeyscope.h
new file mode 100644
index 00000000000..5ae7a223629
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/diskindex/bitvectorkeyscope.h
@@ -0,0 +1,43 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+
+namespace vespalib
+{
+
+class nbostream;
+
+}
+
+
+namespace search
+{
+
+namespace diskindex
+{
+
+enum class BitVectorKeyScope
+{
+ SHARED_WORDS,
+ PERFIELD_WORDS
+};
+
+const char *getBitVectorKeyScopeSuffix(BitVectorKeyScope scope);
+
+}
+
+}
+
+namespace vespalib
+{
+
+nbostream &
+operator<<(nbostream &stream,
+ const search::diskindex::BitVectorKeyScope &scope);
+
+nbostream &
+operator>>(nbostream &stream,
+ search::diskindex::BitVectorKeyScope &scope);
+
+}
diff --git a/searchlib/src/vespa/searchlib/diskindex/checkpointfile.cpp b/searchlib/src/vespa/searchlib/diskindex/checkpointfile.cpp
new file mode 100644
index 00000000000..f8005a6ba97
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/diskindex/checkpointfile.cpp
@@ -0,0 +1,189 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".diskindex.checkpointfile");
+#include "checkpointfile.h"
+#include <vespa/vespalib/data/fileheader.h>
+#include <vespa/searchlib/common/fileheadercontext.h>
+
+using vespalib::getLastErrorString;
+
+namespace search
+{
+
+namespace diskindex
+{
+
+using common::FileHeaderContext;
+
+CheckPointFile::CheckPointFile(const vespalib::string &name)
+ : _file(),
+ _name(name),
+ _nameNew(name + ".NEW"),
+ _nameNewNew(name + ".NEW.NEW"),
+ _writeOpened(false),
+ _headerLen(0u)
+{
+}
+
+
+CheckPointFile::~CheckPointFile(void)
+{
+ close();
+}
+
+
+void
+CheckPointFile::writeOpen(const FileHeaderContext &fileHeaderContext)
+{
+ FastOS_File::Delete(_nameNewNew.c_str());
+ _file.OpenWriteOnly(_nameNewNew.c_str());
+ _writeOpened = true;
+ makeHeader(fileHeaderContext);
+}
+
+
+bool
+CheckPointFile::readOpen(void)
+{
+ bool openres;
+
+ openres = _file.OpenReadOnly(_name.c_str());
+ if (!openres) {
+ bool renameres = FastOS_File::Rename(_nameNew.c_str(),
+ _name.c_str());
+ if (!renameres)
+ return false;
+ openres = _file.OpenReadOnly(_name.c_str());
+ if (!openres)
+ return false;
+ }
+ _headerLen = readHeader();
+ return true;
+}
+
+
+void
+CheckPointFile::close(void)
+{
+ if (_writeOpened) {
+ _file.Sync();
+ }
+ _file.Close();
+ if (_writeOpened) {
+ updateHeader();
+ rename1();
+ rename2();
+ }
+ _writeOpened = false;
+}
+
+
+void
+CheckPointFile::rename1(void)
+{
+ FastOS_File::Delete(_nameNew.c_str());
+ bool renameres = FastOS_File::Rename(_nameNewNew.c_str(),
+ _nameNew.c_str());
+ if (!renameres) {
+ LOG(error, "FATAL: rename %s -> %s failed: %s",
+ _nameNewNew.c_str(), _nameNew.c_str(), getLastErrorString().c_str());
+ abort();
+ }
+}
+
+
+void
+CheckPointFile::rename2(void)
+{
+ FastOS_File::Delete(_name.c_str());
+ bool renameres = FastOS_File::Rename(_nameNew.c_str(), _name.c_str());
+ if (!renameres) {
+ LOG(error, "FATAL: rename %s -> %s failed: %s",
+ _nameNew.c_str(), _name.c_str(), getLastErrorString().c_str());
+ abort();
+ }
+}
+
+
+void
+CheckPointFile::remove(void)
+{
+ FastOS_File::Delete(_nameNew.c_str());
+ FastOS_File::Delete(_name.c_str());
+}
+
+
+
+void
+CheckPointFile::write(vespalib::nbostream &buf,
+ const FileHeaderContext &fileHeaderContext)
+{
+ writeOpen(fileHeaderContext);
+ _file.WriteBuf(buf.peek(), buf.size());
+ close();
+}
+
+
+bool
+CheckPointFile::read(vespalib::nbostream &buf)
+{
+ if (!readOpen())
+ return false;
+ size_t sz = _file.GetSize() - _headerLen;
+
+ std::vector<char> tmp(sz);
+ _file.ReadBuf(&tmp[0], sz);
+ buf.clear();
+ buf.write(&tmp[0], sz);
+ std::vector<char>().swap(tmp);
+ close();
+ return true;
+}
+
+
+void
+CheckPointFile::makeHeader(const FileHeaderContext &fileHeaderContext)
+{
+ vespalib::FileHeader header;
+
+ typedef vespalib::GenericHeader::Tag Tag;
+ fileHeaderContext.addTags(header, _file.GetFileName());
+ header.putTag(Tag("frozen", 0));
+ header.putTag(Tag("desc", "Check point file"));
+ header.writeFile(_file);
+}
+
+
+void
+CheckPointFile::updateHeader(void)
+{
+ vespalib::FileHeader h;
+ FastOS_File f;
+ f.OpenReadWrite(_nameNewNew.c_str());
+ h.readFile(f);
+ FileHeaderContext::setFreezeTime(h);
+ typedef vespalib::GenericHeader::Tag Tag;
+ h.putTag(Tag("frozen", 1));
+ h.rewriteFile(f);
+ f.Sync();
+ f.Close();
+}
+
+
+uint32_t
+CheckPointFile::readHeader(void)
+{
+ vespalib::FileHeader h;
+ uint32_t headerLen = h.readFile(_file);
+ _file.SetPosition(headerLen);
+ assert(h.hasTag("frozen"));
+ assert(h.getTag("frozen").asInteger() != 0);
+ return headerLen;
+}
+
+
+} // namespace diskindex
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/diskindex/checkpointfile.h b/searchlib/src/vespa/searchlib/diskindex/checkpointfile.h
new file mode 100644
index 00000000000..f8aed3a6cdd
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/diskindex/checkpointfile.h
@@ -0,0 +1,73 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/vespalib/stllike/string.h>
+#include <vespa/vespalib/objects/nbostream.h>
+
+namespace search
+{
+
+namespace common
+{
+
+class FileHeaderContext;
+
+}
+
+namespace diskindex
+{
+
+class CheckPointFile
+{
+public:
+ FastOS_File _file;
+ vespalib::string _name;
+ vespalib::string _nameNew;
+ vespalib::string _nameNewNew;
+ bool _writeOpened;
+ uint32_t _headerLen;
+
+ void
+ writeOpen(const common::FileHeaderContext &fileHeaderContext);
+
+ bool
+ readOpen(void);
+
+ void
+ close(void);
+
+ void
+ rename1(void);
+
+ void
+ rename2(void);
+
+ void
+ remove(void);
+
+ void
+ makeHeader(const common::FileHeaderContext &fileHeaderContext);
+
+ void
+ updateHeader(void);
+
+ uint32_t
+ readHeader(void);
+public:
+ CheckPointFile(const vespalib::string &name);
+
+ ~CheckPointFile(void);
+
+ void
+ write(vespalib::nbostream &buf,
+ const common::FileHeaderContext &fileHeaderContext);
+
+ bool
+ read(vespalib::nbostream &buf);
+};
+
+
+} // namespace diskindex
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/diskindex/dictionarywordreader.cpp b/searchlib/src/vespa/searchlib/diskindex/dictionarywordreader.cpp
new file mode 100644
index 00000000000..d0e611e3136
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/diskindex/dictionarywordreader.cpp
@@ -0,0 +1,71 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+#include "dictionarywordreader.h"
+#include <vespa/searchlib/index/schemautil.h>
+#include <vespa/searchlib/index/olddictionaryfile.h>
+#include <vespa/vespalib/util/error.h>
+LOG_SETUP(".diskindex.dictionarywordreader");
+
+namespace search
+{
+
+namespace diskindex
+{
+
+using vespalib::getLastErrorString;
+using index::SchemaUtil;
+
+DictionaryWordReader::DictionaryWordReader(void)
+ : _word(),
+ _wordNum(noWordNumHigh()),
+ _old2newwordfile(),
+ _dictFile()
+{
+}
+
+
+DictionaryWordReader::~DictionaryWordReader(void)
+{
+}
+
+
+bool
+DictionaryWordReader::open(const vespalib::stringref &dictionaryName,
+ const vespalib::stringref & wordMapName,
+ const TuneFileSeqRead &tuneFileRead)
+{
+ _old2newwordfile.reset(new Fast_BufferedFile(new FastOS_File));
+ _dictFile.reset(new PageDict4FileSeqRead);
+ if (!_dictFile->open(dictionaryName, tuneFileRead)) {
+ LOG(error, "Could not open dictionary %s: %s",
+ dictionaryName.c_str(), getLastErrorString().c_str());
+ return false;
+ }
+ _wordNum = noWordNum();
+
+ // Make a mapping from old to new wordID
+ if (tuneFileRead.getWantDirectIO())
+ _old2newwordfile->EnableDirectIO();
+ // no checking possible
+ _old2newwordfile->WriteOpen(wordMapName.c_str());
+ _old2newwordfile->SetSize(0);
+
+ return true;
+}
+
+void
+DictionaryWordReader::close(void)
+{
+ if (!_dictFile->close())
+ LOG(error, "Error closing input dictionary");
+ _old2newwordfile->Flush();
+ _old2newwordfile->Sync();
+ _old2newwordfile->Close();
+}
+
+
+} // namespace diskindex
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/diskindex/dictionarywordreader.h b/searchlib/src/vespa/searchlib/diskindex/dictionarywordreader.h
new file mode 100644
index 00000000000..744b73bf370
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/diskindex/dictionarywordreader.h
@@ -0,0 +1,135 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include "pagedict4file.h"
+#include <vespa/fastlib/io/bufferedfile.h>
+
+namespace search
+{
+
+namespace diskindex
+{
+
+
+/*
+ * Helper class, will be used by fusion later to handle generation of
+ * word numbering without writing a word list file.
+ */
+class WordAggregator
+{
+private:
+ vespalib::string _word;
+ uint64_t _wordNum;
+
+public:
+ WordAggregator()
+ : _word(),
+ _wordNum(0)
+ {
+ }
+
+ void
+ tryWriteWord(const vespalib::stringref &word)
+ {
+ if (word != _word || _wordNum == 0) {
+ ++_wordNum;
+ _word = word;
+ }
+ }
+
+ uint64_t
+ getWordNum() const
+ {
+ return _wordNum;
+ }
+};
+
+
+/*
+ * Class used to merge words in multiple dictionaries for
+ * new style fusion (using WordAggregator).
+ */
+class DictionaryWordReader
+{
+public:
+ vespalib::string _word;
+ uint64_t _wordNum;
+ index::PostingListCounts _counts;
+
+private:
+ // "owners" of file handles.
+ std::unique_ptr<Fast_BufferedFile> _old2newwordfile;
+
+ using DictionaryFileSeqRead = index::DictionaryFileSeqRead;
+ std::unique_ptr<DictionaryFileSeqRead> _dictFile;
+
+ void
+ allocFiles(void);
+
+ static uint64_t
+ noWordNumHigh(void)
+ {
+ return std::numeric_limits<uint64_t>::max();
+ }
+
+ static uint64_t
+ noWordNum(void)
+ {
+ return 0u;
+ }
+
+public:
+ DictionaryWordReader(void);
+
+ ~DictionaryWordReader(void);
+
+ bool
+ isValid(void) const
+ {
+ return _wordNum != noWordNumHigh();
+ }
+
+ bool
+ operator<(const DictionaryWordReader &rhs) const
+ {
+ if (!isValid())
+ return false;
+ if (!rhs.isValid())
+ return true;
+ return _word < rhs._word;
+ }
+
+ void
+ read(void)
+ {
+ _dictFile->readWord(_word, _wordNum, _counts);
+ }
+
+ bool
+ open(const vespalib::stringref & dictionaryName,
+ const vespalib::stringref & wordMapName,
+ const TuneFileSeqRead &tuneFileRead);
+
+ void
+ close(void);
+
+ void
+ writeNewWordNum(uint64_t newWordNum)
+ {
+ _old2newwordfile->WriteBuf(&newWordNum, sizeof(newWordNum));
+ }
+
+ void
+ write(WordAggregator &writer)
+ {
+ writer.tryWriteWord(_word);
+ writeNewWordNum(writer.getWordNum());
+ }
+};
+
+
+
+} // namespace diskindex
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/diskindex/diskindex.cpp b/searchlib/src/vespa/searchlib/diskindex/diskindex.cpp
new file mode 100644
index 00000000000..8cc12c88463
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/diskindex/diskindex.cpp
@@ -0,0 +1,476 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".diskindex.diskindex");
+#include "diskindex.h"
+#include "disktermblueprint.h"
+#include <vespa/searchlib/index/schemautil.h>
+#include <vespa/searchlib/queryeval/create_blueprint_visitor_helper.h>
+#include <vespa/searchlib/queryeval/split_float.h>
+#include <vespa/searchlib/queryeval/leaf_blueprints.h>
+#include <vespa/searchlib/queryeval/intermediate_blueprints.h>
+#include <vespa/searchlib/queryeval/termasstring.h>
+#include <vespa/searchlib/util/dirtraverse.h>
+#include <vespa/searchlib/query/tree/simplequery.h>
+#include "pagedict4randread.h"
+#include "fileheader.h"
+#include "bitvectorkeyscope.h"
+
+using namespace search::index;
+using namespace search::query;
+using namespace search::queryeval;
+
+namespace search
+{
+
+namespace diskindex
+{
+
+void swap(DiskIndex::LookupResult & a, DiskIndex::LookupResult & b)
+{
+ a.swap(b);
+}
+
+DiskIndex::LookupResult::LookupResult()
+ : indexId(0u),
+ wordNum(0),
+ counts(),
+ bitOffset(0)
+{
+}
+
+DiskIndex::DiskIndex(const vespalib::string &indexDir, size_t cacheSize)
+ : _indexDir(indexDir),
+ _cacheSize(cacheSize),
+ _schema(),
+ _postingFiles(),
+ _bitVectorDicts(),
+ _dicts(),
+ _tuneFileSearch(),
+ _cache(*this, cacheSize)
+{
+}
+
+bool
+DiskIndex::loadSchema(void)
+{
+ vespalib::string schemaName = _indexDir + "/schema.txt";
+ if (!_schema.loadFromFile(schemaName)) {
+ LOG(error,
+ "Could not open schema '%s'",
+ schemaName.c_str());
+ return false;
+ }
+ if (!SchemaUtil::validateSchema(_schema)) {
+ LOG(error,
+ "Could not validate schema loaded from '%s'",
+ schemaName.c_str());
+ return false;
+ }
+ return true;
+}
+
+
+bool
+DiskIndex::openDictionaries(const TuneFileSearch &tuneFileSearch)
+{
+ for (SchemaUtil::IndexIterator itr(_schema); itr.isValid(); ++itr) {
+ vespalib::string dictName =
+ _indexDir + "/" + itr.getName() + "/dictionary";
+ auto dict = std::make_unique<PageDict4RandRead>();
+ if (!dict->open(dictName, tuneFileSearch._read)) {
+ LOG(warning, "Could not open disk dictionary '%s'",
+ dictName.c_str());
+ _dicts.clear();
+ return false;
+ }
+ _dicts.push_back(std::move(dict));
+ }
+ return true;
+}
+
+
+bool
+DiskIndex::openField(const vespalib::string &fieldDir,
+ const TuneFileSearch &tuneFileSearch)
+{
+ vespalib::string postingName = fieldDir + "posocc.dat.compressed";
+
+ DiskPostingFile::SP pFile;
+ BitVectorDictionary::SP bDict;
+ FileHeader fileHeader;
+ bool dynamicK = false;
+ if (fileHeader.taste(postingName, tuneFileSearch._read)) {
+ if (fileHeader.getVersion() == 1 &&
+ fileHeader.getBigEndian() &&
+ fileHeader.getFormats().size() == 2 &&
+ fileHeader.getFormats()[0] ==
+ DiskPostingFileDynamicKReal::getIdentifier() &&
+ fileHeader.getFormats()[1] ==
+ DiskPostingFileDynamicKReal::getSubIdentifier()) {
+ dynamicK = true;
+ } else if (fileHeader.getVersion() == 1 &&
+ fileHeader.getBigEndian() &&
+ fileHeader.getFormats().size() == 2 &&
+ fileHeader.getFormats()[0] ==
+ DiskPostingFileReal::getIdentifier() &&
+ fileHeader.getFormats()[1] ==
+ DiskPostingFileReal::getSubIdentifier()) {
+ dynamicK = false;
+ } else {
+ LOG(warning,
+ "Could not detect format for posocc file read %s",
+ postingName.c_str());
+ }
+ }
+ pFile.reset(dynamicK ?
+ new DiskPostingFileDynamicKReal() :
+ new DiskPostingFileReal());
+ if (!pFile->open(postingName, tuneFileSearch._read)) {
+ LOG(warning,
+ "Could not open posting list file '%s'",
+ postingName.c_str());
+ return false;
+ }
+
+ bDict.reset(new BitVectorDictionary());
+ if (!bDict->open(fieldDir, tuneFileSearch._read,
+ BitVectorKeyScope::PERFIELD_WORDS)) {
+ LOG(warning,
+ "Could not open bit vector dictionary in '%s'",
+ fieldDir.c_str());
+ return false;
+ }
+ _postingFiles.push_back(pFile);
+ _bitVectorDicts.push_back(bDict);
+ return true;
+}
+
+
+bool
+DiskIndex::setup(const TuneFileSearch &tuneFileSearch)
+{
+ if (!loadSchema() || !openDictionaries(tuneFileSearch))
+ return false;
+ for (SchemaUtil::IndexIterator itr(_schema); itr.isValid(); ++itr) {
+ vespalib::string fieldDir =
+ _indexDir + "/" + itr.getName() + "/";
+ if (!openField(fieldDir, tuneFileSearch))
+ return false;
+ }
+ _tuneFileSearch = tuneFileSearch;
+ return true;
+}
+
+
+bool
+DiskIndex::setup(const TuneFileSearch &tuneFileSearch,
+ const DiskIndex &old)
+{
+ if (tuneFileSearch != old._tuneFileSearch)
+ return setup(tuneFileSearch);
+ if (!loadSchema() || !openDictionaries(tuneFileSearch))
+ return false;
+ const Schema &oldSchema = old._schema;
+ for (SchemaUtil::IndexIterator itr(_schema); itr.isValid(); ++itr) {
+ vespalib::string fieldDir =
+ _indexDir + "/" + itr.getName() + "/";
+ SchemaUtil::IndexSettings settings = itr.getIndexSettings();
+ if (settings.hasError())
+ return false;
+ bool hasPhraseOcc = settings.hasPhrases();
+ SchemaUtil::IndexIterator oItr(oldSchema, itr);
+ if (!itr.hasMatchingOldFields(oldSchema, hasPhraseOcc) ||
+ !oItr.isValid()) {
+ if (!openField(fieldDir, tuneFileSearch))
+ return false;
+ } else {
+ uint32_t oldPacked = oItr.getIndex();
+ _postingFiles.push_back(old._postingFiles[oldPacked]);
+ _bitVectorDicts.push_back(old._bitVectorDicts[oldPacked]);
+ }
+ }
+ _tuneFileSearch = tuneFileSearch;
+ return true;
+}
+
+DiskIndex::LookupResult::UP
+DiskIndex::lookup(uint32_t index, const vespalib::stringref & word)
+{
+ /** Only used for testing */
+ IndexList indexes;
+ indexes.push_back(index);
+ Key key(indexes, word);
+ LookupResultVector resultV(indexes.size());
+ LookupResult::UP result;
+ if ( read(key, resultV)) {
+ result.reset(new LookupResult());
+ result->swap(resultV[0]);
+ }
+ return result;
+}
+
+namespace {
+
+bool
+containsAll(const DiskIndex::IndexList & indexes, const DiskIndex::LookupResultVector & result)
+{
+ for (uint32_t index : indexes) {
+ bool found(false);
+ for (size_t i(0); !found && (i < result.size()); i++) {
+ found = index == result[i].indexId;
+ }
+ if ( ! found ) {
+ return false;
+ }
+ }
+ return true;
+}
+
+DiskIndex::IndexList
+unite(const DiskIndex::IndexList & indexes, const DiskIndex::LookupResultVector & result)
+{
+ vespalib::hash_set<uint32_t> all;
+ for (uint32_t index : indexes) {
+ all.insert(index);
+ }
+ for (const DiskIndex::LookupResult & lr : result) {
+ all.insert(lr.indexId);
+ }
+ DiskIndex::IndexList v;
+ v.reserve(all.size());
+ for (uint32_t indexId : all) {
+ v.push_back(indexId);
+ }
+ return v;
+}
+
+}
+
+DiskIndex::LookupResultVector
+DiskIndex::lookup(const std::vector<uint32_t> & indexes, const vespalib::stringref & word)
+{
+ Key key(indexes, word);
+ LookupResultVector result;
+ if (_cacheSize > 0) {
+ result = _cache.read(key);
+ if (!containsAll(indexes, result)) {
+ key = Key(unite(indexes, result), word);
+ _cache.invalidate(key);
+ result = _cache.read(key);
+ }
+ } else {
+ read(key, result);
+ }
+ return result;
+}
+
+bool
+DiskIndex::read(const Key & key, LookupResultVector & result)
+{
+ uint64_t wordNum(0);
+ const IndexList & indexes(key.getIndexes());
+ result.resize(indexes.size());
+ for (size_t i(0); i < result.size(); i++) {
+ LookupResult & lr(result[i]);
+ lr.indexId = indexes[i];
+ PostingListOffsetAndCounts offsetAndCounts;
+ wordNum = 0;
+ SchemaUtil::IndexIterator it(_schema, lr.indexId);
+ uint32_t fieldId = it.getIndex();
+ if (fieldId < _dicts.size()) {
+ (void) _dicts[fieldId]->lookup(key.getWord(), wordNum,
+ offsetAndCounts);
+ }
+ lr.wordNum = wordNum;
+ lr.counts.swap(offsetAndCounts._counts);
+ lr.bitOffset = offsetAndCounts._offset;
+ }
+ return true;
+}
+
+index::PostingListHandle::UP
+DiskIndex::readPostingList(const LookupResult &lookupRes) const
+{
+ PostingListHandle::UP handle(new PostingListHandle());
+ handle->_bitOffset = lookupRes.bitOffset;
+ handle->_bitLength = lookupRes.counts._bitLength;
+ SchemaUtil::IndexIterator it(_schema, lookupRes.indexId);
+ handle->_file = _postingFiles[it.getIndex()].get();
+ if (handle->_file == NULL) {
+ return PostingListHandle::UP();
+ }
+ const uint32_t firstSegment = 0;
+ const uint32_t numSegments = 0; // means all segments
+ handle->_file->readPostingList(lookupRes.counts,
+ firstSegment,
+ numSegments,
+ *handle);
+ return handle;
+}
+
+
+BitVector::UP
+DiskIndex::readBitVector(const LookupResult &lookupRes) const
+{
+ SchemaUtil::IndexIterator it(_schema, lookupRes.indexId);
+ BitVectorDictionary * dict = _bitVectorDicts[it.getIndex()].get();
+ if (dict == NULL) {
+ return BitVector::UP();
+ }
+ return dict->lookup(lookupRes.wordNum);
+}
+
+
+uint64_t
+DiskIndex::getSize() const
+{
+ search::DirectoryTraverse dirt(_indexDir.c_str());
+ return dirt.GetTreeSize();
+}
+
+
+namespace
+{
+
+DiskIndex::LookupResult _G_nothing;
+
+class LookupCache
+{
+public:
+ LookupCache(DiskIndex & diskIndex, const std::vector<uint32_t> & fieldIds) :
+ _diskIndex(diskIndex),
+ _fieldIds(fieldIds),
+ _cache()
+ {
+ }
+ const DiskIndex::LookupResult &
+ lookup(const vespalib::string & word, uint32_t fieldId) {
+ Cache::const_iterator it = _cache.find(word);
+ if (it == _cache.end()) {
+ _cache[word] = _diskIndex.lookup(_fieldIds, word);
+ it = _cache.find(word);
+ }
+ for (size_t i(0); i < it->second.size(); i++) {
+ if (it->second[i].indexId == fieldId) {
+ return it->second[i];
+ }
+ }
+ return _G_nothing;
+ }
+private:
+ typedef vespalib::hash_map<vespalib::string, DiskIndex::LookupResultVector> Cache;
+ DiskIndex & _diskIndex;
+ const std::vector<uint32_t> & _fieldIds;
+ Cache _cache;
+};
+
+class CreateBlueprintVisitor : public CreateBlueprintVisitorHelper
+{
+private:
+ LookupCache &_cache;
+ DiskIndex &_diskIndex;
+ const FieldSpec &_field;
+ const uint32_t _fieldId;
+
+public:
+ CreateBlueprintVisitor(LookupCache & cache, DiskIndex &diskIndex,
+ const IRequestContext & requestContext,
+ const FieldSpec &field,
+ uint32_t fieldId)
+ : CreateBlueprintVisitorHelper(diskIndex, field, requestContext),
+ _cache(cache),
+ _diskIndex(diskIndex),
+ _field(field),
+ _fieldId(fieldId)
+ {
+ }
+
+ template <class TermNode>
+ void
+ visitTerm(TermNode &n)
+ {
+ const vespalib::string termStr = termAsString(n);
+ const DiskIndex::LookupResult & lookupRes = _cache.lookup(termStr, _fieldId);
+ if (lookupRes.valid()) {
+ bool useBitVector = _field.isFilter();
+ DiskIndex::LookupResult::UP copy(new DiskIndex::LookupResult(lookupRes));
+ setResult(make_UP(new DiskTermBlueprint(_field, _diskIndex, std::move(copy), useBitVector)));
+ } else {
+ setResult(make_UP(new EmptyBlueprint(_field)));
+ }
+ }
+
+ virtual void visit(NumberTerm &n) {
+ handleNumberTermAsText(n);
+ }
+
+ virtual void visit(LocationTerm &n) { visitTerm(n); }
+ virtual void visit(PrefixTerm &n) { visitTerm(n); }
+ virtual void visit(RangeTerm &n) { visitTerm(n); }
+ virtual void visit(StringTerm &n) { visitTerm(n); }
+ virtual void visit(SubstringTerm &n) { visitTerm(n); }
+ virtual void visit(SuffixTerm &n) { visitTerm(n); }
+ virtual void visit(RegExpTerm &n) { visitTerm(n); }
+ virtual void visit(PredicateQuery &) { }
+};
+
+
+Blueprint::UP
+createBlueprintHelper(LookupCache & cache, DiskIndex & diskIndex, const IRequestContext & requestContext,
+ const FieldSpec &field, uint32_t fieldId, const Node &term)
+{
+ if (fieldId != Schema::UNKNOWN_FIELD_ID) {
+ CreateBlueprintVisitor visitor(cache, diskIndex, requestContext, field, fieldId);
+ const_cast<Node &>(term).accept(visitor);
+ return visitor.getResult();
+ }
+ return Blueprint::UP(new EmptyBlueprint(field));
+}
+
+} // namespace <unnamed>
+
+Blueprint::UP
+DiskIndex::createBlueprint(const IRequestContext & requestContext, const FieldSpec &field, const Node &term)
+{
+ std::vector<uint32_t> fieldIds;
+ fieldIds.push_back(_schema.getIndexFieldId(field.getName()));
+ LookupCache cache(*this, fieldIds);
+ return createBlueprintHelper(cache, *this, requestContext, field, fieldIds[0], term);
+}
+
+
+Blueprint::UP
+DiskIndex::createBlueprint(const IRequestContext & requestContext, const FieldSpecList &fields, const Node &term)
+{
+ if (fields.empty()) {
+ return Blueprint::UP(new EmptyBlueprint());
+ }
+
+ std::vector<uint32_t> fieldIds;
+ fieldIds.reserve(fields.size());
+ for (size_t i(0); i< fields.size(); i++) {
+ const FieldSpec & field = fields[i];
+ uint32_t fieldId = _schema.getIndexFieldId(field.getName());
+ if (fieldId != Schema::UNKNOWN_FIELD_ID) {
+ fieldIds.push_back(_schema.getIndexFieldId(field.getName()));
+ }
+ }
+ Blueprint::UP result(new OrBlueprint());
+ OrBlueprint & orbp(static_cast<OrBlueprint &>(*result));
+ LookupCache cache(*this, fieldIds);
+ for (size_t i(0); i< fields.size(); i++) {
+ const FieldSpec & field = fields[i];
+ orbp.addChild(createBlueprintHelper(cache, *this, requestContext, field, _schema.getIndexFieldId(field.getName()), term));
+ }
+ if (orbp.childCnt() == 1) {
+ return orbp.removeChild(0);
+ } else {
+ return result;
+ }
+}
+
+
+} // namespace diskindex
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/diskindex/diskindex.h b/searchlib/src/vespa/searchlib/diskindex/diskindex.h
new file mode 100644
index 00000000000..840f4c32738
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/diskindex/diskindex.h
@@ -0,0 +1,193 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/vespalib/stllike/cache.h>
+#include <vespa/searchlib/diskindex/bitvectordictionary.h>
+#include <vespa/searchlib/index/dictionaryfile.h>
+#include <vespa/searchlib/diskindex/zcposoccrandread.h>
+#include <vespa/searchlib/queryeval/searchable.h>
+#include <vespa/vespalib/stllike/string.h>
+
+namespace search {
+
+namespace diskindex {
+
+/**
+ * This class represents a disk index with a common dictionary, and
+ * posting list files and bit vector files for each field.
+ * Parts of the disk dictionary and all bit vector
+ * dictionaries are loaded into memory during setup. All other files
+ * are just opened, ready for later access.
+ **/
+class DiskIndex : public queryeval::Searchable
+{
+public:
+ /**
+ * The result after performing a disk dictionary lookup.
+ **/
+ struct LookupResult {
+ uint32_t indexId;
+ uint64_t wordNum;
+ index::PostingListCounts counts;
+ uint64_t bitOffset;
+ typedef std::unique_ptr<LookupResult> UP;
+ LookupResult();
+ bool valid() const { return counts._numDocs > 0; }
+ void swap(LookupResult & rhs) {
+ std::swap(indexId , rhs.indexId);
+ std::swap(wordNum , rhs.wordNum);
+ counts.swap(rhs.counts);
+ std::swap(bitOffset , rhs.bitOffset);
+ }
+ };
+ typedef std::vector<LookupResult> LookupResultVector;
+ typedef std::vector<uint32_t> IndexList;
+
+ class Key {
+ public:
+ Key() : _indexes() { }
+ Key(const IndexList & indexes, vespalib::stringref word) :
+ _word(word),
+ _indexes(indexes)
+ { }
+ uint32_t hash() const {
+ return vespalib::hashValue(_word.c_str(), _word.size());
+ }
+ bool operator == (const Key & rhs) const {
+ return _word == rhs._word;
+ }
+ void push_back(uint32_t indexId) { _indexes.push_back(indexId); }
+ const IndexList & getIndexes() const { return _indexes; }
+ const vespalib::string & getWord() const { return _word; }
+ private:
+ vespalib::string _word;
+ IndexList _indexes;
+ };
+private:
+ typedef index::PostingListFileRandRead DiskPostingFile;
+ typedef Zc4PosOccRandRead DiskPostingFileReal;
+ typedef ZcPosOccRandRead DiskPostingFileDynamicKReal;
+ typedef vespalib::cache<vespalib::CacheParam<vespalib::LruParam<Key, LookupResultVector>, DiskIndex>> Cache;
+
+ vespalib::string _indexDir;
+ size_t _cacheSize;
+ index::Schema _schema;
+ std::vector<DiskPostingFile::SP> _postingFiles;
+ std::vector<BitVectorDictionary::SP> _bitVectorDicts;
+ std::vector<std::unique_ptr<index::DictionaryFileRandRead>> _dicts;
+ TuneFileSearch _tuneFileSearch;
+ Cache _cache;
+
+ bool
+ loadSchema(void);
+
+ bool
+ openDictionaries(const TuneFileSearch &tuneFileSearch);
+
+ bool
+ openField(const vespalib::string &fieldDir,
+ const TuneFileSearch &tuneFileSearch);
+
+public:
+ /**
+ * Create a view of the disk index located in the given directory
+ * described by the given schema.
+ *
+ * @param indexDir the directory where the disk index is located.
+ **/
+ DiskIndex(const vespalib::string &indexDir, size_t cacheSize=0);
+
+ /**
+ * Setup this instance by opening and loading relevant index files.
+ *
+ * @return true if this instance was successfully setup.
+ **/
+ bool
+ setup(const TuneFileSearch &tuneFileSearch);
+
+ bool
+ setup(const TuneFileSearch &tuneFileSearch, const DiskIndex &old);
+
+ /**
+ * Perform a dictionary lookup for the given word in the given
+ * field.
+ *
+ * @param indexId the id of the field to
+ * perform lookup for.
+ * @param word the word to lookup.
+ * @return the lookup result or NULL if the word is not found.
+ **/
+ LookupResult::UP
+ lookup(uint32_t indexId, const vespalib::stringref & word);
+
+ LookupResultVector
+ lookup(const std::vector<uint32_t> & indexes, const vespalib::stringref & word);
+
+
+ /**
+ * Read the posting list corresponding to the given lookup result.
+ *
+ * @param lookupRes the result of the previous dictionary lookup.
+ * @return a handle for the posting list in memory.
+ **/
+ index::PostingListHandle::UP
+ readPostingList(const LookupResult &lookupRes) const;
+
+ /**
+ * Read the bit vector corresponding to the given lookup result.
+ *
+ * @param lookupRes the result of the previous dictionary lookup.
+ * @return the bit vector or NULL if no bit vector exists for the
+ * word in the lookup result.
+ **/
+ BitVector::UP
+ readBitVector(const LookupResult &lookupRes) const;
+
+ // Inherit doc from Searchable
+ virtual queryeval::Blueprint::UP
+ createBlueprint(const queryeval::IRequestContext & requestContext,
+ const queryeval::FieldSpec &field,
+ const query::Node &term);
+
+ virtual queryeval::Blueprint::UP
+ createBlueprint(const queryeval::IRequestContext & requestContext,
+ const queryeval::FieldSpecList &fields,
+ const query::Node &term);
+
+ /**
+ * Get the size on disk of this index.
+ * @return the size of the index.
+ */
+ uint64_t
+ getSize() const;
+
+ const index::Schema &
+ getSchema(void) const
+ {
+ return _schema;
+ }
+
+ const vespalib::string &
+ getIndexDir(void) const
+ {
+ return _indexDir;
+ }
+
+ const TuneFileSearch &
+ getTuneFileSearch(void) const
+ {
+ return _tuneFileSearch;
+ }
+
+ /**
+ * Needed for the Cache::BackingStore interface.
+ */
+ bool read(const Key & key, LookupResultVector & result);
+};
+
+void swap(DiskIndex::LookupResult & a, DiskIndex::LookupResult & b);
+
+} // namespace diskindex
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/diskindex/disktermblueprint.cpp b/searchlib/src/vespa/searchlib/diskindex/disktermblueprint.cpp
new file mode 100644
index 00000000000..258eaac51cf
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/diskindex/disktermblueprint.cpp
@@ -0,0 +1,124 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".diskindex.disktermblueprint");
+
+#include "disktermblueprint.h"
+#include <vespa/searchlib/common/bitvectoriterator.h>
+#include <vespa/searchlib/queryeval/booleanmatchiteratorwrapper.h>
+#include <vespa/searchlib/queryeval/intermediate_blueprints.h>
+#include <vespa/searchlib/queryeval/equiv_blueprint.h>
+#include <vespa/vespalib/util/stringfmt.h>
+
+using search::BitVectorIterator;
+using search::fef::TermFieldMatchDataArray;
+using search::index::Schema;
+using search::queryeval::BooleanMatchIteratorWrapper;
+using search::queryeval::FieldSpecBase;
+using search::queryeval::FieldSpecBaseList;
+using search::queryeval::SearchIterator;
+using search::queryeval::LeafBlueprint;
+using search::queryeval::EquivBlueprint;
+using search::queryeval::Blueprint;
+
+namespace search {
+namespace diskindex {
+
+namespace {
+
+vespalib::string
+getName(uint32_t indexId)
+{
+ return vespalib::make_string("fieldId(%u)", indexId);
+}
+
+}
+
+DiskTermBlueprint::DiskTermBlueprint(const FieldSpecBase & field,
+ const search::diskindex::DiskIndex & diskIndex,
+ search::diskindex::DiskIndex::LookupResult::UP lookupRes,
+ bool useBitVector) :
+ SimpleLeafBlueprint(field),
+ _field(field),
+ _diskIndex(diskIndex),
+ _lookupRes(std::move(lookupRes)),
+ _useBitVector(useBitVector),
+ _fetchPostingsDone(false),
+ _hasEquivParent(false),
+ _postingHandle(),
+ _bitVector()
+{
+ setEstimate(HitEstimate(_lookupRes->counts._numDocs,
+ _lookupRes->counts._numDocs == 0));
+}
+
+
+DiskTermBlueprint::DiskTermBlueprint(const DiskTermBlueprint & rhs)
+ : SimpleLeafBlueprint(rhs),
+ _field(rhs._field),
+ _diskIndex(rhs._diskIndex),
+ _lookupRes(new DiskIndex::LookupResult(*rhs._lookupRes)),
+ _useBitVector(rhs._useBitVector),
+ _fetchPostingsDone(rhs._fetchPostingsDone),
+ _hasEquivParent(rhs._hasEquivParent),
+ _postingHandle(),
+ _bitVector()
+{
+ if (_fetchPostingsDone) {
+ if (rhs._bitVector.get() != NULL)
+ _bitVector = BitVector::create(*rhs._bitVector);
+ if (_bitVector.get() == NULL) {
+ _postingHandle = _diskIndex.readPostingList(*_lookupRes);
+ }
+ }
+}
+
+namespace {
+
+bool
+areAnyParentsEquiv(const Blueprint * node)
+{
+ return (node == NULL)
+ ? false
+ : (dynamic_cast<const EquivBlueprint *>(node) != NULL)
+ ? true
+ : areAnyParentsEquiv(node->getParent());
+}
+
+}
+
+void
+DiskTermBlueprint::fetchPostings(bool strict)
+{
+ (void) strict;
+ _hasEquivParent = areAnyParentsEquiv(getParent());
+ _bitVector = _diskIndex.readBitVector(*_lookupRes);
+ if (!_useBitVector || (_bitVector.get() == NULL)) {
+ _postingHandle = _diskIndex.readPostingList(*_lookupRes);
+ }
+ _fetchPostingsDone = true;
+}
+
+SearchIterator::UP
+DiskTermBlueprint::createLeafSearch(const TermFieldMatchDataArray & tfmda, bool strict) const
+{
+ if ((_bitVector.get() != NULL) && (_useBitVector || (tfmda[0]->isNotNeeded() && !_hasEquivParent))) {
+ LOG(debug, "Return BitVectorIterator: %s, wordNum(%" PRIu64 "), docCount(%" PRIu64 ")",
+ getName(_lookupRes->indexId).c_str(), _lookupRes->wordNum, _lookupRes->counts._numDocs);
+ return BitVectorIterator::create(_bitVector.get(), tfmda, strict);
+ }
+ SearchIterator::UP search(_postingHandle->createIterator(_lookupRes->counts, tfmda, _useBitVector));
+ if (_useBitVector) {
+ LOG(debug, "Return BooleanMatchIteratorWrapper: %s, wordNum(%" PRIu64 "), docCount(%" PRIu64 ")",
+ getName(_lookupRes->indexId).c_str(), _lookupRes->wordNum, _lookupRes->counts._numDocs);
+ return SearchIterator::UP(new BooleanMatchIteratorWrapper(std::move(search), tfmda));
+ }
+ LOG(debug, "Return posting list iterator: %s, wordNum(%" PRIu64 "), docCount(%" PRIu64 ")",
+ getName(_lookupRes->indexId).c_str(), _lookupRes->wordNum, _lookupRes->counts._numDocs);
+ return search;
+}
+
+} // namespace diskindex
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/diskindex/disktermblueprint.h b/searchlib/src/vespa/searchlib/diskindex/disktermblueprint.h
new file mode 100644
index 00000000000..f1790cd0cbd
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/diskindex/disktermblueprint.h
@@ -0,0 +1,53 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "diskindex.h"
+#include <vespa/searchlib/queryeval/blueprint.h>
+
+namespace search {
+namespace diskindex {
+
+/**
+ * Blueprint implementation for term searching in a disk index.
+ **/
+class DiskTermBlueprint : public search::queryeval::SimpleLeafBlueprint
+{
+private:
+ search::queryeval::FieldSpecBase _field;
+ const search::diskindex::DiskIndex & _diskIndex;
+ DiskIndex::LookupResult::UP _lookupRes;
+ bool _useBitVector;
+ bool _fetchPostingsDone;
+ bool _hasEquivParent;
+ search::index::PostingListHandle::UP _postingHandle;
+ search::BitVector::UP _bitVector;
+
+public:
+ /**
+ * Create a new blueprint.
+ *
+ * @param field the field to search in.
+ * @param diskIndex the disk index used to read the bit vector or posting list.
+ * @param lookupRes the result after disk dictionary lookup.
+ * @param useBitVector whether or not we should use bit vector.
+ **/
+ DiskTermBlueprint(const search::queryeval::FieldSpecBase & field,
+ const search::diskindex::DiskIndex & diskIndex,
+ search::diskindex::DiskIndex::LookupResult::UP lookupRes,
+ bool useBitVector);
+
+ DiskTermBlueprint(const DiskTermBlueprint &);
+
+ // Inherit doc from Blueprint.
+ // For now, this DiskTermBlueprint instance must have longer lifetime than the created iterator.
+ virtual search::queryeval::SearchIterator::UP
+ createLeafSearch(const search::fef::TermFieldMatchDataArray & tfmda, bool strict) const;
+
+ virtual void
+ fetchPostings(bool strict);
+};
+
+} // namespace diskindex
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/diskindex/docidmapper.cpp b/searchlib/src/vespa/searchlib/diskindex/docidmapper.cpp
new file mode 100644
index 00000000000..073c4b79031
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/diskindex/docidmapper.cpp
@@ -0,0 +1,73 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".diskindex.docidmapper");
+#include <vespa/vespalib/objects/nbostream.h>
+#include <vespa/searchlib/common/documentsummary.h>
+#include <vespa/searchlib/common/bitvector.h>
+#include <vespa/fastlib/io/bufferedfile.h>
+#include "docidmapper.h"
+
+#define NO_DOC static_cast<uint32_t>(-1)
+
+namespace search
+{
+
+namespace diskindex
+{
+
+
+DocIdMapping::DocIdMapping(void)
+ : _docIdLimit(0u),
+ _selector(NULL),
+ _selectorId(0)
+{
+}
+
+
+void
+DocIdMapping::clear(void)
+{
+ _docIdLimit = 0;
+ _selector = NULL;
+ _selectorId = 0;
+}
+
+
+void
+DocIdMapping::setup(uint32_t docIdLimit)
+{
+ _docIdLimit = docIdLimit;
+ _selector = NULL;
+ _selectorId = 0;
+}
+
+
+void
+DocIdMapping::setup(uint32_t docIdLimit,
+ const SelectorArray *selector,
+ uint8_t selectorId)
+{
+ _docIdLimit = docIdLimit;
+ _selector = selector;
+ _selectorId = selectorId;
+}
+
+
+bool
+DocIdMapping::readDocIdLimit(const vespalib::string &mergedDir)
+{
+ uint32_t docIdLimit = 0;
+ if (!search::docsummary::DocumentSummary::
+ readDocIdLimit(mergedDir, docIdLimit))
+ return false;
+ _docIdLimit = docIdLimit;
+ return true;
+}
+
+
+
+} // namespace diskindex
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/diskindex/docidmapper.h b/searchlib/src/vespa/searchlib/diskindex/docidmapper.h
new file mode 100644
index 00000000000..43e1ea44b89
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/diskindex/docidmapper.h
@@ -0,0 +1,91 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/vespalib/util/array.h>
+#include <vespa/vespalib/stllike/string.h>
+
+namespace search
+{
+
+class BitVector;
+
+namespace diskindex
+{
+
+typedef vespalib::Array<uint8_t, vespalib::DefaultAlloc> SelectorArray;
+
+class DocIdMapping
+{
+public:
+ uint32_t _docIdLimit;
+ const SelectorArray *_selector; // External ownership
+ uint8_t _selectorId;
+
+ DocIdMapping(void);
+
+ void
+ clear(void);
+
+ void
+ setup(uint32_t docIdLimit);
+
+ void
+ setup(uint32_t docIdLimit,
+ const SelectorArray *selector,
+ uint8_t selectorId);
+
+ bool
+ readDocIdLimit(const vespalib::string &dir);
+};
+
+
+class DocIdMapper
+{
+public:
+ const uint8_t *_selector;
+ uint32_t _docIdLimit; // Limit on legal input values
+ uint32_t _selectorLimit; // Limit on output
+ uint8_t _selectorId;
+
+ DocIdMapper(void)
+ : _selector(NULL),
+ _docIdLimit(0u),
+ _selectorLimit(0),
+ _selectorId(0u)
+ {
+ }
+
+ void
+ setup(const DocIdMapping &mapping)
+ {
+ _selector = (mapping._selector != NULL) ?
+ &((*mapping._selector)[0]) : NULL;
+ _docIdLimit = mapping._docIdLimit;
+ _selectorLimit = (mapping._selector != NULL) ?
+ (*mapping._selector).size() :
+ 0u;
+ _selectorId = mapping._selectorId;
+ }
+
+ static uint32_t
+ noDocId(void)
+ {
+ return static_cast<uint32_t>(-1);
+ }
+
+ uint32_t
+ mapDocId(uint32_t docId) const
+ {
+ assert(docId < _docIdLimit);
+ if (_selector != NULL &&
+ (docId >= _selectorLimit || _selector[docId] != _selectorId)) {
+ docId = noDocId();
+ }
+ return docId;
+ }
+};
+
+} // namespace diskindex
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/diskindex/extposocc.cpp b/searchlib/src/vespa/searchlib/diskindex/extposocc.cpp
new file mode 100644
index 00000000000..9eaac550192
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/diskindex/extposocc.cpp
@@ -0,0 +1,157 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright (C) 2002-2003 Fast Search & Transfer ASA
+// Copyright (C) 2003 Overture Services Norway AS
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+
+#include "extposocc.h"
+#include <vespa/vespalib/objects/nbostream.h>
+#include <vespa/searchlib/index/postinglistcounts.h>
+#include <vespa/searchlib/index/docidandfeatures.h>
+#include <vespa/searchlib/index/postinglistcounts.h>
+#include <vespa/searchlib/index/postinglistcountfile.h>
+#include "zcposocc.h"
+#include "fileheader.h"
+
+LOG_SETUP(".diskindex.extposocc");
+
+using search::index::PostingListFileSeqRead;
+using search::index::PostingListFileSeqWrite;
+using search::index::PostingListCountFileSeqRead;
+using search::index::PostingListCountFileSeqWrite;
+using search::index::DocIdAndFeatures;
+using search::index::WordDocElementFeatures;
+using search::index::WordDocElementWordPosFeatures;
+using search::index::PostingListCounts;
+using search::index::PostingListParams;
+using search::index::Schema;
+
+namespace
+{
+
+vespalib::string PosOccIdCooked = "PosOcc.1.Cooked";
+
+}
+
+namespace search
+{
+
+namespace diskindex
+{
+
+void
+setupDefaultPosOccParameters(PostingListParams *countParams,
+ PostingListParams *params,
+ uint64_t numWordIds,
+ uint32_t docIdLimit)
+{
+ params->set("minSkipDocs", 64u);
+ params->set("minChunkDocs", 262144u);
+
+ countParams->set("numWordIds", numWordIds);
+ /*
+ * ZcPosOcc interleaved min: 2 + 1 + 2 + 1 = 6, assuming k == 1
+ * for both docid delta and wordpos delta, i.e. average docsize is
+ * less than 8.
+ */
+ countParams->set("avgBitsPerDoc", static_cast<uint32_t>(27));
+ countParams->set("minChunkDocs", static_cast<uint32_t>(262144));
+ countParams->set("docIdLimit", docIdLimit);
+}
+
+
+PostingListFileSeqWrite *
+makePosOccWrite(const vespalib::string &name,
+ PostingListCountFileSeqWrite *const posOccCountWrite,
+ bool dynamicK,
+ const PostingListParams &params,
+ const PostingListParams &featureParams,
+ const Schema &schema,
+ uint32_t indexId,
+ const TuneFileSeqWrite &tuneFileWrite)
+{
+ PostingListFileSeqWrite *posOccWrite = NULL;
+
+ FileHeader fileHeader;
+ if (fileHeader.taste(name, tuneFileWrite)) {
+ if (fileHeader.getVersion() == 1 &&
+ fileHeader.getBigEndian() &&
+ fileHeader.getFormats().size() == 2 &&
+ fileHeader.getFormats()[0] ==
+ ZcPosOccSeqRead::getIdentifier() &&
+ fileHeader.getFormats()[1] ==
+ ZcPosOccSeqRead::getSubIdentifier()) {
+ dynamicK = true;
+ } else if (fileHeader.getVersion() == 1 &&
+ fileHeader.getBigEndian() &&
+ fileHeader.getFormats().size() == 2 &&
+ fileHeader.getFormats()[0] ==
+ Zc4PosOccSeqRead::getIdentifier() &&
+ fileHeader.getFormats()[1] ==
+ Zc4PosOccSeqRead::getSubIdentifier()) {
+ dynamicK = false;
+ } else {
+ LOG(warning,
+ "Could not detect format for posocc file write %s",
+ name.c_str());
+ }
+ }
+ if (dynamicK)
+ posOccWrite = new ZcPosOccSeqWrite(schema, indexId, posOccCountWrite);
+ else
+ posOccWrite =
+ new Zc4PosOccSeqWrite(schema, indexId, posOccCountWrite);
+
+ posOccWrite->setFeatureParams(featureParams);
+ posOccWrite->setParams(params);
+ return posOccWrite;
+}
+
+
+PostingListFileSeqRead *
+makePosOccRead(const vespalib::string &name,
+ PostingListCountFileSeqRead *const posOccCountRead,
+ bool dynamicK,
+ const PostingListParams &featureParams,
+ const TuneFileSeqRead &tuneFileRead)
+{
+ PostingListFileSeqRead *posOccRead = NULL;
+
+ FileHeader fileHeader;
+ if (fileHeader.taste(name, tuneFileRead)) {
+ if (fileHeader.getVersion() == 1 &&
+ fileHeader.getBigEndian() &&
+ fileHeader.getFormats().size() == 2 &&
+ fileHeader.getFormats()[0] ==
+ ZcPosOccSeqRead::getIdentifier() &&
+ fileHeader.getFormats()[1] ==
+ ZcPosOccSeqRead::getSubIdentifier()) {
+ dynamicK = true;
+ } else if (fileHeader.getVersion() == 1 &&
+ fileHeader.getBigEndian() &&
+ fileHeader.getFormats().size() == 2 &&
+ fileHeader.getFormats()[0] ==
+ Zc4PosOccSeqRead::getIdentifier() &&
+ fileHeader.getFormats()[1] ==
+ Zc4PosOccSeqRead::getSubIdentifier()) {
+ dynamicK = false;
+ } else {
+ LOG(warning,
+ "Could not detect format for posocc file read %s",
+ name.c_str());
+ }
+ }
+ if (dynamicK)
+ posOccRead = new ZcPosOccSeqRead(posOccCountRead);
+ else
+ posOccRead = new Zc4PosOccSeqRead(posOccCountRead);
+
+ posOccRead->setFeatureParams(featureParams);
+ return posOccRead;
+}
+
+
+} // namespace diskindex
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/diskindex/extposocc.h b/searchlib/src/vespa/searchlib/diskindex/extposocc.h
new file mode 100644
index 00000000000..1deb788d488
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/diskindex/extposocc.h
@@ -0,0 +1,56 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright (C) 2002-2003 Fast Search & Transfer ASA
+// Copyright (C) 2003 Overture Services Norway AS
+
+#pragma once
+
+#include <vespa/vespalib/stllike/string.h>
+
+namespace search
+{
+
+class TuneFileSeqRead;
+class TuneFileSeqWrite;
+
+namespace index {
+
+class PostingListParams;
+class PostingListCountFileSeqWrite;
+class PostingListCountFileSeqRead;
+class PostingListFileSeqWrite;
+class PostingListFileSeqRead;
+class Schema;
+
+}
+
+namespace diskindex
+{
+
+
+void
+setupDefaultPosOccParameters(index::PostingListParams *countParams,
+ index::PostingListParams *params,
+ uint64_t numWordIds,
+ uint32_t docIdLimit);
+
+index::PostingListFileSeqWrite *
+makePosOccWrite(const vespalib::string &name,
+ index::PostingListCountFileSeqWrite *const posOccCountWrite,
+ bool dynamicK,
+ const index::PostingListParams &params,
+ const index::PostingListParams &featureParams,
+ const index::Schema &schema,
+ uint32_t indexId,
+ const TuneFileSeqWrite &tuneFileWrite);
+
+index::PostingListFileSeqRead *
+makePosOccRead(const vespalib::string &name,
+ index::PostingListCountFileSeqRead *const posOccCountRead,
+ bool dynamicK,
+ const index::PostingListParams &featureParams,
+ const TuneFileSeqRead &tuneFileRead);
+
+} // namespace diskindex
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/diskindex/fieldreader.cpp b/searchlib/src/vespa/searchlib/diskindex/fieldreader.cpp
new file mode 100644
index 00000000000..279a73935f5
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/diskindex/fieldreader.cpp
@@ -0,0 +1,385 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+#include <vespa/vespalib/objects/nbostream.h>
+#include "fieldreader.h"
+#include "zcposocc.h"
+#include "extposocc.h"
+#include <vespa/vespalib/util/error.h>
+#include "pagedict4file.h"
+LOG_SETUP(".diskindex.fieldreader");
+
+#define NO_DOC static_cast<uint32_t>(-1)
+
+namespace
+{
+
+vespalib::string PosOccIdCooked = "PosOcc.3.Cooked";
+
+}
+
+using vespalib::getLastErrorString;
+using search::index::Schema;
+using search::index::SchemaUtil;
+using search::bitcompression::PosOccFieldParams;
+using search::bitcompression::PosOccFieldsParams;
+
+namespace search
+{
+
+namespace diskindex
+{
+
+
+FieldReader::FieldReader(void)
+ : _wordNum(noWordNumHigh()),
+ _docIdAndFeatures(),
+ _dictFile(),
+ _oldposoccfile(),
+ _wordNumMapper(),
+ _docIdMapper(),
+ _oldWordNum(noWordNumHigh()),
+ _residue(0u),
+ _docIdLimit(0u),
+ _checkPointResume(false),
+ _word()
+{
+}
+
+
+FieldReader::~FieldReader(void)
+{
+}
+
+
+void
+FieldReader::readCounts(void)
+{
+ PostingListCounts counts;
+ _dictFile->readWord(_word, _oldWordNum, counts);
+ _oldposoccfile->readCounts(counts);
+ if (_oldWordNum != noWordNumHigh()) {
+ _wordNum = _wordNumMapper.map(_oldWordNum);
+ assert(_wordNum != noWordNum());
+ assert(_wordNum != noWordNumHigh());
+ _residue = counts._numDocs;
+ } else
+ _wordNum = _oldWordNum;
+}
+
+
+void
+FieldReader::readDocIdAndFeatures(void)
+{
+ _oldposoccfile->readDocIdAndFeatures(_docIdAndFeatures);
+ _docIdAndFeatures._docId = _docIdMapper.mapDocId(_docIdAndFeatures._docId);
+}
+
+
+void
+FieldReader::read(void)
+{
+ for (;;) {
+ while (_residue == 0) {
+ readCounts();
+ if (_wordNum == noWordNumHigh()) {
+ assert(_residue == 0);
+ _docIdAndFeatures._docId = NO_DOC;
+ return;
+ }
+ }
+ --_residue;
+ readDocIdAndFeatures();
+ if (_docIdAndFeatures._docId != NO_DOC)
+ return;
+ }
+}
+
+
+bool
+FieldReader::allowRawFeatures(void)
+{
+ return true;
+}
+
+
+void
+FieldReader::setup(const WordNumMapping &wordNumMapping,
+ const DocIdMapping &docIdMapping)
+{
+ _wordNumMapper.setup(wordNumMapping);
+ _docIdMapper.setup(docIdMapping);
+}
+
+
+bool
+FieldReader::earlyOpen(const vespalib::string &prefix,
+ const TuneFileSeqRead &tuneFileRead)
+{
+ vespalib::string name = prefix + "posocc.dat.compressed";
+ FastOS_StatInfo statInfo;
+ bool statres;
+
+ bool dynamicKPosOccFormat = false; // Will autodetect anyway
+ statres = FastOS_File::Stat(name.c_str(), &statInfo);
+ if (!statres) {
+ LOG(error,
+ "Could not stat compressed posocc file %s: %s",
+ name.c_str(), getLastErrorString().c_str());
+ return false;
+ }
+
+ _dictFile.reset(new search::diskindex::PageDict4FileSeqRead);
+ PostingListParams featureParams;
+ _oldposoccfile.reset(search::diskindex::makePosOccRead(name,
+ _dictFile.get(),
+ dynamicKPosOccFormat,
+ featureParams,
+ tuneFileRead));
+ return true;
+}
+
+
+bool
+FieldReader::lateOpen(const vespalib::string &prefix,
+ const TuneFileSeqRead &tuneFileRead)
+{
+ vespalib::string cname = prefix + "dictionary";
+ vespalib::string name = prefix + "posocc.dat.compressed";
+
+ if (!_dictFile->open(cname, tuneFileRead)) {
+ LOG(error,
+ "Could not open posocc count file %s for read",
+ cname.c_str());
+ }
+
+ // open posocc.dat
+ if (!_oldposoccfile->open(name, tuneFileRead)) {
+ LOG(error,
+ "Could not open posocc file %s for read",
+ name.c_str());
+ }
+ if (!_checkPointResume) {
+ _oldWordNum = noWordNum();
+ _wordNum = _oldWordNum;
+ PostingListParams params;
+ _oldposoccfile->getParams(params);
+ params.get("docIdLimit", _docIdLimit);
+ }
+ return true;
+}
+
+
+bool
+FieldReader::open(const vespalib::string &prefix,
+ const TuneFileSeqRead &tuneFileRead)
+{
+ if (!earlyOpen(prefix, tuneFileRead))
+ return false;
+ return lateOpen(prefix, tuneFileRead);
+}
+
+
+bool
+FieldReader::close(void)
+{
+ bool ret = true;
+
+ if (_oldposoccfile) {
+ bool closeRes = _oldposoccfile->close();
+ if (!closeRes) {
+ LOG(error,
+ "Could not close posocc file for read");
+ ret = false;
+ }
+ _oldposoccfile.reset();
+ }
+ if (_dictFile) {
+ bool closeRes = _dictFile->close();
+ if (!closeRes) {
+ LOG(error,
+ "Could not close posocc file for read");
+ ret = false;
+ }
+ _dictFile.reset();
+ }
+
+ return ret;
+}
+
+
+void
+FieldReader::checkPointWrite(vespalib::nbostream &out)
+{
+ out << _wordNum << _oldWordNum;
+ out << _residue << _docIdAndFeatures;
+ out << _docIdLimit;
+ out << _word;
+ _oldposoccfile->checkPointWrite(out);
+ _dictFile->checkPointWrite(out);
+}
+
+void
+FieldReader::checkPointRead(vespalib::nbostream &in)
+{
+ in >> _wordNum >> _oldWordNum;
+ in >> _residue >> _docIdAndFeatures;
+ in >> _docIdLimit;
+ in >> _word;
+ _oldposoccfile->checkPointRead(in);
+ _dictFile->checkPointRead(in);
+ _checkPointResume = true;
+}
+
+void
+FieldReader::setFeatureParams(const PostingListParams &params)
+{
+ _oldposoccfile->setFeatureParams(params);
+}
+
+
+void
+FieldReader::getFeatureParams(PostingListParams &params)
+{
+ _oldposoccfile->getFeatureParams(params);
+}
+
+
+std::unique_ptr<FieldReader>
+FieldReader::allocFieldReader(const SchemaUtil::IndexIterator &index,
+ const Schema &oldSchema)
+{
+ assert(index.isValid());
+ if (index.hasMatchingOldFields(oldSchema, false))
+ return std::make_unique<FieldReader>(); // The common case
+ if (!index.hasOldFields(oldSchema, false))
+ return std::make_unique<FieldReaderEmpty>(index); // drop data
+ // field exists in old schema with different collection type setting
+ return std::make_unique<FieldReaderStripInfo>(index); // degraded
+}
+
+
+FieldReaderEmpty::FieldReaderEmpty(const IndexIterator &index)
+ : _index(index)
+{
+}
+
+
+bool
+FieldReaderEmpty::earlyOpen(const vespalib::string &prefix,
+ const TuneFileSeqRead &tuneFileRead)
+{
+ (void) prefix;
+ (void) tuneFileRead;
+ return true;
+}
+
+
+bool
+FieldReaderEmpty::lateOpen(const vespalib::string &prefix,
+ const TuneFileSeqRead &tuneFileRead)
+{
+ (void) prefix;
+ (void) tuneFileRead;
+ return true;
+}
+
+
+bool
+FieldReaderEmpty::open(const vespalib::string &prefix,
+ const TuneFileSeqRead &tuneFileRead)
+{
+ (void) prefix;
+ (void) tuneFileRead;
+ return true;
+}
+
+
+void
+FieldReaderEmpty::getFeatureParams(PostingListParams &params)
+{
+ PosOccFieldsParams fieldsParams;
+ fieldsParams.setSchemaParams(_index.getSchema(), _index.getIndex());
+ params.clear();
+ fieldsParams.getParams(params);
+}
+
+
+FieldReaderStripInfo::FieldReaderStripInfo(const IndexIterator &index)
+ : _hasElements(false),
+ _hasElementWeights(false)
+{
+ PosOccFieldsParams fieldsParams;
+ fieldsParams.setSchemaParams(index.getSchema(), index.getIndex());
+ assert(fieldsParams.getNumFields() > 0);
+ const PosOccFieldParams &fieldParams = fieldsParams.getFieldParams()[0];
+ _hasElements = fieldParams._hasElements;
+ _hasElementWeights = fieldParams._hasElementWeights;
+}
+
+
+bool
+FieldReaderStripInfo::allowRawFeatures(void)
+{
+ return false;
+}
+
+
+void
+FieldReaderStripInfo::read(void)
+{
+ typedef search::index::WordDocElementFeatures Element;
+
+ for (;;) {
+ FieldReader::read();
+ DocIdAndFeatures &features = _docIdAndFeatures;
+ if (_wordNum == noWordNumHigh())
+ return;
+ assert(!features.getRaw());
+ uint32_t numElements = features._elements.size();
+ assert(numElements > 0);
+ std::vector<Element>::iterator element =
+ features._elements.begin();
+ if (_hasElements) {
+ if (!_hasElementWeights) {
+ for (uint32_t elementDone = 0; elementDone < numElements;
+ ++elementDone, ++element) {
+ element->setWeight(1);
+ }
+ assert(element == features._elements.end());
+ }
+ } else {
+ if (element->getElementId() != 0)
+ continue; // Drop this entry, try to read new entry
+ element->setWeight(1);
+ features._wordPositions.resize(element->getNumOccs());
+ if (numElements > 1) {
+ features._elements.resize(1);
+ }
+ }
+ break;
+ }
+}
+
+
+void
+FieldReaderStripInfo::getFeatureParams(PostingListParams &params)
+{
+ FieldReader::getFeatureParams(params);
+ vespalib::string paramsPrefix = PosOccFieldParams::getParamsPrefix(0);
+ vespalib::string collStr = paramsPrefix + ".collectionType";
+ if (_hasElements) {
+ if (_hasElementWeights)
+ params.setStr(collStr, "weightedSet");
+ else
+ params.setStr(collStr, "array");
+ } else
+ params.setStr(collStr, "single");
+ params.erase("encoding");
+}
+
+
+} // namespace diskindex
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/diskindex/fieldreader.h b/searchlib/src/vespa/searchlib/diskindex/fieldreader.h
new file mode 100644
index 00000000000..b3cf6446419
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/diskindex/fieldreader.h
@@ -0,0 +1,216 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/searchlib/index/postinglistcounts.h>
+#include <vespa/searchlib/index/dictionaryfile.h>
+#include <vespa/searchlib/index/docidandfeatures.h>
+#include <vespa/searchlib/index/postinglistfile.h>
+#include <vespa/searchlib/index/schemautil.h>
+#include <vespa/searchlib/util/postingpriorityqueue.h>
+#include "wordnummapper.h"
+#include "docidmapper.h"
+#include "fieldwriter.h"
+
+namespace search
+{
+
+namespace diskindex
+{
+
+class FieldReaderFieldInfo;
+
+/*
+ * FieldReader is used to read a dictionary and posting list file
+ * together, and get a sequential view of the stored data.
+ *
+ * It can use mappings for word numbers and document ids to skip
+ * documents that are logically removed and use shared word numbers
+ * with other field readers.
+ *
+ * It is used by the fusion code as one of many input objects connected
+ * to a FieldWriter class that writes the merged output for the field.
+ */
+class FieldReader
+{
+ FieldReader(const FieldReader &rhs) = delete;
+ FieldReader(const FieldReader &&rhs) = delete;
+ FieldReader &operator=(const FieldReader &rhs) = delete;
+ FieldReader &operator=(const FieldReader &&rhs) = delete;
+public:
+ using DictionaryFileSeqRead = index::DictionaryFileSeqRead;
+
+ typedef index::PostingListFileSeqRead PostingListFileSeqRead;
+ typedef index::DocIdAndFeatures DocIdAndFeatures;
+ typedef index::Schema Schema;
+ typedef index::SchemaUtil::IndexIterator IndexIterator;
+ typedef index::PostingListCounts PostingListCounts;
+ typedef index::PostingListParams PostingListParams;
+
+ uint64_t _wordNum;
+ DocIdAndFeatures _docIdAndFeatures;
+protected:
+ std::unique_ptr<DictionaryFileSeqRead> _dictFile;
+ std::unique_ptr<PostingListFileSeqRead> _oldposoccfile;
+ WordNumMapper _wordNumMapper;
+ DocIdMapper _docIdMapper;
+ uint64_t _oldWordNum;
+ uint32_t _residue;
+ uint32_t _docIdLimit;
+ bool _checkPointResume;
+ vespalib::string _word;
+
+ static uint64_t
+ noWordNumHigh(void)
+ {
+ return std::numeric_limits<uint64_t>::max();
+ }
+
+ static uint64_t
+ noWordNum(void)
+ {
+ return 0u;
+ }
+
+ void
+ readCounts(void);
+
+ void
+ readDocIdAndFeatures(void);
+
+public:
+ FieldReader(void);
+
+ virtual
+ ~FieldReader(void);
+
+ virtual void
+ read(void);
+
+ virtual bool
+ allowRawFeatures(void);
+
+ void
+ write(FieldWriter &writer)
+ {
+ if (_wordNum != writer.getSparseWordNum()) {
+ writer.newWord(_wordNum, _word);
+ }
+ writer.add(_docIdAndFeatures);
+ }
+
+ bool
+ isValid(void) const
+ {
+ return _wordNum != noWordNumHigh();
+ }
+
+ bool
+ operator<(const FieldReader &rhs) const
+ {
+ return _wordNum < rhs._wordNum ||
+ (_wordNum == rhs._wordNum &&
+ _docIdAndFeatures._docId < rhs._docIdAndFeatures._docId);
+ }
+
+ virtual void
+ setup(const WordNumMapping &wordNumMapping,
+ const DocIdMapping &docIdMapping);
+
+ virtual bool
+ earlyOpen(const vespalib::string &prefix,
+ const TuneFileSeqRead &tuneFileRead);
+
+ virtual bool
+ lateOpen(const vespalib::string &prefix,
+ const TuneFileSeqRead &tuneFileRead);
+
+ virtual bool
+ open(const vespalib::string &prefix, const TuneFileSeqRead &tuneFileRead);
+
+ virtual bool
+ close(void);
+
+ /*
+ * To be called between words, not in the middle of one.
+ */
+ virtual void
+ checkPointWrite(vespalib::nbostream &out);
+
+ /*
+ * To be called after earlyOpen() but before afterOpen().
+ */
+ virtual void
+ checkPointRead(vespalib::nbostream &in);
+
+ virtual void
+ setFeatureParams(const PostingListParams &params);
+
+ virtual void
+ getFeatureParams(PostingListParams &params);
+
+ uint32_t
+ getDocIdLimit(void) const
+ {
+ return _docIdLimit;
+ }
+
+ static std::unique_ptr<FieldReader>
+ allocFieldReader(const IndexIterator &index, const Schema &oldSchema);
+};
+
+
+/*
+ * Field reader that pretends that input is empty, e.g. due to field
+ * not existing in source or being incompatible.
+ */
+class FieldReaderEmpty : public FieldReader
+{
+private:
+ const IndexIterator _index;
+
+public:
+ FieldReaderEmpty(const IndexIterator &index);
+
+ virtual bool
+ earlyOpen(const vespalib::string &prefix,
+ const TuneFileSeqRead &tuneFileRead) override;
+
+ virtual bool
+ lateOpen(const vespalib::string &prefix,
+ const TuneFileSeqRead &tuneFileRead) override;
+
+ virtual bool
+ open(const vespalib::string &prefix, const TuneFileSeqRead &tuneFileRead)
+ override;
+
+ virtual void
+ getFeatureParams(PostingListParams &params) override;
+};
+
+/*
+ * Field reader that strips information from source, e.g. remove
+ * weights or discard nonzero elements, due to collection type change.
+ */
+class FieldReaderStripInfo : public FieldReader
+{
+private:
+ bool _hasElements;
+ bool _hasElementWeights;
+public:
+ FieldReaderStripInfo(const IndexIterator &index);
+
+ virtual bool
+ allowRawFeatures(void) override;
+
+ virtual void
+ read(void) override;
+
+ virtual void
+ getFeatureParams(PostingListParams &params) override;
+};
+
+
+} // namespace diskindex
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/diskindex/fieldwriter.cpp b/searchlib/src/vespa/searchlib/diskindex/fieldwriter.cpp
new file mode 100644
index 00000000000..7449a946286
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/diskindex/fieldwriter.cpp
@@ -0,0 +1,258 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+#include <vespa/vespalib/objects/nbostream.h>
+#include "fieldwriter.h"
+#include "zcposocc.h"
+#include "extposocc.h"
+#include <vespa/vespalib/util/error.h>
+#include "pagedict4file.h"
+LOG_SETUP(".diskindex.fieldwriter");
+
+namespace search
+{
+
+namespace diskindex
+{
+
+using vespalib::nbostream;
+using vespalib::getLastErrorString;
+using common::FileHeaderContext;
+
+FieldWriter::FieldWriter(uint32_t docIdLimit,
+ uint64_t numWordIds)
+ : _wordNum(noWordNum()),
+ _prevDocId(0),
+ _dictFile(),
+ _posoccfile(),
+ _bvc(docIdLimit),
+ _bmapfile(BitVectorKeyScope::PERFIELD_WORDS),
+ _docIdLimit(docIdLimit),
+ _numWordIds(numWordIds),
+ _prefix(),
+ _compactWordNum(0),
+ _word()
+{
+}
+
+
+void
+FieldWriter::earlyOpen(const vespalib::string &prefix,
+ uint32_t minSkipDocs,
+ uint32_t minChunkDocs,
+ bool dynamicKPosOccFormat,
+ const Schema &schema,
+ const uint32_t indexId,
+ const TuneFileSeqWrite &tuneFileWrite)
+{
+ _prefix = prefix;
+ vespalib::string name = prefix + "posocc.dat.compressed";
+
+ PostingListParams params;
+ PostingListParams featureParams;
+ PostingListParams countParams;
+
+ diskindex::setupDefaultPosOccParameters(&countParams,
+ &params,
+ _numWordIds,
+ _docIdLimit);
+
+ if (minSkipDocs != 0) {
+ countParams.set("minSkipDocs", minSkipDocs);
+ params.set("minSkipDocs", minSkipDocs);
+ }
+ if (minChunkDocs != 0) {
+ countParams.set("minChunkDocs", minChunkDocs);
+ params.set("minChunkDocs", minChunkDocs);
+ }
+
+ _dictFile.reset(new PageDict4FileSeqWrite);
+ _dictFile->setParams(countParams);
+
+ _posoccfile.reset(diskindex::makePosOccWrite(name,
+ _dictFile.get(),
+ dynamicKPosOccFormat,
+ params,
+ featureParams,
+ schema,
+ indexId,
+ tuneFileWrite));
+}
+
+
+bool
+FieldWriter::lateOpen(const TuneFileSeqWrite &tuneFileWrite,
+ const FileHeaderContext &fileHeaderContext)
+{
+ vespalib::string cname = _prefix + "dictionary";
+ vespalib::string name = _prefix + "posocc.dat.compressed";
+
+ // Open output dictionary file
+ if (!_dictFile->open(cname, tuneFileWrite, fileHeaderContext)) {
+ LOG(error, "Could not open posocc count file %s for write: %s",
+ cname.c_str(), getLastErrorString().c_str());
+ return false;
+ }
+
+ // Open output posocc.dat file
+ if (!_posoccfile->open(name, tuneFileWrite, fileHeaderContext)) {
+ LOG(error, "Could not open posocc file %s for write: %s",
+ name.c_str(), getLastErrorString().c_str());
+ return false;
+ }
+
+ // Open output boolocc.bdat file
+ vespalib::string booloccbidxname = _prefix + "boolocc";
+ _bmapfile.open(booloccbidxname.c_str(), _docIdLimit, tuneFileWrite,
+ fileHeaderContext);
+
+ return true;
+}
+
+
+void
+FieldWriter::flush(void)
+{
+ _posoccfile->flushWord();
+ PostingListCounts &counts = _posoccfile->getCounts();
+ if (counts._numDocs != 0) {
+ assert(_compactWordNum != 0);
+ _dictFile->writeWord(_word, counts);
+ // Write bitmap entries
+ if (_bvc.getCrossedBitVectorLimit())
+ _bmapfile.addWordSingle(_compactWordNum, _bvc.getBitVector());
+ _bvc.clear();
+ counts.clear();
+ } else {
+ assert(counts._bitLength == 0);
+ assert(_bvc.empty());
+ assert(_compactWordNum == 0);
+ }
+}
+
+
+void
+FieldWriter::newWord(uint64_t wordNum, const vespalib::stringref &word)
+{
+ assert(wordNum <= _numWordIds);
+ assert(wordNum != noWordNum());
+ assert(wordNum > _wordNum);
+ flush();
+ _wordNum = wordNum;
+ ++_compactWordNum;
+ _word = word;
+ _prevDocId = 0;
+}
+
+
+void
+FieldWriter::newWord(const vespalib::stringref &word)
+{
+ newWord(_wordNum + 1, word);
+}
+
+
+bool
+FieldWriter::close(void)
+{
+ bool ret = true;
+ flush();
+ _wordNum = noWordNum();
+ if (_posoccfile) {
+ bool closeRes = _posoccfile->close();
+ if (!closeRes) {
+ LOG(error,
+ "Could not close posocc file for write");
+ ret = false;
+ }
+ _posoccfile.reset();
+ }
+ if (_dictFile) {
+ bool closeRes = _dictFile->close();
+ if (!closeRes) {
+ LOG(error,
+ "Could not close posocc count file for write");
+ ret = false;
+ }
+ _dictFile.reset();
+ }
+
+ _bmapfile.close();
+ return ret;
+}
+
+
+void
+FieldWriter::checkPointWrite(nbostream &out)
+{
+ out << _wordNum << _prevDocId;
+ out << _docIdLimit << _numWordIds;
+ out << _compactWordNum << _word;
+ _posoccfile->checkPointWrite(out);
+ _dictFile->checkPointWrite(out);
+ _bvc.checkPointWrite(out);
+ _bmapfile.checkPointWrite(out);
+}
+
+
+void
+FieldWriter::checkPointRead(nbostream &in)
+{
+ in >> _wordNum >> _prevDocId;
+ uint32_t checkDocIdLimit = 0;
+ uint64_t checkNumWordIds = 0;
+ in >> checkDocIdLimit >> checkNumWordIds;
+ assert(checkDocIdLimit == _docIdLimit);
+ assert(checkNumWordIds == _numWordIds);
+ in >> _compactWordNum >> _word;
+ _posoccfile->checkPointRead(in);
+ _dictFile->checkPointRead(in);
+ _bvc.checkPointRead(in);
+ _bmapfile.checkPointRead(in);
+}
+
+
+void
+FieldWriter::setFeatureParams(const PostingListParams &params)
+{
+ _posoccfile->setFeatureParams(params);
+}
+
+
+void
+FieldWriter::getFeatureParams(PostingListParams &params)
+{
+ _posoccfile->getFeatureParams(params);
+}
+
+
+static const char *termOccNames[] =
+{
+ "boolocc.bdat",
+ "boolocc.bidx",
+ "boolocc.idx",
+ "posocc.ccnt",
+ "posocc.cnt",
+ "posocc.dat.compressed",
+ "dictionary.pdat",
+ "dictionary.spdat",
+ "dictionary.ssdat",
+ "dictionary.words",
+ NULL,
+};
+
+
+void
+FieldWriter::remove(const vespalib::string &prefix)
+{
+ for (const char **j = termOccNames; *j != NULL; ++j) {
+ vespalib::string tmpName = prefix + *j;
+ FastOS_File::Delete(tmpName.c_str());
+ }
+}
+
+
+} // namespace diskindex
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/diskindex/fieldwriter.h b/searchlib/src/vespa/searchlib/diskindex/fieldwriter.h
new file mode 100644
index 00000000000..dfed6036405
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/diskindex/fieldwriter.h
@@ -0,0 +1,138 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/searchlib/index/dictionaryfile.h>
+#include <vespa/searchlib/index/postinglistfile.h>
+#include <vespa/searchlib/bitcompression/compression.h>
+#include <vespa/searchlib/bitcompression/countcompression.h>
+#include <vespa/searchlib/bitcompression/posocccompression.h>
+#include "bitvectorfile.h"
+
+namespace vespalib
+{
+
+class nbostream;
+
+}
+
+namespace search
+{
+
+namespace diskindex
+{
+
+/*
+ * FieldWriter is used to write a dictionary and posting list file
+ * together.
+ *
+ * It is used by the fusion code to write the merged output for a field,
+ * and by the memory index dump code to write a field to disk.
+ */
+class FieldWriter
+{
+private:
+ FieldWriter(const FieldWriter &rhs) = delete;
+ FieldWriter(const FieldWriter &&rhs) = delete;
+ FieldWriter &operator=(const FieldWriter &rhs) = delete;
+ FieldWriter &operator=(const FieldWriter &&rhs) = delete;
+
+ uint64_t _wordNum;
+ uint32_t _prevDocId;
+
+ static uint64_t
+ noWordNum(void)
+ {
+ return 0u;
+ }
+public:
+
+ using DictionaryFileSeqWrite = index::DictionaryFileSeqWrite;
+
+ typedef index::PostingListFileSeqWrite PostingListFileSeqWrite;
+ typedef index::DocIdAndFeatures DocIdAndFeatures;
+ typedef index::Schema Schema;
+ typedef index::PostingListCounts PostingListCounts;
+ typedef index::PostingListParams PostingListParams;
+
+ std::unique_ptr<DictionaryFileSeqWrite> _dictFile;
+ std::unique_ptr<PostingListFileSeqWrite> _posoccfile;
+private:
+ BitVectorCandidate _bvc;
+ BitVectorFileWrite _bmapfile;
+ uint32_t _docIdLimit;
+ uint64_t _numWordIds;
+ vespalib::string _prefix;
+ uint64_t _compactWordNum;
+ vespalib::string _word;
+
+ void
+ flush(void);
+
+public:
+ FieldWriter(uint32_t docIdLimit,
+ uint64_t numWordIds);
+
+ void
+ newWord(uint64_t wordNum, const vespalib::stringref &word);
+
+ void
+ newWord(const vespalib::stringref &word);
+
+ void
+ add(const DocIdAndFeatures &features)
+ {
+ assert(features._docId < _docIdLimit);
+ assert(features._docId > _prevDocId);
+ _posoccfile->writeDocIdAndFeatures(features);
+ _bvc.add(features._docId);
+ _prevDocId = features._docId;
+ }
+
+ uint64_t
+ getSparseWordNum() const
+ {
+ return _wordNum;
+ }
+
+ void
+ earlyOpen(const vespalib::string &prefix,
+ uint32_t minSkipDocs,
+ uint32_t minChunkDocs,
+ bool dynamicKPosOccFormat,
+ const Schema &schema,
+ uint32_t indexId,
+ const TuneFileSeqWrite &tuneFileWrite);
+
+ bool
+ lateOpen(const TuneFileSeqWrite &tuneFileWrite,
+ const search::common::FileHeaderContext &fileHeaderContext);
+
+ bool
+ close(void);
+
+ /*
+ * To be called between words, not in the middle of one.
+ */
+ void
+ checkPointWrite(vespalib::nbostream &out);
+
+ /*
+ * To be called after earlyOpen() but before afterOpen().
+ */
+ void
+ checkPointRead(vespalib::nbostream &in);
+
+ void
+ setFeatureParams(const PostingListParams &params);
+
+ void
+ getFeatureParams(PostingListParams &params);
+
+ static void
+ remove(const vespalib::string &prefix);
+};
+
+} // namespace diskindex
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/diskindex/fileheader.cpp b/searchlib/src/vespa/searchlib/diskindex/fileheader.cpp
new file mode 100644
index 00000000000..5c67ea5f064
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/diskindex/fileheader.cpp
@@ -0,0 +1,165 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+#include "fileheader.h"
+#include <vespa/searchlib/bitcompression/compression.h>
+#include <vespa/vespalib/stllike/asciistream.h>
+#include <vespa/vespalib/data/fileheader.h>
+
+LOG_SETUP(".diskindex.fileheader");
+
+namespace search
+{
+
+namespace diskindex
+{
+
+using bitcompression::FeatureDecodeContextBE;
+
+FileHeader::FileHeader(void)
+ : _bigEndian(false),
+ _hostEndian(false),
+ _completed(false),
+ _allowNoFileBitSize(false),
+ _version(0),
+ _headerLen(0),
+ _fileBitSize(0),
+ _formats()
+{
+}
+
+
+FileHeader::~FileHeader(void)
+{
+}
+
+
+static inline uint32_t
+bswap(uint32_t val)
+{
+ __asm__("bswap %0" : "=r" (val) : "0" (val));
+ return val;
+}
+
+
+bool
+FileHeader::taste(const vespalib::string &name,
+ const TuneFileSeqRead &tuneFileRead)
+{
+ vespalib::FileHeader header;
+ FastOS_File file;
+
+ if (tuneFileRead.getWantDirectIO())
+ file.EnableDirectIO();
+ bool res = file.OpenReadOnly(name.c_str());
+ if (!res) {
+ return false;
+ }
+
+ uint32_t headerLen = 0u;
+ uint64_t fileSize = file.GetSize();
+ try {
+ headerLen = header.readFile(file);
+ assert(headerLen >= header.getSize());
+ (void) headerLen;
+ } catch (vespalib::IllegalHeaderException &e) {
+ if (e.getMessage() != "Failed to read header info." &&
+ e.getMessage() != "Failed to verify magic bits.") {
+ LOG(error,
+ "FileHeader::tastGeneric(\"%s\") exception: %s",
+ name.c_str(),
+ e.getMessage().c_str());
+ }
+ file.Close();
+ return false;
+ }
+ file.Close();
+
+ _version = 1;
+ _headerLen = headerLen;
+ _bigEndian = htonl(1) == 1;
+ if (header.hasTag("endian")) {
+ vespalib::string endian(header.getTag("endian").asString());
+ if (endian == "big") {
+ _bigEndian = true;
+ } else if (endian == "little") {
+ _bigEndian = false;
+ } else {
+ LOG(error,
+ "Bad endian: %s",
+ endian.c_str());
+ return false;
+ }
+ }
+ _hostEndian = _bigEndian == (htonl(1) == 1);
+ if (header.hasTag("frozen")) {
+ _completed = header.getTag("frozen").asInteger() != 0;
+ } else {
+ LOG(error,
+ "FileHeader::taste(\"%s\"): Missing frozen tag",
+ name.c_str());
+ return false;
+ }
+ if (header.hasTag("fileBitSize")) {
+ _fileBitSize = header.getTag("fileBitSize").asInteger();
+ if (_completed && _fileBitSize < 8 * _headerLen) {
+ LOG(error,
+ "FileHeader::taste(\"%s\"): "
+ "fleBitSize(%" PRIu64 ") < 8 * headerLen(%u)",
+ name.c_str(),
+ _fileBitSize, _headerLen);
+ return false;
+ }
+ if (_completed && _fileBitSize > 8 * fileSize) {
+ LOG(error,
+ "FileHeader::taste(\"%s\"): "
+ "fleBitSize(%" PRIu64 ") > 8 * fileSize(%" PRIu64 ")",
+ name.c_str(),
+ _fileBitSize, fileSize);
+ abort();
+ }
+ } else if (!_allowNoFileBitSize) {
+ LOG(error,
+ "FileHeader::taste(\"%s\"): Missing fileBitSize tag",
+ name.c_str());
+ return false;
+ }
+ for (uint32_t i = 0; ;++i) {
+ vespalib::asciistream as;
+ as << "format." << i;
+ vespalib::stringref key(as.str());
+ if (!header.hasTag(key))
+ break;
+ _formats.push_back(header.getTag(key).asString());
+ }
+ return true;
+}
+
+
+bool
+FileHeader::taste(const vespalib::string &name,
+ const TuneFileSeqWrite &tuneFileWrite)
+{
+ TuneFileSeqRead tuneFileRead;
+ if (tuneFileWrite.getWantDirectIO())
+ tuneFileRead.setWantDirectIO();
+ return taste(name, tuneFileRead);
+}
+
+
+bool
+FileHeader::taste(const vespalib::string &name,
+ const TuneFileRandRead &tuneFileSearch)
+{
+ TuneFileSeqRead tuneFileRead;
+ if (tuneFileSearch.getWantDirectIO())
+ tuneFileRead.setWantDirectIO();
+ return taste(name, tuneFileRead);
+}
+
+
+} // namespace diskindex
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/diskindex/fileheader.h b/searchlib/src/vespa/searchlib/diskindex/fileheader.h
new file mode 100644
index 00000000000..05db2d22e10
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/diskindex/fileheader.h
@@ -0,0 +1,91 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vector>
+#include <vespa/vespalib/stllike/string.h>
+#include <vespa/searchlib/common/tunefileinfo.h>
+
+namespace search
+{
+
+namespace diskindex
+{
+
+class FileHeader
+{
+private:
+ bool _bigEndian;
+ bool _hostEndian;
+ bool _completed;
+ bool _allowNoFileBitSize;
+ uint32_t _version;
+ uint32_t _headerLen;
+ uint64_t _fileBitSize;
+ std::vector<vespalib::string> _formats;
+
+public:
+ FileHeader(void);
+
+ ~FileHeader(void);
+
+ bool
+ taste(const vespalib::string &name,
+ const TuneFileSeqRead &tuneFileRead);
+
+ bool
+ taste(const vespalib::string &name,
+ const TuneFileSeqWrite &tuneFileWrite);
+
+ bool
+ taste(const vespalib::string &name,
+ const TuneFileRandRead &tuneFileSearch);
+
+ bool
+ getBigEndian(void) const
+ {
+ return _bigEndian;
+ }
+
+ bool
+ getHostEndian(void) const
+ {
+ return _hostEndian;
+ }
+
+ uint32_t
+ getVersion(void) const
+ {
+ return _version;
+ }
+
+ uint32_t
+ getHeaderLen(void) const
+ {
+ return _headerLen;
+ }
+
+ const std::vector<vespalib::string> &
+ getFormats(void) const
+ {
+ return _formats;
+ }
+
+ bool
+ getCompleted() const
+ {
+ return _completed;
+ }
+
+ void
+ setAllowNoFileBitSize()
+ {
+ _allowNoFileBitSize = true;
+ }
+};
+
+
+} // namespace diskindex
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/diskindex/fusion.cpp b/searchlib/src/vespa/searchlib/diskindex/fusion.cpp
new file mode 100644
index 00000000000..eb6e4c9dad5
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/diskindex/fusion.cpp
@@ -0,0 +1,606 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright (C) 2003 Fast Search & Transfer ASA
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+#include "fusion.h"
+#include <vespa/vespalib/util/stringfmt.h>
+#include <vespa/searchlib/common/fslimits.h>
+
+#include <vespa/searchlib/util/filekit.h>
+#include <vespa/searchlib/util/dirtraverse.h>
+#include <vespa/vespalib/io/fileutil.h>
+#include <vespa/searchlib/common/documentsummary.h>
+#include <vespa/searchlib/common/tunefileinfo.h>
+#include <vespa/searchlib/index/postinglistparams.h>
+#include <vespa/searchlib/util/postingpriorityqueue.h>
+#include "fieldreader.h"
+#include "fieldwriter.h"
+#include "dictionarywordreader.h"
+
+LOG_SETUP(".diskindex.fusion");
+
+using search::FileKit;
+using search::index::PostingListParams;
+using search::index::Schema;
+using search::index::SchemaUtil;
+using search::common::FileHeaderContext;
+using search::diskindex::DocIdMapping;
+using search::diskindex::WordNumMapping;
+using search::PostingPriorityQueue;
+using search::docsummary::DocumentSummary;
+using vespalib::getLastErrorString;
+
+
+namespace search
+{
+
+namespace diskindex
+{
+
+
+void
+FusionInputIndex::setSchema(const Schema::SP &schema)
+{
+ _schema = schema;
+}
+
+Fusion::Fusion(bool dynamicKPosIndexFormat,
+ const TuneFileIndexing &tuneFileIndexing,
+ const FileHeaderContext &fileHeaderContext)
+ : _schema(NULL),
+ _oldIndexes(),
+ _docIdLimit(0u),
+ _numWordIds(0u),
+ _dynamicKPosIndexFormat(dynamicKPosIndexFormat),
+ _outDir("merged"),
+ _tuneFileIndexing(tuneFileIndexing),
+ _fileHeaderContext(fileHeaderContext)
+{
+}
+
+
+Fusion::~Fusion()
+{
+ ReleaseMappingTables();
+}
+
+
+void
+Fusion::setSchema(const Schema *schema)
+{
+ _schema = schema;
+}
+
+
+void
+Fusion::setOutDir(const vespalib::string &outDir)
+{
+ _outDir = outDir;
+}
+
+
+void
+Fusion::SetOldIndexList(const std::vector<vespalib::string> &oldIndexList)
+{
+ _oldIndexes.resize(oldIndexList.size());
+ OldIndexIterator oldIndexIt = _oldIndexes.begin();
+ uint32_t i = 0;
+ for (std::vector<vespalib::string>::const_iterator
+ it = oldIndexList.begin(), ite = oldIndexList.end();
+ it != ite;
+ ++it, ++oldIndexIt, ++i) {
+ oldIndexIt->reset(allocOldIndex());
+ OldIndex &oi = **oldIndexIt;
+ oi.setPath(*it);
+ std::ostringstream tmpindexpath0;
+ tmpindexpath0 << _outDir;
+ tmpindexpath0 << "/tmpindex";
+ tmpindexpath0 << i;
+ oi.setTmpPath(tmpindexpath0.str());
+ }
+}
+
+
+bool
+Fusion::openInputWordReaders(const SchemaUtil::IndexIterator &index,
+ std::vector<
+ std::unique_ptr<DictionaryWordReader> > &
+ readers,
+ PostingPriorityQueue<DictionaryWordReader> &heap)
+{
+ for (auto &i : getOldIndexes()) {
+ OldIndex &oi = *i;
+ auto reader(std::make_unique<DictionaryWordReader>());
+ const vespalib::string &tmpindexpath = oi.getTmpPath();
+ const vespalib::string &oldindexpath = oi.getPath();
+ vespalib::string wordMapName = tmpindexpath + "/old2new.dat";
+ vespalib::string fieldDir(oldindexpath + "/" + index.getName());
+ vespalib::string dictName(fieldDir + "/dictionary");
+ const Schema &oldSchema = oi.getSchema();
+ if (!index.hasOldFields(oldSchema, false)) {
+ continue; // drop data
+ }
+ bool res = reader->open(dictName,
+ wordMapName,
+ _tuneFileIndexing._read);
+ if (!res) {
+ LOG(error, "Could not open dictionary %s to generate %s",
+ dictName.c_str(), wordMapName.c_str());
+ return false;
+ }
+ reader->read();
+ if (reader->isValid()) {
+ readers.push_back(std::move(reader));
+ heap.initialAdd(readers.back().get());
+ }
+ }
+ return true;
+}
+
+
+bool
+Fusion::renumberFieldWordIds(const SchemaUtil::IndexIterator &index)
+{
+ vespalib::string indexName = index.getName();
+ LOG(debug, "Renumber word IDs for field %s", indexName.c_str());
+
+ std::vector<std::unique_ptr<DictionaryWordReader>> readers;
+ PostingPriorityQueue<DictionaryWordReader> heap;
+ WordAggregator out;
+
+ if (!openInputWordReaders(index, readers, heap))
+ return false;
+
+
+ heap.merge(out, 4);
+ assert(heap.empty());
+ _numWordIds = out.getWordNum();
+
+ // Close files
+ for (auto &i : readers) {
+ i->close();
+ }
+
+ // Now read mapping files back into an array
+ // XXX: avoid this, and instead make the array here
+ if (!ReadMappingFiles(&index))
+ return false;
+
+ LOG(debug, "Finished renumbering words IDs for field %s",
+ indexName.c_str());
+
+ return true;
+}
+
+
+bool
+Fusion::mergeFields()
+{
+ typedef SchemaUtil::IndexIterator IndexIterator;
+
+ const Schema &schema = getSchema();
+ for (IndexIterator index(schema); index.isValid(); ++index) {
+ if (!mergeField(index.getIndex()))
+ return false;
+ }
+ return true;
+}
+
+
+bool
+Fusion::mergeField(uint32_t id)
+{
+ typedef SchemaUtil::IndexIterator IndexIterator;
+ typedef SchemaUtil::IndexSettings IndexSettings;
+
+ const Schema &schema = getSchema();
+ IndexIterator index(schema, id);
+ const vespalib::string &indexName = index.getName();
+ IndexSettings settings = index.getIndexSettings();
+ if (settings.hasError())
+ return false;
+ vespalib::string indexDir = _outDir + "/" + indexName;
+
+ if (FileKit::hasStamp(indexDir + "/.mergeocc_done"))
+ return true;
+
+ vespalib::mkdir(indexDir.c_str(), false);
+
+ LOG(debug, "mergeField for field %s dir %s",
+ indexName.c_str(), indexDir.c_str());
+
+ makeTmpDirs();
+
+ if (!renumberFieldWordIds(index)) {
+ LOG(error, "Could not renumber field word ids for field %s dir %s",
+ indexName.c_str(), indexDir.c_str());
+ return false;
+ }
+
+ // Tokamak
+ bool res = mergeFieldPostings(index);
+ if (!res) {
+ LOG(error, "Could not merge field postings for field %s dir %s",
+ indexName.c_str(), indexDir.c_str());
+ abort();
+ }
+ if (!FileKit::createStamp(indexDir + "/.mergeocc_done"))
+ return false;
+
+ if (!CleanTmpDirs())
+ return false;
+
+ LOG(debug, "Finished mergeField for field %s dir %s",
+ indexName.c_str(), indexDir.c_str());
+
+ return true;
+}
+
+template <class Reader, class Writer>
+bool
+Fusion::selectCookedOrRawFeatures(Reader &reader, Writer &writer)
+{
+ bool rawFormatOK = true;
+ bool cookedFormatOK = true;
+ PostingListParams featureParams;
+ PostingListParams outFeatureParams;
+ vespalib::string cookedFormat;
+ vespalib::string rawFormat;
+
+ if (!reader.isValid())
+ return true;
+ {
+ writer.getFeatureParams(featureParams);
+ cookedFormat = featureParams.getStr("cookedEncoding");
+ rawFormat = featureParams.getStr("encoding");
+ if (rawFormat == "")
+ rawFormatOK = false; // Typically uncompressed file
+ outFeatureParams = featureParams;
+ }
+ {
+ reader.getFeatureParams(featureParams);
+ if (cookedFormat != featureParams.getStr("cookedEncoding"))
+ cookedFormatOK = false;
+ if (rawFormat != featureParams.getStr("encoding"))
+ rawFormatOK = false;
+ if (featureParams != outFeatureParams)
+ rawFormatOK = false;
+ if (!reader.allowRawFeatures())
+ rawFormatOK = false; // Reader transforms data
+ }
+ if (!cookedFormatOK) {
+ LOG(error,
+ "Cannot perform fusion, cooked feature formats don't match");
+ return false;
+ }
+ if (rawFormatOK) {
+ featureParams.clear();
+ featureParams.set("cooked", false);
+ reader.setFeatureParams(featureParams);
+ reader.getFeatureParams(featureParams);
+ if (featureParams.isSet("cookedEncoding") ||
+ rawFormat != featureParams.getStr("encoding"))
+ rawFormatOK = false;
+ if (!rawFormatOK) {
+ LOG(error, "Cannot perform fusion, raw format setting failed");
+ return false;
+ }
+ LOG(debug, "Using raw feature format for fusion of posting files");
+ }
+ return true;
+}
+
+
+bool
+Fusion::openInputFieldReaders(const SchemaUtil::IndexIterator &index,
+ std::vector<std::unique_ptr<FieldReader> > &
+ readers)
+{
+ vespalib::string indexName = index.getName();
+ for (auto &i : _oldIndexes) {
+ OldIndex &oi = *i;
+ const Schema &oldSchema = oi.getSchema();
+ if (!index.hasOldFields(oldSchema, false)) {
+ continue; // drop data
+ }
+ auto reader = FieldReader::allocFieldReader(index, oldSchema);
+ reader->setup(oi.getWordNumMapping(),
+ oi.getDocIdMapping());
+ if (!reader->open(oi.getPath() + "/" +
+ indexName + "/",
+ _tuneFileIndexing._read))
+ return false;
+ readers.push_back(std::move(reader));
+ }
+ return true;
+}
+
+
+bool
+Fusion::openFieldWriter(const SchemaUtil::IndexIterator &index,
+ FieldWriter &writer)
+{
+ vespalib::string dir = _outDir + "/" + index.getName();
+
+ writer.earlyOpen(dir + "/",
+ 64,
+ 262144,
+ _dynamicKPosIndexFormat,
+ index.getSchema(),
+ index.getIndex(),
+ _tuneFileIndexing._write);
+ // No checkpointing
+ if (!writer.lateOpen(_tuneFileIndexing._write,
+ _fileHeaderContext)) {
+ LOG(error, "Could not open output posocc + dictionary in %s",
+ dir.c_str());
+ abort();
+ return false;
+ }
+ return true;
+}
+
+
+bool
+Fusion::setupMergeHeap(const std::vector<std::unique_ptr<FieldReader> > &
+ readers,
+ FieldWriter &writer,
+ PostingPriorityQueue<FieldReader> &heap)
+{
+ for (auto &reader : readers) {
+ if (!selectCookedOrRawFeatures(*reader, writer))
+ return false;
+ if (reader->isValid())
+ reader->read();
+ if (reader->isValid())
+ heap.initialAdd(reader.get());
+ }
+ return true;
+}
+
+
+bool
+Fusion::mergeFieldPostings(const SchemaUtil::IndexIterator &index)
+{
+ std::vector<std::unique_ptr<FieldReader>> readers;
+ PostingPriorityQueue<FieldReader> heap;
+ /* OUTPUT */
+ FieldWriter fieldWriter(_docIdLimit, _numWordIds);
+ vespalib::string indexName = index.getName();
+
+ if (!openInputFieldReaders(index, readers))
+ return false;
+ if (!openFieldWriter(index, fieldWriter))
+ return false;
+ if (!setupMergeHeap(readers, fieldWriter, heap))
+ return false;
+
+ heap.merge(fieldWriter, 4);
+ assert(heap.empty());
+
+ for (auto &reader : readers) {
+ if (!reader->close())
+ return false;
+ }
+ if (!fieldWriter.close()) {
+ LOG(error, "Could not close output posocc + dictionary in %s/%s",
+ _outDir.c_str(), indexName.c_str());
+ abort();
+ }
+ return true;
+}
+
+
+bool
+Fusion::ReadMappingFiles(const SchemaUtil::IndexIterator *index)
+{
+ ReleaseMappingTables();
+
+ size_t numberOfOldIndexes = _oldIndexes.size();
+ for (uint32_t i = 0; i < numberOfOldIndexes; i++)
+ {
+ OldIndex &oi = *_oldIndexes[i];
+ WordNumMapping &wordNumMapping = oi.getWordNumMapping();
+ std::vector<uint32_t> oldIndexes;
+ const Schema &oldSchema = oi.getSchema();
+ if (!SchemaUtil::getIndexIds(oldSchema,
+ Schema::STRING,
+ oldIndexes))
+ return false;
+ if (oldIndexes.empty()) {
+ wordNumMapping.noMappingFile();
+ continue;
+ }
+ if (index && !index->hasOldFields(oldSchema, false)) {
+ continue; // drop data
+ }
+
+ // Open word mapping file
+ vespalib::string old2newname = oi.getTmpPath() +
+ "/old2new.dat";
+ wordNumMapping.readMappingFile(old2newname, _tuneFileIndexing._read);
+ }
+
+ return true;
+}
+
+
+bool
+Fusion::ReleaseMappingTables()
+{
+ size_t numberOfOldIndexes = _oldIndexes.size();
+ for (uint32_t i = 0; i < numberOfOldIndexes; i++)
+ {
+ OldIndex &oi = *_oldIndexes[i];
+ oi.getWordNumMapping().clear();
+ }
+ return true;
+}
+
+
+void
+Fusion::makeTmpDirs()
+{
+ for (auto &i : getOldIndexes()) {
+ OldIndex &oi = *i;
+ // Make tmpindex directories
+ const vespalib::string &tmpindexpath = oi.getTmpPath();
+ vespalib::mkdir(tmpindexpath, false);
+ }
+}
+
+bool
+Fusion::CleanTmpDirs(void)
+{
+ uint32_t i = 0;
+ for (;;) {
+ std::ostringstream tmpindexpath0;
+ tmpindexpath0 << _outDir;
+ tmpindexpath0 << "/tmpindex";
+ tmpindexpath0 << i;
+ const vespalib::string &tmpindexpath = tmpindexpath0.str();
+ FastOS_StatInfo statInfo;
+ if (!FastOS_File::Stat(tmpindexpath.c_str(), &statInfo)) {
+ if (statInfo._error == FastOS_StatInfo::FileNotFound)
+ break;
+ LOG(error, "Failed to stat tmpdir %s", tmpindexpath.c_str());
+ return false;
+ }
+ i++;
+ }
+ while (i > 0) {
+ i--;
+ // Remove tmpindex directories
+ std::ostringstream tmpindexpath0;
+ tmpindexpath0 << _outDir;
+ tmpindexpath0 << "/tmpindex";
+ tmpindexpath0 << i;
+ const vespalib::string &tmpindexpath = tmpindexpath0.str();
+ search::DirectoryTraverse dt(tmpindexpath.c_str());
+ if (!dt.RemoveTree()) {
+ LOG(error, "Failed to clean tmpdir %s", tmpindexpath.c_str());
+ return false;
+ }
+ }
+ return true;
+}
+
+
+bool
+Fusion::checkSchemaCompat(void)
+{
+ return true;
+}
+
+
+bool
+Fusion::readSchemaFiles(void)
+{
+ OldIndexIterator oldIndexIt = _oldIndexes.begin();
+ OldIndexIterator oldIndexIte = _oldIndexes.end();
+
+ for(; oldIndexIt != oldIndexIte; ++oldIndexIt) {
+ OldIndex &oi = **oldIndexIt;
+ vespalib::string oldcfname = oi.getPath() + "/schema.txt";
+ Schema::SP schema(new Schema);
+ if (!schema->loadFromFile(oldcfname))
+ return false;
+ if (!SchemaUtil::validateSchema(*_schema))
+ return false;
+ oi.setSchema(schema);
+ }
+
+ /* TODO: Check compatibility */
+ bool res = checkSchemaCompat();
+ if (!res)
+ LOG(error, "Index fusion cannot continue due to incompatible indexes");
+ return res;
+}
+
+
+bool
+Fusion::merge(const Schema &schema,
+ const vespalib::string &dir,
+ const std::vector<vespalib::string> &sources,
+ const SelectorArray &selector,
+ bool dynamicKPosOccFormat,
+ const TuneFileIndexing &tuneFileIndexing,
+ const FileHeaderContext &fileHeaderContext)
+{
+ assert(sources.size() <= 255);
+ uint32_t docIdLimit = selector.size();
+ uint32_t trimmedDocIdLimit = docIdLimit;
+
+ // Limit docIdLimit in output based on selections that cannot be satisfied
+ uint32_t sourcesSize = sources.size();
+ while (trimmedDocIdLimit > 0 &&
+ selector[trimmedDocIdLimit - 1] >= sourcesSize)
+ --trimmedDocIdLimit;
+
+ FastOS_StatInfo statInfo;
+ if (!FastOS_File::Stat(dir.c_str(), &statInfo)) {
+ if (statInfo._error != FastOS_StatInfo::FileNotFound) {
+ LOG(error, "Could not stat \"%s\"", dir.c_str());
+ return false;
+ }
+ } else {
+ if (!statInfo._isDirectory) {
+ LOG(error, "\"%s\" is not a directory", dir.c_str());
+ return false;
+ }
+ search::DirectoryTraverse dt(dir.c_str());
+ if (!dt.RemoveTree()) {
+ LOG(error, "Failed to clean directory \"%s\"", dir.c_str());
+ return false;
+ }
+ }
+
+ vespalib::mkdir(dir, false);
+ schema.saveToFile(dir + "/schema.txt");
+ if (!DocumentSummary::writeDocIdLimit(dir, trimmedDocIdLimit)) {
+ LOG(error, "Could not write docsum count in dir %s: %s",
+ dir.c_str(), getLastErrorString().c_str());
+ return false;
+ }
+
+ std::unique_ptr<Fusion> fusion(new Fusion(dynamicKPosOccFormat,
+ tuneFileIndexing,
+ fileHeaderContext));
+ fusion->setSchema(&schema);
+ fusion->setOutDir(dir);
+ fusion->SetOldIndexList(sources);
+ if (!fusion->readSchemaFiles()) {
+ LOG(error, "Cannot read schema files for source indexes");
+ return false;
+ }
+ uint32_t idx = 0;
+ std::vector<std::shared_ptr<OldIndex> > &oldIndexes =
+ fusion->getOldIndexes();
+
+ for (OldIndexIterator i = oldIndexes.begin(), ie = oldIndexes.end();
+ i != ie; ++i, ++idx) {
+ OldIndex &oi = **i;
+ // Make tmpindex directories
+ const vespalib::string &tmpindexpath = oi.getTmpPath();
+ vespalib::mkdir(tmpindexpath, false);
+ DocIdMapping &docIdMapping = oi.getDocIdMapping();
+ if (!docIdMapping.readDocIdLimit(oi.getPath())) {
+ LOG(error, "Cannot determine docIdLimit for old index \"%s\"",
+ oi.getPath().c_str());
+ return false;
+ }
+ docIdMapping.setup(docIdMapping._docIdLimit,
+ &selector,
+ idx);
+ }
+ fusion->setDocIdLimit(trimmedDocIdLimit);
+ if (!fusion->mergeFields())
+ return false;
+ return true;
+}
+
+
+} // namespace diskindex
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/diskindex/fusion.h b/searchlib/src/vespa/searchlib/diskindex/fusion.h
new file mode 100644
index 00000000000..1cc23c61f10
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/diskindex/fusion.h
@@ -0,0 +1,265 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright (C) 2001-2003 Fast Search & Transfer ASA
+// Copyright (C) 2003 Overture Services Norway AS
+
+#pragma once
+
+#include "docidmapper.h"
+#include "wordnummapper.h"
+
+#include <vespa/searchlib/index/schemautil.h>
+#include <vector>
+#include <string>
+
+namespace search
+{
+
+template <class IN>
+class PostingPriorityQueue;
+
+namespace common
+{
+
+class TuneFileIndexing;
+class FileHeaderContext;
+
+}
+
+namespace diskindex
+{
+
+class FieldReader;
+class FieldWriter;
+class DictionaryWordReader;
+
+class FusionInputIndex
+{
+public:
+ typedef diskindex::WordNumMapping WordNumMapping;
+ typedef diskindex::DocIdMapping DocIdMapping;
+private:
+ vespalib::string _path;
+ WordNumMapping _wordNumMapping;
+ DocIdMapping _docIdMapping;
+ vespalib::string _tmpPath;
+ index::Schema::SP _schema;
+
+public:
+ FusionInputIndex(void)
+ : _path(),
+ _wordNumMapping(),
+ _docIdMapping(),
+ _tmpPath(),
+ _schema()
+ {
+ }
+
+ virtual
+ ~FusionInputIndex(void)
+ {
+ }
+
+ void
+ setPath(const vespalib::string &path)
+ {
+ _path = path;
+ }
+
+ const vespalib::string &
+ getPath(void) const
+ {
+ return _path;
+ }
+
+ void
+ setTmpPath(const vespalib::string &tmpPath)
+ {
+ _tmpPath = tmpPath;
+ }
+
+ const vespalib::string &
+ getTmpPath(void) const
+ {
+ return _tmpPath;
+ }
+
+ const WordNumMapping &
+ getWordNumMapping(void) const
+ {
+ return _wordNumMapping;
+ }
+
+ WordNumMapping &
+ getWordNumMapping(void)
+ {
+ return _wordNumMapping;
+ }
+
+ const DocIdMapping &
+ getDocIdMapping(void) const
+ {
+ return _docIdMapping;
+ }
+
+ DocIdMapping &
+ getDocIdMapping(void)
+ {
+ return _docIdMapping;
+ }
+
+ const index::Schema &
+ getSchema(void) const
+ {
+ assert(_schema.get() != NULL);
+ return *_schema.get();
+ }
+
+ void
+ setSchema(const index::Schema::SP &schema);
+};
+
+
+class Fusion
+{
+public:
+ typedef search::index::Schema Schema;
+ typedef search::index::SchemaUtil SchemaUtil;
+
+private:
+ Fusion(const Fusion &);
+ Fusion& operator=(const Fusion &);
+
+public:
+ Fusion(bool dynamicKPosIndexFormat,
+ const TuneFileIndexing &tuneFileIndexing,
+ const search::common::FileHeaderContext &fileHeaderContext);
+
+ virtual
+ ~Fusion(void);
+
+ void SetOldIndexList(const std::vector<vespalib::string> &oldIndexList);
+
+ bool mergeFields();
+ bool mergeField(uint32_t id);
+ bool openInputFieldReaders(const SchemaUtil::IndexIterator &index,
+ std::vector<std::unique_ptr<FieldReader> > &
+ readers);
+ bool openFieldWriter(const SchemaUtil::IndexIterator &index,
+ FieldWriter &writer);
+ bool setupMergeHeap(const std::vector<std::unique_ptr<FieldReader> > &
+ readers,
+ FieldWriter &writer,
+ PostingPriorityQueue<FieldReader> &heap);
+ bool mergeFieldPostings(const SchemaUtil::IndexIterator &index);
+ bool openInputWordReaders(const SchemaUtil::IndexIterator &index,
+ std::vector<
+ std::unique_ptr<DictionaryWordReader> > &
+ readers,
+ PostingPriorityQueue<DictionaryWordReader> &heap);
+ bool renumberFieldWordIds(const SchemaUtil::IndexIterator &index);
+
+ void
+ setSchema(const Schema *schema);
+
+ void
+ setOutDir(const vespalib::string &outDir);
+
+ void makeTmpDirs();
+
+ bool CleanTmpDirs(void);
+
+ bool
+ readSchemaFiles(void);
+
+ bool
+ checkSchemaCompat(void);
+
+ template <class Reader, class Writer>
+ static bool
+ selectCookedOrRawFeatures(Reader &reader, Writer &writer);
+
+protected:
+ bool ReadMappingFiles(const SchemaUtil::IndexIterator *index);
+ bool ReleaseMappingTables();
+
+ static unsigned int noGen(void)
+ {
+ return static_cast<unsigned int>(-1);
+ }
+
+protected:
+
+ typedef FusionInputIndex OldIndex;
+
+ const Schema *_schema; // External ownership
+ std::vector<std::shared_ptr<OldIndex> > _oldIndexes;
+ typedef std::vector<std::shared_ptr<OldIndex> >::iterator
+ OldIndexIterator;
+
+ // OUTPUT:
+
+ uint32_t _docIdLimit;
+ uint64_t _numWordIds;
+
+ // Index format parameters.
+ bool _dynamicKPosIndexFormat;
+
+ // Index location parameters
+
+ /*
+ * Output location
+ */
+ vespalib::string _outDir;
+
+ const TuneFileIndexing &_tuneFileIndexing;
+ const search::common::FileHeaderContext &_fileHeaderContext;
+
+ const Schema &
+ getSchema(void) const
+ {
+ assert(_schema != NULL);
+ return *_schema;
+ }
+public:
+
+ void
+ setDocIdLimit(uint32_t docIdLimit)
+ {
+ _docIdLimit = docIdLimit;
+ }
+
+ void
+ setNumWordIds(uint64_t numWordIds)
+ {
+ _numWordIds = numWordIds;
+ }
+
+ std::vector<std::shared_ptr<OldIndex> > &
+ getOldIndexes(void)
+ {
+ return _oldIndexes;
+ }
+
+ virtual OldIndex *
+ allocOldIndex(void)
+ {
+ return new OldIndex;
+ }
+
+ /**
+ * This method is used by new indexing pipeline to merge indexes.
+ */
+ static bool
+ merge(const Schema &schema,
+ const vespalib::string &dir,
+ const std::vector<vespalib::string> &sources,
+ const SelectorArray &docIdSelector,
+ bool dynamicKPosOccFormat,
+ const TuneFileIndexing &tuneFileIndexing,
+ const search::common::FileHeaderContext &fileHeaderContext);
+};
+
+} // namespace diskindex
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/diskindex/indexbuilder.cpp b/searchlib/src/vespa/searchlib/diskindex/indexbuilder.cpp
new file mode 100644
index 00000000000..a88ce029814
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/diskindex/indexbuilder.cpp
@@ -0,0 +1,720 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+#include "indexbuilder.h"
+#include <vespa/searchlib/index/docidandfeatures.h>
+#include <vespa/searchlib/index/schemautil.h>
+#include <vespa/searchlib/common/documentsummary.h>
+#include <vespa/vespalib/io/fileutil.h>
+#include <vespa/searchlib/diskindex/fieldwriter.h>
+
+LOG_SETUP(".diskindex.indexbuilder");
+
+
+namespace search
+{
+
+namespace diskindex
+{
+
+namespace
+{
+
+using index::DocIdAndFeatures;
+using index::PostingListCounts;
+using index::Schema;
+using index::SchemaUtil;
+using index::WordDocElementFeatures;
+using common::FileHeaderContext;
+using vespalib::getLastErrorString;
+
+static uint32_t
+noWordPos(void)
+{
+ return std::numeric_limits<uint32_t>::max();
+}
+
+
+class FileHandle
+{
+public:
+ FieldWriter *_fieldWriter;
+ DocIdAndFeatures _docIdAndFeatures;
+
+ FileHandle(void);
+
+ ~FileHandle(void);
+
+ void
+ open(const vespalib::stringref &dir,
+ const SchemaUtil::IndexIterator &index,
+ uint32_t docIdLimit, uint64_t numWordIds,
+ const TuneFileSeqWrite &tuneFileWrite,
+ const FileHeaderContext &fileHeaderContext);
+
+ void
+ close(void);
+};
+
+
+}
+
+inline IndexBuilder::FieldHandle &
+IndexBuilder::getIndexFieldHandle(uint32_t fieldId)
+{
+ return _fields[fieldId];
+}
+
+
+class IndexBuilder::FieldHandle
+{
+public:
+ FieldHandle(const Schema &schema,
+ uint32_t fieldId,
+ IndexBuilder *ib);
+
+ ~FieldHandle(void);
+
+ static uint32_t
+ noDocRef(void)
+ {
+ return std::numeric_limits<uint32_t>::max();
+ }
+
+ static uint32_t
+ noElRef(void)
+ {
+ return std::numeric_limits<uint32_t>::max();
+ }
+
+ class FHWordDocFieldFeatures
+ {
+ public:
+ uint32_t _docId;
+ uint32_t _numElements;
+
+ FHWordDocFieldFeatures(uint32_t docId)
+ : _docId(docId),
+ _numElements(0u)
+ {
+ }
+
+ uint32_t
+ getDocId(void) const
+ {
+ return _docId;
+ }
+
+ uint32_t
+ getNumElements(void) const
+ {
+ return _numElements;
+ }
+
+ void
+ incNumElements(void)
+ {
+ ++_numElements;
+ }
+ };
+
+ class FHWordDocElementFeatures
+ : public WordDocElementFeatures
+ {
+ public:
+ uint32_t _docRef;
+
+ FHWordDocElementFeatures(uint32_t elementId,
+ int32_t weight,
+ uint32_t elementLen,
+ uint32_t docRef)
+ : WordDocElementFeatures(elementId),
+ _docRef(docRef)
+ {
+ setWeight(weight);
+ setElementLen(elementLen);
+ }
+ };
+
+ class FHWordDocElementWordPosFeatures
+ : public WordDocElementWordPosFeatures
+ {
+ public:
+ uint32_t _elementRef;
+
+ FHWordDocElementWordPosFeatures(
+ const WordDocElementWordPosFeatures &features,
+ uint32_t elementRef)
+ : WordDocElementWordPosFeatures(features),
+ _elementRef(elementRef)
+ {
+ }
+ };
+
+ typedef vespalib::Array<FHWordDocFieldFeatures, vespalib::DefaultAlloc> FHWordDocFieldFeaturesVector;
+ typedef vespalib::Array<FHWordDocElementFeatures, vespalib::DefaultAlloc> FHWordDocElementFeaturesVector;
+ typedef vespalib::Array<FHWordDocElementWordPosFeatures, vespalib::DefaultAlloc> FHWordDocElementWordPosFeaturesVector;
+
+ FHWordDocFieldFeaturesVector _wdff;
+ FHWordDocElementFeaturesVector _wdfef;
+ FHWordDocElementWordPosFeaturesVector _wdfepf;
+
+ uint32_t _docRef;
+ uint32_t _elRef;
+ bool _valid;
+ const Schema *_schema; // Ptr to allow being std::vector member
+ uint32_t _fieldId;
+ IndexBuilder *_ib; // Ptr to allow being std::vector member
+
+ uint32_t _lowestOKElementId;
+ uint32_t _lowestOKWordPos;
+
+ FileHandle _files;
+
+ void
+ startWord(const vespalib::stringref &word);
+
+ void
+ endWord(void);
+
+ void
+ startDocument(uint32_t docId);
+
+ void
+ endDocument(void);
+
+ void
+ startElement(uint32_t elementId,
+ int32_t weight,
+ uint32_t elementLen);
+
+ void
+ endElement(void);
+
+ void
+ addOcc(const WordDocElementWordPosFeatures &features);
+
+ void
+ setValid(void)
+ {
+ _valid = true;
+ }
+
+ bool
+ getValid(void) const
+ {
+ return _valid;
+ }
+
+ const Schema::IndexField &
+ getSchemaField(void);
+
+ const vespalib::string &
+ getName(void);
+
+ vespalib::string
+ getDir(void);
+
+ void
+ open(uint32_t docIdLimit, uint64_t numWordIds,
+ const TuneFileSeqWrite &tuneFileWrite,
+ const FileHeaderContext &fileHeaderContext);
+
+ void
+ close(void);
+
+ uint32_t
+ getIndexId(void) const
+ {
+ return _fieldId;
+ }
+};
+
+
+namespace {
+
+class SingleIterator
+{
+public:
+ typedef IndexBuilder::FieldHandle FH;
+ FH::FHWordDocFieldFeaturesVector::const_iterator _dFeatures;
+ FH::FHWordDocFieldFeaturesVector::const_iterator _dFeaturesE;
+ FH::FHWordDocElementFeaturesVector::const_iterator _elFeatures;
+ FH::FHWordDocElementWordPosFeaturesVector::const_iterator _pFeatures;
+ uint32_t _docId;
+ uint32_t _localFieldId;
+
+ SingleIterator(FH &fieldHandle, uint32_t localFieldId);
+
+ void
+ appendFeatures(DocIdAndFeatures &features);
+
+ bool
+ isValid(void) const
+ {
+ return _dFeatures != _dFeaturesE;
+ }
+
+ bool
+ operator<(const SingleIterator &rhs) const
+ {
+ if (_docId != rhs._docId)
+ return _docId < rhs._docId;
+ return _localFieldId < rhs._localFieldId;
+ }
+};
+
+
+}
+
+
+FileHandle::FileHandle(void)
+ : _fieldWriter(NULL),
+ _docIdAndFeatures()
+{
+}
+
+
+FileHandle::~FileHandle(void)
+{
+ delete _fieldWriter;
+}
+
+
+void
+FileHandle::open(const vespalib::stringref &dir,
+ const SchemaUtil::IndexIterator &index,
+ uint32_t docIdLimit, uint64_t numWordIds,
+ const TuneFileSeqWrite &tuneFileWrite,
+ const FileHeaderContext &fileHeaderContext)
+{
+ assert(_fieldWriter == NULL);
+
+ _fieldWriter = new FieldWriter(docIdLimit, numWordIds);
+
+ _fieldWriter->earlyOpen(dir + "/", 64, 262144u, false,
+ index.getSchema(), index.getIndex(),
+ tuneFileWrite);
+
+ // No checkpointing
+
+ if (!_fieldWriter->lateOpen(tuneFileWrite, fileHeaderContext)) {
+ LOG(error, "Could not open term writer %s for write (%s)",
+ dir.c_str(), getLastErrorString().c_str());
+ abort();
+ }
+}
+
+
+void
+FileHandle::close(void)
+{
+ bool ret = true;
+ if (_fieldWriter != NULL) {
+ bool closeRes = _fieldWriter->close();
+ delete _fieldWriter;
+ _fieldWriter = NULL;
+ if (!closeRes) {
+ LOG(error,
+ "Could not close term writer");
+ ret = false;
+ }
+ }
+ assert(ret);
+ (void) ret;
+}
+
+
+IndexBuilder::FieldHandle::FieldHandle(const Schema &schema,
+ uint32_t fieldId,
+ IndexBuilder *ib)
+ : _wdff(),
+ _wdfef(),
+ _wdfepf(),
+ _docRef(noDocRef()),
+ _elRef(noElRef()),
+ _valid(false),
+ _schema(&schema),
+ _fieldId(fieldId),
+ _ib(ib),
+ _lowestOKElementId(0u),
+ _lowestOKWordPos(0u),
+ _files()
+{
+}
+
+
+IndexBuilder::FieldHandle::~FieldHandle(void)
+{
+}
+
+
+void
+IndexBuilder::FieldHandle::startWord(const vespalib::stringref &word)
+{
+ assert(_valid);
+ _files._fieldWriter->newWord(word);
+}
+
+
+void
+IndexBuilder::FieldHandle::endWord()
+{
+ DocIdAndFeatures &features = _files._docIdAndFeatures;
+ SingleIterator si(*this, 0u);
+ for (; si.isValid();) {
+ features.clear(si._docId);
+ si.appendFeatures(features);
+ _files._fieldWriter->add(features);
+ }
+ assert(si._elFeatures == _wdfef.end());
+ assert(si._pFeatures == _wdfepf.end());
+ _wdff.clear();
+ _wdfef.clear();
+ _wdfepf.clear();
+ _docRef = noDocRef();
+ _elRef = noElRef();
+}
+
+
+void
+IndexBuilder::FieldHandle::startDocument(uint32_t docId)
+{
+ assert(_docRef == noDocRef());
+ assert(_wdff.empty() || _wdff.back().getDocId() < docId);
+ _wdff.push_back(FHWordDocFieldFeatures(docId));
+ _docRef = _wdff.size() - 1;
+ _lowestOKElementId = 0u;
+}
+
+
+void
+IndexBuilder::FieldHandle::endDocument(void)
+{
+ assert(_docRef != noDocRef());
+ assert(_elRef == noElRef());
+ FHWordDocFieldFeatures &ff = _wdff[_docRef];
+ assert(ff.getNumElements() > 0);
+ (void) ff;
+ _docRef = noDocRef();
+}
+
+
+void
+IndexBuilder::FieldHandle::
+startElement(uint32_t elementId,
+ int32_t weight,
+ uint32_t elementLen)
+{
+ assert(_docRef != noDocRef());
+ assert(_elRef == noElRef());
+ assert(elementId >= _lowestOKElementId);
+
+ FHWordDocFieldFeatures &ff = _wdff[_docRef];
+ _wdfef.push_back(
+ FHWordDocElementFeatures(elementId,
+ weight,
+ elementLen,
+ _docRef));
+ ff.incNumElements();
+ _elRef = _wdfef.size() - 1;
+ _lowestOKWordPos = 0u;
+}
+
+
+void
+IndexBuilder::FieldHandle::endElement(void)
+{
+ assert(_elRef != noElRef());
+ FHWordDocElementFeatures &ef = _wdfef[_elRef];
+ assert(ef.getNumOccs() > 0);
+ _elRef = noElRef();
+ _lowestOKElementId = ef.getElementId() + 1;
+}
+
+
+void
+IndexBuilder::FieldHandle::
+addOcc(const WordDocElementWordPosFeatures &features)
+{
+ assert(_elRef != noElRef());
+ FHWordDocElementFeatures &ef = _wdfef[_elRef];
+ uint32_t wordPos = features.getWordPos();
+ assert(wordPos < ef.getElementLen());
+ assert(wordPos >= _lowestOKWordPos);
+ _lowestOKWordPos = wordPos;
+ _wdfepf.push_back(
+ FHWordDocElementWordPosFeatures(features,
+ _elRef));
+ ef.incNumOccs();
+}
+
+
+const Schema::IndexField &
+IndexBuilder::FieldHandle::getSchemaField(void)
+{
+ return _schema->getIndexField(_fieldId);
+}
+
+
+const vespalib::string &
+IndexBuilder::FieldHandle::getName(void)
+{
+ return getSchemaField().getName();
+
+}
+
+
+vespalib::string
+IndexBuilder::FieldHandle::getDir(void)
+{
+ return _ib->appendToPrefix(getName());
+}
+
+
+void
+IndexBuilder::FieldHandle::open(uint32_t docIdLimit, uint64_t numWordIds,
+ const TuneFileSeqWrite &tuneFileWrite,
+ const FileHeaderContext &fileHeaderContext)
+{
+ _files.open(getDir(),
+ SchemaUtil::IndexIterator(*_schema, getIndexId()),
+ docIdLimit, numWordIds, tuneFileWrite, fileHeaderContext);
+}
+
+
+void
+IndexBuilder::FieldHandle::close(void)
+{
+ _files.close();
+}
+
+
+SingleIterator::SingleIterator(FH &fieldHandle, uint32_t localFieldId)
+ : _dFeatures(fieldHandle._wdff.begin()),
+ _dFeaturesE(fieldHandle._wdff.end()),
+ _elFeatures(fieldHandle._wdfef.begin()),
+ _pFeatures(fieldHandle._wdfepf.begin()),
+ _docId(_dFeatures->getDocId()),
+ _localFieldId(localFieldId)
+{
+}
+
+
+void
+SingleIterator::appendFeatures(DocIdAndFeatures &features)
+{
+ uint32_t elCount = _dFeatures->getNumElements();
+ for (uint32_t elId = 0; elId < elCount; ++elId, ++_elFeatures) {
+ features._elements.push_back(*_elFeatures);
+ features._elements.back().setNumOccs(0);
+ uint32_t posCount = _elFeatures->getNumOccs();
+ uint32_t lastWordPos = noWordPos();
+ for (uint32_t posId = 0; posId < posCount; ++posId, ++_pFeatures) {
+ uint32_t wordPos = _pFeatures->getWordPos();
+ if (wordPos != lastWordPos) {
+ lastWordPos = wordPos;
+ features._elements.back().incNumOccs();
+ features._wordPositions.push_back(*_pFeatures);
+ }
+ }
+ }
+ ++_dFeatures;
+ if (_dFeatures != _dFeaturesE)
+ _docId = _dFeatures->getDocId();
+}
+
+
+IndexBuilder::IndexBuilder(const Schema &schema)
+ : index::IndexBuilder(schema),
+ _currentField(NULL),
+ _curDocId(noDocId()),
+ _lowestOKDocId(1u),
+ _curWord(),
+ _inWord(false),
+ _lowestOKFieldId(0u),
+ _fields(),
+ _prefix(),
+ _docIdLimit(0u),
+ _numWordIds(0u),
+ _schema(schema)
+{
+ // TODO: Filter for text indexes
+ for (uint32_t i = 0, ie = schema.getNumIndexFields(); i < ie; ++i) {
+ const Schema::IndexField &iField = schema.getIndexField(i);
+ FieldHandle fh(schema, i, this);
+ // Only know how to handle string index for now.
+ if (iField.getDataType() == Schema::STRING)
+ fh.setValid();
+ _fields.push_back(fh);
+ }
+}
+
+
+IndexBuilder::~IndexBuilder(void)
+{
+}
+
+
+void
+IndexBuilder::startWord(const vespalib::stringref &word)
+{
+ assert(_currentField != nullptr);
+ assert(!_inWord);
+ // TODO: Check sort order
+ _curWord = word;
+ _inWord = true;
+ _currentField->startWord(word);
+}
+
+
+void
+IndexBuilder::endWord(void)
+{
+ assert(_inWord);
+ assert(_currentField != NULL);
+ _currentField->endWord();
+ _inWord = false;
+ _lowestOKDocId = 1u;
+}
+
+
+void
+IndexBuilder::startDocument(uint32_t docId)
+{
+ assert(_curDocId == noDocId());
+ assert(docId >= _lowestOKDocId);
+ assert(docId < _docIdLimit);
+ assert(_currentField != NULL);
+ _curDocId = docId;
+ assert(_curDocId != noDocId());
+ _currentField->startDocument(docId);
+}
+
+
+void
+IndexBuilder::endDocument(void)
+{
+ assert(_curDocId != noDocId());
+ assert(_currentField != NULL);
+ _currentField->endDocument();
+ _lowestOKDocId = _curDocId + 1;
+ _curDocId = noDocId();
+}
+
+
+void
+IndexBuilder::startField(uint32_t fieldId)
+{
+ assert(_curDocId == noDocId());
+ assert(_currentField == NULL);
+ assert(fieldId < _fields.size());
+ assert(fieldId >= _lowestOKFieldId);
+ _currentField = &_fields[fieldId];
+ assert(_currentField != NULL);
+}
+
+
+void
+IndexBuilder::endField(void)
+{
+ assert(_curDocId == noDocId());
+ assert(!_inWord);
+ assert(_currentField != NULL);
+ _lowestOKFieldId = _currentField->_fieldId + 1;
+ _currentField = NULL;
+}
+
+
+void
+IndexBuilder::startElement(uint32_t elementId,
+ int32_t weight,
+ uint32_t elementLen)
+{
+ assert(_currentField != NULL);
+ _currentField->startElement(elementId, weight, elementLen);
+}
+
+
+void
+IndexBuilder::endElement(void)
+{
+ assert(_currentField != NULL);
+ _currentField->endElement();
+}
+
+
+void
+IndexBuilder::addOcc(const WordDocElementWordPosFeatures &features)
+{
+ assert(_currentField != NULL);
+ _currentField->addOcc(features);
+}
+
+
+void
+IndexBuilder::setPrefix(const vespalib::stringref &prefix)
+{
+ _prefix = prefix;
+}
+
+
+vespalib::string
+IndexBuilder::appendToPrefix(const vespalib::stringref &name)
+{
+ if (_prefix.empty())
+ return name;
+ return _prefix + "/" + name;
+}
+
+
+void
+IndexBuilder::open(uint32_t docIdLimit, uint64_t numWordIds,
+ const TuneFileIndexing &tuneFileIndexing,
+ const FileHeaderContext &fileHeaderContext)
+{
+ std::vector<uint32_t> indexes;
+
+ _docIdLimit = docIdLimit;
+ _numWordIds = numWordIds;
+ if (!_prefix.empty()) {
+ vespalib::mkdir(_prefix, false);
+ }
+ // TODO: Filter for text indexes
+ for (FieldHandle & fh : _fields) {
+ if (!fh.getValid())
+ continue;
+ vespalib::mkdir(fh.getDir(), false);
+ fh.open(docIdLimit, numWordIds, tuneFileIndexing._write,
+ fileHeaderContext);
+ indexes.push_back(fh.getIndexId());
+ }
+ vespalib::string schemaFile = appendToPrefix("schema.txt");
+ if (!_schema.saveToFile(schemaFile)) {
+ LOG(error, "Cannot save schema to \"%s\"", schemaFile.c_str());
+ abort();
+ }
+}
+
+
+void
+IndexBuilder::close(void)
+{
+ // TODO: Filter for text indexes
+ for (FieldHandle & fh : _fields) {
+ if (fh.getValid()) {
+ fh.close();
+ }
+ }
+ if (!docsummary::DocumentSummary::writeDocIdLimit(_prefix, _docIdLimit)) {
+ LOG(error, "Could not write docsum count in dir %s: %s",
+ _prefix.c_str(), getLastErrorString().c_str());
+ abort();
+ }
+}
+
+
+} // namespace diskindex
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/diskindex/indexbuilder.h b/searchlib/src/vespa/searchlib/diskindex/indexbuilder.h
new file mode 100644
index 00000000000..cf8735154f8
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/diskindex/indexbuilder.h
@@ -0,0 +1,124 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/index/indexbuilder.h>
+#include <vespa/searchlib/common/tunefileinfo.h>
+#include <map>
+#include <limits>
+
+namespace search
+{
+
+namespace common
+{
+
+class FileHeaderContext;
+
+}
+
+namespace diskindex
+{
+
+class BitVectorCandidate;
+
+class IndexBuilder : public index::IndexBuilder
+{
+public:
+ class FieldHandle;
+
+ typedef index::Schema Schema;
+private:
+ // Text fields
+ FieldHandle *_currentField;
+ uint32_t _curDocId;
+ uint32_t _lowestOKDocId;
+ vespalib::string _curWord;
+ bool _inWord;
+ uint32_t _lowestOKFieldId;
+ std::vector<FieldHandle> _fields; // Defined fields.
+ vespalib::string _prefix;
+ uint32_t _docIdLimit;
+ uint64_t _numWordIds;
+
+ const Schema &_schema; // Ptr to allow being std::vector member
+
+ static uint32_t
+ noDocId(void)
+ {
+ return std::numeric_limits<uint32_t>::max();
+ }
+
+ static uint64_t
+ noWordNumHigh(void)
+ {
+ return std::numeric_limits<uint64_t>::max();
+ }
+
+public:
+ typedef index::WordDocElementWordPosFeatures
+ WordDocElementWordPosFeatures;
+
+ // schema argument must live until indexbuilder has been deleted.
+ IndexBuilder(const Schema &schema);
+
+ virtual
+ ~IndexBuilder(void);
+
+ virtual void
+ startWord(const vespalib::stringref &word) override;
+
+ virtual void
+ endWord(void) override;
+
+ virtual void
+ startDocument(uint32_t docId) override;
+
+ virtual void
+ endDocument(void) override;
+
+ virtual void
+ startField(uint32_t fieldId) override;
+
+ virtual void
+ endField(void) override;
+
+ virtual void
+ startElement(uint32_t elementId, int32_t weight, uint32_t elementLen)
+ override;
+
+ virtual void
+ endElement(void) override;
+
+ virtual void
+ addOcc(const WordDocElementWordPosFeatures &features) override;
+
+ // TODO: methods for attribute vectors.
+
+ // TODO: methods for document summary.
+ inline FieldHandle &
+ getIndexFieldHandle(uint32_t fieldId);
+
+ void
+ setPrefix(const vespalib::stringref &prefix);
+
+ vespalib::string
+ appendToPrefix(const vespalib::stringref &name);
+
+ void
+ open(uint32_t docIdLimit, uint64_t numWordIds,
+ const TuneFileIndexing &tuneFileIndexing,
+ const search::common::FileHeaderContext &fileHandleContext);
+
+ void
+ close(void);
+};
+
+
+
+} // namespace diskindex
+
+} // namespace search
+
+
+
diff --git a/searchlib/src/vespa/searchlib/diskindex/pagedict4file.cpp b/searchlib/src/vespa/searchlib/diskindex/pagedict4file.cpp
new file mode 100644
index 00000000000..495654fa5cc
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/diskindex/pagedict4file.cpp
@@ -0,0 +1,738 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".diskindex.pagedict4file");
+#include "pagedict4file.h"
+#include <vespa/vespalib/stllike/asciistream.h>
+#include <vespa/vespalib/data/fileheader.h>
+#include <vespa/searchlib/common/fileheadercontext.h>
+#include <vespa/vespalib/objects/nbostream.h>
+
+namespace
+{
+
+vespalib::string myPId("PageDict4P.1");
+vespalib::string mySPId("PageDict4SP.1");
+vespalib::string mySSId("PageDict4SS.1");
+vespalib::string emptyId;
+
+}
+
+using search::common::FileHeaderContext;
+using search::index::PostingListParams;
+using vespalib::getLastErrorString;
+
+namespace search
+{
+
+namespace diskindex
+{
+
+namespace
+{
+
+const uint32_t headerAlign = 4096;
+
+}
+
+PageDict4FileSeqRead::PageDict4FileSeqRead(void)
+ : _pReader(NULL),
+ _ssReader(NULL),
+ _ssd(),
+ _ssReadContext(_ssd),
+ _ssfile(),
+ _spd(),
+ _spReadContext(_spd),
+ _spfile(),
+ _pd(),
+ _pReadContext(_pd),
+ _pfile(),
+ _ssFileBitSize(0u),
+ _spFileBitSize(0u),
+ _pFileBitSize(0u),
+ _ssHeaderLen(0u),
+ _spHeaderLen(0u),
+ _pHeaderLen(0u),
+ _ssCompleted(false),
+ _spCompleted(false),
+ _pCompleted(false),
+ _wordNum(0u),
+ _checkPointData()
+{
+ _ssd.setReadContext(&_ssReadContext);
+ _spd.setReadContext(&_spReadContext);
+ _pd.setReadContext(&_pReadContext);
+}
+
+
+PageDict4FileSeqRead::~PageDict4FileSeqRead(void)
+{
+ delete _pReader;
+ delete _ssReader;
+}
+
+
+void
+PageDict4FileSeqRead::readSSHeader()
+{
+ DC &ssd = _ssd;
+
+ vespalib::FileHeader header;
+ uint32_t headerLen = ssd.readHeader(header, _ssfile.getSize());
+ assert(header.hasTag("frozen"));
+ assert(header.hasTag("fileBitSize"));
+ assert(header.hasTag("format.0"));
+ assert(!header.hasTag("format.1"));
+ assert(header.hasTag("numWordIds"));
+ assert(header.hasTag("avgBitsPerDoc"));
+ assert(header.hasTag("minChunkDocs"));
+ assert(header.hasTag("docIdLimit"));
+ assert(header.hasTag("endian"));
+ _ssCompleted = header.getTag("frozen").asInteger() != 0;
+ _ssFileBitSize = header.getTag("fileBitSize").asInteger();
+ assert(header.getTag("format.0").asString() == mySSId);
+ ssd._numWordIds = header.getTag("numWordIds").asInteger();
+ ssd._avgBitsPerDoc = header.getTag("avgBitsPerDoc").asInteger();
+ ssd._minChunkDocs = header.getTag("minChunkDocs").asInteger();
+ ssd._docIdLimit = header.getTag("docIdLimit").asInteger();
+
+ assert(header.getTag("endian").asString() == "big");
+ ssd.smallAlign(64);
+ uint32_t minHeaderLen = header.getSize();
+ minHeaderLen += (-minHeaderLen & 7);
+ assert(headerLen >= minHeaderLen);
+ assert(ssd.getReadOffset() == headerLen * 8);
+ _ssHeaderLen = headerLen;
+}
+
+
+void
+PageDict4FileSeqRead::readSPHeader(void)
+{
+ DC &spd = _spd;
+
+ vespalib::FileHeader header;
+ uint32_t headerLen = spd.readHeader(header, _spfile.getSize());
+ assert(header.hasTag("frozen"));
+ assert(header.hasTag("fileBitSize"));
+ assert(header.hasTag("format.0"));
+ assert(!header.hasTag("format.1"));
+ assert(header.hasTag("endian"));
+ _spCompleted = header.getTag("frozen").asInteger() != 0;
+ _spFileBitSize = header.getTag("fileBitSize").asInteger();
+ assert(header.getTag("format.0").asString() == mySPId);
+ assert(header.getTag("endian").asString() == "big");
+ spd.smallAlign(64);
+ uint32_t minHeaderLen = header.getSize();
+ minHeaderLen += (-minHeaderLen & 7);
+ assert(headerLen >= minHeaderLen);
+ assert(spd.getReadOffset() == headerLen * 8);
+ _spHeaderLen = headerLen;
+}
+
+
+void
+PageDict4FileSeqRead::readPHeader(void)
+{
+ DC &pd = _pd;
+
+ vespalib::FileHeader header;
+ uint32_t headerLen = pd.readHeader(header, _pfile.getSize());
+ assert(header.hasTag("frozen"));
+ assert(header.hasTag("fileBitSize"));
+ assert(header.hasTag("format.0"));
+ assert(!header.hasTag("format.1"));
+ assert(header.hasTag("endian"));
+ _pCompleted = header.getTag("frozen").asInteger() != 0;
+ _pFileBitSize = header.getTag("fileBitSize").asInteger();
+ assert(header.getTag("format.0").asString() == myPId);
+ assert(header.getTag("endian").asString() == "big");
+ pd.smallAlign(64);
+ uint32_t minHeaderLen = header.getSize();
+ minHeaderLen += (-minHeaderLen & 7);
+ assert(headerLen >= minHeaderLen);
+ assert(pd.getReadOffset() == headerLen * 8);
+ _pHeaderLen = headerLen;
+}
+
+
+void
+PageDict4FileSeqRead::readWord(vespalib::string &word,
+ uint64_t &wordNum,
+ PostingListCounts &counts)
+{
+ // Map to external ids and filter by what's present in the schema.
+ uint64_t checkWordNum = 0;
+ _pReader->readCounts(word, checkWordNum, counts);
+ if (checkWordNum != noWordNumHigh()) {
+ wordNum = ++_wordNum;
+ assert(wordNum == checkWordNum);
+ } else {
+ wordNum = noWordNumHigh();
+ counts.clear();
+ }
+}
+
+
+bool
+PageDict4FileSeqRead::open(const vespalib::string &name,
+ const TuneFileSeqRead &tuneFileRead)
+{
+ if (tuneFileRead.getWantDirectIO()) {
+ _ssfile.EnableDirectIO();
+ _spfile.EnableDirectIO();
+ _pfile.EnableDirectIO();
+ }
+
+ vespalib::string pname = name + ".pdat";
+ vespalib::string spname = name + ".spdat";
+ vespalib::string ssname = name + ".ssdat";
+
+ if (!_ssfile.OpenReadOnly(ssname.c_str())) {
+ LOG(error, "could not open %s: %s",
+ _ssfile.GetFileName(), getLastErrorString().c_str());
+ return false;
+ }
+ if (!_spfile.OpenReadOnly(spname.c_str())) {
+ LOG(error, "could not open %s: %s",
+ _spfile.GetFileName(), getLastErrorString().c_str());
+ return false;
+ }
+ if (!_pfile.OpenReadOnly(pname.c_str())) {
+ LOG(error, "could not open %s: %s",
+ _pfile.GetFileName(), getLastErrorString().c_str());
+ return false;
+ }
+
+ _spReadContext.setFile(&_spfile);
+ _spReadContext.setFileSize(_spfile.GetSize());
+ _spReadContext.allocComprBuf(65536u, 32768u);
+ _spd.emptyBuffer(0);
+
+ _pReadContext.setFile(&_pfile);
+ _pReadContext.setFileSize(_pfile.GetSize());
+ _pReadContext.allocComprBuf(65536u, 32768u);
+ _pd.emptyBuffer(0);
+
+ uint64_t fileSize = _ssfile.GetSize();
+ _ssReadContext.setFile(&_ssfile);
+ _ssReadContext.setFileSize(fileSize);
+ _ssReadContext.allocComprBuf((fileSize + sizeof(uint64_t) - 1) /
+ sizeof(uint64_t),
+ 32768u);
+ _ssd.emptyBuffer(0);
+
+ if (_checkPointData) {
+ _ssReadContext.setPosition(_ssReadContext.getCheckPointOffset());
+ if (_ssd._valI >= _ssd._valE)
+ _ssReadContext.readComprBuffer();
+ _spReadContext.setPosition(_spReadContext.getCheckPointOffset());
+ if (_spd._valI >= _spd._valE)
+ _spReadContext.readComprBuffer();
+ _pReadContext.setPosition(_pReadContext.getCheckPointOffset());
+ if (_pd._valI >= _pd._valE)
+ _pReadContext.readComprBuffer();
+ } else {
+ _ssReadContext.readComprBuffer();
+ assert(_ssReadContext.getBufferEndFilePos() >= fileSize);
+ readSSHeader();
+ _spReadContext.readComprBuffer();
+ readSPHeader();
+ _pReadContext.readComprBuffer();
+ readPHeader();
+ }
+
+
+ _ssReader = new SSReader(_ssReadContext,
+ _ssHeaderLen,
+ _ssFileBitSize,
+ _spHeaderLen,
+ _spFileBitSize,
+ _pHeaderLen,
+ _pFileBitSize);
+
+ // Instantiate helper class for reading
+ _pReader = new Reader(*_ssReader,
+ _spd,
+ _pd);
+
+ if (_checkPointData) {
+ _ssReader->checkPointRead(*_checkPointData);
+ _pReader->checkPointRead(*_checkPointData);
+ assert(_checkPointData->empty());
+ } else {
+ _ssReader->setup(_ssd);
+ _pReader->setup();
+ _wordNum = 0;
+ }
+
+ return true;
+}
+
+
+bool
+PageDict4FileSeqRead::close(void)
+{
+ delete _pReader;
+ delete _ssReader;
+ _pReader = NULL;
+ _ssReader = NULL;
+
+ _ssReadContext.dropComprBuf();
+ _spReadContext.dropComprBuf();
+ _pReadContext.dropComprBuf();
+ _ssReadContext.setFile(NULL);
+ _spReadContext.setFile(NULL);
+ _pReadContext.setFile(NULL);
+ _ssfile.Close();
+ _spfile.Close();
+ _pfile.Close();
+ return true;
+}
+
+
+void
+PageDict4FileSeqRead::checkPointWrite(vespalib::nbostream &out)
+{
+ _ssd.checkPointWrite(out);
+ _spReadContext.checkPointWrite(out);
+ _pReadContext.checkPointWrite(out);
+ vespalib::nbostream data;
+ _ssReader->checkPointWrite(data);
+ _pReader->checkPointWrite(data);
+ std::vector<char> checkPointData(data.size());
+ data.read(&checkPointData[0], data.size());
+ out << checkPointData;
+ out << _wordNum;
+ out << _ssCompleted << _ssFileBitSize << _ssHeaderLen;
+ out << _spCompleted << _spFileBitSize << _spHeaderLen;
+ out << _pCompleted << _pFileBitSize << _pHeaderLen;
+}
+
+void
+PageDict4FileSeqRead::checkPointRead(vespalib::nbostream &in)
+{
+ _ssd.checkPointRead(in);
+ _spReadContext.checkPointRead(in);
+ _pReadContext.checkPointRead(in);
+ std::vector<char> checkPointData;
+ in >> checkPointData;
+ _checkPointData.reset(new vespalib::nbostream(checkPointData.size()));
+ _checkPointData->write(&checkPointData[0], checkPointData.size());
+ in >> _wordNum;
+ in >> _ssCompleted >> _ssFileBitSize >> _ssHeaderLen;
+ in >> _spCompleted >> _spFileBitSize >> _spHeaderLen;
+ in >> _pCompleted >> _pFileBitSize >> _pHeaderLen;
+}
+
+void
+PageDict4FileSeqRead::getParams(PostingListParams &params)
+{
+ params.clear();
+ params.set("avgBitsPerDoc", _ssd._avgBitsPerDoc);
+ params.set("minChunkDocs", _ssd._minChunkDocs);
+ params.set("docIdLimit", _ssd._docIdLimit);
+ params.set("numWordIds", _ssd._numWordIds);
+ params.set("numCounts", _ssd._numWordIds);
+}
+
+
+PageDict4FileSeqWrite::PageDict4FileSeqWrite(void)
+ : _pWriter(NULL),
+ _spWriter(NULL),
+ _ssWriter(NULL),
+ _pe(),
+ _pWriteContext(_pe),
+ _pfile(),
+ _spe(),
+ _spWriteContext(_spe),
+ _spfile(),
+ _sse(),
+ _ssWriteContext(_sse),
+ _ssfile(),
+ _pHeaderLen(0),
+ _spHeaderLen(0),
+ _ssHeaderLen(0)
+{
+ _pe.setWriteContext(&_pWriteContext);
+ _spe.setWriteContext(&_spWriteContext);
+ _sse.setWriteContext(&_ssWriteContext);
+}
+
+
+PageDict4FileSeqWrite::~PageDict4FileSeqWrite(void)
+{
+ delete _pWriter;
+ delete _spWriter;
+ delete _ssWriter;
+}
+
+
+void
+PageDict4FileSeqWrite::writeWord(const vespalib::stringref &word,
+ const PostingListCounts &counts)
+{
+ _pWriter->addCounts(word, counts);
+}
+
+
+bool
+PageDict4FileSeqWrite::open(const vespalib::string &name,
+ const TuneFileSeqWrite &tuneFileWrite,
+ const FileHeaderContext &fileHeaderContext)
+{
+ assert(_pWriter == NULL);
+ assert(_spWriter == NULL);
+ assert(_ssWriter == NULL);
+
+ vespalib::string pname = name + ".pdat";
+ vespalib::string spname = name + ".spdat";
+ vespalib::string ssname = name + ".ssdat";
+
+ if (tuneFileWrite.getWantSyncWrites()) {
+ _pfile.EnableSyncWrites();
+ _spfile.EnableSyncWrites();
+ _ssfile.EnableSyncWrites();
+ }
+ if (tuneFileWrite.getWantDirectIO()) {
+ _pfile.EnableDirectIO();
+ _spfile.EnableDirectIO();
+ _ssfile.EnableDirectIO();
+ }
+ bool ok = _pfile.OpenWriteOnly(pname.c_str());
+ assert(ok);
+ (void) ok;
+ _pWriteContext.setFile(&_pfile);
+
+ ok = _spfile.OpenWriteOnly(spname.c_str());
+ assert(ok);
+ _spWriteContext.setFile(&_spfile);
+
+ ok = _ssfile.OpenWriteOnly(ssname.c_str());
+ assert(ok);
+ _ssWriteContext.setFile(&_ssfile);
+
+ if (!_checkPointData) {
+ _pWriteContext.allocComprBuf(65536u, 32768u);
+ _spWriteContext.allocComprBuf(65536u, 32768u);
+ _ssWriteContext.allocComprBuf(65536u, 32768u);
+ }
+
+ uint64_t pFileSize = _pfile.GetSize();
+ uint64_t spFileSize = _spfile.GetSize();
+ uint64_t ssFileSize = _ssfile.GetSize();
+ uint64_t pBufferStartFilePos = _pWriteContext.getBufferStartFilePos();
+ uint64_t spBufferStartFilePos = _spWriteContext.getBufferStartFilePos();
+ uint64_t ssBufferStartFilePos = _ssWriteContext.getBufferStartFilePos();
+ assert(pFileSize >= pBufferStartFilePos);
+ assert(spFileSize >= spBufferStartFilePos);
+ assert(ssFileSize >= ssBufferStartFilePos);
+ _pfile.SetSize(pBufferStartFilePos);
+ _spfile.SetSize(spBufferStartFilePos);
+ _ssfile.SetSize(ssBufferStartFilePos);
+ assert(pBufferStartFilePos == static_cast<uint64_t>(_pfile.GetPosition()));
+ assert(spBufferStartFilePos ==
+ static_cast<uint64_t>(_spfile.GetPosition()));
+ assert(ssBufferStartFilePos ==
+ static_cast<uint64_t>(_ssfile.GetPosition()));
+
+ if (!_checkPointData) {
+ _pe.setupWrite(_pWriteContext);
+ _spe.setupWrite(_spWriteContext);
+ _sse.setupWrite(_ssWriteContext);
+ assert(_pe.getWriteOffset() == 0);
+ assert(_spe.getWriteOffset() == 0);
+ assert(_sse.getWriteOffset() == 0);
+ _spe.copyParams(_sse);
+ _pe.copyParams(_sse);
+ // Write initial file headers
+ makePHeader(fileHeaderContext);
+ makeSPHeader(fileHeaderContext);
+ makeSSHeader(fileHeaderContext);
+ }
+
+ _ssWriter = new SSWriter(_sse);
+ _spWriter = new SPWriter(*_ssWriter, _spe);
+ _pWriter = new PWriter(*_spWriter, _pe);
+ if (_checkPointData) {
+ _ssWriter->checkPointRead(*_checkPointData);
+ _spWriter->checkPointRead(*_checkPointData);
+ _pWriter->checkPointRead(*_checkPointData);
+ assert(_checkPointData->empty());
+ } else {
+ _spWriter->setup();
+ _pWriter->setup();
+ }
+
+ return true;
+}
+
+
+bool
+PageDict4FileSeqWrite::close(void)
+{
+ _pWriter->flush();
+ uint64_t usedPBits = _pe.getWriteOffset();
+ uint64_t usedSPBits = _spe.getWriteOffset();
+ uint64_t usedSSBits = _sse.getWriteOffset();
+ _pe.flush();
+ _pWriteContext.writeComprBuffer(true);
+ _spe.flush();
+ _spWriteContext.writeComprBuffer(true);
+ _sse.flush();
+ _ssWriteContext.writeComprBuffer(true);
+
+ _pWriteContext.dropComprBuf();
+ _pfile.Sync();
+ _pfile.Close();
+ _pWriteContext.setFile(NULL);
+ _spWriteContext.dropComprBuf();
+ _spfile.Sync();
+ _spfile.Close();
+ _spWriteContext.setFile(NULL);
+ _ssWriteContext.dropComprBuf();
+ _ssfile.Sync();
+ _ssfile.Close();
+ _ssWriteContext.setFile(NULL);
+
+ // Update file headers
+ updatePHeader(usedPBits);
+ updateSPHeader(usedSPBits);
+ updateSSHeader(usedSSBits);
+
+ delete _pWriter;
+ delete _spWriter;
+ delete _ssWriter;
+ _pWriter = NULL;
+ _spWriter = NULL;
+ _ssWriter = NULL;
+
+ return true;
+}
+
+
+void
+PageDict4FileSeqWrite::writeSSSubHeader(vespalib::GenericHeader &header)
+{
+ SSEC &e = _sse;
+ typedef vespalib::GenericHeader::Tag Tag;
+ header.putTag(Tag("numWordIds", e._numWordIds));
+ header.putTag(Tag("avgBitsPerDoc", e._avgBitsPerDoc));
+ header.putTag(Tag("minChunkDocs", e._minChunkDocs));
+ header.putTag(Tag("docIdLimit", e._docIdLimit));
+}
+
+
+void
+PageDict4FileSeqWrite::makePHeader(const FileHeaderContext &fileHeaderContext)
+{
+ PEC &e = _pe;
+ ComprFileWriteContext &wc = _pWriteContext;
+
+ // subheader only written to SS file.
+
+ typedef vespalib::GenericHeader::Tag Tag;
+ vespalib::FileHeader header(headerAlign);
+
+ fileHeaderContext.addTags(header, _pfile.GetFileName());
+ header.putTag(Tag("frozen", 0));
+ header.putTag(Tag("fileBitSize", 0));
+ header.putTag(Tag("format.0", myPId));
+ header.putTag(Tag("endian", "big"));
+ header.putTag(Tag("desc", "Dictionary page file"));
+ e.setupWrite(wc);
+ e.writeHeader(header);
+ e.smallAlign(64);
+ e.flush();
+ uint32_t headerLen = header.getSize();
+ headerLen += (-headerLen & 7);
+ assert(e.getWriteOffset() == headerLen * 8);
+ assert((e.getWriteOffset() & 63) == 0); // Header must be word aligned
+ if (_pHeaderLen != 0) {
+ assert(_pHeaderLen == headerLen);
+ }
+ _pHeaderLen = headerLen;
+}
+
+
+void
+PageDict4FileSeqWrite::makeSPHeader(const FileHeaderContext &fileHeaderContext)
+{
+ SPEC &e = _spe;
+ ComprFileWriteContext &wc = _spWriteContext;
+
+ // subheader only written to SS file.
+
+ typedef vespalib::GenericHeader::Tag Tag;
+ vespalib::FileHeader header(headerAlign);
+
+ fileHeaderContext.addTags(header, _spfile.GetFileName());
+ header.putTag(Tag("frozen", 0));
+ header.putTag(Tag("fileBitSize", 0));
+ header.putTag(Tag("format.0", mySPId));
+ header.putTag(Tag("endian", "big"));
+ header.putTag(Tag("desc", "Dictionary sparse page file"));
+ e.setupWrite(wc);
+ e.writeHeader(header);
+ e.smallAlign(64);
+ e.flush();
+ uint32_t headerLen = header.getSize();
+ headerLen += (-headerLen & 7);
+ assert(e.getWriteOffset() == headerLen * 8);
+ assert((e.getWriteOffset() & 63) == 0); // Header must be word aligned
+ if (_spHeaderLen != 0) {
+ assert(_spHeaderLen == headerLen);
+ }
+ _spHeaderLen = headerLen;
+}
+
+
+void
+PageDict4FileSeqWrite::makeSSHeader(const FileHeaderContext &fileHeaderContext)
+{
+ SSEC &e = _sse;
+ ComprFileWriteContext &wc = _ssWriteContext;
+
+ typedef vespalib::GenericHeader::Tag Tag;
+ vespalib::FileHeader header(headerAlign);
+
+ fileHeaderContext.addTags(header, _ssfile.GetFileName());
+ header.putTag(Tag("frozen", 0));
+ header.putTag(Tag("fileBitSize", 0));
+ header.putTag(Tag("format.0", mySSId));
+ header.putTag(Tag("endian", "big"));
+ header.putTag(Tag("desc", "Dictionary sparse sparse file"));
+ writeSSSubHeader(header);
+
+ e.setupWrite(wc);
+ e.writeHeader(header);
+ e.smallAlign(64);
+ e.flush();
+ uint32_t headerLen = header.getSize();
+ headerLen += (-headerLen & 7);
+ assert(e.getWriteOffset() == headerLen * 8);
+ assert((e.getWriteOffset() & 63) == 0); // Header must be word aligned
+ if (_ssHeaderLen != 0) {
+ assert(_ssHeaderLen == headerLen);
+ }
+ _ssHeaderLen = headerLen;
+}
+
+
+void
+PageDict4FileSeqWrite::updatePHeader(uint64_t fileBitSize)
+{
+ vespalib::FileHeader h(headerAlign);
+ FastOS_File f;
+ f.OpenReadWrite(_pfile.GetFileName());
+ h.readFile(f);
+ FileHeaderContext::setFreezeTime(h);
+ typedef vespalib::GenericHeader::Tag Tag;
+ h.putTag(Tag("frozen", 1));
+ h.putTag(Tag("fileBitSize", fileBitSize));
+ h.rewriteFile(f);
+ f.Sync();
+ f.Close();
+}
+
+
+void
+PageDict4FileSeqWrite::updateSPHeader(uint64_t fileBitSize)
+{
+ vespalib::FileHeader h(headerAlign);
+ FastOS_File f;
+ f.OpenReadWrite(_spfile.GetFileName());
+ h.readFile(f);
+ FileHeaderContext::setFreezeTime(h);
+ typedef vespalib::GenericHeader::Tag Tag;
+ h.putTag(Tag("frozen", 1));
+ h.putTag(Tag("fileBitSize", fileBitSize));
+ h.rewriteFile(f);
+ f.Sync();
+ f.Close();
+}
+
+
+void
+PageDict4FileSeqWrite::updateSSHeader(uint64_t fileBitSize)
+{
+ vespalib::FileHeader h(headerAlign);
+ FastOS_File f;
+ f.OpenReadWrite(_ssfile.GetFileName());
+ h.readFile(f);
+ FileHeaderContext::setFreezeTime(h);
+ typedef vespalib::GenericHeader::Tag Tag;
+ h.putTag(Tag("frozen", 1));
+ h.putTag(Tag("fileBitSize", fileBitSize));
+ uint64_t wordNum = _pWriter->getWordNum();
+ assert(wordNum <= _sse._numWordIds);
+ h.putTag(Tag("numWordIds", wordNum));
+ h.rewriteFile(f);
+ f.Sync();
+ f.Close();
+}
+
+
+void
+PageDict4FileSeqWrite::checkPointWrite(vespalib::nbostream &out)
+{
+ _ssWriteContext.writeComprBuffer(true);
+ _spWriteContext.writeComprBuffer(true);
+ _pWriteContext.writeComprBuffer(true);
+ _ssWriteContext.checkPointWrite(out);
+ _spWriteContext.checkPointWrite(out);
+ _pWriteContext.checkPointWrite(out);
+ vespalib::nbostream data;
+ _ssWriter->checkPointWrite(data);
+ _spWriter->checkPointWrite(data);
+ _pWriter->checkPointWrite(data);
+ std::vector<char> checkPointData(data.size());
+ data.read(&checkPointData[0], data.size());
+ out << checkPointData;
+ out << _ssHeaderLen << _spHeaderLen << _pHeaderLen;
+ _ssfile.Sync();
+ _spfile.Sync();
+ _pfile.Sync();
+}
+
+
+void
+PageDict4FileSeqWrite::checkPointRead(vespalib::nbostream &in)
+{
+ _ssWriteContext.checkPointRead(in);
+ _spWriteContext.checkPointRead(in);
+ _pWriteContext.checkPointRead(in);
+ std::vector<char> checkPointData;
+ in >> checkPointData;
+ _checkPointData.reset(new vespalib::nbostream(checkPointData.size()));
+ _checkPointData->write(&checkPointData[0], checkPointData.size());
+ in >> _ssHeaderLen >> _spHeaderLen >> _pHeaderLen;
+}
+
+
+void
+PageDict4FileSeqWrite::setParams(const PostingListParams &params)
+{
+ params.get("avgBitsPerDoc", _sse._avgBitsPerDoc);
+ params.get("minChunkDocs", _sse._minChunkDocs);
+ params.get("docIdLimit", _sse._docIdLimit);
+ params.get("numWordIds", _sse._numWordIds);
+ _spe.copyParams(_sse);
+ _pe.copyParams(_sse);
+}
+
+
+void
+PageDict4FileSeqWrite::getParams(PostingListParams &params)
+{
+ params.clear();
+ params.set("avgBitsPerDoc", _sse._avgBitsPerDoc);
+ params.set("minChunkDocs", _sse._minChunkDocs);
+ params.set("docIdLimit", _sse._docIdLimit);
+ params.set("numWordIds", _sse._numWordIds);
+}
+
+} // namespace diskindex
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/diskindex/pagedict4file.h b/searchlib/src/vespa/searchlib/diskindex/pagedict4file.h
new file mode 100644
index 00000000000..bc080fc58c3
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/diskindex/pagedict4file.h
@@ -0,0 +1,239 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/searchlib/index/dictionaryfile.h>
+#include <vespa/searchlib/bitcompression/compression.h>
+#include <vespa/searchlib/bitcompression/countcompression.h>
+#include <vespa/searchlib/bitcompression/pagedict4.h>
+
+namespace vespalib
+{
+
+class GenericHeader;
+
+}
+
+namespace search
+{
+
+namespace diskindex
+{
+
+/**
+ * Dictionary file containing words and counts for words.
+ */
+class PageDict4FileSeqRead : public index::DictionaryFileSeqRead
+{
+ typedef bitcompression::PostingListCountFileDecodeContext DC;
+ typedef bitcompression::PageDict4SSReader SSReader;
+ typedef bitcompression::PageDict4Reader Reader;
+
+ typedef index::PostingListCounts PostingListCounts;
+
+ Reader *_pReader;
+ SSReader *_ssReader;
+
+ DC _ssd;
+ ComprFileReadContext _ssReadContext;
+ FastOS_File _ssfile;
+
+ DC _spd;
+ ComprFileReadContext _spReadContext;
+ FastOS_File _spfile;
+
+ DC _pd;
+ ComprFileReadContext _pReadContext;
+ FastOS_File _pfile;
+
+ uint64_t _ssFileBitSize;
+ uint64_t _spFileBitSize;
+ uint64_t _pFileBitSize;
+ uint32_t _ssHeaderLen;
+ uint32_t _spHeaderLen;
+ uint32_t _pHeaderLen;
+
+ bool _ssCompleted;
+ bool _spCompleted;
+ bool _pCompleted;
+
+ uint64_t _wordNum;
+
+ std::unique_ptr<vespalib::nbostream> _checkPointData;
+
+ void
+ readSSHeader();
+
+ void
+ readSPHeader(void);
+
+ void
+ readPHeader(void);
+
+public:
+
+ PageDict4FileSeqRead(void);
+
+ virtual
+ ~PageDict4FileSeqRead(void);
+
+ /**
+ * Read word and counts. Only nonzero counts are returned. If at
+ * end of dictionary then noWordNumHigh() is returned as word number.
+ */
+ virtual void
+ readWord(vespalib::string &word,
+ uint64_t &wordNum,
+ PostingListCounts &counts) override;
+
+ virtual bool open(const vespalib::string &name,
+ const TuneFileSeqRead &tuneFileRead) override;
+
+ /**
+ * Close dictionary file.
+ */
+ virtual bool close(void) override;
+
+ /**
+ * Checkpoint write. Used at semi-regular intervals during indexing
+ * to allow for continued indexing after an interrupt. Implies
+ * flush from memory to disk, and possibly also sync to permanent
+ * storage media.
+ */
+ virtual void
+ checkPointWrite(vespalib::nbostream &out) override;
+
+ /**
+ * Checkpoint read. Used when resuming indexing after an interrupt.
+ */
+ virtual void
+ checkPointRead(vespalib::nbostream &in) override;
+
+ /*
+ * Get current parameters.
+ */
+ virtual void
+ getParams(index::PostingListParams &params) override;
+};
+
+/**
+ * Interface for dictionary file containing words and count for words.
+ */
+class PageDict4FileSeqWrite : public index::DictionaryFileSeqWrite
+{
+ typedef bitcompression::PostingListCountFileEncodeContext EC;
+ typedef EC SPEC;
+ typedef EC PEC;
+ typedef EC SSEC;
+ typedef bitcompression::PageDict4SSWriter SSWriter;
+ typedef bitcompression::PageDict4SPWriter SPWriter;
+ typedef bitcompression::PageDict4PWriter PWriter;
+
+ typedef index::PostingListCounts PostingListCounts;
+
+ PWriter *_pWriter;
+ SPWriter *_spWriter;
+ SSWriter *_ssWriter;
+
+ EC _pe;
+ ComprFileWriteContext _pWriteContext;
+ FastOS_File _pfile;
+
+ EC _spe;
+ ComprFileWriteContext _spWriteContext;
+ FastOS_File _spfile;
+
+ EC _sse;
+ ComprFileWriteContext _ssWriteContext;
+ FastOS_File _ssfile;
+
+ uint32_t _pHeaderLen; // Length of header for page file (bytes)
+ uint32_t _spHeaderLen; // Length of header for sparse page file (bytes)
+ uint32_t _ssHeaderLen; // Length of header for sparse sparse file (bytes)
+
+ std::unique_ptr<vespalib::nbostream> _checkPointData;
+
+ void
+ writeIndexNames(vespalib::GenericHeader &header);
+
+ void
+ writeSSSubHeader(vespalib::GenericHeader &header);
+
+ void
+ makePHeader(const search::common::FileHeaderContext &fileHeaderContext);
+
+ void
+ makeSPHeader(const search::common::FileHeaderContext &fileHeaderContext);
+
+ void
+ makeSSHeader(const search::common::FileHeaderContext &fileHeaderContext);
+
+ void
+ updatePHeader(uint64_t fileBitSize);
+
+ void
+ updateSPHeader(uint64_t fileBitSize);
+
+ void
+ updateSSHeader(uint64_t fileBitSize);
+
+public:
+ PageDict4FileSeqWrite(void);
+
+ virtual
+ ~PageDict4FileSeqWrite(void);
+
+ /**
+ * Write word and counts. Only nonzero counts should be supplied.
+ */
+ virtual void
+ writeWord(const vespalib::stringref &word,
+ const PostingListCounts &counts) override;
+
+ /**
+ * Open dictionary file for sequential write. The index with most
+ * words should be first for optimal compression.
+ */
+ virtual bool
+ open(const vespalib::string &name,
+ const TuneFileSeqWrite &tuneFileWrite,
+ const search::common::FileHeaderContext &fileHeaderContext) override;
+
+ /**
+ * Close dictionary file.
+ */
+ virtual bool
+ close(void) override;
+
+ /**
+ * Checkpoint write. Used at semi-regular intervals during indexing
+ * to allow for continued indexing after an interrupt. Implies
+ * flush from memory to disk, and possibly also sync to permanent
+ * storage media.
+ */
+ virtual void
+ checkPointWrite(vespalib::nbostream &out) override;
+
+ /**
+ * Checkpoint read. Used when resuming indexing after an interrupt.
+ */
+ virtual void
+ checkPointRead(vespalib::nbostream &in) override;
+
+ /*
+ * Set parameters.
+ */
+ virtual void
+ setParams(const index::PostingListParams &params) override;
+
+ /*
+ * Get current parameters.
+ */
+ virtual void
+ getParams(index::PostingListParams &params) override;
+};
+
+
+} // namespace diskindex
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/diskindex/pagedict4randread.cpp b/searchlib/src/vespa/searchlib/diskindex/pagedict4randread.cpp
new file mode 100644
index 00000000000..f1261c83a51
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/diskindex/pagedict4randread.cpp
@@ -0,0 +1,300 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".diskindex.pagedict4randread");
+#include <vespa/searchlib/util/filekit.h>
+#include "pagedict4randread.h"
+#include <vespa/vespalib/stllike/asciistream.h>
+#include <vespa/vespalib/data/fileheader.h>
+
+
+namespace
+{
+
+vespalib::string myPId("PageDict4P.1");
+vespalib::string mySPId("PageDict4SP.1");
+vespalib::string mySSId("PageDict4SS.1");
+vespalib::string emptyId;
+vespalib::string emptyStr;
+
+}
+
+using vespalib::getLastErrorString;
+
+namespace search
+{
+
+namespace diskindex
+{
+
+
+PageDict4RandRead::PageDict4RandRead(void)
+ : DictionaryFileRandRead(),
+ _ssReader(NULL),
+ _ssd(),
+ _ssReadContext(_ssd),
+ _ssfile(),
+ _spfile(),
+ _pfile(),
+ _ssFileBitSize(0u),
+ _spFileBitSize(0u),
+ _pFileBitSize(0u),
+ _ssHeaderLen(0u),
+ _spHeaderLen(0u),
+ _pHeaderLen(0u)
+{
+ _ssd.setReadContext(&_ssReadContext);
+}
+
+
+PageDict4RandRead::~PageDict4RandRead(void)
+{
+ delete _ssReader;
+}
+
+
+void
+PageDict4RandRead::readSSHeader()
+{
+ DC &ssd = _ssd;
+
+ vespalib::FileHeader header;
+ uint32_t headerLen = ssd.readHeader(header, _ssfile.getSize());
+ assert(header.hasTag("frozen"));
+ assert(header.hasTag("fileBitSize"));
+ assert(header.hasTag("format.0"));
+ assert(!header.hasTag("format.1"));
+ assert(header.hasTag("numWordIds"));
+ assert(header.hasTag("avgBitsPerDoc"));
+ assert(header.hasTag("minChunkDocs"));
+ assert(header.hasTag("docIdLimit"));
+ assert(header.hasTag("endian"));
+ assert(header.getTag("frozen").asInteger() != 0);
+ _ssFileBitSize = header.getTag("fileBitSize").asInteger();
+ assert(header.getTag("format.0").asString() == mySSId);
+ ssd._numWordIds = header.getTag("numWordIds").asInteger();
+ ssd._avgBitsPerDoc = header.getTag("avgBitsPerDoc").asInteger();
+ ssd._minChunkDocs = header.getTag("minChunkDocs").asInteger();
+ ssd._docIdLimit = header.getTag("docIdLimit").asInteger();
+
+ assert(header.getTag("endian").asString() == "big");
+ ssd.smallAlign(64);
+ uint32_t minHeaderLen = header.getSize();
+ minHeaderLen += (-minHeaderLen & 7);
+ assert(headerLen >= minHeaderLen);
+ assert(ssd.getReadOffset() == headerLen * 8);
+ _ssHeaderLen = headerLen;
+}
+
+
+void
+PageDict4RandRead::readSPHeader(void)
+{
+ DC d;
+ ComprFileReadContext rc(d);
+
+ d.setReadContext(&rc);
+ rc.setFile(&_spfile);
+ rc.setFileSize(_spfile.GetSize());
+ rc.allocComprBuf(512, 32768u);
+ d.emptyBuffer(0);
+ rc.readComprBuffer();
+
+ vespalib::FileHeader header;
+ uint32_t headerLen = d.readHeader(header, _spfile.getSize());
+ assert(header.hasTag("frozen"));
+ assert(header.hasTag("fileBitSize"));
+ assert(header.hasTag("format.0"));
+ assert(!header.hasTag("format.1"));
+ assert(header.hasTag("endian"));
+ assert(header.getTag("frozen").asInteger() != 0);
+ _spFileBitSize = header.getTag("fileBitSize").asInteger();
+ assert(header.getTag("format.0").asString() == mySPId);
+ assert(header.getTag("endian").asString() == "big");
+ d.smallAlign(64);
+ uint32_t minHeaderLen = header.getSize();
+ minHeaderLen += (-minHeaderLen & 7);
+ assert(headerLen >= minHeaderLen);
+ assert(d.getReadOffset() == headerLen * 8);
+ _spHeaderLen = headerLen;
+}
+
+
+void
+PageDict4RandRead::readPHeader(void)
+{
+ DC d;
+ ComprFileReadContext rc(d);
+
+ d.setReadContext(&rc);
+ rc.setFile(&_pfile);
+ rc.setFileSize(_pfile.GetSize());
+ rc.allocComprBuf(512, 32768u);
+ d.emptyBuffer(0);
+ rc.readComprBuffer();
+
+ vespalib::FileHeader header;
+ uint32_t headerLen = d.readHeader(header, _pfile.getSize());
+ assert(header.hasTag("frozen"));
+ assert(header.hasTag("fileBitSize"));
+ assert(header.hasTag("format.0"));
+ assert(!header.hasTag("format.1"));
+ assert(header.hasTag("endian"));
+ assert(header.getTag("frozen").asInteger() != 0);
+ _pFileBitSize = header.getTag("fileBitSize").asInteger();
+ assert(header.getTag("format.0").asString() == myPId);
+ assert(header.getTag("endian").asString() == "big");
+ d.smallAlign(64);
+ uint32_t minHeaderLen = header.getSize();
+ minHeaderLen += (-minHeaderLen & 7);
+ assert(headerLen >= minHeaderLen);
+ assert(d.getReadOffset() == headerLen * 8);
+ _pHeaderLen = headerLen;
+}
+
+
+bool
+PageDict4RandRead::lookup(const vespalib::stringref &word,
+ uint64_t &wordNum,
+ PostingListOffsetAndCounts &offsetAndCounts)
+{
+ SSLookupRes ssRes(_ssReader->lookup(word));
+ if (!ssRes._res) {
+ offsetAndCounts._offset = ssRes._l6StartOffset._fileOffset;
+ offsetAndCounts._accNumDocs = ssRes._l6StartOffset._accNumDocs;
+ wordNum = ssRes._l6WordNum; // XXX ?
+ offsetAndCounts._counts.clear();
+ return false;
+ }
+
+ if (ssRes._overflow) {
+ offsetAndCounts._offset = ssRes._startOffset._fileOffset;
+ offsetAndCounts._accNumDocs = ssRes._startOffset._accNumDocs;
+ wordNum = ssRes._l6WordNum;
+ offsetAndCounts._counts = ssRes._counts;
+ return true;
+ } else {
+ SPLookupRes spRes;
+ size_t pageSize = PageDict4PageParams::getPageByteSize();
+ const char *spData = static_cast<const char *>
+ (_spfile.MemoryMapPtr(0));
+ spRes.lookup(*_ssReader,
+ spData + pageSize * ssRes._sparsePageNum,
+ word,
+ ssRes._l6Word,
+ ssRes._lastWord,
+ ssRes._l6StartOffset,
+ ssRes._l6WordNum,
+ ssRes._pageNum);
+
+ PLookupRes pRes;
+ const char *pData = static_cast<const char *>
+ (_pfile.MemoryMapPtr(0));
+ pRes.lookup(*_ssReader,
+ pData + pageSize * spRes._pageNum,
+ word,
+ spRes._l3Word,
+ spRes._lastWord,
+ spRes._l3StartOffset,
+ spRes._l3WordNum);
+ offsetAndCounts._offset = pRes._startOffset._fileOffset;
+ offsetAndCounts._accNumDocs = pRes._startOffset._accNumDocs;
+ wordNum = pRes._wordNum;
+ if (!pRes._res) {
+ offsetAndCounts._counts.clear();
+ return false;
+ }
+ offsetAndCounts._counts = pRes._counts;
+ return true;
+ }
+}
+
+
+bool
+PageDict4RandRead::open(const vespalib::string &name,
+ const TuneFileRandRead &tuneFileRead)
+{
+ vespalib::string pname = name + ".pdat";
+ vespalib::string spname = name + ".spdat";
+ vespalib::string ssname = name + ".ssdat";
+
+ if (tuneFileRead.getWantMemoryMap() || true) {
+ int mmapFlags(tuneFileRead.getMemoryMapFlags());
+ _ssfile.enableMemoryMap(mmapFlags);
+ _spfile.enableMemoryMap(mmapFlags);
+ _pfile.enableMemoryMap(mmapFlags);
+ } else if (tuneFileRead.getWantDirectIO()) {
+ _ssfile.EnableDirectIO();
+ _spfile.EnableDirectIO();
+ _pfile.EnableDirectIO();
+ }
+
+ if (!_ssfile.OpenReadOnly(ssname.c_str())) {
+ LOG(error, "could not open %s: %s",
+ _ssfile.GetFileName(), getLastErrorString().c_str());
+ return false;
+ }
+ if (!_spfile.OpenReadOnly(spname.c_str())) {
+ LOG(error, "could not open %s: %s",
+ _spfile.GetFileName(), getLastErrorString().c_str());
+ return false;
+ }
+ if (!_pfile.OpenReadOnly(pname.c_str())) {
+ LOG(error, "could not open %s: %s",
+ _pfile.GetFileName(), getLastErrorString().c_str());
+ return false;
+ }
+
+ uint64_t fileSize = _ssfile.GetSize();
+ _ssReadContext.setFile(&_ssfile);
+ _ssReadContext.setFileSize(fileSize);
+ _ssReadContext.allocComprBuf((fileSize + sizeof(uint64_t) - 1) /
+ sizeof(uint64_t),
+ 32768u);
+ _ssd.emptyBuffer(0);
+ _ssReadContext.readComprBuffer();
+ assert(_ssReadContext.getBufferEndFilePos() >= fileSize);
+
+ readSSHeader();
+ readSPHeader();
+ readPHeader();
+
+ _ssReader = new SSReader(_ssReadContext,
+ _ssHeaderLen,
+ _ssFileBitSize,
+ _spHeaderLen,
+ _spFileBitSize,
+ _pHeaderLen,
+ _pFileBitSize);
+ _ssReader->setup(_ssd);
+
+ return true;
+}
+
+
+bool
+PageDict4RandRead::close(void)
+{
+ delete _ssReader;
+ _ssReader = NULL;
+
+ _ssReadContext.dropComprBuf();
+ _ssReadContext.setFile(NULL);
+ _ssfile.Close();
+ _spfile.Close();
+ _pfile.Close();
+ return true;
+}
+
+
+uint64_t
+PageDict4RandRead::getNumWordIds(void) const
+{
+ return _ssd._numWordIds;
+}
+
+
+} // namespace diskindex
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/diskindex/pagedict4randread.h b/searchlib/src/vespa/searchlib/diskindex/pagedict4randread.h
new file mode 100644
index 00000000000..fba9dfd483b
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/diskindex/pagedict4randread.h
@@ -0,0 +1,85 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/index/dictionaryfile.h>
+#include <vespa/searchlib/bitcompression/compression.h>
+#include <vespa/searchlib/bitcompression/countcompression.h>
+#include <vespa/searchlib/bitcompression/pagedict4.h>
+
+namespace search
+{
+
+namespace diskindex
+{
+
+class PageDict4RandRead : public index::DictionaryFileRandRead
+{
+ typedef bitcompression::PostingListCountFileDecodeContext DC;
+ typedef bitcompression::PageDict4SSReader SSReader;
+
+ typedef bitcompression::PageDict4SSLookupRes SSLookupRes;
+ typedef bitcompression::PageDict4SPLookupRes SPLookupRes;
+ typedef bitcompression::PageDict4PLookupRes PLookupRes;
+ typedef bitcompression::PageDict4PageParams PageDict4PageParams;
+
+ typedef index::PostingListCounts PostingListCounts;
+ typedef index::PostingListOffsetAndCounts PostingListOffsetAndCounts;
+
+ SSReader *_ssReader;
+
+ DC _ssd;
+ ComprFileReadContext _ssReadContext;
+ FastOS_File _ssfile;
+ FastOS_File _spfile;
+ FastOS_File _pfile;
+
+ uint64_t _ssFileBitSize;
+ uint64_t _spFileBitSize;
+ uint64_t _pFileBitSize;
+ uint32_t _ssHeaderLen;
+ uint32_t _spHeaderLen;
+ uint32_t _pHeaderLen;
+
+ void
+ readSSHeader();
+
+ void
+ readSPHeader(void);
+
+ void
+ readPHeader(void);
+
+public:
+ PageDict4RandRead(void);
+
+ virtual
+ ~PageDict4RandRead(void);
+
+ virtual bool
+ lookup(const vespalib::stringref &word,
+ uint64_t &wordNum,
+ PostingListOffsetAndCounts &offsetAndCounts);
+
+ /**
+ * Open dictionary file for random read.
+ */
+ virtual bool open(const vespalib::string &name,
+ const TuneFileRandRead &tuneFileRead);
+
+ /**
+ * Close dictionary file.
+ */
+ virtual bool close(void);
+
+ virtual uint64_t
+ getNumWordIds(void) const;
+};
+
+
+} // namespace diskindex
+
+} // namespace search
+
+
+
diff --git a/searchlib/src/vespa/searchlib/diskindex/wordnummapper.cpp b/searchlib/src/vespa/searchlib/diskindex/wordnummapper.cpp
new file mode 100644
index 00000000000..3a66fc05685
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/diskindex/wordnummapper.cpp
@@ -0,0 +1,110 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".diskindex.wordnummapper");
+#include <vespa/vespalib/objects/nbostream.h>
+#include <vespa/fastlib/io/bufferedfile.h>
+#include "wordnummapper.h"
+
+namespace search
+{
+
+namespace diskindex
+{
+
+WordNumMapping::WordNumMapping(void)
+ : _old2newwords(),
+ _oldDictSize(0u)
+{
+}
+
+
+void
+WordNumMapping::readMappingFile(const vespalib::string &name,
+ const TuneFileSeqRead &tuneFileRead)
+{
+ // Open word mapping file
+ Fast_BufferedFile old2newwordfile(new FastOS_File);
+ if (tuneFileRead.getWantDirectIO())
+ old2newwordfile.EnableDirectIO();
+ // XXX no checking for success
+ old2newwordfile.ReadOpen(name.c_str());
+ int64_t tempfilesize = old2newwordfile.GetSize();
+ uint64_t tempfileentries = static_cast<uint64_t>(tempfilesize /
+ sizeof(uint64_t));
+ Array &map = _old2newwords;
+ map.resize(tempfileentries + 2);
+ _oldDictSize = tempfileentries;
+
+ old2newwordfile.Read(&map[1],
+ static_cast<size_t>(tempfilesize));
+ old2newwordfile.Close();
+ map[0] = noWordNum();
+ map[tempfileentries + 1] = noWordNumHigh();
+}
+
+
+void
+WordNumMapping::noMappingFile(void)
+{
+ Array &map = _old2newwords;
+ map.resize(2);
+ map[0] = noWordNum();
+ map[1] = noWordNumHigh();
+ _oldDictSize = 0;
+}
+
+
+void
+WordNumMapping::clear(void)
+{
+ Array &map = _old2newwords;
+ map.clear();
+ _oldDictSize = 0;
+}
+
+
+void
+WordNumMapping::setup(uint32_t numWordIds)
+{
+ _oldDictSize = numWordIds;
+}
+
+
+void
+WordNumMapper::sanityCheck(bool allowHoles)
+{
+ uint64_t dictSize = getMaxWordNum();
+ uint64_t mappedWordNum = map(0u);
+ assert(mappedWordNum == 0u);
+ for (uint64_t wordNum = 1; wordNum <= dictSize; ++wordNum) {
+ uint64_t prevMappedWordNum = mappedWordNum;
+ mappedWordNum = map(wordNum);
+ if (mappedWordNum == 0u && allowHoles)
+ continue; // In case some words are being removed
+ assert(mappedWordNum > prevMappedWordNum);
+ (void) prevMappedWordNum;
+ }
+}
+
+
+uint64_t
+WordNumMapping::getMaxMappedWordNum(void) const
+{
+ WordNumMapper mapper(*this);
+ return mapper.getMaxMappedWordNum();
+}
+
+
+void
+WordNumMapping::sanityCheck(bool allowHoles)
+{
+ WordNumMapper mapper(*this);
+ mapper.sanityCheck(allowHoles);
+}
+
+
+} // namespace diskindex
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/diskindex/wordnummapper.h b/searchlib/src/vespa/searchlib/diskindex/wordnummapper.h
new file mode 100644
index 00000000000..a1a72757f22
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/diskindex/wordnummapper.h
@@ -0,0 +1,137 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/vespalib/util/array.h>
+#include <vespa/searchlib/common/tunefileinfo.h>
+
+namespace search
+{
+
+namespace diskindex
+{
+
+class WordNumMapper;
+
+class WordNumMapping
+{
+ typedef vespalib::Array<uint64_t, vespalib::DefaultAlloc> Array;
+
+ static uint64_t
+ noWordNumHigh(void)
+ {
+ return std::numeric_limits<uint64_t>::max();
+ }
+
+ static uint64_t
+ noWordNum(void)
+ {
+ return 0u;
+ }
+
+ Array _old2newwords;
+ uint64_t _oldDictSize;
+public:
+
+ WordNumMapping(void);
+
+ const uint64_t *
+ getOld2NewWordNums(void) const
+ {
+ return (_old2newwords.empty())
+ ? NULL
+ : &_old2newwords[0];
+ }
+
+ uint64_t
+ getOldDictSize(void) const
+ {
+ return _oldDictSize;
+ }
+
+ void
+ readMappingFile(const vespalib::string &name,
+ const TuneFileSeqRead &tuneFileRead);
+
+ void
+ noMappingFile(void);
+
+ void
+ clear(void);
+
+ void
+ setup(uint32_t numWordIds);
+
+ uint64_t
+ getMaxMappedWordNum(void) const;
+
+ void
+ sanityCheck(bool allowHoles);
+};
+
+
+class WordNumMapper
+{
+ static uint64_t
+ noWordNumHigh(void)
+ {
+ return std::numeric_limits<uint64_t>::max();
+ }
+
+ static uint64_t
+ noWordNum(void)
+ {
+ return 0u;
+ }
+
+ const uint64_t *_old2newwords;
+ uint64_t _oldDictSize;
+
+public:
+ WordNumMapper(void)
+ : _old2newwords(NULL),
+ _oldDictSize(0)
+ {
+ }
+
+ WordNumMapper(const WordNumMapping &mapping)
+ : _old2newwords(NULL),
+ _oldDictSize(0)
+ {
+ setup(mapping);
+ }
+
+ void
+ setup(const WordNumMapping &mapping)
+ {
+ _old2newwords = mapping.getOld2NewWordNums();
+ _oldDictSize = mapping.getOldDictSize();
+ }
+
+ uint64_t
+ map(uint32_t wordNum) const
+ {
+ return (_old2newwords != NULL)
+ ? _old2newwords[wordNum]
+ : wordNum;
+ }
+
+ uint64_t
+ getMaxWordNum(void) const
+ {
+ return _oldDictSize;
+ }
+
+ uint64_t
+ getMaxMappedWordNum(void) const
+ {
+ return map(_oldDictSize);
+ }
+
+ void
+ sanityCheck(bool allowHoles);
+};
+
+} // namespace diskindex
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/diskindex/zcposocc.cpp b/searchlib/src/vespa/searchlib/diskindex/zcposocc.cpp
new file mode 100644
index 00000000000..440a61fcab2
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/diskindex/zcposocc.cpp
@@ -0,0 +1,137 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+#include "zcposocc.h"
+#include <vespa/searchlib/index/postinglistcounts.h>
+#include <vespa/searchlib/index/postinglistcountfile.h>
+#include <vespa/searchlib/index/postinglistfile.h>
+#include <vespa/searchlib/index/docidandfeatures.h>
+
+LOG_SETUP(".diskindex.zcposocc");
+
+namespace search
+{
+
+namespace diskindex
+{
+
+using search::bitcompression::PosOccFieldsParams;
+using search::bitcompression::EG2PosOccDecodeContext;
+using search::bitcompression::EGPosOccDecodeContext;
+using search::index::PostingListCountFileSeqRead;
+using search::index::PostingListCountFileSeqWrite;
+
+Zc4PosOccSeqRead::Zc4PosOccSeqRead(PostingListCountFileSeqRead *countFile)
+ : Zc4PostingSeqRead(countFile),
+ _fieldsParams(),
+ _cookedDecodeContext(&_fieldsParams),
+ _rawDecodeContext(&_fieldsParams)
+{
+ _decodeContext = &_cookedDecodeContext;
+ _decodeContext->setReadContext(&_readContext);
+ _readContext.setDecodeContext(_decodeContext);
+}
+
+
+void
+Zc4PosOccSeqRead::
+setFeatureParams(const PostingListParams &params)
+{
+ bool oldCooked = _decodeContext == &_cookedDecodeContext;
+ bool newCooked = oldCooked;
+ params.get("cooked", newCooked);
+ if (oldCooked != newCooked) {
+ if (newCooked) {
+ _cookedDecodeContext = _rawDecodeContext;
+ _decodeContext = &_cookedDecodeContext;
+ } else {
+ _rawDecodeContext = _cookedDecodeContext;
+ _decodeContext = &_rawDecodeContext;
+ }
+ _readContext.setDecodeContext(_decodeContext);
+ }
+}
+
+
+const vespalib::string &
+Zc4PosOccSeqRead::getSubIdentifier(void)
+{
+ PosOccFieldsParams fieldsParams;
+ EG2PosOccDecodeContext<true> d(&fieldsParams);
+ return d.getIdentifier();
+}
+
+
+Zc4PosOccSeqWrite::Zc4PosOccSeqWrite(const Schema &schema,
+ uint32_t indexId,
+ PostingListCountFileSeqWrite *countFile)
+ : Zc4PostingSeqWrite(countFile),
+ _fieldsParams(),
+ _realEncodeFeatures(&_fieldsParams)
+{
+ _encodeFeatures = &_realEncodeFeatures;
+ _encodeFeatures->setWriteContext(&_featureWriteContext);
+ _featureWriteContext.setEncodeContext(_encodeFeatures);
+ _fieldsParams.setSchemaParams(schema, indexId);
+}
+
+
+ZcPosOccSeqRead::ZcPosOccSeqRead(PostingListCountFileSeqRead *countFile)
+ : ZcPostingSeqRead(countFile),
+ _fieldsParams(),
+ _cookedDecodeContext(&_fieldsParams),
+ _rawDecodeContext(&_fieldsParams)
+{
+ _decodeContext = &_cookedDecodeContext;
+ _decodeContext->setReadContext(&_readContext);
+ _readContext.setDecodeContext(_decodeContext);
+}
+
+
+void
+ZcPosOccSeqRead::
+setFeatureParams(const PostingListParams &params)
+{
+ bool oldCooked = _decodeContext == &_cookedDecodeContext;
+ bool newCooked = oldCooked;
+ params.get("cooked", newCooked);
+ if (oldCooked != newCooked) {
+ if (newCooked) {
+ _cookedDecodeContext = _rawDecodeContext;
+ _decodeContext = &_cookedDecodeContext;
+ } else {
+ _rawDecodeContext = _cookedDecodeContext;
+ _decodeContext = &_rawDecodeContext;
+ }
+ _readContext.setDecodeContext(_decodeContext);
+ }
+}
+
+
+const vespalib::string &
+ZcPosOccSeqRead::getSubIdentifier(void)
+{
+ PosOccFieldsParams fieldsParams;
+ EGPosOccDecodeContext<true> d(&fieldsParams);
+ return d.getIdentifier();
+}
+
+
+ZcPosOccSeqWrite::ZcPosOccSeqWrite(const Schema &schema,
+ uint32_t indexId,
+ PostingListCountFileSeqWrite *countFile)
+ : ZcPostingSeqWrite(countFile),
+ _fieldsParams(),
+ _realEncodeFeatures(&_fieldsParams)
+{
+ _encodeFeatures = &_realEncodeFeatures;
+ _encodeFeatures->setWriteContext(&_featureWriteContext);
+ _featureWriteContext.setEncodeContext(_encodeFeatures);
+ _fieldsParams.setSchemaParams(schema, indexId);
+}
+
+
+} // namespace diskindex
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/diskindex/zcposocc.h b/searchlib/src/vespa/searchlib/diskindex/zcposocc.h
new file mode 100644
index 00000000000..cbd6791198d
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/diskindex/zcposocc.h
@@ -0,0 +1,83 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "zcposting.h"
+#include <vespa/searchlib/bitcompression/posocccompression.h>
+
+namespace search
+{
+
+namespace diskindex
+{
+
+class Zc4PosOccSeqRead : public Zc4PostingSeqRead
+{
+private:
+ bitcompression::PosOccFieldsParams _fieldsParams;
+ bitcompression::EG2PosOccDecodeContextCooked<true> _cookedDecodeContext;
+ bitcompression::EG2PosOccDecodeContext<true> _rawDecodeContext;
+
+public:
+ Zc4PosOccSeqRead(index::PostingListCountFileSeqRead *countFile);
+
+ virtual void
+ setFeatureParams(const PostingListParams &params);
+
+ static const vespalib::string &
+ getSubIdentifier(void);
+};
+
+
+class Zc4PosOccSeqWrite : public Zc4PostingSeqWrite
+{
+private:
+ bitcompression::PosOccFieldsParams _fieldsParams;
+ bitcompression::EG2PosOccEncodeContext<true> _realEncodeFeatures;
+
+public:
+ typedef index::Schema Schema;
+
+ Zc4PosOccSeqWrite(const Schema &schema,
+ uint32_t indexId,
+ index::PostingListCountFileSeqWrite *countFile);
+};
+
+
+class ZcPosOccSeqRead : public ZcPostingSeqRead
+{
+private:
+ bitcompression::PosOccFieldsParams _fieldsParams;
+ bitcompression::EGPosOccDecodeContextCooked<true> _cookedDecodeContext;
+ bitcompression::EGPosOccDecodeContext<true> _rawDecodeContext;
+
+public:
+ ZcPosOccSeqRead(index::PostingListCountFileSeqRead *countFile);
+
+ virtual void
+ setFeatureParams(const PostingListParams &params);
+
+ static const vespalib::string &
+ getSubIdentifier(void);
+};
+
+
+class ZcPosOccSeqWrite : public ZcPostingSeqWrite
+{
+private:
+ bitcompression::PosOccFieldsParams _fieldsParams;
+ bitcompression::EGPosOccEncodeContext<true> _realEncodeFeatures;
+
+public:
+ typedef index::Schema Schema;
+
+ ZcPosOccSeqWrite(const Schema &schema,
+ uint32_t indexId,
+ index::PostingListCountFileSeqWrite *countFile);
+};
+
+
+} // namespace diskindex
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/diskindex/zcposocciterators.cpp b/searchlib/src/vespa/searchlib/diskindex/zcposocciterators.cpp
new file mode 100644
index 00000000000..fdb498e8e28
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/diskindex/zcposocciterators.cpp
@@ -0,0 +1,89 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".diskindex.zcposocciterators");
+#include "zcposocciterators.h"
+
+namespace search
+{
+
+namespace diskindex
+{
+
+using search::fef::TermFieldMatchDataArray;
+using search::bitcompression::PosOccFieldsParams;
+using search::index::PostingListCounts;
+
+#define DEBUG_ZCFILTEROCC_PRINTF 0
+#define DEBUG_ZCFILTEROCC_ASSERT 0
+
+template <bool bigEndian>
+Zc4RareWordPosOccIterator<bigEndian>::
+Zc4RareWordPosOccIterator(Position start, uint64_t bitLength, uint32_t docIdLimit,
+ const PosOccFieldsParams *fieldsParams,
+ const TermFieldMatchDataArray &matchData)
+ : Zc4RareWordPostingIterator<bigEndian>(matchData, start, docIdLimit),
+ _decodeContextReal(start.getOccurences(), start.getBitOffset(), bitLength, fieldsParams)
+{
+ LOG_ASSERT(!matchData.valid() || (fieldsParams->getNumFields() == matchData.size()));
+ _decodeContext = &_decodeContextReal;
+}
+
+
+template <bool bigEndian>
+Zc4PosOccIterator<bigEndian>::
+Zc4PosOccIterator(Position start, uint64_t bitLength, uint32_t docIdLimit,
+ uint32_t minChunkDocs, const PostingListCounts &counts,
+ const PosOccFieldsParams *fieldsParams,
+ const TermFieldMatchDataArray &matchData)
+ : ZcPostingIterator<bigEndian>(minChunkDocs, false, counts, matchData, start, docIdLimit),
+ _decodeContextReal(start.getOccurences(), start.getBitOffset(), bitLength, fieldsParams)
+{
+ LOG_ASSERT(!matchData.valid() || (fieldsParams->getNumFields() == matchData.size()));
+ _decodeContext = &_decodeContextReal;
+}
+
+
+template <bool bigEndian>
+ZcRareWordPosOccIterator<bigEndian>::
+ZcRareWordPosOccIterator(Position start, uint64_t bitLength, uint32_t docIdLimit,
+ const PosOccFieldsParams *fieldsParams,
+ const TermFieldMatchDataArray &matchData)
+ : ZcRareWordPostingIterator<bigEndian>(matchData, start, docIdLimit),
+ _decodeContextReal(start.getOccurences(), start.getBitOffset(), bitLength, fieldsParams)
+{
+ LOG_ASSERT(!matchData.valid() || (fieldsParams->getNumFields() == matchData.size()));
+ _decodeContext = &_decodeContextReal;
+}
+
+
+template <bool bigEndian>
+ZcPosOccIterator<bigEndian>::
+ZcPosOccIterator(Position start, uint64_t bitLength, uint32_t docIdLimit,
+ uint32_t minChunkDocs, const PostingListCounts &counts,
+ const PosOccFieldsParams *fieldsParams,
+ const TermFieldMatchDataArray &matchData)
+ : ZcPostingIterator<bigEndian>(minChunkDocs, true, counts, matchData, start, docIdLimit),
+ _decodeContextReal(start.getOccurences(), start.getBitOffset(), bitLength, fieldsParams)
+{
+ LOG_ASSERT(!matchData.valid() || (fieldsParams->getNumFields() == matchData.size()));
+ _decodeContext = &_decodeContextReal;
+}
+
+
+template class Zc4RareWordPosOccIterator<true>;
+template class Zc4RareWordPosOccIterator<false>;
+
+template class Zc4PosOccIterator<true>;
+template class Zc4PosOccIterator<false>;
+
+template class ZcRareWordPosOccIterator<true>;
+template class ZcRareWordPosOccIterator<false>;
+
+template class ZcPosOccIterator<true>;
+template class ZcPosOccIterator<false>;
+
+} // namespace diskindex
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/diskindex/zcposocciterators.h b/searchlib/src/vespa/searchlib/diskindex/zcposocciterators.h
new file mode 100644
index 00000000000..3af84f888ea
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/diskindex/zcposocciterators.h
@@ -0,0 +1,93 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "zcpostingiterators.h"
+#include <vespa/searchlib/bitcompression/posocccompression.h>
+
+namespace search {
+
+namespace diskindex {
+
+template <bool bigEndian>
+class Zc4RareWordPosOccIterator : public Zc4RareWordPostingIterator<bigEndian>
+{
+private:
+ typedef Zc4RareWordPostingIterator<bigEndian> ParentClass;
+ using ParentClass::_decodeContext;
+
+ typedef bitcompression::EG2PosOccDecodeContextCooked<bigEndian> DecodeContextReal;
+ DecodeContextReal _decodeContextReal;
+public:
+ Zc4RareWordPosOccIterator(Position start, uint64_t bitLength, uint32_t docIdLimit,
+ const bitcompression::PosOccFieldsParams *fieldsParams,
+ const search::fef::TermFieldMatchDataArray &matchData);
+};
+
+
+template <bool bigEndian>
+class Zc4PosOccIterator : public ZcPostingIterator<bigEndian>
+{
+private:
+ typedef ZcPostingIterator<bigEndian> ParentClass;
+ using ParentClass::_decodeContext;
+
+ typedef bitcompression::EG2PosOccDecodeContextCooked<bigEndian> DecodeContext;
+ DecodeContext _decodeContextReal;
+public:
+ Zc4PosOccIterator(Position start, uint64_t bitLength, uint32_t docIdLimit,
+ uint32_t minChunkDocs, const index::PostingListCounts &counts,
+ const bitcompression::PosOccFieldsParams *fieldsParams,
+ const search::fef::TermFieldMatchDataArray &matchData);
+};
+
+
+template <bool bigEndian>
+class ZcRareWordPosOccIterator : public ZcRareWordPostingIterator<bigEndian>
+{
+private:
+ typedef ZcRareWordPostingIterator<bigEndian> ParentClass;
+ using ParentClass::_decodeContext;
+
+ typedef bitcompression::EGPosOccDecodeContextCooked<bigEndian> DecodeContextReal;
+ DecodeContextReal _decodeContextReal;
+public:
+ ZcRareWordPosOccIterator(Position start, uint64_t bitLength, uint32_t docidLimit,
+ const bitcompression::PosOccFieldsParams *fieldsParams,
+ const search::fef::TermFieldMatchDataArray &matchData);
+};
+
+
+template <bool bigEndian>
+class ZcPosOccIterator : public ZcPostingIterator<bigEndian>
+{
+private:
+ typedef ZcPostingIterator<bigEndian> ParentClass;
+ using ParentClass::_decodeContext;
+
+ typedef bitcompression::EGPosOccDecodeContextCooked<bigEndian> DecodeContext;
+ DecodeContext _decodeContextReal;
+public:
+ ZcPosOccIterator(Position start, uint64_t bitLength, uint32_t docidLimit,
+ uint32_t minChunkDocs, const index::PostingListCounts &counts,
+ const bitcompression::PosOccFieldsParams *fieldsParams,
+ const search::fef::TermFieldMatchDataArray &matchData);
+};
+
+
+extern template class Zc4RareWordPosOccIterator<true>;
+extern template class Zc4RareWordPosOccIterator<false>;
+
+extern template class Zc4PosOccIterator<true>;
+extern template class Zc4PosOccIterator<false>;
+
+extern template class ZcRareWordPosOccIterator<true>;
+extern template class ZcRareWordPosOccIterator<false>;
+
+extern template class ZcPosOccIterator<true>;
+extern template class ZcPosOccIterator<false>;
+
+} // namespace diskindex
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/diskindex/zcposoccrandread.cpp b/searchlib/src/vespa/searchlib/diskindex/zcposoccrandread.cpp
new file mode 100644
index 00000000000..26ed327ec52
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/diskindex/zcposoccrandread.cpp
@@ -0,0 +1,381 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".diskindex.zcposoccrandread");
+#include <vespa/searchlib/common/bitvector.h>
+#include "zcposoccrandread.h"
+#include "zcposocciterators.h"
+#include <vespa/vespalib/data/fileheader.h>
+#include <vespa/searchlib/queryeval/emptysearch.h>
+
+using search::bitcompression::EG2PosOccEncodeContext;
+using search::bitcompression::EGPosOccEncodeContext;
+using search::bitcompression::EG2PosOccDecodeContext;
+using search::bitcompression::EG2PosOccDecodeContextCooked;
+using search::bitcompression::EGPosOccDecodeContext;
+using search::bitcompression::EGPosOccDecodeContextCooked;
+using search::bitcompression::PosOccFieldsParams;
+using search::bitcompression::FeatureDecodeContext;
+using search::index::PostingListCounts;
+using search::index::PostingListHandle;
+using search::ComprFileReadContext;
+
+namespace
+{
+
+vespalib::string myId4("Zc.4");
+vespalib::string myId5("Zc.5");
+
+}
+
+namespace search
+{
+
+namespace diskindex
+{
+
+using vespalib::getLastErrorString;
+
+ZcPosOccRandRead::ZcPosOccRandRead(void)
+ : _file(),
+ _fileSize(0),
+ _minChunkDocs(1 << 30),
+ _minSkipDocs(64),
+ _docIdLimit(10000000),
+ _numWords(0),
+ _fileBitSize(0),
+ _headerBitSize(0),
+ _fieldsParams(),
+ _dynamicK(true)
+{
+}
+
+
+ZcPosOccRandRead::~ZcPosOccRandRead(void)
+{
+ if (_file.IsOpened())
+ close();
+}
+
+
+search::queryeval::SearchIterator *
+ZcPosOccRandRead::
+createIterator(const PostingListCounts &counts,
+ const PostingListHandle &handle,
+ const search::fef::TermFieldMatchDataArray &matchData,
+ bool usebitVector) const
+{
+ (void) counts;
+ (void) handle;
+ (void) matchData;
+ (void) usebitVector;
+
+ typedef EGPosOccEncodeContext<true> EC;
+
+ assert((handle._bitLength != 0) == (counts._bitLength != 0));
+ assert((counts._numDocs != 0) == (counts._bitLength != 0));
+ assert(handle._bitOffsetMem <= handle._bitOffset);
+
+ if (handle._bitLength == 0)
+ return new search::queryeval::EmptySearch;
+
+ const char *cmem = static_cast<const char *>(handle._mem);
+ uint64_t memOffset = reinterpret_cast<unsigned long>(cmem) & 7;
+ const uint64_t *mem = reinterpret_cast<const uint64_t *>
+ (cmem - memOffset) +
+ (memOffset * 8 + handle._bitOffset -
+ handle._bitOffsetMem) / 64;
+ int bitOffset = (memOffset * 8 + handle._bitOffset -
+ handle._bitOffsetMem) & 63;
+
+ Position start(mem, bitOffset);
+
+ EGPosOccDecodeContext<true> d(mem, bitOffset, &_fieldsParams);
+
+ UC64_DECODECONTEXT_CONSTRUCTOR(o, d._);
+ uint32_t length;
+ uint64_t val64;
+
+ UC64BE_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_NUMDOCS, EC);
+
+ uint32_t numDocs = static_cast<uint32_t>(val64) + 1;
+
+ if (numDocs < _minSkipDocs) {
+ return new ZcRareWordPosOccIterator<true>(start, handle._bitLength, _docIdLimit, &_fieldsParams, matchData);
+ } else {
+ return new ZcPosOccIterator<true>(start, handle._bitLength, _docIdLimit, _minChunkDocs, counts, &_fieldsParams, matchData);
+ }
+}
+
+
+void
+ZcPosOccRandRead::readPostingList(const PostingListCounts &counts,
+ uint32_t firstSegment,
+ uint32_t numSegments,
+ PostingListHandle &handle)
+{
+ // XXX: Ignore segments for now.
+ (void) firstSegment;
+ (void) numSegments;
+ (void) counts;
+
+ handle.drop();
+ if (handle._bitLength == 0)
+ return;
+
+ uint64_t startOffset = (handle._bitOffset + _headerBitSize) >> 3;
+ // Align start at 64-bit boundary
+ startOffset -= (startOffset & 7);
+
+ void *mapPtr = _file.MemoryMapPtr(startOffset);
+ if (mapPtr != NULL) {
+ handle._mem = mapPtr;
+ handle._allocMem = NULL;
+ handle._allocSize = 0;
+ } else {
+ uint64_t endOffset = (handle._bitOffset + _headerBitSize +
+ handle._bitLength + 7) >> 3;
+ // Align end at 64-bit boundary
+ endOffset += (-endOffset & 7);
+
+ uint64_t vectorLen = endOffset - startOffset;
+ size_t padBefore;
+ size_t padAfter;
+ size_t padExtraAfter; // Decode prefetch space
+ _file.DirectIOPadding(startOffset, vectorLen,
+ padBefore, padAfter);
+ padExtraAfter = 0;
+ if (padAfter < 16)
+ padExtraAfter = 16 - padAfter;
+
+ size_t mallocLen = padBefore + vectorLen + padAfter + padExtraAfter;
+ void *mallocStart = NULL;
+ void *alignedBuffer = NULL;
+ if (mallocLen > 0) {
+ alignedBuffer = _file.AllocateDirectIOBuffer(mallocLen,
+ mallocStart);
+ assert(mallocStart != NULL);
+ assert(endOffset + padAfter + padExtraAfter <= _fileSize);
+ _file.ReadBuf(alignedBuffer,
+ padBefore + vectorLen + padAfter,
+ startOffset - padBefore);
+ }
+ // Zero decode prefetch memory to avoid uninitialized reads
+ if (padExtraAfter > 0) {
+ memset(reinterpret_cast<char *>(alignedBuffer) +
+ padBefore + vectorLen + padAfter,
+ '\0',
+ padExtraAfter);
+ }
+ handle._mem = static_cast<char *>(alignedBuffer) + padBefore;
+ handle._allocMem = mallocStart;
+ handle._allocSize = mallocLen;
+ }
+ handle._bitOffsetMem = (startOffset << 3) - _headerBitSize;
+}
+
+
+bool
+ZcPosOccRandRead::
+open(const vespalib::string &name, const TuneFileRandRead &tuneFileRead)
+{
+ if (tuneFileRead.getWantMemoryMap()) {
+ _file.enableMemoryMap(tuneFileRead.getMemoryMapFlags());
+ } else if (tuneFileRead.getWantDirectIO())
+ _file.EnableDirectIO();
+ bool res = _file.OpenReadOnly(name.c_str());
+ if (!res) {
+ LOG(error, "could not open %s: %s",
+ _file.GetFileName(), getLastErrorString().c_str());
+ return false;
+ }
+ _fileSize = _file.GetSize();
+
+ readHeader();
+ return true;
+}
+
+
+bool
+ZcPosOccRandRead::close(void)
+{
+ _file.Close();
+ return true;
+}
+
+
+void
+ZcPosOccRandRead::readHeader(void)
+{
+ EGPosOccDecodeContext<true> d(&_fieldsParams);
+ ComprFileReadContext drc(d);
+
+ drc.setFile(&_file);
+ drc.setFileSize(_file.GetSize());
+ drc.allocComprBuf(512, 32768u);
+ d.emptyBuffer(0);
+ drc.readComprBuffer();
+ d.setReadContext(&drc);
+
+ vespalib::FileHeader header;
+ d.readHeader(header, _file.getSize());
+ uint32_t headerLen = header.getSize();
+ assert(header.hasTag("frozen"));
+ assert(header.hasTag("fileBitSize"));
+ assert(header.hasTag("format.0"));
+ assert(header.hasTag("format.1"));
+ assert(!header.hasTag("format.2"));
+ assert(header.hasTag("numWords"));
+ assert(header.hasTag("minChunkDocs"));
+ assert(header.hasTag("docIdLimit"));
+ assert(header.hasTag("minSkipDocs"));
+ assert(header.getTag("frozen").asInteger() != 0);
+ _fileBitSize = header.getTag("fileBitSize").asInteger();
+ assert(header.getTag("format.0").asString() == myId5);
+ assert(header.getTag("format.1").asString() == d.getIdentifier());
+ _numWords = header.getTag("numWords").asInteger();
+ _minChunkDocs = header.getTag("minChunkDocs").asInteger();
+ _docIdLimit = header.getTag("docIdLimit").asInteger();
+ _minSkipDocs = header.getTag("minSkipDocs").asInteger();
+ // Read feature decoding specific subheader
+ d.readHeader(header, "features.");
+ // Align on 64-bit unit
+ d.smallAlign(64);
+ headerLen += (-headerLen & 7);
+ assert(d.getReadOffset() == headerLen * 8);
+ _headerBitSize = d.getReadOffset();
+}
+
+
+const vespalib::string &
+ZcPosOccRandRead::getIdentifier(void)
+{
+ return myId5;
+}
+
+
+const vespalib::string &
+ZcPosOccRandRead::getSubIdentifier(void)
+{
+ PosOccFieldsParams fieldsParams;
+ EGPosOccDecodeContext<true> d(&fieldsParams);
+ return d.getIdentifier();
+}
+
+
+Zc4PosOccRandRead::
+Zc4PosOccRandRead(void)
+ : ZcPosOccRandRead()
+{
+ _dynamicK = false;
+}
+
+
+search::queryeval::SearchIterator *
+Zc4PosOccRandRead::
+createIterator(const PostingListCounts &counts,
+ const PostingListHandle &handle,
+ const search::fef::TermFieldMatchDataArray &matchData,
+ bool usebitVector) const
+{
+ (void) usebitVector;
+ typedef EGPosOccEncodeContext<true> EC;
+
+ assert((handle._bitLength != 0) == (counts._bitLength != 0));
+ assert((counts._numDocs != 0) == (counts._bitLength != 0));
+ assert(handle._bitOffsetMem <= handle._bitOffset);
+
+ if (handle._bitLength == 0)
+ return new search::queryeval::EmptySearch;
+
+ const char *cmem = static_cast<const char *>(handle._mem);
+ uint64_t memOffset = reinterpret_cast<unsigned long>(cmem) & 7;
+ const uint64_t *mem = reinterpret_cast<const uint64_t *>
+ (cmem - memOffset) +
+ (memOffset * 8 + handle._bitOffset -
+ handle._bitOffsetMem) / 64;
+ int bitOffset = (memOffset * 8 + handle._bitOffset -
+ handle._bitOffsetMem) & 63;
+
+ Position start(mem, bitOffset);
+ EG2PosOccDecodeContext<true> d(mem, bitOffset, &_fieldsParams);
+
+ UC64_DECODECONTEXT_CONSTRUCTOR(o, d._);
+ uint32_t length;
+ uint64_t val64;
+
+ UC64BE_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_NUMDOCS, EC);
+
+ uint32_t numDocs = static_cast<uint32_t>(val64) + 1;
+
+ if (numDocs < _minSkipDocs) {
+ return new Zc4RareWordPosOccIterator<true>(start, handle._bitLength, _docIdLimit, &_fieldsParams, matchData);
+ } else {
+ return new Zc4PosOccIterator<true>(start, handle._bitLength, _docIdLimit, _minChunkDocs, counts, &_fieldsParams, matchData);
+ }
+}
+
+
+void
+Zc4PosOccRandRead::readHeader(void)
+{
+ EG2PosOccDecodeContext<true> d(&_fieldsParams);
+ ComprFileReadContext drc(d);
+
+ drc.setFile(&_file);
+ drc.setFileSize(_file.GetSize());
+ drc.allocComprBuf(512, 32768u);
+ d.emptyBuffer(0);
+ drc.readComprBuffer();
+ d.setReadContext(&drc);
+
+ vespalib::FileHeader header;
+ d.readHeader(header, _file.getSize());
+ uint32_t headerLen = header.getSize();
+ assert(header.hasTag("frozen"));
+ assert(header.hasTag("fileBitSize"));
+ assert(header.hasTag("format.0"));
+ assert(header.hasTag("format.1"));
+ assert(!header.hasTag("format.2"));
+ assert(header.hasTag("numWords"));
+ assert(header.hasTag("minChunkDocs"));
+ assert(header.hasTag("docIdLimit"));
+ assert(header.hasTag("minSkipDocs"));
+ assert(header.getTag("frozen").asInteger() != 0);
+ _fileBitSize = header.getTag("fileBitSize").asInteger();
+ assert(header.getTag("format.0").asString() == myId4);
+ assert(header.getTag("format.1").asString() == d.getIdentifier());
+ _numWords = header.getTag("numWords").asInteger();
+ _minChunkDocs = header.getTag("minChunkDocs").asInteger();
+ _docIdLimit = header.getTag("docIdLimit").asInteger();
+ _minSkipDocs = header.getTag("minSkipDocs").asInteger();
+ // Read feature decoding specific subheader
+ d.readHeader(header, "features.");
+ // Align on 64-bit unit
+ d.smallAlign(64);
+ headerLen += (-headerLen & 7);
+ assert(d.getReadOffset() == headerLen * 8);
+ _headerBitSize = d.getReadOffset();
+}
+
+
+const vespalib::string &
+Zc4PosOccRandRead::getIdentifier(void)
+{
+ return myId4;
+}
+
+
+const vespalib::string &
+Zc4PosOccRandRead::getSubIdentifier(void)
+{
+ PosOccFieldsParams fieldsParams;
+ EG2PosOccDecodeContext<true> d(&fieldsParams);
+ return d.getIdentifier();
+}
+
+
+} // namespace diskindex
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/diskindex/zcposoccrandread.h b/searchlib/src/vespa/searchlib/diskindex/zcposoccrandread.h
new file mode 100644
index 00000000000..c86d9a2cd13
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/diskindex/zcposoccrandread.h
@@ -0,0 +1,112 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/index/postinglistfile.h>
+#include <vespa/searchlib/bitcompression/compression.h>
+#include <vespa/searchlib/bitcompression/posocccompression.h>
+#include <vespa/searchlib/fef/termfieldmatchdataarray.h>
+
+namespace search
+{
+
+namespace diskindex
+{
+
+class ZcPosOccRandRead : public index::PostingListFileRandRead
+{
+protected:
+ FastOS_File _file;
+ uint64_t _fileSize;
+
+ uint32_t _minChunkDocs; // # of documents needed for chunking
+ uint32_t _minSkipDocs; // # of documents needed for skipping
+ uint32_t _docIdLimit; // Limit for document ids (docId < docIdLimit)
+
+ uint64_t _numWords; // Number of words in file
+ uint64_t _fileBitSize;
+ uint64_t _headerBitSize;
+ bitcompression::PosOccFieldsParams _fieldsParams;
+ bool _dynamicK;
+
+
+public:
+ ZcPosOccRandRead(void);
+
+ virtual
+ ~ZcPosOccRandRead(void);
+
+ typedef index::PostingListCounts PostingListCounts;
+ typedef index::PostingListHandle PostingListHandle;
+
+ /**
+ * Create iterator for single word. Semantic lifetime of counts and
+ * handle must exceed lifetime of iterator.
+ */
+ virtual search::queryeval::SearchIterator *
+ createIterator(const PostingListCounts &counts,
+ const PostingListHandle &handle,
+ const search::fef::TermFieldMatchDataArray &matchData,
+ bool usebitVector) const;
+
+ /**
+ * Read (possibly partial) posting list into handle.
+ */
+ virtual void
+ readPostingList(const PostingListCounts &counts,
+ uint32_t firstSegment,
+ uint32_t numSegments,
+ PostingListHandle &handle);
+
+ /**
+ * Open posting list file for random read.
+ */
+ virtual bool
+ open(const vespalib::string &name, const TuneFileRandRead &tuneFileRead);
+
+ /**
+ * Close posting list file.
+ */
+ virtual bool
+ close(void);
+
+ virtual void
+ readHeader(void);
+
+ static const vespalib::string &
+ getIdentifier(void);
+
+ static const vespalib::string &
+ getSubIdentifier(void);
+};
+
+class Zc4PosOccRandRead : public ZcPosOccRandRead
+{
+public:
+ Zc4PosOccRandRead(void);
+
+ /**
+ * Create iterator for single word. Semantic lifetime of counts and
+ * handle must exceed lifetime of iterator.
+ */
+ virtual search::queryeval::SearchIterator *
+ createIterator(const PostingListCounts &counts,
+ const PostingListHandle &handle,
+ const search::fef::TermFieldMatchDataArray &matchData,
+ bool usebitVector) const;
+
+ virtual void
+ readHeader(void);
+
+ static const vespalib::string &
+ getIdentifier(void);
+
+ static const vespalib::string &
+ getSubIdentifier(void);
+};
+
+
+} // namespace diskindex
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/diskindex/zcposting.cpp b/searchlib/src/vespa/searchlib/diskindex/zcposting.cpp
new file mode 100644
index 00000000000..13b536e8a6b
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/diskindex/zcposting.cpp
@@ -0,0 +1,1470 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+#include "zcposting.h"
+#include <vespa/searchlib/index/postinglistcounts.h>
+#include <vespa/searchlib/index/postinglistcountfile.h>
+#include <vespa/searchlib/index/postinglistfile.h>
+#include <vespa/searchlib/index/docidandfeatures.h>
+#include <vespa/vespalib/objects/nbostream.h>
+#include <vespa/vespalib/data/fileheader.h>
+#include <vespa/searchlib/common/fileheadercontext.h>
+
+LOG_SETUP(".diskindex.zcposting");
+
+namespace
+{
+
+vespalib::string myId5("Zc.5");
+vespalib::string myId4("Zc.4");
+vespalib::string emptyId;
+
+}
+
+namespace search
+{
+
+namespace diskindex
+{
+
+using index::PostingListCountFileSeqRead;
+using index::PostingListCountFileSeqWrite;
+using common::FileHeaderContext;
+using bitcompression::FeatureDecodeContextBE;
+using bitcompression::FeatureEncodeContextBE;
+using vespalib::nbostream;
+using vespalib::getLastErrorString;
+
+
+void
+ZcBuf::clearReserve(size_t reserveSize)
+{
+ if (reserveSize + zcSlack() > _mallocSize) {
+ size_t newSize = _mallocSize * 2;
+ if (newSize < 16)
+ newSize = 16;
+ while (newSize < reserveSize + zcSlack())
+ newSize *= 2;
+ uint8_t *newBuf = static_cast<uint8_t *>(malloc(newSize));
+ free(_mallocStart);
+ _mallocStart = newBuf;
+ _mallocSize = newSize;
+ }
+ _valE = _mallocStart + _mallocSize - zcSlack();
+ _valI = _mallocStart;
+}
+
+
+void
+ZcBuf::expand(void)
+{
+ size_t newSize = _mallocSize * 2;
+ size_t oldSize = size();
+ if (newSize < 16)
+ newSize = 16;
+
+ uint8_t *newBuf = static_cast<uint8_t *>(malloc(newSize));
+
+ if (oldSize > 0)
+ memcpy(newBuf, _mallocStart, oldSize);
+ free(_mallocStart);
+ _mallocStart = newBuf;
+ _mallocSize = newSize;
+ _valI = _mallocStart + oldSize;
+ _valE = _mallocStart + newSize - zcSlack();
+}
+
+
+Zc4PostingSeqRead::
+Zc4PostingSeqRead(PostingListCountFileSeqRead *countFile)
+ : PostingListFileSeqRead(),
+ _decodeContext(),
+ _docIdK(0),
+ _prevDocId(0),
+ _numDocs(0),
+ _readContext(sizeof(uint64_t)),
+ _file(),
+ _hasMore(false),
+ _dynamicK(false),
+ _lastDocId(0),
+ _minChunkDocs(1 << 30),
+ _minSkipDocs(64),
+ _docIdLimit(10000000),
+ _zcDocIds(),
+ _l1Skip(),
+ _l2Skip(),
+ _l3Skip(),
+ _l4Skip(),
+ _numWords(0),
+ _fileBitSize(0),
+ _chunkNo(0),
+ _l1SkipDocId(0),
+ _l1SkipDocIdPos(0),
+ _l1SkipFeaturesPos(0),
+ _l2SkipDocId(0),
+ _l2SkipDocIdPos(0),
+ _l2SkipL1SkipPos(0),
+ _l2SkipFeaturesPos(0),
+ _l3SkipDocId(0),
+ _l3SkipDocIdPos(0),
+ _l3SkipL1SkipPos(0),
+ _l3SkipL2SkipPos(0),
+ _l3SkipFeaturesPos(0),
+ _l4SkipDocId(0),
+ _l4SkipDocIdPos(0),
+ _l4SkipL1SkipPos(0),
+ _l4SkipL2SkipPos(0),
+ _l4SkipL3SkipPos(0),
+ _l4SkipFeaturesPos(0),
+ _featuresSize(0),
+ _countFile(countFile),
+ _headerBitLen(0),
+ _rangeEndOffset(0),
+ _readAheadEndOffset(0),
+ _wordStart(0),
+ _checkPointPos(0),
+ _residue(0),
+ _checkPointChunkNo(0u),
+ _checkPointResidue(0u),
+ _checkPointHasMore(false)
+{
+ if (_countFile != NULL) {
+ PostingListParams params;
+ _countFile->getParams(params);
+ params.get("docIdLimit", _docIdLimit);
+ params.get("minChunkDocs", _minChunkDocs);
+ }
+}
+
+
+Zc4PostingSeqRead::~Zc4PostingSeqRead(void)
+{
+}
+
+
+void
+Zc4PostingSeqRead::
+readCommonWordDocIdAndFeatures(DocIdAndFeatures &features)
+{
+ if (_zcDocIds._valI >= _zcDocIds._valE && _hasMore)
+ readWordStart(); // Read start of next chunk
+ // Split docid & features.
+ assert(_zcDocIds._valI < _zcDocIds._valE);
+ uint32_t docIdPos = _zcDocIds.pos();
+ uint32_t docId = _prevDocId + 1 + _zcDocIds.decode();
+ features._docId = docId;
+ _prevDocId = docId;
+ assert(docId <= _lastDocId);
+ if (docId > _l1SkipDocId) {
+ _l1SkipDocIdPos += _l1Skip.decode() + 1;
+ assert(docIdPos == _l1SkipDocIdPos);
+ _l1SkipFeaturesPos += _l1Skip.decode() + 1;
+ uint64_t featuresPos = _decodeContext->getReadOffset();
+ assert(featuresPos == _l1SkipFeaturesPos);
+ (void) featuresPos;
+ if (docId > _l2SkipDocId) {
+ _l2SkipDocIdPos += _l2Skip.decode() + 1;
+ assert(docIdPos == _l2SkipDocIdPos);
+ _l2SkipFeaturesPos += _l2Skip.decode() + 1;
+ assert(featuresPos == _l2SkipFeaturesPos);
+ _l2SkipL1SkipPos += _l2Skip.decode() + 1;
+ assert(_l1Skip.pos() == _l2SkipL1SkipPos);
+ if (docId > _l3SkipDocId) {
+ _l3SkipDocIdPos += _l3Skip.decode() + 1;
+ assert(docIdPos == _l3SkipDocIdPos);
+ _l3SkipFeaturesPos += _l3Skip.decode() + 1;
+ assert(featuresPos == _l3SkipFeaturesPos);
+ _l3SkipL1SkipPos += _l3Skip.decode() + 1;
+ assert(_l1Skip.pos() == _l3SkipL1SkipPos);
+ _l3SkipL2SkipPos += _l3Skip.decode() + 1;
+ assert(_l2Skip.pos() == _l3SkipL2SkipPos);
+ if (docId > _l4SkipDocId) {
+ _l4SkipDocIdPos += _l4Skip.decode() + 1;
+ assert(docIdPos == _l4SkipDocIdPos);
+ (void) docIdPos;
+ _l4SkipFeaturesPos += _l4Skip.decode() + 1;
+ assert(featuresPos == _l4SkipFeaturesPos);
+ _l4SkipL1SkipPos += _l4Skip.decode() + 1;
+ assert(_l1Skip.pos() == _l4SkipL1SkipPos);
+ _l4SkipL2SkipPos += _l4Skip.decode() + 1;
+ assert(_l2Skip.pos() == _l4SkipL2SkipPos);
+ _l4SkipL3SkipPos += _l4Skip.decode() + 1;
+ assert(_l3Skip.pos() == _l4SkipL3SkipPos);
+ _l4SkipDocId += _l4Skip.decode() + 1;
+ assert(_l4SkipDocId <= _lastDocId);
+ assert(_l4SkipDocId >= docId);
+ }
+ _l3SkipDocId += _l3Skip.decode() + 1;
+ assert(_l3SkipDocId <= _lastDocId);
+ assert(_l3SkipDocId <= _l4SkipDocId);
+ assert(_l3SkipDocId >= docId);
+ }
+ _l2SkipDocId += _l2Skip.decode() + 1;
+ assert(_l2SkipDocId <= _lastDocId);
+ assert(_l2SkipDocId <= _l4SkipDocId);
+ assert(_l2SkipDocId <= _l3SkipDocId);
+ assert(_l2SkipDocId >= docId);
+ }
+ _l1SkipDocId += _l1Skip.decode() + 1;
+ assert(_l1SkipDocId <= _lastDocId);
+ assert(_l1SkipDocId <= _l4SkipDocId);
+ assert(_l1SkipDocId <= _l3SkipDocId);
+ assert(_l1SkipDocId <= _l2SkipDocId);
+ assert(_l1SkipDocId >= docId);
+ }
+ if (docId < _lastDocId) {
+ // Assert more space available when not yet at last docid
+ assert(_zcDocIds._valI < _zcDocIds._valE);
+ } else {
+ // Assert that space has been used when at last docid
+ assert(_zcDocIds._valI == _zcDocIds._valE);
+ // Assert that we've read to end of skip info
+ assert(_l1SkipDocId == _lastDocId);
+ assert(_l2SkipDocId == _lastDocId);
+ assert(_l3SkipDocId == _lastDocId);
+ assert(_l4SkipDocId == _lastDocId);
+ if (!_hasMore) {
+ _chunkNo = 0;
+ }
+ }
+ _decodeContext->readFeatures(features);
+ --_residue;
+}
+
+
+void
+Zc4PostingSeqRead::
+readDocIdAndFeatures(DocIdAndFeatures &features)
+{
+ if (_residue == 0 && !_hasMore) {
+ if (_rangeEndOffset != 0) {
+ DecodeContext &d = *_decodeContext;
+ uint64_t curOffset = d.getReadOffset();
+ assert(curOffset <= _rangeEndOffset);
+ if (curOffset < _rangeEndOffset)
+ readWordStart();
+ }
+ if (_residue == 0) {
+ // Don't read past end of posting list.
+ features.clear(static_cast<uint32_t>(-1));
+ return;
+ }
+ }
+ if (_lastDocId > 0)
+ return readCommonWordDocIdAndFeatures(features);
+ // Interleaves docid & features
+ typedef FeatureEncodeContextBE EC;
+ DecodeContext &d = *_decodeContext;
+ uint32_t length;
+ uint64_t val64;
+ UC64_DECODECONTEXT_CONSTRUCTOR(o, d._);
+
+ UC64BE_DECODEEXPGOLOMB_SMALL_NS(o,
+ K_VALUE_ZCPOSTING_DELTA_DOCID,
+ EC);
+ uint32_t docId = _prevDocId + 1 + val64;
+ features._docId = docId;
+ _prevDocId = docId;
+ UC64_DECODECONTEXT_STORE(o, d._);
+ if (__builtin_expect(oCompr >= d._valE, false)) {
+ _readContext.readComprBuffer();
+ }
+ _decodeContext->readFeatures(features);
+ --_residue;
+}
+
+
+void
+Zc4PostingSeqRead::checkPointWrite(nbostream &out)
+{
+ out << _counts;
+ out << _wordStart;
+ uint64_t curPos = _decodeContext->getReadOffset();
+ out << curPos;
+ out << _residue;
+ out << _chunkNo;
+ out << _hasMore;
+}
+
+
+void
+Zc4PostingSeqRead::checkPointRead(nbostream &in)
+{
+ in >> _counts;
+ in >> _wordStart;
+ in >> _checkPointPos;
+ in >> _checkPointResidue;
+ in >> _checkPointChunkNo;
+ in >> _checkPointHasMore;
+ assert(_checkPointPos >= _wordStart);
+}
+
+
+void
+Zc4PostingSeqRead::readWordStartWithSkip(void)
+{
+ typedef FeatureEncodeContextBE EC;
+ DecodeContext &d = *_decodeContext;
+ UC64_DECODECONTEXT_CONSTRUCTOR(o, d._);
+ uint32_t length;
+ uint64_t val64;
+ const uint64_t *valE = d._valE;
+
+ if (_hasMore)
+ ++_chunkNo;
+ else
+ _chunkNo = 0;
+ assert(_numDocs >= _minSkipDocs || _hasMore);
+ bool hasMore = false;
+ if (__builtin_expect(_numDocs >= _minChunkDocs, false)) {
+ hasMore = static_cast<int64_t>(oVal) < 0;
+ oVal <<= 1;
+ length = 1;
+ UC64BE_READBITS_NS(o, EC);
+ }
+ if (_dynamicK)
+ _docIdK = EC::calcDocIdK((_hasMore || hasMore) ? 1 : _numDocs,
+ _docIdLimit);
+ if (_hasMore || hasMore) {
+ if (_rangeEndOffset == 0) {
+ assert(hasMore == (_chunkNo + 1 < _counts._segments.size()));
+ assert(_numDocs == _counts._segments[_chunkNo]._numDocs);
+ }
+ if (hasMore) {
+ assert(_numDocs >= _minSkipDocs);
+ assert(_numDocs >= _minChunkDocs);
+ }
+ } else {
+ assert(_numDocs >= _minSkipDocs);
+ if (_rangeEndOffset == 0) {
+ assert(_numDocs == _counts._numDocs);
+ }
+ }
+ if (__builtin_expect(oCompr >= valE, false)) {
+ UC64_DECODECONTEXT_STORE(o, d._);
+ _readContext.readComprBuffer();
+ valE = d._valE;
+ UC64_DECODECONTEXT_LOAD(o, d._);
+ }
+ UC64BE_DECODEEXPGOLOMB_NS(o,
+ K_VALUE_ZCPOSTING_DOCIDSSIZE,
+ EC);
+ uint32_t docIdsSize = val64 + 1;
+ UC64BE_DECODEEXPGOLOMB_NS(o,
+ K_VALUE_ZCPOSTING_L1SKIPSIZE,
+ EC);
+ uint32_t l1SkipSize = val64;
+ if (__builtin_expect(oCompr >= valE, false)) {
+ UC64_DECODECONTEXT_STORE(o, d._);
+ _readContext.readComprBuffer();
+ valE = d._valE;
+ UC64_DECODECONTEXT_LOAD(o, d._);
+ }
+ uint32_t l2SkipSize = 0;
+ if (l1SkipSize != 0) {
+ UC64BE_DECODEEXPGOLOMB_NS(o,
+ K_VALUE_ZCPOSTING_L2SKIPSIZE,
+ EC);
+ l2SkipSize = val64;
+ }
+ uint32_t l3SkipSize = 0;
+ if (l2SkipSize != 0) {
+ UC64BE_DECODEEXPGOLOMB_NS(o,
+ K_VALUE_ZCPOSTING_L3SKIPSIZE,
+ EC);
+ l3SkipSize = val64;
+ }
+ if (__builtin_expect(oCompr >= valE, false)) {
+ UC64_DECODECONTEXT_STORE(o, d._);
+ _readContext.readComprBuffer();
+ valE = d._valE;
+ UC64_DECODECONTEXT_LOAD(o, d._);
+ }
+ uint32_t l4SkipSize = 0;
+ if (l3SkipSize != 0) {
+ UC64BE_DECODEEXPGOLOMB_NS(o,
+ K_VALUE_ZCPOSTING_L4SKIPSIZE,
+ EC);
+ l4SkipSize = val64;
+ }
+ UC64BE_DECODEEXPGOLOMB_NS(o,
+ K_VALUE_ZCPOSTING_FEATURESSIZE,
+ EC);
+ _featuresSize = val64;
+ if (__builtin_expect(oCompr >= valE, false)) {
+ UC64_DECODECONTEXT_STORE(o, d._);
+ _readContext.readComprBuffer();
+ valE = d._valE;
+ UC64_DECODECONTEXT_LOAD(o, d._);
+ }
+ if (_dynamicK) {
+ UC64BE_DECODEEXPGOLOMB_NS(o,
+ _docIdK,
+ EC);
+ } else {
+ UC64BE_DECODEEXPGOLOMB_NS(o,
+ K_VALUE_ZCPOSTING_LASTDOCID,
+ EC);
+ }
+ _lastDocId = _docIdLimit - 1 - val64;
+ if (_hasMore || hasMore) {
+ if (_rangeEndOffset == 0) {
+ assert(_lastDocId == _counts._segments[_chunkNo]._lastDoc);
+ }
+ }
+
+ if (__builtin_expect(oCompr >= valE, false)) {
+ UC64_DECODECONTEXT_STORE(o, d._);
+ _readContext.readComprBuffer();
+ valE = d._valE;
+ UC64_DECODECONTEXT_LOAD(o, d._);
+ }
+ uint64_t bytePad = oPreRead & 7;
+ if (bytePad > 0) {
+ length = bytePad;
+ oVal <<= length;
+ UC64BE_READBITS_NS(o, EC);
+ }
+ UC64_DECODECONTEXT_STORE(o, d._);
+ if (__builtin_expect(oCompr >= valE, false)) {
+ _readContext.readComprBuffer();
+ }
+ _zcDocIds.clearReserve(docIdsSize);
+ _l1Skip.clearReserve(l1SkipSize);
+ _l2Skip.clearReserve(l2SkipSize);
+ _l3Skip.clearReserve(l3SkipSize);
+ _l4Skip.clearReserve(l4SkipSize);
+ _decodeContext->readBytes(_zcDocIds._valI, docIdsSize);
+ _zcDocIds._valE = _zcDocIds._valI + docIdsSize;
+ if (l1SkipSize > 0)
+ _decodeContext->readBytes(_l1Skip._valI, l1SkipSize);
+ _l1Skip._valE = _l1Skip._valI + l1SkipSize;
+ if (l2SkipSize > 0)
+ _decodeContext->readBytes(_l2Skip._valI, l2SkipSize);
+ _l2Skip._valE = _l2Skip._valI + l2SkipSize;
+ if (l3SkipSize > 0)
+ _decodeContext->readBytes(_l3Skip._valI, l3SkipSize);
+ _l3Skip._valE = _l3Skip._valI + l3SkipSize;
+ if (l4SkipSize > 0)
+ _decodeContext->readBytes(_l4Skip._valI, l4SkipSize);
+ _l4Skip._valE = _l4Skip._valI + l4SkipSize;
+
+ if (l1SkipSize > 0)
+ _l1SkipDocId = _l1Skip.decode() + 1 + _prevDocId;
+ else
+ _l1SkipDocId = _lastDocId;
+ if (l2SkipSize > 0)
+ _l2SkipDocId = _l2Skip.decode() + 1 + _prevDocId;
+ else
+ _l2SkipDocId = _lastDocId;
+ if (l3SkipSize > 0)
+ _l3SkipDocId = _l3Skip.decode() + 1 + _prevDocId;
+ else
+ _l3SkipDocId = _lastDocId;
+ if (l4SkipSize > 0)
+ _l4SkipDocId = _l4Skip.decode() + 1 + _prevDocId;
+ else
+ _l4SkipDocId = _lastDocId;
+ _l1SkipDocIdPos = 0;
+ _l1SkipFeaturesPos = _decodeContext->getReadOffset();
+ _l2SkipDocIdPos = 0;
+ _l2SkipL1SkipPos = 0;
+ _l2SkipFeaturesPos = _decodeContext->getReadOffset();
+ _l3SkipDocIdPos = 0;
+ _l3SkipL1SkipPos = 0;
+ _l3SkipL2SkipPos = 0;
+ _l3SkipFeaturesPos = _decodeContext->getReadOffset();
+ _l4SkipDocIdPos = 0;
+ _l4SkipL1SkipPos = 0;
+ _l4SkipL2SkipPos = 0;
+ _l4SkipL3SkipPos = 0;
+ _l4SkipFeaturesPos = _decodeContext->getReadOffset();
+ _hasMore = hasMore;
+ // Decode context is now positioned at start of features
+}
+
+
+void
+Zc4PostingSeqRead::readWordStart(void)
+{
+ typedef FeatureEncodeContextBE EC;
+ UC64_DECODECONTEXT_CONSTRUCTOR(o, _decodeContext->_);
+ uint32_t length;
+ uint64_t val64;
+ const uint64_t *valE = _decodeContext->_valE;
+
+ UC64BE_DECODEEXPGOLOMB_NS(o,
+ K_VALUE_ZCPOSTING_NUMDOCS,
+ EC);
+ UC64_DECODECONTEXT_STORE(o, _decodeContext->_);
+ if (oCompr >= valE)
+ _readContext.readComprBuffer();
+ _numDocs = static_cast<uint32_t>(val64) + 1;
+ _residue = _numDocs;
+ _prevDocId = _hasMore ? _lastDocId : 0u;
+ if (_rangeEndOffset == 0) {
+ assert(_numDocs <= _counts._numDocs);
+ assert(_numDocs == _counts._numDocs ||
+ _numDocs >= _minChunkDocs ||
+ _hasMore);
+ }
+
+ if (_numDocs >= _minSkipDocs || _hasMore) {
+ readWordStartWithSkip();
+ // Decode context is not positioned at start of features
+ } else {
+ if (_dynamicK)
+ _docIdK = EC::calcDocIdK(_numDocs, _docIdLimit);
+ _lastDocId = 0u;
+ // Decode context is not positioned at start of docids & features
+ }
+}
+
+
+void
+Zc4PostingSeqRead::readCounts(const PostingListCounts &counts)
+{
+ assert(!_hasMore); // Previous words must have been read.
+
+ _counts = counts;
+
+ assert((_counts._numDocs == 0) == (_counts._bitLength == 0));
+ if (_counts._numDocs > 0) {
+ _wordStart = _decodeContext->getReadOffset();
+ readWordStart();
+ }
+}
+
+
+bool
+Zc4PostingSeqRead::open(const vespalib::string &name,
+ const TuneFileSeqRead &tuneFileRead)
+{
+ if (tuneFileRead.getWantDirectIO())
+ _file.EnableDirectIO();
+ bool res = _file.OpenReadOnly(name.c_str());
+ if (res) {
+ _readContext.setFile(&_file);
+ _readContext.setFileSize(_file.GetSize());
+ DecodeContext &d = *_decodeContext;
+ _readContext.allocComprBuf(65536u, 32768u);
+ d.emptyBuffer(0);
+ _readContext.readComprBuffer();
+
+ readHeader();
+ if (d._valI >= d._valE)
+ _readContext.readComprBuffer();
+ if (_checkPointPos != 0) {
+ if (_checkPointResidue != 0 || _checkPointHasMore) {
+ // Checkpointed in the middle of a word. Read from
+ // start at word until at right position.
+ DocIdAndFeatures features;
+ _readContext.setPosition(_wordStart);
+ assert(_decodeContext->getReadOffset() == _wordStart);
+ _readContext.readComprBuffer();
+ readWordStart();
+ assert(_chunkNo < _checkPointChunkNo ||
+ (_chunkNo == _checkPointChunkNo &&
+ _residue >= _checkPointResidue));
+ while (_chunkNo < _checkPointChunkNo ||
+ _residue > _checkPointResidue) {
+ readDocIdAndFeatures(features);
+ }
+ assert(_chunkNo == _checkPointChunkNo);
+ assert(_residue == _checkPointResidue);
+ assert(_hasMore == _checkPointHasMore);
+ assert(_decodeContext->getReadOffset() == _checkPointPos);
+ } else {
+ // Checkpointed between words.
+ _readContext.setPosition(_checkPointPos);
+ assert(_decodeContext->getReadOffset() == _checkPointPos);
+ _readContext.readComprBuffer();
+ }
+ }
+ } else {
+ LOG(error, "could not open %s: %s",
+ _file.GetFileName(), getLastErrorString().c_str());
+ }
+ return res;
+}
+
+
+bool
+Zc4PostingSeqRead::close(void)
+{
+ _readContext.dropComprBuf();
+ _file.Close();
+ _readContext.setFile(NULL);
+ return true;
+}
+
+
+void
+Zc4PostingSeqRead::getParams(PostingListParams &params)
+{
+ if (_countFile != NULL) {
+ PostingListParams countParams;
+ _countFile->getParams(countParams);
+ params = countParams;
+ uint32_t countDocIdLimit = 0;
+ uint32_t countMinChunkDocs = 0;
+ countParams.get("docIdLimit", countDocIdLimit);
+ countParams.get("minChunkDocs", countMinChunkDocs);
+ assert(_docIdLimit == countDocIdLimit);
+ assert(_minChunkDocs == countMinChunkDocs);
+ } else {
+ params.clear();
+ params.set("docIdLimit", _docIdLimit);
+ params.set("minChunkDocs", _minChunkDocs);
+ }
+ params.set("minSkipDocs", _minSkipDocs);
+}
+
+
+void
+Zc4PostingSeqRead::getFeatureParams(PostingListParams &params)
+{
+ _decodeContext->getParams(params);
+}
+
+
+void
+Zc4PostingSeqRead::readHeader(void)
+{
+ FeatureDecodeContextBE &d = *_decodeContext;
+ const vespalib::string &myId = _dynamicK ? myId5 : myId4;
+
+ vespalib::FileHeader header;
+ d.readHeader(header, _file.getSize());
+ uint32_t headerLen = header.getSize();
+ assert(header.hasTag("frozen"));
+ assert(header.hasTag("fileBitSize"));
+ assert(header.hasTag("format.0"));
+ assert(header.hasTag("format.1"));
+ assert(!header.hasTag("format.2"));
+ assert(header.hasTag("numWords"));
+ assert(header.hasTag("minChunkDocs"));
+ assert(header.hasTag("docIdLimit"));
+ assert(header.hasTag("minSkipDocs"));
+ assert(header.hasTag("endian"));
+ bool completed = header.getTag("frozen").asInteger() != 0;
+ _fileBitSize = header.getTag("fileBitSize").asInteger();
+ headerLen += (-headerLen & 7);
+ assert(completed);
+ (void) completed;
+ assert(_fileBitSize >= 8 * headerLen);
+ assert(header.getTag("format.0").asString() == myId);
+ (void) myId;
+ assert(header.getTag("format.1").asString() == d.getIdentifier());
+ _numWords = header.getTag("numWords").asInteger();
+ _minChunkDocs = header.getTag("minChunkDocs").asInteger();
+ _docIdLimit = header.getTag("docIdLimit").asInteger();
+ _minSkipDocs = header.getTag("minSkipDocs").asInteger();
+ assert(header.getTag("endian").asString() == "big");
+ // Read feature decoding specific subheader
+ d.readHeader(header, "features.");
+ // Align on 64-bit unit
+ d.smallAlign(64);
+ assert(d.getReadOffset() == headerLen * 8);
+ _headerBitLen = d.getReadOffset();
+}
+
+
+const vespalib::string &
+Zc4PostingSeqRead::getIdentifier(void)
+{
+ return myId4;
+}
+
+
+uint64_t
+Zc4PostingSeqRead::getCurrentPostingOffset(void) const
+{
+ FeatureDecodeContextBE &d = *_decodeContext;
+ return d.getReadOffset() - _headerBitLen;
+}
+
+
+void
+Zc4PostingSeqRead::setPostingOffset(uint64_t offset,
+ uint64_t endOffset,
+ uint64_t readAheadOffset)
+{
+ assert(_residue == 0); // Only to be called between posting lists
+
+ FeatureDecodeContextBE &d = *_decodeContext;
+
+ _rangeEndOffset = endOffset + _headerBitLen;
+ _readAheadEndOffset = readAheadOffset + _headerBitLen;
+ _readContext.setStopOffset(_readAheadEndOffset, false);
+ uint64_t newOffset = offset + _headerBitLen;
+ if (newOffset != d.getReadOffset()) {
+ _readContext.setPosition(newOffset);
+ assert(newOffset == d.getReadOffset());
+ _readContext.readComprBuffer();
+ }
+}
+
+
+Zc4PostingSeqWrite::
+Zc4PostingSeqWrite(PostingListCountFileSeqWrite *countFile)
+ : PostingListFileSeqWrite(),
+ _encodeContext(),
+ _writeContext(_encodeContext),
+ _file(),
+ _minChunkDocs(1 << 30),
+ _minSkipDocs(64),
+ _docIdLimit(10000000),
+ _docIds(),
+ _encodeFeatures(NULL),
+ _featureOffset(0),
+ _featureWriteContext(sizeof(uint64_t)),
+ _writePos(0),
+ _dynamicK(false),
+ _zcDocIds(),
+ _l1Skip(),
+ _l2Skip(),
+ _l3Skip(),
+ _l4Skip(),
+ _numWords(0),
+ _fileBitSize(0),
+ _countFile(countFile)
+{
+ _encodeContext.setWriteContext(&_writeContext);
+
+ if (_countFile != NULL) {
+ PostingListParams params;
+ _countFile->getParams(params);
+ params.get("docIdLimit", _docIdLimit);
+ params.get("minChunkDocs", _minChunkDocs);
+ }
+ _featureWriteContext.allocComprBuf(64, 1);
+}
+
+
+Zc4PostingSeqWrite::~Zc4PostingSeqWrite(void)
+{
+}
+
+
+void
+Zc4PostingSeqWrite::
+writeDocIdAndFeatures(const DocIdAndFeatures &features)
+{
+ if (__builtin_expect(_docIds.size() >= _minChunkDocs, false))
+ flushChunk();
+ _encodeFeatures->writeFeatures(features);
+ uint64_t writeOffset = _encodeFeatures->getWriteOffset();
+ uint64_t featureSize = writeOffset - _featureOffset;
+ assert(static_cast<uint32_t>(featureSize) == featureSize);
+ _docIds.push_back(std::make_pair(features._docId,
+ static_cast<uint32_t>(featureSize)));
+ _featureOffset = writeOffset;
+}
+
+
+void
+Zc4PostingSeqWrite::flushWord(void)
+{
+ if (__builtin_expect(_docIds.size() >= _minSkipDocs ||
+ !_counts._segments.empty(), false)) {
+ // Use skip information if enough documents of chunking has happened
+ flushWordWithSkip(false);
+ _numWords++;
+ } else if (_docIds.size() > 0) {
+ flushWordNoSkip();
+ _numWords++;
+ }
+
+ EncodeContext &e = _encodeContext;
+ uint64_t writePos = e.getWriteOffset();
+
+ _counts._bitLength = writePos - _writePos;
+ _writePos = writePos;
+}
+
+
+void
+Zc4PostingSeqWrite::checkPointWrite(nbostream &out)
+{
+ _writeContext.writeComprBuffer(true); // Also flush slack
+ out << _numWords;
+ _writeContext.checkPointWrite(out);
+ _featureWriteContext.checkPointWrite(out);
+ out.saveVector(_docIds) << _writePos << _counts;
+ _file.Sync();
+}
+
+
+void
+Zc4PostingSeqWrite::checkPointRead(nbostream &in)
+{
+ in >> _numWords;
+ _writeContext.checkPointRead(in);
+ _featureWriteContext.checkPointRead(in);
+ _featureOffset = _encodeFeatures->getWriteOffset();
+ in.restoreVector(_docIds) >> _writePos >> _counts;
+}
+
+
+uint32_t
+Zc4PostingSeqWrite::readHeader(const vespalib::string &name)
+{
+ EncodeContext &f = *_encodeFeatures;
+
+ FeatureDecodeContextBE d;
+ ComprFileReadContext drc(d);
+ FastOS_File file;
+ const vespalib::string &myId = _dynamicK ? myId5 : myId4;
+
+ d.setReadContext(&drc);
+ bool res = file.OpenReadOnly(name.c_str());
+ if (!res) {
+ LOG(error, "Could not open %s for reading file header: %s",
+ name.c_str(), getLastErrorString().c_str());
+ abort();
+ }
+
+ drc.setFile(&file);
+ drc.setFileSize(file.GetSize());
+ drc.allocComprBuf(512, 32768u);
+ d.emptyBuffer(0);
+ drc.readComprBuffer();
+
+ vespalib::FileHeader header;
+ d.readHeader(header, file.getSize());
+ uint32_t headerLen = header.getSize();
+ assert(header.hasTag("frozen"));
+ assert(header.hasTag("fileBitSize"));
+ assert(header.hasTag("format.0"));
+ assert(header.hasTag("format.1"));
+ assert(!header.hasTag("format.2"));
+ assert(header.hasTag("numWords"));
+ assert(header.hasTag("minChunkDocs"));
+ assert(header.hasTag("docIdLimit"));
+ assert(header.hasTag("minSkipDocs"));
+ assert(header.hasTag("endian"));
+ bool headerCompleted = header.getTag("frozen").asInteger() != 0;
+ uint64_t headerFileBitSize = header.getTag("fileBitSize").asInteger();
+ headerLen += (-headerLen & 7);
+ assert(!headerCompleted || headerFileBitSize >= headerLen * 8);
+ (void) headerCompleted;
+ (void) headerFileBitSize;
+ assert(header.getTag("format.0").asString() == myId);
+ (void) myId;
+ assert(header.getTag("format.1").asString() == f.getIdentifier());
+ _minChunkDocs = header.getTag("minChunkDocs").asInteger();
+ _docIdLimit = header.getTag("docIdLimit").asInteger();
+ _minSkipDocs = header.getTag("minSkipDocs").asInteger();
+ assert(header.getTag("endian").asString() == "big");
+ // Read feature decoding specific subheader using helper decode context
+ f.readHeader(header, "features.");
+ // Align on 64-bit unit
+ d.smallAlign(64);
+ assert(d.getReadOffset() == headerLen * 8);
+ file.Close();
+ return headerLen;
+}
+
+
+void
+Zc4PostingSeqWrite::makeHeader(const FileHeaderContext &fileHeaderContext)
+{
+ EncodeContext &f = *_encodeFeatures;
+ EncodeContext &e = _encodeContext;
+ ComprFileWriteContext &wce = _writeContext;
+
+ const vespalib::string &myId = _dynamicK ? myId5 : myId4;
+ vespalib::FileHeader header;
+
+ typedef vespalib::GenericHeader::Tag Tag;
+ fileHeaderContext.addTags(header, _file.GetFileName());
+ header.putTag(Tag("frozen", 0));
+ header.putTag(Tag("fileBitSize", 0));
+ header.putTag(Tag("format.0", myId));
+ header.putTag(Tag("format.1", f.getIdentifier()));
+ header.putTag(Tag("numWords", 0));
+ header.putTag(Tag("minChunkDocs", _minChunkDocs));
+ header.putTag(Tag("docIdLimit", _docIdLimit));
+ header.putTag(Tag("minSkipDocs", _minSkipDocs));
+ header.putTag(Tag("endian", "big"));
+ header.putTag(Tag("desc", "Posting list file"));
+
+ f.writeHeader(header, "features.");
+ e.setupWrite(wce);
+ e.writeHeader(header);
+ e.smallAlign(64);
+ e.flush();
+ uint32_t headerLen = header.getSize();
+ headerLen += (-headerLen & 7); // Then to uint64_t
+ assert(e.getWriteOffset() == headerLen * 8);
+ assert((e.getWriteOffset() & 63) == 0); // Header must be word aligned
+}
+
+
+void
+Zc4PostingSeqWrite::updateHeader(void)
+{
+ vespalib::FileHeader h;
+ FastOS_File f;
+ f.OpenReadWrite(_file.GetFileName());
+ h.readFile(f);
+ FileHeaderContext::setFreezeTime(h);
+ typedef vespalib::GenericHeader::Tag Tag;
+ h.putTag(Tag("frozen", 1));
+ h.putTag(Tag("fileBitSize", _fileBitSize));
+ h.putTag(Tag("numWords", _numWords));
+ h.rewriteFile(f);
+ f.Sync();
+ f.Close();
+}
+
+
+bool
+Zc4PostingSeqWrite::open(const vespalib::string &name,
+ const TuneFileSeqWrite &tuneFileWrite,
+ const FileHeaderContext &fileHeaderContext)
+{
+ if (tuneFileWrite.getWantSyncWrites())
+ _file.EnableSyncWrites();
+ if (tuneFileWrite.getWantDirectIO())
+ _file.EnableDirectIO();
+ bool ok = _file.OpenWriteOnly(name.c_str());
+ if (!ok) {
+ LOG(error, "could not open '%s' for writing: %s",
+ _file.GetFileName(), getLastErrorString().c_str());
+ // XXX may need to do something more here, I don't know what...
+ return false;
+ }
+ uint64_t fileSize = _file.GetSize();
+ uint64_t bufferStartFilePos = _writeContext.getBufferStartFilePos();
+ assert(fileSize >= bufferStartFilePos);
+ (void) fileSize;
+ _file.SetSize(bufferStartFilePos);
+ assert(bufferStartFilePos == static_cast<uint64_t>(_file.GetPosition()));
+ _writeContext.setFile(&_file);
+ search::ComprBuffer &cb = _writeContext;
+ EncodeContext &e = _encodeContext;
+ _writeContext.allocComprBuf(65536u, 32768u);
+ if (bufferStartFilePos == 0) {
+ e.setupWrite(cb);
+ // Reset accumulated stats
+ _fileBitSize = 0;
+ _numWords = 0;
+ // Start write initial header
+ makeHeader(fileHeaderContext);
+ _encodeFeatures->setupWrite(_featureWriteContext);
+ // end write initial header
+ _writePos = e.getWriteOffset();
+ } else {
+ assert(bufferStartFilePos >= 8u);
+ uint32_t headerSize = readHeader(name); // Read existing header
+ assert(bufferStartFilePos >= headerSize);
+ (void) headerSize;
+ e.afterWrite(_writeContext, 0, bufferStartFilePos);
+ }
+
+ // Ensure that some space is initially available in encoding buffers
+ _zcDocIds.maybeExpand();
+ _l1Skip.maybeExpand();
+ _l2Skip.maybeExpand();
+ _l3Skip.maybeExpand();
+ _l4Skip.maybeExpand();
+ return true; // Assume success
+}
+
+
+bool
+Zc4PostingSeqWrite::close(void)
+{
+ EncodeContext &e = _encodeContext;
+
+ _fileBitSize = e.getWriteOffset();
+ // Write some pad bits to avoid decompression readahead going past
+ // memory mapped file during search and into SIGSEGV territory.
+
+ // First pad to 64 bits alignment.
+ e.smallAlign(64);
+ e.writeComprBufferIfNeeded();
+
+ // Then write 128 more bits. This allows for 64-bit decoding
+ // with a readbits that always leaves a nonzero preRead
+ e.padBits(128);
+ e.alignDirectIO();
+ e.flush();
+ e.writeComprBuffer(); // Also flushes slack
+
+ _writeContext.dropComprBuf();
+ _file.Sync();
+ _file.Close();
+ _writeContext.setFile(NULL);
+ updateHeader();
+ return true;
+}
+
+
+
+void
+Zc4PostingSeqWrite::
+setParams(const PostingListParams &params)
+{
+ if (_countFile != NULL)
+ _countFile->setParams(params);
+ params.get("docIdLimit", _docIdLimit);
+ params.get("minChunkDocs", _minChunkDocs);
+ params.get("minSkipDocs", _minSkipDocs);
+}
+
+
+void
+Zc4PostingSeqWrite::
+getParams(PostingListParams &params)
+{
+ if (_countFile != NULL) {
+ PostingListParams countParams;
+ _countFile->getParams(countParams);
+ params = countParams;
+ uint32_t countDocIdLimit = 0;
+ uint32_t countMinChunkDocs = 0;
+ countParams.get("docIdLimit", countDocIdLimit);
+ countParams.get("minChunkDocs", countMinChunkDocs);
+ assert(_docIdLimit == countDocIdLimit);
+ assert(_minChunkDocs == countMinChunkDocs);
+ } else {
+ params.clear();
+ params.set("docIdLimit", _docIdLimit);
+ params.set("minChunkDocs", _minChunkDocs);
+ }
+ params.set("minSkipDocs", _minSkipDocs);
+}
+
+
+void
+Zc4PostingSeqWrite::
+setFeatureParams(const PostingListParams &params)
+{
+ _encodeFeatures->setParams(params);
+}
+
+
+void
+Zc4PostingSeqWrite::
+getFeatureParams(PostingListParams &params)
+{
+ _encodeFeatures->getParams(params);
+}
+
+
+void
+Zc4PostingSeqWrite::flushChunk(void)
+{
+ /* TODO: Flush chunk and prepare for new (possible short) chunk */
+ flushWordWithSkip(true);
+}
+
+#define L1SKIPSTRIDE 16
+#define L2SKIPSTRIDE 8
+#define L3SKIPSTRIDE 8
+#define L4SKIPSTRIDE 8
+
+
+void
+Zc4PostingSeqWrite::calcSkipInfo(void)
+{
+ uint32_t lastDocId = 0u;
+ uint32_t lastL1SkipDocId = 0u;
+ uint32_t lastL1SkipDocIdPos = 0;
+ uint32_t lastL1SkipFeaturePos = 0;
+ uint32_t lastL2SkipDocId = 0u;
+ uint32_t lastL2SkipDocIdPos = 0;
+ uint32_t lastL2SkipFeaturePos = 0;
+ uint32_t lastL2SkipL1SkipPos = 0;
+ uint32_t lastL3SkipDocId = 0u;
+ uint32_t lastL3SkipDocIdPos = 0;
+ uint32_t lastL3SkipFeaturePos = 0;
+ uint32_t lastL3SkipL1SkipPos = 0;
+ uint32_t lastL3SkipL2SkipPos = 0;
+ uint32_t lastL4SkipDocId = 0u;
+ uint32_t lastL4SkipDocIdPos = 0;
+ uint32_t lastL4SkipFeaturePos = 0;
+ uint32_t lastL4SkipL1SkipPos = 0;
+ uint32_t lastL4SkipL2SkipPos = 0;
+ uint32_t lastL4SkipL3SkipPos = 0;
+ unsigned int l1SkipCnt = 0;
+ unsigned int l2SkipCnt = 0;
+ unsigned int l3SkipCnt = 0;
+ unsigned int l4SkipCnt = 0;
+ uint64_t featurePos = 0;
+
+ std::vector<DocIdAndFeatureSize>::const_iterator dit = _docIds.begin();
+ std::vector<DocIdAndFeatureSize>::const_iterator dite = _docIds.end();
+
+ if (!_counts._segments.empty()) {
+ lastDocId = _counts._segments.back()._lastDoc;
+ lastL1SkipDocId = lastDocId;
+ lastL2SkipDocId = lastDocId;
+ lastL3SkipDocId = lastDocId;
+ lastL4SkipDocId = lastDocId;
+ }
+
+ for (; dit != dite; ++dit) {
+ if (l1SkipCnt >= L1SKIPSTRIDE) {
+ // L1 docid delta
+ uint32_t docIdDelta = lastDocId - lastL1SkipDocId;
+ assert(static_cast<int32_t>(docIdDelta) > 0);
+ _l1Skip.encode(docIdDelta - 1);
+ lastL1SkipDocId = lastDocId;
+ // L1 docid pos
+ uint64_t docIdPos = _zcDocIds.size();
+ _l1Skip.encode(docIdPos - lastL1SkipDocIdPos - 1);
+ lastL1SkipDocIdPos = docIdPos;
+ // L1 features pos
+ _l1Skip.encode(featurePos - lastL1SkipFeaturePos - 1);
+ lastL1SkipFeaturePos = featurePos;
+ l1SkipCnt = 0;
+ ++l2SkipCnt;
+ if (l2SkipCnt >= L2SKIPSTRIDE) {
+ // L2 docid delta
+ docIdDelta = lastDocId - lastL2SkipDocId;
+ assert(static_cast<int32_t>(docIdDelta) > 0);
+ _l2Skip.encode(docIdDelta - 1);
+ lastL2SkipDocId = lastDocId;
+ // L2 docid pos
+ docIdPos = _zcDocIds.size();
+ _l2Skip.encode(docIdPos - lastL2SkipDocIdPos - 1);
+ lastL2SkipDocIdPos = docIdPos;
+ // L2 features pos
+ _l2Skip.encode(featurePos - lastL2SkipFeaturePos - 1);
+ lastL2SkipFeaturePos = featurePos;
+ // L2 L1Skip pos
+ uint64_t l1SkipPos = _l1Skip.size();
+ _l2Skip.encode(l1SkipPos - lastL2SkipL1SkipPos - 1);
+ lastL2SkipL1SkipPos = l1SkipPos;
+ l2SkipCnt = 0;
+ ++l3SkipCnt;
+ if (l3SkipCnt >= L3SKIPSTRIDE) {
+ // L3 docid delta
+ docIdDelta = lastDocId - lastL3SkipDocId;
+ assert(static_cast<int32_t>(docIdDelta) > 0);
+ _l3Skip.encode(docIdDelta - 1);
+ lastL3SkipDocId = lastDocId;
+ // L3 docid pos
+ docIdPos = _zcDocIds.size();
+ _l3Skip.encode(docIdPos - lastL3SkipDocIdPos - 1);
+ lastL3SkipDocIdPos = docIdPos;
+ // L3 features pos
+ _l3Skip.encode(featurePos - lastL3SkipFeaturePos - 1);
+ lastL3SkipFeaturePos = featurePos;
+ // L3 L1Skip pos
+ l1SkipPos = _l1Skip.size();
+ _l3Skip.encode(l1SkipPos - lastL3SkipL1SkipPos - 1);
+ lastL3SkipL1SkipPos = l1SkipPos;
+ // L3 L2Skip pos
+ uint64_t l2SkipPos = _l2Skip.size();
+ _l3Skip.encode(l2SkipPos - lastL3SkipL2SkipPos - 1);
+ lastL3SkipL2SkipPos = l2SkipPos;
+ l3SkipCnt = 0;
+ ++l4SkipCnt;
+ if (l4SkipCnt >= L4SKIPSTRIDE) {
+ // L4 docid delta
+ docIdDelta = lastDocId - lastL4SkipDocId;
+ assert(static_cast<int32_t>(docIdDelta) > 0);
+ _l4Skip.encode(docIdDelta - 1);
+ lastL4SkipDocId = lastDocId;
+ // L4 docid pos
+ docIdPos = _zcDocIds.size();
+ _l4Skip.encode(docIdPos - lastL4SkipDocIdPos - 1);
+ lastL4SkipDocIdPos = docIdPos;
+ // L4 features pos
+ _l4Skip.encode(featurePos - lastL4SkipFeaturePos - 1);
+ lastL4SkipFeaturePos = featurePos;
+ // L4 L1Skip pos
+ l1SkipPos = _l1Skip.size();
+ _l4Skip.encode(l1SkipPos - lastL4SkipL1SkipPos - 1);
+ lastL4SkipL1SkipPos = l1SkipPos;
+ // L4 L2Skip pos
+ l2SkipPos = _l2Skip.size();
+ _l4Skip.encode(l2SkipPos - lastL4SkipL2SkipPos - 1);
+ lastL4SkipL2SkipPos = l2SkipPos;
+ // L4 L3Skip pos
+ uint64_t l3SkipPos = _l3Skip.size();
+ _l4Skip.encode(l3SkipPos - lastL4SkipL3SkipPos - 1);
+ lastL4SkipL3SkipPos = l3SkipPos;
+ l4SkipCnt = 0;
+ }
+ }
+ }
+ }
+ uint32_t docId = dit->first;
+ featurePos += dit->second;
+ _zcDocIds.encode(docId - lastDocId - 1);
+ lastDocId = docId;
+ ++l1SkipCnt;
+ }
+ // Extra partial entries for skip tables to simplify iterator during search
+ if (_l1Skip.size() > 0)
+ _l1Skip.encode(lastDocId - lastL1SkipDocId - 1);
+ if (_l2Skip.size() > 0)
+ _l2Skip.encode(lastDocId - lastL2SkipDocId - 1);
+ if (_l3Skip.size() > 0)
+ _l3Skip.encode(lastDocId - lastL3SkipDocId - 1);
+ if (_l4Skip.size() > 0)
+ _l4Skip.encode(lastDocId - lastL4SkipDocId - 1);
+}
+
+
+void
+Zc4PostingSeqWrite::flushWordWithSkip(bool hasMore)
+{
+ assert(_docIds.size() >= _minSkipDocs || !_counts._segments.empty());
+
+ _encodeFeatures->flush();
+ EncodeContext &e = _encodeContext;
+
+ uint32_t numDocs = _docIds.size();
+
+ e.encodeExpGolomb(numDocs - 1, K_VALUE_ZCPOSTING_NUMDOCS);
+ if (numDocs >= _minChunkDocs)
+ e.writeBits((hasMore ? 1 : 0), 1);
+
+ // TODO: Calculate docids size, possible also k parameter */
+ calcSkipInfo();
+
+ uint32_t docIdsSize = _zcDocIds.size();
+ uint32_t l1SkipSize = _l1Skip.size();
+ uint32_t l2SkipSize = _l2Skip.size();
+ uint32_t l3SkipSize = _l3Skip.size();
+ uint32_t l4SkipSize = _l4Skip.size();
+
+ e.encodeExpGolomb(docIdsSize - 1, K_VALUE_ZCPOSTING_DOCIDSSIZE);
+ e.encodeExpGolomb(l1SkipSize, K_VALUE_ZCPOSTING_L1SKIPSIZE);
+ if (l1SkipSize != 0) {
+ e.encodeExpGolomb(l2SkipSize, K_VALUE_ZCPOSTING_L2SKIPSIZE);
+ if (l2SkipSize != 0) {
+ e.encodeExpGolomb(l3SkipSize, K_VALUE_ZCPOSTING_L3SKIPSIZE);
+ if (l3SkipSize != 0) {
+ e.encodeExpGolomb(l4SkipSize, K_VALUE_ZCPOSTING_L4SKIPSIZE);
+ }
+ }
+ }
+ e.encodeExpGolomb(_featureOffset, K_VALUE_ZCPOSTING_FEATURESSIZE);
+
+ // Encode last document id in chunk or word.
+ if (_dynamicK) {
+ uint32_t docIdK = e.calcDocIdK((_counts._segments.empty() &&
+ !hasMore) ?
+ numDocs : 1,
+ _docIdLimit);
+ e.encodeExpGolomb(_docIdLimit - 1 - _docIds.back().first,
+ docIdK);
+ } else {
+ e.encodeExpGolomb(_docIdLimit - 1 - _docIds.back().first,
+ K_VALUE_ZCPOSTING_LASTDOCID);
+ }
+
+ e.smallAlign(8); // Byte align
+
+ uint8_t *docIds = _zcDocIds._mallocStart;
+ e.writeBits(reinterpret_cast<const uint64_t *>(docIds),
+ 0,
+ docIdsSize * 8);
+ if (l1SkipSize > 0) {
+ uint8_t *l1Skip = _l1Skip._mallocStart;
+ e.writeBits(reinterpret_cast<const uint64_t *>(l1Skip),
+ 0,
+ l1SkipSize * 8);
+ }
+ if (l2SkipSize > 0) {
+ uint8_t *l2Skip = _l2Skip._mallocStart;
+ e.writeBits(reinterpret_cast<const uint64_t *>(l2Skip),
+ 0,
+ l2SkipSize * 8);
+ }
+ if (l3SkipSize > 0) {
+ uint8_t *l3Skip = _l3Skip._mallocStart;
+ e.writeBits(reinterpret_cast<const uint64_t *>(l3Skip),
+ 0,
+ l3SkipSize * 8);
+ }
+ if (l4SkipSize > 0) {
+ uint8_t *l4Skip = _l4Skip._mallocStart;
+ e.writeBits(reinterpret_cast<const uint64_t *>(l4Skip),
+ 0,
+ l4SkipSize * 8);
+ }
+
+ // Write features
+ e.writeBits(static_cast<const uint64_t *>(_featureWriteContext._comprBuf),
+ 0,
+ _featureOffset);
+
+ _counts._numDocs += numDocs;
+ if (hasMore || !_counts._segments.empty()) {
+ uint64_t writePos = e.getWriteOffset();
+ PostingListCounts::Segment seg;
+ seg._bitLength = writePos - (_writePos + _counts._bitLength);
+ seg._numDocs = numDocs;
+ seg._lastDoc = _docIds.back().first;
+ _counts._segments.push_back(seg);
+ _counts._bitLength += seg._bitLength;
+ }
+ // reset tables in preparation for next word or next chunk
+ _zcDocIds.clear();
+ _l1Skip.clear();
+ _l2Skip.clear();
+ _l3Skip.clear();
+ _l4Skip.clear();
+ resetWord();
+}
+
+
+void
+Zc4PostingSeqWrite::flushWordNoSkip(void)
+{
+ // Too few document ids for skip info.
+ assert(_docIds.size() < _minSkipDocs && _counts._segments.empty());
+
+ _encodeFeatures->flush();
+ EncodeContext &e = _encodeContext;
+ uint32_t numDocs = _docIds.size();
+
+ e.encodeExpGolomb(numDocs - 1, K_VALUE_ZCPOSTING_NUMDOCS);
+
+ uint32_t baseDocId = 1;
+ const uint64_t *features =
+ static_cast<const uint64_t *>(_featureWriteContext._comprBuf);
+ uint64_t featureOffset = 0;
+
+ std::vector<DocIdAndFeatureSize>::const_iterator dit = _docIds.begin();
+ std::vector<DocIdAndFeatureSize>::const_iterator dite = _docIds.end();
+
+ for (; dit != dite; ++dit) {
+ uint32_t docId = dit->first;
+ uint32_t featureSize = dit->second;
+ e.encodeExpGolomb(docId - baseDocId, K_VALUE_ZCPOSTING_DELTA_DOCID);
+ baseDocId = docId + 1;
+ e.writeBits(features + (featureOffset >> 6),
+ featureOffset & 63,
+ featureSize);
+ featureOffset += featureSize;
+ }
+ _counts._numDocs += numDocs;
+ resetWord();
+}
+
+
+void
+Zc4PostingSeqWrite::resetWord(void)
+{
+ _docIds.clear();
+ _encodeFeatures->setupWrite(_featureWriteContext);
+ _featureOffset = 0;
+}
+
+
+ZcPostingSeqRead::ZcPostingSeqRead(PostingListCountFileSeqRead *countFile)
+ : Zc4PostingSeqRead(countFile)
+{
+ _dynamicK = true;
+}
+
+
+void
+ZcPostingSeqRead::
+readDocIdAndFeatures(DocIdAndFeatures &features)
+{
+ if (_residue == 0 && !_hasMore) {
+ if (_rangeEndOffset != 0) {
+ DecodeContext &d = *_decodeContext;
+ uint64_t curOffset = d.getReadOffset();
+ assert(curOffset <= _rangeEndOffset);
+ if (curOffset < _rangeEndOffset)
+ readWordStart();
+ }
+ if (_residue == 0) {
+ // Don't read past end of posting list.
+ features.clear(static_cast<uint32_t>(-1));
+ return;
+ }
+ }
+ if (_lastDocId > 0) {
+ readCommonWordDocIdAndFeatures(features);
+ return;
+ }
+ // Interleaves docid & features
+ typedef FeatureEncodeContextBE EC;
+ DecodeContext &d = *_decodeContext;
+ uint32_t length;
+ uint64_t val64;
+ UC64_DECODECONTEXT_CONSTRUCTOR(o, d._);
+
+ UC64BE_DECODEEXPGOLOMB_SMALL_NS(o,
+ _docIdK,
+ EC);
+ uint32_t docId = _prevDocId + 1 + val64;
+ features._docId = docId;
+ _prevDocId = docId;
+ UC64_DECODECONTEXT_STORE(o, d._);
+ if (__builtin_expect(oCompr >= d._valE, false)) {
+ _readContext.readComprBuffer();
+ }
+ _decodeContext->readFeatures(features);
+ --_residue;
+}
+
+
+const vespalib::string &
+ZcPostingSeqRead::getIdentifier(void)
+{
+ return myId5;
+}
+
+
+ZcPostingSeqWrite::ZcPostingSeqWrite(PostingListCountFileSeqWrite *countFile)
+ : Zc4PostingSeqWrite(countFile)
+{
+ _dynamicK = true;
+}
+
+
+void
+ZcPostingSeqWrite::flushWordNoSkip(void)
+{
+ // Too few document ids for skip info.
+ assert(_docIds.size() < _minSkipDocs && _counts._segments.empty());
+
+ _encodeFeatures->flush();
+ EncodeContext &e = _encodeContext;
+ uint32_t numDocs = _docIds.size();
+
+ e.encodeExpGolomb(numDocs - 1, K_VALUE_ZCPOSTING_NUMDOCS);
+
+ uint32_t docIdK = e.calcDocIdK(numDocs, _docIdLimit);
+
+ uint32_t baseDocId = 1;
+ const uint64_t *features =
+ static_cast<const uint64_t *>(_featureWriteContext._comprBuf);
+ uint64_t featureOffset = 0;
+
+ std::vector<DocIdAndFeatureSize>::const_iterator dit = _docIds.begin();
+ std::vector<DocIdAndFeatureSize>::const_iterator dite = _docIds.end();
+
+ for (; dit != dite; ++dit) {
+ uint32_t docId = dit->first;
+ uint32_t featureSize = dit->second;
+ e.encodeExpGolomb(docId - baseDocId, docIdK);
+ baseDocId = docId + 1;
+ e.writeBits(features + (featureOffset >> 6),
+ featureOffset & 63,
+ featureSize);
+ featureOffset += featureSize;
+ }
+ _counts._numDocs += numDocs;
+ resetWord();
+}
+
+
+} // namespace diskindex
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/diskindex/zcposting.h b/searchlib/src/vespa/searchlib/diskindex/zcposting.h
new file mode 100644
index 00000000000..447216d84cb
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/diskindex/zcposting.h
@@ -0,0 +1,495 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/index/postinglistfile.h>
+#include <vespa/searchlib/bitcompression/compression.h>
+
+namespace search
+{
+
+namespace index
+{
+
+class PostingListCountFileSeqRead;
+
+class PostingListCountFileSeqWrite;
+
+}
+
+namespace diskindex
+{
+
+class ZcBuf
+{
+public:
+ uint8_t *_valI;
+ uint8_t *_valE;
+ uint8_t *_mallocStart;
+ size_t _mallocSize;
+
+ ZcBuf(void)
+ : _valI(NULL),
+ _valE(NULL),
+ _mallocStart(NULL),
+ _mallocSize(0)
+ {
+ }
+
+ ~ZcBuf(void)
+ {
+ free(_mallocStart);
+ }
+
+
+ static size_t
+ zcSlack(void)
+ {
+ return 4;
+ }
+
+ void
+ clearReserve(size_t reserveSize);
+
+ void
+ clear(void)
+ {
+ _valI = _mallocStart;
+ }
+
+ size_t
+ capacity(void) const
+ {
+ return _valE - _mallocStart;
+ }
+
+ size_t
+ size(void) const
+ {
+ return _valI - _mallocStart;
+ }
+
+ size_t
+ pos(void) const
+ {
+ return _valI - _mallocStart;
+ }
+
+ void
+ expand(void);
+
+ void
+ maybeExpand(void)
+ {
+ if (__builtin_expect(_valI >= _valE, false))
+ expand();
+ }
+
+ void
+ encode(uint32_t num)
+ {
+ for (;;) {
+ if (num < (1 << 7)) {
+ *_valI++ = num;
+ break;
+ }
+ *_valI++ = (num & ((1 << 7) - 1)) | (1 << 7);
+ num >>= 7;
+ }
+ maybeExpand();
+ }
+
+ uint32_t
+ decode(void)
+ {
+ uint32_t res;
+ uint8_t *valI = _valI;
+ if (__builtin_expect(valI[0] < (1 << 7), true)) {
+ res = valI[0];
+ valI += 1;
+ } else if (__builtin_expect(valI[1] < (1 << 7), true)) {
+ res = (valI[0] & ((1 << 7) - 1)) +
+ (valI[1] << 7);
+ valI += 2;
+ } else if (__builtin_expect(valI[2] < (1 << 7), true)) {
+ res = (valI[0] & ((1 << 7) - 1)) +
+ ((valI[1] & ((1 << 7) - 1)) << 7) +
+ (valI[2] << 14);
+ valI += 3;
+ } else if (__builtin_expect(valI[3] < (1 << 7), true)) {
+ res = (valI[0] & ((1 << 7) - 1)) +
+ ((valI[1] & ((1 << 7) - 1)) << 7) +
+ ((valI[2] & ((1 << 7) - 1)) << 14) +
+ (valI[3] << 21);
+ valI += 4;
+ } else {
+ res = (valI[0] & ((1 << 7) - 1)) +
+ ((valI[1] & ((1 << 7) - 1)) << 7) +
+ ((valI[2] & ((1 << 7) - 1)) << 14) +
+ ((valI[3] & ((1 << 7) - 1)) << 21) +
+ (valI[4] << 28);
+ valI += 5;
+ }
+ _valI = valI;
+ return res;
+ }
+};
+
+class Zc4PostingSeqRead : public index::PostingListFileSeqRead
+{
+ Zc4PostingSeqRead(const Zc4PostingSeqRead &);
+
+ Zc4PostingSeqRead &
+ operator=(const Zc4PostingSeqRead &);
+
+protected:
+ typedef bitcompression::FeatureDecodeContextBE DecodeContext;
+ typedef bitcompression::FeatureEncodeContextBE EncodeContext;
+
+ DecodeContext *_decodeContext;
+ uint32_t _docIdK;
+ uint32_t _prevDocId; // Previous document id
+ uint32_t _numDocs; // Documents in chunk or word
+ search::ComprFileReadContext _readContext;
+ FastOS_File _file;
+ bool _hasMore;
+ bool _dynamicK; // Caclulate EG compression parameters ?
+ uint32_t _lastDocId; // last document in chunk or word
+ uint32_t _minChunkDocs; // # of documents needed for chunking
+ uint32_t _minSkipDocs; // # of documents needed for skipping
+ uint32_t _docIdLimit; // Limit for document ids (docId < docIdLimit)
+
+ ZcBuf _zcDocIds; // Document id deltas
+ ZcBuf _l1Skip; // L1 skip info
+ ZcBuf _l2Skip; // L2 skip info
+ ZcBuf _l3Skip; // L3 skip info
+ ZcBuf _l4Skip; // L4 skip info
+
+ uint64_t _numWords; // Number of words in file
+ uint64_t _fileBitSize;
+ uint32_t _chunkNo; // Chunk number
+
+ // Variables for validating skip information while reading
+ uint32_t _l1SkipDocId;
+ uint32_t _l1SkipDocIdPos;
+ uint64_t _l1SkipFeaturesPos;
+ uint32_t _l2SkipDocId;
+ uint32_t _l2SkipDocIdPos;
+ uint32_t _l2SkipL1SkipPos;
+ uint64_t _l2SkipFeaturesPos;
+ uint32_t _l3SkipDocId;
+ uint32_t _l3SkipDocIdPos;
+ uint32_t _l3SkipL1SkipPos;
+ uint32_t _l3SkipL2SkipPos;
+ uint64_t _l3SkipFeaturesPos;
+ uint32_t _l4SkipDocId;
+ uint32_t _l4SkipDocIdPos;
+ uint32_t _l4SkipL1SkipPos;
+ uint32_t _l4SkipL2SkipPos;
+ uint32_t _l4SkipL3SkipPos;
+ uint64_t _l4SkipFeaturesPos;
+
+ // Variable for validating chunk information while reading
+ uint64_t _featuresSize;
+ index::PostingListCountFileSeqRead *const _countFile;
+
+ uint64_t _headerBitLen; // Size of file header in bits
+ uint64_t _rangeEndOffset; // End offset for word pair
+ uint64_t _readAheadEndOffset;// Readahead end offset for word pair
+ uint64_t _wordStart; // last word header position
+ uint64_t _checkPointPos; // file position when checkpointing
+ uint32_t _residue; // Number of unread documents after word header
+ uint32_t _checkPointChunkNo; // _chunkNo when checkpointing
+ uint32_t _checkPointResidue;// _residue when checkpointing
+ bool _checkPointHasMore; // _hasMore when checkpointing
+public:
+ Zc4PostingSeqRead(index::PostingListCountFileSeqRead *countFile);
+
+ virtual
+ ~Zc4PostingSeqRead(void);
+
+ typedef index::DocIdAndFeatures DocIdAndFeatures;
+ typedef index::PostingListCounts PostingListCounts;
+ typedef index::PostingListParams PostingListParams;
+
+ /**
+ * Read document id and features for common word.
+ */
+ virtual void
+ readCommonWordDocIdAndFeatures(DocIdAndFeatures &features);
+
+ /**
+ * Read document id and features.
+ */
+ virtual void
+ readDocIdAndFeatures(DocIdAndFeatures &features);
+
+ /**
+ * Checkpoint write. Used at semi-regular intervals during indexing
+ * to allow for continued indexing after an interrupt. Implies
+ * flush from memory to disk, and possibly also sync to permanent
+ * storage media.
+ */
+ virtual void
+ checkPointWrite(vespalib::nbostream &out);
+
+ /**
+ * Checkpoint read. Used when resuming indexing after an interrupt.
+ */
+ virtual void
+ checkPointRead(vespalib::nbostream &in);
+
+ /**
+ * Read counts for a word.
+ */
+ virtual void
+ readCounts(const PostingListCounts &counts); // Fill in for next word
+
+ /**
+ * Open posting list file for sequential read.
+ */
+ virtual bool
+ open(const vespalib::string &name, const TuneFileSeqRead &tuneFileRead);
+
+ /**
+ * Close posting list file.
+ */
+ virtual bool
+ close(void);
+
+ /*
+ * Get current parameters.
+ */
+ virtual void
+ getParams(PostingListParams &params);
+
+ /*
+ * Get current feature parameters.
+ */
+ virtual void
+ getFeatureParams(PostingListParams &params);
+
+ void
+ readWordStartWithSkip(void);
+
+ void
+ readWordStart(void);
+
+ void
+ readHeader(void);
+
+ static const vespalib::string &
+ getIdentifier(void);
+
+ // Methods used when generating posting list for common word pairs.
+
+ /*
+ * Get current posting offset, measured in bits. First posting list
+ * starts at 0, i.e. file header is not accounted for here.
+ *
+ * @return current posting offset, measured in bits.
+ */
+ virtual uint64_t
+ getCurrentPostingOffset(void) const;
+
+ /**
+ * Set current posting offset, measured in bits. First posting
+ * list starts at 0, i.e. file header is not accounted for here.
+ *
+ * @param Offset start of posting lists for word pair.
+ * @param endOffset end of posting lists for word pair.
+ * @param readAheadOffset end of posting list for either this or a
+ * later word pair, depending on disk seek cost.
+ */
+ virtual void
+ setPostingOffset(uint64_t offset,
+ uint64_t endOffset,
+ uint64_t readAheadOffset);
+};
+
+
+class Zc4PostingSeqWrite : public index::PostingListFileSeqWrite
+{
+ Zc4PostingSeqWrite(const Zc4PostingSeqWrite &);
+
+ Zc4PostingSeqWrite &
+ operator=(const Zc4PostingSeqWrite &);
+
+protected:
+ typedef bitcompression::FeatureEncodeContextBE EncodeContext;
+
+ EncodeContext _encodeContext;
+ search::ComprFileWriteContext _writeContext;
+ FastOS_File _file;
+ uint32_t _minChunkDocs; // # of documents needed for chunking
+ uint32_t _minSkipDocs; // # of documents needed for skipping
+ uint32_t _docIdLimit; // Limit for document ids (docId < docIdLimit)
+ // Unpacked document ids for word and feature sizes
+ typedef std::pair<uint32_t, uint32_t> DocIdAndFeatureSize;
+ std::vector<DocIdAndFeatureSize> _docIds;
+
+ // Buffer up features in memory
+ EncodeContext *_encodeFeatures;
+ uint64_t _featureOffset; // Bit offset of next feature
+ search::ComprFileWriteContext _featureWriteContext;
+ uint64_t _writePos; // Bit position for start of current word
+ bool _dynamicK; // Caclulate EG compression parameters ?
+ ZcBuf _zcDocIds; // Document id deltas
+ ZcBuf _l1Skip; // L1 skip info
+ ZcBuf _l2Skip; // L2 skip info
+ ZcBuf _l3Skip; // L3 skip info
+ ZcBuf _l4Skip; // L4 skip info
+
+ uint64_t _numWords; // Number of words in file
+ uint64_t _fileBitSize;
+ index::PostingListCountFileSeqWrite *const _countFile;
+public:
+ Zc4PostingSeqWrite(index::PostingListCountFileSeqWrite *countFile);
+
+ virtual
+ ~Zc4PostingSeqWrite(void);
+
+ typedef index::DocIdAndFeatures DocIdAndFeatures;
+ typedef index::PostingListCounts PostingListCounts;
+ typedef index::PostingListParams PostingListParams;
+
+ /**
+ * Write document id and features.
+ */
+ virtual void
+ writeDocIdAndFeatures(const DocIdAndFeatures &features);
+
+ /**
+ * Flush word (during write) after it is complete to buffers, i.e.
+ * prepare for next word, but not for application crash.
+ */
+ virtual void
+ flushWord(void);
+
+ /**
+ * Checkpoint write. Used at semi-regular intervals during indexing
+ * to allow for continued indexing after an interrupt. Implies
+ * flush from memory to disk, and possibly also sync to permanent
+ * storage media.
+ */
+ virtual void
+ checkPointWrite(vespalib::nbostream &out);
+
+ /**
+ * Checkpoint read. Used when resuming indexing after an interrupt.
+ */
+ virtual void
+ checkPointRead(vespalib::nbostream &in);
+
+ /**
+ * Open posting list file for sequential write.
+ */
+ virtual bool
+ open(const vespalib::string &name,
+ const TuneFileSeqWrite &tuneFileWrite,
+ const search::common::FileHeaderContext &fileHeaderContext);
+
+ /**
+ * Close posting list file.
+ */
+ virtual bool
+ close(void);
+
+ /*
+ * Set parameters.
+ */
+ virtual void
+ setParams(const PostingListParams &params);
+
+ /*
+ * Get current parameters.
+ */
+ virtual void
+ getParams(PostingListParams &params);
+
+ /*
+ * Set feature parameters.
+ */
+ virtual void
+ setFeatureParams(const PostingListParams &params);
+
+ /*
+ * Get current feature parameters.
+ */
+ virtual void
+ getFeatureParams(PostingListParams &params);
+
+ /**
+ * Flush chunk to file.
+ */
+ void
+ flushChunk(void);
+
+ /**
+ *
+ */
+ void
+ calcSkipInfo(void);
+
+ /**
+ * Flush word with skip info to disk
+ */
+ void
+ flushWordWithSkip(bool hasMore);
+
+
+ /**
+ * Flush word without skip info to disk.
+ */
+ virtual void
+ flushWordNoSkip(void);
+
+ /**
+ * Prepare for next word or next chunk.
+ */
+ void
+ resetWord(void);
+
+ /**
+ * Make header using feature encode write context.
+ */
+ void
+ makeHeader(const search::common::FileHeaderContext &fileHeaderContext);
+
+ void
+ updateHeader(void);
+
+ /**
+ * Read header, using temporary feature decode context.
+ */
+ uint32_t
+ readHeader(const vespalib::string &name);
+};
+
+
+class ZcPostingSeqRead : public Zc4PostingSeqRead
+{
+public:
+ ZcPostingSeqRead(index::PostingListCountFileSeqRead *countFile);
+
+ virtual void
+ readDocIdAndFeatures(DocIdAndFeatures &features);
+
+ static const vespalib::string &
+ getIdentifier(void);
+};
+
+class ZcPostingSeqWrite : public Zc4PostingSeqWrite
+{
+public:
+ ZcPostingSeqWrite(index::PostingListCountFileSeqWrite *countFile);
+
+ virtual void
+ flushWordNoSkip(void);
+
+};
+
+} // namespace diskindex
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/diskindex/zcpostingiterators.cpp b/searchlib/src/vespa/searchlib/diskindex/zcpostingiterators.cpp
new file mode 100644
index 00000000000..884bdfa6415
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/diskindex/zcpostingiterators.cpp
@@ -0,0 +1,700 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".diskindex.zcpostingiterators");
+#include "zcpostingiterators.h"
+#include <vespa/searchlib/fef/termfieldmatchdataarray.h>
+#include <vespa/searchlib/bitcompression/posocccompression.h>
+
+namespace search
+{
+
+namespace diskindex
+{
+
+using search::fef::TermFieldMatchDataArray;
+using search::bitcompression::FeatureDecodeContext;
+using search::bitcompression::FeatureEncodeContext;
+using queryeval::RankedSearchIteratorBase;
+
+#define DEBUG_ZCPOSTING_PRINTF 0
+#define DEBUG_ZCPOSTING_ASSERT 0
+
+ZcIteratorBase::ZcIteratorBase(const TermFieldMatchDataArray &matchData, Position start, uint32_t docIdLimit) :
+ RankedSearchIteratorBase(matchData),
+ _docIdLimit(docIdLimit),
+ _start(start)
+{ }
+
+void
+ZcIteratorBase::initRange(uint32_t beginid, uint32_t endid)
+{
+ uint32_t prev = getDocId();
+ RankedSearchIteratorBase::initRange(beginid, endid);
+ if ((beginid <= prev) || (prev == 0)) {
+ rewind(_start);
+ readWordStart(getDocIdLimit());
+ }
+ seek(beginid);
+}
+
+
+template <bool bigEndian>
+Zc4RareWordPostingIterator<bigEndian>::
+Zc4RareWordPostingIterator(const TermFieldMatchDataArray &matchData, Position start, uint32_t docIdLimit)
+ : ZcIteratorBase(matchData, start, docIdLimit),
+ _decodeContext(NULL),
+ _residue(0),
+ _prevDocId(0),
+ _numDocs(0)
+{ }
+
+
+template <bool bigEndian>
+void
+Zc4RareWordPostingIterator<bigEndian>::doSeek(uint32_t docId)
+{
+ typedef FeatureEncodeContext<bigEndian> EC;
+ uint32_t length;
+ uint64_t val64;
+
+ uint32_t oDocId = getDocId();
+
+ UC64_DECODECONTEXT_CONSTRUCTOR(o, _decodeContext->_);
+ if (getUnpacked()) {
+ clearUnpacked();
+ if (__builtin_expect(--_residue == 0, false))
+ goto atbreak;
+ UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_DELTA_DOCID, EC);
+ oDocId += 1 + static_cast<uint32_t>(val64);
+#if DEBUG_ZCPOSTING_PRINTF
+ printf("Decode docId=%d\n",
+ oDocId);
+#endif
+ }
+ while (__builtin_expect(oDocId < docId, true)) {
+ UC64_DECODECONTEXT_STORE(o, _decodeContext->_);
+ _decodeContext->skipFeatures(1);
+ UC64_DECODECONTEXT_LOAD(o, _decodeContext->_);
+ if (__builtin_expect(--_residue == 0, false))
+ goto atbreak;
+ UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_DELTA_DOCID, EC);
+ oDocId += 1 + static_cast<uint32_t>(val64);
+#if DEBUG_ZCPOSTING_PRINTF
+ printf("Decode docId=%d\n",
+ oDocId);
+#endif
+ }
+ UC64_DECODECONTEXT_STORE(o, _decodeContext->_);
+ setDocId(oDocId);
+ return;
+ atbreak:
+ setAtEnd(); // Mark end of data
+ return;
+}
+
+
+template <bool bigEndian>
+void
+Zc4RareWordPostingIterator<bigEndian>::doUnpack(uint32_t docId)
+{
+ if (!_matchData.valid() || getUnpacked())
+ return;
+ assert(docId == getDocId());
+ _decodeContext->unpackFeatures(_matchData, docId);
+ setUnpacked();
+}
+
+template <bool bigEndian>
+void Zc4RareWordPostingIterator<bigEndian>::rewind(Position start)
+{
+ _decodeContext->setPosition(start);
+}
+
+template <bool bigEndian>
+void
+Zc4RareWordPostingIterator<bigEndian>::readWordStart(uint32_t docIdLimit)
+{
+ (void) docIdLimit;
+ typedef FeatureEncodeContext<bigEndian> EC;
+ UC64_DECODECONTEXT_CONSTRUCTOR(o, _decodeContext->_);
+ uint32_t length;
+ uint64_t val64;
+
+ UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_NUMDOCS, EC);
+
+ _numDocs = static_cast<uint32_t>(val64) + 1;
+ UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_DELTA_DOCID, EC);
+ uint32_t docId = static_cast<uint32_t>(val64) + 1;
+ UC64_DECODECONTEXT_STORE(o, _decodeContext->_);
+
+ setDocId(docId);
+ _residue = _numDocs;
+ clearUnpacked();
+}
+
+
+template <bool bigEndian>
+ZcRareWordPostingIterator<bigEndian>::
+ZcRareWordPostingIterator(const TermFieldMatchDataArray &matchData, Position start, uint32_t docIdLimit)
+ : Zc4RareWordPostingIterator<bigEndian>(matchData, start, docIdLimit),
+ _docIdK(0)
+{
+}
+
+
+template <bool bigEndian>
+void
+ZcRareWordPostingIterator<bigEndian>::doSeek(uint32_t docId)
+{
+ typedef FeatureEncodeContext<bigEndian> EC;
+ uint32_t length;
+ uint64_t val64;
+
+ uint32_t oDocId = getDocId();
+
+ UC64_DECODECONTEXT_CONSTRUCTOR(o, _decodeContext->_);
+ if (getUnpacked()) {
+ clearUnpacked();
+ if (__builtin_expect(--_residue == 0, false))
+ goto atbreak;
+ UC64_DECODEEXPGOLOMB_NS(o, _docIdK, EC);
+ oDocId += 1 + static_cast<uint32_t>(val64);
+#if DEBUG_ZCPOSTING_PRINTF
+ printf("Decode docId=%d\n",
+ oDocId);
+#endif
+ }
+ while (__builtin_expect(oDocId < docId, true)) {
+ UC64_DECODECONTEXT_STORE(o, _decodeContext->_);
+ _decodeContext->skipFeatures(1);
+ UC64_DECODECONTEXT_LOAD(o, _decodeContext->_);
+ if (__builtin_expect(--_residue == 0, false))
+ goto atbreak;
+ UC64_DECODEEXPGOLOMB_NS(o, _docIdK, EC);
+ oDocId += 1 + static_cast<uint32_t>(val64);
+#if DEBUG_ZCPOSTING_PRINTF
+ printf("Decode docId=%d\n",
+ oDocId);
+#endif
+ }
+ UC64_DECODECONTEXT_STORE(o, _decodeContext->_);
+ setDocId(oDocId);
+ return;
+ atbreak:
+ setAtEnd(); // Mark end of data
+ return;
+}
+
+
+template <bool bigEndian>
+void
+ZcRareWordPostingIterator<bigEndian>::readWordStart(uint32_t docIdLimit)
+{
+ typedef FeatureEncodeContext<bigEndian> EC;
+ UC64_DECODECONTEXT_CONSTRUCTOR(o, _decodeContext->_);
+ uint32_t length;
+ uint64_t val64;
+
+ UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_NUMDOCS, EC);
+ _numDocs = static_cast<uint32_t>(val64) + 1;
+ _docIdK = EC::calcDocIdK(_numDocs, docIdLimit);
+ UC64_DECODEEXPGOLOMB_NS(o, _docIdK, EC);
+ uint32_t docId = static_cast<uint32_t>(val64) + 1;
+ UC64_DECODECONTEXT_STORE(o, _decodeContext->_);
+
+ setDocId(docId);
+ _residue = _numDocs;
+ clearUnpacked();
+}
+
+
+template <bool bigEndian>
+ZcPostingIterator<bigEndian>::
+ZcPostingIterator(uint32_t minChunkDocs,
+ bool dynamicK,
+ const PostingListCounts &counts,
+ const search::fef::TermFieldMatchDataArray &matchData,
+ Position start, uint32_t docIdLimit)
+ : ZcIteratorBase(matchData, start, docIdLimit),
+ _valI(NULL),
+ _lastDocId(0),
+ _l1SkipDocId(0),
+ _l2SkipDocId(0),
+ _l3SkipDocId(0),
+ _l4SkipDocId(0),
+ _l1SkipDocIdPos(NULL),
+ _l1SkipValI(NULL),
+ _l1SkipFeaturePos(0),
+ _valIBase(NULL),
+ _l1SkipValIBase(NULL),
+ _l2SkipDocIdPos(NULL),
+ _l2SkipValI(NULL),
+ _l2SkipFeaturePos(0),
+ _l2SkipL1SkipPos(NULL),
+ _l2SkipValIBase(NULL),
+ _l3SkipDocIdPos(NULL),
+ _l3SkipValI(NULL),
+ _l3SkipFeaturePos(0),
+ _l3SkipL1SkipPos(NULL),
+ _l3SkipL2SkipPos(NULL),
+ _l3SkipValIBase(NULL),
+ _l4SkipDocIdPos(NULL),
+ _l4SkipValI(NULL),
+ _l4SkipFeaturePos(0),
+ _l4SkipL1SkipPos(NULL),
+ _l4SkipL2SkipPos(NULL),
+ _l4SkipL3SkipPos(NULL),
+ _decodeContext(NULL),
+ _minChunkDocs(minChunkDocs),
+ _docIdK(0),
+ _hasMore(false),
+ _dynamicK(dynamicK),
+ _chunkNo(0),
+ _numDocs(0),
+ _featuresSize(0),
+ _featureSeekPos(0),
+ _featuresValI(NULL),
+ _featuresBitOffset(0),
+ _counts(counts)
+{ }
+
+
+template <bool bigEndian>
+void
+ZcPostingIterator<bigEndian>::readWordStart(uint32_t docIdLimit)
+{
+ typedef FeatureEncodeContext<bigEndian> EC;
+ DecodeContextBase &d = *_decodeContext;
+ UC64_DECODECONTEXT_CONSTRUCTOR(o, d._);
+ uint32_t length;
+ uint64_t val64;
+
+ uint32_t prevDocId = _hasMore ? _lastDocId : 0u;
+ UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_NUMDOCS, EC);
+
+ _numDocs = static_cast<uint32_t>(val64) + 1;
+ bool hasMore = false;
+ if (__builtin_expect(_numDocs >= _minChunkDocs, false)) {
+ if (bigEndian) {
+ hasMore = static_cast<int64_t>(oVal) < 0;
+ oVal <<= 1;
+ length = 1;
+ } else {
+ hasMore = (oVal & 1) != 0;
+ oVal >>= 1;
+ length = 1;
+ }
+ UC64_READBITS_NS(o, EC);
+ }
+ if (_dynamicK)
+ _docIdK = EC::calcDocIdK((_hasMore || hasMore) ? 1 : _numDocs, docIdLimit);
+ UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_DOCIDSSIZE, EC);
+ uint32_t docIdsSize = val64 + 1;
+ UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_L1SKIPSIZE, EC);
+ uint32_t l1SkipSize = val64;
+ uint32_t l2SkipSize = 0;
+ if (l1SkipSize != 0) {
+ UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_L2SKIPSIZE, EC);
+ l2SkipSize = val64;
+ }
+ uint32_t l3SkipSize = 0;
+ if (l2SkipSize != 0) {
+ UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_L3SKIPSIZE, EC);
+ l3SkipSize = val64;
+ }
+ uint32_t l4SkipSize = 0;
+ if (l3SkipSize != 0) {
+ UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_L4SKIPSIZE, EC);
+ l4SkipSize = val64;
+ }
+ UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_FEATURESSIZE, EC);
+ _featuresSize = val64;
+ if (_dynamicK) {
+ UC64_DECODEEXPGOLOMB_NS(o, _docIdK, EC);
+ } else {
+ UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_LASTDOCID, EC);
+ }
+ _lastDocId = docIdLimit - 1 - val64;
+ if (_hasMore || hasMore) {
+ if (!_counts._segments.empty()) {
+ assert(_lastDocId == _counts._segments[_chunkNo]._lastDoc);
+ }
+ }
+
+ uint64_t bytePad = oPreRead & 7;
+ if (bytePad > 0) {
+ length = bytePad;
+ UC64_READBITS_NS(o, EC);
+ }
+
+ UC64_DECODECONTEXT_STORE(o, d._);
+ assert((d.getBitOffset() & 7) == 0);
+ const uint8_t *bcompr = d.getByteCompr();
+ _valIBase = _valI = bcompr;
+ _l1SkipDocIdPos = _l2SkipDocIdPos = bcompr;
+ _l3SkipDocIdPos = _l4SkipDocIdPos = bcompr;
+ bcompr += docIdsSize;
+ if (l1SkipSize != 0) {
+ _l1SkipValIBase = _l1SkipValI = bcompr;
+ _l2SkipL1SkipPos = _l3SkipL1SkipPos = _l4SkipL1SkipPos = bcompr;
+ bcompr += l1SkipSize;
+ } else {
+ _l1SkipValIBase = _l1SkipValI = NULL;
+ _l2SkipL1SkipPos = _l3SkipL1SkipPos = _l4SkipL1SkipPos = NULL;
+ }
+ if (l2SkipSize != 0) {
+ _l2SkipValIBase = _l2SkipValI = bcompr;
+ _l3SkipL2SkipPos = _l4SkipL2SkipPos = bcompr;
+ bcompr += l2SkipSize;
+ } else {
+ _l2SkipValIBase = _l2SkipValI = NULL;
+ _l3SkipL2SkipPos = _l4SkipL2SkipPos = NULL;
+ }
+ if (l3SkipSize != 0) {
+ _l3SkipValIBase = _l3SkipValI = bcompr;
+ _l4SkipL3SkipPos = bcompr;
+ bcompr += l3SkipSize;
+ } else {
+ _l3SkipValIBase = _l3SkipValI = NULL;
+ _l4SkipL3SkipPos = NULL;
+ }
+ if (l4SkipSize != 0) {
+ _l4SkipValI = bcompr;
+ bcompr += l4SkipSize;
+ } else {
+ _l4SkipValI = NULL;
+ }
+ d.setByteCompr(bcompr);
+ _hasMore = hasMore;
+ // Save information about start of next chunk
+ _featuresValI = d.getCompr();
+ _featuresBitOffset = d.getBitOffset();
+ _l1SkipFeaturePos = _l2SkipFeaturePos = 0;
+ _l3SkipFeaturePos = _l4SkipFeaturePos = 0;
+ _featureSeekPos = 0;
+ clearUnpacked();
+ // Unpack first docid delta in chunk
+ uint32_t oDocId = prevDocId;
+ ZCDECODE(_valI, oDocId += 1 +);
+#if DEBUG_ZCPOSTING_PRINTF
+ printf("Decode docId=%d\n",
+ oDocId);
+#endif
+ setDocId(oDocId);
+ // Unpack first L1 Skip info docid delta
+ if (_l1SkipValI != NULL) {
+ _l1SkipDocId = prevDocId;
+ ZCDECODE(_l1SkipValI, _l1SkipDocId += 1 +);
+ } else
+ _l1SkipDocId = _lastDocId;
+ // Unpack first L2 skip info docid delta
+ if (_l2SkipValI != NULL) {
+ _l2SkipDocId = prevDocId;
+ ZCDECODE(_l2SkipValI, _l2SkipDocId += 1 +);
+ } else
+ _l2SkipDocId = _lastDocId;
+ // Unpack first L3 skip info docid delta
+ if (_l3SkipValI != NULL) {
+ _l3SkipDocId = prevDocId;
+ ZCDECODE(_l3SkipValI, _l3SkipDocId += 1 +);
+ } else
+ _l3SkipDocId = _lastDocId;
+ // Unpack first L4 skip info docid delta
+ if (_l4SkipValI != NULL) {
+ _l4SkipDocId = prevDocId;
+ ZCDECODE(_l4SkipValI, _l4SkipDocId += 1 +);
+ } else
+ _l4SkipDocId = _lastDocId;
+}
+
+
+template <bool bigEndian>
+void
+ZcPostingIterator<bigEndian>::doChunkSkipSeek(uint32_t docId)
+{
+ while (docId > _lastDocId && _hasMore) {
+ // Skip to start of next chunk
+ _featureSeekPos = 0;
+ featureSeek(_featuresSize);
+ _chunkNo++;
+ readWordStart(getDocIdLimit()); // Read word start for next chunk
+ }
+ if (docId > _lastDocId) {
+ _l4SkipDocId = _l3SkipDocId = _l2SkipDocId = _l1SkipDocId = search::endDocId;
+ setAtEnd();
+ }
+}
+
+
+template <bool bigEndian>
+void
+ZcPostingIterator<bigEndian>::doL4SkipSeek(uint32_t docId)
+{
+ uint32_t lastL4SkipDocId;
+
+ if (__builtin_expect(docId > _lastDocId, false)) {
+ doChunkSkipSeek(docId);
+ if (docId <= _l4SkipDocId)
+ return;
+ }
+ do {
+ lastL4SkipDocId = _l4SkipDocId;
+ ZCDECODE(_l4SkipValI, _l4SkipDocIdPos += 1 +);
+ ZCDECODE(_l4SkipValI, _l4SkipFeaturePos += 1 +);
+ ZCDECODE(_l4SkipValI, _l4SkipL1SkipPos += 1 + );
+ ZCDECODE(_l4SkipValI, _l4SkipL2SkipPos += 1 + );
+ ZCDECODE(_l4SkipValI, _l4SkipL3SkipPos += 1 + );
+ ZCDECODE(_l4SkipValI, _l4SkipDocId += 1 + );
+#if DEBUG_ZCPOSTING_PRINTF
+ printf("L4Decode docId %d, docIdPos %d,"
+ "l1SkipPos %d, l2SkipPos %d, l3SkipPos %d, nextDocId %d\n",
+ lastL4SkipDocId,
+ (int) (_l4SkipDocIdPos - _valIBase),
+ (int) (_l4SkipL1SkipPos - _l1SkipValIBase),
+ (int) (_l4SkipL2SkipPos - _l2SkipValIBase),
+ (int) (_l4SkipL3SkipPos - _l3SkipValIBase),
+ _l4SkipDocId);
+#endif
+ } while (docId > _l4SkipDocId);
+ _valI = _l1SkipDocIdPos = _l2SkipDocIdPos = _l3SkipDocIdPos =
+ _l4SkipDocIdPos;
+ _l1SkipFeaturePos = _l2SkipFeaturePos = _l3SkipFeaturePos =
+ _l4SkipFeaturePos;
+ _l1SkipDocId = _l2SkipDocId = _l3SkipDocId = lastL4SkipDocId;
+ _l1SkipValI = _l2SkipL1SkipPos = _l3SkipL1SkipPos = _l4SkipL1SkipPos;
+ _l2SkipValI = _l3SkipL2SkipPos = _l4SkipL2SkipPos;
+ _l3SkipValI = _l4SkipL3SkipPos;
+ ZCDECODE(_valI, lastL4SkipDocId += 1 +);
+ ZCDECODE(_l1SkipValI, _l1SkipDocId += 1 +);
+ ZCDECODE(_l2SkipValI, _l2SkipDocId += 1 +);
+ ZCDECODE(_l3SkipValI, _l3SkipDocId += 1 +);
+#if DEBUG_ZCPOSTING_PRINTF
+ printf("L4Seek, docId %d docIdPos %d"
+ " L1SkipPos %d L2SkipPos %d L3SkipPos %d, nextDocId %d\n",
+ lastL4SkipDocId,
+ (int) (_l4SkipDocIdPos - _valIBase),
+ (int) (_l4SkipL1SkipPos - _l1SkipValIBase),
+ (int) (_l4SkipL2SkipPos - _l2SkipValIBase),
+ (int) (_l4SkipL3SkipPos - _l3SkipValIBase),
+ _l4SkipDocId);
+#endif
+ setDocId(lastL4SkipDocId);
+ _featureSeekPos = _l4SkipFeaturePos;
+ clearUnpacked();
+}
+
+
+template <bool bigEndian>
+void
+ZcPostingIterator<bigEndian>::doL3SkipSeek(uint32_t docId)
+{
+ uint32_t lastL3SkipDocId;
+
+ if (__builtin_expect(docId > _l4SkipDocId, false)) {
+ doL4SkipSeek(docId);
+ if (docId <= _l3SkipDocId)
+ return;
+ }
+ do {
+ lastL3SkipDocId = _l3SkipDocId;
+ ZCDECODE(_l3SkipValI, _l3SkipDocIdPos += 1 +);
+ ZCDECODE(_l3SkipValI, _l3SkipFeaturePos += 1 +);
+ ZCDECODE(_l3SkipValI, _l3SkipL1SkipPos += 1 + );
+ ZCDECODE(_l3SkipValI, _l3SkipL2SkipPos += 1 + );
+ ZCDECODE(_l3SkipValI, _l3SkipDocId += 1 + );
+#if DEBUG_ZCPOSTING_PRINTF
+ printf("L3Decode docId %d, docIdPos %d,"
+ "l1SkipPos %d, l2SkipPos %d, nextDocId %d\n",
+ lastL3SkipDocId,
+ (int) (_l3SkipDocIdPos - _valIBase),
+ (int) (_l3SkipL1SkipPos - _l1SkipValIBase),
+ (int) (_l3SkipL2SkipPos - _l2SkipValIBase),
+ _l3SkipDocId);
+#endif
+ } while (docId > _l3SkipDocId);
+ _valI = _l1SkipDocIdPos = _l2SkipDocIdPos = _l3SkipDocIdPos;
+ _l1SkipFeaturePos = _l2SkipFeaturePos = _l3SkipFeaturePos;
+ _l1SkipDocId = _l2SkipDocId = lastL3SkipDocId;
+ _l1SkipValI = _l2SkipL1SkipPos = _l3SkipL1SkipPos;
+ _l2SkipValI = _l3SkipL2SkipPos;
+ ZCDECODE(_valI, lastL3SkipDocId += 1 +);
+ ZCDECODE(_l1SkipValI, _l1SkipDocId += 1 +);
+ ZCDECODE(_l2SkipValI, _l2SkipDocId += 1 +);
+#if DEBUG_ZCPOSTING_PRINTF
+ printf("L3Seek, docId %d docIdPos %d"
+ " L1SkipPos %d L2SkipPos %d, nextDocId %d\n",
+ lastL3SkipDocId,
+ (int) (_l3SkipDocIdPos - _valIBase),
+ (int) (_l3SkipL1SkipPos - _l1SkipValIBase),
+ (int) (_l3SkipL2SkipPos - _l2SkipValIBase),
+ _l3SkipDocId);
+#endif
+ setDocId(lastL3SkipDocId);
+ _featureSeekPos = _l3SkipFeaturePos;
+ clearUnpacked();
+}
+
+
+template <bool bigEndian>
+void
+ZcPostingIterator<bigEndian>::doL2SkipSeek(uint32_t docId)
+{
+ uint32_t lastL2SkipDocId;
+
+ if (__builtin_expect(docId > _l3SkipDocId, false)) {
+ doL3SkipSeek(docId);
+ if (docId <= _l2SkipDocId)
+ return;
+ }
+ do {
+ lastL2SkipDocId = _l2SkipDocId;
+ ZCDECODE(_l2SkipValI, _l2SkipDocIdPos += 1 +);
+ ZCDECODE(_l2SkipValI, _l2SkipFeaturePos += 1 +);
+ ZCDECODE(_l2SkipValI, _l2SkipL1SkipPos += 1 + );
+ ZCDECODE(_l2SkipValI, _l2SkipDocId += 1 + );
+#if DEBUG_ZCPOSTING_PRINTF
+ printf("L2Decode docId %d, docIdPos %d, l1SkipPos %d, nextDocId %d\n",
+ lastL2SkipDocId,
+ (int) (_l2SkipDocIdPos - _valIBase),
+ (int) (_l2SkipL1SkipPos - _l1SkipValIBase),
+ _l2SkipDocId);
+#endif
+ } while (docId > _l2SkipDocId);
+ _valI = _l1SkipDocIdPos = _l2SkipDocIdPos;
+ _l1SkipFeaturePos = _l2SkipFeaturePos;
+ _l1SkipDocId = lastL2SkipDocId;
+ _l1SkipValI = _l2SkipL1SkipPos;
+ ZCDECODE(_valI, lastL2SkipDocId += 1 +);
+ ZCDECODE(_l1SkipValI, _l1SkipDocId += 1 +);
+#if DEBUG_ZCPOSTING_PRINTF
+ printf("L2Seek, docId %d docIdPos %d L1SkipPos %d, nextDocId %d\n",
+ lastL2SkipDocId,
+ (int) (_l2SkipDocIdPos - _valIBase),
+ (int) (_l2SkipL1SkipPos - _l1SkipValIBase),
+ _l2SkipDocId);
+#endif
+ setDocId(lastL2SkipDocId);
+ _featureSeekPos = _l2SkipFeaturePos;
+ clearUnpacked();
+}
+
+
+template <bool bigEndian>
+void
+ZcPostingIterator<bigEndian>::doL1SkipSeek(uint32_t docId)
+{
+ uint32_t lastL1SkipDocId;
+ if (__builtin_expect(docId > _l2SkipDocId, false)) {
+ doL2SkipSeek(docId);
+ if (docId <= _l1SkipDocId)
+ return;
+ }
+ do {
+ lastL1SkipDocId = _l1SkipDocId;
+ ZCDECODE(_l1SkipValI, _l1SkipDocIdPos += 1 +);
+ ZCDECODE(_l1SkipValI, _l1SkipFeaturePos += 1 +);
+ ZCDECODE(_l1SkipValI, _l1SkipDocId += 1 +);
+#if DEBUG_ZCPOSTING_PRINTF
+ printf("L1Decode docId %d, docIdPos %d, L1SkipPos %d, nextDocId %d\n",
+ lastL1SkipDocId,
+ (int) (_l1SkipDocIdPos - _valIBase),
+ (int) (_l1SkipValI - _l1SkipValIBase),
+ _l1SkipDocId);
+#endif
+ } while (docId > _l1SkipDocId);
+ _valI = _l1SkipDocIdPos;
+ ZCDECODE(_valI, lastL1SkipDocId += 1 +);
+ setDocId(lastL1SkipDocId);
+#if DEBUG_ZCPOSTING_PRINTF
+ printf("L1SkipSeek, docId %d docIdPos %d, nextDocId %d\n",
+ lastL1SkipDocId,
+ (int) (_l1SkipDocIdPos - _valIBase),
+ _l1SkipDocId);
+#endif
+ _featureSeekPos = _l1SkipFeaturePos;
+ clearUnpacked();
+}
+
+
+template <bool bigEndian>
+void
+ZcPostingIterator<bigEndian>::doSeek(uint32_t docId)
+{
+ if (docId > _l1SkipDocId) {
+ doL1SkipSeek(docId);
+ }
+ uint32_t oDocId = getDocId();
+#if DEBUG_ZCPOSTING_ASSERT
+ assert(oDocId <= _l1SkipDocId);
+ assert(docId <= _l1SkipDocId);
+ assert(oDocId <= _l2SkipDocId);
+ assert(docId <= _l2SkipDocId);
+ assert(oDocId <= _l3SkipDocId);
+ assert(docId <= _l3SkipDocId);
+ assert(oDocId <= _l4SkipDocId);
+ assert(docId <= _l4SkipDocId);
+#endif
+ const uint8_t *oCompr = _valI;
+ while (__builtin_expect(oDocId < docId, true)) {
+#if DEBUG_ZCPOSTING_ASSERT
+ assert(oDocId <= _l1SkipDocId);
+ assert(oDocId <= _l2SkipDocId);
+ assert(oDocId <= _l3SkipDocId);
+ assert(oDocId <= _l4SkipDocId);
+#endif
+ ZCDECODE(oCompr, oDocId += 1 +);
+#if DEBUG_ZCPOSTING_PRINTF
+ printf("Decode docId=%d\n",
+ oDocId);
+#endif
+ incNeedUnpack();
+ }
+ _valI = oCompr;
+ setDocId(oDocId);
+ return;
+}
+
+
+template <bool bigEndian>
+void
+ZcPostingIterator<bigEndian>::doUnpack(uint32_t docId)
+{
+ if (!_matchData.valid() || getUnpacked())
+ return;
+ if (_featureSeekPos != 0) {
+ // Handle deferred feature position seek now.
+ featureSeek(_featureSeekPos);
+ _featureSeekPos = 0;
+ }
+ assert(docId == getDocId());
+ uint32_t needUnpack = getNeedUnpack();
+ if (needUnpack > 1)
+ _decodeContext->skipFeatures(needUnpack - 1);
+ _decodeContext->unpackFeatures(_matchData, docId);
+ setUnpacked();
+}
+
+template <bool bigEndian>
+void ZcPostingIterator<bigEndian>::rewind(Position start)
+{
+ _decodeContext->setPosition(start);
+ _hasMore = false;
+ _lastDocId = 0;
+ _chunkNo = 0;
+}
+
+
+template class Zc4RareWordPostingIterator<true>;
+template class Zc4RareWordPostingIterator<false>;
+
+template class ZcPostingIterator<true>;
+template class ZcPostingIterator<false>;
+
+template class ZcRareWordPostingIterator<true>;
+template class ZcRareWordPostingIterator<false>;
+
+} // namespace diskindex
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/diskindex/zcpostingiterators.h b/searchlib/src/vespa/searchlib/diskindex/zcpostingiterators.h
new file mode 100644
index 00000000000..f0bf5b99a30
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/diskindex/zcpostingiterators.h
@@ -0,0 +1,200 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/index/postinglistfile.h>
+#include <vespa/searchlib/bitcompression/compression.h>
+#include <vespa/searchlib/queryeval/iterators.h>
+
+namespace search
+{
+
+namespace diskindex
+{
+
+using bitcompression::Position;
+
+#define ZCDECODE(valI, resop) \
+do { \
+ if (__builtin_expect(valI[0] < (1 << 7), true)) { \
+ resop valI[0]; \
+ valI += 1; \
+ } else if (__builtin_expect(valI[1] < (1 << 7), true)) { \
+ resop (valI[0] & ((1 << 7) - 1)) + \
+ (valI[1] << 7); \
+ valI += 2; \
+ } else if (__builtin_expect(valI[2] < (1 << 7), true)) { \
+ resop (valI[0] & ((1 << 7) - 1)) + \
+ ((valI[1] & ((1 << 7) - 1)) << 7) + \
+ (valI[2] << 14); \
+ valI += 3; \
+ } else if (__builtin_expect(valI[3] < (1 << 7), true)) { \
+ resop (valI[0] & ((1 << 7) - 1)) + \
+ ((valI[1] & ((1 << 7) - 1)) << 7) + \
+ ((valI[2] & ((1 << 7) - 1)) << 14) + \
+ (valI[3] << 21); \
+ valI += 4; \
+ } else { \
+ resop (valI[0] & ((1 << 7) - 1)) + \
+ ((valI[1] & ((1 << 7) - 1)) << 7) + \
+ ((valI[2] & ((1 << 7) - 1)) << 14) + \
+ ((valI[3] & ((1 << 7) - 1)) << 21) + \
+ (valI[4] << 28); \
+ valI += 5; \
+ } \
+} while (0)
+
+class ZcIteratorBase : public queryeval::RankedSearchIteratorBase
+{
+protected:
+ ZcIteratorBase(const fef::TermFieldMatchDataArray &matchData, Position start, uint32_t docIdLimit);
+ virtual void readWordStart(uint32_t docIdLimit) = 0;
+ virtual void rewind(Position start) = 0;
+ void initRange(uint32_t beginid, uint32_t endid) override;
+ uint32_t getDocIdLimit() const { return _docIdLimit; }
+ Trinary is_strict() const override { return Trinary::True; }
+private:
+ uint32_t _docIdLimit;
+ Position _start;
+};
+
+template <bool bigEndian>
+class Zc4RareWordPostingIterator : public ZcIteratorBase
+{
+private:
+ typedef ZcIteratorBase ParentClass;
+
+public:
+ typedef bitcompression::FeatureDecodeContext<bigEndian> DecodeContextBase;
+ typedef index::DocIdAndFeatures DocIdAndFeatures;
+ DecodeContextBase *_decodeContext;
+ unsigned int _residue;
+ uint32_t _prevDocId; // Previous document id
+ uint32_t _numDocs; // Documents in chunk or word
+
+ Zc4RareWordPostingIterator(const fef::TermFieldMatchDataArray &matchData, Position start, uint32_t docIdLimit);
+
+ void doUnpack(uint32_t docId) override;
+ void doSeek(uint32_t docId) override;
+ void readWordStart(uint32_t docIdLimit) override;
+ void rewind(Position start) override;
+};
+
+template <bool bigEndian>
+class ZcRareWordPostingIterator : public Zc4RareWordPostingIterator<bigEndian>
+{
+private:
+ typedef Zc4RareWordPostingIterator<bigEndian> ParentClass;
+ using ParentClass::getDocId;
+ using ParentClass::getUnpacked;
+ using ParentClass::clearUnpacked;
+ using ParentClass::_residue;
+ using ParentClass::setDocId;
+ using ParentClass::setAtEnd;
+ using ParentClass::_numDocs;
+
+ uint32_t _docIdK;
+
+public:
+ using ParentClass::_decodeContext;
+ ZcRareWordPostingIterator(const search::fef::TermFieldMatchDataArray &matchData, Position start, uint32_t docIdLimit);
+
+ void doSeek(uint32_t docId) override;
+ void readWordStart(uint32_t docIdLimit) override;
+};
+
+
+template <bool bigEndian>
+class ZcPostingIterator : public ZcIteratorBase
+{
+private:
+ typedef ZcIteratorBase ParentClass;
+ using ParentClass::getDocId;
+
+public:
+ // Pointer to compressed data
+ const uint8_t *_valI;
+ uint32_t _lastDocId;
+ uint32_t _l1SkipDocId;
+ uint32_t _l2SkipDocId;
+ uint32_t _l3SkipDocId;
+ uint32_t _l4SkipDocId;
+ const uint8_t *_l1SkipDocIdPos;
+ const uint8_t *_l1SkipValI;
+ uint64_t _l1SkipFeaturePos;
+ const uint8_t *_valIBase;
+ const uint8_t *_l1SkipValIBase;
+ const uint8_t *_l2SkipDocIdPos;
+ const uint8_t *_l2SkipValI;
+ uint64_t _l2SkipFeaturePos;
+ const uint8_t *_l2SkipL1SkipPos;
+ const uint8_t *_l2SkipValIBase;
+ const uint8_t *_l3SkipDocIdPos;
+ const uint8_t *_l3SkipValI;
+ uint64_t _l3SkipFeaturePos;
+ const uint8_t *_l3SkipL1SkipPos;
+ const uint8_t *_l3SkipL2SkipPos;
+ const uint8_t *_l3SkipValIBase;
+ const uint8_t *_l4SkipDocIdPos;
+ const uint8_t *_l4SkipValI;
+ uint64_t _l4SkipFeaturePos;
+ const uint8_t *_l4SkipL1SkipPos;
+ const uint8_t *_l4SkipL2SkipPos;
+ const uint8_t *_l4SkipL3SkipPos;
+
+ typedef bitcompression::FeatureDecodeContext<bigEndian> DecodeContextBase;
+ typedef index::DocIdAndFeatures DocIdAndFeatures;
+ typedef index::PostingListCounts PostingListCounts;
+ DecodeContextBase *_decodeContext;
+ uint32_t _minChunkDocs;
+ uint32_t _docIdK;
+ bool _hasMore;
+ bool _dynamicK;
+ uint32_t _chunkNo;
+ uint32_t _numDocs;
+ uint64_t _featuresSize;
+ uint64_t _featureSeekPos;
+ // Start of current features block, needed for seeks
+ const uint64_t *_featuresValI;
+ int _featuresBitOffset;
+ // Counts used for assertions
+ const PostingListCounts &_counts;
+
+ ZcPostingIterator(uint32_t minChunkDocs,
+ bool dynamicK,
+ const PostingListCounts &counts,
+ const search::fef::TermFieldMatchDataArray &matchData,
+ Position start, uint32_t docIdLimit);
+
+
+ void doUnpack(uint32_t docId) override;
+ void doSeek(uint32_t docId) override;
+ void readWordStart(uint32_t docIdLimit) override;
+ void rewind(Position start) override;
+ VESPA_DLL_LOCAL void doChunkSkipSeek(uint32_t docId);
+ VESPA_DLL_LOCAL void doL4SkipSeek(uint32_t docId);
+ VESPA_DLL_LOCAL void doL3SkipSeek(uint32_t docId);
+ VESPA_DLL_LOCAL void doL2SkipSeek(uint32_t docId);
+ VESPA_DLL_LOCAL void doL1SkipSeek(uint32_t docId);
+
+ void featureSeek(uint64_t offset) {
+ _decodeContext->_valI = _featuresValI + (_featuresBitOffset + offset) / 64;
+ _decodeContext->setupBits((_featuresBitOffset + offset) & 63);
+ }
+};
+
+
+extern template class Zc4RareWordPostingIterator<true>;
+extern template class Zc4RareWordPostingIterator<false>;
+
+extern template class ZcPostingIterator<true>;
+extern template class ZcPostingIterator<false>;
+
+extern template class ZcRareWordPostingIterator<true>;
+extern template class ZcRareWordPostingIterator<false>;
+
+
+} // namespace diskindex
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/docstore/.gitignore b/searchlib/src/vespa/searchlib/docstore/.gitignore
new file mode 100644
index 00000000000..ee8938b6bf4
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/docstore/.gitignore
@@ -0,0 +1,6 @@
+*.So
+*.exe
+*.ilk
+*.pdb
+.depend*
+Makefile
diff --git a/searchlib/src/vespa/searchlib/docstore/CMakeLists.txt b/searchlib/src/vespa/searchlib/docstore/CMakeLists.txt
new file mode 100644
index 00000000000..347f3562794
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/docstore/CMakeLists.txt
@@ -0,0 +1,18 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_library(searchlib_docstore OBJECT
+ SOURCES
+ bytecomplens.cpp
+ chunk.cpp
+ chunkformat.cpp
+ chunkformats.cpp
+ data_store_file_chunk_id.cpp
+ documentstore.cpp
+ document_store_visitor_progress.cpp
+ filechunk.cpp
+ idatastore.cpp
+ idocumentstore.cpp
+ logdatastore.cpp
+ logdocumentstore.cpp
+ writeablefilechunk.cpp
+ DEPENDS
+)
diff --git a/searchlib/src/vespa/searchlib/docstore/OWNERS b/searchlib/src/vespa/searchlib/docstore/OWNERS
new file mode 100644
index 00000000000..3e9fc8ab356
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/docstore/OWNERS
@@ -0,0 +1,2 @@
+balder
+tegge
diff --git a/searchlib/src/vespa/searchlib/docstore/bytecomplens.cpp b/searchlib/src/vespa/searchlib/docstore/bytecomplens.cpp
new file mode 100644
index 00000000000..1a2b8f090e1
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/docstore/bytecomplens.cpp
@@ -0,0 +1,260 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".search.docstore");
+#include "bytecomplens.h"
+
+namespace search {
+
+static inline uint64_t getBCN(const uint8_t *&buffer) __attribute__((__always_inline__));
+
+/**
+ * get "Byte Compressed Number" from buffer, incrementing pointer
+ **/
+static inline uint64_t getBCN(const uint8_t *&buffer)
+{
+ uint8_t b = *buffer++;
+ uint64_t len = (b & 127);
+ unsigned shiftLen = 0;
+ while (b & 128) {
+ shiftLen += 7;
+ b = *buffer++;
+ len |= ((b & 127) << shiftLen);
+ }
+ return len;
+}
+
+static size_t writeLen(vespalib::DataBuffer &buf, uint64_t len)
+{
+ size_t bytes = 0;
+ do {
+ uint8_t b = len & 127;
+ len >>= 7;
+ if (len > 0) {
+ b |= 128;
+ }
+ buf.writeInt8(b);
+ ++bytes;
+ } while (len > 0);
+ return bytes;
+}
+
+
+ByteCompressedLengths::ByteCompressedLengths()
+ : _l0space(),
+ _l1space(),
+ _l2space(),
+ _l3table(),
+ _entries(0),
+ _progress(),
+ _ptrcache(),
+ _hasInitialOffset(false)
+{
+ clear();
+}
+
+
+void
+ByteCompressedLengths::clear()
+{
+ _l0space.clear();
+ _l1space.clear();
+ _l2space.clear();
+ _l3table.clear();
+
+ _entries = 0;
+
+ _progress.lenSum1 = 0;
+ _progress.lenSum2 = 0;
+ _progress.l0oSum1 = 0;
+ _progress.l0oSum2 = 0;
+ _progress.l1oSum2 = 0;
+ _progress.last_offset = 0;
+
+ _ptrcache.l0table = NULL;
+ _ptrcache.l1table = NULL;
+ _ptrcache.l2table = NULL;
+
+ _hasInitialOffset = false;
+}
+
+
+void
+ByteCompressedLengths::swap(ByteCompressedLengths& other)
+{
+ _l0space.swap(other._l0space);
+ _l1space.swap(other._l1space);
+ _l2space.swap(other._l2space);
+ _l3table.swap(other._l3table);
+
+ std::swap(_entries, other._entries);
+ std::swap(_progress, other._progress);
+ std::swap(_ptrcache, other._ptrcache);
+ std::swap(_hasInitialOffset, other._hasInitialOffset);
+}
+
+
+// add a new offset to the compressed tables
+void
+ByteCompressedLengths::addOffset(uint64_t offset)
+{
+ assert(offset >= _progress.last_offset);
+
+ // delta from last offset:
+ uint64_t len = offset - _progress.last_offset;
+
+ // which entry is this:
+ uint64_t idx = _entries++;
+
+ if ((idx & 31) == 0) {
+ // add entry to some skip-table
+ _progress.lenSum2 += _progress.lenSum1; // accumulate to Level2
+ _progress.l0oSum2 += _progress.l0oSum1; // accumulate to Level2
+
+ uint64_t t1n = idx >> 5;
+ if ((t1n & 31) == 0) {
+ // add Level2 or Level3 table entry:
+ uint64_t t2n = t1n >> 5;
+
+ if ((t2n & 31) == 0) {
+ // add new Level3 table entry:
+ L3Entry e;
+ e.offset = _progress.last_offset;
+ e.l0toff = _l0space.getDataLen();
+ e.l1toff = _l1space.getDataLen();
+ e.l2toff = _l2space.getDataLen();
+
+ _l3table.push_back(e);
+ } else {
+ // write to Level2 table, sums since last reset:
+ writeLen(_l2space, _progress.lenSum2); // sum of Level0 lengths
+ writeLen(_l2space, _progress.l0oSum2); // sum size of Level0 entries
+ writeLen(_l2space, _progress.l1oSum2); // sum size of Level1 entries
+ }
+ // reset Level2 sums:
+ _progress.lenSum2 = 0;
+ _progress.l0oSum2 = 0;
+ _progress.l1oSum2 = 0;
+ } else {
+ // write to Level1 table, sums since last reset:
+ _progress.l1oSum2 += writeLen(_l1space, _progress.lenSum1); // sum of Level0 lengths
+ _progress.l1oSum2 += writeLen(_l1space, _progress.l0oSum1); // sum size of Level0 entries
+ }
+ // reset Level1 sums:
+ _progress.lenSum1 = 0;
+ _progress.l0oSum1 = 0;
+ }
+ // always write length (offset delta) to Level0 table:
+ _progress.l0oSum1 += writeLen(_l0space, len); // accumulate to Level1
+ _progress.lenSum1 += len; // accumulate to Level1
+ _progress.last_offset = offset;
+}
+
+
+void
+ByteCompressedLengths::addOffsetTable(uint64_t entries, uint64_t *offsets)
+{
+ // ignore NOP:
+ if (entries == 0) return;
+
+ // Do we have some offsets already?
+ if (_hasInitialOffset) {
+ // yes, add first offset normally
+ addOffset(offsets[0]);
+ } else {
+ // no, special treatment for very first offset
+ _progress.last_offset = offsets[0];
+ _hasInitialOffset = true;
+ }
+ for (uint64_t cnt = 1; cnt < entries; ++cnt) {
+ addOffset(offsets[cnt]);
+ }
+
+ // Simplify access to actual data:
+ _ptrcache.l0table = (uint8_t *)_l0space.getData();
+ _ptrcache.l1table = (uint8_t *)_l1space.getData();
+ _ptrcache.l2table = (uint8_t *)_l2space.getData();
+
+ // some statistics available when debug logging:
+ LOG(debug, "compressed %ld offsets", (_entries+1));
+ LOG(debug, "(%ld bytes)", (_entries+1)*sizeof(uint64_t));
+ LOG(debug, "to (%ld + %ld + %ld) bytes + %ld l3entries",
+ _l0space.getDataLen(),
+ _l1space.getDataLen(),
+ _l2space.getDataLen(),
+ _l3table.size());
+ LOG(debug, "(%ld bytes)",
+ (_l0space.getDataLen() + _l1space.getDataLen() + _l2space.getDataLen() +
+ _l3table.size()*sizeof(L3Entry)));
+}
+
+
+ByteCompressedLengths::~ByteCompressedLengths()
+{
+}
+
+ByteCompressedLengths::OffLen
+ByteCompressedLengths::getOffLen(uint64_t idx) const
+{
+ assert(idx < _entries);
+
+ unsigned skipL0 = idx & 31;
+ unsigned skipL1 = (idx >> 5) & 31;
+ unsigned skipL2 = (idx >> 10) & 31;
+ uint64_t skipL3 = (idx >> 15);
+
+ uint64_t offset = _l3table[skipL3].offset;
+ uint64_t l0toff = _l3table[skipL3].l0toff;
+ uint64_t l1toff = _l3table[skipL3].l1toff;
+ uint64_t l2toff = _l3table[skipL3].l2toff;
+
+ // printf("start off %ld l0off %ld l1off %ld l2off %ld\n", offset, l0toff, l1toff, l2toff);
+
+ const uint8_t *l2pos = _ptrcache.l2table + l2toff;
+
+ while (skipL2 > 0) {
+ --skipL2;
+ offset += getBCN(l2pos);
+ l0toff += getBCN(l2pos);
+ l1toff += getBCN(l2pos);
+ }
+
+ const uint8_t *l1pos = _ptrcache.l1table + l1toff;
+
+ while (skipL1 > 0) {
+ --skipL1;
+ offset += getBCN(l1pos);
+ l0toff += getBCN(l1pos);
+
+ }
+ const uint8_t *l0pos = _ptrcache.l0table + l0toff;
+
+ while (skipL0 > 0) {
+ --skipL0;
+ offset += getBCN(l0pos);
+ }
+ // printf("end off %ld l0off %ld l1off %ld l2off %ld\n", offset, l0toff, l1toff, l2toff);
+ OffLen retval;
+ retval.offset = offset;
+ retval.length = getBCN(l0pos);
+ return retval;
+}
+
+
+size_t
+ByteCompressedLengths::memoryUsed() const
+{
+ size_t mem = sizeof(*this);
+ mem += _l0space.getBufSize();
+ mem += _l1space.getBufSize();
+ mem += _l2space.getBufSize();
+ mem += _l3table.capacity() * sizeof(L3Entry);
+ return mem;
+}
+
+
+
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/docstore/bytecomplens.h b/searchlib/src/vespa/searchlib/docstore/bytecomplens.h
new file mode 100644
index 00000000000..5e4675ae297
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/docstore/bytecomplens.h
@@ -0,0 +1,110 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vector>
+#include <vespa/vespalib/data/databuffer.h>
+
+namespace search {
+
+/**
+ * Class compressing a table of offsets in memory.
+ * After adding (n) offsets you can access
+ * (n-1) pairs of (length, offset).
+ * All offsets must be increasing, but they
+ * may be added in several chunks.
+ **/
+class ByteCompressedLengths
+{
+public:
+ /**
+ * Construct an empty instance
+ **/
+ ByteCompressedLengths();
+
+ /**
+ * add the given offset table.
+ * @param entries number of offsets to store.
+ * @param offsets pointer to table that contains (entries) offsets.
+ **/
+ void addOffsetTable(uint64_t entries, uint64_t *offsets);
+
+ /**
+ * free resources
+ **/
+ ~ByteCompressedLengths();
+
+ struct OffLen
+ {
+ uint64_t offset;
+ uint64_t length;
+ };
+
+ /**
+ * Fetch an offset and length from compressed data.
+ * Note restriction: idx must be < size()
+ *
+ * @param idx The index into the offset table
+ * @return offset[id] and the delta (offset[id+1] - offset[id])
+ **/
+ OffLen getOffLen(uint64_t idx) const;
+
+ /**
+ * The number of (length, offset) pairs stored
+ * Note that size() == sum(entries) - 1
+ **/
+ uint64_t size() const { return _entries; }
+
+ /**
+ * remove all data from this instance
+ **/
+ void clear();
+
+ /**
+ * swap all data with another instance
+ **/
+ void swap(ByteCompressedLengths& other);
+
+ /**
+ * Calculate memory used by this instance
+ * @return memory usage (in bytes)
+ **/
+ size_t memoryUsed() const;
+
+private:
+ struct L3Entry {
+ uint64_t offset;
+ uint64_t l0toff;
+ uint64_t l1toff;
+ uint64_t l2toff;
+ };
+ vespalib::DataBuffer _l0space;
+ vespalib::DataBuffer _l1space;
+ vespalib::DataBuffer _l2space;
+
+ std::vector<L3Entry> _l3table;
+
+ uint64_t _entries;
+
+ struct ProgressPoint {
+ uint64_t lenSum1;
+ uint64_t lenSum2;
+ uint64_t l0oSum1;
+ uint64_t l0oSum2;
+ uint64_t l1oSum2;
+ uint64_t last_offset;
+ } _progress;
+
+ struct CachedPointers {
+ const uint8_t *l0table;
+ const uint8_t *l1table;
+ const uint8_t *l2table;
+ } _ptrcache;
+
+ bool _hasInitialOffset;
+
+ void addOffset(uint64_t offset);
+};
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/docstore/cachestats.h b/searchlib/src/vespa/searchlib/docstore/cachestats.h
new file mode 100644
index 00000000000..216b62f199a
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/docstore/cachestats.h
@@ -0,0 +1,41 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+namespace search {
+
+struct CacheStats {
+ size_t hits;
+ size_t misses;
+ size_t elements;
+ size_t memory_used;
+
+ CacheStats(void)
+ : hits(0),
+ misses(0),
+ elements(0),
+ memory_used(0)
+ {
+ }
+
+ CacheStats(size_t hit, size_t miss, size_t elem, size_t mem)
+ : hits(hit),
+ misses(miss),
+ elements(elem),
+ memory_used(mem)
+ {
+ }
+
+ CacheStats &
+ operator+=(const CacheStats &rhs)
+ {
+ hits += rhs.hits;
+ misses += rhs.misses;
+ elements += rhs.elements;
+ memory_used += rhs.memory_used;
+ return *this;
+ }
+};
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/docstore/chunk.cpp b/searchlib/src/vespa/searchlib/docstore/chunk.cpp
new file mode 100644
index 00000000000..931e3a11353
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/docstore/chunk.cpp
@@ -0,0 +1,139 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/searchlib/docstore/chunk.h>
+#include <vespa/searchlib/docstore/chunkformats.h>
+#include <vespa/vespalib/stllike/hash_map.h>
+
+namespace search {
+
+LidMeta
+Chunk::append(uint32_t lid, const void * buffer, size_t len)
+{
+ vespalib::nbostream & os = getData();
+ size_t oldSz(os.size());
+ os << lid << static_cast<uint32_t>(len);
+ os.write(buffer, len);
+ _lids.push_back(Entry(lid, len, oldSz));
+ return LidMeta(lid, len);
+}
+
+ssize_t
+Chunk::read(uint32_t lid, vespalib::DataBuffer & buffer) const
+{
+ vespalib::ConstBufferRef buf = getLid(lid);
+ if (buf.size() != 0) {
+ buffer.writeBytes(buf.c_str(), buf.size());
+ }
+ return buf.size();
+}
+
+bool
+Chunk::hasRoom(size_t len) const
+{
+ const size_t HeaderSize(2*sizeof(uint32_t));
+ const size_t TrailerSize(sizeof(uint64_t));
+ // To avoid read races during compacting These buffers must be preallocated.
+ // There is always room for at least one element.
+ // There is also room as long as neither _lids[] nor _dataBuf[] require reallocation.
+ // Remember to account for Header and Trailer space requirement.
+ const vespalib::nbostream & os = getData();
+ return _lids.empty()
+ || (((HeaderSize + TrailerSize + os.size() + len) <= os.capacity())
+ && ((_lids.size() + 1) <= _lids.capacity()));
+}
+
+void
+Chunk::pack(uint64_t lastSerial, vespalib::DataBuffer & compressed, const document::CompressionConfig & compression)
+{
+ _lastSerial = lastSerial;
+ _format->pack(_lastSerial, compressed, compression);
+}
+
+Chunk::Chunk(uint32_t id, const Config & config) :
+ _id(id),
+ _nextOffset(0),
+ _lastSerial(static_cast<uint64_t>(-1l)),
+ _format(new ChunkFormatV2(config.getMaxBytes()))
+{
+ _lids.reserve(config.getMaxCount());
+}
+
+Chunk::Chunk(uint32_t id, const void * buffer, size_t len, bool skipcrc) :
+ _id(id),
+ _nextOffset(0),
+ _lastSerial(static_cast<uint64_t>(-1l)),
+ _format(ChunkFormat::deserialize(buffer, len, skipcrc))
+{
+ vespalib::nbostream &os = getData();
+ while (os.size() > sizeof(_lastSerial)) {
+ uint32_t sz(0);
+ uint32_t lid(0);
+ ssize_t oldRp(os.rp());
+ os >> lid >> sz;
+ os.adjustReadPos(sz);
+ _lids.push_back(Entry(lid, sz, oldRp));
+ }
+ os >> _lastSerial;
+}
+
+vespalib::ConstBufferRef
+Chunk::getLid(uint32_t lid) const
+{
+ vespalib::ConstBufferRef buf;
+ for (LidList::const_iterator it(_lids.begin()), mt(_lids.end()); it != mt; it++) {
+ if (it->getLid() == lid) {
+#if 1
+ uint32_t bLid(0), bLen(0);
+ vespalib::nbostream is(getData().c_str()+it->getOffset(), it->size());
+ is >> bLid >> bLen;
+ assert(bLid == lid);
+ assert(bLen == it->netSize());
+ assert((bLen + 2*sizeof(uint32_t)) == it->size());
+#endif
+ buf = vespalib::ConstBufferRef(getData().c_str() + it->getNetOffset(), it->netSize());
+ }
+ }
+ return buf;
+}
+
+Chunk::LidList
+Chunk::getUniqueLids() const
+{
+ vespalib::hash_map<uint32_t, Entry> last;
+ for (const Entry & e : _lids) {
+ last[e.getLid()] = e;
+ }
+ LidList unique;
+ unique.reserve(last.size());
+ for (auto it(last.begin()), mt(last.end()); it != mt; it++) {
+ unique.push_back(it->second);
+ }
+ return unique;
+}
+
+vespalib::nbostream &
+ChunkMeta::deserialize(vespalib::nbostream & is)
+{
+ return is >> _offset >> _size >> _lastSerial >> _numEntries;
+}
+
+vespalib::nbostream &
+ChunkMeta::serialize(vespalib::nbostream & os) const
+{
+ return os << _offset << _size << _lastSerial << _numEntries;
+}
+
+vespalib::nbostream &
+LidMeta::deserialize(vespalib::nbostream & is)
+{
+ return is >> _lid >> _size;
+}
+
+vespalib::nbostream &
+LidMeta::serialize(vespalib::nbostream & os) const
+{
+ return os << _lid << _size;
+}
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/docstore/chunk.h b/searchlib/src/vespa/searchlib/docstore/chunk.h
new file mode 100644
index 00000000000..b4c521aa3da
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/docstore/chunk.h
@@ -0,0 +1,108 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/docstore/chunkformat.h>
+#include <vespa/vespalib/util/memory.h>
+
+namespace search {
+
+class ChunkMeta {
+public:
+ ChunkMeta() :
+ _offset(0),
+ _lastSerial(0),
+ _size(0),
+ _numEntries(0)
+ { }
+ ChunkMeta(uint64_t offset, uint32_t size, uint64_t lastSerial, uint32_t numEntries) :
+ _offset(offset),
+ _lastSerial(lastSerial),
+ _size(size),
+ _numEntries(numEntries)
+ { }
+ uint32_t getNumEntries() const { return _numEntries; }
+ uint32_t getSize() const { return _size; }
+ uint64_t getLastSerial() const { return _lastSerial; }
+ uint64_t getOffset() const { return _offset; }
+ vespalib::nbostream & deserialize(vespalib::nbostream & is);
+ vespalib::nbostream & serialize(vespalib::nbostream & os) const;
+ bool operator < (const ChunkMeta & b) const { return _lastSerial < b._lastSerial; }
+private:
+ uint64_t _offset;
+ uint64_t _lastSerial;
+ uint32_t _size;
+ uint32_t _numEntries;
+};
+
+class LidMeta {
+public:
+ LidMeta() : _lid(0), _size(0) { }
+ LidMeta(uint32_t lid, uint32_t sz) : _lid(lid), _size(sz) { }
+ uint32_t getLid() const { return _lid; }
+ uint32_t size() const { return _size; }
+ vespalib::nbostream & deserialize(vespalib::nbostream & is);
+ vespalib::nbostream & serialize(vespalib::nbostream & os) const;
+private:
+ uint32_t _lid;
+ uint32_t _size;
+};
+
+class Chunk {
+public:
+ typedef std::unique_ptr<Chunk> UP;
+ class Config {
+ public:
+ Config(size_t maxBytes, size_t maxCount) : _maxBytes(maxBytes), _maxCount(maxCount) { }
+ size_t getMaxBytes() const { return _maxBytes; }
+ size_t getMaxCount() const { return _maxCount; }
+ private:
+ size_t _maxBytes;
+ size_t _maxCount;
+ };
+ class Entry {
+ public:
+ Entry() : _lid(0), _sz(0), _offset(0) { }
+ Entry(uint32_t lid, uint32_t sz, uint32_t offset) : _lid(lid), _sz(sz), _offset(offset) { }
+ uint32_t getLid() const { return _lid; }
+ uint32_t size() const { return _sz + 2*4; }
+ uint32_t netSize() const { return _sz; }
+ uint32_t getNetOffset() const { return _offset + 2*4; }
+ uint32_t getOffset() const { return _offset; }
+ private:
+ uint32_t _lid;
+ uint32_t _sz;
+ uint32_t _offset;
+ };
+ typedef std::vector<Entry> LidList;
+ Chunk(uint32_t id, const Config & config);
+ Chunk(uint32_t id, const void * buffer, size_t len, bool skipcrc=false);
+ LidMeta append(uint32_t lid, const void * buffer, size_t len);
+ ssize_t read(uint32_t lid, vespalib::DataBuffer & buffer) const;
+ size_t count() const { return _lids.size(); }
+ bool empty() const { return count() == 0; }
+ size_t size() const { return getData().size(); }
+ const LidList & getLids() const { return _lids; }
+ LidList getUniqueLids() const;
+ size_t getMaxPackSize(const document::CompressionConfig & compression) const { return _format->getMaxPackSize(compression); }
+ void pack(uint64_t lastSerial, vespalib::DataBuffer & buffer, const document::CompressionConfig & compression);
+ uint64_t getLastSerial() const { return _lastSerial; }
+ uint32_t getId() const { return _id; }
+ bool validSerial() const { return getLastSerial() != static_cast<uint64_t>(-1l); }
+ vespalib::ConstBufferRef getLid(uint32_t lid) const;
+ const vespalib::nbostream & getData() const { return _format->getBuffer(); }
+ bool hasRoom(size_t len) const;
+private:
+ vespalib::nbostream & getData() { return _format->getBuffer(); }
+
+ uint32_t _id;
+ uint32_t _nextOffset;
+ uint64_t _lastSerial;
+ ChunkFormat::UP _format;
+ LidList _lids;
+};
+
+typedef std::vector<ChunkMeta> ChunkMetaV;
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/docstore/chunkformat.cpp b/searchlib/src/vespa/searchlib/docstore/chunkformat.cpp
new file mode 100644
index 00000000000..1b243e1546e
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/docstore/chunkformat.cpp
@@ -0,0 +1,158 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/searchlib/docstore/chunkformats.h>
+#include <vespa/document/util/compressor.h>
+
+namespace search {
+
+using vespalib::make_string;
+using vespalib::Exception;
+
+ChunkException::ChunkException(const vespalib::stringref & msg, const vespalib::stringref & location) :
+ Exception(make_string("Illegal chunk: %s", msg.c_str()), location)
+{
+}
+
+void
+ChunkFormat::pack(uint64_t lastSerial, vespalib::DataBuffer & compressed, const document::CompressionConfig & compression)
+{
+ vespalib::nbostream & os = _dataBuf;
+ os << lastSerial;
+ const uint8_t version(getVersion());
+ compressed.writeInt8(version);
+ writeHeader(compressed);
+ const size_t serializedSizePos(compressed.getDataLen());
+ if (includeSerializedSize()) {
+ compressed.writeInt32(0);
+ }
+ const size_t oldPos(compressed.getDataLen());
+ compressed.writeInt8(compression.type);
+ compressed.writeInt32(os.size());
+ document::CompressionConfig::Type type(document::compress(compression, vespalib::ConstBufferRef(os.c_str(), os.size()), compressed, false));
+ if (compression.type != type) {
+ compressed.getData()[oldPos] = type;
+ }
+ if (includeSerializedSize()) {
+ const uint32_t serializedSize = compressed.getDataLen()+4;
+ *reinterpret_cast<uint32_t *>(compressed.getData() + serializedSizePos) = htonl(serializedSize);
+ }
+ uint32_t crc = computeCrc(compressed.getData(), compressed.getDataLen());
+ compressed.writeInt32(crc);
+}
+
+size_t
+ChunkFormat::getMaxPackSize(const document::CompressionConfig & compression) const
+{
+ const size_t OVERHEAD(0);
+ const size_t MINSIZE(1 + 1 + 4 + 4 + includeSerializedSize() ? 4 : 0); // version + type + real length + crc + lastserial
+ const size_t formatSpecificSize(getHeaderSize());
+ size_t rawSize(MINSIZE + formatSpecificSize + OVERHEAD);
+ const size_t payloadSize(_dataBuf.size() + 8);
+ // This is a little dirty -> need interface.
+ if (compression.type == document::CompressionConfig::LZ4) {
+ document::LZ4Compressor lz4;
+ rawSize += lz4.adjustProcessLen(0, payloadSize);
+ } else {
+ rawSize += payloadSize;
+ }
+ return rawSize;
+}
+
+void
+ChunkFormat::verifyCompression(uint8_t type)
+{
+ if ((type != document::CompressionConfig::LZ4) &&
+ (type != document::CompressionConfig::NONE)) {
+ throw ChunkException(make_string("Unknown compressiontype %d", type), VESPA_STRLOC);
+ }
+}
+
+ChunkFormat::UP
+ChunkFormat::deserialize(const void * buffer, size_t len, bool skipcrc)
+{
+ uint8_t version(0);
+ vespalib::nbostream raw(buffer, len);
+ const uint32_t minimumRequiredSpace(sizeof(uint8_t)*2 + sizeof(uint32_t)*2);
+ if (raw.size() < minimumRequiredSpace) {
+ throw ChunkException(make_string("Available space (%ld) is less than required (%d)", raw.size(), minimumRequiredSpace), VESPA_STRLOC);
+ }
+ raw >> version;
+ size_t currPos = raw.rp();
+ raw.adjustReadPos(raw.size() - sizeof(uint32_t));
+ uint32_t crc32(0);
+ raw >> crc32;
+ raw.rp(currPos);
+ ChunkFormat::UP format;
+ if (version == ChunkFormatV1::VERSION) {
+ if (skipcrc) {
+ format.reset(new ChunkFormatV1(raw));
+ } else {
+ format.reset(new ChunkFormatV1(raw, crc32));
+ }
+ } else if (version == ChunkFormatV2::VERSION) {
+ if (skipcrc) {
+ format.reset(new ChunkFormatV2(raw));
+ } else {
+ format.reset(new ChunkFormatV2(raw, crc32));
+ }
+ } else {
+ throw ChunkException(make_string("Unknown version %d", version), VESPA_STRLOC);
+ }
+ return format;
+}
+
+ChunkFormat::ChunkFormat() :
+ _dataBuf()
+{
+}
+
+ChunkFormat::~ChunkFormat()
+{
+}
+
+ChunkFormat::ChunkFormat(size_t maxSize) :
+ _dataBuf(maxSize)
+{
+}
+
+void
+ChunkFormat::verifyCrc(const vespalib::nbostream & is, uint32_t expectedCrc) const
+{
+ uint32_t computedCrc32 = computeCrc(is.peek()-1, is.size() + 1 - sizeof(uint32_t));
+ if (expectedCrc != computedCrc32) {
+ throw ChunkException(make_string("Crc32 mismatch. Expected (%0x), computed (%0x)", expectedCrc, computedCrc32), VESPA_STRLOC);
+ }
+}
+
+void
+ChunkFormat::deserializeBody(vespalib::nbostream & is)
+{
+ if (includeSerializedSize()) {
+ uint32_t serializedSize(0);
+ is >> serializedSize;
+ const uint32_t alreadyRead(sizeof(uint8_t) + getHeaderSize() + sizeof(uint32_t));
+ const uint32_t required(serializedSize - alreadyRead);
+ if ((is.size() + alreadyRead) < serializedSize) {
+ throw ChunkException(make_string("Not enough data(%d) available in stream(%ld)", required, is.size()), VESPA_STRLOC);
+ }
+ }
+ uint8_t type(0);
+ is >> type;
+ verifyCompression(type);
+ uint32_t uncompressedLen(0);
+ is >> uncompressedLen;
+ // This is a dirty trick to fool some odd sanity checking in DataBuffer::swap
+ vespalib::DataBuffer uncompressed(const_cast<char *>(is.peek()), (size_t)0);
+ vespalib::ConstBufferRef data(is.peek(), is.size() - sizeof(uint32_t));
+ document::decompress(document::CompressionConfig::Type(type), uncompressedLen, data, uncompressed, true);
+ assert(uncompressed.getData() == uncompressed.getDead());
+ if (uncompressed.getData() != data.c_str()) {
+ const size_t sz(uncompressed.getDataLen());
+ vespalib::nbostream(uncompressed.stealBuffer(), sz).swap(_dataBuf);
+ } else {
+ _dataBuf = vespalib::nbostream(uncompressed.getData(), uncompressed.getDataLen());
+ }
+}
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/docstore/chunkformat.h b/searchlib/src/vespa/searchlib/docstore/chunkformat.h
new file mode 100644
index 00000000000..8da755522e0
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/docstore/chunkformat.h
@@ -0,0 +1,106 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/document/util/compressionconfig.h>
+#include <vespa/vespalib/objects/nbostream.h>
+#include <vespa/vespalib/data/databuffer.h>
+#include <vespa/vespalib/util/exception.h>
+
+namespace search {
+
+class ChunkException : public vespalib::Exception
+{
+public:
+ ChunkException(const vespalib::stringref & msg, const vespalib::stringref & location);
+};
+
+// This is an interface for implementing a chunk format
+class ChunkFormat
+{
+public:
+ virtual ~ChunkFormat();
+ typedef std::unique_ptr<ChunkFormat> UP;
+ vespalib::nbostream & getBuffer() { return _dataBuf; }
+ const vespalib::nbostream & getBuffer() const { return _dataBuf; }
+
+ /**
+ * Will serialze your chunk.
+ * @param lastSerial The last serial number of any entry in the packet.
+ * @param compressed The buffer where the serialized data shall be placed.
+ * @param compression What kind of compression shall be employed.
+ */
+ void pack(uint64_t lastSerial, vespalib::DataBuffer & compressed, const document::CompressionConfig & compression);
+ /**
+ * Will deserialize and create a representation of the uncompressed data.
+ * param buffer Pointer to the serialized data
+ * @param len Length of serialized data
+ * @param indicate if crc verification shall be skipped.
+ */
+ static ChunkFormat::UP deserialize(const void * buffer, size_t len, bool skipcrc);
+ /**
+ * return the maximum size a packet can have. It allows correct size estimation
+ * need for direct io alignment.
+ * @param compression Compression config to be used.
+ * @return maximum number of bytes a packet can take in serialized form.
+ */
+ size_t getMaxPackSize(const document::CompressionConfig & compression) const;
+protected:
+ /**
+ * Constructor used when deserializing
+ */
+ ChunkFormat();
+ /**
+ * Constructor used when creating a new chunk.
+ * @param maxSize The maximum size the chunk can take before it will need to be closed.
+ */
+ ChunkFormat(size_t maxSize);
+ /**
+ * Will deserialize and uncompress the body.
+ * @param the potentially compressed stream.
+ */
+ void deserializeBody(vespalib::nbostream & is);
+ /**
+ * Wille compute and check the crc of the incoming stream.
+ * Will start 1 byte earlier and stop 4 bytes ahead of end.
+ * Thows exception if check fails.
+ */
+ void verifyCrc(const vespalib::nbostream & is, uint32_t expected) const;
+private:
+ /**
+ * Used when serializing to obtain correct version.
+ * @return version
+ */
+ virtual uint8_t getVersion() const = 0;
+ /**
+ * Used to compute maximum size needed for a serialized chunk.
+ * @return size of header this format will produce.
+ */
+ virtual size_t getHeaderSize() const = 0;
+ /**
+ * Does this format require the length of the serialized data to be include.
+ * Length will is inclusive. From and including version to end of crc.
+ * @return if length is required.
+ */
+ virtual bool includeSerializedSize() const = 0;
+ /**
+ * Will compute the crc for verifying the data.
+ * @param buf Start of buffer
+ * @param sz Size of buffer
+ * @return computed crc.
+ */
+ virtual uint32_t computeCrc(const void * buf, size_t sz) const = 0;
+ /**
+ * Allows each format to write its special stuff after the version byte.
+ * Must be reflected in @getHeaderSize
+ * @param buf Buffer to write into.
+ */
+ virtual void writeHeader(vespalib::DataBuffer & buf) const = 0;
+
+ static void verifyCompression(uint8_t type);
+
+ vespalib::nbostream _dataBuf;
+};
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/docstore/chunkformats.cpp b/searchlib/src/vespa/searchlib/docstore/chunkformats.cpp
new file mode 100644
index 00000000000..bdff46aacbc
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/docstore/chunkformats.cpp
@@ -0,0 +1,73 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/searchlib/docstore/chunkformats.h>
+#include <vespa/vespalib/util/crc.h>
+#include <vespa/vespalib/xxhash/xxhash.h>
+
+namespace search {
+
+using vespalib::make_string;
+
+ChunkFormatV1::ChunkFormatV1(vespalib::nbostream & is) :
+ ChunkFormat()
+{
+ deserializeBody(is);
+}
+
+ChunkFormatV1::ChunkFormatV1(vespalib::nbostream & is, uint32_t expectedCrc) :
+ ChunkFormat()
+{
+ verifyCrc(is, expectedCrc);
+ deserializeBody(is);
+}
+
+ChunkFormatV1::ChunkFormatV1(size_t maxSize) :
+ ChunkFormat(maxSize)
+{
+}
+
+uint32_t
+ChunkFormatV1::computeCrc(const void * buf, size_t sz) const
+{
+ return vespalib::crc_32_type::crc(buf, sz);
+}
+
+ChunkFormatV2::ChunkFormatV2(vespalib::nbostream & is) :
+ ChunkFormat()
+{
+ verifyMagic(is);
+ deserializeBody(is);
+}
+
+ChunkFormatV2::ChunkFormatV2(vespalib::nbostream & is, uint32_t expectedCrc) :
+ ChunkFormat()
+{
+ verifyCrc(is, expectedCrc);
+ verifyMagic(is);
+ deserializeBody(is);
+}
+
+
+ChunkFormatV2::ChunkFormatV2(size_t maxSize) :
+ ChunkFormat(maxSize)
+{
+}
+
+uint32_t
+ChunkFormatV2::computeCrc(const void * buf, size_t sz) const
+{
+ return XXH32(buf, sz, 0);
+}
+
+void
+ChunkFormatV2::verifyMagic(vespalib::nbostream & is) const
+{
+ uint32_t magic;
+ is >> magic;
+ if (magic != MAGIC) {
+ throw ChunkException(make_string("Unknown magic %0x, expected %0x", magic, MAGIC), VESPA_STRLOC);
+ }
+}
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/docstore/chunkformats.h b/searchlib/src/vespa/searchlib/docstore/chunkformats.h
new file mode 100644
index 00000000000..15a45ec7e60
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/docstore/chunkformats.h
@@ -0,0 +1,48 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/docstore/chunkformat.h>
+
+namespace search {
+
+class ChunkFormatV1 : public ChunkFormat
+{
+public:
+ enum {VERSION=0};
+ ChunkFormatV1(vespalib::nbostream & is);
+ ChunkFormatV1(vespalib::nbostream & is, uint32_t expectedCrc);
+ ChunkFormatV1(size_t maxSize);
+private:
+ virtual bool includeSerializedSize() const { return false; }
+ virtual uint8_t getVersion() const { return VERSION; }
+ virtual size_t getHeaderSize() const { return 0; }
+ virtual uint32_t computeCrc(const void * buf, size_t sz) const;
+ virtual void writeHeader(vespalib::DataBuffer & buf) const {
+ (void) buf;
+ }
+};
+
+class ChunkFormatV2 : public ChunkFormat
+{
+public:
+ enum {VERSION=1, MAGIC=0x5ba32de7};
+ ChunkFormatV2(vespalib::nbostream & is);
+ ChunkFormatV2(vespalib::nbostream & is, uint32_t expectedCrc);
+ ChunkFormatV2(size_t maxSize);
+private:
+ virtual bool includeSerializedSize() const { return true; }
+ virtual size_t getHeaderSize() const {
+ // MAGIC
+ return 4;
+ }
+ virtual uint8_t getVersion() const { return VERSION; }
+ virtual uint32_t computeCrc(const void * buf, size_t sz) const;
+ virtual void writeHeader(vespalib::DataBuffer & buf) const {
+ buf.writeInt32(MAGIC);
+ }
+ void verifyMagic(vespalib::nbostream & is) const;
+};
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/docstore/data_store_file_chunk_id.cpp b/searchlib/src/vespa/searchlib/docstore/data_store_file_chunk_id.cpp
new file mode 100644
index 00000000000..50177e5af31
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/docstore/data_store_file_chunk_id.cpp
@@ -0,0 +1,17 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "data_store_file_chunk_id.h"
+#include "filechunk.h"
+
+namespace search
+{
+
+vespalib::string
+DataStoreFileChunkId::createName(const vespalib::string &baseName) const
+{
+ FileChunk::NameId id(_nameId);
+ return id.createName(baseName);
+}
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/docstore/data_store_file_chunk_id.h b/searchlib/src/vespa/searchlib/docstore/data_store_file_chunk_id.h
new file mode 100644
index 00000000000..7ed50a9d291
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/docstore/data_store_file_chunk_id.h
@@ -0,0 +1,28 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/vespalib/stllike/string.h>
+
+namespace search {
+
+/*
+ * Class representing the relative naming of a underlying file for a
+ * data store.
+ */
+class DataStoreFileChunkId
+{
+ uint64_t _nameId;
+public:
+ DataStoreFileChunkId(uint64_t nameId_in)
+ : _nameId(nameId_in)
+ {
+ }
+ uint64_t nameId() const { return _nameId; }
+ vespalib::string createName(const vespalib::string &baseName) const;
+ bool operator<(const DataStoreFileChunkId &rhs) const {
+ return _nameId < rhs._nameId;
+ }
+};
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/docstore/data_store_file_chunk_stats.h b/searchlib/src/vespa/searchlib/docstore/data_store_file_chunk_stats.h
new file mode 100644
index 00000000000..48a99c0762c
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/docstore/data_store_file_chunk_stats.h
@@ -0,0 +1,30 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "data_store_storage_stats.h"
+#include "data_store_file_chunk_id.h"
+
+namespace search {
+
+/*
+ * Class representing stats for the underlying file for a data store.
+ */
+class DataStoreFileChunkStats : public DataStoreStorageStats,
+ public DataStoreFileChunkId
+{
+public:
+ DataStoreFileChunkStats(uint64_t diskUsage_in, uint64_t diskBloat_in,
+ double maxBucketSpread_in,
+ uint64_t lastSerialNum_in,
+ uint64_t lastFlushedSerialNum_in,
+ uint64_t nameId_in)
+ : DataStoreStorageStats(diskUsage_in, diskBloat_in,
+ maxBucketSpread_in,
+ lastSerialNum_in, lastFlushedSerialNum_in),
+ DataStoreFileChunkId(nameId_in)
+ {
+ }
+};
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/docstore/data_store_storage_stats.h b/searchlib/src/vespa/searchlib/docstore/data_store_storage_stats.h
new file mode 100644
index 00000000000..906a2d76995
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/docstore/data_store_storage_stats.h
@@ -0,0 +1,36 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+namespace search {
+
+/*
+ * Class representing brief stats for a data store.
+ */
+class DataStoreStorageStats
+{
+ uint64_t _diskUsage;
+ uint64_t _diskBloat;
+ double _maxBucketSpread;
+ uint64_t _lastSerialNum;
+ uint64_t _lastFlushedSerialNum;
+public:
+ DataStoreStorageStats(uint64_t diskUsage_in, uint64_t diskBloat_in,
+ double maxBucketSpread_in,
+ uint64_t lastSerialNum_in,
+ uint64_t lastFlushedSerialNum_in)
+ : _diskUsage(diskUsage_in),
+ _diskBloat(diskBloat_in),
+ _maxBucketSpread(maxBucketSpread_in),
+ _lastSerialNum(lastSerialNum_in),
+ _lastFlushedSerialNum(lastFlushedSerialNum_in)
+ {
+ }
+ uint64_t diskUsage() const { return _diskUsage; }
+ uint64_t diskBloat() const { return _diskBloat; }
+ double maxBucketSpread() const { return _maxBucketSpread; }
+ uint64_t lastSerialNum() const { return _lastSerialNum; }
+ uint64_t lastFlushedSerialNum() const { return _lastFlushedSerialNum; }
+};
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/docstore/document_store_visitor_progress.cpp b/searchlib/src/vespa/searchlib/docstore/document_store_visitor_progress.cpp
new file mode 100644
index 00000000000..cca794ccb69
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/docstore/document_store_visitor_progress.cpp
@@ -0,0 +1,31 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "document_store_visitor_progress.h"
+
+namespace search
+{
+
+
+DocumentStoreVisitorProgress::DocumentStoreVisitorProgress()
+ : search::IDocumentStoreVisitorProgress(),
+ _progress(0.0)
+{
+}
+
+
+void
+DocumentStoreVisitorProgress::updateProgress(double progress)
+{
+ _progress = progress;
+}
+
+
+double
+DocumentStoreVisitorProgress::getProgress() const
+{
+ return _progress;
+}
+
+
+} // namespace proton
diff --git a/searchlib/src/vespa/searchlib/docstore/document_store_visitor_progress.h b/searchlib/src/vespa/searchlib/docstore/document_store_visitor_progress.h
new file mode 100644
index 00000000000..125e8cb0f98
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/docstore/document_store_visitor_progress.h
@@ -0,0 +1,23 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include "idocumentstore.h"
+
+namespace search
+{
+
+class DocumentStoreVisitorProgress : public IDocumentStoreVisitorProgress
+{
+ double _progress;
+public:
+ DocumentStoreVisitorProgress();
+
+ virtual void
+ updateProgress(double progress);
+
+ virtual double
+ getProgress() const;
+};
+
+} // namespace proton
+
diff --git a/searchlib/src/vespa/searchlib/docstore/documentstore.cpp b/searchlib/src/vespa/searchlib/docstore/documentstore.cpp
new file mode 100644
index 00000000000..024a8a59d47
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/docstore/documentstore.cpp
@@ -0,0 +1,392 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".seach.docstore");
+
+#include "cachestats.h"
+#include "documentstore.h"
+#include <vespa/vespalib/objects/nbostream.h>
+#include <vespa/vespalib/util/atomic.h>
+
+namespace search
+{
+
+namespace {
+
+class DocumentVisitorAdapter : public IBufferVisitor
+{
+public:
+ DocumentVisitorAdapter(const document::DocumentTypeRepo & repo, IDocumentVisitor & visitor) :
+ _repo(repo),
+ _visitor(visitor)
+ { }
+ void visit(uint32_t lid, vespalib::ConstBufferRef buf) override;
+private:
+ const document::DocumentTypeRepo & _repo;
+ IDocumentVisitor & _visitor;
+};
+
+void
+DocumentVisitorAdapter::visit(uint32_t lid, vespalib::ConstBufferRef buf) {
+ if (buf.size() > 0) {
+ vespalib::nbostream is(buf.c_str(), buf.size());
+ document::Document::UP doc(new document::Document(_repo, is));
+ _visitor.visit(lid, std::move(doc));
+ }
+}
+
+}
+
+using vespalib::nbostream;
+
+void
+DocumentStore::Value::set(vespalib::DataBuffer && buf,
+ ssize_t len,
+ const document::CompressionConfig & compression)
+{
+ //Underlying buffer must be identical to allow swap.
+ vespalib::DataBuffer compressed(buf.getData(), 0u);
+ document::CompressionConfig::Type type =
+ document::compress(compression,
+ vespalib::ConstBufferRef(buf.getData(), len),
+ compressed, true);
+ _compressedSize = compressed.getDataLen();
+ if (buf.getData() == compressed.getData()) {
+ // Uncompressed so we can just steal the underlying buffer.
+ buf.stealBuffer().swap(_buf);
+ } else {
+ compressed.stealBuffer().swap(_buf);
+ }
+ assert(((type == document::CompressionConfig::NONE) &&
+ (len == ssize_t(_compressedSize))) ||
+ ((type != document::CompressionConfig::NONE) &&
+ (len > ssize_t(_compressedSize))));
+ setCompression(type, len);
+}
+
+
+document::Document::UP
+DocumentStore::Value::deserializeDocument(const document::DocumentTypeRepo & repo)
+{
+ vespalib::DataBuffer uncompressed((char *)_buf.get(), (size_t)0);
+ document::decompress(getCompression(),
+ getUncompressedSize(),
+ vespalib::ConstBufferRef(*this, size()),
+ uncompressed, true);
+ vespalib::nbostream is(uncompressed.getData(), uncompressed.getDataLen());
+ return document::Document::UP(new document::Document(repo, is));
+}
+
+
+void DocumentStore::BackingStore::visit(const LidVector & lids, const document::DocumentTypeRepo &repo, IDocumentVisitor & visitor) const {
+ DocumentVisitorAdapter adapter(repo, visitor);
+ _backingStore.read(lids, adapter);
+}
+
+bool
+DocumentStore::BackingStore::read(DocumentIdT key, Value & value) const {
+ bool found(false);
+ vespalib::DataBuffer buf(4096);
+ ssize_t len = _backingStore.read(key, buf);
+ if (len > 0) {
+ value.set(std::move(buf), len, _compression);
+ found = true;
+ }
+ return found;
+}
+
+DocumentStore::DocumentStore(const Config & config, IDataStore & store)
+ : IDocumentStore(),
+ _config(config),
+ _backingStore(store),
+ _store(_backingStore, _config.getCompression()),
+ _cache(new Cache(_store, config.getMaxCacheBytes())),
+ _uncached_lookups(0)
+{
+ _cache->reserveElements(config.getInitialCacheEntries());
+}
+
+DocumentStore::~DocumentStore()
+{
+}
+
+void
+DocumentStore::visit(const LidVector & lids, const document::DocumentTypeRepo &repo, IDocumentVisitor & visitor) const
+{
+ _store.visit(lids, repo, visitor);
+}
+
+document::Document::UP
+DocumentStore::read(DocumentIdT lid, const document::DocumentTypeRepo &repo) const
+{
+ document::Document::UP retval;
+ Value value;
+ if (useCache()) {
+ value = _cache->read(lid);
+ } else {
+ vespalib::Atomic::add(&_uncached_lookups, 1UL);
+ _store.read(lid, value);
+ }
+ if ( ! value.empty() ) {
+ retval = value.deserializeDocument(repo);
+ }
+ return retval;
+}
+
+void
+DocumentStore::write(uint64_t syncToken, const document::Document& doc, DocumentIdT lid)
+{
+ nbostream stream(12345);
+ doc.serialize(stream);
+ _backingStore.write(syncToken, lid, stream.peek(), stream.size());
+ if (useCache()) {
+ _cache->invalidate(lid);
+ }
+}
+
+void
+DocumentStore::remove(uint64_t syncToken, DocumentIdT lid)
+{
+ _backingStore.remove(syncToken, lid);
+ if (useCache()) {
+ _cache->invalidate(lid);
+ }
+}
+
+void
+DocumentStore::compact(uint64_t syncToken)
+{
+ (void) syncToken;
+ // Most implementations does not offer compact.
+}
+
+void
+DocumentStore::flush(uint64_t syncToken)
+{
+ _backingStore.flush(syncToken);
+}
+
+uint64_t
+DocumentStore::initFlush(uint64_t syncToken)
+{
+ return _backingStore.initFlush(syncToken);
+}
+
+uint64_t
+DocumentStore::lastSyncToken() const
+{
+ return _backingStore.lastSyncToken();
+}
+
+uint64_t
+DocumentStore::tentativeLastSyncToken() const
+{
+ return _backingStore.tentativeLastSyncToken();
+}
+
+fastos::TimeStamp
+DocumentStore::getLastFlushTime() const
+{
+ return _backingStore.getLastFlushTime();
+}
+
+template <class Visitor>
+class DocumentStore::WrapVisitor : public IDataStoreVisitor
+{
+ Visitor &_visitor;
+ const document::DocumentTypeRepo &_repo;
+ const document::CompressionConfig &_compression;
+ IDocumentStore &_ds;
+ uint64_t _syncToken;
+
+public:
+ virtual void
+ visit(uint32_t lid, const void *buffer, size_t sz);
+
+ WrapVisitor(Visitor &visitor,
+ const document::DocumentTypeRepo &repo,
+ const document::CompressionConfig &compresion,
+ IDocumentStore &ds,
+ uint64_t syncToken);
+
+ inline void rewrite(uint32_t lid, const document::Document &doc);
+
+ inline void rewrite(uint32_t lid);
+
+ inline void visitRemove(uint32_t lid);
+};
+
+
+class DocumentStore::WrapVisitorProgress : public IDataStoreVisitorProgress
+{
+ IDocumentStoreVisitorProgress &_visitorProgress;
+public:
+ virtual void
+ updateProgress(double progress)
+ {
+ _visitorProgress.updateProgress(progress);
+ }
+
+ WrapVisitorProgress(IDocumentStoreVisitorProgress &visitProgress)
+ : _visitorProgress(visitProgress)
+ {
+ }
+};
+
+
+template <>
+void
+DocumentStore::WrapVisitor<IDocumentStoreReadVisitor>::
+rewrite(uint32_t lid, const document::Document &doc)
+{
+ (void) lid;
+ (void) doc;
+}
+
+template <>
+void
+DocumentStore::WrapVisitor<IDocumentStoreReadVisitor>::
+rewrite(uint32_t lid)
+{
+ (void) lid;
+}
+
+
+template <>
+void
+DocumentStore::WrapVisitor<IDocumentStoreReadVisitor>::
+visitRemove(uint32_t lid)
+{
+ _visitor.visit(lid);
+}
+
+
+template <>
+void
+DocumentStore::WrapVisitor<IDocumentStoreRewriteVisitor>::
+rewrite(uint32_t lid, const document::Document &doc)
+{
+ _ds.write(_syncToken, doc, lid);
+}
+
+template <>
+void
+DocumentStore::WrapVisitor<IDocumentStoreRewriteVisitor>::
+rewrite(uint32_t lid)
+{
+ _ds.remove(_syncToken, lid);
+}
+
+
+template <>
+void
+DocumentStore::WrapVisitor<IDocumentStoreRewriteVisitor>::
+visitRemove(uint32_t lid)
+{
+ (void) lid;
+}
+
+
+
+template <class Visitor>
+void
+DocumentStore::WrapVisitor<Visitor>::visit(uint32_t lid,
+ const void *buffer,
+ size_t sz)
+{
+ (void) lid;
+ (void) buffer;
+ (void) sz;
+
+ Value value;
+ vespalib::DataBuffer buf(4096);
+ buf.clear();
+ buf.writeBytes(buffer, sz);
+ ssize_t len = sz;
+ if (len > 0) {
+ value.set(std::move(buf), len, _compression);
+ }
+ if (! value.empty()) {
+ document::Document::UP doc(value.deserializeDocument(_repo));
+ _visitor.visit(lid, *doc);
+ rewrite(lid, *doc);
+ } else {
+ visitRemove(lid);
+ rewrite(lid);
+ }
+}
+
+
+template <class Visitor>
+DocumentStore::WrapVisitor<Visitor>::
+WrapVisitor(Visitor &visitor,
+ const document::DocumentTypeRepo &repo,
+ const document::CompressionConfig &compression,
+ IDocumentStore &ds,
+ uint64_t syncToken)
+ : _visitor(visitor),
+ _repo(repo),
+ _compression(compression),
+ _ds(ds),
+ _syncToken(syncToken)
+{
+}
+
+
+void
+DocumentStore::accept(IDocumentStoreReadVisitor &visitor,
+ IDocumentStoreVisitorProgress &visitorProgress,
+ const document::DocumentTypeRepo &repo)
+{
+ WrapVisitor<IDocumentStoreReadVisitor> wrap(visitor, repo,
+ _store.getCompression(),
+ *this,
+ _backingStore.
+ tentativeLastSyncToken());
+ WrapVisitorProgress wrapVisitorProgress(visitorProgress);
+ _backingStore.accept(wrap, wrapVisitorProgress, false);
+}
+
+
+void
+DocumentStore::accept(IDocumentStoreRewriteVisitor &visitor,
+ IDocumentStoreVisitorProgress &visitorProgress,
+ const document::DocumentTypeRepo &repo)
+{
+ WrapVisitor<IDocumentStoreRewriteVisitor> wrap(visitor,
+ repo,
+ _store.getCompression(),
+ *this,
+ _backingStore.
+ tentativeLastSyncToken());
+ WrapVisitorProgress wrapVisitorProgress(visitorProgress);
+ _backingStore.accept(wrap, wrapVisitorProgress, true);
+}
+
+
+double
+DocumentStore::getVisitCost() const
+{
+ return _backingStore.getVisitCost();
+}
+
+DataStoreStorageStats
+DocumentStore::getStorageStats() const
+{
+ return _backingStore.getStorageStats();
+}
+
+std::vector<DataStoreFileChunkStats>
+DocumentStore::getFileChunkStats() const
+{
+ return _backingStore.getFileChunkStats();
+}
+
+CacheStats DocumentStore::getCacheStats() const {
+ return CacheStats(_cache->getHit(), _cache->getMiss() + _uncached_lookups,
+ _cache->size(), _cache->sizeBytes());
+}
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/docstore/documentstore.h b/searchlib/src/vespa/searchlib/docstore/documentstore.h
new file mode 100644
index 00000000000..947ed58876c
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/docstore/documentstore.h
@@ -0,0 +1,244 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "idocumentstore.h"
+#include "idatastore.h"
+#include <vespa/vespalib/stllike/cache.h>
+
+namespace search {
+
+/**
+ * Simple document store that contains serialized Document instances.
+ * updates will be held in memory until flush() is called.
+ * Uses a Local ID as key.
+ **/
+class DocumentStore : public IDocumentStore
+{
+public:
+ class Config {
+ public:
+ Config() :
+ _compression(document::CompressionConfig::LZ4, 9, 70),
+ _maxCacheBytes(1000000000),
+ _initialCacheEntries(0)
+ { }
+ Config(const document::CompressionConfig & compression, size_t maxCacheBytes, size_t initialCacheEntries) :
+ _compression((maxCacheBytes != 0) ? compression : document::CompressionConfig::NONE),
+ _maxCacheBytes(maxCacheBytes),
+ _initialCacheEntries(initialCacheEntries)
+ { }
+ const document::CompressionConfig & getCompression() const { return _compression; }
+ size_t getMaxCacheBytes() const { return _maxCacheBytes; }
+ size_t getInitialCacheEntries() const { return _initialCacheEntries; }
+ private:
+ document::CompressionConfig _compression;
+ size_t _maxCacheBytes;
+ size_t _initialCacheEntries;
+ };
+
+ /**
+ * Construct a document store.
+ * If the "simpledocstore.dat" data file exists, reads meta-data (offsets) into memory.
+ *
+ * @throws vespalib::IoException if the file is corrupt or other IO problems occur.
+ * @param baseDir The path to a directory where "simpledocstore.dat" will exist.
+ **/
+ DocumentStore(const Config & config, IDataStore & store);
+ ~DocumentStore();
+
+ /**
+ * Make a Document from a stored serialized data blob.
+ * @param lid The local ID associated with the document.
+ * @return NULL if there is no document associated with the lid.
+ **/
+ document::Document::UP read(DocumentIdT lid, const document::DocumentTypeRepo &repo) const override;
+ void visit(const LidVector & lids, const document::DocumentTypeRepo &repo, IDocumentVisitor & visitor) const override;
+
+ /**
+ * Serialize and store a document.
+ * @param doc The document to store
+ * @param lid The local ID associated with the document
+ **/
+ void write(uint64_t synkToken, const document::Document& doc, DocumentIdT lid) override;
+
+ /**
+ * Mark a document as removed. A later read() will return NULL for the given lid.
+ * @param lid The local ID associated with the document
+ **/
+ void remove(uint64_t syncToken, DocumentIdT lid) override;
+
+ /**
+ * Flush all in-memory updates to disk.
+ **/
+ void flush(uint64_t syncToken) override;
+ uint64_t initFlush(uint64_t synctoken) override;
+
+
+ /**
+ * If possible compact the disk.
+ **/
+ void compact(uint64_t syncToken) override;
+
+ /**
+ * The sync token used for the last successful flush() operation,
+ * or 0 if no flush() has been performed yet.
+ * @return Last flushed sync token.
+ **/
+ uint64_t lastSyncToken() const override;
+ uint64_t tentativeLastSyncToken() const override;
+ fastos::TimeStamp getLastFlushTime() const override;
+
+ /**
+ * Get the number of entries (including removed IDs
+ * or gaps in the local ID sequence) in the document store.
+ * @return The next local ID expected to be used.
+ */
+ uint64_t nextId() const override { return _backingStore.nextId(); }
+
+ /**
+ * Calculate memory used by this instance. During flush() actual
+ * memory usage may be approximately twice the reported amount.
+ * @return memory usage (in bytes)
+ **/
+ size_t memoryUsed() const override { return _backingStore.memoryUsed(); }
+ size_t getDiskFootprint() const override { return _backingStore.getDiskFootprint(); }
+ size_t getDiskBloat() const override { return _backingStore.getDiskBloat(); }
+ size_t getMaxCompactGain() const override { return _backingStore.getMaxCompactGain(); }
+
+ CacheStats getCacheStats() const override;
+
+ /**
+ * Calculates memory that is used for meta data by this instance. Calling
+ * flush() does not free this memory.
+ * @return memory usage (in bytes)
+ **/
+ size_t memoryMeta() const override { return _backingStore.memoryMeta(); }
+
+ const vespalib::string & getBaseDir() const override { return _backingStore.getBaseDir(); }
+
+ /**
+ * Visit all documents found in document store.
+ */
+ void
+ accept(IDocumentStoreReadVisitor &visitor,
+ IDocumentStoreVisitorProgress &visitorProgress,
+ const document::DocumentTypeRepo &repo) override;
+
+ /**
+ * Visit all documents found in document store.
+ */
+ void
+ accept(IDocumentStoreRewriteVisitor &visitor,
+ IDocumentStoreVisitorProgress &visitorProgress,
+ const document::DocumentTypeRepo &repo) override;
+
+ /**
+ * Return cost of visiting all documents found in document store.
+ */
+ double getVisitCost() const override;
+
+ /*
+ * Return brief stats for data store.
+ */
+ virtual DataStoreStorageStats getStorageStats() const override;
+
+ /*
+ * Return detailed stats about underlying files for data store.
+ */
+ virtual std::vector<DataStoreFileChunkStats>
+ getFileChunkStats() const override;
+
+private:
+ template <class> class WrapVisitor;
+ class WrapVisitorProgress;
+ class Value {
+ public:
+ typedef std::unique_ptr<Value> UP;
+ Value() : _compressedSize(0), _uncompressedSize(0), _compression(document::CompressionConfig::NONE) { }
+
+ Value(Value && rhs) :
+ _compressedSize(rhs._compressedSize),
+ _uncompressedSize(rhs._uncompressedSize),
+ _compression(rhs._compression),
+ _buf(std::move(rhs._buf))
+ { }
+
+ Value(const Value & rhs) :
+ _compressedSize(rhs._compressedSize),
+ _uncompressedSize(rhs._uncompressedSize),
+ _compression(rhs._compression),
+ _buf(rhs.size())
+ {
+ memcpy(get(), rhs.get(), size());
+ }
+ Value & operator = (Value && rhs) {
+ _buf = std::move(rhs._buf);
+ _compressedSize = rhs._compressedSize;
+ _uncompressedSize = rhs._uncompressedSize;
+ _compression = rhs._compression;
+ return *this;
+ }
+ void setCompression(document::CompressionConfig::Type comp, size_t uncompressedSize) {
+ _compression = comp;
+ _uncompressedSize = uncompressedSize;
+ }
+ document::CompressionConfig::Type getCompression() const { return _compression; }
+ size_t getUncompressedSize() const { return _uncompressedSize; }
+
+ /**
+ * Compress buffer into temporary buffer and copy temporary buffer to
+ * value along with compression config.
+ */
+ void set(vespalib::DataBuffer && buf, ssize_t len, const document::CompressionConfig &compression);
+
+ /**
+ * Decompress value into temporary buffer and deserialize document from
+ * the temporary buffer.
+ */
+ document::Document::UP deserializeDocument(const document::DocumentTypeRepo &repo);
+
+ size_t size() const { return _compressedSize; }
+ bool empty() const { return size() == 0; }
+ operator const void * () const { return _buf.get(); }
+ const void * get() const { return _buf.get(); }
+ void * get() { return _buf.get(); }
+
+ private:
+ size_t _compressedSize;
+ size_t _uncompressedSize;
+ document::CompressionConfig::Type _compression;
+ vespalib::DefaultAlloc _buf;
+ };
+ class BackingStore {
+ public:
+ typedef vespalib::hash_map<DocumentIdT, Value::UP> LidValueMap;
+ BackingStore(IDataStore & store, const document::CompressionConfig & compression) :
+ _backingStore(store),
+ _compression(compression)
+ { }
+ bool read(DocumentIdT key, Value & value) const;
+ void visit(const LidVector & lids, const document::DocumentTypeRepo &repo, IDocumentVisitor & visitor) const;
+ void write(DocumentIdT, const Value &) { }
+ void erase(DocumentIdT ) { }
+
+ const document::CompressionConfig & getCompression(void) const { return _compression; }
+ private:
+ IDataStore & _backingStore;
+ const document::CompressionConfig & _compression;
+ };
+ bool useCache() const { return (_cache->capacityBytes() != 0) && (_cache->capacity() != 0); }
+ typedef vespalib::cache< vespalib::CacheParam< vespalib::LruParam<DocumentIdT, Value>,
+ BackingStore,
+ vespalib::zero<DocumentIdT>,
+ vespalib::size<Value> > > Cache;
+
+ Config _config;
+ IDataStore & _backingStore;
+ BackingStore _store;
+ std::shared_ptr<Cache> _cache;
+ mutable volatile uint64_t _uncached_lookups;
+};
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/docstore/filechunk.cpp b/searchlib/src/vespa/searchlib/docstore/filechunk.cpp
new file mode 100644
index 00000000000..6194c2985b8
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/docstore/filechunk.cpp
@@ -0,0 +1,676 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/searchlib/docstore/filechunk.h>
+#include <vespa/searchlib/util/filekit.h>
+#include <vespa/vespalib/util/stringfmt.h>
+#include <stdexcept>
+#include <vespa/log/log.h>
+#include <map>
+#include <vespa/vespalib/data/fileheader.h>
+#include "data_store_file_chunk_stats.h"
+
+LOG_SETUP(".search.filechunk");
+
+using vespalib::GenericHeader;
+using vespalib::FileHeader;
+using vespalib::IoException;
+using vespalib::getLastErrorString;
+using vespalib::getErrorString;
+
+
+namespace search
+{
+
+namespace {
+
+constexpr size_t ALIGNMENT=0x1000;
+constexpr size_t ENTRY_BIAS_SIZE=8;
+
+}
+
+using vespalib::make_string;
+
+SummaryException::SummaryException(const vespalib::stringref &msg,
+ FastOS_FileInterface &file,
+ const vespalib::stringref &location)
+ : IoException(make_string("%s : Failing file = '%s'. Reason given by OS = '%s'",
+ msg.c_str(), file.GetFileName(), file.getLastErrorString().c_str()),
+ getErrorType(file.GetLastError()), location)
+{
+}
+
+FileChunk::ChunkInfo::ChunkInfo(uint64_t offset, uint32_t size, uint64_t lastSerial)
+ : _lastSerial(lastSerial),
+ _offset(offset),
+ _size(size)
+{
+ assert(valid());
+}
+
+
+LidInfo::LidInfo(uint32_t fileId, uint32_t chunkId, uint32_t sz)
+{
+ _value.v.fileId = fileId;
+ _value.v.chunkId = chunkId;
+ _value.v.size = sz;
+ if (fileId >= (1 << 10)) {
+ throw std::runtime_error(
+ make_string("LidInfo(fileId=%u, chunkId=%u, size=%u) has invalid fileId larger than %d",
+ fileId, chunkId, sz, (1 << 10) - 1));
+ }
+ if (chunkId >= (1 << 22)) {
+ throw std::runtime_error(
+ make_string("LidInfo(fileId=%u, chunkId=%u, size=%u) has invalid chunkId larger than %d",
+ fileId, chunkId, sz, (1 << 22) - 1));
+ }
+}
+
+DirectIORandRead::DirectIORandRead(const vespalib::string & fileName)
+ : _file(fileName.c_str()),
+ _alignment(1),
+ _granularity(1),
+ _maxChunkSize(0x100000)
+{
+ _file.EnableDirectIO();
+ if (_file.OpenReadOnly()) {
+ if (!_file.GetDirectIORestrictions(_alignment, _granularity, _maxChunkSize)) {
+ LOG(debug, "Direct IO setup failed for file %s due to %s",
+ _file.GetFileName(), _file.getLastErrorString().c_str());
+ }
+ } else {
+ throw SummaryException("Failed opening data file", _file, VESPA_STRLOC);
+ }
+}
+
+FileRandRead::FSP
+DirectIORandRead::read(size_t offset, vespalib::DataBuffer & buffer, size_t sz)
+{
+ size_t padBefore(0);
+ size_t padAfter(0);
+ bool directio = _file.DirectIOPadding(offset, sz, padBefore, padAfter);
+ buffer.clear();
+ buffer.ensureFree(padBefore + sz + padAfter + _alignment - 1);
+ if (directio) {
+ size_t unAligned = (-reinterpret_cast<size_t>(buffer.getFree()) & (_alignment - 1));
+ buffer.moveFreeToData(unAligned);
+ buffer.moveDataToDead(unAligned);
+ }
+ // XXX needs to use pread or file-position-mutex
+ _file.ReadBuf(buffer.getFree(), padBefore + sz + padAfter, offset - padBefore);
+ buffer.moveFreeToData(padBefore + sz);
+ buffer.moveDataToDead(padBefore);
+ return FSP();
+}
+
+
+int64_t
+DirectIORandRead::getSize(void)
+{
+ return _file.GetSize();
+}
+
+
+MMapRandRead::MMapRandRead(const vespalib::string & fileName, int mmapFlags, int fadviseOptions)
+ : _file(fileName.c_str())
+{
+ _file.enableMemoryMap(mmapFlags);
+ _file.setFAdviseOptions(fadviseOptions);
+ if ( ! _file.OpenReadOnly()) {
+ throw SummaryException("Failed opening data file", _file, VESPA_STRLOC);
+ }
+}
+
+
+NormalRandRead::NormalRandRead(const vespalib::string & fileName)
+ : _file(fileName.c_str())
+{
+ if ( ! _file.OpenReadOnly()) {
+ throw SummaryException("Failed opening data file", _file, VESPA_STRLOC);
+ }
+}
+
+FileRandRead::FSP
+MMapRandRead::read(size_t offset, vespalib::DataBuffer & buffer, size_t sz)
+{
+ const char *ptr = static_cast<const char *>(_file.MemoryMapPtr(offset));
+ vespalib::DataBuffer(ptr, sz).swap(buffer);
+ return FSP();
+}
+
+int64_t
+MMapRandRead::getSize(void)
+{
+ return _file.GetSize();
+}
+
+MMapRandReadDynamic::MMapRandReadDynamic(const vespalib::string &fileName, int mmapFlags, int fadviseOptions)
+ : _fileName(fileName),
+ _mmapFlags(mmapFlags),
+ _fadviseOptions(fadviseOptions)
+{
+ reopen();
+}
+
+void
+MMapRandReadDynamic::reopen()
+{
+ std::unique_ptr<FastOS_File> file(new FastOS_File(_fileName.c_str()));
+ file->enableMemoryMap(_mmapFlags);
+ file->setFAdviseOptions(_fadviseOptions);
+ if (file->OpenReadOnly()) {
+ _holder.set(file.release());
+ _holder.latch();
+ } else {
+ throw SummaryException("Failed opening data file", *file, VESPA_STRLOC);
+ }
+}
+
+FileRandRead::FSP
+MMapRandReadDynamic::read(size_t offset, vespalib::DataBuffer & buffer, size_t sz)
+{
+ FSP file(_holder.get());
+ const char * data(static_cast<const char *>(file->MemoryMapPtr(offset)));
+ if ((data == NULL) || (file->MemoryMapPtr(offset+sz-1) == NULL)) {
+ // Must check that both start and end of file is mapped in.
+ // Previous reopen could happend during a partial write of this buffer.
+ // This should fix bug 4630695.
+ reopen();
+ file = _holder.get();
+ data = static_cast<const char *>(file->MemoryMapPtr(offset));
+ }
+ vespalib::DataBuffer(data, sz).swap(buffer);
+ return file;
+}
+
+int64_t
+MMapRandReadDynamic::getSize(void)
+{
+ return _holder.get()->GetSize();
+}
+
+FileRandRead::FSP
+NormalRandRead::read(size_t offset, vespalib::DataBuffer & buffer, size_t sz)
+{
+ buffer.clear();
+ buffer.ensureFree(sz);
+ _file.ReadBuf(buffer.getFree(), sz, offset);
+ buffer.moveFreeToData(sz);
+ return FSP();
+}
+
+int64_t
+NormalRandRead::getSize(void)
+{
+ return _file.GetSize();
+}
+
+vespalib::string
+FileChunk::NameId::createName(const vespalib::string &baseName) const {
+ vespalib::asciistream os;
+ os << baseName << '/' << vespalib::setfill('0') << vespalib::setw(19) << getId();
+ return os.str();
+}
+
+vespalib::string
+FileChunk::createIdxFileName(const vespalib::string & name) {
+ return name + ".idx";
+}
+
+vespalib::string
+FileChunk::createDatFileName(const vespalib::string & name) {
+ return name + ".dat";
+}
+
+FileChunk::FileChunk(FileId fileId, NameId nameId, const vespalib::string & baseName,
+ const TuneFileSummary & tune, const IBucketizer * bucketizer, bool skipCrcOnRead)
+ : _fileId(fileId),
+ _nameId(nameId),
+ _name(nameId.createName(baseName)),
+ _skipCrcOnRead(skipCrcOnRead),
+ _erasedCount(0),
+ _erasedBytes(0),
+ _diskFootprint(0),
+ _sumNumBuckets(0),
+ _numUniqueBuckets(0),
+ _file(),
+ _bucketizer(bucketizer),
+ _addedBytes(0),
+ _tune(tune),
+ _dataFileName(createDatFileName(_name)),
+ _idxFileName(createIdxFileName(_name)),
+ _chunkInfo(),
+ _dataHeaderLen(0u),
+ _idxHeaderLen(0u),
+ _lastPersistedSerialNum(0),
+ _modificationTime()
+{
+ FastOS_File dataFile(_dataFileName.c_str());
+ if (dataFile.OpenReadOnly()) {
+ if (!dataFile.Sync()) {
+ throw SummaryException("Failed syncing dat file", dataFile, VESPA_STRLOC);
+ }
+ _diskFootprint += dataFile.GetSize();
+ FastOS_File idxFile(_idxFileName.c_str());
+ if (idxFile.OpenReadOnly()) {
+ if (!idxFile.Sync()) {
+ throw SummaryException("Failed syncing idx file", idxFile, VESPA_STRLOC);
+ }
+ _diskFootprint += idxFile.GetSize();
+ _modificationTime = FileKit::getModificationTime(_idxFileName);
+ } else {
+ dataFile.Close();
+ throw SummaryException("Failed opening idx file", idxFile, VESPA_STRLOC);
+ }
+ } else {
+ }
+}
+
+FileChunk::~FileChunk()
+{
+}
+
+void
+FileChunk::addNumBuckets(size_t numBucketsInChunk)
+{
+ _sumNumBuckets += numBucketsInChunk;
+}
+
+class TmpChunkMeta : public ChunkMeta,
+ public std::vector<LidMeta>
+{
+public:
+ void fill(vespalib::nbostream & is) {
+ resize(getNumEntries());
+ for (LidMeta & lm : *this) {
+ lm.deserialize(is);
+ }
+ }
+};
+
+typedef vespalib::Array<TmpChunkMeta, vespalib::DefaultAlloc> TmpChunkMetaV;
+
+namespace {
+
+void
+verifyOrAssert(const TmpChunkMetaV & v)
+{
+ for (auto prev(v.begin()), it(prev); it != v.end(); ++it) {
+ assert(prev->getLastSerial() <= it->getLastSerial());
+ prev = it;
+ }
+}
+
+vespalib::string eraseErrorMsg(const vespalib::string & fileName, int error) {
+ return make_string("Error erasing file '%s'. Error is '%s'",
+ fileName.c_str(), getErrorString(error).c_str());
+}
+
+}
+
+void
+FileChunk::erase()
+{
+ _file.reset();
+ if (!FastOS_File::Delete(_idxFileName.c_str()) && (errno != ENOENT)) {
+ throw std::runtime_error(eraseErrorMsg(_idxFileName, errno));
+ }
+ if (!FastOS_File::Delete(_dataFileName.c_str()) && (errno != ENOENT)) {
+ throw std::runtime_error(eraseErrorMsg(_dataFileName, errno));
+ }
+}
+
+size_t
+FileChunk::updateLidMap(ISetLid & ds, uint64_t serialNum)
+{
+ size_t sz(0);
+ assert(_chunkInfo.empty());
+
+ FastOS_File idxFile(_idxFileName.c_str());
+ idxFile.enableMemoryMap(0);
+ if (idxFile.OpenReadOnly()) {
+ if (idxFile.IsMemoryMapped()) {
+ const int64_t fileSize = idxFile.GetSize();
+ if (_idxHeaderLen == 0) {
+ _idxHeaderLen = readIdxHeader(idxFile);
+ }
+ vespalib::nbostream is(static_cast<const char *>(idxFile.MemoryMapPtr(0)) + _idxHeaderLen,
+ fileSize - _idxHeaderLen);
+ TmpChunkMetaV tempVector;
+ tempVector.reserve(fileSize/(sizeof(ChunkMeta)+sizeof(LidMeta)));
+ while ( ! is.empty() && is.good()) {
+ const int64_t lastKnownGoodPos = _idxHeaderLen + is.rp();
+ tempVector.push_back(TmpChunkMeta());
+ TmpChunkMeta & chunkMeta(tempVector.back());
+ try {
+ chunkMeta.deserialize(is);
+ chunkMeta.fill(is);
+ } catch (const vespalib::IllegalStateException & e) {
+ LOG(warning, "Exception deserializing idx file : %s", e.what());
+ LOG(warning, "File '%s' seems to be partially truncated. Will truncate from size=%ld to %ld",
+ _idxFileName.c_str(), fileSize, lastKnownGoodPos);
+ FastOS_File toTruncate(_idxFileName.c_str());
+ if ( toTruncate.OpenReadWrite()) {
+ if (toTruncate.SetSize(lastKnownGoodPos)) {
+ tempVector.resize(tempVector.size() - 1);
+ } else {
+ throw SummaryException("SetSize(%ld) failed.", toTruncate, VESPA_STRLOC);
+ }
+ } else {
+ throw SummaryException("Open for truncation failed.", toTruncate, VESPA_STRLOC);
+ }
+ }
+ }
+ if ( ! tempVector.empty()) {
+ verifyOrAssert(tempVector);
+ if (tempVector[0].getLastSerial() < serialNum) {
+ LOG(warning,
+ "last serial num(%ld) from previous file is "
+ "bigger than my first(%ld). That is odd."
+ "Current filename is '%s'",
+ serialNum, tempVector[0].getLastSerial(),
+ _idxFileName.c_str());
+ serialNum = tempVector[0].getLastSerial();
+ }
+ BucketDensityComputer globalBucketMap(_bucketizer);
+ // Guard comes from the same bucketizer so the same guard can be used
+ // for both local and global BucketDensityComputer
+ vespalib::GenerationHandler::Guard bucketizerGuard = globalBucketMap.getGuard();
+ for (const TmpChunkMeta & chunkMeta : tempVector) {
+ assert(serialNum <= chunkMeta.getLastSerial());
+ BucketDensityComputer bucketMap(_bucketizer);
+ for (size_t i(0), m(chunkMeta.getNumEntries()); i < m; i++) {
+ const LidMeta & lidMeta(chunkMeta[i]);
+ bucketMap.recordLid(bucketizerGuard, lidMeta.getLid(), lidMeta.size());
+ globalBucketMap.recordLid(bucketizerGuard, lidMeta.getLid(), lidMeta.size());
+ ds.setLid(lidMeta.getLid(), LidInfo(getFileId().getId(), _chunkInfo.size(), lidMeta.size()));
+ _addedBytes += adjustSize(lidMeta.size());
+ }
+ serialNum = chunkMeta.getLastSerial();
+ addNumBuckets(bucketMap.getNumBuckets());
+ _chunkInfo.push_back(ChunkInfo(chunkMeta.getOffset(), chunkMeta.getSize(), chunkMeta.getLastSerial()));
+ assert(serialNum >= _lastPersistedSerialNum);
+ _lastPersistedSerialNum = serialNum;
+ }
+ _numUniqueBuckets = globalBucketMap.getNumBuckets();
+ }
+ } else {
+ assert(idxFile.getSize() == 0);
+ }
+ } else {
+ assert(false);
+ }
+ return sz;
+}
+
+void
+FileChunk::enableRead()
+{
+ if (_tune._randRead.getWantDirectIO()) {
+ LOG(debug, "enableRead(): DirectIORandRead: file='%s'", _dataFileName.c_str());
+ _file.reset(new DirectIORandRead(_dataFileName));
+ } else if (_tune._randRead.getWantMemoryMap()) {
+ const int mmapFlags(_tune._randRead.getMemoryMapFlags());
+ const int fadviseOptions(_tune._randRead.getAdvise());
+ if (frozen()) {
+ LOG(debug, "enableRead(): MMapRandRead: file='%s'", _dataFileName.c_str());
+ _file.reset(new MMapRandRead(_dataFileName, mmapFlags, fadviseOptions));
+ } else {
+ LOG(debug, "enableRead(): MMapRandReadDynamic: file='%s'", _dataFileName.c_str());
+ _file.reset(new MMapRandReadDynamic(_dataFileName, mmapFlags, fadviseOptions));
+ }
+ } else {
+ LOG(debug, "enableRead(): NormalRandRead: file='%s'", _dataFileName.c_str());
+ _file.reset(new NormalRandRead(_dataFileName));
+ }
+ _dataHeaderLen = readDataHeader(*_file);
+ if (_dataHeaderLen == 0u) {
+ throw std::runtime_error(make_string("bad file header: %s", _dataFileName.c_str()));
+ }
+}
+
+size_t FileChunk::adjustSize(size_t sz) {
+ return sz + ENTRY_BIAS_SIZE;
+}
+void
+FileChunk::remove(uint32_t lid, uint32_t size)
+{
+ (void) lid;
+ _erasedCount++;
+ _erasedBytes += adjustSize(size);
+}
+
+uint64_t
+FileChunk::getLastPersistedSerialNum() const
+{
+ return _lastPersistedSerialNum;
+}
+
+fastos::TimeStamp
+FileChunk::getModificationTime() const
+{
+ return _modificationTime;
+}
+
+void
+FileChunk::appendTo(const IGetLid & db, IWriteData & dest,
+ uint32_t numChunks,
+ IFileChunkVisitorProgress *visitorProgress)
+{
+ assert(frozen() || visitorProgress);
+ vespalib::GenerationHandler::Guard lidReadGuard(db.getLidReadGuard());
+ assert(numChunks <= getNumChunks());
+ for (size_t chunkId(0); chunkId < numChunks; chunkId++) {
+ const ChunkInfo & cInfo(_chunkInfo[chunkId]);
+ vespalib::DataBuffer whole(0ul, ALIGNMENT);
+ FileRandRead::FSP keepAlive(_file->read(cInfo.getOffset(), whole, cInfo.getSize()));
+ Chunk chunk(chunkId, whole.getData(), whole.getDataLen());
+ const Chunk::LidList ll(chunk.getUniqueLids());
+ for (const Chunk::Entry & e : ll) {
+ LidInfo lidInfo(getFileId().getId(), chunk.getId(), e.netSize());
+ if (db.getLid(lidReadGuard, e.getLid()) == lidInfo) {
+ vespalib::LockGuard guard(db.getLidGuard(e.getLid()));
+ if (db.getLid(lidReadGuard, e.getLid()) == lidInfo) {
+ // I am still in use so I need to taken care of.
+ vespalib::ConstBufferRef data(chunk.getLid(e.getLid()));
+ dest.write(guard, chunk.getId(), e.getLid(), data.c_str(), data.size());
+ }
+ }
+ }
+ if (visitorProgress != NULL) {
+ visitorProgress->updateProgress();
+ }
+ }
+ dest.close();
+}
+
+void
+FileChunk::read(LidInfoWithLidV::const_iterator begin, size_t count, IBufferVisitor & visitor) const
+{
+ if (count == 0) { return; }
+ uint32_t prevChunk = begin->getChunkId();
+ uint32_t start(0);
+ for (size_t i(0); i < count; i++) {
+ const LidInfoWithLid & li = *(begin + i);
+ if (li.getChunkId() != prevChunk) {
+ ChunkInfo ci = _chunkInfo[prevChunk];
+ read(begin + start, i - start, ci, visitor);
+ prevChunk = li.getChunkId();
+ start = i;
+ }
+ }
+ ChunkInfo ci = _chunkInfo[prevChunk];
+ read(begin + start, count - start, ci, visitor);
+}
+
+void
+FileChunk::read(LidInfoWithLidV::const_iterator begin, size_t count, ChunkInfo ci, IBufferVisitor & visitor) const
+{
+ vespalib::DataBuffer whole(0ul, ALIGNMENT);
+ FileRandRead::FSP keepAlive = _file->read(ci.getOffset(), whole, ci.getSize());
+ Chunk chunk(begin->getChunkId(), whole.getData(), whole.getDataLen(), _skipCrcOnRead);
+ for (size_t i(0); i < count; i++) {
+ const LidInfoWithLid & li = *(begin + i);
+ vespalib::ConstBufferRef buf = chunk.getLid(li.getLid());
+ if (buf.size() != 0) {
+ visitor.visit(li.getLid(), buf);
+ }
+ }
+}
+
+ssize_t
+FileChunk::read(uint32_t lid, SubChunkId chunkId,
+ vespalib::DataBuffer & buffer) const
+{
+ return (chunkId < _chunkInfo.size())
+ ? read(lid, chunkId, _chunkInfo[chunkId], buffer)
+ : -1;
+}
+
+ssize_t
+FileChunk::read(uint32_t lid, SubChunkId chunkId, const ChunkInfo & chunkInfo,
+ vespalib::DataBuffer & buffer) const
+{
+ vespalib::DataBuffer whole(0ul, ALIGNMENT);
+ FileRandRead::FSP keepAlive(_file->read(chunkInfo.getOffset(), whole, chunkInfo.getSize()));
+ Chunk chunk(chunkId, whole.getData(), whole.getDataLen(), _skipCrcOnRead);
+ return chunk.read(lid, buffer);
+}
+
+uint64_t
+FileChunk::readDataHeader(FileRandRead &datFile)
+{
+ uint64_t dataHeaderLen(0);
+ int64_t fileSize = datFile.getSize();
+ uint32_t hl = GenericHeader::getMinSize();
+ if (fileSize >= hl) {
+ vespalib::DataBuffer h(hl, ALIGNMENT);
+ datFile.read(0, h, hl);
+ GenericHeader::BufferReader rd(h);
+ uint32_t headerLen = GenericHeader::readSize(rd);
+ if (headerLen <= fileSize) {
+ dataHeaderLen = headerLen;
+ }
+ }
+ return dataHeaderLen;
+}
+
+
+uint64_t
+FileChunk::readIdxHeader(FastOS_FileInterface &idxFile)
+{
+ int64_t fileSize = idxFile.GetSize();
+ uint32_t hl = GenericHeader::getMinSize();
+ uint64_t idxHeaderLen = 0;
+ if (fileSize >= hl) {
+ GenericHeader::MMapReader rd(static_cast<const char *> (idxFile.MemoryMapPtr(0)), hl);
+ uint32_t headerLen = GenericHeader::readSize(rd);
+ if (headerLen <= fileSize) {
+ idxHeaderLen = headerLen;
+ }
+ }
+ if (idxHeaderLen == 0u) {
+ throw SummaryException("bad file header", idxFile, VESPA_STRLOC);
+ }
+ return idxHeaderLen;
+}
+
+void
+FileChunk::verify(bool reportOnly) const
+{
+ (void) reportOnly;
+ LOG(info,
+ "Verifying file '%s' with fileid '%u'. "
+ "erased-count='%u' and erased-bytes='%lu'. diskFootprint='%lu'",
+ _name.c_str(), _fileId.getId(),
+ _erasedCount, _erasedBytes, _diskFootprint);
+ size_t lastSerial(0);
+ size_t chunkId(0);
+ bool errorInPrev(false);
+ for (const ChunkInfo & ci : _chunkInfo) {
+ vespalib::DataBuffer whole(0ul, ALIGNMENT);
+ FileRandRead::FSP keepAlive(_file->read(ci.getOffset(), whole, ci.getSize()));
+ try {
+ Chunk chunk(chunkId++, whole.getData(), whole.getDataLen());
+ assert(chunk.getLastSerial() >= lastSerial);
+ lastSerial = chunk.getLastSerial();
+ if (errorInPrev) {
+ LOG(error, "Last serial number in first good chunk is %ld", chunk.getLastSerial());
+ errorInPrev = false;
+ }
+ } catch (const std::exception & e) {
+ LOG(error,
+ "Errors in chunk number %ld/%ld at file offset %lu and size %u."
+ " Last known good serial number = %ld\n.Got Exception : %s",
+ chunkId, _chunkInfo.size(), ci.getOffset(), ci.getSize(), lastSerial, e.what());
+ errorInPrev = true;
+ }
+ }
+}
+
+uint32_t
+FileChunk::getNumChunks() const
+{
+ return _chunkInfo.size();
+}
+
+size_t
+FileChunk::getMemoryFootprint() const
+{
+ // The memory footprint does not vary before or after flush
+ // Once frozen, there is no variable component.
+ // It is all captured by getMemoryMetaFootprint()
+ return 0;
+}
+
+size_t
+FileChunk::getMemoryMetaFootprint() const
+{
+ return sizeof(*this) + _chunkInfo.byteSize();
+}
+
+bool
+FileChunk::isIdxFileEmpty(const vespalib::string & name)
+{
+ vespalib::string fileName(name + ".idx");
+ FastOS_File idxFile(fileName.c_str());
+ idxFile.enableMemoryMap(0);
+ if (idxFile.OpenReadOnly()) {
+ if (idxFile.IsMemoryMapped()) {
+ int64_t fileSize = idxFile.getSize();
+ int64_t idxHeaderLen = FileChunk::readIdxHeader(idxFile);
+ return fileSize <= idxHeaderLen;
+ } else if ( idxFile.getSize() == 0u) {
+ return true;
+ } else {
+ throw SummaryException("Failed opening idx file for memorymapping", idxFile, VESPA_STRLOC);
+ }
+ } else {
+ throw SummaryException("Failed opening idx file readonly ", idxFile, VESPA_STRLOC);
+ }
+ return false;
+}
+
+void
+FileChunk::eraseIdxFile(const vespalib::string & name)
+{
+ vespalib::string fileName(name + ".idx");
+ if ( ! FastOS_File::Delete(fileName.c_str())) {
+ throw std::runtime_error(make_string("Failed to delete '%s'", fileName.c_str()));
+ }
+}
+
+
+DataStoreFileChunkStats
+FileChunk::getStats() const
+{
+ uint64_t diskFootprint = getDiskFootprint();
+ uint64_t diskBloat = getDiskBloat();
+ double bucketSpread = getBucketSpread();
+ uint64_t serialNum = getLastPersistedSerialNum();
+ uint64_t nameId = getNameId().getId();
+ return DataStoreFileChunkStats(diskFootprint, diskBloat, bucketSpread,
+ serialNum, serialNum, nameId);
+}
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/docstore/filechunk.h b/searchlib/src/vespa/searchlib/docstore/filechunk.h
new file mode 100644
index 00000000000..9ef2e733e7b
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/docstore/filechunk.h
@@ -0,0 +1,338 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/docstore/chunk.h>
+#include <vespa/searchlib/docstore/ibucketizer.h>
+#include <vespa/vespalib/util/ptrholder.h>
+#include <vespa/vespalib/util/sync.h>
+#include <vespa/vespalib/stllike/hash_map.h>
+#include <vespa/searchlib/common/tunefileinfo.h>
+#include <vespa/vespalib/util/generationhandler.h>
+#include <vespa/vespalib/util/exceptions.h>
+
+namespace search
+{
+
+class IDataStoreVisitorProgress;
+class DataStoreFileChunkStats;
+
+class LidInfo {
+public:
+ LidInfo() : _value() { }
+ LidInfo(uint64_t rep) { _value.r = rep; }
+ LidInfo(uint32_t fileId, uint32_t chunkId, uint32_t size);
+ uint32_t getFileId() const { return _value.v.fileId; }
+ uint32_t getChunkId() const { return _value.v.chunkId; }
+ uint32_t size() const { return _value.v.size; }
+ operator uint64_t () const { return _value.r; }
+ bool empty() const { return size() == 0; }
+ bool valid() const { return _value.r != std::numeric_limits<uint64_t>::max(); }
+
+ bool operator==(const LidInfo &b) const {
+ return (getFileId() == b.getFileId()) &&
+ (getChunkId() == b.getChunkId());
+ }
+ bool operator < (const LidInfo &b) const {
+ return (getFileId() == b.getFileId())
+ ? (getChunkId() < b.getChunkId())
+ : (getFileId() < b.getFileId());
+ }
+ static uint32_t getMaxFileNum() { return 1 << 10; }
+ static uint32_t getMaxChunkNum() { return 1 << 22; }
+private:
+ struct Rep {
+ uint16_t fileId : 10;
+ uint32_t chunkId : 22;
+ uint32_t size;
+ };
+ union Value {
+ Value() : r(std::numeric_limits<uint64_t>::max()) { }
+ Rep v;
+ uint64_t r;
+ } _value;
+};
+
+class LidInfoWithLid : public LidInfo {
+public:
+ LidInfoWithLid(LidInfo lidInfo, uint32_t lid) : LidInfo(lidInfo), _lid(lid) { }
+ uint32_t getLid() const { return _lid; }
+private:
+ uint32_t _lid;
+};
+
+typedef std::vector<LidInfoWithLid> LidInfoWithLidV;
+
+class ISetLid
+{
+public:
+ virtual ~ISetLid() { }
+ virtual void setLid(uint32_t lid, const LidInfo & lm) = 0;
+};
+
+class IGetLid
+{
+public:
+ typedef vespalib::GenerationHandler::Guard Guard;
+ virtual ~IGetLid() { }
+
+ virtual LidInfo getLid(Guard & guard, uint32_t lid) const = 0;
+ virtual vespalib::LockGuard getLidGuard(uint32_t lid) const = 0;
+ virtual Guard getLidReadGuard() const = 0;
+};
+
+class IWriteData
+{
+public:
+ typedef std::unique_ptr<IWriteData> UP;
+ virtual ~IWriteData() { }
+
+ virtual void write(vespalib::LockGuard guard, uint32_t chunkId, uint32_t lid, const void *buffer, size_t sz) = 0;
+ virtual void close() = 0;
+};
+
+class IFileChunkVisitorProgress
+{
+public:
+ virtual ~IFileChunkVisitorProgress() { }
+ virtual void updateProgress() = 0;
+};
+
+class FileRandRead
+{
+public:
+ typedef std::shared_ptr<FastOS_File> FSP;
+ virtual ~FileRandRead() { }
+ virtual FSP read(size_t offset, vespalib::DataBuffer & buffer, size_t sz) = 0;
+ virtual int64_t getSize(void) = 0;
+};
+
+class DirectIORandRead : public FileRandRead
+{
+public:
+ DirectIORandRead(const vespalib::string & fileName);
+ FSP read(size_t offset, vespalib::DataBuffer & buffer, size_t sz) override;
+ int64_t getSize(void) override;
+private:
+ FastOS_File _file;
+ size_t _alignment;
+ size_t _granularity;
+ size_t _maxChunkSize;
+};
+
+class MMapRandRead : public FileRandRead
+{
+public:
+ MMapRandRead(const vespalib::string & fileName, int mmapFlags, int fadviseOptions);
+ FSP read(size_t offset, vespalib::DataBuffer & buffer, size_t sz) override;
+ int64_t getSize(void) override;
+ const void * getMapping() { return _file.MemoryMapPtr(0); }
+private:
+ FastOS_File _file;
+};
+
+class MMapRandReadDynamic : public FileRandRead
+{
+public:
+ MMapRandReadDynamic(const vespalib::string & fileName, int mmapFlags, int fadviseOptions);
+ FSP read(size_t offset, vespalib::DataBuffer & buffer, size_t sz) override;
+ int64_t getSize(void) override;
+private:
+ void reopen();
+ vespalib::string _fileName;
+ vespalib::PtrHolder<FastOS_File> _holder;
+ int _mmapFlags;
+ int _fadviseOptions;
+};
+
+class NormalRandRead : public FileRandRead
+{
+public:
+ NormalRandRead(const vespalib::string & fileName);
+ FSP read(size_t offset, vespalib::DataBuffer & buffer, size_t sz) override;
+ int64_t getSize(void) override;
+private:
+ FastOS_File _file;
+};
+
+class BucketDensityComputer
+{
+public:
+ BucketDensityComputer(const IBucketizer * bucketizer) : _bucketizer(bucketizer), _count(0) { }
+ void recordLid(const vespalib::GenerationHandler::Guard & guard, uint32_t lid, uint32_t dataSize) {
+ if (_bucketizer && (dataSize > 0)) {
+ _count++;
+ _bucketSet[_bucketizer->getBucketOf(guard, lid)]++;
+ }
+ }
+ size_t getNumBuckets() const { return _bucketSet.size(); }
+ vespalib::GenerationHandler::Guard getGuard() const {
+ return _bucketizer
+ ? _bucketizer->getGuard()
+ : vespalib::GenerationHandler::Guard();
+ }
+private:
+ const IBucketizer * _bucketizer;
+ size_t _count;
+ vespalib::hash_map<uint64_t, uint32_t> _bucketSet;
+};
+
+class FileChunk
+{
+public:
+ class NameId {
+ public:
+ explicit NameId(size_t id) : _id(id) { }
+ uint64_t getId() const { return _id; }
+ vespalib::string createName(const vespalib::string &baseName) const;
+ bool operator == (const NameId & rhs) const { return _id == rhs._id; }
+ bool operator != (const NameId & rhs) const { return _id != rhs._id; }
+ bool operator < (const NameId & rhs) const { return _id < rhs._id; }
+ NameId next() const { return NameId(_id + 1); }
+ static NameId first() { return NameId(0u); }
+ static NameId last() { return NameId(std::numeric_limits<uint64_t>::max()); }
+ private:
+ uint64_t _id;
+ };
+ class FileId {
+ public:
+ explicit FileId(uint32_t id) : _id(id) { }
+ uint32_t getId() const { return _id; }
+ bool operator != (const FileId & rhs) const { return _id != rhs._id; }
+ bool operator == (const FileId & rhs) const { return _id == rhs._id; }
+ bool operator < (const FileId & rhs) const { return _id < rhs._id; }
+ FileId prev() const { return FileId(_id - 1); }
+ FileId next() const { return FileId(_id + 1); }
+ bool isActive() const { return _id < 0; }
+ static FileId first() { return FileId(0u); }
+ static FileId active() { return FileId(-1); }
+ private:
+ int32_t _id;
+ };
+ typedef vespalib::hash_map<uint32_t, vespalib::DataBuffer::UP> LidBufferMap;
+ typedef std::unique_ptr<FileChunk> UP;
+ typedef uint32_t SubChunkId;
+ FileChunk(FileId fileId, NameId nameId, const vespalib::string & baseName, const TuneFileSummary & tune, const IBucketizer * bucketizer, bool skipCrcOnRead);
+ virtual ~FileChunk();
+
+ virtual size_t updateLidMap(ISetLid & lidMap, uint64_t serialNum);
+ virtual ssize_t read(uint32_t lid, SubChunkId chunk, vespalib::DataBuffer & buffer) const;
+ virtual void read(LidInfoWithLidV::const_iterator begin, size_t count, IBufferVisitor & visitor) const;
+ void remove(uint32_t lid, uint32_t size);
+ virtual size_t getDiskFootprint() const { return _diskFootprint; }
+ virtual size_t getMemoryFootprint() const;
+ virtual size_t getMemoryMetaFootprint() const;
+
+ virtual size_t getDiskHeaderFootprint(void) const { return _dataHeaderLen + _idxHeaderLen; }
+ size_t getDiskBloat() const {
+ return (_addedBytes == 0)
+ ? getDiskFootprint()
+ : size_t(getDiskFootprint() * double(_erasedBytes)/_addedBytes);
+ }
+ double getBucketSpread() const {
+ return ((_chunkInfo.empty() || (_numUniqueBuckets == 0))
+ ? 1.0
+ : double(_sumNumBuckets)/_numUniqueBuckets);
+ }
+ void addNumBuckets(size_t numBucketsInChunk);
+
+ FileId getFileId() const { return _fileId; }
+ NameId getNameId() const { return _nameId; }
+ size_t getBloatCount() const { return _erasedCount; }
+ uint64_t getLastPersistedSerialNum() const;
+ virtual fastos::TimeStamp getModificationTime() const;
+ virtual bool frozen() const { return true; }
+ const vespalib::string & getName() const { return _name; }
+ void compact(const IGetLid & iGetLid);
+ void appendTo(const IGetLid & db, IWriteData & dest, uint32_t numChunks, IFileChunkVisitorProgress *visitorProgress);
+ /**
+ * Must be called after chunk has been created to allow correct
+ * underlying file object to be created. Must be called before
+ * any read.
+ */
+ void enableRead();
+ // This should never be done to something that is used. Backing
+ // Files are removed and everythings dies.
+ void erase();
+ /**
+ * This will spinn through the data and verify the content of both
+ * the '.dat' and the '.idx' files.
+ *
+ * @param reportOnly If set inconsitencies will be written to 'stderr'.
+ */
+ void verify(bool reportOnly) const;
+
+ uint32_t getNumChunks() const;
+ size_t getNumBuckets() const { return _sumNumBuckets; }
+ size_t getNumUniqueBuckets() const { return _numUniqueBuckets; }
+
+ virtual DataStoreFileChunkStats getStats() const;
+
+ /**
+ * Read header and return number of bytes it consist of.
+ */
+ static uint64_t readIdxHeader(FastOS_FileInterface &idxFile);
+ static uint64_t readDataHeader(FileRandRead &idxFile);
+ static bool isIdxFileEmpty(const vespalib::string & name);
+ static void eraseIdxFile(const vespalib::string & name);
+ static vespalib::string createIdxFileName(const vespalib::string & name);
+ static vespalib::string createDatFileName(const vespalib::string & name);
+private:
+ typedef std::unique_ptr<FileRandRead> File;
+ void loadChunkInfo();
+ const FileId _fileId;
+ const NameId _nameId;
+ const vespalib::string _name;
+ const bool _skipCrcOnRead;
+ uint32_t _erasedCount;
+ size_t _erasedBytes;
+ size_t _diskFootprint;
+ size_t _sumNumBuckets;
+ size_t _numUniqueBuckets;
+ File _file;
+protected:
+ void setDiskFootprint(size_t sz) { _diskFootprint = sz; }
+ static size_t adjustSize(size_t sz);
+
+ class ChunkInfo
+ {
+ public:
+ ChunkInfo() : _lastSerial(0), _offset(0), _size(0) { }
+ ChunkInfo(size_t offset, uint32_t size, uint64_t lastSerial);
+ size_t getOffset() const { return _offset; }
+ uint32_t getSize() const { return _size; }
+ uint64_t getLastSerial() const { return _lastSerial; }
+
+ bool valid() const { return (_offset != 0) || (_size != 0) || (_lastSerial != 0); }
+ private:
+ uint64_t _lastSerial;
+ size_t _offset;
+ uint32_t _size;
+ };
+
+ void setNumUniqueBuckets(size_t numUniqueBuckets) { _numUniqueBuckets = numUniqueBuckets; }
+ ssize_t read(uint32_t lid, SubChunkId chunkId, const ChunkInfo & chunkInfo, vespalib::DataBuffer & buffer) const;
+ void read(LidInfoWithLidV::const_iterator begin, size_t count, ChunkInfo ci, IBufferVisitor & visitor) const;
+
+ typedef vespalib::Array<ChunkInfo, vespalib::DefaultAlloc> ChunkInfoVector;
+ const IBucketizer * _bucketizer;
+ size_t _addedBytes;
+ TuneFileSummary _tune;
+ vespalib::string _dataFileName;
+ vespalib::string _idxFileName;
+ ChunkInfoVector _chunkInfo;
+ uint32_t _dataHeaderLen;
+ uint32_t _idxHeaderLen;
+ uint64_t _lastPersistedSerialNum;
+ fastos::TimeStamp _modificationTime;
+};
+
+class SummaryException : public vespalib::IoException
+{
+public:
+ SummaryException(const vespalib::stringref &msg,
+ FastOS_FileInterface & file,
+ const vespalib::stringref &location);
+};
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/docstore/ibucketizer.h b/searchlib/src/vespa/searchlib/docstore/ibucketizer.h
new file mode 100644
index 00000000000..63757f71f45
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/docstore/ibucketizer.h
@@ -0,0 +1,25 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/vespalib/util/generationhandler.h>
+#include <vespa/vespalib/util/buffer.h>
+
+namespace search {
+
+class IBucketizer
+{
+public:
+ typedef std::shared_ptr<IBucketizer> SP;
+ virtual ~IBucketizer() { }
+ virtual uint64_t getBucketOf(const vespalib::GenerationHandler::Guard & guard, uint32_t lid) const = 0;
+ virtual vespalib::GenerationHandler::Guard getGuard() const = 0;
+};
+
+class IBufferVisitor {
+public:
+ virtual ~IBufferVisitor() { }
+ virtual void visit(uint32_t lid, vespalib::ConstBufferRef buffer) = 0;
+};
+
+}
diff --git a/searchlib/src/vespa/searchlib/docstore/idatastore.cpp b/searchlib/src/vespa/searchlib/docstore/idatastore.cpp
new file mode 100644
index 00000000000..e0e788968c1
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/docstore/idatastore.cpp
@@ -0,0 +1,19 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "idatastore.h"
+
+namespace search {
+
+IDataStore::IDataStore(const vespalib::string& dirName) :
+ _nextId(0),
+ _dirName(dirName)
+{
+}
+
+IDataStore::~IDataStore()
+{
+}
+
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/docstore/idatastore.h b/searchlib/src/vespa/searchlib/docstore/idatastore.h
new file mode 100644
index 00000000000..0db069d2fa3
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/docstore/idatastore.h
@@ -0,0 +1,187 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/vespalib/data/databuffer.h>
+#include <vespa/vespalib/stllike/string.h>
+#include <vespa/vespalib/stllike/hash_map.h>
+#include <vespa/searchlib/docstore/ibucketizer.h>
+#include "data_store_file_chunk_stats.h"
+
+namespace search {
+
+class IDataStoreVisitor
+{
+public:
+ virtual ~IDataStoreVisitor() { }
+ virtual void visit(uint32_t lid, const void *buffer, size_t sz) = 0;
+};
+
+class IDataStoreVisitorProgress
+{
+public:
+ virtual ~IDataStoreVisitorProgress() { }
+ virtual void updateProgress(double progress) = 0;
+};
+
+/**
+ * Simple data storage for byte arrays.
+ * A small integer key is associated with each byte array;
+ * a zero-sized array is equivalent to a removed key.
+ * Changes are held in memory until flush() is called.
+ * A sync token is associated with each flush().
+ **/
+class IDataStore
+{
+public:
+ typedef std::vector<uint32_t> LidVector;
+ /**
+ * Construct an idata store.
+ * A data store has a base directory. The rest is up to the implementation.
+ *
+ * @param dirName The directory that will contain the data file.
+ **/
+ IDataStore(const vespalib::string & dirName);
+
+ /**
+ * Allow inhertitance.
+ **/
+ virtual ~IDataStore();
+
+ /**
+ * Read data from the data store into a buffer.
+ * @param lid The local ID associated with the data.
+ * @param buffer The buffer where the data will be written
+ * @param len On return is set to the number of bytes written to buffer
+ * @return true if non-zero-size data was found.
+ **/
+ virtual ssize_t read(uint32_t lid, vespalib::DataBuffer & buffer) const = 0;
+ virtual void read(const LidVector & lids, IBufferVisitor & visitor) const = 0;
+
+ /**
+ * Write data to the data store.
+ * @param serialNum The official unique reference number for this operation.
+ * @param lid The local ID associated with the data.
+ * @param buffer The source where the data will be fetched.
+ * @param len The number of bytes to fetch from the buffer.
+ **/
+ virtual void write(uint64_t serialNum, uint32_t lid, const void * buffer, size_t len) = 0;
+
+ /**
+ * Remove old data for a key. Equivalent to write with len==0.
+ * @param serialNum The official unique reference number for this operation.
+ * @param lid The local ID associated with the data.
+ **/
+ virtual void remove(uint64_t serialNum, uint32_t lid) = 0;
+
+ /**
+ * Flush in-memory data to disk.
+ **/
+ virtual void flush(uint64_t syncToken) = 0;
+
+ /*
+ * Prepare for flushing in-memory data to disk.
+ */
+ virtual uint64_t initFlush(uint64_t syncToken) = 0;
+
+ /**
+ * Calculate memory used by this instance. During flush() actual
+ * memory usage may be approximately twice the reported amount.
+ * @return memory usage (in bytes)
+ **/
+ virtual size_t memoryUsed() const = 0;
+
+ /**
+ * Calculates memory that is used for meta data by this instance. Calling
+ * flush() does not free this memory.
+ * @return memory usage (in bytes)
+ **/
+ virtual size_t memoryMeta() const = 0;
+
+ /**
+ * Calculates how much disk is used
+ * @return disk space used.
+ */
+ virtual size_t getDiskFootprint() const = 0;
+
+ /**
+ * Calculates how much disk is used by file headers.
+ * @return disk space used.
+ */
+ virtual size_t getDiskHeaderFootprint() const { return 0u; }
+ /**
+ * Calculates how much wasted space there is.
+ * @return disk bloat.
+ */
+ virtual size_t getDiskBloat() const = 0;
+
+ /**
+ * Calculates how much diskspace can be compacted during a flush.
+ * default is to return th ebloat limit, but as some targets have some internal limits
+ * to avoid misuse we let the report a more conservative number here if necessary.
+ * @return diskspace to be gained.
+ */
+ virtual size_t getMaxCompactGain() const { return getDiskBloat(); }
+
+
+ /**
+ * The sync token used for the last successful flush() operation,
+ * or 0 if no flush() has been performed yet.
+ * @return Last flushed sync token.
+ **/
+ virtual uint64_t lastSyncToken() const = 0;
+
+ /*
+ * The sync token used for last write operation.
+ */
+ virtual uint64_t tentativeLastSyncToken() const = 0;
+
+ /**
+ * The time of the last flush operation,
+ * or 0 if no flush has been performed yet.
+ * @return Time of last flush.
+ **/
+ virtual fastos::TimeStamp getLastFlushTime() const = 0;
+
+ /**
+ * Visit all data found in data store.
+ */
+ virtual void accept(IDataStoreVisitor &visitor, IDataStoreVisitorProgress &visitorProgress, bool prune) = 0;
+
+ /**
+ * Return cost of visiting all data found in data store.
+ */
+ virtual double getVisitCost() const = 0;
+
+ /*
+ * Return brief stats for data store.
+ */
+ virtual DataStoreStorageStats getStorageStats() const = 0;
+
+ /*
+ * Return detailed stats about underlying files for data store.
+ */
+ virtual std::vector<DataStoreFileChunkStats> getFileChunkStats() const = 0;
+
+ /**
+ * Get the number of entries (including removed IDs
+ * or gaps in the local ID sequence) in the data store.
+ * @return The next local ID expected to be used
+ */
+ uint64_t nextId() const { return _nextId; }
+
+ /**
+ * Returns the name of the base directory where the data file is stored.
+ **/
+ const vespalib::string & getBaseDir() const { return _dirName; }
+
+protected:
+ void setNextId(uint64_t id) { _nextId = id; }
+
+private:
+ uint64_t _nextId;
+ vespalib::string _dirName;
+};
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/docstore/idocumentstore.cpp b/searchlib/src/vespa/searchlib/docstore/idocumentstore.cpp
new file mode 100644
index 00000000000..1042e0dcc12
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/docstore/idocumentstore.cpp
@@ -0,0 +1,23 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "idocumentstore.h"
+
+namespace search {
+
+IDocumentStore::IDocumentStore(void)
+{
+}
+
+IDocumentStore::~IDocumentStore()
+{
+}
+
+void IDocumentStore::visit(const LidVector & lids, const document::DocumentTypeRepo &repo, IDocumentVisitor & visitor) const {
+ for (uint32_t lid : lids) {
+ visitor.visit(lid, read(lid, repo));
+ }
+}
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/docstore/idocumentstore.h b/searchlib/src/vespa/searchlib/docstore/idocumentstore.h
new file mode 100644
index 00000000000..902cb3e8d4b
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/docstore/idocumentstore.h
@@ -0,0 +1,203 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/document/document.h>
+#include <vespa/searchlib/query/base.h>
+#include <vespa/searchlib/docstore/idatastore.h>
+
+namespace search {
+
+class CacheStats;
+
+class IDocumentStoreReadVisitor
+{
+public:
+ virtual ~IDocumentStoreReadVisitor() { }
+ virtual void visit(uint32_t lid, const document::Document &doc) = 0;
+ virtual void visit(uint32_t lid) = 0;
+};
+
+class IDocumentStoreRewriteVisitor
+{
+public:
+ virtual ~IDocumentStoreRewriteVisitor() { }
+ virtual void visit(uint32_t lid, document::Document &doc) = 0;
+};
+
+class IDocumentStoreVisitorProgress
+{
+public:
+ virtual ~IDocumentStoreVisitorProgress() { }
+
+ virtual void updateProgress(double progress) = 0;
+};
+
+class IDocumentVisitor
+{
+public:
+ virtual ~IDocumentVisitor() { }
+ virtual void visit(uint32_t lid, document::Document::UP doc) = 0;
+private:
+};
+
+/**
+ * Simple document store that contains serialized Document instances.
+ * updates will be held in memory until flush() is called.
+ * Uses a Local ID as key.
+ **/
+class IDocumentStore
+{
+public:
+ /**
+ * Convenience typedef for a shared pointer to this class.
+ **/
+ typedef std::shared_ptr<IDocumentStore> SP;
+ typedef std::vector<uint32_t> LidVector;
+
+ /**
+ * Construct a document store.
+ *
+ * @throws vespalib::IoException if the file is corrupt or other IO problems occur.
+ * @param docMan The document type manager to use when deserializing.
+ * @param baseDir The path to a directory where the implementaion specific files will reside.
+ **/
+ IDocumentStore(void);
+ virtual ~IDocumentStore();
+
+ /**
+ * Make a Document from a stored serialized data blob.
+ * @param lid The local ID associated with the document.
+ * @return NULL if there is no document associated with the lid.
+ **/
+ virtual document::Document::UP read(DocumentIdT lid, const document::DocumentTypeRepo &repo) const = 0;
+ virtual void visit(const LidVector & lidVector, const document::DocumentTypeRepo &repo, IDocumentVisitor & visitor) const;
+
+ /**
+ * Serialize and store a document.
+ * @param doc The document to store
+ * @param lid The local ID associated with the document
+ **/
+ virtual void write(uint64_t syncToken, const document::Document& doc, DocumentIdT lid) = 0;
+
+ /**
+ * Mark a document as removed. A later read() will return NULL for the given lid.
+ * @param lid The local ID associated with the document
+ **/
+ virtual void remove(uint64_t syncToken, DocumentIdT lid) = 0;
+
+ /**
+ * Flush all in-memory updates to disk.
+ **/
+ virtual void flush(uint64_t syncToken) = 0;
+
+ virtual uint64_t initFlush(uint64_t synctoken) = 0;
+
+ /**
+ * If possible compact the disk.
+ **/
+ virtual void compact(uint64_t syncToken) = 0;
+
+ /**
+ * The sync token used for the last successful flush() operation,
+ * or 0 if no flush() has been performed yet.
+ * @return Last flushed sync token.
+ **/
+ virtual uint64_t lastSyncToken() const = 0;
+
+ /*
+ * The sync token used for last write operation.
+ */
+ virtual uint64_t tentativeLastSyncToken() const = 0;
+
+ /**
+ * The time of the last flush operation,
+ * or 0 if no flush has been performed yet.
+ * @return Time of last flush.
+ **/
+ virtual fastos::TimeStamp getLastFlushTime() const = 0;
+
+ /**
+ * Get the number of entries (including removed IDs
+ * or gaps in the local ID sequence) in the document store.
+ * @return The next local ID expected to be used.
+ */
+ virtual uint64_t nextId() const = 0;
+
+ /**
+ * Calculate memory used by this instance. During flush() actual
+ * memory usage may be approximately twice the reported amount.
+ * @return memory usage (in bytes)
+ **/
+ virtual size_t memoryUsed() const = 0;
+
+ /**
+ * Calculates memory that is used for meta data by this instance. Calling
+ * flush() does not free this memory.
+ * @return memory usage (in bytes)
+ **/
+ virtual size_t memoryMeta() const = 0;
+
+ /**
+ * Calculates how much disk is used
+ * @return disk space used.
+ */
+ virtual size_t getDiskFootprint() const = 0;
+ /**
+ * Calculates how much wasted space there is.
+ * @return disk bloat.
+ */
+ virtual size_t getDiskBloat() const = 0;
+
+ /**
+ * Calculates how much diskspace can be compacted during a flush.
+ * default is to return th ebloat limit, but as some targets have some internal limits
+ * to avoid misuse we let the report a more conservative number here if necessary.
+ * @return diskspace to be gained.
+ */
+ virtual size_t getMaxCompactGain() const { return getDiskBloat(); }
+
+ /**
+ * Returns statistics about the cache.
+ */
+ virtual CacheStats getCacheStats() const = 0;
+
+ /**
+ * Returns the base directory from which all structures are stored.
+ **/
+ virtual const vespalib::string & getBaseDir() const = 0;
+
+ /**
+ * Visit all documents found in document store.
+ */
+ virtual void
+ accept(IDocumentStoreReadVisitor &visitor,
+ IDocumentStoreVisitorProgress &visitorProgress,
+ const document::DocumentTypeRepo &repo) = 0;
+
+ /**
+ * Visit all documents found in document store.
+ */
+ virtual void
+ accept(IDocumentStoreRewriteVisitor &visitor,
+ IDocumentStoreVisitorProgress &visitorProgress,
+ const document::DocumentTypeRepo &repo) = 0;
+
+ /**
+ * Return cost of visiting all documents found in document store.
+ */
+ virtual double getVisitCost() const = 0;
+
+ /*
+ * Return brief stats for data store.
+ */
+ virtual DataStoreStorageStats getStorageStats() const = 0;
+
+ /*
+ * Return detailed stats about underlying files for data store.
+ */
+ virtual std::vector<DataStoreFileChunkStats> getFileChunkStats() const = 0;
+};
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/docstore/liddatastore.h b/searchlib/src/vespa/searchlib/docstore/liddatastore.h
new file mode 100644
index 00000000000..1f8472df716
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/docstore/liddatastore.h
@@ -0,0 +1,58 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/docstore/idatastore.h>
+
+namespace search {
+
+/**
+ * Factor out stuff common to MultiDataStore and SimpleDatastore
+ **/
+class LidDataStore : public IDataStore
+{
+public:
+ /**
+ * Construct an idata store.
+ * A data store has a base directory. The rest is up to the implementation.
+ *
+ * @param dirName The directory that will contain the data file.
+ **/
+ LidDataStore(const vespalib::string & dirName) : IDataStore(dirName), _lastSyncToken(0) { }
+
+
+ /**
+ * The sync token used for the last successful flush() operation,
+ * or 0 if no flush() has been performed yet.
+ * @return Last flushed sync token.
+ **/
+ virtual uint64_t lastSyncToken() const { return _lastSyncToken; }
+
+ virtual size_t getDiskBloat() const { return 0; }
+
+ /**
+ * Flush all in-memory data to disk.
+ **/
+ virtual void flushAll(uint64_t syncToken) {
+ flush(syncToken);
+ }
+
+ /**
+ * Get the number of entries (including removed IDs
+ * or gaps in the local ID sequence) in the data store.
+ * @return The next local ID expected to be used
+ */
+// uint64_t nextId() const { return _nextId; }
+
+
+protected:
+ void setLastSyncToken(uint64_t last) { _lastSyncToken = last; }
+// void setNextId(uint64_t id) { _nextId = id; }
+
+private:
+ uint64_t _lastSyncToken;
+// uint64_t _nextId;
+};
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/docstore/logdatastore.cpp b/searchlib/src/vespa/searchlib/docstore/logdatastore.cpp
new file mode 100644
index 00000000000..080dc71cbf2
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/docstore/logdatastore.cpp
@@ -0,0 +1,1240 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "logdatastore.h"
+#include <vespa/vespalib/stllike/asciistream.h>
+#include <vespa/vespalib/util/stringfmt.h>
+#include <stdexcept>
+#include <vespa/log/log.h>
+LOG_SETUP(".searchlib.docstore.logdatastore");
+#include <vespa/vespalib/data/fileheader.h>
+#include <vespa/vespalib/stllike/hash_map.h>
+#include <vespa/vespalib/xxhash/xxhash.h>
+
+namespace search
+{
+
+using vespalib::LockGuard;
+using vespalib::getLastErrorString;
+using vespalib::getErrorString;
+using vespalib::GenerationHandler;
+using vespalib::make_string;
+using common::FileHeaderContext;
+using std::runtime_error;
+
+LogDataStore::LogDataStore(vespalib::ThreadStackExecutorBase &executor,
+ const vespalib::string &dirName,
+ const Config &config,
+ const GrowStrategy &growStrategy,
+ const TuneFileSummary &tune,
+ const FileHeaderContext &fileHeaderContext,
+ transactionlog::SyncProxy &tlSyncer,
+ const IBucketizer::SP & bucketizer,
+ bool readOnly)
+ : IDataStore(dirName),
+ _config(config),
+ _tune(tune),
+ _fileHeaderContext(fileHeaderContext),
+ _genHandler(),
+ _lidInfo(growStrategy.getDocsInitialCapacity(),
+ growStrategy.getDocsGrowPercent(),
+ growStrategy.getDocsGrowDelta()),
+ _fileChunks(),
+ _holdFileChunks(),
+ _active(0),
+ _prevActive(FileId::active()),
+ _readOnly(readOnly),
+ _executor(executor),
+ _initFlushSyncToken(0),
+ _tlSyncer(tlSyncer),
+ _bucketizer(bucketizer)
+{
+ // Reserve space for 1TB summary in order to avoid locking.
+ _fileChunks.reserve(LidInfo::getMaxFileNum());
+ _holdFileChunks.resize(LidInfo::getMaxFileNum());
+
+ preload();
+ updateLidMap();
+ updateSerialNum();
+}
+
+void
+LogDataStore::updateSerialNum()
+{
+ LockGuard guard(_updateLock);
+ if (getPrevActive(guard) != NULL) {
+ if (getActive(guard).getSerialNum() <
+ getPrevActive(guard)->getLastPersistedSerialNum()) {
+ getActive(guard).setSerialNum(getPrevActive(guard)->getLastPersistedSerialNum());
+ }
+ }
+}
+
+LogDataStore::~LogDataStore()
+{
+ // Must be called before ending threads as there are sanity checks.
+ _fileChunks.clear();
+ //_executor.shutdown();
+ _executor.sync();
+ _genHandler.updateFirstUsedGeneration();
+ _lidInfo.removeOldGenerations(_genHandler.getFirstUsedGeneration());
+}
+
+void
+LogDataStore::updateLidMap()
+{
+ uint64_t lastSerialNum(0);
+ for (FileChunk::UP & fc : _fileChunks) {
+ fc->updateLidMap(*this, lastSerialNum);
+ lastSerialNum = fc->getLastPersistedSerialNum();
+ }
+}
+
+void
+LogDataStore::read(const LidVector & lids, IBufferVisitor & visitor) const
+{
+ LidInfoWithLidV orderedLids;
+ GenerationHandler::Guard guard(_genHandler.takeGuard());
+ for (uint32_t lid : lids) {
+ LidInfo li = _lidInfo[lid];
+ if (!li.empty() && li.valid()) {
+ orderedLids.emplace_back(li, lid);
+ }
+ }
+ if (orderedLids.empty()) { return; }
+
+ std::sort(orderedLids.begin(), orderedLids.end());
+ uint32_t prevFile = orderedLids[0].getFileId();
+ uint32_t start = 0;
+ for (size_t curr(1); curr < orderedLids.size(); curr++) {
+ const LidInfoWithLid & li = orderedLids[curr];
+ if (prevFile != li.getFileId()) {
+ const FileChunk & fc(*_fileChunks[prevFile]);
+ fc.read(orderedLids.begin() + start, curr - start, visitor);
+ start = curr;
+ prevFile = li.getFileId();
+ }
+ }
+ const FileChunk & fc(*_fileChunks[prevFile]);
+ fc.read(orderedLids.begin() + start, orderedLids.size() - start, visitor);
+}
+
+ssize_t
+LogDataStore::read(uint32_t lid, vespalib::DataBuffer& buffer) const
+{
+ ssize_t sz(0);
+ if (lid < _lidInfo.size()) {
+ LidInfo li(0);
+ {
+ GenerationHandler::Guard guard(_genHandler.takeGuard());
+ li = _lidInfo[lid];
+ }
+ if (!li.empty() && li.valid()) {
+ const FileChunk & fc(*_fileChunks[li.getFileId()]);
+ sz = fc.read(lid, li.getChunkId(), buffer);
+ }
+ }
+ return sz;
+}
+
+
+void
+LogDataStore::write(uint64_t serialNum, uint32_t lid, const void * buffer, size_t len)
+{
+ LockGuard guard(_updateLock);
+ WriteableFileChunk & active = getActive(guard);
+ write(guard, active, serialNum, lid, buffer, len);
+}
+
+void
+LogDataStore::write(LockGuard guard, FileId destinationFileId, uint32_t lid, const void * buffer, size_t len)
+{
+ WriteableFileChunk & destination = static_cast<WriteableFileChunk &>(*_fileChunks[destinationFileId.getId()]);
+ write(guard, destination, destination.getSerialNum(), lid, buffer, len);
+}
+
+void
+LogDataStore::write(LockGuard guard, WriteableFileChunk & destination,
+ uint64_t serialNum, uint32_t lid, const void * buffer, size_t len)
+{
+ LidInfo lm = destination.append(serialNum, lid, buffer, len);
+ setLid(lid, lm);
+ if (destination.getFileId() == getActiveFileId(guard)) {
+ requireSpace(guard, destination);
+ }
+}
+
+void
+LogDataStore::requireSpace(LockGuard guard, WriteableFileChunk & active)
+{
+ assert(active.getFileId() == getActiveFileId(guard));
+ size_t oldSz(active.getDiskFootprint());
+ LOG(spam, "Checking file %s size %ld < %ld",
+ active.getName().c_str(), oldSz, _config.getMaxFileSize());
+ if (oldSz > _config.getMaxFileSize()) {
+ FileId fileId = allocateFileId(guard);
+ _fileChunks[fileId.getId()] = createWritableFile(fileId, active.getSerialNum());
+ setActive(guard, fileId);
+ std::unique_ptr<FileChunkHolder> activeHolder = holdFileChunk(active.getFileId());
+ guard.unlock();
+ // Write chunks to old .dat file
+ // Note: Feed latency spike
+ active.flush(true, active.getSerialNum());
+ // Sync transaction log
+ _tlSyncer.sync(active.getSerialNum());
+ // sync old active .dat file, write pending chunks to old .idx file
+ // and sync old .idx file to disk.
+ active.flushPendingChunks(active.getSerialNum());
+ active.freeze();
+ // TODO: Delay create of new file
+ LOG(debug, "Closed file %s of size %ld due to maxsize of %ld reached. Bloat is %ld",
+ active.getName().c_str(), active.getDiskFootprint(),
+ _config.getMaxFileSize(), active.getDiskBloat());
+ }
+}
+
+uint64_t
+LogDataStore::lastSyncToken() const
+{
+ LockGuard guard(_updateLock);
+ uint64_t lastSerial(getActive(guard).getLastPersistedSerialNum());
+ if (lastSerial == 0) {
+ const FileChunk * prev = getPrevActive(guard);
+ if (prev != NULL) {
+ lastSerial = prev->getLastPersistedSerialNum();
+ }
+ }
+ return lastSerial;
+}
+
+uint64_t
+LogDataStore::tentativeLastSyncToken() const
+{
+ LockGuard guard(_updateLock);
+ return getActive(guard).getSerialNum();
+}
+
+fastos::TimeStamp
+LogDataStore::getLastFlushTime() const
+{
+ if (lastSyncToken() == 0) {
+ return fastos::TimeStamp();
+ }
+ LockGuard guard(_updateLock);
+ fastos::TimeStamp timeStamp(getActive(guard).getModificationTime());
+ if (timeStamp == 0) {
+ const FileChunk * prev = getPrevActive(guard);
+ if (prev != nullptr) {
+ timeStamp = prev->getModificationTime();
+ }
+ }
+ return timeStamp;
+}
+
+void
+LogDataStore::remove(uint64_t serialNum, uint32_t lid)
+{
+ LockGuard guard(_updateLock);
+ if (lid < _lidInfo.size()) {
+ LidInfo lm = _lidInfo[lid];
+ if (lm.valid()) {
+ _fileChunks[lm.getFileId()]->remove(lid, lm.size());
+ }
+ lm = getActive(guard).append(serialNum, lid, NULL, 0);
+ assert( lm.empty() );
+ _lidInfo[lid] = lm;
+ }
+}
+
+namespace {
+
+vespalib::string bloatMsg(size_t bloat, size_t usage) {
+ return make_string("Disk bloat is now at %ld of %ld at %2.2f percent", bloat, usage, (bloat*100.0)/usage);
+}
+
+}
+
+void
+LogDataStore::compact(uint64_t syncToken)
+{
+ uint64_t usage = getDiskFootprint();
+ uint64_t bloat = getDiskBloat();
+ LOG(debug, "%s", bloatMsg(bloat, usage).c_str());
+ if ((_fileChunks.size() > 1) &&
+ ( isBucketSpreadTooLarge(getMaxBucketSpread()) ||
+ isBloatOverLimit(bloat, usage)))
+ {
+ LOG(info, "%s. Will compact", bloatMsg(bloat, usage).c_str());
+ compactWorst();
+ usage = getDiskFootprint();
+ bloat = getDiskBloat();
+ LOG(info, "Done compacting. %s", bloatMsg(bloat, usage).c_str());
+ }
+
+ flushActiveAndWait(syncToken);
+}
+
+size_t
+LogDataStore::getMaxCompactGain() const
+{
+ const size_t diskFootPrint = getDiskFootprint();
+ const size_t maxConfiguredDiskBloat = diskFootPrint * _config.getMaxDiskBloatFactor();
+ double maxSpread = getMaxBucketSpread();
+ size_t bloat = getDiskBloat();
+ if (bloat < maxConfiguredDiskBloat) {
+ bloat = 0;
+ }
+ size_t spreadAsBloat = diskFootPrint * (1.0 - 1.0/maxSpread);
+ if ( ! isBucketSpreadTooLarge(maxSpread)) {
+ spreadAsBloat = 0;
+ }
+ return (bloat + spreadAsBloat);
+}
+
+void
+LogDataStore::flush(uint64_t syncToken)
+{
+ WriteableFileChunk * active = NULL;
+ std::unique_ptr<FileChunkHolder> activeHolder;
+ assert(syncToken == _initFlushSyncToken);
+ {
+ LockGuard guard(_updateLock);
+ // Note: Feed latency spike
+ getActive(guard).flush(true, syncToken);
+ active = &getActive(guard);
+ activeHolder = holdFileChunk(active->getFileId());
+ }
+ active->flushPendingChunks(syncToken);
+ activeHolder.reset();
+ LOG(info, "Flushing. %s",bloatMsg(getDiskBloat(), getDiskFootprint()).c_str());
+}
+
+
+uint64_t
+LogDataStore::initFlush(uint64_t syncToken)
+{
+ assert(syncToken >= _initFlushSyncToken);
+ syncToken = flushActive(syncToken);
+ _initFlushSyncToken = syncToken;
+ return syncToken;
+}
+
+class Compacter : public IWriteData
+{
+public:
+ Compacter(LogDataStore & ds)
+ : _ds(ds)
+ {
+ }
+ void
+ write(LockGuard guard, uint32_t chunkId, uint32_t lid, const void *buffer, size_t sz) override {
+ (void) chunkId;
+ FileChunk::FileId fileId= _ds.getActiveFileId(guard);
+ _ds.write(guard, fileId, lid, buffer, sz);
+ }
+ void close() override { }
+private:
+ LogDataStore & _ds;
+};
+
+typedef std::unique_ptr<vespalib::DataBuffer> BufferUP;
+class StoreByBucket
+{
+public:
+ StoreByBucket();
+ class IWrite {
+ public:
+ virtual ~IWrite() { }
+ virtual void write(uint64_t bucketId, uint32_t chunkId, uint32_t lid, const void *buffer, size_t sz) = 0;
+ };
+ void add(uint64_t bucketId, uint32_t chunkId, uint32_t lid, const void *buffer, size_t sz);
+ void drain(IWrite & drain);
+ size_t getChunkCount() const { return _chunks.size(); }
+ size_t getBucketCount() const { return _where.size(); }
+ size_t getLidCount() const {
+ size_t lidCount(0);
+ for (const auto & it : _where) {
+ lidCount += it.second.size();
+ }
+ return lidCount;
+ }
+private:
+ void closeCurrent();
+ void createCurrent();
+ struct Index {
+ Index(uint32_t id, uint32_t chunkId, uint32_t entry) : _id(id), _chunkId(chunkId), _lid(entry) { }
+ uint32_t _id;
+ uint32_t _chunkId;
+ uint32_t _lid;
+ };
+ std::vector<BufferUP> _chunks;
+ Chunk::UP _current;
+ std::map<uint64_t, std::vector<Index>> _where;
+};
+
+StoreByBucket::StoreByBucket() :
+ _chunks(),
+ _current(),
+ _where()
+{
+ createCurrent();
+}
+
+void
+StoreByBucket::add(uint64_t bucketId, uint32_t chunkId, uint32_t lid, const void *buffer, size_t sz)
+{
+ if ( ! _current->hasRoom(sz)) {
+ closeCurrent();
+ createCurrent();
+ }
+ Index idx(_chunks.size(), chunkId, lid);
+ _current->append(lid, buffer, sz);
+ _where[bucketId].push_back(idx);
+}
+
+void StoreByBucket::createCurrent()
+{
+ _current.reset(new Chunk(_chunks.size(), Chunk::Config(0x10000, 1000)));
+}
+
+void
+StoreByBucket::closeCurrent()
+{
+ BufferUP buffer(new vespalib::DataBuffer());
+ document::CompressionConfig lz4(document::CompressionConfig::LZ4);
+ _current->pack(1, *buffer, lz4);
+ buffer->shrink(buffer->getDataLen());
+ _chunks.push_back(std::move(buffer));
+ _current.reset();
+}
+
+void
+StoreByBucket::drain(IWrite & drainer)
+{
+ closeCurrent();
+ std::vector<Chunk::UP> chunks;
+ for (BufferUP & buffer : _chunks) {
+ chunks.push_back(Chunk::UP(new Chunk(chunks.size(), buffer->getData(), buffer->getDataLen())));
+ buffer.reset();
+ }
+ _chunks.clear();
+ for (const auto & it : _where) {
+ for (Index idx : it.second) {
+ vespalib::ConstBufferRef data(chunks[idx._id]->getLid(idx._lid));
+ drainer.write(it.first, idx._chunkId, idx._lid, data.c_str(), data.size());
+ }
+ }
+}
+
+
+class BucketCompacter : public IWriteData, public StoreByBucket::IWrite
+{
+public:
+ using FileId = FileChunk::FileId;
+ BucketCompacter(LogDataStore & ds, const IBucketizer & bucketizer, FileId source, FileId destination);
+ void write(LockGuard guard, uint32_t chunkId, uint32_t lid, const void *buffer, size_t sz) override ;
+ void write(uint64_t bucketId, uint32_t chunkId, uint32_t lid, const void *buffer, size_t sz) override;
+ void close() override;
+private:
+ FileId getDestinationId(const LockGuard & guard) const {
+ return (_destinationFileId.isActive()) ? _ds.getActiveFileId(guard) : _destinationFileId;
+ }
+ FileId _sourceFileId;
+ FileId _destinationFileId;
+ LogDataStore & _ds;
+ const IBucketizer & _bucketizer;
+ std::vector<StoreByBucket> _tmpStore;
+ GenerationHandler::Guard _lidGuard;
+ GenerationHandler::Guard _bucketizerGuard;
+ vespalib::hash_map<uint64_t, uint32_t> _stat;
+};
+
+BucketCompacter::BucketCompacter(LogDataStore & ds, const IBucketizer & bucketizer, FileId source, FileId destination) :
+ _sourceFileId(source),
+ _destinationFileId(destination),
+ _ds(ds),
+ _bucketizer(bucketizer),
+ _tmpStore(256),
+ _lidGuard(ds.getLidReadGuard()),
+ _bucketizerGuard(bucketizer.getGuard()),
+ _stat()
+{
+}
+
+void
+BucketCompacter::write(LockGuard guard, uint32_t chunkId, uint32_t lid, const void *buffer, size_t sz)
+{
+ guard.unlock();
+ uint64_t bucketId = (sz > 0) ? _bucketizer.getBucketOf(_bucketizerGuard, lid) : 0;
+ uint32_t hash = XXH32(&bucketId, sizeof(bucketId), 0);
+ _tmpStore[hash%_tmpStore.size()].add(bucketId, chunkId, lid, buffer, sz);
+}
+
+void
+BucketCompacter::close()
+{
+ size_t lidCount1(0);
+ size_t bucketCount(0);
+ size_t chunkCount(0);
+ for (const StoreByBucket & store : _tmpStore) {
+ lidCount1 += store.getLidCount();
+ bucketCount += store.getBucketCount();
+ chunkCount += store.getChunkCount();
+ }
+ LOG(info, "Have read %ld lids and placed them in %ld buckets. Temporary compressed in %ld chunks.",
+ lidCount1, bucketCount, chunkCount);
+
+ for (StoreByBucket & store : _tmpStore) {
+ store.drain(*this);
+ }
+
+ size_t lidCount(0);
+ for (const auto & it : _stat) {
+ lidCount += it.second;
+ }
+ LOG(info, "Compacted %ld lids into %ld buckets", lidCount, _stat.size());
+}
+
+void
+BucketCompacter::write(uint64_t bucketId, uint32_t chunkId, uint32_t lid, const void *buffer, size_t sz)
+{
+ _stat[bucketId]++;
+ LockGuard guard(_ds.getLidGuard(lid));
+ LidInfo lidInfo(_sourceFileId.getId(), chunkId, sz);
+ if (_ds.getLid(_lidGuard, lid) == lidInfo) {
+ FileId fileId = getDestinationId(guard);
+ _ds.write(guard, fileId, lid, buffer, sz);
+ }
+}
+
+double
+LogDataStore::getMaxBucketSpread() const
+{
+ double maxSpread(1.0);
+ for (const FileChunk::UP & fc : _fileChunks) {
+ if (fc) {
+ if (_bucketizer && fc->frozen()) {
+ maxSpread = std::max(maxSpread, fc->getBucketSpread());
+ }
+ }
+ }
+ return maxSpread;
+}
+
+std::pair<bool, LogDataStore::FileId>
+LogDataStore::findNextToCompact()
+{
+ typedef std::multimap<double, FileId, std::greater<double>> CostMap;
+ CostMap worstBloat;
+ CostMap worstSpread;
+ LockGuard guard(_updateLock);
+ for (size_t i(0); i < _fileChunks.size(); i++) {
+ const FileChunk::UP & fc(_fileChunks[i]);
+ if (fc && fc->frozen() && (_currentlyCompacting.find(fc->getNameId()) == _currentlyCompacting.end())) {
+ uint64_t usage = fc->getDiskFootprint();
+ uint64_t bloat = fc->getDiskBloat();
+ if (_bucketizer) {
+ worstSpread.emplace(fc->getBucketSpread(), FileId(i));
+ }
+ if (usage > 0) {
+ double tmp(double(bloat)/usage);
+ worstBloat.emplace(tmp, FileId(i));
+ }
+ }
+ }
+ if (LOG_WOULD_LOG(debug)) {
+ for (const auto & it : worstBloat) {
+ const FileChunk & fc = *_fileChunks[it.second.getId()];
+ LOG(debug, "File '%s' has bloat '%2.2f' and bucket-spread '%1.4f numChunks=%d , numBuckets=%ld, numUniqueBuckets=%ld",
+ fc.getName().c_str(), it.first * 100, fc.getBucketSpread(), fc.getNumChunks(), fc.getNumBuckets(), fc.getNumUniqueBuckets());
+ }
+ }
+ std::pair<bool, FileId> retval(false, FileId(-1));
+ if ( ! worstBloat.empty() && (worstBloat.begin()->first > _config.getMaxDiskBloatFactor())) {
+ retval.first = true;
+ retval.second = worstBloat.begin()->second;
+ } else if ( ! worstSpread.empty() && (worstSpread.begin()->first > _config.getMaxBucketSpread())) {
+ retval.first = true;
+ retval.second = worstSpread.begin()->second;
+ }
+ if (retval.first) {
+ _currentlyCompacting.insert(_fileChunks[retval.second.getId()]->getNameId());
+ }
+ return retval;
+}
+
+void
+LogDataStore::compactWorst() {
+ auto worst = findNextToCompact();
+ if (worst.first) {
+ compactFile(worst.second);
+ }
+}
+
+SerialNum LogDataStore::flushFile(LockGuard guard, WriteableFileChunk & file, SerialNum syncToken) {
+ (void) guard;
+ uint64_t lastSerial(file.getSerialNum());
+ if (lastSerial > syncToken) {
+ syncToken = lastSerial;
+ }
+ file.flush(false, syncToken);
+ return syncToken;
+}
+
+void LogDataStore::flushFileAndWait(LockGuard guard, WriteableFileChunk & file, SerialNum syncToken) {
+ syncToken = flushFile(guard, file, syncToken);
+ file.waitForDiskToCatchUpToNow();
+ _tlSyncer.sync(syncToken);
+ file.flushPendingChunks(syncToken);
+}
+
+SerialNum LogDataStore::flushActive(SerialNum syncToken) {
+ LockGuard guard(_updateLock);
+ WriteableFileChunk &active = getActive(guard);
+ return flushFile(guard, active, syncToken);
+}
+
+void LogDataStore::flushActiveAndWait(SerialNum syncToken) {
+ LockGuard guard(_updateLock);
+ WriteableFileChunk &active = getActive(guard);
+ return flushFileAndWait(guard, active, syncToken);
+}
+
+bool LogDataStore::shouldCompactToActiveFile(size_t compactedSize) const {
+ return _config.compact2ActiveFile()
+ || (_config.getMinFileSizeFactor() * _config.getMaxFileSize() > compactedSize);
+}
+
+void LogDataStore::compactFile(FileId fileId)
+{
+ FileChunk::UP & fc(_fileChunks[fileId.getId()]);
+ NameId compactedNameId = fc->getNameId();
+ LOG(info, "Compacting file '%s' which has bloat '%2.2f' and bucket-spread '%1.4f",
+ fc->getName().c_str(), 100*fc->getDiskBloat()/double(fc->getDiskFootprint()), fc->getBucketSpread());
+ IWriteData::UP compacter;
+ FileId destinationFileId = FileId::active();
+ if (_bucketizer) {
+ if ( ! shouldCompactToActiveFile(fc->getDiskFootprint() - fc->getDiskBloat())) {
+ destinationFileId = allocateFileId();
+ FileChunk::UP destination = createWritableFile(destinationFileId, fc->getLastPersistedSerialNum(),
+ fc->getNameId().next());
+ _fileChunks[destination->getFileId().getId()] = std::move(destination);
+ }
+
+ compacter.reset(new BucketCompacter(*this, *_bucketizer, fc->getFileId(), destinationFileId));
+ } else {
+ compacter.reset(new Compacter(*this));
+ }
+
+ fc->appendTo(*this, *compacter, fc->getNumChunks(), nullptr);
+
+ if (destinationFileId.isActive()) {
+ flushActiveAndWait(0);
+ } else {
+ LockGuard guard(_updateLock);
+ WriteableFileChunk & compactTo = dynamic_cast<WriteableFileChunk &>(*_fileChunks[destinationFileId.getId()]);
+ flushFileAndWait(guard, compactTo, 0);
+ compactTo.freeze();
+ }
+
+ FastOS_Thread::Sleep(10 * 1000);
+ FileChunk::UP toDie;
+ for (;;) {
+ LockGuard guard(_updateLock);
+ if (_holdFileChunks[fc->getFileId().getId()] == 0u) {
+ toDie = std::move(fc);
+ break;
+ }
+ guard.unlock();
+ /*
+ * Wait for requireSpace() and flush() methods to leave chunk
+ * alone.
+ */
+ FastOS_Thread::Sleep(1000);
+ }
+ toDie->erase();
+ LockGuard guard(_updateLock);
+ _currentlyCompacting.erase(compactedNameId);
+}
+
+size_t
+LogDataStore::memoryUsed() const
+{
+ size_t sz(memoryMeta());
+ {
+ LockGuard guard(_updateLock);
+ for (const FileChunk::UP & fc : _fileChunks) {
+ if (fc) {
+ sz += fc->getMemoryFootprint();
+ }
+ }
+ }
+ return sz;
+}
+
+size_t
+LogDataStore::memoryMeta() const
+{
+ LockGuard guard(_updateLock);
+ size_t sz(_lidInfo.getMemoryUsage().allocatedBytes());
+ for (const FileChunk::UP & fc : _fileChunks) {
+ if (fc) {
+ sz += fc->getMemoryMetaFootprint();
+ }
+ }
+ return sz;
+}
+
+FileChunk::FileId
+LogDataStore::allocateFileId()
+{
+ LockGuard guard(_updateLock);
+ return allocateFileId(guard);
+}
+FileChunk::FileId
+LogDataStore::allocateFileId(const LockGuard & guard)
+{
+ (void) guard;
+ for (size_t i(0); i < _fileChunks.size(); i++) {
+ if (_fileChunks[i].get() == nullptr) {
+ return FileId(i);
+ }
+ }
+ // This assert is verify that we have not gotten ourselves into a mess
+ // that would require the use of locks to prevent. Just assure that the
+ // below resize is 'safe'.
+ assert(_fileChunks.capacity() > _fileChunks.size());
+ _fileChunks.resize(_fileChunks.size()+1);
+ return FileId(_fileChunks.size() - 1);
+}
+
+size_t
+LogDataStore::getDiskFootprint() const
+{
+ LockGuard guard(_updateLock);
+ size_t sz(0);
+ for (const FileChunk::UP & fc : _fileChunks) {
+ if (fc) {
+ sz += fc->getDiskFootprint();
+ }
+ }
+ return sz;
+}
+
+
+size_t
+LogDataStore::getDiskHeaderFootprint(void) const
+{
+ LockGuard guard(_updateLock);
+ size_t sz(0);
+ for (const FileChunk::UP & fc : _fileChunks) {
+ if (fc) {
+ sz += fc->getDiskHeaderFootprint();
+ }
+ }
+ return sz;
+}
+
+
+size_t
+LogDataStore::getDiskBloat() const
+{
+ LockGuard guard(_updateLock);
+ size_t sz(0);
+ for (FileId i(0); i < FileId(_fileChunks.size()); i = i.next()) {
+ /// Do not count the holes in the last file as bloat
+ if (i != _active) {
+ const FileChunk * chunk = _fileChunks[i.getId()].get();
+ if (chunk != NULL) {
+ sz += chunk->getDiskBloat();
+ }
+ }
+ }
+ return sz;
+}
+
+vespalib::string
+LogDataStore::createFileName(NameId id) const
+{
+ return id.createName(getBaseDir());
+}
+vespalib::string
+LogDataStore::createDatFileName(NameId id) const
+{
+ return FileChunk::createDatFileName(id.createName(getBaseDir()));
+}
+
+vespalib::string
+LogDataStore::createIdxFileName(NameId id) const
+{
+ return FileChunk::createIdxFileName(id.createName(getBaseDir()));
+}
+
+FileChunk::UP
+LogDataStore::createReadOnlyFile(FileId fileId, NameId nameId)
+{
+ FileChunk::UP file(new FileChunk(fileId, nameId, getBaseDir(), _tune,
+ _bucketizer.get(), _config.crcOnReadDisabled()));
+ file->enableRead();
+ return file;
+}
+
+FileChunk::UP
+LogDataStore::createWritableFile(FileId fileId, SerialNum serialNum, NameId nameId)
+{
+ for (const auto & fc : _fileChunks) {
+ if (fc && (fc->getNameId() == nameId)) {
+ LOG(error, "We already have a file registered with internal fileId=%u, and external nameId=%ld",
+ fileId.getId(), nameId.getId());
+ return FileChunk::UP();
+ }
+ }
+ FileChunk::UP file(new WriteableFileChunk(_executor, fileId, nameId, getBaseDir(),
+ serialNum, _config.getFileConfig(), _tune, _fileHeaderContext,
+ _bucketizer.get(), _config.crcOnReadDisabled()));
+ file->enableRead();
+ return file;
+}
+
+FileChunk::UP
+LogDataStore::createWritableFile(FileId fileId, SerialNum serialNum)
+{
+ return createWritableFile(fileId, serialNum, NameId(fastos::ClockSystem::now()));
+}
+
+namespace {
+
+vespalib::string
+lsSingleFile(const vespalib::string & fileName)
+{
+ vespalib::string s;
+ FastOS_StatInfo stat;
+ if ( FastOS_File::Stat(fileName.c_str(), &stat)) {
+ s += make_string("%s %20ld %12ld", fileName.c_str(), stat._modifiedTimeNS, stat._size);
+ } else {
+ s = make_string("%s 'stat' FAILED !!", fileName.c_str());
+ }
+ return s;
+}
+
+}
+
+vespalib::string LogDataStore::ls(const NameIdSet & partList)
+{
+ vespalib::string s;
+ for (auto it(++partList.begin()), mt(partList.end()); it != mt; ++it) {
+ s += lsSingleFile(createDatFileName(*it));
+ s += "\n";
+ s += lsSingleFile(createIdxFileName(*it));
+ }
+ return s;
+}
+
+
+static bool
+hasNonHeaderData(const vespalib::string &name)
+{
+ FastOS_File file(name.c_str());
+ if (!file.OpenReadOnly())
+ return false;
+ int64_t fSize(file.GetSize());
+ uint32_t headerLen = 0;
+ uint32_t minHeaderLen = vespalib::GenericHeader::getMinSize();
+ if (fSize < minHeaderLen)
+ return false;
+ try {
+ vespalib::FileHeader h;
+ headerLen = h.readFile(file);
+ } catch (vespalib::IllegalHeaderException &e) {
+ file.SetPosition(0);
+ try {
+ vespalib::FileHeader::FileReader fr(file);
+ uint32_t header2Len = vespalib::FileHeader::readSize(fr);
+ if (header2Len <= fSize) {
+ e.throwSelf(); // header not truncated
+ }
+ } catch (vespalib::IllegalHeaderException &e2) {
+ }
+ return false;
+ }
+ return fSize > headerLen;
+}
+
+
+void
+LogDataStore::verifyModificationTime(const NameIdSet & partList)
+{
+ FastOS_StatInfo prevDatStat;
+ FastOS_StatInfo prevIdxStat;
+ NameId nameId(*partList.begin());
+ vespalib::string datName(createDatFileName(nameId));
+ vespalib::string idxName(createIdxFileName(nameId));
+ if ( ! FastOS_File::Stat(datName.c_str(), &prevDatStat)) {
+ throw runtime_error(make_string("Failed to Stat '%s'\nDirectory =\n%s", datName.c_str(), ls(partList).c_str()));
+ }
+ if ( ! FastOS_File::Stat(idxName.c_str(), &prevIdxStat)) {
+ throw runtime_error(make_string("Failed to Stat '%s'\nDirectory =\n%s", idxName.c_str(), ls(partList).c_str()));
+ }
+ for (auto it(++partList.begin()), mt(partList.end()); it != mt; ++it) {
+ vespalib::string prevDatNam(datName);
+ vespalib::string prevIdxNam(idxName);
+ FastOS_StatInfo datStat;
+ FastOS_StatInfo idxStat;
+ nameId = *it;
+ datName = createDatFileName(nameId);
+ idxName = createIdxFileName(nameId);
+ if ( ! FastOS_File::Stat(datName.c_str(), &datStat)) {
+ throw runtime_error(make_string("Failed to Stat '%s'\nDirectory =\n%s", datName.c_str(), ls(partList).c_str()));
+ }
+ if ( ! FastOS_File::Stat(idxName.c_str(), &idxStat)) {
+ throw runtime_error(make_string("Failed to Stat '%s'\nDirectory =\n%s", idxName.c_str(), ls(partList).c_str()));
+ }
+ ns_log::Logger::LogLevel logLevel = _config.compact2ActiveFile()
+ ? ns_log::Logger::warning
+ : ns_log::Logger::debug;
+ if ((datStat._modifiedTimeNS < prevDatStat._modifiedTimeNS) && hasNonHeaderData(datName)) {
+ VLOG(logLevel, "Older file '%s' is newer (%ld) than file '%s' (%ld)\nDirectory =\n%s",
+ prevDatNam.c_str(), prevDatStat._modifiedTimeNS,
+ datName.c_str(), datStat._modifiedTimeNS,
+ ls(partList).c_str());
+ }
+ if ((idxStat._modifiedTimeNS < prevIdxStat._modifiedTimeNS) && hasNonHeaderData(idxName)) {
+ VLOG(logLevel, "Older file '%s' is newer (%ld) than file '%s' (%ld)\nDirectory =\n%s",
+ prevIdxNam.c_str(), prevIdxStat._modifiedTimeNS,
+ idxName.c_str(), idxStat._modifiedTimeNS,
+ ls(partList).c_str());
+ }
+ prevDatStat = datStat;
+ prevIdxStat = idxStat;
+ }
+}
+
+void
+LogDataStore::preload()
+{
+ // scan directory
+ NameIdSet partList = scanDir(getBaseDir(), ".idx");
+ NameIdSet datPartList = scanDir(getBaseDir(), ".dat");
+
+ partList = eraseEmptyIdxFiles(partList);
+ eraseDanglingDatFiles(partList, datPartList);
+
+ if (!partList.empty()) {
+ verifyModificationTime(partList);
+ partList = scanDir(getBaseDir(), ".idx");
+ typedef NameIdSet::const_iterator It;
+ for (It it(partList.begin()), mt(--partList.end()); it != mt; it++) {
+ _fileChunks.push_back(createReadOnlyFile(FileId(_fileChunks.size()), *it));
+ }
+ _fileChunks.push_back(isReadOnly()
+ ? createReadOnlyFile(FileId(_fileChunks.size()), *partList.rbegin())
+ : createWritableFile(FileId(_fileChunks.size()), getMinLastPersistedSerialNum(), *partList.rbegin()));
+ } else {
+ _fileChunks.push_back(createWritableFile(FileId::first(), 0));
+ }
+ _active = FileId(_fileChunks.size() - 1);
+ _prevActive = _active.prev();
+}
+
+
+LogDataStore::NameIdSet
+LogDataStore::eraseEmptyIdxFiles(const NameIdSet &partList)
+{
+ NameIdSet nonEmptyIdxPartList;
+ for (const auto & part : partList) {
+ vespalib::string name(createFileName(part));
+ if (FileChunk::isIdxFileEmpty(name)) {
+ LOG(warning, "We detected an empty idx file for part '%s'. Erasing it.", name.c_str());
+ FileChunk::eraseIdxFile(name);
+ } else {
+ nonEmptyIdxPartList.insert(part);
+ }
+ }
+ return nonEmptyIdxPartList;
+}
+
+void
+LogDataStore::eraseDanglingDatFiles(const NameIdSet &partList, const NameIdSet &datPartList)
+{
+ typedef NameIdSet::const_iterator IT;
+
+ IT iib(partList.begin());
+ IT ii(iib);
+ IT iie(partList.end());
+ IT dib(datPartList.begin());
+ IT di(dib);
+ IT die(datPartList.end());
+ IT dirb(die);
+ NameId endMarker(NameId::last());
+
+ if (dirb != dib) {
+ --dirb;
+ }
+ for (;;) {
+ if (ii == iie && di == die) {
+ break;
+ }
+ NameId ibase(ii == iie ? endMarker : *ii);
+ NameId dbase(di == die ? endMarker : *di);
+ if (ibase < dbase) {
+ vespalib::string name(createFileName(ibase));
+ const char *s = name.c_str();
+ throw runtime_error(make_string( "Missing file '%s.dat', found '%s.idx'", s, s));
+ } else if (dbase < ibase) {
+ vespalib::string name(createDatFileName(dbase));
+ const char *s = name.c_str();
+ LOG(warning, "Removing dangling file '%s'", s);
+ if (!FastOS_File::Delete(s)) {
+ vespalib::string e = getErrorString(errno);
+ throw runtime_error(make_string("Error erasing dangling file '%s'. Error is '%s'", s, e.c_str()));
+ }
+ ++di;
+ } else {
+ ++ii;
+ ++di;
+ }
+ }
+}
+
+LogDataStore::NameIdSet
+LogDataStore::scanDir(const vespalib::string &dir, const vespalib::string &suffix)
+{
+ NameIdSet baseFiles;
+ FastOS_DirectoryScan dirScan(dir.c_str());
+ while (dirScan.ReadNext()) {
+ if (dirScan.IsRegular()) {
+ vespalib::stringref file(dirScan.GetName());
+ if (file.size() > suffix.size() &&
+ file.find(suffix.c_str()) == file.size() - suffix.size()) {
+ vespalib::string base(file.substr(0, file.find(suffix.c_str())));
+ char *err(NULL);
+ errno = 0;
+ NameId baseId(strtoul(base.c_str(), &err, 10));
+ if ((errno == 0) && (err[0] == '\0')) {
+ vespalib::string tmpFull = createFileName(baseId);
+ vespalib::string tmp = tmpFull.substr(tmpFull.rfind('/') + 1);
+ assert(tmp == base);
+ baseFiles.insert(baseId);
+ } else {
+ throw runtime_error(make_string("Error converting '%s' to a unsigned integer number. Error occurred at '%s'. Error is '%s'",
+ base.c_str(), err, getLastErrorString().c_str()));
+ }
+ } else {
+ LOG(debug, "Skipping '%s' since it does not end with '%s'", file.c_str(), suffix.c_str());
+ }
+ }
+ }
+ return baseFiles;
+}
+
+void
+LogDataStore::setLid(uint32_t lid, const LidInfo & meta)
+{
+ if (lid < _lidInfo.size()) {
+ _genHandler.updateFirstUsedGeneration();
+ _lidInfo.removeOldGenerations(_genHandler.getFirstUsedGeneration());
+ const LidInfo & prev = _lidInfo[lid];
+ if (prev.valid()) {
+ _fileChunks[prev.getFileId()]->remove(lid, prev.size());
+ }
+ } else {
+ while (lid >= _lidInfo.size()) {
+ _lidInfo.push_back(LidInfo());
+ }
+ _lidInfo.setGeneration(_genHandler.getNextGeneration());
+ _genHandler.incGeneration();
+ setNextId(_lidInfo.size());
+ }
+ _lidInfo[lid] = meta;
+}
+
+void
+LogDataStore::verify(bool reportOnly) const
+{
+ for (const FileChunk::UP & fc : _fileChunks) {
+ if (fc) {
+ fc->verify(reportOnly);
+ }
+ }
+}
+
+class LogDataStore::WrapVisitor : public IWriteData
+{
+ IDataStoreVisitor &_visitor;
+
+public:
+ void write(LockGuard guard, uint32_t chunkId, uint32_t lid, const void *buffer, size_t sz) override {
+ (void) chunkId;
+ guard.unlock();
+ _visitor.visit(lid, buffer, sz);
+ }
+
+ WrapVisitor(IDataStoreVisitor &visitor) : _visitor(visitor) { }
+ void close() override { }
+};
+
+
+class LogDataStore::WrapVisitorProgress : public IFileChunkVisitorProgress
+{
+ IDataStoreVisitorProgress &_progress;
+ const uint32_t _totalChunks;
+ uint32_t _processedChunks;
+
+public:
+ virtual
+ ~WrapVisitorProgress()
+ {
+ }
+
+ virtual void
+ updateProgress()
+ {
+ ++_processedChunks;
+ if (_totalChunks != 0) {
+ double progress = std::min(static_cast<double>(_processedChunks) /
+ static_cast<double>(_totalChunks),
+ 1.0);
+ _progress.updateProgress(progress);
+ }
+ };
+
+ WrapVisitorProgress(IDataStoreVisitorProgress &progress,
+ uint32_t totalChunks)
+ : _progress(progress),
+ _totalChunks(totalChunks),
+ _processedChunks(0u)
+ {
+ if (totalChunks == 0) {
+ progress.updateProgress(1.0);
+ }
+ }
+};
+
+
+void
+LogDataStore::internalFlushAll()
+{
+ uint64_t flushToken(initFlush(tentativeLastSyncToken()));
+ _tlSyncer.sync(flushToken);
+ flush(flushToken);
+}
+
+
+void
+LogDataStore::accept(IDataStoreVisitor &visitor,
+ IDataStoreVisitorProgress &visitorProgress,
+ bool prune)
+{
+ WrapVisitor wrap(visitor);
+ internalFlushAll();
+ FileIdxVector fileChunks;
+ fileChunks.reserve(_fileChunks.size());
+ for (auto &fc : _fileChunks) {
+ if (fc && (fc->getFileId() != _active)) {
+ fileChunks.push_back(fc->getFileId());
+ }
+ }
+ FileChunk & lfc = *_fileChunks[_active.getId()];
+
+ uint32_t totalChunks = 0;
+ for (auto &fc : fileChunks) {
+ totalChunks += _fileChunks[fc.getId()]->getNumChunks();
+ }
+ uint32_t lastChunks = lfc.getNumChunks();
+ totalChunks += lastChunks;
+ WrapVisitorProgress wrapProgress(visitorProgress, totalChunks);
+ for (FileId fcId : fileChunks) {
+ FileChunk & fc = *_fileChunks[fcId.getId()];
+ fc.appendTo(*this, wrap, fc.getNumChunks(), &wrapProgress);
+ if (prune) {
+ internalFlushAll();
+ FileChunk::UP toDie;
+ {
+ LockGuard guard(_updateLock);
+ toDie = std::move(_fileChunks[fcId.getId()]);
+ }
+ toDie->erase();
+ }
+ }
+ lfc.appendTo(*this, wrap, lastChunks, &wrapProgress);
+ if (prune) {
+ internalFlushAll();
+ }
+}
+
+
+double
+LogDataStore::getVisitCost() const
+{
+ uint32_t totalChunks = 0;
+ for (auto &fc : _fileChunks) {
+ totalChunks += fc->getNumChunks();
+ }
+ return totalChunks;
+}
+
+
+class LogDataStore::FileChunkHolder
+{
+private:
+ LogDataStore &_store;
+ FileId _fileId;
+public:
+ FileChunkHolder(LogDataStore &store, FileId fileId) : _store(store), _fileId(fileId) { }
+ ~FileChunkHolder() { _store.unholdFileChunk(_fileId); }
+};
+
+std::unique_ptr<LogDataStore::FileChunkHolder>
+LogDataStore::holdFileChunk(FileId fileId)
+{
+ assert(fileId.getId() < _holdFileChunks.size());
+ assert(_holdFileChunks[fileId.getId()] < 2000u);
+ ++_holdFileChunks[fileId.getId()];
+ return std::unique_ptr<FileChunkHolder>(new FileChunkHolder(*this, fileId));
+}
+
+
+void
+LogDataStore::unholdFileChunk(FileId fileId)
+{
+ LockGuard guard(_updateLock);
+ assert(fileId.getId() < _holdFileChunks.size());
+ assert(_holdFileChunks[fileId.getId()] > 0u);
+ --_holdFileChunks[fileId.getId()];
+ // No signalling, compactWorst() sleeps and retries
+}
+
+
+DataStoreStorageStats
+LogDataStore::getStorageStats() const
+{
+ uint64_t diskFootprint = getDiskFootprint();
+ uint64_t diskBloat = getDiskBloat();
+ double maxBucketSpread = getMaxBucketSpread();
+ // Note: Naming consistency issue
+ SerialNum lastSerialNum = tentativeLastSyncToken();
+ SerialNum lastFlushedSerialNum = lastSyncToken();
+ return DataStoreStorageStats(diskFootprint, diskBloat, maxBucketSpread,
+ lastSerialNum, lastFlushedSerialNum);
+}
+
+
+std::vector<DataStoreFileChunkStats>
+LogDataStore::getFileChunkStats() const
+{
+ std::vector<DataStoreFileChunkStats> result;
+ {
+ LockGuard guard(_updateLock);
+ for (const FileChunk::UP & fc : _fileChunks) {
+ if (fc) {
+ result.push_back(fc->getStats());
+ }
+ }
+ }
+ std::sort(result.begin(), result.end());
+ return std::move(result);
+}
+
+
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/docstore/logdatastore.h b/searchlib/src/vespa/searchlib/docstore/logdatastore.h
new file mode 100644
index 00000000000..53e5d5a5b69
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/docstore/logdatastore.h
@@ -0,0 +1,304 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/docstore/idatastore.h>
+#include <vespa/searchlib/docstore/writeablefilechunk.h>
+#include <vespa/searchlib/common/rcuvector.h>
+#include <vespa/document/util/compressionconfig.h>
+#include <vespa/vespalib/util/threadstackexecutor.h>
+#include <vespa/searchlib/common/tunefileinfo.h>
+#include <vespa/searchlib/transactionlog/syncproxy.h>
+
+#include <set>
+
+namespace search
+{
+
+namespace common
+{
+
+class FileHeaderContext;
+
+}
+
+
+/**
+ * Simple data storage for byte arrays.
+ * A small integer key is associated with each byte array;
+ * a zero-sized array is equivalent to a removed key.
+ * Changes are held in memory until flush() is called.
+ * A sync token is associated with each flush().
+ **/
+class LogDataStore : public IDataStore, public ISetLid, public IGetLid
+{
+private:
+ using NameId = FileChunk::NameId;
+ using FileId = FileChunk::FileId;
+public:
+ typedef vespalib::LockGuard LockGuard;
+ class Config {
+ public:
+ Config()
+ : _maxFileSize(1000000000ul),
+ _maxDiskBloatFactor(0.2),
+ _maxBucketSpread(2.5),
+ _minFileSizeFactor(0.2),
+ _numThreads(8),
+ _skipCrcOnRead(false),
+ _compactToActiveFile(true)
+ { }
+
+ Config(size_t maxFileSize,
+ double maxDiskBloatFactor,
+ double maxBucketSpread,
+ double minFileSizeFactor,
+ size_t numThreads,
+ bool compactToActiveFile,
+ const WriteableFileChunk::Config & fileConfig)
+ : _maxFileSize(maxFileSize),
+ _maxDiskBloatFactor(maxDiskBloatFactor),
+ _maxBucketSpread(maxBucketSpread),
+ _minFileSizeFactor(minFileSizeFactor),
+ _numThreads(numThreads),
+ _skipCrcOnRead(false),
+ _compactToActiveFile(compactToActiveFile),
+ _fileConfig(fileConfig)
+ { }
+
+ size_t getMaxFileSize() const { return _maxFileSize; }
+ double getMaxDiskBloatFactor() const { return _maxDiskBloatFactor; }
+ double getMaxBucketSpread() const { return _maxBucketSpread; }
+ double getMinFileSizeFactor() const { return _minFileSizeFactor; }
+
+ size_t getNumThreads() const { return _numThreads; }
+ bool crcOnReadDisabled() const { return _skipCrcOnRead; }
+ void disableCrcOnRead(bool v) { _skipCrcOnRead = v; }
+ bool compact2ActiveFile() const { return _compactToActiveFile; }
+
+ const WriteableFileChunk::Config & getFileConfig() const { return _fileConfig; }
+ private:
+ size_t _maxFileSize;
+ double _maxDiskBloatFactor;
+ double _maxBucketSpread;
+ double _minFileSizeFactor;
+ size_t _numThreads;
+ bool _skipCrcOnRead;
+ bool _compactToActiveFile;
+ WriteableFileChunk::Config _fileConfig;
+ };
+public:
+ /**
+ * Construct a log based data store.
+ * All files are stored in base directory.
+ *
+ * @param dirName The directory that will contain the data file.
+ * @param fileHeaderContext The file header context used to populate
+ * the generic file header with extra tags.
+ * The caller must keep it alive for the semantic
+ * lifetime of the log data store.
+ * @param tlSyncer Helper to sync transaction log to avoid
+ * it being behind the document store after a
+ * crash.
+ * The caller must keep it alive for the semantic
+ * lifetime of the log data store.
+ */
+ LogDataStore(vespalib::ThreadStackExecutorBase &executor,
+ const vespalib::string &dirName,
+ const Config & config,
+ const GrowStrategy &growStrategy,
+ const TuneFileSummary &tune,
+ const search::common::FileHeaderContext &fileHeaderContext,
+ transactionlog::SyncProxy &tlSyncer,
+ const IBucketizer::SP & bucketizer,
+ bool readOnly = false);
+
+ ~LogDataStore();
+
+ // Implements IDataStore API
+ ssize_t read(uint32_t lid, vespalib::DataBuffer & buffer) const override;
+ void read(const LidVector & lids, IBufferVisitor & visitor) const override;
+ void write(uint64_t serialNum, uint32_t lid, const void * buffer, size_t len) override;
+ void remove(uint64_t serialNum, uint32_t lid) override;
+ void flush(uint64_t syncToken) override;
+ uint64_t initFlush(uint64_t syncToken) override;
+ size_t memoryUsed() const override;
+ size_t memoryMeta() const override;
+ uint64_t lastSyncToken() const override;
+ uint64_t tentativeLastSyncToken() const override;
+ fastos::TimeStamp getLastFlushTime() const override;
+ size_t getDiskFootprint() const override;
+ size_t getDiskHeaderFootprint() const override;
+ size_t getDiskBloat() const override;
+ size_t getMaxCompactGain() const override;
+
+ /**
+ * Will compact the docsummary up to a lower limit of 5% bloat.
+ */
+ void compact(uint64_t syncToken);
+
+ const Config & getConfig() const { return _config; }
+ Config & getConfig() { return _config; }
+
+ void write(LockGuard guard, WriteableFileChunk & destination, uint64_t serialNum, uint32_t lid, const void * buffer, size_t len);
+ void write(LockGuard guard, FileId destinationFileId, uint32_t lid, const void * buffer, size_t len);
+
+ /**
+ * This will spinn through the data and verify the content of both
+ * the '.dat' and the '.idx' files.
+ *
+ * @param reportOnly If set inconsitencies will be written to 'stderr'.
+ */
+ void verify(bool reportOnly) const;
+
+ /**
+ * Visit all data found in data store.
+ */
+ void accept(IDataStoreVisitor &visitor, IDataStoreVisitorProgress &visitorProgress, bool prune) override;
+
+ /**
+ * Return cost of visiting all data found in data store.
+ */
+ double getVisitCost() const override;
+
+ // Implements IGetLid API
+ Guard getLidReadGuard() const override {
+ return _genHandler.takeGuard();
+ }
+
+ // Implements IGetLid API
+ LockGuard getLidGuard(uint32_t lid) const override {
+ (void) lid;
+ return LockGuard(_updateLock);
+ }
+
+ // Implements IGetLid API
+ LidInfo getLid(Guard & guard, uint32_t lid) const override {
+ (void) guard;
+ return _lidInfo[lid];
+ }
+ FileId getActiveFileId(const vespalib::LockGuard & guard) const {
+ assert(guard.locks(_updateLock));
+ return _active;
+ }
+
+ virtual DataStoreStorageStats getStorageStats() const override;
+
+ virtual std::vector<DataStoreFileChunkStats>
+ getFileChunkStats() const override;
+
+private:
+ class WrapVisitor;
+ class WrapVisitorProgress;
+ class FileChunkHolder;
+
+ void waitForUnblock();
+
+ // Implements ISetLid API
+ void setLid(uint32_t lid, const LidInfo & lm) override;
+
+ void compactWorst();
+ void compactFile(FileId chunkId);
+
+ typedef std::set<NameId> NameIdSet;
+ typedef attribute::RcuVector<uint64_t> LidInfoVector;
+ typedef std::vector<FileChunk::UP> FileChunkVector;
+
+ void updateLidMap();
+ void preload();
+ void verifyModificationTime(const NameIdSet & partList);
+
+ void eraseDanglingDatFiles(const NameIdSet &partList, const NameIdSet &datPartList);
+ NameIdSet eraseEmptyIdxFiles(const NameIdSet &partList);
+ void internalFlushAll(void);
+
+ NameIdSet scanDir(const vespalib::string &dir, const vespalib::string &suffix);
+ FileId allocateFileId(const LockGuard & guard);
+ FileId allocateFileId();
+ vespalib::string ls(const NameIdSet & partList);
+
+ WriteableFileChunk & getActive(const LockGuard & guard) {
+ assert(guard.locks(_updateLock));
+ return static_cast<WriteableFileChunk &>(*_fileChunks[_active.getId()]);
+ }
+
+ const WriteableFileChunk & getActive(const LockGuard & guard) const {
+ assert(guard.locks(_updateLock));
+ return static_cast<const WriteableFileChunk &>(*_fileChunks[_active.getId()]);
+ }
+
+ const FileChunk * getPrevActive(const LockGuard & guard) const {
+ assert(guard.locks(_updateLock));
+ return ( !_prevActive.isActive() ) ? _fileChunks[_prevActive.getId()].get() : NULL;
+ }
+ void setActive(const LockGuard & guard, FileId fileId) {
+ assert(guard.locks(_updateLock));
+ _prevActive = _active;
+ _active = fileId;
+ }
+
+ bool isBucketSpreadTooLarge(double spread) const {
+ return (spread >= _config.getMaxBucketSpread());
+ }
+ double getMaxBucketSpread() const;
+
+ FileChunk::UP createReadOnlyFile(FileId fileId, NameId nameId);
+ FileChunk::UP createWritableFile(FileId fileId, SerialNum serialNum);
+ FileChunk::UP createWritableFile(FileId fileId, SerialNum serialNum, NameId nameId);
+ vespalib::string createFileName(NameId id) const;
+ vespalib::string createDatFileName(NameId id) const;
+ vespalib::string createIdxFileName(NameId id) const;
+
+ void requireSpace(LockGuard guard, WriteableFileChunk & active);
+ bool isReadOnly() const { return _readOnly; }
+ void updateSerialNum();
+
+ bool isBloatOverLimit() const {
+ return isBloatOverLimit(getDiskBloat(), getDiskFootprint());
+ }
+ bool isBloatOverLimit(uint64_t bloat, uint64_t usage) const {
+ return (usage*_config.getMaxDiskBloatFactor() < bloat);
+ }
+
+ /*
+ * Protect against compactWorst() dropping file chunk. Caller must hold
+ * _updateLock.
+ */
+ std::unique_ptr<FileChunkHolder> holdFileChunk(FileId fileId);
+
+ /*
+ * Drop protection against compactWorst() dropping file chunk.
+ */
+ void unholdFileChunk(FileId fileId);
+
+ SerialNum flushFile(LockGuard guard, WriteableFileChunk & file, SerialNum syncToken);
+ SerialNum flushActive(SerialNum syncToken);
+ void flushActiveAndWait(SerialNum syncToken);
+ void flushFileAndWait(LockGuard guard, WriteableFileChunk & file, SerialNum syncToken);
+ SerialNum getMinLastPersistedSerialNum() const {
+ return (_fileChunks.empty() ? 0 : _fileChunks.back()->getLastPersistedSerialNum());
+ }
+ bool shouldCompactToActiveFile(size_t compactedSize) const;
+ std::pair<bool, FileId> findNextToCompact();
+
+ typedef std::vector<FileId> FileIdxVector;
+ Config _config;
+ TuneFileSummary _tune;
+ const search::common::FileHeaderContext &_fileHeaderContext;
+ mutable vespalib::GenerationHandler _genHandler;
+ LidInfoVector _lidInfo;
+ FileChunkVector _fileChunks;
+ std::vector<uint32_t> _holdFileChunks;
+ FileId _active;
+ FileId _prevActive;
+ vespalib::Lock _updateLock;
+ bool _readOnly;
+ vespalib::ThreadStackExecutorBase &_executor;
+ SerialNum _initFlushSyncToken;
+ transactionlog::SyncProxy &_tlSyncer;
+ IBucketizer::SP _bucketizer;
+ NameIdSet _currentlyCompacting;
+};
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/docstore/logdocumentstore.cpp b/searchlib/src/vespa/searchlib/docstore/logdocumentstore.cpp
new file mode 100644
index 00000000000..2a15cc118bd
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/docstore/logdocumentstore.cpp
@@ -0,0 +1,31 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "logdocumentstore.h"
+
+namespace search
+{
+
+using vespalib::nbostream;
+using common::FileHeaderContext;
+
+LogDocumentStore::LogDocumentStore(vespalib::ThreadStackExecutorBase & executor,
+ const vespalib::string & baseDir,
+ const Config & config,
+ const GrowStrategy & growStrategy,
+ const TuneFileSummary & tuneFileSummary,
+ const FileHeaderContext &fileHeaderContext,
+ transactionlog::SyncProxy &tlSyncer,
+ const IBucketizer::SP & bucketizer)
+ : DocumentStore(config, _backingStore),
+ _backingStore(executor, baseDir, config.getLogConfig(), growStrategy,
+ tuneFileSummary, fileHeaderContext, tlSyncer, bucketizer)
+{
+}
+
+LogDocumentStore::~LogDocumentStore()
+{
+}
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/docstore/logdocumentstore.h b/searchlib/src/vespa/searchlib/docstore/logdocumentstore.h
new file mode 100644
index 00000000000..3a8227c9f3c
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/docstore/logdocumentstore.h
@@ -0,0 +1,67 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "documentstore.h"
+#include "logdatastore.h"
+#include <vespa/searchlib/common/tunefileinfo.h>
+
+namespace search
+{
+
+namespace common
+{
+
+class FileHeaderContext;
+
+}
+
+/**
+ * Simple document store that contains serialized Document instances.
+ * updates will be held in memory until flush() is called.
+ * Uses a Local ID as key.
+ **/
+class LogDocumentStore : public DocumentStore
+{
+public:
+ class Config : public DocumentStore::Config {
+ public:
+ Config(const DocumentStore::Config & base, const LogDataStore::Config & log) :
+ DocumentStore::Config(base),
+ _logConfig(log)
+ { }
+ const LogDataStore::Config & getLogConfig() const { return _logConfig; }
+ LogDataStore::Config & getLogConfig() { return _logConfig; }
+ private:
+ LogDataStore::Config _logConfig;
+ };
+ /**
+ * Construct a document store.
+ * If the "simpledocstore.dat" data file exists, reads meta-data (offsets) into memory.
+ *
+ * @throws vespalib::IoException if the file is corrupt or other IO problems occur.
+ * @param docMan The document type manager to use when deserializing.
+ * @param baseDir The path to a directory where "simpledocstore.dat" will exist.
+ * @param fileHeaderContext The file header context used to populate
+ * the generic file header with extra tags.
+ * The caller must keep it alive for the semantic
+ * lifetime of the log data store.
+ */
+ LogDocumentStore(vespalib::ThreadStackExecutorBase & executor,
+ const vespalib::string & baseDir,
+ const Config & config,
+ const GrowStrategy & growStrategy,
+ const TuneFileSummary &tuneFileSummary,
+ const common::FileHeaderContext &fileHeaderContext,
+ transactionlog::SyncProxy &tlSyncer,
+ const IBucketizer::SP & bucketizer);
+ ~LogDocumentStore();
+ LogDataStore::Config & getLogConfig() { return _backingStore.getConfig(); }
+ const LogDataStore::Config & getLogConfig() const { return _backingStore.getConfig(); }
+private:
+ void compact(uint64_t syncToken) override { _backingStore.compact(syncToken); }
+ LogDataStore _backingStore;
+};
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/docstore/writeablefilechunk.cpp b/searchlib/src/vespa/searchlib/docstore/writeablefilechunk.cpp
new file mode 100644
index 00000000000..fe5678c61e8
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/docstore/writeablefilechunk.cpp
@@ -0,0 +1,868 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/searchlib/docstore/writeablefilechunk.h>
+#include <stdexcept>
+#include <vespa/vespalib/util/closuretask.h>
+#include <vespa/vespalib/util/stringfmt.h>
+#include <vespa/log/log.h>
+#include <map>
+#include <limits>
+#include <vespa/vespalib/data/fileheader.h>
+#include <vespa/searchlib/common/fileheadercontext.h>
+#include "data_store_file_chunk_stats.h"
+
+LOG_SETUP(".search.writeablefilechunk");
+
+using vespalib::makeTask;
+using vespalib::makeClosure;
+using vespalib::FileHeader;
+using vespalib::make_string;
+using vespalib::LockGuard;
+using vespalib::MonitorGuard;
+using vespalib::nbostream;
+using vespalib::IllegalHeaderException;
+using vespalib::GenerationHandler;
+using search::common::FileHeaderContext;
+
+namespace search {
+
+namespace
+{
+
+const uint64_t Alignment = 4096;
+const uint64_t headerAlign = 4096;
+
+}
+
+WriteableFileChunk::
+WriteableFileChunk(vespalib::ThreadStackExecutorBase &executor,
+ FileId fileId, NameId nameId,
+ const vespalib::string &baseName,
+ SerialNum initialSerialNum,
+ const Config &config,
+ const TuneFileSummary &tune,
+ const FileHeaderContext &fileHeaderContext,
+ const IBucketizer * bucketizer,
+ bool skipCrcOnRead)
+ : FileChunk(fileId, nameId, baseName, tune, bucketizer, skipCrcOnRead),
+ _config(config),
+ _serialNum(initialSerialNum),
+ _frozen(false),
+ _lock(),
+ _writeLock(),
+ _flushLock(),
+ _dataFile(_dataFileName.c_str()),
+ _idxFile(_idxFileName.c_str()),
+ _chunkMap(),
+ _pendingChunks(),
+ _pendingIdx(0),
+ _pendingDat(0),
+ _currentDiskFootprint(0),
+ _nextChunkId(1),
+ _active(new Chunk(0, Chunk::Config(config.getMaxChunkBytes(), config.getMaxChunkEntries()))),
+ _alignment(1),
+ _granularity(1),
+ _maxChunkSize(0x100000),
+ _firstChunkIdToBeWritten(0),
+ _writeTaskIsRunning(false),
+ _executor(executor),
+ _bucketMap(bucketizer)
+{
+ if (tune._write.getWantDirectIO()) {
+ _dataFile.EnableDirectIO();
+ _idxFile.EnableSyncWrites();
+ } else if (tune._write.getWantSyncWrites()) {
+ _dataFile.EnableSyncWrites();
+ _idxFile.EnableSyncWrites();
+ }
+ if (_dataFile.OpenReadWrite()) {
+ readDataHeader();
+ if (_dataHeaderLen == 0) {
+ writeDataHeader(fileHeaderContext);
+ }
+ _dataFile.SetPosition(_dataFile.GetSize());
+ if (tune._write.getWantDirectIO()) {
+ if (!_dataFile.GetDirectIORestrictions(_alignment, _granularity, _maxChunkSize)) {
+ LOG(debug, "Direct IO setup failed for file %s due to %s",
+ _dataFile.GetFileName(), _dataFile.getLastErrorString().c_str());
+ }
+ }
+ if (_idxFile.OpenReadWrite()) {
+ readIdxHeader();
+ if (_idxHeaderLen == 0) {
+ _idxHeaderLen = writeIdxHeader(fileHeaderContext, _idxFile);
+ }
+ _idxFile.SetPosition(_idxFile.GetSize());
+ } else {
+ _dataFile.Close();
+ throw SummaryException("Failed opening idx file", _idxFile, VESPA_STRLOC);
+ }
+ } else {
+ throw SummaryException("Failed opening data file", _dataFile, VESPA_STRLOC);
+ }
+ _firstChunkIdToBeWritten = _active->getId();
+ updateCurrentDiskFootprint();
+}
+
+WriteableFileChunk::~WriteableFileChunk()
+{
+ if (!frozen()) {
+ if (_active->size() || _active->count()) {
+ flush(true, _serialNum);
+ }
+ freeze();
+ }
+ // This is a wild stab at fixing bug 6348143.
+ // If it works it indicates something bad with the filesystem.
+ if (_dataFile.IsOpened()) {
+ if (! _dataFile.Sync()) {
+ assert(false);
+ }
+ }
+ if (_idxFile.IsOpened()) {
+ if (! _idxFile.Sync()) {
+ assert(false);
+ }
+ }
+}
+
+size_t
+WriteableFileChunk::updateLidMap(ISetLid & ds, uint64_t serialNum)
+{
+ size_t sz = FileChunk::updateLidMap(ds, serialNum);
+ _nextChunkId = _chunkInfo.size();
+ _active.reset( new Chunk(_nextChunkId++, Chunk::Config(_config.getMaxChunkBytes(), _config.getMaxChunkEntries())));
+ _serialNum = getLastPersistedSerialNum();
+ _firstChunkIdToBeWritten = _active->getId();
+ setDiskFootprint(0);
+ _chunkInfo.reserve(0x10000);
+ return sz;
+}
+
+void
+WriteableFileChunk::restart(const MonitorGuard & guard, uint32_t nextChunkId)
+{
+ (void) guard;
+ _writeTaskIsRunning = true;
+ _executor.execute(makeTask(makeClosure(this, &WriteableFileChunk::fileWriter, nextChunkId)));
+}
+
+namespace {
+
+LidInfoWithLidV::const_iterator
+find_first(LidInfoWithLidV::const_iterator begin, uint32_t chunkId) {
+ for ( ; begin->getChunkId() != chunkId; ++begin);
+ return begin;
+}
+
+LidInfoWithLidV::const_iterator
+seek_past(LidInfoWithLidV::const_iterator begin, LidInfoWithLidV::const_iterator end, uint32_t chunkId) {
+ for ( ; (begin < end) && (begin->getChunkId() == chunkId); begin++);
+ return begin;
+}
+
+}
+
+void
+WriteableFileChunk::read(LidInfoWithLidV::const_iterator begin, size_t count, IBufferVisitor & visitor) const
+{
+ if (count == 0) { return; }
+ if (!frozen()) {
+ vespalib::hash_map<uint32_t, ChunkInfo> chunksOnFile;
+ {
+ LockGuard guard(_lock);
+ for (size_t i(0); i < count; i++) {
+ const LidInfoWithLid & li = *(begin + i);
+ uint32_t chunk = li.getChunkId();
+ if ((chunk >= _chunkInfo.size()) || !_chunkInfo[chunk].valid()) {
+ ChunkMap::const_iterator found = _chunkMap.find(chunk);
+ vespalib::ConstBufferRef buffer;
+ if (found != _chunkMap.end()) {
+ buffer = found->second->getLid(li.getLid());
+ } else {
+ assert(chunk == _active->getId());
+ buffer = _active->getLid(li.getLid());
+ }
+ visitor.visit(li.getLid(), buffer);
+ } else {
+ chunksOnFile[chunk] = _chunkInfo[chunk];
+ }
+ }
+ }
+ for (auto & it : chunksOnFile) {
+ LidInfoWithLidV::const_iterator first = find_first(begin, it.first);
+ LidInfoWithLidV::const_iterator last = seek_past(first, begin + count, it.first);
+ FileChunk::read(first, last - first, it.second, visitor);
+ }
+ } else {
+ FileChunk::read(begin, count, visitor);
+ }
+}
+
+ssize_t
+WriteableFileChunk::read(uint32_t lid, SubChunkId chunkId, vespalib::DataBuffer & buffer) const
+{
+ ChunkInfo chunkInfo;
+ if (!frozen()) {
+ LockGuard guard(_lock);
+ if ((chunkId >= _chunkInfo.size()) || !_chunkInfo[chunkId].valid()) {
+ ChunkMap::const_iterator found = _chunkMap.find(chunkId);
+ if (found != _chunkMap.end()) {
+ return found->second->read(lid, buffer);
+ } else {
+ assert(chunkId == _active->getId());
+ return _active->read(lid, buffer);
+ }
+ }
+ chunkInfo = _chunkInfo[chunkId];
+ } else {
+ chunkInfo = _chunkInfo[chunkId];
+ }
+ return FileChunk::read(lid, chunkId, chunkInfo, buffer);
+}
+
+void
+WriteableFileChunk::internalFlush(uint32_t chunkId, uint64_t serialNum)
+{
+ Chunk * active(NULL);
+ {
+ LockGuard guard(_lock);
+ active = _chunkMap[chunkId].get();
+ }
+
+ ProcessedChunk::UP tmp(new ProcessedChunk(chunkId, _alignment));
+ if (_alignment > 1) {
+ tmp->getBuf().ensureFree(active->getMaxPackSize(_config.getCompression()) + _alignment - 1);
+ }
+ active->pack(serialNum, tmp->getBuf(), _config.getCompression());
+ tmp->setPayLoad();
+ if (_alignment > 1) {
+ const size_t padAfter((_alignment - tmp->getPayLoad() % _alignment) % _alignment);
+ memset(tmp->getBuf().getFree(), 0, padAfter);
+ tmp->getBuf().moveFreeToData(padAfter);
+ }
+ {
+ LockGuard innerGuard(_lock);
+ setDiskFootprint(FileChunk::getDiskFootprint() + tmp->getBuf().getDataLen());
+ }
+ enque(std::move(tmp));
+}
+
+void
+WriteableFileChunk::enque(ProcessedChunk::UP tmp)
+{
+ LOG(debug, "enqueing %p", tmp.get());
+ MonitorGuard guard(_writeMonitor);
+ _writeQ.push_back(std::move(tmp));
+ if (_writeTaskIsRunning == false) {
+ restart(guard, _firstChunkIdToBeWritten);
+ }
+ guard.signal();
+}
+
+namespace {
+
+const std::vector<char> Padding(Alignment, '\0');
+
+size_t
+getAlignedStartPos(FastOS_File & file)
+{
+ ssize_t startPos(file.GetPosition());
+ assert(startPos == file.GetSize());
+ if (startPos & (Alignment-1)) {
+ FastOS_File align(file.GetFileName());
+ if (align.OpenWriteOnly()) {
+ align.SetPosition(startPos);
+ ssize_t toWrite(Alignment - (startPos & (Alignment-1)));
+ ssize_t written = align.Write2(&Padding[0], toWrite);
+ if (written == toWrite) {
+ align.Sync();
+ file.SetPosition(align.GetSize());
+ startPos = file.GetPosition();
+ } else {
+ throw SummaryException(
+ make_string("Failed writing %ld bytes to dat file. Only %ld written", toWrite, written),
+ align, VESPA_STRLOC);
+ }
+ } else {
+ throw SummaryException("Failed opening dat file for padding for direct io.", align, VESPA_STRLOC);
+ }
+ }
+ assert((startPos & (Alignment-1)) == 0);
+ return startPos;
+}
+
+}
+
+WriteableFileChunk::ProcessedChunkQ
+WriteableFileChunk::drainQ()
+{
+ ProcessedChunkQ newChunks;
+ MonitorGuard guard(_writeMonitor);
+ newChunks.swap(_writeQ);
+ if ( ! newChunks.empty() ) {
+ guard.broadcast();
+ }
+ return newChunks;
+}
+
+void
+WriteableFileChunk::insertChunks(ProcessedChunkMap & orderedChunks, ProcessedChunkQ & newChunks, const uint32_t nextChunkId)
+{
+ for (auto &chunk : newChunks) {
+ if (chunk.get() != 0) {
+ assert(chunk->getChunkId() >= nextChunkId);
+ assert(orderedChunks.find(chunk->getChunkId()) == orderedChunks.end());
+ orderedChunks[chunk->getChunkId()] = std::move(chunk);
+ } else {
+ orderedChunks[std::numeric_limits<uint32_t>::max()] = ProcessedChunk::UP();
+ }
+ }
+}
+
+WriteableFileChunk::ProcessedChunkQ
+WriteableFileChunk::fetchNextChain(ProcessedChunkMap & orderedChunks, const uint32_t firstChunkId)
+{
+ ProcessedChunkQ chunks;
+ while (!orderedChunks.empty() &&
+ ((orderedChunks.begin()->first == (firstChunkId+chunks.size())) ||
+ (orderedChunks.begin()->second.get() == NULL)))
+ {
+ chunks.push_back(std::move(orderedChunks.begin()->second));
+ orderedChunks.erase(orderedChunks.begin());
+ }
+ return chunks;
+}
+
+ChunkMeta
+WriteableFileChunk::computeChunkMeta(const LockGuard & guard,
+ const GenerationHandler::Guard & bucketizerGuard,
+ size_t offset, const ProcessedChunk & tmp, const Chunk & active)
+{
+ (void) guard;
+ size_t dataLen = tmp.getBuf().getDataLen();
+ const ChunkMeta cmeta(offset, tmp.getPayLoad(), active.getLastSerial(), active.count());
+ assert((size_t(tmp.getBuf().getData())%_alignment) == 0);
+ assert((dataLen%_alignment) == 0);
+ PendingChunk::SP pcsp;
+ pcsp.reset(new PendingChunk(active.getLastSerial(), offset, dataLen));
+ PendingChunk &pc(*pcsp.get());
+ nbostream &os(pc.getSerializedIdx());
+ cmeta.serialize(os);
+ BucketDensityComputer bucketMap(_bucketizer);
+ for (const Chunk::Entry & e : active.getLids()) {
+ bucketMap.recordLid(bucketizerGuard, e.getLid(), e.netSize());
+ _bucketMap.recordLid(bucketizerGuard, e.getLid(), e.netSize());
+ LidMeta lm(e.getLid(), e.netSize());
+ lm.serialize(os);
+ }
+ addNumBuckets(bucketMap.getNumBuckets());
+ setNumUniqueBuckets(_bucketMap.getNumBuckets());
+
+ _pendingDat += pc.getDataLen();
+ _pendingIdx += pc.getIdxLen();
+ _pendingChunks.push_back(pcsp);
+ return cmeta;
+}
+
+ChunkMetaV
+WriteableFileChunk::computeChunkMeta(ProcessedChunkQ & chunks, size_t startPos, size_t & sz, bool & done)
+{
+ ChunkMetaV cmetaV;
+ cmetaV.reserve(chunks.size());
+ uint64_t lastSerial(_lastPersistedSerialNum);
+ LockGuard guard(_lock);
+
+ if (!_pendingChunks.empty()) {
+ const PendingChunk::SP pcsp(_pendingChunks.back());
+ const PendingChunk &pc(*pcsp.get());
+ assert(pc.getLastSerial() >= lastSerial);
+ lastSerial = pc.getLastSerial();
+ }
+
+ GenerationHandler::Guard bucketizerGuard = _bucketMap.getGuard();
+ for (size_t i(0), m(chunks.size()); i < m; i++) {
+ if (chunks[i].get() != 0) {
+ const ProcessedChunk & chunk = *chunks[i];
+ const ChunkMeta cmeta(computeChunkMeta(guard, bucketizerGuard, startPos + sz, chunk, *_chunkMap[chunk.getChunkId()]));
+ sz += chunk.getBuf().getDataLen();
+ cmetaV.push_back(cmeta);
+ assert(cmeta.getLastSerial() >= lastSerial);
+ lastSerial = cmeta.getLastSerial();
+ } else {
+ done = true;
+ assert((i+1) == chunks.size());
+ chunks.resize(i);
+ assert(i == chunks.size());
+ }
+ }
+ return cmetaV;
+}
+
+void
+WriteableFileChunk::writeData(const ProcessedChunkQ & chunks, size_t sz)
+{
+ vespalib::DataBuffer buf(0ul, _alignment);
+ buf.ensureFree(sz);
+ for (const ProcessedChunk::UP & chunk : chunks) {
+ buf.writeBytes(chunk->getBuf().getData(), chunk->getBuf().getDataLen());
+ }
+
+ LockGuard guard(_writeLock);
+ ssize_t wlen = _dataFile.Write2(buf.getData(), buf.getDataLen());
+ if (wlen != static_cast<ssize_t>(buf.getDataLen())) {
+ throw SummaryException(make_string("Failed writing %ld bytes to dat file. Only %ld written",
+ buf.getDataLen(), wlen),
+ _idxFile, VESPA_STRLOC);
+ }
+ updateCurrentDiskFootprint();
+}
+
+void
+WriteableFileChunk::updateChunkInfo(const ProcessedChunkQ & chunks, const ChunkMetaV & cmetaV, size_t sz)
+{
+ MonitorGuard guard(_lock);
+ size_t nettoSz(sz);
+ for (size_t i(0); i < chunks.size(); i++) {
+ const ProcessedChunk & chunk = *chunks[i];
+ assert(_chunkMap.find(chunk.getChunkId()) == _chunkMap.begin());
+ const Chunk & active = *_chunkMap.begin()->second;
+ if (active.getId() >= _chunkInfo.size()) {
+ _chunkInfo.resize(active.getId()+1);
+ }
+ const ChunkMeta & cmeta(cmetaV[i]);
+ _chunkInfo[active.getId()] = ChunkInfo(cmeta.getOffset(), chunk.getPayLoad(), cmeta.getLastSerial());
+ nettoSz += active.size();
+ _chunkMap.erase(_chunkMap.begin());
+ }
+ setDiskFootprint(FileChunk::getDiskFootprint() - nettoSz);
+ guard.broadcast();
+}
+
+void
+WriteableFileChunk::fileWriter(const uint32_t firstChunkId)
+{
+ LOG(debug, "Starting the filewriter with chunkid = %d", firstChunkId);
+ uint32_t nextChunkId(firstChunkId);
+ bool done(false);
+ {
+ ProcessedChunkQ newChunks(drainQ());
+ if ( ! newChunks.empty()) {
+ insertChunks(_orderedChunks, newChunks, nextChunkId);
+ ProcessedChunkQ chunks(fetchNextChain(_orderedChunks, nextChunkId));
+ nextChunkId += chunks.size();
+
+ size_t sz(0);
+ ChunkMetaV cmetaV(computeChunkMeta(chunks, getAlignedStartPos(_dataFile), sz, done));
+ writeData(chunks, sz);
+ updateChunkInfo(chunks, cmetaV, sz);
+ LOG(spam, "bucket spread = '%3.2f'", getBucketSpread());
+ }
+ }
+ LOG(debug,
+ "Stopping the filewriter with startchunkid = %d and ending chunkid = %d done=%d",
+ firstChunkId, nextChunkId, done);
+ if (done) {
+ MonitorGuard guard(_writeMonitor);
+ assert(_writeQ.empty());
+ assert(_chunkMap.empty());
+ for (const ChunkInfo & cm : _chunkInfo) {
+ assert(cm.valid() && cm.getSize() != 0);
+ }
+ _writeTaskIsRunning = false;
+ guard.broadcast();
+ } else {
+ MonitorGuard guard(_writeMonitor);
+ if (_writeQ.empty()) {
+ _firstChunkIdToBeWritten = nextChunkId;
+ _writeTaskIsRunning = false;
+ } else {
+ restart(guard, nextChunkId);
+ }
+ }
+}
+
+fastos::TimeStamp
+WriteableFileChunk::getModificationTime() const
+{
+ LockGuard guard(_lock);
+ return _modificationTime;
+}
+
+void
+WriteableFileChunk::freeze()
+{
+ if (!frozen()) {
+ waitForAllChunksFlushedToDisk();
+ enque(ProcessedChunk::UP());
+ _executor.sync();
+ {
+ MonitorGuard guard(_writeMonitor);
+ while (_writeTaskIsRunning) {
+ guard.wait(10);
+ }
+ assert(_writeQ.empty());
+ }
+ {
+ MonitorGuard guard(_lock);
+ setDiskFootprint(getDiskFootprint(guard));
+ _frozen = true;
+ }
+ _dataFile.Close();
+ _idxFile.Close();
+ _bucketMap = BucketDensityComputer(_bucketizer);
+ }
+}
+
+size_t
+WriteableFileChunk::getDiskFootprint() const
+{
+ if (frozen()) {
+ return FileChunk::getDiskFootprint();
+ } else {
+ // Double checked locking.
+ MonitorGuard guard(_lock);
+ return getDiskFootprint(guard);
+ }
+}
+
+size_t
+WriteableFileChunk::getDiskFootprint(const vespalib::MonitorGuard & guard) const
+{
+ assert(guard.monitors(_lock));
+ return frozen()
+ ? FileChunk::getDiskFootprint()
+ : _currentDiskFootprint + FileChunk::getDiskFootprint();
+}
+
+size_t
+WriteableFileChunk::getMemoryFootprint() const
+{
+ size_t sz(0);
+ LockGuard guard(_lock);
+ for (const auto & it : _chunkMap) {
+ sz += it.second->size();
+ }
+ sz += _pendingIdx + _pendingDat;
+ return sz + FileChunk::getMemoryFootprint();
+}
+
+size_t
+WriteableFileChunk::getMemoryMetaFootprint() const
+{
+ constexpr size_t mySizeWithoutMyParent(sizeof(*this) - sizeof(FileChunk));
+ return mySizeWithoutMyParent + FileChunk::getMemoryMetaFootprint();
+}
+
+int32_t WriteableFileChunk::flushLastIfNonEmpty(bool force)
+{
+ int32_t chunkId(-1);
+ MonitorGuard guard(_lock);
+ for (bool ready(false); !ready;) {
+ if (_chunkMap.size() > 1000) {
+ LOG(debug, "Summary write overload at least 1000 outstanding chunks. Suspending.");
+ guard.wait();
+ LOG(debug, "Summary write overload eased off. Commencing.");
+ } else {
+ ready = true;
+ }
+ }
+ if ( force || ! _active->empty()) {
+ chunkId = _active->getId();
+ _chunkMap[chunkId] = std::move(_active);
+ assert(_nextChunkId < LidInfo::getMaxChunkNum());
+ _active.reset(new Chunk(_nextChunkId++,
+ Chunk::Config(_config.getMaxChunkBytes(),
+ _config.getMaxChunkEntries())));
+ }
+ return chunkId;
+}
+
+void
+WriteableFileChunk::flush(bool block, uint64_t syncToken)
+{
+ int32_t chunkId = flushLastIfNonEmpty(syncToken > _serialNum);
+ if (chunkId >= 0) {
+ setSerialNum(syncToken);
+ _executor.execute(makeTask(makeClosure(this,
+ &WriteableFileChunk::internalFlush,
+ static_cast<uint32_t>(chunkId),
+ _serialNum)));
+ } else {
+ if (block) {
+ MonitorGuard guard(_lock);
+ if (!_chunkMap.empty()) {
+ chunkId = _chunkMap.rbegin()->first;
+ }
+ }
+ }
+ if (block) {
+ _executor.sync();
+ waitForChunkFlushedToDisk(chunkId);
+ }
+}
+
+void
+WriteableFileChunk::waitForDiskToCatchUpToNow() const
+{
+ int32_t chunkId(-1);
+ {
+ MonitorGuard guard(_lock);
+ if (!_chunkMap.empty()) {
+ chunkId = _chunkMap.rbegin()->first;
+ }
+ }
+ waitForChunkFlushedToDisk(chunkId);
+}
+
+void
+WriteableFileChunk::waitForChunkFlushedToDisk(uint32_t chunkId) const
+{
+ MonitorGuard guard(_lock);
+ while( _chunkMap.find(chunkId) != _chunkMap.end() ) {
+ guard.wait();
+ }
+}
+
+void
+WriteableFileChunk::waitForAllChunksFlushedToDisk() const
+{
+ MonitorGuard guard(_lock);
+ while( ! _chunkMap.empty() ) {
+ guard.wait();
+ }
+}
+
+LidInfo
+WriteableFileChunk::append(uint64_t serialNum,
+ uint32_t lid,
+ const void * buffer,
+ size_t len)
+{
+ assert( !frozen() );
+ if ( ! _active->hasRoom(len)) {
+ flush(false, _serialNum);
+ }
+ assert(serialNum >= _serialNum);
+ _serialNum = serialNum;
+ _addedBytes += adjustSize(len);
+ size_t oldSz(_active->size());
+ LidMeta lm = _active->append(lid, buffer, len);
+ setDiskFootprint(FileChunk::getDiskFootprint() - oldSz + _active->size());
+ return LidInfo(getFileId().getId(), _active->getId(), lm.size());
+}
+
+
+void
+WriteableFileChunk::readDataHeader(void)
+{
+ int64_t fSize(_dataFile.GetSize());
+ try {
+ FileHeader h;
+ _dataHeaderLen = h.readFile(_dataFile);
+ _dataFile.SetPosition(_dataHeaderLen);
+ } catch (IllegalHeaderException &e) {
+ _dataFile.SetPosition(0);
+ try {
+ FileHeader::FileReader fr(_dataFile);
+ uint32_t header2Len = FileHeader::readSize(fr);
+ if (header2Len <= fSize)
+ e.throwSelf(); // header not truncated
+ } catch (IllegalHeaderException &e2) {
+ }
+ if (fSize > 0) {
+ // Truncate file (dropping header) if cannot even read
+ // header length, or if header has been truncated.
+ _dataFile.SetPosition(0);
+ _dataFile.SetSize(0);
+ assert(_dataFile.GetSize() == 0);
+ assert(_dataFile.GetPosition() == 0);
+ LOG(warning,
+ "Truncated file chunk data %s due to truncated file header",
+ _dataFile.GetFileName());
+ }
+ }
+}
+
+
+void
+WriteableFileChunk::readIdxHeader(void)
+{
+ int64_t fSize(_idxFile.GetSize());
+ try {
+ FileHeader h;
+ _idxHeaderLen = h.readFile(_idxFile);
+ _idxFile.SetPosition(_idxHeaderLen);
+ } catch (IllegalHeaderException &e) {
+ _idxFile.SetPosition(0);
+ try {
+ FileHeader::FileReader fr(_idxFile);
+ uint32_t header2Len = FileHeader::readSize(fr);
+ if (header2Len <= fSize)
+ e.throwSelf(); // header not truncated
+ } catch (IllegalHeaderException &e2) {
+ }
+ if (fSize > 0) {
+ // Truncate file (dropping header) if cannot even read
+ // header length, or if header has been truncated.
+ _idxFile.SetPosition(0);
+ _idxFile.SetSize(0);
+ assert(_idxFile.GetSize() == 0);
+ assert(_idxFile.GetPosition() == 0);
+ LOG(warning,
+ "Truncated file chunk index %s due to truncated file header",
+ _idxFile.GetFileName());
+ }
+ }
+}
+
+
+void
+WriteableFileChunk::writeDataHeader(const FileHeaderContext &fileHeaderContext)
+{
+ typedef FileHeader::Tag Tag;
+ FileHeader h(headerAlign);
+ assert(_dataFile.IsOpened());
+ assert(_dataFile.IsWriteMode());
+ assert(_dataFile.GetPosition() == 0);
+ fileHeaderContext.addTags(h, _dataFile.GetFileName());
+ h.putTag(Tag("desc", "Log data store chunk data"));
+ _dataHeaderLen = h.writeFile(_dataFile);
+}
+
+
+uint64_t
+WriteableFileChunk::writeIdxHeader(const FileHeaderContext &fileHeaderContext, FastOS_FileInterface & file)
+{
+ typedef FileHeader::Tag Tag;
+ FileHeader h;
+ assert(file.IsOpened());
+ assert(file.IsWriteMode());
+ assert(file.GetPosition() == 0);
+ fileHeaderContext.addTags(h, file.GetFileName());
+ h.putTag(Tag("desc", "Log data store chunk index"));
+ return h.writeFile(file);
+}
+
+
+bool
+WriteableFileChunk::needFlushPendingChunks(uint64_t serialNum, uint64_t datFileLen) {
+ MonitorGuard guard(_lock);
+ return needFlushPendingChunks(guard, serialNum, datFileLen);
+}
+
+bool
+WriteableFileChunk::needFlushPendingChunks(const MonitorGuard & guard, uint64_t serialNum, uint64_t datFileLen)
+{
+ assert(guard.monitors(_lock));
+ if (_pendingChunks.empty())
+ return false;
+ const PendingChunk::SP pcsp(_pendingChunks.front());
+ const PendingChunk &pc(*pcsp.get());
+ if (pc.getLastSerial() > serialNum)
+ return false;
+ bool datWritten = datFileLen >= pc.getDataOffset() + pc.getDataLen();
+ if (pc.getLastSerial() < serialNum) {
+ assert(datWritten);
+ return true;
+ }
+ return datWritten;
+}
+
+void
+WriteableFileChunk::updateCurrentDiskFootprint() {
+ _currentDiskFootprint = _idxFile.getSize() + _dataFile.getSize();
+}
+
+/*
+ * Called by writeExecutor thread for now.
+ */
+void
+WriteableFileChunk::flushPendingChunks(uint64_t serialNum) {
+ LockGuard flushGuard(_flushLock);
+ if (frozen())
+ return;
+ uint64_t datFileLen = _dataFile.getSize();
+ fastos::TimeStamp timeStamp(fastos::ClockSystem::now());
+ if (needFlushPendingChunks(serialNum, datFileLen)) {
+ timeStamp = unconditionallyFlushPendingChunks(flushGuard, serialNum, datFileLen);
+ }
+ LockGuard guard(_lock);
+ _modificationTime = std::max(timeStamp, _modificationTime);
+}
+
+fastos::TimeStamp
+WriteableFileChunk::unconditionallyFlushPendingChunks(const vespalib::LockGuard &flushGuard, uint64_t serialNum, uint64_t datFileLen)
+{
+ assert(flushGuard.locks(_flushLock));
+ if ( ! _dataFile.Sync()) {
+ throw SummaryException("Failed fsync of dat file", _dataFile, VESPA_STRLOC);
+ }
+ nbostream os;
+ uint64_t lastSerial = 0;
+ {
+ MonitorGuard guard(_lock);
+ lastSerial = _lastPersistedSerialNum;
+ for (;;) {
+ if (!needFlushPendingChunks(guard, serialNum, datFileLen))
+ break;
+ PendingChunk::SP pcsp;
+ pcsp.swap(_pendingChunks.front());
+ _pendingChunks.pop_front();
+ const PendingChunk &pc(*pcsp.get());
+ assert(_pendingIdx >= pc.getIdxLen());
+ assert(_pendingDat >= pc.getDataLen());
+ assert(datFileLen >= pc.getDataOffset() + pc.getDataLen());
+ assert(lastSerial <= pc.getLastSerial());
+ _pendingIdx -= pc.getIdxLen();
+ _pendingDat -= pc.getDataLen();
+ lastSerial = pc.getLastSerial();
+ const nbostream &os2(pc.getSerializedIdx());
+ os.write(os2.c_str(), os2.size());
+ }
+ }
+ fastos::TimeStamp timeStamp(fastos::ClockSystem::now());
+ ssize_t wlen = _idxFile.Write2(os.c_str(), os.size());
+ updateCurrentDiskFootprint();
+
+ if (wlen != static_cast<ssize_t>(os.size())) {
+ throw SummaryException("Failed writing idx file", _idxFile, VESPA_STRLOC);
+ }
+ if ( ! _idxFile.Sync()) {
+ throw SummaryException("Failed fsync of idx file", _idxFile, VESPA_STRLOC);
+ }
+ if (_lastPersistedSerialNum < lastSerial) {
+ _lastPersistedSerialNum = lastSerial;
+ }
+ return timeStamp;
+}
+
+DataStoreFileChunkStats
+WriteableFileChunk::getStats() const
+{
+ DataStoreFileChunkStats stats = FileChunk::getStats();
+ uint64_t serialNum = getSerialNum();
+ return DataStoreFileChunkStats(stats.diskUsage(), stats.diskBloat(),
+ stats.maxBucketSpread(),
+ serialNum,
+ stats.lastFlushedSerialNum(),
+ stats.nameId());
+};
+
+WriteableFileChunk::PendingChunk::PendingChunk(uint64_t lastSerial,
+ uint64_t dataOffset,
+ uint32_t dataLen)
+ : _idx(),
+ _lastSerial(lastSerial),
+ _dataOffset(dataOffset),
+ _dataLen(dataLen)
+{
+}
+
+WriteableFileChunk::PendingChunk::~PendingChunk(void)
+{
+}
+
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/docstore/writeablefilechunk.h b/searchlib/src/vespa/searchlib/docstore/writeablefilechunk.h
new file mode 100644
index 00000000000..97c6ad8d711
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/docstore/writeablefilechunk.h
@@ -0,0 +1,185 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/docstore/filechunk.h>
+#include <vespa/vespalib/util/threadstackexecutor.h>
+#include <vespa/searchlib/transactionlog/syncproxy.h>
+#include <map>
+#include <deque>
+
+namespace search
+{
+
+namespace common
+{
+
+class FileHeaderContext;
+
+}
+
+class WriteableFileChunk : public FileChunk
+{
+public:
+ class Config
+ {
+ public:
+ Config()
+ : _compression(document::CompressionConfig::LZ4, 9, 60),
+ _maxChunkBytes(0x10000),
+ _maxChunkEntries(256)
+ { }
+
+ Config(const document::CompressionConfig &compression,
+ size_t maxChunkBytes, size_t maxChunkEntries)
+ : _compression(compression),
+ _maxChunkBytes(maxChunkBytes),
+ _maxChunkEntries(maxChunkEntries)
+ { }
+
+ const document::CompressionConfig & getCompression() const { return _compression; }
+ size_t getMaxChunkBytes() const { return _maxChunkBytes; }
+ size_t getMaxChunkEntries() const { return _maxChunkEntries; }
+ private:
+ document::CompressionConfig _compression;
+ size_t _maxChunkBytes;
+ size_t _maxChunkEntries;
+ };
+
+public:
+ typedef std::unique_ptr<WriteableFileChunk> UP;
+ WriteableFileChunk(vespalib::ThreadStackExecutorBase & executor,
+ FileId fileId, NameId nameId,
+ const vespalib::string & baseName,
+ uint64_t initialSerialNum,
+ const Config & config,
+ const TuneFileSummary &tune,
+ const common::FileHeaderContext &fileHeaderContext,
+ const IBucketizer * bucketizer,
+ bool crcOnReadDisabled);
+ ~WriteableFileChunk();
+
+ ssize_t read(uint32_t lid, SubChunkId chunk, vespalib::DataBuffer & buffer) const override;
+ void read(LidInfoWithLidV::const_iterator begin, size_t count, IBufferVisitor & visitor) const override;
+
+ LidInfo append(uint64_t serialNum, uint32_t lid, const void * buffer, size_t len);
+ void flush(bool block, uint64_t syncToken);
+ uint64_t getSerialNum() const { return _serialNum; }
+ void setSerialNum(uint64_t serialNum) { _serialNum = std::max(_serialNum, serialNum); }
+
+ virtual fastos::TimeStamp getModificationTime() const override;
+ void freeze();
+ size_t getDiskFootprint() const override;
+ size_t getMemoryFootprint() const override;
+ size_t getMemoryMetaFootprint() const override;
+ size_t updateLidMap(ISetLid & lidMap, uint64_t serialNum) override;
+ void waitForDiskToCatchUpToNow() const;
+ void flushPendingChunks(uint64_t serialNum);
+ virtual DataStoreFileChunkStats getStats() const override;
+
+ static uint64_t writeIdxHeader(const common::FileHeaderContext &fileHeaderContext, FastOS_FileInterface & file);
+private:
+ class ProcessedChunk
+ {
+ public:
+ typedef std::unique_ptr<ProcessedChunk> UP;
+ ProcessedChunk(uint32_t chunkId, uint32_t alignment)
+ : _chunkId(chunkId),
+ _payLoad(0),
+ _buf(0ul, alignment)
+ { }
+ void setPayLoad() { _payLoad = _buf.getDataLen(); }
+ uint32_t getPayLoad() const { return _payLoad; }
+ uint32_t getChunkId() const { return _chunkId; }
+ const vespalib::DataBuffer & getBuf() const { return _buf; }
+ vespalib::DataBuffer & getBuf() { return _buf; }
+ private:
+ uint32_t _chunkId;
+ uint32_t _payLoad;
+ vespalib::DataBuffer _buf;
+ };
+ typedef std::map<uint32_t, ProcessedChunk::UP> ProcessedChunkMap;
+
+ typedef std::vector<ProcessedChunk::UP> ProcessedChunkQ;
+
+ /*
+ * Information about serialized chunk written to .dat file but not yet
+ * synced.
+ */
+ class PendingChunk
+ {
+ vespalib::nbostream _idx; // Serialized chunk for .idx file
+ uint64_t _lastSerial;
+ uint64_t _dataOffset;
+ uint32_t _dataLen;
+ public:
+ typedef std::shared_ptr<PendingChunk> SP;
+ PendingChunk(uint64_t lastSerial, uint64_t dataOffset, uint32_t dataLen);
+ ~PendingChunk(void);
+ vespalib::nbostream & getSerializedIdx(void) { return _idx; }
+ const vespalib::nbostream & getSerializedIdx(void) const { return _idx; }
+ uint64_t getDataOffset(void) const { return _dataOffset; }
+ uint32_t getDataLen(void) const { return _dataLen; }
+ uint32_t getIdxLen(void) const { return _idx.size(); }
+ uint64_t getLastSerial(void) const { return _lastSerial; }
+ };
+
+ bool frozen() const override { return _frozen; }
+ void waitForChunkFlushedToDisk(uint32_t chunkId) const;
+ void waitForAllChunksFlushedToDisk() const;
+ void fileWriter(const uint32_t firstChunkId);
+ void internalFlush(uint32_t, uint64_t serialNum);
+ void enque(ProcessedChunk::UP);
+ int32_t flushLastIfNonEmpty(bool force);
+ void restart(const vespalib::MonitorGuard & guard, uint32_t nextChunkId);
+ ProcessedChunkQ drainQ();
+ void readDataHeader(void);
+ void readIdxHeader(void);
+ void writeDataHeader(const common::FileHeaderContext &fileHeaderContext);
+ bool needFlushPendingChunks(uint64_t serialNum, uint64_t datFileLen);
+ bool needFlushPendingChunks(const vespalib::MonitorGuard & guard, uint64_t serialNum, uint64_t datFileLen);
+ fastos::TimeStamp unconditionallyFlushPendingChunks(const vespalib::LockGuard & flushGuard, uint64_t serialNum, uint64_t datFileLen);
+ static void insertChunks(ProcessedChunkMap & orderedChunks, ProcessedChunkQ & newChunks, const uint32_t nextChunkId);
+ static ProcessedChunkQ fetchNextChain(ProcessedChunkMap & orderedChunks, const uint32_t firstChunkId);
+ size_t computeDataLen(const ProcessedChunk & tmp, const Chunk & active);
+ ChunkMeta computeChunkMeta(const vespalib::LockGuard & guard,
+ const vespalib::GenerationHandler::Guard & bucketizerGuard,
+ size_t offset, const ProcessedChunk & tmp, const Chunk & active);
+ ChunkMetaV computeChunkMeta(ProcessedChunkQ & chunks, size_t startPos, size_t & sz, bool & done);
+ void writeData(const ProcessedChunkQ & chunks, size_t sz);
+ void updateChunkInfo(const ProcessedChunkQ & chunks, const ChunkMetaV & cmetaV, size_t sz);
+ void updateCurrentDiskFootprint();
+ size_t getDiskFootprint(const vespalib::MonitorGuard & guard) const;
+
+ Config _config;
+ SerialNum _serialNum;
+ bool _frozen;
+ // Lock order is _writeLock, _flushLock, _lock
+ vespalib::Monitor _lock;
+ vespalib::Lock _writeLock;
+ vespalib::Lock _flushLock;
+ FastOS_File _dataFile;
+ FastOS_File _idxFile;
+ typedef std::map<uint32_t, Chunk::UP> ChunkMap;
+ ChunkMap _chunkMap;
+ typedef std::deque<PendingChunk::SP> PendingChunks;
+ PendingChunks _pendingChunks;
+ uint64_t _pendingIdx;
+ uint64_t _pendingDat;
+ uint64_t _currentDiskFootprint;
+ uint32_t _nextChunkId;
+ Chunk::UP _active;
+ size_t _alignment;
+ size_t _granularity;
+ size_t _maxChunkSize;
+ uint32_t _firstChunkIdToBeWritten;
+ bool _writeTaskIsRunning;
+ vespalib::Monitor _writeMonitor;
+ ProcessedChunkQ _writeQ;
+ vespalib::ThreadStackExecutorBase & _executor;
+ ProcessedChunkMap _orderedChunks;
+ BucketDensityComputer _bucketMap;
+};
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/engine/.gitignore b/searchlib/src/vespa/searchlib/engine/.gitignore
new file mode 100644
index 00000000000..583460ae288
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/engine/.gitignore
@@ -0,0 +1,3 @@
+*.So
+.depend
+Makefile
diff --git a/searchlib/src/vespa/searchlib/engine/CMakeLists.txt b/searchlib/src/vespa/searchlib/engine/CMakeLists.txt
new file mode 100644
index 00000000000..06c1c2db0ec
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/engine/CMakeLists.txt
@@ -0,0 +1,19 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_library(searchlib_engine OBJECT
+ SOURCES
+ docsumapi.cpp
+ docsumreply.cpp
+ docsumrequest.cpp
+ errorcodes.cpp
+ monitorreply.cpp
+ monitorrequest.cpp
+ packetconverter.cpp
+ propertiesmap.cpp
+ request.cpp
+ searchreply.cpp
+ searchrequest.cpp
+ source_description.cpp
+ transport_metrics.cpp
+ transportserver.cpp
+ DEPENDS
+)
diff --git a/searchlib/src/vespa/searchlib/engine/OWNERS b/searchlib/src/vespa/searchlib/engine/OWNERS
new file mode 100644
index 00000000000..12b533ec610
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/engine/OWNERS
@@ -0,0 +1 @@
+havardpe
diff --git a/searchlib/src/vespa/searchlib/engine/create-class-cpp.sh b/searchlib/src/vespa/searchlib/engine/create-class-cpp.sh
new file mode 100755
index 00000000000..1f638bf0cb6
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/engine/create-class-cpp.sh
@@ -0,0 +1,29 @@
+#!/bin/sh
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+class=$1
+guard=`echo $class | tr 'a-z' 'A-Z'`
+name=`echo $class | tr 'A-Z' 'a-z'`
+
+cat <<EOF
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/log/log.h>
+LOG_SETUP(".engine.$name");
+#include <vespa/fastos/fastos.h>
+#include "$name.h"
+
+namespace search {
+namespace engine {
+
+$class::$class()
+{
+}
+
+$class::~$class()
+{
+}
+
+} // namespace engine
+} // namespace search
+EOF
diff --git a/searchlib/src/vespa/searchlib/engine/create-class-h.sh b/searchlib/src/vespa/searchlib/engine/create-class-h.sh
new file mode 100644
index 00000000000..703d61207b3
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/engine/create-class-h.sh
@@ -0,0 +1,27 @@
+#!/bin/sh
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+class=$1
+guard=`echo $class | tr 'a-z' 'A-Z'`
+
+cat <<EOF
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+namespace search {
+namespace engine {
+
+class $class
+{
+private:
+ $class(const $class &);
+ $class &operator=(const $class &);
+public:
+ $class();
+ virtual ~$class();
+};
+
+} // namespace engine
+} // namespace search
+
+EOF
diff --git a/searchlib/src/vespa/searchlib/engine/create-interface.sh b/searchlib/src/vespa/searchlib/engine/create-interface.sh
new file mode 100644
index 00000000000..a59c924f9bd
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/engine/create-interface.sh
@@ -0,0 +1,23 @@
+#!/bin/sh
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+class=$1
+guard=`echo $class | tr 'a-z' 'A-Z'`
+
+cat <<EOF
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+namespace search {
+namespace engine {
+
+class $class
+{
+public:
+ virtual ~$class() {}
+};
+
+} // namespace engine
+} // namespace search
+
+EOF
diff --git a/searchlib/src/vespa/searchlib/engine/docsumapi.cpp b/searchlib/src/vespa/searchlib/engine/docsumapi.cpp
new file mode 100644
index 00000000000..2662a4d5bf9
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/engine/docsumapi.cpp
@@ -0,0 +1,17 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include "docsumapi.h"
+#include <vespa/vespalib/util/sync.h>
+
+namespace search {
+namespace engine {
+
+DocsumReply::UP
+DocsumServer::getDocsums(DocsumRequest::UP request)
+{
+ (void) request;
+ assert(false);
+ return DocsumReply::UP();
+}
+
+}
+}
diff --git a/searchlib/src/vespa/searchlib/engine/docsumapi.h b/searchlib/src/vespa/searchlib/engine/docsumapi.h
new file mode 100644
index 00000000000..39c5acd07e3
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/engine/docsumapi.h
@@ -0,0 +1,74 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "docsumrequest.h"
+#include "docsumreply.h"
+
+namespace search {
+namespace engine {
+
+/**
+ * A docsum client is the object being notified of the completion of
+ * an asynchronous docsum operation.
+ **/
+class DocsumClient
+{
+public:
+ /**
+ * Invoked by the docsum server to indicate the completion of an
+ * asynchronous docsum operation.
+ *
+ * @param reply the docsum reply
+ **/
+ virtual void getDocsumsDone(DocsumReply::UP reply) = 0;
+
+ /**
+ * Empty, needed for subclassing
+ **/
+ virtual ~DocsumClient() {}
+};
+
+/**
+ * A docsum server is an object capable of performing a docsum
+ * operation.
+ **/
+class DocsumServer
+{
+public:
+ /**
+ * Initiate a docsum operation that can be completed either
+ * synchronously or asynchronously. The return value will indicate
+ * whether the server selected to perform the operation
+ * synchronously or asynchronously. If the return value contains
+ * an object, then the operation completed synchronously and no
+ * further action will be taken by the server. If the return value
+ * did not contain an object, the operation will continue
+ * asynchronously, and the given client will be notified when the
+ * operation is completed. The server is not allowed to signal an
+ * asynchronous completion of the operation in the context of this
+ * method invocation.
+ *
+ * @return actual return value if sync, 'null' if async
+ * @param request object containing request parameters.
+ * Note that it is decoded lazily -> upon access.
+ * @param client the client to be notified of async completion
+ **/
+
+ virtual DocsumReply::UP getDocsums(DocsumRequest::Source request, DocsumClient &client) = 0;
+ /**
+ * As above but synchronous.
+ * @param request object containing request parameters.
+ * @return the response.
+ **/
+ virtual DocsumReply::UP getDocsums(DocsumRequest::UP request);
+
+ /**
+ * Empty, needed for subclassing
+ **/
+ virtual ~DocsumServer() {}
+};
+
+} // namespace engine
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/engine/docsumreply.cpp b/searchlib/src/vespa/searchlib/engine/docsumreply.cpp
new file mode 100644
index 00000000000..3fb21abc959
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/engine/docsumreply.cpp
@@ -0,0 +1,21 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".engine.docsumreply");
+#include "docsumreply.h"
+#include "tracereply.h"
+
+namespace search {
+namespace engine {
+
+DocsumReply::DocsumReply() : DocsumReply(vespalib::Slime::UP(nullptr)) { }
+
+DocsumReply::DocsumReply(vespalib::Slime::UP root)
+ : docsums(),
+ _root(std::move(root))
+{
+}
+
+} // namespace engine
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/engine/docsumreply.h b/searchlib/src/vespa/searchlib/engine/docsumreply.h
new file mode 100644
index 00000000000..4a751069bd8
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/engine/docsumreply.h
@@ -0,0 +1,47 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vector>
+#include <vespa/document/base/globalid.h>
+#include <vespa/vespalib/util/memory.h>
+#include <memory>
+#include <vespa/searchlib/engine/docsumrequest.h>
+#include "tracereply.h"
+
+namespace search {
+namespace engine {
+
+struct DocsumReply
+{
+ typedef std::unique_ptr<DocsumReply> UP;
+
+ typedef vespalib::MallocPtr Blob;
+
+ struct Docsum {
+ uint32_t docid;
+ document::GlobalId gid;
+ Blob data;
+
+ Docsum() : docid(0), gid(), data(0) {}
+ Docsum(document::GlobalId gid_) : docid(0), gid(gid_), data(0) { }
+ Docsum(document::GlobalId gid_, const char *buf, uint32_t len) : docid(0), gid(gid_), data(len) {
+ memcpy(data.str(), buf, len);
+ }
+ Docsum & setData(const char *buf, uint32_t len) {
+ data.resize(len);
+ memcpy(data.str(), buf, len);
+ return *this;
+ }
+ };
+ std::vector<Docsum> docsums;
+
+ mutable DocsumRequest::UP request;
+ vespalib::Slime::UP _root;
+
+ DocsumReply();
+ DocsumReply(vespalib::Slime::UP root);
+};
+
+} // namespace engine
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/engine/docsumrequest.cpp b/searchlib/src/vespa/searchlib/engine/docsumrequest.cpp
new file mode 100644
index 00000000000..80c4202f013
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/engine/docsumrequest.cpp
@@ -0,0 +1,38 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".engine.docsumrequest");
+#include "docsumrequest.h"
+#include "packetconverter.h"
+
+namespace search {
+namespace engine {
+
+DocsumRequest::DocsumRequest()
+ : DocsumRequest(false)
+{
+}
+
+DocsumRequest::DocsumRequest(bool useRootSlime_)
+ : _flags(0u),
+ resultClassName(),
+ useWideHits(false),
+ _useRootSlime(useRootSlime_),
+ hits()
+{
+}
+
+
+void DocsumRequest::Source::lazyDecode() const
+{
+ if ((_request.get() == NULL) && (_fs4Packet != NULL)) {
+ _request.reset(new DocsumRequest());
+ PacketConverter::toDocsumRequest(*_fs4Packet, *_request);
+ _fs4Packet->Free();
+ _fs4Packet = NULL;
+ }
+}
+
+} // namespace engine
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/engine/docsumrequest.h b/searchlib/src/vespa/searchlib/engine/docsumrequest.h
new file mode 100644
index 00000000000..c7b613f203d
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/engine/docsumrequest.h
@@ -0,0 +1,99 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/document/base/globalid.h>
+#include <vespa/searchlib/common/hitrank.h>
+#include "propertiesmap.h"
+#include "request.h"
+#include "source_description.h"
+#include <vespa/searchlib/common/packets.h>
+#include <vespa/vespalib/data/slime/slime.h>
+
+namespace search {
+namespace engine {
+
+class DocsumRequest : public Request
+{
+public:
+ typedef fs4transport::FS4Packet_GETDOCSUMSX FS4Packet_GETDOCSUMSX;
+
+ typedef std::unique_ptr<DocsumRequest> UP;
+ typedef std::shared_ptr<DocsumRequest> SP;
+
+ class Source {
+ private:
+ mutable DocsumRequest::UP _request;
+ mutable FS4Packet_GETDOCSUMSX *_fs4Packet;
+ void lazyDecode() const;
+ const SourceDescription _desc;
+ public:
+
+ Source(DocsumRequest * request) : _request(request), _fs4Packet(NULL), _desc(0) {}
+ Source(DocsumRequest::UP request) : _request(std::move(request)), _fs4Packet(NULL), _desc(0) {}
+ Source(FS4Packet_GETDOCSUMSX *query, SourceDescription desc) : _request(), _fs4Packet(query), _desc(desc) { }
+
+ Source(Source && rhs)
+ : _request(std::move(rhs._request)),
+ _fs4Packet(rhs._fs4Packet),
+ _desc(std::move(rhs._desc))
+ {
+ rhs._fs4Packet = NULL;
+ }
+
+ ~Source() {
+ if (_fs4Packet != NULL) {
+ _fs4Packet->Free();
+ }
+ }
+
+ const DocsumRequest * operator -> () const { return get(); }
+
+ const DocsumRequest * get() const {
+ lazyDecode();
+ return _request.get();
+ }
+
+ Source& operator= (Source && rhs) = delete;
+ Source & operator= (const Source &) = delete;
+ Source(const Source &) = delete;
+
+ UP release() {
+ lazyDecode();
+ return std::move(_request);
+ }
+ };
+
+ class Hit
+ {
+ public:
+ Hit() : gid(), docid(0), path(0) {}
+ Hit(const document::GlobalId & gid_) : gid(gid_), docid(0), path(0) {}
+
+ document::GlobalId gid;
+ mutable uint32_t docid; // converted in backend
+ uint32_t path; // wide
+ };
+
+public:
+ uint32_t _flags;
+ vespalib::string resultClassName;
+ bool useWideHits;
+private:
+ const bool _useRootSlime;
+public:
+ std::vector<Hit> hits;
+ std::vector<char> sessionId;
+
+ DocsumRequest();
+ explicit DocsumRequest(bool useRootSlime_);
+
+ const vespalib::stringref getStackRef() const {
+ return vespalib::stringref(&stackDump[0], stackDump.size());
+ }
+ bool useRootSlime() const { return _useRootSlime; }
+};
+
+} // namespace engine
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/engine/errorcodes.cpp b/searchlib/src/vespa/searchlib/engine/errorcodes.cpp
new file mode 100644
index 00000000000..beeff3f86c2
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/engine/errorcodes.cpp
@@ -0,0 +1,38 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright (C) 1998-2003 Fast Search & Transfer ASA
+// Copyright (C) 2003 Overture Services Norway AS
+
+#include <vespa/fastos/fastos.h>
+#include "errorcodes.h"
+
+namespace search {
+namespace engine {
+
+const char *
+getStringFromErrorCode(ErrorCode ecode)
+{
+ switch (ecode) {
+ case ECODE_NO_ERROR:
+ return "No error has occurred";
+ case ECODE_GENERAL_ERROR:
+ return "General error";
+ case ECODE_QUERY_PARSE_ERROR:
+ return "Error parsing query";
+ case ECODE_ALL_PARTITIONS_DOWN:
+ return "All searchnodes are down. This might indicate that no index is available yet.";
+ case ECODE_ILLEGAL_DATASET:
+ return "No such dataset";
+ case ECODE_OVERLOADED:
+ return "System is overloaded";
+ case ECODE_NOT_IMPLEMENTED:
+ return "The requested functionality is not implemented";
+ case ECODE_QUERY_NOT_ALLOWED:
+ return "Query not allowed to run";
+ case ECODE_TIMEOUT:
+ return "Query timed out";
+ }
+ return "Unknown error";
+}
+
+} // namespace engine
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/engine/errorcodes.h b/searchlib/src/vespa/searchlib/engine/errorcodes.h
new file mode 100644
index 00000000000..bd5fe04b4e7
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/engine/errorcodes.h
@@ -0,0 +1,38 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright (C) 1999-2003 Fast Search & Transfer ASA
+// Copyright (C) 2003 Overture Services Norway AS
+
+#pragma once
+
+namespace search {
+namespace engine {
+
+/**
+ * Enum defining global error codes.
+ * Used in error_code field in search::fs4transport::PCODE_ERROR packets.
+ **/
+enum ErrorCode {
+ ECODE_NO_ERROR = 0,
+ ECODE_GENERAL_ERROR = 1,
+ ECODE_QUERY_PARSE_ERROR = 2,
+ ECODE_ALL_PARTITIONS_DOWN = 3,
+ ECODE_ILLEGAL_DATASET = 4,
+ ECODE_OVERLOADED = 5,
+ ECODE_NOT_IMPLEMENTED = 6,
+ ECODE_QUERY_NOT_ALLOWED = 7,
+ ECODE_TIMEOUT = 8
+};
+
+/**
+ * Normally error codes should be accompanied by an error message
+ * describing the error. If no such message is present, this method
+ * may be used to obtain the default description of an error code.
+ *
+ * @param error the error code we want info about.
+ * @return the default error message for the given error code.
+ **/
+const char* getStringFromErrorCode(ErrorCode error);
+
+} // namespace engine
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/engine/monitorapi.h b/searchlib/src/vespa/searchlib/engine/monitorapi.h
new file mode 100644
index 00000000000..20c0bdb9caa
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/engine/monitorapi.h
@@ -0,0 +1,66 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "monitorrequest.h"
+#include "monitorreply.h"
+
+namespace search {
+namespace engine {
+
+/**
+ * A monitor client is the object being notified of the completion of
+ * an asynchronous monitor operation.
+ **/
+class MonitorClient
+{
+public:
+ /**
+ * Invoked by the monitor server to indicate the completion of an
+ * asynchronous monitor operation.
+ *
+ * @param reply the monitor reply
+ **/
+ virtual void pingDone(MonitorReply::UP reply) = 0;
+
+ /**
+ * Empty, needed for subclassing
+ **/
+ virtual ~MonitorClient() {}
+};
+
+/**
+ * A monitor server is an object capable of performing a monitor
+ * operation.
+ **/
+class MonitorServer
+{
+public:
+ /**
+ * Initiate a monitor operation that can be completed either
+ * synchronously or asynchronously. The return value will indicate
+ * whether the server selected to perform the operation
+ * synchronously or asynchronously. If the return value contains
+ * an object, then the operation completed synchronously and no
+ * further action will be taken by the server. If the return value
+ * did not contain an object, the operation will continue
+ * asynchronously, and the given client will be notified when the
+ * operation is completed. The server is not allowed to signal an
+ * asynchronous completion of the operation in the context of this
+ * method invocation.
+ *
+ * @return actual return value if sync, 'null' if async
+ * @param request object containing request parameters
+ * @param client the client to be notified of async completion
+ **/
+ virtual MonitorReply::UP ping(MonitorRequest::UP request, MonitorClient &client) = 0;
+
+ /**
+ * Empty, needed for subclassing
+ **/
+ virtual ~MonitorServer() {}
+};
+
+} // namespace engine
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/engine/monitorreply.cpp b/searchlib/src/vespa/searchlib/engine/monitorreply.cpp
new file mode 100644
index 00000000000..576f8e9bf39
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/engine/monitorreply.cpp
@@ -0,0 +1,26 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".engine.monitorreply");
+#include "monitorreply.h"
+
+namespace search {
+namespace engine {
+
+MonitorReply::MonitorReply()
+ : mld(),
+ activeDocsRequested(false),
+ partid(),
+ timestamp(),
+ totalNodes(),
+ activeNodes(),
+ totalParts(),
+ activeParts(),
+ activeDocs(0),
+ flags()
+{
+}
+
+} // namespace engine
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/engine/monitorreply.h b/searchlib/src/vespa/searchlib/engine/monitorreply.h
new file mode 100644
index 00000000000..ff4d64ff941
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/engine/monitorreply.h
@@ -0,0 +1,31 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/vespalib/stllike/string.h>
+#include <vector>
+#include <memory>
+
+namespace search {
+namespace engine {
+
+struct MonitorReply
+{
+ typedef std::unique_ptr<MonitorReply> UP;
+
+ bool mld;
+ bool activeDocsRequested;
+ uint32_t partid;
+ uint32_t timestamp;
+ uint32_t totalNodes; // mld
+ uint32_t activeNodes; // mld
+ uint32_t totalParts; // mld
+ uint32_t activeParts; // mld
+ uint64_t activeDocs;
+ uint32_t flags;
+
+ MonitorReply();
+};
+
+} // namespace engine
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/engine/monitorrequest.cpp b/searchlib/src/vespa/searchlib/engine/monitorrequest.cpp
new file mode 100644
index 00000000000..39d97830443
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/engine/monitorrequest.cpp
@@ -0,0 +1,17 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".engine.monitorrequest");
+#include "monitorrequest.h"
+
+namespace search {
+namespace engine {
+
+MonitorRequest::MonitorRequest()
+ : reportActiveDocs(false), flags(0)
+{
+}
+
+} // namespace engine
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/engine/monitorrequest.h b/searchlib/src/vespa/searchlib/engine/monitorrequest.h
new file mode 100644
index 00000000000..1f99180a754
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/engine/monitorrequest.h
@@ -0,0 +1,21 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+
+namespace search {
+namespace engine {
+
+struct MonitorRequest
+{
+ typedef std::shared_ptr<MonitorRequest> SP;
+ typedef std::unique_ptr<MonitorRequest> UP;
+
+ bool reportActiveDocs;
+ uint32_t flags;
+
+ MonitorRequest();
+};
+
+} // namespace engine
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/engine/packetconverter.cpp b/searchlib/src/vespa/searchlib/engine/packetconverter.cpp
new file mode 100644
index 00000000000..9282c7cc820
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/engine/packetconverter.cpp
@@ -0,0 +1,261 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".engine.packetconverter");
+#include "packetconverter.h"
+
+namespace {
+
+bool checkFeature(uint32_t features, uint32_t mask) {
+ return ((features & mask) != 0);
+}
+
+struct FS4PropertiesBuilder : public search::fef::IPropertiesVisitor {
+ uint32_t idx;
+ search::fs4transport::FS4Properties &props;
+ FS4PropertiesBuilder(search::fs4transport::FS4Properties &p) : idx(0), props(p) {}
+ virtual void visitProperty(const search::fef::Property::Value &key,
+ const search::fef::Property &values)
+ {
+ for (uint32_t i = 0; i < values.size(); ++i) {
+ props.setKey(idx, key.data(), key.size());
+ props.setValue(idx, values.getAt(i).data(), values.getAt(i).size());
+ ++idx;
+ }
+ }
+};
+
+} // namespace <unnamed>
+
+namespace search {
+namespace engine {
+
+using namespace search::fs4transport;
+
+void
+PacketConverter::fillPacketProperties(const PropertiesMap &source, PropsVector& target)
+{
+ target.resize(source.size());
+ PropertiesMap::ITR itr = source.begin();
+ PropertiesMap::ITR end = source.end();
+ for (uint32_t i = 0; itr != end; ++itr, ++i) {
+ const vespalib::string &name = itr->first;
+ const search::fef::Properties &values = itr->second;
+ target[i].setName(name.c_str(), name.size());
+ target[i].allocEntries(values.numValues());
+ FS4PropertiesBuilder builder(target[i]);
+ values.visitProperties(builder);
+ LOG_ASSERT(builder.idx == target[i].size());
+ LOG_ASSERT(builder.idx == values.numValues());
+ }
+}
+
+void
+PacketConverter::toSearchRequest(const QUERYX &packet, SearchRequest &request)
+{
+ request.offset = packet._offset;
+ request.maxhits = packet._maxhits;
+ request.setTimeout(packet.getTimeout());
+ request.queryFlags = packet._qflags;
+ request.ranking = packet._ranking;
+
+ for (uint32_t i = 0; i < packet._propsVector.size(); ++i) {
+ const FS4Properties &src = packet._propsVector[i];
+ search::fef::Properties &dst = request.propertiesMap.lookupCreate(src.getName());
+ for (uint32_t e = 0; e < src.size(); ++e) {
+ dst.add(vespalib::stringref(src.getKey(e), src.getKeyLen(e)),
+ vespalib::stringref(src.getValue(e), src.getValueLen(e)));
+ }
+ }
+ request.sortSpec = packet._sortSpec;
+ request.groupSpec.assign( packet._groupSpec.begin(), packet._groupSpec.end());
+ request.sessionId.assign( packet._sessionId.begin(), packet._sessionId.end());
+ request.location = packet._location;
+ request.stackItems = packet._numStackItems;
+ request.stackDump.assign( packet._stackDump.begin(), packet._stackDump.end());
+}
+
+void
+PacketConverter::fromSearchRequest(const SearchRequest &request, QUERYX &packet)
+{
+ // not needed yet
+ (void) packet;
+ (void) request;
+ LOG_ABORT("not implemented");
+}
+
+void
+PacketConverter::toSearchReply(const QUERYRESULTX &packet, SearchReply &reply)
+{
+ // not needed yet
+ (void) packet;
+ (void) reply;
+ LOG_ABORT("not implemented");
+}
+
+void
+PacketConverter::fromSearchReply(const SearchReply &reply, QUERYRESULTX &packet)
+{
+ packet._offset = reply.offset;
+ packet._numDocs = reply.hits.size();
+ packet._totNumDocs = reply.totalHitCount;
+ packet._maxRank = reply.maxRank;
+ packet.setDistributionKey(reply.getDistributionKey());
+ if (reply.sortIndex.size() > 0) {
+ packet._features |= QRF_SORTDATA;
+ uint32_t idxCnt = reply.sortIndex.size();
+ LOG_ASSERT(reply.sortIndex.size() == reply.hits.size()+1);
+ // allocate for N hits (will make space for N+1 indexes)
+ packet.AllocateSortIndex(reply.hits.size());
+ packet.AllocateSortData(reply.sortData.size());
+ for (uint32_t i = 0; i < idxCnt; ++i) {
+ packet._sortIndex[i] = reply.sortIndex[i];
+ }
+ memcpy(packet._sortData, &(reply.sortData[0]), reply.sortData.size());
+ }
+ if (reply.groupResult.size() > 0) {
+ packet._features |= QRF_GROUPDATA;
+ packet.AllocateGroupData(reply.groupResult.size());
+ memcpy(packet._groupData, &(reply.groupResult[0]), reply.groupResult.size());
+ }
+ if (reply.useCoverage) {
+ packet._features |= QRF_COVERAGE;
+ packet._coverageDocs = reply.coverage.getCovered();
+ packet._activeDocs = reply.coverage.getActive();
+ }
+ if (reply.useWideHits) {
+ packet._features |= QRF_MLD;
+ }
+ if (reply.propertiesMap.size() > 0) {
+ fillPacketProperties(reply.propertiesMap, packet._propsVector);
+ packet._features |= QRF_PROPERTIES;
+ }
+ uint32_t hitCnt = reply.hits.size();
+ packet.AllocateHits(hitCnt);
+ for (uint32_t i = 0; i < hitCnt; ++i) {
+ packet._hits[i]._gid = reply.hits[i].gid;
+ packet._hits[i]._metric = reply.hits[i].metric;
+ packet._hits[i]._partid = reply.hits[i].path;
+ packet._hits[i].setDistributionKey(reply.hits[i].getDistributionKey());
+ }
+}
+
+void
+PacketConverter::toDocsumRequest(const GETDOCSUMSX &packet, DocsumRequest &request)
+{
+ request.setTimeout(packet.getTimeout());
+ request.ranking = packet._ranking;
+ request.queryFlags = packet._qflags;
+ request.resultClassName = packet._resultClassName;
+ for (uint32_t i = 0; i < packet._propsVector.size(); ++i) {
+ const FS4Properties &src = packet._propsVector[i];
+ search::fef::Properties &dst = request.propertiesMap.lookupCreate(src.getName());
+ for (uint32_t e = 0; e < src.size(); ++e) {
+ dst.add(vespalib::stringref(src.getKey(e), src.getKeyLen(e)),
+ vespalib::stringref(src.getValue(e), src.getValueLen(e)));
+ }
+ }
+ request.stackItems = packet._stackItems;
+ request.stackDump.assign(packet._stackDump.begin(), packet._stackDump.end());
+ request.location = packet._location;
+ request._flags = packet._flags;
+ request.useWideHits = checkFeature(packet._features, GDF_MLD);
+ uint32_t hitCnt = packet._docidCnt;
+ request.hits.resize(hitCnt);
+ for (uint32_t i = 0; i < hitCnt; ++i) {
+ request.hits[i].gid = packet._docid[i]._gid;
+ request.hits[i].path = packet._docid[i]._partid;
+ }
+ search::fef::Property sessionId =
+ request.propertiesMap.rankProperties().lookup("sessionId");
+ if (sessionId.found()) {
+ vespalib::string id = sessionId.get();
+ request.sessionId.assign(id.begin(), id.end());
+ }
+}
+
+void
+PacketConverter::fromDocsumRequest(const DocsumRequest &request, GETDOCSUMSX &packet)
+{
+ // not needed yet
+ (void) packet;
+ (void) request;
+ LOG_ABORT("not implemented");
+}
+
+void
+PacketConverter::toDocsumReplyElement(const DOCSUM &packet, DocsumReply::Docsum &docsum)
+{
+ // not needed yet
+ (void) packet;
+ (void) docsum;
+ LOG_ABORT("not implemented");
+}
+
+void
+PacketConverter::fromDocsumReplyElement(const DocsumReply::Docsum &docsum, DOCSUM &packet)
+{
+ if (docsum.data.get() != 0) {
+ packet.SetBuf(docsum.data.c_str(), docsum.data.size());
+ }
+ packet.setGid(docsum.gid);
+}
+
+void
+PacketConverter::toMonitorRequest(const MONITORQUERYX &packet, MonitorRequest &request)
+{
+ request.flags = packet._qflags;
+ if ((packet._qflags & MQFLAG_REPORT_ACTIVEDOCS) != 0) {
+ request.reportActiveDocs = true;
+ }
+}
+
+void
+PacketConverter::fromMonitorRequest(const MonitorRequest &request, MONITORQUERYX &packet)
+{
+ // not needed yet
+ (void) packet;
+ (void) request;
+ LOG_ABORT("not implemented");
+}
+
+void
+PacketConverter::toMonitorReply(const MONITORRESULTX &packet, MonitorReply &reply)
+{
+ // not needed yet
+ (void) packet;
+ (void) reply;
+ LOG_ABORT("not implemented");
+}
+
+void
+PacketConverter::fromMonitorReply(const MonitorReply &reply, MONITORRESULTX &packet)
+{
+ if (reply.mld) {
+ packet._features |= MRF_MLD;
+ }
+ if (reply.activeDocsRequested) {
+ packet._features |= MRF_ACTIVEDOCS;
+ packet._activeDocs = reply.activeDocs;
+ }
+ packet._partid = reply.partid;
+ packet._timestamp = reply.timestamp;
+ packet._totalNodes = reply.totalNodes;
+ packet._activeNodes = reply.activeNodes;
+ packet._totalParts = reply.totalParts;
+ packet._activeParts = reply.activeParts;
+ packet._rflags = reply.flags;
+ if (packet._rflags != 0) {
+ packet._features |= MRF_RFLAGS;
+ }
+}
+
+void
+PacketConverter::fromTraceReply(const TraceReply &reply, TRACEREPLY &packet)
+{
+ fillPacketProperties(reply.propertiesMap, packet._propsVector);
+}
+
+} // namespace engine
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/engine/packetconverter.h b/searchlib/src/vespa/searchlib/engine/packetconverter.h
new file mode 100644
index 00000000000..f58bf6a9642
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/engine/packetconverter.h
@@ -0,0 +1,177 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/common/packets.h>
+#include "searchrequest.h"
+#include "searchreply.h"
+#include "docsumrequest.h"
+#include "docsumreply.h"
+#include "monitorrequest.h"
+#include "monitorreply.h"
+#include "tracereply.h"
+
+namespace search {
+namespace engine {
+
+
+/**
+ * This class helps convert data back and forth between transport
+ * packets and engine api request/reply objects. All converting
+ * methods expect the const object to be fully filled out and the
+ * non-const object to be newly created and thus empty. Half of the
+ * methods are left unimplemented for now as they would only be needed
+ * if we also were to use the api to wrap remote engines. However, if
+ * such a time comes, we will probably not be using the packet
+ * protocol anymore anyways.
+ **/
+class PacketConverter
+{
+private:
+ PacketConverter(); // can not be instantiated
+ PacketConverter(const PacketConverter &);
+ PacketConverter &operator=(const PacketConverter &);
+
+public:
+ typedef search::fs4transport::FS4Packet_QUERYX QUERYX;
+ typedef search::fs4transport::FS4Packet_QUERYRESULTX QUERYRESULTX;
+ typedef search::fs4transport::FS4Packet_QUEUELEN QUEUELEN;
+ typedef search::fs4transport::FS4Packet_ERROR ERROR;
+ typedef search::fs4transport::FS4Packet_GETDOCSUMSX GETDOCSUMSX;
+ typedef search::fs4transport::FS4Packet_DOCSUM DOCSUM;
+ typedef search::fs4transport::FS4Packet_EOL EOL;
+ typedef search::fs4transport::FS4Packet_MONITORQUERYX MONITORQUERYX;
+ typedef search::fs4transport::FS4Packet_MONITORRESULTX MONITORRESULTX;
+ typedef search::fs4transport::FS4Packet_TRACEREPLY TRACEREPLY;
+
+ /**
+ * Utility conversion from a "fef" set of propertymaps to an array of FS4Properties.
+ * @return false if no properties were converted.
+ **/
+ static void
+ fillPacketProperties(const PropertiesMap &source, search::fs4transport::PropsVector& target);
+
+ /**
+ * Convert from a QUERYX packet to a SearchRequest object.
+ *
+ * @param packet transport packet
+ * @param request api request object
+ **/
+ static void toSearchRequest(const QUERYX &packet, SearchRequest &request);
+
+ /**
+ * Convert from a SearchRequest object to a QUERYX packet.
+ *
+ * (NOT YET IMPLEMENTED)
+ *
+ * @param request api request object
+ * @param packet transport packet
+ **/
+ static void fromSearchRequest(const SearchRequest &request, QUERYX &packet);
+
+ /**
+ * Convert from a QUERYRESULTX packet to a SearchReply object.
+ *
+ * (NOT YET IMPLEMENTED)
+ *
+ * @param packet transport packet
+ * @param reply api reply object
+ **/
+ static void toSearchReply(const QUERYRESULTX &packet, SearchReply &reply);
+
+ /**
+ * Convert from a SearchReply object to a QUERYRESULTX
+ * packet. Note that this method only handles the query result
+ * aspect of the reply, errors and queue length reporting still
+ * needs to be handled separately by the code using this utility
+ * method.
+ *
+ * @param reply api reply object
+ * @param packet transport packet
+ **/
+ static void fromSearchReply(const SearchReply &reply, QUERYRESULTX &packet);
+
+ /**
+ * Convert from a GETDOCSUMSX packet to a DocsumRequest object.
+ *
+ * @param packet transport packet
+ * @param request api request object
+ **/
+ static void toDocsumRequest(const GETDOCSUMSX &packet, DocsumRequest &request);
+
+ /**
+ * Convert from a DocsumRequest object to a GETDOCSUMSX packet.
+ *
+ * (NOT YET IMPLEMENTED)
+ *
+ * @param packet transport packet
+ * @param request api request object
+ **/
+ static void fromDocsumRequest(const DocsumRequest &request, GETDOCSUMSX &packet);
+
+ /**
+ * Convert from a DOCSUM packet to an entry in a DocsumReply object
+ *
+ * (NOT YET IMPLEMENTED)
+ *
+ * @param packet transport packet
+ * @param docsum api reply object element
+ **/
+ static void toDocsumReplyElement(const DOCSUM &packet, DocsumReply::Docsum &docsum);
+
+ /**
+ * Convert from an entry in a DocsumReply object to a DOCSUM packet.
+ *
+ * @param docsum api reply object element
+ * @param packet transport packet
+ **/
+ static void fromDocsumReplyElement(const DocsumReply::Docsum &docsum, DOCSUM &packet);
+
+ /**
+ * Convert a MONITORQUERYX packet to a MonitorRequest object.
+ *
+ * @param packet transport packet
+ * @param request api request object
+ **/
+ static void toMonitorRequest(const MONITORQUERYX &packet, MonitorRequest &request);
+
+ /**
+ * Convert from a MonitorRequest object to a MONITORQUERYX packet
+ *
+ * (NOT YET IMPLEMENTED)
+ *
+ * @param request api request object
+ * @param packet transport packet
+ **/
+ static void fromMonitorRequest(const MonitorRequest &request, MONITORQUERYX &packet);
+
+ /**
+ * Convert from a MONITORRESULTX packet to a MonitorReply object.
+ *
+ * (NOT YET IMPLEMENTED)
+ *
+ * @param packet transport packet
+ * @param reply api reply object
+ **/
+ static void toMonitorReply(const MONITORRESULTX &packet, MonitorReply &reply);
+
+ /**
+ * Convert from a MonitorReply object to a MONITORRESULTX packet.
+ *
+ * @param reply api reply object
+ * @param packet transport packet
+ **/
+ static void fromMonitorReply(const MonitorReply &reply, MONITORRESULTX &packet);
+
+ /**
+ * Convert from a TraceReply object to a TRACE packet.
+ *
+ * @param reply api reply object
+ * @param packet transport packet
+ **/
+ static void fromTraceReply(const TraceReply &reply, TRACEREPLY &packet);
+};
+
+} // namespace engine
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/engine/propertiesmap.cpp b/searchlib/src/vespa/searchlib/engine/propertiesmap.cpp
new file mode 100644
index 00000000000..8f90ebdfc75
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/engine/propertiesmap.cpp
@@ -0,0 +1,30 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".engine.propertiesmap");
+#include "propertiesmap.h"
+
+namespace search {
+namespace engine {
+
+search::fef::Properties PropertiesMap::_emptyProperties;
+
+search::fef::Properties &
+PropertiesMap::lookupCreate(const vespalib::stringref &name)
+{
+ return _propertiesMap[name];
+}
+
+const search::fef::Properties &
+PropertiesMap::lookup(const vespalib::stringref &name) const
+{
+ PropsMap::const_iterator pos = _propertiesMap.find(name);
+ if (pos == _propertiesMap.end()) {
+ return _emptyProperties;
+ }
+ return pos->second;
+}
+
+} // namespace engine
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/engine/propertiesmap.h b/searchlib/src/vespa/searchlib/engine/propertiesmap.h
new file mode 100644
index 00000000000..f6f34512483
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/engine/propertiesmap.h
@@ -0,0 +1,129 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/vespalib/stllike/string.h>
+#include <vespa/searchlib/fef/properties.h>
+#include <vespa/searchlib/common/mapnames.h>
+
+namespace search {
+namespace engine {
+
+/**
+ * A simple wrapper class used to hold multiple named collections of
+ * properties.
+ **/
+class PropertiesMap
+{
+private:
+ typedef search::fef::Properties Props;
+ typedef vespalib::hash_map<vespalib::string, Props> PropsMap;
+
+ static Props _emptyProperties;
+ PropsMap _propertiesMap;
+
+ /**
+ * Obtain a named collection of properties. This method will
+ * return an empty collection of properties if the properties did
+ * not exist.
+ *
+ * @param name name of properties
+ * @return the properties
+ **/
+ const search::fef::Properties &lookup(const vespalib::stringref &name) const;
+
+public:
+ typedef PropsMap::const_iterator ITR;
+
+ /**
+ * Obtain a named collection of properties. This method will
+ * create the properties if they did not exist yet.
+ *
+ * @param name name of properties
+ * @return the properties
+ **/
+ search::fef::Properties &lookupCreate(const vespalib::stringref &name);
+
+ /**
+ * Obtain the number of named collection of properties held by
+ * this object.
+ *
+ * @return number of named collections of properties
+ **/
+ uint32_t size() const { return _propertiesMap.size(); }
+
+ /**
+ * Iterate the map.
+ *
+ * @return begin iterator
+ **/
+ ITR begin() const { return _propertiesMap.begin(); }
+
+ /**
+ * Iterate the map.
+ *
+ * @return end iterator
+ **/
+ ITR end() const { return _propertiesMap.end(); }
+
+ /**
+ * Obtain rank properties (used to tune ranking evaluation)
+ *
+ * @return rank properties
+ **/
+ const search::fef::Properties &rankProperties() const {
+ return lookup(MapNames::RANK);
+ }
+
+ /**
+ * Obtain feature overrides (used to hardwire the values of
+ * features during ranking evaluation)
+ *
+ * @return feature overrides
+ **/
+ const search::fef::Properties &featureOverrides() const {
+ return lookup(MapNames::FEATURE);
+ }
+
+ /**
+ * Obtain properties used to define additional highlight terms to
+ * be used during dynamic summary generation.
+ *
+ * @return highlight terms properties
+ **/
+ const search::fef::Properties &highlightTerms() const {
+ return lookup(MapNames::HIGHLIGHTTERMS);
+ }
+
+ /**
+ * Obtain match properties (used to tune match evaluation)
+ *
+ * @return match properties
+ **/
+ const search::fef::Properties &matchProperties() const {
+ return lookup(MapNames::MATCH);
+ }
+
+ /**
+ * Obtain cache properties (used to tune cache usage)
+ *
+ * @return cache properties
+ **/
+ const search::fef::Properties &cacheProperties() const {
+ return lookup(MapNames::CACHES);
+ }
+
+ /**
+ * Obtain model overrides
+ *
+ * @return model properties
+ **/
+ const search::fef::Properties &modelOverrides() const {
+ return lookup(MapNames::MODEL);
+ }
+
+};
+
+} // namespace engine
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/engine/request.cpp b/searchlib/src/vespa/searchlib/engine/request.cpp
new file mode 100644
index 00000000000..16853281098
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/engine/request.cpp
@@ -0,0 +1,41 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "request.h"
+
+namespace search {
+namespace engine {
+
+Request::Request() :
+ _startTime(fastos::ClockSystem::now()),
+ _timeOfDoom(fastos::TimeStamp(fastos::TimeStamp::FUTURE)),
+ ranking(),
+ queryFlags(0),
+ location(),
+ propertiesMap(),
+ stackItems(0),
+ stackDump()
+{
+}
+
+void Request::setTimeout(const fastos::TimeStamp & timeout)
+{
+ _timeOfDoom = _startTime + timeout;
+}
+
+fastos::TimeStamp Request::getTimeUsed() const
+{
+ return fastos::TimeStamp(fastos::ClockSystem::now()) - _startTime;
+}
+
+fastos::TimeStamp Request::getTimeLeft() const
+{
+ return _timeOfDoom - fastos::TimeStamp(fastos::ClockSystem::now());
+}
+
+Request::~Request()
+{
+}
+
+} // namespace engine
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/engine/request.h b/searchlib/src/vespa/searchlib/engine/request.h
new file mode 100644
index 00000000000..a65f9896f98
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/engine/request.h
@@ -0,0 +1,43 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/fastos/timestamp.h>
+#include <vespa/vespalib/stllike/string.h>
+#include "propertiesmap.h"
+
+namespace search {
+namespace engine {
+
+class Request
+{
+public:
+ Request();
+ virtual ~Request();
+ void setTimeout(const fastos::TimeStamp & timeout);
+ fastos::TimeStamp getStartTime() const { return _startTime; }
+ fastos::TimeStamp getTimeOfDoom() const { return _timeOfDoom; }
+ fastos::TimeStamp getTimeUsed() const;
+ fastos::TimeStamp getTimeLeft() const;
+ bool expired() const { return getTimeLeft() > 0l; }
+
+ const vespalib::stringref getStackRef() const {
+ return vespalib::stringref(&stackDump[0], stackDump.size());
+ }
+
+private:
+ const fastos::TimeStamp _startTime;
+ fastos::TimeStamp _timeOfDoom;
+public:
+ /// Everything here should move up to private section and have accessors
+ vespalib::string ranking;
+ uint32_t queryFlags;
+ vespalib::string location;
+ PropertiesMap propertiesMap;
+ uint32_t stackItems;
+ std::vector<char> stackDump;
+};
+
+} // namespace engine
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/engine/searchapi.h b/searchlib/src/vespa/searchlib/engine/searchapi.h
new file mode 100644
index 00000000000..d44dd07ae89
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/engine/searchapi.h
@@ -0,0 +1,66 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "searchrequest.h"
+#include "searchreply.h"
+
+namespace search {
+namespace engine {
+
+/**
+ * A search client is the object being notified of the completion of
+ * an asynchronous search operation.
+ **/
+class SearchClient
+{
+public:
+ /**
+ * Invoked by the search server to indicate the completion of an
+ * asynchronous search operation.
+ *
+ * @param reply the search reply
+ **/
+ virtual void searchDone(SearchReply::UP reply) = 0;
+
+ /**
+ * Empty, needed for subclassing
+ **/
+ virtual ~SearchClient() {}
+};
+
+/**
+ * A search server is an object capable of performing a search
+ * operation.
+ **/
+class SearchServer
+{
+public:
+ /**
+ * Initiate a search operation that can be completed either
+ * synchronously or asynchronously. The return value will indicate
+ * whether the server selected to perform the operation
+ * synchronously or asynchronously. If the return value contains
+ * an object, then the operation completed synchronously and no
+ * further action will be taken by the server. If the return value
+ * did not contain an object, the operation will continue
+ * asynchronously, and the given client will be notified when the
+ * operation is completed. The server is not allowed to signal an
+ * asynchronous completion of the operation in the context of this
+ * method invocation.
+ *
+ * @return actual return value if sync, 'null' if async
+ * @param request object containing request parameters
+ * @param client the client to be notified of async completion
+ **/
+ virtual SearchReply::UP search(SearchRequest::Source request, SearchClient &client) = 0;
+
+ /**
+ * Empty, needed for subclassing
+ **/
+ virtual ~SearchServer() {}
+};
+
+} // namespace engine
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/engine/searchreply.cpp b/searchlib/src/vespa/searchlib/engine/searchreply.cpp
new file mode 100644
index 00000000000..529be053e42
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/engine/searchreply.cpp
@@ -0,0 +1,56 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".engine.searchreply");
+#include "searchreply.h"
+#include <vespa/searchlib/common/mapnames.h>
+
+namespace search {
+namespace engine {
+
+SearchReply::SearchReply()
+ : valid(true),
+ offset(0),
+ _distributionKey(0),
+ totalHitCount(0),
+ maxRank(0),
+ sortIndex(),
+ sortData(),
+ groupResult(),
+ useCoverage(false),
+ coverage(),
+ useWideHits(false),
+ hits(),
+ errorCode(0),
+ errorMessage(),
+ useQueueLen(false),
+ queueLen(0),
+ request()
+{
+}
+
+SearchReply::SearchReply(const SearchReply &rhs)
+ :
+ valid (rhs.valid),
+ offset (rhs.offset),
+ _distributionKey (rhs._distributionKey),
+ totalHitCount(rhs.totalHitCount),
+ maxRank (rhs.maxRank),
+ sortIndex (rhs.sortIndex),
+ sortData (rhs.sortData),
+ groupResult (rhs.groupResult),
+ useCoverage (rhs.useCoverage),
+ coverage (rhs.coverage),
+ useWideHits (rhs.useWideHits),
+ hits (rhs.hits),
+ errorCode (rhs.errorCode),
+ errorMessage (rhs.errorMessage),
+ useQueueLen (rhs.useQueueLen),
+ queueLen (rhs.queueLen),
+ request() // NB not copied
+{
+}
+
+} // namespace engine
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/engine/searchreply.h b/searchlib/src/vespa/searchlib/engine/searchreply.h
new file mode 100644
index 00000000000..692806114ee
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/engine/searchreply.h
@@ -0,0 +1,84 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vector>
+#include <vespa/document/base/globalid.h>
+#include <vespa/searchlib/common/hitrank.h>
+#include <vespa/vespalib/util/array.h>
+#include <memory>
+#include <vespa/searchlib/engine/searchrequest.h>
+
+namespace search {
+namespace engine {
+
+class SearchReply
+{
+public:
+ typedef std::unique_ptr<SearchReply> UP;
+
+ class Hit
+ {
+ public:
+ Hit() : gid(), metric(0), path(0), _distributionKey(0) {}
+ void setDistributionKey(uint32_t key) { _distributionKey = key; }
+ uint32_t getDistributionKey() const { return _distributionKey; }
+ document::GlobalId gid;
+ search::HitRank metric;
+ uint32_t path; // wide
+ private:
+ int32_t _distributionKey; // wide
+ };
+
+ class Coverage {
+ public:
+ Coverage() : _covered(0), _active(0) {}
+ Coverage(uint64_t active) : _covered(active), _active(active) {}
+ Coverage(uint64_t active, uint64_t covered) : _covered(covered), _active(active) {}
+ uint64_t getCovered() const { return _covered; }
+ uint64_t getActive() const { return _active; }
+ Coverage & setCovered(uint64_t v) { _covered = v; return *this; }
+ Coverage & setActive(uint64_t v) { _active = v; return *this; }
+ private:
+ uint64_t _covered;
+ uint64_t _active;
+ };
+
+ // set to false to indicate 'talk to the hand' behavior
+ bool valid;
+
+ // normal results
+ uint32_t offset;
+private:
+ uint32_t _distributionKey;
+public:
+ uint64_t totalHitCount;
+ search::HitRank maxRank;
+ std::vector<uint32_t> sortIndex;
+ std::vector<char> sortData;
+ vespalib::Array<char, vespalib::DefaultAlloc> groupResult;
+ bool useCoverage;
+ Coverage coverage;
+ bool useWideHits;
+ std::vector<Hit> hits;
+ PropertiesMap propertiesMap;
+
+ // in case of error
+ uint32_t errorCode;
+ vespalib::string errorMessage;
+
+ // piggyback monitoring
+ bool useQueueLen;
+ uint32_t queueLen;
+ SearchRequest::UP request;
+
+ SearchReply();
+ SearchReply(const SearchReply &rhs); // for test only
+
+ void setDistributionKey(uint32_t key) { _distributionKey = key; }
+ uint32_t getDistributionKey() const { return _distributionKey; }
+};
+
+} // namespace engine
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/engine/searchrequest.cpp b/searchlib/src/vespa/searchlib/engine/searchrequest.cpp
new file mode 100644
index 00000000000..06df3c0b764
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/engine/searchrequest.cpp
@@ -0,0 +1,33 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".engine.searchrequest");
+#include "searchrequest.h"
+#include "packetconverter.h"
+
+namespace search {
+namespace engine {
+
+SearchRequest::SearchRequest()
+ : Request(),
+ offset(0),
+ maxhits(10),
+ sortSpec(),
+ groupSpec(),
+ sessionId()
+{
+}
+
+void SearchRequest::Source::lazyDecode() const
+{
+ if ((_request.get() == NULL) && (_fs4Packet != NULL)) {
+ _request.reset(new SearchRequest());
+ PacketConverter::toSearchRequest(*_fs4Packet, *_request);
+ _fs4Packet->Free();
+ _fs4Packet = NULL;
+ }
+}
+
+} // namespace engine
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/engine/searchrequest.h b/searchlib/src/vespa/searchlib/engine/searchrequest.h
new file mode 100644
index 00000000000..9214aec02f6
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/engine/searchrequest.h
@@ -0,0 +1,82 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/vespalib/stllike/string.h>
+#include "propertiesmap.h"
+#include "request.h"
+#include "source_description.h"
+#include <vespa/searchlib/common/packets.h>
+
+namespace search {
+namespace engine {
+
+class SearchRequest : public Request
+{
+public:
+ typedef std::unique_ptr<SearchRequest> UP;
+ typedef fs4transport::FS4Packet_QUERYX FS4Packet_QUERYX;
+
+ class Source {
+ private:
+ mutable std::unique_ptr<SearchRequest> _request;
+ mutable FS4Packet_QUERYX *_fs4Packet;
+ void lazyDecode() const;
+ const SourceDescription _desc;
+ public:
+
+ Source(SearchRequest * request) : _request(request), _fs4Packet(NULL), _desc(0)
+ {}
+
+ Source(FS4Packet_QUERYX *query, SourceDescription desc) : _request(), _fs4Packet(query), _desc(desc)
+ {
+ }
+
+ Source(Source && rhs)
+ : _request(std::move(rhs._request)),
+ _fs4Packet(rhs._fs4Packet),
+ _desc(std::move(rhs._desc))
+ {
+ rhs._fs4Packet = NULL;
+ }
+
+ ~Source() {
+ if (_fs4Packet != NULL) {
+ _fs4Packet->Free();
+ }
+ }
+
+ const SearchRequest * operator -> () const { return get(); }
+
+ const SearchRequest * get() const {
+ lazyDecode();
+ return _request.get();
+ }
+
+ Source& operator= (Source && rhs) = delete;
+ Source & operator= (const Source &) = delete;
+ Source(const Source &) = delete;
+
+ UP release() {
+ lazyDecode();
+ return std::move(_request);
+ }
+ };
+ typedef std::shared_ptr<SearchRequest> SP;
+
+ uint32_t offset;
+ uint32_t maxhits;
+ vespalib::string sortSpec;
+ std::vector<char> groupSpec;
+ std::vector<char> sessionId;
+
+ SearchRequest();
+
+ const vespalib::stringref getStackRef() const {
+ return vespalib::stringref(&stackDump[0], stackDump.size());
+ }
+};
+
+} // namespace engine
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/engine/source_description.cpp b/searchlib/src/vespa/searchlib/engine/source_description.cpp
new file mode 100644
index 00000000000..91feb25b0ff
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/engine/source_description.cpp
@@ -0,0 +1,11 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "source_description.h"
+
+namespace search {
+namespace engine {
+
+const vespalib::string SourceDescription::protocol("FS4");
+
+} // namespace engine
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/engine/source_description.h b/searchlib/src/vespa/searchlib/engine/source_description.h
new file mode 100644
index 00000000000..52c65d35dee
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/engine/source_description.h
@@ -0,0 +1,17 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+#include <vespa/vespalib/stllike/string.h>
+
+namespace search {
+namespace engine {
+
+struct SourceDescription {
+ int listenPort;
+ static const vespalib::string protocol;
+ SourceDescription(int port) : listenPort(port) {}
+};
+
+} // namespace engine
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/engine/tracereply.h b/searchlib/src/vespa/searchlib/engine/tracereply.h
new file mode 100644
index 00000000000..2d1cdf7ab61
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/engine/tracereply.h
@@ -0,0 +1,16 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include "propertiesmap.h"
+
+namespace search {
+namespace engine {
+
+struct TraceReply
+{
+ PropertiesMap propertiesMap;
+};
+
+} // namespace engine
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/engine/transport_metrics.cpp b/searchlib/src/vespa/searchlib/engine/transport_metrics.cpp
new file mode 100644
index 00000000000..c254787244a
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/engine/transport_metrics.cpp
@@ -0,0 +1,35 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".engine.transportmetrics");
+#include "transport_metrics.h"
+
+namespace search {
+namespace engine {
+
+TransportMetrics::QueryMetrics::QueryMetrics(metrics::MetricSet *parent)
+ : metrics::MetricSet("query", "", "Query metrics", parent),
+ count("count", "logdefault", "Query requests handled", this),
+ latency("latency", "logdefault", "Query request latency", this)
+{
+}
+
+TransportMetrics::DocsumMetrics::DocsumMetrics(metrics::MetricSet *parent)
+ : metrics::MetricSet("docsum", "", "Docsum metrics", parent),
+ count("count", "logdefault", "Docsum requests handled", this),
+ docs("docs", "logdefault", "Total docsums returned", this),
+ latency("latency", "logdefault", "Docsum request latency", this)
+{
+}
+
+TransportMetrics::TransportMetrics()
+ : metrics::MetricSet("transport", "", "Transport server metrics", 0),
+ updateLock(),
+ query(this),
+ docsum(this)
+{
+}
+
+} // namespace engine
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/engine/transport_metrics.h b/searchlib/src/vespa/searchlib/engine/transport_metrics.h
new file mode 100644
index 00000000000..fa62460434c
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/engine/transport_metrics.h
@@ -0,0 +1,37 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/metrics/metrics.h>
+#include <vespa/vespalib/util/sync.h>
+
+namespace search {
+namespace engine {
+
+struct TransportMetrics : metrics::MetricSet
+{
+ struct QueryMetrics : metrics::MetricSet {
+ metrics::LongCountMetric count;
+ metrics::DoubleAverageMetric latency;
+
+ QueryMetrics(metrics::MetricSet *parent);
+ };
+
+ struct DocsumMetrics : metrics::MetricSet {
+ metrics::LongCountMetric count;
+ metrics::LongCountMetric docs;
+ metrics::DoubleAverageMetric latency;
+
+ DocsumMetrics(metrics::MetricSet *parent);
+ };
+
+ vespalib::Lock updateLock;
+ QueryMetrics query;
+ DocsumMetrics docsum;
+
+ TransportMetrics();
+};
+
+} // namespace engine
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/engine/transportserver.cpp b/searchlib/src/vespa/searchlib/engine/transportserver.cpp
new file mode 100644
index 00000000000..cdde798b579
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/engine/transportserver.cpp
@@ -0,0 +1,427 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".engine.transportserver");
+#include <vespa/vespalib/util/stringfmt.h>
+#include <vespa/searchlib/common/packets.h>
+#include "packetconverter.h"
+#include "transportserver.h"
+
+namespace search {
+namespace engine {
+
+//-----------------------------------------------------------------------------
+
+typedef search::fs4transport::FS4PersistentPacketStreamer PacketStreamer;
+
+//-----------------------------------------------------------------------------
+
+void
+TransportServer::SearchHandler::start()
+{
+ SearchReply::UP reply = parent._searchServer.search(std::move(request), *this);
+ if (reply.get() != 0) {
+ searchDone(std::move(reply));
+ }
+}
+
+void
+TransportServer::SearchHandler::searchDone(SearchReply::UP reply)
+{
+ if (reply.get() != 0) {
+ const SearchReply &r = *reply;
+ if (r.valid) {
+ if (r.useQueueLen) {
+ PacketConverter::QUEUELEN *p = new PacketConverter::QUEUELEN();
+ p->_queueLen = r.queueLen;
+ p->_dispatchers = clientCnt;
+ if (shouldLog(DEBUG_SEARCH)) {
+ logPacket("outgoing packet", p, 0, channel->GetConnection());
+ }
+ channel->GetConnection()->PostPacket(p, FNET_NOID);
+ }
+ if (r.errorCode == 0) {
+ PacketConverter::QUERYRESULTX *p = new PacketConverter::QUERYRESULTX();
+ PacketConverter::fromSearchReply(r, *p);
+ p->UpdateCompatPCODE();
+ if (shouldLog(DEBUG_SEARCH)) {
+ logPacket("outgoing packet", p, channel, 0);
+ }
+ channel->Send(p);
+ } else {
+ PacketConverter::ERROR *p = new PacketConverter::ERROR();
+ p->_errorCode = r.errorCode;
+ p->setErrorMessage(r.errorMessage);
+ if (shouldLog(DEBUG_SEARCH)) {
+ logPacket("outgoing packet", p, channel, 0);
+ }
+ channel->Send(p);
+ }
+ if (r.request.get() != NULL) {
+ parent.updateQueryMetrics(r.request->getTimeUsed().sec()); // possible thread issue
+ }
+ } else {
+ PacketConverter::EOL *p = new PacketConverter::EOL();
+ if (shouldLog(DEBUG_SEARCH)) {
+ logPacket("outgoing packet", p, channel, 0);
+ }
+ channel->Send(p);
+ }
+ } else {
+ LOG(warning, "got <null> search reply from back-end");
+ }
+ delete this; // we are done
+}
+
+TransportServer::SearchHandler::~SearchHandler()
+{
+ channel->Free();
+}
+
+//-----------------------------------------------------------------------------
+
+void
+TransportServer::DocsumHandler::start()
+{
+ DocsumReply::UP reply = parent._docsumServer.getDocsums(std::move(request), *this);
+ if (reply.get() != 0) {
+ getDocsumsDone(std::move(reply));
+ }
+}
+
+void
+TransportServer::DocsumHandler::getDocsumsDone(DocsumReply::UP reply)
+{
+ if (reply.get() != 0) {
+ const DocsumReply &r = *reply;
+ for (uint32_t i = 0; i < r.docsums.size(); ++i) {
+ PacketConverter::DOCSUM *p = new PacketConverter::DOCSUM();
+ PacketConverter::fromDocsumReplyElement(r.docsums[i], *p);
+ if (shouldLog(DEBUG_DOCSUM)) {
+ logPacket("outgoing packet", p, channel, 0);
+ }
+ channel->Send(p);
+ }
+ PacketConverter::EOL *p = new PacketConverter::EOL();
+ if (shouldLog(DEBUG_DOCSUM)) {
+ logPacket("outgoing packet", p, channel, 0);
+ }
+ channel->Send(p);
+ if (r.request.get() != NULL) {
+ parent.updateDocsumMetrics(r.request->getTimeUsed().sec(),
+ r.docsums.size()); // possible thread issue
+ }
+ } else {
+ LOG(warning, "got <null> docsum reply from back-end");
+ }
+ delete this; // we are done
+}
+
+TransportServer::DocsumHandler::~DocsumHandler()
+{
+ channel->Free();
+}
+
+//-----------------------------------------------------------------------------
+
+void
+TransportServer::MonitorHandler::start()
+{
+ MonitorReply::UP reply = parent._monitorServer.ping(std::move(request), *this);
+ if (reply.get() != 0) {
+ pingDone(std::move(reply));
+ }
+}
+
+void
+TransportServer::MonitorHandler::pingDone(MonitorReply::UP reply)
+{
+ if (reply.get() != 0) {
+ const MonitorReply &r = *reply;
+ PacketConverter::MONITORRESULTX *p = new PacketConverter::MONITORRESULTX();
+ PacketConverter::fromMonitorReply(r, *p);
+ p->UpdateCompatPCODE();
+ if (shouldLog(DEBUG_MONITOR)) {
+ logPacket("outgoing packet", p, 0, connection);
+ }
+ connection->PostPacket(p, FNET_NOID);
+ } else {
+ LOG(warning, "got <null> monitor reply from back-end");
+ }
+ delete this; // we are done
+}
+
+TransportServer::MonitorHandler::~MonitorHandler()
+{
+ connection->SubRef();
+}
+
+//-----------------------------------------------------------------------------
+
+FNET_IPacketHandler::HP_RetCode
+TransportServer::HandlePacket(FNET_Packet *packet, FNET_Context context)
+{
+ uint32_t pcode = packet->GetPCODE();
+ FNET_Channel *channel = context._value.CHANNEL;
+ HP_RetCode rc = FNET_FREE_CHANNEL;
+
+ if (channel->GetID() == FNET_NOID) { // admin packet
+ if (packet->IsChannelLostCMD()) {
+ _clients.erase(channel);
+ if (shouldLog(DEBUG_CONNECTION)) {
+ LOG(debug, "connection closed: tag=%u", channel->GetConnection()->GetContext()._value.INT);
+ }
+ } else if (pcode == search::fs4transport::PCODE_MONITORQUERYX) {
+ const PacketConverter::MONITORQUERYX &mqx = static_cast<PacketConverter::MONITORQUERYX&>(*packet);
+ if (shouldLog(DEBUG_MONITOR)) {
+ logPacket("incoming packet", packet, channel, 0);
+ }
+ MonitorRequest::UP req(new MonitorRequest());
+ PacketConverter::toMonitorRequest(mqx, *req);
+ channel->GetConnection()->AddRef();
+ _pending.push(new MonitorHandler(*this, std::move(req), channel->GetConnection()));
+ rc = FNET_KEEP_CHANNEL;
+ } else if (shouldLog(DEBUG_UNHANDLED)) {
+ logPacket("unhandled packet", packet, channel, 0);
+ }
+ } else { // search/docsum request
+ if (pcode == search::fs4transport::PCODE_QUERYX) {
+ PacketConverter::QUERYX * qx = static_cast<PacketConverter::QUERYX *>(packet);
+ if (shouldLog(DEBUG_SEARCH)) {
+ logPacket("incoming packet", packet, channel, 0);
+ }
+ SearchRequest::Source req(qx, _sourceDesc);
+ packet = NULL;
+ _pending.push(new SearchHandler(*this, std::move(req), channel, _clients.size()));
+ rc = FNET_CLOSE_CHANNEL;
+ } else if (pcode == search::fs4transport::PCODE_GETDOCSUMSX) {
+ PacketConverter::GETDOCSUMSX * gdx = static_cast<PacketConverter::GETDOCSUMSX *>(packet);
+ if (shouldLog(DEBUG_DOCSUM)) {
+ logPacket("incoming packet", packet, channel, 0);
+ }
+ DocsumRequest::Source req(gdx, _sourceDesc);
+ packet = NULL;
+ _pending.push(new DocsumHandler(*this, std::move(req), channel));
+ rc = FNET_CLOSE_CHANNEL;
+ } else if (shouldLog(DEBUG_UNHANDLED)) {
+ logPacket("unhandled packet", packet, channel, 0);
+ }
+ }
+ if (packet != NULL) {
+ packet->Free();
+ }
+ return rc;
+}
+
+bool
+TransportServer::InitAdminChannel(FNET_Channel *channel)
+{
+ if (_listener == NULL) {
+ // handle race where we get an incoming connection and
+ // disables listening at the 'same time'. Note that sync close
+ // is only allowed in the InitAdminChannel method
+ channel->GetConnection()->Close(); // sync close
+ return false;
+ }
+ channel->SetContext(channel);
+ channel->SetHandler(this);
+ assert(_clients.count(channel) == 0);
+ _clients.insert(channel);
+ channel->GetConnection()->SetContext(FNET_Context(++_connTag));
+ if (shouldLog(DEBUG_CONNECTION)) {
+ LOG(debug, "connection established: tag=%u", _connTag);
+ }
+ return true;
+}
+
+bool
+TransportServer::InitChannel(FNET_Channel *channel, uint32_t pcode)
+{
+ channel->SetContext(channel);
+ channel->SetHandler(this);
+ if (shouldLog(DEBUG_CHANNEL)) {
+ LOG(debug, "new channel: id=%u, first pcode=%u", channel->GetID(), pcode);
+ }
+ return true;
+}
+
+void
+TransportServer::Run(FastOS_ThreadInterface *, void *)
+{
+ _dispatchTask.ScheduleNow();
+ _ready = true;
+ _transport.Main(); // <- transport event loop
+ _dispatchTask.Kill();
+ _listenTask.Kill();
+ discardRequests();
+}
+
+bool
+TransportServer::updateListen()
+{
+ bool doListen = _doListen;
+ if (doListen) {
+ if (_listener == NULL) { // start listening
+ _listener = _transport.Listen(_listenSpec.c_str(), &PacketStreamer::Instance, this);
+ if (_listener == NULL) {
+ LOG(error, "Could not bind fnet transport socket to %s", _listenSpec.c_str());
+ _failed = true;
+ return false;
+ }
+ }
+ } else {
+ if (_listener != NULL) { // stop listening
+ _transport.Close(_listener); // async close
+ _listener->SubRef();
+ _listener = NULL;
+ // also close client connections
+ std::set<FNET_Channel*>::iterator it = _clients.begin();
+ for (; it != _clients.end(); ++it) {
+ _transport.Close((*it)->GetConnection()); // async close
+ }
+ }
+ }
+ return true;
+}
+
+void
+TransportServer::dispatchRequests()
+{
+ while (!_pending.empty()) {
+ Handler *h = _pending.front();
+ _pending.pop();
+ h->start();
+ }
+}
+
+void
+TransportServer::discardRequests()
+{
+ while (!_pending.empty()) {
+ Handler *h = _pending.front();
+ _pending.pop();
+ delete h;
+ }
+}
+
+void
+TransportServer::logPacket(const vespalib::stringref &msg, FNET_Packet *p, FNET_Channel *ch, FNET_Connection *conn)
+{
+ uint32_t chid = -1;
+ uint32_t conntag = -1;
+ vespalib::string str;
+ if (ch != 0) {
+ chid = ch->GetID();
+ conntag = ch->GetConnection()->GetContext()._value.INT;
+ } else if (conn != 0) {
+ conntag = conn->GetContext()._value.INT;
+ }
+ search::fs4transport::FS4Packet *fs4p = dynamic_cast<search::fs4transport::FS4Packet*>(p);
+ if (fs4p != 0) {
+ str = fs4p->toString(0);
+ } else {
+ str = vespalib::make_string("packet { pcode=%u }", p->GetPCODE());
+ }
+ LOG(debug, "%s (chid=%u, conn=%u):\n%s", msg.c_str(), chid, conntag, str.c_str());
+}
+
+void
+TransportServer::updateQueryMetrics(double latency_s)
+{
+ vespalib::LockGuard guard(_metrics.updateLock);
+ _metrics.query.count.inc();
+ _metrics.query.latency.set(latency_s);
+}
+
+void
+TransportServer::updateDocsumMetrics(double latency_s, uint32_t numDocs)
+{
+ vespalib::LockGuard guard(_metrics.updateLock);
+ _metrics.docsum.count.inc();
+ _metrics.docsum.docs.inc(numDocs);
+ _metrics.docsum.latency.set(latency_s);
+}
+
+//-----------------------------------------------------------------------------
+
+bool
+TransportServer::shouldLog(uint32_t msgType) {
+ return (((msgType & _debugMask) != 0)
+ && ((msgType != DEBUG_MONITOR && LOG_WOULD_LOG(debug)) ||
+ (msgType == DEBUG_MONITOR && LOG_WOULD_LOG(spam))));
+}
+
+TransportServer::TransportServer(SearchServer &searchServer,
+ DocsumServer &docsumServer,
+ MonitorServer &monitorServer,
+ int port, uint32_t debugMask)
+ : _searchServer(searchServer),
+ _docsumServer(docsumServer),
+ _monitorServer(monitorServer),
+ _transport(),
+ _ready(false),
+ _failed(false),
+ _doListen(true),
+ _threadPool(256 * 1024),
+ _sourceDesc(port),
+ _listenSpec(),
+ _listener(0),
+ _clients(),
+ _pending(),
+ _dispatchTask(*this),
+ _listenTask(*this),
+ _connTag(0),
+ _debugMask(debugMask),
+ _metrics()
+{
+ _listenSpec = vespalib::make_string("tcp/%d", port);
+}
+
+bool
+TransportServer::start()
+{
+ if (!updateListen()) {
+ return false;
+ }
+ if (_threadPool.NewThread(this) == 0) {
+ LOG(error, "Could not start internal transport thread");
+ _failed = true;
+ return false;
+ }
+ return true;
+}
+
+int
+TransportServer::getListenPort()
+{
+ struct Cmd : public FNET_IExecutable {
+ TransportServer &server;
+ vespalib::Gate done;
+ int port;
+ Cmd(TransportServer &s) : server(s), done(), port(-1) {}
+ virtual void execute() {
+ if (server._listener != 0) {
+ port = server._listener->GetPortNumber();
+ }
+ done.countDown();
+ }
+ };
+ Cmd cmd(*this);
+ if (_transport.execute(&cmd)) {
+ cmd.done.await();
+ }
+ return cmd.port;
+};
+
+TransportServer::~TransportServer()
+{
+ shutDown(); // ensure shutdown
+ if (_listener != 0) {
+ _listener->SubRef();
+ _listener = 0;
+ }
+}
+
+} // namespace engine
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/engine/transportserver.h b/searchlib/src/vespa/searchlib/engine/transportserver.h
new file mode 100644
index 00000000000..a7de33093e2
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/engine/transportserver.h
@@ -0,0 +1,334 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/vespalib/util/atomic.h>
+#include <vespa/fnet/fnet.h>
+#include <set>
+#include <queue>
+#include <vespa/searchlib/engine/searchapi.h>
+#include <vespa/searchlib/engine/docsumapi.h>
+#include <vespa/searchlib/engine/monitorapi.h>
+#include <vespa/searchlib/common/packets.h>
+#include <vespa/vespalib/util/sync.h>
+#include "transport_metrics.h"
+#include "source_description.h"
+
+namespace search {
+namespace engine {
+
+
+/**
+ * Common transport server implementation interacting with the
+ * underlying search engine using the common search api. This
+ * implementation has less optimization tricks compared to the
+ * previous ones being integrated into specific applications.
+ **/
+class TransportServer : public FastOS_Runnable,
+ public FNET_IServerAdapter,
+ public FNET_IPacketHandler
+{
+private:
+ TransportServer(const TransportServer &);
+ TransportServer &operator=(const TransportServer &);
+
+ /**
+ * Task used to update listen status
+ **/
+ struct ListenTask : public FNET_Task
+ {
+ TransportServer &parent;
+ ListenTask(TransportServer &p) : FNET_Task(p._transport.GetScheduler()), parent(p) {}
+ virtual void PerformTask() { parent.updateListen(); }
+ };
+
+ /**
+ * Task used to dispatch incoming requests in an untangled way
+ * (aka not in the packet callback).
+ **/
+ struct DispatchTask : public FNET_Task
+ {
+ TransportServer &parent;
+ DispatchTask(TransportServer &p) : FNET_Task(p._transport.GetScheduler()), parent(p) {}
+ virtual void PerformTask() {
+ parent.dispatchRequests();
+ ScheduleNow(); // run each tick
+ }
+ };
+
+ class Handler;
+
+ SearchServer &_searchServer;
+ DocsumServer &_docsumServer;
+ MonitorServer &_monitorServer;
+ FNET_Transport _transport;
+ bool _ready; // flag indicating initial readyness
+ bool _failed; // flag indicating a critical failure
+ bool _doListen; // flag telling us to accept requests or not
+ FastOS_ThreadPool _threadPool; // thread pool owning transport thread
+ SourceDescription _sourceDesc; // description of where requests are coming from
+ vespalib::string _listenSpec; // where to listen; FNET connect spec
+ FNET_Connector *_listener; // object accepting incoming connections
+ std::set<FNET_Channel*> _clients; // the admin channel of all client connections
+ std::queue<Handler*> _pending; // queue of incoming requests not yet started
+ DispatchTask _dispatchTask; // task used to dispatch incoming requests
+ ListenTask _listenTask; // task used to update listen status
+ uint32_t _connTag; // sequential number used to tag connections
+ uint32_t _debugMask; // enable more debug logging with this
+ TransportMetrics _metrics; // metrics for this transport server
+
+ /**
+ * Toplevel class used to wrap incoming requests. Actual objects
+ * are used both to delay starting the request until we are not in
+ * the packet delivery callback and also as the callback target
+ * used by the underlying api objects to notify completion of
+ * individual requests.
+ **/
+ struct Handler
+ {
+ TransportServer &parent;
+ uint32_t _debugMask;
+ Handler(TransportServer &p) : parent(p), _debugMask(p._debugMask) {}
+ bool shouldLog(uint32_t msgType) { return parent.shouldLog(msgType); } // possible thread issue
+ virtual void start() = 0;
+ virtual ~Handler() {}
+ private:
+ Handler(const Handler &rhs);
+ Handler &operator=(const Handler &rhs);
+ };
+
+ /**
+ * Wrapper for search requests
+ **/
+ struct SearchHandler : public Handler,
+ public SearchClient
+ {
+ SearchRequest::Source request;
+ FNET_Channel *channel;
+ uint32_t clientCnt;
+
+ SearchHandler(TransportServer &p, SearchRequest::Source req, FNET_Channel *ch, uint32_t cnt)
+ : Handler(p), request(std::move(req)), channel(ch), clientCnt(cnt) {}
+ virtual void start();
+ virtual void searchDone(SearchReply::UP reply);
+ virtual ~SearchHandler();
+ };
+
+ /**
+ * Wrapper for docsum requests
+ **/
+ struct DocsumHandler : public Handler,
+ public DocsumClient
+ {
+ DocsumRequest::Source request;
+ FNET_Channel *channel;
+
+ DocsumHandler(TransportServer &p, DocsumRequest::Source req, FNET_Channel *ch)
+ : Handler(p), request(std::move(req)), channel(ch) {}
+ virtual void start();
+ virtual void getDocsumsDone(DocsumReply::UP reply);
+ virtual ~DocsumHandler();
+ };
+
+ /**
+ * Wrapper for monitor requests
+ **/
+ struct MonitorHandler : public Handler,
+ public MonitorClient
+ {
+ MonitorRequest::UP request;
+ FNET_Connection *connection;
+
+ MonitorHandler(TransportServer &p, MonitorRequest::UP req, FNET_Connection *conn)
+ : Handler(p), request(std::move(req)), connection(conn) {}
+ virtual void start();
+ virtual void pingDone(MonitorReply::UP reply);
+ virtual ~MonitorHandler();
+ };
+
+ // handle incoming network packets
+ virtual HP_RetCode HandlePacket(FNET_Packet *packet, FNET_Context context);
+
+ // set up admin channel for new clients
+ virtual bool InitAdminChannel(FNET_Channel *channel);
+
+ // set up channel for individual request
+ virtual bool InitChannel(FNET_Channel *channel, uint32_t pcode);
+
+ // entry point for thread running transport thread
+ virtual void Run(FastOS_ThreadInterface *thisThread, void *arg);
+
+ // update listen status
+ bool updateListen();
+
+ // dispatch incoming requests
+ void dispatchRequests();
+
+ // discard any pending requests during shutdown
+ void discardRequests();
+
+ // convenience method used to log packets
+ static void logPacket(const vespalib::stringref &msg, FNET_Packet *p, FNET_Channel *ch, FNET_Connection *conn);
+
+ void updateQueryMetrics(double latency_s);
+ void updateDocsumMetrics(double latency_s, uint32_t numDocs);
+
+public:
+ /**
+ * Convenience typedes.
+ */
+ typedef std::unique_ptr<TransportServer> UP;
+ typedef std::shared_ptr<TransportServer> SP;
+
+ /** no debug logging flags set **/
+ const static uint32_t DEBUG_NONE = 0x00000000;
+
+ /** log connect disconnect from clients **/
+ const static uint32_t DEBUG_CONNECTION = 0x00000001;
+
+ /** log channel open events **/
+ const static uint32_t DEBUG_CHANNEL = 0x00000002;
+
+ /** log search related packets **/
+ const static uint32_t DEBUG_SEARCH = 0x00000004;
+
+ /** log docsum related packets **/
+ const static uint32_t DEBUG_DOCSUM = 0x00000008;
+
+ /** log monitor related packets **/
+ const static uint32_t DEBUG_MONITOR = 0x00000010;
+
+ /** log unhandled packets **/
+ const static uint32_t DEBUG_UNHANDLED = 0x00000020;
+
+ /** all debug logging flags set **/
+ const static uint32_t DEBUG_ALL = 0x0000003f;
+
+ /**
+ * Check if we should log a debug message
+ *
+ * @return true if we should log a message for this event
+ * @param msgType the event we might want to log
+ **/
+ bool shouldLog(uint32_t msgType);
+
+ /**
+ * Create a transport server based on the given underlying api
+ * objects. An appropriate debug mask can be made by or'ing
+ * together the appropriate DEBUG_ constants defined in this
+ * class.
+ *
+ * @param searchServer search api
+ * @param docsumServer docsum api
+ * @param monitorServer monitor api
+ * @param port listen port.
+ * @param debugMask mask indicating what information should be logged as debug messages.
+ **/
+ TransportServer(SearchServer &searchServer,
+ DocsumServer &docsumServer,
+ MonitorServer &monitorServer,
+ int port, uint32_t debugMask = DEBUG_NONE);
+
+ /**
+ * Obtain the metrics used by this transport server.
+ *
+ * @return internal metrics
+ **/
+ TransportMetrics &getMetrics() { return _metrics; }
+
+ /**
+ * Obtain the listen spec used by this transport server
+ *
+ * @return listen spec
+ **/
+ const vespalib::string &getListenSpec() const { return _listenSpec; }
+
+ /**
+ * Start this server.
+ *
+ * @return success(true)/failure(false)
+ **/
+ bool start();
+
+ /**
+ * Check for initial readyness.
+ *
+ * @return true if we are ready.
+ **/
+ bool isReady() const { return _ready; }
+
+ /**
+ * Check if a critical error has occurred.
+ *
+ * @return true if something bad has happened.
+ **/
+ bool isFailed() const { return _failed; }
+
+ /**
+ * Get a reference to the internal fnet scheduler.
+ *
+ * @return fnet scheduler
+ **/
+ FNET_Scheduler &getScheduler() { return *(_transport.GetScheduler()); }
+
+ /**
+ * Set a flag indicating whether we should accept incoming
+ * requests or not. Setting the flag to false will make this
+ * server unavailable to any client application.
+ *
+ * @param listen flag indicating if we should listen
+ **/
+ void setListen(bool listen) {
+ _doListen = listen;
+ _listenTask.ScheduleNow();
+ }
+
+ /**
+ * Check which port this server is currently listening to. This
+ * method is useful when using automatically allocated port
+ * numbers (listening to port 0).
+ *
+ * @return current listening port number, -1 if not listening.
+ **/
+ int getListenPort();
+
+ /**
+ * Enable or disable nagles algorithm.
+ *
+ * @param noDelay set to true to disable nagles algorithm
+ **/
+ void setTCPNoDelay(bool noDelay) { _transport.SetTCPNoDelay(noDelay); }
+
+ /**
+ * Enable or disable the use of a Q for throughput between search thread and network thread.
+ *
+ * @param directWrite bypasses Q
+ **/
+ void setDirectWrite(bool directWrite) { _transport.SetDirectWrite(directWrite); }
+
+ /**
+ * Set a limit on how long a connection may be idle before closing it.
+ *
+ * @param millisecs max idle time in milliseconds
+ **/
+ void setIdleTimeout(double millisecs) { _transport.SetIOCTimeOut((uint32_t) millisecs); }
+
+ /**
+ * Shut down this component. This method will block until the
+ * transport server has been shut down. After this method returns,
+ * no new requests will be generated by this component.
+ **/
+ void shutDown() {
+ _transport.ShutDown(false);
+ _threadPool.Close();
+ }
+
+ /**
+ * Destructor will perform shutdown if needed.
+ **/
+ virtual ~TransportServer();
+};
+
+} // namespace engine
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/expression/.gitignore b/searchlib/src/vespa/searchlib/expression/.gitignore
new file mode 100644
index 00000000000..ee8938b6bf4
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/.gitignore
@@ -0,0 +1,6 @@
+*.So
+*.exe
+*.ilk
+*.pdb
+.depend*
+Makefile
diff --git a/searchlib/src/vespa/searchlib/expression/CMakeLists.txt b/searchlib/src/vespa/searchlib/expression/CMakeLists.txt
new file mode 100644
index 00000000000..8184765167d
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/CMakeLists.txt
@@ -0,0 +1,32 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_library(searchlib_expression OBJECT
+ SOURCES
+ perdocexpression.cpp
+ expressiontree.cpp
+ timestamp.cpp
+ bucketresultnode.cpp
+ integerbucketresultnode.cpp
+ floatbucketresultnode.cpp
+ stringbucketresultnode.cpp
+ rawbucketresultnode.cpp
+ fixedwidthbucketfunctionnode.cpp
+ rangebucketpredef.cpp
+ resultvector.cpp
+ catserializer.cpp
+ strcatserializer.cpp
+ documentfieldnode.cpp
+ attributenode.cpp
+ zcurve.cpp
+ ucafunctionnode.cpp
+ debugwaitfunctionnode.cpp
+ mathfunctionnode.cpp
+ numericfunctionnode.cpp
+ resultnode.cpp
+ interpolatedlookupfunctionnode.cpp
+ functionnodes.cpp
+ resultnodes.cpp
+ arrayatlookupfunctionnode.cpp
+ arrayoperationnode.cpp
+ aggregationrefnode.cpp
+ DEPENDS
+)
diff --git a/searchlib/src/vespa/searchlib/expression/OWNERS b/searchlib/src/vespa/searchlib/expression/OWNERS
new file mode 100644
index 00000000000..1037590124e
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/OWNERS
@@ -0,0 +1 @@
+balder
diff --git a/searchlib/src/vespa/searchlib/expression/addfunctionnode.h b/searchlib/src/vespa/searchlib/expression/addfunctionnode.h
new file mode 100644
index 00000000000..c2fc34e99be
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/addfunctionnode.h
@@ -0,0 +1,22 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/searchlib/expression/numericfunctionnode.h>
+
+namespace search {
+namespace expression {
+
+class AddFunctionNode : public NumericFunctionNode
+{
+public:
+ DECLARE_EXPRESSIONNODE(AddFunctionNode);
+ AddFunctionNode() { }
+private:
+ virtual void onArgument(const ResultNode & arg, ResultNode & result) const;
+ virtual ResultNode & flatten(const ResultNodeVector & v, ResultNode & result) const { return v.flattenSum(result); }
+ virtual ResultNode::CP getInitialValue() const { return ResultNode::CP(new Int64ResultNode(0)); }
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/expression/aggregationrefnode.cpp b/searchlib/src/vespa/searchlib/expression/aggregationrefnode.cpp
new file mode 100644
index 00000000000..52774fa7234
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/aggregationrefnode.cpp
@@ -0,0 +1,69 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/searchlib/expression/aggregationrefnode.h>
+#include <iostream>
+
+#include <stdexcept>
+
+namespace search {
+namespace expression {
+
+using namespace vespalib;
+
+IMPLEMENT_EXPRESSIONNODE(AggregationRefNode, ExpressionNode);
+
+AggregationRefNode::AggregationRefNode(const AggregationRefNode & rhs) :
+ ExpressionNode(),
+ _index(rhs._index),
+ _expressionNode(NULL)
+{
+}
+
+AggregationRefNode & AggregationRefNode::operator = (const AggregationRefNode & expr)
+{
+ if (this != &expr) {
+ _index = expr._index;
+ _expressionNode = NULL;
+ }
+ return *this;
+}
+
+bool AggregationRefNode::onExecute() const
+{
+ if (_expressionNode != NULL) {
+ return _expressionNode->execute();
+ }
+ return false;
+}
+
+void AggregationRefNode::locateExpression(ExpressionNodeArray & exprVec) const
+{
+ if (_expressionNode == NULL) {
+ _expressionNode = static_cast<ExpressionNode *>(exprVec[_index].get());
+ if (_expressionNode == NULL) {
+ throw std::runtime_error(make_string("Failed locating expression for index '%d'", _index));
+ }
+ }
+}
+
+Serializer & AggregationRefNode::onSerialize(Serializer & os) const
+{
+ return os << _index;
+}
+
+Deserializer & AggregationRefNode::onDeserialize(Deserializer & is)
+{
+ return is >> _index;
+}
+
+void
+AggregationRefNode::visitMembers(vespalib::ObjectVisitor &visitor) const
+{
+ visit(visitor, "index", _index);
+}
+
+}
+}
+
+// this function was added by ../../forcelink.sh
+void forcelink_file_searchlib_expression_expressionrefnode() {}
diff --git a/searchlib/src/vespa/searchlib/expression/aggregationrefnode.h b/searchlib/src/vespa/searchlib/expression/aggregationrefnode.h
new file mode 100644
index 00000000000..190682cb534
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/aggregationrefnode.h
@@ -0,0 +1,50 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/searchlib/aggregation/aggregationresult.h>
+#include <vespa/searchlib/expression/expressionnode.h>
+#include <vespa/searchlib/expression/resultvector.h>
+#include <vespa/searchlib/expression/serializer.h>
+#include <vespa/vespalib/objects/objectoperation.h>
+#include <vespa/vespalib/objects/objectpredicate.h>
+#include <iostream>
+
+namespace search {
+namespace expression {
+
+class AggregationRefNode : public ExpressionNode
+{
+public:
+ DECLARE_NBO_SERIALIZE;
+ class Configure : public vespalib::ObjectOperation, public vespalib::ObjectPredicate
+ {
+ public:
+ Configure(ExpressionNodeArray & exprVec) : _exprVec(exprVec) { }
+ private:
+ virtual void execute(vespalib::Identifiable &obj) { static_cast<AggregationRefNode&>(obj).locateExpression(_exprVec); }
+ virtual bool check(const vespalib::Identifiable &obj) const { return obj.inherits(AggregationRefNode::classId); }
+ ExpressionNodeArray & _exprVec;
+ };
+ virtual void visitMembers(vespalib::ObjectVisitor &visitor) const;
+
+ DECLARE_EXPRESSIONNODE(AggregationRefNode);
+ AggregationRefNode() : _index(0), _expressionNode(NULL) { }
+ AggregationRefNode(uint32_t index) : _index(index), _expressionNode(NULL) { }
+ AggregationRefNode(const AggregationRefNode & rhs);
+ AggregationRefNode & operator = (const AggregationRefNode & exprref);
+
+ ExpressionNode *getExpression() { return _expressionNode; }
+ virtual const ResultNode & getResult() const { return _expressionNode->getResult(); }
+ virtual void onPrepare(bool preserveAccurateTypes) { _expressionNode->prepare(preserveAccurateTypes); }
+ virtual bool onExecute() const;
+
+private:
+ void locateExpression(ExpressionNodeArray & exprVec) const;
+
+ uint32_t _index;
+ mutable ExpressionNode *_expressionNode;
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/expression/andfunctionnode.h b/searchlib/src/vespa/searchlib/expression/andfunctionnode.h
new file mode 100644
index 00000000000..cc8d89c669c
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/andfunctionnode.h
@@ -0,0 +1,24 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/searchlib/expression/bitfunctionnode.h>
+#include <vespa/searchlib/expression/integerresultnode.h>
+
+namespace search {
+namespace expression {
+
+class AndFunctionNode : public BitFunctionNode
+{
+public:
+ DECLARE_EXPRESSIONNODE(AndFunctionNode);
+ AndFunctionNode() { }
+private:
+ virtual ResultNode::CP getInitialValue() const { return ResultNode::CP(new Int64ResultNode(-1)); }
+ virtual ResultNode & flatten(const ResultNodeVector & v, ResultNode & result) const { return v.flattenAnd(result); }
+
+ virtual void onArgument(const ResultNode & arg, Int64ResultNode & result) const;
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/expression/arrayatlookupfunctionnode.cpp b/searchlib/src/vespa/searchlib/expression/arrayatlookupfunctionnode.cpp
new file mode 100644
index 00000000000..b112aa90969
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/arrayatlookupfunctionnode.cpp
@@ -0,0 +1,163 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/searchlib/expression/arrayatlookupfunctionnode.h>
+#include <vespa/searchlib/expression/floatresultnode.h>
+#include <vespa/searchlib/expression/integerresultnode.h>
+#include <vespa/searchlib/expression/stringresultnode.h>
+#include <vespa/searchcommon/attribute/iattributecontext.h>
+#include <vespa/searchlib/common/converters.h>
+#include <vespa/vespalib/util/stringfmt.h>
+
+namespace search {
+namespace expression {
+
+using vespalib::Serializer;
+using vespalib::Deserializer;
+
+IMPLEMENT_EXPRESSIONNODE(ArrayAtLookup, UnaryFunctionNode);
+
+ArrayAtLookup::ArrayAtLookup()
+{
+}
+
+ArrayAtLookup::~ArrayAtLookup()
+{
+}
+
+ArrayAtLookup::ArrayAtLookup(const vespalib::string &attribute,
+ const ExpressionNode::CP &arg)
+ : UnaryFunctionNode(arg),
+ _attributeName(attribute)
+{
+}
+
+ArrayAtLookup::ArrayAtLookup(const search::attribute::IAttributeVector &attr,
+ const ExpressionNode::CP &indexArg)
+ : UnaryFunctionNode(indexArg),
+ _attributeName(attr.getName()),
+ _attribute(&attr)
+{
+}
+
+
+ArrayAtLookup::ArrayAtLookup(const ArrayAtLookup &rhs) :
+ UnaryFunctionNode(rhs),
+ _attributeName(rhs._attributeName),
+ _attribute(rhs._attribute),
+ _docId(rhs._docId),
+ _basicAttributeType(rhs._basicAttributeType)
+{
+ // why?
+ _docId = 0;
+}
+
+ArrayAtLookup & ArrayAtLookup::operator= (const ArrayAtLookup &rhs)
+{
+ if (this != &rhs) {
+ UnaryFunctionNode::operator =(rhs);
+ _attributeName = rhs._attributeName;
+ _attribute = rhs._attribute;
+ // _docId = rhs._docId;
+ _docId = 0;
+ _basicAttributeType = rhs._basicAttributeType;
+ }
+ return *this;
+}
+
+void ArrayAtLookup::onPrepareResult()
+{
+ if (_attribute->isIntegerType()) {
+ _basicAttributeType = BAT_INT;
+ setResultType(std::unique_ptr<ResultNode>(new Int64ResultNode()));
+ } else if (_attribute->isFloatingPointType()) {
+ _basicAttributeType = BAT_FLOAT;
+ setResultType(std::unique_ptr<ResultNode>(new FloatResultNode()));
+ } else {
+ _basicAttributeType = BAT_STRING;
+ setResultType(std::unique_ptr<ResultNode>(new StringResultNode()));
+ }
+}
+
+bool ArrayAtLookup::onExecute() const
+{
+ getArg().execute();
+ int64_t idx = getArg().getResult().getInteger();
+ // get attribute data
+ size_t numValues = _attribute->getValueCount(_docId);
+ if (idx < 0) {
+ idx = 0;
+ }
+ if (idx >= (int64_t)numValues) {
+ idx = numValues - 1;
+ }
+
+ if (_basicAttributeType == BAT_FLOAT) {
+ std::vector<search::attribute::IAttributeVector::WeightedFloat> wVector;
+ wVector.resize(numValues);
+ _attribute->get(_docId, &wVector[0], numValues);
+ std::vector<double> tmp;
+ tmp.resize(numValues);
+ for (size_t i = 0; i < numValues; ++i) {
+ tmp[i] = wVector[i].getValue();
+ }
+ double result = 0;
+ if (idx >= 0 && idx < (int64_t)numValues) {
+ result = tmp[idx];
+ }
+ static_cast<FloatResultNode &>(updateResult()).set(result);
+ } else if (_basicAttributeType == BAT_INT) {
+ std::vector<search::attribute::IAttributeVector::WeightedInt> wVector;
+ wVector.resize(numValues);
+ _attribute->get(_docId, &wVector[0], numValues);
+ std::vector<int64_t> tmp;
+ tmp.resize(numValues);
+ for (size_t i = 0; i < numValues; ++i) {
+ tmp[i] = wVector[i].getValue();
+ }
+ int64_t result = 0;
+ if (idx >= 0 && idx < (int64_t)numValues) {
+ result = tmp[idx];
+ }
+ static_cast<Int64ResultNode &>(updateResult()).set(result);
+ } else {
+ std::vector<search::attribute::IAttributeVector::WeightedString> wVector;
+ wVector.resize(numValues);
+ _attribute->get(_docId, &wVector[0], numValues);
+ std::vector<vespalib::string> tmp;
+ tmp.resize(numValues);
+ for (size_t i = 0; i < numValues; ++i) {
+ tmp[i] = wVector[i].getValue();
+ }
+ vespalib::string result;
+ if (idx >= 0 && idx < (int64_t)numValues) {
+ result = tmp[idx];
+ }
+ static_cast<StringResultNode &>(updateResult()).set(result);
+ }
+ return true;
+}
+
+void ArrayAtLookup::wireAttributes(const search::attribute::IAttributeContext & attrCtx)
+{
+ _attribute = attrCtx.getAttribute(_attributeName);
+ if (_attribute == NULL) {
+ throw std::runtime_error(vespalib::make_string("Failed locating attribute vector '%s'", _attributeName.c_str()));
+ }
+}
+
+Serializer & ArrayAtLookup::onSerialize(Serializer & os) const
+{
+ UnaryFunctionNode::onSerialize(os);
+ os << _attributeName;
+ return os;
+}
+
+Deserializer & ArrayAtLookup::onDeserialize(Deserializer & is)
+{
+ UnaryFunctionNode::onDeserialize(is);
+ is >> _attributeName;
+ return is;
+}
+
+} // namespace expression
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/expression/arrayatlookupfunctionnode.h b/searchlib/src/vespa/searchlib/expression/arrayatlookupfunctionnode.h
new file mode 100644
index 00000000000..271543126da
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/arrayatlookupfunctionnode.h
@@ -0,0 +1,46 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/searchlib/expression/unaryfunctionnode.h>
+
+namespace search {
+namespace expression {
+
+class ArrayAtLookup : public UnaryFunctionNode
+{
+public:
+ DECLARE_EXPRESSIONNODE(ArrayAtLookup);
+ DECLARE_NBO_SERIALIZE;
+
+ ArrayAtLookup();
+ ~ArrayAtLookup();
+
+ ArrayAtLookup(const vespalib::string &attribute,
+ const ExpressionNode::CP & arg);
+
+ ArrayAtLookup(const search::attribute::IAttributeVector &attr,
+ const ExpressionNode::CP &indexArg);
+
+ ArrayAtLookup(const ArrayAtLookup &rhs);
+
+ ArrayAtLookup & operator= (const ArrayAtLookup &rhs);
+
+ void setDocId(DocId docId) { _docId = docId; }
+private:
+ virtual bool onExecute() const;
+ virtual void onPrepareResult();
+ virtual void wireAttributes(const search::attribute::IAttributeContext &attrCtx);
+
+ enum BasicAttributeType {
+ BAT_INT, BAT_FLOAT, BAT_STRING
+ };
+
+ vespalib::string _attributeName = vespalib::string();
+ const search::attribute::IAttributeVector * _attribute = 0;
+ DocId _docId = 0;
+ BasicAttributeType _basicAttributeType = BAT_STRING;
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/expression/arrayoperationnode.cpp b/searchlib/src/vespa/searchlib/expression/arrayoperationnode.cpp
new file mode 100644
index 00000000000..eccc1dfe02f
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/arrayoperationnode.cpp
@@ -0,0 +1,66 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include "arrayoperationnode.h"
+#include <vespa/vespalib/util/stringfmt.h>
+
+namespace search {
+namespace expression {
+
+IMPLEMENT_ABSTRACT_EXPRESSIONNODE(ArrayOperationNode, FunctionNode);
+
+ArrayOperationNode::ArrayOperationNode()
+ : FunctionNode(), _attributeName(), _attribute(0), _docId(0)
+{}
+
+ArrayOperationNode::ArrayOperationNode(const ArrayOperationNode& rhs)
+ : FunctionNode(),
+ _attributeName(rhs._attributeName),
+ _attribute(rhs._attribute),
+ _docId(0)
+{}
+
+// for unit testing
+ArrayOperationNode::ArrayOperationNode(IAttributeVector &attr)
+ : FunctionNode(),
+ _attributeName(attr.getName()),
+ _attribute(&attr),
+ _docId(0)
+{}
+
+ArrayOperationNode&
+ArrayOperationNode::operator= (const ArrayOperationNode& rhs)
+{
+ _attributeName = rhs._attributeName;
+ _attribute = rhs._attribute;
+ _docId = 0;
+ return *this;
+}
+
+void
+ArrayOperationNode::wireAttributes(const IAttributeContext &attrCtx)
+{
+ _attribute = attrCtx.getAttribute(_attributeName);
+ if (_attribute == NULL) {
+ throw std::runtime_error(vespalib::make_string("Failed locating attribute vector '%s'", _attributeName.c_str()));
+ }
+}
+
+using vespalib::Serializer;
+using vespalib::Deserializer;
+
+Serializer & ArrayOperationNode::onSerialize(Serializer & os) const
+{
+ FunctionNode::onSerialize(os);
+ os << _attributeName;
+ return os;
+}
+
+Deserializer & ArrayOperationNode::onDeserialize(Deserializer & is)
+{
+ FunctionNode::onDeserialize(is);
+ is >> _attributeName;
+ return is;
+}
+
+} // namespace expression
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/expression/arrayoperationnode.h b/searchlib/src/vespa/searchlib/expression/arrayoperationnode.h
new file mode 100644
index 00000000000..6afcbdcccaf
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/arrayoperationnode.h
@@ -0,0 +1,46 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/searchlib/expression/functionnode.h>
+#include <vespa/searchcommon/attribute/iattributecontext.h>
+#include <vespa/vespalib/util/stringfmt.h>
+
+namespace search {
+namespace expression {
+
+class ArrayOperationNode : public FunctionNode
+{
+public:
+ typedef search::attribute::IAttributeVector IAttributeVector;
+ typedef search::attribute::IAttributeContext IAttributeContext;
+
+ DECLARE_NBO_SERIALIZE;
+ DECLARE_ABSTRACT_EXPRESSIONNODE(ArrayOperationNode);
+
+ ArrayOperationNode();
+ ArrayOperationNode(const ArrayOperationNode& rhs);
+ // for unit testing
+ ArrayOperationNode(IAttributeVector &attr);
+
+ ArrayOperationNode& operator= (const ArrayOperationNode& rhs);
+
+ void setDocId(DocId newDocId) { _docId = newDocId; }
+
+ virtual void wireAttributes(const IAttributeContext &attrCtx);
+
+protected:
+ DocId docId() const { return _docId; }
+
+ const IAttributeVector& attribute() const {
+ return *_attribute;
+ }
+
+private:
+ vespalib::string _attributeName;
+ const search::attribute::IAttributeVector * _attribute;
+ DocId _docId;
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/expression/attributenode.cpp b/searchlib/src/vespa/searchlib/expression/attributenode.cpp
new file mode 100644
index 00000000000..558177a7972
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/attributenode.cpp
@@ -0,0 +1,283 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/searchlib/expression/attributenode.h>
+#include <vespa/searchlib/attribute/singleenumattribute.h>
+#include <stdexcept>
+
+namespace search {
+namespace expression {
+
+using namespace vespalib;
+using search::attribute::IAttributeContext;
+using search::attribute::IAttributeVector;
+using search::attribute::BasicType;
+
+IMPLEMENT_EXPRESSIONNODE(AttributeNode, FunctionNode);
+IMPLEMENT_RESULTNODE(AttributeResult, ResultNode);
+
+namespace {
+
+class EnumAttributeResult : public AttributeResult
+{
+public:
+ DECLARE_RESULTNODE(EnumAttributeResult);
+ EnumAttributeResult(const attribute::IAttributeVector * attribute, DocId docId) :
+ AttributeResult(attribute, docId),
+ _enumAttr(dynamic_cast<const SingleValueEnumAttributeBase *>(attribute))
+ {
+ }
+private:
+ EnumAttributeResult() :
+ AttributeResult(),
+ _enumAttr(NULL)
+ { }
+ int64_t onGetEnum(size_t index) const override { (void) index; return (static_cast<int64_t>(_enumAttr->getE(getDocId()))); }
+ const SingleValueEnumAttributeBase * _enumAttr;
+};
+
+IMPLEMENT_RESULTNODE(EnumAttributeResult, AttributeResult);
+
+AttributeResult::UP createResult(const IAttributeVector * attribute)
+{
+ return (dynamic_cast<const SingleValueEnumAttributeBase *>(attribute) != NULL)
+ ? AttributeResult::UP(new EnumAttributeResult(attribute, 0))
+ : AttributeResult::UP(new AttributeResult(attribute, 0));
+}
+
+}
+
+AttributeNode::AttributeNode() :
+ FunctionNode(),
+ _scratchResult(new AttributeResult()),
+ _hasMultiValue(false),
+ _useEnumOptimization(false),
+ _handler(),
+ _attributeName()
+{
+}
+
+AttributeNode::AttributeNode(const vespalib::stringref &name) :
+ FunctionNode(),
+ _scratchResult(new AttributeResult()),
+ _hasMultiValue(false),
+ _useEnumOptimization(false),
+ _handler(),
+ _attributeName(name)
+{
+}
+AttributeNode::AttributeNode(const IAttributeVector & attribute) :
+ FunctionNode(),
+ _scratchResult(createResult(&attribute)),
+ _hasMultiValue(attribute.hasMultiValue()),
+ _useEnumOptimization(false),
+ _handler(),
+ _attributeName(attribute.getName())
+{
+}
+
+AttributeNode::AttributeNode(const AttributeNode & attribute) :
+ FunctionNode(attribute),
+ _scratchResult(attribute._scratchResult->clone()),
+ _hasMultiValue(attribute._hasMultiValue),
+ _useEnumOptimization(attribute._useEnumOptimization),
+ _handler(),
+ _attributeName(attribute._attributeName)
+{
+ _scratchResult->setDocId(0);
+}
+
+AttributeNode & AttributeNode::operator = (const AttributeNode & attr)
+{
+ if (this != &attr) {
+ FunctionNode::operator = (attr);
+ _attributeName = attr._attributeName;
+ _hasMultiValue = attr._hasMultiValue;
+ _useEnumOptimization = attr._useEnumOptimization;
+ _scratchResult.reset(attr._scratchResult->clone());
+ _scratchResult->setDocId(0);
+ }
+ return *this;
+}
+
+void AttributeNode::onPrepare(bool preserveAccurateTypes)
+{
+ const IAttributeVector * attribute = _scratchResult->getAttribute();
+ if (attribute != NULL) {
+ BasicType::Type basicType = attribute->getBasicType();
+ if (attribute->isIntegerType()) {
+ if (_hasMultiValue) {
+ if (preserveAccurateTypes) {
+ switch (basicType) {
+ case BasicType::INT8:
+ setResultType(std::unique_ptr<ResultNode>(new Int8ResultNodeVector()));
+ break;
+ case BasicType::INT16:
+ setResultType(std::unique_ptr<ResultNode>(new Int16ResultNodeVector()));
+ break;
+ case BasicType::INT32:
+ setResultType(std::unique_ptr<ResultNode>(new Int32ResultNodeVector()));
+ break;
+ case BasicType::INT64:
+ setResultType(std::unique_ptr<ResultNode>(new Int64ResultNodeVector()));
+ break;
+ default:
+ throw std::runtime_error("This is no valid integer attribute " + attribute->getName());
+ break;
+ }
+ } else {
+ setResultType(std::unique_ptr<ResultNode>(new IntegerResultNodeVector()));
+ }
+ _handler.reset(new IntegerHandler(updateResult()));
+ } else {
+ if (preserveAccurateTypes) {
+ switch (basicType) {
+ case BasicType::INT8:
+ setResultType(std::unique_ptr<ResultNode>(new Int8ResultNode()));
+ break;
+ case BasicType::INT16:
+ setResultType(std::unique_ptr<ResultNode>(new Int16ResultNode()));
+ break;
+ case BasicType::INT32:
+ setResultType(std::unique_ptr<ResultNode>(new Int32ResultNode()));
+ break;
+ case BasicType::INT64:
+ setResultType(std::unique_ptr<ResultNode>(new Int64ResultNode()));
+ break;
+ default:
+ throw std::runtime_error("This is no valid integer attribute " + attribute->getName());
+ break;
+ }
+ } else {
+ setResultType(std::unique_ptr<ResultNode>(new Int64ResultNode()));
+ }
+ }
+ } else if (attribute->isFloatingPointType()) {
+ if (_hasMultiValue) {
+ setResultType(std::unique_ptr<ResultNode>(new FloatResultNodeVector()));
+ _handler.reset(new FloatHandler(updateResult()));
+ } else {
+ setResultType(std::unique_ptr<ResultNode>(new FloatResultNode()));
+ }
+ } else if (attribute->isStringType()) {
+ if (_hasMultiValue) {
+ if (_useEnumOptimization) {
+ setResultType(std::unique_ptr<ResultNode>(new EnumResultNodeVector()));
+ _handler.reset(new EnumHandler(updateResult()));
+ } else {
+ setResultType(std::unique_ptr<ResultNode>(new StringResultNodeVector()));
+ _handler.reset(new StringHandler(updateResult()));
+ }
+ } else {
+ if (_useEnumOptimization) {
+ setResultType(std::unique_ptr<ResultNode>(new EnumResultNode()));
+ } else {
+ setResultType(std::unique_ptr<ResultNode>(new StringResultNode()));
+ }
+ }
+ } else {
+ throw std::runtime_error(make_string("Can not deduce correct resultclass for attribute vector '%s'",
+ attribute->getName().c_str()));
+ }
+ }
+}
+
+void AttributeNode::IntegerHandler::handle(const AttributeResult & r)
+{
+ size_t numValues = r.getAttribute()->getValueCount(r.getDocId());
+ _vector.resize(numValues);
+ _wVector.resize(numValues);
+ r.getAttribute()->get(r.getDocId(), &_wVector[0], _wVector.size());
+ for(size_t i(0); i < numValues; i++) {
+ _vector[i] = _wVector[i].getValue();
+ }
+}
+
+void AttributeNode::FloatHandler::handle(const AttributeResult & r)
+{
+ size_t numValues = r.getAttribute()->getValueCount(r.getDocId());
+ _vector.resize(numValues);
+ _wVector.resize(numValues);
+ r.getAttribute()->get(r.getDocId(), &_wVector[0], _wVector.size());
+ for(size_t i(0); i < numValues; i++) {
+ _vector[i] = _wVector[i].getValue();
+ }
+}
+
+void AttributeNode::StringHandler::handle(const AttributeResult & r)
+{
+ size_t numValues = r.getAttribute()->getValueCount(r.getDocId());
+ _vector.resize(numValues);
+ _wVector.resize(numValues);
+ r.getAttribute()->get(r.getDocId(), &_wVector[0], _wVector.size());
+ for(size_t i(0); i < numValues; i++) {
+ _vector[i] = _wVector[i].getValue();
+ }
+}
+
+void AttributeNode::EnumHandler::handle(const AttributeResult & r)
+{
+ size_t numValues = r.getAttribute()->getValueCount(r.getDocId());
+ _vector.resize(numValues);
+ _wVector.resize(numValues);
+ r.getAttribute()->get(r.getDocId(), &_wVector[0], _wVector.size());
+ for(size_t i(0); i < numValues; i++) {
+ _vector[i] = _wVector[i].getValue();
+ }
+}
+
+bool AttributeNode::onExecute() const
+{
+ if (_hasMultiValue) {
+ _handler->handle(*_scratchResult);
+ } else {
+ updateResult().set(*_scratchResult);
+ }
+ return true;
+}
+
+void AttributeNode::wireAttributes(const IAttributeContext & attrCtx)
+{
+ const IAttributeVector * attribute(_scratchResult ? _scratchResult->getAttribute() : nullptr);
+ if (attribute == NULL) {
+ if (_useEnumOptimization) {
+ attribute = attrCtx.getAttributeStableEnum(_attributeName);
+ } else {
+ attribute = attrCtx.getAttribute(_attributeName);
+ }
+ if (attribute == NULL) {
+ throw std::runtime_error(make_string("Failed locating attribute vector '%s'", _attributeName.c_str()));
+ }
+ _hasMultiValue = attribute->hasMultiValue();
+ _scratchResult = createResult(attribute);
+ }
+}
+
+void AttributeNode::cleanup()
+{
+ _scratchResult.reset();
+}
+
+Serializer & AttributeNode::onSerialize(Serializer & os) const
+{
+ FunctionNode::onSerialize(os);
+ return os << _attributeName;
+}
+
+Deserializer & AttributeNode::onDeserialize(Deserializer & is)
+{
+ FunctionNode::onDeserialize(is);
+
+ return is >> _attributeName;
+}
+
+void
+AttributeNode::visitMembers(vespalib::ObjectVisitor &visitor) const
+{
+ visit(visitor, "attributeName", _attributeName);
+}
+
+}
+}
+
+// this function was added by ../../forcelink.sh
+void forcelink_file_searchlib_expression_attributenode() {}
diff --git a/searchlib/src/vespa/searchlib/expression/attributenode.h b/searchlib/src/vespa/searchlib/expression/attributenode.h
new file mode 100644
index 00000000000..c55acff2808
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/attributenode.h
@@ -0,0 +1,158 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/searchlib/expression/functionnode.h>
+#include <vespa/searchlib/expression/resultvector.h>
+#include <vespa/searchcommon/attribute/iattributecontext.h>
+#include <vespa/vespalib/objects/objectoperation.h>
+#include <vespa/vespalib/objects/objectpredicate.h>
+
+namespace search {
+namespace expression {
+
+class AttributeResult : public ResultNode
+{
+public:
+ typedef std::unique_ptr<AttributeResult> UP;
+ DECLARE_RESULTNODE(AttributeResult);
+ AttributeResult() : _attribute(NULL), _docId(0) { }
+ AttributeResult(const attribute::IAttributeVector * attribute, DocId docId) :
+ _attribute(attribute),
+ _docId(docId)
+ { }
+ void setDocId(DocId docId) { _docId = docId; }
+ const search::attribute::IAttributeVector *getAttribute() const { return _attribute; }
+ DocId getDocId() const { return _docId; }
+private:
+ virtual int64_t onGetInteger(size_t index) const { (void) index; return _attribute->getInt(_docId); }
+ virtual double onGetFloat(size_t index) const { (void) index; return _attribute->getFloat(_docId); }
+ virtual ConstBufferRef onGetString(size_t index, BufferRef buf) const {
+ (void) index;
+ const char * t = _attribute->getString(_docId, buf.str(), buf.size());
+ return ConstBufferRef(t, strlen(t));
+ }
+ int64_t onGetEnum(size_t index) const override { (void) index; return (static_cast<int64_t>(_attribute->getEnum(_docId))); }
+ virtual void set(const search::expression::ResultNode&) { }
+ virtual size_t hash() const { return _docId; }
+
+ const search::attribute::IAttributeVector * _attribute;
+ DocId _docId;
+};
+
+class AttributeNode : public FunctionNode
+{
+ typedef vespalib::BufferRef BufferRef;
+ typedef vespalib::ConstBufferRef ConstBufferRef;
+public:
+ DECLARE_NBO_SERIALIZE;
+ class Configure : public vespalib::ObjectOperation, public vespalib::ObjectPredicate
+ {
+ public:
+ Configure(const search::attribute::IAttributeContext & attrCtx) : _attrCtx(attrCtx) { }
+ private:
+ virtual void execute(vespalib::Identifiable &obj) {
+ static_cast<ExpressionNode &>(obj).wireAttributes(_attrCtx);
+ obj.selectMembers(*this, *this);
+ }
+ virtual bool check(const vespalib::Identifiable &obj) const {
+ return obj.inherits(ExpressionNode::classId);
+ }
+ const search::attribute::IAttributeContext & _attrCtx;
+ };
+
+ class CleanupAttributeReferences : public vespalib::ObjectOperation, public vespalib::ObjectPredicate
+ {
+ private:
+ virtual void execute(vespalib::Identifiable &obj) { static_cast<AttributeNode &>(obj).cleanup(); }
+ virtual bool check(const vespalib::Identifiable &obj) const { return obj.inherits(AttributeNode::classId); }
+ };
+
+ virtual void visitMembers(vespalib::ObjectVisitor &visitor) const;
+ DECLARE_EXPRESSIONNODE(AttributeNode);
+ AttributeNode();
+ AttributeNode(const vespalib::stringref &name);
+ AttributeNode(const search::attribute::IAttributeVector & attribute);
+ AttributeNode(const AttributeNode & attribute);
+ AttributeNode & operator = (const AttributeNode & attribute);
+ void setDocId(DocId docId) const { _scratchResult->setDocId(docId); }
+ const search::attribute::IAttributeVector *getAttribute() const {
+ return _scratchResult ? _scratchResult->getAttribute() : nullptr;
+ }
+ const vespalib::string & getAttributeName() const { return _attributeName; }
+
+ void useEnumOptimization(bool use=true) { _useEnumOptimization = use; }
+ bool hasMultiValue() const { return _hasMultiValue; }
+private:
+ void cleanup();
+ virtual void wireAttributes(const search::attribute::IAttributeContext & attrCtx);
+ virtual void onPrepare(bool preserveAccurateTypes);
+ virtual bool onExecute() const;
+ class Handler
+ {
+ public:
+ virtual ~Handler() { }
+ virtual void handle(const AttributeResult & r) = 0;
+ };
+ class IntegerHandler : public Handler
+ {
+ public:
+ IntegerHandler(ResultNode & result) :
+ Handler(),
+ _vector(((IntegerResultNodeVector &)result).getVector()),
+ _wVector()
+ { }
+ virtual void handle(const AttributeResult & r);
+ private:
+ IntegerResultNodeVector::Vector & _vector;
+ mutable std::vector<search::attribute::IAttributeVector::WeightedInt> _wVector;
+ };
+ class FloatHandler : public Handler
+ {
+ public:
+ FloatHandler(ResultNode & result) :
+ Handler(),
+ _vector(((FloatResultNodeVector &)result).getVector()),
+ _wVector()
+ { }
+ virtual void handle(const AttributeResult & r);
+ private:
+ FloatResultNodeVector::Vector & _vector;
+ mutable std::vector<search::attribute::IAttributeVector::WeightedFloat> _wVector;
+ };
+ class StringHandler : public Handler
+ {
+ public:
+ StringHandler(ResultNode & result) :
+ Handler(),
+ _vector(((StringResultNodeVector &)result).getVector()),
+ _wVector()
+ { }
+ virtual void handle(const AttributeResult & r);
+ private:
+ StringResultNodeVector::Vector & _vector;
+ mutable std::vector<search::attribute::IAttributeVector::WeightedConstChar> _wVector;
+ };
+ class EnumHandler : public Handler
+ {
+ public:
+ EnumHandler(ResultNode & result) :
+ Handler(),
+ _vector(((EnumResultNodeVector &)result).getVector()),
+ _wVector()
+ { }
+ virtual void handle(const AttributeResult & r);
+ private:
+ EnumResultNodeVector::Vector &_vector;
+ mutable std::vector<search::attribute::IAttributeVector::WeightedEnum> _wVector;
+ };
+
+ mutable AttributeResult::UP _scratchResult;
+ mutable bool _hasMultiValue;
+ mutable bool _useEnumOptimization;
+ std::unique_ptr<Handler> _handler;
+ vespalib::string _attributeName;
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/expression/binaryfunctionnode.h b/searchlib/src/vespa/searchlib/expression/binaryfunctionnode.h
new file mode 100644
index 00000000000..b3872bf4a92
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/binaryfunctionnode.h
@@ -0,0 +1,24 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/searchlib/expression/multiargfunctionnode.h>
+
+namespace search {
+namespace expression {
+
+class BinaryFunctionNode : public MultiArgFunctionNode
+{
+public:
+ DECLARE_ABSTRACT_EXPRESSIONNODE(BinaryFunctionNode);
+ BinaryFunctionNode() { }
+ BinaryFunctionNode(const ExpressionNode::CP & arg1, const ExpressionNode::CP & arg2) :
+ MultiArgFunctionNode()
+ {
+ appendArg(arg1);
+ appendArg(arg2);
+ }
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/expression/bitfunctionnode.h b/searchlib/src/vespa/searchlib/expression/bitfunctionnode.h
new file mode 100644
index 00000000000..1a2529d1038
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/bitfunctionnode.h
@@ -0,0 +1,24 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/searchlib/expression/multiargfunctionnode.h>
+#include <vespa/searchlib/expression/integerresultnode.h>
+
+namespace search {
+namespace expression {
+
+class BitFunctionNode : public NumericFunctionNode
+{
+public:
+ DECLARE_ABSTRACT_EXPRESSIONNODE(BitFunctionNode);
+ BitFunctionNode() { }
+protected:
+ virtual void onPrepareResult();
+private:
+ virtual void onArgument(const ResultNode & arg, Int64ResultNode & result) const = 0;
+ virtual void onArgument(const ResultNode & arg, ResultNode & result) const;
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/expression/bucketresultnode.cpp b/searchlib/src/vespa/searchlib/expression/bucketresultnode.cpp
new file mode 100644
index 00000000000..e82cef0563a
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/bucketresultnode.cpp
@@ -0,0 +1,17 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include "bucketresultnode.h"
+
+namespace search {
+namespace expression {
+
+IMPLEMENT_IDENTIFIABLE_ABSTRACT_NS2(search, expression, BucketResultNode, vespalib::Identifiable);
+
+vespalib::FieldBase BucketResultNode::_toField("to");
+vespalib::FieldBase BucketResultNode::_fromField("from");
+
+}
+}
+
+// this function was added by ../../forcelink.sh
+void forcelink_file_searchlib_expression_bucketresultnode() {}
diff --git a/searchlib/src/vespa/searchlib/expression/bucketresultnode.h b/searchlib/src/vespa/searchlib/expression/bucketresultnode.h
new file mode 100644
index 00000000000..36f0cff66f5
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/bucketresultnode.h
@@ -0,0 +1,27 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include "resultnode.h"
+
+namespace search {
+namespace expression {
+
+class BucketResultNode : public ResultNode
+{
+public:
+ DECLARE_ABSTRACT_EXPRESSIONNODE(BucketResultNode);
+ virtual void set(const ResultNode & rhs) { (void) rhs; }
+protected:
+ static vespalib::FieldBase _fromField;
+ static vespalib::FieldBase _toField;
+private:
+ virtual int64_t onGetInteger(size_t index) const { (void) index; return 0; }
+ virtual double onGetFloat(size_t index) const { (void) index; return 0; }
+ virtual ConstBufferRef onGetString(size_t index, BufferRef buf) const { (void) index; return buf; }
+ virtual size_t getRawByteSize() const { return onGetRawByteSize(); }
+ virtual size_t onGetRawByteSize() const = 0;
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/expression/catfunctionnode.h b/searchlib/src/vespa/searchlib/expression/catfunctionnode.h
new file mode 100644
index 00000000000..375bf6f84b1
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/catfunctionnode.h
@@ -0,0 +1,23 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/searchlib/expression/multiargfunctionnode.h>
+
+namespace search {
+namespace expression {
+
+class CatFunctionNode : public MultiArgFunctionNode
+{
+public:
+ DECLARE_EXPRESSIONNODE(CatFunctionNode);
+ CatFunctionNode() { }
+ CatFunctionNode(const ExpressionNode & arg) { addArg(arg); }
+private:
+ virtual void onPrepare(bool preserveAccurateTypes);
+ virtual void onPrepareResult();
+ virtual bool onExecute() const;
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/expression/catserializer.cpp b/searchlib/src/vespa/searchlib/expression/catserializer.cpp
new file mode 100644
index 00000000000..bbeca330f14
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/catserializer.cpp
@@ -0,0 +1,79 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include "catserializer.h"
+#include "rawresultnode.h"
+#include "resultvector.h"
+#include <vespa/vespalib/util/exception.h>
+
+namespace search {
+namespace expression {
+
+using vespalib::IFieldBase;
+using vespalib::Serializer;
+using vespalib::Deserializer;
+using vespalib::string;
+using vespalib::stringref;
+
+CatSerializer & CatSerializer::put(const IFieldBase & field, const stringref & value)
+{
+ (void) field;
+ getStream().write(value.c_str(), value.size());
+ return *this;
+}
+
+CatSerializer & CatSerializer::nop(const IFieldBase & field, const void * value)
+{
+ (void) field;
+ (void) value;
+ throw vespalib::Exception("search::expression::CatSerializer can not deserialize anything as it looses information on serialize");
+ return *this;
+}
+
+CatSerializer & CatSerializer::get(const IFieldBase & field, bool & value) { return nop(field, &value); }
+CatSerializer & CatSerializer::get(const IFieldBase & field, uint8_t & value) { return nop(field, &value); }
+CatSerializer & CatSerializer::get(const IFieldBase & field, uint16_t & value) { return nop(field, &value); }
+CatSerializer & CatSerializer::get(const IFieldBase & field, uint32_t & value) { return nop(field, &value); }
+CatSerializer & CatSerializer::get(const IFieldBase & field, uint64_t & value) { return nop(field, &value); }
+CatSerializer & CatSerializer::get(const IFieldBase & field, double & value) { return nop(field, &value); }
+CatSerializer & CatSerializer::get(const IFieldBase & field, float & value) { return nop(field, &value); }
+CatSerializer & CatSerializer::get(const IFieldBase & field, string & value) { return nop(field, &value); }
+
+CatSerializer & CatSerializer::put(const vespalib::IFieldBase & field, const vespalib::Identifiable & value)
+{
+ (void) field;
+ if (value.inherits(ResultNode::classId)) {
+ static_cast<const ResultNode &>(value).onSerializeResult(*this);
+ } else {
+ value.serializeDirect(*this);
+ }
+ return *this;
+}
+
+ResultSerializer & CatSerializer::putResult(const vespalib::IFieldBase & field, const RawResultNode & value)
+{
+ (void) field;
+ vespalib::ConstBufferRef raw(value.get());
+ getStream().write(raw.c_str(), raw.size());
+ return *this;
+}
+
+ResultSerializer & CatSerializer::putResult(const vespalib::IFieldBase & field, const ResultNodeVector & value)
+{
+ (void) field;
+ size_t sz(value.size());
+ for (size_t i(0); i < sz; i++) {
+ value.get(i).serialize(*this);
+ }
+ return *this;
+}
+
+void CatSerializer::proxyPut(const ResultNode & value)
+{
+ value.serializeDirect(*this);
+}
+
+}
+}
+
+// this function was added by ../../forcelink.sh
+void forcelink_file_searchlib_expression_catserializer() {}
diff --git a/searchlib/src/vespa/searchlib/expression/catserializer.h b/searchlib/src/vespa/searchlib/expression/catserializer.h
new file mode 100644
index 00000000000..dc25e3b30d9
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/catserializer.h
@@ -0,0 +1,38 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/vespalib/objects/nboserializer.h>
+#include <vespa/searchlib/expression/serializer.h>
+
+
+namespace search {
+namespace expression {
+
+class RawResultNode;
+
+class CatSerializer : public vespalib::NBOSerializer, public ResultSerializer
+{
+public:
+ CatSerializer(vespalib::nbostream & stream) : vespalib::NBOSerializer(stream) { }
+ virtual CatSerializer & put(const vespalib::IFieldBase & field, const vespalib::Identifiable & value);
+ virtual CatSerializer & put(const vespalib::IFieldBase & field, const vespalib::stringref & value);
+ virtual ResultSerializer & putResult(const vespalib::IFieldBase & field, const RawResultNode & value);
+ virtual ResultSerializer & putResult(const vespalib::IFieldBase & field, const ResultNodeVector & value);
+ virtual void proxyPut(const ResultNode & value);
+
+ virtual CatSerializer & get(const vespalib::IFieldBase & field, bool & value);
+ virtual CatSerializer & get(const vespalib::IFieldBase & field, uint8_t & value);
+ virtual CatSerializer & get(const vespalib::IFieldBase & field, uint16_t & value);
+ virtual CatSerializer & get(const vespalib::IFieldBase & field, uint32_t & value);
+ virtual CatSerializer & get(const vespalib::IFieldBase & field, uint64_t & value);
+ virtual CatSerializer & get(const vespalib::IFieldBase & field, double & value);
+ virtual CatSerializer & get(const vespalib::IFieldBase & field, float & value);
+ virtual CatSerializer & get(const vespalib::IFieldBase & field, vespalib::string & value);
+
+private:
+ CatSerializer & nop(const vespalib::IFieldBase & field, const void * value) __attribute__((noinline));
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/expression/constantnode.h b/searchlib/src/vespa/searchlib/expression/constantnode.h
new file mode 100644
index 00000000000..b461af01319
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/constantnode.h
@@ -0,0 +1,27 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/searchlib/expression/expressionnode.h>
+#include <vespa/searchlib/expression/resultnode.h>
+
+namespace search {
+namespace expression {
+
+class ConstantNode : public ExpressionNode
+{
+public:
+ DECLARE_NBO_SERIALIZE;
+ DECLARE_EXPRESSIONNODE(ConstantNode);
+ ConstantNode() : ExpressionNode(), _result() { }
+ ConstantNode(const ResultNode::CP & r) : ExpressionNode(), _result(r) { }
+ virtual void visitMembers(vespalib::ObjectVisitor &visitor) const;
+ virtual const ResultNode & getResult() const { return *_result; }
+private:
+ virtual void onPrepare(bool preserveAccurateTypes) { (void) preserveAccurateTypes; }
+ virtual bool onExecute() const { return true; }
+ ResultNode::CP _result;
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/expression/debugwaitfunctionnode.cpp b/searchlib/src/vespa/searchlib/expression/debugwaitfunctionnode.cpp
new file mode 100644
index 00000000000..73ddc5c2d0a
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/debugwaitfunctionnode.cpp
@@ -0,0 +1,78 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/searchlib/expression/debugwaitfunctionnode.h>
+
+namespace search {
+namespace expression {
+
+using vespalib::FieldBase;
+using vespalib::Serializer;
+using vespalib::Deserializer;
+
+IMPLEMENT_EXPRESSIONNODE(DebugWaitFunctionNode, UnaryFunctionNode);
+
+DebugWaitFunctionNode::DebugWaitFunctionNode()
+ : _waitTime(0.0),
+ _busyWait(true)
+{ }
+
+DebugWaitFunctionNode::~DebugWaitFunctionNode()
+{
+}
+
+DebugWaitFunctionNode::DebugWaitFunctionNode(const ExpressionNode::CP & arg, double waitTime, bool busyWait)
+ : UnaryFunctionNode(arg),
+ _waitTime(waitTime),
+ _busyWait(busyWait)
+{
+}
+
+bool
+DebugWaitFunctionNode::onExecute() const
+{
+ FastOS_Time time;
+ time.SetNow();
+ double millis = _waitTime * 1000.0;
+
+ while (time.MilliSecsToNow() < millis) {
+ if (_busyWait) {
+ for (int i = 0; i < 1000; i++)
+ ;
+ } else {
+ int rem = (int)(millis - time.MilliSecsToNow());
+ FastOS_Thread::Sleep(rem);
+ }
+ }
+ getArg().execute();
+ updateResult().assign(getArg().getResult());
+ return true;
+}
+
+Serializer &
+DebugWaitFunctionNode::onSerialize(Serializer & os) const
+{
+ UnaryFunctionNode::onSerialize(os);
+ return os << _waitTime << _busyWait;
+}
+
+Deserializer &
+DebugWaitFunctionNode::onDeserialize(Deserializer & is)
+{
+ UnaryFunctionNode::onDeserialize(is);
+ is >> _waitTime >> _busyWait;
+ return is;
+}
+
+void
+DebugWaitFunctionNode::visitMembers(vespalib::ObjectVisitor &visitor) const
+{
+ UnaryFunctionNode::visitMembers(visitor);
+ visit(visitor, "waitTime", _waitTime);
+ visit(visitor, "busyWait", _busyWait);
+}
+
+}
+}
+
+// this function was added by ../../forcelink.sh
+void forcelink_file_searchlib_expression_debugwaitfunctionnode() {}
diff --git a/searchlib/src/vespa/searchlib/expression/debugwaitfunctionnode.h b/searchlib/src/vespa/searchlib/expression/debugwaitfunctionnode.h
new file mode 100644
index 00000000000..13b171e3135
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/debugwaitfunctionnode.h
@@ -0,0 +1,30 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/searchlib/expression/unaryfunctionnode.h>
+#include <vespa/searchlib/common/sortspec.h>
+#include <vespa/searchlib/expression/stringresultnode.h>
+#include <vespa/searchlib/expression/resultvector.h>
+
+
+namespace search {
+namespace expression {
+
+class DebugWaitFunctionNode : public UnaryFunctionNode
+{
+public:
+ DECLARE_EXPRESSIONNODE(DebugWaitFunctionNode);
+ DECLARE_NBO_SERIALIZE;
+ DebugWaitFunctionNode();
+ ~DebugWaitFunctionNode();
+ DebugWaitFunctionNode(const ExpressionNode::CP & arg, double waitTime, bool busyWait);
+ virtual void visitMembers(vespalib::ObjectVisitor &visitor) const;
+private:
+ virtual bool onExecute() const;
+ double _waitTime;
+ bool _busyWait;
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/expression/dividefunctionnode.h b/searchlib/src/vespa/searchlib/expression/dividefunctionnode.h
new file mode 100644
index 00000000000..8775e71f7a0
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/dividefunctionnode.h
@@ -0,0 +1,22 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/searchlib/expression/numericfunctionnode.h>
+
+namespace search {
+namespace expression {
+
+class DivideFunctionNode : public NumericFunctionNode
+{
+public:
+ DECLARE_EXPRESSIONNODE(DivideFunctionNode);
+ DivideFunctionNode() { }
+private:
+ virtual void onArgument(const ResultNode & arg, ResultNode & result) const;
+ virtual ResultNode & flatten(const ResultNodeVector & v, ResultNode & result) const;
+ virtual ResultNode::CP getInitialValue() const;
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/expression/documentaccessornode.h b/searchlib/src/vespa/searchlib/expression/documentaccessornode.h
new file mode 100644
index 00000000000..971d9af792f
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/documentaccessornode.h
@@ -0,0 +1,37 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/searchlib/expression/expressionnode.h>
+#include <vespa/document/document.h>
+#include <vespa/vespalib/objects/objectoperation.h>
+#include <vespa/vespalib/objects/objectpredicate.h>
+
+namespace search {
+namespace expression {
+
+class DocumentAccessorNode : public ExpressionNode
+{
+public:
+ DECLARE_ABSTRACT_EXPRESSIONNODE(DocumentAccessorNode);
+ class Configure : public vespalib::ObjectOperation, public vespalib::ObjectPredicate
+ {
+ public:
+ Configure(const document::DocumentType & documentType) : _docType(documentType) { }
+ private:
+ virtual void execute(vespalib::Identifiable &obj) { static_cast<DocumentAccessorNode &>(obj).setDocType(_docType); }
+ virtual bool check(const vespalib::Identifiable &obj) const { return obj.inherits(DocumentAccessorNode::classId); }
+ const document::DocumentType & _docType;
+ };
+
+ void setDoc(const document::Document & doc) { onDoc(doc); }
+ void setDocType(const document::DocumentType & docType) { onDocType(docType); }
+ virtual const vespalib::string & getFieldName() const { return _S_docId; }
+private:
+ virtual void onDoc(const document::Document & doc) = 0;
+ virtual void onDocType(const document::DocumentType & docType) = 0;
+ static const vespalib::string _S_docId;
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/expression/documentfieldnode.cpp b/searchlib/src/vespa/searchlib/expression/documentfieldnode.cpp
new file mode 100644
index 00000000000..5c85e110692
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/documentfieldnode.cpp
@@ -0,0 +1,340 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/searchlib/expression/documentfieldnode.h>
+#include <vespa/searchlib/expression/getdocidnamespacespecificfunctionnode.h>
+#include <vespa/searchlib/expression/getymumchecksumfunctionnode.h>
+#include <vespa/document/fieldvalue/fieldvalues.h>
+#include <stdexcept>
+#include <vespa/vespalib/objects/visit.h>
+
+#include <vespa/log/log.h>
+
+LOG_SETUP(".searchlib.documentfieldnode");
+
+namespace search {
+namespace expression {
+
+using namespace vespalib;
+using namespace document;
+
+IMPLEMENT_ABSTRACT_EXPRESSIONNODE(DocumentAccessorNode, ExpressionNode);
+IMPLEMENT_EXPRESSIONNODE(DocumentFieldNode, DocumentAccessorNode);
+IMPLEMENT_EXPRESSIONNODE(GetYMUMChecksumFunctionNode, DocumentAccessorNode);
+IMPLEMENT_EXPRESSIONNODE(GetDocIdNamespaceSpecificFunctionNode, DocumentAccessorNode);
+
+const vespalib::string DocumentAccessorNode::_S_docId("documentid");
+
+DocumentFieldNode::DocumentFieldNode(const DocumentFieldNode & rhs) :
+ DocumentAccessorNode(rhs),
+ _fieldPath(rhs._fieldPath),
+ _value(rhs._value),
+ _fieldName(rhs._fieldName),
+ _doc(NULL)
+{
+}
+
+DocumentFieldNode & DocumentFieldNode::operator = (const DocumentFieldNode & rhs)
+{
+ if (this != &rhs) {
+ DocumentAccessorNode::operator=(rhs);
+ _fieldPath = rhs._fieldPath;
+ _value = rhs._value;
+ _fieldName = rhs._fieldName;
+ _doc = NULL;
+ }
+ return *this;
+}
+
+std::unique_ptr<ResultNode> deduceResultNode(const vespalib::stringref & fieldName, const FieldValue & fv, bool preserveAccurateTypes, bool nestedMultiValue)
+{
+ std::unique_ptr<ResultNode> value;
+ const Identifiable::RuntimeClass & cInfo = fv.getClass();
+ if (cInfo.inherits(ByteFieldValue::classId) || cInfo.inherits(IntFieldValue::classId) || cInfo.inherits(LongFieldValue::classId)) {
+ if (preserveAccurateTypes) {
+ if (cInfo.inherits(ByteFieldValue::classId)) {
+ value.reset(nestedMultiValue ? static_cast<ResultNode *>(new Int8ResultNodeVector()) : static_cast<ResultNode *>(new Int8ResultNode()));
+ } else if (cInfo.inherits(IntFieldValue::classId)) {
+ value.reset(nestedMultiValue ? static_cast<ResultNode *>(new Int32ResultNodeVector()) : static_cast<ResultNode *>(new Int32ResultNode()));
+ } else {
+ value.reset(nestedMultiValue ? static_cast<ResultNode *>(new Int64ResultNodeVector()) : static_cast<ResultNode *>(new Int64ResultNode()));
+ }
+ } else {
+ value.reset(nestedMultiValue ? static_cast<ResultNode *>(new Int64ResultNodeVector()) : static_cast<ResultNode *>(new Int64ResultNode()));
+ }
+ } else if (cInfo.inherits(FloatFieldValue::classId) || cInfo.inherits(DoubleFieldValue::classId)) {
+ value.reset(nestedMultiValue ? static_cast<ResultNode *>(new FloatResultNodeVector()) : static_cast<ResultNode *>(new FloatResultNode()));
+ } else if (cInfo.inherits(StringFieldValue::classId)) {
+ value.reset(nestedMultiValue ? static_cast<ResultNode *>(new StringResultNodeVector()) : static_cast<ResultNode *>(new StringResultNode()));
+ } else if (cInfo.inherits(RawFieldValue::classId)) {
+ value.reset(nestedMultiValue ? static_cast<ResultNode *>(new RawResultNodeVector()) : static_cast<ResultNode *>(new RawResultNode()));
+ } else if (cInfo.inherits(CollectionFieldValue::classId) || cInfo.inherits(MapFieldValue::classId)) {
+ if (cInfo.inherits(CollectionFieldValue::classId)) {
+ value = deduceResultNode(fieldName, *static_cast<const CollectionFieldValue &>(fv).createNested(), preserveAccurateTypes, nestedMultiValue);
+ } else if (cInfo.inherits(MapFieldValue::classId)) {
+ value = deduceResultNode(fieldName, *static_cast<const MapFieldValue &>(fv).createValue(), preserveAccurateTypes, nestedMultiValue);
+ } else {
+ throw std::runtime_error(make_string("Can not deduce correct resultclass for documentfield '%s' in based on class '%s'", fieldName.c_str(), cInfo.name()));
+ }
+ const Identifiable::RuntimeClass & rInfo = value->getClass();
+ if (rInfo.inherits(ResultNodeVector::classId)) {
+ //Already multivalue, so we are good to go.
+ } else if (rInfo.inherits(Int8ResultNode::classId)) {
+ value.reset(new Int8ResultNodeVector());
+ } else if (rInfo.inherits(Int16ResultNode::classId)) {
+ value.reset(new Int16ResultNodeVector());
+ } else if (rInfo.inherits(Int32ResultNode::classId)) {
+ value.reset(new Int32ResultNodeVector());
+ } else if (rInfo.inherits(Int64ResultNode::classId)) {
+ value.reset(new Int64ResultNodeVector());
+ } else if (rInfo.inherits(FloatResultNode::classId)) {
+ value.reset(new FloatResultNodeVector());
+ } else if (rInfo.inherits(StringResultNode::classId)) {
+ value.reset(new StringResultNodeVector());
+ } else if (rInfo.inherits(RawResultNode::classId)) {
+ value.reset(new RawResultNodeVector());
+ } else {
+ throw std::runtime_error(make_string("Can not deduce correct resultclass for documentfield '%s' in based on class '%s'. It nests down to %s which is not expected", fieldName.c_str(), cInfo.name(), rInfo.name()));
+ }
+ } else {
+ throw std::runtime_error(make_string("Can not deduce correct resultclass for documentfield '%s' in based on class '%s'", fieldName.c_str(), cInfo.name()));
+ }
+ return value;
+}
+
+void DocumentFieldNode::onPrepare(bool preserveAccurateTypes)
+{
+ LOG(debug, "DocumentFieldNode::onPrepare(this=%p)", this);
+
+ if ( !_fieldPath.empty() ) {
+ bool nestedMultiValue(false);
+ for(document::FieldPath::const_iterator it(_fieldPath.begin()), mt(_fieldPath.end()); !nestedMultiValue && (it != mt); it++) {
+ if (it->getType() == document::FieldPathEntry::STRUCT_FIELD) {
+ const vespalib::Identifiable::RuntimeClass & cInfo(it->getFieldValueToSet().getClass());
+ nestedMultiValue = cInfo.inherits(CollectionFieldValue::classId) || cInfo.inherits(MapFieldValue::classId);
+ }
+ }
+ const document::FieldPathEntry & endOfPath(_fieldPath.back());
+ if (endOfPath.getFieldValueToSetPtr() != NULL) {
+ const FieldValue& fv = endOfPath.getFieldValueToSet();
+ _value.reset(deduceResultNode(_fieldName, fv, preserveAccurateTypes, nestedMultiValue).release());
+ if (_value->inherits(ResultNodeVector::classId)) {
+ _handler.reset(new MultiHandler(static_cast<ResultNodeVector &>(*_value)));
+ } else {
+ _handler.reset(new SingleHandler(*_value));
+ }
+ } else {
+ if (endOfPath.getDataType().getClass().inherits(document::StructuredDataType::classId)) {
+ throw std::runtime_error(make_string("I am not able to access structured field '%s'", _fieldName.c_str()));
+ } else {
+ throw std::runtime_error(make_string("I am not able to access field '%s' for reasons I do not know", _fieldName.c_str()));
+ }
+ }
+ }
+}
+
+void DocumentFieldNode::onDocType(const DocumentType & docType)
+{
+ LOG(debug, "DocumentFieldNode::onDocType(this=%p)", this);
+ FieldPath::UP path = docType.buildFieldPath(_fieldName);
+ if (!path.get() || path->empty()) {
+ throw std::runtime_error(make_string("Field %s could not be loacated in documenttype %s", _fieldName.c_str(), docType.getName().c_str()));
+ }
+ _fieldPath = *path;
+}
+
+class FieldValue2ResultNode : public ResultNode
+{
+public:
+ DECLARE_EXPRESSIONNODE(FieldValue2ResultNode);
+ FieldValue2ResultNode(const FieldValue * fv=NULL) : _fv(fv) { }
+ virtual int64_t onGetInteger(size_t index) const { (void) index; return _fv ? _fv->getAsLong() : 0; }
+ virtual double onGetFloat(size_t index) const { (void) index; return _fv ? _fv->getAsDouble() : 0; }
+ virtual ConstBufferRef onGetString(size_t index, BufferRef buf) const {
+ (void) index;
+ if (_fv) {
+ std::pair<const char*, size_t> raw = _fv->getAsRaw();
+ return ConstBufferRef(raw.first, raw.second);
+ }
+ return buf;
+ }
+ virtual void min(const ResultNode & b) { (void) b; }
+ virtual void max(const ResultNode & b) { (void) b; }
+ virtual void add(const ResultNode & b) { (void) b; }
+private:
+ virtual void set(const ResultNode&);
+ virtual size_t hash() const { return 0; }
+ const FieldValue * _fv;
+};
+
+char DefaultValue::null = 0;
+
+void DefaultValue::set(const ResultNode&)
+{
+ throw std::runtime_error("DefaultValue::set(const ResultNode&) is not possible.");
+}
+
+void FieldValue2ResultNode::set(const ResultNode&)
+{
+ throw std::runtime_error("FieldValue2ResultNode::set(const ResultNode&) is not possible.");
+}
+
+IMPLEMENT_EXPRESSIONNODE(FieldValue2ResultNode, ResultNode);
+IMPLEMENT_EXPRESSIONNODE(DefaultValue, ResultNode);
+
+void DocumentFieldNode::onDoc(const Document & doc)
+{
+ _doc = & doc;
+ _handler->reset();
+}
+
+bool DocumentFieldNode::onExecute() const
+{
+ _doc->iterateNested(_fieldPath.begin(), _fieldPath.end(), *_handler);
+ return true;
+}
+
+DefaultValue DocumentFieldNode::SingleHandler::_defaultValue;
+
+void
+DocumentFieldNode::SingleHandler::onPrimitive(const Content & c)
+{
+ LOG(spam, "SingleHandler::onPrimitive: field value '%s'", c.getValue().toString().c_str());
+ FieldValue2ResultNode converter(&c.getValue());
+ _result.set(converter);
+}
+
+void
+DocumentFieldNode::MultiHandler::onPrimitive(const Content & c)
+{
+ LOG(spam, "MultiHandler::onPrimitive: field value '%s'", c.getValue().toString().c_str());
+ FieldValue2ResultNode converter(&c.getValue());
+ _result.push_back_safe(converter);
+}
+
+void
+DocumentFieldNode::Handler::onCollectionStart(const Content & c)
+{
+ const document::FieldValue & fv = c.getValue();
+ LOG(spam, "onCollectionStart: field value '%s'", fv.toString().c_str());
+ if (fv.inherits(document::ArrayFieldValue::classId)) {
+ const document::ArrayFieldValue & afv = static_cast<const document::ArrayFieldValue &>(fv);
+ LOG(spam, "onCollectionStart: Array size = '%zu'", afv.size());
+ } else if (fv.inherits(document::WeightedSetFieldValue::classId)) {
+ const document::WeightedSetFieldValue & wsfv = static_cast<const document::WeightedSetFieldValue &>(fv);
+ LOG(spam, "onCollectionStart: WeightedSet size = '%zu'", wsfv.size());
+ }
+}
+
+void
+DocumentFieldNode::Handler::onStructStart(const Content & c)
+{
+ LOG(spam, "onStructStart: field value '%s'", c.getValue().toString().c_str());
+}
+
+
+Serializer & DocumentFieldNode::onSerialize(Serializer & os) const
+{
+ return os << _fieldName << _value;
+}
+
+Deserializer & DocumentFieldNode::onDeserialize(Deserializer & is)
+{
+ return is >> _fieldName >> _value;
+}
+
+void
+DocumentFieldNode::visitMembers(vespalib::ObjectVisitor &visitor) const
+{
+ visit(visitor, "fieldName", _fieldName);
+ visit(visitor, "value", _value);
+ visitor.openStruct("fieldPath", "FieldPath");
+ _fieldPath.visitMembers(visitor);
+ visitor.closeStruct();
+}
+
+class String2ResultNode : public ResultNode
+{
+public:
+ String2ResultNode(const vespalib::string & s) : _s(s) { }
+ virtual int64_t onGetInteger(size_t index) const { (void) index; return strtoul(_s.c_str(), NULL, 0); }
+ virtual double onGetFloat(size_t index) const { (void) index; return strtod(_s.c_str(), NULL); }
+ virtual ConstBufferRef onGetString(size_t index, BufferRef buf) const { (void) index; (void) buf; return ConstBufferRef(_s.c_str(), _s.size()); }
+private:
+ virtual String2ResultNode * clone() const { return new String2ResultNode(_s); }
+ virtual void set(const ResultNode&);
+ virtual size_t hash() const { return 0; }
+ const vespalib::string & _s;
+};
+
+void String2ResultNode::set(const ResultNode&)
+{
+ throw std::runtime_error("String2ResultNode::set(const ResultNode&) is not possible.");
+}
+
+void GetDocIdNamespaceSpecificFunctionNode::onDoc(const Document & doc)
+{
+ String2ResultNode converter(doc.getId().getScheme().getNamespaceSpecific());
+ _value->set(converter);
+}
+
+static const FieldBase _G_valueField("value");
+
+Serializer & GetDocIdNamespaceSpecificFunctionNode::onSerialize(Serializer & os) const
+{
+ return os << _value;
+}
+Deserializer & GetDocIdNamespaceSpecificFunctionNode::onDeserialize(Deserializer & is)
+{
+ return is >> _value;
+}
+
+void
+GetDocIdNamespaceSpecificFunctionNode::visitMembers(vespalib::ObjectVisitor &visitor) const
+{
+ visit(visitor, _G_valueField.getName(), _value);
+}
+
+void GetYMUMChecksumFunctionNode::onDoc(const Document & doc)
+{
+ const vespalib::string & ymumid = doc.getId().getScheme().getNamespaceSpecific();
+
+ try {
+ char decoded[20];
+ int len = Base64::decode(ymumid.c_str(), ymumid.size(), decoded, sizeof(decoded));
+
+ if (len != 20) {
+ LOG(warning, "Illegal YMUMID '%s' in document id %s. Length(%d) != 20", ymumid.c_str(), doc.getId().toString().c_str(), len);
+ _checkSum = 0;
+ } else {
+ int32_t key[3];
+ key[0] = 0;
+ memcpy(((char*)key) + 1, decoded + 9, sizeof(key) - 1);
+ _checkSum = (key[0] ^ key[1] ^ key[2]);
+ }
+ } catch (const std::exception & e) {
+ LOG(warning, "Illegal YMUMID '%s' in document id %s. Reason : %s", ymumid.c_str(), doc.getId().toString().c_str(), e.what());
+ _checkSum = 0;
+ }
+}
+
+Serializer & GetYMUMChecksumFunctionNode::onSerialize(Serializer & os) const
+{
+ return _checkSum.serialize(os);
+}
+
+Deserializer & GetYMUMChecksumFunctionNode::onDeserialize(Deserializer & is)
+{
+ return _checkSum.deserialize(is);
+}
+
+void
+GetYMUMChecksumFunctionNode::visitMembers(vespalib::ObjectVisitor &visitor) const
+{
+ visit(visitor, "checkSum", _checkSum);
+}
+
+}
+}
+
+// this function was added by ../../forcelink.sh
+void forcelink_file_searchlib_expression_documentfieldnode() {}
diff --git a/searchlib/src/vespa/searchlib/expression/documentfieldnode.h b/searchlib/src/vespa/searchlib/expression/documentfieldnode.h
new file mode 100644
index 00000000000..d40a9fd8836
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/documentfieldnode.h
@@ -0,0 +1,87 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/searchlib/expression/documentaccessornode.h>
+#include <vespa/searchlib/expression/resultnode.h>
+#include <vespa/searchlib/expression/resultvector.h>
+#include <vespa/document/document.h>
+#include <vespa/vespalib/encoding/base64.h>
+
+namespace search {
+namespace expression {
+
+class DefaultValue : public ResultNode
+{
+public:
+ DECLARE_EXPRESSIONNODE(DefaultValue);
+ virtual int64_t onGetInteger(size_t index) const { (void) index; return 0; }
+ virtual double onGetFloat(size_t index) const { (void) index; return 0; }
+ virtual ConstBufferRef onGetString(size_t index, BufferRef buf) const {
+ (void) index;
+ (void) buf;
+ return ConstBufferRef(&null, 0);
+ }
+ virtual void min(const ResultNode & b) { (void) b; }
+ virtual void max(const ResultNode & b) { (void) b; }
+ virtual void add(const ResultNode & b) { (void) b; }
+private:
+ virtual void set(const ResultNode&);
+ virtual size_t hash() const { return 0; }
+ static char null;
+};
+
+class DocumentFieldNode : public DocumentAccessorNode
+{
+public:
+ DECLARE_NBO_SERIALIZE;
+ virtual void visitMembers(vespalib::ObjectVisitor &visitor) const;
+ DECLARE_EXPRESSIONNODE(DocumentFieldNode);
+ DocumentFieldNode() : _fieldPath(), _value(), _fieldName(), _doc(NULL) { }
+ DocumentFieldNode(const vespalib::stringref &name) : _fieldPath(), _value(), _fieldName(name), _doc(NULL) { }
+ DocumentFieldNode(const DocumentFieldNode & rhs);
+ DocumentFieldNode & operator = (const DocumentFieldNode & rhs);
+ virtual const vespalib::string & getFieldName() const { return _fieldName; }
+private:
+ class Handler : public document::FieldValue::IteratorHandler {
+ public:
+ virtual void reset() = 0;
+ protected:
+ typedef document::FieldValue::IteratorHandler::Content Content;
+ private:
+ virtual void onCollectionStart(const Content & c);
+ virtual void onStructStart(const Content & c);
+ };
+ class SingleHandler : public Handler {
+ public:
+ SingleHandler(ResultNode & result) : _result(result) {}
+ private:
+ virtual void reset() { _result.set(_defaultValue); }
+ ResultNode & _result;
+ static DefaultValue _defaultValue;
+ virtual void onPrimitive(const Content & c);
+ };
+ class MultiHandler : public Handler {
+ public:
+ MultiHandler(ResultNodeVector & result) : _result(result) {}
+ private:
+ virtual void reset() { _result.clear(); }
+ ResultNodeVector & _result;
+ virtual void onPrimitive(const Content & c);
+ };
+
+ virtual const ResultNode & getResult() const { return *_value; }
+ virtual void onPrepare(bool preserveAccurateTypes);
+ virtual bool onExecute() const;
+ virtual void onDoc(const document::Document & doc);
+ virtual void onDocType(const document::DocumentType & docType);
+ document::FieldPath _fieldPath;
+ mutable ResultNode::CP _value;
+ mutable std::unique_ptr<Handler> _handler;
+ vespalib::string _fieldName;
+ const document::Document * _doc;
+
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/expression/enumresultnode.h b/searchlib/src/vespa/searchlib/expression/enumresultnode.h
new file mode 100644
index 00000000000..b395a1a7a6f
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/enumresultnode.h
@@ -0,0 +1,30 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/searchlib/expression/integerresultnode.h>
+
+namespace search {
+namespace expression {
+
+class EnumResultNode : public IntegerResultNodeT<int64_t>
+{
+private:
+ typedef IntegerResultNodeT<int64_t> Base;
+public:
+ DECLARE_RESULTNODE(EnumResultNode);
+
+ EnumResultNode(int64_t v=0) : Base(v) { }
+ virtual void set(const ResultNode & rhs) { setValue(rhs.getEnum()); }
+
+private:
+ virtual int64_t onGetEnum(size_t index) const { (void) index; return getValue(); }
+ virtual ConstBufferRef onGetString(size_t index, BufferRef buf) const {
+ (void) index;
+ int numWritten(std::min(buf.size(), (size_t)std::max(0, snprintf(buf.str(), buf.size(), "%" PRId64, getValue()))));
+ return ConstBufferRef(buf.str(), numWritten);
+ }
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/expression/expressionnode.h b/searchlib/src/vespa/searchlib/expression/expressionnode.h
new file mode 100644
index 00000000000..d5c388f18e5
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/expressionnode.h
@@ -0,0 +1,58 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/searchlib/common/identifiable.h>
+#include <vespa/searchcommon/attribute/iattributecontext.h>
+#include <vespa/vespalib/objects/visit.h>
+
+namespace search {
+namespace expression {
+
+typedef uint32_t DocId;
+
+class ResultNode;
+
+#define DECLARE_ABSTRACT_EXPRESSIONNODE(Class) DECLARE_IDENTIFIABLE_ABSTRACT_NS2(search, expression, Class)
+#define DECLARE_ABSTRACT_EXPRESSIONNODE_NS1(ns, Class) DECLARE_IDENTIFIABLE_ABSTRACT_NS3(search, expression, ns, Class)
+
+#define DECLARE_EXPRESSIONNODE(Class) \
+ DECLARE_IDENTIFIABLE_NS2(search, expression, Class) \
+ virtual Class * clone() const;
+
+#define DECLARE_EXPRESSIONNODE_NS1(ns, Class) \
+ DECLARE_IDENTIFIABLE_NS3(search, expression, ns, Class) \
+ virtual Class * clone() const;
+
+#define IMPLEMENT_ABSTRACT_EXPRESSIONNODE(Class, base) \
+ IMPLEMENT_IDENTIFIABLE_ABSTRACT_NS2(search, expression, Class, base)
+
+#define IMPLEMENT_EXPRESSIONNODE(Class, base) \
+ IMPLEMENT_IDENTIFIABLE_NS2(search, expression, Class, base) \
+ Class * Class::clone() const { return new Class(*this); }
+
+class ExpressionNode : public vespalib::Identifiable
+{
+public:
+ DECLARE_ABSTRACT_EXPRESSIONNODE(ExpressionNode);
+ typedef std::unique_ptr<ExpressionNode> UP;
+ typedef vespalib::IdentifiablePtr<ExpressionNode> CP;
+ typedef vespalib::IdentifiableLinkedPtr<ExpressionNode> LP;
+ virtual const ResultNode & getResult() const = 0;
+ bool execute() const { return onExecute(); }
+ ExpressionNode & prepare(bool preserveAccurateTypes) { onPrepare(preserveAccurateTypes); return *this; }
+ virtual ExpressionNode * clone() const = 0;
+ void executeIterative(const ResultNode & arg, ResultNode & result) const;
+ virtual void wireAttributes(const search::attribute::IAttributeContext &attrCtx);
+protected:
+private:
+ virtual void onArgument(const ResultNode & arg, ResultNode & result) const;
+ virtual void onPrepare(bool preserveAccurateTypes) = 0;
+ virtual bool onExecute() const = 0;
+};
+
+typedef ExpressionNode::CP * ExpressionNodeArray;
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/expression/expressiontree.cpp b/searchlib/src/vespa/searchlib/expression/expressiontree.cpp
new file mode 100644
index 00000000000..1a01c49ac8c
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/expressiontree.cpp
@@ -0,0 +1,202 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/searchlib/expression/expressiontree.h>
+
+namespace search {
+namespace expression {
+
+using vespalib::Serializer;
+using vespalib::Deserializer;
+
+IMPLEMENT_EXPRESSIONNODE(ExpressionTree, ExpressionNode);
+
+void ExpressionTree::Configure::execute(vespalib::Identifiable &obj)
+{
+ ExpressionTree & e(static_cast<ExpressionTree &>(obj));
+ if (e.getRoot().get()) {
+ e.getRoot()->prepare(false);
+ }
+ e.prepare(false);
+}
+
+ExpressionTree::ExpressionTree() :
+ _root(),
+ _attributeNodes(),
+ _documentAccessorNodes(),
+ _relevanceNodes(),
+ _interpolatedLookupNodes(),
+ _arrayAtLookupNodes()
+{
+ prepare(false);
+}
+
+ExpressionTree::ExpressionTree(const ExpressionNode & root) :
+ _root(root.clone()),
+ _attributeNodes(),
+ _documentAccessorNodes(),
+ _relevanceNodes(),
+ _interpolatedLookupNodes(),
+ _arrayAtLookupNodes()
+{
+ prepare(false);
+}
+
+template<typename NODE>
+class Gather : public vespalib::ObjectOperation, public vespalib::ObjectPredicate
+{
+ std::vector<NODE *> &_list;
+public:
+ Gather(std::vector<NODE *> &list) : _list(list) { _list.clear(); }
+
+ void from(ExpressionNode::LP &root) {
+ root->select(*this, *this);
+ }
+private:
+ virtual void execute(vespalib::Identifiable &obj) {
+ _list.push_back(&static_cast<NODE &>(obj));
+ }
+ virtual bool check(const vespalib::Identifiable &obj) const {
+ return obj.inherits(NODE::classId);
+ }
+};
+
+template<typename NODE>
+Gather<NODE>
+gather(std::vector<NODE *> &list) {
+ return Gather<NODE>(list);
+}
+
+
+void ExpressionTree::onPrepare(bool preserveAccurateTypes)
+{
+ (void) preserveAccurateTypes;
+ if (_root.get() != NULL) {
+ gather(_attributeNodes).from(_root);
+ gather(_documentAccessorNodes).from(_root);
+ gather(_relevanceNodes).from(_root);
+ gather(_interpolatedLookupNodes).from(_root);
+ gather(_arrayAtLookupNodes).from(_root);
+ }
+}
+
+ExpressionTree::ExpressionTree(const ExpressionNode::CP & root) :
+ _root(root->clone()),
+ _attributeNodes(),
+ _documentAccessorNodes(),
+ _relevanceNodes(),
+ _interpolatedLookupNodes(),
+ _arrayAtLookupNodes()
+{
+ prepare(false);
+}
+
+ExpressionTree::ExpressionTree(const ExpressionTree & rhs) :
+ ExpressionNode(rhs),
+ _root(rhs._root),
+ _attributeNodes(),
+ _documentAccessorNodes(),
+ _relevanceNodes(),
+ _interpolatedLookupNodes()
+{
+ prepare(false);
+}
+
+ExpressionTree & ExpressionTree::operator = (const ExpressionTree & rhs)
+{
+ if (this != & rhs) {
+ ExpressionTree eTree(rhs);
+ swap(eTree);
+ }
+ return *this;
+}
+
+void ExpressionTree::swap(ExpressionTree & e)
+{
+ std::swap(_root, e._root);
+ _attributeNodes.swap(e._attributeNodes);
+ _documentAccessorNodes.swap(e._documentAccessorNodes);
+ _relevanceNodes.swap(e._relevanceNodes);
+ _interpolatedLookupNodes.swap(e._interpolatedLookupNodes);
+}
+
+ExpressionTree::~ExpressionTree()
+{
+}
+
+bool ExpressionTree::execute(const document::Document & doc, HitRank rank) const
+{
+ for(DocumentAccessorNodeList::const_iterator it(_documentAccessorNodes.begin()), mt(_documentAccessorNodes.end()); it != mt; it++) {
+ (*it)->setDoc(doc);
+ }
+ for(RelevanceNodeList::const_iterator it(_relevanceNodes.begin()), mt(_relevanceNodes.end()); it != mt; it++) {
+ (*it)->setRelevance(rank);
+ }
+ return _root->execute();
+}
+
+struct DocIdSetter {
+ DocId _docId;
+ void operator() (InterpolatedLookup *node) {
+ node->setDocId(_docId);
+ }
+ void operator() (ArrayAtLookup *node) {
+ node->setDocId(_docId);
+ }
+ void operator() (AttributeNode *node) {
+ node->setDocId(_docId);
+ }
+ DocIdSetter(DocId docId) : _docId(docId) {}
+};
+
+struct RankSetter {
+ HitRank _rank;
+ void operator() (RelevanceNode *node) {
+ node->setRelevance(_rank);
+ }
+ RankSetter(HitRank rank) : _rank(rank) {}
+};
+
+
+bool ExpressionTree::execute(DocId docId, HitRank rank) const
+{
+ DocIdSetter setDocId(docId);
+ RankSetter setHitRank(rank);
+ std::for_each(_attributeNodes.cbegin(), _attributeNodes.cend(), setDocId);
+ std::for_each(_relevanceNodes.cbegin(), _relevanceNodes.cend(), setHitRank);
+ std::for_each(_interpolatedLookupNodes.cbegin(), _interpolatedLookupNodes.cend(), setDocId);
+ std::for_each(_arrayAtLookupNodes.cbegin(), _arrayAtLookupNodes.cend(), setDocId);
+
+ return _root->execute();
+}
+
+void
+ExpressionTree::visitMembers(vespalib::ObjectVisitor &visitor) const
+{
+ visit(visitor, "root", _root.get());
+}
+
+void ExpressionTree::selectMembers(const vespalib::ObjectPredicate & predicate, vespalib::ObjectOperation & operation)
+{
+ if (_root.get()) {
+ _root->select(predicate, operation);
+ }
+}
+
+
+Serializer & operator << (Serializer & os, const ExpressionTree & et)
+{
+ return os << et._root;
+}
+
+Deserializer & operator >> (Deserializer & is, ExpressionTree & et)
+{
+ is >> et._root;
+ et.prepare(false);
+ return is;
+}
+
+}
+}
+
+// this function was added by ../../forcelink.sh
+void forcelink_file_searchlib_expression_expressiontree() {}
diff --git a/searchlib/src/vespa/searchlib/expression/expressiontree.h b/searchlib/src/vespa/searchlib/expression/expressiontree.h
new file mode 100644
index 00000000000..af5c26c0efb
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/expressiontree.h
@@ -0,0 +1,75 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/vespalib/objects/objectoperation.h>
+#include <vespa/vespalib/objects/objectpredicate.h>
+#include <vespa/searchlib/common/hitrank.h>
+#include <vespa/searchlib/expression/expressionnode.h>
+#include <vespa/searchlib/expression/attributenode.h>
+#include <vespa/searchlib/expression/interpolatedlookupfunctionnode.h>
+#include <vespa/searchlib/expression/arrayatlookupfunctionnode.h>
+#include <vespa/searchlib/expression/relevancenode.h>
+#include <vespa/searchlib/expression/documentfieldnode.h>
+#include <vespa/searchcommon/attribute/iattributecontext.h>
+#include <vespa/document/document.h>
+
+namespace search {
+namespace expression {
+
+struct ConfigureStaticParams {
+ ConfigureStaticParams (const search::attribute::IAttributeContext * attrCtx,
+ const document::DocumentType * docType)
+ : _attrCtx(attrCtx), _docType(docType) { }
+ const search::attribute::IAttributeContext * _attrCtx;
+ const document::DocumentType * _docType;
+};
+
+class ExpressionTree : public ExpressionNode
+{
+public:
+ DECLARE_EXPRESSIONNODE(ExpressionTree);
+ typedef vespalib::LinkedPtr<ExpressionTree> LP;
+ class Configure : public vespalib::ObjectOperation, public vespalib::ObjectPredicate
+ {
+ private:
+ virtual void execute(vespalib::Identifiable &obj);
+ virtual bool check(const vespalib::Identifiable &obj) const { return obj.inherits(ExpressionTree::classId); }
+ };
+
+ ExpressionTree();
+ ExpressionTree(const ExpressionNode & root);
+ ExpressionTree(const ExpressionNode::CP & root);
+ ExpressionTree(const ExpressionTree & rhs);
+ ~ExpressionTree();
+ ExpressionTree & operator = (const ExpressionTree & rhs);
+ bool execute(DocId docId, HitRank rank) const;
+ bool execute(const document::Document & doc, HitRank rank) const;
+ const ExpressionNode::LP & getRoot() const { return _root; }
+ virtual const ResultNode & getResult() const { return _root->getResult(); }
+ friend vespalib::Serializer & operator << (vespalib::Serializer & os, const ExpressionTree & et);
+ friend vespalib::Deserializer & operator >> (vespalib::Deserializer & is, ExpressionTree & et);
+ void swap(ExpressionTree &);
+private:
+ virtual void visitMembers(vespalib::ObjectVisitor &visitor) const;
+ virtual void selectMembers(const vespalib::ObjectPredicate &predicate, vespalib::ObjectOperation &operation);
+ virtual bool onExecute() const { return _root->execute(); }
+ virtual void onPrepare(bool preserveAccurateTypes);
+
+ typedef std::vector<AttributeNode *> AttributeNodeList;
+ typedef std::vector<DocumentAccessorNode *> DocumentAccessorNodeList;
+ typedef std::vector<RelevanceNode *> RelevanceNodeList;
+ typedef std::vector<InterpolatedLookup *> InterpolatedLookupList;
+ typedef std::vector<ArrayAtLookup *> ArrayAtLookupList;
+
+ ExpressionNode::LP _root;
+ AttributeNodeList _attributeNodes;
+ DocumentAccessorNodeList _documentAccessorNodes;
+ RelevanceNodeList _relevanceNodes;
+ InterpolatedLookupList _interpolatedLookupNodes;
+ ArrayAtLookupList _arrayAtLookupNodes;
+};
+
+
+} // namespace expression
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/expression/fixedwidthbucketfunctionnode.cpp b/searchlib/src/vespa/searchlib/expression/fixedwidthbucketfunctionnode.cpp
new file mode 100644
index 00000000000..e3eed91fe81
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/fixedwidthbucketfunctionnode.cpp
@@ -0,0 +1,134 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include "fixedwidthbucketfunctionnode.h"
+#include "integerresultnode.h"
+#include "floatresultnode.h"
+#include "integerbucketresultnode.h"
+#include "floatbucketresultnode.h"
+#include <vespa/vespalib/util/stringfmt.h>
+#include <stdexcept>
+#include <math.h>
+#include <limits>
+
+namespace search {
+namespace expression {
+
+IMPLEMENT_EXPRESSIONNODE(FixedWidthBucketFunctionNode, UnaryFunctionNode);
+
+void
+FixedWidthBucketFunctionNode::IntegerBucketHandler::update(ResultNode &result, const ResultNode &value) const
+{
+ IntegerBucketResultNode &bucket = (IntegerBucketResultNode &)result;
+ int64_t n = value.getInteger();
+ int64_t from = n;
+ int64_t to = n;
+ if (width > 0) {
+ if (n >= 0) {
+ from = (n/width) * width;
+ if (from >= (std::numeric_limits<int64_t>::max() - width)) {
+ to = std::numeric_limits<int64_t>::max();
+ } else {
+ to = from + width;
+ }
+ } else {
+ to = ((n+1)/width) * width;
+ if (to <= (std::numeric_limits<int64_t>::min() + width)) {
+ from = std::numeric_limits<int64_t>::min();
+ } else {
+ from = to - width;
+ }
+ }
+ }
+ bucket.setRange(from, to);
+}
+
+void
+FixedWidthBucketFunctionNode::IntegerVectorBucketHandler::update(ResultNode &result, const ResultNode &value) const
+{
+ const IntegerResultNodeVector::Vector & v(static_cast<const IntegerResultNodeVector &>(value).getVector());
+ IntegerBucketResultNodeVector::Vector & r(static_cast<IntegerBucketResultNodeVector &>(result).getVector());
+ r.resize(v.size());
+ for (size_t i(0), m(v.size()); i < m; i++) {
+ IntegerBucketHandler::update(r[i], v[i]);
+ }
+}
+
+void
+FixedWidthBucketFunctionNode::FloatVectorBucketHandler::update(ResultNode &result, const ResultNode &value) const
+{
+ const FloatResultNodeVector::Vector & v(static_cast<const FloatResultNodeVector &>(value).getVector());
+ FloatBucketResultNodeVector::Vector & r(static_cast<FloatBucketResultNodeVector &>(result).getVector());
+ r.resize(v.size());
+ for (size_t i(0), m(v.size()); i < m; i++) {
+ FloatBucketHandler::update(r[i], v[i]);
+ }
+}
+
+void
+FixedWidthBucketFunctionNode::FloatBucketHandler::update(ResultNode &result, const ResultNode &value) const
+{
+ FloatBucketResultNode &bucket = (FloatBucketResultNode &)result;
+ double n = value.getFloat();
+ double from = n;
+ double to = n;
+ if (width > 0.0) {
+ double tmp = floor(n/width);
+ from = tmp * width;
+ to = (tmp+1) * width;
+ }
+ bucket.setRange(from, to);
+}
+
+void
+FixedWidthBucketFunctionNode::onPrepareResult()
+{
+ const ExpressionNode &child = getArg();
+ const ResultNode &input = child.getResult();
+ if (input.getClass().inherits(IntegerResultNode::classId)) {
+ ResultNode::UP res(new IntegerBucketResultNode());
+ setResultType(std::move(res));
+ _bucketHandler.reset(new IntegerBucketHandler(_width->getInteger()));
+ } else if (input.getClass().inherits(FloatResultNode::classId)) {
+ ResultNode::UP res(new FloatBucketResultNode());
+ setResultType(std::move(res));
+ _bucketHandler.reset(new FloatBucketHandler(_width->getFloat()));
+ } else if (input.getClass().inherits(IntegerResultNodeVector::classId)) {
+ ResultNode::UP res(new IntegerBucketResultNodeVector());
+ setResultType(std::move(res));
+ _bucketHandler.reset(new IntegerVectorBucketHandler(_width->getInteger()));
+ } else if (input.getClass().inherits(FloatResultNodeVector::classId)) {
+ ResultNode::UP res(new FloatBucketResultNodeVector());
+ setResultType(std::move(res));
+ _bucketHandler.reset(new FloatVectorBucketHandler(_width->getFloat()));
+ } else {
+ throw std::runtime_error(vespalib::make_string("cannot create appropriate bucket for type '%s'", input.getClass().name()));
+ }
+}
+
+bool
+FixedWidthBucketFunctionNode::onExecute() const
+{
+ getArg().execute();
+ _bucketHandler->update(updateResult(), getArg().getResult());
+ return true;
+}
+
+vespalib::Serializer &
+FixedWidthBucketFunctionNode::onSerialize(vespalib::Serializer &os) const
+{
+ UnaryFunctionNode::onSerialize(os);
+ return os << _width;
+}
+
+vespalib::Deserializer &
+FixedWidthBucketFunctionNode::onDeserialize(vespalib::Deserializer &is)
+{
+ UnaryFunctionNode::onDeserialize(is);
+ return is >> _width;
+}
+
+}
+}
+
+// this function was added by ../../forcelink.sh
+void forcelink_file_searchlib_expression_fixedwidthbucketfunctionnode() {}
diff --git a/searchlib/src/vespa/searchlib/expression/fixedwidthbucketfunctionnode.h b/searchlib/src/vespa/searchlib/expression/fixedwidthbucketfunctionnode.h
new file mode 100644
index 00000000000..cf7b4561450
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/fixedwidthbucketfunctionnode.h
@@ -0,0 +1,71 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/searchlib/expression/unaryfunctionnode.h>
+#include <vespa/searchlib/expression/numericresultnode.h>
+#include <vespa/searchlib/expression/integerbucketresultnode.h>
+#include <vespa/searchlib/expression/floatbucketresultnode.h>
+#include <vespa/searchlib/expression/resultvector.h>
+#include <memory>
+
+namespace search {
+namespace expression {
+
+class FixedWidthBucketFunctionNode : public UnaryFunctionNode
+{
+public:
+ // update result bucket based on numeric value
+ struct BucketHandler {
+ typedef vespalib::CloneablePtr<BucketHandler> CP;
+ virtual void update(ResultNode &result, const ResultNode &value) const = 0;
+ virtual BucketHandler *clone() const = 0;
+ virtual ~BucketHandler() {}
+ };
+
+ // update integer result bucket based on integer value
+ struct IntegerBucketHandler : public BucketHandler {
+ int64_t width;
+ IntegerBucketHandler(int64_t w) : width(w) {}
+ virtual void update(ResultNode &result, const ResultNode &value) const;
+ virtual IntegerBucketHandler *clone() const { return new IntegerBucketHandler(*this); }
+ };
+ struct IntegerVectorBucketHandler : public IntegerBucketHandler {
+ IntegerVectorBucketHandler(int64_t w) : IntegerBucketHandler(w) { }
+ virtual void update(ResultNode &result, const ResultNode &value) const;
+ virtual IntegerVectorBucketHandler *clone() const { return new IntegerVectorBucketHandler(*this); }
+ };
+
+ // update float result bucket based on float value
+ struct FloatBucketHandler : public BucketHandler {
+ double width;
+ FloatBucketHandler(double w) : width(w) {}
+ virtual void update(ResultNode &result, const ResultNode &value) const;
+ virtual FloatBucketHandler *clone() const { return new FloatBucketHandler(*this); }
+ };
+
+ struct FloatVectorBucketHandler : public FloatBucketHandler {
+ FloatVectorBucketHandler(double w) : FloatBucketHandler(w) { }
+ virtual void update(ResultNode &result, const ResultNode &value) const;
+ virtual FloatVectorBucketHandler *clone() const { return new FloatVectorBucketHandler(*this); }
+ };
+private:
+ virtual void onPrepareResult();
+ virtual bool onExecute() const;
+
+ NumericResultNode::CP _width;
+ BucketHandler::CP _bucketHandler;
+
+public:
+ DECLARE_EXPRESSIONNODE(FixedWidthBucketFunctionNode);
+ DECLARE_NBO_SERIALIZE;
+ FixedWidthBucketFunctionNode() : UnaryFunctionNode(), _width(), _bucketHandler() {}
+ FixedWidthBucketFunctionNode(const ExpressionNode::CP &arg) : UnaryFunctionNode(arg), _width(), _bucketHandler() {}
+ FixedWidthBucketFunctionNode &setWidth(const NumericResultNode::CP &width) {
+ _width = width;
+ return *this;
+ }
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/expression/floatbucketresultnode.cpp b/searchlib/src/vespa/searchlib/expression/floatbucketresultnode.cpp
new file mode 100644
index 00000000000..34bb9f0fec6
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/floatbucketresultnode.cpp
@@ -0,0 +1,85 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include "floatbucketresultnode.h"
+#include <vespa/vespalib/objects/visit.h>
+#include <math.h>
+
+namespace search {
+namespace expression {
+
+IMPLEMENT_RESULTNODE(FloatBucketResultNode, BucketResultNode);
+
+FloatBucketResultNode FloatBucketResultNode::_nullResult;
+
+size_t
+FloatBucketResultNode::hash() const
+{
+ size_t tmpHash(0);
+ memcpy(&tmpHash, &_from, sizeof(tmpHash));
+ return tmpHash;
+}
+
+int
+FloatBucketResultNode::onCmp(const Identifiable &b) const
+{
+ double f1(_from);
+ double f2(static_cast<const FloatBucketResultNode &>(b)._from);
+
+ if (isnan(f1)) {
+ return isnan(f2) ? 0 : -1;
+ } else {
+ if (f1 < f2) {
+ return -1;
+ } else if (f1 > f2) {
+ return 1;
+ } else {
+ double t1(_to);
+ double t2(static_cast<const FloatBucketResultNode &>(b)._to);
+ if (isnan(t2)) {
+ return 1;
+ } else {
+ if (t1 < t2) {
+ return -1;
+ } else if (t1 > t2) {
+ return 1;
+ }
+ }
+ }
+ }
+ return 0;
+}
+
+int FloatBucketResultNode::contains(const FloatBucketResultNode & b) const
+{
+ double diff(_from - b._from);
+ if (diff < 0) {
+ return (_to < b._to) ? -1 : 0;
+ } else {
+ return (_to > b._to) ? 1 : 0;
+ }
+}
+
+void
+FloatBucketResultNode::visitMembers(vespalib::ObjectVisitor &visitor) const
+{
+ visit(visitor, _fromField.getName(), _from);
+ visit(visitor, _toField.getName(), _to);
+}
+
+vespalib::Serializer &
+FloatBucketResultNode::onSerialize(vespalib::Serializer & os) const
+{
+ return os.put(_fromField, _from).put(_toField, _to);
+}
+
+vespalib::Deserializer &
+FloatBucketResultNode::onDeserialize(vespalib::Deserializer & is)
+{
+ return is.get(_fromField, _from).get(_toField, _to);
+}
+
+}
+}
+
+// this function was added by ../../forcelink.sh
+void forcelink_file_searchlib_expression_floatbucketresultnode() {}
diff --git a/searchlib/src/vespa/searchlib/expression/floatbucketresultnode.h b/searchlib/src/vespa/searchlib/expression/floatbucketresultnode.h
new file mode 100644
index 00000000000..91a4ea66059
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/floatbucketresultnode.h
@@ -0,0 +1,53 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include "bucketresultnode.h"
+
+namespace search {
+namespace expression {
+
+class FloatBucketResultNode : public BucketResultNode
+{
+private:
+ double _from;
+ double _to;
+ static FloatBucketResultNode _nullResult;
+ virtual size_t onGetRawByteSize() const { return sizeof(_from) + sizeof(_to); }
+ virtual void create(void * buf) const { (void) buf; }
+ virtual void destroy(void * buf) const { (void) buf; }
+ virtual void encode(void * buf) const {
+ double * v(static_cast<double *>(buf));
+ v[0] = _from;
+ v[1] = _to;
+ }
+ virtual size_t hash(const void * buf) const { return static_cast<const size_t *>(buf)[0]; }
+ virtual void decode(const void * buf) {
+ const double * v(static_cast<const double *>(buf));
+ _from = v[0];
+ _to = v[1];
+ }
+public:
+ struct GetValue {
+ double operator () (const ResultNode & r) { return r.getFloat(); }
+ };
+
+ DECLARE_EXPRESSIONNODE(FloatBucketResultNode);
+ DECLARE_NBO_SERIALIZE;
+ FloatBucketResultNode() : _from(0.0), _to(0.0) {}
+ FloatBucketResultNode(double from, double to) : _from(from), _to(to) {}
+ virtual size_t hash() const;
+ virtual int onCmp(const Identifiable & b) const;
+ int contains(const FloatBucketResultNode & b) const;
+ int contains(double v) const { return (v < _from) ? 1 : (v >= _to) ? -1 : 0; }
+ virtual void visitMembers(vespalib::ObjectVisitor &visitor) const;
+ FloatBucketResultNode &setRange(double from, double to) {
+ _from = from;
+ _to = to;
+ return *this;
+ }
+ static const FloatBucketResultNode & getNull() { return _nullResult; }
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/expression/floatresultnode.h b/searchlib/src/vespa/searchlib/expression/floatresultnode.h
new file mode 100644
index 00000000000..4204e4457a9
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/floatresultnode.h
@@ -0,0 +1,56 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/searchlib/expression/numericresultnode.h>
+#include <vespa/vespalib/util/sort.h>
+
+namespace search {
+namespace expression {
+
+class FloatResultNode : public NumericResultNode
+{
+public:
+ DECLARE_EXPRESSIONNODE(FloatResultNode);
+ DECLARE_NBO_SERIALIZE;
+ virtual void visitMembers(vespalib::ObjectVisitor &visitor) const;
+ FloatResultNode(double v=0) : _value(v) { }
+ virtual size_t hash() const { size_t tmpHash(0); memcpy(&tmpHash, &_value, sizeof(tmpHash)); return tmpHash; }
+ virtual int onCmp(const Identifiable & b) const;
+ virtual void add(const ResultNode & b);
+ virtual void negate();
+ virtual void multiply(const ResultNode & b);
+ virtual void divide(const ResultNode & b);
+ virtual void modulo(const ResultNode & b);
+ virtual void min(const ResultNode & b);
+ virtual void max(const ResultNode & b);
+ virtual void set(const ResultNode & rhs);
+ double get() const { return _value; }
+ void set(double value) { _value = value; }
+private:
+ virtual int cmpMem(const void * a, const void *b) const {
+ const double & ai(*static_cast<const double *>(a));
+ const double & bi(*static_cast<const double *>(b));
+ return ai < bi ? -1 : ai == bi ? 0 : 1;
+ }
+ virtual void create(void * buf) const { (void) buf; }
+ virtual void destroy(void * buf) const { (void) buf; }
+ virtual void decode(const void * buf) { _value = *static_cast<const double *>(buf); }
+ virtual void encode(void * buf) const { *static_cast<double *>(buf) = _value; }
+ virtual void swap(void * buf) { std::swap(*static_cast<double *>(buf), _value); }
+ virtual size_t hash(const void * buf) const { size_t tmpHash(0); memcpy(&tmpHash, buf, sizeof(tmpHash)); return tmpHash; }
+ virtual uint64_t radixAsc(const void * buf) const { return vespalib::convertForSort<double, true>::convert(*static_cast<const double *>(buf)); }
+ virtual uint64_t radixDesc(const void * buf) const { return vespalib::convertForSort<double, false>::convert(*static_cast<const double *>(buf)); }
+
+ virtual size_t onGetRawByteSize() const { return sizeof(_value); }
+ bool isNan() const;
+ virtual void setMin();
+ virtual void setMax();
+ virtual int64_t onGetInteger(size_t index) const;
+ virtual double onGetFloat(size_t index) const;
+ virtual ConstBufferRef onGetString(size_t index, BufferRef buf) const;
+ double _value;
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/expression/forcelink.hpp b/searchlib/src/vespa/searchlib/expression/forcelink.hpp
new file mode 100644
index 00000000000..b31ff9aa091
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/forcelink.hpp
@@ -0,0 +1,49 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+void forcelink_file_searchlib_expression_debugwaitfunctionnode();
+void forcelink_file_searchlib_expression_floatbucketresultnode();
+void forcelink_file_searchlib_expression_resultnode();
+void forcelink_file_searchlib_expression_stringbucketresultnode();
+void forcelink_file_searchlib_expression_numericfunctionnode();
+void forcelink_file_searchlib_expression_rangebucketpredef();
+void forcelink_file_searchlib_expression_strcatserializer();
+void forcelink_file_searchlib_expression_zcurve();
+void forcelink_file_searchlib_expression_expressiontree();
+void forcelink_file_searchlib_expression_mathfunctionnode();
+void forcelink_file_searchlib_expression_ucafunctionnode();
+void forcelink_file_searchlib_expression_timestamp();
+void forcelink_file_searchlib_expression_catserializer();
+void forcelink_file_searchlib_expression_documentfieldnode();
+void forcelink_file_searchlib_expression_bucketresultnode();
+void forcelink_file_searchlib_expression_fixedwidthbucketfunctionnode();
+void forcelink_file_searchlib_expression_rawbucketresultnode();
+void forcelink_file_searchlib_expression_attributenode();
+void forcelink_file_searchlib_expression_integerbucketresultnode();
+void forcelink_file_searchlib_expression_perdocexpression();
+void forcelink_file_searchlib_expression_resultvector();
+
+void forcelink_searchlib_expression() {
+ forcelink_file_searchlib_expression_debugwaitfunctionnode();
+ forcelink_file_searchlib_expression_floatbucketresultnode();
+ forcelink_file_searchlib_expression_resultnode();
+ forcelink_file_searchlib_expression_stringbucketresultnode();
+ forcelink_file_searchlib_expression_numericfunctionnode();
+ forcelink_file_searchlib_expression_rangebucketpredef();
+ forcelink_file_searchlib_expression_strcatserializer();
+ forcelink_file_searchlib_expression_zcurve();
+ forcelink_file_searchlib_expression_expressiontree();
+ forcelink_file_searchlib_expression_mathfunctionnode();
+ forcelink_file_searchlib_expression_ucafunctionnode();
+ forcelink_file_searchlib_expression_timestamp();
+ forcelink_file_searchlib_expression_catserializer();
+ forcelink_file_searchlib_expression_documentfieldnode();
+ forcelink_file_searchlib_expression_bucketresultnode();
+ forcelink_file_searchlib_expression_fixedwidthbucketfunctionnode();
+ forcelink_file_searchlib_expression_rawbucketresultnode();
+ forcelink_file_searchlib_expression_attributenode();
+ forcelink_file_searchlib_expression_integerbucketresultnode();
+ forcelink_file_searchlib_expression_perdocexpression();
+ forcelink_file_searchlib_expression_resultvector();
+}
+
diff --git a/searchlib/src/vespa/searchlib/expression/functionnode.h b/searchlib/src/vespa/searchlib/expression/functionnode.h
new file mode 100644
index 00000000000..b2486ce9c88
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/functionnode.h
@@ -0,0 +1,30 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/searchlib/expression/expressionnode.h>
+#include <vespa/searchlib/expression/resultnode.h>
+
+namespace search {
+namespace expression {
+
+class FunctionNode : public ExpressionNode
+{
+public:
+ DECLARE_NBO_SERIALIZE;
+ virtual void visitMembers(vespalib::ObjectVisitor & visitor) const;
+ DECLARE_ABSTRACT_EXPRESSIONNODE(FunctionNode);
+ virtual const ResultNode & getResult() const { return *_tmpResult; }
+ ResultNode & updateResult() const { return *_tmpResult; }
+ virtual void reset() { _tmpResult.reset(NULL); }
+
+ FunctionNode &setResult(const ResultNode::CP res) { _tmpResult = res; return *this; }
+protected:
+ void setResultType(ResultNode::UP res) { _tmpResult.reset(res.release()); }
+ virtual void selectMembers(const vespalib::ObjectPredicate & predicate, vespalib::ObjectOperation & operation);
+private:
+ mutable ResultNode::CP _tmpResult;
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/expression/functionnodes.cpp b/searchlib/src/vespa/searchlib/expression/functionnodes.cpp
new file mode 100644
index 00000000000..fff019767d4
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/functionnodes.cpp
@@ -0,0 +1,624 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/searchlib/expression/integerresultnode.h>
+#include <vespa/searchlib/expression/floatresultnode.h>
+#include <vespa/searchlib/expression/stringresultnode.h>
+#include <vespa/searchlib/expression/rawresultnode.h>
+#include <vespa/searchlib/expression/enumresultnode.h>
+#include <vespa/searchlib/expression/constantnode.h>
+#include <vespa/searchlib/expression/relevancenode.h>
+#include <vespa/searchlib/expression/addfunctionnode.h>
+#include <vespa/searchlib/expression/dividefunctionnode.h>
+#include <vespa/searchlib/expression/multiplyfunctionnode.h>
+#include <vespa/searchlib/expression/modulofunctionnode.h>
+#include <vespa/searchlib/expression/minfunctionnode.h>
+#include <vespa/searchlib/expression/maxfunctionnode.h>
+#include <vespa/searchlib/expression/andfunctionnode.h>
+#include <vespa/searchlib/expression/orfunctionnode.h>
+#include <vespa/searchlib/expression/xorfunctionnode.h>
+#include <vespa/searchlib/expression/negatefunctionnode.h>
+#include <vespa/searchlib/expression/sortfunctionnode.h>
+#include <vespa/searchlib/expression/reversefunctionnode.h>
+#include <vespa/searchlib/expression/strlenfunctionnode.h>
+#include <vespa/searchlib/expression/numelemfunctionnode.h>
+#include <vespa/searchlib/expression/tostringfunctionnode.h>
+#include <vespa/searchlib/expression/torawfunctionnode.h>
+#include <vespa/searchlib/expression/catfunctionnode.h>
+#include <vespa/searchlib/expression/tointfunctionnode.h>
+#include <vespa/searchlib/expression/tofloatfunctionnode.h>
+#include <vespa/searchlib/expression/strcatfunctionnode.h>
+#include <vespa/searchlib/expression/xorbitfunctionnode.h>
+#include <vespa/searchlib/expression/md5bitfunctionnode.h>
+#include <vespa/searchlib/expression/binaryfunctionnode.h>
+#include <vespa/searchlib/expression/nullresultnode.h>
+#include <vespa/searchlib/expression/positiveinfinityresultnode.h>
+#include <vespa/searchlib/expression/resultvector.h>
+#include <vespa/searchlib/expression/catserializer.h>
+#include <vespa/searchlib/expression/strcatserializer.h>
+#include <vespa/searchlib/expression/normalizesubjectfunctionnode.h>
+#include <vespa/searchlib/expression/arrayoperationnode.h>
+
+#include <map>
+#include <vespa/vespalib/util/md5.h>
+#include <vespa/log/log.h>
+
+LOG_SETUP(".searchlib.documentexpressions");
+
+namespace search {
+namespace expression {
+
+using vespalib::asciistream;
+using vespalib::nbostream;
+using vespalib::Serializer;
+using vespalib::Deserializer;
+using vespalib::make_string;
+using vespalib::Identifiable;
+using vespalib::BufferRef;
+using vespalib::ConstBufferRef;
+
+IMPLEMENT_ABSTRACT_EXPRESSIONNODE(ExpressionNode, Identifiable);
+IMPLEMENT_ABSTRACT_EXPRESSIONNODE(FunctionNode, ExpressionNode);
+IMPLEMENT_ABSTRACT_EXPRESSIONNODE(MultiArgFunctionNode, FunctionNode);
+IMPLEMENT_ABSTRACT_EXPRESSIONNODE(UnaryFunctionNode, MultiArgFunctionNode);
+IMPLEMENT_ABSTRACT_EXPRESSIONNODE(BinaryFunctionNode, MultiArgFunctionNode);
+IMPLEMENT_ABSTRACT_EXPRESSIONNODE(BitFunctionNode, NumericFunctionNode);
+IMPLEMENT_ABSTRACT_EXPRESSIONNODE(UnaryBitFunctionNode, UnaryFunctionNode);
+
+IMPLEMENT_EXPRESSIONNODE(ConstantNode, ExpressionNode);
+IMPLEMENT_EXPRESSIONNODE(AddFunctionNode, NumericFunctionNode);
+IMPLEMENT_EXPRESSIONNODE(DivideFunctionNode, NumericFunctionNode);
+IMPLEMENT_EXPRESSIONNODE(MultiplyFunctionNode, NumericFunctionNode);
+IMPLEMENT_EXPRESSIONNODE(ModuloFunctionNode, NumericFunctionNode);
+IMPLEMENT_EXPRESSIONNODE(MinFunctionNode, NumericFunctionNode);
+IMPLEMENT_EXPRESSIONNODE(MaxFunctionNode, NumericFunctionNode);
+IMPLEMENT_EXPRESSIONNODE(XorFunctionNode, BitFunctionNode);
+IMPLEMENT_EXPRESSIONNODE(AndFunctionNode, BitFunctionNode);
+IMPLEMENT_EXPRESSIONNODE(OrFunctionNode, BitFunctionNode);
+IMPLEMENT_EXPRESSIONNODE(CatFunctionNode, MultiArgFunctionNode);
+IMPLEMENT_EXPRESSIONNODE(StrCatFunctionNode, MultiArgFunctionNode);
+IMPLEMENT_EXPRESSIONNODE(NegateFunctionNode, UnaryFunctionNode);
+IMPLEMENT_EXPRESSIONNODE(SortFunctionNode, UnaryFunctionNode);
+IMPLEMENT_EXPRESSIONNODE(ReverseFunctionNode, UnaryFunctionNode);
+IMPLEMENT_EXPRESSIONNODE(StrLenFunctionNode, UnaryFunctionNode);
+IMPLEMENT_EXPRESSIONNODE(NormalizeSubjectFunctionNode, UnaryFunctionNode);
+IMPLEMENT_EXPRESSIONNODE(ToIntFunctionNode, UnaryFunctionNode);
+IMPLEMENT_EXPRESSIONNODE(ToFloatFunctionNode, UnaryFunctionNode);
+IMPLEMENT_EXPRESSIONNODE(NumElemFunctionNode, UnaryFunctionNode);
+IMPLEMENT_EXPRESSIONNODE(ToStringFunctionNode, UnaryFunctionNode);
+IMPLEMENT_EXPRESSIONNODE(ToRawFunctionNode, UnaryFunctionNode);
+IMPLEMENT_EXPRESSIONNODE(XorBitFunctionNode, UnaryBitFunctionNode);
+IMPLEMENT_EXPRESSIONNODE(MD5BitFunctionNode, UnaryBitFunctionNode);
+
+void ExpressionNode::onArgument(const ResultNode & arg, ResultNode & result) const
+{
+ (void) arg;
+ (void) result;
+ throw std::runtime_error(make_string("Class %s does not implement onArgument(const ResultNode & arg, ResultNode & result). Probably an indication that it tries to take a multivalued argument, which it can not.", getClass().name()));
+}
+
+void ExpressionNode::executeIterative(const ResultNode & arg, ResultNode & result) const
+{
+ onArgument(arg, result);
+}
+
+void ExpressionNode::wireAttributes(const search::attribute::IAttributeContext &)
+{
+}
+
+
+class ArithmeticTypeConversion
+{
+public:
+ ArithmeticTypeConversion() :
+ _typeConversion()
+ {
+ _typeConversion[IntegerResultNode::classId][IntegerResultNode::classId] = Int64ResultNode::classId;
+ _typeConversion[IntegerResultNode::classId][FloatResultNode::classId] = FloatResultNode::classId;
+ _typeConversion[IntegerResultNode::classId][StringResultNode::classId] = Int64ResultNode::classId;
+ _typeConversion[IntegerResultNode::classId][RawResultNode::classId] = Int64ResultNode::classId;
+ _typeConversion[FloatResultNode::classId][IntegerResultNode::classId] = FloatResultNode::classId;
+ _typeConversion[FloatResultNode::classId][FloatResultNode::classId] = FloatResultNode::classId;
+ _typeConversion[FloatResultNode::classId][StringResultNode::classId] = FloatResultNode::classId;
+ _typeConversion[FloatResultNode::classId][RawResultNode::classId] = FloatResultNode::classId;
+ _typeConversion[StringResultNode::classId][IntegerResultNode::classId] = Int64ResultNode::classId;
+ _typeConversion[StringResultNode::classId][FloatResultNode::classId] = FloatResultNode::classId;
+ _typeConversion[StringResultNode::classId][StringResultNode::classId] = StringResultNode::classId;
+ _typeConversion[StringResultNode::classId][RawResultNode::classId] = StringResultNode::classId;
+ _typeConversion[RawResultNode::classId][IntegerResultNode::classId] = Int64ResultNode::classId;
+ _typeConversion[RawResultNode::classId][FloatResultNode::classId] = FloatResultNode::classId;
+ _typeConversion[RawResultNode::classId][StringResultNode::classId] = StringResultNode::classId;
+ _typeConversion[RawResultNode::classId][RawResultNode::classId] = RawResultNode::classId;
+ }
+ ResultNode::UP getType(const ResultNode & arg1, const ResultNode & arg2);
+ static ResultNode::UP getType(const ResultNode & arg);
+private:
+ static size_t getDimension(const ResultNode & r) {
+ if (r.getClass().inherits(ResultNodeVector::classId)) {
+ return 1 + getDimension(* r.createBaseType());
+ } else {
+ return 0;
+ }
+ }
+ static size_t getBaseType(const ResultNode & r);
+ static size_t getBaseType2(const ResultNode & r);
+ size_t getType(size_t arg1, size_t arg2) const {
+ return _typeConversion.find(arg1)->second.find(arg2)->second;
+ }
+ std::map<size_t, std::map<size_t, size_t> > _typeConversion;
+};
+
+ResultNode::UP ArithmeticTypeConversion::getType(const ResultNode & arg1, const ResultNode & arg2)
+{
+ size_t baseTypeId = getType(getBaseType2(arg1), getBaseType2(arg2));
+ size_t dimension = std::max(getDimension(arg1), getDimension(arg2));
+ ResultNode::UP result;
+ if (dimension == 0) {
+ return ResultNode::UP(static_cast<ResultNode *>(Identifiable::classFromId(baseTypeId)->create()));
+ } else if (dimension == 1) {
+ if (baseTypeId == Int64ResultNode::classId) {
+ result.reset(new IntegerResultNodeVector());
+ } else if (baseTypeId == FloatResultNode::classId) {
+ result.reset(new FloatResultNodeVector());
+ } else {
+ throw std::runtime_error("We can not handle anything but numbers.");
+ }
+ } else {
+ throw std::runtime_error("We are not able to handle multidimensional arrays");
+ }
+ return result;
+}
+
+ResultNode::UP ArithmeticTypeConversion::getType(const ResultNode & arg)
+{
+ size_t baseTypeId = getBaseType(arg);
+ return ResultNode::UP(static_cast<ResultNode *>(Identifiable::classFromId(baseTypeId)->create()));
+}
+
+size_t ArithmeticTypeConversion::getBaseType(const ResultNode & r)
+{
+ if (r.getClass().inherits(ResultNodeVector::classId)) {
+ return getBaseType(* r.createBaseType());
+ } else {
+ return r.getClass().id();
+ }
+}
+
+size_t ArithmeticTypeConversion::getBaseType2(const ResultNode & r)
+{
+ if (r.getClass().inherits(ResultNodeVector::classId)) {
+ return getBaseType2(* r.createBaseType());
+ } else if (r.getClass().inherits(IntegerResultNode::classId)) {
+ return IntegerResultNode::classId;
+ } else {
+ return getBaseType(r);
+ }
+}
+
+namespace {
+ ArithmeticTypeConversion _ArithmeticTypeConversion;
+}
+
+
+void MultiArgFunctionNode::onPrepare(bool preserveAccurateTypes)
+{
+ LOG(debug, "MultiArgFunctionNode::onPrepare(this=%p) Actual class = %s", this, getClass().name());
+ for(size_t i(0), m(_args.size()); i < m; i++) {
+ _args[i]->prepare(preserveAccurateTypes);
+ }
+ prepareResult();
+}
+
+void MultiArgFunctionNode::onPrepareResult()
+{
+ if (_args.size() == 1) {
+ setResultType(ArithmeticTypeConversion::getType(_args[0]->getResult()));
+ } else if (_args.size() > 1) {
+ setResultType(std::unique_ptr<ResultNode>(static_cast<ResultNode *>(_args[0]->getResult().clone())));
+ for(size_t i(1), m(_args.size()); i < m; i++) {
+ if (&_args[i]->getResult() != NULL) {
+ setResultType(_ArithmeticTypeConversion.getType(getResult(), _args[i]->getResult()));
+ }
+ }
+ }
+}
+
+bool MultiArgFunctionNode::onExecute() const
+{
+ for(size_t i(0), m(_args.size()); i < m; i++) {
+ _args[i]->execute();
+ }
+ return calculate(_args, updateResult());
+}
+
+bool MultiArgFunctionNode::onCalculate(const ExpressionNodeVector & args, ResultNode & result) const
+{
+ result.set(args[0]->getResult());
+ for (size_t i(1), m(args.size()); i < m; i++) {
+ executeIterative(args[i]->getResult(), result);
+ }
+ return true;
+}
+
+void BitFunctionNode::onPrepareResult()
+{
+ setResultType(std::unique_ptr<ResultNode>(new Int64ResultNode(0)));
+}
+
+void StrCatFunctionNode::onPrepareResult()
+{
+ setResultType(std::unique_ptr<ResultNode>(new StringResultNode()));
+}
+
+void CatFunctionNode::onPrepareResult()
+{
+ setResultType(std::unique_ptr<ResultNode>(new RawResultNode()));
+}
+
+void CatFunctionNode::onPrepare(bool preserveAccurateTypes)
+{
+ (void) preserveAccurateTypes;
+ MultiArgFunctionNode::onPrepare(true);
+}
+
+void BitFunctionNode::onArgument(const ResultNode & arg, ResultNode & result) const
+{
+ onArgument(arg, static_cast<Int64ResultNode &>(result));
+}
+
+void AddFunctionNode::onArgument(const ResultNode & arg, ResultNode & result) const { static_cast<NumericResultNode &>(result).add(arg); }
+void DivideFunctionNode::onArgument(const ResultNode & arg, ResultNode & result) const { static_cast<NumericResultNode &>(result).divide(arg); }
+void MultiplyFunctionNode::onArgument(const ResultNode & arg, ResultNode & result) const { static_cast<NumericResultNode &>(result).multiply(arg); }
+void ModuloFunctionNode::onArgument(const ResultNode & arg, ResultNode & result) const { static_cast<NumericResultNode &>(result).modulo(arg); }
+void MinFunctionNode::onArgument(const ResultNode & arg, ResultNode & result) const { static_cast<NumericResultNode &>(result).min(arg); }
+void MaxFunctionNode::onArgument(const ResultNode & arg, ResultNode & result) const { static_cast<NumericResultNode &>(result).max(arg); }
+void AndFunctionNode::onArgument(const ResultNode & arg, Int64ResultNode & result) const { result.andOp(arg); }
+void OrFunctionNode::onArgument(const ResultNode & arg, Int64ResultNode & result) const { result.orOp(arg); }
+void XorFunctionNode::onArgument(const ResultNode & arg, Int64ResultNode & result) const { result.xorOp(arg); }
+
+ResultNode::CP MaxFunctionNode::getInitialValue() const
+{
+ ResultNode::CP initial;
+ const ResultNode & arg(getArg(0).getResult());
+ if (arg.inherits(FloatResultNodeVector::classId)) {
+ initial.reset(new FloatResultNode(std::numeric_limits<double>::min()));
+ } else if (arg.inherits(IntegerResultNodeVector::classId)) {
+ initial.reset(new Int64ResultNode(std::numeric_limits<int64_t>::min()));
+ } else {
+ throw std::runtime_error(vespalib::string("Can not choose an initial value for class ") + arg.getClass().name());
+ }
+ return initial;
+}
+
+ResultNode::CP MinFunctionNode::getInitialValue() const
+{
+ ResultNode::CP initial;
+ const ResultNode & arg(getArg(0).getResult());
+ if (arg.inherits(FloatResultNodeVector::classId)) {
+ initial.reset(new FloatResultNode(std::numeric_limits<double>::max()));
+ } else if (arg.inherits(IntegerResultNodeVector::classId)) {
+ initial.reset(new Int64ResultNode(std::numeric_limits<int64_t>::max()));
+ } else {
+ throw std::runtime_error(vespalib::string("Can not choose an initial value for class ") + arg.getClass().name());
+ }
+ return initial;
+}
+
+ResultNode & ModuloFunctionNode::flatten(const ResultNodeVector &, ResultNode &) const
+{
+ throw std::runtime_error("ModuloFunctionNode::flatten() const not implemented since it shall never be used.");
+}
+
+ResultNode & DivideFunctionNode::flatten(const ResultNodeVector &, ResultNode &) const
+{
+ throw std::runtime_error("DivideFunctionNode::flatten() const not implemented since it shall never be used.");
+}
+
+ResultNode::CP ModuloFunctionNode::getInitialValue() const
+{
+ throw std::runtime_error("ModuloFunctionNode::getInitialValue() const not implemented since it shall never be used.");
+}
+
+ResultNode::CP DivideFunctionNode::getInitialValue() const
+{
+ throw std::runtime_error("DivideFunctionNode::getInitialValue() const not implemented since it shall never be used.");
+}
+
+void UnaryBitFunctionNode::onPrepareResult()
+{
+ setResultType(std::unique_ptr<ResultNode>(new RawResultNode()));
+}
+
+void UnaryBitFunctionNode::onPrepare(bool preserveAccurateTypes)
+{
+ (void) preserveAccurateTypes;
+ UnaryFunctionNode::onPrepare(true);
+}
+
+void UnaryFunctionNode::onPrepareResult()
+{
+ setResultType(std::unique_ptr<ResultNode>(getArg().getResult().clone()));
+}
+
+void ToStringFunctionNode::onPrepareResult()
+{
+ setResultType(std::unique_ptr<ResultNode>(new StringResultNode()));
+}
+
+bool ToStringFunctionNode::onExecute() const
+{
+ getArg().execute();
+ updateResult().set(getArg().getResult());
+ return true;
+}
+
+void ToRawFunctionNode::onPrepareResult()
+{
+ setResultType(std::unique_ptr<ResultNode>(new RawResultNode()));
+}
+
+bool ToRawFunctionNode::onExecute() const
+{
+ getArg().execute();
+ updateResult().set(getArg().getResult());
+ return true;
+}
+
+void ToIntFunctionNode::onPrepareResult()
+{
+ setResultType(std::unique_ptr<ResultNode>(new Int64ResultNode()));
+}
+
+bool ToIntFunctionNode::onExecute() const
+{
+ getArg().execute();
+ updateResult().set(getArg().getResult());
+ return true;
+}
+
+void ToFloatFunctionNode::onPrepareResult()
+{
+ setResultType(std::unique_ptr<ResultNode>(new FloatResultNode()));
+}
+
+bool ToFloatFunctionNode::onExecute() const
+{
+ getArg().execute();
+ updateResult().set(getArg().getResult());
+ return true;
+}
+
+void StrLenFunctionNode::onPrepareResult()
+{
+ setResultType(std::unique_ptr<ResultNode>(new Int64ResultNode()));
+}
+
+bool StrLenFunctionNode::onExecute() const
+{
+ getArg().execute();
+ char buf[32];
+ static_cast<Int64ResultNode &> (updateResult()).set(getArg().getResult().getString(BufferRef(buf, sizeof(buf))).size());
+ return true;
+}
+
+void NormalizeSubjectFunctionNode::onPrepareResult()
+{
+ setResultType(std::unique_ptr<ResultNode>(new StringResultNode()));
+}
+
+bool NormalizeSubjectFunctionNode::onExecute() const
+{
+ getArg().execute();
+ char buf[32];
+ ConstBufferRef tmp(getArg().getResult().getString(BufferRef(buf, sizeof(buf))));
+
+ int pos = 0;
+ if (tmp.size() >= 4) {
+ if ((tmp[0] == 'R') && ((tmp[1] | 0x20) == 'e') && (tmp[2] == ':') && (tmp[3] == ' ')) {
+ pos = 4;
+ } else if ((tmp[0] == 'F') && ((tmp[1] | 0x20) == 'w')) {
+ if ((tmp[2] == ':') && (tmp[3] == ' ')) {
+ pos = 4;
+ } else if (((tmp[2] | 0x20) == 'd') && (tmp[3] == ':') && (tmp[4] == ' ')) {
+ pos = 5;
+ }
+ }
+ }
+ static_cast<StringResultNode &> (updateResult()).set(vespalib::stringref(tmp.c_str() + pos, tmp.size() - pos));
+ return true;
+}
+
+void NumElemFunctionNode::onPrepareResult()
+{
+ setResultType(std::unique_ptr<ResultNode>(new Int64ResultNode(1)));
+}
+
+bool NumElemFunctionNode::onExecute() const
+{
+ getArg().execute();
+ if (getArg().getResult().inherits(ResultNodeVector::classId)) {
+ static_cast<Int64ResultNode &> (updateResult()).set(static_cast<const ResultNodeVector &>(getArg().getResult()).size());
+ }
+ return true;
+}
+
+bool NegateFunctionNode::onExecute() const
+{
+ getArg().execute();
+ updateResult().assign(getArg().getResult());
+ updateResult().negate();
+ return true;
+}
+
+bool SortFunctionNode::onExecute() const
+{
+ getArg().execute();
+ updateResult().assign(getArg().getResult());
+ updateResult().sort();
+ return true;
+}
+
+bool ReverseFunctionNode::onExecute() const
+{
+ getArg().execute();
+ updateResult().assign(getArg().getResult());
+ updateResult().reverse();
+ return true;
+}
+
+bool StrCatFunctionNode::onExecute() const
+{
+ asciistream os;
+ StrCatSerializer nos(os);
+ for(size_t i(0), m(getNumArgs()); i < m; i++) {
+ getArg(i).execute();
+ getArg(i).getResult().serialize(nos);
+ }
+ static_cast<StringResultNode &>(updateResult()).set(os.str());
+ return true;
+}
+
+bool CatFunctionNode::onExecute() const
+{
+ nbostream os;
+ CatSerializer nos(os);
+ for(size_t i(0), m(getNumArgs()); i < m; i++) {
+ getArg(i).execute();
+ getArg(i).getResult().serialize(nos);
+ }
+ static_cast<RawResultNode &>(updateResult()).setBuffer(os.c_str(), os.size());
+ return true;
+}
+
+XorBitFunctionNode::XorBitFunctionNode(const ExpressionNode::CP & arg, unsigned numBits) :
+ UnaryBitFunctionNode(arg, numBits),
+ _tmpXor(getNumBytes(), 0)
+{
+}
+
+bool UnaryBitFunctionNode::onExecute() const
+{
+ _tmpOs.clear();
+ getArg().execute();
+ CatSerializer os(_tmpOs);
+ getArg().getResult().serialize(os);
+ return internalExecute(_tmpOs);
+}
+
+void XorBitFunctionNode::onPrepareResult()
+{
+ UnaryBitFunctionNode::onPrepareResult();
+ _tmpXor.resize(getNumBytes());
+}
+
+bool XorBitFunctionNode::internalExecute(const nbostream & os) const
+{
+ const size_t numBytes(_tmpXor.size());
+ memset(&_tmpXor[0], 0, numBytes);
+ const char * s(os.c_str());
+ for (size_t i(0), m(os.size()/numBytes); i < m; i++) {
+ for (size_t j(0), k(numBytes); j < k; j++) {
+ _tmpXor[j] ^= s[j + k*i];
+ }
+ }
+ for (size_t i((os.size()/numBytes)*numBytes); i < os.size(); i++) {
+ _tmpXor[i%numBytes] = os.c_str()[i];
+ }
+ static_cast<RawResultNode &>(updateResult()).setBuffer(&_tmpXor[0], numBytes);
+ return true;
+}
+
+bool MD5BitFunctionNode::internalExecute(const nbostream & os) const
+{
+ const unsigned int MD5_DIGEST_LENGTH = 16;
+ unsigned char md5ScratchPad[MD5_DIGEST_LENGTH];
+ fastc_md5sum(os.c_str(), os.size(), md5ScratchPad);
+ static_cast<RawResultNode &>(updateResult()).setBuffer(md5ScratchPad, std::min(sizeof(md5ScratchPad), getNumBytes()));
+ return true;
+}
+
+Serializer & FunctionNode::onSerialize(Serializer & os) const
+{
+ return os << _tmpResult;
+}
+Deserializer & FunctionNode::onDeserialize(Deserializer & is)
+{
+ return is >> _tmpResult;
+}
+
+void
+ConstantNode::visitMembers(vespalib::ObjectVisitor &visitor) const
+{
+ visit(visitor, "Value", _result);
+}
+
+Serializer & ConstantNode::onSerialize(Serializer & os) const
+{
+ return os << _result;
+}
+Deserializer & ConstantNode::onDeserialize(Deserializer & is)
+{
+ return is >> _result;
+}
+
+
+
+void
+FunctionNode::visitMembers(vespalib::ObjectVisitor & visitor) const
+{
+ visit(visitor, "tmpResult", _tmpResult);
+}
+
+void FunctionNode::selectMembers(const vespalib::ObjectPredicate & predicate, vespalib::ObjectOperation & operation)
+{
+ if (_tmpResult.get()) {
+ _tmpResult->select(predicate, operation);
+ }
+}
+
+void MultiArgFunctionNode::selectMembers(const vespalib::ObjectPredicate & predicate, vespalib::ObjectOperation & operation)
+{
+ FunctionNode::selectMembers(predicate, operation);
+ for(size_t i(0), m(_args.size()); i < m; i++) {
+ _args[i]->select(predicate, operation);
+ }
+}
+
+Serializer & MultiArgFunctionNode::onSerialize(Serializer & os) const
+{
+ FunctionNode::onSerialize(os);
+ os << _args;
+ return os;
+}
+Deserializer & MultiArgFunctionNode::onDeserialize(Deserializer & is)
+{
+ FunctionNode::onDeserialize(is);
+ return is >> _args;
+}
+
+void
+MultiArgFunctionNode::visitMembers(vespalib::ObjectVisitor &visitor) const
+{
+ FunctionNode::visitMembers(visitor);
+ visit(visitor, "args", _args);
+}
+
+Serializer & UnaryBitFunctionNode::onSerialize(Serializer & os) const
+{
+ UnaryFunctionNode::onSerialize(os);
+ return os << _numBits;
+}
+Deserializer & UnaryBitFunctionNode::onDeserialize(Deserializer & is)
+{
+ UnaryFunctionNode::onDeserialize(is);
+ return is >> _numBits;
+}
+
+void
+UnaryBitFunctionNode::visitMembers(vespalib::ObjectVisitor &visitor) const
+{
+ UnaryFunctionNode::visitMembers(visitor);
+ visit(visitor, "numBits", _numBits);
+}
+
+}
+}
diff --git a/searchlib/src/vespa/searchlib/expression/getdocidnamespacespecificfunctionnode.h b/searchlib/src/vespa/searchlib/expression/getdocidnamespacespecificfunctionnode.h
new file mode 100644
index 00000000000..52114489f1f
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/getdocidnamespacespecificfunctionnode.h
@@ -0,0 +1,28 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/searchlib/expression/documentaccessornode.h>
+
+namespace search {
+namespace expression {
+
+class GetDocIdNamespaceSpecificFunctionNode : public DocumentAccessorNode
+{
+public:
+ DECLARE_NBO_SERIALIZE;
+ virtual void visitMembers(vespalib::ObjectVisitor &visitor) const;
+ DECLARE_EXPRESSIONNODE(GetDocIdNamespaceSpecificFunctionNode);
+ GetDocIdNamespaceSpecificFunctionNode() : _value(new StringResultNode("")) { }
+ GetDocIdNamespaceSpecificFunctionNode(ResultNode::UP resultNode) : _value(resultNode.release()) { }
+private:
+ virtual const ResultNode & getResult() const { return *_value; }
+ virtual void onDocType(const document::DocumentType & docType) { (void) docType; }
+ virtual void onDoc(const document::Document & doc);
+ virtual void onPrepare(bool preserveAccurateTypes) { (void) preserveAccurateTypes; }
+ virtual bool onExecute() const { return true; }
+ ResultNode::CP _value;
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/expression/getymumchecksumfunctionnode.h b/searchlib/src/vespa/searchlib/expression/getymumchecksumfunctionnode.h
new file mode 100644
index 00000000000..786f115f6a5
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/getymumchecksumfunctionnode.h
@@ -0,0 +1,26 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/searchlib/expression/documentaccessornode.h>
+
+namespace search {
+namespace expression {
+
+class GetYMUMChecksumFunctionNode : public DocumentAccessorNode
+{
+public:
+ DECLARE_NBO_SERIALIZE;
+ virtual void visitMembers(vespalib::ObjectVisitor &visitor) const;
+ DECLARE_EXPRESSIONNODE(GetYMUMChecksumFunctionNode);
+private:
+ virtual void onPrepare(bool preserveAccurateTypes) { (void) preserveAccurateTypes; }
+ virtual const ResultNode & getResult() const { return _checkSum; }
+ virtual void onDocType(const document::DocumentType & docType) { (void) docType; }
+ virtual void onDoc(const document::Document & doc);
+ virtual bool onExecute() const { return true; }
+ Int64ResultNode _checkSum;
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/expression/integerbucketresultnode.cpp b/searchlib/src/vespa/searchlib/expression/integerbucketresultnode.cpp
new file mode 100644
index 00000000000..f821026a679
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/integerbucketresultnode.cpp
@@ -0,0 +1,73 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include "integerbucketresultnode.h"
+#include <vespa/vespalib/objects/visit.h>
+
+namespace search {
+namespace expression {
+
+IMPLEMENT_RESULTNODE(IntegerBucketResultNode, BucketResultNode);
+
+IntegerBucketResultNode IntegerBucketResultNode::_nullResult;
+
+size_t
+IntegerBucketResultNode::hash() const
+{
+ return _from;
+}
+
+int
+IntegerBucketResultNode::onCmp(const Identifiable & b) const
+{
+ int64_t f1(_from);
+ int64_t f2(static_cast<const IntegerBucketResultNode &>(b)._from);
+ if (f1 < f2) {
+ return -1;
+ } else if (f1 > f2) {
+ return 1;
+ } else {
+ int64_t t1(_to);
+ int64_t t2(static_cast<const IntegerBucketResultNode &>(b)._to);
+ if (t1 < t2) {
+ return -1;
+ } else if (t1 > t2) {
+ return 1;
+ }
+ }
+ return 0;
+}
+
+int IntegerBucketResultNode::contains(const IntegerBucketResultNode & b) const
+{
+ int64_t diff(_from - b._from);
+ if (diff < 0) {
+ return (_to < b._to) ? -1 : 0;
+ } else {
+ return (_to > b._to) ? 1 : 0;
+ }
+}
+
+void
+IntegerBucketResultNode::visitMembers(vespalib::ObjectVisitor &visitor) const
+{
+ visit(visitor, _fromField.getName(), _from);
+ visit(visitor, _toField.getName(), _to);
+}
+
+vespalib::Serializer &
+IntegerBucketResultNode::onSerialize(vespalib::Serializer & os) const
+{
+ return os.put(_fromField, _from).put(_toField, _to);
+}
+
+vespalib::Deserializer &
+IntegerBucketResultNode::onDeserialize(vespalib::Deserializer & is)
+{
+ return is.get(_fromField, _from).get(_toField, _to);
+}
+
+}
+}
+
+// this function was added by ../../forcelink.sh
+void forcelink_file_searchlib_expression_integerbucketresultnode() {}
diff --git a/searchlib/src/vespa/searchlib/expression/integerbucketresultnode.h b/searchlib/src/vespa/searchlib/expression/integerbucketresultnode.h
new file mode 100644
index 00000000000..66f49887288
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/integerbucketresultnode.h
@@ -0,0 +1,52 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include "bucketresultnode.h"
+
+namespace search {
+namespace expression {
+
+class IntegerBucketResultNode : public BucketResultNode
+{
+private:
+ int64_t _from;
+ int64_t _to;
+ static IntegerBucketResultNode _nullResult;
+
+ virtual size_t onGetRawByteSize() const { return sizeof(_from) + sizeof(_to); }
+ virtual void create(void * buf) const { (void) buf; }
+ virtual void destroy(void * buf) const { (void) buf; }
+ virtual void encode(void * buf) const {
+ int64_t * v(static_cast<int64_t *>(buf));
+ v[0] = _from;
+ v[1] = _to;
+ }
+ virtual size_t hash(const void * buf) const { return static_cast<const int64_t *>(buf)[0]; }
+ virtual void decode(const void * buf) {
+ const int64_t * v(static_cast<const int64_t *>(buf));
+ _from = v[0];
+ _to = v[1];
+ }
+#if 0
+#endif
+public:
+ DECLARE_EXPRESSIONNODE(IntegerBucketResultNode);
+ DECLARE_NBO_SERIALIZE;
+ IntegerBucketResultNode() : _from(0), _to(0) {}
+ IntegerBucketResultNode(int64_t from, int64_t to) : _from(from), _to(to) {}
+ virtual size_t hash() const;
+ virtual int onCmp(const Identifiable & b) const;
+ virtual void visitMembers(vespalib::ObjectVisitor &visitor) const;
+ int contains(const IntegerBucketResultNode & b) const;
+ int contains(int64_t v) const { return (v < _from) ? 1 : (v >= _to) ? -1 : 0; }
+ IntegerBucketResultNode &setRange(int64_t from, int64_t to) {
+ _from = from;
+ _to = to;
+ return *this;
+ }
+ static const IntegerBucketResultNode & getNull() { return _nullResult; }
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/expression/integerresultnode.h b/searchlib/src/vespa/searchlib/expression/integerresultnode.h
new file mode 100644
index 00000000000..3c5a571dc27
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/integerresultnode.h
@@ -0,0 +1,138 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/searchlib/expression/numericresultnode.h>
+#include <vespa/vespalib/util/sort.h>
+#include <limits>
+
+namespace search {
+namespace expression {
+
+class IntegerResultNode : public NumericResultNode
+{
+public:
+ DECLARE_ABSTRACT_RESULTNODE(IntegerResultNode);
+};
+
+template <typename T>
+class IntegerResultNodeT : public IntegerResultNode
+{
+public:
+ IntegerResultNodeT(int64_t v=0) : _value(v) { }
+ virtual size_t hash() const { return _value; }
+ virtual int onCmp(const Identifiable & b) const {
+ T bv(static_cast<const IntegerResultNodeT &>(b)._value);
+ return (_value < bv) ? -1 : (_value > bv) ? 1 : 0;
+ }
+ virtual void add(const ResultNode & b) { _value += b.getInteger(); }
+ virtual void negate() { _value = - _value; }
+ virtual void multiply(const ResultNode & b) { _value *= b.getInteger(); }
+ virtual void divide(const ResultNode & b) {
+ int64_t val = b.getInteger();
+ _value = (val == 0) ? 0 : (_value / val);
+ }
+ virtual void modulo(const ResultNode & b) {
+ int64_t val = b.getInteger();
+ _value = (val == 0) ? 0 : (_value % val);
+ }
+ virtual void min(const ResultNode & b) { int64_t t(b.getInteger()); if (t < _value) { _value = t; } }
+ virtual void max(const ResultNode & b) { int64_t t(b.getInteger()); if (t > _value) { _value = t; } }
+ virtual void set(const ResultNode & rhs) { _value = rhs.getInteger(); }
+ void andOp(const ResultNode & b) { _value &= b.getInteger(); }
+ void orOp(const ResultNode & b) { _value |= b.getInteger(); }
+ void xorOp(const ResultNode & b) { _value ^= b.getInteger(); }
+ int64_t get() const { return _value; }
+ void set(int64_t value) { _value = value; }
+ IntegerResultNode & operator ++() { _value++; return *this; }
+ IntegerResultNode & operator +=(int64_t v) { _value += v; return *this; }
+protected:
+ void setValue(const T &value) { _value = value; }
+ T getValue() const { return _value; }
+private:
+ virtual int cmpMem(const void * a, const void *b) const {
+ const T & ai(*static_cast<const T *>(a));
+ const T & bi(*static_cast<const T *>(b));
+ return ai < bi ? -1 : ai == bi ? 0 : 1;
+ }
+ virtual void create(void * buf) const { (void) buf; }
+ virtual void destroy(void * buf) const { (void) buf; }
+ virtual void decode(const void * buf) { _value = *static_cast<const T *>(buf); }
+ virtual void encode(void * buf) const { *static_cast<T *>(buf) = _value; }
+ virtual void swap(void * buf) { std::swap(*static_cast<T *>(buf), _value); }
+ virtual size_t hash(const void * buf) const { return *static_cast<const T *>(buf); }
+ virtual uint64_t radixAsc(const void * buf) const { return vespalib::convertForSort<T, true>::convert(*static_cast<const T *>(buf)); }
+ virtual uint64_t radixDesc(const void * buf) const { return vespalib::convertForSort<T, false>::convert(*static_cast<const T *>(buf)); }
+ virtual size_t onGetRawByteSize() const { return sizeof(_value); }
+ virtual void setMin() { _value = std::numeric_limits<T>::min(); }
+ virtual void setMax() { _value = std::numeric_limits<T>::max(); }
+ virtual vespalib::Serializer & onSerialize(vespalib::Serializer & os) const { return os << _value; }
+ virtual vespalib::Deserializer & onDeserialize(vespalib::Deserializer & is) { return is >> _value; }
+ virtual void visitMembers(vespalib::ObjectVisitor &visitor) const { visit(visitor, "value", _value); }
+ virtual int64_t onGetInteger(size_t index) const { (void) index; return _value; }
+ virtual double onGetFloat(size_t index) const { (void) index; return _value; }
+ T _value;
+};
+
+class Int8ResultNode : public IntegerResultNodeT<int8_t>
+{
+private:
+ typedef IntegerResultNodeT<int8_t> Base;
+public:
+ DECLARE_RESULTNODE(Int8ResultNode);
+ Int8ResultNode(int8_t v=0) : Base(v) { }
+private:
+ virtual ConstBufferRef onGetString(size_t index, BufferRef buf) const {
+ (void) index;
+ int numWritten(std::min(buf.size(), (size_t)std::max(0, snprintf(buf.str(), buf.size(), "%d", getValue()))));
+ return ConstBufferRef(buf.str(), numWritten);
+ }
+};
+
+class Int16ResultNode : public IntegerResultNodeT<int16_t>
+{
+private:
+ typedef IntegerResultNodeT<int16_t> Base;
+public:
+ DECLARE_RESULTNODE(Int16ResultNode);
+ Int16ResultNode(int16_t v=0) : Base(v) { }
+private:
+ virtual ConstBufferRef onGetString(size_t index, BufferRef buf) const {
+ (void) index;
+ int numWritten(std::min(buf.size(), (size_t)std::max(0, snprintf(buf.str(), buf.size(), "%d", getValue()))));
+ return ConstBufferRef(buf.str(), numWritten);
+ }
+};
+
+class Int32ResultNode : public IntegerResultNodeT<int32_t>
+{
+private:
+ typedef IntegerResultNodeT<int32_t> Base;
+public:
+ DECLARE_RESULTNODE(Int32ResultNode);
+ Int32ResultNode(int32_t v=0) : Base(v) { }
+private:
+ virtual ConstBufferRef onGetString(size_t index, BufferRef buf) const {
+ (void) index;
+ int numWritten(std::min(buf.size(), (size_t)std::max(0, snprintf(buf.str(), buf.size(), "%d", getValue()))));
+ return ConstBufferRef(buf.str(), numWritten);
+ }
+};
+
+class Int64ResultNode : public IntegerResultNodeT<int64_t>
+{
+private:
+ typedef IntegerResultNodeT<int64_t> Base;
+public:
+ DECLARE_RESULTNODE(Int64ResultNode);
+ Int64ResultNode(int64_t v=0) : Base(v) { }
+private:
+ virtual ConstBufferRef onGetString(size_t index, BufferRef buf) const {
+ (void) index;
+ int numWritten(std::min(buf.size(), (size_t)std::max(0, snprintf(buf.str(), buf.size(), "%" PRId64, getValue()))));
+ return ConstBufferRef(buf.str(), numWritten);
+ }
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/expression/interpolatedlookupfunctionnode.cpp b/searchlib/src/vespa/searchlib/expression/interpolatedlookupfunctionnode.cpp
new file mode 100644
index 00000000000..bd42ee61eb5
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/interpolatedlookupfunctionnode.cpp
@@ -0,0 +1,127 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/searchlib/expression/interpolatedlookupfunctionnode.h>
+#include <vespa/searchlib/expression/floatresultnode.h>
+#include <vespa/searchcommon/attribute/iattributecontext.h>
+#include <vespa/searchlib/common/converters.h>
+#include <vespa/vespalib/util/stringfmt.h>
+
+namespace search {
+namespace expression {
+
+using vespalib::Serializer;
+using vespalib::Deserializer;
+
+IMPLEMENT_EXPRESSIONNODE(InterpolatedLookup, UnaryFunctionNode);
+
+InterpolatedLookup::InterpolatedLookup()
+ : _attribute(0),
+ _docId(0)
+{
+}
+
+InterpolatedLookup::~InterpolatedLookup()
+{
+}
+
+InterpolatedLookup::InterpolatedLookup(const vespalib::string &attribute,
+ const ExpressionNode::CP &arg)
+ : UnaryFunctionNode(arg),
+ _attributeName(attribute),
+ _attribute(0),
+ _docId(0)
+{
+}
+
+InterpolatedLookup::InterpolatedLookup(const search::attribute::IAttributeVector &attr,
+ const ExpressionNode::CP &lookupArg)
+ : UnaryFunctionNode(lookupArg),
+ _attributeName(attr.getName()),
+ _attribute(&attr),
+ _docId(0)
+{
+}
+
+
+InterpolatedLookup::InterpolatedLookup(const InterpolatedLookup &rhs) :
+ UnaryFunctionNode(rhs),
+ _attributeName(rhs._attributeName),
+ _attribute(rhs._attribute),
+ _docId(rhs._docId)
+{
+ // why?
+ _docId = 0;
+}
+
+InterpolatedLookup & InterpolatedLookup::operator= (const InterpolatedLookup &rhs)
+{
+ if (this != &rhs) {
+ UnaryFunctionNode::operator =(rhs);
+ _attributeName = rhs._attributeName;
+ _attribute = rhs._attribute;
+ // _docId = rhs._docId;
+ _docId = 0;
+ }
+ return *this;
+}
+
+void InterpolatedLookup::onPrepareResult()
+{
+ setResultType(std::unique_ptr<ResultNode>(new FloatResultNode()));
+}
+
+static double
+simpleInterpolate(size_t sz, std::vector<double> v, double lookup)
+{
+ if (sz == 0 || lookup < v[0])
+ return 0;
+ for (size_t i = 1; i < sz; ++i) {
+ if (lookup < v[i]) {
+ double total = v[i] - v[i-1];
+ double above = lookup - v[i-1];
+ double result = i - 1;
+ result += (above / total);
+ return result;
+ }
+ }
+ return sz - 1;
+}
+
+bool InterpolatedLookup::onExecute() const
+{
+ getArg().execute();
+ double lookup = getArg().getResult().getFloat();
+ // get attribute data
+ size_t numValues = _attribute->getValueCount(_docId);
+ std::vector<double> valueVector;
+ valueVector.resize(numValues);
+ _attribute->get(_docId, &valueVector[0], numValues);
+ double result = simpleInterpolate(numValues, valueVector, lookup);
+ static_cast<FloatResultNode &>(updateResult()).set(result);
+ return true;
+}
+
+void InterpolatedLookup::wireAttributes(const search::attribute::IAttributeContext & attrCtx)
+{
+ _attribute = attrCtx.getAttribute(_attributeName);
+ if (_attribute == NULL) {
+ throw std::runtime_error(vespalib::make_string("Failed locating attribute vector '%s'", _attributeName.c_str()));
+ }
+}
+
+Serializer & InterpolatedLookup::onSerialize(Serializer & os) const
+{
+ UnaryFunctionNode::onSerialize(os);
+ os << _attributeName;
+ return os;
+}
+
+Deserializer & InterpolatedLookup::onDeserialize(Deserializer & is)
+{
+ UnaryFunctionNode::onDeserialize(is);
+ is >> _attributeName;
+ return is;
+}
+
+} // namespace expression
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/expression/interpolatedlookupfunctionnode.h b/searchlib/src/vespa/searchlib/expression/interpolatedlookupfunctionnode.h
new file mode 100644
index 00000000000..d81acb929f1
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/interpolatedlookupfunctionnode.h
@@ -0,0 +1,40 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/searchlib/expression/unaryfunctionnode.h>
+
+namespace search {
+namespace expression {
+
+class InterpolatedLookup : public UnaryFunctionNode
+{
+public:
+ DECLARE_EXPRESSIONNODE(InterpolatedLookup);
+ DECLARE_NBO_SERIALIZE;
+
+ InterpolatedLookup();
+ ~InterpolatedLookup();
+
+ InterpolatedLookup(const vespalib::string &attribute,
+ const ExpressionNode::CP & arg);
+
+ InterpolatedLookup(const search::attribute::IAttributeVector &attr,
+ const ExpressionNode::CP &lookupArg);
+
+ InterpolatedLookup(const InterpolatedLookup &rhs);
+
+ InterpolatedLookup & operator= (const InterpolatedLookup &rhs);
+
+ void setDocId(DocId docId) { _docId = docId; }
+private:
+ virtual bool onExecute() const;
+ virtual void onPrepareResult();
+ virtual void wireAttributes(const search::attribute::IAttributeContext &attrCtx);
+ vespalib::string _attributeName;
+ const search::attribute::IAttributeVector * _attribute;
+ DocId _docId;
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/expression/mathfunctionnode.cpp b/searchlib/src/vespa/searchlib/expression/mathfunctionnode.cpp
new file mode 100644
index 00000000000..561081129cc
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/mathfunctionnode.cpp
@@ -0,0 +1,70 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/searchlib/expression/mathfunctionnode.h>
+#include <vespa/searchlib/expression/floatresultnode.h>
+#include <math.h>
+
+namespace search {
+namespace expression {
+
+using namespace vespalib;
+
+IMPLEMENT_EXPRESSIONNODE(MathFunctionNode, MultiArgFunctionNode);
+
+Serializer & MathFunctionNode::onSerialize(Serializer & os) const
+{
+ MultiArgFunctionNode::onSerialize(os);
+ uint8_t code(_function);
+ return os << code;
+}
+
+Deserializer & MathFunctionNode::onDeserialize(Deserializer & is)
+{
+ MultiArgFunctionNode::onDeserialize(is);
+ uint8_t code(0);
+ is >> code;
+ _function = (Function)code;
+ return is;
+}
+
+void MathFunctionNode::onPrepareResult()
+{
+ setResultType(std::unique_ptr<ResultNode>(new FloatResultNode()));
+}
+
+bool MathFunctionNode::onExecute() const
+{
+ getArg(0).execute();
+ double result(0.0);
+ switch (_function) {
+ case EXP: result = exp(getArg(0).getResult().getFloat()); break;
+ case POW: getArg(1).execute(); result = pow(getArg(0).getResult().getFloat(), getArg(1).getResult().getFloat()); break;
+ case LOG: result = log(getArg(0).getResult().getFloat()); break;
+ case LOG1P: result = log1p(getArg(0).getResult().getFloat()); break;
+ case LOG10: result = log10(getArg(0).getResult().getFloat()); break;
+ case SIN: result = sin(getArg(0).getResult().getFloat()); break;
+ case ASIN: result = asin(getArg(0).getResult().getFloat()); break;
+ case COS: result = cos(getArg(0).getResult().getFloat()); break;
+ case ACOS: result = acos(getArg(0).getResult().getFloat()); break;
+ case TAN: result = tan(getArg(0).getResult().getFloat()); break;
+ case ATAN: result = atan(getArg(0).getResult().getFloat()); break;
+ case SQRT: result = sqrt(getArg(0).getResult().getFloat()); break;
+ case SINH: result = sinh(getArg(0).getResult().getFloat()); break;
+ case ASINH: result = asinh(getArg(0).getResult().getFloat()); break;
+ case COSH: result = cosh(getArg(0).getResult().getFloat()); break;
+ case ACOSH: result = acosh(getArg(0).getResult().getFloat()); break;
+ case TANH: result = tanh(getArg(0).getResult().getFloat()); break;
+ case ATANH: result = atanh(getArg(0).getResult().getFloat()); break;
+ case CBRT: result = cbrt(getArg(0).getResult().getFloat()); break;
+ case HYPOT: getArg(1).execute(); result = hypot(getArg(0).getResult().getFloat(), getArg(1).getResult().getFloat()); break;
+ case FLOOR: result = floor(getArg(0).getResult().getFloat()); break;
+ }
+ static_cast<FloatResultNode &>(updateResult()).set(result);
+ return true;
+}
+
+}
+}
+
+// this function was added by ../../forcelink.sh
+void forcelink_file_searchlib_expression_mathfunctionnode() {}
diff --git a/searchlib/src/vespa/searchlib/expression/mathfunctionnode.h b/searchlib/src/vespa/searchlib/expression/mathfunctionnode.h
new file mode 100644
index 00000000000..3546a100b05
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/mathfunctionnode.h
@@ -0,0 +1,26 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/searchlib/expression/unaryfunctionnode.h>
+
+namespace search {
+namespace expression {
+
+class MathFunctionNode : public MultiArgFunctionNode
+{
+public:
+ typedef enum {EXP=0, POW=1, LOG=2, LOG1P=3, LOG10=4, SIN=5, ASIN=6, COS=7, ACOS=8, TAN=9, ATAN=10, SQRT=11, SINH=12,
+ ASINH=13, COSH=14, ACOSH=15, TANH=16, ATANH=17, CBRT=18, HYPOT=19, FLOOR=20 } Function;
+ DECLARE_EXPRESSIONNODE(MathFunctionNode);
+ DECLARE_NBO_SERIALIZE;
+
+ MathFunctionNode() { }
+private:
+ virtual bool onExecute() const;
+ virtual void onPrepareResult();
+ Function _function;
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/expression/maxfunctionnode.h b/searchlib/src/vespa/searchlib/expression/maxfunctionnode.h
new file mode 100644
index 00000000000..515788611a3
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/maxfunctionnode.h
@@ -0,0 +1,22 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/searchlib/expression/numericfunctionnode.h>
+
+namespace search {
+namespace expression {
+
+class MaxFunctionNode : public NumericFunctionNode
+{
+public:
+ DECLARE_EXPRESSIONNODE(MaxFunctionNode);
+ MaxFunctionNode() { }
+private:
+ virtual void onArgument(const ResultNode & arg, ResultNode & result) const;
+ virtual ResultNode & flatten(const ResultNodeVector & v, ResultNode & result) const { return v.flattenMax(result); }
+ virtual ResultNode::CP getInitialValue() const;
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/expression/md5bitfunctionnode.h b/searchlib/src/vespa/searchlib/expression/md5bitfunctionnode.h
new file mode 100644
index 00000000000..038a26f2cac
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/md5bitfunctionnode.h
@@ -0,0 +1,21 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/searchlib/expression/unarybitfunctionnode.h>
+
+namespace search {
+namespace expression {
+
+class MD5BitFunctionNode : public UnaryBitFunctionNode
+{
+public:
+ DECLARE_EXPRESSIONNODE(MD5BitFunctionNode);
+ MD5BitFunctionNode() { }
+ MD5BitFunctionNode(const ExpressionNode::CP & arg, unsigned numBits) : UnaryBitFunctionNode(arg, numBits) { }
+private:
+ virtual bool internalExecute(const vespalib::nbostream & os) const;
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/expression/minfunctionnode.h b/searchlib/src/vespa/searchlib/expression/minfunctionnode.h
new file mode 100644
index 00000000000..1ce835fbc24
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/minfunctionnode.h
@@ -0,0 +1,22 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/searchlib/expression/numericfunctionnode.h>
+
+namespace search {
+namespace expression {
+
+class MinFunctionNode : public NumericFunctionNode
+{
+public:
+ DECLARE_EXPRESSIONNODE(MinFunctionNode);
+ MinFunctionNode() { }
+private:
+ virtual void onArgument(const ResultNode & arg, ResultNode & result) const;
+ virtual ResultNode & flatten(const ResultNodeVector & v, ResultNode & result) const { return v.flattenMin(result); }
+ virtual ResultNode::CP getInitialValue() const;
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/expression/modulofunctionnode.h b/searchlib/src/vespa/searchlib/expression/modulofunctionnode.h
new file mode 100644
index 00000000000..c6d46feea73
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/modulofunctionnode.h
@@ -0,0 +1,22 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/searchlib/expression/numericfunctionnode.h>
+
+namespace search {
+namespace expression {
+
+class ModuloFunctionNode : public NumericFunctionNode
+{
+public:
+ DECLARE_EXPRESSIONNODE(ModuloFunctionNode);
+ ModuloFunctionNode() { }
+private:
+ virtual void onArgument(const ResultNode & arg, ResultNode & result) const;
+ virtual ResultNode & flatten(const ResultNodeVector & v, ResultNode & result) const;
+ virtual ResultNode::CP getInitialValue() const;
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/expression/multiargfunctionnode.h b/searchlib/src/vespa/searchlib/expression/multiargfunctionnode.h
new file mode 100644
index 00000000000..349d448d753
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/multiargfunctionnode.h
@@ -0,0 +1,41 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/searchlib/expression/functionnode.h>
+
+namespace search {
+namespace expression {
+
+class MultiArgFunctionNode : public FunctionNode
+{
+public:
+ typedef std::vector<ExpressionNode::CP> ExpressionNodeVector;
+ DECLARE_NBO_SERIALIZE;
+ virtual void visitMembers(vespalib::ObjectVisitor & visitor) const;
+ DECLARE_ABSTRACT_EXPRESSIONNODE(MultiArgFunctionNode);
+ MultiArgFunctionNode() : FunctionNode() { }
+ MultiArgFunctionNode & appendArg(const ExpressionNode::CP & arg) { return addArg(arg); }
+ MultiArgFunctionNode &addArg(const ExpressionNode::CP & arg) {
+ _args.push_back(arg);
+ return *this;
+ }
+ virtual void reset() { _args.clear(); FunctionNode::reset(); }
+ ExpressionNodeVector & expressionNodeVector() { return _args; }
+protected:
+ virtual bool onCalculate(const ExpressionNodeVector & args, ResultNode & result) const;
+ virtual bool onExecute() const;
+ virtual void onPrepare(bool preserveAccurateTypes);
+ size_t getNumArgs() const { return _args.size(); }
+ const ExpressionNode & getArg(size_t n) const { return *_args[n]; }
+ ExpressionNode & getArg(size_t n) { return *_args[n]; }
+private:
+ virtual void selectMembers(const vespalib::ObjectPredicate & predicate, vespalib::ObjectOperation & operation);
+ bool calculate(const ExpressionNodeVector & args, ResultNode & result) const { return onCalculate(args, result); }
+ void prepareResult() { onPrepareResult(); }
+ virtual void onPrepareResult();
+ ExpressionNodeVector _args;
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/expression/multiplyfunctionnode.h b/searchlib/src/vespa/searchlib/expression/multiplyfunctionnode.h
new file mode 100644
index 00000000000..d6386b24ab0
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/multiplyfunctionnode.h
@@ -0,0 +1,22 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/searchlib/expression/numericfunctionnode.h>
+
+namespace search {
+namespace expression {
+
+class MultiplyFunctionNode : public NumericFunctionNode
+{
+public:
+ DECLARE_EXPRESSIONNODE(MultiplyFunctionNode);
+ MultiplyFunctionNode() { }
+private:
+ virtual void onArgument(const ResultNode & arg, ResultNode & result) const;
+ virtual ResultNode::CP getInitialValue() const { return ResultNode::CP(new Int64ResultNode(1)); }
+ virtual ResultNode & flatten(const ResultNodeVector & v, ResultNode & result) const { return v.flattenMultiply(result); }
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/expression/negatefunctionnode.h b/searchlib/src/vespa/searchlib/expression/negatefunctionnode.h
new file mode 100644
index 00000000000..564317d4fa4
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/negatefunctionnode.h
@@ -0,0 +1,21 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/searchlib/expression/unaryfunctionnode.h>
+
+namespace search {
+namespace expression {
+
+class NegateFunctionNode : public UnaryFunctionNode
+{
+public:
+ DECLARE_EXPRESSIONNODE(NegateFunctionNode);
+ NegateFunctionNode() { }
+ NegateFunctionNode(const ExpressionNode::CP & arg) : UnaryFunctionNode(arg) { }
+private:
+ virtual bool onExecute() const;
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/expression/normalizesubjectfunctionnode.h b/searchlib/src/vespa/searchlib/expression/normalizesubjectfunctionnode.h
new file mode 100644
index 00000000000..3e64946e7fb
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/normalizesubjectfunctionnode.h
@@ -0,0 +1,22 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/searchlib/expression/unaryfunctionnode.h>
+
+namespace search {
+namespace expression {
+
+class NormalizeSubjectFunctionNode : public UnaryFunctionNode
+{
+public:
+ DECLARE_EXPRESSIONNODE(NormalizeSubjectFunctionNode);
+ NormalizeSubjectFunctionNode() { }
+ NormalizeSubjectFunctionNode(const ExpressionNode::CP & arg) : UnaryFunctionNode(arg) { }
+private:
+ virtual bool onExecute() const;
+ virtual void onPrepareResult();
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/expression/nullresultnode.h b/searchlib/src/vespa/searchlib/expression/nullresultnode.h
new file mode 100644
index 00000000000..d5bf6d727a4
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/nullresultnode.h
@@ -0,0 +1,36 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/searchlib/expression/singleresultnode.h>
+
+namespace search {
+namespace expression {
+
+class NullResultNode : public SingleResultNode
+{
+public:
+ DECLARE_EXPRESSIONNODE(NullResultNode);
+ virtual size_t hash() const;
+ virtual int onCmp(const Identifiable & b) const;
+ virtual void set(const ResultNode & rhs);
+ virtual void min(const ResultNode & b);
+ virtual void max(const ResultNode & b);
+ virtual void add(const ResultNode & b);
+private:
+ virtual void setMin();
+ virtual void setMax();
+ virtual int64_t onGetInteger(size_t index) const;
+ virtual double onGetFloat(size_t index) const;
+ virtual ConstBufferRef onGetString(size_t index, BufferRef buf) const;
+ virtual size_t onGetRawByteSize() const { return 0; }
+ virtual void create(void * buf) const { (void) buf; }
+ virtual void destroy(void * buf) const { (void) buf;}
+
+ virtual void decode(const void * buf) { (void) buf; }
+ virtual void encode(void * buf) const { (void) buf; }
+ virtual void swap(void * buf) { (void) buf; }
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/expression/numelemfunctionnode.h b/searchlib/src/vespa/searchlib/expression/numelemfunctionnode.h
new file mode 100644
index 00000000000..00ca2fcf75a
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/numelemfunctionnode.h
@@ -0,0 +1,22 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/searchlib/expression/unaryfunctionnode.h>
+
+namespace search {
+namespace expression {
+
+class NumElemFunctionNode : public UnaryFunctionNode
+{
+public:
+ DECLARE_EXPRESSIONNODE(NumElemFunctionNode);
+ NumElemFunctionNode() { }
+ NumElemFunctionNode(const ExpressionNode::CP & arg) : UnaryFunctionNode(arg) { }
+private:
+ virtual bool onExecute() const;
+ virtual void onPrepareResult();
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/expression/numericfunctionnode.cpp b/searchlib/src/vespa/searchlib/expression/numericfunctionnode.cpp
new file mode 100644
index 00000000000..5b3e34c564f
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/numericfunctionnode.cpp
@@ -0,0 +1,156 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/searchlib/expression/numericfunctionnode.h>
+#include <stdexcept>
+
+namespace search {
+namespace expression {
+
+IMPLEMENT_ABSTRACT_EXPRESSIONNODE(NumericFunctionNode, MultiArgFunctionNode);
+
+NumericFunctionNode::NumericFunctionNode(const NumericFunctionNode & rhs) :
+ MultiArgFunctionNode(rhs),
+ _handler()
+{
+}
+
+NumericFunctionNode & NumericFunctionNode::operator = (const NumericFunctionNode & rhs)
+{
+ if (this != &rhs) {
+ MultiArgFunctionNode::operator =(rhs);
+ _handler.reset();
+ }
+ return *this;
+}
+
+void NumericFunctionNode::onPrepare(bool preserveAccurateTypes)
+{
+ MultiArgFunctionNode::onPrepare(preserveAccurateTypes);
+ if (getNumArgs() == 1) {
+ if (getArg(0).getResult().getClass().inherits(IntegerResultNodeVector::classId)) {
+ _handler.reset(new FlattenIntegerHandler(*this));
+ } else if (getArg(0).getResult().getClass().inherits(FloatResultNodeVector::classId)) {
+ _handler.reset(new FlattenFloatHandler(*this));
+ } else if (getArg(0).getResult().getClass().inherits(StringResultNodeVector::classId)) {
+ _handler.reset(new FlattenStringHandler(*this));
+ } else {
+ throw std::runtime_error(vespalib::string("No FlattenHandler for ") + getArg(0).getResult().getClass().name());
+ }
+ } else {
+ if (getResult().getClass().inherits(IntegerResultNodeVector::classId)) {
+ _handler.reset(new VectorIntegerHandler(*this));
+ } else if (getResult().getClass().inherits(FloatResultNodeVector::classId)) {
+ _handler.reset(new VectorFloatHandler(*this));
+ } else if (getResult().getClass().inherits(StringResultNodeVector::classId)) {
+ _handler.reset(new VectorStringHandler(*this));
+ } else if (getResult().getClass().inherits(IntegerResultNode::classId)) {
+ _handler.reset(new ScalarIntegerHandler(*this));
+ } else if (getResult().getClass().inherits(FloatResultNode::classId)) {
+ _handler.reset(new ScalarFloatHandler(*this));
+ } else if (getResult().getClass().inherits(StringResultNode::classId)) {
+ _handler.reset(new ScalarStringHandler(*this));
+ } else if (getResult().getClass().inherits(RawResultNode::classId)) {
+ _handler.reset(new ScalarRawHandler(*this));
+ } else {
+ throw std::runtime_error(vespalib::make_string("NumericFunctionNode::onPrepare does not handle results of type %s", getResult().getClass().name()));
+ }
+ }
+}
+
+bool NumericFunctionNode::onCalculate(const ExpressionNodeVector & args, ResultNode & result) const
+{
+ bool retval(true);
+ (void) result;
+ _handler->handleFirst(args[0]->getResult());
+ for (size_t i(1), m(args.size()); i < m; i++) {
+ _handler->handle(args[i]->getResult());
+ }
+ return retval;
+}
+
+template <typename T>
+void NumericFunctionNode::VectorHandler<T>::handle(const ResultNode & arg)
+{
+ typename T::Vector & result = _result.getVector();
+ if (arg.getClass().inherits(ResultNodeVector::classId)) {
+ const ResultNodeVector & av = static_cast<const ResultNodeVector &> (arg);
+ const size_t argSize(av.size());
+ const size_t oldRSize(result.size());
+ if (argSize > oldRSize) {
+ result.resize(argSize);
+ for (size_t i(oldRSize); i < argSize; i++) {
+ result[i] = result[i%oldRSize];
+ }
+ }
+ for (size_t i(0), m(result.size()), isize(argSize); i < m; i++) {
+ function().executeIterative(av.get(i%isize), result[i]);
+ }
+ } else {
+ for (size_t i(0), m(result.size()); i < m; i++) {
+ function().executeIterative(arg, result[i]);
+ }
+ }
+}
+
+template <typename T>
+void NumericFunctionNode::VectorHandler<T>::handleFirst(const ResultNode & arg)
+{
+ typename T::Vector & result = _result.getVector();
+ if (arg.getClass().inherits(ResultNodeVector::classId)) {
+ const ResultNodeVector & av = static_cast<const ResultNodeVector &> (arg);
+ result.resize(av.size());
+ for (size_t i(0), m(result.size()); i < m; i++) {
+ result[i].set(av.get(i));
+ }
+ } else {
+ result.resize(1);
+ result[0].set(arg);
+ }
+}
+
+
+void NumericFunctionNode::ScalarIntegerHandler::handle(const ResultNode & arg)
+{
+ function().executeIterative(arg, _result);
+}
+
+void NumericFunctionNode::ScalarFloatHandler::handle(const ResultNode & arg)
+{
+ function().executeIterative(arg, _result);
+}
+
+void NumericFunctionNode::ScalarStringHandler::handle(const ResultNode & arg)
+{
+ function().executeIterative(arg, _result);
+}
+
+void NumericFunctionNode::ScalarRawHandler::handle(const ResultNode & arg)
+{
+ function().executeIterative(arg, _result);
+}
+
+void NumericFunctionNode::FlattenIntegerHandler::handle(const ResultNode & arg)
+{
+ _result.set(_initial);
+ function().flatten(static_cast<const ResultNodeVector &> (arg), _result);
+}
+
+void NumericFunctionNode::FlattenFloatHandler::handle(const ResultNode & arg)
+{
+ _result.set(_initial);
+ function().flatten(static_cast<const ResultNodeVector &> (arg), _result);
+}
+
+void NumericFunctionNode::FlattenStringHandler::handle(const ResultNode & arg)
+{
+ _result.set(_initial);
+ function().flatten(static_cast<const ResultNodeVector &> (arg), _result);
+}
+
+}
+}
+
+// this function was added by ../../forcelink.sh
+
+// this function was added by ../../forcelink.sh
+void forcelink_file_searchlib_expression_numericfunctionnode() {}
diff --git a/searchlib/src/vespa/searchlib/expression/numericfunctionnode.h b/searchlib/src/vespa/searchlib/expression/numericfunctionnode.h
new file mode 100644
index 00000000000..393b89d1049
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/numericfunctionnode.h
@@ -0,0 +1,178 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/searchlib/expression/multiargfunctionnode.h>
+#include <vespa/searchlib/expression/resultvector.h>
+#include <memory>
+
+namespace search {
+namespace expression {
+
+class NumericFunctionNode : public MultiArgFunctionNode
+{
+public:
+ DECLARE_ABSTRACT_EXPRESSIONNODE(NumericFunctionNode);
+ NumericFunctionNode() : _handler() { }
+ NumericFunctionNode(const NumericFunctionNode & rhs);
+ NumericFunctionNode & operator = (const NumericFunctionNode & rhs);
+ virtual void reset() { _handler.reset(); MultiArgFunctionNode::reset(); }
+protected:
+ virtual void onPrepare(bool preserveAccurateTypes);
+
+ class Handler
+ {
+ public:
+ Handler(const NumericFunctionNode & func) : _function(func) { }
+ virtual ~Handler() { }
+ virtual void handle(const ResultNode & arg) = 0;
+ virtual void handleFirst(const ResultNode & arg) = 0;
+ protected:
+ const NumericFunctionNode & function() const { return _function; }
+ private:
+ const NumericFunctionNode & _function;
+ };
+
+ template <typename T>
+ class VectorHandler : public Handler
+ {
+ protected:
+ VectorHandler(const NumericFunctionNode & func) :
+ Handler(func),
+ _result(static_cast<T &>(func.updateResult()))
+ { }
+ virtual void handle(const ResultNode & arg);
+ virtual void handleFirst(const ResultNode & arg);
+ private:
+ T & _result;
+ };
+
+ class VectorIntegerHandler : public VectorHandler<IntegerResultNodeVector>
+ {
+ private:
+ typedef VectorHandler<IntegerResultNodeVector> BaseHandler;
+ public:
+ VectorIntegerHandler(const NumericFunctionNode & func) : BaseHandler(func) { }
+ };
+ class VectorFloatHandler : public VectorHandler<FloatResultNodeVector>
+ {
+ private:
+ typedef VectorHandler<FloatResultNodeVector> BaseHandler;
+ public:
+ VectorFloatHandler(const NumericFunctionNode & func) : BaseHandler(func) { }
+ };
+ class VectorStringHandler : public VectorHandler<StringResultNodeVector>
+ {
+ private:
+ typedef VectorHandler<StringResultNodeVector> BaseHandler;
+ public:
+ VectorStringHandler(const NumericFunctionNode & func) : BaseHandler(func) { }
+ };
+private:
+ virtual ResultNode::CP getInitialValue() const = 0;
+ virtual ResultNode & flatten(const ResultNodeVector & v, ResultNode & result) const = 0;
+ class ScalarIntegerHandler : public Handler
+ {
+ public:
+ ScalarIntegerHandler(const NumericFunctionNode & func) :
+ Handler(func),
+ _result(static_cast<Int64ResultNode &>(func.updateResult()))
+ { }
+ virtual void handle(const ResultNode & arg);
+ virtual void handleFirst(const ResultNode & arg) { _result.set(arg.getInteger()); }
+ protected:
+ Int64ResultNode & _result;
+ };
+ class ScalarFloatHandler : public Handler
+ {
+ public:
+ ScalarFloatHandler(const NumericFunctionNode & func) :
+ Handler(func),
+ _result(static_cast<FloatResultNode &>(func.updateResult()))
+ { }
+ virtual void handle(const ResultNode & arg);
+ virtual void handleFirst(const ResultNode & arg) { _result.set(arg.getFloat()); }
+ protected:
+ FloatResultNode & _result;
+ };
+ class ScalarStringHandler : public Handler
+ {
+ public:
+ ScalarStringHandler(const NumericFunctionNode & func) :
+ Handler(func),
+ _result(static_cast<StringResultNode &>(func.updateResult()))
+ { }
+ virtual void handle(const ResultNode & arg);
+ virtual void handleFirst(const ResultNode & arg) {
+ char buf[32];
+ vespalib::ConstBufferRef b = arg.getString(vespalib::BufferRef(buf, sizeof(buf)));
+ _result.set(vespalib::stringref(b.c_str(), b.size()));
+ }
+ protected:
+ StringResultNode & _result;
+ };
+ class ScalarRawHandler : public Handler
+ {
+ public:
+ ScalarRawHandler(const NumericFunctionNode & func) :
+ Handler(func),
+ _result(static_cast<RawResultNode &>(func.updateResult()))
+ { }
+ virtual void handle(const ResultNode & arg);
+ virtual void handleFirst(const ResultNode & arg) {
+ char buf[32];
+ vespalib::ConstBufferRef b = arg.getString(vespalib::BufferRef(buf, sizeof(buf)));
+ _result.setBuffer(b.data(), b.size());
+ }
+ protected:
+ RawResultNode & _result;
+ };
+ class FlattenIntegerHandler : public ScalarIntegerHandler
+ {
+ public:
+ FlattenIntegerHandler(const NumericFunctionNode & func) :
+ ScalarIntegerHandler(func),
+ _initial()
+ {
+ _initial.set(*func.getInitialValue());
+ }
+ virtual void handle(const ResultNode & arg);
+ virtual void handleFirst(const ResultNode & arg) { handle(arg); }
+ private:
+ Int64ResultNode _initial;
+ };
+ class FlattenFloatHandler : public ScalarFloatHandler
+ {
+ public:
+ FlattenFloatHandler(const NumericFunctionNode & func) :
+ ScalarFloatHandler(func),
+ _initial()
+ {
+ _initial.set(*func.getInitialValue());
+ }
+ virtual void handle(const ResultNode & arg);
+ virtual void handleFirst(const ResultNode & arg) { handle(arg); }
+ private:
+ FloatResultNode _initial;
+ };
+ class FlattenStringHandler : public ScalarStringHandler
+ {
+ public:
+ FlattenStringHandler(const NumericFunctionNode & func) :
+ ScalarStringHandler(func),
+ _initial()
+ {
+ _initial.set(*func.getInitialValue());
+ }
+ virtual void handle(const ResultNode & arg);
+ virtual void handleFirst(const ResultNode & arg) { handle(arg); }
+ private:
+ StringResultNode _initial;
+ };
+
+ virtual bool onCalculate(const ExpressionNodeVector & args, ResultNode & result) const;
+ std::unique_ptr<Handler> _handler;
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/expression/numericresultnode.h b/searchlib/src/vespa/searchlib/expression/numericresultnode.h
new file mode 100644
index 00000000000..c7b16b58335
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/numericresultnode.h
@@ -0,0 +1,23 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/searchlib/expression/singleresultnode.h>
+
+namespace search {
+namespace expression {
+
+class NumericResultNode : public SingleResultNode
+{
+public:
+ DECLARE_ABSTRACT_EXPRESSIONNODE(NumericResultNode);
+ typedef vespalib::IdentifiablePtr<NumericResultNode> CP;
+ typedef std::unique_ptr<NumericResultNode> UP;
+ virtual NumericResultNode *clone() const = 0;
+ virtual void multiply(const ResultNode & b) = 0;
+ virtual void divide(const ResultNode & b) = 0;
+ virtual void modulo(const ResultNode & b) = 0;
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/expression/orfunctionnode.h b/searchlib/src/vespa/searchlib/expression/orfunctionnode.h
new file mode 100644
index 00000000000..3b374ffdc54
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/orfunctionnode.h
@@ -0,0 +1,22 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/searchlib/expression/bitfunctionnode.h>
+
+namespace search {
+namespace expression {
+
+class OrFunctionNode : public BitFunctionNode
+{
+public:
+ DECLARE_EXPRESSIONNODE(OrFunctionNode);
+ OrFunctionNode() { }
+private:
+ virtual ResultNode::CP getInitialValue() const { return ResultNode::CP(new Int64ResultNode(0)); }
+ virtual ResultNode & flatten(const ResultNodeVector & v, ResultNode & result) const { return v.flattenOr(result); }
+ virtual void onArgument(const ResultNode & arg, Int64ResultNode & result) const;
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/expression/perdocexpression.cpp b/searchlib/src/vespa/searchlib/expression/perdocexpression.cpp
new file mode 100644
index 00000000000..202ce85f556
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/perdocexpression.cpp
@@ -0,0 +1,36 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/searchlib/expression/floatresultnode.h>
+#include <vespa/searchlib/expression/relevancenode.h>
+
+#include <vespa/log/log.h>
+LOG_SETUP(".searchlib.documentexpressions");
+
+namespace search {
+namespace expression {
+
+using namespace vespalib;
+
+IMPLEMENT_EXPRESSIONNODE(RelevanceNode, ExpressionNode);
+
+void
+RelevanceNode::visitMembers(vespalib::ObjectVisitor &visitor) const
+{
+ visit(visitor, "Value", _relevance);
+}
+
+Serializer & RelevanceNode::onSerialize(Serializer & os) const
+{
+ return _relevance.serialize(os);
+}
+
+Deserializer & RelevanceNode::onDeserialize(Deserializer & is)
+{
+ return _relevance.deserialize(is);
+}
+
+}
+}
+
+// this function was added by ../../forcelink.sh
+void forcelink_file_searchlib_expression_perdocexpression() {}
diff --git a/searchlib/src/vespa/searchlib/expression/positiveinfinityresultnode.h b/searchlib/src/vespa/searchlib/expression/positiveinfinityresultnode.h
new file mode 100644
index 00000000000..94b8a7cef5f
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/positiveinfinityresultnode.h
@@ -0,0 +1,30 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/searchlib/expression/singleresultnode.h>
+
+namespace search {
+namespace expression {
+
+class PositiveInfinityResultNode : public SingleResultNode
+{
+public:
+ DECLARE_EXPRESSIONNODE(PositiveInfinityResultNode);
+ virtual size_t hash() const;
+ virtual int onCmp(const Identifiable & b) const;
+ virtual void set(const ResultNode & rhs);
+ virtual void min(const ResultNode & b);
+ virtual void max(const ResultNode & b);
+ virtual void add(const ResultNode & b);
+private:
+ virtual void setMin();
+ virtual void setMax();
+ virtual int64_t onGetInteger(size_t index) const;
+ virtual double onGetFloat(size_t index) const;
+ virtual ConstBufferRef onGetString(size_t index, BufferRef buf) const;
+ virtual size_t onGetRawByteSize() const { return 0; }
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/expression/rangebucketpredef.cpp b/searchlib/src/vespa/searchlib/expression/rangebucketpredef.cpp
new file mode 100644
index 00000000000..87b70cb5757
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/rangebucketpredef.cpp
@@ -0,0 +1,133 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include "rangebucketpredef.h"
+#include "integerresultnode.h"
+#include "floatresultnode.h"
+#include "integerbucketresultnode.h"
+#include "floatbucketresultnode.h"
+#include <vespa/vespalib/util/stringfmt.h>
+#include <stdexcept>
+#include <math.h>
+#include <limits>
+
+namespace search {
+namespace expression {
+
+IMPLEMENT_EXPRESSIONNODE(RangeBucketPreDefFunctionNode, UnaryFunctionNode);
+
+RangeBucketPreDefFunctionNode::RangeBucketPreDefFunctionNode(const RangeBucketPreDefFunctionNode & rhs) :
+ UnaryFunctionNode(rhs),
+ _predef(rhs._predef),
+ _result(NULL),
+ _nullResult(rhs._nullResult),
+ _handler()
+{
+}
+
+RangeBucketPreDefFunctionNode & RangeBucketPreDefFunctionNode::operator = (const RangeBucketPreDefFunctionNode & rhs)
+{
+ if (this != & rhs) {
+ UnaryFunctionNode::operator = (rhs);
+ _predef = rhs._predef;
+ _result = NULL;
+ _nullResult = rhs._nullResult;
+ _handler.reset();
+ }
+ return *this;
+}
+
+void
+RangeBucketPreDefFunctionNode::onPrepareResult()
+{
+ const vespalib::Identifiable::RuntimeClass & cInfo(getArg().getResult().getClass());
+ if (cInfo.inherits(ResultNodeVector::classId)) {
+ if (cInfo.inherits(IntegerResultNodeVector::classId)) {
+ _nullResult = & IntegerBucketResultNode::getNull();
+ } else if (cInfo.inherits(FloatResultNodeVector::classId)) {
+ _nullResult = & FloatBucketResultNode::getNull();
+ } else if (cInfo.inherits(StringResultNodeVector::classId)) {
+ _nullResult = & StringBucketResultNode::getNull();
+ } else if (cInfo.inherits(RawResultNodeVector::classId)) {
+ _nullResult = & RawBucketResultNode::getNull();
+ } else {
+ throw std::runtime_error(vespalib::make_string("cannot create appropriate bucket for type '%s'", cInfo.name()));
+ }
+ setResultType(ResultNode::UP(_predef->clone()));
+ static_cast<ResultNodeVector &>(updateResult()).clear();
+ _handler.reset(new MultiValueHandler(*this));
+ _result = & updateResult();
+ } else {
+ if (cInfo.inherits(IntegerResultNode::classId)) {
+ _nullResult = & IntegerBucketResultNode::getNull();
+ } else if (cInfo.inherits(FloatResultNode::classId)) {
+ _nullResult = & FloatBucketResultNode::getNull();
+ } else if (cInfo.inherits(StringResultNode::classId)) {
+ _nullResult = & StringBucketResultNode::getNull();
+ } else if (cInfo.inherits(RawResultNode::classId)) {
+ _nullResult = & RawBucketResultNode::getNull();
+ } else {
+ throw std::runtime_error(vespalib::make_string("cannot create appropriate bucket for type '%s'", cInfo.name()));
+ }
+ _result = _nullResult;
+ if ( ! _predef->empty()) {
+ _result = & _predef->get(0);
+ }
+ _handler.reset(new SingleValueHandler(*this));
+ }
+}
+
+bool
+RangeBucketPreDefFunctionNode::onExecute() const
+{
+ getArg().execute();
+ const ResultNode * result = _handler->handle(getArg().getResult());
+ _result = result ? result : _nullResult;
+ return true;
+}
+
+const ResultNode * RangeBucketPreDefFunctionNode::SingleValueHandler::handle(const ResultNode & arg)
+{
+ return _predef.find(arg);
+}
+
+const ResultNode * RangeBucketPreDefFunctionNode::MultiValueHandler::handle(const ResultNode & arg)
+{
+ const ResultNodeVector & v = static_cast<const ResultNodeVector &>(arg);
+ _result.clear();
+ for(size_t i(0), m(v.size()); i < m; i++) {
+ const ResultNode * bucket = _predef.find(v.get(i));
+ if (bucket != NULL) {
+ _result.push_back(*bucket);
+ } else {
+ _result.push_back(*_nullResult);
+ }
+ }
+ return &_result;
+}
+
+vespalib::Serializer &
+RangeBucketPreDefFunctionNode::onSerialize(vespalib::Serializer &os) const
+{
+ UnaryFunctionNode::onSerialize(os);
+ return os << _predef;
+}
+
+vespalib::Deserializer &
+RangeBucketPreDefFunctionNode::onDeserialize(vespalib::Deserializer &is)
+{
+ UnaryFunctionNode::onDeserialize(is);
+ return is >> _predef;
+}
+
+void
+RangeBucketPreDefFunctionNode::visitMembers(vespalib::ObjectVisitor &visitor) const
+{
+ UnaryFunctionNode::visitMembers(visitor);
+ visit(visitor, "predefined", _predef);
+}
+
+}
+}
+
+// this function was added by ../../forcelink.sh
+void forcelink_file_searchlib_expression_rangebucketpredef() {}
diff --git a/searchlib/src/vespa/searchlib/expression/rangebucketpredef.h b/searchlib/src/vespa/searchlib/expression/rangebucketpredef.h
new file mode 100644
index 00000000000..de9c5e69879
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/rangebucketpredef.h
@@ -0,0 +1,75 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include "unaryfunctionnode.h"
+#include "resultvector.h"
+#include "integerresultnode.h"
+#include "floatresultnode.h"
+#include "stringresultnode.h"
+
+namespace search {
+namespace expression {
+
+class RangeBucketPreDefFunctionNode : public UnaryFunctionNode
+{
+private:
+ virtual void onPrepareResult();
+ virtual bool onExecute() const;
+ virtual void visitMembers(vespalib::ObjectVisitor &visitor) const;
+
+ class Handler {
+ public:
+ Handler(const RangeBucketPreDefFunctionNode & rangeNode) : _predef(rangeNode.getBucketList()), _nullResult(rangeNode._nullResult) { }
+ virtual ~Handler() { }
+ virtual const ResultNode * handle(const ResultNode & arg) = 0;
+ protected:
+ const ResultNodeVector & _predef;
+ const ResultNode * _nullResult;
+ };
+ class SingleValueHandler : public Handler {
+ public:
+ SingleValueHandler(const RangeBucketPreDefFunctionNode & rangeNode) :
+ Handler(rangeNode)
+ { }
+ virtual const ResultNode * handle(const ResultNode & arg);
+ };
+ class MultiValueHandler : public Handler {
+ public:
+ MultiValueHandler(const RangeBucketPreDefFunctionNode & rangeNode) :
+ Handler(rangeNode),
+ _result(static_cast<ResultNodeVector &>(rangeNode.updateResult()))
+ { }
+ virtual const ResultNode * handle(const ResultNode & arg);
+ private:
+ ResultNodeVector & _result;
+ };
+
+
+ ResultNodeVector::CP _predef;
+ mutable const ResultNode * _result;
+ const ResultNode * _nullResult;
+ std::unique_ptr<Handler> _handler;
+ static IntegerBucketResultNode _nullIntegerResult;
+ static FloatBucketResultNode _nullFloatResult;
+ static StringBucketResultNode _nullStringResult;
+ static RawBucketResultNode _nullRawResult;
+
+public:
+ DECLARE_EXPRESSIONNODE(RangeBucketPreDefFunctionNode);
+ DECLARE_NBO_SERIALIZE;
+ RangeBucketPreDefFunctionNode() : UnaryFunctionNode(), _predef(), _result(NULL), _nullResult(NULL) {}
+ RangeBucketPreDefFunctionNode(const ExpressionNode::CP &arg) : UnaryFunctionNode(arg), _predef(), _result(NULL), _nullResult(NULL) {}
+ RangeBucketPreDefFunctionNode(const RangeBucketPreDefFunctionNode & rhs);
+ RangeBucketPreDefFunctionNode & operator = (const RangeBucketPreDefFunctionNode & rhs);
+ virtual const ResultNode & getResult() const { return *_result; }
+ const ResultNodeVector & getBucketList() const { return *_predef; }
+ ResultNodeVector & getBucketList() { return *_predef; }
+ RangeBucketPreDefFunctionNode & setBucketList(const ResultNodeVector & predef) {
+ _predef.reset(static_cast<ResultNodeVector *>(predef.clone()));
+ return *this;
+ }
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/expression/rawbucketresultnode.cpp b/searchlib/src/vespa/searchlib/expression/rawbucketresultnode.cpp
new file mode 100644
index 00000000000..df0d7384e35
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/rawbucketresultnode.cpp
@@ -0,0 +1,93 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include "rawbucketresultnode.h"
+#include <vespa/vespalib/objects/visit.h>
+
+namespace search {
+namespace expression {
+
+IMPLEMENT_RESULTNODE(RawBucketResultNode, BucketResultNode);
+
+RawBucketResultNode RawBucketResultNode::_nullResult;
+
+size_t
+RawBucketResultNode::hash() const
+{
+#if 0
+ union {
+ uint8_t cxor[8];
+ uint64_t ixor;
+ } xorResult;
+ xorResult.ixor = 0;
+ size_t i(0);
+ const size_t m(_from.size());
+ const char * c = _from.c_str();
+ const uint64_t * ic = reinterpret_cast<const uint64_t *>(c);
+ for (; i+8 < m; i+=8) {
+ const size_t index(i/8);
+ xorResult.ixor ^= ic[index];
+ }
+ for (; i < m; i++) {
+ xorResult.cxor[i%8] ^= c[i];
+ }
+ return xorResult.ixor;
+#else
+ return 0;
+#endif
+}
+
+int
+RawBucketResultNode::onCmp(const Identifiable & rhs) const
+{
+ const RawBucketResultNode & b = static_cast<const RawBucketResultNode &>(rhs);
+ int diff(_from->cmp(*b._from));
+ return (diff == 0) ? _to->cmp(*b._to) : diff;
+}
+
+int RawBucketResultNode::contains(const RawBucketResultNode & b) const
+{
+ int fromDiff(_from->cmp(*b._from));
+ int toDiff(_to->cmp(*b._to));
+ return (fromDiff < 0) ? std::min(0, toDiff) : std::max(0, toDiff);
+}
+
+int RawBucketResultNode::contains(const ConstBufferRef & s) const
+{
+ RawResultNode v(s.data(), s.size());
+ int diff(_from->cmp(v));
+ if (diff > 0) {
+ return 1;
+ } else {
+ diff = _to->cmp(v);
+ return (diff <= 0) ? -1 : 0;
+ }
+}
+
+void
+RawBucketResultNode::visitMembers(vespalib::ObjectVisitor &visitor) const
+{
+ visit(visitor, _fromField.getName(), _from);
+ visit(visitor, _toField.getName(), _to);
+}
+
+vespalib::Serializer &
+RawBucketResultNode::onSerialize(vespalib::Serializer & os) const
+{
+ _from.serialize(os);
+ _to.serialize(os);
+ return os;
+}
+
+vespalib::Deserializer &
+RawBucketResultNode::onDeserialize(vespalib::Deserializer & is)
+{
+ _from.deserialize(is);
+ _to.deserialize(is);
+ return is;
+}
+
+}
+}
+
+// this function was added by ../../forcelink.sh
+void forcelink_file_searchlib_expression_rawbucketresultnode() {}
diff --git a/searchlib/src/vespa/searchlib/expression/rawbucketresultnode.h b/searchlib/src/vespa/searchlib/expression/rawbucketresultnode.h
new file mode 100644
index 00000000000..abd78e64d97
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/rawbucketresultnode.h
@@ -0,0 +1,37 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include "bucketresultnode.h"
+#include "rawresultnode.h"
+
+namespace search {
+namespace expression {
+
+class RawBucketResultNode : public BucketResultNode
+{
+private:
+ ResultNode::CP _from;
+ ResultNode::CP _to;
+ static RawBucketResultNode _nullResult;
+ virtual size_t onGetRawByteSize() const { return sizeof(_from) + sizeof(_to); }
+public:
+ struct GetValue {
+ BufferRef _tmp;
+ ConstBufferRef operator () (const ResultNode & r) { return r.getString(_tmp); }
+ };
+
+ DECLARE_EXPRESSIONNODE(RawBucketResultNode);
+ DECLARE_NBO_SERIALIZE;
+ RawBucketResultNode() : _from(new RawResultNode()), _to(new RawResultNode()) {}
+ RawBucketResultNode(ResultNode::UP from, ResultNode::UP to) : _from(from.release()), _to(to.release()) {}
+ virtual size_t hash() const;
+ virtual int onCmp(const Identifiable & b) const;
+ int contains(const RawBucketResultNode & b) const;
+ int contains(const ConstBufferRef & v) const;
+ virtual void visitMembers(vespalib::ObjectVisitor &visitor) const;
+ static const RawBucketResultNode & getNull() { return _nullResult; }
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/expression/rawresultnode.h b/searchlib/src/vespa/searchlib/expression/rawresultnode.h
new file mode 100644
index 00000000000..fd6b41b549c
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/rawresultnode.h
@@ -0,0 +1,53 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/searchlib/expression/singleresultnode.h>
+
+namespace search {
+namespace expression {
+
+class RawResultNode : public SingleResultNode
+{
+public:
+ DECLARE_EXPRESSIONNODE(RawResultNode);
+ DECLARE_NBO_SERIALIZE;
+ DECLARE_RESULTNODE_SERIALIZE;
+ virtual void visitMembers(vespalib::ObjectVisitor &visitor) const;
+ RawResultNode() : _value(1) { setBuffer("", 0); }
+ RawResultNode(const void * buf, size_t sz) { setBuffer(buf, sz); }
+ virtual int onCmp(const Identifiable & b) const;
+ virtual size_t hash() const;
+ virtual void set(const ResultNode & rhs);
+ void setBuffer(const void * buf, size_t sz);
+ ConstBufferRef get() const { return ConstBufferRef(&_value[0], _value.size()); }
+ virtual void min(const ResultNode & b);
+ virtual void max(const ResultNode & b);
+ virtual void add(const ResultNode & b);
+ virtual void negate();
+private:
+ typedef std::vector<uint8_t> V;
+ virtual int cmpMem(const void * a, const void *b) const {
+ const V & ai(*static_cast<const V *>(a));
+ const V & bi(*static_cast<const V *>(b));
+ int result = memcmp(&ai[0], &bi[0], std::min(ai.size(), bi.size()));
+ if (result == 0) {
+ result = ai.size() < bi.size() ? -1 : ai.size() > bi.size() ? 1 : 0;
+ }
+ return result;
+ }
+ virtual void decode(const void * buf) { _value = *static_cast<const V *>(buf); }
+ virtual void encode(void * buf) const { *static_cast<V *>(buf) = _value; }
+ virtual size_t hash(const void * buf) const;
+
+ virtual size_t onGetRawByteSize() const { return sizeof(_value); }
+ virtual void setMin();
+ virtual void setMax();
+ virtual int64_t onGetInteger(size_t index) const;
+ virtual double onGetFloat(size_t index) const;
+ virtual ConstBufferRef onGetString(size_t index, BufferRef buf) const;
+ V _value;
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/expression/relevancenode.h b/searchlib/src/vespa/searchlib/expression/relevancenode.h
new file mode 100644
index 00000000000..6f867329961
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/relevancenode.h
@@ -0,0 +1,27 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/searchlib/expression/expressionnode.h>
+#include <vespa/searchlib/expression/resultnode.h>
+
+namespace search {
+namespace expression {
+
+class RelevanceNode : public ExpressionNode
+{
+public:
+ DECLARE_NBO_SERIALIZE;
+ DECLARE_EXPRESSIONNODE(RelevanceNode);
+ RelevanceNode() : ExpressionNode(), _relevance() { }
+ virtual void visitMembers(vespalib::ObjectVisitor &visitor) const;
+ virtual const ResultNode & getResult() const { return _relevance; }
+ void setRelevance(double relevance) { _relevance.set(relevance); }
+private:
+ virtual void onPrepare(bool preserveAccurateTypes) { (void) preserveAccurateTypes; }
+ virtual bool onExecute() const { return true; }
+ FloatResultNode _relevance;
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/expression/resultnode.cpp b/searchlib/src/vespa/searchlib/expression/resultnode.cpp
new file mode 100644
index 00000000000..387b05e7add
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/resultnode.cpp
@@ -0,0 +1,73 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/searchlib/expression/resultnode.h>
+#include <stdexcept>
+
+namespace search {
+namespace expression {
+
+uint64_t ResultNode::radixAsc(const void * buf) const
+{
+ (void) buf;
+ throw std::runtime_error("ResultNode::radixAsc(const void * buf) must be overloaded by'" + vespalib::string(getClass().name()) + "'.");
+}
+
+uint64_t ResultNode::radixDesc(const void * buf) const
+{
+ (void) buf;
+ throw std::runtime_error("ResultNode::radixDesc(const void * buf) must be overloaded by'" + vespalib::string(getClass().name()) + "'.");
+}
+
+size_t ResultNode::hash(const void * buf) const
+{
+ (void) buf;
+ throw std::runtime_error("ResultNode::hash(const void * buf) must be overloaded by'" + vespalib::string(getClass().name()) + "'.");
+}
+
+void ResultNode::decode(const void * buf)
+{
+ (void) buf;
+ throw std::runtime_error("ResultNode::decode(const void * buf) must be overloaded by'" + vespalib::string(getClass().name()) + "'.");
+}
+
+void ResultNode::encode(void * buf) const
+{
+ (void) buf;
+ throw std::runtime_error("ResultNode::encode(void * buf) const must be overloaded by'" + vespalib::string(getClass().name()) + "'.");
+}
+
+void ResultNode::swap(void * buf)
+{
+ (void) buf;
+ throw std::runtime_error("ResultNode::swap(void * buf) must be overloaded by'" + vespalib::string(getClass().name()) + "'.");
+}
+
+void ResultNode::create(void * buf) const
+{
+ (void) buf;
+ throw std::runtime_error("ResultNode::create(void * buf) const must be overloaded by'" + vespalib::string(getClass().name()) + "'.");
+}
+
+void ResultNode::destroy(void * buf) const
+{
+ (void) buf;
+ throw std::runtime_error("ResultNode::destroy(void * buf) const must be overloaded by'" + vespalib::string(getClass().name()) + "'.");
+}
+
+int ResultNode::cmpMem(const void * a, const void *b) const
+{
+ (void) a;
+ (void) b;
+ throw std::runtime_error("ResultNode::cmpMem(const void * a, const void *b) const must be overloaded by'" + vespalib::string(getClass().name()) + "'.");
+}
+
+size_t ResultNode::getRawByteSize() const
+{
+ throw std::runtime_error("ResultNode::getRawByteSize() const must be overloaded by '" + vespalib::string(getClass().name()) + "'.");
+}
+
+}
+}
+
+// this function was added by ../../forcelink.sh
+void forcelink_file_searchlib_expression_resultnode() {}
diff --git a/searchlib/src/vespa/searchlib/expression/resultnode.h b/searchlib/src/vespa/searchlib/expression/resultnode.h
new file mode 100644
index 00000000000..0d83be292c7
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/resultnode.h
@@ -0,0 +1,128 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/searchlib/expression/expressionnode.h>
+#include <vespa/searchlib/expression/serializer.h>
+#include <vespa/vespalib/util/exception.h>
+
+namespace search {
+namespace expression {
+
+#define DECLARE_ABSTRACT_RESULTNODE(Class) DECLARE_IDENTIFIABLE_ABSTRACT_NS2(search, expression, Class)
+#define DECLARE_ABSTRACT_RESULTNODE_NS1(ns, Class) DECLARE_IDENTIFIABLE_ABSTRACT_NS3(search, expression, ns, Class)
+
+#define DECLARE_RESULTNODE(Class) \
+ DECLARE_IDENTIFIABLE_NS2(search, expression, Class) \
+ virtual Class * clone() const;
+
+#define DECLARE_RESULTNODE_NS1(ns, Class) \
+ DECLARE_IDENTIFIABLE_NS3(search, expression, ns, Class) \
+ virtual Class * clone() const;
+
+#define DECLARE_RESULTNODE_SERIALIZE \
+ virtual ResultSerializer & onSerializeResult(ResultSerializer & os) const; \
+ virtual ResultDeserializer & onDeserializeResult(ResultDeserializer & is);
+
+#define IMPLEMENT_ABSTRACT_RESULTNODE(Class, base) IMPLEMENT_IDENTIFIABLE_ABSTRACT_NS2(search, expression, Class, base)
+
+#define IMPLEMENT_RESULTNODE(Class, base) \
+ IMPLEMENT_IDENTIFIABLE_NS2(search, expression, Class, base) \
+ Class * Class::clone() const { return new Class(*this); }
+
+class ResultNode : public vespalib::Identifiable
+{
+public:
+ typedef vespalib::BufferRef BufferRef;
+ typedef vespalib::ConstBufferRef ConstBufferRef;
+public:
+ int64_t getInteger() const { return onGetInteger(0); }
+ int64_t getEnum() const { return onGetEnum(0); }
+ double getFloat() const { return onGetFloat(0); }
+ ConstBufferRef getString(BufferRef buf) const { return onGetString(0, buf); }
+
+ int64_t getInteger(size_t index) const { return onGetInteger(index); }
+ double getFloat(size_t index) const { return onGetFloat(index); }
+ ConstBufferRef getString(size_t index, BufferRef buf) const { return onGetString(index, buf); }
+
+private:
+ virtual int64_t onGetInteger(size_t index) const = 0;
+ virtual int64_t onGetEnum(size_t index) const {
+ (void) index;
+ throw vespalib::Exception("search::expression::ResultNode onGetEnum is not implemented");
+ }
+ virtual double onGetFloat(size_t index) const = 0;
+ virtual ConstBufferRef onGetString(size_t index, BufferRef buf) const = 0;
+
+public:
+ DECLARE_ABSTRACT_RESULTNODE(ResultNode);
+ virtual ~ResultNode() { }
+ typedef std::unique_ptr<ResultNode> UP;
+ typedef vespalib::IdentifiablePtr<ResultNode> CP;
+ virtual void set(const ResultNode & rhs) = 0;
+
+ /**
+ * Will initialize a memory area that must be destroyed. After creation it can be encoded or decoded.
+ * Memory must be fixed size.
+ * This interface is used to efficiently store data in vectors without the overhead of virtual objects.
+ * @param memory area to initialize
+ */
+ virtual void create(void * buf) const;
+ /**
+ * Will initialize itself with the memory area supplied.
+ * @param memory area containing alrady encoded data.
+ */
+ virtual void decode(const void * buf);
+ /**
+ * Will decode itself into the memory area supplied.
+ * @param memory area used as storage.
+ */
+ virtual void encode(void * buf) const;
+ /**
+ * Will return a radixsortable value that will sort ascending.
+ * @param memory area used as storage.
+ */
+ virtual uint64_t radixAsc(const void * buf) const;
+ /**
+ * Will return a radixsortable value that will sort descending.
+ * @param memory area used as storage.
+ */
+ virtual uint64_t radixDesc(const void * buf) const;
+ /**
+ * Will return the typed hash of memory area supplied.
+ * @param memory area used as storage.
+ */
+ virtual size_t hash(const void * buf) const;
+ /**
+ * Will decode itself into the memory area supplied.
+ * It will also encode itself from the memory area.
+ * @param memory area used as storage.
+ */
+ virtual void swap(void * buf);
+ /**
+ * Will destroy any initialized memory.
+ * @param memory area used as storage.
+ */
+ virtual void destroy(void * buf) const;
+ /**
+ * Will do a typed compare of the given memory a and b.
+ * @param a memory area of a
+ * @param b memory area of b
+ * @return -1 if a<b, 0 if a==b, and 1 if a>b
+ */
+ virtual int cmpMem(const void * a, const void *b) const;
+
+ virtual void negate();
+ virtual void sort();
+ virtual void reverse();
+ virtual size_t hash() const = 0;
+ virtual ResultNode * clone() const = 0;
+ ResultNode::UP createBaseType() const { return ResultNode::UP(static_cast<ResultNode *>(getBaseClass().create())); }
+ virtual ResultSerializer & onSerializeResult(ResultSerializer & os) const;
+ virtual ResultDeserializer & onDeserializeResult(ResultDeserializer & is);
+ virtual size_t getRawByteSize() const;
+ virtual bool isMultiValue() const { return false; }
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/expression/resultnodes.cpp b/searchlib/src/vespa/searchlib/expression/resultnodes.cpp
new file mode 100644
index 00000000000..9a1d3639a8e
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/resultnodes.cpp
@@ -0,0 +1,410 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/searchlib/expression/integerresultnode.h>
+#include <vespa/searchlib/expression/floatresultnode.h>
+#include <vespa/searchlib/expression/stringresultnode.h>
+#include <vespa/searchlib/expression/rawresultnode.h>
+#include <vespa/searchlib/expression/enumresultnode.h>
+#include <vespa/searchlib/expression/nullresultnode.h>
+#include <vespa/searchlib/expression/positiveinfinityresultnode.h>
+
+#include <vespa/log/log.h>
+LOG_SETUP(".searchlib.documentexpressions");
+
+namespace search {
+namespace expression {
+
+using vespalib::nbostream;
+using vespalib::Serializer;
+using vespalib::Deserializer;
+using vespalib::make_string;
+using vespalib::Identifiable;
+using vespalib::BufferRef;
+using vespalib::ConstBufferRef;
+
+IMPLEMENT_ABSTRACT_RESULTNODE(ResultNode, Identifiable);
+IMPLEMENT_ABSTRACT_RESULTNODE(SingleResultNode, ResultNode);
+IMPLEMENT_ABSTRACT_RESULTNODE(NumericResultNode, SingleResultNode);
+IMPLEMENT_ABSTRACT_RESULTNODE(IntegerResultNode, NumericResultNode);
+IMPLEMENT_RESULTNODE(StringResultNode, SingleResultNode);
+IMPLEMENT_RESULTNODE(NullResultNode, SingleResultNode);
+IMPLEMENT_RESULTNODE(PositiveInfinityResultNode, SingleResultNode);
+IMPLEMENT_RESULTNODE(RawResultNode, SingleResultNode);
+IMPLEMENT_RESULTNODE(Int8ResultNode, IntegerResultNode);
+IMPLEMENT_RESULTNODE(Int16ResultNode, IntegerResultNode);
+IMPLEMENT_RESULTNODE(Int32ResultNode, IntegerResultNode);
+IMPLEMENT_RESULTNODE(Int64ResultNode, IntegerResultNode);
+IMPLEMENT_RESULTNODE(EnumResultNode, IntegerResultNode);
+IMPLEMENT_RESULTNODE(FloatResultNode, NumericResultNode);
+
+void ResultNode::sort()
+{
+}
+
+void ResultNode::reverse()
+{
+}
+
+void ResultNode::negate()
+{
+ throw std::runtime_error(make_string("Class %s does not implement 'negate'", getClass().name()));
+}
+
+ResultSerializer & ResultNode::onSerializeResult(ResultSerializer & os) const
+{
+ os.proxyPut(*this);
+ return os;
+}
+
+ResultDeserializer & ResultNode::onDeserializeResult(ResultDeserializer & is)
+{
+ is.proxyGet(*this);
+ return is;
+}
+
+int64_t FloatResultNode::onGetInteger(size_t index) const { (void) index; return static_cast<int64_t>(round(_value)); }
+double FloatResultNode::onGetFloat(size_t index) const { (void) index; return _value; }
+void FloatResultNode::add(const ResultNode & b) { _value += b.getFloat(); }
+void FloatResultNode::negate() { _value = - _value; }
+void FloatResultNode::multiply(const ResultNode & b) { _value *= b.getFloat(); }
+void FloatResultNode::divide(const ResultNode & b) {
+ double val = b.getFloat();
+ _value = (val == 0.0) ? 0.0 : (_value / val);
+}
+void FloatResultNode::modulo(const ResultNode & b) { _value = ResultNode::getInteger() % b.getInteger(); }
+void FloatResultNode::min(const ResultNode & b) { double t(b.getFloat()); if (t < _value) { _value = t; } }
+void FloatResultNode::max(const ResultNode & b) { double t(b.getFloat()); if (t > _value) { _value = t; } }
+void FloatResultNode::set(const ResultNode & rhs) { _value = rhs.getFloat(); }
+Serializer & FloatResultNode::onSerialize(Serializer & os) const { os << _value; return os; }
+Deserializer & FloatResultNode::onDeserialize(Deserializer & is) { is >> _value; return is; }
+
+void
+FloatResultNode::visitMembers(vespalib::ObjectVisitor &visitor) const
+{
+ visit(visitor, "value", _value);
+}
+
+ResultNode::ConstBufferRef FloatResultNode::onGetString(size_t index, ResultNode::BufferRef buf) const
+{
+ (void) index;
+ int numWritten = std::min(buf.size(), (size_t)std::max(0, snprintf(buf.str(), buf.size(), "%g", _value)));
+ return ConstBufferRef(buf.str(), numWritten);
+}
+
+bool FloatResultNode::isNan() const
+{
+ return isnan(_value);
+}
+
+int FloatResultNode::onCmp(const Identifiable & b) const
+{
+ const FloatResultNode & rhs(static_cast<const FloatResultNode &>(b));
+ if (isNan()) {
+ return rhs.isNan() ? 0 : -1;
+ } else {
+ if (rhs.isNan()) {
+ return 1;
+ } else {
+ return (_value > rhs._value) ? 1 : (_value < rhs._value) ? -1 : 0;
+ }
+ }
+}
+
+void StringResultNode::setMin() { _value.clear(); }
+void StringResultNode::setMax() { _value.clear(); _value.append(char(-1)); }
+void RawResultNode::setMin() { _value.clear(); }
+void RawResultNode::setMax() { _value.push_back(-1); }
+void FloatResultNode::setMin() { _value = -std::numeric_limits<double>::max(); }
+void FloatResultNode::setMax() { _value = std::numeric_limits<double>::max(); }
+
+void NullResultNode::setMin() { }
+void NullResultNode::setMax() { }
+void NullResultNode::add(const ResultNode & b) { (void) b; }
+void NullResultNode::min(const ResultNode & b) { (void) b; }
+void NullResultNode::max(const ResultNode & b) { (void) b; }
+int64_t NullResultNode::onGetInteger(size_t index) const { (void) index; return 0; }
+double NullResultNode::onGetFloat(size_t index) const { (void) index; return 0.0; }
+int NullResultNode::onCmp(const Identifiable & b) const { (void) b; return (b.getClass().id() == NullResultNode::classId) ? 0 : 1; }
+void NullResultNode::set(const ResultNode & rhs) { (void) rhs; }
+size_t NullResultNode::hash() const { return 0; }
+ResultNode::ConstBufferRef NullResultNode::onGetString(size_t index, ResultNode::BufferRef buf) const { (void) index; return buf; }
+void PositiveInfinityResultNode::setMin() { }
+void PositiveInfinityResultNode::setMax() { }
+void PositiveInfinityResultNode::add(const ResultNode & b) { (void) b; }
+void PositiveInfinityResultNode::min(const ResultNode & b) { (void) b; }
+void PositiveInfinityResultNode::max(const ResultNode & b) { (void) b; }
+int64_t PositiveInfinityResultNode::onGetInteger(size_t index) const { (void) index; return 0; }
+double PositiveInfinityResultNode::onGetFloat(size_t index) const { (void) index; return 0.0; }
+void PositiveInfinityResultNode::set(const ResultNode & rhs) { (void) rhs; }
+size_t PositiveInfinityResultNode::hash() const { return 0; }
+ResultNode::ConstBufferRef PositiveInfinityResultNode::onGetString(size_t index, ResultNode::BufferRef buf) const { (void) index; return buf; }
+
+int PositiveInfinityResultNode::onCmp(const Identifiable & b) const
+{
+ if (b.inherits(PositiveInfinityResultNode::classId)) {
+ return 0;
+ }
+ return 1;
+}
+
+int64_t StringResultNode::onGetInteger(size_t index) const { (void) index; return strtoll(_value.c_str(), NULL, 0); }
+double StringResultNode::onGetFloat(size_t index) const { (void) index; return strtod(_value.c_str(), NULL); }
+Serializer & StringResultNode::onSerialize(Serializer & os) const
+{
+ os << _value;
+ return os;
+}
+
+int StringResultNode::onCmp(const Identifiable & b) const
+{
+ if (b.inherits(PositiveInfinityResultNode::classId)) {
+ return -1;
+ } else {
+ const StringResultNode & sb(static_cast<const StringResultNode &>(b));
+ size_t sz(std::min(_value.size(), sb._value.size()));
+ int result = memcmp(_value.c_str(), sb._value.c_str(), sz);
+ if (result == 0) {
+ result = _value.size() < sb._value.size() ? -1 : _value.size() > sb._value.size() ? 1 : 0;
+ }
+ return result;
+ }
+}
+
+Deserializer & StringResultNode::onDeserialize(Deserializer & is)
+{
+ is >> _value;
+ return is;
+}
+
+
+void RawResultNode::add(const ResultNode & b)
+{
+ char buf[32];
+ ConstBufferRef s(b.getString(BufferRef(buf, sizeof(buf))));
+ const uint8_t *raw = static_cast<const uint8_t *>(s.data());
+
+ size_t i(0);
+ for (; i < _value.size() && i < s.size(); i++) {
+ _value[i] += raw[i];
+ }
+ if (i < s.size()) {
+ for (; i < s.size(); i++) {
+ _value.push_back(raw[i]);
+ }
+ }
+
+}
+
+void RawResultNode::min(const ResultNode & b)
+{
+ char buf[32];
+ ConstBufferRef s(b.getString(BufferRef(buf, sizeof(buf))));
+
+ if (memcmp(&_value[0], s.data(), std::min(s.size(), _value.size())) > 0) {
+ setBuffer(s.data(), s.size());
+ }
+}
+
+void RawResultNode::max(const ResultNode & b)
+{
+ char buf[32];
+ ConstBufferRef s(b.getString(BufferRef(buf, sizeof(buf))));
+
+ if (memcmp(&_value[0], s.data(), std::min(s.size(), _value.size())) < 0) {
+ setBuffer(s.data(), s.size());
+ }
+}
+
+void RawResultNode::negate()
+{
+ for (size_t i(0); i < _value.size(); i++) {
+ _value[i] = - _value[i];
+ }
+}
+
+void StringResultNode::add(const ResultNode & b)
+{
+ char buf[32];
+ ConstBufferRef s(b.getString(BufferRef(buf, sizeof(buf))));
+ vespalib::stringref bs(s.c_str(), s.size());
+ size_t i(0);
+ for (; i < _value.length() && i < bs.length(); i++) {
+ _value[i] += bs[i];
+ }
+ if (i < bs.length()) {
+ // XXX: Should have some way of appending with iterators
+ _value.append(bs.data() + i, (bs.length() - i));
+ }
+}
+
+void StringResultNode::min(const ResultNode & b)
+{
+ char buf[32];
+ ConstBufferRef s(b.getString(BufferRef(buf, sizeof(buf))));
+ vespalib::stringref bs(s.c_str(), s.size());
+ if (_value > bs) {
+ _value = bs;
+ }
+}
+
+void StringResultNode::max(const ResultNode & b)
+{
+ char buf[32];
+ ConstBufferRef s(b.getString(BufferRef(buf, sizeof(buf))));
+ vespalib::stringref bs(s.c_str(), s.size());
+ if (_value < bs) {
+ _value = bs;
+ }
+}
+
+void StringResultNode::negate()
+{
+ for (size_t i(0); i < _value.length(); i++) {
+ _value[i] = - _value[i];
+ }
+}
+
+void
+StringResultNode::visitMembers(vespalib::ObjectVisitor &visitor) const
+{
+ visit(visitor, "value", _value);
+}
+
+ResultNode::ConstBufferRef StringResultNode::onGetString(size_t index, ResultNode::BufferRef ) const { (void) index; return ConstBufferRef(_value.c_str(), _value.size()); }
+
+void StringResultNode::set(const ResultNode & rhs)
+{
+ char buf[32];
+ ConstBufferRef b(rhs.getString(BufferRef(buf, sizeof(buf))));
+ _value.assign(b.c_str(), b.size());
+}
+
+StringResultNode & StringResultNode::append(const ResultNode & rhs)
+{
+ char buf[32];
+ ConstBufferRef b(rhs.getString(BufferRef(buf, sizeof(buf))));
+ _value.append(b.c_str(), b.size());
+ return *this;
+}
+
+namespace {
+
+size_t hashBuf(const void *s, size_t sz)
+{
+ size_t result(0);
+ const size_t * value = static_cast<const size_t *>(s);
+ for(size_t i(0), m(sz/sizeof(size_t)); i < m; i++) {
+ result ^= value[i];
+ }
+ unsigned left(sz%sizeof(size_t));
+ if (left) {
+ size_t lastValue(0);
+ memcpy(&lastValue, static_cast<const char *>(s)+sz-left, left);
+ result ^= lastValue;
+ }
+ return result;
+}
+
+}
+
+size_t StringResultNode::hash() const { return hashBuf(_value.c_str(), _value.size()); }
+
+size_t StringResultNode::hash(const void * buf) const
+{
+ const vespalib::string & s = *static_cast<const vespalib::string *>(buf);
+ return hashBuf(s.c_str(), s.size());
+}
+
+int64_t RawResultNode::onGetInteger(size_t index) const
+{
+ (void) index;
+ union {
+ int64_t _int64;
+ uint8_t _bytes[8];
+ } nbo;
+ nbo._int64 = 0;
+ memcpy(nbo._bytes, &_value[0], std::min(sizeof(nbo._bytes), _value.size()));
+ return nbostream::n2h(nbo._int64);
+}
+
+double RawResultNode::onGetFloat(size_t index) const
+{
+ (void) index;
+ union {
+ double _double;
+ uint8_t _bytes[8];
+ } nbo;
+ nbo._double = 0;
+ memcpy(nbo._bytes, &_value[0], std::min(sizeof(nbo._bytes), _value.size()));
+ return nbostream::n2h(nbo._double);
+}
+
+Serializer & RawResultNode::onSerialize(Serializer & os) const
+{
+ os << _value;
+ return os;
+}
+
+ResultSerializer & RawResultNode::onSerializeResult(ResultSerializer & os) const
+{
+ return os.putResult(getClass(), *this);
+}
+
+int RawResultNode::onCmp(const Identifiable & b) const
+{
+ if (b.inherits(PositiveInfinityResultNode::classId)) {
+ return -1;
+ } else {
+ const RawResultNode & rb( static_cast<const RawResultNode &>(b) );
+ int result = memcmp(&_value[0], &rb._value[0], std::min(_value.size(), rb._value.size()));
+ if (result == 0) {
+ result = _value.size() < rb._value.size() ? -1 : _value.size() > rb._value.size() ? 1 : 0;
+ }
+ return result;
+ }
+}
+
+size_t RawResultNode::hash() const { return hashBuf(&_value[0], _value.size()); }
+
+size_t RawResultNode::hash(const void * buf) const
+{
+ const std::vector<uint8_t> & s = *static_cast<const std::vector<uint8_t> *>(buf);
+ return hashBuf(&s[0], s.size());
+}
+
+Deserializer & RawResultNode::onDeserialize(Deserializer & is)
+{
+ is >> _value;
+ return is;
+}
+
+ResultDeserializer & RawResultNode::onDeserializeResult(ResultDeserializer & is)
+{
+ return is.getResult(getClass(), *this);
+}
+
+void
+RawResultNode::visitMembers(vespalib::ObjectVisitor &visitor) const
+{
+ visit(visitor, "value", _value);
+}
+
+void RawResultNode::set(const ResultNode & rhs)
+{
+ char buf[32];
+ ConstBufferRef b(rhs.getString(BufferRef(buf, sizeof(buf))));
+ setBuffer(b.data(), b.size());
+}
+void RawResultNode::setBuffer(const void *buf, size_t sz)
+{
+ _value.resize(sz + 1);
+ memcpy(&_value[0], buf, sz);
+ _value.back() = 0;
+ _value.resize(sz);
+}
+
+ResultNode::ConstBufferRef RawResultNode::onGetString(size_t index, ResultNode::BufferRef ) const { (void) index; return ConstBufferRef(&_value[0], _value.size()); }
+
+
+}
+}
diff --git a/searchlib/src/vespa/searchlib/expression/resultvector.cpp b/searchlib/src/vespa/searchlib/expression/resultvector.cpp
new file mode 100644
index 00000000000..169b4ea6159
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/resultvector.cpp
@@ -0,0 +1,61 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/searchlib/expression/resultvector.h>
+
+namespace search {
+namespace expression {
+
+IMPLEMENT_ABSTRACT_EXPRESSIONNODE(ResultNodeVector, ResultNode);
+IMPLEMENT_RESULTNODE(Int8ResultNodeVector, ResultNodeVector);
+IMPLEMENT_RESULTNODE(Int16ResultNodeVector, ResultNodeVector);
+IMPLEMENT_RESULTNODE(Int32ResultNodeVector, ResultNodeVector);
+IMPLEMENT_RESULTNODE(Int64ResultNodeVector, ResultNodeVector);
+IMPLEMENT_RESULTNODE(EnumResultNodeVector, ResultNodeVector);
+IMPLEMENT_RESULTNODE(FloatResultNodeVector, ResultNodeVector);
+IMPLEMENT_RESULTNODE(StringResultNodeVector, ResultNodeVector);
+IMPLEMENT_RESULTNODE(RawResultNodeVector, ResultNodeVector);
+IMPLEMENT_RESULTNODE(IntegerBucketResultNodeVector, ResultNodeVector);
+IMPLEMENT_RESULTNODE(FloatBucketResultNodeVector, ResultNodeVector);
+IMPLEMENT_RESULTNODE(StringBucketResultNodeVector, ResultNodeVector);
+IMPLEMENT_RESULTNODE(RawBucketResultNodeVector, ResultNodeVector);
+IMPLEMENT_RESULTNODE(GeneralResultNodeVector, ResultNodeVector);
+
+const ResultNode *
+GeneralResultNodeVector::find(const ResultNode & key) const
+{
+ for (size_t i(0); i < _v.size(); i++) {
+ const ResultNode * r = _v[i].get();
+ if (r && (key.cmp(*r) == 0)) {
+ return _v[i].get();
+ }
+ }
+ return NULL;
+}
+
+size_t
+GeneralResultNodeVector::hash() const
+{
+ size_t h(0);
+ for (size_t i(0); i < _v.size(); i++) {
+ h ^= _v[i]->hash();
+ }
+ return h;
+}
+
+ResultSerializer &
+ResultNodeVector::onSerializeResult(ResultSerializer & os) const
+{
+ return os.putResult(getClass(), *this);
+}
+
+ResultDeserializer &
+ResultNodeVector::onDeserializeResult(ResultDeserializer & is)
+{
+ return is.getResult(getClass(), *this);
+}
+
+}
+}
+
+// this function was added by ../../forcelink.sh
+void forcelink_file_searchlib_expression_resultvector() {}
diff --git a/searchlib/src/vespa/searchlib/expression/resultvector.h b/searchlib/src/vespa/searchlib/expression/resultvector.h
new file mode 100644
index 00000000000..c24a68f0da5
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/resultvector.h
@@ -0,0 +1,399 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/searchlib/expression/enumresultnode.h>
+#include <vespa/searchlib/expression/integerresultnode.h>
+#include <vespa/searchlib/expression/floatresultnode.h>
+#include <vespa/searchlib/expression/stringresultnode.h>
+#include <vespa/searchlib/expression/rawresultnode.h>
+#include <vespa/searchlib/expression/integerbucketresultnode.h>
+#include <vespa/searchlib/expression/floatbucketresultnode.h>
+#include <vespa/searchlib/expression/stringbucketresultnode.h>
+#include <vespa/searchlib/expression/rawbucketresultnode.h>
+#include <vespa/vespalib/objects/visit.h>
+#include <algorithm>
+
+namespace search {
+namespace expression {
+
+class ResultNodeVector : public ResultNode
+{
+public:
+ DECLARE_ABSTRACT_EXPRESSIONNODE(ResultNodeVector);
+ DECLARE_RESULTNODE_SERIALIZE;
+ typedef std::unique_ptr<ResultNodeVector> UP;
+ typedef vespalib::IdentifiablePtr<ResultNodeVector> CP;
+ virtual const ResultNode * find(const ResultNode & key) const = 0;
+ virtual ResultNodeVector & push_back(const ResultNode & node) = 0;
+ virtual ResultNodeVector & push_back_safe(const ResultNode & node) = 0;
+ virtual const ResultNode & get(size_t index) const = 0;
+ virtual ResultNodeVector & set(size_t index, const ResultNode & node) = 0;
+ virtual ResultNode & get(size_t index) = 0;
+ virtual void clear() = 0;
+ virtual void resize(size_t sz) = 0;
+ size_t size() const { return onSize(); }
+ bool empty() const { return size() == 0; }
+ /**
+ * Sum yourself to the argument
+ * @param result the argument
+ */
+ virtual ResultNode & flattenMultiply(ResultNode & r) const { return r; }
+ virtual ResultNode & flattenSum(ResultNode & r) const { return r; }
+ virtual ResultNode & flattenMax(ResultNode & r) const { return r; }
+ virtual ResultNode & flattenMin(ResultNode & r) const { return r; }
+ virtual ResultNode & flattenAnd(ResultNode & r) const { return r; }
+ virtual ResultNode & flattenOr(ResultNode & r) const { return r; }
+ virtual ResultNode & flattenXor(ResultNode & r) const { return r; }
+ virtual void min(const ResultNode & b) { (void) b; }
+ virtual void max(const ResultNode & b) { (void) b; }
+ virtual void add(const ResultNode & b) { (void) b; }
+private:
+ virtual size_t onSize() const = 0;
+ virtual void set(const ResultNode & rhs) { (void) rhs; }
+ virtual bool isMultiValue() const { return true; }
+};
+
+template <typename B>
+struct cmpT {
+ struct less : public std::binary_function<B, B, bool> {
+ bool operator()(const B & a, const B & b) { return a.cmp(b) < 0; }
+ };
+ struct equal : public std::binary_function<B, B, bool> {
+ bool operator()(const B & a, const B & b) { return a.cmp(b) == 0; }
+ };
+};
+
+template <typename B, typename V>
+struct contains {
+ struct less : public std::binary_function<B, V, bool> {
+ bool operator()(const B & a, const V & b) { return a.contains(b) < 0; }
+ };
+ struct equal : public std::binary_function<B, V, bool> {
+ bool operator()(const B & a, const V & b) { return a.contains(b) == 0; }
+ };
+};
+
+template <typename B, typename C, typename G>
+class ResultNodeVectorT : public ResultNodeVector
+{
+public:
+ DECLARE_NBO_SERIALIZE;
+ typedef std::vector<B> Vector;
+ const Vector & getVector() const { return _result; }
+ Vector & getVector() { return _result; }
+ virtual const ResultNode * find(const ResultNode & key) const;
+ virtual void sort();
+ virtual void reverse();
+ virtual ResultNodeVector & push_back(const ResultNode & node);
+ virtual ResultNodeVector & push_back_safe(const ResultNode & node);
+ virtual ResultNodeVector & set(size_t index, const ResultNode & node);
+ virtual const ResultNode & get(size_t index) const { return _result[index]; }
+ virtual ResultNode & get(size_t index) { return _result[index]; }
+ virtual void clear() { _result.clear(); }
+ virtual void resize(size_t sz) { _result.resize(sz); }
+ virtual void negate();
+private:
+ virtual void visitMembers(vespalib::ObjectVisitor &visitor) const { visit(visitor, "Vector", _result); }
+ virtual size_t onSize() const { return _result.size(); }
+ virtual const vespalib::Identifiable::RuntimeClass & getBaseClass() const { return B::_RTClass; }
+ virtual int64_t onGetInteger(size_t index) const { return _result[index].getInteger(index); }
+ virtual double onGetFloat(size_t index) const { return _result[index].getFloat(index); }
+ virtual ConstBufferRef onGetString(size_t index, BufferRef buf) const { return _result[index].getString(index, buf); }
+ virtual size_t hash() const;
+ virtual int onCmp(const Identifiable & b) const;
+ Vector _result;
+};
+
+template <typename B, typename C, typename G>
+ResultNodeVector & ResultNodeVectorT<B, C, G>::set(size_t index, const ResultNode & node)
+{
+ _result[index].set(node);
+ return *this;
+}
+
+template <typename B, typename C, typename G>
+ResultNodeVector & ResultNodeVectorT<B, C, G>::push_back_safe(const ResultNode & node)
+{
+ if (node.inherits(B::classId)) {
+ _result.push_back(static_cast<const B &>(node));
+ } else {
+ B value;
+ value.set(node);
+ _result.push_back(value);
+ }
+ return *this;
+}
+
+template <typename B, typename C, typename G>
+ResultNodeVector & ResultNodeVectorT<B, C, G>::push_back(const ResultNode & node)
+{
+ _result.push_back(static_cast<const B &>(node));
+ return *this;
+}
+
+template <typename B, typename C, typename G>
+int ResultNodeVectorT<B, C, G>::onCmp(const Identifiable & rhs) const
+{
+ const ResultNodeVectorT & b(static_cast<const ResultNodeVectorT &>(rhs));
+ int diff = _result.size() - b._result.size();
+ for (size_t i(0), m(_result.size()); (diff == 0) && (i < m); i++) {
+ diff = _result[i].cmp(b._result[i]);
+ }
+ return diff;
+}
+
+template <typename B, typename C, typename G>
+void ResultNodeVectorT<B, C, G>::sort()
+{
+ typedef cmpT<B> LC;
+ std::sort(_result.begin(), _result.end(), typename LC::less());
+}
+
+template <typename B, typename C, typename G>
+void ResultNodeVectorT<B, C, G>::reverse()
+{
+ std::reverse(_result.begin(), _result.end());
+}
+
+template <typename B, typename C, typename G>
+size_t ResultNodeVectorT<B, C, G>::hash() const
+{
+ size_t h(0);
+ for(typename Vector::const_iterator it(_result.begin()), mt(_result.end()); it != mt; it++) {
+ h ^= it->hash();
+ }
+ return h;
+}
+
+template <typename B, typename C, typename G>
+void ResultNodeVectorT<B, C, G>::negate()
+{
+ for(typename Vector::iterator it(_result.begin()), mt(_result.end()); it != mt; it++) {
+ it->negate();
+ }
+}
+
+template <typename B, typename C, typename G>
+const ResultNode * ResultNodeVectorT<B, C, G>::find(const ResultNode & key) const
+{
+ G getter;
+ typename Vector::const_iterator found = std::lower_bound(_result.begin(), _result.end(), getter(key), typename C::less() );
+ if (found != _result.end()) {
+ typename C::equal equal;
+ return equal(*found, getter(key)) ? &(*found) : NULL;
+ }
+ return NULL;
+}
+
+template <typename B, typename C, typename G>
+vespalib::Serializer & ResultNodeVectorT<B, C, G>::onSerialize(vespalib::Serializer & os) const
+{
+ return serialize(_result, os);
+}
+
+template <typename B, typename C, typename G>
+vespalib::Deserializer & ResultNodeVectorT<B, C, G>::onDeserialize(vespalib::Deserializer & is)
+{
+ return deserialize(_result, is);
+}
+
+struct GetInteger {
+ int64_t operator () (const ResultNode & r) { return r.getInteger(); }
+};
+
+struct GetFloat {
+ double operator () (const ResultNode & r) { return r.getFloat(); }
+};
+
+struct GetString {
+ ResultNode::BufferRef _tmp;
+ ResultNode::ConstBufferRef operator () (const ResultNode & r) { return r.getString(_tmp); }
+};
+
+template <typename B>
+class NumericResultNodeVectorT : public ResultNodeVectorT<B, cmpT<ResultNode>, std::_Identity<ResultNode> >
+{
+public:
+ virtual ResultNode & flattenMultiply(ResultNode & r) const {
+ B v;
+ v.set(r);
+ const std::vector<B> & vec(this->getVector());
+ for(size_t i(0), m(vec.size()); i < m; i++) {
+ v.multiply(vec[i]);
+ }
+ r.set(v);
+ return r;
+ }
+ virtual ResultNode & flattenAnd(ResultNode & r) const {
+ Int64ResultNode v;
+ v.set(r);
+ const std::vector<B> & vec(this->getVector());
+ for(size_t i(0), m(vec.size()); i < m; i++) {
+ v.andOp(vec[i]);
+ }
+ r.set(v);
+ return r;
+ }
+ virtual ResultNode & flattenOr(ResultNode & r) const {
+ Int64ResultNode v;
+ v.set(r);
+ const std::vector<B> & vec(this->getVector());
+ for(size_t i(0), m(vec.size()); i < m; i++) {
+ v.orOp(vec[i]);
+ }
+ r.set(v);
+ return r;
+ }
+ virtual ResultNode & flattenXor(ResultNode & r) const {
+ Int64ResultNode v;
+ v.set(r);
+ const std::vector<B> & vec(this->getVector());
+ for(size_t i(0), m(vec.size()); i < m; i++) {
+ v.xorOp(vec[i]);
+ }
+ r.set(v);
+ return r;
+ }
+ virtual ResultNode & flattenSum(ResultNode & r) const {
+ B v;
+ v.set(r);
+ const std::vector<B> & vec(this->getVector());
+ for(size_t i(0), m(vec.size()); i < m; i++) {
+ v.add(vec[i]);
+ }
+ r.set(v);
+ return r;
+ }
+ virtual ResultNode & flattenMax(ResultNode & r) const {
+ B v;
+ v.set(r);
+ const std::vector<B> & vec(this->getVector());
+ for(size_t i(0), m(vec.size()); i < m; i++) {
+ v.max(vec[i]);
+ }
+ r.set(v);
+ return r;
+ }
+ virtual ResultNode & flattenMin(ResultNode & r) const {
+ B v;
+ v.set(r);
+ const std::vector<B> & vec(this->getVector());
+ for(size_t i(0), m(vec.size()); i < m; i++) {
+ v.min(vec[i]);
+ }
+ r.set(v);
+ return r;
+ }
+};
+
+class Int8ResultNodeVector : public NumericResultNodeVectorT<Int8ResultNode>
+{
+public:
+ Int8ResultNodeVector() { }
+ DECLARE_RESULTNODE(Int8ResultNodeVector);
+};
+
+class Int16ResultNodeVector : public NumericResultNodeVectorT<Int16ResultNode>
+{
+public:
+ Int16ResultNodeVector() { }
+ DECLARE_RESULTNODE(Int16ResultNodeVector);
+};
+
+class Int32ResultNodeVector : public NumericResultNodeVectorT<Int32ResultNode>
+{
+public:
+ Int32ResultNodeVector() { }
+ DECLARE_RESULTNODE(Int32ResultNodeVector);
+};
+
+class Int64ResultNodeVector : public NumericResultNodeVectorT<Int64ResultNode>
+{
+public:
+ Int64ResultNodeVector() { }
+ DECLARE_RESULTNODE(Int64ResultNodeVector);
+};
+
+typedef Int64ResultNodeVector IntegerResultNodeVector;
+
+class EnumResultNodeVector : public NumericResultNodeVectorT<EnumResultNode>
+{
+public:
+ EnumResultNodeVector() {}
+ DECLARE_RESULTNODE(EnumResultNodeVector);
+};
+
+class FloatResultNodeVector : public NumericResultNodeVectorT<FloatResultNode>
+{
+public:
+ FloatResultNodeVector() { }
+ DECLARE_RESULTNODE(FloatResultNodeVector);
+};
+
+class StringResultNodeVector : public ResultNodeVectorT<StringResultNode, cmpT<ResultNode>, std::_Identity<ResultNode> >
+{
+public:
+ StringResultNodeVector() { }
+ DECLARE_RESULTNODE(StringResultNodeVector);
+};
+
+class RawResultNodeVector : public ResultNodeVectorT<RawResultNode, cmpT<ResultNode>, std::_Identity<ResultNode> >
+{
+public:
+ RawResultNodeVector() { }
+ DECLARE_RESULTNODE(RawResultNodeVector);
+};
+
+class IntegerBucketResultNodeVector : public ResultNodeVectorT<IntegerBucketResultNode, contains<IntegerBucketResultNode, int64_t>, GetInteger >
+{
+public:
+ IntegerBucketResultNodeVector() { }
+ DECLARE_RESULTNODE(IntegerBucketResultNodeVector);
+};
+
+class FloatBucketResultNodeVector : public ResultNodeVectorT<FloatBucketResultNode, contains<FloatBucketResultNode, double>, GetFloat >
+{
+public:
+ FloatBucketResultNodeVector() { }
+ DECLARE_RESULTNODE(FloatBucketResultNodeVector);
+};
+
+class StringBucketResultNodeVector : public ResultNodeVectorT<StringBucketResultNode, contains<StringBucketResultNode, ResultNode::ConstBufferRef>, GetString >
+{
+public:
+ StringBucketResultNodeVector() { }
+ DECLARE_RESULTNODE(StringBucketResultNodeVector);
+};
+
+class RawBucketResultNodeVector : public ResultNodeVectorT<RawBucketResultNode, contains<RawBucketResultNode, ResultNode::ConstBufferRef>, GetString >
+{
+public:
+ RawBucketResultNodeVector() { }
+ DECLARE_RESULTNODE(RawBucketResultNodeVector);
+};
+
+class GeneralResultNodeVector : public ResultNodeVector
+{
+public:
+ DECLARE_EXPRESSIONNODE(GeneralResultNodeVector);
+ virtual const ResultNode * find(const ResultNode & key) const;
+ virtual ResultNodeVector & push_back(const ResultNode & node) { _v.push_back(node); return *this; }
+ virtual ResultNodeVector & push_back_safe(const ResultNode & node) { _v.push_back(node); return *this; }
+ virtual const ResultNode & get(size_t index) const { return *_v[index]; };
+ virtual ResultNodeVector & set(size_t index, const ResultNode & node) { _v[index] = node; return *this; }
+ virtual ResultNode & get(size_t index) { return *_v[index]; }
+ virtual void clear() { _v.clear(); }
+ virtual void resize(size_t sz) { _v.resize(sz); }
+private:
+ virtual int64_t onGetInteger(size_t index) const { return _v[index]->getInteger(index); }
+ virtual double onGetFloat(size_t index) const { return _v[index]->getFloat(index); }
+ virtual ConstBufferRef onGetString(size_t index, BufferRef buf) const { return _v[index]->getString(index, buf); }
+ virtual size_t hash() const;
+ virtual size_t onSize() const { return _v.size(); }
+ std::vector<ResultNode::CP> _v;
+};
+
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/expression/reversefunctionnode.h b/searchlib/src/vespa/searchlib/expression/reversefunctionnode.h
new file mode 100644
index 00000000000..2cc788d1424
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/reversefunctionnode.h
@@ -0,0 +1,21 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/searchlib/expression/unaryfunctionnode.h>
+
+namespace search {
+namespace expression {
+
+class ReverseFunctionNode : public UnaryFunctionNode
+{
+public:
+ DECLARE_EXPRESSIONNODE(ReverseFunctionNode);
+ ReverseFunctionNode() { }
+ ReverseFunctionNode(const ExpressionNode::CP & arg) : UnaryFunctionNode(arg) { }
+private:
+ virtual bool onExecute() const;
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/expression/serializer.h b/searchlib/src/vespa/searchlib/expression/serializer.h
new file mode 100644
index 00000000000..1c11c7b3951
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/serializer.h
@@ -0,0 +1,34 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/vespalib/objects/serializer.h>
+#include <vespa/vespalib/objects/deserializer.h>
+
+namespace search {
+namespace expression {
+
+class RawResultNode;
+class ResultNodeVector;
+class ResultNode;
+
+class ResultSerializer
+{
+public:
+ virtual ~ResultSerializer() { }
+ virtual ResultSerializer & putResult(const vespalib::IFieldBase & field, const RawResultNode & value) = 0;
+ virtual ResultSerializer & putResult(const vespalib::IFieldBase & field, const ResultNodeVector & value) = 0;
+ virtual void proxyPut(const ResultNode & value) = 0;
+};
+
+class ResultDeserializer
+{
+public:
+ virtual ~ResultDeserializer() { }
+ virtual ResultDeserializer & getResult(const vespalib::IFieldBase & field, RawResultNode & value) = 0;
+ virtual ResultDeserializer & getResult(const vespalib::IFieldBase & field, ResultNodeVector & value) = 0;
+ virtual void proxyGet(const ResultNode & value) = 0;
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/expression/singleresultnode.h b/searchlib/src/vespa/searchlib/expression/singleresultnode.h
new file mode 100644
index 00000000000..c0d50559ca5
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/singleresultnode.h
@@ -0,0 +1,30 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/searchlib/expression/resultnode.h>
+
+namespace search {
+namespace expression {
+
+class SingleResultNode : public ResultNode
+{
+public:
+ virtual ~SingleResultNode() { }
+ DECLARE_ABSTRACT_RESULTNODE(SingleResultNode);
+ typedef vespalib::IdentifiablePtr<SingleResultNode> CP;
+ typedef std::unique_ptr<SingleResultNode> UP;
+ virtual SingleResultNode *clone() const = 0;
+
+ virtual void min(const ResultNode & b) = 0;
+ virtual void max(const ResultNode & b) = 0;
+ virtual void add(const ResultNode & b) = 0;
+
+ virtual void setMin() = 0;
+ virtual void setMax() = 0;
+ virtual size_t getRawByteSize() const { return onGetRawByteSize(); }
+ virtual size_t onGetRawByteSize() const = 0;
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/expression/sortfunctionnode.h b/searchlib/src/vespa/searchlib/expression/sortfunctionnode.h
new file mode 100644
index 00000000000..837563b6ee2
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/sortfunctionnode.h
@@ -0,0 +1,21 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/searchlib/expression/unaryfunctionnode.h>
+
+namespace search {
+namespace expression {
+
+class SortFunctionNode : public UnaryFunctionNode
+{
+public:
+ DECLARE_EXPRESSIONNODE(SortFunctionNode);
+ SortFunctionNode() { }
+ SortFunctionNode(const ExpressionNode::CP & arg) : UnaryFunctionNode(arg) { }
+private:
+ virtual bool onExecute() const;
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/expression/strcatfunctionnode.h b/searchlib/src/vespa/searchlib/expression/strcatfunctionnode.h
new file mode 100644
index 00000000000..8ceebd95bb0
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/strcatfunctionnode.h
@@ -0,0 +1,22 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/searchlib/expression/multiargfunctionnode.h>
+
+namespace search {
+namespace expression {
+
+class StrCatFunctionNode : public MultiArgFunctionNode
+{
+public:
+ DECLARE_EXPRESSIONNODE(StrCatFunctionNode);
+ StrCatFunctionNode() { }
+ StrCatFunctionNode(const ExpressionNode & arg) { addArg(arg); }
+private:
+ virtual void onPrepareResult();
+ virtual bool onExecute() const;
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/expression/strcatserializer.cpp b/searchlib/src/vespa/searchlib/expression/strcatserializer.cpp
new file mode 100644
index 00000000000..5749c5505b2
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/strcatserializer.cpp
@@ -0,0 +1,54 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include "strcatserializer.h"
+#include "rawresultnode.h"
+#include "resultvector.h"
+#include <vespa/vespalib/util/exception.h>
+
+namespace search {
+namespace expression {
+
+using vespalib::IFieldBase;
+using vespalib::Serializer;
+using vespalib::string;
+using vespalib::stringref;
+
+StrCatSerializer & StrCatSerializer::put(const vespalib::IFieldBase & field, const vespalib::Identifiable & value)
+{
+ (void) field;
+ if (value.inherits(ResultNode::classId)) {
+ static_cast<const ResultNode &>(value).onSerializeResult(*this);
+ } else {
+ value.serializeDirect(*this);
+ }
+ return *this;
+}
+
+ResultSerializer & StrCatSerializer::putResult(const vespalib::IFieldBase & field, const ResultNodeVector & value)
+{
+ (void) field;
+ size_t sz(value.size());
+ for (size_t i(0); i < sz; i++) {
+ value.get(i).serialize(*this);
+ }
+ return *this;
+}
+
+ResultSerializer & StrCatSerializer::putResult(const vespalib::IFieldBase & field, const RawResultNode & value)
+{
+ (void) field;
+ vespalib::ConstBufferRef buf(value.get());
+ getStream() << stringref(buf.c_str(), buf.size());
+ return *this;
+}
+
+void StrCatSerializer::proxyPut(const ResultNode & value)
+{
+ value.serializeDirect(*this);
+}
+
+}
+}
+
+// this function was added by ../../forcelink.sh
+void forcelink_file_searchlib_expression_strcatserializer() {}
diff --git a/searchlib/src/vespa/searchlib/expression/strcatserializer.h b/searchlib/src/vespa/searchlib/expression/strcatserializer.h
new file mode 100644
index 00000000000..455e9828b40
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/strcatserializer.h
@@ -0,0 +1,25 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/vespalib/objects/asciiserializer.h>
+#include <vespa/searchlib/expression/serializer.h>
+
+
+namespace search {
+namespace expression {
+
+class RawResultNode;
+
+class StrCatSerializer : public vespalib::AsciiSerializer, public ResultSerializer
+{
+public:
+ StrCatSerializer(vespalib::asciistream & stream) : vespalib::AsciiSerializer(stream) { }
+ virtual StrCatSerializer & put(const vespalib::IFieldBase & field, const vespalib::Identifiable & value);
+ virtual ResultSerializer & putResult(const vespalib::IFieldBase & field, const ResultNodeVector & value);
+ virtual ResultSerializer & putResult(const vespalib::IFieldBase & field, const RawResultNode & value);
+ virtual void proxyPut(const ResultNode & value);
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/expression/stringbucketresultnode.cpp b/searchlib/src/vespa/searchlib/expression/stringbucketresultnode.cpp
new file mode 100644
index 00000000000..2b7b4f096ff
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/stringbucketresultnode.cpp
@@ -0,0 +1,93 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include "stringbucketresultnode.h"
+#include <vespa/vespalib/objects/visit.h>
+
+namespace search {
+namespace expression {
+
+IMPLEMENT_RESULTNODE(StringBucketResultNode, BucketResultNode);
+
+StringBucketResultNode StringBucketResultNode::_nullResult;
+
+size_t
+StringBucketResultNode::hash() const
+{
+#if 0
+ union {
+ uint8_t cxor[8];
+ uint64_t ixor;
+ } xorResult;
+ xorResult.ixor = 0;
+ size_t i(0);
+ const size_t m(_from.size());
+ const char * c = _from.c_str();
+ const uint64_t * ic = reinterpret_cast<const uint64_t *>(c);
+ for (; i+8 < m; i+=8) {
+ const size_t index(i/8);
+ xorResult.ixor ^= ic[index];
+ }
+ for (; i < m; i++) {
+ xorResult.cxor[i%8] ^= c[i];
+ }
+ return xorResult.ixor;
+#else
+ return 0;
+#endif
+}
+
+int
+StringBucketResultNode::onCmp(const Identifiable & rhs) const
+{
+ const StringBucketResultNode & b = static_cast<const StringBucketResultNode &>(rhs);
+ int diff(_from->cmp(*b._from));
+ return (diff == 0) ? _to->cmp(*b._to) : diff;
+}
+
+int StringBucketResultNode::contains(const StringBucketResultNode & b) const
+{
+ int fromDiff(_from->cmp(*b._from));
+ int toDiff(_to->cmp(*b._to));
+ return (fromDiff < 0) ? std::min(0, toDiff) : std::max(0, toDiff);
+}
+
+int StringBucketResultNode::contains(const char * s) const
+{
+ StringResultNode v(s);
+ int diff(_from->cmp(v));
+ if (diff > 0) {
+ return 1;
+ } else {
+ diff = _to->cmp(v);
+ return (diff <= 0) ? -1 : 0;
+ }
+}
+
+void
+StringBucketResultNode::visitMembers(vespalib::ObjectVisitor &visitor) const
+{
+ visit(visitor, _fromField.getName(), _from);
+ visit(visitor, _toField.getName(), _to);
+}
+
+vespalib::Serializer &
+StringBucketResultNode::onSerialize(vespalib::Serializer & os) const
+{
+ _from.serialize(os);
+ _to.serialize(os);
+ return os;
+}
+
+vespalib::Deserializer &
+StringBucketResultNode::onDeserialize(vespalib::Deserializer & is)
+{
+ _from.deserialize(is);
+ _to.deserialize(is);
+ return is;
+}
+
+}
+}
+
+// this function was added by ../../forcelink.sh
+void forcelink_file_searchlib_expression_stringbucketresultnode() {}
diff --git a/searchlib/src/vespa/searchlib/expression/stringbucketresultnode.h b/searchlib/src/vespa/searchlib/expression/stringbucketresultnode.h
new file mode 100644
index 00000000000..df096b7350c
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/stringbucketresultnode.h
@@ -0,0 +1,44 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include "bucketresultnode.h"
+#include "stringresultnode.h"
+
+namespace search {
+namespace expression {
+
+class StringBucketResultNode : public BucketResultNode
+{
+private:
+ ResultNode::CP _from;
+ ResultNode::CP _to;
+ static StringBucketResultNode _nullResult;
+ virtual size_t onGetRawByteSize() const { return sizeof(_from) + sizeof(_to); }
+public:
+ struct GetValue {
+ BufferRef _tmp;
+ ConstBufferRef operator () (const ResultNode & r) { return r.getString(_tmp); }
+ };
+
+ DECLARE_EXPRESSIONNODE(StringBucketResultNode);
+ DECLARE_NBO_SERIALIZE;
+ StringBucketResultNode() : _from(new StringResultNode()), _to(new StringResultNode()) {}
+ StringBucketResultNode(const vespalib::stringref & from, const vespalib::stringref & to) : _from(new StringResultNode(from)), _to(new StringResultNode(to)) {}
+ StringBucketResultNode(ResultNode::UP from, ResultNode::UP to) : _from(from.release()), _to(to.release()) {}
+ virtual size_t hash() const;
+ virtual int onCmp(const Identifiable & b) const;
+ int contains(const StringBucketResultNode & b) const;
+ int contains(const ConstBufferRef & v) const { return contains(v.c_str()); }
+ int contains(const char * v) const;
+ virtual void visitMembers(vespalib::ObjectVisitor &visitor) const;
+ StringBucketResultNode &setRange(const vespalib::stringref & from, const vespalib::stringref & to) {
+ _from.reset(new StringResultNode(from));
+ _to.reset(new StringResultNode(to));
+ return *this;
+ }
+ static const StringBucketResultNode & getNull() { return _nullResult; }
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/expression/stringresultnode.h b/searchlib/src/vespa/searchlib/expression/stringresultnode.h
new file mode 100644
index 00000000000..e2530f79fb9
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/stringresultnode.h
@@ -0,0 +1,52 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/searchlib/expression/singleresultnode.h>
+
+namespace search {
+namespace expression {
+
+class StringResultNode : public SingleResultNode
+{
+public:
+ DECLARE_EXPRESSIONNODE(StringResultNode);
+ DECLARE_NBO_SERIALIZE;
+ virtual void visitMembers(vespalib::ObjectVisitor &visitor) const;
+ StringResultNode(const char * v="") : _value(v) { }
+ StringResultNode(const vespalib::stringref & v) : _value(v) { }
+ virtual size_t hash() const;
+ virtual int onCmp(const Identifiable & b) const;
+ virtual void set(const ResultNode & rhs);
+ StringResultNode & append(const ResultNode & rhs);
+ StringResultNode & clear() { _value.clear(); return *this; }
+ const vespalib::string & get() const { return _value; }
+ void set(const vespalib::stringref & value) { _value = value; }
+ virtual void min(const ResultNode & b);
+ virtual void max(const ResultNode & b);
+ virtual void add(const ResultNode & b);
+ virtual void negate();
+
+private:
+ virtual int cmpMem(const void * a, const void *b) const {
+ return static_cast<const vespalib::string *>(a)->compare(*static_cast<const vespalib::string *>(b));
+ }
+ virtual void create(void * buf) const { new (buf) vespalib::string(); }
+ virtual void destroy(void * buf) const { static_cast<vespalib::string *>(buf)->vespalib::string::~string(); }
+
+ virtual void decode(const void * buf) { _value = *static_cast<const vespalib::string *>(buf); }
+ virtual void encode(void * buf) const { *static_cast<vespalib::string *>(buf) = _value; }
+ virtual void swap(void * buf) { std::swap(*static_cast<vespalib::string *>(buf), _value); }
+ virtual size_t hash(const void * buf) const;
+
+ virtual size_t onGetRawByteSize() const { return sizeof(_value); }
+ virtual void setMin();
+ virtual void setMax();
+ virtual int64_t onGetInteger(size_t index) const;
+ virtual double onGetFloat(size_t index) const;
+ virtual ConstBufferRef onGetString(size_t index, BufferRef buf) const;
+ vespalib::string _value;
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/expression/strlenfunctionnode.h b/searchlib/src/vespa/searchlib/expression/strlenfunctionnode.h
new file mode 100644
index 00000000000..294b69a8172
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/strlenfunctionnode.h
@@ -0,0 +1,22 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/searchlib/expression/unaryfunctionnode.h>
+
+namespace search {
+namespace expression {
+
+class StrLenFunctionNode : public UnaryFunctionNode
+{
+public:
+ DECLARE_EXPRESSIONNODE(StrLenFunctionNode);
+ StrLenFunctionNode() { }
+ StrLenFunctionNode(const ExpressionNode::CP & arg) : UnaryFunctionNode(arg) { }
+private:
+ virtual bool onExecute() const;
+ virtual void onPrepareResult();
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/expression/timestamp.cpp b/searchlib/src/vespa/searchlib/expression/timestamp.cpp
new file mode 100644
index 00000000000..6191af0f546
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/timestamp.cpp
@@ -0,0 +1,108 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/searchlib/expression/timestamp.h>
+
+namespace search {
+namespace expression {
+
+using vespalib::FieldBase;
+using vespalib::Serializer;
+using vespalib::Deserializer;
+
+IMPLEMENT_EXPRESSIONNODE(TimeStampFunctionNode, UnaryFunctionNode);
+
+TimeStampFunctionNode::TimeStampFunctionNode(const TimeStampFunctionNode & rhs) :
+ UnaryFunctionNode(rhs),
+ _timePart(rhs._timePart),
+ _isGmt(rhs._isGmt),
+ _handler()
+{
+}
+
+TimeStampFunctionNode & TimeStampFunctionNode::operator = (const TimeStampFunctionNode & rhs)
+{
+ if (this != &rhs) {
+ UnaryFunctionNode::operator =(rhs);
+ _timePart = rhs._timePart;
+ _isGmt = rhs._isGmt;
+ _handler.reset();
+ }
+ return *this;
+}
+
+void TimeStampFunctionNode::onPrepareResult()
+{
+ if (getArg().getResult().inherits(ResultNodeVector::classId)) {
+ setResultType(std::unique_ptr<ResultNode>(new IntegerResultNodeVector));
+ _handler.reset(new MultiValueHandler(*this));
+ } else {
+ setResultType(std::unique_ptr<ResultNode>(new Int64ResultNode));
+ _handler.reset(new SingleValueHandler(*this));
+ }
+}
+
+unsigned TimeStampFunctionNode::getTimePart(time_t secSince70, TimePart tp, bool gmt)
+{
+ tm ts;
+ if (gmt) {
+ gmtime_r(&secSince70, &ts);
+ } else {
+ localtime_r(&secSince70, &ts);
+ }
+ switch (tp) {
+ case Year: return ts.tm_year + 1900;
+ case Month: return ts.tm_mon + 1;
+ case MonthDay:return ts.tm_mday;
+ case WeekDay: return ts.tm_wday;
+ case Hour: return ts.tm_hour;
+ case Minute: return ts.tm_min;
+ case Second: return ts.tm_sec;
+ case YearDay: return ts.tm_yday;
+ case IsDST: return ts.tm_isdst;
+ }
+ return 0;
+}
+
+bool TimeStampFunctionNode::onExecute() const
+{
+ getArg().execute();
+ _handler->handle(getArg().getResult());
+ return true;
+}
+
+void TimeStampFunctionNode::SingleValueHandler::handle(const ResultNode & arg)
+{
+ handleOne(arg, _result);
+}
+
+void TimeStampFunctionNode::MultiValueHandler::handle(const ResultNode & arg)
+{
+ const ResultNodeVector & v(static_cast<const ResultNodeVector &>(arg));
+ _result.getVector().resize(v.size());
+ for(size_t i(0), m(_result.getVector().size()); i < m; i++) {
+ handleOne(v.get(i), _result.getVector()[i]);
+ }
+}
+
+Serializer & TimeStampFunctionNode::onSerialize(Serializer & os) const
+{
+ UnaryFunctionNode::onSerialize(os);
+ uint8_t code(getTimePart() | (isGmt() ? 0x80 : 0x00));
+ return os << code;
+}
+
+Deserializer & TimeStampFunctionNode::onDeserialize(Deserializer & is)
+{
+ UnaryFunctionNode::onDeserialize(is);
+ uint8_t code(0);
+ is >> code;
+ _isGmt = code & 0x80;
+ _timePart = static_cast<TimePart>(code & 0x7f);
+ return is;
+}
+
+}
+}
+
+// this function was added by ../../forcelink.sh
+void forcelink_file_searchlib_expression_timestamp() {}
diff --git a/searchlib/src/vespa/searchlib/expression/timestamp.h b/searchlib/src/vespa/searchlib/expression/timestamp.h
new file mode 100644
index 00000000000..16f92e233a4
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/timestamp.h
@@ -0,0 +1,78 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/searchlib/expression/unaryfunctionnode.h>
+#include <vespa/searchlib/expression/integerresultnode.h>
+#include <vespa/searchlib/expression/resultvector.h>
+
+namespace search {
+namespace expression {
+
+class TimeStampFunctionNode : public UnaryFunctionNode
+{
+public:
+ enum TimePart { Year=0, Month=1, MonthDay=2, WeekDay=3, Hour=4, Minute=5, Second=6, YearDay=7, IsDST=8 };
+ DECLARE_EXPRESSIONNODE(TimeStampFunctionNode);
+ DECLARE_NBO_SERIALIZE;
+ TimeStampFunctionNode() : _timePart(Year), _isGmt(true) { }
+ TimeStampFunctionNode(const ExpressionNode::CP & arg, TimePart timePart, bool gmt=true) : UnaryFunctionNode(arg), _timePart(timePart), _isGmt(gmt) { }
+ TimeStampFunctionNode(const TimeStampFunctionNode & rhs);
+ TimeStampFunctionNode & operator = (const TimeStampFunctionNode & rhs);
+ unsigned int getTime() const { return getResult().getInteger(); } // Not valid until after node has been prepared
+ TimePart getTimePart() const { return _timePart; }
+ TimeStampFunctionNode & setTimePart(TimePart timePart) { _timePart = timePart; return *this; }
+ bool isGmt() const { return _isGmt; }
+ bool isLocal() const { return ! isGmt(); }
+protected:
+/*
+unsigned year(timestamp); [1970 - 2039]
+unsigned month(timestamp); [1-12]
+unsigned date(timestamp); [1-31]
+unsigned weekday(timestamp); [1-7]
+unsigned hour(timestamp); [0-23]
+unsigned minute(timestamp);[0-59]
+unsigned second(timestamp);[0-59]
+*/
+ virtual bool onExecute() const;
+ virtual void onPrepareResult();
+private:
+ class Handler {
+ public:
+ Handler(const TimeStampFunctionNode & ts) : _timePart(ts.getTimePart()), _isGmt(ts.isGmt()) { }
+ virtual ~Handler() { }
+ virtual void handle(const ResultNode & arg) = 0;
+ protected:
+ void handleOne(const ResultNode & arg, Int64ResultNode & result) const {
+ result.set(TimeStampFunctionNode::getTimePart(arg.getInteger(), _timePart, _isGmt));
+ }
+ private:
+ TimePart _timePart;
+ bool _isGmt;
+ };
+ class SingleValueHandler : public Handler {
+ public:
+ SingleValueHandler(TimeStampFunctionNode & ts) : Handler(ts), _result(static_cast<Int64ResultNode &>(ts.updateResult())) { }
+ virtual void handle(const ResultNode & arg);
+ private:
+ Int64ResultNode & _result;
+ };
+ class MultiValueHandler : public Handler {
+ public:
+ MultiValueHandler(TimeStampFunctionNode & ts) : Handler(ts), _result(static_cast<IntegerResultNodeVector &>(ts.updateResult())) { }
+ virtual void handle(const ResultNode & arg);
+ private:
+ IntegerResultNodeVector & _result;
+ };
+
+ const ResultNode & getTimeStamp() const { return getArg().getResult(); }
+ void init();
+ Int64ResultNode & updateIntegerResult() const { return static_cast<Int64ResultNode &>(updateResult()); }
+ static unsigned getTimePart(time_t time, TimePart, bool gmt);
+ TimePart _timePart;
+ bool _isGmt;
+ std::unique_ptr<Handler> _handler;
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/expression/tofloatfunctionnode.h b/searchlib/src/vespa/searchlib/expression/tofloatfunctionnode.h
new file mode 100644
index 00000000000..48d93ee282c
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/tofloatfunctionnode.h
@@ -0,0 +1,22 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/searchlib/expression/unaryfunctionnode.h>
+
+namespace search {
+namespace expression {
+
+class ToFloatFunctionNode : public UnaryFunctionNode
+{
+public:
+ DECLARE_EXPRESSIONNODE(ToFloatFunctionNode);
+ ToFloatFunctionNode() { }
+ ToFloatFunctionNode(const ExpressionNode::CP & arg) : UnaryFunctionNode(arg) { }
+private:
+ virtual bool onExecute() const;
+ virtual void onPrepareResult();
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/expression/tointfunctionnode.h b/searchlib/src/vespa/searchlib/expression/tointfunctionnode.h
new file mode 100644
index 00000000000..420d6707215
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/tointfunctionnode.h
@@ -0,0 +1,22 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/searchlib/expression/unaryfunctionnode.h>
+
+namespace search {
+namespace expression {
+
+class ToIntFunctionNode : public UnaryFunctionNode
+{
+public:
+ DECLARE_EXPRESSIONNODE(ToIntFunctionNode);
+ ToIntFunctionNode() { }
+ ToIntFunctionNode(const ExpressionNode::CP & arg) : UnaryFunctionNode(arg) { }
+private:
+ virtual bool onExecute() const;
+ virtual void onPrepareResult();
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/expression/torawfunctionnode.h b/searchlib/src/vespa/searchlib/expression/torawfunctionnode.h
new file mode 100644
index 00000000000..4c80bbab7d4
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/torawfunctionnode.h
@@ -0,0 +1,22 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/searchlib/expression/unaryfunctionnode.h>
+
+namespace search {
+namespace expression {
+
+class ToRawFunctionNode : public UnaryFunctionNode
+{
+public:
+ DECLARE_EXPRESSIONNODE(ToRawFunctionNode);
+ ToRawFunctionNode() { }
+ ToRawFunctionNode(const ExpressionNode::CP & arg) : UnaryFunctionNode(arg) { }
+private:
+ virtual bool onExecute() const;
+ virtual void onPrepareResult();
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/expression/tostringfunctionnode.h b/searchlib/src/vespa/searchlib/expression/tostringfunctionnode.h
new file mode 100644
index 00000000000..fd6d80d850e
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/tostringfunctionnode.h
@@ -0,0 +1,22 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/searchlib/expression/unaryfunctionnode.h>
+
+namespace search {
+namespace expression {
+
+class ToStringFunctionNode : public UnaryFunctionNode
+{
+public:
+ DECLARE_EXPRESSIONNODE(ToStringFunctionNode);
+ ToStringFunctionNode() { }
+ ToStringFunctionNode(const ExpressionNode::CP & arg) : UnaryFunctionNode(arg) { }
+private:
+ virtual bool onExecute() const;
+ virtual void onPrepareResult();
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/expression/ucafunctionnode.cpp b/searchlib/src/vespa/searchlib/expression/ucafunctionnode.cpp
new file mode 100644
index 00000000000..2cd4df49c5b
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/ucafunctionnode.cpp
@@ -0,0 +1,115 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/searchlib/expression/ucafunctionnode.h>
+#include <vespa/searchlib/common/converters.h>
+
+namespace search {
+namespace expression {
+
+using vespalib::FieldBase;
+using vespalib::Serializer;
+using vespalib::Deserializer;
+
+IMPLEMENT_EXPRESSIONNODE(UcaFunctionNode, UnaryFunctionNode);
+
+UcaFunctionNode::UcaFunctionNode()
+{
+}
+
+UcaFunctionNode::~UcaFunctionNode()
+{
+}
+
+UcaFunctionNode::UcaFunctionNode(const ExpressionNode::CP & arg, const vespalib::string & locale, const vespalib::string & strength) :
+ UnaryFunctionNode(arg),
+ _locale(locale),
+ _strength(strength),
+ _collator(new common::UcaConverter(locale, strength))
+{
+}
+
+UcaFunctionNode::UcaFunctionNode(const UcaFunctionNode & rhs) :
+ UnaryFunctionNode(rhs),
+ _locale(rhs._locale),
+ _strength(rhs._strength),
+ _collator(rhs._collator),
+ _handler()
+{
+}
+
+UcaFunctionNode & UcaFunctionNode::operator = (const UcaFunctionNode & rhs)
+{
+ if (this != &rhs) {
+ UnaryFunctionNode::operator =(rhs);
+ _locale = rhs._locale;
+ _strength = rhs._strength;
+ _collator = rhs._collator;
+ _handler.reset();
+ }
+ return *this;
+}
+
+void UcaFunctionNode::onPrepareResult()
+{
+ if (getArg().getResult().inherits(ResultNodeVector::classId)) {
+ setResultType(std::unique_ptr<ResultNode>(new RawResultNodeVector));
+ _handler.reset(new MultiValueHandler(*this));
+ } else {
+ setResultType(std::unique_ptr<ResultNode>(new RawResultNode));
+ _handler.reset(new SingleValueHandler(*this));
+ }
+}
+
+UcaFunctionNode::Handler::Handler(const UcaFunctionNode & uca) :
+ _converter(*uca._collator),
+ _backingBuffer(),
+ _buffer(_backingBuffer, sizeof(_backingBuffer))
+{
+}
+
+void UcaFunctionNode::Handler::handleOne(const ResultNode & arg, RawResultNode & result) const
+{
+ vespalib::ConstBufferRef buf = _converter.convert(arg.getString(_buffer));
+ result.set(RawResultNode(buf.c_str(), buf.size()));
+}
+
+bool UcaFunctionNode::onExecute() const
+{
+ getArg().execute();
+ _handler->handle(getArg().getResult());
+ return true;
+}
+
+void UcaFunctionNode::SingleValueHandler::handle(const ResultNode & arg)
+{
+ handleOne(arg, _result);
+}
+
+void UcaFunctionNode::MultiValueHandler::handle(const ResultNode & arg)
+{
+ const ResultNodeVector & v(static_cast<const ResultNodeVector &>(arg));
+ _result.getVector().resize(v.size());
+ for(size_t i(0), m(_result.getVector().size()); i < m; i++) {
+ handleOne(v.get(i), _result.getVector()[i]);
+ }
+}
+
+Serializer & UcaFunctionNode::onSerialize(Serializer & os) const
+{
+ UnaryFunctionNode::onSerialize(os);
+ return os << _locale << _strength;
+}
+
+Deserializer & UcaFunctionNode::onDeserialize(Deserializer & is)
+{
+ UnaryFunctionNode::onDeserialize(is);
+ is >> _locale >> _strength;
+ _collator.reset(new common::UcaConverter(_locale, _strength));
+ return is;
+}
+
+}
+}
+
+// this function was added by ../../forcelink.sh
+void forcelink_file_searchlib_expression_ucafunctionnode() {}
diff --git a/searchlib/src/vespa/searchlib/expression/ucafunctionnode.h b/searchlib/src/vespa/searchlib/expression/ucafunctionnode.h
new file mode 100644
index 00000000000..78242d9cbd1
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/ucafunctionnode.h
@@ -0,0 +1,60 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/searchlib/expression/unaryfunctionnode.h>
+#include <vespa/searchlib/common/sortspec.h>
+#include <vespa/searchlib/expression/stringresultnode.h>
+#include <vespa/searchlib/expression/resultvector.h>
+
+
+namespace search {
+namespace expression {
+
+class UcaFunctionNode : public UnaryFunctionNode
+{
+public:
+ DECLARE_EXPRESSIONNODE(UcaFunctionNode);
+ DECLARE_NBO_SERIALIZE;
+ UcaFunctionNode();
+ ~UcaFunctionNode();
+ UcaFunctionNode(const ExpressionNode::CP & arg, const vespalib::string & locale, const vespalib::string & strength);
+ UcaFunctionNode(const UcaFunctionNode & rhs);
+ UcaFunctionNode & operator = (const UcaFunctionNode & rhs);
+private:
+ virtual bool onExecute() const;
+ virtual void onPrepareResult();
+ class Handler {
+ public:
+ Handler(const UcaFunctionNode & uca);
+ virtual ~Handler() { }
+ virtual void handle(const ResultNode & arg) = 0;
+ protected:
+ void handleOne(const ResultNode & arg, RawResultNode & result) const;
+ private:
+ const common::BlobConverter & _converter;
+ char _backingBuffer[32];
+ vespalib::BufferRef _buffer;
+ };
+ class SingleValueHandler : public Handler {
+ public:
+ SingleValueHandler(UcaFunctionNode & uca) : Handler(uca), _result(static_cast<RawResultNode &>(uca.updateResult())) { }
+ virtual void handle(const ResultNode & arg);
+ private:
+ RawResultNode & _result;
+ };
+ class MultiValueHandler : public Handler {
+ public:
+ MultiValueHandler(UcaFunctionNode & uca) : Handler(uca), _result(static_cast<RawResultNodeVector &>(uca.updateResult())) { }
+ virtual void handle(const ResultNode & arg);
+ private:
+ RawResultNodeVector & _result;
+ };
+ vespalib::string _locale;
+ vespalib::string _strength;
+ common::BlobConverter::LP _collator;
+ std::unique_ptr<Handler> _handler;
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/expression/unarybitfunctionnode.h b/searchlib/src/vespa/searchlib/expression/unarybitfunctionnode.h
new file mode 100644
index 00000000000..fa0184dc3e5
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/unarybitfunctionnode.h
@@ -0,0 +1,31 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/searchlib/expression/unaryfunctionnode.h>
+
+namespace search {
+namespace expression {
+
+class UnaryBitFunctionNode : public UnaryFunctionNode
+{
+public:
+ DECLARE_NBO_SERIALIZE;
+ virtual void visitMembers(vespalib::ObjectVisitor &visitor) const;
+ DECLARE_ABSTRACT_EXPRESSIONNODE(UnaryBitFunctionNode);
+ UnaryBitFunctionNode() : _numBits(0) { }
+ UnaryBitFunctionNode(const ExpressionNode::CP & arg, unsigned numBits) : UnaryFunctionNode(arg), _numBits(numBits) { }
+protected:
+ size_t getNumBits() const { return _numBits; }
+ size_t getNumBytes() const { return (_numBits+7)/8; }
+ virtual void onPrepareResult();
+private:
+ virtual void onPrepare(bool preserveAccurateTypes);
+ virtual bool internalExecute(const vespalib::nbostream & os) const = 0;
+ virtual bool onExecute() const;
+ uint32_t _numBits;
+ mutable vespalib::nbostream _tmpOs;
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/expression/unaryfunctionnode.h b/searchlib/src/vespa/searchlib/expression/unaryfunctionnode.h
new file mode 100644
index 00000000000..366e7d9191f
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/unaryfunctionnode.h
@@ -0,0 +1,27 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/searchlib/expression/multiargfunctionnode.h>
+
+namespace search {
+namespace expression {
+
+class UnaryFunctionNode : public MultiArgFunctionNode
+{
+public:
+ DECLARE_ABSTRACT_EXPRESSIONNODE(UnaryFunctionNode);
+ UnaryFunctionNode() { }
+ UnaryFunctionNode(const ExpressionNode::CP & arg) :
+ MultiArgFunctionNode()
+ {
+ appendArg(arg);
+ }
+protected:
+ const ExpressionNode & getArg() const { return MultiArgFunctionNode::getArg(0); }
+private:
+ virtual void onPrepareResult();
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/expression/xorbitfunctionnode.h b/searchlib/src/vespa/searchlib/expression/xorbitfunctionnode.h
new file mode 100644
index 00000000000..b8d00b6ebdb
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/xorbitfunctionnode.h
@@ -0,0 +1,23 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/searchlib/expression/unarybitfunctionnode.h>
+
+namespace search {
+namespace expression {
+
+class XorBitFunctionNode : public UnaryBitFunctionNode
+{
+public:
+ DECLARE_EXPRESSIONNODE(XorBitFunctionNode);
+ XorBitFunctionNode() { }
+ XorBitFunctionNode(const ExpressionNode::CP & arg, unsigned numBits);
+private:
+ mutable std::vector<uint8_t> _tmpXor;
+ virtual bool internalExecute(const vespalib::nbostream & os) const;
+ virtual void onPrepareResult();
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/expression/xorfunctionnode.h b/searchlib/src/vespa/searchlib/expression/xorfunctionnode.h
new file mode 100644
index 00000000000..117c55f69c8
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/xorfunctionnode.h
@@ -0,0 +1,22 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/searchlib/expression/bitfunctionnode.h>
+
+namespace search {
+namespace expression {
+
+class XorFunctionNode : public BitFunctionNode
+{
+public:
+ DECLARE_EXPRESSIONNODE(XorFunctionNode);
+ XorFunctionNode() { }
+private:
+ virtual ResultNode::CP getInitialValue() const { return ResultNode::CP(new Int64ResultNode(0)); }
+ virtual ResultNode & flatten(const ResultNodeVector & v, ResultNode & result) const { return v.flattenXor(result); }
+ virtual void onArgument(const ResultNode & arg, Int64ResultNode & result) const;
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/expression/zcurve.cpp b/searchlib/src/vespa/searchlib/expression/zcurve.cpp
new file mode 100644
index 00000000000..dd22ca7c8fe
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/zcurve.cpp
@@ -0,0 +1,91 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/searchlib/expression/zcurve.h>
+#include <vespa/vespalib/geo/zcurve.h>
+
+using vespalib::FieldBase;
+using vespalib::Serializer;
+using vespalib::Deserializer;
+
+namespace search {
+namespace expression {
+
+IMPLEMENT_EXPRESSIONNODE(ZCurveFunctionNode, UnaryFunctionNode);
+
+ZCurveFunctionNode::ZCurveFunctionNode(const ZCurveFunctionNode & rhs) :
+ UnaryFunctionNode(rhs),
+ _dim(rhs._dim),
+ _handler()
+{
+}
+
+ZCurveFunctionNode & ZCurveFunctionNode::operator = (const ZCurveFunctionNode & rhs)
+{
+ if (this != &rhs) {
+ UnaryFunctionNode::operator =(rhs);
+ _dim = rhs._dim;
+ _handler.reset();
+ }
+ return *this;
+}
+
+void ZCurveFunctionNode::onPrepareResult()
+{
+ if (getArg().getResult().inherits(ResultNodeVector::classId)) {
+ setResultType(std::unique_ptr<ResultNode>(new IntegerResultNodeVector));
+ _handler.reset(new MultiValueHandler(*this));
+ } else {
+ setResultType(std::unique_ptr<ResultNode>(new Int64ResultNode));
+ _handler.reset(new SingleValueHandler(*this));
+ }
+}
+
+int32_t ZCurveFunctionNode::Handler::getXorY(uint64_t z) const
+{
+ int32_t x, y;
+ vespalib::geo::ZCurve::decode(z, &x, &y);
+ return (_dim==X) ? x : y;
+}
+
+bool ZCurveFunctionNode::onExecute() const
+{
+ getArg().execute();
+ _handler->handle(getArg().getResult());
+ return true;
+}
+
+void ZCurveFunctionNode::SingleValueHandler::handle(const ResultNode & arg)
+{
+ handleOne(arg, _result);
+}
+
+void ZCurveFunctionNode::MultiValueHandler::handle(const ResultNode & arg)
+{
+ const ResultNodeVector & v(static_cast<const ResultNodeVector &>(arg));
+ _result.getVector().resize(v.size());
+ for(size_t i(0), m(_result.getVector().size()); i < m; i++) {
+ handleOne(v.get(i), _result.getVector()[i]);
+ }
+}
+
+Serializer & ZCurveFunctionNode::onSerialize(Serializer & os) const
+{
+ UnaryFunctionNode::onSerialize(os);
+ uint8_t code(_dim);
+ return os << code;
+}
+
+Deserializer & ZCurveFunctionNode::onDeserialize(Deserializer & is)
+{
+ UnaryFunctionNode::onDeserialize(is);
+ uint8_t code(0);
+ is >> code;
+ _dim = static_cast<Dimension>(code);
+ return is;
+}
+
+}
+}
+
+// this function was added by ../../forcelink.sh
+void forcelink_file_searchlib_expression_zcurve() {}
diff --git a/searchlib/src/vespa/searchlib/expression/zcurve.h b/searchlib/src/vespa/searchlib/expression/zcurve.h
new file mode 100644
index 00000000000..88d2a7938a7
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/zcurve.h
@@ -0,0 +1,59 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/searchlib/expression/unaryfunctionnode.h>
+#include <vespa/searchlib/expression/integerresultnode.h>
+#include <vespa/searchlib/expression/resultvector.h>
+
+namespace search {
+namespace expression {
+
+class ZCurveFunctionNode : public UnaryFunctionNode
+{
+public:
+ enum Dimension {X=0, Y=1};
+ DECLARE_EXPRESSIONNODE(ZCurveFunctionNode);
+ DECLARE_NBO_SERIALIZE;
+ ZCurveFunctionNode() : _dim(X) { }
+ ZCurveFunctionNode(const ExpressionNode::CP & arg, Dimension dim) : UnaryFunctionNode(arg), _dim(dim) { }
+ ZCurveFunctionNode(const ZCurveFunctionNode & rhs);
+ ZCurveFunctionNode & operator = (const ZCurveFunctionNode & rhs);
+ Dimension getDim() const { return _dim; }
+private:
+ class Handler {
+ public:
+ Handler(Dimension dim) : _dim(dim) { }
+ virtual ~Handler() { }
+ virtual void handle(const ResultNode & arg) = 0;
+ protected:
+ void handleOne(const ResultNode & arg, Int64ResultNode & result) const {
+ result.set(getXorY(arg.getInteger()));
+ }
+ private:
+ int32_t getXorY(uint64_t z) const;
+ Dimension _dim;
+ };
+ class SingleValueHandler : public Handler {
+ public:
+ SingleValueHandler(ZCurveFunctionNode & ts) : Handler(ts.getDim()), _result(static_cast<Int64ResultNode &>(ts.updateResult())) { }
+ virtual void handle(const ResultNode & arg);
+ private:
+ Int64ResultNode & _result;
+ };
+ class MultiValueHandler : public Handler {
+ public:
+ MultiValueHandler(ZCurveFunctionNode & ts) : Handler(ts.getDim()), _result(static_cast<IntegerResultNodeVector &>(ts.updateResult())) { }
+ virtual void handle(const ResultNode & arg);
+ private:
+ IntegerResultNodeVector & _result;
+ };
+
+ virtual bool onExecute() const;
+ virtual void onPrepareResult();
+ Dimension _dim;
+ std::unique_ptr<Handler> _handler;
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/features/.gitignore b/searchlib/src/vespa/searchlib/features/.gitignore
new file mode 100644
index 00000000000..583460ae288
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/.gitignore
@@ -0,0 +1,3 @@
+*.So
+.depend
+Makefile
diff --git a/searchlib/src/vespa/searchlib/features/CMakeLists.txt b/searchlib/src/vespa/searchlib/features/CMakeLists.txt
new file mode 100644
index 00000000000..ec21aa87fae
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/CMakeLists.txt
@@ -0,0 +1,64 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_library(searchlib_features
+ SOURCES
+ agefeature.cpp
+ array_parser.cpp
+ attributefeature.cpp
+ attributematchfeature.cpp
+ closenessfeature.cpp
+ debug_attribute_wait.cpp
+ debug_wait.cpp
+ distancefeature.cpp
+ distancetopathfeature.cpp
+ dotproductfeature.cpp
+ element_completeness_feature.cpp
+ element_similarity_feature.cpp
+ euclidean_distance_feature.cpp
+ fieldinfofeature.cpp
+ fieldlengthfeature.cpp
+ fieldmatchfeature.cpp
+ fieldtermmatchfeature.cpp
+ firstphasefeature.cpp
+ flow_completeness_feature.cpp
+ foreachfeature.cpp
+ freshnessfeature.cpp
+ item_raw_score_feature.cpp
+ jarowinklerdistancefeature.cpp
+ matchesfeature.cpp
+ matchfeature.cpp
+ native_dot_product_feature.cpp
+ nativeattributematchfeature.cpp
+ nativefieldmatchfeature.cpp
+ nativeproximityfeature.cpp
+ nativerankfeature.cpp
+ nowfeature.cpp
+ proximityfeature.cpp
+ querycompletenessfeature.cpp
+ queryfeature.cpp
+ queryterm.cpp
+ querytermcountfeature.cpp
+ randomfeature.cpp
+ rankingexpressionfeature.cpp
+ raw_score_feature.cpp
+ reverseproximityfeature.cpp
+ setup.cpp
+ subqueries_feature.cpp
+ tensor_factory_blueprint.cpp
+ tensor_from_labels_feature.cpp
+ tensor_from_tensor_attribute_executor.cpp
+ tensor_from_weighted_set_feature.cpp
+ term_field_md_feature.cpp
+ termdistancecalculator.cpp
+ termdistancefeature.cpp
+ termeditdistancefeature.cpp
+ termfeature.cpp
+ terminfofeature.cpp
+ text_similarity_feature.cpp
+ utils.cpp
+ valuefeature.cpp
+ weighted_set_parser.cpp
+ $<TARGET_OBJECTS:searchlib_fieldmatch>
+ $<TARGET_OBJECTS:searchlib_rankingexpression>
+ INSTALL lib64
+ DEPENDS
+)
diff --git a/searchlib/src/vespa/searchlib/features/OWNERS b/searchlib/src/vespa/searchlib/features/OWNERS
new file mode 100644
index 00000000000..12b533ec610
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/OWNERS
@@ -0,0 +1 @@
+havardpe
diff --git a/searchlib/src/vespa/searchlib/features/agefeature.cpp b/searchlib/src/vespa/searchlib/features/agefeature.cpp
new file mode 100644
index 00000000000..0b8c652b5c7
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/agefeature.cpp
@@ -0,0 +1,79 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".features.agefeature");
+
+#include <vespa/searchlib/fef/featurenamebuilder.h>
+#include <vespa/searchlib/fef/matchdata.h>
+#include "agefeature.h"
+#include "valuefeature.h"
+
+using search::attribute::IAttributeVector;
+
+namespace search {
+
+typedef fef::FeatureNameBuilder FNB;
+
+namespace features {
+
+AgeExecutor::AgeExecutor(const IAttributeVector *attribute) :
+ search::fef::FeatureExecutor(),
+ _attribute(attribute),
+ _buf()
+{
+ if (_attribute != NULL) {
+ _buf.allocate(attribute->getMaxValueCount());
+ }
+}
+
+void
+AgeExecutor::execute(search::fef::MatchData &data)
+{
+ feature_t age = 10000000000.0;
+ if (_attribute != NULL) {
+ _buf.fill(*_attribute, data.getDocId());
+ int64_t docTime = _buf[0];
+ feature_t currTime = *data.resolveFeature(inputs()[0]);
+ age = currTime - docTime;
+ if (age < 0) {
+ age = 0;
+ }
+ }
+ *data.resolveFeature(outputs()[0]) = age;
+}
+
+void
+AgeBlueprint::visitDumpFeatures(const search::fef::IIndexEnvironment &,
+ search::fef::IDumpFeatureVisitor &) const
+{
+ // empty
+}
+
+bool
+AgeBlueprint::setup(const search::fef::IIndexEnvironment &env,
+ const search::fef::ParameterList &params)
+{
+ _attribute = params[0].getValue();
+ defineInput("now");
+
+ describeOutput("out", "The age of the document, in seconds.");
+ env.hintAttributeAccess(_attribute);
+ return true;
+}
+
+search::fef::Blueprint::UP
+AgeBlueprint::createInstance() const
+{
+ return search::fef::Blueprint::UP(new AgeBlueprint());
+}
+
+search::fef::FeatureExecutor::LP
+AgeBlueprint::createExecutor(const search::fef::IQueryEnvironment &env) const
+{
+ // Get docdate attribute vector
+ const IAttributeVector * attribute = env.getAttributeContext().getAttribute(_attribute);
+ return search::fef::FeatureExecutor::LP(new AgeExecutor(attribute));
+}
+
+}
+}
diff --git a/searchlib/src/vespa/searchlib/features/agefeature.h b/searchlib/src/vespa/searchlib/features/agefeature.h
new file mode 100644
index 00000000000..795d04798f8
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/agefeature.h
@@ -0,0 +1,64 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <string>
+#include <vector>
+#include <vespa/searchcommon/attribute/attributecontent.h>
+#include <vespa/searchlib/fef/blueprint.h>
+#include <vespa/searchlib/fef/featureexecutor.h>
+
+namespace search {
+namespace features {
+
+/**
+ * Implements the executor for the documentage feature outputting the
+ * difference between document time (stored in an attribute) and current
+ * system time
+ **/
+class AgeExecutor : public search::fef::FeatureExecutor {
+private:
+ const search::attribute::IAttributeVector *_attribute;
+ search::attribute::IntegerContent _buf;
+
+public:
+ /**
+ * Constructs a new executor.
+ **/
+ AgeExecutor(const search::attribute::IAttributeVector *attribute);
+ virtual void execute(search::fef::MatchData & data);
+};
+
+/**
+ * Implements the blueprint for 'documentage' feature. It uses the 'now' feature
+ * to get current time and reads document time from a specified attribute
+ */
+class AgeBlueprint : public search::fef::Blueprint {
+private:
+ vespalib::string _attribute;
+
+public:
+ AgeBlueprint() : search::fef::Blueprint("age") { }
+
+ // Inherit doc from Blueprint.
+ virtual void visitDumpFeatures(const search::fef::IIndexEnvironment &env,
+ search::fef::IDumpFeatureVisitor &) const;
+
+ // Inherit doc from Blueprint.
+ virtual search::fef::Blueprint::UP createInstance() const;
+
+ // Inherit doc from Blueprint.
+ virtual search::fef::FeatureExecutor::LP createExecutor(const search::fef::IQueryEnvironment &env) const;
+
+ // Inherit doc from Blueprint.
+ virtual search::fef::ParameterDescriptions getDescriptions() const {
+ return search::fef::ParameterDescriptions().desc().attribute(search::fef::ParameterCollection::ANY);
+ }
+
+ // Inherit doc from Blueprint.
+ virtual bool setup(const search::fef::IIndexEnvironment & env,
+ const search::fef::ParameterList & params);
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/features/array_parser.cpp b/searchlib/src/vespa/searchlib/features/array_parser.cpp
new file mode 100644
index 00000000000..0faac986033
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/array_parser.cpp
@@ -0,0 +1,19 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".features.array_parser");
+
+#include "array_parser.h"
+
+namespace search {
+namespace features {
+
+void
+ArrayParser::logWarning(const vespalib::string &msg)
+{
+ LOG(warning, "%s", msg.c_str());
+}
+
+} // namespace features
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/features/array_parser.h b/searchlib/src/vespa/searchlib/features/array_parser.h
new file mode 100644
index 00000000000..07b9d09b277
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/array_parser.h
@@ -0,0 +1,49 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/vespalib/stllike/string.h>
+
+namespace search {
+namespace features {
+
+/**
+ * Utility for parsing a string representation of an array with values (numeric or string)
+ * that is typically passed down with the query.
+ *
+ * The format of the array is as follows:
+ * 1) Dense form: [value0 value1 ... valueN] (where value0 has index 0)
+ *
+ * 2) Sparse form: {idxA:valueA,idxB:valueB,...,idxN:valueN}.
+ * In the sparse form all non-specified indexes get the value 0.0 and
+ * has values for indexes in the range [0,max index specified].
+ * The parsed array is sorted in index order.
+ */
+class ArrayParser
+{
+private:
+ static void logWarning(const vespalib::string &msg);
+
+public:
+ template <typename T>
+ class ValueAndIndex {
+ public:
+ typedef T ValueType;
+ ValueAndIndex(T value, uint32_t index) : _value(value), _index(index) { }
+ T getValue() const { return _value; }
+ uint32_t getIndex() const { return _index; }
+ bool operator < (const ValueAndIndex & b) const { return _index < b._index; }
+ private:
+ T _value;
+ uint32_t _index;
+ };
+
+ template <typename OutputType>
+ static void parse(const vespalib::string &input, OutputType &output);
+
+ template <typename OutputType>
+ static void parsePartial(const vespalib::string &input, OutputType &output);
+};
+
+} // namespace features
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/features/array_parser.hpp b/searchlib/src/vespa/searchlib/features/array_parser.hpp
new file mode 100644
index 00000000000..cfa161798ab
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/array_parser.hpp
@@ -0,0 +1,96 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "array_parser.h"
+#include <vespa/vespalib/stllike/asciistream.h>
+#include <vespa/vespalib/util/exceptions.h>
+#include <vespa/vespalib/util/stringfmt.h>
+#include <vector>
+
+namespace search {
+namespace features {
+
+template <typename OutputType>
+void
+ArrayParser::parse(const vespalib::string &input, OutputType &output)
+{
+ typedef std::vector<ValueAndIndex<typename OutputType::value_type>> SparseVector;
+ SparseVector sparse;
+ parsePartial(input, sparse);
+ std::sort(sparse.begin(), sparse.end());
+ if ( ! sparse.empty() ) {
+ output.resize(sparse.back().getIndex()+1);
+ for (const typename SparseVector::value_type &elem : sparse) {
+ output[elem.getIndex()] = elem.getValue();
+ }
+ }
+}
+
+template <typename OutputType>
+void
+ArrayParser::parsePartial(const vespalib::string &input, OutputType &output)
+{
+ size_t len = input.size();
+ if (len >= 2) {
+ vespalib::stringref s(input.c_str()+1, len - 2);
+ typedef typename OutputType::value_type ValueAndIndexType;
+ typename ValueAndIndexType::ValueType value;
+ if ((input[0] == '{' && input[len - 1] == '}') ||
+ (input[0] == '(' && input[len - 1] == ')') ) {
+ size_t key;
+ char colon;
+ while ( ! s.empty() ) {
+ vespalib::string::size_type commaPos(s.find(','));
+ vespalib::stringref item(s.substr(0, commaPos));
+ vespalib::asciistream is(item);
+ try {
+ is >> key >> colon >> value;
+ if ((colon == ':') && is.eof()) {
+ output.push_back(ValueAndIndexType(value, key));
+ } else {
+ logWarning(vespalib::make_string(
+ "Could not parse item '%s' in query vector '%s', skipping. "
+ "Expected ':' between dimension and component.",
+ item.c_str(), input.c_str()));
+ return;
+ }
+ } catch (vespalib::IllegalArgumentException & e) {
+ logWarning(vespalib::make_string(
+ "Could not parse item '%s' in query vector '%s', skipping. "
+ "Incorrect type of operands", item.c_str(), input.c_str()));
+ return;
+ }
+ if (commaPos != vespalib::string::npos) {
+ s = s.substr(commaPos+1);
+ } else {
+ s = vespalib::stringref();
+ }
+ }
+ } else if (len >= 2 && input[0] == '[' && input[len - 1] == ']') {
+ vespalib::asciistream is(s);
+ uint32_t index(0);
+ while (!is.eof()) {
+ try {
+ is >> value;
+ output.push_back(ValueAndIndexType(value, index++));
+ } catch (vespalib::IllegalArgumentException & e) {
+ logWarning(vespalib::make_string(
+ "Could not parse item[%ld] = '%s' in query vector '%s', skipping. "
+ "Incorrect type of operands", output.size(), is.c_str(), s.c_str()));
+ return;
+ }
+ }
+ }
+ } else {
+ logWarning(vespalib::make_string(
+ "Could not parse query vector '%s'. Expected surrounding '(' and ')' or '{' and '}'.",
+ input.c_str()));
+ }
+}
+
+template void
+ArrayParser::parse(const vespalib::string &input, std::vector<int> &);
+
+} // namespace features
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/features/attributefeature.cpp b/searchlib/src/vespa/searchlib/features/attributefeature.cpp
new file mode 100644
index 00000000000..b6eb2421ff5
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/attributefeature.cpp
@@ -0,0 +1,433 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".features.attributefeature");
+#include "attributefeature.h"
+#include "utils.h"
+#include "valuefeature.h"
+
+#include <vespa/searchcommon/common/undefinedvalues.h>
+#include <vespa/searchcommon/attribute/attributecontent.h>
+#include <vespa/searchlib/attribute/tensorattribute.h>
+#include <vespa/searchlib/features/constant_tensor_executor.h>
+#include <vespa/searchlib/features/tensor_from_tensor_attribute_executor.h>
+#include <vespa/searchlib/fef/fieldinfo.h>
+#include <vespa/searchlib/fef/indexproperties.h>
+#include <vespa/searchlib/attribute/singlenumericattribute.h>
+#include <vespa/vespalib/eval/value_type.h>
+#include <vespa/searchlib/fef/feature_type.h>
+
+using search::attribute::IAttributeVector;
+using search::attribute::CollectionType;
+using search::attribute::BasicType;
+using search::attribute::ConstCharContent;
+using search::attribute::IntegerContent;
+using search::attribute::FloatContent;
+using search::attribute::TensorAttribute;
+using search::attribute::WeightedConstCharContent;
+using search::attribute::WeightedIntegerContent;
+using search::attribute::WeightedFloatContent;
+using search::fef::FeatureExecutor;
+using search::features::util::ConstCharPtr;
+using vespalib::tensor::TensorType;
+using vespalib::eval::ValueType;
+using search::fef::FeatureType;
+
+using namespace search::fef::indexproperties;
+
+namespace {
+template <typename X, typename Y>
+bool equals(const X & lhs, const Y & rhs) {
+ return lhs == rhs;
+}
+
+template <>
+bool equals<ConstCharPtr, vespalib::stringref>(const ConstCharPtr & lhs, const vespalib::stringref & rhs) {
+ return strcmp(lhs, rhs.c_str()) == 0;
+}
+
+template <typename T>
+bool
+isUndefined(const T & value, const BasicType::Type & type)
+{
+ switch (type) {
+ case BasicType::INT8:
+ return search::attribute::isUndefined<int8_t>(static_cast<int8_t>(value));
+ case BasicType::INT16:
+ return search::attribute::isUndefined<int16_t>(static_cast<int16_t>(value));
+ case BasicType::INT32:
+ return search::attribute::isUndefined<int32_t>(static_cast<int32_t>(value));
+ case BasicType::INT64:
+ return search::attribute::isUndefined<int64_t>(static_cast<int64_t>(value));
+ case BasicType::FLOAT:
+ return search::attribute::isUndefined<float>(static_cast<float>(value));
+ case BasicType::DOUBLE:
+ return search::attribute::isUndefined<double>(static_cast<double>(value));
+ default:
+ return false;
+ }
+}
+
+template <>
+bool
+isUndefined<vespalib::stringref>(const vespalib::stringref &, const BasicType::Type &)
+{
+ return false;
+}
+
+template <typename T>
+search::feature_t
+considerUndefined(const T & value, const BasicType::Type & type)
+{
+ if (isUndefined(value, type)) {
+ return search::attribute::getUndefined<search::feature_t>();
+ }
+ return search::features::util::getAsFeature(value);
+}
+
+template <>
+search::feature_t
+considerUndefined<ConstCharPtr>(const ConstCharPtr & value, const BasicType::Type &)
+{
+ return search::features::util::getAsFeature(value);
+}
+
+
+}
+
+
+namespace search {
+namespace features {
+
+/**
+ * Implements the executor for fetching values from a single or array attribute vector
+ */
+template <typename T>
+class SingleAttributeExecutor : public fef::FeatureExecutor {
+private:
+ const T & _attribute;
+
+public:
+ /**
+ * Constructs an executor.
+ *
+ * @param attribute The attribute vector to use.
+ */
+ SingleAttributeExecutor(const T & attribute) : _attribute(attribute) { }
+
+ // Inherit doc from FeatureExecutor.
+ virtual void execute(search::fef::MatchData & data);
+};
+
+class CountOnlyAttributeExecutor : public fef::FeatureExecutor {
+private:
+ const attribute::IAttributeVector & _attribute;
+
+public:
+ /**
+ * Constructs an executor.
+ *
+ * @param attribute The attribute vector to use.
+ */
+ CountOnlyAttributeExecutor(const attribute::IAttributeVector & attribute) : _attribute(attribute) { }
+
+ // Inherit doc from FeatureExecutor.
+ virtual void execute(search::fef::MatchData & data);
+};
+/**
+ * Implements the executor for fetching values from a single or array attribute vector
+ */
+template <typename T>
+class AttributeExecutor : public fef::FeatureExecutor {
+private:
+ const attribute::IAttributeVector * _attribute;
+ attribute::BasicType::Type _attrType;
+ uint32_t _idx;
+ T _buffer; // used when fetching values from the attribute
+ feature_t _defaultCount;
+
+public:
+ /**
+ * Constructs an executor.
+ *
+ * @param attribute The attribute vector to use.
+ * @param idx The index used for an array attribute.
+ */
+ AttributeExecutor(const search::attribute::IAttributeVector * attribute, uint32_t idx);
+
+ // Inherit doc from FeatureExecutor.
+ virtual void execute(search::fef::MatchData & data);
+};
+
+
+/**
+ * Implements the executor for fetching weights from a weighted set attribute
+ */
+template <typename BT, typename T>
+class WeightedSetAttributeExecutor : public fef::FeatureExecutor {
+private:
+ const attribute::IAttributeVector * _attribute;
+ attribute::BasicType::Type _attrType;
+ BT _buffer; // used when fetching values and weights from the attribute
+ T _key; // the key to find a weight for
+ bool _useKey;
+
+public:
+ /**
+ * Constructs an executor.
+ *
+ * @param attribue The attribute vector to use.
+ * @param key The key to find a corresponding weight for.
+ * @param useKey Whether we should consider the key.
+ */
+ WeightedSetAttributeExecutor(const search::attribute::IAttributeVector * attribute, T key, bool useKey);
+
+ // Inherit doc from FeatureExecutor.
+ virtual void execute(search::fef::MatchData & data);
+};
+
+template <typename T>
+void
+SingleAttributeExecutor<T>::execute(search::fef::MatchData & match)
+{
+ typename T::LoadedValueType v = _attribute.getFast(match.getDocId());
+ // value
+ *match.resolveFeature(outputs()[0]) = __builtin_expect(attribute::isUndefined(v), false)
+ ? attribute::getUndefined<search::feature_t>()
+ : util::getAsFeature(v);
+ *match.resolveFeature(outputs()[1]) = 0.0f; // weight
+ *match.resolveFeature(outputs()[2]) = 0.0f; // contains
+ *match.resolveFeature(outputs()[3]) = 1.0f; // count
+}
+
+void
+CountOnlyAttributeExecutor::execute(search::fef::MatchData & match)
+{
+ *match.resolveFeature(outputs()[0]) = 0.0f; // value
+ *match.resolveFeature(outputs()[1]) = 0.0f; // weight
+ *match.resolveFeature(outputs()[2]) = 0.0f; // contains
+ *match.resolveFeature(outputs()[3]) = _attribute.getValueCount(match.getDocId()); // count
+}
+
+template <typename T>
+AttributeExecutor<T>::AttributeExecutor(const IAttributeVector * attribute, uint32_t idx) :
+ fef::FeatureExecutor(),
+ _attribute(attribute),
+ _attrType(attribute->getBasicType()),
+ _idx(idx),
+ _buffer(),
+ _defaultCount((attribute->getCollectionType() == CollectionType::ARRAY) ? 0 : 1)
+{
+ _buffer.allocate(_attribute->getMaxValueCount());
+}
+
+template <typename T>
+void
+AttributeExecutor<T>::execute(search::fef::MatchData & match)
+{
+ feature_t value = 0.0f;
+ _buffer.fill(*_attribute, match.getDocId());
+ if (_idx < _buffer.size()) {
+ value = considerUndefined(_buffer[_idx], _attrType);
+ }
+ *match.resolveFeature(outputs()[0]) = value; // value
+ *match.resolveFeature(outputs()[1]) = 0.0f; // weight
+ *match.resolveFeature(outputs()[2]) = 0.0f; // contains
+ *match.resolveFeature(outputs()[3]) = _defaultCount; // count
+}
+
+
+template <typename BT, typename T>
+WeightedSetAttributeExecutor<BT, T>::WeightedSetAttributeExecutor(const IAttributeVector * attribute, T key, bool useKey) :
+ fef::FeatureExecutor(),
+ _attribute(attribute),
+ _attrType(attribute->getBasicType()),
+ _buffer(),
+ _key(key),
+ _useKey(useKey)
+{
+}
+
+template <typename BT, typename T>
+void
+WeightedSetAttributeExecutor<BT, T>::execute(search::fef::MatchData & match)
+{
+ feature_t value = 0.0f;
+ feature_t weight = 0.0f;
+ feature_t contains = 0.0f;
+ feature_t count = 0.0f;
+ if (_useKey) {
+ _buffer.fill(*_attribute, match.getDocId());
+ for (uint32_t i = 0; i < _buffer.size(); ++i) {
+ if (equals(_buffer[i].getValue(), _key)) {
+ value = considerUndefined(_key, _attrType);
+ weight = static_cast<feature_t>(_buffer[i].getWeight());
+ contains = 1.0f;
+ break;
+ }
+ }
+ } else {
+ count = _attribute->getValueCount(match.getDocId());
+ }
+ *match.resolveFeature(outputs()[0]) = value; // value
+ *match.resolveFeature(outputs()[1]) = weight; // weight
+ *match.resolveFeature(outputs()[2]) = contains; // contains
+ *match.resolveFeature(outputs()[3]) = count; // count
+}
+
+
+AttributeBlueprint::AttributeBlueprint() :
+ search::fef::Blueprint("attribute"),
+ _attrName(),
+ _extra(),
+ _tensorType(TensorType::number())
+{
+}
+
+void
+AttributeBlueprint::visitDumpFeatures(const search::fef::IIndexEnvironment &,
+ search::fef::IDumpFeatureVisitor &) const
+{
+}
+
+bool
+AttributeBlueprint::setup(const search::fef::IIndexEnvironment & env,
+ const search::fef::ParameterList & params)
+{
+ // params[0] = attribute name
+ // params[1] = index (array attribute) or key (weighted set attribute)
+ _attrName = params[0].getValue();
+ if (params.size() == 2) {
+ _extra = params[1].getValue();
+ }
+ vespalib::string attrType = type::Attribute::lookup(env.getProperties(), _attrName);
+ if (!attrType.empty()) {
+ _tensorType = TensorType::fromSpec(attrType);
+ }
+ FeatureType output_type = _tensorType.is_tensor()
+ ? FeatureType::object(_tensorType.as_value_type())
+ : FeatureType::number();
+ describeOutput("value", "The value of a single value attribute, "
+ "the value at the given index of an array attribute, "
+ "the given key of a weighted set attribute, or"
+ "the tensor of a tensor attribute", output_type);
+ if (!_tensorType.is_tensor()) {
+ describeOutput("weight", "The weight associated with the given key in a weighted set attribute.");
+ describeOutput("contains", "1 if the given key is present in a weighted set attribute, 0 otherwise.");
+ describeOutput("count", "Returns the number of elements in this array or weighted set attribute.");
+ }
+ env.hintAttributeAccess(_attrName);
+ return true;
+}
+
+search::fef::Blueprint::UP
+AttributeBlueprint::createInstance() const
+{
+ return search::fef::Blueprint::UP(new AttributeBlueprint());
+}
+
+#define CREATE_AND_RETURN_IF_SINGLE_NUMERIC(a, T) \
+ if (dynamic_cast<const SingleValueNumericAttribute<T> *>(a) != NULL) { \
+ return FeatureExecutor::LP(new SingleAttributeExecutor<SingleValueNumericAttribute<T>>(*static_cast<const SingleValueNumericAttribute<T> *>(a))); \
+ }
+
+namespace {
+
+search::fef::FeatureExecutor::LP
+createAttributeExecutor(const IAttributeVector *attribute, const vespalib::string &attrName, const vespalib::string &extraParam)
+{
+ if (attribute == NULL) {
+ LOG(warning, "The attribute vector '%s' was not found in the attribute manager, returning default values.",
+ attrName.c_str());
+ std::vector<feature_t> values(4, 0.0f);
+ return FeatureExecutor::LP(new ValueExecutor(values));
+ }
+ if (attribute->getCollectionType() == CollectionType::WSET) {
+ bool useKey = !extraParam.empty();
+ if (useKey) {
+ if (attribute->isStringType()) {
+ return FeatureExecutor::LP
+ (new WeightedSetAttributeExecutor<WeightedConstCharContent, vespalib::stringref>(attribute, extraParam, useKey));
+ } else if (attribute->isIntegerType()) {
+ return FeatureExecutor::LP
+ (new WeightedSetAttributeExecutor<WeightedIntegerContent, int64_t>(attribute, util::strToNum<int64_t>(extraParam), useKey));
+ } else { // FLOAT
+ return FeatureExecutor::LP
+ (new WeightedSetAttributeExecutor<WeightedFloatContent, double>(attribute, util::strToNum<double>(extraParam), useKey));
+ }
+ } else {
+ return FeatureExecutor::LP(new CountOnlyAttributeExecutor(*attribute));
+ }
+ } else { // SINGLE or ARRAY
+ if ((attribute->getCollectionType() == CollectionType::SINGLE) && (attribute->isIntegerType() || attribute->isFloatingPointType())) {
+ CREATE_AND_RETURN_IF_SINGLE_NUMERIC(attribute, FloatingPointAttributeTemplate<double>);
+ CREATE_AND_RETURN_IF_SINGLE_NUMERIC(attribute, FloatingPointAttributeTemplate<float>);
+ CREATE_AND_RETURN_IF_SINGLE_NUMERIC(attribute, IntegerAttributeTemplate<int32_t>);
+ CREATE_AND_RETURN_IF_SINGLE_NUMERIC(attribute, IntegerAttributeTemplate<int64_t>);
+ }
+ {
+ uint32_t idx = 0;
+ if (!extraParam.empty()) {
+ idx = util::strToNum<uint32_t>(extraParam);
+ } else if (attribute->getCollectionType() == CollectionType::ARRAY) {
+ return FeatureExecutor::LP(new CountOnlyAttributeExecutor(*attribute));
+ }
+ if (attribute->isStringType()) {
+ return FeatureExecutor::LP(new AttributeExecutor<ConstCharContent>(attribute, idx));
+ } else if (attribute->isIntegerType()) {
+ return FeatureExecutor::LP(new AttributeExecutor<IntegerContent>(attribute, idx));
+ } else { // FLOAT
+ return FeatureExecutor::LP(new AttributeExecutor<FloatContent>(attribute, idx));
+ }
+ }
+ }
+}
+
+search::fef::FeatureExecutor::LP
+createTensorAttributeExecutor(const IAttributeVector *attribute, const vespalib::string &attrName,
+ const TensorType &tensorType)
+{
+ if (attribute == NULL) {
+ LOG(warning, "The attribute vector '%s' was not found in the attribute manager."
+ " Returning empty tensor.", attrName.c_str());
+ return ConstantTensorExecutor::createEmpty();
+ }
+ if (attribute->getCollectionType() != search::attribute::CollectionType::SINGLE ||
+ attribute->getBasicType() != search::attribute::BasicType::TENSOR) {
+ LOG(warning, "The attribute vector '%s' is NOT of type tensor."
+ " Returning empty tensor.", attribute->getName().c_str());
+ return ConstantTensorExecutor::createEmpty();
+ }
+ const TensorAttribute *tensorAttribute = dynamic_cast<const TensorAttribute *>(attribute);
+ if (tensorAttribute == nullptr) {
+ LOG(warning, "The attribute vector '%s' could not be converted to a tensor attribute."
+ " Returning empty tensor.", attribute->getName().c_str());
+ return ConstantTensorExecutor::createEmpty();
+ }
+ if (tensorType != tensorAttribute->getConfig().tensorType()) {
+ LOG(warning, "The tensor attribute '%s' has tensor type '%s',"
+ " while the feature executor expects type '%s'. Returning empty tensor.",
+ tensorAttribute->getName().c_str(),
+ tensorAttribute->getConfig().tensorType().toSpec().c_str(),
+ tensorType.toSpec().c_str());
+ return ConstantTensorExecutor::createEmpty();
+ }
+ return FeatureExecutor::LP(new TensorFromTensorAttributeExecutor(tensorAttribute));
+}
+
+}
+
+search::fef::FeatureExecutor::LP
+AttributeBlueprint::createExecutor(const search::fef::IQueryEnvironment &env) const
+{
+ const IAttributeVector *attribute = env.getAttributeContext().getAttribute(_attrName);
+ if (_tensorType.is_tensor()) {
+ return createTensorAttributeExecutor(attribute, _attrName, _tensorType);
+ } else {
+ return createAttributeExecutor(attribute, _attrName, _extra);
+ }
+}
+
+} // namespace features
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/features/attributefeature.h b/searchlib/src/vespa/searchlib/features/attributefeature.h
new file mode 100644
index 00000000000..c25b2b558b1
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/attributefeature.h
@@ -0,0 +1,57 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/fef/blueprint.h>
+#include <vespa/searchlib/fef/featureexecutor.h>
+#include <vespa/vespalib/tensor/tensor_type.h>
+
+
+namespace search {
+namespace features {
+
+
+/**
+ * Implements the blueprint for the attribute executor.
+ *
+ * An executor of this outputs number(s) if used with regular attributes
+ * or a tensor value if used with tensor attributes.
+ */
+class AttributeBlueprint : public search::fef::Blueprint {
+private:
+ vespalib::string _attrName; // the name of the attribute vector
+ vespalib::string _extra; // the index or key
+ vespalib::tensor::TensorType _tensorType;
+
+public:
+ /**
+ * Constructs a blueprint.
+ */
+ AttributeBlueprint();
+
+ // Inherit doc from Blueprint.
+ virtual void visitDumpFeatures(const search::fef::IIndexEnvironment & env,
+ search::fef::IDumpFeatureVisitor & visitor) const;
+
+ // Inherit doc from Blueprint.
+ virtual search::fef::Blueprint::UP createInstance() const;
+
+ // Inherit doc from Blueprint.
+ virtual search::fef::FeatureExecutor::LP createExecutor(const search::fef::IQueryEnvironment & env) const;
+
+ // Inherit doc from Blueprint.
+ virtual search::fef::ParameterDescriptions getDescriptions() const {
+ return search::fef::ParameterDescriptions().
+ desc().attribute(search::fef::ParameterCollection::ANY).
+ desc().attribute(search::fef::ParameterCollection::ANY).string();
+ }
+
+ // Inherit doc from Blueprint.
+ virtual bool setup(const search::fef::IIndexEnvironment & env,
+ const search::fef::ParameterList & params);
+};
+
+
+} // namespace features
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/features/attributematchfeature.cpp b/searchlib/src/vespa/searchlib/features/attributematchfeature.cpp
new file mode 100644
index 00000000000..bca39ef4b73
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/attributematchfeature.cpp
@@ -0,0 +1,350 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".features.attributematchfeature");
+
+#include <vespa/searchcommon/attribute/attributecontent.h>
+#include <vespa/searchlib/fef/featurenamebuilder.h>
+#include <vespa/searchlib/fef/fieldinfo.h>
+#include <vespa/searchlib/fef/properties.h>
+#include "attributematchfeature.h"
+#include "utils.h"
+#include "valuefeature.h"
+
+using namespace search::attribute;
+using namespace search::fef;
+using search::feature_t;
+
+namespace {
+feature_t adjustToOne(feature_t value) {
+ if (value > 1.0f) {
+ return 1.0f;
+ }
+ return value;
+}
+
+bool hasAttribute(const IQueryEnvironment &env, const ITermData &term_data)
+{
+ typedef ITermFieldRangeAdapter FRA;
+
+ for (FRA iter(term_data); iter.valid(); iter.next()) {
+ const FieldInfo *info = env.getIndexEnvironment().getField(iter.get().getFieldId());
+ if (info != 0 && info->type() == FieldType::ATTRIBUTE) {
+ return true;
+ }
+ }
+ return false;
+}
+} // namespace
+
+namespace search {
+namespace features {
+
+template <typename T>
+AttributeMatchExecutor<T>::Computer::Computer(const IQueryEnvironment & env, AttributeMatchParams params) :
+ _params(params),
+ _buffer(),
+ _numAttrTerms(0),
+ _totalTermWeight(0),
+ _totalTermSignificance(0),
+ _totalAttrTermWeight(0),
+ _queryTerms(),
+ _matches(0),
+ _matchedTermWeight(0),
+ _matchedTermSignificance(0),
+ _totalWeight(0),
+ _normalizedWeightedWeight(0),
+ _weightSum(0),
+ _valueCount(0)
+{
+ _buffer.allocate(_params.attribute->getMaxValueCount());
+ for (uint32_t i = 0; i < env.getNumTerms(); ++i) {
+ QueryTerm qt = QueryTermFactory::create(env, i);
+ _totalTermWeight += qt.termData()->getWeight().percent();
+ _totalTermSignificance += qt.significance();
+
+ if (hasAttribute(env, *qt.termData())) {
+ _numAttrTerms++;
+ _totalAttrTermWeight += qt.termData()->getWeight().percent();
+ const ITermFieldData *field = qt.termData()->lookupField(_params.attrInfo->id());
+ if (field != 0) {
+ qt.fieldHandle(field->getHandle());
+ _queryTerms.push_back(qt);
+ }
+ }
+ }
+ LOG(debug, "attributeMatch(%s): weightedSet(%s), numAttributeTerms(%u), totalAttrTermWeight(%u), numTerms(%u), "
+ "totalTermWeight(%u), totalTermSignificance(%f)",
+ _params.attrInfo->name().c_str(), _params.weightedSet ? "true" : "false",
+ _numAttrTerms, _totalAttrTermWeight, getNumTerms(), _totalTermWeight, _totalTermSignificance);
+}
+
+template <typename T>
+void
+AttributeMatchExecutor<T>::Computer::reset()
+{
+ _matches = 0;
+ _matchedTermWeight = 0,
+ _matchedTermSignificance = 0,
+ _totalWeight = 0;
+ _normalizedWeightedWeight = 0;
+ _weightSum = 0;
+ _valueCount = 0;
+}
+
+template <typename T>
+void
+AttributeMatchExecutor<T>::Computer::run(MatchData & match)
+{
+ for (size_t i = 0; i < _queryTerms.size(); ++i) {
+ const ITermData * td = _queryTerms[i].termData();
+ feature_t significance = _queryTerms[i].significance();
+ const TermFieldMatchData *tfmd = match.resolveTermField(_queryTerms[i].fieldHandle());
+ if (tfmd->getDocId() == match.getDocId()) { // hit on this document
+ _matches++;
+ _matchedTermWeight += td->getWeight().percent();
+ _matchedTermSignificance += significance;
+ if (_params.weightedSet) {
+ int32_t weight = tfmd->getWeight();
+ _totalWeight += weight;
+ // attribute weight * query term weight
+ _normalizedWeightedWeight += weight * static_cast<int32_t>(td->getWeight().percent());
+ }
+ }
+ }
+ if (_params.weightedSet) {
+ _buffer.fill(*_params.attribute, match.getDocId());
+ for (uint32_t i = 0; i < _buffer.size(); ++i) {
+ _weightSum += _buffer[i].getWeight();
+ }
+ } else {
+ _valueCount = _params.attribute->getValueCount(match.getDocId());
+ }
+
+ LOG(debug, "attributeMatch(%s)::Computer::run(): matches(%u), totalWeight(%d), normalizedWeightedWeight(%f), "
+ "weightSum(%d), valueCount(%u), matchedTermWeight(%u), matchedTermSignificance(%f)",
+ _params.attrInfo->name().c_str(), _matches, _totalWeight, _normalizedWeightedWeight,
+ _weightSum, _valueCount, _matchedTermWeight, _matchedTermSignificance);
+}
+
+template <typename T>
+feature_t
+AttributeMatchExecutor<T>::Computer::getAverageWeight() const
+{
+ if (_matches != 0) {
+ return (_totalWeight / static_cast<feature_t>(_matches));
+ }
+ return 0;
+}
+
+template <typename T>
+feature_t
+AttributeMatchExecutor<T>::Computer::getQueryCompleteness() const
+{
+ if (getNumTerms() != 0) {
+ return (_matches / static_cast<feature_t>(getNumTerms()));
+ }
+ return 0;
+}
+
+template <typename T>
+feature_t
+AttributeMatchExecutor<T>::Computer::getNormalizedWeight() const
+{
+ if (_params.weightedSet) {
+ feature_t normalizedWeight = _totalWeight > 0 ? _totalWeight / ((feature_t)_params.maxWeight * _numAttrTerms) : 0.0f;
+ return adjustToOne(normalizedWeight);
+ }
+ return 0;
+}
+
+template <typename T>
+feature_t
+AttributeMatchExecutor<T>::Computer::getNormalizedWeightedWeight() const
+{
+ if (_params.weightedSet) {
+ feature_t divider = _totalAttrTermWeight > 0 ? ((feature_t)_params.maxWeight * _totalAttrTermWeight) : _params.maxWeight;
+ feature_t normalized = _normalizedWeightedWeight > 0 ? _normalizedWeightedWeight / divider : 0.0f;
+ return adjustToOne(normalized);
+ }
+ return 0;
+}
+
+template <typename T>
+feature_t
+AttributeMatchExecutor<T>::Computer::getFieldCompleteness() const
+{
+ if (_params.weightedSet) {
+ if (_totalWeight <= 0) {
+ return 0;
+ } else if (_weightSum <= 0) {
+ return 1;
+ } else {
+ feature_t fieldCompleteness = (_totalWeight / static_cast<feature_t>(_weightSum));
+ return adjustToOne(fieldCompleteness);
+ }
+ } else {
+ if (_valueCount > 0) {
+ feature_t fieldCompleteness = _matches / static_cast<feature_t>(_valueCount);
+ return adjustToOne(fieldCompleteness);
+ } else {
+ return 0;
+ }
+ }
+}
+
+template <typename T>
+feature_t
+AttributeMatchExecutor<T>::Computer::getCompleteness() const
+{
+ return (getQueryCompleteness() * ( 1.0f - _params.fieldCompletenessImportance +
+ (_params.fieldCompletenessImportance * getFieldCompleteness()) ));
+}
+
+template <typename T>
+feature_t
+AttributeMatchExecutor<T>::Computer::getWeight() const
+{
+ if (_totalTermWeight > 0) {
+ return (feature_t)_matchedTermWeight / _totalTermWeight;
+ }
+ return 0;
+}
+
+template <typename T>
+feature_t
+AttributeMatchExecutor<T>::Computer::getSignificance() const
+{
+ if (_totalTermSignificance > 0) {
+ return (feature_t)_matchedTermSignificance / _totalTermSignificance;
+ }
+ return 0;
+}
+
+template <typename T>
+AttributeMatchExecutor<T>::AttributeMatchExecutor(const IQueryEnvironment & env, AttributeMatchParams params) :
+ FeatureExecutor(),
+ _cmp(env, params)
+{
+}
+
+
+template <typename T>
+void
+AttributeMatchExecutor<T>::execute(MatchData & match)
+{
+ //LOG(debug, "Execute for field '%s':", _params.attrInfo->name().c_str());
+ _cmp.reset();
+ _cmp.run(match);
+
+ *match.resolveFeature(outputs()[0]) = _cmp.getCompleteness();
+ *match.resolveFeature(outputs()[1]) = _cmp.getQueryCompleteness();
+ *match.resolveFeature(outputs()[2]) = _cmp.getFieldCompleteness();
+ *match.resolveFeature(outputs()[3]) = _cmp.getNormalizedWeight();
+ *match.resolveFeature(outputs()[4]) = _cmp.getNormalizedWeightedWeight();
+ *match.resolveFeature(outputs()[5]) = _cmp.getWeight();
+ *match.resolveFeature(outputs()[6]) = _cmp.getSignificance();
+ *match.resolveFeature(outputs()[7]) = _cmp.getImportance();
+ *match.resolveFeature(outputs()[8]) = static_cast<feature_t>(_cmp.getMatches());
+ *match.resolveFeature(outputs()[9]) = static_cast<feature_t>(_cmp.getTotalWeight());
+ *match.resolveFeature(outputs()[10]) = _cmp.getAverageWeight();
+}
+
+
+AttributeMatchBlueprint::AttributeMatchBlueprint() :
+ Blueprint("attributeMatch"),
+ _params()
+{
+ // empty
+}
+
+void
+AttributeMatchBlueprint::visitDumpFeatures(const IIndexEnvironment &env,
+ IDumpFeatureVisitor &visitor) const
+{
+ for (uint32_t i = 0; i < env.getNumFields(); ++i) {
+ const FieldInfo * field = env.getField(i);
+ if (field->type() == FieldType::ATTRIBUTE) {
+ FeatureNameBuilder fnb;
+ fnb.baseName(getBaseName()).parameter(field->name());
+ visitor.visitDumpFeature(fnb.buildName());
+ visitor.visitDumpFeature(fnb.output("completeness").buildName());
+ visitor.visitDumpFeature(fnb.output("queryCompleteness").buildName());
+ visitor.visitDumpFeature(fnb.output("fieldCompleteness").buildName());
+ visitor.visitDumpFeature(fnb.output("normalizedWeight").buildName());
+ visitor.visitDumpFeature(fnb.output("normalizedWeightedWeight").buildName());
+ visitor.visitDumpFeature(fnb.output("weight").buildName());
+ visitor.visitDumpFeature(fnb.output("significance").buildName());
+ visitor.visitDumpFeature(fnb.output("importance").buildName());
+ visitor.visitDumpFeature(fnb.output("matches").buildName());
+ visitor.visitDumpFeature(fnb.output("totalWeight").buildName());
+ visitor.visitDumpFeature(fnb.output("averageWeight").buildName());
+ }
+ }
+}
+
+Blueprint::UP
+AttributeMatchBlueprint::createInstance() const
+{
+ return Blueprint::UP(new AttributeMatchBlueprint());
+}
+
+bool
+AttributeMatchBlueprint::setup(const IIndexEnvironment & env,
+ const ParameterList & params)
+{
+ // params[0] = attribute name
+ _params.attrInfo = params[0].asField();
+ _params.maxWeight = util::strToNum<int32_t>(env.getProperties().lookup(getName(), "maxWeight").get("256"));
+ _params.fieldCompletenessImportance =
+ util::strToNum<feature_t>(env.getProperties().lookup(getName(), "fieldCompletenessImportance").get("0.05"));
+
+ // normalized
+ describeOutput("completeness", "The normalized total completeness, where field completeness is more important");
+ describeOutput("queryCompleteness", "The query completeness for this attribute: matches/the number of query terms searching this attribute");
+ describeOutput("fieldCompleteness", "The normalized ratio of query tokens which was matched in the field");
+ describeOutput("normalizedWeight", "A number which is close to 1 if the attribute weights of most matches in a weighted set are high (relative to the maxWeight configuration value), 0 otherwise");
+ describeOutput("normalizedWeightedWeight", "A number which is close to 1 if the attribute weights of most matches in a weighted set are high (relative to the maxWeight configuration value), and where highly weighted query terms has more impact, 0 otherwise");
+ // normalized and relative to the whole query
+ describeOutput("weight", "The normalized weight of this match relative to the whole query");
+ describeOutput("significance", "Returns the normalized term significance of the terms of this match relative to the whole query");
+ describeOutput("importance", "Returns the average of significance and weight");
+
+ // not normalized
+ describeOutput("matches", "The number of query terms which was matched in this attribute");
+ describeOutput("totalWeight", "The sum of the weights of the attribute keys matched in a weighted set attribute");
+ describeOutput("averageWeight", "totalWeight/matches");
+
+ env.hintAttributeAccess(_params.attrInfo->name());
+ return true;
+}
+
+FeatureExecutor::LP
+AttributeMatchBlueprint::createExecutor(const IQueryEnvironment & env) const
+{
+ const IAttributeVector * attribute = env.getAttributeContext().getAttribute(_params.attrInfo->name());
+ if (attribute == NULL) {
+ LOG(error, "The attribute vector '%s' was not found in the attribute manager.", _params.attrInfo->name().c_str());
+ return FeatureExecutor::LP(NULL);
+ }
+
+ AttributeMatchParams amp = _params;
+ amp.attribute = attribute;
+ amp.weightedSet = attribute->getCollectionType() == attribute::CollectionType::WSET;
+
+ if (attribute->isStringType()) {
+ return FeatureExecutor::LP
+ (new AttributeMatchExecutor<WeightedConstCharContent>(env, amp));
+ } else if (attribute->isIntegerType()) {
+ return FeatureExecutor::LP
+ (new AttributeMatchExecutor<WeightedIntegerContent>(env, amp));
+ } else { // FLOAT
+ return FeatureExecutor::LP
+ (new AttributeMatchExecutor<WeightedFloatContent>(env, amp));
+ }
+}
+
+
+} // namespace features
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/features/attributematchfeature.h b/searchlib/src/vespa/searchlib/features/attributematchfeature.h
new file mode 100644
index 00000000000..391b92ce2b2
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/attributematchfeature.h
@@ -0,0 +1,124 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/fef/blueprint.h>
+#include <vespa/searchlib/fef/featureexecutor.h>
+#include <vespa/searchlib/common/feature.h>
+#include "queryterm.h"
+
+namespace search {
+namespace features {
+
+struct AttributeMatchParams {
+ AttributeMatchParams() :
+ attrInfo(NULL), attribute(NULL), weightedSet(false), maxWeight(256), fieldCompletenessImportance(0.05f) {}
+ const search::fef::FieldInfo * attrInfo;
+ const search::attribute::IAttributeVector * attribute;
+ bool weightedSet;
+ // config values
+ int32_t maxWeight;
+ feature_t fieldCompletenessImportance;
+};
+
+/**
+ * Implements the executor for the attribute match feature.
+ */
+template <typename T>
+class AttributeMatchExecutor : public search::fef::FeatureExecutor {
+private:
+ /**
+ * This class is used to compute metrics for match in an attribute vector.
+ */
+ class Computer {
+ private:
+ // TermData pointer and significance
+ AttributeMatchParams _params;
+ mutable T _buffer; // used when fetching weights from a weighted set attribute
+
+ // per query
+ uint32_t _numAttrTerms;
+ uint32_t _totalTermWeight; // total weight of all terms
+ feature_t _totalTermSignificance; // total significance of all terms
+ uint32_t _totalAttrTermWeight; // weight of all attribute terms
+ QueryTermVector _queryTerms; // the terms searching this attribute
+
+ // per doc
+ uint32_t _matches;
+ uint32_t _matchedTermWeight; // term weight of matched terms
+ feature_t _matchedTermSignificance; // significance of matched terms
+ int32_t _totalWeight;
+ feature_t _normalizedWeightedWeight;
+ int32_t _weightSum; // sum of the weights for a weighted set attribute
+ uint32_t _valueCount; // the number of values for a non-weighted set attribute
+
+ public:
+ Computer(const search::fef::IQueryEnvironment & env,
+ AttributeMatchParams params);
+ void run(search::fef::MatchData & data);
+ void reset();
+ uint32_t getNumTerms() const { return _queryTerms.size(); }
+ uint32_t getMatches() const { return _matches; }
+ int32_t getTotalWeight() const { return _totalWeight; }
+ feature_t getAverageWeight() const;
+ feature_t getQueryCompleteness() const;
+ feature_t getNormalizedWeight() const;
+ feature_t getNormalizedWeightedWeight() const;
+ feature_t getFieldCompleteness() const;
+ feature_t getCompleteness() const;
+ feature_t getWeight() const;
+ feature_t getSignificance() const;
+ feature_t getImportance() const { return (getWeight() + getSignificance()) * 0.5; }
+ };
+
+ Computer _cmp;
+
+public:
+ /**
+ * Constructs an executor.
+ */
+ AttributeMatchExecutor(const search::fef::IQueryEnvironment & env,
+ AttributeMatchParams params);
+
+ // Inherit doc from FeatureExecutor.
+ virtual void execute(search::fef::MatchData & data);
+};
+
+
+/**
+ * Implements the blueprint for the attribute match executor.
+ */
+class AttributeMatchBlueprint : public search::fef::Blueprint {
+private:
+ AttributeMatchParams _params;
+
+public:
+ /**
+ * Constructs a blueprint.
+ */
+ AttributeMatchBlueprint();
+
+ // Inherit doc from Blueprint.
+ virtual void visitDumpFeatures(const search::fef::IIndexEnvironment & env,
+ search::fef::IDumpFeatureVisitor & visitor) const;
+
+ // Inherit doc from Blueprint.
+ virtual search::fef::Blueprint::UP createInstance() const;
+
+ // Inherit doc from Blueprint.
+ virtual search::fef::ParameterDescriptions getDescriptions() const {
+ return search::fef::ParameterDescriptions().desc().attributeField(search::fef::ParameterCollection::ANY);
+ }
+
+ // Inherit doc from Blueprint.
+ virtual bool setup(const search::fef::IIndexEnvironment & env,
+ const search::fef::ParameterList & params);
+
+ // Inherit doc from Blueprint.
+ virtual search::fef::FeatureExecutor::LP createExecutor(const search::fef::IQueryEnvironment & env) const;
+};
+
+
+} // namespace features
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/features/closenessfeature.cpp b/searchlib/src/vespa/searchlib/features/closenessfeature.cpp
new file mode 100644
index 00000000000..f6b289bfd16
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/closenessfeature.cpp
@@ -0,0 +1,110 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".features.closenessfeature");
+#include <vespa/searchlib/fef/properties.h>
+#include "closenessfeature.h"
+#include "utils.h"
+
+using namespace search::fef;
+
+namespace search {
+namespace features {
+
+ClosenessExecutor::ClosenessExecutor(feature_t maxDistance, feature_t scaleDistance) :
+ FeatureExecutor(),
+ _maxDistance(maxDistance),
+ _logCalc(maxDistance, scaleDistance)
+{
+}
+
+void
+ClosenessExecutor::execute(MatchData & match)
+{
+ feature_t distance = *match.resolveFeature(inputs()[0]);
+ feature_t closeness = std::max(1 - (distance / _maxDistance), (feature_t)0);
+ *match.resolveFeature(outputs()[0]) = closeness;
+ *match.resolveFeature(outputs()[1]) = _logCalc.get(distance);
+}
+
+
+// Polar Earth radius r = 6356.8 km
+// Polar Earth diameter = 2 * pi * r = 39940.952 km
+// 1 diameter = 39940.952 km = 360 degrees = 360 * 1000000 microdegrees
+// -> 1 km = 9013.30536007 microdegrees
+
+ClosenessBlueprint::ClosenessBlueprint() :
+ Blueprint("closeness"),
+ _maxDistance(9013305.0), // default value (about 250 km)
+ _scaleDistance(5.0*9013.305), // default value (about 5 km)
+ _halfResponse(1)
+{
+}
+
+void
+ClosenessBlueprint::visitDumpFeatures(const IIndexEnvironment &,
+ IDumpFeatureVisitor &) const
+{
+}
+
+bool
+ClosenessBlueprint::setup(const IIndexEnvironment & env,
+ const search::fef::ParameterList & params)
+{
+ // params[0] = attribute name
+ Property p = env.getProperties().lookup(getName(), "maxDistance");
+ if (p.found()) {
+ _maxDistance = util::strToNum<feature_t>(p.get());
+ }
+ p = env.getProperties().lookup(getName(), "halfResponse");
+ bool useHalfResponse = false;
+ if (p.found()) {
+ _halfResponse = util::strToNum<feature_t>(p.get());
+ useHalfResponse = true;
+ }
+ // sanity checks:
+ if (_maxDistance < 1) {
+ LOG(warning, "Invalid %s.maxDistance = %g, using 1.0",
+ getName().c_str(), (double)_maxDistance);
+ _maxDistance = 1.0;
+ }
+ if (_halfResponse < 1) {
+ LOG(warning, "Invalid %s.halfResponse = %g, using 1.0",
+ getName().c_str(), (double)_halfResponse);
+ _halfResponse = 1.0;
+ }
+ if (_halfResponse >= _maxDistance / 2) {
+ feature_t newResponse = (_maxDistance / 2) - 1;
+ LOG(warning, "Invalid %s.halfResponse = %g, using %g ((%s.maxDistance / 2) - 1)",
+ getName().c_str(), (double)_halfResponse, (double)newResponse, getName().c_str());
+ _halfResponse = newResponse;
+ }
+
+ if (useHalfResponse) {
+ _scaleDistance = LogarithmCalculator::getScale(_halfResponse, _maxDistance);
+ }
+
+
+ defineInput("distance(" + params[0].getValue() + ")");
+ describeOutput("out", "The closeness of the document (linear)");
+ describeOutput("logscale", "The closeness of the document (logarithmic shape)");
+
+ return true;
+}
+
+Blueprint::UP
+ClosenessBlueprint::createInstance() const
+{
+ return Blueprint::UP(new ClosenessBlueprint());
+}
+
+FeatureExecutor::LP
+ClosenessBlueprint::createExecutor(const IQueryEnvironment &) const
+{
+ return FeatureExecutor::LP(new ClosenessExecutor(_maxDistance, _scaleDistance));
+}
+
+
+} // namespace features
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/features/closenessfeature.h b/searchlib/src/vespa/searchlib/features/closenessfeature.h
new file mode 100644
index 00000000000..c86196f1e29
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/closenessfeature.h
@@ -0,0 +1,67 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/fef/blueprint.h>
+#include <vespa/searchlib/fef/featureexecutor.h>
+#include "logarithmcalculator.h"
+
+namespace search {
+namespace features {
+
+/**
+ * Implements the executor for the closeness feature.
+ */
+class ClosenessExecutor : public search::fef::FeatureExecutor {
+private:
+ feature_t _maxDistance;
+ LogarithmCalculator _logCalc;
+
+public:
+ /**
+ * Constructs an executor.
+ */
+ ClosenessExecutor(feature_t maxDistance, feature_t scaleDistance);
+ virtual void execute(search::fef::MatchData & data);
+};
+
+
+/**
+ * Implements the blueprint for the closeness executor.
+ */
+class ClosenessBlueprint : public search::fef::Blueprint {
+private:
+ feature_t _maxDistance;
+ feature_t _scaleDistance;
+ feature_t _halfResponse;
+
+public:
+ /**
+ * Constructs a blueprint.
+ */
+ ClosenessBlueprint();
+
+ // Inherit doc from Blueprint.
+ virtual void visitDumpFeatures(const search::fef::IIndexEnvironment & env,
+ search::fef::IDumpFeatureVisitor & visitor) const;
+
+ // Inherit doc from Blueprint.
+ virtual search::fef::Blueprint::UP createInstance() const;
+
+ // Inherit doc from Blueprint.
+ virtual search::fef::ParameterDescriptions getDescriptions() const {
+ return search::fef::ParameterDescriptions().desc().string();
+ }
+
+ // Inherit doc from Blueprint.
+ virtual bool setup(const search::fef::IIndexEnvironment & env,
+ const search::fef::ParameterList & params);
+
+ // Inherit doc from Blueprint.
+ virtual search::fef::FeatureExecutor::LP createExecutor(const search::fef::IQueryEnvironment & env) const;
+};
+
+
+} // namespace features
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/features/constant_tensor_executor.h b/searchlib/src/vespa/searchlib/features/constant_tensor_executor.h
new file mode 100644
index 00000000000..11b875df96b
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/constant_tensor_executor.h
@@ -0,0 +1,44 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/fef/featureexecutor.h>
+#include <vespa/vespalib/eval/value.h>
+#include <vespa/vespalib/tensor/tensor.h>
+#include <vespa/vespalib/tensor/default_tensor.h>
+#include <memory>
+
+namespace search {
+namespace features {
+
+/**
+ * Feature executor that returns a constant tensor.
+ */
+class ConstantTensorExecutor : public fef::FeatureExecutor
+{
+private:
+ const vespalib::eval::TensorValue::UP _tensor;
+
+public:
+ ConstantTensorExecutor(vespalib::eval::TensorValue::UP tensor)
+ : _tensor(std::move(tensor))
+ {}
+ virtual bool isPure() override { return true; }
+ virtual void execute(fef::MatchData &data) override {
+ *data.resolve_object_feature(outputs()[0]) = *_tensor;
+ }
+ static fef::FeatureExecutor::LP create(vespalib::tensor::Tensor::UP tensor) {
+ return FeatureExecutor::LP(new ConstantTensorExecutor
+ (std::make_unique<vespalib::eval::TensorValue>(std::move(tensor))));
+ }
+ static fef::FeatureExecutor::LP createEmpty() {
+ // XXX: we should use numbers instead of empty tensors
+ vespalib::tensor::DefaultTensor::builder builder;
+ return FeatureExecutor::LP(new ConstantTensorExecutor
+ (std::make_unique<vespalib::eval::TensorValue>
+ (builder.build())));
+ }
+};
+
+} // namespace features
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/features/create-class-cpp.sh b/searchlib/src/vespa/searchlib/features/create-class-cpp.sh
new file mode 100755
index 00000000000..6ec335ffa3d
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/create-class-cpp.sh
@@ -0,0 +1,29 @@
+#!/bin/sh
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+class=$1
+guard=`echo $class | tr 'a-z' 'A-Z'`
+name=`echo $class | tr 'A-Z' 'a-z'`
+
+cat <<EOF
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/log/log.h>
+LOG_SETUP(".$name");
+#include <vespa/fastos/fastos.h>
+#include "$name.h"
+
+namespace search {
+namespace features {
+
+$class::$class()
+{
+}
+
+$class::~$class()
+{
+}
+
+} // namespace features
+} // namespace search
+EOF
diff --git a/searchlib/src/vespa/searchlib/features/create-class-h.sh b/searchlib/src/vespa/searchlib/features/create-class-h.sh
new file mode 100644
index 00000000000..b8236f60d46
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/create-class-h.sh
@@ -0,0 +1,27 @@
+#!/bin/sh
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+class=$1
+guard=`echo $class | tr 'a-z' 'A-Z'`
+
+cat <<EOF
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+namespace search {
+namespace features {
+
+class $class
+{
+private:
+ $class(const $class &);
+ $class &operator=(const $class &);
+public:
+ $class();
+ virtual ~$class();
+};
+
+} // namespace features
+} // namespace search
+
+EOF
diff --git a/searchlib/src/vespa/searchlib/features/debug_attribute_wait.cpp b/searchlib/src/vespa/searchlib/features/debug_attribute_wait.cpp
new file mode 100644
index 00000000000..3c543624397
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/debug_attribute_wait.cpp
@@ -0,0 +1,96 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".features.debug_wait");
+#include "debug_attribute_wait.h"
+
+using search::attribute::IAttributeVector;
+
+namespace search {
+namespace features {
+
+//-----------------------------------------------------------------------------
+
+DebugAttributeWaitExecutor::DebugAttributeWaitExecutor(const search::fef::IQueryEnvironment &env,
+ const IAttributeVector *attribute,
+ const DebugAttributeWaitParams &params)
+ : _attribute(attribute),
+ _buf(),
+ _params(params)
+{
+ (void)env;
+}
+
+void
+DebugAttributeWaitExecutor::execute(search::fef::MatchData &data)
+{
+ double waitTime = 0.0;
+ FastOS_Time time;
+ time.SetNow();
+
+ if (_attribute != NULL) {
+ _buf.fill(*_attribute, data.getDocId());
+ waitTime = _buf[0];
+ }
+ double millis = waitTime * 1000.0;
+
+ while (time.MilliSecsToNow() < millis) {
+ if (_params.busyWait) {
+ for (int i = 0; i < 1000; i++)
+ ;
+ } else {
+ int rem = (int)(millis - time.MilliSecsToNow());
+ FastOS_Thread::Sleep(rem);
+ }
+ }
+ *data.resolveFeature(outputs()[0]) = 1.0e-6 * time.MicroSecsToNow();
+}
+
+//-----------------------------------------------------------------------------
+
+DebugAttributeWaitBlueprint::DebugAttributeWaitBlueprint()
+ : Blueprint("debugAttributeWait"),
+ _params()
+{
+}
+
+void
+DebugAttributeWaitBlueprint::visitDumpFeatures(const search::fef::IIndexEnvironment &env,
+ search::fef::IDumpFeatureVisitor &visitor) const
+{
+ (void)env;
+ (void)visitor;
+}
+
+search::fef::Blueprint::UP
+DebugAttributeWaitBlueprint::createInstance() const
+{
+ return Blueprint::UP(new DebugAttributeWaitBlueprint());
+}
+
+bool
+DebugAttributeWaitBlueprint::setup(const search::fef::IIndexEnvironment &env,
+ const search::fef::ParameterList &params)
+{
+ (void)env;
+ _attribute = params[0].getValue();
+ _params.busyWait = (params[1].asDouble() == 1.0);
+
+ describeOutput("out", "actual time waited");
+ env.hintAttributeAccess(_attribute);
+ return true;
+}
+
+search::fef::FeatureExecutor::LP
+DebugAttributeWaitBlueprint::createExecutor(const search::fef::IQueryEnvironment &env) const
+{
+ // Get attribute vector
+ const IAttributeVector * attribute = env.getAttributeContext().getAttribute(_attribute);
+ return search::fef::FeatureExecutor::LP(new DebugAttributeWaitExecutor(env, attribute, _params));
+}
+
+//-----------------------------------------------------------------------------
+
+} // namespace features
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/features/debug_attribute_wait.h b/searchlib/src/vespa/searchlib/features/debug_attribute_wait.h
new file mode 100644
index 00000000000..d7043ce9f30
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/debug_attribute_wait.h
@@ -0,0 +1,71 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <string>
+#include <vector>
+#include <vespa/searchcommon/attribute/attributecontent.h>
+#include <vespa/searchlib/fef/fef.h>
+
+namespace search {
+namespace features {
+
+//-----------------------------------------------------------------------------
+
+struct DebugAttributeWaitParams {
+ bool busyWait;
+};
+
+//-----------------------------------------------------------------------------
+
+class DebugAttributeWaitExecutor : public search::fef::FeatureExecutor
+{
+private:
+ const search::attribute::IAttributeVector *_attribute;
+ search::attribute::FloatContent _buf;
+ DebugAttributeWaitParams _params;
+
+public:
+ DebugAttributeWaitExecutor(const search::fef::IQueryEnvironment &env,
+ const search::attribute::IAttributeVector *
+ attribute,
+ const DebugAttributeWaitParams &params);
+ virtual void execute(search::fef::MatchData & data);
+};
+
+//-----------------------------------------------------------------------------
+
+class DebugAttributeWaitBlueprint : public search::fef::Blueprint
+{
+private:
+ vespalib::string _attribute;
+ DebugAttributeWaitParams _params;
+
+public:
+ DebugAttributeWaitBlueprint();
+
+ // Inherit doc from Blueprint.
+ virtual void visitDumpFeatures(const search::fef::IIndexEnvironment & env,
+ search::fef::IDumpFeatureVisitor & visitor) const;
+
+ // Inherit doc from Blueprint.
+ virtual search::fef::Blueprint::UP createInstance() const;
+
+ // Inherit doc from Blueprint.
+ virtual search::fef::ParameterDescriptions getDescriptions() const {
+ return search::fef::ParameterDescriptions().desc().attribute(search::fef::ParameterCollection::ANY).number();
+ }
+
+ // Inherit doc from Blueprint.
+ virtual bool setup(const search::fef::IIndexEnvironment &env,
+ const search::fef::ParameterList &params);
+
+ // Inherit doc from Blueprint.
+ virtual search::fef::FeatureExecutor::LP createExecutor(const search::fef::IQueryEnvironment & env) const;
+};
+
+//-----------------------------------------------------------------------------
+
+} // namespace features
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/features/debug_wait.cpp b/searchlib/src/vespa/searchlib/features/debug_wait.cpp
new file mode 100644
index 00000000000..58fb7925a74
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/debug_wait.cpp
@@ -0,0 +1,82 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".features.debug_wait");
+#include "debug_wait.h"
+
+namespace search {
+namespace features {
+
+//-----------------------------------------------------------------------------
+
+DebugWaitExecutor::DebugWaitExecutor(const search::fef::IQueryEnvironment &env,
+ const DebugWaitParams &params)
+ : _params(params)
+{
+ (void)env;
+}
+
+void
+DebugWaitExecutor::execute(search::fef::MatchData &data)
+{
+ FastOS_Time time;
+ time.SetNow();
+ double millis = _params.waitTime * 1000.0;
+
+ while (time.MilliSecsToNow() < millis) {
+ if (_params.busyWait) {
+ for (int i = 0; i < 1000; i++)
+ ;
+ } else {
+ int rem = (int)(millis - time.MilliSecsToNow());
+ FastOS_Thread::Sleep(rem);
+ }
+ }
+ *data.resolveFeature(outputs()[0]) = 1.0e-6 * time.MicroSecsToNow();
+}
+
+//-----------------------------------------------------------------------------
+
+DebugWaitBlueprint::DebugWaitBlueprint()
+ : Blueprint("debugWait"),
+ _params()
+{
+}
+
+void
+DebugWaitBlueprint::visitDumpFeatures(const search::fef::IIndexEnvironment &env,
+ search::fef::IDumpFeatureVisitor &visitor) const
+{
+ (void)env;
+ (void)visitor;
+}
+
+search::fef::Blueprint::UP
+DebugWaitBlueprint::createInstance() const
+{
+ return Blueprint::UP(new DebugWaitBlueprint());
+}
+
+bool
+DebugWaitBlueprint::setup(const search::fef::IIndexEnvironment &env,
+ const search::fef::ParameterList &params)
+{
+ (void)env;
+ _params.waitTime = params[0].asDouble();
+ _params.busyWait = (params[1].asDouble() == 1.0);
+
+ describeOutput("out", "actual time waited");
+ return true;
+}
+
+search::fef::FeatureExecutor::LP
+DebugWaitBlueprint::createExecutor(const search::fef::IQueryEnvironment &env) const
+{
+ return search::fef::FeatureExecutor::LP(new DebugWaitExecutor(env, _params));
+}
+
+//-----------------------------------------------------------------------------
+
+} // namespace features
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/features/debug_wait.h b/searchlib/src/vespa/searchlib/features/debug_wait.h
new file mode 100644
index 00000000000..69c7612381b
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/debug_wait.h
@@ -0,0 +1,66 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <string>
+#include <vector>
+#include <vespa/searchlib/fef/fef.h>
+
+namespace search {
+namespace features {
+
+//-----------------------------------------------------------------------------
+
+struct DebugWaitParams {
+ double waitTime;
+ bool busyWait;
+};
+
+//-----------------------------------------------------------------------------
+
+class DebugWaitExecutor : public search::fef::FeatureExecutor
+{
+private:
+ DebugWaitParams _params;
+
+public:
+ DebugWaitExecutor(const search::fef::IQueryEnvironment &env,
+ const DebugWaitParams &params);
+ virtual void execute(search::fef::MatchData & data);
+};
+
+//-----------------------------------------------------------------------------
+
+class DebugWaitBlueprint : public search::fef::Blueprint
+{
+private:
+ DebugWaitParams _params;
+
+public:
+ DebugWaitBlueprint();
+
+ // Inherit doc from Blueprint.
+ virtual void visitDumpFeatures(const search::fef::IIndexEnvironment & env,
+ search::fef::IDumpFeatureVisitor & visitor) const;
+
+ // Inherit doc from Blueprint.
+ virtual search::fef::Blueprint::UP createInstance() const;
+
+ // Inherit doc from Blueprint.
+ virtual search::fef::ParameterDescriptions getDescriptions() const {
+ return search::fef::ParameterDescriptions().desc().number().number();
+ }
+
+ // Inherit doc from Blueprint.
+ virtual bool setup(const search::fef::IIndexEnvironment &env,
+ const search::fef::ParameterList &params);
+
+ // Inherit doc from Blueprint.
+ virtual search::fef::FeatureExecutor::LP createExecutor(const search::fef::IQueryEnvironment & env) const;
+};
+
+//-----------------------------------------------------------------------------
+
+} // namespace features
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/features/distancefeature.cpp b/searchlib/src/vespa/searchlib/features/distancefeature.cpp
new file mode 100644
index 00000000000..2002729b049
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/distancefeature.cpp
@@ -0,0 +1,148 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".features.distancefeature");
+#include <cmath>
+#include <limits>
+#include <vespa/document/datatype/positiondatatype.h>
+#include <vespa/searchlib/fef/location.h>
+#include <vespa/searchlib/fef/matchdata.h>
+#include <vespa/vespalib/geo/zcurve.h>
+#include "distancefeature.h"
+
+
+using namespace search::fef;
+
+namespace search {
+namespace features {
+
+feature_t
+DistanceExecutor::calculateDistance(uint32_t docId)
+{
+ if (_location.isValid() && _pos != NULL) {
+ return calculate2DZDistance(docId);
+ }
+ return DEFAULT_DISTANCE;
+}
+
+
+feature_t
+DistanceExecutor::calculate2DZDistance(uint32_t docId)
+{
+ _intBuf.fill(*_pos, docId);
+ uint32_t numValues = _intBuf.size();
+ uint64_t sqabsdist = std::numeric_limits<uint64_t>::max();
+ int32_t docx = 0;
+ int32_t docy = 0;
+ for (uint32_t i = 0; i < numValues; ++i) {
+ vespalib::geo::ZCurve::decode(_intBuf[i], &docx, &docy);
+ uint32_t dx;
+ uint32_t dy;
+ if (_location.getXPosition() > docx) {
+ dx = _location.getXPosition() - docx;
+ } else {
+ dx = docx - _location.getXPosition();
+ }
+ if (_location.getXAspect() != 0) {
+ dx = ((uint64_t) dx * _location.getXAspect()) >> 32;
+ }
+ if (_location.getYPosition() > docy) {
+ dy = _location.getYPosition() - docy;
+ } else {
+ dy = docy - _location.getYPosition();
+ }
+ uint64_t sqdist = (uint64_t) dx * dx + (uint64_t) dy * dy;
+ if (sqdist < sqabsdist) {
+ sqabsdist = sqdist;
+ }
+ }
+ return static_cast<feature_t>(sqrt(static_cast<feature_t>(sqabsdist)));
+}
+
+DistanceExecutor::DistanceExecutor(const Location & location,
+ const search::attribute::IAttributeVector * pos) :
+ FeatureExecutor(),
+ _location(location),
+ _pos(pos),
+ _intBuf()
+{
+ if (_pos != NULL) {
+ _intBuf.allocate(_pos->getMaxValueCount());
+ }
+}
+
+void
+DistanceExecutor::execute(MatchData & match)
+{
+ *match.resolveFeature(outputs()[0]) = calculateDistance(match.getDocId());
+}
+
+const feature_t DistanceExecutor::DEFAULT_DISTANCE(6400000000.0);
+
+
+DistanceBlueprint::DistanceBlueprint() :
+ Blueprint("distance"),
+ _posAttr()
+{
+}
+
+void
+DistanceBlueprint::visitDumpFeatures(const IIndexEnvironment &,
+ IDumpFeatureVisitor &) const
+{
+}
+
+Blueprint::UP
+DistanceBlueprint::createInstance() const
+{
+ return Blueprint::UP(new DistanceBlueprint());
+}
+
+bool
+DistanceBlueprint::setup(const IIndexEnvironment & env,
+ const ParameterList & params)
+{
+ _posAttr = params[0].getValue();
+ describeOutput("out", "The euclidian distance from the query position.");
+ env.hintAttributeAccess(_posAttr);
+ env.hintAttributeAccess(document::PositionDataType::getZCurveFieldName(_posAttr));
+ return true;
+}
+
+FeatureExecutor::LP
+DistanceBlueprint::createExecutor(const IQueryEnvironment & env) const
+{
+ const search::attribute::IAttributeVector * pos = NULL;
+ const Location & location = env.getLocation();
+ LOG(debug, "DistanceBlueprint::createExecutor location.valid='%s', '%s', alternatively '%s'",
+ location.isValid() ? "true" : "false", _posAttr.c_str(), document::PositionDataType::getZCurveFieldName(_posAttr).c_str());
+ if (location.isValid()) {
+ pos = env.getAttributeContext().getAttribute(_posAttr);
+ if (pos == NULL) {
+ LOG(debug, "Failed to find attribute '%s', resorting too '%s'",
+ _posAttr.c_str(), document::PositionDataType::getZCurveFieldName(_posAttr).c_str());
+ pos = env.getAttributeContext().getAttribute(document::PositionDataType::getZCurveFieldName(_posAttr));
+ }
+ if (pos != NULL) {
+ if (!pos->isIntegerType()) {
+ LOG(warning, "The position attribute '%s' is not an integer attribute. Will use default distance.",
+ pos->getName().c_str());
+ pos = NULL;
+ } else if (pos->getCollectionType() == attribute::CollectionType::WSET) {
+ LOG(warning, "The position attribute '%s' is a weighted set attribute. Will use default distance.",
+ pos->getName().c_str());
+ pos = NULL;
+ }
+ } else {
+ LOG(warning, "The position attribute '%s' was not found. Will use default distance.", _posAttr.c_str());
+ }
+ }
+
+ return FeatureExecutor::LP(new DistanceExecutor(location, pos));
+}
+
+
+
+} // namespace features
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/features/distancefeature.h b/searchlib/src/vespa/searchlib/features/distancefeature.h
new file mode 100644
index 00000000000..bf9d4cb54da
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/distancefeature.h
@@ -0,0 +1,75 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchcommon/attribute/attributecontent.h>
+#include <vespa/searchlib/fef/blueprint.h>
+#include <vespa/searchlib/fef/featureexecutor.h>
+#include <vespa/searchlib/common/feature.h>
+
+namespace search {
+namespace features {
+
+/**
+ * Implements the executor for the distance feature.
+ */
+class DistanceExecutor : public search::fef::FeatureExecutor {
+private:
+ const search::fef::Location & _location;
+ const search::attribute::IAttributeVector * _pos;
+ search::attribute::IntegerContent _intBuf;
+
+ feature_t calculateDistance(uint32_t docId);
+ feature_t calculate2DZDistance(uint32_t docId);
+
+public:
+ /**
+ * Constructs an executor for the distance feature.
+ *
+ * @param location the location object associated with the query environment.
+ * @param pos the attribute to use for positions (expects zcurve encoding).
+ */
+ DistanceExecutor(const search::fef::Location & location,
+ const search::attribute::IAttributeVector * pos);
+ virtual void execute(search::fef::MatchData & data);
+
+ static const feature_t DEFAULT_DISTANCE;
+};
+
+/**
+ * Implements the blueprint for the distance executor.
+ */
+class DistanceBlueprint : public search::fef::Blueprint {
+private:
+ vespalib::string _posAttr;
+
+public:
+ /**
+ * Constructs a blueprint for the distance executor.
+ */
+ DistanceBlueprint();
+
+ // Inherit doc from Blueprint.
+ virtual void visitDumpFeatures(const search::fef::IIndexEnvironment & env,
+ search::fef::IDumpFeatureVisitor & visitor) const;
+
+ // Inherit doc from Blueprint.
+ virtual search::fef::Blueprint::UP createInstance() const;
+
+ // Inherit doc from Blueprint.
+ virtual search::fef::ParameterDescriptions getDescriptions() const {
+ return search::fef::ParameterDescriptions().desc().string();
+ }
+
+ // Inherit doc from Blueprint.
+ virtual bool setup(const search::fef::IIndexEnvironment & env,
+ const search::fef::ParameterList & params);
+
+ // Inherit doc from Blueprint.
+ virtual search::fef::FeatureExecutor::LP createExecutor(const search::fef::IQueryEnvironment & env) const;
+};
+
+
+} // namespace features
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/features/distancetopathfeature.cpp b/searchlib/src/vespa/searchlib/features/distancetopathfeature.cpp
new file mode 100644
index 00000000000..05d17c33a79
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/distancetopathfeature.cpp
@@ -0,0 +1,177 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".features.distancetopathfeature");
+
+#include <boost/algorithm/string/split.hpp>
+#include <boost/algorithm/string/classification.hpp>
+#include <cmath>
+#include <vespa/document/datatype/positiondatatype.h>
+#include <vespa/searchlib/fef/matchdata.h>
+#include <vespa/searchlib/fef/properties.h>
+#include <sstream>
+#include <vespa/vespalib/geo/zcurve.h>
+#include "distancetopathfeature.h"
+#include "utils.h"
+
+namespace search {
+namespace features {
+
+const feature_t DistanceToPathExecutor::DEFAULT_DISTANCE(6400000000.0);
+
+DistanceToPathExecutor::DistanceToPathExecutor(std::vector<Vector2> &path,
+ const search::attribute::IAttributeVector *pos) :
+ search::fef::FeatureExecutor(),
+ _intBuf(),
+ _path(),
+ _pos(pos)
+{
+ if (_pos != NULL) {
+ _intBuf.allocate(_pos->getMaxValueCount());
+ }
+ _path.swap(path); // avoid copy
+}
+
+void
+DistanceToPathExecutor::execute(search::fef::MatchData & match)
+{
+ if (_path.size() > 1 && _pos != NULL) {
+ double pos = -1, trip = 0, product = 0;
+ double minSqDist = std::numeric_limits<double>::max();
+ _intBuf.fill(*_pos, match.getDocId());
+
+ // For each line segment, do
+ for (uint32_t seg = 1; seg < _path.size(); ++seg) {
+ const Vector2 &p1 = _path[seg - 1];
+ const Vector2 &p2 = _path[seg];
+ double len2 = (p2.x - p1.x) * (p2.x - p1.x) + (p2.y - p1.y) * (p2.y - p1.y);
+ double len = sqrt(len2);
+
+ // For each document location, do
+ for (uint32_t loc = 0; loc < _intBuf.size(); ++loc) {
+ int32_t x = 0, y = 0;
+ vespalib::geo::ZCurve::decode(_intBuf[loc], &x, &y);
+
+ double u = 0, dx, dy;
+ if (len < 1e-6) {
+ dx = p1.x - x; // process as point
+ dy = p1.y - y;
+ } else {
+ u = std::min(1.0, std::max(0.0, (((x - p1.x) * (p2.x - p1.x)) + ((y - p1.y) * (p2.y - p1.y))) / len2));
+ if (u == 0) {
+ dx = p1.x - x; // intersection before segment
+ dy = p1.y - y;
+ } else if (u == 1) {
+ dx = p2.x - x; // intersection after segment
+ dy = p2.y - y;
+ } else {
+ dx = p1.x + u * (p2.x - p1.x) - x;
+ dy = p1.y + u * (p2.y - p1.y) - y;
+ }
+ }
+
+ double sqDist = dx * dx + dy * dy;
+ if (sqDist < minSqDist) {
+ minSqDist = sqDist;
+ pos = trip + u * len;
+ product = (p2.x - p1.x) * dy - (p2.y - p1.y) * dx;
+ }
+ }
+ trip += len;
+ }
+
+ *match.resolveFeature(outputs()[0]) = static_cast<feature_t>(sqrt(static_cast<feature_t>(minSqDist)));
+ *match.resolveFeature(outputs()[1]) = static_cast<feature_t>(pos > -1 ? (trip > 0 ? pos / trip : 0) : 1);
+ *match.resolveFeature(outputs()[2]) = static_cast<feature_t>(product);
+ } else {
+ *match.resolveFeature(outputs()[0]) = DEFAULT_DISTANCE;
+ *match.resolveFeature(outputs()[1]) = 1;
+ *match.resolveFeature(outputs()[2]) = 0;
+ }
+}
+
+DistanceToPathBlueprint::DistanceToPathBlueprint() :
+ Blueprint("distanceToPath"),
+ _posAttr()
+{
+ // empty
+}
+
+void
+DistanceToPathBlueprint::visitDumpFeatures(const search::fef::IIndexEnvironment &,
+ search::fef::IDumpFeatureVisitor &) const
+{
+ // empty
+}
+
+search::fef::Blueprint::UP
+DistanceToPathBlueprint::createInstance() const
+{
+ return Blueprint::UP(new DistanceToPathBlueprint());
+}
+
+bool
+DistanceToPathBlueprint::setup(const search::fef::IIndexEnvironment & env,
+ const search::fef::ParameterList & params)
+{
+ _posAttr = params[0].getValue();
+ describeOutput("distance", "The euclidian distance from the query path.");
+ describeOutput("traveled", "The normalized distance traveled along the path before intersection.");
+ describeOutput("product", "The cross-product of the intersecting line segment and the intersection-to-document vector.");
+ env.hintAttributeAccess(_posAttr);
+ env.hintAttributeAccess(document::PositionDataType::getZCurveFieldName(_posAttr));
+ return true;
+}
+
+search::fef::FeatureExecutor::LP
+DistanceToPathBlueprint::createExecutor(const search::fef::IQueryEnvironment &env) const
+{
+ // Retrieve path from query using the name of this and "path" as property.
+ std::vector<Vector2> path;
+ search::fef::Property pro = env.getProperties().lookup(getName(), "path");
+ if (pro.found()) {
+ vespalib::string str = pro.getAt(0);
+ uint32_t len = str.size();
+ if (str[0] == '(' && len > 1 && str[len - 1] == ')') {
+ str = str.substr(1, len - 1); // remove braces
+ std::vector<vespalib::string> arr;
+ boost::split(arr, str, boost::is_any_of(","));
+ len = arr.size() - 1;
+ for (uint32_t i = 0; i < len; i += 2) {
+ double x = util::strToNum<double>(arr[i]);
+ double y = util::strToNum<double>(arr[i + 1]);
+ path.push_back(Vector2(x, y));
+ }
+ }
+ }
+
+ // Lookup the attribute vector that holds document positions.
+ const search::attribute::IAttributeVector *pos = NULL;
+ if (path.size() > 1) {
+ pos = env.getAttributeContext().getAttribute(_posAttr);
+ if (pos == NULL) {
+ pos = env.getAttributeContext().getAttribute(document::PositionDataType::getZCurveFieldName(_posAttr));
+ }
+ if (pos != NULL) {
+ if (!pos->isIntegerType()) {
+ LOG(warning, "The position attribute '%s' is not an integer attribute. Will use default distance.",
+ pos->getName().c_str());
+ pos = NULL;
+ } else if (pos->getCollectionType() == attribute::CollectionType::WSET) {
+ LOG(warning, "The position attribute '%s' is a weighted set attribute. Will use default distance.",
+ pos->getName().c_str());
+ pos = NULL;
+ }
+ } else {
+ LOG(warning, "The position attribute '%s' was not found. Will use default distance.", _posAttr.c_str());
+ }
+ } else {
+ LOG(warning, "No path given in query. Will use default distance.");
+ }
+
+ // Create and return a compatible executor.
+ return search::fef::FeatureExecutor::LP(new DistanceToPathExecutor(path, pos));
+}
+
+} // namespace features
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/features/distancetopathfeature.h b/searchlib/src/vespa/searchlib/features/distancetopathfeature.h
new file mode 100644
index 00000000000..d82b55aef03
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/distancetopathfeature.h
@@ -0,0 +1,82 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/searchcommon/attribute/attributecontent.h>
+#include <vespa/searchlib/fef/blueprint.h>
+#include <vespa/searchlib/fef/featureexecutor.h>
+#include <vespa/searchlib/common/feature.h>
+
+namespace search {
+namespace features {
+
+/**
+ * Define the point type that makes up the end-points in our path.
+ */
+struct Vector2 {
+ Vector2(double _x, double _y) : x(_x), y(_y) { }
+ double x, y;
+};
+
+/**
+ * Implements the executor for the distance to path feature.
+ */
+class DistanceToPathExecutor : public search::fef::FeatureExecutor {
+private:
+ search::attribute::IntegerContent _intBuf; // Position value buffer.
+ std::vector<Vector2> _path; // Path given by query.
+ const search::attribute::IAttributeVector *_pos; // Position attribute.
+
+public:
+ /**
+ * Constructs an executor for the distance to path feature.
+ *
+ * @param path The path associated with the query environment.
+ * @param pos The attribute to use for positions (expects zcurve encoding).
+ */
+ DistanceToPathExecutor(std::vector<Vector2> &path,
+ const search::attribute::IAttributeVector *pos);
+ virtual void execute(search::fef::MatchData & data);
+
+ /**
+ * Defines a default distance value to use if a proper one can not be determined.
+ */
+ static const feature_t DEFAULT_DISTANCE;
+};
+
+/**
+ * Implements the blueprint for the distance to path feature.
+ */
+class DistanceToPathBlueprint : public search::fef::Blueprint {
+private:
+ vespalib::string _posAttr; // Name of the position attribute.
+
+public:
+ /**
+ * Constructs a blueprint for the distance to path feature.
+ */
+ DistanceToPathBlueprint();
+
+ // Inherit doc from Blueprint.
+ virtual void visitDumpFeatures(const search::fef::IIndexEnvironment &env,
+ search::fef::IDumpFeatureVisitor &visitor) const;
+
+ // Inherit doc from Blueprint.
+ virtual search::fef::Blueprint::UP createInstance() const;
+
+ // Inherit doc from Blueprint.
+ virtual search::fef::ParameterDescriptions getDescriptions() const {
+ return search::fef::ParameterDescriptions().desc().string();
+ }
+
+ // Inherit doc from Blueprint.
+ virtual bool setup(const search::fef::IIndexEnvironment & env,
+ const search::fef::ParameterList & params);
+
+ // Inherit doc from Blueprint.
+ virtual search::fef::FeatureExecutor::LP createExecutor(const search::fef::IQueryEnvironment &env) const;
+};
+
+
+} // namespace features
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/features/dotproductfeature.cpp b/searchlib/src/vespa/searchlib/features/dotproductfeature.cpp
new file mode 100644
index 00000000000..51385a0b816
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/dotproductfeature.cpp
@@ -0,0 +1,457 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".features.dotproduct");
+#include <boost/algorithm/string/split.hpp>
+#include <boost/algorithm/string/classification.hpp>
+#include <vespa/searchcommon/attribute/attributecontent.h>
+#include <vespa/searchlib/fef/properties.h>
+
+#include "dotproductfeature.h"
+#include "array_parser.hpp"
+#include "utils.h"
+#include "valuefeature.h"
+#include "weighted_set_parser.hpp"
+#include <vespa/searchlib/attribute/integerbase.h>
+#include <vespa/searchlib/attribute/floatbase.h>
+
+using namespace search::attribute;
+using namespace search::fef;
+using vespalib::hwaccelrated::IAccelrated;
+
+namespace search {
+namespace features {
+namespace dotproduct {
+namespace wset {
+
+template <typename Vector, typename Buffer>
+DotProductExecutor<Vector, Buffer>::DotProductExecutor(const IAttributeVector * attribute, const Vector & vector) :
+ FeatureExecutor(),
+ _attribute(attribute),
+ _vector(vector),
+ _buffer()
+{
+ _buffer.allocate(_attribute->getMaxValueCount());
+ _vector.syncMap();
+}
+
+template <typename Vector, typename Buffer>
+void
+DotProductExecutor<Vector, Buffer>::execute(MatchData & match)
+{
+ feature_t val = 0;
+ if (!_vector.getDimMap().empty()) {
+ _buffer.fill(*_attribute, match.getDocId());
+ for (size_t i = 0; i < _buffer.size(); ++i) {
+ typename Vector::HashMap::const_iterator itr = _vector.getDimMap().find(_buffer[i].getValue());
+ if (itr != _vector.getDimMap().end()) {
+ val += _buffer[i].getWeight() * itr->second;
+ }
+ }
+ }
+ *match.resolveFeature(outputs()[0]) = val;
+}
+
+}
+
+namespace array {
+
+template <typename A>
+DotProductExecutor<A>::DotProductExecutor(const A * attribute, const V & vector) :
+ FeatureExecutor(),
+ _attribute(attribute),
+ _multiplier(IAccelrated::getAccelrator()),
+ _vector(vector)
+{
+}
+
+template <typename A>
+size_t
+DotProductExecutor<A>::getAttributeValues(uint32_t docId, const AT * & values)
+{
+ return _attribute->getRawValues(docId, values);
+}
+
+template <typename A>
+void
+DotProductExecutor<A>::execute(MatchData & match)
+{
+ const AT *values(NULL);
+ size_t count = getAttributeValues(match.getDocId(), values);
+ size_t commonRange = std::min(count, _vector.size());
+ *match.resolveFeature(outputs()[0]) = _multiplier->dotProduct(&_vector[0], reinterpret_cast<const typename A::BaseType *>(values), commonRange);
+}
+
+template <typename A>
+SparseDotProductExecutor<A>::SparseDotProductExecutor(const A * attribute, const V & values, const IV & indexes) :
+ DotProductExecutor<A>(attribute, values),
+ _indexes(indexes),
+ _scratch(std::max(static_cast<size_t>(attribute->getMaxValueCount()), indexes.size()))
+{
+}
+
+template <typename A>
+size_t
+SparseDotProductExecutor<A>::getAttributeValues(uint32_t docId, const AT * & values)
+{
+ const AT *allValues(NULL);
+ size_t count = this->_attribute->getRawValues(docId, allValues);
+ values = &_scratch[0];
+ size_t i(0);
+ for (; (i < _indexes.size()) && (_indexes[i] < count); i++) {
+ _scratch[i] = allValues[_indexes[i]];
+ }
+ return i;
+}
+
+template <typename A>
+DotProductByCopyExecutor<A>::DotProductByCopyExecutor(const A * attribute, const V & values) :
+ DotProductExecutor<A>(attribute, values),
+ _copy(static_cast<size_t>(attribute->getMaxValueCount()))
+{
+}
+
+template <typename A>
+size_t
+DotProductByCopyExecutor<A>::getAttributeValues(uint32_t docId, const AT * & values)
+{
+ size_t count = this->_attribute->getAll(docId, &_copy[0], _copy.size());
+ if (count > _copy.size()) {
+ _copy.resize(count);
+ count = this->_attribute->getAll(docId, &_copy[0], _copy.size());
+ }
+ values = reinterpret_cast<const AT *>(&_copy[0]);
+ return count;
+}
+
+template <typename A>
+SparseDotProductByCopyExecutor<A>::SparseDotProductByCopyExecutor(const A * attribute, const V & values, const IV & indexes) :
+ SparseDotProductExecutor<A>(attribute, values, indexes),
+ _copy(std::max(static_cast<size_t>(attribute->getMaxValueCount()), indexes.size()))
+{
+}
+
+template <typename A>
+size_t
+SparseDotProductByCopyExecutor<A>::getAttributeValues(uint32_t docId, const AT * & values)
+{
+ size_t count = this->_attribute->getAll(docId, &_copy[0], _copy.size());
+ if (count > _copy.size()) {
+ _copy.resize(count);
+ count = this->_attribute->getAll(docId, &_copy[0], _copy.size());
+ }
+ size_t i(0);
+ for (const IV & iv(this->_indexes); (i < iv.size()) && (iv[i] < count); i++) {
+ if (i != iv[i]) {
+ _copy[i] = _copy[iv[i]];
+ }
+ }
+ values = reinterpret_cast<const AT *>(&_copy[0]);
+ return i;
+}
+
+}
+
+}
+
+
+DotProductBlueprint::DotProductBlueprint() :
+ Blueprint("dotProduct"),
+ _defaultAttribute(),
+ _queryVector()
+{
+}
+
+vespalib::string
+DotProductBlueprint::getAttribute(const IQueryEnvironment & env) const
+{
+ Property prop = env.getProperties().lookup(getBaseName(), _defaultAttribute + ".override.name");
+ if (prop.found() && !prop.get().empty()) {
+ return prop.get();
+ }
+ return _defaultAttribute;
+}
+
+void
+DotProductBlueprint::visitDumpFeatures(const IIndexEnvironment &, IDumpFeatureVisitor &) const
+{
+}
+
+bool
+DotProductBlueprint::setup(const IIndexEnvironment & env, const ParameterList & params)
+{
+ _defaultAttribute = params[0].getValue();
+ _queryVector = params[1].getValue();
+ describeOutput("scalar", "The result after calculating the dot product of the vector represented by the weighted set "
+ "and the vector sent down with the query");
+ env.hintAttributeAccess(_defaultAttribute);
+ return true;
+}
+
+Blueprint::UP
+DotProductBlueprint::createInstance() const
+{
+ return Blueprint::UP(new DotProductBlueprint());
+}
+
+namespace {
+
+template <typename T>
+void
+parseVectors(const Property & prop, std::vector<T> & values, std::vector<uint32_t> & indexes)
+{
+ typedef std::vector<ArrayParser::ValueAndIndex<T>> SparseV;
+ SparseV sparse;
+ ArrayParser::parsePartial(prop.get(), sparse);
+ if ( ! sparse.empty()) {
+ std::sort(sparse.begin(), sparse.end());
+ if ((sparse.back().getIndex()+1)/sparse.size() < 10) {
+ values.resize(sparse.back().getIndex()+1);
+ for(const typename SparseV::value_type & a : sparse) {
+ values[a.getIndex()] = a.getValue();
+ }
+ } else {
+ values.reserve(sparse.size());
+ indexes.reserve(sparse.size());
+ for(const typename SparseV::value_type & a : sparse) {
+ values.push_back(a.getValue());
+ indexes.push_back(a.getIndex());
+ }
+ }
+ }
+}
+
+template <typename A>
+FeatureExecutor::LP
+create(const IAttributeVector * attribute, const Property & prop)
+{
+ std::vector<typename A::BaseType> values;
+ std::vector<uint32_t> indexes;
+ parseVectors(prop, values, indexes);
+ if (values.empty()) {
+ return FeatureExecutor::LP(new SingleZeroValueExecutor());
+ }
+ const A & iattr = dynamic_cast<const A &>(*attribute);
+ if (indexes.empty()) {
+ try {
+ const multivalue::Value<typename A::BaseType> * tmp;
+ iattr.getRawValues(0, tmp);
+ return FeatureExecutor::LP(new dotproduct::array::DotProductExecutor<A>(&iattr, values));
+ } catch (const std::runtime_error & e) {
+ (void) e;
+ return FeatureExecutor::LP(new dotproduct::array::DotProductByCopyExecutor<A>(&iattr, values));
+ }
+ } else {
+ try {
+ const multivalue::Value<typename A::BaseType> * tmp;
+ iattr.getRawValues(0, tmp);
+ return FeatureExecutor::LP(new dotproduct::array::SparseDotProductExecutor<A>(&iattr, values, indexes));
+ } catch (const std::runtime_error & e) {
+ (void) e;
+ return FeatureExecutor::LP(new dotproduct::array::SparseDotProductByCopyExecutor<A>(&iattr, values, indexes));
+ }
+ }
+ return FeatureExecutor::LP(new SingleZeroValueExecutor());
+}
+
+template <typename T>
+struct ArrayParam : public fef::Anything
+{
+ ArrayParam(const Property & prop) {
+ parseVectors(prop, values, indexes);
+ }
+ std::vector<T> values;
+ std::vector<uint32_t> indexes;
+};
+
+template <typename A>
+FeatureExecutor::LP
+create(const IAttributeVector * attribute, const ArrayParam<typename A::BaseType> & arguments)
+{
+ if (arguments.values.empty()) {
+ return FeatureExecutor::LP(new SingleZeroValueExecutor());
+ }
+ const A & iattr = dynamic_cast<const A &>(*attribute);
+ if (arguments.indexes.empty()) {
+ try {
+ const multivalue::Value<typename A::BaseType> * tmp;
+ iattr.getRawValues(0, tmp);
+ return FeatureExecutor::LP(new dotproduct::array::DotProductExecutor<A>(&iattr, arguments.values));
+ } catch (const std::runtime_error & e) {
+ (void) e;
+ return FeatureExecutor::LP(new dotproduct::array::DotProductByCopyExecutor<A>(&iattr, arguments.values));
+ }
+ } else {
+ try {
+ const multivalue::Value<typename A::BaseType> * tmp;
+ iattr.getRawValues(0, tmp);
+ return FeatureExecutor::LP(new dotproduct::array::SparseDotProductExecutor<A>(&iattr, arguments.values, arguments.indexes));
+ } catch (const std::runtime_error & e) {
+ (void) e;
+ return FeatureExecutor::LP(new dotproduct::array::SparseDotProductByCopyExecutor<A>(&iattr, arguments.values, arguments.indexes));
+ }
+ }
+ return FeatureExecutor::LP(new SingleZeroValueExecutor());
+}
+
+//const char * BINARY = "binary";
+const char * OBJECT = "object";
+
+
+FeatureExecutor::LP
+createFromObject(const IAttributeVector * attribute, const fef::Anything & object)
+{
+ if (attribute->getCollectionType() == attribute::CollectionType::ARRAY) {
+ switch (attribute->getBasicType()) {
+ case BasicType::INT32:
+ return create<IntegerAttributeTemplate<int32_t>>(attribute, dynamic_cast<const ArrayParam<int32_t> &>(object));
+ case BasicType::INT64:
+ return create<IntegerAttributeTemplate<int64_t>>(attribute, dynamic_cast<const ArrayParam<int64_t> &>(object));
+ case BasicType::FLOAT:
+ return create<FloatingPointAttributeTemplate<float>>(attribute, dynamic_cast<const ArrayParam<float> &>(object));
+ case BasicType::DOUBLE:
+ return create<FloatingPointAttributeTemplate<double>>(attribute, dynamic_cast<const ArrayParam<double> &>(object));
+ default:
+ break;
+ }
+ }
+ // TODO: Add support for creating executor for weighted set string / integer attribute
+ // where the query vector is represented as an object instead of a string.
+ LOG(warning, "The attribute vector '%s' is NOT of type array<int/long/float/double>"
+ ", returning executor with default value.", attribute->getName().c_str());
+ return FeatureExecutor::LP(new SingleZeroValueExecutor());
+}
+
+FeatureExecutor::LP
+createFromString(const IAttributeVector * attribute, const Property & prop)
+{
+ if (attribute->getCollectionType() == attribute::CollectionType::WSET) {
+ if (attribute->isStringType()) {
+ if (attribute->hasEnum()) {
+ dotproduct::wset::EnumVector vector(attribute);
+ WeightedSetParser::parse(prop.get(), vector);
+ return FeatureExecutor::LP
+ (new dotproduct::wset::DotProductExecutor<dotproduct::wset::EnumVector, WeightedEnumContent>(attribute, vector));
+ } else {
+ dotproduct::wset::StringVector vector;
+ WeightedSetParser::parse(prop.get(), vector);
+ return FeatureExecutor::LP
+ (new dotproduct::wset::DotProductExecutor<dotproduct::wset::StringVector, WeightedConstCharContent>(attribute, vector));
+ }
+ } else if (attribute->isIntegerType()) {
+ if (attribute->hasEnum()) {
+ dotproduct::wset::EnumVector vector(attribute);
+ WeightedSetParser::parse(prop.get(), vector);
+ return FeatureExecutor::LP
+ (new dotproduct::wset::DotProductExecutor<dotproduct::wset::EnumVector, WeightedEnumContent>(attribute, vector));
+
+ } else {
+ dotproduct::wset::IntegerVector vector;
+ WeightedSetParser::parse(prop.get(), vector);
+ return FeatureExecutor::LP
+ (new dotproduct::wset::DotProductExecutor<dotproduct::wset::IntegerVector, WeightedIntegerContent>(attribute, vector));
+ }
+ }
+ } else if (attribute->getCollectionType() == attribute::CollectionType::ARRAY) {
+ switch (attribute->getBasicType()) {
+ case BasicType::INT32:
+ return create<IntegerAttributeTemplate<int32_t>>(attribute, prop);
+ case BasicType::INT64:
+ return create<IntegerAttributeTemplate<int64_t>>(attribute, prop);
+ case BasicType::FLOAT:
+ return create<FloatingPointAttributeTemplate<float>>(attribute, prop);
+ case BasicType::DOUBLE:
+ return create<FloatingPointAttributeTemplate<double>>(attribute, prop);
+ default:
+ break;
+ }
+ }
+ LOG(warning, "The attribute vector '%s' is not of type weighted set string/integer nor"
+ " array<int/long/float/double>, returning executor with default value.", attribute->getName().c_str());
+ return FeatureExecutor::LP(new SingleZeroValueExecutor());
+}
+
+}
+
+void
+DotProductBlueprint::prepareSharedState(const IQueryEnvironment & env, IObjectStore & store) const
+{
+ const IAttributeVector * attribute = env.getAttributeContext().getAttribute(getAttribute(env));
+ if (attribute != NULL) {
+ if ((attribute->getCollectionType() == attribute::CollectionType::WSET) &&
+ attribute->hasEnum() &&
+ (attribute->isStringType() || attribute->isIntegerType()))
+ {
+ attribute = env.getAttributeContext().getAttributeStableEnum(getAttribute(env));
+ }
+ Property prop = env.getProperties().lookup(getBaseName(), _queryVector);
+ if (prop.found() && !prop.get().empty()) {
+ fef::Anything::UP arguments;
+ if (attribute->getCollectionType() == attribute::CollectionType::WSET) {
+ if (attribute->isStringType() && attribute->hasEnum()) {
+ dotproduct::wset::EnumVector vector(attribute);
+ WeightedSetParser::parse(prop.get(), vector);
+ } else if (attribute->isIntegerType()) {
+ if (attribute->hasEnum()) {
+ dotproduct::wset::EnumVector vector(attribute);
+ WeightedSetParser::parse(prop.get(), vector);
+ } else {
+ dotproduct::wset::IntegerVector vector;
+ WeightedSetParser::parse(prop.get(), vector);
+ }
+ }
+ } else if (attribute->getCollectionType() == attribute::CollectionType::ARRAY) {
+ switch (attribute->getBasicType()) {
+ case BasicType::INT32:
+ arguments.reset(new ArrayParam<int32_t>(prop));
+ break;
+ case BasicType::INT64:
+ arguments.reset(new ArrayParam<int64_t>(prop));
+ break;
+ case BasicType::FLOAT:
+ arguments.reset(new ArrayParam<float>(prop));
+ break;
+ case BasicType::DOUBLE:
+ arguments.reset(new ArrayParam<double>(prop));
+ break;
+ default:
+ break;
+ }
+ }
+ if ( arguments.get()) {
+ store.add(getBaseName() + "." + _queryVector + "." + OBJECT, std::move(arguments));
+ }
+ }
+ }
+}
+
+FeatureExecutor::LP
+DotProductBlueprint::createExecutor(const IQueryEnvironment & env) const
+{
+ const IAttributeVector * attribute = env.getAttributeContext().getAttribute(getAttribute(env));
+ if (attribute == NULL) {
+ LOG(warning, "The attribute vector '%s' was not found in the attribute manager, returning executor with default value.",
+ getAttribute(env).c_str());
+ return FeatureExecutor::LP(new SingleZeroValueExecutor());
+ }
+ if ((attribute->getCollectionType() == attribute::CollectionType::WSET) &&
+ attribute->hasEnum() &&
+ (attribute->isStringType() || attribute->isIntegerType()))
+ {
+ attribute = env.getAttributeContext().getAttributeStableEnum(getAttribute(env));
+ }
+ const fef::Anything * argument = env.getObjectStore().get(getBaseName() + "." + _queryVector + "." + OBJECT);
+ if (argument != NULL) {
+ return createFromObject(attribute, *argument);
+ } else {
+ Property prop = env.getProperties().lookup(getBaseName(), _queryVector);
+ if (prop.found() && !prop.get().empty()) {
+ return createFromString(attribute, prop);
+ }
+ }
+ return FeatureExecutor::LP(new SingleZeroValueExecutor());
+}
+
+} // namespace features
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/features/dotproductfeature.h b/searchlib/src/vespa/searchlib/features/dotproductfeature.h
new file mode 100644
index 00000000000..6142914ae32
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/dotproductfeature.h
@@ -0,0 +1,217 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/vespalib/stllike/hash_map.h>
+#include <vespa/searchlib/fef/blueprint.h>
+#include <vespa/searchlib/fef/featureexecutor.h>
+#include <vespa/searchlib/attribute/multivalue.h>
+#include <vespa/vespalib/hwaccelrated/iaccelrated.h>
+#include "utils.h"
+
+
+namespace search {
+namespace features {
+
+namespace dotproduct {
+
+struct ConstCharComparator {
+ bool operator()(const char * lhs, const char * rhs) const {
+ return strcmp(lhs, rhs) == 0;
+ }
+};
+
+template <typename Src, typename Dst>
+struct Converter {
+ Dst convert(const Src & value) const { return value; }
+};
+
+template <>
+struct Converter<vespalib::string, const char *> {
+ const char * convert(const vespalib::string & value) const { return value.c_str(); }
+};
+
+namespace wset {
+
+template <typename DimensionVType, typename DimensionHType, typename ComponentType, typename HashMapComparator = std::equal_to<DimensionHType> >
+class VectorBase {
+public:
+ typedef std::pair<DimensionVType, ComponentType> Element; // <dimension, component>
+ typedef std::vector<Element> Vector;
+ typedef vespalib::hash_map<DimensionHType, ComponentType, vespalib::hash<DimensionHType>, HashMapComparator> HashMap;
+protected:
+ Vector _vector;
+ HashMap _dimMap; // dimension -> component
+public:
+ const Vector & getVector() const { return _vector; }
+ void syncMap() {
+ Converter<DimensionVType, DimensionHType> conv;
+ _dimMap.clear();
+ _dimMap.resize(_vector.size()*2);
+ for (size_t i = 0; i < _vector.size(); ++i) {
+ _dimMap.insert(std::make_pair(conv.convert(_vector[i].first), _vector[i].second));
+ }
+ }
+ const HashMap & getDimMap() const { return _dimMap; }
+};
+
+/**
+ * Represents a vector where the dimensions are integers.
+ **/
+class IntegerVector : public VectorBase<int64_t, int64_t, feature_t> {
+public:
+ void insert(const vespalib::stringref & label, const vespalib::stringref & value) {
+ _vector.push_back(std::make_pair(util::strToNum<int64_t>(label), util::strToNum<feature_t>(value)));
+ }
+};
+
+/**
+ * Represents a vector where the dimensions are string values.
+ **/
+class StringVector : public VectorBase<vespalib::string, const char *, feature_t, ConstCharComparator> {
+public:
+ void insert(const vespalib::stringref & label, const vespalib::stringref & value) {
+ _vector.push_back(std::make_pair(label, util::strToNum<feature_t>(value)));
+ }
+};
+
+/**
+ * Represents a vector where the dimensions are enum values for strings.
+ **/
+class EnumVector : public VectorBase<search::attribute::EnumHandle, search::attribute::EnumHandle, feature_t> {
+private:
+ const search::attribute::IAttributeVector * _attribute;
+public:
+ EnumVector(const search::attribute::IAttributeVector * attribute) : _attribute(attribute) {}
+ void insert(const vespalib::stringref & label, const vespalib::stringref & value) {
+ search::attribute::EnumHandle e;
+ if (_attribute->findEnum(label.c_str(), e)) {
+ _vector.push_back(std::make_pair(e, util::strToNum<feature_t>(value)));
+ }
+ }
+};
+
+
+/**
+ * Implements the executor for the dotproduct feature.
+ */
+template <typename Vector, typename Buffer>
+class DotProductExecutor : public fef::FeatureExecutor {
+private:
+ const search::attribute::IAttributeVector * _attribute;
+ Vector _vector;
+ Buffer _buffer;
+
+public:
+ DotProductExecutor(const search::attribute::IAttributeVector * attribute, const Vector & vector);
+ virtual void execute(fef::MatchData & data);
+};
+
+}
+
+namespace array {
+
+/**
+ * Implements the executor for the dotproduct feature.
+ */
+template <typename A>
+class DotProductExecutor : public fef::FeatureExecutor {
+public:
+ typedef multivalue::Value<typename A::BaseType> AT;
+ typedef std::vector<typename A::BaseType> V;
+protected:
+ const A * _attribute;
+private:
+ vespalib::hwaccelrated::IAccelrated::UP _multiplier;
+ V _vector;
+ virtual size_t getAttributeValues(uint32_t docid, const AT * & count);
+public:
+ DotProductExecutor(const A * attribute, const V & vector);
+ virtual void execute(fef::MatchData & data);
+};
+
+template <typename A>
+class DotProductByCopyExecutor : public DotProductExecutor<A> {
+public:
+ typedef typename DotProductExecutor<A>::V V;
+ DotProductByCopyExecutor(const A * attribute, const V & vector);
+private:
+ typedef typename DotProductExecutor<A>::AT AT;
+ virtual size_t getAttributeValues(uint32_t docid, const AT * & count);
+ std::vector<typename A::BaseType> _copy;
+};
+
+template <typename A>
+class SparseDotProductExecutor : public DotProductExecutor<A> {
+public:
+ typedef std::vector<uint32_t> IV;
+ typedef typename DotProductExecutor<A>::V V;
+ SparseDotProductExecutor(const A * attribute, const V & vector, const IV & indexes);
+private:
+ typedef typename DotProductExecutor<A>::AT AT;
+ virtual size_t getAttributeValues(uint32_t docid, const AT * & count);
+protected:
+ IV _indexes;
+ std::vector<AT> _scratch;
+};
+
+template <typename A>
+class SparseDotProductByCopyExecutor : public SparseDotProductExecutor<A> {
+public:
+ typedef std::vector<uint32_t> IV;
+ typedef typename DotProductExecutor<A>::V V;
+ SparseDotProductByCopyExecutor(const A * attribute, const V & vector, const IV & indexes);
+private:
+ typedef typename DotProductExecutor<A>::AT AT;
+ virtual size_t getAttributeValues(uint32_t docid, const AT * & count);
+ std::vector<typename A::BaseType> _copy;
+};
+
+}
+
+}
+
+
+/**
+ * Implements the blueprint for the foreach executor.
+ */
+class DotProductBlueprint : public fef::Blueprint {
+private:
+ vespalib::string _defaultAttribute;
+ vespalib::string _queryVector;
+
+ vespalib::string getAttribute(const fef::IQueryEnvironment & env) const;
+
+public:
+ /**
+ * Constructs a blueprint.
+ */
+ DotProductBlueprint();
+
+ // Inherit doc from Blueprint.
+ virtual void visitDumpFeatures(const fef::IIndexEnvironment & env,
+ fef::IDumpFeatureVisitor & visitor) const;
+
+ // Inherit doc from Blueprint.
+ virtual fef::Blueprint::UP createInstance() const;
+
+ // Inherit doc from Blueprint.
+ virtual fef::ParameterDescriptions getDescriptions() const {
+ return fef::ParameterDescriptions().desc().attribute(fef::ParameterCollection::ANY).string();
+ }
+
+ // Inherit doc from Blueprint.
+ virtual bool setup(const fef::IIndexEnvironment & env,
+ const fef::ParameterList & params);
+
+ virtual void prepareSharedState(const fef::IQueryEnvironment & queryEnv, fef::IObjectStore & objectStore) const;
+
+ // Inherit doc from Blueprint.
+ virtual fef::FeatureExecutor::LP createExecutor(const fef::IQueryEnvironment & env) const;
+
+};
+
+
+} // namespace features
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/features/element_completeness_feature.cpp b/searchlib/src/vespa/searchlib/features/element_completeness_feature.cpp
new file mode 100644
index 00000000000..9b5945432e1
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/element_completeness_feature.cpp
@@ -0,0 +1,143 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".features.elementcompleteness");
+#include "element_completeness_feature.h"
+
+namespace search {
+namespace features {
+
+//-----------------------------------------------------------------------------
+
+ElementCompletenessExecutor::ElementCompletenessExecutor(const search::fef::IQueryEnvironment &env,
+ const ElementCompletenessParams &params)
+ : _params(params),
+ _terms(),
+ _queue(),
+ _sumTermWeight(0)
+{
+ for (uint32_t i = 0; i < env.getNumTerms(); ++i) {
+ const search::fef::ITermData *termData = env.getTerm(i);
+ if (termData->getWeight().percent() != 0) { // only consider query terms with contribution
+ typedef search::fef::ITermFieldRangeAdapter FRA;
+ for (FRA iter(*termData); iter.valid(); iter.next()) {
+ const search::fef::ITermFieldData &tfd = iter.get();
+ if (tfd.getFieldId() == _params.fieldId) {
+ int termWeight = termData->getWeight().percent();
+ _sumTermWeight += termWeight;
+ _terms.push_back(Term(tfd.getHandle(), termWeight));
+ }
+ }
+ }
+ }
+}
+
+void
+ElementCompletenessExecutor::execute(search::fef::MatchData &data)
+{
+ assert(_queue.empty());
+ for (size_t i = 0; i < _terms.size(); ++i) {
+ search::fef::TermFieldMatchData *tfmd = data.resolveTermField(_terms[i].termHandle);
+ if (tfmd->getDocId() == data.getDocId()) {
+ Item item(i, tfmd->begin(), tfmd->end());
+ if (item.pos != item.end) {
+ _queue.push(item);
+ }
+ }
+ }
+ State best(0, 0);
+ while (!_queue.empty()) {
+ uint32_t elementId = _queue.front().pos->getElementId();
+ State state(_queue.front().pos->getElementWeight(),
+ _queue.front().pos->getElementLen());
+ while (!_queue.empty() && _queue.front().pos->getElementId() == elementId) {
+ state.addMatch(_terms[_queue.front().termIdx].termWeight);
+ Item &item = _queue.front();
+ while (item.pos != item.end && item.pos->getElementId() == elementId) {
+ ++item.pos;
+ }
+ if (item.pos == item.end) {
+ _queue.pop_front();
+ } else {
+ _queue.adjust();
+ }
+ }
+ state.calculateScore(_sumTermWeight, _params.fieldCompletenessImportance);
+ if (state.score > best.score) {
+ best = state;
+ }
+ }
+ *data.resolveFeature(outputs()[0]) = best.completeness;
+ *data.resolveFeature(outputs()[1]) = best.fieldCompleteness;
+ *data.resolveFeature(outputs()[2]) = best.queryCompleteness;
+ *data.resolveFeature(outputs()[3]) = best.elementWeight;
+}
+
+//-----------------------------------------------------------------------------
+
+ElementCompletenessBlueprint::ElementCompletenessBlueprint()
+ : Blueprint("elementCompleteness"),
+ _output(),
+ _params()
+{
+ _output.push_back("completeness");
+ _output.push_back("fieldCompleteness");
+ _output.push_back("queryCompleteness");
+ _output.push_back("elementWeight");
+}
+
+void
+ElementCompletenessBlueprint::visitDumpFeatures(const search::fef::IIndexEnvironment &env,
+ search::fef::IDumpFeatureVisitor &visitor) const
+{
+ for (uint32_t i = 0; i < env.getNumFields(); ++i) {
+ const search::fef::FieldInfo &field = *env.getField(i);
+ if (field.type() == search::fef::FieldType::INDEX) {
+ if (!field.isFilter()) {
+ search::fef::FeatureNameBuilder fnb;
+ fnb.baseName(getBaseName()).parameter(field.name());
+ for (size_t out = 0; out < _output.size(); ++out) {
+ visitor.visitDumpFeature(fnb.output(_output[out]).buildName());
+ }
+ }
+ }
+ }
+}
+
+search::fef::Blueprint::UP
+ElementCompletenessBlueprint::createInstance() const
+{
+ return Blueprint::UP(new ElementCompletenessBlueprint());
+}
+
+bool
+ElementCompletenessBlueprint::setup(const search::fef::IIndexEnvironment &env,
+ const search::fef::ParameterList &params)
+{
+ const search::fef::FieldInfo *field = params[0].asField();
+
+ _params.fieldId = field->id();
+ const search::fef::Properties &lst = env.getProperties();
+ search::fef::Property obj = lst.lookup(getName(), "fieldCompletenessImportance");
+ if (obj.found()) {
+ _params.fieldCompletenessImportance = atof(obj.get().c_str());
+ }
+ describeOutput(_output[0], "combined completeness for best scored element");
+ describeOutput(_output[1], "best scored element completeness");
+ describeOutput(_output[2], "query completeness for best scored element");
+ describeOutput(_output[3], "element weight of best scored element");
+ env.hintFieldAccess(field->id());
+ return true;
+}
+
+search::fef::FeatureExecutor::LP
+ElementCompletenessBlueprint::createExecutor(const search::fef::IQueryEnvironment &env) const
+{
+ return search::fef::FeatureExecutor::LP(new ElementCompletenessExecutor(env, _params));
+}
+
+//-----------------------------------------------------------------------------
+
+} // namespace features
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/features/element_completeness_feature.h b/searchlib/src/vespa/searchlib/features/element_completeness_feature.h
new file mode 100644
index 00000000000..b092fcd8fa1
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/element_completeness_feature.h
@@ -0,0 +1,131 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <string>
+#include <vector>
+#include <vespa/searchlib/fef/fef.h>
+#include <vespa/vespalib/util/priority_queue.h>
+
+namespace search {
+namespace features {
+
+//-----------------------------------------------------------------------------
+
+struct ElementCompletenessParams {
+ uint32_t fieldId;
+ feature_t fieldCompletenessImportance;
+ ElementCompletenessParams()
+ : fieldId(search::fef::IllegalFieldId),
+ fieldCompletenessImportance(0.5) {}
+};
+
+//-----------------------------------------------------------------------------
+
+class ElementCompletenessExecutor : public search::fef::FeatureExecutor
+{
+private:
+ struct Term {
+ search::fef::TermFieldHandle termHandle;
+ int termWeight;
+ Term(search::fef::TermFieldHandle handle, int weight)
+ : termHandle(handle), termWeight(weight) {}
+ };
+
+ struct Item {
+ uint32_t termIdx;
+ search::fef::TermFieldMatchData::PositionsIterator pos;
+ search::fef::TermFieldMatchData::PositionsIterator end;
+ Item(uint32_t idx,
+ search::fef::TermFieldMatchData::PositionsIterator p,
+ search::fef::TermFieldMatchData::PositionsIterator e)
+ : termIdx(idx), pos(p), end(e) {}
+ bool operator<(const Item &other) const {
+ return (pos->getElementId() < other.pos->getElementId());
+ }
+ };
+
+ struct State {
+ int elementWeight;
+ uint32_t elementLength;
+ uint32_t matchedTerms;
+ int sumTermWeight;
+ double score;
+ feature_t completeness;
+ feature_t fieldCompleteness;
+ feature_t queryCompleteness;
+
+ State(int weight, uint32_t length)
+ : elementWeight(weight), elementLength(length),
+ matchedTerms(0), sumTermWeight(0),
+ score(0.0),
+ completeness(0.0), fieldCompleteness(0.0), queryCompleteness(0.0) {}
+
+ void addMatch(int termWeight) {
+ ++matchedTerms;
+ sumTermWeight += termWeight;
+ }
+
+ void calculateScore(int totalTermWeight, double factor) {
+ double matches = std::min(elementLength, matchedTerms);
+ queryCompleteness = ((double)sumTermWeight / (double)totalTermWeight);
+ fieldCompleteness = (matches / (double)elementLength);
+ completeness = (fieldCompleteness * factor) +
+ (queryCompleteness * (1 - factor));
+ score = completeness * (double)elementWeight;
+ }
+ };
+
+ ElementCompletenessParams _params;
+ std::vector<Term> _terms;
+ vespalib::PriorityQueue<Item> _queue;
+ int _sumTermWeight;
+
+ static bool nextElement(Item &item);
+
+public:
+ ElementCompletenessExecutor(const search::fef::IQueryEnvironment &env,
+ const ElementCompletenessParams &params);
+ virtual bool isPure() { return _terms.empty(); }
+ virtual void execute(search::fef::MatchData & data);
+};
+
+//-----------------------------------------------------------------------------
+
+class ElementCompletenessBlueprint : public search::fef::Blueprint
+{
+private:
+ std::vector<vespalib::string> _output;
+ ElementCompletenessParams _params;
+
+public:
+ ElementCompletenessBlueprint();
+
+ // Inherit doc from Blueprint.
+ virtual void visitDumpFeatures(const search::fef::IIndexEnvironment & env,
+ search::fef::IDumpFeatureVisitor & visitor) const;
+
+ // Inherit doc from Blueprint.
+ virtual search::fef::Blueprint::UP createInstance() const;
+
+ // Inherit doc from Blueprint.
+ virtual search::fef::ParameterDescriptions getDescriptions() const {
+ return search::fef::ParameterDescriptions().desc().indexField(search::fef::ParameterCollection::ANY);
+ }
+
+ // Inherit doc from Blueprint.
+ virtual bool setup(const search::fef::IIndexEnvironment &env,
+ const search::fef::ParameterList &params);
+
+ // Inherit doc from Blueprint.
+ virtual search::fef::FeatureExecutor::LP createExecutor(const search::fef::IQueryEnvironment & env) const;
+
+ // for testing
+ const ElementCompletenessParams &getParams() const { return _params; }
+};
+
+//-----------------------------------------------------------------------------
+
+} // namespace features
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/features/element_similarity_feature.cpp b/searchlib/src/vespa/searchlib/features/element_similarity_feature.cpp
new file mode 100644
index 00000000000..a0b294d390e
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/element_similarity_feature.cpp
@@ -0,0 +1,417 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".features.elementsimilarity");
+#include "element_similarity_feature.h"
+#include <vespa/vespalib/eval/compiled_function.h>
+#include <vespa/vespalib/eval/compile_cache.h>
+
+namespace search {
+namespace features {
+
+namespace {
+
+//-----------------------------------------------------------------------------
+
+struct Aggregator {
+ typedef std::unique_ptr<Aggregator> UP;
+ virtual UP create() const = 0;
+ virtual void clear() = 0;
+ virtual void add(double) = 0;
+ virtual double get() const = 0;
+ virtual ~Aggregator() {}
+};
+
+struct MaxAggregator : Aggregator {
+ size_t count;
+ double value;
+ MaxAggregator() : count(0), value(0.0) {}
+ virtual UP create() const override { return UP(new MaxAggregator()); }
+ virtual void clear() override { count = 0; value = 0.0; }
+ virtual void add(double v) override { value = ((++count == 1) || (v > value)) ? v : value; }
+ virtual double get() const override { return value; }
+};
+
+struct AvgAggregator : Aggregator {
+ size_t count;
+ double value;
+ AvgAggregator() : count(0), value(0.0) {}
+ virtual UP create() const override { return UP(new AvgAggregator()); }
+ virtual void clear() override { count = 0; value = 0.0; }
+ virtual void add(double v) override { ++count; value += v; }
+ virtual double get() const override { return (count == 0) ? 0.0 : (value/count); }
+};
+
+struct SumAggregator : Aggregator {
+ double value;
+ SumAggregator() : value(0.0) {}
+ virtual UP create() const override { return UP(new SumAggregator()); }
+ virtual void clear() override { value = 0.0; }
+ virtual void add(double v) override { value += v; }
+ virtual double get() const override { return value; }
+};
+
+Aggregator::UP create_aggregator(const vespalib::string &name) {
+ if (name == "max") {
+ return Aggregator::UP(new MaxAggregator());
+ }
+ if (name == "avg") {
+ return Aggregator::UP(new AvgAggregator());
+ }
+ if (name == "sum") {
+ return Aggregator::UP(new SumAggregator());
+ }
+ return Aggregator::UP(nullptr);
+}
+
+//-----------------------------------------------------------------------------
+
+typedef double (*function_5)(double, double, double, double, double);
+typedef std::pair<function_5, Aggregator::UP> OutputSpec;
+
+//-----------------------------------------------------------------------------
+
+struct VectorizedQueryTerms {
+ struct Term {
+ fef::TermFieldHandle handle;
+ int weight;
+ int index;
+ Term(fef::TermFieldHandle handle_in, int weight_in, int index_in)
+ : handle(handle_in), weight(weight_in), index(index_in) {}
+ };
+
+ std::vector<fef::TermFieldHandle> handles;
+ std::vector<int> weights;
+ int total_weight;
+
+ VectorizedQueryTerms(const VectorizedQueryTerms &) = delete;
+ VectorizedQueryTerms(VectorizedQueryTerms &&rhs)
+ : handles(std::move(rhs.handles)), weights(std::move(rhs.weights)), total_weight(rhs.total_weight) {}
+ VectorizedQueryTerms(const fef::IQueryEnvironment &env, uint32_t field_id)
+ : handles(), weights(), total_weight(0)
+ {
+ std::vector<Term> terms;
+ for (uint32_t i = 0; i < env.getNumTerms(); ++i) {
+ const fef::ITermData *termData = env.getTerm(i);
+ if (termData->getWeight().percent() != 0) { // only consider query terms with contribution
+ typedef fef::ITermFieldRangeAdapter FRA;
+ for (FRA iter(*termData); iter.valid(); iter.next()) {
+ const fef::ITermFieldData &tfd = iter.get();
+ if (tfd.getFieldId() == field_id) {
+ int term_weight = termData->getWeight().percent();
+ total_weight += term_weight;
+ terms.push_back(Term(tfd.getHandle(), term_weight,
+ termData->getTermIndex()));
+ }
+ }
+ }
+ }
+ std::sort(terms.begin(), terms.end(), [](const Term &a, const Term &b){ return (a.index < b.index); });
+ handles.reserve(terms.size());
+ weights.reserve(terms.size());
+ for (size_t i = 0; i < terms.size(); ++i) {
+ handles.push_back(terms[i].handle);
+ weights.push_back(terms[i].weight);
+ }
+ }
+};
+
+//-----------------------------------------------------------------------------
+
+struct State {
+ uint32_t element_length;
+ uint32_t matched_terms;
+ int sum_term_weight;
+ uint32_t last_pos;
+ double sum_proximity_score;
+ uint32_t last_idx;
+ uint32_t num_in_order;
+
+ double proximity;
+ double order;
+ double query_coverage;
+ double field_coverage;
+ double element_weight;
+
+ State(uint32_t element_length_in, int32_t element_weight_in,
+ uint32_t first_pos, int32_t first_weight, uint32_t first_idx)
+ : element_length(element_length_in),
+ matched_terms(1), sum_term_weight(first_weight),
+ last_pos(first_pos), sum_proximity_score(0.0),
+ last_idx(first_idx), num_in_order(0),
+ proximity(0.0), order(0.0),
+ query_coverage(0.0), field_coverage(0.0),
+ element_weight(element_weight_in) {}
+
+ double proximity_score(uint32_t dist) {
+ return (dist > 8) ? 0 : (1.0 - (((dist-1)/8.0) * ((dist-1)/8.0)));
+ }
+
+ bool want_match(uint32_t pos) {
+ return (pos > last_pos);
+ }
+
+ void addMatch(uint32_t pos, int32_t weight, uint32_t idx) {
+ sum_proximity_score += proximity_score(pos - last_pos);
+ num_in_order += (idx > last_idx) ? 1 : 0;
+ last_pos = pos;
+ last_idx = idx;
+ ++matched_terms;
+ sum_term_weight += weight;
+ }
+
+ void calculate_scores(size_t num_query_terms, int total_term_weight) {
+ double matches = std::min(element_length, matched_terms);
+ if (matches < 2) {
+ proximity = proximity_score(element_length);
+ order = (num_query_terms == 1) ? 1.0 : 0.0;
+ } else {
+ proximity = sum_proximity_score / (matches - 1);
+ order = num_in_order / (double) (matches - 1);
+ }
+ query_coverage = sum_term_weight / (double) total_term_weight;
+ field_coverage = matches / (double) element_length;
+ }
+};
+
+//-----------------------------------------------------------------------------
+
+class ElementSimilarityExecutor : public fef::FeatureExecutor
+{
+private:
+ typedef fef::TermFieldMatchData::PositionsIterator ITR;
+
+ struct CmpPosition {
+ ITR *pos;
+ CmpPosition(ITR *pos_in) : pos(pos_in) {}
+ bool operator()(uint16_t a, uint16_t b) {
+ return (pos[a]->getPosition() == pos[b]->getPosition())
+ ? (a < b)
+ : (pos[a]->getPosition() < pos[b]->getPosition());
+ }
+ };
+
+ struct CmpElement {
+ ITR *pos;
+ CmpElement(ITR *pos_in) : pos(pos_in) {}
+ bool operator()(uint16_t a, uint16_t b) {
+ return pos[a]->getElementId() < pos[b]->getElementId();
+ }
+ };
+
+ typedef vespalib::PriorityQueue<uint16_t, CmpPosition> PositionQueue;
+ typedef vespalib::PriorityQueue<uint16_t, CmpElement> ElementQueue;
+
+ VectorizedQueryTerms _terms;
+ std::vector<ITR> _pos;
+ std::vector<ITR> _end;
+ PositionQueue _position_queue;
+ ElementQueue _element_queue;
+ std::vector<OutputSpec> _outputs;
+
+public:
+ ElementSimilarityExecutor(VectorizedQueryTerms &&terms, std::vector<OutputSpec> &&outputs_in)
+ : _terms(std::move(terms)),
+ _pos(_terms.handles.size(), nullptr),
+ _end(_terms.handles.size(), nullptr),
+ _position_queue(CmpPosition(&_pos[0])),
+ _element_queue(CmpElement(&_pos[0])),
+ _outputs(std::move(outputs_in)) {}
+
+ virtual bool isPure() { return _terms.handles.empty(); }
+
+ void requeue_term(uint16_t term, uint32_t element) {
+ while (_pos[term] != _end[term] &&
+ _pos[term]->getElementId() == element)
+ {
+ ++_pos[term];
+ }
+ if (_pos[term] != _end[term]) {
+ _element_queue.push(term);
+ }
+ }
+
+ virtual void execute(fef::MatchData &data) {
+ for (auto &output: _outputs) {
+ output.second->clear();
+ }
+ for (size_t i = 0; i < _terms.handles.size(); ++i) {
+ fef::TermFieldMatchData *tfmd = data.resolveTermField(_terms.handles[i]);
+ if (tfmd->getDocId() == data.getDocId()) {
+ _pos[i] = tfmd->begin();
+ _end[i] = tfmd->end();
+ if (_pos[i] != _end[i]) {
+ _element_queue.push(i);
+ }
+ }
+ }
+ while (!_element_queue.empty()) {
+ uint32_t elementId = _pos[_element_queue.front()]->getElementId();
+ while (!_element_queue.empty() && _pos[_element_queue.front()]->getElementId() == elementId) {
+ _position_queue.push(_element_queue.front());
+ _element_queue.pop_front();
+ }
+ uint16_t first = _position_queue.front();
+ State state(_pos[first]->getElementLen(),
+ _pos[first]->getElementWeight(),
+ _pos[first]->getPosition(),
+ _terms.weights[first],
+ first);
+ requeue_term(_position_queue.front(), elementId);
+ _position_queue.pop_front();
+ while (!_position_queue.empty()) {
+ uint16_t item = _position_queue.front();
+ if (state.want_match(_pos[item]->getPosition())) {
+ state.addMatch(_pos[item]->getPosition(),
+ _terms.weights[item],
+ item);
+ requeue_term(_position_queue.front(), elementId);
+ _position_queue.pop_front();
+ } else {
+ ++_pos[item];
+ if (_pos[item] == _end[item]) {
+ _position_queue.pop_front();
+ } else {
+ _position_queue.adjust();
+ }
+ }
+ }
+ state.calculate_scores(_terms.handles.size(), _terms.total_weight);
+ for (auto &output: _outputs) {
+ output.second->add(output.first(state.proximity, state.order,
+ state.query_coverage, state.field_coverage,
+ state.element_weight));
+ }
+ }
+ for (size_t i = 0; i < _outputs.size(); ++i) {
+ *data.resolveFeature(outputs()[i]) = _outputs[i].second->get();
+ }
+ }
+};
+
+//-----------------------------------------------------------------------------
+
+std::vector<std::pair<vespalib::string, vespalib::string> > extract_properties(const fef::Properties &props,
+ const vespalib::string &ns, const vespalib::string &first_name, const vespalib::string &first_default)
+{
+ struct MyVisitor : fef::IPropertiesVisitor {
+ const vespalib::string &first_name;
+ std::vector<std::pair<vespalib::string, vespalib::string> > &result;
+ MyVisitor(const vespalib::string &first_name_in,
+ std::vector<std::pair<vespalib::string, vespalib::string> > &result_in)
+ : first_name(first_name_in), result(result_in) {}
+ virtual void visitProperty(const fef::Property::Value &key,
+ const fef::Property &values) override
+ {
+ if (key != first_name) {
+ result.emplace_back(key, values.get());
+ }
+ }
+ };
+ std::vector<std::pair<vespalib::string, vespalib::string> > result;
+ result.emplace_back(first_name, props.lookup(ns, first_name).get(first_default));
+ MyVisitor my_visitor(first_name, result);
+ props.visitNamespace(ns, my_visitor);
+ return result;
+}
+
+std::vector<std::pair<vespalib::string, vespalib::string> > get_outputs(const fef::Properties &props,
+ const vespalib::string &feature)
+{
+ return extract_properties(props, feature + ".output", "default", "max((0.35*p+0.15*o+0.30*q+0.20*f)*w)");
+}
+
+} // namespace features::<unnamed>
+
+//-----------------------------------------------------------------------------
+
+struct ElementSimilarityBlueprint::OutputContext {
+ vespalib::eval::CompileCache::Token::UP compile_token;
+ Aggregator::UP aggregator_factory;
+ OutputContext(const vespalib::eval::Function &function,
+ Aggregator::UP aggregator)
+ : compile_token(vespalib::eval::CompileCache::compile(function, vespalib::eval::PassParams::SEPARATE)),
+ aggregator_factory(std::move(aggregator)) {}
+};
+
+//-----------------------------------------------------------------------------
+
+ElementSimilarityBlueprint::ElementSimilarityBlueprint()
+ : Blueprint("elementSimilarity"), _field_id(fef::IllegalHandle), _outputs() {}
+
+ElementSimilarityBlueprint::~ElementSimilarityBlueprint() {}
+
+void
+ElementSimilarityBlueprint::visitDumpFeatures(const fef::IIndexEnvironment &env,
+ fef::IDumpFeatureVisitor &visitor) const
+{
+ for (uint32_t i = 0; i < env.getNumFields(); ++i) {
+ const fef::FieldInfo &field = *env.getField(i);
+ if ((field.type() == fef::FieldType::INDEX) &&
+ (field.collection() != fef::CollectionType::SINGLE) &&
+ ( ! field.isFilter()))
+ {
+ fef::FeatureNameBuilder fnb;
+ fnb.baseName(getBaseName()).parameter(field.name());
+ auto outputs = get_outputs(env.getProperties(), fnb.buildName());
+ visitor.visitDumpFeature(fnb.output("").buildName());
+ for (size_t out_idx = 1; out_idx < outputs.size(); ++out_idx) {
+ visitor.visitDumpFeature(fnb.output(outputs[out_idx].first).buildName());
+ }
+ }
+ }
+}
+
+bool
+ElementSimilarityBlueprint::setup(const fef::IIndexEnvironment &env,
+ const fef::ParameterList &params)
+{
+ const fef::FieldInfo *field = params[0].asField();
+ _field_id = field->id();
+ fef::FeatureNameBuilder fnb;
+ fnb.baseName(getBaseName()).parameter(field->name());
+ auto outputs = get_outputs(env.getProperties(), fnb.buildName());
+ for (const auto &entry: outputs) {
+ describeOutput(entry.first, entry.second);
+ vespalib::string aggr_name;
+ vespalib::string expr;
+ vespalib::string error;
+ if (!vespalib::eval::Function::unwrap(entry.second, aggr_name, expr, error)) {
+ LOG(warning, "'%s': could not extract aggregator and expression for output '%s' from config value '%s' (%s)",
+ fnb.buildName().c_str(), entry.first.c_str(), entry.second.c_str(), error.c_str());
+ return false;
+ }
+ Aggregator::UP aggr = create_aggregator(aggr_name);
+ if (aggr.get() == nullptr) {
+ LOG(warning, "'%s': unknown aggregator '%s'", fnb.buildName().c_str(), aggr_name.c_str());
+ return false;
+ }
+ std::vector<vespalib::string> args({"p","o","q","f","w"});
+ vespalib::eval::Function function = vespalib::eval::Function::parse(args, expr);
+ if (function.has_error()) {
+ LOG(warning, "'%s': per-element expression parse error: %s",
+ fnb.buildName().c_str(), function.get_error().c_str());
+ return false;
+ }
+ _outputs.push_back(OutputContext_UP(new OutputContext(function, std::move(aggr))));
+ }
+ env.hintFieldAccess(field->id());
+ return true;
+}
+
+fef::FeatureExecutor::LP
+ElementSimilarityBlueprint::createExecutor(const fef::IQueryEnvironment &env) const
+{
+ std::vector<OutputSpec> output_specs;
+ for (const auto &output: _outputs) {
+ output_specs.emplace_back(output->compile_token->get().get_function<5>(),
+ output->aggregator_factory->create());
+ }
+ return fef::FeatureExecutor::LP(new ElementSimilarityExecutor(VectorizedQueryTerms(env, _field_id), std::move(output_specs)));
+}
+
+//-----------------------------------------------------------------------------
+
+} // namespace features
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/features/element_similarity_feature.h b/searchlib/src/vespa/searchlib/features/element_similarity_feature.h
new file mode 100644
index 00000000000..e4424b29b1f
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/element_similarity_feature.h
@@ -0,0 +1,44 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <string>
+#include <vector>
+#include <vespa/searchlib/fef/fef.h>
+#include <vespa/vespalib/util/priority_queue.h>
+
+namespace search {
+namespace features {
+
+//-----------------------------------------------------------------------------
+
+class ElementSimilarityBlueprint : public search::fef::Blueprint
+{
+private:
+ struct OutputContext;
+ typedef std::unique_ptr<OutputContext> OutputContext_UP;
+
+ uint32_t _field_id;
+ std::vector<OutputContext_UP> _outputs;
+
+public:
+ ElementSimilarityBlueprint();
+ virtual ~ElementSimilarityBlueprint();
+ virtual void visitDumpFeatures(const search::fef::IIndexEnvironment &env,
+ search::fef::IDumpFeatureVisitor &visitor) const;
+ virtual search::fef::Blueprint::UP createInstance() const {
+ return Blueprint::UP(new ElementSimilarityBlueprint());
+ }
+ virtual search::fef::ParameterDescriptions getDescriptions() const {
+ return search::fef::ParameterDescriptions().desc().indexField(search::fef::ParameterCollection::ANY);
+ }
+ virtual bool setup(const search::fef::IIndexEnvironment &env,
+ const search::fef::ParameterList &params);
+ virtual search::fef::FeatureExecutor::LP createExecutor(const search::fef::IQueryEnvironment & env) const;
+};
+
+//-----------------------------------------------------------------------------
+
+} // namespace features
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/features/euclidean_distance_feature.cpp b/searchlib/src/vespa/searchlib/features/euclidean_distance_feature.cpp
new file mode 100644
index 00000000000..c77f47e3d08
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/euclidean_distance_feature.cpp
@@ -0,0 +1,123 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+#include <cmath>
+LOG_SETUP(".features.euclidean_distance_feature");
+#include <vespa/searchcommon/attribute/attributecontent.h>
+#include <vespa/searchcommon/attribute/iattributevector.h>
+#include <vespa/searchlib/fef/properties.h>
+#include "valuefeature.h"
+
+#include "euclidean_distance_feature.h"
+#include "array_parser.hpp"
+#include <vespa/searchlib/attribute/integerbase.h>
+#include <vespa/searchlib/attribute/floatbase.h>
+
+using namespace search::attribute;
+using namespace search::fef;
+
+namespace search {
+namespace features {
+
+
+template <typename DataType>
+EuclideanDistanceExecutor<DataType>::EuclideanDistanceExecutor(const search::attribute::IAttributeVector &attribute, QueryVectorType vector) :
+ FeatureExecutor(),
+ _attribute(attribute),
+ _vector(std::move(vector)),
+ _attributeBuffer()
+{
+}
+
+template <typename DataType>
+feature_t EuclideanDistanceExecutor<DataType>::euclideanDistance(const BufferType &v1, const QueryVectorType &v2)
+{
+ feature_t val = 0;
+ size_t commonRange = std::min(static_cast<size_t>( v1.size() ), v2.size());
+ for (size_t i = 0; i < commonRange; ++i) {
+ feature_t diff = v1[i] - v2[i];
+ val += diff * diff;
+ }
+ return std::sqrt(val);
+}
+
+
+template <typename DataType>
+void
+EuclideanDistanceExecutor<DataType>::execute(MatchData &match)
+{
+ _attributeBuffer.fill(_attribute, match.getDocId());
+ *match.resolveFeature(outputs()[0]) = euclideanDistance(_attributeBuffer, _vector);
+}
+
+
+EuclideanDistanceBlueprint::EuclideanDistanceBlueprint() :
+ Blueprint("euclideanDistance"),
+ _attributeName(),
+ _queryVector()
+{
+}
+
+void
+EuclideanDistanceBlueprint::visitDumpFeatures(const IIndexEnvironment &, IDumpFeatureVisitor &) const
+{
+}
+
+bool
+EuclideanDistanceBlueprint::setup(const IIndexEnvironment &env, const ParameterList &params)
+{
+ _attributeName = params[0].getValue();
+ _queryVector = params[1].getValue();
+ describeOutput("distance", "The result after calculating the euclidean distance of the vector represented by the array "
+ "and the vector sent down with the query");
+ env.hintAttributeAccess(_attributeName);
+ return true;
+}
+
+Blueprint::UP
+EuclideanDistanceBlueprint::createInstance() const
+{
+ return Blueprint::UP(new EuclideanDistanceBlueprint());
+}
+
+namespace {
+
+template <typename DataType>
+FeatureExecutor::LP create(const IAttributeVector &attribute, const Property &queryVector)
+{
+ std::vector<DataType> v;
+ ArrayParser::parse(queryVector.get(), v);
+ return FeatureExecutor::LP(new EuclideanDistanceExecutor<DataType>(attribute, std::move(v)));
+}
+
+}
+
+FeatureExecutor::LP
+EuclideanDistanceBlueprint::createExecutor(const IQueryEnvironment &env) const
+{
+ const IAttributeVector * attribute = env.getAttributeContext().getAttribute(_attributeName);
+ if (attribute == NULL) {
+ LOG(warning, "The attribute vector '%s' was not found in the attribute manager, returning executor with default value.",
+ _attributeName.c_str());
+ return FeatureExecutor::LP(new SingleZeroValueExecutor());
+ }
+
+ Property queryVector = env.getProperties().lookup(getBaseName(), _queryVector);
+
+ if (attribute->getCollectionType() == attribute::CollectionType::ARRAY) {
+ if (attribute->isIntegerType()) {
+ return create<IAttributeVector::largeint_t>(*attribute, queryVector);
+ } else if (attribute->isFloatingPointType()) {
+ return create<double>(*attribute, queryVector);
+ }
+ }
+ LOG(warning, "The attribute vector '%s' is NOT of type array<int/long/float/double>"
+ ", returning executor with default value.", attribute->getName().c_str());
+ return FeatureExecutor::LP(new SingleZeroValueExecutor());
+
+}
+
+} // namespace features
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/features/euclidean_distance_feature.h b/searchlib/src/vespa/searchlib/features/euclidean_distance_feature.h
new file mode 100644
index 00000000000..23df79621ee
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/euclidean_distance_feature.h
@@ -0,0 +1,76 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/fef/blueprint.h>
+#include <vespa/searchlib/fef/featureexecutor.h>
+#include <vespa/searchcommon/attribute/attributecontent.h>
+
+
+namespace search {
+namespace features {
+
+
+/**
+ * Implements the executor for the eucledian distance feature.
+ */
+template <typename DataType>
+class EuclideanDistanceExecutor : public fef::FeatureExecutor {
+
+public:
+ typedef search::attribute::AttributeContent<DataType> BufferType;
+ typedef std::vector<DataType> QueryVectorType;
+
+private:
+ const search::attribute::IAttributeVector &_attribute;
+ const QueryVectorType _vector;
+ BufferType _attributeBuffer;
+
+ feature_t euclideanDistance(const BufferType &v1, const QueryVectorType &v2);
+
+public:
+
+ EuclideanDistanceExecutor(const search::attribute::IAttributeVector &attribute, QueryVectorType vector);
+ virtual void execute(fef::MatchData &data) override;
+};
+
+
+/**
+ * Implements the blueprint for the euclidean distance executor.
+ */
+class EuclideanDistanceBlueprint : public fef::Blueprint {
+private:
+ vespalib::string _attributeName;
+ vespalib::string _queryVector;
+
+public:
+ /**
+ * Constructs a blueprint.
+ */
+ EuclideanDistanceBlueprint();
+
+ // Inherit doc from Blueprint.
+ virtual void visitDumpFeatures(const fef::IIndexEnvironment &env,
+ fef::IDumpFeatureVisitor &visitor) const override;
+
+ // Inherit doc from Blueprint.
+ virtual fef::Blueprint::UP createInstance() const override;
+
+ // Inherit doc from Blueprint.
+ virtual fef::ParameterDescriptions getDescriptions() const override {
+ return fef::ParameterDescriptions().desc().attribute(fef::ParameterCollection::ANY).string();
+ }
+
+ // Inherit doc from Blueprint.
+ virtual bool setup(const fef::IIndexEnvironment &env,
+ const fef::ParameterList &params) override;
+
+ // Inherit doc from Blueprint.
+ virtual fef::FeatureExecutor::LP createExecutor(const fef::IQueryEnvironment &env) const override;
+
+};
+
+
+} // namespace features
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/features/fieldinfofeature.cpp b/searchlib/src/vespa/searchlib/features/fieldinfofeature.cpp
new file mode 100644
index 00000000000..539dc3b0343
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/fieldinfofeature.cpp
@@ -0,0 +1,235 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".features.fieldinfo");
+
+#include <vespa/searchlib/fef/properties.h>
+#include <vespa/searchlib/fef/fieldinfo.h>
+#include <vespa/searchlib/fef/fieldtype.h>
+#include <vespa/searchlib/fef/featurenamebuilder.h>
+#include <vespa/searchlib/fef/itermdata.h>
+#include <vespa/searchlib/fef/handle.h>
+#include <sstream>
+#include "fieldinfofeature.h"
+#include "valuefeature.h"
+#include "utils.h"
+
+namespace search {
+namespace features {
+
+IndexFieldInfoExecutor::IndexFieldInfoExecutor(feature_t type, feature_t isFilter,
+ uint32_t field, uint32_t fieldHandle)
+ : fef::FeatureExecutor(),
+ _type(type),
+ _isFilter(isFilter),
+ _field(field),
+ _fieldHandle(fieldHandle)
+{
+ // empty
+}
+
+void
+IndexFieldInfoExecutor::execute(fef::MatchData &data)
+{
+ *data.resolveFeature(outputs()[0]) = _type;
+ *data.resolveFeature(outputs()[1]) = _isFilter;
+ *data.resolveFeature(outputs()[2]) = 1.0f; // searched
+ fef::TermFieldMatchData *tfmd = data.resolveTermField(_fieldHandle);
+ if (tfmd->getDocId() == data.getDocId()) {
+ *data.resolveFeature(outputs()[3]) = 1.0f; // hit
+ } else {
+ *data.resolveFeature(outputs()[3]) = 0.0f; // no hit
+ }
+ fef::FieldPositionsIterator itr = tfmd->getIterator();
+ *data.resolveFeature(outputs()[4]) = itr.getFieldLength();
+ if (itr.valid()) {
+ uint32_t first = itr.getPosition();
+ uint32_t last = 0;
+ uint32_t cnt = 0;
+ for (; itr.valid(); itr.next()) {
+ last = itr.getPosition();
+ ++cnt;
+ }
+ *data.resolveFeature(outputs()[5]) = first;
+ *data.resolveFeature(outputs()[6]) = last;
+ *data.resolveFeature(outputs()[7]) = cnt;
+ } else {
+ *data.resolveFeature(outputs()[5]) = fef::FieldPositionsIterator::UNKNOWN_LENGTH; // first
+ *data.resolveFeature(outputs()[6]) = fef::FieldPositionsIterator::UNKNOWN_LENGTH; // last
+ *data.resolveFeature(outputs()[7]) = 0.0f;
+ }
+}
+
+//-----------------------------------------------------------------------------
+
+AttrFieldInfoExecutor::AttrFieldInfoExecutor(feature_t type, uint32_t fieldHandle) :
+ FeatureExecutor(),
+ _type(type),
+ _fieldHandle(fieldHandle)
+{
+ // empty
+}
+
+void
+AttrFieldInfoExecutor::execute(fef::MatchData &data)
+{
+ *data.resolveFeature(outputs()[0]) = _type;
+ *data.resolveFeature(outputs()[1]) = 0.0; // not filter
+ *data.resolveFeature(outputs()[2]) = 1.0f; // searched
+ fef::TermFieldMatchData *tfmd = data.resolveTermField(_fieldHandle);
+ if (tfmd->getDocId() == data.getDocId()) {
+ *data.resolveFeature(outputs()[3]) = 1.0f; // hit
+ *data.resolveFeature(outputs()[4]) = fef::FieldPositionsIterator::UNKNOWN_LENGTH; // len
+ *data.resolveFeature(outputs()[5]) = 0.0f; // first
+ *data.resolveFeature(outputs()[6]) = 0.0f; // last
+ *data.resolveFeature(outputs()[7]) = 1.0f;
+ } else {
+ *data.resolveFeature(outputs()[3]) = 0.0f; // no hit
+ *data.resolveFeature(outputs()[4]) = fef::FieldPositionsIterator::UNKNOWN_LENGTH; // len
+ *data.resolveFeature(outputs()[5]) = fef::FieldPositionsIterator::UNKNOWN_LENGTH; // first
+ *data.resolveFeature(outputs()[6]) = fef::FieldPositionsIterator::UNKNOWN_LENGTH; // last
+ *data.resolveFeature(outputs()[7]) = 0.0f;
+ }
+}
+
+//-----------------------------------------------------------------------------
+
+FieldInfoBlueprint::FieldInfoBlueprint() :
+ fef::Blueprint("fieldInfo"),
+ _overview(false),
+ _indexcnt(0.0f),
+ _attrcnt(0.0f),
+ _type(0.0f),
+ _isFilter(0.0f),
+ _fieldId(fef::IllegalFieldId)
+{
+ // empty
+}
+
+void
+FieldInfoBlueprint::visitDumpFeatures(const fef::IIndexEnvironment &indexEnv,
+ fef::IDumpFeatureVisitor &visitor) const
+{
+ if (!indexEnv.getProperties().lookup(getBaseName(), "enable").get("").empty()) {
+ fef::FeatureNameBuilder fnb;
+ fnb.baseName(getBaseName());
+ for (uint32_t i = 0; i < indexEnv.getNumFields(); ++i) {
+ const fef::FieldInfo *fi = indexEnv.getField(i);
+ fnb.clearParameters().parameter(fi->name());
+ fnb.output("type");
+ visitor.visitDumpFeature(fnb.buildName());
+ fnb.output("filter");
+ visitor.visitDumpFeature(fnb.buildName());
+ fnb.output("search");
+ visitor.visitDumpFeature(fnb.buildName());
+ fnb.output("hit");
+ visitor.visitDumpFeature(fnb.buildName());
+ fnb.output("len");
+ visitor.visitDumpFeature(fnb.buildName());
+ fnb.output("first");
+ visitor.visitDumpFeature(fnb.buildName());
+ fnb.output("last");
+ visitor.visitDumpFeature(fnb.buildName());
+ fnb.output("cnt");
+ visitor.visitDumpFeature(fnb.buildName());
+ }
+ fnb.clearParameters();
+ fnb.output("indexCnt");
+ visitor.visitDumpFeature(fnb.buildName());
+ fnb.output("attrCnt");
+ visitor.visitDumpFeature(fnb.buildName());
+ }
+}
+
+bool
+FieldInfoBlueprint::setup(const fef::IIndexEnvironment &indexEnv,
+ const fef::ParameterList &params)
+{
+ if (params.empty()) {
+ _overview = true;
+ for (uint32_t i = 0; i < indexEnv.getNumFields(); ++i) {
+ if (indexEnv.getField(i)->type() == fef::FieldType::INDEX) {
+ _indexcnt += 1.0;
+ }
+ if (indexEnv.getField(i)->type() == fef::FieldType::ATTRIBUTE) {
+ _attrcnt += 1.0;
+ }
+ }
+ describeOutput("indexCnt", "total number of fields of type index");
+ describeOutput("attrCnt", "total number of fields of type attribute");
+ return true;
+ }
+ if (params.size() == 1) {
+ vespalib::string name = params[0].getValue();
+ const fef::FieldInfo *fi = indexEnv.getFieldByName(name);
+ if (fi != 0) {
+ _fieldId = fi->id();
+ if (fi->type() == fef::FieldType::INDEX) {
+ indexEnv.hintFieldAccess(_fieldId);
+ _type = 1.0;
+ } else if (fi->type() == fef::FieldType::ATTRIBUTE) {
+ _type = 2.0;
+ }
+ if (fi->isFilter()) {
+ _isFilter = 1.0;
+ } else {
+ _isFilter = 0.0;
+ }
+ }
+ describeOutput("type", "1.0 for INDEX, 2.0 for ATTRIBUTE, 0.0 for unknown (from index env)");
+ describeOutput("filter", "1.0 if this is a filter, 0.0 otherwise (from index env)");
+ describeOutput("search", "1.0 means first term searched this field, 0.0 means it did not");
+ describeOutput("hit", "1.0 means first term got a hit in this field, 0.0 means it did not");
+ describeOutput("len", "field length in number of words");
+ describeOutput("first", "position of the first hit of the first term in this field");
+ describeOutput("last", "position of the last hit of the first term in this field");
+ describeOutput("cnt", "number of hits for the first term in this field");
+ return true;
+ }
+ return false;
+}
+
+fef::FeatureExecutor::LP
+FieldInfoBlueprint::createExecutor(const fef::IQueryEnvironment &queryEnv) const
+{
+ if (_overview) {
+ std::vector<feature_t> values;
+ values.push_back(_indexcnt);
+ values.push_back(_attrcnt);
+ return fef::FeatureExecutor::LP(new ValueExecutor(values));
+ }
+ uint32_t fieldHandle = util::getTermFieldHandle(queryEnv, 0, _fieldId);
+ if (fieldHandle == fef::IllegalHandle) {
+ std::vector<feature_t> values;
+ values.push_back(_type);
+ values.push_back(_isFilter);
+ values.push_back(0.0f); // not searched
+ values.push_back(0.0f); // no hit
+ values.push_back(fef::FieldPositionsIterator::UNKNOWN_LENGTH); // default field length
+ values.push_back(fef::FieldPositionsIterator::UNKNOWN_LENGTH); // default first pos
+ values.push_back(fef::FieldPositionsIterator::UNKNOWN_LENGTH); // default last pos
+ values.push_back(0.0f); // number of hits
+ return fef::FeatureExecutor::LP(new ValueExecutor(values));
+ }
+ if (_type == 1.0) { // index
+ return fef::FeatureExecutor::
+ LP(new IndexFieldInfoExecutor(_type, _isFilter, _fieldId, fieldHandle));
+ } else if (_type == 2.0) { // attribute
+ return fef::FeatureExecutor::LP(
+ new AttrFieldInfoExecutor(_type, fieldHandle));
+ }
+ std::vector<feature_t> values;
+ values.push_back(_type);
+ values.push_back(_isFilter);
+ values.push_back(1.0f); // searched
+ values.push_back(0.0f); // no hit
+ values.push_back(fef::FieldPositionsIterator::UNKNOWN_LENGTH); // default field length
+ values.push_back(fef::FieldPositionsIterator::UNKNOWN_LENGTH); // default first pos
+ values.push_back(fef::FieldPositionsIterator::UNKNOWN_LENGTH); // default last pos
+ values.push_back(0.0f); // number of hits
+ return fef::FeatureExecutor::LP(new ValueExecutor(values));
+}
+
+} // namespace features
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/features/fieldinfofeature.h b/searchlib/src/vespa/searchlib/features/fieldinfofeature.h
new file mode 100644
index 00000000000..a7438873f97
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/fieldinfofeature.h
@@ -0,0 +1,70 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <string>
+#include <vector>
+#include <vespa/searchlib/fef/blueprint.h>
+#include <vespa/searchlib/fef/featureexecutor.h>
+#include <vespa/searchlib/common/feature.h>
+
+namespace search {
+namespace features {
+
+class IndexFieldInfoExecutor : public search::fef::FeatureExecutor
+{
+private:
+ feature_t _type; // from index env
+ feature_t _isFilter; // from index env
+ uint32_t _field;
+ uint32_t _fieldHandle;
+
+public:
+ IndexFieldInfoExecutor(feature_t type, feature_t isFilter,
+ uint32_t field, uint32_t fieldHandle);
+ virtual void execute(search::fef::MatchData & data);
+};
+
+//-----------------------------------------------------------------------------
+
+class AttrFieldInfoExecutor : public search::fef::FeatureExecutor
+{
+private:
+ feature_t _type; // from index env
+ uint32_t _fieldHandle;
+
+public:
+ AttrFieldInfoExecutor(feature_t type, uint32_t fieldHandle);
+ virtual void execute(search::fef::MatchData & data);
+};
+
+//-----------------------------------------------------------------------------
+
+class FieldInfoBlueprint : public search::fef::Blueprint
+{
+private:
+ bool _overview;
+ feature_t _indexcnt;
+ feature_t _attrcnt;
+ feature_t _type;
+ feature_t _isFilter;
+ uint32_t _fieldId;
+
+public:
+ FieldInfoBlueprint();
+ virtual void visitDumpFeatures(const search::fef::IIndexEnvironment &indexEnv,
+ search::fef::IDumpFeatureVisitor &visitor) const;
+ virtual search::fef::Blueprint::UP createInstance() const { return search::fef::Blueprint::UP(new FieldInfoBlueprint()); }
+ virtual search::fef::ParameterDescriptions getDescriptions() const {
+ return search::fef::ParameterDescriptions().
+ desc(0).
+ desc(1).string();
+ }
+ virtual bool setup(const search::fef::IIndexEnvironment & env,
+ const search::fef::ParameterList & params);
+ virtual search::fef::FeatureExecutor::LP createExecutor(const search::fef::IQueryEnvironment &queryEnv) const;
+};
+
+} // namespace features
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/features/fieldlengthfeature.cpp b/searchlib/src/vespa/searchlib/features/fieldlengthfeature.cpp
new file mode 100644
index 00000000000..fa356a9e012
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/fieldlengthfeature.cpp
@@ -0,0 +1,99 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".features.fieldlength");
+
+#include <vespa/searchlib/fef/itermdata.h>
+#include <vespa/searchlib/fef/featurenamebuilder.h>
+#include <vespa/searchlib/fef/fieldinfo.h>
+#include <vespa/searchlib/fef/fieldtype.h>
+#include <vespa/vespalib/util/stringfmt.h>
+#include "fieldlengthfeature.h"
+#include "valuefeature.h"
+#include "utils.h"
+
+using namespace search::fef;
+
+namespace search {
+namespace features {
+
+FieldLengthExecutor::
+FieldLengthExecutor(const IQueryEnvironment &env,
+ uint32_t fieldId)
+ : FeatureExecutor(),
+ _fieldHandles()
+{
+ for (uint32_t i = 0; i < env.getNumTerms(); ++i) {
+ TermFieldHandle handle = util::getTermFieldHandle(env, i, fieldId);
+ if (handle != IllegalHandle) {
+ _fieldHandles.push_back(handle);
+ }
+ }
+}
+
+void
+FieldLengthExecutor::execute(MatchData &match)
+{
+ uint32_t val = 0;
+ bool validVal = false;
+ for (std::vector<TermFieldHandle>::const_iterator
+ hi = _fieldHandles.begin(), hie = _fieldHandles.end();
+ hi != hie; ++hi)
+ {
+ TermFieldMatchData &tfmd = *match.resolveTermField(*hi);
+ if (tfmd.getDocId() == match.getDocId()) {
+ FieldPositionsIterator it = tfmd.getIterator();
+ if (it.valid()) {
+ if (val < it.getFieldLength())
+ val = it.getFieldLength();
+ validVal = true;
+ }
+ }
+ }
+ if (!validVal) {
+ val = fef::FieldPositionsIterator::UNKNOWN_LENGTH;
+ }
+ feature_t value = val;
+ *match.resolveFeature(outputs()[0]) = value; // field length
+}
+
+FieldLengthBlueprint::FieldLengthBlueprint()
+ : Blueprint("fieldLength"),
+ _field(NULL)
+{
+}
+
+void
+FieldLengthBlueprint::visitDumpFeatures(const IIndexEnvironment &,
+ IDumpFeatureVisitor &) const
+{
+}
+
+bool
+FieldLengthBlueprint::setup(const IIndexEnvironment &env,
+ const ParameterList &params)
+{
+ (void) env;
+ _field = params[0].asField();
+ describeOutput("out", "The length of this field.");
+ return true;
+}
+
+Blueprint::UP
+FieldLengthBlueprint::createInstance() const
+{
+ return Blueprint::UP(new FieldLengthBlueprint());
+}
+
+FeatureExecutor::LP
+FieldLengthBlueprint::createExecutor(const IQueryEnvironment &env) const
+{
+ if (_field == 0) {
+ std::vector<feature_t> values;
+ values.push_back(fef::FieldPositionsIterator::UNKNOWN_LENGTH);
+ return FeatureExecutor::LP(new ValueExecutor(values));
+ }
+ return FeatureExecutor::LP(new FieldLengthExecutor(env, _field->id()));
+}
+
+}}
diff --git a/searchlib/src/vespa/searchlib/features/fieldlengthfeature.h b/searchlib/src/vespa/searchlib/features/fieldlengthfeature.h
new file mode 100644
index 00000000000..0d55881f0a8
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/fieldlengthfeature.h
@@ -0,0 +1,65 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <string>
+#include <vector>
+#include <vespa/searchlib/fef/blueprint.h>
+#include <vespa/searchlib/fef/featureexecutor.h>
+
+namespace search {
+namespace features {
+
+/**
+ * Implements the executor for field length.
+ */
+class FieldLengthExecutor : public search::fef::FeatureExecutor {
+private:
+ std::vector<search::fef::TermFieldHandle> _fieldHandles;
+
+public:
+ /**
+ * Constructs an executor for field length.
+ *
+ * @param env The query environment
+ * @param fieldId The field id
+ */
+ FieldLengthExecutor(const search::fef::IQueryEnvironment &env,
+ uint32_t fieldId);
+ virtual void execute(search::fef::MatchData &data);
+};
+
+/**
+ * Implements the blueprint for field length.
+ */
+class FieldLengthBlueprint : public search::fef::Blueprint {
+private:
+ const search::fef::FieldInfo *_field;
+
+public:
+ /**
+ * Constructs a blueprint for field length.
+ */
+ FieldLengthBlueprint();
+
+ // Inherit doc from Blueprint.
+ virtual void visitDumpFeatures(const search::fef::IIndexEnvironment &env,
+ search::fef::IDumpFeatureVisitor &visitor) const;
+
+ // Inherit doc from Blueprint.
+ virtual search::fef::Blueprint::UP createInstance() const;
+
+ // Inherit doc from Blueprint.
+ virtual search::fef::FeatureExecutor::LP createExecutor(const search::fef::IQueryEnvironment &env) const;
+
+ // Inherit doc from Blueprint.
+ virtual search::fef::ParameterDescriptions getDescriptions() const {
+ return search::fef::ParameterDescriptions().desc().indexField(search::fef::ParameterCollection::SINGLE);
+ }
+
+ // Inherit doc from Blueprint.
+ virtual bool setup(const search::fef::IIndexEnvironment & env,
+ const search::fef::ParameterList & params);
+};
+
+}}
+
diff --git a/searchlib/src/vespa/searchlib/features/fieldmatch/.gitignore b/searchlib/src/vespa/searchlib/features/fieldmatch/.gitignore
new file mode 100644
index 00000000000..583460ae288
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/fieldmatch/.gitignore
@@ -0,0 +1,3 @@
+*.So
+.depend
+Makefile
diff --git a/searchlib/src/vespa/searchlib/features/fieldmatch/CMakeLists.txt b/searchlib/src/vespa/searchlib/features/fieldmatch/CMakeLists.txt
new file mode 100644
index 00000000000..2bbdf179763
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/fieldmatch/CMakeLists.txt
@@ -0,0 +1,10 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_library(searchlib_fieldmatch OBJECT
+ SOURCES
+ computer.cpp
+ metrics.cpp
+ params.cpp
+ segmentstart.cpp
+ simplemetrics.cpp
+ DEPENDS
+)
diff --git a/searchlib/src/vespa/searchlib/features/fieldmatch/computer.cpp b/searchlib/src/vespa/searchlib/features/fieldmatch/computer.cpp
new file mode 100644
index 00000000000..f2e1601ed28
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/fieldmatch/computer.cpp
@@ -0,0 +1,558 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".features.fieldmatch.computer");
+
+#include <iostream>
+#include <math.h>
+#include <set>
+#include <vespa/searchlib/features/utils.h>
+#include <vespa/searchlib/fef/properties.h>
+#include <vespa/vespalib/util/stringfmt.h>
+#include "computer.h"
+
+using namespace search::fef;
+
+namespace search {
+namespace features {
+namespace fieldmatch {
+
+
+Computer::Computer(const vespalib::string &propertyNamespace, const PhraseSplitter &splitter,
+ const FieldInfo &fieldInfo, const Params &params) :
+ _splitter(splitter),
+ _fieldId(fieldInfo.id()),
+ _params(params),
+ _tracing(false),
+ _trace(),
+ _useCachedHits(true),
+ _queryTerms(),
+ _queryTermFieldMatch(),
+ _totalTermWeight(0),
+ _totalTermSignificance(0.0f),
+ _match(NULL),
+ _fieldLength(FieldPositionsIterator::UNKNOWN_LENGTH),
+ _currentMetrics(this),
+ _finalMetrics(this),
+ _simpleMetrics(params),
+ _segments(),
+ _alternativeSegmentationsTried(0),
+ _cachedHits()
+{
+ // Store term data for all terms searching in this field
+ for (uint32_t i = 0; i < splitter.getNumTerms(); ++i) {
+ QueryTerm qt = QueryTermFactory::create(splitter, i, true, true);
+ _totalTermWeight += qt.termData()->getWeight().percent();
+ _totalTermSignificance += qt.significance();
+ _simpleMetrics.addQueryTerm(qt.termData()->getWeight().percent());
+ const ITermFieldData *field = qt.termData()->lookupField(_fieldId);
+ if (field != 0) {
+ qt.fieldHandle(field->getHandle());
+ _queryTerms.push_back(qt);
+ _simpleMetrics.addSearchedTerm(qt.termData()->getWeight().percent());
+ _queryTermFieldMatch.push_back(NULL);
+ _cachedHits.push_back(BitVectorData());
+ }
+ }
+
+ _totalTermWeight = atoi(splitter.getProperties().lookup(propertyNamespace, "totalTermWeight").
+ get(vespalib::make_string("%d", _totalTermWeight)).c_str());
+ _totalTermSignificance = atof(splitter.getProperties().lookup(propertyNamespace, "totalTermSignificance").
+ get(vespalib::make_string("%f", _totalTermSignificance)).c_str());
+ if (splitter.getProperties().lookup(propertyNamespace, "totalTermWeight").found()) {
+ _simpleMetrics.setTotalWeightInQuery(_totalTermWeight);
+ }
+
+ // update current and final metrics after initialization
+ _currentMetrics = Metrics(this);
+ _finalMetrics = Metrics(this);
+
+ // num query terms searching in this field + 1
+ for (uint32_t i = 0; i < (getNumQueryTerms() + 1); ++i) {
+ _segments.push_back(SegmentData(SegmentStart::SP(new SegmentStart(this, _currentMetrics))));
+ }
+}
+
+void
+Computer::reset(const MatchData & match)
+{
+ _currentMetrics.reset();
+ _finalMetrics.reset();
+ _simpleMetrics.resetMatchData();
+ for (uint32_t i = 0; i < _segments.size(); ++i) {
+ if (_segments[i].valid) {
+ _segments[i].valid = false;
+ }
+ }
+ _alternativeSegmentationsTried = 0;
+ for (uint32_t i = 0; i < _cachedHits.size(); ++i) {
+ if (_cachedHits[i].valid) {
+ _cachedHits[i].valid = false;
+ }
+ }
+
+ _match = &match;
+ _fieldLength = FieldPositionsIterator::UNKNOWN_LENGTH;
+
+ for (uint32_t i = 0; i < _queryTerms.size(); ++i) {
+ const ITermData *td = _queryTerms[i].termData();
+ const TermFieldMatchData *tfmd = _splitter.resolveTermField(_queryTerms[i].fieldHandle());
+ if (tfmd->getDocId() != match.getDocId()) { // only term match data if we have a hit
+ tfmd = NULL;
+ } else {
+ FieldPositionsIterator it = tfmd->getIterator();
+ uint32_t fieldLength = it.getFieldLength();
+ if (it.valid()) {
+ _simpleMetrics.addMatchWithPosOcc(td->getWeight().percent());
+ if (fieldLength == 0 || fieldLength == FieldPositionsIterator::UNKNOWN_LENGTH) {
+ _simpleMetrics.hasMatchWithInvalidFieldLength();
+ }
+ } else {
+ _simpleMetrics.addMatch(td->getWeight().percent());
+ }
+ if (_fieldLength == FieldPositionsIterator::UNKNOWN_LENGTH) {
+ _fieldLength = fieldLength; // save away the first valid field length
+ }
+
+ if (_useCachedHits && it.valid() && fieldLength != FieldPositionsIterator::UNKNOWN_LENGTH) {
+ // cache the field position iterator in a bit vector for faster lookup in
+ // findClosestInFieldBySemanticDistance()
+ _cachedHits[i].bitvector.clear();
+ _cachedHits[i].valid = true;
+ if (_cachedHits[i].bitvector.size() < _fieldLength) {
+ _cachedHits[i].bitvector.resize(_fieldLength);
+ }
+ for (; it.valid(); it.next()) {
+ uint32_t fieldPos = it.getPosition();
+ if (__builtin_expect(fieldPos < _fieldLength, true))
+ _cachedHits[i].bitvector.setBit(fieldPos);
+ else {
+ handleError(fieldPos, match.getDocId());
+ }
+ }
+ }
+ }
+ _queryTermFieldMatch[i] = tfmd;
+ }
+}
+
+void
+Computer::handleError(uint32_t fieldPos, uint32_t docId) const
+{
+ static int errcnt;
+ if (errcnt < 1000) {
+ errcnt++;
+ const FieldInfo * finfo = _splitter.getIndexEnvironment().getField(getFieldId());
+ LOG(debug, "Bad field position %u >= fieldLength %u for field '%s' document %u. "
+ "Document was probably refed during query (Ticket 7104969)",
+ fieldPos, _fieldLength,
+ finfo != NULL ? finfo->name().c_str() : "unknown field",
+ docId);
+ }
+}
+
+const Metrics &
+Computer::run()
+{
+ exploreSegments();
+ return _finalMetrics;
+}
+
+int
+Computer::findClosestInFieldBySemanticDistance(int i, int previousJ, uint32_t startSemanticDistance)
+{
+ if (_useCachedHits) {
+ if (!_cachedHits[i].valid) {
+ return -1; // not matched
+ }
+
+ const BitVector & hits = _cachedHits[i].bitvector;
+
+ for (uint32_t distance = startSemanticDistance; distance < _fieldLength; distance++) {
+ int j = semanticDistanceToFieldIndex(distance, previousJ);
+ if (j < 0) {
+ continue;
+ }
+
+ if (hits.testBit((uint32_t)j)) {
+ return distance;
+ }
+ }
+ return -1;
+ }
+
+ const TermFieldMatchData *termFieldMatch = _queryTermFieldMatch[i];
+ if (termFieldMatch == NULL) {
+ return -1; // not matched
+ }
+
+ for (uint32_t distance = startSemanticDistance; distance < _fieldLength; distance++) {
+ int j = semanticDistanceToFieldIndex(distance, previousJ);
+ if (j < 0) {
+ continue;
+ }
+
+ FieldPositionsIterator it = termFieldMatch->getIterator();
+ while (it.valid() && it.getPosition() < (uint32_t)j) {
+ it.next();
+ }
+ if (it.valid() && it.getPosition() == (uint32_t)j) {
+ return distance;
+ }
+ }
+ return -1;
+}
+
+int
+Computer::semanticDistanceToFieldIndex(int semanticDistance, uint32_t zeroJ) const
+{
+ if (semanticDistance == -1) {
+ return -1;
+ }
+ int firstSegmentLength = std::min(_params.getProximityLimit(), _fieldLength - zeroJ);
+ int secondSegmentLength = std::min(_params.getProximityLimit(), zeroJ);
+ if (semanticDistance < firstSegmentLength) {
+ return zeroJ + semanticDistance;
+ }
+ else if (semanticDistance < firstSegmentLength + secondSegmentLength) {
+ return zeroJ - semanticDistance - 1 + firstSegmentLength;
+ }
+ else if ((uint32_t)semanticDistance < _fieldLength - zeroJ + secondSegmentLength) {
+ return zeroJ + semanticDistance - secondSegmentLength;
+ }
+ else {
+ return _fieldLength - semanticDistance - 1;
+ }
+}
+
+int
+Computer::fieldIndexToSemanticDistance(int j, uint32_t zeroJ) const
+{
+ if (j == -1) {
+ return -1;
+ }
+ uint32_t firstSegmentLength = std::min(_params.getProximityLimit(), _fieldLength - zeroJ);
+ uint32_t secondSegmentLength = std::min(_params.getProximityLimit(), zeroJ);
+ if ((uint32_t)j >= zeroJ) {
+ if ((j - zeroJ) < firstSegmentLength) {
+ return j - zeroJ; // 0..limit
+ }
+ else {
+ return j - zeroJ + secondSegmentLength; // limit*2..field.length-zeroJ
+ }
+ }
+ else {
+ if ((zeroJ - j - 1) < secondSegmentLength) {
+ return zeroJ - j + firstSegmentLength - 1; // limit..limit*2
+ }
+ else {
+ return (zeroJ - j - 1) + _fieldLength - zeroJ; // field.length-zeroJ..
+ }
+ }
+}
+
+Computer &
+Computer::trace(const vespalib::string &str)
+{
+ if (_tracing) {
+ _trace.push_back(str);
+ //LOG(info, "%s", str.c_str());
+ }
+ return *this;
+}
+
+vespalib::string
+Computer::getTrace() const
+{
+ vespalib::string ret = "";
+ for (std::vector<vespalib::string>::const_iterator it = _trace.begin();
+ it != _trace.end(); ++it) {
+ ret += *it;
+ }
+ return ret;
+}
+
+vespalib::string
+Computer::toString() const
+{
+ return vespalib::make_string("Computer(%d query terms,%d field terms,%s)",
+ getNumQueryTerms(), _fieldLength,
+ _currentMetrics.toString().c_str());
+}
+
+void
+Computer::exploreSegments()
+{
+ if (isTracing()) {
+ trace(vespalib::make_string("Calculating matches for %d query terms, %d field terms.",
+ getNumQueryTerms(), _fieldLength));
+ }
+
+ _segments[0].segment->reset(_currentMetrics);
+ _segments[0].valid = true;
+ SegmentStart *segment = _segments[0].segment.get();
+ while (segment != NULL) {
+ if (isTracing()) {
+ trace(vespalib::make_string("Looking for segment from %s...",
+ segment->toString().c_str()));
+ }
+
+ _currentMetrics = segment->getMetrics(); // take a copy of the segment returned from the current segment.
+ bool found = findAlternativeSegmentFrom(segment);
+ if (found) {
+ if (isTracing()) {
+ vespalib::string segments = "[ ";
+ const std::vector<uint32_t> &lst = _currentMetrics.getSegmentStarts();
+ for (uint32_t i = 0; i < lst.size(); ++i) {
+ segments += vespalib::make_string("%d", lst[i]);
+ if (i < lst.size() - 1) {
+ segments += ", ";
+ }
+ }
+ segments += " ]";
+ trace(vespalib::make_string("...found segments: %s, score %f.",
+ segments.c_str(),
+ _currentMetrics.getSegmentationScore()));
+ }
+ } else {
+ if (isTracing()) {
+ trace("...no complete and improved segment existed.");
+ }
+ segment->setOpen(false);
+ }
+ segment = findOpenSegment(segment->getI());
+ }
+ _finalMetrics = findLastStartPoint()->getMetrics();
+ setOccurrenceCounts(_finalMetrics);
+ _finalMetrics.onComplete();
+ _finalMetrics.setComplete(true);
+}
+
+bool
+Computer::findAlternativeSegmentFrom(SegmentStart *segment) {
+ int semanticDistanceExplored = segment->getSemanticDistanceExplored();
+ int previousI = -1;
+ int previousJ = segment->getPreviousJ();
+ bool hasOpenSequence = false;
+ bool isFirst = true;
+ for (uint32_t i = segment->getStartI(); i < getNumQueryTerms(); i++) {
+ int semanticDistance = findClosestInFieldBySemanticDistance(i, previousJ, semanticDistanceExplored);
+ int j = semanticDistanceToFieldIndex(semanticDistance, previousJ);
+
+ if (j == -1 && semanticDistanceExplored > 0 && isFirst) {
+ return false; // segment explored before; no more matches found
+ }
+ if (hasOpenSequence && (j == -1 || j != previousJ + 1)) {
+ _currentMetrics.onSequenceEnd(previousJ);
+ hasOpenSequence = false;
+ }
+ if (isFirst) {
+ if (j != -1) {
+ segmentStart(i, j, isFirst ? -1 : previousJ);
+ segment->exploredTo(j);
+ isFirst = false;
+ }
+ else {
+ segment->incrementStartI(); // there are no matches for this i
+ }
+ }
+ else {
+ if ((unsigned int)abs(j - previousJ) >= _params.getProximityLimit()) {
+ segmentEnd(i - 1, previousJ);
+ return true;
+ }
+ else if (j != -1) {
+ inSegment(i, j, previousJ, previousI);
+ }
+ }
+ if (j != -1) {
+ _currentMetrics.onMatch(i);
+ if (!hasOpenSequence) {
+ _currentMetrics.onSequenceStart(j);
+ hasOpenSequence=true;
+ }
+ semanticDistanceExplored = 1; // skip the current match when looking for the next
+ } else {
+ semanticDistanceExplored = 0;
+ // we have a match for this term but no position information
+ if (_queryTermFieldMatch[i] != NULL && !_cachedHits[i].valid) {
+ _currentMetrics.onMatch(i);
+ }
+ }
+ if (j >= 0) {
+ previousI = i;
+ previousJ = j;
+ }
+ }
+ if (hasOpenSequence) {
+ _currentMetrics.onSequenceEnd(previousJ);
+ }
+ if (!isFirst) {
+ segmentEnd(getNumQueryTerms() - 1, previousJ);
+ return true;
+ }
+ else {
+ return false;
+ }
+}
+
+void
+Computer::inSegment(int i, int j, int previousJ, int previousI)
+{
+ _currentMetrics.onPair(i, j, previousJ);
+ if (j == previousJ + 1 && i == previousI + 1) {
+ _currentMetrics.onInSequence(i, j, previousJ);
+ }
+ else {
+ _currentMetrics.onInSegmentGap(i, j, previousJ);
+ if (isTracing()) {
+ trace(vespalib::make_string(" in segment gap: %d -> %d", i, j));
+ }
+ }
+}
+
+bool
+Computer::segmentStart(int i, int j, int previousJ)
+{
+ _currentMetrics.onNewSegment(i, j, previousJ);
+ if (previousJ >= 0) {
+ _currentMetrics.onPair(i, j, previousJ);
+ }
+ if (isTracing()) {
+ trace(vespalib::make_string(" new segment at: %d -> %d", i, j));
+ }
+ return true;
+}
+
+void
+Computer::segmentEnd(int i, int j)
+{
+ if (isTracing()) {
+ trace(vespalib::make_string(" segment ended at: %d -> %d", i, j));
+ }
+ SegmentStart *startOfNext = _segments[i + 1].segment.get();
+ if (!_segments[i + 1].valid) {
+ startOfNext->reset(_currentMetrics, j, i + 1);
+ _segments[i + 1].valid = true;
+ }
+ else {
+ startOfNext->offerHistory(j, _currentMetrics);
+ }
+}
+
+SegmentStart *
+Computer::findOpenSegment(uint32_t startI) {
+ for (uint32_t i = startI; i < _segments.size(); i++) {
+ SegmentStart *startPoint = _segments[i].valid ? _segments[i].segment.get() : NULL;
+ if (startPoint == NULL || !startPoint->isOpen()) {
+ continue;
+ }
+ if (startPoint->getSemanticDistanceExplored() == 0) {
+ return startPoint; // first attempt
+ }
+ if (_alternativeSegmentationsTried >= _params.getMaxAlternativeSegmentations()) {
+ continue;
+ }
+ _alternativeSegmentationsTried++;
+ return startPoint;
+ }
+ return NULL;
+}
+
+SegmentStart *
+Computer::findLastStartPoint()
+{
+ for (int i = _segments.size(); --i >= 0; ) {
+ SegmentStart *startPoint = _segments[i].valid ? _segments[i].segment.get() : NULL;
+ if (startPoint != NULL) {
+ return startPoint;
+ }
+ }
+ LOG(error, "findLastStartPoint() could not find any segment start. This should never happen!");
+ return NULL;
+}
+
+void
+Computer::setOccurrenceCounts(Metrics &metrics)
+{
+ // Find all unique query terms.
+ std::vector<uint32_t> uniqueTerms;
+ std::set<uint32_t> firstOccs;
+ for (uint32_t i = 0; i < _queryTermFieldMatch.size(); ++i) {
+ const TermFieldMatchData *termFieldMatch = _queryTermFieldMatch[i];
+ if (termFieldMatch == NULL) {
+ continue; // not for this match
+ }
+ FieldPositionsIterator it = termFieldMatch->getIterator();
+ if (it.valid()) {
+ if (firstOccs.find(it.getPosition()) == firstOccs.end()) {
+ uniqueTerms.push_back(i);
+ firstOccs.insert(it.getPosition());
+ }
+ }
+ }
+
+ // Commence occurence logic.
+ std::vector<feature_t> weightedOccurrences;
+ std::vector<feature_t> significantOccurrences;
+
+ uint32_t divider = std::min(_fieldLength, (uint32_t)(_params.getMaxOccurrences() * uniqueTerms.size()));
+ uint32_t maxOccurence = std::min(_fieldLength, _params.getMaxOccurrences());
+
+ feature_t occurrence = 0;
+ feature_t absoluteOccurrence = 0;
+ feature_t weightedAbsoluteOccurrence = 0;
+ int totalWeight = 0;
+ feature_t totalWeightedOccurrences = 0;
+ feature_t totalSignificantOccurrences = 0;
+
+ for (std::vector<uint32_t>::iterator it = uniqueTerms.begin();
+ it != uniqueTerms.end(); ++it)
+ {
+ const QueryTerm &queryTerm = _queryTerms[*it];
+ const ITermData &termData = *queryTerm.termData();
+ const TermFieldMatchData &termFieldMatch = *_queryTermFieldMatch[*it];
+
+ uint32_t termOccurrences = 0;
+ FieldPositionsIterator pos = termFieldMatch.getIterator();
+ while (pos.valid() && termOccurrences < _params.getMaxOccurrences()) {
+ termOccurrences++;
+ pos.next();
+ }
+
+ occurrence += (feature_t)termOccurrences / divider;
+ absoluteOccurrence += (feature_t)termOccurrences / (_params.getMaxOccurrences() * uniqueTerms.size());
+
+ weightedAbsoluteOccurrence += (feature_t)termOccurrences * termData.getWeight().percent() / _params.getMaxOccurrences();
+ totalWeight += termData.getWeight().percent();
+
+ totalWeightedOccurrences += (feature_t)maxOccurence * termData.getWeight().percent() / divider;
+ weightedOccurrences.push_back((feature_t)termOccurrences * termData.getWeight().percent() / divider);
+
+ totalSignificantOccurrences += (feature_t)maxOccurence * queryTerm.significance() / divider;
+ significantOccurrences.push_back((feature_t)termOccurrences * queryTerm.significance() / divider);
+ }
+ metrics.setOccurrence(occurrence);
+ metrics.setAbsoluteOccurrence(absoluteOccurrence);
+ metrics.setWeightedAbsoluteOccurrence(weightedAbsoluteOccurrence / (totalWeight > 0 ? totalWeight : 1));
+
+ feature_t weightedOccurrenceSum = 0;
+ for (std::vector<feature_t>::iterator it = weightedOccurrences.begin();
+ it != weightedOccurrences.end(); ++it)
+ {
+ weightedOccurrenceSum += totalWeightedOccurrences > 0.0f ? *it / totalWeightedOccurrences : 0.0f;
+ }
+ metrics.setWeightedOccurrence(weightedOccurrenceSum);
+
+ feature_t significantOccurrenceSum = 0;
+ for (std::vector<feature_t>::iterator it = significantOccurrences.begin();
+ it != significantOccurrences.end(); ++it)
+ {
+ significantOccurrenceSum += totalSignificantOccurrences > 0.0f ? *it / totalSignificantOccurrences : 0.0f;
+ }
+ metrics.setSignificantOccurrence(significantOccurrenceSum);
+}
+
+} // fieldmatch
+} // features
+} // search
diff --git a/searchlib/src/vespa/searchlib/features/fieldmatch/computer.h b/searchlib/src/vespa/searchlib/features/fieldmatch/computer.h
new file mode 100644
index 00000000000..558bee1443a
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/fieldmatch/computer.h
@@ -0,0 +1,382 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/searchlib/fef/iqueryenvironment.h>
+#include <vespa/searchlib/fef/fieldinfo.h>
+#include <vespa/searchlib/fef/matchdata.h>
+#include <vespa/searchlib/fef/phrasesplitter.h>
+#include <vespa/searchlib/features/queryterm.h>
+#include <vespa/searchlib/common/allocatedbitvector.h>
+#include <string>
+#include <vector>
+#include "metrics.h"
+#include "params.h"
+#include "segmentstart.h"
+#include "simplemetrics.h"
+
+namespace search {
+namespace features {
+namespace fieldmatch {
+
+/**
+ * <p>Calculates a set of metrics capturing information about the degree of agreement between a query and a field
+ * string. This algorithm attempts to capture the property of text that very close tokens are usuall part of the same
+ * semantic structure, while tokens farther apart are much more loosely related. The algorithm will locate alternative
+ * such regions containing multiple query tokens (segments), do a more detailed analysis of these segments and choose
+ * the ones producing the best overall set of match metrics.</p>
+ *
+ * <p>Such segments are found by looking at query terms in sequence from left top right and finding matches in the
+ * field. All alternative segment start points are explored, and the segmentation achieving the best overall string
+ * match metric score is preferred. The dynamic programming paradigm is used to avoid redoing work on segmentations.</p>
+ *
+ * <p>When a segment start point is found, subsequenc tokens from the query are searched in the field from this starting
+ * point in "semantic order". This search order can be defined independently of the algorithm. The current order
+ * searches <i>proximityLimit tokens ahead first, then the same distance backwards (so if you need to go two steps
+ * backwards in the field from the segment starting point, the real distance is -2, but the "semantic distance" is
+ * proximityLimit+2.</p>
+ *
+ * <p>The actual metrics are calculated during execution of this algorithm by the {@link Metrics} class, by
+ * receiving events emitted from the algorithm. Any set of metrics derivable from these events a computable using this
+ * algorithm.</p>
+ *
+ * <p>Terminology:
+ * <ul>
+ * <li><b>Sequence</b> - A set of adjacent matched tokens in the field.</li>
+ * <li><b>Segment</b> - A field area containing matches to a continuous section of the query.</li>
+ * <li><b>Gap</b> - A chunk of adjacent tokens <i>inside a segment</i> separating two matched characters.</li>
+ * <li><b>Semantic distance</b> - A non-continuous distance between tokens in j.</li>
+ * </ul>
+ *
+ * <p>Notation: A position index in the query is denoted <code>i</code>. A position index in the field is denoted
+ * <code>j</code>.</p>
+ *
+ * <p>This class is not multithread safe, but is reusable across queries for a single thread.</p>
+ *
+ * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a>
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
+ * @version $Id$
+ */
+class Computer {
+public:
+ /**
+ * Constructs a new computer object.
+ *
+ * @param propertyNamespace The namespace used in query properties.
+ * @param splitter The environment that holds all query information.
+ * @param fieldInfo The info object of the matched field.
+ * @param params The parameter object for this computer.
+ */
+ Computer(const vespalib::string &propertyNamespace, const search::fef::PhraseSplitter &splitter,
+ const search::fef::FieldInfo &fieldInfo, const Params &params);
+
+ /**
+ * Resets this object according to the given match data object.
+ *
+ * @param match The match data object containing match information for this field.
+ */
+ void reset(const search::fef::MatchData & match);
+
+ /**
+ * Runs this computer using the environment, match and parameters given to the constructor.
+ *
+ * @return The final metrics.
+ */
+ const Metrics & run();
+
+ /**
+ * Returns the final metrics.
+ *
+ * @return The final metrics.
+ */
+ const Metrics & getFinalMetrics() const {
+ return _finalMetrics;
+ }
+
+ /**
+ * Implements the prefered search order for finding a match to a query item - first
+ * looking close in the right order, then close in the reverse order, then far in the right order
+ * and lastly far in the reverse order.
+ *
+ * @param i The query term index.
+ * @param previousJ The previous field index.
+ * @param startSemanticDistance The semantic distance we must be larger than or equal to.
+ * @return The semantic distance of the next mathing j larger than startSemanticDistance, or -1 if
+ * there are no matches larger than startSemanticDistance
+ */
+ int findClosestInFieldBySemanticDistance(int i, int previousJ, uint32_t startSemanticDistance);
+
+ /**
+ * Returns the field index (j) from a starting point zeroJ and the distance form zeroJ in the
+ * semantic distance space.
+ *
+ * @param semanticDistance The semantic distance to transform to field index.
+ * @param zeroJ The starting point.
+ * @returns The field index, or -1 (undefined) if the semanticDistance is -1.
+ */
+ int semanticDistanceToFieldIndex(int semanticDistance, uint32_t zeroJ) const;
+
+ /**
+ * Returns the semantic distance from a starting point zeroJ to a field index j.
+ *
+ * @param j The field index to transform to semantic distance.
+ * @param zeroJ The starting point.
+ * @returns The semantic distance, or -1 (undefined) if j is -1.
+ */
+ int fieldIndexToSemanticDistance(int j, uint32_t zeroJ) const;
+
+ /**
+ * Returns the query environment of this. This contains information about the query.
+ *
+ * @return The query environment.
+ */
+ const search::fef::IQueryEnvironment &getQueryEnvironment() const {
+ return _splitter;
+ }
+
+ /**
+ * Returns the match data of this. This contains information about how the query was matched to the current
+ * document.
+ *
+ * @return The match data.
+ */
+ const search::fef::MatchData &getMatchData() const {
+ return *_match;
+ }
+
+ /**
+ * Returns the id of the searched field.
+ *
+ * @return The field id.
+ */
+ uint32_t getFieldId() const {
+ return _fieldId;
+ }
+
+ /**
+ * Returns the number of terms present in the searched field.
+ *
+ * @return The field length.
+ */
+ uint32_t getFieldLength() const {
+ return _fieldLength;
+ }
+
+ /**
+ * Returns the parameter object that was used to instantiate this.
+ *
+ * @return The parameters.
+ */
+ const Params &getParams() const {
+ return _params;
+ }
+
+ /**
+ * Adds the given string to the trace of this, if tracing is enabled.
+ *
+ * @param str The string to trace.
+ * @return This, to allow chaining.
+ */
+ Computer &trace(const vespalib::string &str);
+
+ /**
+ * Returns a textual trace of the last execution of this algorithm, if tracing is on.
+ *
+ * @return The trace string.
+ */
+ vespalib::string getTrace() const;
+
+ /**
+ * Set to true to collect a textual trace from the computation, which can be retrieved using {@link #getTrace}.
+ *
+ * @param tracing Whether or not to trace.
+ * @return This, to allow chaining.
+ */
+ Computer &setTracing(bool tracing) {
+ _tracing = tracing;
+ return *this;
+ }
+
+ /**
+ * Returns whether tracing is on.
+ *
+ * @return True if tracing is on.
+ */
+ bool isTracing() const { return _tracing; }
+
+ /**
+ * Returns the number of terms searching on this field.
+ *
+ * @return The number of terms.
+ */
+ uint32_t getNumQueryTerms() const {
+ return _queryTerms.size();
+ }
+
+ /**
+ * Returns the query term data for a specified term.
+ *
+ * @param The index of the term to return.
+ * @return The query term data.
+ */
+ const QueryTerm & getQueryTermData(int term) const {
+ return _queryTerms[term];
+ }
+
+ /**
+ * Returns the term match for a specified term.
+ *
+ * @param The index of the term match to return.
+ * @return The term match.
+ */
+ const search::fef::TermFieldMatchData *getQueryTermFieldMatch(int term) const {
+ return _queryTermFieldMatch[term];
+ }
+
+ /**
+ * Returns the total weight of all query terms.
+ *
+ * @return The total weight.
+ */
+ uint32_t getTotalTermWeight() const {
+ return _totalTermWeight;
+ }
+
+ /**
+ * Returns the total significance of all query terms.
+ *
+ * @return The total significance.
+ */
+ feature_t getTotalTermSignificance() const {
+ return _totalTermSignificance;
+ }
+
+ /**
+ * Returns a string representation of this computer.
+ *
+ * @return A string representation.
+ */
+ vespalib::string toString() const;
+
+ /**
+ * Returns the simple metrics computed while traversing the list of query terms in the constructor.
+ *
+ * @return the simple metrics object.
+ */
+ const SimpleMetrics & getSimpleMetrics() const {
+ return _simpleMetrics;
+ }
+
+
+private:
+ /**
+ * Finds segment candidates and explores them until we have the best segmentation history of the entire query.
+ */
+ void exploreSegments();
+
+ /**
+ * Find correspondences from a segment starting point startI.
+ *
+ * @param segment The segment starting point.
+ * @return True if a segment was found, false if none could be found.
+ */
+ bool findAlternativeSegmentFrom(SegmentStart *segment);
+
+ /**
+ * A match occured within a segment, report this to the metric as appropriate.
+ *
+ * @param i The current query term index.
+ * @param j The current field term index.
+ * @param previousJ The previous field term index.
+ * @param previousI The previous query term index.
+ */
+ void inSegment(int i, int j, int previousJ, int previousI);
+
+ /**
+ * Returns whether this segment was accepted as a starting point.
+ *
+ * @param i The current query term index.
+ * @param j The current field term index.
+ * @param previousJ The previous field term index.
+ * @return Whether this segment was accepted or not.
+ */
+ bool segmentStart(int i, int j, int previousJ);
+
+ /**
+ * Registers an end of a segment.
+ *
+ * @param i The i at which this segment ends.
+ * @param j The j at which this segment ends.
+ */
+ void segmentEnd(int i, int j);
+
+ /**
+ * Returns the next open segment to explore, or null if no more segments exists or should be explored.
+ *
+ * @param The i to start searching from.
+ * @return The next open segment, or null.
+ */
+ SegmentStart *findOpenSegment(uint32_t startI);
+
+ /**
+ * Returns the last segment start point in the internal list.
+ *
+ * @return The last segment start.
+ */
+ SegmentStart *findLastStartPoint();
+
+ /**
+ * Counts all occurrences of terms of the query in the field and set those metrics.
+ *
+ * @param metrics The metrics to update.
+ */
+ void setOccurrenceCounts(Metrics &metrics);
+
+ void handleError(uint32_t fieldPos, uint32_t docId) const __attribute__((noinline));
+
+
+private:
+ typedef std::shared_ptr<search::BitVector> BitVectorPtr;
+ typedef std::vector<const search::fef::TermFieldMatchData *> TermFieldMatchDataVector;
+
+ struct SegmentData {
+ SegmentData() : segment(), valid(false) {}
+ SegmentData(const SegmentStart::SP & ss, bool v = false) : segment(ss), valid(v) {}
+ SegmentStart::SP segment;
+ bool valid;
+ };
+
+ struct BitVectorData {
+ BitVectorData() : bitvector(0), valid(false) {}
+ search::AllocatedBitVector bitvector;
+ bool valid;
+ };
+
+ // per query
+ const search::fef::PhraseSplitter & _splitter;
+ uint32_t _fieldId;
+ Params _params;
+ bool _tracing;
+ std::vector<vespalib::string> _trace;
+ bool _useCachedHits;
+
+ QueryTermVector _queryTerms;
+ TermFieldMatchDataVector _queryTermFieldMatch;
+ uint32_t _totalTermWeight;
+ feature_t _totalTermSignificance;
+
+ // per docid
+ const search::fef::MatchData * _match;
+ uint32_t _fieldLength;
+ Metrics _currentMetrics; // The metrics of the currently explored segmentation.
+ Metrics _finalMetrics; // The final metrics, null during and before metric computation.
+ SimpleMetrics _simpleMetrics; // The metrics used to compute simple features.
+ std::vector<SegmentData> _segments; // Known segment starting points.
+ uint32_t _alternativeSegmentationsTried;
+ std::vector<BitVectorData> _cachedHits;
+};
+
+} // fieldmatch
+} // features
+} // search
+
diff --git a/searchlib/src/vespa/searchlib/features/fieldmatch/metrics.cpp b/searchlib/src/vespa/searchlib/features/fieldmatch/metrics.cpp
new file mode 100644
index 00000000000..89da22f079e
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/fieldmatch/metrics.cpp
@@ -0,0 +1,344 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".features.fieldmatch.metrics");
+
+#include <algorithm>
+#include <math.h>
+#include <vespa/vespalib/util/stringfmt.h>
+#include "computer.h"
+#include "metrics.h"
+
+namespace search {
+namespace features {
+namespace fieldmatch {
+
+Metrics::Metrics(const Computer *source) :
+ _source(source),
+ _complete(false),
+ _outOfOrder(0),
+ _segments(0),
+ _gaps(0),
+ _gapLength(0),
+ _longestSequence(1),
+ _head(-1),
+ _tail(-1),
+ _matches(0),
+ _proximity(0),
+ _unweightedProximity(0),
+ _segmentDistance(0),
+ _pairs(0),
+ _weight(0),
+ _significance(0),
+ _occurrence(0), // default not given
+ _weightedOccurrence(0), // default not given
+ _absoluteOccurrence(0), // default not given
+ _weightedAbsoluteOccurrence(0), // default not given
+ _significantOccurrence(0), // default not given
+ _currentSequence(0),
+ _segmentStarts(),
+ _queryLength(_source->getNumQueryTerms())
+{
+ _segmentStarts.reserve(100);
+}
+
+Metrics::Metrics(const Metrics &rhs) :
+ _source(rhs._source),
+ _complete(rhs._complete),
+ _outOfOrder(rhs._outOfOrder),
+ _segments(rhs._segments),
+ _gaps(rhs._gaps),
+ _gapLength(rhs._gapLength),
+ _longestSequence(rhs._longestSequence),
+ _head(rhs._head),
+ _tail(rhs._tail),
+ _matches(rhs._matches),
+ _proximity(rhs._proximity),
+ _unweightedProximity(rhs._unweightedProximity),
+ _segmentDistance(rhs._segmentDistance),
+ _pairs(rhs._pairs),
+ _weight(rhs._weight),
+ _significance(rhs._significance),
+ _occurrence(rhs._occurrence),
+ _weightedOccurrence(rhs._weightedOccurrence),
+ _absoluteOccurrence(rhs._absoluteOccurrence),
+ _weightedAbsoluteOccurrence(rhs._weightedAbsoluteOccurrence),
+ _significantOccurrence(rhs._significantOccurrence),
+ _currentSequence(rhs._currentSequence),
+ _segmentStarts(rhs._segmentStarts),
+ _queryLength(rhs._queryLength)
+{
+}
+
+Metrics &
+Metrics::operator=(const Metrics & rhs)
+{
+ if (this != &rhs) {
+ _source = rhs._source;
+ _complete = rhs._complete;
+ _outOfOrder = rhs._outOfOrder;
+ _segments = rhs._segments;
+ _gaps = rhs._gaps;
+ _gapLength = rhs._gapLength;
+ _longestSequence = rhs._longestSequence;
+ _head = rhs._head;
+ _tail = rhs._tail;
+ _matches = rhs._matches;
+ _proximity = rhs._proximity;
+ _unweightedProximity = rhs._unweightedProximity;
+ _segmentDistance = rhs._segmentDistance;
+ _pairs = rhs._pairs;
+ _weight = rhs._weight;
+ _significance = rhs._significance;
+ _occurrence = rhs._occurrence;
+ _weightedOccurrence = rhs._weightedOccurrence;
+ _absoluteOccurrence = rhs._absoluteOccurrence;
+ _weightedAbsoluteOccurrence = rhs._weightedAbsoluteOccurrence;
+ _significantOccurrence = rhs._significantOccurrence;
+ _currentSequence = rhs._currentSequence;
+ _segmentStarts = rhs._segmentStarts;
+ _queryLength = rhs._queryLength;
+ }
+ return *this;
+}
+
+void
+Metrics::reset()
+{
+ _complete = false;
+ _outOfOrder = 0;
+ _segments = 0;
+ _gaps = 0;
+ _gapLength = 0;
+ _longestSequence = 1;
+ _head = -1;
+ _tail = -1;
+ _matches = 0;
+ _proximity = 0;
+ _unweightedProximity = 0;
+ _segmentDistance = 0;
+ _pairs = 0;
+ _weight = 0;
+ _significance = 0;
+ _occurrence = 0;
+ _weightedOccurrence = 0;
+ _absoluteOccurrence = 0;
+ _weightedAbsoluteOccurrence = 0;
+ _significantOccurrence = 0;
+ _currentSequence = 0;
+ _segmentStarts.clear();
+ _queryLength = _source->getNumQueryTerms();
+}
+
+feature_t
+Metrics::getQueryCompleteness() const
+{
+ return _queryLength > 0 ? (feature_t)_matches / _queryLength : 0.0f;
+}
+
+feature_t
+Metrics::getFieldCompleteness() const
+{
+ if (_source->getFieldLength() == 0) {
+ return 0; // default
+ }
+ return (feature_t)_matches / _source->getFieldLength();
+}
+
+feature_t
+Metrics::getCompleteness() const
+{
+ feature_t importance = _source->getParams().getFieldCompletenessImportance();
+ return getQueryCompleteness() * (1 - importance) + (importance * getFieldCompleteness());
+}
+
+feature_t
+Metrics::getRelatedness() const
+{
+ if (_matches == 0) {
+ return 0;
+ }
+ else if (_matches == 1) {
+ return 1;
+ }
+ else {
+ return 1 - (feature_t)(_segments - 1) / (_matches - 1);
+ }
+}
+
+feature_t
+Metrics::getSegmentProximity() const
+{
+ if (_source->getFieldLength() == 0) {
+ return 0; // default
+ }
+ return _matches == 0 ? 0.0f : 1 - (feature_t)_segmentDistance / _source->getFieldLength();
+}
+
+feature_t
+Metrics::getProximity() const
+{
+ feature_t totalConnectedness = 0;
+ for (uint32_t i = 1; i < _queryLength; i++) {
+ totalConnectedness += std::max(0.1, _source->getQueryTermData(i).connectedness());
+ }
+ feature_t averageConnectedness = 0.1f;
+ if (_queryLength > 1) {
+ averageConnectedness = totalConnectedness / (_queryLength - 1);
+ }
+ return getAbsoluteProximity() / averageConnectedness;
+}
+
+feature_t
+Metrics::getEarliness() const
+{
+ if (_matches == 0) {
+ return 0; // covers (field.length == 0) too
+ }
+ else if (_source->getFieldLength() == 1) {
+ return 1;
+ }
+ else {
+ return 1 - (feature_t)_head / (std::max(6u, _source->getFieldLength()) - 1);
+ }
+}
+
+feature_t
+Metrics::getMatch() const
+{
+ feature_t proximityCompletenessImportance = _source->getParams().getProximityCompletenessImportance();
+ feature_t earlinessImportance = _source->getParams().getEarlinessImportance();
+ feature_t relatednessImportance = _source->getParams().getRelatednessImportance();
+ feature_t segmentProximityImportance = _source->getParams().getSegmentProximityImportance();
+ feature_t occurrenceImportance = _source->getParams().getOccurrenceImportance();
+
+ feature_t scaledRelatedness = 1 - relatednessImportance + relatednessImportance * getRelatedness();
+
+ return
+ (proximityCompletenessImportance * scaledRelatedness * getProximity() * getCompleteness()*getCompleteness()
+ + earlinessImportance * getEarliness()
+ + segmentProximityImportance * getSegmentProximity()
+ + occurrenceImportance * getOccurrence()) /
+ (proximityCompletenessImportance + earlinessImportance + segmentProximityImportance + occurrenceImportance);
+}
+
+feature_t
+Metrics::getSegmentationScore() const
+{
+ feature_t retval = 0.0f;
+ if (_segments > 0) {
+ retval = getAbsoluteProximity() / (_segments * _segments);
+ }
+ return retval;
+}
+
+void
+Metrics::onMatch(uint32_t i)
+{
+ if (_matches >= _source->getFieldLength()) {
+ return;
+ }
+ _matches++;
+ _weight += _source->getTotalTermWeight() > 0 ?
+ (feature_t)_source->getQueryTermData(i).termData()->getWeight().percent() / _source->getTotalTermWeight() : 0.0f;
+ _significance += _source->getTotalTermSignificance() > 0.0f ?
+ _source->getQueryTermData(i).significance() / _source->getTotalTermSignificance() : 0.0f;
+}
+
+void
+Metrics::onSequenceStart(uint32_t j)
+{
+ if (_head == -1 || (int)j < _head) {
+ _head = j;
+ }
+ _currentSequence = 1;
+}
+
+void
+Metrics::onSequenceEnd(uint32_t j)
+{
+ int sequenceTail = _source->getFieldLength() - j - 1;
+ if (_tail == -1 || sequenceTail < _tail) {
+ _tail = sequenceTail;
+ }
+ if (_currentSequence > _longestSequence) {
+ _longestSequence = _currentSequence;
+ }
+ _currentSequence = 0;
+}
+
+void
+Metrics::onComplete()
+{
+ if (_segmentStarts.size() <= 1) {
+ _segmentDistance = 0;
+ }
+ else {
+ std::sort(_segmentStarts.begin(), _segmentStarts.end());
+ for (uint32_t i = 1; i < _segmentStarts.size(); i++) {
+ _segmentDistance += _segmentStarts[i] - _segmentStarts[i - 1] + 1;
+ }
+ }
+ if (_head == -1) {
+ _head = 0;
+ }
+ if (_tail == -1) {
+ _tail = 0;
+ }
+}
+
+void
+Metrics::onPair(uint32_t i, uint32_t j, uint32_t previousJ)
+{
+ int distance = j - previousJ - 1;
+ if (distance < 0) {
+ distance++; // discontinuity if two letters are in the same position
+ }
+ if (((unsigned int)std::abs(distance)) > _source->getParams().getProximityLimit()) {
+ return; // no contribution
+ }
+ feature_t pairProximity = _source->getParams().getProximityTable()[distance +
+ _source->getParams().getProximityLimit()];
+ _unweightedProximity += pairProximity;
+
+ feature_t connectedness = _source->getQueryTermData(i).connectedness();
+ _proximity += pow(pairProximity, connectedness / 0.1) * std::max(0.1, connectedness);
+ _pairs++;
+}
+
+void
+Metrics::onInSequence(uint32_t, uint32_t, uint32_t)
+{
+ _currentSequence++;
+}
+
+void
+Metrics::onInSegmentGap(uint32_t, uint32_t j, uint32_t previousJ)
+{
+ _gaps++;
+ if (j > previousJ) {
+ _gapLength += abs((int)j - (int)previousJ) - 1; // gap length may be 0 if the gap was in the query
+ }
+ else {
+ _outOfOrder++;
+ _gapLength += abs((int)j - (int)previousJ);
+ }
+}
+
+void
+Metrics::onNewSegment(uint32_t, uint32_t j, uint32_t)
+{
+ _segments++;
+ _segmentStarts.push_back(j);
+}
+
+vespalib::string
+Metrics::toString() const
+{
+ return vespalib::make_string("Metrics(match %f)", getMatch());
+}
+
+
+} // fieldmatch
+} // features
+} // search
diff --git a/searchlib/src/vespa/searchlib/features/fieldmatch/metrics.h b/searchlib/src/vespa/searchlib/features/fieldmatch/metrics.h
new file mode 100644
index 00000000000..6b826f09e57
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/fieldmatch/metrics.h
@@ -0,0 +1,563 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vector>
+#include <vespa/searchlib/common/feature.h>
+
+namespace search {
+namespace features {
+namespace fieldmatch {
+
+class Computer;
+
+/**
+ * The collection of metrics calculated by the string match metric calculator.
+ *
+ * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a>
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
+ * @version $Id$
+ */
+class Metrics {
+public:
+ /**
+ * Convenience typedefs.
+ */
+ typedef std::unique_ptr<Metrics> UP;
+ typedef std::shared_ptr<Metrics> SP;
+
+public:
+ /**
+ * Constructs a new metrics object.
+ *
+ * @param source The source of this.
+ */
+ Metrics(const Computer *source);
+
+ /**
+ * Implements the copy constructor.
+ *
+ * @param rhs The metrics to copy.
+ */
+ Metrics(const Metrics &rhs);
+
+ /**
+ * Implements the assignment operator.
+ */
+ Metrics & operator=(const Metrics & rhs);
+
+ /**
+ * Resets this object.
+ */
+ void reset();
+
+ /**
+ * Are these metrics representing a complete match.
+ *
+ * @return Whether or not this represents a complete match.
+ */
+ bool isComplete() const {
+ return _complete;
+ }
+
+ /**
+ * Sets whether or not these metrics represent a complete match.
+ *
+ * @param complete Whether or not this represents a complete match.
+ * @return This, to allow chaining.
+ */
+ Metrics &setComplete(bool complete) {
+ _complete = complete;
+ return *this;
+ }
+
+ /**
+ * Returns the segment start points.
+ *
+ * @return The start point list.
+ */
+ std::vector<uint32_t> &getSegmentStarts() {
+ return _segmentStarts;
+ }
+
+ /**
+ * Returns the total number of out of order token sequences within field segments.
+ *
+ * @return The number of tokens.
+ */
+ uint32_t getOutOfOrder() const {
+ return _outOfOrder;
+ }
+
+ /**
+ * Returns the number of field text segments which are needed to match the query as completely as possible.
+ *
+ * @return The number of segments.
+ */
+ uint32_t getSegments() const {
+ return _segments;
+ }
+
+ /**
+ * Returns the total number of position jumps (backward or forward) within document segments.
+ *
+ * @return The number of position jumps.
+ */
+ uint32_t getGaps() const {
+ return _gaps;
+ }
+
+ /**
+ * Returns the summed size of all gaps within segments.
+ *
+ * @return The summed size.
+ */
+ uint32_t getGapLength() const {
+ return _gapLength;
+ }
+
+ /**
+ * Returns the size of the longest matched continuous, in-order sequence in the document.
+ *
+ * @return The size of the sequence.
+ */
+ uint32_t getLongestSequence() const {
+ return _longestSequence;
+ }
+
+ /**
+ * Returns the number of tokens in the field preceding the start of the first matched segment.
+ *
+ * @return The number of tokens.
+ */
+ int getHead() const {
+ return _head;
+ }
+
+ /**
+ * Returns the number of tokens in the field following the end of the last matched segment.
+ *
+ * @return The number of tokens.
+ */
+ int getTail() const {
+ return _tail;
+ }
+
+ /**
+ * Returns the number of query terms which was matched in this field.
+ *
+ * @return The number of matched terms.
+ */
+ uint32_t getMatches() const {
+ return _matches;
+ }
+
+ /**
+ * Returns the number of in-segment token pairs.
+ *
+ * @return The number of token pairs.
+ */
+ uint32_t getPairs() const {
+ return _pairs;
+ }
+
+ /**
+ * Returns the normalized proximity of the matched terms, weighted by the connectedness of the query terms. This
+ * number is 0.1 if all the matched terms are and have default or lower connectedness, close to 1 if they are
+ * following in sequence and have a high connectedness, and close to 0 if they are far from each other in the
+ * segment or out of order.
+ *
+ * @return The proximity.
+ */
+ feature_t getAbsoluteProximity() const {
+ return _pairs < 1 ? 0.1f : _proximity / _pairs;
+ }
+
+ /**
+ * Returns the normalized proximity of the matched terms, not taking term connectedness into account. This number
+ * is close to 1 if all the matched terms are following each other in sequence, and close to 0 if they are far from
+ * each other or out of order
+ *
+ * @return The proximity.
+ */
+ feature_t getUnweightedProximity() const {
+ return _pairs < 1 ? 1.0f : _unweightedProximity / _pairs;
+ }
+
+ /**
+ * Returns the sum of the distance between all segments making up a match to the query, measured as the sum of the
+ * number of token positions separating the <i>start</i> of each field adjacent segment.
+ *
+ * @return The sum distance.
+ */
+ feature_t getSegmentDistance() const {
+ return _segmentDistance;
+ }
+
+ /**
+ * <p>Returns the normalized weight of this match relative to the whole query: The sum of the weights of all
+ * <i>matched</i> terms/the sum of the weights of all <i>query</i> terms If all the query terms were matched, this
+ * is 1. If no terms were matched, or these matches has weight zero, this is 0.</p>
+ *
+ * <p>As the sum of this number over all the terms of the query is always 1, sums over all fields of normalized rank
+ * features for each field multiplied by this number for the same field will produce a normalized number.</p>
+ *
+ * <p>Note that this scales with the number of matched query terms in the field. If you want a component which does
+ * not, divide by matches.</p>
+ *
+ * @return The normalized weight.
+ */
+ feature_t getWeight() const {
+ return _weight;
+ }
+
+ /**
+ * <p>Returns the normalized term significance (1-frequency) of the terms of this match relative to the whole query:
+ * The sum of the significance of all <i>matched</i> terms/the sum of the significance of all <i>query</i> terms If
+ * all the query terms were matched, this is 1. If no terms were matched, or if the significance of all the matched
+ * terms is zero (they are present in all (possible) documents), this number is zero.</p>
+ *
+ * <p>As the sum of this number over all the terms of the query is always 1, sums over all fields of normalized rank
+ * features for each field multiplied by this number for the same field will produce a normalized number.</p>
+ *
+ * <p>Note that this scales with the number of matched query terms in the field. If you want a component which does
+ * not, divide by matches.</p>
+ *
+ * @return The normalized significance.
+ */
+ feature_t getSignificance() const {
+ return _significance;
+ }
+
+ /**
+ * <p>Returns a normalized measure of the number of occurrence of the terms of the query. This number is 1 if there
+ * are many occurences of the query terms <i>in absolute terms, or relative to the total content of the field</i>,
+ * and 0 if there are none.</p>
+ *
+ * <p>This is suitable for occurence in fields containing regular text.</p>
+ *
+ * @return The normalized number of occurences.
+ */
+ feature_t getOccurrence() const {
+ return _occurrence;
+ }
+
+ /**
+ * <p>Returns a normalized measure of the number of occurrence of the terms of the query:
+ *
+ * <code>sum over all query terms(min(number of occurences of the term, maxOccurrences)) / (query term count *
+ * 100)</code>
+ *
+ * <p>This number is 1 if there are many occurrences of the query terms, and 0 if there are none. This number does
+ * not take the actual length of the field into account, so it is suitable for uses of occurrence to denote
+ * importance across multiple terms.</p>
+ *
+ * @return The normalized number of occurences.
+ */
+ feature_t getAbsoluteOccurrence() const {
+ return _absoluteOccurrence;
+ }
+
+ /**
+ * <p>Returns a normalized measure of the number of occurrence of the terms of the query, weighted by term weight.
+ * This number is close to 1 if there are many occurrences of highly weighted query terms, in absolute terms, or
+ * relative to the total content of the field, and 0 if there are none.</p>
+ *
+ * @return The normalized measure of weighted occurences.
+ */
+ feature_t getWeightedOccurrence() const {
+ return _weightedOccurrence;
+ }
+
+ /**
+ * <p>Returns a normalized measure of the number of occurrence of the terms of the query, taking weights into
+ * account so that occurrences of higher weighted query terms has more impact than lower weighted terms.</p>
+ *
+ * <p>This number is 1 if there are many occurrences of the highly weighted terms, and 0 if there are none. This
+ * number does not take the actual length of the field into account, so it is suitable for uses of occurrence to
+ * denote importance across multiple terms.</p>
+ *
+ * @return The normalized measure of weighted occurences.
+ */
+ feature_t getWeightedAbsoluteOccurrence() const {
+ return _weightedAbsoluteOccurrence;
+ }
+
+ /**
+ * <p>Returns a normalized measure of the number of occurrence of the terms of the query <i>in absolute terms, or
+ * relative to the total content of the field</i>, weighted by term significance.
+ *
+ * <p>This number is 1 if there are many occurrences of the highly significant terms, and 0 if there are none.</p>
+ *
+ * @return The normalized measure of occurences, weighted by significance.
+ */
+ feature_t getSignificantOccurrence() const {
+ return _significantOccurrence;
+ }
+
+ /**
+ * The ratio of query tokens which was matched in the field: <code>matches/queryLength</code>.
+ *
+ * @return The query completeness.
+ */
+ feature_t getQueryCompleteness() const;
+
+ /**
+ * The ratio of query tokens which was matched in the field: <code>matches/fieldLength</code>.
+ *
+ * @return The field completeness.
+ */
+ feature_t getFieldCompleteness() const;
+
+ /**
+ * Total completeness, where field completeness is more important: <code>queryCompleteness * ( 1 -
+ * fieldCompletenessImportancy + fieldCompletenessImportancy * fieldCompleteness )</code>
+ *
+ * @return The total completeness.
+ */
+ feature_t getCompleteness() const;
+
+ /**
+ * Returns how well the order of the terms agreed in segments: <code>1-outOfOrder/pairs</code>.
+ *
+ * @return The orderness of terms.
+ */
+ feature_t getOrderness() const {
+ return _pairs < 1 ? 1.0f : 1 - (feature_t)_outOfOrder / _pairs;
+ }
+
+ /**
+ * Returns the degree to which different terms are related (occurring in the same segment):
+ * <code>1-segments/(matches-1)</code>.
+ *
+ * @return The relatedness of terms.
+ */
+ feature_t getRelatedness() const;
+
+ /**
+ * Returns <code>longestSequence/matches</code>
+ *
+ * @return The longest sequence ratio.
+ */
+ feature_t getLongestSequenceRatio() const {
+ return _matches == 0 ? 0.0f : (feature_t)_longestSequence / _matches;
+ }
+
+ /**
+ * Returns the closeness of the segments in the field: <code>1-segmentDistance/fieldLength</code>.
+ *
+ * @return The segment proximity.
+ */
+ feature_t getSegmentProximity() const;
+
+ /**
+ * Returns a value which is close to 1 when matched terms are close and close to zero when they are far apart in the
+ * segment. Relatively more connected terms influence this value more. This is absoluteProximity/average
+ * connectedness.
+ *
+ * @return The matched term proximity.
+ */
+ feature_t getProximity() const;
+
+ /**
+ * <p>Returns the average of significance and weight.</p>
+ *
+ * <p>As the sum of this number over all the terms of the query is always 1, sums over all fields of normalized rank
+ * features for each field multiplied by this number for the same field will produce a normalized number.</p>
+ *
+ * <p>Note that this scales with the number of matched query terms in the field. If you want a component which does
+ * not, divide by matches.</p>
+ *
+ * @return The importance.
+ */
+ feature_t getImportance() const {
+ return (getSignificance() + getWeight()) / 2;
+ }
+
+ /**
+ * A normalized measure of how early the first segment occurs in this field:
+ * <code>1-(head+1)/max(6,field.length)</code>.
+ *
+ * @return The earliness of the first segment.
+ */
+ feature_t getEarliness() const;
+
+ /**
+ * <p>A ready-to-use aggregate match score. Use this if you don't have time to find a better application specific
+ * aggregate score of the fine grained match metrics.</p>
+ *
+ * <p>The current forumla is
+ *
+ * <code> ( proximityCompletenessImportance * (1-relatednessImportance + relatednessImportance*relatedness)
+ * proximity * completeness^2 + earlinessImportance * earliness + segmentProximityImportance * segmentProximity ) /
+ * (proximityCompletenessImportance + earlinessImportance + relatednessImportance)</code>
+ *
+ * but this is subject to change (i.e improvement) at any time. </p>
+ *
+ * <p>Weight and significance are not taken into account because this is mean to capture tha quality of the match in
+ * this field, while those measures relate this match to matches in other fields. This number can be multiplied with
+ * those values when combining with other field match scores.</p>
+ *
+ * @return The match score.
+ */
+ feature_t getMatch() const;
+
+ /**
+ * <p>The metric use to select the best segments during execution of the string match metric algoritm.</p>
+ *
+ * <p>This metric, and any metric it dependends on, must be correct each time a segment is completed, not only when
+ * the metrics are complete, because this metric is used to choose segments during calculation.</p>
+ *
+ * @return The score of the segmentation.
+ */
+ feature_t getSegmentationScore() const;
+
+ /**
+ * Called once for every match.
+ *
+ * @param i The index of the matched query term.
+ */
+ void onMatch(uint32_t i);
+
+
+ /**
+ * Called once per sequence, when the sequence starts.
+ *
+ * @param j Sequence starts at this position.
+ */
+ void onSequenceStart(uint32_t j);
+
+ /**
+ * Called once per sequence when the sequence ends.
+ *
+ * @param j Sequence ends at this position.
+ */
+ void onSequenceEnd(uint32_t j) ;
+
+ /**
+ * Called once when this value is calculated, before onComplete.
+ *
+ * @param occurence The new occurence value.
+ */
+ void setOccurrence(feature_t occurrence) {
+ _occurrence = occurrence;
+ }
+
+ /**
+ * Called once when this value is calculated, before onComplete.
+ *
+ * @param weightedOccurence The new occurence weight.
+ */
+ void setWeightedOccurrence(feature_t weightedOccurrence) {
+ _weightedOccurrence = weightedOccurrence;
+ }
+
+ /**
+ * Called once when this value is calculated, before onComplete.
+ *
+ * @param absoluteOccurence The new absolute occurence value.
+ */
+ void setAbsoluteOccurrence(feature_t absoluteOccurrence) {
+ _absoluteOccurrence = absoluteOccurrence;
+ }
+
+ /**
+ * Called once when this value is calculated, before onComplete.
+ *
+ * @param weightedAbsoluteOccurence The new absolute occurence weight.
+ */
+ void setWeightedAbsoluteOccurrence(feature_t weightedAbsoluteOccurrence) {
+ _weightedAbsoluteOccurrence = weightedAbsoluteOccurrence;
+ }
+
+ /**
+ * Called once when this value is calculated, before onComplete.
+ *
+ * @param significantOccurence The new significant occurence value.
+ */
+ void setSignificantOccurrence(feature_t significantOccurrence) {
+ _significantOccurrence = significantOccurrence;
+ }
+
+ /**
+ * Called once when matching is complete.
+ */
+ void onComplete();
+
+ /**
+ * Called when <i>any</i> pair is encountered.
+ *
+ * @param i The query term matched.
+ * @param j The field term index.
+ * @param previousJ The end of the previous segment, or -1 if this is the first segment.
+ */
+ void onPair(uint32_t i, uint32_t j, uint32_t previousJ);
+
+ /**
+ * Called when an in-sequence pair is encountered.
+ *
+ * @param i The query term matched.
+ * @param j The field term index.
+ * @param previousJ The end of the previous segment, or -1 if this is the first segment.
+ */
+ void onInSequence(uint32_t i, uint32_t j, uint32_t previousJ);
+
+ /**
+ * Called when a gap (within a sequence) is encountered.
+ *
+ * @param i The query term matched.
+ * @param j The field term index.
+ * @param previousJ The end of the previous segment, or -1 if this is the first segment.
+ */
+ void onInSegmentGap(uint32_t i, uint32_t j, uint32_t previousJ);
+
+ /**
+ * Called when a new segment is started
+ *
+ * @param i The query term matched.
+ * @param j The field term index.
+ * @param previousJ The end of the previous segment, or -1 if this is the first segment.
+ * */
+ void onNewSegment(uint32_t i, uint32_t j, uint32_t previousJ);
+
+ /**
+ * Returns a string representation of this.
+ *
+ * @return A string representation.
+ */
+ vespalib::string toString() const;
+
+private:
+ const Computer *_source;
+ bool _complete;
+
+ // Metrics
+ uint32_t _outOfOrder;
+ uint32_t _segments;
+ uint32_t _gaps;
+ uint32_t _gapLength;
+ uint32_t _longestSequence;
+ int _head;
+ int _tail;
+ uint32_t _matches;
+ feature_t _proximity;
+ feature_t _unweightedProximity;
+ feature_t _segmentDistance;
+ uint32_t _pairs;
+ feature_t _weight;
+ feature_t _significance;
+ feature_t _occurrence;
+ feature_t _weightedOccurrence;
+ feature_t _absoluteOccurrence;
+ feature_t _weightedAbsoluteOccurrence;
+ feature_t _significantOccurrence;
+
+ // Temporary variables
+ uint32_t _currentSequence;
+ std::vector<uint32_t> _segmentStarts;
+ uint32_t _queryLength; // num terms searching this field
+};
+
+} // fieldmatch
+} // features
+} // search
+
diff --git a/searchlib/src/vespa/searchlib/features/fieldmatch/params.cpp b/searchlib/src/vespa/searchlib/features/fieldmatch/params.cpp
new file mode 100644
index 00000000000..34a23bb3642
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/fieldmatch/params.cpp
@@ -0,0 +1,45 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".features.fieldmatch.params");
+
+#include "params.h"
+
+namespace search {
+namespace features {
+namespace fieldmatch {
+
+Params::Params() :
+ _proximityLimit(10),
+ _maxAlternativeSegmentations(1000),
+ _maxOccurrences(100),
+ _proximityCompletenessImportance(0.9f),
+ _relatednessImportance(0.9f),
+ _earlinessImportance(0.05f),
+ _segmentProximityImportance(0.05f),
+ _occurrenceImportance(0.05f),
+ _fieldCompletenessImportance(0.05f),
+ _proximityTable()
+{
+ feature_t table[] = { 0.01f, 0.02f, 0.03f, 0.04f, 0.06f, 0.08f, 0.12f, 0.17f, 0.24f, 0.33f, 1,
+ 0.71f, 0.50f, 0.35f, 0.25f, 0.18f, 0.13f, 0.09f, 0.06f, 0.04f, 0.03f };
+ for (uint32_t i = 0; i < _proximityLimit * 2 + 1; ++i) {
+ _proximityTable.push_back(table[i]);
+ }
+}
+
+bool
+Params::valid()
+{
+ if (_proximityTable.size() != (_proximityLimit * 2 + 1)) {
+ LOG(error, "Proximity table length is invalid. Proximity limit is %d, but table has only %zd elements "
+ "(must be proximityLimit * 2 + 1).",
+ _proximityLimit, _proximityTable.size());
+ return false;
+ }
+ return true;
+}
+
+}
+}
+}
diff --git a/searchlib/src/vespa/searchlib/features/fieldmatch/params.h b/searchlib/src/vespa/searchlib/features/fieldmatch/params.h
new file mode 100644
index 00000000000..f3ff7558971
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/fieldmatch/params.h
@@ -0,0 +1,261 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vector>
+#include <vespa/searchlib/common/feature.h>
+
+namespace search {
+namespace features {
+namespace fieldmatch {
+
+/**
+ * The parameters to a string match metric calculator.
+ *
+ * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a>
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
+ * @version $Id$
+ */
+class Params {
+public:
+ /**
+ * Creates a marcg metrics object initialized to the default values.
+ */
+ Params();
+
+ /**
+ * Returns whether or not this parameter object contains valid content. If it is NOT valid, a descriptive string
+ * will be logged for reference.
+ *
+ * @return Whether or not this object is valid.
+ */
+ bool valid();
+
+ /**
+ * Sets the number of tokens within which proximity matters. Default: 10
+ *
+ * @param proximityLimit The number of tokens.
+ * @param This, to allow chaining.
+ */
+ Params &setProximityLimit(uint32_t proximityLimit) {
+ _proximityLimit = proximityLimit;
+ return *this;
+ }
+
+ /**
+ * Returns the number of tokens within which proximity matters. Default: 10
+ *
+ * @return The number of tokens.
+ */
+ uint32_t getProximityLimit() const {
+ return _proximityLimit;
+ }
+
+ /**
+ * Sets the proximity table deciding the importance of separations of various distances, The table must have size
+ * proximityLimit*2+1, where the first half is for reverse direction distances. The table must only contain values
+ * between 0 and 1, where 1 is "perfect" and 0 is "worst".
+ *
+ * @param proximityTable The proximity table.
+ * @return This, to allow chaining.
+ */
+ Params &setProximityTable(const std::vector<feature_t> &proximityTable) {
+ _proximityTable = proximityTable;
+ return *this;
+ }
+
+ /**
+ * Returns the current proxmity table. The default table is calculated by <code>1/2^(n/2)</code> on the right order
+ * side, and <code>1/2^(n/2) /3</code> on the reverse order side where n is the distance between the tokens.
+ *
+ * @return The proximity table.
+ */
+ const std::vector<feature_t> &getProximityTable() const {
+ return _proximityTable;
+ }
+
+ /**
+ * Returns the maximal number of <i>alternative</i> segmentations allowed in addition to the first one found.
+ * Default is 10000. This will prefer to not consider iterations on segments that are far out in the field, and
+ * which starts late in the query.
+ *
+ * @return The max number of alternative iterations.
+ */
+ uint32_t getMaxAlternativeSegmentations() const {
+ return _maxAlternativeSegmentations;
+ }
+
+ /**
+ * Sets the maximal number of alternative segmentations allowed in addition to the first one found.
+ *
+ * @param maxAlternativeSegmentations The max number of alternative iterations.
+ * @return This, to allow chaining.
+ */
+ Params &setMaxAlternativeSegmentations(uint32_t maxAlternativeSegmentations) {
+ _maxAlternativeSegmentations = maxAlternativeSegmentations;
+ return *this;
+ }
+
+ /**
+ * Returns the number of occurrences each word is normalized against. This should be set as the number above which
+ * additional occurrences of the term has no real significance. The default is 100.
+ *
+ * @return The max number of occurences.
+ */
+ uint32_t getMaxOccurrences() const {
+ return _maxOccurrences;
+ }
+
+ /**
+ * Sets the number occurences each word is normalized against.
+ *
+ * @params maxOccurences The max number of occurences.
+ * @return This, to allow chaining.
+ */
+ Params &setMaxOccurrences(uint32_t maxOccurrences) {
+ _maxOccurrences = maxOccurrences;
+ return *this;
+ }
+
+ /**
+ * Returns a number between 0 and 1 which determines the importance of field completeness in relation to query
+ * completeness in the <code>match</code> and <code>completeness</code> metrics. Default is 0.05
+ *
+ * @return The importance of field completeness.
+ */
+ feature_t getFieldCompletenessImportance() const {
+ return _fieldCompletenessImportance;
+ }
+
+ /**
+ * Sets the importance of this field's completeness.
+ *
+ * @param fieldCompletenessImportance The importance of field completeness.
+ * @return This, to allow chaining.
+ */
+ Params &setFieldCompletenessImportance(feature_t fieldCompletenessImportance) {
+ _fieldCompletenessImportance = fieldCompletenessImportance;
+ return *this;
+ }
+
+ /**
+ * Returns the importance of the match having high proximity and being complete, relative to
+ * segmentProximityImportance, occurrenceImportance and earlinessImportance in the <code>match</code>
+ * metric. Default: 0.9
+ *
+ * @return The importance of proximity AND completeness.
+ */
+ feature_t getProximityCompletenessImportance() const {
+ return _proximityCompletenessImportance;
+ }
+
+ /**
+ * Sets the importance of this fiel's proximity AND completeness.
+ *
+ * @param proximityCompletenessImportance The importance of proximity AND completeness.
+ * @return This, to allow chaining.
+ */
+ Params &setProximityCompletenessImportance(feature_t proximityCompletenessImportance) {
+ _proximityCompletenessImportance = proximityCompletenessImportance;
+ return *this;
+ }
+
+ /**
+ * Returns the importance of the match occuring early in the query, relative to segmentProximityImportance,
+ * occurrenceImportance and proximityCompletenessImportance in the <code>match</code> metric. Default: 0.05
+ *
+ * @return The importance of earliness.
+ */
+ feature_t getEarlinessImportance() const {
+ return _earlinessImportance;
+ }
+
+ /**
+ * Sets the importance of the match occuring early in the query.
+ *
+ * @param earlinessImportance The importance of earliness.
+ * @return This, to allow chaining.
+ */
+ Params &setEarlinessImportance(feature_t earlinessImportance) {
+ _earlinessImportance = earlinessImportance;
+ return *this;
+ }
+
+ /**
+ * Returns the importance of multiple segments being close to each other, relative to earlinessImportance,
+ * occurrenceImportance and proximityCompletenessImportance in the <code>match</code> metric. Default: 0.05
+ *
+ * @return The importance of segment proximity.
+ */
+ feature_t getSegmentProximityImportance() const {
+ return _segmentProximityImportance;
+ }
+
+ /**
+ * Sets the importance of multiple segments being close to each other.
+ *
+ * @param segmentProximityImportance The importance of segment proximity.
+ * @return This, to allow chaining.
+ */
+ Params &setSegmentProximityImportance(feature_t segmentProximityImportance) {
+ _segmentProximityImportance = segmentProximityImportance;
+ return *this;
+ }
+
+ /**
+ * Returns the importance of having many occurrences of the query terms, relative to earlinessImportance,
+ * segmentProximityImportance and proximityCompletenessImportance in the <code>match</code> metric. Default: 0.05
+ *
+ * @return The importance of many occurences.
+ */
+ feature_t getOccurrenceImportance() const {
+ return _occurrenceImportance;
+ }
+
+ /**
+ * Sets the importance of having many occurences of the query terms.
+ *
+ * @param occurenceImportance The importance of many occurences.
+ * @return This, to allow chaining.
+ */
+ Params &setOccurrenceImportance(feature_t occurrenceImportance) {
+ _occurrenceImportance = occurrenceImportance;
+ return *this;
+ }
+
+ /**
+ * Returns the normalized importance of relatedness used in the <code>match</code> metric. Default: 0.9
+ *
+ * @return The importance of relatedness.
+ */
+ feature_t getRelatednessImportance() const {
+ return _relatednessImportance;
+ }
+
+ /**
+ * Sets the normalized importance of relatedness used in the <code>match</code> metric.
+ *
+ * @param relatednessImportance The importance of relatedness.
+ * @return This, to allow chaining.
+ */
+ Params &setRelatednessImportance(feature_t relatednessImportance) {
+ _relatednessImportance = relatednessImportance;
+ return *this;
+ }
+
+private:
+ uint32_t _proximityLimit;
+ uint32_t _maxAlternativeSegmentations;
+ uint32_t _maxOccurrences;
+ feature_t _proximityCompletenessImportance;
+ feature_t _relatednessImportance;
+ feature_t _earlinessImportance;
+ feature_t _segmentProximityImportance;
+ feature_t _occurrenceImportance;
+ feature_t _fieldCompletenessImportance;
+ std::vector<feature_t> _proximityTable;
+};
+
+} // fieldmatch
+} // features
+} // search
+
diff --git a/searchlib/src/vespa/searchlib/features/fieldmatch/segmentstart.cpp b/searchlib/src/vespa/searchlib/features/fieldmatch/segmentstart.cpp
new file mode 100644
index 00000000000..93be549bc1f
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/fieldmatch/segmentstart.cpp
@@ -0,0 +1,103 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".features.fieldmatch.segmentstart");
+
+#include <vespa/vespalib/util/stringfmt.h>
+#include "computer.h"
+#include "metrics.h"
+#include "segmentstart.h"
+
+namespace search {
+namespace features {
+namespace fieldmatch {
+
+SegmentStart::SegmentStart(Computer *owner, const Metrics & metrics, uint32_t previousJ, uint32_t i, uint32_t j) :
+ _owner(owner),
+ _metrics(metrics),
+ _i(i),
+ _skipI(0),
+ _previousJ(previousJ),
+ _semanticDistanceExplored(0),
+ _open(true)
+{
+ if (j < std::numeric_limits<uint32_t>::max()) {
+ exploredTo(j);
+ }
+}
+
+void
+SegmentStart::reset(const Metrics & metrics, uint32_t previousJ, uint32_t i, uint32_t j)
+{
+ _metrics = metrics;
+ _i = i;
+ _skipI = 0;
+ _previousJ = previousJ;
+ _semanticDistanceExplored = 0;
+ _open = true;
+ if (j < std::numeric_limits<uint32_t>::max()) {
+ exploredTo(j);
+ }
+}
+
+SegmentStart &
+SegmentStart::exploredTo(uint32_t j)
+{
+ _semanticDistanceExplored = _owner->fieldIndexToSemanticDistance(j, _previousJ) + 1;
+ return *this;
+}
+
+bool
+SegmentStart::offerHistory(int previousJ, const Metrics & metrics)
+{
+ if (metrics.getSegmentationScore() <= _metrics.getSegmentationScore()) {
+ if (_owner->isTracing()) {
+ _owner->trace(vespalib::make_string(" Rejected offered history [score %f, ending at %d] at %s.\n",
+ metrics.getSegmentationScore(),
+ previousJ,
+ toString().c_str()));
+ }
+ return false; // reject
+ }
+
+#if 0
+ // Starting over like this achieves higher correctness if the match metric is dependent on relative distance between
+ // segments but is more expensive
+ if (_previousJ != previousJ) {
+ semanticDistanceExplored = 0;
+ open = true;
+ }
+#endif
+
+ if (_owner->isTracing()) {
+ _owner->trace(vespalib::make_string(" Accepted offered history [score %f, ending at %d] at %s.\n",
+ metrics.getSegmentationScore(),
+ previousJ,
+ toString().c_str()));
+ }
+ _previousJ = previousJ;
+ _metrics = metrics; // take a copy of the given metrics
+ return true; // accept
+}
+
+vespalib::string
+SegmentStart::toString() {
+ if (_i == _owner->getNumQueryTerms()) {
+ return vespalib::make_string("Last segment: Complete match %f, previous j %d (%s).",
+ _metrics.getMatch(),
+ _previousJ,
+ _open ? "open" : "closed");
+ }
+ else {
+ return vespalib::make_string("Segment at %d: Match %f, previous j %d, explored to %d (%s).",
+ _i,
+ _metrics.getMatch(),
+ _previousJ,
+ _semanticDistanceExplored,
+ _open ? "open" : "closed");
+ }
+}
+
+} // fieldmatch
+} // features
+} // search
diff --git a/searchlib/src/vespa/searchlib/features/fieldmatch/segmentstart.h b/searchlib/src/vespa/searchlib/features/fieldmatch/segmentstart.h
new file mode 100644
index 00000000000..8865b45a448
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/fieldmatch/segmentstart.h
@@ -0,0 +1,186 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <math.h>
+#include <string>
+#include <limits>
+#include "metrics.h"
+
+namespace search {
+namespace features {
+namespace fieldmatch {
+
+/**
+ * <p>Information on segment start points stored temporarily during string match metric calculation.</p>
+ *
+ * <p>Given that we want to start a segment at i, this holdes the best known metrics up to i and the end of the previous
+ * segment. In addition it holds information on how far we have tried to look for alternative segments from this
+ * starting point (skipI and previousJ).</p>
+ *
+ * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a>
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
+ * @version $Id$
+ */
+class SegmentStart {
+public:
+ /**
+ * Convenience typedefs.
+ */
+ typedef std::unique_ptr<SegmentStart> UP;
+ typedef std::shared_ptr<SegmentStart> SP;
+
+public:
+ /**
+ * Creates a segment start point for any i position where the j is not known.
+ *
+ * @param owner The computar that pwns th1s.
+ * @param metrics The best known metric.
+ * @param previousJ The previous j.
+ * @param i The start position.
+ * @param j The end position.
+ */
+ SegmentStart(Computer *owner, const Metrics & metrics,
+ uint32_t previousJ = 0, uint32_t i = 0,
+ uint32_t j = std::numeric_limits<uint32_t>::max());
+
+ /**
+ * Resets this object.
+ *
+ * @param metrics The best known metric.
+ * @param previousJ The previous j.
+ * @param i The start position.
+ * @param j The end position.
+ */
+ void reset(const Metrics & metrics, uint32_t previousJ = 0, uint32_t i = 0,
+ uint32_t j = std::numeric_limits<uint32_t>::max());
+
+ /**
+ * Returns the current best metrics for this starting point
+ *
+ * @return The metrics.
+ */
+ const Metrics & getMetrics() const {
+ return _metrics;
+ }
+
+ /**
+ * Stores that we have explored to a certain j from the current previousJ.
+ *
+ * @param j The new position we have explored to.
+ * @return This, to allow chaining.
+ */
+ SegmentStart &exploredTo(uint32_t j);
+
+ /**
+ * Offers an alternative history leading up to this point, which is accepted and stored if it is better than the
+ * current history
+ *
+ * @param previousJ The previous j offered.
+ * @param metrics The offered metrics.
+ * @return Whether or not the new history was accepted.
+ */
+ bool offerHistory(int previousJ, const Metrics & metrics);
+
+ /**
+ * Returns whether there are still unexplored j's for this i.
+ *
+ * @return Whether or not there are unexplored j's.
+ */
+ bool isOpen() const {
+ return _open;
+ }
+
+ /**
+ * Sets whether there are still unexplored j's for this i.
+ *
+ * @param open Whehter or not there are unexplored j's.
+ * @return This, to allow chaining.
+ */
+ SegmentStart &setOpen(bool open) {
+ _open = open;
+ return *this;
+ }
+
+ /**
+ * Returns the i for which this is the possible segment starting points.
+ *
+ * @return The i value.
+ */
+ uint32_t getI() const {
+ return _i;
+ }
+
+ /**
+ * Returns the j ending the previous segmentation producing those best metrics.
+ *
+ * @return The previous j value.
+ */
+ uint32_t getPreviousJ() const {
+ return _previousJ;
+ }
+
+ /**
+ * Returns the semantic distance from the previous j which is explored so far, exclusive
+ * (meaning, if the value is 0, 0 is <i>not</i> explored yet)
+ *
+ * @return The distance explored.
+ */
+ uint32_t getSemanticDistanceExplored() const {
+ return _semanticDistanceExplored;
+ }
+
+ /**
+ * Sets the semantic distance from the previous j which is explored so far, exclusive.
+ *
+ * @param distance The distance explored.
+ * @return This, to allow chaining.
+ */
+ SegmentStart &setSemanticDistanceExplored(uint32_t distance) {
+ _semanticDistanceExplored = distance;
+ return *this;
+ }
+
+ /**
+ * Returns the position startI we should start at from this start point i. startI==i except when there are i's from
+ * this starting point which are not found anywhere in the field. In that case, startI==i+the number of terms
+ * following i which are known not to be present.
+ *
+ * @return The start i value.
+ */
+ uint32_t getStartI() const {
+ return _i + _skipI;
+ }
+
+ /**
+ * Increments the startI by one because we have discovered that the term at the current startI is not present in the
+ * field.
+ *
+ * @return This, to allow chaining.
+ */
+ SegmentStart &incrementStartI() {
+ _skipI++;
+ return *this;
+ }
+
+ /**
+ * Returns a string representation of this.
+ *
+ * @return A string representation.
+ */
+ vespalib::string toString();
+
+private:
+ Computer *_owner;
+ Metrics _metrics; // The best known metrics up to this starting point.
+
+ uint32_t _i; // The i for which this is the possible segment starting points.
+ uint32_t _skipI;
+ uint32_t _previousJ; // The j ending the previous segmentation producing those best metrics.
+ uint32_t _semanticDistanceExplored; // The semantic distance from the current previousJ which is already explored.
+ bool _open; // There are possibly more j's to try at this starting point.
+};
+
+} // fieldmatch
+} // features
+} // search
+
diff --git a/searchlib/src/vespa/searchlib/features/fieldmatch/simplemetrics.cpp b/searchlib/src/vespa/searchlib/features/fieldmatch/simplemetrics.cpp
new file mode 100644
index 00000000000..6a6e1935ad3
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/fieldmatch/simplemetrics.cpp
@@ -0,0 +1,39 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".features.fieldmatch.simplemetrics");
+
+#include "simplemetrics.h"
+#include <vespa/vespalib/stllike/asciistream.h>
+
+namespace search {
+namespace features {
+namespace fieldmatch {
+
+SimpleMetrics::SimpleMetrics(const Params & params) :
+ _params(params),
+ _matches(0),
+ _matchesWithPosOcc(0),
+ _matchWithInvalidFieldLength(false),
+ _numTerms(0),
+ _matchedWeight(0),
+ _totalWeightInField(0),
+ _totalWeightInQuery(0)
+{
+}
+
+vespalib::string SimpleMetrics::toString() const
+{
+ vespalib::asciistream ss;
+ ss << "matches(" << _matches << "), matchedWithPosOcc(" << _matchesWithPosOcc << "), ";
+ ss << "matchWithInvalidFieldLength(" << (_matchWithInvalidFieldLength ? "true" : "false") << "), ";
+ ss << "numTerms(" << _numTerms << "), ";
+ ss << "matchedWeight(" << _matchedWeight << "), totalWeightInField(" << _totalWeightInField << "), ";
+ ss << "totalWeightInQuery(" << _totalWeightInQuery << ")";
+ return ss.str();
+}
+
+
+} // fieldmatch
+} // features
+} // search
diff --git a/searchlib/src/vespa/searchlib/features/fieldmatch/simplemetrics.h b/searchlib/src/vespa/searchlib/features/fieldmatch/simplemetrics.h
new file mode 100644
index 00000000000..b2de310f080
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/fieldmatch/simplemetrics.h
@@ -0,0 +1,186 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/searchlib/common/feature.h>
+#include <vespa/vespalib/stllike/string.h>
+#include "params.h"
+
+namespace search {
+namespace features {
+namespace fieldmatch {
+
+/**
+ * The collection of simple metrics calculated when traversing the query terms of the query environment.
+ **/
+class SimpleMetrics {
+private:
+ const Params & _params;
+ uint32_t _matches;
+ uint32_t _matchesWithPosOcc;
+ bool _matchWithInvalidFieldLength; // 0 or UNKNOWN_LENGTH
+ uint32_t _numTerms;
+ uint32_t _matchedWeight;
+ uint32_t _totalWeightInField;
+ uint32_t _totalWeightInQuery;
+
+public:
+ /**
+ * Constructs a new object.
+ **/
+ SimpleMetrics(const Params & params);
+
+ /**
+ * Resets the match data of this object.
+ **/
+ void resetMatchData() {
+ _matches = 0;
+ _matchesWithPosOcc = 0;
+ _matchWithInvalidFieldLength = false;
+ _matchedWeight = 0;
+ }
+
+ /**
+ * Registers a match in the field in question.
+ *
+ * @param weight The weight of the term matching.
+ **/
+ void addMatch(uint32_t weight) {
+ ++_matches;
+ _matchedWeight += weight;
+ }
+
+ /**
+ * Registers a match in the field in question.
+ * We have position information for this term match.
+ *
+ * @param weight The weight of the term matching.
+ **/
+ void addMatchWithPosOcc(uint32_t weight) {
+ addMatch(weight);
+ ++_matchesWithPosOcc;
+ }
+
+ /**
+ * Registers that a match has invalid field length.
+ **/
+ void hasMatchWithInvalidFieldLength() {
+ _matchWithInvalidFieldLength = true;
+ }
+
+
+ /**
+ * Registers a term that is searching in the field in question.
+ *
+ * @param weight The weight of the term.
+ **/
+ void addSearchedTerm(uint32_t weight) {
+ ++_numTerms;
+ _totalWeightInField += weight;
+ }
+
+ /**
+ * Registers a query term with the given weight.
+ *
+ * @param weight The weight of the term.
+ **/
+ void addQueryTerm(uint32_t weight) {
+ _totalWeightInQuery += weight;
+ }
+
+ /**
+ * Overrides the total weight for all query terms.
+ *
+ * @param weight The total weight.
+ **/
+ void setTotalWeightInQuery(uint32_t weight) {
+ _totalWeightInQuery = weight;
+ }
+
+ /**
+ * Returns the normalized score for this object.
+ * <code> total weight of matched terms in the field / total weight of searched terms in the field </code>
+ *
+ * @return The score.
+ **/
+ feature_t getScore() const {
+ return _totalWeightInField > 0 ? _matchedWeight / static_cast<feature_t>(_totalWeightInField) : 0;
+ }
+
+ /**
+ * Returns the completeness score for this object.
+ * <code> <code>queryCompleteness * ( 1 - fieldCompletenessImportance ) </code>
+ *
+ * @return The completeness.
+ **/
+ feature_t getCompleteness() const {
+ return getQueryCompleteness() * (1 - _params.getFieldCompletenessImportance());
+ }
+
+ /**
+ * Returns the query completeness score for this object.
+ * <code> total number of matched terms in the field / total number of searched terms in the field </code>
+ *
+ * @return The query completeness.
+ **/
+ feature_t getQueryCompleteness() const {
+ return _numTerms > 0 ? _matches / static_cast<feature_t>(_numTerms) : 0;
+ }
+
+ /**
+ * Returns the weight score for this object.
+ * <code> total weight of matched terms in the field / total weight of all query terms </code>
+ *
+ * @return The weight.
+ **/
+ feature_t getWeight() const {
+ return _totalWeightInQuery > 0 ? _matchedWeight / static_cast<feature_t>(_totalWeightInQuery) : 0;
+ }
+
+ /**
+ * Returns the number of matches in the field in question.
+ *
+ * @return The number of matches.
+ **/
+ uint32_t getMatches() const {
+ return _matches;
+ }
+
+ /**
+ * Returns the number of matches in the field in question with position information.
+ *
+ * @return The number of matches with position information.
+ **/
+ uint32_t getMatchesWithPosOcc() const {
+ return _matchesWithPosOcc;
+ }
+
+ /**
+ * Returns the number of degraded matches (no position information) in the field in question.
+ *
+ * @return The number of degraded matches.
+ **/
+ uint32_t getDegradedMatches() const {
+ return getMatches() - getMatchesWithPosOcc();
+ }
+
+ /**
+ * Returns whether we have a match in the field in question with invalid field length.
+ *
+ * @return Whether we have seen an invalid field length.
+ **/
+ bool getMatchWithInvalidFieldLength() const {
+ return _matchWithInvalidFieldLength;
+ }
+
+ /**
+ * Returns a string representation of this object.
+ *
+ * @return String representation.
+ **/
+ vespalib::string toString() const;
+};
+
+} // fieldmatch
+} // features
+} // search
+
diff --git a/searchlib/src/vespa/searchlib/features/fieldmatchfeature.cpp b/searchlib/src/vespa/searchlib/features/fieldmatchfeature.cpp
new file mode 100644
index 00000000000..cec68d0c367
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/fieldmatchfeature.cpp
@@ -0,0 +1,311 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".features.fieldmatchfeature");
+#include "fieldmatchfeature.h"
+#include "utils.h"
+
+#include <vespa/searchlib/features/fieldmatch/computer.h>
+#include <vespa/searchlib/features/fieldmatch/metrics.h>
+#include <vespa/searchlib/fef/featureexecutor.h>
+#include <vespa/searchlib/fef/featurenamebuilder.h>
+#include <vespa/searchlib/fef/fieldinfo.h>
+#include <vespa/searchlib/fef/indexproperties.h>
+#include <vespa/searchlib/fef/properties.h>
+#include <vespa/vespalib/util/stringfmt.h>
+
+using namespace search::fef;
+
+namespace search {
+namespace features {
+
+FieldMatchExecutor::FieldMatchExecutor(const IQueryEnvironment & queryEnv,
+ const FieldInfo & field,
+ const fieldmatch::Params & params) :
+ FeatureExecutor(),
+ _splitter(queryEnv, field.id()),
+ _field(field),
+ _params(params),
+ _cmp(vespalib::make_string("fieldMatch(%s)", _field.name().c_str()),
+ _splitter, field, params)
+{
+ // empty
+}
+
+void
+FieldMatchExecutor::execute(search::fef::MatchData & match)
+{
+ //LOG(info, "execute for field '%s' and docId(%u)", _field.name().c_str(), match.getDocId());
+
+ _splitter.update(match);
+ _cmp.reset(match);
+ //_cmp.setTracing(true);
+
+ const fieldmatch::SimpleMetrics & simple = _cmp.getSimpleMetrics();
+
+ // only run the computer if we have at least one match with position information
+ // and that the matches with position information have valid field lengths
+ bool runCmp = (simple.getMatches() > 0 &&
+ simple.getMatchesWithPosOcc() > 0 &&
+ !simple.getMatchWithInvalidFieldLength());
+
+ //LOG(info, "runCmp(%s), simpleMetrics(%s)", runCmp ? "true" : "false", simple.toString().c_str());
+
+ if (runCmp) {
+ _cmp.run();
+ }
+
+ const fieldmatch::Metrics & result = _cmp.getFinalMetrics();
+
+ *match.resolveFeature(outputs()[0]) = runCmp ? result.getMatch() : 0; // score
+ *match.resolveFeature(outputs()[1]) = runCmp ? result.getProximity() : 0; // proximity
+ *match.resolveFeature(outputs()[2]) = runCmp ? result.getCompleteness() : simple.getCompleteness(); // completeness
+ *match.resolveFeature(outputs()[3]) = runCmp ? result.getQueryCompleteness() : simple.getQueryCompleteness(); // queryCompleteness
+ *match.resolveFeature(outputs()[4]) = result.getFieldCompleteness(); // fieldCompleteness
+ *match.resolveFeature(outputs()[5]) = runCmp ? result.getOrderness() : 0; // orderness
+ *match.resolveFeature(outputs()[6]) = result.getRelatedness(); // relatedness
+ *match.resolveFeature(outputs()[7]) = result.getEarliness(); // earliness
+ *match.resolveFeature(outputs()[8]) = result.getLongestSequenceRatio(); // longestSequenceRatio
+ *match.resolveFeature(outputs()[9]) = result.getSegmentProximity(); // segmentProximity
+ *match.resolveFeature(outputs()[10]) = runCmp ? result.getUnweightedProximity() : 0; // unweightedProximity
+ *match.resolveFeature(outputs()[11]) = runCmp ? result.getAbsoluteProximity() : 0; // absoluteProximity
+ *match.resolveFeature(outputs()[12]) = result.getOccurrence(); // occurrence
+ *match.resolveFeature(outputs()[13]) = result.getAbsoluteOccurrence(); // absoluteOccurence
+ *match.resolveFeature(outputs()[14]) = result.getWeightedOccurrence(); // weightedOccurence
+ *match.resolveFeature(outputs()[15]) = result.getWeightedAbsoluteOccurrence(); // weightedAbsoluteOccurence
+ *match.resolveFeature(outputs()[16]) = result.getSignificantOccurrence(); // significantOccurence
+
+ *match.resolveFeature(outputs()[17]) = runCmp ? result.getWeight() : simple.getWeight(); // weight
+ *match.resolveFeature(outputs()[18]) = result.getSignificance(); // significance
+ *match.resolveFeature(outputs()[19]) = result.getImportance(); // importance
+
+ *match.resolveFeature(outputs()[20]) = result.getSegments(); // segments
+ *match.resolveFeature(outputs()[21]) = runCmp ? result.getMatches() : simple.getMatches(); // matches
+ *match.resolveFeature(outputs()[22]) = result.getOutOfOrder(); // outOfOrder
+ *match.resolveFeature(outputs()[23]) = result.getGaps(); // gaps
+ *match.resolveFeature(outputs()[24]) = result.getGapLength(); // gapLength
+ *match.resolveFeature(outputs()[25]) = runCmp ? result.getLongestSequence() : 0; // longestSequence
+ *match.resolveFeature(outputs()[26]) = runCmp ? result.getHead() : 0; // head
+ *match.resolveFeature(outputs()[27]) = runCmp ? result.getTail() : 0; // tail
+ *match.resolveFeature(outputs()[28]) = result.getSegmentDistance(); // segmentDistance
+ *match.resolveFeature(outputs()[29]) = simple.getDegradedMatches(); // degradedMatches
+}
+
+
+FieldMatchBlueprint::FieldMatchBlueprint() :
+ Blueprint("fieldMatch"),
+ _field(NULL),
+ _params()
+{
+ // empty
+}
+
+void
+FieldMatchBlueprint::visitDumpFeatures(const IIndexEnvironment & env,
+ IDumpFeatureVisitor & visitor) const
+{
+ for (uint32_t i = 0; i < env.getNumFields(); ++i) {
+ const search::fef::FieldInfo * field = env.getField(i);
+ if (field->type() == search::fef::FieldType::INDEX &&
+ field->collection() == search::fef::CollectionType::SINGLE)
+ {
+ FeatureNameBuilder fnb;
+ fnb.baseName(getBaseName()).parameter(field->name());
+ if (field->isFilter()) {
+ visitor.visitDumpFeature(fnb.buildName());
+ visitor.visitDumpFeature(fnb.output("completeness").buildName());
+ visitor.visitDumpFeature(fnb.output("queryCompleteness").buildName());
+ visitor.visitDumpFeature(fnb.output("weight").buildName());
+ visitor.visitDumpFeature(fnb.output("matches").buildName());
+ visitor.visitDumpFeature(fnb.output("degradedMatches").buildName());
+ } else {
+ visitor.visitDumpFeature(fnb.buildName());
+ visitor.visitDumpFeature(fnb.output("proximity").buildName());
+ visitor.visitDumpFeature(fnb.output("completeness").buildName());
+ visitor.visitDumpFeature(fnb.output("queryCompleteness").buildName());
+ visitor.visitDumpFeature(fnb.output("fieldCompleteness").buildName());
+ visitor.visitDumpFeature(fnb.output("orderness").buildName());
+ visitor.visitDumpFeature(fnb.output("relatedness").buildName());
+ visitor.visitDumpFeature(fnb.output("earliness").buildName());
+ visitor.visitDumpFeature(fnb.output("longestSequenceRatio").buildName());
+ visitor.visitDumpFeature(fnb.output("segmentProximity").buildName());
+ visitor.visitDumpFeature(fnb.output("unweightedProximity").buildName());
+ visitor.visitDumpFeature(fnb.output("absoluteProximity").buildName());
+ visitor.visitDumpFeature(fnb.output("occurrence").buildName());
+ visitor.visitDumpFeature(fnb.output("absoluteOccurrence").buildName());
+ visitor.visitDumpFeature(fnb.output("weightedOccurrence").buildName());
+ visitor.visitDumpFeature(fnb.output("weightedAbsoluteOccurrence").buildName());
+ visitor.visitDumpFeature(fnb.output("significantOccurrence").buildName());
+ visitor.visitDumpFeature(fnb.output("weight").buildName());
+ visitor.visitDumpFeature(fnb.output("significance").buildName());
+ visitor.visitDumpFeature(fnb.output("importance").buildName());
+ visitor.visitDumpFeature(fnb.output("segments").buildName());
+ visitor.visitDumpFeature(fnb.output("matches").buildName());
+ visitor.visitDumpFeature(fnb.output("outOfOrder").buildName());
+ visitor.visitDumpFeature(fnb.output("gaps").buildName());
+ visitor.visitDumpFeature(fnb.output("gapLength").buildName());
+ visitor.visitDumpFeature(fnb.output("longestSequence").buildName());
+ visitor.visitDumpFeature(fnb.output("head").buildName());
+ visitor.visitDumpFeature(fnb.output("tail").buildName());
+ visitor.visitDumpFeature(fnb.output("segmentDistance").buildName());
+ visitor.visitDumpFeature(fnb.output("degradedMatches").buildName());
+ }
+ }
+ }
+}
+
+Blueprint::UP
+FieldMatchBlueprint::createInstance() const
+{
+ return Blueprint::UP(new FieldMatchBlueprint());
+}
+
+bool
+FieldMatchBlueprint::setup(const IIndexEnvironment & env,
+ const ParameterList & params)
+{
+ _field = params[0].asField();
+
+ const Properties & lst = env.getProperties();
+ Property obj;
+ obj = lst.lookup(getName(), "proximityLimit");
+ if (obj.found()) {
+ _params.setProximityLimit(atoi(obj.get().c_str()));
+ }
+ obj = lst.lookup(getName(), "maxAlternativeSegmentations");
+ if (obj.found()) {
+ _params.setMaxAlternativeSegmentations(atoi(obj.get().c_str()));
+ }
+ obj = lst.lookup(getName(), "maxOccurrences");
+ if (obj.found()) {
+ _params.setMaxOccurrences(atoi(obj.get().c_str()));
+ }
+ obj = lst.lookup(getName(), "proximityCompletenessImportance");
+ if (obj.found()) {
+ _params.setProximityCompletenessImportance(atof(obj.get().c_str()));
+ }
+ obj = lst.lookup(getName(), "relatednessImportance");
+ if (obj.found()) {
+ _params.setRelatednessImportance(atof(obj.get().c_str()));
+ }
+ obj = lst.lookup(getName(), "earlinessImportance");
+ if (obj.found()) {
+ _params.setEarlinessImportance(atof(obj.get().c_str()));
+ }
+ obj = lst.lookup(getName(), "segmentProximityImportance");
+ if (obj.found()) {
+ _params.setSegmentProximityImportance(atof(obj.get().c_str()));
+ }
+ obj = lst.lookup(getName(), "occurrenceImportance");
+ if (obj.found()) {
+ _params.setOccurrenceImportance(atof(obj.get().c_str()));
+ }
+ obj = lst.lookup(getName(), "fieldCompletenessImportance");
+ if (obj.found()) {
+ _params.setFieldCompletenessImportance(atof(obj.get().c_str()));
+ }
+ obj = lst.lookup(getName(), "proximityTable");
+ if (obj.found()) {
+ std::vector<feature_t> table;
+ for (uint32_t i = 0; i < obj.size(); ++i) {
+ table.push_back(atof(obj.getAt(i).c_str()));
+ }
+ _params.setProximityTable(table);
+ }
+ if (!_params.valid()) {
+ return false;
+ }
+
+ // normalized
+ describeOutput("score",
+ "A normalized measure of the degree to which this query and field matched (default, the long name of this is match). Use "
+ "this if you don't want to create your own combination function of more fine grained fieldmatch features.");
+ describeOutput("proximity",
+ "Normalized proximity - a value which is close to 1 when matched terms are close inside each segment, and close to zero "
+ "when they are far apart inside segments. Relatively more connected terms influence this value more. This is "
+ "absoluteProximity/average connectedness for the query terms for this field.");
+ describeOutput("completeness",
+ "The normalized total completeness, where field completeness is more important.");
+ describeOutput("queryCompleteness",
+ "The normalized ratio of query tokens matched in the field.");
+ describeOutput("fieldCompleteness",
+ "The normalized ratio of query tokens which was matched in the field.");
+ describeOutput("orderness",
+ "A normalized metric of how well the order of the terms agrees in the chosen segments.");
+ describeOutput("relatedness",
+ "A normalized measure of the degree to which different terms are related (occurring in the same segment).");
+ describeOutput("earliness",
+ "A normalized measure of how early the first segment occurs in this field.");
+ describeOutput("longestSequenceRatio",
+ "A normalized metric of the relative size of the longest sequence.");
+ describeOutput("segmentProximity",
+ "A normalized metric of the closeness (inverse of spread) of segments in the field.");
+ describeOutput("unweightedProximity",
+ "The normalized proximity of the matched terms, not taking term connectedness into account. This number is close to 1 if "
+ "all the matched terms are following each other in sequence, and close to 0 if they are far from each other or out of "
+ "order.");
+ describeOutput("absoluteProximity",
+ "Returns the normalized proximity of the matched terms, weighted by the connectedness of the query terms. This number is "
+ "0.1 if all the matched terms are and have default or lower connectedness, close to 1 if they are following in sequence "
+ "and have a high connectedness, and close to 0 if they are far from each other in the segments or out of order.");
+ describeOutput("occurrence",
+ "Returns a normalized measure of the number of occurrence of the terms of the query. This number is 1 if there are many "
+ " occurrences of the query terms in absolute terms, or relative to the total content of the field, and 0 if there are "
+ "none.");
+ describeOutput("absoluteOccurrence",
+ "Returns a normalized measure of the number of occurrence of the terms of the query.");
+ describeOutput("weightedOccurrence",
+ "Returns a normalized measure of the number of occurrence of the terms of the query, weighted by term weight. This number "
+ "is close to 1 if there are many occurrences of highly weighted query terms, in absolute terms, or relative to the total "
+ "content of the field, and 0 if there are none.");
+ describeOutput("weightedAbsoluteOccurrence",
+ "Returns a normalized measure of the number of occurrence of the terms of the query, taking weights into account so that "
+ "occurrences of higher weighted query terms has more impact than lower weighted terms.");
+ describeOutput("significantOccurrence",
+ "Returns a normalized measure of the number of occurrence of the terms of the query in absolute terms, or relative to the "
+ "total content of the field, weighted by term significance.");
+
+ // normalized and relative to the whole query
+ describeOutput("weight",
+ "The normalized weight of this match relative to the whole query.");
+ describeOutput("significance",
+ "Returns the normalized term significance (1-frequency) of the terms of this match relative to the whole query.");
+ describeOutput("importance",
+ "Returns the average of significance and weight. This has the same properties as those metrics.");
+
+ // not normalized
+ describeOutput("segments",
+ "The number of field text segments which are needed to match the query as completely as possible.");
+ describeOutput("matches",
+ "The number of query terms which was matched in this field.");
+ describeOutput("outOfOrder",
+ "The total number of out of order token sequences within matched field segments.");
+ describeOutput("gaps",
+ "The total number of position jumps (backward or forward) within field segments.");
+ describeOutput("gapLength",
+ "The summed length of all gaps within segments.");
+ describeOutput("longestSequence",
+ "The size of the longest matched continuous, in-order sequence in the field.");
+ describeOutput("head",
+ "The number of tokens in the field preceeding the start of the first matched segment.");
+ describeOutput("tail",
+ "The number of tokens in the field following the end of the last matched segment.");
+ describeOutput("segmentDistance",
+ "The sum of the distance between all segments making up a match to the query, measured as the sum of the number of token "
+ "positions separating the start of each field adjacent segment.");
+ describeOutput("degradedMatches",
+ "The number of degraded query terms (no position information available) which was matched in this field.");
+ env.hintFieldAccess(_field->id());
+ return true;
+}
+
+FeatureExecutor::LP
+FieldMatchBlueprint::createExecutor(const IQueryEnvironment & env) const
+{
+ return FeatureExecutor::LP(new FieldMatchExecutor(env, *_field, _params));
+}
+
+
+} // namespace features
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/features/fieldmatchfeature.h b/searchlib/src/vespa/searchlib/features/fieldmatchfeature.h
new file mode 100644
index 00000000000..5aaa75cda2d
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/fieldmatchfeature.h
@@ -0,0 +1,70 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/features/fieldmatch/computer.h>
+#include <vespa/searchlib/features/fieldmatch/params.h>
+#include <vespa/searchlib/fef/blueprint.h>
+
+namespace search {
+namespace features {
+
+/**
+ * Implements the executor for THE field match feature.
+ */
+class FieldMatchExecutor : public search::fef::FeatureExecutor {
+private:
+ search::fef::PhraseSplitter _splitter;
+ const search::fef::FieldInfo & _field;
+ const fieldmatch::Params & _params;
+ fieldmatch::Computer _cmp;
+
+public:
+ /**
+ * Constructs an executor.
+ */
+ FieldMatchExecutor(const search::fef::IQueryEnvironment & queryEnv,
+ const search::fef::FieldInfo & field,
+ const fieldmatch::Params & params);
+ virtual void execute(search::fef::MatchData & data);
+};
+
+
+/**
+ * Implements the blueprint for THE field match feature.
+ */
+class FieldMatchBlueprint : public search::fef::Blueprint {
+private:
+ const search::fef::FieldInfo * _field;
+ fieldmatch::Params _params;
+
+public:
+ /**
+ * Constructs a blueprint.
+ */
+ FieldMatchBlueprint();
+
+ // Inherit doc from Blueprint.
+ virtual void visitDumpFeatures(const search::fef::IIndexEnvironment & env,
+ search::fef::IDumpFeatureVisitor & visitor) const;
+
+ // Inherit doc from Blueprint.
+ virtual search::fef::Blueprint::UP createInstance() const;
+
+ // Inherit doc from Blueprint.
+ virtual search::fef::ParameterDescriptions getDescriptions() const {
+ return search::fef::ParameterDescriptions().desc().indexField(search::fef::ParameterCollection::SINGLE);
+ }
+
+ // Inherit doc from Blueprint.
+ virtual bool setup(const search::fef::IIndexEnvironment & env,
+ const search::fef::ParameterList & params);
+
+ // Inherit doc from Blueprint.
+ virtual search::fef::FeatureExecutor::LP createExecutor(const search::fef::IQueryEnvironment & env) const;
+};
+
+
+} // namespace features
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/features/fieldtermmatchfeature.cpp b/searchlib/src/vespa/searchlib/features/fieldtermmatchfeature.cpp
new file mode 100644
index 00000000000..7ce785e0c78
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/fieldtermmatchfeature.cpp
@@ -0,0 +1,129 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".features.fieldtermmatchfeature");
+
+#include <vespa/searchlib/fef/featurenamebuilder.h>
+#include <vespa/searchlib/fef/fieldinfo.h>
+#include <vespa/searchlib/fef/fieldtype.h>
+#include <vespa/searchlib/fef/properties.h>
+#include <vespa/searchlib/fef/itermdata.h>
+#include <vespa/vespalib/util/stringfmt.h>
+#include "fieldtermmatchfeature.h"
+#include "utils.h"
+
+namespace search {
+namespace features {
+
+FieldTermMatchExecutor::FieldTermMatchExecutor(const search::fef::IQueryEnvironment &env,
+ uint32_t fieldId, uint32_t termId) :
+ search::fef::FeatureExecutor(),
+ _fieldHandle(util::getTermFieldHandle(env, termId, fieldId))
+{
+}
+
+void
+FieldTermMatchExecutor::execute(search::fef::MatchData &match)
+{
+ if (_fieldHandle == search::fef::IllegalHandle) {
+ *match.resolveFeature(outputs()[0]) = 1000000; // firstPosition
+ *match.resolveFeature(outputs()[1]) = 1000000; // lastPosition
+ *match.resolveFeature(outputs()[2]) = 0.0f; // occurrences
+ *match.resolveFeature(outputs()[3]) = 0.0f; // sum weight
+ *match.resolveFeature(outputs()[4]) = 0.0f; // avg exactness
+ return;
+ }
+
+ search::fef::TermFieldMatchData &tfmd = *match.resolveTermField(_fieldHandle);
+ uint32_t firstPosition = 1000000;
+ uint32_t lastPosition = 1000000;
+ uint32_t occurrences = 0;
+ double sumExactness = 0;
+ int64_t weight = 0;
+ if (tfmd.getDocId() == match.getDocId()) {
+ search::fef::FieldPositionsIterator it = tfmd.getIterator();
+ if (it.valid()) {
+ lastPosition = 0;
+ while (it.valid()) {
+ firstPosition = std::min(firstPosition, it.getPosition());
+ lastPosition = std::max(lastPosition, it.getPosition());
+ ++occurrences;
+ weight += it.getElementWeight();
+ sumExactness += it.getMatchExactness();
+ it.next();
+ }
+ } else {
+ lastPosition = 1000000;
+ occurrences = 1;
+ }
+ }
+ *match.resolveFeature(outputs()[0]) = firstPosition;
+ *match.resolveFeature(outputs()[1]) = lastPosition;
+ *match.resolveFeature(outputs()[2]) = occurrences;
+ *match.resolveFeature(outputs()[3]) = weight;
+ *match.resolveFeature(outputs()[4]) = (occurrences > 0) ? (sumExactness / occurrences) : 0;
+}
+
+FieldTermMatchBlueprint::FieldTermMatchBlueprint() :
+ search::fef::Blueprint("fieldTermMatch"),
+ _fieldId(0),
+ _termId(0)
+{
+ // empty
+}
+
+void
+FieldTermMatchBlueprint::visitDumpFeatures(const search::fef::IIndexEnvironment &env,
+ search::fef::IDumpFeatureVisitor &visitor) const
+{
+ const search::fef::Properties &props = env.getProperties();
+ const vespalib::string &baseName = getBaseName();
+ int baseNumTerms = atoi(props.lookup(baseName, "numTerms").get("5").c_str());
+
+ for (uint32_t i = 0; i < env.getNumFields(); ++i) {
+ const search::fef::FieldInfo& field = *env.getField(i);
+ if (field.type() == search::fef::FieldType::INDEX) {
+ const vespalib::string &fieldName = field.name();
+ const search::fef::Property &prop = props.lookup(baseName, "numTerms", fieldName);
+ int numTerms = prop.found() ? atoi(prop.get().c_str()) : baseNumTerms;
+ for (int term = 0; term < numTerms; ++term) {
+ search::fef::FeatureNameBuilder fnb;
+ fnb.baseName(baseName)
+ .parameter(fieldName)
+ .parameter(vespalib::make_string("%d", term));
+ visitor.visitDumpFeature(fnb.output("firstPosition").buildName());
+ visitor.visitDumpFeature(fnb.output("occurrences").buildName());
+ visitor.visitDumpFeature(fnb.output("weight").buildName());
+ }
+ }
+ }
+}
+
+bool
+FieldTermMatchBlueprint::setup(const search::fef::IIndexEnvironment &env,
+ const search::fef::ParameterList &params)
+{
+ _fieldId = params[0].asField()->id();
+ _termId = params[1].asInteger();
+ describeOutput("firstPosition", "The first occurrence of this term.");
+ describeOutput("lastPosition", "The last occurrence of this term.");
+ describeOutput("occurrences", "The number of occurrence of this term.");
+ describeOutput("weight", "The sum occurence weights of this term.");
+ describeOutput("exactness", "The average exactness this term.");
+ env.hintFieldAccess(_fieldId);
+ return true;
+}
+
+search::fef::Blueprint::UP
+FieldTermMatchBlueprint::createInstance() const
+{
+ return search::fef::Blueprint::UP(new FieldTermMatchBlueprint());
+}
+
+search::fef::FeatureExecutor::LP
+FieldTermMatchBlueprint::createExecutor(const search::fef::IQueryEnvironment &env) const
+{
+ return search::fef::FeatureExecutor::LP(new FieldTermMatchExecutor(env, _fieldId, _termId));
+}
+
+}}
diff --git a/searchlib/src/vespa/searchlib/features/fieldtermmatchfeature.h b/searchlib/src/vespa/searchlib/features/fieldtermmatchfeature.h
new file mode 100644
index 00000000000..be5ef27ef5e
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/fieldtermmatchfeature.h
@@ -0,0 +1,67 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <string>
+#include <vector>
+#include <vespa/searchlib/fef/blueprint.h>
+#include <vespa/searchlib/fef/featureexecutor.h>
+
+namespace search {
+namespace features {
+
+/**
+ * Implements the executor for term feature.
+ */
+class FieldTermMatchExecutor : public search::fef::FeatureExecutor {
+public:
+ /**
+ * Constructs an executor for term feature.
+ *
+ * @param env The query environment.
+ * @param fieldId The field to match to.
+ * @param termId The term to match.
+ */
+ FieldTermMatchExecutor(const search::fef::IQueryEnvironment &env,
+ uint32_t fieldId, uint32_t termId);
+ virtual void execute(search::fef::MatchData &data);
+
+private:
+ search::fef::TermFieldHandle _fieldHandle;
+};
+
+/**
+ * Implements the blueprint for term feature.
+ */
+class FieldTermMatchBlueprint : public search::fef::Blueprint {
+public:
+ /**
+ * Constructs a blueprint for term feature.
+ */
+ FieldTermMatchBlueprint();
+
+ // Inherit doc from Blueprint.
+ virtual void visitDumpFeatures(const search::fef::IIndexEnvironment &env,
+ search::fef::IDumpFeatureVisitor &visitor) const;
+
+ // Inherit doc from Blueprint.
+ virtual search::fef::Blueprint::UP createInstance() const;
+
+ // Inherit doc from Blueprint.
+ virtual search::fef::FeatureExecutor::LP createExecutor(const search::fef::IQueryEnvironment &env) const;
+
+ // Inherit doc from Blueprint.
+ virtual search::fef::ParameterDescriptions getDescriptions() const {
+ return search::fef::ParameterDescriptions().desc().indexField(search::fef::ParameterCollection::ANY).number();
+ }
+
+ // Inherit doc from Blueprint.
+ virtual bool setup(const search::fef::IIndexEnvironment & env,
+ const search::fef::ParameterList & params);
+
+private:
+ uint32_t _fieldId;
+ uint32_t _termId;
+};
+
+}}
+
diff --git a/searchlib/src/vespa/searchlib/features/firstphasefeature.cpp b/searchlib/src/vespa/searchlib/features/firstphasefeature.cpp
new file mode 100644
index 00000000000..e3c7f7f5332
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/firstphasefeature.cpp
@@ -0,0 +1,62 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".features.firstphasefeature");
+#include "firstphasefeature.h"
+
+#include <vespa/searchlib/fef/featureexecutor.h>
+#include <vespa/searchlib/fef/indexproperties.h>
+#include <vespa/searchlib/fef/properties.h>
+
+using namespace search::fef;
+
+namespace search {
+namespace features {
+
+void
+FirstPhaseExecutor::execute(search::fef::MatchData & match)
+{
+ *match.resolveFeature(outputs()[0]) = *match.resolveFeature(inputs()[0]);
+}
+
+
+FirstPhaseBlueprint::FirstPhaseBlueprint() :
+ Blueprint("firstPhase")
+{
+ // empty
+}
+
+void
+FirstPhaseBlueprint::visitDumpFeatures(const IIndexEnvironment &,
+ IDumpFeatureVisitor & visitor) const
+{
+ // havardpe: dumping this is a really bad idea
+ visitor.visitDumpFeature(getBaseName());
+}
+
+Blueprint::UP
+FirstPhaseBlueprint::createInstance() const
+{
+ return Blueprint::UP(new FirstPhaseBlueprint());
+}
+
+bool
+FirstPhaseBlueprint::setup(const IIndexEnvironment & env,
+ const ParameterList &)
+{
+ describeOutput("score", "The ranking score for first phase.",
+ defineInput(indexproperties::rank::FirstPhase::lookup(env.getProperties()),
+ AcceptInput::ANY));
+ return true;
+}
+
+FeatureExecutor::LP
+FirstPhaseBlueprint::createExecutor(const IQueryEnvironment &) const
+{
+ return FeatureExecutor::LP(new FirstPhaseExecutor());
+}
+
+
+} // namespace features
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/features/firstphasefeature.h b/searchlib/src/vespa/searchlib/features/firstphasefeature.h
new file mode 100644
index 00000000000..67deea23984
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/firstphasefeature.h
@@ -0,0 +1,53 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/fef/blueprint.h>
+
+namespace search {
+namespace features {
+
+/**
+ * Implements the executor outputting the first phase ranking.
+ */
+class FirstPhaseExecutor : public search::fef::FeatureExecutor {
+public:
+ virtual bool isPure() { return true; }
+ virtual void execute(search::fef::MatchData & data);
+};
+
+
+/**
+ * Implements the blueprint for the first phase feature.
+ */
+class FirstPhaseBlueprint : public search::fef::Blueprint {
+public:
+ /**
+ * Constructs a blueprint.
+ */
+ FirstPhaseBlueprint();
+
+ // Inherit doc from Blueprint.
+ virtual void visitDumpFeatures(const search::fef::IIndexEnvironment & env,
+ search::fef::IDumpFeatureVisitor & visitor) const;
+
+ // Inherit doc from Blueprint.
+ virtual search::fef::Blueprint::UP createInstance() const;
+
+ // Inherit doc from Blueprint.
+ virtual search::fef::ParameterDescriptions getDescriptions() const {
+ return search::fef::ParameterDescriptions().desc();
+ }
+
+ // Inherit doc from Blueprint.
+ virtual bool setup(const search::fef::IIndexEnvironment & env,
+ const search::fef::ParameterList & params);
+
+ // Inherit doc from Blueprint.
+ virtual search::fef::FeatureExecutor::LP createExecutor(const search::fef::IQueryEnvironment & env) const;
+};
+
+
+} // namespace features
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/features/flow_completeness_feature.cpp b/searchlib/src/vespa/searchlib/features/flow_completeness_feature.cpp
new file mode 100644
index 00000000000..b33e367f4f0
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/flow_completeness_feature.cpp
@@ -0,0 +1,309 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".features.flowcompleteness");
+#include "flow_completeness_feature.h"
+#include <vespa/vespalib/stllike/hash_map.h>
+
+namespace search {
+namespace features {
+
+//-----------------------------------------------------------------------------
+
+FlowCompletenessExecutor::FlowCompletenessExecutor(const search::fef::IQueryEnvironment &env,
+ const FlowCompletenessParams &params)
+ : _params(params),
+ _terms(),
+ _queue(),
+ _sumTermWeight(0)
+{
+ for (uint32_t i = 0; i < env.getNumTerms(); ++i) {
+ LOG(spam, "consider term %u", i);
+ const search::fef::ITermData *termData = env.getTerm(i);
+ LOG(spam, "term %u weight %u", i, termData->getWeight().percent());
+ if (termData->getWeight().percent() != 0) { // only consider query terms with contribution
+ typedef search::fef::ITermFieldRangeAdapter FRA;
+ uint32_t j = 0;
+ for (FRA iter(*termData); iter.valid(); iter.next()) {
+ const search::fef::ITermFieldData &tfd = iter.get();
+ LOG(spam, "term %u field data %u for field id %u (my field id %u)",
+ i, j++, tfd.getFieldId(), _params.fieldId);
+ if (tfd.getFieldId() == _params.fieldId) {
+ int termWeight = termData->getWeight().percent();
+ _sumTermWeight += termWeight;
+ _terms.push_back(Term(tfd.getHandle(), termWeight));
+ }
+ }
+ }
+ }
+ LOG(spam, "added %zu terms", _terms.size());
+}
+
+typedef std::vector<uint32_t> TermIdxList;
+typedef std::vector<uint32_t> PosList;
+
+typedef vespalib::hash_map<uint32_t, uint32_t> TermIdxMap;
+
+struct State {
+ int elementWeight;
+ uint32_t elementLength;
+ uint32_t matchedTerms;
+ int sumTermWeight;
+
+ std::vector<PosList> positionsForTerm;
+ uint32_t posLimit;
+ PosList matchedPosForTerm;
+ TermIdxMap matchedTermForPos; // maps pos -> term
+
+ double score;
+ double flow;
+ feature_t completeness;
+ feature_t fieldCompleteness;
+ feature_t queryCompleteness;
+
+ State(int weight, uint32_t length)
+ : elementWeight(weight), elementLength(length),
+ matchedTerms(0), sumTermWeight(0),
+ posLimit(0),
+ score(0.0), flow(0.0),
+ completeness(0.0), fieldCompleteness(0.0), queryCompleteness(0.0) {}
+
+ void addMatch(int termWeight) {
+ ++matchedTerms;
+ sumTermWeight += termWeight;
+ }
+
+ struct Path {
+ std::vector<uint32_t> path;
+ bool operator< (const Path& other) const {
+ return path.size() < other.path.size();
+ }
+ };
+
+ Path bfs(vespalib::PriorityQueue<Path> &queue)
+ {
+ TermIdxList seen(matchedTerms, 0);
+ while (!queue.empty()) {
+ Path firstP = queue.front();
+ queue.pop_front();
+ uint32_t startTerm = firstP.path.back();
+ seen[startTerm] = 1;
+ PosList &edges = positionsForTerm[startTerm];
+ for (size_t j = 0; j < edges.size(); ++j) {
+ Path nextP = firstP;
+ uint32_t pos = edges[j];
+ nextP.path.push_back(pos);
+ TermIdxMap::const_iterator it = matchedTermForPos.find(pos);
+ if (it == matchedTermForPos.end()) {
+ return nextP;
+ } else {
+ uint32_t nextTerm = it->second;
+ if (seen[nextTerm] == 0) {
+ seen[nextTerm] = 1;
+ nextP.path.push_back(nextTerm);
+ queue.push(nextP);
+ }
+ }
+ }
+ }
+ return Path();
+ }
+
+ int findMatches() {
+ vespalib::PriorityQueue<Path> q;
+
+ for (size_t i = 0; i < matchedTerms; ++i) {
+ if (matchedPosForTerm[i] == IllegalPosId) {
+ Path p;
+ p.path.push_back(i);
+ q.push(p);
+ }
+ }
+ if (q.empty()) {
+ return 0;
+ }
+ Path p = bfs(q);
+ if (p.path.size() == 0) {
+ return 0;
+ }
+ while (p.path.size() > 1) {
+ uint32_t pos = p.path.back();
+ assert(pos < posLimit);
+ p.path.pop_back();
+ uint32_t tix = p.path.back();
+ assert(tix < matchedTerms);
+ p.path.pop_back();
+ matchedTermForPos[pos] = tix;
+ matchedPosForTerm[tix] = pos;
+ }
+ assert(p.path.size() == 0);
+ return 1;
+ }
+
+ int findSimpleMatches() {
+ int found = 0;
+ for (size_t tix = 0; tix < matchedTerms; ++tix) {
+ assert(matchedPosForTerm[tix] == IllegalPosId);
+ assert(positionsForTerm[tix].size() > 0);
+ uint32_t pos = positionsForTerm[tix][0];
+ assert(pos < posLimit);
+
+ TermIdxMap::const_iterator it = matchedTermForPos.find(pos);
+ if (it == matchedTermForPos.end()) {
+ ++found;
+ matchedTermForPos[pos] = tix;
+ matchedPosForTerm[tix] = pos;
+ }
+ }
+ return found;
+ }
+
+ void calculateScore(uint32_t queryTerms, double factor) {
+ matchedPosForTerm.resize(matchedTerms, IllegalPosId);
+ int more = findSimpleMatches();
+ flow += more;
+ while ((more = findMatches()) > 0) {
+ flow += more;
+ }
+ queryCompleteness = (flow / (double)queryTerms);
+ fieldCompleteness = (flow / (double)elementLength);
+ completeness = (fieldCompleteness * factor) +
+ (queryCompleteness * (1 - factor));
+ score = completeness * (double)sumTermWeight;
+ }
+};
+
+
+void
+FlowCompletenessExecutor::execute(search::fef::MatchData &data)
+{
+ assert(_queue.empty());
+ for (size_t i = 0; i < _terms.size(); ++i) {
+ search::fef::TermFieldMatchData *tfmd = data.resolveTermField(_terms[i].termHandle);
+ Item item(i, tfmd->begin(), tfmd->end());
+ LOG(spam, "found tfmd item with %zu positions", (item.end - item.pos));
+ if (item.pos != item.end) {
+ _queue.push(item);
+ }
+ }
+ State best(0, 0);
+ while (!_queue.empty()) {
+ Item &start = _queue.front();
+ uint32_t elementId = start.elemId;
+ LOG_ASSERT(start.pos != start.end);
+ State state(start.pos->getElementWeight(), start.pos->getElementLen());
+
+ while (!_queue.empty() && _queue.front().elemId == elementId) {
+ Item &item = _queue.front();
+
+ // update state
+ state.positionsForTerm.push_back(PosList());
+ while (item.pos != item.end && item.pos->getElementId() == elementId) {
+ uint32_t pos = item.pos->getPosition();
+ state.positionsForTerm.back().push_back(pos);
+ state.posLimit = std::max(state.posLimit, pos + 1);
+ ++item.pos;
+ }
+ state.addMatch(_terms[item.termIdx].termWeight);
+
+ // adjust item and its place in queue
+ if (item.pos == item.end) {
+ _queue.pop_front();
+ } else {
+ item.elemId = item.pos->getElementId();
+ _queue.adjust();
+ }
+ }
+ state.calculateScore(_terms.size(), _params.fieldCompletenessImportance);
+ if (state.score > best.score) {
+ best = state;
+ }
+ }
+ *data.resolveFeature(outputs()[0]) = best.completeness;
+ *data.resolveFeature(outputs()[1]) = best.fieldCompleteness;
+ *data.resolveFeature(outputs()[2]) = best.queryCompleteness;
+ *data.resolveFeature(outputs()[3]) = best.elementWeight;
+ *data.resolveFeature(outputs()[4]) = _params.fieldWeight;
+ *data.resolveFeature(outputs()[5]) = best.flow;
+
+}
+
+//-----------------------------------------------------------------------------
+
+FlowCompletenessBlueprint::FlowCompletenessBlueprint()
+ : Blueprint("flowCompleteness"),
+ _output(),
+ _params()
+{
+ _output.push_back("completeness");
+ _output.push_back("fieldCompleteness");
+ _output.push_back("queryCompleteness");
+ _output.push_back("elementWeight");
+ _output.push_back("weight");
+ _output.push_back("flow");
+}
+
+void
+FlowCompletenessBlueprint::visitDumpFeatures(const search::fef::IIndexEnvironment &env,
+ search::fef::IDumpFeatureVisitor &visitor) const
+{
+#ifdef notyet
+ for (uint32_t i = 0; i < env.getNumFields(); ++i) {
+ const search::fef::FieldInfo &field = *env.getField(i);
+ if (field.type() == search::fef::FieldType::INDEX) {
+ if (!field.isFilter()) {
+ search::fef::FeatureNameBuilder fnb;
+ fnb.baseName(getBaseName()).parameter(field.name());
+ for (size_t out = 0; out < _output.size(); ++out) {
+ visitor.visitDumpFeature(fnb.output(_output[out]).buildName());
+ }
+ }
+ }
+ }
+#else
+ (void)env;
+ (void)visitor;
+#endif
+}
+
+search::fef::Blueprint::UP
+FlowCompletenessBlueprint::createInstance() const
+{
+ return Blueprint::UP(new FlowCompletenessBlueprint());
+}
+
+bool
+FlowCompletenessBlueprint::setup(const search::fef::IIndexEnvironment &env,
+ const search::fef::ParameterList &params)
+{
+ const search::fef::FieldInfo *field = params[0].asField();
+
+ _params.fieldId = field->id();
+ const search::fef::Properties &lst = env.getProperties();
+ search::fef::Property obj = lst.lookup(getName(), "fieldCompletenessImportance");
+ if (obj.found()) {
+ _params.fieldCompletenessImportance = atof(obj.get().c_str());
+ }
+ _params.fieldWeight = search::fef::indexproperties::FieldWeight::lookup(lst, field->name());
+
+ describeOutput(_output[0], "combined completeness for best scored element");
+ describeOutput(_output[1], "best scored element completeness");
+ describeOutput(_output[2], "query completeness for best scored element");
+ describeOutput(_output[3], "element weight of best scored element");
+ describeOutput(_output[4], "field weight");
+ describeOutput(_output[5], "query terms matching in best element (measured by flow)");
+ env.hintFieldAccess(field->id());
+ return true;
+}
+
+search::fef::FeatureExecutor::LP
+FlowCompletenessBlueprint::createExecutor(const search::fef::IQueryEnvironment &env) const
+{
+ return search::fef::FeatureExecutor::LP(new FlowCompletenessExecutor(env, _params));
+}
+
+//-----------------------------------------------------------------------------
+
+} // namespace features
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/features/flow_completeness_feature.h b/searchlib/src/vespa/searchlib/features/flow_completeness_feature.h
new file mode 100644
index 00000000000..07ff6f55884
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/flow_completeness_feature.h
@@ -0,0 +1,111 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <string>
+#include <vector>
+#include <vespa/searchlib/fef/fef.h>
+#include <vespa/vespalib/util/priority_queue.h>
+#include <vespa/vespalib/stllike/hash_map.h>
+
+namespace search {
+namespace features {
+
+//-----------------------------------------------------------------------------
+
+struct FlowCompletenessParams {
+ uint32_t fieldId;
+ feature_t fieldWeight;
+ feature_t fieldCompletenessImportance;
+ FlowCompletenessParams()
+ : fieldId(search::fef::IllegalFieldId),
+ fieldWeight(0),
+ fieldCompletenessImportance(0.5) {}
+};
+
+//-----------------------------------------------------------------------------
+
+const uint32_t IllegalElementId = 0xffffffff;
+const uint32_t IllegalTermId = 0xffffffff;
+const uint32_t IllegalPosId = 0xffffffff;
+
+class FlowCompletenessExecutor : public search::fef::FeatureExecutor
+{
+private:
+ struct Term {
+ search::fef::TermFieldHandle termHandle;
+ int termWeight;
+ Term(search::fef::TermFieldHandle handle, int weight)
+ : termHandle(handle), termWeight(weight) {}
+ };
+
+ struct Item {
+ uint32_t elemId;
+ uint32_t termIdx;
+ search::fef::TermFieldMatchData::PositionsIterator pos;
+ search::fef::TermFieldMatchData::PositionsIterator end;
+
+ Item(uint32_t idx,
+ search::fef::TermFieldMatchData::PositionsIterator p,
+ search::fef::TermFieldMatchData::PositionsIterator e)
+ : elemId(IllegalElementId), termIdx(idx), pos(p), end(e)
+ {
+ if (p != e) elemId = p->getElementId();
+ }
+
+ bool operator< (const Item &other) const {
+ return (elemId < other.elemId);
+ }
+ };
+
+ FlowCompletenessParams _params;
+ std::vector<Term> _terms;
+ vespalib::PriorityQueue<Item> _queue;
+ int _sumTermWeight;
+
+ static bool nextElement(Item &item);
+
+public:
+ FlowCompletenessExecutor(const search::fef::IQueryEnvironment &env,
+ const FlowCompletenessParams &params);
+ virtual bool isPure() { return _terms.empty(); }
+ virtual void execute(search::fef::MatchData & data);
+};
+
+//-----------------------------------------------------------------------------
+
+class FlowCompletenessBlueprint : public search::fef::Blueprint
+{
+private:
+ std::vector<vespalib::string> _output;
+ FlowCompletenessParams _params;
+
+public:
+ FlowCompletenessBlueprint();
+
+ // Inherit doc from Blueprint.
+ virtual void visitDumpFeatures(const search::fef::IIndexEnvironment & env,
+ search::fef::IDumpFeatureVisitor & visitor) const;
+
+ // Inherit doc from Blueprint.
+ virtual search::fef::Blueprint::UP createInstance() const;
+
+ // Inherit doc from Blueprint.
+ virtual search::fef::ParameterDescriptions getDescriptions() const {
+ return search::fef::ParameterDescriptions().desc().indexField(search::fef::ParameterCollection::ANY);
+ }
+
+ // Inherit doc from Blueprint.
+ virtual bool setup(const search::fef::IIndexEnvironment &env,
+ const search::fef::ParameterList &params);
+
+ // Inherit doc from Blueprint.
+ virtual search::fef::FeatureExecutor::LP
+ createExecutor(const search::fef::IQueryEnvironment & env) const;
+};
+
+//-----------------------------------------------------------------------------
+
+} // namespace features
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/features/foreachfeature.cpp b/searchlib/src/vespa/searchlib/features/foreachfeature.cpp
new file mode 100644
index 00000000000..4ee92e9177d
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/foreachfeature.cpp
@@ -0,0 +1,186 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".features.foreachfeature");
+#include "foreachfeature.h"
+#include "utils.h"
+
+#include <boost/algorithm/string/replace.hpp>
+#include <vespa/searchlib/fef/fieldinfo.h>
+#include <vespa/searchlib/fef/properties.h>
+#include <vespa/vespalib/util/stringfmt.h>
+#include <vespa/vespalib/util/vstringfmt.h>
+
+using namespace search::fef;
+
+namespace search {
+namespace features {
+
+template <typename CO, typename OP>
+ForeachExecutor<CO, OP>::ForeachExecutor(const CO & condition, uint32_t numInputs) :
+ FeatureExecutor(),
+ _condition(condition),
+ _operation(),
+ _numInputs(numInputs)
+{
+}
+
+template <typename CO, typename OP>
+void
+ForeachExecutor<CO, OP>::execute(MatchData & match)
+{
+ _operation.reset();
+ for (uint32_t i = 0; i < inputs().size(); ++i) {
+ feature_t val = *match.resolveFeature(inputs()[i]);
+ if (_condition.useValue(val)) {
+ _operation.onValue(val);
+ }
+ }
+ *match.resolveFeature(outputs()[0]) = _operation.getResult();
+}
+
+
+bool
+ForeachBlueprint::decideDimension(const vespalib::string & param)
+{
+ if (param == "terms") {
+ _dimension = TERMS;
+ } else if (param == "fields") {
+ _dimension = FIELDS;
+ } else if (param == "attributes") {
+ _dimension = ATTRIBUTES;
+ } else {
+ LOG(error, "Expected dimension parameter to be 'terms', 'fields', or 'attributes', but was '%s'",
+ param.c_str());
+ return false;
+ }
+ return true;
+}
+
+bool
+ForeachBlueprint::decideCondition(const vespalib::string & condition, const vespalib::string & operation)
+{
+ if (condition == "true") {
+ return decideOperation(TrueCondition(), operation);
+ } else if (condition.size() >= 2 && condition[0] == '<') {
+ return decideOperation(LessThanCondition(util::strToNum<feature_t>(condition.substr(1))), operation);
+ } else if (condition.size() >= 2 && condition[0] == '>') {
+ return decideOperation(GreaterThanCondition(util::strToNum<feature_t>(condition.substr(1))), operation);
+ } else {
+ LOG(error, "Expected condition parameter to be 'true', '<a', or '>a', but was '%s'",
+ condition.c_str());
+ return false;
+ }
+}
+
+template <typename CO>
+bool
+ForeachBlueprint::decideOperation(CO condition, const vespalib::string & operation)
+{
+ if (operation == "sum") {
+ setExecutorCreator<CO, SumOperation>(condition);
+ } else if (operation == "product") {
+ setExecutorCreator<CO, ProductOperation>(condition);
+ } else if (operation == "average") {
+ setExecutorCreator<CO, AverageOperation>(condition);
+ } else if (operation == "max") {
+ setExecutorCreator<CO, MaxOperation>(condition);
+ } else if (operation == "min") {
+ setExecutorCreator<CO, MinOperation>(condition);
+ } else if (operation == "count") {
+ setExecutorCreator<CO, CountOperation>(condition);
+ } else {
+ LOG(error, "Expected operation parameter to be 'sum', 'product', 'average', 'max', 'min', or 'count', but was '%s'",
+ operation.c_str());
+ return false;
+ }
+ return true;
+}
+
+template <typename CO, typename OP>
+void
+ForeachBlueprint::setExecutorCreator(CO condition)
+{
+ class ExecutorCreator : public ExecutorCreatorBase {
+ private:
+ CO _condition;
+ public:
+ ExecutorCreator(CO cond) : _condition(cond) {}
+ virtual search::fef::FeatureExecutor::LP create(uint32_t numInputs) const {
+ return search::fef::FeatureExecutor::LP(new ForeachExecutor<CO, OP>(_condition, numInputs));
+ }
+ };
+ _executorCreator.reset(new ExecutorCreator(condition));
+}
+
+ForeachBlueprint::ForeachBlueprint() :
+ Blueprint("foreach"),
+ _dimension(ILLEGAL),
+ _executorCreator(),
+ _num_inputs(0)
+{
+}
+
+void
+ForeachBlueprint::visitDumpFeatures(const IIndexEnvironment &,
+ IDumpFeatureVisitor &) const
+{
+}
+
+bool
+ForeachBlueprint::setup(const IIndexEnvironment & env,
+ const ParameterList & params)
+{
+ if (!decideDimension(params[0].getValue())) {
+ return false;
+ }
+ if (!decideCondition(params[3].getValue(), params[4].getValue())) {
+ return false;
+ }
+
+ const vespalib::string & variable = params[1].getValue();
+ const vespalib::string & feature = params[2].getValue();
+
+ if (_dimension == TERMS) {
+ uint32_t maxTerms = util::strToNum<uint32_t>(env.getProperties().lookup(getBaseName(), "maxTerms").get("16"));
+ for (uint32_t i = 0; i < maxTerms; ++i) {
+ defineInput(boost::algorithm::replace_all_copy(feature, variable, vespalib::make_vespa_string("%u", i)));
+ ++_num_inputs;
+ }
+ } else {
+ for (uint32_t i = 0; i < env.getNumFields(); ++i) {
+ const FieldInfo * info = env.getField(i);
+ if (info->type() == FieldType::INDEX && _dimension == FIELDS) {
+ defineInput(boost::algorithm::replace_all_copy(feature, variable, info->name()));
+ ++_num_inputs;
+ } else if (info->type() == FieldType::ATTRIBUTE && _dimension == ATTRIBUTES) {
+ defineInput(boost::algorithm::replace_all_copy(feature, variable, info->name()));
+ ++_num_inputs;
+ }
+ }
+ }
+
+ describeOutput("value", "The result after iterating over the input feature values using the specified operation");
+
+ return true;
+}
+
+Blueprint::UP
+ForeachBlueprint::createInstance() const
+{
+ return Blueprint::UP(new ForeachBlueprint());
+}
+
+FeatureExecutor::LP
+ForeachBlueprint::createExecutor(const IQueryEnvironment &) const
+{
+ if (_executorCreator.get() != NULL) {
+ return _executorCreator->create(_num_inputs);
+ }
+ return FeatureExecutor::LP(NULL);
+}
+
+
+} // namespace features
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/features/foreachfeature.h b/searchlib/src/vespa/searchlib/features/foreachfeature.h
new file mode 100644
index 00000000000..6485b579971
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/foreachfeature.h
@@ -0,0 +1,185 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/fef/blueprint.h>
+#include <vespa/searchlib/fef/featureexecutor.h>
+#include <limits>
+
+
+namespace search {
+namespace features {
+
+/**
+ * Implements the executor for the foreach feature.
+ * Uses a condition and operation template class to perform the computation.
+ */
+template <typename CO, typename OP>
+class ForeachExecutor : public search::fef::FeatureExecutor {
+private:
+ CO _condition;
+ OP _operation;
+ uint32_t _numInputs;
+
+public:
+ ForeachExecutor(const CO & condition, uint32_t numInputs);
+ virtual void execute(search::fef::MatchData & data);
+};
+
+
+/**
+ * Base class for condition template class.
+ **/
+class ConditionBase {
+protected:
+ feature_t _param;
+public:
+ ConditionBase(feature_t param = 0) : _param(param) {}
+};
+
+/**
+ * Implements the true condition.
+ **/
+struct TrueCondition : public ConditionBase {
+ bool useValue(feature_t val) { (void) val; return true; }
+};
+
+/**
+ * Implements the less than condition.
+ **/
+struct LessThanCondition : public ConditionBase {
+ LessThanCondition(feature_t param) : ConditionBase(param) {}
+ bool useValue(feature_t val) { return val < _param; }
+};
+
+/**
+ * Implements the greater than condition.
+ **/
+struct GreaterThanCondition : public ConditionBase {
+ GreaterThanCondition(feature_t param) : ConditionBase(param) {}
+ bool useValue(feature_t val) { return val > _param; }
+};
+
+
+/**
+ * Base class for operation template class.
+ */
+class OperationBase {
+protected:
+ feature_t _result;
+public:
+ OperationBase() : _result(0) {}
+ feature_t getResult() const { return _result; }
+};
+
+/**
+ * Implements sum operation.
+ **/
+struct SumOperation : public OperationBase {
+ void reset() { _result = 0; }
+ void onValue(feature_t val) { _result += val; }
+};
+
+/**
+ * Implements product operation.
+ **/
+struct ProductOperation : public OperationBase {
+ void reset() { _result = 1; }
+ void onValue(feature_t val) { _result *= val; }
+};
+
+/**
+ * Implements average operation.
+ **/
+class AverageOperation : public OperationBase {
+private:
+ uint32_t _numValues;
+public:
+ AverageOperation() : OperationBase(), _numValues(0) {}
+ void reset() { _result = 0; _numValues = 0; }
+ void onValue(feature_t val) { _result += val; ++_numValues; }
+ feature_t getResult() const { return _numValues != 0 ? _result / _numValues : 0; }
+};
+
+/**
+ * Implements max operation.
+ **/
+struct MaxOperation : public OperationBase {
+ void reset() { _result = -std::numeric_limits<feature_t>::max(); }
+ void onValue(feature_t val) { _result = std::max(val, _result); }
+};
+
+/**
+ * Implements min operation.
+ **/
+struct MinOperation : public OperationBase {
+ void reset() { _result = std::numeric_limits<feature_t>::max(); }
+ void onValue(feature_t val) { _result = std::min(val, _result); }
+};
+
+/**
+ * Implements count operation.
+ **/
+struct CountOperation : public OperationBase {
+ void reset() { _result = 0; }
+ void onValue(feature_t val) { (void) val; _result += 1; }
+};
+
+
+/**
+ * Implements the blueprint for the foreach executor.
+ */
+class ForeachBlueprint : public search::fef::Blueprint {
+private:
+ enum Dimension {
+ TERMS,
+ FIELDS,
+ ATTRIBUTES,
+ ILLEGAL
+ };
+ struct ExecutorCreatorBase {
+ virtual search::fef::FeatureExecutor::LP create(uint32_t numInputs) const = 0;
+ virtual ~ExecutorCreatorBase() {}
+ };
+
+ Dimension _dimension;
+ std::unique_ptr<ExecutorCreatorBase> _executorCreator;
+ size_t _num_inputs;
+
+ bool decideDimension(const vespalib::string & param);
+ bool decideCondition(const vespalib::string & condition, const vespalib::string & operation);
+ template <typename CO>
+ bool decideOperation(CO condition, const vespalib::string & operation);
+ template <typename CO, typename OP>
+ void setExecutorCreator(CO condition);
+
+public:
+ /**
+ * Constructs a blueprint.
+ */
+ ForeachBlueprint();
+
+ // Inherit doc from Blueprint.
+ virtual void visitDumpFeatures(const search::fef::IIndexEnvironment & env,
+ search::fef::IDumpFeatureVisitor & visitor) const;
+
+ // Inherit doc from Blueprint.
+ virtual search::fef::Blueprint::UP createInstance() const;
+
+ // Inherit doc from Blueprint.
+ virtual search::fef::ParameterDescriptions getDescriptions() const {
+ return search::fef::ParameterDescriptions().desc().string().string().feature().string().string();
+ }
+
+ // Inherit doc from Blueprint.
+ virtual bool setup(const search::fef::IIndexEnvironment & env,
+ const search::fef::ParameterList & params);
+
+ // Inherit doc from Blueprint.
+ virtual search::fef::FeatureExecutor::LP createExecutor(const search::fef::IQueryEnvironment & env) const;
+};
+
+
+} // namespace features
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/features/freshnessfeature.cpp b/searchlib/src/vespa/searchlib/features/freshnessfeature.cpp
new file mode 100644
index 00000000000..cc6d1c24c50
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/freshnessfeature.cpp
@@ -0,0 +1,101 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".features.freshnessfeature");
+#include <vespa/searchlib/fef/properties.h>
+#include "freshnessfeature.h"
+#include "utils.h"
+
+using namespace search::fef;
+
+namespace search {
+namespace features {
+
+FreshnessExecutor::FreshnessExecutor(feature_t maxAge, feature_t scaleAge) :
+ FeatureExecutor(),
+ _maxAge(maxAge),
+ _logCalc(maxAge, scaleAge)
+{
+}
+
+void
+FreshnessExecutor::execute(MatchData & match)
+{
+ feature_t age = *match.resolveFeature(inputs()[0]);
+ LOG(debug, "Age: %f Maxage: %f res: %f\n", age, _maxAge, (age / _maxAge));
+ feature_t freshness = std::max(1 - (age / _maxAge), (feature_t)0);
+ *match.resolveFeature(outputs()[0]) = freshness;
+ *match.resolveFeature(outputs()[1]) = _logCalc.get(age);
+}
+
+
+FreshnessBlueprint::FreshnessBlueprint() :
+ Blueprint("freshness"),
+ _maxAge(3*30*24*60*60), // default value (90 days)
+ _halfResponse(7*24*60*60), // makes sure freshness.logscale = 0.5 when age is 7 days
+ _scaleAge(LogarithmCalculator::getScale(_halfResponse, _maxAge))
+{
+}
+
+void
+FreshnessBlueprint::visitDumpFeatures(const IIndexEnvironment &,
+ IDumpFeatureVisitor &) const
+{
+}
+
+bool
+FreshnessBlueprint::setup(const IIndexEnvironment & env,
+ const ParameterList & params)
+{
+ // params[0] = attribute name
+ Property p = env.getProperties().lookup(getName(), "maxAge");
+ if (p.found()) {
+ _maxAge = util::strToNum<feature_t>(p.get());
+ }
+ p = env.getProperties().lookup(getName(), "halfResponse");
+ if (p.found()) {
+ _halfResponse = util::strToNum<feature_t>(p.get());
+ }
+ // sanity checks:
+ if (_maxAge < 1) {
+ LOG(warning, "Invalid %s.maxAge = %g, using 1.0",
+ getName().c_str(), (double)_maxAge);
+ _maxAge = 1.0;
+ }
+ if (_halfResponse < 1) {
+ LOG(warning, "Invalid %s.halfResponse = %g, using 1.0",
+ getName().c_str(), (double)_halfResponse);
+ _halfResponse = 1.0;
+ }
+ if (_halfResponse >= _maxAge / 2) {
+ feature_t newResponse = (_maxAge / 2) - 1;
+ LOG(warning, "Invalid %s.halfResponse = %g, using %g ((%s.maxAge / 2) - 1)",
+ getName().c_str(), (double)_halfResponse, (double)newResponse, getName().c_str());
+ _halfResponse = newResponse;
+ }
+ _scaleAge = LogarithmCalculator::getScale(_halfResponse, _maxAge);
+
+ defineInput("age(" + params[0].getValue() + ")");
+ describeOutput("out", "The freshness of the document (linear)");
+ describeOutput("logscale", "The freshness of the document (logarithmic shape)");
+
+ return true;
+}
+
+Blueprint::UP
+FreshnessBlueprint::createInstance() const
+{
+ return Blueprint::UP(new FreshnessBlueprint());
+}
+
+FeatureExecutor::LP
+FreshnessBlueprint::createExecutor(const IQueryEnvironment &) const
+{
+ return FeatureExecutor::LP(new FreshnessExecutor(_maxAge, _scaleAge));
+}
+
+
+} // namespace features
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/features/freshnessfeature.h b/searchlib/src/vespa/searchlib/features/freshnessfeature.h
new file mode 100644
index 00000000000..979966b48ca
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/freshnessfeature.h
@@ -0,0 +1,67 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/fef/blueprint.h>
+#include <vespa/searchlib/fef/featureexecutor.h>
+#include "logarithmcalculator.h"
+
+namespace search {
+namespace features {
+
+/**
+ * Implements the executor for the freshness feature.
+ */
+class FreshnessExecutor : public search::fef::FeatureExecutor {
+private:
+ feature_t _maxAge;
+ LogarithmCalculator _logCalc;
+
+public:
+ /**
+ * Constructs an executor.
+ */
+ FreshnessExecutor(feature_t maxAge, feature_t scaleAge);
+ virtual void execute(search::fef::MatchData & data);
+};
+
+
+/**
+ * Implements the blueprint for the freshness executor.
+ */
+class FreshnessBlueprint : public search::fef::Blueprint {
+private:
+ feature_t _maxAge;
+ feature_t _halfResponse;
+ feature_t _scaleAge;
+
+public:
+ /**
+ * Constructs a blueprint.
+ */
+ FreshnessBlueprint();
+
+ // Inherit doc from Blueprint.
+ virtual void visitDumpFeatures(const search::fef::IIndexEnvironment & env,
+ search::fef::IDumpFeatureVisitor & visitor) const;
+
+ // Inherit doc from Blueprint.
+ virtual search::fef::Blueprint::UP createInstance() const;
+
+ // Inherit doc from Blueprint.
+ virtual search::fef::ParameterDescriptions getDescriptions() const {
+ return search::fef::ParameterDescriptions().desc().attribute(search::fef::ParameterCollection::ANY);
+ }
+
+ // Inherit doc from Blueprint.
+ virtual bool setup(const search::fef::IIndexEnvironment & env,
+ const search::fef::ParameterList & params);
+
+ // Inherit doc from Blueprint.
+ virtual search::fef::FeatureExecutor::LP createExecutor(const search::fef::IQueryEnvironment & env) const;
+};
+
+
+} // namespace features
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/features/item_raw_score_feature.cpp b/searchlib/src/vespa/searchlib/features/item_raw_score_feature.cpp
new file mode 100644
index 00000000000..1fc8203a58e
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/item_raw_score_feature.cpp
@@ -0,0 +1,82 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".features.item_raw_score_feature");
+#include "item_raw_score_feature.h"
+#include "valuefeature.h"
+#include "utils.h"
+
+using namespace search::fef;
+
+namespace search {
+namespace features {
+
+void
+ItemRawScoreExecutor::execute(MatchData &data)
+{
+ feature_t output = 0.0;
+ for (uint32_t i = 0; i < _handles.size(); ++i) {
+ const TermFieldMatchData *tfmd = data.resolveTermField(_handles[i]);
+ if (tfmd->getDocId() == data.getDocId()) {
+ output += tfmd->getRawScore();
+ }
+ }
+ *data.resolveFeature(outputs()[0]) = output;
+}
+
+//-----------------------------------------------------------------------------
+
+void
+SimpleItemRawScoreExecutor::execute(MatchData &data)
+{
+ feature_t output = 0.0;
+ const TermFieldMatchData *tfmd = data.resolveTermField(_handle);
+ if (tfmd->getDocId() == data.getDocId()) {
+ output = tfmd->getRawScore();
+ }
+ *data.resolveFeature(outputs()[0]) = output;
+}
+
+//-----------------------------------------------------------------------------
+
+bool
+ItemRawScoreBlueprint::setup(const IIndexEnvironment &,
+ const ParameterList &params)
+{
+ _label = params[0].getValue();
+ describeOutput("out", "raw score for the given query item");
+ return true;
+}
+
+FeatureExecutor::LP
+ItemRawScoreBlueprint::createExecutor(const IQueryEnvironment &queryEnv) const
+{
+ HandleVector handles = resolve(queryEnv, _label);
+ if (handles.size() == 1) {
+ return FeatureExecutor::LP(new SimpleItemRawScoreExecutor(handles[0]));
+ } else if (handles.size() == 0) {
+ return FeatureExecutor::LP(new SingleZeroValueExecutor());
+ } else {
+ return FeatureExecutor::LP(new ItemRawScoreExecutor(handles));
+ }
+}
+
+ItemRawScoreBlueprint::HandleVector
+ItemRawScoreBlueprint::resolve(const search::fef::IQueryEnvironment &env,
+ const vespalib::string &label)
+{
+ HandleVector handles;
+ const ITermData *term = util::getTermByLabel(env, label);
+ for (uint32_t i = 0; (term != 0) && (i < term->numFields()); ++i) {
+ TermFieldHandle handle = term->field(i).getHandle();
+ if (handle != IllegalHandle) {
+ handles.push_back(handle);
+ }
+ }
+ return handles;
+}
+
+
+} // namespace features
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/features/item_raw_score_feature.h b/searchlib/src/vespa/searchlib/features/item_raw_score_feature.h
new file mode 100644
index 00000000000..10a6c30611d
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/item_raw_score_feature.h
@@ -0,0 +1,63 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/fef/blueprint.h>
+#include <vespa/searchlib/fef/featureexecutor.h>
+#include <vespa/vespalib/stllike/smallvector.h>
+
+namespace search {
+namespace features {
+
+class ItemRawScoreExecutor : public search::fef::FeatureExecutor
+{
+public:
+ typedef std::vector<search::fef::TermFieldHandle> HandleVector;
+private:
+ HandleVector _handles;
+public:
+ ItemRawScoreExecutor(HandleVector handles)
+ : FeatureExecutor(), _handles(handles) {}
+ virtual void execute(search::fef::MatchData &data);
+};
+
+class SimpleItemRawScoreExecutor : public search::fef::FeatureExecutor
+{
+private:
+ search::fef::TermFieldHandle _handle;
+public:
+ SimpleItemRawScoreExecutor(search::fef::TermFieldHandle handle)
+ : FeatureExecutor(), _handle(handle) {}
+ virtual void execute(search::fef::MatchData &data);
+};
+
+
+//-----------------------------------------------------------------------------
+
+class ItemRawScoreBlueprint : public search::fef::Blueprint
+{
+private:
+ typedef std::vector<search::fef::TermFieldHandle> HandleVector;
+ vespalib::string _label;
+public:
+ ItemRawScoreBlueprint() : Blueprint("itemRawScore"), _label() {}
+ virtual void visitDumpFeatures(const search::fef::IIndexEnvironment &,
+ search::fef::IDumpFeatureVisitor &) const {}
+ virtual search::fef::Blueprint::UP createInstance() const {
+ return Blueprint::UP(new ItemRawScoreBlueprint());
+ }
+ virtual search::fef::ParameterDescriptions getDescriptions() const {
+ return search::fef::ParameterDescriptions().desc().string();
+ }
+ virtual bool setup(const search::fef::IIndexEnvironment &env,
+ const search::fef::ParameterList &params);
+ virtual search::fef::FeatureExecutor::LP
+ createExecutor(const search::fef::IQueryEnvironment &env) const;
+
+ static HandleVector resolve(const search::fef::IQueryEnvironment &env,
+ const vespalib::string &label);
+};
+
+} // namespace features
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/features/jarowinklerdistancefeature.cpp b/searchlib/src/vespa/searchlib/features/jarowinklerdistancefeature.cpp
new file mode 100644
index 00000000000..b2d80324bec
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/jarowinklerdistancefeature.cpp
@@ -0,0 +1,184 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".features.jarowinklerdistance");
+
+#include <vespa/searchlib/fef/featurenamebuilder.h>
+#include <vespa/searchlib/fef/fieldinfo.h>
+#include <vespa/searchlib/fef/fieldtype.h>
+#include <vespa/searchlib/fef/properties.h>
+#include <vespa/searchlib/fef/itermdata.h>
+#include <vespa/vespalib/util/stringfmt.h>
+#include "jarowinklerdistancefeature.h"
+#include "utils.h"
+#include <limits>
+
+namespace search {
+namespace features {
+
+//-----------------------------------------------------------------------------
+// JaroWinklerDistanceConfig
+//-----------------------------------------------------------------------------
+JaroWinklerDistanceConfig::JaroWinklerDistanceConfig() :
+ fieldId(search::fef::IllegalHandle),
+ fieldBegin(0),
+ fieldEnd(std::numeric_limits<uint32_t>::max()),
+ boostThreshold(0.7f),
+ prefixSize(4u)
+{
+ // empty
+}
+
+//-----------------------------------------------------------------------------
+// JaroWinklerDistanceExecutor
+//-----------------------------------------------------------------------------
+JaroWinklerDistanceExecutor::JaroWinklerDistanceExecutor(const search::fef::IQueryEnvironment &env,
+ const JaroWinklerDistanceConfig &config) :
+ search::fef::FeatureExecutor(),
+ _config(config),
+ _termFieldHandles(),
+ _lenHandle(search::fef::IllegalHandle)
+{
+ for (uint32_t i = 0; i < env.getNumTerms(); ++i) {
+ _termFieldHandles.push_back(util::getTermFieldHandle(env, i, config.fieldId));
+ }
+}
+
+void
+JaroWinklerDistanceExecutor::execute(search::fef::MatchData &match)
+{
+ // Build a list of field position iterators, one per query term.
+ std::vector<search::fef::FieldPositionsIterator> pos;
+ for (uint32_t term = 0; term < _termFieldHandles.size(); ++term) {
+ search::fef::FieldPositionsIterator it; // this is not vaild
+ const search::fef::TermFieldHandle &handle = _termFieldHandles[term];
+ if (handle != search::fef::IllegalHandle) {
+ search::fef::TermFieldMatchData &tfmd = *match.resolveTermField(handle);
+ if (tfmd.getDocId() == match.getDocId()) {
+ it = tfmd.getIterator();
+ }
+ }
+ pos.push_back(it);
+ }
+
+ // Assign the jaroWinkler distance to this executor's output.
+ *match.resolveFeature(outputs()[0]) = 1 - jaroWinklerProximity(pos, (uint32_t)*match.resolveFeature(_lenHandle));
+}
+
+namespace {
+uint32_t
+matches(const std::vector<search::fef::FieldPositionsIterator> &termPos,
+ uint32_t fieldLen, uint32_t *numTransposes)
+{
+ (*numTransposes) = 0u;
+ uint32_t ret = 0;
+ uint32_t halfLen = termPos.size() > fieldLen ? (fieldLen / 2 + 1) : (termPos.size() / 2 + 1);
+ for (uint32_t i = 0; i < termPos.size(); ++i) {
+ uint32_t min = i > halfLen ? i - halfLen : 0u;
+ uint32_t max = std::min(fieldLen, i + halfLen);
+ for (search::fef::FieldPositionsIterator it = termPos[i]; it.valid() && it.getPosition() <= max; it.next()) {
+ uint32_t pos = it.getPosition();
+ if (pos >= min && pos <= max) {
+ if (pos != i) {
+ (*numTransposes)++;
+ }
+ ret++;
+ break;
+ }
+ }
+ }
+ (*numTransposes) /= 2;
+ return ret;
+}
+
+uint32_t
+prefixMatch(const std::vector<search::fef::FieldPositionsIterator> &termPos, uint32_t fieldLen, uint32_t maxLen)
+{
+ uint32_t len = std::min((uint32_t)termPos.size(), std::min(fieldLen, maxLen));
+ for (uint32_t i = 0; i < len; ++i) {
+ if (!termPos[i].valid() || termPos[i].getPosition() != i) {
+ return i;
+ }
+ }
+ return len;
+}
+
+feature_t
+jaroMeasure(const std::vector<search::fef::FieldPositionsIterator> &termPos, uint32_t fieldLen)
+{
+ // _P_A_R_A_N_O_I_A_
+ if (termPos.empty() || fieldLen == 0) {
+ return 0.0f;
+ }
+ uint32_t numTransposes = 0;
+ uint32_t numMatches = matches(termPos, fieldLen, &numTransposes);
+ if (numMatches == 0u) {
+ return 0.0f;
+ }
+ return (((feature_t)numMatches / termPos.size()) +
+ ((feature_t)numMatches / fieldLen) +
+ ((feature_t)numMatches - numTransposes) / numMatches) / 3.0f;
+}
+} // namespace
+
+feature_t
+JaroWinklerDistanceExecutor::jaroWinklerProximity(const std::vector<search::fef::FieldPositionsIterator> &termPos, uint32_t fieldLen)
+{
+ feature_t ret = std::min(1.0, std::max(0.0, jaroMeasure(termPos, fieldLen)));
+ //LOG(debug, "Jaro measure is %f.", ret);
+ if (ret > _config.boostThreshold) {
+ ret += 0.1f * prefixMatch(termPos, fieldLen, _config.prefixSize) * (1 - ret); // less boost close to 1
+ //LOG(debug, "Applying Winkler boost.");
+ }
+ //LOG(debug, "JaroWinkler measure is %f.", ret);
+ return ret;
+}
+
+//-----------------------------------------------------------------------------
+// JaroWinklerDistanceBlueprint
+//-----------------------------------------------------------------------------
+JaroWinklerDistanceBlueprint::JaroWinklerDistanceBlueprint() :
+ search::fef::Blueprint("jaroWinklerDistance"),
+ _config()
+{
+ // empty
+}
+
+void
+JaroWinklerDistanceBlueprint::visitDumpFeatures(const search::fef::IIndexEnvironment &,
+ search::fef::IDumpFeatureVisitor &) const
+{
+ // empty
+}
+
+bool
+JaroWinklerDistanceBlueprint::setup(const search::fef::IIndexEnvironment &env,
+ const search::fef::ParameterList &params)
+{
+ _config.fieldId = params[0].asField()->id();
+
+ vespalib::string boostThreshold = env.getProperties().lookup(getName(), "boostThreshold").getAt(0);
+ _config.boostThreshold = boostThreshold.empty() ? 0.7f : atof(boostThreshold.c_str());
+
+ vespalib::string prefixSize = env.getProperties().lookup(getName(), "prefixSize").getAt(0);
+ _config.prefixSize = prefixSize.empty() ? 4 : atoi(prefixSize.c_str());
+
+ defineInput(vespalib::make_string("fieldLength(%s)", params[0].getValue().c_str()));
+ describeOutput("out", "JaroWinklerDistance distance measure.");
+ env.hintFieldAccess(_config.fieldId);
+ return true;
+}
+
+search::fef::Blueprint::UP
+JaroWinklerDistanceBlueprint::createInstance() const
+{
+ return search::fef::Blueprint::UP(new JaroWinklerDistanceBlueprint());
+}
+
+search::fef::FeatureExecutor::LP
+JaroWinklerDistanceBlueprint::createExecutor(const search::fef::IQueryEnvironment &env) const
+{
+ return search::fef::FeatureExecutor::LP(new JaroWinklerDistanceExecutor(env, _config));
+}
+
+}}
diff --git a/searchlib/src/vespa/searchlib/features/jarowinklerdistancefeature.h b/searchlib/src/vespa/searchlib/features/jarowinklerdistancefeature.h
new file mode 100644
index 00000000000..a287618dd75
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/jarowinklerdistancefeature.h
@@ -0,0 +1,84 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <string>
+#include <vector>
+#include <vespa/searchlib/fef/blueprint.h>
+#include <vespa/searchlib/fef/featureexecutor.h>
+#include <vespa/searchlib/common/feature.h>
+
+namespace search {
+namespace features {
+
+/**
+ * Implements the necessary config to pass from the jaro winkler distance blueprint to the executor.
+ */
+struct JaroWinklerDistanceConfig {
+ JaroWinklerDistanceConfig();
+
+ uint32_t fieldId; // The id of field to process.
+ uint32_t fieldBegin; // The first field term to evaluate.
+ uint32_t fieldEnd; // The last field term to evaluate.
+ feature_t boostThreshold; // The jaro threshold to exceed to apply boost.
+ uint32_t prefixSize; // The number of characters to use for boost.
+};
+
+/**
+ * Implements the executor for the jaro winkler distance calculator.
+ */
+class JaroWinklerDistanceExecutor : public search::fef::FeatureExecutor {
+public:
+ /**
+ * Constructs a new executor for the jaro winkler distance calculator.
+ *
+ * @param config The config for this executor.
+ */
+ JaroWinklerDistanceExecutor(const search::fef::IQueryEnvironment &env,
+ const JaroWinklerDistanceConfig &config);
+ void inputs_done() override { _lenHandle = inputs()[0]; }
+ virtual void execute(search::fef::MatchData &data);
+
+private:
+ feature_t jaroWinklerProximity(const std::vector<search::fef::FieldPositionsIterator> &termPos, uint32_t fieldLen);
+
+private:
+ const JaroWinklerDistanceConfig &_config; // The config for this executor.
+ std::vector<search::fef::TermFieldHandle> _termFieldHandles; // The handles of all query terms.
+ search::fef::FeatureHandle _lenHandle; // Handle to the length input feature.
+};
+
+/**
+ * Implements the blueprint for the jaro winkler distance calculator.
+ */
+class JaroWinklerDistanceBlueprint : public search::fef::Blueprint {
+public:
+ /**
+ * Constructs a new blueprint for the jaro winkler distance calculator.
+ */
+ JaroWinklerDistanceBlueprint();
+
+ // Inherit doc from Blueprint.
+ virtual void visitDumpFeatures(const search::fef::IIndexEnvironment &env,
+ search::fef::IDumpFeatureVisitor &visitor) const;
+
+ // Inherit doc from Blueprint.
+ virtual search::fef::Blueprint::UP createInstance() const;
+
+ // Inherit doc from Blueprint.
+ virtual search::fef::ParameterDescriptions getDescriptions() const {
+ return search::fef::ParameterDescriptions().desc().indexField(search::fef::ParameterCollection::SINGLE);
+ }
+
+ // Inherit doc from Blueprint.
+ virtual bool setup(const search::fef::IIndexEnvironment & env,
+ const search::fef::ParameterList & params);
+
+ // Inherit doc from Blueprint.
+ virtual search::fef::FeatureExecutor::LP createExecutor(const search::fef::IQueryEnvironment &env) const;
+
+private:
+ JaroWinklerDistanceConfig _config; // The config for this blueprint.
+};
+
+}}
+
diff --git a/searchlib/src/vespa/searchlib/features/logarithmcalculator.h b/searchlib/src/vespa/searchlib/features/logarithmcalculator.h
new file mode 100644
index 00000000000..4faad71a289
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/logarithmcalculator.h
@@ -0,0 +1,61 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <math.h>
+
+namespace search {
+namespace features {
+
+/**
+ * This class is used to calculate a logarithmic-shaped function that goes from 1 to 0.
+ * The function is:
+ * logscale(x, m, s) = (x > m ? 0 : (( log(m + s) - log(x + s)) / (log(m + s) - log(s)))),
+ * where m specifies for which x the function should output 0 (max parameter),
+ * and s controls the shape of the function (scale parameter).
+ *
+ * If you decide a value for x for when the function should output 0.5,
+ * s can be calculated as -x^2/(2x - m).
+ **/
+class LogarithmCalculator {
+private:
+ feature_t _m;
+ feature_t _s;
+ feature_t _maxLog;
+ feature_t _minLog;
+ feature_t _divMult;
+
+public:
+ /**
+ * Creates a calculator for the given values for m (max) and s (scale).
+ **/
+ LogarithmCalculator(feature_t m, feature_t s) :
+ _m(m),
+ _s(s),
+ _maxLog(log(_m + _s)),
+ _minLog(log(_s)),
+ _divMult(1.0 / (_maxLog - _minLog))
+ {
+ }
+
+ /**
+ * Calculate the function for the given x.
+ **/
+ feature_t get(feature_t x) const {
+ if (x > _m) x = _m;
+ if (x < 0) x = 0;
+ return (_maxLog - log(x + _s)) * _divMult;
+ }
+
+ /**
+ * Calculate the scale parameter to use if the function should output 0.5
+ * for the given x and max parameter.
+ */
+ static feature_t getScale(feature_t x, feature_t m) {
+ return (x * x) / (m - 2*x);
+ }
+};
+
+} // namespace features
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/features/matchesfeature.cpp b/searchlib/src/vespa/searchlib/features/matchesfeature.cpp
new file mode 100644
index 00000000000..459fe4487af
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/matchesfeature.cpp
@@ -0,0 +1,90 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".features.matchesfeature");
+#include <vespa/searchlib/fef/fieldinfo.h>
+#include "matchesfeature.h"
+#include "utils.h"
+#include "valuefeature.h"
+
+using namespace search::fef;
+
+namespace search {
+namespace features {
+
+MatchesExecutor::MatchesExecutor(uint32_t fieldId,
+ const search::fef::IQueryEnvironment &env,
+ uint32_t begin, uint32_t end)
+ : FeatureExecutor(),
+ _handles()
+{
+ for (uint32_t i = begin; i < end; ++i) {
+ search::fef::TermFieldHandle handle = util::getTermFieldHandle(env, i, fieldId);
+ if (handle != search::fef::IllegalHandle) {
+ _handles.push_back(handle);
+ }
+ }
+}
+
+void
+MatchesExecutor::execute(MatchData &match)
+{
+ size_t output = 0;
+ for (uint32_t i = 0; i < _handles.size(); ++i) {
+ const TermFieldMatchData *tfmd = match.resolveTermField(_handles[i]);
+ if (tfmd->getDocId() == match.getDocId()) {
+ output = 1;
+ break;
+ }
+ }
+ *match.resolveFeature(outputs()[0]) = static_cast<feature_t>(output);
+}
+
+
+MatchesBlueprint::MatchesBlueprint() :
+ Blueprint("matches"),
+ _field(NULL),
+ _termIdx(std::numeric_limits<uint32_t>::max())
+{
+}
+
+void
+MatchesBlueprint::visitDumpFeatures(const IIndexEnvironment &,
+ IDumpFeatureVisitor &) const
+{
+}
+
+bool
+MatchesBlueprint::setup(const IIndexEnvironment &,
+ const ParameterList & params)
+{
+ _field = params[0].asField();
+ if (params.size() == 2) {
+ _termIdx = params[1].asInteger();
+ }
+ describeOutput("out", "Returns 1 if the given field is matched by the query, 0 otherwise");
+ return true;
+}
+
+Blueprint::UP
+MatchesBlueprint::createInstance() const
+{
+ return Blueprint::UP(new MatchesBlueprint());
+}
+
+FeatureExecutor::LP
+MatchesBlueprint::createExecutor(const IQueryEnvironment & queryEnv) const
+{
+ if (_field == 0) {
+ return search::fef::FeatureExecutor::LP(new ValueExecutor(std::vector<feature_t>(1, 0.0)));
+ }
+ if (_termIdx != std::numeric_limits<uint32_t>::max()) {
+ return FeatureExecutor::LP(new MatchesExecutor(_field->id(), queryEnv, _termIdx, _termIdx + 1));
+ } else {
+ return FeatureExecutor::LP(new MatchesExecutor(_field->id(), queryEnv, 0, queryEnv.getNumTerms()));
+ }
+}
+
+} // namespace features
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/features/matchesfeature.h b/searchlib/src/vespa/searchlib/features/matchesfeature.h
new file mode 100644
index 00000000000..9f380ad8ac9
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/matchesfeature.h
@@ -0,0 +1,71 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/fef/blueprint.h>
+#include <vespa/searchlib/fef/featureexecutor.h>
+
+namespace search {
+namespace features {
+
+/**
+ * Implements the executor for the matches feature for index and
+ * attribute fields.
+ */
+class MatchesExecutor : public search::fef::FeatureExecutor
+{
+private:
+ std::vector<search::fef::TermFieldHandle> _handles;
+
+public:
+ MatchesExecutor(uint32_t fieldId,
+ const search::fef::IQueryEnvironment &env,
+ uint32_t begin, uint32_t end);
+ virtual void execute(search::fef::MatchData & data);
+};
+
+/**
+ * Implements the blueprint for the matches executor.
+ *
+ * matches(name)
+ * - returns 1 if there is an index or attribute with this name which matched the query, 0 otherwise
+ * matches(name,n)
+ * - returns 1 if there is an index or attribute with this name which matched with the query term at the given position, 0 otherwise
+ */
+class MatchesBlueprint : public search::fef::Blueprint
+{
+private:
+ const search::fef::FieldInfo *_field;
+ uint32_t _termIdx;
+
+public:
+ /**
+ * Constructs a blueprint.
+ */
+ MatchesBlueprint();
+
+ // Inherit doc from Blueprint.
+ virtual void visitDumpFeatures(const search::fef::IIndexEnvironment & env,
+ search::fef::IDumpFeatureVisitor & visitor) const;
+
+ // Inherit doc from Blueprint.
+ virtual search::fef::Blueprint::UP createInstance() const;
+
+ // Inherit doc from Blueprint.
+ virtual search::fef::ParameterDescriptions getDescriptions() const {
+ return search::fef::ParameterDescriptions().
+ desc().field().
+ desc().field().number();
+ }
+
+ // Inherit doc from Blueprint.
+ virtual bool setup(const search::fef::IIndexEnvironment & env,
+ const search::fef::ParameterList & params);
+
+ // Inherit doc from Blueprint.
+ virtual search::fef::FeatureExecutor::LP createExecutor(const search::fef::IQueryEnvironment & env) const;
+};
+
+} // namespace features
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/features/matchfeature.cpp b/searchlib/src/vespa/searchlib/features/matchfeature.cpp
new file mode 100644
index 00000000000..e80d56a2edd
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/matchfeature.cpp
@@ -0,0 +1,107 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".features.matchfeature");
+#include "matchfeature.h"
+
+#include <vespa/searchlib/fef/featurenamebuilder.h>
+#include <vespa/searchlib/fef/fieldinfo.h>
+#include <vespa/searchlib/fef/indexproperties.h>
+#include <vespa/searchlib/fef/properties.h>
+#include <vespa/vespalib/util/stringfmt.h>
+#include "utils.h"
+
+using namespace search::fef;
+
+namespace search {
+namespace features {
+
+MatchExecutor::MatchExecutor(const MatchParams & params) :
+ FeatureExecutor(),
+ _params(params)
+{
+ // empty
+}
+
+void
+MatchExecutor::execute(MatchData & match)
+{
+ feature_t sum = 0.0f;
+ feature_t totalWeight = 0.0f;
+ for (uint32_t i = 0; i < _params.weights.size(); ++i) {
+ feature_t weight = static_cast<feature_t>(_params.weights[i]);
+ feature_t matchScore = *match.resolveFeature(inputs()[i]);
+ if (matchScore > 0.0f) {
+ totalWeight += weight;
+ sum += (weight * matchScore);
+ }
+ *match.resolveFeature(outputs()[i + 2]) = weight;
+ }
+
+ *match.resolveFeature(outputs()[0]) = totalWeight > 0.0f ? sum / totalWeight : 0.0f;
+ *match.resolveFeature(outputs()[1]) = totalWeight;
+}
+
+
+MatchBlueprint::MatchBlueprint() :
+ Blueprint("match"),
+ _params()
+{
+ // empty
+}
+
+void
+MatchBlueprint::visitDumpFeatures(const IIndexEnvironment & env,
+ IDumpFeatureVisitor & visitor) const
+{
+ (void) env;
+ (void) visitor;
+}
+
+Blueprint::UP
+MatchBlueprint::createInstance() const
+{
+ return Blueprint::UP(new MatchBlueprint());
+}
+
+bool
+MatchBlueprint::setup(const IIndexEnvironment & env,
+ const ParameterList &)
+{
+ for (uint32_t i = 0; i < env.getNumFields(); ++i) {
+ const FieldInfo * info = env.getField(i);
+ if ((info->type() == FieldType::INDEX) || (info->type() == FieldType::ATTRIBUTE)) {
+ _params.weights.push_back(indexproperties::FieldWeight::lookup(env.getProperties(), info->name()));
+ if (info->type() == FieldType::INDEX) {
+ if (info->collection() == CollectionType::SINGLE) {
+ defineInput("fieldMatch(" + info->name() + ")");
+ } else {
+ defineInput("elementCompleteness(" + info->name() + ")");
+ }
+ } else if (info->type() == FieldType::ATTRIBUTE) {
+ defineInput("attributeMatch(" + info->name() + ")");
+ }
+ }
+ }
+ describeOutput("score", "Normalized sum over all matched fields");
+ describeOutput("totalWeight", "Sum of rank weights for all matched fields");
+ for (uint32_t i = 0; i < env.getNumFields(); ++i) {
+ const FieldInfo * info = env.getField(i);
+ if ((info->type() == FieldType::INDEX) || (info->type() == FieldType::ATTRIBUTE)) {
+ describeOutput("weight." + info->name(), "The rank weight value for field '" + info->name() + "'");
+ }
+ }
+ return true;
+}
+
+FeatureExecutor::LP
+MatchBlueprint::createExecutor(const IQueryEnvironment & env) const
+{
+ (void) env;
+ return FeatureExecutor::LP(new MatchExecutor(_params));
+}
+
+
+} // namespace features
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/features/matchfeature.h b/searchlib/src/vespa/searchlib/features/matchfeature.h
new file mode 100644
index 00000000000..26ecfb85132
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/matchfeature.h
@@ -0,0 +1,68 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/fef/blueprint.h>
+#include <vespa/searchlib/fef/featureexecutor.h>
+
+namespace search {
+namespace features {
+
+struct MatchParams {
+ MatchParams() : weights() {}
+ std::vector<uint32_t> weights;
+};
+
+/**
+ * Implements the executor for the match feature.
+ */
+class MatchExecutor : public search::fef::FeatureExecutor {
+private:
+ const MatchParams & _params;
+
+public:
+ /**
+ * Constructs an executor.
+ */
+ MatchExecutor(const MatchParams & params);
+ virtual void execute(search::fef::MatchData & data);
+};
+
+
+/**
+ * Implements the blueprint for the match executor.
+ */
+class MatchBlueprint : public search::fef::Blueprint {
+private:
+ MatchParams _params;
+
+public:
+ /**
+ * Constructs a blueprint.
+ */
+ MatchBlueprint();
+
+ // Inherit doc from Blueprint.
+ virtual void visitDumpFeatures(const search::fef::IIndexEnvironment & env,
+ search::fef::IDumpFeatureVisitor & visitor) const;
+
+ // Inherit doc from Blueprint.
+ virtual search::fef::Blueprint::UP createInstance() const;
+
+ // Inherit doc from Blueprint.
+ virtual search::fef::ParameterDescriptions getDescriptions() const {
+ return search::fef::ParameterDescriptions().desc();
+ }
+
+ // Inherit doc from Blueprint.
+ virtual bool setup(const search::fef::IIndexEnvironment & env,
+ const search::fef::ParameterList & params);
+
+ // Inherit doc from Blueprint.
+ virtual search::fef::FeatureExecutor::LP createExecutor(const search::fef::IQueryEnvironment & env) const;
+};
+
+
+} // namespace features
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/features/native_dot_product_feature.cpp b/searchlib/src/vespa/searchlib/features/native_dot_product_feature.cpp
new file mode 100644
index 00000000000..d135ecfdc91
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/native_dot_product_feature.cpp
@@ -0,0 +1,57 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".features.native_dot_product_feature");
+#include "native_dot_product_feature.h"
+#include "utils.h"
+
+using namespace search::fef;
+
+namespace search {
+namespace features {
+
+NativeDotProductExecutor::NativeDotProductExecutor(const search::fef::IQueryEnvironment &env, uint32_t fieldId)
+ : FeatureExecutor(),
+ _pairs()
+{
+ for (uint32_t i = 0; i < env.getNumTerms(); ++i) {
+ search::fef::TermFieldHandle handle = util::getTermFieldHandle(env, i, fieldId);
+ if (handle != search::fef::IllegalHandle) {
+ _pairs.push_back(std::make_pair(handle, env.getTerm(i)->getWeight()));
+ }
+ }
+}
+
+void
+NativeDotProductExecutor::execute(MatchData &data)
+{
+ feature_t output = 0.0;
+ for (uint32_t i = 0; i < _pairs.size(); ++i) {
+ const TermFieldMatchData *tfmd = data.resolveTermField(_pairs[i].first);
+ if (tfmd->getDocId() == data.getDocId()) {
+ output += (tfmd->getWeight() * (int32_t)_pairs[i].second.percent());
+ }
+ }
+ *data.resolveFeature(outputs()[0]) = output;
+}
+
+//-----------------------------------------------------------------------------
+
+bool
+NativeDotProductBlueprint::setup(const IIndexEnvironment &,
+ const ParameterList &params)
+{
+ _field = params[0].asField();
+ describeOutput("out", "dot product between query term weights and match weights for the given field");
+ return true;
+}
+
+FeatureExecutor::LP
+NativeDotProductBlueprint::createExecutor(const IQueryEnvironment &queryEnv) const
+{
+ return FeatureExecutor::LP(new NativeDotProductExecutor(queryEnv, _field->id()));
+}
+
+} // namespace features
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/features/native_dot_product_feature.h b/searchlib/src/vespa/searchlib/features/native_dot_product_feature.h
new file mode 100644
index 00000000000..addff898298
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/native_dot_product_feature.h
@@ -0,0 +1,47 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/fef/blueprint.h>
+#include <vespa/searchlib/fef/featureexecutor.h>
+#include <vespa/searchlib/query/weight.h>
+#include <algorithm>
+
+namespace search {
+namespace features {
+
+class NativeDotProductExecutor : public search::fef::FeatureExecutor
+{
+private:
+ typedef std::pair<search::fef::TermFieldHandle,query::Weight> Pair;
+ std::vector<Pair> _pairs;
+public:
+ NativeDotProductExecutor(const search::fef::IQueryEnvironment &env, uint32_t fieldId);
+ virtual void execute(search::fef::MatchData &data);
+};
+
+//-----------------------------------------------------------------------------
+
+class NativeDotProductBlueprint : public search::fef::Blueprint
+{
+private:
+ const search::fef::FieldInfo *_field;
+public:
+ NativeDotProductBlueprint() : Blueprint("nativeDotProduct"), _field(0) {}
+ virtual void visitDumpFeatures(const search::fef::IIndexEnvironment &,
+ search::fef::IDumpFeatureVisitor &) const {}
+ virtual search::fef::Blueprint::UP createInstance() const {
+ return Blueprint::UP(new NativeDotProductBlueprint());
+ }
+ virtual search::fef::ParameterDescriptions getDescriptions() const {
+ return search::fef::ParameterDescriptions().desc().field();
+ }
+ virtual bool setup(const search::fef::IIndexEnvironment &env,
+ const search::fef::ParameterList &params);
+ virtual search::fef::FeatureExecutor::LP
+ createExecutor(const search::fef::IQueryEnvironment &env) const;
+};
+
+} // namespace features
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/features/nativeattributematchfeature.cpp b/searchlib/src/vespa/searchlib/features/nativeattributematchfeature.cpp
new file mode 100644
index 00000000000..a3e68c2907d
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/nativeattributematchfeature.cpp
@@ -0,0 +1,150 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+#include <vespa/searchlib/fef/fieldinfo.h>
+#include <vespa/searchlib/fef/indexproperties.h>
+#include <vespa/searchlib/fef/itablemanager.h>
+#include <vespa/searchlib/fef/properties.h>
+#include "valuefeature.h"
+#include "nativeattributematchfeature.h"
+#include "utils.h"
+LOG_SETUP(".features.nativeattributematchfeature");
+
+using namespace search::fef;
+
+namespace search {
+namespace features {
+
+feature_t
+NativeAttributeMatchExecutor::calculateScore(const CachedTermData &td, const TermFieldMatchData &tfmd)
+{
+ return (td.weightBoostTable->get(tfmd.getWeight()) * td.scale);
+}
+
+NativeAttributeMatchExecutor::Precomputed
+NativeAttributeMatchExecutor::preComputeSetup(const IQueryEnvironment & env,
+ const NativeAttributeMatchParams & params)
+{
+ NativeAttributeMatchExecutor::Precomputed precomputed;
+ for (uint32_t i = 0; i < env.getNumTerms(); ++i) {
+ const ITermData *termData = env.getTerm(i);
+ if (termData->getWeight().percent() != 0) // only consider query terms with contribution
+ {
+ typedef search::fef::ITermFieldRangeAdapter FRA;
+ for (FRA iter(*termData); iter.valid(); iter.next()) {
+ const ITermFieldData& tfd = iter.get();
+ uint32_t fieldId = tfd.getFieldId();
+ if (params.considerField(fieldId)) { // only consider fields with contribution
+ const NativeAttributeMatchParams::Param & param = params.vector[fieldId];
+ precomputed.first.push_back(CachedTermData(params, tfd,
+ param.fieldWeight * termData->getWeight().percent() / param.maxTableSum));
+ precomputed.second += (param.fieldWeight * termData->getWeight().percent());
+ }
+ }
+ }
+ }
+ return precomputed;
+}
+
+FeatureExecutor::LP
+NativeAttributeMatchExecutor::createExecutor(const IQueryEnvironment & env,
+ const NativeAttributeMatchParams & params)
+{
+ Precomputed setup = preComputeSetup(env, params);
+ if (setup.first.size() == 0) {
+ return LP(new ValueExecutor(std::vector<feature_t>(1, 0.0)));
+ } else if (setup.first.size() == 1) {
+ return LP(new NativeAttributeMatchExecutorSingle(setup));
+ } else {
+ return LP(new NativeAttributeMatchExecutorMulti(setup));
+ }
+}
+
+void
+NativeAttributeMatchExecutorMulti::execute(MatchData & match)
+{
+ feature_t score = 0;
+ for (size_t i = 0; i < _queryTermData.size(); ++i) {
+ const TermFieldMatchData *tfmd = match.resolveTermField(_queryTermData[i].tfh);
+ if (tfmd->getDocId() == match.getDocId()) {
+ score += calculateScore(_queryTermData[i], *tfmd);
+ }
+ }
+ *match.resolveFeature(outputs()[0]) = score / _divisor;
+}
+
+void
+NativeAttributeMatchExecutorSingle::execute(MatchData & match)
+{
+ const TermFieldMatchData &tfmd = *match.resolveTermField(_queryTermData.tfh);
+ *match.resolveFeature(outputs()[0]) = (tfmd.getDocId() == match.getDocId())
+ ? calculateScore(_queryTermData, tfmd)
+ : 0;
+}
+
+
+NativeAttributeMatchBlueprint::NativeAttributeMatchBlueprint() :
+ Blueprint("nativeAttributeMatch"),
+ _params()
+{
+}
+
+namespace {
+const vespalib::string DefaultWeightTable = "linear(1,0)";
+const vespalib::string WeightTableName = "weightTable";
+}
+
+void
+NativeAttributeMatchBlueprint::visitDumpFeatures(const IIndexEnvironment & env,
+ IDumpFeatureVisitor & visitor) const
+{
+ (void) env;
+ visitor.visitDumpFeature(getBaseName());
+}
+
+Blueprint::UP
+NativeAttributeMatchBlueprint::createInstance() const
+{
+ return Blueprint::UP(new NativeAttributeMatchBlueprint());
+}
+
+bool
+NativeAttributeMatchBlueprint::setup(const IIndexEnvironment & env,
+ const ParameterList & params)
+{
+ _params.resize(env.getNumFields());
+ FieldWrapper fields(env, params, FieldType::ATTRIBUTE);
+ for (uint32_t i = 0; i < fields.getNumFields(); ++i) {
+ const FieldInfo * info = fields.getField(i);
+
+ uint32_t fieldId = info->id();
+ NativeAttributeMatchParams::Param & param = _params.vector[fieldId];
+ param.field = true;
+ const Table * weightBoostTable = util::lookupTable(env, getBaseName(), WeightTableName, info->name(), DefaultWeightTable);
+ if (weightBoostTable == NULL) {
+ return false;
+ }
+ param.weightBoostTable = SymmetricTable(*weightBoostTable);
+ param.fieldWeight = indexproperties::FieldWeight::lookup(env.getProperties(), info->name());
+ if (param.fieldWeight == 0) {
+ param.field = false;
+ }
+ if (NativeRankBlueprint::useTableNormalization(env)) {
+ _params.setMaxTableSums(fieldId, param.weightBoostTable.max());
+ }
+ }
+
+ describeOutput("score", "The native attribute match score");
+ return true;
+}
+
+FeatureExecutor::LP
+NativeAttributeMatchBlueprint::createExecutor(const IQueryEnvironment & env) const
+{
+ return FeatureExecutor::LP(NativeAttributeMatchExecutor::createExecutor(env, _params));
+}
+
+
+} // namespace features
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/features/nativeattributematchfeature.h b/searchlib/src/vespa/searchlib/features/nativeattributematchfeature.h
new file mode 100644
index 00000000000..411a07d4067
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/nativeattributematchfeature.h
@@ -0,0 +1,119 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/fef/blueprint.h>
+#include <vespa/searchlib/fef/featureexecutor.h>
+#include <vespa/searchlib/fef/table.h>
+#include <vespa/searchlib/fef/itermdata.h>
+#include <vespa/searchlib/fef/itermfielddata.h>
+#include "nativerankfeature.h"
+#include <vespa/searchlib/fef/symmetrictable.h>
+
+namespace search {
+namespace features {
+
+/**
+ * This struct contains parameters used by the executor.
+ **/
+struct NativeAttributeMatchParam : public NativeParamBase
+{
+ NativeAttributeMatchParam() : NativeParamBase() { }
+ fef::SymmetricTable weightBoostTable;
+};
+typedef NativeRankParamsBase<NativeAttributeMatchParam> NativeAttributeMatchParams;
+
+/**
+ * Implements the executor for calculating the native attribute match score.
+ **/
+class NativeAttributeMatchExecutor : public fef::FeatureExecutor {
+protected:
+ struct CachedTermData {
+ CachedTermData() : scale(0), weightBoostTable(NULL), tfh(search::fef::IllegalHandle) { }
+ CachedTermData(const NativeAttributeMatchParams & params, const fef::ITermFieldData & tfd, feature_t s) :
+ scale(s),
+ weightBoostTable(&params.vector[tfd.getFieldId()].weightBoostTable),
+ tfh(tfd.getHandle())
+ { }
+ feature_t scale;
+ const fef::SymmetricTable * weightBoostTable;
+ fef::TermFieldHandle tfh;
+ };
+ typedef std::vector<CachedTermData> CachedVector;
+ typedef std::pair<CachedVector, feature_t> Precomputed;
+
+ static feature_t calculateScore(const CachedTermData &td, const fef::TermFieldMatchData &tfmd);
+private:
+ static Precomputed preComputeSetup(const fef::IQueryEnvironment & env,
+ const NativeAttributeMatchParams & params);
+
+public:
+ static fef::FeatureExecutor::LP createExecutor(const fef::IQueryEnvironment & env,
+ const NativeAttributeMatchParams & params);
+};
+
+class NativeAttributeMatchExecutorMulti : public NativeAttributeMatchExecutor
+{
+private:
+ feature_t _divisor;
+ std::vector<CachedTermData> _queryTermData;
+public:
+ NativeAttributeMatchExecutorMulti(const Precomputed & setup) : _divisor(setup.second), _queryTermData(setup.first) { }
+ // Inherit doc from FeatureExecutor.
+ virtual void execute(fef::MatchData & data);
+};
+
+class NativeAttributeMatchExecutorSingle : public NativeAttributeMatchExecutor
+{
+private:
+ CachedTermData _queryTermData;
+public:
+ NativeAttributeMatchExecutorSingle(const Precomputed & setup) :
+ _queryTermData(setup.first[0])
+ {
+ _queryTermData.scale /= setup.second;
+ }
+ // Inherit doc from FeatureExecutor.
+ virtual void execute(fef::MatchData & data);
+};
+
+
+/**
+ * Implements the blueprint for the native attribute match executor.
+ **/
+class NativeAttributeMatchBlueprint : public fef::Blueprint {
+private:
+ NativeAttributeMatchParams _params;
+
+public:
+ NativeAttributeMatchBlueprint();
+
+ // Inherit doc from Blueprint.
+ virtual void visitDumpFeatures(const fef::IIndexEnvironment & env,
+ fef::IDumpFeatureVisitor & visitor) const;
+
+ // Inherit doc from Blueprint.
+ virtual fef::Blueprint::UP createInstance() const;
+
+ // Inherit doc from Blueprint.
+ virtual fef::ParameterDescriptions getDescriptions() const {
+ return fef::ParameterDescriptions().desc().attribute(search::fef::ParameterCollection::ANY).repeat();
+ }
+
+ // Inherit doc from Blueprint.
+ virtual bool setup(const fef::IIndexEnvironment & env,
+ const fef::ParameterList & params);
+
+ // Inherit doc from Blueprint.
+ virtual fef::FeatureExecutor::LP createExecutor(const fef::IQueryEnvironment & env) const;
+
+ /**
+ * Obtains the parameters used by the executor.
+ **/
+ const NativeAttributeMatchParams & getParams() const { return _params; }
+};
+
+
+} // namespace features
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/features/nativefieldmatchfeature.cpp b/searchlib/src/vespa/searchlib/features/nativefieldmatchfeature.cpp
new file mode 100644
index 00000000000..e19d54e8d09
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/nativefieldmatchfeature.cpp
@@ -0,0 +1,179 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".features.nativefieldmatchfeature");
+#include <vespa/searchlib/fef/fieldinfo.h>
+#include <vespa/searchlib/fef/indexproperties.h>
+#include <vespa/searchlib/fef/itablemanager.h>
+#include <vespa/searchlib/fef/properties.h>
+#include "nativefieldmatchfeature.h"
+#include "valuefeature.h"
+#include "utils.h"
+
+using namespace search::fef;
+
+namespace search {
+namespace features {
+
+const uint32_t NativeFieldMatchParam::NOT_DEF_FIELD_LENGTH(std::numeric_limits<uint32_t>::max());
+
+feature_t
+NativeFieldMatchExecutor::calculateScore(const MyQueryTerm &qt, MatchData &md)
+{
+ feature_t termScore = 0;
+ for (size_t i = 0; i < qt.handles().size(); ++i) {
+ TermFieldHandle tfh = qt.handles()[i];
+ TermFieldMatchData *tfmd = md.resolveTermField(tfh);
+ const NativeFieldMatchParam & param = _params.vector[tfmd->getFieldId()];
+ if (tfmd->getDocId() == md.getDocId()) { // do we have a hit
+ FieldPositionsIterator pos = tfmd->getIterator();
+ if (pos.valid()) {
+ uint32_t fieldLength = getFieldLength(param, pos.getFieldLength());
+ termScore +=
+ ((getFirstOccBoost(param, pos.getPosition(), fieldLength) * param.firstOccImportance) +
+ (getNumOccBoost(param, pos.size(), fieldLength) * (1 - param.firstOccImportance))) *
+ param.fieldWeight / param.maxTableSum;
+ }
+ }
+ }
+ termScore *= (qt.significance() * qt.termData()->getWeight().percent());
+ return termScore;
+}
+
+NativeFieldMatchExecutor::NativeFieldMatchExecutor(const IQueryEnvironment & env,
+ const NativeFieldMatchParams & params) :
+ FeatureExecutor(),
+ _params(params),
+ _queryTerms(),
+ _totalTermWeight(0),
+ _divisor(0)
+{
+ for (uint32_t i = 0; i < env.getNumTerms(); ++i) {
+ MyQueryTerm qt(QueryTermFactory::create(env, i));
+ if (qt.termData()->getWeight().percent() != 0) // only consider query terms with contribution
+ {
+ typedef search::fef::ITermFieldRangeAdapter FRA;
+ uint32_t totalFieldWeight = 0;
+ for (FRA iter(*qt.termData()); iter.valid(); iter.next()) {
+ const ITermFieldData& tfd = iter.get();
+ uint32_t fieldId = tfd.getFieldId();
+ if (_params.considerField(fieldId)) { // only consider fields with contribution
+ totalFieldWeight += _params.vector[fieldId].fieldWeight;
+ qt.handles().push_back(tfd.getHandle());
+ }
+ }
+ if (!qt.handles().empty()) {
+ _queryTerms.push_back(qt);
+ _divisor += (qt.significance() * qt.termData()->getWeight().percent() * totalFieldWeight);
+ }
+ }
+ }
+}
+
+void
+NativeFieldMatchExecutor::execute(search::fef::MatchData &match)
+{
+ feature_t score = 0;
+ for (size_t i = 0; i < _queryTerms.size(); ++i) {
+ score += calculateScore(_queryTerms[i], match);
+ }
+ if (_divisor > 0) {
+ score /= _divisor;
+ }
+ *match.resolveFeature(outputs()[0]) = score;
+}
+
+
+NativeFieldMatchBlueprint::NativeFieldMatchBlueprint() :
+ Blueprint("nativeFieldMatch"),
+ _params(),
+ _defaultFirstOcc("expdecay(8000,12.50)"),
+ _defaultNumOcc("loggrowth(1500,4000,19)")
+{
+}
+
+void
+NativeFieldMatchBlueprint::visitDumpFeatures(const IIndexEnvironment & env,
+ IDumpFeatureVisitor & visitor) const
+{
+ (void) env;
+ visitor.visitDumpFeature(getBaseName());
+}
+
+Blueprint::UP
+NativeFieldMatchBlueprint::createInstance() const
+{
+ return Blueprint::UP(new NativeFieldMatchBlueprint());
+}
+
+bool
+NativeFieldMatchBlueprint::setup(const IIndexEnvironment & env,
+ const ParameterList & params)
+{
+ _params.resize(env.getNumFields());
+ FieldWrapper fields(env, params, FieldType::INDEX);
+ vespalib::string defaultFirstOccImportance = env.getProperties().lookup(getBaseName(), "firstOccurrenceImportance").get("0.5");
+ for (uint32_t i = 0; i < fields.getNumFields(); ++i) {
+ const FieldInfo * info = fields.getField(i);
+ uint32_t fieldId = info->id();
+ NativeFieldMatchParam & param = _params.vector[fieldId];
+ param.field = true;
+ if ((param.firstOccTable =
+ util::lookupTable(env, getBaseName(), "firstOccurrenceTable", info->name(), _defaultFirstOcc)) == NULL)
+ {
+ return false;
+ }
+ if ((param.numOccTable =
+ util::lookupTable(env, getBaseName(), "occurrenceCountTable", info->name(), _defaultNumOcc)) == NULL)
+ {
+ return false;
+ }
+ param.fieldWeight = indexproperties::FieldWeight::lookup(env.getProperties(), info->name());
+ if (param.fieldWeight == 0 ||
+ info->isFilter())
+ {
+ param.field = false;
+ }
+ Property afl = env.getProperties().lookup(getBaseName(), "averageFieldLength", info->name());
+ if (afl.found()) {
+ param.averageFieldLength = util::strToNum<uint32_t>(afl.get());
+ }
+
+ param.firstOccImportance = util::strToNum<feature_t>
+ (env.getProperties().lookup(getBaseName(), "firstOccurrenceImportance", info->name()).
+ get(defaultFirstOccImportance));
+
+ if (NativeRankBlueprint::useTableNormalization(env)) {
+ const Table * fo = param.firstOccTable;
+ const Table * no = param.numOccTable;
+ if (fo != NULL && no != NULL) {
+ double value = (fo->max() * param.firstOccImportance) +
+ (no->max() * (1 - param.firstOccImportance));
+ _params.setMaxTableSums(fieldId, value);
+ }
+ }
+ if (param.field) {
+ env.hintFieldAccess(fieldId);
+ }
+ }
+ _params.minFieldLength = util::strToNum<uint32_t>(env.getProperties().lookup
+ (getBaseName(), "minFieldLength").get("6"));
+
+ describeOutput("score", "The native field match score");
+ return true;
+}
+
+FeatureExecutor::LP
+NativeFieldMatchBlueprint::createExecutor(const IQueryEnvironment & env) const
+{
+ std::unique_ptr<NativeFieldMatchExecutor> native(new NativeFieldMatchExecutor(env, _params));
+ if (native->empty()) {
+ return FeatureExecutor::LP(new ValueExecutor(std::vector<feature_t>(1, 0.0)));
+ } else {
+ return FeatureExecutor::LP(native.release());
+ }
+}
+
+} // namespace features
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/features/nativefieldmatchfeature.h b/searchlib/src/vespa/searchlib/features/nativefieldmatchfeature.h
new file mode 100644
index 00000000000..00cb2a9e316
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/nativefieldmatchfeature.h
@@ -0,0 +1,133 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/fef/blueprint.h>
+#include <vespa/searchlib/fef/featureexecutor.h>
+#include <vespa/searchlib/fef/table.h>
+#include "nativerankfeature.h"
+#include "queryterm.h"
+
+namespace search {
+namespace features {
+
+/**
+ * This struct contains parameters used by the executor.
+ **/
+struct NativeFieldMatchParam : public NativeParamBase
+{
+ static const uint32_t NOT_DEF_FIELD_LENGTH;
+ NativeFieldMatchParam() : NativeParamBase(), firstOccTable(NULL), numOccTable(NULL), averageFieldLength(NOT_DEF_FIELD_LENGTH), firstOccImportance(0.5) { }
+ const search::fef::Table * firstOccTable;
+ const search::fef::Table * numOccTable;
+ uint32_t averageFieldLength;
+ feature_t firstOccImportance;
+};
+
+class NativeFieldMatchParams : public NativeRankParamsBase<NativeFieldMatchParam>
+{
+public:
+ uint32_t minFieldLength;
+ NativeFieldMatchParams() : minFieldLength(6) { }
+};
+
+/**
+ * Implements the executor for calculating the native field match score.
+ **/
+class NativeFieldMatchExecutor : public search::fef::FeatureExecutor
+{
+private:
+ typedef std::vector<search::fef::TermFieldHandle> HandleVector;
+
+ class MyQueryTerm : public QueryTerm
+ {
+ private:
+ HandleVector _handles; // field match handles
+ public:
+ MyQueryTerm(const QueryTerm & qt) : QueryTerm(qt), _handles() {}
+ HandleVector &handles() { return _handles; }
+ const HandleVector &handles() const { return _handles; }
+ };
+ const NativeFieldMatchParams & _params;
+ std::vector<MyQueryTerm> _queryTerms;
+ uint32_t _totalTermWeight;
+ feature_t _divisor;
+
+ VESPA_DLL_LOCAL feature_t calculateScore(const MyQueryTerm &qt, search::fef::MatchData &md);
+
+ uint32_t getFieldLength(const NativeFieldMatchParam & param, uint32_t fieldLength) const {
+ if (param.averageFieldLength != NativeFieldMatchParam::NOT_DEF_FIELD_LENGTH) {
+ return param.averageFieldLength;
+ }
+ return fieldLength;
+ }
+
+ feature_t getFirstOccBoost(const NativeFieldMatchParam & param, uint32_t position, uint32_t fieldLength) const {
+ const search::fef::Table * table = param.firstOccTable;
+ size_t index = (position * (table->size() - 1)) / (std::max(_params.minFieldLength, fieldLength) - 1);
+ return table->get(index);
+ }
+
+ feature_t getNumOccBoost(const NativeFieldMatchParam & param, uint32_t occs, uint32_t fieldLength) const {
+ const search::fef::Table * table = param.numOccTable;
+ size_t index = (occs * (table->size() - 1)) / (std::max(_params.minFieldLength, fieldLength));
+ return table->get(index);
+ }
+
+public:
+ NativeFieldMatchExecutor(const search::fef::IQueryEnvironment & env,
+ const NativeFieldMatchParams & params);
+ virtual void execute(search::fef::MatchData & data);
+
+ feature_t getFirstOccBoost(uint32_t field, uint32_t position, uint32_t fieldLength) const {
+ return getFirstOccBoost(_params.vector[field], position, fieldLength);
+ }
+
+ feature_t getNumOccBoost(uint32_t field, uint32_t occs, uint32_t fieldLength) const {
+ return getNumOccBoost(_params.vector[field], occs, fieldLength);
+ }
+ bool empty() const { return _queryTerms.empty(); }
+};
+
+
+/**
+ * Implements the blueprint for the native field match executor.
+ **/
+class NativeFieldMatchBlueprint : public search::fef::Blueprint {
+private:
+ NativeFieldMatchParams _params;
+ vespalib::string _defaultFirstOcc;
+ vespalib::string _defaultNumOcc;
+
+public:
+ NativeFieldMatchBlueprint();
+
+ // Inherit doc from Blueprint.
+ virtual void visitDumpFeatures(const search::fef::IIndexEnvironment & env,
+ search::fef::IDumpFeatureVisitor & visitor) const;
+
+ // Inherit doc from Blueprint.
+ virtual search::fef::Blueprint::UP createInstance() const;
+
+ // Inherit doc from Blueprint.
+ virtual search::fef::ParameterDescriptions getDescriptions() const {
+ return search::fef::ParameterDescriptions().desc().field().repeat();
+ }
+
+ // Inherit doc from Blueprint.
+ virtual bool setup(const search::fef::IIndexEnvironment & env,
+ const search::fef::ParameterList & params);
+
+ // Inherit doc from Blueprint.
+ virtual search::fef::FeatureExecutor::LP createExecutor(const search::fef::IQueryEnvironment & env) const;
+
+ /**
+ * Obtains the parameters used by the executor.
+ **/
+ const NativeFieldMatchParams & getParams() const { return _params; }
+};
+
+
+} // namespace features
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/features/nativeproximityfeature.cpp b/searchlib/src/vespa/searchlib/features/nativeproximityfeature.cpp
new file mode 100644
index 00000000000..6d39aea8780
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/nativeproximityfeature.cpp
@@ -0,0 +1,218 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".features.nativeproximityfeature");
+#include <vespa/searchlib/fef/fieldinfo.h>
+#include <vespa/searchlib/fef/indexproperties.h>
+#include <vespa/searchlib/fef/itablemanager.h>
+#include <vespa/searchlib/fef/properties.h>
+#include "nativeproximityfeature.h"
+#include "valuefeature.h"
+#include "utils.h"
+#include <map>
+
+using namespace search::fef;
+
+namespace search {
+namespace features {
+
+feature_t
+NativeProximityExecutor::calculateScoreForField(const FieldSetup & fs, MatchData & match)
+{
+ feature_t score = 0;
+ for (size_t i = 0; i < fs.pairs.size(); ++i) {
+ score += calculateScoreForPair(fs.pairs[i], fs.fieldId, match);
+ }
+ score *= _params.vector[fs.fieldId].fieldWeight;
+ if (fs.divisor > 0) {
+ score /= fs.divisor;
+ }
+ return score;
+}
+
+feature_t
+NativeProximityExecutor::calculateScoreForPair(const TermPair & pair, uint32_t fieldId, MatchData & match)
+{
+ const NativeProximityParam & param = _params.vector[fieldId];
+ TermDistanceCalculator::Result result;
+ const QueryTerm & a = pair.first;
+ const QueryTerm & b = pair.second;
+ TermDistanceCalculator::run(a, b, match, result);
+ uint32_t forwardIdx = result.forwardDist > 0 ? result.forwardDist - 1 : 0;
+ uint32_t reverseIdx = result.reverseDist > 0 ? result.reverseDist - 1 : 0;
+ feature_t forwardScore = param.proximityTable->get(forwardIdx) * param.proximityImportance;
+ feature_t reverseScore = param.revProximityTable->get(reverseIdx) * (1 - param.proximityImportance);
+ feature_t termPairWeight = pair.connectedness *
+ (a.significance() * a.termData()->getWeight().percent() +
+ b.significance() * b.termData()->getWeight().percent());
+ feature_t score = (forwardScore + reverseScore) * termPairWeight / param.maxTableSum;
+ //LOG(debug, "calculateScoreForPair: pair(%u,%u), fieldId(%u), forwardScore(%f), reverseScore(%f), "
+ //"termPairWeight(%f), maxTableSum(%f), score(%f)",
+ //fieldId, a.termData()->getUniqueId(), b.termData()->getUniqueId(), forwardScore, reverseScore,
+ //termPairWeight, _params.maxTableSums[fieldId], score);
+ return score;
+}
+
+
+NativeProximityExecutor::NativeProximityExecutor(const IQueryEnvironment & env,
+ const NativeProximityParams & params) :
+ FeatureExecutor(),
+ _params(params),
+ _setups(),
+ _totalFieldWeight(0)
+{
+ std::map<uint32_t, QueryTermVector> fields;
+ for (uint32_t i = 0; i < env.getNumTerms(); ++i) {
+ QueryTerm qt = QueryTermFactory::create(env, i);
+
+ typedef search::fef::ITermFieldRangeAdapter FRA;
+
+ for (FRA iter(*qt.termData()); iter.valid(); iter.next()) {
+
+ uint32_t fieldId = iter.get().getFieldId();
+ if (_params.considerField(fieldId)) { // only consider fields with contribution
+ qt.fieldHandle(iter.get().getHandle());
+ fields[fieldId].push_back(qt);
+ }
+ }
+ }
+ for (std::map<uint32_t, QueryTermVector>::const_iterator itr = fields.begin(); itr != fields.end(); ++itr) {
+ if (itr->second.size() >= 2) {
+ FieldSetup setup(itr->first);
+ generateTermPairs(env, itr->second, _params.slidingWindow, setup);
+ if (!setup.pairs.empty()) {
+ _setups.push_back(setup);
+ _totalFieldWeight += params.vector[itr->first].fieldWeight;
+ }
+ }
+ }
+}
+
+void
+NativeProximityExecutor::execute(search::fef::MatchData & match)
+{
+ feature_t score = 0;
+ for (size_t i = 0; i < _setups.size(); ++i) {
+ score += calculateScoreForField(_setups[i], match);
+ }
+ if (_totalFieldWeight > 0) {
+ score /= _totalFieldWeight;
+ }
+ *match.resolveFeature(outputs()[0]) = score;
+}
+
+void
+NativeProximityExecutor::generateTermPairs(const IQueryEnvironment & env, const QueryTermVector & terms,
+ uint32_t slidingWindow, FieldSetup & setup)
+{
+ TermPairVector & pairs = setup.pairs;
+ for (size_t i = 0; i < terms.size(); ++i) {
+ for (size_t j = i + 1; (j < i + slidingWindow) && (j < terms.size()); ++j) {
+ feature_t connectedness = 1;
+ for (size_t k = j; k > i; --k) {
+ connectedness = std::min(util::lookupConnectedness(env, terms[k].termData()->getUniqueId(),
+ terms[k-1].termData()->getUniqueId(), 0.1),
+ connectedness);
+ }
+ connectedness /= (j - i);
+ if (terms[i].termData()->getWeight().percent() != 0 ||
+ terms[j].termData()->getWeight().percent() != 0)
+ { // only consider term pairs with contribution
+ pairs.push_back(TermPair(terms[i], terms[j], connectedness));
+ setup.divisor += (terms[i].significance() * terms[i].termData()->getWeight().percent() +
+ terms[j].significance() * terms[j].termData()->getWeight().percent()) * connectedness;
+ }
+ }
+ }
+}
+
+
+NativeProximityBlueprint::NativeProximityBlueprint() :
+ Blueprint("nativeProximity"),
+ _params(),
+ _defaultProximityBoost("expdecay(500,3)"),
+ _defaultRevProximityBoost("expdecay(400,3)")
+{
+}
+
+void
+NativeProximityBlueprint::visitDumpFeatures(const IIndexEnvironment & env,
+ IDumpFeatureVisitor & visitor) const
+{
+ (void) env;
+ visitor.visitDumpFeature(getBaseName());
+}
+
+Blueprint::UP
+NativeProximityBlueprint::createInstance() const
+{
+ return Blueprint::UP(new NativeProximityBlueprint());
+}
+
+bool
+NativeProximityBlueprint::setup(const IIndexEnvironment & env,
+ const ParameterList & params)
+{
+ _params.resize(env.getNumFields());
+ _params.slidingWindow = util::strToNum<uint32_t>(env.getProperties().lookup(getBaseName(), "slidingWindowSize").get("4"));
+ FieldWrapper fields(env, params, FieldType::INDEX);
+ vespalib::string defaultProximityImportance = env.getProperties().lookup(getBaseName(), "proximityImportance").get("0.5");
+ for (uint32_t i = 0; i < fields.getNumFields(); ++i) {
+ const FieldInfo * info = fields.getField(i);
+ uint32_t fieldId = info->id();
+ NativeProximityParam & param = _params.vector[fieldId];
+ param.field = true;
+ if ((param.proximityTable =
+ util::lookupTable(env, getBaseName(), "proximityTable", info->name(), _defaultProximityBoost)) == NULL)
+ {
+ return false;
+ }
+ if ((param.revProximityTable =
+ util::lookupTable(env, getBaseName(), "reverseProximityTable", info->name(), _defaultRevProximityBoost)) == NULL)
+ {
+ return false;
+ }
+ param.fieldWeight = indexproperties::FieldWeight::lookup(env.getProperties(), info->name());
+ if (param.fieldWeight == 0 ||
+ info->isFilter())
+ {
+ param.field = false;
+ }
+ param.proximityImportance = util::strToNum<feature_t>
+ (env.getProperties().lookup(getBaseName(), "proximityImportance", info->name()).
+ get(defaultProximityImportance));
+
+ if (NativeRankBlueprint::useTableNormalization(env)) {
+ const Table * fp = param.proximityTable;
+ const Table * rp = param.revProximityTable;
+ if (fp != NULL && rp != NULL) {
+ double value = (fp->max() * param.proximityImportance) +
+ (rp->max() * (1 - param.proximityImportance));
+ _params.setMaxTableSums(fieldId, value);
+ }
+ }
+ if (param.field) {
+ env.hintFieldAccess(fieldId);
+ }
+ }
+
+ describeOutput("score", "The native proximity score");
+ return true;
+}
+
+FeatureExecutor::LP
+NativeProximityBlueprint::createExecutor(const IQueryEnvironment & env) const
+{
+ std::unique_ptr<NativeProximityExecutor> native(new NativeProximityExecutor(env, _params));
+ if (native->empty()) {
+ return FeatureExecutor::LP(new ValueExecutor(std::vector<feature_t>(1, 0.0)));
+ } else {
+ return FeatureExecutor::LP(native.release());
+ }
+
+}
+
+
+} // namespace features
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/features/nativeproximityfeature.h b/searchlib/src/vespa/searchlib/features/nativeproximityfeature.h
new file mode 100644
index 00000000000..be79ee7beac
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/nativeproximityfeature.h
@@ -0,0 +1,119 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/fef/blueprint.h>
+#include <vespa/searchlib/fef/featureexecutor.h>
+#include <vespa/searchlib/fef/table.h>
+#include "nativerankfeature.h"
+#include "queryterm.h"
+#include "termdistancecalculator.h"
+
+namespace search {
+namespace features {
+
+/**
+ * This struct contains parameters used by the executor.
+ **/
+struct NativeProximityParam : public NativeParamBase
+{
+ NativeProximityParam() : NativeParamBase(), proximityTable(NULL), revProximityTable(NULL), proximityImportance(0.5) { }
+ const search::fef::Table * proximityTable;
+ const search::fef::Table * revProximityTable;
+ feature_t proximityImportance;
+};
+
+class NativeProximityParams : public NativeRankParamsBase<NativeProximityParam>
+{
+public:
+ uint32_t slidingWindow;
+ NativeProximityParams() : slidingWindow(4) { }
+};
+
+/**
+ * Implements the executor for calculating the native proximity score.
+ **/
+class NativeProximityExecutor : public search::fef::FeatureExecutor {
+public:
+ /**
+ * Represents a term pair with connectedness and associated term distance calculator.
+ **/
+ struct TermPair {
+ QueryTerm first;
+ QueryTerm second;
+ feature_t connectedness;
+ TermPair(QueryTerm f, QueryTerm s, feature_t c) :
+ first(f), second(s), connectedness(c) {}
+ };
+ typedef std::vector<TermPair> TermPairVector;
+ /**
+ * Represents the setup needed to calculate the proximity score for a single field.
+ **/
+ struct FieldSetup {
+ uint32_t fieldId;
+ TermPairVector pairs;
+ feature_t divisor;
+ FieldSetup(uint32_t fid) : fieldId(fid), pairs(), divisor(0) {}
+ };
+
+private:
+ const NativeProximityParams & _params;
+ std::vector<FieldSetup> _setups;
+ uint32_t _totalFieldWeight;
+
+ feature_t calculateScoreForField(const FieldSetup & fs, search::fef::MatchData & match);
+ feature_t calculateScoreForPair(const TermPair & pair, uint32_t fieldId, search::fef::MatchData & match);
+
+public:
+ NativeProximityExecutor(const search::fef::IQueryEnvironment & env,
+ const NativeProximityParams & params);
+ virtual void execute(search::fef::MatchData & data);
+
+ static void generateTermPairs(const search::fef::IQueryEnvironment & env, const QueryTermVector & terms,
+ uint32_t slidingWindow, FieldSetup & setup);
+
+ bool empty() const { return _setups.empty(); }
+};
+
+
+/**
+ * Implements the blueprint for the native proximity executor.
+ **/
+class NativeProximityBlueprint : public search::fef::Blueprint {
+private:
+ NativeProximityParams _params;
+ vespalib::string _defaultProximityBoost;
+ vespalib::string _defaultRevProximityBoost;
+
+public:
+ NativeProximityBlueprint();
+
+ // Inherit doc from Blueprint.
+ virtual void visitDumpFeatures(const search::fef::IIndexEnvironment & env,
+ search::fef::IDumpFeatureVisitor & visitor) const;
+
+ // Inherit doc from Blueprint.
+ virtual search::fef::Blueprint::UP createInstance() const;
+
+ // Inherit doc from Blueprint.
+ virtual search::fef::ParameterDescriptions getDescriptions() const {
+ return search::fef::ParameterDescriptions().desc().field().repeat();
+ }
+
+ // Inherit doc from Blueprint.
+ virtual bool setup(const search::fef::IIndexEnvironment & env,
+ const search::fef::ParameterList & params);
+
+ // Inherit doc from Blueprint.
+ virtual search::fef::FeatureExecutor::LP createExecutor(const search::fef::IQueryEnvironment & env) const;
+
+ /**
+ * Obtains the parameters used by the executor.
+ **/
+ const NativeProximityParams & getParams() const { return _params; }
+};
+
+
+} // namespace features
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/features/nativerankfeature.cpp b/searchlib/src/vespa/searchlib/features/nativerankfeature.cpp
new file mode 100644
index 00000000000..b4d549df9cf
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/nativerankfeature.cpp
@@ -0,0 +1,173 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".features.nativerankfeature");
+#include <vespa/searchlib/fef/fieldinfo.h>
+#include <vespa/searchlib/fef/properties.h>
+#include "nativerankfeature.h"
+#include "valuefeature.h"
+#include "utils.h"
+
+using namespace search::fef;
+
+namespace {
+
+vespalib::string
+buildFeatureName(const vespalib::string & baseName, const search::features::FieldWrapper & fields)
+{
+ std::ostringstream oss;
+ oss << baseName << "(";
+ for (size_t i = 0; i < fields.getNumFields(); ++i) {
+ if (i > 0) {
+ oss << ",";
+ }
+ oss << fields.getField(i)->name();
+ }
+ oss << ")";
+ return oss.str();
+}
+
+}
+
+namespace search {
+namespace features {
+
+FieldWrapper::FieldWrapper(const IIndexEnvironment & env,
+ const ParameterList & fields,
+ const FieldType filter) :
+ _fields()
+{
+ if (!fields.empty()) {
+ for (size_t i = 0; i < fields.size(); ++i) {
+ const search::fef::FieldInfo * info = fields[i].asField();
+ if (info->type() == filter) {
+ _fields.push_back(info);
+ }
+ }
+ } else {
+ for (size_t i = 0; i < env.getNumFields(); ++i) {
+ const search::fef::FieldInfo * info = env.getField(i);
+ LOG_ASSERT(info->id() == i && "The field ids must be the same in FieldInfo as in IIndexEnvironment");
+ if (info->type() == filter) {
+ _fields.push_back(info);
+ }
+ }
+ }
+}
+
+
+NativeRankExecutor::NativeRankExecutor(const NativeRankParams & params) :
+ FeatureExecutor(),
+ _params(params),
+ _divisor(0)
+{
+ _divisor += _params.fieldMatchWeight;
+ _divisor += _params.attributeMatchWeight;
+ _divisor += _params.proximityWeight;
+}
+
+void
+NativeRankExecutor::execute(search::fef::MatchData & match)
+{
+ *match.resolveFeature(outputs()[0]) = (*match.resolveFeature(inputs()[0]) * _params.fieldMatchWeight
+ + *match.resolveFeature(inputs()[1]) * _params.proximityWeight
+ + *match.resolveFeature(inputs()[2]) * _params.attributeMatchWeight) / _divisor;
+}
+
+
+NativeRankBlueprint::NativeRankBlueprint() :
+ Blueprint("nativeRank"),
+ _params()
+{
+}
+
+void
+NativeRankBlueprint::visitDumpFeatures(const IIndexEnvironment & env,
+ IDumpFeatureVisitor & visitor) const
+{
+ (void) env;
+ visitor.visitDumpFeature(getBaseName());
+}
+
+Blueprint::UP
+NativeRankBlueprint::createInstance() const
+{
+ return Blueprint::UP(new NativeRankBlueprint());
+}
+
+bool
+NativeRankBlueprint::setup(const IIndexEnvironment & env,
+ const ParameterList & params)
+{
+ _params.fieldMatchWeight = util::strToNum<feature_t>
+ (env.getProperties().lookup(getBaseName(), "fieldMatchWeight").get("100"));
+ _params.attributeMatchWeight = util::strToNum<feature_t>
+ (env.getProperties().lookup(getBaseName(), "attributeMatchWeight").get("100"));
+ vespalib::string defProxWeight = "25";
+ if (!useTableNormalization(env)) {
+ defProxWeight = "100"; // must use another weight to match the default boost tables
+ }
+ _params.proximityWeight = util::strToNum<feature_t>
+ (env.getProperties().lookup(getBaseName(), "proximityWeight").get(defProxWeight));
+
+ vespalib::string nfm = "nativeFieldMatch";
+ vespalib::string np = "nativeProximity";
+ vespalib::string nam = "nativeAttributeMatch";
+ vespalib::string zero = "value(0)";
+
+ // handle parameter list
+ if (!params.empty()) {
+ FieldWrapper indexFields(env, params, FieldType::INDEX);
+ FieldWrapper attrFields(env, params, FieldType::ATTRIBUTE);
+ if (indexFields.getNumFields() > 0) {
+ nfm = buildFeatureName("nativeFieldMatch", indexFields);
+ np = buildFeatureName("nativeProximity", indexFields);
+ } else {
+ nfm = zero;
+ np = zero;
+ }
+ if (attrFields.getNumFields() > 0) {
+ nam = buildFeatureName("nativeAttributeMatch", attrFields);
+ } else {
+ nam = zero;
+ }
+ }
+ // optimizations when weight == 0
+ if (_params.fieldMatchWeight == 0) {
+ nfm = zero;
+ }
+ if (_params.proximityWeight == 0) {
+ np = zero;
+ }
+ if (_params.attributeMatchWeight == 0) {
+ nam = zero;
+ }
+
+ defineInput(nfm);
+ defineInput(np);
+ defineInput(nam);
+ describeOutput("score", "The native rank score");
+ return true;
+}
+
+FeatureExecutor::LP
+NativeRankBlueprint::createExecutor(const IQueryEnvironment &) const
+{
+ if (_params.proximityWeight + _params.fieldMatchWeight + _params.attributeMatchWeight > 0) {
+ return FeatureExecutor::LP(new NativeRankExecutor(_params));
+ } else {
+ return FeatureExecutor::LP(new ValueExecutor(std::vector<feature_t>(1, 0.0)));
+ }
+}
+
+bool
+NativeRankBlueprint::useTableNormalization(const search::fef::IIndexEnvironment & env)
+{
+ Property norm = env.getProperties().lookup("nativeRank", "useTableNormalization");
+ return (!(norm.found() && (norm.get() == vespalib::string("false"))));
+}
+
+
+} // namespace features
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/features/nativerankfeature.h b/searchlib/src/vespa/searchlib/features/nativerankfeature.h
new file mode 100644
index 00000000000..c6a27d80784
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/nativerankfeature.h
@@ -0,0 +1,133 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/fef/blueprint.h>
+#include <vespa/searchlib/fef/featureexecutor.h>
+#include <vespa/searchlib/fef/fieldtype.h>
+#include <vespa/searchlib/fef/table.h>
+
+namespace search {
+namespace features {
+
+/**
+ * This struct contains parameters used by the nativeRank executor.
+ **/
+struct NativeRankParams {
+ feature_t fieldMatchWeight;
+ feature_t attributeMatchWeight;
+ feature_t proximityWeight;
+ NativeRankParams() : fieldMatchWeight(0), attributeMatchWeight(0), proximityWeight(0) {}
+};
+
+/**
+ * The base class for parameter classes used by native rank sub executors.
+ **/
+struct NativeParamBase {
+ NativeParamBase() : maxTableSum(1), fieldWeight(100), field(false) { }
+ double maxTableSum;
+ uint32_t fieldWeight;
+ bool field;
+};
+template <class P>
+class NativeRankParamsBase {
+public:
+ typedef P Param;
+ std::vector<P> vector;
+ NativeRankParamsBase() : vector() {}
+ void resize(size_t numFields) {
+ vector.resize(numFields);
+ }
+ void setMaxTableSums(size_t fieldId, double value) {
+ vector[fieldId].maxTableSum = value;
+ if (vector[fieldId].maxTableSum == 0) {
+ vector[fieldId].maxTableSum = 1;
+ }
+ }
+ bool considerField(size_t fieldId) const {
+ assert(fieldId < vector.size());
+ return vector[fieldId].field;
+ }
+};
+
+/**
+ * This class wraps an index environment and serves fields of a certain type.
+ * You can specify a set of field names to consider instead of all found in the index environment.
+ **/
+class FieldWrapper {
+public:
+ std::vector<const search::fef::FieldInfo *> _fields;
+
+public:
+ /**
+ * Creates a new wrapper.
+ *
+ * @param env the environment to wrap.
+ * @param fieldNames the set of field names to consider. If empty all found in the environment are used.
+ * @param filter the field type this wrapper should let through.
+ **/
+ FieldWrapper(const search::fef::IIndexEnvironment & env,
+ const search::fef::ParameterList & fields,
+ const search::fef::FieldType filter);
+ size_t getNumFields() const { return _fields.size(); }
+ const search::fef::FieldInfo * getField(size_t idx) const { return _fields[idx]; }
+};
+
+/**
+ * Implements the executor for calculating the native rank score.
+ **/
+class NativeRankExecutor : public search::fef::FeatureExecutor {
+private:
+ const NativeRankParams & _params;
+ feature_t _divisor;
+
+public:
+ NativeRankExecutor(const NativeRankParams & params);
+ virtual void execute(search::fef::MatchData & data);
+};
+
+
+/**
+ * Implements the blueprint for the native rank executor.
+ **/
+class NativeRankBlueprint : public search::fef::Blueprint {
+private:
+ NativeRankParams _params;
+
+public:
+ NativeRankBlueprint();
+
+ // Inherit doc from Blueprint.
+ virtual void visitDumpFeatures(const search::fef::IIndexEnvironment & env,
+ search::fef::IDumpFeatureVisitor & visitor) const;
+
+ // Inherit doc from Blueprint.
+ virtual search::fef::Blueprint::UP createInstance() const;
+
+ // Inherit doc from Blueprint.
+ virtual search::fef::ParameterDescriptions getDescriptions() const {
+ return search::fef::ParameterDescriptions().desc().field().repeat();
+ }
+
+ // Inherit doc from Blueprint.
+ virtual bool setup(const search::fef::IIndexEnvironment & env,
+ const search::fef::ParameterList & params);
+
+ // Inherit doc from Blueprint.
+ virtual search::fef::FeatureExecutor::LP createExecutor(const search::fef::IQueryEnvironment & env) const;
+
+ /**
+ * Obtains the parameters used by the executor.
+ **/
+ const NativeRankParams & getParams() const { return _params; }
+
+ /**
+ * Returns whether we should use table normalization for the setup using the given environment.
+ **/
+ static bool useTableNormalization(const search::fef::IIndexEnvironment & env);
+};
+
+
+} // namespace features
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/features/nowfeature.cpp b/searchlib/src/vespa/searchlib/features/nowfeature.cpp
new file mode 100644
index 00000000000..28eb844c6b0
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/nowfeature.cpp
@@ -0,0 +1,63 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".features.nowfeature");
+
+#include <vespa/searchlib/fef/featurenamebuilder.h>
+#include <vespa/searchlib/fef/queryproperties.h>
+#include <vespa/searchlib/fef/properties.h>
+#include "nowfeature.h"
+#include "valuefeature.h"
+
+namespace search {
+namespace features {
+
+NowExecutor::NowExecutor(int64_t timestamp) :
+ search::fef::FeatureExecutor(),
+ _timestamp(timestamp)
+{
+ // empty
+}
+
+void
+NowExecutor::execute(search::fef::MatchData &data) {
+ *data.resolveFeature(outputs()[0]) = _timestamp;
+}
+
+void
+NowBlueprint::visitDumpFeatures(const search::fef::IIndexEnvironment &,
+ search::fef::IDumpFeatureVisitor &visitor) const
+{
+ visitor.visitDumpFeature(getBaseName());
+}
+
+bool
+NowBlueprint::setup(const search::fef::IIndexEnvironment &,
+ const search::fef::ParameterList &)
+{
+ describeOutput("out", "The timestamp (seconds since epoch) of query execution.");
+ return true;
+}
+
+search::fef::Blueprint::UP
+NowBlueprint::createInstance() const
+{
+ return search::fef::Blueprint::UP(new NowBlueprint());
+}
+
+search::fef::FeatureExecutor::LP
+NowBlueprint::createExecutor(const search::fef::IQueryEnvironment &env) const
+{
+ int64_t timestamp;
+ const fef::Property &prop = env.getProperties().lookup(fef::queryproperties::now::SystemTime::NAME);
+ if (prop.found()) {
+ timestamp = atoll(prop.get().c_str());
+ } else {
+ FastOS_Time now;
+ now.SetNow();
+ timestamp = (int64_t)now.Secs();
+ }
+ return search::fef::FeatureExecutor::LP(new NowExecutor(timestamp));
+}
+
+}}
diff --git a/searchlib/src/vespa/searchlib/features/nowfeature.h b/searchlib/src/vespa/searchlib/features/nowfeature.h
new file mode 100644
index 00000000000..006ebb72446
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/nowfeature.h
@@ -0,0 +1,60 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <string>
+#include <vector>
+#include <vespa/searchlib/fef/blueprint.h>
+#include <vespa/searchlib/fef/featureexecutor.h>
+
+namespace search {
+namespace features {
+
+/**
+ * Implements the executor for the 'now' feature. This executor returns the current
+ * system time, or the time specified by the query argument 'vespa.now'.
+ * Time is returned in two formats. First as seconds since epoch (first output),
+ * then as days since epoch and seconds within that day (second and third output).
+ * This is due to precision problems when encoding current time as a float.
+ **/
+class NowExecutor : public search::fef::FeatureExecutor {
+private:
+ // Current time, in seconds since epoch
+ int64_t _timestamp;
+
+public:
+ /**
+ * Constructs a new executor.
+ **/
+ NowExecutor(int64_t timestamp);
+ virtual void execute(search::fef::MatchData & data);
+};
+
+/**
+ * Implements the blueprint for 'now' feature.
+ */
+class NowBlueprint : public search::fef::Blueprint {
+public:
+ NowBlueprint() : search::fef::Blueprint("now") { }
+
+ // Inherit doc from Blueprint.
+ virtual void visitDumpFeatures(const search::fef::IIndexEnvironment &env,
+ search::fef::IDumpFeatureVisitor &visitor) const;
+
+ // Inherit doc from Blueprint.
+ virtual search::fef::Blueprint::UP createInstance() const;
+
+ // Inherit doc from Blueprint.
+ virtual search::fef::ParameterDescriptions getDescriptions() const {
+ return search::fef::ParameterDescriptions().desc();
+ }
+
+ // Inherit doc from Blueprint.
+ virtual bool setup(const search::fef::IIndexEnvironment & env,
+ const search::fef::ParameterList & params);
+
+ // Inherit doc from Blueprint.
+ virtual search::fef::FeatureExecutor::LP createExecutor(const search::fef::IQueryEnvironment &env) const;
+};
+
+}}
+
diff --git a/searchlib/src/vespa/searchlib/features/proximityfeature.cpp b/searchlib/src/vespa/searchlib/features/proximityfeature.cpp
new file mode 100644
index 00000000000..5fd590650fd
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/proximityfeature.cpp
@@ -0,0 +1,149 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".features.proximity");
+
+#include <vespa/searchlib/fef/featurenamebuilder.h>
+#include <vespa/searchlib/fef/fieldinfo.h>
+#include <vespa/searchlib/fef/itermdata.h>
+#include <vespa/vespalib/util/stringfmt.h>
+#include "proximityfeature.h"
+#include "utils.h"
+
+namespace search {
+namespace features {
+
+ProximityConfig::ProximityConfig() :
+ fieldId(search::fef::IllegalHandle),
+ termA(std::numeric_limits<uint32_t>::max()),
+ termB(std::numeric_limits<uint32_t>::max())
+{
+ // empty
+}
+
+ProximityExecutor::ProximityExecutor(const search::fef::IQueryEnvironment &env,
+ const ProximityConfig &config) :
+ search::fef::FeatureExecutor(),
+ _config(config),
+ _termA(util::getTermFieldHandle(env, _config.termA, _config.fieldId)),
+ _termB(util::getTermFieldHandle(env, _config.termB, _config.fieldId))
+{
+}
+
+void
+ProximityExecutor::execute(search::fef::MatchData &match)
+{
+ // Cannot calculate proximity in this case
+ if (_termA != search::fef::IllegalHandle &&
+ _termB != search::fef::IllegalHandle)
+ {
+ search::fef::TermFieldMatchData &matchA = *match.resolveTermField(_termA);
+ search::fef::TermFieldMatchData &matchB = *match.resolveTermField(_termB);
+
+ if (matchA.getDocId() == match.getDocId() &&
+ matchB.getDocId() == match.getDocId())
+ {
+ if (findBest(match, matchA, matchB)) return;
+ }
+ }
+ // no match
+ *match.resolveFeature(outputs()[0]) = util::FEATURE_MAX; // out
+ *match.resolveFeature(outputs()[1]) = util::FEATURE_MAX; // posA
+ *match.resolveFeature(outputs()[2]) = util::FEATURE_MIN; // posB
+ return;
+}
+
+bool
+ProximityExecutor::findBest(search::fef::MatchData &match,
+ search::fef::TermFieldMatchData &matchA,
+ search::fef::TermFieldMatchData &matchB)
+{
+ // Look for optimal positions for term A and B.
+ uint32_t optA = 0, optB = 0xFFFFFFFFu;
+
+ search::fef::TermFieldMatchData::PositionsIterator itA, itB, epA, epB;
+ itA = matchA.begin();
+ itB = matchB.begin();
+ epA = matchA.end();
+ epB = matchB.end();
+
+ while (itB != epB) {
+ uint32_t eid = itB->getElementId();
+ while (itA != epA && itA->getElementId() < eid) {
+ ++itA;
+ }
+ if (itA != epA && itA->getElementId() == eid) {
+ // there is a pair somewhere here
+ while (itA != epA &&
+ itB != epB &&
+ itA->getElementId() == eid &&
+ itB->getElementId() == eid)
+ {
+ uint32_t a = itA->getPosition();
+ uint32_t b = itB->getPosition();
+ if (a < b) {
+ if (b - a < optB - optA) {
+ optA = a;
+ optB = b;
+ }
+ ++itA;
+ } else {
+ ++itB;
+ }
+ }
+ } else {
+ ++itB;
+ }
+ }
+ if (optB != 0xFFFFFFFFu) {
+ // Output proximity score.
+ *match.resolveFeature(outputs()[0]) = optB - optA;
+ *match.resolveFeature(outputs()[1]) = optA;
+ *match.resolveFeature(outputs()[2]) = optB;
+ return true;
+ } else {
+ return false;
+ }
+}
+
+ProximityBlueprint::ProximityBlueprint() :
+ search::fef::Blueprint("proximity"),
+ _config()
+{
+ // empty
+}
+
+void
+ProximityBlueprint::visitDumpFeatures(const search::fef::IIndexEnvironment &,
+ search::fef::IDumpFeatureVisitor &) const
+{
+ // empty
+}
+
+bool
+ProximityBlueprint::setup(const search::fef::IIndexEnvironment &env,
+ const search::fef::ParameterList &params)
+{
+ _config.fieldId = params[0].asField()->id();
+ _config.termA = params[1].asInteger();
+ _config.termB = params[2].asInteger();
+ describeOutput("out" , "The proximity of the query terms.");
+ describeOutput("posA", "The best position of the first query term.");
+ describeOutput("posB", "The best position of the second query term.");
+ env.hintFieldAccess(_config.fieldId);
+ return true;
+}
+
+search::fef::Blueprint::UP
+ProximityBlueprint::createInstance() const
+{
+ return search::fef::Blueprint::UP(new ProximityBlueprint());
+}
+
+search::fef::FeatureExecutor::LP
+ProximityBlueprint::createExecutor(const search::fef::IQueryEnvironment &env) const
+{
+ return search::fef::FeatureExecutor::LP(new ProximityExecutor(env, _config));
+}
+
+}}
diff --git a/searchlib/src/vespa/searchlib/features/proximityfeature.h b/searchlib/src/vespa/searchlib/features/proximityfeature.h
new file mode 100644
index 00000000000..08a963a099d
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/proximityfeature.h
@@ -0,0 +1,82 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <string>
+#include <vector>
+#include <vespa/searchlib/fef/blueprint.h>
+#include <vespa/searchlib/fef/featureexecutor.h>
+
+namespace search {
+namespace features {
+
+/**
+ * Implements the necessary config for proximity.
+ */
+struct ProximityConfig {
+ ProximityConfig();
+
+ uint32_t fieldId; // The id of field to process.
+ uint32_t termA; // The id of the first query term in the pair (a, b).
+ uint32_t termB; // The id of the second query term.
+};
+
+/**
+ * Implements the executor for proximity.
+ */
+class ProximityExecutor : public search::fef::FeatureExecutor {
+public:
+ /**
+ * Constructs an executor for proximity.
+ *
+ * @param env The query environment.
+ * @param config The completeness config.
+ */
+ ProximityExecutor(const search::fef::IQueryEnvironment &env,
+ const ProximityConfig &config);
+ virtual void execute(search::fef::MatchData &data);
+
+private:
+ const ProximityConfig &_config; // The proximity config.
+ search::fef::TermFieldHandle _termA; // Handle to the first query term.
+ search::fef::TermFieldHandle _termB; // Handle to the second query term.
+
+ bool findBest(search::fef::MatchData &match,
+ search::fef::TermFieldMatchData &matchA,
+ search::fef::TermFieldMatchData &matchB);
+};
+
+/**
+ * Implements the blueprint for proximity.
+ */
+class ProximityBlueprint : public search::fef::Blueprint {
+public:
+ /**
+ * Constructs a proximity blueprint.
+ */
+ ProximityBlueprint();
+
+ // Inherit doc from Blueprint.
+ virtual void visitDumpFeatures(const search::fef::IIndexEnvironment &env,
+ search::fef::IDumpFeatureVisitor &visitor) const;
+
+ // Inherit doc from Blueprint.
+ virtual search::fef::Blueprint::UP createInstance() const;
+
+ // Inherit doc from Blueprint.
+ virtual search::fef::ParameterDescriptions getDescriptions() const {
+ return search::fef::ParameterDescriptions().desc().indexField(search::fef::ParameterCollection::ANY).number().number();
+ }
+
+ // Inherit doc from Blueprint.
+ virtual bool setup(const search::fef::IIndexEnvironment & env,
+ const search::fef::ParameterList & params);
+
+ // Inherit doc from Blueprint.
+ virtual search::fef::FeatureExecutor::LP createExecutor(const search::fef::IQueryEnvironment &env) const;
+
+private:
+ ProximityConfig _config;
+};
+
+}}
+
diff --git a/searchlib/src/vespa/searchlib/features/querycompletenessfeature.cpp b/searchlib/src/vespa/searchlib/features/querycompletenessfeature.cpp
new file mode 100644
index 00000000000..8d944f970c1
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/querycompletenessfeature.cpp
@@ -0,0 +1,112 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".features.querycompleteness");
+
+#include <vespa/searchlib/fef/featurenamebuilder.h>
+#include <vespa/searchlib/fef/fieldinfo.h>
+#include <vespa/searchlib/fef/itermdata.h>
+#include "querycompletenessfeature.h"
+#include "utils.h"
+#include <limits>
+
+namespace search {
+namespace features {
+
+QueryCompletenessConfig::QueryCompletenessConfig() :
+ fieldId(search::fef::IllegalHandle),
+ fieldBegin(0),
+ fieldEnd(std::numeric_limits<uint32_t>::max())
+{
+ // empty
+}
+
+QueryCompletenessExecutor::QueryCompletenessExecutor(const search::fef::IQueryEnvironment &env,
+ const QueryCompletenessConfig &config) :
+ search::fef::FeatureExecutor(),
+ _config(config),
+ _fieldHandles()
+{
+ for (uint32_t i = 0; i < env.getNumTerms(); ++i) {
+ const search::fef::TermFieldHandle handle = util::getTermFieldHandle(env, i, config.fieldId);
+ if (handle != search::fef::IllegalHandle) {
+ _fieldHandles.push_back(handle);
+ }
+ }
+}
+
+void
+QueryCompletenessExecutor::execute(search::fef::MatchData &match)
+{
+ uint32_t hit = 0, miss = 0;
+ for (std::vector<search::fef::TermFieldHandle>::iterator it = _fieldHandles.begin();
+ it != _fieldHandles.end(); ++it)
+ {
+ search::fef::TermFieldMatchData &tfmd = *match.resolveTermField(*it);
+ if (tfmd.getDocId() == match.getDocId()) {
+ search::fef::FieldPositionsIterator field = tfmd.getIterator();
+ while (field.valid() && field.getPosition() < _config.fieldBegin) {
+ field.next();
+ }
+ if (field.valid() && field.getPosition() < _config.fieldEnd) {
+ ++hit;
+ } else {
+ ++miss;
+ }
+ } else {
+ ++miss;
+ }
+ }
+ *match.resolveFeature(outputs()[0]) = hit;
+ *match.resolveFeature(outputs()[1]) = miss;
+}
+
+QueryCompletenessBlueprint::QueryCompletenessBlueprint() :
+ search::fef::Blueprint("queryCompleteness"),
+ _config()
+{
+ // empty
+}
+
+void
+QueryCompletenessBlueprint::visitDumpFeatures(const search::fef::IIndexEnvironment &,
+ search::fef::IDumpFeatureVisitor &) const
+{
+ // empty
+}
+
+bool
+QueryCompletenessBlueprint::setup(const search::fef::IIndexEnvironment &env,
+ const search::fef::ParameterList &params)
+{
+ _config.fieldId = params[0].asField()->id();
+ if (params.size() > 1) {
+ _config.fieldBegin = params[1].asInteger();
+ if (params.size() == 3) {
+ _config.fieldEnd = params[2].asInteger();
+ }
+ if (_config.fieldBegin >= _config.fieldEnd) {
+ LOG(error, "Can not calculate query completeness for field '%s' because range is malformed (from %d to %d).",
+ params[0].getValue().c_str(), _config.fieldBegin, _config.fieldEnd);
+ return false;
+ }
+ }
+ describeOutput("hit", "The number of query terms matched in field.");
+ describeOutput("miss", "The number of query terms not matched in field.");
+ env.hintFieldAccess(_config.fieldId);
+ return true;
+}
+
+search::fef::Blueprint::UP
+QueryCompletenessBlueprint::createInstance() const
+{
+ return search::fef::Blueprint::UP(new QueryCompletenessBlueprint());
+}
+
+search::fef::FeatureExecutor::LP
+QueryCompletenessBlueprint::createExecutor(const search::fef::IQueryEnvironment &env) const
+{
+ return search::fef::FeatureExecutor::LP(new QueryCompletenessExecutor(env, _config));
+}
+
+}}
diff --git a/searchlib/src/vespa/searchlib/features/querycompletenessfeature.h b/searchlib/src/vespa/searchlib/features/querycompletenessfeature.h
new file mode 100644
index 00000000000..7d28c504e79
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/querycompletenessfeature.h
@@ -0,0 +1,80 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <string>
+#include <vector>
+#include <vespa/searchlib/fef/blueprint.h>
+#include <vespa/searchlib/fef/featureexecutor.h>
+
+namespace search {
+namespace features {
+
+/**
+ * Implements the necessary config for query completeness.
+ */
+struct QueryCompletenessConfig {
+ QueryCompletenessConfig();
+
+ uint32_t fieldId; // The id of field to process.
+ uint32_t fieldBegin; // The first field token to evaluate.
+ uint32_t fieldEnd; // The last field token to evaluate.
+};
+
+/**
+ * Implements the executor for query completeness.
+ */
+class QueryCompletenessExecutor : public search::fef::FeatureExecutor {
+public:
+ /**
+ * Constructs an executor for query completenes.
+ *
+ * @param env The query environment.
+ * @param config The completeness config.
+ */
+ QueryCompletenessExecutor(const search::fef::IQueryEnvironment &env,
+ const QueryCompletenessConfig &config);
+ virtual void execute(search::fef::MatchData &data);
+
+private:
+ const QueryCompletenessConfig &_config;
+ std::vector<search::fef::TermFieldHandle> _fieldHandles;
+};
+
+/**
+ * Implements the blueprint for query completeness.
+ */
+class QueryCompletenessBlueprint : public search::fef::Blueprint {
+public:
+ /**
+ * Constructs a completeness blueprint.
+ */
+ QueryCompletenessBlueprint();
+
+ // Inherit doc from Blueprint.
+ virtual void visitDumpFeatures(const search::fef::IIndexEnvironment &env,
+ search::fef::IDumpFeatureVisitor &visitor) const;
+
+ // Inherit doc from Blueprint.
+ virtual search::fef::Blueprint::UP createInstance() const;
+
+ // Inherit doc from Blueprint.
+ virtual search::fef::ParameterDescriptions getDescriptions() const {
+ return search::fef::ParameterDescriptions().
+ desc().indexField(search::fef::ParameterCollection::ANY).
+ desc().indexField(search::fef::ParameterCollection::ANY).number().
+ desc().indexField(search::fef::ParameterCollection::ANY).number().number();
+ }
+
+ // Inherit doc from Blueprint.
+ virtual bool setup(const search::fef::IIndexEnvironment & env,
+ const search::fef::ParameterList & params);
+
+ // Inherit doc from Blueprint.
+ virtual search::fef::FeatureExecutor::LP createExecutor(const search::fef::IQueryEnvironment &env) const;
+
+private:
+ QueryCompletenessConfig _config;
+};
+
+}}
+
diff --git a/searchlib/src/vespa/searchlib/features/queryfeature.cpp b/searchlib/src/vespa/searchlib/features/queryfeature.cpp
new file mode 100644
index 00000000000..e2dbc2d668c
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/queryfeature.cpp
@@ -0,0 +1,161 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".features.queryfeature");
+#include "queryfeature.h"
+
+#include <vespa/searchlib/features/constant_tensor_executor.h>
+#include <vespa/searchlib/fef/featureexecutor.h>
+#include <vespa/searchlib/fef/indexproperties.h>
+#include <vespa/searchlib/fef/properties.h>
+#include <vespa/vespalib/tensor/tensor_type.h>
+#include <vespa/vespalib/objects/nbostream.h>
+#include <vespa/vespalib/tensor/default_tensor.h>
+#include <vespa/vespalib/tensor/tensor_mapper.h>
+#include <vespa/vespalib/tensor/serialization/typed_binary_format.h>
+#include <memory>
+#include "utils.h"
+#include "valuefeature.h"
+#include <vespa/vespalib/eval/value_type.h>
+#include <vespa/searchlib/fef/feature_type.h>
+
+using namespace search::fef;
+using namespace search::fef::indexproperties;
+using vespalib::tensor::DefaultTensor;
+using vespalib::tensor::TensorBuilder;
+using vespalib::tensor::TensorType;
+using vespalib::eval::ValueType;
+using search::fef::FeatureType;
+
+namespace search {
+namespace features {
+
+namespace {
+
+/**
+ * Convert a string to a feature value using special quoting
+ * mechanics; a string that can be converted directly into a feature
+ * (numeric value) will be converted. If the string cannot be
+ * converted directly, it will be hashed, after stripping the leading
+ * "'" if it exists.
+ *
+ * @return feature value
+ * @param str string value to be converted
+ **/
+feature_t asFeature(const vespalib::string &str) {
+ char *end;
+ errno = 0;
+ double val = strtod(str.c_str(), &end);
+ if (errno != 0 || *end != '\0') { // not happy
+ if (str.size() > 0 && str[0] == '\'') {
+ val = vespalib::hash_code(str.substr(1));
+ } else {
+ val = vespalib::hash_code(str);
+ }
+ }
+ return val;
+}
+
+} // namespace search::features::<unnamed>
+
+QueryBlueprint::QueryBlueprint() :
+ Blueprint("query"),
+ _key(),
+ _key2(),
+ _defaultValue(0),
+ _tensorType(TensorType::number())
+{
+}
+
+void
+QueryBlueprint::visitDumpFeatures(const IIndexEnvironment &,
+ IDumpFeatureVisitor &) const
+{
+}
+
+Blueprint::UP
+QueryBlueprint::createInstance() const
+{
+ return Blueprint::UP(new QueryBlueprint());
+}
+
+bool
+QueryBlueprint::setup(const IIndexEnvironment &env,
+ const ParameterList &params)
+{
+ _key = params[0].getValue();
+ _key2 = "$";
+ _key2.append(_key);
+
+ vespalib::string key3;
+ key3.append("query(");
+ key3.append(_key);
+ key3.append(")");
+ Property p = env.getProperties().lookup(key3);
+ if (!p.found()) {
+ p = env.getProperties().lookup(_key2);
+ }
+ if (p.found()) {
+ _defaultValue = asFeature(p.get());
+ }
+ vespalib::string queryFeatureType = type::QueryFeature::lookup(env.getProperties(), _key);
+ if (!queryFeatureType.empty()) {
+ _tensorType = TensorType::fromSpec(queryFeatureType);
+ }
+ FeatureType output_type = _tensorType.is_tensor()
+ ? FeatureType::object(_tensorType.as_value_type())
+ : FeatureType::number();
+ describeOutput("out", "The value looked up in query properties using the given key.",
+ output_type);
+ return true;
+}
+
+namespace {
+
+FeatureExecutor::LP
+createTensorExecutor(const search::fef::IQueryEnvironment &env,
+ const vespalib::string &queryKey,
+ const TensorType &tensorType)
+{
+ search::fef::Property prop = env.getProperties().lookup(queryKey);
+ if (prop.found() && !prop.get().empty()) {
+ DefaultTensor::builder tensorBuilder;
+ const vespalib::string &value = prop.get();
+ vespalib::nbostream stream(value.data(), value.size());
+ vespalib::tensor::TypedBinaryFormat::deserialize(stream, tensorBuilder);
+ vespalib::tensor::Tensor::UP tensor = tensorBuilder.build();
+ if (tensor->getType() != tensorType) {
+ vespalib::tensor::TensorMapper mapper(tensorType);
+ vespalib::tensor::Tensor::UP mappedTensor = mapper.map(*tensor);
+ tensor = std::move(mappedTensor);
+ }
+ return ConstantTensorExecutor::create(std::move(tensor));
+ }
+ return ConstantTensorExecutor::createEmpty();
+}
+
+}
+
+FeatureExecutor::LP
+QueryBlueprint::createExecutor(const IQueryEnvironment &env) const
+{
+ if (_tensorType.is_tensor()) {
+ return createTensorExecutor(env, _key, _tensorType);
+ } else {
+ std::vector<feature_t> values;
+ Property p = env.getProperties().lookup(_key);
+ if (!p.found()) {
+ p = env.getProperties().lookup(_key2);
+ }
+ if (p.found()) {
+ values.push_back(asFeature(p.get()));
+ } else {
+ values.push_back(_defaultValue);
+ }
+ return FeatureExecutor::LP(new ValueExecutor(values));
+ }
+}
+
+} // namespace features
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/features/queryfeature.h b/searchlib/src/vespa/searchlib/features/queryfeature.h
new file mode 100644
index 00000000000..fa3194b30c5
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/queryfeature.h
@@ -0,0 +1,52 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/fef/blueprint.h>
+#include <vespa/vespalib/tensor/tensor_type.h>
+
+namespace search {
+namespace features {
+
+/**
+ * Implements the blueprint for the query feature.
+ *
+ * An executor of this outputs the value of a feature passed down with the query.
+ * This can either be a number or a tensor value.
+ */
+class QueryBlueprint : public search::fef::Blueprint {
+private:
+ vespalib::string _key; // 'foo'
+ vespalib::string _key2; // '$foo'
+ feature_t _defaultValue;
+ vespalib::tensor::TensorType _tensorType;
+
+public:
+ /**
+ * Constructs a query blueprint.
+ */
+ QueryBlueprint();
+
+ // Inherit doc from Blueprint.
+ virtual void visitDumpFeatures(const search::fef::IIndexEnvironment &env,
+ search::fef::IDumpFeatureVisitor &visitor) const;
+
+ // Inherit doc from Blueprint.
+ virtual search::fef::Blueprint::UP createInstance() const;
+
+ // Inherit doc from Blueprint.
+ virtual search::fef::ParameterDescriptions getDescriptions() const {
+ return search::fef::ParameterDescriptions().desc().string();
+ }
+
+ // Inherit doc from Blueprint.
+ virtual bool setup(const search::fef::IIndexEnvironment &env,
+ const search::fef::ParameterList &params);
+
+ // Inherit doc from Blueprint.
+ virtual search::fef::FeatureExecutor::LP createExecutor(const search::fef::IQueryEnvironment &env) const;
+};
+
+} // namespace features
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/features/queryterm.cpp b/searchlib/src/vespa/searchlib/features/queryterm.cpp
new file mode 100644
index 00000000000..f8c659be359
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/queryterm.cpp
@@ -0,0 +1,52 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".features.queryterm");
+#include "queryterm.h"
+#include "utils.h"
+
+using namespace search::fef;
+using search::feature_t;
+
+namespace search {
+namespace features {
+
+QueryTerm::QueryTerm() :
+ _termData(NULL),
+ _handle(IllegalHandle),
+ _significance(0),
+ _connectedness(0)
+{
+}
+
+QueryTerm::QueryTerm(const ITermData * td, feature_t sig, feature_t con) :
+ _termData(td),
+ _handle(IllegalHandle),
+ _significance(sig),
+ _connectedness(con)
+{
+}
+
+QueryTerm
+QueryTermFactory::create(const IQueryEnvironment & env,
+ uint32_t termIdx,
+ bool lookupSignificance,
+ bool lookupConnectedness)
+{
+ const ITermData *termData = env.getTerm(termIdx);
+ feature_t significance = 0;
+ if (lookupSignificance) {
+ feature_t fallback = util::getSignificance(*termData);
+ significance = util::lookupSignificance(env, termIdx, fallback);
+ }
+ feature_t connectedness = 0;
+ if (lookupConnectedness) {
+ connectedness = search::features::util::lookupConnectedness(env, termIdx);
+ }
+ return QueryTerm(termData, significance, connectedness);
+}
+
+
+} // namespace features
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/features/queryterm.h b/searchlib/src/vespa/searchlib/features/queryterm.h
new file mode 100644
index 00000000000..ba92eeb3ec7
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/queryterm.h
@@ -0,0 +1,64 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/common/feature.h>
+#include <vespa/searchlib/fef/iqueryenvironment.h>
+#include <vespa/searchlib/fef/itermdata.h>
+
+namespace search {
+namespace features {
+
+/**
+ * This class represents a query term with the relevant data. Now also
+ * with an optional attachment of a TermFieldData pointer.
+ */
+class QueryTerm {
+private:
+ const fef::ITermData *_termData;
+ fef::TermFieldHandle _handle;
+ feature_t _significance;
+ feature_t _connectedness;
+public:
+ QueryTerm();
+ QueryTerm(const fef::ITermData *td, feature_t sig = 0, feature_t con = 0);
+ const fef::ITermData *termData() const { return _termData; }
+ feature_t significance() const { return _significance; }
+ feature_t connectedness() const { return _connectedness; }
+ fef::TermFieldHandle fieldHandle() const { return _handle; }
+ void fieldHandle(fef::TermFieldHandle handle) { _handle = handle; }
+ void fieldHandle(const fef::ITermFieldData *fd) {
+ if (fd) {
+ _handle = fd->getHandle();
+ }
+ }
+};
+
+/**
+ * Convenience typedef for a vector of QueryTerm objects.
+ */
+typedef std::vector<QueryTerm> QueryTermVector;
+
+/**
+ * This class is a factory for creating QueryTerm objects.
+ */
+class QueryTermFactory {
+public:
+ /**
+ * Creates a new QueryTerm object for the term with the given term index.
+ *
+ * @param env the environment used to lookup TermData object, significance, and connectedness.
+ * @param termIndex the index to use when looking up the TermData object.
+ * @param lookupSignificance whether we should look up the significance for this term.
+ * @param lookupConnectedness whether we should look up the connectedness this term has with the previous term.
+ */
+ static QueryTerm create(const fef::IQueryEnvironment & env,
+ uint32_t termIndex,
+ bool lookupSignificance = true,
+ bool lookupConnectedness = false);
+};
+
+
+} // namespace features
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/features/querytermcountfeature.cpp b/searchlib/src/vespa/searchlib/features/querytermcountfeature.cpp
new file mode 100644
index 00000000000..7069b4208be
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/querytermcountfeature.cpp
@@ -0,0 +1,58 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".features.querytermcountfeature");
+
+#include <vespa/searchlib/fef/properties.h>
+#include <vespa/searchlib/fef/fieldinfo.h>
+#include <vespa/searchlib/fef/fieldtype.h>
+#include <vespa/searchlib/fef/featurenamebuilder.h>
+#include <vespa/searchlib/fef/itermdata.h>
+#include <vespa/searchlib/fef/handle.h>
+#include "querytermcountfeature.h"
+#include "valuefeature.h"
+
+using namespace search::fef;
+
+namespace search {
+namespace features {
+
+QueryTermCountBlueprint::QueryTermCountBlueprint() :
+ Blueprint("queryTermCount")
+{
+}
+
+void
+QueryTermCountBlueprint::visitDumpFeatures(const IIndexEnvironment & env,
+ IDumpFeatureVisitor & visitor) const
+{
+ (void) env;
+ visitor.visitDumpFeature(getBaseName());
+}
+
+Blueprint::UP
+QueryTermCountBlueprint::createInstance() const
+{
+ return Blueprint::UP(new QueryTermCountBlueprint());
+}
+
+bool
+QueryTermCountBlueprint::setup(const IIndexEnvironment &,
+ const ParameterList &)
+{
+ describeOutput("out", "The number of query terms found in the query environment.");
+ return true;
+}
+
+FeatureExecutor::LP
+QueryTermCountBlueprint::createExecutor(const IQueryEnvironment & env) const
+{
+ std::vector<feature_t> values;
+ values.push_back(static_cast<feature_t>(env.getNumTerms()));
+ return FeatureExecutor::LP(new ValueExecutor(values));
+}
+
+
+} // namespace features
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/features/querytermcountfeature.h b/searchlib/src/vespa/searchlib/features/querytermcountfeature.h
new file mode 100644
index 00000000000..54cfee056f0
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/querytermcountfeature.h
@@ -0,0 +1,48 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <string>
+#include <vector>
+#include <vespa/searchlib/fef/blueprint.h>
+#include <vespa/searchlib/fef/featureexecutor.h>
+#include <vespa/searchlib/common/feature.h>
+
+namespace search {
+namespace features {
+
+/**
+ * Implements the blueprint for the query term count feature.
+ */
+class QueryTermCountBlueprint : public search::fef::Blueprint {
+private:
+public:
+ /**
+ * Constructs a blueprint.
+ */
+ QueryTermCountBlueprint();
+
+ // Inherit doc from Blueprint.
+ virtual void visitDumpFeatures(const search::fef::IIndexEnvironment & env,
+ search::fef::IDumpFeatureVisitor & visitor) const;
+
+ // Inherit doc from Blueprint.
+ virtual search::fef::Blueprint::UP createInstance() const;
+
+ // Inherit doc from Blueprint.
+ virtual search::fef::ParameterDescriptions getDescriptions() const {
+ return search::fef::ParameterDescriptions().desc();
+ }
+
+ // Inherit doc from Blueprint.
+ virtual bool setup(const search::fef::IIndexEnvironment & env,
+ const search::fef::ParameterList & params);
+
+ // Inherit doc from Blueprint.
+ virtual search::fef::FeatureExecutor::LP createExecutor(const search::fef::IQueryEnvironment & env) const;
+};
+
+
+} // namespace features
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/features/randomfeature.cpp b/searchlib/src/vespa/searchlib/features/randomfeature.cpp
new file mode 100644
index 00000000000..2c470808a62
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/randomfeature.cpp
@@ -0,0 +1,87 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".features.randomfeature");
+#include "randomfeature.h"
+#include "utils.h"
+#include <vespa/searchlib/fef/properties.h>
+
+namespace search {
+namespace features {
+
+RandomExecutor::RandomExecutor(uint64_t seed, uint64_t matchSeed) :
+ search::fef::FeatureExecutor(),
+ _rnd(),
+ _matchRnd(),
+ _matchSeed(matchSeed)
+{
+ LOG(debug, "RandomExecutor: seed=%" PRIu64 ", matchSeed=%" PRIu64,
+ seed, matchSeed);
+ _rnd.srand48(seed);
+}
+
+void
+RandomExecutor::execute(search::fef::MatchData & match)
+{
+ feature_t rndScore = _rnd.lrand48() / (feature_t)0x80000000u; // 2^31
+ _matchRnd.srand48(_matchSeed + match.getDocId());
+ feature_t matchRndScore = _matchRnd.lrand48() / (feature_t)0x80000000u; // 2^31
+ //LOG(debug, "execute: %f", rndScore);
+ *match.resolveFeature(outputs()[0]) = rndScore;
+ *match.resolveFeature(outputs()[1]) = matchRndScore;
+}
+
+
+RandomBlueprint::RandomBlueprint() :
+ search::fef::Blueprint("random"),
+ _seed(0)
+{
+ // empty
+}
+
+void
+RandomBlueprint::visitDumpFeatures(const search::fef::IIndexEnvironment &,
+ search::fef::IDumpFeatureVisitor &) const
+{
+ // empty
+}
+
+search::fef::Blueprint::UP
+RandomBlueprint::createInstance() const
+{
+ return search::fef::Blueprint::UP(new RandomBlueprint());
+}
+
+bool
+RandomBlueprint::setup(const search::fef::IIndexEnvironment & env,
+ const search::fef::ParameterList &)
+{
+ search::fef::Property p = env.getProperties().lookup(getName(), "seed");
+ if (p.found()) {
+ _seed = util::strToNum<uint64_t>(p.get());
+ }
+ describeOutput("out" , "A random value in the interval [0, 1>");
+ describeOutput("match" , "A random value in the interval [0, 1> that is stable for a given match (document and query)");
+ return true;
+}
+
+search::fef::FeatureExecutor::LP
+RandomBlueprint::createExecutor(const search::fef::IQueryEnvironment & env) const
+{
+ uint64_t seed = _seed;
+ if (seed == 0) {
+ FastOS_Time time;
+ time.SetNow();
+ seed = static_cast<uint64_t>(time.MicroSecs()) ^
+ reinterpret_cast<uint64_t>(&seed); // results in different seeds in different threads
+ }
+ uint64_t matchSeed = util::strToNum<uint64_t>
+ (env.getProperties().lookup(getName(), "match", "seed").get("1024")); // default seed
+
+ return search::fef::FeatureExecutor::LP(new RandomExecutor(seed, matchSeed));
+}
+
+
+} // namespace features
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/features/randomfeature.h b/searchlib/src/vespa/searchlib/features/randomfeature.h
new file mode 100644
index 00000000000..f2932876a10
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/randomfeature.h
@@ -0,0 +1,71 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <string>
+#include <vector>
+#include <vespa/searchlib/fef/blueprint.h>
+#include <vespa/searchlib/fef/featureexecutor.h>
+#include <vespa/searchlib/util/rand48.h>
+
+namespace search {
+namespace features {
+
+
+/**
+ * Implements the executor for the random feature outputting a number in the interval [0, 1>.
+ **/
+class RandomExecutor : public search::fef::FeatureExecutor {
+private:
+ Rand48 _rnd; // seeded once per query
+ Rand48 _matchRnd; // seeded once per match
+ uint64_t _matchSeed;
+
+public:
+ /**
+ * Constructs a new executor.
+ **/
+ RandomExecutor(uint64_t seed, uint64_t matchSeed);
+ virtual void execute(search::fef::MatchData & data);
+};
+
+
+/**
+ * Implements the blueprint for the random feature.
+ */
+class RandomBlueprint : public search::fef::Blueprint {
+private:
+ uint64_t _seed;
+
+public:
+ /**
+ * Constructs a new blueprint.
+ */
+ RandomBlueprint();
+
+ // Inherit doc from Blueprint.
+ virtual void visitDumpFeatures(const search::fef::IIndexEnvironment & env,
+ search::fef::IDumpFeatureVisitor & visitor) const;
+
+ // Inherit doc from Blueprint.
+ virtual search::fef::Blueprint::UP createInstance() const;
+
+ // Inherit doc from Blueprint.
+ virtual search::fef::ParameterDescriptions getDescriptions() const {
+ return search::fef::ParameterDescriptions().
+ desc().
+ desc().string(); // in order to name different features
+ }
+
+ // Inherit doc from Blueprint.
+ virtual bool setup(const search::fef::IIndexEnvironment & env,
+ const search::fef::ParameterList & params);
+
+ // Inherit doc from Blueprint.
+ virtual search::fef::FeatureExecutor::LP createExecutor(const search::fef::IQueryEnvironment & env) const;
+};
+
+
+} // namespace features
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/features/rankingexpression/.gitignore b/searchlib/src/vespa/searchlib/features/rankingexpression/.gitignore
new file mode 100644
index 00000000000..31d063a8460
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/rankingexpression/.gitignore
@@ -0,0 +1,6 @@
+*.So
+.depend
+Makefile
+lex.yy.cpp
+parser.tab.cpp
+parser.tab.h
diff --git a/searchlib/src/vespa/searchlib/features/rankingexpression/CMakeLists.txt b/searchlib/src/vespa/searchlib/features/rankingexpression/CMakeLists.txt
new file mode 100644
index 00000000000..2853a06c49e
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/rankingexpression/CMakeLists.txt
@@ -0,0 +1,6 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_library(searchlib_rankingexpression OBJECT
+ SOURCES
+ feature_name_extractor.cpp
+ DEPENDS
+)
diff --git a/searchlib/src/vespa/searchlib/features/rankingexpression/feature_name_extractor.cpp b/searchlib/src/vespa/searchlib/features/rankingexpression/feature_name_extractor.cpp
new file mode 100644
index 00000000000..4dc5124df67
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/rankingexpression/feature_name_extractor.cpp
@@ -0,0 +1,87 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "feature_name_extractor.h"
+
+namespace search {
+namespace features {
+namespace rankingexpression {
+
+namespace {
+
+struct LegalChar {
+ bool legal[256];
+ LegalChar(std::initializer_list<uint8_t> extra_chars) {
+ for (int c = 0; c < 256; ++c) {
+ legal[c] = isalnum(c);
+ }
+ for (uint8_t c: extra_chars) {
+ legal[c] = true;
+ }
+ }
+ bool is_legal(uint8_t c) { return legal[c]; }
+};
+
+static LegalChar prefix({'_', '$', '@'});
+static LegalChar suffix({'_', '.', '$', '@'});
+
+struct CountParen {
+ size_t depth = 0;
+ bool quoted = false;
+ bool escaped = false;
+ bool done(char c) {
+ if (quoted) {
+ if (escaped) {
+ escaped = false;
+ } else {
+ if (c == '\\') {
+ escaped = true;
+ } else if (c == '"') {
+ quoted = false;
+ }
+ }
+ } else {
+ if (c == '"') {
+ quoted = true;
+ } else if (c == '(') {
+ ++depth;
+ } else if (c == ')') {
+ if (--depth == 0) {
+ return true;
+ }
+ }
+ }
+ return false;
+ }
+};
+
+} // namespace <unnamed>
+
+void
+FeatureNameExtractor::extract_symbol(const char *pos_in, const char *end_in,
+ const char *&pos_out, vespalib::string &symbol_out) const
+{
+ while ((pos_in < end_in) && prefix.is_legal(*pos_in)) {
+ symbol_out.push_back(*pos_in++);
+ }
+ if ((pos_in < end_in) && (*pos_in == '(')) {
+ CountParen paren;
+ while (pos_in < end_in) {
+ symbol_out.push_back(*pos_in);
+ if (paren.done(*pos_in++)) {
+ break;
+ }
+ }
+ }
+ if ((pos_in < end_in) && (*pos_in == '.')) {
+ symbol_out.push_back(*pos_in++);
+ while ((pos_in < end_in) && suffix.is_legal(*pos_in)) {
+ symbol_out.push_back(*pos_in++);
+ }
+ }
+ pos_out = pos_in;
+}
+
+} // namespace rankingexpression
+} // namespace features
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/features/rankingexpression/feature_name_extractor.h b/searchlib/src/vespa/searchlib/features/rankingexpression/feature_name_extractor.h
new file mode 100644
index 00000000000..34551cc8503
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/rankingexpression/feature_name_extractor.h
@@ -0,0 +1,23 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/vespalib/eval/function.h>
+
+namespace search {
+namespace features {
+namespace rankingexpression {
+
+/**
+ * Custom symbol extractor used to extract ranking feature names when
+ * parsing ranking expressions.
+ **/
+struct FeatureNameExtractor : public vespalib::eval::SymbolExtractor {
+ virtual void extract_symbol(const char *pos_in, const char *end_in,
+ const char *&pos_out, vespalib::string &symbol_out) const;
+};
+
+} // namespace rankingexpression
+} // namespace features
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/features/rankingexpressionfeature.cpp b/searchlib/src/vespa/searchlib/features/rankingexpressionfeature.cpp
new file mode 100644
index 00000000000..80724d2d3ba
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/rankingexpressionfeature.cpp
@@ -0,0 +1,170 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".features.rankingexpression");
+
+#include <vespa/searchlib/fef/properties.h>
+#include <vespa/searchlib/fef/indexproperties.h>
+#include <vespa/searchlib/features/rankingexpression/feature_name_extractor.h>
+#include <vespa/vespalib/util/stringfmt.h>
+#include <vespa/vespalib/eval/function.h>
+#include <vespa/vespalib/eval/compiled_function.h>
+#include <vespa/vespalib/eval/compile_cache.h>
+#include <vespa/vespalib/eval/node_types.h>
+#include "rankingexpressionfeature.h"
+#include "utils.h"
+#include <stdexcept>
+#include <vespa/vespalib/eval/value_type.h>
+#include <vespa/searchlib/fef/feature_type.h>
+#include <vespa/vespalib/tensor/default_tensor_engine.h>
+
+using vespalib::eval::Function;
+using vespalib::eval::PassParams;
+using vespalib::eval::CompileCache;
+using vespalib::eval::CompiledFunction;
+using vespalib::eval::InterpretedFunction;
+using vespalib::eval::ValueType;
+using vespalib::eval::NodeTypes;
+using vespalib::tensor::DefaultTensorEngine;
+using search::fef::FeatureType;
+
+namespace search {
+namespace features {
+
+//-----------------------------------------------------------------------------
+
+CompiledRankingExpressionExecutor::CompiledRankingExpressionExecutor(const vespalib::eval::CompiledFunction &compiled_function)
+ : _ranking_function(compiled_function.get_function()),
+ _params(compiled_function.num_params(), 0.0)
+{
+}
+
+void
+CompiledRankingExpressionExecutor::execute(search::fef::MatchData &data)
+{
+ for (size_t i = 0; i < _params.size(); ++i) {
+ _params[i] = *data.resolveFeature(inputs()[i]);
+ }
+ *data.resolveFeature(outputs()[0]) = _ranking_function(&_params[0]);
+}
+
+//-----------------------------------------------------------------------------
+
+InterpretedRankingExpressionExecutor::InterpretedRankingExpressionExecutor(const vespalib::eval::InterpretedFunction &function)
+ : _context(),
+ _function(function)
+{
+}
+
+void
+InterpretedRankingExpressionExecutor::execute(search::fef::MatchData &data)
+{
+ _context.clear_params();
+ for (size_t i = 0; i < _function.num_params(); ++i) {
+ if (data.feature_is_object(inputs()[i])) {
+ _context.add_param(*data.resolve_object_feature(inputs()[i]));
+ } else {
+ _context.add_param(*data.resolveFeature(inputs()[i]));
+ }
+ }
+ *data.resolve_object_feature(outputs()[0]) = _function.eval(_context);
+}
+
+//-----------------------------------------------------------------------------
+
+RankingExpressionBlueprint::RankingExpressionBlueprint()
+ : search::fef::Blueprint("rankingExpression"),
+ _interpreted_function(),
+ _compile_token()
+{
+}
+
+void
+RankingExpressionBlueprint::visitDumpFeatures(const search::fef::IIndexEnvironment &,
+ search::fef::IDumpFeatureVisitor &) const
+{
+ // empty
+}
+
+bool
+RankingExpressionBlueprint::setup(const search::fef::IIndexEnvironment &env,
+ const search::fef::ParameterList &params)
+{
+ // Retrieve and concatenate whatever config is available.
+ vespalib::string script = "";
+ search::fef::Property property = env.getProperties().lookup(getName(), "rankingScript");
+ if (property.size() > 0) {
+ for (uint32_t i = 0; i < property.size(); ++i) {
+ script.append(property.getAt(i));
+ }
+ //LOG(debug, "Script from config: '%s'\n", script.c_str());
+ } else if (params.size() == 1) {
+ script = params[0].getValue();
+ //LOG(debug, "Script from param: '%s'\n", script.c_str());
+ } else {
+ LOG(error, "No expression given.");
+ return false;
+ }
+ Function rank_function = Function::parse(script, rankingexpression::FeatureNameExtractor());
+ if (rank_function.has_error()) {
+ LOG(error, "Failed to parse expression '%s': %s", script.c_str(), rank_function.get_error().c_str());
+ return false;
+ }
+ bool do_compile = true;
+ std::vector<ValueType> input_types;
+ for (size_t i = 0; i < rank_function.num_params(); ++i) {
+ const FeatureType &input = defineInput(rank_function.param_name(i), AcceptInput::ANY);
+ if (input.is_object()) {
+ do_compile = false;
+ input_types.push_back(input.type());
+ } else {
+ input_types.push_back(ValueType::double_type());
+ }
+ }
+ NodeTypes node_types(rank_function, input_types);
+ if (!node_types.all_types_are_double()) {
+ do_compile = false;
+ }
+ ValueType root_type = node_types.get_type(rank_function.root());
+ if (root_type.is_error()) {
+ LOG(error, "rank expression contains type errors: %s\n", script.c_str());
+ return false;
+ }
+ if (root_type.is_any()) {
+ LOG(warning, "rank expression could produce run-time type errors: %s\n", script.c_str());
+ }
+ // avoid costly compilation when only verifying setup
+ if (env.getFeatureMotivation() != env.FeatureMotivation::VERIFY_SETUP) {
+ if (do_compile) {
+ _compile_token = CompileCache::compile(rank_function, PassParams::ARRAY);
+ } else {
+ _interpreted_function.reset(new InterpretedFunction(DefaultTensorEngine::ref(), rank_function));
+ }
+ }
+ FeatureType output_type = do_compile
+ ? FeatureType::number()
+ : FeatureType::object(root_type);
+ describeOutput("out", "The result of running the contained ranking expression.", output_type);
+ return true;
+}
+
+search::fef::Blueprint::UP
+RankingExpressionBlueprint::createInstance() const
+{
+ return search::fef::Blueprint::UP(new RankingExpressionBlueprint());
+}
+
+search::fef::FeatureExecutor::LP
+RankingExpressionBlueprint::createExecutor(const search::fef::IQueryEnvironment &) const
+{
+ if (_interpreted_function) {
+ return search::fef::FeatureExecutor::LP(new InterpretedRankingExpressionExecutor(*_interpreted_function));
+ }
+ assert(_compile_token.get() != nullptr); // will be nullptr for VERIFY_SETUP feature motivation
+ return search::fef::FeatureExecutor::LP(new CompiledRankingExpressionExecutor(_compile_token->get()));
+}
+
+//-----------------------------------------------------------------------------
+
+} // features
+} // search
diff --git a/searchlib/src/vespa/searchlib/features/rankingexpressionfeature.h b/searchlib/src/vespa/searchlib/features/rankingexpressionfeature.h
new file mode 100644
index 00000000000..af60c0de456
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/rankingexpressionfeature.h
@@ -0,0 +1,88 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/searchlib/fef/blueprint.h>
+#include <vespa/searchlib/fef/featureexecutor.h>
+#include <vespa/vespalib/eval/compiled_function.h>
+#include <vespa/vespalib/eval/interpreted_function.h>
+#include <vespa/vespalib/eval/compile_cache.h>
+
+namespace search {
+namespace features {
+
+//-----------------------------------------------------------------------------
+
+/**
+ * Implements the executor for compiled ranking expressions
+ **/
+class CompiledRankingExpressionExecutor : public search::fef::FeatureExecutor
+{
+private:
+ typedef double (*arr_function)(const double *);
+ arr_function _ranking_function;
+ std::vector<double> _params;
+
+public:
+ CompiledRankingExpressionExecutor(const vespalib::eval::CompiledFunction &compiled_function);
+ virtual void execute(search::fef::MatchData &data);
+};
+
+//-----------------------------------------------------------------------------
+
+/**
+ * Implements the executor for interpreted ranking expressions (with tensor support)
+ **/
+class InterpretedRankingExpressionExecutor : public search::fef::FeatureExecutor
+{
+private:
+ vespalib::eval::InterpretedFunction::Context _context;
+ const vespalib::eval::InterpretedFunction &_function;
+
+public:
+ InterpretedRankingExpressionExecutor(const vespalib::eval::InterpretedFunction &function);
+ virtual void execute(search::fef::MatchData &data);
+};
+
+//-----------------------------------------------------------------------------
+
+/**
+ * Implements the blueprint for ranking expression.
+ */
+class RankingExpressionBlueprint : public search::fef::Blueprint
+{
+private:
+ vespalib::eval::InterpretedFunction::UP _interpreted_function;
+ vespalib::eval::CompileCache::Token::UP _compile_token;
+
+public:
+ /**
+ * Constructs a ranking expression blueprint.
+ */
+ RankingExpressionBlueprint();
+
+ // Inherit doc from Blueprint.
+ virtual void visitDumpFeatures(const search::fef::IIndexEnvironment &env,
+ search::fef::IDumpFeatureVisitor &visitor) const;
+
+ // Inherit doc from Blueprint.
+ virtual search::fef::Blueprint::UP createInstance() const;
+
+ // Inherit doc from Blueprint.
+ virtual search::fef::ParameterDescriptions getDescriptions() const {
+ return search::fef::ParameterDescriptions().
+ desc().
+ desc().string();
+ }
+
+ // Inherit doc from Blueprint.
+ virtual bool setup(const search::fef::IIndexEnvironment & env,
+ const search::fef::ParameterList & params);
+
+ // Inherit doc from Blueprint.
+ virtual search::fef::FeatureExecutor::LP createExecutor(const search::fef::IQueryEnvironment &env) const;
+};
+
+//-----------------------------------------------------------------------------
+
+} // features
+} // search
diff --git a/searchlib/src/vespa/searchlib/features/raw_score_feature.cpp b/searchlib/src/vespa/searchlib/features/raw_score_feature.cpp
new file mode 100644
index 00000000000..c6689a45ecd
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/raw_score_feature.cpp
@@ -0,0 +1,57 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".features.raw_score_feature");
+#include "raw_score_feature.h"
+#include "utils.h"
+
+using namespace search::fef;
+
+namespace search {
+namespace features {
+
+RawScoreExecutor::RawScoreExecutor(const search::fef::IQueryEnvironment &env, uint32_t fieldId)
+ : FeatureExecutor(),
+ _handles()
+{
+ for (uint32_t i = 0; i < env.getNumTerms(); ++i) {
+ search::fef::TermFieldHandle handle = util::getTermFieldHandle(env, i, fieldId);
+ if (handle != search::fef::IllegalHandle) {
+ _handles.push_back(handle);
+ }
+ }
+}
+
+void
+RawScoreExecutor::execute(MatchData &data)
+{
+ feature_t output = 0.0;
+ for (uint32_t i = 0; i < _handles.size(); ++i) {
+ const TermFieldMatchData *tfmd = data.resolveTermField(_handles[i]);
+ if (tfmd->getDocId() == data.getDocId()) {
+ output += tfmd->getRawScore();
+ }
+ }
+ *data.resolveFeature(outputs()[0]) = output;
+}
+
+//-----------------------------------------------------------------------------
+
+bool
+RawScoreBlueprint::setup(const IIndexEnvironment &,
+ const ParameterList &params)
+{
+ _field = params[0].asField();
+ describeOutput("out", "accumulated raw score for the given field");
+ return true;
+}
+
+FeatureExecutor::LP
+RawScoreBlueprint::createExecutor(const IQueryEnvironment &queryEnv) const
+{
+ return FeatureExecutor::LP(new RawScoreExecutor(queryEnv, _field->id()));
+}
+
+} // namespace features
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/features/raw_score_feature.h b/searchlib/src/vespa/searchlib/features/raw_score_feature.h
new file mode 100644
index 00000000000..f357989dda6
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/raw_score_feature.h
@@ -0,0 +1,44 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/fef/blueprint.h>
+#include <vespa/searchlib/fef/featureexecutor.h>
+
+namespace search {
+namespace features {
+
+class RawScoreExecutor : public search::fef::FeatureExecutor
+{
+private:
+ std::vector<search::fef::TermFieldHandle> _handles;
+public:
+ RawScoreExecutor(const search::fef::IQueryEnvironment &env, uint32_t fieldId);
+ virtual void execute(search::fef::MatchData &data);
+};
+
+//-----------------------------------------------------------------------------
+
+class RawScoreBlueprint : public search::fef::Blueprint
+{
+private:
+ const search::fef::FieldInfo *_field;
+public:
+ RawScoreBlueprint() : Blueprint("rawScore"), _field(0) {}
+ virtual void visitDumpFeatures(const search::fef::IIndexEnvironment &,
+ search::fef::IDumpFeatureVisitor &) const {}
+ virtual search::fef::Blueprint::UP createInstance() const {
+ return Blueprint::UP(new RawScoreBlueprint());
+ }
+ virtual search::fef::ParameterDescriptions getDescriptions() const {
+ return search::fef::ParameterDescriptions().desc().field();
+ }
+ virtual bool setup(const search::fef::IIndexEnvironment &env,
+ const search::fef::ParameterList &params);
+ virtual search::fef::FeatureExecutor::LP
+ createExecutor(const search::fef::IQueryEnvironment &env) const;
+};
+
+} // namespace features
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/features/reverseproximityfeature.cpp b/searchlib/src/vespa/searchlib/features/reverseproximityfeature.cpp
new file mode 100644
index 00000000000..5a297a8da8a
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/reverseproximityfeature.cpp
@@ -0,0 +1,136 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".features.reverseproximity");
+
+#include <vespa/searchlib/fef/featurenamebuilder.h>
+#include <vespa/searchlib/fef/fieldinfo.h>
+#include <vespa/searchlib/fef/fieldtype.h>
+#include <vespa/searchlib/fef/itermdata.h>
+#include <vespa/vespalib/util/stringfmt.h>
+#include "reverseproximityfeature.h"
+#include "utils.h"
+
+namespace search {
+namespace features {
+
+ReverseProximityConfig::ReverseProximityConfig() :
+ fieldId(search::fef::IllegalHandle),
+ termA(std::numeric_limits<uint32_t>::max()),
+ termB(std::numeric_limits<uint32_t>::max())
+{
+ // empty
+}
+
+ReverseProximityExecutor::ReverseProximityExecutor(const search::fef::IQueryEnvironment &env,
+ const ReverseProximityConfig &config) :
+ search::fef::FeatureExecutor(),
+ _config(config),
+ _termA(util::getTermFieldHandle(env, _config.termA, _config.fieldId)),
+ _termB(util::getTermFieldHandle(env, _config.termB, _config.fieldId))
+{
+}
+
+void
+ReverseProximityExecutor::execute(search::fef::MatchData &match)
+{
+ // Cannot calculate proximity in this case
+ if (_termA == search::fef::IllegalHandle || _termB == search::fef::IllegalHandle) {
+ *match.resolveFeature(outputs()[0]) = util::FEATURE_MAX; // out
+ *match.resolveFeature(outputs()[1]) = util::FEATURE_MIN; // posA
+ *match.resolveFeature(outputs()[2]) = util::FEATURE_MAX; // posB
+ return;
+ }
+
+ // Look for an initial pair to use as guess.
+ uint32_t posA = 0, posB = 0;
+ search::fef::FieldPositionsIterator itA, itB;
+ search::fef::TermFieldMatchData &matchA = *match.resolveTermField(_termA);
+ search::fef::TermFieldMatchData &matchB = *match.resolveTermField(_termB);
+ if (matchA.getDocId() == match.getDocId() && matchB.getDocId() == match.getDocId()) {
+ itA = matchA.getIterator();
+ itB = matchB.getIterator();
+ if (itA.valid() && itB.valid()) {
+ for(posA = itA.getPosition(), posB = itB.getPosition();
+ itA.valid() && itA.getPosition() < posB; itA.next())
+ {
+ // empty
+ }
+ }
+ }
+ //LOG(debug, "Initial guess; posA is '%u' and posB is '%u'.", posA, posB);
+
+ // _P_A_R_A_N_O_I_A_
+ if (!itA.valid() || !itB.valid()) {
+ //LOG(debug, "Initial guess is invalid.");
+ *match.resolveFeature(outputs()[0]) = util::FEATURE_MAX; // out
+ *match.resolveFeature(outputs()[1]) = util::FEATURE_MIN; // posA
+ *match.resolveFeature(outputs()[2]) = util::FEATURE_MAX; // posB
+ return;
+ }
+
+ // Look for optimal positions for term A and B.
+ uint32_t optA = posA, optB = posB;
+ while (itA.valid() && itB.valid()) {
+ uint32_t a = itA.getPosition(), b = itB.getPosition();
+ if (b < posA) {
+ posB = b;
+ itB.next();
+ }
+ else {
+ if (posA - posB < optA - optB) {
+ optA = posA;
+ optB = posB;
+ }
+ posA = a;
+ itA.next();
+ }
+ }
+
+ // Output proximity score.
+ *match.resolveFeature(outputs()[0]) = optA - optB;
+ *match.resolveFeature(outputs()[1]) = optA;
+ *match.resolveFeature(outputs()[2]) = optB;
+}
+
+ReverseProximityBlueprint::ReverseProximityBlueprint() :
+ search::fef::Blueprint("reverseProximity"),
+ _config()
+{
+ // empty
+}
+
+void
+ReverseProximityBlueprint::visitDumpFeatures(const search::fef::IIndexEnvironment &,
+ search::fef::IDumpFeatureVisitor &) const
+{
+ // empty
+}
+
+bool
+ReverseProximityBlueprint::setup(const search::fef::IIndexEnvironment &env,
+ const search::fef::ParameterList &params)
+{
+ _config.fieldId = params[0].asField()->id();
+ _config.termA = params[1].asInteger();
+ _config.termB = params[2].asInteger();
+ describeOutput("out" , "The reverse proximity of the query terms.");
+ describeOutput("posA", "The best position of the first query term.");
+ describeOutput("posB", "The best position of the second query term.");
+ env.hintFieldAccess(_config.fieldId);
+ return true;
+}
+
+search::fef::Blueprint::UP
+ReverseProximityBlueprint::createInstance() const
+{
+ return search::fef::Blueprint::UP(new ReverseProximityBlueprint());
+}
+
+search::fef::FeatureExecutor::LP
+ReverseProximityBlueprint::createExecutor(const search::fef::IQueryEnvironment &env) const
+{
+ return search::fef::FeatureExecutor::LP(new ReverseProximityExecutor(env, _config));
+}
+
+}}
diff --git a/searchlib/src/vespa/searchlib/features/reverseproximityfeature.h b/searchlib/src/vespa/searchlib/features/reverseproximityfeature.h
new file mode 100644
index 00000000000..4311af3dc81
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/reverseproximityfeature.h
@@ -0,0 +1,78 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <string>
+#include <vector>
+#include <vespa/searchlib/fef/blueprint.h>
+#include <vespa/searchlib/fef/featureexecutor.h>
+
+namespace search {
+namespace features {
+
+/**
+ * Implements the necessary config for reverse proximity.
+ */
+struct ReverseProximityConfig {
+ ReverseProximityConfig();
+
+ uint32_t fieldId; // The id of field to process.
+ uint32_t termA; // The id of the first query term in the pair (a, b).
+ uint32_t termB; // The id of the second query term.
+};
+
+/**
+ * Implements the executor for reverse proximity.
+ */
+class ReverseProximityExecutor : public search::fef::FeatureExecutor {
+public:
+ /**
+ * Constructs an executor for reverse proximity.
+ *
+ * @param env The query environment.
+ * @param config The completeness config.
+ */
+ ReverseProximityExecutor(const search::fef::IQueryEnvironment &env,
+ const ReverseProximityConfig &config);
+ virtual void execute(search::fef::MatchData &data);
+
+private:
+ const ReverseProximityConfig &_config; // The proximity config.
+ search::fef::TermFieldHandle _termA; // Handle to the first query term.
+ search::fef::TermFieldHandle _termB; // Handle to the second query term.
+};
+
+/**
+ * Implements the blueprint for proximity.
+ */
+class ReverseProximityBlueprint : public search::fef::Blueprint {
+public:
+ /**
+ * Constructs a blueprint for reverse proximity.
+ */
+ ReverseProximityBlueprint();
+
+ // Inherit doc from Blueprint.
+ virtual void visitDumpFeatures(const search::fef::IIndexEnvironment &env,
+ search::fef::IDumpFeatureVisitor &visitor) const;
+
+ // Inherit doc from Blueprint.
+ virtual search::fef::Blueprint::UP createInstance() const;
+
+ // Inherit doc from Blueprint.
+ virtual search::fef::ParameterDescriptions getDescriptions() const {
+ return search::fef::ParameterDescriptions().desc().indexField(search::fef::ParameterCollection::ANY).number().number();
+ }
+
+ // Inherit doc from Blueprint.
+ virtual bool setup(const search::fef::IIndexEnvironment & env,
+ const search::fef::ParameterList & params);
+
+ // Inherit doc from Blueprint.
+ virtual search::fef::FeatureExecutor::LP createExecutor(const search::fef::IQueryEnvironment &env) const;
+
+private:
+ ReverseProximityConfig _config;
+};
+
+}}
+
diff --git a/searchlib/src/vespa/searchlib/features/setup.cpp b/searchlib/src/vespa/searchlib/features/setup.cpp
new file mode 100644
index 00000000000..e05569c0b6d
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/setup.cpp
@@ -0,0 +1,115 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include "setup.h"
+
+#include "agefeature.h"
+#include "attributefeature.h"
+#include "attributematchfeature.h"
+#include "closenessfeature.h"
+#include "debug_attribute_wait.h"
+#include "debug_wait.h"
+#include "distancefeature.h"
+#include "distancetopathfeature.h"
+#include "dotproductfeature.h"
+#include "element_completeness_feature.h"
+#include "element_similarity_feature.h"
+#include "euclidean_distance_feature.h"
+#include "fieldinfofeature.h"
+#include "fieldlengthfeature.h"
+#include "fieldmatchfeature.h"
+#include "fieldtermmatchfeature.h"
+#include "firstphasefeature.h"
+#include "flow_completeness_feature.h"
+#include "foreachfeature.h"
+#include "freshnessfeature.h"
+#include "item_raw_score_feature.h"
+#include "jarowinklerdistancefeature.h"
+#include "matchesfeature.h"
+#include "matchfeature.h"
+#include "native_dot_product_feature.h"
+#include "nativeattributematchfeature.h"
+#include "nativefieldmatchfeature.h"
+#include "nativeproximityfeature.h"
+#include "nativerankfeature.h"
+#include "nowfeature.h"
+#include "proximityfeature.h"
+#include "querycompletenessfeature.h"
+#include "queryfeature.h"
+#include "querytermcountfeature.h"
+#include "randomfeature.h"
+#include "rankingexpressionfeature.h"
+#include "raw_score_feature.h"
+#include "reverseproximityfeature.h"
+#include "subqueries_feature.h"
+#include "tensor_from_labels_feature.h"
+#include "tensor_from_weighted_set_feature.h"
+#include "term_field_md_feature.h"
+#include "termdistancefeature.h"
+#include "termeditdistancefeature.h"
+#include "termfeature.h"
+#include "terminfofeature.h"
+#include "text_similarity_feature.h"
+#include "valuefeature.h"
+
+using search::fef::Blueprint;
+
+namespace search {
+namespace features {
+
+void setup_search_features(fef::IBlueprintRegistry & registry)
+{
+ // Prod features.
+ registry.addPrototype(Blueprint::SP(new AgeBlueprint()));
+ registry.addPrototype(Blueprint::SP(new AttributeBlueprint()));
+ registry.addPrototype(Blueprint::SP(new AttributeMatchBlueprint()));
+ registry.addPrototype(Blueprint::SP(new ClosenessBlueprint()));
+ registry.addPrototype(Blueprint::SP(new DistanceBlueprint()));
+ registry.addPrototype(Blueprint::SP(new DistanceToPathBlueprint()));
+ registry.addPrototype(Blueprint::SP(new DebugAttributeWaitBlueprint()));
+ registry.addPrototype(Blueprint::SP(new DebugWaitBlueprint()));
+ registry.addPrototype(Blueprint::SP(new DotProductBlueprint()));
+ registry.addPrototype(Blueprint::SP(new ElementCompletenessBlueprint()));
+ registry.addPrototype(Blueprint::SP(new ElementSimilarityBlueprint()));
+ registry.addPrototype(Blueprint::SP(new EuclideanDistanceBlueprint()));
+ registry.addPrototype(Blueprint::SP(new FieldInfoBlueprint()));
+ registry.addPrototype(Blueprint::SP(new FlowCompletenessBlueprint()));
+ registry.addPrototype(Blueprint::SP(new FieldLengthBlueprint()));
+ registry.addPrototype(Blueprint::SP(new FieldMatchBlueprint()));
+ registry.addPrototype(Blueprint::SP(new FieldTermMatchBlueprint()));
+ registry.addPrototype(Blueprint::SP(new FirstPhaseBlueprint()));
+ registry.addPrototype(Blueprint::SP(new ForeachBlueprint()));
+ registry.addPrototype(Blueprint::SP(new FreshnessBlueprint()));
+ registry.addPrototype(Blueprint::SP(new ItemRawScoreBlueprint()));
+ registry.addPrototype(Blueprint::SP(new MatchesBlueprint()));
+ registry.addPrototype(Blueprint::SP(new MatchBlueprint()));
+ registry.addPrototype(Blueprint::SP(new NativeAttributeMatchBlueprint()));
+ registry.addPrototype(Blueprint::SP(new NativeDotProductBlueprint()));
+ registry.addPrototype(Blueprint::SP(new NativeFieldMatchBlueprint()));
+ registry.addPrototype(Blueprint::SP(new NativeProximityBlueprint()));
+ registry.addPrototype(Blueprint::SP(new NativeRankBlueprint()));
+ registry.addPrototype(Blueprint::SP(new NowBlueprint()));
+ registry.addPrototype(Blueprint::SP(new QueryBlueprint()));
+ registry.addPrototype(Blueprint::SP(new QueryTermCountBlueprint()));
+ registry.addPrototype(Blueprint::SP(new RandomBlueprint()));
+ registry.addPrototype(Blueprint::SP(new RankingExpressionBlueprint()));
+ registry.addPrototype(Blueprint::SP(new RawScoreBlueprint()));
+ registry.addPrototype(Blueprint::SP(new SubqueriesBlueprint));
+ registry.addPrototype(Blueprint::SP(new TensorFromLabelsBlueprint()));
+ registry.addPrototype(Blueprint::SP(new TensorFromWeightedSetBlueprint()));
+ registry.addPrototype(Blueprint::SP(new TermBlueprint()));
+ registry.addPrototype(Blueprint::SP(new TermDistanceBlueprint()));
+ registry.addPrototype(Blueprint::SP(new TermInfoBlueprint()));
+ registry.addPrototype(Blueprint::SP(new TextSimilarityBlueprint()));
+ registry.addPrototype(Blueprint::SP(new ValueBlueprint()));
+
+ // Beta features.
+ registry.addPrototype(Blueprint::SP(new JaroWinklerDistanceBlueprint()));
+ registry.addPrototype(Blueprint::SP(new ProximityBlueprint()));
+ registry.addPrototype(Blueprint::SP(new QueryCompletenessBlueprint()));
+ registry.addPrototype(Blueprint::SP(new ReverseProximityBlueprint()));
+ registry.addPrototype(Blueprint::SP(new TermEditDistanceBlueprint()));
+ registry.addPrototype(Blueprint::SP(new TermFieldMdBlueprint()));
+}
+
+} // namespace features
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/features/setup.h b/searchlib/src/vespa/searchlib/features/setup.h
new file mode 100644
index 00000000000..34e36ed5746
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/setup.h
@@ -0,0 +1,19 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/searchlib/fef/blueprint.h>
+#include <vespa/searchlib/fef/iblueprintregistry.h>
+
+namespace search {
+namespace features {
+
+/**
+ * Adds prototypes for all features in this library to the given registry.
+ *
+ * @param registry The blueprint registry to add prototypes to.
+ **/
+void setup_search_features(fef::IBlueprintRegistry & registry);
+
+} // namespace features
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/features/subqueries_feature.cpp b/searchlib/src/vespa/searchlib/features/subqueries_feature.cpp
new file mode 100644
index 00000000000..f6736f2cac9
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/subqueries_feature.cpp
@@ -0,0 +1,58 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".features.subqueries_feature");
+#include "subqueries_feature.h"
+#include "utils.h"
+
+using namespace search::fef;
+
+namespace search {
+namespace features {
+
+SubqueriesExecutor::SubqueriesExecutor(const IQueryEnvironment &env,
+ uint32_t fieldId)
+ : FeatureExecutor(),
+ _handles() {
+ for (uint32_t i = 0; i < env.getNumTerms(); ++i) {
+ TermFieldHandle handle = util::getTermFieldHandle(env, i, fieldId);
+ if (handle != IllegalHandle) {
+ _handles.push_back(handle);
+ }
+ }
+}
+
+void SubqueriesExecutor::execute(MatchData &data) {
+ uint32_t lsb = 0;
+ uint32_t msb = 0;
+ for (uint32_t i = 0; i < _handles.size(); ++i) {
+ const TermFieldMatchData *tfmd = data.resolveTermField(_handles[i]);
+ if (tfmd->getDocId() == data.getDocId()) {
+ lsb |= static_cast<uint32_t>(tfmd->getSubqueries());
+ msb |= tfmd->getSubqueries() >> 32;
+ }
+ }
+ *data.resolveFeature(outputs()[0]) = lsb;
+ *data.resolveFeature(outputs()[1]) = msb;
+}
+
+//-----------------------------------------------------------------------------
+
+bool SubqueriesBlueprint::setup(const IIndexEnvironment &,
+ const ParameterList &params) {
+ _field = params[0].asField();
+ describeOutput("lsb", "32 least significant bits of the subquery bitmap"
+ " for the given field");
+ describeOutput("msb", "32 most significant bits of the subquery bitmap"
+ " for the given field");
+ return true;
+}
+
+FeatureExecutor::LP
+SubqueriesBlueprint::createExecutor(const IQueryEnvironment &queryEnv) const {
+ return FeatureExecutor::LP(new SubqueriesExecutor(queryEnv, _field->id()));
+}
+
+} // namespace features
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/features/subqueries_feature.h b/searchlib/src/vespa/searchlib/features/subqueries_feature.h
new file mode 100644
index 00000000000..2ac727ba4b3
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/subqueries_feature.h
@@ -0,0 +1,43 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/fef/blueprint.h>
+#include <vespa/searchlib/fef/featureexecutor.h>
+
+namespace search {
+namespace features {
+
+class SubqueriesExecutor : public search::fef::FeatureExecutor {
+ std::vector<search::fef::TermFieldHandle> _handles;
+public:
+ SubqueriesExecutor(const search::fef::IQueryEnvironment &env,
+ uint32_t fieldId);
+ virtual void execute(search::fef::MatchData &data);
+};
+
+//-----------------------------------------------------------------------------
+
+class SubqueriesBlueprint : public search::fef::Blueprint
+{
+private:
+ const search::fef::FieldInfo *_field;
+public:
+ SubqueriesBlueprint() : Blueprint("subqueries"), _field(nullptr) {}
+ virtual void visitDumpFeatures(const search::fef::IIndexEnvironment &,
+ search::fef::IDumpFeatureVisitor &) const {}
+ virtual search::fef::Blueprint::UP createInstance() const {
+ return Blueprint::UP(new SubqueriesBlueprint);
+ }
+ virtual search::fef::ParameterDescriptions getDescriptions() const {
+ return search::fef::ParameterDescriptions().desc().field();
+ }
+ virtual bool setup(const search::fef::IIndexEnvironment &env,
+ const search::fef::ParameterList &params);
+ virtual search::fef::FeatureExecutor::LP
+ createExecutor(const search::fef::IQueryEnvironment &env) const;
+};
+
+} // namespace features
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/features/tensor_factory_blueprint.cpp b/searchlib/src/vespa/searchlib/features/tensor_factory_blueprint.cpp
new file mode 100644
index 00000000000..addb90426d7
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/tensor_factory_blueprint.cpp
@@ -0,0 +1,46 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".features.tensor_factory_blueprint");
+
+#include "tensor_factory_blueprint.h"
+
+#include <vespa/vespalib/eval/function.h>
+
+using namespace search::fef;
+using vespalib::eval::Function;
+
+namespace search {
+namespace features {
+
+vespalib::string TensorFactoryBlueprint::ATTRIBUTE_SOURCE = "attribute";
+vespalib::string TensorFactoryBlueprint::QUERY_SOURCE = "query";
+
+bool
+TensorFactoryBlueprint::extractSource(const vespalib::string &source)
+{
+ vespalib::string error;
+ bool unwrapOk = Function::unwrap(source, _sourceType, _sourceParam, error);
+ if (!unwrapOk) {
+ LOG(error, "Failed to extract source param: '%s'", error.c_str());
+ return false;
+ }
+ if (_sourceType != ATTRIBUTE_SOURCE && _sourceType != QUERY_SOURCE) {
+ LOG(error, "Expected source type '%s' or '%s', but it was '%s'",
+ ATTRIBUTE_SOURCE.c_str(), QUERY_SOURCE.c_str(), _sourceType.c_str());
+ return false;
+ }
+ return true;
+}
+
+TensorFactoryBlueprint::TensorFactoryBlueprint(const vespalib::string &baseName)
+ : Blueprint(baseName),
+ _sourceType(),
+ _sourceParam(),
+ _dimension("0") // default dimension is set to the source param if not specified.
+{
+}
+
+} // namespace features
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/features/tensor_factory_blueprint.h b/searchlib/src/vespa/searchlib/features/tensor_factory_blueprint.h
new file mode 100644
index 00000000000..5d9ec8eafad
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/tensor_factory_blueprint.h
@@ -0,0 +1,33 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/fef/blueprint.h>
+#include <vespa/vespalib/stllike/string.h>
+
+namespace search {
+namespace features {
+
+/**
+ * Factory class for tensor rank features.
+ */
+class TensorFactoryBlueprint : public search::fef::Blueprint
+{
+protected:
+ static vespalib::string ATTRIBUTE_SOURCE;
+ static vespalib::string QUERY_SOURCE;
+
+ vespalib::string _sourceType;
+ vespalib::string _sourceParam;
+ vespalib::string _dimension;
+
+ bool extractSource(const vespalib::string &source);
+ TensorFactoryBlueprint(const vespalib::string &baseName);
+
+public:
+ virtual void visitDumpFeatures(const search::fef::IIndexEnvironment &,
+ search::fef::IDumpFeatureVisitor &) const override {}
+};
+
+} // namespace features
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/features/tensor_from_attribute_executor.h b/searchlib/src/vespa/searchlib/features/tensor_from_attribute_executor.h
new file mode 100644
index 00000000000..220fea0c849
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/tensor_from_attribute_executor.h
@@ -0,0 +1,56 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchcommon/attribute/iattributevector.h>
+#include <vespa/searchlib/fef/featureexecutor.h>
+#include <vespa/vespalib/eval/value.h>
+#include <vespa/vespalib/stllike/string.h>
+#include <vespa/vespalib/tensor/default_tensor.h>
+
+namespace search {
+namespace features {
+
+/**
+ * Feature executor that extracts the content from an attribute vector
+ * and converts that into a tensor.
+ */
+template <typename WeightedBufferType>
+class TensorFromAttributeExecutor : public fef::FeatureExecutor
+{
+private:
+ const search::attribute::IAttributeVector *_attribute;
+ vespalib::string _dimension;
+ WeightedBufferType _attrBuffer;
+ vespalib::eval::TensorValue::UP _tensor;
+
+public:
+ TensorFromAttributeExecutor(const search::attribute::IAttributeVector *attribute,
+ const vespalib::string &dimension)
+ : _attribute(attribute),
+ _dimension(dimension),
+ _attrBuffer(),
+ _tensor()
+ {
+ _attrBuffer.allocate(_attribute->getMaxValueCount());
+ }
+ virtual void execute(fef::MatchData &data);
+};
+
+template <typename WeightedBufferType>
+void
+TensorFromAttributeExecutor<WeightedBufferType>::execute(fef::MatchData &data)
+{
+ _attrBuffer.fill(*_attribute, data.getDocId());
+ vespalib::tensor::DefaultTensor::builder builder;
+ vespalib::tensor::TensorBuilder::Dimension dimensionEnum = builder.define_dimension(_dimension);
+ for (size_t i = 0; i < _attrBuffer.size(); ++i) {
+ builder.add_label(dimensionEnum, vespalib::string(_attrBuffer[i].value()));
+ builder.add_cell(_attrBuffer[i].weight());
+ }
+ _tensor = vespalib::eval::TensorValue::UP(new vespalib::eval::TensorValue(builder.build()));
+ *data.resolve_object_feature(outputs()[0]) = *_tensor;
+}
+
+} // namespace features
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/features/tensor_from_labels_feature.cpp b/searchlib/src/vespa/searchlib/features/tensor_from_labels_feature.cpp
new file mode 100644
index 00000000000..819ca5c4ff1
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/tensor_from_labels_feature.cpp
@@ -0,0 +1,122 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".features.tensor_from_labels_feature");
+
+#include "tensor_from_labels_feature.h"
+#include "array_parser.hpp"
+#include "constant_tensor_executor.h"
+#include "tensor_from_attribute_executor.h"
+#include <vespa/searchcommon/attribute/attributecontent.h>
+#include <vespa/searchcommon/attribute/iattributevector.h>
+#include <vespa/searchlib/fef/properties.h>
+#include <vespa/vespalib/tensor/default_tensor.h>
+#include <vespa/vespalib/eval/value_type.h>
+#include <vespa/searchlib/fef/feature_type.h>
+
+using namespace search::fef;
+using search::attribute::IAttributeVector;
+using search::attribute::WeightedConstCharContent;
+using search::attribute::WeightedStringContent;
+using vespalib::tensor::DefaultTensor;
+using vespalib::tensor::TensorBuilder;
+using vespalib::eval::ValueType;
+using search::fef::FeatureType;
+
+namespace search {
+namespace features {
+
+TensorFromLabelsBlueprint::TensorFromLabelsBlueprint()
+ : TensorFactoryBlueprint("tensorFromLabels")
+{
+}
+
+bool
+TensorFromLabelsBlueprint::setup(const search::fef::IIndexEnvironment &env,
+ const search::fef::ParameterList &params)
+{
+ (void) env;
+ // _params[0] = source ('attribute(name)' OR 'query(param)');
+ // _params[1] = dimension (optional);
+ bool validSource = extractSource(params[0].getValue());
+ if (params.size() == 2) {
+ _dimension = params[1].getValue();
+ } else {
+ _dimension = _sourceParam;
+ }
+ describeOutput("tensor",
+ "The tensor created from the given array source (attribute field or query parameter)",
+ FeatureType::object(ValueType::tensor_type({{_dimension}})));
+ return validSource;
+}
+
+namespace {
+
+FeatureExecutor::LP
+createAttributeExecutor(const search::fef::IQueryEnvironment &env,
+ const vespalib::string &attrName,
+ const vespalib::string &dimension)
+{
+ const IAttributeVector *attribute = env.getAttributeContext().getAttribute(attrName);
+ if (attribute == NULL) {
+ LOG(warning, "The attribute vector '%s' was not found in the attribute manager."
+ " Returning empty tensor.", attrName.c_str());
+ return ConstantTensorExecutor::createEmpty();
+ }
+ if (attribute->getCollectionType() != search::attribute::CollectionType::ARRAY ||
+ attribute->isFloatingPointType()) {
+ LOG(warning, "The attribute vector '%s' is NOT of type array of string or integer."
+ " Returning empty tensor.", attrName.c_str());
+ return ConstantTensorExecutor::createEmpty();
+ }
+ // Note that for array attribute vectors the default weight is 1.0 for all values.
+ // This means we can get the attribute content as weighted content and build
+ // the tensor the same way as with weighted set attributes in tensorFromWeightedSet.
+ if (attribute->isIntegerType()) {
+ // Using WeightedStringContent ensures that the integer values are converted
+ // to strings while extracting them from the attribute.
+ return FeatureExecutor::LP
+ (new TensorFromAttributeExecutor<WeightedStringContent>(attribute, dimension));
+ }
+ // When the underlying attribute is of type string we can reference these values
+ // using WeightedConstCharContent.
+ return FeatureExecutor::LP
+ (new TensorFromAttributeExecutor<WeightedConstCharContent>(attribute, dimension));
+}
+
+FeatureExecutor::LP
+createQueryExecutor(const search::fef::IQueryEnvironment &env,
+ const vespalib::string &queryKey,
+ const vespalib::string &dimension)
+{
+ search::fef::Property prop = env.getProperties().lookup(queryKey);
+ if (prop.found() && !prop.get().empty()) {
+ std::vector<vespalib::string> vector;
+ ArrayParser::parse(prop.get(), vector);
+ DefaultTensor::builder tensorBuilder;
+ TensorBuilder::Dimension dimensionEnum = tensorBuilder.define_dimension(dimension);
+ for (const auto &elem : vector) {
+ tensorBuilder.add_label(dimensionEnum, elem);
+ tensorBuilder.add_cell(1.0);
+ }
+ return ConstantTensorExecutor::create(tensorBuilder.build());
+ }
+ return ConstantTensorExecutor::createEmpty();
+}
+
+}
+
+FeatureExecutor::LP
+TensorFromLabelsBlueprint::createExecutor(const search::fef::IQueryEnvironment &env) const
+{
+ if (_sourceType == ATTRIBUTE_SOURCE) {
+ return createAttributeExecutor(env, _sourceParam, _dimension);
+ } else if (_sourceType == QUERY_SOURCE) {
+ return createQueryExecutor(env, _sourceParam, _dimension);
+ }
+ return ConstantTensorExecutor::createEmpty();
+}
+
+} // namespace features
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/features/tensor_from_labels_feature.h b/searchlib/src/vespa/searchlib/features/tensor_from_labels_feature.h
new file mode 100644
index 00000000000..3da8d07b063
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/tensor_from_labels_feature.h
@@ -0,0 +1,36 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "tensor_factory_blueprint.h"
+
+namespace search {
+namespace features {
+
+/**
+ * Blueprint for a rank feature that creates a tensor from an array
+ * where the elements in the array are used as labels in the tensor addresses.
+ * The tensor cells all get the value 1.0.
+ *
+ * The array source can be either an attribute vector or query parameter.
+ */
+class TensorFromLabelsBlueprint : public TensorFactoryBlueprint
+{
+public:
+ TensorFromLabelsBlueprint();
+ virtual search::fef::Blueprint::UP createInstance() const override {
+ return Blueprint::UP(new TensorFromLabelsBlueprint());
+ }
+ virtual search::fef::ParameterDescriptions getDescriptions() const override {
+ return search::fef::ParameterDescriptions().
+ desc().string().
+ desc().string().string();
+ }
+ virtual bool setup(const search::fef::IIndexEnvironment &env,
+ const search::fef::ParameterList &params) override;
+ virtual search::fef::FeatureExecutor::LP
+ createExecutor(const search::fef::IQueryEnvironment &env) const override;
+};
+
+} // namespace features
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/features/tensor_from_tensor_attribute_executor.cpp b/searchlib/src/vespa/searchlib/features/tensor_from_tensor_attribute_executor.cpp
new file mode 100644
index 00000000000..2e00b5d4f19
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/tensor_from_tensor_attribute_executor.cpp
@@ -0,0 +1,35 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "tensor_from_tensor_attribute_executor.h"
+#include <vespa/searchlib/attribute/tensorattribute.h>
+
+namespace search {
+namespace features {
+
+TensorFromTensorAttributeExecutor::
+TensorFromTensorAttributeExecutor(const search::attribute::TensorAttribute *
+ attribute)
+ : _attribute(attribute),
+ _tensor(),
+ _builder(),
+ // XXX: we should use numbers instead of empty tensors
+ _emptyTensor(std::make_unique<vespalib::eval::TensorValue>(_builder.build()))
+{
+}
+
+
+void
+TensorFromTensorAttributeExecutor::execute(fef::MatchData &data)
+{
+ auto tensor = _attribute->getTensor(data.getDocId());
+ if (!tensor) {
+ *data.resolve_object_feature(outputs()[0]) = *_emptyTensor;
+ return;
+ }
+ _tensor = std::make_unique<vespalib::eval::TensorValue>(std::move(tensor));
+ *data.resolve_object_feature(outputs()[0]) = *_tensor;
+}
+
+} // namespace features
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/features/tensor_from_tensor_attribute_executor.h b/searchlib/src/vespa/searchlib/features/tensor_from_tensor_attribute_executor.h
new file mode 100644
index 00000000000..aa037b4ab59
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/tensor_from_tensor_attribute_executor.h
@@ -0,0 +1,30 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchcommon/attribute/iattributevector.h>
+#include <vespa/searchlib/fef/featureexecutor.h>
+#include <vespa/vespalib/eval/value.h>
+#include <vespa/vespalib/stllike/string.h>
+#include <vespa/vespalib/tensor/default_tensor.h>
+
+namespace search {
+namespace attribute { class TensorAttribute; }
+namespace features {
+
+class TensorFromTensorAttributeExecutor : public fef::FeatureExecutor
+{
+private:
+ const search::attribute::TensorAttribute *_attribute;
+ vespalib::eval::TensorValue::UP _tensor;
+ vespalib::tensor::DefaultTensor::builder _builder;
+ vespalib::eval::TensorValue::UP _emptyTensor;
+
+public:
+ TensorFromTensorAttributeExecutor(const search::attribute::TensorAttribute *
+ attribute);
+ virtual void execute(fef::MatchData &data);
+};
+
+} // namespace features
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/features/tensor_from_weighted_set_feature.cpp b/searchlib/src/vespa/searchlib/features/tensor_from_weighted_set_feature.cpp
new file mode 100644
index 00000000000..5754649109b
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/tensor_from_weighted_set_feature.cpp
@@ -0,0 +1,137 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".features.tensor_from_weighted_set_feature");
+
+#include "tensor_from_weighted_set_feature.h"
+
+#include "constant_tensor_executor.h"
+#include "utils.h"
+#include "tensor_from_attribute_executor.h"
+#include "weighted_set_parser.hpp"
+
+#include <vespa/searchcommon/attribute/attributecontent.h>
+#include <vespa/searchcommon/attribute/iattributevector.h>
+#include <vespa/searchlib/fef/properties.h>
+#include <vespa/vespalib/eval/function.h>
+#include <vespa/vespalib/tensor/tensor.h>
+#include <vespa/vespalib/tensor/default_tensor.h>
+#include <memory>
+#include <vespa/vespalib/eval/value_type.h>
+#include <vespa/searchlib/fef/feature_type.h>
+
+using namespace search::fef;
+using search::attribute::IAttributeVector;
+using search::attribute::WeightedConstCharContent;
+using search::attribute::WeightedStringContent;
+using vespalib::tensor::DefaultTensor;
+using vespalib::tensor::TensorBuilder;
+using vespalib::eval::ValueType;
+using search::fef::FeatureType;
+
+namespace search {
+namespace features {
+
+namespace {
+
+struct WeightedStringVector
+{
+ std::vector<IAttributeVector::WeightedString> _data;
+ void insert(const vespalib::stringref &key, const vespalib::stringref &weight) {
+ _data.emplace_back(key, util::strToNum<int32_t>(weight));
+ }
+};
+
+}
+
+TensorFromWeightedSetBlueprint::TensorFromWeightedSetBlueprint()
+ : TensorFactoryBlueprint("tensorFromWeightedSet")
+{
+}
+
+bool
+TensorFromWeightedSetBlueprint::setup(const search::fef::IIndexEnvironment &env,
+ const search::fef::ParameterList &params)
+{
+ (void) env;
+ // _params[0] = source ('attribute(name)' OR 'query(param)');
+ // _params[1] = dimension (optional);
+ bool validSource = extractSource(params[0].getValue());
+ if (params.size() == 2) {
+ _dimension = params[1].getValue();
+ } else {
+ _dimension = _sourceParam;
+ }
+ describeOutput("tensor",
+ "The tensor created from the given weighted set source (attribute field or query parameter)",
+ FeatureType::object(ValueType::tensor_type({{_dimension}})));
+ return validSource;
+}
+
+namespace {
+
+FeatureExecutor::LP
+createAttributeExecutor(const search::fef::IQueryEnvironment &env,
+ const vespalib::string &attrName,
+ const vespalib::string &dimension)
+{
+ const IAttributeVector *attribute = env.getAttributeContext().getAttribute(attrName);
+ if (attribute == NULL) {
+ LOG(warning, "The attribute vector '%s' was not found in the attribute manager."
+ " Returning empty tensor.", attrName.c_str());
+ return ConstantTensorExecutor::createEmpty();
+ }
+ if (attribute->getCollectionType() != search::attribute::CollectionType::WSET ||
+ attribute->isFloatingPointType()) {
+ LOG(warning, "The attribute vector '%s' is NOT of type weighted set of string or integer."
+ " Returning empty tensor.", attrName.c_str());
+ return ConstantTensorExecutor::createEmpty();
+ }
+ if (attribute->isIntegerType()) {
+ // Using WeightedStringContent ensures that the integer values are converted
+ // to strings while extracting them from the attribute.
+ return FeatureExecutor::LP
+ (new TensorFromAttributeExecutor<WeightedStringContent>(attribute, dimension));
+ }
+ // When the underlying attribute is of type string we can reference these values
+ // using WeightedConstCharContent.
+ return FeatureExecutor::LP
+ (new TensorFromAttributeExecutor<WeightedConstCharContent>(attribute, dimension));
+}
+
+FeatureExecutor::LP
+createQueryExecutor(const search::fef::IQueryEnvironment &env,
+ const vespalib::string &queryKey,
+ const vespalib::string &dimension)
+{
+ search::fef::Property prop = env.getProperties().lookup(queryKey);
+ if (prop.found() && !prop.get().empty()) {
+ WeightedStringVector vector;
+ WeightedSetParser::parse(prop.get(), vector);
+ DefaultTensor::builder tensorBuilder;
+ TensorBuilder::Dimension dimensionEnum = tensorBuilder.define_dimension(dimension);
+ for (const auto &elem : vector._data) {
+ tensorBuilder.add_label(dimensionEnum, elem.value());
+ tensorBuilder.add_cell(elem.weight());
+ }
+ return ConstantTensorExecutor::create(tensorBuilder.build());
+ }
+ return ConstantTensorExecutor::createEmpty();
+}
+
+}
+
+FeatureExecutor::LP
+TensorFromWeightedSetBlueprint::createExecutor(const search::fef::IQueryEnvironment &env) const
+{
+ if (_sourceType == ATTRIBUTE_SOURCE) {
+ return createAttributeExecutor(env, _sourceParam, _dimension);
+ } else if (_sourceType == QUERY_SOURCE) {
+ return createQueryExecutor(env, _sourceParam, _dimension);
+ }
+ return ConstantTensorExecutor::createEmpty();
+}
+
+} // namespace features
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/features/tensor_from_weighted_set_feature.h b/searchlib/src/vespa/searchlib/features/tensor_from_weighted_set_feature.h
new file mode 100644
index 00000000000..f38b811fa36
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/tensor_from_weighted_set_feature.h
@@ -0,0 +1,33 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "tensor_factory_blueprint.h"
+
+namespace search {
+namespace features {
+
+/**
+ * Feature blueprint for a rank feature that creates a tensor from a weighted set.
+ * The weighted set source can be either an attribute vector or query parameter.
+ */
+class TensorFromWeightedSetBlueprint : public TensorFactoryBlueprint
+{
+public:
+ TensorFromWeightedSetBlueprint();
+ virtual search::fef::Blueprint::UP createInstance() const override {
+ return Blueprint::UP(new TensorFromWeightedSetBlueprint());
+ }
+ virtual search::fef::ParameterDescriptions getDescriptions() const override {
+ return search::fef::ParameterDescriptions().
+ desc().string().
+ desc().string().string();
+ }
+ virtual bool setup(const search::fef::IIndexEnvironment &env,
+ const search::fef::ParameterList &params) override;
+ virtual search::fef::FeatureExecutor::LP
+ createExecutor(const search::fef::IQueryEnvironment &env) const override;
+};
+
+} // namespace features
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/features/term_field_md_feature.cpp b/searchlib/src/vespa/searchlib/features/term_field_md_feature.cpp
new file mode 100644
index 00000000000..1e242f8b4f3
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/term_field_md_feature.cpp
@@ -0,0 +1,115 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+#include <vespa/searchlib/fef/fieldinfo.h>
+#include <vespa/searchlib/fef/indexproperties.h>
+#include <vespa/searchlib/fef/itablemanager.h>
+#include <vespa/searchlib/fef/properties.h>
+#include "term_field_md_feature.h"
+#include "utils.h"
+LOG_SETUP(".features.term_field_md_feature");
+
+using namespace search::fef;
+
+namespace search {
+namespace features {
+
+
+TermFieldMdExecutor::TermFieldMdExecutor(const search::fef::IQueryEnvironment &env,
+ uint32_t fieldId)
+ : _terms()
+{
+ for (uint32_t i = 0; i < env.getNumTerms(); ++i) {
+ const search::fef::ITermData *td = env.getTerm(i);
+ LOG_ASSERT(td != 0);
+ const search::fef::ITermFieldData *tfd = td->lookupField(fieldId);
+ if (tfd != 0) {
+ LOG_ASSERT(tfd->getHandle() != search::fef::IllegalHandle);
+ _terms.push_back(std::make_pair(tfd->getHandle(), td->getWeight()));
+ }
+ }
+}
+
+void
+TermFieldMdExecutor::execute(MatchData & match)
+{
+ uint32_t termsmatched = 0;
+ uint32_t occs = 0;
+ feature_t score = 0;
+ feature_t weight = 0;
+ feature_t maxTermWeight = 0;
+
+ for (size_t i = 0; i < _terms.size(); ++i) {
+ const TermFieldMatchData &tfmd = *match.resolveTermField(_terms[i].first);
+ int32_t termWeight = _terms[i].second.percent();
+
+ if (tfmd.getDocId() == match.getDocId()) {
+ ++termsmatched;
+ score += tfmd.getWeight();
+ occs += (tfmd.end() - tfmd.begin());
+ if (weight == 0) {
+ weight = tfmd.getWeight();
+ }
+ if (termWeight > maxTermWeight) {
+ maxTermWeight = termWeight;
+ }
+ }
+
+ }
+ *match.resolveFeature(outputs()[0]) = score;
+ *match.resolveFeature(outputs()[1]) = _terms.size();
+ *match.resolveFeature(outputs()[2]) = (termsmatched > 0 ? 1.0 : 0.0);
+ *match.resolveFeature(outputs()[3]) = termsmatched;
+ *match.resolveFeature(outputs()[4]) = weight;
+ *match.resolveFeature(outputs()[5]) = occs;
+ *match.resolveFeature(outputs()[6]) = maxTermWeight;
+}
+
+
+TermFieldMdBlueprint::TermFieldMdBlueprint() :
+ Blueprint("termFieldMd"),
+ _field(0)
+{
+}
+
+void
+TermFieldMdBlueprint::visitDumpFeatures(const IIndexEnvironment &,
+ IDumpFeatureVisitor &) const
+{
+}
+
+Blueprint::UP
+TermFieldMdBlueprint::createInstance() const
+{
+ return Blueprint::UP(new TermFieldMdBlueprint());
+}
+
+bool
+TermFieldMdBlueprint::setup(const IIndexEnvironment & env,
+ const ParameterList & params)
+{
+ _field = params[0].asField();
+ LOG_ASSERT(_field != 0);
+
+ describeOutput("score", "The term field match score");
+ describeOutput("terms", "The number of ranked terms searching this field");
+ describeOutput("match", "1.0 if some ranked term matched this field, 0.0 otherwise");
+ describeOutput("termsmatched", "The number of ranked terms matching this field");
+ describeOutput("firstweight", "The first element weight seen");
+ describeOutput("occurrences", "The sum of occurrences (positions) in the match data");
+ describeOutput("maxTermWeight", "The max term weight among ranked terms matching this field");
+
+ env.hintFieldAccess(_field->id());
+ return true;
+}
+
+FeatureExecutor::LP
+TermFieldMdBlueprint::createExecutor(const IQueryEnvironment & env) const
+{
+ return FeatureExecutor::LP(new TermFieldMdExecutor(env, _field->id()));
+}
+
+
+} // namespace features
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/features/term_field_md_feature.h b/searchlib/src/vespa/searchlib/features/term_field_md_feature.h
new file mode 100644
index 00000000000..b2752b52e80
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/term_field_md_feature.h
@@ -0,0 +1,60 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/fef/blueprint.h>
+#include <vespa/searchlib/fef/featureexecutor.h>
+#include <vespa/searchlib/fef/table.h>
+#include <vespa/searchlib/fef/itermdata.h>
+#include <vespa/searchlib/fef/itermfielddata.h>
+
+namespace search {
+namespace features {
+
+/**
+ * Implements the executor for looking at term field match data
+ **/
+class TermFieldMdExecutor : public fef::FeatureExecutor {
+
+ typedef std::pair<search::fef::TermFieldHandle, query::Weight> Element;
+ std::vector<Element> _terms;
+ virtual void execute(fef::MatchData &data);
+
+public:
+ TermFieldMdExecutor(const search::fef::IQueryEnvironment &env,
+ uint32_t fieldId);
+};
+
+
+/**
+ * Implements the blueprint for the term field md executor.
+ **/
+class TermFieldMdBlueprint : public fef::Blueprint {
+ const search::fef::FieldInfo * _field;
+public:
+ TermFieldMdBlueprint();
+
+ // Inherit doc from Blueprint.
+ virtual void visitDumpFeatures(const fef::IIndexEnvironment & env,
+ fef::IDumpFeatureVisitor & visitor) const;
+
+ // Inherit doc from Blueprint.
+ virtual fef::Blueprint::UP createInstance() const;
+
+ // Inherit doc from Blueprint.
+ virtual fef::ParameterDescriptions getDescriptions() const {
+ return fef::ParameterDescriptions().desc().field();
+ }
+
+ // Inherit doc from Blueprint.
+ virtual bool setup(const fef::IIndexEnvironment & env,
+ const fef::ParameterList & params);
+
+ // Inherit doc from Blueprint.
+ virtual fef::FeatureExecutor::LP createExecutor(const fef::IQueryEnvironment & env) const;
+};
+
+
+} // namespace features
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/features/termdistancecalculator.cpp b/searchlib/src/vespa/searchlib/features/termdistancecalculator.cpp
new file mode 100644
index 00000000000..a581aea2867
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/termdistancecalculator.cpp
@@ -0,0 +1,81 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".features.termdistancecalculator");
+#include <vespa/searchlib/fef/matchdata.h>
+#include <vespa/searchlib/fef/itermdata.h>
+#include "termdistancecalculator.h"
+
+using namespace search::fef;
+
+namespace search {
+namespace features {
+
+const uint32_t TermDistanceCalculator::UNDEFINED_VALUE(1000000);
+
+
+void
+TermDistanceCalculator::run(const QueryTerm &termX, const QueryTerm &termY,
+ MatchData & match, Result & r)
+{
+ const TermFieldMatchData *tmdX = match.resolveTermField(termX.fieldHandle());
+ const TermFieldMatchData *tmdY = match.resolveTermField(termY.fieldHandle());
+ if (tmdX->getDocId() != match.getDocId() || tmdY->getDocId() != match.getDocId()) {
+ return;
+ }
+ findBest(tmdX, tmdY, termX.termData()->getPhraseLength(), r.forwardDist, r.forwardTermPos);
+ findBest(tmdY, tmdX, termY.termData()->getPhraseLength(), r.reverseDist, r.reverseTermPos);
+}
+
+
+void
+TermDistanceCalculator::findBest(const TermFieldMatchData *tmdX,
+ const TermFieldMatchData *tmdY,
+ uint32_t numTermsX,
+ uint32_t & bestDist,
+ uint32_t & bestPos)
+{
+ search::fef::TermFieldMatchData::PositionsIterator itA, itB, epA, epB;
+ itA = tmdX->begin();
+ epA = tmdX->end();
+
+ itB = tmdY->begin();
+ epB = tmdY->end();
+
+ uint32_t addA = numTermsX - 1;
+
+ while (itB != epB) {
+ uint32_t eid = itB->getElementId();
+ while (itA != epA && itA->getElementId() < eid) {
+ ++itA;
+ }
+ if (itA != epA && itA->getElementId() == eid) {
+ // there is a pair somewhere here
+ while (itA != epA &&
+ itB != epB &&
+ itA->getElementId() == eid &&
+ itB->getElementId() == eid)
+ {
+ uint32_t a = itA->getPosition();
+ uint32_t b = itB->getPosition();
+ if (a < b) {
+ if (b - a < bestDist + addA) {
+ bestDist = b - (a + addA);
+ bestPos = a;
+ }
+ itA++;
+ } else {
+ itB++;
+ }
+ }
+ } else {
+ ++itB;
+ }
+ }
+
+}
+
+
+} // namespace features
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/features/termdistancecalculator.h b/searchlib/src/vespa/searchlib/features/termdistancecalculator.h
new file mode 100644
index 00000000000..39da5987ff4
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/termdistancecalculator.h
@@ -0,0 +1,81 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "queryterm.h"
+
+namespace search {
+namespace features {
+
+/**
+ * This class is used to calculate the minimal forward and reverse term distance
+ * between two terms matching in the same field using the position information for both terms.
+ *
+ * The terms 'a' and 'b' matching the field 'a b x a' will give the following result:
+ * - forwardDist = 1
+ * - forwardTermPos = 0
+ * - reverseDist = 2
+ * - reverseTermPos = 1
+ *
+ * Note that if we have a phrase 'a b' and term 'c' matching the field 'a b x c' we will get:
+ * - forwardDist = 2 (between b and c)
+ * - forwardTermPos = 0 (pos of first word)
+ **/
+class TermDistanceCalculator {
+public:
+ /**
+ * Represents an undefined value.
+ **/
+ static const uint32_t UNDEFINED_VALUE;
+
+ /**
+ * Contains the result from running the calculator.
+ **/
+ struct Result {
+ uint32_t forwardDist; // min distance between term X and term Y in the field
+ uint32_t forwardTermPos; // the position of term X for that distance
+ uint32_t reverseDist; // min distance between term Y and term X in the field
+ uint32_t reverseTermPos; // the position of term Y for that distance
+
+ /**
+ * Creates a new object with undefined values.
+ **/
+ Result() { reset(); }
+
+ /**
+ * Creates a new object with the given values.
+ **/
+ Result(uint32_t fd, uint32_t ftp, uint32_t rd, uint32_t rtp) :
+ forwardDist(fd), forwardTermPos(ftp), reverseDist(rd), reverseTermPos(rtp) {}
+
+ /**
+ * Sets all variables to the undefined value.
+ **/
+ void reset() {
+ forwardDist = UNDEFINED_VALUE;
+ forwardTermPos = UNDEFINED_VALUE;
+ reverseDist = UNDEFINED_VALUE;
+ reverseTermPos = UNDEFINED_VALUE;
+ }
+ };
+
+private:
+ static void findBest(const search::fef::TermFieldMatchData *tmdX,
+ const search::fef::TermFieldMatchData *tmdY,
+ uint32_t numTermsX,
+ uint32_t & bestDist,
+ uint32_t & bestPos);
+
+public:
+ /**
+ * Calculates the min forward and reverse distances based on the given
+ * match data and field id. The calculated values are stored in the given result object.
+ * NB: Both query terms must have attached term fields with valid term field handles.
+ **/
+ static void run(const QueryTerm &termX, const QueryTerm &termY,
+ search::fef::MatchData & match, Result & r);
+};
+
+} // namespace features
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/features/termdistancefeature.cpp b/searchlib/src/vespa/searchlib/features/termdistancefeature.cpp
new file mode 100644
index 00000000000..8bfc191cc98
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/termdistancefeature.cpp
@@ -0,0 +1,100 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".features.nativeproximityfeature");
+#include <vespa/searchlib/fef/fieldinfo.h>
+#include <vespa/searchlib/fef/properties.h>
+#include "termdistancefeature.h"
+#include "valuefeature.h"
+#include "utils.h"
+
+using namespace search::fef;
+
+namespace search {
+namespace features {
+
+
+TermDistanceExecutor::TermDistanceExecutor(const IQueryEnvironment & env,
+ const TermDistanceParams & params) :
+ FeatureExecutor(),
+ _params(params),
+ _termA(env.getTerm(params.termX)),
+ _termB(env.getTerm(params.termY))
+{
+ _termA.fieldHandle(util::getTermFieldData(env, params.termX, params.fieldId));
+ _termB.fieldHandle(util::getTermFieldData(env, params.termY, params.fieldId));
+}
+
+bool TermDistanceExecutor::valid() const
+{
+ return ((_termA.termData() != 0) && (_termB.termData() != 0) &&
+ (_termA.fieldHandle() != IllegalHandle) && (_termB.fieldHandle() != IllegalHandle));
+}
+
+void
+TermDistanceExecutor::execute(MatchData & match)
+{
+ TermDistanceCalculator::Result result;
+ TermDistanceCalculator::run(_termA, _termB, match, result);
+ *match.resolveFeature(outputs()[0]) = result.forwardDist;
+ *match.resolveFeature(outputs()[1]) = result.forwardTermPos;
+ *match.resolveFeature(outputs()[2]) = result.reverseDist;
+ *match.resolveFeature(outputs()[3]) = result.reverseTermPos;
+}
+
+
+TermDistanceBlueprint::TermDistanceBlueprint() :
+ Blueprint("termDistance"),
+ _params()
+{
+}
+
+void
+TermDistanceBlueprint::visitDumpFeatures(const IIndexEnvironment &,
+ IDumpFeatureVisitor &) const
+{
+}
+
+Blueprint::UP
+TermDistanceBlueprint::createInstance() const
+{
+ return Blueprint::UP(new TermDistanceBlueprint());
+}
+
+bool
+TermDistanceBlueprint::setup(const IIndexEnvironment &,
+ const ParameterList & params)
+{
+ _params.fieldId = params[0].asField()->id();
+ _params.termX = params[1].asInteger();
+ _params.termY = params[2].asInteger();
+
+ describeOutput("forward", "the min distance between term X and term Y in the field");
+ describeOutput("forwardTermPosition", "the position of term X for the forward distance");
+ describeOutput("reverse", "the min distance between term Y and term X in the field");
+ describeOutput("reverseTermPosition", "the position of term Y for the reverse distance");
+
+ return true;
+}
+
+FeatureExecutor::LP
+TermDistanceBlueprint::createExecutor(const IQueryEnvironment & env) const
+{
+ std::unique_ptr<TermDistanceExecutor> tde(new TermDistanceExecutor(env, _params));
+ if (tde->valid()) {
+ return FeatureExecutor::LP(tde.release());
+ } else {
+ TermDistanceCalculator::Result r;
+ std::vector<feature_t> values(4);
+ values[0] = r.forwardDist;
+ values[1] = r.forwardTermPos;
+ values[2] = r.reverseDist;
+ values[3] = r.reverseTermPos;
+ return FeatureExecutor::LP(new ValueExecutor(values));
+ }
+}
+
+
+} // namespace features
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/features/termdistancefeature.h b/searchlib/src/vespa/searchlib/features/termdistancefeature.h
new file mode 100644
index 00000000000..b18227494a9
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/termdistancefeature.h
@@ -0,0 +1,74 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/fef/blueprint.h>
+#include <vespa/searchlib/fef/featureexecutor.h>
+#include <vespa/searchlib/fef/table.h>
+#include "termdistancecalculator.h"
+
+namespace search {
+namespace features {
+
+/**
+ * This struct contains parameters used by the executor.
+ **/
+struct TermDistanceParams {
+ uint32_t fieldId;
+ uint32_t termX;
+ uint32_t termY;
+ TermDistanceParams() : fieldId(0), termX(0), termY(0) {}
+};
+
+/**
+ * Implements the executor for calculating min term distance (forward and reverse).
+ **/
+class TermDistanceExecutor : public search::fef::FeatureExecutor
+{
+private:
+ const TermDistanceParams & _params;
+ QueryTerm _termA;
+ QueryTerm _termB;
+
+public:
+ TermDistanceExecutor(const search::fef::IQueryEnvironment & env,
+ const TermDistanceParams & params);
+ virtual void execute(search::fef::MatchData & data);
+ bool valid() const;
+};
+
+
+/**
+ * Implements the blueprint for the term distance executor.
+ **/
+class TermDistanceBlueprint : public search::fef::Blueprint {
+private:
+ TermDistanceParams _params;
+
+public:
+ TermDistanceBlueprint();
+
+ // Inherit doc from Blueprint.
+ virtual void visitDumpFeatures(const search::fef::IIndexEnvironment & env,
+ search::fef::IDumpFeatureVisitor & visitor) const;
+
+ // Inherit doc from Blueprint.
+ virtual search::fef::Blueprint::UP createInstance() const;
+
+ // Inherit doc from Blueprint.
+ virtual search::fef::ParameterDescriptions getDescriptions() const {
+ return search::fef::ParameterDescriptions().desc().indexField(search::fef::ParameterCollection::ANY).number().number();
+ }
+
+ // Inherit doc from Blueprint.
+ virtual bool setup(const search::fef::IIndexEnvironment & env,
+ const search::fef::ParameterList & params);
+
+ // Inherit doc from Blueprint.
+ virtual search::fef::FeatureExecutor::LP createExecutor(const search::fef::IQueryEnvironment & env) const;
+};
+
+
+} // namespace features
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/features/termeditdistancefeature.cpp b/searchlib/src/vespa/searchlib/features/termeditdistancefeature.cpp
new file mode 100644
index 00000000000..82b2e0b5058
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/termeditdistancefeature.cpp
@@ -0,0 +1,234 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".features.termeditdistance");
+
+#include <vespa/searchlib/fef/featurenamebuilder.h>
+#include <vespa/searchlib/fef/fieldinfo.h>
+#include <vespa/searchlib/fef/fieldtype.h>
+#include <vespa/searchlib/fef/properties.h>
+#include <vespa/searchlib/fef/itermdata.h>
+#include <vespa/vespalib/util/stringfmt.h>
+#include "termeditdistancefeature.h"
+#include "utils.h"
+
+namespace search {
+namespace features {
+
+//---------------------------------------------------------------------------------------------------------------------
+// TedCell
+//---------------------------------------------------------------------------------------------------------------------
+TedCell::TedCell() :
+ cost(util::FEATURE_MAX),
+ numDel(0),
+ numIns(0),
+ numSub(0)
+{
+ // empty
+}
+
+TedCell::TedCell(feature_t argCost, uint32_t argNumDel, uint32_t argNumIns, uint32_t argNumSub) :
+ cost(argCost),
+ numDel(argNumDel),
+ numIns(argNumIns),
+ numSub(argNumSub)
+{
+ // empty
+}
+
+//---------------------------------------------------------------------------------------------------------------------
+// TermEditDistanceConfig
+//---------------------------------------------------------------------------------------------------------------------
+TermEditDistanceConfig::TermEditDistanceConfig() :
+ fieldId(search::fef::IllegalHandle),
+ fieldBegin(0),
+ fieldEnd(std::numeric_limits<uint32_t>::max()),
+ costDel(1),
+ costIns(1),
+ costSub(1)
+{
+ // empty
+}
+
+//---------------------------------------------------------------------------------------------------------------------
+// TermEditDistanceExecutor
+//---------------------------------------------------------------------------------------------------------------------
+TermEditDistanceExecutor::TermEditDistanceExecutor(const search::fef::IQueryEnvironment &env,
+ const TermEditDistanceConfig &config) :
+ search::fef::FeatureExecutor(),
+ _config(config),
+ _fieldHandles(),
+ _termWeights(),
+ _lenHandle(search::fef::IllegalHandle),
+ _prevRow(16),
+ _thisRow(_prevRow.size())
+{
+ for (uint32_t i = 0; i < env.getNumTerms(); ++i) {
+ _fieldHandles.push_back(util::getTermFieldHandle(env, i, config.fieldId));
+ _termWeights.push_back(1.0f);
+
+ // XXX was intended to use something like this instead of 1.0f:
+ // const search::fef::TermData& term = *env.getTerm(i);
+ // term.isMandatory() ? (feature_t)term.getWeight() : 0.0f
+ }
+}
+
+void
+TermEditDistanceExecutor::execute(search::fef::MatchData &match)
+{
+ // Determine the number of terms in the field.
+ uint32_t numQueryTerms = _fieldHandles.size();
+ uint32_t fieldBegin = _config.fieldBegin;
+ uint32_t fieldEnd = std::min(_config.fieldEnd,
+ (uint32_t)*match.resolveFeature(_lenHandle));
+
+ // _P_A_R_A_N_O_I_A_
+ TedCell last;
+ if (fieldBegin < fieldEnd) {
+ // Construct the cost table.
+ uint32_t numFieldTerms = fieldEnd - fieldBegin;
+ if (_prevRow.size() < numFieldTerms + 1) {
+ _prevRow.resize(numFieldTerms + 1);
+ _thisRow.resize(_prevRow.size());
+ }
+ for (uint32_t field = 0; field <= numFieldTerms; ++field) {
+ _prevRow[field] = TedCell(field * _config.costIns, 0, field, 0);
+ }
+ //LOG(debug, "[ F I E L D S ]");
+ //logRow(_prevRow, numFieldTerms + 1);
+
+ // Iterate over each query term.
+ for (uint32_t query = 1; query <= numQueryTerms; ++query) {
+ search::fef::FieldPositionsIterator it; // this is not vaild
+
+ // Look for a match of this term.
+ search::fef::TermFieldHandle handle = _fieldHandles[query - 1];
+ if (handle != search::fef::IllegalHandle) {
+ search::fef::TermFieldMatchData &tfmd = *match.resolveTermField(handle);
+ if (tfmd.getDocId() == match.getDocId()) {
+ it = tfmd.getIterator(); // this is now valid
+ while (it.valid() && it.getPosition() < fieldBegin) {
+ it.next(); // forward to window
+ }
+ }
+ }
+
+ // Predefine the cost of operations on the current term.
+ feature_t weight = _termWeights[query - 1];
+ feature_t costDel = _config.costDel * weight;
+ feature_t costIns = _config.costIns * weight;
+ feature_t costSub = _config.costSub * weight;
+
+ // Iterate over each field term.
+ _thisRow[0] = TedCell(_prevRow[0].cost + costDel, query, 0, 0);
+ for (uint32_t field = 1; field <= numFieldTerms; ++field) {
+ // If the iterator is still valid, we _might_ have a match.
+ if (it.valid()) {
+ // If the iterator knows an occurance at this field term, this is a match.
+ if (it.getPosition() == fieldBegin + (field - 1)) {
+ _thisRow[field] = _prevRow[field - 1]; // no cost
+ it.next();
+ continue; // skip calculations
+ }
+ }
+
+ // Determine the least-cost operation.
+ feature_t del = _prevRow[field ].cost + costDel; // cost per previous query term, ie. ignoring this query term.
+ feature_t ins = _thisRow[field - 1].cost + costIns; // cost per previous field term, ie. insert this query term.
+ feature_t sub = _prevRow[field - 1].cost + costSub; // cost to replace field term with query term.
+
+ feature_t min = std::min(del, std::min(ins, sub));
+ if (min == del) {
+ const TedCell &cell = _prevRow[field];
+ _thisRow[field] = TedCell(del, cell.numDel + 1, cell.numIns, cell.numSub);
+ }
+ else if(min == ins) {
+ const TedCell &cell = _thisRow[field - 1];
+ _thisRow[field] = TedCell(ins, cell.numDel, cell.numIns + 1, cell.numSub);
+ }
+ else {
+ const TedCell &cell = _prevRow[field - 1];
+ _thisRow[field] = TedCell(sub, cell.numDel, cell.numIns, cell.numSub + 1);
+ }
+ }
+ _thisRow.swap(_prevRow);
+ //logRow(_prevRow, numFieldTerms + 1);
+ }
+
+ // Retrieve the bottom-right value.
+ last = _prevRow[numFieldTerms];
+ }
+ *match.resolveFeature(outputs()[0]) = last.cost;
+ *match.resolveFeature(outputs()[1]) = last.numDel;
+ *match.resolveFeature(outputs()[2]) = last.numIns;
+ *match.resolveFeature(outputs()[3]) = last.numSub;
+}
+
+void
+TermEditDistanceExecutor::logRow(const std::vector<TedCell> &row, size_t numCols)
+{
+ if (logger.wants(ns_log::Logger::info)) {
+ vespalib::string str = "[ ";
+ for (size_t i = 0; i < numCols; ++i) {
+ str.append(vespalib::make_string("%5.2f", row[i].cost));
+ if (i < numCols - 1) {
+ str.append(" ");
+ }
+ }
+ str.append(" ]");
+ LOG(debug, "%s", str.c_str());
+ }
+}
+
+//---------------------------------------------------------------------------------------------------------------------
+// TermEditDistanceBlueprint
+//---------------------------------------------------------------------------------------------------------------------
+TermEditDistanceBlueprint::TermEditDistanceBlueprint() :
+ search::fef::Blueprint("termEditDistance"),
+ _config()
+{
+ // empty
+}
+
+void
+TermEditDistanceBlueprint::visitDumpFeatures(const search::fef::IIndexEnvironment &,
+ search::fef::IDumpFeatureVisitor &) const
+{
+ // empty
+}
+
+bool
+TermEditDistanceBlueprint::setup(const search::fef::IIndexEnvironment &env,
+ const search::fef::ParameterList &params)
+{
+ _config.fieldId = params[0].asField()->id();
+
+ vespalib::string costDel = env.getProperties().lookup(getName(), "costDel").getAt(0);
+ _config.costDel = costDel.empty() ? 1.0f : atof(costDel.c_str());
+ vespalib::string costIns = env.getProperties().lookup(getName(), "costIns").getAt(0);
+ _config.costIns = costIns.empty() ? 1.0f : atof(costIns.c_str());
+ vespalib::string costSub = env.getProperties().lookup(getName(), "costSub").getAt(0);
+ _config.costSub = costSub.empty() ? 1.0f : atof(costSub.c_str());
+
+ defineInput(vespalib::make_string("fieldLength(%s)", params[0].getValue().c_str()));
+ describeOutput("out", "Term-wise edit distance.");
+ describeOutput("del", "Number of deletions performed.");
+ describeOutput("ins", "Number of insertions performed.");
+ describeOutput("sub", "Number of substitutions performed.");
+ env.hintFieldAccess(_config.fieldId);
+ return true;
+}
+
+search::fef::Blueprint::UP
+TermEditDistanceBlueprint::createInstance() const
+{
+ return search::fef::Blueprint::UP(new TermEditDistanceBlueprint());
+}
+
+search::fef::FeatureExecutor::LP
+TermEditDistanceBlueprint::createExecutor(const search::fef::IQueryEnvironment &env) const
+{
+ return search::fef::FeatureExecutor::LP(new TermEditDistanceExecutor(env, _config));
+}
+
+}}
diff --git a/searchlib/src/vespa/searchlib/features/termeditdistancefeature.h b/searchlib/src/vespa/searchlib/features/termeditdistancefeature.h
new file mode 100644
index 00000000000..2f897a1c826
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/termeditdistancefeature.h
@@ -0,0 +1,153 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <string>
+#include <vector>
+#include <vespa/searchlib/fef/blueprint.h>
+#include <vespa/searchlib/fef/featureexecutor.h>
+#include <vespa/searchlib/common/feature.h>
+
+namespace search {
+namespace features {
+
+/**
+ * Implements a cell class for the cost table constructed when running the term edit distance calculator. This is
+ * necessary to keep track of the route actually chosen through the table, since the algorithm itself merely find the
+ * minimum cost.
+ */
+class TedCell {
+public:
+ TedCell();
+ TedCell(feature_t cost, uint32_t numDel, uint32_t numIns, uint32_t numSub);
+
+ feature_t cost; // The cost at this point.
+ uint32_t numDel; // The number of deletions to get here.
+ uint32_t numIns; // The number of insertions to get here.
+ uint32_t numSub; // The number of substitutions to get here.
+};
+
+/**
+ * Implements the necessary config for the term edit distance calculator. This class exists so that the executor does
+ * not need a separate copy of the config parsed by the blueprint, and at the same time avoiding that the executor needs
+ * to know about the blueprint.
+ */
+struct TermEditDistanceConfig {
+ TermEditDistanceConfig();
+
+ uint32_t fieldId; // The id of field to process.
+ uint32_t fieldBegin; // The first field term to evaluate.
+ uint32_t fieldEnd; // The last field term to evaluate.
+ feature_t costDel; // The cost of a delete.
+ feature_t costIns; // The cost of an insert.
+ feature_t costSub; // The cost of a substitution.
+};
+
+/**
+ * Implements the executor for the term edit distance calculator.
+ */
+class TermEditDistanceExecutor : public search::fef::FeatureExecutor {
+public:
+ /**
+ * Constructs a new executor for the term edit distance calculator.
+ *
+ * @param config The config for this executor.
+ */
+ TermEditDistanceExecutor(const search::fef::IQueryEnvironment &env,
+ const TermEditDistanceConfig &config);
+
+ void inputs_done() override { _lenHandle = inputs()[0]; }
+
+ /**
+ *
+ * This executor prepares a matrix that has one row per query term, and one column per field term. Initialize this
+ * array as follows:
+ *
+ * |f i e l d
+ * -+---------
+ * q|0 1 2 3 4
+ * u|1 . . . .
+ * e|2 . . . .
+ * r|3 . . . .
+ * y|4 . . . .
+ *
+ * Run through this matrix per field term, per query term; i.e. column by column, row by row. Compare the field term
+ * at that column with the query term at that row. Then set the value of that cell to the minimum of:
+ *
+ * 1. The cost of substitution; the above-left value plus the cost (0 if equal).
+ * 2. The cost of insertion; the left value plus the cost.
+ * 3. The cost of deletion; the above value plus the cost.
+ *
+ * After completing the matrix, the minimum cost is contained in the bottom-right.
+ *
+ * @param data All available match data.
+ */
+ virtual void execute(search::fef::MatchData &data);
+
+private:
+ /**
+ * Writes the given list of feature values to log so that it can be viewed for instrumentation.
+ *
+ * @param row The list of feature values to write.
+ * @param numCols The number of columns to write.
+ */
+ void logRow(const std::vector<TedCell> &row, size_t numCols);
+
+private:
+ const TermEditDistanceConfig &_config; // The config for this executor.
+ std::vector<search::fef::TermFieldHandle> _fieldHandles; // The handles of all query terms.
+ std::vector<feature_t> _termWeights; // The weights of all query terms.
+ search::fef::FeatureHandle _lenHandle; // Handle to the length input feature.
+ std::vector<TedCell> _prevRow; // Optimized representation of the cost table.
+ std::vector<TedCell> _thisRow; //
+};
+
+/**
+ * Implements the blueprint for the term edit distance calculator.
+ */
+class TermEditDistanceBlueprint : public search::fef::Blueprint {
+public:
+ /**
+ * Constructs a new blueprint for the term edit distance calculator.
+ */
+ TermEditDistanceBlueprint();
+
+ // Inherit doc from Blueprint.
+ virtual void visitDumpFeatures(const search::fef::IIndexEnvironment &env,
+ search::fef::IDumpFeatureVisitor &visitor) const;
+
+ // Inherit doc from Blueprint.
+ virtual search::fef::Blueprint::UP createInstance() const;
+
+ // Inherit doc from Blueprint.
+ virtual search::fef::FeatureExecutor::LP createExecutor(const search::fef::IQueryEnvironment &env) const;
+
+ // Inherit doc from Blueprint.
+ virtual search::fef::ParameterDescriptions getDescriptions() const {
+ return search::fef::ParameterDescriptions().desc().indexField(search::fef::ParameterCollection::SINGLE);
+ }
+
+ /**
+ * The cost of each operation is specified by the parameters to the {@link #setup} method of this blueprint. All
+ * costs are multiplied by the relative weight of eacht query term. Furthermore, if the query term is not mandatory,
+ * all operations are free. The parameters are:
+ *
+ * 1. The name of the field to calculate the distance for.
+ * 2. The cost of ignoring a query term, this is typically HIGH.
+ * 3. The cost of inserting a field term into the query term, this is typically LOW.
+ * 4. The cost of substituting a field term with a query term, this is also typically LOW.
+ * 5. Optional: The field position to begin iteration.
+ * 6. Optional: The field position to end iteration.
+ *
+ * @param env The index environment.
+ * @param params A list of the parameters mentioned above.
+ * @return Whether or not setup was possible.
+ */
+ virtual bool setup(const search::fef::IIndexEnvironment & env,
+ const search::fef::ParameterList & params);
+
+private:
+ TermEditDistanceConfig _config; // The config for this blueprint.
+};
+
+}}
+
diff --git a/searchlib/src/vespa/searchlib/features/termfeature.cpp b/searchlib/src/vespa/searchlib/features/termfeature.cpp
new file mode 100644
index 00000000000..61cd3347845
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/termfeature.cpp
@@ -0,0 +1,91 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".features.termfeature");
+
+#include <vespa/searchlib/fef/featurenamebuilder.h>
+#include <vespa/searchlib/fef/fieldinfo.h>
+#include <vespa/searchlib/fef/fieldtype.h>
+#include <vespa/searchlib/fef/properties.h>
+#include <vespa/searchlib/fef/itermdata.h>
+#include <vespa/vespalib/util/stringfmt.h>
+#include "termfeature.h"
+#include "utils.h"
+
+using namespace search::fef;
+
+namespace search {
+namespace features {
+
+TermExecutor::TermExecutor(const search::fef::IQueryEnvironment &env,
+ uint32_t termId) :
+ search::fef::FeatureExecutor(),
+ _termData(env.getTerm(termId)),
+ _connectedness(util::lookupConnectedness(env, termId)),
+ _significance(0)
+{
+ if (_termData != NULL) {
+ feature_t fallback = util::getSignificance(*_termData);
+ _significance = util::lookupSignificance(env, termId, fallback);
+ }
+}
+
+void
+TermExecutor::execute(search::fef::MatchData &match)
+{
+ if (_termData == NULL) { // this query term is not present in the query
+ *match.resolveFeature(outputs()[0]) = 0.0f; // connectedness
+ *match.resolveFeature(outputs()[1]) = 0.0f; // significance (1 - frequency)
+ *match.resolveFeature(outputs()[2]) = 0.0f; // weight
+ return;
+ }
+ *match.resolveFeature(outputs()[0]) = _connectedness;
+ *match.resolveFeature(outputs()[1]) = _significance;
+ *match.resolveFeature(outputs()[2]) = (feature_t)_termData->getWeight().percent();
+}
+
+TermBlueprint::TermBlueprint() :
+ search::fef::Blueprint("term"),
+ _termId(0)
+{
+ // empty
+}
+
+void
+TermBlueprint::visitDumpFeatures(const search::fef::IIndexEnvironment &env,
+ search::fef::IDumpFeatureVisitor &visitor) const
+{
+ int numTerms = atoi(env.getProperties().lookup(getBaseName(), "numTerms").get("5").c_str());
+ for (int term = 0; term < numTerms; ++term) {
+ search::fef::FeatureNameBuilder fnb;
+ fnb.baseName(getBaseName()).parameter(vespalib::make_string("%d", term));
+ visitor.visitDumpFeature(fnb.output("connectedness").buildName());
+ visitor.visitDumpFeature(fnb.output("significance").buildName());
+ visitor.visitDumpFeature(fnb.output("weight").buildName());
+ }
+}
+
+bool
+TermBlueprint::setup(const search::fef::IIndexEnvironment &,
+ const search::fef::ParameterList &params)
+{
+ _termId = params[0].asInteger();
+ describeOutput("connectedness", "The normalized strength with which this term is connected to the next term in the query.");
+ describeOutput("significance", "1 - the normalized frequency of documents containing this query term.");
+ describeOutput("weight", "The normalized importance of matching this query term.");
+ return true;
+}
+
+search::fef::Blueprint::UP
+TermBlueprint::createInstance() const
+{
+ return search::fef::Blueprint::UP(new TermBlueprint());
+}
+
+search::fef::FeatureExecutor::LP
+TermBlueprint::createExecutor(const search::fef::IQueryEnvironment &env) const
+{
+ return search::fef::FeatureExecutor::LP(new TermExecutor(env, _termId));
+}
+
+}}
diff --git a/searchlib/src/vespa/searchlib/features/termfeature.h b/searchlib/src/vespa/searchlib/features/termfeature.h
new file mode 100644
index 00000000000..6394fd10936
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/termfeature.h
@@ -0,0 +1,68 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <string>
+#include <vector>
+#include <vespa/searchlib/fef/blueprint.h>
+#include <vespa/searchlib/fef/featureexecutor.h>
+#include <vespa/searchlib/common/feature.h>
+
+namespace search {
+namespace features {
+
+/**
+ * Implements the executor for term feature.
+ */
+class TermExecutor : public search::fef::FeatureExecutor {
+public:
+ /**
+ * Constructs an executor for term feature.
+ *
+ * @param env The query environment.
+ * @param termId The id of the query term to evaluate.
+ */
+ TermExecutor(const search::fef::IQueryEnvironment &env,
+ uint32_t termId);
+ virtual void execute(search::fef::MatchData &data);
+
+private:
+ const search::fef::ITermData *_termData;
+ feature_t _connectedness;
+ feature_t _significance;
+};
+
+/**
+ * Implements the blueprint for term feature.
+ */
+class TermBlueprint : public search::fef::Blueprint {
+public:
+ /**
+ * Constructs a blueprint for term feature.
+ */
+ TermBlueprint();
+
+ // Inherit doc from Blueprint.
+ virtual void visitDumpFeatures(const search::fef::IIndexEnvironment &env,
+ search::fef::IDumpFeatureVisitor &visitor) const;
+
+ // Inherit doc from Blueprint.
+ virtual search::fef::Blueprint::UP createInstance() const;
+
+ // Inherit doc from Blueprint.
+ virtual search::fef::ParameterDescriptions getDescriptions() const {
+ return search::fef::ParameterDescriptions().desc().number();
+ }
+
+ // Inherit doc from Blueprint.
+ virtual bool setup(const search::fef::IIndexEnvironment & env,
+ const search::fef::ParameterList & params);
+
+ // Inherit doc from Blueprint.
+ virtual search::fef::FeatureExecutor::LP createExecutor(const search::fef::IQueryEnvironment &env) const;
+
+private:
+ uint32_t _termId;
+};
+
+}}
+
diff --git a/searchlib/src/vespa/searchlib/features/terminfofeature.cpp b/searchlib/src/vespa/searchlib/features/terminfofeature.cpp
new file mode 100644
index 00000000000..5ca385b0440
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/terminfofeature.cpp
@@ -0,0 +1,55 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".features.terminfo");
+
+#include <vespa/searchlib/fef/properties.h>
+#include <vespa/searchlib/fef/fieldinfo.h>
+#include <vespa/searchlib/fef/fieldtype.h>
+#include <vespa/searchlib/fef/featurenamebuilder.h>
+#include <vespa/searchlib/fef/itermdata.h>
+#include <vespa/searchlib/fef/handle.h>
+#include <sstream>
+#include "terminfofeature.h"
+#include "valuefeature.h"
+
+namespace search {
+namespace features {
+
+TermInfoBlueprint::TermInfoBlueprint()
+ : search::fef::Blueprint("termInfo"),
+ _termIdx(0)
+{
+}
+
+void
+TermInfoBlueprint::visitDumpFeatures(const search::fef::IIndexEnvironment &,
+ search::fef::IDumpFeatureVisitor &) const
+{
+}
+
+bool
+TermInfoBlueprint::setup(const search::fef::IIndexEnvironment &,
+ const search::fef::ParameterList & params)
+{
+ _termIdx = params[0].asInteger();
+ describeOutput("queryidx", "The index of the first term with the given "
+ "term index in the query term ordering. -1 if not found.");
+ return true;
+}
+
+search::fef::FeatureExecutor::LP
+TermInfoBlueprint::createExecutor(const search::fef::IQueryEnvironment &queryEnv) const
+{
+ feature_t queryIdx = -1.0;
+ if (queryEnv.getNumTerms() > _termIdx) {
+ queryIdx = _termIdx;
+ }
+ std::vector<feature_t> values;
+ values.push_back(queryIdx);
+ return search::fef::FeatureExecutor::LP(new ValueExecutor(values));
+}
+
+} // namespace features
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/features/terminfofeature.h b/searchlib/src/vespa/searchlib/features/terminfofeature.h
new file mode 100644
index 00000000000..063277c9b1f
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/terminfofeature.h
@@ -0,0 +1,33 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <string>
+#include <vector>
+#include <vespa/searchlib/fef/blueprint.h>
+#include <vespa/searchlib/fef/featureexecutor.h>
+
+namespace search {
+namespace features {
+
+class TermInfoBlueprint : public search::fef::Blueprint
+{
+private:
+ uint32_t _termIdx;
+
+public:
+ TermInfoBlueprint();
+ virtual void visitDumpFeatures(const search::fef::IIndexEnvironment &indexEnv,
+ search::fef::IDumpFeatureVisitor &visitor) const;
+ virtual search::fef::Blueprint::UP createInstance() const { return search::fef::Blueprint::UP(new TermInfoBlueprint()); }
+ virtual search::fef::ParameterDescriptions getDescriptions() const {
+ return search::fef::ParameterDescriptions().desc().number();
+ }
+ virtual bool setup(const search::fef::IIndexEnvironment & env,
+ const search::fef::ParameterList & params);
+ virtual search::fef::FeatureExecutor::LP createExecutor(const search::fef::IQueryEnvironment &queryEnv) const;
+};
+
+} // namespace features
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/features/text_similarity_feature.cpp b/searchlib/src/vespa/searchlib/features/text_similarity_feature.cpp
new file mode 100644
index 00000000000..794e67560b4
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/text_similarity_feature.cpp
@@ -0,0 +1,220 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".features.textsimilarity");
+#include "text_similarity_feature.h"
+
+namespace search {
+namespace features {
+
+namespace {
+
+struct Term {
+ search::fef::TermFieldHandle handle;
+ int weight;
+ int index;
+ Term(search::fef::TermFieldHandle handle_in, int weight_in, int index_in)
+ : handle(handle_in), weight(weight_in), index(index_in) {}
+};
+
+struct State {
+ uint32_t field_length;
+ uint32_t matched_terms;
+ int sum_term_weight;
+ uint32_t last_pos;
+ double sum_proximity_score;
+ uint32_t last_idx;
+ uint32_t num_in_order;
+
+ State(uint32_t length, uint32_t first_pos, int32_t first_weight, uint32_t first_idx)
+ : field_length(length),
+ matched_terms(1), sum_term_weight(first_weight),
+ last_pos(first_pos), sum_proximity_score(0.0),
+ last_idx(first_idx), num_in_order(0) {}
+
+ double proximity_score(uint32_t dist) {
+ return (dist > 8) ? 0 : (1.0 - (((dist-1)/8.0) * ((dist-1)/8.0)));
+ }
+
+ bool want_match(uint32_t pos) {
+ return (pos > last_pos);
+ }
+
+ void addMatch(uint32_t pos, int32_t weight, uint32_t idx) {
+ sum_proximity_score += proximity_score(pos - last_pos);
+ num_in_order += (idx > last_idx) ? 1 : 0;
+ last_pos = pos;
+ last_idx = idx;
+ ++matched_terms;
+ sum_term_weight += weight;
+ }
+
+ void calculateScore(size_t num_query_terms, int total_term_weight,
+ double &score_out,
+ double &proximity_out, double &order_out,
+ double &query_coverage_out, double &field_coverage_out)
+ {
+ double matches = std::min(field_length, matched_terms);
+ if (matches < 2) {
+ proximity_out = proximity_score(field_length);
+ order_out = (num_query_terms == 1) ? 1.0 : 0.0;
+ } else {
+ proximity_out = sum_proximity_score / (matches - 1);
+ order_out = num_in_order / (double) (matches - 1);
+ }
+ query_coverage_out = sum_term_weight / (double) total_term_weight;
+ field_coverage_out = matches / (double) field_length;
+ score_out = (0.35 * proximity_out) + (0.15 * order_out)
+ + (0.30 * query_coverage_out) + (0.20 * field_coverage_out);
+ }
+};
+
+} // namespace search::features::<unnamed>
+
+//-----------------------------------------------------------------------------
+
+TextSimilarityExecutor::TextSimilarityExecutor(const search::fef::IQueryEnvironment &env,
+ uint32_t field_id)
+ : _handles(),
+ _weights(),
+ _total_term_weight(0),
+ _queue()
+{
+ std::vector<Term> terms;
+ for (uint32_t i = 0; i < env.getNumTerms(); ++i) {
+ const search::fef::ITermData *termData = env.getTerm(i);
+ if (termData->getWeight().percent() != 0) { // only consider query terms with contribution
+ typedef search::fef::ITermFieldRangeAdapter FRA;
+ for (FRA iter(*termData); iter.valid(); iter.next()) {
+ const search::fef::ITermFieldData &tfd = iter.get();
+ if (tfd.getFieldId() == field_id) {
+ int term_weight = termData->getWeight().percent();
+ _total_term_weight += term_weight;
+ terms.push_back(Term(tfd.getHandle(), term_weight,
+ termData->getTermIndex()));
+ }
+ }
+ }
+ }
+ std::sort(terms.begin(), terms.end(), [](const Term &a, const Term &b){ return (a.index < b.index); });
+ _handles.reserve(terms.size());
+ _weights.reserve(terms.size());
+ for (size_t i = 0; i < terms.size(); ++i) {
+ _handles.push_back(terms[i].handle);
+ _weights.push_back(terms[i].weight);
+ }
+}
+
+void
+TextSimilarityExecutor::execute(search::fef::MatchData &data)
+{
+ for (size_t i = 0; i < _handles.size(); ++i) {
+ search::fef::TermFieldMatchData *tfmd = data.resolveTermField(_handles[i]);
+ if (tfmd->getDocId() == data.getDocId()) {
+ Item item(i, tfmd->begin(), tfmd->end());
+ if (item.pos != item.end) {
+ _queue.push(item);
+ }
+ }
+ }
+ if (_queue.empty()) {
+ *data.resolveFeature(outputs()[0]) = 0.0;
+ *data.resolveFeature(outputs()[1]) = 0.0;
+ *data.resolveFeature(outputs()[2]) = 0.0;
+ *data.resolveFeature(outputs()[3]) = 0.0;
+ *data.resolveFeature(outputs()[4]) = 0.0;
+ return;
+ }
+ const Item &first = _queue.front();
+ State state(first.pos->getElementLen(),
+ first.pos->getPosition(),
+ _weights[first.idx],
+ first.idx);
+ _queue.pop_front();
+ while (!_queue.empty()) {
+ Item &item = _queue.front();
+ if (state.want_match(item.pos->getPosition())) {
+ state.addMatch(item.pos->getPosition(),
+ _weights[item.idx],
+ item.idx);
+ _queue.pop_front();
+ } else {
+ ++item.pos;
+ if (item.pos == item.end) {
+ _queue.pop_front();
+ } else {
+ _queue.adjust();
+ }
+ }
+ }
+ state.calculateScore(_handles.size(), _total_term_weight,
+ *data.resolveFeature(outputs()[0]),
+ *data.resolveFeature(outputs()[1]),
+ *data.resolveFeature(outputs()[2]),
+ *data.resolveFeature(outputs()[3]),
+ *data.resolveFeature(outputs()[4]));
+}
+
+//-----------------------------------------------------------------------------
+
+const vespalib::string TextSimilarityBlueprint::score_output("score");
+const vespalib::string TextSimilarityBlueprint::proximity_output("proximity");
+const vespalib::string TextSimilarityBlueprint::order_output("order");
+const vespalib::string TextSimilarityBlueprint::query_coverage_output("queryCoverage");
+const vespalib::string TextSimilarityBlueprint::field_coverage_output("fieldCoverage");
+
+TextSimilarityBlueprint::TextSimilarityBlueprint()
+ : Blueprint("textSimilarity"), _field_id(fef::IllegalHandle) {}
+
+void
+TextSimilarityBlueprint::visitDumpFeatures(const search::fef::IIndexEnvironment &env,
+ search::fef::IDumpFeatureVisitor &visitor) const
+{
+ for (uint32_t i = 0; i < env.getNumFields(); ++i) {
+ const search::fef::FieldInfo &field = *env.getField(i);
+ if (field.type() == search::fef::FieldType::INDEX) {
+ if (!field.isFilter() && field.collection() == fef::CollectionType::SINGLE) {
+ search::fef::FeatureNameBuilder fnb;
+ fnb.baseName(getBaseName()).parameter(field.name());
+ visitor.visitDumpFeature(fnb.output(score_output).buildName());
+ visitor.visitDumpFeature(fnb.output(proximity_output).buildName());
+ visitor.visitDumpFeature(fnb.output(order_output).buildName());
+ visitor.visitDumpFeature(fnb.output(query_coverage_output).buildName());
+ visitor.visitDumpFeature(fnb.output(field_coverage_output).buildName());
+ }
+ }
+ }
+}
+
+search::fef::Blueprint::UP
+TextSimilarityBlueprint::createInstance() const
+{
+ return Blueprint::UP(new TextSimilarityBlueprint());
+}
+
+bool
+TextSimilarityBlueprint::setup(const search::fef::IIndexEnvironment &env,
+ const search::fef::ParameterList &params)
+{
+ const search::fef::FieldInfo *field = params[0].asField();
+ _field_id = field->id();
+ describeOutput(score_output, "default normalized combination of other outputs");
+ describeOutput(proximity_output, "normalized match proximity score");
+ describeOutput(order_output, "normalized match order score");
+ describeOutput(query_coverage_output, "normalized query match coverage");
+ describeOutput(field_coverage_output, "normalized field match coverage");
+ env.hintFieldAccess(field->id());
+ return true;
+}
+
+search::fef::FeatureExecutor::LP
+TextSimilarityBlueprint::createExecutor(const search::fef::IQueryEnvironment &env) const
+{
+ return search::fef::FeatureExecutor::LP(new TextSimilarityExecutor(env, _field_id));
+}
+
+//-----------------------------------------------------------------------------
+
+} // namespace features
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/features/text_similarity_feature.h b/searchlib/src/vespa/searchlib/features/text_similarity_feature.h
new file mode 100644
index 00000000000..88969b13ac6
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/text_similarity_feature.h
@@ -0,0 +1,75 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <string>
+#include <vector>
+#include <vespa/searchlib/fef/fef.h>
+#include <vespa/vespalib/util/priority_queue.h>
+
+namespace search {
+namespace features {
+
+//-----------------------------------------------------------------------------
+
+class TextSimilarityExecutor : public search::fef::FeatureExecutor
+{
+private:
+ std::vector<fef::TermFieldHandle> _handles;
+ std::vector<int> _weights;
+ int _total_term_weight;
+
+ struct Item {
+ uint32_t idx;
+ search::fef::TermFieldMatchData::PositionsIterator pos;
+ search::fef::TermFieldMatchData::PositionsIterator end;
+ Item(uint32_t idx_in,
+ search::fef::TermFieldMatchData::PositionsIterator pos_in,
+ search::fef::TermFieldMatchData::PositionsIterator end_in)
+ : idx(idx_in), pos(pos_in), end(end_in) {}
+ bool operator<(const Item &other) const {
+ return (pos->getPosition() == other.pos->getPosition())
+ ? (idx < other.idx)
+ : (pos->getPosition() < other.pos->getPosition());
+ }
+ };
+
+ vespalib::PriorityQueue<Item> _queue;
+
+public:
+ TextSimilarityExecutor(const search::fef::IQueryEnvironment &env, uint32_t field_id);
+ virtual bool isPure() { return _handles.empty(); }
+ virtual void execute(search::fef::MatchData & data);
+};
+
+//-----------------------------------------------------------------------------
+
+class TextSimilarityBlueprint : public search::fef::Blueprint
+{
+private:
+ static const vespalib::string score_output;
+ static const vespalib::string proximity_output;
+ static const vespalib::string order_output;
+ static const vespalib::string query_coverage_output;
+ static const vespalib::string field_coverage_output;
+
+ uint32_t _field_id;
+
+public:
+ TextSimilarityBlueprint();
+ virtual void visitDumpFeatures(const search::fef::IIndexEnvironment &env,
+ search::fef::IDumpFeatureVisitor &visitor) const;
+ virtual search::fef::Blueprint::UP createInstance() const;
+ virtual search::fef::ParameterDescriptions getDescriptions() const {
+ return search::fef::ParameterDescriptions().desc().indexField(search::fef::ParameterCollection::SINGLE);
+ }
+ virtual bool setup(const search::fef::IIndexEnvironment &env,
+ const search::fef::ParameterList &params);
+ virtual search::fef::FeatureExecutor::LP createExecutor(const search::fef::IQueryEnvironment & env) const;
+};
+
+//-----------------------------------------------------------------------------
+
+} // namespace features
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/features/utils.cpp b/searchlib/src/vespa/searchlib/features/utils.cpp
new file mode 100644
index 00000000000..0f19a2b4e3c
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/utils.cpp
@@ -0,0 +1,155 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".features.utils");
+#include "utils.h"
+#include <vespa/searchlib/fef/itablemanager.h>
+#include <vespa/searchlib/fef/properties.h>
+#include <vespa/searchlib/fef/itermdata.h>
+#include <vespa/vespalib/util/stringfmt.h>
+
+#include <cmath>
+#include <iostream>
+
+using namespace search::fef;
+
+namespace search {
+namespace features {
+namespace util {
+
+feature_t lookupConnectedness(const search::fef::IQueryEnvironment & env, uint32_t termId, feature_t fallback)
+{
+ if (termId == 0) {
+ return fallback; // no previous term
+ }
+
+ const ITermData * data = env.getTerm(termId);
+ const ITermData * prev = env.getTerm(termId - 1);
+ if (data == NULL || prev == NULL) {
+ return fallback; // default value
+ }
+ return lookupConnectedness(env, data->getUniqueId(), prev->getUniqueId(), fallback);
+}
+
+feature_t lookupConnectedness(const search::fef::IQueryEnvironment & env,
+ uint32_t currUniqueId, uint32_t prevUniqueId, feature_t fallback)
+{
+ // Connectedness of 0.5 between term with unique id 2 and term with unique id 1 is represented as:
+ // [vespa.term.2.connexity: "1", vespa.term.2.connexity: "0.5"]
+ vespalib::asciistream os;
+ os << "vespa.term." << currUniqueId << ".connexity";
+ Property p = env.getProperties().lookup(os.str());
+ if (p.size() == 2) {
+ // we have a defined connectedness with the previous term
+ if (strToNum<uint32_t>(p.getAt(0)) == prevUniqueId) {
+ return strToNum<feature_t>(p.getAt(1));
+ }
+ }
+ return fallback;
+}
+
+feature_t lookupSignificance(const search::fef::IQueryEnvironment & env, uint32_t termId, feature_t fallback)
+{
+ const ITermData * data = env.getTerm(termId);
+ if (data == NULL) {
+ return fallback;
+ }
+
+ // Significance of 0.5 for term with unique id 1 is represented as:
+ // [vespa.term.1.significance: "0.5"]
+ vespalib::asciistream os;
+ os << "vespa.term." << data->getUniqueId() << ".significance";
+ Property p = env.getProperties().lookup(os.str());
+ if (p.found()) {
+ return strToNum<feature_t>(p.get());
+ }
+
+ return fallback;
+}
+
+double getRobertsonSparckJonesWeight(double docCount, double docsInCorpus)
+{
+ return log((docsInCorpus - docCount + 0.5)/(docCount + 0.5));
+}
+
+static const double N = 1000000.0;
+
+feature_t getSignificance(double docFreq)
+{
+ if (docFreq < (1.0/N)) {
+ docFreq = 1.0/N;
+ }
+ if (docFreq > 1.0) {
+ docFreq = 1.0;
+ }
+ double d = log(docFreq)/log(1.0/N);
+ return 0.5 + 0.5 * d;
+#if 0
+ double n = docFreq * N;
+ n = (n == 0) ? 1 : (n > N ? N : n);
+ double a = getRobertsonSparckJonesWeight(1, N + 1);
+ double b = getRobertsonSparckJonesWeight(N + 1, N + 1);
+ double w = getRobertsonSparckJonesWeight(n, N + 1);
+ return ((w - b)/(a - b));
+#endif
+}
+
+feature_t getSignificance(const search::fef::ITermData &termData)
+{
+ typedef search::fef::ITermFieldRangeAdapter FRA;
+ double df = 0;
+ for (FRA iter(termData); iter.valid(); iter.next()) {
+ df = std::max(df, iter.get().getDocFreq());
+ }
+
+ feature_t signif = getSignificance(df);
+ LOG(debug, "getSignificance %e %f [ %e %f ] = %e", df, df, df * N, df * N, signif);
+ return signif;
+}
+
+const search::fef::Table *
+lookupTable(const search::fef::IIndexEnvironment & env, const vespalib::string & featureName,
+ const vespalib::string & table, const vespalib::string & fieldName, const vespalib::string & fallback)
+{
+ vespalib::string tn1 = env.getProperties().lookup(featureName, table).get(fallback);
+ vespalib::string tn2 = env.getProperties().lookup(featureName, table, fieldName).get(tn1);
+ const search::fef::Table * retval = env.getTableManager().getTable(tn2);
+ if (retval == NULL) {
+ LOG(warning, "Could not find the %s '%s' to be used for field '%s' in feature '%s'",
+ table.c_str(), tn2.c_str(), fieldName.c_str(), featureName.c_str());
+ }
+ return retval;
+}
+
+const search::fef::ITermData *
+getTermByLabel(const search::fef::IQueryEnvironment &env, const vespalib::string &label)
+{
+ // Labeling the query item with unique id '5' with the label 'foo'
+ // is represented as: [vespa.label.foo.id: "5"]
+ vespalib::asciistream os;
+ os << "vespa.label." << label << ".id";
+ Property p = env.getProperties().lookup(os.str());
+ if (!p.found()) {
+ return 0;
+ }
+ uint32_t uid = strToNum<uint32_t>(p.get());
+ if (uid == 0) {
+ LOG(warning, "Query label '%s' was attached to invalid unique id: '%s'",
+ label.c_str(), p.get().c_str());
+ return 0;
+ }
+ for (uint32_t i(0), m(env.getNumTerms()); i < m; ++i) {
+ const ITermData *term = env.getTerm(i);
+ if (term->getUniqueId() == uid) {
+ return term;
+ }
+ }
+ LOG(warning, "Query label '%s' was attached to non-existing unique id: '%s'",
+ label.c_str(), p.get().c_str());
+ return 0;
+}
+
+} // namespace util
+} // namespace features
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/features/utils.h b/searchlib/src/vespa/searchlib/features/utils.h
new file mode 100644
index 00000000000..bc830aaa9d3
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/utils.h
@@ -0,0 +1,234 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <limits>
+#include <vespa/vespalib/stllike/asciistream.h>
+#include <vespa/vespalib/util/exceptions.h>
+#include <string>
+#include <vespa/searchlib/fef/iqueryenvironment.h>
+#include <vespa/searchlib/fef/table.h>
+#include <vespa/searchlib/fef/termfieldmatchdata.h>
+#include <vespa/searchlib/fef/itermdata.h>
+#include <vespa/searchlib/fef/itermfielddata.h>
+#include <vespa/searchlib/common/feature.h>
+#include <vespa/vespalib/util/string_hash.h>
+
+namespace search {
+namespace features {
+namespace util {
+
+/**
+ * Maximum feature value
+ */
+const feature_t FEATURE_MAX = std::numeric_limits<feature_t>::max();
+
+/**
+ * Minimum feature value
+ */
+const feature_t FEATURE_MIN = -std::numeric_limits<feature_t>::max();
+
+typedef const char * ConstCharPtr;
+
+/**
+ * Converts the given string to a numeric value.
+ *
+ * @param str The string to convert.
+ * @return The numeric value.
+ */
+template <typename T>
+T strToNum(const vespalib::stringref &str)
+{
+ vespalib::asciistream iss(str);
+ T retval = 0;
+ try {
+ iss >> retval;
+ } catch (const vespalib::IllegalArgumentException &) {
+ }
+ return retval;
+}
+
+template <typename T>
+feature_t getAsFeature(const T &value) __attribute__((__always_inline__));
+
+/**
+ * Converts the given value to a feature value.
+ *
+ * @param value The value to convert.
+ * @return The feature value.
+ */
+template <typename T>
+inline feature_t getAsFeature(const T &value)
+{
+ return static_cast<feature_t>(value);
+}
+
+/**
+ * Specialization for const char *.
+ *
+ * @param value The string to convert.
+ * @return The feature value.
+ */
+template <>
+inline feature_t getAsFeature<ConstCharPtr>(const ConstCharPtr & value) {
+ return static_cast<feature_t>(vespalib::hash_code(value, strlen(value)));
+}
+
+/**
+ * Specialization for a string value.
+ *
+ * @param value The string to convert.
+ * @return The feature value.
+ */
+template <>
+inline feature_t getAsFeature<vespalib::string>(const vespalib::string & value) {
+ return static_cast<feature_t>(vespalib::hash_code(value));
+}
+
+/**
+ * Specialization for a string value.
+ *
+ * @param value The string to convert.
+ * @return The feature value.
+ */
+template <>
+inline feature_t getAsFeature<vespalib::stringref>(const vespalib::stringref & value) {
+ return static_cast<feature_t>(vespalib::hash_code(value));
+}
+
+
+/**
+ * This method inputs a value to cap to the range [capFloor, capCeil] and then normalize this
+ * value to the unit range [0, 1].
+ *
+ * @param val The value to unit normalize.
+ * @param capFloor The minimum value of the cap range.
+ * @param capCeil The maximum value of the cap range.
+ * @return The unit normalized value.
+ */
+template <typename T>
+T unitNormalize(const T &val, const T &capFloor, const T &capCeil)
+{
+ return (std::max(capFloor, std::min(capCeil, val)) - capFloor) / (capCeil - capFloor);
+}
+
+/**
+ * Returns the normalized strength with which the given term is connected to the previous term in the query.
+ * Uses the property map of the query environment to lookup this data.
+ *
+ * @param env The query environment.
+ * @param termId The term id.
+ * @param fallback The value to return if the connectedness was not found in the property map.
+ * @return The connectedness.
+ */
+feature_t lookupConnectedness(const search::fef::IQueryEnvironment & env, uint32_t termId, feature_t fallback = 0.1f);
+
+/**
+ * Returns the normalized strength with which the given current term is connected to the given previous term.
+ * Uses the property map of the query environment to lookup this data.
+ *
+ * @param env The query environment.
+ * @param currUniqueId Unique id of the current term.
+ * @param prevUniqueId Unique id of the previous term.
+ * @param fallback The value to return if the connectedness was not found in the property map.
+ * @return The connectedness between the current term and previous term.
+ */
+feature_t lookupConnectedness(const search::fef::IQueryEnvironment & env,
+ uint32_t currUniqueId, uint32_t prevUniqueId, feature_t fallback = 0.1f);
+
+/**
+ * Returns the significance of the given term.
+ * Uses the property map of the query environment to lookup this data.
+ *
+ * @param env The query environment.
+ * @param termId The term id.
+ * @param fallback The value to return if the significance was not found in the property map.
+ * @return The significance.
+ */
+feature_t lookupSignificance(const search::fef::IQueryEnvironment & env, uint32_t termId, feature_t fallback = 0.0f);
+
+/**
+ * Returns the Robertson-Sparck-Jones weight based on the given document count
+ * (number of documents containing the term) and the number of documents in the corpus.
+ * This weight is a variant of inverse document frequency.
+ */
+double getRobertsonSparckJonesWeight(double docCount, double docsInCorpus);
+
+/**
+ * Returns the significance based on the given scaled number of documents containing the term.
+ *
+ * @param docFreq The scaled number of documents containing the term.
+ * @return The significance.
+ */
+feature_t getSignificance(double docFreq);
+
+/**
+ * Returns the significance based on max known frequency of the term
+ *
+ * @param termData Data for the term
+ * @return The significance.
+ */
+feature_t getSignificance(const search::fef::ITermData &termData);
+
+/**
+ * Lookups a table by using the properties and the table manager in the given index environment.
+ * The table name is found by looking up the following properties and using the first found:
+ * 'featureName.table.fieldName', 'featureName.table'.
+ * The table name 'fallback' is used if no properties are found.
+ *
+ * @param env the index environment.
+ * @param featureName the name of the feature.
+ * @param table the table to be used by the feature.
+ * @param fieldName the name of the field we want to lookup a table for.
+ * @param fallback the actual name of the table to use if we do not find any properties.
+ * @return the table pointer or NULL if not found.
+ **/
+const search::fef::Table *
+lookupTable(const search::fef::IIndexEnvironment & env, const vespalib::string & featureName,
+ const vespalib::string & table, const vespalib::string & fieldName, const vespalib::string & fallback);
+
+/**
+ * Obtain query information for a term/field combination.
+ *
+ * @return query information for a term/field combination, or 0 if not found
+ * @param env query environment
+ * @param termId the term id
+ * @param fieldId the field id
+ **/
+inline const search::fef::ITermFieldData *
+getTermFieldData(const search::fef::IQueryEnvironment &env, uint32_t termId, uint32_t fieldId) {
+ const search::fef::ITermData *td = env.getTerm(termId);
+ return (td == 0) ? 0 : td->lookupField(fieldId);
+}
+
+/**
+ * Obtain the match handle for the given term within the given field.
+ *
+ * @return match handle, or IllegalHandle if not found
+ * @param env query environment
+ * @param termId the term id
+ * @param fieldId the field id
+ **/
+inline search::fef::TermFieldHandle
+getTermFieldHandle(const search::fef::IQueryEnvironment &env, uint32_t termId, uint32_t fieldId) {
+ const search::fef::ITermFieldData *tfd = getTermFieldData(env, termId, fieldId);
+ return (tfd == 0) ? search::fef::IllegalHandle : tfd->getHandle();
+}
+
+/**
+ * Obtain the term annotated with the given label. This function will
+ * reverse map label to unique id and then traverse the query
+ * environment trying to locate the term with the appropriate unique
+ * id. If no such term can be found, 0 will be returned.
+ *
+ * @return term with given label, or 0 if not found
+ * @param env query environment
+ * @param label query item label
+ **/
+const search::fef::ITermData *
+getTermByLabel(const search::fef::IQueryEnvironment &env, const vespalib::string &label);
+
+} // namespace util
+} // namespace features
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/features/valuefeature.cpp b/searchlib/src/vespa/searchlib/features/valuefeature.cpp
new file mode 100644
index 00000000000..1a1a202744b
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/valuefeature.cpp
@@ -0,0 +1,65 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".features.value");
+#include "valuefeature.h"
+
+#include <sstream>
+
+namespace search {
+namespace features {
+
+ValueExecutor::ValueExecutor(const std::vector<feature_t> & values) :
+ search::fef::FeatureExecutor(),
+ _values(values)
+{
+ // empty
+}
+
+void
+ValueExecutor::execute(search::fef::MatchData & data)
+{
+ for (uint32_t i = 0; i < _values.size(); ++i) {
+ *data.resolveFeature(outputs()[i]) = _values[i];
+ }
+}
+
+void
+SingleZeroValueExecutor::execute(search::fef::MatchData & data)
+{
+ *data.resolveFeature(outputs()[0]) = 0.0;
+}
+
+ValueBlueprint::ValueBlueprint() :
+ search::fef::Blueprint("value"),
+ _values()
+{
+ // empty
+}
+
+void
+ValueBlueprint::visitDumpFeatures(const search::fef::IIndexEnvironment &,
+ search::fef::IDumpFeatureVisitor &) const
+{
+ // empty
+}
+
+bool
+ValueBlueprint::setup(const search::fef::IIndexEnvironment &,
+ const search::fef::ParameterList & params)
+{
+ for (uint32_t i = 0; i < params.size(); ++i) {
+ _values.push_back(params[i].asDouble());
+ std::ostringstream name;
+ name << i;
+ std::ostringstream desc;
+ desc << "value " << i;
+ describeOutput(name.str(), desc.str());
+ // we have no inputs
+ }
+ return true;
+}
+
+} // namespace features
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/features/valuefeature.h b/searchlib/src/vespa/searchlib/features/valuefeature.h
new file mode 100644
index 00000000000..c1d462b645c
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/valuefeature.h
@@ -0,0 +1,59 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <string>
+#include <vector>
+#include <vespa/searchlib/fef/blueprint.h>
+#include <vespa/searchlib/fef/featureexecutor.h>
+#include <vespa/searchlib/common/feature.h>
+
+namespace search {
+namespace features {
+
+class ValueExecutor : public search::fef::FeatureExecutor
+{
+private:
+ std::vector<feature_t> _values;
+
+public:
+ ValueExecutor(const std::vector<feature_t> & values);
+ virtual bool isPure() { return true; }
+ virtual void execute(search::fef::MatchData & data);
+ const std::vector<feature_t> & getValues() const { return _values; }
+};
+
+class SingleZeroValueExecutor : public search::fef::FeatureExecutor
+{
+public:
+ SingleZeroValueExecutor() : FeatureExecutor() {}
+ virtual bool isPure() { return true; }
+ virtual void execute(search::fef::MatchData & data);
+};
+
+
+class ValueBlueprint : public search::fef::Blueprint
+{
+private:
+ std::vector<feature_t> _values;
+
+public:
+ ValueBlueprint();
+
+ virtual void visitDumpFeatures(const search::fef::IIndexEnvironment & indexEnv,
+ search::fef::IDumpFeatureVisitor & visitor) const;
+ virtual search::fef::Blueprint::UP createInstance() const { return Blueprint::UP(new ValueBlueprint()); }
+ virtual search::fef::ParameterDescriptions getDescriptions() const {
+ return search::fef::ParameterDescriptions().desc().number().number().repeat();
+ }
+ virtual bool setup(const search::fef::IIndexEnvironment & env,
+ const search::fef::ParameterList & params);
+ virtual search::fef::FeatureExecutor::LP createExecutor(const search::fef::IQueryEnvironment & queryEnv) const {
+ (void) queryEnv;
+ return search::fef::FeatureExecutor::LP(new ValueExecutor(_values));
+ }
+};
+
+} // namespace features
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/features/weighted_set_parser.cpp b/searchlib/src/vespa/searchlib/features/weighted_set_parser.cpp
new file mode 100644
index 00000000000..3d2f03d54eb
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/weighted_set_parser.cpp
@@ -0,0 +1,19 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".features.weighted_set_parser");
+
+#include "weighted_set_parser.h"
+
+namespace search {
+namespace features {
+
+void
+WeightedSetParser::logWarning(const vespalib::string &msg)
+{
+ LOG(warning, "%s", msg.c_str());
+}
+
+} // namespace features
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/features/weighted_set_parser.h b/searchlib/src/vespa/searchlib/features/weighted_set_parser.h
new file mode 100644
index 00000000000..929b1356a08
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/weighted_set_parser.h
@@ -0,0 +1,28 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/vespalib/stllike/string.h>
+
+namespace search {
+namespace features {
+
+/**
+ * Utility for parsing a string representation of a weighted set
+ * that is typically passed down with the query.
+ *
+ * The format of the weighted set is as follows:
+ * {key1:weight1,key2:weight2,...,keyN:weightN}.
+ */
+class WeightedSetParser
+{
+private:
+ static void logWarning(const vespalib::string &msg);
+
+public:
+ template <typename OutputType>
+ static void parse(const vespalib::string &input, OutputType &output);
+};
+
+} // namespace features
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/features/weighted_set_parser.hpp b/searchlib/src/vespa/searchlib/features/weighted_set_parser.hpp
new file mode 100644
index 00000000000..cbc67d411d3
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/weighted_set_parser.hpp
@@ -0,0 +1,48 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "weighted_set_parser.h"
+#include <vespa/vespalib/util/stringfmt.h>
+
+namespace search {
+namespace features {
+
+template <typename OutputType>
+void
+WeightedSetParser::parse(const vespalib::string &input, OutputType &output)
+{
+ size_t len = input.size();
+ // Note that we still handle '(' and ')' for backward compatibility.
+ if (len >= 2 && ((input[0] == '{' && input[len - 1] == '}') ||
+ (input[0] == '(' && input[len - 1] == ')')) ) {
+ vespalib::stringref s(input.c_str()+1, len - 2);
+ while ( ! s.empty() ) {
+ vespalib::string::size_type commaPos(s.find(','));
+ vespalib::stringref item(s.substr(0, commaPos));
+ vespalib::string::size_type colonPos(item.find(':'));
+ if (colonPos != vespalib::string::npos) {
+ vespalib::string tmpKey(item.substr(0, colonPos));
+ vespalib::string::size_type start(tmpKey.find_first_not_of(' '));
+ vespalib::stringref key(tmpKey.c_str() + start, colonPos - start);
+ vespalib::stringref value(item.substr(colonPos+1));
+ output.insert(key, value);
+ } else {
+ logWarning(vespalib::make_string(
+ "Could not parse item '%s' in input string '%s', skipping. "
+ "Expected ':' between key and weight.", item.c_str(), input.c_str()));
+ }
+ if (commaPos != vespalib::string::npos) {
+ s = s.substr(commaPos+1);
+ } else {
+ s = vespalib::stringref();
+ }
+ }
+ } else {
+ logWarning(vespalib::make_string("Could not parse input string '%s'. "
+ "Expected surrounding '(' and ')' or '{' and '}'.", input.c_str()));
+ }
+}
+
+} // namespace features
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/fef/.gitignore b/searchlib/src/vespa/searchlib/fef/.gitignore
new file mode 100644
index 00000000000..38092de6898
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/.gitignore
@@ -0,0 +1,4 @@
+*.So
+.depend
+Makefile
+html
diff --git a/searchlib/src/vespa/searchlib/fef/CMakeLists.txt b/searchlib/src/vespa/searchlib/fef/CMakeLists.txt
new file mode 100644
index 00000000000..0004779ed91
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/CMakeLists.txt
@@ -0,0 +1,44 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_library(searchlib_fef OBJECT
+ SOURCES
+ blueprint.cpp
+ blueprintfactory.cpp
+ blueprintresolver.cpp
+ collection_type.cpp
+ feature_type.cpp
+ featureexecutor.cpp
+ featurenamebuilder.cpp
+ featurenameparser.cpp
+ featureoverrider.cpp
+ fef.cpp
+ fieldinfo.cpp
+ fieldpositionsiterator.cpp
+ fieldtype.cpp
+ filetablefactory.cpp
+ functiontablefactory.cpp
+ indexproperties.cpp
+ location.cpp
+ matchdata.cpp
+ matchdatalayout.cpp
+ objectstore.cpp
+ parameter.cpp
+ parameterdescriptions.cpp
+ parametervalidator.cpp
+ phrasesplitter.cpp
+ properties.cpp
+ queryproperties.cpp
+ rank_program.cpp
+ ranksetup.cpp
+ simpletermdata.cpp
+ simpletermfielddata.cpp
+ sumexecutor.cpp
+ symmetrictable.cpp
+ table.cpp
+ tablemanager.cpp
+ termfieldmatchdata.cpp
+ termfieldmatchdataposition.cpp
+ termmatchdatamerger.cpp
+ utils.cpp
+ verify_feature.cpp
+ DEPENDS
+)
diff --git a/searchlib/src/vespa/searchlib/fef/Doxyfile b/searchlib/src/vespa/searchlib/fef/Doxyfile
new file mode 100644
index 00000000000..9c3496fc985
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/Doxyfile
@@ -0,0 +1,1162 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+# Doxyfile 1.3.9.1
+
+# This file describes the settings to be used by the documentation system
+# doxygen (www.doxygen.org) for a project
+#
+# All text after a hash (#) is considered a comment and will be ignored
+# The format is:
+# TAG = value [value, ...]
+# For lists items can also be appended using:
+# TAG += value [value, ...]
+# Values that contain spaces should be placed between quotes (" ")
+
+#---------------------------------------------------------------------------
+# Project related configuration options
+#---------------------------------------------------------------------------
+
+# The PROJECT_NAME tag is a single word (or a sequence of words surrounded
+# by quotes) that should identify the project.
+
+PROJECT_NAME = "Feature Execution Framework"
+
+# The PROJECT_NUMBER tag can be used to enter a project or revision number.
+# This could be handy for archiving the generated documentation or
+# if some version control system is used.
+
+PROJECT_NUMBER =
+
+# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute)
+# base path where the generated documentation will be put.
+# If a relative path is entered, it will be relative to the location
+# where doxygen was started. If left blank the current directory will be used.
+
+OUTPUT_DIRECTORY =
+
+# If the CREATE_SUBDIRS tag is set to YES, then doxygen will create
+# 4096 sub-directories (in 2 levels) under the output directory of each output
+# format and will distribute the generated files over these directories.
+# Enabling this option can be useful when feeding doxygen a huge amount of source
+# files, where putting all generated files in the same directory would otherwise
+# cause performance problems for the file system.
+
+CREATE_SUBDIRS = NO
+
+# The OUTPUT_LANGUAGE tag is used to specify the language in which all
+# documentation generated by doxygen is written. Doxygen will use this
+# information to generate all constant output in the proper language.
+# The default language is English, other supported languages are:
+# Brazilian, Catalan, Chinese, Chinese-Traditional, Croatian, Czech, Danish,
+# Dutch, Finnish, French, German, Greek, Hungarian, Italian, Japanese,
+# Japanese-en (Japanese with English messages), Korean, Korean-en, Norwegian,
+# Polish, Portuguese, Romanian, Russian, Serbian, Slovak, Slovene, Spanish,
+# Swedish, and Ukrainian.
+
+OUTPUT_LANGUAGE = English
+
+# This tag can be used to specify the encoding used in the generated output.
+# The encoding is not always determined by the language that is chosen,
+# but also whether or not the output is meant for Windows or non-Windows users.
+# In case there is a difference, setting the USE_WINDOWS_ENCODING tag to YES
+# forces the Windows encoding (this is the default for the Windows binary),
+# whereas setting the tag to NO uses a Unix-style encoding (the default for
+# all platforms other than Windows).
+
+USE_WINDOWS_ENCODING = NO
+
+# If the BRIEF_MEMBER_DESC tag is set to YES (the default) Doxygen will
+# include brief member descriptions after the members that are listed in
+# the file and class documentation (similar to JavaDoc).
+# Set to NO to disable this.
+
+BRIEF_MEMBER_DESC = YES
+
+# If the REPEAT_BRIEF tag is set to YES (the default) Doxygen will prepend
+# the brief description of a member or function before the detailed description.
+# Note: if both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the
+# brief descriptions will be completely suppressed.
+
+REPEAT_BRIEF = YES
+
+# This tag implements a quasi-intelligent brief description abbreviator
+# that is used to form the text in various listings. Each string
+# in this list, if found as the leading text of the brief description, will be
+# stripped from the text and the result after processing the whole list, is used
+# as the annotated text. Otherwise, the brief description is used as-is. If left
+# blank, the following values are used ("$name" is automatically replaced with the
+# name of the entity): "The $name class" "The $name widget" "The $name file"
+# "is" "provides" "specifies" "contains" "represents" "a" "an" "the"
+
+ABBREVIATE_BRIEF =
+
+# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then
+# Doxygen will generate a detailed section even if there is only a brief
+# description.
+
+ALWAYS_DETAILED_SEC = NO
+
+# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all inherited
+# members of a class in the documentation of that class as if those members were
+# ordinary class members. Constructors, destructors and assignment operators of
+# the base classes will not be shown.
+
+INLINE_INHERITED_MEMB = NO
+
+# If the FULL_PATH_NAMES tag is set to YES then Doxygen will prepend the full
+# path before files name in the file list and in the header files. If set
+# to NO the shortest path that makes the file name unique will be used.
+
+FULL_PATH_NAMES = YES
+
+# If the FULL_PATH_NAMES tag is set to YES then the STRIP_FROM_PATH tag
+# can be used to strip a user-defined part of the path. Stripping is
+# only done if one of the specified strings matches the left-hand part of
+# the path. The tag can be used to show relative paths in the file list.
+# If left blank the directory from which doxygen is run is used as the
+# path to strip.
+
+STRIP_FROM_PATH =
+
+# The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of
+# the path mentioned in the documentation of a class, which tells
+# the reader which header file to include in order to use a class.
+# If left blank only the name of the header file containing the class
+# definition is used. Otherwise one should specify the include paths that
+# are normally passed to the compiler using the -I flag.
+
+STRIP_FROM_INC_PATH =
+
+# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter
+# (but less readable) file names. This can be useful is your file systems
+# doesn't support long names like on DOS, Mac, or CD-ROM.
+
+SHORT_NAMES = NO
+
+# If the JAVADOC_AUTOBRIEF tag is set to YES then Doxygen
+# will interpret the first line (until the first dot) of a JavaDoc-style
+# comment as the brief description. If set to NO, the JavaDoc
+# comments will behave just like the Qt-style comments (thus requiring an
+# explicit @brief command for a brief description.
+
+JAVADOC_AUTOBRIEF = NO
+
+# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make Doxygen
+# treat a multi-line C++ special comment block (i.e. a block of //! or ///
+# comments) as a brief description. This used to be the default behaviour.
+# The new default is to treat a multi-line C++ comment block as a detailed
+# description. Set this tag to YES if you prefer the old behaviour instead.
+
+MULTILINE_CPP_IS_BRIEF = NO
+
+# If the DETAILS_AT_TOP tag is set to YES then Doxygen
+# will output the detailed description near the top, like JavaDoc.
+# If set to NO, the detailed description appears after the member
+# documentation.
+
+DETAILS_AT_TOP = NO
+
+# If the INHERIT_DOCS tag is set to YES (the default) then an undocumented
+# member inherits the documentation from any documented member that it
+# re-implements.
+
+INHERIT_DOCS = YES
+
+# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC
+# tag is set to YES, then doxygen will reuse the documentation of the first
+# member in the group (if any) for the other members of the group. By default
+# all members of a group must be documented explicitly.
+
+DISTRIBUTE_GROUP_DOC = NO
+
+# The TAB_SIZE tag can be used to set the number of spaces in a tab.
+# Doxygen uses this value to replace tabs by spaces in code fragments.
+
+TAB_SIZE = 8
+
+# This tag can be used to specify a number of aliases that acts
+# as commands in the documentation. An alias has the form "name=value".
+# For example adding "sideeffect=\par Side Effects:\n" will allow you to
+# put the command \sideeffect (or @sideeffect) in the documentation, which
+# will result in a user-defined paragraph with heading "Side Effects:".
+# You can put \n's in the value part of an alias to insert newlines.
+
+ALIASES =
+
+# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C sources
+# only. Doxygen will then generate output that is more tailored for C.
+# For instance, some of the names that are used will be different. The list
+# of all members will be omitted, etc.
+
+OPTIMIZE_OUTPUT_FOR_C = NO
+
+# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java sources
+# only. Doxygen will then generate output that is more tailored for Java.
+# For instance, namespaces will be presented as packages, qualified scopes
+# will look different, etc.
+
+OPTIMIZE_OUTPUT_JAVA = NO
+
+# Set the SUBGROUPING tag to YES (the default) to allow class member groups of
+# the same type (for instance a group of public functions) to be put as a
+# subgroup of that type (e.g. under the Public Functions section). Set it to
+# NO to prevent subgrouping. Alternatively, this can be done per class using
+# the \nosubgrouping command.
+
+SUBGROUPING = YES
+
+#---------------------------------------------------------------------------
+# Build related configuration options
+#---------------------------------------------------------------------------
+
+# If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in
+# documentation are documented, even if no documentation was available.
+# Private class members and static file members will be hidden unless
+# the EXTRACT_PRIVATE and EXTRACT_STATIC tags are set to YES
+
+EXTRACT_ALL = NO
+
+# If the EXTRACT_PRIVATE tag is set to YES all private members of a class
+# will be included in the documentation.
+
+EXTRACT_PRIVATE = NO
+
+# If the EXTRACT_STATIC tag is set to YES all static members of a file
+# will be included in the documentation.
+
+EXTRACT_STATIC = NO
+
+# If the EXTRACT_LOCAL_CLASSES tag is set to YES classes (and structs)
+# defined locally in source files will be included in the documentation.
+# If set to NO only classes defined in header files are included.
+
+EXTRACT_LOCAL_CLASSES = YES
+
+# This flag is only useful for Objective-C code. When set to YES local
+# methods, which are defined in the implementation section but not in
+# the interface are included in the documentation.
+# If set to NO (the default) only methods in the interface are included.
+
+EXTRACT_LOCAL_METHODS = NO
+
+# If the HIDE_UNDOC_MEMBERS tag is set to YES, Doxygen will hide all
+# undocumented members of documented classes, files or namespaces.
+# If set to NO (the default) these members will be included in the
+# various overviews, but no documentation section is generated.
+# This option has no effect if EXTRACT_ALL is enabled.
+
+HIDE_UNDOC_MEMBERS = NO
+
+# If the HIDE_UNDOC_CLASSES tag is set to YES, Doxygen will hide all
+# undocumented classes that are normally visible in the class hierarchy.
+# If set to NO (the default) these classes will be included in the various
+# overviews. This option has no effect if EXTRACT_ALL is enabled.
+
+HIDE_UNDOC_CLASSES = NO
+
+# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, Doxygen will hide all
+# friend (class|struct|union) declarations.
+# If set to NO (the default) these declarations will be included in the
+# documentation.
+
+HIDE_FRIEND_COMPOUNDS = NO
+
+# If the HIDE_IN_BODY_DOCS tag is set to YES, Doxygen will hide any
+# documentation blocks found inside the body of a function.
+# If set to NO (the default) these blocks will be appended to the
+# function's detailed documentation block.
+
+HIDE_IN_BODY_DOCS = NO
+
+# The INTERNAL_DOCS tag determines if documentation
+# that is typed after a \internal command is included. If the tag is set
+# to NO (the default) then the documentation will be excluded.
+# Set it to YES to include the internal documentation.
+
+INTERNAL_DOCS = NO
+
+# If the CASE_SENSE_NAMES tag is set to NO then Doxygen will only generate
+# file names in lower-case letters. If set to YES upper-case letters are also
+# allowed. This is useful if you have classes or files whose names only differ
+# in case and if your file system supports case sensitive file names. Windows
+# and Mac users are advised to set this option to NO.
+
+CASE_SENSE_NAMES = YES
+
+# If the HIDE_SCOPE_NAMES tag is set to NO (the default) then Doxygen
+# will show members with their full class and namespace scopes in the
+# documentation. If set to YES the scope will be hidden.
+
+HIDE_SCOPE_NAMES = NO
+
+# If the SHOW_INCLUDE_FILES tag is set to YES (the default) then Doxygen
+# will put a list of the files that are included by a file in the documentation
+# of that file.
+
+SHOW_INCLUDE_FILES = YES
+
+# If the INLINE_INFO tag is set to YES (the default) then a tag [inline]
+# is inserted in the documentation for inline members.
+
+INLINE_INFO = YES
+
+# If the SORT_MEMBER_DOCS tag is set to YES (the default) then doxygen
+# will sort the (detailed) documentation of file and class members
+# alphabetically by member name. If set to NO the members will appear in
+# declaration order.
+
+SORT_MEMBER_DOCS = YES
+
+# If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the
+# brief documentation of file, namespace and class members alphabetically
+# by member name. If set to NO (the default) the members will appear in
+# declaration order.
+
+SORT_BRIEF_DOCS = NO
+
+# If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be
+# sorted by fully-qualified names, including namespaces. If set to
+# NO (the default), the class list will be sorted only by class name,
+# not including the namespace part.
+# Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES.
+# Note: This option applies only to the class list, not to the
+# alphabetical list.
+
+SORT_BY_SCOPE_NAME = NO
+
+# The GENERATE_TODOLIST tag can be used to enable (YES) or
+# disable (NO) the todo list. This list is created by putting \todo
+# commands in the documentation.
+
+GENERATE_TODOLIST = YES
+
+# The GENERATE_TESTLIST tag can be used to enable (YES) or
+# disable (NO) the test list. This list is created by putting \test
+# commands in the documentation.
+
+GENERATE_TESTLIST = YES
+
+# The GENERATE_BUGLIST tag can be used to enable (YES) or
+# disable (NO) the bug list. This list is created by putting \bug
+# commands in the documentation.
+
+GENERATE_BUGLIST = YES
+
+# The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or
+# disable (NO) the deprecated list. This list is created by putting
+# \deprecated commands in the documentation.
+
+GENERATE_DEPRECATEDLIST= YES
+
+# The ENABLED_SECTIONS tag can be used to enable conditional
+# documentation sections, marked by \if sectionname ... \endif.
+
+ENABLED_SECTIONS =
+
+# The MAX_INITIALIZER_LINES tag determines the maximum number of lines
+# the initial value of a variable or define consists of for it to appear in
+# the documentation. If the initializer consists of more lines than specified
+# here it will be hidden. Use a value of 0 to hide initializers completely.
+# The appearance of the initializer of individual variables and defines in the
+# documentation can be controlled using \showinitializer or \hideinitializer
+# command in the documentation regardless of this setting.
+
+MAX_INITIALIZER_LINES = 30
+
+# Set the SHOW_USED_FILES tag to NO to disable the list of files generated
+# at the bottom of the documentation of classes and structs. If set to YES the
+# list will mention the files that were used to generate the documentation.
+
+SHOW_USED_FILES = YES
+
+# If the sources in your project are distributed over multiple directories
+# then setting the SHOW_DIRECTORIES tag to YES will show the directory hierarchy
+# in the documentation.
+
+SHOW_DIRECTORIES = YES
+
+#---------------------------------------------------------------------------
+# configuration options related to warning and progress messages
+#---------------------------------------------------------------------------
+
+# The QUIET tag can be used to turn on/off the messages that are generated
+# by doxygen. Possible values are YES and NO. If left blank NO is used.
+
+QUIET = NO
+
+# The WARNINGS tag can be used to turn on/off the warning messages that are
+# generated by doxygen. Possible values are YES and NO. If left blank
+# NO is used.
+
+WARNINGS = YES
+
+# If WARN_IF_UNDOCUMENTED is set to YES, then doxygen will generate warnings
+# for undocumented members. If EXTRACT_ALL is set to YES then this flag will
+# automatically be disabled.
+
+WARN_IF_UNDOCUMENTED = YES
+
+# If WARN_IF_DOC_ERROR is set to YES, doxygen will generate warnings for
+# potential errors in the documentation, such as not documenting some
+# parameters in a documented function, or documenting parameters that
+# don't exist or using markup commands wrongly.
+
+WARN_IF_DOC_ERROR = YES
+
+# The WARN_FORMAT tag determines the format of the warning messages that
+# doxygen can produce. The string should contain the $file, $line, and $text
+# tags, which will be replaced by the file and line number from which the
+# warning originated and the warning text.
+
+WARN_FORMAT = "$file:$line: $text"
+
+# The WARN_LOGFILE tag can be used to specify a file to which warning
+# and error messages should be written. If left blank the output is written
+# to stderr.
+
+WARN_LOGFILE =
+
+#---------------------------------------------------------------------------
+# configuration options related to the input files
+#---------------------------------------------------------------------------
+
+# The INPUT tag can be used to specify the files and/or directories that contain
+# documented source files. You may enter file names like "myfile.cpp" or
+# directories like "/usr/src/myproject". Separate the files or directories
+# with spaces.
+
+INPUT = .
+
+# If the value of the INPUT tag contains directories, you can use the
+# FILE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp
+# and *.h) to filter out the source-files in the directories. If left
+# blank the following patterns are tested:
+# *.c *.cc *.cxx *.cpp *.c++ *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh *.hxx *.hpp
+# *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm
+
+FILE_PATTERNS =
+
+# The RECURSIVE tag can be used to turn specify whether or not subdirectories
+# should be searched for input files as well. Possible values are YES and NO.
+# If left blank NO is used.
+
+RECURSIVE = NO
+
+# The EXCLUDE tag can be used to specify files and/or directories that should
+# excluded from the INPUT source files. This way you can easily exclude a
+# subdirectory from a directory tree whose root is specified with the INPUT tag.
+
+EXCLUDE =
+
+# The EXCLUDE_SYMLINKS tag can be used select whether or not files or directories
+# that are symbolic links (a Unix filesystem feature) are excluded from the input.
+
+EXCLUDE_SYMLINKS = NO
+
+# If the value of the INPUT tag contains directories, you can use the
+# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude
+# certain files from those directories.
+
+EXCLUDE_PATTERNS =
+
+# The EXAMPLE_PATH tag can be used to specify one or more files or
+# directories that contain example code fragments that are included (see
+# the \include command).
+
+EXAMPLE_PATH =
+
+# If the value of the EXAMPLE_PATH tag contains directories, you can use the
+# EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp
+# and *.h) to filter out the source-files in the directories. If left
+# blank all files are included.
+
+EXAMPLE_PATTERNS =
+
+# If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be
+# searched for input files to be used with the \include or \dontinclude
+# commands irrespective of the value of the RECURSIVE tag.
+# Possible values are YES and NO. If left blank NO is used.
+
+EXAMPLE_RECURSIVE = NO
+
+# The IMAGE_PATH tag can be used to specify one or more files or
+# directories that contain image that are included in the documentation (see
+# the \image command).
+
+IMAGE_PATH =
+
+# The INPUT_FILTER tag can be used to specify a program that doxygen should
+# invoke to filter for each input file. Doxygen will invoke the filter program
+# by executing (via popen()) the command <filter> <input-file>, where <filter>
+# is the value of the INPUT_FILTER tag, and <input-file> is the name of an
+# input file. Doxygen will then use the output that the filter program writes
+# to standard output. If FILTER_PATTERNS is specified, this tag will be
+# ignored.
+
+INPUT_FILTER =
+
+# The FILTER_PATTERNS tag can be used to specify filters on a per file pattern
+# basis. Doxygen will compare the file name with each pattern and apply the
+# filter if there is a match. The filters are a list of the form:
+# pattern=filter (like *.cpp=my_cpp_filter). See INPUT_FILTER for further
+# info on how filters are used. If FILTER_PATTERNS is empty, INPUT_FILTER
+# is applied to all files.
+
+FILTER_PATTERNS =
+
+# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using
+# INPUT_FILTER) will be used to filter the input files when producing source
+# files to browse (i.e. when SOURCE_BROWSER is set to YES).
+
+FILTER_SOURCE_FILES = NO
+
+#---------------------------------------------------------------------------
+# configuration options related to source browsing
+#---------------------------------------------------------------------------
+
+# If the SOURCE_BROWSER tag is set to YES then a list of source files will
+# be generated. Documented entities will be cross-referenced with these sources.
+# Note: To get rid of all source code in the generated output, make sure also
+# VERBATIM_HEADERS is set to NO.
+
+SOURCE_BROWSER = NO
+
+# Setting the INLINE_SOURCES tag to YES will include the body
+# of functions and classes directly in the documentation.
+
+INLINE_SOURCES = NO
+
+# Setting the STRIP_CODE_COMMENTS tag to YES (the default) will instruct
+# doxygen to hide any special comment blocks from generated source code
+# fragments. Normal C and C++ comments will always remain visible.
+
+STRIP_CODE_COMMENTS = YES
+
+# If the REFERENCED_BY_RELATION tag is set to YES (the default)
+# then for each documented function all documented
+# functions referencing it will be listed.
+
+REFERENCED_BY_RELATION = YES
+
+# If the REFERENCES_RELATION tag is set to YES (the default)
+# then for each documented function all documented entities
+# called/used by that function will be listed.
+
+REFERENCES_RELATION = YES
+
+# If the VERBATIM_HEADERS tag is set to YES (the default) then Doxygen
+# will generate a verbatim copy of the header file for each class for
+# which an include is specified. Set to NO to disable this.
+
+VERBATIM_HEADERS = YES
+
+#---------------------------------------------------------------------------
+# configuration options related to the alphabetical class index
+#---------------------------------------------------------------------------
+
+# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index
+# of all compounds will be generated. Enable this if the project
+# contains a lot of classes, structs, unions or interfaces.
+
+ALPHABETICAL_INDEX = NO
+
+# If the alphabetical index is enabled (see ALPHABETICAL_INDEX) then
+# the COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns
+# in which this list will be split (can be a number in the range [1..20])
+
+COLS_IN_ALPHA_INDEX = 5
+
+# In case all classes in a project start with a common prefix, all
+# classes will be put under the same header in the alphabetical index.
+# The IGNORE_PREFIX tag can be used to specify one or more prefixes that
+# should be ignored while generating the index headers.
+
+IGNORE_PREFIX =
+
+#---------------------------------------------------------------------------
+# configuration options related to the HTML output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_HTML tag is set to YES (the default) Doxygen will
+# generate HTML output.
+
+GENERATE_HTML = YES
+
+# The HTML_OUTPUT tag is used to specify where the HTML docs will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
+# put in front of it. If left blank `html' will be used as the default path.
+
+HTML_OUTPUT = html
+
+# The HTML_FILE_EXTENSION tag can be used to specify the file extension for
+# each generated HTML page (for example: .htm,.php,.asp). If it is left blank
+# doxygen will generate files with .html extension.
+
+HTML_FILE_EXTENSION = .html
+
+# The HTML_HEADER tag can be used to specify a personal HTML header for
+# each generated HTML page. If it is left blank doxygen will generate a
+# standard header.
+
+HTML_HEADER =
+
+# The HTML_FOOTER tag can be used to specify a personal HTML footer for
+# each generated HTML page. If it is left blank doxygen will generate a
+# standard footer.
+
+HTML_FOOTER =
+
+# The HTML_STYLESHEET tag can be used to specify a user-defined cascading
+# style sheet that is used by each HTML page. It can be used to
+# fine-tune the look of the HTML output. If the tag is left blank doxygen
+# will generate a default style sheet. Note that doxygen will try to copy
+# the style sheet file to the HTML output directory, so don't put your own
+# stylesheet in the HTML output directory as well, or it will be erased!
+
+HTML_STYLESHEET =
+
+# If the HTML_ALIGN_MEMBERS tag is set to YES, the members of classes,
+# files or namespaces will be aligned in HTML using tables. If set to
+# NO a bullet list will be used.
+
+HTML_ALIGN_MEMBERS = YES
+
+# If the GENERATE_HTMLHELP tag is set to YES, additional index files
+# will be generated that can be used as input for tools like the
+# Microsoft HTML help workshop to generate a compressed HTML help file (.chm)
+# of the generated HTML documentation.
+
+GENERATE_HTMLHELP = NO
+
+# If the GENERATE_HTMLHELP tag is set to YES, the CHM_FILE tag can
+# be used to specify the file name of the resulting .chm file. You
+# can add a path in front of the file if the result should not be
+# written to the html output directory.
+
+CHM_FILE =
+
+# If the GENERATE_HTMLHELP tag is set to YES, the HHC_LOCATION tag can
+# be used to specify the location (absolute path including file name) of
+# the HTML help compiler (hhc.exe). If non-empty doxygen will try to run
+# the HTML help compiler on the generated index.hhp.
+
+HHC_LOCATION =
+
+# If the GENERATE_HTMLHELP tag is set to YES, the GENERATE_CHI flag
+# controls if a separate .chi index file is generated (YES) or that
+# it should be included in the master .chm file (NO).
+
+GENERATE_CHI = NO
+
+# If the GENERATE_HTMLHELP tag is set to YES, the BINARY_TOC flag
+# controls whether a binary table of contents is generated (YES) or a
+# normal table of contents (NO) in the .chm file.
+
+BINARY_TOC = NO
+
+# The TOC_EXPAND flag can be set to YES to add extra items for group members
+# to the contents of the HTML help documentation and to the tree view.
+
+TOC_EXPAND = NO
+
+# The DISABLE_INDEX tag can be used to turn on/off the condensed index at
+# top of each HTML page. The value NO (the default) enables the index and
+# the value YES disables it.
+
+DISABLE_INDEX = NO
+
+# This tag can be used to set the number of enum values (range [1..20])
+# that doxygen will group on one line in the generated HTML documentation.
+
+ENUM_VALUES_PER_LINE = 4
+
+# If the GENERATE_TREEVIEW tag is set to YES, a side panel will be
+# generated containing a tree-like index structure (just like the one that
+# is generated for HTML Help). For this to work a browser that supports
+# JavaScript, DHTML, CSS and frames is required (for instance Mozilla 1.0+,
+# Netscape 6.0+, Internet explorer 5.0+, or Konqueror). Windows users are
+# probably better off using the HTML help feature.
+
+GENERATE_TREEVIEW = NO
+
+# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be
+# used to set the initial width (in pixels) of the frame in which the tree
+# is shown.
+
+TREEVIEW_WIDTH = 250
+
+#---------------------------------------------------------------------------
+# configuration options related to the LaTeX output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_LATEX tag is set to YES (the default) Doxygen will
+# generate Latex output.
+
+GENERATE_LATEX = NO
+
+# The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
+# put in front of it. If left blank `latex' will be used as the default path.
+
+LATEX_OUTPUT = latex
+
+# The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be
+# invoked. If left blank `latex' will be used as the default command name.
+
+LATEX_CMD_NAME = latex
+
+# The MAKEINDEX_CMD_NAME tag can be used to specify the command name to
+# generate index for LaTeX. If left blank `makeindex' will be used as the
+# default command name.
+
+MAKEINDEX_CMD_NAME = makeindex
+
+# If the COMPACT_LATEX tag is set to YES Doxygen generates more compact
+# LaTeX documents. This may be useful for small projects and may help to
+# save some trees in general.
+
+COMPACT_LATEX = NO
+
+# The PAPER_TYPE tag can be used to set the paper type that is used
+# by the printer. Possible values are: a4, a4wide, letter, legal and
+# executive. If left blank a4wide will be used.
+
+PAPER_TYPE = a4wide
+
+# The EXTRA_PACKAGES tag can be to specify one or more names of LaTeX
+# packages that should be included in the LaTeX output.
+
+EXTRA_PACKAGES =
+
+# The LATEX_HEADER tag can be used to specify a personal LaTeX header for
+# the generated latex document. The header should contain everything until
+# the first chapter. If it is left blank doxygen will generate a
+# standard header. Notice: only use this tag if you know what you are doing!
+
+LATEX_HEADER =
+
+# If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated
+# is prepared for conversion to pdf (using ps2pdf). The pdf file will
+# contain links (just like the HTML output) instead of page references
+# This makes the output suitable for online browsing using a pdf viewer.
+
+PDF_HYPERLINKS = NO
+
+# If the USE_PDFLATEX tag is set to YES, pdflatex will be used instead of
+# plain latex in the generated Makefile. Set this option to YES to get a
+# higher quality PDF documentation.
+
+USE_PDFLATEX = NO
+
+# If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \\batchmode.
+# command to the generated LaTeX files. This will instruct LaTeX to keep
+# running if errors occur, instead of asking the user for help.
+# This option is also used when generating formulas in HTML.
+
+LATEX_BATCHMODE = NO
+
+# If LATEX_HIDE_INDICES is set to YES then doxygen will not
+# include the index chapters (such as File Index, Compound Index, etc.)
+# in the output.
+
+LATEX_HIDE_INDICES = NO
+
+#---------------------------------------------------------------------------
+# configuration options related to the RTF output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_RTF tag is set to YES Doxygen will generate RTF output
+# The RTF output is optimized for Word 97 and may not look very pretty with
+# other RTF readers or editors.
+
+GENERATE_RTF = NO
+
+# The RTF_OUTPUT tag is used to specify where the RTF docs will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
+# put in front of it. If left blank `rtf' will be used as the default path.
+
+RTF_OUTPUT = rtf
+
+# If the COMPACT_RTF tag is set to YES Doxygen generates more compact
+# RTF documents. This may be useful for small projects and may help to
+# save some trees in general.
+
+COMPACT_RTF = NO
+
+# If the RTF_HYPERLINKS tag is set to YES, the RTF that is generated
+# will contain hyperlink fields. The RTF file will
+# contain links (just like the HTML output) instead of page references.
+# This makes the output suitable for online browsing using WORD or other
+# programs which support those fields.
+# Note: wordpad (write) and others do not support links.
+
+RTF_HYPERLINKS = NO
+
+# Load stylesheet definitions from file. Syntax is similar to doxygen's
+# config file, i.e. a series of assignments. You only have to provide
+# replacements, missing definitions are set to their default value.
+
+RTF_STYLESHEET_FILE =
+
+# Set optional variables used in the generation of an rtf document.
+# Syntax is similar to doxygen's config file.
+
+RTF_EXTENSIONS_FILE =
+
+#---------------------------------------------------------------------------
+# configuration options related to the man page output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_MAN tag is set to YES (the default) Doxygen will
+# generate man pages
+
+GENERATE_MAN = NO
+
+# The MAN_OUTPUT tag is used to specify where the man pages will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
+# put in front of it. If left blank `man' will be used as the default path.
+
+MAN_OUTPUT = man
+
+# The MAN_EXTENSION tag determines the extension that is added to
+# the generated man pages (default is the subroutine's section .3)
+
+MAN_EXTENSION = .3
+
+# If the MAN_LINKS tag is set to YES and Doxygen generates man output,
+# then it will generate one additional man file for each entity
+# documented in the real man page(s). These additional files
+# only source the real man page, but without them the man command
+# would be unable to find the correct page. The default is NO.
+
+MAN_LINKS = NO
+
+#---------------------------------------------------------------------------
+# configuration options related to the XML output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_XML tag is set to YES Doxygen will
+# generate an XML file that captures the structure of
+# the code including all documentation.
+
+GENERATE_XML = NO
+
+# The XML_OUTPUT tag is used to specify where the XML pages will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
+# put in front of it. If left blank `xml' will be used as the default path.
+
+XML_OUTPUT = xml
+
+# The XML_SCHEMA tag can be used to specify an XML schema,
+# which can be used by a validating XML parser to check the
+# syntax of the XML files.
+
+XML_SCHEMA =
+
+# The XML_DTD tag can be used to specify an XML DTD,
+# which can be used by a validating XML parser to check the
+# syntax of the XML files.
+
+XML_DTD =
+
+# If the XML_PROGRAMLISTING tag is set to YES Doxygen will
+# dump the program listings (including syntax highlighting
+# and cross-referencing information) to the XML output. Note that
+# enabling this will significantly increase the size of the XML output.
+
+XML_PROGRAMLISTING = YES
+
+#---------------------------------------------------------------------------
+# configuration options for the AutoGen Definitions output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_AUTOGEN_DEF tag is set to YES Doxygen will
+# generate an AutoGen Definitions (see autogen.sf.net) file
+# that captures the structure of the code including all
+# documentation. Note that this feature is still experimental
+# and incomplete at the moment.
+
+GENERATE_AUTOGEN_DEF = NO
+
+#---------------------------------------------------------------------------
+# configuration options related to the Perl module output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_PERLMOD tag is set to YES Doxygen will
+# generate a Perl module file that captures the structure of
+# the code including all documentation. Note that this
+# feature is still experimental and incomplete at the
+# moment.
+
+GENERATE_PERLMOD = NO
+
+# If the PERLMOD_LATEX tag is set to YES Doxygen will generate
+# the necessary Makefile rules, Perl scripts and LaTeX code to be able
+# to generate PDF and DVI output from the Perl module output.
+
+PERLMOD_LATEX = NO
+
+# If the PERLMOD_PRETTY tag is set to YES the Perl module output will be
+# nicely formatted so it can be parsed by a human reader. This is useful
+# if you want to understand what is going on. On the other hand, if this
+# tag is set to NO the size of the Perl module output will be much smaller
+# and Perl will parse it just the same.
+
+PERLMOD_PRETTY = YES
+
+# The names of the make variables in the generated doxyrules.make file
+# are prefixed with the string contained in PERLMOD_MAKEVAR_PREFIX.
+# This is useful so different doxyrules.make files included by the same
+# Makefile don't overwrite each other's variables.
+
+PERLMOD_MAKEVAR_PREFIX =
+
+#---------------------------------------------------------------------------
+# Configuration options related to the preprocessor
+#---------------------------------------------------------------------------
+
+# If the ENABLE_PREPROCESSING tag is set to YES (the default) Doxygen will
+# evaluate all C-preprocessor directives found in the sources and include
+# files.
+
+ENABLE_PREPROCESSING = YES
+
+# If the MACRO_EXPANSION tag is set to YES Doxygen will expand all macro
+# names in the source code. If set to NO (the default) only conditional
+# compilation will be performed. Macro expansion can be done in a controlled
+# way by setting EXPAND_ONLY_PREDEF to YES.
+
+MACRO_EXPANSION = NO
+
+# If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES
+# then the macro expansion is limited to the macros specified with the
+# PREDEFINED and EXPAND_AS_PREDEFINED tags.
+
+EXPAND_ONLY_PREDEF = NO
+
+# If the SEARCH_INCLUDES tag is set to YES (the default) the includes files
+# in the INCLUDE_PATH (see below) will be search if a #include is found.
+
+SEARCH_INCLUDES = YES
+
+# The INCLUDE_PATH tag can be used to specify one or more directories that
+# contain include files that are not input files but should be processed by
+# the preprocessor.
+
+INCLUDE_PATH =
+
+# You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard
+# patterns (like *.h and *.hpp) to filter out the header-files in the
+# directories. If left blank, the patterns specified with FILE_PATTERNS will
+# be used.
+
+INCLUDE_FILE_PATTERNS =
+
+# The PREDEFINED tag can be used to specify one or more macro names that
+# are defined before the preprocessor is started (similar to the -D option of
+# gcc). The argument of the tag is a list of macros of the form: name
+# or name=definition (no spaces). If the definition and the = are
+# omitted =1 is assumed. To prevent a macro definition from being
+# undefined via #undef or recursively expanded use the := operator
+# instead of the = operator.
+
+PREDEFINED = IAM_DOXYGEN
+
+# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then
+# this tag can be used to specify a list of macro names that should be expanded.
+# The macro definition that is found in the sources will be used.
+# Use the PREDEFINED tag if you want to use a different macro definition.
+
+EXPAND_AS_DEFINED =
+
+# If the SKIP_FUNCTION_MACROS tag is set to YES (the default) then
+# doxygen's preprocessor will remove all function-like macros that are alone
+# on a line, have an all uppercase name, and do not end with a semicolon. Such
+# function macros are typically used for boiler-plate code, and will confuse the
+# parser if not removed.
+
+SKIP_FUNCTION_MACROS = YES
+
+#---------------------------------------------------------------------------
+# Configuration::additions related to external references
+#---------------------------------------------------------------------------
+
+# The TAGFILES option can be used to specify one or more tagfiles.
+# Optionally an initial location of the external documentation
+# can be added for each tagfile. The format of a tag file without
+# this location is as follows:
+# TAGFILES = file1 file2 ...
+# Adding location for the tag files is done as follows:
+# TAGFILES = file1=loc1 "file2 = loc2" ...
+# where "loc1" and "loc2" can be relative or absolute paths or
+# URLs. If a location is present for each tag, the installdox tool
+# does not have to be run to correct the links.
+# Note that each tag file must have a unique name
+# (where the name does NOT include the path)
+# If a tag file is not located in the directory in which doxygen
+# is run, you must also specify the path to the tagfile here.
+
+TAGFILES =
+
+# When a file name is specified after GENERATE_TAGFILE, doxygen will create
+# a tag file that is based on the input files it reads.
+
+GENERATE_TAGFILE =
+
+# If the ALLEXTERNALS tag is set to YES all external classes will be listed
+# in the class index. If set to NO only the inherited external classes
+# will be listed.
+
+ALLEXTERNALS = NO
+
+# If the EXTERNAL_GROUPS tag is set to YES all external groups will be listed
+# in the modules index. If set to NO, only the current project's groups will
+# be listed.
+
+EXTERNAL_GROUPS = YES
+
+# The PERL_PATH should be the absolute path and name of the perl script
+# interpreter (i.e. the result of `which perl').
+
+PERL_PATH = /usr/bin/perl
+
+#---------------------------------------------------------------------------
+# Configuration options related to the dot tool
+#---------------------------------------------------------------------------
+
+# If the CLASS_DIAGRAMS tag is set to YES (the default) Doxygen will
+# generate a inheritance diagram (in HTML, RTF and LaTeX) for classes with base or
+# super classes. Setting the tag to NO turns the diagrams off. Note that this
+# option is superseded by the HAVE_DOT option below. This is only a fallback. It is
+# recommended to install and use dot, since it yields more powerful graphs.
+
+CLASS_DIAGRAMS = YES
+
+# If set to YES, the inheritance and collaboration graphs will hide
+# inheritance and usage relations if the target is undocumented
+# or is not a class.
+
+HIDE_UNDOC_RELATIONS = YES
+
+# If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is
+# available from the path. This tool is part of Graphviz, a graph visualization
+# toolkit from AT&T and Lucent Bell Labs. The other options in this section
+# have no effect if this option is set to NO (the default)
+
+HAVE_DOT = YES
+
+# If the CLASS_GRAPH and HAVE_DOT tags are set to YES then doxygen
+# will generate a graph for each documented class showing the direct and
+# indirect inheritance relations. Setting this tag to YES will force the
+# the CLASS_DIAGRAMS tag to NO.
+
+CLASS_GRAPH = YES
+
+# If the COLLABORATION_GRAPH and HAVE_DOT tags are set to YES then doxygen
+# will generate a graph for each documented class showing the direct and
+# indirect implementation dependencies (inheritance, containment, and
+# class references variables) of the class with other documented classes.
+
+COLLABORATION_GRAPH = YES
+
+# If the UML_LOOK tag is set to YES doxygen will generate inheritance and
+# collaboration diagrams in a style similar to the OMG's Unified Modeling
+# Language.
+
+UML_LOOK = NO
+
+# If set to YES, the inheritance and collaboration graphs will show the
+# relations between templates and their instances.
+
+TEMPLATE_RELATIONS = NO
+
+# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDE_GRAPH, and HAVE_DOT
+# tags are set to YES then doxygen will generate a graph for each documented
+# file showing the direct and indirect include dependencies of the file with
+# other documented files.
+
+INCLUDE_GRAPH = YES
+
+# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDED_BY_GRAPH, and
+# HAVE_DOT tags are set to YES then doxygen will generate a graph for each
+# documented header file showing the documented files that directly or
+# indirectly include this file.
+
+INCLUDED_BY_GRAPH = YES
+
+# If the CALL_GRAPH and HAVE_DOT tags are set to YES then doxygen will
+# generate a call dependency graph for every global function or class method.
+# Note that enabling this option will significantly increase the time of a run.
+# So in most cases it will be better to enable call graphs for selected
+# functions only using the \callgraph command.
+
+CALL_GRAPH = NO
+
+# If the GRAPHICAL_HIERARCHY and HAVE_DOT tags are set to YES then doxygen
+# will graphical hierarchy of all classes instead of a textual one.
+
+GRAPHICAL_HIERARCHY = YES
+
+# The DOT_IMAGE_FORMAT tag can be used to set the image format of the images
+# generated by dot. Possible values are png, jpg, or gif
+# If left blank png will be used.
+
+DOT_IMAGE_FORMAT = png
+
+# The tag DOT_PATH can be used to specify the path where the dot tool can be
+# found. If left blank, it is assumed the dot tool can be found on the path.
+
+DOT_PATH =
+
+# The DOTFILE_DIRS tag can be used to specify one or more directories that
+# contain dot files that are included in the documentation (see the
+# \dotfile command).
+
+DOTFILE_DIRS =
+
+# The MAX_DOT_GRAPH_WIDTH tag can be used to set the maximum allowed width
+# (in pixels) of the graphs generated by dot. If a graph becomes larger than
+# this value, doxygen will try to truncate the graph, so that it fits within
+# the specified constraint. Beware that most browsers cannot cope with very
+# large images.
+
+MAX_DOT_GRAPH_WIDTH = 1024
+
+# The MAX_DOT_GRAPH_HEIGHT tag can be used to set the maximum allows height
+# (in pixels) of the graphs generated by dot. If a graph becomes larger than
+# this value, doxygen will try to truncate the graph, so that it fits within
+# the specified constraint. Beware that most browsers cannot cope with very
+# large images.
+
+MAX_DOT_GRAPH_HEIGHT = 1024
+
+# The MAX_DOT_GRAPH_DEPTH tag can be used to set the maximum depth of the
+# graphs generated by dot. A depth value of 3 means that only nodes reachable
+# from the root by following a path via at most 3 edges will be shown. Nodes that
+# lay further from the root node will be omitted. Note that setting this option to
+# 1 or 2 may greatly reduce the computation time needed for large code bases. Also
+# note that a graph may be further truncated if the graph's image dimensions are
+# not sufficient to fit the graph (see MAX_DOT_GRAPH_WIDTH and MAX_DOT_GRAPH_HEIGHT).
+# If 0 is used for the depth value (the default), the graph is not depth-constrained.
+
+MAX_DOT_GRAPH_DEPTH = 0
+
+# If the GENERATE_LEGEND tag is set to YES (the default) Doxygen will
+# generate a legend page explaining the meaning of the various boxes and
+# arrows in the dot generated graphs.
+
+GENERATE_LEGEND = YES
+
+# If the DOT_CLEANUP tag is set to YES (the default) Doxygen will
+# remove the intermediate dot files that are used to generate
+# the various graphs.
+
+DOT_CLEANUP = YES
+
+#---------------------------------------------------------------------------
+# Configuration::additions related to the search engine
+#---------------------------------------------------------------------------
+
+# The SEARCHENGINE tag specifies whether or not a search engine should be
+# used. If set to NO the values of all tags below this one will be ignored.
+
+SEARCHENGINE = NO
diff --git a/searchlib/src/vespa/searchlib/fef/OWNERS b/searchlib/src/vespa/searchlib/fef/OWNERS
new file mode 100644
index 00000000000..12b533ec610
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/OWNERS
@@ -0,0 +1 @@
+havardpe
diff --git a/searchlib/src/vespa/searchlib/fef/blueprint.cpp b/searchlib/src/vespa/searchlib/fef/blueprint.cpp
new file mode 100644
index 00000000000..c9a9b94eb44
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/blueprint.cpp
@@ -0,0 +1,76 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".fef.blueprint");
+#include "blueprint.h"
+#include "parametervalidator.h"
+
+namespace search {
+namespace fef {
+
+const FeatureType &
+Blueprint::defineInput(const vespalib::stringref &inName,
+ AcceptInput accept)
+{
+ assert(_dependency_handler != nullptr);
+ return _dependency_handler->resolve_input(inName, accept);
+}
+
+void
+Blueprint::describeOutput(const vespalib::stringref &outName,
+ const vespalib::stringref &desc,
+ const FeatureType &type)
+{
+ (void) desc;
+ assert(_dependency_handler != nullptr);
+ _dependency_handler->define_output(outName, type);
+}
+
+Blueprint::Blueprint(const vespalib::stringref & baseName)
+ : _baseName(baseName),
+ _name(),
+ _dependency_handler(nullptr)
+{
+}
+
+Blueprint::~Blueprint()
+{
+}
+
+ParameterDescriptions
+Blueprint::getDescriptions() const
+{
+ // desc: 0-n parameters
+ return ParameterDescriptions().desc().string().repeat();
+}
+
+bool
+Blueprint::setup(const IIndexEnvironment &indexEnv,
+ const StringVector &params)
+{
+ ParameterDescriptions descs = getDescriptions();
+ ParameterValidator validator(indexEnv, params, descs);
+ ParameterValidator::Result result = validator.validate();
+ if (result.valid()) {
+ return setup(indexEnv, result.getParameters());
+ } else {
+ LOG(error, "The parameter list used for setting up rank feature %s is not valid: %s",
+ getBaseName().c_str(), result.getError().c_str());
+ return false;
+ }
+}
+
+bool
+Blueprint::setup(const IIndexEnvironment &indexEnv,
+ const ParameterList &params)
+{
+ (void) indexEnv; (void) params;
+ LOG(error, "The setup function using a typed parameter list does not have a default implementation. "
+ "Make sure the setup function is implemented in the rank feature %s.", getBaseName().c_str());
+ return false;
+}
+
+
+} // namespace fef
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/fef/blueprint.h b/searchlib/src/vespa/searchlib/fef/blueprint.h
new file mode 100644
index 00000000000..9b7a7c541a2
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/blueprint.h
@@ -0,0 +1,252 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <algorithm>
+#include <vector>
+#include "featureexecutor.h"
+#include "iindexenvironment.h"
+#include "iqueryenvironment.h"
+#include "idumpfeaturevisitor.h"
+#include "parameter.h"
+#include "parameterdescriptions.h"
+#include "feature_type.h"
+
+namespace search {
+namespace fef {
+
+/**
+ * A blueprint is a description of a named feature executor with a
+ * given set of parameters that also acts as a factory for that
+ * feature executor. During setup, the blueprint will look at the
+ * parameters and generate a list of input feature names and also name
+ * and describe its outputs. A blueprint will be created per rank
+ * setup and used to create feature executors per query. A single
+ * instance is used as a prototype to create actual blueprints used by
+ * the framework. The prototype instance will also get a chance to
+ * name features that should be dumped when doing a full feature dump
+ * (feature dumps are used for things like MLR training). It will be
+ * possible to define additional dump features in the config.
+ **/
+class Blueprint
+{
+public:
+ /**
+ * A feature can be either a number (double) or an object
+ * (vespalib::eval::Value::CREF). This enum is used to describe
+ * the accepted type for a specific input to a feature executor.
+ **/
+ enum class AcceptInput { NUMBER, OBJECT, ANY };
+
+ /**
+ * Interface used to set up feature dependencies recursively. This
+ * is needed to know the exact type of an input feature during
+ * executor setup.
+ **/
+ struct DependencyHandler {
+ virtual const FeatureType &resolve_input(const vespalib::string &feature_name, AcceptInput accept_type) = 0;
+ virtual void define_output(const vespalib::string &output_name, const FeatureType &type) = 0;
+ virtual ~DependencyHandler() {}
+ };
+
+ /**
+ * Convenience typedef for an auto pointer to this class.
+ **/
+ typedef std::unique_ptr<Blueprint> UP;
+
+ /**
+ * Convenience typedef for an shared pointer to this class.
+ **/
+ typedef std::shared_ptr<Blueprint> SP;
+
+ typedef vespalib::string string;
+ typedef std::vector<string> StringVector;
+
+private:
+ Blueprint(const Blueprint &);
+ Blueprint &operator=(const Blueprint &);
+
+ string _baseName;
+ string _name;
+ DependencyHandler *_dependency_handler;
+
+protected:
+ /**
+ * Define an input feature for this blueprint. This method should
+ * be invoked by the @ref setup method. Note that the order in
+ * which the inputs are defined is extremely important, since this
+ * must exactly match the input order of the corresponding feature
+ * executor. Note that inputs must be addressed with full feature
+ * names, for example 'foo(a,b).out'.
+ *
+ * @param inName feature name of input
+ * @param type accepted input type
+ **/
+ const FeatureType &defineInput(const vespalib::stringref &inName,
+ AcceptInput accept = AcceptInput::NUMBER);
+
+ /**
+ * Describe an output for this blueprint. This method should be
+ * invoked by the @ref setup method. Note that the order in which
+ * the outputs are described is extremely important, since this
+ * must exactly match the output order of the corresponding
+ * feature executor. Note that the output name is local to this
+ * blueprint. As an example, the blueprint 'foo(a,b)' having the
+ * feature 'foo(a,b).out' as output, would describe it simply as
+ * 'out'.
+ *
+ * @param outName output name
+ * @param desc output description
+ **/
+ void describeOutput(const vespalib::stringref &outName, const vespalib::stringref &desc,
+ const FeatureType &type = FeatureType::number());
+
+public:
+ /**
+ * Create an empty blueprint. Blueprints in their initial state
+ * are used as prototypes to create other instances of the same
+ * class. The @ref setup method is used to tailor a blueprint
+ * object for a specific set of parameters.
+ **/
+ Blueprint(const vespalib::stringref & baseName);
+
+ /**
+ * Obtain the base name of this blueprint. This method will
+ * typically only be invoked on the prototype object. The given
+ * name is the base name of all feature executors that will be
+ * indirectly created with this blueprint.
+ *
+ * An example scenario: A blueprint prototype is added with the
+ * base name 'foo'. If the framework needs to calculate the feature
+ * 'foo(a,b).out' it will first use the 'foo' prototype to create
+ * a new instance of the appropriate class. The name of the newly
+ * created blueprint will be set to 'foo(a,b)' and the setup
+ * method will be invoked with 'a' and 'b' as parameters. After
+ * inspecting the output names to find out which output has the
+ * name 'out', the blueprint can be used to create a feature
+ * executor that can perform the actual calculation of the
+ * feature.
+ *
+ * @return blueprint base name
+ **/
+ const vespalib::string & getBaseName() const { return _baseName; }
+
+ /**
+ * This method may indicate which features that should be dumped
+ * during a full feature dump by naming them to the given
+ * visitor. The index environment is also given, since it may
+ * affect the choice of which features to dump. Note that any
+ * feature names can be given, but politeness indicate that only
+ * those calculated by feature executors created through this
+ * class should be given. Also note that naming non-existing
+ * features here will break feature dumping.
+ *
+ * @param indexEnv the index environment
+ * @param visitor the object visiting dump features
+ **/
+ virtual void visitDumpFeatures(const IIndexEnvironment &indexEnv,
+ IDumpFeatureVisitor &visitor) const = 0;
+
+ /**
+ * Create another instance of this class. This must be implemented
+ * by all the leafs in the inheritance hierarchy. (ref prototype
+ * pattern)
+ *
+ * @return a new instance of this class (wrapped in an auto pointer)
+ **/
+ virtual UP createInstance() const = 0;
+
+ /**
+ * Set the name of this blueprint. This is the full name including
+ * parameters. If the base name of a feature executor is 'foo' and
+ * we are going to set up a blueprint for this executor with the
+ * parameters 'a' and 'b', the name of this blueprint will be
+ * 'foo(a,b)'. This method will be invoked by the framework right
+ * before invoking the @ref setup method (and must not be invoked
+ * by others).
+ **/
+ void setName(const vespalib::stringref &name) { _name = name; }
+
+ /**
+ * Obtain the name of this blueprint.
+ *
+ * @return blueprint name
+ **/
+ const string &getName() const { return _name; }
+
+ /**
+ * Returns the parameter descriptions for this blueprint.
+ * The default implementation will return a description accepting all parameter lists.
+ *
+ * @return the parameter descriptions.
+ **/
+ virtual ParameterDescriptions getDescriptions() const;
+
+ void attach_dependency_handler(DependencyHandler &dependency_handler) {
+ _dependency_handler = &dependency_handler;
+ }
+
+ void detach_dependency_handler() {
+ _dependency_handler = nullptr;
+ }
+
+ /**
+ * Tailor this blueprint for the given set of parameters. The
+ * implementation of this method should use the @ref defineInput
+ * and @ref describeOutput methods.
+ *
+ * The default implementation of this function will validate
+ * the parameters based on the parameter descriptions for this
+ * blueprint, convert them to a parameter list, and call the
+ * other setup function.
+ *
+ * @return false if the parameters does not make sense for this
+ * blueprint (aka setup failed)
+ * @param indexEnv the index environment
+ * @param params the parameters as simple strings
+ **/
+ virtual bool setup(const IIndexEnvironment &indexEnv,
+ const StringVector &params);
+
+ /**
+ * Setups this blueprint for the given set of parameters. The
+ * implementation of this method should use the @ref defineInput
+ * and @ref describeOutput methods.
+ *
+ * @return false if the parameters does not make sense for this
+ * blueprint (aka setup failed)
+ * @param indexEnv the index environment.
+ * @param params the parameters as a list of actual parameters.
+ **/
+ virtual bool setup(const IIndexEnvironment &indexEnv,
+ const ParameterList &params);
+
+ /**
+ * Here you can do some preprocessing. State must be stored in the IObjectStore.
+ * This is called before creating multiple execution threads.
+ * @param queryEnv The query environment.
+ */
+ virtual void prepareSharedState(const IQueryEnvironment & queryEnv, IObjectStore & objectStore) const {
+ (void) queryEnv;
+ (void) objectStore;
+ }
+
+ /**
+ * Create a feature executor based on this blueprint. Failure to
+ * initialize a feature executor for this blueprint may be
+ * signaled by returning a shared pointer to 0.
+ *
+ * @return feature executor wrapped in a shared pointer
+ * @param queryEnv query environment
+ **/
+ virtual FeatureExecutor::LP createExecutor(const IQueryEnvironment &queryEnv) const = 0;
+
+ /**
+ * Virtual destructor to allow safe subclassing.
+ **/
+ virtual ~Blueprint();
+};
+
+} // namespace fef
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/fef/blueprintfactory.cpp b/searchlib/src/vespa/searchlib/fef/blueprintfactory.cpp
new file mode 100644
index 00000000000..af03194abb7
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/blueprintfactory.cpp
@@ -0,0 +1,49 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".fef.blueprintfactory");
+#include "blueprintfactory.h"
+
+namespace search {
+namespace fef {
+
+BlueprintFactory::BlueprintFactory()
+ : _blueprintMap()
+{
+}
+
+void
+BlueprintFactory::addPrototype(Blueprint::SP proto)
+{
+ vespalib::string name = proto->getBaseName();
+ if (_blueprintMap.find(name) != _blueprintMap.end()) {
+ LOG(warning, "Blueprint prototype overwritten: %s", name.c_str());
+ }
+ _blueprintMap[name] = proto;
+}
+
+void
+BlueprintFactory::visitDumpFeatures(const IIndexEnvironment &indexEnv,
+ IDumpFeatureVisitor &visitor) const
+{
+ BlueprintMap::const_iterator itr = _blueprintMap.begin();
+ BlueprintMap::const_iterator end = _blueprintMap.end();
+ for (; itr != end; ++itr) {
+ itr->second->visitDumpFeatures(indexEnv, visitor);
+ }
+}
+
+Blueprint::SP
+BlueprintFactory::createBlueprint(const vespalib::string &name) const
+{
+ BlueprintMap::const_iterator itr = _blueprintMap.find(name);
+ if (itr == _blueprintMap.end()) {
+ return Blueprint::SP();
+ }
+ Blueprint::UP bp = itr->second->createInstance();
+ return Blueprint::SP(bp.release());
+}
+
+} // namespace fef
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/fef/blueprintfactory.h b/searchlib/src/vespa/searchlib/fef/blueprintfactory.h
new file mode 100644
index 00000000000..8d9924d67a8
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/blueprintfactory.h
@@ -0,0 +1,62 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <string>
+#include <set>
+#include <map>
+#include "blueprint.h"
+#include "iblueprintregistry.h"
+
+namespace search {
+namespace fef {
+
+/**
+ * This class implements the blueprint repository interface and acts
+ * as a blueprint factory for the framework itself.
+ **/
+class BlueprintFactory : public IBlueprintRegistry
+{
+private:
+ BlueprintFactory(const BlueprintFactory &);
+ BlueprintFactory &operator=(const BlueprintFactory &);
+
+ typedef std::map<vespalib::string, Blueprint::SP> BlueprintMap;
+
+ BlueprintMap _blueprintMap;
+
+public:
+ /**
+ * Create an empty factory.
+ **/
+ BlueprintFactory();
+
+ // inherit doc
+ virtual void addPrototype(Blueprint::SP proto);
+
+ /**
+ * This method will visit features to be dumped by forwarding the
+ * visiting request to each of the prototypes registered in this
+ * factory.
+ *
+ * @param indexEnv the index environment
+ * @param visitor the object visiting dump features
+ **/
+ void visitDumpFeatures(const IIndexEnvironment &indexEnv,
+ IDumpFeatureVisitor &visitor) const;
+
+ /**
+ * Create a new blueprint instance by using the appropriate
+ * prototype contained in this factory. The name given is the
+ * feature executor base name (the same one used in the @ref
+ * addPrototype method)
+ *
+ * @return fresh and clean blueprint of the appropriate class
+ * @param name feature executor base name
+ **/
+ Blueprint::SP createBlueprint(const vespalib::string &name) const;
+};
+
+} // namespace fef
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/fef/blueprintresolver.cpp b/searchlib/src/vespa/searchlib/fef/blueprintresolver.cpp
new file mode 100644
index 00000000000..505d7c102ce
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/blueprintresolver.cpp
@@ -0,0 +1,227 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".fef.blueprintresolver");
+#include "blueprintresolver.h"
+#include "blueprintfactory.h"
+#include "featurenameparser.h"
+#include "featurenamebuilder.h"
+#include <stack>
+#include <vespa/vespalib/util/stringfmt.h>
+
+namespace search {
+namespace fef {
+
+namespace {
+
+using Accept = Blueprint::AcceptInput;
+
+bool is_compatible(bool is_object, Accept accept_type) {
+ return ((accept_type == Accept::ANY) ||
+ ((accept_type == Accept::OBJECT) == (is_object)));
+}
+
+const char *type_str(bool is_object) {
+ return (is_object ? "object" : "number");
+}
+
+const char *accept_type_str(Accept accept_type) {
+ switch (accept_type) {
+ case Accept::NUMBER: return "number";
+ case Accept::OBJECT: return "object";
+ case Accept::ANY: return "any";
+ }
+ return "(not reached)";
+}
+
+struct Compiler : public Blueprint::DependencyHandler {
+ using ExecutorSpec = BlueprintResolver::ExecutorSpec;
+ using ExecutorSpecList = BlueprintResolver::ExecutorSpecList;
+ using FeatureRef = BlueprintResolver::FeatureRef;
+ using FeatureMap = BlueprintResolver::FeatureMap;
+
+ struct Frame {
+ ExecutorSpec spec;
+ const FeatureNameParser &parser;
+ Frame(Blueprint::SP blueprint, const FeatureNameParser &parser_in)
+ : spec(blueprint), parser(parser_in) {}
+ };
+ using Stack = std::vector<Frame>;
+
+ struct FrameGuard {
+ Stack &stack;
+ FrameGuard(Stack &stack_in) : stack(stack_in) {}
+ ~FrameGuard() { stack.pop_back(); }
+ };
+
+ const BlueprintFactory &factory;
+ const IIndexEnvironment &index_env;
+ bool compile_error;
+ Stack resolve_stack;
+ ExecutorSpecList &spec_list;
+ FeatureMap &feature_map;
+
+ Compiler(const BlueprintFactory &factory_in,
+ const IIndexEnvironment &index_env_in,
+ ExecutorSpecList &spec_list_out,
+ FeatureMap &feature_map_out)
+ : factory(factory_in),
+ index_env(index_env_in),
+ compile_error(false),
+ resolve_stack(),
+ spec_list(spec_list_out),
+ feature_map(feature_map_out) {}
+
+ Frame &self() { return resolve_stack.back(); }
+
+ FeatureRef failed(const vespalib::string &feature_name, const vespalib::string &reason) {
+ if (!compile_error) {
+ LOG(warning, "invalid rank feature: '%s' (%s)", feature_name.c_str(), reason.c_str());
+ for (size_t i = resolve_stack.size(); i > 0; --i) {
+ const auto &frame = resolve_stack[i - 1];
+ if (&frame != &self()) {
+ LOG(warning, " ... needed by rank feature '%s'", frame.parser.featureName().c_str());
+ }
+ }
+ compile_error = true;
+ }
+ return FeatureRef();
+ }
+
+ FeatureRef verify_type(const FeatureNameParser &parser, FeatureRef ref, Accept accept_type) {
+ const auto &spec = spec_list[ref.executor];
+ bool is_object = spec.output_types[ref.output];
+ if (!is_compatible(is_object, accept_type)) {
+ return failed(parser.featureName(),
+ vespalib::make_string("output '%s' has wrong type: was %s, expected %s",
+ parser.output().c_str(), type_str(is_object), accept_type_str(accept_type)));
+ }
+ return ref;
+ }
+
+ FeatureRef setup_feature(const FeatureNameParser &parser, Accept accept_type) {
+ Blueprint::SP blueprint = factory.createBlueprint(parser.baseName());
+ if (blueprint.get() == nullptr) {
+ return failed(parser.featureName(),
+ vespalib::make_string("unknown basename: '%s'", parser.baseName().c_str()));
+ }
+ resolve_stack.emplace_back(blueprint, parser);
+ FrameGuard frame_guard(resolve_stack);
+ self().spec.blueprint->setName(parser.executorName());
+ self().spec.blueprint->attach_dependency_handler(*this);
+ if (!self().spec.blueprint->setup(index_env, parser.parameters())) {
+ return failed(parser.featureName(), "invalid parameters");
+ }
+ if (parser.output().empty() && self().spec.output_types.empty()) {
+ return failed(parser.featureName(), "has no output value");
+ }
+ const auto &feature = feature_map.find(parser.featureName());
+ if (feature == feature_map.end()) {
+ return failed(parser.featureName(),
+ vespalib::make_string("unknown output: '%s'", parser.output().c_str()));
+ }
+ spec_list.push_back(self().spec);
+ return verify_type(parser, feature->second, accept_type);
+ }
+
+ FeatureRef resolve_feature(const vespalib::string &feature_name, Accept accept_type) {
+ FeatureNameParser parser(feature_name);
+ if (!parser.valid()) {
+ return failed(feature_name, "malformed name");
+ }
+ const auto &feature = feature_map.find(parser.featureName());
+ if (feature != feature_map.end()) {
+ return verify_type(parser, feature->second, accept_type);
+ }
+ if ((resolve_stack.size() + 1) > BlueprintResolver::MAX_DEP_DEPTH) {
+ return failed(parser.featureName(), "dependency graph too deep");
+ }
+ for (const Frame &frame: resolve_stack) {
+ if (frame.parser.executorName() == parser.executorName()) {
+ return failed(parser.featureName(), "dependency cycle detected");
+ }
+ }
+ return setup_feature(parser, accept_type);
+ }
+
+ const FeatureType &resolve_input(const vespalib::string &feature_name, Accept accept_type) override {
+ assert(self().spec.output_types.empty()); // require: 'resolve inputs' before 'define outputs'
+ auto ref = resolve_feature(feature_name, accept_type);
+ if (!ref.valid()) {
+ return FeatureType::number();
+ }
+ self().spec.inputs.push_back(ref);
+ return spec_list[ref.executor].output_types[ref.output];
+ }
+
+ void define_output(const vespalib::string &output_name, const FeatureType &type) override {
+ vespalib::string feature_name = self().parser.executorName();
+ if (!output_name.empty()) {
+ feature_name.push_back('.');
+ feature_name.append(output_name);
+ }
+ FeatureRef output_ref(spec_list.size(), self().spec.output_types.size());
+ if (output_ref.output == 0) {
+ feature_map.emplace(self().parser.executorName(), output_ref);
+ }
+ feature_map.emplace(feature_name, output_ref);
+ self().spec.output_types.push_back(type);
+ }
+};
+
+} // namespace search::fef::<unnamed>
+
+BlueprintResolver::BlueprintResolver(const BlueprintFactory &factory,
+ const IIndexEnvironment &indexEnv)
+ : _factory(factory),
+ _indexEnv(indexEnv),
+ _seeds(),
+ _executorSpecs(),
+ _featureMap(),
+ _seedMap()
+{
+}
+
+void
+BlueprintResolver::addSeed(const vespalib::stringref &feature)
+{
+ _seeds.push_back(feature);
+}
+
+bool
+BlueprintResolver::compile()
+{
+ assert(_executorSpecs.empty()); // only one compilation allowed
+ Compiler compiler(_factory, _indexEnv, _executorSpecs, _featureMap);
+ for (const auto &seed: _seeds) {
+ auto ref = compiler.resolve_feature(seed, Blueprint::AcceptInput::ANY);
+ if (compiler.compile_error) {
+ return false;
+ }
+ _seedMap.emplace(FeatureNameParser(seed).featureName(), ref);
+ }
+ return true;
+}
+
+const BlueprintResolver::ExecutorSpecList &
+BlueprintResolver::getExecutorSpecs() const
+{
+ return _executorSpecs;
+}
+
+const BlueprintResolver::FeatureMap &
+BlueprintResolver::getFeatureMap() const
+{
+ return _featureMap;
+}
+
+const BlueprintResolver::FeatureMap &
+BlueprintResolver::getSeedMap() const
+{
+ return _seedMap;
+}
+
+
+} // namespace fef
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/fef/blueprintresolver.h b/searchlib/src/vespa/searchlib/fef/blueprintresolver.h
new file mode 100644
index 00000000000..ceab7125ba8
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/blueprintresolver.h
@@ -0,0 +1,150 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vector>
+#include <queue>
+#include <map>
+#include "blueprint.h"
+#include "feature_type.h"
+
+namespace search {
+namespace fef {
+
+class BlueprintFactory;
+class IIndexEnvironment;
+class FeatureNameParser;
+
+/**
+ * This class is used by the framework to resolve blueprint
+ * dependencies. A blueprint factory is used to create new blueprints
+ * when needed during dependency resolving. Note that this class is
+ * not inteded for direct use. It is used by the @ref RankSetup
+ * class. It may also be used for low-level testing.
+ **/
+class BlueprintResolver
+{
+public:
+ typedef std::shared_ptr<BlueprintResolver> SP;
+
+ /**
+ * Low-level reference to a single output from a feature
+ * executor. 'executor' is the offset into the topological
+ * ordering of all executors. This order is defined by the return
+ * value from the getExecutorSpecs function. 'output' is the
+ * offset into the ordered list of outputs from the relevant
+ * executor.
+ **/
+ struct FeatureRef {
+ uint32_t executor;
+ uint32_t output;
+ static constexpr uint32_t undef = -1;
+
+ FeatureRef() : executor(undef), output(0) {}
+ FeatureRef(uint32_t executor_in, uint32_t output_in)
+ : executor(executor_in), output(output_in) {}
+ bool valid() { return (executor != undef); }
+ };
+ typedef std::map<vespalib::string, FeatureRef> FeatureMap;
+
+ /**
+ * Thin blueprint wrapper with additional information about how
+ * the executor created from the blueprint should be wired with
+ * other executors.
+ **/
+ struct ExecutorSpec {
+ Blueprint::SP blueprint;
+ std::vector<FeatureRef> inputs;
+ std::vector<FeatureType> output_types;
+
+ ExecutorSpec(Blueprint::SP blueprint_in)
+ : blueprint(blueprint_in), inputs(), output_types() {}
+ };
+ typedef std::vector<ExecutorSpec> ExecutorSpecList;
+
+ /**
+ * The maximum dependency depth. This value is defined to protect
+ * against infinitely deep dependency graphs and exposed for
+ * testing purposes. It should be set high enough to avoid
+ * problems for 'sane' developers and low enough to avoid stack
+ * overflow.
+ **/
+ static const uint32_t MAX_DEP_DEPTH = 64;
+
+private:
+ const BlueprintFactory &_factory;
+ const IIndexEnvironment &_indexEnv;
+ std::vector<vespalib::string> _seeds;
+ ExecutorSpecList _executorSpecs;
+ FeatureMap _featureMap;
+ FeatureMap _seedMap;
+
+public:
+ BlueprintResolver(const BlueprintResolver &) = delete;
+ BlueprintResolver &operator=(const BlueprintResolver &) = delete;
+
+ /**
+ * Create a new blueprint resolver within the given index
+ * environment and backed by the given factory.
+ *
+ * @param factory blueprint factory
+ * @param indexEnv index environment
+ **/
+ BlueprintResolver(const BlueprintFactory &factory,
+ const IIndexEnvironment &indexEnv);
+
+ /**
+ * Add a feature name to the list of seeds. During compilation,
+ * blueprints for all seeds and dependencies will be instantiated
+ * and enumerated.
+ *
+ * @param feature feature name to use as a seed
+ **/
+ void addSeed(const vespalib::stringref &feature);
+
+ /**
+ * Create Blueprints for all seeds and dependencies and enumerate
+ * blueprints in such a way that blueprints only depend on other
+ * blueprints with lower enum values. Compilation will typically
+ * fail if a dependency cannot be created or if you have circular
+ * dependencies.
+ *
+ * @return true if ok, false if compilation error
+ **/
+ bool compile();
+
+ /**
+ * Obtain a vector indicating the order of instantiation of
+ * feature executors and also how they should be wired together.
+ * The enum value of an executor spec may be used directly as an
+ * index into the returned vector.
+ *
+ * @return feature executor assembly directions
+ **/
+ const ExecutorSpecList &getExecutorSpecs() const;
+
+ /**
+ * Obtain the location of all named features known to this
+ * resolver. This may be used to dump a list of feature name/value
+ * pairs after all feature values have been computed. The seeds
+ * are the keys in the returned map, and the feature locations are
+ * the values.
+ *
+ * @return feature locations
+ **/
+ const FeatureMap &getFeatureMap() const;
+
+ /**
+ * Obtain the location of all seeds used by this resolver. This
+ * may be used to dump a list of feature name/value pairs after
+ * all feature values have been computed. The seeds are the keys
+ * in the returned map, and the feature locations are the
+ * values.
+ *
+ * @return seed locations
+ **/
+ const FeatureMap &getSeedMap() const;
+};
+
+} // namespace fef
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/fef/collection_type.cpp b/searchlib/src/vespa/searchlib/fef/collection_type.cpp
new file mode 100644
index 00000000000..c5b31a4bd0c
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/collection_type.cpp
@@ -0,0 +1,21 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "collection_type.h"
+
+namespace search {
+namespace fef {
+
+CollectionType::CollectionType(uint32_t value)
+ : _value(value)
+{
+}
+
+const CollectionType CollectionType::SINGLE(1);
+
+const CollectionType CollectionType::ARRAY(2);
+
+const CollectionType CollectionType::WEIGHTEDSET(3);
+
+} // namespace fef
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/fef/collection_type.h b/searchlib/src/vespa/searchlib/fef/collection_type.h
new file mode 100644
index 00000000000..55c9a7a143c
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/collection_type.h
@@ -0,0 +1,51 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+namespace search {
+namespace fef {
+
+/**
+ * Typesafe enum used to indicate the collection type of a field.
+ **/
+class CollectionType
+{
+private:
+ uint32_t _value;
+
+ CollectionType(uint32_t value);
+public:
+ /**
+ * Indicating that the field is single-value
+ **/
+ static const CollectionType SINGLE;
+
+ /**
+ * Indicating that the field is multi-value without element weights
+ **/
+ static const CollectionType ARRAY;
+
+ /**
+ * Indicating that the field is multi-value with element weights
+ **/
+ static const CollectionType WEIGHTEDSET;
+
+ /**
+ * Less than operator; needed to be handled as a value by the standard library.
+ **/
+ bool operator<(const CollectionType &rhs) const { return (_value < rhs._value); }
+
+ /**
+ * Check if two collection types are equal.
+ **/
+ bool operator==(const CollectionType &rhs) const { return (_value == rhs._value); }
+
+ /**
+ * Check if two collection types are not equal.
+ **/
+ bool operator!=(const CollectionType &rhs) const { return (_value != rhs._value); }
+};
+
+} // namespace fef
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/fef/create-class-cpp.sh b/searchlib/src/vespa/searchlib/fef/create-class-cpp.sh
new file mode 100755
index 00000000000..e47cc402dca
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/create-class-cpp.sh
@@ -0,0 +1,29 @@
+#!/bin/sh
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+class=$1
+guard=`echo $class | tr 'a-z' 'A-Z'`
+name=`echo $class | tr 'A-Z' 'a-z'`
+
+cat <<EOF
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/log/log.h>
+LOG_SETUP(".fef.$name");
+#include <vespa/fastos/fastos.h>
+#include "$name.h"
+
+namespace search {
+namespace fef {
+
+$class::$class()
+{
+}
+
+$class::~$class()
+{
+}
+
+} // namespace fef
+} // namespace search
+EOF
diff --git a/searchlib/src/vespa/searchlib/fef/create-class-h.sh b/searchlib/src/vespa/searchlib/fef/create-class-h.sh
new file mode 100644
index 00000000000..9a4444c30bc
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/create-class-h.sh
@@ -0,0 +1,27 @@
+#!/bin/sh
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+class=$1
+guard=`echo $class | tr 'a-z' 'A-Z'`
+
+cat <<EOF
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+namespace search {
+namespace fef {
+
+class $class
+{
+private:
+ $class(const $class &);
+ $class &operator=(const $class &);
+public:
+ $class();
+ virtual ~$class();
+};
+
+} // namespace fef
+} // namespace search
+
+EOF
diff --git a/searchlib/src/vespa/searchlib/fef/create-fef-includes.sh b/searchlib/src/vespa/searchlib/fef/create-fef-includes.sh
new file mode 100644
index 00000000000..018bf7484c0
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/create-fef-includes.sh
@@ -0,0 +1,26 @@
+#!/bin/sh
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+cat <<EOF
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// NOTE: This file was generated by the 'create-fef-includes.sh' script
+#pragma once
+/**
+ * @file fef.h
+ *
+ * This is a convenience header that will include everything you need
+ * to use this library.
+ **/
+
+
+EOF
+echo "#include <vespa/searchlib/common/feature.h>"
+echo ""
+
+for f in *.h; do
+ if [ $f != "fef.h" ]; then
+ echo "#include \"$f\""
+ fi
+done
+
+echo ""
diff --git a/searchlib/src/vespa/searchlib/fef/create-interface.sh b/searchlib/src/vespa/searchlib/fef/create-interface.sh
new file mode 100644
index 00000000000..128f4a16711
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/create-interface.sh
@@ -0,0 +1,23 @@
+#!/bin/sh
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+class=$1
+guard=`echo $class | tr 'a-z' 'A-Z'`
+
+cat <<EOF
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+namespace search {
+namespace fef {
+
+class $class
+{
+public:
+ virtual ~$class() {}
+};
+
+} // namespace fef
+} // namespace search
+
+EOF
diff --git a/searchlib/src/vespa/searchlib/fef/dist_doc_hp.sh b/searchlib/src/vespa/searchlib/fef/dist_doc_hp.sh
new file mode 100755
index 00000000000..ae9e35f739c
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/dist_doc_hp.sh
@@ -0,0 +1,3 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+doxygen
+scp html/* testdata:dist/doxygen/fef/
diff --git a/searchlib/src/vespa/searchlib/fef/feature_type.cpp b/searchlib/src/vespa/searchlib/fef/feature_type.cpp
new file mode 100644
index 00000000000..63434ec6bb7
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/feature_type.cpp
@@ -0,0 +1,26 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "feature_type.h"
+
+namespace search {
+namespace fef {
+
+const FeatureType FeatureType::_number = FeatureType(TYPE_UP());
+
+FeatureType::FeatureType(const FeatureType &rhs)
+ : _type()
+{
+ if (rhs.is_object()) {
+ _type = std::make_unique<TYPE>(rhs.type());
+ }
+}
+
+FeatureType
+FeatureType::object(const TYPE &type_in)
+{
+ return FeatureType(std::make_unique<TYPE>(type_in));
+}
+
+} // namespace fef
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/fef/feature_type.h b/searchlib/src/vespa/searchlib/fef/feature_type.h
new file mode 100644
index 00000000000..0251fe7ab25
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/feature_type.h
@@ -0,0 +1,40 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/vespalib/eval/value_type.h>
+
+namespace search {
+namespace fef {
+
+/**
+ * The full type of a feature calculated by the ranking framework. The
+ * ranking framework wraps a thin layer on top of the types defined in
+ * the low-level eval library. A feature can either be a simple number
+ * represented by a double or a polymorph value represented with an
+ * object. The ranking framework itself will mostly care about the
+ * representation (number/object) and not the specific type, hence the
+ * implicit cast to bool. The type function is used to extract the
+ * underlying type and is only allowed for features that are objects.
+ **/
+class FeatureType {
+private:
+ using TYPE = vespalib::eval::ValueType;
+ using TYPE_UP = std::unique_ptr<TYPE>;
+ TYPE_UP _type;
+ static const FeatureType _number;
+ FeatureType(TYPE_UP type_in) : _type(std::move(type_in)) {}
+public:
+ FeatureType(const FeatureType &rhs);
+ bool is_object() const { return (_type.get() != nullptr); }
+ operator bool() const { return is_object(); }
+ const TYPE &type() const {
+ assert(_type);
+ return *_type;
+ }
+ static const FeatureType &number() { return _number; }
+ static FeatureType object(const TYPE &type_in);
+};
+
+} // namespace fef
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/fef/featureexecutor.cpp b/searchlib/src/vespa/searchlib/fef/featureexecutor.cpp
new file mode 100644
index 00000000000..b28bec39b19
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/featureexecutor.cpp
@@ -0,0 +1,22 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "featureexecutor.h"
+
+namespace search {
+namespace fef {
+
+FeatureExecutor::FeatureExecutor()
+ : _inputs(),
+ _outputs()
+{
+}
+
+bool
+FeatureExecutor::isPure()
+{
+ return false;
+}
+
+} // namespace fef
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/fef/featureexecutor.h b/searchlib/src/vespa/searchlib/fef/featureexecutor.h
new file mode 100644
index 00000000000..75110889ab7
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/featureexecutor.h
@@ -0,0 +1,185 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vector>
+#include <vespa/vespalib/util/linkedptr.h>
+#include "handle.h"
+#include "matchdata.h"
+#include <cassert>
+#include <memory>
+
+namespace search {
+namespace fef {
+
+/**
+ * A feature executor is a general component that calculates one or
+ * more feature values. It may take multiple features as input. A
+ * feature executor may also use term match data as input, or whatever
+ * it has access to regarding the index.
+ **/
+class FeatureExecutor
+{
+public:
+ class SharedInputs {
+ std::vector<FeatureHandle> _inputs;
+ public:
+ SharedInputs() : _inputs() {}
+ void add(FeatureHandle handle) { _inputs.push_back(handle); }
+ size_t size() const { return _inputs.size(); }
+ FeatureHandle operator[](size_t idx) const { return _inputs[idx]; }
+ };
+
+ class Inputs {
+ SharedInputs *_inputs;
+ uint32_t _offset;
+ uint32_t _size;
+ public:
+ Inputs() : _inputs(nullptr), _offset(0), _size(0) {}
+ void bind(SharedInputs &inputs) {
+ _inputs = &inputs;
+ _offset = _inputs->size();
+ _size = 0;
+ }
+ void add(FeatureHandle handle) {
+ assert(_inputs != nullptr);
+ assert(_inputs->size() == (_offset + _size));
+ _inputs->add(handle);
+ ++_size;
+ }
+ bool empty() const { return (_size == 0); }
+ size_t size() const { return _size; }
+ FeatureHandle operator[](size_t idx) const {
+ assert(idx < _size);
+ return (*_inputs)[_offset + idx];
+ }
+ };
+
+ class Outputs {
+ FeatureHandle _begin;
+ FeatureHandle _end;
+ public:
+ Outputs() : _begin(IllegalHandle), _end(IllegalHandle) {}
+ void add(FeatureHandle handle) {
+ if (_begin == IllegalHandle) {
+ _begin = handle;
+ _end = (_begin + 1);
+ } else if (handle == _end) {
+ ++_end;
+ } else {
+ assert(handle == _end);
+ }
+ }
+ bool empty() const { return (_end == _begin); }
+ size_t size() const { return (_end - _begin); }
+ FeatureHandle operator[](size_t idx) const {
+ assert(idx < (_end - _begin));
+ return (_begin + idx);
+ }
+ };
+
+private:
+ FeatureExecutor(const FeatureExecutor &);
+ FeatureExecutor &operator=(const FeatureExecutor &);
+
+ Inputs _inputs;
+ Outputs _outputs;
+
+public:
+ /**
+ * Convenience typedef for a shared pointer to this class.
+ **/
+ typedef vespalib::LinkedPtr<FeatureExecutor> LP;
+
+ typedef std::unique_ptr<FeatureExecutor> UP;
+
+ /**
+ * Create a feature executor that has not yet been bound to neither
+ * inputs nor outputs.
+ **/
+ FeatureExecutor();
+
+ /**
+ * Bind shared external storage to this feature executor. The
+ * shared storage will be used to store the handle of feature
+ * inputs. This function must be called before starting to add
+ * inputs.
+ *
+ * @param shared_inputs shared store for input feature handles
+ **/
+ void bind_shared_inputs(SharedInputs &shared_inputs) { _inputs.bind(shared_inputs); }
+
+ /**
+ * Add an input to this feature executor. All inputs must be added
+ * before this object is added to the feature execution manager.
+ *
+ * @param handle the feature handle of the input to add
+ **/
+ void addInput(FeatureHandle handle) { _inputs.add(handle); }
+ virtual void inputs_done() {} // needed for feature decorators
+
+ /**
+ * Access the input features for this executor. Use {@link
+ * MatchData#resolveFeature} to resolve these handles.
+ *
+ * @return const view of input features
+ **/
+ const Inputs &inputs() const { return _inputs; }
+
+ /**
+ * Assign a feature handle to the next unbound output feature.
+ * This method will be invoked by the @ref FeatureExecutionManager
+ * when new feature executors are added. It may also be used for
+ * testing, but should not be invoked directly from application
+ * code. Note that this method must be invoked exactly the number
+ * of times indicated by the @ref getNumOutputs method.
+ *
+ * @param handle feature handle to be assigned to the next unbound
+ * output feature.
+ **/
+ void bindOutput(FeatureHandle handle) { _outputs.add(handle); }
+ virtual void outputs_done() {} // needed for feature decorators
+
+ /**
+ * Access the output features for this executor. Use {@link
+ * MatchData#resolveFeature} to resolve these handles.
+ *
+ * @return const view of output features
+ **/
+ const Outputs &outputs() const { return _outputs; }
+
+ /**
+ * Check if this feature executor is pure. A feature executor
+ * claiming to be pure must satisfy the requirement that its
+ * output feature values only depend on the values of its input
+ * features (in other words: if the input features does not change
+ * in value, neither does the outputs). This method is implemented
+ * to return false by default, but may be overridden by feature
+ * executors that are pure. Whether a feature executor is pure or
+ * not may be used by the framework to optimize feature
+ * execution. It is always safe to let this method return false,
+ * but letting pure executors return true may increase
+ * performance.
+ *
+ * @return true if this feature executor is pure
+ **/
+ virtual bool isPure();
+
+ /**
+ * Execute this feature executor on the given data.
+ *
+ * @param data data storage
+ **/
+ virtual void execute(MatchData &data) = 0;
+
+ /**
+ * Virtual destructor to allow subclassing.
+ **/
+ virtual ~FeatureExecutor() {}
+};
+
+} // namespace fef
+} // namespace search
+
+
+// LocalWords: param
diff --git a/searchlib/src/vespa/searchlib/fef/featurenamebuilder.cpp b/searchlib/src/vespa/searchlib/fef/featurenamebuilder.cpp
new file mode 100644
index 00000000000..292f5ac5bcc
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/featurenamebuilder.cpp
@@ -0,0 +1,159 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "featurenamebuilder.h"
+#include "featurenameparser.h"
+
+namespace {
+
+// ref: http://en.wikipedia.org/wiki/ASCII
+// note: we also consider space to be printable
+bool isPrintable(char c) {
+ return (static_cast<unsigned char>(c) >= 32 &&
+ static_cast<unsigned char>(c) <= 126);
+}
+
+bool isSpace(char c) {
+ switch (c) {
+ case ' ':
+ case '\t':
+ case '\n':
+ case '\r':
+ case '\f':
+ return true;
+ default:
+ return false;
+ }
+}
+
+bool isBlank(const vespalib::string &str) {
+ for (uint32_t i = 0; i < str.size(); ++i) {
+ if (!isSpace(str[i])) {
+ return false;
+ }
+ }
+ return true;
+}
+
+void appendQuoted(char c, vespalib::string &str) {
+ switch (c) {
+ case '\\':
+ str.append("\\\\");
+ break;
+ case '"':
+ str.append("\\\"");
+ break;
+ case '\t':
+ str.append("\\t");
+ break;
+ case '\n':
+ str.append("\\n");
+ break;
+ case '\r':
+ str.append("\\r");
+ break;
+ case '\f':
+ str.append("\\f");
+ break;
+ default:
+ if (isPrintable(c)) {
+ str.push_back(c);
+ } else {
+ const char *lookup = "0123456789abcdef";
+ str.append("\\x");
+ str.push_back(lookup[(c >> 4) & 0xf]);
+ str.push_back(lookup[c & 0xf]);
+ }
+ }
+}
+
+vespalib::string quoteString(const vespalib::string &str)
+{
+ vespalib::string res;
+ res.push_back('"');
+ for (uint32_t i = 0; i < str.size(); ++i) {
+ appendQuoted(str[i], res);
+ }
+ res.push_back('"');
+ return res;
+}
+
+} // namespace <unnamed>
+
+namespace search {
+namespace fef {
+
+FeatureNameBuilder::FeatureNameBuilder()
+ : _baseName(),
+ _parameters(),
+ _output()
+{
+}
+
+FeatureNameBuilder::~FeatureNameBuilder()
+{
+}
+
+FeatureNameBuilder &
+FeatureNameBuilder::baseName(const vespalib::string &str)
+{
+ _baseName = str;
+ return *this;
+}
+
+FeatureNameBuilder &
+FeatureNameBuilder::parameter(const vespalib::string &str, bool exact)
+{
+ if (str.empty() || (!exact && isBlank(str))) {
+ _parameters.push_back("");
+ } else {
+ FeatureNameParser parser(str);
+ if (!parser.valid() || (exact && str != parser.featureName())) {
+ _parameters.push_back(quoteString(str));
+ } else {
+ _parameters.push_back(parser.featureName());
+ }
+ }
+ return *this;
+}
+
+FeatureNameBuilder &
+FeatureNameBuilder::clearParameters()
+{
+ _parameters.resize(0);
+ return *this;
+}
+
+FeatureNameBuilder &
+FeatureNameBuilder::output(const vespalib::string &str)
+{
+ _output = str;
+ return *this;
+}
+
+vespalib::string
+FeatureNameBuilder::buildName() const
+{
+ vespalib::string ret;
+ if (!_baseName.empty()) {
+ ret = _baseName;
+ if (!_parameters.empty() > 0) {
+ ret += "(";
+ for (uint32_t i = 0; i < _parameters.size(); ++i) {
+ if (i > 0) {
+ ret += ",";
+ }
+ ret += _parameters[i];
+ }
+ ret += ")";
+ }
+ if (!_output.empty()) {
+ ret += ".";
+ ret += _output;
+ }
+ }
+ return ret;
+}
+
+} // namespace fef
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/fef/featurenamebuilder.h b/searchlib/src/vespa/searchlib/fef/featurenamebuilder.h
new file mode 100644
index 00000000000..3bcc49114bc
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/featurenamebuilder.h
@@ -0,0 +1,75 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/vespalib/stllike/string.h>
+#include <vector>
+
+namespace search {
+namespace fef {
+
+/**
+ * An object of this class may be used to build feature names in a
+ * convenient way. Using this class will ensure things like correct
+ * quoting of reserved characters used in parameters.
+ **/
+class FeatureNameBuilder
+{
+private:
+ vespalib::string _baseName;
+ std::vector<vespalib::string> _parameters;
+ vespalib::string _output;
+
+public:
+ /**
+ * Create an empty builder.
+ **/
+ FeatureNameBuilder();
+ ~FeatureNameBuilder();
+
+ /**
+ * Set the base name.
+ *
+ * @return this object, for chaining
+ * @param str base name
+ **/
+ FeatureNameBuilder &baseName(const vespalib::string &str);
+
+ /**
+ * Add a parameter to the end of the parameter list.
+ *
+ * @return this object, for chaining
+ * @param str a parameter
+ * @param exact if this is true, the parameter will preserve its
+ * exact string value. If this is false, the framework is allowed
+ * to normalize the string as if it was a feature name.
+ **/
+ FeatureNameBuilder &parameter(const vespalib::string &str, bool exact = true);
+
+ /**
+ * Clear the list of parameters.
+ *
+ * @return this object, for chaining
+ **/
+ FeatureNameBuilder &clearParameters();
+
+ /**
+ * Set the output name
+ *
+ * @return this object, for chaining
+ * @param str output name
+ **/
+ FeatureNameBuilder &output(const vespalib::string &str);
+
+ /**
+ * Build a full feature name from the information put into this
+ * object.
+ *
+ * @return feature name
+ **/
+ vespalib::string buildName() const;
+};
+
+} // namespace fef
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/fef/featurenameparser.cpp b/searchlib/src/vespa/searchlib/fef/featurenameparser.cpp
new file mode 100644
index 00000000000..2d646de5a72
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/featurenameparser.cpp
@@ -0,0 +1,499 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".fef.featurenameparser");
+#include <vespa/vespalib/util/stringfmt.h>
+#include "featurenameparser.h"
+#include "featurenamebuilder.h"
+
+namespace {
+
+//-----------------------------------------------------------------------------
+
+int decodeHex(char c) {
+ if (c >= '0' && c <= '9') {
+ return (c - '0');
+ }
+ if (c >= 'a' && c <= 'f') {
+ return ((c - 'a') + 10);
+ }
+ if (c >= 'A' && c <= 'F') {
+ return ((c - 'A') + 10);
+ }
+ return -1;
+}
+
+//-----------------------------------------------------------------------------
+
+template <typename A>
+class IsLogged
+{
+private:
+ A _a;
+ vespalib::string _name;
+
+public:
+ IsLogged(A a) : _a(a), _name(a.getName()) {}
+ bool operator()(char c) {
+ bool res = _a(c);
+ LOG(info, "%s returned %s for char '%c'",
+ _name.c_str(), res ? "true" : "false", c);
+ return res;
+ }
+};
+
+template <typename A>
+class DoLog
+{
+private:
+ A _a;
+ vespalib::string _name;
+
+public:
+ DoLog(A a) : _a(a), _name(a.getName()) {}
+ bool operator()(char c) {
+ bool res = _a(c);
+ LOG(info, "%s returned %s for char '%c'",
+ _name.c_str(), res ? "true" : "false", c);
+ return res;
+ }
+ bool done() {
+ bool res = _a.done();
+ LOG(info, "%s returned %s on done signal",
+ _name.c_str(), res ? "true" : "false");
+ return res;
+ }
+};
+
+//-----------------------------------------------------------------------------
+
+template <typename A>
+IsLogged<A> isLogged(A a) {
+ return IsLogged<A>(a);
+}
+
+template <typename A>
+DoLog<A> doLog(A a) {
+ return DoLog<A>(a);
+}
+
+//-----------------------------------------------------------------------------
+
+class ParseContext
+{
+private:
+ const vespalib::string &_str; // the input string
+ uint32_t _pos; // current position
+ char _curr; // current character, 0 means eos
+ bool _error; // flag indicating whether we have a parse error
+
+public:
+ ParseContext(const vespalib::string &in) : _str(in), _pos(0),
+ _curr((in.empty()) ? 0 : in[0]),
+ _error(false) {}
+ uint32_t pos() const { return _pos; }
+ char get() const { return _curr; }
+ bool eos() const { return !_curr; }
+ bool signalError() {
+ _curr = 0; // also signals eos
+ _error = true;
+ return false;
+ }
+ bool error() {
+ return _error;
+ }
+ void next() {
+ if (eos()) {
+ return;
+ }
+ if (++_pos < _str.size()) {
+ _curr = _str[_pos];
+ } else {
+ _curr = 0;
+ }
+ }
+ bool eatChar(char c) {
+ if (get() != c) {
+ return false;
+ }
+ next();
+ return true;
+ }
+ template <typename CHECK, typename SINK>
+ bool scan(CHECK check, SINK sink) {
+ while (!eos()) {
+ if (!check(get())) {
+ break;
+ }
+ if (!sink(get())) {
+ signalError();
+ }
+ next();
+ }
+ if (!sink.done()) {
+ signalError();
+ }
+ return !error();
+ }
+};
+
+//-----------------------------------------------------------------------------
+
+class IsSpace
+{
+public:
+ bool operator()(char c) const {
+ switch (c) {
+ case ' ':
+ case '\t':
+ case '\n':
+ case '\r':
+ case '\f':
+ return true;
+ default:
+ return false;
+ }
+ }
+ vespalib::string getName() const { return "IsSpace"; }
+};
+
+class Ident
+{
+public:
+ Ident() {
+ for(size_t i(0), m(256); i < m; i++) { _valid[i] = false; }
+ for(size_t i('a'), m('z'); i <= m; i++) { _valid[i] = true; }
+ for(size_t i('A'), m('Z'); i <= m; i++) { _valid[i] = true; }
+ for(size_t i('0'), m('9'); i <= m; i++) { _valid[i] = true; }
+ _valid[uint8_t('_')] = true;
+ _valid[uint8_t('+')] = true;
+ _valid[uint8_t('-')] = true;
+ _valid[uint8_t('$')] = true;
+ _valid[uint8_t('@')] = true;
+ }
+ bool isValid(uint8_t c) { return _valid[c]; }
+private:
+ bool _valid[256];
+};
+
+static Ident _G_ident;
+
+class IsIdent
+{
+public:
+ bool operator()(char c) const {
+ return _G_ident.isValid(c);
+ }
+ vespalib::string getName() const { return "IsIdent"; }
+};
+
+class IsChar
+{
+private:
+ char _c;
+
+public:
+ IsChar(char c) : _c(c) {}
+ bool operator()(char c) const {
+ return (c == _c);
+ }
+ vespalib::string getName() const { return vespalib::make_string("IsChar(%c)", _c); }
+};
+
+template <typename A>
+class IsNot
+{
+private:
+ A _a;
+
+public:
+ IsNot(A a) : _a(a) {}
+ bool operator()(char c) {
+ return !(_a(c));
+ }
+ vespalib::string getName() const { return vespalib::make_string("IsNot(%s)", _a.getName().c_str()); }
+};
+
+template <typename A, typename B>
+class IsEither
+{
+private:
+ A _a;
+ B _b;
+
+public:
+ IsEither(A a, B b) : _a(a), _b(b) {}
+ bool operator()(char c) {
+ return (_a(c) || _b(c));
+ }
+ vespalib::string getName() const { return vespalib::make_string("IsEither(%s,%s)",
+ _a.getName().c_str(), _b.getName().c_str()); }
+};
+
+class IsEndQuote
+{
+private:
+ bool _escape;
+
+public:
+ IsEndQuote() : _escape(false) {}
+ bool operator()(char c) {
+ if (_escape) {
+ _escape = false;
+ return false;
+ }
+ if (c == '\\') {
+ _escape = true;
+ return false;
+ }
+ return (c == '"');
+ }
+ vespalib::string getName() const { return "IsEndQuote"; }
+};
+
+//-----------------------------------------------------------------------------
+
+class DoIgnore
+{
+public:
+ bool operator()(char) { return true; }
+ bool done() { return true; }
+ vespalib::string getName() const { return "doIgnore"; }
+};
+
+class DoSave
+{
+private:
+ vespalib::string &_dst;
+
+public:
+ DoSave(vespalib::string &str) : _dst(str) {}
+ bool operator()(char c) {
+ _dst.push_back(c);
+ return true;
+ }
+ bool done() { return !_dst.empty(); }
+ vespalib::string getName() const { return "doSave"; }
+};
+
+class DoDequote
+{
+private:
+ bool _escape; // true means we are dequoting something
+ int _hex; // how many hex numbers left to read
+ unsigned char _c; // save up hex decoded char here
+ vespalib::string &_dst; // where to save the dequoted string
+
+public:
+ DoDequote(vespalib::string &str) : _escape(false), _hex(0), _c(0), _dst(str) {}
+ bool operator()(char c) {
+ if (_escape) {
+ if (_hex > 0) {
+ --_hex;
+ int val = decodeHex(c);
+ if (val < 0) {
+ return false;
+ }
+ _c |= ((val & 0xf) << (_hex * 4));
+ if (_hex == 0) {
+ if (_c == 0) {
+ return false;
+ }
+ _dst.push_back(_c);
+ _escape = false;
+ }
+ } else {
+ switch (c) {
+ case '"':
+ _dst.push_back('\"');
+ _escape = false;
+ break;
+ case '\\':
+ _dst.push_back('\\');
+ _escape = false;
+ break;
+ case 't':
+ _dst.push_back('\t');
+ _escape = false;
+ break;
+ case 'n':
+ _dst.push_back('\n');
+ _escape = false;
+ break;
+ case 'r':
+ _dst.push_back('\r');
+ _escape = false;
+ break;
+ case 'f':
+ _dst.push_back('\f');
+ _escape = false;
+ break;
+ case 'x':
+ _hex = 2;
+ _c = 0;
+ break;
+ default:
+ return false; // signal error
+ }
+ }
+ } else {
+ if (c == '\\') {
+ _escape = true;
+ } else {
+ _dst.push_back(c); // normal case (no dequoting needed)
+ }
+ }
+ return true;
+ }
+ bool done() { return !_escape; }
+ vespalib::string getName() const { return "doDequote"; }
+};
+
+//-----------------------------------------------------------------------------
+
+IsSpace isSpace() { return IsSpace(); }
+
+IsIdent isIdent() { return IsIdent(); }
+
+IsChar isChar(char c) { return IsChar(c); }
+
+template <typename A>
+IsNot<A> isNot(A a) {
+ return IsNot<A>(a);
+}
+
+template <typename A, typename B>
+IsEither<A, B> isEither(A a, B b) {
+ return IsEither<A, B>(a, b);
+}
+
+IsEndQuote isEndQuote() { return IsEndQuote(); }
+
+DoIgnore doIgnore() { return DoIgnore(); }
+
+DoSave doSave(vespalib::string &str) { return DoSave(str); }
+
+DoDequote doDequote(vespalib::string &str) { return DoDequote(str); }
+
+//-----------------------------------------------------------------------------
+
+// need forward declaration of this for recursive parsing
+bool normalizeFeatureName(ParseContext &ctx, vespalib::string &name);
+
+bool parseParameters(ParseContext &ctx, std::vector<vespalib::string> &parameters)
+{
+ ctx.scan(isSpace(), doIgnore());
+ if (!ctx.eatChar('(')) {
+ return true; // no parameters = ok
+ }
+ for (;;) {
+ vespalib::string param;
+ ctx.scan(isSpace(), doIgnore());
+ switch (ctx.get()) {
+ case ')':
+ case ',':
+ break; // empty param
+ case '"': // parse param as quoted string
+ ctx.next(); // eat opening '"'
+ if (!ctx.scan(isNot(isEndQuote()), doDequote(param))) {
+ return false;
+ }
+ if (!ctx.eatChar('"')) { // missing end quote
+ return ctx.signalError();
+ }
+ break;
+ default: // parse param as feature name
+ if (!normalizeFeatureName(ctx, param)) {
+ return false;
+ }
+ break;
+ }
+ parameters.push_back(param);
+ ctx.scan(isSpace(), doIgnore());
+ if (ctx.eatChar(')')) { // done
+ return true;
+ } else if (!ctx.eatChar(',')) { // illegal param list
+ return ctx.signalError();
+ }
+ }
+}
+
+bool parseOutput(ParseContext &ctx, vespalib::string &output)
+{
+ ctx.scan(isSpace(), doIgnore());
+ if (!ctx.eatChar('.')) {
+ return true; // output is optional
+ }
+ ctx.scan(isSpace(), doIgnore());
+ return ctx.scan(isEither(isIdent(), isChar('.')), doSave(output));
+}
+
+bool parseFeatureName(ParseContext &ctx, vespalib::string &baseName,
+ std::vector<vespalib::string> &parameters, vespalib::string &output)
+{
+ return (ctx.scan(isIdent(), doSave(baseName)) &&
+ parseParameters(ctx, parameters) &&
+ parseOutput(ctx, output));
+}
+
+bool normalizeFeatureName(ParseContext &ctx, vespalib::string &name) {
+ vespalib::string baseName;
+ std::vector<vespalib::string> params;
+ vespalib::string output;
+ if (!parseFeatureName(ctx, baseName, params, output)) {
+ return false;
+ }
+ search::fef::FeatureNameBuilder builder;
+ builder.baseName(baseName);
+ for (uint32_t i = 0; i < params.size(); ++i) {
+ builder.parameter(params[i]);
+ }
+ builder.output(output);
+ name = builder.buildName();
+ return true;
+}
+
+} // namespace <unnamed>
+
+namespace search {
+namespace fef {
+
+FeatureNameParser::FeatureNameParser(const string &input)
+ : _valid(false),
+ _endPos(0),
+ _baseName(),
+ _parameters(),
+ _output(),
+ _executorName(),
+ _featureName()
+{
+ ParseContext ctx(input);
+ ctx.scan(isSpace(), doIgnore());
+ _valid = parseFeatureName(ctx, _baseName, _parameters, _output);
+ ctx.scan(isSpace(), doIgnore());
+ if (!ctx.eos()) {
+ _valid = ctx.signalError();
+ }
+ _endPos = ctx.pos();
+ if (_valid && ctx.eos()) {
+ FeatureNameBuilder builder;
+ builder.baseName(_baseName);
+ for (uint32_t i = 0; i < _parameters.size(); ++i) {
+ builder.parameter(_parameters[i]);
+ }
+ _executorName = builder.buildName();
+ builder.output(_output);
+ _featureName = builder.buildName();
+ } else {
+ _baseName = "";
+ {
+ StringVector tmp;
+ _parameters.swap(tmp);
+ }
+ _output = "";
+ }
+}
+
+} // namespace fef
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/fef/featurenameparser.h b/searchlib/src/vespa/searchlib/fef/featurenameparser.h
new file mode 100644
index 00000000000..fea86479d0b
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/featurenameparser.h
@@ -0,0 +1,100 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/vespalib/stllike/string.h>
+#include <vector>
+
+namespace search {
+namespace fef {
+
+/**
+ * Simple parser used to split feature names into components by the
+ * framework.
+ **/
+class FeatureNameParser
+{
+public:
+ typedef vespalib::string string;
+ typedef std::vector<string> StringVector;
+private:
+ bool _valid;
+ uint32_t _endPos;
+ string _baseName;
+ StringVector _parameters;
+ string _output;
+ string _executorName;
+ string _featureName;
+
+public:
+ /**
+ * The constructor parses the given feature name, splitting it
+ * into components. If the given string is not a valid feature
+ * name, all components will be empty and the @ref valid method
+ * will return false.
+ *
+ * @param featureName feature name
+ **/
+ FeatureNameParser(const vespalib::string &featureName);
+
+ /**
+ * Does this object represent a valid feature name?
+ *
+ * @return true if valid, false if invalid
+ **/
+ bool valid() const { return _valid; }
+
+ /**
+ * Obtain the number of bytes from the original feature name that
+ * was successfully parsed. If the feature name was valid, this
+ * method will simply return the size of the string given to the
+ * constructor. If a parse error occurred, this method will return
+ * the index of the offending character in the string given to the
+ * constructor.
+ *
+ * @return number of bytes successfully parsed
+ **/
+ uint32_t parsedBytes() const { return _endPos; }
+
+ /**
+ * Obtain the base name from the parsed feature name.
+ *
+ * @return base name
+ **/
+ const string &baseName() const { return _baseName; }
+
+ /**
+ * Obtain the parameter list from the parsed feature name.
+ *
+ * @return parameter list
+ **/
+ const StringVector &parameters() const { return _parameters; }
+
+ /**
+ * Obtain the output name from the parsed feature name.
+ *
+ * @return output name
+ **/
+ const string &output() const { return _output; }
+
+ /**
+ * Obtain a normalized name for the executor making this
+ * feature. This includes the parameter list. The @ref
+ * FeatureNameBuilder is used to make this name.
+ *
+ * @return normalized executor name with parameters
+ **/
+ const string &executorName() const { return _executorName; }
+
+ /**
+ * Obtain a normalized full feature name. The @ref
+ * FeatureNameBuilder is used to make this name.
+ *
+ * @return normalized full feature name
+ **/
+ const string &featureName() const { return _featureName; }
+};
+
+} // namespace fef
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/fef/featureoverrider.cpp b/searchlib/src/vespa/searchlib/fef/featureoverrider.cpp
new file mode 100644
index 00000000000..5bb2a2789bf
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/featureoverrider.cpp
@@ -0,0 +1,54 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "featureoverrider.h"
+
+namespace search {
+namespace fef {
+
+FeatureOverrider::FeatureOverrider(FeatureExecutor::LP executor, uint32_t outputIdx, feature_t value)
+ : _executor(executor),
+ _outputIdx(outputIdx),
+ _handle(IllegalHandle),
+ _value(value)
+{
+}
+
+void
+FeatureOverrider::inputs_done()
+{
+ for (uint32_t i = 0; i < inputs().size(); ++i) {
+ _executor->addInput(inputs()[i]);
+ }
+ _executor->inputs_done();
+}
+
+void
+FeatureOverrider::outputs_done()
+{
+ if (_outputIdx < outputs().size()) {
+ _handle = outputs()[_outputIdx];
+ }
+ for (uint32_t i = 0; i < outputs().size(); ++i) {
+ _executor->bindOutput(outputs()[i]);
+ }
+ _executor->outputs_done();
+}
+
+bool
+FeatureOverrider::isPure()
+{
+ return _executor->isPure();
+}
+
+void
+FeatureOverrider::execute(MatchData &data)
+{
+ _executor->execute(data);
+ if (_handle != IllegalHandle) {
+ *data.resolveFeature(_handle) = _value;
+ }
+}
+
+} // namespace fef
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/fef/featureoverrider.h b/searchlib/src/vespa/searchlib/fef/featureoverrider.h
new file mode 100644
index 00000000000..432a8ea4736
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/featureoverrider.h
@@ -0,0 +1,46 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "featureexecutor.h"
+
+namespace search {
+namespace fef {
+
+/**
+ * A Feature Overrider is a simple decorator class that wraps a single
+ * Feature Executor instance and overrides one of its output
+ * features. All method invocations are passed through to the inner
+ * feature executor. Each time the execute method is invoked, the
+ * appropriate feature value is overwritten.
+ **/
+class FeatureOverrider : public FeatureExecutor
+{
+private:
+ FeatureOverrider(const FeatureOverrider &);
+ FeatureOverrider &operator=(const FeatureOverrider &);
+
+ FeatureExecutor::LP _executor;
+ uint32_t _outputIdx;
+ FeatureHandle _handle;
+ feature_t _value;
+
+public:
+ /**
+ * Create a feature overrider that will override the given output
+ * with the given feature value.
+ *
+ * @param executor the feature executor for which we should override an output
+ * @param outputIdx which output to override
+ * @param value what value to override with
+ **/
+ FeatureOverrider(FeatureExecutor::LP executor, uint32_t outputIdx, feature_t value);
+ void inputs_done() override;
+ void outputs_done() override;
+ bool isPure() override;
+ void execute(MatchData &data) override;
+};
+
+} // namespace fef
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/fef/fef.cpp b/searchlib/src/vespa/searchlib/fef/fef.cpp
new file mode 100644
index 00000000000..6a29c098479
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/fef.cpp
@@ -0,0 +1,12 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "fef.h"
+
+namespace search {
+namespace fef {
+
+// this file is just to verify the fef.h file
+
+} // namespace fef
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/fef/fef.h b/searchlib/src/vespa/searchlib/fef/fef.h
new file mode 100644
index 00000000000..94ac5daf26e
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/fef.h
@@ -0,0 +1,62 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// NOTE: This file was generated by the 'create-fef-includes.sh' script
+
+
+/**
+ * @file fef.h
+ *
+ * This is a convenience header that will include everything you need
+ * to use this library.
+ **/
+
+#pragma once
+
+#include <vespa/searchlib/common/feature.h>
+
+#include "blueprint.h"
+#include "blueprintfactory.h"
+#include "blueprintresolver.h"
+#include "collection_type.h"
+#include "featureexecutor.h"
+#include "featurenamebuilder.h"
+#include "featurenameparser.h"
+#include "featureoverrider.h"
+#include "fieldinfo.h"
+#include "fieldpositionsiterator.h"
+#include "fieldtype.h"
+#include "filetablefactory.h"
+#include "functiontablefactory.h"
+#include "handle.h"
+#include "iblueprintregistry.h"
+#include "idumpfeaturevisitor.h"
+#include "iindexenvironment.h"
+#include "indexproperties.h"
+#include "iqueryenvironment.h"
+#include "itablefactory.h"
+#include "itablemanager.h"
+#include "itermdata.h"
+#include "itermfielddata.h"
+#include "location.h"
+#include "matchdata.h"
+#include "matchdatalayout.h"
+#include "parameter.h"
+#include "parameterdescriptions.h"
+#include "parametervalidator.h"
+#include "phrasesplitter.h"
+#include "properties.h"
+#include "queryproperties.h"
+#include "rank_program.h"
+#include "ranksetup.h"
+#include "simpletermdata.h"
+#include "simpletermfielddata.h"
+#include "sumexecutor.h"
+#include "symmetrictable.h"
+#include "table.h"
+#include "tablemanager.h"
+#include "termfieldmatchdata.h"
+#include "termfieldmatchdataarray.h"
+#include "termfieldmatchdataposition.h"
+#include "termmatchdatamerger.h"
+#include "utils.h"
+#include "verify_feature.h"
+
diff --git a/searchlib/src/vespa/searchlib/fef/fieldinfo.cpp b/searchlib/src/vespa/searchlib/fef/fieldinfo.cpp
new file mode 100644
index 00000000000..234352ddb31
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/fieldinfo.cpp
@@ -0,0 +1,23 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "fieldinfo.h"
+
+namespace search {
+namespace fef {
+
+FieldInfo::FieldInfo(FieldType type_in, CollectionType collection_in,
+ const string &name_in, uint32_t id_in)
+ : _type(type_in),
+ _data_type(DataType::DOUBLE),
+ _collection(collection_in),
+ _name(name_in),
+ _id(id_in),
+ _isFilter(false),
+ _hasAttribute(type_in == FieldType::ATTRIBUTE)
+{
+}
+
+
+} // namespace fef
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/fef/fieldinfo.h b/searchlib/src/vespa/searchlib/fef/fieldinfo.h
new file mode 100644
index 00000000000..8c0625f0c27
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/fieldinfo.h
@@ -0,0 +1,112 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/vespalib/stllike/string.h>
+#include "fieldtype.h"
+#include "collection_type.h"
+#include <vespa/searchcommon/common/schema.h>
+
+namespace search {
+namespace fef {
+
+const uint32_t IllegalFieldId = 0xffffffff;
+
+/**
+ * Information about a single field. This class is used by the @ref
+ * IIndexEnvironment to expose information.
+ **/
+class FieldInfo
+{
+public:
+ using DataType = search::index::Schema::DataType;
+ typedef vespalib::string string;
+private:
+ FieldType _type;
+ DataType _data_type;
+ CollectionType _collection;
+ string _name;
+ uint32_t _id;
+ bool _isFilter;
+ bool _hasAttribute;
+
+public:
+ /**
+ * Create a new field info object. The id of a field acts as both
+ * an index used to iterate all fields through the index
+ * environment and as an enumeration of fields. Multiple fields
+ * owned by the same index environment may not have the same name.
+ **/
+ FieldInfo(FieldType type_in, CollectionType collection_in,
+ const string &name_in, uint32_t id_in);
+
+ /**
+ * Check if an attribute vector is available for this
+ * field. Attributes are, and therefore have attributes. Index
+ * fields may also have attributes available, or attributes may be
+ * generated on-the-fly when needed. This function will tell you
+ * whether attribute value lookup for a field will be possible.
+ *
+ *@return true if an attribute can be obtained for this field
+ **/
+ bool hasAttribute() const { return _hasAttribute; }
+
+ /**
+ * Add the power of attribute lookup to this field. This is used
+ * to verify rank features using attributes during setup. If you
+ * call this function to allow rank setup, but do not supply the
+ * needed attributes during query execution; the poo is on you.
+ **/
+ void addAttribute() { _hasAttribute = true; }
+
+ /**
+ * Obtain the type of this field
+ *
+ * @return the type of this field
+ **/
+ FieldType type() const { return _type; }
+
+ void set_data_type(DataType data_type_in) { _data_type = data_type_in; }
+ DataType get_data_type() const { return _data_type; }
+
+ /**
+ * Obtain the collection type of this field
+ *
+ * @return collection type of this field
+ **/
+ CollectionType collection() const { return _collection; }
+
+ /**
+ * Obtain the name of this field
+ *
+ * @return the name of this field
+ **/
+ const string & name() const { return _name; }
+
+ /**
+ * Obtain the id of this field
+ *
+ * @return the id of this field
+ **/
+ uint32_t id() const { return _id; }
+
+ /**
+ * Set the flag indicating whether this field should be treated as
+ * a filter field (fast searching and low complexity ranking).
+ *
+ * @param flag true if this field should be treated as a filter
+ **/
+ void setFilter(bool flag) { _isFilter = flag; }
+
+ /**
+ * Obtain the flag indicating whether this field should be treated
+ * as a filter field (fast searching and low complexity ranking).
+ *
+ * @return true if this field should be treated as a filter
+ **/
+ bool isFilter() const { return _isFilter; }
+};
+
+} // namespace fef
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/fef/fieldpositionsiterator.cpp b/searchlib/src/vespa/searchlib/fef/fieldpositionsiterator.cpp
new file mode 100644
index 00000000000..8994c9492a7
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/fieldpositionsiterator.cpp
@@ -0,0 +1,14 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "fieldpositionsiterator.h"
+#include <vespa/searchlib/common/fslimits.h>
+
+namespace search {
+namespace fef {
+
+const uint32_t FieldPositionsIterator::UNKNOWN_LENGTH =
+SEARCHLIB_FEF_UNKNOWN_FIELD_LENGTH;
+
+} // namespace fef
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/fef/fieldpositionsiterator.h b/searchlib/src/vespa/searchlib/fef/fieldpositionsiterator.h
new file mode 100644
index 00000000000..933858b79c9
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/fieldpositionsiterator.h
@@ -0,0 +1,164 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vector>
+#include "termfieldmatchdataposition.h"
+
+namespace search {
+namespace fef {
+
+/**
+ * Iterator used to iterate over all positions of a term inside a
+ * specific field.
+ **/
+class FieldPositionsIterator
+{
+public:
+ /**
+ * The iterator type of the underlying data, which have all
+ * positions for a term across all fields searched.
+ **/
+ typedef const TermFieldMatchDataPosition * PositionsIterator;
+
+private:
+ uint32_t _length;
+ PositionsIterator _begin;
+ PositionsIterator _pos;
+ PositionsIterator _end;
+
+public:
+ /**
+ * The length reported for fields for which we do not know the
+ * real length.
+ **/
+ static const uint32_t UNKNOWN_LENGTH;
+
+ /**
+ * Create a new iterator for a field we know nothing about. This
+ * will give the field no position data and a length of 0.
+ **/
+ FieldPositionsIterator()
+ : _length(UNKNOWN_LENGTH), _begin(0), _pos(0), _end(0) {}
+
+ /**
+ * Create a new iterator for a field with the given offset and
+ * length, using a slice of the underlying position data.
+ *
+ * @param length the length of the field in words
+ * @param begin start of position data slice
+ * @param end end of position data slice
+ **/
+ FieldPositionsIterator(uint32_t length,
+ PositionsIterator begin,
+ PositionsIterator end)
+ : _length(length), _begin(begin), _pos(begin), _end(end) {}
+
+ /**
+ * Relocate the references held by this object into the actual
+ * occurrence data. This method assumes iterators are random
+ * access and cheap to copy. This method must be invoked if the
+ * underlying occurrence data is moved in memory.
+ *
+ * @param oldRef old reference iterator
+ * @param newRef new reference iterator
+ **/
+ void relocate(PositionsIterator oldRef, PositionsIterator newRef) {
+ if (_begin != PositionsIterator(0)) {
+ _begin = newRef + (_begin - oldRef);
+ _pos = newRef + (_pos - oldRef);
+ _end = newRef + (_end - oldRef);
+ }
+ }
+
+ /**
+ * Check if there is valid data available at the current position
+ * of this iterator.
+ *
+ * @return false if no more data is available
+ **/
+ bool valid() const { return _pos != _end; }
+
+ /**
+ * Step this iterator to the next position. This method may only
+ * be invoked if the @ref valid method returns true.
+ **/
+ void next() { ++_pos; }
+
+ /**
+ * Try to step this iterator backwards. This method will return
+ * false if the iterator is already located at the beginning.
+ *
+ * @return false if we are unable to step backwards
+ **/
+ bool prev() {
+ if (_pos == _begin) {
+ return false;
+ }
+ --_pos;
+ return true;
+ }
+
+ /**
+ * Obtain the word position within the field for the entry
+ * indicated by the current position of this iterator. This method
+ * may only be invoked if the @ref valid method returns true.
+ *
+ * @return word position within the field
+ **/
+ uint32_t getPosition() const { return _pos->getPosition(); }
+
+ /**
+ * Obtain the element id within the field for the entry
+ * indicated by the current position of this iterator. This method
+ * may only be invoked if the @ref valid method returns true.
+ *
+ * @return element id within the field
+ **/
+ uint32_t getElementId() const { return _pos->getElementId(); }
+
+ /**
+ * Obtain the element length within the field for the entry
+ * indicated by the current position of this iterator. This method
+ * may only be invoked if the @ref valid method returns true.
+ *
+ * @return element id within the field
+ **/
+ uint32_t getElementLen() const { return _pos->getElementLen(); }
+
+ /**
+ * Obtain the element weight within the field for the entry
+ * indicated by the current position of this iterator. This method
+ * may only be invoked if the @ref valid method returns true.
+ *
+ * @return element id within the field
+ **/
+ int32_t getElementWeight() const { return _pos->getElementWeight(); }
+
+ /**
+ * Obtain the match exactness indicated by the current position of
+ * this iterator. This method may only be invoked if the @ref valid
+ * method returns true.
+ *
+ * @return exactness measure
+ **/
+ double getMatchExactness() const { return _pos->getMatchExactness(); }
+
+ /**
+ * Obtain the total number of words in the field.
+ *
+ * @return field length in words.
+ **/
+ uint32_t getFieldLength() const { return _length; }
+
+ /**
+ * Obtain the number of positions in this iterator.
+ *
+ * @return number of positions
+ **/
+ uint32_t size() const { return (_end - _begin); }
+};
+
+} // namespace fef
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/fef/fieldtype.cpp b/searchlib/src/vespa/searchlib/fef/fieldtype.cpp
new file mode 100644
index 00000000000..39cb1be7997
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/fieldtype.cpp
@@ -0,0 +1,21 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "fieldtype.h"
+
+namespace search {
+namespace fef {
+
+FieldType::FieldType(uint32_t value)
+ : _value(value)
+{
+}
+
+const FieldType FieldType::INDEX(1);
+
+const FieldType FieldType::ATTRIBUTE(2);
+
+const FieldType FieldType::HIDDEN_ATTRIBUTE(3);
+
+} // namespace fef
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/fef/fieldtype.h b/searchlib/src/vespa/searchlib/fef/fieldtype.h
new file mode 100644
index 00000000000..8b1b17f9801
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/fieldtype.h
@@ -0,0 +1,51 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+namespace search {
+namespace fef {
+
+/**
+ * Typesafe enum used to indicate the type of a field.
+ **/
+class FieldType
+{
+private:
+ uint32_t _value;
+
+ FieldType(uint32_t value);
+public:
+ /**
+ * Indicating that the field is indexed
+ **/
+ static const FieldType INDEX;
+
+ /**
+ * Indicating that the field is kept in an attribute vector
+ **/
+ static const FieldType ATTRIBUTE;
+
+ /**
+ * Indicating that the field is kept in an attribute vector
+ **/
+ static const FieldType HIDDEN_ATTRIBUTE;
+
+ /**
+ * Less than operator; needed to be handled as a value by the standard library.
+ **/
+ bool operator<(const FieldType &rhs) const { return (_value < rhs._value); }
+
+ /**
+ * Check if two field types are equal.
+ **/
+ bool operator==(const FieldType &rhs) const { return (_value == rhs._value); }
+
+ /**
+ * Check if two field types are not equal.
+ **/
+ bool operator!=(const FieldType &rhs) const { return (_value != rhs._value); }
+};
+
+} // namespace fef
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/fef/filetablefactory.cpp b/searchlib/src/vespa/searchlib/fef/filetablefactory.cpp
new file mode 100644
index 00000000000..c113efe33d9
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/filetablefactory.cpp
@@ -0,0 +1,41 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".fef.filetablefactory");
+#include "filetablefactory.h"
+#include <fstream>
+
+namespace search {
+namespace fef {
+
+FileTableFactory::FileTableFactory(const vespalib::string & path) :
+ _path(path)
+{
+}
+
+Table::SP
+FileTableFactory::createTable(const vespalib::string & name) const
+{
+ vespalib::string completeName(_path);
+ completeName.append("/");
+ completeName.append(name);
+ std::ifstream file(completeName.c_str(), std::ifstream::in);
+ if (file.is_open()) {
+ Table::SP table(new Table());
+ for (;;) {
+ double val = 0;
+ file >> val;
+ if (!file.good()) {
+ break;
+ }
+ table->add(val);
+ }
+ return table;
+ }
+ LOG(warning, "Could not open file '%s' for creating table '%s'", completeName.c_str(), name.c_str());
+ return Table::SP(NULL);
+}
+
+} // namespace fef
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/fef/filetablefactory.h b/searchlib/src/vespa/searchlib/fef/filetablefactory.h
new file mode 100644
index 00000000000..74e0a09e6dd
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/filetablefactory.h
@@ -0,0 +1,34 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "itablefactory.h"
+
+namespace search {
+namespace fef {
+
+/**
+ * This factory class is used to instantiate tables that are stored in files on disk.
+ **/
+class FileTableFactory : public ITableFactory
+{
+private:
+ vespalib::string _path;
+
+public:
+ /**
+ * Creates a new factory for table files that are located in the given path.
+ **/
+ FileTableFactory(const vespalib::string & path);
+
+ /**
+ * Creates a table by reading the file 'path/name' and setting up a Table object.
+ * The numbers in the file should be separated with ' ' or '\n'.
+ * Table::SP(NULL) is returned if the file 'path/name' is not found.
+ **/
+ virtual Table::SP createTable(const vespalib::string & name) const;
+};
+
+} // namespace fef
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/fef/functiontablefactory.cpp b/searchlib/src/vespa/searchlib/fef/functiontablefactory.cpp
new file mode 100644
index 00000000000..a901ecc90ea
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/functiontablefactory.cpp
@@ -0,0 +1,134 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".fef.functiontablefactory");
+#include <boost/algorithm/string/split.hpp>
+#include <boost/algorithm/string/classification.hpp>
+#include <cmath>
+#include "functiontablefactory.h"
+
+namespace {
+
+void logArgumentWarning(const vespalib::string & name, size_t exp, size_t act)
+{
+ LOG(warning, "Cannot create table for function '%s'. Wrong number of arguments: expected %zu to %zu, but got %zu",
+ name.c_str(), exp, exp + 1, act);
+}
+
+}
+
+namespace search {
+namespace fef {
+
+bool
+FunctionTableFactory::checkArgs(const std::vector<vespalib::string> & args, size_t exp, size_t & tableSize) const
+{
+ if (exp <= args.size() && args.size() <= (exp + 1)) {
+ if (args.size() == (exp + 1)) {
+ tableSize = atoi(args.back().c_str());
+ } else {
+ tableSize = _defaultTableSize;
+ }
+ return true;
+ }
+ return false;
+}
+
+bool
+FunctionTableFactory::isSupported(const vespalib::string & type) const
+{
+ return (isExpDecay(type) || isLogGrowth(type) || isLinear(type));
+}
+
+Table::SP
+FunctionTableFactory::createExpDecay(double w, double t, size_t len) const
+{
+ Table::SP table(new Table());
+ for (size_t x = 0; x < len; ++x) {
+ table->add(w * exp(-(x / t)));
+ }
+ return table;
+}
+
+Table::SP
+FunctionTableFactory::createLogGrowth(double w, double t, double s, size_t len) const
+{
+ Table::SP table(new Table());
+ for (size_t x = 0; x < len; ++x) {
+ table->add(w * (log(1 + (x / s))) + t);
+ }
+ return table;
+}
+
+Table::SP
+FunctionTableFactory::createLinear(double w, double t, size_t len) const
+{
+ Table::SP table(new Table());
+ for (size_t x = 0; x < len; ++x) {
+ table->add(w * x + t);
+ }
+ return table;
+}
+
+FunctionTableFactory::FunctionTableFactory(size_t defaultTableSize) :
+ _defaultTableSize(defaultTableSize)
+{
+}
+
+Table::SP
+FunctionTableFactory::createTable(const vespalib::string & name) const
+{
+ ParsedName p;
+ if (parseFunctionName(name, p)) {
+ if (isSupported(p.type)) {
+ size_t tableSize = _defaultTableSize;
+ if (isExpDecay(p.type)) {
+ if (checkArgs(p.args, 2, tableSize)) {
+ return createExpDecay(atof(p.args[0].c_str()), atof(p.args[1].c_str()), tableSize);
+ }
+ logArgumentWarning(name, 2, p.args.size());
+ } else if (isLogGrowth(p.type)) {
+ if (checkArgs(p.args, 3, tableSize)) {
+ return createLogGrowth(atof(p.args[0].c_str()), atof(p.args[1].c_str()), atof(p.args[2].c_str()), tableSize);
+ }
+ logArgumentWarning(name, 3, p.args.size());
+ } else if (isLinear(p.type)) {
+ if (checkArgs(p.args, 2, tableSize)) {
+ return createLinear(atof(p.args[0].c_str()), atof(p.args[1].c_str()), tableSize);
+ }
+ logArgumentWarning(name, 2, p.args.size());
+ }
+ } else {
+ LOG(warning, "Cannot create table for function '%s'. Function type '%s' is not supported",
+ name.c_str(), p.type.c_str());
+ }
+ } else {
+ LOG(warning, "Cannot create table for function '%s'. Could not be parsed.", name.c_str());
+ }
+ return Table::SP(NULL);
+}
+
+bool
+FunctionTableFactory::parseFunctionName(const vespalib::string & name, ParsedName & parsed)
+{
+ size_t ps = name.find('(');
+ size_t pe = name.find(')');
+ if (ps == vespalib::string::npos || pe == vespalib::string::npos) {
+ LOG(warning, "Parse error: Did not find '(' and ')' in function name '%s'", name.c_str());
+ return false;
+ }
+ if (ps >= pe) {
+ LOG(warning, "Parse error: Found ')' before '(' in function name '%s'", name.c_str());
+ return false;
+ }
+ parsed.type = name.substr(0, ps);
+ vespalib::string args = name.substr(ps + 1, pe - ps - 1);
+ if (!args.empty()) {
+ boost::split(parsed.args, args, boost::is_any_of(","));
+ }
+ return true;
+}
+
+} // namespace fef
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/fef/functiontablefactory.h b/searchlib/src/vespa/searchlib/fef/functiontablefactory.h
new file mode 100644
index 00000000000..10a610f3c04
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/functiontablefactory.h
@@ -0,0 +1,59 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "itablefactory.h"
+
+namespace search {
+namespace fef {
+
+/**
+ * This factory class is used to instantiate tables based on a function.
+ * The name of the table specifies the function and arguments to use.
+ * The following functions are supported:
+ * - expdecay(w,t) : w * exp(-x/t)
+ * - loggrowth(w,t,s) : w * log(1 + x/s) + t
+ * - linear(w,t) : w * x + t
+ * All functions support an optional last parameter for setting the table size.
+ **/
+class FunctionTableFactory : public ITableFactory
+{
+public:
+ struct ParsedName {
+ vespalib::string type;
+ std::vector<vespalib::string> args;
+ ParsedName() : type(), args() {}
+ };
+
+private:
+ size_t _defaultTableSize;
+
+ bool checkArgs(const std::vector<vespalib::string> & args, size_t exp, size_t & tableSize) const;
+ bool isSupported(const vespalib::string & type) const;
+ bool isExpDecay(const vespalib::string & type) const { return type == "expdecay"; }
+ bool isLogGrowth(const vespalib::string & type) const { return type == "loggrowth"; }
+ bool isLinear(const vespalib::string & type) const { return type == "linear"; }
+ Table::SP createExpDecay(double w, double t, size_t len) const;
+ Table::SP createLogGrowth(double w, double t, double s, size_t len) const;
+ Table::SP createLinear(double w, double t, size_t len) const;
+
+public:
+ /**
+ * Creates a new factory able to create tables with the given default size.
+ **/
+ FunctionTableFactory(size_t defaultTableSize);
+
+ /**
+ * Creates a table where the given name specifies the function and arguments to use.
+ **/
+ virtual Table::SP createTable(const vespalib::string & name) const;
+
+ /**
+ * Parses the given function name and returns true if success.
+ **/
+ static bool parseFunctionName(const vespalib::string & name, ParsedName & parsed);
+};
+
+} // namespace fef
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/fef/handle.h b/searchlib/src/vespa/searchlib/fef/handle.h
new file mode 100644
index 00000000000..3e608b251b7
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/handle.h
@@ -0,0 +1,17 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <stdint.h>
+
+namespace search {
+namespace fef {
+
+typedef uint32_t FeatureHandle;
+typedef uint32_t TermFieldHandle;
+
+const uint32_t IllegalHandle = 0xffffffff;
+
+} // namespace fef
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/fef/iblueprintregistry.h b/searchlib/src/vespa/searchlib/fef/iblueprintregistry.h
new file mode 100644
index 00000000000..20092719d2f
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/iblueprintregistry.h
@@ -0,0 +1,28 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+namespace search {
+namespace fef {
+
+/**
+ * This is an interface used during plugin setup to register blueprint
+ * prototypes.
+ **/
+class IBlueprintRegistry
+{
+public:
+ /**
+ * Add a blueprint prototype to the registry.
+ **/
+ virtual void addPrototype(Blueprint::SP proto) = 0;
+
+ /**
+ * Virtual destructor to allow safe subclassing.
+ **/
+ virtual ~IBlueprintRegistry() {}
+};
+
+} // namespace fef
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/fef/idumpfeaturevisitor.h b/searchlib/src/vespa/searchlib/fef/idumpfeaturevisitor.h
new file mode 100644
index 00000000000..0014dd677e4
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/idumpfeaturevisitor.h
@@ -0,0 +1,33 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/vespalib/stllike/string.h>
+namespace search {
+namespace fef {
+
+/**
+ * This interface is implemented by objects that want to visit all
+ * dump features.
+ **/
+class IDumpFeatureVisitor
+{
+public:
+ /**
+ * Visit a feature that should be dumped when doing a full feature
+ * dump. Note that full feature names must be used, for example
+ * 'foo(a,b).out'.
+ *
+ * @param name full feature name
+ **/
+ virtual void visitDumpFeature(const vespalib::string &name) = 0;
+
+ /**
+ * Virtual destructor to allow safe subclassing.
+ **/
+ virtual ~IDumpFeatureVisitor() {}
+};
+
+} // namespace fef
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/fef/iindexenvironment.h b/searchlib/src/vespa/searchlib/fef/iindexenvironment.h
new file mode 100644
index 00000000000..c2f489abe3b
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/iindexenvironment.h
@@ -0,0 +1,125 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/vespalib/stllike/string.h>
+
+namespace search {
+namespace fef {
+
+class Properties;
+class FieldInfo;
+class ITableManager;
+
+/**
+ * Abstract view of index related information available to the
+ * framework.
+ **/
+class IIndexEnvironment
+{
+public:
+ typedef vespalib::string string;
+ /**
+ * This enum defines the different motivations the framework has
+ * for configuring a feature blueprint. RANK means the feature is
+ * needed for ranking calculations in normal operation. DUMP means
+ * the feature is needed to perform a feature dump. VERIFY_SETUP
+ * means that we are just trying to figure out if this setup is
+ * valid; the feature will never actually be executed.
+ **/
+ enum FeatureMotivation {
+ UNKNOWN = 0,
+ RANK = 1,
+ DUMP = 2,
+ VERIFY_SETUP = 3
+ };
+
+ /**
+ * Obtain the set of properties associated with this index
+ * environment.
+ *
+ * @return properties
+ **/
+ virtual const Properties &getProperties() const = 0;
+
+ /**
+ * Obtain the number of fields
+ *
+ * @return number of fields
+ **/
+ virtual uint32_t getNumFields() const = 0;
+
+ /**
+ * Obtain a field by using the field enumeration. The legal range
+ * for id is [0, getNumFields>. If id is out of bounds, 0 will be
+ * returned.
+ *
+ * @return information about a single field
+ **/
+ virtual const FieldInfo *getField(uint32_t id) const = 0;
+
+ /**
+ * Obtain a field by using the field name. If the field is not
+ * found, 0 will be returned.
+ *
+ * @return information about a single field
+ **/
+ virtual const FieldInfo *getFieldByName(const string &name) const = 0;
+
+ /**
+ * Obtain the table manager associated with this index environment.
+ *
+ * @return table manager
+ **/
+ virtual const ITableManager &getTableManager() const = 0;
+
+ /**
+ * Obtain the current motivation behind feature setup. The
+ * motivation is typically that we want to set up features for
+ * ranking or dumping. In some cases we are also setting things up
+ * just to verify that it is possible.
+ *
+ * @return current feature motivation
+ **/
+ virtual FeatureMotivation getFeatureMotivation() const = 0;
+
+ /**
+ * Hint about the nature of the feature blueprints we are about to
+ * configure. This method provides additional information that may
+ * be useful when interpreting hints about future field and
+ * attribute access.
+ *
+ * @param motivation the motivation behind the feature blueprints
+ * the framework is about to configure.
+ **/
+ virtual void hintFeatureMotivation(FeatureMotivation motivation) const = 0;
+
+ /**
+ * Hint about the future access of a field. This method may be
+ * used by blueprints during setup to hint the enclosing system
+ * that a feature executor created by it might try to access the
+ * field iterator for a specific field during execution.
+ *
+ * @param fieldId field id
+ **/
+ virtual void hintFieldAccess(uint32_t fieldId) const = 0;
+
+ /**
+ * Hint about the future access of an attribute. This method may
+ * be used by blueprints during setup to hint the enclosing system
+ * that a feature executor created by it might try to access a
+ * specific attribute during execution.
+ *
+ * @param name attribute name
+ **/
+ virtual void hintAttributeAccess(const string &name) const = 0;
+
+ /**
+ * Virtual destructor to allow safe subclassing.
+ **/
+ virtual ~IIndexEnvironment() {}
+};
+
+} // namespace fef
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/fef/indexproperties.cpp b/searchlib/src/vespa/searchlib/fef/indexproperties.cpp
new file mode 100644
index 00000000000..14d92e3a909
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/indexproperties.cpp
@@ -0,0 +1,373 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "indexproperties.h"
+#include "properties.h"
+#include <limits>
+
+namespace search {
+namespace fef {
+namespace indexproperties {
+
+namespace {
+
+vespalib::string
+lookupString(const Properties &props, const vespalib::string &name,
+ const vespalib::string &defaultValue)
+{
+ Property p = props.lookup(name);
+ if (p.found()) {
+ return p.get();
+ }
+ return defaultValue;
+}
+
+std::vector<vespalib::string>
+lookupStringVector(const Properties &props, const vespalib::string &name,
+ const std::vector<vespalib::string> &defaultValue)
+{
+ Property p = props.lookup(name);
+ if (p.found()) {
+ std::vector<vespalib::string> retval;
+ for (uint32_t i = 0; i < p.size(); ++i) {
+ retval.push_back(p.getAt(i));
+ }
+ return retval;
+ }
+ return defaultValue;
+}
+
+double
+lookupDouble(const Properties &props, const vespalib::string &name, double defaultValue)
+{
+ Property p = props.lookup(name);
+ if (p.found()) {
+ return strtod(p.get().c_str(), NULL);
+ }
+ return defaultValue;
+}
+
+uint32_t
+lookupUint32(const Properties &props, const vespalib::string &name, uint32_t defaultValue)
+{
+ Property p = props.lookup(name);
+ if (p.found()) {
+ return atoi(p.get().c_str());
+ }
+ return defaultValue;
+}
+
+bool
+lookupBool(const Properties &props, const vespalib::string &name, bool defaultValue)
+{
+ Property p = props.lookup(name);
+ if (p.found()) {
+ return (p.get() == "true");
+ }
+ return defaultValue;
+}
+
+bool
+checkIfTrue(const Properties &props, const vespalib::string &name,
+ const vespalib::string &defaultValue)
+{
+ return (props.lookup(name).get(defaultValue) == "true");
+}
+
+}
+
+namespace rank {
+
+const vespalib::string FirstPhase::NAME("vespa.rank.firstphase");
+const vespalib::string FirstPhase::DEFAULT_VALUE("nativeRank");
+
+vespalib::string
+FirstPhase::lookup(const Properties &props)
+{
+ return lookupString(props, NAME, DEFAULT_VALUE);
+}
+
+const vespalib::string SecondPhase::NAME("vespa.rank.secondphase");
+const vespalib::string SecondPhase::DEFAULT_VALUE("");
+
+vespalib::string
+SecondPhase::lookup(const Properties &props)
+{
+ return lookupString(props, NAME, DEFAULT_VALUE);
+}
+
+} // namespace rank
+
+namespace summary {
+
+const vespalib::string Feature::NAME("vespa.summary.feature");
+const std::vector<vespalib::string> Feature::DEFAULT_VALUE;
+
+std::vector<vespalib::string>
+Feature::lookup(const Properties &props)
+{
+ return lookupStringVector(props, NAME, DEFAULT_VALUE);
+}
+
+} // namespace summary
+
+namespace dump {
+
+const vespalib::string Feature::NAME("vespa.dump.feature");
+const std::vector<vespalib::string> Feature::DEFAULT_VALUE;
+
+std::vector<vespalib::string>
+Feature::lookup(const Properties &props)
+{
+ return lookupStringVector(props, NAME, DEFAULT_VALUE);
+}
+
+const vespalib::string IgnoreDefaultFeatures::NAME("vespa.dump.ignoredefaultfeatures");
+const vespalib::string IgnoreDefaultFeatures::DEFAULT_VALUE("false");
+
+bool
+IgnoreDefaultFeatures::check(const Properties &props)
+{
+ return checkIfTrue(props, NAME, DEFAULT_VALUE);
+}
+
+} // namespace dump
+
+namespace matching {
+
+const vespalib::string TermwiseLimit::NAME("vespa.matching.termwise_limit");
+const double TermwiseLimit::DEFAULT_VALUE(1.0);
+
+double
+TermwiseLimit::lookup(const Properties &props)
+{
+ return lookupDouble(props, NAME, DEFAULT_VALUE);
+}
+
+const vespalib::string NumThreadsPerSearch::NAME("vespa.matching.numthreadspersearch");
+const uint32_t NumThreadsPerSearch::DEFAULT_VALUE(std::numeric_limits<uint32_t>::max());
+
+uint32_t
+NumThreadsPerSearch::lookup(const Properties &props)
+{
+ return lookupUint32(props, NAME, DEFAULT_VALUE);
+}
+
+const vespalib::string NumSearchPartitions::NAME("vespa.matching.numsearchpartitions");
+const uint32_t NumSearchPartitions::DEFAULT_VALUE(1);
+
+uint32_t
+NumSearchPartitions::lookup(const Properties &props)
+{
+ return lookupUint32(props, NAME, DEFAULT_VALUE);
+}
+} // namespace matching
+
+namespace matchphase {
+
+const vespalib::string DegradationAttribute::NAME("vespa.matchphase.degradation.attribute");
+const vespalib::string DegradationAttribute::DEFAULT_VALUE("");
+
+const vespalib::string DegradationAscendingOrder::NAME("vespa.matchphase.degradation.ascendingorder");
+const bool DegradationAscendingOrder::DEFAULT_VALUE(false);
+
+const vespalib::string DegradationMaxHits::NAME("vespa.matchphase.degradation.maxhits");
+const uint32_t DegradationMaxHits::DEFAULT_VALUE(0);
+
+const vespalib::string DegradationSamplePercentage::NAME("vespa.matchphase.degradation.samplepercentage");
+const double DegradationSamplePercentage::DEFAULT_VALUE(0.2);
+
+const vespalib::string DegradationMaxFilterCoverage::NAME("vespa.matchphase.degradation.maxfiltercoverage");
+const double DegradationMaxFilterCoverage::DEFAULT_VALUE(1.0);
+
+const vespalib::string DegradationPostFilterMultiplier::NAME("vespa.matchphase.degradation.postfiltermultiplier");
+const double DegradationPostFilterMultiplier::DEFAULT_VALUE(1.0);
+
+const vespalib::string DiversityAttribute::NAME("vespa.matchphase.diversity.attribute");
+const vespalib::string DiversityAttribute::DEFAULT_VALUE("");
+
+const vespalib::string DiversityMinGroups::NAME("vespa.matchphase.diversity.mingroups");
+const uint32_t DiversityMinGroups::DEFAULT_VALUE(1);
+
+const vespalib::string DiversityCutoffFactor::NAME("vespa.matchphase.diversity.cutoff.factor");
+const double DiversityCutoffFactor::DEFAULT_VALUE(10.0);
+
+const vespalib::string DiversityCutoffStrategy::NAME("vespa.matchphase.diversity.cutoff.strategy");
+const vespalib::string DiversityCutoffStrategy::DEFAULT_VALUE("loose");
+
+vespalib::string
+DegradationAttribute::lookup(const Properties &props)
+{
+ return lookupString(props, NAME, DEFAULT_VALUE);
+}
+
+bool
+DegradationAscendingOrder::lookup(const Properties &props)
+{
+ return lookupBool(props, NAME, DEFAULT_VALUE);
+}
+
+uint32_t
+DegradationMaxHits::lookup(const Properties &props)
+{
+ return lookupUint32(props, NAME, DEFAULT_VALUE);
+}
+
+double
+DegradationSamplePercentage::lookup(const Properties &props)
+{
+ return lookupDouble(props, NAME, DEFAULT_VALUE);
+}
+
+double
+DegradationMaxFilterCoverage::lookup(const Properties &props)
+{
+ return lookupDouble(props, NAME, DEFAULT_VALUE);
+}
+
+double
+DegradationPostFilterMultiplier::lookup(const Properties &props)
+{
+ return lookupDouble(props, NAME, DEFAULT_VALUE);
+}
+
+vespalib::string
+DiversityAttribute::lookup(const Properties &props)
+{
+ return lookupString(props, NAME, DEFAULT_VALUE);
+}
+
+uint32_t
+DiversityMinGroups::lookup(const Properties &props)
+{
+ return lookupUint32(props, NAME, DEFAULT_VALUE);
+}
+
+double
+DiversityCutoffFactor::lookup(const Properties &props)
+{
+ return lookupDouble(props, NAME, DEFAULT_VALUE);
+}
+
+vespalib::string
+DiversityCutoffStrategy::lookup(const Properties &props)
+{
+ return lookupString(props, NAME, DEFAULT_VALUE);
+}
+
+
+}
+
+namespace hitcollector {
+
+const vespalib::string HeapSize::NAME("vespa.hitcollector.heapsize");
+const uint32_t HeapSize::DEFAULT_VALUE(100);
+
+uint32_t
+HeapSize::lookup(const Properties &props)
+{
+ return lookupUint32(props, NAME, DEFAULT_VALUE);
+}
+
+const vespalib::string ArraySize::NAME("vespa.hitcollector.arraysize");
+const uint32_t ArraySize::DEFAULT_VALUE(10000);
+
+uint32_t
+ArraySize::lookup(const Properties &props)
+{
+ return lookupUint32(props, NAME, DEFAULT_VALUE);
+}
+
+const vespalib::string EstimatePoint::NAME("vespa.hitcollector.estimatepoint");
+const uint32_t EstimatePoint::DEFAULT_VALUE(0xffffffff);
+
+uint32_t
+EstimatePoint::lookup(const Properties &props)
+{
+ return lookupUint32(props, NAME, DEFAULT_VALUE);
+}
+
+const vespalib::string EstimateLimit::NAME("vespa.hitcollector.estimatelimit");
+const uint32_t EstimateLimit::DEFAULT_VALUE(0xffffffff);
+
+uint32_t
+EstimateLimit::lookup(const Properties &props)
+{
+ return lookupUint32(props, NAME, DEFAULT_VALUE);
+}
+
+const vespalib::string RankScoreDropLimit::NAME("vespa.hitcollector.rankscoredroplimit");
+const feature_t RankScoreDropLimit::DEFAULT_VALUE(-std::numeric_limits<feature_t>::quiet_NaN());
+
+feature_t
+RankScoreDropLimit::lookup(const Properties &props)
+{
+ return lookupDouble(props, NAME, DEFAULT_VALUE);
+}
+
+} // namspace hitcollector
+
+
+const vespalib::string FieldWeight::BASE_NAME("vespa.fieldweight.");
+const uint32_t FieldWeight::DEFAULT_VALUE(100);
+
+uint32_t
+FieldWeight::lookup(const Properties &props, const vespalib::string &fieldName)
+{
+ return lookupUint32(props, BASE_NAME + fieldName, DEFAULT_VALUE);
+}
+
+
+const vespalib::string IsFilterField::BASE_NAME("vespa.isfilterfield.");
+const vespalib::string IsFilterField::DEFAULT_VALUE("false");
+
+void
+IsFilterField::set(Properties &props, const vespalib::string &fieldName)
+{
+ props.add(BASE_NAME + fieldName, "true");
+}
+
+bool
+IsFilterField::check(const Properties &props, const vespalib::string &fieldName)
+{
+ return checkIfTrue(props, BASE_NAME + fieldName, DEFAULT_VALUE);
+}
+
+
+namespace type {
+
+const vespalib::string Attribute::BASE_NAME("vespa.type.attribute.");
+const vespalib::string Attribute::DEFAULT_VALUE("");
+
+vespalib::string
+Attribute::lookup(const Properties &props, const vespalib::string &attributeName)
+{
+ return lookupString(props, BASE_NAME + attributeName, DEFAULT_VALUE);
+}
+
+void
+Attribute::set(Properties &props, const vespalib::string &attributeName, const vespalib::string &type)
+{
+ props.add(BASE_NAME + attributeName, type);
+}
+
+const vespalib::string QueryFeature::BASE_NAME("vespa.type.query.");
+const vespalib::string QueryFeature::DEFAULT_VALUE("");
+
+vespalib::string
+QueryFeature::lookup(const Properties &props, const vespalib::string &queryFeatureName)
+{
+ return lookupString(props, BASE_NAME + queryFeatureName, DEFAULT_VALUE);
+}
+
+void
+QueryFeature::set(Properties &props, const vespalib::string &queryFeatureName, const vespalib::string &type)
+{
+ props.add(BASE_NAME + queryFeatureName, type);
+}
+
+} // namespace type
+
+} // namespace indexproperties
+} // namespace fef
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/fef/indexproperties.h b/searchlib/src/vespa/searchlib/fef/indexproperties.h
new file mode 100644
index 00000000000..8dcd08dfc49
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/indexproperties.h
@@ -0,0 +1,307 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/vespalib/stllike/string.h>
+#include <vector>
+#include <vespa/searchlib/common/feature.h>
+
+namespace search {
+namespace fef {
+
+class Properties;
+
+/**
+ * This namespace is a placeholder for several structs, each representing
+ * an index property with name and default value. All property names
+ * defined here will have the prefix "vespa." and are known by the
+ * feature execution framework. When accessing an index property from a @ref Properties
+ * instance one should use the property names defined here to perform the lookup.
+ * If the property is not present the default value is used.
+ **/
+namespace indexproperties {
+
+namespace rank {
+
+ /**
+ * Property for the feature name used for first phase rank.
+ **/
+ struct FirstPhase {
+ static const vespalib::string NAME;
+ static const vespalib::string DEFAULT_VALUE;
+ static vespalib::string lookup(const Properties &props);
+ };
+
+ /**
+ * Property for the feature name used for second phase rank.
+ **/
+ struct SecondPhase {
+ static const vespalib::string NAME;
+ static const vespalib::string DEFAULT_VALUE;
+ static vespalib::string lookup(const Properties &props);
+ };
+
+} // namespace rank
+
+namespace summary {
+
+ /**
+ * Property for the set of features to be inserted into the
+ * summaryfeatures docsum field
+ **/
+ struct Feature {
+ static const vespalib::string NAME;
+ static const std::vector<vespalib::string> DEFAULT_VALUE;
+ static std::vector<vespalib::string> lookup(const Properties &props);
+ };
+
+} // namespace summary
+
+namespace dump {
+
+ /**
+ * Property for the set of feature names used for dumping.
+ **/
+ struct Feature {
+ static const vespalib::string NAME;
+ static const std::vector<vespalib::string> DEFAULT_VALUE;
+ static std::vector<vespalib::string> lookup(const Properties &props);
+ };
+
+ /**
+ * Property that may be used to ignore default rank features when
+ * dumping.
+ **/
+ struct IgnoreDefaultFeatures {
+ static const vespalib::string NAME;
+ static const vespalib::string DEFAULT_VALUE;
+ static bool check(const Properties &props);
+ };
+
+} // namespace dump
+
+namespace matching {
+
+ /**
+ * A number in the range [0,1] indicating how much of the corpus
+ * the query must match for termwise evaluation to be enabled. 1
+ * means never allowed. 0 means always allowed. The default value
+ * is 1 (never).
+ **/
+ struct TermwiseLimit {
+ static const vespalib::string NAME;
+ static const double DEFAULT_VALUE;
+ static double lookup(const Properties &props);
+ };
+
+ /**
+ * Property for the number of threads used per search.
+ **/
+ struct NumThreadsPerSearch {
+ static const vespalib::string NAME;
+ static const uint32_t DEFAULT_VALUE;
+ static uint32_t lookup(const Properties &props);
+ };
+ /**
+ * Property for the number of threads used per search.
+ **/
+ struct NumSearchPartitions {
+ static const vespalib::string NAME;
+ static const uint32_t DEFAULT_VALUE;
+ static uint32_t lookup(const Properties &props);
+ };
+}
+
+namespace matchphase {
+
+ /**
+ * Property for the attribute used for graceful degradation during match phase.
+ **/
+ struct DegradationAttribute {
+ static const vespalib::string NAME;
+ static const vespalib::string DEFAULT_VALUE;
+ static vespalib::string lookup(const Properties &props);
+ };
+
+ /**
+ * Property for the order used for graceful degradation during match phase.
+ **/
+ struct DegradationAscendingOrder {
+ static const vespalib::string NAME;
+ static const bool DEFAULT_VALUE;
+ static bool lookup(const Properties &props);
+ };
+
+ /**
+ * Property for how many hits the used wanted for graceful degradation during match phase.
+ **/
+ struct DegradationMaxHits {
+ static const vespalib::string NAME;
+ static const uint32_t DEFAULT_VALUE;
+ static uint32_t lookup(const Properties &props);
+ };
+
+ /**
+ * Property for how many hits out of wanted hits to collect before considering graceful degradation during match phase.
+ **/
+ struct DegradationSamplePercentage {
+ static const vespalib::string NAME;
+ static const double DEFAULT_VALUE;
+ static double lookup(const Properties &props);
+ };
+
+ struct DegradationMaxFilterCoverage {
+ static const vespalib::string NAME;
+ static const double DEFAULT_VALUE;
+ static double lookup(const Properties &props);
+ };
+
+ /**
+ * Property for moving the swithpoint between pre and post filtering.
+ * > 1 favors pre filtering, less favour post filtering
+ **/
+ struct DegradationPostFilterMultiplier {
+ static const vespalib::string NAME;
+ static const double DEFAULT_VALUE;
+ static double lookup(const Properties &props);
+ };
+
+ /**
+ * The name of the attribute used to ensure result diversity
+ * during match phase limiting. If this property is "" (empty
+ * string; the default) diversity will be disabled.
+ **/
+ struct DiversityAttribute {
+ static const vespalib::string NAME;
+ static const vespalib::string DEFAULT_VALUE;
+ static vespalib::string lookup(const Properties &props);
+ };
+
+ /**
+ * If we were to later group on the diversity attribute, try not
+ * to end up with fewer groups than this number. If this property
+ * is 1 (the default) diversity will be disabled.
+ **/
+ struct DiversityMinGroups {
+ static const vespalib::string NAME;
+ static const uint32_t DEFAULT_VALUE;
+ static uint32_t lookup(const Properties &props);
+ };
+
+ struct DiversityCutoffFactor {
+ static const vespalib::string NAME;
+ static const double DEFAULT_VALUE;
+ static double lookup(const Properties &props);
+ };
+ struct DiversityCutoffStrategy {
+ static const vespalib::string NAME;
+ static const vespalib::string DEFAULT_VALUE;
+ static vespalib::string lookup(const Properties &props);
+ };
+
+} // namespace matchphase
+
+
+namespace hitcollector {
+
+ /**
+ * Property for the heap size used in the hit collector.
+ **/
+ struct HeapSize {
+ static const vespalib::string NAME;
+ static const uint32_t DEFAULT_VALUE;
+ static uint32_t lookup(const Properties &props);
+ };
+
+ /**
+ * Property for the array size used in the hit collector.
+ **/
+ struct ArraySize {
+ static const vespalib::string NAME;
+ static const uint32_t DEFAULT_VALUE;
+ static uint32_t lookup(const Properties &props);
+ };
+
+ /**
+ * Property for the estimate point used in parallel query evaluation.
+ * Specifies when to estimate the total number of hits.
+ **/
+ struct EstimatePoint {
+ static const vespalib::string NAME;
+ static const uint32_t DEFAULT_VALUE;
+ static uint32_t lookup(const Properties &props);
+ };
+
+ /**
+ * Property for the estimate limit used in parallel query evaluation.
+ * Specifies the limit for a hit estimate. If the estimate is above the limit abort ranking.
+ **/
+ struct EstimateLimit {
+ static const vespalib::string NAME;
+ static const uint32_t DEFAULT_VALUE;
+ static uint32_t lookup(const Properties &props);
+ };
+
+ /**
+ * Property for the rank score drop limit used in parallel query evaluation.
+ * Drop a hit if the rank score <= drop limit.
+ **/
+ struct RankScoreDropLimit {
+ static const vespalib::string NAME;
+ static const feature_t DEFAULT_VALUE;
+ static feature_t lookup(const Properties &props);
+ };
+
+
+} // namespace hitcollector
+
+/**
+ * Property for the field weight of a field.
+ **/
+struct FieldWeight {
+ static const vespalib::string BASE_NAME;
+ static const uint32_t DEFAULT_VALUE;
+ static uint32_t lookup(const Properties &props, const vespalib::string &fieldName);
+};
+
+/**
+ * Property for whether a field is a filter field.
+ **/
+struct IsFilterField {
+ static const vespalib::string BASE_NAME;
+ static const vespalib::string DEFAULT_VALUE;
+ static void set(Properties &props, const vespalib::string &fieldName);
+ static bool check(const Properties &props, const vespalib::string &fieldName);
+};
+
+namespace type {
+
+/**
+ * Property for the type of an attribute.
+ * Currently, only tensor types are specified using this.
+ */
+struct Attribute {
+ static const vespalib::string BASE_NAME;
+ static const vespalib::string DEFAULT_VALUE;
+ static vespalib::string lookup(const Properties &props, const vespalib::string &attributeName);
+ static void set(Properties &props, const vespalib::string &attributeName, const vespalib::string &type);
+};
+
+/**
+ * Property for the type of a query feature.
+ * Currently, only tensor types are specified using this.
+ */
+struct QueryFeature {
+ static const vespalib::string BASE_NAME;
+ static const vespalib::string DEFAULT_VALUE;
+ static vespalib::string lookup(const Properties &props, const vespalib::string &queryFeatureName);
+ static void set(Properties &props, const vespalib::string &queryFeatureName, const vespalib::string &type);
+};
+
+} // namespace type
+
+
+} // namespace indexproperties
+} // namespace fef
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/fef/iqueryenvironment.h b/searchlib/src/vespa/searchlib/fef/iqueryenvironment.h
new file mode 100644
index 00000000000..b84782995d2
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/iqueryenvironment.h
@@ -0,0 +1,94 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "iindexenvironment.h"
+#include <vespa/searchcommon/attribute/iattributecontext.h>
+#include <vespa/searchlib/fef/objectstore.h>
+
+namespace search {
+namespace fef {
+
+class Location;
+class Properties;
+class ITermData;
+
+/**
+ * Abstract view of query related information available to the
+ * framework.
+ **/
+class IQueryEnvironment
+{
+public:
+ /**
+ * Convenience typedef.
+ **/
+ typedef std::shared_ptr<IQueryEnvironment> SP;
+
+ /**
+ * Obtain the set of properties associated with this query
+ * environment. This set of properties is known through the system
+ * as 'rankProperties', and is tagged with the name 'rank' when
+ * propagated down through the system.
+ *
+ * @return properties
+ **/
+ virtual const Properties &getProperties() const = 0;
+
+ /**
+ * Obtain the number of ranked terms in the query. The order of the
+ * terms are not yet strongly defined.
+ *
+ * @return number of ranked terms in the query
+ **/
+ virtual uint32_t getNumTerms() const = 0;
+
+ /**
+ * Obtain information about a single ranked term in the query. If
+ * idx is out of bounds, 0 will be returned.
+ *
+ * TODO: this must return an ordering that corresponds to the connexity of the term data.
+ * TODO: any other ordering seems inappropriate when we offer connexity as an attribute of
+ * TODO: the term data.
+ *
+ * @return information about a ranked term
+ * @param idx the term we want information about
+ **/
+ virtual const ITermData *getTerm(uint32_t idx) const = 0;
+
+ /**
+ * Obtain the location information associated with this query environment.
+ *
+ * @return location object.
+ **/
+ virtual const Location & getLocation() const = 0;
+
+ /**
+ * Returns the attribute context for this query.
+ *
+ * @return attribute context
+ **/
+ virtual const search::attribute::IAttributeContext & getAttributeContext() const = 0;
+
+ /**
+ * Returns a const view of the index environment.
+ *
+ * @return index environment
+ **/
+ virtual const IIndexEnvironment & getIndexEnvironment() const = 0;
+
+ /**
+ * Virtual destructor to allow safe subclassing.
+ **/
+ virtual ~IQueryEnvironment() { }
+
+ IObjectStore & getObjectStore() { return _objectStore; }
+ const IObjectStore & getObjectStore() const { return _objectStore; }
+protected:
+ IQueryEnvironment() { }
+private:
+ ObjectStore _objectStore;
+};
+
+} // namespace fef
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/fef/itablefactory.h b/searchlib/src/vespa/searchlib/fef/itablefactory.h
new file mode 100644
index 00000000000..b2c1146d7d9
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/itablefactory.h
@@ -0,0 +1,36 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/vespalib/stllike/string.h>
+#include "table.h"
+
+namespace search {
+namespace fef {
+
+/**
+ * This is an interface for a factory used to create tables.
+ **/
+class ITableFactory
+{
+public:
+ /**
+ * Convenience typedef for a shared pointer to this class.
+ **/
+ typedef std::shared_ptr<ITableFactory> SP;
+
+ /**
+ * Creates a table with the given name.
+ * Table::SP(NULL) is returned if the table cannot be created.
+ **/
+ virtual Table::SP createTable(const vespalib::string & name) const = 0;
+
+ /**
+ * Virtual destructor to allow safe subclassing.
+ **/
+ virtual ~ITableFactory() {}
+};
+
+} // namespace fef
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/fef/itablemanager.h b/searchlib/src/vespa/searchlib/fef/itablemanager.h
new file mode 100644
index 00000000000..e84c0d50db4
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/itablemanager.h
@@ -0,0 +1,29 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "table.h"
+
+namespace search {
+namespace fef {
+
+/**
+ * This is an interface used to access registered tables.
+ **/
+class ITableManager
+{
+public:
+ /**
+ * Returns a const view of the table with the given name or NULL if not found.
+ **/
+ virtual const Table * getTable(const vespalib::string & name) const = 0;
+
+ /**
+ * Virtual destructor to allow safe subclassing.
+ **/
+ virtual ~ITableManager() {}
+};
+
+} // namespace fef
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/fef/itermdata.h b/searchlib/src/vespa/searchlib/fef/itermdata.h
new file mode 100644
index 00000000000..f8e1cf9c0c8
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/itermdata.h
@@ -0,0 +1,89 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "handle.h"
+#include "itermfielddata.h"
+#include <vespa/searchlib/query/weight.h>
+
+namespace search {
+namespace fef {
+
+/**
+ * Interface to static match data for a single unit (term/phrase/etc).
+ **/
+class ITermData
+{
+protected:
+ virtual ~ITermData() {}
+
+public:
+ /**
+ * Returns the term weight.
+ **/
+ virtual query::Weight getWeight() const = 0;
+
+ /**
+ * Returns the number of terms represented by this term data object.
+ **/
+ virtual uint32_t getPhraseLength() const = 0;
+
+ /**
+ * Obtain the location of this term in the original user query.
+ *
+ * @return term index
+ **/
+ virtual uint32_t getTermIndex() const = 0;
+
+ /**
+ * Obtain the unique id of this term. 0 means not set.
+ *
+ * @return unique id or 0
+ **/
+ virtual uint32_t getUniqueId() const = 0;
+
+ /**
+ * Get number of fields searched
+ **/
+ virtual size_t numFields() const = 0;
+
+ /**
+ * Direct access to data for individual fields
+ * @param i local index, must have: 0 <= i < numFields()
+ */
+ virtual const ITermFieldData &field(size_t i) const = 0;
+
+ /**
+ * Obtain information about a specific field that may be searched
+ * by this term. If the requested field is not searched by this
+ * term, NULL will be returned.
+ *
+ * @param fieldId global field ID
+ * @return term field data, or NULL if not found
+ **/
+ virtual const ITermFieldData *lookupField(uint32_t fieldId) const = 0;
+};
+
+/**
+ * convenience adapter for easy iteration
+ **/
+class ITermFieldRangeAdapter
+{
+ const ITermData& _ref;
+ size_t _idx;
+ size_t _lim;
+public:
+ explicit ITermFieldRangeAdapter(const ITermData& ref)
+ : _ref(ref), _idx(0), _lim(ref.numFields())
+ {}
+
+ bool valid() const { return (_idx < _lim); }
+
+ const ITermFieldData& get() const { return _ref.field(_idx); }
+
+ void next() { assert(valid()); ++_idx; }
+};
+
+} // namespace fef
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/fef/itermfielddata.h b/searchlib/src/vespa/searchlib/fef/itermfielddata.h
new file mode 100644
index 00000000000..f86bba1af4a
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/itermfielddata.h
@@ -0,0 +1,48 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "handle.h"
+
+namespace search {
+namespace fef {
+
+/**
+ * Interface to information about a single field that is being
+ * searched for a term (described by the ITermData interface). The
+ * field may be either an index field or an attribute field. If more
+ * information about the field is needed, the field id may be used to
+ * consult the index environment.
+ **/
+class ITermFieldData
+{
+protected:
+ virtual ~ITermFieldData() {}
+
+public:
+ /**
+ * Obtain the global field id.
+ *
+ * @return field id
+ **/
+ virtual uint32_t getFieldId() const = 0;
+
+ /**
+ * Obtain the document frequency. This is a value between 0 and 1
+ * indicating the ratio of the matching documents to the corpus.
+ *
+ * @return document frequency
+ **/
+ virtual double getDocFreq() const = 0;
+
+ /**
+ * Obtain the match handle for this field.
+ *
+ * @return match handle (or IllegalHandle)
+ **/
+ virtual TermFieldHandle getHandle() const = 0;
+};
+
+} // namespace fef
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/fef/location.cpp b/searchlib/src/vespa/searchlib/fef/location.cpp
new file mode 100644
index 00000000000..9bed7305a1e
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/location.cpp
@@ -0,0 +1,19 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "location.h"
+
+namespace search {
+namespace fef {
+
+Location::Location() :
+ _attr(),
+ _xPos(0),
+ _yPos(0),
+ _xAspect(0),
+ _valid(false)
+{
+}
+
+} // namespace fef
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/fef/location.h b/searchlib/src/vespa/searchlib/fef/location.h
new file mode 100644
index 00000000000..cfb66016cd5
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/location.h
@@ -0,0 +1,111 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/vespalib/stllike/string.h>
+
+namespace search {
+namespace fef {
+
+/**
+ * This class contains location data that is associated with a query.
+ **/
+class Location
+{
+private:
+ vespalib::string _attr;
+ int32_t _xPos;
+ int32_t _yPos;
+ uint32_t _xAspect;
+ bool _valid;
+
+public:
+ /**
+ * Creates an empty object.
+ **/
+ Location();
+
+ /**
+ * Sets the name of the attribute to use for x positions.
+ *
+ * @param xAttr the attribute name.
+ * @return this to allow chaining.
+ **/
+ Location &
+ setAttribute(const vespalib::string & attr)
+ {
+ _attr = attr;
+ return *this;
+ }
+
+ /**
+ * Returns the name of the attribute to use for x positions.
+ *
+ * @return the attribute name.
+ **/
+ const vespalib::string & getAttribute() const { return _attr; }
+
+ /**
+ * Sets the x position of this location.
+ *
+ * @param xPos the x position.
+ * @return this to allow chaining.
+ **/
+ Location & setXPosition(int32_t xPos) { _xPos = xPos; return *this; }
+
+ /**
+ * Returns the x position of this location.
+ *
+ * @return the x position.
+ **/
+ int32_t getXPosition() const { return _xPos; }
+
+ /**
+ * Sets the y position of this location.
+ *
+ * @param yPos the y position.
+ * @return this to allow chaining.
+ **/
+ Location & setYPosition(int32_t yPos) { _yPos = yPos; return *this; }
+
+ /**
+ * Returns the y position of this location.
+ *
+ * @return the y position.
+ **/
+ int32_t getYPosition() const { return _yPos; }
+
+ /**
+ * Sets the x distance multiplier fraction.
+ *
+ * @param xAspect the x aspect.
+ * @return this to allow chaining.
+ **/
+ Location & setXAspect(uint32_t xAspect) { _xAspect = xAspect; return *this; }
+
+ /**
+ * Returns the x distance multiplier fraction.
+ *
+ * @return the x aspect.
+ **/
+ uint32_t getXAspect() const { return _xAspect; }
+
+ /**
+ * Sets whether this is a valid location object.
+ *
+ * @param valid true if this is valid.
+ * @return this to allow chaining.
+ **/
+ Location & setValid(bool valid) { _valid = valid; return *this; }
+
+ /**
+ * Returns whether this is a valid location object.
+ *
+ * @param true if this is a valid.
+ **/
+ bool isValid() const { return _valid; }
+};
+
+} // namespace fef
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/fef/matchdata.cpp b/searchlib/src/vespa/searchlib/fef/matchdata.cpp
new file mode 100644
index 00000000000..4dc411dce72
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/matchdata.cpp
@@ -0,0 +1,30 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "matchdata.h"
+#include <math.h>
+
+namespace search {
+namespace fef {
+
+MatchData::MatchData(const Params &cparams)
+ : _docid(TermFieldMatchData::invalidId()),
+ _termFields(cparams.numTermFields()),
+ _features(cparams.numFeatures()),
+ _feature_is_object(cparams.numFeatures(), false),
+ _termwise_limit(1.0)
+{
+}
+
+MatchData::UP
+MatchData::makeTestInstance(uint32_t numFeatures, uint32_t numHandles, uint32_t fieldIdLimit)
+{
+ MatchData::UP data(new MatchData(Params().numFeatures(numFeatures).numTermFields(numHandles)));
+ for (uint32_t i = 0; i < numHandles; ++i) {
+ data->resolveTermField(i)->setFieldId(i % fieldIdLimit);
+ }
+ return data;
+}
+
+} // namespace fef
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/fef/matchdata.h b/searchlib/src/vespa/searchlib/fef/matchdata.h
new file mode 100644
index 00000000000..1f836eddfdc
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/matchdata.h
@@ -0,0 +1,181 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "handle.h"
+#include "termfieldmatchdata.h"
+#include <vespa/searchlib/common/feature.h>
+#include <memory>
+#include <vector>
+#include <vespa/vespalib/eval/value.h>
+
+namespace search {
+namespace fef {
+
+/**
+ * An object of this class is used to store all basic data and derived
+ * features for a single hit.
+ **/
+class MatchData
+{
+private:
+ union NumberOrObject {
+ feature_t as_number;
+ vespalib::eval::Value::CREF as_object;
+ NumberOrObject() { memset(this, 0, sizeof(NumberOrObject)); }
+ ~NumberOrObject() {}
+ };
+ uint32_t _docid;
+ std::vector<TermFieldMatchData> _termFields;
+ std::vector<NumberOrObject> _features;
+ std::vector<bool> _feature_is_object;
+ double _termwise_limit;
+
+public:
+ /**
+ * Wrapper for constructor parameters
+ **/
+ class Params
+ {
+ private:
+ uint32_t _numTermFields;
+ uint32_t _numFeatures;
+
+ friend class ::search::fef::MatchData;
+ Params() : _numTermFields(0), _numFeatures(0) {}
+ public:
+ uint32_t numTermFields() const { return _numTermFields; }
+ Params & numTermFields(uint32_t value) {
+ _numTermFields = value;
+ return *this;
+ }
+
+ uint32_t numFeatures() const { return _numFeatures; }
+ Params & numFeatures(uint32_t value) {
+ _numFeatures = value;
+ return *this;
+ }
+ };
+ /**
+ * Avoid C++'s most vexing parse problem.
+ * (reference: http://www.amazon.com/dp/0201749629/)
+ **/
+ static Params params() { return Params(); }
+
+ /**
+ * Convenience typedef for an auto-pointer to this class.
+ **/
+ typedef std::unique_ptr<MatchData> UP;
+
+ /**
+ * Create a new object with the given number of term, attribute, and feature
+ * slots.
+ *
+ * @param numTerms number of term slots
+ * @param numAttributes number of attribute slots
+ * @param numFeatures number of feature slots
+ **/
+ explicit MatchData(const Params &cparams);
+
+ MatchData(const MatchData &rhs) = delete;
+ MatchData & operator=(const MatchData &rhs) = delete;
+
+ /**
+ * A number in the range [0,1] indicating how much of the corpus
+ * the query must match for termwise evaluation to be enabled. 1
+ * means never allowed. 0 means always allowed. The initial value
+ * is 1 (never). This value is used when creating a search
+ * (queryeval::Blueprint::createSearch).
+ **/
+ double get_termwise_limit() const { return _termwise_limit; }
+ void set_termwise_limit(double value) { _termwise_limit = value; }
+
+ /**
+ * Set the document id for this match object. This method is
+ * invoked by the parallel query evaluation driver code during
+ * term data unpacking.
+ *
+ * @param docid docid for this match data
+ **/
+ void setDocId(uint32_t docid) { _docid = docid; }
+
+ /**
+ * Obtain the document id for this match data. This may be used to
+ * check if we have term match data for the document we are
+ * processing or not. Also, it will be used when merging hits from
+ * the heap back into the full result set.
+ *
+ * @return document id for this match data
+ **/
+ uint32_t getDocId() const { return _docid; }
+
+ /**
+ * Obtain the number of term fields allocated in this match data
+ * structure.
+ *
+ * @return number of term fields allocated
+ **/
+ uint32_t getNumTermFields() const { return _termFields.size(); }
+
+ /**
+ * Obtain the number of features allocated in this match data
+ * structure.
+ *
+ * @return number of features allocated
+ **/
+ uint32_t getNumFeatures() const { return _features.size(); }
+
+ /**
+ * Resolve a term field handle into a pointer to the actual data.
+ *
+ * @return term field match data
+ * @param handle term field handle
+ **/
+ TermFieldMatchData *resolveTermField(TermFieldHandle handle) { return &_termFields[handle]; }
+
+ /**
+ * Resolve a term field handle into a pointer to the actual data.
+ *
+ * @return term field match data
+ * @param handle term field handle
+ **/
+ const TermFieldMatchData *resolveTermField(TermFieldHandle handle) const { return &_termFields[handle]; }
+
+ /**
+ * Resolve a feature handle into a pointer to the actual data.
+ * This is used to resolve both {@link FeatureExecutor#inputs}
+ * and {@link FeatureExecutor#outputs}.
+ *
+ * @return feature location
+ * @param handle feature handle
+ **/
+ feature_t *resolveFeature(FeatureHandle handle) { return &_features[handle].as_number; }
+
+ /**
+ * Resolve a feature handle into a pointer to the actual data.
+ * This is used to resolve both {@link FeatureExecutor#inputs}
+ * and {@link FeatureExecutor#outputs}.
+ *
+ * @return feature location
+ * @param handle feature handle
+ **/
+ const feature_t *resolveFeature(FeatureHandle handle) const { return &_features[handle].as_number; }
+
+ void tag_feature_as_object(FeatureHandle handle) { _feature_is_object[handle] = true; }
+ bool feature_is_object(FeatureHandle handle) const { return _feature_is_object[handle]; }
+
+ vespalib::eval::Value::CREF *resolve_object_feature(FeatureHandle handle) {
+ assert(_feature_is_object[handle]);
+ return &_features[handle].as_object;
+ }
+
+ const vespalib::eval::Value::CREF *resolve_object_feature(FeatureHandle handle) const {
+ assert(_feature_is_object[handle]);
+ return &_features[handle].as_object;
+ }
+
+ static MatchData::UP makeTestInstance(uint32_t numFeatures, uint32_t numHandles, uint32_t fieldIdLimit);
+};
+
+} // namespace fef
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/fef/matchdatalayout.cpp b/searchlib/src/vespa/searchlib/fef/matchdatalayout.cpp
new file mode 100644
index 00000000000..64070006b59
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/matchdatalayout.cpp
@@ -0,0 +1,35 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "matchdatalayout.h"
+
+namespace search {
+namespace fef {
+
+MatchDataLayout::MatchDataLayout()
+ : _numTermFields(0),
+ _numFeatures(0),
+ _fieldIds(),
+ _object_features()
+{
+}
+
+MatchData::UP
+MatchDataLayout::createMatchData() const
+{
+ MatchData::UP md(new MatchData(MatchData::params()
+ .numTermFields(_numTermFields)
+ .numFeatures(_numFeatures)));
+
+ assert(_numTermFields == _fieldIds.size());
+ for (size_t i = 0; i < _numTermFields; ++i) {
+ md->resolveTermField(i)->setFieldId(_fieldIds[i]);
+ }
+ for (FeatureHandle object_handle: _object_features) {
+ md->tag_feature_as_object(object_handle);
+ }
+ return md;
+}
+
+} // namespace fef
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/fef/matchdatalayout.h b/searchlib/src/vespa/searchlib/fef/matchdatalayout.h
new file mode 100644
index 00000000000..5b8240d3caa
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/matchdatalayout.h
@@ -0,0 +1,64 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "handle.h"
+#include "matchdata.h"
+
+namespace search {
+namespace fef {
+
+/**
+ * This class is used to describe the layout of term match data and
+ * features within MatchData objects for a single query.
+ **/
+class MatchDataLayout
+{
+private:
+ uint32_t _numTermFields;
+ uint32_t _numFeatures;
+ std::vector<uint32_t> _fieldIds;
+ std::vector<FeatureHandle> _object_features;
+
+public:
+ /**
+ * Create an empty object.
+ **/
+ MatchDataLayout();
+
+ /**
+ * Allocate space for a term field match data structure.
+ *
+ * @param fieldId the field ID the space will be used for
+ * @return handle to be used with match data objects
+ **/
+ TermFieldHandle allocTermField(uint32_t fieldId) {
+ _fieldIds.push_back(fieldId);
+ return _numTermFields++;
+ }
+
+ /**
+ * Allocate space for a feature.
+ *
+ * @return handle to be used with match data objects
+ **/
+ FeatureHandle allocFeature(bool is_object = false) {
+ if (is_object) {
+ _object_features.push_back(_numFeatures);
+ }
+ return _numFeatures++;
+ }
+
+ /**
+ * Create a match data object with the layout described by this
+ * object. Note that this method should only be invoked after all
+ * terms and features have been allocated.
+ *
+ * @return auto-pointer to a match data object
+ **/
+ MatchData::UP createMatchData() const;
+};
+
+} // namespace fef
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/fef/objectstore.cpp b/searchlib/src/vespa/searchlib/fef/objectstore.cpp
new file mode 100644
index 00000000000..9e7aa5d9b81
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/objectstore.cpp
@@ -0,0 +1,39 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/searchlib/fef/objectstore.h>
+
+namespace search {
+namespace fef {
+
+ObjectStore::ObjectStore() :
+ _objectMap()
+{
+}
+
+ObjectStore::~ObjectStore()
+{
+ for(auto & it : _objectMap) {
+ delete it.second;
+ it.second = NULL;
+ }
+}
+
+void
+ObjectStore::add(const vespalib::string & key, Anything::UP value)
+{
+ ObjectMap::iterator found = _objectMap.find(key);
+ if (found != _objectMap.end()) {
+ delete found->second;
+ found->second = NULL;
+ }
+ _objectMap[key] = value.release();
+}
+
+const Anything *
+ObjectStore::get(const vespalib::string & key) const
+{
+ ObjectMap::const_iterator found = _objectMap.find(key);
+ return (found != _objectMap.end()) ? found->second : NULL;
+}
+
+}
+}
diff --git a/searchlib/src/vespa/searchlib/fef/objectstore.h b/searchlib/src/vespa/searchlib/fef/objectstore.h
new file mode 100644
index 00000000000..457371c4ebf
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/objectstore.h
@@ -0,0 +1,37 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/vespalib/stllike/hash_map.h>
+
+namespace search {
+namespace fef {
+
+class Anything
+{
+public:
+ typedef std::unique_ptr<Anything> UP;
+ virtual ~Anything() { }
+};
+
+class IObjectStore
+{
+public:
+ virtual ~IObjectStore() { }
+ virtual void add(const vespalib::string & key, Anything::UP value) = 0;
+ virtual const Anything * get(const vespalib::string & key) const = 0;
+};
+
+class ObjectStore : public IObjectStore
+{
+public:
+ ObjectStore();
+ virtual ~ObjectStore();
+ virtual void add(const vespalib::string & key, Anything::UP value);
+ virtual const Anything * get(const vespalib::string & key) const;
+private:
+ typedef vespalib::hash_map<vespalib::string, Anything *> ObjectMap;
+ ObjectMap _objectMap;
+};
+
+}
+}
diff --git a/searchlib/src/vespa/searchlib/fef/parameter.cpp b/searchlib/src/vespa/searchlib/fef/parameter.cpp
new file mode 100644
index 00000000000..583061ad383
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/parameter.cpp
@@ -0,0 +1,19 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "parameter.h"
+
+namespace search {
+namespace fef {
+
+Parameter::Parameter(ParameterType::Enum type, const vespalib::string & value) :
+ _type(type),
+ _stringVal(value),
+ _doubleVal(0),
+ _intVal(0),
+ _fieldVal(NULL)
+{
+}
+
+} // namespace fef
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/fef/parameter.h b/searchlib/src/vespa/searchlib/fef/parameter.h
new file mode 100644
index 00000000000..049ea7f76ab
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/parameter.h
@@ -0,0 +1,41 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <string>
+#include "fieldinfo.h"
+#include "parameterdescriptions.h"
+
+namespace search {
+namespace fef {
+
+/**
+ * This class represents a parameter with type and value.
+ * You can use convenience functions to access the parameter value as different types.
+ */
+class Parameter {
+private:
+ ParameterType::Enum _type;
+ vespalib::string _stringVal;
+ double _doubleVal;
+ int64_t _intVal;
+ const search::fef::FieldInfo * _fieldVal;
+
+public:
+ Parameter(ParameterType::Enum type, const vespalib::string & value);
+ Parameter & setDouble(double val) { _doubleVal = val; return *this; }
+ Parameter & setInteger(int64_t val) { _intVal = val; return *this; }
+ Parameter & setField(const search::fef::FieldInfo * val) { _fieldVal = val; return *this; }
+ ParameterType::Enum getType() const { return _type; }
+ const vespalib::string & getValue() const { return _stringVal; }
+ double asDouble() const { return _doubleVal; }
+ int64_t asInteger() const { return _intVal; }
+ const search::fef::FieldInfo * asField() const { return _fieldVal; }
+};
+
+typedef std::vector<Parameter> ParameterList;
+
+
+} // namespace fef
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/fef/parameterdescriptions.cpp b/searchlib/src/vespa/searchlib/fef/parameterdescriptions.cpp
new file mode 100644
index 00000000000..bcaf75450c8
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/parameterdescriptions.cpp
@@ -0,0 +1,34 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "parameterdescriptions.h"
+
+namespace search {
+namespace fef {
+
+ParameterDescriptions::Description::Description(size_t tag) :
+ _tag(tag),
+ _params(),
+ _repeat(0)
+{
+}
+
+ParamDescItem
+ParameterDescriptions::Description::getParam(size_t i) const
+{
+ if (i < _params.size()) {
+ return _params[i];
+ }
+ size_t offset = (i - _params.size()) % _repeat;
+ size_t realIndex = _params.size() - _repeat + offset;
+ return _params[realIndex];
+}
+
+ParameterDescriptions::ParameterDescriptions() :
+ _descriptions(),
+ _nextTag(0)
+{
+}
+
+} // namespace fef
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/fef/parameterdescriptions.h b/searchlib/src/vespa/searchlib/fef/parameterdescriptions.h
new file mode 100644
index 00000000000..df1e1f75f73
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/parameterdescriptions.h
@@ -0,0 +1,197 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vector>
+
+namespace search {
+namespace fef {
+
+/**
+ * Represents the type of a parameter.
+ */
+struct ParameterType {
+ enum Enum {
+ NONE,
+ FIELD, // for match information in a field
+ INDEX_FIELD, // for match information in an index field
+ ATTRIBUTE_FIELD, // for match information in an attribute field
+ ATTRIBUTE, // for accessing an attribute vector
+ FEATURE, // a complete rank feature name
+ NUMBER,
+ STRING
+ };
+};
+
+/**
+ * Represents the accepted collection types for a field parameter.
+ **/
+struct ParameterCollection {
+ enum Enum {
+ NONE,
+ SINGLE, // single value
+ ARRAY, // unweighted multi-value
+ WEIGHTEDSET, // weighted multi-value
+ ANY // any collection type
+ };
+};
+
+/**
+ * The description of a single parameter within a single
+ * ParameterDescription object.
+ **/
+struct ParamDescItem {
+ ParameterType::Enum type;
+ ParameterCollection::Enum collection;
+ ParamDescItem(ParameterType::Enum t,
+ ParameterCollection::Enum c)
+ : type(t), collection(c) {}
+};
+
+/**
+ * This class represents a set of parameter descriptions that each indicate what are a valid input parameter list for a Blueprint.
+ * During setup of a Blueprint the descriptions can be used to validate the input parameter
+ * list for that Blueprint. The parameters are valid if one of the descriptions match the actual parameter list.
+ */
+class ParameterDescriptions {
+public:
+ /**
+ * This class represents a single parameter description, consisting of a list of parameter types.
+ * This list of types must match the actual parameter list.
+ */
+ class Description {
+ private:
+ size_t _tag;
+ std::vector<ParamDescItem> _params;
+ size_t _repeat;
+ public:
+ /**
+ * Creates a new object with the given tag.
+ */
+ Description(size_t tag);
+ Description & addParameter(const ParamDescItem &param) {
+ _params.push_back(param);
+ return *this;
+ }
+
+ /**
+ * Sets the repeat number.
+ * This indicates that the last <repeat> parameter types can occur 0-n times.
+ * The repeat should only be set after all parameter types are added.
+ */
+ Description & setRepeat(size_t repeat) {
+ _repeat = repeat;
+ return *this;
+ }
+ size_t getTag() const { return _tag; }
+ const std::vector<ParamDescItem> & getParams() const { return _params; }
+ /**
+ * Returns the parameter type with the given index.
+ * If this description has repeat the index can be out of bounds (the correct repeat parameter will be returned).
+ */
+ ParamDescItem getParam(size_t i) const;
+ bool hasRepeat() const { return _repeat != 0; }
+ size_t getRepeat() const { return _repeat; }
+ };
+ typedef std::vector<Description> DescriptionVector;
+
+private:
+ DescriptionVector _descriptions;
+ size_t _nextTag;
+
+ Description & getCurrent() { return _descriptions.back(); }
+ void addParameter(const ParamDescItem &param) {
+ assert(!_descriptions.empty());
+ assert(!getCurrent().hasRepeat());
+ getCurrent().addParameter(param);
+ }
+ void addParameter(ParameterType::Enum type, ParameterCollection::Enum collection) {
+ addParameter(ParamDescItem(type, collection));
+ }
+ void addParameter(ParameterType::Enum type) {
+ addParameter(type, ParameterCollection::ANY);
+ }
+
+public:
+ /**
+ * Creates a new object with no descriptions.
+ */
+ ParameterDescriptions();
+ const DescriptionVector & getDescriptions() const { return _descriptions; }
+ ParameterDescriptions & desc() {
+ _descriptions.push_back(Description(_nextTag++));
+ return *this;
+ }
+ /**
+ * Starts a new description with the given tag.
+ */
+ ParameterDescriptions & desc(size_t tag) {
+ _descriptions.push_back(Description(tag));
+ _nextTag = tag + 1;
+ return *this;
+ }
+ /**
+ * Adds a field parameter to the current description.
+ */
+ ParameterDescriptions & field() {
+ addParameter(ParameterType::FIELD);
+ return *this;
+ }
+ /**
+ * Adds an index field parameter to the current description.
+ */
+ ParameterDescriptions & indexField(ParameterCollection::Enum collection) {
+ addParameter(ParameterType::INDEX_FIELD, collection);
+ return *this;
+ }
+ /**
+ * Adds an attribute field parameter to the current description.
+ */
+ ParameterDescriptions & attributeField(ParameterCollection::Enum collection) {
+ addParameter(ParameterType::ATTRIBUTE_FIELD, collection);
+ return *this;
+ }
+ /**
+ * Adds an attribute parameter to the current description.
+ */
+ ParameterDescriptions & attribute(ParameterCollection::Enum collection) {
+ addParameter(ParameterType::ATTRIBUTE, collection);
+ return *this;
+ }
+ /**
+ * Adds a feature parameter to the current description.
+ */
+ ParameterDescriptions & feature() {
+ addParameter(ParameterType::FEATURE);
+ return *this;
+ }
+ /**
+ * Adds a number parameter to the current description.
+ */
+ ParameterDescriptions & number() {
+ addParameter(ParameterType::NUMBER);
+ return *this;
+ }
+ /**
+ * Adds a string parameter to the current description.
+ */
+ ParameterDescriptions & string() {
+ addParameter(ParameterType::STRING);
+ return *this;
+ }
+ /**
+ * Sets the repeat number on the current description.
+ */
+ ParameterDescriptions & repeat(size_t n = 1) {
+ assert(!_descriptions.empty());
+ assert(getCurrent().getParams().size() >= n);
+ getCurrent().setRepeat(n);
+ return *this;
+ }
+};
+
+
+
+} // namespace fef
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/fef/parametervalidator.cpp b/searchlib/src/vespa/searchlib/fef/parametervalidator.cpp
new file mode 100644
index 00000000000..203de1ba8f0
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/parametervalidator.cpp
@@ -0,0 +1,158 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <boost/lexical_cast.hpp>
+#include <vespa/vespalib/util/vstringfmt.h>
+#include "fieldinfo.h"
+#include "fieldtype.h"
+#include "parametervalidator.h"
+
+using vespalib::make_vespa_string;
+
+namespace search {
+namespace fef {
+
+namespace {
+
+bool checkCollectionType(ParameterCollection::Enum accept, CollectionType actual) {
+ switch (accept) {
+ case ParameterCollection::NONE: return false;
+ case ParameterCollection::SINGLE: return (actual == CollectionType::SINGLE);
+ case ParameterCollection::ARRAY: return (actual == CollectionType::ARRAY);
+ case ParameterCollection::WEIGHTEDSET: return (actual == CollectionType::WEIGHTEDSET);
+ case ParameterCollection::ANY: return true;
+ }
+ return false;
+}
+
+class ValidateException
+{
+public:
+ ValidateException(const vespalib::string & message) : _message(message) { }
+ const vespalib::string & getMessage() const { return _message; }
+private:
+ vespalib::string _message;
+};
+
+} // namespace search::fef::<unnamed>
+
+ParameterValidator::Result::Result(size_t tag) :
+ _params(),
+ _tag(tag),
+ _errorStr(),
+ _valid(true)
+{
+}
+
+void
+ParameterValidator::validateField(ParameterType::Enum type, ParameterCollection::Enum collection,
+ size_t i, Result & result)
+{
+ const FieldInfo * field = _indexEnv.getFieldByName(_params[i]);
+ if (field == NULL) {
+ throw ValidateException(make_vespa_string("Param[%zu]: Field '%s' was not found in the index environment",
+ i, _params[i].c_str()));
+ }
+ if (type == ParameterType::INDEX_FIELD) {
+ if (field->type() != FieldType::INDEX) {
+ throw ValidateException(make_vespa_string("Param[%zu]: Expected field '%s' to be an index field, but it was not",
+ i, _params[i].c_str()));
+ }
+ } else if (type == ParameterType::ATTRIBUTE_FIELD) {
+ if (field->type() != FieldType::ATTRIBUTE) {
+ throw ValidateException(make_vespa_string("Param[%zu]: Expected field '%s' to be an attribute field, but it was not",
+ i, _params[i].c_str()));
+ }
+ } else if (type == ParameterType::ATTRIBUTE) {
+ if (!field->hasAttribute()) {
+ throw ValidateException(make_vespa_string("Param[%zu]: Expected field '%s' to support attribute lookup, but it does not",
+ i, _params[i].c_str()));
+ }
+ }
+ if (!checkCollectionType(collection, field->collection())) {
+ throw ValidateException(make_vespa_string("Param[%zu]: field '%s' has inappropriate collection type",
+ i, _params[i].c_str()));
+ }
+ result.addParameter(Parameter(type, _params[i]).setField(field));
+}
+
+void
+ParameterValidator::validateNumber(ParameterType::Enum type, size_t i, Result & result)
+{
+ try {
+ double doubleVal = boost::lexical_cast<double>(_params[i]);
+ int64_t intVal = static_cast<int64_t>(doubleVal);
+ result.addParameter(Parameter(type, _params[i]).setInteger(intVal).setDouble(doubleVal));
+ } catch (const boost::bad_lexical_cast &) {
+ throw ValidateException(make_vespa_string("Param[%zu]: Could not convert '%s' to a number", i, _params[i].c_str()));
+ }
+}
+
+ParameterValidator::Result
+ParameterValidator::validate(const ParameterDescriptions::Description & desc)
+{
+ Result result(desc.getTag());
+ if (desc.hasRepeat()) {
+ size_t minParams = desc.getParams().size() - desc.getRepeat(); // the repeat params can occur 0-n times
+ if (minParams > _params.size() ||
+ ((_params.size() - desc.getParams().size()) % desc.getRepeat() != 0))
+ {
+ throw ValidateException(make_vespa_string("Expected %zd+%zdx parameter(s), but got %zd",
+ minParams, desc.getRepeat(), _params.size()));
+ }
+ } else if (desc.getParams().size() != _params.size()) {
+ throw ValidateException(make_vespa_string("Expected %zd parameter(s), but got %zd", desc.getParams().size(), _params.size()));
+ }
+ for (size_t i = 0; i < _params.size(); ++i) {
+ ParamDescItem param = desc.getParam(i);
+ ParameterType::Enum type = param.type;
+ switch (type) {
+ case ParameterType::FIELD:
+ case ParameterType::INDEX_FIELD:
+ case ParameterType::ATTRIBUTE_FIELD:
+ case ParameterType::ATTRIBUTE:
+ validateField(type, param.collection, i, result);
+ break;
+ case ParameterType::NUMBER:
+ validateNumber(type, i, result);
+ break;
+ case ParameterType::FEATURE:
+ case ParameterType::STRING:
+ result.addParameter(Parameter(type, _params[i]));
+ break;
+ default:
+ break;
+ }
+ }
+ return result;
+}
+
+ParameterValidator::ParameterValidator(const IIndexEnvironment & indexEnv,
+ const StringVector & params,
+ const ParameterDescriptions & descs) :
+ _indexEnv(indexEnv),
+ _params(params),
+ _descs(descs)
+{
+}
+
+ParameterValidator::Result
+ParameterValidator::validate()
+{
+ Result invalid;
+ for (size_t i = 0; i < _descs.getDescriptions().size(); ++i) {
+ try {
+ return validate(_descs.getDescriptions()[i]);
+ } catch (const ValidateException & e) {
+ if (invalid.valid()) {
+ Result tmp(_descs.getDescriptions()[i].getTag());
+ tmp.setError(e.getMessage());
+ invalid = tmp;
+ }
+ }
+ }
+ return invalid;
+}
+
+} // namespace fef
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/fef/parametervalidator.h b/searchlib/src/vespa/searchlib/fef/parametervalidator.h
new file mode 100644
index 00000000000..e416ea1ecdf
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/parametervalidator.h
@@ -0,0 +1,83 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/vespalib/util/exception.h>
+#include "iindexenvironment.h"
+#include "parameter.h"
+#include "parameterdescriptions.h"
+
+namespace search {
+namespace fef {
+
+/**
+ * This class is a validator for a string parameter list given an index environment and a set of parameter descriptions.
+ * The string parameter list is valid if it is matched with one of the parameter descriptions.
+ * In case of a match the string parameter list is converted into a parameter list with type information.
+ */
+class ParameterValidator {
+public:
+ typedef vespalib::string string;
+ typedef std::vector<string> StringVector;
+ /**
+ * This class contains the result after running a validation for a given parameter description.
+ * If the result is valid the parameter description matched the string parameter list
+ * and the converted parameter list is stored.
+ * If the result is not valid the reason for this is found in the error string.
+ */
+ class Result {
+ private:
+ ParameterList _params;
+ size_t _tag;
+ string _errorStr;
+ bool _valid;
+
+ public:
+ /**
+ * Creates a result for the parameter description with the given tag.
+ */
+ Result(size_t tag = 0);
+ Result & addParameter(const Parameter & param) { _params.push_back(param); return *this; }
+ Result & setError(const vespalib::stringref & str) {
+ _errorStr = str;
+ _params.clear();
+ _valid = false;
+ return *this;
+ }
+ const ParameterList & getParameters() const { return _params; }
+ size_t getTag() const { return _tag; }
+ const string & getError() const { return _errorStr; }
+ bool valid() const { return _valid; }
+ };
+private:
+ const IIndexEnvironment & _indexEnv;
+ const StringVector & _params;
+ const ParameterDescriptions & _descs;
+
+ void validateField(ParameterType::Enum type, ParameterCollection::Enum collection,
+ size_t i, Result & result);
+ void validateNumber(ParameterType::Enum type, size_t i, Result & result);
+ Result validate(const ParameterDescriptions::Description & desc);
+
+public:
+ /**
+ * Creates a new validator.
+ *
+ * @param indexEnv the index environment used to lookup fields.
+ * @param params the string parameter list to validate.
+ * @param descs the parameter descriptions to use during validation.
+ */
+ ParameterValidator(const IIndexEnvironment & indexEnv,
+ const StringVector & params,
+ const ParameterDescriptions & descs);
+ /**
+ * Runs the validator and returns the result.
+ * The result object for the first parameter description that match is returned.
+ * In case of no match the result object for the first registered parameter description is returned.
+ */
+ Result validate();
+};
+
+} // namespace fef
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/fef/phrasesplitter.cpp b/searchlib/src/vespa/searchlib/fef/phrasesplitter.cpp
new file mode 100644
index 00000000000..a18587efeef
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/phrasesplitter.cpp
@@ -0,0 +1,110 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".fef.phrasesplitter");
+#include "phrasesplitter.h"
+
+namespace search {
+namespace fef {
+
+void
+PhraseSplitter::considerTerm(uint32_t termIdx, const ITermData &term, std::vector<PhraseTerm> &phraseTerms, uint32_t fieldId)
+{
+ typedef search::fef::ITermFieldRangeAdapter FRA;
+
+ for (FRA iter(term); iter.valid(); iter.next()) {
+ if (iter.get().getFieldId() == fieldId) {
+ TermFieldHandle h = iter.get().getHandle();
+ _maxHandle = std::max(_maxHandle, h);
+ if (term.getPhraseLength() > 1) {
+ SimpleTermData prototype;
+ prototype.setWeight(term.getWeight());
+ prototype.setPhraseLength(1);
+ prototype.setTermIndex(term.getTermIndex());
+ prototype.setUniqueId(term.getUniqueId());
+ prototype.addField(fieldId);
+ phraseTerms.push_back(PhraseTerm(term, _terms.size(), h));
+ for (uint32_t i = 0; i < term.getPhraseLength(); ++i) {
+ _terms.push_back(prototype);
+ _termIdxMap.push_back(TermIdx(_terms.size() - 1, true));
+ }
+ return;
+ }
+ }
+ }
+ _termIdxMap.push_back(TermIdx(termIdx, false));
+}
+
+PhraseSplitter::PhraseSplitter(const IQueryEnvironment & queryEnv,
+ uint32_t fieldId) :
+ _queryEnv(queryEnv),
+ _matchData(NULL),
+ _terms(),
+ _termMatches(),
+ _termIdxMap(),
+ _maxHandle(0),
+ _skipHandles(0)
+{
+ TermFieldHandle numHandles = 0; // how many handles existed in underlying data
+ std::vector<PhraseTerm> phraseTerms; // data about original phrase terms
+
+ for (uint32_t i = 0; i < queryEnv.getNumTerms(); ++i) {
+ const ITermData *td = queryEnv.getTerm(i);
+ LOG_ASSERT(td != NULL);
+ considerTerm(i, *td, phraseTerms, fieldId);
+ numHandles += td->numFields();
+ }
+
+ _skipHandles = _maxHandle + 1 + numHandles;
+ for (uint32_t i = 0; i < _terms.size(); ++i) {
+ // start at _skipHandles + 0
+ _terms[i].field(0).setHandle(_skipHandles + _termMatches.size());
+ TermFieldMatchData empty;
+ empty.setFieldId(fieldId);
+ _termMatches.push_back(empty);
+ }
+
+ for (uint32_t i = 0; i < phraseTerms.size(); ++i) {
+ const PhraseTerm &pterm = phraseTerms[i];
+
+ for (uint32_t j = 0; j < pterm.term.getPhraseLength(); ++j) {
+ const ITermData &splitp_td = _terms[pterm.idx + j];
+ const ITermFieldData& splitp_tfd = splitp_td.field(0);
+ HowToCopy meta;
+ meta.orig_handle = pterm.orig_handle;
+ meta.split_handle = splitp_tfd.getHandle();
+ meta.offsetInPhrase = j;
+ _copyInfo.push_back(meta);
+ }
+ }
+
+}
+
+void
+PhraseSplitter::copyTermFieldMatchData(TermFieldMatchData & dst, const TermFieldMatchData & src, uint32_t hitOffset)
+{
+ dst.reset(src.getDocId());
+
+ for (TermFieldMatchData::PositionsIterator itr = src.begin(), end = src.end(); itr != end; ++itr) {
+ TermFieldMatchDataPosition pos(*itr);
+ pos.setPosition(pos.getPosition() + hitOffset);
+ dst.appendPosition(TermFieldMatchDataPosition(pos));
+ }
+}
+
+void
+PhraseSplitter::update(const MatchData & matchData)
+{
+ _matchData = &matchData;
+ for (uint32_t i = 0; i < _copyInfo.size(); ++i) {
+ const TermFieldMatchData *src = matchData.resolveTermField(_copyInfo[i].orig_handle);
+ TermFieldMatchData *dst = resolveSplittedTermField(_copyInfo[i].split_handle);
+ LOG_ASSERT(src != NULL && dst != NULL);
+ copyTermFieldMatchData(*dst, *src, _copyInfo[i].offsetInPhrase);
+ }
+
+}
+
+} // namespace fef
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/fef/phrasesplitter.h b/searchlib/src/vespa/searchlib/fef/phrasesplitter.h
new file mode 100644
index 00000000000..5438954f380
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/phrasesplitter.h
@@ -0,0 +1,146 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "iqueryenvironment.h"
+#include "matchdata.h"
+#include "simpletermdata.h"
+#include "termfieldmatchdata.h"
+#include "fieldinfo.h"
+
+namespace search {
+namespace fef {
+
+/**
+ * This class is used to split all phrase terms in a query environment
+ * into separate terms. New TermData and TermFieldMatchData objects
+ * are created for each splitted phrase term and managed by this
+ * class. Unmodified single terms are served from the query
+ * environment and match data.
+ *
+ * The TermFieldMatchData objects managed by this class are updated
+ * based on the TermFieldMatchData objects associated with the
+ * original phrase terms. Positions are adjusted with +1 for each term
+ * after the first one.
+ *
+ * Use this class if you want to handle a phrase term the same way as
+ * single terms.
+ **/
+class PhraseSplitter : public IQueryEnvironment
+{
+private:
+ struct TermIdx {
+ uint32_t idx; // index into either query environment or vector of TermData objects
+ bool splitted; // whether this term has been splitted or not
+ TermIdx(uint32_t i, bool s) : idx(i), splitted(s) {}
+ };
+ struct PhraseTerm {
+ const ITermData & term; // for original phrase
+ uint32_t idx; // index into vector of our TermData objects
+ TermFieldHandle orig_handle;
+ PhraseTerm(const ITermData & t, uint32_t i, uint32_t h) : term(t), idx(i), orig_handle(h) {}
+ };
+ struct HowToCopy {
+ TermFieldHandle orig_handle;
+ TermFieldHandle split_handle;
+ uint32_t offsetInPhrase;
+ };
+
+ const IQueryEnvironment &_queryEnv;
+ const MatchData *_matchData;
+ std::vector<SimpleTermData> _terms; // splitted terms
+ std::vector<TermFieldMatchData> _termMatches; // match objects associated with splitted terms
+ std::vector<HowToCopy> _copyInfo;
+ std::vector<TermIdx> _termIdxMap; // renumbering of terms
+ TermFieldHandle _maxHandle; // the largest among original term field handles
+ TermFieldHandle _skipHandles; // how many handles to skip
+
+ void considerTerm(uint32_t termIdx, const ITermData &term, std::vector<PhraseTerm> &phraseTerms, uint32_t fieldId);
+ void splitPhrase(const ITermData &phrase, std::vector<PhraseTerm> &phraseTerms, uint32_t fieldId);
+
+ TermFieldMatchData *resolveSplittedTermField(TermFieldHandle handle) {
+ return &_termMatches[handle - _skipHandles];
+ }
+
+ const TermFieldMatchData *resolveSplittedTermField(TermFieldHandle handle) const {
+ return &_termMatches[handle - _skipHandles];
+ }
+
+public:
+ /**
+ * Create a phrase splitter based on the given query environment.
+ *
+ * @param queryEnv the query environment to wrap.
+ * @param field the field where we need to split phrases
+ **/
+ PhraseSplitter(const IQueryEnvironment & queryEnv, uint32_t fieldId);
+
+ /**
+ * Copy the source object to the destination object.
+ * Use the given hit offset when copying position information. pos (x) -> pos (x + hitOffset).
+ *
+ * @param dst the destination object.
+ * @param src the source object.
+ * @param hitOffset the offset to use when copying position information.
+ **/
+ static void copyTermFieldMatchData(TermFieldMatchData & dst, const TermFieldMatchData & src, uint32_t hitOffset);
+
+ /**
+ * Update the underlying TermFieldMatchData objects based on the given MatchData object.
+ *
+ * @param matchData the MatchData object containing original TermFieldMatchData objects.
+ **/
+ void update(const MatchData & matchData);
+
+ /**
+ * Inherit doc from IQueryEnvironment.
+ **/
+ virtual uint32_t getNumTerms() const {
+ return _termIdxMap.size();
+ }
+
+ /**
+ * Inherit doc from IQueryEnvironment.
+ **/
+ virtual const ITermData * getTerm(uint32_t idx) const {
+ if (idx >= _termIdxMap.size()) {
+ return NULL;
+ }
+ const TermIdx & ti = _termIdxMap[idx];
+ return ti.splitted ? &_terms[ti.idx] : _queryEnv.getTerm(ti.idx);
+ }
+
+ /**
+ * Inherit doc from MatchData.
+ **/
+ const TermFieldMatchData * resolveTermField(TermFieldHandle handle) const {
+ if (_matchData == NULL) {
+ return NULL;
+ }
+ return handle < _skipHandles ? _matchData->resolveTermField(handle) : resolveSplittedTermField(handle);
+ }
+
+ /**
+ * Inherit doc from IQueryEnvironment.
+ **/
+ virtual const Properties & getProperties() const { return _queryEnv.getProperties(); }
+
+ /**
+ * Inherit doc from IQueryEnvironment.
+ **/
+ virtual const Location & getLocation() const { return _queryEnv.getLocation(); }
+
+ /**
+ * Inherit doc from IQueryEnvironment.
+ **/
+ virtual const attribute::IAttributeContext & getAttributeContext() const { return _queryEnv.getAttributeContext(); }
+
+ /**
+ * Inherit doc from IQueryEnvironment.
+ **/
+ virtual const IIndexEnvironment & getIndexEnvironment() const { return _queryEnv.getIndexEnvironment(); }
+};
+
+} // namespace fef
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/fef/properties.cpp b/searchlib/src/vespa/searchlib/fef/properties.cpp
new file mode 100644
index 00000000000..7351bc4e5bf
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/properties.cpp
@@ -0,0 +1,269 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".fef.properties");
+#include "properties.h"
+
+namespace search {
+namespace fef {
+
+const Property::Value Property::_emptyValue;
+const Property::Values Property::_emptyValues;
+
+Property::Property(const Property::Values &values)
+ : _values(&values)
+{
+}
+
+Property::Property()
+ : _values(&_emptyValues)
+{
+}
+
+bool
+Property::found() const
+{
+ return !(*_values).empty();
+}
+
+const Property::Value &
+Property::get() const
+{
+ if ((*_values).empty()) {
+ return _emptyValue;
+ }
+ return (*_values)[0];
+}
+
+const Property::Value &
+Property::get(const Property::Value &fallBack) const
+{
+ if ((*_values).empty()) {
+ return fallBack;
+ }
+ return (*_values)[0];
+}
+
+uint32_t
+Property::size() const
+{
+ return (*_values).size();
+}
+
+const Property::Value &
+Property::getAt(uint32_t idx) const
+{
+ if (idx < (*_values).size()) {
+ return (*_values)[idx];
+ }
+ return _emptyValue;
+}
+
+//-----------------------------------------------------------------------------
+
+uint32_t
+Properties::rawHash(const void *buf, uint32_t len)
+{
+ uint32_t res = 0;
+ unsigned const char *pt = (unsigned const char *) buf;
+ unsigned const char *end = pt + len;
+ while (pt < end) {
+ res = (res << 7) + (res >> 25) + *pt++;
+ }
+ return res;
+}
+
+Properties::Properties()
+ : _numValues(0),
+ _data()
+{
+}
+
+Properties::~Properties()
+{
+ LOG_ASSERT(_numValues >= _data.size());
+}
+
+Properties &
+Properties::add(const vespalib::stringref &key, const vespalib::stringref &value)
+{
+ if (!key.empty()) {
+ Value & v = _data[key];
+ v.push_back(value);
+ ++_numValues;
+ }
+ return *this;
+}
+
+uint32_t
+Properties::count(const vespalib::stringref &key) const
+{
+ if (!key.empty()) {
+ Map::const_iterator node = _data.find(key);
+ if (node != _data.end()) {
+ return node->second.size();
+ }
+ }
+ return 0;
+}
+
+Properties &
+Properties::remove(const vespalib::stringref &key)
+{
+ if (!key.empty()) {
+ Map::iterator node = _data.find(key);
+ if (node != _data.end()) {
+ _numValues -= node->second.size();
+ _data.erase(node);
+ }
+ }
+ return *this;
+}
+
+Properties &
+Properties::import(const Properties &src)
+{
+ Map::const_iterator itr = src._data.begin();
+ Map::const_iterator end = src._data.end();
+ for (; itr != end; ++itr) {
+ Map::insert_result res = _data.insert(Map::value_type(itr->first, itr->second));
+ if ( ! res.second) {
+ _numValues -= res.first->second.size();
+ res.first->second = itr->second;
+ }
+ _numValues += itr->second.size();
+ }
+ return *this;
+}
+
+Properties &
+Properties::clear()
+{
+ if (_data.empty()) {
+ return *this;
+ }
+ {
+ Map empty;
+ std::swap(_data, empty);
+ }
+ _numValues = 0;
+ return *this;
+}
+
+bool
+Properties::operator==(const Properties &rhs) const
+{
+ return (_numValues == rhs._numValues &&
+ _data == rhs._data);
+}
+
+uint32_t
+Properties::hashCode() const
+{
+ uint32_t hash = numKeys() + numValues();
+ Map::const_iterator itr = _data.begin();
+ Map::const_iterator end = _data.end();
+ for (; itr != end; ++itr) {
+ const Key &key = itr->first;
+ const Value &value = itr->second;
+ Value::const_iterator v_itr = value.begin();
+ Value::const_iterator v_end = value.end();
+ hash += rawHash(key.data(), key.size());
+ for (; v_itr != v_end; ++v_itr) {
+ hash += rawHash(v_itr->data(), v_itr->size());
+ }
+ }
+ return hash;
+}
+
+void
+Properties::visitProperties(IPropertiesVisitor &visitor) const
+{
+ Map::const_iterator itr = _data.begin();
+ Map::const_iterator end = _data.end();
+ for (; itr != end; ++itr) {
+ visitor.visitProperty(itr->first, Property(itr->second));
+ }
+}
+
+void
+Properties::visitNamespace(const vespalib::stringref &ns,
+ IPropertiesVisitor &visitor) const
+{
+ vespalib::string tmp;
+ vespalib::string prefix = ns + ".";
+ Map::const_iterator itr = _data.begin();
+ Map::const_iterator end = _data.end();
+ for (; itr != end; ++itr) {
+ if ((itr->first.find(prefix) == 0) &&
+ (itr->first.size() > prefix.size()))
+ {
+ tmp = vespalib::stringref(itr->first.data() + prefix.size(),
+ itr->first.size() - prefix.size());
+ visitor.visitProperty(tmp, Property(itr->second));
+ }
+ }
+}
+
+Property
+Properties::lookup(const vespalib::stringref &key) const
+{
+ if (key.empty()) {
+ return Property();
+ }
+ Map::const_iterator node = _data.find(key);
+ if (node == _data.end()) {
+ return Property();
+ }
+ return Property(node->second);
+}
+
+Property Properties::lookup(const vespalib::stringref &namespace1,
+ const vespalib::stringref &key) const
+{
+ if (namespace1.empty() ||
+ key.empty())
+ {
+ return Property();
+ }
+ return lookup(namespace1 + "." + key);
+}
+
+Property Properties::lookup(const vespalib::stringref &namespace1,
+ const vespalib::stringref &namespace2,
+ const vespalib::stringref &key) const
+{
+ if (namespace1.empty() ||
+ namespace2.empty() ||
+ key.empty())
+ {
+ return Property();
+ }
+ return lookup(namespace1 + "." + namespace2 + "." + key);
+}
+
+Property Properties::lookup(const vespalib::stringref &namespace1,
+ const vespalib::stringref &namespace2,
+ const vespalib::stringref &namespace3,
+ const vespalib::stringref &key) const
+{
+ if (namespace1.empty() ||
+ namespace2.empty() ||
+ namespace3.empty() ||
+ key.empty())
+ {
+ return Property();
+ }
+ return lookup(namespace1 + "." + namespace2 + "."
+ + namespace3 + "." + key);
+}
+
+void Properties::swap(Properties & rhs)
+{
+ _data.swap(rhs._data);
+ std::swap(_numValues, rhs._numValues);
+}
+
+} // namespace fef
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/fef/properties.h b/searchlib/src/vespa/searchlib/fef/properties.h
new file mode 100644
index 00000000000..e808b77e2af
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/properties.h
@@ -0,0 +1,324 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/vespalib/stllike/string.h>
+#include <vespa/vespalib/stllike/hash_map.h>
+#include <vector>
+
+namespace search {
+namespace fef {
+
+class Properties;
+
+//-----------------------------------------------------------------------------
+
+/**
+ * This object represents the result of a lookup in a @ref Properties
+ * object. This class is also used for property visitation. It
+ * contains all values associated with the key used for lookup. The
+ * values are accessible in the order in which they were originally
+ * added. This object is only valid until the @ref Properties object
+ * it was obtained from is changed or deleted.
+ **/
+class Property
+{
+public:
+ typedef vespalib::string Value;
+ typedef std::vector<Value> Values;
+private:
+ friend class Properties;
+
+ static const Value _emptyValue;
+ static const Values _emptyValues;
+ const Values *_values;
+
+ /**
+ * Create a new property using the given value vector.
+ *
+ * @param values the values for this property
+ **/
+ Property(const Values &values);
+
+public:
+ /**
+ * Create a property that represents the result of a lookup that
+ * did not find anything. This method may be used to allocate an
+ * object on the stack in the application, and will also be used
+ * by the @ref Properties class when a lookup gives no results.
+ **/
+ Property();
+
+ /**
+ * Check if we found what we were looking for or not.
+ *
+ * @return true if the key we looked up had at least one value
+ **/
+ bool found() const;
+
+ /**
+ * Get the first value assigned to the looked up key. This method
+ * will return an empty string if no values were found.
+ *
+ * @return first value for the looked up key, or ""
+ **/
+ const Value &get() const;
+
+ /**
+ * Get the first value assigned to the looked up key. This method
+ * will return the specified fallback string if no values were
+ * found.
+ *
+ * @return first value for the looked up key, or fallBack
+ * @param fallBack value to return if no values were found
+ **/
+ const Value & get(const Value &fallBack) const;
+
+ /**
+ * The number of values found for the looked up key.
+ *
+ * @return number of values for this property
+ **/
+ uint32_t size() const;
+
+ /**
+ * Obtain a specific value for the looked up key.
+ *
+ * @return the requested value, or "" if idx was out of bounds
+ * @param idx the index of the value we want to access
+ **/
+ const Value &getAt(uint32_t idx) const;
+};
+
+//-----------------------------------------------------------------------------
+
+/**
+ * This interface is implemented by objects that want to visit all
+ * properties contained in a Properties object.
+ **/
+class IPropertiesVisitor
+{
+public:
+ /**
+ * Visit a single key and all its values. Keys are visited in
+ * sorting order according to the less operator of the string
+ * class. The values are wrapped in a Property object that is
+ * equivalent to the object that would be returned if the key had
+ * been used as parameter to the lookup method in the Properties
+ * object.
+ *
+ * @param key the key
+ * @param values the values
+ **/
+ virtual void visitProperty(const Property::Value &key,
+ const Property &values) = 0;
+
+ /**
+ * Virtual destructor to allow safe subclassing.
+ **/
+ virtual ~IPropertiesVisitor() {}
+};
+
+//-----------------------------------------------------------------------------
+
+/**
+ * A simple wrapper for a set of key/value pairs. Each key may be
+ * added multiple times, resulting in multiple values for a single
+ * key. When data is imported from one object to another, the set of
+ * values for common keys are totally replaced.
+ **/
+class Properties
+{
+private:
+ typedef vespalib::string Key;
+ typedef Property::Values Value;
+ typedef vespalib::hash_map<Key, Value> Map;
+
+ uint32_t _numValues;
+ Map _data;
+
+ /**
+ * Calculate a hash code from raw data.
+ *
+ * @return hash code
+ * @param buf data pointer
+ * @param len data length
+ **/
+ static uint32_t rawHash(const void *buf, uint32_t len);
+
+public:
+ typedef std::unique_ptr<Properties> UP;
+
+ /**
+ * Create an empty properties object.
+ **/
+ Properties();
+
+ /**
+ * The destructor asserts that key/value counts look sane before
+ * deleting the internal data.
+ **/
+ ~Properties();
+
+ /**
+ * Add a value to a key. If the key is an empty string, the value
+ * will be ignored.
+ *
+ * @return this object, for chaining
+ * @param key the key
+ * @param value the value
+ **/
+ Properties &add(const vespalib::stringref &key, const vespalib::stringref &value);
+
+ /**
+ * Obtain the number of values for a given key.
+ *
+ * @return number of values for the given key
+ * @param key the key
+ **/
+ uint32_t count(const vespalib::stringref &key) const;
+
+ /**
+ * Remove all values for the given key.
+ *
+ * @return this object, for chaining
+ * @param key the key
+ **/
+ Properties &remove(const vespalib::stringref &key);
+
+ /**
+ * Import all key/value pairs from src into this object. All
+ * values stored in this object for keys present in src will be
+ * removed during this operation.
+ *
+ * @return this object, for chaining
+ * @param src where to import from
+ **/
+ Properties &import(const Properties &src);
+
+ /**
+ * Remove all key/value pairs from this object, making it
+ * equivalent with a freshly created object. It is relatively
+ * cheap to clear an already empty object.
+ *
+ * @return this object, for chaining
+ **/
+ Properties &clear();
+
+ /**
+ * Obtain the total number of keys stored in this object.
+ *
+ * @return number of keys
+ **/
+ uint32_t numKeys() const { return _data.size(); }
+
+ /**
+ * Obtain the total number of values stored in this object.
+ *
+ * @return number of values
+ **/
+ uint32_t numValues() const { return _numValues; }
+
+ /**
+ * Check if rhs contains the same key/value pairs as this
+ * object. If a key has multiple values, they need to be in the
+ * same order to match.
+ *
+ * @return true if we are equal to rhs
+ **/
+ bool operator==(const Properties &rhs) const;
+
+ /**
+ * Calculate a hash code for this object
+ *
+ * @return hash code for this object
+ **/
+ uint32_t hashCode() const;
+
+ /**
+ * Visit all key/value pairs
+ *
+ * @param visitor the object being notified of all key/value pairs
+ **/
+ void visitProperties(IPropertiesVisitor &visitor) const;
+
+ /**
+ * Visit all key/value pairs inside a namespace. The namespace
+ * itself will be stripped from the keys that are visited.
+ *
+ * @param ns the namespace to visit
+ * @param visitor the object being notified of key/value pairs inside the namespace
+ **/
+ void visitNamespace(const vespalib::stringref &ns,
+ IPropertiesVisitor &visitor) const;
+
+ /**
+ * Look up a key in this object. An empty key will result in an
+ * empty property.
+ *
+ * @return object encapsulating lookup result
+ * @param key the key to look up
+ **/
+ Property lookup(const vespalib::stringref &key) const;
+
+ /**
+ * Look up a key inside a namespace using the proposed namespace
+ * syntax. When using namespaces, the actual key is generated by
+ * concatenating all namespaces and the key, inserting a '.'
+ * between elements. An empty key and/or namespace will result in
+ * an empty property.
+ *
+ * @return object encapsulating lookup result
+ * @param namespace1 the namespace
+ * @param key the key to look up
+ **/
+ Property lookup(const vespalib::stringref &namespace1,
+ const vespalib::stringref &key) const;
+
+ /**
+ * Look up a key inside a namespace using the proposed namespace
+ * syntax. When using namespaces, the actual key is generated by
+ * concatenating all namespaces and the key, inserting a '.'
+ * between elements. An empty key and/or namespace will result in
+ * an empty property.
+ *
+ * @return object encapsulating lookup result
+ * @param namespace the first namespace
+ * @param namespace the second namespace
+ * @param key the key to look up
+ **/
+ Property lookup(const vespalib::stringref &namespace1,
+ const vespalib::stringref &namespace2,
+ const vespalib::stringref &key) const;
+
+ /**
+ * Look up a key inside a namespace using the proposed namespace
+ * syntax. When using namespaces, the actual key is generated by
+ * concatenating all namespaces and the key, inserting a '.'
+ * between elements. An empty key and/or namespace will result in
+ * an empty property.
+ *
+ * @return object encapsulating lookup result
+ * @param namespace the first namespace
+ * @param namespace the second namespace
+ * @param namespace the third namespace
+ * @param key the key to look up
+ **/
+ Property lookup(const vespalib::stringref &namespace1,
+ const vespalib::stringref &namespace2,
+ const vespalib::stringref &namespace3,
+ const vespalib::stringref &key) const;
+
+ void swap(Properties & rhs);
+};
+
+inline void
+swap(Properties & a, Properties & b)
+{
+ a.swap(b);
+}
+
+
+} // namespace fef
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/fef/queryproperties.cpp b/searchlib/src/vespa/searchlib/fef/queryproperties.cpp
new file mode 100644
index 00000000000..a5dd5dc9229
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/queryproperties.cpp
@@ -0,0 +1,16 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "queryproperties.h"
+
+namespace search {
+namespace fef {
+namespace queryproperties {
+namespace now {
+
+const vespalib::string SystemTime::NAME("vespa.now");
+
+} // namespace now
+} // namespace queryproperties
+} // namespace fef
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/fef/queryproperties.h b/searchlib/src/vespa/searchlib/fef/queryproperties.h
new file mode 100644
index 00000000000..661bc460415
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/queryproperties.h
@@ -0,0 +1,42 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/vespalib/stllike/string.h>
+
+namespace search {
+namespace fef {
+
+/**
+ * This namespace is a placeholder for several structs, each
+ * representing a query property with name and default value. All
+ * property names defined here will have the prefix "vespa." and are
+ * known by the feature execution framework. When accessing a query
+ * property from a @ref Properties instance one should use the
+ * property names defined here to perform the lookup. The query
+ * properties are the set of properties available through the query
+ * environment. These properties are denoted as rank properties in
+ * other parts of the system.
+ **/
+namespace queryproperties {
+
+namespace now {
+ /**
+ * Property indicating the time to be used for time-sensitive
+ * relevancy computations. This affects the value returned by the
+ * global feature 'now'. The time is given in seconds since epoch.
+ **/
+ struct SystemTime {
+
+ /**
+ * Property name.
+ **/
+ static const vespalib::string NAME;
+ };
+
+} // namespace now
+
+} // namespace queryproperties
+} // namespace fef
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/fef/rank_program.cpp b/searchlib/src/vespa/searchlib/fef/rank_program.cpp
new file mode 100644
index 00000000000..69cd76917cd
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/rank_program.cpp
@@ -0,0 +1,240 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".fef.rank_program");
+#include "rank_program.h"
+#include "featureoverrider.h"
+#include <algorithm>
+
+namespace search {
+namespace fef {
+
+namespace {
+
+struct Override
+{
+ BlueprintResolver::FeatureRef ref;
+ feature_t value;
+
+ Override(const BlueprintResolver::FeatureRef &r, feature_t v)
+ : ref(r), value(v) {}
+
+ bool operator<(const Override &rhs) const {
+ return (ref.executor < rhs.ref.executor);
+ }
+};
+
+struct OverrideVisitor : public IPropertiesVisitor
+{
+ const BlueprintResolver::FeatureMap &feature_map;
+ std::vector<Override> &overrides;
+
+ OverrideVisitor(const BlueprintResolver::FeatureMap &feature_map_in,
+ std::vector<Override> &overrides_out)
+ : feature_map(feature_map_in), overrides(overrides_out) {}
+
+ virtual void visitProperty(const Property::Value & key,
+ const Property & values)
+ {
+ auto pos = feature_map.find(key);
+ if (pos != feature_map.end()) {
+ overrides.push_back(Override(pos->second, strtod(values.get().c_str(), nullptr)));
+ }
+ }
+};
+
+std::vector<Override> prepare_overrides(const BlueprintResolver::FeatureMap &feature_map,
+ const Properties &featureOverrides)
+{
+ std::vector<Override> overrides;
+ overrides.reserve(featureOverrides.numValues());
+ OverrideVisitor visitor(feature_map, overrides);
+ featureOverrides.visitProperties(visitor);
+ std::sort(overrides.begin(), overrides.end());
+ return overrides;
+}
+
+struct UnboxingExecutor : FeatureExecutor {
+ UnboxingExecutor(SharedInputs &shared_inputs,
+ FeatureHandle old_feature,
+ FeatureHandle new_feature)
+ {
+ bind_shared_inputs(shared_inputs);
+ addInput(old_feature);
+ bindOutput(new_feature);
+ }
+ bool isPure() override { return true; }
+ void execute(search::fef::MatchData &md) override {
+ double number_value = md.resolve_object_feature(inputs()[0])->get().as_double();
+ *md.resolveFeature(outputs()[0]) = number_value;
+ }
+};
+
+} // namespace search::fef::<unnamed>
+
+void
+RankProgram::add_unboxing_executors(MatchDataLayout &my_mdl)
+{
+ const auto &specs = _resolver->getExecutorSpecs();
+ for (const auto &seed_entry: _resolver->getSeedMap()) {
+ auto seed = seed_entry.second;
+ if (specs[seed.executor].output_types[seed.output]) {
+ FeatureHandle old_handle = _executors[seed.executor]->outputs()[seed.output];
+ FeatureHandle new_handle = my_mdl.allocFeature(false);
+ _executors.emplace_back(new UnboxingExecutor(_shared_inputs, old_handle, new_handle));
+ _unboxed_seeds[seed_entry.first] = std::make_pair(old_handle, new_handle);
+ }
+ }
+}
+
+void
+RankProgram::compile()
+{
+ MatchData &md = match_data();
+ std::vector<bool> is_calculated(md.getNumFeatures(), false);
+ for (size_t i = 0; i < _executors.size(); ++i) {
+ FeatureExecutor &executor = *_executors[i];
+ bool is_const = executor.isPure();
+ const auto &inputs = executor.inputs();
+ for (size_t in_idx = 0; is_const && (in_idx < inputs.size()); ++in_idx) {
+ is_const &= is_calculated[inputs[in_idx]];
+ }
+ if (is_const) {
+ executor.execute(md);
+ const auto &outputs = executor.outputs();
+ for (size_t out_idx = 0; out_idx < outputs.size(); ++out_idx) {
+ is_calculated[outputs[out_idx]] = true;
+ }
+ } else {
+ _program.push_back(&executor);
+ }
+ }
+}
+
+RankProgram::RankProgram(BlueprintResolver::SP resolver)
+ : _resolver(resolver),
+ _shared_inputs(),
+ _program(),
+ _executors(),
+ _unboxed_seeds()
+{
+}
+
+void
+RankProgram::setup(const MatchDataLayout &mdl_in,
+ const IQueryEnvironment &queryEnv,
+ const Properties &featureOverrides)
+{
+ assert(_executors.empty());
+ MatchDataLayout my_mdl(mdl_in);
+ std::vector<Override> overrides = prepare_overrides(_resolver->getFeatureMap(), featureOverrides);
+ auto override = overrides.begin();
+ auto override_end = overrides.end();
+
+ const auto &specs = _resolver->getExecutorSpecs();
+ _executors.reserve(specs.size());
+ for (uint32_t i = 0; i < specs.size(); ++i) {
+ FeatureExecutor::UP executor(specs[i].blueprint->createExecutor(queryEnv).release());
+ assert(executor);
+ executor->bind_shared_inputs(_shared_inputs);
+ for (; (override < override_end) && (override->ref.executor == i); ++override) {
+ FeatureExecutor::LP tmp(executor.release());
+ executor.reset(new FeatureOverrider(tmp, override->ref.output, override->value));
+ executor->bind_shared_inputs(_shared_inputs);
+ }
+ for (auto ref: specs[i].inputs) {
+ executor->addInput(_executors[ref.executor]->outputs()[ref.output]);
+ }
+ executor->inputs_done();
+ uint32_t out_cnt = specs[i].output_types.size();
+ for (uint32_t out_idx = 0; out_idx < out_cnt; ++out_idx) {
+ executor->bindOutput(my_mdl.allocFeature(specs[i].output_types[out_idx]));
+ }
+ executor->outputs_done();
+ _executors.push_back(std::move(executor));
+ }
+ add_unboxing_executors(my_mdl);
+ _match_data = my_mdl.createMatchData();
+ compile();
+}
+
+namespace {
+
+template <typename Each>
+void extract_handles(const BlueprintResolver::FeatureMap &features,
+ const std::vector<FeatureExecutor::UP> &executors,
+ const Each &each)
+{
+ each.reserve(features.size());
+ for (const auto &entry: features) {
+ auto ref = entry.second;
+ FeatureHandle handle = executors[ref.executor]->outputs()[ref.output];
+ each.process(entry.first, handle);
+ }
+}
+
+struct RawHandleCollector {
+ std::vector<vespalib::string> &names;
+ std::vector<FeatureHandle> &handles;
+ RawHandleCollector(std::vector<vespalib::string> &names_in,
+ std::vector<FeatureHandle> &handles_in)
+ : names(names_in), handles(handles_in) {}
+ void reserve(size_t size) const {
+ names.reserve(size);
+ handles.reserve(size);
+ }
+ void process(const vespalib::string &name, FeatureHandle handle) const {
+ names.push_back(name);
+ handles.push_back(handle);
+ }
+};
+
+struct MappedHandleCollector {
+ typedef std::map<vespalib::string, std::pair<FeatureHandle, FeatureHandle> > MappedFeatures;
+ RawHandleCollector collector;
+ const MappedFeatures &mapped;
+ MappedHandleCollector(std::vector<vespalib::string> &names,
+ std::vector<FeatureHandle> &handles,
+ const MappedFeatures &mapped_in)
+ : collector(names, handles), mapped(mapped_in) {}
+ void reserve(size_t size) const { collector.reserve(size); }
+ void process(const vespalib::string &name, FeatureHandle handle) const {
+ auto pos = mapped.find(name);
+ if (pos == mapped.end()) {
+ collector.process(name, handle);
+ } else {
+ assert(handle == pos->second.first);
+ collector.process(name, pos->second.second);
+ }
+ }
+};
+
+}
+
+void
+RankProgram::get_seed_handles(std::vector<vespalib::string> &names_out,
+ std::vector<FeatureHandle> &handles_out,
+ bool unbox_seeds) const
+{
+ if (unbox_seeds && !_unboxed_seeds.empty()) {
+ extract_handles(_resolver->getSeedMap(), _executors, MappedHandleCollector(names_out, handles_out, _unboxed_seeds));
+ } else {
+ extract_handles(_resolver->getSeedMap(), _executors, RawHandleCollector(names_out, handles_out));
+ }
+}
+
+void
+RankProgram::get_all_feature_handles(std::vector<vespalib::string> &names_out,
+ std::vector<FeatureHandle> &handles_out,
+ bool unbox_seeds) const
+{
+ if (unbox_seeds && !_unboxed_seeds.empty()) {
+ extract_handles(_resolver->getFeatureMap(), _executors, MappedHandleCollector(names_out, handles_out, _unboxed_seeds));
+ } else {
+ extract_handles(_resolver->getFeatureMap(), _executors, RawHandleCollector(names_out, handles_out));
+ }
+}
+
+} // namespace fef
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/fef/rank_program.h b/searchlib/src/vespa/searchlib/fef/rank_program.h
new file mode 100644
index 00000000000..d9ac2e0e68b
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/rank_program.h
@@ -0,0 +1,135 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "blueprintresolver.h"
+#include "featureexecutor.h"
+#include "properties.h"
+#include "matchdata.h"
+#include "matchdatalayout.h"
+#include <vespa/vespalib/stllike/string.h>
+#include <vector>
+#include <memory.h>
+
+namespace search {
+namespace fef {
+
+/**
+ * A rank program runs multiple feature executors in a predefined
+ * order to produce a set of feature values. The rank program owns the
+ * MatchData used to store unpacked term-field match information and
+ * feature values used during evaluation.
+ **/
+class RankProgram
+{
+private:
+ RankProgram(const RankProgram &) = delete;
+ RankProgram &operator=(const RankProgram &) = delete;
+
+ // { first: old_handle, second: new_handle }
+ typedef std::pair<FeatureHandle, FeatureHandle> MappedHandle;
+
+ BlueprintResolver::SP _resolver;
+ FeatureExecutor::SharedInputs _shared_inputs;
+ std::vector<FeatureExecutor*> _program;
+ MatchData::UP _match_data;
+ std::vector<FeatureExecutor::UP> _executors;
+ std::map<vespalib::string, MappedHandle> _unboxed_seeds;
+
+ /**
+ * Add unboxing executors for seeds that are object features to
+ * make sure all output values are numbers.
+ **/
+ void add_unboxing_executors(MatchDataLayout &my_mdl);
+
+ /**
+ * Prepare the final program and evaluate all constant features.
+ **/
+ void compile();
+
+public:
+ typedef std::unique_ptr<RankProgram> UP;
+
+ /**
+ * Create a new rank program backed by the given resolver.
+ *
+ * @param resolver description on how to set up executors
+ **/
+ RankProgram(BlueprintResolver::SP resolver);
+
+ size_t program_size() const { return _program.size(); }
+ size_t num_executors() const { return _executors.size(); }
+
+ /**
+ * Set up this rank program by creating the needed feature
+ * executors and wiring them together. This function will also
+ * create the MatchData to be used for iterator unpacking and
+ * feature calculation as well as pre-calculating all constant
+ * features.
+ **/
+ void setup(const MatchDataLayout &mdl,
+ const IQueryEnvironment &queryEnv,
+ const Properties &featureOverrides = Properties());
+
+ /**
+ * Expose the MatchData containing all calculated features. This
+ * is also used when creating search iterators as it is where all
+ * iterators should unpack their match information.
+ **/
+ MatchData &match_data() { return *_match_data; }
+ const MatchData &match_data() const { return *_match_data; }
+
+ /**
+ * Obtain the names and match data storage locations of all seed
+ * features for this rank program. The obtained information is
+ * written in parallel into the given vectors such that the i'th
+ * name corresponds to the i'th storage location. Programs for
+ * ranking phases will only have a single seed while programs used
+ * for summary features or scraping will have multiple seeds.
+ *
+ * @param names where to store feature names
+ * @param handles where to store feature storage locations
+ * @params unbox_seeds make sure seeds values are numbers
+ **/
+ void get_seed_handles(std::vector<vespalib::string> &names_out,
+ std::vector<FeatureHandle> &handles_out,
+ bool unbox_seeds = true) const;
+
+ /**
+ * Obtain the names and match data storage locations of all
+ * features for this rank program. The obtained information is
+ * written in parallel into the given vectors such that the i'th
+ * name corresponds to the i'th storage location. This method is
+ * intended for debugging and testing.
+ *
+ * @param names where to store feature names
+ * @param handles where to store feature storage locations
+ * @params unbox_seeds make sure seeds values are numbers
+ **/
+ void get_all_feature_handles(std::vector<vespalib::string> &names_out,
+ std::vector<FeatureHandle> &handles_out,
+ bool unbox_seeds = true) const;
+
+ /**
+ * Run this rank program on the current state of the internal
+ * match data for the given docid. Typically, match data for a
+ * specific result will be unpacked before calling run. After run
+ * is called, the wanted results can be extracted using the
+ * appropriate feature handles. The given docid will be used to
+ * tag the internal match data container before execution. Match
+ * data for individual term/field combinations are only considered
+ * valid if their docid matches that of the match data container.
+ *
+ * @param docid the document we are ranking
+ **/
+ void run(uint32_t docid) {
+ MatchData &md = match_data();
+ md.setDocId(docid);
+ for (FeatureExecutor *executor: _program) {
+ executor->execute(md);
+ }
+ }
+};
+
+} // namespace fef
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/fef/ranksetup.cpp b/searchlib/src/vespa/searchlib/fef/ranksetup.cpp
new file mode 100644
index 00000000000..a954f70c82b
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/ranksetup.cpp
@@ -0,0 +1,186 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".fef.ranksetup");
+#include "ranksetup.h"
+#include "idumpfeaturevisitor.h"
+#include "indexproperties.h"
+#include "featurenameparser.h"
+
+namespace {
+class VisitorAdapter : public search::fef::IDumpFeatureVisitor
+{
+ search::fef::BlueprintResolver &_resolver;
+public:
+ VisitorAdapter(search::fef::BlueprintResolver &resolver)
+ : _resolver(resolver) {}
+ virtual void visitDumpFeature(const vespalib::string &name) {
+ _resolver.addSeed(name);
+ }
+};
+} // namespace <unnamed>
+
+namespace search {
+namespace fef {
+
+RankSetup::RankSetup(const BlueprintFactory &factory,
+ const IIndexEnvironment &indexEnv)
+ : _factory(factory),
+ _indexEnv(indexEnv),
+ _first_phase_resolver(new BlueprintResolver(factory, indexEnv)),
+ _second_phase_resolver(new BlueprintResolver(factory, indexEnv)),
+ _summary_resolver(new BlueprintResolver(factory, indexEnv)),
+ _dumpResolver(new BlueprintResolver(factory, indexEnv)),
+ _firstPhaseRankFeature(),
+ _secondPhaseRankFeature(),
+ _degradationAttribute(),
+ _numThreads(0),
+ _numSearchPartitions(0),
+ _heapSize(0),
+ _arraySize(0),
+ _estimatePoint(0),
+ _estimateLimit(0),
+ _degradationMaxHits(0),
+ _degradationMaxFilterCoverage(1.0),
+ _degradationSamplePercentage(0.2),
+ _degradationPostFilterMultiplier(1.0),
+ _rankScoreDropLimit(0),
+ _summaryFeatures(),
+ _dumpFeatures(),
+ _ignoreDefaultRankFeatures(false),
+ _compiled(false),
+ _compileError(false),
+ _degradationAscendingOrder(false),
+ _diversityAttribute(),
+ _diversityMinGroups(1),
+ _diversityCutoffFactor(10.0),
+ _diversityCutoffStrategy("loose")
+{
+}
+
+void
+RankSetup::configure()
+{
+ setFirstPhaseRank(indexproperties::rank::FirstPhase::lookup(_indexEnv.getProperties()));
+ setSecondPhaseRank(indexproperties::rank::SecondPhase::lookup(_indexEnv.getProperties()));
+ std::vector<vespalib::string> summaryFeatures = indexproperties::summary::Feature::lookup(_indexEnv.getProperties());
+ for (uint32_t i = 0; i < summaryFeatures.size(); ++i) {
+ addSummaryFeature(summaryFeatures[i]);
+ }
+ setIgnoreDefaultRankFeatures(indexproperties::dump::IgnoreDefaultFeatures::check(_indexEnv.getProperties()));
+ std::vector<vespalib::string> dumpFeatures = indexproperties::dump::Feature::lookup(_indexEnv.getProperties());
+ for (uint32_t i = 0; i < dumpFeatures.size(); ++i) {
+ addDumpFeature(dumpFeatures[i]);
+ }
+ set_termwise_limit(indexproperties::matching::TermwiseLimit::lookup(_indexEnv.getProperties()));
+ setNumThreadsPerSearch(indexproperties::matching::NumThreadsPerSearch::lookup(_indexEnv.getProperties()));
+ setNumSearchPartitions(indexproperties::matching::NumSearchPartitions::lookup(_indexEnv.getProperties()));
+ setHeapSize(indexproperties::hitcollector::HeapSize::lookup(_indexEnv.getProperties()));
+ setArraySize(indexproperties::hitcollector::ArraySize::lookup(_indexEnv.getProperties()));
+ setDegradationAttribute(indexproperties::matchphase::DegradationAttribute::lookup(_indexEnv.getProperties()));
+ setDegradationOrderAscending(indexproperties::matchphase::DegradationAscendingOrder::lookup(_indexEnv.getProperties()));
+ setDegradationMaxHits(indexproperties::matchphase::DegradationMaxHits::lookup(_indexEnv.getProperties()));
+ setDegradationMaxFilterCoverage(indexproperties::matchphase::DegradationMaxFilterCoverage::lookup(_indexEnv.getProperties()));
+ setDegradationSamplePercentage(indexproperties::matchphase::DegradationSamplePercentage::lookup(_indexEnv.getProperties()));
+ setDegradationPostFilterMultiplier(indexproperties::matchphase::DegradationPostFilterMultiplier::lookup(_indexEnv.getProperties()));
+ setDiversityAttribute(indexproperties::matchphase::DiversityAttribute::lookup(_indexEnv.getProperties()));
+ setDiversityMinGroups(indexproperties::matchphase::DiversityMinGroups::lookup(_indexEnv.getProperties()));
+ setDiversityCutoffFactor(indexproperties::matchphase::DiversityCutoffFactor::lookup(_indexEnv.getProperties()));
+ setDiversityCutoffStrategy(indexproperties::matchphase::DiversityCutoffStrategy::lookup(_indexEnv.getProperties()));
+ setEstimatePoint(indexproperties::hitcollector::EstimatePoint::lookup(_indexEnv.getProperties()));
+ setEstimateLimit(indexproperties::hitcollector::EstimateLimit::lookup(_indexEnv.getProperties()));
+ setRankScoreDropLimit(indexproperties::hitcollector::RankScoreDropLimit::lookup(_indexEnv.getProperties()));
+}
+
+void
+RankSetup::setFirstPhaseRank(const vespalib::string &featureName)
+{
+ LOG_ASSERT(!_compiled);
+ _firstPhaseRankFeature = featureName;
+}
+
+void
+RankSetup::setSecondPhaseRank(const vespalib::string &featureName)
+{
+ LOG_ASSERT(!_compiled);
+ _secondPhaseRankFeature = featureName;
+}
+
+void
+RankSetup::addSummaryFeature(const vespalib::string &summaryFeature)
+{
+ LOG_ASSERT(!_compiled);
+ _summaryFeatures.push_back(summaryFeature);
+}
+
+void
+RankSetup::addDumpFeature(const vespalib::string &dumpFeature)
+{
+ LOG_ASSERT(!_compiled);
+ _dumpFeatures.push_back(dumpFeature);
+}
+
+bool
+RankSetup::compile()
+{
+ LOG_ASSERT(!_compiled);
+ if (!_firstPhaseRankFeature.empty()) {
+ FeatureNameParser parser(_firstPhaseRankFeature);
+ if (parser.valid()) {
+ _firstPhaseRankFeature = parser.featureName();
+ _first_phase_resolver->addSeed(_firstPhaseRankFeature);
+ } else {
+ LOG(warning, "invalid feature name for initial rank: '%s'",
+ _firstPhaseRankFeature.c_str());
+ _compileError = true;
+ }
+ }
+ if (!_secondPhaseRankFeature.empty()) {
+ FeatureNameParser parser(_secondPhaseRankFeature);
+ if (parser.valid()) {
+ _secondPhaseRankFeature = parser.featureName();
+ _second_phase_resolver->addSeed(_secondPhaseRankFeature);
+ } else {
+ LOG(warning, "invalid feature name for final rank: '%s'",
+ _secondPhaseRankFeature.c_str());
+ _compileError = true;
+ }
+ }
+ for (uint32_t i = 0; i < _summaryFeatures.size(); ++i) {
+ _summary_resolver->addSeed(_summaryFeatures[i]);
+ }
+ if (!_ignoreDefaultRankFeatures) {
+ VisitorAdapter adapter(*_dumpResolver);
+ _factory.visitDumpFeatures(_indexEnv, adapter);
+ }
+ for (uint32_t i = 0; i < _dumpFeatures.size(); ++i) {
+ _dumpResolver->addSeed(_dumpFeatures[i]);
+ }
+ _indexEnv.hintFeatureMotivation(IIndexEnvironment::RANK);
+ _compileError |= !_first_phase_resolver->compile();
+ _compileError |= !_second_phase_resolver->compile();
+ _compileError |= !_summary_resolver->compile();
+ _indexEnv.hintFeatureMotivation(IIndexEnvironment::DUMP);
+ _compileError |= !_dumpResolver->compile();
+ _compiled = true;
+ return !_compileError;
+}
+
+void
+RankSetup::prepareSharedState(const IQueryEnvironment &queryEnv, IObjectStore &objectStore) const
+{
+ LOG_ASSERT(_compiled && !_compileError);
+ for (const auto &spec : _first_phase_resolver->getExecutorSpecs()) {
+ spec.blueprint->prepareSharedState(queryEnv, objectStore);
+ }
+ for (const auto &spec : _second_phase_resolver->getExecutorSpecs()) {
+ spec.blueprint->prepareSharedState(queryEnv, objectStore);
+ }
+ for (const auto &spec : _summary_resolver->getExecutorSpecs()) {
+ spec.blueprint->prepareSharedState(queryEnv, objectStore);
+ }
+}
+
+} // namespace fef
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/fef/ranksetup.h b/searchlib/src/vespa/searchlib/fef/ranksetup.h
new file mode 100644
index 00000000000..86b381e3af6
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/ranksetup.h
@@ -0,0 +1,393 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "blueprintfactory.h"
+#include "iindexenvironment.h"
+#include "iqueryenvironment.h"
+#include "blueprintresolver.h"
+#include "rank_program.h"
+
+namespace search {
+namespace fef {
+
+/**
+ * A rank setup contains information about how initial and final rank
+ * should be calculated. A rank setup is responsible for resolving
+ * dependencies between named features and also acts as a factory for
+ * @ref RankContext objects. In addition to keeping track of how to
+ * calculate rank, a RankSetup also keeps track of how to calculate
+ * all features that should be dumped when performing a full feature
+ * dump.
+ **/
+class RankSetup
+{
+private:
+ const BlueprintFactory &_factory;
+ const IIndexEnvironment &_indexEnv;
+ BlueprintResolver::SP _first_phase_resolver;
+ BlueprintResolver::SP _second_phase_resolver;
+ BlueprintResolver::SP _summary_resolver;
+ BlueprintResolver::SP _dumpResolver;
+ vespalib::string _firstPhaseRankFeature;
+ vespalib::string _secondPhaseRankFeature;
+ vespalib::string _degradationAttribute;
+ double _termwise_limit;
+ uint32_t _numThreads;
+ uint32_t _numSearchPartitions;
+ uint32_t _heapSize;
+ uint32_t _arraySize;
+ uint32_t _estimatePoint;
+ uint32_t _estimateLimit;
+ uint32_t _degradationMaxHits;
+ double _degradationMaxFilterCoverage;
+ double _degradationSamplePercentage;
+ double _degradationPostFilterMultiplier;
+ feature_t _rankScoreDropLimit;
+ std::vector<vespalib::string> _summaryFeatures;
+ std::vector<vespalib::string> _dumpFeatures;
+ bool _ignoreDefaultRankFeatures;
+ bool _compiled;
+ bool _compileError;
+ bool _degradationAscendingOrder;
+ vespalib::string _diversityAttribute;
+ uint32_t _diversityMinGroups;
+ double _diversityCutoffFactor;
+ vespalib::string _diversityCutoffStrategy;
+
+
+public:
+ RankSetup(const RankSetup &) = delete;
+ RankSetup &operator=(const RankSetup &) = delete;
+ /**
+ * Convenience typedef for a shared pointer to this class.
+ **/
+ typedef std::shared_ptr<RankSetup> SP;
+
+ /**
+ * Create a new rank setup within the given index environment and
+ * backed by the given factory.
+ *
+ * @param factory blueprint factory
+ * @param indexEnv index environment
+ **/
+ RankSetup(const BlueprintFactory &factory,
+ const IIndexEnvironment &indexEnv);
+
+ /**
+ * Configures this rank setup according to the fef properties
+ * found in the index environment.
+ **/
+ void configure();
+
+ /**
+ * This method is invoked during setup (before invoking the @ref
+ * compile method) to define what feature to use as first phase
+ * ranking.
+ *
+ * @param featureName full feature name for first phase rank
+ **/
+ void setFirstPhaseRank(const vespalib::string &featureName);
+
+ /**
+ * Returns the first phase ranking.
+ *
+ * @return feature name for first phase rank
+ **/
+ const vespalib::string &getFirstPhaseRank() const { return _firstPhaseRankFeature; }
+
+ /**
+ * This method is invoked during setup (before invoking the @ref
+ * compile method) to define what feature to use as second phase ranking.
+ *
+ * @param featureName full feature name for second phase rank
+ **/
+ void setSecondPhaseRank(const vespalib::string &featureName);
+
+ /**
+ * Returns the second phase ranking.
+ *
+ * @return feature name for second phase rank
+ **/
+ const vespalib::string &getSecondPhaseRank() const { return _secondPhaseRankFeature; }
+
+ /**
+ * Set the termwise limit
+ *
+ * The termwise limit is a number in the range [0,1] indicating
+ * how much of the corpus the query must match for termwise
+ * evaluation to be enabled.
+ *
+ * @param value termwise limit
+ **/
+ void set_termwise_limit(double value) { _termwise_limit = value; }
+
+ /**
+ * Get the termwise limit
+ *
+ * The termwise limit is a number in the range [0,1] indicating
+ * how much of the corpus the query must match for termwise
+ * evaluation to be enabled.
+ *
+ * @return termwise limit
+ **/
+ double get_termwise_limit() const { return _termwise_limit; }
+
+ /**
+ * Sets the number of threads per search.
+ *
+ * @param numThreads the number of threads
+ **/
+ void setNumThreadsPerSearch(uint32_t numThreads) { _numThreads = numThreads; }
+
+ /**
+ * Returns the number of threads per search.
+ *
+ * @return the number of threads
+ **/
+ uint32_t getNumThreadsPerSearch() const { return _numThreads; }
+
+ void setNumSearchPartitions(uint32_t numSearchPartitions) { _numSearchPartitions = numSearchPartitions; }
+
+ uint32_t getNumSearchPartitions() const { return _numSearchPartitions; }
+
+ /**
+ * Sets the heap size to be used in the hit collector.
+ *
+ * @param heapSize the heap size
+ **/
+ void setHeapSize(uint32_t heapSize) { _heapSize = heapSize; }
+
+ /**
+ * Returns the heap size to be used in the hit collector.
+ *
+ * @return the heap size
+ **/
+ uint32_t getHeapSize() const { return _heapSize; }
+
+ /**
+ * Sets the array size to be used in the hit collector.
+ *
+ * @param arraySize the array size
+ **/
+ void setArraySize(uint32_t arraySize) { _arraySize = arraySize; }
+
+ /**
+ * Returns the array size to be used in the hit collector.
+ *
+ * @return the array size
+ **/
+ uint32_t getArraySize() const { return _arraySize; }
+
+ /** whether match phase should do graceful degradation */
+ bool hasMatchPhaseDegradation() const {
+ return (_degradationAttribute.size() > 0);
+ }
+
+ /** get name of attribute to use for graceful degradation in match phase */
+ vespalib::string getDegradationAttribute() const {
+ return _degradationAttribute;
+ }
+ /** check whether attribute should be used in ascending order during graceful degradation in match phase */
+ bool isDegradationOrderAscending() const {
+ return _degradationAscendingOrder;
+ }
+ /** get number of hits to collect during graceful degradation in match phase */
+ uint32_t getDegradationMaxHits() const {
+ return _degradationMaxHits;
+ }
+
+ double getDegradationMaxFilterCoverage() const { return _degradationMaxFilterCoverage; }
+ /** get number of hits to collect during graceful degradation in match phase */
+ double getDegradationSamplePercentage() const {
+ return _degradationSamplePercentage;
+ }
+
+ /** get number of hits to collect during graceful degradation in match phase */
+ double getDegradationPostFilterMultiplier() const {
+ return _degradationPostFilterMultiplier;
+ }
+
+ /** get the attribute used to ensure diversity during match phase limiting **/
+ vespalib::string getDiversityAttribute() const {
+ return _diversityAttribute;
+ }
+
+ /** get the minimal diversity we should try to achieve **/
+ uint32_t getDiversityMinGroups() const {
+ return _diversityMinGroups;
+ }
+
+ double getDiversityCutoffFactor() const {
+ return _diversityCutoffFactor;
+ }
+
+ const vespalib::string & getDiversityCutoffStrategy() const {
+ return _diversityCutoffStrategy;
+ }
+
+ /** set name of attribute to use for graceful degradation in match phase */
+ void setDegradationAttribute(const vespalib::string &name) {
+ _degradationAttribute = name;
+ }
+ /** set whether attribute should be used in ascending order during graceful degradation in match phase */
+ void setDegradationOrderAscending(bool ascending) {
+ _degradationAscendingOrder = ascending;
+ }
+ /** set number of hits to collect during graceful degradation in match phase */
+ void setDegradationMaxHits(uint32_t maxHits) {
+ _degradationMaxHits = maxHits;
+ }
+
+ void setDegradationMaxFilterCoverage(double degradationMaxFilterCoverage) {
+ _degradationMaxFilterCoverage = degradationMaxFilterCoverage;
+ }
+
+ /** set number of hits to collect during graceful degradation in match phase */
+ void setDegradationSamplePercentage(double samplePercentage) {
+ _degradationSamplePercentage = samplePercentage;
+ }
+
+ /** set number of hits to collect during graceful degradation in match phase */
+ void setDegradationPostFilterMultiplier(double samplePercentage) {
+ _degradationPostFilterMultiplier = samplePercentage;
+ }
+
+ /** set the attribute used to ensure diversity during match phase limiting **/
+ void setDiversityAttribute(const vespalib::string &value) {
+ _diversityAttribute = value;
+ }
+
+ /** set the minimal diversity we should try to achieve **/
+ void setDiversityMinGroups(uint32_t value) {
+ _diversityMinGroups = value;
+ }
+
+ void setDiversityCutoffFactor(double value) {
+ _diversityCutoffFactor = value;
+ }
+
+ void setDiversityCutoffStrategy(const vespalib::string & value) {
+ _diversityCutoffStrategy = value;
+ }
+
+ /**
+ * Sets the estimate point to be used in parallel query evaluation.
+ *
+ * @param estimatePoint the estimate point
+ **/
+ void setEstimatePoint(uint32_t estimatePoint) { _estimatePoint = estimatePoint; }
+
+ /**
+ * Returns the estimate point to be used in parallel query evaluation.
+ *
+ * @return the estimate point
+ **/
+ uint32_t getEstimatePoint() const { return _estimatePoint; }
+
+ /**
+ * Sets the estimate limit to be used in parallel query evaluation.
+ *
+ * @param estimateLimit the estimate limit
+ **/
+ void setEstimateLimit(uint32_t estimateLimit) { _estimateLimit = estimateLimit; }
+
+ /**
+ * Returns the estimate limit to be used in parallel query evaluation.
+ *
+ * @return the estimate limit
+ **/
+ uint32_t getEstimateLimit() const { return _estimateLimit; }
+
+ /**
+ * Sets the rank score drop limit to be used in parallel query evaluation.
+ *
+ * @param rankScoreDropLimit the rank score drop limit
+ **/
+ void setRankScoreDropLimit(feature_t rankScoreDropLimit) { _rankScoreDropLimit = rankScoreDropLimit; }
+
+ /**
+ * Returns the rank score drop limit to be used in parallel query evaluation.
+ *
+ * @return the rank score drop limit
+ **/
+ feature_t getRankScoreDropLimit() const { return _rankScoreDropLimit; }
+
+ /**
+ * This method may be used to indicate that certain features
+ * should be present in the docsum.
+ *
+ * @param summaryFeature full feature name of a summary feature
+ **/
+ void addSummaryFeature(const vespalib::string &summaryFeature);
+
+ /**
+ * Returns a const view of the summary features added.
+ *
+ * @return vector of summary feature names.
+ **/
+ const std::vector<vespalib::string> &getSummaryFeatures() const { return _summaryFeatures; }
+
+ /**
+ * Set the flag indicating whether we should ignore the default
+ * rank features (the ones specified by the plugins themselves)
+ *
+ * @param flag true means ignore default rank features
+ **/
+ void setIgnoreDefaultRankFeatures(bool flag) { _ignoreDefaultRankFeatures = flag; }
+
+ /**
+ * Get the flag indicating whether we should ignore the default
+ * rank features (the ones specified by the plugins themselves)
+ *
+ * @return true means ignore default rank features
+ **/
+ bool getIgnoreDefaultRankFeatures() { return _ignoreDefaultRankFeatures; }
+
+ /**
+ * This method may be used to indicate that certain features
+ * should be dumped during a full feature dump.
+ *
+ * @param dumpFeature full feature name of a dump feature
+ **/
+ void addDumpFeature(const vespalib::string &dumpFeature);
+
+ /**
+ * Returns a const view of the dump features added.
+ *
+ * @return vector of dump feature names.
+ **/
+ const std::vector<vespalib::string> &getDumpFeatures() const { return _dumpFeatures; }
+
+ /**
+ * Create blueprints, resolve dependencies and form a strategy for
+ * how to create feature executors used to calculate initial and
+ * final rank for individual queries. This method must be invoked
+ * after the @ref setInitialRank and @ref setFinalRank methods and
+ * before creating @ref RankContext objects using the @ref
+ * createRankContext and @ref createDumpContext methods.
+ *
+ * @return true if things went ok, false otherwise (dependency issues)
+ **/
+ bool compile();
+
+ // These functions create rank programs for different tasks. Note
+ // that the setup function must be called on rank programs for
+ // them to be ready to use. Also keep in mind that creating a rank
+ // program is cheap while setting it up is more expensive.
+
+ RankProgram::UP create_first_phase_program() const { return RankProgram::UP(new RankProgram(_first_phase_resolver)); }
+ RankProgram::UP create_second_phase_program() const { return RankProgram::UP(new RankProgram(_second_phase_resolver)); }
+ RankProgram::UP create_summary_program() const { return RankProgram::UP(new RankProgram(_summary_resolver)); }
+ RankProgram::UP create_dump_program() const { return RankProgram::UP(new RankProgram(_dumpResolver)); }
+
+ /**
+ * Here you can do some preprocessing. State must be stored in the IObjectStore.
+ * This is called before creating multiple execution threads.
+ * @param queryEnv The query environment.
+ */
+ void prepareSharedState(const IQueryEnvironment & queryEnv, IObjectStore & objectStore) const;
+};
+
+} // namespace fef
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/fef/simpletermdata.cpp b/searchlib/src/vespa/searchlib/fef/simpletermdata.cpp
new file mode 100644
index 00000000000..97ec1f8cca3
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/simpletermdata.cpp
@@ -0,0 +1,33 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".fef.simpletermdata");
+#include "simpletermdata.h"
+
+namespace search {
+namespace fef {
+
+SimpleTermData::SimpleTermData()
+ : _weight(0),
+ _numTerms(0),
+ _termIndex(0),
+ _uniqueId(0),
+ _fields()
+{
+}
+
+SimpleTermData::SimpleTermData(const ITermData &rhs)
+ : _weight(rhs.getWeight()),
+ _numTerms(rhs.getPhraseLength()),
+ _termIndex(rhs.getTermIndex()),
+ _uniqueId(rhs.getUniqueId()),
+ _fields()
+{
+ for (size_t i(0), m(rhs.numFields()); i < m; ++i) {
+ _fields.push_back(SimpleTermFieldData(rhs.field(i)));
+ }
+}
+
+} // namespace fef
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/fef/simpletermdata.h b/searchlib/src/vespa/searchlib/fef/simpletermdata.h
new file mode 100644
index 00000000000..ee4cab468e1
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/simpletermdata.h
@@ -0,0 +1,195 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "handle.h"
+#include "itermdata.h"
+#include "simpletermfielddata.h"
+#include <vespa/searchlib/query/weight.h>
+#include <vector>
+
+namespace search {
+namespace fef {
+
+/**
+ * Static match data for a single unit (term/phrase/etc).
+ **/
+class SimpleTermData : public ITermData
+{
+private:
+ query::Weight _weight;
+ uint32_t _numTerms;
+ uint32_t _termIndex;
+ uint32_t _uniqueId;
+
+ std::vector<SimpleTermFieldData> _fields;
+
+public:
+ /**
+ * Creates a new object.
+ **/
+ SimpleTermData();
+
+ /**
+ * Side-cast copy constructor.
+ **/
+ SimpleTermData(const ITermData &rhs);
+
+ //----------- ITermData implementation ------------------------------------
+
+ /**
+ * Returns the term weight.
+ **/
+ virtual query::Weight getWeight() const { return _weight; }
+
+ /**
+ * Returns the number of terms represented by this term data object.
+ **/
+ virtual uint32_t getPhraseLength() const { return _numTerms; }
+
+ /**
+ * Obtain the location of this term in the original user query.
+ *
+ * @return term index
+ **/
+ virtual uint32_t getTermIndex() const { return _termIndex; }
+
+ /**
+ * Obtain the unique id of this term. 0 means not set.
+ *
+ * @return unique id or 0
+ **/
+ virtual uint32_t getUniqueId() const { return _uniqueId; }
+
+ /**
+ * Get number of fields searched
+ **/
+ virtual size_t numFields() const { return _fields.size(); }
+
+ /**
+ * Direct access to data for individual fields
+ * @param i local index, must have: 0 <= i < numFields()
+ */
+ virtual const ITermFieldData &field(size_t i) const {
+ return _fields[i];
+ }
+
+ /**
+ * Obtain information about a specific field that may be searched
+ * by this term. If the requested field is not searched by this
+ * term, NULL will be returned.
+ *
+ * @return term field data, or NULL if not found
+ **/
+ virtual const ITermFieldData *lookupField(uint32_t fieldId) const {
+ for (size_t fieldIdx(0), m(numFields()); fieldIdx < m; ++fieldIdx) {
+ const ITermFieldData &tfd = field(fieldIdx);
+ if (tfd.getFieldId() == fieldId) {
+ return &tfd;
+ }
+ }
+ return 0;
+ }
+
+ //----------- Utility functions -------------------------------------------
+
+ /**
+ * Sets the term weight.
+ **/
+ SimpleTermData &setWeight(query::Weight weight) {
+ _weight = weight;
+ return *this;
+ }
+
+ /**
+ * Sets the number of terms represented by this term data object.
+ **/
+ SimpleTermData &setPhraseLength(uint32_t numTerms) {
+ _numTerms = numTerms;
+ return *this;
+ }
+
+ /**
+ * Set the location of this term in the original user query.
+ *
+ * @return this to allow chaining.
+ * @param idx term index
+ **/
+ SimpleTermData &setTermIndex(uint32_t idx) {
+ _termIndex = idx;
+ return *this;
+ }
+
+ /**
+ * Set the unique id of this term. 0 means not set.
+ *
+ * @param id unique id or 0
+ * @return this to allow chaining.
+ **/
+ SimpleTermData &setUniqueId(uint32_t id) {
+ _uniqueId = id;
+ return *this;
+ }
+
+ /**
+ * Add a new field to the set that is searched by this term.
+ *
+ * @return the newly added field
+ * @param fieldId field id of the added field
+ **/
+ SimpleTermFieldData &addField(uint32_t fieldId) {
+ _fields.push_back(SimpleTermFieldData(fieldId));
+ return _fields.back();
+ }
+
+ /**
+ * Direct access to data for individual fields
+ * @param i local index, must have: 0 <= i < numFields()
+ */
+ SimpleTermFieldData &field(size_t i) {
+ return _fields[i];
+ }
+
+ /**
+ * Obtain information about a specific field that may be searched
+ * by this term. If the requested field is not searched by this
+ * term, NULL will be returned.
+ *
+ * @return term field data, or NULL if not found
+ **/
+ SimpleTermFieldData *lookupField(uint32_t fieldId) {
+ for (size_t fieldIdx(0), m(numFields()); fieldIdx < m; ++fieldIdx) {
+ SimpleTermFieldData& tfd = field(fieldIdx);
+ if (tfd.getFieldId() == fieldId) {
+ return &tfd;
+ }
+ }
+ return 0;
+ }
+};
+
+
+/**
+ * convenience adapter for easy iteration
+ **/
+class SimpleTermFieldRangeAdapter
+{
+ SimpleTermData& _ref;
+ size_t _idx;
+ size_t _lim;
+public:
+ explicit SimpleTermFieldRangeAdapter(SimpleTermData& ref)
+ : _ref(ref), _idx(0), _lim(ref.numFields())
+ {}
+
+ bool valid() const { return (_idx < _lim); }
+
+ SimpleTermFieldData& get() const { return _ref.field(_idx); }
+
+ void next() { assert(valid()); ++_idx; }
+};
+
+
+} // namespace fef
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/fef/simpletermfielddata.cpp b/searchlib/src/vespa/searchlib/fef/simpletermfielddata.cpp
new file mode 100644
index 00000000000..582e5e330d6
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/simpletermfielddata.cpp
@@ -0,0 +1,26 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".fef.simpletermfielddata");
+#include "simpletermfielddata.h"
+
+namespace search {
+namespace fef {
+
+SimpleTermFieldData::SimpleTermFieldData(uint32_t fieldId)
+ : _fieldId(fieldId),
+ _docFreq(0),
+ _handle(IllegalHandle)
+{
+}
+
+SimpleTermFieldData::SimpleTermFieldData(const ITermFieldData &rhs)
+ : _fieldId(rhs.getFieldId()),
+ _docFreq(rhs.getDocFreq()),
+ _handle(rhs.getHandle())
+{
+}
+
+} // namespace fef
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/fef/simpletermfielddata.h b/searchlib/src/vespa/searchlib/fef/simpletermfielddata.h
new file mode 100644
index 00000000000..f95ca5b3472
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/simpletermfielddata.h
@@ -0,0 +1,84 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "itermfielddata.h"
+
+namespace search {
+namespace fef {
+
+/**
+ * Information about a single field that is being searched for a term
+ * (described by the TermData class). The field may be either an index
+ * field or an attribute field. If more information about the field is
+ * needed, the field id may be used to consult the index environment.
+ **/
+class SimpleTermFieldData : public ITermFieldData
+{
+private:
+ uint32_t _fieldId;
+ double _docFreq;
+ TermFieldHandle _handle;
+
+public:
+ /**
+ * Side-cast copy constructor.
+ **/
+ SimpleTermFieldData(const ITermFieldData &rhs);
+
+ /**
+ * Create a new instance for the given field.
+ *
+ * @param fieldId the field being searched
+ **/
+ SimpleTermFieldData(uint32_t fieldId);
+
+ /**
+ * Obtain the field id.
+ *
+ * @return field id
+ **/
+ virtual uint32_t getFieldId() const { return _fieldId; }
+
+ /**
+ * Obtain the document frequency.
+ *
+ * @return document frequency
+ **/
+ virtual double getDocFreq() const { return _docFreq; }
+
+ /**
+ * Obtain the match handle for this field.
+ *
+ * @return match handle
+ **/
+ virtual TermFieldHandle getHandle() const {
+ return _handle;
+ }
+
+ /**
+ * Sets the document frequency.
+ *
+ * @return this object (for chaining)
+ * @param docFreq document frequency
+ **/
+ SimpleTermFieldData &setDocFreq(double docFreq) {
+ _docFreq = docFreq;
+ return *this;
+ }
+
+ /**
+ * Sets the match handle for this field.
+ *
+ * @return this object (for chaining)
+ * @param handle match handle
+ **/
+ SimpleTermFieldData &setHandle(TermFieldHandle handle) {
+ _handle = handle;
+ return *this;
+ }
+};
+
+} // namespace fef
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/fef/sumexecutor.cpp b/searchlib/src/vespa/searchlib/fef/sumexecutor.cpp
new file mode 100644
index 00000000000..06df6a6d909
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/sumexecutor.cpp
@@ -0,0 +1,21 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "sumexecutor.h"
+#include "matchdata.h"
+
+namespace search {
+namespace fef {
+
+void
+SumExecutor::execute(MatchData &data)
+{
+ feature_t sum = 0.0;
+ for (uint32_t i = 0; i < inputs().size(); ++i) {
+ sum += *data.resolveFeature(inputs()[i]);
+ }
+ *data.resolveFeature(outputs()[0]) = sum;
+}
+
+} // namespace fef
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/fef/sumexecutor.h b/searchlib/src/vespa/searchlib/fef/sumexecutor.h
new file mode 100644
index 00000000000..82f5ea237b3
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/sumexecutor.h
@@ -0,0 +1,32 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "featureexecutor.h"
+
+namespace search {
+namespace fef {
+
+class MatchData;
+
+/**
+ * Simple executor that calculates the sum of a set of inputs. This
+ * will be moved to another library as it is not really part of the
+ * framework.
+ **/
+class SumExecutor : public FeatureExecutor
+{
+public:
+ virtual void execute(MatchData &data);
+
+ /**
+ * Create an instance of this class and return it as a shared pointer.
+ *
+ * @return shared pointer to new instance
+ **/
+ static FeatureExecutor::LP create() { return FeatureExecutor::LP(new SumExecutor()); }
+};
+
+} // namespace fef
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/fef/symmetrictable.cpp b/searchlib/src/vespa/searchlib/fef/symmetrictable.cpp
new file mode 100644
index 00000000000..29818a9f416
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/symmetrictable.cpp
@@ -0,0 +1,52 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "symmetrictable.h"
+
+namespace search {
+namespace fef {
+
+SymmetricTable::SymmetricTable() :
+ _backingTable(),
+ _size(),
+ _table(NULL),
+ _max(0)
+{
+}
+
+SymmetricTable::SymmetricTable(const SymmetricTable & table) :
+ _backingTable(table._backingTable),
+ _size(_backingTable.size()/2),
+ _table(&_backingTable[_size]),
+ _max(table.max())
+{
+}
+
+SymmetricTable & SymmetricTable::operator=(const SymmetricTable & rhs)
+{
+ if (&rhs != this) {
+ SymmetricTable n(rhs);
+ swap(n);
+ }
+ return *this;
+}
+
+SymmetricTable::SymmetricTable(const Table & table) :
+ _backingTable(table.size()*2 - 1),
+ _size(_backingTable.size()/2),
+ _table(&_backingTable[_size]),
+ _max(table.max())
+{
+ _table[0] = table[0];
+ for(int i(1); i <= _size; i++) {
+ _table[i] = table[i];
+ _table[-i] = -table[i];
+ }
+}
+
+SymmetricTable::~SymmetricTable()
+{
+}
+
+} // namespace fef
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/fef/symmetrictable.h b/searchlib/src/vespa/searchlib/fef/symmetrictable.h
new file mode 100644
index 00000000000..984879cc540
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/symmetrictable.h
@@ -0,0 +1,58 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vector>
+#include <vespa/searchlib/fef/table.h>
+
+namespace search {
+namespace fef {
+
+/**
+ * This class represents a rank table with double values. It takes both negative and positive indexes.
+ * The content of a table is typically a pre-computed function that is used by a feature executor.
+ * Values in the negative index range are negated values of corresponding positive value.
+ **/
+class SymmetricTable
+{
+private:
+ std::vector<double> _backingTable;
+ int _size;
+ double * _table;
+ double _max;
+
+public:
+ typedef std::shared_ptr<SymmetricTable> SP;
+
+ SymmetricTable();
+ /**
+ * Creates a symmetric table based on the real one.
+ **/
+ SymmetricTable(const Table & table);
+ SymmetricTable(const SymmetricTable & table);
+ ~SymmetricTable();
+
+ SymmetricTable & operator =(const SymmetricTable & table);
+ void swap(SymmetricTable & rhs) {
+ _backingTable.swap(rhs._backingTable);
+ std::swap(_size, rhs._size);
+ std::swap(_table, rhs._table);
+ std::swap(_max, rhs._max);
+ }
+ /**
+ * Returns the element at the given position.
+ **/
+ double operator[](int i) const { return _table[i]; }
+
+ /**
+ * Retrives the element at the given position or the last element if i is outside the range.
+ **/
+ double get(int i) const {
+ return (i<-_size) ? _table[-_size] : ((i>_size) ? _table[_size] : _table[i]);
+ };
+ double max() const { return _max; }
+};
+
+} // namespace fef
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/fef/table.cpp b/searchlib/src/vespa/searchlib/fef/table.cpp
new file mode 100644
index 00000000000..c32cd233937
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/table.cpp
@@ -0,0 +1,22 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "table.h"
+#include <limits>
+
+namespace search {
+namespace fef {
+
+Table::Table() :
+ _table(),
+ _max(-std::numeric_limits<double>::max())
+{
+ _table.reserve(256);
+}
+
+Table::~Table()
+{
+}
+
+} // namespace fef
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/fef/table.h b/searchlib/src/vespa/searchlib/fef/table.h
new file mode 100644
index 00000000000..a2203b83041
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/table.h
@@ -0,0 +1,65 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vector>
+
+namespace search {
+namespace fef {
+
+/**
+ * This class represents a rank table with double values.
+ * The content of a table is typically a pre-computed function that is used by a feature executor.
+ **/
+class Table
+{
+private:
+ std::vector<double> _table;
+ double _max;
+
+public:
+ typedef std::shared_ptr<Table> SP;
+
+ /**
+ * Creates a new table with zero elements.
+ **/
+ Table();
+ ~Table();
+
+ /**
+ * Adds the given element to this table.
+ **/
+ Table & add(double val) {
+ _table.push_back(val);
+ _max = std::max(val, _max);
+ return *this;
+ }
+
+ /**
+ * Returns the number of elements in this table.
+ **/
+ size_t size() const { return _table.size(); }
+
+ /**
+ * Returns the element at the given position.
+ **/
+ double operator[](size_t i) const { return _table[i]; }
+
+ /**
+ * Retrives the element at the given position or the last element if i is outside the range.
+ **/
+ double get(size_t i) const {
+ return _table[std::min(i, size() - 1)];
+ };
+
+ /**
+ * Returns the largest element in this table.
+ **/
+ double max() const {
+ return _max;
+ }
+};
+
+} // namespace fef
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/fef/tablemanager.cpp b/searchlib/src/vespa/searchlib/fef/tablemanager.cpp
new file mode 100644
index 00000000000..f62f24a3b0f
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/tablemanager.cpp
@@ -0,0 +1,36 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "tablemanager.h"
+
+namespace search {
+namespace fef {
+
+TableManager::TableManager() :
+ _factories(),
+ _cache(),
+ _lock()
+{
+}
+
+const Table *
+TableManager::getTable(const vespalib::string & name) const
+{
+ vespalib::LockGuard guard(_lock);
+ TableCache::const_iterator itr = _cache.find(name);
+ if (itr != _cache.end()) {
+ return itr->second.get();
+ }
+ for (size_t i = 0; i < _factories.size(); ++i) {
+ Table::SP table = _factories[i]->createTable(name);
+ if (table.get() != NULL) {
+ _cache.insert(std::make_pair(name, table));
+ return table.get();
+ }
+ }
+ _cache.insert(std::make_pair(name, Table::SP(NULL)));
+ return NULL;
+}
+
+} // namespace fef
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/fef/tablemanager.h b/searchlib/src/vespa/searchlib/fef/tablemanager.h
new file mode 100644
index 00000000000..e69c05b1dce
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/tablemanager.h
@@ -0,0 +1,50 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <map>
+#include <vector>
+#include <vespa/vespalib/util/sync.h>
+#include "itablefactory.h"
+#include "itablemanager.h"
+
+namespace search {
+namespace fef {
+
+/**
+ * This class manages a set of tables and contains an ordered list of table factories used to create tables,
+ * and a cache of allready created tables. A table is accessed by a unique name.
+ **/
+class TableManager : public ITableManager
+{
+private:
+ TableManager(const TableManager &);
+ TableManager &operator=(const TableManager &);
+
+ typedef std::map<vespalib::string, Table::SP> TableCache;
+ std::vector<ITableFactory::SP> _factories;
+ mutable TableCache _cache;
+ vespalib::Lock _lock;
+
+public:
+ TableManager();
+
+ /**
+ * Adds a table factory to this manager.
+ * The table factories are used in the order they where added to create tables.
+ **/
+ void addFactory(ITableFactory::SP factory) { _factories.push_back(factory); }
+
+ /**
+ * Retrieves the table with the given name using the following strategy:
+ * 1. Try to find the table in the cache.
+ * 2. Iterate over the table factories and try to create the table.
+ * The first table that is successfully created is added it to the cache and returned.
+ * 3. Return NULL.
+ **/
+ virtual const Table * getTable(const vespalib::string & name) const;
+};
+
+} // namespace fef
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/fef/termfieldmatchdata.cpp b/searchlib/src/vespa/searchlib/fef/termfieldmatchdata.cpp
new file mode 100644
index 00000000000..2ba9cf90870
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/termfieldmatchdata.cpp
@@ -0,0 +1,121 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".fef.termfieldmatchdata");
+#include "termfieldmatchdata.h"
+#include "fieldinfo.h"
+#include <algorithm>
+
+namespace search {
+namespace fef {
+
+TermFieldMatchData::TermFieldMatchData() :
+ _docId(invalidId()),
+ _fieldId(FIELDID_MASK),
+ _sz(0)
+{
+ memset(&_data, 0, sizeof(_data));
+}
+
+TermFieldMatchData::TermFieldMatchData(const TermFieldMatchData & rhs) :
+ _docId(rhs._docId),
+ _fieldId(rhs._fieldId),
+ _sz(0)
+{
+ memset(&_data, 0, sizeof(_data));
+ if (isRawScore()) {
+ _data._rawScore = rhs._data._rawScore;
+ } else {
+ for (auto it(rhs.begin()), mt(rhs.end()); it != mt; it++) {
+ appendPosition(*it);
+ }
+ }
+}
+
+TermFieldMatchData & TermFieldMatchData::operator = (const TermFieldMatchData & rhs)
+{
+ if (this != & rhs) {
+ TermFieldMatchData tmp(rhs);
+ swap(tmp);
+ }
+ return *this;
+}
+
+TermFieldMatchData::~TermFieldMatchData()
+{
+ if (isRawScore()) {
+ } else if (isMultiPos()) {
+ delete [] _data._positions._positions;
+ } else {
+ getFixed()->~TermFieldMatchDataPosition();
+ }
+}
+
+namespace {
+
+template <typename T>
+void sswap(T * a, T * b) {
+ T tmp(*a);
+ *a = *b;
+ *b = tmp;
+}
+
+}
+
+void
+TermFieldMatchData::swap(TermFieldMatchData &rhs)
+{
+ sswap(&_docId, &rhs._docId);
+ sswap(&_fieldId, &rhs._fieldId);
+ sswap(&_sz, &rhs._sz);
+ char tmp[sizeof(_data)];
+ memcpy(tmp, &rhs._data, sizeof(_data));
+ memcpy(&rhs._data, &_data, sizeof(_data));
+ memcpy(&_data, tmp, sizeof(_data));
+}
+
+namespace {
+
+constexpr size_t MAX_ELEMS = std::numeric_limits<uint16_t>::max();
+
+}
+
+void
+TermFieldMatchData::resizePositionVector(size_t sz)
+{
+ size_t newSize(std::min(MAX_ELEMS, std::max(1ul, sz*2)));
+ TermFieldMatchDataPosition * n = new TermFieldMatchDataPosition[newSize];
+ if (sz > 0) {
+ if (isMultiPos()) {
+ for (size_t i(0); i < _data._positions._allocated; i++) {
+ n[i] = _data._positions._positions[i];
+ }
+ delete [] _data._positions._positions;
+ } else {
+ assert(sz == 1);
+ _fieldId = _fieldId | 0x4000;
+ n[0] = *getFixed();
+ _data._positions._maxElementLength = getFixed()->getElementLen();
+ }
+ }
+ _data._positions._allocated = newSize;
+ _data._positions._positions = n;
+}
+
+void
+TermFieldMatchData::appendPositionToAllocatedVector(const TermFieldMatchDataPosition &pos)
+{
+ if (__builtin_expect(_sz >= _data._positions._allocated, false)) {
+ resizePositionVector(_sz);
+ }
+ if (__builtin_expect(pos.getElementLen() > _data._positions._maxElementLength, false)) {
+ _data._positions._maxElementLength = pos.getElementLen();
+ }
+ if (__builtin_expect(_sz < MAX_ELEMS, true)) {
+ _data._positions._positions[_sz++] = pos;
+ }
+}
+
+} // namespace fef
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/fef/termfieldmatchdata.h b/searchlib/src/vespa/searchlib/fef/termfieldmatchdata.h
new file mode 100644
index 00000000000..a3ce0ac4bb6
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/termfieldmatchdata.h
@@ -0,0 +1,267 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "termfieldmatchdataposition.h"
+#include "fieldpositionsiterator.h"
+#include "fieldinfo.h"
+#include <vespa/searchlib/common/feature.h>
+#include <vespa/vespalib/util/noncopyable.hpp>
+#include <string.h>
+#include <assert.h>
+
+class MatchDataHeapTest;
+
+namespace search {
+namespace fef {
+
+class TermMatchDataMerger;
+
+/**
+ * Match information for a single term within a single field.
+ **/
+class TermFieldMatchData
+{
+public:
+ typedef const TermFieldMatchDataPosition * PositionsIterator;
+ typedef TermFieldMatchDataPosition * MutablePositionsIterator;
+ struct Positions {
+ uint16_t _maxElementLength;
+ uint16_t _allocated;
+ TermFieldMatchDataPosition *_positions;
+ } __attribute__((packed));
+
+ union Features {
+ feature_t _rawScore;
+ unsigned char _position[sizeof(TermFieldMatchDataPosition)];
+ Positions _positions;
+ uint64_t _subqueries;
+ } __attribute__((packed));
+private:
+ bool isRawScore() const { return _fieldId & 0x8000; }
+ bool isMultiPos() const { return _fieldId & 0x4000; }
+ bool empty() const { return _sz == 0; }
+ void clear() { _sz = 0; }
+ bool allocated() const { return isMultiPos(); }
+ const TermFieldMatchDataPosition * getFixed() const { return reinterpret_cast<const TermFieldMatchDataPosition *>(_data._position); }
+ TermFieldMatchDataPosition * getFixed() { return reinterpret_cast<TermFieldMatchDataPosition *>(_data._position); }
+ const TermFieldMatchDataPosition * getMultiple() const { return _data._positions._positions; }
+ TermFieldMatchDataPosition * getMultiple() { return _data._positions._positions; }
+ int32_t getElementWeight() const { return empty() ? 1 : allocated() ? getMultiple()->getElementWeight() : getFixed()->getElementWeight(); }
+ uint32_t getMaxElementLength() const { return empty() ? 0 : allocated() ? _data._positions._maxElementLength : getFixed()->getElementLen(); }
+ void appendPositionToAllocatedVector(const TermFieldMatchDataPosition &pos);
+ void resizePositionVector(size_t sz) __attribute__((noinline));
+
+ enum { FIELDID_MASK = 0x1fff};
+
+ uint32_t _docId;
+ // 3 upper bits used to tell if it is use for RawScore, SinglePos or multiPos.
+ uint16_t _fieldId;
+ uint16_t _sz;
+ Features _data;
+
+ friend class ::MatchDataHeapTest;
+
+public:
+ /**
+ * This gives you access to the underlying positions.
+ * @return the array of positions.
+ */
+ MutablePositionsIterator getPositions() { return allocated() ? getMultiple() : getFixed(); }
+ PositionsIterator begin() const { return allocated() ? getMultiple() : getFixed(); }
+ PositionsIterator end() const { return allocated() ? getMultiple() + _sz : empty() ? getFixed() : getFixed()+1; }
+ size_t size() const { return _sz; }
+ size_t capacity() const { return allocated() ? _data._positions._allocated : 1; }
+
+ /**
+ * Create empty object. To complete object setup, field id must be
+ * set.
+ **/
+ TermFieldMatchData();
+
+ TermFieldMatchData(const TermFieldMatchData & rhs);
+
+ ~TermFieldMatchData();
+ TermFieldMatchData & operator = (const TermFieldMatchData & rhs);
+
+ /**
+ * Swaps the content of this object with the content of the given
+ * term field match data object.
+ *
+ * @param rhs The object to swap with.
+ **/
+ void swap(TermFieldMatchData &rhs);
+
+ /**
+ * Set which field this object has match information for.
+ *
+ * @return this object (for chaining)
+ * @param fieldId field id
+ **/
+ TermFieldMatchData &setFieldId(uint32_t fieldId) {
+ if (fieldId == IllegalFieldId) {
+ fieldId = FIELDID_MASK;
+ } else {
+ assert(fieldId < FIELDID_MASK);
+ }
+ _fieldId = (_fieldId & ~FIELDID_MASK) | fieldId;
+ return *this;
+ }
+
+ /**
+ * Obtain the field id
+ *
+ * @return field id
+ **/
+ uint32_t getFieldId() const {
+ return __builtin_expect((_fieldId & FIELDID_MASK) != FIELDID_MASK, true) ? (_fieldId & FIELDID_MASK) : IllegalFieldId;
+ }
+
+ /**
+ * Reset the content of this match data and prepare it for use
+ * with the given docid.
+ *
+ * @return this object (for chaining)
+ * @param docId id of the document we are generating match information for
+ **/
+ TermFieldMatchData &reset(uint32_t docId) {
+ _docId = docId;
+ _sz = 0;
+ if (isRawScore()) {
+ _data._rawScore = 0.0;
+ } else if (isMultiPos()) {
+ _data._positions._maxElementLength = 0;
+ }
+ return *this;
+ }
+
+ /**
+ * Reset only the docid of this match data and prepare it for use
+ * with the given docid. Assume all other are not touched.
+ *
+ * @return this object (for chaining)
+ * @param docId id of the document we are generating match information for
+ **/
+ TermFieldMatchData &resetOnlyDocId(uint32_t docId) {
+ _docId = docId;
+ return *this;
+ }
+
+ /**
+ * Indicate a match for a given docid and inject a raw score
+ * instead of detailed match data. The raw score can be picked up
+ * in the ranking framework by using the rawScore feature for the
+ * appropriate field.
+ *
+ * @return this object (for chaining)
+ * @param docId id of the document we have matched
+ * @param score a raw score for the matched document
+ **/
+ TermFieldMatchData &setRawScore(uint32_t docId, feature_t score) {
+ resetOnlyDocId(docId);
+ enableRawScore();
+ _data._rawScore = score;
+ return *this;
+ }
+ TermFieldMatchData & enableRawScore() {
+ _fieldId = _fieldId | 0x8000;
+ return *this;
+ }
+
+ /**
+ * Obtain the raw score for this match data.
+ *
+ * @return raw score
+ **/
+ feature_t getRawScore() const {
+ return __builtin_expect(isRawScore(), true) ? _data._rawScore : 0.0;
+ }
+
+ void setSubqueries(uint32_t docId, uint64_t subqueries) {
+ resetOnlyDocId(docId);
+ _data._subqueries = subqueries;
+ }
+
+ uint64_t getSubqueries() const {
+ if (!empty() || isRawScore()) {
+ return 0;
+ }
+ return _data._subqueries;
+ }
+
+ /**
+ * Obtain the document id for which the data contained in this object is valid.
+ *
+ * @return document id
+ **/
+ uint32_t getDocId() const {
+ return _docId;
+ }
+
+ /**
+ * Obtain the weight of the first occurrence in this field, or 1
+ * if no occurrences are present. This function is intended for
+ * attribute matching calculations.
+ *
+ * @return weight
+ **/
+ int32_t getWeight() const {
+ if (__builtin_expect(_sz == 0, false)) {
+ return 1;
+ }
+ return __builtin_expect(allocated(), false) ? getMultiple()->getElementWeight() : getFixed()->getElementWeight();
+ }
+
+ /**
+ * Add occurrence information to this match data for the current
+ * document.
+ *
+ * @return this object (for chaining)
+ * @param pos low-level occurrence information
+ **/
+ TermFieldMatchData &appendPosition(const TermFieldMatchDataPosition &pos) {
+ if (isMultiPos() || (_sz > 0)) {
+ appendPositionToAllocatedVector(pos);
+ } else {
+ _sz = 1;
+ new (_data._position) TermFieldMatchDataPosition(pos);
+ }
+ return *this;
+ }
+
+ /**
+ * Obtain an object that gives access to the low-level occurrence
+ * information stored in this object.
+ *
+ * @return field position iterator
+ **/
+ FieldPositionsIterator getIterator() const {
+ const uint32_t len(getMaxElementLength());
+ return FieldPositionsIterator(len != 0 ? len : FieldPositionsIterator::UNKNOWN_LENGTH, begin(), end());
+ }
+
+ /**
+ * This indicates if this instance is actually used for ranking or not.
+ * @return true if it is not needed.
+ */
+ bool isNotNeeded() const { return _fieldId & 0x2000; }
+
+ /**
+ * Tag that this instance is not really used for ranking.
+ */
+ void tagAsNotNeeded() {
+ _fieldId = _fieldId | 0x2000;
+ }
+
+ /**
+ * Special docId value indicating that no data has been saved yet.
+ * This should match (or be above) endId() in search::queryeval::SearchIterator.
+ *
+ * @return constant
+ **/
+ static uint32_t invalidId() { return 0xdeadbeefU; }
+} __attribute__((packed));
+
+} // namespace fef
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/fef/termfieldmatchdataarray.h b/searchlib/src/vespa/searchlib/fef/termfieldmatchdataarray.h
new file mode 100644
index 00000000000..874870f5afa
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/termfieldmatchdataarray.h
@@ -0,0 +1,69 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+namespace search {
+namespace fef {
+
+class TermFieldMatchData;
+
+/**
+ * Array of pointers to TermFieldMatchData instances.
+ * Use this class to pass an ordered set of references
+ * into e.g. iterators searching in multiple fields at once.
+ * The array must either be totally empty, or contain
+ * the appropriate number of valid references.
+ **/
+class TermFieldMatchDataArray
+{
+private:
+ std::vector<TermFieldMatchData *> _array;
+
+public:
+ /**
+ * Reserve space for a number of elements in order to reduce number of allocations.
+ * @param size Number of elements to reserve space for.
+ */
+ void reserve(size_t sz) {
+ _array.reserve(sz);
+ }
+ /**
+ * add a pointer to the array.
+ *
+ * @return this object for chaining
+ * @param value the pointer to be added
+ **/
+ TermFieldMatchDataArray &add(TermFieldMatchData *value) {
+ assert(value != 0);
+ _array.push_back(value);
+ return *this;
+ }
+
+ /**
+ * check that the array contains valid references.
+ *
+ * @return true if array not empty
+ **/
+ bool valid() const { return !_array.empty(); }
+
+ /**
+ * size of the array.
+ *
+ * @return the size
+ **/
+ size_t size() const { return _array.size(); }
+
+ /**
+ * get a pointer from the array.
+ *
+ * @return the pointer
+ * @param i index of the pointer
+ **/
+ TermFieldMatchData *operator[] (size_t i) const {
+ return _array[i];
+ }
+};
+
+} // namespace fef
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/fef/termfieldmatchdataposition.cpp b/searchlib/src/vespa/searchlib/fef/termfieldmatchdataposition.cpp
new file mode 100644
index 00000000000..f76c785b616
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/termfieldmatchdataposition.cpp
@@ -0,0 +1,12 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".fef.termfieldmatchdataposition");
+#include "termfieldmatchdataposition.h"
+
+namespace search {
+namespace fef {
+
+} // namespace fef
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/fef/termfieldmatchdataposition.h b/searchlib/src/vespa/searchlib/fef/termfieldmatchdataposition.h
new file mode 100644
index 00000000000..b7f82819bfb
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/termfieldmatchdataposition.h
@@ -0,0 +1,113 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/common/fslimits.h>
+
+namespace search {
+namespace fef {
+
+class TermFieldMatchDataPositionKey
+{
+private:
+ uint32_t _elementId;
+ uint32_t _position;
+
+public:
+ TermFieldMatchDataPositionKey()
+ : _elementId(0u),
+ _position(0u)
+ {
+ }
+
+ TermFieldMatchDataPositionKey(uint32_t elementId,
+ uint32_t position)
+ : _elementId(elementId),
+ _position(position)
+ {
+ }
+
+ uint32_t getElementId() const { return _elementId; }
+ uint32_t getPosition() const { return _position; }
+
+ void setElementId(uint32_t elementId) { _elementId = elementId; }
+ void setPosition(uint32_t position) { _position = position; }
+
+ bool operator<(const TermFieldMatchDataPositionKey &rhs) const {
+ if (_elementId != rhs._elementId) {
+ return _elementId < rhs._elementId;
+ }
+ return _position < rhs._position;
+ }
+
+ bool operator==(const TermFieldMatchDataPositionKey &rhs) const {
+ return ((_elementId == rhs._elementId) &&
+ (_position == rhs._position));
+ }
+};
+
+class TermFieldMatchDataPosition : public TermFieldMatchDataPositionKey
+{
+private:
+ int32_t _elementWeight;
+ uint32_t _elementLen;
+ uint32_t _matchLength;
+ double _matchExactness; // or possibly _matchWeight
+
+public:
+ TermFieldMatchDataPosition()
+ : TermFieldMatchDataPositionKey(),
+ _elementWeight(1),
+ _elementLen(SEARCHLIB_FEF_UNKNOWN_FIELD_LENGTH),
+ _matchLength(1),
+ _matchExactness(1.0)
+ {
+ }
+
+ const TermFieldMatchDataPositionKey &key() const {
+ return *this;
+ }
+
+ /**
+ * A comparator for sorting in natural (ascending) order but if
+ * positions are equal, sort best exactness first.
+ */
+ static bool compareWithExactness(const TermFieldMatchDataPosition &a,
+ const TermFieldMatchDataPosition &b)
+ {
+ if (a < b) return true;
+ if (b < a) return false;
+ return a._matchExactness >= b._matchExactness;
+ }
+
+ TermFieldMatchDataPosition(uint32_t elementId,
+ uint32_t position,
+ int32_t elementWeight,
+ uint32_t elementLen)
+ : TermFieldMatchDataPositionKey(elementId, position),
+ _elementWeight(elementWeight),
+ _elementLen(elementLen),
+ _matchLength(1),
+ _matchExactness(1.0)
+ {
+ }
+
+ int32_t getElementWeight() const { return _elementWeight; }
+ uint32_t getElementLen() const { return _elementLen; }
+ double getMatchExactness() const { return _matchExactness; }
+
+ void setElementWeight(int32_t elementWeight) {
+ _elementWeight = elementWeight;
+ }
+ void setElementLen(uint32_t elementLen) {
+ _elementLen = elementLen;
+ }
+ TermFieldMatchDataPosition& setMatchExactness(double exactness) {
+ _matchExactness = exactness;
+ return *this;
+ }
+};
+
+} // namespace fef
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/fef/termmatchdatamerger.cpp b/searchlib/src/vespa/searchlib/fef/termmatchdatamerger.cpp
new file mode 100644
index 00000000000..c82d9e1e030
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/termmatchdatamerger.cpp
@@ -0,0 +1,77 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".fef.termmatchdatamerger");
+#include "termmatchdatamerger.h"
+
+#include <algorithm>
+
+namespace search {
+namespace fef {
+
+TermMatchDataMerger::TermMatchDataMerger(const Inputs &allinputs,
+ const TermFieldMatchDataArray &outputs)
+ : _inputs(),
+ _output(outputs),
+ _scratch()
+{
+ for (size_t i = 0; i < _output.size(); ++i) {
+ Inputs inputs_for_i;
+ uint32_t fieldId = _output[i]->getFieldId();
+
+ for (size_t j = 0; j < allinputs.size(); ++j) {
+ if (allinputs[j].matchData->getFieldId() == fieldId) {
+ inputs_for_i.push_back(allinputs[j]);
+ }
+ }
+ _inputs.push_back(inputs_for_i);
+ }
+}
+
+void
+TermMatchDataMerger::merge(uint32_t docid)
+{
+ for (size_t i = 0; i < _output.size(); ++i) {
+ merge(docid, _inputs[i], *(_output[i]));
+ }
+}
+
+void
+TermMatchDataMerger::merge(uint32_t docid,
+ const Inputs &in,
+ TermFieldMatchData &out)
+{
+ _scratch.clear();
+ bool wasMatch = false;
+ for (size_t i = 0; i < in.size(); ++i) {
+ const TermFieldMatchData *md = in[i].matchData;
+ if (md->getDocId() == docid) {
+ for (const TermFieldMatchDataPosition &iter : *md) {
+ double exactness = in[i].exactness * iter.getMatchExactness();
+ _scratch.push_back(iter);
+ _scratch.back().setMatchExactness(exactness);
+ }
+ wasMatch = true;
+ }
+ }
+ if (wasMatch) {
+ out.reset(docid);
+ if (_scratch.size() > 0) {
+ std::sort(_scratch.begin(), _scratch.end(),
+ TermFieldMatchDataPosition::compareWithExactness);
+ TermFieldMatchDataPosition prev = _scratch[0];
+ for (size_t i = 1; i < _scratch.size(); ++i) {
+ const TermFieldMatchDataPosition &curr = _scratch[i];
+ if (prev.key() < curr.key()) {
+ out.appendPosition(prev);
+ prev = curr;
+ }
+ }
+ out.appendPosition(prev);
+ }
+ }
+}
+
+} // namespace fef
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/fef/termmatchdatamerger.h b/searchlib/src/vespa/searchlib/fef/termmatchdatamerger.h
new file mode 100644
index 00000000000..6c1ae717a43
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/termmatchdatamerger.h
@@ -0,0 +1,46 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "termfieldmatchdataarray.h"
+#include "termfieldmatchdata.h"
+#include <vector>
+
+namespace search {
+namespace fef {
+
+class TermMatchDataMerger
+{
+public:
+ struct Input {
+ const TermFieldMatchData *matchData;
+ double exactness;
+
+ Input() : matchData(NULL), exactness(0.0) {}
+ Input(const TermFieldMatchData *arg_matchData, double arg_exactness)
+ : matchData(arg_matchData), exactness(arg_exactness)
+ {}
+ };
+ typedef std::vector<Input> Inputs;
+private:
+ std::vector<Inputs> _inputs;
+ const TermFieldMatchDataArray _output;
+ std::vector<TermFieldMatchDataPosition> _scratch;
+
+ TermMatchDataMerger(const TermMatchDataMerger &);
+ TermMatchDataMerger &operator=(const TermMatchDataMerger &);
+
+ void merge(uint32_t docid,
+ const Inputs &in,
+ TermFieldMatchData &out);
+public:
+
+ TermMatchDataMerger(const Inputs &allinputs,
+ const TermFieldMatchDataArray &outputs);
+
+ void merge(uint32_t docid);
+};
+
+} // namespace fef
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/fef/test/.gitignore b/searchlib/src/vespa/searchlib/fef/test/.gitignore
new file mode 100644
index 00000000000..583460ae288
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/test/.gitignore
@@ -0,0 +1,3 @@
+*.So
+.depend
+Makefile
diff --git a/searchlib/src/vespa/searchlib/fef/test/CMakeLists.txt b/searchlib/src/vespa/searchlib/fef/test/CMakeLists.txt
new file mode 100644
index 00000000000..dd9ea8828c9
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/test/CMakeLists.txt
@@ -0,0 +1,14 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_library(searchlib_fef_test OBJECT
+ SOURCES
+ dummy_dependency_handler.cpp
+ featuretest.cpp
+ ftlib.cpp
+ indexenvironment.cpp
+ indexenvironmentbuilder.cpp
+ matchdatabuilder.cpp
+ queryenvironment.cpp
+ queryenvironmentbuilder.cpp
+ rankresult.cpp
+ DEPENDS
+)
diff --git a/searchlib/src/vespa/searchlib/fef/test/dummy_dependency_handler.cpp b/searchlib/src/vespa/searchlib/fef/test/dummy_dependency_handler.cpp
new file mode 100644
index 00000000000..7515a6338e0
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/test/dummy_dependency_handler.cpp
@@ -0,0 +1,60 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+
+#include <vespa/fastos/fastos.h>
+#include "dummy_dependency_handler.h"
+
+namespace search {
+namespace fef {
+namespace test {
+
+DummyDependencyHandler::DummyDependencyHandler(Blueprint &blueprint_in)
+ : blueprint(blueprint_in),
+ object_type_map(),
+ accept_type_mismatch(false),
+ input(),
+ accept_input(),
+ output(),
+ output_type()
+{
+ blueprint.attach_dependency_handler(*this);
+}
+
+DummyDependencyHandler::~DummyDependencyHandler()
+{
+ blueprint.detach_dependency_handler();
+}
+
+void
+DummyDependencyHandler::define_object_input(const vespalib::string &name, const vespalib::eval::ValueType &type)
+{
+ object_type_map.emplace(name, FeatureType::object(type));
+}
+
+const FeatureType &
+DummyDependencyHandler::resolve_input(const vespalib::string &feature_name, Blueprint::AcceptInput accept_type)
+{
+ input.push_back(feature_name);
+ accept_input.push_back(accept_type);
+ auto pos = object_type_map.find(feature_name);
+ if (pos == object_type_map.end()) {
+ if (accept_type == Blueprint::AcceptInput::OBJECT) {
+ accept_type_mismatch = true;
+ }
+ return FeatureType::number();
+ }
+ if (accept_type == Blueprint::AcceptInput::NUMBER) {
+ accept_type_mismatch = true;
+ }
+ return pos->second;
+}
+
+void DummyDependencyHandler::define_output(const vespalib::string &output_name, const FeatureType &type)
+{
+ output.push_back(output_name);
+ output_type.push_back(type);
+}
+
+} // namespace search::fef::test
+} // namespace search::fef
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/fef/test/dummy_dependency_handler.h b/searchlib/src/vespa/searchlib/fef/test/dummy_dependency_handler.h
new file mode 100644
index 00000000000..fa1a21d42ad
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/test/dummy_dependency_handler.h
@@ -0,0 +1,39 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/fef/blueprint.h>
+#include <vespa/vespalib/stllike/string.h>
+#include <vector>
+#include <vespa/searchlib/fef/feature_type.h>
+#include <vespa/vespalib/eval/value_type.h>
+#include <map>
+
+namespace search {
+namespace fef {
+namespace test {
+
+/**
+ * A very simple blueprint dependency resolver that will keep track of
+ * inputs and outputs for a single blueprint.
+ **/
+struct DummyDependencyHandler : public Blueprint::DependencyHandler
+{
+ Blueprint &blueprint;
+ std::map<vespalib::string,FeatureType> object_type_map;
+ bool accept_type_mismatch;
+ std::vector<vespalib::string> input;
+ std::vector<Blueprint::AcceptInput> accept_input;
+ std::vector<vespalib::string> output;
+ std::vector<FeatureType> output_type;
+
+ explicit DummyDependencyHandler(Blueprint &blueprint_in);
+ ~DummyDependencyHandler();
+ void define_object_input(const vespalib::string &name, const vespalib::eval::ValueType &type);
+ const FeatureType &resolve_input(const vespalib::string &feature_name, Blueprint::AcceptInput accept_type) override;
+ void define_output(const vespalib::string &output_name, const FeatureType &type) override;
+};
+
+} // namespace search::fef::test
+} // namespace search::fef
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/fef/test/featuretest.cpp b/searchlib/src/vespa/searchlib/fef/test/featuretest.cpp
new file mode 100644
index 00000000000..dd8dc0699f5
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/test/featuretest.cpp
@@ -0,0 +1,159 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".fef.featuretest");
+
+#include <sstream>
+#include "featuretest.h"
+#include <vespa/searchlib/fef/utils.h>
+
+namespace search {
+namespace fef {
+namespace test {
+
+FeatureTest::FeatureTest(BlueprintFactory &factory,
+ const IndexEnvironment &indexEnv,
+ QueryEnvironment &queryEnv,
+ MatchDataLayout &layout,
+ const std::vector<vespalib::string> &features,
+ const Properties &overrides) :
+ _factory(factory),
+ _indexEnv(indexEnv),
+ _queryEnv(queryEnv),
+ _features(features),
+ _layout(layout),
+ _overrides(overrides),
+ _resolver(new BlueprintResolver(factory, indexEnv)),
+ _rankProgram(new RankProgram(_resolver)),
+ _doneSetup(false)
+{
+ // empty
+}
+
+FeatureTest::FeatureTest(BlueprintFactory &factory,
+ const IndexEnvironment &indexEnv,
+ QueryEnvironment &queryEnv,
+ MatchDataLayout &layout,
+ const vespalib::string &feature,
+ const Properties &overrides) :
+ _factory(factory),
+ _indexEnv(indexEnv),
+ _queryEnv(queryEnv),
+ _features(),
+ _layout(layout),
+ _overrides(overrides),
+ _resolver(new BlueprintResolver(factory, indexEnv)),
+ _rankProgram(new RankProgram(_resolver)),
+ _doneSetup(false)
+{
+ _features.push_back(feature);
+}
+
+bool
+FeatureTest::setup()
+{
+ if (_doneSetup) {
+ LOG(error, "Setup already done.");
+ return false;
+ }
+
+ // clear state so that setup can be called multiple times.
+ clear();
+
+ for (uint32_t i = 0; i < _features.size(); ++i) {
+ _resolver->addSeed(_features[i]);
+ }
+
+ if (!_resolver->compile()) {
+ LOG(error, "Failed to compile blueprint resolver.");
+ return false;
+ }
+
+ _rankProgram->setup(_layout, _queryEnv, _overrides);
+ _doneSetup = true;
+ return true;
+}
+
+MatchDataBuilder::UP
+FeatureTest::createMatchDataBuilder()
+{
+ if (_doneSetup) {
+ return MatchDataBuilder::UP(new MatchDataBuilder(_queryEnv, _rankProgram->match_data()));
+ }
+ LOG(warning, "Match data not initialized.");
+ return MatchDataBuilder::UP();
+}
+
+bool
+FeatureTest::execute(const RankResult &expected, uint32_t docId)
+{
+ RankResult result;
+ if (!executeOnly(result, docId)) {
+ return false;
+ }
+
+ if (!result.includes(expected)) {
+ std::stringstream exp, act;
+ exp << "Expected: " << expected;
+ act << "Actual : " << result;
+
+ LOG(error, "Expected result not present in actual result after execution:");
+ LOG(error, "%s", exp.str().c_str());
+ LOG(error, "%s", act.str().c_str());
+
+ return false;
+ }
+ return true;
+}
+
+bool
+FeatureTest::execute(feature_t expected, double epsilon, uint32_t docId)
+{
+ return execute(RankResult().setEpsilon(epsilon).addScore(_features.front(), expected), docId);
+}
+
+bool
+FeatureTest::executeOnly(uint32_t docId)
+{
+ if (!_doneSetup) {
+ LOG(error, "Setup not done.");
+ return false;
+ }
+ // Note: match data object is reset as part of run
+ _rankProgram->run(docId);
+
+ return true;
+}
+
+bool
+FeatureTest::executeOnly(RankResult & result, uint32_t docId)
+{
+ if (!executeOnly(docId)) {
+ return false;
+ }
+
+ std::map<vespalib::string, feature_t> all = Utils::getAllFeatures(*_rankProgram);
+ for (auto itr = all.begin(); itr != all.end(); ++itr) {
+ result.addScore(itr->first, itr->second);
+ }
+
+ return true;
+}
+
+const vespalib::eval::Value::CREF *
+FeatureTest::resolveObjectFeature()
+{
+ return Utils::getObjectFeature(*_rankProgram);
+}
+
+void
+FeatureTest::clear()
+{
+ _resolver = BlueprintResolver::SP(new BlueprintResolver(_factory, _indexEnv));
+ _rankProgram.reset(new RankProgram(_resolver));
+ _doneSetup = false;
+}
+
+} // namespace test
+} // namespace fef
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/fef/test/featuretest.h b/searchlib/src/vespa/searchlib/fef/test/featuretest.h
new file mode 100644
index 00000000000..ead33f35f88
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/test/featuretest.h
@@ -0,0 +1,137 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/searchlib/fef/blueprintfactory.h>
+#include <vespa/searchlib/fef/blueprintresolver.h>
+#include <vespa/searchlib/fef/fieldinfo.h>
+#include <vespa/searchlib/fef/fieldtype.h>
+#include <vespa/searchlib/fef/matchdata.h>
+#include <vespa/searchlib/fef/matchdatalayout.h>
+#include <vespa/searchlib/fef/rank_program.h>
+#include <vespa/searchlib/fef/termfieldmatchdata.h>
+#include <vespa/searchlib/fef/test/indexenvironment.h>
+#include <vespa/searchlib/fef/test/indexenvironmentbuilder.h>
+#include <vespa/searchlib/fef/test/matchdatabuilder.h>
+#include <vespa/searchlib/fef/test/queryenvironment.h>
+#include <vespa/searchlib/fef/test/queryenvironmentbuilder.h>
+#include <vespa/searchlib/fef/test/rankresult.h>
+#include <vespa/vespalib/eval/value.h>
+
+namespace search {
+namespace fef {
+namespace test {
+
+/**
+ * This class wraps everything necessary to simulate a feature execution environment.
+ */
+class FeatureTest {
+public:
+ /**
+ * Constructs a new feature test.
+ *
+ * @param factory The blueprint factory that holds all registered features.
+ * @param indexEnv The index environment to use.
+ * @param queryEnv The query environment to use.
+ * @param layout The match data layout to use.
+ * @param feature The feature strings to run.
+ * @param overrides The set of feature overrides.
+ */
+ FeatureTest(BlueprintFactory &factory,
+ const IndexEnvironment &indexEnv,
+ QueryEnvironment &queryEnv,
+ MatchDataLayout &layout,
+ const std::vector<vespalib::string> &features,
+ const Properties &overrides);
+
+ /**
+ * Constructs a new feature test.
+ *
+ * @param factory The blueprint factory that holds all registered features.
+ * @param indexEnv The index environment to use.
+ * @param queryEnv The query environment to use.
+ * @param layout The match data layout to use.
+ * @param feature The feature string to run.
+ * @param overrides The set of feature overrides.
+ */
+ FeatureTest(BlueprintFactory &factory,
+ const IndexEnvironment &indexEnv,
+ QueryEnvironment &queryEnv,
+ MatchDataLayout &layout,
+ const vespalib::string &feature,
+ const Properties &overrides);
+ /**
+ * Necessary method to setup the internal feature execution manager. A test will typically assert on the return of
+ * this method, since no test can run if setup failed.
+ *
+ * @return Whether or not setup was ok.
+ */
+ bool setup();
+
+ /**
+ * Creates and returns a match data builder object. This will clear whatever content is currently contained in this
+ * runner. The builder offers a simple API to build a match data object.
+ *
+ * @return A builder object.
+ */
+ MatchDataBuilder::UP createMatchDataBuilder();
+
+ /**
+ * Executes the content of this runner, comparing the result to the given result set.
+ *
+ * @param expected The expected output.
+ * @param docId The document id to set on the match data object before running executors.
+ * @return Whether or not the output matched the expected.
+ */
+ bool execute(const RankResult &expected, uint32_t docId = 1);
+
+ /**
+ * Convenience method to assert the final output of a feature string.
+ *
+ * @param expected The expected output.
+ * @param epsilon The allowed slack for comparing rank results.
+ * @param docId The document id to set on the match data object before running executors.
+ * @return Whether or not the output matched the expected.
+ */
+ bool execute(feature_t expected, double epsilon = 0, uint32_t docId = 1);
+
+ /**
+ * Executes the content of this runner only.
+ *
+ * @param docId The document id to set on the match data object before running executors.
+ * @return Whether the executors were executed.
+ */
+ bool executeOnly(uint32_t docId = 1);
+
+ /**
+ * Executes the content of this runner only and stores the result in the given rank result.
+ *
+ * @param result The rank result to store the rank scores.
+ * @param docId The document id to set on the match data object before running executors.
+ * @return Whether the executors were executed.
+ */
+ bool executeOnly(RankResult & result, uint32_t docId = 1);
+
+ /**
+ * Resolve the only object feature that is present in the match data of the underlying
+ * rank program.
+ */
+ const vespalib::eval::Value::CREF *resolveObjectFeature();
+
+private:
+ BlueprintFactory &_factory;
+ const IndexEnvironment &_indexEnv;
+ QueryEnvironment &_queryEnv;
+ std::vector<vespalib::string> _features;
+ MatchDataLayout &_layout;
+ const Properties &_overrides;
+ BlueprintResolver::SP _resolver;
+ RankProgram::UP _rankProgram;
+ bool _doneSetup;
+
+ void clear();
+};
+
+} // namespace test
+} // namespace fef
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/fef/test/ftlib.cpp b/searchlib/src/vespa/searchlib/fef/test/ftlib.cpp
new file mode 100644
index 00000000000..3fd85d04241
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/test/ftlib.cpp
@@ -0,0 +1,399 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".ftlib");
+
+#include <math.h>
+#include <boost/tokenizer.hpp>
+#include <vespa/searchlib/features/utils.h>
+#include <vespa/searchlib/fef/featurenamebuilder.h>
+#include <vespa/vespalib/util/stringfmt.h>
+#include "ftlib.h"
+#include "dummy_dependency_handler.h"
+
+using namespace search::features;
+using namespace search::fef;
+using namespace search::fef::test;
+
+FtIndexEnvironment::FtIndexEnvironment() :
+ search::fef::test::IndexEnvironment(),
+ _builder(*this)
+{
+ // empty
+}
+
+FtQueryEnvironment::FtQueryEnvironment(search::fef::test::IndexEnvironment &env)
+ : search::fef::test::QueryEnvironment(&env),
+ _layout(),
+ _builder(*this, _layout)
+{
+ // empty
+}
+
+FtDumpFeatureVisitor::FtDumpFeatureVisitor() :
+ _features()
+{
+}
+
+FtFeatureTest::FtFeatureTest(search::fef::BlueprintFactory &factory, const vespalib::string &feature) :
+ _indexEnv(),
+ _queryEnv(_indexEnv),
+ _overrides(),
+ _test(factory, _indexEnv, _queryEnv, _queryEnv.getLayout(), feature, _overrides)
+{
+ // empty
+}
+
+FtFeatureTest::FtFeatureTest(search::fef::BlueprintFactory &factory, const std::vector<vespalib::string> &features)
+ : _indexEnv(),
+ _queryEnv(_indexEnv),
+ _overrides(),
+ _test(factory, _indexEnv, _queryEnv, _queryEnv.getLayout(), features, _overrides)
+{
+}
+
+
+//---------------------------------------------------------------------------------------------------------------------
+// FtUtil
+//---------------------------------------------------------------------------------------------------------------------
+std::vector<vespalib::string>
+FtUtil::tokenize(const vespalib::string & str, const vespalib::string & separator)
+{
+ typedef boost::tokenizer<boost::char_separator<char> > Tokenizer;
+ typedef boost::char_separator<char> Separator;
+
+ std::vector<vespalib::string> retval;
+ if (separator != vespalib::string("")) {
+ Tokenizer tnz(str, Separator(separator.c_str()));
+ for (Tokenizer::const_iterator itr = tnz.begin(); itr != tnz.end(); ++itr) {
+ retval.push_back(*itr);
+ }
+ } else {
+ for (uint32_t i = 0; i < str.size(); ++i) {
+ retval.push_back(vespalib::string("" + str[i]));
+ }
+ }
+ return retval;
+}
+
+
+FtQuery
+FtUtil::toQuery(const vespalib::string & query, const vespalib::string & separator)
+{
+ std::vector<vespalib::string> prepQuery = FtUtil::tokenize(query, separator);
+ FtQuery retval(prepQuery.size());
+ for (uint32_t i = 0; i < prepQuery.size(); ++i) {
+ std::vector<vespalib::string> significanceSplit = FtUtil::tokenize(prepQuery[i], vespalib::string("%"));
+ std::vector<vespalib::string> weightSplit = FtUtil::tokenize(significanceSplit[0], vespalib::string("!"));
+ std::vector<vespalib::string> connexitySplit = FtUtil::tokenize(weightSplit[0], vespalib::string(":"));
+ if (connexitySplit.size() > 1) {
+ retval[i].term = connexitySplit[1];
+ retval[i].connexity = search::features::util::strToNum<feature_t>(connexitySplit[0]);
+ } else {
+ retval[i].term = connexitySplit[0];
+ }
+ if (significanceSplit.size() > 1) {
+ retval[i].significance = search::features::util::strToNum<feature_t>(significanceSplit[1]);
+ }
+ if (weightSplit.size() > 1) {
+ retval[i].termWeight.setPercent(search::features::util::strToNum<uint32_t>(weightSplit[1]));
+ }
+ }
+ return retval;
+}
+
+RankResult
+FtUtil::toRankResult(const vespalib::string & baseName, const vespalib::string & result, const vespalib::string & separator)
+{
+ RankResult retval;
+ std::vector<vespalib::string> prepResult = FtUtil::tokenize(result, separator);
+ for (uint32_t i = 0; i < prepResult.size(); ++i) {
+ std::vector<vespalib::string> rs = FtUtil::tokenize(prepResult[i], ":");
+ vespalib::string name = rs[0];
+ vespalib::string value = rs[1];
+ retval.addScore(baseName + "." + name, search::features::util::strToNum<feature_t>(value));
+ }
+ return retval;
+}
+
+
+//---------------------------------------------------------------------------------------------------------------------
+// FtTestApp
+//---------------------------------------------------------------------------------------------------------------------
+void
+FtTestApp::FT_SETUP_FAIL(const search::fef::Blueprint &prototype, const StringList &params)
+{
+ search::fef::test::IndexEnvironment ie;
+ FT_SETUP_FAIL(prototype, ie, params);
+}
+
+void
+FtTestApp::FT_SETUP_FAIL(const search::fef::Blueprint &prototype, const search::fef::test::IndexEnvironment &env,
+ const StringList &params)
+{
+ FT_LOG(prototype, env, params);
+ search::fef::Blueprint::UP bp = prototype.createInstance();
+ DummyDependencyHandler deps(*bp);
+ EXPECT_TRUE(!bp->setup(env, params));
+}
+
+void
+FtTestApp::FT_SETUP_OK(const search::fef::Blueprint &prototype, const StringList &params,
+ const StringList &expectedIn, const StringList &expectedOut)
+{
+ search::fef::test::IndexEnvironment ie;
+ FT_SETUP_OK(prototype, ie, params, expectedIn, expectedOut);
+}
+
+void
+FtTestApp::FT_SETUP_OK(const search::fef::Blueprint &prototype, const search::fef::test::IndexEnvironment &env,
+ const StringList &params, const StringList &expectedIn, const StringList &expectedOut)
+{
+ FT_LOG(prototype, env, params);
+ search::fef::Blueprint::UP bp = prototype.createInstance();
+ DummyDependencyHandler deps(*bp);
+ ASSERT_TRUE(bp->setup(env, params));
+ FT_EQUAL(expectedIn, deps.input, "In, ");
+ FT_EQUAL(expectedOut, deps.output, "Out,");
+}
+
+void
+FtTestApp::FT_DUMP_EMPTY(search::fef::BlueprintFactory &factory, const vespalib::string &baseName)
+{
+ StringList empty;
+ FT_DUMP(factory, baseName, empty);
+}
+
+void
+FtTestApp::FT_DUMP_EMPTY(search::fef::BlueprintFactory &factory, const vespalib::string &baseName,
+ search::fef::test::IndexEnvironment &env)
+{
+ StringList empty;
+ FT_DUMP(factory, baseName, env, empty);
+}
+
+void
+FtTestApp::FT_DUMP(search::fef::BlueprintFactory &factory, const vespalib::string &baseName,
+ const StringList &expected)
+{
+ search::fef::test::IndexEnvironment ie;
+ FT_DUMP(factory, baseName, ie, expected);
+}
+
+void
+FtTestApp::FT_DUMP(search::fef::BlueprintFactory &factory, const vespalib::string &baseName,
+ search::fef::test::IndexEnvironment &env,
+ const StringList &expected)
+{
+ FtDumpFeatureVisitor dfv;
+ search::fef::Blueprint::SP bp = factory.createBlueprint(baseName);
+ if (bp.get() == NULL) {
+ LOG(error, "Blueprint '%s' does not exist in factory, did you forget to add it?", baseName.c_str());
+ ASSERT_TRUE(bp.get() != NULL);
+ }
+ bp->visitDumpFeatures(env, dfv);
+ FT_EQUAL(expected, dfv.features(), "Dump");
+}
+
+void
+FtTestApp::FT_EQUAL(const std::vector<string> &expected, const std::vector<string> &actual,
+ const vespalib::string prefix)
+{
+ FT_LOG(prefix + " expected", expected);
+ FT_LOG(prefix + " actual ", actual);
+ EXPECT_EQUAL(expected.size(), actual.size());
+ ASSERT_TRUE(expected.size() == actual.size());
+ for (uint32_t i = 0; i < expected.size(); ++i) {
+ EXPECT_EQUAL(expected[i], actual[i]);
+ ASSERT_TRUE(expected[i] == actual[i]);
+ }
+}
+
+void
+FtTestApp::FT_LOG(const search::fef::Blueprint &prototype, const search::fef::test::IndexEnvironment &env,
+ const StringList &params)
+{
+ LOG(info, "Testing blueprint '%s'.", prototype.getBaseName().c_str());
+ std::vector<vespalib::string> arr;
+ for (std::vector<search::fef::FieldInfo>::const_iterator it = env.getFields().begin();
+ it != env.getFields().end(); ++it) {
+ arr.push_back(it->name());
+ }
+ FT_LOG("Environment ", arr);
+ FT_LOG("Parameters ", params);
+}
+
+void
+FtTestApp::FT_LOG(const vespalib::string &prefix, const std::vector<vespalib::string> &arr)
+{
+ vespalib::string str = prefix + " = [ ";
+ for (uint32_t i = 0; i < arr.size(); ++i) {
+ str.append("'").append(arr[i]).append("'");
+ if (i < arr.size() - 1) {
+ str.append(", ");
+ }
+ }
+ str.append(" ]");
+ LOG(info, "%s", str.c_str());
+}
+
+void
+FtTestApp::FT_SETUP(FtFeatureTest &test, const vespalib::string &query, const StringMap &index,
+ uint32_t docId)
+{
+ LOG(info, "Setup test for query '%s'.", query.c_str());
+
+ // Add all query terms.
+ FtQueryEnvironment &queryEnv = test.getQueryEnv();
+ for (uint32_t i = 0; i < query.size(); ++i) {
+ queryEnv.getBuilder().addAllFields();
+ }
+ ASSERT_TRUE(test.setup());
+
+ // Add all occurences.
+ search::fef::test::MatchDataBuilder::UP mdb = test.createMatchDataBuilder();
+ for (StringMap::const_iterator it = index.begin();
+ it != index.end(); ++it) {
+ ASSERT_TRUE(mdb->setFieldLength(it->first, it->second.size()));
+ for (uint32_t i = 0; i < it->second.size(); ++i) {
+ size_t pos = query.find_first_of(it->second[i]);
+ if (pos != vespalib::string::npos) {
+ LOG(debug, "Occurence of '%c' added to field '%s' at position %d.", query[pos], it->first.c_str(), i);
+ ASSERT_TRUE(mdb->addOccurence(it->first, pos, i));
+ }
+ }
+ }
+ ASSERT_TRUE(mdb->apply(docId));
+}
+
+void
+FtTestApp::FT_SETUP(FtFeatureTest & test, const std::vector<FtQueryTerm> & query, const StringVectorMap & index,
+ uint32_t docId)
+{
+ setupQueryEnv(test.getQueryEnv(), query);
+ ASSERT_TRUE(test.setup());
+
+ search::fef::test::MatchDataBuilder::UP mdb = test.createMatchDataBuilder();
+
+ // Add all occurences.
+ for (StringVectorMap::const_iterator itr = index.begin(); itr != index.end(); ++itr) {
+ ASSERT_TRUE(mdb->setFieldLength(itr->first, itr->second.size()));
+ for (uint32_t i = 0; i < itr->second.size(); ++i) {
+ FtQuery::const_iterator fitr = query.begin();
+ for (;;) {
+ fitr = std::find(fitr, query.end(), FtQueryTerm(itr->second[i]));
+ if (fitr != query.end()) {
+ uint32_t termId = fitr - query.begin();
+ LOG(debug, "Occurence of '%s' added to field '%s' at position %u.", fitr->term.c_str(), itr->first.c_str(), i);
+ ASSERT_TRUE(mdb->addOccurence(itr->first, termId, i));
+ ++fitr;
+ } else {
+ break;
+ }
+ }
+ }
+ }
+ ASSERT_TRUE(mdb->apply(docId));
+}
+
+void
+FtTestApp::FT_SETUP(FtFeatureTest &test, const FtQuery &query, const FtIndex &index, uint32_t docId)
+{
+ setupQueryEnv(test.getQueryEnv(), query);
+ ASSERT_TRUE(test.setup());
+ search::fef::test::MatchDataBuilder::UP mdb = test.createMatchDataBuilder();
+
+ // Add all occurences.
+ for (FtIndex::FieldMap::const_iterator itr = index.index.begin(); itr != index.index.end(); ++itr) {
+ const FtIndex::Field &field = itr->second;
+ for (size_t e = 0; e < field.size(); ++e) {
+ const FtIndex::Element &element = field[e];
+ ASSERT_TRUE(mdb->addElement(itr->first, element.weight, element.tokens.size()));
+ for (size_t t = 0; t < element.tokens.size(); ++t) {
+ const vespalib::string &token = element.tokens[t];
+ for (size_t q = 0; q < query.size(); ++q) {
+ if (query[q].term == token) {
+ ASSERT_TRUE(mdb->addOccurence(itr->first, q, t, e));
+ }
+ }
+ }
+ }
+ }
+ ASSERT_TRUE(mdb->apply(docId));
+}
+
+void
+FtTestApp::setupQueryEnv(FtQueryEnvironment & queryEnv, const FtQuery & query)
+{
+ // Add all query terms.
+ for (uint32_t i = 0; i < query.size(); ++i) {
+ queryEnv.getBuilder().addAllFields();
+ queryEnv.getTerms()[i].setPhraseLength(1);
+ queryEnv.getTerms()[i].setUniqueId(i);
+ queryEnv.getTerms()[i].setWeight(query[i].termWeight);
+ if (i > 0) {
+ vespalib::string from = vespalib::make_string("vespa.term.%u.connexity", i);
+ vespalib::string to = vespalib::make_string("%u", i - 1);
+ vespalib::string connexity = vespalib::make_string("%f", query[i].connexity);
+ queryEnv.getProperties().add(from, to);
+ queryEnv.getProperties().add(from, connexity);
+ }
+ vespalib::string term = vespalib::make_string("vespa.term.%u.significance", i);
+ vespalib::string significance = vespalib::make_string("%f", query[i].significance);
+ queryEnv.getProperties().add(term, significance);
+ LOG(debug, "Add term node: '%s'", query[i].term.c_str());
+ }
+}
+
+void
+FtTestApp::setupFieldMatch(FtFeatureTest & ft, const vespalib::string & indexName,
+ const vespalib::string & query, const vespalib::string & field,
+ const fieldmatch::Params * params, uint32_t totalTermWeight, feature_t totalSignificance,
+ uint32_t docId)
+{
+ ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, indexName);
+
+ if (params != NULL) {
+ Properties & p = ft.getIndexEnv().getProperties();
+ p.add("fieldMatch(" + indexName + ").proximityLimit", vespalib::make_string("%u", params->getProximityLimit()));
+ p.add("fieldMatch(" + indexName + ").maxAlternativeSegmentations", vespalib::make_string("%u", params->getMaxAlternativeSegmentations()));
+ p.add("fieldMatch(" + indexName + ").maxOccurrences", vespalib::make_string("%u", params->getMaxOccurrences()));
+ p.add("fieldMatch(" + indexName + ").proximityCompletenessImportance", vespalib::make_string("%f", params->getProximityCompletenessImportance()));
+ p.add("fieldMatch(" + indexName + ").relatednessImportance", vespalib::make_string("%f", params->getRelatednessImportance()));
+ p.add("fieldMatch(" + indexName + ").earlinessImportance", vespalib::make_string("%f", params->getEarlinessImportance()));
+ p.add("fieldMatch(" + indexName + ").segmentProximityImportance", vespalib::make_string("%f", params->getSegmentProximityImportance()));
+ p.add("fieldMatch(" + indexName + ").occurrenceImportance", vespalib::make_string("%f", params->getOccurrenceImportance()));
+ p.add("fieldMatch(" + indexName + ").fieldCompletenessImportance", vespalib::make_string("%f", params->getFieldCompletenessImportance()));
+ for (std::vector<feature_t>::const_iterator it = params->getProximityTable().begin();
+ it != params->getProximityTable().end(); ++it)
+ {
+ p.add("fieldMatch(" + indexName + ").proximityTable", vespalib::make_string("%f", *it));
+ }
+ }
+
+ if (totalTermWeight > 0) {
+ ft.getQueryEnv().getProperties().add("fieldMatch(" + indexName + ").totalTermWeight",
+ vespalib::make_string("%u", totalTermWeight));
+ }
+
+ if (totalSignificance > 0.0f) {
+ ft.getQueryEnv().getProperties().add("fieldMatch(" + indexName + ").totalTermSignificance",
+ vespalib::make_string("%f", totalSignificance));
+ }
+
+ std::map<vespalib::string, std::vector<vespalib::string> > index;
+ index[indexName] = FtUtil::tokenize(field);
+ FT_SETUP(ft, FtUtil::toQuery(query), index, docId);
+}
+
+
+RankResult
+FtTestApp::toRankResult(const vespalib::string & baseName,
+ const vespalib::string & result,
+ const vespalib::string & separator)
+{
+ return FtUtil::toRankResult(baseName, result, separator);
+}
+
+
+
diff --git a/searchlib/src/vespa/searchlib/fef/test/ftlib.h b/searchlib/src/vespa/searchlib/fef/test/ftlib.h
new file mode 100644
index 00000000000..dff9764b03b
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/test/ftlib.h
@@ -0,0 +1,238 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/searchlib/query/weight.h>
+#include <vespa/searchlib/features/fieldmatch/params.h>
+#include <vespa/searchlib/fef/fef.h>
+#include <vespa/searchlib/fef/test/featuretest.h>
+#include <vespa/searchlib/fef/test/indexenvironment.h>
+#include <vespa/searchlib/fef/test/indexenvironmentbuilder.h>
+#include <vespa/searchlib/fef/test/matchdatabuilder.h>
+#include <vespa/searchlib/fef/test/queryenvironment.h>
+#include <vespa/searchlib/fef/test/queryenvironmentbuilder.h>
+#include <vespa/searchlib/fef/test/rankresult.h>
+#include <vespa/vespalib/testkit/testapp.h>
+#include <vespa/searchlib/common/feature.h>
+
+using search::feature_t;
+
+//---------------------------------------------------------------------------------------------------------------------
+// StringList
+//---------------------------------------------------------------------------------------------------------------------
+class StringList : public std::vector<vespalib::string> {
+public:
+ StringList &add(const vespalib::stringref &str) { push_back(str); return *this; }
+ StringList &clear() { std::vector<vespalib::string>::clear(); return *this; }
+};
+
+//---------------------------------------------------------------------------------------------------------------------
+// StringMap
+//---------------------------------------------------------------------------------------------------------------------
+class StringMap : public std::map<vespalib::string, vespalib::string> {
+public:
+ StringMap &add(const vespalib::string &key, const vespalib::string &val) {
+ iterator it = insert(std::make_pair(key, val)).first;
+ it->second = val;
+ return *this;
+ }
+ StringMap &clear() {
+ std::map<vespalib::string, vespalib::string>::clear();
+ return *this;
+ }
+};
+
+//---------------------------------------------------------------------------------------------------------------------
+// StringSet
+//---------------------------------------------------------------------------------------------------------------------
+class StringSet : public std::set<vespalib::string> {
+public:
+ StringSet & add(const vespalib::string & str) { insert(str); return *this; }
+ StringSet & clear() { std::set<vespalib::string>::clear(); return *this; }
+};
+
+
+//---------------------------------------------------------------------------------------------------------------------
+// FtIndexEnvironment
+//---------------------------------------------------------------------------------------------------------------------
+class FtIndexEnvironment : public search::fef::test::IndexEnvironment {
+public:
+ FtIndexEnvironment();
+
+ search::fef::test::IndexEnvironmentBuilder &getBuilder() { return _builder; }
+
+private:
+ search::fef::test::IndexEnvironmentBuilder _builder;
+};
+
+//---------------------------------------------------------------------------------------------------------------------
+// FtQueryEnvironment
+//---------------------------------------------------------------------------------------------------------------------
+class FtQueryEnvironment : public search::fef::test::QueryEnvironment {
+public:
+ FtQueryEnvironment(search::fef::test::IndexEnvironment &indexEnv);
+
+ search::fef::test::QueryEnvironmentBuilder &getBuilder() { return _builder; }
+ search::fef::MatchDataLayout &getLayout() { return _layout; }
+
+private:
+ search::fef::MatchDataLayout _layout;
+ search::fef::test::QueryEnvironmentBuilder _builder;
+};
+
+//---------------------------------------------------------------------------------------------------------------------
+// FtDumpFeatureVisitor
+//---------------------------------------------------------------------------------------------------------------------
+class FtDumpFeatureVisitor : public search::fef::IDumpFeatureVisitor
+{
+private:
+ std::vector<vespalib::string> _features;
+
+public:
+ FtDumpFeatureVisitor();
+ virtual void visitDumpFeature(const vespalib::string & name) { _features.push_back(name); }
+ const std::vector<vespalib::string> & features() const { return _features; }
+};
+
+//---------------------------------------------------------------------------------------------------------------------
+// FtTestRunner
+//---------------------------------------------------------------------------------------------------------------------
+class FtFeatureTest {
+public:
+ FtFeatureTest(search::fef::BlueprintFactory &factory, const vespalib::string &feature);
+ FtFeatureTest(search::fef::BlueprintFactory &factory, const std::vector<vespalib::string> &features);
+
+ bool setup() { return _test.setup(); }
+ bool execute(feature_t expected, double epsilon = 0, uint32_t docId = 1) { return _test.execute(expected, epsilon, docId); }
+ bool execute(const search::fef::test::RankResult &expected, uint32_t docId = 1) { return _test.execute(expected, docId); }
+ bool executeOnly(uint32_t docId = 1) { return _test.executeOnly(docId); }
+ bool executeOnly(search::fef::test::RankResult &result, uint32_t docId = 1) { return _test.executeOnly(result, docId); }
+ search::fef::test::MatchDataBuilder::UP createMatchDataBuilder() { return _test.createMatchDataBuilder(); }
+ const vespalib::eval::Value::CREF *resolveObjectFeature() { return _test.resolveObjectFeature(); }
+
+ FtIndexEnvironment &getIndexEnv() { return _indexEnv; }
+ FtQueryEnvironment &getQueryEnv() { return _queryEnv; }
+ search::fef::Properties &getOverrides() { return _overrides; }
+
+private:
+ FtIndexEnvironment _indexEnv;
+ FtQueryEnvironment _queryEnv;
+ search::fef::Properties _overrides;
+ search::fef::test::FeatureTest _test;
+};
+
+//---------------------------------------------------------------------------------------------------------------------
+// FtQueryTerm
+//---------------------------------------------------------------------------------------------------------------------
+struct FtQueryTerm {
+ FtQueryTerm(const vespalib::string t, uint32_t tw = 100, feature_t co = 0.1f, feature_t si = 0.1f) :
+ term(t), termWeight(tw), connexity(co), significance(si) {}
+ FtQueryTerm() : term(), termWeight(100), connexity(0.1f), significance(0.1f) {}
+ vespalib::string term;
+ search::query::Weight termWeight;
+ feature_t connexity;
+ feature_t significance;
+ bool operator<(const FtQueryTerm & rhs) const {
+ return term < rhs.term;
+ }
+ bool operator==(const FtQueryTerm & rhs) const {
+ return term == rhs.term;
+ }
+};
+
+typedef std::vector<FtQueryTerm> FtQuery;
+typedef std::map<vespalib::string, std::vector<vespalib::string> > StringVectorMap;
+
+//---------------------------------------------------------------------------------------------------------------------
+// FtUtil
+//---------------------------------------------------------------------------------------------------------------------
+class FtUtil {
+public:
+ static std::vector<vespalib::string> tokenize(const vespalib::string & str, const vespalib::string & separator = " ");
+ static FtQuery toQuery(const vespalib::string & query, const vespalib::string & separator = " ");
+ static search::fef::test::RankResult toRankResult(const vespalib::string & baseName,
+ const vespalib::string & result,
+ const vespalib::string & separator = " ");
+};
+
+//---------------------------------------------------------------------------------------------------------------------
+// FtIndex
+//---------------------------------------------------------------------------------------------------------------------
+struct FtIndex {
+ struct Element {
+ typedef std::vector<vespalib::string> Tokens;
+ int32_t weight;
+ Tokens tokens;
+ Element(int32_t w, const Tokens &t)
+ : weight(w), tokens(t) {}
+ };
+ typedef std::vector<Element> Field;
+ typedef std::map<vespalib::string, Field> FieldMap;
+ FieldMap index; // raw content of all fields
+ vespalib::string cursor; // last referenced field
+ FtIndex() : index(), cursor() {}
+ FtIndex &field(const vespalib::string &name) {
+ cursor = name;
+ index[name];
+ return *this;
+ }
+ FtIndex &element(const vespalib::string &content, int32_t weight = 1) {
+ assert(!cursor.empty());
+ index[cursor].push_back(Element(weight, FtUtil::tokenize(content, " ")));
+ return *this;
+ }
+};
+
+//---------------------------------------------------------------------------------------------------------------------
+// FtTestApp
+//---------------------------------------------------------------------------------------------------------------------
+struct FtTestApp : public vespalib::TestApp {
+ typedef vespalib::string string;
+ static void FT_SETUP_FAIL(const search::fef::Blueprint &prototype, const StringList &params);
+ static void FT_SETUP_FAIL(const search::fef::Blueprint &prototype, const search::fef::test::IndexEnvironment &env,
+ const StringList &params);
+ static void FT_SETUP_OK(const search::fef::Blueprint &prototype, const StringList &params,
+ const StringList &expectedIn, const StringList &expectedOut);
+ static void FT_SETUP_OK(const search::fef::Blueprint &prototype, const search::fef::test::IndexEnvironment &env,
+ const StringList &params, const StringList &expectedIn, const StringList &expectedOut);
+
+ static void FT_DUMP_EMPTY(search::fef::BlueprintFactory &factory, const vespalib::string &baseName);
+ static void FT_DUMP_EMPTY(search::fef::BlueprintFactory &factory, const vespalib::string &baseName,
+ search::fef::test::IndexEnvironment &env);
+ static void FT_DUMP(search::fef::BlueprintFactory &factory, const vespalib::string &baseName,
+ const StringList &expected);
+ static void FT_DUMP(search::fef::BlueprintFactory &factory, const vespalib::string &baseName,
+ search::fef::test::IndexEnvironment &env,
+ const StringList &expected);
+
+ static void FT_EQUAL(const std::vector<string> &expected, const std::vector<string> &actual,
+ const vespalib::string prefix = "");
+
+ static void FT_LOG(const search::fef::Blueprint &prototype, const search::fef::test::IndexEnvironment &env, const StringList &params);
+ static void FT_LOG(const vespalib::string &prefix, const std::vector<vespalib::string> &arr);
+
+
+ static void FT_SETUP(FtFeatureTest & test, const vespalib::string & query, const StringMap & index, uint32_t docId);
+ static void FT_SETUP(FtFeatureTest & test, const FtQuery & query, const StringVectorMap & index, uint32_t docId);
+
+ static void FT_SETUP(FtFeatureTest &test, const FtQuery &query, const FtIndex &index, uint32_t docId);
+
+ static void setupQueryEnv(FtQueryEnvironment & queryEnv, const FtQuery & query);
+ static void setupFieldMatch(FtFeatureTest & test, const vespalib::string & indexName,
+ const vespalib::string & query, const vespalib::string & field,
+ const search::features::fieldmatch::Params * params,
+ uint32_t totalTermWeight, feature_t totalSignificance,
+ uint32_t docId);
+
+ static search::fef::test::RankResult toRankResult(const vespalib::string & baseName,
+ const vespalib::string & result,
+ const vespalib::string & separator = " ");
+
+ template <typename T>
+ static bool assertCreateInstance(const T & prototype, const vespalib::string & baseName) {
+ search::fef::Blueprint::UP bp = prototype.createInstance();
+ if (!EXPECT_TRUE(dynamic_cast<T*>(bp.get()) != NULL)) return false;
+ if (!EXPECT_EQUAL(bp->getBaseName(), baseName)) return false;
+ return true;
+ }
+};
+
diff --git a/searchlib/src/vespa/searchlib/fef/test/indexenvironment.cpp b/searchlib/src/vespa/searchlib/fef/test/indexenvironment.cpp
new file mode 100644
index 00000000000..fa2e2102311
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/test/indexenvironment.cpp
@@ -0,0 +1,42 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".fef.indexenvironment");
+
+#include <vespa/vespalib/util/stringfmt.h>
+#include <vespa/searchlib/attribute/attributefactory.h>
+#include "indexenvironment.h"
+
+namespace search {
+namespace fef {
+namespace test {
+
+IndexEnvironment::IndexEnvironment() :
+ _properties(),
+ _fields(),
+ _attrMan(),
+ _tableMan()
+{
+}
+
+const FieldInfo *
+IndexEnvironment::getField(uint32_t id) const
+{
+ return id < _fields.size() ? &_fields[id] : NULL;
+}
+
+const FieldInfo *
+IndexEnvironment::getFieldByName(const string &name) const
+{
+ for (std::vector<FieldInfo>::const_iterator it = _fields.begin();
+ it != _fields.end(); ++it) {
+ if (it->name() == name) {
+ return &(*it);
+ }
+ }
+ return NULL;
+}
+
+} // namespace test
+} // namespace fef
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/fef/test/indexenvironment.h b/searchlib/src/vespa/searchlib/fef/test/indexenvironment.h
new file mode 100644
index 00000000000..aeb669be158
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/test/indexenvironment.h
@@ -0,0 +1,83 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/searchlib/attribute/attributemanager.h>
+#include <vespa/searchlib/fef/iindexenvironment.h>
+#include <vespa/searchlib/fef/properties.h>
+#include <vespa/searchlib/fef/fieldinfo.h>
+#include <vespa/searchlib/fef/tablemanager.h>
+#include <string>
+#include <vector>
+
+namespace search {
+namespace fef {
+namespace test {
+
+/**
+ * Implementation of the IIndexEnvironment interface used for testing.
+ */
+class IndexEnvironment : public IIndexEnvironment
+{
+public:
+ /**
+ * Constructs a new index environment.
+ */
+ IndexEnvironment();
+
+ // Inherit doc from IIndexEnvironment.
+ virtual const Properties &getProperties() const { return _properties; }
+
+ // Inherit doc from IIndexEnvironment.
+ virtual uint32_t getNumFields() const { return _fields.size(); }
+
+ // Inherit doc from IIndexEnvironment.
+ virtual const FieldInfo *getField(uint32_t id) const;
+
+ // Inherit doc from IIndexEnvironment.
+ virtual const FieldInfo *getFieldByName(const string &name) const;
+
+ // Inherit doc from IIndexEnvironment.
+ virtual const ITableManager &getTableManager() const { return _tableMan; }
+
+ // Inherit doc from IIndexEnvironment.
+ virtual FeatureMotivation getFeatureMotivation() const override { return UNKNOWN; }
+
+ // Inherit doc from IIndexEnvironment.
+ virtual void hintFeatureMotivation(FeatureMotivation) const {}
+
+ // Inherit doc from IIndexEnvironment.
+ virtual void hintFieldAccess(uint32_t) const {}
+
+ // Inherit doc from IIndexEnvironment.
+ virtual void hintAttributeAccess(const string &) const {}
+
+ /** Returns a reference to the properties map of this. */
+ Properties &getProperties() { return _properties; }
+
+ /** Returns a reference to the list of fields of this. */
+ std::vector<FieldInfo> &getFields() { return _fields; }
+
+ /** Returns a const reference to the list of fields of this. */
+ const std::vector<FieldInfo> &getFields() const { return _fields; }
+
+ /** Returns a reference to the attribute manager of this. */
+ AttributeManager &getAttributeManager() { return _attrMan; }
+
+ /** Returns a reference to the table manager of this. */
+ TableManager &getTableManager() { return _tableMan; }
+
+private:
+ IndexEnvironment(const IndexEnvironment &); // hide
+ IndexEnvironment & operator=(const IndexEnvironment &); // hide
+
+private:
+ Properties _properties;
+ std::vector<FieldInfo> _fields;
+ AttributeManager _attrMan;
+ TableManager _tableMan;
+};
+
+} // namespace test
+} // namespace fef
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/fef/test/indexenvironmentbuilder.cpp b/searchlib/src/vespa/searchlib/fef/test/indexenvironmentbuilder.cpp
new file mode 100644
index 00000000000..4682dbfe00f
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/test/indexenvironmentbuilder.cpp
@@ -0,0 +1,28 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include "indexenvironmentbuilder.h"
+
+namespace search {
+namespace fef {
+namespace test {
+
+IndexEnvironmentBuilder::IndexEnvironmentBuilder(IndexEnvironment &env) :
+ _env(env)
+{
+ // empty
+}
+
+IndexEnvironmentBuilder &
+IndexEnvironmentBuilder::addField(const FieldType &type,
+ const CollectionType &coll,
+ const vespalib::string &name)
+{
+ uint32_t idx = _env.getFields().size();
+ FieldInfo field(type, coll, name, idx);
+ _env.getFields().push_back(field);
+ return *this;
+}
+
+} // namespace test
+} // namespace fef
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/fef/test/indexenvironmentbuilder.h b/searchlib/src/vespa/searchlib/fef/test/indexenvironmentbuilder.h
new file mode 100644
index 00000000000..15640eb7bfe
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/test/indexenvironmentbuilder.h
@@ -0,0 +1,50 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include "indexenvironment.h"
+
+namespace search {
+namespace fef {
+namespace test {
+
+/**
+ * This class is used to setup an IndexEnvironment for testing.
+ */
+class IndexEnvironmentBuilder {
+public:
+ /**
+ * Constructs a new index environment builder.
+ *
+ * @param env The index environment to build to.
+ */
+ IndexEnvironmentBuilder(IndexEnvironment &env);
+
+ /**
+ * Add a field to the index environment. This is analogous to adding fields to a document.
+ *
+ * @param type The type of field to add.
+ * @param coll collection type
+ * @param name The name of the field.
+ */
+ IndexEnvironmentBuilder &addField(const FieldType &type,
+ const CollectionType &coll,
+ const vespalib::string &name);
+
+ /** Returns a reference to the index environment of this. */
+ IndexEnvironment &getIndexEnv() { return _env; }
+
+ /** Returns a const reference to the index environment of this. */
+ const IndexEnvironment &getIndexEnv() const { return _env; }
+
+private:
+ IndexEnvironmentBuilder(const IndexEnvironmentBuilder &); // hide
+ IndexEnvironmentBuilder & operator=(const IndexEnvironmentBuilder &); // hide
+
+private:
+ IndexEnvironment &_env;
+};
+
+} // namespace test
+} // namespace fef
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/fef/test/matchdatabuilder.cpp b/searchlib/src/vespa/searchlib/fef/test/matchdatabuilder.cpp
new file mode 100644
index 00000000000..2324198d302
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/test/matchdatabuilder.cpp
@@ -0,0 +1,184 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".fef.matchdatabuilder");
+
+#include <vespa/vespalib/util/stringfmt.h>
+#include <vespa/searchlib/attribute/attributeguard.h>
+#include <vespa/searchlib/attribute/attributemanager.h>
+#include <vespa/searchlib/attribute/attributevector.h>
+#include <vespa/searchlib/attribute/attributevector.hpp>
+#include <vespa/searchlib/attribute/integerbase.h>
+#include <vespa/searchlib/attribute/floatbase.h>
+#include <vespa/searchlib/attribute/stringbase.h>
+#include "matchdatabuilder.h"
+
+namespace search {
+namespace fef {
+namespace test {
+
+MatchDataBuilder::MatchDataBuilder(QueryEnvironment &queryEnv, MatchData &data) :
+ _queryEnv(queryEnv),
+ _data(data),
+ _index(),
+ _match()
+{
+ // reset all match data objects and set docId to 'endId' (aka -1)
+ for (TermFieldHandle handle = 0; handle < _data.getNumTermFields(); ++handle) {
+ _data.resolveTermField(handle)->reset(TermFieldMatchData::invalidId());
+ }
+ _data.setDocId(TermFieldMatchData::invalidId());
+}
+
+TermFieldMatchData *
+MatchDataBuilder::getTermFieldMatchData(uint32_t termId, uint32_t fieldId)
+{
+ const ITermData *term = _queryEnv.getTerm(termId);
+ if (term == NULL) {
+ return NULL;
+ }
+ const ITermFieldData *field = term->lookupField(fieldId);
+ if (field == NULL || field->getHandle() >= _data.getNumTermFields()) {
+ return NULL;
+ }
+ return _data.resolveTermField(field->getHandle());
+}
+
+
+bool
+MatchDataBuilder::setFieldLength(const vespalib::string &fieldName, uint32_t length)
+{
+ const FieldInfo *info = _queryEnv.getIndexEnv()->getFieldByName(fieldName);
+ if (info == NULL) {
+ LOG(error, "Field '%s' does not exist.", fieldName.c_str());
+ return false;
+ }
+ _index[info->id()].fieldLength = length;
+ return true;
+}
+
+bool
+MatchDataBuilder::addElement(const vespalib::string &fieldName, int32_t weight, uint32_t length)
+{
+ const FieldInfo *info = _queryEnv.getIndexEnv()->getFieldByName(fieldName);
+ if (info == NULL) {
+ LOG(error, "Field '%s' does not exist.", fieldName.c_str());
+ return false;
+ }
+ _index[info->id()].elements.push_back(MyElement(weight, length));
+ return true;
+}
+
+bool
+MatchDataBuilder::addOccurence(const vespalib::string &fieldName, uint32_t termId, uint32_t pos, uint32_t element)
+{
+ const FieldInfo *info = _queryEnv.getIndexEnv()->getFieldByName(fieldName);
+ if (info == NULL) {
+ LOG(error, "Field '%s' does not exist.", fieldName.c_str());
+ return false;
+ }
+ if (termId >= _queryEnv.getNumTerms()) {
+ LOG(error, "Term id '%u' is invalid.", termId);
+ return false;
+ }
+ const ITermFieldData *tfd = _queryEnv.getTerm(termId)->lookupField(info->id());
+ if (tfd == NULL) {
+ LOG(error, "Field '%s' is not searched by the given term.",
+ fieldName.c_str());
+ return false;
+ }
+ _match[termId][info->id()].insert(Position(pos, element));
+ return true;
+}
+
+bool
+MatchDataBuilder::setWeight(const vespalib::string &fieldName, uint32_t termId, int32_t weight)
+{
+ const FieldInfo *info = _queryEnv.getIndexEnv()->getFieldByName(fieldName);
+ if (info == NULL) {
+ LOG(error, "Field '%s' does not exist.", fieldName.c_str());
+ return false;
+ }
+ if (termId >= _queryEnv.getNumTerms()) {
+ LOG(error, "Term id '%u' is invalid.", termId);
+ return false;
+ }
+ const ITermFieldData *tfd = _queryEnv.getTerm(termId)->lookupField(info->id());
+ if (tfd == NULL) {
+ LOG(error, "Field '%s' is not searched by the given term.",
+ fieldName.c_str());
+ return false;
+ }
+ uint32_t eid = _index[info->id()].elements.size();
+ _match[termId][info->id()].clear();
+ _match[termId][info->id()].insert(Position(0, eid));
+ _index[info->id()].elements.push_back(MyElement(weight, 1));
+ return true;
+}
+
+bool
+MatchDataBuilder::apply(uint32_t docId)
+{
+ _data.setDocId(docId);
+
+ // For each term, do
+ for (TermMap::const_iterator term_iter = _match.begin();
+ term_iter != _match.end(); ++term_iter)
+ {
+ uint32_t termId = term_iter->first;
+
+ for (FieldPositions::const_iterator field_iter = term_iter->second.begin();
+ field_iter != term_iter->second.end(); ++field_iter)
+ {
+ uint32_t fieldId = field_iter->first;
+ TermFieldMatchData *match = getTermFieldMatchData(termId, fieldId);
+
+ // Make sure there is a corresponding term field match data object.
+ if (match == NULL) {
+ LOG(error, "Term id '%u' is invalid.", termId);
+ return false;
+ }
+ match->reset(docId);
+
+ // find field data
+ MyField field;
+ IndexData::const_iterator idxItr = _index.find(fieldId);
+ if (idxItr != _index.end()) {
+ field = idxItr->second;
+ }
+
+ // For log, attempt to lookup field name.
+ const FieldInfo *info = _queryEnv.getIndexEnv()->getField(fieldId);
+ vespalib::string name = info != NULL ? info->name() : vespalib::make_string("%d", fieldId).c_str();
+
+ // For each occurence of that term, in that field, do
+ for (Positions::const_iterator occ_iter = field_iter->second.begin();
+ occ_iter != field_iter->second.end(); occ_iter++)
+ {
+ // Append a term match position to the term match data.
+ Position occ = *occ_iter;
+ match->appendPosition(TermFieldMatchDataPosition(
+ occ.eid,
+ occ.pos,
+ field.getWeight(occ.eid),
+ field.getLength(occ.eid)));
+ LOG(debug,
+ "Added occurence of term '%u' in field '%s'"
+ " at position '%u'.",
+ termId, name.c_str(), occ.pos);
+ if (occ.pos >= field.getLength(occ.eid)) {
+ LOG(warning,
+ "Added occurence of term '%u' in field '%s'"
+ " at position '%u' >= fieldLen '%u'.",
+ termId, name.c_str(), occ.pos, field.getLength(occ.eid));
+ }
+ }
+ }
+ }
+ // Return ok.
+ return true;
+}
+
+} // namespace test
+} // namespace fef
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/fef/test/matchdatabuilder.h b/searchlib/src/vespa/searchlib/fef/test/matchdatabuilder.h
new file mode 100644
index 00000000000..6efc335dd94
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/test/matchdatabuilder.h
@@ -0,0 +1,150 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <memory>
+#include <set>
+#include <vespa/searchlib/fef/matchdata.h>
+#include "queryenvironment.h"
+
+namespace search {
+namespace fef {
+namespace test {
+
+class MatchDataBuilder {
+public:
+ struct MyElement {
+ int32_t weight;
+ uint32_t length;
+ MyElement(int32_t w, uint32_t l) : weight(w), length(l) {}
+ };
+ struct MyField {
+ uint32_t fieldLength;
+ std::vector<MyElement> elements;
+ MyField() : fieldLength(0), elements() {}
+ MyElement &getElement(uint32_t eid) {
+ while (elements.size() <= eid) {
+ elements.push_back(MyElement(0, 0));
+ }
+ return elements[eid];
+ }
+ int32_t getWeight(uint32_t eid) const {
+ if (eid < elements.size()) {
+ return elements[eid].weight;
+ }
+ return 1;
+ }
+ uint32_t getLength(uint32_t eid) const {
+ if (eid < elements.size()) {
+ return elements[eid].length;
+ }
+ return fieldLength;
+ }
+ };
+ struct Position {
+ uint32_t pos;
+ uint32_t eid;
+ Position(uint32_t p, uint32_t e) : pos(p), eid(e) {}
+ bool operator<(const Position &other) const {
+ if (eid == other.eid) {
+ return pos < other.pos;
+ }
+ return eid < other.eid;
+ }
+ };
+
+ /**
+ * Convenience typedefs.
+ */
+ typedef std::unique_ptr<MatchDataBuilder> UP;
+ typedef std::map<uint32_t, MyField> IndexData; // index data per field
+ typedef std::set<Position> Positions; // match information for a single term and field combination
+ typedef std::map<uint32_t, Positions> FieldPositions; // position information per field for a single term
+ typedef std::map<uint32_t, FieldPositions> TermMap; // maps term id to map of position information per field
+
+public:
+ /**
+ * Constructs a new match data builder. This is what you should use when building match data since there are alot of
+ * interconnections that must be set up correctly.
+ *
+ * @param queryEnv The query environment to build for.
+ * @param data The match data to build in.
+ */
+ MatchDataBuilder(QueryEnvironment &queryEnv, MatchData &data);
+
+ /**
+ * Returns the term field match data that corresponds to a given
+ * term id and field id combination. This goes by way of the query
+ * environment to find the handler of the given term id.
+ *
+ * @param termId The id of the term whose data to return.
+ * @param fieldId The id of the field whose data to return.
+ * @return The corresponding term match data.
+ */
+ TermFieldMatchData *getTermFieldMatchData(uint32_t termId, uint32_t fieldId);
+
+ /**
+ * Sets the length of a named field. This will fail if the named field does not exist.
+ *
+ * @param fieldName The name of the field.
+ * @param length The length to set.
+ * @return Whether or not the field length could be set.
+ */
+ bool setFieldLength(const vespalib::string &fieldName, uint32_t length);
+
+ /**
+ * Adds an element to a named field. This will fail if the named field does not exist.
+ *
+ * @param fieldName The name of the field.
+ * @param weight The weight of the element.
+ * @param length The length of the element.
+ * @return Whether or not the element could be added.
+ */
+ bool addElement(const vespalib::string &fieldName, int32_t weight, uint32_t length);
+
+ /**
+ * Adds an occurence of a term to the named field, at the given
+ * position. This will fail if the named field does not exist. The
+ * list of occurences is implemented as a set, so there is no need
+ * to add these in order.
+ *
+ * @param fieldName The name of the field.
+ * @param termId The id of the term to register an occurence for.
+ * @param pos The position of the occurence.
+ * @param element The element containing the occurence.
+ * @return Whether or not the occurence could be added.
+ */
+ bool addOccurence(const vespalib::string &fieldName, uint32_t termId, uint32_t pos, uint32_t element = 0);
+
+ /**
+ * Sets the weight for an attribute match.
+ *
+ * @param fieldName The name of the field.
+ * @param termId The id of the term to register an occurence for.
+ * @param weight The weight of the match.
+ * @return Whether or not the occurence could be added.
+ **/
+ bool setWeight(const vespalib::string &fieldName, uint32_t termId, int32_t weight);
+
+ /**
+ * Apply the content of this builder to the underlying match data.
+ *
+ * @param docId the document id
+ * @return Whether or not the content of this could be applied.
+ */
+ bool apply(uint32_t docId);
+
+private:
+ MatchDataBuilder(const MatchDataBuilder &); // hide
+ MatchDataBuilder & operator=(const MatchDataBuilder &); // hide
+
+private:
+ QueryEnvironment &_queryEnv;
+ MatchData &_data;
+ IndexData _index;
+ TermMap _match;
+};
+
+} // namespace test
+} // namespace fef
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/fef/test/plugin/.gitignore b/searchlib/src/vespa/searchlib/fef/test/plugin/.gitignore
new file mode 100644
index 00000000000..583460ae288
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/test/plugin/.gitignore
@@ -0,0 +1,3 @@
+*.So
+.depend
+Makefile
diff --git a/searchlib/src/vespa/searchlib/fef/test/plugin/CMakeLists.txt b/searchlib/src/vespa/searchlib/fef/test/plugin/CMakeLists.txt
new file mode 100644
index 00000000000..00c75637129
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/test/plugin/CMakeLists.txt
@@ -0,0 +1,12 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_library(searchlib_fef_test_plugin OBJECT
+ SOURCES
+ double.cpp
+ sum.cpp
+ staticrank.cpp
+ chain.cpp
+ cfgvalue.cpp
+ query.cpp
+ setup.cpp
+ DEPENDS
+)
diff --git a/searchlib/src/vespa/searchlib/fef/test/plugin/cfgvalue.cpp b/searchlib/src/vespa/searchlib/fef/test/plugin/cfgvalue.cpp
new file mode 100644
index 00000000000..f36c7588b4b
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/test/plugin/cfgvalue.cpp
@@ -0,0 +1,58 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".fef.cfgvalue");
+
+#include <vespa/searchlib/fef/properties.h>
+#include <sstream>
+#include "cfgvalue.h"
+
+namespace search {
+namespace fef {
+namespace test {
+
+CfgValueBlueprint::CfgValueBlueprint() :
+ Blueprint("test_cfgvalue"),
+ _values()
+{
+ // empty
+}
+
+void
+CfgValueBlueprint::visitDumpFeatures(const IIndexEnvironment &indexEnv, IDumpFeatureVisitor &visitor) const
+{
+ Property p = indexEnv.getProperties().lookup(getBaseName(), "dump");
+ for (uint32_t i = 0; i < p.size(); ++i) {
+ visitor.visitDumpFeature(p.getAt(i));
+ }
+}
+
+bool
+CfgValueBlueprint::setup(const IIndexEnvironment &indexEnv, const StringVector &params)
+{
+ (void) params;
+ Property p = indexEnv.getProperties().lookup(getName(), "value");
+ for (uint32_t i = 0; i < p.size(); ++i) {
+ std::istringstream iss(p.getAt(i));
+ feature_t value;
+ iss >> std::dec >> value;
+ _values.push_back(value);
+
+ if (iss.fail()) {
+ return false;
+ }
+
+ std::ostringstream name;
+ name << i;
+ std::ostringstream desc;
+ desc << "value " << i;
+ describeOutput(name.str(), desc.str());
+ // we have no inputs
+ }
+ return true;
+}
+
+} // namespace test
+} // namespace fef
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/fef/test/plugin/cfgvalue.h b/searchlib/src/vespa/searchlib/fef/test/plugin/cfgvalue.h
new file mode 100644
index 00000000000..0fc9baac424
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/test/plugin/cfgvalue.h
@@ -0,0 +1,34 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <string>
+#include <vector>
+#include <vespa/searchlib/features/valuefeature.h>
+#include <vespa/searchlib/fef/blueprint.h>
+#include <vespa/searchlib/fef/featureexecutor.h>
+
+namespace search {
+namespace fef {
+namespace test {
+
+class CfgValueBlueprint : public Blueprint
+{
+private:
+ std::vector<feature_t> _values;
+
+public:
+ CfgValueBlueprint();
+ virtual void visitDumpFeatures(const IIndexEnvironment & indexEnv, IDumpFeatureVisitor & visitor) const;
+ virtual Blueprint::UP createInstance() const { return Blueprint::UP(new CfgValueBlueprint()); }
+ virtual bool setup(const IIndexEnvironment & indexEnv, const StringVector & params);
+ virtual FeatureExecutor::LP createExecutor(const IQueryEnvironment & queryEnv) const {
+ (void) queryEnv;
+ return FeatureExecutor::LP(new search::features::ValueExecutor(_values));
+ }
+};
+
+} // namespace test
+} // namespace fef
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/fef/test/plugin/chain.cpp b/searchlib/src/vespa/searchlib/fef/test/plugin/chain.cpp
new file mode 100644
index 00000000000..33567dd1a67
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/test/plugin/chain.cpp
@@ -0,0 +1,69 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".fef.chain");
+#include "chain.h"
+
+#include <sstream>
+
+namespace search {
+namespace fef {
+namespace test {
+
+ChainExecutor::ChainExecutor() :
+ FeatureExecutor()
+{
+}
+
+void
+ChainExecutor::execute(MatchData & data)
+{
+ *data.resolveFeature(outputs()[0]) = *data.resolveFeature(inputs()[0]);
+}
+
+
+ChainBlueprint::ChainBlueprint() :
+ Blueprint("chain")
+{
+}
+
+bool
+ChainBlueprint::setup(const IIndexEnvironment & indexEnv, const StringVector & params)
+{
+ (void) indexEnv;
+ if (params.size() != 3) { // [type, children, value]
+ return false;
+ }
+ const std::string & type = params[0];
+ const std::string & children = params[1];
+ const std::string & value = params[2];
+
+ uint32_t numChildren;
+ std::istringstream iss(children);
+ iss >> std::dec >> numChildren;
+ std::ostringstream oss;
+ if (numChildren == 0) {
+ return false;
+ }
+ if (numChildren == 1) {
+ if (type == "basic") {
+ oss << "value(" << value << ")"; // value = input to value executor
+ defineInput(oss.str());
+ } else if (type == "cycle") {
+ oss << "chain(" << type << "," << value << "," << value << ")"; // value = where to insert the cycle
+ defineInput(oss.str());
+ } else {
+ return false;
+ }
+ } else {
+ oss << "chain(" << type << "," << (numChildren - 1) << "," << value << ")";
+ defineInput(oss.str());
+ }
+ describeOutput("out", "chain");
+ return true;
+}
+
+} // namespace test
+} // namespace fef
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/fef/test/plugin/chain.h b/searchlib/src/vespa/searchlib/fef/test/plugin/chain.h
new file mode 100644
index 00000000000..ca65012fa0f
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/test/plugin/chain.h
@@ -0,0 +1,38 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <string>
+#include <vector>
+#include <vespa/searchlib/fef/blueprint.h>
+#include <vespa/searchlib/fef/featureexecutor.h>
+
+namespace search {
+namespace fef {
+namespace test {
+
+class ChainExecutor : public FeatureExecutor
+{
+public:
+ ChainExecutor();
+ virtual void execute(MatchData & data);
+};
+
+
+class ChainBlueprint : public Blueprint
+{
+public:
+ ChainBlueprint();
+ virtual void visitDumpFeatures(const IIndexEnvironment &, IDumpFeatureVisitor &) const {}
+ virtual Blueprint::UP createInstance() const { return Blueprint::UP(new ChainBlueprint()); }
+ virtual bool setup(const IIndexEnvironment & indexEnv, const StringVector & params);
+ virtual FeatureExecutor::LP createExecutor(const IQueryEnvironment & queryEnv) const {
+ (void) queryEnv;
+ return FeatureExecutor::LP(new ChainExecutor());
+ }
+};
+
+} // namespace test
+} // namespace fef
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/fef/test/plugin/double.cpp b/searchlib/src/vespa/searchlib/fef/test/plugin/double.cpp
new file mode 100644
index 00000000000..724b8597ece
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/test/plugin/double.cpp
@@ -0,0 +1,59 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".fef.double");
+#include "double.h"
+
+#include <vespa/vespalib/stllike/asciistream.h>
+#include <vespa/searchlib/fef/featurenamebuilder.h>
+
+namespace search {
+namespace fef {
+namespace test {
+
+void
+DoubleExecutor::execute(MatchData & data)
+{
+ assert(inputs().size() == _cnt);
+ assert(outputs().size() == _cnt);
+ for (uint32_t i = 0; i < _cnt; ++i) {
+ *data.resolveFeature(outputs()[i]) = *data.resolveFeature(inputs()[i]) * 2;
+ }
+}
+
+
+DoubleBlueprint::DoubleBlueprint() :
+ Blueprint("double"),
+ _cnt(0)
+{
+}
+
+void
+DoubleBlueprint::visitDumpFeatures(const IIndexEnvironment & indexEnv, IDumpFeatureVisitor & visitor) const
+{
+ (void) indexEnv;
+ (void) visitor;
+}
+
+bool
+DoubleBlueprint::setup(const IIndexEnvironment & indexEnv, const StringVector & params)
+{
+ (void) indexEnv;
+ for (uint32_t i = 0; i < params.size(); ++i) {
+ defineInput(params[i]);
+ }
+ for (uint32_t i = 0; i < params.size(); ++i) {
+ vespalib::asciistream name;
+ name << i;
+ vespalib::asciistream desc;
+ desc << "doubled value " << i;
+ describeOutput(name.str(), desc.str());
+ }
+ _cnt = params.size();
+ return true;
+}
+
+} // namespace test
+} // namespace fef
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/fef/test/plugin/double.h b/searchlib/src/vespa/searchlib/fef/test/plugin/double.h
new file mode 100644
index 00000000000..af69a4fbeec
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/test/plugin/double.h
@@ -0,0 +1,42 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <string>
+#include <vector>
+#include <vespa/searchlib/fef/blueprint.h>
+#include <vespa/searchlib/fef/featureexecutor.h>
+
+namespace search {
+namespace fef {
+namespace test {
+
+class DoubleExecutor : public FeatureExecutor
+{
+private:
+ size_t _cnt;
+public:
+ DoubleExecutor(size_t cnt) : _cnt(cnt) {}
+ virtual void execute(MatchData & data);
+};
+
+
+class DoubleBlueprint : public Blueprint
+{
+private:
+ size_t _cnt;
+public:
+ DoubleBlueprint();
+ virtual void visitDumpFeatures(const IIndexEnvironment & indexEnv, IDumpFeatureVisitor & visitor) const;
+ virtual Blueprint::UP createInstance() const { return Blueprint::UP(new DoubleBlueprint()); }
+ virtual bool setup(const IIndexEnvironment & indexEnv, const StringVector & params);
+ virtual FeatureExecutor::LP createExecutor(const IQueryEnvironment & queryEnv) const {
+ (void) queryEnv;
+ return FeatureExecutor::LP(new DoubleExecutor(_cnt));
+ }
+};
+
+} // namespace test
+} // namespace fef
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/fef/test/plugin/query.cpp b/searchlib/src/vespa/searchlib/fef/test/plugin/query.cpp
new file mode 100644
index 00000000000..4308bd4908d
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/test/plugin/query.cpp
@@ -0,0 +1,46 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".fef.query");
+
+#include <vespa/searchlib/features/valuefeature.h>
+#include <vespa/searchlib/fef/properties.h>
+#include <sstream>
+#include "query.h"
+
+namespace search {
+namespace fef {
+namespace test {
+
+QueryBlueprint::QueryBlueprint() :
+ Blueprint("test_query"),
+ _key()
+{
+ // empty
+}
+
+bool
+QueryBlueprint::setup(const IIndexEnvironment &indexEnv, const StringVector &params)
+{
+ (void) indexEnv;
+ if (params.size() != 1) {
+ return false;
+ }
+ _key = params[0];
+ describeOutput("value", "the parameter looked up in the rank properties and converted to a float");
+ return true;
+}
+
+FeatureExecutor::LP
+QueryBlueprint::createExecutor(const IQueryEnvironment &queryEnv) const
+{
+ std::vector<feature_t> values;
+ std::string val = queryEnv.getProperties().lookup(_key).get("0.0");
+ values.push_back(strtod(val.data(), NULL));
+ return FeatureExecutor::LP(new search::features::ValueExecutor(values));
+}
+
+} // namespace test
+} // namespace fef
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/fef/test/plugin/query.h b/searchlib/src/vespa/searchlib/fef/test/plugin/query.h
new file mode 100644
index 00000000000..95a56ddf59a
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/test/plugin/query.h
@@ -0,0 +1,30 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <string>
+#include <vector>
+#include <vespa/searchlib/fef/blueprint.h>
+#include <vespa/searchlib/fef/featureexecutor.h>
+
+namespace search {
+namespace fef {
+namespace test {
+
+class QueryBlueprint : public Blueprint
+{
+private:
+ std::string _key;
+
+public:
+ QueryBlueprint();
+ virtual void visitDumpFeatures(const IIndexEnvironment &, IDumpFeatureVisitor &) const {};
+ virtual Blueprint::UP createInstance() const { return Blueprint::UP(new QueryBlueprint()); }
+ virtual bool setup(const IIndexEnvironment &indexEnv, const StringVector &params);
+ virtual FeatureExecutor::LP createExecutor(const IQueryEnvironment &queryEnv) const;
+};
+
+} // namespace test
+} // namespace fef
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/fef/test/plugin/setup.cpp b/searchlib/src/vespa/searchlib/fef/test/plugin/setup.cpp
new file mode 100644
index 00000000000..94a74947cea
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/test/plugin/setup.cpp
@@ -0,0 +1,35 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".fef.setup");
+
+#include <vespa/searchlib/fef/blueprint.h>
+#include <string>
+
+#include "cfgvalue.h"
+#include "chain.h"
+#include "double.h"
+#include "query.h"
+#include "setup.h"
+#include "staticrank.h"
+#include "sum.h"
+
+namespace search {
+namespace fef {
+namespace test {
+
+void setup_fef_test_plugin(IBlueprintRegistry & registry)
+{
+ // register blueprints
+ registry.addPrototype(Blueprint::SP(new DoubleBlueprint()));
+ registry.addPrototype(Blueprint::SP(new SumBlueprint()));
+ registry.addPrototype(Blueprint::SP(new StaticRankBlueprint()));
+ registry.addPrototype(Blueprint::SP(new ChainBlueprint()));
+ registry.addPrototype(Blueprint::SP(new CfgValueBlueprint()));
+ registry.addPrototype(Blueprint::SP(new QueryBlueprint()));
+}
+
+} // namespace test
+} // namespace fef
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/fef/test/plugin/setup.h b/searchlib/src/vespa/searchlib/fef/test/plugin/setup.h
new file mode 100644
index 00000000000..0204c12663a
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/test/plugin/setup.h
@@ -0,0 +1,16 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/fef/iblueprintregistry.h>
+
+namespace search {
+namespace fef {
+namespace test {
+
+void setup_fef_test_plugin(IBlueprintRegistry & registry);
+
+} // namespace test
+} // namespace fef
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/fef/test/plugin/staticrank.cpp b/searchlib/src/vespa/searchlib/fef/test/plugin/staticrank.cpp
new file mode 100644
index 00000000000..502115b2b1b
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/test/plugin/staticrank.cpp
@@ -0,0 +1,59 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".fef.staticrank");
+#include <vespa/searchcommon/attribute/attributecontent.h>
+#include "staticrank.h"
+
+namespace search {
+namespace fef {
+namespace test {
+
+StaticRankExecutor::StaticRankExecutor(const search::attribute::IAttributeVector * attribute) :
+ FeatureExecutor(),
+ _attribute(attribute)
+{
+}
+
+void
+StaticRankExecutor::execute(MatchData & data)
+{
+ uint32_t doc = data.getDocId();
+ search::attribute::FloatContent staticRank;
+ if (_attribute != NULL) {
+ staticRank.allocate(_attribute->getMaxValueCount());
+ staticRank.fill(*_attribute, doc);
+ }
+ *data.resolveFeature(outputs()[0]) = static_cast<feature_t>(staticRank[0]);
+}
+
+
+StaticRankBlueprint::StaticRankBlueprint() :
+ Blueprint("staticrank"),
+ _attributeName()
+{
+}
+
+bool
+StaticRankBlueprint::setup(const IIndexEnvironment & indexEnv, const StringVector & params)
+{
+ (void) indexEnv;
+ if (params.size() != 1) {
+ return false;
+ }
+ _attributeName = params[0];
+ describeOutput("out", "static rank");
+ return true;
+}
+
+FeatureExecutor::LP
+StaticRankBlueprint::createExecutor(const IQueryEnvironment & queryEnv) const
+{
+ const search::attribute::IAttributeVector * av = queryEnv.getAttributeContext().getAttribute(_attributeName);
+ return FeatureExecutor::LP(new StaticRankExecutor(av));
+}
+
+} // namespace test
+} // namespace fef
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/fef/test/plugin/staticrank.h b/searchlib/src/vespa/searchlib/fef/test/plugin/staticrank.h
new file mode 100644
index 00000000000..3b6ee1e5b76
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/test/plugin/staticrank.h
@@ -0,0 +1,41 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <string>
+#include <vector>
+#include <vespa/searchlib/fef/blueprint.h>
+#include <vespa/searchlib/fef/featureexecutor.h>
+
+namespace search {
+namespace fef {
+namespace test {
+
+class StaticRankExecutor : public FeatureExecutor
+{
+private:
+ const search::attribute::IAttributeVector * _attribute;
+
+public:
+ StaticRankExecutor(const search::attribute::IAttributeVector * attribute);
+ virtual void execute(MatchData & data);
+};
+
+
+class StaticRankBlueprint : public Blueprint
+{
+private:
+ std::string _attributeName;
+
+public:
+ StaticRankBlueprint();
+ virtual void visitDumpFeatures(const IIndexEnvironment &, IDumpFeatureVisitor &) const {}
+ virtual Blueprint::UP createInstance() const { return Blueprint::UP(new StaticRankBlueprint()); }
+ virtual bool setup(const IIndexEnvironment & indexEnv, const StringVector & params);
+ virtual FeatureExecutor::LP createExecutor(const IQueryEnvironment & queryEnv) const;
+};
+
+} // namespace test
+} // namespace fef
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/fef/test/plugin/sum.cpp b/searchlib/src/vespa/searchlib/fef/test/plugin/sum.cpp
new file mode 100644
index 00000000000..e5e8e3dedc0
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/test/plugin/sum.cpp
@@ -0,0 +1,74 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".fef.sum");
+#include "sum.h"
+#include <vespa/searchlib/fef/featurenamebuilder.h>
+
+namespace search {
+namespace fef {
+namespace test {
+
+void
+SumExecutor::execute(MatchData & data)
+{
+ feature_t sum = 0.0f;
+ for (uint32_t i = 0; i < inputs().size(); ++i) {
+ sum += *data.resolveFeature(inputs()[i]);
+ }
+ *data.resolveFeature(outputs()[0]) = sum;
+}
+
+
+SumBlueprint::SumBlueprint() :
+ Blueprint("mysum")
+{
+}
+
+void
+SumBlueprint::visitDumpFeatures(const IIndexEnvironment & indexEnv, IDumpFeatureVisitor & visitor) const
+{
+ (void) indexEnv;
+#if 1
+ (void) visitor;
+#else
+ // Use the feature name builder to make sure that the naming of features are quoted correctly.
+ typedef FeatureNameBuilder FNB;
+
+ // This blueprint dumps 2 ranking features. This is a very tricky feature in that it's dependencies
+ // are given by its parameters, so the definition of features implicitly declares this tree. This
+ // blueprint can actually produce any number of features, but only the following 2 are ever dumped.
+
+ // The first feature this produces is "sum(value(4),value(16))", quoted correctly by the feature name
+ // builder. The feature "value" simply returns the value of its single parameter, so this feature will
+ // always produce the output "20".
+ visitor.visitDumpFeature(FNB().baseName("sum").parameter("value(4)").parameter("value(16)").buildName());
+
+ // The second feature is "sum(double(value(8)),double(value(32)))", again quoted by the feature name
+ // builder. The feature "double" returns twice the value of its single input. This means that this
+ // feature will always produce the output "80" (= 8*2 + 32*2).
+ std::string d1 = FNB().baseName("double").parameter("value(8)").buildName();
+ std::string d2 = FNB().baseName("double").parameter("value(32)").buildName();
+ visitor.visitDumpFeature(FNB().baseName("sum").parameter(d1).parameter(d2).buildName());
+#endif
+}
+
+bool
+SumBlueprint::setup(const IIndexEnvironment & indexEnv, const StringVector & params)
+{
+ (void) indexEnv;
+
+ // This blueprints expects all parameters to be complete feature names, so depend on these.
+ for (uint32_t i = 0; i < params.size(); ++i) {
+ defineInput(params[i]);
+ }
+
+ // Produce only a single output named "out".
+ describeOutput("out", "The sum of the values of all parameter features.");
+ return true;
+}
+
+} // namespace test
+} // namespace fef
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/fef/test/plugin/sum.h b/searchlib/src/vespa/searchlib/fef/test/plugin/sum.h
new file mode 100644
index 00000000000..d54d31bb5d9
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/test/plugin/sum.h
@@ -0,0 +1,38 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <string>
+#include <vector>
+#include <vespa/searchlib/fef/blueprint.h>
+#include <vespa/searchlib/fef/featureexecutor.h>
+
+namespace search {
+namespace fef {
+namespace test {
+
+class SumExecutor : public FeatureExecutor
+{
+public:
+ virtual bool isPure() { return true; }
+ virtual void execute(MatchData & data);
+};
+
+
+class SumBlueprint : public Blueprint
+{
+public:
+ SumBlueprint();
+ virtual void visitDumpFeatures(const IIndexEnvironment & indexEnv, IDumpFeatureVisitor & visitor) const;
+ virtual Blueprint::UP createInstance() const { return Blueprint::UP(new SumBlueprint()); }
+ virtual bool setup(const IIndexEnvironment & indexEnv, const StringVector & params);
+ virtual FeatureExecutor::LP createExecutor(const IQueryEnvironment & queryEnv) const {
+ (void) queryEnv;
+ return FeatureExecutor::LP(new SumExecutor());
+ }
+};
+
+} // namespace test
+} // namespace fef
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/fef/test/queryenvironment.cpp b/searchlib/src/vespa/searchlib/fef/test/queryenvironment.cpp
new file mode 100644
index 00000000000..af68e2a5163
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/test/queryenvironment.cpp
@@ -0,0 +1,20 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include "queryenvironment.h"
+
+namespace search {
+namespace fef {
+namespace test {
+
+QueryEnvironment::QueryEnvironment(IndexEnvironment *env)
+ : _indexEnv(env),
+ _terms(),
+ _properties(),
+ _location(),
+ _attrCtx((env == NULL) ? attribute::IAttributeContext::UP() : env->getAttributeManager().createContext())
+{
+}
+
+} // namespace test
+} // namespace fef
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/fef/test/queryenvironment.h b/searchlib/src/vespa/searchlib/fef/test/queryenvironment.h
new file mode 100644
index 00000000000..acb454bbfa7
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/test/queryenvironment.h
@@ -0,0 +1,94 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/searchcommon/attribute/iattributecontext.h>
+#include <vespa/searchlib/fef/iqueryenvironment.h>
+#include <vespa/searchlib/fef/location.h>
+#include <vespa/searchlib/fef/simpletermdata.h>
+#include "indexenvironment.h"
+
+namespace search {
+namespace fef {
+namespace test {
+
+/**
+ * Implementation of the IQueryEnvironment interface used for testing.
+ */
+class QueryEnvironment : public IQueryEnvironment
+{
+private:
+ QueryEnvironment(const QueryEnvironment &); // hide
+ QueryEnvironment & operator=(const QueryEnvironment &); // hide
+
+ IndexEnvironment *_indexEnv;
+ std::vector<SimpleTermData> _terms;
+ Properties _properties;
+ Location _location;
+ search::attribute::IAttributeContext::UP _attrCtx;
+
+public:
+ /**
+ * Constructs a new query environment.
+ *
+ * @param indexEnv The index environment of this.
+ */
+ QueryEnvironment(IndexEnvironment *indexEnv = NULL);
+
+ // Inherit doc from IQueryEnvironment.
+ virtual const Properties &getProperties() const { return _properties; }
+
+ // Inherit doc from IQueryEnvironment.
+ virtual uint32_t getNumTerms() const { return _terms.size(); }
+
+ // Inherit doc from IQueryEnvironment.
+ virtual const ITermData *getTerm(uint32_t idx) const { return idx < _terms.size() ? &_terms[idx] : NULL; }
+
+ // Inherit doc from IQueryEnvironment.
+ virtual const Location & getLocation() const { return _location; }
+
+ // Inherit doc from IQueryEnvironment.
+ virtual const search::attribute::IAttributeContext &getAttributeContext() const { return *_attrCtx; }
+
+ // Inherit doc from IQueryEnvironment.
+ virtual const IIndexEnvironment &getIndexEnvironment() const { assert(_indexEnv != NULL); return *_indexEnv; }
+
+ /** Returns a reference to the index environment of this. */
+ IndexEnvironment *getIndexEnv() { return _indexEnv; }
+
+ /** Returns a const reference to the index environment of this. */
+ const IndexEnvironment *getIndexEnv() const { return _indexEnv; }
+
+ /** Sets the index environment of this. */
+ QueryEnvironment &setIndexEnv(IndexEnvironment *indexEnv) {
+ _indexEnv = indexEnv;
+ _attrCtx = ((indexEnv == NULL) ? search::attribute::IAttributeContext::UP() :
+ indexEnv->getAttributeManager().createContext());
+ return *this;
+ }
+
+ /**
+ * Override which attribute manager to use.
+ *
+ * @param vecMan the manager we want to use
+ **/
+ void overrideAttributeManager(AttributeManager *vecMan) {
+ _attrCtx = ((vecMan == NULL) ? search::attribute::IAttributeContext::UP() : vecMan->createContext());
+ }
+
+ /** Returns a reference to the list of term data objects. */
+ std::vector<SimpleTermData> &getTerms() { return _terms; }
+
+ /** Returns a const reference to the list of term data objects. */
+ const std::vector<SimpleTermData> &getTerms() const { return _terms; }
+
+ /** Returns a reference to the properties of this. */
+ Properties & getProperties() { return _properties; }
+
+ /** Returns a reference to the location of this. */
+ Location & getLocation() { return _location; }
+};
+
+} // namespace test
+} // namespace fef
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/fef/test/queryenvironmentbuilder.cpp b/searchlib/src/vespa/searchlib/fef/test/queryenvironmentbuilder.cpp
new file mode 100644
index 00000000000..8291a2b7ebd
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/test/queryenvironmentbuilder.cpp
@@ -0,0 +1,66 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include "queryenvironmentbuilder.h"
+
+namespace search {
+namespace fef {
+namespace test {
+
+QueryEnvironmentBuilder::QueryEnvironmentBuilder(QueryEnvironment &env,
+ MatchDataLayout &layout) :
+ _queryEnv(env),
+ _layout(layout)
+{
+ // empty
+}
+
+SimpleTermData &
+QueryEnvironmentBuilder::addAllFields()
+{
+ _queryEnv.getTerms().push_back(SimpleTermData());
+ SimpleTermData &td = _queryEnv.getTerms().back();
+ td.setWeight(search::query::Weight(100));
+ const IIndexEnvironment &idxEnv = *_queryEnv.getIndexEnv();
+ for (uint32_t i = 0; i < idxEnv.getNumFields(); ++i) {
+ const FieldInfo *info = idxEnv.getField(i);
+ SimpleTermFieldData &tfd = td.addField(info->id());
+ tfd.setHandle(_layout.allocTermField(tfd.getFieldId()));
+ }
+ return td;
+}
+
+SimpleTermData *
+QueryEnvironmentBuilder::addIndexNode(const std::vector<vespalib::string> &fieldNames)
+{
+ _queryEnv.getTerms().push_back(SimpleTermData());
+ SimpleTermData &td = _queryEnv.getTerms().back();
+ td.setWeight(search::query::Weight(100));
+ for (uint32_t i = 0; i < fieldNames.size(); ++i) {
+ const FieldInfo *info = _queryEnv.getIndexEnv()->getFieldByName(fieldNames[i]);
+ if (info == NULL || info->type() != FieldType::INDEX) {
+ return NULL;
+ }
+ SimpleTermFieldData &tfd = td.addField(info->id());
+ tfd.setHandle(_layout.allocTermField(tfd.getFieldId()));
+ }
+ return &td;
+}
+
+SimpleTermData *
+QueryEnvironmentBuilder::addAttributeNode(const vespalib::string &attrName)
+{
+ const FieldInfo *info = _queryEnv.getIndexEnv()->getFieldByName(attrName);
+ if (info == NULL || info->type() != FieldType::ATTRIBUTE) {
+ return NULL;
+ }
+ _queryEnv.getTerms().push_back(SimpleTermData());
+ SimpleTermData &td = _queryEnv.getTerms().back();
+ td.setWeight(search::query::Weight(100));
+ SimpleTermFieldData &tfd = td.addField(info->id());
+ tfd.setHandle(_layout.allocTermField(tfd.getFieldId()));
+ return &td;
+}
+
+} // namespace test
+} // namespace fef
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/fef/test/queryenvironmentbuilder.h b/searchlib/src/vespa/searchlib/fef/test/queryenvironmentbuilder.h
new file mode 100644
index 00000000000..2842e4d8ca5
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/test/queryenvironmentbuilder.h
@@ -0,0 +1,71 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/searchlib/fef/matchdatalayout.h>
+#include "queryenvironment.h"
+
+namespace search {
+namespace fef {
+namespace test {
+
+class QueryEnvironmentBuilder {
+public:
+ /**
+ * Constructs a new query environment builder.
+ *
+ * @param queryEnv The query environment to build in.
+ * @param layout The layout of match data to simultaneously update.
+ */
+ QueryEnvironmentBuilder(QueryEnvironment &queryEnv, MatchDataLayout &layout);
+
+ /**
+ * Add a term node searching all known fields to this query
+ * environment. This will update both the environment and the
+ * match data layout.
+ *
+ * @return Reference to the corresponding term data.
+ */
+ SimpleTermData &addAllFields();
+
+ /**
+ * Add a term node searching in the given fields to this query
+ * environment. This will update both the environment and the
+ * match data layout. All fields are required to be of type INDEX.
+ *
+ * @return Pointer to the corresponding term data or NULL if one of the fields does not exists.
+ */
+ SimpleTermData *addIndexNode(const std::vector<vespalib::string> &fieldNames);
+
+ /**
+ * Add an attribute node searching in the given attribute to this query environment.
+ * This will update both the environment and the match data layout.
+ *
+ * @return Pointer to the corresponding term data or NULL if attribute does not exists.
+ */
+ SimpleTermData *addAttributeNode(const vespalib::string & attrName);
+
+ /** Returns a reference to the query environment of this. */
+ QueryEnvironment &getQueryEnv() { return _queryEnv; }
+
+ /** Returns a const reference to the query environment of this. */
+ const QueryEnvironment &getQueryEnv() const { return _queryEnv; }
+
+ /** Returns a reference to the match data layout of this. */
+ MatchDataLayout &getLayout() { return _layout; }
+
+ /** Returns a const reference to the match data layout of this. */
+ const MatchDataLayout &getLayout() const { return _layout; }
+
+private:
+ QueryEnvironmentBuilder(const QueryEnvironmentBuilder &); // hide
+ QueryEnvironmentBuilder & operator=(const QueryEnvironmentBuilder &); // hide
+
+private:
+ QueryEnvironment &_queryEnv;
+ MatchDataLayout &_layout;
+};
+
+} // namespace test
+} // namespace fef
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/fef/test/rankresult.cpp b/searchlib/src/vespa/searchlib/fef/test/rankresult.cpp
new file mode 100644
index 00000000000..bfc61348c1e
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/test/rankresult.cpp
@@ -0,0 +1,113 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".fef.rankresult");
+#include "rankresult.h"
+#include <cmath>
+#include <iostream>
+
+namespace search {
+namespace fef {
+namespace test {
+
+RankResult::RankResult() :
+ _rankScores(),
+ _epsilon(0.0)
+{
+ // empty
+}
+
+RankResult &
+RankResult::addScore(const vespalib::string & featureName, feature_t score)
+{
+ _rankScores[featureName] = score;
+ return *this;
+}
+
+feature_t
+RankResult::getScore(const vespalib::string & featureName) const
+{
+ RankScores::const_iterator itr = _rankScores.find(featureName);
+ if (itr != _rankScores.end()) {
+ return itr->second;
+ }
+ return 0.0f;
+}
+
+bool
+RankResult::operator==(const RankResult & rhs) const
+{
+ return includes(rhs) && rhs.includes(*this);
+}
+
+bool
+RankResult::includes(const RankResult & rhs) const
+{
+ double epsilon = std::max(_epsilon, rhs._epsilon);
+
+ RankScores::const_iterator findItr;
+ for (RankScores::const_iterator itr = rhs._rankScores.begin(); itr != rhs._rankScores.end(); ++itr) {
+ findItr = _rankScores.find(itr->first);
+ if (findItr == _rankScores.end()) {
+ LOG(info, "Did not find expected feature '%s' in this rank result", itr->first.c_str());
+ return false;
+ }
+ if (itr->second < findItr->second - epsilon ||
+ itr->second > findItr->second + epsilon ||
+ (std::isnan(findItr->second) &&
+ !std::isnan(itr->second)))
+ {
+ LOG(info, "Feature '%s' did not have expected score.", itr->first.c_str());
+ LOG(info, "Expected: %f ~ %f", itr->second, epsilon);
+ LOG(info, "Actual : %f", findItr->second);
+ return false;
+ }
+ }
+ return true;
+}
+
+RankResult &
+RankResult::clear()
+{
+ _rankScores.clear();
+ return *this;
+}
+
+std::vector<vespalib::string> &
+RankResult::getKeys(std::vector<vespalib::string> &ret)
+{
+ for (RankScores::const_iterator it = _rankScores.begin(); it != _rankScores.end(); ++it) {
+ ret.push_back(it->first);
+ }
+ return ret;
+}
+
+std::vector<vespalib::string>
+RankResult::getKeys()
+{
+ std::vector<vespalib::string> ret;
+ return getKeys(ret);
+}
+
+RankResult &
+RankResult::setEpsilon(double epsilon) {
+ _epsilon = epsilon;
+ return *this;
+}
+
+double
+RankResult::getEpsilon() const {
+ return _epsilon;
+}
+
+std::ostream & operator<<(std::ostream & os, const RankResult & rhs) {
+ os << "[";
+ for (RankResult::RankScores::const_iterator itr = rhs._rankScores.begin(); itr != rhs._rankScores.end(); ++itr) {
+ os << "['" << itr->first << "' = " << itr->second << "]";
+ }
+ return os << "]";
+}
+
+} // namespace test
+} // namespace fef
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/fef/test/rankresult.h b/searchlib/src/vespa/searchlib/fef/test/rankresult.h
new file mode 100644
index 00000000000..90ac332c87b
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/test/rankresult.h
@@ -0,0 +1,113 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/searchlib/common/feature.h>
+#include <map>
+#include <vespa/vespalib/stllike/string.h>
+#include <vector>
+
+namespace search {
+namespace fef {
+namespace test {
+
+class RankResult {
+public:
+ /**
+ * Convenience typedefs.
+ */
+ typedef std::map<vespalib::string, feature_t> RankScores;
+
+public:
+ /**
+ * Constructs a new rank result.
+ */
+ RankResult();
+
+ /**
+ * Adds a score for the given feature name.
+ *
+ * @param featureName The name of the feature.
+ * @param score The score of that feature.
+ * @return This, to allow chaining.
+ */
+ RankResult &addScore(const vespalib::string & featureName, feature_t score);
+
+ /**
+ * Returns the score of a given feature.
+ *
+ * @param featureName The name of the feature.
+ * @return The score of that feature.
+ */
+ feature_t getScore(const vespalib::string & featureName) const;
+
+ /**
+ * Implements equality operator.
+ *
+ * @param rhs The result to compare to.
+ * @return Whether or not this is equal to the other.
+ */
+ bool operator==(const RankResult & rhs) const;
+
+ /**
+ * Returns whether or not this rank result contains another.
+ *
+ * @param rhs The result to see if this contains.
+ * @return Whether or not this contains the other.
+ */
+ bool includes(const RankResult & rhs) const;
+
+ /**
+ * Clears the content of this map.
+ *
+ * @return This, to allow chaining.
+ */
+ RankResult &clear();
+
+ /**
+ * Fills the given vector with the key strings of this.
+ *
+ * @param ret The vector to fill.
+ * @return Reference to the 'ret' param.
+ */
+ std::vector<vespalib::string> &getKeys(std::vector<vespalib::string> &ret);
+
+ /**
+ * Creates and returns a vector with the key strings of this.
+ *
+ * @return List of all key strings.
+ */
+ std::vector<vespalib::string> getKeys();
+
+ /**
+ * Sets the epsilon used when comparing this rank result to another.
+ *
+ * @param epsilon The new epsilon.
+ * @return This, to allow chaining.
+ */
+ RankResult &setEpsilon(double epsilon);
+
+ /**
+ * Returns the epsilon used when comparing this rank result to another.
+ *
+ * @return The epsilon.
+ */
+ double getEpsilon() const;
+
+ /**
+ * Implements streaming operator.
+ *
+ * @param os The stream to write to.
+ * @param rhs The result to write.
+ * @return The stream, to allow chaining.
+ */
+ friend std::ostream & operator<<(std::ostream & os, const RankResult & rhs);
+
+private:
+ RankScores _rankScores;
+ double _epsilon;
+};
+
+} // namespace test
+} // namespace fef
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/fef/utils.cpp b/searchlib/src/vespa/searchlib/fef/utils.cpp
new file mode 100644
index 00000000000..7532d0d60fb
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/utils.cpp
@@ -0,0 +1,75 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "utils.h"
+#include <vector>
+
+namespace search {
+namespace fef {
+
+namespace {
+
+FeatureHandle
+getSingleFeatureHandle(const RankProgram &rankProgram)
+{
+ std::vector<vespalib::string> featureNames;
+ std::vector<FeatureHandle> featureHandles;
+ rankProgram.get_seed_handles(featureNames, featureHandles, false);
+ assert(featureNames.size() == 1);
+ assert(featureHandles.size() == 1);
+ return featureHandles.front();
+}
+
+}
+
+const feature_t *
+Utils::getScoreFeature(const RankProgram &rankProgram)
+{
+ return rankProgram.match_data().resolveFeature(getSingleFeatureHandle(rankProgram));
+}
+
+const vespalib::eval::Value::CREF *
+Utils::getObjectFeature(const RankProgram &rankProgram)
+{
+ return rankProgram.match_data().resolve_object_feature(getSingleFeatureHandle(rankProgram));
+}
+
+namespace {
+
+std::map<vespalib::string, feature_t>
+resolveFeatures(const MatchData &matchData,
+ const std::vector<vespalib::string> &featureNames,
+ const std::vector<FeatureHandle> &featureHandles)
+{
+ assert(featureNames.size() == featureHandles.size());
+ std::map<vespalib::string, feature_t> result;
+ for (size_t i = 0; i < featureNames.size(); ++i) {
+ const vespalib::string &name = featureNames[i];
+ feature_t value = *(matchData.resolveFeature(featureHandles[i]));
+ result.insert(std::make_pair(name, value));
+ }
+ return result;
+}
+
+}
+
+std::map<vespalib::string, feature_t>
+Utils::getSeedFeatures(const RankProgram &rankProgram)
+{
+ std::vector<vespalib::string> featureNames;
+ std::vector<FeatureHandle> featureHandles;
+ rankProgram.get_seed_handles(featureNames, featureHandles);
+ return resolveFeatures(rankProgram.match_data(), featureNames, featureHandles);
+}
+
+std::map<vespalib::string, feature_t>
+Utils::getAllFeatures(const RankProgram &rankProgram)
+{
+ std::vector<vespalib::string> featureNames;
+ std::vector<FeatureHandle> featureHandles;
+ rankProgram.get_all_feature_handles(featureNames, featureHandles);
+ return resolveFeatures(rankProgram.match_data(), featureNames, featureHandles);
+}
+
+} // namespace fef
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/fef/utils.h b/searchlib/src/vespa/searchlib/fef/utils.h
new file mode 100644
index 00000000000..20ec62e3bfe
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/utils.h
@@ -0,0 +1,37 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "rank_program.h"
+#include <vespa/vespalib/eval/value.h>
+#include <map>
+
+namespace search {
+namespace fef {
+
+struct Utils
+{
+ /**
+ * Extract a single score feature from the given rank program.
+ */
+ static const feature_t *getScoreFeature(const RankProgram &rankProgram);
+
+ /**
+ * Extract a single object feature from the given rank program.
+ */
+ static const vespalib::eval::Value::CREF *getObjectFeature(const RankProgram &rankProgram);
+
+ /**
+ * Extract all seed feature values from the given rank program.
+ **/
+ static std::map<vespalib::string, feature_t> getSeedFeatures(const RankProgram &rankProgram);
+
+ /**
+ * Extract all feature values from the given rank program.
+ **/
+ static std::map<vespalib::string, feature_t> getAllFeatures(const RankProgram &rankProgram);
+
+};
+
+} // namespace fef
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/fef/verify_feature.cpp b/searchlib/src/vespa/searchlib/fef/verify_feature.cpp
new file mode 100644
index 00000000000..ebfdf1622ba
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/verify_feature.cpp
@@ -0,0 +1,29 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".fef.verify_feature");
+#include "verify_feature.h"
+#include "blueprintresolver.h"
+
+namespace search {
+namespace fef {
+
+bool verifyFeature(const BlueprintFactory &factory,
+ const IIndexEnvironment &indexEnv,
+ const std::string &featureName,
+ const std::string &desc)
+{
+ indexEnv.hintFeatureMotivation(IIndexEnvironment::VERIFY_SETUP);
+ BlueprintResolver resolver(factory, indexEnv);
+ resolver.addSeed(featureName);
+ bool result = resolver.compile();
+ if (!result) {
+ LOG(error, "rank feature verification failed: %s (%s)",
+ featureName.c_str(), desc.c_str());
+ }
+ return result;
+}
+
+} // namespace fef
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/fef/verify_feature.h b/searchlib/src/vespa/searchlib/fef/verify_feature.h
new file mode 100644
index 00000000000..b1edd5a16fd
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/verify_feature.h
@@ -0,0 +1,30 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "blueprintfactory.h"
+#include "iindexenvironment.h"
+#include <string>
+
+namespace search {
+namespace fef {
+
+/**
+ * Verify whether a specific feature can be computed. If the feature
+ * can not be computed, log a reason why, including feature
+ * dependencies.
+ *
+ * @return true if the feature can be computed, false otherwise
+ * @param factory blueprint factory
+ * @param indexEnv index environment
+ * @param featureName name of feature to verify
+ * @param desc external description of the feature
+ **/
+bool verifyFeature(const BlueprintFactory &factory,
+ const IIndexEnvironment &indexEnv,
+ const std::string &featureName,
+ const std::string &desc);
+
+} // namespace fef
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/grouping/CMakeLists.txt b/searchlib/src/vespa/searchlib/grouping/CMakeLists.txt
new file mode 100644
index 00000000000..3e202895beb
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/grouping/CMakeLists.txt
@@ -0,0 +1,9 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_library(searchlib_grouping OBJECT
+ SOURCES
+ collect.cpp
+ groupandcollectengine.cpp
+ groupengine.cpp
+ groupingengine.cpp
+ DEPENDS
+)
diff --git a/searchlib/src/vespa/searchlib/grouping/OWNERS b/searchlib/src/vespa/searchlib/grouping/OWNERS
new file mode 100644
index 00000000000..1037590124e
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/grouping/OWNERS
@@ -0,0 +1 @@
+balder
diff --git a/searchlib/src/vespa/searchlib/grouping/collect.cpp b/searchlib/src/vespa/searchlib/grouping/collect.cpp
new file mode 100644
index 00000000000..f34b63d4047
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/grouping/collect.cpp
@@ -0,0 +1,113 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/searchlib/grouping/collect.h>
+
+namespace search {
+
+using namespace expression;
+using namespace aggregation;
+
+namespace grouping {
+
+Collect::ResultAccessor::ResultAccessor(const AggregationResult & aggregator, size_t offset) :
+ _bluePrint(&aggregator),
+ _aggregator(_bluePrint->clone()),
+ _offset(offset)
+{
+}
+
+void Collect::ResultAccessor::create(uint8_t * base)
+{
+ _aggregator->getResult().create(base+_offset);
+ _bluePrint->getResult().encode(base+_offset);
+}
+
+Collect::Collect(const Group & gp) :
+ _aggregatorSize(0),
+ _aggregator(),
+ _aggrBacking()
+{
+ _aggregator.reserve(gp.getAggrSize());
+ for (size_t i(0); i < gp.getAggrSize(); i++) {
+ ResultAccessor accessor(const_cast<AggregationResult &>(gp.getAggregationResult(i)), _aggregatorSize);
+ _aggregator.push_back(accessor);
+ assert(accessor.getRawByteSize() > 0);
+ _aggregatorSize += accessor.getRawByteSize();
+ }
+ _sortInfo.resize(gp.getOrderBySize());
+ for(size_t i(0); i < _sortInfo.size(); i++) {
+ const uint32_t index = std::abs(gp.getOrderBy(i)) - 1;
+ const uint32_t z(gp.getExpr(index));
+ _sortInfo[i] = SortInfo(z, gp.getOrderBy(i));
+ }
+}
+
+Collect::~Collect()
+{
+ if (_aggregatorSize > 0) {
+ assert((_aggrBacking.size() % _aggregatorSize) == 0);
+ for (size_t i(0), m(_aggrBacking.size()/_aggregatorSize); i < m; i++) {
+ uint8_t * base(&_aggrBacking[ i * _aggregatorSize]);
+ for (size_t j(0), k(_aggregator.size()); j < k; j++) {
+ ResultAccessor & r = _aggregator[j];
+ r.destroy(base);
+ }
+ }
+ }
+}
+
+void
+Collect::getCollectors(GroupRef ref, Group & g) const
+{
+ size_t offset(getAggrBase(ref));
+ if (offset < _aggrBacking.size()) {
+ const uint8_t * base(&_aggrBacking[offset]);
+ for (size_t i(0), m(_aggregator.size()); i < m; i++) {
+ const ResultAccessor & r = _aggregator[i];
+ r.getResult(g.getAggregationResult(i).getResult(), base);
+ g.getAggregationResult(i).postMerge();
+ }
+ }
+}
+
+void
+Collect::collect(GroupRef gr, uint32_t docId, double rank)
+{
+ uint8_t * base(&_aggrBacking[getAggrBase(gr)]);
+ for (size_t i(0), m(_aggregator.size()); i < m; i++) {
+ _aggregator[i].aggregate(base, docId, rank);
+ }
+}
+
+void
+Collect::createCollectors(GroupRef gr)
+{
+ size_t offset(getAggrBase(gr));
+ if (offset == _aggrBacking.size()) {
+ _aggrBacking.resize(getAggrBase(GroupRef(gr.getRef() + 1)));
+ uint8_t * base(&_aggrBacking[offset]);
+ for (size_t i(0), m(_aggregator.size()); i < m; i++) {
+ ResultAccessor & r = _aggregator[i];
+ r.create(base);
+ }
+ }
+}
+
+void
+Collect::preFill(GroupRef gr, const Group & g)
+{
+ if (gr.valid()) {
+ size_t offset(getAggrBase(gr));
+ uint8_t * base(&_aggrBacking[offset]);
+ for (size_t i(0), m(_aggregator.size()); i < m; i++) {
+ ResultAccessor & r = _aggregator[i];
+ r.setResult(g.getAggregationResult(i).getResult(), base);
+ }
+ }
+}
+
+}
+}
+
+// this function was added by ../../forcelink.sh
+void forcelink_file_searchlib_grouping_collect() {}
diff --git a/searchlib/src/vespa/searchlib/grouping/collect.h b/searchlib/src/vespa/searchlib/grouping/collect.h
new file mode 100644
index 00000000000..f2bdf014826
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/grouping/collect.h
@@ -0,0 +1,105 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/searchlib/grouping/groupref.h>
+#include <vespa/searchlib/aggregation/group.h>
+
+namespace search {
+namespace grouping {
+
+class Collect : public vespalib::noncopyable
+{
+protected:
+ Collect(const aggregation::Group & protoType);
+ ~Collect();
+ void preFill(GroupRef gr, const aggregation::Group & r);
+ void createCollectors(GroupRef gr);
+ void collect(GroupRef group, uint32_t docId, double rank);
+ void getCollectors(GroupRef ref, aggregation::Group & g) const;
+ int cmpAggr(GroupRef a, GroupRef b) const {
+ int diff(0);
+ size_t aOff(getAggrBase(a));
+ size_t bOff(getAggrBase(b));
+ for(std::vector<SortInfo>::const_iterator it(_sortInfo.begin()), mt(_sortInfo.end()); (diff == 0) && (it != mt); it++) {
+ diff = _aggregator[it->getIndex()].cmp(&_aggrBacking[aOff], &_aggrBacking[bOff]) * it->getSign();
+ }
+ return diff;
+ }
+ uint64_t radixAggrAsc(GroupRef gr) const {
+ return _aggregator[_sortInfo[0].getIndex()].radixAsc(&_aggrBacking[getAggrBase(gr)]);
+ }
+ uint64_t radixAggrDesc(GroupRef gr) const {
+ return _aggregator[_sortInfo[0].getIndex()].radixDesc(&_aggrBacking[getAggrBase(gr)]);
+ }
+ bool hasSpecifiedOrder() const { return ! _sortInfo.empty(); }
+ bool isPrimarySortKeyAscending() const { return _sortInfo[0].getSign() >= 0; }
+private:
+ // Returns the byteoffset where aggregationresults for this group are stored.
+ size_t getAggrBase(GroupRef gr) const { return _aggregatorSize*gr.getRef(); }
+ // Return the aggregator with the corresponding id for the requested group.
+ const expression::ResultNode & getAggrResult(uint32_t aggrId, GroupRef ref) const {
+ return _aggregator[aggrId].getResult(&_aggrBacking[getAggrBase(ref.getRef())]);
+ }
+
+ /**
+ * A ResultAccessor hides the dirty details for aggregating and accessing results
+ * stored in flat memory elsewhere.
+ * It keeps an offset that is added to get to memory storing the result.
+ * It also keeps a scratch aggregator for doing the calculation. The 'warm' method, aggregate, does
+ * r.swap(m); r.aggregate(); r.swap(m);
+ * The extra incurred cost is dual swap, in exchange for avoiding the memory cost of virtual objects.
+ * TODO: This are solutions planned to avoid the dual swaps. But so far they can be neglected as they do not occupy many cycles.
+ */
+ class ResultAccessor {
+ public:
+ ResultAccessor() : _bluePrint(NULL), _aggregator(NULL), _offset(0) { }
+ ResultAccessor(const aggregation::AggregationResult & aggregator, size_t offset);
+ void setResult(const expression::ResultNode & result, uint8_t * base) {
+ result.encode(base+_offset);
+ }
+ const expression::ResultNode & getResult(expression::ResultNode & result, const uint8_t * base) const {
+ result.decode(base+_offset);
+ return result;
+ }
+ const expression::ResultNode & getResult(const uint8_t * base) const {
+ _aggregator->getResult().decode(base+_offset);
+ return _aggregator->getResult();
+ }
+ size_t getRawByteSize() const { return _aggregator->getResult().getRawByteSize(); }
+ uint64_t radixAsc(const uint8_t * a) const { return _aggregator->getResult().radixAsc(a); }
+ uint64_t radixDesc(const uint8_t * a) const { return _aggregator->getResult().radixDesc(a); }
+ int cmp(const uint8_t * a, const uint8_t * b) const {
+ return _aggregator->getResult().cmpMem(a, b);
+ }
+ void create(uint8_t * base);
+ void destroy(uint8_t * base) { _aggregator->getResult().destroy(base+_offset); }
+ void aggregate(uint8_t * base, uint32_t docId, double rank) {
+ _aggregator->getResult().swap(base+_offset);
+ _aggregator->aggregate(docId, rank);
+ _aggregator->getResult().swap(base+_offset);
+ }
+ private:
+ const aggregation::AggregationResult * _bluePrint;
+ mutable vespalib::IdentifiablePtr<aggregation::AggregationResult> _aggregator;
+ uint32_t _offset;
+ };
+ typedef vespalib::Array<uint8_t> AggregatorBacking;
+ typedef vespalib::Array<ResultAccessor> ResultAccessorList;
+ class SortInfo {
+ public:
+ SortInfo() : _index(0), _sign(1) { }
+ SortInfo(uint8_t index, int8_t sign) : _index(index), _sign(sign) { }
+ uint8_t getIndex() const { return _index; }
+ int8_t getSign() const { return _sign; }
+ private:
+ uint8_t _index; // Which index in the aggragators should be used for sorting this level.
+ int8_t _sign; // And which way. positive number -> ascending, negative number descending.
+ };
+ size_t _aggregatorSize; // This is the bytesize required to store the aggrgate values per bucket.
+ ResultAccessorList _aggregator; // These are the accessors to use when accessing the results.
+ AggregatorBacking _aggrBacking; // This is the storage for the accessors.
+ std::vector<SortInfo> _sortInfo; // Generated cheap sortInfo, to avoid accessing more complicated data.
+};
+
+}
+}
diff --git a/searchlib/src/vespa/searchlib/grouping/forcelink.hpp b/searchlib/src/vespa/searchlib/grouping/forcelink.hpp
new file mode 100644
index 00000000000..09496d294c7
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/grouping/forcelink.hpp
@@ -0,0 +1,13 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+void forcelink_file_searchlib_grouping_groupandcollectengine();
+void forcelink_file_searchlib_grouping_groupingengine();
+void forcelink_file_searchlib_grouping_groupengine();
+
+void forcelink_searchlib_grouping() {
+ forcelink_file_searchlib_grouping_groupandcollectengine();
+ forcelink_file_searchlib_grouping_groupingengine();
+ forcelink_file_searchlib_grouping_groupengine();
+}
+
diff --git a/searchlib/src/vespa/searchlib/grouping/groupandcollectengine.cpp b/searchlib/src/vespa/searchlib/grouping/groupandcollectengine.cpp
new file mode 100644
index 00000000000..6f06960d5c5
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/grouping/groupandcollectengine.cpp
@@ -0,0 +1,50 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/searchlib/grouping/groupandcollectengine.h>
+
+namespace search {
+
+using namespace expression;
+using namespace aggregation;
+
+namespace grouping {
+
+GroupAndCollectEngine::GroupAndCollectEngine(const GroupingLevel * request, size_t level, GroupEngine * nextEngine, bool frozen) :
+ GroupEngine(request, level, nextEngine, frozen)
+{
+}
+
+GroupAndCollectEngine::~GroupAndCollectEngine()
+{
+}
+
+GroupRef
+GroupAndCollectEngine::group(Children & children, uint32_t docId, double rank)
+{
+ GroupRef gr(GroupEngine::group(children, docId, rank));
+ if (gr.valid()) {
+ collect(gr, docId, rank);
+ }
+ return gr;
+}
+
+void
+GroupAndCollectEngine::group(uint32_t docId, double rank)
+{
+ GroupEngine::group(docId, rank);
+ collect(GroupRef(0), docId, rank);
+}
+
+GroupRef
+GroupAndCollectEngine::createGroup(const search::expression::ResultNode & v)
+{
+ GroupRef gr(GroupEngine::createGroup(v));
+ createCollectors(gr);
+ return gr;
+}
+
+}
+}
+
+// this function was added by ../../forcelink.sh
+void forcelink_file_searchlib_grouping_groupandcollectengine() {}
diff --git a/searchlib/src/vespa/searchlib/grouping/groupandcollectengine.h b/searchlib/src/vespa/searchlib/grouping/groupandcollectengine.h
new file mode 100644
index 00000000000..4d1aa5a49df
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/grouping/groupandcollectengine.h
@@ -0,0 +1,21 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/searchlib/grouping/groupengine.h>
+
+namespace search {
+namespace grouping {
+
+class GroupAndCollectEngine : public GroupEngine
+{
+public:
+ GroupAndCollectEngine(const aggregation::GroupingLevel * request, size_t level, GroupEngine * nextEngine, bool frozen);
+ ~GroupAndCollectEngine();
+private:
+ virtual GroupRef group(Children & children, uint32_t docId, double rank);
+ virtual void group(uint32_t docId, double rank);
+ virtual GroupRef createGroup(const expression::ResultNode & id);
+};
+
+}
+}
diff --git a/searchlib/src/vespa/searchlib/grouping/groupengine.cpp b/searchlib/src/vespa/searchlib/grouping/groupengine.cpp
new file mode 100644
index 00000000000..48ecf6931ee
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/grouping/groupengine.cpp
@@ -0,0 +1,227 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/searchlib/grouping/groupengine.h>
+#include <vespa/searchlib/expression/nullresultnode.h>
+#include <vespa/searchlib/common/sort.h>
+
+namespace search {
+
+using namespace expression;
+using namespace aggregation;
+
+namespace grouping {
+
+GroupEngine::GroupEngine(const GroupingLevel * request, size_t level, GroupEngine * nextEngine, bool frozen) :
+ Collect(request->getGroupPrototype()),
+ _request(request),
+ _nextEngine(nextEngine),
+ _idByteSize(0),
+ _ids(),
+ _idScratch(),
+ _rank(),
+ _groupBacking(),
+ _level(level),
+ _frozen(frozen)
+{
+ if ((request != NULL) && (level > 0)) {
+ _idScratch.reset(request->getExpression().getResult().clone());
+ } else {
+ _idScratch.reset(new NullResultNode());
+ }
+ _idByteSize = _idScratch->getRawByteSize();
+}
+
+GroupEngine::~GroupEngine()
+{
+ if (_idByteSize) {
+ for (size_t i(0), m(_ids.size()/_idByteSize); i < m; i++) {
+ _idScratch->destroy(&_ids[getIdBase(GroupRef(i))]);
+ }
+ }
+ for (size_t i(0), m(_groupBacking.size()); i < m; i++) {
+ delete _groupBacking[i];
+ }
+}
+
+GroupRef GroupEngine::group(Children & children, uint32_t docId, double rank)
+{
+ const ExpressionTree &selector = _request->getExpression();
+ if (!selector.execute(docId, rank)) {
+ throw std::runtime_error("Does not know how to handle failed select statements");
+ }
+ const ResultNode &selectResult = selector.getResult();
+ Children::iterator found = children.find<ResultNode, GroupResult, Group::ResultHash, Group::ResultEqual>(selectResult, GroupResult(*this));
+ GroupRef gr;
+ if (found == children.end()) {
+ if (_request->allowMoreGroups(children.size())) {
+ gr = createGroup(selectResult);
+ _rank.push_back(rank);
+ children.insert(gr);
+ } else {
+ return gr;
+ }
+ } else {
+ gr = *found;
+ }
+
+ if (_nextEngine != NULL) {
+ _nextEngine->group(*_groupBacking[gr], docId, rank);
+ }
+
+ return gr;
+}
+
+void GroupEngine::group(uint32_t docId, double rank)
+{
+ if (_nextEngine != NULL) {
+ _nextEngine->group(*_groupBacking[0], docId, rank);
+ }
+}
+
+void GroupEngine::merge(Children &, const GroupEngine &)
+{
+}
+
+void GroupEngine::merge(const GroupEngine & b)
+{
+ if (_nextEngine != NULL) {
+ _nextEngine->merge(*_groupBacking[0], *b._nextEngine);
+ }
+}
+
+#if 0
+int GroupEngine::cmpRank(GroupRef a, GroupRef b) const
+{
+#if 0
+ return cmpAggr(a, b);
+#else
+#if 0
+ int diff(cmpAggr(a, b));
+ return diff
+ ? diff
+ : ((_rank[a] > _rank[b])
+ ? -1
+ : ((_rank[a] < _rank[b]) ? 1 : 0));
+#else
+ return (_rank[a] > _rank[b])
+ ? -1
+ : ((_rank[a] < _rank[b]) ? 1 : 0);
+#endif
+#endif
+}
+#endif
+
+GroupRef GroupEngine::createGroup(const search::expression::ResultNode & v)
+{
+ GroupRef gr(_idByteSize ? _ids.size()/_idByteSize : 0);
+ _ids.resize(getIdBase(GroupRef(gr + 1)));
+ uint8_t * base(&_ids[getIdBase(gr)]);
+ v.create(base);
+ v.encode(base);
+ if (_nextEngine != NULL) {
+ _groupBacking.push_back(_nextEngine->createChildren().release());
+ }
+ return gr;
+}
+
+GroupRef
+GroupEngine::createFullGroup(const search::expression::ResultNode & v)
+{
+ GroupRef gr(GroupEngine::createGroup(v));
+ createCollectors(gr);
+ return gr;
+}
+
+namespace {
+class RadixAccess {
+public:
+ RadixAccess(const uint64_t * v) : _radix(v) { }
+ uint64_t operator () (size_t i) const { return _radix[i]; }
+private:
+ const uint64_t * _radix;
+};
+}
+
+Group::UP GroupEngine::getGroup(GroupRef ref) const
+{
+ Group::UP p(new Group(_request->getGroupPrototype()));
+ Group & g(*p);
+ g.setId(getGroupId(ref));
+ g.setRank(_rank[ref]);
+ if (_nextEngine != NULL) {
+ const Children & ch(*_groupBacking[ref]);
+ std::vector<GroupRef> v(ch.size());
+ {
+ size_t i(0);
+ for (Children::const_iterator it(ch.begin()), mt(ch.end()); it != mt; it++) {
+ v[i++] = *it;
+ }
+ }
+ uint64_t maxN(_nextEngine->_request->getPrecision());
+ if (maxN < v.size()) {
+#if 0
+ std::sort(v.begin(), v.end(), GroupRankLess(*_nextEngine));
+#else
+ size_t radixSorted;
+ if (_nextEngine->hasSpecifiedOrder()) {
+ uint64_t * radixCache = new uint64_t[v.size()];
+ if (_nextEngine->isPrimarySortKeyAscending()) {
+ for (size_t i(0); i < v.size(); i++) {
+ radixCache[i] = _nextEngine->radixAggrAsc(GroupRef(i));
+ }
+ } else {
+ for (size_t i(0); i < v.size(); i++) {
+ radixCache[i] = _nextEngine->radixAggrDesc(GroupRef(i));
+ }
+ }
+ radixSorted = ShiftBasedRadixSorter<GroupRef, RadixAccess, GroupRankLess, 56>::
+ radix_sort(RadixAccess(radixCache), GroupRankLess(*_nextEngine), &v[0], v.size(), 16, maxN);
+ delete [] radixCache;
+ } else {
+ radixSorted = ShiftBasedRadixSorter<GroupRef, GroupRankRadix, GroupRankLess, 56>::
+ radix_sort(GroupRankRadix(*_nextEngine), GroupRankLess(*_nextEngine), &v[0], v.size(), 16, maxN);
+ }
+ assert(radixSorted >= maxN);
+ assert(radixSorted <= v.size());
+ v.resize(radixSorted);
+ std::sort(v.begin(), v.end(), GroupRankLess(*_nextEngine));
+#endif
+ v.resize(maxN);
+ }
+ std::sort(v.begin(), v.end(), GroupIdLess(*_nextEngine));
+ for (size_t i(0); i < v.size(); i++) {
+ g.addChild(_nextEngine->getGroup(v[i]));
+ }
+ }
+ getCollectors(ref, g);
+ return p;
+}
+
+GroupRef
+GroupEngine::preFillEngine(const Group & r, size_t depth)
+{
+ GroupRef gr;
+ if (depth >= _level) {
+ gr = (r.hasId())
+ ? createFullGroup(r.getId())
+ : createFullGroup(NullResultNode());
+ _rank.push_back(r.getRank());
+ if (_nextEngine != NULL) {
+ Children & ch(*_groupBacking[gr]);
+ for (size_t i(0), m(r.getChildrenSize()); i < m; i++) {
+ GroupRef tmp = _nextEngine->preFillEngine(r.getChild(i), depth);
+ if (tmp.valid()) {
+ ch.insert(tmp);
+ }
+ }
+ }
+ preFill(gr, r);
+ }
+ return gr;
+}
+
+}
+}
+
+// this function was added by ../../forcelink.sh
+void forcelink_file_searchlib_grouping_groupengine() {}
diff --git a/searchlib/src/vespa/searchlib/grouping/groupengine.h b/searchlib/src/vespa/searchlib/grouping/groupengine.h
new file mode 100644
index 00000000000..4ac29d77b3d
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/grouping/groupengine.h
@@ -0,0 +1,139 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/searchlib/aggregation/groupinglevel.h>
+#include <vespa/searchlib/grouping/collect.h>
+
+namespace search {
+namespace grouping {
+
+class GroupEngine : protected Collect
+{
+public:
+ class GroupHash {
+ public:
+ GroupHash(const GroupEngine & engine) : _engine(engine) { }
+ uint32_t operator () (GroupRef a) const { return _engine.hash(a); }
+ private:
+ const GroupEngine & _engine;
+ };
+ class GroupEqual {
+ public:
+ GroupEqual(const GroupEngine & engine) : _engine(engine) { }
+ bool operator () (GroupRef a, GroupRef b) const { return _engine.cmpId(a, b) == 0; }
+ private:
+ const GroupEngine & _engine;
+ };
+ class GroupIdLess {
+ public:
+ GroupIdLess(const GroupEngine & engine) : _engine(engine) { }
+ bool operator () (GroupRef a, GroupRef b) const { return _engine.cmpId(a, b) < 0; }
+ private:
+ const GroupEngine & _engine;
+ };
+ class GroupRankRadix {
+ public:
+ GroupRankRadix(const GroupEngine & engine) : _engine(engine) { }
+ uint64_t operator () (GroupRef a) const { return _engine.rankRadix(a); }
+ private:
+ const GroupEngine & _engine;
+ };
+ class GroupRankLess {
+ public:
+ GroupRankLess(const GroupEngine & engine) : _engine(engine) { }
+ bool operator () (GroupRef a, GroupRef b) const { return _engine.cmpRank(a, b) < 0; }
+ private:
+ const GroupEngine & _engine;
+ };
+ class GroupResult {
+ public:
+ GroupResult(const GroupEngine & engine) : _engine(engine) { }
+ const expression::ResultNode & operator() (GroupRef v) const { return _engine.getGroupId(v); }
+ private:
+ const GroupEngine & _engine;
+ };
+
+ typedef vespalib::hash_set<GroupRef, GroupHash, GroupEqual> Children;
+
+ /**
+ * @param request The request creating this engine.
+ * @param level This is my level. 0 is the top level.
+ * @param nextEngine This is the engine handling the next level.
+ * @param frozen Tell if this level can create new groups or not.
+ */
+ GroupEngine(const aggregation::GroupingLevel * request, size_t level, GroupEngine * nextEngine, bool frozen);
+ virtual ~GroupEngine();
+
+ /**
+ * @param children The list of children already present.
+ * @param docId The docid of the hit
+ * @param rank The rank of the hit
+ **/
+ virtual GroupRef group(Children & children, uint32_t docId, double rank);
+ virtual void group(uint32_t docId, double rank);
+ virtual void merge(Children & children, const GroupEngine & b);
+ virtual void merge(const GroupEngine & b);
+
+ std::unique_ptr<Children> createChildren() { return std::unique_ptr<Children>(new Children(0, GroupHash(*this), GroupEqual(*this))); }
+
+ virtual aggregation::Group::UP getGroup(GroupRef ref) const;
+ aggregation::Group::UP getRootGroup() const { return getGroup(GroupRef(0)); }
+
+ GroupRef preFillEngine(const aggregation::Group & r, size_t depth);
+
+protected:
+ GroupEngine(const aggregation::GroupingLevel * request, size_t level);
+ void groupNext(uint32_t docId, double rank);
+ virtual GroupRef createGroup(const expression::ResultNode & id);
+private:
+ int cmpRank(GroupRef a, GroupRef b) const {
+ //Here there is room for improvement
+ //Most critical inner loop.
+#if 0
+ return cmpAggr(a, b);
+#else
+#if 1
+ int diff(cmpAggr(a, b));
+ return diff
+ ? diff
+ : ((_rank[a] > _rank[b])
+ ? -1
+ : ((_rank[a] < _rank[b]) ? 1 : 0));
+#else
+ return (_rank[a] > _rank[b])
+ ? -1
+ : ((_rank[a] < _rank[b]) ? 1 : 0);
+#endif
+#endif
+ }
+ size_t hash(GroupRef a) const { return _idScratch->hash(&_ids[getIdBase(a)]); }
+ uint64_t rankRadix(GroupRef a) const { return vespalib::convertForSort<double, false>::convert(_rank[a]); }
+ int cmpId(GroupRef a, GroupRef b) const {
+ return _idScratch->cmpMem(&_ids[getIdBase(a)], &_ids[getIdBase(b)]);
+ }
+ GroupRef createFullGroup(const expression::ResultNode & id);
+ const expression::ResultNode & getGroupId(GroupRef ref) const { return getGroupId(ref, *_idScratch); }
+ const expression::ResultNode & getGroupId(GroupRef ref, expression::ResultNode & r) const {
+ r.decode(&_ids[getIdBase(ref)]);
+ return r;
+ }
+ size_t getIdBase(GroupRef g) const { return _idByteSize*g; }
+
+ typedef expression::ResultNodeVector::UP IdList;
+ typedef vespalib::Array<Children *> GroupBacking;
+ typedef std::vector<double> RankV;
+ typedef vespalib::Array<uint8_t> IdBacking;
+
+ const aggregation::GroupingLevel * _request;
+ GroupEngine * _nextEngine; // This is the engine for the next level.
+ size_t _idByteSize; // Correct fixed size of memory needed for one id.
+ IdBacking _ids; // These are all the group ids at this level.
+ expression::ResultNode::UP _idScratch; // Used for typing the ids.
+ RankV _rank; // This is the rank of the group. TODO handle with ordinary aggregator.
+ GroupBacking _groupBacking; // These are all the children at this level. Vector<HashTable<GroupRef()>>
+ size_t _level; // This is my level
+ bool _frozen; // If set no more groups will be created at this level.
+};
+
+}
+}
diff --git a/searchlib/src/vespa/searchlib/grouping/groupingengine.cpp b/searchlib/src/vespa/searchlib/grouping/groupingengine.cpp
new file mode 100644
index 00000000000..ec34af16662
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/grouping/groupingengine.cpp
@@ -0,0 +1,110 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/searchlib/grouping/groupingengine.h>
+#include <vespa/searchlib/grouping/groupandcollectengine.h>
+
+namespace search {
+
+using namespace aggregation;
+using namespace expression;
+
+namespace grouping {
+
+GroupingEngine::GroupingEngine(Grouping & request) :
+ _request(request),
+ _levels(),
+ _rootRequestLevel()
+{
+ const Grouping::GroupingLevelList & gll(request.getLevels());
+ assert(request.getLastLevel() <= gll.size());
+ bool collectLastLevel(request.getLastLevel() == gll.size());
+ _levels.resize(request.getLastLevel() + ((gll.size()==request.getLastLevel()) ? 0 : 1) + 1); // 1 for inclusive, 1 for artificial root
+ GroupEngine * nextEngine(NULL);
+ for (size_t i(_levels.size()); i-- > 1; ) {
+ const GroupingLevel & l = gll[i-1];
+ if (i > request.getFirstLevel()) {
+ if ((i-1) == request.getLastLevel()) {
+ if (collectLastLevel) {
+ _levels[i] = new GroupAndCollectEngine(&l, i, nextEngine, false);
+ } else {
+ _levels[i] = new GroupEngine(&l, i, nextEngine, false);
+ }
+ } else {
+ _levels[i] = new GroupAndCollectEngine(&l, i, nextEngine, false);
+ }
+ } else {
+ // This should be a frozen level
+ if (i == request.getFirstLevel()) {
+ _levels[i] = new GroupAndCollectEngine(&l, i, nextEngine, true);
+ } else {
+ _levels[i] = new GroupEngine(&l, i, nextEngine, true);
+ }
+ }
+ nextEngine = _levels[i];
+ }
+
+ fillRootRequest(request.getRoot());
+ if (0 >= request.getFirstLevel()) {
+ _levels[0] = new GroupAndCollectEngine(&_rootRequestLevel, 0, nextEngine, true);
+ } else {
+ _levels[0] = new GroupEngine(&_rootRequestLevel, 0, nextEngine, true);
+ }
+ preFillEngines(request.getRoot(), request.getFirstLevel());
+}
+
+void
+GroupingEngine::preFillEngines(const Group & r, size_t levels)
+{
+ if (_levels.size() > levels) {
+ _levels[0]->preFillEngine(r, levels);
+ }
+}
+
+void
+GroupingEngine::fillRootRequest(const Group & r)
+{
+ _rootRequestLevel.setMaxGroups(1).setPresicion(1).freeze();
+ for (size_t i(0), m(r.getAggrSize()); i < m; i++) {
+ _rootRequestLevel.addResult(r.getAggregationResult(i));
+ }
+}
+
+GroupingEngine::~GroupingEngine()
+{
+ for (size_t i(0); i < _levels.size(); i++) {
+ delete _levels[i];
+ _levels[i] = 0;
+ }
+}
+
+void
+GroupingEngine::aggregate(const RankedHit * rankedHit, unsigned int len)
+{
+ _request.preAggregate( ! _request.needResort());
+ if ( ! _levels.empty() ) {
+ len = _request.getMaxN(len);
+ for (size_t i(0); i < len; i++) {
+ const RankedHit & r(rankedHit[i]);
+ _levels[0]->group(r.getDocId(), r.getRank());
+ }
+ }
+ _request.postAggregate();
+}
+
+Group::UP
+GroupingEngine::createResult() const
+{
+ return _levels[0]->getRootGroup();
+}
+
+void GroupingEngine::merge(const GroupingEngine & b)
+{
+ _levels[0]->merge(*b._levels[0]);
+}
+
+}
+
+}
+
+// this function was added by ../../forcelink.sh
+void forcelink_file_searchlib_grouping_groupingengine() {}
diff --git a/searchlib/src/vespa/searchlib/grouping/groupingengine.h b/searchlib/src/vespa/searchlib/grouping/groupingengine.h
new file mode 100644
index 00000000000..00187a0c818
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/grouping/groupingengine.h
@@ -0,0 +1,33 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/searchlib/aggregation/grouping.h>
+#include <vespa/searchlib/grouping/groupengine.h>
+
+namespace search {
+namespace grouping {
+
+class GroupingEngine : private vespalib::noncopyable
+{
+public:
+ typedef std::vector<GroupEngine *> GroupEngines;
+public:
+ GroupingEngine(aggregation::Grouping & request);
+ GroupingEngine(vespalib::nbostream & request, bool oldWay);
+ ~GroupingEngine();
+ vespalib::nbostream & serializeOldWay(vespalib::nbostream & request) const;
+ vespalib::nbostream & serialize(vespalib::nbostream & request) const;
+ void aggregate(const RankedHit * rankedHit, unsigned int len);
+ void merge(const GroupingEngine & b);
+ aggregation::Group::UP createResult() const;
+ const GroupEngines & getEngines() const { return _levels; }
+private:
+ void fillRootRequest(const aggregation::Group & r);
+ void preFillEngines(const aggregation::Group & r, size_t levels);
+ aggregation::Grouping & _request;
+ GroupEngines _levels;
+ aggregation::GroupingLevel _rootRequestLevel;
+};
+
+}
+}
diff --git a/searchlib/src/vespa/searchlib/grouping/groupref.h b/searchlib/src/vespa/searchlib/grouping/groupref.h
new file mode 100644
index 00000000000..bcc56172be6
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/grouping/groupref.h
@@ -0,0 +1,22 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <stdint.h>
+
+namespace search {
+namespace grouping {
+
+class GroupRef
+{
+public:
+ GroupRef() : _ref(-1) { }
+ GroupRef(uint32_t ref) : _ref(ref) { }
+ uint32_t getRef() const { return _ref; }
+ bool valid() const { return _ref != static_cast<uint32_t>(-1); }
+ operator uint32_t () const { return getRef(); }
+private:
+ uint32_t _ref;
+};
+
+}
+}
diff --git a/searchlib/src/vespa/searchlib/grouping/hyperloglog.h b/searchlib/src/vespa/searchlib/grouping/hyperloglog.h
new file mode 100644
index 00000000000..7ef731f833b
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/grouping/hyperloglog.h
@@ -0,0 +1,140 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "sketch.h"
+#include <vespa/document/util/compressionconfig.h>
+#include <vespa/document/util/compressor.h>
+#include <vespa/vespalib/data/databuffer.h>
+#include <vespa/vespalib/objects/deserializer.h>
+#include <vespa/vespalib/objects/serializer.h>
+#include <vespa/vespalib/util/buffer.h>
+#include <algorithm>
+
+namespace search {
+
+// How many elements are required before we use a normal sketch representation.
+const uint32_t SPARSE_SKETCH_LIMIT = 255;
+
+/**
+ * Decorator to SparseSketch handling the switch to NormalSketch
+ * representation. It holds a reference to HyperLogLog::_sketch, which
+ * is a unique pointer initially pointing to this class. By resetting
+ * that pointer to a new sketch class, this class is deleted. By
+ * having the logic for exchanging the sketch class here, we remove it
+ * along with the sparse representation once the switch is made.
+ */
+template <int BucketBits = 10, typename HashT = uint32_t>
+class ExchangerSketch : public SparseSketch<BucketBits, HashT> {
+ typename Sketch<BucketBits, HashT>::UP &_sketch_ptr;
+
+ virtual int aggregate(HashT hash) override {
+ if (this->getSize() > SPARSE_SKETCH_LIMIT) {
+ NormalSketch<BucketBits, HashT> *normal_sketch =
+ new NormalSketch<BucketBits, HashT>;
+ normal_sketch->merge(*this);
+ _sketch_ptr.reset(normal_sketch); // deletes this
+ return normal_sketch->aggregate(hash);
+ }
+ return SparseSketch<BucketBits, HashT>::aggregate(hash);
+ }
+public:
+ ExchangerSketch(typename Sketch<BucketBits, HashT>::UP &sketch_ptr)
+ : _sketch_ptr(sketch_ptr) {}
+};
+
+/**
+ * HyperLogLog is used to estimate the number of unique hashes seen.
+ */
+template <int BucketBits = 10, typename HashT = uint32_t>
+class HyperLogLog {
+ typename Sketch<BucketBits, HashT>::UP _sketch;
+
+public:
+ typedef HashT hash_type;
+ enum { bucketBits = BucketBits };
+
+ // Initialize ExchangerSketch with a reference to _sketch.
+ HyperLogLog() : _sketch(new ExchangerSketch<BucketBits, HashT>(_sketch)) {}
+ HyperLogLog(const HyperLogLog<BucketBits, HashT> &other)
+ : HyperLogLog() {
+ merge(other);
+ }
+ HyperLogLog<BucketBits, HashT> &operator=(
+ const HyperLogLog<BucketBits, HashT> &other) {
+ _sketch.reset(new ExchangerSketch<BucketBits, HashT>(_sketch));
+ merge(other);
+ return *this;
+ }
+
+ // Aggregates a hash value into the sketch.
+ int aggregate(HashT hash) { return _sketch->aggregate(hash); }
+ void merge(const HyperLogLog<BucketBits, HashT> &other);
+ void serialize(vespalib::Serializer &os) const;
+ void deserialize(vespalib::Deserializer &is);
+
+ const Sketch<BucketBits, HashT> &getSketch() const { return *_sketch; }
+};
+
+
+template <int BucketBits, typename HashT>
+void HyperLogLog<BucketBits, HashT>::
+merge(const HyperLogLog<BucketBits, HashT> &other) {
+ typedef SparseSketch<BucketBits, HashT> Sparse;
+ typedef NormalSketch<BucketBits, HashT> Normal;
+
+ if (_sketch->getClassId() == Sparse::classId) {
+ Sparse &sparse = static_cast<Sparse &>(*_sketch);
+ if (other.getSketch().getClassId() == Sparse::classId) {
+ const Sparse &other_sparse =
+ static_cast<const Sparse &>(other.getSketch());
+ sparse.merge(other_sparse);
+ if (sparse.getSize() > SPARSE_SKETCH_LIMIT) {
+ typename Normal::UP new_sketch(new Normal);
+ new_sketch->merge(sparse);
+ _sketch.reset(new_sketch.release());
+ }
+ } else { // other is NormalSketch
+ const Normal &other_normal =
+ static_cast<const Normal &>(other.getSketch());
+ typename Normal::UP new_sketch(new Normal(other_normal));
+ new_sketch->merge(sparse);
+ _sketch.reset(new_sketch.release());
+ }
+ } else { // NormalSketch
+ Normal &normal = static_cast<Normal &>(*_sketch);
+ if (other.getSketch().getClassId() == Sparse::classId) {
+ const Sparse &other_sparse =
+ static_cast<const Sparse &>(other.getSketch());
+ normal.merge(other_sparse);
+ } else { // other is NormalSketch
+ const Normal &other_normal =
+ static_cast<const Normal &>(other.getSketch());
+ normal.merge(other_normal);
+ }
+ }
+}
+
+template <int BucketBits, typename HashT>
+void HyperLogLog<BucketBits, HashT>::
+serialize(vespalib::Serializer &os) const {
+ os << _sketch->getClassId();
+ _sketch->serialize(os);
+}
+
+template <int BucketBits, typename HashT>
+void HyperLogLog<BucketBits, HashT>::
+deserialize(vespalib::Deserializer &is) {
+ uint32_t type;
+ is >> type;
+ if (type == SparseSketch<BucketBits, HashT>::classId) {
+ _sketch.reset(new ExchangerSketch<BucketBits, HashT>(_sketch));
+ _sketch->deserialize(is);
+ } else if (type == NormalSketch<BucketBits, HashT>::classId) {
+ _sketch.reset(new NormalSketch<BucketBits, HashT>);
+ _sketch->deserialize(is);
+ }
+}
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/grouping/sketch.h b/searchlib/src/vespa/searchlib/grouping/sketch.h
new file mode 100644
index 00000000000..0a475a9e805
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/grouping/sketch.h
@@ -0,0 +1,260 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/document/util/compressionconfig.h>
+#include <vespa/document/util/compressor.h>
+#include <lz4.h>
+#include <vespa/searchlib/common/identifiable.h>
+#include <vespa/vespalib/data/databuffer.h>
+#include <vespa/vespalib/objects/deserializer.h>
+#include <vespa/vespalib/objects/identifiable.h>
+#include <vespa/vespalib/objects/serializer.h>
+#include <algorithm>
+#include <unordered_set>
+
+namespace search {
+
+template <int BucketBits, typename HashT> struct NormalSketch;
+
+/**
+ * Sketch interface.
+ */
+template <int BucketBits, typename HashT>
+struct Sketch {
+ enum { bucketBits = BucketBits };
+ typedef HashT hash_type;
+ typedef Sketch<BucketBits, HashT> SketchType;
+ typedef std::unique_ptr<SketchType> UP;
+
+ static const HashT BUCKET_COUNT = HashT(1) << BucketBits;
+ static const HashT BUCKET_MASK = BUCKET_COUNT - 1;
+
+ virtual ~Sketch() {}
+
+ virtual int aggregate(HashT hash) = 0;
+
+ virtual uint32_t getClassId() const = 0;
+ virtual void serialize(vespalib::Serializer &os) const = 0;
+ virtual void deserialize(vespalib::Deserializer &is) = 0;
+
+ virtual bool operator==(const SketchType &other) const = 0;
+ virtual void print(std::ostream &out) const = 0;
+};
+template <int BucketBits, typename HashT>
+std::ostream &operator<<(std::ostream &o, const Sketch<BucketBits, HashT> &s) {
+ o << "[";
+ s.print(o);
+ return o << " ]";
+}
+
+
+template <typename T>
+uint8_t countPrefixZeros(T t) {
+ uint8_t count = 0;
+ const T FIRST_BIT = T(1) << ((sizeof(T) * 8) - 1);
+ while (!((t << count) & FIRST_BIT)) {
+ ++count;
+ }
+ return ++count;
+}
+
+
+/**
+ * Sketch containing a set of hashes
+ */
+template <int BucketBits = 10, typename HashT = uint32_t>
+struct SparseSketch : Sketch<BucketBits, HashT> {
+ using typename Sketch<BucketBits, HashT>::SketchType;
+ enum { classId = IDENTIFIABLE_CLASSID_NS(search, SparseSketch) };
+
+ struct IdentityHash {
+ size_t operator()(HashT hash) const { return hash; }
+ };
+ std::unordered_set<HashT, IdentityHash> hash_set;
+
+ size_t getSize() const { return hash_set.size(); }
+
+ virtual int aggregate(HashT hash) override {
+ return hash_set.insert(hash).second ? 1 : 0;
+ }
+
+ virtual uint32_t getClassId() const override { return classId; }
+ virtual void serialize(vespalib::Serializer &os) const override;
+ virtual void deserialize(vespalib::Deserializer &is) override;
+
+ virtual bool operator==(const SketchType &other) const override {
+ const SparseSketch<BucketBits, HashT> *other_sparse =
+ dynamic_cast<const SparseSketch<BucketBits, HashT> *>(&other);
+ if (!other_sparse) {
+ return false;
+ }
+ if (hash_set.size() != other_sparse->hash_set.size()) {
+ return false;
+ }
+ for (auto hash : hash_set) {
+ if (other_sparse->hash_set.count(hash) == 0) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ virtual void print(std::ostream &out) const override {
+ out << " (" << hash_set.size() << " elements)";
+ for (auto hash : hash_set) {
+ out << " 0x" << std::hex;
+ out.width(8);
+ out.fill('0');
+ out << hash;
+ }
+ }
+
+ void merge(const SparseSketch<BucketBits, HashT> &other) {
+ hash_set.insert(other.hash_set.begin(), other.hash_set.end());
+ }
+};
+
+
+/**
+ * Sketch containing a fixed number of buckets
+ */
+template <int BucketBits = 10, typename HashT = uint32_t>
+struct NormalSketch : Sketch<BucketBits, HashT> {
+ using typename Sketch<BucketBits, HashT>::SketchType;
+ using Sketch<BucketBits, HashT>::BUCKET_COUNT;
+ using Sketch<BucketBits, HashT>::BUCKET_MASK;
+ typedef std::unique_ptr<NormalSketch> UP;
+ enum { classId = IDENTIFIABLE_CLASSID_NS(search, NormalSketch) };
+
+ uint8_t bucket[BUCKET_COUNT];
+
+ NormalSketch() { memset(&bucket[0], 0, BUCKET_COUNT); }
+
+ virtual int aggregate(HashT hash) override {
+ uint8_t existing_value = bucket[hash & BUCKET_MASK];
+ uint8_t new_value = countPrefixZeros(hash | BUCKET_MASK);
+ if (new_value > existing_value) {
+ bucket[hash & BUCKET_MASK] = new_value;
+ return new_value - existing_value;
+ }
+ return 0;
+ }
+
+ uint32_t compress_buckets_into(char *buffer, uint32_t size) const;
+ void decompress_buckets_from(char *buffer, uint32_t size);
+ virtual uint32_t getClassId() const override { return classId; }
+ virtual void serialize(vespalib::Serializer &os) const override;
+ virtual void deserialize(vespalib::Deserializer &is) override;
+
+ virtual bool operator==(const SketchType &other) const override {
+ const NormalSketch<BucketBits, HashT> *other_normal =
+ dynamic_cast<const NormalSketch<BucketBits, HashT> *>(&other);
+ if (!other_normal) {
+ return false;
+ }
+ for (size_t i = 0; i < BUCKET_COUNT; ++i) {
+ if (other_normal->bucket[i] != bucket[i]) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ virtual void print(std::ostream &out) const override {
+ for (size_t i = 0; i < BUCKET_COUNT; ++i) {
+ out << " " << int(bucket[i]);
+ }
+ }
+
+ void merge(const NormalSketch<BucketBits, HashT> &other) {
+ std::transform(bucket, bucket + BUCKET_COUNT, other.bucket, bucket,
+ [](uint8_t a, uint8_t b) { return std::max(a, b); });
+ }
+
+ void merge(const SparseSketch<BucketBits, HashT> &other) {
+ for (auto hash : other.hash_set) {
+ aggregate(hash);
+ }
+ }
+};
+
+
+template <int BucketBits, typename HashT>
+void SparseSketch<BucketBits, HashT>::
+serialize(vespalib::Serializer &os) const {
+ uint32_t size = hash_set.size();
+ os << size;
+ for (HashT hash : hash_set) {
+ os << hash;
+ }
+}
+template <int BucketBits, typename HashT>
+void SparseSketch<BucketBits, HashT>::
+deserialize(vespalib::Deserializer &is) {
+ uint32_t size;
+ is >> size;
+ for (uint32_t i = 0; i < size; ++i) {
+ uint32_t hash;
+ is >> hash;
+ aggregate(hash);
+ }
+}
+
+template <int BucketBits, typename HashT>
+uint32_t NormalSketch<BucketBits, HashT>::
+compress_buckets_into(char *buffer, uint32_t size) const {
+ document::CompressionConfig config(document::CompressionConfig::LZ4, 9, 9);
+ vespalib::ConstBufferRef org(&bucket[0], BUCKET_COUNT);
+ vespalib::DataBuffer compress_buffer(buffer, size);
+ document::CompressionConfig::Type r =
+ document::compress(config, org, compress_buffer, false);
+ assert(compress_buffer.getDead() == buffer);
+ if (r == document::CompressionConfig::LZ4) {
+ assert(compress_buffer.getDataLen() < BUCKET_COUNT);
+ return compress_buffer.getDataLen();
+ } else {
+ assert(BUCKET_COUNT <= size);
+ memcpy(buffer, bucket, BUCKET_COUNT);
+ return BUCKET_COUNT;
+ }
+}
+template <int BucketBits, typename HashT>
+void NormalSketch<BucketBits, HashT>::
+decompress_buckets_from(char *buffer, uint32_t size) {
+ if (size == BUCKET_COUNT) { // not compressed
+ memcpy(bucket, buffer, BUCKET_COUNT);
+ } else {
+ vespalib::ConstBufferRef compressed(buffer, size);
+ vespalib::DataBuffer uncompressed(reinterpret_cast<char *>(&bucket[0]),
+ BUCKET_COUNT);
+ document::decompress(document::CompressionConfig::LZ4, BUCKET_COUNT,
+ compressed, uncompressed, false);
+ }
+}
+template <int BucketBits, typename HashT>
+void NormalSketch<BucketBits, HashT>::
+serialize(vespalib::Serializer &os) const {
+ vespalib::DefaultAlloc backing(LZ4_compressBound(BUCKET_COUNT));
+ char * compress_array(static_cast<char *>(backing.get()));
+ uint32_t size =
+ compress_buckets_into(compress_array, backing.size());
+ os << BUCKET_COUNT << size;
+ for (size_t i = 0; i < size; ++i) {
+ os << static_cast<uint8_t>(compress_array[i]);
+ }
+}
+template <int BucketBits, typename HashT>
+void NormalSketch<BucketBits, HashT>::
+deserialize(vespalib::Deserializer &is) {
+ uint32_t bucket_count, size;
+ is >> bucket_count >> size;
+ assert(bucket_count == BUCKET_COUNT);
+ uint8_t compressed_array[BUCKET_COUNT];
+ for (size_t i = 0; i < size; ++i) {
+ is >> compressed_array[i];
+ }
+ decompress_buckets_from(reinterpret_cast<char *>(compressed_array), size);
+}
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/index/.gitignore b/searchlib/src/vespa/searchlib/index/.gitignore
new file mode 100644
index 00000000000..ee8938b6bf4
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/index/.gitignore
@@ -0,0 +1,6 @@
+*.So
+*.exe
+*.ilk
+*.pdb
+.depend*
+Makefile
diff --git a/searchlib/src/vespa/searchlib/index/CMakeLists.txt b/searchlib/src/vespa/searchlib/index/CMakeLists.txt
new file mode 100644
index 00000000000..0fa012ab51e
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/index/CMakeLists.txt
@@ -0,0 +1,18 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_library(searchlib_searchlib_index OBJECT
+ SOURCES
+ dictionaryfile.cpp
+ docbuilder.cpp
+ docidandfeatures.cpp
+ doctypebuilder.cpp
+ dummyfileheadercontext.cpp
+ indexbuilder.cpp
+ olddictionaryfile.cpp
+ postinglisthandle.cpp
+ postinglistcounts.cpp
+ postinglistcountfile.cpp
+ postinglistfile.cpp
+ postinglistparams.cpp
+ schemautil.cpp
+ DEPENDS
+)
diff --git a/searchlib/src/vespa/searchlib/index/OWNERS b/searchlib/src/vespa/searchlib/index/OWNERS
new file mode 100644
index 00000000000..64735d11d93
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/index/OWNERS
@@ -0,0 +1 @@
+tegge
diff --git a/searchlib/src/vespa/searchlib/index/bitvectorkeys.h b/searchlib/src/vespa/searchlib/index/bitvectorkeys.h
new file mode 100644
index 00000000000..2a1e33026af
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/index/bitvectorkeys.h
@@ -0,0 +1,43 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright (C) 1998-2003 Fast Search & Transfer ASA
+// Copyright (C) 2003 Overture Services Norway AS
+
+#pragma once
+
+namespace search
+{
+
+namespace index
+{
+
+class BitVectorWordSingleKey
+{
+public:
+ uint64_t _wordNum;
+ uint32_t _numDocs;
+ uint32_t _pad;
+
+ BitVectorWordSingleKey(void)
+ : _wordNum(0),
+ _numDocs(0),
+ _pad(0)
+ {
+ }
+
+ bool
+ operator<(const BitVectorWordSingleKey &rhs) const
+ {
+ return _wordNum < rhs._wordNum;
+ }
+
+ bool
+ operator==(const BitVectorWordSingleKey &rhs) const
+ {
+ return _wordNum == rhs._wordNum;
+ }
+};
+
+} // namespace index
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/index/dictionaryfile.cpp b/searchlib/src/vespa/searchlib/index/dictionaryfile.cpp
new file mode 100644
index 00000000000..9915e2c56e1
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/index/dictionaryfile.cpp
@@ -0,0 +1,45 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".index.dictionaryfile");
+#include "dictionaryfile.h"
+
+namespace search
+{
+
+namespace index
+{
+
+
+DictionaryFileSeqRead::~DictionaryFileSeqRead(void)
+{
+}
+
+
+DictionaryFileSeqWrite::~DictionaryFileSeqWrite(void)
+{
+}
+
+
+DictionaryFileRandRead::DictionaryFileRandRead(void)
+ : _memoryMapped(false)
+{
+}
+
+
+DictionaryFileRandRead::~DictionaryFileRandRead(void)
+{
+}
+
+
+void
+DictionaryFileRandRead::afterOpen(FastOS_FileInterface &file)
+{
+ _memoryMapped = file.MemoryMapPtr(0) != NULL;
+}
+
+
+} // namespace index
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/index/dictionaryfile.h b/searchlib/src/vespa/searchlib/index/dictionaryfile.h
new file mode 100644
index 00000000000..8a3f101ba99
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/index/dictionaryfile.h
@@ -0,0 +1,138 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include "postinglistcounts.h"
+#include "postinglisthandle.h"
+#include "postinglistcountfile.h"
+#include <vespa/searchlib/common/tunefileinfo.h>
+#include <map>
+#include <vector>
+#include <string>
+#include <limits>
+
+namespace search
+{
+
+namespace common
+{
+
+class FileHeaderContext;
+
+}
+
+namespace index
+{
+
+/**
+ * Interface for dictionary file containing words and counts for words.
+ */
+class DictionaryFileSeqRead : public PostingListCountFileSeqRead
+{
+public:
+ DictionaryFileSeqRead(void)
+ {
+ }
+
+ virtual
+ ~DictionaryFileSeqRead(void);
+
+ /**
+ * Read word and counts. Only nonzero counts are returned. If at
+ * end of dictionary then noWordNumHigh() is returned as word number.
+ */
+ virtual void
+ readWord(vespalib::string &word,
+ uint64_t &wordNum,
+ PostingListCounts &counts) = 0;
+
+ /**
+ * Open dictionary file for sequential read.
+ */
+ virtual bool
+ open(const vespalib::string &name, const TuneFileSeqRead &tuneFileRead) = 0;
+
+ static uint64_t
+ noWordNum(void)
+ {
+ return 0u;
+ }
+
+ static uint64_t
+ noWordNumHigh(void)
+ {
+ return std::numeric_limits<uint64_t>::max();
+ }
+};
+
+/**
+ * Interface for dictionary file containing words and count for words.
+ */
+class DictionaryFileSeqWrite : public PostingListCountFileSeqWrite
+{
+protected:
+public:
+ DictionaryFileSeqWrite(void)
+ {
+ }
+
+ virtual
+ ~DictionaryFileSeqWrite(void);
+
+ /**
+ * Write word and counts. Only nonzero counts should be supplied.
+ */
+ virtual void
+ writeWord(const vespalib::stringref &word,
+ const PostingListCounts &counts) = 0;
+};
+
+
+/**
+ * Interface for dictionary file containing words and counts.
+ */
+class DictionaryFileRandRead
+{
+protected:
+ // Can be examined after open
+ bool _memoryMapped;
+public:
+ DictionaryFileRandRead(void);
+
+ virtual
+ ~DictionaryFileRandRead(void);
+
+ virtual bool
+ lookup(const vespalib::stringref &word,
+ uint64_t &wordNum,
+ PostingListOffsetAndCounts &offsetAndCounts) = 0;
+
+ /**
+ * Open dictionary file for random read.
+ */
+ virtual bool
+ open(const vespalib::string &name,
+ const TuneFileRandRead &tuneFileRead) = 0;
+
+ /**
+ * Close dictionary file.
+ */
+ virtual bool
+ close(void) = 0;
+
+ bool
+ getMemoryMapped(void) const
+ {
+ return _memoryMapped;
+ }
+
+ virtual uint64_t
+ getNumWordIds(void) const = 0;
+protected:
+ void
+ afterOpen(FastOS_FileInterface &file);
+};
+
+} // namespace index
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/index/docbuilder.cpp b/searchlib/src/vespa/searchlib/index/docbuilder.cpp
new file mode 100644
index 00000000000..fc362b6a306
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/index/docbuilder.cpp
@@ -0,0 +1,930 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".index.docbuilder");
+#include "docbuilder.h"
+#include "doctypebuilder.h"
+#include <vespa/document/datatype/annotationtype.h>
+#include <vespa/document/datatype/datatypes.h>
+#include <vespa/document/datatype/urldatatype.h>
+#include <vespa/document/fieldvalue/fieldvalues.h>
+#include <vespa/document/repo/documenttyperepo.h>
+#include <vespa/fastlib/text/unicodeutil.h>
+#include <vespa/vespalib/geo/zcurve.h>
+#include <vespa/vespalib/text/utf8.h>
+#include <vespa/vespalib/tensor/tensor.h>
+
+using namespace document;
+using namespace search::index;
+using vespalib::Utf8Reader;
+using vespalib::Utf8Writer;
+using vespalib::geo::ZCurve;
+
+namespace {
+
+void
+insertStr(const Schema::Field & sfield, document::FieldValue * fvalue, const vespalib::string & val)
+{
+ if (sfield.getDataType() == Schema::STRING ||
+ sfield.getDataType() == Schema::RAW)
+ {
+ (dynamic_cast<LiteralFieldValueB *>(fvalue))->setValue(val);
+ } else {
+ throw DocBuilder::Error(vespalib::make_string("Field '%s' not compatible", sfield.getName().c_str()));
+ }
+}
+
+void
+insertInt(const Schema::Field & sfield, document::FieldValue * fvalue, int64_t val)
+{
+ if (sfield.getDataType() == Schema::INT8) {
+ (dynamic_cast<ByteFieldValue *>(fvalue))->setValue((uint8_t)val);
+ } else if (sfield.getDataType() == Schema::INT16) {
+ (dynamic_cast<ShortFieldValue *>(fvalue))->setValue((int16_t)val);
+ } else if (sfield.getDataType() == Schema::INT32) {
+ (dynamic_cast<IntFieldValue *>(fvalue))->setValue((int32_t)val);
+ } else if (sfield.getDataType() == Schema::INT64) {
+ (dynamic_cast<LongFieldValue *>(fvalue))->setValue(val);
+ } else {
+ throw DocBuilder::Error(vespalib::make_string("Field '%s' not compatible", sfield.getName().c_str()));
+ }
+}
+
+void
+insertFloat(const Schema::Field & sfield, document::FieldValue * fvalue, double val)
+{
+ if (sfield.getDataType() == Schema::FLOAT) {
+ (dynamic_cast<FloatFieldValue *>(fvalue))->setValue((float)val);
+ } else if (sfield.getDataType() == Schema::DOUBLE) {
+ (dynamic_cast<DoubleFieldValue *>(fvalue))->setValue(val);
+ } else {
+ throw DocBuilder::Error(vespalib::make_string("Field '%s' not compatible", sfield.getName().c_str()));
+ }
+}
+
+void insertPredicate(const Schema::Field &sfield,
+ document::FieldValue *fvalue,
+ std::unique_ptr<vespalib::Slime> val) {
+ if (sfield.getDataType() == Schema::BOOLEANTREE) {
+ *(dynamic_cast<PredicateFieldValue *>(fvalue)) =
+ PredicateFieldValue(std::move(val));
+ } else {
+ throw DocBuilder::Error(vespalib::make_string(
+ "Field '%s' not compatible",
+ sfield.getName().c_str()));
+ }
+}
+
+void insertTensor(const Schema::Field &schemaField,
+ document::FieldValue *fvalue,
+ std::unique_ptr<vespalib::tensor::Tensor> val) {
+ if (schemaField.getDataType() == Schema::TENSOR) {
+ *(dynamic_cast<TensorFieldValue *>(fvalue)) = std::move(val);
+ } else {
+ throw DocBuilder::Error(vespalib::make_string(
+ "Field '%s' not compatible",
+ schemaField.getName().c_str()));
+ }
+}
+
+void
+insertPosition(const Schema::Field & sfield,
+ document::FieldValue * fvalue, int32_t xpos, int32_t ypos)
+{
+ assert(*fvalue->getDataType() == *DataType::LONG);
+ assert(sfield.getDataType() == Schema::INT64);
+ (void) sfield;
+ int64_t zpos = ZCurve::encode(xpos, ypos);
+ document::LongFieldValue *zvalue =
+ dynamic_cast<LongFieldValue *>(fvalue);
+ zvalue->setValue(zpos);
+}
+
+
+void
+insertRaw(const Schema::Field & sfield,
+ document::FieldValue *fvalue, const void *buf, size_t len)
+{
+ assert(*fvalue->getDataType() == *DataType::RAW);
+ assert(sfield.getDataType() == Schema::RAW);
+ (void) sfield;
+ document::RawFieldValue *rfvalue =
+ dynamic_cast<RawFieldValue *>(fvalue);
+ rfvalue->setValue(static_cast<const char *>(buf), len);
+}
+
+
+template <typename T>
+std::unique_ptr<T>
+make_UP(T *p)
+{
+ return std::unique_ptr<T>(p);
+}
+
+template <typename T>
+std::unique_ptr<T>
+makeUP(T *p)
+{
+ return std::unique_ptr<T>(p);
+}
+
+} // namespace
+
+namespace docbuilderkludge
+{
+
+namespace linguistics
+{
+
+const vespalib::string SPANTREE_NAME("linguistics");
+
+enum TokenType {
+ UNKNOWN = 0,
+ SPACE = 1,
+ PUNCTUATION = 2,
+ SYMBOL = 3,
+ ALPHABETIC = 4,
+ NUMERIC = 5,
+ MARKER = 6
+};
+
+}
+
+}
+
+using namespace docbuilderkludge;
+
+namespace
+{
+
+Annotation::UP
+makeTokenType(linguistics::TokenType type)
+{
+ return makeUP(new Annotation(*AnnotationType::TOKEN_TYPE,
+ makeUP(new IntFieldValue(type))));
+}
+
+}
+
+namespace search {
+namespace index {
+
+VESPA_IMPLEMENT_EXCEPTION(DocBuilderError, vespalib::Exception);
+
+DocBuilder::FieldHandle::FieldHandle(const document::Field & dfield, const Schema::Field & field) :
+ _sfield(field),
+ _value(),
+ _element()
+{
+ _value = dfield.createValue();
+}
+
+
+DocBuilder::CollectionFieldHandle::CollectionFieldHandle(const document::Field & dfield, const Schema::Field & field) :
+ FieldHandle(dfield, field),
+ _elementWeight(1)
+{
+}
+
+void
+DocBuilder::CollectionFieldHandle::startElement(int32_t weight)
+{
+ assert(_element.get() == NULL);
+ _elementWeight = weight;
+ const CollectionFieldValue * value = dynamic_cast<CollectionFieldValue *>(_value.get());
+ _element = value->createNested();
+}
+
+void
+DocBuilder::CollectionFieldHandle::endElement()
+{
+ if (_sfield.getCollectionType() == Schema::ARRAY) {
+ onEndElement();
+ ArrayFieldValue * value = dynamic_cast<ArrayFieldValue *>(_value.get());
+ value->add(*_element);
+ } else if (_sfield.getCollectionType() ==
+ Schema::WEIGHTEDSET) {
+ onEndElement();
+ WeightedSetFieldValue * value = dynamic_cast<WeightedSetFieldValue *>(_value.get());
+ value->add(*_element, _elementWeight);
+ } else {
+ throw Error(vespalib::make_string("Field '%s' not compatible", _sfield.getName().c_str()));
+ }
+ _element.reset(NULL);
+}
+
+
+DocBuilder::IndexFieldHandle::IndexFieldHandle(const FixedTypeRepo & repo, const document::Field & dfield, const Schema::Field & sfield)
+ : CollectionFieldHandle(dfield, sfield),
+ _str(),
+ _strSymbols(0u),
+ _spanList(NULL),
+ _spanTree(),
+ _lastSpan(NULL),
+ _spanStart(0u),
+ _autoAnnotate(true),
+ _autoSpace(true),
+ _skipAutoSpace(true),
+ _uriField(false),
+ _subField(),
+ _repo(repo)
+{
+ _str.reserve(1023);
+
+ if (_sfield.getCollectionType() == Schema::SINGLE) {
+ if (*_value->getDataType() == document::UrlDataType::getInstance())
+ _uriField = true;
+ } else {
+ const CollectionFieldValue * value = dynamic_cast<CollectionFieldValue *>(_value.get());
+ if (value->getNestedType() == document::UrlDataType::getInstance())
+ _uriField = true;
+ }
+ startAnnotate();
+}
+
+
+void
+DocBuilder::IndexFieldHandle::append(const vespalib::string &val)
+{
+ _strSymbols += val.size();
+ _str += val;
+}
+
+
+void
+DocBuilder::IndexFieldHandle::addStr(const vespalib::string &val)
+{
+ assert(_spanTree.get() != NULL);
+ if (val.empty())
+ return;
+ if (!_skipAutoSpace && _autoSpace)
+ addSpace();
+ _skipAutoSpace = false;
+ _spanStart = _strSymbols;
+ append(val);
+ if (_autoAnnotate) {
+ addSpan();
+ addTermAnnotation();
+ if (val[0] >= '0' && val[0] <= '9') {
+ addNumericTokenAnnotation();
+ } else {
+ addAlphabeticTokenAnnotation();
+ }
+ }
+}
+
+
+void
+DocBuilder::IndexFieldHandle::addSpace(void)
+{
+ addNoWordStr(" ");
+}
+
+
+void
+DocBuilder::IndexFieldHandle::addNoWordStr(const vespalib::string &val)
+{
+ assert(_spanTree.get() != NULL);
+ if (val.empty())
+ return;
+ _spanStart = _strSymbols;
+ append(val);
+ if (_autoAnnotate) {
+ addSpan();
+ if (val[0] == ' ' || val[0] == '\t')
+ addSpaceTokenAnnotation();
+ else if (val[0] >= '0' && val[0] <= '9') {
+ addNumericTokenAnnotation();
+ } else {
+ addAlphabeticTokenAnnotation();
+ }
+
+ }
+ _skipAutoSpace = true;
+}
+
+
+void
+DocBuilder::IndexFieldHandle::addTokenizedString(const vespalib::string &val,
+ bool urlMode)
+{
+ Utf8Reader r(val);
+ vespalib::string sbuf;
+ Utf8Writer w(sbuf);
+ uint32_t c = 0u;
+ bool oldWord = false;
+ assert(_uriField == urlMode);
+ assert(_uriField != _subField.empty());
+
+ while (r.hasMore()) {
+ c = r.getChar();
+ bool newWord = Fast_UnicodeUtil::IsWordChar(c) ||
+ (urlMode && (c == '-' || c == '_'));
+ if (oldWord != newWord) {
+ if (!sbuf.empty()) {
+ if (oldWord)
+ addStr(sbuf);
+ else
+ addNoWordStr(sbuf);
+ sbuf.clear();
+ }
+ oldWord = newWord;
+ }
+ w.putChar(c);
+ }
+ if (!sbuf.empty()) {
+ if (oldWord)
+ addStr(sbuf);
+ else
+ addNoWordStr(sbuf);
+ }
+}
+
+
+void
+DocBuilder::IndexFieldHandle::addSpan(size_t start, size_t len)
+{
+ const SpanNode &span = _spanList->add(makeUP(new Span(start, len)));
+ _lastSpan = &span;
+}
+
+
+void
+DocBuilder::IndexFieldHandle::addSpan(void)
+{
+ size_t endPos = _strSymbols;
+ assert(endPos > _spanStart);
+ addSpan(_spanStart, endPos - _spanStart);
+ _spanStart = endPos;
+}
+
+
+void
+DocBuilder::IndexFieldHandle::addSpaceTokenAnnotation(void)
+{
+ assert(_spanTree.get() != NULL);
+ assert(_lastSpan != NULL);
+ _spanTree->annotate(*_lastSpan, makeTokenType(linguistics::SPACE));
+}
+
+
+void
+DocBuilder::IndexFieldHandle::addNumericTokenAnnotation(void)
+{
+ assert(_spanTree.get() != NULL);
+ assert(_lastSpan != NULL);
+ _spanTree->annotate(*_lastSpan, makeTokenType(linguistics::NUMERIC));
+}
+
+
+void
+DocBuilder::IndexFieldHandle::addAlphabeticTokenAnnotation(void)
+{
+ assert(_spanTree.get() != NULL);
+ assert(_lastSpan != NULL);
+ _spanTree->annotate(*_lastSpan, makeTokenType(linguistics::ALPHABETIC));
+}
+
+
+void
+DocBuilder::IndexFieldHandle::addTermAnnotation(void)
+{
+ assert(_spanTree.get() != NULL);
+ assert(_lastSpan != NULL);
+ _spanTree->annotate(*_lastSpan, *AnnotationType::TERM);
+}
+
+
+void
+DocBuilder::IndexFieldHandle::addTermAnnotation(const vespalib::string &val)
+{
+ assert(_spanTree.get() != NULL);
+ assert(_lastSpan != NULL);
+ _spanTree->annotate(*_lastSpan,
+ makeUP(new Annotation(*AnnotationType::TERM,
+ makeUP(new StringFieldValue(val)))));
+}
+
+
+void
+DocBuilder::IndexFieldHandle::onEndElement(void)
+{
+ // Flush data for index field.
+ assert(_subField.empty());
+ if (_uriField)
+ return;
+ StringFieldValue * value;
+ if (_sfield.getCollectionType() != Schema::SINGLE) {
+ value = dynamic_cast<StringFieldValue *>(_element.get());
+ } else {
+ value = dynamic_cast<StringFieldValue *>(_value.get());
+ }
+ value->setValue(_str);
+ // Also drop all spans no annotation for now
+ if (_spanTree->numAnnotations() > 0u) {
+ StringFieldValue::SpanTrees trees;
+ trees.emplace_back(std::move(_spanTree));
+ value->setSpanTrees(trees, _repo);
+ } else {
+ _spanTree.reset();
+ }
+ _spanList = NULL;
+ _lastSpan = NULL;
+ _spanStart = 0u;
+ _strSymbols = 0u;
+ _str.clear();
+ _skipAutoSpace = true;
+ startAnnotate();
+}
+
+
+void
+DocBuilder::IndexFieldHandle::onEndField(void)
+{
+ if (_sfield.getCollectionType() == Schema::SINGLE)
+ onEndElement();
+}
+
+
+void
+DocBuilder::IndexFieldHandle::startAnnotate(void)
+{
+ SpanList::UP span_list(new SpanList);
+ _spanList = span_list.get();
+ _spanTree.reset(new SpanTree(linguistics::SPANTREE_NAME, std::move(span_list)));
+}
+
+
+void
+DocBuilder::IndexFieldHandle::setAutoAnnotate(bool autoAnnotate)
+{
+ _autoAnnotate = autoAnnotate;
+}
+
+
+void
+DocBuilder::IndexFieldHandle::setAutoSpace(bool autoSpace)
+{
+ _autoSpace = autoSpace;
+}
+
+
+void
+DocBuilder::IndexFieldHandle::startSubField(const vespalib::string &subField)
+{
+ assert(_subField.empty());
+ assert(_uriField);
+ _subField = subField;
+}
+
+
+
+void
+DocBuilder::IndexFieldHandle::endSubField(void)
+{
+ assert(!_subField.empty());
+ assert(_uriField);
+ StructuredFieldValue *sValue;
+ if (_sfield.getCollectionType() != Schema::SINGLE) {
+ sValue = dynamic_cast<StructFieldValue *>(_element.get());
+ } else {
+ sValue = dynamic_cast<StructFieldValue *>(_value.get());
+ }
+ const Field &f = sValue->getField(_subField);
+ FieldValue::UP fval(f.getDataType().createFieldValue());
+ *fval = _str;
+ StringFieldValue *value = dynamic_cast<StringFieldValue *>(fval.get());
+ StringFieldValue::SpanTrees trees;
+ trees.emplace_back(std::move(_spanTree));
+ value->setSpanTrees(trees, _repo);
+ sValue->setValue(f, *fval);
+ _spanList = NULL;
+ _lastSpan = NULL;
+ _spanStart = 0u;
+ _strSymbols = 0u;
+ _str.clear();
+ _skipAutoSpace = true;
+ startAnnotate();
+ _subField.clear();
+}
+
+
+
+DocBuilder::AttributeFieldHandle::
+AttributeFieldHandle(const document::Field &dfield,
+ const Schema::Field &sfield)
+ : CollectionFieldHandle(dfield, sfield)
+{
+}
+
+void
+DocBuilder::AttributeFieldHandle::addStr(const vespalib::string & val)
+{
+ if (_element.get() != NULL) {
+ insertStr(_sfield, _element.get(), val);
+ } else {
+ insertStr(_sfield, _value.get(), val);
+ }
+}
+
+void
+DocBuilder::AttributeFieldHandle::addInt(int64_t val)
+{
+ if (_element.get() != NULL) {
+ insertInt(_sfield, _element.get(), val);
+ } else {
+ insertInt(_sfield, _value.get(), val);
+ }
+}
+
+void
+DocBuilder::AttributeFieldHandle::addFloat(double val)
+{
+ if (_element.get() != NULL) {
+ insertFloat(_sfield, _element.get(), val);
+ } else {
+ insertFloat(_sfield, _value.get(), val);
+ }
+}
+
+void
+DocBuilder::AttributeFieldHandle::addPredicate(
+ std::unique_ptr<vespalib::Slime> val)
+{
+ if (_element.get() != NULL) {
+ insertPredicate(_sfield, _element.get(), std::move(val));
+ } else {
+ insertPredicate(_sfield, _value.get(), std::move(val));
+ }
+}
+
+
+void
+DocBuilder::AttributeFieldHandle::addTensor(
+ std::unique_ptr<vespalib::tensor::Tensor> val)
+{
+ if (_element.get() != NULL) {
+ insertTensor(_sfield, _element.get(), std::move(val));
+ } else {
+ insertTensor(_sfield, _value.get(), std::move(val));
+ }
+}
+
+
+void
+DocBuilder::AttributeFieldHandle::addPosition(int32_t xpos, int32_t ypos)
+{
+ if (_element.get() != NULL) {
+ insertPosition(_sfield, _element.get(), xpos, ypos);
+ } else {
+ insertPosition(_sfield, _value.get(), xpos, ypos);
+ }
+}
+
+
+DocBuilder::SummaryFieldHandle::
+SummaryFieldHandle(const document::Field & dfield,
+ const Schema::Field & sfield)
+ : CollectionFieldHandle(dfield, sfield)
+{
+}
+
+void
+DocBuilder::SummaryFieldHandle::addStr(const vespalib::string & val)
+{
+ if (_element.get() != NULL) {
+ insertStr(_sfield, _element.get(), val);
+ } else {
+ insertStr(_sfield, _value.get(), val);
+ }
+}
+
+void
+DocBuilder::SummaryFieldHandle::addInt(int64_t val)
+{
+ if (_element.get() != NULL) {
+ insertInt(_sfield, _element.get(), val);
+ } else {
+ insertInt(_sfield, _value.get(), val);
+ }
+}
+
+void
+DocBuilder::SummaryFieldHandle::addFloat(double val)
+{
+ if (_element.get() != NULL) {
+ insertFloat(_sfield, _element.get(), val);
+ } else {
+ insertFloat(_sfield, _value.get(), val);
+ }
+}
+
+
+void
+DocBuilder::SummaryFieldHandle::addRaw(const void *buf, size_t len)
+{
+ if (_element.get() != NULL) {
+ insertRaw(_sfield, _element.get(), buf, len);
+ } else {
+ insertRaw(_sfield, _value.get(), buf, len);
+ }
+}
+
+
+DocBuilder::DocumentHandle::DocumentHandle(document::Document &doc,
+ const vespalib::string & docId)
+ : _type(&doc.getType()),
+ _doc(&doc),
+ _fieldHandle(),
+ _repo(*_doc->getRepo(), *_type)
+{
+ (void) docId;
+}
+
+
+DocBuilder::DocBuilder(const Schema &schema)
+ : _schema(schema),
+ _doctypes_config(DocTypeBuilder(schema).makeConfig()),
+ _repo(new DocumentTypeRepo(_doctypes_config)),
+ _docType(*_repo->getDocumentType("searchdocument")),
+ _doc(),
+ _handleDoc(),
+ _currDoc()
+{
+}
+
+DocBuilder &
+DocBuilder::startDocument(const vespalib::string & docId)
+{
+ _doc.reset(new Document(_docType, DocumentId(docId)));
+ _doc->setRepo(*_repo);
+ _handleDoc.reset(new DocumentHandle(*_doc, docId));
+ return *this;
+}
+
+document::Document::UP
+DocBuilder::endDocument()
+{
+ _handleDoc->endDocument(_doc);
+ return std::move(_doc);
+}
+
+DocBuilder &
+DocBuilder::startIndexField(const vespalib::string & name)
+{
+ assert(_handleDoc->getFieldHandle().get() == NULL);
+ uint32_t field_id = _schema.getIndexFieldId(name);
+ assert(field_id != Schema::UNKNOWN_FIELD_ID);
+ _handleDoc->startIndexField(_schema.getIndexField(field_id));
+ _currDoc = _handleDoc.get();
+ return *this;
+}
+
+DocBuilder &
+DocBuilder::startAttributeField(const vespalib::string & name)
+{
+ assert(_handleDoc->getFieldHandle().get() == NULL);
+ uint32_t field_id = _schema.getIndexFieldId(name);
+ assert(field_id == Schema::UNKNOWN_FIELD_ID);
+ field_id = _schema.getAttributeFieldId(name);
+ assert(field_id != Schema::UNKNOWN_FIELD_ID);
+ _handleDoc->startAttributeField(_schema.getAttributeField(field_id));
+ _currDoc = _handleDoc.get();
+ return *this;
+}
+
+DocBuilder &
+DocBuilder::startSummaryField(const vespalib::string & name)
+{
+ assert(_handleDoc->getFieldHandle().get() == NULL);
+ uint32_t field_id = _schema.getIndexFieldId(name);
+ assert(field_id == Schema::UNKNOWN_FIELD_ID);
+ field_id = _schema.getAttributeFieldId(name);
+ assert(field_id == Schema::UNKNOWN_FIELD_ID);
+ field_id = _schema.getSummaryFieldId(name);
+ assert(field_id != Schema::UNKNOWN_FIELD_ID);
+ _handleDoc->startSummaryField(_schema.getSummaryField(field_id));
+ _currDoc = _handleDoc.get();
+ return *this;
+}
+
+DocBuilder &
+DocBuilder::endField()
+{
+ assert(_currDoc != NULL);
+ _currDoc->endField();
+ _currDoc = NULL;
+ return *this;
+}
+
+DocBuilder &
+DocBuilder::startElement(int32_t weight)
+{
+ assert(_currDoc != NULL);
+ _currDoc->getFieldHandle()->startElement(weight);
+ return *this;
+}
+
+DocBuilder &
+DocBuilder::endElement()
+{
+ assert(_currDoc != NULL);
+ _currDoc->getFieldHandle()->endElement();
+ return *this;
+}
+
+DocBuilder &
+DocBuilder::addStr(const vespalib::string & str)
+{
+ assert(_currDoc != NULL);
+ _currDoc->getFieldHandle()->addStr(str);
+ return *this;
+}
+
+DocBuilder &
+DocBuilder::addSpace(void)
+{
+ assert(_currDoc != NULL);
+ _currDoc->getFieldHandle()->addSpace();
+ return *this;
+}
+
+DocBuilder &
+DocBuilder::addNoWordStr(const vespalib::string & str)
+{
+ assert(_currDoc != NULL);
+ _currDoc->getFieldHandle()->addNoWordStr(str);
+ return *this;
+}
+
+DocBuilder &
+DocBuilder::addTokenizedString(const vespalib::string &str)
+{
+ assert(_currDoc != NULL);
+ _currDoc->getFieldHandle()->addTokenizedString(str, false);
+ return *this;
+}
+
+DocBuilder &
+DocBuilder::addUrlTokenizedString(const vespalib::string &str)
+{
+ assert(_currDoc != NULL);
+ _currDoc->getFieldHandle()->addTokenizedString(str, true);
+ return *this;
+}
+
+DocBuilder &
+DocBuilder::addInt(int64_t val)
+{
+ assert(_currDoc != NULL);
+ _currDoc->getFieldHandle()->addInt(val);
+ return *this;
+}
+
+DocBuilder &
+DocBuilder::addFloat(double val)
+{
+ assert(_currDoc != NULL);
+ _currDoc->getFieldHandle()->addFloat(val);
+ return *this;
+}
+
+
+DocBuilder &
+DocBuilder::addPredicate(std::unique_ptr<vespalib::Slime> val)
+{
+ assert(_currDoc != NULL);
+ _currDoc->getFieldHandle()->addPredicate(std::move(val));
+ return *this;
+}
+
+
+DocBuilder &
+DocBuilder::addTensor(std::unique_ptr<vespalib::tensor::Tensor> val)
+{
+ assert(_currDoc != NULL);
+ _currDoc->getFieldHandle()->addTensor(std::move(val));
+ return *this;
+}
+
+
+DocBuilder &
+DocBuilder::addSpan(size_t start, size_t len)
+{
+ assert(_currDoc != NULL);
+ _currDoc->getFieldHandle()->addSpan(start, len);
+ return *this;
+}
+
+
+DocBuilder &
+DocBuilder::addSpan(void)
+{
+ assert(_currDoc != NULL);
+ _currDoc->getFieldHandle()->addSpan();
+ return *this;
+}
+
+
+DocBuilder &
+DocBuilder::addSpaceTokenAnnotation(void)
+{
+ assert(_currDoc != NULL);
+ _currDoc->getFieldHandle()->addSpaceTokenAnnotation();
+ return *this;
+}
+
+
+DocBuilder &
+DocBuilder::addNumericTokenAnnotation(void)
+{
+ assert(_currDoc != NULL);
+ _currDoc->getFieldHandle()->addNumericTokenAnnotation();
+ return *this;
+}
+
+
+DocBuilder &
+DocBuilder::addAlphabeticTokenAnnotation(void)
+{
+ assert(_currDoc != NULL);
+ _currDoc->getFieldHandle()->addAlphabeticTokenAnnotation();
+ return *this;
+}
+
+
+DocBuilder&
+DocBuilder::addTermAnnotation(void)
+{
+ assert(_currDoc != NULL);
+ _currDoc->getFieldHandle()->addTermAnnotation();
+ return *this;
+}
+
+
+DocBuilder &
+DocBuilder::addTermAnnotation(const vespalib::string &val)
+{
+ assert(_currDoc != NULL);
+ _currDoc->getFieldHandle()->addTermAnnotation(val);
+ return *this;
+}
+
+
+DocBuilder &
+DocBuilder::addPosition(int32_t xpos, int32_t ypos)
+{
+ assert(_currDoc != NULL);
+ _currDoc->getFieldHandle()->addPosition(xpos, ypos);
+ return *this;
+}
+
+
+DocBuilder &
+DocBuilder::addRaw(const void *buf, size_t len)
+{
+ assert(_currDoc != NULL);
+ _currDoc->getFieldHandle()->addRaw(buf, len);
+ return *this;
+}
+
+
+DocBuilder &
+DocBuilder::startSubField(const vespalib::string &subField)
+{
+ assert(_currDoc != NULL);
+ _currDoc->getFieldHandle()->startSubField(subField);
+ return *this;
+}
+
+
+DocBuilder &
+DocBuilder::endSubField(void)
+{
+ assert(_currDoc != NULL);
+ _currDoc->getFieldHandle()->endSubField();
+ return *this;
+}
+
+
+DocBuilder &
+DocBuilder::setAutoAnnotate(bool autoAnnotate)
+{
+ assert(_currDoc != NULL);
+ _currDoc->getFieldHandle()->setAutoAnnotate(autoAnnotate);
+ return *this;
+}
+
+
+DocBuilder &
+DocBuilder::setAutoSpace(bool autoSpace)
+{
+ assert(_currDoc != NULL);
+ _currDoc->getFieldHandle()->setAutoSpace(autoSpace);
+ return *this;
+}
+
+
+} // namespace search::index
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/index/docbuilder.h b/searchlib/src/vespa/searchlib/index/docbuilder.h
new file mode 100644
index 00000000000..8bb5f08d722
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/index/docbuilder.h
@@ -0,0 +1,432 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/document/datatype/datatypes.h>
+#include <vespa/document/fieldvalue/fieldvalues.h>
+#include <vespa/document/annotation/annotation.h>
+#include <vespa/document/annotation/span.h>
+#include <vespa/document/annotation/spanlist.h>
+#include <vespa/document/annotation/spantree.h>
+#include <vespa/document/repo/documenttyperepo.h>
+#include <vespa/vespalib/util/exception.h>
+#include <vespa/vespalib/util/stringfmt.h>
+#include "doctypebuilder.h"
+
+namespace vespalib { namespace tensor { class Tensor; } }
+namespace search {
+namespace index {
+
+VESPA_DEFINE_EXCEPTION(DocBuilderError, vespalib::Exception);
+
+/**
+ * Builder class used to generate a search document that corresponds
+ * to an index schema.
+ **/
+class DocBuilder
+{
+public:
+ typedef DocBuilderError Error;
+
+private:
+ /**
+ * Base class for handling the construction of a field.
+ **/
+ class FieldHandle {
+ public:
+ typedef std::shared_ptr<FieldHandle> SP;
+ protected:
+ const Schema::Field & _sfield;
+ document::FieldValue::UP _value;
+ document::FieldValue::UP _element;
+ public:
+ FieldHandle(const document::Field & dfield, const Schema::Field & field);
+ virtual ~FieldHandle() {}
+ virtual void startElement(int32_t weight) { (void) weight; throw Error("Function not supported"); }
+ virtual void endElement() { throw Error("Function not supported"); }
+ virtual void addStr(const vespalib::string & val) { (void) val; throw Error("Function not supported"); }
+
+ virtual void
+ addSpace(void)
+ {
+ throw Error("Function not supported");
+ }
+
+ virtual void
+ addNoWordStr(const vespalib::string & val)
+ {
+ (void) val;
+ throw Error("Function not supported");
+ }
+
+ virtual void
+ addTokenizedString(const vespalib::string &val,
+ bool urlMode)
+ {
+ (void) val;
+ (void) urlMode;
+ throw Error("Function not supported");
+ }
+
+ virtual void
+ addSpan(size_t start, size_t len)
+ {
+ (void) start;
+ (void) len;
+ throw Error("Function not supported");
+ }
+
+ virtual void
+ addSpan(void)
+ {
+ throw Error("Function not supported");
+ }
+
+ virtual void
+ addSpaceTokenAnnotation(void)
+ {
+ throw Error("Function not supported");
+ }
+
+ virtual void
+ addNumericTokenAnnotation(void)
+ {
+ throw Error("Function not supported");
+ }
+
+ virtual void
+ addAlphabeticTokenAnnotation(void)
+ {
+ throw Error("Function not supported");
+ }
+
+ virtual void
+ addTermAnnotation(void)
+ {
+ throw Error("Function not supported");
+ }
+
+ virtual void
+ addTermAnnotation(const vespalib::string &val)
+ {
+ (void) val;
+ throw Error("Function not supported");
+ }
+
+ virtual void addInt(int64_t val) { (void) val; throw Error("Function not supported"); }
+ virtual void addFloat(double val) { (void) val; throw Error("Function not supported"); }
+ virtual void addPredicate(std::unique_ptr<vespalib::Slime>) {
+ throw Error("Function not supported");
+ }
+ virtual void addTensor(std::unique_ptr<vespalib::tensor::Tensor>) {
+ throw Error("Function not supported");
+ }
+ const document::FieldValue::UP & getValue() const { return _value; }
+ const Schema::Field & getField() const { return _sfield; }
+
+ virtual void
+ onEndElement(void)
+ {
+ }
+
+ virtual void
+ onEndField(void)
+ {
+ }
+
+ virtual void
+ setAutoAnnotate(bool autoAnnotate)
+ {
+ (void) autoAnnotate;
+ throw Error("Function not supported");
+ }
+
+ virtual void
+ setAutoSpace(bool autoSpace)
+ {
+ (void) autoSpace;
+ throw Error("Function not supported");
+ }
+
+ virtual void
+ addPosition(int32_t xpos, int32_t ypos)
+ {
+ (void) xpos;
+ (void) ypos;
+ throw Error("Function not supported");
+ }
+
+ virtual void
+ addRaw(const void *buf, size_t len)
+ {
+ (void) buf;
+ (void) len;
+ throw Error("Function not supported");
+ }
+
+ virtual void
+ startSubField(const vespalib::string &subField)
+ {
+ (void) subField;
+ throw Error("Function not supported");
+ }
+
+ virtual void
+ endSubField(void)
+ {
+ throw Error("Function not supported");
+ }
+ };
+
+ /**
+ * Class that can handle multi value fields.
+ **/
+ class CollectionFieldHandle : public FieldHandle {
+ private:
+ int32_t _elementWeight;
+ public:
+ CollectionFieldHandle(const document::Field & dfield, const Schema::Field & sfield);
+ virtual void startElement(int32_t weight);
+ virtual void endElement();
+ };
+
+ /**
+ * Class for handling the construction of the content of an index field.
+ **/
+ class IndexFieldHandle : public CollectionFieldHandle
+ {
+ vespalib::string _str; // adjusted as word comes along
+ size_t _strSymbols; // symbols in string, assuming UTF8
+ document::SpanList *_spanList; // owned by _spanTree
+ document::SpanTree::UP _spanTree;
+ const document::SpanNode *_lastSpan;
+ size_t _spanStart; // start of span
+ bool _autoAnnotate; // Add annotation when adding strings
+ bool _autoSpace; // Add space before strings
+ bool _skipAutoSpace; // one shot skip of adding space
+ bool _uriField; // URI handling (special struct case)
+ vespalib::string _subField;
+ const document::FixedTypeRepo & _repo;
+
+ void
+ append(const vespalib::string &val);
+
+ public:
+ IndexFieldHandle(const document::FixedTypeRepo & repo,
+ const document::Field &dfield,
+ const Schema::Field &sfield);
+
+ virtual void addStr(const vespalib::string & val);
+
+ virtual void
+ addSpace(void);
+
+ virtual void
+ addNoWordStr(const vespalib::string & val);
+
+ void
+ addTokenizedString(const vespalib::string &val,
+ bool urlMode);
+
+ virtual void
+ addSpan(size_t start, size_t len);
+
+ virtual void
+ addSpan(void);
+
+ virtual void
+ addSpaceTokenAnnotation(void);
+
+ virtual void
+ addNumericTokenAnnotation(void);
+
+ virtual void
+ addAlphabeticTokenAnnotation(void);
+
+ virtual void
+ addTermAnnotation(void);
+
+ virtual void
+ addTermAnnotation(const vespalib::string &val);
+
+ virtual void
+ onEndElement(void);
+
+ virtual void
+ onEndField(void);
+
+ void
+ startAnnotate(void);
+
+ virtual void
+ setAutoAnnotate(bool autoAnnotate);
+
+ virtual void
+ setAutoSpace(bool autoSpace);
+
+ virtual void
+ startSubField(const vespalib::string &subField);
+
+ virtual void
+ endSubField(void);
+ };
+
+ /**
+ * Class for handling the construction of the content of an attribute field.
+ **/
+ class AttributeFieldHandle : public CollectionFieldHandle
+ {
+ public:
+ AttributeFieldHandle(const document::Field & dfield, const Schema::Field & sfield);
+ virtual void addStr(const vespalib::string & val);
+ virtual void addInt(int64_t val);
+ virtual void addFloat(double val);
+ virtual void addPredicate(std::unique_ptr<vespalib::Slime> val);
+ virtual void addTensor(std::unique_ptr<vespalib::tensor::Tensor> val)
+ override;
+
+ virtual void
+ addPosition(int32_t xpos, int32_t ypos);
+ };
+
+ /**
+ * Class for handling the construction of the content of a summary field.
+ **/
+ class SummaryFieldHandle : public CollectionFieldHandle {
+ public:
+ SummaryFieldHandle(const document::Field & dfield, const Schema::Field & sfield);
+ virtual void addStr(const vespalib::string & val);
+ virtual void addInt(int64_t val);
+ virtual void addFloat(double val);
+
+ virtual void
+ addRaw(const void *buf, size_t len);
+ };
+
+ /**
+ * Class for handling the construction of a document (set of fields).
+ **/
+ class DocumentHandle {
+ public:
+ typedef std::shared_ptr<DocumentHandle> SP;
+ private:
+ const document::DocumentType * _type;
+ document::Document *const _doc;
+ FieldHandle::SP _fieldHandle;
+ document::FixedTypeRepo _repo;
+ public:
+ DocumentHandle(document::Document &doc, const vespalib::string & docId);
+ const FieldHandle::SP & getFieldHandle() const { return _fieldHandle; }
+ void startIndexField(const Schema::Field & sfield) {
+ _fieldHandle.reset(new IndexFieldHandle(_repo, _type->getField(sfield.getName()), sfield));
+ }
+ void startAttributeField(const Schema::Field & sfield) {
+ _fieldHandle.reset(new AttributeFieldHandle(_type->getField(sfield.getName()), sfield));
+ }
+ void startSummaryField(const Schema::Field & sfield) {
+ _fieldHandle.reset(new SummaryFieldHandle(_type->getField(sfield.getName()), sfield));
+ }
+ void
+ endField()
+ {
+ _fieldHandle->onEndField();
+ _doc->setValue(_type->getField(_fieldHandle->getField().getName()), *_fieldHandle->getValue());
+ _fieldHandle.reset(static_cast<FieldHandle *>(NULL));
+ }
+ void endDocument(const document::Document::UP & doc) {
+ (void) doc;
+ }
+ };
+
+ const Schema & _schema;
+ document::DocumenttypesConfig _doctypes_config;
+ document::DocumentTypeRepo::SP _repo;
+ const document::DocumentType &_docType;
+ document::Document::UP _doc; // the document we are about to generate
+
+ DocumentHandle::SP _handleDoc; // handle for all fields
+ DocumentHandle * _currDoc; // the current document handle
+
+public:
+ DocBuilder(const Schema & schema);
+
+ DocBuilder & startDocument(const vespalib::string & docId);
+ document::Document::UP endDocument();
+
+ DocBuilder & startIndexField(const vespalib::string & name);
+ DocBuilder & startAttributeField(const vespalib::string & name);
+ DocBuilder & startSummaryField(const vespalib::string & name);
+ DocBuilder & endField();
+
+ DocBuilder & startElement(int32_t weight = 1);
+ DocBuilder & endElement();
+
+ DocBuilder & addStr(const vespalib::string & val);
+ DocBuilder & addSpace(void);
+ DocBuilder & addNoWordStr(const vespalib::string & val);
+ DocBuilder & addInt(int64_t val);
+ DocBuilder & addFloat(double val);
+ DocBuilder & addPredicate(std::unique_ptr<vespalib::Slime> val);
+ DocBuilder & addTensor(std::unique_ptr<vespalib::tensor::Tensor> val);
+
+ DocBuilder &
+ addTokenizedString(const vespalib::string &val);
+
+ DocBuilder &
+ addUrlTokenizedString(const vespalib::string &val);
+
+ DocBuilder &
+ addSpan(size_t start, size_t len);
+
+ DocBuilder &
+ addSpan(void);
+
+ DocBuilder &
+ addSpaceTokenAnnotation(void);
+
+ DocBuilder &
+ addNumericTokenAnnotation(void);
+
+ DocBuilder &
+ addAlphabeticTokenAnnotation(void);
+
+ DocBuilder&
+ addTermAnnotation(void);
+
+ DocBuilder &
+ addTermAnnotation(const vespalib::string &val);
+
+ DocBuilder &
+ setAutoAnnotate(bool autoAnnotate);
+
+ DocBuilder &
+ setAutoSpace(bool autoSpace);
+
+ DocBuilder &
+ addPosition(int32_t xpos, int32_t ypos);
+
+ DocBuilder &
+ addRaw(const void *buf, size_t len);
+
+ DocBuilder &
+ startSubField(const vespalib::string &subField);
+
+ DocBuilder &
+ endSubField(void);
+
+ static bool
+ hasAnnotations(void)
+ {
+ return true;
+ }
+
+ const document::DocumentType &getDocumentType() const { return _docType; }
+ const document::DocumentTypeRepo::SP &getDocumentTypeRepo() const
+ { return _repo; }
+ document::DocumenttypesConfig getDocumenttypesConfig() const
+ { return _doctypes_config; }
+};
+
+} // namespace search::index
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/index/docidandfeatures.cpp b/searchlib/src/vespa/searchlib/index/docidandfeatures.cpp
new file mode 100644
index 00000000000..442f25b94f0
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/index/docidandfeatures.cpp
@@ -0,0 +1,105 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".index.docidandfeatures");
+#include "docidandfeatures.h"
+#include <vespa/vespalib/objects/nbostream.h>
+
+namespace search
+{
+
+namespace index
+{
+
+using vespalib::nbostream;
+
+#if 0
+void
+DocIdAndFeatures::append(const DocIdAndFeatures &rhs, uint32_t localFieldId)
+{
+ assert(!rhs.getRaw());
+ assert(rhs._fields.size() == 1);
+ const WordDocFieldFeatures &field = rhs._fields.front();
+ assert(field.getFieldId() == 0);
+ uint32_t numElements = field.getNumElements();
+ std::vector<WordDocFieldElementFeatures>::const_iterator element =
+ rhs._elements.begin();
+ std::vector<WordDocFieldElementWordPosFeatures>::const_iterator position =
+ rhs._wordPositions.begin();
+ assert(_fields.empty() || localFieldId > _fields.back().getFieldId());
+ _fields.push_back(field);
+ _fields.back().setFieldId(localFieldId);
+ for (uint32_t elementDone = 0; elementDone < numElements;
+ ++elementDone, ++element) {
+ _elements.push_back(*element);
+ for (uint32_t posResidue = element->getNumOccs(); posResidue > 0;
+ --posResidue, ++position) {
+ _wordPositions.push_back(*position);
+ }
+ }
+}
+#endif
+
+
+nbostream &
+operator<<(nbostream &out, const WordDocElementFeatures &features)
+{
+ out << features._elementId << features._numOccs <<
+ features._weight << features._elementLen;
+ return out;
+}
+
+
+nbostream &
+operator>>(nbostream &in, WordDocElementFeatures &features)
+{
+ in >> features._elementId >> features._numOccs >>
+ features._weight >> features._elementLen;
+ return in;
+}
+
+
+nbostream &
+operator<<(nbostream &out, const WordDocElementWordPosFeatures &features)
+{
+ out << features._wordPos;
+ return out;
+}
+
+
+nbostream &
+operator>>(nbostream &in, WordDocElementWordPosFeatures &features)
+{
+ in >> features._wordPos;
+ return in;
+}
+
+
+nbostream &
+operator<<(nbostream &out, const DocIdAndFeatures &features)
+{
+ out << features._docId;
+ out.saveVector(features._elements).
+ saveVector(features._wordPositions);
+ out.saveVector(features._blob);
+ out << features._bitOffset << features._bitLength << features._raw;
+ return out;
+}
+
+
+nbostream &
+operator>>(nbostream &in, DocIdAndFeatures &features)
+{
+ in >> features._docId;
+ in.restoreVector(features._elements).
+ restoreVector(features._wordPositions);
+ in.restoreVector(features._blob);
+ in >> features._bitOffset >> features._bitLength >> features._raw;
+ return in;
+}
+
+
+} // namespace index
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/index/docidandfeatures.h b/searchlib/src/vespa/searchlib/index/docidandfeatures.h
new file mode 100644
index 00000000000..1bb74e6c3b3
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/index/docidandfeatures.h
@@ -0,0 +1,338 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vector>
+#include <vespa/searchlib/common/fslimits.h>
+
+namespace vespalib
+{
+
+class nbostream;
+
+}
+
+namespace search
+{
+
+namespace index
+{
+
+/*
+ * The following feature classes are not self contained. To reduce
+ * memory allocator pressure, the DocIdAndFeatures class contains a
+ * flattened representation of the features at different levels.
+ */
+
+/*
+ * (word, doc) features.
+ *
+ * Present as member in DocIdAndFeatures.
+ */
+class WordDocFeatures
+{
+public:
+ // TODO: add support for user features
+
+ WordDocFeatures(void)
+ {
+ }
+
+ void
+ clear(void)
+ {
+ }
+};
+
+/*
+ * (word, doc, field) features.
+ *
+ * Present as vector element in DocIdAndFeatures.
+ */
+class WordDocFieldFeatures
+{
+public:
+ uint32_t _numElements; // Number of array indexes
+ // TODO: add support for user features
+
+ WordDocFieldFeatures(void)
+ : _numElements(0u)
+ {
+ }
+
+ uint32_t
+ getNumElements(void) const
+ {
+ return _numElements;
+ }
+
+ void
+ setNumElements(uint32_t numElements)
+ {
+ _numElements = numElements;
+ }
+
+ void
+ incNumElements(void)
+ {
+ ++_numElements;
+ }
+};
+
+/*
+ * (word, doc, field, element) features.
+ *
+ * Present as vector element in DocIdAndFeatures.
+ */
+class WordDocElementFeatures
+{
+public:
+ uint32_t _elementId; // Array index
+ uint32_t _numOccs;
+ int32_t _weight;
+ uint32_t _elementLen;
+ // TODO: add support for user features
+
+ WordDocElementFeatures(void)
+ : _elementId(0u),
+ _numOccs(0u),
+ _weight(1),
+ _elementLen(SEARCHLIB_FEF_UNKNOWN_FIELD_LENGTH)
+ {
+ }
+
+ WordDocElementFeatures(uint32_t elementId)
+ : _elementId(elementId),
+ _numOccs(0u),
+ _weight(1),
+ _elementLen(SEARCHLIB_FEF_UNKNOWN_FIELD_LENGTH)
+ {
+ }
+
+ WordDocElementFeatures(uint32_t elementId,
+ uint32_t weight,
+ uint32_t elementLen)
+ : _elementId(elementId),
+ _numOccs(0u),
+ _weight(weight),
+ _elementLen(elementLen)
+ {
+ }
+
+ uint32_t
+ getElementId(void) const
+ {
+ return _elementId;
+ }
+
+ uint32_t
+ getNumOccs(void) const
+ {
+ return _numOccs;
+ }
+
+ int32_t
+ getWeight(void) const
+ {
+ return _weight;
+ }
+
+ uint32_t
+ getElementLen(void) const
+ {
+ return _elementLen;
+ }
+
+ void
+ setElementId(uint32_t elementId)
+ {
+ _elementId = elementId;
+ }
+
+ void
+ setNumOccs(uint32_t numOccs)
+ {
+ _numOccs = numOccs;
+ }
+
+ void
+ setWeight(int32_t weight)
+ {
+ _weight = weight;
+ }
+
+ void
+ setElementLen(uint32_t elementLen)
+ {
+ _elementLen = elementLen;
+ }
+
+ void
+ incNumOccs(void)
+ {
+ ++_numOccs;
+ }
+};
+
+/*
+ * (word, doc, field, element, wordpos) features.
+ *
+ * Present as vector element in DocIdAndFeatures.
+ */
+class WordDocElementWordPosFeatures
+{
+public:
+ uint32_t _wordPos;
+ // TODO: add support for user features
+
+ WordDocElementWordPosFeatures(void)
+ : _wordPos(0u)
+ {
+ }
+
+ WordDocElementWordPosFeatures(uint32_t wordPos)
+ : _wordPos(wordPos)
+ {
+ }
+
+ uint32_t
+ getWordPos(void) const
+ {
+ return _wordPos;
+ }
+
+ void
+ setWordPos(uint32_t wordPos)
+ {
+ _wordPos = wordPos;
+ }
+};
+
+/**
+ * Class for minimal common representation of features available for a
+ * (word, doc) pair, used by index fusion to shuffle information from
+ * input files to the output file without having to know all the details.
+ */
+class DocIdAndFeatures
+{
+public:
+ uint32_t _docId; // Current Docid
+ // generic feature data, flattened to avoid excessive allocator usage
+ WordDocFeatures _wordDocFeatures;
+ std::vector<WordDocElementFeatures> _elements;
+ std::vector<WordDocElementWordPosFeatures> _wordPositions;
+#ifdef notyet
+ // user blobs (packed)
+ UserFeatures _userFeatures;
+ // TODO: Determine how to handle big endian versus little endian user
+ // features, and whether set of user features is contiguous in file or
+ // interleaved with predefined features (word position, word weight)
+#endif
+ // raw data (file format specific, packed)
+ std::vector<uint64_t> _blob; // Feature data for (word, docid) pair
+ uint32_t _bitOffset; // Offset of feature start ([0..63])
+ uint32_t _bitLength; // Length of features
+ bool _raw; //
+
+ DocIdAndFeatures(void)
+ : _docId(0),
+ _wordDocFeatures(),
+ _elements(),
+ _wordPositions(),
+ _blob(),
+ _bitOffset(0u),
+ _bitLength(0u),
+ _raw(false)
+ {
+ }
+
+ ~DocIdAndFeatures(void)
+ {
+ }
+
+ void
+ clearFeatures(void)
+ {
+ _wordDocFeatures.clear();
+ _elements.clear();
+ _wordPositions.clear();
+ _bitOffset = 0u;
+ _bitLength = 0u;
+ _blob.clear();
+ }
+
+ void
+ clearFeatures(uint32_t bitOffset)
+ {
+ _wordDocFeatures.clear();
+ _elements.clear();
+ _wordPositions.clear();
+ _bitOffset = bitOffset;
+ _bitLength = 0u;
+ _blob.clear();
+ }
+
+ void
+ clear(uint32_t docId)
+ {
+ _docId = docId;
+ clearFeatures();
+ }
+
+
+ void
+ clear(uint32_t docId,
+ uint32_t bitOffset)
+ {
+ _docId = docId;
+ clearFeatures(bitOffset);
+ }
+
+ void
+ setRaw(bool raw)
+ {
+ _raw = raw;
+ }
+
+ bool
+ getRaw(void) const
+ {
+ return _raw;
+ }
+
+ /**
+ * Append features from a single field to a field collection.
+ *
+ * @param rhs features for a single field
+ * @param localFieldId local field id for the field
+ */
+ void
+ append(const DocIdAndFeatures &rhs, uint32_t localFieldId);
+};
+
+
+vespalib::nbostream &
+operator<<(vespalib::nbostream &out,
+ const WordDocElementFeatures &features);
+
+vespalib::nbostream &
+operator>>(vespalib::nbostream &in, WordDocElementFeatures &features);
+
+vespalib::nbostream &
+operator<<(vespalib::nbostream &out,
+ const WordDocElementWordPosFeatures &features);
+
+vespalib::nbostream &
+operator>>(vespalib::nbostream &in,
+ WordDocElementWordPosFeatures &features);
+
+vespalib::nbostream &
+operator<<(vespalib::nbostream &out, const DocIdAndFeatures &features);
+
+vespalib::nbostream &
+operator>>(vespalib::nbostream &in, DocIdAndFeatures &features);
+
+
+} // namespace index
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/index/doctypebuilder.cpp b/searchlib/src/vespa/searchlib/index/doctypebuilder.cpp
new file mode 100644
index 00000000000..fff8a735bf9
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/index/doctypebuilder.cpp
@@ -0,0 +1,356 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "doctypebuilder.h"
+#include <vespa/document/datatype/datatypes.h>
+#include <vespa/document/datatype/urldatatype.h>
+#include <vespa/document/fieldvalue/fieldvalues.h>
+#include <vespa/document/repo/configbuilder.h>
+#include <vespa/log/log.h>
+
+LOG_SETUP(".index.doctypebuilder");
+
+using namespace document;
+using namespace search::index;
+
+namespace search
+{
+
+namespace index
+{
+
+namespace
+{
+
+const DataType *convert(Schema::DataType type) {
+ switch (type) {
+ case Schema::UINT1:
+ case Schema::UINT2:
+ case Schema::UINT4:
+ case Schema::INT8:
+ return DataType::BYTE;
+ case Schema::INT16:
+ return DataType::SHORT;
+ case Schema::INT32:
+ return DataType::INT;
+ case Schema::INT64:
+ return DataType::LONG;
+ case Schema::FLOAT:
+ return DataType::FLOAT;
+ case Schema::DOUBLE:
+ return DataType::DOUBLE;
+ case Schema::STRING:
+ return DataType::STRING;
+ case Schema::RAW:
+ return DataType::RAW;
+ case Schema::BOOLEANTREE:
+ return DataType::PREDICATE;
+ case Schema::TENSOR:
+ return DataType::TENSOR;
+ default:
+ break;
+ }
+ assert(!"Unknown datatype in schema");
+ return 0;
+}
+
+void
+insertStructType(document::DocumenttypesConfig::Documenttype & cfg,
+ const StructDataType & structType)
+{
+ typedef document::DocumenttypesConfig DTC;
+ DTC::Documenttype::Datatype::Sstruct cfgStruct;
+ cfgStruct.name = structType.getName();
+ Field::Set fieldSet = structType.getFieldSet();
+ for (Field::Set::const_iterator itr = fieldSet.begin();
+ itr != fieldSet.end(); ++itr)
+ {
+ DTC::Documenttype::Datatype::Sstruct::Field field;
+ field.name = (*itr)->getName();
+ field.datatype = (*itr)->getDataType().getId();
+ field.id = (*itr)->getId(Document::getNewestSerializationVersion());
+ field.idV6 = (*itr)->getId(6);
+ cfgStruct.field.push_back(field);
+ }
+ cfg.datatype.push_back(DTC::Documenttype::Datatype());
+ cfg.datatype.back().sstruct = cfgStruct;
+ cfg.datatype.back().id = structType.getId();
+}
+
+}
+
+DocTypeBuilder::UriField::UriField(void)
+ : _all(Schema::UNKNOWN_FIELD_ID),
+ _scheme(Schema::UNKNOWN_FIELD_ID),
+ _host(Schema::UNKNOWN_FIELD_ID),
+ _port(Schema::UNKNOWN_FIELD_ID),
+ _path(Schema::UNKNOWN_FIELD_ID),
+ _query(Schema::UNKNOWN_FIELD_ID),
+ _fragment(Schema::UNKNOWN_FIELD_ID),
+ _hostname(Schema::UNKNOWN_FIELD_ID)
+{
+}
+
+
+bool
+DocTypeBuilder::UriField::valid(const Schema &schema,
+ uint32_t fieldId,
+ const Schema::CollectionType &collectionType)
+{
+ if (fieldId == Schema::UNKNOWN_FIELD_ID)
+ return false;
+ const Schema::IndexField &field = schema.getIndexField(fieldId);
+ if (field.getDataType() != Schema::STRING)
+ return false;
+ if (field.getCollectionType() != collectionType)
+ return false;
+ return true;
+}
+
+
+bool
+DocTypeBuilder::UriField::broken(const Schema &schema,
+ const Schema::CollectionType &
+ collectionType) const
+{
+ return !valid(schema, _all, collectionType) &&
+ valid(schema, _scheme, collectionType) &&
+ valid(schema, _host, collectionType) &&
+ valid(schema, _port, collectionType) &&
+ valid(schema, _path, collectionType) &&
+ valid(schema, _query, collectionType) &&
+ valid(schema, _fragment, collectionType);
+}
+
+bool
+DocTypeBuilder::UriField::valid(const Schema &schema,
+ const Schema::CollectionType &
+ collectionType) const
+{
+ return valid(schema, _all, collectionType) &&
+ valid(schema, _scheme, collectionType) &&
+ valid(schema, _host, collectionType) &&
+ valid(schema, _port, collectionType) &&
+ valid(schema, _path, collectionType) &&
+ valid(schema, _query, collectionType) &&
+ valid(schema, _fragment, collectionType);
+}
+
+
+void
+DocTypeBuilder::UriField::setup(const Schema &schema,
+ const vespalib::string &field)
+{
+ _all = schema.getIndexFieldId(field);
+ _scheme = schema.getIndexFieldId(field + ".scheme");
+ _host = schema.getIndexFieldId(field + ".host");
+ _port = schema.getIndexFieldId(field + ".port");
+ _path = schema.getIndexFieldId(field + ".path");
+ _query = schema.getIndexFieldId(field + ".query");
+ _fragment = schema.getIndexFieldId(field + ".fragment");
+ _hostname = schema.getIndexFieldId(field + ".hostname");
+}
+
+
+void
+DocTypeBuilder::UriField::markUsed(UsedFieldsMap &usedFields,
+ uint32_t field)
+{
+ if (field == Schema::UNKNOWN_FIELD_ID)
+ return;
+ assert(usedFields.size() > field);
+ usedFields[field] = true;
+}
+
+
+void
+DocTypeBuilder::UriField::markUsed(UsedFieldsMap &usedFields) const
+{
+ markUsed(usedFields, _all);
+ markUsed(usedFields, _scheme);
+ markUsed(usedFields, _host);
+ markUsed(usedFields, _port);
+ markUsed(usedFields, _path);
+ markUsed(usedFields, _query);
+ markUsed(usedFields, _fragment);
+ markUsed(usedFields, _hostname);
+}
+
+
+
+DocTypeBuilder::SchemaIndexFields::SchemaIndexFields(void)
+ : _textFields(),
+ _uriFields()
+{
+}
+
+
+void
+DocTypeBuilder::SchemaIndexFields::setup(const Schema &schema)
+{
+ uint32_t numIndexFields = schema.getNumIndexFields();
+ UsedFieldsMap usedFields;
+ usedFields.resize(numIndexFields);
+
+ // Detect all URI fields (flattened structs).
+ for (uint32_t fieldId = 0; fieldId < numIndexFields; ++fieldId) {
+ const Schema::IndexField &field = schema.getIndexField(fieldId);
+ const vespalib::string &name = field.getName();
+ size_t dotPos = name.find('.');
+ if (dotPos != vespalib::string::npos) {
+ const vespalib::string suffix = name.substr(dotPos + 1);
+ if (suffix == "scheme") {
+ const vespalib::string shortName = name.substr(0, dotPos);
+ UriField uriField;
+ uriField.setup(schema, shortName);
+ if (uriField.valid(schema, field.getCollectionType())) {
+ _uriFields.push_back(uriField);
+ uriField.markUsed(usedFields);
+ } else if (uriField.broken(schema,
+ field.getCollectionType())) {
+ // Broken removal of unused URI fields.
+ uriField.markUsed(usedFields);
+ }
+ }
+ }
+ }
+
+ // Non-URI fields are currently supposed to be text fields.
+ for (uint32_t fieldId = 0; fieldId < numIndexFields; ++fieldId) {
+ if (usedFields[fieldId])
+ continue;
+ const Schema::IndexField &field = schema.getIndexField(fieldId);
+ switch (field.getDataType()) {
+ case Schema::STRING:
+ _textFields.push_back(fieldId);
+ break;
+ default:
+ ;
+ }
+ }
+}
+
+DocTypeBuilder::DocTypeBuilder(const Schema &schema)
+ : _schema(schema),
+ _iFields()
+{
+ _iFields.setup(schema);
+}
+
+namespace {
+using namespace document::config_builder;
+TypeOrId makeCollection(TypeOrId datatype,
+ Schema::CollectionType collection_type) {
+ switch (collection_type) {
+ case Schema::ARRAY:
+ return Array(datatype);
+ case Schema::WEIGHTEDSET:
+ // TODO: consider using array of struct<primitive,int32> to keep order
+ return Wset(datatype);
+ default:
+ return datatype;
+ }
+}
+
+struct TypeCache {
+ std::map<std::pair<int, int>, TypeOrId> types;
+
+ TypeOrId getType(TypeOrId datatype, Schema::CollectionType c_type) {
+ TypeOrId type = makeCollection(datatype, c_type);
+ std::pair<int, int> key = std::make_pair(datatype.id, c_type);
+ if (types.find(key) == types.end()) {
+ types.insert(std::make_pair(key, type));
+ }
+ return types.find(key)->second;
+ }
+};
+} // namespace
+
+document::DocumenttypesConfig DocTypeBuilder::makeConfig() const {
+ using namespace document::config_builder;
+ TypeCache type_cache;
+
+ typedef std::set<vespalib::string> UsedFields;
+ UsedFields usedFields;
+
+ Struct header_struct("searchdocument.header");
+ header_struct.setId(-1505212454);
+
+ int32_t field_id = 0;
+ for (size_t i = 0; i < _iFields._textFields.size(); ++i) {
+ const Schema::IndexField &field =
+ _schema.getIndexField(_iFields._textFields[i]);
+
+ // only handles string fields for now
+ assert(field.getDataType() == Schema::STRING);
+ header_struct.addField(field.getName(), type_cache.getType(
+ DataType::T_STRING, field.getCollectionType()));
+ header_struct.sstruct.field.back().id = field_id++;
+ usedFields.insert(field.getName());
+ }
+
+ const int32_t uri_type = document::UrlDataType::getInstance().getId();
+ for (size_t i = 0; i < _iFields._uriFields.size(); ++i) {
+ const Schema::IndexField &field =
+ _schema.getIndexField(_iFields._uriFields[i]._all);
+
+ // only handles string fields for now
+ assert(field.getDataType() == Schema::STRING);
+ header_struct.addField(field.getName(), type_cache.getType(
+ uri_type, field.getCollectionType()));
+ header_struct.sstruct.field.back().id = field_id++;
+ usedFields.insert(field.getName());
+ }
+
+ for (uint32_t i = 0; i < _schema.getNumAttributeFields(); ++i) {
+ const Schema::AttributeField &field = _schema.getAttributeField(i);
+ UsedFields::const_iterator usf = usedFields.find(field.getName());
+ if (usf != usedFields.end())
+ continue; // taken as index field
+
+ const DataType *primitiveType = convert(field.getDataType());
+ header_struct.addField(field.getName(), type_cache.getType(
+ primitiveType->getId(), field.getCollectionType()));
+ header_struct.sstruct.field.back().id = field_id++;
+ usedFields.insert(field.getName());
+ }
+
+ for (uint32_t i = 0; i < _schema.getNumSummaryFields(); ++i) {
+ const Schema::SummaryField &field = _schema.getSummaryField(i);
+ UsedFields::const_iterator usf = usedFields.find(field.getName());
+ if (usf != usedFields.end())
+ continue; // taken as index field or attribute field
+ const DataType *primitiveType(convert(field.getDataType()));
+ header_struct.addField(field.getName(), type_cache.getType(
+ primitiveType->getId(), field.getCollectionType()));
+ header_struct.sstruct.field.back().id = field_id++;
+ usedFields.insert(field.getName());
+ }
+
+ DocumenttypesConfigBuilderHelper builder;
+ builder.document(-645763131, "searchdocument",
+ header_struct, Struct("searchdocument.body"));
+ return builder.config();
+}
+
+document::DocumenttypesConfig
+DocTypeBuilder::makeConfig(const DocumentType &docType)
+{
+ typedef document::DocumenttypesConfigBuilder DTC;
+ DTC cfg;
+ { // document type
+ DTC::Documenttype dtype;
+ dtype.id = docType.getId();
+ dtype.name = docType.getName();
+ // TODO(vekterli): remove header/body config
+ dtype.headerstruct = docType.getFieldsType().getId();
+ dtype.bodystruct = docType.getFieldsType().getId();
+ cfg.documenttype.push_back(dtype);
+ }
+ insertStructType(cfg.documenttype[0], docType.getFieldsType());
+ return cfg;
+}
+
+
+} // namespace search::index
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/index/doctypebuilder.h b/searchlib/src/vespa/searchlib/index/doctypebuilder.h
new file mode 100644
index 00000000000..1781d772122
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/index/doctypebuilder.h
@@ -0,0 +1,95 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/document/config/config-documenttypes.h>
+#include <vespa/document/datatype/datatypes.h>
+#include <vespa/document/fieldvalue/fieldvalues.h>
+#include <vespa/vespalib/util/exception.h>
+#include <vespa/vespalib/util/stringfmt.h>
+#include <vespa/searchcommon/common/schema.h>
+
+namespace search
+{
+
+namespace index
+{
+
+/**
+ * Builder for the indexingdocument document type based on an index schema.
+ **/
+class DocTypeBuilder {
+public:
+ typedef std::vector<bool> UsedFieldsMap;
+ typedef std::vector<uint32_t> FieldIdVector;
+
+ class UriField
+ {
+ public:
+ uint32_t _all;
+ uint32_t _scheme;
+ uint32_t _host;
+ uint32_t _port;
+ uint32_t _path;
+ uint32_t _query;
+ uint32_t _fragment;
+ uint32_t _hostname;
+
+ private:
+ static void
+ markUsed(UsedFieldsMap &usedFields,
+ uint32_t field);
+
+ static bool
+ valid(const Schema &schema,
+ uint32_t fieldId,
+ const Schema::CollectionType &collectionType);
+
+ public:
+ UriField(void);
+
+ bool
+ broken(const Schema &schema,
+ const Schema::CollectionType &collectionType) const;
+
+ bool
+ valid(const Schema &schema,
+ const Schema::CollectionType &collectionType) const;
+
+ void
+ setup(const Schema &schema,
+ const vespalib::string &field);
+
+ void
+ markUsed(UsedFieldsMap &usedFields) const;
+ };
+
+ typedef std::vector<UriField> UriFieldIdVector;
+
+ class SchemaIndexFields
+ {
+ public:
+ FieldIdVector _textFields;
+ UriFieldIdVector _uriFields;
+
+ SchemaIndexFields(void);
+
+ void
+ setup(const Schema &schema);
+ };
+
+private:
+ const Schema &_schema;
+ SchemaIndexFields _iFields;
+
+public:
+ DocTypeBuilder(const Schema & schema);
+ document::DocumenttypesConfig makeConfig() const;
+
+ static document::DocumenttypesConfig
+ makeConfig(const document::DocumentType &docType);
+};
+
+} // namespace search::index
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/index/dummyfileheadercontext.cpp b/searchlib/src/vespa/searchlib/index/dummyfileheadercontext.cpp
new file mode 100644
index 00000000000..2228a19a1e7
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/index/dummyfileheadercontext.cpp
@@ -0,0 +1,70 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".index.dummyfileheadercontext");
+#include "dummyfileheadercontext.h"
+#include <vespa/vespalib/data/fileheader.h>
+#include <vespa/searchlib/util/fileheadertk.h>
+
+namespace search
+{
+
+namespace index
+{
+
+vespalib::string DummyFileHeaderContext::_creator;
+
+DummyFileHeaderContext::DummyFileHeaderContext(void)
+ : common::FileHeaderContext(),
+ _disableFileName(false),
+ _hostName(),
+ _pid(getpid())
+{
+ _hostName = FastOS_Socket::getHostName();
+ assert(!_hostName.empty());
+}
+
+
+DummyFileHeaderContext::~DummyFileHeaderContext(void)
+{
+}
+
+
+void
+DummyFileHeaderContext::disableFileName(void)
+{
+ _disableFileName = true;
+}
+
+
+void
+DummyFileHeaderContext::addTags(vespalib::GenericHeader &header,
+ const vespalib::string &name) const
+{
+ typedef vespalib::GenericHeader::Tag Tag;
+
+ FileHeaderTk::addVersionTags(header);
+ if (!_disableFileName) {
+ header.putTag(Tag("fileName", name));
+ addCreateAndFreezeTime(header);
+ }
+ header.putTag(Tag("hostName", _hostName));
+ header.putTag(Tag("pid", _pid));
+ if (!_creator.empty()) {
+ header.putTag(Tag("creator", _creator));
+ }
+ header.putTag(Tag("DummyFileHeaderContext", "enabled"));
+}
+
+
+void
+DummyFileHeaderContext::setCreator(const vespalib::string &creator)
+{
+ _creator = creator;
+}
+
+
+} // namespace index
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/index/dummyfileheadercontext.h b/searchlib/src/vespa/searchlib/index/dummyfileheadercontext.h
new file mode 100644
index 00000000000..c7270dcf61e
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/index/dummyfileheadercontext.h
@@ -0,0 +1,47 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/searchlib/common/fileheadercontext.h>
+
+namespace vespalib
+{
+
+class GenericHeader;
+
+}
+
+namespace search
+{
+
+namespace index
+{
+
+class DummyFileHeaderContext : public common::FileHeaderContext
+{
+ bool _disableFileName;
+ vespalib::string _hostName;
+ pid_t _pid;
+
+ static vespalib::string _creator;
+public:
+ DummyFileHeaderContext(void);
+
+ virtual
+ ~DummyFileHeaderContext(void);
+
+ void
+ disableFileName(void);
+
+ virtual void
+ addTags(vespalib::GenericHeader &header,
+ const vespalib::string &name) const;
+
+ static void
+ setCreator(const vespalib::string &creator);
+};
+
+
+} // namespace index
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/index/indexbuilder.cpp b/searchlib/src/vespa/searchlib/index/indexbuilder.cpp
new file mode 100644
index 00000000000..aaefb0e4ff8
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/index/indexbuilder.cpp
@@ -0,0 +1,28 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".index.indexbuilder");
+#include "indexbuilder.h"
+
+namespace search
+{
+
+namespace index
+{
+
+
+IndexBuilder::IndexBuilder(const Schema &schema)
+ : _schema(schema)
+{
+}
+
+
+IndexBuilder::~IndexBuilder(void)
+{
+}
+
+
+} // namespace index
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/index/indexbuilder.h b/searchlib/src/vespa/searchlib/index/indexbuilder.h
new file mode 100644
index 00000000000..f4688167b81
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/index/indexbuilder.h
@@ -0,0 +1,62 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/vespalib/stllike/string.h>
+
+namespace search
+{
+
+namespace index
+{
+
+class Schema;
+class WordDocElementWordPosFeatures;
+
+class IndexBuilder
+{
+protected:
+ const Schema &_schema;
+
+public:
+ IndexBuilder(const Schema &schema);
+
+ virtual
+ ~IndexBuilder(void);
+
+ virtual void
+ startWord(const vespalib::stringref & word) = 0;
+
+ virtual void
+ endWord(void) = 0;
+
+ virtual void
+ startDocument(uint32_t docId) = 0;
+
+ virtual void
+ endDocument(void) = 0;
+
+ virtual void
+ startField(uint32_t fieldId) = 0;
+
+ virtual void
+ endField(void) = 0;
+
+ virtual void
+ startElement(uint32_t elementId, int32_t weight, uint32_t elementLen) = 0;
+
+ virtual void
+ endElement(void) = 0;
+
+ virtual void
+ addOcc(const WordDocElementWordPosFeatures &features) = 0;
+
+ // TODO: methods for attribute vectors.
+
+ // TODO: methods for document summary.
+};
+
+
+} // namespace index
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/index/olddictionaryfile.cpp b/searchlib/src/vespa/searchlib/index/olddictionaryfile.cpp
new file mode 100644
index 00000000000..7e194e3ead4
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/index/olddictionaryfile.cpp
@@ -0,0 +1,115 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".index.dictionaryfile");
+#include "olddictionaryfile.h"
+
+namespace search
+{
+
+namespace index
+{
+
+
+OldDictionaryIndexMapping::OldDictionaryIndexMapping(void)
+ : _fieldIdToLocalId(),
+ _indexNames(),
+ _indexIds(),
+ _washedIndexIds()
+{
+}
+
+
+OldDictionaryIndexMapping::~OldDictionaryIndexMapping(void)
+{
+}
+
+
+void
+OldDictionaryIndexMapping::
+setup(const Schema &schema,
+ const std::vector<vespalib::string> &fieldNames)
+{
+ _indexIds.clear();
+ _washedIndexIds.clear();
+ _indexNames.clear();
+
+ for (std::vector<vespalib::string>::const_iterator
+ i = fieldNames.begin(), ie = fieldNames.end();
+ i != ie;
+ ++i) {
+ uint32_t fieldId = schema.getIndexFieldId(*i);
+ _indexIds.push_back(fieldId);
+ if (fieldId != Schema::UNKNOWN_FIELD_ID)
+ _washedIndexIds.push_back(fieldId);
+ _indexNames.push_back(*i);
+ }
+ setupHelper(schema);
+}
+
+
+void
+OldDictionaryIndexMapping::setup(const Schema &schema,
+ const std::vector<uint32_t> &fields)
+{
+ _indexIds.clear();
+ _washedIndexIds.clear();
+ _indexNames.clear();
+
+ uint32_t fieldId = 0;
+ vespalib::string fname;
+ for (std::vector<uint32_t>::const_iterator
+ i = fields.begin(), ie = fields.end();
+ i != ie;
+ ++i, ++fieldId)
+ {
+ assert(*i != Schema::UNKNOWN_FIELD_ID);
+ assert(*i < schema.getNumIndexFields());
+ fname = schema.getIndexField(*i).getName();
+ _indexIds.push_back(*i);
+ _washedIndexIds.push_back(*i);
+ _indexNames.push_back(fname);
+ }
+ setupHelper(schema);
+}
+
+
+void
+OldDictionaryIndexMapping::setupHelper(const Schema &schema)
+{
+ // Create mapping to local ids
+
+ _fieldIdToLocalId.clear();
+ uint32_t localId = 0;
+ vespalib::string fname;
+ for (std::vector<uint32_t>::const_iterator
+ i = _indexIds.begin(), ie = _indexIds.end();
+ i != ie;
+ ++i, ++localId)
+ {
+ if (*i == Schema::UNKNOWN_FIELD_ID)
+ continue; // Field on file not in current schema
+ assert(*i < schema.getNumIndexFields());
+ (void) schema;
+ while (_fieldIdToLocalId.size() <= *i)
+ _fieldIdToLocalId.push_back(noLocalId());
+ assert(_fieldIdToLocalId[*i] == noLocalId());
+ _fieldIdToLocalId[*i] = localId;
+ }
+}
+
+
+OldDictionaryFileSeqRead::~OldDictionaryFileSeqRead(void)
+{
+}
+
+
+OldDictionaryFileSeqWrite::~OldDictionaryFileSeqWrite(void)
+{
+}
+
+
+} // namespace index
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/index/olddictionaryfile.h b/searchlib/src/vespa/searchlib/index/olddictionaryfile.h
new file mode 100644
index 00000000000..cd7d2171b61
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/index/olddictionaryfile.h
@@ -0,0 +1,208 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include "postinglistcounts.h"
+#include "postinglisthandle.h"
+#include <vespa/searchcommon/common/schema.h>
+#include <vespa/searchlib/common/tunefileinfo.h>
+#include <map>
+#include <vector>
+#include <string>
+#include <limits>
+
+namespace search
+{
+
+namespace common
+{
+
+class FileHeaderContext;
+
+}
+
+namespace index
+{
+
+class OldDictionaryIndexMapping
+{
+private:
+ std::vector<uint32_t> _fieldIdToLocalId;
+ std::vector<vespalib::string> _indexNames;
+ std::vector<uint32_t> _indexIds;
+ std::vector<uint32_t> _washedIndexIds;
+
+ void
+ setupHelper(const Schema &schema);
+
+public:
+ OldDictionaryIndexMapping(void);
+
+ ~OldDictionaryIndexMapping(void);
+
+ static uint32_t
+ noLocalId(void)
+ {
+ return std::numeric_limits<uint32_t>::max();
+ }
+
+ uint32_t
+ getLocalId(uint32_t dfid) const
+ {
+ if (dfid < _fieldIdToLocalId.size())
+ return _fieldIdToLocalId[dfid];
+ else
+ return noLocalId();
+ }
+
+ uint32_t
+ getExternalId(uint32_t localId) const
+ {
+ return _indexIds[localId];
+ }
+
+ void
+ setup(const Schema &schema,
+ const std::vector<vespalib::string> &indexNames);
+
+ void
+ setup(const Schema &schema,
+ const std::vector<uint32_t> &indexes);
+
+ const std::vector<uint32_t> &
+ getIndexIds(void) const
+ {
+ return _indexIds;
+ }
+
+ const std::vector<uint32_t> &
+ getWashedIndexIds(void) const
+ {
+ return _washedIndexIds;
+ }
+
+ const std::vector<vespalib::string> &
+ getIndexNames(void) const
+ {
+ return _indexNames;
+ }
+
+ uint32_t
+ getNumIndexes(void) const
+ {
+ return _indexIds.size();
+ }
+};
+
+
+/**
+ * Interface for dictionary file containing words and counts for words.
+ *
+ * This is "at" schema level.
+ */
+class OldDictionaryFileSeqRead
+{
+public:
+ OldDictionaryFileSeqRead(void)
+ {
+ }
+
+ virtual
+ ~OldDictionaryFileSeqRead(void);
+
+ /**
+ * Read word and counts. Only nonzero counts are returned. If at
+ * end of dictionary then noWordNumHigh() is returned as word number.
+ */
+ virtual void
+ readWord(vespalib::string &word,
+ uint64_t &wordNum,
+ std::vector<uint32_t> &indexes,
+ std::vector<PostingListCounts> &counts) = 0;
+
+ /**
+ * Open dictionary file for sequential read. The supplied schema
+ * decides what existing indexes are visible (i.e. indexes in dictionary
+ * but not in schema are hidden). A dictionary might have no visible
+ * indexes.
+ */
+ virtual bool
+ open(const vespalib::string &name, const Schema &schema,
+ const TuneFileSeqRead &tuneFileRead) = 0;
+
+ /**
+ * Close dictionary file.
+ */
+ virtual bool
+ close(void) = 0;
+
+ /*
+ * Get visible indexes available in dictionary.
+ */
+ virtual void
+ getIndexes(std::vector<uint32_t> &indexes) = 0;
+
+ static uint64_t
+ noWordNum(void)
+ {
+ return 0u;
+ }
+
+ static uint64_t
+ noWordNumHigh(void)
+ {
+ return std::numeric_limits<uint64_t>::max();
+ }
+};
+
+/**
+ * Interface for dictionary file containing words and count for words.
+ *
+ * This is "at" schema level.
+ *
+ * The file should contain the set of field names for which the dictionary
+ * is valid, to simplify handling of schema changes.
+ */
+class OldDictionaryFileSeqWrite
+{
+protected:
+public:
+ OldDictionaryFileSeqWrite(void)
+ {
+ }
+
+ virtual
+ ~OldDictionaryFileSeqWrite(void);
+
+ /**
+ * Write word and counts. Only nonzero counts should be supplied.
+ */
+ virtual void
+ writeWord(const vespalib::stringref &word,
+ const std::vector<uint32_t> &indexes,
+ const std::vector<PostingListCounts> &counts) = 0;
+
+ /**
+ * Open dictionary file for sequential write. The field with most
+ * words should be first for optimal compression.
+ */
+ virtual bool
+ open(const vespalib::string &name,
+ uint32_t numWords,
+ uint32_t chunkSize,
+ const std::vector<uint32_t> &indexes,
+ const Schema &schema,
+ const TuneFileSeqWrite &tuneFileWrite,
+ const common::FileHeaderContext &fileHeaderContext) = 0;
+
+ /**
+ * Close dictionary file.
+ */
+ virtual bool
+ close(void) = 0;
+};
+
+
+} // namespace index
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/index/postinglistcountfile.cpp b/searchlib/src/vespa/searchlib/index/postinglistcountfile.cpp
new file mode 100644
index 00000000000..78c7d28905b
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/index/postinglistcountfile.cpp
@@ -0,0 +1,60 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".index.postinglistcountfile");
+#include "postinglistcountfile.h"
+
+namespace search
+{
+
+namespace index
+{
+
+PostingListCountFileSeqRead::PostingListCountFileSeqRead(void)
+{
+}
+
+
+PostingListCountFileSeqRead::~PostingListCountFileSeqRead(void)
+{
+}
+
+
+void
+PostingListCountFileSeqRead::
+getParams(PostingListParams &params)
+{
+ params.clear();
+}
+
+
+PostingListCountFileSeqWrite::PostingListCountFileSeqWrite(void)
+{
+}
+
+
+PostingListCountFileSeqWrite::~PostingListCountFileSeqWrite(void)
+{
+}
+
+
+void
+PostingListCountFileSeqWrite::
+setParams(const PostingListParams &params)
+{
+ (void) params;
+}
+
+
+void
+PostingListCountFileSeqWrite::
+getParams(PostingListParams &params)
+{
+ params.clear();
+}
+
+
+} // namespace index
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/index/postinglistcountfile.h b/searchlib/src/vespa/searchlib/index/postinglistcountfile.h
new file mode 100644
index 00000000000..f9b1c66bf9c
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/index/postinglistcountfile.h
@@ -0,0 +1,140 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/searchlib/index/postinglistparams.h>
+#include "postinglistcounts.h"
+#include <vespa/searchlib/common/tunefileinfo.h>
+#include <string>
+
+namespace vespalib
+{
+
+class nbostream;
+
+}
+
+namespace search
+{
+
+namespace common
+{
+
+class FileHeaderContext;
+
+}
+
+namespace index
+{
+
+class PostingListCounts;
+class PostingListHandle;
+
+/**
+ * Interface for count files describing where in a posting list file
+ * the various words are located. It is merged at index time with a
+ * text-only dictionary to produce a binary dictionary optimized for
+ * random access used at search time.
+ *
+ * TODO: Might want to allow semi-random access for prefix searches,
+ * allowing for less data in posting list files being duplicated from
+ * the count file.
+ */
+class PostingListCountFileSeqRead
+{
+public:
+ PostingListCountFileSeqRead(void);
+
+ virtual
+ ~PostingListCountFileSeqRead(void);
+
+ /**
+ * Checkpoint write. Used at semi-regular intervals during indexing
+ * to allow for continued indexing after an interrupt. Implies
+ * flush from memory to disk, and possibly also sync to permanent
+ * storage media.
+ */
+ virtual void
+ checkPointWrite(vespalib::nbostream &out) = 0;
+
+ /**
+ * Checkpoint read. Used when resuming indexing after an interrupt.
+ */
+ virtual void
+ checkPointRead(vespalib::nbostream &in) = 0;
+
+ /**
+ * Open posting list count file for sequential read.
+ */
+ virtual bool
+ open(const vespalib::string &name,
+ const TuneFileSeqRead &tuneFileRead) = 0;
+
+ /**
+ * Close posting list count file.
+ */
+ virtual bool
+ close(void) = 0;
+
+ /*
+ * Get current parameters.
+ */
+ virtual void
+ getParams(PostingListParams &params);
+};
+
+
+class PostingListCountFileSeqWrite
+{
+public:
+ PostingListCountFileSeqWrite(void);
+
+ virtual
+ ~PostingListCountFileSeqWrite(void);
+
+ /**
+ * Checkpoint write. Used at semi-regular intervals during indexing
+ * to allow for continued indexing after an interrupt. Implies
+ * flush from memory to disk, and possibly also sync to permanent
+ * storage media.
+ */
+ virtual void
+ checkPointWrite(vespalib::nbostream &out) = 0;
+
+ /**
+ * Checkpoint read. Used when resuming indexing after an interrupt.
+ */
+ virtual void
+ checkPointRead(vespalib::nbostream &in) = 0;
+
+ /**
+ * Open posting list count file for sequential write.
+ */
+ virtual bool
+ open(const vespalib::string &name,
+ const TuneFileSeqWrite &tuneFileWrite,
+ const common::FileHeaderContext &fileHeaderContext) = 0;
+
+ /**
+ * Close posting list count file.
+ */
+ virtual bool
+ close(void) = 0;
+
+ /*
+ * Set parameters.
+ */
+ virtual void
+ setParams(const PostingListParams &params);
+
+ /*
+ * Get current parameters.
+ */
+ virtual void
+ getParams(PostingListParams &params);
+};
+
+
+} // namespace index
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/index/postinglistcounts.cpp b/searchlib/src/vespa/searchlib/index/postinglistcounts.cpp
new file mode 100644
index 00000000000..8268b1e5c64
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/index/postinglistcounts.cpp
@@ -0,0 +1,90 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".index.postinglistcounts");
+#include <vespa/vespalib/objects/nbostream.h>
+#include "postinglistcounts.h"
+
+namespace search
+{
+
+namespace index
+{
+
+void swap(PostingListCounts & a, PostingListCounts & b)
+{
+ a.swap(b);
+}
+
+using vespalib::nbostream;
+
+nbostream &
+operator<<(nbostream &out, const PostingListCounts::Segment &segment)
+{
+ out << segment._bitLength << segment._numDocs << segment._lastDoc;
+ return out;
+}
+
+
+nbostream &
+operator>>(nbostream &in, PostingListCounts::Segment &segment)
+{
+ in >> segment._bitLength >> segment._numDocs >> segment._lastDoc;
+ return in;
+}
+
+
+nbostream &
+operator<<(nbostream &out, const PostingListCounts &counts)
+{
+ out << counts._numDocs << counts._bitLength;
+ size_t numSegments = counts._segments.size();
+ out << numSegments;
+ for (size_t seg = 0; seg < numSegments; ++seg) {
+ out << counts._segments[seg];
+ }
+ return out;
+}
+
+
+nbostream &
+operator>>(nbostream &in, PostingListCounts &counts)
+{
+ in >> counts._numDocs >> counts._bitLength;
+ size_t numSegments = 0;
+ in >> numSegments;
+ counts._segments.reserve(numSegments);
+ counts._segments.clear();
+ for (size_t seg = 0; seg < numSegments; ++seg) {
+ PostingListCounts::Segment segment;
+ in >> segment;
+ counts._segments.push_back(segment);
+ }
+ return in;
+}
+
+
+nbostream &
+operator<<(nbostream &out, const PostingListOffsetAndCounts &offsetAndCounts)
+{
+ out << offsetAndCounts._offset;
+ out << offsetAndCounts._accNumDocs;
+ out << offsetAndCounts._counts;
+ return out;
+}
+
+
+nbostream &
+operator>>(nbostream &in, PostingListOffsetAndCounts &offsetAndCounts)
+{
+ in >> offsetAndCounts._offset;
+ in >> offsetAndCounts._accNumDocs;
+ in >> offsetAndCounts._counts;
+ return in;
+}
+
+
+} // namespace index
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/index/postinglistcounts.h b/searchlib/src/vespa/searchlib/index/postinglistcounts.h
new file mode 100644
index 00000000000..564955d8c6b
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/index/postinglistcounts.h
@@ -0,0 +1,144 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vector>
+
+namespace vespalib
+{
+
+class nbostream;
+
+}
+
+namespace search
+{
+
+namespace index
+{
+
+/**
+ * Basic class for holding the result of a dictionary lookup result
+ * for a word, to optimize query tree node child order and know from
+ * where in the posting list files to read data. A posting list with
+ * 64 or fewer documents does not have skip info.
+ */
+class PostingListCounts
+{
+public:
+ /*
+ * Nested class for describing a segment of a large posting list.
+ * Very large posting lists are divided into segments, to limit
+ * memory consumption (for buffering) and can be viewed as a
+ * high level skip list stored in the dictionary. If the posting
+ * list for a word is less than 256 kB then it is not split into
+ * segments.
+ */
+ class Segment
+ {
+ public:
+ uint64_t _bitLength; // Length of segment
+ uint32_t _numDocs; // Number of documents in segment
+ uint32_t _lastDoc; // Last document id in segment
+
+ Segment(void)
+ : _bitLength(0),
+ _numDocs(0),
+ _lastDoc(0)
+ {
+ }
+
+ bool
+ operator==(const Segment &rhs) const
+ {
+ return (_bitLength == rhs._bitLength &&
+ _numDocs == rhs._numDocs &&
+ _lastDoc == rhs._lastDoc);
+ }
+
+ friend vespalib::nbostream &
+ operator<<(vespalib::nbostream &out, const Segment &segment);
+
+ friend vespalib::nbostream &
+ operator>>(vespalib::nbostream &in, Segment &segment);
+ };
+
+ /**
+ * Counts might span multiple posting lists (i.e. multiple words
+ * for prefix search), numDocs is then sum of documents for each posting
+ * list, which segment info is absent.
+ */
+ uint64_t _numDocs; // Number of documents for word(s)
+ uint64_t _bitLength; // Length of postings for word(s)
+
+ /**
+ * Very large posting lists with skip info are split into multiple
+ * segments. If there are more than one segments for a word then the
+ * last segment has skip info even if it has fewer than 64 documents.
+ */
+ std::vector<Segment> _segments;
+
+ PostingListCounts(void)
+ : _numDocs(0),
+ _bitLength(0),
+ _segments()
+ {
+ }
+ void swap(PostingListCounts & rhs) {
+ std::swap(_numDocs, rhs._numDocs);
+ std::swap(_bitLength, rhs._bitLength);
+ std::swap(_segments, rhs._segments);
+ }
+
+ void
+ clear(void)
+ {
+ _bitLength = 0;
+ _numDocs = 0;
+ _segments.clear();
+ }
+
+ bool
+ operator==(const PostingListCounts &rhs) const
+ {
+ return (_numDocs == rhs._numDocs &&
+ _bitLength == rhs._bitLength &&
+ _segments == rhs._segments);
+ }
+
+ friend vespalib::nbostream &
+ operator<<(vespalib::nbostream &out, const PostingListCounts &counts);
+
+ friend vespalib::nbostream &
+ operator>>(vespalib::nbostream &in, PostingListCounts &counts);
+};
+
+void swap(PostingListCounts & a, PostingListCounts & b);
+
+
+class PostingListOffsetAndCounts
+{
+public:
+ uint64_t _offset;
+ uint64_t _accNumDocs; // Used by prefix search for now.
+ PostingListCounts _counts;
+
+ PostingListOffsetAndCounts(void)
+ : _offset(0),
+ _accNumDocs(0u),
+ _counts()
+ {
+ }
+
+ friend vespalib::nbostream &
+ operator<<(vespalib::nbostream &out,
+ const PostingListOffsetAndCounts &offsetAndCounts);
+
+ friend vespalib::nbostream &
+ operator>>(vespalib::nbostream &in,
+ PostingListOffsetAndCounts &offsetAndCounts);
+};
+
+} // namespace index
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/index/postinglistfile.cpp b/searchlib/src/vespa/searchlib/index/postinglistfile.cpp
new file mode 100644
index 00000000000..677ca101d88
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/index/postinglistfile.cpp
@@ -0,0 +1,170 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".index.postinglistfile");
+#include "postinglistfile.h"
+
+namespace search
+{
+
+namespace index
+{
+
+PostingListFileSeqRead::PostingListFileSeqRead(void)
+ : _counts(),
+ _residueDocs(0)
+{
+}
+
+
+PostingListFileSeqRead::~PostingListFileSeqRead(void)
+{
+}
+
+
+void
+PostingListFileSeqRead::
+getParams(PostingListParams &params)
+{
+ params.clear();
+}
+
+
+void
+PostingListFileSeqRead::
+setFeatureParams(const PostingListParams &params)
+{
+ (void) params;
+}
+
+
+void
+PostingListFileSeqRead::
+getFeatureParams(PostingListParams &params)
+{
+ params.clear();
+}
+
+
+PostingListFileSeqWrite::PostingListFileSeqWrite(void)
+ : _counts()
+{
+}
+
+
+PostingListFileSeqWrite::~PostingListFileSeqWrite(void)
+{
+}
+
+
+void
+PostingListFileSeqWrite::
+setParams(const PostingListParams &params)
+{
+ (void) params;
+}
+
+
+void
+PostingListFileSeqWrite::
+getParams(PostingListParams &params)
+{
+ params.clear();
+}
+
+
+void
+PostingListFileSeqWrite::
+setFeatureParams(const PostingListParams &params)
+{
+ (void) params;
+}
+
+
+void
+PostingListFileSeqWrite::
+getFeatureParams(PostingListParams &params)
+{
+ params.clear();
+}
+
+
+PostingListFileRandRead::
+PostingListFileRandRead(void)
+ : _memoryMapped(false)
+{
+}
+
+
+PostingListFileRandRead::~PostingListFileRandRead(void)
+{
+}
+
+
+void
+PostingListFileRandRead::afterOpen(FastOS_FileInterface &file)
+{
+ _memoryMapped = file.MemoryMapPtr(0) != NULL;
+}
+
+
+PostingListFileRandReadPassThrough::
+PostingListFileRandReadPassThrough(PostingListFileRandRead *lower,
+ bool ownLower)
+ : _lower(lower),
+ _ownLower(ownLower)
+{
+}
+
+
+PostingListFileRandReadPassThrough::~PostingListFileRandReadPassThrough(void)
+{
+ if (_ownLower)
+ delete _lower;
+}
+
+
+search::queryeval::SearchIterator *
+PostingListFileRandReadPassThrough::
+createIterator(const PostingListCounts &counts,
+ const PostingListHandle &handle,
+ const search::fef::TermFieldMatchDataArray &matchData,
+ bool usebitVector) const
+{
+ return _lower->createIterator(counts, handle, matchData, usebitVector);
+}
+
+
+void
+PostingListFileRandReadPassThrough::
+readPostingList(const PostingListCounts &counts,
+ uint32_t firstSegment,
+ uint32_t numSegments,
+ PostingListHandle &handle)
+{
+ _lower->readPostingList(counts, firstSegment, numSegments,
+ handle);
+}
+
+
+bool
+PostingListFileRandReadPassThrough::open(const vespalib::string &name,
+ const TuneFileRandRead &tuneFileRead)
+{
+ bool ret = _lower->open(name, tuneFileRead);
+ _memoryMapped = _lower->getMemoryMapped();
+ return ret;
+}
+
+
+bool
+PostingListFileRandReadPassThrough::close(void)
+{
+ return _lower->close();
+}
+
+
+} // namespace index
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/index/postinglistfile.h b/searchlib/src/vespa/searchlib/index/postinglistfile.h
new file mode 100644
index 00000000000..1518948cc6f
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/index/postinglistfile.h
@@ -0,0 +1,344 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/searchlib/index/postinglistcounts.h>
+#include <vespa/searchlib/index/postinglisthandle.h>
+#include <vespa/searchlib/index/postinglistparams.h>
+#include <vespa/searchlib/common/tunefileinfo.h>
+#include <map>
+#include <string>
+
+namespace vespalib
+{
+
+class nbostream;
+
+}
+
+namespace search
+{
+
+namespace common
+{
+
+class FileHeaderContext;
+
+}
+
+namespace index
+{
+
+
+class DocIdAndFeatures;
+
+
+/**
+ * Interface for posting list files containing document ids and features
+ * for words.
+ */
+class PostingListFileSeqRead
+{
+protected:
+ PostingListCounts _counts;
+ unsigned int _residueDocs; // Docids left to read for word
+public:
+ PostingListFileSeqRead(void);
+
+ virtual
+ ~PostingListFileSeqRead(void);
+
+ /**
+ * Read document id and features.
+ */
+ virtual void
+ readDocIdAndFeatures(DocIdAndFeatures &features) = 0;
+
+ /**
+ * Checkpoint write. Used at semi-regular intervals during indexing
+ * to allow for continued indexing after an interrupt. Implies
+ * flush from memory to disk, and possibly also sync to permanent
+ * storage media.
+ */
+ virtual void
+ checkPointWrite(vespalib::nbostream &out) = 0;
+
+ /**
+ * Checkpoint read. Used when resuming indexing after an interrupt.
+ */
+ virtual void
+ checkPointRead(vespalib::nbostream &in) = 0;
+
+ /**
+ * Read counts for a word.
+ */
+ virtual void
+ readCounts(const PostingListCounts &counts) = 0;
+
+ /**
+ * Open posting list file for sequential read.
+ */
+ virtual bool
+ open(const vespalib::string &name,
+ const TuneFileSeqRead &tuneFileRead) = 0;
+
+ /**
+ * Close posting list file.
+ */
+ virtual bool
+ close(void) = 0;
+
+ /*
+ * Get current parameters.
+ */
+ virtual void
+ getParams(PostingListParams &params);
+
+ /*
+ * Set (word, docid) feature parameters.
+ *
+ * Typically can only enable or disable cooked features.
+ */
+ virtual void
+ setFeatureParams(const PostingListParams &params);
+
+ /*
+ * Get current (word, docid) feature parameters.
+ */
+ virtual void
+ getFeatureParams(PostingListParams &params);
+
+ // Methods used when generating posting list for common word pairs.
+
+ /*
+ * Get current posting offset, measured in bits. First posting list
+ * starts at 0, i.e. file header is not accounted for here.
+ *
+ * @return current posting offset, measured in bits.
+ */
+ virtual uint64_t
+ getCurrentPostingOffset(void) const = 0;
+
+ /**
+ * Set current posting offset, measured in bits. First posting
+ * list starts at 0, i.e. file header is not accounted for here.
+ *
+ * @param Offset start of posting lists for word pair.
+ * @param endOffset end of posting lists for word pair.
+ * @param readAheadOffset end of posting list for either this or a
+ * later word pair, depending on disk seek cost.
+ */
+ virtual void
+ setPostingOffset(uint64_t offset,
+ uint64_t endOffset,
+ uint64_t readAheadOffset) = 0;
+
+ /**
+ * Get counts read by last readCounts().
+ */
+ const PostingListCounts &
+ getCounts(void) const
+ {
+ return _counts;
+ }
+
+ PostingListCounts &
+ getCounts(void)
+ {
+ return _counts;
+ }
+};
+
+/**
+ * Interface for posting list files containing document ids and features
+ * for words.
+ */
+class PostingListFileSeqWrite
+{
+protected:
+ PostingListCounts _counts;
+public:
+ PostingListFileSeqWrite(void);
+
+ virtual
+ ~PostingListFileSeqWrite(void);
+
+ /**
+ * Write document id and features.
+ */
+ virtual void
+ writeDocIdAndFeatures(const DocIdAndFeatures &features) = 0;
+
+ /**
+ * Flush word (during write) after it is complete to buffers, i.e.
+ * prepare for next word, but not for application crash.
+ */
+ virtual void
+ flushWord(void) = 0;
+
+ /**
+ * Checkpoint write. Used at semi-regular intervals during indexing
+ * to allow for continued indexing after an interrupt. Implies
+ * flush from memory to disk, and possibly also sync to permanent
+ * storage media.
+ */
+ virtual void
+ checkPointWrite(vespalib::nbostream &out) = 0;
+
+ /**
+ * Checkpoint read. Used when resuming indexing after an interrupt.
+ */
+ virtual void
+ checkPointRead(vespalib::nbostream &in) = 0;
+
+ /**
+ * Open posting list file for sequential write.
+ */
+ virtual bool
+ open(const vespalib::string &name,
+ const TuneFileSeqWrite &tuneFileWrite,
+ const common::FileHeaderContext &fileHeaderContext) = 0;
+
+ /**
+ * Close posting list file.
+ */
+ virtual bool
+ close(void) = 0;
+
+ /*
+ * Set parameters.
+ */
+ virtual void
+ setParams(const PostingListParams &params);
+
+ /*
+ * Get current parameters.
+ */
+ virtual void
+ getParams(PostingListParams &params);
+
+ /*
+ * Set (word, docid) feature parameters.
+ */
+ virtual void
+ setFeatureParams(const PostingListParams &params);
+
+ /*
+ * Get current (word, docid) feature parameters.
+ */
+ virtual void
+ getFeatureParams(PostingListParams &params);
+
+ PostingListCounts &
+ getCounts(void)
+ {
+ return _counts;
+ }
+};
+
+
+/**
+ * Interface for posting list files containing document ids and features
+ * for words.
+ */
+class PostingListFileRandRead
+{
+protected:
+ // Can be examined after open
+ bool _memoryMapped;
+public:
+ typedef std::shared_ptr<PostingListFileRandRead> SP;
+
+ PostingListFileRandRead(void);
+
+ virtual
+ ~PostingListFileRandRead(void);
+
+ /**
+ * Create iterator for single word. Semantic lifetime of counts and
+ * handle must exceed lifetime of iterator.
+ *
+ * XXX: TODO: How to read next set of segments from disk if handle
+ * didn't cover the whole word, probably need access to higher level
+ * API above caches.
+ */
+ virtual search::queryeval::SearchIterator *
+ createIterator(const PostingListCounts &counts,
+ const PostingListHandle &handle,
+ const search::fef::TermFieldMatchDataArray &matchData,
+ bool usebitVector) const = 0;
+
+
+ /**
+ * Read (possibly partial) posting list into handle.
+ */
+ virtual void
+ readPostingList(const PostingListCounts &counts,
+ uint32_t firstSegment,
+ uint32_t numSegments,
+ PostingListHandle &handle) = 0;
+
+ /**
+ * Open posting list file for random read.
+ */
+ virtual bool
+ open(const vespalib::string &name,
+ const TuneFileRandRead &tuneFileRead) = 0;
+
+ /**
+ * Close posting list file.
+ */
+ virtual bool
+ close(void) = 0;
+
+ bool
+ getMemoryMapped(void) const
+ {
+ return _memoryMapped;
+ }
+
+protected:
+ void
+ afterOpen(FastOS_FileInterface &file);
+};
+
+
+/**
+ * Passthrough class.
+ */
+class PostingListFileRandReadPassThrough : public PostingListFileRandRead
+{
+protected:
+ PostingListFileRandRead *_lower;
+ bool _ownLower;
+
+public:
+ PostingListFileRandReadPassThrough(PostingListFileRandRead *lower,
+ bool ownLower);
+
+ virtual
+ ~PostingListFileRandReadPassThrough(void);
+
+ virtual search::queryeval::SearchIterator *
+ createIterator(const PostingListCounts &counts,
+ const PostingListHandle &handle,
+ const search::fef::TermFieldMatchDataArray &matchData,
+ bool usebitVector) const;
+
+ virtual void
+ readPostingList(const PostingListCounts &counts,
+ uint32_t firstSegment,
+ uint32_t numSegments,
+ PostingListHandle &handle);
+
+ virtual bool
+ open(const vespalib::string &name, const TuneFileRandRead &tuneFileRead);
+
+ virtual bool
+ close(void);
+};
+
+
+} // namespace index
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/index/postinglisthandle.cpp b/searchlib/src/vespa/searchlib/index/postinglisthandle.cpp
new file mode 100644
index 00000000000..97a5a6f3da6
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/index/postinglisthandle.cpp
@@ -0,0 +1,27 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".index.postinglisthandle");
+#include "postinglisthandle.h"
+#include <vespa/searchlib/index/postinglistfile.h>
+
+namespace search
+{
+
+namespace index
+{
+
+search::queryeval::SearchIterator *
+PostingListHandle::createIterator(const PostingListCounts &counts,
+ const search::fef::TermFieldMatchDataArray &matchData,
+ bool useBitVector) const
+{
+ (void) useBitVector;
+ return _file->createIterator(counts, *this, matchData, useBitVector);
+}
+
+
+} // namespace index
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/index/postinglisthandle.h b/searchlib/src/vespa/searchlib/index/postinglisthandle.h
new file mode 100644
index 00000000000..605904e3912
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/index/postinglisthandle.h
@@ -0,0 +1,90 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/searchlib/index/postinglistcounts.h>
+
+namespace search { class BitVector; }
+namespace search { namespace queryeval { class SearchIterator; } }
+namespace search { namespace fef { class TermFieldMatchDataArray; } }
+
+namespace search {
+namespace index {
+
+class PostingListFileRandRead;
+
+/**
+ * Class for owning a posting list in memory after having read it from
+ * posting list file, or referencing a chunk of memory containing the
+ * posting list (if the file was memory mapped).
+ */
+class PostingListHandle
+{
+public:
+ typedef std::unique_ptr<PostingListHandle> UP;
+ // Key portion
+ PostingListFileRandRead *_file; // File containing posting list
+ uint64_t _bitOffset; // posting list start relative to start of file
+ uint64_t _bitLength; // Length of posting list, in bits
+
+ // Value portion
+ uint32_t _firstSegment; // First segment for word
+ uint32_t _numSegments; // Number of segments
+ uint64_t _bitOffsetMem; // _mem relative to start of file
+ const void *_mem; // Memory backing posting list after read/mmap
+ void *_allocMem; // What to free after posting list
+ size_t _allocSize; // Size of allocated memory
+
+ PostingListHandle(void)
+ : _file(NULL),
+ _bitOffset(0),
+ _bitLength(0),
+ _firstSegment(0),
+ _numSegments(0),
+ _bitOffsetMem(0),
+ _mem(NULL),
+ _allocMem(NULL),
+ _allocSize(0)
+ {
+ }
+
+ ~PostingListHandle(void)
+ {
+ if (_allocMem != NULL)
+ free(_allocMem);
+ }
+
+ /**
+ * Create iterator for single word. Semantic lifetime of counts and
+ * handle must exceed lifetime of iterator.
+ *
+ * XXX: TODO: How to read next set of segments from disk if handle
+ * didn't cover the whole word, probably need access to higher level
+ * API above caches.
+ */
+ search::queryeval::SearchIterator *
+ createIterator(const PostingListCounts &counts,
+ const search::fef::TermFieldMatchDataArray &matchData,
+ bool useBitVector=false) const;
+
+ /**
+ * Drop value portion of handle.
+ */
+ void
+ drop(void)
+ {
+ _firstSegment = 0;
+ _numSegments = 0;
+ _bitOffsetMem = 0;
+ _mem = NULL;
+ if (_allocMem != NULL) {
+ free(_allocMem);
+ _allocMem = NULL;
+ }
+ _allocSize = 0;
+ }
+};
+
+
+} // namespace search::index
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/index/postinglistparams.cpp b/searchlib/src/vespa/searchlib/index/postinglistparams.cpp
new file mode 100644
index 00000000000..9c64587f7ee
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/index/postinglistparams.cpp
@@ -0,0 +1,138 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".index.postinglistparams");
+#include "postinglistparams.h"
+#include <sstream>
+
+namespace
+{
+
+vespalib::string empty;
+
+}
+
+namespace search
+{
+
+namespace index
+{
+
+bool
+PostingListParams::isSet(const vespalib::string &key) const
+{
+ Map::const_iterator it;
+
+ it = _map.find(key);
+ if (it != _map.end())
+ return true;
+ return false;
+}
+
+
+void
+PostingListParams::setStr(const vespalib::string &key,
+ const vespalib::string &val)
+{
+ _map[key] = val;
+}
+
+
+const vespalib::string &
+PostingListParams::getStr(const vespalib::string &key) const
+{
+ Map::const_iterator it;
+
+ it = _map.find(key);
+ if (it != _map.end())
+ return it->second;
+ return empty;
+}
+
+
+void
+PostingListParams::clear(void)
+{
+ _map.clear();
+}
+
+
+void
+PostingListParams::erase(const vespalib::string &key)
+{
+ _map.erase(key);
+}
+
+
+bool
+PostingListParams::operator!=(const PostingListParams &rhs) const
+{
+ return _map != rhs._map;
+}
+
+template <typename TYPE>
+void
+PostingListParams::set(const vespalib::string &key,
+ const TYPE &val)
+{
+ std::ostringstream os;
+
+ os << val;
+ _map[key] = os.str();
+}
+
+
+template <typename TYPE>
+void
+PostingListParams::get(const vespalib::string &key,
+ TYPE &val) const
+{
+ std::istringstream is;
+ Map::const_iterator it;
+
+ it = _map.find(key);
+ if (it != _map.end()) {
+ is.str(it->second);
+ is >> val;
+ }
+}
+
+
+template void
+PostingListParams::set<bool>(const vespalib::string &key,
+ const bool &val);
+
+template void
+PostingListParams::get<bool>(const vespalib::string &key,
+ bool &val) const;
+
+
+template void
+PostingListParams::set<int32_t>(const vespalib::string &key,
+ const int32_t &val);
+
+template void
+PostingListParams::get<int32_t>(const vespalib::string &key,
+ int32_t &val) const;
+
+template void
+PostingListParams::set<uint32_t>(const vespalib::string &key,
+ const uint32_t &val);
+
+template void
+PostingListParams::get<uint32_t>(const vespalib::string &key,
+ uint32_t &val) const;
+
+
+template void
+PostingListParams::set<uint64_t>(const vespalib::string &key,
+ const uint64_t &val);
+
+template void
+PostingListParams::get<uint64_t>(const vespalib::string &key,
+ uint64_t &val) const;
+
+} // namespace index
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/index/postinglistparams.h b/searchlib/src/vespa/searchlib/index/postinglistparams.h
new file mode 100644
index 00000000000..d8424b81835
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/index/postinglistparams.h
@@ -0,0 +1,48 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <map>
+#include <vespa/vespalib/stllike/string.h>
+
+namespace search
+{
+
+namespace index
+{
+
+class PostingListParams
+{
+ typedef std::map<vespalib::string, vespalib::string> Map;
+ Map _map;
+public:
+ template <typename TYPE>
+ void
+ set(const vespalib::string &key, const TYPE &val);
+
+ template <typename TYPE>
+ void
+ get(const vespalib::string &key, TYPE &val) const;
+
+ bool
+ isSet(const vespalib::string &key) const;
+
+ void
+ setStr(const vespalib::string &key, const vespalib::string &val);
+
+ const vespalib::string &
+ getStr(const vespalib::string &key) const;
+
+ void
+ clear(void);
+
+ void
+ erase(const vespalib::string &key);
+
+ bool
+ operator!=(const PostingListParams &rhs) const;
+};
+
+} // namespace index
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/index/schemautil.cpp b/searchlib/src/vespa/searchlib/index/schemautil.cpp
new file mode 100644
index 00000000000..6019c7ce4bd
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/index/schemautil.cpp
@@ -0,0 +1,217 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+#include <fstream>
+#include "schemautil.h"
+#include <set>
+LOG_SETUP(".index.schemautil");
+
+namespace search
+{
+
+namespace index
+{
+
+SchemaUtil::IndexSettings
+SchemaUtil::getIndexSettings(const Schema &schema,
+ const uint32_t index)
+{
+ IndexSettings ret;
+ Schema::DataType indexDataType(Schema::STRING);
+ bool error = false;
+ bool somePrefixes = false;
+ bool someNotPrefixes = false;
+ bool somePhrases = false;
+ bool someNotPhrases = false;
+ bool somePositions = false;
+ bool someNotPositions = false;
+
+ const Schema::IndexField &iField = schema.getIndexField(index);
+ if (iField.hasPhrases())
+ somePhrases = true;
+ else
+ someNotPhrases = true;
+ if (iField.hasPrefix())
+ somePrefixes = true;
+ else
+ someNotPrefixes = true;
+ if (iField.hasPositions())
+ somePositions = true;
+ else
+ someNotPositions = true;
+ indexDataType = iField.getDataType();
+ switch (indexDataType) {
+ case Schema::STRING:
+ break;
+ default:
+ error = true;
+ LOG(error,
+ "Field %s has bad data type",
+ iField.getName().c_str());
+ }
+
+ return IndexSettings(indexDataType, error,
+ somePrefixes && !someNotPrefixes,
+ somePhrases && !someNotPhrases,
+ somePositions && !someNotPositions);
+}
+
+
+bool
+SchemaUtil::IndexIterator::hasOldFields(const Schema &oldSchema,
+ bool phrases) const
+{
+ assert(isValid());
+ const Schema::IndexField &newField =
+ getSchema().getIndexField(getIndex());
+ const vespalib::string &fieldName = newField.getName();
+ uint32_t oldFieldId = oldSchema.getIndexFieldId(fieldName);
+ if (oldFieldId == Schema::UNKNOWN_FIELD_ID)
+ return false;
+ const Schema::IndexField &oldField =
+ oldSchema.getIndexField(oldFieldId);
+ if (oldField.getDataType() != newField.getDataType())
+ return false; // wrong data type
+ if (!phrases)
+ return true;
+ return oldField.hasPhrases();
+}
+
+
+bool
+SchemaUtil::IndexIterator::hasMatchingOldFields(const Schema &oldSchema,
+ bool phrases) const
+{
+ assert(isValid());
+ const Schema::IndexField &newField =
+ getSchema().getIndexField(getIndex());
+ const vespalib::string &fieldName = newField.getName();
+ uint32_t oldFieldId = oldSchema.getIndexFieldId(fieldName);
+ if (oldFieldId == Schema::UNKNOWN_FIELD_ID)
+ return false;
+ if (phrases) {
+ IndexIterator oldIterator(oldSchema, oldFieldId);
+ IndexSettings settings = oldIterator.getIndexSettings();
+ if (!settings.hasPhrases())
+ return false;
+ }
+ const Schema::IndexField &oldField =
+ oldSchema.getIndexField(oldFieldId);
+ if (oldField.getDataType() != newField.getDataType() ||
+ oldField.getCollectionType() != newField.getCollectionType())
+ return false;
+ return true;
+}
+
+
+bool
+SchemaUtil::validateIndexField(const Schema::IndexField &field)
+{
+ bool ok = true;
+ if (!validateIndexFieldType(field.getDataType())) {
+ LOG(error,
+ "Field %s has bad data type",
+ field.getName().c_str());
+ ok = false;
+ }
+ if (field.getDataType() != Schema::STRING) {
+ if (field.hasPrefix()) {
+ LOG(error,
+ "Field %s is non-string but has prefix",
+ field.getName().c_str());
+ ok = false;
+ }
+ if (field.hasPhrases()) {
+ LOG(error,
+ "Field %s is non-string but has phrases",
+ field.getName().c_str());
+ ok = false;
+ }
+ if (field.hasPositions()) {
+ LOG(error,
+ "Field %s is non-string but has positions",
+ field.getName().c_str());
+ ok = false;
+ }
+ }
+ if (field.hasPhrases() && !field.hasPositions()) {
+ LOG(error,
+ "Field %s has phrases but not positions",
+ field.getName().c_str());
+ ok = false;
+ }
+ return ok;
+}
+
+
+bool
+SchemaUtil::addIndexField(Schema &schema,
+ const Schema::IndexField &field)
+{
+ bool ok = true;
+ if (!validateIndexField(field))
+ ok = false;
+ uint32_t fieldId = schema.getIndexFieldId(field.getName());
+ if (fieldId != Schema::UNKNOWN_FIELD_ID) {
+ LOG(error,
+ "Field %s already exists in schema",
+ field.getName().c_str());
+ ok = false;
+ }
+ if (ok)
+ schema.addIndexField(field);
+ return ok;
+}
+
+
+bool
+SchemaUtil::validateSchema(const Schema &schema)
+{
+ bool ok = true;
+ for (IndexIterator it(schema); it.isValid(); ++it) {
+ uint32_t fieldId = it.getIndex();
+ const Schema::IndexField &field = schema.getIndexField(fieldId);
+ if (!validateIndexField(field))
+ ok = false;
+ if (schema.getIndexFieldId(field.getName()) != fieldId) {
+ LOG(error,
+ "Duplcate field %s",
+ field.getName().c_str());
+ ok = false;
+ }
+ }
+ for (uint32_t fsId = 0; fsId < schema.getNumFieldSets(); ++fsId) {
+ const Schema::FieldSet &fs = schema.getFieldSet(fsId);
+ if (schema.getFieldSetId(fs.getName()) != fsId) {
+ LOG(error,
+ "Duplicate field set %s",
+ fs.getName().c_str());
+ ok = false;
+ }
+ }
+ return ok;
+}
+
+
+bool
+SchemaUtil::getIndexIds(const Schema &schema,
+ DataType dataType,
+ std::vector<uint32_t> &indexes)
+{
+ typedef SchemaUtil::IndexIterator IndexIterator;
+
+ indexes.clear();
+ for (IndexIterator i(schema); i.isValid(); ++i) {
+ SchemaUtil::IndexSettings settings = i.getIndexSettings();
+ if (settings.hasError())
+ return false;
+ if (settings.getDataType() == dataType)
+ indexes.push_back(i.getIndex());
+ }
+ return true;
+}
+
+
+} // namespace search::index
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/index/schemautil.h b/searchlib/src/vespa/searchlib/index/schemautil.h
new file mode 100644
index 00000000000..1f7c351c43e
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/index/schemautil.h
@@ -0,0 +1,234 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchcommon/common/schema.h>
+
+namespace search
+{
+
+namespace index
+{
+
+
+class SchemaUtil
+{
+public:
+ typedef Schema::DataType DataType;
+
+ class IndexSettings
+ {
+ DataType _dataType;
+ bool _error; // Schema is bad.
+ bool _prefix;
+ bool _phrases;
+ bool _positions;
+
+ public:
+ const DataType &
+ getDataType(void) const
+ {
+ return _dataType;
+ }
+
+ bool
+ hasError(void) const
+ {
+ return _error;
+ }
+
+ bool
+ hasPrefix(void) const
+ {
+ return _prefix;
+ }
+
+ bool
+ hasPhrases(void) const
+ {
+ return _phrases;
+ }
+
+ bool
+ hasPositions(void) const
+ {
+ return _positions;
+ }
+
+ IndexSettings(void)
+ : _dataType(Schema::STRING),
+ _error(false),
+ _prefix(false),
+ _phrases(false),
+ _positions(false)
+ {
+ }
+
+ IndexSettings(const IndexSettings &rhs)
+ : _dataType(rhs._dataType),
+ _error(rhs._error),
+ _prefix(rhs._prefix),
+ _phrases(rhs._phrases),
+ _positions(rhs._positions)
+ {
+ }
+
+ IndexSettings(DataType dataType,
+ bool error,
+ bool prefix,
+ bool phrases,
+ bool positions)
+ : _dataType(dataType),
+ _error(error),
+ _prefix(prefix),
+ _phrases(phrases),
+ _positions(positions)
+ {
+ }
+
+ IndexSettings &
+ operator=(const IndexSettings &rhs)
+ {
+ IndexSettings tmp(rhs);
+ swap(tmp);
+ return *this;
+ }
+
+ void
+ swap(IndexSettings &rhs)
+ {
+ std::swap(_dataType, rhs._dataType);
+ std::swap(_error, rhs._error);
+ std::swap(_prefix, rhs._prefix);
+ std::swap(_phrases, rhs._phrases);
+ std::swap(_positions, rhs._positions);
+ }
+ };
+
+ class IndexIterator
+ {
+ const Schema &_schema;
+ uint32_t _index;
+
+ public:
+ IndexIterator(const Schema &schema)
+ : _schema(schema),
+ _index(0u)
+ {
+ }
+
+ IndexIterator(const Schema &schema, uint32_t index)
+ : _schema(schema),
+ _index(index)
+ {
+ }
+
+ IndexIterator(const Schema &schema, const IndexIterator &rhs)
+ : _schema(schema),
+ _index(Schema::UNKNOWN_FIELD_ID)
+ {
+ const vespalib::string &name = rhs.getName();
+ _index = schema.getIndexFieldId(name);
+ }
+
+ const Schema &
+ getSchema(void) const
+ {
+ return _schema;
+ }
+
+ uint32_t
+ getIndex(void) const
+ {
+ return _index;
+ }
+
+ const vespalib::string &
+ getName(void) const
+ {
+ return _schema.getIndexField(_index).getName();
+ }
+
+ IndexIterator &
+ operator++(void)
+ {
+ if (_index < _schema.getNumIndexFields()) {
+ ++_index;
+ }
+ return *this;
+ }
+
+ bool
+ isValid(void) const
+ {
+ return _index < _schema.getNumIndexFields();
+ }
+
+ IndexSettings
+ getIndexSettings(void) const
+ {
+ return SchemaUtil::getIndexSettings(_schema, _index);
+ }
+
+ /**
+ * Return if old schema has at least one usable input field
+ * with matching data type. If we want phrases then all input
+ * fields usable for terms must also be usable for phrases.
+ *
+ * @param oldSchema old schema, present in an input index
+ * @param phrases ask for phrase files
+ */
+ bool
+ hasOldFields(const Schema &oldSchema, bool phrases) const;
+
+ /**
+ * Return if fields in old schema matches fields in new
+ * schema, allowing for slightly faster fusion operations.
+ * Field collections must have same set of fields which must
+ * also match between new and old schema.
+ *
+ * @param oldSchema old schema, present in an input index
+ * @param phrases ask for phrase files
+ */
+ bool
+ hasMatchingOldFields(const Schema &oldSchema, bool phrases) const;
+ };
+
+ static IndexSettings
+ getIndexSettings(const Schema &schema, const uint32_t index);
+
+
+ static bool
+ validateIndexFieldType(DataType dataType)
+ {
+ switch (dataType) {
+ case Schema::STRING:
+ case Schema::INT32:
+ return true;
+ default:
+ ;
+ }
+ return false;
+ }
+
+ static bool
+ validateIndexField(const Schema::IndexField &field);
+
+ static bool
+ addIndexField(Schema &schema,
+ const Schema::IndexField &field);
+
+ static bool
+ validateSchema(const Schema &schema);
+
+ static bool
+ getIndexIds(const Schema &schema,
+ DataType dataType,
+ std::vector<uint32_t> &indexes);
+};
+
+
+} // namespace search::index
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/memoryindex/.gitignore b/searchlib/src/vespa/searchlib/memoryindex/.gitignore
new file mode 100644
index 00000000000..583460ae288
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/memoryindex/.gitignore
@@ -0,0 +1,3 @@
+*.So
+.depend
+Makefile
diff --git a/searchlib/src/vespa/searchlib/memoryindex/CMakeLists.txt b/searchlib/src/vespa/searchlib/memoryindex/CMakeLists.txt
new file mode 100644
index 00000000000..b9e5bf5a4ea
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/memoryindex/CMakeLists.txt
@@ -0,0 +1,17 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_library(searchlib_searchlib_memoryindex OBJECT
+ SOURCES
+ compact_document_words_store.cpp
+ dictionary.cpp
+ documentinverter.cpp
+ document_remover.cpp
+ featurestore.cpp
+ fieldinverter.cpp
+ memoryfieldindex.cpp
+ memoryindex.cpp
+ ordereddocumentinserter.cpp
+ postingiterator.cpp
+ urlfieldinverter.cpp
+ wordstore.cpp
+ DEPENDS
+)
diff --git a/searchlib/src/vespa/searchlib/memoryindex/OWNERS b/searchlib/src/vespa/searchlib/memoryindex/OWNERS
new file mode 100644
index 00000000000..e6340232840
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/memoryindex/OWNERS
@@ -0,0 +1,2 @@
+tegge
+geirst
diff --git a/searchlib/src/vespa/searchlib/memoryindex/compact_document_words_store.cpp b/searchlib/src/vespa/searchlib/memoryindex/compact_document_words_store.cpp
new file mode 100644
index 00000000000..05f242b0928
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/memoryindex/compact_document_words_store.cpp
@@ -0,0 +1,176 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".memoryindex.compact_document_words_store");
+#include "compact_document_words_store.h"
+
+namespace search {
+namespace memoryindex {
+
+typedef CompactDocumentWordsStore::Builder Builder;
+
+namespace {
+
+constexpr size_t MIN_CLUSTERS = 1024u;
+
+size_t
+getSerializedSize(const Builder &builder)
+{
+ size_t size = 1 + builder.words().size(); // numWords, [words]
+ return size;
+}
+
+uint32_t *
+serialize(const Builder &builder, uint32_t *begin)
+{
+ uint32_t *buf = begin;
+ const Builder::WordRefVector &words = builder.words();
+ *buf++ = words.size();
+ for (auto word : words) {
+ *buf++ = word.ref();
+ }
+ return buf;
+}
+
+}
+
+CompactDocumentWordsStore::Builder &
+CompactDocumentWordsStore::Builder::insert(btree::EntryRef wordRef)
+{
+ _words.push_back(wordRef);
+ return *this;
+}
+
+inline void
+CompactDocumentWordsStore::Iterator::nextWord()
+{
+ _wordRef = *_buf++;
+ _remainingWords--;
+}
+
+CompactDocumentWordsStore::Iterator::Iterator()
+ : _buf(NULL),
+ _remainingWords(0),
+ _wordRef(0),
+ _valid(false)
+{
+}
+
+CompactDocumentWordsStore::Iterator::Iterator(const uint32_t *buf)
+ : _buf(buf),
+ _remainingWords(0),
+ _wordRef(0),
+ _valid(true)
+{
+ _remainingWords = *_buf++;
+ if (_remainingWords > 0) {
+ nextWord();
+ } else {
+ _valid = false;
+ }
+}
+
+CompactDocumentWordsStore::Iterator &
+CompactDocumentWordsStore::Iterator::operator++()
+{
+ if (_remainingWords > 0) {
+ nextWord();
+ } else {
+ _valid = false;
+ }
+ return *this;
+}
+
+CompactDocumentWordsStore::Store::Store()
+ : _store(),
+ _type(1,
+ MIN_CLUSTERS,
+ RefType::offsetSize()),
+ _typeId(0)
+{
+ _store.addType(&_type);
+ _store.initActiveBuffers();
+}
+
+CompactDocumentWordsStore::Store::~Store()
+{
+ _store.dropBuffers();
+}
+
+btree::EntryRef
+CompactDocumentWordsStore::Store::insert(const Builder &builder)
+{
+ size_t serializedSize = getSerializedSize(builder);
+ _store.ensureBufferCapacity(_typeId, serializedSize);
+
+ uint32_t activeBufferId = _store.getActiveBufferId(_typeId);
+ btree::BufferState &state = _store.getBufferState(activeBufferId);
+ size_t oldSize = state.size();
+ RefType ref(oldSize, activeBufferId);
+ assert(oldSize == ref.offset());
+
+ uint32_t *begin = _store.getBufferEntry<uint32_t>(activeBufferId, oldSize);
+ uint32_t *end = serialize(builder, begin);
+ assert(size_t(end - begin) == serializedSize);
+ state.pushed_back(serializedSize);
+
+ return ref;
+}
+
+CompactDocumentWordsStore::Iterator
+CompactDocumentWordsStore::Store::get(btree::EntryRef ref) const
+{
+ RefType internalRef(ref);
+ const uint32_t *buf = _store.getBufferEntry<uint32_t>(internalRef.bufferId(),
+ internalRef.offset());
+ return Iterator(buf);
+}
+
+CompactDocumentWordsStore::CompactDocumentWordsStore()
+ : _docs(),
+ _wordsStore()
+{
+}
+
+void
+CompactDocumentWordsStore::insert(const Builder &builder)
+{
+ btree::EntryRef ref = _wordsStore.insert(builder);
+ auto insres = _docs.insert(std::make_pair(builder.docId(), ref));
+ if (!insres.second) {
+ LOG(error, "Failed inserting remove info for docid %u",
+ builder.docId());
+ abort();
+ }
+}
+
+void
+CompactDocumentWordsStore::remove(uint32_t docId)
+{
+ _docs.erase(docId);
+}
+
+CompactDocumentWordsStore::Iterator
+CompactDocumentWordsStore::get(uint32_t docId) const
+{
+ auto itr = _docs.find(docId);
+ if (itr != _docs.end()) {
+ return _wordsStore.get(itr->second);
+ }
+ return Iterator();
+}
+
+MemoryUsage
+CompactDocumentWordsStore::getMemoryUsage() const
+{
+ MemoryUsage usage;
+ usage.incAllocatedBytes(_docs.getMemoryConsumption());
+ usage.incUsedBytes(_docs.getMemoryUsed());
+ usage.merge(_wordsStore.getMemoryUsage());
+ return usage;
+
+}
+
+} // namespace memoryindex
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/memoryindex/compact_document_words_store.h b/searchlib/src/vespa/searchlib/memoryindex/compact_document_words_store.h
new file mode 100644
index 00000000000..2841b02bab3
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/memoryindex/compact_document_words_store.h
@@ -0,0 +1,102 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/searchlib/btree/datastore.h>
+#include <vespa/searchlib/btree/entryref.h>
+#include <vespa/searchlib/util/memoryusage.h>
+#include <vespa/vespalib/util/array.h>
+#include <vespa/vespalib/stllike/hash_map.h>
+
+namespace search {
+namespace memoryindex {
+
+/**
+ * Class used to store the {wordRef, fieldId, docId} tuples that are inserted
+ * into the memory index dictionary. These tuples are later used when removing
+ * all remains of a document from the posting lists of the dictionary.
+ */
+class CompactDocumentWordsStore
+{
+public:
+
+ /**
+ * Builder used to collect all wordRefs for a field.
+ */
+ class Builder
+ {
+ public:
+ typedef std::unique_ptr<Builder> UP;
+ typedef vespalib::Array<btree::EntryRef> WordRefVector;
+
+ private:
+ uint32_t _docId;
+ WordRefVector _words;
+
+ public:
+ Builder(uint32_t docId_) : _docId(docId_), _words() {}
+ Builder &insert(btree::EntryRef wordRef);
+ uint32_t docId() const { return _docId; }
+ const WordRefVector &words() const { return _words; }
+ };
+
+ /**
+ * Iterator over all {wordRef, fieldId} pairs for a document.
+ */
+ class Iterator
+ {
+ private:
+ const uint32_t *_buf;
+ uint32_t _remainingWords;
+ uint32_t _wordRef;
+ bool _valid;
+
+ inline void nextWord();
+
+ public:
+ Iterator();
+ Iterator(const uint32_t *buf);
+ bool valid() const { return _valid; }
+ Iterator &operator++();
+ btree::EntryRef wordRef() const { return _wordRef; }
+ bool hasBackingBuf() const { return _buf != nullptr; }
+ };
+
+ /**
+ * Store for all {wordRef, fieldId} pairs among all documents.
+ */
+ class Store
+ {
+ public:
+ typedef btree::DataStoreT<btree::EntryRefT<22> > DataStoreType;
+ typedef DataStoreType::RefType RefType;
+
+ private:
+ DataStoreType _store;
+ btree::BufferType<uint32_t> _type;
+ const uint32_t _typeId;
+
+ public:
+ Store();
+ ~Store();
+ btree::EntryRef insert(const Builder &builder);
+ Iterator get(btree::EntryRef ref) const;
+ MemoryUsage getMemoryUsage() const { return _store.getMemoryUsage(); }
+ };
+
+ typedef vespalib::hash_map<uint32_t, btree::EntryRef> DocumentWordsMap;
+
+private:
+ DocumentWordsMap _docs;
+ Store _wordsStore;
+
+public:
+ CompactDocumentWordsStore();
+ void insert(const Builder &builder);
+ void remove(uint32_t docId);
+ Iterator get(uint32_t docId) const;
+ MemoryUsage getMemoryUsage() const;
+};
+
+} // namespace memoryindex
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/memoryindex/dictionary.cpp b/searchlib/src/vespa/searchlib/memoryindex/dictionary.cpp
new file mode 100644
index 00000000000..665d377af99
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/memoryindex/dictionary.cpp
@@ -0,0 +1,68 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+#include <vespa/vespalib/util/stringfmt.h>
+#include <vespa/vespalib/util/exceptions.h>
+#include "dictionary.h"
+
+#include <vespa/searchlib/bitcompression/posocccompression.h>
+
+#include <vespa/searchlib/btree/btreenode.hpp>
+#include <vespa/searchlib/btree/btreenodeallocator.hpp>
+#include <vespa/searchlib/btree/btreenodestore.hpp>
+#include <vespa/searchlib/btree/btreestore.hpp>
+#include <vespa/searchlib/btree/btreeiterator.hpp>
+#include <vespa/searchlib/btree/btreeroot.hpp>
+#include <vespa/searchlib/btree/btree.hpp>
+#include "fieldinverter.h"
+
+LOG_SETUP(".memoryindex.dictionary");
+
+namespace search {
+
+using index::DocIdAndFeatures;
+using index::WordDocElementFeatures;
+using index::Schema;
+
+namespace memoryindex {
+
+Dictionary::Dictionary(const Schema & schema)
+ : _fieldIndexes(),
+ _numFields(schema.getNumIndexFields())
+{
+ for (uint32_t fieldId = 0; fieldId < _numFields; ++fieldId) {
+ auto fieldIndex = std::make_unique<MemoryFieldIndex>(schema, fieldId);
+ _fieldIndexes.push_back(std::move(fieldIndex));
+ }
+}
+
+Dictionary::~Dictionary(void)
+{
+}
+
+
+void
+Dictionary::dump(search::index::IndexBuilder &indexBuilder)
+{
+ for (uint32_t fieldId = 0; fieldId < _numFields; ++fieldId) {
+ indexBuilder.startField(fieldId);
+ _fieldIndexes[fieldId]->dump(indexBuilder);
+ indexBuilder.endField();
+ }
+}
+
+MemoryUsage
+Dictionary::getMemoryUsage() const
+{
+ MemoryUsage usage;
+ for (auto &fieldIndex : _fieldIndexes) {
+ usage.merge(fieldIndex->getMemoryUsage());
+ }
+ return usage;
+}
+
+
+} // namespace search::memoryindex
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/memoryindex/dictionary.h b/searchlib/src/vespa/searchlib/memoryindex/dictionary.h
new file mode 100644
index 00000000000..b4093a05a43
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/memoryindex/dictionary.h
@@ -0,0 +1,64 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "memoryfieldindex.h"
+
+namespace search {
+namespace memoryindex {
+
+class IDocumentRemoveListener;
+class FieldInverter;
+
+class Dictionary {
+public:
+ using PostingList = MemoryFieldIndex::PostingList;
+
+private:
+ typedef vespalib::GenerationHandler GenerationHandler;
+
+ std::vector<std::unique_ptr<MemoryFieldIndex> > _fieldIndexes;
+ uint32_t _numFields;
+
+public:
+ Dictionary(const index::Schema &schema);
+ ~Dictionary(void);
+ PostingList::Iterator find(const vespalib::stringref word,
+ uint32_t fieldId) const
+ {
+ return _fieldIndexes[fieldId]->find(word);
+ }
+
+ PostingList::ConstIterator
+ findFrozen(const vespalib::stringref word, uint32_t fieldId) const
+ {
+ return _fieldIndexes[fieldId]->findFrozen(word);
+ }
+
+ uint64_t getNumUniqueWords() const {
+ uint64_t numUniqueWords = 0;
+ for (auto &fieldIndex : _fieldIndexes) {
+ numUniqueWords += fieldIndex->getNumUniqueWords();
+ }
+ return numUniqueWords;
+ }
+
+ void dump(search::index::IndexBuilder & indexBuilder);
+
+ MemoryUsage getMemoryUsage() const;
+
+ MemoryFieldIndex *getFieldIndex(uint32_t fieldId) const {
+ return _fieldIndexes[fieldId].get();
+ }
+
+ const std::vector<std::unique_ptr<MemoryFieldIndex> > &
+ getFieldIndexes() const { return _fieldIndexes; }
+
+ uint32_t getNumFields() const { return _numFields; }
+};
+
+} // namespace search::memoryindex
+
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/memoryindex/document_remover.cpp b/searchlib/src/vespa/searchlib/memoryindex/document_remover.cpp
new file mode 100644
index 00000000000..9119b9aa518
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/memoryindex/document_remover.cpp
@@ -0,0 +1,67 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+#include "document_remover.h"
+#include "i_document_remove_listener.h"
+#include "wordstore.h"
+#include <vespa/searchlib/common/sort.h>
+
+LOG_SETUP(".memoryindex.document_remover");
+
+namespace search {
+namespace memoryindex {
+
+typedef CompactDocumentWordsStore::Builder Builder;
+typedef CompactDocumentWordsStore::Iterator Iterator;
+
+DocumentRemover::DocumentRemover(const WordStore &wordStore)
+ : _store(),
+ _builder(),
+ _wordFieldDocTuples(),
+ _wordStore(wordStore)
+{
+}
+
+void
+DocumentRemover::remove(uint32_t docId, IDocumentRemoveListener &listener)
+{
+ Iterator itr = _store.get(docId);
+ if (itr.valid()) {
+ for (; itr.valid(); ++itr) {
+ vespalib::stringref word = _wordStore.getWord(itr.wordRef());
+ listener.remove(word, docId);
+ }
+ _store.remove(docId);
+ }
+}
+
+void
+DocumentRemover::insert(btree::EntryRef wordRef, uint32_t docId)
+{
+ _wordFieldDocTuples.emplace_back(wordRef, docId);
+}
+
+
+void
+DocumentRemover::flush()
+{
+ if (_wordFieldDocTuples.empty()) {
+ return;
+ }
+ ShiftBasedRadixSorter<WordFieldDocTuple, WordFieldDocTuple::Radix, std::less<WordFieldDocTuple>, 24, true>::
+ radix_sort(WordFieldDocTuple::Radix(), std::less<WordFieldDocTuple>(), &_wordFieldDocTuples[0], _wordFieldDocTuples.size(), 16);
+ Builder::UP builder(new Builder(_wordFieldDocTuples[0]._docId));
+ for (const auto &tuple : _wordFieldDocTuples) {
+ if (builder->docId() != tuple._docId) {
+ _store.insert(*builder);
+ builder.reset(new Builder(tuple._docId));
+ }
+ builder->insert(tuple._wordRef);
+ }
+ _store.insert(*builder);
+ _wordFieldDocTuples.clear();
+}
+
+
+} // namespace memoryindex
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/memoryindex/document_remover.h b/searchlib/src/vespa/searchlib/memoryindex/document_remover.h
new file mode 100644
index 00000000000..d08cf46c68e
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/memoryindex/document_remover.h
@@ -0,0 +1,63 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include "compact_document_words_store.h"
+#include "i_document_insert_listener.h"
+
+namespace search {
+namespace memoryindex {
+
+class IDocumentRemoveListener;
+class WordStore;
+
+/**
+ * Class used to remove documents from the memory index dictionary.
+ */
+class DocumentRemover : public IDocumentInsertListener
+{
+private:
+ struct WordFieldDocTuple
+ {
+ btree::EntryRef _wordRef;
+ uint32_t _docId;
+ WordFieldDocTuple() :
+ _wordRef(0),
+ _docId(0)
+ { }
+ WordFieldDocTuple(btree::EntryRef wordRef, uint32_t docId) :
+ _wordRef(wordRef),
+ _docId(docId)
+ { }
+ bool operator<(const WordFieldDocTuple &rhs) const {
+ if (_docId != rhs._docId) {
+ return _docId < rhs._docId;
+ }
+ return _wordRef < rhs._wordRef;
+ }
+ struct Radix {
+ uint32_t operator () (const WordFieldDocTuple & wft) const {
+ return wft._docId;
+ }
+ };
+
+ };
+
+ CompactDocumentWordsStore _store;
+ CompactDocumentWordsStore::Builder::UP _builder;
+ std::vector<WordFieldDocTuple> _wordFieldDocTuples;
+ const WordStore &_wordStore;
+
+public:
+ DocumentRemover(const WordStore &wordStore);
+ void remove(uint32_t docId, IDocumentRemoveListener &inverter);
+ CompactDocumentWordsStore &getStore() { return _store; }
+ const CompactDocumentWordsStore &getStore() const { return _store; }
+
+ // Implements IDocumentInsertListener
+ void insert(btree::EntryRef wordRef, uint32_t docId) override;
+ void flush() override;
+};
+
+} // namespace memoryindex
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/memoryindex/documentinverter.cpp b/searchlib/src/vespa/searchlib/memoryindex/documentinverter.cpp
new file mode 100644
index 00000000000..a32676baccf
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/memoryindex/documentinverter.cpp
@@ -0,0 +1,206 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".memoryindex.documentinverter");
+#include "documentinverter.h"
+#include <vespa/document/datatype/urldatatype.h>
+#include <vespa/searchlib/util/url.h>
+#include <stdexcept>
+#include <vespa/vespalib/text/utf8.h>
+#include <vespa/vespalib/text/lowercase.h>
+#include <vespa/searchlib/common/sort.h>
+#include <vespa/document/repo/fixedtyperepo.h>
+#include "fieldinverter.h"
+#include "urlfieldinverter.h"
+#include "dictionary.h"
+#include "ordereddocumentinserter.h"
+#include <vespa/searchlib/common/isequencedtaskexecutor.h>
+
+namespace search
+{
+
+namespace memoryindex
+{
+
+using document::Field;
+using document::FieldValue;
+using document::Document;
+using document::ArrayFieldValue;
+using document::WeightedSetFieldValue;
+using document::StringFieldValue;
+using document::IntFieldValue;
+using document::StructFieldValue;
+using document::DataType;
+using document::DocumentType;
+using document::Annotation;
+using document::AnnotationType;
+using document::AlternateSpanList;
+using document::Span;
+using document::SpanList;
+using document::SimpleSpanList;
+using document::SpanNode;
+using document::SpanTree;
+using document::SpanTreeVisitor;
+using index::DocIdAndPosOccFeatures;
+using index::Schema;
+using vespalib::make_string;
+using search::util::URL;
+
+
+DocumentInverter::DocumentInverter(const Schema &schema,
+ ISequencedTaskExecutor &invertThreads,
+ ISequencedTaskExecutor &pushThreads)
+ : _schema(schema),
+ _indexedFieldPaths(),
+ _dataType(nullptr),
+ _schemaIndexFields(),
+ _inverters(),
+ _urlInverters(),
+ _invertThreads(invertThreads),
+ _pushThreads(pushThreads)
+{
+ _schemaIndexFields.setup(schema);
+
+ for (uint32_t fieldId = 0; fieldId < _schema.getNumIndexFields();
+ ++fieldId) {
+ _inverters.push_back(std::make_unique<FieldInverter>(_schema, fieldId));
+ }
+ for (auto &urlField : _schemaIndexFields._uriFields) {
+ Schema::CollectionType collectionType =
+ _schema.getIndexField(urlField._all).getCollectionType();
+ _urlInverters.push_back(std::make_unique<UrlFieldInverter>
+ (collectionType,
+ _inverters[urlField._all].get(),
+ _inverters[urlField._scheme].get(),
+ _inverters[urlField._host].get(),
+ _inverters[urlField._port].get(),
+ _inverters[urlField._path].get(),
+ _inverters[urlField._query].get(),
+ _inverters[urlField._fragment].get(),
+ _inverters[urlField._hostname].get()));
+ }
+}
+
+
+DocumentInverter::~DocumentInverter()
+{
+ _invertThreads.sync();
+ _pushThreads.sync();
+
+}
+
+
+void
+DocumentInverter::addFieldPath(const document::DocumentType &docType,
+ uint32_t fieldId)
+{
+ assert(fieldId < _indexedFieldPaths.size());
+ std::unique_ptr<FieldPath> fp;
+ if ( ! docType.hasField(_schema.getIndexField(fieldId).getName())) {
+ LOG(error,
+ "Mismatch between documentdefinition and schema. "
+ "No field named '%s' from schema in document type '%s'",
+ _schema.getIndexField(fieldId).getName().c_str(),
+ docType.getName().c_str());
+ } else {
+ fp.reset(new Field(docType.getField(_schema.getIndexField(fieldId).getName())));
+ }
+ _indexedFieldPaths[fieldId] = std::move(fp);
+}
+
+
+void DocumentInverter::buildFieldPath(const document::DocumentType &docType,
+ const document::DataType *dataType)
+{
+ _indexedFieldPaths.clear();
+ _indexedFieldPaths.resize(_schema.getNumIndexFields());
+ for (const auto & fi : _schemaIndexFields._textFields) {
+ addFieldPath(docType, fi);
+ }
+ for (const auto & fi : _schemaIndexFields._uriFields) {
+ addFieldPath(docType, fi._all);
+ }
+ _dataType = dataType;
+}
+
+
+void
+DocumentInverter::invertDocument(uint32_t docId, const Document &doc)
+{
+ const document::DataType *dataType(doc.getDataType());
+ if (_indexedFieldPaths.empty() || _dataType != dataType) {
+ buildFieldPath(doc.getType(), dataType);
+ }
+ for (uint32_t fieldId : _schemaIndexFields._textFields) {
+ const FieldPath *const fieldPath(_indexedFieldPaths[fieldId].get());
+ FieldValue::UP fv;
+ if (fieldPath != nullptr) {
+ // TODO: better handling of input data (and better input data)
+ // FieldValue::UP fv = doc.getNestedFieldValue(fieldPath.begin(), fieldPath.end());
+ fv = doc.getValue(*fieldPath);
+ }
+ FieldInverter *inverter = _inverters[fieldId].get();
+ _invertThreads.execute(fieldId,
+ [inverter, docId, fv(std::move(fv))]()
+ { inverter->invertField(docId, fv); });
+ }
+ uint32_t urlId = 0;
+ for (const auto & fi : _schemaIndexFields._uriFields) {
+ uint32_t fieldId = fi._all;
+ const FieldPath *const fieldPath(_indexedFieldPaths[fieldId].get());
+ FieldValue::UP fv;
+ if (fieldPath != nullptr) {
+ // TODO: better handling of input data (and better input data)
+ // FieldValue::UP fv = doc.getNestedFieldValue(fieldPath.begin(), fieldPath.end());
+ fv = doc.getValue(*fieldPath);
+ }
+ UrlFieldInverter *inverter = _urlInverters[urlId].get();
+ _invertThreads.execute(fieldId,
+ [inverter, docId, fv(std::move(fv))]()
+ { inverter->invertField(docId, fv); });
+ ++urlId;
+ }
+}
+
+
+void
+DocumentInverter::removeDocument(uint32_t docId)
+{
+ uint32_t fieldId = 0;
+ for (auto &inverter : _inverters) {
+ _invertThreads.execute(fieldId,
+ [inverter(inverter.get()), docId]()
+ { inverter->removeDocument(docId); });
+ ++fieldId;
+ }
+}
+
+
+void
+DocumentInverter::pushDocuments(Dictionary &dict,
+ const std::shared_ptr<IDestructorCallback> &
+ onWriteDone)
+{
+ auto indexFieldIterator = dict.getFieldIndexes().begin();
+ uint32_t fieldId = 0;
+ for (auto &inverter : _inverters) {
+ MemoryFieldIndex &fieldIndex(**indexFieldIterator);
+ DocumentRemover &remover(fieldIndex.getDocumentRemover());
+ OrderedDocumentInserter &inserter(fieldIndex.getInserter());
+ _pushThreads.execute(fieldId,
+ [inverter(inverter.get()), &remover, &inserter,
+ &fieldIndex, onWriteDone]()
+ { inverter->applyRemoves(remover);
+ inverter->pushDocuments(inserter);
+ fieldIndex.commit(); });
+ ++indexFieldIterator;
+ ++fieldId;
+ }
+}
+
+
+} // namespace memoryindex
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/memoryindex/documentinverter.h b/searchlib/src/vespa/searchlib/memoryindex/documentinverter.h
new file mode 100644
index 00000000000..415271a0990
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/memoryindex/documentinverter.h
@@ -0,0 +1,128 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <map>
+#include <set>
+#include <vespa/document/document.h>
+#include <vespa/searchlib/index/doctypebuilder.h>
+#include <limits>
+#include "i_document_remove_listener.h"
+
+namespace search
+{
+
+class ISequencedTaskExecutor;
+class IDestructorCallback;
+
+namespace memoryindex
+{
+
+class FieldInverter;
+class UrlFieldInverter;
+class Dictionary;
+
+class DocumentInverter
+{
+private:
+ DocumentInverter(const DocumentInverter &) = delete;
+ DocumentInverter &operator=(const DocumentInverter &) = delete;
+
+ const index::Schema &_schema;
+
+ typedef index::DocTypeBuilder DocTypeBuilder;
+ typedef DocTypeBuilder::UriField UriField;
+ typedef DocTypeBuilder::SchemaIndexFields SchemaIndexFields;
+
+ void
+ addFieldPath(const document::DocumentType &docType,
+ uint32_t fieldId);
+
+ void
+ buildFieldPath(const document::DocumentType & docType,
+ const document::DataType *dataType);
+
+ void
+ invertNormalDocTextField(size_t fieldId,
+ const document::FieldValue &field);
+
+ void
+ invertNormalDocUriField(const UriField &handle,
+ const document::FieldValue &field);
+
+ //typedef document::FieldPath FieldPath;
+ typedef document::Field FieldPath;
+ typedef std::vector<std::unique_ptr<FieldPath> > IndexedFieldPaths;
+ IndexedFieldPaths _indexedFieldPaths;
+ const document::DataType * _dataType;
+
+ DocTypeBuilder::SchemaIndexFields _schemaIndexFields;
+
+ std::vector<std::unique_ptr<FieldInverter>> _inverters;
+ std::vector<std::unique_ptr<UrlFieldInverter>> _urlInverters;
+ ISequencedTaskExecutor &_invertThreads;
+ ISequencedTaskExecutor &_pushThreads;
+
+ /**
+ * Obtain the schema used by this index.
+ *
+ * @return schema used by this index
+ */
+ const index::Schema &
+ getSchema(void) const
+ {
+ return _schema;
+ }
+
+public:
+ /**
+ * Create a new memory index based on the given schema.
+ *
+ * @param schema the index schema to use
+ */
+ DocumentInverter(const index::Schema &schema,
+ ISequencedTaskExecutor &invertThreads,
+ ISequencedTaskExecutor &pushThreads);
+
+ ~DocumentInverter();
+
+ /**
+ * Push inverted documents to memory index structure.
+ *
+ * @param dict dictionary
+ */
+ void
+ pushDocuments(Dictionary &dict,
+ const std::shared_ptr<IDestructorCallback> &onWriteDone);
+
+ /**
+ * Invert a document.
+ *
+ * @param docId local id for document
+ * @param doc the document
+ *
+ **/
+ void
+ invertDocument(uint32_t docId, const document::Document &doc);
+
+ /**
+ * Remove a document.
+ *
+ * @param docId local id for document
+ */
+ void removeDocument(uint32_t docId);
+
+ FieldInverter *getInverter(uint32_t fieldId) const {
+ return _inverters[fieldId].get();
+ }
+
+ const std::vector<std::unique_ptr<FieldInverter> > &
+ getInverters() const { return _inverters; }
+
+ uint32_t getNumFields() const { return _inverters.size(); }
+};
+
+} // namespace memoryindex
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/memoryindex/featurestore.cpp b/searchlib/src/vespa/searchlib/memoryindex/featurestore.cpp
new file mode 100644
index 00000000000..ac009ed7554
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/memoryindex/featurestore.cpp
@@ -0,0 +1,167 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".memoryindex.featurestore");
+#include "featurestore.h"
+#include <vespa/searchlib/index/schemautil.h>
+#include <vespa/searchlib/btree/datastore.hpp>
+
+namespace search
+{
+
+namespace memoryindex
+{
+
+constexpr size_t MIN_CLUSTERS = 1024u;
+
+using index::SchemaUtil;
+
+uint64_t
+FeatureStore::writeFeatures(uint32_t packedIndex,
+ const DocIdAndFeatures &features)
+{
+ _f._fieldsParams = &_fieldsParams[packedIndex];
+ uint64_t oldOffset = _f.getWriteOffset();
+ assert((oldOffset & 63) == 0);
+ if (oldOffset > 2000) {
+ _f.setupWrite(_fctx);
+ oldOffset = 0;
+ assert(_f.getWriteOffset() == oldOffset);
+ }
+ assert(!features.getRaw());
+ _f.writeFeatures(features);
+ return oldOffset;
+}
+
+
+btree::EntryRef
+FeatureStore::addFeatures(const uint8_t *src, uint64_t byteLen)
+{
+ uint32_t pad = RefType::pad(byteLen);
+ _store.ensureBufferCapacity(_typeId, byteLen + pad + DECODE_SAFETY);
+ uint32_t activeBufferId = _store.getActiveBufferId(_typeId);
+ btree::BufferState &state = _store.getBufferState(activeBufferId);
+ size_t oldSize = state.size();
+ RefType ref(oldSize, activeBufferId);
+ uint8_t * dst = _store.getBufferEntry<uint8_t>(activeBufferId, oldSize);
+ memcpy(dst, src, byteLen);
+ dst += byteLen;
+ if (pad > 0) {
+ memset(dst, 0, pad);
+ dst += pad;
+ }
+ memset(dst, 0, DECODE_SAFETY);
+ state.pushed_back(byteLen + pad);
+ return ref;
+}
+
+
+std::pair<btree::EntryRef, uint64_t>
+FeatureStore::addFeatures(uint64_t beginOffset, uint64_t endOffset)
+{
+ uint64_t bitLen = (endOffset - beginOffset);
+ assert(static_cast<int64_t>(bitLen) > 0);
+ uint64_t wordLen = (bitLen + 63) / 64;
+ uint64_t byteLen = (bitLen + 7) / 8;
+ assert(wordLen > 0);
+ assert(byteLen > 0);
+ const uint8_t *src = reinterpret_cast<const uint8_t *>(_f._valI - wordLen);
+ RefType ref = addFeatures(src, byteLen);
+ return std::make_pair(ref, bitLen);
+}
+
+
+btree::EntryRef
+FeatureStore::moveFeatures(btree::EntryRef ref, uint64_t bitLen)
+{
+ const uint8_t *src = getBits(ref);
+ uint64_t byteLen = (bitLen + 7) / 8;
+ RefType newRef = addFeatures(src, byteLen);
+ // Mark old features as dead
+ _store.incDead(ref, byteLen + RefType::pad(byteLen));
+ return newRef;
+}
+
+
+FeatureStore::FeatureStore(const Schema &schema)
+ : _store(),
+ _f(NULL),
+ _fctx(_f),
+ _d(NULL),
+ _fieldsParams(),
+ _schema(schema),
+ _type(RefType::align(1u), MIN_CLUSTERS,
+ RefType::offsetSize() / RefType::align(1u)),
+ _typeId(0)
+{
+ _f.setWriteContext(&_fctx);
+ _fctx.allocComprBuf(64, 1);
+ _f.afterWrite(_fctx, 0, 0);
+
+ _fieldsParams.resize(_schema.getNumIndexFields());
+ SchemaUtil::IndexIterator it(_schema);
+ for(; it.isValid(); ++it) {
+ _fieldsParams[it.getIndex()].
+ setSchemaParams(_schema, it.getIndex());
+ }
+ _store.addType(&_type);
+ _store.initActiveBuffers();
+}
+
+
+FeatureStore::~FeatureStore(void)
+{
+ _store.dropBuffers();
+}
+
+
+std::pair<btree::EntryRef, uint64_t>
+FeatureStore::addFeatures(uint32_t packedIndex,
+ const DocIdAndFeatures &features)
+{
+ uint64_t oldOffset = writeFeatures(packedIndex, features);
+ uint64_t newOffset = _f.getWriteOffset();
+ _f.flush();
+ return addFeatures(oldOffset, newOffset);
+}
+
+
+
+void
+FeatureStore::getFeatures(uint32_t packedIndex, btree::EntryRef ref,
+ DocIdAndFeatures &features)
+{
+ setupForField(packedIndex, _d);
+ setupForReadFeatures(ref, _d);
+ _d.readFeatures(features);
+}
+
+
+size_t
+FeatureStore::bitSize(uint32_t packedIndex, btree::EntryRef ref)
+{
+ setupForField(packedIndex, _d);
+ setupForUnpackFeatures(ref, _d);
+ uint64_t oldOffset = _d.getReadOffset();
+ _d.skipFeatures(1);
+ uint64_t newOffset = _d.getReadOffset();
+ uint64_t bitLen = (newOffset - oldOffset);
+ assert(static_cast<int64_t>(bitLen) > 0);
+ return bitLen;
+}
+
+
+btree::EntryRef
+FeatureStore::moveFeatures(uint32_t packedIndex,
+ btree::EntryRef ref)
+{
+ uint64_t bitLen = bitSize(packedIndex, ref);
+ return moveFeatures(ref, bitLen);
+}
+
+
+} // namespace memoryindex
+
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/memoryindex/featurestore.h b/searchlib/src/vespa/searchlib/memoryindex/featurestore.h
new file mode 100644
index 00000000000..676e2d54860
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/memoryindex/featurestore.h
@@ -0,0 +1,274 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/btree/datastore.h>
+#include <vespa/searchlib/index/docidandfeatures.h>
+#include <vespa/searchlib/bitcompression/compression.h>
+#include <vespa/searchlib/bitcompression/posocccompression.h>
+
+namespace search {
+
+namespace memoryindex {
+
+class FeatureStore
+{
+public:
+ typedef btree::DataStoreT<btree::AlignedEntryRefT<22, 2> > DataStoreType;
+ typedef DataStoreType::RefType RefType;
+ typedef bitcompression::EG2PosOccEncodeContext<true> EncodeContext;
+ typedef bitcompression::EG2PosOccDecodeContextCooked<true>
+ DecodeContextCooked;
+ typedef vespalib::GenerationHandler::generation_t generation_t;
+
+private:
+ typedef index::Schema Schema;
+ typedef index::DocIdAndFeatures DocIdAndFeatures;
+ typedef bitcompression::PosOccFieldsParams PosOccFieldsParams;
+
+ static const uint32_t DECODE_SAFETY = 16;
+
+ DataStoreType _store;
+
+ // Feature Encoder
+ EncodeContext _f;
+ // Buffer for compressed features.
+ ComprFileWriteContext _fctx;
+
+ // Feature Decoder
+ DecodeContextCooked _d;
+
+ // Coding parameters for fields and field collections, derived
+ // from schema.
+ std::vector<PosOccFieldsParams> _fieldsParams;
+
+ const Schema &_schema;
+
+ btree::BufferType<uint8_t> _type;
+ const uint32_t _typeId;
+
+ /**
+ * Writes the given features to the underlying encode context.
+ *
+ * @param packedIndex the field or field collection owning features
+ * @param features the features to be encoded
+ * @return the encode offset before writing
+ */
+ uint64_t
+ writeFeatures(uint32_t packedIndex, const DocIdAndFeatures &features);
+
+ /**
+ * Adds the features from the given buffer to the data store.
+ *
+ * @param src buffer with features
+ * @param byteLen the byte length of the buffer
+ * @return the entry ref for the added features
+ */
+ btree::EntryRef
+ addFeatures(const uint8_t * src, uint64_t byteLen);
+
+ /**
+ * Adds the features currently in the underlying encode context to the data store.
+ *
+ * @param beginOffset the begin offset into the encode context
+ * @param endOffset the end offset into the encode context
+ * @return the entry ref and bit length of the features
+ */
+ std::pair<btree::EntryRef, uint64_t>
+ addFeatures(uint64_t beginOffset, uint64_t endOffset);
+
+ /**
+ * Moves features to new location, as part of compaction.
+ *
+ * @param ref old reference to stored features
+ * @param bitLen bit length of features to move
+ * @return new reference to stored features
+ */
+ btree::EntryRef moveFeatures(btree::EntryRef ref, uint64_t bitLen);
+
+public:
+
+ /**
+ * Constructor for feature store.
+ *
+ * @param schema The schema describing fields and field
+ * collections available, used to derive
+ * coding parameters.
+ */
+ FeatureStore(const Schema &schema);
+
+ ~FeatureStore(void);
+
+ /**
+ * Add features to feature store
+ *
+ * @param packedIndex The field or field collection owning features
+ * @param features The features to be encoded
+ * @return pair with reference to stored features and
+ * size of encoded features in bits
+ */
+ std::pair<btree::EntryRef, uint64_t>
+ addFeatures(uint32_t packedIndex,
+ const DocIdAndFeatures &features);
+
+
+ /**
+ * Get features from feature store. Method signature is not
+ * const since feature decoder is written to during calculation.
+ *
+ * @param packedIndex The field or field collection owning features
+ * @param ref Reference to stored features
+ * @param features The features to be decoded
+ */
+ void
+ getFeatures(uint32_t packedIndex,
+ btree::EntryRef ref,
+ DocIdAndFeatures &features);
+
+
+ /**
+ * Setup the given decoder to be used for the given field or field
+ * collection.
+ *
+ * @param packedIndex The field or field collection owning features
+ * @param decoder The feature decoder
+ */
+ void
+ setupForField(uint32_t packedIndex, DecodeContextCooked &decoder) const
+ {
+ decoder._fieldsParams = &_fieldsParams[packedIndex];
+ }
+
+ /**
+ * Setup the given decoder to later use readFeatures() to decode
+ * the stored features.
+ *
+ * @param ref Reference to stored features
+ * @param decoder The feature decoder
+ */
+ void
+ setupForReadFeatures(btree::EntryRef ref, DecodeContextCooked &decoder) const
+ {
+ const uint8_t * bits = getBits(ref);
+ decoder.setByteCompr(bits);
+ uint32_t bufferId = RefType(ref).bufferId();
+ const btree::BufferState &state = _store.getBufferState(bufferId);
+ decoder.setEnd(
+ ((_store.getBufferEntry<uint8_t>(bufferId, state.size()) -
+ bits) + 7) / 8,
+ false);
+ }
+
+ /**
+ * Setup the given decoder to later use unpackFeatures() to decode
+ * the stored features.
+ *
+ * @param ref Reference to stored features
+ * @param decoder The feature decoder
+ */
+ void
+ setupForUnpackFeatures(btree::EntryRef ref, DecodeContextCooked &decoder) const
+ {
+ decoder.setByteCompr(getBits(ref));
+ }
+
+ /**
+ * Calculate size of encoded features. Method signature is not
+ * const since feature decoder is written to during calculation.
+ *
+ * @param packedIndex The field or field collection owning features
+ * @param ref Reference to stored features
+ * @return size of features in bits
+ */
+ size_t
+ bitSize(uint32_t packedIndex, btree::EntryRef ref);
+
+ /**
+ * Get byte address of stored features
+ *
+ * @param ref Referennce to stored features
+ * @return byte address of stored features
+ */
+ const uint8_t *
+ getBits(btree::EntryRef ref) const
+ {
+ RefType iRef(ref);
+ return _store.getBufferEntry<uint8_t>(iRef.bufferId(), iRef.offset());
+ }
+
+ /**
+ * Move features to new location, as part of compaction.
+ *
+ * @param packedIndex The field or field collection owning features
+ * @param ref Old reference to stored features
+ * @return New reference to stored features
+ */
+ btree::EntryRef
+ moveFeatures(uint32_t packedIndex,
+ btree::EntryRef ref);
+
+ /**
+ * Return a const view of the fields params used by this feature store.
+ *
+ * @return const view of fields params.
+ */
+ const std::vector<PosOccFieldsParams> &
+ getFieldsParams() const
+ {
+ return _fieldsParams;
+ }
+
+ // Inherit doc from DataStoreBase
+ void
+ trimHoldLists(generation_t usedGen)
+ {
+ _store.trimHoldLists(usedGen);
+ }
+
+ // Inherit doc from DataStoreBase
+ void
+ transferHoldLists(generation_t generation)
+ {
+ _store.transferHoldLists(generation);
+ }
+
+ void
+ clearHoldLists(void)
+ {
+ _store.clearHoldLists();
+ }
+
+ // Inherit doc from DataStoreBase
+ std::vector<uint32_t>
+ startCompact()
+ {
+ return _store.startCompact(_typeId);
+ }
+
+ // Inherit doc from DataStoreBase
+ void
+ finishCompact(const std::vector<uint32_t> & toHold)
+ {
+ _store.finishCompact(toHold);
+ }
+
+ // Inherit doc from DataStoreBase
+ MemoryUsage
+ getMemoryUsage() const
+ {
+ return _store.getMemoryUsage();
+ }
+
+ // Inherit doc from DataStoreBase
+ btree::DataStoreBase::MemStats
+ getMemStats() const
+ {
+ return _store.getMemStats();
+ }
+};
+
+
+} // namespace search::memoryindex
+} // namespace search
+
+
diff --git a/searchlib/src/vespa/searchlib/memoryindex/fieldinverter.cpp b/searchlib/src/vespa/searchlib/memoryindex/fieldinverter.cpp
new file mode 100644
index 00000000000..a6899d87bba
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/memoryindex/fieldinverter.cpp
@@ -0,0 +1,577 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".memoryindex.fieldinverter");
+#include "fieldinverter.h"
+#include <vespa/document/datatype/urldatatype.h>
+#include <vespa/searchlib/util/url.h>
+#include <stdexcept>
+#include <vespa/vespalib/text/utf8.h>
+#include <vespa/vespalib/text/lowercase.h>
+#include <vespa/searchlib/common/sort.h>
+#include "ordereddocumentinserter.h"
+#include <vespa/searchlib/bitcompression/compression.h>
+#include <vespa/searchlib/bitcompression/posocccompression.h>
+#include <vespa/document/annotation/annotation.h>
+#include <vespa/document/annotation/span.h>
+#include <vespa/document/annotation/spanlist.h>
+#include <vespa/document/annotation/alternatespanlist.h>
+#include <vespa/document/annotation/spantree.h>
+#include <vespa/document/annotation/spantreevisitor.h>
+
+namespace search
+{
+
+namespace memoryindex
+{
+
+using document::Field;
+using document::FieldValue;
+using document::Document;
+using document::ArrayFieldValue;
+using document::WeightedSetFieldValue;
+using document::StringFieldValue;
+using document::IntFieldValue;
+using document::StructFieldValue;
+using document::DataType;
+using document::DocumentType;
+using document::Annotation;
+using document::AnnotationType;
+using document::AlternateSpanList;
+using document::Span;
+using document::SpanList;
+using document::SimpleSpanList;
+using document::SpanNode;
+using document::SpanTree;
+using document::SpanTreeVisitor;
+using index::DocIdAndPosOccFeatures;
+using index::Schema;
+using vespalib::make_string;
+using search::util::URL;
+
+namespace documentinverterkludge
+{
+
+namespace linguistics
+{
+
+const vespalib::string SPANTREE_NAME("linguistics");
+
+}
+
+}
+
+using namespace documentinverterkludge;
+
+namespace
+{
+
+class SpanFinder : public SpanTreeVisitor
+{
+public:
+ int32_t begin_pos;
+ int32_t end_pos;
+
+ SpanFinder() : begin_pos(0x7fffffff), end_pos(-1) {}
+ Span span() { return Span(begin_pos, end_pos - begin_pos); }
+
+ void visit(const Span &node) override {
+ begin_pos = std::min(begin_pos, node.from());
+ end_pos = std::max(end_pos, node.from() + node.length());
+ }
+ void visit(const SpanList &node) override {
+ for (const auto & span_ : node) {
+ const_cast<SpanNode *>(span_)->accept(*this);
+ }
+ }
+ void visit(const SimpleSpanList &node) override {
+ for (const auto & span_ : node) {
+ const_cast<Span &>(span_).accept(*this);
+ }
+ }
+ void visit(const AlternateSpanList &node) override {
+ for (size_t i = 0; i < node.getNumSubtrees(); ++i) {
+ visit(node.getSubtree(i));
+ }
+ }
+};
+
+Span
+getSpan(const SpanNode &span_node)
+{
+ SpanFinder finder;
+ // The SpanNode will not be changed.
+ const_cast<SpanNode &>(span_node).accept(finder);
+ return finder.span();
+}
+
+}
+
+void
+FieldInverter::processAnnotations(const StringFieldValue &value)
+{
+ _terms.clear();
+ StringFieldValue::SpanTrees spanTrees = value.getSpanTrees();
+ const SpanTree *tree = StringFieldValue::findTree(spanTrees, linguistics::SPANTREE_NAME);
+ if (tree == NULL) {
+ /* This is wrong unless field is exact match */
+ const vespalib::string &text = value.getValue();
+ if (text.empty())
+ return;
+ uint32_t wordRef = saveWord(text);
+ if (wordRef != 0u) {
+ add(wordRef);
+ stepWordPos();
+ }
+ return;
+ }
+ const vespalib::string &text = value.getValue();
+ for (const Annotation & annotation : *tree) {
+ const SpanNode *span = annotation.getSpanNode();
+ if ((span != nullptr) && annotation.valid() &&
+ (annotation.getType() == *AnnotationType::TERM))
+ {
+ Span sp = getSpan(*span);
+ if (sp.length() != 0) {
+ _terms.push_back(std::make_pair(sp,
+ annotation.getFieldValue()));
+ }
+ }
+ }
+ std::sort(_terms.begin(), _terms.end());
+ SpanTermVector::const_iterator it = _terms.begin();
+ SpanTermVector::const_iterator ite = _terms.end();
+ uint32_t wordRef;
+ bool mustStep = false;
+ for (; it != ite; ) {
+ SpanTermVector::const_iterator it_begin = it;
+ for (; it != ite && it->first == it_begin->first; ++it) {
+ if (it->second) { // it->second is a const FieldValue *.
+ wordRef = saveWord(*it->second);
+ } else {
+ const Span &iSpan = it->first;
+ assert(iSpan.from() >= 0);
+ assert(iSpan.length() > 0);
+ wordRef = saveWord(vespalib::stringref(&text[iSpan.from()],
+ iSpan.length()));
+ }
+ if (wordRef != 0u) {
+ add(wordRef);
+ mustStep = true;
+ }
+ }
+ if (mustStep) {
+ stepWordPos();
+ mustStep = false;
+ }
+ }
+}
+
+
+void
+FieldInverter::reset()
+{
+ _words.clear();
+ _elems.clear();
+ _positions.clear();
+ _wordRefs.resize(1);
+ _pendingDocs.clear();
+ _abortedDocs.clear();
+ _removeDocs.clear();
+ _oldPosSize = 0u;
+}
+
+struct WordRefRadix {
+ uint32_t operator () (const uint64_t v) { return v >> 32; }
+};
+
+void
+FieldInverter::sortWords(void)
+{
+ assert(_wordRefs.size() > 1);
+
+ // Make a dictionary for words.
+ { // Use radix sort based on first four bytes of word, before finalizing with std::sort.
+ vespalib::Array<uint64_t> firstFourBytes(_wordRefs.size());
+ for (size_t i(1); i < _wordRefs.size(); i++) {
+ uint64_t firstFour = ntohl(*reinterpret_cast<const uint32_t *>(getWordFromRef(_wordRefs[i])));
+ firstFourBytes[i] = (firstFour << 32) | _wordRefs[i];
+ }
+ ShiftBasedRadixSorter<uint64_t, WordRefRadix, CompareWordRef, 24, true>::
+ radix_sort(WordRefRadix(), CompareWordRef(_words), &firstFourBytes[1], firstFourBytes.size()-1, 16);
+ for (size_t i(1); i < firstFourBytes.size(); i++) {
+ _wordRefs[i] = firstFourBytes[i] & 0xffffffffl;
+ }
+ }
+ // Populate word numbers in word buffer and mapping from
+ // word numbers to word reference.
+ // TODO: shrink word buffer to only contain unique words
+ std::vector<uint32_t>::const_iterator w(_wordRefs.begin() + 1);
+ std::vector<uint32_t>::const_iterator we(_wordRefs.end());
+ uint32_t wordNum = 1; // First valid word number
+ const char *lastWord = getWordFromRef(*w);
+ updateWordNum(*w, wordNum);
+ for (++w; w != we; ++w) {
+ const char *word = getWordFromRef(*w);
+ int cmpres = strcmp(lastWord, word);
+ assert(cmpres <= 0);
+ if (cmpres < 0) {
+ ++wordNum;
+ _wordRefs[wordNum] = *w;
+ lastWord = word;
+ }
+ updateWordNum(*w, wordNum);
+ }
+ assert(_wordRefs.size() >= wordNum + 1);
+ _wordRefs.resize(wordNum + 1);
+ // Replace initial word reference by word number.
+ for (auto &p : _positions) {
+ p._wordNum = getWordNum(p._wordNum);
+ }
+}
+
+
+void
+FieldInverter::startElement(int32_t weight)
+{
+ _elems.push_back(ElemInfo(weight)); // Fill in length later
+}
+
+
+void
+FieldInverter::endElement(void)
+{
+ _elems.back().setLen(_wpos);
+ _wpos = 0;
+ ++_elem;
+}
+
+uint32_t
+FieldInverter::saveWord(const vespalib::stringref word)
+{
+ const size_t wordsSize = _words.size();
+ // assert((wordsSize & 3) == 0); // Check alignment
+ size_t len = word.size();
+ if (len == 0)
+ return 0u;
+
+ const size_t fullyPaddedSize = (wordsSize + 4 + len + 1 + 3) & ~3;
+ _words.reserve(vespalib::roundUp2inN(fullyPaddedSize));
+ _words.resize(fullyPaddedSize);
+
+ char * buf = &_words[0] + wordsSize;
+ memset(buf, 0, 4);
+ memcpy(buf + 4, word.c_str(), len);
+ uint32_t *lastWord = reinterpret_cast<uint32_t *>(buf + 4 + (len & ~0x3));
+ *lastWord &= (0xffffff >> ((3 - (len & 3)) << 3)); //only on little endian machiness !!
+
+ uint32_t wordRef = (wordsSize + 4) >> 2;
+ // assert(wordRef != 0);
+ _wordRefs.push_back(wordRef);
+ return wordRef;
+}
+
+
+uint32_t
+FieldInverter::saveWord(const document::FieldValue &fv)
+{
+ assert(fv.getClass().id() == StringFieldValue::classId);
+ typedef std::pair<const char*, size_t> RawRef;
+ RawRef sRef = fv.getAsRaw();
+ return saveWord(vespalib::stringref(sRef.first, sRef.second));
+}
+
+
+void
+FieldInverter::remove(const vespalib::stringref word, uint32_t docId)
+{
+ uint32_t wordRef = saveWord(word);
+ assert(wordRef != 0);
+ _positions.emplace_back(wordRef, docId);
+}
+
+
+void
+FieldInverter::processNormalDocTextField(const StringFieldValue &field)
+{
+ startElement(1);
+ processAnnotations(field);
+ endElement();
+}
+
+
+void
+FieldInverter::processNormalDocArrayTextField(const ArrayFieldValue &field)
+{
+ uint32_t el = 0;
+ uint32_t ele = field.size();
+ for (;el < ele; ++el) {
+ const FieldValue &elfv = field[el];
+ assert(elfv.getClass().id() == StringFieldValue::classId);
+ const StringFieldValue &element =
+ static_cast<const StringFieldValue &>(elfv);
+ startElement(1);
+ processAnnotations(element);
+ endElement();
+ }
+}
+
+
+void
+FieldInverter::processNormalDocWeightedSetTextField(const WeightedSetFieldValue &field)
+{
+ for (const auto & el : field) {
+ const FieldValue &key = *el.first;
+ const FieldValue &xweight = *el.second;
+ assert(key.getClass().id() == StringFieldValue::classId);
+ assert(xweight.getClass().id() == IntFieldValue::classId);
+ const StringFieldValue &element = static_cast<const StringFieldValue &>(key);
+ int32_t weight = xweight.getAsInt();
+ startElement(weight);
+ processAnnotations(element);
+ endElement();
+ }
+}
+
+
+FieldInverter::FieldInverter(const Schema &schema, uint32_t fieldId)
+ : _fieldId(fieldId),
+ _elem(0u),
+ _wpos(0u),
+ _docId(0),
+ _oldPosSize(0),
+ _schema(schema),
+ _words(),
+ _elems(),
+ _positions(),
+ _features(),
+ _elementWordRefs(),
+ _wordRefs(1),
+ _terms(),
+ _abortedDocs(),
+ _pendingDocs(),
+ _removeDocs()
+{
+}
+
+
+void
+FieldInverter::abortPendingDoc(uint32_t docId)
+{
+ auto itr = _pendingDocs.find(docId);
+ if (itr != _pendingDocs.end()) {
+ if (itr->second.getLen() != 0) {
+ _abortedDocs.push_back(itr->second);
+ }
+ _pendingDocs.erase(itr);
+ }
+}
+
+
+void
+FieldInverter::moveNotAbortedDocs(uint32_t &dstIdx,
+ uint32_t srcIdx,
+ uint32_t nextTrimIdx)
+{
+ assert(nextTrimIdx >= srcIdx);
+ uint32_t size = nextTrimIdx - srcIdx;
+ if (size == 0)
+ return;
+ assert(dstIdx < srcIdx);
+ assert(srcIdx < _positions.size());
+ assert(srcIdx + size <= _positions.size());
+ PosInfo *dst = &_positions[dstIdx];
+ const PosInfo *src = &_positions[srcIdx];
+ const PosInfo *srce = src + size;
+ while (src != srce) {
+ *dst = *src;
+ ++dst;
+ ++src;
+ }
+ dstIdx += size;
+}
+
+
+void
+FieldInverter::trimAbortedDocs()
+{
+ if (_abortedDocs.empty()) {
+ return;
+ }
+ std::sort(_abortedDocs.begin(), _abortedDocs.end());
+ auto itrEnd = _abortedDocs.end();
+ auto itr = _abortedDocs.begin();
+ uint32_t dstIdx = itr->getStart();
+ uint32_t srcIdx = itr->getStart() + itr->getLen();
+ ++itr;
+ while (itr != itrEnd) {
+ moveNotAbortedDocs(dstIdx, srcIdx, itr->getStart());
+ srcIdx = itr->getStart() + itr->getLen();
+ ++itr;
+ }
+ moveNotAbortedDocs(dstIdx, srcIdx, _positions.size());
+ _positions.resize(dstIdx);
+ _abortedDocs.clear();
+}
+
+
+void
+FieldInverter::invertField(uint32_t docId, const FieldValue::UP &val)
+{
+ startDoc(docId);
+ if (val) {
+ invertNormalDocTextField(*val);
+ }
+ endDoc();
+}
+
+
+void
+FieldInverter::invertNormalDocTextField(const FieldValue &val)
+{
+ const vespalib::Identifiable::RuntimeClass & cInfo(val.getClass());
+ const Schema::IndexField &field = _schema.getIndexField(_fieldId);
+ switch (field.getCollectionType()) {
+ case Schema::SINGLE:
+ if (cInfo.id() == StringFieldValue::classId) {
+ processNormalDocTextField(static_cast<const StringFieldValue &>(val));
+ } else {
+ throw std::runtime_error(make_string("Expected DataType::STRING, got '%s'", val.getDataType()->getName().c_str()));
+ }
+ break;
+ case Schema::WEIGHTEDSET:
+ if (cInfo.id() == WeightedSetFieldValue::classId) {
+ const WeightedSetFieldValue &wset = static_cast<const WeightedSetFieldValue &>(val);
+ if (wset.getNestedType() == *DataType::STRING) {
+ processNormalDocWeightedSetTextField(wset);
+ } else {
+ throw std::runtime_error(make_string("Expected DataType::STRING, got '%s'", wset.getNestedType().getName().c_str()));
+ }
+ } else {
+ throw std::runtime_error(make_string("Expected weighted set, got '%s'", cInfo.name()));
+ }
+ break;
+ case Schema::ARRAY:
+ if (cInfo.id() == ArrayFieldValue::classId) {
+ const ArrayFieldValue &arr = static_cast<const ArrayFieldValue&>(val);
+ if (arr.getNestedType() == *DataType::STRING) {
+ processNormalDocArrayTextField(arr);
+ } else {
+ throw std::runtime_error(make_string("Expected DataType::STRING, got '%s'", arr.getNestedType().getName().c_str()));
+ }
+ } else {
+ throw std::runtime_error(make_string("Expected Array, got '%s'", cInfo.name()));
+ }
+ break;
+ default:
+ break;
+ }
+}
+
+
+namespace {
+
+struct FullRadix {
+ uint64_t operator () (const FieldInverter::PosInfo & p) const {
+ return (static_cast<uint64_t>(p._wordNum) << 32) |
+ p._docId;
+ }
+};
+
+}
+
+
+void
+FieldInverter::applyRemoves(DocumentRemover &remover)
+{
+ for (auto docId : _removeDocs) {
+ remover.remove(docId, *this);
+ }
+ _removeDocs.clear();
+}
+
+
+void
+FieldInverter::pushDocuments(IOrderedDocumentInserter &inserter)
+{
+ trimAbortedDocs();
+
+ if (_positions.empty()) {
+ reset();
+ return; // All documents with words aborted
+ }
+
+ sortWords();
+
+ // Sort for terms.
+ ShiftBasedRadixSorter<PosInfo, FullRadix, std::less<PosInfo>, 56, true>::
+ radix_sort(FullRadix(), std::less<PosInfo>(), &_positions[0], _positions.size(), 16);
+
+ constexpr uint32_t NO_ELEMENT_ID = std::numeric_limits<uint32_t>::max();
+ constexpr uint32_t NO_WORD_POS = std::numeric_limits<uint32_t>::max();
+ uint32_t lastWordNum = 0;
+ uint32_t lastElemId = 0;
+ uint32_t lastWordPos = 0;
+ uint32_t numWordIds = _wordRefs.size() - 1;
+ uint32_t lastDocId = 0;
+ vespalib::stringref word;
+ bool emptyFeatures = true;
+
+ inserter.rewind();
+
+ for (auto &i : _positions) {
+ assert(i._wordNum <= numWordIds);
+ (void) numWordIds;
+ if (lastWordNum != i._wordNum || lastDocId != i._docId) {
+ if (!emptyFeatures) {
+ inserter.add(lastDocId, _features);
+ emptyFeatures = true;
+ }
+ if (lastWordNum != i._wordNum) {
+ lastWordNum = i._wordNum;
+ word = getWordFromNum(lastWordNum);
+ inserter.setNextWord(word);
+ }
+ lastDocId = i._docId;
+ if (i.removed()) {
+ inserter.remove(lastDocId);
+ continue;
+ }
+ }
+ if (emptyFeatures) {
+ if (!i.removed()) {
+ emptyFeatures = false;
+ _features.clear(lastDocId);
+ lastElemId = NO_ELEMENT_ID;
+ lastWordPos = NO_WORD_POS;
+ } else {
+ continue; // ignore dup remove
+ }
+ } else {
+ // removes must come before non-removes
+ assert(!i.removed());
+ }
+ const ElemInfo &elem = _elems[i._elemRef];
+ if (i._wordPos != lastWordPos || i._elemId != lastElemId) {
+ _features.addNextOcc(i._elemId, i._wordPos,
+ elem._weight, elem._len);
+ lastElemId = i._elemId;
+ lastWordPos = i._wordPos;
+ } else {
+ // silently ignore duplicate annotations
+ }
+ }
+
+ if (!emptyFeatures) {
+ inserter.add(lastDocId, _features);
+ }
+ inserter.flush();
+ reset();
+}
+
+
+} // namespace memoryindex
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/memoryindex/fieldinverter.h b/searchlib/src/vespa/searchlib/memoryindex/fieldinverter.h
new file mode 100644
index 00000000000..1f72c8e62b4
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/memoryindex/fieldinverter.h
@@ -0,0 +1,449 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <map>
+#include <set>
+#include <vespa/document/document.h>
+#include <vespa/document/datatype/datatypes.h>
+#include <vespa/searchlib/index/doctypebuilder.h>
+#include <limits>
+#include "i_document_remove_listener.h"
+#include <vespa/searchlib/index/docidandfeatures.h>
+#include <vespa/searchlib/bitcompression/compression.h>
+#include <vespa/searchlib/bitcompression/posocccompression.h>
+#include <vespa/document/annotation/span.h>
+
+namespace search
+{
+
+namespace memoryindex
+{
+
+class IOrderedDocumentInserter;
+class DocumentRemover;
+
+class FieldInverter : public IDocumentRemoveListener
+{
+public:
+ class PosInfo
+ {
+ public:
+ uint32_t _wordNum; // XXX: Initially word reference
+ uint32_t _docId;
+ uint32_t _elemId;
+ uint32_t _wordPos;
+ uint32_t _elemRef; // Offset in _elems
+
+ static constexpr uint32_t _elemRemoved =
+ std::numeric_limits<uint32_t>::max();
+
+ PosInfo()
+ : _wordNum(0),
+ _docId(0),
+ _elemId(0),
+ _wordPos(0),
+ _elemRef(0)
+ {
+ }
+
+ PosInfo(uint32_t wordRef,
+ uint32_t docId,
+ uint32_t elemId,
+ uint32_t wordPos, uint32_t elemRef)
+ : _wordNum(wordRef),
+ _docId(docId),
+ _elemId(elemId),
+ _wordPos(wordPos),
+ _elemRef(elemRef)
+ {
+ }
+
+
+ PosInfo(uint32_t wordRef,
+ uint32_t docId)
+ : _wordNum(wordRef),
+ _docId(docId),
+ _elemId(_elemRemoved),
+ _wordPos(0),
+ _elemRef(0)
+ {
+ }
+
+ bool
+ removed() const
+ {
+ return _elemId == _elemRemoved;
+ }
+
+ bool
+ operator<(const PosInfo &rhs) const
+ {
+ if (_wordNum != rhs._wordNum)
+ return _wordNum < rhs._wordNum;
+ if (_docId != rhs._docId)
+ return _docId < rhs._docId;
+ if (_elemId != rhs._elemId) {
+ if (removed() != rhs.removed())
+ return removed() && !rhs.removed();
+ return _elemId < rhs._elemId;
+ }
+ return _wordPos < rhs._wordPos;
+ }
+ };
+
+private:
+ FieldInverter(const FieldInverter &) = delete;
+ FieldInverter(const FieldInverter &&) = delete;
+ FieldInverter &operator=(const FieldInverter &) = delete;
+ FieldInverter &operator=(const FieldInverter &&) = delete;
+
+ typedef vespalib::Array<char, vespalib::DefaultAlloc> WordBuffer;
+
+ class ElemInfo
+ {
+ public:
+ int32_t _weight;
+ uint32_t _len;
+
+ ElemInfo(int32_t weight)
+ : _weight(weight),
+ _len(0u)
+ {
+ }
+
+ void
+ setLen(uint32_t len)
+ {
+ _len = len;
+ }
+ };
+
+ typedef std::vector<ElemInfo> ElemInfoVec;
+
+ typedef std::vector<PosInfo> PosInfoVec;
+
+ class CompareWordRef
+ {
+ const char *const _wordBuffer;
+
+ public:
+ CompareWordRef(const WordBuffer &wordBuffer)
+ : _wordBuffer(&wordBuffer[0])
+ {
+ }
+
+ const char *
+ getWord(uint32_t wordRef) const
+ {
+ return &_wordBuffer[static_cast<size_t>(wordRef) << 2];
+ }
+
+ bool
+ operator()(const uint32_t lhs, const uint32_t rhs) const
+ {
+ return strcmp(getWord(lhs), getWord(rhs)) < 0;
+ }
+ };
+
+ /*
+ * Range in _positions vector used to represent a document put.
+ */
+ class PositionRange
+ {
+ uint32_t _start;
+ uint32_t _len;
+
+ public:
+ PositionRange(uint32_t start, uint32_t len)
+ : _start(start),
+ _len(len)
+ {
+ }
+
+ bool
+ operator<(const PositionRange &rhs) const
+ {
+ if (_start != rhs._start) {
+ return _start < rhs._start;
+ }
+ return _len < rhs._len;
+ }
+
+ uint32_t getStart() const { return _start; }
+ uint32_t getLen() const { return _len; }
+ };
+
+ // Current field state.
+ uint32_t _fieldId; // current field id
+ uint32_t _elem; // current element
+ uint32_t _wpos; // current word pos
+ uint32_t _docId;
+ uint32_t _oldPosSize;
+
+ const index::Schema &_schema;
+
+ WordBuffer _words;
+ ElemInfoVec _elems;
+ PosInfoVec _positions;
+ index::DocIdAndPosOccFeatures _features;
+ std::vector<uint32_t> _elementWordRefs;
+ std::vector<uint32_t> _wordRefs;
+
+ typedef std::pair<document::Span, const document::FieldValue *> SpanTerm;
+ typedef std::vector<SpanTerm> SpanTermVector;
+ SpanTermVector _terms;
+
+ // info about aborted and pending documents.
+ std::vector<PositionRange> _abortedDocs;
+ std::map<uint32_t, PositionRange> _pendingDocs;
+ std::vector<uint32_t> _removeDocs;
+
+ void
+ invertNormalDocTextField(const document::FieldValue &val);
+
+public:
+ /**
+ * Start a new element
+ *
+ * @param weight element weight
+ */
+ void
+ startElement(int32_t weight);
+
+ /**
+ * End an element.
+ */
+ void
+ endElement(void);
+
+private:
+ /**
+ * Save field value as word in word buffer.
+ *
+ * @param word word to be saved
+ * @param len length of word to be saved.
+ *
+ * @return word reference
+ */
+ VESPA_DLL_LOCAL uint32_t
+ saveWord(const vespalib::stringref word);
+
+ /**
+ * Save field value as word in word buffer.
+ *
+ * @param fv field value containing word to be stored
+ *
+ * @return word reference
+ */
+ VESPA_DLL_LOCAL uint32_t
+ saveWord(const document::FieldValue &fv);
+
+ /**
+ * Get pointer to saved word from a word reference.
+ *
+ * @param wordRef word reference
+ *
+ * @return saved word
+ */
+ const char *
+ getWordFromRef(uint32_t wordRef) const
+ {
+ return &_words[static_cast<size_t>(wordRef) << 2];
+ }
+
+ /**
+ * Get pointer to saved word from a word number
+ *
+ * @param wordNum word number
+ *
+ * @return saved word
+ */
+ const char *
+ getWordFromNum(uint32_t wordNum) const
+ {
+ return getWordFromRef(_wordRefs[wordNum]);
+ }
+
+ /**
+ * Get word number from word reference
+ *
+ * @param wordRef word reference
+ *
+ * @return word number
+ */
+ uint32_t
+ getWordNum(uint32_t wordRef) const
+ {
+ const char *p = &_words[static_cast<size_t>(wordRef - 1) << 2];
+ return *reinterpret_cast<const uint32_t *>(p);
+ }
+
+ /**
+ * Update mapping from word reference to word number
+ *
+ * @param wordRef word reference
+ * @param wordNum word number
+ */
+ void
+ updateWordNum(uint32_t wordRef, uint32_t wordNum)
+ {
+ char *p = &_words[static_cast<size_t>(wordRef - 1) << 2];
+ *reinterpret_cast<uint32_t *>(p) = wordNum;
+ }
+
+ /**
+ * Add a word reference to posting list. Don't step word pos.
+ *
+ *
+ * @param wordRef word reference
+ */
+ void
+ add(uint32_t wordRef) {
+ _positions.emplace_back(wordRef, _docId, _elem,
+ _wpos, _elems.size() - 1);
+ }
+
+ void
+ stepWordPos(void)
+ {
+ ++_wpos;
+ }
+
+public:
+ VESPA_DLL_LOCAL void
+ processAnnotations(const document::StringFieldValue &value);
+
+private:
+ void
+ processNormalDocTextField(const document::StringFieldValue &field);
+
+ void
+ processNormalDocArrayTextField(const document::ArrayFieldValue &field);
+
+ void
+ processNormalDocWeightedSetTextField(const document::WeightedSetFieldValue &field);
+
+ /**
+ * Obtain the schema used by this index.
+ *
+ * @return schema used by this index
+ */
+ const index::Schema &
+ getSchema(void) const
+ {
+ return _schema;
+ }
+
+ /**
+ * Clear internal memory structures.
+ */
+ void
+ reset(void);
+
+ /**
+ * Calculate word numbers and replace word references with word
+ * numbers in internal memory structures.
+ */
+ void
+ sortWords(void);
+
+ void
+ moveNotAbortedDocs(uint32_t &dstIdx, uint32_t srcIdx, uint32_t nextTrimIdx);
+
+ void
+ trimAbortedDocs();
+
+ /*
+ * Abort a pending document that has already been inverted.
+ *
+ * @param docId local id for document
+ *
+ */
+ void
+ abortPendingDoc(uint32_t docId);
+
+public:
+ /**
+ * Create a new memory index based on the given schema.
+ *
+ * @param schema the index schema to use
+ * @param schema the field to be inverted
+ */
+ FieldInverter(const index::Schema &schema, uint32_t fieldId);
+
+ /*
+ * Apply pending removes.
+ *
+ * @param remover document remover
+ */
+ void
+ applyRemoves(DocumentRemover &remover);
+
+ /**
+ * Push inverted documents to memory index structure.
+ *
+ * Temporary restriction: Currently only one document at a time is
+ * supported.
+ *
+ * @param inserter ordered document inserter
+ */
+ void
+ pushDocuments(IOrderedDocumentInserter &inserter);
+
+ /*
+ * Invert a normal text field, based on annotations.
+ */
+ void
+ invertField(uint32_t docId, const document::FieldValue::UP &val);
+
+ /*
+ * Setup remove of word in old version of document.
+ */
+ virtual void
+ remove(const vespalib::stringref word, uint32_t docId) override;
+
+ void
+ removeDocument(uint32_t docId)
+ {
+ abortPendingDoc(docId);
+ _removeDocs.push_back(docId);
+ }
+
+ void
+ startDoc(uint32_t docId)
+ {
+ assert(_docId == 0);
+ assert(docId != 0);
+ abortPendingDoc(docId);
+ _removeDocs.push_back(docId);
+ _docId = docId;
+ _elem = 0;
+ _wpos = 0;
+ }
+
+ void
+ endDoc()
+ {
+ uint32_t newPosSize = static_cast<uint32_t>(_positions.size());
+ _pendingDocs.insert({ _docId,
+ { _oldPosSize, newPosSize - _oldPosSize } });
+ _docId = 0;
+ _oldPosSize = newPosSize;
+ }
+
+ void
+ addWord(const vespalib::stringref word)
+ {
+ uint32_t wordRef = saveWord(word);
+ if (wordRef != 0u) {
+ add(wordRef);
+ stepWordPos();
+ }
+ }
+};
+
+} // namespace memoryindex
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/memoryindex/i_document_insert_listener.h b/searchlib/src/vespa/searchlib/memoryindex/i_document_insert_listener.h
new file mode 100644
index 00000000000..7cf84892b17
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/memoryindex/i_document_insert_listener.h
@@ -0,0 +1,23 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+#include <vespa/searchlib/btree/entryref.h>
+
+namespace search {
+namespace memoryindex {
+
+/**
+ * Interface used to track which {wordRef, fieldId} pairs that are
+ * inserted into the memory index dictionary for a document.
+ */
+class IDocumentInsertListener
+{
+public:
+ virtual ~IDocumentInsertListener() {}
+ virtual void insert(btree::EntryRef wordRef, uint32_t docId) = 0;
+ virtual void flush() = 0;
+};
+
+
+} // namespace memoryindex
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/memoryindex/i_document_remove_listener.h b/searchlib/src/vespa/searchlib/memoryindex/i_document_remove_listener.h
new file mode 100644
index 00000000000..b8e71f8673b
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/memoryindex/i_document_remove_listener.h
@@ -0,0 +1,28 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+namespace search
+{
+
+namespace memoryindex
+{
+
+/**
+ * Interface used to track which {wordRef, fieldId} pairs that are
+ * removed from the memory index dictionary for a document.
+ */
+class IDocumentRemoveListener
+{
+public:
+ virtual ~IDocumentRemoveListener() {}
+
+ virtual void remove(const vespalib::stringref word,
+ uint32_t docId) = 0;
+};
+
+
+}
+
+}
+
diff --git a/searchlib/src/vespa/searchlib/memoryindex/iordereddocumentinserter.h b/searchlib/src/vespa/searchlib/memoryindex/iordereddocumentinserter.h
new file mode 100644
index 00000000000..aef68b62d23
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/memoryindex/iordereddocumentinserter.h
@@ -0,0 +1,52 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+namespace search
+{
+
+namespace memoryindex
+{
+
+/**
+ * Interface class for ordered document inserter.
+ *
+ * Insert order must be properly sorted, by (word, docId)
+ */
+class IOrderedDocumentInserter
+{
+public:
+ virtual ~IOrderedDocumentInserter() { }
+
+ /**
+ * Set next word to operate on.
+ */
+ virtual void setNextWord(const vespalib::stringref word) = 0;
+
+ /**
+ * Add (word, docId) tuple with given features.
+ */
+ virtual void add(uint32_t docId,
+ const index::DocIdAndFeatures &features) = 0;
+
+ /**
+ * Remove (word, docId) tuple.
+ */
+ virtual void remove(uint32_t docId) = 0;
+
+ /*
+ * Flush pending changes to postinglist for (_word).
+ *
+ * _dItr is located at correct position.
+ */
+ virtual void flush() = 0;
+
+ /*
+ * Rewind iterator, to start new pass.
+ */
+ virtual void rewind() = 0;
+};
+
+}
+
+}
diff --git a/searchlib/src/vespa/searchlib/memoryindex/memoryfieldindex.cpp b/searchlib/src/vespa/searchlib/memoryindex/memoryfieldindex.cpp
new file mode 100644
index 00000000000..88b718e1860
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/memoryindex/memoryfieldindex.cpp
@@ -0,0 +1,342 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+#include <vespa/vespalib/util/stringfmt.h>
+#include <vespa/vespalib/util/exceptions.h>
+#include "memoryfieldindex.h"
+
+#include <vespa/searchlib/bitcompression/posocccompression.h>
+
+#include <vespa/searchlib/btree/btreenode.hpp>
+#include <vespa/searchlib/btree/btreenodeallocator.hpp>
+#include <vespa/searchlib/btree/btreenodestore.hpp>
+#include <vespa/searchlib/btree/btreestore.hpp>
+#include <vespa/searchlib/btree/btreeiterator.hpp>
+#include <vespa/searchlib/btree/btreeroot.hpp>
+#include <vespa/searchlib/btree/btree.hpp>
+#include "ordereddocumentinserter.h"
+
+LOG_SETUP(".memoryindex.memoryfieldindex");
+
+namespace search {
+
+using index::DocIdAndFeatures;
+using index::WordDocElementFeatures;
+using index::Schema;
+
+namespace memoryindex {
+
+MemoryFieldIndex::MemoryFieldIndex(const Schema & schema, uint32_t fieldId)
+ : _wordStore(),
+ _numUniqueWords(0),
+ _generationHandler(),
+ _dict(),
+ _postingListStore(),
+ _featureStore(schema),
+ _fieldId(fieldId),
+ _remover(_wordStore),
+ _inserter(std::make_unique<OrderedDocumentInserter>(*this))
+{
+}
+
+MemoryFieldIndex::~MemoryFieldIndex(void)
+{
+ _postingListStore.disableFreeLists();
+ _postingListStore.disableElemHoldList();
+ _dict.disableFreeLists();
+ _dict.disableElemHoldList();
+ // XXX: Kludge
+ for (DictionaryTree::Iterator it = _dict.begin();
+ it.valid(); ++it) {
+ btree::EntryRef pidx(it.getData());
+ if (pidx.valid()) {
+ _postingListStore.clear(pidx);
+ // Before updating ref
+ std::atomic_thread_fence(std::memory_order_release);
+ it.writeData(btree::EntryRef().ref());
+ }
+ }
+ _postingListStore.clearBuilder();
+ freeze(); // Flush all pending posting list tree freezes
+ transferHoldLists();
+ _dict.clear(); // Clear dictionary
+ freeze(); // Flush pending freeze for dictionary tree.
+ transferHoldLists();
+ incGeneration();
+ trimHoldLists();
+}
+
+MemoryFieldIndex::PostingList::Iterator
+MemoryFieldIndex::find(const vespalib::stringref word) const
+{
+ DictionaryTree::Iterator itr =
+ _dict.find(WordKey(btree::EntryRef()),
+ KeyComp(_wordStore, word));
+ if (itr.valid()) {
+ return _postingListStore.begin(itr.getData());
+ }
+ return PostingList::Iterator();
+}
+
+MemoryFieldIndex::PostingList::ConstIterator
+MemoryFieldIndex::findFrozen(const vespalib::stringref word) const
+{
+ DictionaryTree::ConstIterator itr =
+ _dict.getFrozenView().find(WordKey(btree::EntryRef()),
+ KeyComp(_wordStore, word));
+ if (itr.valid()) {
+ return _postingListStore.beginFrozen(itr.getData());
+ }
+ return PostingList::Iterator();
+}
+
+
+void
+MemoryFieldIndex::compactFeatures(void)
+{
+ std::vector<uint32_t> toHold;
+
+ toHold = _featureStore.startCompact();
+ DictionaryTree::Iterator itr(_dict.begin());
+ uint32_t packedIndex = _fieldId;
+ for (; itr.valid(); ++itr) {
+ PostingListStore::RefType pidx(itr.getData());
+ if (!pidx.valid())
+ continue;
+ uint32_t clusterSize = _postingListStore.getClusterSize(pidx);
+ if (clusterSize == 0) {
+ const PostingList *tree =
+ _postingListStore.getTreeEntry(pidx);
+ PostingList::Iterator
+ it(tree->begin(_postingListStore.getAllocator()));
+ for (; it.valid(); ++it) {
+ btree::EntryRef oldFeatures = it.getData();
+
+ // Filter on which buffers to move features from when
+ // performing incremental compaction.
+
+ btree::EntryRef newFeatures =
+ _featureStore.moveFeatures(packedIndex, oldFeatures);
+
+#if 0
+ LOG(info,
+ "Moved features from 0x%x to 0x%x\n",
+ oldFeatures.ref(), newFeatures.ref());
+#endif
+
+ // Features must be written before reference is updated.
+ std::atomic_thread_fence(std::memory_order_release);
+
+ // Ugly, ugly due to const_cast in iterator
+ it.writeData(newFeatures.ref());
+ }
+ } else {
+ const PostingListKeyDataType *shortArray =
+ _postingListStore.getKeyDataEntry(pidx, clusterSize);
+ const PostingListKeyDataType *ite = shortArray + clusterSize;
+ for (const PostingListKeyDataType *it = shortArray; it < ite;
+ ++it) {
+ btree::EntryRef oldFeatures = it->getData();
+
+ // Filter on which buffers to move features from when
+ // performing incremental compaction.
+
+ btree::EntryRef newFeatures =
+ _featureStore.moveFeatures(packedIndex, oldFeatures);
+
+#if 0
+ LOG(info,
+ "Moved features from 0x%x to 0x%x\n",
+ oldFeatures.ref(), newFeatures.ref());
+#endif
+
+ // Features must be written before reference is updated.
+ std::atomic_thread_fence(std::memory_order_release);
+
+ // Ugly, ugly due to const_cast, but new data is
+ // semantically equal to old data
+ const_cast<PostingListKeyDataType *>(it)->
+ setData(newFeatures.ref());
+ }
+ }
+ }
+ typedef GenerationHandler::generation_t generation_t;
+ _featureStore.finishCompact(toHold);
+ generation_t generation = _generationHandler.getCurrentGeneration();
+ _featureStore.transferHoldLists(generation);
+}
+
+void
+MemoryFieldIndex::dump(search::index::IndexBuilder & indexBuilder)
+{
+ vespalib::stringref word;
+ FeatureStore::DecodeContextCooked decoder(NULL);
+ DocIdAndFeatures features;
+ vespalib::Array<uint32_t, vespalib::DefaultAlloc> wordMap(_numUniqueWords + 1, 0);
+ _featureStore.setupForField(_fieldId, decoder);
+ for (DictionaryTree::Iterator itr = _dict.begin(); itr.valid(); ++itr) {
+ const WordKey & wk = itr.getKey();
+ PostingListStore::RefType plist(itr.getData());
+ word = _wordStore.getWord(wk._wordRef);
+ if (!plist.valid())
+ continue;
+ indexBuilder.startWord(word);
+ uint32_t clusterSize = _postingListStore.getClusterSize(plist);
+ if (clusterSize == 0) {
+ const PostingList *tree =
+ _postingListStore.getTreeEntry(plist);
+ PostingList::Iterator pitr = tree->begin(_postingListStore.getAllocator());
+ assert(pitr.valid());
+ for (; pitr.valid(); ++pitr) {
+ uint32_t docId = pitr.getKey();
+ btree::EntryRef featureRef = pitr.getData();
+ indexBuilder.startDocument(docId);
+ _featureStore.setupForReadFeatures(featureRef, decoder);
+ decoder.readFeatures(features);
+ size_t poff = 0;
+ uint32_t wpIdx = 0u;
+ size_t numElements = features._elements.size();
+ for (size_t i = 0; i < numElements; ++i) {
+ const WordDocElementFeatures & fef = features._elements[i];
+ indexBuilder.startElement(fef.getElementId(), fef.getWeight(), fef.getElementLen());
+ for (size_t j = 0; j < fef.getNumOccs(); ++j, ++wpIdx) {
+ assert(wpIdx == poff + j);
+ indexBuilder.addOcc(features._wordPositions[poff + j]);
+ }
+ poff += fef.getNumOccs();
+ indexBuilder.endElement();
+ }
+ indexBuilder.endDocument();
+ }
+ } else {
+ const PostingListKeyDataType *kd =
+ _postingListStore.getKeyDataEntry(plist, clusterSize);
+ const PostingListKeyDataType *kde = kd + clusterSize;
+ for (; kd != kde; ++kd) {
+ uint32_t docId = kd->_key;
+ btree::EntryRef featureRef = kd->getData();
+ indexBuilder.startDocument(docId);
+ _featureStore.setupForReadFeatures(featureRef, decoder);
+ decoder.readFeatures(features);
+ size_t poff = 0;
+ uint32_t wpIdx = 0u;
+ size_t numElements = features._elements.size();
+ for (size_t i = 0; i < numElements; ++i) {
+ const WordDocElementFeatures & fef = features._elements[i];
+ indexBuilder.startElement(fef.getElementId(), fef.getWeight(), fef.getElementLen());
+ for (size_t j = 0; j < fef.getNumOccs(); ++j, ++wpIdx) {
+ assert(wpIdx == poff + j);
+ indexBuilder.addOcc(features.
+ _wordPositions[poff + j]);
+ }
+ poff += fef.getNumOccs();
+ indexBuilder.endElement();
+ }
+ indexBuilder.endDocument();
+ }
+ }
+ indexBuilder.endWord();
+ }
+}
+
+
+MemoryUsage
+MemoryFieldIndex::getMemoryUsage() const
+{
+ MemoryUsage usage;
+ usage.merge(_wordStore.getMemoryUsage());
+ usage.merge(_dict.getMemoryUsage());
+ usage.merge(_postingListStore.getMemoryUsage());
+ usage.merge(_featureStore.getMemoryUsage());
+ usage.merge(_remover.getStore().getMemoryUsage());
+ return usage;
+}
+
+
+} // namespace search::memoryindex
+
+namespace btree {
+
+template
+class BTreeNodeDataWrap<memoryindex::MemoryFieldIndex::WordKey,
+ BTreeDefaultTraits::LEAF_SLOTS>;
+
+template
+class BTreeNodeT<memoryindex::MemoryFieldIndex::WordKey,
+ BTreeDefaultTraits::INTERNAL_SLOTS>;
+
+#if 0
+template
+class BTreeNodeT<memoryindex::MemoryFieldIndex::WordKey,
+ BTreeDefaultTraits::LEAF_SLOTS>;
+#endif
+
+template
+class BTreeNodeTT<memoryindex::MemoryFieldIndex::WordKey,
+ EntryRef,
+ search::btree::NoAggregated,
+ BTreeDefaultTraits::INTERNAL_SLOTS>;
+
+template
+class BTreeNodeTT<memoryindex::MemoryFieldIndex::WordKey,
+ memoryindex::MemoryFieldIndex::PostingListPtr,
+ search::btree::NoAggregated,
+ BTreeDefaultTraits::LEAF_SLOTS>;
+
+template
+class BTreeInternalNode<memoryindex::MemoryFieldIndex::WordKey,
+ search::btree::NoAggregated,
+ BTreeDefaultTraits::INTERNAL_SLOTS>;
+
+template
+class BTreeLeafNode<memoryindex::MemoryFieldIndex::WordKey,
+ memoryindex::MemoryFieldIndex::PostingListPtr,
+ search::btree::NoAggregated,
+ BTreeDefaultTraits::LEAF_SLOTS>;
+
+template
+class BTreeNodeStore<memoryindex::MemoryFieldIndex::WordKey,
+ memoryindex::MemoryFieldIndex::PostingListPtr,
+ search::btree::NoAggregated,
+ BTreeDefaultTraits::INTERNAL_SLOTS,
+ BTreeDefaultTraits::LEAF_SLOTS>;
+
+template
+class BTreeIterator<memoryindex::MemoryFieldIndex::WordKey,
+ memoryindex::MemoryFieldIndex::PostingListPtr,
+ search::btree::NoAggregated,
+ const memoryindex::MemoryFieldIndex::KeyComp,
+ BTreeDefaultTraits>;
+
+template
+class BTree<memoryindex::MemoryFieldIndex::WordKey,
+ memoryindex::MemoryFieldIndex::PostingListPtr,
+ search::btree::NoAggregated,
+ const memoryindex::MemoryFieldIndex::KeyComp,
+ BTreeDefaultTraits>;
+
+template
+class BTreeRoot<memoryindex::MemoryFieldIndex::WordKey,
+ memoryindex::MemoryFieldIndex::PostingListPtr,
+ search::btree::NoAggregated,
+ const memoryindex::MemoryFieldIndex::KeyComp,
+ BTreeDefaultTraits>;
+
+template
+class BTreeRootBase<memoryindex::MemoryFieldIndex::WordKey,
+ memoryindex::MemoryFieldIndex::PostingListPtr,
+ search::btree::NoAggregated,
+ BTreeDefaultTraits::INTERNAL_SLOTS,
+ BTreeDefaultTraits::LEAF_SLOTS>;
+
+template
+class BTreeNodeAllocator<memoryindex::MemoryFieldIndex::WordKey,
+ memoryindex::MemoryFieldIndex::PostingListPtr,
+ search::btree::NoAggregated,
+ BTreeDefaultTraits::INTERNAL_SLOTS,
+ BTreeDefaultTraits::LEAF_SLOTS>;
+
+
+} // namespace btree
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/memoryindex/memoryfieldindex.h b/searchlib/src/vespa/searchlib/memoryindex/memoryfieldindex.h
new file mode 100644
index 00000000000..1c16f1746a5
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/memoryindex/memoryfieldindex.h
@@ -0,0 +1,283 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "featurestore.h"
+#include "wordstore.h"
+#include "document_remover.h"
+#include <vespa/searchlib/btree/btreeroot.h>
+#include <vespa/searchlib/btree/btree.h>
+#include <vespa/searchlib/btree/btreenodeallocator.h>
+#include <vespa/searchlib/btree/btreestore.h>
+#include <vespa/searchlib/index/docidandfeatures.h>
+#include <vespa/searchlib/index/indexbuilder.h>
+#include <vespa/searchlib/util/memoryusage.h>
+#include <vespa/vespalib/stllike/string.h>
+
+namespace search {
+namespace memoryindex {
+
+class OrderedDocumentInserter;
+/*
+ * Memory index for a single field.
+ */
+class MemoryFieldIndex {
+public:
+ typedef btree::BTreeRoot<uint32_t, uint32_t, search::btree::NoAggregated>
+ PostingList; // docid -> feature ref
+ typedef btree::BTreeStore<uint32_t, uint32_t,
+ search::btree::NoAggregated,
+ std::less<uint32_t>,
+ btree::BTreeDefaultTraits> PostingListStore;
+ typedef PostingListStore::KeyDataType PostingListKeyDataType;
+
+
+ struct WordKey {
+ btree::EntryRef _wordRef;
+
+ explicit WordKey(btree::EntryRef wordRef)
+ : _wordRef(wordRef)
+ {
+ }
+
+ WordKey(void)
+ : _wordRef()
+ {
+ }
+
+ friend vespalib::asciistream &
+ operator<<(vespalib::asciistream & os, const WordKey & rhs)
+ {
+ os << "wr(" << rhs._wordRef.ref() << ")";
+ return os;
+ }
+ };
+
+ class KeyComp {
+ private:
+ const WordStore &_wordStore;
+ const vespalib::stringref _word;
+
+ const char *
+ getWord(btree::EntryRef wordRef) const
+ {
+ if (wordRef.valid()) {
+ return _wordStore.getWord(wordRef);
+ }
+ return _word.c_str();
+ }
+
+ public:
+ KeyComp(const WordStore &wordStore, const vespalib::stringref word)
+ : _wordStore(wordStore),
+ _word(word)
+ {
+ }
+
+ bool
+ operator()(const WordKey & lhs, const WordKey & rhs) const
+ {
+ int cmpres = strcmp(getWord(lhs._wordRef), getWord(rhs._wordRef));
+ return cmpres < 0;
+ }
+ };
+
+ typedef uint32_t PostingListPtr;
+ typedef btree::BTree<WordKey, PostingListPtr,
+ search::btree::NoAggregated,
+ const KeyComp> DictionaryTree;
+private:
+ typedef vespalib::GenerationHandler GenerationHandler;
+
+ WordStore _wordStore;
+ uint64_t _numUniqueWords;
+ GenerationHandler _generationHandler;
+ DictionaryTree _dict;
+ PostingListStore _postingListStore;
+ FeatureStore _featureStore;
+ uint32_t _fieldId;
+ DocumentRemover _remover;
+ std::unique_ptr<OrderedDocumentInserter> _inserter;
+
+public:
+ btree::EntryRef addWord(const vespalib::stringref word) {
+ _numUniqueWords++;
+ return _wordStore.addWord(word);
+ }
+
+ btree::EntryRef
+ addFeatures(const index::DocIdAndFeatures &features)
+ {
+ return _featureStore.addFeatures(_fieldId, features).first;
+ }
+
+ MemoryFieldIndex(const index::Schema &schema, uint32_t fieldId);
+ ~MemoryFieldIndex(void);
+ PostingList::Iterator find(const vespalib::stringref word) const;
+
+ PostingList::ConstIterator
+ findFrozen(const vespalib::stringref word) const;
+
+ uint64_t getNumUniqueWords() const { return _numUniqueWords; }
+ const FeatureStore & getFeatureStore() const { return _featureStore; }
+ const WordStore &getWordStore() const { return _wordStore; }
+ OrderedDocumentInserter &getInserter() const { return *_inserter; }
+
+private:
+ void freeze() {
+ _postingListStore.freeze();
+ _dict.getAllocator().freeze();
+ }
+
+ void
+ trimHoldLists()
+ {
+ GenerationHandler::generation_t usedGen =
+ _generationHandler.getFirstUsedGeneration();
+ _postingListStore.trimHoldLists(usedGen);
+ _dict.getAllocator().trimHoldLists(usedGen);
+ _featureStore.trimHoldLists(usedGen);
+ }
+
+ void
+ transferHoldLists()
+ {
+ GenerationHandler::generation_t generation =
+ _generationHandler.getCurrentGeneration();
+ _postingListStore.transferHoldLists(generation);
+ _dict.getAllocator().transferHoldLists(generation);
+ _featureStore.transferHoldLists(generation);
+ }
+
+ void
+ incGeneration(void)
+ {
+ _generationHandler.incGeneration();
+ }
+
+public:
+ GenerationHandler::Guard takeGenerationGuard() {
+ return _generationHandler.takeGuard();
+ }
+
+ void
+ compactFeatures(void);
+
+ void dump(search::index::IndexBuilder & indexBuilder);
+
+ MemoryUsage getMemoryUsage() const;
+
+ DictionaryTree &
+ getDictionaryTree()
+ {
+ return _dict;
+ }
+
+ PostingListStore &
+ getPostingListStore()
+ {
+ return _postingListStore;
+ }
+
+ DocumentRemover &
+ getDocumentRemover()
+ {
+ return _remover;
+ }
+
+ void commit()
+ {
+ _remover.flush();
+ freeze();
+ transferHoldLists();
+ incGeneration();
+ trimHoldLists();
+ }
+};
+
+} // namespace search::memoryindex
+
+namespace btree {
+
+extern template
+class BTreeNodeDataWrap<memoryindex::MemoryFieldIndex::WordKey,
+ BTreeDefaultTraits::LEAF_SLOTS>;
+
+extern template
+class BTreeNodeT<memoryindex::MemoryFieldIndex::WordKey,
+ BTreeDefaultTraits::INTERNAL_SLOTS>;
+
+#if 0
+extern template
+class BTreeNodeT<memoryindex::MemoryFieldIndex::WordKey,
+ BTreeDefaultTraits::LEAF_SLOTS>;
+#endif
+
+extern template
+class BTreeNodeTT<memoryindex::MemoryFieldIndex::WordKey,
+ EntryRef,
+ search::btree::NoAggregated,
+ BTreeDefaultTraits::INTERNAL_SLOTS>;
+
+extern template
+class BTreeNodeTT<memoryindex::MemoryFieldIndex::WordKey,
+ memoryindex::MemoryFieldIndex::PostingListPtr,
+ search::btree::NoAggregated,
+ BTreeDefaultTraits::LEAF_SLOTS>;
+
+extern template
+class BTreeInternalNode<memoryindex::MemoryFieldIndex::WordKey,
+ search::btree::NoAggregated,
+ BTreeDefaultTraits::INTERNAL_SLOTS>;
+
+extern template
+class BTreeLeafNode<memoryindex::MemoryFieldIndex::WordKey,
+ memoryindex::MemoryFieldIndex::PostingListPtr,
+ search::btree::NoAggregated,
+ BTreeDefaultTraits::LEAF_SLOTS>;
+
+extern template
+class BTreeNodeStore<memoryindex::MemoryFieldIndex::WordKey,
+ memoryindex::MemoryFieldIndex::PostingListPtr,
+ search::btree::NoAggregated,
+ BTreeDefaultTraits::INTERNAL_SLOTS,
+ BTreeDefaultTraits::LEAF_SLOTS>;
+
+extern template
+class BTreeIterator<memoryindex::MemoryFieldIndex::WordKey,
+ memoryindex::MemoryFieldIndex::PostingListPtr,
+ search::btree::NoAggregated,
+ const memoryindex::MemoryFieldIndex::KeyComp,
+ BTreeDefaultTraits>;
+
+extern template
+class BTree<memoryindex::MemoryFieldIndex::WordKey,
+ memoryindex::MemoryFieldIndex::PostingListPtr,
+ search::btree::NoAggregated,
+ const memoryindex::MemoryFieldIndex::KeyComp,
+ BTreeDefaultTraits>;
+
+extern template
+class BTreeRoot<memoryindex::MemoryFieldIndex::WordKey,
+ memoryindex::MemoryFieldIndex::PostingListPtr,
+ search::btree::NoAggregated,
+ const memoryindex::MemoryFieldIndex::KeyComp,
+ BTreeDefaultTraits>;
+
+extern template
+class BTreeRootBase<memoryindex::MemoryFieldIndex::WordKey,
+ memoryindex::MemoryFieldIndex::PostingListPtr,
+ search::btree::NoAggregated,
+ BTreeDefaultTraits::INTERNAL_SLOTS,
+ BTreeDefaultTraits::LEAF_SLOTS>;
+
+extern template
+class BTreeNodeAllocator<memoryindex::MemoryFieldIndex::WordKey,
+ memoryindex::MemoryFieldIndex::PostingListPtr,
+ search::btree::NoAggregated,
+ BTreeDefaultTraits::INTERNAL_SLOTS,
+ BTreeDefaultTraits::LEAF_SLOTS>;
+
+} // namespace search::btree
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/memoryindex/memoryindex.cpp b/searchlib/src/vespa/searchlib/memoryindex/memoryindex.cpp
new file mode 100644
index 00000000000..90a0957ccab
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/memoryindex/memoryindex.cpp
@@ -0,0 +1,308 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".searchlib.memoryindex.memoryindex");
+
+#include "memoryindex.h"
+#include <vespa/searchlib/fef/termfieldmatchdata.h>
+#include <vespa/searchlib/index/docidandfeatures.h>
+#include <vespa/searchlib/index/indexbuilder.h>
+#include <vespa/searchlib/index/schemautil.h>
+#include <vespa/searchlib/memoryindex/featurestore.h>
+#include <vespa/searchlib/memoryindex/postingiterator.h>
+#include <vespa/searchlib/queryeval/create_blueprint_visitor_helper.h>
+#include <vespa/searchlib/queryeval/split_float.h>
+#include <vespa/searchlib/queryeval/booleanmatchiteratorwrapper.h>
+#include <vespa/searchlib/queryeval/emptysearch.h>
+#include <vespa/searchlib/queryeval/searchiterator.h>
+#include <vespa/searchlib/queryeval/searchable.h>
+#include <vespa/searchlib/queryeval/blueprint.h>
+#include <vespa/searchlib/queryeval/leaf_blueprints.h>
+#include <vespa/searchlib/queryeval/termasstring.h>
+#include <vespa/searchlib/query/tree/simplequery.h>
+#include <vespa/searchlib/common/sequencedtaskexecutor.h>
+
+#include <algorithm>
+
+#include <vespa/searchlib/btree/btreenode.hpp>
+#include <vespa/searchlib/btree/btreenodeallocator.hpp>
+#include <vespa/searchlib/btree/btreeroot.hpp>
+
+using document::ArrayFieldValue;
+using document::WeightedSetFieldValue;
+using vespalib::LockGuard;
+using vespalib::GenerationHandler;
+
+namespace search {
+
+using fef::TermFieldMatchDataArray;
+using index::IndexBuilder;
+using index::Schema;
+using index::SchemaUtil;
+using query::NumberTerm;
+using query::LocationTerm;
+using query::Node;
+using query::PredicateQuery;
+using query::PrefixTerm;
+using query::RangeTerm;
+using query::RegExpTerm;
+using query::StringTerm;
+using query::SubstringTerm;
+using query::SuffixTerm;
+using queryeval::SearchIterator;
+using queryeval::Searchable;
+using queryeval::CreateBlueprintVisitorHelper;
+using queryeval::Blueprint;
+using queryeval::BooleanMatchIteratorWrapper;
+using queryeval::EmptyBlueprint;
+using queryeval::FieldSpecBase;
+using queryeval::FieldSpecBaseList;
+using queryeval::FieldSpec;
+using queryeval::IRequestContext;
+
+namespace memoryindex {
+
+MemoryIndex::MemoryIndex(const Schema &schema,
+ ISequencedTaskExecutor &invertThreads,
+ ISequencedTaskExecutor &pushThreads)
+ : _schema(schema),
+ _invertThreads(invertThreads),
+ _pushThreads(pushThreads),
+ _inverter0(_schema, _invertThreads, _pushThreads),
+ _inverter1(_schema, _invertThreads, _pushThreads),
+ _inverter(&_inverter0),
+ _dictionary(_schema),
+ _frozen(false),
+ _maxDocId(0), // docId 0 is reserved
+ _numDocs(0),
+ _lock(),
+ _hiddenFields(schema.getNumIndexFields(), false),
+ _wipeTimeSchema(),
+ _indexedDocs(0),
+ _staticMemoryFootprint(getMemoryUsage().allocatedBytes())
+{
+}
+
+MemoryIndex::~MemoryIndex()
+{
+ _invertThreads.sync();
+ _pushThreads.sync();
+}
+
+void
+MemoryIndex::insertDocument(uint32_t docId, const document::Document &doc)
+{
+ if (_frozen) {
+ LOG(warning, "Memory index frozen: ignoring insert of document '%s'(%u): '%s'",
+ doc.getId().toString().c_str(), docId, doc.toString().c_str());
+ return;
+ }
+ updateMaxDocId(docId);
+ _inverter->invertDocument(docId, doc);
+ if (_indexedDocs.insert(docId).second) {
+ incNumDocs();
+ }
+}
+
+void
+MemoryIndex::removeDocument(uint32_t docId)
+{
+ if (_frozen) {
+ LOG(warning, "Memory index frozen: ignoring remove of document (%u)",
+ docId);
+ return;
+ }
+ _inverter->removeDocument(docId);
+ if (_indexedDocs.find(docId) != _indexedDocs.end()) {
+ _indexedDocs.erase(docId);
+ decNumDocs();
+ }
+}
+
+void
+MemoryIndex::commit(const std::shared_ptr<IDestructorCallback> &onWriteDone)
+{
+ _invertThreads.sync(); // drain inverting into this inverter
+ _pushThreads.sync(); // drain use of other inverter
+ _inverter->pushDocuments(_dictionary, onWriteDone);
+ flipInverter();
+}
+
+
+void
+MemoryIndex::flipInverter()
+{
+ if (_inverter != &_inverter0) {
+ _inverter = &_inverter0;
+ } else {
+ _inverter = &_inverter1;
+ }
+}
+
+void
+MemoryIndex::freeze()
+{
+ _frozen = true;
+}
+
+void
+MemoryIndex::dump(IndexBuilder &indexBuilder)
+{
+ _dictionary.dump(indexBuilder);
+}
+
+namespace {
+
+class MemTermBlueprint : public queryeval::SimpleLeafBlueprint
+{
+private:
+ GenerationHandler::Guard _genGuard;
+ Dictionary::PostingList::ConstIterator _pitr;
+ const FeatureStore &_featureStore;
+ const uint32_t _fieldId;
+ const bool _useBitVector;
+
+public:
+ MemTermBlueprint(GenerationHandler::Guard &&genGuard,
+ Dictionary::PostingList::ConstIterator pitr,
+ const FeatureStore &featureStore,
+ const FieldSpecBase &field,
+ uint32_t fieldId,
+ bool useBitVector)
+ : SimpleLeafBlueprint(field),
+ _genGuard(),
+ _pitr(pitr),
+ _featureStore(featureStore),
+ _fieldId(fieldId),
+ _useBitVector(useBitVector)
+ {
+ _genGuard = std::move(genGuard);
+ HitEstimate estimate(_pitr.size(), !_pitr.valid());
+ setEstimate(estimate);
+ }
+
+ virtual SearchIterator::UP
+ createLeafSearch(const TermFieldMatchDataArray &tfmda, bool) const
+ {
+ SearchIterator::UP search(new PostingIterator(_pitr, _featureStore, _fieldId, tfmda));
+ if (_useBitVector) {
+ LOG(debug, "Return BooleanMatchIteratorWrapper: fieldId(%u), docCount(%zu)",
+ _fieldId, _pitr.size());
+ return SearchIterator::UP(new BooleanMatchIteratorWrapper(std::move(search), tfmda));
+ }
+ LOG(debug, "Return PostingIterator: fieldId(%u), docCount(%zu)",
+ _fieldId, _pitr.size());
+ return search;
+ }
+
+};
+
+/**
+ * Determines the correct Blueprint to use.
+ **/
+class CreateBlueprintVisitor : public CreateBlueprintVisitorHelper
+{
+private:
+ const FieldSpec &_field;
+ const uint32_t _fieldId;
+ Dictionary & _dictionary;
+
+public:
+ CreateBlueprintVisitor(Searchable &searchable,
+ const IRequestContext & requestContext,
+ const FieldSpec &field,
+ uint32_t fieldId,
+ Dictionary &dictionary)
+ : CreateBlueprintVisitorHelper(searchable, field, requestContext),
+ _field(field),
+ _fieldId(fieldId),
+ _dictionary(dictionary) {}
+
+ template <class TermNode>
+ void visitTerm(TermNode &n) {
+ const vespalib::string termStr = queryeval::termAsString(n);
+ LOG(debug, "searching for '%s' in '%s'",
+ termStr.c_str(), _field.getName().c_str());
+ MemoryFieldIndex *fieldIndex = _dictionary.getFieldIndex(_fieldId);
+ GenerationHandler::Guard genGuard = fieldIndex->takeGenerationGuard();
+ Dictionary::PostingList::ConstIterator pitr
+ = fieldIndex->findFrozen(termStr);
+ bool useBitVector = _field.isFilter();
+ setResult(make_UP(new MemTermBlueprint(std::move(genGuard), pitr,
+ fieldIndex->getFeatureStore(),
+ _field, _fieldId, useBitVector)));
+ }
+
+ virtual void visit(LocationTerm &n) { visitTerm(n); }
+ virtual void visit(PrefixTerm &n) { visitTerm(n); }
+ virtual void visit(RangeTerm &n) { visitTerm(n); }
+ virtual void visit(StringTerm &n) { visitTerm(n); }
+ virtual void visit(SubstringTerm &n) { visitTerm(n); }
+ virtual void visit(SuffixTerm &n) { visitTerm(n); }
+ virtual void visit(RegExpTerm &n) { visitTerm(n); }
+ virtual void visit(PredicateQuery &) { }
+
+ virtual void visit(NumberTerm &n) {
+ handleNumberTermAsText(n);
+ }
+
+};
+
+} // namespace search::memoryindex::<unnamed>
+
+Blueprint::UP
+MemoryIndex::createBlueprint(const IRequestContext & requestContext,
+ const FieldSpec &field,
+ const Node &term)
+{
+ uint32_t fieldId = _schema.getIndexFieldId(field.getName());
+ if (fieldId == Schema::UNKNOWN_FIELD_ID || _hiddenFields[fieldId]) {
+ return Blueprint::UP(new EmptyBlueprint(field));
+ }
+ CreateBlueprintVisitor visitor(*this, requestContext, field, fieldId, _dictionary);
+ const_cast<Node &>(term).accept(visitor);
+ return visitor.getResult();
+}
+
+MemoryUsage
+MemoryIndex::getMemoryUsage() const
+{
+ MemoryUsage usage;
+ usage.merge(_dictionary.getMemoryUsage());
+ return usage;
+}
+
+void
+MemoryIndex::wipeHistory(const Schema &schema)
+{
+ LockGuard lock(_lock);
+ if (_wipeTimeSchema.get() == NULL) {
+ Schema::UP newSchema = Schema::intersect(_schema, schema);
+ if (_schema == *newSchema)
+ return;
+ _wipeTimeSchema.reset(newSchema.release());
+ } else {
+ Schema::UP newSchema = Schema::intersect(*_wipeTimeSchema, schema);
+ if (*_wipeTimeSchema == *newSchema)
+ return;
+ _wipeTimeSchema.reset(newSchema.release());
+ }
+ SchemaUtil::IndexIterator i(_schema);
+ for (; i.isValid(); ++i) {
+ uint32_t packedIndex = i.getIndex();
+ assert(packedIndex < _hiddenFields.size());
+ SchemaUtil::IndexIterator wi(*_wipeTimeSchema, i);
+ _hiddenFields[packedIndex] = !wi.isValid();
+ }
+}
+
+Schema::SP
+MemoryIndex::getWipeTimeSchema() const
+{
+ LockGuard lock(_lock);
+ return _wipeTimeSchema;
+}
+
+} // namespace memoryindex
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/memoryindex/memoryindex.h b/searchlib/src/vespa/searchlib/memoryindex/memoryindex.h
new file mode 100644
index 00000000000..1a8a993275e
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/memoryindex/memoryindex.h
@@ -0,0 +1,184 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "dictionary.h"
+#include "documentinverter.h"
+#include <vespa/document/document.h>
+#include <vespa/searchlib/queryeval/searchable.h>
+#include <string>
+#include <vector>
+#include <map>
+#include <set>
+#include <vespa/vespalib/stllike/hash_set.h>
+
+namespace search {
+
+namespace index { class IndexBuilder; }
+
+class ISequencedTaskExecutor;
+
+namespace memoryindex {
+
+/**
+ * Lock-free implementation of a memory-based index
+ * using the document inverter and dictionary classes from searchlib.
+ **/
+class MemoryIndex : public queryeval::Searchable
+{
+private:
+ index::Schema _schema;
+ ISequencedTaskExecutor &_invertThreads;
+ ISequencedTaskExecutor &_pushThreads;
+ DocumentInverter _inverter0;
+ DocumentInverter _inverter1;
+ DocumentInverter *_inverter;
+ Dictionary _dictionary;
+ bool _frozen;
+ uint32_t _maxDocId;
+ uint32_t _numDocs;
+ vespalib::Lock _lock;
+ std::vector<bool> _hiddenFields;
+ index::Schema::SP _wipeTimeSchema;
+ vespalib::hash_set<uint32_t> _indexedDocs; // documents in memory index
+ const uint64_t _staticMemoryFootprint;
+
+ MemoryIndex(const MemoryIndex &) = delete;
+ MemoryIndex(MemoryIndex &&) = delete;
+ MemoryIndex &operator=(const MemoryIndex &) = delete;
+ MemoryIndex &operator=(MemoryIndex &&) = delete;
+
+ void removeDocumentHelper(uint32_t docId, const document::Document &doc);
+ void updateMaxDocId(uint32_t docId) {
+ if (docId > _maxDocId) {
+ _maxDocId = docId;
+ }
+ }
+ void incNumDocs() {
+ ++_numDocs;
+ }
+ void decNumDocs() {
+ if (_numDocs > 0) {
+ --_numDocs;
+ }
+ }
+
+ void flipInverter();
+
+public:
+ /**
+ * Convenience type defs.
+ */
+ typedef std::unique_ptr<MemoryIndex> UP;
+ typedef std::shared_ptr<MemoryIndex> SP;
+
+ /**
+ * Create a new memory index based on the given schema.
+ *
+ * @param schema the index schema to use
+ **/
+ MemoryIndex(const index::Schema &schema,
+ ISequencedTaskExecutor &invertThreads,
+ ISequencedTaskExecutor &pushThreads);
+
+ /**
+ * Class destructor. Clean up washlist.
+ */
+ ~MemoryIndex();
+
+ /**
+ * Obtain the schema used by this index.
+ *
+ * @return schema used by this index
+ **/
+ const index::Schema &getSchema() const { return _schema; }
+
+ /**
+ * Check if this index is frozen.
+ *
+ * @return true if this index is frozen
+ **/
+ bool isFrozen() const { return _frozen; }
+
+ /**
+ * Insert a document into the index. If the document is already in
+ * the index, the old version will be removed first.
+ *
+ * @param docId local document id.
+ * @param doc the document to insert.
+ **/
+ void insertDocument(uint32_t docId, const document::Document &doc);
+
+ /**
+ * Remove a document from the index.
+ *
+ * @param docId local document id.
+ **/
+ void removeDocument(uint32_t docId);
+
+ /**
+ * Commits the inserts and removes since the last commit, making
+ * them searchable. When commit is completed, onWriteDone goes out
+ * of scope, scheduling completion callback.
+ *
+ * Callers can call pushThreads.sync() to wait for push completion.
+ **/
+ void commit(const std::shared_ptr<IDestructorCallback> &onWriteDone);
+
+ /**
+ * Freeze this index. Further index updates will be
+ * discarded. Extra information kept to wash the posting lists
+ * will be discarded.
+ **/
+ void freeze();
+
+ /**
+ * Dump the contents of this index into the given index builder.
+ *
+ * @param indexBuilder the builder to dump into
+ **/
+ void dump(index::IndexBuilder &indexBuilder);
+
+ // implements Searchable
+ virtual queryeval::Blueprint::UP
+ createBlueprint(const queryeval::IRequestContext & requestContext,
+ const queryeval::FieldSpec &field,
+ const query::Node &term);
+
+ virtual queryeval::Blueprint::UP
+ createBlueprint(const queryeval::IRequestContext & requestContext,
+ const queryeval::FieldSpecList &fields,
+ const query::Node &term) {
+ return queryeval::Searchable::createBlueprint(requestContext, fields, term);
+ }
+
+ virtual uint32_t getDocIdLimit() const {
+ // Used to get docId range.
+ return _maxDocId + 1;
+ }
+
+ virtual uint32_t getNumDocs() const {
+ return _numDocs;
+ }
+
+ virtual uint64_t getNumWords() const {
+ return _dictionary.getNumUniqueWords();
+ }
+
+ void
+ wipeHistory(const index::Schema &schema);
+
+ index::Schema::SP getWipeTimeSchema() const;
+
+ /**
+ * Gets an approximation of how much memory the index uses.
+ *
+ * @return approximately how much memory is used by the index.
+ **/
+ MemoryUsage getMemoryUsage() const;
+
+ uint64_t getStaticMemoryFootprint() const { return _staticMemoryFootprint; }
+};
+
+} // namespace memoryindex
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/memoryindex/ordereddocumentinserter.cpp b/searchlib/src/vespa/searchlib/memoryindex/ordereddocumentinserter.cpp
new file mode 100644
index 00000000000..ca7b83a0781
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/memoryindex/ordereddocumentinserter.cpp
@@ -0,0 +1,158 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+#include <vespa/vespalib/stllike/string.h>
+#include <vespa/searchlib/index/docidandfeatures.h>
+#include <vespa/vespalib/util/stringfmt.h>
+#include <vespa/vespalib/util/exceptions.h>
+#include "ordereddocumentinserter.h"
+#include "i_document_insert_listener.h"
+
+#include <vespa/searchlib/btree/btreenode.hpp>
+#include <vespa/searchlib/btree/btreenodeallocator.hpp>
+#include <vespa/searchlib/btree/btreenodestore.hpp>
+#include <vespa/searchlib/btree/btreestore.hpp>
+#include <vespa/searchlib/btree/btreeiterator.hpp>
+#include <vespa/searchlib/btree/btreeroot.hpp>
+#include <vespa/searchlib/btree/btree.hpp>
+
+LOG_SETUP(".memoryindex.orderedfieldinserter");
+
+namespace search
+{
+
+namespace memoryindex
+{
+
+namespace
+{
+
+const vespalib::string emptyWord = "";
+
+}
+
+
+OrderedDocumentInserter::OrderedDocumentInserter(MemoryFieldIndex &fieldIndex)
+ : _word(),
+ _prevDocId(noDocId),
+ _prevAdd(false),
+ _fieldIndex(fieldIndex),
+ _dItr(_fieldIndex.getDictionaryTree().begin()),
+ _listener(_fieldIndex.getDocumentRemover()),
+ _removes(),
+ _adds()
+{
+}
+
+OrderedDocumentInserter::~OrderedDocumentInserter()
+{
+ flush();
+}
+
+
+void
+OrderedDocumentInserter::flushWord()
+{
+ if (_removes.empty() && _adds.empty()) {
+ return;
+ }
+ //XXX: Feature store leak, removed features not marked dead
+ PostingListStore &postingListStore(_fieldIndex.getPostingListStore());
+ btree::EntryRef pidx(_dItr.getData());
+ postingListStore.apply(pidx,
+ &_adds[0],
+ &_adds[0] + _adds.size(),
+ &_removes[0],
+ &_removes[0] + _removes.size());
+ if (pidx.ref() != _dItr.getData()) {
+ // Before updating ref
+ std::atomic_thread_fence(std::memory_order_release);
+ _dItr.writeData(pidx.ref());
+ }
+ _removes.clear();
+ _adds.clear();
+}
+
+
+void
+OrderedDocumentInserter::flush()
+{
+ flushWord();
+ _listener.flush();
+}
+
+
+void
+OrderedDocumentInserter::setNextWord(const vespalib::stringref word)
+{
+ // TODO: Adjust here if zero length words should be legal.
+ assert(_word < word);
+ _word = word;
+ _prevDocId = noDocId;
+ _prevAdd = false;
+ flushWord();
+ const WordStore &wordStore(_fieldIndex.getWordStore());
+ KeyComp cmp(wordStore, _word);
+ WordKey key;
+ if (_dItr.valid() && cmp(_dItr.getKey(), key)) {
+ _dItr.binarySeek(key, cmp);
+ }
+ if (!_dItr.valid() || cmp(key, _dItr.getKey())) {
+ btree::EntryRef wordRef = _fieldIndex.addWord(_word);
+ WordKey insertKey(wordRef);
+ DictionaryTree &dTree(_fieldIndex.getDictionaryTree());
+ dTree.insert(_dItr, insertKey, btree::EntryRef().ref());
+ }
+ assert(_dItr.valid());
+ assert(_word == wordStore.getWord(_dItr.getKey()._wordRef));
+}
+
+
+void
+OrderedDocumentInserter::add(uint32_t docId,
+ const index::DocIdAndFeatures &features)
+{
+ assert(docId != noDocId);
+ assert(_prevDocId == noDocId || _prevDocId < docId ||
+ (_prevDocId == docId && !_prevAdd));
+ btree::EntryRef featureRef = _fieldIndex.addFeatures(features);
+ _adds.push_back(PostingListKeyDataType(docId, featureRef.ref()));
+ _listener.insert(_dItr.getKey()._wordRef, docId);
+ _prevDocId = docId;
+ _prevAdd = true;
+}
+
+
+void
+OrderedDocumentInserter::remove(uint32_t docId)
+{
+ assert(docId != noDocId);
+ assert(_prevDocId == noDocId || _prevDocId < docId);
+ _removes.push_back(docId);
+ _prevDocId = docId;
+ _prevAdd = false;
+}
+
+
+void
+OrderedDocumentInserter::rewind()
+{
+ assert(_removes.empty() && _adds.empty());
+ _word = "";
+ _prevDocId = noDocId;
+ _prevAdd = false;
+ _dItr.begin();
+}
+
+
+btree::EntryRef
+OrderedDocumentInserter::getWordRef() const
+{
+ return _dItr.getKey()._wordRef;
+}
+
+
+}
+
+}
diff --git a/searchlib/src/vespa/searchlib/memoryindex/ordereddocumentinserter.h b/searchlib/src/vespa/searchlib/memoryindex/ordereddocumentinserter.h
new file mode 100644
index 00000000000..f8ec07e305e
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/memoryindex/ordereddocumentinserter.h
@@ -0,0 +1,80 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "iordereddocumentinserter.h"
+#include "memoryfieldindex.h"
+#include <limits>
+
+namespace search
+{
+
+namespace memoryindex
+{
+
+class IDocumentInsertListener;
+
+
+/**
+ * Class for inserting updates to MemoryFieldIndex in an ordered manner
+ * (single pass scan of dictionary tree)
+ *
+ * Insert order must be properly sorted, by (word, docId)
+ */
+class OrderedDocumentInserter : public IOrderedDocumentInserter
+{
+ vespalib::stringref _word;
+ uint32_t _prevDocId;
+ bool _prevAdd;
+ using DictionaryTree = MemoryFieldIndex::DictionaryTree;
+ using PostingListStore = MemoryFieldIndex::PostingListStore;
+ using KeyComp = MemoryFieldIndex::KeyComp;
+ using WordKey = MemoryFieldIndex::WordKey;
+ using PostingListKeyDataType = MemoryFieldIndex::PostingListKeyDataType;
+ MemoryFieldIndex &_fieldIndex;
+ DictionaryTree::Iterator _dItr;
+ IDocumentInsertListener &_listener;
+
+ // Pending changes to posting list for (_word)
+ std::vector<uint32_t> _removes;
+ std::vector<PostingListKeyDataType> _adds;
+
+
+ static constexpr uint32_t noFieldId = std::numeric_limits<uint32_t>::max();
+ static constexpr uint32_t noDocId = std::numeric_limits<uint32_t>::max();
+
+ /*
+ * Flush pending changes to postinglist for (_word).
+ *
+ * _dItr is located at correct position.
+ */
+ void flushWord();
+
+public:
+ OrderedDocumentInserter(MemoryFieldIndex &fieldIndex);
+ virtual ~OrderedDocumentInserter();
+ virtual void setNextWord(const vespalib::stringref word) override;
+ virtual void add(uint32_t docId,
+ const index::DocIdAndFeatures &features) override;
+ virtual void remove(uint32_t docId) override;
+
+ /*
+ * Flush pending changes to postinglist for (_word). Also flush
+ * insert listener.
+ *
+ * _dItr is located at correct position.
+ */
+ virtual void flush() override;
+
+ /*
+ * Rewind iterator, to start new pass.
+ */
+ virtual void rewind() override;
+
+ // Used by unit test
+ btree::EntryRef getWordRef() const;
+};
+
+}
+
+}
diff --git a/searchlib/src/vespa/searchlib/memoryindex/postingiterator.cpp b/searchlib/src/vespa/searchlib/memoryindex/postingiterator.cpp
new file mode 100644
index 00000000000..f03b476ef6e
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/memoryindex/postingiterator.cpp
@@ -0,0 +1,74 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".memoryindex.postingiterator");
+#include "postingiterator.h"
+
+#include <vespa/searchlib/btree/btreenode.hpp>
+#include <vespa/searchlib/btree/btreenodeallocator.hpp>
+#include <vespa/searchlib/btree/btreenodestore.hpp>
+#include <vespa/searchlib/btree/btreeiterator.hpp>
+#include <vespa/searchlib/btree/btreeroot.hpp>
+
+namespace search {
+namespace memoryindex {
+
+PostingIterator::PostingIterator(Dictionary::PostingList::ConstIterator itr,
+ const FeatureStore & featureStore,
+ uint32_t packedIndex,
+ const fef::TermFieldMatchDataArray & matchData) :
+ queryeval::RankedSearchIteratorBase(matchData),
+ _itr(itr),
+ _featureStore(featureStore),
+ _featureDecoder(NULL)
+{
+ _featureStore.setupForField(packedIndex, _featureDecoder);
+}
+
+void
+PostingIterator::initRange(uint32_t begin, uint32_t end)
+{
+ SearchIterator::initRange(begin, end);
+ _itr.lower_bound(begin);
+ if (!_itr.valid() || isAtEnd(_itr.getKey())) {
+ setAtEnd();
+ } else {
+ setDocId(_itr.getKey());
+ }
+ clearUnpacked();
+}
+
+void
+PostingIterator::doSeek(uint32_t docId)
+{
+ if (getUnpacked()) {
+ clearUnpacked();
+ }
+ _itr.linearSeek(docId);
+ if (!_itr.valid()) {
+ setAtEnd();
+ } else {
+ setDocId(_itr.getKey());
+ }
+}
+
+void
+PostingIterator::doUnpack(uint32_t docId)
+{
+ if (!_matchData.valid() || getUnpacked()) {
+ return;
+ }
+ assert(docId == getDocId());
+ assert(_itr.valid());
+ assert(docId == _itr.getKey());
+ btree::EntryRef featureRef(_itr.getData());
+ _featureStore.setupForUnpackFeatures(featureRef, _featureDecoder);
+ _featureDecoder.unpackFeatures(_matchData, docId);
+ setUnpacked();
+}
+
+
+} // namespace search::memoryindex
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/memoryindex/postingiterator.h b/searchlib/src/vespa/searchlib/memoryindex/postingiterator.h
new file mode 100644
index 00000000000..8b40984a710
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/memoryindex/postingiterator.h
@@ -0,0 +1,43 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "dictionary.h"
+#include <vespa/searchlib/queryeval/iterators.h>
+
+namespace search {
+namespace memoryindex {
+
+/**
+ * Search iterator for memory index posting list.
+ **/
+class PostingIterator : public queryeval::RankedSearchIteratorBase
+{
+private:
+ Dictionary::PostingList::ConstIterator _itr;
+ const FeatureStore &_featureStore;
+ FeatureStore::DecodeContextCooked _featureDecoder;
+
+public:
+ /**
+ * Creates a search iterator for the given posting list iterator.
+ *
+ * @param itr the posting list iterator to base the search iterator upon.
+ * @param featureStore reference to store for features.
+ * @param packedIndex the field or field collection owning features.
+ * @param matchData the match data to unpack features into.
+ **/
+ PostingIterator(Dictionary::PostingList::ConstIterator itr,
+ const FeatureStore &featureStore,
+ uint32_t packedIndex,
+ const fef::TermFieldMatchDataArray &matchData);
+
+ void doSeek(uint32_t docId) override;
+ void doUnpack(uint32_t docId) override;
+ void initRange(uint32_t begin, uint32_t end) override;
+ Trinary is_strict() const override { return Trinary::True; }
+};
+
+} // namespace search::memoryindex
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/memoryindex/urlfieldinverter.cpp b/searchlib/src/vespa/searchlib/memoryindex/urlfieldinverter.cpp
new file mode 100644
index 00000000000..d31a69c9ff6
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/memoryindex/urlfieldinverter.cpp
@@ -0,0 +1,384 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".memoryindex.urlfieldinverter");
+#include "urlfieldinverter.h"
+#include "fieldinverter.h"
+#include <vespa/document/datatype/urldatatype.h>
+#include <vespa/searchlib/util/url.h>
+#include <stdexcept>
+#include <vespa/vespalib/text/utf8.h>
+#include <vespa/vespalib/text/lowercase.h>
+#include <vespa/searchlib/common/sort.h>
+
+namespace search
+{
+
+namespace memoryindex
+{
+
+namespace
+{
+static vespalib::string HOSTNAME_BEGIN("StArThOsT");
+static vespalib::string HOSTNAME_END("EnDhOsT");
+const vespalib::string SPANTREE_NAME("linguistics");
+
+static size_t
+lowercaseToken(vespalib::string &dest, const char *src, size_t srcSize)
+{
+ dest.clear();
+ dest.reserve(8 + srcSize);
+
+ vespalib::Utf8Reader r(src, srcSize);
+ vespalib::Utf8Writer w(dest);
+
+ using vespalib::LowerCase;
+
+ while (r.hasMore()) {
+ uint32_t i = r.getChar(vespalib::Utf8::BAD);
+ if (i != vespalib::Utf8::BAD) {
+ w.putChar(LowerCase::convert(i));
+ }
+ }
+ return dest.size();
+}
+
+
+} // namespace
+
+
+using document::ArrayFieldValue;
+using document::DataType;
+using document::FieldValue;
+using document::IntFieldValue;
+using document::SpanTree;
+using document::StringFieldValue;
+using document::StructFieldValue;
+using document::UrlDataType;
+using document::WeightedSetFieldValue;
+using search::index::Schema;
+using search::util::URL;
+using vespalib::make_string;
+
+
+void
+UrlFieldInverter::startDoc(uint32_t docId)
+{
+ _all->startDoc(docId);
+ _scheme->startDoc(docId);
+ _host->startDoc(docId);
+ _port->startDoc(docId);
+ _path->startDoc(docId);
+ _query->startDoc(docId);
+ _fragment->startDoc(docId);
+ _hostname->startDoc(docId);
+}
+
+
+void
+UrlFieldInverter::endDoc()
+{
+ _all->endDoc();
+ _scheme->endDoc();
+ _host->endDoc();
+ _port->endDoc();
+ _path->endDoc();
+ _query->endDoc();
+ _fragment->endDoc();
+ _hostname->endDoc();
+}
+
+
+void
+UrlFieldInverter::startElement(int32_t weight)
+{
+ _all->startElement(weight);
+ _scheme->startElement(weight);
+ _host->startElement(weight);
+ _port->startElement(weight);
+ _path->startElement(weight);
+ _query->startElement(weight);
+ _fragment->startElement(weight);
+ _hostname->startElement(weight);
+}
+
+
+void
+UrlFieldInverter::endElement()
+{
+ _all->endElement();
+ _scheme->endElement();
+ _host->endElement();
+ _port->endElement();
+ _path->endElement();
+ _query->endElement();
+ _fragment->endElement();
+ _hostname->endElement();
+}
+
+
+void
+UrlFieldInverter::processUrlSubField(FieldInverter *inverter,
+ const StructFieldValue &field,
+ vespalib::stringref subField,
+ bool addAnchors)
+{
+ const FieldValue::UP sfv = field.getValue(subField);
+ if (!sfv)
+ return;
+ if (!sfv->inherits(IDENTIFIABLE_CLASSID(StringFieldValue))) {
+ LOG(error,
+ "Illegal field type %s for URL subfield %s, expected string",
+ sfv->getDataType()->getName().c_str(),
+ subField.c_str());
+ return;
+ }
+ const StringFieldValue &value = static_cast<const StringFieldValue &>(*sfv);
+ if (addAnchors) {
+ inverter->addWord(HOSTNAME_BEGIN);
+ }
+ inverter->processAnnotations(value);
+ if (addAnchors) {
+ inverter->addWord(HOSTNAME_END);
+ }
+}
+
+
+void
+UrlFieldInverter::processAnnotatedUrlField(const StructFieldValue & field)
+{
+ processUrlSubField(_all, field, UrlDataType::FIELD_ALL, false);
+ processUrlSubField(_scheme, field, UrlDataType::FIELD_SCHEME, false);
+ processUrlSubField(_host, field, UrlDataType::FIELD_HOST, false);
+ processUrlSubField(_port, field, UrlDataType::FIELD_PORT, false);
+ processUrlSubField(_path, field, UrlDataType::FIELD_PATH, false);
+ processUrlSubField(_query, field, UrlDataType::FIELD_QUERY, false);
+ processUrlSubField(_fragment, field, UrlDataType::FIELD_FRAGMENT, false);
+ processUrlSubField(_hostname, field, UrlDataType::FIELD_HOST, true);
+}
+
+
+void
+UrlFieldInverter::processUrlField(const FieldValue &url_field)
+{
+ if (url_field.inherits(IDENTIFIABLE_CLASSID(StringFieldValue))) {
+ const vespalib::string &url_str =
+ static_cast<const StringFieldValue &>(url_field).getValue();
+ processUrlOldStyle(url_str);
+ return;
+ }
+ assert(url_field.getClass().id() == StructFieldValue::classId);
+ const StructFieldValue &field =
+ static_cast<const StructFieldValue &>(url_field);
+
+ const FieldValue::UP all_val = field.getValue("all");
+ if (all_val.get() == NULL) {
+ if (_useAnnotations) {
+ // New style, use annotations
+ processAnnotatedUrlField(field);
+ }
+ return;
+ }
+
+ if (!all_val->inherits(IDENTIFIABLE_CLASSID(StringFieldValue))) {
+ LOG(error,
+ "Illegal field type %s for URL subfield all, expected string",
+ all_val->getDataType()->getName().c_str());
+ return;
+ }
+ const StringFieldValue &all_sfv =
+ static_cast<const StringFieldValue &>(*all_val);
+ if (_useAnnotations) {
+ StringFieldValue::SpanTrees trees = all_sfv.getSpanTrees();
+ const SpanTree *tree = StringFieldValue::findTree(trees, SPANTREE_NAME);
+ if (tree != NULL) {
+ // New style, use annotations
+ processAnnotatedUrlField(field);
+ return;
+ }
+ }
+
+ if (_useAnnotations) {
+ return;
+ }
+
+ // Old style, tokenize in backend
+ const vespalib::string &s = all_sfv.getValue();
+ processUrlOldStyle(s);
+}
+
+void UrlFieldInverter::processUrlOldStyle(const vespalib::string &s) {
+ URL url(reinterpret_cast<const unsigned char *>(s.data()), s.size());
+
+ _hostname->addWord(HOSTNAME_BEGIN);
+
+ vespalib::string lowToken;
+ const unsigned char *t;
+ URL::URL_CONTEXT url_context;
+ while ((t = url.GetToken(url_context))) {
+ const char *token = reinterpret_cast<const char *>(t);
+ size_t tokenLen = strlen(token);
+ tokenLen = lowercaseToken(lowToken, token, tokenLen);
+ token = lowToken.c_str();
+ vespalib::stringref tokenRef(token, tokenLen);
+ switch (url_context) {
+ case URL::URL_SCHEME:
+ _scheme->addWord(tokenRef);
+ _all->addWord(tokenRef);
+ break;
+ case URL::URL_HOST:
+ case URL::URL_DOMAIN:
+ case URL::URL_MAINTLD:
+ _host->addWord(tokenRef);
+ _hostname->addWord(tokenRef);
+ _all->addWord(tokenRef);
+ break;
+ case URL::URL_PORT:
+ if (strcmp(token, "80") && strcmp(token, "443")) {
+ _port->addWord(tokenRef);
+ _all->addWord(tokenRef);
+ }
+ break;
+ case URL::URL_PATH:
+ case URL::URL_FILENAME:
+ case URL::URL_EXTENSION:
+ case URL::URL_PARAMS:
+ _path->addWord(tokenRef);
+ _all->addWord(tokenRef);
+ break;
+ case URL::URL_QUERY:
+ _query->addWord(tokenRef);
+ _all->addWord(tokenRef);
+ break;
+ case URL::URL_FRAGMENT:
+ _fragment->addWord(tokenRef);
+ _all->addWord(tokenRef);
+ break;
+ case URL::URL_ADDRESS:
+ _all->addWord(tokenRef);
+ break;
+ default:
+ LOG(warning, "Ignoring unknown Uri token '%s'.", token);
+ }
+ }
+ _hostname->addWord(HOSTNAME_END);
+}
+
+
+void
+UrlFieldInverter::processArrayUrlField(const ArrayFieldValue &field)
+{
+ for (uint32_t el(0), ele(field.size());el < ele; ++el) {
+ const FieldValue &element = field[el];
+ startElement(1);
+ processUrlField(element);
+ endElement();
+ }
+}
+
+
+void
+UrlFieldInverter::processWeightedSetUrlField(const WeightedSetFieldValue &field)
+{
+ for (const auto & el : field) {
+ const FieldValue &key = *el.first;
+ const FieldValue &xweight = *el.second;
+ assert(xweight.getClass().id() == IntFieldValue::classId);
+ int32_t weight = xweight.getAsInt();
+ startElement(weight);
+ processUrlField(key);
+ endElement();
+ }
+}
+
+namespace {
+bool isUriType(const DataType &type) {
+ return type == UrlDataType::getInstance()
+ || type == *DataType::STRING
+ || type == *DataType::URI;
+}
+} // namespace
+
+
+void
+UrlFieldInverter::invertUrlField(const FieldValue &val)
+{
+ const vespalib::Identifiable::RuntimeClass & cInfo(val.getClass());
+ switch (_collectionType) {
+ case Schema::SINGLE:
+ if (isUriType(*val.getDataType())) {
+ startElement(1);
+ processUrlField(val);
+ endElement();
+ } else {
+ throw std::runtime_error(make_string("Expected URI struct, got '%s'", val.getDataType()->getName().c_str()));
+ }
+ break;
+ case Schema::WEIGHTEDSET:
+ if (cInfo.id() == WeightedSetFieldValue::classId) {
+ const WeightedSetFieldValue &wset = static_cast<const WeightedSetFieldValue &>(val);
+ if (isUriType(wset.getNestedType())) {
+ processWeightedSetUrlField(wset);
+ } else {
+ throw std::runtime_error(make_string("Expected wset of URI struct, got '%s'", wset.getNestedType().getName().c_str()));
+ }
+ } else {
+ throw std::runtime_error(make_string("Expected weighted set, got '%s'", cInfo.name()));
+ }
+ break;
+ case Schema::ARRAY:
+ if (cInfo.id() == ArrayFieldValue::classId) {
+ const ArrayFieldValue &arr = static_cast<const ArrayFieldValue&>(val);
+ if (isUriType(arr.getNestedType())) {
+ processArrayUrlField(arr);
+ } else {
+ throw std::runtime_error(make_string("Expected array of URI struct, got '%s' (%s)", arr.getNestedType().getName().c_str(), arr.getNestedType().toString(true).c_str()));
+ }
+ } else {
+ throw std::runtime_error(make_string("Expected Array, got '%s'", cInfo.name()));
+ }
+ break;
+ default:
+ break;
+ }
+}
+
+void
+UrlFieldInverter::invertField(uint32_t docId, const FieldValue::UP &val)
+{
+ startDoc(docId);
+ if (val) {
+ invertUrlField(*val);
+ }
+ endDoc();
+}
+
+
+UrlFieldInverter::UrlFieldInverter(index::Schema::CollectionType collectionType,
+ FieldInverter *all,
+ FieldInverter *scheme,
+ FieldInverter *host,
+ FieldInverter *port,
+ FieldInverter *path,
+ FieldInverter *query,
+ FieldInverter *fragment,
+ FieldInverter *hostname)
+ : _all(all),
+ _scheme(scheme),
+ _host(host),
+ _port(port),
+ _path(path),
+ _query(query),
+ _fragment(fragment),
+ _hostname(hostname),
+ _useAnnotations(false),
+ _collectionType(collectionType)
+{
+}
+
+
+} // namespace memoryindex
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/memoryindex/urlfieldinverter.h b/searchlib/src/vespa/searchlib/memoryindex/urlfieldinverter.h
new file mode 100644
index 00000000000..107000cb775
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/memoryindex/urlfieldinverter.h
@@ -0,0 +1,79 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/index/doctypebuilder.h>
+
+namespace search
+{
+
+namespace memoryindex
+{
+
+class FieldInverter;
+
+class UrlFieldInverter
+{
+ FieldInverter *_all;
+ FieldInverter *_scheme;
+ FieldInverter *_host;
+ FieldInverter *_port;
+ FieldInverter *_path;
+ FieldInverter *_query;
+ FieldInverter *_fragment;
+ FieldInverter *_hostname;
+
+ bool _useAnnotations;
+ index::Schema::CollectionType _collectionType;
+
+public:
+ using UriField = index::DocTypeBuilder::UriField;
+
+private:
+ void startDoc(uint32_t docId);
+
+ void endDoc();
+
+ void startElement(int32_t weight);
+
+ void endElement();
+
+ void
+ processUrlSubField(FieldInverter *inverter,
+ const document::StructFieldValue &field,
+ vespalib::stringref subField,
+ bool addAnchors);
+
+ void processAnnotatedUrlField(const document::StructFieldValue &field);
+
+ void processUrlField(const document::FieldValue &url_field);
+
+ void processUrlOldStyle(const vespalib::string &s);
+
+ void processArrayUrlField(const document::ArrayFieldValue &field);
+
+ void processWeightedSetUrlField(const document::WeightedSetFieldValue &field);
+
+ void invertUrlField(const document::FieldValue &field);
+public:
+ UrlFieldInverter(index::Schema::CollectionType collectionType,
+ FieldInverter *all,
+ FieldInverter *scheme,
+ FieldInverter *host,
+ FieldInverter *port,
+ FieldInverter *path,
+ FieldInverter *query,
+ FieldInverter *fragment,
+ FieldInverter *hostname);
+
+ void invertField(uint32_t docId, const document::FieldValue::UP &field);
+
+ void setUseAnnotations(bool useAnnotations) {
+ _useAnnotations = useAnnotations;
+ }
+};
+
+
+} // namespace memoryindex
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/memoryindex/wordstore.cpp b/searchlib/src/vespa/searchlib/memoryindex/wordstore.cpp
new file mode 100644
index 00000000000..a0df99f62be
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/memoryindex/wordstore.cpp
@@ -0,0 +1,59 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".memoryindex.wordstore");
+#include "wordstore.h"
+#include <vespa/searchlib/btree/datastore.hpp>
+
+namespace search {
+namespace memoryindex {
+
+constexpr size_t MIN_CLUSTERS = 1024;
+
+WordStore::WordStore()
+ : _store(),
+ _numWords(0),
+ _type(RefType::align(1),
+ MIN_CLUSTERS,
+ RefType::offsetSize() / RefType::align(1)),
+ _typeId(0)
+{
+ _store.addType(&_type);
+ _store.initActiveBuffers();
+}
+
+
+WordStore::~WordStore(void)
+{
+ _store.dropBuffers();
+}
+
+btree::EntryRef
+WordStore::addWord(const vespalib::stringref word)
+{
+ _store.ensureBufferCapacity(_typeId, RefType::align(word.size() + 1));
+ uint32_t activeBufferId = _store.getActiveBufferId(_typeId);
+ btree::BufferState &state = _store.getBufferState(activeBufferId);
+ size_t oldSize = state.size();
+ RefType ref(oldSize, activeBufferId);
+ assert(oldSize == ref.offset());
+ char *be = _store.getBufferEntry<char>(activeBufferId, oldSize);
+ for (size_t i = 0; i < word.size(); ++i) {
+ *be++ = word[i];
+ }
+ *be++ = 0;
+ state.pushed_back(word.size() + 1);
+ size_t pad = RefType::pad(state.size());
+ for (size_t i = 0; i < pad; ++i) {
+ *be++ = 0;
+ }
+ state.pushed_back(pad);
+ ++_numWords;
+ return ref;
+}
+
+
+} // namespace search::memoryindex
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/memoryindex/wordstore.h b/searchlib/src/vespa/searchlib/memoryindex/wordstore.h
new file mode 100644
index 00000000000..ad5b5020759
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/memoryindex/wordstore.h
@@ -0,0 +1,41 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/btree/datastore.h>
+#include <vespa/vespalib/stllike/string.h>
+
+namespace search {
+namespace memoryindex {
+
+class WordStore
+{
+public:
+ typedef btree::DataStoreT<btree::AlignedEntryRefT<22, 2> > DataStoreType;
+ typedef DataStoreType::RefType RefType;
+
+private:
+ DataStoreType _store;
+ uint32_t _numWords;
+ btree::BufferType<char> _type;
+ const uint32_t _typeId;
+
+public:
+ WordStore();
+ ~WordStore();
+ btree::EntryRef addWord(const vespalib::stringref word);
+ const char * getWord(btree::EntryRef ref) const
+ {
+ RefType internalRef(ref);
+ return _store.getBufferEntry<char>(internalRef.bufferId(),
+ internalRef.offset());
+ }
+
+ MemoryUsage getMemoryUsage() const {
+ return _store.getMemoryUsage();
+ }
+};
+
+} // namespace search::memoryindex
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/parsequery/.gitignore b/searchlib/src/vespa/searchlib/parsequery/.gitignore
new file mode 100644
index 00000000000..ee8938b6bf4
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/parsequery/.gitignore
@@ -0,0 +1,6 @@
+*.So
+*.exe
+*.ilk
+*.pdb
+.depend*
+Makefile
diff --git a/searchlib/src/vespa/searchlib/parsequery/CMakeLists.txt b/searchlib/src/vespa/searchlib/parsequery/CMakeLists.txt
new file mode 100644
index 00000000000..3d0ca9697a9
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/parsequery/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_library(searchlib_parsequery OBJECT
+ SOURCES
+ parse.cpp
+ simplequerystack.cpp
+ stackdumpiterator.cpp
+ DEPENDS
+)
diff --git a/searchlib/src/vespa/searchlib/parsequery/OWNERS b/searchlib/src/vespa/searchlib/parsequery/OWNERS
new file mode 100644
index 00000000000..1037590124e
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/parsequery/OWNERS
@@ -0,0 +1 @@
+balder
diff --git a/searchlib/src/vespa/searchlib/parsequery/parse.cpp b/searchlib/src/vespa/searchlib/parsequery/parse.cpp
new file mode 100644
index 00000000000..e071b5728e9
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/parsequery/parse.cpp
@@ -0,0 +1,239 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/**
+ * Creation date: 2000-05-15
+ *
+ * Implementation of ParseItem
+ *
+ * Copyright (C) 1997-2003 Fast Search & Transfer ASA
+ * Copyright (C) 2003 Overture Services Norway AS
+ * ALL RIGHTS RESERVED
+ */
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("");
+
+#include <vespa/searchlib/parsequery/parse.h>
+#include <vespa/vespalib/objects/nbostream.h>
+
+namespace search {
+
+#define PARSEITEM_DEFAULT_CONSTRUCTOR_LIST \
+ _next(NULL), \
+ _sibling(NULL), \
+ _weight(100), \
+ _uniqueId(0), \
+ _arg1(0), \
+ _arg2(0), \
+ _arg3(0), \
+ _type(ITEM_UNDEF), \
+ _flags(0), \
+ _arity(0), \
+ _indexName(), \
+ _term()
+
+
+ParseItem::ParseItem(ItemType type, int arity)
+ : PARSEITEM_DEFAULT_CONSTRUCTOR_LIST
+{
+ assert(type==ITEM_OR || type==ITEM_WEAK_AND || type==ITEM_EQUIV ||
+ type==ITEM_AND || type==ITEM_NOT || type==ITEM_RANK ||
+ type==ITEM_PHRASE || type==ITEM_ANY || type==ITEM_NEAR || type==ITEM_ONEAR);
+ SetType(type);
+ _arity = arity;
+}
+
+ParseItem::ParseItem(ItemType type, int arity, const char *idx)
+ : PARSEITEM_DEFAULT_CONSTRUCTOR_LIST
+{
+ assert(type == ITEM_PHRASE || type==ITEM_WEIGHTED_SET || type==ITEM_DOT_PRODUCT || type==ITEM_WAND);
+ SetType(type);
+ _arity = arity;
+ SetIndex(idx);
+}
+
+namespace {
+
+void assert_type(ParseItem::ItemType type)
+{
+ assert(type == ParseItem::ITEM_TERM ||
+ type == ParseItem::ITEM_NUMTERM ||
+ type == ParseItem::ITEM_PREFIXTERM ||
+ type == ParseItem::ITEM_SUBSTRINGTERM ||
+ type == ParseItem::ITEM_SUFFIXTERM ||
+ type == ParseItem::ITEM_PURE_WEIGHTED_STRING ||
+ type == ParseItem::ITEM_PURE_WEIGHTED_LONG ||
+ type == ParseItem::ITEM_EXACTSTRINGTERM ||
+ type == ParseItem::ITEM_PREDICATE_QUERY);
+ (void) type;
+}
+
+}
+
+ParseItem::ParseItem(ItemType type, const vespalib::stringref & idx, const char *term)
+ : PARSEITEM_DEFAULT_CONSTRUCTOR_LIST
+{
+ assert_type(type);
+ SetType(type);
+ SetIndex(idx.c_str());
+ SetTerm(term);
+}
+
+ParseItem::ParseItem(ItemType type, const char *term)
+ : PARSEITEM_DEFAULT_CONSTRUCTOR_LIST
+{
+ assert_type(type);
+ SetType(type);
+ SetTerm(term);
+}
+
+ParseItem::~ParseItem(void)
+{
+ delete _next;
+ delete _sibling;
+}
+
+void
+ParseItem::AppendBuffer(RawBuf *buf) const
+{
+ // Calculate the length of the buffer.
+ uint32_t indexLen = _indexName.size();
+ uint32_t termLen = _term.size();
+
+ // Put the values into the buffer.
+ buf->append(_type);
+ if (Feature_Weight()) { // this item has weight
+ buf->appendCompressedNumber(_weight.percent());
+ }
+ if (feature_UniqueId()) {
+ buf->appendCompressedPositiveNumber(_uniqueId);
+ }
+ if (feature_Flags()) {
+ buf->append(_flags);
+ }
+ switch (Type()) {
+ case ITEM_OR:
+ case ITEM_EQUIV:
+ case ITEM_AND:
+ case ITEM_NOT:
+ case ITEM_RANK:
+ case ITEM_ANY:
+ buf->appendCompressedPositiveNumber(_arity);
+ break;
+ case ITEM_WEAK_AND:
+ case ITEM_NEAR:
+ case ITEM_ONEAR:
+ buf->appendCompressedPositiveNumber(_arity);
+ buf->appendCompressedPositiveNumber(_arg1);
+ if (Type() == ITEM_WEAK_AND) {
+ buf->appendCompressedPositiveNumber(indexLen);
+ if (indexLen != 0) {
+ buf->append(_indexName.c_str(), indexLen);
+ }
+ }
+ break;
+ case ITEM_WEIGHTED_SET:
+ case ITEM_DOT_PRODUCT:
+ case ITEM_WAND:
+ case ITEM_PHRASE:
+ buf->appendCompressedPositiveNumber(_arity);
+ buf->appendCompressedPositiveNumber(indexLen);
+ if (indexLen != 0) {
+ buf->append(_indexName.c_str(), indexLen);
+ }
+ if (Type() == ITEM_WAND) {
+ buf->appendCompressedPositiveNumber(_arg1); // targetNumHits
+ double nboVal = vespalib::nbostream::n2h(_arg2);
+ buf->append(&nboVal, sizeof(nboVal)); // scoreThreshold
+ nboVal = vespalib::nbostream::n2h(_arg3);
+ buf->append(&nboVal, sizeof(nboVal)); // thresholdBoostFactor
+ }
+ break;
+ case ITEM_TERM:
+ case ITEM_NUMTERM:
+ case ITEM_PREFIXTERM:
+ case ITEM_SUBSTRINGTERM:
+ case ITEM_EXACTSTRINGTERM:
+ case ITEM_SUFFIXTERM:
+ case ITEM_REGEXP:
+ buf->appendCompressedPositiveNumber(indexLen);
+ if (indexLen != 0) {
+ buf->append(_indexName.c_str(), indexLen);
+ }
+ buf->appendCompressedPositiveNumber(termLen);
+ if (termLen != 0) {
+ buf->append(_term.c_str(), termLen);
+ }
+ break;
+ case ITEM_UNDEF:
+ default:
+ break;
+ }
+}
+
+size_t
+ParseItem::GetBufferLen(void) const
+{
+ // Calculate the length of the buffer.
+ uint32_t indexLen = _indexName.size();
+ uint32_t termLen = _term.size();
+
+ uint32_t len = sizeof(uint8_t); // type field
+ if (Feature_Weight()) {
+ len += sizeof(uint32_t);
+ }
+ if (feature_UniqueId()) {
+ len += sizeof(uint32_t);
+ }
+ if (feature_Flags()) {
+ len += sizeof(uint8_t);
+ }
+
+ // Put the values into the buffer.
+ switch (Type()) {
+ case ITEM_OR:
+ case ITEM_EQUIV:
+ case ITEM_AND:
+ case ITEM_NOT:
+ case ITEM_RANK:
+ case ITEM_ANY:
+ len += sizeof(uint32_t);
+ break;
+ case ITEM_NEAR:
+ case ITEM_ONEAR:
+ len += sizeof(uint32_t) * 2;
+ break;
+ case ITEM_WEAK_AND:
+ len += sizeof(uint32_t) * 3 + indexLen;
+ break;
+ case ITEM_WEIGHTED_SET:
+ case ITEM_DOT_PRODUCT:
+ case ITEM_PHRASE:
+ len += sizeof(uint32_t) * 2 + indexLen;
+ break;
+ case ITEM_WAND:
+ len += sizeof(uint32_t) * 4 + indexLen;
+ break;
+ case ITEM_TERM:
+ case ITEM_NUMTERM:
+ case ITEM_PREFIXTERM:
+ case ITEM_SUBSTRINGTERM:
+ case ITEM_EXACTSTRINGTERM:
+ case ITEM_SUFFIXTERM:
+ case ITEM_REGEXP:
+ len += sizeof(uint32_t) * 2 + indexLen + termLen;
+ break;
+ case ITEM_PURE_WEIGHTED_STRING:
+ len += sizeof(uint32_t) + termLen;
+ break;
+ case ITEM_PURE_WEIGHTED_LONG:
+ len += sizeof(uint64_t);
+ break;
+ case ITEM_UNDEF:
+ default:
+ break;
+ }
+ return len;
+}
+
+}
diff --git a/searchlib/src/vespa/searchlib/parsequery/parse.h b/searchlib/src/vespa/searchlib/parsequery/parse.h
new file mode 100644
index 00000000000..889cc52f31c
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/parsequery/parse.h
@@ -0,0 +1,232 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/**
+ * Creation date: 2000-05-15
+ *
+ * Declaration of ParseItem class.
+ *
+ * Copyright (C) 1997-2003 Fast Search & Transfer ASA
+ * Copyright (C) 2003 Overture Services Norway AS
+ * ALL RIGHTS RESERVED
+ */
+#pragma once
+
+#include <vespa/searchlib/query/tree/predicate_query_term.h>
+#include <vespa/searchlib/query/weight.h>
+#include <vespa/searchlib/util/rawbuf.h>
+#include <vespa/vespalib/stllike/string.h>
+
+namespace search {
+
+/**
+ * An item on the simple query stack.
+ *
+ * An object of this class represents a single item
+ * on the simple query stack. It has a type, which corresponds
+ * to the different query stack execution operations. It also
+ * provides an arity, and the string values indexName and term, to
+ * accomodate the different needs of the operations.
+ * It also includes a mechanism for making singly linked lists
+ * with sub-lists. This is used during the parsing, and also
+ * when constructing the simple query stack.
+ */
+class ParseItem
+{
+private:
+ ParseItem(const ParseItem &);
+ ParseItem& operator=(const ParseItem &);
+public:
+ /** Pointer to next item in a linked list. */
+ ParseItem *_next;
+ /** Pointer to first item in a sublist. */
+ ParseItem *_sibling;
+
+ /** The type of the item is from this set of values.
+ It is important that these defines match those in prelude/source/com/yahoo/prelude/query/Item.java */
+ enum ItemType {
+ ITEM_OR = 0,
+ ITEM_AND = 1,
+ ITEM_NOT = 2,
+ ITEM_RANK = 3,
+ ITEM_TERM = 4,
+ ITEM_NUMTERM = 5,
+ ITEM_PHRASE = 6,
+ ITEM_PAREN = 7,
+ ITEM_PREFIXTERM = 8,
+ ITEM_SUBSTRINGTERM = 9,
+ ITEM_ANY = 10,
+ ITEM_NEAR = 11,
+ ITEM_ONEAR = 12,
+ ITEM_SUFFIXTERM = 13,
+ ITEM_EQUIV = 14,
+ ITEM_WEIGHTED_SET = 15,
+ ITEM_WEAK_AND = 16,
+ ITEM_EXACTSTRINGTERM = 17,
+ UNUSED_LEGACY_ITEM_RISE_QUERY = 18,
+ ITEM_PURE_WEIGHTED_STRING = 19,
+ ITEM_PURE_WEIGHTED_LONG = 20,
+ ITEM_DOT_PRODUCT = 21,
+ ITEM_WAND = 22,
+ ITEM_PREDICATE_QUERY = 23,
+ ITEM_REGEXP = 24,
+ ITEM_WORD_ALTERNATIVES = 25,
+ ITEM_MAX = 26, // Indicates how long tables must be.
+ ITEM_UNDEF = 31,
+ };
+
+ /** A tag identifying the origin of this query node.
+ * Note that descendants may origin from elsewhere.
+ * If changes necessary:
+ * NB! Append at end of list - corresponding type
+ * used in Juniper and updates of these two types must be synchronized.
+ * (juniper/src/query.h)
+ */
+ enum ItemCreator {
+ CREA_ORIG = 0, // Original user query
+ CREA_FILTER // Automatically applied filter (no specific type)
+ };
+
+ enum ItemFeatures {
+ IF_MASK = 0xE0, // mask for item features
+ IF_WEIGHT = 0x20, // item has rank weight
+ IF_UNIQUEID = 0x40, // item has unique id
+ IF_FLAGS = 0x80, // item has extra flags
+ IF_SUPPORTED_MASK = 0xE0 // mask for supported item features
+ };
+
+ enum ItemFlags {
+ IFLAG_NORANK = 0x00000001, // this term should not be ranked (not exposed to rank framework)
+ IFLAG_SPECIALTOKEN = 0x00000002,
+ IFLAG_NOPOSITIONDATA = 0x00000004, // we should not use position data when ranking this term
+ IFLAG_FILTER = 0x00000008
+ };
+
+private:
+ query::Weight _weight;
+ uint32_t _uniqueId;
+ uint32_t _arg1;
+ double _arg2;
+ double _arg3;
+ uint8_t _type;
+ uint8_t _flags;
+
+public:
+ /** Extra information on each item (creator id) coded in bits 12-19 of _type */
+ static inline ItemCreator GetCreator(uint8_t type) { return static_cast<ItemCreator>((type >> 3) & 0x01); }
+ /** The old item type now uses only the lower 12 bits in a backward compatible way) */
+ static inline ItemType GetType(uint8_t type) { return static_cast<ItemType>(type & 0x1F); }
+ inline ItemType Type() const { return GetType(_type); }
+
+ static inline bool GetFeature(uint8_t type, uint8_t feature)
+ { return ((type & feature) != 0); }
+
+ static inline bool GetFeature_Weight(uint8_t type)
+ { return GetFeature(type, IF_WEIGHT); }
+
+ static inline bool getFeature_UniqueId(uint8_t type)
+ { return GetFeature(type, IF_UNIQUEID); }
+
+ static inline bool getFeature_Flags(uint8_t type)
+ { return GetFeature(type, IF_FLAGS); }
+
+ inline bool Feature(uint8_t feature) const
+ { return GetFeature(_type, feature); }
+
+ inline bool Feature_Weight() const
+ { return GetFeature_Weight(_type); }
+
+ inline bool feature_UniqueId() const
+ { return getFeature_UniqueId(_type); }
+
+ inline bool feature_Flags() const
+ { return getFeature_Flags(_type); }
+
+ static inline bool getFlag(uint8_t flags, uint8_t flag)
+ { return ((flags & flag) != 0); }
+
+ /** The number of operands for the operation. */
+ uint32_t _arity;
+ /** The name of the specified index, or NULL if no index. */
+ vespalib::string _indexName;
+ /** The specified search term. */
+ vespalib::string _term;
+
+/**
+ * Overloaded constructor for ParseItem. Used primarily for
+ * the operators, or pharse without indexName.
+ *
+ * @param type The type of the ParseItem.
+ * @param arity The arity of the operation indicated by the ParseItem.
+ */
+ ParseItem(ItemType type, int arity);
+
+/**
+ * Overloaded constructor for ParseItem. Used for PHRASEs.
+ *
+ * @param type The type of the ParseItem.
+ * @param arity The arity of the operation indicated by the ParseItem.
+ * @param idx The name of the index of the ParseItem.
+ */
+ ParseItem(ItemType type, int arity, const char *index);
+
+/**
+ * Overloaded constructor for ParseItem. Used for TERMs.
+ *
+ * @param type The type of the ParseItem.
+ * @param idx The name of the index of the ParseItem.
+ * @param term The actual term string of the ParseItem.
+ */
+ ParseItem(ItemType type, const vespalib::stringref & index, const char *term);
+
+/**
+ * Overloaded constructor for ParseItem. Used for TERMs without index.
+ *
+ * @param type The type of the ParseItem.
+ * @param term The actual term string of the ParseItem.
+ */
+ ParseItem(ItemType type, const char *term);
+
+/**
+ * Destructor for ParseItem.
+ */
+ ~ParseItem();
+
+/**
+ * Set the value of the _term field.
+ * @param term The string to set the _term field to.
+ */
+ void SetTerm(const char *term) { _term = term; }
+
+/**
+ * Set the value of the _indexName field.
+ * @param idx The string to set the _indexName field to.
+ */
+ void SetIndex(const char *index) { _indexName = index; }
+
+ /**
+ * Set the type of the operator. Use this with caution,
+ * as this changes the semantics of the item.
+ *
+ * @param type The new type.
+ */
+ void SetType(ItemType type) {
+ _type = (_type & ~0x1F) | type;
+ }
+
+ /**
+ * Get the unique id for this item.
+ *
+ * @return unique id for this item
+ **/
+ uint32_t getUniqueId() const { return _uniqueId; }
+
+ /**
+ * Encode the item in a binary buffer.
+ * @param buf Pointer to a buffer containing the encoded contents.
+ */
+ void AppendBuffer(RawBuf *buf) const;
+
+ size_t GetBufferLen(void) const;
+};
+
+}
+
diff --git a/searchlib/src/vespa/searchlib/parsequery/simplequerystack.cpp b/searchlib/src/vespa/searchlib/parsequery/simplequerystack.cpp
new file mode 100644
index 00000000000..146b4aeeff4
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/parsequery/simplequerystack.cpp
@@ -0,0 +1,354 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/**
+ * Creation date: 2000-05-15
+ * Implementation of the simple query stack.
+ *
+ * Copyright (C) 1997-2003 Fast Search & Transfer ASA
+ * Copyright (C) 2003 Overture Services Norway AS
+ * ALL RIGHTS RESERVED
+ */
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+#include <vespa/vespalib/util/vstringfmt.h>
+#include <vespa/vespalib/util/compress.h>
+#include <vespa/vespalib/objects/nbostream.h>
+#include <vespa/searchlib/parsequery/simplequerystack.h>
+
+using vespalib::make_vespa_string;
+
+namespace search {
+
+SimpleQueryStack::SimpleQueryStack(void)
+ : _numItems(0),
+ _stack(NULL),
+ _FP_queryOK(true)
+{
+}
+
+SimpleQueryStack::~SimpleQueryStack(void)
+{
+ delete _stack;
+}
+
+void
+SimpleQueryStack::Push(search::ParseItem *item)
+{
+ // Check if query OK for FirstPage
+ _FP_queryOK &=
+ ( item->Type() != search::ParseItem::ITEM_UNDEF
+ && item->Type() != search::ParseItem::ITEM_PAREN
+ );
+
+
+ item->_next = _stack;
+ _stack = item;
+
+ _numItems++;
+}
+
+search::ParseItem *
+SimpleQueryStack::Pop(void)
+{
+ search::ParseItem *item = _stack;
+ if (_stack != NULL) {
+ _numItems--;
+ _stack = _stack->_next;
+ item->_next = NULL;
+ }
+ return item;
+}
+
+void
+SimpleQueryStack::AppendBuffer(search::RawBuf *buf) const
+{
+ for (search::ParseItem *item = _stack; item != NULL; item = item->_next) {
+ item->AppendBuffer(buf);
+ }
+}
+
+size_t
+SimpleQueryStack::GetBufferLen(void) const
+{
+ size_t result;
+
+ result = 0;
+ for (const search::ParseItem *item = _stack;
+ item != NULL; item = item->_next) {
+ result += item->GetBufferLen();
+ }
+
+ return result;
+}
+
+uint32_t
+SimpleQueryStack::GetSize(void)
+{
+ return _numItems;
+}
+
+bool
+SimpleQueryStack::_FP_isAllowed(void)
+{
+ return _FP_queryOK;
+}
+
+class ItemName {
+public:
+ ItemName() {
+ memset(_name, 'X', sizeof(_name));
+ _name[search::ParseItem::ITEM_OR] = '|';
+ _name[search::ParseItem::ITEM_WEAK_AND] = 'w';
+ _name[search::ParseItem::ITEM_EQUIV] = 'E';
+ _name[search::ParseItem::ITEM_AND] = '&';
+ _name[search::ParseItem::ITEM_NOT] = '-';
+ _name[search::ParseItem::ITEM_ANY] = '?';
+ _name[search::ParseItem::ITEM_RANK] = '%';
+ _name[search::ParseItem::ITEM_NEAR] = 'N';
+ _name[search::ParseItem::ITEM_ONEAR] = 'O';
+ _name[search::ParseItem::ITEM_NUMTERM] = '#';
+ _name[search::ParseItem::ITEM_TERM] = 't';
+ _name[search::ParseItem::ITEM_PURE_WEIGHTED_STRING] = 'T';
+ _name[search::ParseItem::ITEM_PURE_WEIGHTED_LONG] = 'L';
+ _name[search::ParseItem::ITEM_PREFIXTERM] = '*';
+ _name[search::ParseItem::ITEM_SUBSTRINGTERM] = 's';
+ _name[search::ParseItem::ITEM_EXACTSTRINGTERM] = 'e';
+ _name[search::ParseItem::ITEM_SUFFIXTERM] = 'S';
+ _name[search::ParseItem::ITEM_PHRASE] = '"';
+ _name[search::ParseItem::ITEM_WEIGHTED_SET] = 'W';
+ _name[search::ParseItem::ITEM_DOT_PRODUCT] = 'D';
+ _name[search::ParseItem::ITEM_WAND] = 'A';
+ _name[search::ParseItem::ITEM_PREDICATE_QUERY] = 'P';
+ _name[search::ParseItem::ITEM_REGEXP] = '^';
+ }
+ char operator[] (search::ParseItem::ItemType i) const { return _name[i]; }
+ char operator[] (size_t i) const { return _name[i]; }
+private:
+ char _name[search::ParseItem::ITEM_MAX];
+};
+
+static ItemName _G_ItemName;
+
+vespalib::string
+SimpleQueryStack::StackbufToString(const vespalib::stringref &theBuf)
+{
+ vespalib::string result;
+
+ /*
+ * This is a slightly bogus estimate of the size required. It should
+ * be enough in most cases, but it is possible to break it in rare and
+ * artificial circumstances.
+ *
+ * The simple operators use 8 bytes in the buffer.
+ * The string representation has 3 overhead chars, leaving 5 chars
+ * for the printed representation of the arity, i.e. < 10^5.
+ *
+ * The phrase operator uses 12 bytes + the length of the index string.
+ * The string representation has 5 overhead chars, leaving 7 chars
+ * for the total printed representation of the length of the index.
+ * If the index is 0, then the arity may use 6 chars, i.e. < 10^6.
+ *
+ * The term operator uses 12 bytes + the length of the index and term string.
+ * The string representation has 6 overhead chars, leaving 6 chars
+ * for the total printed representation of the index and term lengths.
+ * If for instance the index is 0, then the term must be shorter
+ * than 10^5 characters.
+ */
+
+ uint8_t rawtype = 0;
+ uint32_t type = 0, arity = 0, arg1 = 0;
+ const char *idxRef;
+ const char *termRef;
+ uint32_t idxRefLen;
+ uint32_t termRefLen;
+
+ const char *p = theBuf.begin();
+ const char *ep = theBuf.end();
+ uint64_t tmp(0);
+ uint8_t flags(0);
+ while (p < ep) {
+ vespalib::string metaStr;
+ rawtype = *p++;
+ type = search::ParseItem::GetType(rawtype);
+ if (search::ParseItem::GetFeature_Weight(rawtype)) {
+ int64_t tmpLong(0);
+ p += vespalib::compress::Integer::decompress(tmpLong, p);
+ metaStr.append("(w:");
+ metaStr.append(make_vespa_string("%ld", tmpLong));
+ metaStr.append(")");
+ }
+ if (search::ParseItem::getFeature_UniqueId(rawtype)) {
+ p += vespalib::compress::Integer::decompressPositive(tmp, p);
+ metaStr.append("(u:");
+ metaStr.append(make_vespa_string("%ld", tmp));
+ metaStr.append(")");
+ }
+ if (search::ParseItem::getFeature_Flags(rawtype)) {
+ flags = *p++;
+ metaStr.append("(f:");
+ metaStr.append(make_vespa_string("%d", flags));
+ metaStr.append(")");
+ }
+ if (search::ParseItem::GetCreator(flags) != search::ParseItem::CREA_ORIG) {
+ metaStr.append("(c:");
+ metaStr.append(make_vespa_string("%d", search::ParseItem::GetCreator(flags)));
+ metaStr.append(")");
+ }
+
+ metaStr.append('/');
+ result.append(metaStr);
+
+ switch (type) {
+ case search::ParseItem::ITEM_OR:
+ case search::ParseItem::ITEM_AND:
+ case search::ParseItem::ITEM_EQUIV:
+ case search::ParseItem::ITEM_NOT:
+ case search::ParseItem::ITEM_RANK:
+ case search::ParseItem::ITEM_ANY:
+ p += vespalib::compress::Integer::decompressPositive(tmp, p);
+ arity = tmp;
+ result.append(make_vespa_string("%c/%d~", _G_ItemName[type], arity));
+ break;
+ case search::ParseItem::ITEM_WEAK_AND:
+ case search::ParseItem::ITEM_NEAR:
+ case search::ParseItem::ITEM_ONEAR:
+ p += vespalib::compress::Integer::decompressPositive(tmp, p);
+ arity = tmp;
+ p += vespalib::compress::Integer::decompressPositive(tmp, p);
+ arg1 = tmp;
+ if (type == search::ParseItem::ITEM_WEAK_AND) {
+ p += vespalib::compress::Integer::decompressPositive(tmp, p);
+ idxRefLen = tmp;
+ idxRef = p;
+ p += idxRefLen;
+ result.append(make_vespa_string("%c/%d/%d/%d:%.*s~", _G_ItemName[type], arity, arg1, idxRefLen, idxRefLen, idxRef));
+ } else {
+ result.append(make_vespa_string("%c/%d/%d~", _G_ItemName[type], arity, arg1));
+ }
+ break;
+
+ case search::ParseItem::ITEM_NUMTERM:
+ case search::ParseItem::ITEM_TERM:
+ case search::ParseItem::ITEM_PREFIXTERM:
+ case search::ParseItem::ITEM_SUBSTRINGTERM:
+ case search::ParseItem::ITEM_EXACTSTRINGTERM:
+ case search::ParseItem::ITEM_SUFFIXTERM:
+ case search::ParseItem::ITEM_REGEXP:
+ p += vespalib::compress::Integer::decompressPositive(tmp, p);
+ idxRefLen = tmp;
+ idxRef = p;
+ p += idxRefLen;
+ p += vespalib::compress::Integer::decompressPositive(tmp, p);
+ termRefLen = tmp;
+ termRef = p;
+ p += termRefLen;
+ result.append(make_vespa_string("%c/%d:%.*s/%d:%.*s~", _G_ItemName[type],
+ idxRefLen, idxRefLen, idxRef,
+ termRefLen, termRefLen, termRef));
+ break;
+ case search::ParseItem::ITEM_PURE_WEIGHTED_STRING:
+ p += vespalib::compress::Integer::decompressPositive(tmp, p);
+ termRefLen = tmp;
+ termRef = p;
+ p += termRefLen;
+ result.append(make_vespa_string("%c/%d:%.*s~", _G_ItemName[type],
+ termRefLen, termRefLen, termRef));
+ break;
+
+ case search::ParseItem::ITEM_PURE_WEIGHTED_LONG:
+ tmp = vespalib::nbostream::n2h(*reinterpret_cast<const uint64_t *>(p));
+ p += sizeof(uint64_t);
+ result.append(make_vespa_string("%c/%lu", _G_ItemName[type], tmp));
+ break;
+
+ case search::ParseItem::ITEM_PHRASE:
+ case search::ParseItem::ITEM_WEIGHTED_SET:
+ case search::ParseItem::ITEM_DOT_PRODUCT:
+ case search::ParseItem::ITEM_WAND:
+ p += vespalib::compress::Integer::decompressPositive(tmp, p);
+ arity = tmp;
+ p += vespalib::compress::Integer::decompressPositive(tmp, p);
+ idxRefLen = tmp;
+ idxRef = p;
+ p += idxRefLen;
+ if (type == search::ParseItem::ITEM_WAND) {
+ p += vespalib::compress::Integer::decompressPositive(tmp, p);
+ uint32_t targetNumHits = tmp;
+ double scoreThreshold = vespalib::nbostream::n2h(*reinterpret_cast<const double *>(p));
+ p += sizeof(double);
+ double thresholdBoostFactor = vespalib::nbostream::n2h(*reinterpret_cast<const double *>(p)); // thresholdBoostFactor
+ p += sizeof(double);
+ result.append(make_vespa_string("%c/%d/%d:%.*s(%u,%f,%f)~", _G_ItemName[type], arity, idxRefLen,
+ idxRefLen, idxRef, targetNumHits, scoreThreshold, thresholdBoostFactor));
+ } else {
+ result.append(make_vespa_string("%c/%d/%d:%.*s~", _G_ItemName[type], arity, idxRefLen,
+ idxRefLen, idxRef));
+ }
+ break;
+
+ case search::ParseItem::ITEM_PREDICATE_QUERY:
+ {
+ idxRefLen = static_cast<uint32_t>(ReadCompressedPositiveInt(p));
+ idxRef = p;
+ p += idxRefLen;
+ size_t feature_count = ReadCompressedPositiveInt(p);
+ result.append(make_vespa_string(
+ "%c/%d:%.*s/%zu(", _G_ItemName[type], idxRefLen, idxRefLen, idxRef, feature_count));
+ for (size_t i = 0; i < feature_count; ++i) {
+ vespalib::string key = ReadString(p);
+ vespalib::string value = ReadString(p);
+ uint64_t sub_queries = ReadUint64(p);
+ result.append(make_vespa_string("%s:%s:%" PRIx64, key.c_str(), value.c_str(), sub_queries));
+ if (i < feature_count - 1) {
+ result.append(',');
+ }
+ }
+
+ size_t range_feature_count = ReadCompressedPositiveInt(p);
+ result.append(make_vespa_string(")/%zu(", range_feature_count));
+ for (size_t i = 0; i < range_feature_count; ++i) {
+ vespalib::string key = ReadString(p);
+ uint64_t value = ReadUint64(p);
+ uint64_t sub_queries = ReadUint64(p);
+ result.append(make_vespa_string("%s:%" PRIu64 ":%" PRIx64, key.c_str(), value, sub_queries));
+ if (i < range_feature_count - 1) {
+ result.append(',');
+ }
+ }
+ result.append(")~");
+ break;
+ }
+
+ default:
+ abort();
+ }
+ }
+ return result;
+}
+
+vespalib::string
+SimpleQueryStack::ReadString(const char *&p)
+{
+ uint64_t tmp;
+ p += vespalib::compress::Integer::decompressPositive(tmp, p);
+ vespalib::string s(p, tmp);
+ p += s.size();
+ return s;
+}
+
+uint64_t
+SimpleQueryStack::ReadUint64(const char *&p)
+{
+ uint64_t l = static_cast<uint64_t>(vespalib::nbostream::n2h(*(const uint64_t *)p));
+ p += sizeof(uint64_t);
+ return l;
+}
+
+uint64_t
+SimpleQueryStack::ReadCompressedPositiveInt(const char *&p)
+{
+ uint64_t tmp;
+ p += vespalib::compress::Integer::decompressPositive(tmp, p);
+ return tmp;
+}
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/parsequery/simplequerystack.h b/searchlib/src/vespa/searchlib/parsequery/simplequerystack.h
new file mode 100644
index 00000000000..fd6bced2704
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/parsequery/simplequerystack.h
@@ -0,0 +1,108 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/**
+ * Creation date: 2000-05-15
+ *
+ * Declaration of the SimpleQueryStack
+ *
+ * Copyright (C) 1997-2003 Fast Search & Transfer ASA
+ * Copyright (C) 2003 Overture Services Norway AS
+ * ALL RIGHTS RESERVED
+ */
+#pragma once
+
+#include <vespa/searchlib/parsequery/parse.h>
+#include <vespa/searchlib/util/rawbuf.h>
+#include <vespa/vespalib/stllike/string.h>
+
+namespace search {
+
+/**
+ * A stack of ParseItems.
+ *
+ * A simple stack consisting of a list of ParseItems.
+ * It is able to generate a binary encoding of itself
+ * to a search::RawBuf.
+ */
+class SimpleQueryStack
+{
+
+private:
+ SimpleQueryStack(const SimpleQueryStack &);
+ SimpleQueryStack& operator=(const SimpleQueryStack &);
+
+ static vespalib::string ReadString(const char *&p);
+ static uint64_t ReadUint64(const char *&p);
+ static uint64_t ReadCompressedPositiveInt(const char *&p);
+
+ /** The number of items on the stack. */
+ uint32_t _numItems;
+
+ /** The top of the stack.
+ * Warning: FastQT_ProximityEmul currently assumes this is the head
+ * of a singly linked list (linked with _next).
+ */
+ search::ParseItem *_stack;
+
+ /** Is this query OK for FirstPage? */
+ bool _FP_queryOK;
+
+public:
+ /**
+ * Constructor for SimpleQueryStack.
+ */
+ SimpleQueryStack(void);
+ /**
+ * Destructor for SimpleQueryStack.
+ */
+ ~SimpleQueryStack(void);
+ /**
+ * Push an item on the stack.
+ * @param item The search::ParseItem to push.
+ */
+ void Push(search::ParseItem *item);
+ /**
+ * Pop an item of the stack.
+ * @return Pointer to the search::ParseItem poped, or NULL if stack is empty.
+ */
+ search::ParseItem *Pop(void);
+ /**
+ * Top node of the stack.
+ * @return Pointer to the top search::ParseItem, or NULL if stack is empty.
+ */
+ search::ParseItem *Top(void) { return _stack; }
+
+ /**
+ * Encode the contents of the stack in a binary buffer.
+ * @param buf Pointer to a buffer containing the encoded contents.
+ */
+ void AppendBuffer(search::RawBuf *buf) const;
+
+ size_t GetBufferLen(void) const;
+ /**
+ * Return the number of items on the stack.
+ * @return The number of items on the stack.
+ */
+ uint32_t GetSize(void);
+ /**
+ * Set the number of items on the stack.
+ * This can be used by QTs that change the stack
+ * under the hood. Use with care!
+ * @param numItems The number of items on the stack.
+ */
+ void SetSize(uint32_t numItems) { _numItems = numItems; }
+
+ /**
+ * Is it possible to run this query on FirstPage?
+ * @return true if ok
+ */
+ bool _FP_isAllowed(void);
+ /**
+ * Make a string representation of the search::RawBuf representing a querystack.
+ * @param theBuf The querystack encoded buffer.
+ * @return a fresh string
+ */
+ static vespalib::string StackbufToString(const vespalib::stringref &theBuf);
+};
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/parsequery/stackdumpiterator.cpp b/searchlib/src/vespa/searchlib/parsequery/stackdumpiterator.cpp
new file mode 100644
index 00000000000..dac07640bd7
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/parsequery/stackdumpiterator.cpp
@@ -0,0 +1,297 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/**
+ * Implementation of the simple query stack dump iterator.
+ *
+ * Copyright (C) 1997-2003 Fast Search & Transfer ASA
+ * Copyright (C) 2003 Overture Services Norway AS
+ * ALL RIGHTS RESERVED
+ */
+#include <vespa/fastos/fastos.h>
+#include <vespa/searchlib/parsequery/stackdumpiterator.h>
+#include <vespa/searchlib/util/rawbuf.h>
+#include <vespa/vespalib/util/compress.h>
+#include <vespa/vespalib/objects/nbostream.h>
+
+using search::query::PredicateQueryTerm;
+
+namespace search {
+
+SimpleQueryStackDumpIterator::SimpleQueryStackDumpIterator(const vespalib::stringref &buf) :
+ _buf(buf.begin()),
+ _bufEnd(buf.end()),
+ _bufLen(buf.size()),
+ _currPos(_buf),
+ _currEnd(_buf),
+ _currType(ParseItem::ITEM_UNDEF),
+ _currCreator(ParseItem::CREA_ORIG),
+ _currWeight(100),
+ _currUniqueId(0),
+ _currFlags(0),
+ _currArity(0),
+ _currArg1(0),
+ _currArg2(0),
+ _currArg3(0),
+ _predicate_query_term(),
+ _currIndexName(NULL),
+ _currIndexNameLen(0),
+ _currTerm(NULL),
+ _currTermLen(0),
+ _generatedTerm(),
+ _currNum(-1)
+{
+}
+
+SimpleQueryStackDumpIterator::~SimpleQueryStackDumpIterator()
+{
+}
+
+vespalib::string SimpleQueryStackDumpIterator::readString(const char *&p) {
+ if (p >= _bufEnd) throw false;
+ uint64_t tmp;
+ p += vespalib::compress::Integer::decompressPositive(tmp, p);
+ vespalib::string s(p, tmp);
+ p += s.size();
+ return s;
+}
+
+uint64_t SimpleQueryStackDumpIterator::readUint64(const char *&p) {
+ if (p + sizeof(uint64_t) > _bufEnd) throw false;
+ uint64_t l = vespalib::nbostream::n2h(*(const uint64_t *)p);
+ p += sizeof(uint64_t);
+ return l;
+}
+
+uint64_t
+SimpleQueryStackDumpIterator::readCompressedPositiveInt(const char *&p) {
+ if (p >= _bufEnd) throw false;
+ uint64_t tmp;
+ p += vespalib::compress::Integer::decompressPositive(tmp, p);
+ return tmp;
+}
+
+bool
+SimpleQueryStackDumpIterator::next()
+{
+ if (_currEnd >= _bufEnd)
+ // End of buffer, so no more items available
+ return false;
+
+ // Set the position to the previous end. If just starting, sets pos to _buf
+ _currPos = _currEnd;
+
+ // Find an item at the current position
+ const char *p = _currPos;
+ uint8_t typefield = *p++;
+ _currType = ParseItem::GetType(typefield);
+
+ uint64_t tmp(0);
+ if (ParseItem::GetFeature_Weight(typefield)) {
+ long tmpLong;
+ if (p >= _bufEnd) return false;
+ p += vespalib::compress::Integer::decompress(tmpLong, p);
+ _currWeight.setPercent(tmpLong);
+ if (p > _bufEnd) return false;
+ } else {
+ _currWeight.setPercent(100);
+ }
+ if (ParseItem::getFeature_UniqueId(typefield)) {
+ if (p >= _bufEnd) return false;
+ p += vespalib::compress::Integer::decompressPositive(tmp, p);
+ _currUniqueId = tmp;
+ } else {
+ _currUniqueId = 0;
+ }
+ if (ParseItem::getFeature_Flags(typefield)) {
+ if ((p + sizeof(uint32_t)) > _bufEnd) {
+ return false;
+ }
+ _currFlags = (uint8_t)*p++;
+ } else {
+ _currFlags = 0;
+ }
+ _currCreator = ParseItem::GetCreator(_currFlags);
+
+ switch (_currType) {
+ case ParseItem::ITEM_OR:
+ case ParseItem::ITEM_EQUIV:
+ case ParseItem::ITEM_AND:
+ case ParseItem::ITEM_NOT:
+ case ParseItem::ITEM_RANK:
+ case ParseItem::ITEM_ANY:
+ if (p >= _bufEnd) return false;
+ p += vespalib::compress::Integer::decompressPositive(tmp, p);
+ _currArity = tmp;
+ if (p > _bufEnd) return false;
+ _currArg1 = 0;
+ _currIndexName = NULL;
+ _currIndexNameLen = 0;
+ _currTerm = NULL;
+ _currTermLen = 0;
+ break;
+
+ case ParseItem::ITEM_NEAR:
+ case ParseItem::ITEM_ONEAR:
+ if (p >= _bufEnd) return false;
+ p += vespalib::compress::Integer::decompressPositive(tmp, p);
+ _currArity = tmp;
+ if (p > _bufEnd) return false;
+ p += vespalib::compress::Integer::decompressPositive(tmp, p);
+ _currArg1 = tmp;
+ if (p > _bufEnd) return false;
+ _currIndexName = NULL;
+ _currIndexNameLen = 0;
+ _currTerm = NULL;
+ _currTermLen = 0;
+ break;
+
+ case ParseItem::ITEM_WEAK_AND:
+ if (p >= _bufEnd) return false;
+ p += vespalib::compress::Integer::decompressPositive(tmp, p);
+ _currArity = tmp;
+ if (p > _bufEnd) return false;
+ p += vespalib::compress::Integer::decompressPositive(tmp, p);
+ _currArg1 = tmp;
+ if (p > _bufEnd) return false;
+ p += vespalib::compress::Integer::decompressPositive(tmp, p);
+ _currIndexNameLen = tmp;
+ if (p > _bufEnd) return false;
+ _currIndexName = p;
+ p += _currIndexNameLen;
+ if (p > _bufEnd) return false;
+ _currTerm = NULL;
+ _currTermLen = 0;
+ break;
+
+ case ParseItem::ITEM_PURE_WEIGHTED_STRING:
+ if (p >= _bufEnd) return false;
+ p += vespalib::compress::Integer::decompressPositive(tmp, p);
+ _currTermLen = tmp;
+ if (p > _bufEnd) return false;
+ _currTerm = p;
+ p += _currTermLen;
+ if (p > _bufEnd) return false;
+
+ _currArg1 = 0;
+ _currArity = 0;
+ break;
+ case ParseItem::ITEM_PURE_WEIGHTED_LONG:
+ if (p + 8 > _bufEnd) return false;
+ _generatedTerm.clear();
+ _generatedTerm << vespalib::nbostream::n2h(*(const uint64_t *)p);
+ _currTerm = _generatedTerm.c_str();
+ _currTermLen = _generatedTerm.size();
+ p += 8;
+ if (p > _bufEnd) return false;
+
+ _currArg1 = 0;
+ _currArity = 0;
+ break;
+ case ParseItem::ITEM_WORD_ALTERNATIVES:
+ try {
+ _currIndexNameLen = readCompressedPositiveInt(p);
+ _currIndexName = p;
+ p += _currIndexNameLen;
+ _currArity = readCompressedPositiveInt(p);
+ _currTerm = NULL;
+ _currTermLen = 0;
+ if (p > _bufEnd) return false;
+ } catch (...) {
+ return false;
+ }
+ break;
+ case ParseItem::ITEM_NUMTERM:
+ case ParseItem::ITEM_TERM:
+ case ParseItem::ITEM_PREFIXTERM:
+ case ParseItem::ITEM_SUBSTRINGTERM:
+ case ParseItem::ITEM_EXACTSTRINGTERM:
+ case ParseItem::ITEM_SUFFIXTERM:
+ case ParseItem::ITEM_REGEXP:
+ p += vespalib::compress::Integer::decompressPositive(tmp, p);
+ _currIndexNameLen = tmp;
+ if (p > _bufEnd) return false;
+ _currIndexName = p;
+ p += _currIndexNameLen;
+ if (p > _bufEnd) return false;
+ p += vespalib::compress::Integer::decompressPositive(tmp, p);
+ _currTermLen = tmp;
+ if (p > _bufEnd) return false;
+ _currTerm = p;
+ p += _currTermLen;
+ if (p > _bufEnd) return false;
+
+ _currArg1 = 0;
+ _currArity = 0;
+ break;
+ case ParseItem::ITEM_PREDICATE_QUERY:
+ try {
+ if (p >= _bufEnd) return false;
+ p += vespalib::compress::Integer::decompressPositive(tmp, p);
+ _currIndexNameLen = tmp;
+ if (p > _bufEnd) return false;
+ _currIndexName = p;
+ p += _currIndexNameLen;
+ _predicate_query_term.reset(new PredicateQueryTerm);
+
+ size_t count = readCompressedPositiveInt(p);
+ for (size_t i = 0; i < count; ++i) {
+ vespalib::string key = readString(p);
+ vespalib::string value = readString(p);
+ uint64_t sub_queries = readUint64(p);
+ _predicate_query_term->addFeature(key, value, sub_queries);
+ }
+ count = readCompressedPositiveInt(p);
+ for (size_t i = 0; i < count; ++i) {
+ vespalib::string key = readString(p);
+ uint64_t value = readUint64(p);
+ uint64_t sub_queries = readUint64(p);
+ _predicate_query_term->addRangeFeature(
+ key, value, sub_queries);
+ }
+ if (p > _bufEnd) return false;
+ } catch (...) {
+ return false;
+ }
+ break;
+
+ case ParseItem::ITEM_WEIGHTED_SET:
+ case ParseItem::ITEM_DOT_PRODUCT:
+ case ParseItem::ITEM_WAND:
+ case ParseItem::ITEM_PHRASE:
+ p += vespalib::compress::Integer::decompressPositive(tmp, p);
+ _currArity = tmp;
+ if (p > _bufEnd) return false;
+ p += vespalib::compress::Integer::decompressPositive(tmp, p);
+ _currIndexNameLen = tmp;
+ if (p > _bufEnd) return false;
+ _currIndexName = p;
+ p += _currIndexNameLen;
+ if (p > _bufEnd) return false;
+ if (_currType == ParseItem::ITEM_WAND) {
+ p += vespalib::compress::Integer::decompressPositive(tmp, p); // targetNumHits
+ _currArg1 = tmp;
+ _currArg2 = vespalib::nbostream::n2h(*reinterpret_cast<const double *>(p)); // scoreThreshold
+ p += sizeof(double);
+ _currArg3 = vespalib::nbostream::n2h(*reinterpret_cast<const double *>(p)); // thresholdBoostFactor
+ p += sizeof(double);
+ } else {
+ _currArg1 = 0;
+ }
+ _currTerm = NULL;
+ _currTermLen = 0;
+ break;
+
+ default:
+ // Unknown item, so report that no more are available
+ return false;
+ break;
+ }
+ _currNum++;
+ _currEnd = p;
+
+ // We should not have passed the buffer
+ assert(_currEnd <= _bufEnd);
+
+ return true;
+}
+
+}
diff --git a/searchlib/src/vespa/searchlib/parsequery/stackdumpiterator.h b/searchlib/src/vespa/searchlib/parsequery/stackdumpiterator.h
new file mode 100644
index 00000000000..451ea226d86
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/parsequery/stackdumpiterator.h
@@ -0,0 +1,165 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/**
+ * Declaration of the SimpleQueryStack dump iterator
+ *
+ * Copyright (C) 1997-2003 Fast Search & Transfer ASA
+ * Copyright (C) 2003 Overture Services Norway AS
+ * ALL RIGHTS RESERVED
+ */
+#pragma once
+
+#include <vespa/searchlib/parsequery/parse.h>
+#include <vespa/searchlib/query/tree/predicate_query_term.h>
+#include <vespa/vespalib/stllike/asciistream.h>
+#include <vespa/vespalib/stllike/string.h>
+
+namespace search {
+/**
+ * An iterator to be used on a buffer that is a stack dump
+ * of a SimpleQueryStack.
+ */
+class SimpleQueryStackDumpIterator
+{
+private:
+ SimpleQueryStackDumpIterator(const SimpleQueryStackDumpIterator &);
+ SimpleQueryStackDumpIterator& operator=(const SimpleQueryStackDumpIterator &);
+
+ /** Pointer to the start of the input buffer */
+ const char *_buf;
+ /** Pointer to just past the input buffer */
+ const char *_bufEnd;
+ /** Total length of the input buffer */
+ size_t _bufLen;
+
+ /** Pointer to the position of the current item in the buffer */
+ const char *_currPos;
+ /** Pointer to after the current item */
+ const char *_currEnd;
+ /** The type of the current item */
+ ParseItem::ItemType _currType;
+ ParseItem::ItemCreator _currCreator;
+ /** Rank weight of current item **/
+ query::Weight _currWeight;
+ /** unique id of the current item **/
+ uint32_t _currUniqueId;
+
+ /** flags of the current item **/
+ uint32_t _currFlags;
+
+ /** The arity of the current item */
+ uint32_t _currArity;
+ /** The first argument of the current item (length of NEAR/ONEAR area for example) */
+ uint32_t _currArg1;
+ /** The second argument of the current item (score threshold of WAND for example) */
+ double _currArg2;
+ /** The third argument of the current item (threshold boost factor of WAND for example) */
+ double _currArg3;
+ /** The predicate query specification */
+ query::PredicateQueryTerm::UP _predicate_query_term;
+ /** Pointer to the position of the index name in the current item */
+ const char *_currIndexName;
+ /** The length of the index name in the current item */
+ size_t _currIndexNameLen;
+ /** Pointer to the position of the term in the current item */
+ const char *_currTerm;
+ /** The length of the term in the current item */
+ size_t _currTermLen;
+ vespalib::asciistream _generatedTerm;
+
+ /** The number of the current item */
+ int _currNum;
+
+ vespalib::string readString(const char *&p);
+ uint64_t readUint64(const char *&p);
+ uint64_t readCompressedPositiveInt(const char *&p);
+
+public:
+ /**
+ * Make an iterator on a buffer. To get the first item, next
+ * must be called.
+ *
+ * @param buf A pointer to the buffer holding the stackdump
+ * @param buflen The length of the buffer in bytes
+ */
+ SimpleQueryStackDumpIterator(const vespalib::stringref &buf);
+ ~SimpleQueryStackDumpIterator();
+
+ /**
+ * Moves to the next item in the buffer.
+ *
+ * @return true if there is a new item, false if there are no more items
+ * or if there was errors in extracting the next item.
+ */
+ bool next(void);
+
+ /**
+ * Get the number of the current item.
+ *
+ * @return The ordinal of the current item. -1 if at the start.
+ */
+ int getNum(void) const { return _currNum; }
+
+ /**
+ * Get the type of the current item.
+ * @return the type.
+ */
+ ParseItem::ItemType getType(void) const { return _currType; }
+ /**
+ * Get the type of the current item.
+ * @return the type.
+ */
+ ParseItem::ItemCreator getCreator(void) const { return _currCreator; }
+
+ /**
+ * Get the rank weight of the current item.
+ *
+ * @return rank weight.
+ **/
+ query::Weight GetWeight() const { return _currWeight; }
+
+ /**
+ * Get the unique id of the current item.
+ *
+ * @return unique id of current item
+ **/
+ uint32_t getUniqueId() const { return _currUniqueId; }
+
+ /**
+ * Get the term index of the current item.
+ *
+ * @return term index of current item
+ **/
+ uint32_t getTermIndex() const { return -1; }
+
+ /**
+ * Get the flags of the current item.
+ *
+ * @return flags of current item
+ **/
+ uint32_t getFlags() const { return _currFlags; }
+
+ uint32_t getArity(void) const { return _currArity; }
+
+ uint32_t getArg1(void) const { return _currArg1; }
+
+ double getArg2() const { return _currArg2; }
+
+ double getArg3() const { return _currArg3; }
+
+ query::PredicateQueryTerm::UP getPredicateQueryTerm()
+ { return std::move(_predicate_query_term); }
+
+ /**
+ * Get the type of the current item.
+ * @return the type.
+ */
+ void getIndexName(const char **buf, size_t *buflen) const { *buf = _currIndexName; *buflen = _currIndexNameLen; }
+ /**
+ * Get the type of the current item.
+ * @return the type.
+ */
+ void getTerm(const char **buf, size_t *buflen) const { *buf = _currTerm; *buflen = _currTermLen; }
+};
+
+}
+
diff --git a/searchlib/src/vespa/searchlib/predicate/CMakeLists.txt b/searchlib/src/vespa/searchlib/predicate/CMakeLists.txt
new file mode 100644
index 00000000000..51465893356
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/predicate/CMakeLists.txt
@@ -0,0 +1,14 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_library(searchlib_predicate OBJECT
+ SOURCES
+ document_features_store.cpp
+ predicate_index.cpp
+ predicate_interval.cpp
+ predicate_interval_store.cpp
+ predicate_range_expander.cpp
+ predicate_tree_analyzer.cpp
+ predicate_tree_annotator.cpp
+ predicate_zero_constraint_posting_list.cpp
+ simple_index.cpp
+ DEPENDS
+)
diff --git a/searchlib/src/vespa/searchlib/predicate/OWNERS b/searchlib/src/vespa/searchlib/predicate/OWNERS
new file mode 100644
index 00000000000..569bf1cc3a1
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/predicate/OWNERS
@@ -0,0 +1 @@
+bjorncs
diff --git a/searchlib/src/vespa/searchlib/predicate/document_features_store.cpp b/searchlib/src/vespa/searchlib/predicate/document_features_store.cpp
new file mode 100644
index 00000000000..db5f1611d0a
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/predicate/document_features_store.cpp
@@ -0,0 +1,293 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/log/log.h>
+LOG_SETUP(".document_features_store");
+#include <vespa/fastos/fastos.h>
+
+#include "document_features_store.h"
+#include "predicate_index.h"
+#include "predicate_range_expander.h"
+#include "predicate_tree_annotator.h"
+#include <vespa/searchlib/btree/btreenode.h>
+#include <vespa/vespalib/data/databuffer.h>
+
+#include <unordered_map>
+#include <vector>
+
+using search::btree::BTreeNoLeafData;
+using search::btree::EntryRef;
+using vespalib::MMapDataBuffer;
+using vespalib::stringref;
+using std::unordered_map;
+using std::vector;
+
+namespace search {
+namespace predicate {
+
+void
+DocumentFeaturesStore::setCurrent(uint32_t docId, FeatureVector *features) {
+ _currDocId = docId;
+ _currFeatures = features;
+}
+
+DocumentFeaturesStore::DocumentFeaturesStore(uint32_t arity)
+ : _docs(),
+ _ranges(),
+ _word_store(),
+ _word_index(),
+ _currDocId(0),
+ _currFeatures(),
+ _numFeatures(0),
+ _numRanges(0),
+ _arity(arity) {
+}
+
+namespace {
+template <typename KeyComp, typename WordIndex>
+void deserializeWords(MMapDataBuffer &buffer,
+ memoryindex::WordStore &word_store,
+ WordIndex &word_index,
+ vector<EntryRef> &word_refs) {
+ uint32_t word_list_size = buffer.readInt32();
+ word_refs.reserve(word_list_size);
+ vector<char> word;
+ KeyComp cmp(word_store, "");
+ for (uint32_t i = 0; i < word_list_size; ++i) {
+ uint32_t size = buffer.readInt32();
+ word.clear();
+ word.resize(size);
+ buffer.readBytes(&word[0], size);
+ word_refs.push_back(word_store.addWord(stringref(&word[0], size)));
+ word_index.insert(word_refs.back(), BTreeNoLeafData(), cmp);
+ }
+}
+
+template <typename RangeFeaturesMap>
+void deserializeRanges(MMapDataBuffer &buffer, vector<EntryRef> &word_refs,
+ RangeFeaturesMap &ranges, size_t &num_ranges) {
+ typedef typename RangeFeaturesMap::mapped_type::value_type Range;
+ uint32_t ranges_size = buffer.readInt32();
+ for (uint32_t i = 0; i < ranges_size; ++i) {
+ uint32_t doc_id = buffer.readInt32();
+ uint32_t range_count = buffer.readInt32();
+ auto &range_vector = ranges[doc_id];
+ range_vector.reserve(range_count);
+ for (uint32_t j = 0; j < range_count; ++j) {
+ Range range;
+ range.label_ref = word_refs[buffer.readInt32()];
+ range.from = buffer.readInt64();
+ range.to = buffer.readInt64();
+ range_vector.push_back(range);
+ }
+ num_ranges += range_count;
+ }
+}
+
+template <typename DocumentFeaturesMap>
+void deserializeDocs(MMapDataBuffer &buffer, DocumentFeaturesMap &docs,
+ size_t &num_features) {
+ uint32_t docs_size = buffer.readInt32();
+ for (uint32_t i = 0; i < docs_size; ++i) {
+ uint32_t doc_id = buffer.readInt32();
+ uint32_t feature_count = buffer.readInt32();
+ auto &feature_vector = docs[doc_id];
+ feature_vector.reserve(feature_count);
+ for (uint32_t j = 0; j < feature_count; ++j) {
+ feature_vector.push_back(buffer.readInt64());
+ }
+ num_features += feature_count;
+ }
+}
+} // namespace
+
+DocumentFeaturesStore::DocumentFeaturesStore(MMapDataBuffer &buffer)
+ : DocumentFeaturesStore(0) {
+ _arity = buffer.readInt16();
+
+ vector<EntryRef> word_refs;
+ deserializeWords<KeyComp>(buffer, _word_store, _word_index, word_refs);
+ deserializeRanges(buffer, word_refs, _ranges, _numRanges);
+ deserializeDocs(buffer, _docs, _numFeatures);
+}
+
+DocumentFeaturesStore::~DocumentFeaturesStore() {
+ _word_index.disableFreeLists();
+ _word_index.disableElemHoldList();
+ _word_index.getAllocator().freeze();
+ _word_index.clear();
+}
+
+void DocumentFeaturesStore::insert(uint64_t featureId, uint32_t docId) {
+ assert(docId != 0);
+ if (_currDocId != docId) {
+ auto docsItr = _docs.find(docId);
+ if (docsItr == _docs.end()) {
+ docsItr =
+ _docs.insert(std::make_pair(docId, FeatureVector())).first;
+ }
+ setCurrent(docId, &docsItr->second);
+ }
+ _currFeatures->push_back(featureId);
+ ++_numFeatures;
+}
+
+void DocumentFeaturesStore::insert(const PredicateTreeAnnotations &annotations,
+ uint32_t doc_id) {
+ assert(doc_id != 0);
+ if (!annotations.features.empty()) {
+ auto it = _docs.find(doc_id);
+ if (it == _docs.end()) {
+ it = _docs.insert(std::make_pair(doc_id, FeatureVector())).first;
+ }
+ size_t size = it->second.size();
+ it->second.resize(size + annotations.features.size());
+ memcpy(&it->second[size], &annotations.features[0],
+ annotations.features.size() * sizeof(annotations.features[0]));
+ _numFeatures += annotations.features.size();
+ }
+ if (!annotations.range_features.empty()) {
+ auto it = _ranges.find(doc_id);
+ if (it == _ranges.end()) {
+ it = _ranges.insert(std::make_pair(doc_id, RangeVector())).first;
+ }
+ for (const auto &range : annotations.range_features) {
+ stringref word(range.label.data, range.label.size);
+ KeyComp cmp(_word_store, word);
+ auto word_it = _word_index.find(btree::EntryRef(), cmp);
+ btree::EntryRef ref;
+ if (word_it.valid()) {
+ ref = word_it.getKey();
+ } else {
+ ref = _word_store.addWord(word);
+ _word_index.insert(ref, BTreeNoLeafData(), cmp);
+ }
+ it->second.push_back({ref, range.from, range.to});
+ }
+ _numRanges += annotations.range_features.size();
+ }
+}
+
+DocumentFeaturesStore::FeatureSet
+DocumentFeaturesStore::get(uint32_t docId) const {
+ FeatureSet features;
+ auto docsItr = _docs.find(docId);
+ if (docsItr != _docs.end()) {
+ features.insert(docsItr->second.begin(), docsItr->second.end());
+ }
+ auto rangeItr = _ranges.find(docId);
+ if (rangeItr != _ranges.end()) {
+ for (auto range : rangeItr->second) {
+ const char *label = _word_store.getWord(range.label_ref);
+ PredicateRangeExpander::expandRange(
+ label, range.from, range.to, _arity,
+ std::inserter(features, features.end()));
+ }
+ }
+ return features;
+}
+
+void DocumentFeaturesStore::remove(uint32_t doc_id) {
+ auto itr = _docs.find(doc_id);
+ if (itr != _docs.end()) {
+ _numFeatures = _numFeatures >= itr->second.size() ?
+ (_numFeatures - itr->second.size()) : 0;
+ _docs.erase(itr);
+ }
+ auto range_itr = _ranges.find(doc_id);
+ if (range_itr != _ranges.end()) {
+ _numRanges = _numRanges >= range_itr->second.size() ?
+ (_numRanges - range_itr->second.size()) : 0;
+ _ranges.erase(range_itr);
+ }
+ if (_currDocId == doc_id) {
+ setCurrent(0, NULL);
+ }
+}
+
+search::MemoryUsage DocumentFeaturesStore::getMemoryUsage() const {
+ search::MemoryUsage usage;
+ usage.incAllocatedBytes(_docs.getMemoryConsumption());
+ usage.incUsedBytes(_docs.getMemoryUsed());
+ usage.incAllocatedBytes(_ranges.getMemoryConsumption());
+ usage.incUsedBytes(_ranges.getMemoryUsed());
+ // Note: allocated bytes in FeatureVector is slighly larger, but
+ // this should be good enough.
+ usage.incAllocatedBytes(_numFeatures * sizeof(uint64_t));
+ usage.incUsedBytes(_numFeatures * sizeof(uint64_t));
+ usage.incAllocatedBytes(_numRanges * sizeof(Range));
+ usage.incUsedBytes(_numRanges * sizeof(Range));
+
+ usage.merge(_word_store.getMemoryUsage());
+ usage.merge(_word_index.getMemoryUsage());
+
+ return usage;
+}
+
+namespace {
+template <typename RangeFeaturesMap>
+void findUsedWords(const RangeFeaturesMap &ranges,
+ unordered_map<uint32_t, uint32_t> &word_map,
+ vector<EntryRef> &word_list) {
+ for (const auto &range_features_entry : ranges) {
+ for (const auto &range : range_features_entry.second) {
+ if (!word_map.count(range.label_ref.ref())) {
+ word_map[range.label_ref.ref()] = word_list.size();
+ word_list.push_back(range.label_ref);
+ }
+ }
+ }
+}
+
+void serializeWords(MMapDataBuffer &buffer, const vector<EntryRef> &word_list,
+ const memoryindex::WordStore &word_store) {
+ buffer.writeInt32(word_list.size());
+ for (const auto &word_ref : word_list) {
+ const char *word = word_store.getWord(word_ref);
+ uint32_t len = strlen(word);
+ buffer.writeInt32(len);
+ buffer.writeBytes(word, len);
+ }
+}
+
+template <typename RangeFeaturesMap>
+void serializeRanges(MMapDataBuffer &buffer, RangeFeaturesMap &ranges,
+ unordered_map<uint32_t, uint32_t> &word_map) {
+ buffer.writeInt32(ranges.size());
+ for (const auto &range_features_entry : ranges) {
+ buffer.writeInt32(range_features_entry.first); // doc id
+ buffer.writeInt32(range_features_entry.second.size());
+ for (const auto &range : range_features_entry.second) {
+ buffer.writeInt32(word_map[range.label_ref.ref()]);
+ buffer.writeInt64(range.from);
+ buffer.writeInt64(range.to);
+ }
+ }
+}
+
+template <typename DocumentFeaturesMap>
+void serializeDocs(MMapDataBuffer &buffer, DocumentFeaturesMap &docs) {
+ buffer.writeInt32(docs.size());
+ for (const auto &doc_features_entry : docs) {
+ buffer.writeInt32(doc_features_entry.first); // doc id
+ buffer.writeInt32(doc_features_entry.second.size());
+ for (const auto &feature : doc_features_entry.second) {
+ buffer.writeInt64(feature);
+ }
+ }
+}
+} // namespace
+
+void DocumentFeaturesStore::serialize(MMapDataBuffer &buffer) const {
+ vector<EntryRef> word_list;
+ unordered_map<uint32_t, uint32_t> word_map;
+
+ findUsedWords(_ranges, word_map, word_list);
+
+ buffer.writeInt16(_arity);
+ serializeWords(buffer, word_list, _word_store);
+ serializeRanges(buffer, _ranges, word_map);
+ serializeDocs(buffer, _docs);
+}
+
+} // namespace predicate
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/predicate/document_features_store.h b/searchlib/src/vespa/searchlib/predicate/document_features_store.h
new file mode 100644
index 00000000000..314e7347f27
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/predicate/document_features_store.h
@@ -0,0 +1,89 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "predicate_tree_annotator.h"
+#include <vespa/searchlib/btree/btree.h>
+#include <vespa/searchlib/memoryindex/wordstore.h>
+#include <vespa/searchlib/util/memoryusage.h>
+#include <vespa/vespalib/data/databuffer.h>
+#include <vespa/vespalib/stllike/hash_map.h>
+#include <vespa/vespalib/util/array.h>
+#include <unordered_set>
+
+namespace search {
+namespace predicate {
+
+/**
+ * Class used to track the {featureId, docId} pairs that are inserted
+ * into the btree memory index dictionary. These pairs are later used
+ * when removing all remains of a document from the feature posting
+ * lists of the dictionary.
+ */
+class DocumentFeaturesStore {
+ typedef memoryindex::WordStore WordStore;
+ struct Range {
+ btree::EntryRef label_ref;
+ int64_t from;
+ int64_t to;
+ };
+ // Compares EntryRefs by their corresponding word in a WordStore.
+ // To find a word without knowing its EntryRef, set the word in
+ // the constructor and search for an illegal EntryRef.
+ class KeyComp {
+ const WordStore &_word_store;
+ const vespalib::string _word;
+
+ const char *getWord(btree::EntryRef ref) const {
+ return ref.valid() ? _word_store.getWord(ref) : _word.c_str();
+ }
+
+ public:
+ KeyComp(const WordStore &word_store, const vespalib::stringref &word)
+ : _word_store(word_store),
+ _word(word) {
+ }
+
+ bool operator()(const btree::EntryRef &lhs,
+ const btree::EntryRef &rhs) const {
+ return strcmp(getWord(lhs), getWord(rhs)) < 0;
+ }
+ };
+ typedef vespalib::Array<uint64_t> FeatureVector;
+ typedef vespalib::hash_map<uint32_t, FeatureVector> DocumentFeaturesMap;
+ typedef vespalib::Array<Range> RangeVector;
+ typedef vespalib::hash_map<uint32_t, RangeVector> RangeFeaturesMap;
+ typedef btree::BTree<btree::EntryRef, btree::BTreeNoLeafData,
+ btree::NoAggregated, const KeyComp &> WordIndex;
+
+ DocumentFeaturesMap _docs;
+ RangeFeaturesMap _ranges;
+ WordStore _word_store;
+ WordIndex _word_index;
+ uint32_t _currDocId;
+ FeatureVector *_currFeatures;
+ size_t _numFeatures;
+ size_t _numRanges;
+ uint32_t _arity;
+
+ void setCurrent(uint32_t docId, FeatureVector *features);
+
+public:
+ typedef std::unordered_set<uint64_t> FeatureSet;
+
+ DocumentFeaturesStore(uint32_t arity);
+ DocumentFeaturesStore(vespalib::MMapDataBuffer &buffer);
+ ~DocumentFeaturesStore();
+
+ void insert(uint64_t featureId, uint32_t docId);
+ void insert(const PredicateTreeAnnotations &annotations, uint32_t docId);
+ FeatureSet get(uint32_t docId) const;
+ void remove(uint32_t docId);
+ search::MemoryUsage getMemoryUsage() const;
+
+ void serialize(vespalib::MMapDataBuffer &buffer) const;
+};
+
+} // namespace predicate
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/predicate/predicate_bounds_posting_list.h b/searchlib/src/vespa/searchlib/predicate/predicate_bounds_posting_list.h
new file mode 100644
index 00000000000..28e0e9a7fbe
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/predicate/predicate_bounds_posting_list.h
@@ -0,0 +1,96 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "predicate_posting_list.h"
+#include "predicate_index.h"
+
+namespace search {
+namespace predicate {
+
+/**
+ * PredicatePostingList implementation for range query edge iterators (bounds)
+ * from PredicateIndex.
+ */
+template<typename Iterator>
+class PredicateBoundsPostingList : public PredicatePostingList {
+ const PredicateIntervalStore &_interval_store;
+ Iterator _iterator;
+ const IntervalWithBounds *_current_interval;
+ uint32_t _interval_count;
+ uint32_t _value_diff;
+ IntervalWithBounds _single_buf;
+
+public:
+ PredicateBoundsPostingList(const PredicateIntervalStore &interval_store,
+ Iterator it,
+ uint32_t value_diff);
+ bool next(uint32_t doc_id) override;
+ bool nextInterval() override;
+ VESPA_DLL_LOCAL uint32_t getInterval() const override {
+ return _current_interval ? _current_interval->interval : 0;
+ }
+};
+
+template<typename Iterator>
+PredicateBoundsPostingList<Iterator>::PredicateBoundsPostingList(
+ const PredicateIntervalStore &interval_store,
+ Iterator it, uint32_t value_diff)
+ : _interval_store(interval_store),
+ _iterator(it),
+ _current_interval(0),
+ _interval_count(0),
+ _value_diff(value_diff) {
+}
+
+namespace {
+ bool checkBounds(uint32_t bounds, uint32_t diff) {
+ if (bounds & 0x80000000) {
+ return diff >= (bounds & 0x3fffffff);
+ } else if (bounds & 0x40000000) {
+ return diff < (bounds & 0x3fffffff);
+ } else {
+ return (diff >= (bounds >> 16)) && (diff < (bounds & 0xffff));
+ }
+ }
+} // namespace
+
+template<typename Iterator>
+bool PredicateBoundsPostingList<Iterator>::next(uint32_t doc_id) {
+ if (_iterator.valid() && _iterator.getKey() <= doc_id) {
+ _iterator.linearSeek(doc_id + 1);
+ }
+ for (;; ++_iterator) {
+ if (!_iterator.valid()) {
+ return false;
+ }
+ _current_interval = _interval_store.get(_iterator.getData(),
+ _interval_count, &_single_buf);
+ if (checkBounds(_current_interval->bounds, _value_diff)) {
+ break;
+ }
+ if (nextInterval()) {
+ break;
+ }
+ }
+ setDocId(_iterator.getKey());
+ return true;
+}
+
+template<typename Iterator>
+bool PredicateBoundsPostingList<Iterator>::nextInterval() {
+ uint32_t next_bounds;
+ do {
+ if (__builtin_expect(_interval_count == 1, true)) {
+ return false;
+ }
+ ++_current_interval;
+ --_interval_count;
+ next_bounds = _current_interval->bounds;
+ } while (!checkBounds(next_bounds, _value_diff));
+ return true;
+}
+
+} // namespace predicate
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/predicate/predicate_hash.h b/searchlib/src/vespa/searchlib/predicate/predicate_hash.h
new file mode 100644
index 00000000000..47719a5c80e
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/predicate/predicate_hash.h
@@ -0,0 +1,125 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/vespalib/stllike/string.h>
+
+namespace search {
+namespace predicate {
+/**
+ * Hash function coming from the RISE code base, used in boolean search.
+ */
+struct PredicateHash {
+ static uint64_t hash64(vespalib::stringref aKey) {
+ return hash64(aKey.data(), aKey.size());
+ }
+
+ static uint64_t hash64(const void *data, uint32_t origLen) {
+ int64_t a, b, c;
+ int offset; // Current offset into the entire key.
+
+ const uint8_t *aKey = static_cast<const uint8_t *>(data);
+
+ // Set up the internal state
+ int anInitval = 0;
+ a = b = anInitval; // the previous hash value
+ c = 0x9e3779b97f4a7c13LL; // the golden ratio; an arbitrary value
+ offset = 0;
+ uint32_t len = origLen;
+
+ // handle most of the key
+ while (len >= 24) {
+ a += ((0xffLL & aKey[offset+0]) +
+ ((0xffLL & aKey[offset+1])<<8) +
+ ((0xffLL & aKey[offset+2])<<16) +
+ ((0xffLL & aKey[offset+3])<<24) +
+ ((0xffLL & aKey[offset+4])<<32) +
+ ((0xffLL & aKey[offset+5])<<40) +
+ ((0xffLL & aKey[offset+6])<<48) +
+ ((0xffLL & aKey[offset+7])<<56));
+ b += ((0xffLL & aKey[offset+8]) +
+ ((0xffLL & aKey[offset+9])<<8) +
+ ((0xffLL & aKey[offset+10])<<16) +
+ ((0xffLL & aKey[offset+11])<<24) +
+ ((0xffLL & aKey[offset+12])<<32) +
+ ((0xffLL & aKey[offset+13])<<40) +
+ ((0xffLL & aKey[offset+14])<<48) +
+ ((0xffLL & aKey[offset+15])<<56));
+ c += ((0xffLL & aKey[offset+16]) +
+ ((0xffLL & aKey[offset+17])<<8) +
+ ((0xffLL & aKey[offset+18])<<16) +
+ ((0xffLL & aKey[offset+19])<<24) +
+ ((0xffLL & aKey[offset+20])<<32) +
+ ((0xffLL & aKey[offset+21])<<40) +
+ ((0xffLL & aKey[offset+22])<<48) +
+ ((0xffLL & aKey[offset+23])<<56));
+
+ // Mix. This arithmetic must match the mix below.
+ a -= b; a -= c; a ^= (((uint64_t) c)>>43);
+ b -= c; b -= a; b ^= (a<<9);
+ c -= a; c -= b; c ^= (((uint64_t) b)>>8);
+ a -= b; a -= c; a ^= (((uint64_t) c)>>38);
+ b -= c; b -= a; b ^= (a<<23);
+ c -= a; c -= b; c ^= (((uint64_t) b)>>5);
+ a -= b; a -= c; a ^= (((uint64_t) c)>>35);
+ b -= c; b -= a; b ^= (a<<49);
+ c -= a; c -= b; c ^= (((uint64_t) b)>>11);
+ a -= b; a -= c; a ^= (((uint64_t) c)>>12);
+ b -= c; b -= a; b ^= (a<<18);
+ c -= a; c -= b; c ^= (((uint64_t) b)>>22);
+ // End mix.
+
+ offset += 24; len -= 24;
+ }
+
+ // handle the last 23 bytes
+ c += origLen;
+ switch(len) { // all the case statements fall through
+ case 23: c+=((0xffLL & aKey[offset+22])<<56);
+ case 22: c+=((0xffLL & aKey[offset+21])<<48);
+ case 21: c+=((0xffLL & aKey[offset+20])<<40);
+ case 20: c+=((0xffLL & aKey[offset+19])<<32);
+ case 19: c+=((0xffLL & aKey[offset+18])<<24);
+ case 18: c+=((0xffLL & aKey[offset+17])<<16);
+ case 17: c+=((0xffLL & aKey[offset+16])<<8);
+ // the first byte of c is reserved for the length
+ case 16: b+=((0xffLL & aKey[offset+15])<<56);
+ case 15: b+=((0xffLL & aKey[offset+14])<<48);
+ case 14: b+=((0xffLL & aKey[offset+13])<<40);
+ case 13: b+=((0xffLL & aKey[offset+12])<<32);
+ case 12: b+=((0xffLL & aKey[offset+11])<<24);
+ case 11: b+=((0xffLL & aKey[offset+10])<<16);
+ case 10: b+=((0xffLL & aKey[offset+ 9])<<8);
+ case 9: b+=( 0xffLL & aKey[offset+ 8]);
+ case 8: a+=((0xffLL & aKey[offset+ 7])<<56);
+ case 7: a+=((0xffLL & aKey[offset+ 6])<<48);
+ case 6: a+=((0xffLL & aKey[offset+ 5])<<40);
+ case 5: a+=((0xffLL & aKey[offset+ 4])<<32);
+ case 4: a+=((0xffLL & aKey[offset+ 3])<<24);
+ case 3: a+=((0xffLL & aKey[offset+ 2])<<16);
+ case 2: a+=((0xffLL & aKey[offset+ 1])<<8);
+ case 1: a+=( 0xffLL & aKey[offset+ 0]);
+ // case 0: nothing left to add
+ }
+
+ // Mix. This arithmetic must match the mix above.
+ a -= b; a -= c; a ^= (((uint64_t) c)>>43);
+ b -= c; b -= a; b ^= (a<<9);
+ c -= a; c -= b; c ^= (((uint64_t) b)>>8);
+ a -= b; a -= c; a ^= (((uint64_t) c)>>38);
+ b -= c; b -= a; b ^= (a<<23);
+ c -= a; c -= b; c ^= (((uint64_t) b)>>5);
+ a -= b; a -= c; a ^= (((uint64_t) c)>>35);
+ b -= c; b -= a; b ^= (a<<49);
+ c -= a; c -= b; c ^= (((uint64_t) b)>>11);
+ a -= b; a -= c; a ^= (((uint64_t) c)>>12);
+ b -= c; b -= a; b ^= (a<<18);
+ c -= a; c -= b; c ^= (((uint64_t) b)>>22);
+ // End mix.
+
+ return static_cast<uint64_t>(c);
+ }
+};
+} // namespace predicate
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/predicate/predicate_index.cpp b/searchlib/src/vespa/searchlib/predicate/predicate_index.cpp
new file mode 100644
index 00000000000..5ca00d1863f
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/predicate/predicate_index.cpp
@@ -0,0 +1,288 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/log/log.h>
+LOG_SETUP(".predicate_index");
+#include <vespa/fastos/fastos.h>
+
+#include "predicate_index.h"
+
+#include "predicate_tree_annotator.h"
+#include <vespa/searchlib/btree/entryref.h>
+#include "predicate_hash.h"
+#include <algorithm>
+
+using search::btree::EntryRef;
+using vespalib::MMapDataBuffer;
+using std::vector;
+
+namespace search {
+namespace predicate {
+
+const vespalib::string PredicateIndex::z_star_attribute_name("z-star");
+const uint64_t PredicateIndex::z_star_hash(
+ PredicateHash::hash64(PredicateIndex::z_star_attribute_name));
+const vespalib::string PredicateIndex::z_star_compressed_attribute_name("z-star-compressed");
+const uint64_t PredicateIndex::z_star_compressed_hash(
+ PredicateHash::hash64(PredicateIndex::z_star_compressed_attribute_name));
+
+template <>
+void PredicateIndex::addPosting<Interval>(
+ uint64_t feature, uint32_t doc_id, EntryRef ref) {
+ _interval_index.addPosting(feature, doc_id, ref);
+}
+template <>
+void PredicateIndex::addPosting<IntervalWithBounds>(
+ uint64_t feature, uint32_t doc_id, EntryRef ref) {
+ _bounds_index.addPosting(feature, doc_id, ref);
+}
+
+template <typename IntervalT>
+void PredicateIndex::indexDocumentFeatures(
+ uint32_t doc_id, const PredicateIndex::FeatureMap<IntervalT> &interval_map) {
+ if (interval_map.empty()) {
+ return;
+ }
+ for (const auto &map_entry : interval_map) {
+ uint64_t feature = map_entry.first;
+ const auto &interval_list = map_entry.second;
+ btree::EntryRef ref = _interval_store.insert(interval_list);
+ assert(ref.valid());
+ addPosting<IntervalT>(feature, doc_id, ref);
+ _cache.set(feature, doc_id, true);
+ }
+}
+
+namespace {
+constexpr double THRESHOLD_USE_BIT_VECTOR_CACHE = 0.1;
+
+// PostingSerializer that writes intervals from interval store based
+// on the EntryRef that is to be serialized.
+template <typename IntervalT>
+class IntervalSerializer : public PostingSerializer<EntryRef> {
+ const PredicateIntervalStore &_store;
+public:
+ IntervalSerializer(const PredicateIntervalStore &store) : _store(store) {}
+ virtual void serialize(const EntryRef &ref,
+ vespalib::MMapDataBuffer &buffer) const {
+ uint32_t size;
+ IntervalT single_buf;
+ const IntervalT *interval = _store.get(ref, size, &single_buf);
+ buffer.writeInt16(size);
+ for (uint32_t i = 0; i < size; ++i) {
+ interval[i].serialize(buffer);
+ }
+ }
+};
+
+// PostingDeserializer that writes intervals to interval store and
+// returns an EntryRef to be stored in the PredicateIndex.
+template <typename IntervalT>
+class IntervalDeserializer : public PostingDeserializer<EntryRef> {
+ PredicateIntervalStore &_store;
+public:
+ IntervalDeserializer(PredicateIntervalStore &store) : _store(store) {}
+ virtual EntryRef deserialize(vespalib::MMapDataBuffer &buffer) {
+ std::vector<IntervalT> intervals;
+ size_t size = buffer.readInt16();
+ for (uint32_t i = 0; i < size; ++i) {
+ intervals.push_back(IntervalT::deserialize(buffer));
+ }
+ return _store.insert(intervals);
+ }
+};
+
+} // namespace
+
+PredicateIndex::PredicateIndex(GenerationHandler &generation_handler, GenerationHolder &genHolder,
+ const DocIdLimitProvider &limit_provider,
+ const SimpleIndexConfig &simple_index_config, MMapDataBuffer &buffer,
+ SimpleIndexDeserializeObserver<> & observer, uint32_t version)
+ : _arity(0),
+ _generation_handler(generation_handler),
+ _limit_provider(limit_provider),
+ _interval_index(genHolder, limit_provider, simple_index_config),
+ _bounds_index(genHolder, limit_provider, simple_index_config),
+ _interval_store(),
+ _zero_constraint_docs(),
+ _features_store(buffer),
+ _cache(genHolder)
+{
+ _arity = buffer.readInt16();
+ uint32_t zero_constraint_doc_count = buffer.readInt32();
+ typename BTreeSet::Builder builder(_zero_constraint_docs.getAllocator());
+ for (size_t i = 0; i < zero_constraint_doc_count; ++i) {
+ uint32_t raw_id = buffer.readInt32();
+ uint32_t doc_id = version == 0 ? raw_id >> 6 : raw_id;
+ builder.insert(doc_id, btree::BTreeNoLeafData::_instance);
+ observer.notifyInsert(0, doc_id, 0);
+ }
+ _zero_constraint_docs.assign(builder);
+ IntervalDeserializer<Interval> interval_deserializer(_interval_store);
+ _interval_index.deserialize(buffer, interval_deserializer, observer, version);
+ IntervalDeserializer<IntervalWithBounds>
+ bounds_deserializer(_interval_store);
+ _bounds_index.deserialize(buffer, bounds_deserializer, observer, version);
+ commit();
+}
+
+void PredicateIndex::serialize(MMapDataBuffer &buffer) const {
+ _features_store.serialize(buffer);
+ buffer.writeInt16(_arity);
+ buffer.writeInt32(_zero_constraint_docs.size());
+ for (auto it = _zero_constraint_docs.begin(); it.valid(); ++it) {
+ buffer.writeInt32(it.getKey());
+ }
+ IntervalSerializer<Interval> interval_serializer(_interval_store);
+ _interval_index.serialize(buffer, interval_serializer);
+ IntervalSerializer<IntervalWithBounds> bounds_serializer(_interval_store);
+ _bounds_index.serialize(buffer, bounds_serializer);
+}
+
+void PredicateIndex::onDeserializationCompleted() {
+ _interval_index.promoteOverThresholdVectors();
+ _bounds_index.promoteOverThresholdVectors();
+}
+
+void PredicateIndex::indexDocument(uint32_t doc_id, const PredicateTreeAnnotations &annotations) {
+ indexDocumentFeatures(doc_id, annotations.interval_map);
+ indexDocumentFeatures(doc_id, annotations.bounds_map);
+ _features_store.insert(annotations, doc_id);
+}
+
+void PredicateIndex::indexEmptyDocument(uint32_t doc_id)
+{
+ _zero_constraint_docs.insert(doc_id, btree::BTreeNoLeafData::_instance);
+}
+
+namespace {
+void removeFromIndex(
+ uint64_t feature, uint32_t doc_id, SimpleIndex<btree::EntryRef> &index, PredicateIntervalStore &interval_store)
+{
+ auto result = index.removeFromPostingList(feature, doc_id);
+ if (result.second) { // Posting was removed
+ auto ref = result.first;
+ assert(ref.valid());
+ interval_store.remove(ref);
+ }
+}
+
+class DocIdIterator : public PopulateInterface::Iterator {
+public:
+ using BTreeIterator = SimpleIndex<btree::EntryRef>::BTreeIterator;
+
+ DocIdIterator(BTreeIterator it) : _it(it) { }
+ int32_t getNext() override {
+ if (_it.valid()) {
+ uint32_t docId = _it.getKey();
+ ++_it;
+ return docId;
+ }
+ return -1;
+ }
+private:
+ BTreeIterator _it;
+};
+
+} // namespace
+
+void PredicateIndex::removeDocument(uint32_t doc_id) {
+ _zero_constraint_docs.remove(doc_id);
+
+ auto features = _features_store.get(doc_id);
+ if (!features.empty()) {
+ for (auto feature : features) {
+ removeFromIndex(feature, doc_id, _interval_index,
+ _interval_store);
+ removeFromIndex(feature, doc_id, _bounds_index,
+ _interval_store);
+ }
+ _cache.removeIndex(doc_id);
+ }
+ _features_store.remove(doc_id);
+}
+
+void PredicateIndex::commit() {
+ _interval_index.commit();
+ _bounds_index.commit();
+ _zero_constraint_docs.getAllocator().freeze();
+}
+
+void PredicateIndex::trimHoldLists(generation_t used_generation) {
+ _interval_index.trimHoldLists(used_generation);
+ _bounds_index.trimHoldLists(used_generation);
+ _interval_store.trimHoldLists(used_generation);
+ _zero_constraint_docs.getAllocator().trimHoldLists(used_generation);
+}
+
+void PredicateIndex::transferHoldLists(generation_t generation) {
+ _interval_index.transferHoldLists(generation);
+ _bounds_index.transferHoldLists(generation);
+ _interval_store.transferHoldLists(generation);
+ _zero_constraint_docs.getAllocator().transferHoldLists(generation);
+}
+
+MemoryUsage PredicateIndex::getMemoryUsage() const {
+ // TODO Include bit vector cache memory usage
+ MemoryUsage combined;
+ combined.merge(_interval_index.getMemoryUsage());
+ combined.merge(_bounds_index.getMemoryUsage());
+ combined.merge(_zero_constraint_docs.getMemoryUsage());
+ combined.merge(_interval_store.getMemoryUsage());
+ combined.merge(_features_store.getMemoryUsage());
+ return combined;
+}
+
+PopulateInterface::Iterator::UP
+PredicateIndex::lookup(uint64_t key) const
+{
+ auto dictIterator = _interval_index.lookup(key);
+ if (dictIterator.valid()) {
+ auto it = _interval_index.getBTreePostingList(dictIterator.getData());
+ if (it.valid()) {
+ return PopulateInterface::Iterator::UP(new DocIdIterator(it));
+ }
+ }
+ return PopulateInterface::Iterator::UP();
+}
+
+void
+PredicateIndex::populateIfNeeded(size_t doc_id_limit)
+{
+ if ( _cache.needPopulation()) {
+ _cache.populate(doc_id_limit, *this);
+ }
+}
+
+BitVectorCache::KeySet
+PredicateIndex::lookupCachedSet(const BitVectorCache::KeyAndCountSet & keys) const
+{
+ // Don't count documents using bit vector if combined length is less than threshold
+ uint64_t total_length = 0;
+ auto cached_keys = _cache.lookupCachedSet(keys);
+ for (const auto &p : keys) {
+ if (cached_keys.find(p.first) != cached_keys.end()) {
+ total_length += p.second;
+ }
+ }
+ double fill_ratio = total_length / static_cast<double>(_limit_provider.getDocIdLimit());
+ if (fill_ratio < THRESHOLD_USE_BIT_VECTOR_CACHE) {
+ cached_keys.clear();
+ }
+ return cached_keys;
+}
+
+void
+PredicateIndex::computeCountVector(BitVectorCache::KeySet & keys, BitVectorCache::CountVector & v) const
+{
+ _cache.computeCountVector(keys, v);
+}
+
+
+void
+PredicateIndex::adjustDocIdLimit(uint32_t docId)
+{
+ _cache.adjustDocIdLimit(docId);
+}
+
+} // namespace predicate
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/predicate/predicate_index.h b/searchlib/src/vespa/searchlib/predicate/predicate_index.h
new file mode 100644
index 00000000000..d2e5c1f268e
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/predicate/predicate_index.h
@@ -0,0 +1,131 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "document_features_store.h"
+#include "predicate_interval_store.h"
+#include "simple_index.h"
+#include <vespa/searchlib/common/bitvectorcache.h>
+#include <unordered_map>
+#include <vespa/vespalib/data/databuffer.h>
+#include <vespa/vespalib/stllike/string.h>
+#include "predicate_interval.h"
+
+namespace search {
+namespace predicate {
+class PredicateTreeAnnotations;
+
+/**
+ * PredicateIndex keeps an index of boolean constraints for use with
+ * the interval algorithm. It is the central component of
+ * PredicateAttribute, and PredicateBlueprint uses it to obtain
+ * posting lists for matching.
+ */
+class PredicateIndex : public PopulateInterface {
+ typedef SimpleIndex<btree::EntryRef> IntervalIndex;
+ typedef SimpleIndex<btree::EntryRef> BoundsIndex;
+ typedef btree::BTree<uint32_t, btree::BTreeNoLeafData> BTreeSet;
+ template <typename IntervalT>
+ using FeatureMap = std::unordered_map<uint64_t, std::vector<IntervalT>>;
+ using generation_t = vespalib::GenerationHandler::generation_t;
+ template <typename T>
+ using optional = std::experimental::optional<T>;
+
+public:
+ using ZeroConstraintDocs = BTreeSet::FrozenView;
+ typedef std::unique_ptr<PredicateIndex> UP;
+ typedef vespalib::GenerationHandler GenerationHandler;
+ typedef vespalib::GenerationHolder GenerationHolder;
+ using BTreeIterator = SimpleIndex<btree::EntryRef>::BTreeIterator;
+ using VectorIterator = SimpleIndex<btree::EntryRef>::VectorIterator;
+ static const vespalib::string z_star_attribute_name;
+ static const uint64_t z_star_hash;
+ static const vespalib::string z_star_compressed_attribute_name;
+ static const uint64_t z_star_compressed_hash;
+
+private:
+ uint32_t _arity;
+ GenerationHandler &_generation_handler;
+ const DocIdLimitProvider &_limit_provider;
+ IntervalIndex _interval_index;
+ BoundsIndex _bounds_index;
+ PredicateIntervalStore _interval_store;
+ BTreeSet _zero_constraint_docs;
+
+ DocumentFeaturesStore _features_store;
+ mutable BitVectorCache _cache;
+
+ template <typename IntervalT>
+ void addPosting(uint64_t feature, uint32_t doc_id,
+ btree::EntryRef ref);
+
+ template <typename IntervalT>
+ void indexDocumentFeatures(uint32_t doc_id, const FeatureMap<IntervalT> &interval_map);
+
+ PopulateInterface::Iterator::UP lookup(uint64_t key) const override;
+
+public:
+ PredicateIndex(GenerationHandler &generation_handler, GenerationHolder &genHolder,
+ const DocIdLimitProvider &limit_provider,
+ const SimpleIndexConfig &simple_index_config, uint32_t arity)
+ : _arity(arity),
+ _generation_handler(generation_handler),
+ _limit_provider(limit_provider),
+ _interval_index(genHolder, limit_provider, simple_index_config),
+ _bounds_index(genHolder, limit_provider, simple_index_config),
+ _interval_store(),
+ _zero_constraint_docs(),
+ _features_store(arity),
+ _cache(genHolder) {
+ }
+ // deserializes PredicateIndex from buffer.
+ // The observer can be used to gain some insight into what has been added to the index..
+ PredicateIndex(GenerationHandler &generation_handler, GenerationHolder &genHolder,
+ const DocIdLimitProvider &limit_provider,
+ const SimpleIndexConfig &simple_index_config, vespalib::MMapDataBuffer &buffer,
+ SimpleIndexDeserializeObserver<> & observer, uint32_t version);
+
+ void serialize(vespalib::MMapDataBuffer &buffer) const;
+ void onDeserializationCompleted();
+
+ void indexEmptyDocument(uint32_t doc_id);
+ void indexDocument(uint32_t doc_id, const PredicateTreeAnnotations &annotations);
+ void removeDocument(uint32_t doc_id);
+ void commit();
+ void trimHoldLists(generation_t used_generation);
+ void transferHoldLists(generation_t generation);
+ MemoryUsage getMemoryUsage() const;
+
+ int getArity() const { return _arity; }
+
+ const ZeroConstraintDocs getZeroConstraintDocs() const {
+ return _zero_constraint_docs.getFrozenView();
+ }
+
+ const IntervalIndex &getIntervalIndex() const {
+ return _interval_index;
+ }
+
+ const BoundsIndex &getBoundsIndex() const {
+ return _bounds_index;
+ }
+
+ const PredicateIntervalStore &getIntervalStore() const {
+ return _interval_store;
+ }
+
+ void populateIfNeeded(size_t doc_id_limit);
+ BitVectorCache::KeySet lookupCachedSet(const BitVectorCache::KeyAndCountSet & keys) const;
+ void computeCountVector(BitVectorCache::KeySet & keys, BitVectorCache::CountVector & v) const;
+
+ /*
+ * Adjust size of structures to have space for docId.
+ */
+ void adjustDocIdLimit(uint32_t docId);
+};
+
+extern template class SimpleIndex<btree::EntryRef>;
+
+} // namespace predicate
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/predicate/predicate_interval.cpp b/searchlib/src/vespa/searchlib/predicate/predicate_interval.cpp
new file mode 100644
index 00000000000..d6e830f3a15
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/predicate/predicate_interval.cpp
@@ -0,0 +1,24 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "predicate_interval.h"
+#include <ostream>
+
+namespace search {
+namespace predicate {
+
+std::ostream &operator<<(std::ostream &out, const Interval &i) {
+ std::ios_base::fmtflags flags = out.flags();
+ out << "0x" << std::hex << i.interval;
+ out.flags(flags);
+ return out;
+}
+
+std::ostream &operator<<(std::ostream &out, const IntervalWithBounds &i) {
+ std::ios_base::fmtflags flags = out.flags();
+ out << "0x" << std::hex << i.interval << ", 0x" << i.bounds;
+ out.flags(flags);
+ return out;
+}
+
+} // namespace predicate
+}
diff --git a/searchlib/src/vespa/searchlib/predicate/predicate_interval.h b/searchlib/src/vespa/searchlib/predicate/predicate_interval.h
new file mode 100644
index 00000000000..fede659582a
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/predicate/predicate_interval.h
@@ -0,0 +1,64 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/vespalib/data/databuffer.h>
+
+namespace search {
+namespace predicate {
+
+/**
+ * Stores a simple interval for the boolean constraint interval algorithm.
+ */
+struct Interval {
+ uint32_t interval;
+
+ Interval() : interval(0) {}
+ Interval(uint32_t interval_) : interval(interval_) {}
+
+ void serialize(vespalib::MMapDataBuffer &buffer) const {
+ buffer.writeInt32(interval);
+ }
+ static Interval deserialize(vespalib::MMapDataBuffer &buffer) {
+ return Interval{buffer.readInt32()};
+ }
+ bool operator==(const Interval &other) const {
+ return interval == other.interval;
+ }
+ bool valid() const {
+ return interval != 0;
+ }
+};
+std::ostream &operator<<(std::ostream &out, const Interval &i);
+
+/**
+ * Stores an interval and bounds information for edge cases of range
+ * searches in the boolean constraint interval algorithm.
+ */
+struct IntervalWithBounds {
+ uint32_t interval;
+ uint32_t bounds;
+
+ IntervalWithBounds() : interval(0), bounds(0) {}
+ IntervalWithBounds(uint32_t interval_, uint32_t bounds_) : interval(interval_), bounds(bounds_) {}
+
+ void serialize(vespalib::MMapDataBuffer &buffer) const {
+ buffer.writeInt32(interval);
+ buffer.writeInt32(bounds);
+ }
+ static IntervalWithBounds deserialize(vespalib::MMapDataBuffer &buffer) {
+ uint32_t interval = buffer.readInt32();
+ uint32_t bounds = buffer.readInt32();
+ return IntervalWithBounds{interval, bounds};
+ }
+ bool operator==(const IntervalWithBounds &other) const {
+ return interval == other.interval && bounds == other.bounds;
+ }
+ bool valid() const {
+ return interval != 0 && bounds != 0;
+ }
+};
+std::ostream &operator<<(std::ostream &out, const IntervalWithBounds &i);
+
+} // namespace predicate
+}
diff --git a/searchlib/src/vespa/searchlib/predicate/predicate_interval_posting_list.h b/searchlib/src/vespa/searchlib/predicate/predicate_interval_posting_list.h
new file mode 100644
index 00000000000..918f2e2f1df
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/predicate/predicate_interval_posting_list.h
@@ -0,0 +1,68 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "predicate_posting_list.h"
+#include "predicate_index.h"
+
+namespace search {
+namespace predicate {
+
+/**
+ * PredicatePostingList implementation for regular interval iterators
+ * from PredicateIndex.
+ */
+template<typename Iterator>
+class PredicateIntervalPostingList : public PredicatePostingList {
+ const PredicateIntervalStore &_interval_store;
+ Iterator _iterator;
+ const Interval *_current_interval;
+ uint32_t _interval_count;
+ Interval _single_buf;
+
+public:
+ PredicateIntervalPostingList(const PredicateIntervalStore &interval_store, Iterator it);
+ bool next(uint32_t doc_id) override;
+ VESPA_DLL_LOCAL bool nextInterval() override {
+ if (_interval_count == 1) {
+ return false;
+ } else {
+ ++_current_interval;
+ --_interval_count;
+ return true;
+ }
+ }
+ VESPA_DLL_LOCAL uint32_t getInterval() const override {
+ return _current_interval ? _current_interval->interval : 0;
+ }
+};
+
+template<typename Iterator>
+PredicateIntervalPostingList<Iterator>::PredicateIntervalPostingList(
+ const PredicateIntervalStore &interval_store, Iterator it)
+ : _interval_store(interval_store),
+ _iterator(it),
+ _current_interval(nullptr),
+ _interval_count(0) {
+}
+
+template<typename Iterator>
+bool PredicateIntervalPostingList<Iterator>::next(uint32_t doc_id) {
+ if (!_iterator.valid()) {
+ return false;
+ }
+ if (__builtin_expect(_iterator.getKey() <= doc_id, true)) {
+ _iterator.linearSeek(doc_id + 1);
+ if (!_iterator.valid()) {
+ return false;
+ }
+ }
+ _current_interval =
+ _interval_store.get(_iterator.getData(), _interval_count, &_single_buf);
+ setDocId(_iterator.getKey());
+ return true;
+}
+
+} // namespace predicate
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/predicate/predicate_interval_store.cpp b/searchlib/src/vespa/searchlib/predicate/predicate_interval_store.cpp
new file mode 100644
index 00000000000..b4204010eec
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/predicate/predicate_interval_store.cpp
@@ -0,0 +1,124 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/log/log.h>
+LOG_SETUP(".predicate_interval_store");
+#include <vespa/fastos/fastos.h>
+
+#include "predicate_interval_store.h"
+
+#include "predicate_index.h"
+#include <vespa/searchlib/btree/bufferstate.h>
+#include <vespa/searchlib/btree/datastore.hpp>
+#include <vespa/searchlib/btree/entryref.h>
+
+using search::btree::BufferState;
+using search::btree::EntryRef;
+using std::vector;
+
+namespace search {
+namespace predicate {
+
+template <typename T>
+PredicateIntervalStore::Entry<T> PredicateIntervalStore::allocNewEntry(
+ uint32_t type_id, uint32_t size) {
+ _store.ensureBufferCapacity(type_id, size);
+ uint32_t active_buffer_id = _store.getActiveBufferId(type_id);
+ btree::BufferState &state = _store.getBufferState(active_buffer_id);
+ assert(state._state == btree::BufferState::ACTIVE);
+ size_t old_size = state.size();
+ T *buf = _store.getBufferEntry<T>(active_buffer_id, old_size);
+ state.pushed_back(size);
+ return {RefType(old_size, active_buffer_id), buf};
+}
+
+PredicateIntervalStore::PredicateIntervalStore()
+ : _store(),
+ _size1Type(1, 1024u, RefType::offsetSize()),
+ _store_adapter(_store),
+ _ref_cache(_store_adapter) {
+
+ // This order determines type ids.
+ _store.addType(&_size1Type);
+
+ _store.initActiveBuffers();
+}
+
+PredicateIntervalStore::~PredicateIntervalStore() {
+ _store.dropBuffers();
+}
+
+//
+// NOTE: The allocated entries are arrays of type uint32_t, but the
+// entries are used as arrays of either Interval or IntervalWithBounds
+// objects (PODs). These objects are memcpy'ed into the uint32_t
+// arrays, and in the get() function they are typecast back to the
+// object expected by the caller. Which type an entry has cannot be
+// inferred from the EntryRef, but must be known by the caller.
+//
+// This saves us from having separate buffers for Intervals and
+// IntervalWithBounds objects, since the caller knows the correct type
+// anyway.
+//
+template <typename IntervalT>
+btree::EntryRef PredicateIntervalStore::insert(
+ const vector<IntervalT> &intervals) {
+ const uint32_t size = entrySize<IntervalT>() * intervals.size();
+ if (size == 0) {
+ return btree::EntryRef();
+ }
+ uint32_t *buffer;
+ btree::EntryRef ref;
+ if (size == 1 && intervals[0].interval <= RefCacheType::DATA_REF_MASK) {
+ return btree::EntryRef(intervals[0].interval);
+ }
+ uint32_t cached_ref = _ref_cache.find(
+ reinterpret_cast<const uint32_t *>(&intervals[0]), size);
+ if (cached_ref) {
+ return cached_ref;
+ }
+
+ if (size < RefCacheType::MAX_SIZE) {
+ auto entry = allocNewEntry<uint32_t>(0, size);
+ buffer = entry.buffer;
+ ref = entry.ref.ref() | (size << RefCacheType::SIZE_SHIFT);
+ } else {
+ auto entry = allocNewEntry<uint32_t>(0, size + 1);
+ buffer = entry.buffer;
+ ref = entry.ref.ref() | RefCacheType::SIZE_MASK;
+ *buffer++ = size;
+ }
+ memcpy(buffer, &intervals[0], size * sizeof(uint32_t));
+ _ref_cache.insert(ref.ref());
+ return ref;
+}
+// Explicit instantiation for relevant types.
+template
+EntryRef PredicateIntervalStore::insert(const vector<Interval> &);
+template
+EntryRef PredicateIntervalStore::insert(const vector<IntervalWithBounds> &);
+
+void PredicateIntervalStore::remove(EntryRef ref) {
+ if (ref.valid()) {
+ uint32_t buffer_id = RefType(ref).bufferId();
+ if (buffer_id == 0) { // single interval optimization.
+ return;
+ }
+ // Don't remove anything.
+
+ // BufferState &state = _store.getBufferState(buffer_id);
+ // uint32_t type_id = state.getTypeId();
+ // uint32_t size = type_id <= MAX_ARRAY_SIZE ? type_id : 1;
+ // _store.holdElem(ref, size);
+ }
+}
+
+void PredicateIntervalStore::trimHoldLists(generation_t used_generation) {
+ _store.trimHoldLists(used_generation);
+}
+
+void PredicateIntervalStore::transferHoldLists(generation_t generation) {
+ _store.transferHoldLists(generation);
+}
+
+} // namespace predicate
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/predicate/predicate_interval_store.h b/searchlib/src/vespa/searchlib/predicate/predicate_interval_store.h
new file mode 100644
index 00000000000..585b9e5bcb9
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/predicate/predicate_interval_store.h
@@ -0,0 +1,119 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "predicate_ref_cache.h"
+#include <vespa/searchlib/btree/bufferstate.h>
+#include <vespa/searchlib/btree/datastore.h>
+#include <vespa/searchlib/btree/entryref.h>
+#include <vector>
+
+namespace search {
+namespace predicate {
+class Interval;
+
+/**
+ * Stores interval entries in a memory-efficient way.
+ * It works with both Interval and IntervalWithBounds entries.
+ */
+class PredicateIntervalStore {
+ class DataStoreAdapter;
+ typedef PredicateRefCache<DataStoreAdapter, 8> RefCacheType;
+ typedef btree::DataStoreT<btree::EntryRefT<18, 6>> DataStoreType;
+ typedef DataStoreType::RefType RefType;
+ using generation_t = vespalib::GenerationHandler::generation_t;
+
+ DataStoreType _store;
+ btree::BufferType<uint32_t> _size1Type;
+
+ class DataStoreAdapter {
+ const DataStoreType &_store;
+ public:
+ DataStoreAdapter(const DataStoreType &store) : _store(store) {}
+ const uint32_t *getBuffer(uint32_t ref) const {
+ RefType entry_ref(ref);
+ return _store.getBufferEntry<uint32_t>(
+ entry_ref.bufferId(), entry_ref.offset());
+ }
+ };
+ DataStoreAdapter _store_adapter;
+ RefCacheType _ref_cache;
+
+ // Return type for private allocation functions
+ template <typename T>
+ struct Entry {
+ RefType ref;
+ T *buffer;
+ };
+
+ // Allocates a new entry in a datastore buffer.
+ template <typename T>
+ Entry<T> allocNewEntry(uint32_t type_id, uint32_t size);
+ // Returns the size of an interval entry in number of uint32_t.
+ template <typename IntervalT>
+ static uint32_t entrySize() { return sizeof(IntervalT) / sizeof(uint32_t); }
+
+public:
+ PredicateIntervalStore();
+ ~PredicateIntervalStore();
+
+ /**
+ * Inserts an array of intervals into the store.
+ * IntervalT is either Interval or IntervalWithBounds.
+ */
+ template <typename IntervalT>
+ btree::EntryRef insert(const std::vector<IntervalT> &intervals);
+
+ /**
+ * Removes an entry. The entry remains accessible until commit
+ * is called, and also as long as readers hold the current
+ * generation.
+ *
+ * Remove is currently disabled, as the ref cache is assumed to
+ * keep the total number of different entries low.
+ */
+ void remove(btree::EntryRef ref);
+
+ void trimHoldLists(generation_t used_generation);
+
+ void transferHoldLists(generation_t generation);
+
+ /**
+ * Return memory usage (only the data store is included)
+ */
+ MemoryUsage getMemoryUsage() const {
+ return _store.getMemoryUsage();
+ }
+
+ /**
+ * Retrieves a list of intervals.
+ * IntervalT is either Interval or IntervalWithBounds.
+ * single_buf is a pointer to a single IntervalT, used by the
+ * single interval optimization.
+ */
+ template <typename IntervalT>
+ const IntervalT *get(btree::EntryRef btree_ref,
+ uint32_t &size_out,
+ IntervalT *single_buf) const
+ {
+ uint32_t size = btree_ref.ref() >> RefCacheType::SIZE_SHIFT;
+ RefType data_ref(btree_ref.ref() & RefCacheType::DATA_REF_MASK);
+ if (__builtin_expect(size == 0, true)) { // single-interval optimization
+ *single_buf = IntervalT();
+ single_buf->interval = data_ref.ref();
+ size_out = 1;
+ return single_buf;
+ }
+ const uint32_t *buf =
+ _store.getBufferEntry<uint32_t>(data_ref.bufferId(),
+ data_ref.offset());
+ if (size == RefCacheType::MAX_SIZE) {
+ size = *buf++;
+ }
+ size_out = size / entrySize<IntervalT>();
+ return reinterpret_cast<const IntervalT *>(buf);
+ }
+};
+} // namespace predicate
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/predicate/predicate_posting_list.h b/searchlib/src/vespa/searchlib/predicate/predicate_posting_list.h
new file mode 100644
index 00000000000..6ee20c26a79
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/predicate/predicate_posting_list.h
@@ -0,0 +1,52 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <memory>
+#include <cstdint>
+#include <vespa/fastos/dynamiclibrary.h>
+
+/**
+ * Interface for posting lists used by PredicateSearch.
+ */
+namespace search {
+namespace predicate {
+
+class PredicatePostingList {
+ uint32_t _docId;
+ uint64_t _subquery;
+
+protected:
+ PredicatePostingList()
+ : _docId(0),
+ _subquery(UINT64_MAX) {
+ }
+
+ void setDocId(uint32_t docId) { _docId = docId; }
+
+public:
+ using UP = std::unique_ptr<PredicatePostingList>;
+
+ virtual ~PredicatePostingList() {}
+
+ /*
+ * Moves to next document after the one supplied.
+ * Returns false if there were no more doc ids.
+ */
+ virtual bool next(uint32_t docId) = 0;
+
+ /*
+ * Moves to the next interval within the current doc id.
+ * Returns false if there were no more intervals for the current doc id.
+ */
+ virtual bool nextInterval() = 0;
+
+ uint32_t getDocId() const { return _docId; }
+ VESPA_DLL_LOCAL virtual uint32_t getInterval() const = 0;
+
+ // Comes from the query that triggered inclusion of this posting list.
+ void setSubquery(uint64_t subquery) { _subquery = subquery; }
+ uint64_t getSubquery() const { return _subquery; }
+};
+
+}
+}
diff --git a/searchlib/src/vespa/searchlib/predicate/predicate_range_expander.cpp b/searchlib/src/vespa/searchlib/predicate/predicate_range_expander.cpp
new file mode 100644
index 00000000000..bb7e26f168f
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/predicate/predicate_range_expander.cpp
@@ -0,0 +1,17 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/log/log.h>
+LOG_SETUP(".predicate_range_expander");
+#include <vespa/fastos/fastos.h>
+
+#include "predicate_range_expander.h"
+
+namespace search {
+namespace predicate {
+
+void PredicateRangeExpander::debugLog(const char *fmt, const char *msg) {
+ LOG(debug, fmt, msg);
+}
+
+} // namespace predicate
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/predicate/predicate_range_expander.h b/searchlib/src/vespa/searchlib/predicate/predicate_range_expander.h
new file mode 100644
index 00000000000..b7d5f25e78b
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/predicate/predicate_range_expander.h
@@ -0,0 +1,122 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "predicate_hash.h"
+#include <vespa/vespalib/stllike/string.h>
+#include <vespa/vespalib/util/stringfmt.h>
+
+namespace search {
+namespace predicate {
+
+/**
+ * Helper class for expanding ranges. This functionality is ported from
+ * com.yahoo.vespa.indexinglanguage.predicate.ComplexNodeTransformer
+ *
+ * It is tested through document_features_store_test.cpp.
+ */
+class PredicateRangeExpander {
+ static void debugLog(const char *format_str, const char *msg);
+
+ template <typename InsertIt>
+ static void addEdgePartition(const char *label, uint64_t value,
+ bool negative, InsertIt out) {
+ vespalib::string to_hash =
+ vespalib::make_string("%s%s%" PRIu64, label,
+ negative? "=-" : "=", value);
+ debugLog("Hashing edge partition %s", to_hash.c_str());
+ *out++ = PredicateHash::hash64(to_hash);
+ }
+
+ template <typename InsertIt>
+ static void addPartitions(const char *label, uint64_t part,
+ uint64_t part_size, uint32_t first,
+ uint32_t last, bool negative, InsertIt out) {
+ for (uint32_t i = first; i < last; ++i) {
+ uint64_t from = (part + i) * part_size;
+ uint64_t to = from + part_size - 1;
+ if (negative) {
+ std::swap(to, from);
+ }
+ vespalib::string to_hash =
+ vespalib::make_string("%s%s%" PRIu64 "-%" PRIu64, label,
+ negative? "=-" : "=", from, to);
+ debugLog("Hashing partition %s", to_hash.c_str());
+ *out++ = PredicateHash::hash64(to_hash);
+ }
+ }
+
+ template <typename InsertIt>
+ static void makePartitions(const char *label,
+ uint64_t from, uint64_t to,
+ uint64_t step_size, int32_t arity,
+ bool negative, InsertIt out) {
+ uint32_t from_remainder = from % arity;
+ uint32_t to_remainder = to % arity;
+ uint64_t next_from = from - from_remainder;
+ uint64_t next_to = to - to_remainder;
+ if (next_from == next_to) {
+ addPartitions(label, next_from, step_size,
+ from_remainder, to_remainder, negative, out);
+ } else {
+ if (from_remainder > 0) {
+ addPartitions(label, next_from, step_size,
+ from_remainder, arity, negative, out);
+ from = next_from + arity;
+ }
+ addPartitions(label, next_to, step_size,
+ 0, to_remainder, negative, out);
+ makePartitions(label, from / arity, to / arity,
+ step_size * arity, arity, negative, out);
+ }
+ }
+
+ template <typename InsertIt>
+ static void partitionRange(const char *label, uint64_t from, uint64_t to,
+ uint32_t arity, bool negative, InsertIt out) {
+ uint32_t from_remainder = from % arity;
+ // operate on exclusive upper bound.
+ uint32_t to_remainder = (to + 1) % arity;
+ uint64_t from_val = from - from_remainder;
+ uint64_t to_val = to - to_remainder;
+ if (from_val == to_val + 1) {
+ addEdgePartition(label, from_val, negative, out);
+ return;
+ } else {
+ if (from_remainder != 0) {
+ addEdgePartition(label, from_val, negative, out);
+ from_val += arity;
+ }
+ if (to_remainder != 0) {
+ addEdgePartition(label, to_val + 1, negative, out);
+ }
+ }
+ makePartitions(label, from_val / arity,
+ (to_val - (arity - 1)) / arity + 1,
+ arity, arity, negative, out);
+ }
+
+public:
+ // Expands a range and returns the hash values through the insert iterator.
+ template <typename InsertIt>
+ static void expandRange(const char *label, int64_t from, int64_t to,
+ uint32_t arity, InsertIt out) {
+ if (from < 0) {
+ if (to < 0) {
+ // Special case for to==-1. -X-0 means the same as -X-1,
+ // but is more efficient.
+ partitionRange(label, (to == -1 ? 0 : -to), -from, arity,
+ true, out);
+ } else {
+ partitionRange(label, 0, -from, arity, true, out);
+ partitionRange(label, 0, to, arity, false, out);
+ }
+ } else {
+ partitionRange(label, from, to, arity, false, out);
+ }
+ }
+};
+
+} // namespace predicate
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/predicate/predicate_range_term_expander.h b/searchlib/src/vespa/searchlib/predicate/predicate_range_term_expander.h
new file mode 100644
index 00000000000..2918c96a0c5
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/predicate/predicate_range_term_expander.h
@@ -0,0 +1,99 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/vespalib/stllike/string.h>
+
+namespace search {
+namespace predicate {
+
+/**
+ * Helper class for expanding a point in a predicate range query to
+ * the hashed labels. Used by PredicateBlueprint.
+ */
+class PredicateRangeTermExpander {
+ int _arity;
+ uint16_t _max_positive_levels;
+ uint16_t _max_negative_levels;
+ int64_t _lower_bound;
+ int64_t _upper_bound;
+
+public:
+ PredicateRangeTermExpander(int arity,
+ int64_t lower_bound = LLONG_MIN,
+ int64_t upper_bound = LLONG_MAX)
+ : _arity(arity),
+ _max_positive_levels(1),
+ _max_negative_levels(1),
+ _lower_bound(lower_bound),
+ _upper_bound(upper_bound) {
+ uint64_t t = _upper_bound;
+ while ((t /= _arity) > 0) ++_max_positive_levels;
+ t = -_lower_bound;
+ while ((t /= _arity) > 0) ++_max_negative_levels;
+ }
+
+ template <typename Handler>
+ void expand(const vespalib::string &key, int64_t value, Handler &handler);
+};
+
+
+/**
+ * Handler must implement handleRange(string) and handleEdge(string, uint64_t).
+ */
+template <typename Handler>
+void PredicateRangeTermExpander::expand(
+ const vespalib::string &key, int64_t signed_value, Handler &handler) {
+ if (signed_value < _lower_bound || signed_value > _upper_bound) {
+ LOG(warning, "Search outside bounds should have been rejected by "
+ "ValidatePredicateSearcher.");
+ return;
+ }
+ char buffer[21 * 2 + 3 + key.size()]; // 2 numbers + punctuation + key
+ int size;
+ int prefix_size = sprintf(buffer, "%s=", key.c_str());
+ bool negative = signed_value < 0;
+ uint64_t value;
+ int max_levels;
+ if (negative) {
+ value = -signed_value;
+ buffer[prefix_size++] = '-';
+ max_levels = _max_negative_levels;
+ } else {
+ value = signed_value;
+ max_levels = _max_positive_levels;
+ }
+
+ int64_t edge_interval = (value / _arity) * _arity;
+ size = sprintf(buffer + prefix_size, "%lu", edge_interval);
+ handler.handleEdge(vespalib::stringref(buffer, prefix_size + size),
+ value - edge_interval);
+
+ uint64_t level_size = _arity;
+ for (int i = 0; i < max_levels; ++i) {
+ uint64_t start = (value / level_size) * level_size;
+ if (negative) {
+ if (start + level_size - 1 > uint64_t(-LLONG_MIN)) {
+ break;
+ }
+ size = sprintf(buffer + prefix_size, "%lu-%lu",
+ start + level_size - 1, start);
+ } else {
+ if (start + level_size - 1 > LLONG_MAX) {
+ break;
+ }
+ size = sprintf(buffer + prefix_size, "%lu-%lu",
+ start, start + level_size - 1);
+ }
+ handler.handleRange(vespalib::stringref(buffer, prefix_size + size));
+ level_size *= _arity;
+ if (!level_size) { // overflow
+ break;
+ }
+ }
+}
+
+
+} // namespace search::predicate
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/predicate/predicate_ref_cache.h b/searchlib/src/vespa/searchlib/predicate/predicate_ref_cache.h
new file mode 100644
index 00000000000..9aae296311a
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/predicate/predicate_ref_cache.h
@@ -0,0 +1,160 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+
+#pragma once
+
+#include <set>
+
+namespace search {
+namespace predicate {
+
+/**
+ * Holds the data used in a cache lookup operation.
+ */
+struct CurrentZeroRef {
+ const uint32_t *buf;
+ uint32_t size;
+ void set(const uint32_t *b, uint32_t s) {
+ buf = b;
+ size = s;
+ }
+};
+
+/**
+ * Comparator (less) used in std::set. It holds a reference to a data
+ * store, from which it looks up buffers from the "data_ref"-part of the
+ * cached references.
+ */
+template <typename BufferStore, int SIZE_BITS>
+class RefCacheComparator {
+ enum { DATA_REF_BITS = 32 - SIZE_BITS,
+ DATA_REF_MASK = (1 << DATA_REF_BITS) - 1,
+ MAX_SIZE = (1 << SIZE_BITS) - 1,
+ SIZE_SHIFT = DATA_REF_BITS };
+ const BufferStore &_store;
+ const CurrentZeroRef &_current_zero_ref;
+public:
+ RefCacheComparator(const BufferStore &store,
+ const CurrentZeroRef &zero_ref)
+ : _store(store),
+ _current_zero_ref(zero_ref){
+ }
+
+ void getSizeAndBuf(uint32_t ref, uint32_t &size,
+ const uint32_t *&buf) const {
+ if (ref) {
+ size = ref >> SIZE_SHIFT;
+ buf = _store.getBuffer(ref & DATA_REF_MASK);
+ if (size == MAX_SIZE) {
+ size = *buf++;
+ }
+ } else {
+ size = _current_zero_ref.size;
+ buf = _current_zero_ref.buf;
+ }
+ }
+
+ bool compareWithZeroRef(uint32_t lhs, uint32_t rhs) const {
+ uint32_t lhs_size;
+ const uint32_t *lhs_buf;
+ getSizeAndBuf(lhs, lhs_size, lhs_buf);
+ uint32_t rhs_size;
+ const uint32_t *rhs_buf;
+ getSizeAndBuf(rhs, rhs_size, rhs_buf);
+
+ if (lhs_size != rhs_size) {
+ return lhs_size < rhs_size;
+ }
+ for (uint32_t i = 0; i < lhs_size; ++i) {
+ if (lhs_buf[i] != rhs_buf[i]) {
+ return lhs_buf[i] < rhs_buf[i];
+ }
+ }
+ return false;
+ }
+
+ bool operator() (uint32_t lhs, uint32_t rhs) const {
+ if (!lhs || !rhs) {
+ return compareWithZeroRef(lhs, rhs);
+ }
+ uint32_t lhs_size = lhs >> SIZE_SHIFT;
+ uint32_t rhs_size = rhs >> SIZE_SHIFT;
+ if (lhs_size != rhs_size) {
+ return lhs_size < rhs_size;
+ }
+ if (lhs == rhs) {
+ return false;
+ }
+ const uint32_t *lhs_buf = _store.getBuffer(lhs & DATA_REF_MASK);
+ const uint32_t *rhs_buf = _store.getBuffer(rhs & DATA_REF_MASK);
+ uint32_t size = lhs_size;
+ if (lhs_size == MAX_SIZE) {
+ size = lhs_buf[0] + 1; // Compare sizes and data in loop
+ // below. If actual size differs
+ // then loop will exit in first
+ // iteration.
+ }
+ for (uint32_t i = 0; i < size; ++i) {
+ if (lhs_buf[i] != rhs_buf[i]) {
+ return lhs_buf[i] < rhs_buf[i];
+ }
+ }
+ return false;
+ }
+};
+
+/**
+ * Holds a set of refs and a reference to a datastore that is used to
+ * lookup data based on the "data_ref"-part of the ref. Each ref also
+ * uses the upper bits to hold the size of the data refered to. If the
+ * size is too large to represent by the allocated bits, the max size
+ * is used, and the actual size is stored in the first 32-bit value of
+ * the data buffer.
+ *
+ * Note that this class is inherently single threaded, and thus needs
+ * external synchronization if used from multiple threads. (Both
+ * insert and find)
+ */
+template <typename BufferStore, int SIZE_BITS = 8>
+class PredicateRefCache {
+ typedef RefCacheComparator<BufferStore, SIZE_BITS> ComparatorType;
+
+ mutable CurrentZeroRef _current_zero_ref;
+ std::set<uint32_t, ComparatorType> _ref_cache;
+
+public:
+ enum { DATA_REF_BITS = 32 - SIZE_BITS,
+ DATA_REF_MASK = (1 << DATA_REF_BITS) - 1,
+ MAX_SIZE = (1 << SIZE_BITS) - 1,
+ SIZE_SHIFT = DATA_REF_BITS,
+ SIZE_MASK = MAX_SIZE << SIZE_SHIFT};
+
+ PredicateRefCache(const BufferStore &store)
+ : _ref_cache(ComparatorType(store, _current_zero_ref)) {
+ }
+
+ /**
+ * Inserts a ref into the cache. The ref refers to data already
+ * inserted in the underlying data store.
+ */
+ uint32_t insert(uint32_t ref) {
+ assert(ref);
+ return *_ref_cache.insert(ref).first;
+ }
+
+ /**
+ * Checks if a data sequence is already present in the
+ * cache. Returns the datastore ref, or 0 if not present.
+ */
+ uint32_t find(const uint32_t *buf, uint32_t size) const {
+ _current_zero_ref.set(buf, size);
+ auto it = _ref_cache.find(0);
+ if (it != _ref_cache.end()) {
+ return *it;
+ }
+ return 0;
+ }
+};
+
+} // namespace predicate
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/predicate/predicate_tree_analyzer.cpp b/searchlib/src/vespa/searchlib/predicate/predicate_tree_analyzer.cpp
new file mode 100644
index 00000000000..21878d5ca9e
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/predicate/predicate_tree_analyzer.cpp
@@ -0,0 +1,168 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/log/log.h>
+LOG_SETUP(".predicate_tree_analyzer");
+#include <vespa/fastos/fastos.h>
+
+#include "predicate_tree_analyzer.h"
+#include <vespa/document/predicate/predicate.h>
+#include <algorithm>
+#include <iostream>
+#include <cmath>
+
+using document::Predicate;
+using std::map;
+using std::min;
+using std::string;
+using vespalib::slime::Inspector;
+using vespalib::slime::Memory;
+
+namespace search {
+namespace predicate {
+namespace {
+long getType(const Inspector &in, bool negated) {
+ long type = in[Predicate::NODE_TYPE].asLong();
+ if (negated) {
+ if (type == Predicate::TYPE_CONJUNCTION) {
+ return Predicate::TYPE_DISJUNCTION;
+ } else if (type == Predicate::TYPE_DISJUNCTION) {
+ return Predicate::TYPE_CONJUNCTION;
+ }
+ }
+ return type;
+}
+
+void createOrIncrease(map<string, int> &counts, const string &key) {
+ auto it = counts.find(key);
+ if (it == counts.end()) {
+ counts.insert(make_pair(key, 1));
+ } else {
+ ++(it->second);
+ }
+}
+} // namespace
+
+void PredicateTreeAnalyzer::traverseTree(const Inspector &in) {
+ switch (getType(in, _negated)) {
+ case Predicate::TYPE_NEGATION:
+ assert(in[Predicate::CHILDREN].children() == 1);
+ _negated = !_negated;
+ traverseTree(in[Predicate::CHILDREN][0]);
+ _negated = !_negated;
+ return;
+ case Predicate::TYPE_CONJUNCTION: {
+ int crumb_size = _crumbs.size();
+ int size = 0;
+ for (size_t i = 0; i < in[Predicate::CHILDREN].children(); ++i) {
+ _crumbs.setChild(i, 'a');
+ traverseTree(in[Predicate::CHILDREN][i]);
+ size += _size;
+ _size_map.insert(make_pair(_crumbs.getCrumb(), _size));
+ _crumbs.resize(crumb_size);
+ }
+ _size = size;
+ return;
+ }
+ case Predicate::TYPE_DISJUNCTION: {
+ int crumb_size = _crumbs.size();
+ int size = 0;
+ for (size_t i = 0; i < in[Predicate::CHILDREN].children(); ++i) {
+ _crumbs.setChild(i, 'o');
+ traverseTree(in[Predicate::CHILDREN][i]);
+ size += _size;
+ _crumbs.resize(crumb_size);
+ }
+ _size = size;
+ return;
+ }
+ case Predicate::TYPE_FEATURE_SET:
+ if (_negated) {
+ _size = 2;
+ _has_not = true;
+ } else {
+ _size = 1;
+ Memory label_mem = in[Predicate::KEY].asString();
+ string label(label_mem.data, label_mem.size);
+ label.push_back('=');
+ const size_t prefix_size = label.size();
+ for (size_t i = 0; i < in[Predicate::SET].children(); ++i) {
+ Memory value = in[Predicate::SET][i].asString();
+ label.resize(prefix_size);
+ label.append(value.data, value.size);
+ createOrIncrease(_key_counts, label);
+ }
+ }
+ return;
+ case Predicate::TYPE_FEATURE_RANGE: {
+ if (_negated) {
+ _size = 2;
+ _has_not = true;
+ } else {
+ _size = 1;
+ string key = in[Predicate::KEY].asString().make_string();
+ createOrIncrease(_key_counts, key);
+ }
+ }
+ } // switch
+}
+
+float PredicateTreeAnalyzer::findMinFeature(const Inspector &in) {
+ float min_feature = 0.0f;
+ switch (getType(in, _negated)) {
+ case Predicate::TYPE_CONJUNCTION: // sum of children
+ for (size_t i = 0; i < in[Predicate::CHILDREN].children(); ++i) {
+ min_feature += findMinFeature(in[Predicate::CHILDREN][i]);
+ }
+ return min_feature;
+ case Predicate::TYPE_DISJUNCTION: // min of children
+ min_feature = findMinFeature(in[Predicate::CHILDREN][0]);
+ for (size_t i = 1; i < in[Predicate::CHILDREN].children(); ++i) {
+ min_feature = min(min_feature,
+ findMinFeature(in[Predicate::CHILDREN][i]));
+ }
+ return min_feature;
+ case Predicate::TYPE_NEGATION: // == child
+ assert(in[Predicate::CHILDREN].children() == 1);
+ _negated = !_negated;
+ min_feature = findMinFeature(in[Predicate::CHILDREN][0]);
+ _negated = !_negated;
+ return min_feature;
+ case Predicate::TYPE_FEATURE_SET: {
+ if (_negated) {
+ return 0.0f;
+ }
+ Memory label_mem = in[Predicate::KEY].asString();
+ string label(label_mem.data, label_mem.size);
+ label.push_back('=');
+ const size_t prefix_size = label.size();
+ min_feature = 1.0f;
+ for (size_t i = 0; i < in[Predicate::SET].children(); ++i) {
+ Memory value = in[Predicate::SET][i].asString();
+ label.resize(prefix_size);
+ label.append(value.data, value.size);
+ auto it = _key_counts.find(label);
+ assert(it != _key_counts.end());
+ min_feature = min(min_feature, 1.0f / it->second);
+ }
+ return min_feature;
+ }
+ case Predicate::TYPE_FEATURE_RANGE: {
+ if (_negated) {
+ return 0.0f;
+ }
+ string key = in[Predicate::KEY].asString().make_string();
+ auto it = _key_counts.find(key);
+ assert(it != _key_counts.end());
+ return 1.0f / it->second;
+ }
+ } // switch
+ return 0.0f;
+}
+
+PredicateTreeAnalyzer::PredicateTreeAnalyzer(const Inspector &in) : _has_not(false), _negated(false) {
+ traverseTree(in);
+ _min_feature = static_cast<int>(ceilf(findMinFeature(in)) + (_has_not? 1.0 : 0.0));
+}
+
+} // namespace predicate
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/predicate/predicate_tree_analyzer.h b/searchlib/src/vespa/searchlib/predicate/predicate_tree_analyzer.h
new file mode 100644
index 00000000000..35e91db718c
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/predicate/predicate_tree_analyzer.h
@@ -0,0 +1,43 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "tree_crumbs.h"
+#include <vespa/vespalib/data/slime/slime.h>
+#include <map>
+#include <string>
+
+namespace search {
+namespace predicate {
+
+/**
+ * Analyzes a predicate tree, in the form of a slime object, to find
+ * the value for min_feature (the minimum number of features required
+ * to find a match), and a map of sizes that is used when assigning
+ * intervals.
+ */
+class PredicateTreeAnalyzer {
+ std::map<std::string, int> _key_counts;
+ std::map<std::string, int> _size_map;
+ int _min_feature;
+ bool _has_not;
+
+ bool _negated;
+ TreeCrumbs _crumbs;
+ int _size;
+
+ // Fills _key_counts, _size_map, and _has_not.
+ void traverseTree(const vespalib::slime::Inspector &in);
+ float findMinFeature(const vespalib::slime::Inspector &in);
+
+public:
+ PredicateTreeAnalyzer(const vespalib::slime::Inspector &in);
+
+ int getMinFeature() const { return _min_feature; }
+ int getSize() const { return _size; }
+ const std::map<std::string, int> &getSizeMap() const { return _size_map; }
+};
+
+} // namespace predicate
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/predicate/predicate_tree_annotator.cpp b/searchlib/src/vespa/searchlib/predicate/predicate_tree_annotator.cpp
new file mode 100644
index 00000000000..c97f1f73848
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/predicate/predicate_tree_annotator.cpp
@@ -0,0 +1,256 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/log/log.h>
+LOG_SETUP(".predicate_tree_annotator");
+#include <vespa/fastos/fastos.h>
+
+#include "predicate_tree_annotator.h"
+
+#include "predicate_index.h"
+#include "predicate_range_expander.h"
+#include "predicate_tree_analyzer.h"
+#include "tree_crumbs.h"
+#include <vespa/document/predicate/predicate.h>
+#include "predicate_hash.h"
+#include <vespa/vespalib/data/slime/slime.h>
+#include <map>
+#include <string>
+
+using document::Predicate;
+using std::map;
+using std::string;
+using vespalib::slime::Inspector;
+using vespalib::slime::Memory;
+
+namespace search {
+namespace predicate {
+
+using predicate::MIN_INTERVAL;
+using predicate::MAX_INTERVAL;
+
+namespace {
+
+class PredicateTreeAnnotatorImpl {
+ uint32_t _begin;
+ uint32_t _end;
+ uint32_t _left_weight;
+ PredicateTreeAnnotations &_result;
+ uint64_t _zStar_hash;
+ bool _negated;
+ bool _final_range_used;
+ const std::map<std::string, int> &_size_map;
+ TreeCrumbs _crumbs;
+ int64_t _lower_bound;
+ int64_t _upper_bound;
+ uint16_t _interval_range;
+
+
+ uint32_t makeMarker(uint32_t begin, uint32_t end) {
+ return (begin << 16) | end;
+ }
+ uint32_t getCEnd() {
+ if (!_final_range_used && _end == _interval_range) {
+ _final_range_used = true;
+ return _interval_range - 1;
+ }
+ return _left_weight + 1;
+ }
+ void addZstarIntervalIfNegated(uint32_t cEnd);
+
+public:
+ PredicateTreeAnnotatorImpl(const std::map<std::string, int> &size_map,
+ PredicateTreeAnnotations &result,
+ int64_t lower, int64_t upper, uint16_t interval_range);
+
+ void assignIntervalMarkers(const vespalib::slime::Inspector &in);
+};
+
+void PredicateTreeAnnotatorImpl::addZstarIntervalIfNegated(uint32_t cEnd) {
+ if (_negated) {
+ auto it = _result.interval_map.find(_zStar_hash);
+ if (it == _result.interval_map.end()) {
+ it = _result.interval_map.insert(make_pair(
+ _zStar_hash, std::vector<Interval>())).first;
+ _result.features.push_back(_zStar_hash);
+ }
+ auto &intervals = it->second;
+ intervals.push_back(Interval{ makeMarker(cEnd, _begin - 1) });
+ if (_end - cEnd != 1) {
+ intervals.push_back(Interval{ makeMarker(0, _end) });
+ }
+ _left_weight += 1;
+ }
+}
+
+PredicateTreeAnnotatorImpl::PredicateTreeAnnotatorImpl(
+ const map<string, int> &size_map,
+ PredicateTreeAnnotations &result,
+ int64_t lower_bound, int64_t upper_bound, uint16_t interval_range)
+ : _begin(MIN_INTERVAL),
+ _end(interval_range),
+ _left_weight(0),
+ _result(result),
+ _zStar_hash(PredicateIndex::z_star_compressed_hash),
+ _negated(false),
+ _final_range_used(false),
+ _size_map(size_map),
+ _crumbs(),
+ _lower_bound(lower_bound),
+ _upper_bound(upper_bound),
+ _interval_range(interval_range) {
+}
+
+long getType(const Inspector &in, bool negated) {
+ long type = in[Predicate::NODE_TYPE].asLong();
+ if (negated) {
+ if (type == Predicate::TYPE_CONJUNCTION) {
+ return Predicate::TYPE_DISJUNCTION;
+ } else if (type == Predicate::TYPE_DISJUNCTION) {
+ return Predicate::TYPE_CONJUNCTION;
+ }
+ }
+ return type;
+}
+
+void PredicateTreeAnnotatorImpl::assignIntervalMarkers(const Inspector &in) {
+ switch (getType(in, _negated)) {
+ case Predicate::TYPE_CONJUNCTION: {
+ int crumb_size = _crumbs.size();
+ uint32_t curr = _begin;
+ size_t child_count = in[Predicate::CHILDREN].children();
+ uint32_t begin = _begin;
+ uint32_t end = _end;
+ for (size_t i = 0; i < child_count; ++i) {
+ _crumbs.setChild(i, 'a');
+ if (i == child_count - 1) { // Last child (may also be the only?)
+ _begin = curr;
+ _end = end;
+ assignIntervalMarkers(in[Predicate::CHILDREN][i]);
+ // No need to update/touch curr
+ } else if (i == 0) { // First child
+ auto it = _size_map.find(_crumbs.getCrumb());
+ assert (it != _size_map.end());
+ uint32_t child_size = it->second;
+ uint32_t next = _left_weight + child_size + 1;
+ _begin = curr;
+ _end = next - 1;
+ assignIntervalMarkers(in[Predicate::CHILDREN][i]);
+ curr = next;
+ } else { // Middle children
+ auto it = _size_map.find(_crumbs.getCrumb());
+ assert (it != _size_map.end());
+ uint32_t child_size = it->second;
+ uint32_t next = curr + child_size;
+ _begin = curr;
+ _end = next - 1;
+ assignIntervalMarkers(in[Predicate::CHILDREN][i]);
+ curr = next;
+ }
+ _crumbs.resize(crumb_size);
+ }
+ _begin = begin;
+ break;
+ }
+ case Predicate::TYPE_DISJUNCTION: {
+ // All OR children will have the same {begin, end} values, and
+ // the values will be same as that of the parent OR node
+ int crumb_size = _crumbs.size();
+ for (size_t i = 0; i < in[Predicate::CHILDREN].children(); ++i) {
+ _crumbs.setChild(i, 'o');
+ assignIntervalMarkers(in[Predicate::CHILDREN][i]);
+ _crumbs.resize(crumb_size);
+ }
+ break;
+ }
+ case Predicate::TYPE_FEATURE_SET: {
+ uint32_t cEnd = _negated? getCEnd() : 0;
+ Memory label_mem = in[Predicate::KEY].asString();
+ string label(label_mem.data, label_mem.size);
+ label.push_back('=');
+ const size_t prefix_size = label.size();
+ for (size_t i = 0; i < in[Predicate::SET].children(); ++i) {
+ Memory value = in[Predicate::SET][i].asString();
+ label.resize(prefix_size);
+ label.append(value.data, value.size);
+ uint64_t hash = PredicateHash::hash64(label);
+ if (_result.interval_map.find(hash)
+ == _result.interval_map.end()) {
+ _result.features.push_back(hash);
+ }
+ _result.interval_map[hash].push_back(
+ { makeMarker(_begin, _negated? cEnd : _end) });
+ }
+ addZstarIntervalIfNegated(cEnd);
+ _left_weight += 1;
+ break;
+ }
+ case Predicate::TYPE_FEATURE_RANGE: {
+ uint32_t cEnd = _negated? getCEnd() : 0;
+ for (size_t i = 0; i < in[Predicate::HASHED_PARTITIONS].children();
+ ++i) {
+ uint64_t hash = in[Predicate::HASHED_PARTITIONS][i].asLong();
+ _result.interval_map[hash].push_back(
+ { makeMarker(_begin, _negated? cEnd : _end) });
+ }
+ const Inspector& in_hashed_edges =
+ in[Predicate::HASHED_EDGE_PARTITIONS];
+ for (size_t i = 0; i < in_hashed_edges.children(); ++i){
+ const Inspector& child = in_hashed_edges[i];
+ uint64_t hash = child[Predicate::HASH].asLong();
+ uint32_t payload = child[Predicate::PAYLOAD].asLong();
+ _result.bounds_map[hash].push_back(
+ { makeMarker(_begin, _negated? cEnd : _end), payload });
+ }
+ uint32_t hash_count = in[Predicate::HASHED_PARTITIONS].children() +
+ in_hashed_edges.children();
+ if (hash_count < 3) { // three features takes more space than
+ // one stored range.
+ for (size_t i = 0; i < in[Predicate::HASHED_PARTITIONS].children();
+ ++i) {
+ _result.features.push_back(in[Predicate::HASHED_PARTITIONS][i]
+ .asLong());
+ }
+ for (size_t i = 0; i < in_hashed_edges.children(); ++i) {
+ _result.features.push_back(in_hashed_edges[i].asLong());
+ }
+ } else {
+ bool has_min = in[Predicate::RANGE_MIN].valid();
+ bool has_max = in[Predicate::RANGE_MAX].valid();
+ _result.range_features.push_back(
+ {in[Predicate::KEY].asString(),
+ has_min? in[Predicate::RANGE_MIN].asLong() : _lower_bound,
+ has_max? in[Predicate::RANGE_MAX].asLong() : _upper_bound
+ });
+ }
+ addZstarIntervalIfNegated(cEnd);
+ _left_weight += 1;
+ break;
+ }
+ case Predicate::TYPE_NEGATION:
+ _negated = !_negated;
+ assignIntervalMarkers(in[Predicate::CHILDREN][0]);
+ _negated = !_negated;
+ break;
+ } // switch
+}
+} // namespace
+
+void PredicateTreeAnnotator::annotate(const Inspector &in,
+ PredicateTreeAnnotations &result,
+ int64_t lower, int64_t upper) {
+ PredicateTreeAnalyzer analyzer(in);
+ uint32_t min_feature = static_cast<uint32_t>(analyzer.getMinFeature());
+ // Size is as interval range (tree size is lower bound for interval range)
+ int size = analyzer.getSize();
+ assert(size <= UINT16_MAX && size > 0);
+ uint16_t interval_range = static_cast<uint16_t>(size);
+
+ PredicateTreeAnnotatorImpl
+ annotator(analyzer.getSizeMap(), result, lower, upper, interval_range);
+ annotator.assignIntervalMarkers(in);
+ result.min_feature = min_feature;
+ result.interval_range = interval_range;
+}
+
+} // namespace predicate
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/predicate/predicate_tree_annotator.h b/searchlib/src/vespa/searchlib/predicate/predicate_tree_annotator.h
new file mode 100644
index 00000000000..d2146aa66a5
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/predicate/predicate_tree_annotator.h
@@ -0,0 +1,51 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <unordered_map>
+#include <vespa/vespalib/data/slime/memory.h>
+#include <vespa/vespalib/stllike/string.h>
+#include "predicate_interval.h"
+
+namespace vespalib {
+namespace slime { class Inspector; }
+} // namespace vespalib;
+
+namespace search {
+namespace predicate {
+
+struct RangeFeature {
+ vespalib::slime::Memory label;
+ int64_t from;
+ int64_t to;
+};
+
+constexpr uint32_t MIN_INTERVAL = 0x0001;
+constexpr uint32_t MAX_INTERVAL = 0xffff;
+
+struct PredicateTreeAnnotations {
+ PredicateTreeAnnotations(uint32_t mf=0, uint16_t ir=MAX_INTERVAL)
+ : min_feature(mf), interval_range(ir) {}
+ uint32_t min_feature;
+ uint16_t interval_range;
+ std::unordered_map<uint64_t, std::vector<Interval>> interval_map;
+ std::unordered_map<uint64_t, std::vector<IntervalWithBounds>> bounds_map;
+
+ std::vector<uint64_t> features;
+ std::vector<RangeFeature> range_features;
+};
+
+/**
+ * Annotates a predicate document, represented by a slime object, with
+ * intervals used for matching with the interval algorithm.
+ */
+struct PredicateTreeAnnotator {
+ static void annotate(const vespalib::slime::Inspector &in,
+ PredicateTreeAnnotations &result,
+ int64_t lower_bound=LLONG_MIN,
+ int64_t upper_bound=LLONG_MAX);
+};
+
+} // namespace predicate
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/predicate/predicate_zero_constraint_posting_list.cpp b/searchlib/src/vespa/searchlib/predicate/predicate_zero_constraint_posting_list.cpp
new file mode 100644
index 00000000000..ca08d65b82f
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/predicate/predicate_zero_constraint_posting_list.cpp
@@ -0,0 +1,27 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/log/log.h>
+LOG_SETUP(".predicate_zero_constraint_posting_list");
+#include <vespa/fastos/fastos.h>
+
+#include "predicate_zero_constraint_posting_list.h"
+
+namespace search {
+namespace predicate {
+
+PredicateZeroConstraintPostingList::PredicateZeroConstraintPostingList(Iterator it)
+ : _iterator(it) {}
+
+bool PredicateZeroConstraintPostingList::next(uint32_t doc_id) {
+ if (_iterator.valid() && _iterator.getKey() <= doc_id) {
+ _iterator.linearSeek(doc_id + 1);
+ }
+ if (!_iterator.valid()) {
+ return false;
+ }
+ setDocId(_iterator.getKey());
+ return true;
+}
+
+} // namespace search::predicate
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/predicate/predicate_zero_constraint_posting_list.h b/searchlib/src/vespa/searchlib/predicate/predicate_zero_constraint_posting_list.h
new file mode 100644
index 00000000000..428901823c2
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/predicate/predicate_zero_constraint_posting_list.h
@@ -0,0 +1,28 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "predicate_posting_list.h"
+#include "predicate_index.h"
+
+namespace search {
+namespace predicate {
+
+/**
+ * PredicatePostingList implementation for zero constraint documents
+ * from PredicateIndex.
+ */
+class PredicateZeroConstraintPostingList : public PredicatePostingList {
+ using Iterator = PredicateIndex::ZeroConstraintDocs::Iterator;
+ Iterator _iterator;
+
+public:
+ PredicateZeroConstraintPostingList(Iterator it);
+ bool next(uint32_t doc_id) override;
+ bool nextInterval() override { return false; }
+ VESPA_DLL_LOCAL uint32_t getInterval() const override { return 0x00010001; }
+};
+
+} // namespace predicate
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/predicate/predicate_zstar_compressed_posting_list.h b/searchlib/src/vespa/searchlib/predicate/predicate_zstar_compressed_posting_list.h
new file mode 100644
index 00000000000..5aaf02f9ded
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/predicate/predicate_zstar_compressed_posting_list.h
@@ -0,0 +1,89 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "predicate_posting_list.h"
+#include "predicate_index.h"
+
+namespace search {
+namespace predicate {
+
+/**
+ * PredicatePostingList implementation for zstar iterators from
+ * PredicateIndex.
+ */
+template <typename Iterator>
+class PredicateZstarCompressedPostingList : public PredicatePostingList {
+ const PredicateIntervalStore &_interval_store;
+ Iterator _iterator;
+ const Interval *_current_interval;
+ uint32_t _interval_count;
+ uint32_t _interval;
+ uint32_t _prev_interval;
+
+ void setInterval(uint32_t interval) { _interval = interval; }
+public:
+ PredicateZstarCompressedPostingList(const PredicateIntervalStore &store, Iterator it);
+ bool next(uint32_t doc_id) override;
+ bool nextInterval() override;
+ VESPA_DLL_LOCAL uint32_t getInterval() const override { return _interval; }
+};
+
+template <typename Iterator>
+PredicateZstarCompressedPostingList<Iterator>::PredicateZstarCompressedPostingList(
+ const PredicateIntervalStore &interval_store, Iterator it)
+ : _interval_store(interval_store),
+ _iterator(it),
+ _current_interval(0),
+ _interval_count(0),
+ _interval(0),
+ _prev_interval(0) {
+}
+
+template<typename Iterator>
+bool PredicateZstarCompressedPostingList<Iterator>::next(uint32_t doc_id) {
+ if (_iterator.valid() && _iterator.getKey() <= doc_id) {
+ _iterator.linearSeek(doc_id + 1);
+ }
+ if (!_iterator.valid()) {
+ return false;
+ }
+ Interval single_buf;
+ _current_interval =
+ _interval_store.get(_iterator.getData(), _interval_count, &single_buf);
+ setDocId(_iterator.getKey());
+ setInterval(_current_interval[0].interval);
+ _prev_interval = getInterval();
+ return true;
+}
+
+template<typename Iterator>
+bool PredicateZstarCompressedPostingList<Iterator>::nextInterval() {
+ uint32_t next_interval = UINT32_MAX;
+ if (_interval_count > 1) {
+ next_interval = _current_interval[1].interval;
+ }
+ if (_prev_interval) {
+ if ((next_interval & 0xffff0000) == 0) {
+ setInterval(_prev_interval >> 16 | next_interval << 16);
+ ++_current_interval;
+ --_interval_count;
+ } else {
+ uint32_t value = _prev_interval >> 16;
+ setInterval((value + 1) << 16 | value);
+ }
+ _prev_interval = 0;
+ return true;
+ } else if (next_interval != UINT32_MAX) {
+ ++_current_interval;
+ --_interval_count;
+ setInterval(next_interval);
+ _prev_interval = next_interval;
+ return true;
+ }
+ return false;
+}
+
+} // namespace predicate
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/predicate/simple_index.cpp b/searchlib/src/vespa/searchlib/predicate/simple_index.cpp
new file mode 100644
index 00000000000..829423bdc86
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/predicate/simple_index.cpp
@@ -0,0 +1,25 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/log/log.h>
+#include "simple_index.hpp"
+
+LOG_SETUP(".searchlib.simple_index");
+
+namespace search {
+namespace predicate {
+namespace simpleindex {
+
+bool log_enabled() {
+ return LOG_WOULD_LOG(debug);
+}
+
+void log_debug(vespalib::string &str) {
+ LOG(debug, str.c_str());
+}
+
+} // namespace simpleindex
+
+template class SimpleIndex<btree::EntryRef>;
+
+} // namespace predicate
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/predicate/simple_index.h b/searchlib/src/vespa/searchlib/predicate/simple_index.h
new file mode 100644
index 00000000000..be6fc098682
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/predicate/simple_index.h
@@ -0,0 +1,261 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/common/rcuvector.h>
+#include <vespa/searchlib/btree/btree.hpp>
+#include <vespa/searchlib/btree/btreeiterator.hpp>
+#include <vespa/searchlib/btree/btreenode.hpp>
+#include <vespa/searchlib/btree/btreenodeallocator.hpp>
+#include <vespa/searchlib/btree/btreenodestore.hpp>
+#include <vespa/searchlib/btree/btreeroot.hpp>
+#include <vespa/searchlib/btree/btreestore.hpp>
+#include <vespa/vespalib/data/databuffer.h>
+#include <vespa/vespalib/stllike/string.h>
+#include <vespa/vespalib/util/exceptions.h>
+#include <vespa/vespalib/util/macro.h>
+#include <vespa/vespalib/util/generationholder.h>
+#include <experimental/optional>
+
+namespace search {
+namespace predicate {
+
+
+template <typename Key = uint64_t, typename DocId = uint32_t>
+struct SimpleIndexDeserializeObserver {
+ virtual ~SimpleIndexDeserializeObserver() {}
+ virtual void notifyInsert(Key key, DocId docId, uint32_t k) = 0;
+};
+
+template <typename Posting>
+struct PostingSerializer {
+ virtual ~PostingSerializer() {}
+ virtual void serialize(const Posting &posting,
+ vespalib::MMapDataBuffer &buffer) const = 0;
+};
+
+template <typename Posting>
+struct PostingDeserializer {
+ virtual ~PostingDeserializer() {}
+ virtual Posting deserialize(vespalib::MMapDataBuffer &buffer) = 0;
+};
+
+struct DocIdLimitProvider {
+ virtual uint32_t getDocIdLimit() const = 0;
+ virtual uint32_t getCommittedDocIdLimit() const = 0;
+ virtual ~DocIdLimitProvider() {}
+};
+
+struct SimpleIndexConfig {
+ static constexpr double DEFAULT_UPPER_DOCID_FREQ_THRESHOLD = 0.40;
+ static constexpr double DEFAULT_LOWER_DOCID_FREQ_THRESHOLD =
+ 0.8 * DEFAULT_UPPER_DOCID_FREQ_THRESHOLD;
+ static constexpr size_t DEFAULT_UPPER_VECTOR_SIZE_THRESHOLD = 10000;
+ static constexpr size_t DEFAULT_LOWER_VECTOR_SIZE_THRESHOLD =
+ static_cast<size_t>(0.8 * DEFAULT_UPPER_VECTOR_SIZE_THRESHOLD);
+ static constexpr size_t DEFAULT_VECTOR_PRUNE_FREQUENCY = 20000;
+ static constexpr double DEFAULT_FOREACH_VECTOR_THRESHOLD = 0.25;
+
+ // Create vector posting list if doc frequency is above
+ double upper_docid_freq_threshold = DEFAULT_UPPER_DOCID_FREQ_THRESHOLD;
+ // Remove vector posting list if doc frequency is below
+ double lower_docid_freq_threshold = DEFAULT_LOWER_DOCID_FREQ_THRESHOLD;
+ // Threshold to create vector posting list
+ size_t upper_vector_size_threshold = DEFAULT_UPPER_VECTOR_SIZE_THRESHOLD;
+ // Threshold to remove vector posting list
+ size_t lower_vector_size_threshold = DEFAULT_LOWER_VECTOR_SIZE_THRESHOLD;
+ // How often to prune vector when add is called
+ size_t vector_prune_frequency = DEFAULT_VECTOR_PRUNE_FREQUENCY;
+ // Use vector posting list in foreach_frozen if doc frequency is above
+ double foreach_vector_threshold = DEFAULT_FOREACH_VECTOR_THRESHOLD;
+ // Grow strategy for the posting vectors
+ GrowStrategy grow_strategy = GrowStrategy();
+
+ SimpleIndexConfig() {}
+ SimpleIndexConfig(double upper_docid_freq_threshold_,
+ double lower_docid_freq_threshold_,
+ size_t upper_vector_size_threshold_,
+ size_t lower_vector_size_threshold_,
+ size_t vector_prune_frequency_,
+ double foreach_vector_threshold_,
+ GrowStrategy grow_strategy_)
+ : upper_docid_freq_threshold(upper_docid_freq_threshold_),
+ lower_docid_freq_threshold(lower_docid_freq_threshold_),
+ upper_vector_size_threshold(upper_vector_size_threshold_),
+ lower_vector_size_threshold(lower_vector_size_threshold_),
+ vector_prune_frequency(vector_prune_frequency_),
+ foreach_vector_threshold(foreach_vector_threshold_),
+ grow_strategy(grow_strategy_) {}
+ SimpleIndexConfig(double upper_docid_freq_threshold_, GrowStrategy grow_strategy_)
+ : upper_docid_freq_threshold(upper_docid_freq_threshold_),
+ lower_docid_freq_threshold(upper_docid_freq_threshold_ * 0.80),
+ grow_strategy(grow_strategy_) {}
+};
+
+template <typename Posting, typename Key, typename DocId>
+class PostingVectorIterator {
+ using PostingVector = attribute::RcuVectorBase<Posting>;
+
+ const Posting * const _vector;
+ const size_t _size;
+ size_t _pos;
+ Posting _data;
+
+public:
+ // Handle both move and copy construction
+ PostingVectorIterator(PostingVectorIterator&&) = default;
+ PostingVectorIterator& operator=(PostingVectorIterator&&) = default;
+ PostingVectorIterator(const PostingVectorIterator&) = default;
+ PostingVectorIterator& operator=(const PostingVectorIterator&) = default;
+
+ explicit PostingVectorIterator(const PostingVector & vector, size_t size) :
+ _vector(&vector[0]), _size(size) {
+ assert(_size <= vector.size());
+ linearSeek(1);
+ }
+
+ bool valid() const { return _pos < _size; }
+ DocId getKey() const { return _pos; }
+ Posting getData() const { return _data; }
+ void linearSeek(DocId doc_id) {
+ while (doc_id < _size) {
+ const Posting &p = _vector[doc_id];
+ if (p.valid()) {
+ _pos = doc_id;
+ _data = p;
+ return;
+ }
+ ++doc_id;
+ }
+ _pos = _size;
+ }
+ PostingVectorIterator & operator++() {
+ linearSeek(_pos + 1);
+ return *this;
+ }
+};
+
+/**
+ * SimpleIndex holds a dictionary of Keys and posting lists of DocIds
+ * with Posting information.
+ *
+ * Serialization / deserialization assumes that Key fits in 64 bits
+ * and DocId fits in 32 bits.
+ */
+template <typename Posting,
+ typename Key = uint64_t, typename DocId = uint32_t>
+class SimpleIndex {
+public:
+ using Dictionary = btree::BTree<Key, btree::EntryRef, btree::NoAggregated>;
+ using DictionaryIterator = typename Dictionary::ConstIterator;
+ using BTreeStore = btree::BTreeStore<
+ DocId, Posting, btree::NoAggregated, std::less<DocId>, btree::BTreeDefaultTraits>;
+ using BTreeIterator = typename BTreeStore::ConstIterator;
+ using PostingVector = attribute::RcuVectorBase<Posting>;
+ using VectorStore = btree::BTree<Key, std::shared_ptr<PostingVector>, btree::NoAggregated>;
+ using VectorIterator = PostingVectorIterator<Posting, Key, DocId>;
+
+private:
+ using GenerationHolder = vespalib::GenerationHolder;
+ using generation_t = vespalib::GenerationHandler::generation_t;
+ template <typename T>
+ using optional = std::experimental::optional<T>;
+
+ Dictionary _dictionary;
+ BTreeStore _btree_posting_lists;
+ VectorStore _vector_posting_lists;
+ GenerationHolder &_generation_holder;
+ uint32_t _insert_remove_counter = 0;
+ const SimpleIndexConfig _config;
+ const DocIdLimitProvider &_limit_provider;
+
+ void insertIntoPosting(btree::EntryRef &ref, Key key, DocId doc_id, const Posting &posting);
+ void insertIntoVectorPosting(btree::EntryRef ref, Key key, DocId doc_id, const Posting &posting);
+ void removeFromVectorPostingList(btree::EntryRef ref, Key key, DocId doc_id);
+ void pruneBelowThresholdVectors();
+ void createVectorIfOverThreshold(btree::EntryRef ref, Key key);
+ bool removeVectorIfBelowThreshold(btree::EntryRef ref, typename VectorStore::Iterator &it);
+
+ void logVector(const char *action, Key key, size_t document_count,
+ double ratio, size_t vector_length) const;
+ double getDocumentRatio(size_t document_count, uint32_t doc_id_limit) const;
+ size_t getDocumentCount(btree::EntryRef ref) const;
+ bool shouldCreateVectorPosting(size_t size, double ratio) const;
+ bool shouldRemoveVectorPosting(size_t size, double ratio) const;
+ size_t getVectorPostingSize(const PostingVector &vector) const {
+ return std::min(vector.size(),
+ static_cast<size_t>(_limit_provider.getCommittedDocIdLimit()));
+ }
+
+public:
+ SimpleIndex(GenerationHolder &generation_holder, const DocIdLimitProvider &provider) :
+ SimpleIndex(generation_holder, provider, SimpleIndexConfig()) {}
+ SimpleIndex(GenerationHolder &generation_holder,
+ const DocIdLimitProvider &provider, const SimpleIndexConfig &config)
+ : _generation_holder(generation_holder), _config(config), _limit_provider(provider) {}
+ ~SimpleIndex();
+
+ void serialize(vespalib::MMapDataBuffer &buffer,
+ const PostingSerializer<Posting> &serializer) const;
+ void deserialize(vespalib::MMapDataBuffer &buffer,
+ PostingDeserializer<Posting> &deserializer,
+ SimpleIndexDeserializeObserver<Key, DocId> &observer, uint32_t version);
+
+ void addPosting(Key key, DocId doc_id, const Posting &posting);
+ std::pair<Posting, bool> removeFromPostingList(Key key, DocId doc_id);
+ // Call promoteOverThresholdVectors() after deserializing a SimpleIndex
+ // (and after doc id limits values are determined) to promote posting lists to vectors.
+ void promoteOverThresholdVectors();
+ void commit();
+ void trimHoldLists(generation_t used_generation);
+ void transferHoldLists(generation_t generation);
+ MemoryUsage getMemoryUsage() const;
+ template <typename FunctionType>
+ void foreach_frozen_key(btree::EntryRef ref, Key key, FunctionType func) const;
+
+ DictionaryIterator lookup(Key key) const {
+ return _dictionary.getFrozenView().find(key);
+ }
+
+ size_t getPostingListSize(btree::EntryRef ref) const {
+ return _btree_posting_lists.frozenSize(ref);
+ }
+
+ BTreeIterator getBTreePostingList(btree::EntryRef ref) const {
+ return _btree_posting_lists.beginFrozen(ref);
+ }
+
+ optional<VectorIterator> getVectorPostingList(Key key) const {
+ auto it = _vector_posting_lists.getFrozenView().find(key);
+ if (it.valid()) {
+ auto &vector = *it.getData();
+ size_t size = getVectorPostingSize(vector);
+ return optional<VectorIterator>(VectorIterator(vector, size));
+ }
+ return optional<VectorIterator>();
+
+ }
+};
+
+template<typename Posting, typename Key, typename DocId>
+template<typename FunctionType>
+void SimpleIndex<Posting, Key, DocId>::foreach_frozen_key(
+ btree::EntryRef ref, Key key, FunctionType func) const {
+ auto it = _vector_posting_lists.getFrozenView().find(key);
+ double ratio = getDocumentRatio(getDocumentCount(ref), _limit_provider.getDocIdLimit());
+ if (it.valid() && ratio > _config.foreach_vector_threshold) {
+ auto &vector = *it.getData();
+ size_t size = getVectorPostingSize(vector);
+ for (DocId doc_id = 1; doc_id < size; ++doc_id) {
+ if (vector[doc_id].valid()) {
+ func(doc_id);
+ }
+ }
+ } else {
+ _btree_posting_lists.foreach_frozen_key(ref, func);
+ }
+}
+
+} // namespace predicate
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/predicate/simple_index.hpp b/searchlib/src/vespa/searchlib/predicate/simple_index.hpp
new file mode 100644
index 00000000000..10ba3e79a02
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/predicate/simple_index.hpp
@@ -0,0 +1,315 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include "simple_index.h"
+
+namespace search {
+namespace predicate {
+
+namespace simpleindex {
+ bool log_enabled();
+ void log_debug(vespalib::string &str);
+}
+
+template <typename Posting, typename Key, typename DocId>
+void SimpleIndex<Posting, Key, DocId>::insertIntoPosting(
+ btree::EntryRef &ref, Key key, DocId doc_id, const Posting &posting) {
+ bool ok = _btree_posting_lists.insert(ref, doc_id, posting);
+ if (!ok) {
+ _btree_posting_lists.remove(ref, doc_id);
+ ok = _btree_posting_lists.insert(ref, doc_id, posting);
+ }
+ assert(ok);
+ insertIntoVectorPosting(ref, key, doc_id, posting);
+ pruneBelowThresholdVectors();
+}
+
+template <typename Posting, typename Key, typename DocId>
+void SimpleIndex<Posting, Key, DocId>::insertIntoVectorPosting(
+ btree::EntryRef ref, Key key, DocId doc_id, const Posting &posting) {
+ assert(doc_id < _limit_provider.getDocIdLimit());
+ auto it = _vector_posting_lists.find(key);
+ if (it.valid()) {
+ auto &vector = *it.getData();
+ vector.ensure_size(doc_id + 1);
+ vector[doc_id] = posting;
+ } else {
+ createVectorIfOverThreshold(ref, key);
+ }
+};
+
+template <typename Posting, typename Key, typename DocId>
+SimpleIndex<Posting, Key, DocId>::~SimpleIndex() {
+ _btree_posting_lists.disableFreeLists();
+ _btree_posting_lists.disableElemHoldList();
+
+ for (auto it = _dictionary.begin(); it.valid(); ++it) {
+ btree::EntryRef ref(it.getData());
+ if (ref.valid()) {
+ _btree_posting_lists.clear(ref);
+ }
+ }
+
+ _vector_posting_lists.disableFreeLists();
+ _vector_posting_lists.disableElemHoldList();
+ _vector_posting_lists.clear();
+ _vector_posting_lists.getAllocator().freeze();
+ _vector_posting_lists.getAllocator().clearHoldLists();
+
+ _dictionary.disableFreeLists();
+ _dictionary.disableElemHoldList();
+ _dictionary.clear();
+ _dictionary.getAllocator().freeze();
+ _dictionary.getAllocator().clearHoldLists();
+
+ _btree_posting_lists.clearBuilder();
+ _btree_posting_lists.freeze();
+ _btree_posting_lists.clearHoldLists();
+}
+
+template <typename Posting, typename Key, typename DocId>
+void SimpleIndex<Posting, Key, DocId>::serialize(
+ vespalib::MMapDataBuffer &buffer,
+ const PostingSerializer<Posting> &serializer) const {
+ assert(sizeof(Key) <= sizeof(uint64_t));
+ assert(sizeof(DocId) <= sizeof(uint32_t));
+ buffer.writeInt32(_dictionary.size());
+ for (auto it = _dictionary.begin(); it.valid(); ++it) {
+ btree::EntryRef ref = it.getData();
+ buffer.writeInt32(_btree_posting_lists.size(ref)); // 0 if !valid()
+ auto posting_it = _btree_posting_lists.begin(ref);
+ if (!posting_it.valid())
+ continue;
+ buffer.writeInt64(it.getKey()); // Key
+ for (; posting_it.valid(); ++posting_it) {
+ buffer.writeInt32(posting_it.getKey()); // DocId
+ serializer.serialize(posting_it.getData(), buffer);
+ }
+ }
+}
+
+template <typename Posting, typename Key, typename DocId>
+void SimpleIndex<Posting, Key, DocId>::deserialize(
+ vespalib::MMapDataBuffer &buffer,
+ PostingDeserializer<Posting> &deserializer,
+ SimpleIndexDeserializeObserver<Key, DocId> &observer, uint32_t version) {
+ typename Dictionary::Builder builder(_dictionary.getAllocator());
+ uint32_t size = buffer.readInt32();
+ std::vector<btree::BTreeKeyData<DocId, Posting>> postings;
+ for (size_t i = 0; i < size; ++i) {
+ uint32_t posting_size = buffer.readInt32();
+ if (!posting_size)
+ continue;
+ postings.clear();
+ Key key = buffer.readInt64();
+ for (size_t j = 0; j < posting_size; ++j) {
+ DocId doc_id;
+ if (version == 0) {
+ DocId raw_id = buffer.readInt32();
+ doc_id = raw_id >> 6;
+ uint8_t k = static_cast<uint8_t>(raw_id & 0x3f);
+ uint8_t min_feature = k == 0 ? k : k + 1;
+ observer.notifyInsert(key, doc_id, min_feature);
+ } else {
+ doc_id = buffer.readInt32();
+ // min-feature is stored in separate data structure for version > 0
+ observer.notifyInsert(key, doc_id, 0);
+ }
+ postings.emplace_back(doc_id, deserializer.deserialize(buffer));
+ }
+ btree::EntryRef ref;
+ _btree_posting_lists.apply(ref, &postings[0], &postings[postings.size()],
+ 0, 0);
+ builder.insert(key, ref);
+ }
+ _dictionary.assign(builder);
+ commit();
+}
+
+template <typename Posting, typename Key, typename DocId>
+void SimpleIndex<Posting, Key, DocId>::addPosting(Key key, DocId doc_id,
+ const Posting &posting) {
+ auto iter = _dictionary.find(key);
+ btree::EntryRef ref;
+ if (iter.valid()) {
+ ref = iter.getData();
+ insertIntoPosting(ref, key, doc_id, posting);
+ if (ref != iter.getData()) {
+ std::atomic_thread_fence(std::memory_order_release);
+ iter.writeData(ref);
+ }
+ } else {
+ insertIntoPosting(ref, key, doc_id, posting);
+ _dictionary.insert(key, ref);
+ }
+}
+
+template <typename Posting, typename Key, typename DocId>
+std::pair<Posting, bool>
+SimpleIndex<Posting, Key, DocId>::removeFromPostingList(Key key, DocId doc_id) {
+ auto dict_it = _dictionary.find(key);
+ if (!dict_it.valid()) {
+ return std::make_pair(Posting(), false);
+ }
+ auto ref = dict_it.getData();
+ assert(ref.valid());
+ auto posting_it = _btree_posting_lists.begin(ref);
+ assert(posting_it.valid());
+
+ if (posting_it.getKey() < doc_id) {
+ posting_it.binarySeek(doc_id);
+ }
+ if (!posting_it.valid() || posting_it.getKey() != doc_id) {
+ return std::make_pair(Posting(), false);
+ }
+
+ Posting posting = posting_it.getData();
+ btree::EntryRef original_ref(ref);
+ _btree_posting_lists.remove(ref, doc_id);
+ removeFromVectorPostingList(ref, key, doc_id);
+ if (!ref.valid()) { // last posting was removed
+ _dictionary.remove(key);
+ } else if (ref != original_ref) { // ref changed. update dictionary.
+ std::atomic_thread_fence(std::memory_order_release);
+ dict_it.writeData(ref);
+ }
+ return std::make_pair(posting, true);
+}
+
+template <typename Posting, typename Key, typename DocId>
+void SimpleIndex<Posting, Key, DocId>::removeFromVectorPostingList(
+ btree::EntryRef ref, Key key, DocId doc_id) {
+ auto it = _vector_posting_lists.find(key);
+ if (it.valid()) {
+ if (!removeVectorIfBelowThreshold(ref, it)) {
+ (*it.getData())[doc_id] = Posting();
+ }
+ }
+};
+
+template <typename Posting, typename Key, typename DocId>
+void SimpleIndex<Posting, Key, DocId>::pruneBelowThresholdVectors() {
+ // Check if it is time to prune any vector postings
+ if (++_insert_remove_counter % _config.vector_prune_frequency > 0) return;
+
+ for (auto posting_it = _vector_posting_lists.begin(); posting_it.valid();) {
+ Key key = posting_it.getKey();
+ auto dict_it = _dictionary.find(key);
+ assert(dict_it.valid());
+ if (!removeVectorIfBelowThreshold(dict_it.getData(), posting_it)) {
+ ++posting_it;
+ }
+ }
+};
+
+template <typename Posting, typename Key, typename DocId>
+void SimpleIndex<Posting, Key, DocId>::promoteOverThresholdVectors() {
+ for (auto it = _dictionary.begin(); it.valid(); ++it) {
+ Key key = it.getKey();
+ if (!_vector_posting_lists.find(key).valid()) {
+ createVectorIfOverThreshold(it.getData(), key);
+ }
+ }
+}
+
+template <typename Posting, typename Key, typename DocId>
+void SimpleIndex<Posting, Key, DocId>::logVector(
+ const char *action, Key key, size_t document_count, double ratio, size_t vector_length) const {
+ if (!simpleindex::log_enabled()) return;
+ auto msg = vespalib::make_string(
+ "%s vector for key '%016" PRIx64 "' with length %zu. Contains %zu documents "
+ "(doc id limit %" PRIu32", committed doc id limit %" PRIu32 ", ratio %f, "
+ "vector count %zu)",
+ action, key, vector_length, document_count, _limit_provider.getDocIdLimit(),
+ _limit_provider.getCommittedDocIdLimit(), ratio, _vector_posting_lists.size());
+ simpleindex::log_debug(msg);
+}
+
+template <typename Posting, typename Key, typename DocId>
+void SimpleIndex<Posting, Key, DocId>::createVectorIfOverThreshold(btree::EntryRef ref, Key key) {
+ uint32_t doc_id_limit = _limit_provider.getDocIdLimit();
+ size_t size = getDocumentCount(ref);
+ double ratio = getDocumentRatio(size, doc_id_limit);
+ if (shouldCreateVectorPosting(size, ratio)) {
+ auto vector = new attribute::RcuVectorBase<Posting>(_config.grow_strategy, _generation_holder);
+ vector->unsafe_resize(doc_id_limit);
+ _btree_posting_lists.foreach_unfrozen(
+ ref, [&](DocId d, const Posting &p) { (*vector)[d] = p; });
+ _vector_posting_lists.insert(key, std::shared_ptr<PostingVector>(vector));
+ logVector("Created", key, size, ratio, vector->size());
+ }
+}
+
+template <typename Posting, typename Key, typename DocId>
+bool SimpleIndex<Posting, Key, DocId>::removeVectorIfBelowThreshold(
+ btree::EntryRef ref, typename VectorStore::Iterator &it) {
+ size_t size = getDocumentCount(ref);
+ double ratio = getDocumentRatio(size, _limit_provider.getDocIdLimit());
+ if (shouldRemoveVectorPosting(size, ratio)) {
+ Key key = it.getKey();
+ size_t vector_length = it.getData()->size();
+ _vector_posting_lists.remove(it);
+ logVector("Removed", key, size, ratio, vector_length);
+ return true;
+ }
+ return false;
+}
+
+template <typename Posting, typename Key, typename DocId>
+double SimpleIndex<Posting, Key, DocId>::getDocumentRatio(size_t document_count,
+ uint32_t doc_id_limit) const {
+ assert(doc_id_limit > 1);
+ return document_count / static_cast<double>(doc_id_limit - 1);
+};
+
+template <typename Posting, typename Key, typename DocId>
+size_t SimpleIndex<Posting, Key, DocId>::getDocumentCount(btree::EntryRef ref) const {
+ return _btree_posting_lists.size(ref);
+};
+
+template <typename Posting, typename Key, typename DocId>
+bool SimpleIndex<Posting, Key, DocId>::shouldRemoveVectorPosting(size_t size, double ratio) const {
+ return size < _config.lower_vector_size_threshold || ratio < _config.lower_docid_freq_threshold;
+};
+
+template <typename Posting, typename Key, typename DocId>
+bool SimpleIndex<Posting, Key, DocId>::shouldCreateVectorPosting(size_t size, double ratio) const {
+ return size >= _config.upper_vector_size_threshold && ratio >= _config.upper_docid_freq_threshold;
+};
+
+template <typename Posting, typename Key, typename DocId>
+void SimpleIndex<Posting, Key, DocId>::commit() {
+ _dictionary.getAllocator().freeze();
+ _btree_posting_lists.freeze();
+ _vector_posting_lists.getAllocator().freeze();
+}
+
+template <typename Posting, typename Key, typename DocId>
+void SimpleIndex<Posting, Key, DocId>::trimHoldLists(generation_t used_generation) {
+ _btree_posting_lists.trimHoldLists(used_generation);
+ _dictionary.getAllocator().trimHoldLists(used_generation);
+ _vector_posting_lists.getAllocator().trimHoldLists(used_generation);
+
+}
+
+template <typename Posting, typename Key, typename DocId>
+void SimpleIndex<Posting, Key, DocId>::transferHoldLists(generation_t generation) {
+ _dictionary.getAllocator().transferHoldLists(generation);
+ _btree_posting_lists.transferHoldLists(generation);
+ _vector_posting_lists.getAllocator().transferHoldLists(generation);
+}
+
+template <typename Posting, typename Key, typename DocId>
+MemoryUsage SimpleIndex<Posting, Key, DocId>::getMemoryUsage() const {
+ MemoryUsage combined;
+ combined.merge(_dictionary.getMemoryUsage());
+ combined.merge(_btree_posting_lists.getMemoryUsage());
+ combined.merge(_vector_posting_lists.getMemoryUsage());
+ for (auto it = _vector_posting_lists.begin(); it.valid(); ++it) {
+ combined.merge(it.getData()->getMemoryUsage());
+ }
+ return combined;
+};
+
+} // namespace predicate
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/predicate/tree_crumbs.h b/searchlib/src/vespa/searchlib/predicate/tree_crumbs.h
new file mode 100644
index 00000000000..ee0cd638bb1
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/predicate/tree_crumbs.h
@@ -0,0 +1,44 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <string>
+#include <vector>
+
+namespace search {
+namespace predicate {
+
+/**
+ * Builds a path from the root of a tree, to be able to describe a
+ * given position in the tree.
+ */
+class TreeCrumbs {
+ std::vector<char> _buffer;
+
+public:
+ void setChild(size_t number, char delimiter = ':') {
+ _buffer.push_back(delimiter);
+ char buf[10];
+ int i = 0;
+ while (number > 0) {
+ buf[i++] = (number % 10) + '0';
+ number /= 10;
+ }
+ if (i == 0) {
+ _buffer.push_back('0');
+ }
+ while (i > 0) {
+ _buffer.push_back(buf[--i]);
+ }
+ }
+ void resize(size_t i) { _buffer.resize(i); }
+
+ size_t size() const { return _buffer.size(); }
+ std::string getCrumb() const {
+ return std::string(&_buffer[0], _buffer.size());
+ }
+};
+
+} // namespace predicate
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/query/.gitignore b/searchlib/src/vespa/searchlib/query/.gitignore
new file mode 100644
index 00000000000..ee8938b6bf4
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/query/.gitignore
@@ -0,0 +1,6 @@
+*.So
+*.exe
+*.ilk
+*.pdb
+.depend*
+Makefile
diff --git a/searchlib/src/vespa/searchlib/query/CMakeLists.txt b/searchlib/src/vespa/searchlib/query/CMakeLists.txt
new file mode 100644
index 00000000000..50aca60fc1c
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/query/CMakeLists.txt
@@ -0,0 +1,12 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_library(searchlib_query
+ SOURCES
+ queryterm.cpp
+ querynode.cpp
+ base.cpp
+ query.cpp
+ querynoderesultbase.cpp
+ $<TARGET_OBJECTS:searchlib_tree>
+ INSTALL lib64
+ DEPENDS
+)
diff --git a/searchlib/src/vespa/searchlib/query/OWNERS b/searchlib/src/vespa/searchlib/query/OWNERS
new file mode 100644
index 00000000000..1037590124e
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/query/OWNERS
@@ -0,0 +1 @@
+balder
diff --git a/searchlib/src/vespa/searchlib/query/base.cpp b/searchlib/src/vespa/searchlib/query/base.cpp
new file mode 100644
index 00000000000..e149b7d4d2c
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/query/base.cpp
@@ -0,0 +1,16 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/searchlib/query/base.h>
+
+namespace search {
+
+Object::~Object(void)
+{
+}
+
+vespalib::string Object::toString() const
+{
+ return vespalib::string("");
+}
+
+}
diff --git a/searchlib/src/vespa/searchlib/query/base.h b/searchlib/src/vespa/searchlib/query/base.h
new file mode 100644
index 00000000000..ba066b5410d
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/query/base.h
@@ -0,0 +1,141 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/fastos/thread.h>
+#include <vespa/fastos/time.h>
+#include <vespa/fastlib/text/unicodeutil.h>
+#include <vespa/vespalib/stllike/string.h>
+#include <vector>
+#include <queue>
+#include <functional>
+#include <algorithm>
+#include <map>
+#include <vespa/vespalib/util/sync.h>
+
+namespace search
+{
+
+/// Type of general unsigned 8 bit data.
+typedef unsigned char byte;
+/// A simple container for the raw querystack.
+typedef vespalib::stringref QueryPacketT;
+/// The type of the local documentId.
+typedef unsigned DocumentIdT;
+/// This is the type of the CollectionId used in the StorageAPI.
+typedef uint64_t CollectionIdT;
+/// The type to identify a query.
+typedef unsigned QueryIdT;
+/// The rank type.
+typedef unsigned RankT;
+/// How time type. Used to represent seconds since 1970.
+typedef unsigned TimeT;
+/// Type to identify performance counters.
+typedef uint64_t CounterT;
+/// Type to identify performance values.
+typedef int ValueT;
+/// This is a 16 byte vector used in SSE2 integer operations.
+typedef char v16qi __attribute__ ((__vector_size__(16)));
+/// This is a 2 element uint64_t vector used in SSE2 integer operations.
+typedef long long v2di __attribute__ ((__vector_size__(16)));
+/// A type to represent a list of strings.
+typedef std::vector<vespalib::string> StringListT;
+/// A type to represent a vector of 32 bit signed integers.
+typedef std::vector<int32_t> Int32ListT;
+/// A type to represent a list of document ids.
+typedef std::vector<DocumentIdT> DocumentIdList;
+
+/// A debug macro the does "a" when l & the mask is true. The mask is set per file.
+#define DEBUG(l, a) { if (l&DEBUGMASK) {a;} }
+#ifdef __USE_RAWDEBUG__
+ #define RAWDEBUG(a) a
+#else
+ #define RAWDEBUG(a)
+#endif
+/// A macro avoid warnings for unused parameters.
+#define UNUSED_PARAM(p)
+/// A macro that gives you number of elements in an array.
+#define NELEMS(a) (sizeof(a)/sizeof(a[0]))
+
+/// A macro used in descendants of Object to instantiate the duplicate method.
+#define DUPLICATE(a) virtual a * duplicate() const;
+#define IMPLEMENT_DUPLICATE(a) a * a::duplicate() const { return new a(*this); }
+
+/**
+ This is a base class that ensures that all descendants can be duplicated.
+ This implies also that they have a copy constructor.
+ It also makes them streamable to an std:ostream.
+*/
+class Object
+{
+ public:
+ virtual ~Object(void);
+ /// Returns an allocated(new) object that is identical to this one.
+ virtual Object * duplicate() const = 0;
+ /// Gives you streamability of the object. Object does nothing.
+ virtual vespalib::string toString() const;
+};
+
+/**
+ This is a template that can hold any objects of any descendants of T.
+ It does take a copy of the object. Very nice for holding different descendants
+ and not have to worry about what happens on copy, assignment, destruction.
+ No references, just simple copy.
+ It gives you the -> and * operator so you can use it as a pointer to T.
+ Very convenient.
+*/
+template <typename T>
+class ObjectContainer
+{
+ public:
+ ObjectContainer() : _p(NULL) { }
+ ObjectContainer(const T & org) : _p(static_cast<T*>(org.duplicate())) { }
+ ObjectContainer(const T * org) : _p(org ? static_cast<T*>(org->duplicate()) : NULL) { }
+ ObjectContainer(const ObjectContainer & org) : _p(NULL) { *this = org; }
+ ObjectContainer & operator = (const T * org) { cleanUp(); if (org) { _p = static_cast<T*>(org->duplicate()); } return *this; }
+ ObjectContainer & operator = (const T & org) { cleanUp(); _p = static_cast<T*>(org.duplicate()); return *this; }
+ ObjectContainer & operator = (const ObjectContainer & org) { if (this != & org) { cleanUp(); if (org._p) { _p = static_cast<T*>(org._p->duplicate());} } return *this; }
+ virtual ~ObjectContainer() { cleanUp(); }
+ bool valid() const { return (_p != NULL); }
+ T *operator->() { return _p; }
+ T &operator*() { return *_p; }
+ const T *operator->() const { return _p; }
+ const T &operator*() const { return *_p; }
+ operator T & () const { return *_p; }
+ operator T * () const { return _p; }
+
+ private:
+ void cleanUp() { delete _p; _p = NULL; }
+ T * _p;
+};
+
+/**
+ This is a template similar to ObjectContainer that frees you from the trouble
+ of having to write you own copy/assignment operators when you use pointers as
+ pure references. Adds one level of indirection, but that normally optimized
+ away by the compiler. Can be used as an ordinary pointer since -> and * is
+ overloaded.
+*/
+template <typename T>
+class PointerContainer
+{
+ public:
+ PointerContainer() : _p(NULL) { }
+ PointerContainer(T & org) : _p(org) { }
+ PointerContainer(T * org) : _p(org) { }
+ PointerContainer(const PointerContainer & org) : _p(org._p) { }
+ PointerContainer & operator = (T * org) { _p = org; return *this; }
+ PointerContainer & operator = (T & org) { _p = &org; return *this; }
+ PointerContainer & operator = (const PointerContainer & org) { if (this != & org) { _p = org._p;} return *this; }
+ virtual ~PointerContainer() { _p = 0; }
+ bool valid() const { return (_p != NULL); }
+ T *operator->() const { return _p; }
+ T &operator*() const { return *_p; }
+ operator T & () const { return *_p; }
+ operator T * () const { return _p; }
+ private:
+ T * _p;
+};
+
+}
+
diff --git a/searchlib/src/vespa/searchlib/query/posocc.h b/searchlib/src/vespa/searchlib/query/posocc.h
new file mode 100644
index 00000000000..90aaa40b285
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/query/posocc.h
@@ -0,0 +1,30 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/searchlib/query/base.h>
+
+namespace search
+{
+
+class Hit
+{
+ public:
+ Hit(uint32_t pos_, uint32_t context_, int32_t weight_) :
+ _position(pos_ | (context_<<24)),
+ _weight(weight_)
+ { }
+ int32_t weight() const { return _weight; }
+ uint32_t pos() const { return _position; }
+ uint32_t wordpos() const { return _position & 0xffffff; }
+ uint32_t context() const { return _position >> 24; }
+ bool operator < (const Hit & b) const { return cmp(b) < 0; }
+ private:
+ int cmp(const Hit & b) const { return _position - b._position; }
+ uint32_t _position;
+ int32_t _weight;
+};
+
+typedef std::vector<Hit> HitList;
+
+}
+
diff --git a/searchlib/src/vespa/searchlib/query/query.cpp b/searchlib/src/vespa/searchlib/query/query.cpp
new file mode 100644
index 00000000000..861b0258dc1
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/query/query.cpp
@@ -0,0 +1,348 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/searchlib/query/query.h>
+#include <vespa/vespalib/objects/visit.h>
+
+using vespalib::Identifiable;
+
+namespace search
+{
+
+IMPLEMENT_IDENTIFIABLE_ABSTRACT_NS(search, QueryConnector, QueryNode);
+IMPLEMENT_IDENTIFIABLE_NS(search, Query, Identifiable);
+IMPLEMENT_IDENTIFIABLE_NS(search, TrueNode, QueryNode);
+IMPLEMENT_IDENTIFIABLE_NS(search, AndQueryNode, QueryConnector);
+IMPLEMENT_IDENTIFIABLE_NS(search, AndNotQueryNode, QueryConnector);
+IMPLEMENT_IDENTIFIABLE_NS(search, OrQueryNode, QueryConnector);
+IMPLEMENT_IDENTIFIABLE_NS(search, EquivQueryNode, OrQueryNode);
+IMPLEMENT_IDENTIFIABLE_NS(search, PhraseQueryNode, AndQueryNode);
+IMPLEMENT_IDENTIFIABLE_NS(search, NotQueryNode, QueryConnector);
+IMPLEMENT_IDENTIFIABLE_NS(search, NearQueryNode, AndQueryNode);
+IMPLEMENT_IDENTIFIABLE_NS(search, ONearQueryNode, NearQueryNode);
+
+void QueryConnector::visitMembers(vespalib::ObjectVisitor &visitor) const
+{
+ visit(visitor, "Operator", _opName);
+ visit(visitor, "Children", (const QueryNodeList &)*this);
+}
+
+QueryConnector::QueryConnector(const char * opName) :
+ QueryNode(),
+ _opName(opName),
+ _index()
+{
+}
+
+const HitList & QueryConnector::evaluateHits(HitList & hl) const
+{
+ if (evaluate()) {
+ hl.push_back(Hit(1, 0, 1));
+ }
+ return hl;
+}
+
+void QueryConnector::reset()
+{
+ for(iterator it=begin(), mt=end(); it != mt; it++) {
+ QueryNode & qn = **it;
+ qn.reset();
+ }
+}
+
+void QueryConnector::getLeafs(QueryTermList & tl)
+{
+ for(iterator it=begin(), mt=end(); it != mt; it++) {
+ QueryNode & qn = **it;
+ qn.getLeafs(tl);
+ }
+}
+
+void QueryConnector::getLeafs(ConstQueryTermList & tl) const
+{
+ for(const_iterator it=begin(), mt=end(); it != mt; it++) {
+ const QueryNode & qn = **it;
+ qn.getLeafs(tl);
+ }
+}
+
+void QueryConnector::getPhrases(QueryNodeRefList & tl)
+{
+ for(iterator it=begin(), mt=end(); it != mt; it++) {
+ QueryNode & qn = **it;
+ qn.getPhrases(tl);
+ }
+}
+
+void QueryConnector::getPhrases(ConstQueryNodeRefList & tl) const
+{
+ for(const_iterator it=begin(), mt=end(); it != mt; it++) {
+ const QueryNode & qn = **it;
+ qn.getPhrases(tl);
+ }
+}
+
+size_t QueryConnector::depth() const
+{
+ size_t d(0);
+ for(const_iterator it=begin(), mt=end(); (it!=mt); it++) {
+ const QueryNode & qn = **it;
+ size_t t = qn.depth();
+ if (t > d)
+ d = t;
+ }
+ return d+1;
+}
+
+size_t QueryConnector::width() const
+{
+ size_t w(0);
+ for(const_iterator it=begin(), mt=end(); (it!=mt); it++) {
+ const QueryNode & qn = **it;
+ w += qn.width();
+ }
+
+ return w;
+}
+
+QueryConnector *
+QueryConnector::create(ParseItem::ItemType type)
+{
+ switch (type) {
+ case search::ParseItem::ITEM_AND: return new AndQueryNode();
+ case search::ParseItem::ITEM_OR: return new OrQueryNode();
+ case search::ParseItem::ITEM_WEAK_AND: return new OrQueryNode();
+ case search::ParseItem::ITEM_EQUIV: return new EquivQueryNode();
+ case search::ParseItem::ITEM_WEIGHTED_SET: return new EquivQueryNode();
+ case search::ParseItem::ITEM_DOT_PRODUCT: return new OrQueryNode();
+ case search::ParseItem::ITEM_WAND: return new OrQueryNode();
+ case search::ParseItem::ITEM_NOT: return new AndNotQueryNode();
+ case search::ParseItem::ITEM_PHRASE: return new PhraseQueryNode();
+ case search::ParseItem::ITEM_NEAR: return new NearQueryNode();
+ case search::ParseItem::ITEM_ONEAR: return new ONearQueryNode();
+ default:
+ return NULL;
+ }
+}
+
+bool TrueNode::evaluate() const
+{
+ return true;
+}
+
+bool AndQueryNode::evaluate() const
+{
+ bool ok(true);
+ for (const_iterator it=begin(), mt=end(); ok && (it!=mt); it++) {
+ const QueryNode & qn = **it;
+ ok = ok && qn.evaluate();
+ }
+ return ok;
+}
+
+bool AndNotQueryNode::evaluate() const
+{
+ bool ok(empty() ? true : front()->evaluate());
+ if (!empty()) {
+ for (const_iterator it=begin()+1, mt=end(); ok && (it!=mt); it++) {
+ const QueryNode & qn = **it;
+ ok = ok && ! qn.evaluate();
+ }
+ }
+ return ok;
+}
+
+bool OrQueryNode::evaluate() const
+{
+ bool ok(false);
+ for (const_iterator it=begin(), mt=end(); !ok && (it!=mt); it++) {
+ const QueryNode & qn = **it;
+ ok = qn.evaluate();
+ }
+ return ok;
+}
+
+
+bool EquivQueryNode::evaluate() const
+{
+ return OrQueryNode::evaluate();
+}
+
+
+bool PhraseQueryNode::evaluate() const
+{
+ bool ok(false);
+ HitList hl;
+ ok = ! evaluateHits(hl).empty();
+ return ok;
+}
+
+void PhraseQueryNode::getPhrases(QueryNodeRefList & tl) { tl.push_back(this); }
+void PhraseQueryNode::getPhrases(ConstQueryNodeRefList & tl) const { tl.push_back(this); }
+
+const HitList & PhraseQueryNode::evaluateHits(HitList & hl) const
+{
+ hl.clear();
+ _fieldInfo.clear();
+ bool andResult(AndQueryNode::evaluate());
+ if (andResult) {
+ HitList tmpHL;
+ unsigned int fullPhraseLen = size();
+ unsigned int currPhraseLen = 0;
+ std::vector<unsigned int> indexVector(fullPhraseLen, 0);
+ const QueryTerm * curr = static_cast<const QueryTerm *> (&(*(*this)[currPhraseLen]));
+ bool exhausted( curr->evaluateHits(tmpHL).empty());
+ for (; !exhausted; ) {
+ const QueryTerm & next = static_cast<const QueryTerm &>(*(*this)[currPhraseLen+1]);
+ unsigned int & currIndex = indexVector[currPhraseLen];
+ unsigned int & nextIndex = indexVector[currPhraseLen+1];
+ const HitList & nextHL = next.evaluateHits(tmpHL);
+
+ size_t firstPosition = curr->evaluateHits(tmpHL)[currIndex].pos();
+ int diff(0);
+ size_t nextIndexMax = nextHL.size();
+ while ((nextIndex < nextIndexMax) && ((diff = nextHL[nextIndex].pos()-firstPosition) < 1))
+ nextIndex++;
+ if (diff == 1) {
+ currPhraseLen++;
+ bool ok = ((currPhraseLen+1)==fullPhraseLen);
+ if (ok) {
+ Hit h = nextHL[indexVector[currPhraseLen]];
+ hl.push_back(h);
+ const QueryTerm::FieldInfo & fi = next.getFieldInfo(h.context());
+ updateFieldInfo(h.context(), hl.size() - 1, fi.getFieldLength());
+ currPhraseLen = 0;
+ indexVector[0]++;
+ }
+ } else {
+ currPhraseLen = 0;
+ indexVector[currPhraseLen]++;
+ }
+ curr = static_cast<const QueryTerm *>(&*(*this)[currPhraseLen]);
+ exhausted = (nextIndex >= nextIndexMax) || (indexVector[currPhraseLen] >= curr->evaluateHits(tmpHL).size());
+ }
+ }
+ return hl;
+}
+
+void
+PhraseQueryNode::updateFieldInfo(size_t fid, size_t offset, size_t fieldLength) const
+{
+ if (fid >= _fieldInfo.size()) {
+ _fieldInfo.resize(fid + 1);
+ // only set hit offset and field length the first time
+ QueryTerm::FieldInfo & fi = _fieldInfo[fid];
+ fi.setHitOffset(offset);
+ fi.setFieldLength(fieldLength);
+ }
+ QueryTerm::FieldInfo & fi = _fieldInfo[fid];
+ fi.setHitCount(fi.getHitCount() + 1);
+}
+
+bool NotQueryNode::evaluate() const
+{
+ bool ok(false);
+ for (const_iterator it=begin(), mt=end(); it!=mt; it++) {
+ const QueryNode & qn = **it;
+ ok |= ! qn.evaluate();
+ }
+ return ok;
+}
+
+bool NearQueryNode::evaluate() const
+{
+ bool ok(AndQueryNode::evaluate());
+ return ok;
+}
+
+void NearQueryNode::visitMembers(vespalib::ObjectVisitor &visitor) const
+{
+ AndQueryNode::visitMembers(visitor);
+ visit(visitor, "distance", _distance);
+}
+
+
+bool ONearQueryNode::evaluate() const
+{
+ bool ok(NearQueryNode::evaluate());
+ return ok;
+}
+
+Query::Query() :
+ Identifiable(),
+ _root()
+{
+}
+
+Query::Query(const QueryNodeResultBase & org, const QueryPacketT & queryRep) :
+ Identifiable(),
+ _root()
+{
+ build(org, queryRep);
+}
+
+void Query::visitMembers(vespalib::ObjectVisitor &visitor) const
+{
+ visit(visitor, "root", _root);
+}
+
+bool Query::evaluate() const
+{
+ bool ok = valid() ? _root->evaluate() : false;
+ return ok;
+}
+
+bool Query::build(const QueryNodeResultBase & org, const QueryPacketT & queryRep)
+{
+ search::SimpleQueryStackDumpIterator stack(queryRep);
+ if (stack.next()) {
+ _root.reset(QueryNode::Build(NULL, org, stack, true).release());
+ }
+ return valid();
+}
+
+void Query::getLeafs(QueryTermList & tl)
+{
+ if (valid()) {
+ _root->getLeafs(tl);
+ }
+}
+
+void Query::getLeafs(ConstQueryTermList & tl) const
+{
+ if (valid()) {
+ _root->getLeafs(tl);
+ }
+}
+
+void Query::getPhrases(QueryNodeRefList & tl)
+{
+ if (valid()) {
+ _root->getPhrases(tl);
+ }
+}
+
+void Query::getPhrases(ConstQueryNodeRefList & tl) const
+{
+ if (valid()) {
+ _root->getPhrases(tl);
+ }
+}
+
+void Query::reset()
+{
+ if (valid()) {
+ _root->reset();
+ }
+}
+
+size_t Query::depth() const
+{
+ return valid() ? _root->depth() : 0;
+}
+
+size_t Query::width() const
+{
+ return valid() ? _root->width() : 0;
+}
+
+}
diff --git a/searchlib/src/vespa/searchlib/query/query.h b/searchlib/src/vespa/searchlib/query/query.h
new file mode 100644
index 00000000000..7e9f00eb436
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/query/query.h
@@ -0,0 +1,212 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/searchlib/query/queryterm.h>
+
+namespace search
+{
+
+/**
+ Base class for all N-ary query operators.
+ Implements the width, depth, print, and collect all leafs operators(terms).
+*/
+class QueryConnector : public QueryNode, public QueryNodeList
+{
+public:
+ DECLARE_IDENTIFIABLE_ABSTRACT_NS(search, QueryConnector);
+ QueryConnector(const char * opName);
+ virtual const HitList & evaluateHits(HitList & hl) const;
+ /// Will clear the results from the querytree.
+ virtual void reset();
+ /// Will get all leafnodes.
+ virtual void getLeafs(QueryTermList & tl);
+ virtual void getLeafs(ConstQueryTermList & tl) const;
+ /// Gives you all phrases of this tree.
+ virtual void getPhrases(QueryNodeRefList & tl);
+ virtual void getPhrases(ConstQueryNodeRefList & tl) const;
+ virtual size_t depth() const;
+ virtual size_t width() const;
+ virtual void visitMembers(vespalib::ObjectVisitor &visitor) const;
+ virtual void setIndex(const vespalib::string & index) { _index = index; }
+ virtual const vespalib::string & getIndex() const { return _index; }
+ static QueryConnector * create(ParseItem::ItemType type);
+ virtual bool isFlattenable(ParseItem::ItemType type) const { (void) type; return false; }
+private:
+ vespalib::string _opName;
+ vespalib::string _index;
+};
+
+/**
+ True operator. Matches everything.
+*/
+class TrueNode : public QueryConnector
+{
+public:
+ DECLARE_IDENTIFIABLE_NS(search, TrueNode);
+ TrueNode() : QueryConnector("AND") { }
+ virtual bool evaluate() const;
+};
+
+/**
+ N-ary Or operator that simply ANDs all the nodes together.
+*/
+class AndQueryNode : public QueryConnector
+{
+public:
+ DECLARE_IDENTIFIABLE_NS(search, AndQueryNode);
+ AndQueryNode() : QueryConnector("AND") { }
+ AndQueryNode(const char * opName) : QueryConnector(opName) { }
+ virtual bool evaluate() const;
+ virtual bool isFlattenable(ParseItem::ItemType type) const { return type == ParseItem::ITEM_AND; }
+};
+
+/**
+ N-ary special AndNot operator. n[0] & !n[1] & !n[2] .. & !n[j].
+*/
+class AndNotQueryNode : public QueryConnector
+{
+public:
+ DECLARE_IDENTIFIABLE_NS(search, AndNotQueryNode);
+ AndNotQueryNode() : QueryConnector("ANDNOT") { }
+ virtual bool evaluate() const;
+ virtual bool isFlattenable(ParseItem::ItemType type) const { return type == ParseItem::ITEM_NOT; }
+};
+
+/**
+ N-ary Or operator that simply ORs all the nodes together.
+*/
+class OrQueryNode : public QueryConnector
+{
+public:
+ DECLARE_IDENTIFIABLE_NS(search, OrQueryNode);
+ OrQueryNode() : QueryConnector("OR") { }
+ OrQueryNode(const char * opName) : QueryConnector(opName) { }
+ virtual bool evaluate() const;
+ virtual bool isFlattenable(ParseItem::ItemType type) const {
+ return (type == ParseItem::ITEM_OR) ||
+ (type == ParseItem::ITEM_DOT_PRODUCT) ||
+ (type == ParseItem::ITEM_WAND) ||
+ (type == ParseItem::ITEM_WEAK_AND);
+ }
+};
+
+/**
+ N-ary "EQUIV" operator that merges terms from nodes below.
+*/
+class EquivQueryNode : public OrQueryNode
+{
+public:
+ DECLARE_IDENTIFIABLE_NS(search, EquivQueryNode);
+ EquivQueryNode() : OrQueryNode("EQUIV") { }
+ virtual bool evaluate() const;
+ virtual bool isFlattenable(ParseItem::ItemType type) const {
+ return (type == ParseItem::ITEM_EQUIV) ||
+ (type == ParseItem::ITEM_WEIGHTED_SET);
+ }
+};
+
+/**
+ N-ary phrase operator. All terms must be satisfied and have the correct order
+ with distance to next term equal to 1.
+*/
+class PhraseQueryNode : public AndQueryNode
+{
+public:
+ DECLARE_IDENTIFIABLE_NS(search, PhraseQueryNode);
+ PhraseQueryNode() : AndQueryNode("PHRASE"), _fieldInfo(32) { }
+ virtual bool evaluate() const;
+ virtual const HitList & evaluateHits(HitList & hl) const;
+ virtual void getPhrases(QueryNodeRefList & tl);
+ virtual void getPhrases(ConstQueryNodeRefList & tl) const;
+ const QueryTerm::FieldInfo & getFieldInfo(size_t fid) const { return _fieldInfo[fid]; }
+ size_t getFieldInfoSize() const { return _fieldInfo.size(); }
+ virtual bool isFlattenable(ParseItem::ItemType type) const { return type == ParseItem::ITEM_NOT; }
+private:
+ mutable std::vector<QueryTerm::FieldInfo> _fieldInfo;
+ void updateFieldInfo(size_t fid, size_t offset, size_t fieldLength) const;
+#if WE_EVER_NEED_TO_CACHE_THIS_WE_MIGHT_WANT_SOME_CODE_HERE
+ HitList _cachedHitList;
+ bool _evaluated;
+#endif
+};
+
+/**
+ Unary Not operator. Just inverts the nodes result.
+*/
+class NotQueryNode : public QueryConnector
+{
+public:
+ DECLARE_IDENTIFIABLE_NS(search, NotQueryNode);
+ NotQueryNode() : QueryConnector("NOT") { }
+ virtual bool evaluate() const;
+};
+
+/**
+ N-ary Near operator. All terms must be within the given distance.
+*/
+class NearQueryNode : public AndQueryNode
+{
+public:
+ DECLARE_IDENTIFIABLE_NS(search, NearQueryNode);
+ NearQueryNode() : AndQueryNode("NEAR"), _distance(0) { }
+ NearQueryNode(const char * opName) : AndQueryNode(opName), _distance(0) { }
+ virtual bool evaluate() const;
+ void distance(size_t dist) { _distance = dist; }
+ size_t distance() const { return _distance; }
+ virtual void visitMembers(vespalib::ObjectVisitor &visitor) const;
+ virtual bool isFlattenable(ParseItem::ItemType type) const { return type == ParseItem::ITEM_NOT; }
+private:
+ size_t _distance;
+};
+
+/**
+ N-ary Ordered near operator. The terms must be in order and the distance between
+ the first and last must not exceed the given distance.
+*/
+class ONearQueryNode : public NearQueryNode
+{
+public:
+ DECLARE_IDENTIFIABLE_NS(search, ONearQueryNode);
+ ONearQueryNode() : NearQueryNode("ONEAR") { }
+ virtual ~ONearQueryNode() { }
+ virtual bool evaluate() const;
+};
+
+/**
+ Query packages the query tree. The usage pattern is like this.
+ Construct the tree with the correct tree description.
+ Get the leaf nodes and populate them with the term occurences.
+ Then evaluate the query. This is repeated for each document or chunk that
+ you want to process. The tree can also be printed. And you can read the
+ width and depth properties.
+*/
+class Query : public vespalib::Identifiable
+{
+public:
+ DECLARE_IDENTIFIABLE_NS(search, Query);
+ Query();
+ Query(const QueryNodeResultBase & org, const QueryPacketT & queryRep);
+ virtual ~Query() { }
+ /// Will build the query tree
+ bool build(const QueryNodeResultBase & org, const QueryPacketT & queryRep);
+ /// Will clear the results from the querytree.
+ void reset();
+ /// Will get all leafnodes.
+ void getLeafs(QueryTermList & tl);
+ void getLeafs(ConstQueryTermList & tl) const;
+ /// Gives you all phrases of this tree.
+ void getPhrases(QueryNodeRefList & tl);
+ void getPhrases(ConstQueryNodeRefList & tl) const;
+ bool evaluate() const;
+ size_t depth() const;
+ size_t width() const;
+ bool valid() const { return _root.get() != NULL; }
+ const QueryNode::LP & getRoot() const { return _root; }
+ QueryNode::LP & getRoot() { return _root; }
+private:
+ virtual void visitMembers(vespalib::ObjectVisitor &visitor) const;
+ QueryNode::LP _root;
+};
+
+}
+
diff --git a/searchlib/src/vespa/searchlib/query/querynode.cpp b/searchlib/src/vespa/searchlib/query/querynode.cpp
new file mode 100644
index 00000000000..fc96a352b7f
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/query/querynode.cpp
@@ -0,0 +1,199 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/searchlib/query/query.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".vsm.querynode");
+
+namespace search
+{
+
+IMPLEMENT_IDENTIFIABLE_ABSTRACT_NS(search, QueryNode, vespalib::Identifiable);
+
+void TestClose(QueryNode::LP & qn, QueryNodeList & currentNodeList)
+{
+ if (&qn != NULL) {
+ QueryConnector * qc = dynamic_cast<QueryConnector *> (qn.get());
+ if (qc) {
+ // qc->QueryNodeList = currentNodeList;
+ for (size_t i=0; i < currentNodeList.size(); i++) {
+ qc->push_back(currentNodeList[i]);
+ }
+ currentNodeList.clear();
+ currentNodeList.push_back(qn);
+ }
+ }
+}
+
+void NewNode(QueryNode::LP & qn, QueryNodeList & currentNodeList, size_t count=0)
+{
+ if ( ! currentNodeList.empty() ) {
+ QueryConnector *qc = dynamic_cast<QueryConnector *> (&*currentNodeList.back());
+ if (qc != 0 &&
+ ((dynamic_cast<NotQueryNode *>(qc) != NULL) ||
+ (dynamic_cast<PhraseQueryNode *>(qc) != NULL && (count == qc->size()))))
+ {
+ qc->push_back(qn);
+ } else {
+ currentNodeList.push_back(qn);
+ }
+ } else {
+ currentNodeList.push_back(qn);
+ }
+}
+
+namespace {
+ vespalib::stringref DEFAULT("default");
+}
+
+#define CASE(c, q) case c: { qn.reset(new q()); } break;
+QueryNode::UP QueryNode::Build(const QueryNode * parent, const QueryNodeResultBase & org, search::SimpleQueryStackDumpIterator & queryRep, bool allowRewrite)
+{
+ unsigned int arity = queryRep.getArity();
+ search::ParseItem::ItemType type = queryRep.getType();
+ UP qn;
+ switch (type) {
+ case search::ParseItem::ITEM_AND:
+ case search::ParseItem::ITEM_OR:
+ case search::ParseItem::ITEM_WEAK_AND:
+ case search::ParseItem::ITEM_EQUIV:
+ case search::ParseItem::ITEM_WEIGHTED_SET:
+ case search::ParseItem::ITEM_DOT_PRODUCT:
+ case search::ParseItem::ITEM_WAND:
+ case search::ParseItem::ITEM_NOT:
+ case search::ParseItem::ITEM_PHRASE:
+ case search::ParseItem::ITEM_NEAR:
+ case search::ParseItem::ITEM_ONEAR:
+ {
+ qn.reset(QueryConnector::create(type));
+ if (qn.get()) {
+ QueryConnector * qc = dynamic_cast<QueryConnector *> (qn.get());
+ NearQueryNode * nqn = dynamic_cast<NearQueryNode *> (qc);
+ if (nqn) {
+ nqn->distance(queryRep.getArg1());
+ }
+ if ((type == search::ParseItem::ITEM_WEAK_AND) ||
+ (type == search::ParseItem::ITEM_WEIGHTED_SET) ||
+ (type == search::ParseItem::ITEM_DOT_PRODUCT) ||
+ (type == search::ParseItem::ITEM_WAND))
+ {
+ const char * index;
+ size_t indexLen(0);
+ queryRep.getIndexName(&index, &indexLen);
+ qn->setIndex(vespalib::string(index, indexLen));
+ }
+ for (size_t i=0; i < arity; i++) {
+ queryRep.next();
+ if (qc->isFlattenable(queryRep.getType())) {
+ arity += queryRep.getArity();
+ } else {
+ LP child(Build(qc,
+ org,
+ queryRep,
+ allowRewrite && ((dynamic_cast<NearQueryNode *> (qn.get()) == NULL) && (dynamic_cast<PhraseQueryNode *> (qn.get()) == NULL))).release());
+ qc->push_back(child);
+ }
+ }
+ }
+ }
+ break;
+ case search::ParseItem::ITEM_NUMTERM:
+ case search::ParseItem::ITEM_TERM:
+ case search::ParseItem::ITEM_PREFIXTERM:
+ case search::ParseItem::ITEM_REGEXP:
+ case search::ParseItem::ITEM_SUBSTRINGTERM:
+ case search::ParseItem::ITEM_EXACTSTRINGTERM:
+ case search::ParseItem::ITEM_SUFFIXTERM:
+ case search::ParseItem::ITEM_PURE_WEIGHTED_STRING:
+ case search::ParseItem::ITEM_PURE_WEIGHTED_LONG:
+ {
+ const char * index;
+ size_t indexLen(0);
+ queryRep.getIndexName(&index, &indexLen);
+ if (indexLen == 0) {
+ if ((type == search::ParseItem::ITEM_PURE_WEIGHTED_STRING) || (type == search::ParseItem::ITEM_PURE_WEIGHTED_LONG)) {
+ const vespalib::string & ref = parent->getIndex();
+ index = ref.c_str();
+ indexLen = ref.size();
+ } else {
+ index = "default";
+ indexLen = strlen(index);
+ }
+ }
+ const char * term;
+ size_t termLen(0);
+ queryRep.getTerm(&term, &termLen);
+ QueryTerm::SearchTerm sTerm(QueryTerm::WORD);
+ switch (type) {
+ case search::ParseItem::ITEM_REGEXP:
+ sTerm = QueryTerm::REGEXP;
+ break;
+ case search::ParseItem::ITEM_PREFIXTERM:
+ sTerm = QueryTerm::PREFIXTERM;
+ break;
+ case search::ParseItem::ITEM_SUBSTRINGTERM:
+ sTerm = QueryTerm::SUBSTRINGTERM;
+ break;
+ case search::ParseItem::ITEM_EXACTSTRINGTERM:
+ sTerm = QueryTerm::EXACTSTRINGTERM;
+ break;
+ case search::ParseItem::ITEM_SUFFIXTERM:
+ sTerm = QueryTerm::SUFFIXTERM;
+ break;
+ default:
+ break;
+ }
+ QueryTerm::string ssTerm(term, termLen);
+ QueryTerm::string ssIndex(index, indexLen);
+ if (ssIndex == "sddocname") {
+ // This is suboptimal as the term should be checked too.
+ // But it will do for now as only correct sddocname queries are sent down.
+ qn.reset(new TrueNode());
+ } else {
+ std::unique_ptr<QueryTerm> qt(new QueryTerm(org, ssTerm, ssIndex, sTerm));
+ qt->setWeight(queryRep.GetWeight());
+ qt->setUniqueId(queryRep.getUniqueId());
+ if ( qt->encoding().isBase10Integer() || ! qt->encoding().isFloat() || ! org.getRewriteFloatTerms() || !allowRewrite || (ssTerm.find('.') == vespalib::string::npos)) {
+ qn.reset(qt.release());
+ } else {
+ std::unique_ptr<PhraseQueryNode> phrase(new PhraseQueryNode());
+
+ phrase->push_back(LP(new QueryTerm(org, ssTerm.substr(0, ssTerm.find('.')), ssIndex, QueryTerm::WORD)));
+ phrase->push_back(LP(new QueryTerm(org, ssTerm.substr(ssTerm.find('.') + 1), ssIndex, QueryTerm::WORD)));
+ std::unique_ptr<EquivQueryNode> orqn(new EquivQueryNode());
+ orqn->push_back(LP(qt.release()));
+ orqn->push_back(LP(phrase.release()));
+ qn.reset(orqn.release());
+ }
+ }
+ }
+ break;
+ case search::ParseItem::ITEM_RANK:
+ {
+ if (arity >= 1) {
+ queryRep.next();
+ qn = Build(parent, org, queryRep, false);
+ for (uint32_t skipCount = arity-1; (skipCount > 0) && queryRep.next(); skipCount--) {
+ skipCount += queryRep.getArity();
+ }
+ }
+ }
+ break;
+ default:
+ {
+ for (uint32_t skipCount = arity; (skipCount > 0) && queryRep.next(); skipCount--) {
+ skipCount += queryRep.getArity();
+ LOG(warning, "Does not understand anything,.... skipping %d", type);
+ }
+ }
+ break;
+ }
+ return qn;
+}
+#undef CASE
+
+const HitList & QueryNode::evaluateHits(HitList & hl) const
+{
+ return hl;
+}
+
+}
diff --git a/searchlib/src/vespa/searchlib/query/querynode.h b/searchlib/src/vespa/searchlib/query/querynode.h
new file mode 100644
index 00000000000..f05c34be89b
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/query/querynode.h
@@ -0,0 +1,66 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/searchlib/parsequery/stackdumpiterator.h>
+#include <vespa/searchlib/query/base.h>
+#include <vespa/searchlib/query/querynoderesultbase.h>
+#include <vespa/vespalib/util/linkedptr.h>
+#include <vespa/searchlib/common/identifiable.h>
+#include "posocc.h"
+
+namespace search
+{
+
+class QueryTerm;
+class QueryNode;
+/// Typedef a simple list that contains references to QueryNodes.
+typedef std::vector<QueryNode *> QueryNodeRefList;
+/// Typedef a simple list that contains const references to QueryNodes.
+typedef std::vector<const QueryNode *> ConstQueryNodeRefList;
+/// Typedef a simple list that contains references to QueryTerms.
+typedef std::vector<QueryTerm *> QueryTermList;
+/// Typedef a simple list that contains const references to QueryTerms.
+typedef std::vector<const QueryTerm *> ConstQueryTermList;
+
+/**
+ This is the base of any node in the query tree. Both leaf nodes (terms)
+ and operator nodes (AND, NOT, OR, PHRASE, NEAR, ONEAR, etc).
+*/
+class QueryNode : public vespalib::Identifiable
+{
+ public:
+ DECLARE_IDENTIFIABLE_ABSTRACT_NS(search, QueryNode);
+ typedef vespalib::LinkedPtr<QueryNode> LP;
+ typedef std::unique_ptr<QueryNode> UP;
+
+ virtual ~QueryNode() { }
+ /// This evalutes if the subtree starting here evaluates to true.
+ virtual bool evaluate() const = 0;
+ /// This return the hitList for this subtree. Does only give meaning in a
+ /// phrase search or any other search that requires position info.
+ virtual const HitList & evaluateHits(HitList & hl) const;
+ /// Clears all the hitlists so the query tree can be reused.
+ virtual void reset() = 0;
+ /// Gives you all leafs of this tree.
+ virtual void getLeafs(QueryTermList & tl) = 0;
+ /// Gives you all leafs of this tree. Indicating that they are all const.
+ virtual void getLeafs(ConstQueryTermList & tl) const = 0;
+ /// Gives you all phrases of this tree.
+ virtual void getPhrases(QueryNodeRefList & tl) = 0;
+ /// Gives you all phrases of this tree. Indicating that they are all const.
+ virtual void getPhrases(ConstQueryNodeRefList & tl) const = 0;
+ virtual void setIndex(const vespalib::string & index) = 0;
+ virtual const vespalib::string & getIndex() const = 0;
+
+ /// Return the depth of this tree.
+ virtual size_t depth() const { return 1; }
+ /// Return the width of this tree.
+ virtual size_t width() const { return 1; }
+ static UP Build(const QueryNode * parent, const QueryNodeResultBase & org, search::SimpleQueryStackDumpIterator & queryRep, bool allowRewrite);
+};
+
+/// A list conating the QuerNode objects. With copy/assignment.
+typedef std::vector<QueryNode::LP> QueryNodeList;
+
+}
+
diff --git a/searchlib/src/vespa/searchlib/query/querynoderesultbase.cpp b/searchlib/src/vespa/searchlib/query/querynoderesultbase.cpp
new file mode 100644
index 00000000000..48d056cce80
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/query/querynoderesultbase.cpp
@@ -0,0 +1,8 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include "querynoderesultbase.h"
+
+namespace search {
+
+IMPLEMENT_DUPLICATE(EmptyQueryNodeResult);
+
+}
diff --git a/searchlib/src/vespa/searchlib/query/querynoderesultbase.h b/searchlib/src/vespa/searchlib/query/querynoderesultbase.h
new file mode 100644
index 00000000000..80030e9eb31
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/query/querynoderesultbase.h
@@ -0,0 +1,35 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/searchlib/query/base.h>
+
+namespace search
+{
+
+/**
+ This is the base of any item that can be attached to the leafs in a querytree.
+ The intention is to put stuff here that are search specific. Fx to differentiate
+ between streamed and indexed variants.
+*/
+class QueryNodeResultBase : public Object
+{
+ public:
+ virtual bool evaluate() const = 0;
+ virtual void reset() = 0;
+ virtual bool getRewriteFloatTerms() const { return false; }
+};
+
+class EmptyQueryNodeResult : public QueryNodeResultBase
+{
+ public:
+ DUPLICATE(EmptyQueryNodeResult);
+ virtual ~EmptyQueryNodeResult() { }
+ virtual bool evaluate() const { return true; }
+ virtual void reset() { }
+ private:
+};
+
+
+typedef ObjectContainer<QueryNodeResultBase> QueryNodeResultBaseContainer;
+}
+
diff --git a/searchlib/src/vespa/searchlib/query/queryterm.cpp b/searchlib/src/vespa/searchlib/query/queryterm.cpp
new file mode 100644
index 00000000000..e6ab5591872
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/query/queryterm.cpp
@@ -0,0 +1,469 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <math.h>
+#include <limits>
+#include <vespa/searchlib/query/queryterm.h>
+#include <vespa/vespalib/objects/visit.h>
+#include <vespa/vespalib/text/utf8.h>
+#include <cxxabi.h>
+
+namespace {
+
+class CharInfo {
+public:
+ CharInfo();
+ uint8_t get(uint8_t c) const { return _charInfo[c]; }
+private:
+ uint8_t _charInfo[256];
+};
+
+CharInfo::CharInfo()
+{
+ // XXX: Should refactor to reduce number of magic constants.
+ memset(_charInfo, 0x01, 128); // All 7 bits are ascii7bit
+ memset(_charInfo+128, 0x00, 128); // The rest are not.
+ memset(_charInfo + '0', 0x07, 10);
+ _charInfo[uint8_t('-')] = 0x07;
+ _charInfo[uint8_t('<')] = 0x07;
+ _charInfo[uint8_t('>')] = 0x07;
+ _charInfo[uint8_t(';')] = 0x07;
+ _charInfo[uint8_t('[')] = 0x07;
+ _charInfo[uint8_t(']')] = 0x07;
+
+ _charInfo[uint8_t('.')] = 0x05;
+ _charInfo[uint8_t('+')] = 0x05;
+ _charInfo[uint8_t('e')] = 0x05;
+ _charInfo[uint8_t('E')] = 0x05;
+}
+
+static CharInfo _G_charTable;
+
+
+template <typename N>
+bool isValidInteger(int64_t value)
+{
+ return value >= std::numeric_limits<N>::min() && value <= std::numeric_limits<N>::max();
+}
+
+}
+
+namespace search
+{
+
+IMPLEMENT_IDENTIFIABLE_NS(search, QueryTerm, QueryNode);
+
+QueryTermBase::QueryTermBase() :
+ QueryTermSimple(),
+ _cachedTermLen(0),
+ _termUCS4()
+{
+ _termUCS4.push_back(0);
+}
+
+QueryTermBase::QueryTermBase(const string & termS, SearchTerm type) :
+ QueryTermSimple(termS, type),
+ _cachedTermLen(0),
+ _termUCS4()
+{
+ _termUCS4.reserve(termS.size() + 1);
+ vespalib::Utf8Reader r(termS);
+ while (r.hasMore()) {
+ ucs4_t u = r.getChar();
+ _termUCS4.push_back(u);
+ }
+ _termUCS4.push_back(0);
+ _cachedTermLen = _termUCS4.size() - 1;
+}
+
+QueryTerm::QueryTerm() :
+ QueryTermBase(),
+ _index(),
+ _encoding(),
+ _result(),
+ _hitList(),
+ _weight(100),
+ _uniqueId(0),
+ _fieldInfo(32)
+{
+}
+
+void
+QueryTermSimple::visitMembers(vespalib::ObjectVisitor & visitor) const
+{
+ visit(visitor, "term", _term);
+ visit(visitor, "type", _type);
+}
+
+template <typename N>
+QueryTermSimple::RangeResult<N>
+QueryTermSimple::getFloatRange() const
+{
+ double lowRaw, highRaw;
+ bool valid = getAsDoubleTerm(lowRaw, highRaw);
+ RangeResult<N> res;
+ res.valid = valid;
+ if (!valid) {
+ res.low = std::numeric_limits<N>::max();
+ res.high = - std::numeric_limits<N>::max();
+ res.adjusted = true;
+ } else {
+ res.low = lowRaw;
+ res.high = highRaw;
+ }
+ return res;
+}
+
+namespace {
+
+bool isRepresentableByInt64(double d) {
+ return (d > double(std::numeric_limits<int64_t>::min()))
+ && (d < double(std::numeric_limits<int64_t>::max()));
+}
+
+}
+
+bool
+QueryTermSimple::getRangeInternal(int64_t & low, int64_t & high) const
+{
+ bool valid = getAsIntegerTerm(low, high);
+ if ( ! valid ) {
+ double l(0), h(0);
+ valid = getAsDoubleTerm(l, h);
+ if (valid) {
+ if ((l == h) && isRepresentableByInt64(l)) {
+ low = high = round(l);
+ } else {
+ if (l > double(std::numeric_limits<int64_t>::min())) {
+ if (l < double(std::numeric_limits<int64_t>::max())) {
+ low = ceil(l);
+ } else {
+ low = std::numeric_limits<int64_t>::max();
+ }
+ }
+ if (h < double(std::numeric_limits<int64_t>::max())) {
+ if (h > double(std::numeric_limits<int64_t>::min())) {
+ high = floor(h);
+ } else {
+ high = std::numeric_limits<int64_t>::min();
+ }
+ }
+ }
+ }
+ }
+ return valid;
+}
+
+template <typename N>
+QueryTermSimple::RangeResult<N>
+QueryTermSimple::getIntegerRange() const
+{
+ int64_t lowRaw, highRaw;
+ bool valid = getRangeInternal(lowRaw, highRaw);
+ RangeResult<N> res;
+ res.valid = valid;
+ if (valid) {
+ bool validLow = isValidInteger<N>(lowRaw);
+ if (validLow) {
+ res.low = lowRaw;
+ } else {
+ res.low = (lowRaw < static_cast<int64_t>(std::numeric_limits<N>::min()) ?
+ std::numeric_limits<N>::min() : std::numeric_limits<N>::max());
+ res.adjusted = true;
+ }
+ bool validHigh = isValidInteger<N>(highRaw);
+ if (validHigh) {
+ res.high = highRaw;
+ } else {
+ res.high = (highRaw > static_cast<int64_t>(std::numeric_limits<N>::max()) ?
+ std::numeric_limits<N>::max() : std::numeric_limits<N>::min());
+ res.adjusted = true;
+ }
+ } else {
+ res.low = std::numeric_limits<N>::max();
+ res.high = std::numeric_limits<N>::min();
+ res.adjusted = true;
+ }
+ return res;
+}
+
+template <>
+QueryTermSimple::RangeResult<float>
+QueryTermSimple::getRange() const
+{
+ return getFloatRange<float>();
+}
+
+template <>
+QueryTermSimple::RangeResult<double>
+QueryTermSimple::getRange() const
+{
+ return getFloatRange<double>();
+}
+
+template <>
+QueryTermSimple::RangeResult<int8_t>
+QueryTermSimple::getRange() const
+{
+ return getIntegerRange<int8_t>();
+}
+
+template <>
+QueryTermSimple::RangeResult<int16_t>
+QueryTermSimple::getRange() const
+{
+ return getIntegerRange<int16_t>();
+}
+
+template <>
+QueryTermSimple::RangeResult<int32_t>
+QueryTermSimple::getRange() const
+{
+ return getIntegerRange<int32_t>();
+}
+
+template <>
+QueryTermSimple::RangeResult<int64_t>
+QueryTermSimple::getRange() const
+{
+ return getIntegerRange<int64_t>();
+}
+
+void
+QueryTermBase::visitMembers(vespalib::ObjectVisitor & visitor) const
+{
+ QueryTermSimple::visitMembers(visitor);
+ visit(visitor, "termlength", _cachedTermLen);
+}
+
+void
+QueryTerm::visitMembers(vespalib::ObjectVisitor & visitor) const
+{
+ QueryTermBase::visitMembers(visitor);
+ visit(visitor, "encoding.isBase10Integer", _encoding.isBase10Integer());
+ visit(visitor, "encoding.isFloat", _encoding.isFloat());
+ visit(visitor, "encoding.isAscii7Bit", _encoding.isAscii7Bit());
+ visit(visitor, "index", _index);
+ visit(visitor, "weight", _weight.percent());
+ visit(visitor, "uniqueid", _uniqueId);
+}
+
+
+QueryTerm::QueryTerm(const QueryNodeResultBase & org, const string & termS, const string & indexS, SearchTerm type) :
+ QueryTermBase(termS, type),
+ _index(indexS),
+ _encoding(0x01),
+ _result(org),
+ _hitList(),
+ _weight(100),
+ _uniqueId(0),
+ _fieldInfo(32)
+{
+ if (!termS.empty()) {
+ uint8_t enc(0xff);
+ for (size_t i(0), m(termS.size()); i < m; i++) {
+ enc &= _G_charTable.get(termS[i]);
+ }
+ _encoding = enc;
+ }
+}
+
+void QueryTerm::getPhrases(QueryNodeRefList & tl) { (void) tl; }
+void QueryTerm::getPhrases(ConstQueryNodeRefList & tl) const { (void) tl; }
+void QueryTerm::getLeafs(QueryTermList & tl) { tl.push_back(this); }
+void QueryTerm::getLeafs(ConstQueryTermList & tl) const { tl.push_back(this); }
+bool QueryTerm::evaluate() const { return !_hitList.empty() && _result->evaluate(); }
+void QueryTerm::reset() { _hitList.clear(); _result->reset(); }
+const HitList & QueryTerm::evaluateHits(HitList & UNUSED_PARAM(hl)) const { return _hitList; }
+
+void QueryTerm::resizeFieldId(size_t fieldNo)
+{
+ if (fieldNo >= _fieldInfo.size()) {
+ _fieldInfo.resize(fieldNo + 1);
+ }
+}
+
+void QueryTerm::add(unsigned pos, unsigned context, int32_t weight_)
+{
+ _hitList.emplace_back(pos, context, weight_);
+}
+
+template <int B>
+struct IntDecoder {
+ static int64_t fromstr(const char * v, char ** end) { return strtoll(v, end, B); }
+ static int64_t nearestDownwd(int64_t n, int64_t min) { return (n > min ? n - 1 : n); }
+ static int64_t nearestUpward(int64_t n, int64_t max) { return (n < max ? n + 1 : n); }
+};
+
+struct DoubleDecoder {
+ static double fromstr(const char * v, char ** end) { return strtod(v, end); }
+ static double nearestDownwd(double n, double min) { return nextafterf(n, min); }
+ static double nearestUpward(double n, double max) { return nextafterf(n, max); }
+};
+
+bool QueryTermSimple::getAsIntegerTerm(int64_t & lower, int64_t & upper) const
+{
+ lower = std::numeric_limits<int64_t>::min();
+ upper = std::numeric_limits<int64_t>::max();
+ return getAsNumericTerm(lower, upper, IntDecoder<10>());
+}
+
+bool QueryTermSimple::getAsDoubleTerm(double & lower, double & upper) const
+{
+ lower = - std::numeric_limits<double>::max();
+ upper = std::numeric_limits<double>::max();
+ return getAsNumericTerm(lower, upper, DoubleDecoder());
+}
+
+QueryTermSimple::QueryTermSimple() :
+ _type(WORD),
+ _term(),
+ _diversityAttribute(),
+ _rangeLimit(0),
+ _maxPerGroup(0),
+ _diversityCutoffGroups(std::numeric_limits<uint32_t>::max()),
+ _diversityCutoffStrict(false),
+ _valid(true)
+{
+}
+
+namespace {
+
+bool isFullRange(const vespalib::stringref & s) {
+ const size_t sz(s.size());
+ return (sz >= 3u) &&
+ (s[0] == '<' || s[0] == '[') &&
+ (s[sz-1] == '>' || s[sz-1] == ']');
+}
+
+}
+
+QueryTermSimple::QueryTermSimple(const string & term_, SearchTerm type) :
+ _type(type),
+ _term(term_),
+ _diversityAttribute(),
+ _rangeLimit(0),
+ _maxPerGroup(0),
+ _diversityCutoffGroups(std::numeric_limits<uint32_t>::max()),
+ _diversityCutoffStrict(false),
+ _valid(true)
+{
+ if (isFullRange(_term)) {
+ stringref rest(_term.c_str() + 1, _term.size() - 2);
+ std::vector<stringref> parts;
+ parts.reserve(5);
+ while (! rest.empty() ) {
+ size_t pos(rest.find(';'));
+ if (pos != vespalib::string::npos) {
+ parts.push_back(rest.substr(0, pos));
+ rest = rest.substr(pos + 1);
+ if (rest.empty()) {
+ parts.push_back(rest);
+ }
+ } else {
+ parts.push_back(rest);
+ rest = stringref();
+ }
+ }
+ _valid = parts.size() >= 2;
+ if (parts.size() >= 3) {
+ _rangeLimit = strtol(parts[2].c_str(), NULL, 0);
+ if (parts.size() > 3) {
+ _valid = parts.size() >= 5;
+ if (_valid) {
+ _diversityAttribute = parts[3];
+ _maxPerGroup = strtoul(parts[4].c_str(), NULL, 0);
+ if ((_maxPerGroup > 0) && (parts.size() > 5)) {
+ char *err = nullptr;
+ size_t cutoffGroups = strtoul(parts[5].c_str(), &err, 0);
+ if ((err == nullptr) || (size_t(err - parts[5].c_str()) == parts[5].size())) {
+ _diversityCutoffGroups = cutoffGroups;
+ }
+ if (parts.size() > 6) {
+ _diversityCutoffStrict = (parts[6] == "strict");
+ _valid = (parts.size() == 7);
+ }
+ }
+ }
+ }
+ }
+ }
+}
+
+template <typename T, typename D>
+bool
+QueryTermSimple::getAsNumericTerm(T & lower, T & upper, D d) const
+{
+ bool valid(empty());
+ size_t sz(_term.size());
+ if (sz) {
+ char *err(NULL);
+ T low(lower);
+ T high(upper);
+ const char * q = _term.c_str();
+ const char first(q[0]);
+ const char last(q[sz-1]);
+ q += ((first == '<') || (first == '>') || (first == '[')) ? 1 : 0;
+ T ll = d.fromstr(q, &err);
+ valid = isValid() && ((*err == 0) || (*err == ';'));
+ if (valid) {
+ if (first == '<' && (*err == 0)) {
+ high = d.nearestDownwd(ll, lower);
+ } else if (first == '>' && (*err == 0)) {
+ low = d.nearestUpward(ll, upper);
+ } else if ((first == '[') || (first == '<')) {
+ if (q != err) {
+ low = (first == '[') ? ll : d.nearestUpward(ll, upper);
+ }
+ q = err + 1;
+ T hh = d.fromstr(q, &err);
+ bool hasUpperLimit(q != err);
+ if (*err == ';') {
+ err = const_cast<char *>(_term.end() - 1);
+ }
+ valid = (*err == last) && ((last == ']') || (last == '>'));
+ if (hasUpperLimit) {
+ high = (last == ']') ? hh : d.nearestDownwd(hh, lower);
+ }
+ } else {
+ low = high = ll;
+ }
+ }
+ if (valid) {
+ lower = low;
+ upper = high;
+ }
+ }
+ return valid;
+}
+
+vespalib::string
+QueryTermSimple::getClassName() const
+{
+ vespalib::string name(typeid(*this).name());
+ int status = 0;
+ size_t size = 0;
+ char *unmangled = abi::__cxa_demangle(name.c_str(), 0, &size, &status);
+ vespalib::string result(unmangled);
+ free(unmangled);
+ return result;
+}
+
+}
+
+void visit(vespalib::ObjectVisitor &self, const vespalib::string &name,
+ const search::QueryTermSimple *obj)
+{
+ if (obj != 0) {
+ self.openStruct(name, obj->getClassName());
+ obj->visitMembers(self);
+ self.closeStruct();
+ } else {
+ self.visitNull(name);
+ }
+}
+
+void visit(vespalib::ObjectVisitor &self, const vespalib::string &name,
+ const search::QueryTermSimple &obj)
+{
+ visit(self, name, &obj);
+}
diff --git a/searchlib/src/vespa/searchlib/query/queryterm.h b/searchlib/src/vespa/searchlib/query/queryterm.h
new file mode 100644
index 00000000000..5d8d971c9f8
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/query/queryterm.h
@@ -0,0 +1,190 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/searchlib/query/posocc.h>
+#include <vespa/searchlib/query/querynode.h>
+#include <vespa/vespalib/stllike/string.h>
+#include <vespa/vespalib/util/array.h>
+
+#include "weight.h"
+
+namespace search
+{
+
+/// An stl based ucs4 string identical to a char string.
+typedef vespalib::Array<ucs4_t> UCS4StringT;
+
+class QueryTermSimple
+{
+public:
+ typedef std::unique_ptr<QueryTermSimple> UP;
+ typedef vespalib::string string;
+ typedef vespalib::stringref stringref;
+ enum SearchTerm {
+ WORD,
+ PREFIXTERM,
+ SUBSTRINGTERM,
+ EXACTSTRINGTERM,
+ SUFFIXTERM,
+ REGEXP
+ };
+
+ template <typename N>
+ struct RangeResult {
+ N low;
+ N high;
+ bool valid; // Whether parsing of the range was successful
+ bool adjusted; // Whether the low and high was adjusted according to min and max limits of the given type.
+ RangeResult() : low(), high(), valid(true), adjusted(false) {}
+ bool isEqual() const { return low == high; }
+ };
+
+ QueryTermSimple();
+ QueryTermSimple(const string & term_, SearchTerm type);
+ virtual ~QueryTermSimple() { }
+ /**
+ * Extracts the content of this query term as a range with low and high values.
+ */
+ template <typename N>
+ RangeResult<N> getRange() const;
+ int getRangeLimit() const { return _rangeLimit; }
+ size_t getMaxPerGroup() const { return _maxPerGroup; }
+ size_t getDiversityCutoffGroups() const { return _diversityCutoffGroups; }
+ bool getDiversityCutoffStrict() const { return _diversityCutoffStrict; }
+ vespalib::stringref getDiversityAttribute() const { return _diversityAttribute; }
+ bool getAsIntegerTerm(int64_t & lower, int64_t & upper) const;
+ bool getAsDoubleTerm(double & lower, double & upper) const;
+ const char * getTerm() const { return _term.c_str(); }
+ bool isPrefix() const { return (_type == PREFIXTERM); }
+ bool isSubstring() const { return (_type == SUBSTRINGTERM); }
+ bool isExactstring() const { return (_type == EXACTSTRINGTERM); }
+ bool isSuffix() const { return (_type == SUFFIXTERM); }
+ bool isWord() const { return (_type == WORD); }
+ bool isRegex() const { return (_type == REGEXP); }
+ bool empty() const { return _term.empty(); }
+ virtual void visitMembers(vespalib::ObjectVisitor &visitor) const;
+ vespalib::string getClassName() const;
+ bool isValid() const { return _valid; }
+private:
+ bool getRangeInternal(int64_t & low, int64_t & high) const;
+ template <typename N>
+ RangeResult<N> getIntegerRange() const;
+ template <typename N>
+ RangeResult<N> getFloatRange() const;
+ SearchTerm _type;
+ string _term;
+ stringref _diversityAttribute;
+ int _rangeLimit;
+ uint32_t _maxPerGroup;
+ uint32_t _diversityCutoffGroups;
+ bool _diversityCutoffStrict;
+ bool _valid;
+ template <typename T, typename D>
+ bool getAsNumericTerm(T & lower, T & upper, D d) const;
+};
+
+class QueryTermBase : public QueryTermSimple
+{
+public:
+ typedef std::unique_ptr<QueryTermBase> UP;
+ QueryTermBase();
+ QueryTermBase(const string & term_, SearchTerm type);
+ size_t getTermLen() const { return _cachedTermLen; }
+ size_t term(const char * & t) const { t = getTerm(); return _cachedTermLen; }
+ size_t term(const ucs4_t * & t) const { t = _termUCS4.begin(); return _cachedTermLen; }
+ virtual void visitMembers(vespalib::ObjectVisitor &visitor) const;
+private:
+ size_t _cachedTermLen;
+ UCS4StringT _termUCS4;
+};
+
+/**
+ This is a leaf in the Query tree. All terms are leafs.
+ A QueryTerm has the index for where to find the term. The term is a string,
+ both char(utf8) and ucs4. There are flags indicating encoding. And there are
+ flags indicating if it should be considered a prefix.
+*/
+class QueryTerm : public QueryTermBase, public QueryNode
+{
+public:
+ typedef std::unique_ptr<QueryTerm> UP;
+ class EncodingBitMap
+ {
+ public:
+ EncodingBitMap(unsigned bm=0) : _enc(bm) { }
+ bool isFloat() const { return _enc & Float; }
+ bool isBase10Integer() const { return _enc & Base10Integer; }
+ bool isAscii7Bit() const { return _enc & Ascii7Bit; }
+ void setBase10Integer(bool v) { if (v) _enc |= Base10Integer; else _enc &= ~Base10Integer; }
+ void setAscii7Bit(bool v) { if (v) _enc |= Ascii7Bit; else _enc &= ~Ascii7Bit; }
+ void setFloat(bool v) { if (v) _enc |= Float; else _enc &= ~Float; }
+ private:
+ enum { Ascii7Bit=0x01, Base10Integer=0x02, Float=0x04 };
+ unsigned _enc;
+ };
+ class FieldInfo {
+ public:
+ FieldInfo() : _hitListOffset(0), _hitCount(0), _fieldLength(0) { }
+ FieldInfo(uint32_t hitListOffset, uint32_t hitCount, uint32_t fieldLength) :
+ _hitListOffset(hitListOffset), _hitCount(hitCount), _fieldLength(fieldLength) { }
+ size_t getHitOffset() const { return _hitListOffset; }
+ size_t getHitCount() const { return _hitCount; }
+ size_t getFieldLength() const { return _fieldLength; }
+ void setHitOffset(size_t v) { _hitListOffset = v; }
+ void setHitCount(size_t v) { _hitCount = v; }
+ void setFieldLength(size_t v) { _fieldLength = v; }
+ private:
+ uint32_t _hitListOffset;
+ uint32_t _hitCount;
+ uint32_t _fieldLength;
+ };
+ DECLARE_IDENTIFIABLE_NS(search, QueryTerm);
+ QueryTerm();
+ QueryTerm(const QueryNodeResultBase & org, const string & term, const string & index, SearchTerm type);
+ virtual ~QueryTerm() { }
+ virtual bool evaluate() const;
+ virtual const HitList & evaluateHits(HitList & hl) const;
+ virtual void reset();
+ virtual void getLeafs(QueryTermList & tl);
+ virtual void getLeafs(ConstQueryTermList & tl) const;
+ /// Gives you all phrases of this tree.
+ virtual void getPhrases(QueryNodeRefList & tl);
+ /// Gives you all phrases of this tree. Indicating that they are all const.
+ virtual void getPhrases(ConstQueryNodeRefList & tl) const;
+
+ void add(unsigned pos, unsigned context, int32_t weight);
+ EncodingBitMap encoding() const { return _encoding; }
+ size_t termLen() const { return getTermLen(); }
+ const string & index() const { return _index; }
+ void setWeight(query::Weight v) { _weight = v; }
+ void setUniqueId(uint32_t u) { _uniqueId = u; }
+ query::Weight weight() const { return _weight; }
+ uint32_t uniqueId() const { return _uniqueId; }
+ void resizeFieldId(size_t fieldId);
+ const FieldInfo & getFieldInfo(size_t fid) const { return _fieldInfo[fid]; }
+ FieldInfo & getFieldInfo(size_t fid) { return _fieldInfo[fid]; }
+ size_t getFieldInfoSize() const { return _fieldInfo.size(); }
+ const QueryNodeResultBase & getQueryItem() const { return *_result; }
+ QueryNodeResultBase & getQueryItem() { return *_result; }
+ const HitList & getHitList() const { return _hitList; }
+ virtual void visitMembers(vespalib::ObjectVisitor &visitor) const;
+ virtual void setIndex(const string & index_) { _index = index_; }
+ virtual const string & getIndex() const { return _index; }
+protected:
+ string _index;
+ EncodingBitMap _encoding;
+ QueryNodeResultBaseContainer _result;
+ HitList _hitList;
+private:
+ query::Weight _weight;
+ uint32_t _uniqueId;
+ std::vector<FieldInfo> _fieldInfo;
+};
+
+}
+
+void visit(vespalib::ObjectVisitor &self, const vespalib::string &name,
+ const search::QueryTermSimple &obj);
+void visit(vespalib::ObjectVisitor &self, const vespalib::string &name,
+ const search::QueryTermSimple *obj);
+
diff --git a/searchlib/src/vespa/searchlib/query/tree/.gitignore b/searchlib/src/vespa/searchlib/query/tree/.gitignore
new file mode 100644
index 00000000000..583460ae288
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/query/tree/.gitignore
@@ -0,0 +1,3 @@
+*.So
+.depend
+Makefile
diff --git a/searchlib/src/vespa/searchlib/query/tree/CMakeLists.txt b/searchlib/src/vespa/searchlib/query/tree/CMakeLists.txt
new file mode 100644
index 00000000000..3f7f5bdb3af
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/query/tree/CMakeLists.txt
@@ -0,0 +1,13 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_library(searchlib_tree OBJECT
+ SOURCES
+ intermediate.cpp
+ intermediatenodes.cpp
+ querybuilder.cpp
+ stackdumpcreator.cpp
+ term.cpp
+ location.cpp
+ range.cpp
+ termnodes.cpp
+ DEPENDS
+)
diff --git a/searchlib/src/vespa/searchlib/query/tree/OWNERS b/searchlib/src/vespa/searchlib/query/tree/OWNERS
new file mode 100644
index 00000000000..12b533ec610
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/query/tree/OWNERS
@@ -0,0 +1 @@
+havardpe
diff --git a/searchlib/src/vespa/searchlib/query/tree/customtypetermvisitor.h b/searchlib/src/vespa/searchlib/query/tree/customtypetermvisitor.h
new file mode 100644
index 00000000000..ca58b6c1dce
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/query/tree/customtypetermvisitor.h
@@ -0,0 +1,37 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/query/tree/customtypevisitor.h>
+#include <vespa/searchlib/query/tree/intermediate.h>
+
+namespace search {
+namespace query {
+
+template <class NodeTypes>
+class CustomTypeTermVisitor : public CustomTypeVisitor<NodeTypes>
+{
+protected:
+ void visitChildren(Intermediate &n) {
+ for (size_t i = 0; i < n.getChildren().size(); ++i) {
+ n.getChildren()[i]->accept(*this);
+ }
+ }
+
+private:
+ virtual void visit(typename NodeTypes::And &n) { visitChildren(n); }
+ virtual void visit(typename NodeTypes::AndNot &n) { visitChildren(n); }
+ virtual void visit(typename NodeTypes::Equiv &n) { visitChildren(n); }
+ virtual void visit(typename NodeTypes::Near &n) { visitChildren(n); }
+ virtual void visit(typename NodeTypes::ONear &n) { visitChildren(n); }
+ virtual void visit(typename NodeTypes::Or &n) { visitChildren(n); }
+ virtual void visit(typename NodeTypes::Rank &n) { visitChildren(n); }
+ virtual void visit(typename NodeTypes::WeakAnd &n) { visitChildren(n); }
+
+ // phrases and weighted set terms are conceptual leaf nodes and
+ // should be handled that way.
+};
+
+} // namespace query
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/query/tree/customtypevisitor.h b/searchlib/src/vespa/searchlib/query/tree/customtypevisitor.h
new file mode 100644
index 00000000000..ce1cff082b7
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/query/tree/customtypevisitor.h
@@ -0,0 +1,105 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "queryvisitor.h"
+
+namespace search {
+namespace query {
+
+/**
+ * By typedefing a (complete) set of subclasses to the query nodes in
+ * a traits class, you can get the CustomTypeVisitor to visit those
+ * types instead of their base classes.
+ *
+ * The traits class must define the following types:
+ * And, AndNot, Equiv, NumberTerm, Near, ONear, Or,
+ * Phrase, PrefixTerm, RangeTerm, Rank, StringTerm, SubstringTerm,
+ * SuffixTerm, WeakAnd, WeightedSetTerm, DotProduct, RegExpTerm
+ *
+ * See customtypevisitor_test.cpp for an example.
+ *
+ * Please note that your CustomTypeVisitor<T> subclass should NOT
+ * implement any of the regular QueryVisitor member functions, as this
+ * would interfere with the routing.
+ */
+template <class NodeTypes>
+class CustomTypeVisitor : public QueryVisitor {
+public:
+ virtual ~CustomTypeVisitor() {}
+
+ virtual void visit(typename NodeTypes::And &) = 0;
+ virtual void visit(typename NodeTypes::AndNot &) = 0;
+ virtual void visit(typename NodeTypes::Equiv &) = 0;
+ virtual void visit(typename NodeTypes::NumberTerm &) = 0;
+ virtual void visit(typename NodeTypes::LocationTerm &) = 0;
+ virtual void visit(typename NodeTypes::Near &) = 0;
+ virtual void visit(typename NodeTypes::ONear &) = 0;
+ virtual void visit(typename NodeTypes::Or &) = 0;
+ virtual void visit(typename NodeTypes::Phrase &) = 0;
+ virtual void visit(typename NodeTypes::PrefixTerm &) = 0;
+ virtual void visit(typename NodeTypes::RangeTerm &) = 0;
+ virtual void visit(typename NodeTypes::Rank &) = 0;
+ virtual void visit(typename NodeTypes::StringTerm &) = 0;
+ virtual void visit(typename NodeTypes::SubstringTerm &) = 0;
+ virtual void visit(typename NodeTypes::SuffixTerm &) = 0;
+ virtual void visit(typename NodeTypes::WeakAnd &) = 0;
+ virtual void visit(typename NodeTypes::WeightedSetTerm &) = 0;
+ virtual void visit(typename NodeTypes::DotProduct &) = 0;
+ virtual void visit(typename NodeTypes::WandTerm &) = 0;
+ virtual void visit(typename NodeTypes::PredicateQuery &) = 0;
+ virtual void visit(typename NodeTypes::RegExpTerm &) = 0;
+
+private:
+ // Route QueryVisit requests to the correct custom type.
+
+ typedef typename NodeTypes::And TAnd;
+ typedef typename NodeTypes::AndNot TAndNot;
+ typedef typename NodeTypes::Equiv TEquiv;
+ typedef typename NodeTypes::NumberTerm TNumberTerm;
+ typedef typename NodeTypes::LocationTerm TLocTrm;
+ typedef typename NodeTypes::Near TNear;
+ typedef typename NodeTypes::ONear TONear;
+ typedef typename NodeTypes::Or TOr;
+ typedef typename NodeTypes::Phrase TPhrase;
+ typedef typename NodeTypes::PrefixTerm TPrefixTerm;
+ typedef typename NodeTypes::RangeTerm TRangeTerm;
+ typedef typename NodeTypes::Rank TRank;
+ typedef typename NodeTypes::StringTerm TStringTerm;
+ typedef typename NodeTypes::SubstringTerm TSubstrTr;
+ typedef typename NodeTypes::SuffixTerm TSuffixTerm;
+ typedef typename NodeTypes::WeakAnd TWeakAnd;
+ typedef typename NodeTypes::WeightedSetTerm TWeightedSetTerm;
+ typedef typename NodeTypes::DotProduct TDotProduct;
+ typedef typename NodeTypes::WandTerm TWandTerm;
+ typedef typename NodeTypes::PredicateQuery TPredicateQuery;
+ typedef typename NodeTypes::RegExpTerm TRegExpTerm;
+
+ virtual void visit(And &n) { visit(static_cast<TAnd&>(n)); }
+ virtual void visit(AndNot &n) { visit(static_cast<TAndNot&>(n)); }
+ virtual void visit(Equiv &n) { visit(static_cast<TEquiv&>(n)); }
+ virtual void visit(NumberTerm &n) { visit(static_cast<TNumberTerm&>(n)); }
+ virtual void visit(LocationTerm &n) { visit(static_cast<TLocTrm&>(n)); }
+ virtual void visit(Near &n) { visit(static_cast<TNear&>(n)); }
+ virtual void visit(ONear &n) { visit(static_cast<TONear&>(n)); }
+ virtual void visit(Or &n) { visit(static_cast<TOr&>(n)); }
+ virtual void visit(Phrase &n) { visit(static_cast<TPhrase&>(n)); }
+ virtual void visit(PrefixTerm &n) { visit(static_cast<TPrefixTerm&>(n)); }
+ virtual void visit(RangeTerm &n) { visit(static_cast<TRangeTerm&>(n)); }
+ virtual void visit(Rank &n) { visit(static_cast<TRank&>(n)); }
+ virtual void visit(StringTerm &n) { visit(static_cast<TStringTerm&>(n)); }
+ virtual void visit(SubstringTerm &n) { visit(static_cast<TSubstrTr&>(n)); }
+ virtual void visit(SuffixTerm &n) { visit(static_cast<TSuffixTerm&>(n)); }
+ virtual void visit(WeakAnd &n) { visit(static_cast<TWeakAnd&>(n)); }
+ virtual void visit(WeightedSetTerm &n)
+ { visit(static_cast<TWeightedSetTerm&>(n)); }
+ virtual void visit(DotProduct &n) { visit(static_cast<TDotProduct&>(n)); }
+ virtual void visit(WandTerm &n) { visit(static_cast<TWandTerm&>(n)); }
+ virtual void visit(PredicateQuery &n)
+ { visit(static_cast<TPredicateQuery&>(n)); }
+ virtual void visit(RegExpTerm &n) { visit(static_cast<TRegExpTerm&>(n)); }
+};
+
+} // namespace query
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/query/tree/intermediate.cpp b/searchlib/src/vespa/searchlib/query/tree/intermediate.cpp
new file mode 100644
index 00000000000..2f4d6b35be7
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/query/tree/intermediate.cpp
@@ -0,0 +1,22 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/searchlib/query/tree/intermediate.h>
+#include <vespa/searchlib/query/tree/term.h>
+
+namespace search {
+namespace query {
+
+Intermediate::~Intermediate() {
+ for (size_t i = 0; i < _children.size(); ++i) {
+ delete _children[i];
+ }
+}
+
+Intermediate &Intermediate::append(Node::UP child)
+{
+ _children.push_back(child.release());
+ return *this;
+}
+
+} // namespace query
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/query/tree/intermediate.h b/searchlib/src/vespa/searchlib/query/tree/intermediate.h
new file mode 100644
index 00000000000..4ee6d30445e
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/query/tree/intermediate.h
@@ -0,0 +1,29 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vector>
+#include <vespa/searchlib/query/tree/node.h>
+
+namespace search {
+namespace query {
+
+class Intermediate : public Node
+{
+ std::vector<Node *> _children;
+ public:
+ typedef std::unique_ptr<Intermediate> UP;
+
+ Intermediate(const Intermediate & rhs) = delete;
+ Intermediate & operator = (const Intermediate & rhs) = delete;
+
+ Intermediate() = default;
+ virtual ~Intermediate() = 0;
+
+ const std::vector<Node *> &getChildren() const { return _children; }
+ Intermediate &reserve(size_t sz) { _children.reserve(sz); return *this; }
+ Intermediate &append(Node::UP child);
+};
+
+} // namespace query
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/query/tree/intermediatenodes.cpp b/searchlib/src/vespa/searchlib/query/tree/intermediatenodes.cpp
new file mode 100644
index 00000000000..ba1485107b1
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/query/tree/intermediatenodes.cpp
@@ -0,0 +1,37 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".intermediatenodes");
+
+#include "intermediatenodes.h"
+
+namespace search {
+namespace query {
+
+And::~And() {}
+
+AndNot::~AndNot() {}
+
+Or::~Or() {}
+
+WeakAnd::~WeakAnd() {}
+
+Equiv::~Equiv() {}
+
+Rank::~Rank() {}
+
+Near::~Near() {}
+
+ONear::~ONear() {}
+
+Phrase::~Phrase() {}
+
+WeightedSetTerm::~WeightedSetTerm() {}
+
+DotProduct::~DotProduct() {}
+
+WandTerm::~WandTerm() {}
+
+} // namespace query
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/query/tree/intermediatenodes.h b/searchlib/src/vespa/searchlib/query/tree/intermediatenodes.h
new file mode 100644
index 00000000000..29b0e8f8af7
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/query/tree/intermediatenodes.h
@@ -0,0 +1,143 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "intermediate.h"
+#include "querynodemixin.h"
+#include "term.h"
+#include <vespa/searchlib/query/weight.h>
+#include <string>
+
+namespace search {
+namespace query {
+
+class And : public QueryNodeMixin<And, Intermediate> {
+public:
+ virtual ~And() = 0;
+};
+
+//-----------------------------------------------------------------------------
+
+class AndNot : public QueryNodeMixin<AndNot, Intermediate> {
+public:
+ virtual ~AndNot() = 0;
+};
+
+//-----------------------------------------------------------------------------
+
+class Or : public QueryNodeMixin<Or, Intermediate> {
+public:
+ virtual ~Or() = 0;
+};
+
+//-----------------------------------------------------------------------------
+
+class WeakAnd : public QueryNodeMixin<WeakAnd, Intermediate> {
+ uint32_t _minHits;
+ vespalib::string _view;
+public:
+ virtual ~WeakAnd() = 0;
+
+ WeakAnd(uint32_t minHits, const vespalib::string & view) : _minHits(minHits), _view(view) {}
+
+ uint32_t getMinHits() const { return _minHits; }
+ const vespalib::string & getView() const { return _view; }
+};
+
+//-----------------------------------------------------------------------------
+
+class Equiv : public QueryNodeMixin<Equiv, Intermediate> {
+private:
+ int32_t _id;
+ Weight _weight;
+ int32_t _term_index;
+public:
+ virtual ~Equiv() = 0;
+
+ Equiv(int32_t id, Weight weight)
+ : _id(id), _weight(weight), _term_index(-1)
+ {}
+ void setTermIndex(int32_t term_index) { _term_index = term_index; }
+
+ Weight getWeight() const { return _weight; }
+ int32_t getId() const { return _id; }
+ int32_t getTermIndex() const { return _term_index; }
+};
+
+//-----------------------------------------------------------------------------
+
+class Rank : public QueryNodeMixin<Rank, Intermediate> {
+public:
+ virtual ~Rank() = 0;
+};
+
+//-----------------------------------------------------------------------------
+
+class Near : public QueryNodeMixin<Near, Intermediate>
+{
+ uint32_t _distance;
+
+ public:
+ Near(size_t distance) : _distance(distance) {}
+ virtual ~Near() = 0;
+
+ size_t getDistance() const { return _distance; }
+};
+
+//-----------------------------------------------------------------------------
+
+class ONear : public QueryNodeMixin<ONear, Intermediate>
+{
+ uint32_t _distance;
+
+ public:
+ ONear(size_t distance) : _distance(distance) {}
+ virtual ~ONear() = 0;
+
+ size_t getDistance() const { return _distance; }
+};
+
+//-----------------------------------------------------------------------------
+
+class Phrase : public QueryNodeMixin<Phrase, Intermediate>, public Term {
+public:
+ Phrase(const vespalib::string &view, int32_t id, Weight weight)
+ : Term(view, id, weight) {}
+ virtual ~Phrase() = 0;
+};
+
+class WeightedSetTerm : public QueryNodeMixin<WeightedSetTerm, Intermediate>, public Term {
+public:
+ WeightedSetTerm(const vespalib::string &view, int32_t id, Weight weight)
+ : Term(view, id, weight) {}
+ virtual ~WeightedSetTerm() = 0;
+};
+
+class DotProduct : public QueryNodeMixin<DotProduct, Intermediate>, public Term {
+public:
+ DotProduct(const vespalib::string &view, int32_t id, Weight weight)
+ : Term(view, id, weight) {}
+ virtual ~DotProduct() = 0;
+};
+
+class WandTerm : public QueryNodeMixin<WandTerm, Intermediate>, public Term {
+private:
+ uint32_t _targetNumHits;
+ int64_t _scoreThreshold;
+ double _thresholdBoostFactor;
+public:
+ WandTerm(const vespalib::string &view, int32_t id, Weight weight,
+ uint32_t targetNumHits, int64_t scoreThreshold, double thresholdBoostFactor)
+ : Term(view, id, weight),
+ _targetNumHits(targetNumHits),
+ _scoreThreshold(scoreThreshold),
+ _thresholdBoostFactor(thresholdBoostFactor) {}
+ virtual ~WandTerm() = 0;
+ uint32_t getTargetNumHits() const { return _targetNumHits; }
+ int64_t getScoreThreshold() const { return _scoreThreshold; }
+ double getThresholdBoostFactor() const { return _thresholdBoostFactor; }
+};
+
+} // namespace query
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/query/tree/location.cpp b/searchlib/src/vespa/searchlib/query/tree/location.cpp
new file mode 100644
index 00000000000..9e8a5d59147
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/query/tree/location.cpp
@@ -0,0 +1,61 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "location.h"
+#include "point.h"
+#include "rectangle.h"
+
+using vespalib::asciistream;
+
+namespace search {
+namespace query {
+
+Location::Location(const Point &point, uint32_t max_dist, uint32_t x_aspect) {
+ asciistream loc;
+ loc << "(2" // dimensionality
+ << "," << point.x
+ << "," << point.y
+ << "," << max_dist
+ << "," << "0" // table id.
+ << "," << "1" // rank multiplier.
+ << "," << "0" // rank only on distance.
+ << "," << x_aspect // x aspect.
+ << ")";
+ _location_string = loc.str();
+}
+
+Location::Location(const Rectangle &rect,
+ const Point &point, uint32_t max_dist, uint32_t x_aspect)
+{
+ asciistream loc;
+ loc << "(2" // dimensionality
+ << "," << point.x
+ << "," << point.y
+ << "," << max_dist
+ << "," << "0" // table id.
+ << "," << "1" // rank multiplier.
+ << "," << "0" // rank only on distance.
+ << "," << x_aspect // x aspect.
+ << ")";
+ loc << "[2," << rect.left
+ << "," << rect.top
+ << "," << rect.right
+ << "," << rect.bottom
+ << "]" ;
+ _location_string = loc.str();
+
+}
+
+
+Location::Location(const Rectangle &rect) {
+ asciistream loc;
+ loc << "[2," << rect.left
+ << "," << rect.top
+ << "," << rect.right
+ << "," << rect.bottom
+ << "]" ;
+ _location_string = loc.str();
+}
+
+} // namespace query
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/query/tree/location.h b/searchlib/src/vespa/searchlib/query/tree/location.h
new file mode 100644
index 00000000000..8941fcf0b0e
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/query/tree/location.h
@@ -0,0 +1,37 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/vespalib/stllike/string.h>
+#include <vespa/vespalib/stllike/asciistream.h>
+
+namespace search {
+namespace query {
+class Point;
+class Rectangle;
+
+class Location {
+ vespalib::string _location_string;
+
+public:
+ Location() : _location_string() {}
+ Location(const Point &p, uint32_t dist, uint32_t x_asp);
+ Location(const Rectangle &rect);
+ Location(const Rectangle &rect,
+ const Point &p, uint32_t dist, uint32_t x_asp);
+ Location(const vespalib::string &s) : _location_string(s) {}
+
+ bool operator==(const Location &other) const {
+ return _location_string == other._location_string;
+ }
+ const vespalib::string &getLocationString() const
+ { return _location_string; }
+};
+
+inline vespalib::asciistream &operator<<(vespalib::asciistream &out, const Location &loc) {
+ return out << loc.getLocationString();
+}
+
+} // namespace query
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/query/tree/node.h b/searchlib/src/vespa/searchlib/query/tree/node.h
new file mode 100644
index 00000000000..5af76c4e7ef
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/query/tree/node.h
@@ -0,0 +1,26 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <memory>
+
+namespace search {
+namespace query {
+
+class QueryVisitor;
+
+/**
+ This is the base of any node in the query tree. Both leaf nodes (terms)
+ and operator nodes (AND, NOT, OR, PHRASE, NEAR, ONEAR, etc).
+*/
+class Node {
+ public:
+ typedef std::unique_ptr<Node> UP;
+
+ virtual ~Node() {}
+
+ virtual void accept(QueryVisitor &visitor) = 0;
+};
+
+} // namespace query
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/query/tree/point.h b/searchlib/src/vespa/searchlib/query/tree/point.h
new file mode 100644
index 00000000000..8490ae1ec20
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/query/tree/point.h
@@ -0,0 +1,23 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <stdint.h>
+
+namespace search {
+namespace query {
+
+struct Point {
+ int64_t x;
+ int64_t y;
+ Point() : x(0), y(0) {}
+ Point(int64_t x_in, int64_t y_in) : x(x_in), y(y_in) {}
+};
+
+inline bool operator==(const Point &p1, const Point &p2) {
+ return p1.x == p2.x && p1.y == p2.y;
+}
+
+} // namespace query
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/query/tree/predicate_query_term.h b/searchlib/src/vespa/searchlib/query/tree/predicate_query_term.h
new file mode 100644
index 00000000000..6868b039307
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/query/tree/predicate_query_term.h
@@ -0,0 +1,76 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/vespalib/stllike/string.h>
+#include <memory>
+#include <vector>
+
+namespace search {
+namespace query {
+
+/**
+ * Represents a predicate query, with features and range features.
+ */
+class PredicateQueryTerm {
+ static const uint64_t ALL_SUB_QUERIES = 0xffffffffffffffffULL;
+
+ template <typename ValueType>
+ class Entry {
+ vespalib::string _key;
+ ValueType _value;
+ uint64_t _sub_query_bitmap;
+
+ public:
+ Entry(const vespalib::string &key, const ValueType &value,
+ uint64_t sub_query_bitmap = ALL_SUB_QUERIES)
+ : _key(key), _value(value), _sub_query_bitmap(sub_query_bitmap) {}
+
+ vespalib::string getKey() const { return _key; }
+ ValueType getValue() const { return _value; }
+ uint64_t getSubQueryBitmap() const { return _sub_query_bitmap; }
+ bool operator==(const Entry<ValueType> &other) const {
+ return _key == other._key
+ && _value == other._value
+ && _sub_query_bitmap == other._sub_query_bitmap;
+ }
+ };
+
+ std::vector<Entry<vespalib::string>> _features;
+ std::vector<Entry<uint64_t>> _range_features;
+
+public:
+ typedef std::unique_ptr<PredicateQueryTerm> UP;
+
+ PredicateQueryTerm() : _features(), _range_features() {}
+
+ PredicateQueryTerm(const std::vector<Entry<vespalib::string>> &features,
+ const std::vector<Entry<uint64_t>> &range_features)
+ : _features(features),
+ _range_features(range_features) {
+ }
+
+ void addFeature(const vespalib::string &key, const vespalib::string &value,
+ uint64_t sub_query_bitmask = ALL_SUB_QUERIES) {
+ _features.emplace_back(key, value, sub_query_bitmask);
+ }
+
+ void addRangeFeature(const vespalib::string &key, uint64_t value,
+ uint64_t sub_query_bitmask = ALL_SUB_QUERIES) {
+ _range_features.emplace_back(key, value, sub_query_bitmask);
+ }
+
+ const std::vector<Entry<vespalib::string>> &getFeatures() const
+ { return _features; }
+ const std::vector<Entry<uint64_t>> &getRangeFeatures() const
+ { return _range_features; }
+
+ bool operator==(const PredicateQueryTerm &other) const {
+ return _features == other._features
+ && _range_features == other._range_features;
+ }
+};
+
+} // namespace query
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/query/tree/querybuilder.cpp b/searchlib/src/vespa/searchlib/query/tree/querybuilder.cpp
new file mode 100644
index 00000000000..ae8c2012049
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/query/tree/querybuilder.cpp
@@ -0,0 +1,103 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".querybuilder");
+
+#include "querybuilder.h"
+
+#include "intermediate.h"
+
+using vespalib::string;
+using namespace search::query;
+
+void QueryBuilderBase::reportError(const vespalib::string &msg) {
+ if (!hasError()) {
+ _error_msg = msg;
+ }
+}
+
+QueryBuilderBase::QueryBuilderBase()
+ : _root(),
+ _nodes(),
+ _error_msg() {
+}
+
+QueryBuilderBase::~QueryBuilderBase() {
+ reset();
+}
+
+void QueryBuilderBase::addCompleteNode(Node *n)
+{
+ Node::UP node(n);
+
+ if (hasError()) {
+ return;
+ }
+ if (_nodes.empty()) {
+ if (!_root.get()) {
+ _root = std::move(node);
+ return;
+ }
+ reportError("QueryBuilder got invalid node structure.");
+ return;
+ }
+
+ assert(_nodes.top().remaining_child_count > 0);
+ _nodes.top().node->append(std::move(node));
+ if (--_nodes.top().remaining_child_count == 0) {
+ Node *completed(_nodes.top().node);
+ _nodes.pop();
+ addCompleteNode(completed);
+ }
+}
+
+void QueryBuilderBase::addIntermediateNode(Intermediate *n, int child_count)
+{
+ Intermediate::UP node(n);
+ if (!hasError()) {
+ if (_root.get()) {
+ reportError("QueryBuilder got invalid node structure.");
+ } else {
+ node->reserve(child_count);
+ WeightOverride weight_override;
+ if (!_nodes.empty()) {
+ weight_override = _nodes.top().weight_override;
+ }
+ _nodes.push(NodeInfo(node.release(), child_count));
+ _nodes.top().weight_override = weight_override;
+ if (child_count == 0) {
+ Node *completed(_nodes.top().node);
+ _nodes.pop();
+ addCompleteNode(completed);
+ }
+ }
+ }
+}
+
+void QueryBuilderBase::setWeightOverride(const Weight &weight) {
+ assert(!_nodes.empty());
+ _nodes.top().weight_override = WeightOverride(weight);
+}
+
+Node::UP QueryBuilderBase::build() {
+ if (!_root.get()) {
+ reportError("Trying to build incomplete query tree.");
+ }
+ if (!_nodes.empty()) {
+ reportError("QueryBuilder got invalid node structure.");
+ }
+ if (hasError()) {
+ return Node::UP();
+ }
+ return std::move(_root);
+}
+
+void QueryBuilderBase::reset() {
+ while (!_nodes.empty()) {
+ delete _nodes.top().node;
+ _nodes.pop();
+ }
+ _root.reset(0);
+ _error_msg = "";
+}
diff --git a/searchlib/src/vespa/searchlib/query/tree/querybuilder.h b/searchlib/src/vespa/searchlib/query/tree/querybuilder.h
new file mode 100644
index 00000000000..b5cbdb07a13
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/query/tree/querybuilder.h
@@ -0,0 +1,358 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/*
+ * The QueryBuilder builds a query tree. The exact type of the nodes
+ * in the tree is defined by a traits class, which defines the actual
+ * subclasses of the query nodes to use. Simple subclasses are defined
+ * in "simplequery.h"
+ *
+ * To create a QueryBuilder that uses the simple query nodes, create
+ * the builder like this:
+ *
+ * QueryBuilder<SimpleQueryNodeTypes> builder;
+ *
+ * Query trees are built using prefix traversal, e.g:
+ * builder.addOr(2); // Two children
+ * builder.addStringTerm(term, view, id, weight);
+ * builder.addStringTerm(term, view, id, weight);
+ * Node::UP node = builder.build();
+ */
+
+#pragma once
+
+#include "predicate_query_term.h"
+#include <stack>
+#include <vespa/vespalib/stllike/string.h>
+#include <vespa/searchlib/query/weight.h>
+#include "node.h"
+
+namespace search {
+namespace query {
+
+class Intermediate;
+class Location;
+class Range;
+
+class QueryBuilderBase
+{
+ class WeightOverride {
+ bool _active;
+ Weight _weight;
+ public:
+ WeightOverride() : _active(false), _weight(0) {}
+ WeightOverride(Weight weight) : _active(true), _weight(weight) {}
+ void adjustWeight(Weight &weight) const { if (_active) weight = _weight; }
+ };
+ struct NodeInfo {
+ Intermediate *node;
+ int remaining_child_count;
+ WeightOverride weight_override;
+ NodeInfo(Intermediate *n, int c) : node(n), remaining_child_count(c) {}
+ };
+ Node::UP _root;
+ std::stack<NodeInfo> _nodes;
+ vespalib::string _error_msg;
+
+ void reportError(const vespalib::string &msg);
+
+protected:
+ QueryBuilderBase();
+ ~QueryBuilderBase();
+
+ // Takes ownership of node.
+ void addCompleteNode(Node *node);
+ // Takes ownership of node.
+ void addIntermediateNode(Intermediate *node, int child_count);
+ // Activates a weight override for the current intermediate node.
+ void setWeightOverride(const Weight &weight);
+ // Resets weight if a weight override is active.
+ void adjustWeight(Weight &weight) const {
+ if (!_nodes.empty()) {
+ _nodes.top().weight_override.adjustWeight(weight);
+ }
+ }
+
+public:
+ /**
+ * Builds the query tree. Returns 0 if something went wrong.
+ */
+ Node::UP build();
+
+ /**
+ * Checks if an error has occurred.
+ */
+ bool hasError() const { return !_error_msg.empty(); }
+
+ /**
+ * If build failed, the reason is stored here.
+ */
+ vespalib::string error() { return _error_msg; }
+
+ /**
+ * After an error, reset() must be called before attempting to
+ * build a new query tree with the same builder.
+ */
+ void reset();
+};
+
+
+// These template functions create nodes based on a traits class.
+// You may specialize these functions for your own traits class to have full
+// control of the query node instantiation.
+
+// Intermediate nodes
+template <class NodeTypes>
+typename NodeTypes::And *createAnd() { return new typename NodeTypes::And; }
+
+template <class NodeTypes>
+typename NodeTypes::AndNot *
+createAndNot() { return new typename NodeTypes::AndNot; }
+
+template <class NodeTypes>
+typename NodeTypes::Or *createOr() { return new typename NodeTypes::Or; }
+
+template <class NodeTypes>
+typename NodeTypes::WeakAnd *createWeakAnd(uint32_t minHits, const vespalib::stringref & view) {
+ return new typename NodeTypes::WeakAnd(minHits, view);
+}
+template <class NodeTypes>
+typename NodeTypes::Equiv *createEquiv(int32_t id, Weight weight) {
+ return new typename NodeTypes::Equiv(id, weight);
+}
+template <class NodeTypes>
+typename NodeTypes::Phrase *createPhrase(
+ const vespalib::stringref &view, int32_t id, Weight weight) {
+ return new typename NodeTypes::Phrase(view, id, weight);
+}
+template <class NodeTypes>
+typename NodeTypes::WeightedSetTerm *createWeightedSetTerm(
+ const vespalib::stringref &view, int32_t id, Weight weight) {
+ return new typename NodeTypes::WeightedSetTerm(view, id, weight);
+}
+template <class NodeTypes>
+typename NodeTypes::DotProduct *createDotProduct(
+ const vespalib::stringref &view, int32_t id, Weight weight) {
+ return new typename NodeTypes::DotProduct(view, id, weight);
+}
+template <class NodeTypes>
+typename NodeTypes::WandTerm *createWandTerm(
+ const vespalib::stringref &view, int32_t id, Weight weight,
+ uint32_t targetNumHits, int64_t scoreThreshold, double thresholdBoostFactor) {
+ return new typename NodeTypes::WandTerm(view, id, weight,
+ targetNumHits, scoreThreshold, thresholdBoostFactor);
+}
+template <class NodeTypes>
+typename NodeTypes::Rank *createRank() { return new typename NodeTypes::Rank; }
+
+template <class NodeTypes>
+typename NodeTypes::Near *createNear(size_t distance) {
+ return new typename NodeTypes::Near(distance);
+}
+template <class NodeTypes>
+typename NodeTypes::ONear *createONear(size_t distance) {
+ return new typename NodeTypes::ONear(distance);
+}
+
+// Term nodes
+template <class NodeTypes>
+typename NodeTypes::NumberTerm *createNumberTerm(
+ const vespalib::stringref &term, const vespalib::stringref &view, int32_t id, Weight weight)
+{
+ return new typename NodeTypes::NumberTerm(term, view, id, weight);
+}
+template <class NodeTypes>
+typename NodeTypes::PrefixTerm *createPrefixTerm(
+ const vespalib::stringref &term, const vespalib::stringref &view,
+ int32_t id, Weight weight)
+{
+ return new typename NodeTypes::PrefixTerm(term, view, id, weight);
+}
+template <class NodeTypes>
+typename NodeTypes::RangeTerm *createRangeTerm(
+ const Range &term, const vespalib::stringref &view,
+ int32_t id, Weight weight)
+{
+ return new typename NodeTypes::RangeTerm(term, view, id, weight);
+}
+template <class NodeTypes>
+typename NodeTypes::StringTerm *createStringTerm(
+ const vespalib::stringref &term, const vespalib::stringref &view,
+ int32_t id, Weight weight)
+{
+ return new typename NodeTypes::StringTerm(term, view, id, weight);
+}
+template <class NodeTypes>
+typename NodeTypes::SubstringTerm *createSubstringTerm(
+ const vespalib::stringref &term, const vespalib::stringref &view,
+ int32_t id, Weight weight)
+{
+ return new typename NodeTypes::SubstringTerm(term, view, id, weight);
+}
+template <class NodeTypes>
+typename NodeTypes::SuffixTerm *createSuffixTerm(
+ const vespalib::stringref &term, const vespalib::stringref &view,
+ int32_t id, Weight weight)
+{
+ return new typename NodeTypes::SuffixTerm(term, view, id, weight);
+}
+
+template <class NodeTypes>
+typename NodeTypes::LocationTerm *createLocationTerm(
+ const Location &loc, const vespalib::stringref &view,
+ int32_t id, Weight weight)
+{
+ return new typename NodeTypes::LocationTerm(loc, view, id, weight);
+}
+
+template <class NodeTypes>
+typename NodeTypes::PredicateQuery *createPredicateQuery(
+ PredicateQueryTerm::UP term, const vespalib::stringref &view,
+ int32_t id, Weight weight)
+{
+ return new typename NodeTypes::PredicateQuery(
+ std::move(term), view, id, weight);
+}
+
+template <class NodeTypes>
+typename NodeTypes::RegExpTerm *createRegExpTerm(
+ const vespalib::stringref &term, const vespalib::stringref &view,
+ int32_t id, Weight weight)
+{
+ return new typename NodeTypes::RegExpTerm(term, view, id, weight);
+}
+
+template <class NodeTypes>
+class QueryBuilder : public QueryBuilderBase {
+ template <class T>
+ T &addIntermediate(T *node, int child_count) {
+ addIntermediateNode(node, child_count);
+ return *node;
+ }
+
+ template <class T>
+ T &addTerm(T *node) {
+ addCompleteNode(node);
+ return *node;
+ }
+
+public:
+ typename NodeTypes::And &addAnd(int child_count) {
+ return addIntermediate(createAnd<NodeTypes>(), child_count);
+ }
+ typename NodeTypes::AndNot &addAndNot(int child_count) {
+ return addIntermediate(createAndNot<NodeTypes>(), child_count);
+ }
+ typename NodeTypes::Near &addNear(int child_count, size_t distance) {
+ return addIntermediate(createNear<NodeTypes>(distance), child_count);
+ }
+ typename NodeTypes::ONear &addONear(int child_count, size_t distance) {
+ return addIntermediate(createONear<NodeTypes>(distance), child_count);
+ }
+ typename NodeTypes::Or &addOr(int child_count) {
+ return addIntermediate(createOr<NodeTypes>(), child_count);
+ }
+ typename NodeTypes::WeakAnd &addWeakAnd(int child_count, uint32_t minHits, const vespalib::stringref & view) {
+ return addIntermediate(createWeakAnd<NodeTypes>(minHits, view), child_count);
+ }
+ typename NodeTypes::Equiv &addEquiv(int child_count, int32_t id, Weight weight) {
+ return addIntermediate(createEquiv<NodeTypes>(id, weight), child_count);
+ }
+ typename NodeTypes::Phrase &addPhrase(
+ int child_count, const vespalib::stringref &view,
+ int32_t id, Weight weight) {
+ adjustWeight(weight);
+ typename NodeTypes::Phrase &node = addIntermediate(
+ createPhrase<NodeTypes>(view, id, weight), child_count);
+ setWeightOverride(weight);
+ return node;
+ }
+ typename NodeTypes::WeightedSetTerm &addWeightedSetTerm(
+ int child_count, const vespalib::stringref &view,
+ int32_t id, Weight weight) {
+ adjustWeight(weight);
+ typename NodeTypes::WeightedSetTerm &node = addIntermediate(
+ createWeightedSetTerm<NodeTypes>(view, id, weight), child_count);
+ return node;
+ }
+ typename NodeTypes::DotProduct &addDotProduct(
+ int child_count, const vespalib::stringref &view,
+ int32_t id, Weight weight) {
+ adjustWeight(weight);
+ typename NodeTypes::DotProduct &node = addIntermediate(
+ createDotProduct<NodeTypes>(view, id, weight), child_count);
+ return node;
+ }
+ typename NodeTypes::WandTerm &addWandTerm(
+ int child_count, const vespalib::stringref &view,
+ int32_t id, Weight weight, uint32_t targetNumHits,
+ int64_t scoreThreshold, double thresholdBoostFactor) {
+ adjustWeight(weight);
+ typename NodeTypes::WandTerm &node = addIntermediate(
+ createWandTerm<NodeTypes>(view, id, weight,
+ targetNumHits, scoreThreshold, thresholdBoostFactor),
+ child_count);
+ return node;
+ }
+ typename NodeTypes::Rank &addRank(int child_count) {
+ return addIntermediate(createRank<NodeTypes>(), child_count);
+ }
+
+ typename NodeTypes::NumberTerm &addNumberTerm(
+ const vespalib::stringref &term, const vespalib::stringref &view,
+ int32_t id, Weight weight) {
+ adjustWeight(weight);
+ return addTerm(createNumberTerm<NodeTypes>(term, view, id, weight));
+ }
+ typename NodeTypes::PrefixTerm &addPrefixTerm(
+ const vespalib::stringref &term, const vespalib::stringref &view,
+ int32_t id, Weight weight) {
+ adjustWeight(weight);
+ return addTerm(createPrefixTerm<NodeTypes>(term, view, id, weight));
+ }
+ typename NodeTypes::RangeTerm &addRangeTerm(
+ const Range &range, const vespalib::stringref &view,
+ int32_t id, Weight weight) {
+ adjustWeight(weight);
+ return addTerm(createRangeTerm<NodeTypes>(range, view, id, weight));
+ }
+ typename NodeTypes::StringTerm &addStringTerm(
+ const vespalib::stringref &term, const vespalib::stringref &view,
+ int32_t id, Weight weight) {
+ adjustWeight(weight);
+ return addTerm(createStringTerm<NodeTypes>(term, view, id, weight));
+ }
+ typename NodeTypes::SubstringTerm &addSubstringTerm(
+ const vespalib::stringref &t, const vespalib::stringref &view,
+ int32_t id, Weight weight) {
+ adjustWeight(weight);
+ return addTerm(createSubstringTerm<NodeTypes>(t, view, id, weight));
+ }
+ typename NodeTypes::SuffixTerm &addSuffixTerm(
+ const vespalib::stringref &term, const vespalib::stringref &view,
+ int32_t id, Weight weight) {
+ adjustWeight(weight);
+ return addTerm(createSuffixTerm<NodeTypes>(term, view, id, weight));
+ }
+ typename NodeTypes::LocationTerm &addLocationTerm(
+ const Location &loc, const vespalib::stringref &view,
+ int32_t id, Weight weight) {
+ adjustWeight(weight);
+ return addTerm(createLocationTerm<NodeTypes>(loc, view, id, weight));
+ }
+ typename NodeTypes::PredicateQuery &addPredicateQuery(
+ PredicateQueryTerm::UP term, const vespalib::stringref &view,
+ int32_t id, Weight weight) {
+ adjustWeight(weight);
+ return addTerm(createPredicateQuery<NodeTypes>(
+ std::move(term), view, id, weight));
+ }
+ typename NodeTypes::RegExpTerm &addRegExpTerm(
+ const vespalib::stringref &term, const vespalib::stringref &view,
+ int32_t id, Weight weight) {
+ adjustWeight(weight);
+ return addTerm(createRegExpTerm<NodeTypes>(term, view, id, weight));
+ }
+};
+
+} // namespace query
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/query/tree/querynodemixin.h b/searchlib/src/vespa/searchlib/query/tree/querynodemixin.h
new file mode 100644
index 00000000000..7fbcb45d742
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/query/tree/querynodemixin.h
@@ -0,0 +1,28 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/query/tree/queryvisitor.h>
+
+namespace search {
+namespace query {
+
+template <typename T, typename Base>
+struct QueryNodeMixin : Base {
+ typedef QueryNodeMixin<T, Base> QueryNodeMixinType;
+
+ virtual ~QueryNodeMixin() = 0;
+ virtual void accept(QueryVisitor &visitor) {
+ visitor.visit(static_cast<T &>(*this));
+ }
+
+protected:
+ using Base::Base;
+};
+
+template <typename T, typename Base>
+QueryNodeMixin<T, Base>::~QueryNodeMixin() {}
+
+} // namespace query
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/query/tree/queryreplicator.h b/searchlib/src/vespa/searchlib/query/tree/queryreplicator.h
new file mode 100644
index 00000000000..38f666cb155
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/query/tree/queryreplicator.h
@@ -0,0 +1,171 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "intermediatenodes.h"
+#include "querybuilder.h"
+#include "queryvisitor.h"
+#include "termnodes.h"
+
+namespace search {
+namespace query {
+
+/**
+ * Creates a new query tree based on an existing one. The traits class
+ * specifies what concrete types the query tree classes should have.
+ */
+template <class NodeTypes>
+class QueryReplicator : private QueryVisitor {
+ QueryBuilder<NodeTypes> _builder;
+
+public:
+ Node::UP replicate(const Node &node) {
+ // The visitor doesn't deal with const nodes. However, we are
+ // not changing the node, so we can safely remove the const.
+ const_cast<Node &>(node).accept(*this);
+ return _builder.build();
+ }
+
+private:
+ void visitNodes(const std::vector<Node *> &nodes) {
+ for (size_t i = 0; i < nodes.size(); ++i) {
+ nodes[i]->accept(*this);
+ }
+ }
+
+ virtual void visit(And &node) {
+ _builder.addAnd(node.getChildren().size());
+ visitNodes(node.getChildren());
+ }
+
+ virtual void visit(AndNot &node) {
+ _builder.addAndNot(node.getChildren().size());
+ visitNodes(node.getChildren());
+ }
+
+ virtual void visit(WeakAnd &node) {
+ _builder.addWeakAnd(node.getChildren().size(), node.getMinHits(), node.getView());
+ visitNodes(node.getChildren());
+ }
+
+ virtual void visit(Equiv &node) {
+ _builder.addEquiv(node.getChildren().size(), node.getId(), node.getWeight())
+ .setTermIndex(node.getTermIndex());
+ visitNodes(node.getChildren());
+ }
+
+ virtual void visit(Near &node) {
+ _builder.addNear(node.getChildren().size(), node.getDistance());
+ visitNodes(node.getChildren());
+ }
+
+ virtual void visit(ONear &node) {
+ _builder.addONear(node.getChildren().size(), node.getDistance());
+ visitNodes(node.getChildren());
+ }
+
+ virtual void visit(Or &node) {
+ _builder.addOr(node.getChildren().size());
+ visitNodes(node.getChildren());
+ }
+
+ virtual void visit(Phrase &node) {
+ replicate(node, _builder.addPhrase(node.getChildren().size(),
+ node.getView(),
+ node.getId(), node.getWeight()));
+ visitNodes(node.getChildren());
+ }
+
+ virtual void visit(WeightedSetTerm &node) {
+ replicate(node, _builder.addWeightedSetTerm(node.getChildren().size(),
+ node.getView(),
+ node.getId(), node.getWeight()));
+ visitNodes(node.getChildren());
+ }
+
+ virtual void visit(DotProduct &node) {
+ replicate(node, _builder.addDotProduct(node.getChildren().size(),
+ node.getView(),
+ node.getId(), node.getWeight()));
+ visitNodes(node.getChildren());
+ }
+
+ virtual void visit(WandTerm &node) {
+ replicate(node, _builder.addWandTerm(node.getChildren().size(),
+ node.getView(),
+ node.getId(), node.getWeight(),
+ node.getTargetNumHits(),
+ node.getScoreThreshold(),
+ node.getThresholdBoostFactor()));
+ visitNodes(node.getChildren());
+ }
+
+ virtual void visit(Rank &node) {
+ _builder.addRank(node.getChildren().size());
+ visitNodes(node.getChildren());
+ }
+
+ void replicate(const Term &original, Term &replica) {
+ replica.setTermIndex(original.getTermIndex());
+ replica.setRanked(original.isRanked());
+ }
+
+ virtual void visit(NumberTerm &node) {
+ replicate(node, _builder.addNumberTerm(
+ node.getTerm(), node.getView(),
+ node.getId(), node.getWeight()));
+ }
+
+ virtual void visit(LocationTerm &node) {
+ replicate(node,_builder.addLocationTerm(
+ node.getTerm(), node.getView(),
+ node.getId(), node.getWeight()));
+ }
+
+ virtual void visit(PrefixTerm &node) {
+ replicate(node, _builder.addPrefixTerm(
+ node.getTerm(), node.getView(),
+ node.getId(), node.getWeight()));
+ }
+
+ virtual void visit(RangeTerm &node) {
+ replicate(node, _builder.addRangeTerm(
+ node.getTerm(), node.getView(),
+ node.getId(), node.getWeight()));
+ }
+
+ virtual void visit(StringTerm &node) {
+ replicate(node, _builder.addStringTerm(
+ node.getTerm(), node.getView(),
+ node.getId(), node.getWeight()));
+ }
+
+ virtual void visit(SubstringTerm &node) {
+ replicate(node, _builder.addSubstringTerm(
+ node.getTerm(), node.getView(),
+ node.getId(), node.getWeight()));
+ }
+
+ virtual void visit(SuffixTerm &node) {
+ replicate(node, _builder.addSuffixTerm(
+ node.getTerm(), node.getView(),
+ node.getId(), node.getWeight()));
+ }
+
+ virtual void visit(PredicateQuery &node) {
+ replicate(node, _builder.addPredicateQuery(
+ PredicateQueryTerm::UP(new PredicateQueryTerm(
+ *node.getTerm())),
+ node.getView(), node.getId(), node.getWeight()));
+ }
+
+ virtual void visit(RegExpTerm &node) {
+ replicate(node, _builder.addRegExpTerm(
+ node.getTerm(), node.getView(),
+ node.getId(), node.getWeight()));
+ }
+};
+
+} // namespace query
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/query/tree/querytreecreator.h b/searchlib/src/vespa/searchlib/query/tree/querytreecreator.h
new file mode 100644
index 00000000000..f7e997e82f4
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/query/tree/querytreecreator.h
@@ -0,0 +1,32 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "queryreplicator.h"
+#include "stackdumpquerycreator.h"
+
+namespace search {
+namespace query {
+
+/**
+ * Holds functions for creating query trees, either from a stack dump
+ * or from another query tree. The traits specify the concrete
+ * subclasses to be used when building the tree.
+ */
+template <class NodeTypes>
+struct QueryTreeCreator {
+ static Node::UP replicate(const Node &node) {
+ return QueryReplicator<NodeTypes>().replicate(node);
+ }
+
+ static Node::UP create(search::SimpleQueryStackDumpIterator &iterator) {
+ return StackDumpQueryCreator<NodeTypes>().create(iterator);
+ }
+
+private:
+ QueryTreeCreator();
+};
+
+} // namespace query
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/query/tree/queryvisitor.h b/searchlib/src/vespa/searchlib/query/tree/queryvisitor.h
new file mode 100644
index 00000000000..dc24dc9e8f6
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/query/tree/queryvisitor.h
@@ -0,0 +1,58 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+namespace search {
+namespace query {
+
+class And;
+class AndNot;
+class Equiv;
+class NumberTerm;
+class LocationTerm;
+class Near;
+class ONear;
+class Or;
+class Phrase;
+class PrefixTerm;
+class RangeTerm;
+class Rank;
+class StringTerm;
+class SubstringTerm;
+class SuffixTerm;
+class WeakAnd;
+class WeightedSetTerm;
+class DotProduct;
+class WandTerm;
+class PredicateQuery;
+class RegExpTerm;
+
+struct QueryVisitor {
+ virtual ~QueryVisitor() {}
+
+ virtual void visit(And &) = 0;
+ virtual void visit(AndNot &) = 0;
+ virtual void visit(Equiv &) = 0;
+ virtual void visit(NumberTerm &) = 0;
+ virtual void visit(LocationTerm &) = 0;
+ virtual void visit(Near &) = 0;
+ virtual void visit(ONear &) = 0;
+ virtual void visit(Or &) = 0;
+ virtual void visit(Phrase &) = 0;
+ virtual void visit(PrefixTerm &) = 0;
+ virtual void visit(RangeTerm &) = 0;
+ virtual void visit(Rank &) = 0;
+ virtual void visit(StringTerm &) = 0;
+ virtual void visit(SubstringTerm &) = 0;
+ virtual void visit(SuffixTerm &) = 0;
+ virtual void visit(WeakAnd &) = 0;
+ virtual void visit(WeightedSetTerm &) = 0;
+ virtual void visit(DotProduct &) = 0;
+ virtual void visit(WandTerm &) = 0;
+ virtual void visit(PredicateQuery &) = 0;
+ virtual void visit(RegExpTerm &) = 0;
+};
+
+} // namespace query
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/query/tree/range.cpp b/searchlib/src/vespa/searchlib/query/tree/range.cpp
new file mode 100644
index 00000000000..0b516d3f73c
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/query/tree/range.cpp
@@ -0,0 +1,24 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "range.h"
+#include <sstream>
+#include <vespa/vespalib/stllike/asciistream.h>
+
+namespace search {
+namespace query {
+
+Range::Range(int64_t f, int64_t t)
+{
+ vespalib::asciistream ost;
+ ost << "[" << f << ";" << t << "]";
+ _range = ost.str();
+}
+
+vespalib::asciistream &operator<<(vespalib::asciistream &out, const Range &range)
+{
+ return out << range.getRangeString();
+}
+
+} // namespace query
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/query/tree/range.h b/searchlib/src/vespa/searchlib/query/tree/range.h
new file mode 100644
index 00000000000..39a0776ca7d
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/query/tree/range.h
@@ -0,0 +1,30 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/vespalib/stllike/string.h>
+#include <vespa/vespalib/stllike/asciistream.h>
+
+namespace search {
+namespace query {
+
+class Range {
+ vespalib::string _range;
+
+public:
+ Range() : _range() {}
+ Range(int64_t f, int64_t t);
+ Range(const vespalib::string &range) : _range(range) {}
+
+ const vespalib::string & getRangeString() const { return _range; }
+};
+
+inline bool operator==(const Range &r1, const Range &r2) {
+ return r1.getRangeString() == r2.getRangeString();
+}
+
+vespalib::asciistream &operator<<(vespalib::asciistream &out, const Range &range);
+
+} // namespace query
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/query/tree/rectangle.h b/searchlib/src/vespa/searchlib/query/tree/rectangle.h
new file mode 100644
index 00000000000..faf2ca4b4d0
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/query/tree/rectangle.h
@@ -0,0 +1,26 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+namespace search {
+namespace query {
+
+struct Rectangle {
+ int64_t left;
+ int64_t top;
+ int64_t right;
+ int64_t bottom;
+
+ Rectangle() : left(0), top(0), right(0), bottom(0) {}
+ Rectangle(int64_t l, int64_t t, int64_t r, int64_t b)
+ : left(l), top(t), right(r), bottom(b) {}
+};
+
+inline bool operator==(const Rectangle &r1, const Rectangle &r2) {
+ return r1.left == r2.left && r1.right == r2.right
+ && r1.top == r2.top && r1.bottom == r2.bottom;
+}
+
+} // namespace query
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/query/tree/simplequery.h b/searchlib/src/vespa/searchlib/query/tree/simplequery.h
new file mode 100644
index 00000000000..e0f66d70f28
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/query/tree/simplequery.h
@@ -0,0 +1,132 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/*
+ * This file defines a set of subclasses to the query nodes, and a
+ * traits class to make them easy to use with the query builder. These
+ * subclasses don't add any extra information to the abstract nodes.
+ */
+
+#pragma once
+
+#include <vespa/fastos/fastos.h>
+#include "intermediatenodes.h"
+#include "termnodes.h"
+
+namespace search {
+namespace query {
+
+struct SimpleAnd : And {};
+struct SimpleAndNot : AndNot {};
+struct SimpleNear : Near { SimpleNear(size_t dist) : Near(dist) {} };
+struct SimpleONear : ONear { SimpleONear(size_t dist) : ONear(dist) {} };
+struct SimpleOr : Or {};
+struct SimpleWeakAnd : WeakAnd {
+ SimpleWeakAnd(uint32_t minHits, const vespalib::stringref & view) :
+ WeakAnd(minHits, view)
+ {}
+};
+struct SimpleEquiv : Equiv {
+ SimpleEquiv(int32_t id, Weight weight)
+ : Equiv(id, weight) {}
+};
+struct SimplePhrase : Phrase {
+ SimplePhrase(const vespalib::stringref &view, int32_t id, Weight weight)
+ : Phrase(view, id, weight) {}
+};
+struct SimpleWeightedSetTerm : WeightedSetTerm {
+ SimpleWeightedSetTerm(const vespalib::stringref &view, int32_t id, Weight weight)
+ : WeightedSetTerm(view, id, weight) {}
+};
+struct SimpleDotProduct : DotProduct {
+ SimpleDotProduct(const vespalib::stringref &view, int32_t id, Weight weight)
+ : DotProduct(view, id, weight) {}
+};
+struct SimpleWandTerm : WandTerm {
+ SimpleWandTerm(const vespalib::stringref &view, int32_t id, Weight weight,
+ uint32_t targetNumHits, int64_t scoreThreshold, double thresholdBoostFactor)
+ : WandTerm(view, id, weight, targetNumHits, scoreThreshold, thresholdBoostFactor) {}
+};
+struct SimpleRank : Rank {};
+struct SimpleNumberTerm : NumberTerm {
+ SimpleNumberTerm(Type term, const vespalib::stringref &view,
+ int32_t id, Weight weight)
+ : NumberTerm(term, view, id, weight) {
+ }
+};
+struct SimpleLocationTerm : LocationTerm {
+ SimpleLocationTerm(const Type &term, const vespalib::stringref &view,
+ int32_t id, Weight weight)
+ : LocationTerm(term, view, id, weight) {
+ }
+};
+struct SimplePrefixTerm : PrefixTerm {
+ SimplePrefixTerm(const Type &term, const vespalib::stringref &view,
+ int32_t id, Weight weight)
+ : PrefixTerm(term, view, id, weight) {
+ }
+};
+struct SimpleRangeTerm : RangeTerm {
+ SimpleRangeTerm(const Type &term, const vespalib::stringref &view,
+ int32_t id, Weight weight)
+ : RangeTerm(term, view, id, weight) {
+ }
+};
+struct SimpleStringTerm : StringTerm {
+ SimpleStringTerm(const Type &term, const vespalib::stringref &view,
+ int32_t id, Weight weight)
+ : StringTerm(term, view, id, weight) {
+ }
+};
+struct SimpleSubstringTerm : SubstringTerm {
+ SimpleSubstringTerm(const Type &term, const vespalib::stringref &view,
+ int32_t id, Weight weight)
+ : SubstringTerm(term, view, id, weight) {
+ }
+};
+struct SimpleSuffixTerm : SuffixTerm {
+ SimpleSuffixTerm(const Type &term, const vespalib::stringref &view,
+ int32_t id, Weight weight)
+ : SuffixTerm(term, view, id, weight) {
+ }
+};
+struct SimplePredicateQuery : PredicateQuery {
+ SimplePredicateQuery(PredicateQueryTerm::UP term,
+ const vespalib::stringref &view,
+ int32_t id, Weight weight)
+ : PredicateQuery(std::move(term), view, id, weight) {
+ }
+};
+struct SimpleRegExpTerm : RegExpTerm {
+ SimpleRegExpTerm(const Type &term, const vespalib::stringref &view,
+ int32_t id, Weight weight)
+ : RegExpTerm(term, view, id, weight) {
+ }
+};
+
+
+struct SimpleQueryNodeTypes {
+ typedef SimpleAnd And;
+ typedef SimpleAndNot AndNot;
+ typedef SimpleEquiv Equiv;
+ typedef SimpleNumberTerm NumberTerm;
+ typedef SimpleLocationTerm LocationTerm;
+ typedef SimpleNear Near;
+ typedef SimpleONear ONear;
+ typedef SimpleOr Or;
+ typedef SimplePhrase Phrase;
+ typedef SimplePrefixTerm PrefixTerm;
+ typedef SimpleRangeTerm RangeTerm;
+ typedef SimpleRank Rank;
+ typedef SimpleStringTerm StringTerm;
+ typedef SimpleSubstringTerm SubstringTerm;
+ typedef SimpleSuffixTerm SuffixTerm;
+ typedef SimpleWeakAnd WeakAnd;
+ typedef SimpleWeightedSetTerm WeightedSetTerm;
+ typedef SimpleDotProduct DotProduct;
+ typedef SimpleWandTerm WandTerm;
+ typedef SimplePredicateQuery PredicateQuery;
+ typedef SimpleRegExpTerm RegExpTerm;
+};
+
+} // namespace query
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/query/tree/stackdumpcreator.cpp b/searchlib/src/vespa/searchlib/query/tree/stackdumpcreator.cpp
new file mode 100644
index 00000000000..b5d43176f36
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/query/tree/stackdumpcreator.cpp
@@ -0,0 +1,301 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".stackdumpcreator");
+
+#include "stackdumpcreator.h"
+
+#include "intermediatenodes.h"
+#include "queryvisitor.h"
+#include "termnodes.h"
+#include <vespa/vespalib/objects/nbostream.h>
+#include <vespa/vespalib/stllike/asciistream.h>
+#include <vespa/searchlib/parsequery/parse.h>
+#include <vespa/searchlib/util/rawbuf.h>
+
+using vespalib::string;
+using std::vector;
+using search::ParseItem;
+using search::RawBuf;
+using namespace search::query;
+
+namespace {
+class QueryNodeConverter : public QueryVisitor {
+ RawBuf _buf;
+
+ void visitNodes(const vector<Node *> &nodes) {
+ for (size_t i = 0; i < nodes.size(); ++i) {
+ nodes[i]->accept(*this);
+ }
+ }
+
+ void appendString(const string &s) {
+ _buf.preAlloc(sizeof(uint32_t) + s.size());
+ _buf.appendCompressedPositiveNumber(s.size());
+ _buf.append(s.data(), s.size());
+ }
+
+ void appendCompressedPositiveNumber(uint64_t n) {
+ _buf.appendCompressedPositiveNumber(n);
+ }
+
+ void appendCompressedNumber(int64_t n) {
+ _buf.appendCompressedNumber(n);
+ }
+
+ void appendInt(uint32_t i) {
+ _buf.preAlloc(sizeof(uint32_t));
+ _buf.PutToInet(i);
+ }
+
+ void appendLong(uint64_t l) {
+ _buf.preAlloc(sizeof(uint64_t));
+ _buf.Put64ToInet(l);
+ }
+
+ void appendByte(uint8_t i) {
+ _buf.preAlloc(sizeof(uint8_t));
+ _buf.append(&i, sizeof(uint8_t));
+ }
+
+ void appendDouble(double i) {
+ _buf.preAlloc(sizeof(double));
+ double nboVal = vespalib::nbostream::n2h(i);
+ _buf.append(&nboVal, sizeof(double));
+ }
+ void append(const vespalib::string &s) { appendString(s); }
+ void append(uint64_t l) { appendLong(l); }
+
+ template <typename V>
+ void appendPredicateQueryTermVector(const V& v);
+
+ void createIntermediate(const Intermediate &node, size_t type) {
+ appendByte(type);
+ appendCompressedPositiveNumber(node.getChildren().size());
+ visitNodes(node.getChildren());
+ }
+
+ void createIntermediate(const Intermediate &node, size_t type,
+ size_t distance) {
+ appendByte(type);
+ appendCompressedPositiveNumber(node.getChildren().size());
+ appendCompressedPositiveNumber(distance);
+ visitNodes(node.getChildren());
+ }
+
+ void createIntermediate(const Intermediate &node, size_t type,
+ size_t distance,
+ const vespalib::string & view) {
+ appendByte(type);
+ appendCompressedPositiveNumber(node.getChildren().size());
+ appendCompressedPositiveNumber(distance);
+ appendString(view);
+ visitNodes(node.getChildren());
+ }
+
+ virtual void visit(And &node) {
+ createIntermediate(node, ParseItem::ITEM_AND);
+ }
+
+ virtual void visit(AndNot &node) {
+ createIntermediate(node, ParseItem::ITEM_NOT);
+ }
+
+ virtual void visit(Near &node) {
+ createIntermediate(node, ParseItem::ITEM_NEAR, node.getDistance());
+ }
+
+ virtual void visit(ONear &node) {
+ createIntermediate(node, ParseItem::ITEM_ONEAR, node.getDistance());
+ }
+
+ virtual void visit(Or &node) {
+ createIntermediate(node, ParseItem::ITEM_OR);
+ }
+
+ virtual void visit(WeakAnd &node) {
+ createIntermediate(node, ParseItem::ITEM_WEAK_AND, node.getMinHits(), node.getView());
+ }
+
+ virtual void visit(Equiv &node) {
+ createIntermediate(node, ParseItem::ITEM_EQUIV);
+ }
+
+ virtual void visit(Phrase &node) {
+ uint8_t typefield = (ParseItem::ITEM_PHRASE | ParseItem::IF_WEIGHT);
+ uint8_t flags = 0;
+ if (!node.isRanked()) {
+ flags |= ParseItem::IFLAG_NORANK;
+ }
+ if (!node.usePositionData()) {
+ flags |= ParseItem::IFLAG_NOPOSITIONDATA;
+ }
+ if (flags != 0) {
+ typefield |= ParseItem::IF_FLAGS;
+ }
+ appendByte(typefield);
+ appendCompressedNumber(node.getWeight().percent());
+ if (typefield & ParseItem::IF_FLAGS) {
+ appendByte(flags);
+ }
+ appendCompressedPositiveNumber(node.getChildren().size());
+ appendString(node.getView());
+ visitNodes(node.getChildren());
+ }
+
+ template <typename NODE>
+ void createWeightedSet(NODE &node, uint8_t typefield) {
+ uint8_t flags = 0;
+ if (!node.isRanked()) {
+ flags |= ParseItem::IFLAG_NORANK;
+ }
+ // usePositionData should not have any effect
+ // but is propagated anyway
+ if (!node.usePositionData()) {
+ flags |= ParseItem::IFLAG_NOPOSITIONDATA;
+ }
+ if (flags != 0) {
+ typefield |= ParseItem::IF_FLAGS;
+ }
+ appendByte(typefield);
+ appendCompressedNumber(node.getWeight().percent());
+ if (typefield & ParseItem::IF_FLAGS) {
+ appendByte(flags);
+ }
+ appendCompressedPositiveNumber(node.getChildren().size());
+ appendString(node.getView());
+ }
+
+ virtual void visit(WeightedSetTerm &node) {
+ createWeightedSet(node, ParseItem::ITEM_WEIGHTED_SET | ParseItem::IF_WEIGHT);
+ visitNodes(node.getChildren());
+ }
+
+ virtual void visit(DotProduct &node) {
+ createWeightedSet(node, ParseItem::ITEM_DOT_PRODUCT | ParseItem::IF_WEIGHT);
+ visitNodes(node.getChildren());
+ }
+
+ virtual void visit(WandTerm &node) {
+ createWeightedSet(node, ParseItem::ITEM_WAND | ParseItem::IF_WEIGHT);
+ appendCompressedPositiveNumber(node.getTargetNumHits());
+ appendDouble(node.getScoreThreshold());
+ appendDouble(node.getThresholdBoostFactor());
+ visitNodes(node.getChildren());
+ }
+
+ virtual void visit(Rank &node) {
+ createIntermediate(node, ParseItem::ITEM_RANK);
+ }
+
+ template <typename T> void appendTerm(const TermBase<T> &node);
+
+ template <class Term>
+ void createTerm(const Term &node, size_t type) {
+ uint8_t typefield = type |
+ ParseItem::IF_WEIGHT |
+ ParseItem::IF_UNIQUEID;
+ uint8_t flags = 0;
+ if (!node.isRanked()) {
+ flags |= ParseItem::IFLAG_NORANK;
+ }
+ if (!node.usePositionData()) {
+ flags |= ParseItem::IFLAG_NOPOSITIONDATA;
+ }
+ if (flags != 0) {
+ typefield |= ParseItem::IF_FLAGS;
+ }
+ appendByte(typefield);
+ appendCompressedNumber(node.getWeight().percent());
+ appendCompressedPositiveNumber(node.getId());
+ if (typefield & ParseItem::IF_FLAGS) {
+ appendByte(flags);
+ }
+ appendString(node.getView());
+ appendTerm(node);
+ }
+
+ virtual void visit(NumberTerm &node) {
+ createTerm(node, ParseItem::ITEM_NUMTERM);
+ }
+
+ virtual void visit(LocationTerm &node) {
+ createTerm(node, ParseItem::ITEM_NUMTERM);
+ }
+
+ virtual void visit(PrefixTerm &node) {
+ createTerm(node, ParseItem::ITEM_PREFIXTERM);
+ }
+
+ virtual void visit(RangeTerm &node) {
+ createTerm(node, ParseItem::ITEM_NUMTERM);
+ }
+
+ virtual void visit(StringTerm &node) {
+ createTerm(node, ParseItem::ITEM_TERM);
+ }
+
+ virtual void visit(SubstringTerm &node) {
+ createTerm(node, ParseItem::ITEM_SUBSTRINGTERM);
+ }
+
+ virtual void visit(SuffixTerm &node) {
+ createTerm(node, ParseItem::ITEM_SUFFIXTERM);
+ }
+
+ virtual void visit(PredicateQuery &node) {
+ createTerm(node, ParseItem::ITEM_PREDICATE_QUERY);
+ }
+
+ virtual void visit(RegExpTerm &node) {
+ createTerm(node, ParseItem::ITEM_REGEXP);
+ }
+
+public:
+ QueryNodeConverter()
+ : _buf(4096)
+ {
+ }
+
+ string getStackDump() {
+ return string(_buf.GetDrainPos(),
+ _buf.GetDrainPos() + _buf.GetUsedLen());
+ }
+};
+
+template <typename T>
+void QueryNodeConverter::appendTerm(const TermBase<T> &node) {
+ vespalib::asciistream ost;
+ ost << node.getTerm();
+ appendString(ost.str());
+}
+template <>
+void QueryNodeConverter::appendTerm(const TermBase<string> &node) {
+ appendString(node.getTerm());
+}
+template <>
+void QueryNodeConverter::appendTerm(
+ const TermBase<PredicateQueryTerm::UP> &node) {
+ const PredicateQueryTerm &term = *node.getTerm();
+ appendPredicateQueryTermVector(term.getFeatures());
+ appendPredicateQueryTermVector(term.getRangeFeatures());
+}
+template <typename V>
+void QueryNodeConverter::appendPredicateQueryTermVector(const V& v) {
+ appendCompressedNumber(v.size());
+ for (const auto &entry : v) {
+ append(entry.getKey());
+ append(entry.getValue());
+ append(entry.getSubQueryBitmap());
+ }
+}
+} // namespace
+
+string StackDumpCreator::create(const Node &node) {
+ QueryNodeConverter converter;
+ const_cast<Node &>(node).accept(converter);
+ return converter.getStackDump();
+}
+
+using namespace search::query;
diff --git a/searchlib/src/vespa/searchlib/query/tree/stackdumpcreator.h b/searchlib/src/vespa/searchlib/query/tree/stackdumpcreator.h
new file mode 100644
index 00000000000..80bcd60df5d
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/query/tree/stackdumpcreator.h
@@ -0,0 +1,19 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/vespalib/stllike/string.h>
+
+namespace search {
+namespace query {
+
+class Node;
+
+struct StackDumpCreator {
+ // Creates a stack dump from a query tree.
+ static vespalib::string create(const Node &node);
+};
+
+} // namespace query
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/query/tree/stackdumpquerycreator.h b/searchlib/src/vespa/searchlib/query/tree/stackdumpquerycreator.h
new file mode 100644
index 00000000000..c3b10aae05d
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/query/tree/stackdumpquerycreator.h
@@ -0,0 +1,175 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "node.h"
+#include "querybuilder.h"
+#include "term.h"
+#include <vespa/vespalib/stllike/asciistream.h>
+#include <string>
+#include <vespa/searchlib/parsequery/stackdumpiterator.h>
+#include <algorithm>
+
+namespace search {
+namespace query {
+
+/**
+ * Creates a query tree from a stack dump.
+ */
+template <class NodeTypes>
+class StackDumpQueryCreator {
+private:
+ /**
+ * If changing this class note:
+ * Note that this method must return a reference into the existing querystack.
+ * This is necessary to use the non-copying stringref noted in the create method.
+ */
+ static vespalib::stringref readString(
+ SimpleQueryStackDumpIterator &queryStack,
+ void (SimpleQueryStackDumpIterator::*f)(const char **,
+ size_t *) const)
+ {
+ const char *p;
+ size_t len;
+ (queryStack.*f)(&p, &len);
+ return vespalib::stringref(p, len);
+ }
+
+public:
+ static Node::UP create(search::SimpleQueryStackDumpIterator &queryStack)
+ {
+ QueryBuilder<NodeTypes> builder;
+
+ // Make sure that the life time of what pureTermView refers to exceeds that of pureTermView.
+ // Especially make sure that do not create any stack local objects like vespalib::string
+ // with smaller scope, that you refer with pureTermView.
+ vespalib::stringref pureTermView;
+ while (queryStack.next()) {
+ uint32_t arity = queryStack.getArity();
+ uint32_t arg1 = queryStack.getArg1();
+ double arg2 = queryStack.getArg2();
+ double arg3 = queryStack.getArg3();
+ ParseItem::ItemType type = queryStack.getType();
+ Node::UP node;
+ Term *t = 0;
+ if (type == ParseItem::ITEM_AND) {
+ builder.addAnd(arity);
+ } else if (type == ParseItem::ITEM_RANK) {
+ builder.addRank(arity);
+ } else if (type == ParseItem::ITEM_OR) {
+ builder.addOr(arity);
+ } else if (type == ParseItem::ITEM_WORD_ALTERNATIVES) {
+ vespalib::stringref view = readString(queryStack,
+ &SimpleQueryStackDumpIterator::getIndexName);
+ int32_t id = queryStack.getUniqueId();
+ Weight weight = queryStack.GetWeight();
+ builder.addEquiv(arity, id, weight);
+ pureTermView = view;
+ } else if (type == ParseItem::ITEM_WEAK_AND) {
+ vespalib::stringref view = readString(queryStack,
+ &SimpleQueryStackDumpIterator::getIndexName);
+ builder.addWeakAnd(arity, arg1, view);
+ pureTermView = view;
+ } else if (type == ParseItem::ITEM_EQUIV) {
+ int32_t id = queryStack.getUniqueId();
+ Weight weight = queryStack.GetWeight();
+ builder.addEquiv(arity, id, weight);
+ } else if (type == ParseItem::ITEM_NEAR) {
+ builder.addNear(arity, arg1);
+ } else if (type == ParseItem::ITEM_ONEAR) {
+ builder.addONear(arity, arg1);
+ } else if (type == ParseItem::ITEM_PHRASE) {
+ vespalib::stringref view = readString(queryStack,
+ &SimpleQueryStackDumpIterator::getIndexName);
+ int32_t id = queryStack.getUniqueId();
+ Weight weight = queryStack.GetWeight();
+ t = &builder.addPhrase(arity, view, id, weight);
+ pureTermView = view;
+ } else if (type == ParseItem::ITEM_WEIGHTED_SET) {
+ vespalib::stringref view = readString(queryStack,
+ &SimpleQueryStackDumpIterator::getIndexName);
+ int32_t id = queryStack.getUniqueId();
+ Weight weight = queryStack.GetWeight();
+ t = &builder.addWeightedSetTerm(arity, view, id, weight);
+ pureTermView = vespalib::stringref();
+ } else if (type == ParseItem::ITEM_DOT_PRODUCT) {
+ vespalib::stringref view = readString(queryStack,
+ &SimpleQueryStackDumpIterator::getIndexName);
+ int32_t id = queryStack.getUniqueId();
+ Weight weight = queryStack.GetWeight();
+ t = &builder.addDotProduct(arity, view, id, weight);
+ pureTermView = vespalib::stringref();
+ } else if (type == ParseItem::ITEM_WAND) {
+ vespalib::stringref view = readString(queryStack,
+ &SimpleQueryStackDumpIterator::getIndexName);
+ int32_t id = queryStack.getUniqueId();
+ Weight weight = queryStack.GetWeight();
+ t = &builder.addWandTerm(
+ arity, view, id, weight, arg1, arg2, arg3);
+ pureTermView = vespalib::stringref();
+ } else if (type == ParseItem::ITEM_NOT) {
+ builder.addAndNot(arity);
+ } else {
+ vespalib::stringref term = readString(queryStack,
+ &SimpleQueryStackDumpIterator::getTerm);
+ vespalib::stringref view = readString(queryStack,
+ &SimpleQueryStackDumpIterator::getIndexName);
+ int32_t id = queryStack.getUniqueId();
+ Weight weight = queryStack.GetWeight();
+
+ if (type == ParseItem::ITEM_TERM) {
+ t = &builder.addStringTerm(term, view, id, weight);
+ } else if (type == ParseItem::ITEM_PURE_WEIGHTED_STRING) {
+ t = &builder.addStringTerm(term, pureTermView, id, weight);
+ } else if (type == ParseItem::ITEM_PURE_WEIGHTED_LONG) {
+ t = &builder.addNumberTerm(term, pureTermView, id, weight);
+ } else if (type == ParseItem::ITEM_PREFIXTERM) {
+ t = &builder.addPrefixTerm(term, view, id, weight);
+ } else if (type == ParseItem::ITEM_SUBSTRINGTERM) {
+ t = &builder.addSubstringTerm(term, view, id, weight);
+ } else if (type == ParseItem::ITEM_EXACTSTRINGTERM) {
+ t = &builder.addStringTerm(term, view, id, weight);
+ } else if (type == ParseItem::ITEM_SUFFIXTERM) {
+ t = &builder.addSuffixTerm(term, view, id, weight);
+ } else if (type == ParseItem::ITEM_NUMTERM) {
+ if (term[0] == '[' || term[0] == '<' || term[0] == '>') {
+ Range range(term);
+ t = &builder.addRangeTerm(range, view, id, weight);
+ } else if (term[0] == '(') {
+ Location loc(term);
+ t = &builder.addLocationTerm(loc, view, id, weight);
+ } else {
+ t = &builder.addNumberTerm(term, view, id, weight);
+ }
+ } else if (type == ParseItem::ITEM_PREDICATE_QUERY) {
+ t = &builder.addPredicateQuery(
+ queryStack.getPredicateQueryTerm(),
+ view, id, weight);
+ } else if (type == ParseItem::ITEM_REGEXP) {
+ t = &builder.addRegExpTerm(term, view, id, weight);
+ } else {
+ LOG(error, "Unable to create query tree from stack dump. "
+ "node type = %d.", type);
+ }
+ }
+ if (t) {
+ t->setTermIndex(queryStack.getTermIndex());
+ if (queryStack.getFlags() & ParseItem::IFLAG_NORANK) {
+ t->setRanked(false);
+ }
+ if (queryStack.getFlags() & ParseItem::IFLAG_NOPOSITIONDATA) {
+ t->setPositionData(false);
+ }
+ }
+ }
+ if (builder.hasError()) {
+ LOG(error, "Unable to create query tree from stack dump. %s",
+ builder.error().c_str());
+ }
+ return builder.build();
+ }
+};
+
+} // namespace query
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/query/tree/templatetermvisitor.h b/searchlib/src/vespa/searchlib/query/tree/templatetermvisitor.h
new file mode 100644
index 00000000000..fc11856f564
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/query/tree/templatetermvisitor.h
@@ -0,0 +1,59 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/query/tree/customtypetermvisitor.h>
+
+namespace search {
+namespace query {
+
+/**
+ * Use this class to visit all term nodes by deriving from this class
+ * and implementing a single template member function:
+ * template <class TermType> void visitTerm(TermType &n);
+ *
+ * This class uses the curiously recurring template pattern to know
+ * its own derived class that has the visitTerm template member
+ * function.
+ */
+template <class Self, class NodeTypes>
+class TemplateTermVisitor : public CustomTypeTermVisitor<NodeTypes> {
+ template <class TermNode>
+ void myVisit(TermNode &n) {
+ static_cast<Self &>(*this).template visitTerm(n);
+ }
+
+ virtual void visit(typename NodeTypes::NumberTerm &n) { myVisit(n); }
+ virtual void visit(typename NodeTypes::LocationTerm &n) { myVisit(n); }
+ virtual void visit(typename NodeTypes::PrefixTerm &n) { myVisit(n); }
+ virtual void visit(typename NodeTypes::RangeTerm &n) { myVisit(n); }
+ virtual void visit(typename NodeTypes::StringTerm &n) { myVisit(n); }
+ virtual void visit(typename NodeTypes::SubstringTerm &n) { myVisit(n); }
+ virtual void visit(typename NodeTypes::SuffixTerm &n) { myVisit(n); }
+ virtual void visit(typename NodeTypes::PredicateQuery &n) { myVisit(n); }
+ virtual void visit(typename NodeTypes::RegExpTerm &n) { myVisit(n); }
+
+ // Phrases are terms with children. This visitor will not visit
+ // the phrase's children, unless this member function is
+ // overridden to do so.
+ virtual void visit(typename NodeTypes::Phrase &n) { myVisit(n); }
+
+ // WeightedSetTerms are terms with children. This visitor will not visit
+ // the weighted set's children, unless this member function is
+ // overridden to do so.
+ virtual void visit(typename NodeTypes::WeightedSetTerm &n) { myVisit(n); }
+
+ // DotProducts have children. This visitor will not visit the dot
+ // product's children, unless this member function is overridden
+ // to do so.
+ virtual void visit(typename NodeTypes::DotProduct &n) { myVisit(n); }
+
+ // WandTerms have children. This visitor will not visit the wand
+ // term's children, unless this member function is overridden
+ // to do so.
+ virtual void visit(typename NodeTypes::WandTerm &n) { myVisit(n); }
+};
+
+} // namespace query
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/query/tree/term.cpp b/searchlib/src/vespa/searchlib/query/tree/term.cpp
new file mode 100644
index 00000000000..a9bdf50962c
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/query/tree/term.cpp
@@ -0,0 +1,27 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".term");
+
+#include "term.h"
+
+namespace search {
+namespace query {
+
+Term::~Term()
+{
+}
+
+Term::Term(const vespalib::stringref &view, int32_t id, Weight weight) :
+ _view(view),
+ _id(id),
+ _weight(weight),
+ _term_index(-1),
+ _ranked(true),
+ _position_data(true)
+{
+}
+
+} // namespace query
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/query/tree/term.h b/searchlib/src/vespa/searchlib/query/tree/term.h
new file mode 100644
index 00000000000..dafcc3976da
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/query/tree/term.h
@@ -0,0 +1,78 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/vespalib/stllike/string.h>
+#include <vespa/searchlib/query/tree/node.h>
+#include <vespa/searchlib/query/weight.h>
+
+namespace search {
+namespace query {
+
+/**
+ * This is a leaf in the Query tree. Sort of. Phrases are both terms
+ * and intermediate nodes.
+ */
+class Term
+{
+ vespalib::string _view;
+ int32_t _id;
+ Weight _weight;
+ int32_t _term_index;
+ bool _ranked;
+ bool _position_data;
+
+public:
+ virtual ~Term() = 0;
+
+ void setTermIndex(int32_t term_index) { _term_index = term_index; }
+ void setRanked(bool ranked) { _ranked = ranked; }
+ void setPositionData(bool position_data) { _position_data = position_data; }
+
+ void setStateFrom(const Term& other) {
+ setTermIndex(other.getTermIndex());
+ setRanked(other.isRanked());
+ setPositionData(other.usePositionData());
+ // too late to copy this state:
+ assert(_view == other.getView());
+ assert(_id == other.getId());
+ assert(_weight == other.getWeight());
+ }
+
+ const vespalib::string & getView() const { return _view; }
+ Weight getWeight() const { return _weight; }
+ int32_t getId() const { return _id; }
+ int32_t getTermIndex() const { return _term_index; }
+ bool isRanked() const { return _ranked; }
+ bool usePositionData() const { return _position_data; }
+
+protected:
+ Term(const vespalib::stringref &view, int32_t id, Weight weight);
+};
+
+/**
+ * Generic functionality for most of Term's derived classes.
+ */
+template <typename T>
+class TermBase : public Node, public Term {
+ T _term;
+
+public:
+ typedef T Type;
+
+ virtual ~TermBase() = 0;
+ const T &getTerm() const { return _term; }
+
+protected:
+ TermBase(T term, const vespalib::stringref &view, int32_t id, Weight weight)
+ : Term(view, id, weight),
+ _term(std::move(term)) {
+ }
+};
+
+template <typename T>
+TermBase<T>::~TermBase() {}
+
+} // namespace query
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/query/tree/termnodes.cpp b/searchlib/src/vespa/searchlib/query/tree/termnodes.cpp
new file mode 100644
index 00000000000..3da7aa59b7b
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/query/tree/termnodes.cpp
@@ -0,0 +1,29 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".termnodes");
+
+#include "termnodes.h"
+
+namespace search {
+namespace query {
+
+NumberTerm::~NumberTerm() {}
+
+PrefixTerm::~PrefixTerm() {}
+
+RangeTerm::~RangeTerm() {}
+
+StringTerm::~StringTerm() {}
+
+SubstringTerm::~SubstringTerm() {}
+
+SuffixTerm::~SuffixTerm() {}
+
+LocationTerm::~LocationTerm() {}
+
+RegExpTerm::~RegExpTerm() {}
+
+} // namespace query
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/query/tree/termnodes.h b/searchlib/src/vespa/searchlib/query/tree/termnodes.h
new file mode 100644
index 00000000000..4e5c6ae0e49
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/query/tree/termnodes.h
@@ -0,0 +1,123 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "location.h"
+#include "predicate_query_term.h"
+#include "querynodemixin.h"
+#include "range.h"
+#include "term.h"
+
+namespace search {
+namespace query {
+
+typedef TermBase<vespalib::string> StringBase;
+
+class NumberTerm : public QueryNodeMixin<NumberTerm, StringBase >
+{
+public:
+ NumberTerm(Type term, const vespalib::stringref &view, int32_t id, Weight weight)
+ : QueryNodeMixinType(term, view, id, weight) {}
+ virtual ~NumberTerm() = 0;
+};
+
+//-----------------------------------------------------------------------------
+
+class PrefixTerm : public QueryNodeMixin<PrefixTerm, StringBase >
+{
+public:
+ PrefixTerm(const Type &term, const vespalib::stringref &view,
+ int32_t id, Weight weight)
+ : QueryNodeMixinType(term, view, id, weight)
+ {}
+ virtual ~PrefixTerm() = 0;
+};
+
+//-----------------------------------------------------------------------------
+
+class RangeTerm : public QueryNodeMixin<RangeTerm, TermBase<Range> >
+{
+public:
+ RangeTerm(const Type& term, const vespalib::stringref &view,
+ int32_t id, Weight weight)
+ : QueryNodeMixinType(term, view, id, weight)
+ {}
+ virtual ~RangeTerm() = 0;
+};
+
+//-----------------------------------------------------------------------------
+
+class StringTerm : public QueryNodeMixin<StringTerm, StringBase >
+{
+public:
+ StringTerm(const Type &term, const vespalib::stringref &view,
+ int32_t id, Weight weight)
+ : QueryNodeMixinType(term, view, id, weight) {}
+ virtual ~StringTerm() = 0;
+};
+
+//-----------------------------------------------------------------------------
+
+class SubstringTerm :
+ public QueryNodeMixin<SubstringTerm, StringBase >
+{
+ public:
+ SubstringTerm(const Type &term, const vespalib::stringref &view,
+ int32_t id, Weight weight)
+ : QueryNodeMixinType(term, view, id, weight)
+ {}
+ virtual ~SubstringTerm() = 0;
+};
+
+//-----------------------------------------------------------------------------
+
+class SuffixTerm : public QueryNodeMixin<SuffixTerm, StringBase >
+{
+public:
+ SuffixTerm(const Type &term, const vespalib::stringref &view,
+ int32_t id, Weight weight)
+ : QueryNodeMixinType(term, view, id, weight)
+ {}
+ virtual ~SuffixTerm() = 0;
+};
+
+//-----------------------------------------------------------------------------
+
+class LocationTerm : public QueryNodeMixin<LocationTerm, TermBase<Location> >
+{
+public:
+ LocationTerm(const Type &term, const vespalib::stringref &view,
+ int32_t id, Weight weight)
+ : QueryNodeMixinType(term, view, id, weight)
+ {}
+ virtual ~LocationTerm() = 0;
+};
+
+//-----------------------------------------------------------------------------
+
+class PredicateQuery : public QueryNodeMixin<PredicateQuery,
+ TermBase<PredicateQueryTerm::UP> >
+{
+public:
+ PredicateQuery(PredicateQueryTerm::UP term, const vespalib::stringref &view,
+ int32_t id, Weight weight)
+ : QueryNodeMixinType(std::move(term), view, id, weight)
+ {}
+};
+
+//-----------------------------------------------------------------------------
+
+class RegExpTerm : public QueryNodeMixin<RegExpTerm, StringBase>
+{
+public:
+ RegExpTerm(const Type &term, const vespalib::stringref &view,
+ int32_t id, Weight weight)
+ : QueryNodeMixinType(term, view, id, weight)
+ {}
+ virtual ~RegExpTerm() = 0;
+};
+
+
+} // namespace query
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/query/weight.h b/searchlib/src/vespa/searchlib/query/weight.h
new file mode 100644
index 00000000000..18c6a2edd78
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/query/weight.h
@@ -0,0 +1,52 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+namespace search {
+namespace query {
+
+/**
+ * Represents the weight given on a query item such as a term, phrase, or equiv.
+ * Normally given and used as an integer percent value.
+ */
+class Weight
+{
+private:
+ int32_t _weight;
+
+public:
+ /**
+ * constructor.
+ * @param value The initial weight in percent; should be 100 unless a specific value is set.
+ **/
+ explicit Weight(int32_t value) : _weight(value) {}
+
+ /**
+ * change the weight value.
+ * @param value The new weight value in percent.
+ **/
+ void setPercent(int32_t value) { _weight = value; }
+
+ /**
+ * retrieve the weight value.
+ * @return weight value in percent.
+ **/
+ int32_t percent() const { return _weight; }
+
+ /**
+ * retrieve the weight value as a multiplier.
+ * @return weight multiplier with 100 percent giving 1.0 as multiplier.
+ **/
+ double multiplier() const { return 0.01 * _weight; }
+
+ /** compare two weights */
+ bool operator== (const Weight& other) const { return _weight == other._weight; }
+};
+
+} // namespace query
+} // namespace search
+
+inline search::query::Weight operator+(const search::query::Weight& a, const search::query::Weight& b)
+{
+ return search::query::Weight(a.percent() + b.percent());
+}
+
diff --git a/searchlib/src/vespa/searchlib/queryeval/.gitignore b/searchlib/src/vespa/searchlib/queryeval/.gitignore
new file mode 100644
index 00000000000..583460ae288
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/.gitignore
@@ -0,0 +1,3 @@
+*.So
+.depend
+Makefile
diff --git a/searchlib/src/vespa/searchlib/queryeval/CMakeLists.txt b/searchlib/src/vespa/searchlib/queryeval/CMakeLists.txt
new file mode 100644
index 00000000000..56c77ed46cf
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/CMakeLists.txt
@@ -0,0 +1,54 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_library(searchlib_queryeval
+ SOURCES
+ andnotsearch.cpp
+ andsearch.cpp
+ blueprint.cpp
+ booleanmatchiteratorwrapper.cpp
+ create_blueprint_visitor_helper.cpp
+ document_weight_search_iterator.cpp
+ dot_product_blueprint.cpp
+ dot_product_search.cpp
+ emptysearch.cpp
+ equiv_blueprint.cpp
+ equivsearch.cpp
+ fake_requestcontext.cpp
+ fake_result.cpp
+ fake_search.cpp
+ fake_searchable.cpp
+ field_spec.cpp
+ get_weight_from_node.cpp
+ hitcollector.cpp
+ intermediate_blueprints.cpp
+ isourceselector.cpp
+ iterator_pack.cpp
+ iterators.cpp
+ leaf_blueprints.cpp
+ monitoring_dump_iterator.cpp
+ monitoring_search_iterator.cpp
+ multibitvectoriterator.cpp
+ multisearch.cpp
+ nearsearch.cpp
+ orsearch.cpp
+ predicate_blueprint.cpp
+ predicate_search.cpp
+ ranksearch.cpp
+ searchable.cpp
+ searchiterator.cpp
+ simple_phrase_blueprint.cpp
+ simple_phrase_search.cpp
+ simpleresult.cpp
+ simplesearch.cpp
+ sourceblendersearch.cpp
+ split_float.cpp
+ termasstring.cpp
+ termwise_blueprint_helper.cpp
+ termwise_search.cpp
+ truesearch.cpp
+ unpackinfo.cpp
+ weighted_set_term_blueprint.cpp
+ weighted_set_term_search.cpp
+ $<TARGET_OBJECTS:searchlib_queryeval_wand>
+ INSTALL lib64
+ DEPENDS
+)
diff --git a/searchlib/src/vespa/searchlib/queryeval/OWNERS b/searchlib/src/vespa/searchlib/queryeval/OWNERS
new file mode 100644
index 00000000000..12b533ec610
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/OWNERS
@@ -0,0 +1 @@
+havardpe
diff --git a/searchlib/src/vespa/searchlib/queryeval/andnotsearch.cpp b/searchlib/src/vespa/searchlib/queryeval/andnotsearch.cpp
new file mode 100644
index 00000000000..58a33c26d9f
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/andnotsearch.cpp
@@ -0,0 +1,163 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "andnotsearch.h"
+
+namespace search {
+namespace queryeval {
+
+void
+AndNotSearch::doSeek(uint32_t docid)
+{
+ const Children & children(getChildren());
+ if (!children[0]->seek(docid)) {
+ return; // not match in positive subtree
+ }
+ for (uint32_t i = 1; i < children.size(); ++i) {
+ if (children[i]->seek(docid)) {
+ return; // match in negative subtree
+ }
+ }
+ setDocId(docid); // we have a match
+}
+
+void
+AndNotSearch::doUnpack(uint32_t docid)
+{
+ getChildren()[0]->doUnpack(docid);
+}
+
+SearchIterator::UP
+AndNotSearchStrictBase::andWith(UP filter, uint32_t estimate)
+{
+ return getChildren()[0]->andWith(std::move(filter), estimate);
+}
+
+namespace {
+class AndNotSearchStrict : public AndNotSearchStrictBase
+{
+private:
+ template<bool doSeekOnlyOnPositiveChild>
+ void internalSeek(uint32_t docid);
+protected:
+ void doSeek(uint32_t docid) override {
+ internalSeek<true>(docid);
+ }
+public:
+ /**
+ * Create a new strict AndNot Search with the given children.
+ * A strict AndNot can assume that the first child below is also strict.
+ * No such assumptions can be made about the * other children.
+ *
+ * @param children the search objects we are andnot'ing
+ **/
+ AndNotSearchStrict(const Children & children) : AndNotSearchStrictBase(children)
+ {
+ }
+
+ void initRange(uint32_t beginid, uint32_t endid) override {
+ AndNotSearch::initRange(beginid, endid);
+ internalSeek<false>(beginid);
+ }
+
+};
+
+template <bool doSeekOnlyOnPositiveChild>
+void
+AndNotSearchStrict::internalSeek(uint32_t docid)
+{
+ const Children & children(getChildren());
+ bool hit;
+ if (doSeekOnlyOnPositiveChild) {
+ children[0]->doSeek(docid);
+ hit = (children[0]->getDocId() == docid);
+ } else {
+ hit = children[0]->seek(docid);
+ }
+ for (uint32_t i = 1; hit && i < children.size(); ++i) {
+ if (children[i]->seek(docid)) {
+ hit = false;
+ }
+ }
+ if (hit) {
+ setDocId(docid);
+ return;
+ }
+ uint32_t nextId = children[0]->getDocId();
+ while (!isAtEnd(nextId)) {
+ bool foundHit = true;
+ for (uint32_t i = 1; i < children.size(); ++i) {
+ if (children[i]->seek(nextId)) {
+ foundHit = false;
+ ++nextId;
+ break;
+ }
+ }
+ if (foundHit) {
+ break;
+ } else {
+ children[0]->doSeek(nextId);
+ nextId = children[0]->getDocId();
+ }
+ }
+ setDocId(nextId);
+}
+
+} // namespace
+
+OptimizedAndNotForBlackListing::OptimizedAndNotForBlackListing(const MultiSearch::Children & children) :
+ AndNotSearchStrictBase(children)
+{
+}
+
+void OptimizedAndNotForBlackListing::initRange(uint32_t beginid, uint32_t endid)
+{
+ AndNotSearch::initRange(beginid, endid);
+ setDocId(internalSeek<false>(beginid));
+}
+
+bool OptimizedAndNotForBlackListing::isBlackListIterator(const SearchIterator * iterator)
+{
+ return dynamic_cast<const BlackListIterator *>(iterator) != 0;
+}
+
+void OptimizedAndNotForBlackListing::doSeek(uint32_t docid)
+{
+ setDocId(internalSeek<true>(docid));
+}
+
+void OptimizedAndNotForBlackListing::doUnpack(uint32_t docid)
+{
+ positive()->doUnpack(docid);
+}
+
+SearchIterator *
+AndNotSearch::create(const AndNotSearch::Children &children, bool strict) {
+ if (strict) {
+ if ((children.size() == 2) && OptimizedAndNotForBlackListing::isBlackListIterator(children[1])) {
+ return new OptimizedAndNotForBlackListing(children);
+ } else {
+ return new AndNotSearchStrict(children);
+ }
+ } else {
+ return new AndNotSearch(children);
+ }
+}
+
+BitVector::UP
+AndNotSearch::get_hits(uint32_t begin_id) {
+ const Children &children = getChildren();
+ BitVector::UP result = children.front()->get_hits(begin_id);
+ if (children.size() > 1) {
+ BitVector::UP not_result = children[1]->get_hits(begin_id);
+ for (size_t i = 2; i < children.size(); ++i) {
+ children[i]->or_hits_into(*not_result, begin_id);
+ }
+ const BitVector &rhs = *not_result;
+ result->andNotWith(rhs);
+ }
+ return result;
+}
+
+} // namespace queryeval
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/queryeval/andnotsearch.h b/searchlib/src/vespa/searchlib/queryeval/andnotsearch.h
new file mode 100644
index 00000000000..4c6eae0693c
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/andnotsearch.h
@@ -0,0 +1,101 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vector>
+#include "multisearch.h"
+#include <vespa/searchlib/attribute/attributeiterators.h>
+#include <vespa/searchlib/attribute/singlesmallnumericattribute.h>
+
+namespace search {
+namespace queryeval {
+
+/**
+ * A simple implementation of the AndNot search operation.
+ **/
+class AndNotSearch : public MultiSearch
+{
+protected:
+ void doSeek(uint32_t docid) override;
+ void doUnpack(uint32_t docid) override;
+ Trinary is_strict() const override { return Trinary::False; }
+
+ /**
+ * Create a new AndNot Search with the given children.
+ *A AndNot has no strictness assumptions about its children.
+ *
+ * @param children the search objects we are andnot'ing
+ **/
+ AndNotSearch(const Children & children) : MultiSearch(children) { }
+
+public:
+ // Caller takes ownership of the returned SearchIterator.
+ static SearchIterator *create(const Children &children, bool strict);
+
+ BitVector::UP get_hits(uint32_t begin_id) override;
+
+private:
+ bool isAndNot() const override { return true; }
+ bool needUnpack(size_t index) const override {
+ return index == 0;
+ }
+};
+
+class AndNotSearchStrictBase : public AndNotSearch
+{
+protected:
+ AndNotSearchStrictBase(const Children & children) : AndNotSearch(children) { }
+private:
+ Trinary is_strict() const override { return Trinary::True; }
+ UP andWith(UP filter, uint32_t estimate) override;
+};
+
+/**
+ * This is a specialized andnot iterator you get when you have no andnot's in you query and only get the blacklist blueprint.
+ * This one is now constructed at getSearch() phase. However this should be better handled in the AndNotBlueprint.
+ */
+class OptimizedAndNotForBlackListing : public AndNotSearchStrictBase
+{
+private:
+ // This is the actual iterator that should be produced by the documentmetastore in searchcore, but that
+ // will probably be changed later on. An ordinary bitvector could be even better as that would open up for more optimizations.
+ //typedef FilterAttributeIteratorT<SingleValueSmallNumericAttribute::SingleSearchContext> BlackListIterator;
+ typedef AttributeIteratorT<SingleValueSmallNumericAttribute::SingleSearchContext> BlackListIterator;
+public:
+ OptimizedAndNotForBlackListing(const MultiSearch::Children & children);
+ static bool isBlackListIterator(const SearchIterator * iterator);
+
+ uint32_t seekFast(uint32_t docid) {
+ return internalSeek<true>(docid);
+ }
+ void initRange(uint32_t beginid, uint32_t endid) override;
+private:
+ SearchIterator * positive() { return getChildren()[0]; }
+ BlackListIterator * blackList() { return static_cast<BlackListIterator *>(getChildren()[1]); }
+ template<bool doSeekOnly>
+ uint32_t internalSeek(uint32_t docid) {
+ uint32_t curr(docid);
+ while (true) {
+ if (doSeekOnly) {
+ positive()->doSeek(curr);
+ } else {
+ positive()->seek(curr);
+ }
+ if ( ! positive()->isAtEnd() ) {
+ curr = positive()->getDocId();
+ if (! blackList()->seekFast(curr)) {
+ return curr;
+ }
+ curr++;
+ } else {
+ return search::endDocId;
+ }
+ }
+ }
+ virtual void doSeek(uint32_t docid);
+ virtual void doUnpack(uint32_t docid);
+};
+
+} // namespace queryeval
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/queryeval/andsearch.cpp b/searchlib/src/vespa/searchlib/queryeval/andsearch.cpp
new file mode 100644
index 00000000000..9217c90ad59
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/andsearch.cpp
@@ -0,0 +1,123 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "andsearch.h"
+#include "andsearchstrict.h"
+
+namespace search {
+namespace queryeval {
+
+BitVector::UP
+AndSearch::get_hits(uint32_t begin_id) {
+ const Children &children = getChildren();
+ BitVector::UP result = children.front()->get_hits(begin_id);
+ for (size_t i = 1; i < children.size(); ++i) {
+ children[i]->and_hits_into(*result, begin_id);
+ }
+ return result;
+}
+
+SearchIterator::UP AndSearch::andWith(UP filter, uint32_t estimate_)
+{
+ return offerFilterToChildren(std::move(filter), estimate_);
+}
+
+SearchIterator::UP AndSearch::offerFilterToChildren(UP filter, uint32_t estimate_)
+{
+ const Children & children(getChildren());
+ for (uint32_t i(0); filter && (i < children.size()); ++i) {
+ filter = children[i]->andWith(std::move(filter), estimate_);
+ }
+ return filter;
+}
+
+void AndSearch::doUnpack(uint32_t docid)
+{
+ const Children & children(getChildren());
+ for (uint32_t i(0); i < children.size(); ++i) {
+ children[i]->doUnpack(docid);
+ }
+}
+
+AndSearch::AndSearch(const Children & children) :
+ MultiSearch(children),
+ _estimate(std::numeric_limits<uint32_t>::max())
+{
+}
+
+namespace {
+
+class FullUnpack
+{
+public:
+ void unpack(uint32_t docid, const MultiSearch & search) {
+ const MultiSearch::Children & children(search.getChildren());
+ for (uint32_t i(0); i < children.size(); ++i) {
+ children[i]->doUnpack(docid);
+ }
+ }
+ bool needUnpack(size_t index) const {
+ (void) index;
+ return true;
+ }
+ void onRemove(size_t index) { (void) index; }
+ void onInsert(size_t index) { (void) index; }
+};
+
+class SelectiveUnpack
+{
+public:
+ SelectiveUnpack(const UnpackInfo & unpackInfo) :
+ _unpackInfo(unpackInfo)
+ { }
+ void unpack(uint32_t docid, const MultiSearch & search) {
+ auto &children = search.getChildren();
+ _unpackInfo.each([&children,docid](size_t i){children[i]->doUnpack(docid);},
+ children.size());
+ }
+ bool needUnpack(size_t index) const {
+ return _unpackInfo.needUnpack(index);
+ }
+ void onRemove(size_t index) {
+ _unpackInfo.remove(index);
+ }
+ void onInsert(size_t index) {
+ _unpackInfo.insert(index);
+ }
+private:
+ UnpackInfo _unpackInfo;
+};
+
+}
+
+AndSearch *
+AndSearch::create(const MultiSearch::Children &children, bool strict)
+{
+ UnpackInfo unpackInfo;
+ unpackInfo.forceAll();
+ return create(children, strict, unpackInfo);
+}
+
+AndSearch *
+AndSearch::create(const MultiSearch::Children &children, bool strict, const UnpackInfo & unpackInfo) {
+ if (strict) {
+ if (unpackInfo.unpackAll()) {
+ return new AndSearchStrict<FullUnpack>(children, FullUnpack());
+ } else if(unpackInfo.empty()) {
+ return new AndSearchStrict<NoUnpack>(children, NoUnpack());
+ } else {
+ return new AndSearchStrict<SelectiveUnpack>(children, SelectiveUnpack(unpackInfo));
+ }
+ } else {
+ if (unpackInfo.unpackAll()) {
+ return new AndSearchNoStrict<FullUnpack>(children, FullUnpack());
+ } else if (unpackInfo.empty()) {
+ return new AndSearchNoStrict<NoUnpack>(children, NoUnpack());
+ } else {
+ return new AndSearchNoStrict<SelectiveUnpack>(children, SelectiveUnpack(unpackInfo));
+ }
+ }
+}
+
+} // namespace queryeval
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/queryeval/andsearch.h b/searchlib/src/vespa/searchlib/queryeval/andsearch.h
new file mode 100644
index 00000000000..d15e08213f1
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/andsearch.h
@@ -0,0 +1,37 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "multisearch.h"
+#include "unpackinfo.h"
+
+namespace search {
+namespace queryeval {
+
+/**
+ * A simple implementation of the And search operation.
+ **/
+class AndSearch : public MultiSearch
+{
+public:
+ // Caller takes ownership of the returned SearchIterator.
+ static AndSearch *create(const Children &children, bool strict, const UnpackInfo & unpackInfo);
+ static AndSearch *create(const Children &children, bool strict);
+
+ BitVector::UP get_hits(uint32_t begin_id) override;
+
+ AndSearch & estimate(uint32_t est) { _estimate = est; return *this; }
+ uint32_t estimate() const { return _estimate; }
+protected:
+ AndSearch(const Children & children);
+ void doUnpack(uint32_t docid) override;
+ UP andWith(UP filter, uint32_t estimate) override;
+ UP offerFilterToChildren(UP filter, uint32_t estimate);
+private:
+ bool isAnd() const override { return true; }
+ uint32_t _estimate;
+};
+
+} // namespace queryeval
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/queryeval/andsearchnostrict.h b/searchlib/src/vespa/searchlib/queryeval/andsearchnostrict.h
new file mode 100644
index 00000000000..b42359bf760
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/andsearchnostrict.h
@@ -0,0 +1,61 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "andsearch.h"
+
+namespace search {
+namespace queryeval {
+
+/**
+ * A simple implementation of the And search operation.
+ **/
+template <typename Unpack>
+class AndSearchNoStrict : public AndSearch
+{
+public:
+ /**
+ * Create a new And Search with the given children.
+ * A And Search has no strictness assumptions about
+ * its children.
+ *
+ * @param children the search objects we are and'ing
+ * ownership of the children is taken by the MultiSearch base class.
+ **/
+ AndSearchNoStrict(const Children & children, const Unpack & unpacker) :
+ AndSearch(children),
+ _unpacker(unpacker)
+ { }
+
+protected:
+ void doSeek(uint32_t docid) override {
+ const Children & children(getChildren());
+ for (uint32_t i = 0; i < children.size(); ++i) {
+ if (!children[i]->seek(docid)) {
+ return;
+ }
+ }
+ setDocId(docid);
+ }
+ Trinary is_strict() const override { return Trinary::False; }
+
+ virtual void doUnpack(uint32_t docid) {
+ _unpacker.unpack(docid, *this);
+ }
+ virtual void onRemove(size_t index) {
+ _unpacker.onRemove(index);
+ }
+ virtual void onInsert(size_t index) {
+ _unpacker.onInsert(index);
+ }
+ virtual bool needUnpack(size_t index) const {
+ return _unpacker.needUnpack(index);
+ }
+
+private:
+ Unpack _unpacker;
+};
+
+} // namespace queryeval
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/queryeval/andsearchstrict.h b/searchlib/src/vespa/searchlib/queryeval/andsearchstrict.h
new file mode 100644
index 00000000000..7f275e9d585
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/andsearchstrict.h
@@ -0,0 +1,109 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "andsearchnostrict.h"
+
+namespace search {
+namespace queryeval {
+
+/**
+ * A simple strict implementation of the And search operation.
+ **/
+template <typename Unpack>
+class AndSearchStrict : public AndSearchNoStrict<Unpack>
+{
+private:
+ template<bool doSeekOnly>
+ VESPA_DLL_LOCAL void advance(uint32_t failedChildIndexd) __attribute__((noinline));
+ using Trinary=vespalib::Trinary;
+protected:
+ void doSeek(uint32_t docid) override;
+ Trinary is_strict() const override { return Trinary::True; }
+ SearchIterator::UP andWith(SearchIterator::UP filter, uint32_t estimate) override;
+public:
+ AndSearchStrict(const MultiSearch::Children & children, const Unpack & unpacker) :
+ AndSearchNoStrict<Unpack>(children, unpacker)
+ {
+ }
+
+ void initRange(uint32_t beginid, uint32_t endid) override {
+ AndSearchNoStrict<Unpack>::initRange(beginid, endid);
+ advance<false>(0);
+ }
+};
+
+template<typename Unpack>
+template<bool doSeekOnly>
+void
+AndSearchStrict<Unpack>::advance(uint32_t failedChildIndex)
+{
+ const MultiSearch::Children & children(this->getChildren());
+ SearchIterator & firstChild(*children[0]);
+ bool foundHit(false);
+ if (failedChildIndex != 0) {
+ if (doSeekOnly) {
+ if (__builtin_expect(children[failedChildIndex]->isAtEnd(), false)) {
+ this->setAtEnd();
+ return;
+ }
+ firstChild.doSeek(std::max(firstChild.getDocId() + 1, children[failedChildIndex]->getDocId()));
+ } else {
+ firstChild.seek(std::max(firstChild.getDocId() + 1, children[failedChildIndex]->getDocId()));
+ }
+ }
+ uint32_t nextId(firstChild.getDocId());
+ while (!foundHit && !this->isAtEnd(nextId)) {
+ foundHit = true;
+ for (uint32_t i(1); foundHit && (i < children.size()); ++i) {
+ SearchIterator & child(*children[i]);
+ if (!(foundHit = child.seek(nextId))) {
+ if (__builtin_expect(!child.isAtEnd(), true)) {
+ firstChild.doSeek(std::max(nextId+1, child.getDocId()));
+ nextId = firstChild.getDocId();
+ } else {
+ this->setAtEnd();
+ return;
+ }
+ }
+ }
+ }
+ this->setDocId(nextId);
+}
+
+template<typename Unpack>
+void
+AndSearchStrict<Unpack>::doSeek(uint32_t docid)
+{
+ const MultiSearch::Children & children(this->getChildren());
+ for (uint32_t i(0); i < children.size(); ++i) {
+ children[i]->doSeek(docid);
+ if (children[i]->getDocId() != docid) {
+ advance<true>(i);
+ return;
+ }
+ }
+ this->setDocId(docid);
+}
+
+template<typename Unpack>
+SearchIterator::UP
+AndSearchStrict<Unpack>::andWith(SearchIterator::UP filter, uint32_t estimate_)
+{
+ filter = this->getChildren()[0]->andWith(std::move(filter), estimate_);
+ if (filter) {
+ if ((estimate_ < this->estimate()) && (filter->is_strict() == Trinary::True)) {
+ this->insert(0, std::move(filter));
+ } else {
+ filter = this->offerFilterToChildren(std::move(filter), estimate_);
+ if (filter) {
+ this->insert(1, std::move(filter));
+ }
+ }
+ }
+ return filter; // Should always be empty, returning it incase logic changes.
+}
+
+} // namespace queryeval
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/queryeval/begin_and_end_id.h b/searchlib/src/vespa/searchlib/queryeval/begin_and_end_id.h
new file mode 100644
index 00000000000..70b0ad40bae
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/begin_and_end_id.h
@@ -0,0 +1,10 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+namespace search {
+
+static constexpr uint32_t beginDocId = 0u;
+static constexpr uint32_t endDocId = 0x7fffffffu; // max signed value
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp b/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp
new file mode 100644
index 00000000000..78bf883acde
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp
@@ -0,0 +1,562 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".queryeval.blueprint");
+#include "blueprint.h"
+#include <vespa/vespalib/objects/visit.h>
+#include <vespa/vespalib/objects/objectdumper.h>
+#include <vespa/vespalib/util/stringfmt.h>
+#include "leaf_blueprints.h"
+#include "intermediate_blueprints.h"
+#include "equiv_blueprint.h"
+
+#include <vector>
+#include <set>
+#include <map>
+
+// NB: might need to hide this from non-gcc compilers...
+#include <cxxabi.h>
+
+namespace search {
+namespace queryeval {
+
+//-----------------------------------------------------------------------------
+
+void maybe_eliminate_self(Blueprint* &self, Blueprint::UP replacement) {
+ // replace with replacement
+ if (replacement.get() != nullptr) {
+ Blueprint *tmp = replacement.release();
+ tmp->setParent(self->getParent());
+ tmp->setSourceId(self->getSourceId());
+ self->setParent(0);
+ replacement.reset(self);
+ self = tmp;
+ }
+ // replace with empty blueprint if empty
+ if (self->getState().estimate().empty) {
+ Blueprint::UP discard(self);
+ self = new EmptyBlueprint(discard->getState().fields());
+ self->setParent(discard->getParent());
+ self->setSourceId(discard->getSourceId());
+ }
+}
+
+//-----------------------------------------------------------------------------
+
+Blueprint::HitEstimate
+Blueprint::max(const std::vector<HitEstimate> &data)
+{
+ HitEstimate est;
+ for (size_t i = 0; i < data.size(); ++i) {
+ if (est.empty || est.estHits < data[i].estHits) {
+ est = data[i];
+ }
+ }
+ return est;
+}
+
+Blueprint::HitEstimate
+Blueprint::min(const std::vector<HitEstimate> &data)
+{
+ HitEstimate est;
+ for (size_t i = 0; i < data.size(); ++i) {
+ if (i == 0 || data[i].empty || data[i].estHits < est.estHits) {
+ est = data[i];
+ }
+ }
+ return est;
+}
+
+void
+Blueprint::notifyChange()
+{
+ if (_parent != 0) {
+ _parent->notifyChange();
+ }
+}
+
+Blueprint::Blueprint()
+ : _parent(0),
+ _sourceId(0xffffffff),
+ _docid_limit(0)
+{
+}
+
+Blueprint::Blueprint(const Blueprint &x)
+ : _parent(0),
+ _sourceId(x.getSourceId()),
+ _docid_limit(x.get_docid_limit())
+{
+}
+
+Blueprint::~Blueprint()
+{
+}
+
+Blueprint::UP
+Blueprint::optimize(Blueprint::UP bp) {
+ Blueprint *root = bp.release();
+ root->optimize(root);
+ return Blueprint::UP(root);
+}
+
+void
+Blueprint::optimize_self()
+{
+}
+
+Blueprint::UP
+Blueprint::get_replacement()
+{
+ return Blueprint::UP();
+}
+
+const Blueprint &
+Blueprint::root() const
+{
+ const Blueprint *bp = this;
+ while (bp->_parent != nullptr) {
+ bp = bp->_parent;
+ }
+ return *bp;
+}
+
+vespalib::string
+Blueprint::asString() const
+{
+ vespalib::ObjectDumper dumper;
+ visit(dumper, "", this);
+ return dumper.toString();
+}
+
+vespalib::string
+Blueprint::getClassName() const
+{
+ vespalib::string name(typeid(*this).name());
+ int status = 0;
+ size_t size = 0;
+ // NB: might need to hide this from non-gcc compilers...
+ char *unmangled = abi::__cxa_demangle(name.c_str(), 0, &size, &status);
+ vespalib::string result(unmangled);
+ free(unmangled);
+ return result;
+}
+
+void
+Blueprint::visitMembers(vespalib::ObjectVisitor &visitor) const
+{
+ const State &state = getState();
+ visitor.visitBool("isTermLike", state.isTermLike());
+ if (state.isTermLike()) {
+ visitor.openStruct("fields", "FieldList");
+ for (size_t i = 0; i < state.numFields(); ++i) {
+ const FieldSpecBase &spec = state.field(i);
+ visitor.openStruct(vespalib::make_string("[%zu]", i), "Field");
+ // visitor.visitString("name", spec.getName());
+ visitor.visitInt("fieldId", spec.getFieldId());
+ visitor.visitInt("handle", spec.getHandle());
+ visitor.visitBool("isFilter", spec.isFilter());
+ visitor.closeStruct();
+ }
+ visitor.closeStruct();
+ }
+ visitor.openStruct("estimate", "HitEstimate");
+ visitor.visitBool("empty", state.estimate().empty);
+ visitor.visitInt("estHits", state.estimate().estHits);
+ visitor.visitInt("tree_size", state.tree_size());
+ visitor.visitInt("allow_termwise_eval", state.allow_termwise_eval());
+ visitor.closeStruct();
+ visitor.visitInt("sourceId", _sourceId);
+ visitor.visitInt("docid_limit", _docid_limit);
+}
+
+namespace blueprint {
+
+//-----------------------------------------------------------------------------
+
+void
+StateCache::notifyChange()
+{
+ Blueprint::notifyChange();
+ _stale = true;
+}
+
+const Blueprint::State &
+StateCache::getState() const
+{
+ if (_stale) {
+ calculateState().swap(_state);
+ _stale = false;
+ }
+ return _state;
+}
+
+} // namespace blueprint
+
+//-----------------------------------------------------------------------------
+
+IntermediateBlueprint::~IntermediateBlueprint()
+{
+ while (!_children.empty()) {
+ delete _children.back();
+ _children.pop_back();
+ }
+}
+
+void
+IntermediateBlueprint::setDocIdLimit(uint32_t limit)
+{
+ Blueprint::setDocIdLimit(limit);
+ for (size_t i = 0; i < _children.size(); ++i) {
+ _children[i]->setDocIdLimit(limit);
+ }
+}
+
+Blueprint::HitEstimate
+IntermediateBlueprint::calculateEstimate() const
+{
+ std::vector<HitEstimate> estimates;
+ estimates.reserve(_children.size());
+ for (size_t i = 0; i < _children.size(); ++i) {
+ estimates.push_back(_children[i]->getState().estimate());
+ }
+ return combine(estimates);
+}
+
+uint32_t
+IntermediateBlueprint::calculate_tree_size() const
+{
+ uint32_t nodes = 1;
+ for (size_t i = 0; i < _children.size(); ++i) {
+ nodes += _children[i]->getState().tree_size();
+ }
+ return nodes;
+}
+
+bool
+IntermediateBlueprint::infer_allow_termwise_eval() const
+{
+ if (!supports_termwise_children()) {
+ return false;
+ }
+ for (size_t i = 0; i < _children.size(); ++i) {
+ if (!_children[i]->getState().allow_termwise_eval()) {
+ return false;
+ }
+ }
+ return true;
+};
+
+size_t
+IntermediateBlueprint::count_termwise_nodes(const UnpackInfo &unpack) const
+{
+ size_t termwise_nodes = 0;
+ for (size_t i = 0; i < _children.size(); ++i) {
+ const State &state = _children[i]->getState();
+ if (state.allow_termwise_eval() && !unpack.needUnpack(i)) {
+ termwise_nodes += state.tree_size();
+ }
+ }
+ return termwise_nodes;
+}
+
+IntermediateBlueprint::IndexList
+IntermediateBlueprint::find(const IPredicate & pred) const
+{
+ IndexList list;
+ for (size_t i = 0; i < _children.size(); ++i) {
+ if (pred.check(*_children[i])) {
+ list.push_back(i);
+ }
+ }
+ return list;
+}
+
+FieldSpecBaseList
+IntermediateBlueprint::mixChildrenFields() const
+{
+ typedef std::map<uint32_t, const FieldSpecBase*> Map;
+ typedef Map::value_type MapVal;
+ typedef Map::iterator MapPos;
+ typedef std::pair<MapPos, bool> MapRes;
+
+ Map fieldMap;
+ FieldSpecBaseList fieldList;
+ for (size_t i = 0; i < _children.size(); ++i) {
+ const State &childState = _children[i]->getState();
+ if (!childState.isTermLike()) {
+ return fieldList; // empty: non-term-like child
+ }
+ for (size_t j = 0; j < childState.numFields(); ++j) {
+ const FieldSpecBase &f = childState.field(j);
+ MapRes res = fieldMap.insert(MapVal(f.getFieldId(), &f));
+ if (!res.second) {
+ const FieldSpecBase &other = *(res.first->second);
+ if (other.getHandle() != f.getHandle()) {
+ return fieldList; // empty: conflicting children
+ }
+ }
+ }
+ }
+ for (MapPos pos = fieldMap.begin(); pos != fieldMap.end(); ++pos) {
+ fieldList.add(*(pos->second));
+ }
+ return fieldList;
+}
+
+Blueprint::State
+IntermediateBlueprint::calculateState() const
+{
+ State state(exposeFields());
+ state.estimate(calculateEstimate());
+ state.allow_termwise_eval(infer_allow_termwise_eval());
+ state.tree_size(calculate_tree_size());
+ return state;
+}
+
+bool
+IntermediateBlueprint::should_do_termwise_eval(const UnpackInfo &unpack, double match_limit) const
+{
+ if (root().hit_ratio() <= match_limit) {
+ return false; // global hit density too low
+ }
+ if (getState().allow_termwise_eval() && unpack.empty() &&
+ has_parent() && getParent()->supports_termwise_children())
+ {
+ return false; // higher up will be better
+ }
+ return (count_termwise_nodes(unpack) > 1);
+}
+
+void
+IntermediateBlueprint::optimize(Blueprint* &self)
+{
+ assert(self == this);
+ if (should_optimize_children()) {
+ for (size_t i = 0; i < _children.size(); ++i) {
+ _children[i]->optimize(_children[i]);
+ }
+ }
+ optimize_self();
+ sort(_children);
+ maybe_eliminate_self(self, get_replacement());
+}
+
+SearchIterator::UP
+IntermediateBlueprint::createSearch(fef::MatchData &md, bool strict) const
+{
+ MultiSearch::Children subSearches;
+ subSearches.reserve(_children.size());
+ for (size_t i = 0; i < _children.size(); ++i) {
+ bool strictChild = (strict && inheritStrict(i));
+ SearchIterator::UP search = _children[i]->createSearch(md, strictChild);
+ subSearches.push_back(search.release());
+ }
+ return createIntermediateSearch(subSearches, strict, md);
+}
+
+IntermediateBlueprint::IntermediateBlueprint()
+ : _children()
+{
+}
+
+IntermediateBlueprint::
+IntermediateBlueprint(const IntermediateBlueprint &x)
+ : StateCache(x),
+ _children()
+{
+ // children are not copied
+}
+
+const Blueprint &
+IntermediateBlueprint::getChild(size_t n) const
+{
+ assert(n < _children.size());
+ return *_children[n];
+}
+
+Blueprint &
+IntermediateBlueprint::getChild(size_t n)
+{
+ assert(n < _children.size());
+ return *_children[n];
+}
+
+IntermediateBlueprint &
+IntermediateBlueprint::addChild(Blueprint::UP child)
+{
+ _children.push_back(child.get());
+ child.release()->setParent(this);
+ notifyChange();
+ return *this;
+}
+
+Blueprint::UP
+IntermediateBlueprint::removeChild(size_t n)
+{
+ assert(n < _children.size());
+ Blueprint::UP ret(_children[n]);
+ _children.erase(_children.begin() + n);
+ ret->setParent(0);
+ notifyChange();
+ return ret;
+}
+
+IntermediateBlueprint &
+IntermediateBlueprint::insertChild(size_t n, Blueprint::UP child)
+{
+ assert(n <= _children.size());
+ _children.insert(_children.begin() + n, child.get());
+ child.release()->setParent(this);
+ notifyChange();
+ return *this;
+}
+
+void
+IntermediateBlueprint::visitMembers(vespalib::ObjectVisitor &visitor) const
+{
+ StateCache::visitMembers(visitor);
+ visit(visitor, "children", _children);
+}
+
+void
+IntermediateBlueprint::fetchPostings(bool strict)
+{
+ for (size_t i = 0; i < _children.size(); ++i) {
+ bool strictChild = (strict && inheritStrict(i));
+ _children[i]->fetchPostings(strictChild);
+ }
+}
+
+namespace {
+
+bool
+areAnyParentsEquiv(const Blueprint * node)
+{
+ return (node == NULL)
+ ? false
+ : (dynamic_cast<const EquivBlueprint *>(node) != NULL)
+ ? true
+ : areAnyParentsEquiv(node->getParent());
+}
+
+bool
+canBlueprintSkipUnpack(const Blueprint & bp, const fef::MatchData & md)
+{
+ return (bp.getState().numFields() != 0)
+ || (( dynamic_cast<const IntermediateBlueprint *>(&bp) != nullptr)
+ && static_cast<const IntermediateBlueprint &>(bp).calculateUnpackInfo(md).empty());
+}
+
+}
+
+UnpackInfo
+IntermediateBlueprint::calculateUnpackInfo(const fef::MatchData & md) const
+{
+ UnpackInfo unpackInfo;
+ bool allNeedUnpack(true);
+ if ( ! areAnyParentsEquiv(getParent()) ) {
+ for (size_t i = 0; i < childCnt(); ++i) {
+ if (isPositive(i)) {
+ const Blueprint & child = getChild(i);
+ const State &cs = child.getState();
+ bool canSkipUnpack(canBlueprintSkipUnpack(child, md));
+ LOG(debug, "Child[%ld] has %ld fields. canSkipUnpack='%s'.", i, cs.numFields(), canSkipUnpack ? "true" : "false");
+ for (size_t j = 0; canSkipUnpack && (j < cs.numFields()); ++j) {
+ if ( ! cs.field(j).resolve(md)->isNotNeeded()) {
+ LOG(debug, "Child[%ld].field(%ld).fieldId=%d need unpack.", i, j, cs.field(j).getFieldId());
+ canSkipUnpack = false;
+ }
+ }
+ if ( canSkipUnpack) {
+ allNeedUnpack = false;
+ } else {
+ unpackInfo.add(i);
+ }
+ } else {
+ allNeedUnpack = false;
+ }
+ }
+ }
+ if (allNeedUnpack) {
+ unpackInfo.forceAll();
+ }
+ LOG(spam, "UnpackInfo for %s \n is \n %s", asString().c_str(), unpackInfo.toString().c_str());
+ return unpackInfo;
+}
+
+
+//-----------------------------------------------------------------------------
+
+void
+LeafBlueprint::fetchPostings(bool strict)
+{
+ (void) strict;
+}
+
+SearchIterator::UP
+LeafBlueprint::createSearch(fef::MatchData &md, bool strict) const
+{
+ const State &state = getState();
+ fef::TermFieldMatchDataArray tfmda;
+ tfmda.reserve(state.numFields());
+ for (size_t i = 0; i < state.numFields(); ++i) {
+ tfmda.add(state.field(i).resolve(md));
+ }
+ return createLeafSearch(tfmda, strict);
+}
+
+void
+LeafBlueprint::optimize(Blueprint* &self)
+{
+ assert(self == this);
+ optimize_self();
+ maybe_eliminate_self(self, get_replacement());
+}
+
+void
+LeafBlueprint::setEstimate(HitEstimate est)
+{
+ _state.estimate(est);
+ notifyChange();
+}
+
+void
+LeafBlueprint::set_allow_termwise_eval(bool value)
+{
+ _state.allow_termwise_eval(value);
+ notifyChange();
+}
+
+void
+LeafBlueprint::set_tree_size(uint32_t value)
+{
+ _state.tree_size(value);
+ notifyChange();
+}
+
+//-----------------------------------------------------------------------------
+
+} // namespace queryeval
+} // namespace search
+
+//-----------------------------------------------------------------------------
+
+void visit(vespalib::ObjectVisitor &self, const vespalib::string &name,
+ const search::queryeval::Blueprint *obj)
+{
+ if (obj != 0) {
+ self.openStruct(name, obj->getClassName());
+ obj->visitMembers(self);
+ self.closeStruct();
+ } else {
+ self.visitNull(name);
+ }
+}
+
+void visit(vespalib::ObjectVisitor &self, const vespalib::string &name,
+ const search::queryeval::Blueprint &obj)
+{
+ visit(self, name, &obj);
+}
diff --git a/searchlib/src/vespa/searchlib/queryeval/blueprint.h b/searchlib/src/vespa/searchlib/queryeval/blueprint.h
new file mode 100644
index 00000000000..82a7aa642a5
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/blueprint.h
@@ -0,0 +1,314 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/fef/handle.h>
+#include <vespa/searchlib/fef/matchdata.h>
+#include <vespa/searchlib/fef/termfieldmatchdata.h>
+#include <vespa/searchlib/fef/termfieldmatchdataarray.h>
+#include <vespa/vespalib/util/array.h>
+#include <vespa/vespalib/stllike/hash_map.h>
+#include <vector>
+#include <memory>
+#include <vespa/searchlib/queryeval/searchiterator.h>
+#include <vespa/searchlib/queryeval/multisearch.h>
+#include <vespa/searchlib/queryeval/unpackinfo.h>
+#include "field_spec.h"
+
+namespace vespalib { class ObjectVisitor; };
+
+namespace search {
+namespace queryeval {
+
+
+/**
+ * A Blueprint is an intermediate representation of a search. More
+ * concretely, it is a tree of search iterator factories annotated
+ * with meta-data about the fields to be searched, how match
+ * information is to be exposed to the ranking framework and estimates
+ * for the number of results that will be produced. Intermediate
+ * operations are implemented by extending the blueprint::Intermediate
+ * template class. Leaf operations are implemented by extending the
+ * blueprint::Leaf template class.
+ **/
+class Blueprint
+{
+public:
+ typedef std::unique_ptr<Blueprint> UP;
+
+ struct HitEstimate {
+ uint32_t estHits;
+ bool empty;
+
+ HitEstimate() : estHits(0), empty(true) {}
+ HitEstimate(uint32_t estHits_, bool empty_)
+ : estHits(estHits_), empty(empty_) {}
+
+ bool operator < (const HitEstimate &other) const {
+ if (empty == other.empty) {
+ return (estHits < other.estHits);
+ } else {
+ return empty;
+ }
+ }
+ };
+
+ class State
+ {
+ private:
+ FieldSpecBaseList _fields;
+ HitEstimate _estimate;
+ uint32_t _tree_size;
+ bool _allow_termwise_eval;
+
+ public:
+ State(const FieldSpecBaseList &fields_in)
+ : _fields(fields_in),
+ _estimate(),
+ _tree_size(1),
+ _allow_termwise_eval(true)
+ {
+ }
+ void swap(State & rhs) {
+ _fields.swap(rhs._fields);
+ std::swap(_estimate, rhs._estimate);
+ std::swap(_tree_size, rhs._tree_size);
+ std::swap(_allow_termwise_eval, rhs._allow_termwise_eval);
+ }
+
+ bool isTermLike() const { return !_fields.empty(); }
+ const FieldSpecBaseList &fields() const { return _fields; }
+
+ size_t numFields() const { return _fields.size(); }
+ const FieldSpecBase &field(size_t idx) const { return _fields[idx]; }
+ const FieldSpecBase *lookupField(uint32_t fieldId) const {
+ for (size_t i = 0; i < _fields.size(); ++i) {
+ if (_fields[i].getFieldId() == fieldId) {
+ return &_fields[i];
+ }
+ }
+ return nullptr;
+ }
+
+ void estimate(HitEstimate est) { _estimate = est; }
+ HitEstimate estimate() const { return _estimate; }
+ double hit_ratio(uint32_t docid_limit) const {
+ uint32_t total_hits = _estimate.estHits;
+ uint32_t total_docs = std::max(total_hits, docid_limit);
+ return double(total_hits) / double(total_docs);
+ }
+ void tree_size(uint32_t value) { _tree_size = value; }
+ uint32_t tree_size() const { return _tree_size; }
+ void allow_termwise_eval(bool value) { _allow_termwise_eval = value; }
+ bool allow_termwise_eval() const { return _allow_termwise_eval; }
+ };
+
+ // utility that just takes maximum estimate
+ static HitEstimate max(const std::vector<HitEstimate> &data);
+
+ // utility that just takes minium estimate
+ static HitEstimate min(const std::vector<HitEstimate> &data);
+
+ // utility to get the greater estimate to sort first
+ struct GreaterEstimate {
+ bool operator () (Blueprint * const &a, Blueprint * const &b) const {
+ return (b->getState().estimate() < a->getState().estimate());
+ }
+ };
+
+ // utility to get the lesser estimate to sort first
+ struct LessEstimate {
+ bool operator () (Blueprint * const &a, const Blueprint * const &b) const {
+ return (a->getState().estimate() < b->getState().estimate());
+ }
+ };
+
+private:
+ Blueprint *_parent;
+ uint32_t _sourceId;
+ uint32_t _docid_limit;
+
+ Blueprint &operator=(const Blueprint &); // disable
+
+public:
+ class IPredicate {
+ public:
+ virtual ~IPredicate() {}
+ virtual bool check(const Blueprint & bp) const = 0;
+ };
+
+ Blueprint();
+ Blueprint(const Blueprint &x);
+ virtual ~Blueprint();
+
+ void setParent(Blueprint *parent) { _parent = parent; }
+ Blueprint *getParent() const { return _parent; }
+ bool has_parent() const { return (_parent != nullptr); }
+
+ Blueprint &setSourceId(uint32_t sourceId) { _sourceId = sourceId; return *this; }
+ uint32_t getSourceId() const { return _sourceId; }
+
+ virtual void setDocIdLimit(uint32_t limit) { _docid_limit = limit; }
+ uint32_t get_docid_limit() const { return _docid_limit; }
+
+ virtual void notifyChange();
+
+ static Blueprint::UP optimize(Blueprint::UP bp);
+ virtual void optimize(Blueprint* &self) = 0;
+ virtual void optimize_self();
+ virtual Blueprint::UP get_replacement();
+ virtual bool should_optimize_children() const { return true; }
+
+ virtual bool supports_termwise_children() const { return false; }
+
+ virtual const State &getState() const = 0;
+ const Blueprint &root() const;
+
+ double hit_ratio() const { return getState().hit_ratio(_docid_limit); }
+
+ virtual void fetchPostings(bool strict) = 0;
+
+ virtual SearchIterator::UP createSearch(fef::MatchData &md, bool strict) const = 0;
+
+ // for debug dumping
+ vespalib::string asString() const;
+ virtual vespalib::string getClassName() const;
+ virtual void visitMembers(vespalib::ObjectVisitor &visitor) const;
+};
+
+namespace blueprint {
+
+//-----------------------------------------------------------------------------
+
+class StateCache : public Blueprint
+{
+private:
+ mutable bool _stale;
+ mutable State _state;
+
+protected:
+ virtual void notifyChange();
+ virtual State calculateState() const = 0;
+
+public:
+ StateCache() : _stale(true), _state(FieldSpecBaseList()) {}
+ StateCache(const StateCache &x)
+ : Blueprint(x), _stale(true), _state(FieldSpecBaseList()) {}
+ const State &getState() const override final;
+};
+
+} // namespace blueprint
+
+//-----------------------------------------------------------------------------
+
+class IntermediateBlueprint : public blueprint::StateCache
+{
+public:
+ typedef std::vector<Blueprint*> Children;
+private:
+ Children _children;
+ HitEstimate calculateEstimate() const;
+ uint32_t calculate_tree_size() const;
+ bool infer_allow_termwise_eval() const;
+
+ size_t count_termwise_nodes(const UnpackInfo &unpack) const;
+
+protected:
+ // returns an empty collection if children have empty or
+ // conflicting collections of field specs.
+ FieldSpecBaseList mixChildrenFields() const;
+
+ State calculateState() const override final;
+
+ virtual bool isPositive(size_t index) const { (void) index; return true; }
+
+ bool should_do_termwise_eval(const UnpackInfo &unpack, double match_limit) const;
+
+public:
+ typedef std::vector<size_t> IndexList;
+ IntermediateBlueprint();
+ IntermediateBlueprint(const IntermediateBlueprint &x);
+ virtual ~IntermediateBlueprint();
+
+ void setDocIdLimit(uint32_t limit) override final;
+
+ virtual void optimize(Blueprint* &self) override final;
+
+ IndexList find(const IPredicate & check) const;
+ size_t childCnt() const { return _children.size(); }
+ const Blueprint &getChild(size_t n) const;
+ Blueprint &getChild(size_t n);
+ IntermediateBlueprint & insertChild(size_t n, Blueprint::UP child);
+ IntermediateBlueprint &addChild(Blueprint::UP child);
+ Blueprint::UP removeChild(size_t n);
+ virtual SearchIterator::UP createSearch(fef::MatchData &md, bool strict) const;
+
+ virtual HitEstimate
+ combine(const std::vector<HitEstimate> &data) const = 0;
+ virtual FieldSpecBaseList exposeFields() const = 0;
+ virtual void sort(std::vector<Blueprint*> &children) const = 0;
+ virtual bool inheritStrict(size_t i) const = 0;
+ virtual SearchIterator::UP
+ createIntermediateSearch(const MultiSearch::Children &subSearches,
+ bool strict, fef::MatchData &md) const = 0;
+
+ virtual void visitMembers(vespalib::ObjectVisitor &visitor) const;
+
+ virtual void fetchPostings(bool strict);
+ UnpackInfo calculateUnpackInfo(const fef::MatchData & md) const;
+};
+
+
+class LeafBlueprint : public Blueprint
+{
+private:
+ State _state;
+
+protected:
+ virtual void optimize(Blueprint* &self) override final;
+
+ void setEstimate(HitEstimate est);
+
+ void set_allow_termwise_eval(bool value);
+
+ void set_tree_size(uint32_t value);
+
+ LeafBlueprint(const FieldSpecBaseList &fields, bool allow_termwise_eval) : _state(fields) {
+ _state.allow_termwise_eval(allow_termwise_eval);
+ }
+
+public:
+ const State &getState() const override final { return _state; }
+
+ void setDocIdLimit(uint32_t limit) override final { Blueprint::setDocIdLimit(limit); }
+
+ virtual void fetchPostings(bool strict);
+
+ virtual SearchIterator::UP createSearch(fef::MatchData &md, bool strict) const;
+
+ virtual SearchIterator::UP createLeafSearch(const fef::TermFieldMatchDataArray &tfmda,
+ bool strict) const = 0;
+};
+
+// for leaf nodes representing a single term
+struct SimpleLeafBlueprint : LeafBlueprint {
+ SimpleLeafBlueprint(const FieldSpecBase &field) : LeafBlueprint(FieldSpecBaseList().add(field), true) {}
+ SimpleLeafBlueprint(const FieldSpecBaseList &fields) : LeafBlueprint(fields, true) {}
+};
+
+// for leaf nodes representing more complex structures like wand/phrase
+struct ComplexLeafBlueprint : LeafBlueprint {
+ ComplexLeafBlueprint(const FieldSpecBase &field) : LeafBlueprint(FieldSpecBaseList().add(field), false) {}
+ ComplexLeafBlueprint(const FieldSpecBaseList &fields) : LeafBlueprint(fields, false) {}
+};
+
+//-----------------------------------------------------------------------------
+
+} // namespace queryeval
+} // namespace search
+
+void visit(vespalib::ObjectVisitor &self, const vespalib::string &name,
+ const search::queryeval::Blueprint &obj);
+void visit(vespalib::ObjectVisitor &self, const vespalib::string &name,
+ const search::queryeval::Blueprint *obj);
+
diff --git a/searchlib/src/vespa/searchlib/queryeval/booleanmatchiteratorwrapper.cpp b/searchlib/src/vespa/searchlib/queryeval/booleanmatchiteratorwrapper.cpp
new file mode 100644
index 00000000000..1510716a84b
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/booleanmatchiteratorwrapper.cpp
@@ -0,0 +1,48 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".booleanmatchiteratorwrapper");
+#include "booleanmatchiteratorwrapper.h"
+#include <vespa/searchlib/fef/termfieldmatchdata.h>
+#include <vespa/searchlib/fef/termfieldmatchdataarray.h>
+#include <vespa/vespalib/objects/visit.h>
+
+namespace search {
+namespace queryeval {
+
+void
+BooleanMatchIteratorWrapper::doSeek(uint32_t docid)
+{
+ _search->seek(docid); // use outer seek for most robustness
+ setDocId(_search->getDocId()); // propagate current iterator docid
+}
+
+void
+BooleanMatchIteratorWrapper::doUnpack(uint32_t docid)
+{
+ if (_tfmdp != 0) { // handle not having a match data (unranked, or multiple fields)
+ _tfmdp->reset(docid); // unpack ensures that docid is a hit
+ }
+}
+
+BooleanMatchIteratorWrapper::BooleanMatchIteratorWrapper(
+ SearchIterator::UP search,
+ const fef::TermFieldMatchDataArray &matchData)
+ : _search(std::move(search)),
+ _tfmdp(0)
+{
+ if (matchData.size() == 1) {
+ _tfmdp = matchData[0];
+ }
+}
+
+void
+BooleanMatchIteratorWrapper::visitMembers(vespalib::ObjectVisitor &visitor) const
+{
+ visit(visitor, "search", _search);
+ // _match not visited
+}
+
+} // namespace queryeval
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/queryeval/booleanmatchiteratorwrapper.h b/searchlib/src/vespa/searchlib/queryeval/booleanmatchiteratorwrapper.h
new file mode 100644
index 00000000000..6d4ca1abbb7
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/booleanmatchiteratorwrapper.h
@@ -0,0 +1,64 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "searchiterator.h"
+#include <vespa/searchlib/fef/termfieldmatchdataarray.h>
+
+namespace search {
+namespace queryeval {
+
+/**
+ * A term iterator wrapper used to hide detailed match
+ * information. Wrapping a term iterator with an instance of this
+ * class will ensure that the unpack method will only disclose whether
+ * we found a match or not. This is done by intercepting calls to the
+ * doUnpack method. The doSeek method will be forwarded to ensure we
+ * match the same set of documents.
+ **/
+class BooleanMatchIteratorWrapper : public SearchIterator
+{
+private:
+ SearchIterator::UP _search;
+ fef::TermFieldMatchData *_tfmdp;
+
+ BooleanMatchIteratorWrapper(const BooleanMatchIteratorWrapper &);
+ BooleanMatchIteratorWrapper &operator=(const BooleanMatchIteratorWrapper &);
+
+protected:
+ void doSeek(uint32_t docid) override;
+ void doUnpack(uint32_t docid) override;
+ Trinary is_strict() const override { return _search->is_strict(); }
+ void initRange(uint32_t beginid, uint32_t endid) override {
+ _search->initRange(beginid, endid);
+ SearchIterator::initRange(_search->getDocId()+1, _search->getEndId());
+ }
+ void resetRange() override {
+ _search->resetRange();
+ SearchIterator::resetRange();
+ }
+
+public:
+ /**
+ * Create a wrapper for the given search using the given term
+ * match data. This object will take ownership of the given search
+ * and delete it in the destructor. The given search must be a
+ * term iterator that is using the given term match data to store
+ * its matching details during unpack. The given term match data
+ * is expected to be stored inside a match data object and as such
+ * be managed outside of this object. The iterator will fill in
+ * match/non-match information only, and only if the given array
+ * holds exactly one reference.
+ *
+ * @param search internal search, must be a term iterator
+ * @param match term match data used by the internal iterator
+ **/
+ BooleanMatchIteratorWrapper(SearchIterator::UP search,
+ const fef::TermFieldMatchDataArray &matchData);
+
+ virtual void visitMembers(vespalib::ObjectVisitor &visitor) const;
+};
+
+} // namespace queryeval
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/queryeval/create-class-cpp.sh b/searchlib/src/vespa/searchlib/queryeval/create-class-cpp.sh
new file mode 100755
index 00000000000..728ab7a11ce
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/create-class-cpp.sh
@@ -0,0 +1,29 @@
+#!/bin/sh
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+class=$1
+guard=`echo $class | tr 'a-z' 'A-Z'`
+name=`echo $class | tr 'A-Z' 'a-z'`
+
+cat <<EOF
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/log/log.h>
+LOG_SETUP(".$name");
+#include <vespa/fastos/fastos.h>
+#include "$name.h"
+
+namespace search {
+namespace queryeval {
+
+$class::$class()
+{
+}
+
+$class::~$class()
+{
+}
+
+} // namespace queryeval
+} // namespace search
+EOF
diff --git a/searchlib/src/vespa/searchlib/queryeval/create-class-h.sh b/searchlib/src/vespa/searchlib/queryeval/create-class-h.sh
new file mode 100644
index 00000000000..1548ff86daa
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/create-class-h.sh
@@ -0,0 +1,27 @@
+#!/bin/sh
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+class=$1
+guard=`echo $class | tr 'a-z' 'A-Z'`
+
+cat <<EOF
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+namespace search {
+namespace queryeval {
+
+class $class
+{
+private:
+ $class(const $class &);
+ $class &operator=(const $class &);
+public:
+ $class();
+ virtual ~$class();
+};
+
+} // namespace queryeval
+} // namespace search
+
+EOF
diff --git a/searchlib/src/vespa/searchlib/queryeval/create-interface.sh b/searchlib/src/vespa/searchlib/queryeval/create-interface.sh
new file mode 100644
index 00000000000..04bf4c61694
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/create-interface.sh
@@ -0,0 +1,23 @@
+#!/bin/sh
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+class=$1
+guard=`echo $class | tr 'a-z' 'A-Z'`
+
+cat <<EOF
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+namespace search {
+namespace queryeval {
+
+class $class
+{
+public:
+ virtual ~$class() {}
+};
+
+} // namespace queryeval
+} // namespace search
+
+EOF
diff --git a/searchlib/src/vespa/searchlib/queryeval/create_blueprint_visitor_helper.cpp b/searchlib/src/vespa/searchlib/queryeval/create_blueprint_visitor_helper.cpp
new file mode 100644
index 00000000000..74b7464ea3f
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/create_blueprint_visitor_helper.cpp
@@ -0,0 +1,20 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// $Id$
+
+#include <vespa/fastos/fastos.h>
+#include "create_blueprint_visitor_helper.h"
+#include <vespa/searchlib/queryeval/leaf_blueprints.h>
+
+namespace search {
+namespace queryeval {
+
+Blueprint::UP
+CreateBlueprintVisitorHelper::getResult()
+{
+ return _result
+ ? std::move(_result)
+ : Blueprint::UP(new EmptyBlueprint(_field));
+}
+
+} // namespace search::queryeval
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/queryeval/create_blueprint_visitor_helper.h b/searchlib/src/vespa/searchlib/queryeval/create_blueprint_visitor_helper.h
new file mode 100644
index 00000000000..a79f1f104be
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/create_blueprint_visitor_helper.h
@@ -0,0 +1,143 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "dot_product_blueprint.h"
+#include "get_weight_from_node.h"
+#include "wand/parallel_weak_and_blueprint.h"
+#include "searchable.h"
+#include "simple_phrase_blueprint.h"
+#include "split_float.h"
+#include "termasstring.h"
+#include "weighted_set_term_blueprint.h"
+#include <vespa/searchlib/query/tree/intermediatenodes.h>
+#include <vespa/searchlib/query/tree/queryvisitor.h>
+#include <vespa/searchlib/query/tree/termnodes.h>
+#include <vespa/searchlib/query/tree/simplequery.h>
+#include <memory>
+
+namespace search {
+namespace queryeval {
+
+class CreateBlueprintVisitorHelper : public search::query::QueryVisitor
+{
+private:
+ const IRequestContext & _requestContext;
+ Searchable & _searchable;
+ FieldSpec _field;
+ Blueprint::UP _result;
+
+protected:
+ const IRequestContext & getRequestContext() const { return _requestContext; }
+
+public:
+ CreateBlueprintVisitorHelper(Searchable &searchable, const FieldSpec &field, const IRequestContext & requestContext) :
+ _requestContext(requestContext),
+ _searchable(searchable),
+ _field(field),
+ _result()
+ {}
+
+ template <typename T>
+ std::unique_ptr<T> make_UP(T *p) { return std::unique_ptr<T>(p); }
+
+ template <typename T>
+ void setResult(std::unique_ptr<T> result) { _result = std::move(result); }
+
+ Blueprint::UP getResult();
+
+ const FieldSpec &getField() const { return _field; }
+
+ void visitPhrase(search::query::Phrase &n) {
+ SimplePhraseBlueprint *phrase = new SimplePhraseBlueprint(_field, _requestContext);
+ Blueprint::UP result(phrase);
+ for (size_t i = 0; i < n.getChildren().size(); ++i) {
+ FieldSpecList fields;
+ fields.add(phrase->getNextChildField(_field));
+ phrase->addTerm(_searchable.createBlueprint(_requestContext, fields, *n.getChildren()[i]));
+ }
+ setResult(std::move(result));
+ }
+
+ template <typename WS, typename NODE>
+ void createWeightedSet(WS *bp, NODE &n) {
+ Blueprint::UP result(bp);
+ FieldSpecList fields;
+ for (size_t i = 0; i < n.getChildren().size(); ++i) {
+ fields.clear();
+ fields.add(bp->getNextChildField(_field));
+ const search::query::Node &node = *n.getChildren()[i];
+ uint32_t weight = getWeightFromNode(node).percent();
+ bp->addTerm(_searchable.createBlueprint(_requestContext, fields, node), weight);
+ }
+ setResult(std::move(result));
+ }
+ void visitWeightedSetTerm(search::query::WeightedSetTerm &n) {
+ WeightedSetTermBlueprint *bp = new WeightedSetTermBlueprint(_field);
+ createWeightedSet(bp, n);
+ }
+ void visitDotProduct(search::query::DotProduct &n) {
+ DotProductBlueprint *bp = new DotProductBlueprint(_field);
+ createWeightedSet(bp, n);
+ }
+ void visitWandTerm(search::query::WandTerm &n) {
+ ParallelWeakAndBlueprint *bp = new ParallelWeakAndBlueprint(_field,
+ n.getTargetNumHits(),
+ n.getScoreThreshold(),
+ n.getThresholdBoostFactor());
+ createWeightedSet(bp, n);
+ }
+
+ void handleNumberTermAsText(search::query::NumberTerm &n)
+ {
+ vespalib::string termStr = termAsString(n);
+ queryeval::SplitFloat splitter(termStr);
+ if (splitter.parts() > 1) {
+ query::SimplePhrase phraseNode(n.getView(), n.getId(), n.getWeight());
+ phraseNode.setStateFrom(n);
+ for (size_t i = 0; i < splitter.parts(); ++i) {
+ query::Node::UP nn;
+ nn.reset(new query::SimpleStringTerm(splitter.getPart(i), "", 0, query::Weight(0)));
+ phraseNode.append(std::move(nn));
+ }
+ visitPhrase(phraseNode);
+ } else {
+ if (splitter.parts() == 1) {
+ termStr = splitter.getPart(0);
+ }
+ query::SimpleStringTerm stringNode(termStr, n.getView(), n.getId(), n.getWeight());
+ stringNode.setStateFrom(n);
+ visit(stringNode);
+ }
+ }
+
+ void illegalVisit() {}
+
+ virtual void visit(search::query::And &) { illegalVisit(); }
+ virtual void visit(search::query::AndNot &) { illegalVisit(); }
+ virtual void visit(search::query::Equiv &) { illegalVisit(); }
+ virtual void visit(search::query::Near &) { illegalVisit(); }
+ virtual void visit(search::query::ONear &) { illegalVisit(); }
+ virtual void visit(search::query::Or &) { illegalVisit(); }
+ virtual void visit(search::query::Rank &) { illegalVisit(); }
+ virtual void visit(search::query::WeakAnd &) { illegalVisit(); }
+
+ virtual void visit(search::query::Phrase &n) {
+ visitPhrase(n);
+ }
+ virtual void visit(search::query::WeightedSetTerm &n) { visitWeightedSetTerm(n); }
+ virtual void visit(search::query::DotProduct &n) { visitDotProduct(n); }
+ virtual void visit(search::query::WandTerm &n) { visitWandTerm(n); }
+
+ virtual void visit(search::query::NumberTerm &n) = 0;
+ virtual void visit(search::query::LocationTerm &n) = 0;
+ virtual void visit(search::query::PrefixTerm &n) = 0;
+ virtual void visit(search::query::RangeTerm &n) = 0;
+ virtual void visit(search::query::StringTerm &n) = 0;
+ virtual void visit(search::query::SubstringTerm &n) = 0;
+ virtual void visit(search::query::SuffixTerm &n) = 0;
+ virtual void visit(search::query::RegExpTerm &n) = 0;
+};
+
+} // namespace search::queryeval
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/queryeval/document_weight_search_iterator.cpp b/searchlib/src/vespa/searchlib/queryeval/document_weight_search_iterator.cpp
new file mode 100644
index 00000000000..9f876e788bf
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/document_weight_search_iterator.cpp
@@ -0,0 +1,4 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "document_weight_search_iterator.h"
diff --git a/searchlib/src/vespa/searchlib/queryeval/document_weight_search_iterator.h b/searchlib/src/vespa/searchlib/queryeval/document_weight_search_iterator.h
new file mode 100644
index 00000000000..1bda4ba3dda
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/document_weight_search_iterator.h
@@ -0,0 +1,62 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "searchiterator.h"
+#include <vespa/searchlib/attribute/i_document_weight_attribute.h>
+#include <vespa/searchlib/fef/termfieldmatchdata.h>
+
+namespace search {
+namespace queryeval {
+
+class DocumentWeightSearchIterator : public SearchIterator
+{
+private:
+ fef::TermFieldMatchData &_tfmd;
+ fef::TermFieldMatchDataPosition * _matchPosition;
+ DocumentWeightIterator _iterator;
+ queryeval::MinMaxPostingInfo _postingInfo;
+
+public:
+ DocumentWeightSearchIterator(fef::TermFieldMatchData &tfmd,
+ const IDocumentWeightAttribute &attr,
+ IDocumentWeightAttribute::LookupResult dict_entry)
+ : _tfmd(tfmd),
+ _matchPosition(NULL),
+ _iterator(attr.create(dict_entry.posting_idx)),
+ _postingInfo(queryeval::MinMaxPostingInfo(dict_entry.min_weight, dict_entry.max_weight))
+ {
+ search::fef::TermFieldMatchDataPosition pos;
+ _tfmd.appendPosition(pos);
+ _matchPosition = _tfmd.getPositions();
+ }
+ void initRange(uint32_t begin, uint32_t end) override {
+ SearchIterator::initRange(begin, end);
+ _iterator.lower_bound(begin);
+ updateDocId();
+ }
+ void updateDocId() {
+ if (_iterator.valid()) {
+ setDocId(_iterator.getKey());
+ } else {
+ setAtEnd();
+ }
+ }
+
+ void doSeek(uint32_t docId) override {
+ _iterator.linearSeek(docId);
+ updateDocId();
+ }
+
+ void doUnpack(uint32_t docId) override {
+ _tfmd.resetOnlyDocId(docId);
+ _matchPosition->setElementWeight(_iterator.getData());
+ }
+
+ const queryeval::PostingInfo *getPostingInfo() const override { return &_postingInfo; }
+ Trinary is_strict() const override { return Trinary::True; }
+};
+
+} // namespace queryeval
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/queryeval/dot_product_blueprint.cpp b/searchlib/src/vespa/searchlib/queryeval/dot_product_blueprint.cpp
new file mode 100644
index 00000000000..62efcab7c4c
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/dot_product_blueprint.cpp
@@ -0,0 +1,92 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".queryeval.dot_product_blueprint");
+
+#include "dot_product_blueprint.h"
+#include "dot_product_search.h"
+#include <vespa/searchlib/fef/termfieldmatchdata.h>
+#include <vespa/searchlib/queryeval/searchiterator.h>
+#include <vespa/vespalib/objects/visit.h>
+#include <algorithm>
+
+namespace search {
+namespace queryeval {
+
+DotProductBlueprint::DotProductBlueprint(const FieldSpec &field)
+ : ComplexLeafBlueprint(field),
+ _estimate(),
+ _layout(),
+ _weights(),
+ _terms()
+{
+}
+
+DotProductBlueprint::~DotProductBlueprint()
+{
+ while (!_terms.empty()) {
+ delete _terms.back();
+ _terms.pop_back();
+ }
+}
+
+FieldSpec
+DotProductBlueprint::getNextChildField(const FieldSpec &outer)
+{
+ return FieldSpec(outer.getName(), outer.getFieldId(), _layout.allocTermField(outer.getFieldId()), false);
+}
+
+void
+DotProductBlueprint::addTerm(Blueprint::UP term, int32_t weight)
+{
+ HitEstimate childEst = term->getState().estimate();
+ if (! childEst.empty) {
+ if (_estimate.empty) {
+ _estimate = childEst;
+ } else {
+ _estimate.estHits += childEst.estHits;
+ }
+ setEstimate(_estimate);
+ }
+ _weights.push_back(weight);
+ _terms.push_back(term.get());
+ term.release();
+}
+
+SearchIterator::UP
+DotProductBlueprint::createLeafSearch(const search::fef::TermFieldMatchDataArray &tfmda,
+ bool) const
+{
+ assert(tfmda.size() == 1);
+ fef::MatchData::UP md = _layout.createMatchData();
+ std::vector<fef::TermFieldMatchData*> childMatch;
+ std::vector<SearchIterator*> children(_terms.size());
+ for (size_t i = 0; i < _terms.size(); ++i) {
+ const State &childState = _terms[i]->getState();
+ assert(childState.numFields() == 1);
+ childMatch.push_back(childState.field(0).resolve(*md));
+ children[i] = _terms[i]->createSearch(*md, true).release();
+ }
+ return DotProductSearch::create(children, *tfmda[0], childMatch, _weights, std::move(md));
+}
+
+void
+DotProductBlueprint::fetchPostings(bool strict)
+{
+ (void) strict;
+ for (size_t i = 0; i < _terms.size(); ++i) {
+ _terms[i]->fetchPostings(true);
+ }
+}
+
+void
+DotProductBlueprint::visitMembers(vespalib::ObjectVisitor &visitor) const
+{
+ LeafBlueprint::visitMembers(visitor);
+ visit(visitor, "_weights", _weights);
+ visit(visitor, "_terms", _terms);
+}
+
+} // namespace search::queryeval
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/queryeval/dot_product_blueprint.h b/searchlib/src/vespa/searchlib/queryeval/dot_product_blueprint.h
new file mode 100644
index 00000000000..1e19264e9cb
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/dot_product_blueprint.h
@@ -0,0 +1,47 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "searchable.h"
+#include <vespa/searchlib/fef/matchdatalayout.h>
+#include <memory>
+#include <vector>
+
+namespace search {
+namespace fef { class TermFieldMatchData; }
+
+namespace queryeval {
+
+class DotProductBlueprint : public ComplexLeafBlueprint
+{
+ HitEstimate _estimate;
+ fef::MatchDataLayout _layout;
+ std::vector<int32_t> _weights;
+ std::vector<Blueprint*> _terms;
+
+ DotProductBlueprint(const DotProductBlueprint &); // disabled
+ DotProductBlueprint &operator=(const DotProductBlueprint &); // disabled
+
+public:
+ DotProductBlueprint(const FieldSpec &field);
+ virtual ~DotProductBlueprint();
+
+ // used by create visitor
+ FieldSpec getNextChildField(const FieldSpec &outer);
+
+ // used by create visitor
+ void addTerm(Blueprint::UP term, int32_t weight);
+
+ virtual SearchIterator::UP
+ createLeafSearch(const search::fef::TermFieldMatchDataArray &tfmda,
+ bool strict) const;
+
+ virtual void visitMembers(vespalib::ObjectVisitor &visitor) const;
+
+ virtual void
+ fetchPostings(bool strict);
+};
+
+} // namespace search::queryeval
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/queryeval/dot_product_search.cpp b/searchlib/src/vespa/searchlib/queryeval/dot_product_search.cpp
new file mode 100644
index 00000000000..91fcf8a9502
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/dot_product_search.cpp
@@ -0,0 +1,154 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".queryeval.dot_product_search");
+
+#include "dot_product_search.h"
+#include "iterator_pack.h"
+#include <vespa/searchlib/fef/termfieldmatchdata.h>
+#include <vespa/vespalib/objects/visit.h>
+#include <algorithm>
+#include <functional>
+
+using search::fef::TermFieldMatchData;
+using vespalib::ObjectVisitor;
+
+namespace search {
+namespace queryeval {
+
+
+template <typename HEAP, typename IteratorPack>
+class DotProductSearchImpl : public DotProductSearch
+{
+private:
+ typedef uint32_t ref_t;
+
+ struct CmpDocId {
+ const uint32_t *termPos;
+ CmpDocId(const uint32_t *tp) : termPos(tp) {}
+ bool operator()(const ref_t &a, const ref_t &b) const {
+ return (termPos[a] < termPos[b]);
+ }
+ };
+
+ fef::TermFieldMatchData &_tmd;
+ std::vector<int32_t> _weights;
+ std::vector<uint32_t> _termPos;
+ CmpDocId _cmpDocId;
+ std::vector<ref_t> _data_space;
+ ref_t *_data_begin;
+ ref_t *_data_stash;
+ ref_t *_data_end;
+ IteratorPack _children;
+
+ void seek_child(ref_t child, uint32_t docId) {
+ _termPos[child] = _children.seek(child, docId);
+ }
+
+public:
+ DotProductSearchImpl(search::fef::TermFieldMatchData &tmd,
+ const std::vector<int32_t> &weights,
+ IteratorPack &&iteratorPack)
+ : _tmd(tmd),
+ _weights(weights),
+ _termPos(weights.size()),
+ _cmpDocId(&_termPos[0]),
+ _data_space(),
+ _data_begin(nullptr),
+ _data_stash(nullptr),
+ _data_end(nullptr),
+ _children(std::move(iteratorPack))
+ {
+ HEAP::require_left_heap();
+ assert(_weights.size() > 0);
+ assert(_weights.size() == _children.size());
+ _data_space.reserve(_weights.size());
+ for (size_t i = 0; i < weights.size(); ++i) {
+ _data_space.push_back(i);
+ }
+ _data_begin = &_data_space[0];
+ _data_end = _data_begin + _data_space.size();
+ }
+
+ void doSeek(uint32_t docId) override {
+ while (_data_stash < _data_end) {
+ seek_child(*_data_stash, docId);
+ HEAP::push(_data_begin, ++_data_stash, _cmpDocId);
+ }
+ while (_termPos[HEAP::front(_data_begin, _data_stash)] < docId) {
+ seek_child(HEAP::front(_data_begin, _data_stash), docId);
+ HEAP::adjust(_data_begin, _data_stash, _cmpDocId);
+ }
+ setDocId(_termPos[HEAP::front(_data_begin, _data_stash)]);
+ }
+
+ void doUnpack(uint32_t docId) override {
+ feature_t score = 0.0;
+ while ((_data_begin < _data_stash) &&
+ _termPos[HEAP::front(_data_begin, _data_stash)] == docId)
+ {
+ HEAP::pop(_data_begin, _data_stash--, _cmpDocId);
+ const ref_t child = *_data_stash;
+ double tmp = _weights[child];
+ tmp *= _children.get_weight(child, docId);
+ score += tmp;
+ };
+ _tmd.setRawScore(docId, score);
+ }
+
+ void initRange(uint32_t begin, uint32_t end) override {
+ DotProductSearch::initRange(begin, end);
+ _children.initRange(begin, end);
+ for (size_t i = 0; i < _children.size(); ++i) {
+ _termPos[i] = _children.get_docid(i);
+ }
+ _data_stash = _data_begin;
+ while (_data_stash < _data_end) {
+ HEAP::push(_data_begin, ++_data_stash, _cmpDocId);
+ }
+ }
+ Trinary is_strict() const override { return Trinary::True; }
+
+ void visitMembers(vespalib::ObjectVisitor &) const override {}
+};
+
+//-----------------------------------------------------------------------------
+
+
+SearchIterator::UP
+DotProductSearch::create(const std::vector<SearchIterator*> &children,
+ search::fef::TermFieldMatchData &tmd,
+ const std::vector<fef::TermFieldMatchData*> &childMatch,
+ const std::vector<int32_t> &weights,
+ fef::MatchData::UP md)
+{
+ typedef DotProductSearchImpl<vespalib::LeftArrayHeap, SearchIteratorPack> ArrayHeapImpl;
+ typedef DotProductSearchImpl<vespalib::LeftHeap, SearchIteratorPack> HeapImpl;
+
+ if (childMatch.size() < 128) {
+ return SearchIterator::UP(new ArrayHeapImpl(tmd, weights, SearchIteratorPack(children, childMatch, std::move(md))));
+ }
+ return SearchIterator::UP(new HeapImpl(tmd, weights, SearchIteratorPack(children, childMatch, std::move(md))));
+}
+
+//-----------------------------------------------------------------------------
+
+SearchIterator::UP
+DotProductSearch::create(search::fef::TermFieldMatchData &tmd,
+ const std::vector<int32_t> &weights,
+ std::vector<DocumentWeightIterator> &&iterators)
+{
+ typedef DotProductSearchImpl<vespalib::LeftArrayHeap, AttributeIteratorPack> ArrayHeapImpl;
+ typedef DotProductSearchImpl<vespalib::LeftHeap, AttributeIteratorPack> HeapImpl;
+
+ if (iterators.size() < 128) {
+ return SearchIterator::UP(new ArrayHeapImpl(tmd, weights, AttributeIteratorPack(std::move(iterators))));
+ }
+ return SearchIterator::UP(new HeapImpl(tmd, weights, AttributeIteratorPack(std::move(iterators))));
+}
+
+//-----------------------------------------------------------------------------
+
+} // namespace search::queryeval
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/queryeval/dot_product_search.h b/searchlib/src/vespa/searchlib/queryeval/dot_product_search.h
new file mode 100644
index 00000000000..d5503f7baa5
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/dot_product_search.h
@@ -0,0 +1,46 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "multisearch.h"
+#include <vespa/vespalib/util/priority_queue.h>
+#include <vespa/searchlib/fef/matchdata.h>
+#include <vespa/searchlib/fef/termfieldmatchdataarray.h>
+#include <vespa/searchlib/attribute/iterator_pack.h>
+#include <memory>
+#include <vector>
+
+namespace search {
+namespace fef {
+class TermFieldMatchData;
+} // namespace fef
+
+namespace queryeval {
+
+/**
+ * Search iterator for a sparse dot product, based on a set of child
+ * search iterators.
+ *
+ * This class is a base class for a set of different instantiations of
+ * DotProductSearchImpl, defined in the .cpp-file.
+ */
+class DotProductSearch : public SearchIterator
+{
+protected:
+ DotProductSearch() {}
+
+public:
+ static SearchIterator::UP create(const std::vector<SearchIterator*> &children,
+ search::fef::TermFieldMatchData &tmd,
+ const std::vector<fef::TermFieldMatchData*> &childMatch,
+ const std::vector<int32_t> &weights,
+ fef::MatchData::UP md);
+
+ static SearchIterator::UP create(search::fef::TermFieldMatchData &tmd,
+ const std::vector<int32_t> &weights,
+ std::vector<DocumentWeightIterator> &&iterators);
+};
+
+} // namespace search::queryeval
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/queryeval/emptysearch.cpp b/searchlib/src/vespa/searchlib/queryeval/emptysearch.cpp
new file mode 100644
index 00000000000..b500e977848
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/emptysearch.cpp
@@ -0,0 +1,29 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "emptysearch.h"
+
+namespace search {
+namespace queryeval {
+
+void
+EmptySearch::doSeek(uint32_t)
+{
+}
+
+void
+EmptySearch::doUnpack(uint32_t)
+{
+}
+
+EmptySearch::EmptySearch()
+ : SearchIterator()
+{
+}
+
+EmptySearch::~EmptySearch()
+{
+}
+
+} // namespace queryeval
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/queryeval/emptysearch.h b/searchlib/src/vespa/searchlib/queryeval/emptysearch.h
new file mode 100644
index 00000000000..45b60e08468
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/emptysearch.h
@@ -0,0 +1,27 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "searchiterator.h"
+
+namespace search {
+namespace queryeval {
+
+class EmptySearch : public SearchIterator
+{
+protected:
+ void doSeek(uint32_t) override;
+ void doUnpack(uint32_t) override;
+ void initRange(uint32_t begin, uint32_t end) override {
+ SearchIterator::initRange(begin, end);
+ setAtEnd();
+ }
+
+public:
+ EmptySearch();
+ ~EmptySearch();
+};
+
+} // namespace queryeval
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/queryeval/equiv_blueprint.cpp b/searchlib/src/vespa/searchlib/queryeval/equiv_blueprint.cpp
new file mode 100644
index 00000000000..e61fd77918c
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/equiv_blueprint.cpp
@@ -0,0 +1,74 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "equiv_blueprint.h"
+#include "equivsearch.h"
+
+namespace search {
+namespace queryeval {
+
+EquivBlueprint::EquivBlueprint(const FieldSpecBaseList &fields,
+ fef::MatchDataLayout subtree_mdl)
+ : ComplexLeafBlueprint(fields),
+ _fields(fields),
+ _estimate(),
+ _layout(subtree_mdl),
+ _terms(),
+ _exactness()
+{
+}
+
+EquivBlueprint::~EquivBlueprint()
+{
+}
+
+SearchIterator::UP
+EquivBlueprint::createLeafSearch(const search::fef::TermFieldMatchDataArray &outputs,
+ bool strict) const
+{
+ fef::MatchData::UP md = _layout.createMatchData();
+ MultiSearch::Children children(_terms.size());
+ search::fef::TermMatchDataMerger::Inputs childMatch;
+ for (size_t i = 0; i < _terms.size(); ++i) {
+ const State &childState = _terms[i]->getState();
+ for (size_t j = 0; j < childState.numFields(); ++j) {
+ childMatch.emplace_back(childState.field(j).resolve(*md), _exactness[i]);
+ }
+ children[i] = _terms[i]->createSearch(*md, strict).release();
+ }
+ return SearchIterator::UP(EquivSearch::create(children, std::move(md), childMatch, outputs, strict));
+}
+
+void
+EquivBlueprint::visitMembers(vespalib::ObjectVisitor &visitor) const
+{
+ LeafBlueprint::visitMembers(visitor);
+ visit(visitor, "terms", _terms);
+}
+
+void
+EquivBlueprint::fetchPostings(bool strict)
+{
+ for (size_t i = 0; i < _terms.size(); ++i) {
+ _terms[i]->fetchPostings(strict);
+ }
+}
+
+EquivBlueprint&
+EquivBlueprint::addTerm(Blueprint::UP term, double exactness)
+{
+ const State &childState = term->getState();
+
+ HitEstimate childEst = childState.estimate();
+ if (_terms.empty() || _estimate < childEst ) {
+ _estimate = childEst;
+ }
+ setEstimate(_estimate);
+ _terms.push_back(std::move(term));
+ _exactness.push_back(exactness);
+ return *this;
+}
+
+
+} // namespace queryeval
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/queryeval/equiv_blueprint.h b/searchlib/src/vespa/searchlib/queryeval/equiv_blueprint.h
new file mode 100644
index 00000000000..818257df7a1
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/equiv_blueprint.h
@@ -0,0 +1,36 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "blueprint.h"
+#include <vespa/searchlib/fef/matchdatalayout.h>
+
+namespace search {
+namespace queryeval {
+
+class EquivBlueprint : public ComplexLeafBlueprint
+{
+private:
+ FieldSpecBaseList _fields;
+ HitEstimate _estimate;
+ fef::MatchDataLayout _layout;
+ std::vector<Blueprint::UP> _terms;
+ std::vector<double> _exactness;
+
+public:
+ EquivBlueprint(const FieldSpecBaseList &fields, fef::MatchDataLayout subtree_mdl);
+ virtual ~EquivBlueprint();
+
+ // used by create visitor
+ EquivBlueprint& addTerm(Blueprint::UP term, double exactness);
+
+ virtual SearchIterator::UP
+ createLeafSearch(const search::fef::TermFieldMatchDataArray &tfmda,
+ bool strict) const;
+
+ virtual void visitMembers(vespalib::ObjectVisitor &visitor) const;
+ virtual void fetchPostings(bool strict);
+};
+
+} // namespace queryeval
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/queryeval/equivsearch.cpp b/searchlib/src/vespa/searchlib/queryeval/equivsearch.cpp
new file mode 100644
index 00000000000..841466d3e67
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/equivsearch.cpp
@@ -0,0 +1,72 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "equivsearch.h"
+#include <vespa/vespalib/objects/visit.h>
+#include <vespa/searchlib/fef/termmatchdatamerger.h>
+
+namespace search {
+namespace queryeval {
+
+template <bool strict>
+class EquivImpl : public OrLikeSearch<strict, NoUnpack>
+{
+private:
+ fef::MatchData::UP _inputMatchData;
+ fef::TermMatchDataMerger _merger;
+ bool _valid;
+
+protected:
+ virtual void doUnpack(uint32_t docid);
+
+public:
+ /**
+ * Create a new Equiv Search with the given children.
+ *
+ * @param children the search objects that should be equivalent
+ **/
+ EquivImpl(const MultiSearch::Children &children,
+ fef::MatchData::UP inputMatchData,
+ const search::fef::TermMatchDataMerger::Inputs &inputs,
+ const fef::TermFieldMatchDataArray &outputs);
+};
+
+template<bool strict>
+EquivImpl<strict>::EquivImpl(const MultiSearch::Children &children,
+ fef::MatchData::UP inputMatchData,
+ const search::fef::TermMatchDataMerger::Inputs &inputs,
+ const search::fef::TermFieldMatchDataArray &outputs)
+
+ : OrLikeSearch<strict, NoUnpack>(children, NoUnpack()),
+ _inputMatchData(std::move(inputMatchData)),
+ _merger(inputs, outputs),
+ _valid(outputs.valid())
+{
+}
+
+template<bool strict>
+void
+EquivImpl<strict>::doUnpack(uint32_t docid)
+{
+ if (_valid) {
+ MultiSearch::doUnpack(docid);
+ _merger.merge(docid);
+ }
+}
+
+SearchIterator *
+EquivSearch::create(const Children &children,
+ fef::MatchData::UP inputMatchData,
+ const search::fef::TermMatchDataMerger::Inputs &inputs,
+ const search::fef::TermFieldMatchDataArray &outputs,
+ bool strict)
+{
+ if (strict) {
+ return new EquivImpl<true>(children, std::move(inputMatchData), inputs, outputs);
+ } else {
+ return new EquivImpl<false>(children, std::move(inputMatchData), inputs, outputs);
+ }
+}
+
+} // namespace queryeval
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/queryeval/equivsearch.h b/searchlib/src/vespa/searchlib/queryeval/equivsearch.h
new file mode 100644
index 00000000000..b96117a946a
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/equivsearch.h
@@ -0,0 +1,32 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "orlikesearch.h"
+#include <vespa/searchlib/fef/matchdata.h>
+#include <vespa/searchlib/fef/termfieldmatchdata.h>
+#include <vespa/searchlib/fef/termfieldmatchdataarray.h>
+#include <vespa/searchlib/fef/termmatchdatamerger.h>
+
+namespace search {
+namespace queryeval {
+
+/**
+ * A simple implementation of the Equiv search operation.
+ **/
+class EquivSearch : public SearchIterator
+{
+public:
+ typedef MultiSearch::Children Children;
+
+ // Caller takes ownership of the returned SearchIterator.
+ static SearchIterator *create(const Children &children,
+ fef::MatchData::UP inputMD,
+ const search::fef::TermMatchDataMerger::Inputs &inputs,
+ const search::fef::TermFieldMatchDataArray &outputs,
+ bool strict);
+};
+
+} // namespace queryeval
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/queryeval/fake_requestcontext.cpp b/searchlib/src/vespa/searchlib/queryeval/fake_requestcontext.cpp
new file mode 100644
index 00000000000..7dc2fd3869b
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/fake_requestcontext.cpp
@@ -0,0 +1,15 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/searchlib/queryeval/fake_requestcontext.h>
+
+namespace search {
+namespace queryeval {
+
+FakeRequestContext::FakeRequestContext(attribute::IAttributeContext * context, fastos::TimeStamp doom_in) :
+ _clock(),
+ _doom(_clock, doom_in),
+ _attributeContext(context)
+{ }
+
+}
+}
diff --git a/searchlib/src/vespa/searchlib/queryeval/fake_requestcontext.h b/searchlib/src/vespa/searchlib/queryeval/fake_requestcontext.h
new file mode 100644
index 00000000000..9807d2310f3
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/fake_requestcontext.h
@@ -0,0 +1,34 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/queryeval/irequestcontext.h>
+#include <vespa/searchcommon/attribute/iattributecontext.h>
+#include <limits>
+
+namespace search {
+namespace queryeval {
+
+class FakeRequestContext : public IRequestContext
+{
+public:
+ FakeRequestContext(attribute::IAttributeContext * context = nullptr, fastos::TimeStamp doom=std::numeric_limits<int64_t>::max());
+ const vespalib::Doom & getDoom() const override { return _doom; }
+ const AttributeVector * getAttribute(const vespalib::string & name) const override {
+ return _attributeContext
+ ? dynamic_cast<const AttributeVector *>(_attributeContext->getAttribute(name))
+ : nullptr;
+ }
+ const AttributeVector * getAttributeStableEnum(const vespalib::string & name) const override {
+ return _attributeContext
+ ? dynamic_cast<const AttributeVector *>(_attributeContext->getAttribute(name))
+ : nullptr;
+ }
+private:
+ vespalib::Clock _clock;
+ const vespalib::Doom _doom;
+ attribute::IAttributeContext * _attributeContext;
+};
+
+}
+}
diff --git a/searchlib/src/vespa/searchlib/queryeval/fake_result.cpp b/searchlib/src/vespa/searchlib/queryeval/fake_result.cpp
new file mode 100644
index 00000000000..6c152376803
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/fake_result.cpp
@@ -0,0 +1,39 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".fakeresult");
+#include "fake_result.h"
+
+namespace search {
+namespace queryeval {
+
+std::ostream &operator << (std::ostream &out, const FakeResult &result) {
+ const std::vector<FakeResult::Document> &doc = result.inspect();
+ if (doc.size() == 0) {
+ out << std::endl << "empty" << std::endl;
+ } else {
+ out << std::endl;
+ for (size_t d = 0; d < doc.size(); ++d) {
+ out << "{ DOC id: " << doc[d].docId << " }" << std::endl;
+
+ const std::vector<FakeResult::Element> &elem = doc[d].elements;
+ for (size_t e = 0; e < elem.size(); ++e) {
+ out << " ( ELEM id: " << elem[e].id
+ << " weight: " << elem[e].weight
+ << " len: " << elem[e].length
+ << " )" << std::endl;
+
+ const std::vector<uint32_t> &pos = elem[e].positions;
+ for (size_t p = 0; p < pos.size(); ++p) {
+ out << " [ OCC pos: " << pos[p] << " ]" << std::endl;
+ }
+ }
+ out << " ( RAW score: " << doc[d].rawScore << " )" << std::endl;
+ }
+ }
+ return out;
+}
+
+} // namespace queryeval
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/queryeval/fake_result.h b/searchlib/src/vespa/searchlib/queryeval/fake_result.h
new file mode 100644
index 00000000000..d47cbcf8763
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/fake_result.h
@@ -0,0 +1,108 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "posting_info.h"
+#include <vespa/searchlib/common/feature.h>
+#include <vespa/searchlib/common/fslimits.h>
+#include <vector>
+
+namespace search {
+namespace queryeval {
+
+class FakeResult
+{
+public:
+ struct Element {
+ uint32_t id;
+ int32_t weight;
+ uint32_t length;
+ std::vector<uint32_t> positions;
+ Element(uint32_t id_) : id(id_), weight(1),
+ length(SEARCHLIB_FEF_UNKNOWN_FIELD_LENGTH),
+ positions() {}
+ bool operator==(const Element &rhs) const {
+ return (id == rhs.id &&
+ weight == rhs.weight &&
+ length == rhs.length &&
+ positions == rhs.positions);
+ }
+ };
+
+ struct Document {
+ uint32_t docId;
+ std::vector<Element> elements;
+ feature_t rawScore;
+ Document(uint32_t id) : docId(id), elements(), rawScore(0) {}
+ bool operator==(const Document &rhs) const {
+ return (docId == rhs.docId &&
+ elements == rhs.elements &&
+ rawScore == rhs.rawScore);
+ }
+ };
+
+private:
+ std::vector<Document> _documents;
+ MinMaxPostingInfo::SP _minMaxPostingInfo;
+
+public:
+ FakeResult() : _documents(), _minMaxPostingInfo() {}
+
+ FakeResult &doc(uint32_t docId) {
+ _documents.push_back(Document(docId));
+ return *this;
+ }
+
+ FakeResult &elem(uint32_t id) {
+ _documents.back().elements.push_back(Element(id));
+ return *this;
+ }
+
+ FakeResult &score(feature_t s) {
+ _documents.back().rawScore = s;
+ return *this;
+ }
+
+ FakeResult &len(uint32_t length) {
+ if (_documents.back().elements.empty()) {
+ elem(0);
+ }
+ _documents.back().elements.back().length = length;
+ return *this;
+ }
+
+ FakeResult &weight(uint32_t w) {
+ if (_documents.back().elements.empty()) {
+ elem(0);
+ }
+ _documents.back().elements.back().weight = w;
+ return *this;
+ }
+
+ FakeResult &pos(uint32_t p) {
+ if (_documents.back().elements.empty()) {
+ elem(0);
+ }
+ _documents.back().elements.back().positions.push_back(p);
+ return *this;
+ }
+
+ FakeResult &minMax(int32_t minWeight, int32_t maxWeight) {
+ _minMaxPostingInfo.reset(new MinMaxPostingInfo(minWeight, maxWeight));
+ return *this;
+ }
+
+ bool operator==(const FakeResult &rhs) const {
+ return _documents == rhs._documents;
+ }
+
+ const std::vector<Document> &inspect() const { return _documents; }
+
+ const PostingInfo *postingInfo() const { return _minMaxPostingInfo.get(); }
+};
+
+std::ostream &operator << (std::ostream &out, const FakeResult &result);
+
+} // namespace queryeval
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/queryeval/fake_search.cpp b/searchlib/src/vespa/searchlib/queryeval/fake_search.cpp
new file mode 100644
index 00000000000..85946e1a758
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/fake_search.cpp
@@ -0,0 +1,56 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".fakesearch");
+#include <vespa/vespalib/objects/visit.h>
+#include "fake_search.h"
+#include <vespa/searchlib/fef/termfieldmatchdataposition.h>
+#include <vespa/searchlib/fef/termfieldmatchdata.h>
+
+namespace search {
+namespace queryeval {
+
+void
+FakeSearch::doSeek(uint32_t docid)
+{
+ while (valid() && docid > currId()) {
+ next();
+ }
+ if (valid()) {
+ setDocId(currId());
+ } else {
+ setAtEnd();
+ }
+}
+
+void
+FakeSearch::doUnpack(uint32_t docid)
+{
+ typedef fef::TermFieldMatchDataPosition PosCtx;
+ typedef FakeResult::Document Doc;
+ typedef FakeResult::Element Elem;
+
+ assert(valid());
+ const Doc &doc = _result.inspect()[_offset];
+ assert(doc.docId == docid);
+ _tfmda[0]->reset(docid);
+ for (uint32_t i = 0; i < doc.elements.size(); ++i) {
+ const Elem &elem =doc.elements[i];
+ for (uint32_t j = 0; j < elem.positions.size(); ++j) {
+ _tfmda[0]->appendPosition(PosCtx(elem.id, elem.positions[j],
+ elem.weight, elem.length));
+ }
+ }
+}
+
+void
+FakeSearch::visitMembers(vespalib::ObjectVisitor &visitor) const
+{
+ visit(visitor, "tag", _tag);
+ visit(visitor, "field", _field);
+ visit(visitor, "term", _term);
+}
+
+} // namespace queryeval
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/queryeval/fake_search.h b/searchlib/src/vespa/searchlib/queryeval/fake_search.h
new file mode 100644
index 00000000000..b360cdd76e7
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/fake_search.h
@@ -0,0 +1,45 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "searchiterator.h"
+#include "fake_result.h"
+#include <vespa/searchlib/fef/termfieldmatchdataarray.h>
+
+namespace search {
+namespace queryeval {
+
+class FakeSearch : public SearchIterator
+{
+private:
+ vespalib::string _tag;
+ vespalib::string _field;
+ vespalib::string _term;
+ FakeResult _result;
+ uint32_t _offset;
+ fef::TermFieldMatchDataArray _tfmda;
+
+ bool valid() const { return _offset < _result.inspect().size(); }
+ uint32_t currId() const { return _result.inspect()[_offset].docId; }
+ void next() { ++_offset; }
+
+public:
+ FakeSearch(const vespalib::string &tag,
+ const vespalib::string &field,
+ const vespalib::string &term,
+ const FakeResult &res,
+ const fef::TermFieldMatchDataArray &tfmda)
+ : _tag(tag), _field(field), _term(term),
+ _result(res), _offset(0), _tfmda(tfmda)
+ {
+ assert(_tfmda.size() == 1);
+ }
+ virtual void doSeek(uint32_t docid);
+ virtual void doUnpack(uint32_t docid);
+ virtual const PostingInfo *getPostingInfo() const { return _result.postingInfo(); }
+ virtual void visitMembers(vespalib::ObjectVisitor &visitor) const;
+};
+
+} // namespace queryeval
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/queryeval/fake_searchable.cpp b/searchlib/src/vespa/searchlib/queryeval/fake_searchable.cpp
new file mode 100644
index 00000000000..c9f088f9039
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/fake_searchable.cpp
@@ -0,0 +1,108 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".searchlib.queryeval.fake_searchable");
+
+#include "fake_searchable.h"
+#include "leaf_blueprints.h"
+#include "termasstring.h"
+
+#include "create_blueprint_visitor_helper.h"
+#include <vespa/searchlib/fef/termfieldmatchdata.h>
+#include <vespa/searchlib/queryeval/searchiterator.h>
+#include <vespa/vespalib/objects/visit.h>
+
+using search::query::NumberTerm;
+using search::query::LocationTerm;
+using search::query::Node;
+using search::query::PredicateQuery;
+using search::query::PrefixTerm;
+using search::query::RangeTerm;
+using search::query::RegExpTerm;
+using search::query::StringTerm;
+using search::query::SubstringTerm;
+using search::query::SuffixTerm;
+
+namespace search {
+namespace queryeval {
+
+FakeSearchable::FakeSearchable()
+ : _tag("<undef>"),
+ _map()
+{
+}
+
+FakeSearchable &
+FakeSearchable::addResult(const vespalib::string &field,
+ const vespalib::string &term,
+ const FakeResult &result)
+{
+ _map[Key(field, term)] = result;
+ return *this;
+}
+
+namespace {
+
+/**
+ * Determines the correct LookupResult to use.
+ **/
+template <class Map>
+class LookupVisitor : public CreateBlueprintVisitorHelper
+{
+ const Map &_map;
+ const vespalib::string _tag;
+
+public:
+ LookupVisitor(Searchable &searchable,
+ const IRequestContext & requestContext,
+ const Map &map, const vespalib::string &tag,
+ const FieldSpec &field)
+ : CreateBlueprintVisitorHelper(searchable, field, requestContext),
+ _map(map), _tag(tag) {}
+
+ template <class TermNode>
+ void visitTerm(TermNode &n) {
+ const vespalib::string term_string = termAsString(n);
+
+ FakeResult result;
+ typename Map::const_iterator pos =
+ _map.find(typename Map::key_type(getField().getName(), term_string));
+ if (pos != _map.end()) {
+ result = pos->second;
+ }
+ FakeBlueprint *fake = new FakeBlueprint(getField(), result);
+ Blueprint::UP b(fake);
+ fake->tag(_tag).term(term_string);
+ setResult(std::move(b));
+ }
+
+ virtual void visit(NumberTerm &n) { visitTerm(n); }
+ virtual void visit(LocationTerm &n) { visitTerm(n); }
+ virtual void visit(PrefixTerm &n) { visitTerm(n); }
+ virtual void visit(RangeTerm &n) { visitTerm(n); }
+ virtual void visit(StringTerm &n) { visitTerm(n); }
+ virtual void visit(SubstringTerm &n) { visitTerm(n); }
+ virtual void visit(SuffixTerm &n) { visitTerm(n); }
+ virtual void visit(PredicateQuery &n) { visitTerm(n); }
+ virtual void visit(RegExpTerm &n) { visitTerm(n); }
+};
+
+} // namespace search::queryeval::<unnamed>
+
+Blueprint::UP
+FakeSearchable::createBlueprint(const IRequestContext & requestContext,
+ const FieldSpec &field,
+ const search::query::Node &term)
+{
+ LookupVisitor<Map> visitor(*this, requestContext, _map, _tag, field);
+ const_cast<Node &>(term).accept(visitor);
+ return visitor.getResult();
+}
+
+FakeSearchable::~FakeSearchable()
+{
+}
+
+} // namespace search::queryeval
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/queryeval/fake_searchable.h b/searchlib/src/vespa/searchlib/queryeval/fake_searchable.h
new file mode 100644
index 00000000000..26a8258713d
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/fake_searchable.h
@@ -0,0 +1,68 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "searchable.h"
+#include "fake_result.h"
+
+#include <string>
+#include <map>
+
+namespace search {
+namespace queryeval {
+
+/**
+ * A fake Searchable implementation.
+ **/
+class FakeSearchable : public Searchable
+{
+private:
+ typedef std::pair<vespalib::string, vespalib::string> Key;
+ typedef FakeResult Value;
+ typedef std::map<Key, Value> Map;
+
+ vespalib::string _tag;
+ Map _map;
+
+public:
+ /**
+ * Create an initially empty fake searchable.
+ **/
+ FakeSearchable();
+
+ /**
+ * Tag this searchable with a string value that will be visible
+ * when dumping search iterators created from it.
+ *
+ * @return this object for chaining
+ * @param t tag
+ **/
+ FakeSearchable &tag(const vespalib::string &t) {
+ _tag = t;
+ return *this;
+ }
+
+ /**
+ * Add a fake result to be returned for lookup on the given field
+ * and term combination.
+ *
+ * @return this object for chaining
+ * @param field field name
+ * @param term search term in string form
+ * @param result the fake result
+ **/
+ FakeSearchable &addResult(const vespalib::string &field,
+ const vespalib::string &term,
+ const FakeResult &result);
+
+ // inherited from Searchable
+ using Searchable::createBlueprint;
+ virtual Blueprint::UP createBlueprint(const IRequestContext & requestContext,
+ const FieldSpec &field,
+ const search::query::Node &term);
+ virtual ~FakeSearchable();
+};
+
+} // namespace search::queryeval
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/queryeval/field_spec.cpp b/searchlib/src/vespa/searchlib/queryeval/field_spec.cpp
new file mode 100644
index 00000000000..60cdefab2c1
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/field_spec.cpp
@@ -0,0 +1,19 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".fieldspec");
+#include "field_spec.h"
+
+namespace search {
+namespace queryeval {
+
+FieldSpecBase::FieldSpecBase(uint32_t fieldId, fef::TermFieldHandle handle, bool isFilter_) :
+ _fieldId(fieldId | (isFilter_ ? 0x1000000u : 0)),
+ _handle(handle)
+{
+ assert(fieldId < 0x1000000); // Can be represented by 24 bits
+}
+
+} // namespace queryeval
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/queryeval/field_spec.h b/searchlib/src/vespa/searchlib/queryeval/field_spec.h
new file mode 100644
index 00000000000..b652b7b676a
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/field_spec.h
@@ -0,0 +1,119 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/fef/handle.h>
+#include <vespa/searchlib/fef/matchdata.h>
+#include <vector>
+#include <vespa/vespalib/stllike/string.h>
+
+namespace search {
+
+namespace queryeval {
+
+
+/**
+ * Base description of a single field to be searched.
+ **/
+class FieldSpecBase
+{
+public:
+ FieldSpecBase(uint32_t fieldId, fef::TermFieldHandle handle, bool isFilter_ = false);
+
+ // resolve where to put match information for this term/field combination
+ search::fef::TermFieldMatchData *resolve(search::fef::MatchData &md) const {
+ return md.resolveTermField(getHandle());
+ }
+ const search::fef::TermFieldMatchData *resolve(const search::fef::MatchData &md) const {
+ return md.resolveTermField(getHandle());
+ }
+ uint32_t getFieldId() const { return _fieldId & 0xffffff; }
+ fef::TermFieldHandle getHandle() const { return _handle; }
+ /// a filter produces less detailed match data
+ bool isFilter() const { return _fieldId & 0x1000000; }
+private:
+ uint32_t _fieldId; // field id in ranking framework
+ fef::TermFieldHandle _handle; // handle used when exposing match data to ranking framework
+};
+
+/**
+ * Description of a single field to be searched.
+ **/
+class FieldSpec : public FieldSpecBase
+{
+public:
+ FieldSpec(const vespalib::string & name, uint32_t fieldId,
+ fef::TermFieldHandle handle, bool isFilter_ = false)
+ : FieldSpecBase(fieldId, handle, isFilter_),
+ _name(name)
+ {}
+
+ // resolve where to put match information for this term/field combination
+ search::fef::TermFieldMatchData *resolve(search::fef::MatchData &md) const {
+ return md.resolveTermField(getHandle());
+ }
+ const vespalib::string & getName() const { return _name; }
+private:
+ vespalib::string _name; // field name
+};
+
+/**
+ * List of fields to be searched.
+ **/
+class FieldSpecBaseList
+{
+private:
+ std::vector<FieldSpecBase> _list;
+
+public:
+ FieldSpecBaseList &add(const FieldSpecBase &spec) {
+ _list.push_back(spec);
+ return *this;
+ }
+ bool empty() const {
+ return _list.empty();
+ }
+ size_t size() const {
+ return _list.size();
+ }
+ const FieldSpecBase &operator[](size_t i) const {
+ return _list[i];
+ }
+ void clear() { _list.clear(); }
+
+ void swap(FieldSpecBaseList & rhs) {
+ _list.swap(rhs._list);
+ }
+};
+
+/**
+ * List of fields to be searched.
+ **/
+class FieldSpecList
+{
+private:
+ std::vector<FieldSpec> _list;
+
+public:
+ FieldSpecList &add(const FieldSpec &spec) {
+ _list.push_back(spec);
+ return *this;
+ }
+ bool empty() const {
+ return _list.empty();
+ }
+ size_t size() const {
+ return _list.size();
+ }
+ const FieldSpec &operator[](size_t i) const {
+ return _list[i];
+ }
+ void clear() { _list.clear(); }
+ void swap(FieldSpecList & rhs) {
+ _list.swap(rhs._list);
+ }
+};
+
+} // namespace queryeval
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/queryeval/get_weight_from_node.cpp b/searchlib/src/vespa/searchlib/queryeval/get_weight_from_node.cpp
new file mode 100644
index 00000000000..544240daeff
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/get_weight_from_node.cpp
@@ -0,0 +1,48 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".get_weight_from_node");
+
+#include "get_weight_from_node.h"
+#include <vespa/searchlib/query/tree/intermediatenodes.h>
+#include <vespa/searchlib/query/tree/node.h>
+#include <vespa/searchlib/query/tree/simplequery.h>
+#include <vespa/searchlib/query/tree/templatetermvisitor.h>
+#include <vespa/searchlib/query/tree/termnodes.h>
+
+using search::query::Node;
+using search::query::SimpleQueryNodeTypes;
+using search::query::TemplateTermVisitor;
+using search::query::Weight;
+
+namespace search {
+namespace queryeval {
+namespace {
+
+struct WeightExtractor : public TemplateTermVisitor<WeightExtractor,
+ SimpleQueryNodeTypes> {
+ Weight weight;
+
+ WeightExtractor() : weight(0) {}
+
+ template <class TermType> void visitTerm(TermType &n) {
+ weight = n.getWeight();
+ }
+
+ // Treat Equiv nodes as terms.
+ virtual void visit(search::query::Equiv &n) { visitTerm(n); }
+};
+
+} // namespace search::queryeval::<unnamed>
+
+Weight
+getWeightFromNode(const Node &node)
+{
+ WeightExtractor extractor;
+ const_cast<Node &>(node).accept(extractor);
+ return extractor.weight;
+}
+
+} // namespace search::queryeval
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/queryeval/get_weight_from_node.h b/searchlib/src/vespa/searchlib/queryeval/get_weight_from_node.h
new file mode 100644
index 00000000000..6634fc474fb
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/get_weight_from_node.h
@@ -0,0 +1,15 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/query/weight.h>
+
+namespace search {
+namespace query { class Node; }
+namespace queryeval {
+
+search::query::Weight getWeightFromNode(const search::query::Node &node);
+
+} // namespace search::queryeval
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/queryeval/hitcollector.cpp b/searchlib/src/vespa/searchlib/queryeval/hitcollector.cpp
new file mode 100644
index 00000000000..95d98e48727
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/hitcollector.cpp
@@ -0,0 +1,313 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "hitcollector.h"
+#include "scores.h"
+#include <vespa/searchlib/common/bitvector.h>
+#include <vespa/searchlib/common/sort.h>
+#include <limits>
+
+namespace search {
+namespace queryeval {
+
+void
+HitCollector::sortHitsByScore(size_t topn)
+{
+ topn = std::min(topn, _hits.size());
+ if (topn > _scoreOrder.size()) {
+ _scoreOrder.clear();
+ _scoreOrder.reserve(_hits.size());
+ for (size_t i(0); i < _hits.size(); i++) {
+ _scoreOrder.push_back(i);
+ }
+ ShiftBasedRadixSorter<uint32_t, IndirectScoreRadix, IndirectScoreComparator, 56, true>::
+ radix_sort(IndirectScoreRadix(&_hits[0]), IndirectScoreComparator(&_hits[0]), &_scoreOrder[0], _scoreOrder.size(), 16, topn);
+ _scoreOrder.resize(topn);
+ }
+}
+
+void
+HitCollector::sortHitsByDocId()
+{
+ if (_hitsSortOrder != SortOrder::DOC_ID) {
+ ShiftBasedRadixSorter<Hit, DocIdRadix, DocIdComparator, 24>::
+ radix_sort(DocIdRadix(), DocIdComparator(), &_hits[0], _hits.size(), 16);
+ _hitsSortOrder = SortOrder::DOC_ID;
+ _scoreOrder.clear();
+ }
+}
+
+HitCollector::HitCollector(uint32_t numDocs,
+ uint32_t maxHitsSize,
+ uint32_t maxReRankHitsSize)
+ : _numDocs(numDocs),
+ _maxHitsSize(maxHitsSize),
+ _maxReRankHitsSize(maxReRankHitsSize),
+ _maxDocIdVectorSize((numDocs + 31) / 32),
+ _hits(),
+ _hitsSortOrder(SortOrder::DOC_ID),
+ _docIdVector(),
+ _bitVector(),
+ _reRankedHits(),
+ _scale(1.0),
+ _adjust(0),
+ _hasReRanked(false),
+ _needReScore(false)
+{
+ if (_maxHitsSize > 0) {
+ _collector.reset(new RankedHitCollector(*this));
+ } else {
+ _collector.reset(new DocIdCollector<false>(*this));
+ }
+ _hits.reserve(maxHitsSize);
+}
+
+HitCollector::~HitCollector()
+{
+}
+
+void
+HitCollector::RankedHitCollector::collect(uint32_t docId, feature_t score)
+{
+ HitCollector & hc = this->_hc;
+ if (hc._hits.size() < hc._maxHitsSize) {
+ if (__builtin_expect(((hc._hits.size() > 0) &&
+ (docId < hc._hits.back().first) &&
+ (hc._hitsSortOrder == SortOrder::DOC_ID)), false))
+ {
+ hc._hitsSortOrder = SortOrder::NONE;
+ }
+ hc._hits.push_back(std::make_pair(docId, score));
+ } else {
+ collectAndChangeCollector(docId, score);
+ }
+}
+
+void
+HitCollector::RankedHitCollector::collectAndChangeCollector(uint32_t docId, feature_t score)
+{
+ HitCollector & hc = this->_hc;
+ Collector::UP newCollector;
+ if (hc._maxDocIdVectorSize > hc._maxHitsSize) {
+ // start using docid vector
+ hc._docIdVector.reserve(hc._maxDocIdVectorSize);
+ uint32_t iSize = hc._hits.size();
+ for (uint32_t i = 0; i < iSize; ++i) {
+ hc._docIdVector.push_back(hc._hits[i].first);
+ }
+ hc._docIdVector.push_back(docId);
+ newCollector.reset(new DocIdCollector<true>(hc));
+ } else {
+ // start using bit vector
+ hc._bitVector = BitVector::create(hc._numDocs);
+ hc._bitVector->invalidateCachedCount();
+ uint32_t iSize = hc._hits.size();
+ for (uint32_t i = 0; i < iSize; ++i) {
+ hc._bitVector->setBit(hc._hits[i].first);
+ }
+ hc._bitVector->setBit(docId);
+ newCollector.reset(new BitVectorCollector<true>(hc));
+ }
+ // treat hit vector as a heap
+ std::make_heap(hc._hits.begin(), hc._hits.end(), ScoreComparator());
+ hc._hitsSortOrder = SortOrder::HEAP;
+ this->considerForHitVector(docId, score);
+ hc._collector = std::move(newCollector);
+}
+
+template<bool CollectRankedHit>
+void
+HitCollector::DocIdCollector<CollectRankedHit>::collect(uint32_t docId, feature_t score)
+{
+ if (CollectRankedHit) {
+ this->considerForHitVector(docId, score);
+ }
+ HitCollector & hc = this->_hc;
+ if (hc._docIdVector.size() < hc._maxDocIdVectorSize) {
+ hc._docIdVector.push_back(docId);
+ } else {
+ collectAndChangeCollector(docId);
+ }
+}
+
+template<bool CollectRankedHit>
+void
+HitCollector::DocIdCollector<CollectRankedHit>::collectAndChangeCollector(uint32_t docId)
+{
+ HitCollector & hc = this->_hc;
+ // start using bit vector instead of docid array.
+ hc._bitVector = BitVector::create(hc._numDocs);
+ hc._bitVector->invalidateCachedCount();
+ uint32_t iSize = static_cast<uint32_t>(hc._docIdVector.size());
+ for (uint32_t i = 0; i < iSize; ++i) {
+ hc._bitVector->setBit(hc._docIdVector[i]);
+ }
+ std::vector<uint32_t> emptyVector;
+ emptyVector.swap(hc._docIdVector);
+ hc._bitVector->setBit(docId);
+ hc._collector.reset(new BitVectorCollector<CollectRankedHit>(hc)); // note - self-destruct.
+}
+
+std::vector<feature_t>
+HitCollector::getSortedHeapScores()
+{
+ std::vector<feature_t> scores;
+ size_t scoresToReturn = std::min(_hits.size(), static_cast<size_t>(_maxReRankHitsSize));
+ scores.reserve(scoresToReturn);
+ sortHitsByScore(scoresToReturn);
+ for (size_t i = 0; i < scoresToReturn; ++i) {
+ scores.push_back(_hits[_scoreOrder[i]].second);
+ }
+ return scores;
+}
+
+size_t
+HitCollector::reRank(DocumentScorer &scorer)
+{
+ return reRank(scorer, _maxReRankHitsSize);
+}
+
+size_t
+HitCollector::reRank(DocumentScorer &scorer, size_t count)
+{
+ size_t hitsToReRank = std::min(_hits.size(), count);
+ if (_hasReRanked || hitsToReRank == 0) {
+ return 0;
+ }
+ sortHitsByScore(hitsToReRank);
+ _reRankedHits.reserve(_reRankedHits.size() + hitsToReRank);
+ for (size_t i(0); i < hitsToReRank; i++) {
+ _reRankedHits.push_back(_hits[_scoreOrder[i]]);
+ }
+
+ Scores &initScores = _ranges.first;
+ Scores &finalScores = _ranges.second;
+ initScores = Scores(_reRankedHits.back().second,
+ _reRankedHits.front().second);
+ finalScores = Scores(std::numeric_limits<feature_t>::max(),
+ -std::numeric_limits<feature_t>::max());
+
+ std::sort(_reRankedHits.begin(), _reRankedHits.end()); // sort on docId
+ for (auto &hit : _reRankedHits) {
+ hit.second = scorer.score(hit.first);
+ finalScores.low = std::min(finalScores.low, hit.second);
+ finalScores.high = std::max(finalScores.high, hit.second);
+ }
+ _hasReRanked = true;
+ return hitsToReRank;
+}
+
+std::pair<Scores, Scores>
+HitCollector::getRanges() const
+{
+ return _ranges;
+}
+
+void
+HitCollector::setRanges(const std::pair<Scores, Scores> &ranges)
+{
+ _ranges = ranges;
+}
+
+namespace {
+
+void
+mergeHitsIntoResultSet(const std::vector<HitCollector::Hit> &hits, ResultSet &result)
+{
+ RankedHit *rhIter = result.getArray();
+ RankedHit *rhEnd = rhIter + result.getArrayUsed();
+ for (const auto &hit : hits) {
+ while (rhIter != rhEnd && rhIter->_docId != hit.first) {
+ // just set the iterators right
+ ++rhIter;
+ }
+ assert(rhIter != rhEnd); // the hits should be a subset of the hits in ranked hit array.
+ rhIter->_rankValue = hit.second;
+ }
+}
+
+}
+
+std::unique_ptr<ResultSet>
+HitCollector::getResultSet()
+{
+ Scores &initHeapScores = _ranges.first;
+ Scores &finalHeapScores = _ranges.second;
+ if (initHeapScores.low > finalHeapScores.low) {
+ // scale and adjust the score according to the range
+ // of the initial and final heap score values to avoid that
+ // a score from the first phase is larger than finalHeapScores.low
+ feature_t initRange = initHeapScores.high - initHeapScores.low;
+ if (initRange < 1.0) initRange = 1.0f;
+ feature_t finalRange = finalHeapScores.high - finalHeapScores.low;
+ if (finalRange < 1.0) finalRange = 1.0f;
+ _scale = finalRange / initRange;
+ _adjust = initHeapScores.low * _scale - finalHeapScores.low;
+ _needReScore = true;
+ }
+
+ // destroys the heap property or score sort order
+ sortHitsByDocId();
+
+ std::unique_ptr<ResultSet> rs(new ResultSet());
+ if ( ! _collector->isDocIdCollector() ) {
+ unsigned int iSize = _hits.size();
+ rs->allocArray(iSize);
+ RankedHit * rh = rs->getArray();
+ if (_needReScore) {
+ for (uint32_t i = 0; i < iSize; ++i) {
+ rh[i]._docId = _hits[i].first;
+ rh[i]._rankValue = getReScore(_hits[i].second);
+ }
+ } else {
+ for (uint32_t i = 0; i < iSize; ++i) {
+ rh[i]._docId = _hits[i].first;
+ rh[i]._rankValue = _hits[i].second;
+ }
+ }
+ rs->setArrayUsed(iSize);
+ } else {
+ unsigned int iSize = _hits.size();
+ unsigned int jSize = _docIdVector.size();
+ rs->allocArray(jSize);
+ RankedHit * rh = rs->getArray();
+ uint32_t i = 0;
+ if (_needReScore) {
+ for (uint32_t j = 0; j < jSize; ++j) {
+ uint32_t docId = _docIdVector[j];
+ rh[j]._docId = docId;
+ if (i < iSize && docId == _hits[i].first) {
+ rh[j]._rankValue = getReScore(_hits[i].second);
+ ++i;
+ } else {
+ rh[j]._rankValue = 0;
+ }
+ }
+ } else {
+ for (uint32_t j = 0; j < jSize; ++j) {
+ uint32_t docId = _docIdVector[j];
+ rh[j]._docId = docId;
+ if (i < iSize && docId == _hits[i].first) {
+ rh[j]._rankValue = _hits[i].second;
+ ++i;
+ } else {
+ rh[j]._rankValue = 0;
+ }
+ }
+ }
+ rs->setArrayUsed(jSize);
+ }
+
+ if (_hasReRanked) {
+ mergeHitsIntoResultSet(_reRankedHits, *rs.get());
+ }
+
+ if (_bitVector != NULL) {
+ rs->setBitOverflow(std::move(_bitVector));
+ }
+
+ return rs;
+}
+
+} // namespace queryeval
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/queryeval/hitcollector.h b/searchlib/src/vespa/searchlib/queryeval/hitcollector.h
new file mode 100644
index 00000000000..78f71bdf81a
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/hitcollector.h
@@ -0,0 +1,214 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "scores.h"
+#include <vespa/searchlib/common/hitrank.h>
+#include <vespa/searchlib/common/resultset.h>
+#include <vespa/searchlib/fef/matchdata.h>
+#include <vespa/vespalib/util/sort.h>
+#include <algorithm>
+#include <vector>
+
+namespace search {
+
+namespace queryeval {
+
+/**
+ * This class is used to store all hits found during parallel query evaluation.
+ **/
+class HitCollector {
+public:
+ typedef std::pair<uint32_t, feature_t> Hit;
+
+ /**
+ * Interface used to calculate the second phase score for the documents being re-ranked.
+ */
+ struct DocumentScorer {
+ virtual ~DocumentScorer() {}
+ virtual feature_t score(uint32_t docId) = 0;
+ };
+
+private:
+ enum class SortOrder { NONE, DOC_ID, HEAP };
+
+ const uint32_t _numDocs;
+ const uint32_t _maxHitsSize;
+ const uint32_t _maxReRankHitsSize;
+ const uint32_t _maxDocIdVectorSize;
+
+ std::vector<Hit> _hits; // used as a heap when _hits.size == _maxHitsSize
+ std::vector<uint32_t> _scoreOrder; // Holds an indirection to the N best hits
+ SortOrder _hitsSortOrder;
+ std::vector<uint32_t> _docIdVector;
+ BitVector::UP _bitVector;
+ std::vector<Hit> _reRankedHits;
+
+ std::pair<Scores, Scores> _ranges;
+ feature_t _scale;
+ feature_t _adjust;
+
+ bool _hasReRanked;
+ bool _needReScore;
+
+ struct ScoreComparator {
+ bool operator() (const Hit & lhs, const Hit & rhs) const {
+ if (lhs.second == rhs.second) {
+ return (lhs.first < rhs.first);
+ }
+ return (lhs.second >= rhs.second); // comparator for min-heap
+ }
+ };
+
+ struct IndirectScoreComparator {
+ IndirectScoreComparator(const Hit * hits) : _hits(hits) { }
+ bool operator() (uint32_t lhs, uint32_t rhs) const {
+ if (_hits[lhs].second == _hits[rhs].second) {
+ return (_hits[lhs].first < _hits[rhs].first);
+ }
+ return (_hits[lhs].second >= _hits[rhs].second); // operator for min-heap
+ }
+ const Hit * _hits;
+ };
+
+ struct IndirectScoreRadix {
+ IndirectScoreRadix(const Hit * hits) : _hits(hits) { }
+ uint64_t operator () (uint32_t v) {
+ return vespalib::convertForSort<double, false>::convert(_hits[v].second);
+ }
+ const Hit * _hits;
+ };
+ struct DocIdRadix {
+ uint32_t operator () (const Hit & v) { return v.first; }
+ };
+ struct DocIdComparator {
+ bool operator() (const Hit & lhs, const Hit & rhs) const {
+ return (lhs.first < rhs.first);
+ }
+ };
+
+ class Collector {
+ public:
+ typedef std::unique_ptr<Collector> UP;
+ virtual ~Collector() {}
+ virtual void collect(uint32_t docId, feature_t score) = 0;
+ virtual bool isRankedHitCollector() const { return false; }
+ virtual bool isDocIdCollector() const { return false; }
+ };
+
+ Collector::UP _collector;
+
+ class CollectorBase : public Collector {
+ public:
+ CollectorBase(HitCollector &hc) : _hc(hc) { }
+ void considerForHitVector(uint32_t docId, feature_t score) {
+ if (__builtin_expect((score > _hc._hits[0].second), false)) {
+ replaceHitInVector(docId, score);
+ }
+ }
+ protected:
+ void replaceHitInVector(uint32_t docId, feature_t score) {
+ // replace lowest scored hit in hit vector
+ std::pop_heap(_hc._hits.begin(), _hc._hits.end(), ScoreComparator());
+ _hc._hits.back().first = docId;
+ _hc._hits.back().second = score;
+ std::push_heap(_hc._hits.begin(), _hc._hits.end(), ScoreComparator());
+ }
+ HitCollector &_hc;
+ };
+
+ class RankedHitCollector : public CollectorBase {
+ public:
+ RankedHitCollector(HitCollector &hc) : CollectorBase(hc) { }
+ virtual void collect(uint32_t docId, feature_t score);
+ void collectAndChangeCollector(uint32_t docId, feature_t score) __attribute__((noinline));
+ virtual bool isRankedHitCollector() const { return true; }
+ };
+
+ template <bool CollectRankedHit>
+ class DocIdCollector : public CollectorBase {
+ public:
+ DocIdCollector(HitCollector &hc) : CollectorBase(hc) { }
+ virtual void collect(uint32_t docId, feature_t score);
+ void collectAndChangeCollector(uint32_t docId) __attribute__((noinline));
+ virtual bool isDocIdCollector() const { return true; }
+ };
+
+ template <bool CollectRankedHit>
+ class BitVectorCollector : public CollectorBase {
+ public:
+ BitVectorCollector(HitCollector &hc) : CollectorBase(hc) { }
+ virtual void collect(uint32_t docId, feature_t score) {
+ this->_hc._bitVector->setBit(docId);
+ if (CollectRankedHit) {
+ this->considerForHitVector(docId, score);
+ }
+ }
+ };
+
+ HitRank getReScore(feature_t score) const {
+ return ((score * _scale) - _adjust);
+ }
+ VESPA_DLL_LOCAL void sortHitsByScore(size_t topn);
+ VESPA_DLL_LOCAL void sortHitsByDocId();
+
+public:
+ /**
+ * Creates a hit collector used to store hits for doc ids in the
+ * range [0, numDocs>. Doc id and rank score are stored for the n
+ * (=maxHitsSize) best hits. The best m (=maxReRankHitsSize) hits are
+ * candidates for re-ranking. Note that n >= m.
+ *
+ * @param numDocs
+ * @param maxHitsSize
+ * @param maxReRankHitsSize
+ **/
+ HitCollector(uint32_t numDocs, uint32_t maxHitsSize, uint32_t maxReRankHitsSize);
+ ~HitCollector();
+
+ /**
+ * Adds the given hit to this collector. Stores doc id and rank
+ * score if the given hit is among the n (=maxHitsSize) best hits.
+ * Stores only doc id if it is not among the n best hits.
+ *
+ * @param docId the doc id for the hit
+ * @param score the first phase rank score for the hit
+ **/
+ void addHit(uint32_t docId, feature_t score) {
+ _collector->collect(docId, score);
+ }
+
+ /**
+ * Returns a sorted vector of scores for the hits that are stored
+ * in the heap. These are the candidates for re-ranking.
+ */
+ std::vector<feature_t> getSortedHeapScores();
+
+ /**
+ * Re-ranks the m (=maxHeapSize) best hits by invoking the score()
+ * method on the given document scorer. The best m hits are sorted on doc id
+ * so that score() is called in doc id order.
+ **/
+ size_t reRank(DocumentScorer &scorer);
+ size_t reRank(DocumentScorer &scorer, size_t count);
+
+ std::pair<Scores, Scores> getRanges() const;
+ void setRanges(const std::pair<Scores, Scores> &ranges);
+
+ /**
+ * Returns a result set based on the content of this collector.
+ * Invoking this method will destroy the heap property of the
+ * ranked hits and the match data heap.
+ *
+ * @param auto pointer to the result set
+ **/
+ std::unique_ptr<ResultSet> getResultSet();
+
+private:
+ HitCollector(const HitCollector &); // Not implemented
+ HitCollector &operator=(const HitCollector &); // Not implemented
+};
+
+} // namespace queryeval
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp
new file mode 100644
index 00000000000..4f9bf665796
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp
@@ -0,0 +1,584 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".queryeval.intermediate_blueprints");
+#include "intermediate_blueprints.h"
+#include "andnotsearch.h"
+#include "andsearch.h"
+#include "orsearch.h"
+#include "nearsearch.h"
+#include "ranksearch.h"
+#include "sourceblendersearch.h"
+#include "equivsearch.h"
+#include "termwise_blueprint_helper.h"
+#include "termwise_search.h"
+#include <vespa/searchlib/queryeval/wand/weak_and_search.h>
+
+namespace search {
+namespace queryeval {
+
+//-----------------------------------------------------------------------------
+
+namespace {
+
+template <typename CombineType>
+size_t lookup_create_source(std::vector<std::unique_ptr<CombineType> > &sources, uint32_t child_source) {
+ for (size_t i = 0; i < sources.size(); ++i) {
+ if (sources[i]->getSourceId() == child_source) {
+ return i;
+ }
+ }
+ sources.push_back(std::unique_ptr<CombineType>(new CombineType()));
+ sources.back()->setSourceId(child_source);
+ return (sources.size() - 1);
+}
+
+template <typename CombineType>
+void optimize_source_blenders(IntermediateBlueprint &self, size_t begin_idx) {
+ std::vector<size_t> source_blenders;
+ SourceBlenderBlueprint *reference = nullptr;
+ for (size_t i = begin_idx; i < self.childCnt(); ++i) {
+ SourceBlenderBlueprint *child = dynamic_cast<SourceBlenderBlueprint *>(&self.getChild(i));
+ if (child != nullptr) {
+ if (reference == nullptr || reference->isCompatibleWith(*child)) {
+ source_blenders.push_back(i);
+ reference = child;
+ }
+ }
+ }
+ if (source_blenders.size() > 1) { // maybe 2
+ Blueprint::UP blender_up;
+ std::vector<std::unique_ptr<CombineType> > sources;
+ while (!source_blenders.empty()) {
+ blender_up = self.removeChild(source_blenders.back());
+ source_blenders.pop_back();
+ SourceBlenderBlueprint *blender = dynamic_cast<SourceBlenderBlueprint *>(blender_up.get());
+ assert(blender != nullptr);
+ while (blender->childCnt() > 0) {
+ Blueprint::UP child_up = blender->removeChild(blender->childCnt() - 1);
+ size_t source_idx = lookup_create_source(sources, child_up->getSourceId());
+ sources[source_idx]->addChild(std::move(child_up));
+ }
+ }
+ SourceBlenderBlueprint *top = dynamic_cast<SourceBlenderBlueprint *>(blender_up.get());
+ assert(top != nullptr);
+ while (!sources.empty()) {
+ top->addChild(std::move(sources.back()));
+ sources.pop_back();
+ }
+ blender_up = Blueprint::optimize(std::move(blender_up));
+ self.addChild(std::move(blender_up));
+ }
+}
+
+} // namespace search::queryeval::<unnamed>
+
+//-----------------------------------------------------------------------------
+
+Blueprint::HitEstimate
+AndNotBlueprint::combine(const std::vector<HitEstimate> &data) const
+{
+ if (data.empty()) {
+ return HitEstimate();
+ }
+ return data[0];
+}
+
+FieldSpecBaseList
+AndNotBlueprint::exposeFields() const
+{
+ return FieldSpecBaseList();
+}
+
+void
+AndNotBlueprint::optimize_self()
+{
+ AndNotBlueprint *child = dynamic_cast<AndNotBlueprint *>(&getChild(0));
+ if (child != nullptr) {
+ while (child->childCnt() > 1) {
+ addChild(child->removeChild(1));
+ }
+ insertChild(1, child->removeChild(0));
+ removeChild(0);
+ }
+ for (size_t i = 1; i < childCnt(); ++i) {
+ if (getChild(i).getState().estimate().empty) {
+ removeChild(i--);
+ }
+ }
+ if (dynamic_cast<AndNotBlueprint *>(getParent()) == nullptr) {
+ optimize_source_blenders<OrBlueprint>(*this, 1);
+ }
+}
+
+Blueprint::UP
+AndNotBlueprint::get_replacement()
+{
+ if (childCnt() == 1) {
+ return removeChild(0);
+ }
+ return Blueprint::UP();
+}
+
+void
+AndNotBlueprint::sort(std::vector<Blueprint*> &children) const
+{
+ if (children.size() > 2) {
+ std::sort(children.begin() + 1, children.end(), GreaterEstimate());
+ }
+}
+
+bool
+AndNotBlueprint::inheritStrict(size_t i) const
+{
+ return (i == 0);
+}
+
+SearchIterator::UP
+AndNotBlueprint::createIntermediateSearch(const MultiSearch::Children &subSearches,
+ bool strict, search::fef::MatchData &md) const
+{
+ UnpackInfo unpackInfo(calculateUnpackInfo(md));
+ if (should_do_termwise_eval(unpackInfo, md.get_termwise_limit())) {
+ TermwiseBlueprintHelper helper(*this, subSearches, unpackInfo);
+ bool termwise_strict = (strict && inheritStrict(helper.first_termwise));
+ auto termwise_search = (helper.first_termwise == 0)
+ ? SearchIterator::UP(AndNotSearch::create(helper.termwise, termwise_strict))
+ : SearchIterator::UP(OrSearch::create(helper.termwise, termwise_strict));
+ helper.insert_termwise(std::move(termwise_search), termwise_strict);
+ if (helper.children.size() == 1) {
+ return SearchIterator::UP(helper.children.front());
+ }
+ return SearchIterator::UP(AndNotSearch::create(helper.children, strict));
+ }
+ return SearchIterator::UP(AndNotSearch::create(subSearches, strict));
+}
+
+//-----------------------------------------------------------------------------
+
+Blueprint::HitEstimate
+AndBlueprint::combine(const std::vector<HitEstimate> &data) const
+{
+ return min(data);
+}
+
+FieldSpecBaseList
+AndBlueprint::exposeFields() const
+{
+ return FieldSpecBaseList();
+}
+
+void
+AndBlueprint::optimize_self()
+{
+ for (size_t i = 0; i < childCnt(); ++i) {
+ AndBlueprint *child = dynamic_cast<AndBlueprint *>(&getChild(i));
+ if (child != nullptr) {
+ while (child->childCnt() > 0) {
+ addChild(child->removeChild(0));
+ }
+ removeChild(i--);
+ }
+ }
+ if (dynamic_cast<AndBlueprint *>(getParent()) == nullptr) {
+ optimize_source_blenders<AndBlueprint>(*this, 0);
+ }
+}
+
+Blueprint::UP
+AndBlueprint::get_replacement()
+{
+ if (childCnt() == 1) {
+ return removeChild(0);
+ }
+ return Blueprint::UP();
+}
+
+void
+AndBlueprint::sort(std::vector<Blueprint*> &children) const
+{
+ std::sort(children.begin(), children.end(), LessEstimate());
+}
+
+bool
+AndBlueprint::inheritStrict(size_t i) const
+{
+ return (i == 0);
+}
+
+SearchIterator::UP
+AndBlueprint::createIntermediateSearch(const MultiSearch::Children &subSearches,
+ bool strict, search::fef::MatchData & md) const
+{
+ UnpackInfo unpackInfo(calculateUnpackInfo(md));
+ AndSearch * search = 0;
+ if (should_do_termwise_eval(unpackInfo, md.get_termwise_limit())) {
+ TermwiseBlueprintHelper helper(*this, subSearches, unpackInfo);
+ bool termwise_strict = (strict && inheritStrict(helper.first_termwise));
+ auto termwise_search = SearchIterator::UP(AndSearch::create(helper.termwise, termwise_strict));
+ helper.insert_termwise(std::move(termwise_search), termwise_strict);
+ if (helper.children.size() == 1) {
+ return SearchIterator::UP(helper.children.front());
+ } else {
+ search = AndSearch::create(helper.children, strict, helper.termwise_unpack);
+ }
+ } else {
+ search = AndSearch::create(subSearches, strict, unpackInfo);
+ }
+ search->estimate(getState().estimate().estHits);
+ return SearchIterator::UP(search);
+}
+
+//-----------------------------------------------------------------------------
+
+Blueprint::HitEstimate
+OrBlueprint::combine(const std::vector<HitEstimate> &data) const
+{
+ return max(data);
+}
+
+FieldSpecBaseList
+OrBlueprint::exposeFields() const
+{
+ return mixChildrenFields();
+}
+
+void
+OrBlueprint::optimize_self()
+{
+ for (size_t i = 0; (childCnt() > 1) && (i < childCnt()); ++i) {
+ OrBlueprint *child = dynamic_cast<OrBlueprint *>(&getChild(i));
+ if (child != nullptr) {
+ while (child->childCnt() > 0) {
+ addChild(child->removeChild(0));
+ }
+ removeChild(i--);
+ } else if (getChild(i).getState().estimate().empty) {
+ removeChild(i--);
+ }
+ }
+ if (dynamic_cast<OrBlueprint *>(getParent()) == nullptr) {
+ optimize_source_blenders<OrBlueprint>(*this, 0);
+ }
+}
+
+Blueprint::UP
+OrBlueprint::get_replacement()
+{
+ if (childCnt() == 1) {
+ return removeChild(0);
+ }
+ return Blueprint::UP();
+}
+
+void
+OrBlueprint::sort(std::vector<Blueprint*> &children) const
+{
+ std::sort(children.begin(), children.end(), GreaterEstimate());
+}
+
+bool
+OrBlueprint::inheritStrict(size_t) const
+{
+ return true;
+}
+
+SearchIterator::UP
+OrBlueprint::createIntermediateSearch(const MultiSearch::Children &subSearches,
+ bool strict, search::fef::MatchData & md) const
+{
+ UnpackInfo unpackInfo(calculateUnpackInfo(md));
+ if (should_do_termwise_eval(unpackInfo, md.get_termwise_limit())) {
+ TermwiseBlueprintHelper helper(*this, subSearches, unpackInfo);
+ bool termwise_strict = (strict && inheritStrict(helper.first_termwise));
+ auto termwise_search = SearchIterator::UP(OrSearch::create(helper.termwise, termwise_strict));
+ helper.insert_termwise(std::move(termwise_search), termwise_strict);
+ if (helper.children.size() == 1) {
+ return SearchIterator::UP(helper.children.front());
+ }
+ return SearchIterator::UP(OrSearch::create(helper.children, strict, helper.termwise_unpack));
+ }
+ return SearchIterator::UP(OrSearch::create(subSearches, strict, unpackInfo));
+}
+
+//-----------------------------------------------------------------------------
+
+Blueprint::HitEstimate
+WeakAndBlueprint::combine(const std::vector<HitEstimate> &data) const
+{
+ HitEstimate childEst = max(data);
+ HitEstimate myEst(_n, false);
+ if (childEst < myEst) {
+ return childEst;
+ }
+ return myEst;
+}
+
+FieldSpecBaseList
+WeakAndBlueprint::exposeFields() const
+{
+ return FieldSpecBaseList();
+}
+
+void
+WeakAndBlueprint::sort(std::vector<Blueprint*> &) const
+{
+ // order needs to stay the same as _weights
+}
+
+bool
+WeakAndBlueprint::inheritStrict(size_t) const
+{
+ return true;
+}
+
+SearchIterator::UP
+WeakAndBlueprint::createIntermediateSearch(const MultiSearch::Children &subSearches,
+ bool strict, search::fef::MatchData &) const
+{
+ WeakAndSearch::Terms terms;
+ assert(subSearches.size() == childCnt());
+ assert(_weights.size() == childCnt());
+ for (size_t i = 0; i < subSearches.size(); ++i) {
+ terms.push_back(wand::Term(subSearches[i],
+ _weights[i],
+ getChild(i).getState().estimate().estHits));
+ }
+ return SearchIterator::UP(WeakAndSearch::create(terms, _n, strict));
+}
+
+//-----------------------------------------------------------------------------
+
+Blueprint::HitEstimate
+NearBlueprint::combine(const std::vector<HitEstimate> &data) const
+{
+ return min(data);
+}
+
+FieldSpecBaseList
+NearBlueprint::exposeFields() const
+{
+ return FieldSpecBaseList();
+}
+
+void
+NearBlueprint::sort(std::vector<Blueprint*> &children) const
+{
+ std::sort(children.begin(), children.end(), LessEstimate());
+}
+
+bool
+NearBlueprint::inheritStrict(size_t i) const
+{
+ return (i == 0);
+}
+
+SearchIterator::UP
+NearBlueprint::createIntermediateSearch(const MultiSearch::Children &subSearches,
+ bool strict, search::fef::MatchData &md) const
+{
+ search::fef::TermFieldMatchDataArray tfmda;
+ for (size_t i = 0; i < childCnt(); ++i) {
+ const State &cs = getChild(i).getState();
+ for (size_t j = 0; j < cs.numFields(); ++j) {
+ tfmda.add(cs.field(j).resolve(md));
+ }
+ }
+ return SearchIterator::UP(new NearSearch(subSearches, tfmda, _window, strict));
+}
+
+//-----------------------------------------------------------------------------
+
+Blueprint::HitEstimate
+ONearBlueprint::combine(const std::vector<HitEstimate> &data) const
+{
+ return min(data);
+}
+
+FieldSpecBaseList
+ONearBlueprint::exposeFields() const
+{
+ return FieldSpecBaseList();
+}
+
+void
+ONearBlueprint::sort(std::vector<Blueprint*> &children) const
+{
+ // ordered near cannot sort children here
+ (void)children;
+}
+
+bool
+ONearBlueprint::inheritStrict(size_t i) const
+{
+ return (i == 0);
+}
+
+SearchIterator::UP
+ONearBlueprint::createIntermediateSearch(const MultiSearch::Children &subSearches,
+ bool strict, search::fef::MatchData &md) const
+{
+ search::fef::TermFieldMatchDataArray tfmda;
+ for (size_t i = 0; i < childCnt(); ++i) {
+ const State &cs = getChild(i).getState();
+ for (size_t j = 0; j < cs.numFields(); ++j) {
+ tfmda.add(cs.field(j).resolve(md));
+ }
+ }
+ // could sort subSearches here
+ // but then strictness inheritance would also need to be fixed
+ return SearchIterator::UP(new ONearSearch(subSearches, tfmda, _window, strict));
+}
+
+//-----------------------------------------------------------------------------
+
+Blueprint::HitEstimate
+RankBlueprint::combine(const std::vector<HitEstimate> &data) const
+{
+ if (data.empty()) {
+ return HitEstimate();
+ }
+ return data[0];
+}
+
+FieldSpecBaseList
+RankBlueprint::exposeFields() const
+{
+ return FieldSpecBaseList();
+}
+
+void
+RankBlueprint::optimize_self()
+{
+ for (size_t i = 1; i < childCnt(); ++i) {
+ if (getChild(i).getState().estimate().empty) {
+ removeChild(i--);
+ }
+ }
+ optimize_source_blenders<OrBlueprint>(*this, 1);
+}
+
+Blueprint::UP
+RankBlueprint::get_replacement()
+{
+ if (childCnt() == 1) {
+ return removeChild(0);
+ }
+ return Blueprint::UP();
+}
+
+void
+RankBlueprint::sort(std::vector<Blueprint*> &children) const
+{
+ (void)children;
+}
+
+bool
+RankBlueprint::inheritStrict(size_t i) const
+{
+ return (i == 0);
+}
+
+SearchIterator::UP
+RankBlueprint::createIntermediateSearch(const MultiSearch::Children &subSearches,
+ bool strict, search::fef::MatchData & md) const
+{
+ UnpackInfo unpackInfo(calculateUnpackInfo(md));
+ if (unpackInfo.unpackAll()) {
+ return SearchIterator::UP(RankSearch::create(subSearches, strict));
+ } else {
+ MultiSearch::Children requireUnpack;
+ requireUnpack.reserve(subSearches.size());
+ requireUnpack.push_back(subSearches[0]);
+ for (size_t i(1); i < subSearches.size(); i++) {
+ if (unpackInfo.needUnpack(i)) {
+ requireUnpack.push_back(subSearches[i]);
+ } else {
+ delete subSearches[i];
+ }
+ }
+ if (requireUnpack.size() == 1) {
+ return SearchIterator::UP(requireUnpack[0]);
+ } else {
+ return SearchIterator::UP(RankSearch::create(requireUnpack, strict));
+ }
+ }
+}
+
+//-----------------------------------------------------------------------------
+
+SourceBlenderBlueprint::SourceBlenderBlueprint(const ISourceSelector &selector)
+ : _selector(selector)
+{
+}
+
+Blueprint::HitEstimate
+SourceBlenderBlueprint::combine(const std::vector<HitEstimate> &data) const
+{
+ return max(data);
+}
+
+FieldSpecBaseList
+SourceBlenderBlueprint::exposeFields() const
+{
+ return mixChildrenFields();
+}
+
+void
+SourceBlenderBlueprint::sort(std::vector<Blueprint*> &) const
+{
+}
+
+bool
+SourceBlenderBlueprint::inheritStrict(size_t) const
+{
+ return true;
+}
+
+class FindSource : public Blueprint::IPredicate
+{
+public:
+ FindSource(uint32_t sourceId) : _sourceId(sourceId) { }
+ virtual bool check(const Blueprint & bp) const { return bp.getSourceId() == _sourceId; }
+private:
+ uint32_t _sourceId;
+};
+
+ssize_t
+SourceBlenderBlueprint::findSource(uint32_t sourceId) const
+{
+ ssize_t index(-1);
+ FindSource fs(sourceId);
+ IndexList list = find(fs);
+ if ( ! list.empty()) {
+ index = list.front();
+ }
+ return index;
+}
+
+SearchIterator::UP
+SourceBlenderBlueprint::createIntermediateSearch(const MultiSearch::Children &subSearches,
+ bool strict, search::fef::MatchData &) const
+{
+ SourceBlenderSearch::Children children;
+ assert(subSearches.size() == childCnt());
+ for (size_t i = 0; i < subSearches.size(); ++i) {
+ children.push_back(SourceBlenderSearch::Child(subSearches[i],
+ getChild(i).getSourceId()));
+ assert(children.back().sourceId != 0xffffffff);
+ }
+ return SearchIterator::UP(SourceBlenderSearch::create(_selector.createIterator(),
+ children, strict));
+}
+
+bool
+SourceBlenderBlueprint::isCompatibleWith(const SourceBlenderBlueprint &other) const
+{
+ return (&_selector == &other._selector);
+}
+
+//-----------------------------------------------------------------------------
+
+} // namespace queryeval
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h
new file mode 100644
index 00000000000..b36538b55d4
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h
@@ -0,0 +1,181 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "blueprint.h"
+#include "isourceselector.h"
+#include "searchable.h"
+#include <vespa/searchlib/queryeval/multisearch.h>
+#include <vector>
+#include <map>
+
+namespace search {
+namespace queryeval {
+
+//-----------------------------------------------------------------------------
+
+class AndNotBlueprint : public IntermediateBlueprint
+{
+public:
+ bool supports_termwise_children() const override { return true; }
+ virtual HitEstimate combine(const std::vector<HitEstimate> &data) const;
+ virtual FieldSpecBaseList exposeFields() const;
+ virtual void optimize_self() override;
+ virtual Blueprint::UP get_replacement() override;
+ virtual void sort(std::vector<Blueprint*> &children) const;
+ virtual bool inheritStrict(size_t i) const;
+ virtual SearchIterator::UP
+ createIntermediateSearch(const MultiSearch::Children &subSearches,
+ bool strict, search::fef::MatchData &md) const;
+private:
+ virtual bool isPositive(size_t index) const { return index == 0; }
+};
+
+//-----------------------------------------------------------------------------
+
+class AndBlueprint : public IntermediateBlueprint
+{
+public:
+ bool supports_termwise_children() const override { return true; }
+ virtual HitEstimate combine(const std::vector<HitEstimate> &data) const;
+ virtual FieldSpecBaseList exposeFields() const;
+ virtual void optimize_self() override;
+ virtual Blueprint::UP get_replacement() override;
+ virtual void sort(std::vector<Blueprint*> &children) const;
+ virtual bool inheritStrict(size_t i) const;
+ virtual SearchIterator::UP
+ createIntermediateSearch(const MultiSearch::Children &subSearches,
+ bool strict, search::fef::MatchData &md) const;
+};
+
+//-----------------------------------------------------------------------------
+
+class OrBlueprint : public IntermediateBlueprint
+{
+public:
+ bool supports_termwise_children() const override { return true; }
+ virtual HitEstimate combine(const std::vector<HitEstimate> &data) const;
+ virtual FieldSpecBaseList exposeFields() const;
+ virtual void optimize_self() override;
+ virtual Blueprint::UP get_replacement() override;
+ virtual void sort(std::vector<Blueprint*> &children) const;
+ virtual bool inheritStrict(size_t i) const;
+ virtual SearchIterator::UP
+ createIntermediateSearch(const MultiSearch::Children &subSearches,
+ bool strict, search::fef::MatchData &md) const;
+};
+
+//-----------------------------------------------------------------------------
+
+class WeakAndBlueprint : public IntermediateBlueprint
+{
+private:
+ uint32_t _n;
+ std::vector<uint32_t> _weights;
+
+public:
+ virtual HitEstimate combine(const std::vector<HitEstimate> &data) const;
+ virtual FieldSpecBaseList exposeFields() const;
+ virtual void sort(std::vector<Blueprint*> &children) const;
+ virtual bool inheritStrict(size_t i) const;
+ virtual SearchIterator::UP
+ createIntermediateSearch(const MultiSearch::Children &subSearches,
+ bool strict, search::fef::MatchData &md) const;
+
+ WeakAndBlueprint(uint32_t n) : _n(n) {}
+ void addTerm(Blueprint::UP bp, uint32_t weight) {
+ addChild(std::move(bp));
+ _weights.push_back(weight);
+ }
+ uint32_t getN() const { return _n; }
+ const std::vector<uint32_t> &getWeights() const { return _weights; }
+};
+
+//-----------------------------------------------------------------------------
+
+class NearBlueprint : public IntermediateBlueprint
+{
+private:
+ uint32_t _window;
+
+public:
+ virtual HitEstimate combine(const std::vector<HitEstimate> &data) const;
+ virtual FieldSpecBaseList exposeFields() const;
+ virtual bool should_optimize_children() const override { return false; }
+ virtual void sort(std::vector<Blueprint*> &children) const;
+ virtual bool inheritStrict(size_t i) const;
+ virtual SearchIterator::UP
+ createIntermediateSearch(const MultiSearch::Children &subSearches,
+ bool strict, search::fef::MatchData &md) const;
+
+ NearBlueprint(uint32_t window) : _window(window) {}
+};
+
+//-----------------------------------------------------------------------------
+
+class ONearBlueprint : public IntermediateBlueprint
+{
+private:
+ uint32_t _window;
+
+public:
+ virtual HitEstimate combine(const std::vector<HitEstimate> &data) const;
+ virtual FieldSpecBaseList exposeFields() const;
+ virtual bool should_optimize_children() const override { return false; }
+ virtual void sort(std::vector<Blueprint*> &children) const;
+ virtual bool inheritStrict(size_t i) const;
+ virtual SearchIterator::UP
+ createIntermediateSearch(const MultiSearch::Children &subSearches,
+ bool strict, search::fef::MatchData &md) const;
+
+ ONearBlueprint(uint32_t window) : _window(window) {}
+};
+
+//-----------------------------------------------------------------------------
+
+class RankBlueprint : public IntermediateBlueprint
+{
+public:
+ virtual HitEstimate combine(const std::vector<HitEstimate> &data) const;
+ virtual FieldSpecBaseList exposeFields() const;
+ virtual void optimize_self() override;
+ virtual Blueprint::UP get_replacement() override;
+ virtual void sort(std::vector<Blueprint*> &children) const;
+ virtual bool inheritStrict(size_t i) const;
+ virtual SearchIterator::UP
+ createIntermediateSearch(const MultiSearch::Children &subSearches,
+ bool strict, search::fef::MatchData &md) const;
+};
+
+//-----------------------------------------------------------------------------
+
+class SourceBlenderBlueprint : public IntermediateBlueprint
+{
+private:
+ const ISourceSelector &_selector;
+
+public:
+ SourceBlenderBlueprint(const ISourceSelector &selector);
+ virtual HitEstimate combine(const std::vector<HitEstimate> &data) const;
+ virtual FieldSpecBaseList exposeFields() const;
+ virtual void sort(std::vector<Blueprint*> &children) const;
+ virtual bool inheritStrict(size_t i) const;
+ /**
+ * Will return the index matching the given sourceId.
+ * @param sourceId The sourceid to find.
+ * @return The index to the child representing the sourceId. -1 if not found.
+ */
+ ssize_t findSource(uint32_t sourceId) const;
+ virtual SearchIterator::UP
+ createIntermediateSearch(const MultiSearch::Children &subSearches,
+ bool strict, search::fef::MatchData &md) const;
+
+ /** check if this blueprint has the same source selector as the other */
+ bool isCompatibleWith(const SourceBlenderBlueprint &other) const;
+};
+
+//-----------------------------------------------------------------------------
+
+} // namespace queryeval
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/queryeval/irequestcontext.h b/searchlib/src/vespa/searchlib/queryeval/irequestcontext.h
new file mode 100644
index 00000000000..0ee13ccbde5
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/irequestcontext.h
@@ -0,0 +1,33 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/vespalib/util/doom.h>
+#include <vespa/searchlib/attribute/attributevector.h>
+
+namespace search {
+namespace queryeval {
+
+/**
+ * Provides a context that follows the life of a query.
+ */
+class IRequestContext
+{
+public:
+ virtual ~IRequestContext() { }
+ /**
+ * Provides the time of doom for the query.
+ * @return time of doom.
+ */
+ virtual const vespalib::Doom & getDoom() const = 0;
+
+ /**
+ * Provide access to attributevectors
+ * @return AttributeVector or nullptr if it does not exist.
+ */
+ virtual const AttributeVector * getAttribute(const vespalib::string & name) const = 0;
+ virtual const AttributeVector * getAttributeStableEnum(const vespalib::string & name) const = 0;
+};
+
+} // namespace queryeval
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/queryeval/isourceselector.cpp b/searchlib/src/vespa/searchlib/queryeval/isourceselector.cpp
new file mode 100644
index 00000000000..7821be450c0
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/isourceselector.cpp
@@ -0,0 +1,16 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/searchlib/queryeval/isourceselector.h>
+
+namespace search {
+namespace queryeval {
+
+ISourceSelector::ISourceSelector(Source defaultSource) :
+ _baseId(0),
+ _defaultSource(defaultSource)
+{
+ assert(defaultSource < SOURCE_LIMIT);
+}
+
+}
+
+}
diff --git a/searchlib/src/vespa/searchlib/queryeval/isourceselector.h b/searchlib/src/vespa/searchlib/queryeval/isourceselector.h
new file mode 100644
index 00000000000..4d3ce3ee302
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/isourceselector.h
@@ -0,0 +1,103 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <stdint.h>
+#include <vespa/searchlib/attribute/singlenumericattribute.h>
+
+namespace search {
+namespace queryeval {
+
+typedef uint8_t Source;
+
+/**
+ * Component used to select between sources during result blending.
+ **/
+class ISourceSelector
+{
+protected:
+ typedef SingleValueNumericAttribute<IntegerAttributeTemplate<int8_t> > SourceStore;
+public:
+ typedef std::unique_ptr<ISourceSelector> UP;
+ typedef std::shared_ptr<ISourceSelector> SP;
+ static const Source SOURCE_LIMIT = 254u;
+
+ /**
+ * Read-only interface to the data held by the parent source
+ * selector.
+ **/
+ class Iterator {
+ public:
+ Iterator(const SourceStore & source)
+ : _source(source)
+ {
+ }
+ typedef std::unique_ptr<Iterator> UP;
+
+ /**
+ * Obtain the source to be used for the given document. This
+ * function should always be called with increasing document
+ * ids.
+ *
+ * @return source id
+ * @param docId document id
+ **/
+ queryeval::Source getSource(uint32_t docId) const {
+ return _source.getFast(docId);
+ }
+
+ /**
+ * empty; defined for safe subclassing.
+ **/
+ virtual ~Iterator() {}
+
+ uint32_t
+ getDocIdLimit(void) const
+ {
+ return _source.getCommittedDocIdLimit();
+ }
+ private:
+ const SourceStore & _source;
+ };
+
+protected:
+ ISourceSelector(Source defaultSource);
+public:
+ void setBaseId(uint32_t baseId) { _baseId = baseId; }
+ uint32_t getBaseId() const { return _baseId; }
+ Source getDefaultSource() const { return _defaultSource; }
+ /**
+ * Set the source to be used for a given document.
+ *
+ * @param docId local document id
+ * @param source source for this document
+ **/
+ virtual void setSource(uint32_t docId, Source source) = 0;
+
+ /**
+ * Gets the limit for docId numbers known to this selector.
+ *
+ * @return one above highest known doc id
+ **/
+ virtual uint32_t getDocIdLimit() const = 0;
+
+ /**
+ * Create a new iterator over the data held by this source
+ * selector.
+ *
+ * @return source selection iterator
+ **/
+ virtual Iterator::UP createIterator() const = 0;
+
+ /**
+ * empty; defined for safe subclassing.
+ **/
+ virtual ~ISourceSelector() {}
+private:
+ uint32_t _baseId;
+ Source _defaultSource;
+};
+
+} // namespace queryeval
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/queryeval/iterator_pack.cpp b/searchlib/src/vespa/searchlib/queryeval/iterator_pack.cpp
new file mode 100644
index 00000000000..b73af2a721e
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/iterator_pack.cpp
@@ -0,0 +1,9 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "iterator_pack.h"
+
+namespace search {
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/queryeval/iterator_pack.h b/searchlib/src/vespa/searchlib/queryeval/iterator_pack.h
new file mode 100644
index 00000000000..b64af50a827
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/iterator_pack.h
@@ -0,0 +1,84 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/fastos/fastos.h>
+#include "searchiterator.h"
+#include <vespa/searchlib/fef/termfieldmatchdata.h>
+#include <vespa/searchlib/fef/matchdata.h>
+
+namespace search {
+namespace queryeval {
+
+class SearchIteratorPack
+{
+private:
+ std::vector<SearchIterator::UP> _children;
+ std::vector<fef::TermFieldMatchData*> _childMatch;
+ fef::MatchData::UP _md;
+
+public:
+ SearchIteratorPack() : _children(), _childMatch(), _md() {}
+ SearchIteratorPack(SearchIteratorPack &&rhs)
+ : _children(std::move(rhs._children)),
+ _childMatch(std::move(rhs._childMatch)),
+ _md(std::move(rhs._md)) {}
+
+ SearchIteratorPack &operator=(SearchIteratorPack &&rhs) {
+ _children = std::move(rhs._children);
+ _childMatch = std::move(rhs._childMatch);
+ _md = std::move(rhs._md);
+ return *this;
+ }
+
+ SearchIteratorPack(const std::vector<SearchIterator*> &children,
+ const std::vector<fef::TermFieldMatchData*> &childMatch,
+ fef::MatchData::UP md)
+ : _children(),
+ _childMatch(childMatch),
+ _md(std::move(md))
+ {
+ _children.reserve(children.size());
+ for (auto child: children) {
+ _children.emplace_back(child);
+ }
+ assert((_children.size() == _childMatch.size()) ||
+ (_childMatch.empty() && (_md.get() == nullptr)));
+ }
+
+ explicit SearchIteratorPack(const std::vector<SearchIterator*> &children)
+ : SearchIteratorPack(children,
+ std::vector<fef::TermFieldMatchData*>(),
+ fef::MatchData::UP()) {}
+
+ uint32_t get_docid(uint32_t ref) const {
+ return _children[ref]->getDocId();
+ }
+
+ uint32_t seek(uint32_t ref, uint32_t docid) {
+ _children[ref]->seek(docid);
+ return _children[ref]->getDocId();
+ }
+
+ int32_t get_weight(uint32_t ref, uint32_t docid) {
+ _children[ref]->doUnpack(docid);
+ return _childMatch[ref]->getWeight();
+ }
+
+ void unpack(uint32_t ref, uint32_t docid) {
+ _children[ref]->doUnpack(docid);
+ }
+
+ size_t size() const {
+ return _children.size();
+ }
+ void initRange(uint32_t begin, uint32_t end) {
+ for (auto & child: _children) {
+ child->initRange(begin, end);
+ }
+ }
+};
+
+} // namespace queryevel
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/queryeval/iterators.cpp b/searchlib/src/vespa/searchlib/queryeval/iterators.cpp
new file mode 100644
index 00000000000..d62a1b67069
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/iterators.cpp
@@ -0,0 +1,24 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright (C) 2002-2003 Fast Search & Transfer ASA
+// Copyright (C) 2003 Overture Services Norway AS
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".iterators");
+
+#include "iterators.h"
+
+namespace search {
+
+namespace queryeval {
+
+RankedSearchIteratorBase::
+RankedSearchIteratorBase(const fef::TermFieldMatchDataArray &matchData)
+ : SearchIterator(),
+ _matchData(matchData),
+ _needUnpack(1)
+{ }
+
+} // namespace queryeval
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/queryeval/iterators.h b/searchlib/src/vespa/searchlib/queryeval/iterators.h
new file mode 100644
index 00000000000..565b7aff455
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/iterators.h
@@ -0,0 +1,38 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright (C) 2002-2003 Fast Search & Transfer ASA
+// Copyright (C) 2003 Overture Services Norway AS
+
+#pragma once
+
+#include "searchiterator.h"
+#include <vespa/searchlib/fef/termfieldmatchdataarray.h>
+
+namespace search
+{
+
+namespace queryeval
+{
+
+class DocIdAndFeatures;
+
+class RankedSearchIteratorBase : public SearchIterator
+{
+public:
+ fef::TermFieldMatchDataArray _matchData;
+private:
+ uint32_t _needUnpack;
+protected:
+ bool getUnpacked() const { return _needUnpack == 0; }
+ void setUnpacked() { _needUnpack = 0; }
+ void clearUnpacked() { _needUnpack = 1; }
+ uint32_t getNeedUnpack() const { return _needUnpack; }
+ void incNeedUnpack() { ++_needUnpack; }
+
+public:
+ RankedSearchIteratorBase(const fef::TermFieldMatchDataArray &matchData);
+};
+
+} // namespace queryeval
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/queryeval/leaf_blueprints.cpp b/searchlib/src/vespa/searchlib/queryeval/leaf_blueprints.cpp
new file mode 100644
index 00000000000..4ddef401dd5
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/leaf_blueprints.cpp
@@ -0,0 +1,91 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".queryeval.leaf_blueprints");
+#include "leaf_blueprints.h"
+#include "emptysearch.h"
+#include "simplesearch.h"
+#include "fake_search.h"
+
+namespace search {
+namespace queryeval {
+
+//-----------------------------------------------------------------------------
+
+SearchIterator::UP
+EmptyBlueprint::createLeafSearch(const search::fef::TermFieldMatchDataArray &,
+ bool) const
+{
+ return SearchIterator::UP(new EmptySearch());
+}
+
+EmptyBlueprint::EmptyBlueprint(const FieldSpecBase &field)
+ : SimpleLeafBlueprint(field)
+{
+}
+
+EmptyBlueprint::EmptyBlueprint(const FieldSpecBaseList &fields)
+ : SimpleLeafBlueprint(fields)
+{
+}
+
+EmptyBlueprint::EmptyBlueprint()
+ : SimpleLeafBlueprint(FieldSpecBaseList())
+{
+}
+
+//-----------------------------------------------------------------------------
+
+SearchIterator::UP
+SimpleBlueprint::createLeafSearch(const search::fef::TermFieldMatchDataArray &,
+ bool) const
+{
+ SimpleSearch *ss = new SimpleSearch(_result);
+ SearchIterator::UP search(ss);
+ ss->tag(_tag);
+ return search;
+}
+
+SimpleBlueprint::SimpleBlueprint(const SimpleResult &result)
+ : SimpleLeafBlueprint(FieldSpecBaseList()),
+ _tag(),
+ _result(result)
+{
+ setEstimate(HitEstimate(result.getHitCount(),
+ (result.getHitCount() == 0)));
+}
+
+SimpleBlueprint &
+SimpleBlueprint::tag(const vespalib::string &t)
+{
+ _tag = t;
+ return *this;
+}
+
+//-----------------------------------------------------------------------------
+
+SearchIterator::UP
+FakeBlueprint::createLeafSearch(const fef::TermFieldMatchDataArray &tfmda,
+ bool) const
+{
+ return SearchIterator::UP(new FakeSearch(_tag, _field.getName(), _term,
+ _result, tfmda));
+}
+
+FakeBlueprint::FakeBlueprint(const FieldSpec &field,
+ const FakeResult &result)
+ : SimpleLeafBlueprint(field),
+ _tag("<tag>"),
+ _term("<term>"),
+ _field(field),
+ _result(result)
+{
+ setEstimate(HitEstimate(result.inspect().size(),
+ result.inspect().empty()));
+}
+
+//-----------------------------------------------------------------------------
+
+} // namespace queryeval
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/queryeval/leaf_blueprints.h b/searchlib/src/vespa/searchlib/queryeval/leaf_blueprints.h
new file mode 100644
index 00000000000..eca464c846d
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/leaf_blueprints.h
@@ -0,0 +1,82 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "blueprint.h"
+#include "simpleresult.h"
+#include "fake_result.h"
+#include "searchable.h"
+
+namespace search {
+
+namespace queryeval {
+
+//-----------------------------------------------------------------------------
+
+class EmptyBlueprint : public SimpleLeafBlueprint
+{
+protected:
+ virtual SearchIterator::UP
+ createLeafSearch(const search::fef::TermFieldMatchDataArray &tfmda,
+ bool strict) const;
+
+public:
+ EmptyBlueprint(const FieldSpecBaseList &fields);
+ EmptyBlueprint(const FieldSpecBase &field);
+ EmptyBlueprint();
+};
+
+//-----------------------------------------------------------------------------
+
+class SimpleBlueprint : public SimpleLeafBlueprint
+{
+private:
+ vespalib::string _tag;
+ SimpleResult _result;
+
+protected:
+ virtual SearchIterator::UP
+ createLeafSearch(const search::fef::TermFieldMatchDataArray &tfmda,
+ bool strict) const;
+
+public:
+ SimpleBlueprint(const SimpleResult &result);
+ SimpleBlueprint &tag(const vespalib::string &tag);
+ const vespalib::string &tag() const { return _tag; }
+};
+
+//-----------------------------------------------------------------------------
+
+class FakeBlueprint : public SimpleLeafBlueprint
+{
+private:
+ vespalib::string _tag;
+ vespalib::string _term;
+ FieldSpec _field;
+ FakeResult _result;
+
+protected:
+ virtual SearchIterator::UP
+ createLeafSearch(const fef::TermFieldMatchDataArray &tfmda,
+ bool strict) const;
+
+public:
+ FakeBlueprint(const FieldSpec &field,
+ const FakeResult &result);
+
+ FakeBlueprint &tag(const vespalib::string &t) {
+ _tag = t;
+ return *this;
+ }
+
+ FakeBlueprint &term(const vespalib::string &t) {
+ _term = t;
+ return *this;
+ }
+};
+
+//-----------------------------------------------------------------------------
+
+} // namespace queryeval
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/queryeval/monitoring_dump_iterator.cpp b/searchlib/src/vespa/searchlib/queryeval/monitoring_dump_iterator.cpp
new file mode 100644
index 00000000000..198634c56a9
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/monitoring_dump_iterator.cpp
@@ -0,0 +1,37 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".queryeval.monitoring_dump_iterator");
+#include "monitoring_dump_iterator.h"
+
+namespace search {
+namespace queryeval {
+
+MonitoringDumpIterator::MonitoringDumpIterator(MonitoringSearchIterator::UP iterator)
+ : _search(std::move(iterator))
+{
+}
+
+MonitoringDumpIterator::~MonitoringDumpIterator()
+{
+ MonitoringSearchIterator::Dumper dumper(4, 25, 7, 10, 6);
+ visit(dumper, "", *_search);
+ LOG(info, "Search stats: %s", dumper.toString().c_str());
+}
+
+void
+MonitoringDumpIterator::doSeek(uint32_t docId)
+{
+ _search->seek(docId);
+ setDocId(_search->getDocId());
+}
+
+void
+MonitoringDumpIterator::doUnpack(uint32_t docId)
+{
+ _search->unpack(docId);
+}
+
+} // namespace queryeval
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/queryeval/monitoring_dump_iterator.h b/searchlib/src/vespa/searchlib/queryeval/monitoring_dump_iterator.h
new file mode 100644
index 00000000000..6a6ab1f63a4
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/monitoring_dump_iterator.h
@@ -0,0 +1,38 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include "monitoring_search_iterator.h"
+
+namespace search {
+namespace queryeval {
+
+/**
+ * Search iterator that dumps the search stats of the underlying
+ * monitoring search iterator upon destruction.
+ */
+class MonitoringDumpIterator : public SearchIterator
+{
+private:
+ MonitoringSearchIterator::UP _search;
+
+public:
+ MonitoringDumpIterator(MonitoringSearchIterator::UP iterator);
+ ~MonitoringDumpIterator();
+
+ // Overrides SearchIterator
+ void doSeek(uint32_t docId) override;
+ void doUnpack(uint32_t docId) override;
+ Trinary is_strict() const override { return _search->is_strict(); }
+ void initRange(uint32_t beginid, uint32_t endid) override {
+ _search->initRange(beginid, endid);
+ SearchIterator::initRange(_search->getDocId()+1, _search->getEndId());
+ }
+ void resetRange() override {
+ _search->resetRange();
+ SearchIterator::resetRange();
+ }
+};
+
+} // namespace queryeval
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/queryeval/monitoring_search_iterator.cpp b/searchlib/src/vespa/searchlib/queryeval/monitoring_search_iterator.cpp
new file mode 100644
index 00000000000..be282a29cdf
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/monitoring_search_iterator.cpp
@@ -0,0 +1,239 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+#include "monitoring_search_iterator.h"
+#include <vespa/vespalib/objects/objectvisitor.h>
+#include <vespa/vespalib/util/stringfmt.h>
+
+LOG_SETUP(".queryeval.monitoring_search_iterator");
+
+using vespalib::make_string;
+
+namespace search {
+namespace queryeval {
+
+MonitoringSearchIterator::Stats::Stats()
+ : _numSeeks(0),
+ _numUnpacks(0),
+ _numDocIdSteps(0),
+ _numHitSkips(0)
+{
+}
+
+void
+MonitoringSearchIterator::Dumper::addIndent()
+{
+ int n = _currIndent;
+ if (n < 0) {
+ n = 0;
+ }
+ _str.append(vespalib::string(n, ' '));
+}
+
+void
+MonitoringSearchIterator::Dumper::addText(const vespalib::string &value)
+{
+ addIndent();
+ _str.append(value.c_str());
+ uint32_t extraSpaces = value.size() < _textFormatWidth ? _textFormatWidth - value.size() : 0;
+ _str.append(make_string(":%s ", vespalib::string(extraSpaces, ' ').c_str()));
+}
+
+void
+MonitoringSearchIterator::Dumper::addInt(int64_t value, const vespalib::string &desc)
+{
+ _str.append(make_string("%*" PRId64 " %s",
+ _intFormatWidth, value, desc.c_str()));
+}
+
+void
+MonitoringSearchIterator::Dumper::addFloat(double value, const vespalib::string &desc)
+{
+ _str.append(make_string("%*.*f %s",
+ _floatFormatWidth, _floatFormatPrecision, value, desc.c_str()));
+}
+
+void
+MonitoringSearchIterator::Dumper::openScope()
+{
+ _currIndent += _indent;
+}
+
+void
+MonitoringSearchIterator::Dumper::closeScope()
+{
+ _currIndent -= _indent;
+}
+
+MonitoringSearchIterator::Dumper::Dumper(int indent,
+ uint32_t textFormatWidth,
+ uint32_t intFormatWidth,
+ uint32_t floatFormatWidth,
+ uint32_t floatFormatPrecision)
+ : _indent(indent),
+ _textFormatWidth(textFormatWidth),
+ _intFormatWidth(intFormatWidth),
+ _floatFormatWidth(floatFormatWidth),
+ _floatFormatPrecision(floatFormatPrecision),
+ _str(),
+ _currIndent(0),
+ _stack()
+{
+}
+
+void
+MonitoringSearchIterator::Dumper::openStruct(const vespalib::string &name, const vespalib::string &type)
+{
+ if (type == "search::queryeval::MonitoringSearchIterator") {
+ _stack.push(ITERATOR);
+ } else if (type == "MonitoringSearchIterator::Stats") {
+ _stack.push(STATS);
+ } else if (name == "children") {
+ _stack.push(CHILDREN);
+ openScope();
+ } else {
+ _stack.push(UNKNOWN);
+ }
+}
+
+void
+MonitoringSearchIterator::Dumper::closeStruct()
+{
+ StructType top = _stack.top();
+ _stack.pop();
+ if (top == CHILDREN) {
+ closeScope();
+ }
+}
+
+void
+MonitoringSearchIterator::Dumper::visitBool(const vespalib::string &name, bool value)
+{
+ (void) name;
+ (void) value;
+}
+
+void
+MonitoringSearchIterator::Dumper::visitInt(const vespalib::string &name, int64_t value)
+{
+ if (_stack.top() == STATS) {
+ if (name == "numSeeks") {
+ addInt(value, "seeks, ");
+ } else if (name == "numUnpacks") {
+ addInt(value, "unpacks, ");
+ }
+ }
+}
+
+void
+MonitoringSearchIterator::Dumper::visitFloat(const vespalib::string &name, double value)
+{
+ if (_stack.top() == STATS) {
+ if (name == "avgDocIdSteps") {
+ addFloat(value, "steps/seek, ");
+ } else if (name == "avgHitSkips") {
+ addFloat(value, "skips/seek, ");
+ } else if (name == "numSeeksPerUnpack") {
+ addFloat(value, "seeks/unpack\n");
+ }
+ }
+}
+
+void
+MonitoringSearchIterator::Dumper::visitString(const vespalib::string &name, const vespalib::string &value)
+{
+ if (_stack.top() == ITERATOR) {
+ if (name == "iteratorName") {
+ addText(value);
+ }
+ }
+}
+
+void
+MonitoringSearchIterator::Dumper::visitNull(const vespalib::string &name)
+{
+ (void) name;
+}
+
+void
+MonitoringSearchIterator::Dumper::visitNotImplemented()
+{
+}
+
+
+uint32_t
+MonitoringSearchIterator::countHitSkips(uint32_t docId)
+{
+ uint32_t tmpDocId = _search->getDocId();
+ uint32_t numHitSkips = 0;
+ for (; ;) {
+ _search->seek(tmpDocId + 1);
+ tmpDocId = _search->getDocId();
+ if (tmpDocId >= docId) {
+ break;
+ }
+ ++numHitSkips;
+ }
+ return numHitSkips;
+}
+
+MonitoringSearchIterator::MonitoringSearchIterator(const vespalib::string &name,
+ SearchIterator::UP search,
+ bool collectHitSkipStats)
+ : _name(name),
+ _search(std::move(search)),
+ _collectHitSkipStats(collectHitSkipStats),
+ _stats()
+{
+}
+
+void
+MonitoringSearchIterator::doSeek(uint32_t docId)
+{
+ _stats.seek();
+ _stats.step(docId - getDocId());
+ if (_collectHitSkipStats) {
+ _stats.skip(countHitSkips(docId));
+ } else {
+ _search->seek(docId);
+ }
+ LOG(debug, "%s:doSeek(%d) = %d e=%d", _name.c_str(), docId, _search->getDocId(), _search->getEndId());
+ setDocId(_search->getDocId());
+}
+
+void
+MonitoringSearchIterator::doUnpack(uint32_t docId)
+{
+ LOG(debug, "%s:doUnpack(%d)", _name.c_str(), docId);
+ _stats.unpack();
+ _search->unpack(docId);
+}
+
+const PostingInfo *
+MonitoringSearchIterator::getPostingInfo() const
+{
+ return _search->getPostingInfo();
+}
+
+void
+MonitoringSearchIterator::visitMembers(vespalib::ObjectVisitor &visitor) const
+{
+ visitor.visitString("iteratorName", _name);
+ visitor.visitString("iteratorType", _search->getClassName());
+ {
+ visitor.openStruct("stats", "MonitoringSearchIterator::Stats");
+ visitor.visitInt("numSeeks", _stats.getNumSeeks());
+ visitor.visitInt("numDocIdSteps", _stats.getNumDocIdSteps());
+ visitor.visitFloat("avgDocIdSteps", _stats.getAvgDocIdSteps());
+ visitor.visitInt("numHitSkips", _stats.getNumHitSkips());
+ visitor.visitFloat("avgHitSkips", _stats.getAvgHitSkips());
+ visitor.visitInt("numUnpacks", _stats.getNumUnpacks());
+ visitor.visitFloat("numSeeksPerUnpack", _stats.getNumSeeksPerUnpack());
+ visitor.closeStruct();
+ }
+ _search->visitMembers(visitor);
+}
+
+} // namespace queryeval
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/queryeval/monitoring_search_iterator.h b/searchlib/src/vespa/searchlib/queryeval/monitoring_search_iterator.h
new file mode 100644
index 00000000000..b837dd06978
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/monitoring_search_iterator.h
@@ -0,0 +1,131 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include "searchiterator.h"
+#include <vespa/vespalib/objects/objectvisitor.h>
+#include <stack>
+
+namespace search {
+namespace queryeval {
+
+/**
+ * Search iterator that monitors an underlying search iterator
+ * and at the end provides statastics on the following:
+ * - number of seeks
+ * - number of unpacks
+ * - average docid step size
+ * - average hit skip size
+ * - number of seeks per hit
+ */
+class MonitoringSearchIterator : public SearchIterator
+{
+public:
+ class Stats
+ {
+ private:
+ uint32_t _numSeeks;
+ uint32_t _numUnpacks;
+ uint64_t _numDocIdSteps;
+ uint64_t _numHitSkips;
+ double divide(double dividend, double divisor) const {
+ return divisor > 0.0 ? dividend / divisor : 0.0;
+ }
+ public:
+ Stats();
+ void seek() { ++_numSeeks; }
+ void step(uint32_t docIdDiff) { _numDocIdSteps += docIdDiff; }
+ void skip(uint32_t hitDiff) { _numHitSkips += hitDiff; }
+ void unpack() { ++_numUnpacks; }
+ uint32_t getNumSeeks() const { return _numSeeks; }
+ uint32_t getNumUnpacks() const { return _numUnpacks; }
+ double getNumSeeksPerUnpack() const { return divide(getNumSeeks(), getNumUnpacks()); }
+ uint64_t getNumDocIdSteps() const { return _numDocIdSteps; }
+ double getAvgDocIdSteps() const { return divide(getNumDocIdSteps(), getNumSeeks()); }
+ uint64_t getNumHitSkips() const { return _numHitSkips; }
+ double getAvgHitSkips() const { return divide(getNumHitSkips(), getNumSeeks()); }
+ };
+
+ class Dumper : public vespalib::ObjectVisitor
+ {
+ private:
+ enum StructType {
+ ITERATOR,
+ STATS,
+ CHILDREN,
+ UNKNOWN
+ };
+
+ int _indent;
+ uint32_t _textFormatWidth;
+ uint32_t _intFormatWidth;
+ uint32_t _floatFormatWidth;
+ uint32_t _floatFormatPrecision;
+ vespalib::string _str;
+ int _currIndent;
+ std::stack<StructType> _stack;
+ uint32_t _numberWidth;
+
+ void addIndent();
+ void addText(const vespalib::string &value);
+ void addInt(int64_t value, const vespalib::string &desc);
+ void addFloat(double value, const vespalib::string &desc);
+ void openScope();
+ void closeScope();
+
+ public:
+ Dumper(int indent = 4,
+ uint32_t textFormatWidth = 1,
+ uint32_t intFormatWidth = 1,
+ uint32_t floatFormatWidth = 1,
+ uint32_t floatFormatPrecision = 2);
+
+ vespalib::string toString() const { return _str; }
+
+ // Overrides ObjectVisitor
+ virtual void openStruct(const vespalib::string &name, const vespalib::string &type);
+ virtual void closeStruct();
+ virtual void visitBool(const vespalib::string &name, bool value);
+ virtual void visitInt(const vespalib::string &name, int64_t value);
+ virtual void visitFloat(const vespalib::string &name, double value);
+ virtual void visitString(const vespalib::string &name, const vespalib::string &value);
+ virtual void visitNull(const vespalib::string &name);
+ virtual void visitNotImplemented();
+ };
+
+ typedef std::unique_ptr<MonitoringSearchIterator> UP;
+
+private:
+ const vespalib::string _name;
+ const SearchIterator::UP _search;
+ const bool _collectHitSkipStats;
+ Stats _stats;
+
+ uint32_t countHitSkips(uint32_t docId);
+
+public:
+ MonitoringSearchIterator(const vespalib::string &name,
+ SearchIterator::UP search,
+ bool collectHitSkipStats);
+
+ // Overrides SearchIterator
+ void doSeek(uint32_t docId) override;
+ void doUnpack(uint32_t docId) override;
+ void initRange(uint32_t beginid, uint32_t endid) override {
+ _search->initRange(beginid, endid);
+ SearchIterator::initRange(_search->getDocId()+1, _search->getEndId());
+ }
+ void resetRange() override {
+ _search->resetRange();
+ SearchIterator::resetRange();
+ }
+ Trinary is_strict() const override { return _search->is_strict(); }
+ virtual const PostingInfo *getPostingInfo() const;
+ void visitMembers(vespalib::ObjectVisitor &visitor) const override;
+
+ const SearchIterator &getIterator() const { return *_search; }
+ const Stats &getStats() const { return _stats; }
+};
+
+} // namespace queryeval
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/queryeval/multibitvectoriterator.cpp b/searchlib/src/vespa/searchlib/queryeval/multibitvectoriterator.cpp
new file mode 100644
index 00000000000..0765d8d6850
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/multibitvectoriterator.cpp
@@ -0,0 +1,258 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/searchlib/queryeval/multibitvectoriterator.h>
+#include <vespa/searchlib/queryeval/andsearch.h>
+#include <vespa/searchlib/queryeval/andnotsearch.h>
+#include <vespa/searchlib/queryeval/sourceblendersearch.h>
+#include <vespa/searchlib/queryeval/orsearch.h>
+#include <vespa/searchlib/common/bitvectoriterator.h>
+#include <vespa/searchlib/attribute/attributeiterators.h>
+#include <vespa/searchlib/fef/termfieldmatchdata.h>
+#include <vespa/searchlib/fef/termfieldmatchdataarray.h>
+#include <vespa/vespalib/util/optimized.h>
+
+namespace search {
+namespace queryeval {
+
+namespace {
+
+template<typename Update>
+class MultiBitVectorIterator : public MultiBitVectorIteratorBase
+{
+public:
+ MultiBitVectorIterator(const Children & children) : MultiBitVectorIteratorBase(children) { }
+protected:
+ void updateLastValue(uint32_t docId);
+ void strictSeek(uint32_t docId);
+private:
+ void doSeek(uint32_t docId) override;
+ bool isStrict() const override { return false; }
+ bool acceptExtraFilter() const override { return Update::isAnd(); }
+ Update _update;
+};
+
+template<typename Update>
+class MultiBitVectorIteratorStrict : public MultiBitVectorIterator<Update>
+{
+public:
+ MultiBitVectorIteratorStrict(const MultiSearch::Children & children) : MultiBitVectorIterator<Update>(children) { }
+private:
+ void doSeek(uint32_t docId) override { this->strictSeek(docId); }
+ bool isStrict() const override { return true; }
+};
+
+template<typename Update>
+void MultiBitVectorIterator<Update>::updateLastValue(uint32_t docId)
+{
+ if (docId >= _lastMaxDocIdLimit) {
+ if (__builtin_expect(docId < _numDocs, true)) {
+ const uint32_t index(wordNum(docId));
+ _lastValue = _bvs[0][index];
+ for(uint32_t i(1); i < _bvs.size(); i++) {
+ _lastValue = _update(_lastValue, _bvs[i][index]);
+ }
+ _lastMaxDocIdLimit = (index + 1) * WordLen;
+ } else {
+ setAtEnd();
+ }
+ }
+}
+
+template<typename Update>
+void
+MultiBitVectorIterator<Update>::doSeek(uint32_t docId)
+{
+ updateLastValue(docId);
+ if (__builtin_expect( ! isAtEnd(), true)) {
+ if (_lastValue & mask(docId)) {
+ setDocId(docId);
+ }
+ }
+}
+
+template<typename Update>
+void
+MultiBitVectorIterator<Update>::strictSeek(uint32_t docId)
+{
+ for (updateLastValue(docId), _lastValue=_lastValue & checkTab(docId);
+ (_lastValue == 0) && __builtin_expect(! isAtEnd(), true);
+ updateLastValue(_lastMaxDocIdLimit));
+ if (__builtin_expect(!isAtEnd(), true)) {
+ docId = _lastMaxDocIdLimit - WordLen + vespalib::Optimized::lsbIdx(_lastValue);
+ if (__builtin_expect(docId >= _numDocs, false)) {
+ setAtEnd();
+ } else {
+ setDocId(docId);
+ }
+ }
+}
+
+struct And {
+ typedef BitWord::Word Word;
+ Word operator () (const Word a, const Word b) {
+ return a & b;
+ }
+ static bool isAnd() { return true; }
+};
+
+struct Or {
+ typedef BitWord::Word Word;
+ Word operator () (const Word a, const Word b) {
+ return a | b;
+ }
+ static bool isAnd() { return false; }
+};
+
+typedef MultiBitVectorIterator<And> AndBVIterator;
+typedef MultiBitVectorIteratorStrict<And> AndBVIteratorStrict;
+typedef MultiBitVectorIterator<Or> OrBVIterator;
+typedef MultiBitVectorIteratorStrict<Or> OrBVIteratorStrict;
+
+bool hasAtLeast2Bitvectors(const MultiSearch::Children & children)
+{
+ size_t count(0);
+ for (auto it(children.begin()); it != children.end(); it++) {
+ if ((*it)->isBitVector()) {
+ count++;
+ }
+ }
+ return count >= 2;
+}
+
+size_t firstStealable(const MultiSearch & s)
+{
+ return s.isAndNot() ? 1 : 0;
+}
+
+bool canOptimize(const MultiSearch & s) {
+ return (s.getChildren().size() >= 2)
+ && (s.isAnd() || s.isOr() || s.isAndNot())
+ && hasAtLeast2Bitvectors(s.getChildren());
+}
+
+}
+
+MultiBitVectorIteratorBase::MultiBitVectorIteratorBase(const Children & children) :
+ MultiSearch(children),
+ _numDocs(std::numeric_limits<unsigned int>::max()),
+ _lastValue(0),
+ _lastMaxDocIdLimit(0),
+ _bvs(children.size())
+{
+ for (size_t i(0); i < children.size(); i++) {
+ const BitVectorIterator * bv = static_cast<const BitVectorIterator *>(children[i]);
+ _bvs[i] = reinterpret_cast<const Word *>(bv->getBitValues());
+ _numDocs = std::min(_numDocs, bv->getDocIdLimit());
+ }
+}
+
+MultiBitVectorIteratorBase::~MultiBitVectorIteratorBase()
+{
+}
+
+SearchIterator::UP
+MultiBitVectorIteratorBase::andWith(UP filter, uint32_t estimate)
+{
+ (void) estimate;
+ if (filter->isBitVector() && acceptExtraFilter()) {
+ const BitVectorIterator & bv = static_cast<const BitVectorIterator &>(*filter);
+ _bvs.push_back(reinterpret_cast<const Word *>(bv.getBitValues()));
+ insert(getChildren().size(), std::move(filter));
+ _lastMaxDocIdLimit = 0; // force reload
+ }
+ return filter;
+}
+
+void
+MultiBitVectorIteratorBase::doUnpack(uint32_t docid)
+{
+ if (_unpackInfo.unpackAll()) {
+ MultiSearch::doUnpack(docid);
+ } else {
+ auto &children = getChildren();
+ _unpackInfo.each([&children,docid](size_t i){children[i]->doUnpack(docid);},
+ children.size());
+ }
+}
+
+SearchIterator::UP
+MultiBitVectorIteratorBase::optimize(SearchIterator::UP parentIt)
+{
+ if (parentIt->isSourceBlender()) {
+ SourceBlenderSearch & parent(static_cast<SourceBlenderSearch &>(*parentIt));
+ for (size_t i(0); i < parent.getNumChildren(); i++) {
+ parent.setChild(i, optimize(parent.steal(i)));
+ }
+ } else if (parentIt->isMultiSearch()) {
+ parentIt = optimizeMultiSearch(std::move(parentIt));
+ }
+ return parentIt;
+}
+
+SearchIterator::UP
+MultiBitVectorIteratorBase::optimizeMultiSearch(SearchIterator::UP parentIt)
+{
+ MultiSearch & parent(static_cast<MultiSearch &>(*parentIt));
+ if (canOptimize(parent)) {
+ MultiSearch::Children stolen;
+ std::vector<size_t> _unpackIndex;
+ bool strict(false);
+ size_t insertPosition(0);
+ for (size_t it(firstStealable(parent)); it != parent.getChildren().size(); ) {
+ if (parent.getChildren()[it]->isBitVector()) {
+ if (stolen.empty()) {
+ insertPosition = it;
+ }
+ if (parent.needUnpack(it)) {
+ _unpackIndex.push_back(stolen.size());
+ }
+ SearchIterator::UP bit = parent.remove(it);
+ if ( ! strict && static_cast<const BitVectorIterator &>(*bit).isStrict()) {
+ strict = true;
+ }
+ stolen.push_back(bit.release());
+ } else {
+ it++;
+ }
+ }
+ SearchIterator::UP next;
+ if (parent.isAnd()) {
+ if (strict) {
+ next.reset(new AndBVIteratorStrict(stolen));
+ } else {
+ next.reset(new AndBVIterator(stolen));
+ }
+ } else if (parent.isOr()) {
+ if (strict) {
+ next.reset(new OrBVIteratorStrict(stolen));
+ } else {
+ next.reset(new OrBVIterator(stolen));
+ }
+ } else if (parent.isAndNot()) {
+ if (strict) {
+ next.reset(new OrBVIteratorStrict(stolen));
+ } else {
+ next.reset(new OrBVIterator(stolen));
+ }
+ }
+ MultiBitVectorIteratorBase & nextM(static_cast<MultiBitVectorIteratorBase &>(*next));
+ for (size_t index : _unpackIndex) {
+ nextM.addUnpackIndex(index);
+ }
+ if (parent.getChildren().empty()) {
+ return next;
+ } else {
+ parent.insert(insertPosition, std::move(next));
+ }
+ }
+ MultiSearch::Children & toOptimize(const_cast<MultiSearch::Children &>(parent.getChildren()));
+ for (size_t i(0); i < toOptimize.size(); i++) {
+ toOptimize[i] = optimize(MultiSearch::UP(toOptimize[i])).release();
+ }
+
+ return parentIt;
+}
+
+}
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/queryeval/multibitvectoriterator.h b/searchlib/src/vespa/searchlib/queryeval/multibitvectoriterator.h
new file mode 100644
index 00000000000..75762bf3e52
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/multibitvectoriterator.h
@@ -0,0 +1,39 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/queryeval/multisearch.h>
+#include <vespa/searchlib/queryeval/unpackinfo.h>
+
+namespace search {
+namespace queryeval {
+
+class MultiBitVectorIteratorBase : public MultiSearch, protected BitWord
+{
+public:
+ ~MultiBitVectorIteratorBase();
+ virtual bool isStrict() const = 0;
+ void addUnpackIndex(size_t index) { _unpackInfo.add(index); }
+ /**
+ * Will steal and optimize bitvectoriterators if it can
+ * Might return itself or a new structure.
+ */
+ static SearchIterator::UP optimize(SearchIterator::UP parent);
+protected:
+ MultiBitVectorIteratorBase(const Children & children);
+
+ uint32_t _numDocs;
+ Word _lastValue; // Last value computed
+ uint32_t _lastMaxDocIdLimit; // next documentid requiring recomputation.
+ std::vector<const Word *> _bvs;
+private:
+ virtual bool acceptExtraFilter() const = 0;
+ UP andWith(UP filter, uint32_t estimate) override;
+ void doUnpack(uint32_t docid) override;
+ UnpackInfo _unpackInfo;
+ static SearchIterator::UP optimizeMultiSearch(SearchIterator::UP parent);
+};
+
+} // namespace queryeval
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/queryeval/multisearch.cpp b/searchlib/src/vespa/searchlib/queryeval/multisearch.cpp
new file mode 100644
index 00000000000..60a2d373e75
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/multisearch.cpp
@@ -0,0 +1,95 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "multisearch.h"
+#include <vespa/vespalib/objects/visit.h>
+
+namespace search {
+namespace queryeval {
+
+void
+MultiSearch::insert(size_t index, SearchIterator::UP search)
+{
+ assert(index <= _children.size());
+ _children.insert(_children.begin()+index, search.release());
+ onInsert(index);
+}
+
+SearchIterator::UP
+MultiSearch::remove(size_t index)
+{
+ assert(index < _children.size());
+ SearchIterator::UP search(_children[index]);
+ _children.erase(_children.begin() + index);
+ onRemove(index);
+ return search;
+}
+
+void
+MultiSearch::doUnpack(uint32_t docid)
+{
+ size_t sz(_children.size());
+ for (size_t i = 0; i < sz; ) {
+ if (__builtin_expect(_children[i]->getDocId() < docid, false)) {
+ _children[i]->doSeek(docid);
+ if (_children[i]->isAtEnd()) {
+ sz = deactivate(i);
+ continue;
+ }
+ }
+ if (__builtin_expect(_children[i]->getDocId() == docid, false)) {
+ _children[i]->doUnpack(docid);
+ }
+ i++;
+ }
+}
+
+size_t
+MultiSearch::deactivate(size_t idx)
+{
+ assert(idx < _children.size());
+ delete _children[idx];
+ _children[idx] = _children.back();
+ _children.resize(_children.size() - 1);
+ return _children.size();
+}
+
+MultiSearch::MultiSearch(const Children & children)
+ : _children(children)
+{
+}
+
+MultiSearch::~MultiSearch()
+{
+ for (SearchIterator * child : _children) {
+ delete child;
+ }
+}
+
+void
+MultiSearch::initRange(uint32_t beginid, uint32_t endid)
+{
+ SearchIterator::initRange(beginid, endid);
+ for (SearchIterator * child : _children) {
+ child->initRange(beginid, endid);
+ }
+}
+
+void
+MultiSearch::resetRange()
+{
+ SearchIterator::resetRange();
+ for (SearchIterator * child : _children) {
+ child->resetRange();
+ }
+}
+
+void
+MultiSearch::visitMembers(vespalib::ObjectVisitor &visitor) const
+{
+ visit(visitor, "children", _children);
+}
+
+
+} // namespace queryeval
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/queryeval/multisearch.h b/searchlib/src/vespa/searchlib/queryeval/multisearch.h
new file mode 100644
index 00000000000..3de15040062
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/multisearch.h
@@ -0,0 +1,59 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/vespalib/util/array.h>
+#include "searchiterator.h"
+
+namespace search {
+namespace queryeval {
+
+/**
+ * A virtual intermediate class that serves as the basis for combining searches
+ * like and, or any or others that take a list of children.
+ **/
+class MultiSearch : public SearchIterator
+{
+public:
+ /**
+ * Defines how to represent the children iterators. vespalib::Array usage
+ * generates faster and more compact code then using std::vector.
+ */
+ typedef std::vector<SearchIterator *> Children;
+ /**
+ * Create a new Multi Search with the given children.
+ *
+ * @param children the search objects we are and'ing
+ * this object takes ownership of the children.
+ **/
+ MultiSearch(const Children & children);
+ virtual ~MultiSearch();
+ const Children & getChildren() const { return _children; }
+ virtual bool isAnd() const { return false; }
+ virtual bool isAndNot() const { return false; }
+ virtual bool isOr() const { return false; }
+ void insert(size_t index, SearchIterator::UP search);
+ SearchIterator::UP remove(size_t index);
+ virtual bool needUnpack(size_t index) const { (void) index; return true; }
+ void initRange(uint32_t beginId, uint32_t endId) override;
+ void resetRange() override;
+protected:
+ void doUnpack(uint32_t docid) override;
+ virtual void visitMembers(vespalib::ObjectVisitor &visitor) const;
+private:
+ /**
+ * Call back when children are removed / inserted after the Iterator has been constructed.
+ * This is to support code that make assumptions that iterators do not move around or disappear.
+ * These are invoked after the child has been removed.
+ */
+ virtual void onRemove(size_t index) { (void) index; }
+ virtual void onInsert(size_t index) { (void) index; }
+
+ virtual bool isMultiSearch() const { return true; }
+ size_t deactivate(size_t index);
+ Children _children;
+};
+
+} // namespace queryeval
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/queryeval/nearsearch.cpp b/searchlib/src/vespa/searchlib/queryeval/nearsearch.cpp
new file mode 100644
index 00000000000..b33ab946acc
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/nearsearch.cpp
@@ -0,0 +1,313 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".nearsearch");
+
+#include "nearsearch.h"
+#include <vespa/vespalib/objects/visit.h>
+#include <vespa/vespalib/util/priority_queue.h>
+#include <limits>
+#include <set>
+
+namespace search {
+namespace queryeval {
+
+namespace {
+
+using search::fef::TermFieldMatchDataArray;
+using search::fef::TermFieldMatchDataPositionKey;
+
+template<typename T>
+void setup_fields(uint32_t window, std::vector<T> &matchers, const TermFieldMatchDataArray &in) {
+ std::set<uint32_t> fields;
+ for (size_t i = 0; i < in.size(); ++i) {
+ fields.insert(in[i]->getFieldId());
+ }
+ std::set<uint32_t>::const_iterator pos = fields.begin();
+ std::set<uint32_t>::const_iterator end = fields.end();
+ for (; pos != end; ++pos) {
+ matchers.push_back(T(window, *pos, in));
+ }
+}
+
+} // namespace search::queryeval::<unnamed>
+
+NearSearchBase::NearSearchBase(const Children & terms,
+ const TermFieldMatchDataArray &data,
+ uint32_t window,
+ bool strict)
+ : AndSearch(terms),
+ _data_size(data.size()),
+ _window(window),
+ _strict(strict)
+{
+}
+
+void
+NearSearchBase::visitMembers(vespalib::ObjectVisitor &visitor) const
+{
+ AndSearch::visitMembers(visitor);
+ visit(visitor, "data_size", _data_size);
+ visit(visitor, "window", _window);
+ visit(visitor, "strict", _strict);
+}
+
+void
+NearSearchBase::seekNext(uint32_t docId)
+{
+ LOG(debug, "seekNext(%d)", docId);
+ const Children & terms(getChildren());
+ SearchIterator &firstTerm = *terms[0];
+ uint32_t nextId = firstTerm.getDocId();
+ while ( ! isAtEnd(nextId)) {
+ LOG(debug, "Looking for match in document %d.", nextId);
+ bool foundHit = true;
+ for (uint32_t i = 1, len = terms.size(); i < len; ++i) {
+ SearchIterator &term = *terms[i];
+ if (!term.seek(nextId)) {
+ LOG(debug, "Term %d does not occur in document %d.", i, nextId);
+ foundHit = false;
+ if (term.getDocId() > nextId) {
+ nextId = term.getDocId();
+ LOG(debug, "Next document in which term %d occurs is %d.", i, nextId);
+ } else {
+ ++nextId;
+ LOG(debug, "Bumping target document to %d.", nextId);
+ }
+ break;
+ }
+ LOG(debug, "Term %d occurs in document %d.", i, nextId);
+ }
+ if (foundHit) {
+ LOG(debug, "All terms occur in document %d, check for match.", nextId);
+ if (match(nextId)) {
+ LOG(debug, "Document %d matches.", nextId);
+ break;
+ }
+ ++nextId;
+ }
+ if ( ! isAtEnd(nextId)) {
+ LOG(debug, "Seeking next document that contains term 0, starting at %d.", nextId);
+ firstTerm.seek(nextId);
+ nextId = firstTerm.getDocId();
+ LOG(debug, "Next document that contains term 0 is %d.", nextId);
+ }
+ }
+ if (isAtEnd(nextId)) {
+ LOG(debug, "Reached end of document list.");
+ setAtEnd();
+ } else {
+ setDocId(nextId);
+ }
+}
+
+void
+NearSearchBase::doSeek(uint32_t docId)
+{
+ LOG(debug, "doSeek(%d)", docId);
+ const Children & terms(getChildren());
+ bool foundHit = true;
+ for (uint32_t i = 0, len = terms.size(); i < len; ++i) {
+ SearchIterator *term = terms[i];
+ if (!term->seek(docId)) {
+ LOG(debug, "Term %d does not occur in document %d.", i, docId);
+ foundHit = false;
+ break;
+ }
+ }
+ if (foundHit && match(docId)) {
+ LOG(debug, "Document %d matches.", docId);
+ setDocId(docId);
+ } else if (_strict) {
+ LOG(debug, "Document %d does not match, seeking next.", docId);
+ seekNext(docId);
+ }
+}
+
+NearSearch::NearSearch(const Children & terms,
+ const TermFieldMatchDataArray &data,
+ uint32_t window,
+ bool strict)
+ : NearSearchBase(terms, data, window, strict),
+ _matchers()
+{
+ setup_fields(window, _matchers, data);
+}
+
+namespace {
+
+struct PosIter {
+ search::fef::TermFieldMatchData::PositionsIterator curPos;
+ search::fef::TermFieldMatchData::PositionsIterator endPos;
+
+ bool operator< (const PosIter &other) const {
+ // assumes none is at end
+ TermFieldMatchDataPositionKey mykey = *curPos;
+ TermFieldMatchDataPositionKey otherkey = *other.curPos;
+ return mykey < otherkey;
+ }
+};
+
+struct Iterators
+{
+ vespalib::PriorityQueue<PosIter> _queue;
+ TermFieldMatchDataPositionKey _maxOcc;
+
+ void update(TermFieldMatchDataPositionKey occ)
+ {
+ if (_queue.size() == 1 || _maxOcc < occ) { _maxOcc = occ; }
+ }
+
+ void add(const search::fef::TermFieldMatchData *term)
+ {
+ PosIter iter;
+ iter.curPos = term->begin();
+ iter.endPos = term->end();
+ LOG_ASSERT(iter.curPos != iter.endPos);
+ _queue.push(iter);
+ update(*iter.curPos);
+ }
+
+ bool match(uint32_t window) {
+ for (;;) {
+ PosIter &front = _queue.front();
+ TermFieldMatchDataPositionKey lastAllowed = *front.curPos;
+ lastAllowed.setPosition(front.curPos->getPosition() + window);
+
+ if (!(lastAllowed < _maxOcc)) {
+ return true;
+ }
+ do {
+ ++front.curPos;
+ if (front.curPos == front.endPos) {
+ return false;
+ }
+ lastAllowed = *front.curPos;
+ lastAllowed.setPosition(front.curPos->getPosition() + window);
+ } while (lastAllowed < _maxOcc);
+
+ update(*front.curPos);
+ _queue.adjust();
+ }
+ }
+};
+
+} // namespace <unnamed>
+
+bool
+NearSearch::Matcher::match(uint32_t docId)
+{
+ Iterators pos;
+ for (uint32_t i = 0, len = inputs().size(); i < len; ++i) {
+ const search::fef::TermFieldMatchData *term = inputs()[i];
+ if (term->getDocId() != docId || term->begin() == term->end()) {
+ LOG(debug, "No occurrences found for term %d.", i);
+ return false;
+ }
+ LOG(debug, "Got positions iterator for term %d.", i);
+ pos.add(term);
+ }
+
+ // Look for matching window.
+ return pos.match(window());
+}
+
+bool
+NearSearch::match(uint32_t docId)
+{
+ // Retrieve position iterators for each term.
+ doUnpack(docId);
+ for (size_t i = 0; i < _matchers.size(); ++i) {
+ if (_matchers[i].match(docId)) {
+ return true;
+ }
+ }
+ return false;
+}
+
+ONearSearch::ONearSearch(const Children & terms,
+ const TermFieldMatchDataArray &data,
+ uint32_t window,
+ bool strict)
+ : NearSearchBase(terms, data, window, strict),
+ _matchers()
+{
+ setup_fields(window, _matchers, data);
+}
+
+bool
+ONearSearch::Matcher::match(uint32_t docId)
+{
+ uint32_t numTerms = inputs().size();
+ PositionsIteratorList pos;
+ for (uint32_t i = 0; i < numTerms; ++i) {
+ const search::fef::TermFieldMatchData *term = inputs()[i];
+ if (term->getDocId() != docId || term->begin() == term->end()) {
+ LOG(debug, "No occurrences found for term %d.", i);
+ return false;
+ }
+ LOG(debug, "Got positions iterator for term %d.", i);
+ pos.push_back(term->begin());
+ }
+ if (numTerms < 2) return true; // 1 term is always near itself
+
+ int32_t remain = window();
+
+ TermFieldMatchDataPositionKey prevTermPos;
+ TermFieldMatchDataPositionKey curTermPos;
+ TermFieldMatchDataPositionKey lastAllowed;
+
+ // Look for match for every occurrence of the first term.
+ for ( ; pos[0] != inputs()[0]->end(); ++pos[0]) {
+ TermFieldMatchDataPositionKey firstTermPos = *pos[0];
+ lastAllowed = firstTermPos;
+ lastAllowed.setPosition(firstTermPos.getPosition() + remain);
+ if (lastAllowed < curTermPos) {
+ // if we already know that we must seek onwards:
+ continue;
+ }
+ prevTermPos = firstTermPos;
+ LOG(spam, "Looking for match in window [%d, %d].",
+ firstTermPos.getPosition(), lastAllowed.getPosition());
+ for (uint32_t i = 1; i < numTerms; ++i) {
+ LOG(spam, "Forwarding iterator for term %d beyond %d.", i, prevTermPos.getPosition());
+ while (pos[i] != inputs()[i]->end() && !(prevTermPos < *pos[i])) {
+ ++pos[i];
+ }
+ if (pos[i] == inputs()[i]->end()) {
+ LOG(debug, "Reached end of occurrences for term %d without matching ONEAR.", i);
+ return false;
+ }
+ curTermPos = *pos[i];
+ if (lastAllowed < curTermPos) {
+ // outside window
+ break;
+ }
+ LOG(spam, "Current position for term %d is %d.", i, curTermPos.getPosition());
+ if (i + 1 == numTerms) {
+ LOG(debug, "ONEAR match found for document %d.", docId);
+ // OK for all terms
+ return true;
+ }
+ prevTermPos = curTermPos;
+ }
+ }
+ LOG(debug, "No ONEAR match found for document %d.", docId);
+ return false;
+}
+
+bool
+ONearSearch::match(uint32_t docId)
+{
+ // Retrieve position iterators for each term.
+ doUnpack(docId);
+ for (size_t i = 0; i < _matchers.size(); ++i) {
+ if (_matchers[i].match(docId)) {
+ return true;
+ }
+ }
+ return false;
+}
+
+} // queryeval
+} // search
diff --git a/searchlib/src/vespa/searchlib/queryeval/nearsearch.h b/searchlib/src/vespa/searchlib/queryeval/nearsearch.h
new file mode 100644
index 00000000000..48a3af91a4d
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/nearsearch.h
@@ -0,0 +1,157 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <boost/utility.hpp>
+#include <vespa/searchlib/fef/termfieldmatchdata.h>
+#include <vespa/searchlib/fef/termfieldmatchdataarray.h>
+#include "andsearch.h"
+
+namespace search {
+namespace queryeval {
+
+/**
+ * The near search base implements the common logic of the near and o-near search.
+ */
+class NearSearchBase : public AndSearch
+{
+protected:
+ uint32_t _data_size;
+ uint32_t _window;
+ bool _strict;
+
+ typedef search::fef::TermFieldMatchDataArray TermFieldMatchDataArray;
+
+ class MatcherBase
+ {
+ private:
+ uint32_t _window;
+ TermFieldMatchDataArray _inputs;
+ protected:
+ uint32_t window() const { return _window; }
+ const TermFieldMatchDataArray &inputs() const { return _inputs; }
+ public:
+ MatcherBase(uint32_t win, uint32_t fieldId, const TermFieldMatchDataArray &in)
+ : _window(win),
+ _inputs()
+ {
+ for (size_t i = 0; i < in.size(); ++i) {
+ if (in[i]->getFieldId() == fieldId) {
+ _inputs.add(in[i]);
+ }
+ }
+ }
+ };
+
+ /**
+ * Typedef the list of positions iterators because it takes far too much space to write out :-)
+ */
+ typedef std::vector<search::fef::TermFieldMatchData::PositionsIterator> PositionsIteratorList;
+
+ /**
+ * Returns whether or not given document matches. This should only be called when all child terms are all
+ * at the same document.
+ *
+ * @param docId The document for which we are checking.
+ * @return True if the document matches.
+ */
+ virtual bool match(uint32_t docId) = 0;
+
+ /**
+ * Performs seek() on all child terms until a match is found. This method calls setDocId() to signal the
+ * document found.
+ *
+ * @param docId The document id from which to start seeking.
+ */
+ void seekNext(uint32_t docId);
+
+public:
+ /**
+ * Constructs a new search for the given term match data.
+ *
+ * @param terms The iterators for all child terms.
+ * @param data The term match data objects for all child terms.
+ * @param window The size of the window in which all terms must occur.
+ * @param strict Whether or not to skip to next matching document if seek fails.
+ */
+ NearSearchBase(const Children & terms,
+ const TermFieldMatchDataArray &data,
+ uint32_t window,
+ bool strict);
+
+ virtual void visitMembers(vespalib::ObjectVisitor &visitor) const;
+
+ // Inherit doc from SearchIterator.
+ virtual void doSeek(uint32_t docId);
+};
+
+/**
+ * The near search matches only when all of its child terms occur within some given window size.
+ */
+class NearSearch : public NearSearchBase
+{
+private:
+ struct Matcher : public NearSearchBase::MatcherBase
+ {
+ Matcher(uint32_t win, uint32_t fieldId, const TermFieldMatchDataArray &in)
+ : MatcherBase(win, fieldId, in) {}
+ bool match(uint32_t docId);
+ };
+
+ std::vector<Matcher> _matchers;
+
+ // Inherit doc from NearSearchBase.
+ virtual bool match(uint32_t docId);
+
+public:
+ /**
+ * Constructs a new search for the given term match data.
+ *
+ * @param terms The iterators for all child terms.
+ * @param data The term match data objects for all child terms.
+ * @param window The size of the window in which all terms must occur.
+ * @param strict Whether or not to skip to next matching document if seek fails.
+ */
+ NearSearch(const Children & terms,
+ const TermFieldMatchDataArray &data,
+ uint32_t window,
+ bool strict = true);
+};
+
+/**
+ * The o-near search matches only when all of its child terms occur within some given window size, in the
+ * same order as they appear as children of this.
+ */
+class ONearSearch : public NearSearchBase
+{
+private:
+ struct Matcher : public NearSearchBase::MatcherBase
+ {
+ Matcher(uint32_t win, uint32_t fieldId, const TermFieldMatchDataArray &in)
+ : MatcherBase(win, fieldId, in) {}
+ bool match(uint32_t docId);
+ };
+
+ std::vector<Matcher> _matchers;
+
+ // Inherit doc from NearSearchBase.
+ virtual bool match(uint32_t docId);
+
+public:
+ /**
+ * Constructs a new search for the given term match data.
+ *
+ * @param terms The iterators for all child terms.
+ * @param data The term match data objects for all child terms.
+ * @param window The size of the window in which all terms must occur.
+ * @param strict Whether or not to skip to next matching document if seek fails.
+ */
+ ONearSearch(const Children & terms,
+ const TermFieldMatchDataArray &data,
+ uint32_t window,
+ bool strict = true);
+
+};
+
+} // queryeval
+} // search
+
diff --git a/searchlib/src/vespa/searchlib/queryeval/orlikesearch.h b/searchlib/src/vespa/searchlib/queryeval/orlikesearch.h
new file mode 100644
index 00000000000..356519ea5e8
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/orlikesearch.h
@@ -0,0 +1,73 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/vespalib/objects/visit.h>
+#include "orsearch.h"
+
+namespace search {
+namespace queryeval {
+
+/**
+ * A simple implementation of the Or search operation.
+ **/
+template <bool strict, typename Unpack>
+class OrLikeSearch : public OrSearch
+{
+protected:
+ void doSeek(uint32_t docid) override {
+ const Children & children(getChildren());
+ for (uint32_t i = 0; i < children.size(); ++i) {
+ if (children[i]->seek(docid)) {
+ setDocId(docid);
+ return;
+ }
+ }
+ if (strict) {
+ uint32_t minNextId = children[0]->getDocId();
+ for (uint32_t i = 1; i < children.size(); ++i) {
+ if (children[i]->getDocId() < minNextId) {
+ minNextId = children[i]->getDocId();
+ }
+ }
+ setDocId(minNextId);
+ }
+ }
+ Trinary is_strict() const override { return strict ? Trinary::True : Trinary::False; }
+ void visitMembers(vespalib::ObjectVisitor &visitor) const override {
+ MultiSearch::visitMembers(visitor);
+ visit(visitor, "strict", strict);
+ }
+
+public:
+ /**
+ * Create a new Or Search with the given children. A strict Or
+ * can assume that all children below are also strict. A
+ * non-strict Or has no strictness assumptions about its children.
+ *
+ * @param children the search objects we are or'ing
+ **/
+ OrLikeSearch(const Children &children, const Unpack & unpacker) :
+ OrSearch(children),
+ _unpacker(unpacker)
+ { }
+private:
+ virtual void onRemove(size_t index) {
+ _unpacker.onRemove(index);
+ }
+ virtual void onInsert(size_t index) {
+ _unpacker.onInsert(index);
+ }
+ virtual void doUnpack(uint32_t docid) {
+ _unpacker.unpack(docid, *this);
+ }
+ virtual bool needUnpack(size_t index) const {
+ return _unpacker.needUnpack(index);
+ }
+ Unpack _unpacker;
+};
+
+
+} // namespace queryeval
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/queryeval/orsearch.cpp b/searchlib/src/vespa/searchlib/queryeval/orsearch.cpp
new file mode 100644
index 00000000000..9342727100e
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/orsearch.cpp
@@ -0,0 +1,119 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "orsearch.h"
+#include "orlikesearch.h"
+
+namespace search {
+namespace queryeval {
+
+namespace {
+
+class FullUnpack
+{
+public:
+ void unpack(uint32_t docid, MultiSearch & search) {
+ const MultiSearch::Children & children(search.getChildren());
+ size_t sz(children.size());
+ for (size_t i(0); i < sz; ) {
+ if (__builtin_expect(children[i]->getDocId() < docid, false)) {
+ children[i]->doSeek(docid);
+ if (children[i]->getDocId() == search::endDocId) {
+ sz = deactivate(search, i);
+ continue;
+ }
+ }
+ if (__builtin_expect(children[i]->getDocId() == docid, false)) {
+ children[i]->doUnpack(docid);
+ }
+ i++;
+ }
+ }
+ void onRemove(size_t index) { (void) index; }
+ void onInsert(size_t index) { (void) index; }
+ bool needUnpack(size_t index) const { (void) index; return true; }
+private:
+ static size_t deactivate(MultiSearch &children, size_t idx);
+};
+
+size_t
+FullUnpack::deactivate(MultiSearch & search, size_t idx)
+{
+ search.remove(idx);
+ return search.getChildren().size();
+}
+
+class SelectiveUnpack
+{
+public:
+ SelectiveUnpack(const UnpackInfo & unpackInfo) :
+ _unpackInfo(unpackInfo)
+ { }
+ void unpack(uint32_t docid, const MultiSearch & search) {
+ auto &children = search.getChildren();
+ _unpackInfo.each([&children,docid](size_t i) {
+ SearchIterator &child = *children[i];
+ if (__builtin_expect(child.getDocId() < docid, false)) {
+ child.doSeek(docid);
+ }
+ if (__builtin_expect(child.getDocId() == docid, false)) {
+ child.doUnpack(docid);
+ }
+ }, children.size());
+ }
+ void onRemove(size_t index) {
+ _unpackInfo.remove(index);
+ }
+ void onInsert(size_t index) {
+ _unpackInfo.insert(index);
+ }
+ bool needUnpack(size_t index) const {
+ return _unpackInfo.needUnpack(index);
+ }
+private:
+ UnpackInfo _unpackInfo;
+};
+
+}
+
+BitVector::UP
+OrSearch::get_hits(uint32_t begin_id) {
+ const Children &children = getChildren();
+ BitVector::UP result = children.front()->get_hits(begin_id);
+ for (size_t i = 1; i < children.size(); ++i) {
+ children[i]->or_hits_into(*result, begin_id);
+ }
+ return result;
+}
+
+SearchIterator *
+OrSearch::create(const MultiSearch::Children &children, bool strict) {
+ UnpackInfo unpackInfo;
+ unpackInfo.forceAll();
+ return create(children, strict, unpackInfo);
+}
+
+SearchIterator *
+OrSearch::create(const MultiSearch::Children &children, bool strict, const UnpackInfo & unpackInfo) {
+ (void) unpackInfo;
+ if (strict) {
+ if (unpackInfo.unpackAll()) {
+ return new OrLikeSearch<true, FullUnpack>(children, FullUnpack());
+ } else if(unpackInfo.empty()) {
+ return new OrLikeSearch<true, NoUnpack>(children, NoUnpack());
+ } else {
+ return new OrLikeSearch<true, SelectiveUnpack>(children, SelectiveUnpack(unpackInfo));
+ }
+ } else {
+ if (unpackInfo.unpackAll()) {
+ return new OrLikeSearch<false, FullUnpack>(children, FullUnpack());
+ } else if(unpackInfo.empty()) {
+ return new OrLikeSearch<false, NoUnpack>(children, NoUnpack());
+ } else {
+ return new OrLikeSearch<false, SelectiveUnpack>(children, SelectiveUnpack(unpackInfo));
+ }
+ }
+}
+
+} // namespace queryeval
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/queryeval/orsearch.h b/searchlib/src/vespa/searchlib/queryeval/orsearch.h
new file mode 100644
index 00000000000..353276da932
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/orsearch.h
@@ -0,0 +1,33 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "multisearch.h"
+#include "unpackinfo.h"
+
+namespace search {
+namespace queryeval {
+
+/**
+ * A simple implementation of the Or search operation.
+ **/
+class OrSearch : public MultiSearch
+{
+public:
+ typedef MultiSearch::Children Children;
+
+ // Caller takes ownership of the returned SearchIterator.
+ static SearchIterator *create(const Children &children, bool strict);
+ static SearchIterator *create(const Children &children, bool strict, const UnpackInfo & unpackInfo);
+
+ BitVector::UP get_hits(uint32_t begin_id) override;
+
+protected:
+ OrSearch(const Children & children) : MultiSearch(children) { }
+private:
+ virtual bool isOr() const { return true; }
+};
+
+} // namespace queryeval
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/queryeval/posting_info.h b/searchlib/src/vespa/searchlib/queryeval/posting_info.h
new file mode 100644
index 00000000000..087e0fb62d7
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/posting_info.h
@@ -0,0 +1,45 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+
+namespace search {
+namespace queryeval {
+
+/**
+ * Interface for getting global information stored in underlying posting list
+ * used by a search iterator.
+ *
+ * Subclasses of this interface will expose different information that can be
+ * used during evaluation.
+ */
+struct PostingInfo {
+ virtual ~PostingInfo() {}
+};
+
+
+/**
+ * Class for getting the min and max weights of a posting list.
+ *
+ * Such posting lists store a weight with each doc id and maintain the min and
+ * max weights among the whole posting list.
+ */
+class MinMaxPostingInfo : public PostingInfo {
+private:
+ int32_t _minWeight;
+ int32_t _maxWeight;
+
+public:
+ typedef std::unique_ptr<MinMaxPostingInfo> UP;
+ typedef std::shared_ptr<MinMaxPostingInfo> SP;
+ MinMaxPostingInfo(int32_t minWeight, int32_t maxWeight)
+ : PostingInfo(),
+ _minWeight(minWeight),
+ _maxWeight(maxWeight)
+ {}
+ int32_t getMinWeight() const { return _minWeight; }
+ int32_t getMaxWeight() const { return _maxWeight; }
+};
+
+} // namespace queryeval
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/queryeval/predicate_blueprint.cpp b/searchlib/src/vespa/searchlib/queryeval/predicate_blueprint.cpp
new file mode 100644
index 00000000000..70903788992
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/predicate_blueprint.cpp
@@ -0,0 +1,345 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+
+#include "predicate_blueprint.h"
+#include <vespa/searchlib/predicate/predicate_bounds_posting_list.h>
+#include <vespa/searchlib/predicate/predicate_interval_posting_list.h>
+#include <vespa/searchlib/predicate/predicate_zero_constraint_posting_list.h>
+#include <vespa/searchlib/predicate/predicate_zstar_compressed_posting_list.h>
+#include <vespa/searchlib/attribute/predicate_attribute.h>
+#include <vespa/searchlib/query/tree/predicate_query_term.h>
+#include <vespa/searchlib/query/tree/termnodes.h>
+#include <vespa/searchlib/predicate/predicate_hash.h>
+
+#include <vespa/log/log.h>
+LOG_SETUP(".predicate_blueprint");
+#include <vespa/searchlib/predicate/predicate_range_term_expander.h>
+
+using search::query::PredicateQuery;
+using search::query::PredicateQueryTerm;
+using std::make_pair;
+using std::pair;
+using std::vector;
+using vespalib::string;
+using namespace search::predicate;
+
+namespace search {
+namespace queryeval {
+
+namespace {
+typedef PredicateBlueprint::IntervalEntry IntervalEntry;
+typedef PredicateBlueprint::BoundsEntry BoundsEntry;
+
+template <typename Entry>
+void pushValueDictionaryEntry(const Entry &entry,
+ const SimpleIndex<btree::EntryRef> &interval_index,
+ vector<IntervalEntry> &interval_entries) {
+ const std::string &hash_str = entry.getKey() + "=" + entry.getValue();
+ uint64_t feature = PredicateHash::hash64(hash_str);
+ auto iterator = interval_index.lookup(feature);
+ if (iterator.valid()) {
+ size_t sz = interval_index.getPostingListSize(iterator.getData());
+ LOG(debug, "postinglist(%s) = (%d).size = %ld", hash_str.c_str(), iterator.getData().ref(), sz);
+ interval_entries.push_back({iterator.getData(), entry.getSubQueryBitmap(), sz, feature});
+ }
+}
+
+struct MyRangeHandler {
+ const SimpleIndex<btree::EntryRef> &interval_index;
+ const SimpleIndex<btree::EntryRef> &bounds_index;
+ vector<IntervalEntry> &interval_entries;
+ vector<BoundsEntry> &bounds_entries;
+ uint64_t subquery_bitmap;
+
+ void handleRange(const string &label) {
+ uint64_t feature = PredicateHash::hash64(label);
+ auto iterator = interval_index.lookup(feature);
+ if (iterator.valid()) {
+ size_t sz = interval_index.getPostingListSize(iterator.getData());
+ interval_entries.push_back({iterator.getData(), subquery_bitmap, sz, feature});
+ }
+ }
+ void handleEdge(const string &label, uint32_t value) {
+ uint64_t feature = PredicateHash::hash64(label);
+ auto iterator = bounds_index.lookup(feature);
+ if (iterator.valid()) {
+ size_t sz = bounds_index.getPostingListSize(iterator.getData());
+ bounds_entries.push_back({iterator.getData(), value, subquery_bitmap, sz, feature});
+ }
+ }
+};
+
+template <typename Entry>
+void pushRangeDictionaryEntries(
+ const Entry &entry,
+ const PredicateIndex &index,
+ vector<IntervalEntry> &interval_entries,
+ vector<BoundsEntry> &bounds_entries) {
+ PredicateRangeTermExpander expander(index.getArity());
+ MyRangeHandler handler{index.getIntervalIndex(), index.getBoundsIndex(), interval_entries,
+ bounds_entries, entry.getSubQueryBitmap()};
+ expander.expand(entry.getKey(), entry.getValue(), handler);
+}
+
+void pushZStarPostingList(const SimpleIndex<btree::EntryRef> &interval_index,
+ vector<IntervalEntry> &interval_entries) {
+ uint64_t feature = PredicateIndex::z_star_hash;
+ auto iterator = interval_index.lookup(feature);
+ if (iterator.valid()) {
+ size_t sz = interval_index.getPostingListSize(iterator.getData());
+ interval_entries.push_back({iterator.getData(), UINT64_MAX, sz, feature});
+ }
+}
+
+} // namespace
+
+void PredicateBlueprint::addPostingToK(uint64_t feature)
+{
+ const auto &interval_index = _index.getIntervalIndex();
+ auto tmp = interval_index.lookup(feature);
+ if (__builtin_expect(tmp.valid() && (_cachedFeatures.find(feature) == _cachedFeatures.end()), true)) {
+ uint8_t *kVBase = &_kV[0];
+ size_t kVSize = _kV.size();
+ interval_index.foreach_frozen_key(
+ tmp.getData(),
+ feature,
+ [=](uint32_t doc_id)
+ {
+ if (__builtin_expect(doc_id < kVSize, true)) {
+ ++kVBase[doc_id];
+ }
+ });
+ }
+}
+
+void PredicateBlueprint::addBoundsPostingToK(uint64_t feature)
+{
+ const auto &bounds_index = _index.getBoundsIndex();
+ auto tmp = bounds_index.lookup(feature);
+ if (__builtin_expect(tmp.valid(), true)) {
+ uint8_t *kVBase = &_kV[0];
+ size_t kVSize = _kV.size();
+ bounds_index.foreach_frozen_key(
+ tmp.getData(),
+ feature,
+ [=](uint32_t doc_id)
+ {
+ if (__builtin_expect(doc_id < kVSize, true)) {
+ ++kVBase[doc_id];
+ }
+ });
+ }
+}
+
+void PredicateBlueprint::addZeroConstraintToK()
+{
+ uint8_t *kVBase = &_kV[0];
+ size_t kVSize = _kV.size();
+ _index.getZeroConstraintDocs().foreach_key(
+ [=](uint32_t doc_id)
+ {
+ if (__builtin_expect(doc_id < kVSize, true)) {
+ ++kVBase[doc_id];
+ }
+ });
+}
+
+PredicateBlueprint::PredicateBlueprint(const FieldSpecBase &field,
+ const PredicateAttribute & attribute,
+ const PredicateQuery &query)
+ : ComplexLeafBlueprint(field),
+ _attribute(attribute),
+ _index(predicate_attribute().getIndex()),
+ _kVBacking(),
+ _kV(nullptr, 0),
+ _cachedFeatures(),
+ _interval_dict_entries(),
+ _bounds_dict_entries(),
+ _zstar_dict_entry(),
+ _interval_btree_iterators(),
+ _interval_vector_iterators(),
+ _bounds_btree_iterators(),
+ _bounds_vector_iterators(),
+ _zstar_btree_iterator(),
+ _zstar_vector_iterator()
+{
+ const auto &interval_index = _index.getIntervalIndex();
+ const auto zero_constraints_docs = _index.getZeroConstraintDocs();
+ const PredicateQueryTerm &term = *query.getTerm();
+ for (const auto &entry : term.getFeatures()) {
+ pushValueDictionaryEntry(entry, interval_index, _interval_dict_entries);
+ }
+ for (const auto &entry : term.getRangeFeatures()) {
+ pushRangeDictionaryEntries(entry, _index, _interval_dict_entries,
+ _bounds_dict_entries);
+ }
+ pushZStarPostingList(interval_index, _interval_dict_entries);
+
+ BitVectorCache::KeyAndCountSet keys;
+ keys.reserve(_interval_dict_entries.size());
+ for (const auto & e : _interval_dict_entries) {
+ keys.push_back({e.feature, e.size});
+ }
+ _cachedFeatures = _index.lookupCachedSet(keys);
+
+ auto it = interval_index.lookup(PredicateIndex::z_star_compressed_hash);
+ if (it.valid()) {
+ _zstar_dict_entry = it.getData();
+ }
+
+ std::sort(_interval_dict_entries.begin(), _interval_dict_entries.end(),
+ [&] (const auto & a, const auto & b) {
+ return a.size > b.size;
+ });
+
+ std::sort(_bounds_dict_entries.begin(), _bounds_dict_entries.end(),
+ [&] (const auto & a, const auto & b) {
+ return a.size > b.size;
+ });
+
+
+ if (zero_constraints_docs.size() == 0 &&
+ _interval_dict_entries.empty() && _bounds_dict_entries.empty() &&
+ !_zstar_dict_entry.valid()) {
+ setEstimate(HitEstimate(0, true));
+ } else {
+ setEstimate(HitEstimate(static_cast<uint32_t>(zero_constraints_docs.size()), false));
+ }
+}
+
+namespace {
+
+ template<typename DictEntry, typename VectorIteratorEntry, typename BTreeIteratorEntry>
+ void lookupPostingLists(const std::vector<DictEntry> &dict_entries,
+ std::vector<VectorIteratorEntry> &vector_iterators,
+ std::vector<BTreeIteratorEntry> &btree_iterators,
+ const SimpleIndex<btree::EntryRef> &index)
+ {
+ for (const auto &entry : dict_entries) {
+ auto vector_iterator = index.getVectorPostingList(entry.feature);
+ if (vector_iterator) {
+ vector_iterators.push_back(VectorIteratorEntry{*vector_iterator, entry});
+ } else {
+ auto btree_iterator = index.getBTreePostingList(entry.entry_ref);
+ btree_iterators.push_back(BTreeIteratorEntry{btree_iterator, entry});
+ }
+ }
+
+ };
+
+}
+
+void PredicateBlueprint::fetchPostings(bool) {
+ const auto &interval_index = _index.getIntervalIndex();
+ const auto &bounds_index = _index.getBoundsIndex();
+ lookupPostingLists(_interval_dict_entries, _interval_vector_iterators,
+ _interval_btree_iterators, interval_index);
+ lookupPostingLists(_bounds_dict_entries, _bounds_vector_iterators,
+ _bounds_btree_iterators, bounds_index);
+
+ // Lookup zstar interval iterator
+ if (_zstar_dict_entry.valid()) {
+ auto vector_iterator = interval_index.getVectorPostingList(
+ PredicateIndex::z_star_compressed_hash);
+ if (vector_iterator) {
+ _zstar_vector_iterator.emplace(std::move(*vector_iterator));
+ } else {
+ _zstar_btree_iterator.emplace(interval_index.getBTreePostingList(_zstar_dict_entry));
+ }
+ }
+
+ PredicateAttribute::MinFeatureHandle mfh = predicate_attribute().getMinFeatureVector();
+ vespalib::DefaultAlloc kv(mfh.second);
+ _kVBacking.swap(kv);
+ _kV = BitVectorCache::CountVector(static_cast<uint8_t *>(_kVBacking.get()), mfh.second);
+ _index.computeCountVector(_cachedFeatures, _kV);
+ for (const auto & entry : _bounds_dict_entries) {
+ addBoundsPostingToK(entry.feature);
+ }
+ for (const auto & entry : _interval_dict_entries) {
+ addPostingToK(entry.feature);
+ }
+ addPostingToK(PredicateIndex::z_star_compressed_hash);
+ addZeroConstraintToK();
+}
+
+SearchIterator::UP
+PredicateBlueprint::createLeafSearch(const fef::TermFieldMatchDataArray &tfmda, bool) const {
+ const auto &attribute = predicate_attribute();
+ PredicateAttribute::MinFeatureHandle mfh = attribute.getMinFeatureVector();
+ auto interval_range_vector = attribute.getIntervalRangeVector();
+ auto max_interval_range = attribute.getMaxIntervalRange();
+ return SearchIterator::UP(new PredicateSearch(mfh.first, interval_range_vector, max_interval_range, _kV,
+ createPostingLists(), tfmda));
+}
+
+namespace {
+
+ template<typename IteratorEntry, typename PostingListFactory>
+ void createPredicatePostingLists(const std::vector<IteratorEntry> &iterator_entries,
+ std::vector<PredicatePostingList::UP> &posting_lists,
+ PostingListFactory posting_list_factory)
+ {
+ for (const auto &entry : iterator_entries) {
+ if (entry.iterator.valid()) {
+ auto posting_list = posting_list_factory(entry);
+ posting_list->setSubquery(entry.entry.subquery);
+ posting_lists.emplace_back(PredicatePostingList::UP(posting_list));
+ }
+ }
+ }
+
+}
+
+std::vector<PredicatePostingList::UP> PredicateBlueprint::createPostingLists() const {
+ size_t total_size = _interval_btree_iterators.size() + _interval_vector_iterators.size() +
+ _bounds_btree_iterators.size() + _bounds_vector_iterators.size() + 2;
+ std::vector<PredicatePostingList::UP> posting_lists;
+ posting_lists.reserve(total_size);
+ const auto &interval_store = _index.getIntervalStore();
+
+ createPredicatePostingLists(
+ _interval_vector_iterators, posting_lists,
+ [&] (const IntervalIteratorEntry<VectorIterator> &entry) {
+ return new PredicateIntervalPostingList<VectorIterator>(interval_store, entry.iterator);
+ });
+
+ createPredicatePostingLists(
+ _interval_btree_iterators, posting_lists,
+ [&] (const IntervalIteratorEntry<BTreeIterator> &entry) {
+ return new PredicateIntervalPostingList<BTreeIterator>(interval_store, entry.iterator);
+ });
+
+ createPredicatePostingLists(
+ _bounds_vector_iterators, posting_lists,
+ [&] (const BoundsIteratorEntry<VectorIterator> &entry) {
+ return new PredicateBoundsPostingList<VectorIterator>(interval_store, entry.iterator,
+ entry.entry.value_diff);
+ });
+
+ createPredicatePostingLists(
+ _bounds_btree_iterators, posting_lists,
+ [&] (const BoundsIteratorEntry<BTreeIterator> &entry) {
+ return new PredicateBoundsPostingList<BTreeIterator>(interval_store, entry.iterator,
+ entry.entry.value_diff);
+ });
+
+ if (_zstar_vector_iterator && _zstar_vector_iterator->valid()) {
+ auto posting_list = PredicatePostingList::UP(
+ new PredicateZstarCompressedPostingList<VectorIterator>(interval_store, *_zstar_vector_iterator));
+ posting_lists.emplace_back(std::move(posting_list));
+ } else if (_zstar_btree_iterator && _zstar_btree_iterator->valid()) {
+ auto posting_list = PredicatePostingList::UP(
+ new PredicateZstarCompressedPostingList<BTreeIterator>(interval_store, *_zstar_btree_iterator));
+ posting_lists.emplace_back(std::move(posting_list));
+ }
+ auto iterator = _index.getZeroConstraintDocs().begin();
+ if (iterator.valid()) {
+ auto posting_list = PredicatePostingList::UP(new PredicateZeroConstraintPostingList(iterator));
+ posting_lists.emplace_back(std::move(posting_list));
+ }
+ return posting_lists;
+}
+} // namespace search::queryeval
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/queryeval/predicate_blueprint.h b/searchlib/src/vespa/searchlib/queryeval/predicate_blueprint.h
new file mode 100644
index 00000000000..aeab9d4175f
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/predicate_blueprint.h
@@ -0,0 +1,94 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "blueprint.h"
+#include "predicate_search.h"
+#include <vespa/searchlib/attribute/attributeguard.h>
+#include <vespa/searchlib/attribute/predicate_attribute.h>
+#include <vespa/searchlib/btree/entryref.h>
+#include <vector>
+#include <memory>
+
+namespace search {
+namespace query { class PredicateQuery; }
+
+namespace queryeval {
+/**
+ * Blueprint for building predicate searches. It builds search
+ * iterators based on PredicateSearch.
+ */
+class PredicateBlueprint : public ComplexLeafBlueprint {
+public:
+ struct IntervalEntry {
+ btree::EntryRef entry_ref;
+ uint64_t subquery;
+ size_t size;
+ uint64_t feature;
+ };
+ struct BoundsEntry {
+ btree::EntryRef entry_ref;
+ uint32_t value_diff;
+ uint64_t subquery;
+ size_t size;
+ uint64_t feature;
+ };
+ template<typename I>
+ struct IntervalIteratorEntry {
+ I iterator;
+ const IntervalEntry &entry;
+ };
+ template<typename I>
+ struct BoundsIteratorEntry {
+ I iterator;
+ const BoundsEntry &entry;
+ };
+
+ PredicateBlueprint(const FieldSpecBase &field,
+ const PredicateAttribute & attribute,
+ const query::PredicateQuery &query);
+
+ void fetchPostings(bool strict) override;
+
+ SearchIterator::UP
+ createLeafSearch(const fef::TermFieldMatchDataArray &tfmda,
+ bool strict) const override;
+private:
+ using BTreeIterator = predicate::SimpleIndex<btree::EntryRef>::BTreeIterator;
+ using VectorIterator = predicate::SimpleIndex<btree::EntryRef>::VectorIterator;
+ template <typename T>
+ using optional = std::experimental::optional<T>;
+
+ const PredicateAttribute & predicate_attribute() const {
+ return _attribute;
+ }
+ PredicateAttribute & predicate_attribute() {
+ return const_cast<PredicateAttribute &>(_attribute);
+ }
+ void addBoundsPostingToK(uint64_t feature);
+ void addPostingToK(uint64_t feature);
+ void addZeroConstraintToK();
+ std::vector<predicate::PredicatePostingList::UP> createPostingLists() const;
+
+ const PredicateAttribute & _attribute;
+ const predicate::PredicateIndex &_index;
+ vespalib::DefaultAlloc _kVBacking;
+ BitVectorCache::CountVector _kV;
+ BitVectorCache::KeySet _cachedFeatures;
+
+ std::vector<IntervalEntry> _interval_dict_entries;
+ std::vector<BoundsEntry> _bounds_dict_entries;
+ btree::EntryRef _zstar_dict_entry;
+
+ std::vector<IntervalIteratorEntry<BTreeIterator>> _interval_btree_iterators;
+ std::vector<IntervalIteratorEntry<VectorIterator>> _interval_vector_iterators;
+ std::vector<BoundsIteratorEntry<BTreeIterator>> _bounds_btree_iterators;
+ std::vector<BoundsIteratorEntry<VectorIterator>> _bounds_vector_iterators;
+ // The zstar iterator is either a vector or a btree iterator.
+ optional<BTreeIterator> _zstar_btree_iterator;
+ optional<VectorIterator> _zstar_vector_iterator;
+};
+
+} // namespace search::queryeval
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/queryeval/predicate_search.cpp b/searchlib/src/vespa/searchlib/queryeval/predicate_search.cpp
new file mode 100644
index 00000000000..91815f5f9ca
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/predicate_search.cpp
@@ -0,0 +1,310 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/log/log.h>
+LOG_SETUP(".predicate_search");
+#include <vespa/fastos/fastos.h>
+
+#include "predicate_search.h"
+#include <vespa/searchlib/fef/termfieldmatchdata.h>
+#include <vespa/searchlib/fef/termfieldmatchdataarray.h>
+#include <algorithm>
+
+using search::fef::TermFieldMatchData;
+using search::fef::TermFieldMatchDataArray;
+using std::vector;
+using namespace search::predicate;
+
+namespace search {
+
+using predicate::MIN_INTERVAL;
+using predicate::MAX_INTERVAL;
+
+namespace queryeval {
+
+namespace {
+
+class SkipMinFeatureSSE2 : public SkipMinFeature
+{
+public:
+ SkipMinFeatureSSE2(const uint8_t * min_feature, const uint8_t * kv, size_t sz);
+private:
+ typedef char v16u8 __attribute__((vector_size(16)));
+ VESPA_DLL_LOCAL uint32_t next() override;
+ uint32_t cmp32(size_t j) {
+ v16u8 r0 = _kv[j*2] >= _min_feature[j*2];
+ v16u8 r1 = _kv[j*2+1] >= _min_feature[j*2+1];
+ return __builtin_ia32_pmovmskb128(r0) | (__builtin_ia32_pmovmskb128(r1) << 16);
+ }
+ VESPA_DLL_LOCAL void advance();
+ const v16u8 * _min_feature;
+ const v16u8 * _kv;
+ uint32_t _sz;
+ uint32_t _chunk;
+ uint32_t _last32;
+};
+
+SkipMinFeatureSSE2::SkipMinFeatureSSE2(const uint8_t * min_feature, const uint8_t * kv, size_t sz) :
+ _min_feature(reinterpret_cast<const v16u8 *>(min_feature)),
+ _kv(reinterpret_cast<const v16u8 *>(kv)),
+ _sz(sz),
+ _chunk(0),
+ _last32(0)
+{
+ advance();
+ if (_chunk == 1) {
+ _last32 &= ~0x1;
+ }
+}
+
+void
+SkipMinFeatureSSE2::advance()
+{
+ for (;(_last32 == 0) && (_chunk < (_sz>>5)); _last32 = cmp32(_chunk++));
+ if (_last32 == 0) {
+ const uint8_t * min_feature = reinterpret_cast<const uint8_t *>(_min_feature);
+ const uint8_t * kv = reinterpret_cast<const uint8_t *>(_kv);
+ for (size_t i(_chunk << 5); i < _sz; i++) {
+ if (kv[i] >= min_feature[i]) {
+ _last32 |= 1 << (i - (_chunk << 5));
+ }
+ }
+ _chunk++;
+ }
+}
+
+uint32_t
+SkipMinFeatureSSE2::next()
+{
+ if (__builtin_expect(_last32 == 0, true)) {
+ advance();
+ }
+ if (_last32) {
+ uint32_t n = vespalib::Optimized::lsbIdx(_last32);
+ _last32 &= ~(1 << n);
+ n += ((_chunk - 1) << 5);
+ return n < _sz ? n : -1;
+ } else {
+ return -1;
+ }
+}
+
+}
+
+SkipMinFeature::UP
+SkipMinFeature::create(const uint8_t * min_feature, const uint8_t * kv, size_t sz)
+{
+ return UP(new SkipMinFeatureSSE2(min_feature, kv, sz));
+}
+
+PredicateSearch::PredicateSearch(const uint8_t * minFeatureVector,
+ const IntervalRange * interval_range_vector,
+ IntervalRange max_interval_range,
+ CondensedBitVector::CountVector kV,
+ vector<PredicatePostingList::UP> posting_lists,
+ const fef::TermFieldMatchDataArray &tfmda)
+ : _skip(SkipMinFeature::create(minFeatureVector, &kV[0], kV.size())),
+ _posting_lists(std::move(posting_lists)),
+ _sorted_indexes(_posting_lists.size()),
+ _sorted_indexes_merge_buffer(_posting_lists.size()),
+ _doc_ids(_posting_lists.size()),
+ _intervals(_posting_lists.size()),
+ _subqueries(_posting_lists.size()),
+ _subquery_markers(new uint64_t[max_interval_range+1]),
+ _visited(new bool[max_interval_range+1]),
+ _termFieldMatchData(tfmda.valid()? tfmda[0] : nullptr),
+ _min_feature_vector(minFeatureVector),
+ _interval_range_vector(interval_range_vector),
+ _max_interval_range(max_interval_range)
+{
+
+ for (size_t i = 0; i < _posting_lists.size(); ++i) {
+ _sorted_indexes[i] = i;
+ _doc_ids[i] = _posting_lists[i]->getDocId();
+ _subqueries[i] = _posting_lists[i]->getSubquery();
+ }
+}
+
+PredicateSearch::~PredicateSearch()
+{
+ delete [] _visited;
+ delete [] _subquery_markers;
+}
+
+bool PredicateSearch::advanceOneTo(uint32_t doc_id, size_t index) {
+ size_t i = _sorted_indexes[index];
+ if (__builtin_expect(_posting_lists[i]->next(doc_id - 1), true)) {
+ _doc_ids[i] = _posting_lists[i]->getDocId();
+ return true;
+ }
+ _doc_ids[i] = UINT32_MAX; // will be last after sorting.
+ return false;
+}
+
+namespace {
+template <typename CompareType>
+void sort_indexes(uint16_t *indexes, size_t size, CompareType *values) {
+ std::sort(indexes, indexes + size,
+ [&] (uint16_t a, uint16_t b) { return values[a] < values[b]; });
+}
+} // namespace
+
+void PredicateSearch::advanceAllTo(uint32_t doc_id) {
+ size_t i = 0;
+ size_t completed_count = 0;
+ for (; i < _sorted_indexes.size() && _doc_ids[_sorted_indexes[i]] < doc_id; ++i) {
+ if (!advanceOneTo(doc_id, i)) {
+ ++completed_count;
+ }
+ }
+ if (__builtin_expect((i > 0) && ! _sorted_indexes.empty(), true)) {
+ sort_indexes(&_sorted_indexes[0], i, &_doc_ids[0]);
+ std::merge(
+ _sorted_indexes.begin(), _sorted_indexes.begin() + i,
+ _sorted_indexes.begin() + i, _sorted_indexes.end(),
+ _sorted_indexes_merge_buffer.begin(),
+ [&] (uint16_t a, uint16_t b) {
+ return _doc_ids[a] < _doc_ids[b];
+ });
+ _sorted_indexes.swap(_sorted_indexes_merge_buffer);
+ // After sorting and merging the completed indexes are at the end.
+ _sorted_indexes.resize(_sorted_indexes.size() - completed_count);
+ _sorted_indexes_merge_buffer.resize(_sorted_indexes.size());
+ }
+}
+
+
+namespace {
+bool isNotInterval(uint32_t begin, uint32_t end) {
+ return begin > end;
+}
+
+void markSubquery(uint32_t begin, uint32_t end, uint64_t subquery, uint64_t *subquery_markers, bool * visited) {
+ if (visited[begin]) {
+ visited[end] = true;
+ subquery_markers[end] |= subquery;
+ }
+}
+
+// Returns the semantic interval end - or UINT32_MAX if no interval cover is possible
+uint32_t addInterval(uint32_t interval, uint64_t subquery,
+ uint64_t *subquery_markers, bool * visited, uint32_t highest_end_seen) {
+ uint32_t begin = interval >> 16;
+ uint32_t end = interval & 0xffff;
+
+ if (isNotInterval(begin, end)) {
+ // Note: End and begin values are swapped for zStar intervals
+ if (highest_end_seen < end) return UINT32_MAX;
+ markSubquery(end, begin, ~(subquery_markers[end]), subquery_markers, visited);
+ return begin;
+ } else {
+ if (highest_end_seen < begin - 1) return UINT32_MAX;
+ markSubquery(begin - 1, end, subquery_markers[begin - 1] & subquery, subquery_markers, visited);
+ return end;
+ }
+}
+void restoreSortedOrder(size_t first, size_t last,
+ vector<uint16_t> &indexes,
+ const vector<uint32_t> &intervals) __attribute__((noinline));
+
+// One step of insertion sort: First element is moved to correct position.
+void restoreSortedOrder(size_t first, size_t last,
+ vector<uint16_t> &indexes,
+ const vector<uint32_t> &intervals) {
+ uint32_t interval_to_move = intervals[indexes[first]];
+ uint16_t index_to_move = indexes[first];
+ while (++first < last && interval_to_move > intervals[indexes[first]]) {
+ indexes[first - 1] = indexes[first];
+ }
+ indexes[first - 1] = index_to_move;
+}
+
+} // namespace
+
+bool PredicateSearch::evaluateHit(uint32_t doc_id, uint32_t k) {
+ size_t candidates = sortIntervals(doc_id, k);
+
+ size_t interval_end = _interval_range_vector[doc_id];
+ memset(_subquery_markers, 0, sizeof(uint64_t) * (interval_end + 1));
+ memset(_visited, false, sizeof(bool) * (interval_end + 1));
+ _subquery_markers[0] = UINT64_MAX;
+ _visited[0] = true;
+
+ uint32_t highest_end_seen = 1;
+ for (size_t i = 0; i < candidates; ) {
+ size_t index = _sorted_indexes[i];
+ uint32_t last_end_seen = addInterval(
+ _intervals[index], _subqueries[index], _subquery_markers, _visited, highest_end_seen);
+ if (last_end_seen == UINT32_MAX) {
+ return false;
+ }
+ highest_end_seen = std::max(last_end_seen, highest_end_seen);
+ if (_posting_lists[index]->nextInterval()) {
+ _intervals[index] = _posting_lists[index]->getInterval();
+ restoreSortedOrder(i, candidates, _sorted_indexes, _intervals);
+ } else {
+ ++i;
+ }
+ }
+ return _subquery_markers[interval_end] != 0;
+}
+
+size_t PredicateSearch::sortIntervals(uint32_t doc_id, uint32_t k) {
+ size_t candidates = k + 1;
+ for (size_t i = candidates; i < _sorted_indexes.size(); ++i) {
+ if (_doc_ids[_sorted_indexes[i]] == doc_id) {
+ ++candidates;
+ } else {
+ break;
+ }
+ }
+ for (size_t i = 0; i < candidates; i++) {
+ _intervals[_sorted_indexes[i]] = _posting_lists[_sorted_indexes[i]]->getInterval();
+ }
+ sort_indexes(&_sorted_indexes[0], candidates, &_intervals[0]);
+ return candidates;
+}
+
+void PredicateSearch::skipMinFeature(uint32_t doc_id_in)
+{
+ uint32_t doc_id;
+ for (doc_id = _skip->next(); doc_id < doc_id_in; doc_id = _skip->next());
+
+ if (__builtin_expect( ! isAtEnd(doc_id), true)) {
+ advanceAllTo(doc_id);
+ } else {
+ setAtEnd();
+ }
+}
+
+void PredicateSearch::doSeek(uint32_t doc_id) {
+ skipMinFeature(doc_id);
+ while (!_sorted_indexes.empty() && ! isAtEnd()) {
+ uint32_t doc_id_0 = _doc_ids[_sorted_indexes[0]];
+ uint8_t min_feature = _min_feature_vector[doc_id_0];
+ uint8_t k = static_cast<uint8_t>(min_feature == 0 ? 0 : min_feature - 1);
+ if (k < _sorted_indexes.size()) {
+ uint32_t doc_id_k = _doc_ids[_sorted_indexes[k]];
+ if (doc_id_0 == doc_id_k) {
+ if (evaluateHit(doc_id_0, k)) {
+ setDocId(doc_id_0);
+ return;
+ }
+ }
+ }
+ skipMinFeature(doc_id_0 + 1);
+ }
+ setAtEnd();
+}
+
+void PredicateSearch::doUnpack(uint32_t doc_id) {
+ if (doc_id == getDocId()) {
+ if (_termFieldMatchData) {
+ auto end = _interval_range_vector[doc_id];
+ _termFieldMatchData
+ ->setSubqueries(doc_id, _subquery_markers[end]);
+ }
+ }
+}
+
+} // namespace search::queryeval
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/queryeval/predicate_search.h b/searchlib/src/vespa/searchlib/queryeval/predicate_search.h
new file mode 100644
index 00000000000..f1461eb76b0
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/predicate_search.h
@@ -0,0 +1,71 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "searchiterator.h"
+#include <vespa/searchlib/predicate/predicate_posting_list.h>
+#include <vespa/searchlib/common/condensedbitvectors.h>
+#include <vespa/searchlib/predicate/predicate_tree_annotator.h>
+#include <vector>
+
+namespace search {
+namespace fef {
+class TermFieldMatchData;
+class TermFieldMatchDataArray;
+} // namespace fef
+namespace queryeval {
+
+
+
+class SkipMinFeature
+{
+public:
+ typedef std::unique_ptr<SkipMinFeature> UP;
+ virtual ~SkipMinFeature() { }
+ VESPA_DLL_LOCAL virtual uint32_t next() = 0;
+ static SkipMinFeature::UP create(const uint8_t * min_feature, const uint8_t * kv, size_t sz);
+};
+
+/**
+ * Search iterator implementing the interval algorithm for boolean
+ * search. It operates on PredicatePostingLists, as defined above.
+ */
+using IntervalRange = uint16_t;
+
+class PredicateSearch : public SearchIterator {
+ SkipMinFeature::UP _skip;
+ std::vector<predicate::PredicatePostingList::UP> _posting_lists;
+ std::vector<uint16_t> _sorted_indexes;
+ std::vector<uint16_t> _sorted_indexes_merge_buffer;
+ std::vector<uint32_t> _doc_ids;
+ std::vector<uint32_t> _intervals;
+ std::vector<uint64_t> _subqueries;
+ uint64_t *_subquery_markers;
+ bool * _visited;
+ fef::TermFieldMatchData *_termFieldMatchData;
+ const uint8_t * _min_feature_vector;
+ const IntervalRange * _interval_range_vector;
+ const IntervalRange _max_interval_range;
+
+ VESPA_DLL_LOCAL bool advanceOneTo(uint32_t doc_id, size_t index);
+ VESPA_DLL_LOCAL void advanceAllTo(uint32_t doc_id);
+ VESPA_DLL_LOCAL bool evaluateHit(uint32_t doc_id, uint32_t k);
+ VESPA_DLL_LOCAL size_t sortIntervals(uint32_t doc_id, uint32_t k);
+ VESPA_DLL_LOCAL void skipMinFeature(uint32_t doc_id) __attribute__((noinline));
+
+public:
+ PredicateSearch(const uint8_t * minFeature,
+ const IntervalRange * interval_range_vector,
+ IntervalRange max_interval_range,
+ CondensedBitVector::CountVector kV,
+ std::vector<predicate::PredicatePostingList::UP> posting_lists,
+ const fef::TermFieldMatchDataArray &tfmda);
+ ~PredicateSearch();
+
+ void doSeek(uint32_t doc_id) override;
+ void doUnpack(uint32_t doc_id) override;
+};
+
+} // namespace search::queryeval
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/queryeval/ranksearch.cpp b/searchlib/src/vespa/searchlib/queryeval/ranksearch.cpp
new file mode 100644
index 00000000000..58d61d83d08
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/ranksearch.cpp
@@ -0,0 +1,64 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "ranksearch.h"
+
+namespace search {
+namespace queryeval {
+
+void
+RankSearch::doSeek(uint32_t docid)
+{
+ SearchIterator & firstChild(**getChildren().begin());
+ if (firstChild.seek(docid)) {
+ setDocId(docid);
+ }
+}
+
+namespace {
+/**
+ * A simple implementation of the strict Rank search operation.
+ **/
+class RankSearchStrict : public RankSearch
+{
+protected:
+ void doSeek(uint32_t docid) override;
+ UP andWith(UP filter, uint32_t estimate) override;;
+
+public:
+ /**
+ * Create a new Rank Search with the given children and
+ * strictness. A strict Rank can assume that the first child below
+ * is also strict. No such assumptions can be made about the other
+ * children.
+ *
+ * @param children the search objects we are rank'ing
+ **/
+ RankSearchStrict(const Children & children) : RankSearch(children) { }
+};
+
+SearchIterator::UP
+RankSearchStrict::andWith(UP filter, uint32_t estimate)
+{
+ return getChildren()[0]->andWith(std::move(filter), estimate);
+}
+
+void
+RankSearchStrict::doSeek(uint32_t docid)
+{
+ SearchIterator & firstChild(**getChildren().begin());
+ setDocId(firstChild.seek(docid) ? docid : firstChild.getDocId());
+}
+} // namespace
+
+SearchIterator *
+RankSearch::create(const RankSearch::Children &children, bool strict) {
+ if (strict) {
+ return new RankSearchStrict(children);
+ } else {
+ return new RankSearch(children);
+ }
+}
+
+} // namespace queryeval
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/queryeval/ranksearch.h b/searchlib/src/vespa/searchlib/queryeval/ranksearch.h
new file mode 100644
index 00000000000..7dfd215d724
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/ranksearch.h
@@ -0,0 +1,33 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "multisearch.h"
+
+namespace search {
+namespace queryeval {
+
+/**
+ * A simple implementation of the Rank search operation.
+ **/
+class RankSearch : public MultiSearch
+{
+protected:
+ void doSeek(uint32_t docid) override;
+
+ /**
+ * Create a new Rank Search with the given children. A non-strict Rank has
+ * no strictness assumptions about its children.
+ *
+ * @param children the search objects we are rank'ing
+ **/
+ RankSearch(const Children & children) : MultiSearch(children) { }
+
+public:
+ // Caller takes ownership of the returned SearchIterator.
+ static SearchIterator *create(const Children &children, bool strict);
+};
+
+} // namespace queryeval
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/queryeval/scores.h b/searchlib/src/vespa/searchlib/queryeval/scores.h
new file mode 100644
index 00000000000..776e8acb4bd
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/scores.h
@@ -0,0 +1,21 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/common/feature.h>
+
+namespace search {
+namespace queryeval {
+
+struct Scores {
+ feature_t low;
+ feature_t high;
+ Scores() : low(1), high(0) {}
+ Scores(feature_t l, feature_t h) : low(l), high(h) {}
+
+ bool isValid() const { return low <= high; }
+};
+
+} // namespace queryeval
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/queryeval/searchable.cpp b/searchlib/src/vespa/searchlib/queryeval/searchable.cpp
new file mode 100644
index 00000000000..be1487eb7fd
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/searchable.cpp
@@ -0,0 +1,33 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".searchable");
+#include "searchable.h"
+#include "leaf_blueprints.h"
+#include "intermediate_blueprints.h"
+
+namespace search {
+namespace queryeval {
+
+Blueprint::UP
+Searchable::createBlueprint(const IRequestContext & requestContext,
+ const FieldSpecList &fields,
+ const search::query::Node &term)
+{
+ if (fields.empty()) {
+ return Blueprint::UP(new EmptyBlueprint());
+ }
+ if (fields.size() == 1) {
+ return createBlueprint(requestContext, fields[0], term);
+ }
+ OrBlueprint *b = new OrBlueprint();
+ Blueprint::UP result(b);
+ for (size_t i = 0; i < fields.size(); ++i) {
+ b->addChild(createBlueprint(requestContext, fields[i], term));
+ }
+ return result;
+}
+
+} // namespace queryeval
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/queryeval/searchable.h b/searchlib/src/vespa/searchlib/queryeval/searchable.h
new file mode 100644
index 00000000000..b386793de8e
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/searchable.h
@@ -0,0 +1,60 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vector>
+#include "field_spec.h"
+#include "blueprint.h"
+#include <vespa/searchlib/queryeval/irequestcontext.h>
+
+namespace search {
+
+namespace query { class Node; }
+
+namespace queryeval {
+
+/**
+ * Abstract class extended by components to expose content that can be
+ * searched by a query term. A Searchable component supports searching
+ * in one or more named fields. The Blueprint created by a Searchable
+ * is an intermediate query representation that is later used to
+ * create the actual search iterators used to produce matches.
+ **/
+class Searchable
+{
+protected:
+ /**
+ * Create a blueprint searching a single field.
+ *
+ * @return blueprint
+ * @param requestContext that belongs to the query
+ * @param field the field to search
+ * @param term the query tree term
+ **/
+ virtual Blueprint::UP createBlueprint(const IRequestContext & requestContext,
+ const FieldSpec &field,
+ const search::query::Node &term) = 0;
+
+public:
+ typedef std::shared_ptr<Searchable> SP;
+
+ Searchable() {}
+
+ /**
+ * Create a blueprint searching a set of fields. The default
+ * implementation of this function will create blueprints for
+ * individual fields and combine them with an OR blueprint.
+ *
+ * @return blueprint
+ * @param requestContext that belongs to the query
+ * @param fields the set of fields to search
+ * @param term the query tree term
+ **/
+ virtual Blueprint::UP createBlueprint(const IRequestContext & requestContext,
+ const FieldSpecList &fields,
+ const search::query::Node &term);
+ virtual ~Searchable() {}
+};
+
+} // namespace queryeval
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/queryeval/searchiterator.cpp b/searchlib/src/vespa/searchlib/queryeval/searchiterator.cpp
new file mode 100644
index 00000000000..2e439acbf14
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/searchiterator.cpp
@@ -0,0 +1,129 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".searchbase");
+#include "searchiterator.h"
+#include <vespa/searchlib/index/docidandfeatures.h>
+#include <vespa/vespalib/objects/objectdumper.h>
+#include <vespa/vespalib/objects/objectvisitor.h>
+#include <vespa/vespalib/objects/visit.h>
+#include <typeinfo>
+
+// NB: might need to hide this from non-gcc compilers...
+#include <cxxabi.h>
+
+namespace search {
+namespace queryeval {
+
+SearchIterator::SearchIterator() :
+ _docid(0),
+ _endid(0)
+{
+}
+
+void
+SearchIterator::resetRange()
+{
+ _docid = 0;
+ _endid = 0;
+}
+
+SearchIterator::~SearchIterator()
+{
+}
+
+void
+SearchIterator::initRange(uint32_t beginid, uint32_t endid)
+{
+ _docid = beginid - 1;
+ _endid = endid;
+}
+
+BitVector::UP
+SearchIterator::get_hits(uint32_t begin_id)
+{
+ BitVector::UP result(BitVector::create(getEndId()));
+ uint32_t docid = std::max(begin_id, getDocId());
+ while (!isAtEnd(docid)) {
+ if (seek(docid)) {
+ result->setBit(docid);
+ }
+ docid = std::max(docid + 1, getDocId());
+ }
+ return result;
+}
+
+SearchIterator::UP
+SearchIterator::andWith(UP filter, uint32_t estimate)
+{
+ (void) estimate;
+ return filter;
+}
+
+void
+SearchIterator::or_hits_into(BitVector &result, uint32_t begin_id)
+{
+ BitVector::UP tmp = get_hits(begin_id);
+ const BitVector &rhs = *tmp;
+ result.orWith(rhs);
+}
+
+void
+SearchIterator::and_hits_into(BitVector &result, uint32_t begin_id)
+{
+ BitVector::UP tmp = get_hits(begin_id);
+ const BitVector &rhs = *tmp;
+ result.andWith(rhs);
+}
+
+vespalib::string
+SearchIterator::asString() const
+{
+ vespalib::ObjectDumper dumper;
+ visit(dumper, "", this);
+ return dumper.toString();
+}
+
+vespalib::string
+SearchIterator::getClassName() const
+{
+ vespalib::string name(typeid(*this).name());
+ int status = 0;
+ size_t size = 0;
+ // NB: might need to hide this from non-gcc compilers...
+ char *unmangled = abi::__cxa_demangle(name.c_str(), 0, &size, &status);
+ vespalib::string result(unmangled);
+ free(unmangled);
+ return result;
+}
+
+void
+SearchIterator::visitMembers(vespalib::ObjectVisitor &visitor) const
+{
+ visit(visitor, "docid", _docid);
+ visit(visitor, "endid", _endid);
+}
+
+} // namespace queryeval
+} // namespace search
+
+//-----------------------------------------------------------------------------
+
+void visit(vespalib::ObjectVisitor &self, const vespalib::string &name,
+ const search::queryeval::SearchIterator *obj)
+{
+ if (obj != 0) {
+ self.openStruct(name, obj->getClassName());
+ obj->visitMembers(self);
+ self.closeStruct();
+ } else {
+ self.visitNull(name);
+ }
+}
+
+void visit(vespalib::ObjectVisitor &self, const vespalib::string &name,
+ const search::queryeval::SearchIterator &obj)
+{
+ visit(self, name, &obj);
+}
diff --git a/searchlib/src/vespa/searchlib/queryeval/searchiterator.h b/searchlib/src/vespa/searchlib/queryeval/searchiterator.h
new file mode 100644
index 00000000000..0aae661a6df
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/searchiterator.h
@@ -0,0 +1,345 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <memory>
+#include <stdint.h>
+#include "posting_info.h"
+#include "begin_and_end_id.h"
+#include <vespa/vespalib/stllike/string.h>
+#include <vespa/vespalib/util/trinary.h>
+#include <vespa/searchlib/common/bitvector.h>
+
+namespace vespalib { class ObjectVisitor; };
+
+namespace search {
+
+namespace queryeval {
+
+/**
+ * This is the abstract superclass of all search objects. Each search
+ * object act as an iterator over documents that are results for the
+ * subquery represented by that search object. Search objects will be
+ * combined into a tree structure to perform query evaluation in
+ * parallel. The unpack method is used to unpack match information for
+ * a document. The placement and format of this match data is a
+ * contract between the application and the leaf search objects and is
+ * of no concern to the interface defined by this class.
+ **/
+class SearchIterator
+{
+private:
+ SearchIterator(const SearchIterator &);
+ SearchIterator &operator=(const SearchIterator &);
+
+ /**
+ * The current document id for this search object. This variable
+ * will have a value that is either @ref beginId, @ref endId or a
+ * document id representing a hit for this search object.
+ **/
+ uint32_t _docid;
+
+ /**
+ * This is the end of the the lidspace this iterator shall consider.
+ */
+ uint32_t _endid;
+
+protected:
+ /**
+ * This method is used by the @ref doSeek method to indicate that
+ * a document is a hit. This method is also used to indicate that
+ * no more hits are available by using the @ref endId value.
+ *
+ * @param id docid for hit
+ **/
+ void setDocId(uint32_t id) { _docid = id; }
+
+ /**
+ * Will terminate the iterator by setting it past the end.
+ * Further calls to isAtEnd() will then return true.
+ */
+ void setAtEnd() { _docid = search::endDocId; }
+
+public:
+ using Trinary=vespalib::Trinary;
+ // doSeek and doUnpack are called by templated classes, so making
+ // them public to avoid complicated friend requests. Note that if
+ // you call doSeek and doUnpack directly instead of using
+ // seek/unpack, you are bypassing docid checks and need to know
+ // what you are doing.
+
+ /**
+ * This method must be overridden to perform the actual seeking
+ * for the concrete search class. The task of this method is to
+ * check whether the given document id is a hit for this search
+ * object. The current document id is changed with the @ref
+ * setDocId method. When this method returns, the current document
+ * id must have been updated as follows: if the candidate document
+ * id was in fact a hit, this is now the new current document
+ * id. If the candidate document id was not a hit, the method may
+ * choose to either leave the current document id as is, or
+ * increase it to indicate the next hit for this search object
+ * (@ref endId being a valid value).
+ *
+ * @param docid hit candidate
+ **/
+ virtual void doSeek(uint32_t docid) = 0;
+
+ /**
+ * This method must be overridden to perform the actual unpacking
+ * for the concrete search class. The task of this method is to
+ * unpack match information for the given docid. This method can
+ * assume that the given document is also the current position of
+ * the iterator. This is checked by the @ref unpack method which
+ * invokes this method.
+ *
+ * @param docid what docid to unpack match information for.
+ **/
+ virtual void doUnpack(uint32_t docid) = 0;
+
+ /**
+ * This sets the range the iterator shall work.
+ * As soon as it reaches its limit it can stop.
+ * Iterators can overload this one and do what it needs to do.
+ * It must also rewind if instructed to do so.
+ *
+ * @param beginId This is the first valid docId and the lowest that will be given to doSeek.
+ * @param endId This is the first docid after the valid range.
+ */
+ virtual void initRange(uint32_t beginId, uint32_t endId);
+ /**
+ * Will initialize the full range.
+ **/
+ void initFullRange() { initRange(1, search::endDocId); }
+
+ /**
+ * Temporary to explicitt rewind iterator.
+ */
+ virtual void resetRange();
+
+ /**
+ * Find all hits in the currently searched range (specified by
+ * initRange) and return them as a bitvector. This function will
+ * perform term-at-a-time evaluation and should only be used for
+ * terms not needed for ranking. Calling this function will
+ * exhaust this iterator and no more results will be available in
+ * the currently searched range after this function returns.
+ *
+ * @return bitvector with hits for this iterator
+ * @param begin_id the lowest document id that may be a hit
+ * (we do not remember beginId from initRange)
+ **/
+ virtual BitVector::UP get_hits(uint32_t begin_id);
+
+ /**
+ * Find all hits in the currently searched range (specified by
+ * initRange) and OR them into the given temporary result. This
+ * function will perform term-at-a-time evaluation and should only
+ * be used for terms not needed for ranking. Calling this function
+ * will exhaust this iterator and no more results will be
+ * available in the currently searched range after this function
+ * returns.
+ *
+ * @param result result to be augmented by adding hits from this
+ * iterator.
+ * @param begin_id the lowest document id that may be a hit
+ * (we might not remember beginId from initRange)
+ **/
+ virtual void or_hits_into(BitVector &result, uint32_t begin_id);
+
+ /**
+ * Find all hits in the currently searched range (specified by
+ * initRange) and OR them into the given temporary result. This
+ * function will perform term-at-a-time evaluation and should only
+ * be used for terms not needed for ranking. Calling this function
+ * will exhaust this iterator and no more results will be
+ * available in the currently searched range after this function
+ * returns.
+ *
+ * @param result result to be augmented by adding hits from this
+ * iterator.
+ * @param begin_id the lowest document id that may be a hit
+ * (we might not remember beginId from initRange)
+ **/
+ virtual void and_hits_into(BitVector &result, uint32_t begin_id);
+
+public:
+ typedef std::unique_ptr<SearchIterator> UP;
+
+ /**
+ * The constructor sets the current document id to @ref beginId.
+ **/
+ SearchIterator();
+
+
+ /**
+ * Special value indicating that this searcher has not yet started
+ * seeking through documents. This must match beginId() in
+ * search::fef::TermFieldMatchData class.
+ *
+ * @return constant
+ **/
+ static uint32_t beginId() { return beginDocId; }
+
+ /**
+ * Tell if the iterator has reached the end.
+ *
+ * @return true if the iterator has reached its end.
+ **/
+ bool isAtEnd() const { return isAtEnd(_docid); }
+ bool isAtEnd(uint32_t docid) const {
+ if (__builtin_expect(docid >= _endid, false)) {
+ assert (_endid != 0);
+ return true;
+ }
+ return false;
+ }
+
+ /**
+ * Obtain the current document id for this search object. The
+ * value is either @ref beginId, @ref endId or a document id
+ * representing a hit for this search object.
+ *
+ * @return current document id
+ **/
+ uint32_t getDocId() const { return _docid; }
+
+ uint32_t getEndId() const { return _endid; }
+
+ /**
+ * Check if the given document id is a hit. If it is a hit, the
+ * current document id of this search object is set to the given
+ * document id. If it is not a hit, the current document id is
+ * either unchanged, set to the next hit, or set to @ref endId.
+ *
+ * @return true if the given document id is a hit.
+ * @param docid hit candidate
+ **/
+ bool seek(uint32_t docid) {
+ if (__builtin_expect(docid > _docid, true)) {
+ doSeek(docid);
+ }
+ return (docid == _docid);
+ }
+
+ /**
+ * Seek to the next docid and return it. Start with the one given.
+ * With protection for going backWards.
+ * Note that this requires the iterator to be strict.
+ *
+ * @return the first matching docid
+ * @param docid hit candidate
+ **/
+ uint32_t seekFirst(uint32_t docid) {
+ if (__builtin_expect(docid > _docid, true)) {
+ doSeek(docid);
+ }
+ return _docid;
+ }
+
+ /**
+ * Seek to the next docid and return it. Start with the one given.
+ * Without protection for going backWards.
+ * Note that this requires the iterator to be strict.
+ *
+ * @return the first matching docid
+ * @param docid hit candidate
+ **/
+ uint32_t seekNext(uint32_t docid) {
+ doSeek(docid);
+ return _docid;
+ }
+
+ /**
+ * Unpack hit information for the given docid if available. This
+ * method may also change the current docid for this iterator.
+ *
+ * @param docid what docid to unpack match information for.
+ **/
+ void unpack(uint32_t docid) {
+ if (__builtin_expect(seek(docid), true)) {
+ doUnpack(docid);
+ }
+ }
+
+ /**
+ * Return global posting info associated with this search iterator.
+ *
+ * @return global posting info or NULL if no info is available.
+ **/
+ virtual const PostingInfo *getPostingInfo() const { return NULL; }
+
+ /**
+ * Create a human-readable representation of this object. This
+ * method will use object visitation internally to capture the
+ * full structure of this object.
+ *
+ * @return structured human-readable representation of this object
+ **/
+ vespalib::string asString() const;
+
+ /**
+ * Obtain the fully qualified name of the concrete class for this
+ * object. The default implementation will perform automatic name
+ * resolving. There is only a need to override this function if
+ * you want to impersonate another class.
+ *
+ * @return fully qualified class name
+ **/
+ virtual vespalib::string getClassName() const;
+
+ /**
+ * Visit each of the members of this object. This method should be
+ * overridden by subclasses and should present all appropriate
+ * internal structure of this object to the given visitor. Note
+ * that while each level of a class hierarchy may cooperate to
+ * visit all object members (invoking superclass method within
+ * method), this method, as implemented in the SearchIterator class
+ * should not be invoked, since its default implementation is
+ * there to signal about the method not being overridden.
+ *
+ * @param visitor the visitor of this object
+ **/
+ virtual void visitMembers(vespalib::ObjectVisitor &visitor) const;
+
+ /**
+ * Empty, just defined to make it virtual.
+ **/
+ virtual ~SearchIterator();
+
+ /**
+ * @return true if it is a bitvector
+ */
+ virtual bool isBitVector() const { return false; }
+ /**
+ * @return true if it is a source blender
+ */
+ virtual bool isSourceBlender() const { return false; }
+ /**
+ * @return true if it is a multi search
+ */
+ virtual bool isMultiSearch() const { return false; }
+
+ /**
+ * This is used for adding an extra filter. If it is accepted it will return an empty UP.
+ * If not you will get in in return. Currently it will only be accepted by a
+ * MultiBitVector<And> with a pure 'and' path down if it is an BitVector,
+ * or by a strict AND with a pure 'and' path. Be careful if you you plan to steal the filter.
+ *
+ * @param filter the searchiterator that is an extra filter.
+ * @param estimate is the number of hits this filter is expected to produce.
+ * @return the given filter or empty if it has been consumed.
+ **/
+ virtual UP andWith(UP filter, uint32_t estimate);
+
+ virtual Trinary is_strict() const { return Trinary::Undefined; }
+};
+
+} // namespace queryeval
+} // namespace search
+
+void visit(vespalib::ObjectVisitor &self, const vespalib::string &name,
+ const search::queryeval::SearchIterator &obj);
+void visit(vespalib::ObjectVisitor &self, const vespalib::string &name,
+ const search::queryeval::SearchIterator *obj);
+
diff --git a/searchlib/src/vespa/searchlib/queryeval/simple_phrase_blueprint.cpp b/searchlib/src/vespa/searchlib/queryeval/simple_phrase_blueprint.cpp
new file mode 100644
index 00000000000..8f06823ea37
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/simple_phrase_blueprint.cpp
@@ -0,0 +1,105 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".simple_phrase_blueprint");
+
+#include "simple_phrase_blueprint.h"
+#include "simple_phrase_search.h"
+#include <vespa/searchlib/fef/termfieldmatchdata.h>
+#include <vespa/searchlib/queryeval/searchiterator.h>
+#include <vespa/vespalib/objects/visit.h>
+#include <algorithm>
+#include <map>
+
+namespace search {
+namespace queryeval {
+
+SimplePhraseBlueprint::SimplePhraseBlueprint(const FieldSpec &field, const IRequestContext & requestContext)
+ : ComplexLeafBlueprint(field),
+ _doom(requestContext.getDoom()),
+ _field(field),
+ _estimate(),
+ _layout(),
+ _terms()
+{
+}
+
+SimplePhraseBlueprint::~SimplePhraseBlueprint()
+{
+ while (!_terms.empty()) {
+ delete _terms.back();
+ _terms.pop_back();
+ }
+}
+
+FieldSpec
+SimplePhraseBlueprint::getNextChildField(const FieldSpec &outer)
+{
+ return FieldSpec(outer.getName(), outer.getFieldId(), _layout.allocTermField(outer.getFieldId()), false);
+}
+
+void
+SimplePhraseBlueprint::addTerm(Blueprint::UP term)
+{
+ const State &childState = term->getState();
+ assert(childState.numFields() == 1);
+ const FieldSpecBase &childField = childState.field(0);
+ assert(childField.getFieldId() == _field.getFieldId());
+ (void) childField;
+
+ HitEstimate childEst = childState.estimate();
+ if (_terms.empty() || childEst < _estimate) {
+ _estimate = childEst;
+ }
+ setEstimate(_estimate);
+ _terms.push_back(term.get());
+ term.release();
+}
+
+SearchIterator::UP
+SimplePhraseBlueprint::createLeafSearch(const search::fef::TermFieldMatchDataArray &tfmda,
+ bool strict) const
+{
+ assert(tfmda.size() == 1);
+ fef::MatchData::UP md = _layout.createMatchData();
+ search::fef::TermFieldMatchDataArray childMatch;
+ SimplePhraseSearch::Children children(_terms.size());
+ std::multimap<uint32_t, uint32_t> order_map;
+ for (size_t i = 0; i < _terms.size(); ++i) {
+ const State &childState = _terms[i]->getState();
+ assert(childState.numFields() == 1);
+ childMatch.add(childState.field(0).resolve(*md));
+ children[i] = _terms[i]->createSearch(*md, strict).release();
+ order_map.insert(std::make_pair(childState.estimate().estHits, i));
+ }
+ std::vector<uint32_t> eval_order;
+ for (std::multimap<uint32_t, uint32_t>::iterator
+ it = order_map.begin(); it != order_map.end(); ++it) {
+ eval_order.push_back(it->second);
+ }
+
+ SimplePhraseSearch * phrase = new SimplePhraseSearch(children, std::move(md), childMatch,
+ eval_order, *tfmda[0], strict);
+ phrase->setDoom(& _doom);
+ return SearchIterator::UP(phrase);
+}
+
+
+void
+SimplePhraseBlueprint::fetchPostings(bool strict)
+{
+ for (size_t i = 0; i < _terms.size(); ++i) {
+ _terms[i]->fetchPostings(strict);
+ }
+}
+
+void
+SimplePhraseBlueprint::visitMembers(vespalib::ObjectVisitor &visitor) const
+{
+ LeafBlueprint::visitMembers(visitor);
+ visit(visitor, "terms", _terms);
+}
+
+} // namespace search::queryeval
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/queryeval/simple_phrase_blueprint.h b/searchlib/src/vespa/searchlib/queryeval/simple_phrase_blueprint.h
new file mode 100644
index 00000000000..fc1fde7e2c6
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/simple_phrase_blueprint.h
@@ -0,0 +1,49 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "searchable.h"
+#include <vespa/searchlib/fef/matchdatalayout.h>
+#include <vespa/searchlib/queryeval/irequestcontext.h>
+#include <memory>
+#include <vector>
+
+namespace search {
+namespace fef { class TermFieldMatchData; }
+
+namespace queryeval {
+
+class SimplePhraseBlueprint : public ComplexLeafBlueprint
+{
+private:
+ const vespalib::Doom _doom;
+ FieldSpec _field;
+ HitEstimate _estimate;
+ fef::MatchDataLayout _layout;
+ std::vector<Blueprint*> _terms;
+
+ SimplePhraseBlueprint(const SimplePhraseBlueprint &); // disabled
+ SimplePhraseBlueprint &operator=(const SimplePhraseBlueprint &); // disabled
+
+public:
+ SimplePhraseBlueprint(const FieldSpec &field, const IRequestContext & requestContext);
+ virtual ~SimplePhraseBlueprint();
+
+ // used by create visitor
+ FieldSpec getNextChildField(const FieldSpec &outer);
+
+ // used by create visitor
+ void addTerm(Blueprint::UP term);
+
+ virtual SearchIterator::UP
+ createLeafSearch(const search::fef::TermFieldMatchDataArray &tfmda,
+ bool strict) const;
+
+ virtual void visitMembers(vespalib::ObjectVisitor &visitor) const;
+
+ virtual void
+ fetchPostings(bool strict);
+};
+
+} // namespace search::queryeval
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/queryeval/simple_phrase_search.cpp b/searchlib/src/vespa/searchlib/queryeval/simple_phrase_search.cpp
new file mode 100644
index 00000000000..9a624724933
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/simple_phrase_search.cpp
@@ -0,0 +1,201 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".simple_phrase_search");
+
+#include "simple_phrase_search.h"
+#include <vespa/searchlib/fef/termfieldmatchdata.h>
+#include <vespa/vespalib/objects/visit.h>
+#include <algorithm>
+#include <functional>
+
+using search::fef::TermFieldMatchData;
+using std::unique_ptr;
+using std::mem_fun_ref;
+using std::transform;
+using std::vector;
+using vespalib::ObjectVisitor;
+
+namespace search {
+namespace queryeval {
+
+namespace {
+// Helper class
+class PhraseMatcher {
+ const fef::TermFieldMatchDataArray &_tmds;
+ const vector<uint32_t> &_eval_order;
+ vector<TermFieldMatchData::PositionsIterator> &_iterators;
+ uint32_t _element_id;
+ uint32_t _position;
+
+ TermFieldMatchData::PositionsIterator &iterator(uint32_t word_index)
+ { return _iterators[word_index]; }
+
+ TermFieldMatchData::PositionsIterator end(uint32_t word_index)
+ { return _tmds[word_index]->end(); }
+
+ uint32_t elementId(uint32_t word_index)
+ { return iterator(word_index)->getElementId(); }
+
+ uint32_t position(uint32_t word_index)
+ { return iterator(word_index)->getPosition(); }
+
+ void iterateToElement(uint32_t word_index) {
+ while (iterator(word_index) != end(word_index) &&
+ elementId(word_index) < _element_id) {
+ ++iterator(word_index);
+ }
+ }
+
+ template <typename FwdIt>
+ bool match(FwdIt first, FwdIt last) {
+ if (first == last) {
+ return true;
+ }
+ uint32_t word_index = *first;
+
+ iterateToElement(word_index);
+ while (iterator(word_index) != end(word_index) &&
+ elementId(word_index) == _element_id) {
+ if (position(word_index) == _position + word_index) {
+ return match(++first, last);
+ } else if (position(word_index) > _position + word_index) {
+ return false;
+ }
+ ++iterator(word_index);
+ }
+ return false;
+ }
+
+ bool match() {
+ _element_id = elementId(_eval_order[0]);
+ if (position(_eval_order[0]) < _eval_order[0]) {
+ // this position too early in element to allow match of other phrase terms
+ return false;
+ }
+ _position = position(_eval_order[0]) - _eval_order[0];
+ return match(++_eval_order.begin(), _eval_order.end());
+ }
+
+public:
+ PhraseMatcher(const fef::TermFieldMatchDataArray &tmds,
+ const vector<uint32_t> &eval_order,
+ vector<TermFieldMatchData::PositionsIterator> &iterators)
+ : _tmds(tmds),
+ _eval_order(eval_order),
+ _iterators(iterators)
+ {
+ for (size_t i = 0; i < _tmds.size(); ++i) {
+ _iterators[i] = _tmds[i]->begin();
+ }
+ }
+
+ bool hasMatch() {
+ if (_tmds.size() == 1) {
+ return true;
+ }
+
+ while (iterator(_eval_order[0]) != end(_eval_order[0])) {
+ if (match()) {
+ return true;
+ }
+ ++iterator(_eval_order[0]);
+ }
+ return false;
+ }
+
+ void fillPositions(TermFieldMatchData &tmd) {
+ if (_tmds.size() == 1) {
+ for (TermFieldMatchData::PositionsIterator
+ it = _tmds[0]->begin(); it != _tmds[0]->end(); ++it) {
+ tmd.appendPosition(*it);
+ }
+ } else {
+ while (iterator(_eval_order[0]) != end(_eval_order[0])) {
+ if (match()) {
+ tmd.appendPosition(*iterator(0));
+ }
+ ++iterator(_eval_order[0]);
+ }
+ }
+ }
+};
+
+bool allTermsHaveMatch(const SimplePhraseSearch::Children &terms,
+ const vector<uint32_t> &eval_order, uint32_t doc_id) {
+ for (uint32_t i = 0; i < terms.size(); ++i) {
+ if (!terms[eval_order[i]]->seek(doc_id)) {
+ return false;
+ }
+ }
+ return true;
+}
+} // namespace
+
+void SimplePhraseSearch::phraseSeek(uint32_t doc_id) {
+ if (allTermsHaveMatch(getChildren(), _eval_order, doc_id)) {
+ if ((_doom != nullptr) && _doom->doom()) {
+ setAtEnd();
+ } else {
+ AndSearch::doUnpack(doc_id);
+ if (PhraseMatcher(_childMatch, _eval_order, _iterators).hasMatch()) {
+ setDocId(doc_id);
+ }
+ }
+ }
+}
+
+
+SimplePhraseSearch::SimplePhraseSearch(const Children &children,
+ fef::MatchData::UP md,
+ const fef::TermFieldMatchDataArray &childMatch,
+ vector<uint32_t> eval_order,
+ TermFieldMatchData &tmd, bool strict)
+ : AndSearch(children),
+ _md(std::move(md)),
+ _childMatch(childMatch),
+ _eval_order(eval_order),
+ _tmd(tmd),
+ _doom(nullptr),
+ _strict(strict),
+ _iterators(children.size())
+{
+ assert(!children.empty());
+ assert(children.size() == _childMatch.size());
+ assert(children.size() == _eval_order.size());
+}
+
+void SimplePhraseSearch::doSeek(uint32_t doc_id) {
+ phraseSeek(doc_id);
+ if (_strict) {
+ uint32_t next_candidate = doc_id;
+ while (getDocId() < doc_id || getDocId() == beginId()) {
+ getChildren()[0]->seek(next_candidate + 1);
+ next_candidate = getChildren()[0]->getDocId();
+ if (isAtEnd(next_candidate)) {
+ setAtEnd();
+ return;
+ }
+ // child must behave as strict.
+ assert(next_candidate > doc_id && next_candidate != beginId());
+
+ phraseSeek(next_candidate);
+ }
+ }
+}
+
+void SimplePhraseSearch::doUnpack(uint32_t doc_id) {
+ // All children has already been unpacked before this call is made.
+
+ _tmd.reset(doc_id);
+ PhraseMatcher(_childMatch, _eval_order, _iterators).fillPositions(_tmd);
+}
+
+void SimplePhraseSearch::visitMembers(ObjectVisitor &visitor) const {
+ AndSearch::visitMembers(visitor);
+ visit(visitor, "strict", _strict);
+}
+
+} // namespace search::queryeval
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/queryeval/simple_phrase_search.h b/searchlib/src/vespa/searchlib/queryeval/simple_phrase_search.h
new file mode 100644
index 00000000000..5d8e7d592fe
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/simple_phrase_search.h
@@ -0,0 +1,59 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "andsearch.h"
+#include <vespa/searchlib/queryeval/irequestcontext.h>
+#include <vespa/searchlib/fef/matchdata.h>
+#include <vespa/searchlib/fef/termfieldmatchdataarray.h>
+#include <vespa/searchlib/fef/termfieldmatchdata.h>
+#include <memory>
+#include <vector>
+
+namespace search {
+
+namespace queryeval {
+
+/**
+ * Search iterator for a phrase, based on a set of child search iterators.
+ */
+class SimplePhraseSearch : public AndSearch
+{
+ fef::MatchData::UP _md;
+ fef::TermFieldMatchDataArray _childMatch;
+ std::vector<uint32_t> _eval_order;
+ fef::TermFieldMatchData &_tmd;
+ const vespalib::Doom *_doom;
+ bool _strict;
+
+ typedef fef::TermFieldMatchData::PositionsIterator It;
+ // Reuse this vector instead of allocating a new one when needed.
+ std::vector<It> _iterators;
+
+ void phraseSeek(uint32_t doc_id);
+
+public:
+ /**
+ * Takes ownership of the contents of children.
+ * If this iterator is strict, the first child also needs to be strict.
+ *
+ * @param children SearchIterator objects for each child.
+ * @param tmds TermFieldMatchData for the children.
+ * @param eval_order determines the order of evaluation for the
+ * terms. The term with fewest hits should be
+ * evaluated first.
+ **/
+ SimplePhraseSearch(const Children &children,
+ fef::MatchData::UP md,
+ const fef::TermFieldMatchDataArray &childMatch,
+ std::vector<uint32_t> eval_order,
+ fef::TermFieldMatchData &tmd, bool strict);
+
+ virtual void doSeek(uint32_t doc_id);
+ virtual void doUnpack(uint32_t doc_id);
+ virtual void visitMembers(vespalib::ObjectVisitor &visitor) const;
+ SimplePhraseSearch & setDoom(const vespalib::Doom * doom) { _doom = doom; return *this; }
+};
+
+} // namespace search::queryeval
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/queryeval/simpleresult.cpp b/searchlib/src/vespa/searchlib/queryeval/simpleresult.cpp
new file mode 100644
index 00000000000..f114eef8b39
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/simpleresult.cpp
@@ -0,0 +1,67 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".simpleresult");
+#include "simpleresult.h"
+
+namespace search {
+namespace queryeval {
+
+SimpleResult &
+SimpleResult::addHit(uint32_t docid)
+{
+ _hits.push_back(docid);
+ return *this;
+}
+
+void
+SimpleResult::clear()
+{
+ std::vector<uint32_t> tmp;
+ tmp.swap(_hits);
+}
+
+void
+SimpleResult::search(SearchIterator &sb)
+{
+ clear();
+ // assume strict toplevel search object located at start
+ sb.initFullRange();
+ for (sb.seek(1); !sb.isAtEnd(); sb.seek(sb.getDocId() + 1)) {
+ sb.unpack(sb.getDocId());
+ _hits.push_back(sb.getDocId());
+ }
+}
+
+void
+SimpleResult::search(SearchIterator &sb, uint32_t docIdLimit)
+{
+ clear();
+ // assume non-strict toplevel search object
+ sb.initFullRange();
+ for (uint32_t docId = 1; docId < docIdLimit; ++docId) {
+ if (sb.seek(docId)) {
+ assert(docId == sb.getDocId());
+ sb.unpack(docId);
+ _hits.push_back(docId);
+ }
+ }
+}
+
+std::ostream &
+operator << (std::ostream &out, const SimpleResult &result)
+{
+ if (result.getHitCount() == 0) {
+ out << std::endl << "empty" << std::endl;
+ } else {
+ out << std::endl;
+ for (uint32_t i = 0; i < result.getHitCount(); ++i) {
+ out << "{" << result.getHit(i) << "}" << std::endl;
+ }
+ }
+ return out;
+}
+
+} // namespace queryeval
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/queryeval/simpleresult.h b/searchlib/src/vespa/searchlib/queryeval/simpleresult.h
new file mode 100644
index 00000000000..18e01374ec5
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/simpleresult.h
@@ -0,0 +1,87 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vector>
+#include "searchiterator.h"
+
+namespace search {
+namespace queryeval {
+
+/**
+ * Simple result class containing only document ids. This class will
+ * mostly be used for testing.
+ **/
+class SimpleResult
+{
+private:
+ std::vector<uint32_t> _hits;
+
+public:
+ /**
+ * Create an empty result
+ **/
+ SimpleResult() : _hits() {}
+
+ /**
+ * Obtain the number of hits
+ *
+ * @return number of hits
+ **/
+ uint32_t getHitCount() const { return _hits.size(); }
+
+ /**
+ * Get the docid of a specific hit
+ *
+ * @return docid for the i'th hit
+ * @param i which hit to obtain
+ **/
+ uint32_t getHit(uint32_t i) const { return _hits[i]; }
+
+ /**
+ * Add a hit. Hits must be added in sorted order (smallest docid
+ * first).
+ *
+ * @return this object for chaining
+ * @param docid hit to add
+ **/
+ SimpleResult &addHit(uint32_t docid);
+
+ /**
+ * remove all hits
+ **/
+ void clear();
+
+ /**
+ * Fill this result with all the hits returned by the given search
+ * object. Old hits will be removed from this result before doing
+ * the search. Assumes strict toplevel search object located at start
+ *
+ * @param sb search object
+ **/
+ void search(SearchIterator &sb);
+
+ /**
+ * Fill this result with all the hits returned by the given search
+ * object. Old hits will be removed from this result before doing
+ * the search. Assumes non-strict toplevel search object.
+ *
+ * @param sb search object
+ * @param docIdLimit the end of the docId range for this search iterator
+ **/
+ void search(SearchIterator &sb, uint32_t docIdLimit);
+
+ /**
+ * Test of we contain the same hits as rhs.
+ *
+ * @return true if the results are equal
+ * @param rhs other results
+ **/
+ bool operator==(const SimpleResult &rhs) const { return (_hits == rhs._hits); }
+};
+
+std::ostream &operator << (std::ostream &out, const SimpleResult &result);
+
+} // namespace queryeval
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/queryeval/simplesearch.cpp b/searchlib/src/vespa/searchlib/queryeval/simplesearch.cpp
new file mode 100644
index 00000000000..2d5b6b3e8b8
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/simplesearch.cpp
@@ -0,0 +1,49 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".simplesearch");
+#include "simplesearch.h"
+#include <vespa/vespalib/objects/visit.h>
+
+namespace search {
+namespace queryeval {
+
+void
+SimpleSearch::doSeek(uint32_t docid)
+{
+ while (_index < _result.getHitCount() && _result.getHit(_index) < docid) {
+ ++_index;
+ }
+ if (_index == _result.getHitCount()) {
+ setAtEnd();
+ return;
+ }
+ setDocId(_result.getHit(_index));
+}
+
+void
+SimpleSearch::doUnpack(uint32_t docid)
+{
+ (void) docid;
+}
+
+SimpleSearch::SimpleSearch(const SimpleResult &result)
+ : _tag("<null>"),
+ _result(result),
+ _index(0)
+{
+}
+
+void
+SimpleSearch::visitMembers(vespalib::ObjectVisitor &visitor) const
+{
+ visit(visitor, "tag", _tag);
+}
+
+SimpleSearch::~SimpleSearch()
+{
+}
+
+} // namespace queryeval
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/queryeval/simplesearch.h b/searchlib/src/vespa/searchlib/queryeval/simplesearch.h
new file mode 100644
index 00000000000..523d4f9dedc
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/simplesearch.h
@@ -0,0 +1,41 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "searchiterator.h"
+#include "simpleresult.h"
+
+namespace search {
+namespace queryeval {
+
+/**
+ * Simple search class used to return a predefined set of
+ * results. This class will mostly be used for testing.
+ **/
+class SimpleSearch : public SearchIterator
+{
+private:
+ vespalib::string _tag;
+ SimpleResult _result;
+ uint32_t _index;
+
+ SimpleSearch(const SimpleSearch &);
+ SimpleSearch &operator=(const SimpleSearch &);
+
+protected:
+ virtual void doSeek(uint32_t docid);
+ virtual void doUnpack(uint32_t docid);
+
+public:
+ SimpleSearch(const SimpleResult &result);
+ SimpleSearch &tag(const vespalib::string &t) {
+ _tag = t;
+ return *this;
+ }
+ virtual void visitMembers(vespalib::ObjectVisitor &visitor) const;
+ virtual ~SimpleSearch();
+};
+
+} // namespace queryeval
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/queryeval/sourceblendersearch.cpp b/searchlib/src/vespa/searchlib/queryeval/sourceblendersearch.cpp
new file mode 100644
index 00000000000..1482ec21fed
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/sourceblendersearch.cpp
@@ -0,0 +1,187 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+#include "sourceblendersearch.h"
+#include <vespa/vespalib/objects/visit.h>
+
+namespace search {
+namespace queryeval {
+
+EmptySearch SourceBlenderSearch::_emptySearch;
+
+class SourceBlenderSearchStrict : public SourceBlenderSearch
+{
+public:
+ SourceBlenderSearchStrict(ISourceSelector::Iterator::UP sourceSelector, const Children &children);
+private:
+ VESPA_DLL_LOCAL void advance() __attribute__((noinline));
+ vespalib::Array<SearchIterator *> _nextChildren;
+
+ void doSeek(uint32_t docid) override;
+ Trinary is_strict() const override { return Trinary::True; }
+};
+
+SourceBlenderSearchStrict::SourceBlenderSearchStrict(
+ ISourceSelector::Iterator::UP sourceSelector,
+ const Children &children)
+ : SourceBlenderSearch(std::move(sourceSelector), children),
+ _nextChildren()
+{
+ _nextChildren.reserve(children.size());
+}
+
+void
+SourceBlenderSearch::doSeek(uint32_t docid)
+{
+ if (docid >= _docIdLimit) {
+ setDocId(endDocId);
+ return;
+ }
+ _matchedChild = getSearch(_sourceSelector->getSource(docid));
+ if (_matchedChild->seek(docid)) {
+ setDocId(docid);
+ }
+}
+
+void
+SourceBlenderSearchStrict::doSeek(uint32_t docid)
+{
+ if (docid >= _docIdLimit) {
+ setDocId(endDocId);
+ return;
+ }
+ _matchedChild = getSearch(_sourceSelector->getSource(docid));
+ if (_matchedChild->seek(docid)) {
+ setDocId(docid);
+ } else {
+ for (auto & child : _children) {
+ getSearch(child)->seek(docid);
+ }
+ advance();
+ }
+}
+
+void
+SourceBlenderSearchStrict::advance()
+{
+ for (;;) {
+ SearchIterator * search = getSearch(_children[0]);
+ uint32_t minNextId = search->getDocId();
+ _nextChildren.clear();
+ _nextChildren.push_back_fast(search);
+ for (uint32_t i = 1; i < _children.size(); ++i) {
+ search = getSearch(_children[i]);
+ uint32_t nextId = search->getDocId();
+ if (nextId < minNextId) {
+ minNextId = nextId;
+ _nextChildren.clear();
+ _nextChildren.push_back_fast(search);
+ } else if (nextId == minNextId) {
+ _nextChildren.push_back_fast(search);
+ }
+ }
+ if (isAtEnd(minNextId)) {
+ setAtEnd();
+ return;
+ }
+ if (minNextId >= _docIdLimit) {
+ setAtEnd();
+ return;
+ }
+ search = getSearch(_sourceSelector->getSource(minNextId));
+ for (uint32_t i = 0; i < _nextChildren.size(); ++i) {
+ if (_nextChildren[i] == search) {
+ _matchedChild = search;
+ setDocId(minNextId);
+ return;
+ }
+ _nextChildren[i]->seek(minNextId + 1);
+ }
+ }
+}
+
+void
+SourceBlenderSearch::doUnpack(uint32_t docid)
+{
+ _matchedChild->doUnpack(docid);
+}
+
+SourceBlenderSearch::SourceBlenderSearch(
+ ISourceSelector::Iterator::UP sourceSelector,
+ const Children &children) :
+ _matchedChild(NULL),
+ _sourceSelector(std::move(sourceSelector)),
+ _children(),
+ _docIdLimit(_sourceSelector->getDocIdLimit())
+{
+ for (size_t i(0); i < sizeof(_sources)/sizeof(_sources[0]); i++) {
+ _sources[i] = &_emptySearch;
+ }
+ for (auto & child : children) {
+ Source sid(child.sourceId);
+ _children.push_back(sid);
+ _sources[sid] = child.search;
+ }
+}
+
+void
+SourceBlenderSearch::initRange(uint32_t beginid, uint32_t endid)
+{
+ SearchIterator::initRange(beginid, endid);
+ for (auto & child : _children) {
+ getSearch(child)->initRange(beginid, endid);
+ }
+}
+
+void
+SourceBlenderSearch::resetRange()
+{
+ SearchIterator::resetRange();
+ for (auto & child : _children) {
+ getSearch(child)->resetRange();
+ }
+}
+
+void
+SourceBlenderSearch::visitMembers(vespalib::ObjectVisitor &visitor) const
+{
+ visit(visitor, "children", _children);
+ for (const auto & child : _children) {
+ vespalib::asciistream os;
+ os << "Source " << child;
+ visit(visitor, os.str(), *getSearch(child));
+ }
+}
+
+SourceBlenderSearch::~SourceBlenderSearch()
+{
+ for (auto & child : _children) {
+ delete getSearch(child);
+ }
+}
+
+SourceBlenderSearch * SourceBlenderSearch::create(
+ ISourceSelector::Iterator::UP sourceSelector,
+ const Children &children,
+ bool strict)
+{
+ if (strict) {
+ return new SourceBlenderSearchStrict(std::move(sourceSelector), children);
+ } else {
+ return new SourceBlenderSearch(std::move(sourceSelector), children);
+ }
+}
+
+
+} // namespace queryeval
+} // namespace search
+
+void visit(vespalib::ObjectVisitor &self, const vespalib::string &name,
+ const search::queryeval::SourceBlenderSearch::Child &obj)
+{
+ self.openStruct(name, "search::queryeval::SourceBlenderSearch::Child");
+ visit(self, "search", obj.search);
+ visit(self, "sourceId", obj.sourceId);
+ self.closeStruct();
+}
diff --git a/searchlib/src/vespa/searchlib/queryeval/sourceblendersearch.h b/searchlib/src/vespa/searchlib/queryeval/sourceblendersearch.h
new file mode 100644
index 00000000000..81688184fa5
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/sourceblendersearch.h
@@ -0,0 +1,92 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/vespalib/util/array.h>
+#include "searchiterator.h"
+#include "emptysearch.h"
+#include "isourceselector.h"
+
+namespace search {
+namespace queryeval {
+
+/**
+ * A simple implementation of the source blender operation. This class
+ * is used to blend results from multiple sources. Each source is
+ * represented with a separate search iterator. A source selector
+ * iterator is used to select the appropriate source for each
+ * document. The source blender will make sure to only propagate
+ * unpack requests to one of the sources below, enabling them to use
+ * the same target location for detailed match data unpacking.
+ **/
+class SourceBlenderSearch : public SearchIterator
+{
+public:
+ /**
+ * Small wrapper used to specify the underlying searches to be
+ * blended.
+ **/
+ struct Child {
+ SearchIterator *search;
+ uint32_t sourceId;
+ Child() : search(NULL), sourceId(0) { }
+ Child(SearchIterator *s, uint32_t id) : search(s), sourceId(id) {}
+ };
+ typedef std::vector<Child> Children;
+
+private:
+ SourceBlenderSearch(const SourceBlenderSearch &);
+ SourceBlenderSearch &operator=(const SourceBlenderSearch &);
+ virtual void visitMembers(vespalib::ObjectVisitor &visitor) const;
+ virtual bool isSourceBlender() const { return true; }
+ static EmptySearch _emptySearch;
+protected:
+ typedef std::vector<Source> SourceIndex;
+ SearchIterator * _matchedChild;
+ ISourceSelector::Iterator::UP _sourceSelector;
+ SourceIndex _children;
+ uint32_t _docIdLimit;
+ SearchIterator * _sources[256];
+
+ void doSeek(uint32_t docid) override;
+ void doUnpack(uint32_t docid) override;
+ Trinary is_strict() const override { return Trinary::False; }
+ SourceBlenderSearch(ISourceSelector::Iterator::UP sourceSelector, const Children &children);
+ SearchIterator * getSearch(Source source) const { return _sources[source]; }
+public:
+ /**
+ * Create a new SourceBlender Search with the given children and
+ * strictness. A strict blender can assume that all children below
+ * are also strict. A non-strict blender has no strictness
+ * assumptions about its children.
+ *
+ * @param sourceSelector This is an iterator that provide you with the
+ * the correct source to use.
+ * @param children the search objects we are blending
+ * this object takes ownership of the children.
+ * @param strict whether this search is strict
+ * (a strict search will locate its next hit when seeking fails)
+ **/
+ static SourceBlenderSearch * create(ISourceSelector::Iterator::UP sourceSelector,
+ const Children &children, bool strict);
+ virtual ~SourceBlenderSearch();
+ size_t getNumChildren() const { return _children.size(); }
+ SearchIterator::UP steal(size_t index) {
+ SearchIterator::UP retval(_sources[_children[index]]);
+ _sources[_children[index]] = NULL;
+ return retval;
+ }
+ void setChild(size_t index, SearchIterator::UP child) {
+ assert(_sources[_children[index]] == NULL);
+ _sources[_children[index]] = child.release();
+ }
+ void initRange(uint32_t beginId, uint32_t endId) override;
+ void resetRange() override;
+};
+
+} // namespace queryeval
+} // namespace search
+
+void visit(vespalib::ObjectVisitor &self, const vespalib::string &name,
+ const search::queryeval::SourceBlenderSearch::Child &obj);
+
diff --git a/searchlib/src/vespa/searchlib/queryeval/split_float.cpp b/searchlib/src/vespa/searchlib/queryeval/split_float.cpp
new file mode 100644
index 00000000000..dd6a370ceda
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/split_float.cpp
@@ -0,0 +1,29 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// $Id$
+
+#include <vespa/fastos/fastos.h>
+#include "split_float.h"
+#include <ctype.h>
+
+namespace search {
+namespace queryeval {
+
+SplitFloat::SplitFloat(const vespalib::string &input)
+{
+ bool seenText = false;
+ for (size_t i = 0; i < input.size(); ++i) {
+ unsigned char c = input[i];
+ if (isalnum(c)) {
+ if (!seenText) {
+ _parts.push_back(vespalib::string());
+ }
+ _parts.back().push_back(c);
+ seenText = true;
+ } else {
+ seenText = false;
+ }
+ }
+}
+
+} // namespace search::queryeval
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/queryeval/split_float.h b/searchlib/src/vespa/searchlib/queryeval/split_float.h
new file mode 100644
index 00000000000..b8fcf8f5ab3
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/split_float.h
@@ -0,0 +1,24 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// $Id$
+
+#pragma once
+
+#include <vector>
+#include <vespa/vespalib/stllike/string.h>
+
+namespace search {
+namespace queryeval {
+
+class SplitFloat
+{
+private:
+ std::vector<vespalib::string> _parts;
+public:
+ SplitFloat(const vespalib::string &input);
+ size_t parts() const { return _parts.size(); }
+ const vespalib::string &getPart(size_t i) const { return _parts[i]; }
+};
+
+} // namespace search::queryeval
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/queryeval/termasstring.cpp b/searchlib/src/vespa/searchlib/queryeval/termasstring.cpp
new file mode 100644
index 00000000000..79b04b90e53
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/termasstring.cpp
@@ -0,0 +1,120 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+
+#include "termasstring.h"
+#include <vespa/searchlib/query/tree/node.h>
+#include <vespa/searchlib/query/tree/queryvisitor.h>
+#include <vespa/searchlib/query/tree/termnodes.h>
+#include <vespa/vespalib/util/stringfmt.h>
+#include <vespa/vespalib/util/exceptions.h>
+#include <typeinfo>
+
+LOG_SETUP(".termasstring");
+
+using search::query::And;
+using search::query::AndNot;
+using search::query::Equiv;
+using search::query::NumberTerm;
+using search::query::LocationTerm;
+using search::query::Near;
+using search::query::Node;
+using search::query::ONear;
+using search::query::Or;
+using search::query::Phrase;
+using search::query::PredicateQuery;
+using search::query::PrefixTerm;
+using search::query::QueryVisitor;
+using search::query::RangeTerm;
+using search::query::Rank;
+using search::query::RegExpTerm;
+using search::query::StringTerm;
+using search::query::SubstringTerm;
+using search::query::SuffixTerm;
+using search::query::WeakAnd;
+using search::query::WeightedSetTerm;
+using search::query::DotProduct;
+using search::query::WandTerm;
+using vespalib::string;
+
+namespace search {
+namespace queryeval {
+
+vespalib::string termAsString(double float_term) {
+ vespalib::asciistream os;
+ return (os << float_term).str();
+}
+
+vespalib::string termAsString(int64_t int_term) {
+ vespalib::asciistream os;
+ return (os << int_term).str();
+}
+
+vespalib::string termAsString(const search::query::Range &term) {
+ vespalib::asciistream os;
+ return (os << term).str();
+}
+
+vespalib::string termAsString(const search::query::Location &term) {
+ vespalib::asciistream os;
+ return (os << term).str();
+}
+
+namespace {
+struct TermAsStringVisitor : public QueryVisitor {
+ string term;
+ bool isSet;
+
+ TermAsStringVisitor() : term(), isSet(false) {}
+
+ template <class TermNode>
+ void visitTerm(TermNode &n) {
+ term = termAsString(n.getTerm());
+ isSet = true;
+ }
+
+ void illegalVisit() {
+ term.clear();
+ isSet = false;
+ }
+
+ virtual void visit(And &) { illegalVisit(); }
+ virtual void visit(AndNot &) { illegalVisit(); }
+ virtual void visit(Equiv &) { illegalVisit(); }
+ virtual void visit(Near &) { illegalVisit(); }
+ virtual void visit(ONear &) { illegalVisit(); }
+ virtual void visit(Or &) { illegalVisit(); }
+ virtual void visit(Phrase &) { illegalVisit(); }
+ virtual void visit(Rank &) { illegalVisit(); }
+ virtual void visit(WeakAnd &) { illegalVisit(); }
+ virtual void visit(WeightedSetTerm &) { illegalVisit(); }
+ virtual void visit(DotProduct &) { illegalVisit(); }
+ virtual void visit(WandTerm &) { illegalVisit(); }
+
+ virtual void visit(NumberTerm &n) { visitTerm(n); }
+ virtual void visit(LocationTerm &n) { visitTerm(n); }
+ virtual void visit(PrefixTerm &n) { visitTerm(n); }
+ virtual void visit(RangeTerm &n) { visitTerm(n); }
+ virtual void visit(StringTerm &n) { visitTerm(n); }
+ virtual void visit(SubstringTerm &n) { visitTerm(n); }
+ virtual void visit(SuffixTerm &n) { visitTerm(n); }
+ virtual void visit(RegExpTerm &n) { visitTerm(n); }
+
+ virtual void visit(PredicateQuery &) { illegalVisit(); }
+};
+} // namespace
+
+string termAsString(const Node &term_node) {
+ TermAsStringVisitor visitor;
+ const_cast<Node &>(term_node).accept(visitor);
+ if (!visitor.isSet) {
+ vespalib::string err(vespalib::make_string("Trying to convert a non-term node ('%s') to a term string.", typeid(term_node).name()));
+ LOG(warning, "%s", err.c_str());
+ throw vespalib::IllegalArgumentException(err, VESPA_STRLOC);
+ }
+ return visitor.term;
+}
+
+} // namespace search::queryeval
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/queryeval/termasstring.h b/searchlib/src/vespa/searchlib/queryeval/termasstring.h
new file mode 100644
index 00000000000..5428dfa0ce7
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/termasstring.h
@@ -0,0 +1,30 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/query/tree/location.h>
+#include <vespa/searchlib/query/tree/range.h>
+#include <string>
+
+namespace search {
+namespace query { class Node; }
+
+namespace queryeval {
+
+inline const vespalib::string &termAsString(const vespalib::string &term) {
+ return term;
+}
+
+vespalib::string termAsString(double float_term);
+
+vespalib::string termAsString(int64_t int_term);
+
+vespalib::string termAsString(const search::query::Range &term);
+
+vespalib::string termAsString(const search::query::Location &term);
+
+vespalib::string termAsString(const search::query::Node &term_node);
+
+} // namespace search::queryeval
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/queryeval/termwise_blueprint_helper.cpp b/searchlib/src/vespa/searchlib/queryeval/termwise_blueprint_helper.cpp
new file mode 100644
index 00000000000..45d975e5e77
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/termwise_blueprint_helper.cpp
@@ -0,0 +1,43 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "termwise_blueprint_helper.h"
+#include "termwise_search.h"
+
+namespace search {
+namespace queryeval {
+
+TermwiseBlueprintHelper::TermwiseBlueprintHelper(const IntermediateBlueprint &self,
+ const MultiSearch::Children &subSearches,
+ UnpackInfo &unpackInfo)
+ : children(),
+ termwise(),
+ first_termwise(subSearches.size()),
+ termwise_unpack()
+{
+ children.reserve(subSearches.size());
+ termwise.reserve(subSearches.size());
+ for (size_t i = 0; i < subSearches.size(); ++i) {
+ bool need_unpack = unpackInfo.needUnpack(i);
+ bool allow_termwise = self.getChild(i).getState().allow_termwise_eval();
+ if (need_unpack || !allow_termwise) {
+ if (need_unpack) {
+ size_t index = (i < first_termwise) ? children.size() : (children.size() + 1);
+ termwise_unpack.add(index);
+ }
+ children.push_back(subSearches[i]);
+ } else {
+ first_termwise = std::min(i, first_termwise);
+ termwise.push_back(subSearches[i]);
+ }
+ }
+}
+
+void
+TermwiseBlueprintHelper::insert_termwise(SearchIterator::UP search, bool strict)
+{
+ auto termwise_search = make_termwise(std::move(search), strict);
+ children.insert(children.begin() + first_termwise, termwise_search.release());
+}
+
+} // namespace queryeval
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/queryeval/termwise_blueprint_helper.h b/searchlib/src/vespa/searchlib/queryeval/termwise_blueprint_helper.h
new file mode 100644
index 00000000000..c99fedd4921
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/termwise_blueprint_helper.h
@@ -0,0 +1,31 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "multisearch.h"
+#include "blueprint.h"
+#include "unpackinfo.h"
+#include "searchiterator.h"
+
+namespace search {
+namespace queryeval {
+
+/**
+ * Utility used to keep track of which children can be evaluated
+ * termwise, which children we need to unpack and how to combine the
+ * termwise and non-termwise parts with each other.
+ **/
+struct TermwiseBlueprintHelper {
+ MultiSearch::Children children;
+ MultiSearch::Children termwise;
+ size_t first_termwise;
+ UnpackInfo termwise_unpack;
+
+ TermwiseBlueprintHelper(const IntermediateBlueprint &self,
+ const MultiSearch::Children &subSearches, UnpackInfo &unpackInfo);
+
+ void insert_termwise(SearchIterator::UP search, bool strict);
+};
+
+} // namespace queryeval
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/queryeval/termwise_search.cpp b/searchlib/src/vespa/searchlib/queryeval/termwise_search.cpp
new file mode 100644
index 00000000000..7d25a897026
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/termwise_search.cpp
@@ -0,0 +1,62 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "termwise_search.h"
+#include <vespa/vespalib/objects/visit.h>
+
+namespace search {
+namespace queryeval {
+
+template <bool IS_STRICT>
+struct TermwiseSearch : public SearchIterator {
+
+ SearchIterator::UP search;
+ BitVector::UP result;
+
+ TermwiseSearch(SearchIterator::UP search_in)
+ : search(std::move(search_in)), result() {}
+
+ Trinary is_strict() const override { return IS_STRICT ? Trinary::True : Trinary::False; }
+ void initRange(uint32_t beginid, uint32_t endid) override {
+ SearchIterator::initRange(beginid, endid);
+ search->initRange(beginid, endid);
+ result = search->get_hits(beginid);
+ }
+ void resetRange() override {
+ SearchIterator::resetRange();
+ search->resetRange();
+ result.reset();
+ }
+ void doSeek(uint32_t docid) override {
+ if (__builtin_expect(isAtEnd(docid), false)) {
+ setAtEnd();
+ } else if (IS_STRICT) {
+ uint32_t nextid = result->getNextTrueBit(docid);
+ if (__builtin_expect(isAtEnd(nextid), false)) {
+ setAtEnd();
+ } else {
+ setDocId(nextid);
+ }
+ } else if (result->testBit(docid)) {
+ setDocId(docid);
+ }
+ }
+ void doUnpack(uint32_t) override {}
+ void visitMembers(vespalib::ObjectVisitor &visitor) const {
+ visit(visitor, "search", *search);
+ visit(visitor, "strict", IS_STRICT);
+ }
+};
+
+SearchIterator::UP
+make_termwise(SearchIterator::UP search, bool strict)
+{
+ if (strict) {
+ return SearchIterator::UP(new TermwiseSearch<true>(std::move(search)));
+ } else {
+ return SearchIterator::UP(new TermwiseSearch<false>(std::move(search)));
+ }
+}
+
+} // namespace queryeval
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/queryeval/termwise_search.h b/searchlib/src/vespa/searchlib/queryeval/termwise_search.h
new file mode 100644
index 00000000000..bf2013731d7
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/termwise_search.h
@@ -0,0 +1,27 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "searchiterator.h"
+
+namespace search {
+namespace queryeval {
+
+/**
+ * Creates a termwise wrapper for the given search. The wrapper will
+ * perform termwise evaluation of the underlying search when the
+ * initRange function is called. All hits for the active range are
+ * stored in a bitvector fragment in the wrapper. The wrapper will act
+ * as a normal iterator to be used for parallel query evaluation. Note
+ * that no match data will be available for the hits returned by the
+ * wrapper. Termwise evaluation should only ever be used for parts of
+ * the query not used for ranking.
+ *
+ * @return wrapper performing termwise evaluation of the original search
+ * @param search the search we want to perform termwise evaluation of
+ * @param strict whether the wrapper itself should be a strict iterator
+ **/
+SearchIterator::UP make_termwise(SearchIterator::UP search, bool strict);
+
+} // namespace queryeval
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/queryeval/test/CMakeLists.txt b/searchlib/src/vespa/searchlib/queryeval/test/CMakeLists.txt
new file mode 100644
index 00000000000..3b2bca7d35b
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/test/CMakeLists.txt
@@ -0,0 +1,5 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_library(searchlib_queryeval_test INTERFACE
+ SOURCES
+ DEPENDS
+)
diff --git a/searchlib/src/vespa/searchlib/queryeval/test/eagerchild.h b/searchlib/src/vespa/searchlib/queryeval/test/eagerchild.h
new file mode 100644
index 00000000000..d6404842b39
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/test/eagerchild.h
@@ -0,0 +1,23 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/searchlib/queryeval/searchiterator.h>
+
+namespace search {
+namespace queryeval {
+namespace test {
+
+/**
+ * Child iterator that has initial docid > 0.
+ **/
+struct EagerChild : public SearchIterator
+{
+ EagerChild(uint32_t initial) : SearchIterator() { setDocId(initial); }
+ virtual void doSeek(uint32_t) { setAtEnd(); }
+ virtual void doUnpack(uint32_t) {}
+};
+
+} // namespace test
+} // namespace queryeval
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/queryeval/test/leafspec.h b/searchlib/src/vespa/searchlib/queryeval/test/leafspec.h
new file mode 100644
index 00000000000..5fb03c4afa2
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/test/leafspec.h
@@ -0,0 +1,61 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include "searchhistory.h"
+#include "trackedsearch.h"
+#include <vespa/searchlib/queryeval/fake_search.h>
+#include <vespa/searchlib/queryeval/searchiterator.h>
+#include <string>
+
+namespace search {
+namespace queryeval {
+namespace test {
+
+/**
+ * Defines the hits to be returned by a wand-like subsearch and creates a TrackedSearch.
+ **/
+struct LeafSpec
+{
+ std::string name;
+ int32_t weight;
+ int32_t maxWeight;
+ FakeResult result;
+ SearchIterator *search;
+ LeafSpec(const std::string &n, int32_t w = 100)
+ : name(n),
+ weight(w),
+ maxWeight(std::numeric_limits<int32_t>::min()),
+ result(),
+ search()
+ {}
+ LeafSpec &doc(uint32_t docid) {
+ result.doc(docid);
+ return *this;
+ }
+ LeafSpec &doc(uint32_t docid, int32_t w) {
+ result.doc(docid);
+ result.weight(w);
+ result.pos(0);
+ maxWeight = std::max(maxWeight, w);
+ return *this;
+ }
+ LeafSpec &itr(SearchIterator *si) {
+ search = si;
+ return *this;
+ }
+ SearchIterator *create(SearchHistory &hist, fef::TermFieldMatchData *tfmd) const {
+ if (search != NULL) {
+ return new TrackedSearch(name, hist, search);
+ } else if (tfmd != NULL) {
+ return new TrackedSearch(name, hist, result, *tfmd,
+ MinMaxPostingInfo(0, maxWeight));
+ }
+ return new TrackedSearch(name, hist, result,
+ MinMaxPostingInfo(0, maxWeight));
+ }
+};
+
+} // namespace test
+} // namespace queryeval
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/queryeval/test/searchhistory.h b/searchlib/src/vespa/searchlib/queryeval/test/searchhistory.h
new file mode 100644
index 00000000000..3e528e9a370
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/test/searchhistory.h
@@ -0,0 +1,58 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <ostream>
+#include <string>
+#include <vector>
+
+namespace search {
+namespace queryeval {
+namespace test {
+
+/**
+ * Seek and unpack history for a search iterator.
+ **/
+struct SearchHistory {
+ struct Entry {
+ std::string target;
+ std::string op;
+ uint32_t docid;
+ Entry(const std::string &t, const std::string &o, uint32_t id)
+ : target(t), op(o), docid(id) {}
+ bool operator==(const Entry &rhs) const {
+ return ((target == rhs.target) &&
+ (op == rhs.op) &&
+ (docid == rhs.docid));
+ }
+ };
+ std::vector<Entry> _entries;
+ SearchHistory &seek(const std::string &target, uint32_t docid) {
+ _entries.push_back(Entry(target, "seek", docid));
+ return *this;
+ }
+ SearchHistory &step(const std::string &target, uint32_t docid) {
+ _entries.push_back(Entry(target, "setDocId", docid));
+ return *this;
+ }
+ SearchHistory &unpack(const std::string &target, uint32_t docid) {
+ _entries.push_back(Entry(target, "unpack", docid));
+ return *this;
+ }
+ bool operator==(const SearchHistory &rhs) const {
+ return (_entries == rhs._entries);
+ }
+};
+
+std::ostream &operator << (std::ostream &out, const SearchHistory &hist) {
+ out << "History:\n";
+ for (size_t i = 0; i < hist._entries.size(); ++i) {
+ const SearchHistory::Entry &entry = hist._entries[i];
+ out << " " << entry.target << "->" << entry.op << "(" << entry.docid << ")" << std::endl;
+ }
+ return out;
+}
+
+} // namespace test
+} // namespace queryeval
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/queryeval/test/trackedsearch.h b/searchlib/src/vespa/searchlib/queryeval/test/trackedsearch.h
new file mode 100644
index 00000000000..49e3fd6b84a
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/test/trackedsearch.h
@@ -0,0 +1,74 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include "searchhistory.h"
+#include <vespa/searchlib/fef/termfieldmatchdata.h>
+#include <vespa/searchlib/queryeval/fake_search.h>
+#include <vespa/searchlib/queryeval/searchiterator.h>
+#include <string>
+
+namespace search {
+namespace queryeval {
+namespace test {
+
+/**
+ * Proxy and wrapper for FakeSearch to track search history and
+ * keep match data.
+ **/
+class TrackedSearch : public SearchIterator
+{
+private:
+ std::string _name;
+ SearchHistory &_history;
+ fef::TermFieldMatchData _matchData;
+ SearchIterator::UP _search;
+ MinMaxPostingInfo::UP _minMaxPostingInfo;
+
+ static fef::TermFieldMatchDataArray makeArray(fef::TermFieldMatchData &match) {
+ fef::TermFieldMatchDataArray array;
+ array.add(&match);
+ return array;
+ }
+
+protected:
+ virtual void doSeek(uint32_t docid) {
+ _history.seek(_name, docid);
+ _search->seek(docid);
+ setDocId(_search->getDocId());
+ _history.step(_name, getDocId());
+ }
+ virtual void doUnpack(uint32_t docid) {
+ _history.unpack(_name, docid);
+ _search->unpack(docid);
+ }
+
+public:
+ // wraps a FakeSearch and owns its match data
+ TrackedSearch(const std::string &name, SearchHistory &hist,
+ const FakeResult &result, const MinMaxPostingInfo &minMaxPostingInfo)
+ : _name(name), _history(hist), _matchData(),
+ _search(new FakeSearch("<tag>", "<field>", "<term>", result, makeArray(_matchData))),
+ _minMaxPostingInfo(new MinMaxPostingInfo(minMaxPostingInfo))
+ { setDocId(_search->getDocId()); }
+ // wraps a FakeSearch with external match data
+ TrackedSearch(const std::string &name, SearchHistory &hist,
+ const FakeResult &result, fef::TermFieldMatchData &tfmd,
+ const MinMaxPostingInfo &minMaxPostingInfo)
+ : _name(name), _history(hist), _matchData(),
+ _search(new FakeSearch("<tag>", "<field>", "<term>", result, makeArray(tfmd))),
+ _minMaxPostingInfo(new MinMaxPostingInfo(minMaxPostingInfo))
+ { setDocId(_search->getDocId()); }
+ // wraps a generic search (typically wand)
+ TrackedSearch(const std::string &name, SearchHistory &hist, SearchIterator *search)
+ : _name(name), _history(hist), _matchData(), _search(search), _minMaxPostingInfo()
+ { setDocId(_search->getDocId()); }
+
+ virtual const PostingInfo *getPostingInfo() const {
+ return _minMaxPostingInfo.get();
+ }
+};
+
+} // namespace test
+} // namespace queryeval
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/queryeval/test/wandspec.h b/searchlib/src/vespa/searchlib/queryeval/test/wandspec.h
new file mode 100644
index 00000000000..a578d1d908d
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/test/wandspec.h
@@ -0,0 +1,53 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include "leafspec.h"
+#include "trackedsearch.h"
+#include <vespa/searchlib/fef/matchdatalayout.h>
+#include <vespa/searchlib/queryeval/fake_search.h>
+#include <vespa/searchlib/queryeval/searchiterator.h>
+#include <vespa/searchlib/queryeval/wand/wand_parts.h>
+#include <vector>
+
+namespace search {
+namespace queryeval {
+namespace test {
+
+/**
+ * Defines the overall behavior of a wand like search with tracked children.
+ * This struct also owns the search iterator history.
+ **/
+class WandSpec
+{
+private:
+ std::vector<LeafSpec> _leafs;
+ fef::MatchDataLayout _layout;
+ std::vector<fef::TermFieldHandle> _handles;
+ SearchHistory _history;
+
+public:
+ WandSpec() : _leafs(), _layout(), _handles(), _history() {}
+ WandSpec &leaf(const LeafSpec &l) {
+ _leafs.push_back(l);
+ _handles.push_back(_layout.allocTermField(0));
+ return *this;
+ }
+ wand::Terms getTerms(fef::MatchData *matchData = NULL) {
+ wand::Terms terms;
+ for (size_t i = 0; i < _leafs.size(); ++i) {
+ fef::TermFieldMatchData *tfmd = (matchData != NULL ? matchData->resolveTermField(_handles[i]) : NULL);
+ terms.push_back(wand::Term(_leafs[i].create(_history, tfmd),
+ _leafs[i].weight,
+ _leafs[i].result.inspect().size(),
+ tfmd));
+ }
+ return terms;
+ }
+ SearchHistory &getHistory() { return _history; }
+ fef::MatchData::UP createMatchData() const { return _layout.createMatchData(); }
+};
+
+} // namespace test
+} // namespace queryeval
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/queryeval/truesearch.cpp b/searchlib/src/vespa/searchlib/queryeval/truesearch.cpp
new file mode 100644
index 00000000000..0c808ce06f2
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/truesearch.cpp
@@ -0,0 +1,33 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "truesearch.h"
+
+namespace search {
+namespace queryeval {
+
+void
+TrueSearch::doSeek(uint32_t docid)
+{
+ setDocId(docid);
+}
+
+void
+TrueSearch::doUnpack(uint32_t docid)
+{
+ _tfmd.resetOnlyDocId(docid);
+}
+
+TrueSearch::TrueSearch(fef::TermFieldMatchData & tfmd) :
+ SearchIterator(),
+ _tfmd(tfmd)
+{
+ _tfmd.resetOnlyDocId(0);
+}
+
+TrueSearch::~TrueSearch()
+{
+}
+
+} // namespace queryeval
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/queryeval/truesearch.h b/searchlib/src/vespa/searchlib/queryeval/truesearch.h
new file mode 100644
index 00000000000..3f7b36160f4
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/truesearch.h
@@ -0,0 +1,25 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/fef/termfieldmatchdata.h>
+#include "searchiterator.h"
+
+namespace search {
+namespace queryeval {
+
+class TrueSearch : public SearchIterator
+{
+private:
+ fef::TermFieldMatchData & _tfmd;
+ Trinary is_strict() const override { return Trinary::True; }
+ void doSeek(uint32_t) override;
+ void doUnpack(uint32_t) override;
+
+public:
+ TrueSearch(fef::TermFieldMatchData & tfmd);
+ ~TrueSearch();
+};
+
+} // namespace queryeval
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/queryeval/unpackinfo.cpp b/searchlib/src/vespa/searchlib/queryeval/unpackinfo.cpp
new file mode 100644
index 00000000000..5a9faf42e8e
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/unpackinfo.cpp
@@ -0,0 +1,104 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/searchlib/queryeval/unpackinfo.h>
+#include <vespa/vespalib/stllike/asciistream.h>
+
+namespace search {
+namespace queryeval {
+
+UnpackInfo::UnpackInfo()
+ : _size(0)
+{
+ memset(_unpack, 0, sizeof(_unpack));
+}
+
+UnpackInfo &
+UnpackInfo::add(size_t index)
+{
+ if ((index <= max_index) && (_size < max_size)) {
+ _unpack[_size++] = index;
+ std::sort(&_unpack[0], &_unpack[_size]);
+ } else {
+ forceAll();
+ }
+ return *this;
+}
+
+UnpackInfo &
+UnpackInfo::insert(size_t index, bool unpack)
+{
+ if (unpackAll()) {
+ return *this;
+ }
+ for (size_t rp = 0; rp < _size; ++rp) {
+ if (_unpack[rp] >= index) {
+ if (_unpack[rp] == max_index) {
+ forceAll();
+ return *this;
+ }
+ ++_unpack[rp];
+ }
+ }
+ if (unpack) {
+ add(index);
+ }
+ return *this;
+}
+
+UnpackInfo &
+UnpackInfo::remove(size_t index)
+{
+ if (unpackAll()) {
+ return *this;
+ }
+ size_t wp = 0;
+ bool found_index = false;
+ for (size_t rp = 0; rp < _size; ++rp) {
+ if (_unpack[rp] == index) {
+ found_index = true;
+ } else if (_unpack[rp] > index) {
+ _unpack[wp++] = (_unpack[rp] - 1);
+ } else {
+ _unpack[wp++] = _unpack[rp];
+ }
+ }
+ if (found_index) {
+ --_size;
+ }
+ assert(wp == _size);
+ return *this;
+}
+
+bool
+UnpackInfo::needUnpack(size_t index) const
+{
+ if (unpackAll()) {
+ return true;
+ }
+ for (size_t i = 0; i < _size; ++i) {
+ if (_unpack[i] == index) {
+ return true;
+ }
+ }
+ return false;
+}
+
+vespalib::string
+UnpackInfo::toString() const
+{
+ vespalib::asciistream os;
+ if (unpackAll()) {
+ os << "full-unpack";
+ } else if (empty()) {
+ os << "no-unpack";
+ } else {
+ os << size_t(_unpack[0]);
+ for (size_t i = 1; i < _size; ++i) {
+ os << " " << size_t(_unpack[i]);
+ }
+ }
+ return os.str();
+}
+
+} // namespace queryeval
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/queryeval/unpackinfo.h b/searchlib/src/vespa/searchlib/queryeval/unpackinfo.h
new file mode 100644
index 00000000000..8c83ec355fa
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/unpackinfo.h
@@ -0,0 +1,69 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <string.h>
+#include <stdint.h>
+#include "multisearch.h"
+
+namespace search {
+namespace queryeval {
+
+class UnpackInfo
+{
+private:
+ static constexpr size_t max_size = 31;
+ static constexpr size_t max_index = 255;
+
+ uint8_t _size;
+ uint8_t _unpack[max_size];
+
+public:
+ UnpackInfo();
+
+ // add an index to unpack, will not renumber existing indexes
+ UnpackInfo &add(size_t index);
+
+ // insert an index that may need unpacking, will renumber existing indexes
+ UnpackInfo &insert(size_t index, bool unpack = true);
+
+ // remove an index and its unpack data, will renumber existing indexes
+ UnpackInfo &remove(size_t index);
+
+ UnpackInfo &forceAll() {
+ _size = (max_size + 1);
+ return *this;
+ }
+
+ bool unpackAll() const { return (_size > max_size); }
+ bool empty() const { return (_size == 0); }
+ bool needUnpack(size_t index) const;
+
+ template <typename F>
+ void each(F &&f, size_t n) const {
+ if (__builtin_expect(unpackAll(), false)) {
+ for (size_t i = 0; i < n; ++i) {
+ f(i);
+ }
+ } else {
+ for (size_t i = 0; i < _size; ++i) {
+ f(_unpack[i]);
+ }
+ }
+ }
+
+ vespalib::string toString() const;
+};
+
+struct NoUnpack {
+ void unpack(uint32_t docid, const MultiSearch & search) {
+ (void) docid;
+ (void) search;
+ }
+ void onRemove(size_t index) { (void) index; }
+ void onInsert(size_t index) { (void) index; }
+ bool needUnpack(size_t index) const { (void) index; return false; }
+};
+
+} // namespace queryeval
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/queryeval/wand/CMakeLists.txt b/searchlib/src/vespa/searchlib/queryeval/wand/CMakeLists.txt
new file mode 100644
index 00000000000..a1dbdcfa2f1
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/wand/CMakeLists.txt
@@ -0,0 +1,10 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_library(searchlib_queryeval_wand OBJECT
+ SOURCES
+ parallel_weak_and_blueprint.cpp
+ parallel_weak_and_search.cpp
+ wand_parts.cpp
+ weak_and_heap.cpp
+ weak_and_search.cpp
+ DEPENDS
+)
diff --git a/searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_blueprint.cpp b/searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_blueprint.cpp
new file mode 100644
index 00000000000..b57694fc07d
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_blueprint.cpp
@@ -0,0 +1,126 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".queryeval.parallel_weak_and_blueprint");
+
+#include "wand_parts.h"
+#include "parallel_weak_and_blueprint.h"
+#include "parallel_weak_and_search.h"
+#include <vespa/searchlib/fef/termfieldmatchdata.h>
+#include <vespa/searchlib/queryeval/searchiterator.h>
+#include <vespa/vespalib/objects/visit.h>
+#include <algorithm>
+
+namespace search {
+namespace queryeval {
+
+ParallelWeakAndBlueprint::ParallelWeakAndBlueprint(const FieldSpec &field,
+ uint32_t scoresToTrack,
+ score_t scoreThreshold,
+ double thresholdBoostFactor)
+ : ComplexLeafBlueprint(field),
+ _field(field),
+ _scores(scoresToTrack),
+ _scoreThreshold(scoreThreshold),
+ _thresholdBoostFactor(thresholdBoostFactor),
+ _scoresAdjustFrequency(DEFAULT_PARALLEL_WAND_SCORES_ADJUST_FREQUENCY),
+ _estimate(),
+ _layout(),
+ _weights(),
+ _terms()
+{
+}
+
+ParallelWeakAndBlueprint::ParallelWeakAndBlueprint(const FieldSpec &field,
+ uint32_t scoresToTrack,
+ score_t scoreThreshold,
+ double thresholdBoostFactor,
+ uint32_t scoresAdjustFrequency)
+ : ComplexLeafBlueprint(field),
+ _field(field),
+ _scores(scoresToTrack),
+ _scoreThreshold(scoreThreshold),
+ _thresholdBoostFactor(thresholdBoostFactor),
+ _scoresAdjustFrequency(scoresAdjustFrequency),
+ _estimate(),
+ _layout(),
+ _weights(),
+ _terms()
+{
+}
+
+ParallelWeakAndBlueprint::~ParallelWeakAndBlueprint()
+{
+ while (!_terms.empty()) {
+ delete _terms.back();
+ _terms.pop_back();
+ }
+}
+
+FieldSpec
+ParallelWeakAndBlueprint::getNextChildField(const FieldSpec &outer)
+{
+ return FieldSpec(outer.getName(), outer.getFieldId(), _layout.allocTermField(outer.getFieldId()), false);
+}
+
+void
+ParallelWeakAndBlueprint::addTerm(Blueprint::UP term, int32_t weight)
+{
+ HitEstimate childEst = term->getState().estimate();
+ if (!childEst.empty) {
+ if (_estimate.empty) {
+ _estimate = childEst;
+ } else {
+ _estimate.estHits += childEst.estHits;
+ }
+ setEstimate(_estimate);
+ }
+ _weights.push_back(weight);
+ _terms.push_back(term.get());
+ term.release();
+ set_tree_size(_terms.size() + 1);
+}
+
+SearchIterator::UP
+ParallelWeakAndBlueprint::createLeafSearch(const search::fef::TermFieldMatchDataArray &tfmda,
+ bool strict) const
+{
+ assert(tfmda.size() == 1);
+ fef::MatchData::UP childrenMatchData = _layout.createMatchData();
+ wand::Terms terms;
+ for (size_t i = 0; i < _terms.size(); ++i) {
+ const State &childState = _terms[i]->getState();
+ assert(childState.numFields() == 1);
+ terms.push_back(wand::Term(_terms[i]->createSearch(*childrenMatchData, true).release(),
+ _weights[i],
+ childState.estimate().estHits,
+ childState.field(0).resolve(*childrenMatchData)));
+ }
+ return SearchIterator::UP
+ (ParallelWeakAndSearch::create(terms,
+ ParallelWeakAndSearch::MatchParams(_scores,
+ _scoreThreshold,
+ _thresholdBoostFactor,
+ _scoresAdjustFrequency).setDocIdLimit(get_docid_limit()),
+ ParallelWeakAndSearch::RankParams(*tfmda[0],
+ std::move(childrenMatchData)), strict));
+}
+
+void
+ParallelWeakAndBlueprint::fetchPostings(bool)
+{
+ for (size_t i = 0; i < _terms.size(); ++i) {
+ _terms[i]->fetchPostings(true);
+ }
+}
+
+void
+ParallelWeakAndBlueprint::visitMembers(vespalib::ObjectVisitor &visitor) const
+{
+ LeafBlueprint::visitMembers(visitor);
+ visit(visitor, "_weights", _weights);
+ visit(visitor, "_terms", _terms);
+}
+
+} // namespace search::queryeval
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_blueprint.h b/searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_blueprint.h
new file mode 100644
index 00000000000..bae74c046cb
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_blueprint.h
@@ -0,0 +1,75 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/searchlib/queryeval/blueprint.h>
+#include "wand_parts.h"
+#include "weak_and_heap.h"
+#include <vespa/searchlib/fef/matchdatalayout.h>
+#include <vespa/searchlib/fef/termfieldmatchdataarray.h>
+#include <memory>
+#include <vector>
+
+namespace search {
+namespace queryeval {
+
+const uint32_t DEFAULT_PARALLEL_WAND_SCORES_ADJUST_FREQUENCY = 4;
+
+/**
+ * Blueprint for the parallel weak and search operator.
+ */
+class ParallelWeakAndBlueprint : public ComplexLeafBlueprint
+{
+private:
+ typedef wand::score_t score_t;
+
+ const FieldSpec _field;
+ mutable SharedWeakAndPriorityQueue _scores;
+ const wand::score_t _scoreThreshold;
+ double _thresholdBoostFactor;
+ const uint32_t _scoresAdjustFrequency;
+ HitEstimate _estimate;
+ fef::MatchDataLayout _layout;
+ std::vector<int32_t> _weights;
+ std::vector<Blueprint*> _terms;
+
+ ParallelWeakAndBlueprint(const ParallelWeakAndBlueprint &);
+ ParallelWeakAndBlueprint &operator=(const ParallelWeakAndBlueprint &);
+
+public:
+ ParallelWeakAndBlueprint(const FieldSpec &field,
+ uint32_t scoresToTrack,
+ score_t scoreThreshold,
+ double thresholdBoostFactor);
+ ParallelWeakAndBlueprint(const FieldSpec &field,
+ uint32_t scoresToTrack,
+ score_t scoreThreshold,
+ double thresholdBoostFactor,
+ uint32_t scoresAdjustFrequency);
+ virtual ~ParallelWeakAndBlueprint();
+
+ const WeakAndHeap &getScores() const { return _scores; }
+
+ score_t getScoreThreshold() const { return _scoreThreshold; }
+
+ double getThresholdBoostFactor() const { return _thresholdBoostFactor; }
+
+ // Used by create visitor
+ FieldSpec getNextChildField(const FieldSpec &outer);
+
+ // Used by create visitor
+ void addTerm(Blueprint::UP term, int32_t weight);
+
+ // Override doc from blueprint::Leaf.
+ virtual SearchIterator::UP
+ createLeafSearch(const search::fef::TermFieldMatchDataArray &tfmda,
+ bool strict) const;
+
+ // Override doc from blueprint::Leaf.
+ virtual void visitMembers(vespalib::ObjectVisitor &visitor) const;
+
+ virtual void fetchPostings(bool strict);
+};
+
+} // namespace search::queryeval
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_search.cpp b/searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_search.cpp
new file mode 100644
index 00000000000..05a2d7d6822
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_search.cpp
@@ -0,0 +1,263 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".queryeval.parallel_weak_and_search");
+#include "parallel_weak_and_search.h"
+#include <vespa/searchlib/queryeval/monitoring_dump_iterator.h>
+#include <vespa/searchlib/queryeval/monitoring_search_iterator.h>
+#include "wand_parts.h"
+#include <vespa/vespalib/objects/visit.h>
+#include <vespa/vespalib/util/stringfmt.h>
+#include <vespa/searchlib/queryeval/emptysearch.h>
+#include <vespa/searchlib/fef/matchdatalayout.h>
+#include <vespa/searchlib/queryeval/document_weight_search_iterator.h>
+
+using vespalib::make_string;
+
+namespace search {
+namespace queryeval {
+
+typedef ParallelWeakAndSearch::MatchParams MatchParams;
+typedef ParallelWeakAndSearch::RankParams RankParams;
+
+namespace wand {
+
+namespace { bool should_monitor_wand() { return LOG_WOULD_LOG(spam); } }
+
+
+template <typename VectorizedTerms, typename FutureHeap, typename PastHeap, bool IS_STRICT>
+class ParallelWeakAndSearchImpl : public ParallelWeakAndSearch
+{
+private:
+ fef::TermFieldMatchData &_tfmd;
+ VectorizedTerms _terms;
+ DualHeap<FutureHeap, PastHeap> _heaps;
+ Algorithm _algo;
+ score_t _threshold;
+ score_t _boostedThreshold;
+ const MatchParams _matchParams;
+ std::vector<score_t> _localScores;
+
+ void updateThreshold(score_t newThreshold) {
+ if (newThreshold > _threshold) {
+ _threshold = newThreshold;
+ _boostedThreshold = (newThreshold * _matchParams.thresholdBoostFactor);
+ }
+ }
+
+ void seek_strict(uint32_t docid) {
+ _algo.set_candidate(_terms, _heaps, docid);
+ while (_algo.solve_wand_constraint(_terms, _heaps, GreaterThan(_boostedThreshold))) {
+ if (_algo.check_score(_terms, _heaps, DotProductScorer(), GreaterThan(_threshold))) {
+ setDocId(_algo.get_candidate());
+ return;
+ } else {
+ _algo.set_candidate(_terms, _heaps, _algo.get_candidate() + 1);
+ }
+ }
+ setAtEnd();
+ }
+
+ void seek_unstrict(uint32_t docid) {
+ if (docid > _algo.get_candidate()) {
+ _algo.set_candidate(_terms, _heaps, docid);
+ if (_algo.check_wand_constraint(_terms, _heaps, GreaterThan(_boostedThreshold))) {
+ if (_algo.check_score(_terms, _heaps, DotProductScorer(), GreaterThan(_threshold))) {
+ setDocId(_algo.get_candidate());
+ }
+ }
+ }
+ }
+
+public:
+ ParallelWeakAndSearchImpl(fef::TermFieldMatchData &tfmd,
+ VectorizedTerms &&terms,
+ const MatchParams &matchParams)
+ : _tfmd(tfmd),
+ _terms(std::move(terms)),
+ _heaps(DocIdOrder(_terms.docId()), _terms.size()),
+ _algo(),
+ _threshold(matchParams.scoreThreshold),
+ _boostedThreshold(_threshold * matchParams.thresholdBoostFactor),
+ _matchParams(matchParams),
+ _localScores()
+ {
+ }
+ virtual size_t get_num_terms() const override { return _terms.size(); }
+ virtual int32_t get_term_weight(size_t idx) const override { return _terms.weight(idx); }
+ virtual score_t get_max_score(size_t idx) const override { return _terms.maxScore(idx); }
+ virtual const MatchParams &getMatchParams() const override { return _matchParams; }
+
+ virtual void doSeek(uint32_t docid) override {
+ updateThreshold(_matchParams.scores.getMinScore());
+ if (IS_STRICT) {
+ seek_strict(docid);
+ } else {
+ seek_unstrict(docid);
+ }
+ }
+ virtual void doUnpack(uint32_t docid) override {
+ score_t score = _algo.get_full_score(_terms, _heaps, DotProductScorer());
+ _localScores.push_back(score);
+ if (_localScores.size() == _matchParams.scoresAdjustFrequency) {
+ _matchParams.scores.adjust(&_localScores[0], &_localScores[0] + _localScores.size());
+ _localScores.clear();
+ }
+ _tfmd.setRawScore(docid, score);
+ }
+ virtual void visitMembers(vespalib::ObjectVisitor &visitor) const override {
+ _terms.visit_members(visitor);
+ }
+ void initRange(uint32_t begin, uint32_t end) override {
+ ParallelWeakAndSearch::initRange(begin, end);
+ _algo.init_range(_terms, _heaps, begin, end);
+ }
+ Trinary is_strict() const override { return IS_STRICT ? Trinary::True : Trinary::False; }
+};
+
+namespace {
+
+wand::Terms
+insertMonitoringSearchIterator(const wand::Terms &terms)
+{
+ wand::Terms retval = terms;
+ for (size_t i = 0; i < terms.size(); ++i) {
+ wand::Term &t = retval[i];
+ t.search = new MonitoringSearchIterator
+ (make_string("w%d:e%u:m%" PRId64 "",
+ t.weight, t.estHits, DotProductScorer::calculateMaxScore(t)),
+ SearchIterator::UP(t.search), true);
+ }
+ return retval;
+}
+
+template <typename FutureHeap, typename PastHeap, bool IS_STRICT>
+SearchIterator *
+createWand(const wand::Terms &terms,
+ const ParallelWeakAndSearch::MatchParams &matchParams,
+ ParallelWeakAndSearch::RankParams &&rankParams)
+{
+ typedef ParallelWeakAndSearchImpl<VectorizedIteratorTerms, FutureHeap, PastHeap, IS_STRICT> WandType;
+ if (should_monitor_wand()) {
+ wand::Terms termsWithMonitoring = insertMonitoringSearchIterator(terms);
+ MonitoringSearchIterator::UP monitoringIterator =
+ MonitoringSearchIterator::UP(new MonitoringSearchIterator
+ (make_string("PWAND(%u,%" PRId64 "),strict=%u",
+ matchParams.scores.getScoresToTrack(),
+ matchParams.scoreThreshold,
+ IS_STRICT),
+ SearchIterator::UP(new WandType(rankParams.rootMatchData,
+ VectorizedIteratorTerms(termsWithMonitoring,
+ DotProductScorer(),
+ matchParams.docIdLimit,
+ std::move(rankParams.childrenMatchData)),
+ matchParams)),
+ false));
+ return new MonitoringDumpIterator(std::move(monitoringIterator));
+ }
+ return new WandType(rankParams.rootMatchData,
+ VectorizedIteratorTerms(terms,
+ DotProductScorer(),
+ matchParams.docIdLimit,
+ std::move(rankParams.childrenMatchData)),
+ matchParams);
+}
+
+} // namespace search::queryeval::wand::<unnamed>
+
+} // namespace search::queryeval::wand
+
+SearchIterator *
+ParallelWeakAndSearch::createArrayWand(const Terms &terms,
+ const MatchParams &matchParams,
+ RankParams &&rankParams,
+ bool strict)
+{
+ if (strict) {
+ return wand::createWand<vespalib::LeftArrayHeap, vespalib::RightArrayHeap, true>(terms, matchParams, std::move(rankParams));
+ } else {
+ return wand::createWand<vespalib::LeftArrayHeap, vespalib::RightArrayHeap, false>(terms, matchParams, std::move(rankParams));
+ }
+}
+
+SearchIterator *
+ParallelWeakAndSearch::createHeapWand(const Terms &terms,
+ const MatchParams &matchParams,
+ RankParams &&rankParams,
+ bool strict)
+{
+ if (strict) {
+ return wand::createWand<vespalib::LeftHeap, vespalib::RightHeap, true>(terms, matchParams, std::move(rankParams));
+ } else {
+ return wand::createWand<vespalib::LeftHeap, vespalib::RightHeap, false>(terms, matchParams, std::move(rankParams));
+ }
+}
+
+SearchIterator *
+ParallelWeakAndSearch::create(const Terms &terms,
+ const MatchParams &matchParams,
+ RankParams &&rankParams,
+ bool strict)
+{
+ if (terms.size() < 128) {
+ return createArrayWand(terms, matchParams, std::move(rankParams), strict);
+ } else {
+ return createHeapWand(terms, matchParams, std::move(rankParams), strict);
+ }
+}
+
+//-----------------------------------------------------------------------------
+
+namespace {
+
+template <typename VectorizedTerms, typename FutureHeap, typename PastHeap>
+SearchIterator::UP create_helper(search::fef::TermFieldMatchData &tfmd, VectorizedTerms &&terms, const MatchParams &params, bool strict) {
+ return (strict)
+ ? SearchIterator::UP(new wand::ParallelWeakAndSearchImpl<VectorizedTerms, FutureHeap, PastHeap, true>(tfmd, std::move(terms), params))
+ : SearchIterator::UP( new wand::ParallelWeakAndSearchImpl<VectorizedTerms, FutureHeap, PastHeap, false>(tfmd, std::move(terms), params));
+}
+
+template <typename VectorizedTerms>
+SearchIterator::UP create_helper(search::fef::TermFieldMatchData &tfmd, VectorizedTerms &&terms, const MatchParams &params, bool strict, bool use_array) {
+ return (use_array)
+ ? create_helper<VectorizedTerms, vespalib::LeftArrayHeap, vespalib::RightArrayHeap>(tfmd, std::move(terms), params, strict)
+ : create_helper<VectorizedTerms, vespalib::LeftHeap, vespalib::RightHeap>(tfmd, std::move(terms), params, strict);
+}
+
+} // namespace search::queryeval::<unnamed>
+
+SearchIterator::UP
+ParallelWeakAndSearch::create(search::fef::TermFieldMatchData &tfmd,
+ const MatchParams &matchParams,
+ const std::vector<int32_t> &weights,
+ const std::vector<IDocumentWeightAttribute::LookupResult> &dict_entries,
+ const IDocumentWeightAttribute &attr,
+ bool strict)
+{
+ assert(weights.size() == dict_entries.size());
+ if (!wand::should_monitor_wand()) {
+ wand::VectorizedAttributeTerms terms(weights, dict_entries, attr, wand::DotProductScorer(), matchParams.docIdLimit);
+ return create_helper(tfmd, std::move(terms), matchParams, strict, (weights.size() < 128));
+ } else {
+ // reverse-wrap direct iterators into old API to be compatible with monitoring
+ fef::MatchDataLayout layout;
+ std::vector<fef::TermFieldHandle> handles;
+ for (size_t i = 0; i < weights.size(); ++i) {
+ handles.push_back(layout.allocTermField(tfmd.getFieldId()));
+ }
+ fef::MatchData::UP childrenMatchData = layout.createMatchData();
+ assert(childrenMatchData->getNumTermFields() == dict_entries.size());
+ wand::Terms terms;
+ for (size_t i = 0; i < dict_entries.size(); ++i) {
+ terms.push_back(wand::Term(new DocumentWeightSearchIterator(*(childrenMatchData->resolveTermField(handles[i])), attr, dict_entries[i]),
+ weights[i],
+ dict_entries[i].posting_size,
+ childrenMatchData->resolveTermField(handles[i])));
+ }
+ assert(terms.size() == dict_entries.size());
+ return SearchIterator::UP(create(terms, matchParams, RankParams(tfmd, std::move(childrenMatchData)), strict));
+ }
+}
+
+} // namespace queryeval
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_search.h b/searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_search.h
new file mode 100644
index 00000000000..68c43844520
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_search.h
@@ -0,0 +1,85 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/searchlib/queryeval/searchiterator.h>
+#include "wand_parts.h"
+#include "parallel_weak_and_search.h"
+#include "weak_and_heap.h"
+#include <vespa/searchlib/fef/matchdata.h>
+#include <vespa/searchlib/fef/termfieldmatchdata.h>
+
+namespace search {
+namespace queryeval {
+
+/**
+ * WAND search iterator that uses a shared heap between match threads.
+ */
+struct ParallelWeakAndSearch : public SearchIterator
+{
+ typedef wand::score_t score_t;
+ typedef wand::docid_t docid_t;
+
+ /**
+ * Params used to tweak the behavior of the WAND algorithm.
+ */
+ struct MatchParams
+ {
+ WeakAndHeap &scores;
+ score_t scoreThreshold;
+ double thresholdBoostFactor;
+ uint32_t scoresAdjustFrequency;
+ docid_t docIdLimit;
+ MatchParams(WeakAndHeap &scores_,
+ score_t scoreThreshold_,
+ double thresholdBoostFactor_,
+ uint32_t scoresAdjustFrequency_)
+ : scores(scores_),
+ scoreThreshold(scoreThreshold_),
+ thresholdBoostFactor(thresholdBoostFactor_),
+ scoresAdjustFrequency(scoresAdjustFrequency_),
+ docIdLimit(0)
+ {}
+ MatchParams &setDocIdLimit(docid_t value) {
+ docIdLimit = value;
+ return *this;
+ }
+ };
+
+ /**
+ * Params used for rank calculation.
+ */
+ struct RankParams
+ {
+ fef::TermFieldMatchData &rootMatchData;
+ fef::MatchData::UP childrenMatchData;
+ RankParams(fef::TermFieldMatchData &rootMatchData_,
+ fef::MatchData::UP &&childrenMatchData_)
+ : rootMatchData(rootMatchData_),
+ childrenMatchData(std::move(childrenMatchData_))
+ {}
+ };
+
+ typedef wand::Terms Terms;
+
+ virtual size_t get_num_terms() const = 0;
+ virtual int32_t get_term_weight(size_t idx) const = 0;
+ virtual score_t get_max_score(size_t idx) const = 0;
+ virtual const MatchParams &getMatchParams() const = 0;
+
+ static SearchIterator *createArrayWand(const Terms &terms, const MatchParams &matchParams, RankParams &&rankParams, bool strict);
+ static SearchIterator *createHeapWand(const Terms &terms, const MatchParams &matchParams, RankParams &&rankParams, bool strict);
+ static SearchIterator *create(const Terms &terms, const MatchParams &matchParams, RankParams &&rankParams, bool strict);
+
+ static SearchIterator::UP create(search::fef::TermFieldMatchData &tmd,
+ const MatchParams &matchParams,
+ const std::vector<int32_t> &weights,
+ const std::vector<IDocumentWeightAttribute::LookupResult> &dict_entries,
+ const IDocumentWeightAttribute &attr,
+ bool strict);
+};
+
+} // namespace queryeval
+} // namespace search
+
+
+
diff --git a/searchlib/src/vespa/searchlib/queryeval/wand/wand_parts.cpp b/searchlib/src/vespa/searchlib/queryeval/wand/wand_parts.cpp
new file mode 100644
index 00000000000..c021557a0f5
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/wand/wand_parts.cpp
@@ -0,0 +1,27 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "wand_parts.h"
+#include <vespa/vespalib/objects/visit.h>
+
+namespace search {
+namespace queryeval {
+namespace wand {
+
+void
+VectorizedIteratorTerms::visit_members(vespalib::ObjectVisitor &visitor) const {
+ visit(visitor, "children", _terms);
+}
+
+} // namespace wand
+} // namespace queryeval
+} // namespace search
+
+void visit(vespalib::ObjectVisitor &self, const vespalib::string &name,
+ const search::queryeval::wand::Term &obj)
+{
+ self.openStruct(name, "search::queryeval::wand::Term");
+ visit(self, "weight", obj.weight);
+ visit(self, "estHits", obj.estHits);
+ visit(self, "search", obj.search);
+ self.closeStruct();
+}
diff --git a/searchlib/src/vespa/searchlib/queryeval/wand/wand_parts.h b/searchlib/src/vespa/searchlib/queryeval/wand/wand_parts.h
new file mode 100644
index 00000000000..f28caa3f529
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/wand/wand_parts.h
@@ -0,0 +1,615 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <algorithm>
+#include <math.h>
+#include <vespa/searchlib/fef/matchdata.h>
+#include <vespa/searchlib/fef/termfieldmatchdata.h>
+#include <vespa/searchlib/queryeval/searchiterator.h>
+#include <vespa/searchlib/queryeval/iterator_pack.h>
+#include <vespa/searchlib/attribute/iterator_pack.h>
+#include <vespa/vespalib/objects/objectvisitor.h>
+#include <vespa/vespalib/util/array.h>
+#include <vespa/vespalib/util/priority_queue.h>
+#include <vespa/searchlib/attribute/i_document_weight_attribute.h>
+#include <vespa/vespalib/util/stringfmt.h>
+
+namespace search {
+namespace queryeval {
+namespace wand {
+
+//-----------------------------------------------------------------------------
+
+struct Term;
+typedef std::vector<Term> Terms;
+typedef int64_t score_t;
+typedef uint32_t docid_t;
+typedef uint16_t ref_t;
+
+typedef IDocumentWeightAttribute Attr;
+typedef Attr::LookupResult AttrDictEntry;
+typedef std::vector<AttrDictEntry> AttrDictEntries;
+
+//-----------------------------------------------------------------------------
+
+/**
+ * Wrapper used to specify underlying terms during setup
+ **/
+struct Term {
+ SearchIterator *search;
+ int32_t weight;
+ uint32_t estHits;
+ fef::TermFieldMatchData *matchData;
+ score_t maxScore = 0.0; // <- only used by rise wand test
+ Term(SearchIterator *s, int32_t w, uint32_t e, fef::TermFieldMatchData *tfmd)
+ : search(s), weight(w), estHits(e), matchData(tfmd) {}
+ Term() : Term(nullptr, 0, 0, nullptr){}
+ Term(SearchIterator *s, int32_t w, uint32_t e) : Term(s, w, e, nullptr) {}
+};
+
+//-----------------------------------------------------------------------------
+
+// input manipulation utilities
+
+namespace {
+
+struct Ident {
+ template <typename T> T operator()(const T &t) const { return t; }
+};
+
+struct NumericOrder {
+ size_t my_size;
+ NumericOrder(size_t my_size_in) : my_size(my_size_in) {}
+ size_t size() const { return my_size; }
+ ref_t operator[](size_t idx) const { return idx; }
+};
+
+template <typename F, typename Order>
+auto assemble(const F &f, const Order &order)->std::vector<decltype(f(0))> {
+ std::vector<decltype(f(0))> result;
+ result.reserve(order.size());
+ for (size_t i = 0; i < order.size(); ++i) {
+ result.push_back(f(order[i]));
+ }
+ return result;
+}
+
+int32_t get_max_weight(const SearchIterator &search) {
+ const MinMaxPostingInfo *minMax = dynamic_cast<const MinMaxPostingInfo *>(search.getPostingInfo());
+ return (minMax != nullptr) ? minMax->getMaxWeight() : std::numeric_limits<int32_t>::max();
+}
+
+} // namespace search::wand::<unnamed>
+
+struct TermInput {
+ const Terms &terms;
+ TermInput(const Terms &terms_in) : terms(terms_in) {}
+ size_t size() const { return terms.size(); }
+ int32_t get_weight(ref_t ref) const { return terms[ref].weight; }
+ uint32_t get_est_hits(ref_t ref) const { return terms[ref].estHits; }
+ int32_t get_max_weight(ref_t ref) const { return ::search::queryeval::wand::get_max_weight(*(terms[ref].search)); }
+ docid_t get_initial_docid(ref_t ref) const { return terms[ref].search->getDocId(); }
+};
+
+struct AttrInput {
+ const std::vector<int32_t> &weights;
+ const std::vector<IDocumentWeightAttribute::LookupResult> &dict_entries;
+ AttrInput(const std::vector<int32_t> &weights_in,
+ const std::vector<IDocumentWeightAttribute::LookupResult> &dict_entries_in)
+ : weights(weights_in), dict_entries(dict_entries_in) {}
+ size_t size() const { return weights.size(); }
+ int32_t get_weight(ref_t ref) const { return weights[ref]; }
+ uint32_t get_est_hits(ref_t ref) const { return dict_entries[ref].posting_size; }
+ int32_t get_max_weight(ref_t ref) const { return dict_entries[ref].max_weight; }
+ docid_t get_initial_docid(ref_t) const { return SearchIterator::beginId(); }
+};
+
+template <typename Input>
+struct MaxSkipOrder {
+ double estNumDocs;
+ const Input &input;
+ const std::vector<score_t> &max_score;
+ MaxSkipOrder(docid_t docIdLimit, const Input &input_in,
+ const std::vector<score_t> &max_score_in)
+ : estNumDocs(1.0), input(input_in), max_score(max_score_in)
+ {
+ estNumDocs = std::max(estNumDocs, docIdLimit - 1.0);
+ for (size_t i = 0; i < input.size(); ++i) {
+ estNumDocs = std::max(estNumDocs, (double)input.get_est_hits(i));
+ }
+ }
+ double p_not_hit(double estHits) const {
+ return ((estNumDocs - estHits) / (estNumDocs));
+ }
+ bool operator()(ref_t a, ref_t b) const {
+ return ((p_not_hit(input.get_est_hits(a)) * max_score[a]) > (p_not_hit(input.get_est_hits(b)) * max_score[b]));
+ }
+};
+
+//-----------------------------------------------------------------------------
+
+namespace {
+
+template <typename ITR, typename F>
+vespalib::string do_stringify(const vespalib::string &title, ITR begin, ITR end, const F &f) {
+ vespalib::string result = vespalib::make_string("[%s]{", title.c_str());
+ for (ITR pos = begin; pos != end; ++pos) {
+ if (pos != begin) {
+ result.append(", ");
+ }
+ result.append(f(*pos));
+ }
+ result.append("}");
+ return result;
+}
+
+} // namespace searchlib::wand::<unnamed>
+
+//-----------------------------------------------------------------------------
+
+template <typename IteratorPack>
+class VectorizedState
+{
+private:
+ std::vector<docid_t> _docId;
+ std::vector<int32_t> _weight;
+ std::vector<score_t> _maxScore;
+ IteratorPack _iteratorPack;
+
+public:
+ VectorizedState() : _docId(), _weight(), _maxScore(), _iteratorPack() {}
+
+ template <typename Scorer, typename Input>
+ std::vector<ref_t> init_state(const Input &input, uint32_t docIdLimit) {
+ std::vector<ref_t> order;
+ std::vector<score_t> max_scores;
+ order.reserve(input.size());
+ max_scores.reserve(input.size());
+ for (size_t i = 0; i < input.size(); ++i) {
+ order.push_back(i);
+ max_scores.push_back(Scorer::calculate_max_score(input, i));
+ }
+ std::sort(order.begin(), order.end(), MaxSkipOrder<Input>(docIdLimit, input, max_scores));
+ _docId = assemble([&input](ref_t ref){ return input.get_initial_docid(ref); }, order);
+ _weight = assemble([&input](ref_t ref){ return input.get_weight(ref); }, order);
+ _maxScore = assemble([&max_scores](ref_t ref){ return max_scores[ref]; }, order);
+ return order;
+ }
+
+ docid_t *docId() { return &(_docId[0]); }
+ const int32_t *weight() const { return &(_weight[0]); }
+ const score_t *maxScore() const { return &(_maxScore[0]); }
+
+ docid_t &docId(ref_t ref) { return _docId[ref]; }
+ int32_t weight(ref_t ref) const { return _weight[ref]; }
+ score_t maxScore(ref_t ref) const { return _maxScore[ref]; }
+
+ size_t size() const { return _docId.size(); }
+ IteratorPack &iteratorPack() { return _iteratorPack; }
+
+ uint32_t seek(uint16_t ref, uint32_t docid) { return _iteratorPack.seek(ref, docid); }
+ int32_t get_weight(uint16_t ref, uint32_t docid) { return _iteratorPack.get_weight(ref, docid); }
+
+ vespalib::string stringify_docid() const {
+ auto range = assemble(Ident(), NumericOrder(_docId.size()));
+ return do_stringify("state{docid}", range.begin(), range.end(),
+ [this](ref_t ref)
+ {
+ return vespalib::make_string("%u:%u/%u", ref, _docId[ref], _iteratorPack.get_docid(ref));
+ });
+ }
+};
+
+//-----------------------------------------------------------------------------
+
+class VectorizedIteratorTerms : public VectorizedState<SearchIteratorPack>
+{
+private:
+ Terms _terms; // TODO: want to get rid of this
+
+public:
+ template <typename Scorer>
+ VectorizedIteratorTerms(const Terms &t, const Scorer &, uint32_t docIdLimit,
+ fef::MatchData::UP childrenMatchData)
+ : _terms()
+ {
+ std::vector<ref_t> order = init_state<Scorer>(TermInput(t), docIdLimit);
+ _terms = assemble([&t](ref_t ref){ return t[ref]; }, order);
+ iteratorPack() = SearchIteratorPack(assemble([&t](ref_t ref){ return t[ref].search; }, order),
+ assemble([&t](ref_t ref){ return t[ref].matchData; }, order),
+ std::move(childrenMatchData));
+ }
+ void unpack(uint16_t ref, uint32_t docid) { iteratorPack().unpack(ref, docid); }
+ void visit_members(vespalib::ObjectVisitor &visitor) const;
+ const Terms &input_terms() const { return _terms; }
+};
+
+//-----------------------------------------------------------------------------
+
+struct VectorizedAttributeTerms : VectorizedState<AttributeIteratorPack> {
+ template <typename Scorer>
+ VectorizedAttributeTerms(const std::vector<int32_t> &weights,
+ const std::vector<IDocumentWeightAttribute::LookupResult> &dict_entries,
+ const IDocumentWeightAttribute &attr,
+ const Scorer &,
+ docid_t docIdLimit)
+ {
+ std::vector<ref_t> order = init_state<Scorer>(AttrInput(weights, dict_entries), docIdLimit);
+ std::vector<DocumentWeightIterator> iterators;
+ iterators.reserve(order.size());
+ for (size_t i = 0; i < order.size(); ++i) {
+ attr.create(dict_entries[order[i]].posting_idx, iterators);
+ docId(i) = (iterators.back().valid()) ? iterators.back().getKey() : search::endDocId;
+ }
+ iteratorPack() = AttributeIteratorPack(std::move(iterators));
+ }
+ void visit_members(vespalib::ObjectVisitor &) const {}
+};
+
+//-----------------------------------------------------------------------------
+
+/**
+ * Comparator used on vectorized state to sort by increasing document
+ * id
+ **/
+struct DocIdOrder {
+ const docid_t *termPos;
+ DocIdOrder(docid_t *pos) : termPos(pos) {}
+ bool at_end(ref_t ref) const { return termPos[ref] == search::endDocId; }
+ docid_t get_pos(ref_t ref) const { return termPos[ref]; }
+ bool operator()(ref_t a, ref_t b) const {
+ return (termPos[a] < termPos[b]);
+ }
+};
+
+//-----------------------------------------------------------------------------
+
+template <typename FutureHeap, typename PastHeap>
+class DualHeap
+{
+private:
+ DocIdOrder _futureCmp;
+ std::vector<ref_t> _space;
+ ref_t *_future; // start of future heap
+ ref_t *_present; // start of present array
+ ref_t *_past; // start of past heap
+ ref_t *_trash; // end of used data
+ size_t _size;
+
+public:
+ DualHeap(const DocIdOrder &futureCmp, size_t size)
+ : _futureCmp(futureCmp), _space(), _future(nullptr), _present(nullptr), _past(nullptr), _trash(nullptr), _size(size)
+ {
+ FutureHeap::require_left_heap();
+ PastHeap::require_right_heap();
+ _space.reserve(size);
+ init();
+ }
+ void init() {
+ _space.clear();
+ _future = &(_space[0]);
+ _present = _future;
+ for (size_t i = 0; i < _size; ++i) {
+ if (!_futureCmp.at_end(i)) {
+ _space.push_back(i);
+ FutureHeap::push(_future, ++_present, _futureCmp);
+ }
+ }
+ _past = _present;
+ _trash = _past;
+ assert(_future == &(_space[0])); // space has not moved
+ }
+ bool has_future() const { return (_future != _present);}
+ bool has_present() const { return (_present != _past);}
+ bool has_past() const { return (_past != _trash);}
+ ref_t future() const { return FutureHeap::front(_future, _present); }
+ ref_t first_present() const { return *_present; }
+ ref_t last_present() const { return *(_past - 1); }
+ void swap_presents() { std::swap(*_present, *(_past - 1)); }
+ void push_future() { FutureHeap::push(_future, ++_present, _futureCmp); }
+ void pop_future() { FutureHeap::pop(_future, _present--, _futureCmp); }
+ void push_past() { PastHeap::push(--_past, _trash, std::less<ref_t>()); }
+ void pop_past() { PastHeap::pop(_past++, _trash, std::less<ref_t>()); }
+ void pop_any_past() { _past++; }
+ void discard_last_present() {
+ memmove((_past - 1), _past,
+ (_trash - _past) * sizeof(ref_t));
+ --_past;
+ --_trash;
+ }
+ ref_t *present_begin() const { return _present; }
+ ref_t *present_end() const { return _past; }
+ vespalib::string stringify() const {
+ return "Heaps: "
+ + do_stringify("future", _future, _present,
+ [this](ref_t ref){ return vespalib::make_string("%u@%u", ref, _futureCmp.get_pos(ref)); })
+ + " " + do_stringify("present", _present, _past,
+ [this](ref_t ref){ return vespalib::make_string("%u@%u", ref, _futureCmp.get_pos(ref)); })
+ + " " + do_stringify("past", _past, _trash,
+ [this](ref_t ref){ return vespalib::make_string("%u@%u", ref, _futureCmp.get_pos(ref)); });
+ }
+};
+
+//-----------------------------------------------------------------------------
+
+#define TermFrequencyScorer_TERM_SCORE_FACTOR 1000000.0
+
+/**
+ * Scorer used with WeakAndAlgorithm that calculates a pseudo term frequency
+ * as max score and regular score for a term.
+ */
+struct TermFrequencyScorer
+{
+ // weight * idf, scaled to fixedpoint
+ static score_t calculateMaxScore(double estHits, double weight) {
+ return (score_t) (TermFrequencyScorer_TERM_SCORE_FACTOR * weight / (1.0 + log(1.0 + (estHits / 1000.0))));
+ }
+
+ static score_t calculateMaxScore(const Term &term) {
+ return calculateMaxScore(term.estHits, term.weight) + 1;
+ }
+
+ template <typename Input>
+ static score_t calculate_max_score(const Input &input, ref_t ref) {
+ return calculateMaxScore(input.get_est_hits(ref), input.get_weight(ref)) + 1;
+ }
+};
+
+//-----------------------------------------------------------------------------
+
+/**
+ * Scorer used with WeakAndAlgorithm that calculates a real dot product upper
+ * bound as max score and dot product component score per term.
+ */
+struct DotProductScorer
+{
+ static score_t calculateMaxScore(const Term &term) {
+ int32_t maxWeight = std::numeric_limits<int32_t>::max();
+ const PostingInfo *postingInfo = term.search->getPostingInfo();
+ if (postingInfo != NULL) {
+ const MinMaxPostingInfo *minMax = dynamic_cast<const MinMaxPostingInfo *>(postingInfo);
+ if (minMax != NULL) {
+ maxWeight = minMax->getMaxWeight();
+ }
+ }
+ return (score_t)term.weight * maxWeight;
+ }
+
+ template <typename Input>
+ static score_t calculate_max_score(const Input &input, ref_t ref) {
+ return input.get_weight(ref) * (score_t) input.get_max_weight(ref);
+ }
+
+ static score_t calculateScore(const Term &term, docid_t docId) {
+ term.search->doUnpack(docId);
+ return (score_t)term.weight * term.matchData->getWeight();
+ }
+
+ template <typename VectorizedTerms>
+ static score_t calculateScore(VectorizedTerms &terms, ref_t ref, docid_t docId) {
+ return terms.weight(ref) * (score_t)terms.get_weight(ref, docId);
+ }
+};
+
+//-----------------------------------------------------------------------------
+
+// used with parallel wand where we can safely discard hits based on score
+struct GreaterThan {
+ score_t threshold;
+ GreaterThan(score_t t) : threshold(t) {}
+ bool operator()(score_t score) const { return (score > threshold); }
+};
+
+// used with old-style vespa wand to ensure at least AND'ish results
+struct GreaterThanEqual {
+ score_t threshold;
+ GreaterThanEqual(score_t t) : threshold(t) {}
+ bool operator()(score_t score) const { return (score >= threshold); }
+};
+
+//-----------------------------------------------------------------------------
+
+class Algorithm
+{
+private:
+ docid_t _candidate;
+ score_t _upperBound;
+ score_t _maxUpperBound;
+ score_t _partial_score;
+
+ template <typename VectorizedTerms>
+ bool step_term(VectorizedTerms &terms, ref_t ref) {
+ terms.docId(ref) = terms.seek(ref, _candidate);
+ return (terms.docId(ref) == _candidate);
+ }
+
+ template <typename VectorizedTerms, typename Heaps>
+ void evict_last_present(VectorizedTerms &terms, Heaps &heaps) {
+ _maxUpperBound -= terms.maxScore(heaps.last_present());
+ if (terms.docId(heaps.last_present()) != search::endDocId) {
+ heaps.swap_presents();
+ heaps.push_future();
+ } else {
+ heaps.discard_last_present();
+ }
+ }
+
+ template <typename Heaps>
+ void discard_candidate(Heaps &heaps) {
+ while (heaps.has_present()) {
+ heaps.push_past();
+ }
+ _upperBound = 0;
+ }
+
+ template <typename VectorizedTerms, typename Heaps>
+ void step_optimal_term(VectorizedTerms &terms, Heaps &heaps) {
+ heaps.pop_past();
+ if (step_term(terms, heaps.last_present())) {
+ _upperBound += terms.maxScore(heaps.last_present());
+ } else {
+ evict_last_present(terms, heaps);
+ }
+ }
+
+ template <typename VectorizedTerms, typename Heaps>
+ void step_candidate(VectorizedTerms &terms, Heaps &heaps) {
+ discard_candidate(heaps); // will reset upper bound
+ _candidate = terms.docId(heaps.future());
+ do {
+ heaps.pop_future();
+ _upperBound += terms.maxScore(heaps.first_present());
+ } while (heaps.has_future() && terms.docId(heaps.future()) == _candidate);
+ _maxUpperBound += _upperBound;
+ }
+
+ template <typename VectorizedTerms, typename Heaps, typename Scorer, typename AboveThreshold>
+ bool check_present_score(VectorizedTerms &terms, Heaps &heaps, score_t &max_score, const Scorer &, AboveThreshold &&aboveThreshold) {
+ ref_t *end = heaps.present_end();
+ for (ref_t *ref = heaps.present_begin(); ref != end; ++ref) {
+ score_t term_score = Scorer::calculateScore(terms, *ref, _candidate);
+ _partial_score += term_score;
+ max_score -= (terms.maxScore(*ref) - term_score);
+ if (!aboveThreshold(max_score)) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ template <typename VectorizedTerms, typename Heaps, typename Scorer, typename AboveThreshold>
+ bool check_past_score(VectorizedTerms &terms, Heaps &heaps, score_t &max_score, const Scorer &, AboveThreshold &&aboveThreshold) {
+ while (heaps.has_past() && !aboveThreshold(_partial_score)) {
+ heaps.pop_past();
+ if (step_term(terms, heaps.last_present())) {
+ score_t term_score = Scorer::calculateScore(terms, heaps.last_present(), _candidate);
+ _partial_score += term_score;
+ max_score -= (terms.maxScore(heaps.last_present()) - term_score);
+ } else {
+ max_score -= terms.maxScore(heaps.last_present());
+ evict_last_present(terms, heaps);
+ }
+ if (!aboveThreshold(max_score)) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ void reset() {
+ _candidate = SearchIterator::beginId();
+ _upperBound = 0;
+ _maxUpperBound = 0;
+ _partial_score = 0;
+ }
+
+public:
+ Algorithm() : _candidate(SearchIterator::beginId()), _upperBound(0), _maxUpperBound(0), _partial_score(0) {}
+
+ template <typename VectorizedTerms, typename Heaps>
+ void init_range(VectorizedTerms &terms, Heaps &heaps, uint32_t begin_id, uint32_t end_id) {
+ reset();
+ terms.iteratorPack().initRange(begin_id, end_id);
+ for (size_t i = 0; i < terms.size(); ++i) {
+ terms.docId(i) = terms.iteratorPack().get_docid(i);
+ }
+ heaps.init();
+ }
+
+ docid_t get_candidate() const { return _candidate; }
+ score_t get_upper_bound() const { return _upperBound; }
+
+ template <typename VectorizedTerms, typename Heaps>
+ void set_candidate(VectorizedTerms &terms, Heaps &heaps, docid_t candidate) {
+ _candidate = candidate;
+ while (heaps.has_future() && terms.docId(heaps.future()) < candidate) {
+ heaps.pop_future();
+ _maxUpperBound += terms.maxScore(heaps.first_present());
+ }
+ discard_candidate(heaps); // will reset upper bound
+ while (heaps.has_future() && terms.docId(heaps.future()) == candidate) {
+ heaps.pop_future();
+ _upperBound += terms.maxScore(heaps.first_present());
+ }
+ _maxUpperBound += _upperBound;
+ }
+
+ template <typename VectorizedTerms, typename Heaps, typename AboveThreshold>
+ bool solve_wand_constraint(VectorizedTerms &terms, Heaps &heaps, AboveThreshold &&aboveThreshold) {
+ while (!aboveThreshold(_upperBound)) {
+ if (aboveThreshold(_maxUpperBound)) {
+ step_optimal_term(terms, heaps);
+ } else if (heaps.has_future()) {
+ step_candidate(terms, heaps);
+ } else {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ template <typename VectorizedTerms, typename Heaps, typename AboveThreshold>
+ bool check_wand_constraint(VectorizedTerms &terms, Heaps &heaps, AboveThreshold &&aboveThreshold) {
+ while (!aboveThreshold(_upperBound)) {
+ if (aboveThreshold(_maxUpperBound)) {
+ step_optimal_term(terms, heaps);
+ } else {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ template <typename VectorizedTerms, typename Heaps, typename Scorer, typename AboveThreshold>
+ bool check_score(VectorizedTerms &terms, Heaps &heaps, Scorer &&scorer, AboveThreshold &&aboveThreshold) {
+ _partial_score = 0;
+ score_t max_score = _maxUpperBound;
+ if (check_present_score(terms, heaps, max_score, scorer, aboveThreshold)) {
+ if (check_past_score(terms, heaps, max_score, scorer, aboveThreshold)) {
+ return aboveThreshold(_partial_score);
+ }
+ }
+ return false;
+ }
+
+ template <typename VectorizedTerms, typename Heaps, typename Scorer>
+ score_t get_full_score(VectorizedTerms &terms, Heaps &heaps, Scorer &&) {
+ score_t score = _partial_score;
+ while (heaps.has_past()) {
+ heaps.pop_any_past();
+ if (step_term(terms, heaps.last_present())) {
+ score += Scorer::calculateScore(terms, heaps.last_present(), _candidate);
+ } else {
+ evict_last_present(terms, heaps);
+ }
+ }
+ return score;
+ }
+
+ template <typename VectorizedTerms, typename Heaps>
+ void find_matching_terms(VectorizedTerms &terms, Heaps &heaps) {
+ while (heaps.has_past()) {
+ heaps.pop_any_past();
+ if (step_term(terms, heaps.last_present())) {
+ _upperBound += terms.maxScore(heaps.last_present());
+ } else {
+ evict_last_present(terms, heaps);
+ }
+ }
+ }
+};
+
+//-----------------------------------------------------------------------------
+
+} // namespace wand
+} // namespace queryeval
+} // namespace search
+
+//-----------------------------------------------------------------------------
+
+void visit(vespalib::ObjectVisitor &self, const vespalib::string &name,
+ const search::queryeval::wand::Term &obj);
+
+//-----------------------------------------------------------------------------
+
diff --git a/searchlib/src/vespa/searchlib/queryeval/wand/weak_and_heap.cpp b/searchlib/src/vespa/searchlib/queryeval/wand/weak_and_heap.cpp
new file mode 100644
index 00000000000..e8e149da476
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/wand/weak_and_heap.cpp
@@ -0,0 +1,41 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".queryeval.weak_and_heap");
+#include "weak_and_heap.h"
+#include <limits>
+
+namespace search {
+namespace queryeval {
+
+SharedWeakAndPriorityQueue::SharedWeakAndPriorityQueue(uint32_t scoresToTrack) :
+ WeakAndHeap(scoresToTrack),
+ _bestScores(),
+ _lock()
+{
+ _bestScores.reserve(scoresToTrack);
+}
+
+void
+SharedWeakAndPriorityQueue::adjust(score_t *begin, score_t *end)
+{
+ if (getScoresToTrack() == 0) {
+ return;
+ }
+ vespalib::LockGuard guard(_lock);
+ for (score_t *itr = begin; itr != end; ++itr) {
+ score_t score = *itr;
+ if (!is_full()) {
+ _bestScores.push(score);
+ } else if (_bestScores.front() < score) {
+ _bestScores.push(score);
+ _bestScores.pop_front();
+ }
+ }
+ if (is_full()) {
+ setMinScore(_bestScores.front());
+ }
+}
+
+} // namespace queryeval
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/queryeval/wand/weak_and_heap.h b/searchlib/src/vespa/searchlib/queryeval/wand/weak_and_heap.h
new file mode 100644
index 00000000000..7208dca2dbe
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/wand/weak_and_heap.h
@@ -0,0 +1,67 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include "wand_parts.h"
+#include <vespa/vespalib/util/priority_queue.h>
+#include <vespa/vespalib/util/sync.h>
+
+namespace search {
+namespace queryeval {
+
+/**
+ * An interface used to insert scores into an underlying heap (or similar data structure)
+ * that can be shared between multiple search iterators.
+ * An implementation of this interface must keep the best N scores and
+ * provide the threshold score (lowest score among the best N).
+ */
+class WeakAndHeap {
+public:
+ typedef wand::score_t score_t;
+ WeakAndHeap(uint32_t scoresToTrack) :
+ _minScore((scoresToTrack == 0)
+ ? std::numeric_limits<score_t>::max()
+ : 0),
+ _scoresToTrack(scoresToTrack)
+ { }
+ virtual ~WeakAndHeap() {}
+ /**
+ * Consider the given scores for insertion into the underlying structure.
+ * The implementation may change the given score array to speed up execution.
+ */
+ virtual void adjust(score_t *begin, score_t *end) = 0;
+
+ /**
+ * The number of scores this heap is tracking.
+ **/
+ uint32_t getScoresToTrack() const { return _scoresToTrack; }
+
+ score_t getMinScore() const { return _minScore; }
+protected:
+ void setMinScore(score_t minScore) { _minScore = minScore; }
+private:
+ score_t _minScore;
+ const uint32_t _scoresToTrack;
+};
+
+/**
+ * An implementation using an underlying priority queue to keep track of the N
+ * best hits that can be shared among multiple search iterators.
+ */
+class SharedWeakAndPriorityQueue : public WeakAndHeap
+{
+private:
+ typedef vespalib::PriorityQueue<score_t> Scores;
+ Scores _bestScores;
+ vespalib::Lock _lock;
+
+ bool is_full() const { return (_bestScores.size() >= getScoresToTrack()); }
+
+public:
+ SharedWeakAndPriorityQueue(uint32_t scoresToTrack);
+ Scores &getScores() { return _bestScores; }
+ void adjust(score_t *begin, score_t *end) override;
+};
+
+} // namespace queryeval
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/queryeval/wand/weak_and_search.cpp b/searchlib/src/vespa/searchlib/queryeval/wand/weak_and_search.cpp
new file mode 100644
index 00000000000..988be3f6ba9
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/wand/weak_and_search.cpp
@@ -0,0 +1,143 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "wand_parts.h"
+#include "weak_and_search.h"
+#include <vespa/searchlib/queryeval/orsearch.h>
+#include <vespa/vespalib/util/left_right_heap.h>
+#include <vespa/vespalib/util/priority_queue.h>
+#include <vespa/vespalib/objects/visit.h>
+
+namespace search {
+namespace queryeval {
+namespace wand {
+
+template <typename FutureHeap, typename PastHeap, bool IS_STRICT>
+class WeakAndSearchLR : public WeakAndSearch
+{
+private:
+ typedef vespalib::PriorityQueue<score_t> Scores;
+
+ VectorizedIteratorTerms _terms;
+ DualHeap<FutureHeap, PastHeap> _heaps;
+ Algorithm _algo;
+ score_t _threshold; // current score threshold
+ Scores _scores; // best n scores
+ const uint32_t _n;
+
+ void seek_strict(uint32_t docid) {
+ _algo.set_candidate(_terms, _heaps, docid);
+ if (_algo.solve_wand_constraint(_terms, _heaps, GreaterThanEqual(_threshold))) {
+ setDocId(_algo.get_candidate());
+ } else {
+ setAtEnd();
+ }
+ }
+
+ void seek_unstrict(uint32_t docid) {
+ if (docid > _algo.get_candidate()) {
+ _algo.set_candidate(_terms, _heaps, docid);
+ if (_algo.check_wand_constraint(_terms, _heaps, GreaterThanEqual(_threshold))) {
+ setDocId(_algo.get_candidate());
+ }
+ }
+ }
+
+public:
+ WeakAndSearchLR(const Terms &terms, uint32_t n)
+ : _terms(terms,
+ TermFrequencyScorer(),
+ 0,
+ fef::MatchData::UP(nullptr)),
+ _heaps(DocIdOrder(_terms.docId()), _terms.size()),
+ _algo(),
+ _threshold(1),
+ _scores(),
+ _n(n)
+ {
+ }
+ virtual size_t get_num_terms() const override { return _terms.size(); }
+ virtual int32_t get_term_weight(size_t idx) const override { return _terms.weight(idx); }
+ virtual score_t get_max_score(size_t idx) const override { return _terms.maxScore(idx); }
+ const Terms &getTerms() const { return _terms.input_terms(); }
+ uint32_t getN() const { return _n; }
+ void doSeek(uint32_t docid) override {
+ if (IS_STRICT) {
+ seek_strict(docid);
+ } else {
+ seek_unstrict(docid);
+ }
+ }
+ void doUnpack(uint32_t docid) override {
+ _algo.find_matching_terms(_terms, _heaps);
+ _scores.push(_algo.get_upper_bound());
+ if (_scores.size() > _n) {
+ _scores.pop_front();
+ }
+ if (_scores.size() == _n) {
+ _threshold = _scores.front();
+ }
+ ref_t *end = _heaps.present_end();
+ for (ref_t *ref = _heaps.present_begin(); ref != end; ++ref) {
+ _terms.unpack(*ref, docid);
+ }
+ }
+ void initRange(uint32_t begin, uint32_t end) override {
+ WeakAndSearch::initRange(begin, end);
+ _algo.init_range(_terms, _heaps, begin, end);
+ if (_n == 0) {
+ setAtEnd();
+ }
+ }
+ Trinary is_strict() const override { return IS_STRICT ? Trinary::True : Trinary::False; }
+};
+
+//-----------------------------------------------------------------------------
+
+} // namespace search::queryeval::wand
+
+//-----------------------------------------------------------------------------
+
+void
+WeakAndSearch::visitMembers(vespalib::ObjectVisitor &visitor) const
+{
+ visit(visitor, "n", getN());
+ visit(visitor, "terms", getTerms());
+}
+
+//-----------------------------------------------------------------------------
+
+SearchIterator *
+WeakAndSearch::createArrayWand(const Terms &terms, uint32_t n, bool strict)
+{
+ if (strict) {
+ return new wand::WeakAndSearchLR<vespalib::LeftArrayHeap, vespalib::RightArrayHeap, true>(terms, n);
+ } else {
+ return new wand::WeakAndSearchLR<vespalib::LeftArrayHeap, vespalib::RightArrayHeap, false>(terms, n);
+ }
+}
+
+SearchIterator *
+WeakAndSearch::createHeapWand(const Terms &terms, uint32_t n, bool strict)
+{
+ if (strict) {
+ return new wand::WeakAndSearchLR<vespalib::LeftHeap, vespalib::RightHeap, true>(terms, n);
+ } else {
+ return new wand::WeakAndSearchLR<vespalib::LeftHeap, vespalib::RightHeap, false>(terms, n);
+ }
+}
+
+SearchIterator *
+WeakAndSearch::create(const Terms &terms, uint32_t n, bool strict)
+{
+ if (terms.size() < 128) {
+ return createArrayWand(terms, n, strict);
+ } else {
+ return createHeapWand(terms, n, strict);
+ }
+}
+
+//-----------------------------------------------------------------------------
+
+} // namespace queryeval
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/queryeval/wand/weak_and_search.h b/searchlib/src/vespa/searchlib/queryeval/wand/weak_and_search.h
new file mode 100644
index 00000000000..814e84c2d79
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/wand/weak_and_search.h
@@ -0,0 +1,26 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/queryeval/searchiterator.h>
+#include "wand_parts.h"
+
+namespace search {
+namespace queryeval {
+
+struct WeakAndSearch : SearchIterator {
+ typedef wand::Terms Terms;
+ virtual size_t get_num_terms() const = 0;
+ virtual int32_t get_term_weight(size_t idx) const = 0;
+ virtual wand::score_t get_max_score(size_t idx) const = 0;
+ virtual const Terms &getTerms() const = 0;
+ virtual uint32_t getN() const = 0;
+ virtual void visitMembers(vespalib::ObjectVisitor &visitor) const;
+ static SearchIterator *createArrayWand(const Terms &terms, uint32_t n, bool strict);
+ static SearchIterator *createHeapWand(const Terms &terms, uint32_t n, bool strict);
+ static SearchIterator *create(const Terms &terms, uint32_t n, bool strict);
+};
+
+} // namespace queryeval
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_blueprint.cpp b/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_blueprint.cpp
new file mode 100644
index 00000000000..d572fd5f48b
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_blueprint.cpp
@@ -0,0 +1,89 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".queryeval.weighted_set_term.blueprint");
+
+#include "weighted_set_term_blueprint.h"
+#include "weighted_set_term_search.h"
+#include <vespa/searchlib/fef/termfieldmatchdata.h>
+#include <vespa/searchlib/queryeval/searchiterator.h>
+#include <vespa/vespalib/objects/visit.h>
+#include <algorithm>
+
+namespace search {
+namespace queryeval {
+
+WeightedSetTermBlueprint::WeightedSetTermBlueprint(const FieldSpec &field)
+ : ComplexLeafBlueprint(field),
+ _estimate(),
+ _weights(),
+ _terms()
+{
+}
+
+WeightedSetTermBlueprint::~WeightedSetTermBlueprint()
+{
+ while (!_terms.empty()) {
+ delete _terms.back();
+ _terms.pop_back();
+ }
+}
+
+void
+WeightedSetTermBlueprint::addTerm(Blueprint::UP term, int32_t weight)
+{
+ HitEstimate childEst = term->getState().estimate();
+ if (! childEst.empty) {
+ if (_estimate.empty) {
+ _estimate = childEst;
+ } else {
+ _estimate.estHits += childEst.estHits;
+ }
+ setEstimate(_estimate);
+ }
+ _weights.push_back(weight);
+ _terms.push_back(term.get());
+ term.release();
+}
+
+SearchIterator::UP
+WeightedSetTermBlueprint::createSearch(search::fef::MatchData &md,
+ bool) const
+{
+ const State &state = getState();
+ assert(state.numFields() == 1);
+ search::fef::TermFieldMatchData &tfmd = *state.field(0).resolve(md);
+
+ std::vector<SearchIterator*> children(_terms.size());
+ for (size_t i = 0; i < _terms.size(); ++i) {
+ children[i] = _terms[i]->createSearch(md, true).release();
+ }
+ return SearchIterator::UP(WeightedSetTermSearch::create(children, tfmd, _weights));
+}
+
+void
+WeightedSetTermBlueprint::fetchPostings(bool strict)
+{
+ (void) strict;
+ for (size_t i = 0; i < _terms.size(); ++i) {
+ _terms[i]->fetchPostings(true);
+ }
+}
+
+void
+WeightedSetTermBlueprint::visitMembers(vespalib::ObjectVisitor &visitor) const
+{
+ LeafBlueprint::visitMembers(visitor);
+ visit(visitor, "_weights", _weights);
+ visit(visitor, "_terms", _terms);
+}
+
+SearchIterator::UP
+WeightedSetTermBlueprint::createLeafSearch(const search::fef::TermFieldMatchDataArray &, bool) const
+{
+ abort();
+}
+
+} // namespace search::queryeval
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_blueprint.h b/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_blueprint.h
new file mode 100644
index 00000000000..8d7916b8f42
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_blueprint.h
@@ -0,0 +1,47 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "searchable.h"
+#include <vespa/searchlib/fef/matchdatalayout.h>
+#include <memory>
+#include <vector>
+
+namespace search {
+namespace fef { class TermFieldMatchData; }
+
+namespace queryeval {
+
+class WeightedSetTermBlueprint : public ComplexLeafBlueprint
+{
+ HitEstimate _estimate;
+ std::vector<int32_t> _weights;
+ std::vector<Blueprint*> _terms;
+
+ WeightedSetTermBlueprint(const WeightedSetTermBlueprint &); // disabled
+ WeightedSetTermBlueprint &operator=(const WeightedSetTermBlueprint &); // disabled
+
+public:
+ WeightedSetTermBlueprint(const FieldSpec &field);
+ ~WeightedSetTermBlueprint();
+
+ // used by create visitor
+ // matches signature in dot product blueprint for common blueprint
+ // building code. Hands out its own field spec to children. NOTE:
+ // this is only ok since children will never be unpacked.
+ FieldSpec getNextChildField(const FieldSpec &outer) { return outer; }
+
+ // used by create visitor
+ void addTerm(Blueprint::UP term, int32_t weight);
+
+ SearchIterator::UP createSearch(search::fef::MatchData &md, bool strict) const override;
+ void visitMembers(vespalib::ObjectVisitor &visitor) const override;
+
+private:
+ SearchIterator::UP createLeafSearch(const search::fef::TermFieldMatchDataArray &, bool) const override;
+ void fetchPostings(bool strict) override;
+};
+
+} // namespace search::queryeval
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.cpp b/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.cpp
new file mode 100644
index 00000000000..fe8c3273153
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.cpp
@@ -0,0 +1,161 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".queryeval.weighted_set_term.search");
+
+#include "weighted_set_term_search.h"
+#include <vespa/searchlib/fef/termfieldmatchdata.h>
+#include <vespa/vespalib/objects/visit.h>
+#include <algorithm>
+#include <functional>
+#include "iterator_pack.h"
+
+using search::fef::TermFieldMatchData;
+using vespalib::ObjectVisitor;
+
+namespace search {
+namespace queryeval {
+
+template <typename HEAP, typename IteratorPack>
+class WeightedSetTermSearchImpl : public WeightedSetTermSearch
+{
+private:
+ typedef uint32_t ref_t;
+
+ struct CmpDocId {
+ const uint32_t *termPos;
+ CmpDocId(const uint32_t *tp) : termPos(tp) {}
+ bool operator()(const ref_t &a, const ref_t &b) const {
+ return (termPos[a] < termPos[b]);
+ }
+ };
+
+ struct CmpWeight {
+ const int32_t *weight;
+ CmpWeight(const int32_t *w) : weight(w) {}
+ bool operator()(const ref_t &a, const ref_t &b) const {
+ return (weight[a] > weight[b]);
+ }
+ };
+
+ fef::TermFieldMatchData &_tmd;
+ std::vector<int32_t> _weights;
+ std::vector<uint32_t> _termPos;
+ CmpDocId _cmpDocId;
+ CmpWeight _cmpWeight;
+ std::vector<ref_t> _data_space;
+ ref_t *_data_begin;
+ ref_t *_data_stash;
+ ref_t *_data_end;
+ IteratorPack _children;
+
+ void seek_child(ref_t child, uint32_t docId) {
+ _termPos[child] = _children.seek(child, docId);
+ }
+
+public:
+ WeightedSetTermSearchImpl(search::fef::TermFieldMatchData &tmd,
+ const std::vector<int32_t> &weights,
+ IteratorPack &&iteratorPack)
+ : _tmd(tmd),
+ _weights(weights),
+ _termPos(weights.size()),
+ _cmpDocId(&_termPos[0]),
+ _cmpWeight(&_weights[0]),
+ _data_space(),
+ _data_begin(nullptr),
+ _data_stash(nullptr),
+ _data_end(nullptr),
+ _children(std::move(iteratorPack))
+ {
+ HEAP::require_left_heap();
+ assert(_children.size() > 0);
+ assert(_children.size() == _weights.size());
+ _data_space.reserve(_children.size());
+ for (size_t i = 0; i < _children.size(); ++i) {
+ _data_space.push_back(i);
+ }
+ _data_begin = &_data_space[0];
+ _data_end = _data_begin + _data_space.size();
+ }
+
+ void doSeek(uint32_t docId) override {
+ while (_data_stash < _data_end) {
+ seek_child(*_data_stash, docId);
+ HEAP::push(_data_begin, ++_data_stash, _cmpDocId);
+ }
+ while (_termPos[HEAP::front(_data_begin, _data_stash)] < docId) {
+ seek_child(HEAP::front(_data_begin, _data_stash), docId);
+ HEAP::adjust(_data_begin, _data_stash, _cmpDocId);
+ }
+ setDocId(_termPos[HEAP::front(_data_begin, _data_stash)]);
+ }
+
+ void doUnpack(uint32_t docId) override {
+ _tmd.reset(docId);
+ while ((_data_begin < _data_stash) &&
+ _termPos[HEAP::front(_data_begin, _data_stash)] == docId)
+ {
+ HEAP::pop(_data_begin, _data_stash--, _cmpDocId);
+ }
+ std::sort(_data_stash, _data_end, _cmpWeight);
+ for (ref_t *ptr = _data_stash; ptr < _data_end; ++ptr) {
+ fef::TermFieldMatchDataPosition pos;
+ pos.setElementWeight(_weights[*ptr]);
+ _tmd.appendPosition(pos);
+ }
+ }
+
+ void initRange(uint32_t begin, uint32_t end) override {
+ WeightedSetTermSearch::initRange(begin, end);
+ _children.initRange(begin, end);
+ for (size_t i = 0; i < _children.size(); ++i) {
+ _termPos[i] = _children.get_docid(i);
+ }
+ _data_stash = _data_begin;
+ while (_data_stash < _data_end) {
+ HEAP::push(_data_begin, ++_data_stash, _cmpDocId);
+ }
+ }
+ Trinary is_strict() const override { return Trinary::True; }
+
+ void visitMembers(vespalib::ObjectVisitor &) const override {}
+};
+
+//-----------------------------------------------------------------------------
+
+SearchIterator *
+WeightedSetTermSearch::create(const std::vector<SearchIterator*> &children,
+ TermFieldMatchData &tmd,
+ const std::vector<int32_t> &weights)
+{
+ typedef WeightedSetTermSearchImpl<vespalib::LeftArrayHeap, SearchIteratorPack> ArrayHeapImpl;
+ typedef WeightedSetTermSearchImpl<vespalib::LeftHeap, SearchIteratorPack> HeapImpl;
+
+ if (children.size() < 128) {
+ return new ArrayHeapImpl(tmd, weights, SearchIteratorPack(children));
+ }
+ return new HeapImpl(tmd, weights, SearchIteratorPack(children));
+}
+
+//-----------------------------------------------------------------------------
+
+SearchIterator::UP
+WeightedSetTermSearch::create(search::fef::TermFieldMatchData &tmd,
+ const std::vector<int32_t> &weights,
+ std::vector<DocumentWeightIterator> &&iterators)
+{
+ typedef WeightedSetTermSearchImpl<vespalib::LeftArrayHeap, AttributeIteratorPack> ArrayHeapImpl;
+ typedef WeightedSetTermSearchImpl<vespalib::LeftHeap, AttributeIteratorPack> HeapImpl;
+
+ if (iterators.size() < 128) {
+ return SearchIterator::UP(new ArrayHeapImpl(tmd, weights, AttributeIteratorPack(std::move(iterators))));
+ }
+ return SearchIterator::UP(new HeapImpl(tmd, weights, AttributeIteratorPack(std::move(iterators))));
+}
+
+//-----------------------------------------------------------------------------
+
+} // namespace search::queryeval
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.h b/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.h
new file mode 100644
index 00000000000..536d13836ff
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.h
@@ -0,0 +1,41 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "multisearch.h"
+#include <vespa/vespalib/util/priority_queue.h>
+#include <vespa/searchlib/fef/matchdata.h>
+#include <vespa/searchlib/fef/termfieldmatchdataarray.h>
+#include <vespa/searchlib/attribute/iterator_pack.h>
+#include <memory>
+#include <vector>
+
+namespace search {
+namespace fef {
+class TermFieldMatchData;
+} // namespace fef
+
+namespace queryeval {
+
+/**
+ * Search iterator for a weighted set, based on a set of child search
+ * iterators.
+ */
+class WeightedSetTermSearch : public SearchIterator
+{
+protected:
+ WeightedSetTermSearch() {}
+
+public:
+ static SearchIterator* create(const std::vector<SearchIterator*> &children,
+ search::fef::TermFieldMatchData &tmd,
+ const std::vector<int32_t> &weights);
+
+ static SearchIterator::UP create(search::fef::TermFieldMatchData &tmd,
+ const std::vector<int32_t> &weights,
+ std::vector<DocumentWeightIterator> &&iterators);
+};
+
+} // namespace search::queryeval
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/test/.gitignore b/searchlib/src/vespa/searchlib/test/.gitignore
new file mode 100644
index 00000000000..ee8938b6bf4
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/test/.gitignore
@@ -0,0 +1,6 @@
+*.So
+*.exe
+*.ilk
+*.pdb
+.depend*
+Makefile
diff --git a/searchlib/src/vespa/searchlib/test/CMakeLists.txt b/searchlib/src/vespa/searchlib/test/CMakeLists.txt
new file mode 100644
index 00000000000..6b23f41a34a
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/test/CMakeLists.txt
@@ -0,0 +1,12 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_library(searchlib_test
+ SOURCES
+ statefile.cpp
+ statestring.cpp
+ initrange.cpp
+ document_weight_attribute_helper.cpp
+ $<TARGET_OBJECTS:searchlib_fakedata>
+ $<TARGET_OBJECTS:searchlib_searchlib_test_diskindex>
+ DEPENDS
+ searchlib_searchlib_test_memoryindex
+)
diff --git a/searchlib/src/vespa/searchlib/test/OWNERS b/searchlib/src/vespa/searchlib/test/OWNERS
new file mode 100644
index 00000000000..64735d11d93
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/test/OWNERS
@@ -0,0 +1 @@
+tegge
diff --git a/searchlib/src/vespa/searchlib/test/diskindex/.gitignore b/searchlib/src/vespa/searchlib/test/diskindex/.gitignore
new file mode 100644
index 00000000000..5dae353d999
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/test/diskindex/.gitignore
@@ -0,0 +1,2 @@
+.depend
+Makefile
diff --git a/searchlib/src/vespa/searchlib/test/diskindex/CMakeLists.txt b/searchlib/src/vespa/searchlib/test/diskindex/CMakeLists.txt
new file mode 100644
index 00000000000..6f43be53471
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/test/diskindex/CMakeLists.txt
@@ -0,0 +1,7 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_library(searchlib_searchlib_test_diskindex OBJECT
+ SOURCES
+ threelevelcountbuffers.cpp
+ testdiskindex.cpp
+ DEPENDS
+)
diff --git a/searchlib/src/vespa/searchlib/test/diskindex/testdiskindex.cpp b/searchlib/src/vespa/searchlib/test/diskindex/testdiskindex.cpp
new file mode 100644
index 00000000000..41cb1cea68a
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/test/diskindex/testdiskindex.cpp
@@ -0,0 +1,120 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/searchlib/test/diskindex/testdiskindex.h>
+#include <vespa/searchlib/diskindex/indexbuilder.h>
+#include <vespa/searchlib/index/dummyfileheadercontext.h>
+#include <vespa/vespalib/io/fileutil.h>
+
+namespace search {
+
+using index::Schema;
+using index::DummyFileHeaderContext;
+using index::WordDocElementWordPosFeatures;
+
+namespace diskindex {
+
+struct Builder
+{
+ search::diskindex::IndexBuilder _ib;
+ TuneFileIndexing _tuneFileIndexing;
+ DummyFileHeaderContext _fileHeaderContext;
+
+ Builder(const std::string &dir,
+ const Schema &s,
+ uint32_t docIdLimit,
+ uint64_t numWordIds,
+ bool directio)
+ : _ib(s)
+ {
+ if (directio) {
+ _tuneFileIndexing._read.setWantDirectIO();
+ _tuneFileIndexing._write.setWantDirectIO();
+ }
+ _ib.setPrefix(dir);
+ _ib.open(docIdLimit, numWordIds, _tuneFileIndexing,
+ _fileHeaderContext);
+ }
+
+ void
+ addDoc(uint32_t docId)
+ {
+ _ib.startDocument(docId);
+ _ib.startElement(0, 1, 1);
+ _ib.addOcc(WordDocElementWordPosFeatures(0));
+ _ib.endElement();
+ _ib.endDocument();
+ }
+
+ void
+ close()
+ {
+ _ib.close();
+ }
+};
+
+
+void
+TestDiskIndex::buildSchema(void)
+{
+ _schema.addIndexField(Schema::IndexField("f1", Schema::STRING));
+ _schema.addIndexField(Schema::IndexField("f2", Schema::STRING));
+ _schema.addFieldSet(Schema::FieldSet("c2").
+ addField("f1").
+ addField("f2"));
+}
+
+void
+TestDiskIndex::buildIndex(const std::string & dir, bool directio,
+ bool fieldEmpty, bool docEmpty, bool wordEmpty)
+{
+ Builder b(dir, _schema, docEmpty ? 1 : 32, wordEmpty ? 0 : 2, directio);
+ if (!wordEmpty && !fieldEmpty && !docEmpty) {
+ // f1
+ b._ib.startField(0);
+ b._ib.startWord("w1");
+ b.addDoc(1);
+ b.addDoc(3);
+ b._ib.endWord();
+ b._ib.endField();
+ // f2
+ b._ib.startField(1);
+ b._ib.startWord("w1");
+ b.addDoc(2);
+ b.addDoc(4);
+ b.addDoc(6);
+ b._ib.endWord();
+ b._ib.startWord("w2");
+ for (uint32_t docId = 1; docId < 18; ++docId) {
+ b.addDoc(docId);
+ }
+ b._ib.endWord();
+ b._ib.endField();
+ }
+ b.close();
+}
+
+void
+TestDiskIndex::openIndex(const std::string &dir, bool directio, bool readmmap,
+ bool fieldEmpty, bool docEmpty, bool wordEmpty)
+{
+ buildIndex(dir, directio, fieldEmpty, docEmpty, wordEmpty);
+ TuneFileRandRead tuneFileRead;
+ if (directio) {
+ tuneFileRead.setWantDirectIO();
+ }
+ if (readmmap) {
+ tuneFileRead.setWantMemoryMap();
+ }
+ _index.reset(new DiskIndex(dir));
+ bool ok(_index->setup(tuneFileRead));
+ assert(ok);
+}
+
+TestDiskIndex::TestDiskIndex() :
+ _schema(),
+ _index()
+{
+}
+
+}
+}
diff --git a/searchlib/src/vespa/searchlib/test/diskindex/testdiskindex.h b/searchlib/src/vespa/searchlib/test/diskindex/testdiskindex.h
new file mode 100644
index 00000000000..d340b02c3b3
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/test/diskindex/testdiskindex.h
@@ -0,0 +1,26 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/searchlib/diskindex/diskindex.h>
+
+namespace search {
+namespace diskindex {
+
+class TestDiskIndex {
+private:
+ void buildIndex(const std::string &dir, bool directio,
+ bool fieldEmpty, bool docEmpty, bool wordEmpty);
+protected:
+ index::Schema _schema;
+ std::unique_ptr<DiskIndex> _index;
+
+public:
+ TestDiskIndex();
+ DiskIndex & getIndex() { return *_index; }
+ void buildSchema();
+ void openIndex(const std::string &dir, bool directio, bool readmmap,
+ bool fieldEmpty, bool docEmpty, bool wordEmpty);
+};
+
+}
+}
diff --git a/searchlib/src/vespa/searchlib/test/diskindex/threelevelcountbuffers.cpp b/searchlib/src/vespa/searchlib/test/diskindex/threelevelcountbuffers.cpp
new file mode 100644
index 00000000000..594035af760
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/test/diskindex/threelevelcountbuffers.cpp
@@ -0,0 +1,133 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+#include <vespa/searchlib/bitcompression/compression.h>
+#include <vespa/searchlib/bitcompression/countcompression.h>
+#include <vespa/searchlib/index/postinglistcounts.h>
+#include "threelevelcountbuffers.h"
+
+LOG_SETUP(".threelevelcountbuffers");
+
+namespace search
+{
+
+namespace diskindex
+{
+
+
+ThreeLevelCountWriteBuffers::
+ThreeLevelCountWriteBuffers(EC &sse, EC &spe, EC &pe)
+ : _sse(sse),
+ _spe(spe),
+ _pe(pe),
+ _wcsse(sse),
+ _wcspe(spe),
+ _wcpe(pe),
+ _ssHeaderLen(0u),
+ _spHeaderLen(0u),
+ _pHeaderLen(0u),
+ _ssFileBitSize(0u),
+ _spFileBitSize(0u),
+ _pFileBitSize(0u)
+{
+ _wcsse.allocComprBuf();
+ _sse.setWriteContext(&_wcsse);
+ _sse.setupWrite(_wcsse);
+ assert(_sse.getWriteOffset() == 0);
+
+ _wcspe.allocComprBuf();
+ _spe.setWriteContext(&_wcspe);
+ _spe.setupWrite(_wcspe);
+ assert(_spe.getWriteOffset() == 0);
+
+ _wcpe.allocComprBuf();
+ _pe.setWriteContext(&_wcpe);
+ _pe.setupWrite(_wcpe);
+ assert(_pe.getWriteOffset() == 0);
+}
+
+
+void
+ThreeLevelCountWriteBuffers::flush(void)
+{
+ _ssFileBitSize = _sse.getWriteOffset();
+ _spFileBitSize = _spe.getWriteOffset();
+ _pFileBitSize = _pe.getWriteOffset();
+ _sse.padBits(128);
+ _sse.flush();
+ _spe.padBits(128);
+ _spe.flush();
+ _pe.padBits(128);
+ _pe.flush();
+}
+
+
+void
+ThreeLevelCountWriteBuffers::startPad(uint32_t ssHeaderLen,
+ uint32_t spHeaderLen,
+ uint32_t pHeaderLen)
+{
+ _sse.padBits(ssHeaderLen * 8);
+ _spe.padBits(spHeaderLen * 8);
+ _pe.padBits(pHeaderLen * 8);
+ _ssHeaderLen = ssHeaderLen;
+ _spHeaderLen = spHeaderLen;
+ _pHeaderLen = pHeaderLen;
+}
+
+
+ThreeLevelCountReadBuffers::ThreeLevelCountReadBuffers(DC &ssd,
+ DC &spd,
+ DC &pd,
+ ThreeLevelCountWriteBuffers &wb)
+ : _ssd(ssd),
+ _spd(spd),
+ _pd(pd),
+ _rcssd(ssd),
+ _rcspd(spd),
+ _rcpd(pd),
+ _ssHeaderLen(wb._ssHeaderLen),
+ _spHeaderLen(wb._spHeaderLen),
+ _pHeaderLen(wb._pHeaderLen),
+ _ssFileBitSize(wb._ssFileBitSize),
+ _spFileBitSize(wb._spFileBitSize),
+ _pFileBitSize(wb._pFileBitSize)
+{
+ ssd.setReadContext(&_rcssd);
+ spd.setReadContext(&_rcspd);
+ pd.setReadContext(&_rcpd);
+ _rcssd.referenceWriteContext(wb._wcsse);
+ _rcspd.referenceWriteContext(wb._wcspe);
+ _rcpd.referenceWriteContext(wb._wcpe);
+ ssd.skipBits(_ssHeaderLen * 8);
+ spd.skipBits(_spHeaderLen * 8);
+ pd.skipBits(_pHeaderLen * 8);
+}
+
+
+ThreeLevelCountReadBuffers::ThreeLevelCountReadBuffers(DC &ssd,
+ DC &spd,
+ DC &pd)
+ : _ssd(ssd),
+ _spd(spd),
+ _pd(pd),
+ _rcssd(ssd),
+ _rcspd(spd),
+ _rcpd(pd),
+ _ssHeaderLen(0u),
+ _spHeaderLen(0u),
+ _pHeaderLen(0u),
+ _ssFileBitSize(0u),
+ _spFileBitSize(0u),
+ _pFileBitSize(0u)
+{
+ ssd.setReadContext(&_rcssd);
+ spd.setReadContext(&_rcspd);
+ pd.setReadContext(&_rcpd);
+}
+
+
+} // namespace diskindex
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/test/diskindex/threelevelcountbuffers.h b/searchlib/src/vespa/searchlib/test/diskindex/threelevelcountbuffers.h
new file mode 100644
index 00000000000..cf7f5adc791
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/test/diskindex/threelevelcountbuffers.h
@@ -0,0 +1,82 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <limits>
+#include <vespa/searchlib/bitcompression/countcompression.h>
+
+namespace search
+{
+
+namespace diskindex
+{
+
+class ThreeLevelCountWriteBuffers
+{
+public:
+ typedef search::bitcompression::PostingListCountFileEncodeContext EC;
+ EC &_sse;
+ EC &_spe;
+ EC &_pe;
+ ComprFileWriteContext _wcsse;
+ ComprFileWriteContext _wcspe;
+ ComprFileWriteContext _wcpe;
+
+ uint32_t _ssHeaderLen; // Length of header for sparse sparse file (bytes)
+ uint32_t _spHeaderLen; // Length of header for sparse page file (bytes)
+ uint32_t _pHeaderLen; // Length of header for page file (bytes)
+
+ uint64_t _ssFileBitSize;
+ uint64_t _spFileBitSize;
+ uint64_t _pFileBitSize;
+
+ ThreeLevelCountWriteBuffers(EC &sse, EC &spe, EC &pe);
+
+ void
+ flush(void);
+
+ // unit test method. Just pads without writing proper header
+ void
+ startPad(uint32_t ssHeaderLen,
+ uint32_t spHeaderLen,
+ uint32_t pHeaderLen);
+};
+
+
+class ThreeLevelCountReadBuffers
+{
+public:
+ typedef search::bitcompression::PostingListCountFileEncodeContext EC;
+ typedef search::bitcompression::PostingListCountFileDecodeContext DC;
+ DC &_ssd;
+ DC &_spd;
+ DC &_pd;
+ ComprFileReadContext _rcssd;
+ ComprFileReadContext _rcspd;
+ ComprFileReadContext _rcpd;
+
+ uint32_t _ssHeaderLen;
+ uint32_t _spHeaderLen;
+ uint32_t _pHeaderLen;
+
+ uint64_t _ssFileBitSize;
+ uint64_t _spFileBitSize;
+ uint64_t _pFileBitSize;
+
+ // Unit test usage constructor.
+ ThreeLevelCountReadBuffers(DC &ssd,
+ DC &spd,
+ DC &pd,
+ ThreeLevelCountWriteBuffers &wb);
+
+ // Normal usage constructor
+ ThreeLevelCountReadBuffers(DC &ssd,
+ DC &spd,
+ DC &pd);
+};
+
+
+} // namespace diskindex
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/test/document_weight_attribute_helper.cpp b/searchlib/src/vespa/searchlib/test/document_weight_attribute_helper.cpp
new file mode 100644
index 00000000000..cfd06a86d0e
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/test/document_weight_attribute_helper.cpp
@@ -0,0 +1,9 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include "document_weight_attribute_helper.h"
+
+namespace search {
+namespace test {
+
+} // namespace search::test
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/test/document_weight_attribute_helper.h b/searchlib/src/vespa/searchlib/test/document_weight_attribute_helper.h
new file mode 100644
index 00000000000..cf63d881d93
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/test/document_weight_attribute_helper.h
@@ -0,0 +1,55 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/searchlib/attribute/i_document_weight_attribute.h>
+#include <vespa/searchlib/attribute/attributevector.h>
+#include <vespa/searchlib/attribute/multinumericattribute.h>
+#include <vespa/searchlib/attribute/multinumericpostattribute.hpp>
+#include <vespa/searchlib/attribute/attributefactory.h>
+#include <vespa/vespalib/testkit/test_kit.h>
+
+namespace search {
+namespace test {
+
+class DocumentWeightAttributeHelper
+{
+private:
+ AttributeVector::SP _attr;
+ IntegerAttribute *_int_attr;
+ const IDocumentWeightAttribute *_dwa;
+
+ AttributeVector::SP make_attr() {
+ attribute::Config cfg(attribute::BasicType::INT64, attribute::CollectionType::WSET);
+ cfg.setFastSearch(true);
+ return AttributeFactory::createAttribute("my_attribute", cfg);
+ }
+
+public:
+ DocumentWeightAttributeHelper() : _attr(make_attr()),
+ _int_attr(dynamic_cast<IntegerAttribute *>(_attr.get())),
+ _dwa(_attr->asDocumentWeightAttribute())
+ {
+ ASSERT_TRUE(_int_attr != nullptr);
+ ASSERT_TRUE(_dwa != nullptr);
+ }
+
+ void add_docs(size_t limit) {
+ AttributeVector::DocId docid;
+ for (size_t i = 0; i < limit; ++i) {
+ _attr->addDoc(docid);
+ }
+ _attr->commit();
+ ASSERT_EQUAL((limit - 1), docid);
+ }
+
+ void set_doc(uint32_t docid, int64_t key, int32_t weight) {
+ _int_attr->clearDoc(docid);
+ _int_attr->append(docid, key, weight);
+ _int_attr->commit();
+ }
+
+ const IDocumentWeightAttribute &dwa() const { return *_dwa; }
+};
+
+} // namespace search::test
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/test/fakedata/.gitignore b/searchlib/src/vespa/searchlib/test/fakedata/.gitignore
new file mode 100644
index 00000000000..5dae353d999
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/test/fakedata/.gitignore
@@ -0,0 +1,2 @@
+.depend
+Makefile
diff --git a/searchlib/src/vespa/searchlib/test/fakedata/CMakeLists.txt b/searchlib/src/vespa/searchlib/test/fakedata/CMakeLists.txt
new file mode 100644
index 00000000000..b01ad63e02f
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/test/fakedata/CMakeLists.txt
@@ -0,0 +1,16 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_library(searchlib_fakedata OBJECT
+ SOURCES
+ fakeword.cpp
+ fakewordset.cpp
+ fakeposting.cpp
+ fakefilterocc.cpp
+ fakeegcompr64filterocc.cpp
+ fakememtreeocc.cpp
+ fakezcfilterocc.cpp
+ fakezcbfilterocc.cpp
+ fpfactory.cpp
+ bitencode64.cpp
+ bitdecode64.cpp
+ DEPENDS
+)
diff --git a/searchlib/src/vespa/searchlib/test/fakedata/bitdecode64.cpp b/searchlib/src/vespa/searchlib/test/fakedata/bitdecode64.cpp
new file mode 100644
index 00000000000..cda9314366c
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/test/fakedata/bitdecode64.cpp
@@ -0,0 +1,22 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".bitdecode64");
+#include "bitencode64.h"
+#include "bitdecode64.h"
+
+
+namespace search
+{
+
+namespace fakedata
+{
+
+template class BitDecode64<true>;
+
+template class BitDecode64<false>;
+
+} // namespace fakedata
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/test/fakedata/bitdecode64.h b/searchlib/src/vespa/searchlib/test/fakedata/bitdecode64.h
new file mode 100644
index 00000000000..05dfdaf0fb1
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/test/fakedata/bitdecode64.h
@@ -0,0 +1,91 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include "bitencode64.h"
+#include <vespa/searchlib/util/comprfile.h>
+#include <vespa/searchlib/bitcompression/compression.h>
+
+namespace search
+{
+
+namespace fakedata
+{
+
+template <bool bigEndian>
+class BitDecode64 : public bitcompression::DecodeContext64<bigEndian>
+{
+private:
+ const uint64_t *_comprBase;
+ int _bitOffsetBase;
+ typedef bitcompression::DecodeContext64<bigEndian> ParentClass;
+
+public:
+ using ParentClass::_val;
+ using ParentClass::_valI;
+ using ParentClass::_preRead;
+ using ParentClass::_cacheInt;
+ typedef typename bitcompression::DecodeContext64<bigEndian>::EC EC;
+
+ BitDecode64(const uint64_t *compr,
+ int bitOffset)
+ : bitcompression::DecodeContext64<bigEndian>(compr, bitOffset),
+ _comprBase(compr),
+ _bitOffsetBase(bitOffset)
+ {
+ }
+
+ typedef bitcompression::DecodeContext64<bigEndian> DC;
+
+ void
+ seek(uint64_t offset)
+ {
+ offset += _bitOffsetBase;
+ const uint64_t *compr = _comprBase + (offset / 64);
+ int bitOffset = offset & 63;
+ _valI = compr + 1;
+ _val = 0;
+ _cacheInt = EC::bswap(*compr);
+ _preRead = 64 - bitOffset;
+ uint32_t length = 64;
+ UC64_READBITS(_val, _valI, _preRead, _cacheInt, EC);
+ }
+
+ uint64_t
+ getOffset(void) const
+ {
+ return 64 * (_valI - _comprBase - 1) - this->_preRead -
+ _bitOffsetBase;
+ }
+
+ uint64_t
+ getOffset(const uint64_t *valI, int preRead) const
+ {
+ return 64 * (valI - _comprBase - 1) - preRead - _bitOffsetBase;
+ }
+
+ const uint64_t *
+ getComprBase(void) const
+ {
+ return _comprBase;
+ }
+
+ int
+ getBitOffsetBase(void) const
+ {
+ return _bitOffsetBase;
+ }
+};
+
+
+extern template class BitDecode64<true>;
+
+extern template class BitDecode64<false>;
+
+typedef BitDecode64<true> BitDecode64BE;
+
+typedef BitDecode64<false> BitDecode64LE;
+
+} // namespace fakedata
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/test/fakedata/bitencode64.cpp b/searchlib/src/vespa/searchlib/test/fakedata/bitencode64.cpp
new file mode 100644
index 00000000000..9ceea95e01d
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/test/fakedata/bitencode64.cpp
@@ -0,0 +1,37 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".bitencode64");
+#include "bitencode64.h"
+
+
+namespace search
+{
+
+namespace fakedata
+{
+
+template <bool bigEndian>
+BitEncode64<bigEndian>::BitEncode64(void)
+ : bitcompression::EncodeContext64<bigEndian>(),
+ _cbuf(*this)
+{
+ _cbuf.allocComprBuf(64, 1);
+ this->afterWrite(_cbuf, 0, 0);
+}
+
+
+template <bool bigEndian>
+BitEncode64<bigEndian>::~BitEncode64(void)
+{
+}
+
+template class BitEncode64<true>;
+
+template class BitEncode64<false>;
+
+
+} // namespace fakedata
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/test/fakedata/bitencode64.h b/searchlib/src/vespa/searchlib/test/fakedata/bitencode64.h
new file mode 100644
index 00000000000..893410aa86b
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/test/fakedata/bitencode64.h
@@ -0,0 +1,59 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vector>
+#include <vespa/searchlib/bitcompression/compression.h>
+#include <vespa/searchlib/util/comprfile.h>
+
+namespace search
+{
+
+namespace fakedata
+{
+
+template <bool bigEndian>
+class BitEncode64 : public bitcompression::EncodeContext64<bigEndian>
+{
+ search::ComprFileWriteContext _cbuf;
+
+public:
+ BitEncode64(void);
+
+ ~BitEncode64(void);
+
+ typedef bitcompression::EncodeContext64<bigEndian> EC;
+
+ void
+ writeComprBuffer(void)
+ {
+ _cbuf.writeComprBuffer(true);
+ }
+
+ void
+ writeComprBufferIfNeeded(void)
+ {
+ if (this->_valI >= this->_valE)
+ _cbuf.writeComprBuffer(false);
+ }
+
+ std::pair<uint64_t *, size_t>
+ grabComprBuffer(void *&comprBufMalloc)
+ {
+ std::pair<void *, size_t> tres = _cbuf.grabComprBuffer(comprBufMalloc);
+ return std::make_pair(static_cast<uint64_t *>(tres.first),
+ tres.second);
+ }
+};
+
+extern template class BitEncode64<true>;
+
+extern template class BitEncode64<false>;
+
+typedef BitEncode64<true> BitEncode64BE;
+
+typedef BitEncode64<false> BitEncode64LE;
+
+} // namespace fakedata
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/test/fakedata/fakeegcompr64filterocc.cpp b/searchlib/src/vespa/searchlib/test/fakedata/fakeegcompr64filterocc.cpp
new file mode 100644
index 00000000000..dd1190f0945
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/test/fakedata/fakeegcompr64filterocc.cpp
@@ -0,0 +1,1521 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".fakeegcompr64filterocc");
+#include "fakeegcompr64filterocc.h"
+#include <vespa/searchlib/bitcompression/compression.h>
+#include <vespa/searchlib/bitcompression/posocccompression.h>
+#include <vespa/searchlib/queryeval/iterators.h>
+#include <vespa/searchlib/fef/termfieldmatchdataarray.h>
+#include "fpfactory.h"
+
+using search::fef::TermFieldMatchData;
+using search::fef::TermFieldMatchDataPosition;
+
+#include "bitencode64.h"
+#include "bitdecode64.h"
+
+namespace search
+{
+
+namespace fakedata
+{
+
+#define DEBUG_EGCOMPR64FILTEROCC_PRINTF 0
+#define DEBUG_EGCOMPR64FILTEROCC_ASSERT 1
+
+static FPFactoryInit
+init(std::make_pair("EGCompr64FilterOcc",
+ makeFPFactory<FPFactoryT<FakeEGCompr64FilterOcc> >));
+
+#define K_VALUE_FILTEROCC_RESIDUE 8
+
+#define K_VALUE_FILTEROCC_FIRST_DOCID 22
+
+#define K_VALUE_FILTEROCC_DELTA_DOCID 7
+
+#define K_VALUE_FILTEROCC_L1SKIPDELTA_DOCID 13
+
+#define K_VALUE_FILTEROCC_L1SKIPDELTA_BITPOS 10
+
+#define K_VALUE_FILTEROCC_L2SKIPDELTA_DOCID 15
+
+#define K_VALUE_FILTEROCC_L2SKIPDELTA_BITPOS 12
+
+#define K_VALUE_FILTEROCC_L2SKIPDELTA_L1SKIPBITPOS 10
+
+#define K_VALUE_FILTEROCC_L3SKIPDELTA_DOCID 18
+
+#define K_VALUE_FILTEROCC_L3SKIPDELTA_BITPOS 15
+
+#define K_VALUE_FILTEROCC_L3SKIPDELTA_L1SKIPBITPOS 13
+
+#define K_VALUE_FILTEROCC_L3SKIPDELTA_L2SKIPBITPOS 10
+
+#define K_VALUE_FILTEROCC_L4SKIPDELTA_DOCID 21
+
+#define K_VALUE_FILTEROCC_L4SKIPDELTA_BITPOS 18
+
+#define K_VALUE_FILTEROCC_L4SKIPDELTA_L1SKIPBITPOS 16
+
+#define K_VALUE_FILTEROCC_L4SKIPDELTA_L2SKIPBITPOS 13
+
+#define K_VALUE_FILTEROCC_L4SKIPDELTA_L3SKIPBITPOS 10
+
+#define L1SKIPSTRIDE 16
+#define L2SKIPSTRIDE 8
+#define L3SKIPSTRIDE 8
+#define L4SKIPSTRIDE 8
+
+FakeEGCompr64FilterOcc::FakeEGCompr64FilterOcc(const FakeWord &fw)
+ : FakePosting(fw.getName() + ".egc64filterocc"),
+ _compressed(std::make_pair(static_cast<uint64_t *>(NULL), 0)),
+ _l1SkipCompressed(std::make_pair(static_cast<uint64_t *>(NULL), 0)),
+ _l2SkipCompressed(std::make_pair(static_cast<uint64_t *>(NULL), 0)),
+ _l3SkipCompressed(std::make_pair(static_cast<uint64_t *>(NULL), 0)),
+ _l4SkipCompressed(std::make_pair(static_cast<uint64_t *>(NULL), 0)),
+ _compressedMalloc(NULL),
+ _l1SkipCompressedMalloc(NULL),
+ _l2SkipCompressedMalloc(NULL),
+ _l3SkipCompressedMalloc(NULL),
+ _l4SkipCompressedMalloc(NULL),
+ _docIdLimit(0),
+ _hitDocs(0),
+ _lastDocId(0u),
+ _bitSize(0),
+ _l1SkipBitSize(0),
+ _l2SkipBitSize(0),
+ _l3SkipBitSize(0),
+ _l4SkipBitSize(0),
+ _bigEndian(true)
+{
+ setup(fw);
+}
+
+
+FakeEGCompr64FilterOcc::FakeEGCompr64FilterOcc(const FakeWord &fw,
+ bool bigEndian,
+ const char *nameSuffix)
+ : FakePosting(fw.getName() + nameSuffix),
+ _compressed(std::make_pair(static_cast<uint64_t *>(NULL), 0)),
+ _l1SkipCompressed(std::make_pair(static_cast<uint64_t *>(NULL), 0)),
+ _l2SkipCompressed(std::make_pair(static_cast<uint64_t *>(NULL), 0)),
+ _l3SkipCompressed(std::make_pair(static_cast<uint64_t *>(NULL), 0)),
+ _l4SkipCompressed(std::make_pair(static_cast<uint64_t *>(NULL), 0)),
+ _compressedMalloc(NULL),
+ _l1SkipCompressedMalloc(NULL),
+ _l2SkipCompressedMalloc(NULL),
+ _l3SkipCompressedMalloc(NULL),
+ _l4SkipCompressedMalloc(NULL),
+ _docIdLimit(0),
+ _hitDocs(0),
+ _lastDocId(0u),
+ _bitSize(0),
+ _l1SkipBitSize(0),
+ _l2SkipBitSize(0),
+ _l3SkipBitSize(0),
+ _l4SkipBitSize(0),
+ _bigEndian(bigEndian)
+{
+ setup(fw);
+}
+
+
+void
+FakeEGCompr64FilterOcc::setup(const FakeWord &fw)
+{
+ if (_bigEndian)
+ setupT<true>(fw);
+ else
+ setupT<false>(fw);
+}
+
+
+template <bool bigEndian>
+void
+FakeEGCompr64FilterOcc::
+setupT(const FakeWord &fw)
+{
+ BitEncode64<bigEndian> bits;
+ BitEncode64<bigEndian> l1SkipBits;
+ BitEncode64<bigEndian> l2SkipBits;
+ BitEncode64<bigEndian> l3SkipBits;
+ BitEncode64<bigEndian> l4SkipBits;
+ uint32_t lastDocId = 0u;
+ uint32_t lastL1SkipDocId = 0u;
+ uint64_t lastL1SkipDocIdPos = 0;
+ uint32_t l1SkipCnt = 0;
+ uint32_t lastL2SkipDocId = 0u;
+ uint64_t lastL2SkipDocIdPos = 0;
+ uint64_t lastL2SkipL1SkipPos = 0;
+ unsigned int l2SkipCnt = 0;
+ uint32_t lastL3SkipDocId = 0u;
+ uint64_t lastL3SkipDocIdPos = 0;
+ uint64_t lastL3SkipL1SkipPos = 0;
+ uint64_t lastL3SkipL2SkipPos = 0;
+ unsigned int l3SkipCnt = 0;
+ uint32_t lastL4SkipDocId = 0u;
+ uint64_t lastL4SkipDocIdPos = 0;
+ uint64_t lastL4SkipL1SkipPos = 0;
+ uint64_t lastL4SkipL2SkipPos = 0;
+ uint64_t lastL4SkipL3SkipPos = 0;
+ unsigned int l4SkipCnt = 0;
+
+
+ typedef FakeWord FW;
+ typedef FW::DocWordFeatureList DWFL;
+ typedef FW::DocWordPosFeatureList DWPFL;
+
+ DWFL::const_iterator d(fw._postings.begin());
+ DWFL::const_iterator de(fw._postings.end());
+ DWPFL::const_iterator p(fw._wordPosFeatures.begin());
+ DWPFL::const_iterator pe(fw._wordPosFeatures.end());
+
+ if (d != de) {
+ // Prefix support needs counts embedded in posting list
+ // if selector bits are dropped.
+ bits.encodeExpGolomb(fw._postings.size(),
+ K_VALUE_FILTEROCC_RESIDUE);
+ bits.writeComprBufferIfNeeded();
+ lastL1SkipDocIdPos = bits.getWriteOffset();
+ lastL2SkipDocIdPos = bits.getWriteOffset();
+ lastL3SkipDocIdPos = bits.getWriteOffset();
+ lastL4SkipDocIdPos = bits.getWriteOffset();
+ }
+ while (d != de) {
+ if (l1SkipCnt >= L1SKIPSTRIDE) {
+ uint32_t docIdDelta = lastDocId - lastL1SkipDocId;
+ assert(static_cast<int32_t>(docIdDelta) > 0);
+#if DEBUG_EGCOMPR64FILTEROCC_PRINTF
+ uint64_t prevL1SkipPos = l1SkipBits.getWriteOffset();
+#endif
+ l1SkipBits.encodeExpGolomb(docIdDelta - 1,
+ K_VALUE_FILTEROCC_L1SKIPDELTA_DOCID);
+ uint64_t lastDocIdPos = bits.getWriteOffset();
+ uint32_t docIdPosDelta = lastDocIdPos - lastL1SkipDocIdPos;
+ l1SkipBits.encodeExpGolomb(docIdPosDelta - 1,
+ K_VALUE_FILTEROCC_L1SKIPDELTA_BITPOS);
+ l1SkipBits.writeComprBufferIfNeeded();
+#if DEBUG_EGCOMPR64FILTEROCC_PRINTF
+ printf("L1Encode docId=%d (+%u), docIdPos=%d (+%u), "
+ "L1SkipPos=%d -> %d\n",
+ lastDocId,
+ docIdDelta,
+ (int) lastDocIdPos,
+ docIdPosDelta,
+ (int) prevL1SkipPos,
+ (int) l1SkipBits.getWriteOffset());
+#endif
+ lastL1SkipDocId = lastDocId;
+ lastL1SkipDocIdPos = lastDocIdPos;
+ l1SkipCnt = 0;
+ ++l2SkipCnt;
+ if (l2SkipCnt >= L2SKIPSTRIDE) {
+ docIdDelta = lastDocId - lastL2SkipDocId;
+ docIdPosDelta = lastDocIdPos - lastL2SkipDocIdPos;
+ uint64_t lastL1SkipPos = l1SkipBits.getWriteOffset();
+ uint32_t l1SkipPosDelta = lastL1SkipPos - lastL2SkipL1SkipPos;
+ l2SkipBits.encodeExpGolomb(docIdDelta - 1,
+ K_VALUE_FILTEROCC_L2SKIPDELTA_DOCID);
+ l2SkipBits.encodeExpGolomb(docIdPosDelta - 1,
+ K_VALUE_FILTEROCC_L2SKIPDELTA_BITPOS);
+ l2SkipBits.encodeExpGolomb(l1SkipPosDelta - 1,
+ K_VALUE_FILTEROCC_L2SKIPDELTA_L1SKIPBITPOS);
+ l2SkipBits.writeComprBufferIfNeeded();
+#if DEBUG_EGCOMPR64FILTEROCC_PRINTF
+ printf("L2Encode docId=%d (+%u), docIdPos=%d (+%u), "
+ "L1SkipPos=%d (+%u)\n",
+ lastDocId,
+ docIdDelta,
+ (int) lastDocIdPos,
+ docIdPosDelta,
+ (int) lastL1SkipPos,
+ l1SkipPosDelta);
+#endif
+ lastL2SkipDocId = lastDocId;
+ lastL2SkipDocIdPos = lastDocIdPos;
+ lastL2SkipL1SkipPos = lastL1SkipPos;
+ l2SkipCnt = 0;
+ ++l3SkipCnt;
+ if (l3SkipCnt >= L3SKIPSTRIDE) {
+ docIdDelta = lastDocId - lastL3SkipDocId;
+ docIdPosDelta = lastDocIdPos - lastL3SkipDocIdPos;
+ l1SkipPosDelta = lastL1SkipPos - lastL3SkipL1SkipPos;
+ uint64_t lastL2SkipPos = l2SkipBits.getWriteOffset();
+ uint32_t l2SkipPosDelta = lastL2SkipPos -
+ lastL3SkipL2SkipPos;
+ l3SkipBits.encodeExpGolomb(docIdDelta - 1,
+ K_VALUE_FILTEROCC_L3SKIPDELTA_DOCID);
+ l3SkipBits.encodeExpGolomb(docIdPosDelta - 1,
+ K_VALUE_FILTEROCC_L3SKIPDELTA_BITPOS);
+ l3SkipBits.writeComprBufferIfNeeded();
+ l3SkipBits.encodeExpGolomb(l1SkipPosDelta - 1,
+ K_VALUE_FILTEROCC_L3SKIPDELTA_L1SKIPBITPOS);
+ l3SkipBits.encodeExpGolomb(l2SkipPosDelta - 1,
+ K_VALUE_FILTEROCC_L3SKIPDELTA_L2SKIPBITPOS);
+ l3SkipBits.writeComprBufferIfNeeded();
+#if DEBUG_EGCOMPR64FILTEROCC_PRINTF
+ printf("L3Encode docId=%d (+%u), docIdPos=%d (+%u), "
+ "L1SkipPos=%d (+%u) L2SkipPos=%d (+%u)\n",
+ lastDocId,
+ docIdDelta,
+ (int) lastDocIdPos,
+ docIdPosDelta,
+ (int) lastL1SkipPos,
+ l1SkipPosDelta,
+ (int) lastL2SkipPos,
+ l2SkipPosDelta);
+#endif
+ lastL3SkipDocId = lastDocId;
+ lastL3SkipDocIdPos = lastDocIdPos;
+ lastL3SkipL1SkipPos = lastL1SkipPos;
+ lastL3SkipL2SkipPos = lastL2SkipPos;
+ l3SkipCnt = 0;
+ ++l4SkipCnt;
+ if (l4SkipCnt >= L4SKIPSTRIDE) {
+ docIdDelta = lastDocId - lastL4SkipDocId;
+ docIdPosDelta = lastDocIdPos - lastL4SkipDocIdPos;
+ l1SkipPosDelta = lastL1SkipPos - lastL4SkipL1SkipPos;
+ l2SkipPosDelta = lastL2SkipPos - lastL4SkipL2SkipPos;
+ uint64_t lastL3SkipPos = l3SkipBits.getWriteOffset();
+ uint32_t l3SkipPosDelta = lastL3SkipPos -
+ lastL4SkipL3SkipPos;
+ l4SkipBits.encodeExpGolomb(docIdDelta - 1,
+ K_VALUE_FILTEROCC_L4SKIPDELTA_DOCID);
+ l4SkipBits.encodeExpGolomb(docIdPosDelta - 1,
+ K_VALUE_FILTEROCC_L4SKIPDELTA_BITPOS);
+ l4SkipBits.writeComprBufferIfNeeded();
+ l4SkipBits.encodeExpGolomb(l1SkipPosDelta - 1,
+ K_VALUE_FILTEROCC_L4SKIPDELTA_L1SKIPBITPOS);
+ l4SkipBits.encodeExpGolomb(l2SkipPosDelta - 1,
+ K_VALUE_FILTEROCC_L4SKIPDELTA_L2SKIPBITPOS);
+ l4SkipBits.encodeExpGolomb(l3SkipPosDelta - 1,
+ K_VALUE_FILTEROCC_L4SKIPDELTA_L3SKIPBITPOS);
+ l4SkipBits.writeComprBufferIfNeeded();
+#if DEBUG_EGCOMPR64FILTEROCC_PRINTF
+ printf("L4Encode docId=%d (+%u), docIdPos=%d (+%u), "
+ "L1SkipPos=%d (+%u) L2SkipPos=%d (+%u)"
+ "L3SkipPos=%d (+%u)\n",
+ lastDocId,
+ docIdDelta,
+ (int) lastDocIdPos,
+ docIdPosDelta,
+ (int) lastL1SkipPos,
+ l1SkipPosDelta,
+ (int) lastL2SkipPos,
+ l2SkipPosDelta,
+ (int) lastL3SkipPos,
+ l3SkipPosDelta);
+#endif
+ lastL4SkipDocId = lastDocId;
+ lastL4SkipDocIdPos = lastDocIdPos;
+ lastL4SkipL1SkipPos = lastL1SkipPos;
+ lastL4SkipL2SkipPos = lastL2SkipPos;
+ lastL4SkipL3SkipPos = lastL3SkipPos;
+ l4SkipCnt = 0;
+ }
+ }
+ }
+ }
+ if (lastDocId == 0u) {
+ bits.encodeExpGolomb(d->_docId - 1,
+ K_VALUE_FILTEROCC_FIRST_DOCID);
+#if DEBUG_EGCOMPR64FILTEROCC_PRINTF
+ printf("Encode docId=%d\n", d->_docId);
+#endif
+ } else {
+ uint32_t docIdDelta = d->_docId - lastDocId;
+ bits.encodeExpGolomb(docIdDelta - 1,
+ K_VALUE_FILTEROCC_DELTA_DOCID);
+#if DEBUG_EGCOMPR64FILTEROCC_PRINTF
+ printf("Encode docId=%d (+%u)\n",
+ d->_docId,
+ docIdDelta);
+#endif
+ }
+ bits.writeComprBufferIfNeeded();
+ lastDocId = d->_docId;
+ ++l1SkipCnt;
+ ++d;
+ }
+ // Extra partial entries for skip tables to simplify iterator during search
+ uint32_t docIdDelta = lastDocId - lastL1SkipDocId;
+ assert(static_cast<int32_t>(docIdDelta) > 0);
+ l1SkipBits.encodeExpGolomb(docIdDelta - 1,
+ K_VALUE_FILTEROCC_L1SKIPDELTA_DOCID);
+ docIdDelta = lastDocId - lastL2SkipDocId;
+ assert(static_cast<int32_t>(docIdDelta) > 0);
+ l2SkipBits.encodeExpGolomb(docIdDelta - 1,
+ K_VALUE_FILTEROCC_L2SKIPDELTA_DOCID);
+ docIdDelta = lastDocId - lastL3SkipDocId;
+ assert(static_cast<int32_t>(docIdDelta) > 0);
+ l3SkipBits.encodeExpGolomb(docIdDelta - 1,
+ K_VALUE_FILTEROCC_L3SKIPDELTA_DOCID);
+ docIdDelta = lastDocId - lastL4SkipDocId;
+ assert(static_cast<int32_t>(docIdDelta) > 0);
+ l4SkipBits.encodeExpGolomb(docIdDelta - 1,
+ K_VALUE_FILTEROCC_L4SKIPDELTA_DOCID);
+ _hitDocs = fw._postings.size();
+ _bitSize = bits.getWriteOffset();
+ _l1SkipBitSize = l1SkipBits.getWriteOffset();
+ _l2SkipBitSize = l2SkipBits.getWriteOffset();
+ _l3SkipBitSize = l3SkipBits.getWriteOffset();
+ _l4SkipBitSize = l4SkipBits.getWriteOffset();
+ bits.writeComprBufferIfNeeded();
+ bits.writeBits(static_cast<uint64_t>(-1), 64);
+ bits.writeBits(static_cast<uint64_t>(-1), 64);
+ bits.writeComprBufferIfNeeded();
+ bits.writeBits(static_cast<uint64_t>(-1), 64);
+ bits.writeBits(static_cast<uint64_t>(-1), 64);
+ bits.flush();
+ bits.writeComprBuffer();
+ l1SkipBits.writeComprBufferIfNeeded();
+ l1SkipBits.writeBits(static_cast<uint64_t>(-1), 64);
+ l1SkipBits.writeBits(static_cast<uint64_t>(-1), 64);
+ l1SkipBits.writeComprBufferIfNeeded();
+ l1SkipBits.writeBits(static_cast<uint64_t>(-1), 64);
+ l1SkipBits.writeBits(static_cast<uint64_t>(-1), 64);
+ l1SkipBits.flush();
+ l1SkipBits.writeComprBuffer();
+ l2SkipBits.writeComprBufferIfNeeded();
+ l2SkipBits.writeBits(static_cast<uint64_t>(-1), 64);
+ l2SkipBits.writeBits(static_cast<uint64_t>(-1), 64);
+ l2SkipBits.writeComprBufferIfNeeded();
+ l2SkipBits.writeBits(static_cast<uint64_t>(-1), 64);
+ l2SkipBits.writeBits(static_cast<uint64_t>(-1), 64);
+ l2SkipBits.flush();
+ l2SkipBits.writeComprBuffer();
+ l3SkipBits.writeComprBufferIfNeeded();
+ l3SkipBits.writeBits(static_cast<uint64_t>(-1), 64);
+ l3SkipBits.writeBits(static_cast<uint64_t>(-1), 64);
+ l3SkipBits.writeComprBufferIfNeeded();
+ l3SkipBits.writeBits(static_cast<uint64_t>(-1), 64);
+ l3SkipBits.writeBits(static_cast<uint64_t>(-1), 64);
+ l3SkipBits.flush();
+ l3SkipBits.writeComprBuffer();
+ l4SkipBits.writeComprBufferIfNeeded();
+ l4SkipBits.writeBits(static_cast<uint64_t>(-1), 64);
+ l4SkipBits.writeBits(static_cast<uint64_t>(-1), 64);
+ l4SkipBits.writeComprBufferIfNeeded();
+ l4SkipBits.writeBits(static_cast<uint64_t>(-1), 64);
+ l4SkipBits.writeBits(static_cast<uint64_t>(-1), 64);
+ l4SkipBits.flush();
+ l4SkipBits.writeComprBuffer();
+ _compressed = bits.grabComprBuffer(_compressedMalloc);
+ _l1SkipCompressed = l1SkipBits.grabComprBuffer(_l1SkipCompressedMalloc);
+ _l2SkipCompressed = l2SkipBits.grabComprBuffer(_l2SkipCompressedMalloc);
+ _l3SkipCompressed = l3SkipBits.grabComprBuffer(_l3SkipCompressedMalloc);
+ _l4SkipCompressed = l4SkipBits.grabComprBuffer(_l4SkipCompressedMalloc);
+ _docIdLimit = fw._docIdLimit;
+ _lastDocId = lastDocId;
+}
+
+
+FakeEGCompr64FilterOcc::~FakeEGCompr64FilterOcc(void)
+{
+ free(_compressedMalloc);
+ free(_l1SkipCompressedMalloc);
+ free(_l2SkipCompressedMalloc);
+ free(_l3SkipCompressedMalloc);
+ free(_l4SkipCompressedMalloc);
+}
+
+
+void
+FakeEGCompr64FilterOcc::forceLink(void)
+{
+}
+
+
+size_t
+FakeEGCompr64FilterOcc::bitSize(void) const
+{
+ return _bitSize;
+}
+
+
+bool
+FakeEGCompr64FilterOcc::hasWordPositions(void) const
+{
+ return false;
+}
+
+
+size_t
+FakeEGCompr64FilterOcc::skipBitSize(void) const
+{
+ return _l1SkipBitSize + _l2SkipBitSize + _l3SkipBitSize + _l4SkipBitSize;
+}
+
+
+size_t
+FakeEGCompr64FilterOcc::l1SkipBitSize(void) const
+{
+ return _l1SkipBitSize;
+}
+
+
+size_t
+FakeEGCompr64FilterOcc::l2SkipBitSize(void) const
+{
+ return _l2SkipBitSize;
+}
+
+
+size_t
+FakeEGCompr64FilterOcc::l3SkipBitSize(void) const
+{
+ return _l3SkipBitSize;
+}
+
+
+size_t
+FakeEGCompr64FilterOcc::l4SkipBitSize(void) const
+{
+ return _l4SkipBitSize;
+}
+
+
+int
+FakeEGCompr64FilterOcc::lowLevelSinglePostingScan(void) const
+{
+ return 0;
+}
+
+
+int
+FakeEGCompr64FilterOcc::lowLevelSinglePostingScanUnpack(void) const
+{
+ return 0;
+}
+
+
+int
+FakeEGCompr64FilterOcc::
+lowLevelAndPairPostingScan(const FakePosting &rhs) const
+{
+ (void) rhs;
+ return 0;
+}
+
+
+int
+FakeEGCompr64FilterOcc::
+lowLevelAndPairPostingScanUnpack(const FakePosting &rhs) const
+{
+ (void) rhs;
+ return 0;
+}
+
+#define UC64_FILTEROCC_READ_RESIDUE(val, valI, preRead, cacheInt, \
+ residue, EC) \
+ do { \
+ UC64_DECODEEXPGOLOMB(val, valI, preRead, cacheInt, \
+ K_VALUE_FILTEROCC_RESIDUE, EC); \
+ residue = val64; \
+ } while (0)
+
+
+#define UC64_FILTEROCC_READ_FIRST_DOC(val, valI, preRead, cacheInt, \
+ docId, EC) \
+ do { \
+ UC64_DECODEEXPGOLOMB(val, valI, preRead, cacheInt, \
+ K_VALUE_FILTEROCC_FIRST_DOCID, EC); \
+ docId = val64 + 1; \
+ } while (0)
+
+
+#define UC64_FILTEROCC_READ_NEXT_DOC(val, valI, preRead, cacheInt, \
+ docId, EC) \
+ do { \
+ UC64_DECODEEXPGOLOMB_SMALL(val, valI, preRead, cacheInt, \
+ K_VALUE_FILTEROCC_DELTA_DOCID, EC); \
+ docId += val64 + 1; \
+ } while (0)
+
+
+#define UC64_FILTEROCC_READ_NEXT_DOC_NS(prefix, EC) \
+ do { \
+ UC64_FILTEROCC_READ_NEXT_DOC(prefix ## Val, prefix ## Compr, \
+ prefix ## PreRead, \
+ prefix ## CacheInt, \
+ prefix ## DocId, EC); \
+ } while (0)
+
+
+#define UC64_FILTEROCC_DECODECONTEXT \
+ uint64_t val64; \
+ unsigned int length;
+
+
+class BitDecode64BEDocIds : public BitDecode64BE
+{
+public:
+ BitDecode64BEDocIds(const uint64_t *compr,
+ int bitOffset)
+ : BitDecode64BE(compr, bitOffset)
+ {
+ }
+
+ uint32_t
+ getDocIdDelta(void)
+ {
+ uint32_t ret;
+ unsigned int length;
+ const bool bigEndian = true;
+ UC64_DECODEEXPGOLOMB_SMALL_APPLY(_val, _valI, _preRead, _cacheInt,
+ K_VALUE_FILTEROCC_DELTA_DOCID, EC,
+ ret = 1 +);
+ return ret;
+ }
+
+ uint32_t
+ getL1SkipDocIdDelta(void)
+ {
+ uint32_t ret;
+ unsigned int length;
+ const bool bigEndian = true;
+ UC64_DECODEEXPGOLOMB_SMALL_APPLY(_val, _valI, _preRead, _cacheInt,
+ K_VALUE_FILTEROCC_L1SKIPDELTA_DOCID, EC,
+ ret = 1 +);
+ return ret;
+ }
+
+ uint32_t
+ getL2SkipDocIdDelta(void)
+ {
+ uint32_t ret;
+ unsigned int length;
+ const bool bigEndian = true;
+ UC64_DECODEEXPGOLOMB_SMALL_APPLY(_val, _valI, _preRead, _cacheInt,
+ K_VALUE_FILTEROCC_L2SKIPDELTA_DOCID, EC,
+ ret = 1 +);
+ return ret;
+ }
+
+ uint32_t
+ getL3SkipDocIdDelta(void)
+ {
+ uint32_t ret;
+ unsigned int length;
+ UC64BE_DECODEEXPGOLOMB_SMALL_APPLY(_val, _valI, _preRead, _cacheInt,
+ K_VALUE_FILTEROCC_L3SKIPDELTA_DOCID, EC,
+ ret = 1 +);
+ return ret;
+ }
+};
+
+template <bool bigEndian>
+class FakeFilterOccEGCompressed64ArrayIterator
+ : public queryeval::RankedSearchIteratorBase
+{
+private:
+
+ FakeFilterOccEGCompressed64ArrayIterator(const FakeFilterOccEGCompressed64ArrayIterator &other);
+
+ FakeFilterOccEGCompressed64ArrayIterator&
+ operator=(const FakeFilterOccEGCompressed64ArrayIterator &other);
+
+ typedef BitEncode64<bigEndian> EC;
+ typedef BitDecode64<bigEndian> DC;
+
+public:
+ DC _docIdBits;
+ uint32_t _residue;
+ uint32_t _lastDocId;
+
+ FakeFilterOccEGCompressed64ArrayIterator(const uint64_t *compressedOccurrences,
+ int compressedBitOffset,
+ uint32_t residue,
+ uint32_t lastDocId,
+ const search::fef::TermFieldMatchDataArray &matchData);
+
+ ~FakeFilterOccEGCompressed64ArrayIterator(void);
+
+ void doUnpack(uint32_t docId) override;
+ void doSeek(uint32_t docId) override;
+ void initRange(uint32_t begin, uint32_t end) override;
+ Trinary is_strict() const override { return Trinary::True; }
+};
+
+
+template <bool bigEndian>
+FakeFilterOccEGCompressed64ArrayIterator<bigEndian>::
+FakeFilterOccEGCompressed64ArrayIterator(const uint64_t *compressedOccurrences,
+ int compressedBitOffset,
+ uint32_t residue,
+ uint32_t lastDocId,
+ const search::fef::TermFieldMatchDataArray &matchData)
+ : queryeval::RankedSearchIteratorBase(matchData),
+ _docIdBits(compressedOccurrences, compressedBitOffset),
+ _residue(residue),
+ _lastDocId(lastDocId)
+{
+ clearUnpacked();
+}
+
+template <bool bigEndian>
+void
+FakeFilterOccEGCompressed64ArrayIterator<bigEndian>::
+initRange(uint32_t begin, uint32_t end)
+{
+ queryeval::RankedSearchIteratorBase::initRange(begin, end);
+ UC64_FILTEROCC_DECODECONTEXT;
+ uint32_t docId = 0;
+ uint32_t myResidue = 0;
+ UC64_FILTEROCC_READ_RESIDUE(_docIdBits._val,
+ _docIdBits._valI,
+ _docIdBits._preRead,
+ _docIdBits._cacheInt, myResidue, EC);
+ assert(myResidue == _residue);
+ (void) myResidue;
+ if (_residue > 0) {
+ UC64_FILTEROCC_READ_FIRST_DOC(_docIdBits._val,
+ _docIdBits._valI,
+ _docIdBits._preRead,
+ _docIdBits._cacheInt, docId, EC);
+#if DEBUG_EGCOMPR64FILTEROCC_PRINTF
+ printf("DecodeInit docId=%d\n",
+ docId);
+#endif
+ setDocId(docId);
+ } else {
+ setAtEnd();
+ }
+}
+
+
+template <bool bigEndian>
+FakeFilterOccEGCompressed64ArrayIterator<bigEndian>::
+~FakeFilterOccEGCompressed64ArrayIterator(void)
+{
+}
+
+
+template <bool bigEndian>
+void
+FakeFilterOccEGCompressed64ArrayIterator<bigEndian>::doSeek(uint32_t docId)
+{
+ unsigned int length;
+ uint32_t oDocId = getDocId();
+ UC64_DECODECONTEXT_CONSTRUCTOR(o, this->_docIdBits._);
+
+ if (getUnpacked())
+ clearUnpacked();
+ while (__builtin_expect(oDocId < docId, true)) {
+ if (__builtin_expect(--_residue == 0, false))
+ goto atbreak;
+ UC64_DECODEEXPGOLOMB_SMALL_APPLY(oVal, oCompr,
+ oPreRead, oCacheInt,
+ K_VALUE_FILTEROCC_DELTA_DOCID, EC,
+ oDocId += 1 +);
+#if DEBUG_EGCOMPR64FILTEROCC_PRINTF
+ printf("Decode docId=%d\n",
+ oDocId);
+#endif
+ }
+ UC64_DECODECONTEXT_STORE(o, this->_docIdBits._);
+ setDocId(oDocId);
+ return;
+ atbreak:
+ UC64_DECODECONTEXT_STORE(o, this->_docIdBits._);
+ setAtEnd(); // Mark end of data
+ return;
+}
+
+
+template <bool bigEndian>
+void
+FakeFilterOccEGCompressed64ArrayIterator<bigEndian>::doUnpack(uint32_t docId)
+{
+ if (_matchData.size() != 1 || getUnpacked()) {
+ return;
+ }
+ assert(docId == getDocId());
+ _matchData[0]->reset(docId);
+ setUnpacked();
+}
+
+
+search::queryeval::SearchIterator *
+FakeEGCompr64FilterOcc::
+createIterator(const fef::TermFieldMatchDataArray &matchData) const
+{
+ const uint64_t *arr = _compressed.first;
+ if (_bigEndian)
+ return new FakeFilterOccEGCompressed64ArrayIterator<true>(arr,
+ 0,
+ _hitDocs,
+ _lastDocId,
+ matchData);
+ else
+ return new FakeFilterOccEGCompressed64ArrayIterator<false>(arr,
+ 0,
+ _hitDocs,
+ _lastDocId,
+ matchData);
+}
+
+
+class FakeEGCompr64LEFilterOcc : public FakeEGCompr64FilterOcc
+{
+public:
+ FakeEGCompr64LEFilterOcc(const FakeWord &fw);
+
+ ~FakeEGCompr64LEFilterOcc(void);
+};
+
+
+FakeEGCompr64LEFilterOcc::FakeEGCompr64LEFilterOcc(const FakeWord &fw)
+ : FakeEGCompr64FilterOcc(fw, false, ".egc64lefilterocc")
+{
+}
+
+
+FakeEGCompr64LEFilterOcc::~FakeEGCompr64LEFilterOcc(void)
+{
+}
+
+
+static FPFactoryInit
+initLE(std::make_pair("EGCompr64LEFilterOcc",
+ makeFPFactory<FPFactoryT<FakeEGCompr64LEFilterOcc> >));
+
+
+template <bool doSkip>
+class FakeEGCompr64SkipFilterOcc : public FakeEGCompr64FilterOcc
+{
+public:
+ FakeEGCompr64SkipFilterOcc(const FakeWord &fw);
+
+ ~FakeEGCompr64SkipFilterOcc(void);
+
+ virtual search::queryeval::SearchIterator *
+ createIterator(const fef::TermFieldMatchDataArray &matchData) const;
+};
+
+
+static FPFactoryInit
+initNoSkip(std::make_pair("EGCompr64NoSkipFilterOcc",
+ makeFPFactory<FPFactoryT<FakeEGCompr64SkipFilterOcc<false> > >));
+
+
+static FPFactoryInit
+initSkip(std::make_pair("EGCompr64SkipFilterOcc",
+ makeFPFactory<FPFactoryT<FakeEGCompr64SkipFilterOcc<true> > >));
+
+
+template<>
+FakeEGCompr64SkipFilterOcc<true>::FakeEGCompr64SkipFilterOcc(const FakeWord &fw)
+ : FakeEGCompr64FilterOcc(fw, true, ".egc64skipfilterocc")
+{
+}
+
+
+template<>
+FakeEGCompr64SkipFilterOcc<false>::FakeEGCompr64SkipFilterOcc(const FakeWord &fw)
+ : FakeEGCompr64FilterOcc(fw, true, ".egc64noskipfilterocc")
+{
+}
+
+
+template <bool doSkip>
+FakeEGCompr64SkipFilterOcc<doSkip>::~FakeEGCompr64SkipFilterOcc(void)
+{
+}
+
+
+template <bool doSkip>
+class FakeFilterOccEGCompressed64SkipArrayIterator
+ : public queryeval::RankedSearchIteratorBase
+{
+private:
+
+ FakeFilterOccEGCompressed64SkipArrayIterator(const FakeFilterOccEGCompressed64SkipArrayIterator &other);
+
+ FakeFilterOccEGCompressed64SkipArrayIterator&
+ operator=(const FakeFilterOccEGCompressed64SkipArrayIterator &other);
+
+ typedef bitcompression::EncodeContext64BE EC;
+
+public:
+ BitDecode64BEDocIds _docIdBits;
+ uint32_t _lastDocId;
+ uint32_t _l1SkipDocId;
+ uint32_t _l2SkipDocId;
+ uint32_t _l3SkipDocId;
+ uint32_t _l4SkipDocId;
+ uint64_t _l1SkipDocIdBitsOffset;
+ uint64_t _l2SkipDocIdBitsOffset;
+ uint64_t _l2SkipL1SkipBitsOffset;
+ uint64_t _l3SkipDocIdBitsOffset;
+ uint64_t _l3SkipL1SkipBitsOffset;
+ uint64_t _l3SkipL2SkipBitsOffset;
+ uint64_t _l4SkipDocIdBitsOffset;
+ uint64_t _l4SkipL1SkipBitsOffset;
+ uint64_t _l4SkipL2SkipBitsOffset;
+ uint64_t _l4SkipL3SkipBitsOffset;
+ BitDecode64BEDocIds _l1SkipBits;
+ BitDecode64BEDocIds _l2SkipBits;
+ BitDecode64BEDocIds _l3SkipBits;
+ BitDecode64BE _l4SkipBits;
+ std::string _name;
+
+ FakeFilterOccEGCompressed64SkipArrayIterator(const uint64_t *compressedOccurrences,
+ int compressedBitOffset,
+ uint32_t lastDocId,
+ const uint64_t *compressedL1SkipOccurrences,
+ int compressedL1SkipBitOffset,
+ const uint64_t *compressedL2SkipOccurrences,
+ int compressedL2SkipBitOffset,
+ const uint64_t *compressedL3SkipOccurrences,
+ int compressedL3SkipBitOffset,
+ const uint64_t *compressedL4SkipOccurrences,
+ int compressedL4SkipBitOffset,
+ const std::string &name,
+ const fef::TermFieldMatchDataArray &matchData);
+
+ ~FakeFilterOccEGCompressed64SkipArrayIterator(void);
+
+
+ void doL4SkipSeek(uint32_t docid);
+ void doL3SkipSeek(uint32_t docid);
+ void doL2SkipSeek(uint32_t docid);
+ void doL1SkipSeek(uint32_t docId);
+
+ void doUnpack(uint32_t docId);
+ void doSeek(uint32_t docId) override;
+ void initRange(uint32_t begin, uint32_t end) override;
+ Trinary is_strict() const override { return Trinary::True; }
+};
+
+
+template <bool doSkip>
+FakeFilterOccEGCompressed64SkipArrayIterator<doSkip>::
+FakeFilterOccEGCompressed64SkipArrayIterator(const uint64_t *compressedOccurrences,
+ int compressedBitOffset,
+ uint32_t lastDocId,
+ const uint64_t *compressedL1SkipOccurrences,
+ int compressedL1SkipBitOffset,
+ const uint64_t *compressedL2SkipOccurrences,
+ int compressedL2SkipBitOffset,
+ const uint64_t *compressedL3SkipOccurrences,
+ int compressedL3SkipBitOffset,
+ const uint64_t *compressedL4SkipOccurrences,
+ int compressedL4SkipBitOffset,
+ const std::string &name,
+ const fef::TermFieldMatchDataArray &matchData)
+ : queryeval::RankedSearchIteratorBase(matchData),
+ _docIdBits(compressedOccurrences, compressedBitOffset),
+ _lastDocId(lastDocId),
+ _l1SkipDocId(0),
+ _l2SkipDocId(0),
+ _l3SkipDocId(0),
+ _l4SkipDocId(0),
+ _l1SkipDocIdBitsOffset(0),
+ _l2SkipDocIdBitsOffset(0),
+ _l2SkipL1SkipBitsOffset(0),
+ _l3SkipDocIdBitsOffset(0),
+ _l3SkipL1SkipBitsOffset(0),
+ _l3SkipL2SkipBitsOffset(0),
+ _l4SkipDocIdBitsOffset(0),
+ _l4SkipL1SkipBitsOffset(0),
+ _l4SkipL2SkipBitsOffset(0),
+ _l4SkipL3SkipBitsOffset(0),
+ _l1SkipBits(compressedL1SkipOccurrences, compressedL1SkipBitOffset),
+ _l2SkipBits(compressedL2SkipOccurrences, compressedL2SkipBitOffset),
+ _l3SkipBits(compressedL3SkipOccurrences, compressedL3SkipBitOffset),
+ _l4SkipBits(compressedL4SkipOccurrences, compressedL4SkipBitOffset),
+ _name(name)
+{
+ clearUnpacked();
+}
+
+template <bool doSkip>
+void
+FakeFilterOccEGCompressed64SkipArrayIterator<doSkip>::
+initRange(uint32_t begin, uint32_t end)
+{
+ queryeval::RankedSearchIteratorBase::initRange(begin, end);
+
+ const bool bigEndian = true;
+ UC64_FILTEROCC_DECODECONTEXT;
+ assert(_docIdBits.getOffset() == 0);
+ uint32_t docId = 0;
+ if (_lastDocId > 0) {
+ UC64_FILTEROCC_READ_FIRST_DOC(_docIdBits._val,
+ _docIdBits._valI,
+ _docIdBits._preRead,
+ _docIdBits._cacheInt, docId, EC);
+#if DEBUG_EGCOMPR64FILTEROCC_PRINTF
+ printf("DecodeInit docId=%d\n",
+ docId);
+#endif
+ UC64_DECODECONTEXT_CONSTRUCTOR(s, _l1SkipBits._);
+ UC64_DECODEEXPGOLOMB_SMALL_APPLY(sVal, sCompr, sPreRead, sCacheInt,
+ K_VALUE_FILTEROCC_L1SKIPDELTA_DOCID, EC,
+ _l1SkipDocId = 1 +);
+ UC64_DECODECONTEXT_STORE(s, _l1SkipBits._);
+#if DEBUG_EGCOMPR64FILTEROCC_PRINTF
+ printf("L1DecodeInit docId=%d, docIdPos=%d, L1SkipPos=%d\n",
+ _l1SkipDocId,
+ (int) _l1SkipDocIdBitsOffset,
+ (int) _l1SkipBits.getOffset());
+#endif
+ UC64_DECODECONTEXT_LOAD(s, _l2SkipBits._);
+ UC64_DECODEEXPGOLOMB_SMALL_APPLY(sVal, sCompr, sPreRead, sCacheInt,
+ K_VALUE_FILTEROCC_L2SKIPDELTA_DOCID, EC,
+ _l2SkipDocId = 1 +);
+#if DEBUG_EGCOMPR64FILTEROCC_PRINTF
+ printf("L2DecodeInit docId=%d, docIdPos=%d, L1SkipPos=%d\n",
+ _l2SkipDocId,
+ (int) _l2SkipDocIdBitsOffset,
+ (int) _l2SkipL1SkipBitsOffset);
+#endif
+ UC64_DECODECONTEXT_STORE(s, _l2SkipBits._);
+ UC64_DECODECONTEXT_LOAD(s, _l3SkipBits._);
+ UC64_DECODEEXPGOLOMB_SMALL_APPLY(sVal, sCompr, sPreRead, sCacheInt,
+ K_VALUE_FILTEROCC_L3SKIPDELTA_DOCID, EC,
+ _l3SkipDocId = 1 +);
+#if DEBUG_EGCOMPR64FILTEROCC_PRINTF
+ printf("L3DecodeInit docId=%d, docIdPos=%d, L1SkipPos=%d\n",
+ _l3SkipDocId,
+ (int) _l3SkipDocIdBitsOffset,
+ (int) _l3SkipL1SkipBitsOffset);
+#endif
+ UC64_DECODECONTEXT_STORE(s, _l3SkipBits._);
+ UC64_DECODECONTEXT_LOAD(s, _l4SkipBits._);
+ UC64_DECODEEXPGOLOMB_SMALL_APPLY(sVal, sCompr, sPreRead, sCacheInt,
+ K_VALUE_FILTEROCC_L4SKIPDELTA_DOCID, EC,
+ _l4SkipDocId = 1 +);
+#if DEBUG_EGCOMPR64FILTEROCC_PRINTF
+ printf("L4DecodeInit docId=%d, docIdPos=%d, L1SkipPos=%d\n",
+ _l4SkipDocId,
+ (int) _l4SkipDocIdBitsOffset,
+ (int) _l4SkipL1SkipBitsOffset);
+#endif
+ UC64_DECODECONTEXT_STORE(s, _l4SkipBits._);
+ setDocId(docId);
+ } else {
+ setAtEnd();
+ _l1SkipDocId = _l2SkipDocId = _l3SkipDocId = _l4SkipDocId = search::endDocId;
+ }
+}
+
+
+template <bool doSkip>
+FakeFilterOccEGCompressed64SkipArrayIterator<doSkip>::
+~FakeFilterOccEGCompressed64SkipArrayIterator(void)
+{
+}
+
+
+template<>
+void
+FakeFilterOccEGCompressed64SkipArrayIterator<true>::
+doL4SkipSeek(uint32_t docId)
+{
+ unsigned int length;
+ uint32_t lastL4SkipDocId;
+ const bool bigEndian = true;
+
+ if (__builtin_expect(docId > _lastDocId, false)) {
+ _l1SkipDocId = _l2SkipDocId = _l3SkipDocId = _l4SkipDocId = search::endDocId;
+ setAtEnd();
+ return;
+ }
+
+ UC64_DECODECONTEXT_CONSTRUCTOR(s, _l4SkipBits._);
+ do {
+ lastL4SkipDocId = _l4SkipDocId;
+ UC64_DECODEEXPGOLOMB_SMALL_APPLY(sVal, sCompr, sPreRead, sCacheInt,
+ K_VALUE_FILTEROCC_L4SKIPDELTA_BITPOS, EC,
+ _l4SkipDocIdBitsOffset += 1 +);
+ UC64_DECODEEXPGOLOMB_SMALL_APPLY(sVal, sCompr, sPreRead, sCacheInt,
+ K_VALUE_FILTEROCC_L4SKIPDELTA_L1SKIPBITPOS, EC,
+ _l4SkipL1SkipBitsOffset += 1 +);
+ UC64_DECODEEXPGOLOMB_SMALL_APPLY(sVal, sCompr, sPreRead, sCacheInt,
+ K_VALUE_FILTEROCC_L4SKIPDELTA_L2SKIPBITPOS, EC,
+ _l4SkipL2SkipBitsOffset += 1 +);
+ UC64_DECODEEXPGOLOMB_SMALL_APPLY(sVal, sCompr, sPreRead, sCacheInt,
+ K_VALUE_FILTEROCC_L4SKIPDELTA_L3SKIPBITPOS, EC,
+ _l4SkipL3SkipBitsOffset += 1 +);
+ UC64_DECODEEXPGOLOMB_SMALL_APPLY(sVal, sCompr, sPreRead, sCacheInt,
+ K_VALUE_FILTEROCC_L4SKIPDELTA_DOCID, EC,
+ _l4SkipDocId += 1 +);
+#if DEBUG_EGCOMPR64FILTEROCC_PRINTF
+ printf("L4Decode docId=%d, docIdPos=%d, l1SkipPos=%d, nextDocId %d\n",
+ lastL4SkipDocId,
+ (int) _l4SkipDocIdBitsOffset,
+ (int) _l4SkipL1SkipBitsOffset,
+ _l4SkipDocId);
+#endif
+ } while (docId > _l4SkipDocId);
+ UC64_DECODECONTEXT_STORE(s, _l4SkipBits._);
+ _l1SkipDocId = _l2SkipDocId = _l3SkipDocId = lastL4SkipDocId;
+ _l1SkipDocIdBitsOffset = _l2SkipDocIdBitsOffset = _l3SkipDocIdBitsOffset =
+ _l4SkipDocIdBitsOffset;
+ _l2SkipL1SkipBitsOffset = _l3SkipL1SkipBitsOffset =_l4SkipL1SkipBitsOffset;
+ _l3SkipL2SkipBitsOffset =_l4SkipL2SkipBitsOffset;
+ _docIdBits.seek(_l4SkipDocIdBitsOffset);
+ _l1SkipBits.seek(_l4SkipL1SkipBitsOffset);
+ _l2SkipBits.seek(_l4SkipL2SkipBitsOffset);
+ _l3SkipBits.seek(_l4SkipL3SkipBitsOffset);
+ lastL4SkipDocId += _docIdBits.getDocIdDelta();
+ _l1SkipDocId += _l1SkipBits.getL1SkipDocIdDelta();
+ _l2SkipDocId += _l2SkipBits.getL2SkipDocIdDelta();
+ _l3SkipDocId += _l3SkipBits.getL3SkipDocIdDelta();
+#if DEBUG_EGCOMPR64FILTEROCC_PRINTF
+ printf("L4Seek, docId %d docIdPos %d L1SkipPos %d, nextDocId %d\n",
+ lastL4SkipDocId,
+ (int) _l4SkipDocIdBitsOffset,
+ (int) _l4SkipL1SkipBitsOffset,
+ _l4SkipDocId);
+#endif
+ setDocId(lastL4SkipDocId);
+}
+
+
+template<>
+void
+FakeFilterOccEGCompressed64SkipArrayIterator<true>::
+doL3SkipSeek(uint32_t docId)
+{
+ unsigned int length;
+ uint32_t lastL3SkipDocId;
+ const bool bigEndian = true;
+
+ if (__builtin_expect(docId > _l4SkipDocId, false)) {
+ doL4SkipSeek(docId);
+ if (docId <= _l3SkipDocId)
+ return;
+ }
+
+ UC64_DECODECONTEXT_CONSTRUCTOR(s, _l3SkipBits._);
+ do {
+ lastL3SkipDocId = _l3SkipDocId;
+ UC64_DECODEEXPGOLOMB_SMALL_APPLY(sVal, sCompr, sPreRead, sCacheInt,
+ K_VALUE_FILTEROCC_L3SKIPDELTA_BITPOS, EC,
+ _l3SkipDocIdBitsOffset += 1 +);
+ UC64_DECODEEXPGOLOMB_SMALL_APPLY(sVal, sCompr, sPreRead, sCacheInt,
+ K_VALUE_FILTEROCC_L3SKIPDELTA_L1SKIPBITPOS, EC,
+ _l3SkipL1SkipBitsOffset += 1 +);
+ UC64_DECODEEXPGOLOMB_SMALL_APPLY(sVal, sCompr, sPreRead, sCacheInt,
+ K_VALUE_FILTEROCC_L3SKIPDELTA_L2SKIPBITPOS, EC,
+ _l3SkipL2SkipBitsOffset += 1 +);
+ UC64_DECODEEXPGOLOMB_SMALL_APPLY(sVal, sCompr, sPreRead, sCacheInt,
+ K_VALUE_FILTEROCC_L3SKIPDELTA_DOCID, EC,
+ _l3SkipDocId += 1 +);
+#if DEBUG_EGCOMPR64FILTEROCC_PRINTF
+ printf("L3Decode docId=%d, docIdPos=%d, l1SkipPos=%d, nextDocId %d\n",
+ lastL3SkipDocId,
+ (int) _l3SkipDocIdBitsOffset,
+ (int) _l3SkipL1SkipBitsOffset,
+ _l3SkipDocId);
+#endif
+ } while (docId > _l3SkipDocId);
+ UC64_DECODECONTEXT_STORE(s, _l3SkipBits._);
+ _l1SkipDocId = _l2SkipDocId = lastL3SkipDocId;
+ _l1SkipDocIdBitsOffset = _l2SkipDocIdBitsOffset = _l3SkipDocIdBitsOffset;
+ _l2SkipL1SkipBitsOffset = _l3SkipL1SkipBitsOffset;
+ _docIdBits.seek(_l3SkipDocIdBitsOffset);
+ _l1SkipBits.seek(_l3SkipL1SkipBitsOffset);
+ _l2SkipBits.seek(_l3SkipL2SkipBitsOffset);
+ lastL3SkipDocId += _docIdBits.getDocIdDelta();
+ _l1SkipDocId += _l1SkipBits.getL1SkipDocIdDelta();
+ _l2SkipDocId += _l2SkipBits.getL2SkipDocIdDelta();
+#if DEBUG_EGCOMPR64FILTEROCC_PRINTF
+ printf("L3Seek, docId %d docIdPos %d L1SkipPos %d, nextDocId %d\n",
+ lastL3SkipDocId,
+ (int) _l3SkipDocIdBitsOffset,
+ (int) _l3SkipL1SkipBitsOffset,
+ _l3SkipDocId);
+#endif
+ setDocId(lastL3SkipDocId);
+}
+
+
+template<>
+void
+FakeFilterOccEGCompressed64SkipArrayIterator<true>::
+doL2SkipSeek(uint32_t docId)
+{
+ unsigned int length;
+ uint32_t lastL2SkipDocId;
+ const bool bigEndian = true;
+
+ if (__builtin_expect(docId > _l3SkipDocId, false)) {
+ doL3SkipSeek(docId);
+ if (docId <= _l2SkipDocId)
+ return;
+ }
+
+ UC64_DECODECONTEXT_CONSTRUCTOR(s, _l2SkipBits._);
+ do {
+ lastL2SkipDocId = _l2SkipDocId;
+ UC64_DECODEEXPGOLOMB_SMALL_APPLY(sVal, sCompr, sPreRead, sCacheInt,
+ K_VALUE_FILTEROCC_L2SKIPDELTA_BITPOS, EC,
+ _l2SkipDocIdBitsOffset += 1 +);
+ UC64_DECODEEXPGOLOMB_SMALL_APPLY(sVal, sCompr, sPreRead, sCacheInt,
+ K_VALUE_FILTEROCC_L2SKIPDELTA_L1SKIPBITPOS, EC,
+ _l2SkipL1SkipBitsOffset += 1 +);
+ UC64_DECODEEXPGOLOMB_SMALL_APPLY(sVal, sCompr, sPreRead, sCacheInt,
+ K_VALUE_FILTEROCC_L2SKIPDELTA_DOCID, EC,
+ _l2SkipDocId += 1 +);
+#if DEBUG_EGCOMPR64FILTEROCC_PRINTF
+ printf("L2Decode docId=%d, docIdPos=%d, l1SkipPos=%d, nextDocId %d\n",
+ lastL2SkipDocId,
+ (int) _l2SkipDocIdBitsOffset,
+ (int) _l2SkipL1SkipBitsOffset,
+ _l2SkipDocId);
+#endif
+ } while (docId > _l2SkipDocId);
+ UC64_DECODECONTEXT_STORE(s, _l2SkipBits._);
+ _l1SkipDocId = lastL2SkipDocId;
+ _l1SkipDocIdBitsOffset = _l2SkipDocIdBitsOffset;
+ _docIdBits.seek(_l2SkipDocIdBitsOffset);
+ _l1SkipBits.seek(_l2SkipL1SkipBitsOffset);
+ lastL2SkipDocId += _docIdBits.getDocIdDelta();
+ _l1SkipDocId += _l1SkipBits.getL1SkipDocIdDelta();
+#if DEBUG_EGCOMPR64FILTEROCC_PRINTF
+ printf("L2Seek, docId %d docIdPos %d L1SkipPos %d, nextDocId %d\n",
+ lastL2SkipDocId,
+ (int) _l2SkipDocIdBitsOffset,
+ (int) _l2SkipL1SkipBitsOffset,
+ _l2SkipDocId);
+#endif
+ setDocId(lastL2SkipDocId);
+}
+
+
+template<>
+void
+FakeFilterOccEGCompressed64SkipArrayIterator<false>::doL1SkipSeek(uint32_t docId)
+{
+ (void) docId;
+}
+
+
+template<>
+void
+FakeFilterOccEGCompressed64SkipArrayIterator<true>::doL1SkipSeek(uint32_t docId)
+{
+ unsigned int length;
+ uint32_t lastL1SkipDocId;
+ const bool bigEndian = true;
+
+ if (__builtin_expect(docId > _l2SkipDocId, false)) {
+ doL2SkipSeek(docId);
+ if (docId <= _l1SkipDocId)
+ return;
+ }
+ UC64_DECODECONTEXT_CONSTRUCTOR(s, _l1SkipBits._);
+ do {
+ lastL1SkipDocId = _l1SkipDocId;
+ UC64_DECODEEXPGOLOMB_SMALL_APPLY(sVal, sCompr, sPreRead, sCacheInt,
+ K_VALUE_FILTEROCC_L1SKIPDELTA_BITPOS, EC,
+ _l1SkipDocIdBitsOffset += 1 +);
+ UC64_DECODEEXPGOLOMB_SMALL_APPLY(sVal, sCompr, sPreRead, sCacheInt,
+ K_VALUE_FILTEROCC_L1SKIPDELTA_DOCID, EC,
+ _l1SkipDocId += 1 +);
+#if DEBUG_EGCOMPR64FILTEROCC_PRINTF
+ printf("L1Decode docId=%d docIdPos=%d, L1SkipPos=%d, nextDocId %d\n",
+ lastL1SkipDocId,
+ (int) _l1SkipDocIdBitsOffset,
+ (int) _l1SkipBits.getOffset(sCompr, sPreRead),
+ _l1SkipDocId);
+#endif
+ } while (docId > _l1SkipDocId);
+ UC64_DECODECONTEXT_STORE(s, _l1SkipBits._);
+ _docIdBits.seek(_l1SkipDocIdBitsOffset);
+ lastL1SkipDocId += _docIdBits.getDocIdDelta();
+ setDocId(lastL1SkipDocId);
+#if DEBUG_EGCOMPR64FILTEROCC_PRINTF
+ printf("L1SkipSeek, docId %d docIdPos %d, nextDocId %d\n",
+ lastL1SkipDocId,
+ (int) _l1SkipDocIdBitsOffset,
+ _l1SkipDocId);
+#endif
+}
+
+
+template <bool doSkip>
+void
+FakeFilterOccEGCompressed64SkipArrayIterator<doSkip>::doSeek(uint32_t docId)
+{
+ if (getUnpacked())
+ clearUnpacked();
+ if (doSkip && docId > _l1SkipDocId) {
+ doL1SkipSeek(docId);
+ }
+ unsigned int length;
+ uint32_t oDocId = getDocId();
+ const bool bigEndian = true;
+ if (doSkip) {
+#if DEBUG_EGCOMPR64FILTEROCC_ASSERT
+ assert(oDocId <= _l1SkipDocId);
+ assert(docId <= _l1SkipDocId);
+ assert(oDocId <= _l2SkipDocId);
+ assert(docId <= _l2SkipDocId);
+ assert(oDocId <= _l3SkipDocId);
+ assert(docId <= _l3SkipDocId);
+ assert(oDocId <= _l4SkipDocId);
+ assert(docId <= _l4SkipDocId);
+#endif
+ }
+ UC64_DECODECONTEXT_CONSTRUCTOR(o, this->_docIdBits._);
+ while (__builtin_expect(oDocId < docId, true)) {
+ if (!doSkip) {
+ if (__builtin_expect(oDocId >= _lastDocId, false)) {
+#if DEBUG_ZCFILTEROCC_ASSERT
+ assert(_l1SkipDocId == _lastDocId);
+ assert(_l2SkipDocId == _lastDocId);
+ assert(_l3SkipDocId == _lastDocId);
+ assert(_l4SkipDocId == _lastDocId);
+#endif
+ oDocId = _l1SkipDocId = _l2SkipDocId = _l3SkipDocId = _l4SkipDocId = search::endDocId;
+ break;
+ }
+ }
+ if (doSkip) {
+#if DEBUG_EGCOMPR64FILTEROCC_ASSERT
+ assert(oDocId <= _l1SkipDocId);
+ assert(oDocId <= _l2SkipDocId);
+ assert(oDocId <= _l3SkipDocId);
+ assert(oDocId <= _l4SkipDocId);
+#endif
+ } else if (__builtin_expect(oDocId >= _l1SkipDocId, false)) {
+ // Validate L1 Skip information
+ assert(oDocId == _l1SkipDocId);
+ uint64_t docIdBitsOffset = _docIdBits.getOffset(oCompr, oPreRead);
+ UC64_DECODECONTEXT_CONSTRUCTOR(s1, _l1SkipBits._);
+ UC64_DECODEEXPGOLOMB_SMALL_APPLY(s1Val, s1Compr, s1PreRead,
+ s1CacheInt,
+ K_VALUE_FILTEROCC_L1SKIPDELTA_BITPOS, EC,
+ _l1SkipDocIdBitsOffset += 1 +);
+ assert(docIdBitsOffset = _l1SkipDocIdBitsOffset);
+ if (__builtin_expect(oDocId >= _l2SkipDocId, false)) {
+ // Validate L2 Skip information
+ assert(oDocId == _l2SkipDocId);
+ uint64_t l1SkipBitsOffset =
+ _l1SkipBits.getOffset(s1Compr, s1PreRead);
+ UC64_DECODECONTEXT_CONSTRUCTOR(s2, _l2SkipBits._);
+ UC64_DECODEEXPGOLOMB_SMALL_APPLY(s2Val, s2Compr, s2PreRead,
+ s2CacheInt,
+ K_VALUE_FILTEROCC_L2SKIPDELTA_BITPOS, EC,
+ _l2SkipDocIdBitsOffset += 1 +);
+ UC64_DECODEEXPGOLOMB_SMALL_APPLY(s2Val, s2Compr, s2PreRead,
+ s2CacheInt,
+ K_VALUE_FILTEROCC_L2SKIPDELTA_L1SKIPBITPOS, EC,
+ _l2SkipL1SkipBitsOffset += 1 +);
+ assert(docIdBitsOffset == _l2SkipDocIdBitsOffset);
+ assert(l1SkipBitsOffset == _l2SkipL1SkipBitsOffset);
+ if (__builtin_expect(oDocId >= _l3SkipDocId, false)) {
+ // Validate L3 Skip information
+ assert(oDocId == _l3SkipDocId);
+ uint64_t l2SkipBitsOffset =
+ _l2SkipBits.getOffset(s2Compr, s2PreRead);
+ UC64_DECODECONTEXT_CONSTRUCTOR(s3, _l3SkipBits._);
+ UC64_DECODEEXPGOLOMB_SMALL_APPLY(s3Val, s3Compr,
+ s3PreRead,
+ s3CacheInt,
+ K_VALUE_FILTEROCC_L3SKIPDELTA_BITPOS, EC,
+ _l3SkipDocIdBitsOffset += 1 +);
+ UC64_DECODEEXPGOLOMB_SMALL_APPLY(s3Val, s3Compr,
+ s3PreRead,
+ s3CacheInt,
+ K_VALUE_FILTEROCC_L3SKIPDELTA_L1SKIPBITPOS, EC,
+ _l3SkipL1SkipBitsOffset += 1 +);
+ UC64_DECODEEXPGOLOMB_SMALL_APPLY(s3Val, s3Compr,
+ s3PreRead,
+ s3CacheInt,
+ K_VALUE_FILTEROCC_L3SKIPDELTA_L2SKIPBITPOS, EC,
+ _l3SkipL2SkipBitsOffset += 1 +);
+ assert(docIdBitsOffset == _l3SkipDocIdBitsOffset);
+ assert(l1SkipBitsOffset == _l3SkipL1SkipBitsOffset);
+ assert(l2SkipBitsOffset == _l3SkipL2SkipBitsOffset);
+ if (__builtin_expect(oDocId >= _l4SkipDocId, false)) {
+ // Validate L4 Skip information
+ assert(oDocId == _l4SkipDocId);
+ uint64_t l3SkipBitsOffset =
+ _l3SkipBits.getOffset(s3Compr, s3PreRead);
+ UC64_DECODECONTEXT_CONSTRUCTOR(s4, _l4SkipBits._);
+ UC64_DECODEEXPGOLOMB_SMALL_APPLY(s4Val, s4Compr,
+ s4PreRead,
+ s4CacheInt,
+ K_VALUE_FILTEROCC_L4SKIPDELTA_BITPOS, EC,
+ _l4SkipDocIdBitsOffset += 1 +);
+ UC64_DECODEEXPGOLOMB_SMALL_APPLY(s4Val, s4Compr,
+ s4PreRead,
+ s4CacheInt,
+ K_VALUE_FILTEROCC_L4SKIPDELTA_L1SKIPBITPOS, EC,
+ _l4SkipL1SkipBitsOffset += 1 +);
+ UC64_DECODEEXPGOLOMB_SMALL_APPLY(s4Val, s4Compr,
+ s4PreRead,
+ s4CacheInt,
+ K_VALUE_FILTEROCC_L4SKIPDELTA_L2SKIPBITPOS, EC,
+ _l4SkipL2SkipBitsOffset += 1 +);
+ UC64_DECODEEXPGOLOMB_SMALL_APPLY(s4Val, s4Compr,
+ s4PreRead,
+ s4CacheInt,
+ K_VALUE_FILTEROCC_L4SKIPDELTA_L3SKIPBITPOS, EC,
+ _l4SkipL3SkipBitsOffset += 1 +);
+ assert(docIdBitsOffset == _l4SkipDocIdBitsOffset);
+ (void) docIdBitsOffset;
+ assert(l1SkipBitsOffset == _l4SkipL1SkipBitsOffset);
+ (void) l1SkipBitsOffset;
+ assert(l2SkipBitsOffset == _l4SkipL2SkipBitsOffset);
+ (void) l2SkipBitsOffset;
+ assert(l3SkipBitsOffset == _l4SkipL3SkipBitsOffset);
+ (void) l3SkipBitsOffset;
+ UC64_DECODEEXPGOLOMB_SMALL_APPLY(s4Val, s4Compr,
+ s4PreRead,
+ s4CacheInt,
+ K_VALUE_FILTEROCC_L4SKIPDELTA_DOCID, EC,
+ _l4SkipDocId += 1 +);
+ UC64_DECODECONTEXT_STORE(s4, _l4SkipBits._);
+#if DEBUG_EGCOMPR64FILTEROCC_PRINTF
+ printf("L4DecodeV docId=%d docIdPos=%d L1SkipPos=%d\n",
+ _l4SkipDocId,
+ (int) _l4SkipDocIdBitsOffset,
+ (int) _l4SkipL1SkipBitsOffset);
+#endif
+ }
+ UC64_DECODEEXPGOLOMB_SMALL_APPLY(s3Val, s3Compr,
+ s3PreRead,
+ s3CacheInt,
+ K_VALUE_FILTEROCC_L3SKIPDELTA_DOCID, EC,
+ _l3SkipDocId += 1 +);
+ UC64_DECODECONTEXT_STORE(s3, _l3SkipBits._);
+#if DEBUG_EGCOMPR64FILTEROCC_PRINTF
+ printf("L3DecodeV docId=%d docIdPos=%d L1SkipPos=%d\n",
+ _l3SkipDocId,
+ (int) _l3SkipDocIdBitsOffset,
+ (int) _l3SkipL1SkipBitsOffset);
+#endif
+ }
+ UC64_DECODEEXPGOLOMB_SMALL_APPLY(s2Val, s2Compr, s2PreRead,
+ s2CacheInt,
+ K_VALUE_FILTEROCC_L2SKIPDELTA_DOCID, EC,
+ _l2SkipDocId += 1 +);
+ UC64_DECODECONTEXT_STORE(s2, _l2SkipBits._);
+#if DEBUG_EGCOMPR64FILTEROCC_PRINTF
+ printf("L2DecodeV docId=%d docIdPos=%d L1SkipPos=%d\n",
+ _l2SkipDocId,
+ (int) _l2SkipDocIdBitsOffset,
+ (int) _l2SkipL1SkipBitsOffset);
+#endif
+ }
+ UC64_DECODEEXPGOLOMB_SMALL_APPLY(s1Val, s1Compr, s1PreRead,
+ s1CacheInt,
+ K_VALUE_FILTEROCC_L1SKIPDELTA_DOCID, EC,
+ _l1SkipDocId += 1 +);
+ UC64_DECODECONTEXT_STORE(s1, _l1SkipBits._);
+ assert(docIdBitsOffset == _l1SkipDocIdBitsOffset);
+ BitDecode64BE
+ checkDocIdBits(_docIdBits.getComprBase(),
+ _docIdBits.getBitOffsetBase());
+ checkDocIdBits.seek(_l1SkipDocIdBitsOffset);
+ if (checkDocIdBits._valI != oCompr ||
+ checkDocIdBits._val != oVal ||
+ checkDocIdBits._cacheInt != oCacheInt ||
+ checkDocIdBits._preRead != oPreRead) {
+ printf("seek problem: check "
+ "(%p,%d) "
+ "%p,%" PRIu64 ",%" PRIu64 ",%u != "
+ "(%p,%d) "
+ "%p,%" PRIu64 ",%" PRIu64 ",%u for "
+ "offset %" PRIu64 "\n",
+ checkDocIdBits.getComprBase(),
+ checkDocIdBits.getBitOffsetBase(),
+ checkDocIdBits._valI,
+ checkDocIdBits._val,
+ checkDocIdBits._cacheInt,
+ checkDocIdBits._preRead,
+ _docIdBits.getComprBase(),
+ _docIdBits.getBitOffsetBase(),
+ oCompr,
+ oVal,
+ oCacheInt,
+ oPreRead,
+ _l1SkipDocIdBitsOffset);
+ abort();
+ }
+#if DEBUG_EGCOMPR64FILTEROCC_PRINTF
+ printf("L1DecodeV docId=%d docIdPos=%d L1SkipPos=%d\n",
+ _l1SkipDocId,
+ (int) _l2SkipDocIdBitsOffset,
+ (int) _l2SkipL1SkipBitsOffset);
+#endif
+ }
+ UC64_DECODEEXPGOLOMB_SMALL_APPLY(oVal, oCompr, oPreRead, oCacheInt,
+ K_VALUE_FILTEROCC_DELTA_DOCID, EC,
+ oDocId += 1 +);
+#if DEBUG_EGCOMPR64FILTEROCC_PRINTF
+ printf("Decode docId=%d\n",
+ oDocId);
+#endif
+ }
+ UC64_DECODECONTEXT_STORE(o, this->_docIdBits._);
+ setDocId(oDocId);
+ return;
+}
+
+
+template <bool doSkip>
+void
+FakeFilterOccEGCompressed64SkipArrayIterator<doSkip>::doUnpack(uint32_t docId)
+{
+ if (_matchData.size() != 1 || getUnpacked()) {
+ return;
+ }
+ assert(docId == getDocId());
+ _matchData[0]->reset(docId);
+ setUnpacked();
+}
+
+
+template <bool doSkip>
+search::queryeval::SearchIterator *
+FakeEGCompr64SkipFilterOcc<doSkip>::
+createIterator(const fef::TermFieldMatchDataArray &matchData) const
+{
+ unsigned int length;
+ uint64_t val64;
+ const uint64_t *arr = _compressed.first;
+ const bool bigEndian = true;
+ BitDecode64BE docIdBits(arr, 0);
+ assert(docIdBits.getCompr() == arr);
+ assert(docIdBits.getBitOffset() == 0);
+ assert(docIdBits.getOffset() == 0);
+
+ typedef bitcompression::EncodeContext64BE EC;
+
+ uint32_t myResidue = 0;
+ UC64_FILTEROCC_READ_RESIDUE(docIdBits._val,
+ docIdBits._valI,
+ docIdBits._preRead,
+ docIdBits._cacheInt, myResidue, EC);
+ assert(myResidue == _hitDocs);
+ (void) myResidue;
+
+ const uint64_t *l1SkipArr = _l1SkipCompressed.first;
+ const uint64_t *l2SkipArr = _l2SkipCompressed.first;
+ const uint64_t *l3SkipArr = _l3SkipCompressed.first;
+ const uint64_t *l4SkipArr = _l4SkipCompressed.first;
+ return new FakeFilterOccEGCompressed64SkipArrayIterator<doSkip>(docIdBits.getCompr(),
+ docIdBits.getBitOffset(),
+ _lastDocId,
+ l1SkipArr, 0,
+ l2SkipArr, 0,
+ l3SkipArr, 0,
+ l4SkipArr, 0,
+ getName(),
+ matchData);
+}
+
+
+} // namespace fakedata
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/test/fakedata/fakeegcompr64filterocc.h b/searchlib/src/vespa/searchlib/test/fakedata/fakeegcompr64filterocc.h
new file mode 100644
index 00000000000..333f029cd08
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/test/fakedata/fakeegcompr64filterocc.h
@@ -0,0 +1,121 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include "fakeword.h"
+#include "fakeposting.h"
+
+namespace search
+{
+
+namespace fakedata
+{
+
+/*
+ * Old compressed posocc format.
+ */
+class FakeEGCompr64FilterOcc : public FakePosting
+{
+protected:
+ std::pair<uint64_t *, size_t> _compressed;
+ std::pair<uint64_t *, size_t> _l1SkipCompressed;
+ std::pair<uint64_t *, size_t> _l2SkipCompressed;
+ std::pair<uint64_t *, size_t> _l3SkipCompressed;
+ std::pair<uint64_t *, size_t> _l4SkipCompressed;
+ void *_compressedMalloc;
+ void *_l1SkipCompressedMalloc;
+ void *_l2SkipCompressedMalloc;
+ void *_l3SkipCompressedMalloc;
+ void *_l4SkipCompressedMalloc;
+ unsigned int _docIdLimit;
+ unsigned int _hitDocs;
+ uint32_t _lastDocId;
+ size_t _bitSize;
+ size_t _l1SkipBitSize;
+ size_t _l2SkipBitSize;
+ size_t _l3SkipBitSize;
+ size_t _l4SkipBitSize;
+ bool _bigEndian;
+
+private:
+ void
+ setup(const FakeWord &fw);
+
+ template <bool bigEndian>
+ void
+ setupT(const FakeWord &fw);
+
+public:
+ FakeEGCompr64FilterOcc(const FakeWord &fw);
+
+ FakeEGCompr64FilterOcc(const FakeWord &fw,
+ bool bigEndian,
+ const char *nameSuffix);
+
+ ~FakeEGCompr64FilterOcc(void);
+
+ static void
+ forceLink(void);
+
+ /*
+ * Size of posting list, in bits.
+ */
+ size_t
+ bitSize(void) const;
+
+ virtual bool
+ hasWordPositions(void) const;
+
+ /*
+ * Size of posting skip list, in bits.
+ */
+ size_t
+ skipBitSize(void) const;
+
+ size_t
+ l1SkipBitSize(void) const;
+
+ size_t
+ l2SkipBitSize(void) const;
+
+ size_t
+ l3SkipBitSize(void) const;
+
+ size_t
+ l4SkipBitSize(void) const;
+
+ /*
+ * Single posting list performance, without feature unpack.
+ */
+ virtual int
+ lowLevelSinglePostingScan(void) const;
+
+ /*
+ * Single posting list performance, with feature unpack.
+ */
+ virtual int
+ lowLevelSinglePostingScanUnpack(void) const;
+
+ /*
+ * Two posting lists performance (same format) without feature unpack.
+ */
+ virtual int
+ lowLevelAndPairPostingScan(const FakePosting &rhs) const;
+
+ /*
+ * Two posting lists performance (same format) with feature unpack.
+ */
+ virtual int
+ lowLevelAndPairPostingScanUnpack(const FakePosting &rhs) const;
+
+
+ /*
+ * Iterator factory, for current query evaluation framework.
+ */
+ virtual search::queryeval::SearchIterator *
+ createIterator(const fef::TermFieldMatchDataArray &matchData) const;
+};
+
+} // namespace fakedata
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/test/fakedata/fakefilterocc.cpp b/searchlib/src/vespa/searchlib/test/fakedata/fakefilterocc.cpp
new file mode 100644
index 00000000000..749803cc6ed
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/test/fakedata/fakefilterocc.cpp
@@ -0,0 +1,206 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".fakefilterocc");
+#include <vespa/searchlib/queryeval/iterators.h>
+#include "fakefilterocc.h"
+#include "fpfactory.h"
+
+using search::fef::TermFieldMatchData;
+using search::fef::TermFieldMatchDataPosition;
+
+namespace search
+{
+
+namespace fakedata
+{
+
+static FPFactoryInit
+init(std::make_pair("FilterOcc",
+ makeFPFactory<FPFactoryT<FakeFilterOcc> >));
+
+FakeFilterOcc::FakeFilterOcc(const FakeWord &fw)
+ : FakePosting(fw.getName() + ".filterocc"),
+ _uncompressed(),
+ _docIdLimit(0),
+ _hitDocs(0)
+{
+ std::vector<uint32_t> fake;
+
+ typedef FakeWord FW;
+ typedef FW::DocWordFeatureList DWFL;
+
+ DWFL::const_iterator d(fw._postings.begin());
+ DWFL::const_iterator de(fw._postings.end());
+
+ while (d != de) {
+ fake.push_back(d->_docId);
+ ++d;
+ }
+ std::swap(_uncompressed, fake);
+ _docIdLimit = fw._docIdLimit;
+ _hitDocs = fw._postings.size();
+}
+
+
+FakeFilterOcc::~FakeFilterOcc(void)
+{
+}
+
+
+void
+FakeFilterOcc::forceLink(void)
+{
+}
+
+
+size_t
+FakeFilterOcc::bitSize(void) const
+{
+ return 32 * _uncompressed.size();
+}
+
+
+bool
+FakeFilterOcc::hasWordPositions(void) const
+{
+ return false;
+}
+
+
+int
+FakeFilterOcc::lowLevelSinglePostingScan(void) const
+{
+ return 0;
+}
+
+
+int
+FakeFilterOcc::lowLevelSinglePostingScanUnpack(void) const
+{
+ return 0;
+}
+
+
+int
+FakeFilterOcc::
+lowLevelAndPairPostingScan(const FakePosting &rhs) const
+{
+ (void) rhs;
+ return 0;
+}
+
+
+int
+FakeFilterOcc::
+lowLevelAndPairPostingScanUnpack(const FakePosting &rhs) const
+{
+ (void) rhs;
+ return 0;
+}
+
+
+class FakeFilterOccArrayIterator: public queryeval::RankedSearchIteratorBase
+{
+private:
+ FakeFilterOccArrayIterator(const FakeFilterOccArrayIterator &other);
+
+ FakeFilterOccArrayIterator& operator=(const FakeFilterOccArrayIterator &);
+
+public:
+ const uint32_t *_arr;
+ const uint32_t *_arrEnd;
+
+ FakeFilterOccArrayIterator(const uint32_t *arr,
+ const uint32_t *arrEnd,
+ const fef::TermFieldMatchDataArray &matchData);
+
+ ~FakeFilterOccArrayIterator(void);
+
+ void doUnpack(uint32_t docId) override;
+ void doSeek(uint32_t docId) override;
+ void initRange(uint32_t begin, uint32_t end) override;
+ Trinary is_strict() const override { return Trinary::True; }
+};
+
+
+void
+FakeFilterOccArrayIterator::doSeek(uint32_t docId)
+{
+ const uint32_t *oarr = _arr;
+ const uint32_t *oarrEnd = _arrEnd;
+
+ if (getUnpacked())
+ clearUnpacked();
+ if (oarr >= oarrEnd)
+ goto doneuncompressed;
+ for (;;) {
+ if ((int) *oarr >= (int) docId)
+ goto found;
+ if (++oarr >= oarrEnd)
+ goto doneuncompressed;
+ }
+ found:
+ _arr = oarr;
+ setDocId(*oarr);
+ return; // Still data
+ doneuncompressed:
+ _arr = oarr;
+ setAtEnd(); // Mark end of data
+ return; // Ran off end
+}
+
+
+FakeFilterOccArrayIterator::
+FakeFilterOccArrayIterator(const uint32_t *arr,
+ const uint32_t *arrEnd,
+ const fef::TermFieldMatchDataArray &matchData)
+ : queryeval::RankedSearchIteratorBase(matchData),
+ _arr(arr),
+ _arrEnd(arrEnd)
+{
+ clearUnpacked();
+}
+
+void
+FakeFilterOccArrayIterator::initRange(uint32_t begin, uint32_t end)
+{
+ queryeval::RankedSearchIteratorBase::initRange(begin, end);
+ if (_arr < _arrEnd) {
+ setDocId(*_arr);
+ } else {
+ setAtEnd();
+ }
+}
+
+
+FakeFilterOccArrayIterator::~FakeFilterOccArrayIterator(void)
+{
+}
+
+
+void
+FakeFilterOccArrayIterator::doUnpack(uint32_t docId)
+{
+ if (_matchData.size() != 1 || getUnpacked()) {
+ return;
+ }
+ assert(docId == getDocId());
+ _matchData[0]->reset(docId);
+ setUnpacked();
+}
+
+
+search::queryeval::SearchIterator *
+FakeFilterOcc::
+createIterator(const fef::TermFieldMatchDataArray &matchData) const
+{
+ return new FakeFilterOccArrayIterator(&*_uncompressed.begin(),
+ &*_uncompressed.end(),
+ matchData);
+}
+
+} // namespace fakedata
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/test/fakedata/fakefilterocc.h b/searchlib/src/vespa/searchlib/test/fakedata/fakefilterocc.h
new file mode 100644
index 00000000000..b0d18b94eac
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/test/fakedata/fakefilterocc.h
@@ -0,0 +1,74 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include "fakeword.h"
+#include "fakeposting.h"
+
+namespace search
+{
+
+namespace fakedata
+{
+
+/*
+ * Old posocc format.
+ */
+class FakeFilterOcc : public FakePosting
+{
+private:
+ std::vector<uint32_t> _uncompressed;
+ unsigned int _docIdLimit;
+ unsigned int _hitDocs;
+public:
+ FakeFilterOcc(const FakeWord &fakeword);
+
+ ~FakeFilterOcc(void);
+
+ static void
+ forceLink(void);
+
+ /*
+ * Size of posting list, in bits.
+ */
+ size_t
+ bitSize(void) const;
+
+ virtual bool
+ hasWordPositions(void) const;
+
+ /*
+ * Single posting list performance, without feature unpack.
+ */
+ virtual int
+ lowLevelSinglePostingScan(void) const;
+
+ /*
+ * Single posting list performance, with feature unpack.
+ */
+ virtual int
+ lowLevelSinglePostingScanUnpack(void) const;
+
+ /*
+ * Two posting lists performance (same format) without feature unpack.
+ */
+ virtual int
+ lowLevelAndPairPostingScan(const FakePosting &rhs) const;
+
+ /*
+ * Two posting lists performance (same format) with feature unpack.
+ */
+ virtual int
+ lowLevelAndPairPostingScanUnpack(const FakePosting &rhs) const;
+
+
+ /*
+ * Iterator factory, for current query evaluation framework.
+ */
+ virtual search::queryeval::SearchIterator *
+ createIterator(const fef::TermFieldMatchDataArray &matchData) const;
+};
+
+} // namespace fakedata
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/test/fakedata/fakememtreeocc.cpp b/searchlib/src/vespa/searchlib/test/fakedata/fakememtreeocc.cpp
new file mode 100644
index 00000000000..01bd2551989
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/test/fakedata/fakememtreeocc.cpp
@@ -0,0 +1,430 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".fakememtreeocc");
+#include <vespa/searchlib/queryeval/iterators.h>
+#include "fakememtreeocc.h"
+#include <vespa/searchlib/btree/btreeroot.hpp>
+#include <vespa/searchlib/btree/btreeiterator.hpp>
+#include <vespa/searchlib/btree/btreenodeallocator.hpp>
+#include <vespa/searchlib/btree/btreenode.hpp>
+#include <vespa/searchlib/btree/btreenodestore.hpp>
+#include <vespa/searchlib/memoryindex/postingiterator.h>
+#include "fpfactory.h"
+#include <vespa/searchlib/util/postingpriorityqueue.h>
+
+using search::fef::TermFieldMatchData;
+using search::fef::TermFieldMatchDataPosition;
+
+namespace search
+{
+
+namespace fakedata
+{
+
+static FPFactoryInit
+init(std::make_pair("MemTreeOcc",
+ makeFPFactory<FakeMemTreeOccFactory>));
+
+static FPFactoryInit
+init2(std::make_pair("MemTreeOcc2",
+ makeFPFactory<FakeMemTreeOcc2Factory>));
+
+FakeMemTreeOcc::FakeMemTreeOcc(const FakeWord &fw,
+ NodeAllocator &allocator,
+ Tree &tree,
+ uint64_t featureBitSize,
+ const FakeMemTreeOccMgr &mgr)
+ : FakePosting(fw.getName() + ".memtreeocc"),
+ _allocator(allocator),
+ _tree(tree),
+ _fieldsParams(fw.getFieldsParams()),
+ _packedIndex(fw.getPackedIndex()),
+ _featureBitSize(featureBitSize),
+ _mgr(mgr),
+ _docIdLimit(0),
+ _hitDocs(0)
+{
+ _docIdLimit = fw._docIdLimit;
+ _hitDocs = fw._postings.size();
+}
+
+
+FakeMemTreeOcc::FakeMemTreeOcc(const FakeWord &fw,
+ NodeAllocator &allocator,
+ Tree &tree,
+ uint64_t featureBitSize,
+ const FakeMemTreeOccMgr &mgr,
+ const char *suffix)
+ : FakePosting(fw.getName() + suffix),
+ _allocator(allocator),
+ _tree(tree),
+ _fieldsParams(fw.getFieldsParams()),
+ _packedIndex(fw.getPackedIndex()),
+ _featureBitSize(featureBitSize),
+ _mgr(mgr),
+ _docIdLimit(0),
+ _hitDocs(0)
+{
+ _docIdLimit = fw._docIdLimit;
+ _hitDocs = fw._postings.size();
+}
+
+
+FakeMemTreeOcc::~FakeMemTreeOcc(void)
+{
+}
+
+
+void
+FakeMemTreeOcc::forceLink(void)
+{
+}
+
+
+size_t
+FakeMemTreeOcc::bitSize(void) const
+{
+ return _tree.bitSize(_allocator) + _featureBitSize;
+}
+
+
+bool
+FakeMemTreeOcc::hasWordPositions(void) const
+{
+ return true;
+}
+
+
+int
+FakeMemTreeOcc::lowLevelSinglePostingScan(void) const
+{
+ return 0;
+}
+
+
+int
+FakeMemTreeOcc::lowLevelSinglePostingScanUnpack(void) const
+{
+ return 0;
+}
+
+
+int
+FakeMemTreeOcc::
+lowLevelAndPairPostingScan(const FakePosting &rhs) const
+{
+ (void) rhs;
+ return 0;
+}
+
+
+int
+FakeMemTreeOcc::
+lowLevelAndPairPostingScanUnpack(const FakePosting &rhs) const
+{
+ (void) rhs;
+ return 0;
+}
+
+
+search::queryeval::SearchIterator *
+FakeMemTreeOcc::
+createIterator(const fef::TermFieldMatchDataArray &matchData) const
+{
+ return new search::memoryindex::PostingIterator(_tree.begin(_allocator),
+ _mgr._featureStore,
+ _packedIndex,
+ matchData);
+}
+
+
+FakeMemTreeOccMgr::FakeMemTreeOccMgr(const Schema &schema)
+ : _generationHandler(),
+ _allocator(),
+ _fw2WordIdx(),
+ _postingIdxs(),
+ _fakeWords(),
+ _featureSizes(),
+ _featureStore(schema)
+{
+}
+
+
+FakeMemTreeOccMgr::~FakeMemTreeOccMgr(void)
+{
+ std::vector<std::shared_ptr<PostingIdx> >::iterator
+ it(_postingIdxs.begin());
+ std::vector<std::shared_ptr<PostingIdx> >::iterator
+ ite(_postingIdxs.end());
+
+ for (; it != ite; ++it) {
+ (*it)->clear();
+ }
+ sync();
+}
+
+
+void
+FakeMemTreeOccMgr::freeze(void)
+{
+ _allocator.freeze();
+}
+
+
+void
+FakeMemTreeOccMgr::transferHoldLists(void)
+{
+ _allocator.transferHoldLists(_generationHandler.getCurrentGeneration());
+}
+
+void
+FakeMemTreeOccMgr::incGeneration(void)
+{
+ _generationHandler.incGeneration();
+}
+
+
+void
+FakeMemTreeOccMgr::trimHoldLists(void)
+{
+ _allocator.trimHoldLists(_generationHandler.getFirstUsedGeneration());
+}
+
+
+void
+FakeMemTreeOccMgr::sync(void)
+{
+ freeze();
+ transferHoldLists();
+ incGeneration();
+ trimHoldLists();
+}
+
+
+void
+FakeMemTreeOccMgr::add(uint32_t wordIdx, index::DocIdAndFeatures &features)
+{
+ typedef FeatureStore::RefType RefType;
+
+ const FakeWord *fw = _fakeWords[wordIdx];
+
+ std::pair<EntryRef, uint64_t> r =
+ _featureStore.addFeatures(fw->getPackedIndex(), features);
+
+ _featureSizes[wordIdx] += RefType::align((r.second + 7) / 8) * 8;
+
+ _unflushed.push_back(PendingOp(wordIdx, features._docId, r.first));
+
+ if (_unflushed.size() >= 10000)
+ flush();
+}
+
+
+void
+FakeMemTreeOccMgr::remove(uint32_t wordIdx, uint32_t docId)
+{
+ _unflushed.push_back(PendingOp(wordIdx, docId));
+
+ if (_unflushed.size() >= 10000)
+ flush();
+}
+
+
+void
+FakeMemTreeOccMgr::sortUnflushed(void)
+{
+ typedef std::vector<PendingOp>::iterator I;
+ uint32_t seq = 0;
+ for (I i(_unflushed.begin()), ie(_unflushed.end()); i != ie; ++i) {
+ i->setSeq(++seq);
+ }
+ std::sort(_unflushed.begin(), _unflushed.end());
+}
+
+
+void
+FakeMemTreeOccMgr::flush(void)
+{
+ typedef FeatureStore::RefType RefType;
+ typedef std::vector<PendingOp>::iterator I;
+
+ if (_unflushed.empty())
+ return;
+
+ uint32_t lastWord = std::numeric_limits<uint32_t>::max();
+ sortUnflushed();
+ for (I i(_unflushed.begin()), ie(_unflushed.end()); i != ie; ++i) {
+ uint32_t wordIdx = i->getWordIdx();
+ uint32_t docId = i->getDocId();
+ PostingIdx &pidx(*_postingIdxs[wordIdx].get());
+ Tree &tree = pidx._tree;
+ Tree::Iterator &itr = pidx._iterator;
+ const FakeWord *fw = _fakeWords[wordIdx];
+ if (wordIdx != lastWord)
+ itr.lower_bound(docId);
+ else if (itr.valid() && itr.getKey() < docId) {
+ itr.linearSeek(docId);
+ }
+ lastWord = wordIdx;
+ if (i->getRemove()) {
+ if (itr.valid() && itr.getKey() == docId) {
+ uint64_t bits = _featureStore.bitSize(fw->getPackedIndex(),
+ itr.getData());
+ _featureSizes[wordIdx] -= RefType::align((bits + 7) / 8) * 8;
+ tree.remove(itr);
+ }
+ } else {
+ if (!itr.valid() || docId < itr.getKey()) {
+ tree.insert(itr, docId, i->getFeatureRef().ref());
+ }
+ }
+ }
+ _unflushed.clear();
+ sync();
+}
+
+void
+FakeMemTreeOccMgr::compactTrees(void)
+{
+ // compact full trees by calling incremental compaction methods in a loop
+
+ std::vector<uint32_t> toHold = _allocator.startCompact();
+ for (uint32_t wordIdx = 0; wordIdx < _postingIdxs.size(); ++wordIdx) {
+ PostingIdx &pidx(*_postingIdxs[wordIdx].get());
+ Tree &tree = pidx._tree;
+ Tree::Iterator &itr = pidx._iterator;
+ itr.begin();
+ tree.setRoot(itr.moveFirstLeafNode(tree.getRoot()), _allocator);
+ while (itr.valid()) {
+ // LOG(info, "Leaf moved to %d", UNWRAP(itr.getKey()));
+ itr.moveNextLeafNode();
+ }
+ }
+ _allocator.finishCompact(toHold);
+ sync();
+}
+
+void
+FakeMemTreeOccMgr::finalize(void)
+{
+ flush();
+}
+
+
+FakeMemTreeOccFactory::FakeMemTreeOccFactory(const Schema &schema)
+ : _mgr(schema)
+{
+}
+
+
+FakeMemTreeOccFactory::~FakeMemTreeOccFactory(void)
+{
+}
+
+
+FakePosting::SP
+FakeMemTreeOccFactory::make(const FakeWord &fw)
+{
+ std::map<const FakeWord *, uint32_t>::const_iterator
+ i(_mgr._fw2WordIdx.find(&fw));
+
+ if (i == _mgr._fw2WordIdx.end())
+ abort();
+
+ uint32_t wordIdx = i->second;
+
+ assert(_mgr._postingIdxs.size() > wordIdx);
+
+ return FakePosting::SP(new FakeMemTreeOcc(fw, _mgr._allocator,
+ _mgr._postingIdxs[wordIdx]->_tree,
+ _mgr._featureSizes[wordIdx],
+ _mgr));
+}
+
+
+void
+FakeMemTreeOccFactory::setup(const std::vector<const FakeWord *> &fws)
+{
+ typedef FakeMemTreeOccMgr::PostingIdx PostingIdx;
+ std::vector<FakeWord::RandomizedReader> r;
+ uint32_t wordIdx = 0;
+ std::vector<const FakeWord *>::const_iterator fwi(fws.begin());
+ std::vector<const FakeWord *>::const_iterator fwe(fws.end());
+ while (fwi != fwe) {
+ _mgr._fakeWords.push_back(*fwi);
+ _mgr._featureSizes.push_back(0);
+ _mgr._fw2WordIdx[*fwi] = wordIdx;
+ _mgr._postingIdxs.push_back(
+ std::shared_ptr<PostingIdx>
+ (new PostingIdx(_mgr._allocator)));
+ r.push_back(FakeWord::RandomizedReader());
+ r.back().setup(*fwi, wordIdx);
+ ++fwi;
+ ++wordIdx;
+ }
+
+ PostingPriorityQueue<FakeWord::RandomizedReader> heap;
+ std::vector<FakeWord::RandomizedReader>::iterator i(r.begin());
+ std::vector<FakeWord::RandomizedReader>::iterator ie(r.end());
+ while (i != ie) {
+ i->read();
+ if (i->isValid())
+ heap.initialAdd(&*i);
+#if 0
+ heap.merge(_mgr, 4);
+#endif
+ ++i;
+ }
+ heap.merge(_mgr, 4);
+ assert(heap.empty());
+ _mgr.finalize();
+}
+
+
+FakeMemTreeOcc2Factory::FakeMemTreeOcc2Factory(const Schema &schema)
+ : FakeMemTreeOccFactory(schema)
+{
+}
+
+
+FakeMemTreeOcc2Factory::~FakeMemTreeOcc2Factory(void)
+{
+}
+
+
+FakePosting::SP
+FakeMemTreeOcc2Factory::make(const FakeWord &fw)
+{
+ std::map<const FakeWord *, uint32_t>::const_iterator
+ i(_mgr._fw2WordIdx.find(&fw));
+
+ if (i == _mgr._fw2WordIdx.end())
+ abort();
+
+ uint32_t wordIdx = i->second;
+
+ assert(_mgr._postingIdxs.size() > wordIdx);
+
+ return FakePosting::SP(new FakeMemTreeOcc(fw, _mgr._allocator,
+ _mgr._postingIdxs[wordIdx]->_tree,
+ _mgr._featureSizes[wordIdx],
+ _mgr,
+ ".memtreeocc2"));
+}
+
+
+void
+FakeMemTreeOcc2Factory::setup(const std::vector<const FakeWord *> &fws)
+{
+ FakeMemTreeOccFactory::setup(fws);
+ LOG(info, "start compacting trees");
+ _mgr.compactTrees();
+ LOG(info, "done compacting trees");
+}
+
+
+} // namespace fakedata
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/test/fakedata/fakememtreeocc.h b/searchlib/src/vespa/searchlib/test/fakedata/fakememtreeocc.h
new file mode 100644
index 00000000000..111f3b6ba54
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/test/fakedata/fakememtreeocc.h
@@ -0,0 +1,287 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include "fakeword.h"
+#include "fakeposting.h"
+#include "fpfactory.h"
+#include <vespa/searchlib/memoryindex/dictionary.h>
+#include <vespa/searchlib/memoryindex/featurestore.h>
+#include <vespa/searchlib/bitcompression/compression.h>
+#include <vespa/searchlib/bitcompression/posocccompression.h>
+
+namespace search
+{
+
+namespace fakedata
+{
+
+class FakeMemTreeOccMgr : public FakeWord::RandomizedWriter
+{
+public:
+ typedef memoryindex::Dictionary::PostingList Tree;
+ typedef Tree::NodeAllocatorType NodeAllocator;
+ typedef memoryindex::FeatureStore FeatureStore;
+ typedef btree::EntryRef EntryRef;
+ typedef index::Schema Schema;
+ typedef bitcompression::PosOccFieldsParams PosOccFieldsParams;
+
+ vespalib::GenerationHandler _generationHandler;
+ NodeAllocator _allocator;
+
+ std::map<const FakeWord *, uint32_t> _fw2WordIdx;
+ class PostingIdx
+ {
+ public:
+ Tree _tree;
+ Tree::Iterator _iterator;
+
+ PostingIdx(NodeAllocator &allocator)
+ : _tree(),
+ _iterator(_tree.getRoot(), allocator)
+ {
+ }
+
+ void
+ clear(void)
+ {
+ _tree.clear(_iterator.getAllocator());
+ _iterator = _tree.begin(_iterator.getAllocator());
+ }
+ };
+
+ class PendingOp
+ {
+ uint32_t _wordIdx;
+ uint32_t _docId;
+ EntryRef _features;
+ bool _removal;
+ uint32_t _seq;
+
+ public:
+ PendingOp(uint32_t wordIdx, uint32_t docId)
+ : _wordIdx(wordIdx),
+ _docId(docId),
+ _features(),
+ _removal(true),
+ _seq(0)
+ {
+ }
+
+ PendingOp(uint32_t wordIdx, uint32_t docId, EntryRef features)
+ : _wordIdx(wordIdx),
+ _docId(docId),
+ _features(features),
+ _removal(false),
+ _seq(0)
+ {
+ }
+
+ void
+ setSeq(uint32_t seq)
+ {
+ _seq = seq;
+ }
+
+ uint32_t
+ getWordIdx(void) const
+ {
+ return _wordIdx;
+ }
+
+ uint32_t
+ getDocId(void) const
+ {
+ return _docId;
+ }
+
+ EntryRef
+ getFeatureRef(void) const
+ {
+ return _features;
+ }
+
+ bool
+ getRemove(void) const
+ {
+ return _removal;
+ }
+
+ bool
+ operator<(const PendingOp &rhs) const
+ {
+ if (_wordIdx != rhs._wordIdx)
+ return _wordIdx < rhs._wordIdx;
+ if (_docId != rhs._docId)
+ return _docId < rhs._docId;
+ return _seq < rhs._seq;
+ }
+ };
+
+ std::vector<std::shared_ptr<PostingIdx> > _postingIdxs;
+ std::vector<const FakeWord *> _fakeWords;
+ std::vector<uint64_t> _featureSizes;
+ std::vector<PendingOp> _unflushed;
+
+ FeatureStore _featureStore;
+
+ FakeMemTreeOccMgr(const Schema &schema);
+
+ virtual
+ ~FakeMemTreeOccMgr(void);
+
+ void
+ freeze(void);
+
+ void
+ transferHoldLists(void);
+
+ void
+ incGeneration(void);
+
+ void
+ trimHoldLists(void);
+
+ void
+ sync(void);
+
+ virtual void
+ add(uint32_t wordIdx, index::DocIdAndFeatures &features);
+
+ virtual void
+ remove(uint32_t wordIdx, uint32_t docId);
+
+ void
+ sortUnflushed(void);
+
+ void
+ flush(void);
+
+ void
+ compactTrees(void);
+
+ void
+ finalize(void);
+};
+
+
+class FakeMemTreeOccFactory : public FPFactory
+{
+public:
+ typedef FakeMemTreeOccMgr::Tree Tree;
+ typedef FakeMemTreeOccMgr::NodeAllocator NodeAllocator;
+ typedef index::Schema Schema;
+
+ FakeMemTreeOccMgr _mgr;
+
+ FakeMemTreeOccFactory(const Schema &schema);
+
+ virtual
+ ~FakeMemTreeOccFactory(void);
+
+ virtual FakePosting::SP
+ make(const FakeWord &fw);
+
+ virtual void
+ setup(const std::vector<const FakeWord *> &fws);
+};
+
+class FakeMemTreeOcc2Factory : public FakeMemTreeOccFactory
+{
+public:
+ FakeMemTreeOcc2Factory(const Schema &schema);
+
+ virtual
+ ~FakeMemTreeOcc2Factory(void);
+
+ virtual FakePosting::SP
+ make(const FakeWord &fw);
+
+ virtual void
+ setup(const std::vector<const FakeWord *> &fws);
+};
+
+
+/*
+ * Updateable memory tree format.
+ */
+class FakeMemTreeOcc : public FakePosting
+{
+public:
+ typedef FakeMemTreeOccMgr::Tree Tree;
+ typedef FakeMemTreeOccMgr::NodeAllocator NodeAllocator;
+ typedef FakeMemTreeOccMgr::PosOccFieldsParams PosOccFieldsParams;
+
+
+private:
+ NodeAllocator &_allocator;
+ Tree &_tree;
+ const PosOccFieldsParams &_fieldsParams;
+ uint32_t _packedIndex;
+ uint64_t _featureBitSize;
+ const FakeMemTreeOccMgr &_mgr;
+ unsigned int _docIdLimit;
+ unsigned int _hitDocs;
+public:
+ FakeMemTreeOcc(const FakeWord &fakeword,
+ NodeAllocator &allocator,
+ Tree &tree,
+ uint64_t featureBitSize,
+ const FakeMemTreeOccMgr &mgr);
+
+ FakeMemTreeOcc(const FakeWord &fakeword,
+ NodeAllocator &allocator,
+ Tree &tree,
+ uint64_t featureBitSize,
+ const FakeMemTreeOccMgr &mgr,
+ const char *suffix);
+
+ ~FakeMemTreeOcc(void);
+
+ static void
+ forceLink(void);
+
+ /*
+ * Size of posting list, in bits.
+ */
+ size_t
+ bitSize(void) const;
+
+ virtual bool
+ hasWordPositions(void) const;
+
+ /*
+ * Single posting list performance, without feature unpack.
+ */
+ virtual int
+ lowLevelSinglePostingScan(void) const;
+
+ /*
+ * Single posting list performance, with feature unpack.
+ */
+ virtual int
+ lowLevelSinglePostingScanUnpack(void) const;
+
+ /*
+ * Two posting lists performance (same format) without feature unpack.
+ */
+ virtual int
+ lowLevelAndPairPostingScan(const FakePosting &rhs) const;
+
+ /*
+ * Two posting lists performance (same format) with feature unpack.
+ */
+ virtual int
+ lowLevelAndPairPostingScanUnpack(const FakePosting &rhs) const;
+
+
+ /*
+ * Iterator factory, for current query evaluation framework.
+ */
+ virtual search::queryeval::SearchIterator *
+ createIterator(const fef::TermFieldMatchDataArray &matchData) const;
+};
+
+} // namespace fakedata
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/test/fakedata/fakeposting.cpp b/searchlib/src/vespa/searchlib/test/fakedata/fakeposting.cpp
new file mode 100644
index 00000000000..8d0915d4966
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/test/fakedata/fakeposting.cpp
@@ -0,0 +1,61 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".fakeposting");
+#include "fakeposting.h"
+
+namespace search
+{
+
+namespace fakedata
+{
+
+FakePosting::FakePosting(const std::string &name)
+ : _name(name)
+{
+}
+
+
+FakePosting::~FakePosting(void)
+{
+}
+
+
+size_t
+FakePosting::skipBitSize(void) const
+{
+ return l1SkipBitSize() + l2SkipBitSize() + l3SkipBitSize() +
+ l4SkipBitSize();
+}
+
+size_t
+FakePosting::l1SkipBitSize(void) const
+{
+ return 0;
+}
+
+
+size_t
+FakePosting::l2SkipBitSize(void) const
+{
+ return 0;
+}
+
+
+size_t
+FakePosting::l3SkipBitSize(void) const
+{
+ return 0;
+}
+
+
+size_t
+FakePosting::l4SkipBitSize(void) const
+{
+ return 0;
+}
+
+} // namespace fakedata
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/test/fakedata/fakeposting.h b/searchlib/src/vespa/searchlib/test/fakedata/fakeposting.h
new file mode 100644
index 00000000000..946d1e05379
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/test/fakedata/fakeposting.h
@@ -0,0 +1,105 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/searchlib/queryeval/searchiterator.h>
+
+#include <vespa/searchlib/fef/termfieldmatchdata.h>
+#include <vespa/searchlib/fef/termfieldmatchdataarray.h>
+
+using search::fef::TermFieldMatchData;
+using search::fef::TermFieldMatchDataPosition;
+
+#include <map>
+#include <vector>
+#include <string>
+
+namespace search
+{
+
+namespace fakedata
+{
+
+/*
+ * Base class for faked posting list formats.
+ */
+class FakePosting
+{
+private:
+ FakePosting(const FakePosting &);
+
+ FakePosting &
+ operator=(const FakePosting &);
+
+ std::string _name;
+public:
+ typedef std::shared_ptr<FakePosting> SP;
+
+ FakePosting(const std::string &name);
+
+ virtual ~FakePosting(void);
+
+ /*
+ * Size of posting list, in bits.
+ */
+ virtual size_t
+ bitSize(void) const = 0;
+
+ virtual size_t
+ skipBitSize(void) const;
+
+ virtual size_t
+ l1SkipBitSize(void) const;
+
+ virtual size_t
+ l2SkipBitSize(void) const;
+
+ virtual size_t
+ l3SkipBitSize(void) const;
+
+ virtual size_t
+ l4SkipBitSize(void) const;
+
+ virtual bool
+ hasWordPositions(void) const = 0;
+
+ /*
+ * Single posting list performance, without feature unpack.
+ */
+ virtual int
+ lowLevelSinglePostingScan(void) const = 0;
+
+ /*
+ * Single posting list performance, with feature unpack.
+ */
+ virtual int
+ lowLevelSinglePostingScanUnpack(void) const = 0;
+
+ /*
+ * Two posting lists performance (same format) without feature unpack.
+ */
+ virtual int
+ lowLevelAndPairPostingScan(const FakePosting &rhs) const = 0;
+
+ /*
+ * Two posting lists performance (same format) with feature unpack.
+ */
+ virtual int
+ lowLevelAndPairPostingScanUnpack(const FakePosting &rhs) const = 0;
+
+
+ /*
+ * Iterator factory, for current query evaluation framework.
+ */
+ virtual search::queryeval::SearchIterator *
+ createIterator(const fef::TermFieldMatchDataArray &matchData) const = 0;
+
+ const std::string &getName(void) const
+ {
+ return _name;
+ }
+};
+
+} // namespace fakedata
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/test/fakedata/fakeword.cpp b/searchlib/src/vespa/searchlib/test/fakedata/fakeword.cpp
new file mode 100644
index 00000000000..5ad3140b5b2
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/test/fakedata/fakeword.cpp
@@ -0,0 +1,796 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".fakeword");
+#include "fakeword.h"
+
+#include <vespa/searchlib/index/postinglistfile.h>
+#include <vespa/searchlib/index/postinglistcountfile.h>
+#include <vespa/searchlib/index/docidandfeatures.h>
+#include <vespa/searchlib/bitcompression/compression.h>
+#include <vespa/searchlib/bitcompression/posocccompression.h>
+
+using search::fef::TermFieldMatchData;
+using search::fef::TermFieldMatchDataPosition;
+using search::index::WordDocElementFeatures;
+using search::index::WordDocElementWordPosFeatures;
+using search::index::PostingListFileSeqWrite;
+using search::index::DocIdAndFeatures;
+using search::index::DocIdAndPosOccFeatures;
+using search::index::PostingListCounts;
+using search::index::PostingListFileSeqRead;
+using search::diskindex::FieldReader;
+using search::diskindex::FieldWriter;
+
+namespace search
+{
+
+namespace fakedata
+{
+
+
+static void
+fillbitset(search::BitVector *bitvector,
+ unsigned int size,
+ search::Rand48 &rnd)
+{
+ unsigned int range;
+ unsigned int idx;
+ unsigned int j;
+
+ range = bitvector->size();
+ assert(range > 0);
+ --range;
+ bitvector->invalidateCachedCount();
+
+ assert(size <= range);
+ if (size > range / 2) {
+ if (range > 0)
+ bitvector->setInterval(1, range);
+
+ for (j = range; j > size; --j) {
+ do {
+ idx = (rnd.lrand48() % range) + 1u;
+ } while (!bitvector->testBit(idx));
+ bitvector->clearBit(idx);
+ }
+ } else {
+ // bitvector->reset();
+ bitvector->invalidateCachedCount();
+ for (j = bitvector->countTrueBits(); j < size; j++) {
+ do {
+ idx = (rnd.lrand48() % range) + 1u;
+ } while (bitvector->testBit(idx));
+ bitvector->setBit(idx);
+ }
+ }
+}
+
+
+static void
+fillcorrelatedbitset(search::BitVector &bitvector,
+ unsigned int size,
+ const FakeWord &otherword,
+ search::Rand48 &rnd)
+{
+ const FakeWord::DocWordFeatureList &opostings = otherword._postings;
+
+ unsigned int range = opostings.size();
+ search::BitVector::UP corrmap(search::BitVector::create(range + 1));
+
+ if (size > range)
+ size = range;
+ fillbitset(corrmap.get(), size, rnd);
+
+ unsigned int idx = corrmap->getNextTrueBit(1u);
+ while (idx < range) {
+ unsigned int docId = opostings[idx - 1]._docId;
+ bitvector.setBit(docId);
+ ++idx;
+ if (idx > range)
+ break;
+ idx = corrmap->getNextTrueBit(idx);
+ }
+}
+
+
+FakeWord::DocWordPosFeature::DocWordPosFeature(void)
+ : _elementId(0),
+ _wordPos(0),
+ _elementWeight(1),
+ _elementLen(0)
+{
+}
+
+
+FakeWord::DocWordPosFeature::~DocWordPosFeature(void)
+{
+}
+
+
+FakeWord::DocWordCollapsedFeature::DocWordCollapsedFeature(void)
+{
+}
+
+
+FakeWord::DocWordCollapsedFeature::~DocWordCollapsedFeature(void)
+{
+}
+
+
+FakeWord::DocWordFeature::DocWordFeature(void)
+ : _docId(0),
+ _collapsedDocWordFeatures(),
+ _positions(0),
+ _accPositions(0)
+{
+}
+
+FakeWord::DocWordFeature::~DocWordFeature(void)
+{
+}
+
+FakeWord::FakeWord(uint32_t docIdLimit,
+ const std::vector<uint32_t> & docIds,
+ const std::string &name,
+ const PosOccFieldsParams &fieldsParams,
+ uint32_t packedIndex)
+ : _postings(),
+ _wordPosFeatures(),
+ _extraPostings(),
+ _extraWordPosFeatures(),
+ _docIdLimit(docIdLimit),
+ _name(name),
+ _fieldsParams(fieldsParams),
+ _packedIndex(packedIndex)
+{
+ search::BitVector::UP bitmap(search::BitVector::create(docIdLimit));
+ for (uint32_t docId : docIds) {
+ bitmap->setBit(docId);
+ }
+ search::Rand48 rnd;
+ fakeup(*bitmap, rnd, _postings, _wordPosFeatures);
+}
+
+FakeWord::FakeWord(uint32_t docIdLimit,
+ uint32_t wordDocs,
+ uint32_t tempWordDocs,
+ const std::string &name,
+ search::Rand48 &rnd,
+ const PosOccFieldsParams &fieldsParams,
+ uint32_t packedIndex)
+ : _postings(),
+ _wordPosFeatures(),
+ _extraPostings(),
+ _extraWordPosFeatures(),
+ _docIdLimit(docIdLimit),
+ _name(name),
+ _fieldsParams(fieldsParams),
+ _packedIndex(packedIndex)
+{
+ search::BitVector::UP bitmap(search::BitVector::create(docIdLimit));
+
+ fillbitset(bitmap.get(), wordDocs, rnd);
+
+ fakeup(*bitmap, rnd, _postings, _wordPosFeatures);
+ fakeupTemps(rnd, docIdLimit, tempWordDocs);
+ setupRandomizer(rnd);
+}
+
+
+FakeWord::FakeWord(uint32_t docIdLimit,
+ uint32_t wordDocs,
+ uint32_t tempWordDocs,
+ const std::string &name,
+ const FakeWord &otherWord,
+ size_t overlapDocs,
+ search::Rand48 &rnd,
+ const PosOccFieldsParams &fieldsParams,
+ uint32_t packedIndex)
+ : _postings(),
+ _wordPosFeatures(),
+ _docIdLimit(docIdLimit),
+ _name(name),
+ _fieldsParams(fieldsParams),
+ _packedIndex(packedIndex)
+{
+ search::BitVector::UP bitmap(search::BitVector::create(docIdLimit));
+
+ if (wordDocs * 2 < docIdLimit &&
+ overlapDocs > 0)
+ fillcorrelatedbitset(*bitmap, overlapDocs, otherWord, rnd);
+ fillbitset(bitmap.get(), wordDocs, rnd);
+
+ fakeup(*bitmap, rnd, _postings, _wordPosFeatures);
+ fakeupTemps(rnd, docIdLimit, tempWordDocs);
+ setupRandomizer(rnd);
+}
+
+
+FakeWord::~FakeWord(void)
+{
+}
+
+
+void
+FakeWord::fakeup(search::BitVector &bitmap,
+ search::Rand48 &rnd,
+ DocWordFeatureList &postings,
+ DocWordPosFeatureList &wordPosFeatures)
+{
+ DocWordPosFeatureList wpf;
+ unsigned int idx;
+ uint32_t numFields = _fieldsParams.getNumFields();
+ assert(numFields == 1u);
+ (void) numFields;
+ uint32_t docIdLimit = bitmap.size();
+ idx = bitmap.getNextTrueBit(1u);
+ while (idx < docIdLimit) {
+ DocWordFeature dwf;
+ unsigned int positions;
+
+ dwf._docId = idx;
+ positions = ((rnd.lrand48() % 10) == 0) ? 2 : 1;
+ dwf._positions = positions;
+ wpf.clear();
+ for (unsigned int j = 0; j < positions; ++j) {
+ DocWordPosFeature dwpf;
+ dwpf._wordPos = rnd.lrand48() % 8192;
+ dwpf._elementId = 0;
+ if (_fieldsParams.getFieldParams()[0]._hasElements)
+ dwpf._elementId = rnd.lrand48() % 4;
+ wpf.push_back(dwpf);
+ }
+ if (positions > 1) {
+ /* Sort wordpos list and "avoid" duplicate positions */
+ std::sort(wpf.begin(), wpf.end());
+ }
+ do {
+ DocWordPosFeatureList::iterator ie(wpf.end());
+ DocWordPosFeatureList::iterator i(wpf.begin());
+ while (i != ie) {
+ uint32_t lastwordpos = i->_wordPos;
+ DocWordPosFeatureList::iterator pi(i);
+ ++i;
+ while (i != ie &&
+ pi->_elementId == i->_elementId) {
+ if (i->_wordPos <= lastwordpos)
+ i->_wordPos = lastwordpos + 1;
+ lastwordpos = i->_wordPos;
+ ++i;
+ }
+ uint32_t elementLen = (rnd.lrand48() % 8192) + 1 + lastwordpos;
+ int32_t elementWeight = 1;
+ if (_fieldsParams.getFieldParams()[0].
+ _hasElementWeights) {
+ uint32_t uWeight = rnd.lrand48() % 2001;
+ if ((uWeight & 1) != 0)
+ elementWeight = - (uWeight >> 1) - 1;
+ else
+ elementWeight = (uWeight >> 1);
+ assert(elementWeight <= 1000);
+ assert(elementWeight >= -1000);
+ }
+ while (pi != i) {
+ pi->_elementLen = elementLen;
+ pi->_elementWeight = elementWeight;
+ ++pi;
+ }
+ }
+ } while (0);
+ dwf._accPositions = wordPosFeatures.size();
+ assert(dwf._positions == wpf.size());
+ postings.push_back(dwf);
+ DocWordPosFeatureList::iterator ie(wpf.end());
+ DocWordPosFeatureList::iterator i(wpf.begin());
+ while (i != ie) {
+ wordPosFeatures.push_back(*i);
+ ++i;
+ }
+ ++idx;
+ if (idx >= docIdLimit)
+ break;
+ idx = bitmap.getNextTrueBit(idx);
+ }
+}
+
+
+void
+FakeWord::fakeupTemps(search::Rand48 &rnd,
+ uint32_t docIdLimit,
+ uint32_t tempWordDocs)
+{
+ uint32_t maxTempWordDocs = docIdLimit / 2;
+ tempWordDocs = std::min(tempWordDocs, maxTempWordDocs);
+ if (tempWordDocs > 0) {
+ search::BitVector::UP bitmap(search::BitVector::create(docIdLimit));
+ fillbitset(bitmap.get(), tempWordDocs, rnd);
+ fakeup(*bitmap, rnd, _extraPostings, _extraWordPosFeatures);
+ }
+}
+
+void
+FakeWord::setupRandomizer(search::Rand48 &rnd)
+{
+ typedef DocWordFeatureList DWFL;
+ Randomizer randomAdd;
+ Randomizer randomRem;
+
+ DWFL::const_iterator d(_postings.begin());
+ DWFL::const_iterator de(_postings.end());
+ int32_t ref = 0;
+
+ while (d != de) {
+ do {
+ randomAdd._random = rnd.lrand48();
+ } while (randomAdd._random < 10000);
+ randomAdd._ref = ref;
+ assert(!randomAdd.isExtra());
+ assert(!randomAdd.isRemove());
+ _randomizer.push_back(randomAdd);
+ ++d;
+ ++ref;
+ }
+
+ DWFL::const_iterator ed(_extraPostings.begin());
+ DWFL::const_iterator ede(_extraPostings.end());
+
+ int32_t eref = -1;
+ uint32_t tref = 0;
+ ref = 0;
+ int32_t refmax = _randomizer.size();
+ while (ed != ede) {
+ while (ref < refmax && _postings[ref]._docId < ed->_docId)
+ ++ref;
+ if (ref < refmax && _postings[ref]._docId == ed->_docId) {
+ randomAdd._random = rnd.lrand48() % (_randomizer[ref]._random - 1);
+ randomRem._random = _randomizer[ref]._random - 1;
+ } else {
+ do {
+ randomAdd._random = rnd.lrand48();
+ randomRem._random = rnd.lrand48();
+ } while (randomAdd._random >= randomRem._random);
+ }
+ randomAdd._ref = eref;
+ randomRem._ref = eref - 1;
+ assert(randomAdd.isExtra());
+ assert(!randomAdd.isRemove());
+ assert(randomAdd.extraIdx() == tref);
+ assert(randomRem.isExtra());
+ assert(randomRem.isRemove());
+ assert(randomRem.extraIdx() == tref);
+ _randomizer.push_back(randomAdd);
+ _randomizer.push_back(randomRem);
+ ++ed;
+ eref -= 2;
+ ++tref;
+ }
+ std::sort(_randomizer.begin(), _randomizer.end());
+}
+
+
+void
+FakeWord::addDocIdBias(uint32_t docIdBias)
+{
+ typedef DocWordFeatureList DWFL;
+ DWFL::iterator d(_postings.begin());
+ DWFL::iterator de(_postings.end());
+ for (; d != de; ++d) {
+ d->_docId += docIdBias;
+ }
+ d = _extraPostings.begin();
+ de = _extraPostings.end();
+ for (; d != de; ++d) {
+ d->_docId += docIdBias;
+ }
+ _docIdLimit += docIdBias;
+}
+
+
+bool
+FakeWord::validate(search::queryeval::SearchIterator *iterator,
+ const fef::TermFieldMatchDataArray &matchData,
+ uint32_t stride,
+ bool verbose) const
+{
+ iterator->initFullRange();
+ uint32_t docId = 0;
+
+ typedef DocWordFeatureList DWFL;
+ typedef DocWordPosFeatureList DWPFL;
+ typedef TermFieldMatchData::PositionsIterator TMDPI;
+
+ DWFL::const_iterator d(_postings.begin());
+ DWFL::const_iterator de(_postings.end());
+ DWPFL::const_iterator p(_wordPosFeatures.begin());
+ DWPFL::const_iterator pe(_wordPosFeatures.end());
+
+ if (verbose)
+ printf("Start validate word '%s'\n", _name.c_str());
+ int strideResidue = stride;
+ while (d != de) {
+ if (strideResidue > 1) {
+ --strideResidue;
+ unsigned int positions = d->_positions;
+ while (positions > 0) {
+ ++p;
+ --positions;
+ }
+ } else {
+ strideResidue = stride;
+ docId = d->_docId;
+ bool seekRes = iterator->seek(docId);
+ assert(seekRes);
+ (void) seekRes;
+ assert(d != de);
+ unsigned int positions = d->_positions;
+ iterator->unpack(docId);
+ for (size_t lfi = 0; lfi < matchData.size(); ++lfi) {
+ if (matchData[lfi]->getDocId() != docId)
+ continue;
+ TMDPI mdpe = matchData[lfi]->end();
+ TMDPI mdp = matchData[lfi]->begin();
+ while (mdp != mdpe) {
+ assert(p != pe);
+ assert(positions > 0);
+ assert(p->_wordPos == mdp->getPosition());
+ assert(p->_elementId == mdp->getElementId());
+ assert(p->_elementWeight == mdp->getElementWeight());
+ assert(p->_elementLen == mdp->getElementLen());
+ ++p;
+ ++mdp;
+ --positions;
+ }
+ }
+ assert(positions == 0);
+ }
+ ++d;
+ }
+ assert(p == pe);
+ assert(d == de);
+ if (verbose)
+ printf("word '%s' validated successfully with unpack\n",
+ _name.c_str());
+ return true;
+}
+
+
+bool
+FakeWord::validate(search::queryeval::SearchIterator *iterator,
+ const fef::TermFieldMatchDataArray &matchData,
+ bool verbose) const
+{
+ iterator->initFullRange();
+ uint32_t docId = 1;
+
+ typedef DocWordFeatureList DWFL;
+ typedef DocWordPosFeatureList DWPFL;
+ typedef TermFieldMatchData::PositionsIterator TMDPI;
+
+ DWFL::const_iterator d(_postings.begin());
+ DWFL::const_iterator de(_postings.end());
+ DWPFL::const_iterator p(_wordPosFeatures.begin());
+ DWPFL::const_iterator pe(_wordPosFeatures.end());
+
+ if (verbose)
+ printf("Start validate word '%s'\n", _name.c_str());
+ for (;;) {
+ if (iterator->seek(docId)) {
+ assert(d != de);
+ assert(d->_docId == docId);
+ iterator->unpack(docId);
+ unsigned int positions = d->_positions;
+ for (size_t lfi = 0; lfi < matchData.size(); ++lfi) {
+ if (matchData[lfi]->getDocId() != docId)
+ continue;
+ TMDPI mdpe = matchData[lfi]->end();
+ TMDPI mdp = matchData[lfi]->begin();
+ while (mdp != mdpe) {
+ assert(p != pe);
+ assert(positions > 0);
+ assert(p->_wordPos == mdp->getPosition());
+ assert(p->_elementId == mdp->getElementId());
+ assert(p->_elementWeight == mdp->getElementWeight());
+ assert(p->_elementLen == mdp->getElementLen());
+ ++p;
+ ++mdp;
+ --positions;
+ }
+ }
+ assert(positions == 0);
+ ++d;
+ ++docId;
+ } else {
+ if (iterator->getDocId() > docId)
+ docId = iterator->getDocId();
+ else
+ ++docId;
+ }
+ if (docId >= _docIdLimit)
+ break;
+ }
+ assert(p == pe);
+ assert(d == de);
+ if (verbose)
+ printf("word '%s' validated successfully with unpack\n",
+ _name.c_str());
+ return true;
+}
+
+
+bool
+FakeWord::validate(search::queryeval::SearchIterator *iterator, bool verbose) const
+{
+ iterator->initFullRange();
+ uint32_t docId = 1;
+
+ typedef DocWordFeatureList DWFL;
+
+ DWFL::const_iterator d(_postings.begin());
+ DWFL::const_iterator de(_postings.end());
+
+ if (verbose)
+ printf("Start validate word '%s'\n", _name.c_str());
+ for (;;) {
+ if (iterator->seek(docId)) {
+ assert(d != de);
+ assert(d->_docId == docId);
+ ++d;
+ ++docId;
+ } else {
+ if (iterator->getDocId() > docId)
+ docId = iterator->getDocId();
+ else
+ ++docId;
+ }
+ if (docId >= _docIdLimit)
+ break;
+ }
+ assert(d == de);
+ if (verbose)
+ printf("word '%s' validated successfully without unpack\n",
+ _name.c_str());
+ return true;
+}
+
+
+bool
+FakeWord::validate(std::shared_ptr<FieldReader> &fieldReader,
+ uint32_t wordNum,
+ const fef::TermFieldMatchDataArray &matchData,
+ bool verbose,
+ uint32_t &checkPointCheck,
+ uint32_t checkPointInterval,
+ CheckPointCallback *const checkPointCallback) const
+{
+ uint32_t docId = 0;
+ uint32_t numDocs;
+ uint32_t residue;
+ uint32_t presidue;
+ bool unpres;
+
+ typedef DocWordFeatureList DWFL;
+ typedef DocWordPosFeatureList DWPFL;
+ typedef TermFieldMatchData::PositionsIterator TMDPI;
+
+ DWFL::const_iterator d(_postings.begin());
+ DWFL::const_iterator de(_postings.end());
+ DWPFL::const_iterator p(_wordPosFeatures.begin());
+ DWPFL::const_iterator pe(_wordPosFeatures.end());
+
+ if (verbose)
+ printf("Start validate word '%s'\n", _name.c_str());
+#ifdef notyet
+ // Validate word number
+#else
+ (void) wordNum;
+#endif
+ numDocs = _postings.size();
+ for (residue = numDocs; residue > 0; --residue) {
+ assert(fieldReader->_wordNum == wordNum);
+ DocIdAndFeatures &features(fieldReader->_docIdAndFeatures);
+ docId = features._docId;
+ assert(d != de);
+ assert(d->_docId == docId);
+ if (matchData.valid()) {
+#ifdef notyet
+ unpres = features.unpack(matchData);
+ assert(unpres);
+#else
+ (void) unpres;
+
+ typedef WordDocElementFeatures Elements;
+ typedef WordDocElementWordPosFeatures Positions;
+
+ std::vector<Elements>::const_iterator element =
+ features._elements.begin();
+ std::vector<Positions>::const_iterator position =
+ features._wordPositions.begin();
+
+ TermFieldMatchData *tfmd = matchData[0];
+ LOG_ASSERT(tfmd != 0);
+ tfmd->reset(features._docId);
+
+ uint32_t elementResidue = features._elements.size();
+ while (elementResidue != 0) {
+ uint32_t positionResidue = element->getNumOccs();
+ while (positionResidue != 0) {
+ uint32_t wordPos = position->getWordPos();
+ TermFieldMatchDataPosition pos(element->getElementId(),
+ wordPos,
+ element->getWeight(),
+ element->getElementLen());
+ tfmd->appendPosition(pos);
+ ++position;
+ --positionResidue;
+ }
+ ++element;
+ --elementResidue;
+ }
+#endif
+ unsigned int positions = d->_positions;
+ presidue = positions;
+ for (size_t lfi = 0; lfi < matchData.size(); ++lfi) {
+ if (matchData[lfi]->getDocId() != docId)
+ continue;
+ TMDPI mdpe = matchData[lfi]->end();
+ TMDPI mdp = matchData[lfi]->begin();
+ while (mdp != mdpe) {
+ assert(p != pe);
+ assert(presidue > 0);
+ assert(p->_wordPos == mdp->getPosition());
+ assert(p->_elementId == mdp->getElementId());
+ assert(p->_elementWeight == mdp->getElementWeight());
+ assert(p->_elementLen == mdp->getElementLen());
+ ++p;
+ ++mdp;
+ --presidue;
+ }
+ }
+ assert(presidue == 0);
+ ++d;
+ }
+ if (++checkPointCheck >= checkPointInterval) {
+ checkPointCheck = 0;
+ if (checkPointCallback != NULL)
+ checkPointCallback->checkPoint();
+ }
+ fieldReader->read();
+ }
+ if (matchData.valid()) {
+ assert(p == pe);
+ assert(d == de);
+ }
+ if (verbose)
+ printf("word '%s' validated successfully %s unpack\n",
+ _name.c_str(),
+ matchData.valid() ? "with" : "without");
+ return true;
+}
+
+
+void
+FakeWord::validate(const std::vector<uint32_t> &docIds) const
+{
+ typedef DocWordFeatureList DWFL;
+ typedef std::vector<uint32_t> DL;
+ DWFL::const_iterator d(_postings.begin());
+ DWFL::const_iterator de(_postings.end());
+ DL::const_iterator di(docIds.begin());
+ DL::const_iterator die(docIds.end());
+
+ while (d != de) {
+ assert(di != die);
+ assert(d->_docId == *di);
+ ++d;
+ ++di;
+ }
+ assert(di == die);
+}
+
+
+void
+FakeWord::validate(const search::BitVector &bv) const
+{
+ typedef DocWordFeatureList DWFL;
+ DWFL::const_iterator d(_postings.begin());
+ DWFL::const_iterator de(_postings.end());
+ uint32_t bitHits = bv.countTrueBits();
+ assert(bitHits == _postings.size());
+ (void) bitHits;
+ uint32_t bi = bv.getNextTrueBit(1u);
+ while (d != de) {
+ assert(d->_docId == bi);
+ ++d;
+ bi = bv.getNextTrueBit(bi + 1);
+ }
+ assert(bi >= bv.size());
+}
+
+
+bool
+FakeWord::dump(std::shared_ptr<FieldWriter> &fieldWriter,
+ bool verbose,
+ uint32_t &checkPointCheck,
+ uint32_t checkPointInterval,
+ CheckPointCallback *checkPointCallback) const
+{
+ uint32_t numDocs;
+ uint32_t residue;
+ DocIdAndPosOccFeatures features;
+
+ typedef DocWordFeatureList DWFL;
+ typedef DocWordPosFeatureList DWPFL;
+
+ DWFL::const_iterator d(_postings.begin());
+ DWFL::const_iterator de(_postings.end());
+ DWPFL::const_iterator p(_wordPosFeatures.begin());
+ DWPFL::const_iterator pe(_wordPosFeatures.end());
+
+ if (verbose)
+ printf("Start dumping word '%s'\n", _name.c_str());
+ numDocs = _postings.size();
+ for (residue = numDocs; residue > 0; --residue) {
+ assert(d != de);
+ setupFeatures(*d, &*p, features);
+ p += d->_positions;
+ fieldWriter->add(features);
+ ++d;
+ if (++checkPointCheck >= checkPointInterval) {
+ checkPointCheck = 0;
+ if (checkPointCallback != NULL)
+ checkPointCallback->checkPoint();
+ }
+ }
+ assert(p == pe);
+ assert(d == de);
+ if (verbose)
+ printf("word '%s' dumped successfully\n",
+ _name.c_str());
+ return true;
+}
+
+
+FakeWord::RandomizedReader::RandomizedReader(void)
+ : _r(),
+ _fw(NULL),
+ _wordIdx(0u),
+ _valid(false),
+ _ri(),
+ _re()
+{
+}
+
+
+void
+FakeWord::RandomizedReader::read(void)
+{
+ if (_ri != _re) {
+ _r = *_ri;
+ ++_ri;
+ } else
+ _valid = false;
+}
+
+
+void
+FakeWord::RandomizedReader::setup(const FakeWord *fw,
+ uint32_t wordIdx)
+{
+ _fw = fw;
+ _wordIdx = wordIdx;
+ _ri = fw->_randomizer.begin();
+ _re = fw->_randomizer.end();
+ _valid = _ri != _re;
+}
+
+
+FakeWord::RandomizedWriter::~RandomizedWriter(void)
+{
+}
+
+
+} // namespace fakedata
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/test/fakedata/fakeword.h b/searchlib/src/vespa/searchlib/test/fakedata/fakeword.h
new file mode 100644
index 00000000000..8814bd9cf7e
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/test/fakedata/fakeword.h
@@ -0,0 +1,355 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/searchlib/util/rand48.h>
+#include <vector>
+#include <vespa/searchlib/queryeval/searchiterator.h>
+#include <vespa/searchlib/bitcompression/compression.h>
+#include <vespa/searchlib/bitcompression/posocccompression.h>
+
+#include <vespa/searchlib/fef/termfieldmatchdata.h>
+#include <vespa/searchlib/fef/termfieldmatchdataarray.h>
+#include <vespa/searchlib/diskindex/fieldreader.h>
+#include <vespa/searchlib/diskindex/fieldwriter.h>
+
+namespace search
+{
+
+namespace fakedata
+{
+
+
+class CheckPointCallback
+{
+public:
+ CheckPointCallback(void)
+ {
+ }
+
+ virtual
+ ~CheckPointCallback(void)
+ {
+ }
+
+ virtual void
+ checkPoint(void) = 0;
+};
+
+/*
+ * General representation of a faked word, containing all features used
+ * by any of the candidate posting list formats.
+ */
+class FakeWord
+{
+public:
+ typedef bitcompression::PosOccFieldsParams PosOccFieldsParams;
+
+ class DocWordPosFeature
+ {
+ public:
+ uint32_t _elementId;
+ uint32_t _wordPos;
+ int32_t _elementWeight;
+ uint32_t _elementLen;
+
+ inline bool
+ operator<(const DocWordPosFeature &rhs) const
+ {
+ if (_elementId != rhs._elementId)
+ return _elementId < rhs._elementId;
+ return _wordPos < rhs._wordPos;
+ }
+
+ DocWordPosFeature(void);
+ ~DocWordPosFeature(void);
+ };
+
+ typedef std::vector<DocWordPosFeature> DocWordPosFeatureList;
+
+ class DocWordCollapsedFeature
+ {
+ public:
+ DocWordCollapsedFeature(void);
+ ~DocWordCollapsedFeature(void);
+ };
+
+ class DocWordFeature
+ {
+ public:
+ uint32_t _docId;
+ DocWordCollapsedFeature _collapsedDocWordFeatures;
+ uint32_t _positions;
+ uint32_t _accPositions; // accumulated positions for previous words
+
+ DocWordFeature(void);
+ ~DocWordFeature(void);
+ };
+
+ typedef std::vector<DocWordFeature> DocWordFeatureList;
+
+ class Randomizer
+ {
+ public:
+ uint32_t _random;
+ int32_t _ref;
+
+ Randomizer(void)
+ : _random(0),
+ _ref(0)
+ {
+ }
+
+ bool
+ operator<(const Randomizer &rhs) const
+ {
+ if (_random != rhs._random)
+ return _random < rhs._random;
+ return _ref < rhs._ref;
+ }
+
+ bool
+ operator==(const Randomizer &rhs) const
+ {
+ return _random == rhs._random && _ref == rhs._ref;
+ }
+
+ bool
+ isExtra(void) const
+ {
+ return _ref < 0;
+ }
+
+ bool
+ isRemove(void) const
+ {
+ return isExtra() && (_ref & 1) == 0;
+ }
+
+ uint32_t
+ extraIdx(void) const
+ {
+ return (~_ref) >> 1;
+ }
+
+ };
+
+ class RandomizedWriter
+ {
+ public:
+ virtual
+ ~RandomizedWriter(void);
+
+ virtual void
+ add(uint32_t wordIdx, index::DocIdAndFeatures &features) = 0;
+
+ virtual void
+ remove(uint32_t wordIdx, uint32_t docId) = 0;
+ };
+
+ class RandomizedReader
+ {
+ Randomizer _r;
+ const FakeWord *_fw;
+ uint32_t _wordIdx;
+ bool _valid;
+ std::vector<Randomizer>::const_iterator _ri;
+ std::vector<Randomizer>::const_iterator _re;
+ index::DocIdAndPosOccFeatures _features;
+ public:
+ RandomizedReader(void);
+
+ void
+ read(void);
+
+ void
+ write(RandomizedWriter &writer)
+ {
+ const FakeWord::DocWordFeature &d = _fw->getDocWordFeature(_r);
+ if (_r.isRemove()) {
+ writer.remove(_wordIdx, d._docId);
+ } else {
+ const DocWordPosFeature *p = _fw->getDocWordPosFeature(_r, d);
+ FakeWord::setupFeatures(d, p, _features);
+ writer.add(_wordIdx, _features);
+ }
+ }
+
+ bool
+ isValid(void) const
+ {
+ return _valid;
+ }
+
+ bool operator<(const RandomizedReader &rhs) const
+ {
+ if (_r < rhs._r)
+ return true;
+ if (!(_r == rhs._r))
+ return false;
+ return _wordIdx < rhs._wordIdx;
+ }
+
+ void
+ setup(const FakeWord *fw,
+ uint32_t wordIdx);
+ };
+
+ DocWordFeatureList _postings;
+ DocWordPosFeatureList _wordPosFeatures;
+ DocWordFeatureList _extraPostings;
+ DocWordPosFeatureList _extraWordPosFeatures;
+ std::vector<Randomizer> _randomizer;
+ uint32_t _docIdLimit; // Documents in index
+ std::string _name;
+ const PosOccFieldsParams &_fieldsParams;
+ uint32_t _packedIndex;
+
+ void
+ fakeup(search::BitVector &bitmap,
+ search::Rand48 &rnd,
+ DocWordFeatureList &postings,
+ DocWordPosFeatureList &wordPosFeatures);
+
+ void
+ fakeupTemps(search::Rand48 &rnd,
+ uint32_t docIdLimit,
+ uint32_t tempWordDocs);
+
+ void
+ setupRandomizer(search::Rand48 &rnd);
+
+ const DocWordFeature &
+ getDocWordFeature(const Randomizer &r) const
+ {
+ if (r.isExtra()) {
+ assert(r.extraIdx() < _extraPostings.size());
+ return _extraPostings[r.extraIdx()];
+ }
+ assert(static_cast<uint32_t>(r._ref) < _postings.size());
+ return _postings[r._ref];
+ }
+
+ const
+ DocWordPosFeature *
+ getDocWordPosFeature(const Randomizer &r, const DocWordFeature &d) const
+ {
+ if (r.isExtra()) {
+ assert(d._accPositions + d._positions <=
+ _extraWordPosFeatures.size());
+ return &_extraWordPosFeatures[d._accPositions];
+ }
+ assert(d._accPositions + d._positions <=
+ _wordPosFeatures.size());
+ return &_wordPosFeatures[d._accPositions];
+ }
+
+ static void
+ setupFeatures(const DocWordFeature &d,
+ const DocWordPosFeature *p,
+ index::DocIdAndPosOccFeatures &features)
+ {
+ unsigned int positions = d._positions;
+ features.clear(d._docId);
+ for (unsigned int t = 0; t < positions; ++t) {
+ features.addNextOcc(p->_elementId, p->_wordPos,
+ p->_elementWeight, p->_elementLen);
+ ++p;
+ }
+ }
+
+public:
+
+ FakeWord(uint32_t docIdLimit,
+ const std::vector<uint32_t> & docIds,
+ const std::string &name,
+ const PosOccFieldsParams &fieldsParams,
+ uint32_t packedIndex);
+
+ FakeWord(uint32_t docIdLimit,
+ uint32_t wordDocs,
+ uint32_t tempWordDocs,
+ const std::string &name,
+ search::Rand48 &rnd,
+ const PosOccFieldsParams &fieldsParams,
+ uint32_t packedIndex);
+
+ FakeWord(uint32_t docIdLimit,
+ uint32_t wordDocs,
+ uint32_t tempWordDocs,
+ const std::string &name,
+ const FakeWord &otherWord,
+ size_t overlapDocs,
+ search::Rand48 &rnd,
+ const PosOccFieldsParams &fieldsParams,
+ uint32_t packedIndex);
+
+ ~FakeWord(void);
+
+ bool
+ validate(search::queryeval::SearchIterator *iterator,
+ const fef::TermFieldMatchDataArray &matchData,
+ uint32_t stride,
+ bool verbose) const;
+
+ bool
+ validate(search::queryeval::SearchIterator *iterator,
+ const fef::TermFieldMatchDataArray &matchData,
+ bool verbose) const;
+
+ bool
+ validate(search::queryeval::SearchIterator *iterator,
+ bool verbose) const;
+
+ bool
+ validate(std::shared_ptr<search::diskindex::FieldReader> &fieldReader,
+ uint32_t wordNum,
+ const fef::TermFieldMatchDataArray &matchData,
+ bool verbose,
+ uint32_t &checkPointCheck,
+ uint32_t checkPointInterval,
+ CheckPointCallback *const checkPointCallback) const;
+
+ void
+ validate(const std::vector<uint32_t> &docIds) const;
+
+ void
+ validate(const BitVector &bv) const;
+
+ bool
+ dump(std::shared_ptr<search::diskindex::FieldWriter> &fieldWriter,
+ bool verbose,
+ uint32_t &checkPointCheck,
+ uint32_t checkPointInterval,
+ CheckPointCallback *checkPointCallback) const;
+
+ const std::string &getName(void) const
+ {
+ return _name;
+ }
+
+ uint32_t
+ getDocIdLimit(void) const
+ {
+ return _docIdLimit;
+ }
+
+ const PosOccFieldsParams &
+ getFieldsParams(void) const
+ {
+ return _fieldsParams;
+ }
+
+ uint32_t
+ getPackedIndex(void) const
+ {
+ return _packedIndex;
+ }
+
+ void
+ addDocIdBias(uint32_t docIdBias);
+};
+
+} // namespace fakedata
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/test/fakedata/fakewordset.cpp b/searchlib/src/vespa/searchlib/test/fakedata/fakewordset.cpp
new file mode 100644
index 00000000000..4ecf04bb59c
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/test/fakedata/fakewordset.cpp
@@ -0,0 +1,161 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".fakewordset");
+#include "fakewordset.h"
+#include "fakeword.h"
+#include <vespa/searchlib/index/schemautil.h>
+
+namespace search
+{
+
+namespace fakedata
+{
+
+using index::PostingListParams;
+using index::SchemaUtil;
+
+static void
+clearFakeWordVector(std::vector<FakeWord *> &v)
+{
+ for (unsigned int i = 0; i < v.size(); ++i)
+ delete v[i];
+ v.clear();
+}
+
+
+static void
+applyDocIdBiasToVector(std::vector<FakeWord *> &v, uint32_t docIdBias)
+{
+ for (unsigned int i = 0; i < v.size(); ++i)
+ v[i]->addDocIdBias(docIdBias);
+}
+
+
+FakeWordSet::FakeWordSet(void)
+ : _words(NUM_WORDCLASSES),
+ _schema(),
+ _fieldsParams()
+{
+ setupParams(false, false);
+}
+
+
+FakeWordSet::FakeWordSet(bool hasElements,
+ bool hasElementWeights)
+ : _words(NUM_WORDCLASSES),
+ _schema(),
+ _fieldsParams()
+{
+ setupParams(hasElements, hasElementWeights);
+}
+
+
+FakeWordSet::~FakeWordSet(void)
+{
+ dropWords();
+}
+
+
+void
+FakeWordSet::setupParams(bool hasElements,
+ bool hasElementWeights)
+{
+ _schema.clear();
+
+ assert(hasElements || !hasElementWeights);
+ Schema::CollectionType collectionType(Schema::SINGLE);
+ if (hasElements) {
+ if (hasElementWeights)
+ collectionType = Schema::WEIGHTEDSET;
+ else
+ collectionType = Schema::ARRAY;
+ }
+ Schema::IndexField indexField("field0",
+ Schema::STRING,
+ collectionType);
+ indexField.setAvgElemLen(512u);
+ _schema.addIndexField(indexField);
+ _fieldsParams.resize(_schema.getNumIndexFields());
+ SchemaUtil::IndexIterator it(_schema);
+ for(; it.isValid(); ++it) {
+ _fieldsParams[it.getIndex()].
+ setSchemaParams(_schema, it.getIndex());
+ }
+}
+
+
+void
+FakeWordSet::setupWords(search::Rand48 &rnd,
+ unsigned int numDocs,
+ unsigned int commonDocFreq,
+ unsigned int numWordsPerWordClass)
+{
+ std::string common = "common";
+ std::string medium = "medium";
+ std::string rare = "rare";
+ FakeWord *fw;
+ FastOS_Time tv;
+ double before;
+ double after;
+
+ LOG(info, "enter setupWords");
+ tv.SetNow();
+ before = tv.Secs();
+ uint32_t packedIndex = _fieldsParams.size() - 1;
+ for (unsigned int i = 0; i < numWordsPerWordClass; ++i) {
+ std::ostringstream vi;
+
+ vi << (i + 1);
+ fw = new FakeWord(numDocs, commonDocFreq, commonDocFreq / 2,
+ common + vi.str(), rnd,
+ _fieldsParams[packedIndex],
+ packedIndex);
+ _words[COMMON_WORD].push_back(fw);
+ fw = new FakeWord(numDocs, 1000, 500,
+ medium + vi.str(), rnd,
+ _fieldsParams[packedIndex],
+ packedIndex);
+ _words[MEDIUM_WORD].push_back(fw);
+ fw = new FakeWord(numDocs, 10, 5,
+ rare + vi.str(), rnd,
+ _fieldsParams[packedIndex],
+ packedIndex);
+ _words[RARE_WORD].push_back(fw);
+ }
+ tv.SetNow();
+ after = tv.Secs();
+ LOG(info, "leave setupWords, elapsed %10.6f s", after - before);
+}
+
+
+void
+FakeWordSet::dropWords(void)
+{
+ for (unsigned int i = 0; i < _words.size(); ++i)
+ clearFakeWordVector(_words[i]);
+}
+
+
+int
+FakeWordSet::getNumWords(void)
+{
+ int ret = 0;
+ for (unsigned int i = 0; i < _words.size(); ++i)
+ ret += _words[i].size();
+ return ret;
+}
+
+
+void
+FakeWordSet::addDocIdBias(uint32_t docIdBias)
+{
+ for (unsigned int i = 0; i < _words.size(); ++i)
+ applyDocIdBiasToVector(_words[i], docIdBias);
+}
+
+
+} // namespace fakedata
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/test/fakedata/fakewordset.h b/searchlib/src/vespa/searchlib/test/fakedata/fakewordset.h
new file mode 100644
index 00000000000..51e87ffd817
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/test/fakedata/fakewordset.h
@@ -0,0 +1,92 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vector>
+#include <vespa/searchlib/bitcompression/compression.h>
+#include <vespa/searchlib/bitcompression/posocccompression.h>
+
+namespace search
+{
+class Rand48;
+}
+
+namespace search
+{
+
+namespace fakedata
+{
+
+class FakeWord;
+
+class FakeWordSet
+{
+public:
+ typedef bitcompression::PosOccFieldsParams PosOccFieldsParams;
+ typedef bitcompression::PosOccFieldParams PosOccFieldParams;
+ typedef index::Schema Schema;
+
+ enum {
+ COMMON_WORD,
+ MEDIUM_WORD,
+ RARE_WORD,
+ NUM_WORDCLASSES,
+ };
+ std::vector<std::vector<FakeWord *> > _words;
+ Schema _schema;
+ std::vector<PosOccFieldsParams> _fieldsParams;
+
+ FakeWordSet(void);
+
+ FakeWordSet(bool hasElements,
+ bool hasElementWeights);
+
+ ~FakeWordSet(void);
+
+ void
+ setupParams(bool hasElements,
+ bool hasElementWeights);
+
+ void
+ setupWords(search::Rand48 &rnd,
+ unsigned int numDocs,
+ unsigned int commonDocFreq,
+ unsigned int numWordsPerWordClass);
+
+ void
+ dropWords(void);
+
+ int
+ getNumWords(void);
+
+ const PosOccFieldsParams &
+ getFieldsParams(void) const
+ {
+ return _fieldsParams.back();
+ }
+
+ uint32_t
+ getPackedIndex(void) const
+ {
+ return _fieldsParams.size() - 1;
+ }
+
+ const std::vector<PosOccFieldsParams> &
+ getAllFieldsParams(void) const
+ {
+ return _fieldsParams;
+ }
+
+ const Schema &
+ getSchema(void) const
+ {
+ return _schema;
+ }
+
+ void
+ addDocIdBias(uint32_t docIdBias);
+};
+
+} // namespace fakedata
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/test/fakedata/fakezcbfilterocc.cpp b/searchlib/src/vespa/searchlib/test/fakedata/fakezcbfilterocc.cpp
new file mode 100644
index 00000000000..b1539e2ea2d
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/test/fakedata/fakezcbfilterocc.cpp
@@ -0,0 +1,268 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".fakezcbfilterocc");
+#include "fakezcbfilterocc.h"
+#include <vespa/searchlib/bitcompression/compression.h>
+#include <vespa/searchlib/bitcompression/posocccompression.h>
+#include <vespa/searchlib/queryeval/iterators.h>
+#include "fpfactory.h"
+
+using search::fef::TermFieldMatchData;
+using search::fef::TermFieldMatchDataPosition;
+
+namespace search
+{
+
+namespace fakedata
+{
+
+static FPFactoryInit
+init(std::make_pair("ZcbFilterOcc",
+ makeFPFactory<FPFactoryT<FakeZcbFilterOcc> >));
+
+static void
+zcbEncode(std::vector<uint8_t> &bytes,
+ uint32_t num)
+{
+ if (num < (1 << 7)) {
+ num <<= 1;
+ num += 1;
+ } else if (num < (1 << 14)) {
+ num <<= 2;
+ num += 2;
+ } else if (num < (1 << 21)) {
+ num <<= 3;
+ num += 4;
+ } else
+ num <<= 4;
+
+ do {
+ bytes.push_back(num & 0xff);
+ num >>= 8;
+ } while (num != 0);
+}
+
+
+#define ZCBDECODE(valI, resop) \
+do { \
+ if (__builtin_expect((valI[0] & 1) != 0, true)) { \
+ resop (valI[0] >> 1); \
+ valI += 1; \
+ } else if (__builtin_expect((valI[0] & 2) != 0, true)) { \
+ resop (((*(const uint32_t *) valI) >> 2) & ((1 << 14) - 1)); \
+ valI += 2; \
+ } else if (__builtin_expect((valI[0] & 4) != 0, true)) { \
+ resop (((*(const uint32_t *) valI) >> 3) & ((1 << 21) - 1)); \
+ valI += 3; \
+ } else { \
+ resop ((*(const uint32_t *) valI) >> 4); \
+ valI += 4; \
+ } \
+} while (0)
+
+FakeZcbFilterOcc::FakeZcbFilterOcc(const FakeWord &fw)
+ : FakePosting(fw.getName() + ".zcbfilterocc"),
+ _compressed(),
+ _docIdLimit(0),
+ _hitDocs(0),
+ _bitSize(0)
+{
+ std::vector<uint8_t> bytes;
+ uint32_t lastDocId = 0u;
+
+
+ typedef FakeWord FW;
+ typedef FW::DocWordFeatureList DWFL;
+ typedef FW::DocWordPosFeatureList DWPFL;
+
+ DWFL::const_iterator d(fw._postings.begin());
+ DWFL::const_iterator de(fw._postings.end());
+ DWPFL::const_iterator p(fw._wordPosFeatures.begin());
+ DWPFL::const_iterator pe(fw._wordPosFeatures.end());
+
+ while (d != de) {
+ if (lastDocId == 0u) {
+ zcbEncode(bytes, d->_docId - 1);
+ } else {
+ uint32_t docIdDelta = d->_docId - lastDocId;
+ zcbEncode(bytes, docIdDelta - 1);
+ }
+ lastDocId = d->_docId;
+ ++d;
+ }
+ // 3 padding bytes to ensure ZCBDECODE reads initialized memory.
+ bytes.push_back(0);
+ bytes.push_back(0);
+ bytes.push_back(0);
+ _hitDocs = fw._postings.size();
+ std::swap(_compressed, bytes);
+ _docIdLimit = fw._docIdLimit;
+}
+
+
+FakeZcbFilterOcc::~FakeZcbFilterOcc(void)
+{
+}
+
+
+void
+FakeZcbFilterOcc::forceLink(void)
+{
+}
+
+
+size_t
+FakeZcbFilterOcc::bitSize(void) const
+{
+ // Do not count the 3 padding bytes here.
+ return 8 * (_compressed.size() - 3) ;
+}
+
+bool
+FakeZcbFilterOcc::hasWordPositions(void) const
+{
+ return false;
+}
+
+
+int
+FakeZcbFilterOcc::lowLevelSinglePostingScan(void) const
+{
+ return 0;
+}
+
+
+int
+FakeZcbFilterOcc::lowLevelSinglePostingScanUnpack(void) const
+{
+ return 0;
+}
+
+
+int
+FakeZcbFilterOcc::
+lowLevelAndPairPostingScan(const FakePosting &rhs) const
+{
+ (void) rhs;
+ return 0;
+}
+
+
+int
+FakeZcbFilterOcc::
+lowLevelAndPairPostingScanUnpack(const FakePosting &rhs) const
+{
+ (void) rhs;
+ return 0;
+}
+
+
+class FakeFilterOccZCBArrayIterator
+ : public queryeval::RankedSearchIteratorBase
+{
+private:
+
+ FakeFilterOccZCBArrayIterator(const FakeFilterOccZCBArrayIterator &other);
+
+ FakeFilterOccZCBArrayIterator&
+ operator=(const FakeFilterOccZCBArrayIterator &other);
+
+public:
+ // Pointer to compressed data
+ const uint8_t *_valI;
+ unsigned int _residue;
+
+ FakeFilterOccZCBArrayIterator(const uint8_t *compressedOccurrences,
+ unsigned int residue,
+ const fef::TermFieldMatchDataArray &matchData);
+
+ ~FakeFilterOccZCBArrayIterator(void);
+
+ void doUnpack(uint32_t docId) override;
+ void doSeek(uint32_t docId) override;
+ void initRange(uint32_t begin, uint32_t end) override;
+ Trinary is_strict() const override { return Trinary::True; }
+};
+
+
+FakeFilterOccZCBArrayIterator::
+FakeFilterOccZCBArrayIterator(const uint8_t *compressedOccurrences,
+ unsigned int residue,
+ const fef::TermFieldMatchDataArray &matchData)
+ : queryeval::RankedSearchIteratorBase(matchData),
+ _valI(compressedOccurrences),
+ _residue(residue)
+{
+ clearUnpacked();
+}
+
+void
+FakeFilterOccZCBArrayIterator::initRange(uint32_t begin, uint32_t end)
+{
+ queryeval::RankedSearchIteratorBase::initRange(begin, end);
+ uint32_t docId = 0;
+ if (_residue > 0) {
+ ZCBDECODE(_valI, docId = 1 +);
+ setDocId(docId);
+ } else {
+ setAtEnd();
+ }
+}
+
+
+FakeFilterOccZCBArrayIterator::
+~FakeFilterOccZCBArrayIterator(void)
+{
+}
+
+
+void
+FakeFilterOccZCBArrayIterator::doSeek(uint32_t docId)
+{
+ const uint8_t *oCompr = _valI;
+ uint32_t oDocId = getDocId();
+
+ if (getUnpacked())
+ clearUnpacked();
+ while (oDocId < docId) {
+ if (--_residue == 0)
+ goto atbreak;
+ ZCBDECODE(oCompr, oDocId += 1 +);
+ }
+ _valI = oCompr;
+ setDocId(oDocId);
+ return;
+ atbreak:
+ _valI = oCompr;
+ setAtEnd(); // Mark end of data
+ return;
+}
+
+
+void
+FakeFilterOccZCBArrayIterator::doUnpack(uint32_t docId)
+{
+ if (_matchData.size() != 1 || getUnpacked()) {
+ return;
+ }
+ assert(docId == getDocId());
+ _matchData[0]->reset(docId);
+ setUnpacked();
+}
+
+
+search::queryeval::SearchIterator *
+FakeZcbFilterOcc::
+createIterator(const fef::TermFieldMatchDataArray &matchData) const
+{
+ const uint8_t *arr = &*_compressed.begin();
+ return new FakeFilterOccZCBArrayIterator(arr,
+ _hitDocs,
+ matchData);
+}
+
+} // namespace fakedata
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/test/fakedata/fakezcbfilterocc.h b/searchlib/src/vespa/searchlib/test/fakedata/fakezcbfilterocc.h
new file mode 100644
index 00000000000..c9d183af80e
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/test/fakedata/fakezcbfilterocc.h
@@ -0,0 +1,75 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include "fakeword.h"
+#include "fakeposting.h"
+
+namespace search
+{
+
+namespace fakedata
+{
+
+/*
+ * YST style compression of docid list.
+ */
+class FakeZcbFilterOcc : public FakePosting
+{
+private:
+ std::vector<uint8_t> _compressed;
+ unsigned int _docIdLimit;
+ unsigned int _hitDocs;
+ size_t _bitSize;
+public:
+ FakeZcbFilterOcc(const FakeWord &fw);
+
+ ~FakeZcbFilterOcc(void);
+
+ static void
+ forceLink(void);
+
+ /*
+ * Size of posting list, in bits.
+ */
+ size_t
+ bitSize(void) const;
+
+ virtual bool
+ hasWordPositions(void) const;
+
+ /*
+ * Single posting list performance, without feature unpack.
+ */
+ virtual int
+ lowLevelSinglePostingScan(void) const;
+
+ /*
+ * Single posting list performance, with feature unpack.
+ */
+ virtual int
+ lowLevelSinglePostingScanUnpack(void) const;
+
+ /*
+ * Two posting lists performance (same format) without feature unpack.
+ */
+ virtual int
+ lowLevelAndPairPostingScan(const FakePosting &rhs) const;
+
+ /*
+ * Two posting lists performance (same format) with feature unpack.
+ */
+ virtual int
+ lowLevelAndPairPostingScanUnpack(const FakePosting &rhs) const;
+
+
+ /*
+ * Iterator factory, for current query evaluation framework.
+ */
+ virtual search::queryeval::SearchIterator *
+ createIterator(const fef::TermFieldMatchDataArray &matchData) const;
+};
+
+} // namespace fakedata
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/test/fakedata/fakezcfilterocc.cpp b/searchlib/src/vespa/searchlib/test/fakedata/fakezcfilterocc.cpp
new file mode 100644
index 00000000000..2fc379c8a71
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/test/fakedata/fakezcfilterocc.cpp
@@ -0,0 +1,1823 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".fakezcfilterocc");
+#include "fakezcfilterocc.h"
+#include <vespa/searchlib/bitcompression/compression.h>
+#include <vespa/searchlib/bitcompression/posocccompression.h>
+#include <vespa/searchlib/diskindex/zcposocciterators.h>
+#include <vespa/searchlib/index/docidandfeatures.h>
+#include <vespa/searchlib/index/postinglistcounts.h>
+#include "fpfactory.h"
+
+
+using search::fef::TermFieldMatchData;
+using search::fef::TermFieldMatchDataArray;
+using search::fef::TermFieldMatchDataPosition;
+using search::queryeval::SearchIterator;
+using search::index::PostingListParams;
+using search::index::DocIdAndFeatures;
+using search::index::DocIdAndPosOccFeatures;
+using search::bitcompression::PosOccFieldParams;
+using search::bitcompression::EGPosOccEncodeContext;
+using search::bitcompression::EG2PosOccEncodeContext;
+using search::bitcompression::FeatureEncodeContext;
+using search::ComprFileWriteContext;
+using namespace search::diskindex;
+
+namespace search
+{
+
+namespace fakedata
+{
+
+
+#define L1SKIPSTRIDE 16
+#define L2SKIPSTRIDE 8
+#define L3SKIPSTRIDE 8
+#define L4SKIPSTRIDE 8
+
+#define DEBUG_ZCFILTEROCC_PRINTF 0
+#define DEBUG_ZCFILTEROCC_ASSERT 0
+
+static FPFactoryInit
+init(std::make_pair("ZcFilterOcc",
+ makeFPFactory<FPFactoryT<FakeZcFilterOcc> >));
+
+static void
+zcEncode(std::vector<uint8_t> &bytes,
+ uint32_t num)
+{
+ for (;;) {
+ if (num < (1 << 7)) {
+ bytes.push_back(num);
+ break;
+ }
+ bytes.push_back((num & ((1 << 7) - 1)) | (1 << 7));
+ num >>= 7;
+ }
+}
+
+#define ZCDECODE(valI, resop) \
+do { \
+ if (__builtin_expect(valI[0] < (1 << 7), true)) { \
+ resop valI[0]; \
+ valI += 1; \
+ } else if (__builtin_expect(valI[1] < (1 << 7), true)) { \
+ resop (valI[0] & ((1 << 7) - 1)) + \
+ (valI[1] << 7); \
+ valI += 2; \
+ } else if (__builtin_expect(valI[2] < (1 << 7), true)) { \
+ resop (valI[0] & ((1 << 7) - 1)) + \
+ ((valI[1] & ((1 << 7) - 1)) << 7) + \
+ (valI[2] << 14); \
+ valI += 3; \
+ } else if (__builtin_expect(valI[3] < (1 << 7), true)) { \
+ resop (valI[0] & ((1 << 7) - 1)) + \
+ ((valI[1] & ((1 << 7) - 1)) << 7) + \
+ ((valI[2] & ((1 << 7) - 1)) << 14) + \
+ (valI[3] << 21); \
+ valI += 4; \
+ } else { \
+ resop (valI[0] & ((1 << 7) - 1)) + \
+ ((valI[1] & ((1 << 7) - 1)) << 7) + \
+ ((valI[2] & ((1 << 7) - 1)) << 14) + \
+ ((valI[3] & ((1 << 7) - 1)) << 21) + \
+ (valI[4] << 28); \
+ valI += 5; \
+ } \
+} while (0)
+
+FakeZcFilterOcc::FakeZcFilterOcc(const FakeWord &fw)
+ : FakePosting(fw.getName() + ".zcfilterocc"),
+ _docIdsSize(0),
+ _l1SkipSize(0),
+ _l2SkipSize(0),
+ _l3SkipSize(0),
+ _l4SkipSize(0),
+ _docIdLimit(0),
+ _hitDocs(0),
+ _lastDocId(0u),
+ _compressedBits(0),
+ _compressed(std::make_pair(static_cast<uint64_t *>(NULL), 0)),
+ _compressedMalloc(NULL),
+ _featuresSize(0),
+ _fieldsParams(fw.getFieldsParams()),
+ _bigEndian(true)
+{
+ setup(fw, false, true);
+}
+
+
+FakeZcFilterOcc::FakeZcFilterOcc(const FakeWord &fw,
+ bool bigEndian,
+ const char *nameSuffix)
+ : FakePosting(fw.getName() + nameSuffix),
+ _docIdsSize(0),
+ _l1SkipSize(0),
+ _l2SkipSize(0),
+ _l3SkipSize(0),
+ _l4SkipSize(0),
+ _docIdLimit(0),
+ _hitDocs(0),
+ _lastDocId(0u),
+ _compressedBits(0),
+ _compressed(std::make_pair(static_cast<uint64_t *>(NULL), 0)),
+ _featuresSize(0),
+ _fieldsParams(fw.getFieldsParams()),
+ _bigEndian(bigEndian)
+{
+ // subclass responsible for calling setup(fw, false/true);
+}
+
+
+void
+FakeZcFilterOcc::setup(const FakeWord &fw, bool doFeatures,
+ bool dynamicK)
+{
+ if (_bigEndian)
+ setupT<true>(fw, doFeatures, dynamicK);
+ else
+ setupT<false>(fw, doFeatures, dynamicK);
+}
+
+
+template <bool bigEndian>
+void
+FakeZcFilterOcc::setupT(const FakeWord &fw, bool doFeatures,
+ bool dynamicK)
+{
+ std::vector<uint8_t> bytes;
+ std::vector<uint8_t> l1SkipBytes;
+ std::vector<uint8_t> l2SkipBytes;
+ std::vector<uint8_t> l3SkipBytes;
+ std::vector<uint8_t> l4SkipBytes;
+ uint32_t lastDocId = 0u;
+ uint32_t lastL1SkipDocId = 0u;
+ uint64_t lastL1SkipDocIdPos = 0;
+ uint64_t lastL1SkipFeaturePos = 0;
+ unsigned int l1SkipCnt = 0;
+ uint32_t lastL2SkipDocId = 0u;
+ uint64_t lastL2SkipDocIdPos = 0;
+ uint64_t lastL2SkipFeaturePos = 0;
+ uint64_t lastL2SkipL1SkipPos = 0;
+ unsigned int l2SkipCnt = 0;
+ uint32_t lastL3SkipDocId = 0u;
+ uint64_t lastL3SkipDocIdPos = 0;
+ uint64_t lastL3SkipFeaturePos = 0;
+ uint64_t lastL3SkipL1SkipPos = 0;
+ uint64_t lastL3SkipL2SkipPos = 0;
+ unsigned int l3SkipCnt = 0;
+ uint32_t lastL4SkipDocId = 0u;
+ uint64_t lastL4SkipDocIdPos = 0;
+ uint64_t lastL4SkipFeaturePos = 0;
+ uint64_t lastL4SkipL1SkipPos = 0;
+ uint64_t lastL4SkipL2SkipPos = 0;
+ uint64_t lastL4SkipL3SkipPos = 0;
+ unsigned int l4SkipCnt = 0;
+ uint64_t featurePos = 0;
+
+ typedef FakeWord FW;
+ typedef FW::DocWordFeatureList DWFL;
+ typedef FW::DocWordPosFeatureList DWPFL;
+
+ DWFL::const_iterator d(fw._postings.begin());
+ DWFL::const_iterator de(fw._postings.end());
+ DWPFL::const_iterator p(fw._wordPosFeatures.begin());
+ DWPFL::const_iterator pe(fw._wordPosFeatures.end());
+ DocIdAndPosOccFeatures features;
+ EGPosOccEncodeContext<bigEndian> f1(&_fieldsParams);
+ EG2PosOccEncodeContext<bigEndian> f0(&_fieldsParams);
+ FeatureEncodeContext<bigEndian> &f = (dynamicK ?
+ static_cast<FeatureEncodeContext<bigEndian> &>(f1) :
+ static_cast<FeatureEncodeContext<bigEndian> &>(f0));
+ search::ComprFileWriteContext fctx(f);
+ f.setWriteContext(&fctx);
+ fctx.allocComprBuf(64, 1);
+ f.afterWrite(fctx, 0, 0);
+
+ while (d != de) {
+ if (l1SkipCnt >= L1SKIPSTRIDE) {
+ uint32_t docIdDelta = lastDocId - lastL1SkipDocId;
+ assert(static_cast<int32_t>(docIdDelta) > 0);
+ zcEncode(l1SkipBytes, docIdDelta - 1);
+ uint64_t lastDocIdPos = bytes.size();
+ uint32_t docIdPosDelta = lastDocIdPos - lastL1SkipDocIdPos;
+ zcEncode(l1SkipBytes, docIdPosDelta - 1);
+ if (doFeatures) {
+ featurePos = f.getWriteOffset();
+ zcEncode(l1SkipBytes, featurePos - lastL1SkipFeaturePos - 1);
+ lastL1SkipFeaturePos = featurePos;
+ }
+#if DEBUG_ZCFILTEROCC_PRINTF
+ printf("L1Encode docId=%d (+%d), docIdPos=%d (+%u)\n",
+ lastDocId, docIdDelta,
+ (int) lastDocIdPos, docIdPosDelta);
+#endif
+ lastL1SkipDocId = lastDocId;
+ lastL1SkipDocIdPos = lastDocIdPos;
+ l1SkipCnt = 0;
+ ++l2SkipCnt;
+ if (l2SkipCnt >= L2SKIPSTRIDE) {
+ docIdDelta = lastDocId - lastL2SkipDocId;
+ docIdPosDelta = lastDocIdPos - lastL2SkipDocIdPos;
+ uint64_t lastL1SkipPos = l1SkipBytes.size();
+ uint32_t l1SkipPosDelta = lastL1SkipPos - lastL2SkipL1SkipPos;
+ zcEncode(l2SkipBytes, docIdDelta - 1);
+ zcEncode(l2SkipBytes, docIdPosDelta - 1);
+ if (doFeatures) {
+ zcEncode(l2SkipBytes,
+ featurePos - lastL2SkipFeaturePos - 1);
+ lastL2SkipFeaturePos = featurePos;
+ }
+ zcEncode(l2SkipBytes, l1SkipPosDelta - 1);
+#if DEBUG_ZCFILTEROCC_PRINTF
+ printf("L2Encode docId=%d (+%d), docIdPos=%d (+%u),"
+ " l1SkipPos=%d (+%u)\n",
+ lastDocId, docIdDelta,
+ (int) lastDocIdPos, docIdPosDelta,
+ (int) lastL1SkipPos, l1SkipPosDelta);
+#endif
+ lastL2SkipDocId = lastDocId;
+ lastL2SkipDocIdPos = lastDocIdPos;
+ lastL2SkipL1SkipPos = lastL1SkipPos;
+ l2SkipCnt = 0;
+ ++l3SkipCnt;
+ if (l3SkipCnt >= L3SKIPSTRIDE) {
+ docIdDelta = lastDocId - lastL3SkipDocId;
+ docIdPosDelta = lastDocIdPos - lastL3SkipDocIdPos;
+ l1SkipPosDelta = lastL1SkipPos - lastL3SkipL1SkipPos;
+ uint64_t lastL2SkipPos = l2SkipBytes.size();
+ uint32_t l2SkipPosDelta = lastL2SkipPos -
+ lastL3SkipL2SkipPos;
+ zcEncode(l3SkipBytes, docIdDelta - 1);
+ zcEncode(l3SkipBytes, docIdPosDelta - 1);
+ if (doFeatures) {
+ zcEncode(l3SkipBytes,
+ featurePos - lastL3SkipFeaturePos - 1);
+ lastL3SkipFeaturePos = featurePos;
+ }
+ zcEncode(l3SkipBytes, l1SkipPosDelta - 1);
+ zcEncode(l3SkipBytes, l2SkipPosDelta - 1);
+#if DEBUG_ZCFILTEROCC_PRINTF
+ printf("L3Encode docId=%d (+%d), docIdPos=%d (+%u),"
+ " l1SkipPos=%d (+%u) l2SkipPos %d (+%u)\n",
+ lastDocId, docIdDelta,
+ (int) lastDocIdPos, docIdPosDelta,
+ (int) lastL1SkipPos, l1SkipPosDelta,
+ (int) lastL2SkipPos, l2SkipPosDelta);
+#endif
+ lastL3SkipDocId = lastDocId;
+ lastL3SkipDocIdPos = lastDocIdPos;
+ lastL3SkipL1SkipPos = lastL1SkipPos;
+ lastL3SkipL2SkipPos = lastL2SkipPos;
+ l3SkipCnt = 0;
+ ++l4SkipCnt;
+ if (l4SkipCnt >= L4SKIPSTRIDE) {
+ docIdDelta = lastDocId - lastL4SkipDocId;
+ docIdPosDelta = lastDocIdPos - lastL4SkipDocIdPos;
+ l1SkipPosDelta = lastL1SkipPos - lastL4SkipL1SkipPos;
+ l2SkipPosDelta = lastL2SkipPos - lastL4SkipL2SkipPos;
+ uint64_t lastL3SkipPos = l3SkipBytes.size();
+ uint32_t l3SkipPosDelta = lastL3SkipPos -
+ lastL4SkipL3SkipPos;
+ zcEncode(l4SkipBytes, docIdDelta - 1);
+ zcEncode(l4SkipBytes, docIdPosDelta - 1);
+ if (doFeatures) {
+ zcEncode(l4SkipBytes,
+ featurePos - lastL4SkipFeaturePos - 1);
+ lastL4SkipFeaturePos = featurePos;
+ }
+ zcEncode(l4SkipBytes, l1SkipPosDelta - 1);
+ zcEncode(l4SkipBytes, l2SkipPosDelta - 1);
+ zcEncode(l4SkipBytes, l3SkipPosDelta - 1);
+#if DEBUG_ZCFILTEROCC_PRINTF
+ printf("L4Encode docId=%d (+%d), docIdPos=%d (+%u),"
+ " l1SkipPos=%d (+%u) l2SkipPos %d (+%u)"
+ " l3SkipPos=%d (+%u)\n",
+ lastDocId, docIdDelta,
+ (int) lastDocIdPos, docIdPosDelta,
+ (int) lastL1SkipPos, l1SkipPosDelta,
+ (int) lastL2SkipPos, l2SkipPosDelta,
+ (int) lastL3SkipPos, l3SkipPosDelta);
+#endif
+ lastL4SkipDocId = lastDocId;
+ lastL4SkipDocIdPos = lastDocIdPos;
+ lastL4SkipL1SkipPos = lastL1SkipPos;
+ lastL4SkipL2SkipPos = lastL2SkipPos;
+ lastL4SkipL3SkipPos = lastL3SkipPos;
+ l4SkipCnt = 0;
+ }
+ }
+ }
+ }
+ if (lastDocId == 0u) {
+ zcEncode(bytes, d->_docId - 1);
+#if DEBUG_ZCFILTEROCC_PRINTF
+ printf("Encode docId=%d\n",
+ d->_docId);
+#endif
+ } else {
+ uint32_t docIdDelta = d->_docId - lastDocId;
+ zcEncode(bytes, docIdDelta - 1);
+#if DEBUG_ZCFILTEROCC_PRINTF
+ printf("Encode docId=%d (+%d)\n",
+ d->_docId, docIdDelta);
+#endif
+ }
+ if (doFeatures) {
+ fw.setupFeatures(*d, &*p, features);
+ p += d->_positions;
+ f.writeFeatures(features);
+ }
+ lastDocId = d->_docId;
+ ++l1SkipCnt;
+ ++d;
+ }
+ if (doFeatures) {
+ assert(p == pe);
+ _featuresSize = f.getWriteOffset();
+ // First pad to 64 bits.
+ uint32_t pad = (64 - f.getWriteOffset()) & 63;
+ while (pad > 0) {
+ uint32_t now = std::min(32u, pad);
+ f.writeBits(0, now);
+ f.writeComprBufferIfNeeded();
+ pad -= now;
+ }
+
+ // Then write 128 more bits. This allows for 64-bit decoding
+ // with a readbits that always leaves a nonzero preRead
+ for (unsigned int i = 0; i < 4; i++) {
+ f.writeBits(0, 32);
+ f.writeComprBufferIfNeeded();
+ }
+ f.writeComprBufferIfNeeded();
+ f.flush();
+ f.writeComprBuffer();
+ } else {
+ _featuresSize = 0;
+ }
+ // Extra partial entries for skip tables to simplify iterator during search
+ if (l1SkipBytes.size() > 0) {
+ uint32_t docIdDelta = lastDocId - lastL1SkipDocId;
+ assert(static_cast<int32_t>(docIdDelta) > 0);
+ zcEncode(l1SkipBytes, docIdDelta - 1);
+ }
+ if (l2SkipBytes.size() > 0) {
+ uint32_t docIdDelta = lastDocId - lastL2SkipDocId;
+ assert(static_cast<int32_t>(docIdDelta) > 0);
+ zcEncode(l2SkipBytes, docIdDelta - 1);
+ }
+ if (l3SkipBytes.size() > 0) {
+ uint32_t docIdDelta = lastDocId - lastL3SkipDocId;
+ assert(static_cast<int32_t>(docIdDelta) > 0);
+ zcEncode(l3SkipBytes, docIdDelta - 1);
+ }
+ if (l4SkipBytes.size() > 0) {
+ uint32_t docIdDelta = lastDocId - lastL4SkipDocId;
+ assert(static_cast<int32_t>(docIdDelta) > 0);
+ zcEncode(l4SkipBytes, docIdDelta - 1);
+ }
+ _hitDocs = fw._postings.size();
+ _docIdLimit = fw._docIdLimit;
+ _lastDocId = lastDocId;
+ FeatureEncodeContext<bigEndian> e;
+ ComprFileWriteContext ectx(e);
+ e.setWriteContext(&ectx);
+ ectx.allocComprBuf(64, 1);
+ e.afterWrite(ectx, 0, 0);
+
+ // Encode word header
+ e.encodeExpGolomb(_hitDocs - 1, K_VALUE_ZCPOSTING_NUMDOCS);
+ _docIdsSize = bytes.size() * 8;
+ _l1SkipSize = l1SkipBytes.size();
+ _l2SkipSize = _l3SkipSize = _l4SkipSize = 0;
+ if (_l1SkipSize != 0)
+ _l2SkipSize = l2SkipBytes.size();
+ if (_l2SkipSize != 0)
+ _l3SkipSize = l3SkipBytes.size();
+ if (_l3SkipSize != 0)
+ _l4SkipSize = l4SkipBytes.size();
+
+ e.encodeExpGolomb(bytes.size() - 1, K_VALUE_ZCPOSTING_DOCIDSSIZE);
+ e.encodeExpGolomb(_l1SkipSize, K_VALUE_ZCPOSTING_L1SKIPSIZE);
+ e.writeComprBufferIfNeeded();
+ if (_l1SkipSize != 0) {
+ e.encodeExpGolomb(_l2SkipSize, K_VALUE_ZCPOSTING_L2SKIPSIZE);
+ if (_l2SkipSize != 0) {
+ e.writeComprBufferIfNeeded();
+ e.encodeExpGolomb(_l3SkipSize, K_VALUE_ZCPOSTING_L3SKIPSIZE);
+ if (_l3SkipSize != 0) {
+ e.encodeExpGolomb(_l4SkipSize, K_VALUE_ZCPOSTING_L4SKIPSIZE);
+ }
+ }
+ }
+ e.writeComprBufferIfNeeded();
+ if (doFeatures) {
+ e.encodeExpGolomb(_featuresSize, K_VALUE_ZCPOSTING_FEATURESSIZE);
+ }
+ uint32_t docIdK = e.calcDocIdK(_hitDocs, _docIdLimit);
+ if (dynamicK)
+ e.encodeExpGolomb(_docIdLimit - 1 - _lastDocId, docIdK);
+ else
+ e.encodeExpGolomb(_docIdLimit - 1 - _lastDocId,
+ K_VALUE_ZCPOSTING_LASTDOCID);
+ uint64_t bytePad = (- e.getWriteOffset()) & 7;
+ if (bytePad > 0)
+ e.writeBits(0, bytePad);
+ size_t docIdSize = bytes.size();
+ if (docIdSize > 0) {
+ uint8_t *docIdBytes = &bytes[0];
+ uint32_t docIdBytesOffset =
+ reinterpret_cast<unsigned long>(docIdBytes) & 7;
+ e.writeBits(reinterpret_cast<const uint64_t *>(docIdBytes -
+ docIdBytesOffset),
+ docIdBytesOffset * 8,
+ docIdSize * 8);
+ }
+ if (_l1SkipSize > 0) {
+ uint8_t *l1Bytes = &l1SkipBytes[0];
+ uint32_t l1BytesOffset = reinterpret_cast<unsigned long>(l1Bytes) & 7;
+ e.writeBits(reinterpret_cast<const uint64_t *>(l1Bytes -
+ l1BytesOffset),
+ l1BytesOffset * 8,
+ _l1SkipSize * 8);
+ if (_l2SkipSize > 0) {
+ uint8_t *l2Bytes = &l2SkipBytes[0];
+ uint32_t l2BytesOffset =
+ reinterpret_cast<unsigned long>(l2Bytes) & 7;
+ e.writeBits(reinterpret_cast<const uint64_t *>(l2Bytes -
+ l2BytesOffset),
+ l2BytesOffset * 8,
+ _l2SkipSize * 8);
+ if (_l3SkipSize > 0) {
+ uint8_t *l3Bytes = &l3SkipBytes[0];
+ uint32_t l3BytesOffset =
+ reinterpret_cast<unsigned long>(l3Bytes) & 7;
+ e.writeBits(reinterpret_cast<const uint64_t *>(l3Bytes -
+ l3BytesOffset),
+ l3BytesOffset * 8,
+ _l3SkipSize * 8);
+ if (_l4SkipSize > 0) {
+ uint8_t *l4Bytes = &l4SkipBytes[0];
+ uint32_t l4BytesOffset =
+ reinterpret_cast<unsigned long>(l4Bytes) & 7;
+ e.writeBits(reinterpret_cast<const uint64_t *>(l4Bytes -
+ l4BytesOffset),
+ l4BytesOffset * 8,
+ _l4SkipSize * 8);
+ }
+ }
+ }
+ }
+ if (doFeatures) {
+ e.writeBits(static_cast<const uint64_t *>(fctx._comprBuf),
+ 0,
+ _featuresSize);
+ }
+ _compressedBits = e.getWriteOffset();
+ // First pad to 64 bits.
+ uint32_t pad = (64 - e.getWriteOffset()) & 63;
+ while (pad > 0) {
+ uint32_t now = std::min(32u, pad);
+ e.writeBits(0, now);
+ e.writeComprBufferIfNeeded();
+ pad -= now;
+ }
+
+ // Then write 128 more bits. This allows for 64-bit decoding
+ // with a readbits that always leaves a nonzero preRead
+ for (unsigned int i = 0; i < 4; i++) {
+ e.writeBits(0, 32);
+ e.writeComprBufferIfNeeded();
+ }
+ e.writeComprBufferIfNeeded();
+ e.flush();
+ e.writeComprBuffer();
+
+ std::pair<void *, size_t> ectxData = ectx.grabComprBuffer(_compressedMalloc);
+ _compressed = std::make_pair(static_cast<uint64_t *>(ectxData.first),
+ ectxData.second);
+}
+
+
+FakeZcFilterOcc::~FakeZcFilterOcc(void)
+{
+ free(_compressedMalloc);
+}
+
+
+void
+FakeZcFilterOcc::forceLink(void)
+{
+}
+
+
+size_t
+FakeZcFilterOcc::bitSize(void) const
+{
+ return _compressedBits -
+ (_l1SkipSize + _l2SkipSize + _l3SkipSize + _l4SkipSize) * 8;
+}
+
+
+bool
+FakeZcFilterOcc::hasWordPositions(void) const
+{
+ return false;
+}
+
+
+size_t
+FakeZcFilterOcc::skipBitSize(void) const
+{
+ return (_l1SkipSize + _l2SkipSize + _l3SkipSize + _l4SkipSize) * 8;
+}
+
+
+size_t
+FakeZcFilterOcc::l1SkipBitSize(void) const
+{
+ return _l1SkipSize * 8;
+}
+
+
+size_t
+FakeZcFilterOcc::l2SkipBitSize(void) const
+{
+ return _l2SkipSize * 8;
+}
+
+
+size_t
+FakeZcFilterOcc::l3SkipBitSize(void) const
+{
+ return _l3SkipSize * 8;
+}
+
+
+size_t
+FakeZcFilterOcc::l4SkipBitSize(void) const
+{
+ return _l4SkipSize * 8;
+}
+
+
+int
+FakeZcFilterOcc::lowLevelSinglePostingScan(void) const
+{
+ return 0;
+}
+
+
+int
+FakeZcFilterOcc::lowLevelSinglePostingScanUnpack(void) const
+{
+ return 0;
+}
+
+
+int
+FakeZcFilterOcc::
+lowLevelAndPairPostingScan(const FakePosting &rhs) const
+{
+ (void) rhs;
+ return 0;
+}
+
+
+int
+FakeZcFilterOcc::
+lowLevelAndPairPostingScanUnpack(const FakePosting &rhs) const
+{
+ (void) rhs;
+ return 0;
+}
+
+
+class FakeFilterOccZCArrayIterator
+ : public queryeval::RankedSearchIteratorBase
+{
+private:
+ FakeFilterOccZCArrayIterator(const FakeFilterOccZCArrayIterator &other);
+
+ FakeFilterOccZCArrayIterator&
+ operator=(const FakeFilterOccZCArrayIterator &other);
+
+public:
+ // Pointer to compressed data
+ const uint8_t *_valI;
+ unsigned int _residue;
+ uint32_t _lastDocId;
+
+ typedef search::bitcompression::FeatureDecodeContextBE DecodeContext;
+ typedef search::bitcompression::FeatureEncodeContextBE EncodeContext;
+ DecodeContext _decodeContext;
+ uint32_t _docIdLimit;
+
+ FakeFilterOccZCArrayIterator(const uint64_t *compressed,
+ int bitOffset,
+ uint32_t docIdLimit,
+ const fef::TermFieldMatchDataArray &matchData);
+
+ ~FakeFilterOccZCArrayIterator(void);
+
+ void doUnpack(uint32_t docId) override;
+ void doSeek(uint32_t docId) override;
+ void initRange(uint32_t begin, uint32_t end) override;
+ Trinary is_strict() const override { return Trinary::True; }
+};
+
+
+FakeFilterOccZCArrayIterator::
+FakeFilterOccZCArrayIterator(const uint64_t *compressed,
+ int bitOffset,
+ uint32_t docIdLimit,
+ const fef::TermFieldMatchDataArray &matchData)
+ : queryeval::RankedSearchIteratorBase(matchData),
+ _valI(NULL),
+ _residue(0),
+ _lastDocId(0),
+ _decodeContext(compressed, bitOffset),
+ _docIdLimit(docIdLimit)
+{
+ clearUnpacked();
+}
+
+void
+FakeFilterOccZCArrayIterator::initRange(uint32_t begin, uint32_t end)
+{
+ queryeval::RankedSearchIteratorBase::initRange(begin, end);
+ DecodeContext &d = _decodeContext;
+ typedef EncodeContext EC;
+ UC64_DECODECONTEXT_CONSTRUCTOR(o, d._);
+ uint32_t length;
+ uint64_t val64;
+
+ UC64BE_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_NUMDOCS, EC);
+ uint32_t numDocs = static_cast<uint32_t>(val64) + 1;
+
+ uint32_t docIdK = EC::calcDocIdK(numDocs, _docIdLimit);
+
+ UC64BE_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_DOCIDSSIZE, EC);
+ uint32_t docIdsSize = val64 + 1;
+ UC64BE_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_L1SKIPSIZE, EC);
+ uint32_t l1SkipSize = val64;
+ uint32_t l2SkipSize = 0;
+ if (l1SkipSize != 0) {
+ UC64BE_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_L2SKIPSIZE, EC);
+ l2SkipSize = val64;
+ }
+ uint32_t l3SkipSize = 0;
+ if (l2SkipSize != 0) {
+ UC64BE_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_L3SKIPSIZE, EC);
+ l3SkipSize = val64;
+ }
+ uint32_t l4SkipSize = 0;
+ if (l3SkipSize != 0) {
+ UC64BE_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_L4SKIPSIZE, EC);
+ l4SkipSize = val64;
+ }
+ // Feature size would be here
+ UC64BE_DECODEEXPGOLOMB_NS(o, docIdK, EC);
+ _lastDocId = _docIdLimit - 1 - val64;
+ UC64_DECODECONTEXT_STORE(o, d._);
+ uint64_t bytePad = oPreRead & 7;
+ if (bytePad > 0) {
+ length = bytePad;
+ oVal <<= length;
+ UC64BE_READBITS_NS(o, EC);
+ }
+ UC64_DECODECONTEXT_STORE(o, d._);
+ assert((d.getBitOffset() & 7) == 0);
+ const uint8_t *bcompr = d.getByteCompr();
+ _valI = bcompr;
+ bcompr += docIdsSize;
+ bcompr += l1SkipSize;
+ bcompr += l2SkipSize;
+ bcompr += l3SkipSize;
+ bcompr += l4SkipSize;
+ d.setByteCompr(bcompr);
+ uint32_t oDocId;
+ ZCDECODE(_valI, oDocId = 1 +);
+#if DEBUG_ZCFILTEROCC_PRINTF
+ printf("DecodeInit docId=%d\n",
+ oDocId);
+#endif
+ setDocId(oDocId);
+ _residue = numDocs;
+}
+
+
+FakeFilterOccZCArrayIterator::
+~FakeFilterOccZCArrayIterator(void)
+{
+}
+
+
+void
+FakeFilterOccZCArrayIterator::doSeek(uint32_t docId)
+{
+ const uint8_t *oCompr = _valI;
+ uint32_t oDocId = getDocId();
+
+ if (getUnpacked())
+ clearUnpacked();
+ while (oDocId < docId) {
+ if (--_residue == 0)
+ goto atbreak;
+ ZCDECODE(oCompr, oDocId += 1 +);
+#if DEBUG_ZCFILTEROCC_PRINTF
+ printf("Decode docId=%d\n",
+ docId);
+#endif
+ }
+ _valI = oCompr;
+ setDocId(oDocId);
+ return;
+ atbreak:
+ _valI = oCompr;
+ setAtEnd(); // Mark end of data
+ return;
+}
+
+
+void
+FakeFilterOccZCArrayIterator::doUnpack(uint32_t docId)
+{
+ if (_matchData.size() != 1 || getUnpacked()) {
+ return;
+ }
+ assert(docId == getDocId());
+ _matchData[0]->reset(docId);
+ setUnpacked();
+}
+
+
+SearchIterator *
+FakeZcFilterOcc::
+createIterator(const TermFieldMatchDataArray &matchData) const
+{
+ return new FakeFilterOccZCArrayIterator(_compressed.first, 0, _docIdLimit, matchData);
+}
+
+template <bool doSkip>
+class FakeZcSkipFilterOcc : public FakeZcFilterOcc
+{
+public:
+ FakeZcSkipFilterOcc(const FakeWord &fw);
+
+ ~FakeZcSkipFilterOcc(void);
+
+ virtual SearchIterator *
+ createIterator(const TermFieldMatchDataArray &matchData) const;
+};
+
+static FPFactoryInit
+initNoSkip(std::make_pair("ZcNoSkipFilterOcc",
+ makeFPFactory<FPFactoryT<FakeZcSkipFilterOcc<false> > >));
+
+
+static FPFactoryInit
+initSkip(std::make_pair("ZcSkipFilterOcc",
+ makeFPFactory<FPFactoryT<FakeZcSkipFilterOcc<true> > >));
+
+template<>
+FakeZcSkipFilterOcc<false>::FakeZcSkipFilterOcc(const FakeWord &fw)
+ : FakeZcFilterOcc(fw, true, ".zcnoskipfilterocc")
+{
+ setup(fw, false, true);
+}
+
+
+template<>
+FakeZcSkipFilterOcc<true>::FakeZcSkipFilterOcc(const FakeWord &fw)
+ : FakeZcFilterOcc(fw, true, ".zcskipfilterocc")
+{
+ setup(fw, false, true);
+}
+
+
+template <bool doSkip>
+FakeZcSkipFilterOcc<doSkip>::~FakeZcSkipFilterOcc(void)
+{
+}
+
+
+template <bool doSkip>
+class FakeFilterOccZCSkipArrayIterator
+ : public queryeval::RankedSearchIteratorBase
+{
+private:
+
+ FakeFilterOccZCSkipArrayIterator(const FakeFilterOccZCSkipArrayIterator &other);
+
+ FakeFilterOccZCSkipArrayIterator&
+ operator=(const FakeFilterOccZCSkipArrayIterator &other);
+
+public:
+ // Pointer to compressed data
+ const uint8_t *_valI;
+ uint32_t _lastDocId;
+ uint32_t _l1SkipDocId;
+ uint32_t _l2SkipDocId;
+ uint32_t _l3SkipDocId;
+ uint32_t _l4SkipDocId;
+ const uint8_t *_l1SkipDocIdPos;
+ const uint8_t *_l1SkipValI;
+ const uint8_t *_valIBase;
+ const uint8_t *_l1SkipValIBase;
+ const uint8_t *_l2SkipDocIdPos;
+ const uint8_t *_l2SkipValI;
+ const uint8_t *_l2SkipL1SkipPos;
+ const uint8_t *_l2SkipValIBase;
+ const uint8_t *_l3SkipDocIdPos;
+ const uint8_t *_l3SkipValI;
+ const uint8_t *_l3SkipL1SkipPos;
+ const uint8_t *_l3SkipL2SkipPos;
+ const uint8_t *_l3SkipValIBase;
+ const uint8_t *_l4SkipDocIdPos;
+ const uint8_t *_l4SkipValI;
+ const uint8_t *_l4SkipL1SkipPos;
+ const uint8_t *_l4SkipL2SkipPos;
+ const uint8_t *_l4SkipL3SkipPos;
+
+ typedef search::bitcompression::FeatureDecodeContextBE DecodeContext;
+ typedef search::bitcompression::FeatureEncodeContextBE EncodeContext;
+ DecodeContext _decodeContext;
+ uint32_t _docIdLimit;
+
+ FakeFilterOccZCSkipArrayIterator(const uint64_t *compressed,
+ int bitOffset,
+ uint32_t docIdLimit,
+ const TermFieldMatchDataArray &matchData);
+
+ ~FakeFilterOccZCSkipArrayIterator(void);
+
+ void doL4SkipSeek(uint32_t docId);
+ void doL3SkipSeek(uint32_t docId);
+ void doL2SkipSeek(uint32_t docId);
+ void doL1SkipSeek(uint32_t docId);
+
+ void doUnpack(uint32_t docId) override;
+ void doSeek(uint32_t docId) override;
+ void initRange(uint32_t begin, uint32_t end) override;
+ Trinary is_strict() const override { return Trinary::True; }
+};
+
+
+template <bool doSkip>
+FakeFilterOccZCSkipArrayIterator<doSkip>::
+FakeFilterOccZCSkipArrayIterator(const uint64_t *compressed,
+ int bitOffset,
+ uint32_t docIdLimit,
+ const fef::TermFieldMatchDataArray &matchData)
+ : queryeval::RankedSearchIteratorBase(matchData),
+ _valI(NULL),
+ _lastDocId(0),
+ _l1SkipDocId(0),
+ _l2SkipDocId(0),
+ _l3SkipDocId(0),
+ _l4SkipDocId(0),
+ _l1SkipDocIdPos(NULL),
+ _l1SkipValI(NULL),
+ _valIBase(NULL),
+ _l1SkipValIBase(NULL),
+ _l2SkipDocIdPos(NULL),
+ _l2SkipValI(NULL),
+ _l2SkipL1SkipPos(NULL),
+ _l2SkipValIBase(NULL),
+ _l3SkipDocIdPos(NULL),
+ _l3SkipValI(NULL),
+ _l3SkipL1SkipPos(NULL),
+ _l3SkipL2SkipPos(NULL),
+ _l3SkipValIBase(NULL),
+ _l4SkipDocIdPos(NULL),
+ _l4SkipValI(NULL),
+ _l4SkipL1SkipPos(NULL),
+ _l4SkipL2SkipPos(NULL),
+ _l4SkipL3SkipPos(NULL),
+ _decodeContext(compressed, bitOffset),
+ _docIdLimit(docIdLimit)
+{
+}
+
+template <bool doSkip>
+void
+FakeFilterOccZCSkipArrayIterator<doSkip>::
+initRange(uint32_t begin, uint32_t end)
+{
+ queryeval::RankedSearchIteratorBase::initRange(begin, end);
+ DecodeContext &d = _decodeContext;
+ typedef EncodeContext EC;
+ UC64_DECODECONTEXT_CONSTRUCTOR(o, d._);
+ uint32_t length;
+ uint64_t val64;
+
+ UC64BE_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_NUMDOCS, EC);
+ uint32_t numDocs = static_cast<uint32_t>(val64) + 1;
+
+ uint32_t docIdK = EC::calcDocIdK(numDocs, _docIdLimit);
+
+ UC64BE_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_DOCIDSSIZE, EC);
+ uint32_t docIdsSize = val64 + 1;
+ UC64BE_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_L1SKIPSIZE, EC);
+ uint32_t l1SkipSize = val64;
+ uint32_t l2SkipSize = 0;
+ if (l1SkipSize != 0) {
+ UC64BE_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_L2SKIPSIZE, EC);
+ l2SkipSize = val64;
+ }
+ uint32_t l3SkipSize = 0;
+ if (l2SkipSize != 0) {
+ UC64BE_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_L3SKIPSIZE, EC);
+ l3SkipSize = val64;
+ }
+ uint32_t l4SkipSize = 0;
+ if (l3SkipSize != 0) {
+ UC64BE_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_L4SKIPSIZE, EC);
+ l4SkipSize = val64;
+ }
+ // Feature size would be here
+ UC64BE_DECODEEXPGOLOMB_NS(o, docIdK, EC);
+ _lastDocId = _docIdLimit - 1 - val64;
+ UC64_DECODECONTEXT_STORE(o, d._);
+ uint64_t bytePad = oPreRead & 7;
+ if (bytePad > 0) {
+ length = bytePad;
+ oVal <<= length;
+ UC64BE_READBITS_NS(o, EC);
+ }
+ UC64_DECODECONTEXT_STORE(o, d._);
+ assert((d.getBitOffset() & 7) == 0);
+ const uint8_t *bcompr = d.getByteCompr();
+ _valIBase = _valI = bcompr;
+ _l1SkipDocIdPos = _l2SkipDocIdPos = bcompr;
+ _l3SkipDocIdPos = _l4SkipDocIdPos = bcompr;
+ bcompr += docIdsSize;
+ if (l1SkipSize != 0) {
+ _l1SkipValIBase = _l1SkipValI = bcompr;
+ _l2SkipL1SkipPos = _l3SkipL1SkipPos = _l4SkipL1SkipPos = bcompr;
+ bcompr += l1SkipSize;
+ } else {
+ _l1SkipValIBase = _l1SkipValI = NULL;
+ _l2SkipL1SkipPos = _l3SkipL1SkipPos = _l4SkipL1SkipPos = NULL;
+ }
+ if (l2SkipSize != 0) {
+ _l2SkipValIBase = _l2SkipValI = bcompr;
+ _l3SkipL2SkipPos = _l4SkipL2SkipPos = bcompr;
+ bcompr += l2SkipSize;
+ } else {
+ _l2SkipValIBase = _l2SkipValI = NULL;
+ _l3SkipL2SkipPos = _l4SkipL2SkipPos = NULL;
+ }
+ if (l3SkipSize != 0) {
+ _l3SkipValIBase = _l3SkipValI = bcompr;
+ _l4SkipL3SkipPos = bcompr;
+ bcompr += l3SkipSize;
+ } else {
+ _l3SkipValIBase = _l3SkipValI = NULL;
+ _l4SkipL3SkipPos = NULL;
+ }
+ if (l4SkipSize != 0) {
+ _l4SkipValI = bcompr;
+ bcompr += l4SkipSize;
+ } else {
+ _l4SkipValI = NULL;
+ }
+ d.setByteCompr(bcompr);
+ uint32_t oDocId;
+ ZCDECODE(_valI, oDocId = 1 +);
+#if DEBUG_ZCFILTEROCC_PRINTF
+ printf("DecodeInit docId=%d\n",
+ oDocId);
+#endif
+ setDocId(oDocId);
+ if (_l1SkipValI != NULL) {
+ ZCDECODE(_l1SkipValI, _l1SkipDocId = 1 +);
+ } else
+ _l1SkipDocId = _lastDocId;
+#if DEBUG_ZCFILTEROCC_PRINTF
+ printf("L1DecodeInit docId=%d\n",
+ _l1SkipDocId);
+#endif
+ if (_l2SkipValI != NULL) {
+ ZCDECODE(_l2SkipValI, _l2SkipDocId = 1 +);
+ } else
+ _l2SkipDocId = _lastDocId;
+#if DEBUG_ZCFILTEROCC_PRINTF
+ printf("L2DecodeInit docId=%d\n",
+ _l2SkipDocId);
+#endif
+ if (_l3SkipValI != NULL) {
+ ZCDECODE(_l3SkipValI, _l3SkipDocId = 1 +);
+ } else
+ _l3SkipDocId = _lastDocId;
+#if DEBUG_ZCFILTEROCC_PRINTF
+ printf("L3DecodeInit docId=%d\n",
+ _l3SkipDocId);
+#endif
+ if (_l4SkipValI != NULL) {
+ ZCDECODE(_l4SkipValI, _l4SkipDocId = 1 +);
+ } else
+ _l4SkipDocId = _lastDocId;
+#if DEBUG_ZCFILTEROCC_PRINTF
+ printf("L4DecodeInit docId=%d\n",
+ _l4SkipDocId);
+#endif
+ clearUnpacked();
+}
+
+
+template <bool doSkip>
+FakeFilterOccZCSkipArrayIterator<doSkip>::
+~FakeFilterOccZCSkipArrayIterator(void)
+{
+}
+
+
+template <>
+void
+FakeFilterOccZCSkipArrayIterator<true>::doL4SkipSeek(uint32_t docId)
+{
+ uint32_t lastL4SkipDocId;
+
+ if (__builtin_expect(docId > _lastDocId, false)) {
+ _l4SkipDocId = _l3SkipDocId = _l2SkipDocId = _l1SkipDocId = search::endDocId;
+ setAtEnd();
+ return;
+ }
+ do {
+ lastL4SkipDocId = _l4SkipDocId;
+ ZCDECODE(_l4SkipValI, _l4SkipDocIdPos += 1 +);
+ ZCDECODE(_l4SkipValI, _l4SkipL1SkipPos += 1 + );
+ ZCDECODE(_l4SkipValI, _l4SkipL2SkipPos += 1 + );
+ ZCDECODE(_l4SkipValI, _l4SkipL3SkipPos += 1 + );
+ ZCDECODE(_l4SkipValI, _l4SkipDocId += 1 + );
+#if DEBUG_ZCFILTEROCC_PRINTF
+ printf("L4Decode docId %d, docIdPos %d,"
+ " l1SkipPos %d, l2SkipPos %d, l3SkipPos %d, nextDocId %d\n",
+ lastL4SkipDocId,
+ (int) (_l4SkipDocIdPos - _valIBase),
+ (int) (_l4SkipL1SkipPos - _l1SkipValIBase),
+ (int) (_l4SkipL2SkipPos - _l2SkipValIBase),
+ (int) (_l4SkipL3SkipPos - _l3SkipValIBase),
+ _l4SkipDocId);
+#endif
+ } while (docId > _l4SkipDocId);
+ _valI = _l1SkipDocIdPos = _l2SkipDocIdPos = _l3SkipDocIdPos =
+ _l4SkipDocIdPos;
+ _l1SkipDocId = _l2SkipDocId = _l3SkipDocId = lastL4SkipDocId;
+ _l1SkipValI = _l2SkipL1SkipPos = _l3SkipL1SkipPos = _l4SkipL1SkipPos;
+ _l2SkipValI = _l3SkipL2SkipPos = _l4SkipL2SkipPos;
+ _l3SkipValI = _l4SkipL3SkipPos;
+ ZCDECODE(_valI, lastL4SkipDocId += 1 +);
+ ZCDECODE(_l1SkipValI, _l1SkipDocId += 1 +);
+ ZCDECODE(_l2SkipValI, _l2SkipDocId += 1 +);
+ ZCDECODE(_l3SkipValI, _l3SkipDocId += 1 +);
+#if DEBUG_ZCFILTEROCC_PRINTF
+ printf("L4Seek, docId %d docIdPos %d"
+ " L1SkipPos %d L2SkipPos %d L3SkipPos %d, nextDocId %d\n",
+ lastL4SkipDocId,
+ (int) (_l4SkipDocIdPos - _valIBase),
+ (int) (_l4SkipL1SkipPos - _l1SkipValIBase),
+ (int) (_l4SkipL2SkipPos - _l2SkipValIBase),
+ (int) (_l4SkipL3SkipPos - _l3SkipValIBase),
+ _l4SkipDocId);
+#endif
+ setDocId(lastL4SkipDocId);
+}
+
+
+template <>
+void
+FakeFilterOccZCSkipArrayIterator<true>::doL3SkipSeek(uint32_t docId)
+{
+ uint32_t lastL3SkipDocId;
+
+ if (__builtin_expect(docId > _l4SkipDocId, false)) {
+ doL4SkipSeek(docId);
+ if (docId <= _l3SkipDocId)
+ return;
+ }
+ do {
+ lastL3SkipDocId = _l3SkipDocId;
+ ZCDECODE(_l3SkipValI, _l3SkipDocIdPos += 1 +);
+ ZCDECODE(_l3SkipValI, _l3SkipL1SkipPos += 1 + );
+ ZCDECODE(_l3SkipValI, _l3SkipL2SkipPos += 1 + );
+ ZCDECODE(_l3SkipValI, _l3SkipDocId += 1 + );
+#if DEBUG_ZCFILTEROCC_PRINTF
+ printf("L3Decode docId %d, docIdPos %d,"
+ " l1SkipPos %d, l2SkipPos %d, nextDocId %d\n",
+ lastL3SkipDocId,
+ (int) (_l3SkipDocIdPos - _valIBase),
+ (int) (_l3SkipL1SkipPos - _l1SkipValIBase),
+ (int) (_l3SkipL2SkipPos - _l2SkipValIBase),
+ _l3SkipDocId);
+#endif
+ } while (docId > _l3SkipDocId);
+ _valI = _l1SkipDocIdPos = _l2SkipDocIdPos = _l3SkipDocIdPos;
+ _l1SkipDocId = _l2SkipDocId = lastL3SkipDocId;
+ _l1SkipValI = _l2SkipL1SkipPos = _l3SkipL1SkipPos;
+ _l2SkipValI = _l3SkipL2SkipPos;
+ ZCDECODE(_valI, lastL3SkipDocId += 1 +);
+ ZCDECODE(_l1SkipValI, _l1SkipDocId += 1 +);
+ ZCDECODE(_l2SkipValI, _l2SkipDocId += 1 +);
+#if DEBUG_ZCFILTEROCC_PRINTF
+ printf("L3Seek, docId %d docIdPos %d"
+ " L1SkipPos %d L2SkipPos %d, nextDocId %d\n",
+ lastL3SkipDocId,
+ (int) (_l3SkipDocIdPos - _valIBase),
+ (int) (_l3SkipL1SkipPos - _l1SkipValIBase),
+ (int) (_l3SkipL2SkipPos - _l2SkipValIBase),
+ _l3SkipDocId);
+#endif
+ setDocId(lastL3SkipDocId);
+}
+
+
+template <>
+void
+FakeFilterOccZCSkipArrayIterator<true>::doL2SkipSeek(uint32_t docId)
+{
+ uint32_t lastL2SkipDocId;
+
+ if (__builtin_expect(docId > _l3SkipDocId, false)) {
+ doL3SkipSeek(docId);
+ if (docId <= _l2SkipDocId)
+ return;
+ }
+ do {
+ lastL2SkipDocId = _l2SkipDocId;
+ ZCDECODE(_l2SkipValI, _l2SkipDocIdPos += 1 +);
+ ZCDECODE(_l2SkipValI, _l2SkipL1SkipPos += 1 + );
+ ZCDECODE(_l2SkipValI, _l2SkipDocId += 1 + );
+#if DEBUG_ZCFILTEROCC_PRINTF
+ printf("L2Decode docId %d, docIdPos %d, l1SkipPos %d, nextDocId %d\n",
+ lastL2SkipDocId,
+ (int) (_l2SkipDocIdPos - _valIBase),
+ (int) (_l2SkipL1SkipPos - _l1SkipValIBase),
+ _l2SkipDocId);
+#endif
+ } while (docId > _l2SkipDocId);
+ _valI = _l1SkipDocIdPos = _l2SkipDocIdPos;
+ _l1SkipDocId = lastL2SkipDocId;
+ _l1SkipValI = _l2SkipL1SkipPos;
+ ZCDECODE(_valI, lastL2SkipDocId += 1 +);
+ ZCDECODE(_l1SkipValI, _l1SkipDocId += 1 +);
+#if DEBUG_ZCFILTEROCC_PRINTF
+ printf("L2Seek, docId %d docIdPos %d L1SkipPos %d, nextDocId %d\n",
+ lastL2SkipDocId,
+ (int) (_l2SkipDocIdPos - _valIBase),
+ (int) (_l2SkipL1SkipPos - _l1SkipValIBase),
+ _l2SkipDocId);
+#endif
+ setDocId(lastL2SkipDocId);
+}
+
+
+template <>
+void
+FakeFilterOccZCSkipArrayIterator<false>::doL1SkipSeek(uint32_t docId)
+{
+ (void) docId;
+}
+
+
+template <>
+void
+FakeFilterOccZCSkipArrayIterator<true>::doL1SkipSeek(uint32_t docId)
+{
+ uint32_t lastL1SkipDocId;
+ if (__builtin_expect(docId > _l2SkipDocId, false)) {
+ doL2SkipSeek(docId);
+ if (docId <= _l1SkipDocId)
+ return;
+ }
+ do {
+ lastL1SkipDocId = _l1SkipDocId;
+ ZCDECODE(_l1SkipValI, _l1SkipDocIdPos += 1 +);
+ ZCDECODE(_l1SkipValI, _l1SkipDocId += 1 +);
+#if DEBUG_ZCFILTEROCC_PRINTF
+ printf("L1Decode docId %d, docIdPos %d, L1SkipPos %d, nextDocId %d\n",
+ lastL1SkipDocId,
+ (int) (_l1SkipDocIdPos - _valIBase),
+ (int) (_l1SkipValI - _l1SkipValIBase),
+ _l1SkipDocId);
+#endif
+ } while (docId > _l1SkipDocId);
+ _valI = _l1SkipDocIdPos;
+ ZCDECODE(_valI, lastL1SkipDocId += 1 +);
+ setDocId(lastL1SkipDocId);
+#if DEBUG_ZCFILTEROCC_PRINTF
+ printf("L1SkipSeek, docId %d docIdPos %d, nextDocId %d\n",
+ lastL1SkipDocId,
+ (int) (_l1SkipDocIdPos - _valIBase),
+ _l1SkipDocId);
+#endif
+}
+
+
+template <bool doSkip>
+void
+FakeFilterOccZCSkipArrayIterator<doSkip>::doSeek(uint32_t docId)
+{
+ if (getUnpacked())
+ clearUnpacked();
+ if (doSkip && docId > _l1SkipDocId) {
+ doL1SkipSeek(docId);
+ }
+ uint32_t oDocId = getDocId();
+ if (doSkip) {
+#if DEBUG_ZCFILTEROCC_ASSERT
+ assert(oDocId <= _l1SkipDocId);
+ assert(docId <= _l1SkipDocId);
+ assert(oDocId <= _l2SkipDocId);
+ assert(docId <= _l2SkipDocId);
+ assert(oDocId <= _l3SkipDocId);
+ assert(docId <= _l3SkipDocId);
+ assert(oDocId <= _l4SkipDocId);
+ assert(docId <= _l4SkipDocId);
+#endif
+ }
+ const uint8_t *oCompr = _valI;
+ while (__builtin_expect(oDocId < docId, true)) {
+ if (!doSkip) {
+ if (__builtin_expect(oDocId >= _lastDocId, false)) {
+#if DEBUG_ZCFILTEROCC_ASSERT
+ assert(_l1SkipDocId == _lastDocId);
+ assert(_l2SkipDocId == _lastDocId);
+ assert(_l3SkipDocId == _lastDocId);
+ assert(_l4SkipDocId == _lastDocId);
+#endif
+ oDocId = _l1SkipDocId = _l2SkipDocId = _l3SkipDocId =
+ _l4SkipDocId = search::endDocId;
+ break;
+ }
+ }
+ if (doSkip) {
+#if DEBUG_ZCFILTEROCC_ASSERT
+ assert(oDocId <= _l1SkipDocId);
+ assert(oDocId <= _l2SkipDocId);
+ assert(oDocId <= _l3SkipDocId);
+ assert(oDocId <= _l4SkipDocId);
+#endif
+ } else if (__builtin_expect(oDocId >= _l1SkipDocId, false)) {
+ // Validate L1 Skip information
+ assert(oDocId == _l1SkipDocId);
+ ZCDECODE(_l1SkipValI, _l1SkipDocIdPos += 1 +);
+ assert(oCompr == _l1SkipDocIdPos);
+ if (__builtin_expect(oDocId >= _l2SkipDocId, false)) {
+ // Validate L2 Skip information
+ assert(oDocId == _l2SkipDocId);
+ ZCDECODE(_l2SkipValI, _l2SkipDocIdPos += 1 +);
+ ZCDECODE(_l2SkipValI, _l2SkipL1SkipPos += 1 +);
+ assert(oCompr = _l2SkipDocIdPos);
+ assert(_l1SkipValI == _l2SkipL1SkipPos);
+ if (__builtin_expect(oDocId >= _l3SkipDocId, false)) {
+ // Validate L3 Skip information
+ assert(oDocId == _l3SkipDocId);
+ ZCDECODE(_l3SkipValI, _l3SkipDocIdPos += 1 +);
+ ZCDECODE(_l3SkipValI, _l3SkipL1SkipPos += 1 +);
+ ZCDECODE(_l3SkipValI, _l3SkipL2SkipPos += 1 +);
+ assert(oCompr = _l3SkipDocIdPos);
+ assert(_l1SkipValI == _l3SkipL1SkipPos);
+ assert(_l2SkipValI == _l3SkipL2SkipPos);
+ if (__builtin_expect(oDocId >= _l4SkipDocId, false)) {
+ // Validate L4 Skip information
+ assert(oDocId == _l4SkipDocId);
+ ZCDECODE(_l4SkipValI, _l4SkipDocIdPos += 1 +);
+ ZCDECODE(_l4SkipValI, _l4SkipL1SkipPos += 1 +);
+ ZCDECODE(_l4SkipValI, _l4SkipL2SkipPos += 1 +);
+ ZCDECODE(_l4SkipValI, _l4SkipL3SkipPos += 1 +);
+ assert(oCompr = _l4SkipDocIdPos);
+ assert(_l1SkipValI == _l4SkipL1SkipPos);
+ assert(_l2SkipValI == _l4SkipL2SkipPos);
+ assert(_l3SkipValI == _l4SkipL3SkipPos);
+ ZCDECODE(_l4SkipValI, _l4SkipDocId += 1 +);
+ assert(_l4SkipDocId <= _lastDocId);
+#if DEBUG_ZCFILTEROCC_PRINTF
+ printf("L4DecodeV docId=%d docIdPos=%d"
+ " L1SkipPos=%d L2SkipPos %d L3SkipPos %d\n",
+ _l4SkipDocId,
+ (int) (_l4SkipDocIdPos - _valIBase),
+ (int) (_l4SkipL1SkipPos - _l1SkipValIBase),
+ (int) (_l4SkipL2SkipPos - _l2SkipValIBase),
+ (int) (_l4SkipL3SkipPos - _l3SkipValIBase));
+#endif
+ }
+ ZCDECODE(_l3SkipValI, _l3SkipDocId += 1 +);
+ assert(_l3SkipDocId <= _lastDocId);
+ assert(_l3SkipDocId <= _l4SkipDocId);
+#if DEBUG_ZCFILTEROCC_PRINTF
+ printf("L3DecodeV docId=%d docIdPos=%d"
+ " L1SkipPos=%d L2SkipPos %d\n",
+ _l3SkipDocId,
+ (int) (_l3SkipDocIdPos - _valIBase),
+ (int) (_l3SkipL1SkipPos - _l1SkipValIBase),
+ (int) (_l3SkipL2SkipPos - _l2SkipValIBase));
+#endif
+ }
+ ZCDECODE(_l2SkipValI, _l2SkipDocId += 1 +);
+ assert(_l2SkipDocId <= _lastDocId);
+ assert(_l2SkipDocId <= _l4SkipDocId);
+ assert(_l2SkipDocId <= _l3SkipDocId);
+#if DEBUG_ZCFILTEROCC_PRINTF
+ printf("L2DecodeV docId=%d docIdPos=%d L1SkipPos=%d\n",
+ _l2SkipDocId,
+ (int) (_l2SkipDocIdPos - _valIBase),
+ (int) (_l2SkipL1SkipPos - _l1SkipValIBase));
+#endif
+ }
+ ZCDECODE(_l1SkipValI, _l1SkipDocId += 1 +);
+ assert(_l1SkipDocId <= _lastDocId);
+ assert(_l1SkipDocId <= _l4SkipDocId);
+ assert(_l1SkipDocId <= _l3SkipDocId);
+ assert(_l1SkipDocId <= _l2SkipDocId);
+#if DEBUG_ZCFILTEROCC_PRINTF
+ printf("L1DecodeV docId=%d, docIdPos=%d\n",
+ _l1SkipDocId,
+ (int) (_l1SkipDocIdPos - _valIBase));
+#endif
+ }
+ ZCDECODE(oCompr, oDocId += 1 +);
+#if DEBUG_ZCFILTEROCC_PRINTF
+ printf("Decode docId=%d\n",
+ oDocId);
+#endif
+ }
+ _valI = oCompr;
+ setDocId(oDocId);
+ return;
+}
+
+
+template <bool doSkip>
+void
+FakeFilterOccZCSkipArrayIterator<doSkip>::doUnpack(uint32_t docId)
+{
+ if (_matchData.size() != 1 || getUnpacked()) {
+ return;
+ }
+ assert(docId == getDocId());
+ _matchData[0]->reset(docId);
+ setUnpacked();
+}
+
+
+template <bool doSkip>
+SearchIterator *
+FakeZcSkipFilterOcc<doSkip>::
+createIterator(const TermFieldMatchDataArray &matchData) const
+{
+ return new FakeFilterOccZCSkipArrayIterator<doSkip>(_compressed.first,
+ 0,
+ _docIdLimit,
+ matchData);
+}
+
+
+template <bool bigEndian>
+class FakeEGCompr64PosOcc : public FakeZcFilterOcc
+{
+public:
+ FakeEGCompr64PosOcc(const FakeWord &fw);
+
+ ~FakeEGCompr64PosOcc(void);
+
+ void
+ setup(const FakeWord &fw);
+
+ size_t
+ bitSize(void) const;
+
+ virtual bool
+ hasWordPositions(void) const;
+
+ virtual SearchIterator *
+ createIterator(const TermFieldMatchDataArray &matchData) const;
+};
+
+
+template <bool bigEndian>
+FakeEGCompr64PosOcc<bigEndian>::FakeEGCompr64PosOcc(const FakeWord &fw)
+ : FakeZcFilterOcc(fw, bigEndian,
+ bigEndian ? ".zcposoccbe" : ".zcposoccle")
+{
+ setup(fw);
+}
+
+
+template <bool bigEndian>
+FakeEGCompr64PosOcc<bigEndian>::~FakeEGCompr64PosOcc(void)
+{
+}
+
+
+template <bool bigEndian>
+void
+FakeEGCompr64PosOcc<bigEndian>::setup(const FakeWord &fw)
+{
+ uint32_t lastDocId = 0u;
+
+ typedef FakeWord FW;
+ typedef FW::DocWordFeatureList DWFL;
+ typedef FW::DocWordPosFeatureList DWPFL;
+
+ DWFL::const_iterator d(fw._postings.begin());
+ DWFL::const_iterator de(fw._postings.end());
+ DWPFL::const_iterator p(fw._wordPosFeatures.begin());
+ DWPFL::const_iterator pe(fw._wordPosFeatures.end());
+ DocIdAndPosOccFeatures features;
+ EGPosOccEncodeContext<bigEndian> e(&_fieldsParams);
+ ComprFileWriteContext ectx(e);
+ e.setWriteContext(&ectx);
+ ectx.allocComprBuf(64, 1);
+ e.afterWrite(ectx, 0, 0);
+
+ _hitDocs = fw._postings.size();
+ _docIdLimit = fw._docIdLimit;
+ if (_hitDocs > 0)
+ _lastDocId = fw._postings.back()._docId;
+ else
+ _lastDocId = 0u;
+ e.encodeExpGolomb(_hitDocs - 1, K_VALUE_ZCPOSTING_NUMDOCS);
+
+ uint32_t docIdK = e.calcDocIdK(_hitDocs, _docIdLimit);
+
+ while (d != de) {
+ e.encodeExpGolomb(d->_docId - lastDocId - 1, docIdK);
+#if DEBUG_ZCFILTEROCC_PRINTF
+ printf("Encode docId=%d (+%u + 1)\n",
+ d->_docId, d->_docId - lastDocId - 1);
+#endif
+ fw.setupFeatures(*d, &*p, features);
+ p += d->_positions;
+ e.writeFeatures(features);
+ lastDocId = d->_docId;
+ ++d;
+ }
+ assert(p == pe);
+
+ _compressedBits = e.getWriteOffset();
+
+ // First pad to 64 bits.
+ uint32_t pad = (64 - e.getWriteOffset()) & 63;
+ while (pad > 0) {
+ uint32_t now = std::min(32u, pad);
+ e.writeBits(0, now);
+ e.writeComprBufferIfNeeded();
+ pad -= now;
+ }
+
+ // Then write 128 more bits. This allows for 64-bit decoding
+ // with a readbits that always leaves a nonzero preRead
+ for (unsigned int i = 0; i < 4; i++) {
+ e.writeBits(0, 32);
+ e.writeComprBufferIfNeeded();
+ }
+ e.writeComprBufferIfNeeded();
+ e.flush();
+ e.writeComprBuffer();
+
+ std::pair<void *, size_t> ectxData =
+ ectx.grabComprBuffer(_compressedMalloc);
+ _compressed = std::make_pair(static_cast<uint64_t *>(ectxData.first),
+ ectxData.second);
+}
+
+
+template <bool bigEndian>
+size_t
+FakeEGCompr64PosOcc<bigEndian>::bitSize(void) const
+{
+ return _compressedBits;
+}
+
+
+template <bool bigEndian>
+bool
+FakeEGCompr64PosOcc<bigEndian>::hasWordPositions(void) const
+{
+ return true;
+}
+
+
+template <bool bigEndian>
+SearchIterator *
+FakeEGCompr64PosOcc<bigEndian>::
+createIterator(const TermFieldMatchDataArray &matchData) const
+{
+ return new ZcRareWordPosOccIterator<bigEndian>(Position(_compressed.first, 0),
+ _compressedBits, _docIdLimit, &_fieldsParams, matchData);
+}
+
+
+template <bool bigEndian>
+class FakeEG2Compr64PosOcc : public FakeZcFilterOcc
+{
+public:
+ FakeEG2Compr64PosOcc(const FakeWord &fw);
+
+ ~FakeEG2Compr64PosOcc(void);
+
+ void
+ setup(const FakeWord &fw);
+
+ size_t
+ bitSize(void) const;
+
+ virtual bool
+ hasWordPositions(void) const;
+
+ virtual SearchIterator *
+ createIterator(const fef::TermFieldMatchDataArray &matchData) const;
+};
+
+
+template <bool bigEndian>
+FakeEG2Compr64PosOcc<bigEndian>::FakeEG2Compr64PosOcc(const FakeWord &fw)
+ : FakeZcFilterOcc(fw, bigEndian,
+ bigEndian ? ".zc2posoccbe" : ".zc2posoccle")
+{
+ setup(fw);
+}
+
+
+template <bool bigEndian>
+FakeEG2Compr64PosOcc<bigEndian>::~FakeEG2Compr64PosOcc(void)
+{
+}
+
+
+template <bool bigEndian>
+void
+FakeEG2Compr64PosOcc<bigEndian>::setup(const FakeWord &fw)
+{
+ uint32_t lastDocId = 0u;
+
+ typedef FakeWord FW;
+ typedef FW::DocWordFeatureList DWFL;
+ typedef FW::DocWordPosFeatureList DWPFL;
+
+ DWFL::const_iterator d(fw._postings.begin());
+ DWFL::const_iterator de(fw._postings.end());
+ DWPFL::const_iterator p(fw._wordPosFeatures.begin());
+ DWPFL::const_iterator pe(fw._wordPosFeatures.end());
+ DocIdAndPosOccFeatures features;
+ EG2PosOccEncodeContext<bigEndian> e(&_fieldsParams);
+ ComprFileWriteContext ectx(e);
+ e.setWriteContext(&ectx);
+ ectx.allocComprBuf(64, 1);
+ e.afterWrite(ectx, 0, 0);
+
+ _hitDocs = fw._postings.size();
+ _docIdLimit = fw._docIdLimit;
+ if (_hitDocs > 0)
+ _lastDocId = fw._postings.back()._docId;
+ else
+ _lastDocId = 0u;
+ e.encodeExpGolomb(_hitDocs - 1, K_VALUE_ZCPOSTING_NUMDOCS);
+
+ while (d != de) {
+ e.encodeExpGolomb(d->_docId - lastDocId - 1,
+ K_VALUE_ZCPOSTING_DELTA_DOCID);
+#if DEBUG_ZCFILTEROCC_PRINTF
+ printf("Encode docId=%d (+%u + 1)\n",
+ d->_docId, d->_docId - lastDocId - 1);
+#endif
+ fw.setupFeatures(*d, &*p, features);
+ p += d->_positions;
+ e.writeFeatures(features);
+ lastDocId = d->_docId;
+ ++d;
+ }
+ assert(p == pe);
+
+ _compressedBits = e.getWriteOffset();
+
+ // First pad to 64 bits.
+ uint32_t pad = (64 - e.getWriteOffset()) & 63;
+ while (pad > 0) {
+ uint32_t now = std::min(32u, pad);
+ e.writeBits(0, now);
+ e.writeComprBufferIfNeeded();
+ pad -= now;
+ }
+
+ // Then write 128 more bits. This allows for 64-bit decoding
+ // with a readbits that always leaves a nonzero preRead
+ for (unsigned int i = 0; i < 4; i++) {
+ e.writeBits(0, 32);
+ e.writeComprBufferIfNeeded();
+ }
+ e.writeComprBufferIfNeeded();
+ e.flush();
+ e.writeComprBuffer();
+
+ std::pair<void *, size_t> ectxData =
+ ectx.grabComprBuffer(_compressedMalloc);
+ _compressed = std::make_pair(static_cast<uint64_t *>(ectxData.first),
+ ectxData.second);
+}
+
+
+template <bool bigEndian>
+size_t
+FakeEG2Compr64PosOcc<bigEndian>::bitSize(void) const
+{
+ return _compressedBits;
+}
+
+
+template <bool bigEndian>
+bool
+FakeEG2Compr64PosOcc<bigEndian>::hasWordPositions(void) const
+{
+ return true;
+}
+
+
+template <bool bigEndian>
+SearchIterator *
+FakeEG2Compr64PosOcc<bigEndian>::
+createIterator(const TermFieldMatchDataArray &matchData) const
+{
+ return new Zc4RareWordPosOccIterator<bigEndian>(Position(_compressed.first, 0),
+ _compressedBits, _docIdLimit, &_fieldsParams, matchData);
+}
+
+
+template <bool bigEndian>
+class FakeZcSkipPosOcc : public FakeZcFilterOcc
+{
+ search::index::PostingListCounts _counts;
+public:
+ FakeZcSkipPosOcc(const FakeWord &fw);
+
+ ~FakeZcSkipPosOcc(void);
+
+ size_t
+ bitSize(void) const;
+
+ virtual bool
+ hasWordPositions(void) const;
+
+ virtual SearchIterator *
+ createIterator(const TermFieldMatchDataArray &matchData) const;
+};
+
+
+template <bool bigEndian>
+FakeZcSkipPosOcc<bigEndian>::FakeZcSkipPosOcc(const FakeWord &fw)
+ : FakeZcFilterOcc(fw, bigEndian,
+ bigEndian ? ".zcskipposoccbe" : ".zcskipposoccle")
+{
+ setup(fw, true, true);
+ _counts._bitLength = _compressedBits;
+}
+
+
+template <bool bigEndian>
+FakeZcSkipPosOcc<bigEndian>::~FakeZcSkipPosOcc(void)
+{
+}
+
+
+template <bool bigEndian>
+size_t
+FakeZcSkipPosOcc<bigEndian>::bitSize(void) const
+{
+ return _compressedBits -
+ _l1SkipSize - _l2SkipSize - _l3SkipSize - _l4SkipSize;
+}
+
+
+template <bool bigEndian>
+bool
+FakeZcSkipPosOcc<bigEndian>::hasWordPositions(void) const
+{
+ return true;
+}
+
+
+template <bool bigEndian>
+SearchIterator *
+FakeZcSkipPosOcc<bigEndian>::
+createIterator(const TermFieldMatchDataArray &matchData) const
+{
+ return new ZcPosOccIterator<bigEndian>(Position(_compressed.first, 0), _compressedBits, _docIdLimit,
+ static_cast<uint32_t>(-1),
+ _counts,
+ &_fieldsParams,
+ matchData);
+}
+
+
+template <bool bigEndian>
+class FakeZc2SkipPosOcc : public FakeZcFilterOcc
+{
+ search::index::PostingListCounts _counts;
+public:
+ FakeZc2SkipPosOcc(const FakeWord &fw);
+
+ ~FakeZc2SkipPosOcc(void);
+
+ size_t
+ bitSize(void) const;
+
+ virtual bool
+ hasWordPositions(void) const;
+
+ virtual SearchIterator *
+ createIterator(const TermFieldMatchDataArray &matchData) const;
+};
+
+
+template <bool bigEndian>
+FakeZc2SkipPosOcc<bigEndian>::FakeZc2SkipPosOcc(const FakeWord &fw)
+ : FakeZcFilterOcc(fw, bigEndian,
+ bigEndian ? ".zc2skipposoccbe" : ".zc2skipposoccle")
+{
+ setup(fw, true, false);
+ _counts._bitLength = _compressedBits;
+}
+
+
+template <bool bigEndian>
+FakeZc2SkipPosOcc<bigEndian>::~FakeZc2SkipPosOcc(void)
+{
+}
+
+
+template <bool bigEndian>
+size_t
+FakeZc2SkipPosOcc<bigEndian>::bitSize(void) const
+{
+ return _compressedBits -
+ _l1SkipSize - _l2SkipSize - _l3SkipSize - _l4SkipSize;
+}
+
+
+template <bool bigEndian>
+bool
+FakeZc2SkipPosOcc<bigEndian>::hasWordPositions(void) const
+{
+ return true;
+}
+
+
+template <bool bigEndian>
+SearchIterator *
+FakeZc2SkipPosOcc<bigEndian>::
+createIterator(const TermFieldMatchDataArray &matchData) const
+{
+ return new Zc4PosOccIterator<bigEndian>(Position(_compressed.first, 0), _compressedBits, _docIdLimit,
+ static_cast<uint32_t>(-1), _counts, &_fieldsParams, matchData);
+}
+
+
+static FPFactoryInit
+initPosbe(std::make_pair("EGCompr64PosOccBE",
+ makeFPFactory<FPFactoryT<FakeEGCompr64PosOcc<true> > >));
+
+static FPFactoryInit
+initPosle(std::make_pair("EGCompr64PosOccLE",
+ makeFPFactory<FPFactoryT<FakeEGCompr64PosOcc<false> > >));
+
+
+static FPFactoryInit
+initPos0be(std::make_pair("EG2Compr64PosOccBE",
+ makeFPFactory<FPFactoryT<FakeEG2Compr64PosOcc<true> > >));
+
+
+static FPFactoryInit
+initPos0le(std::make_pair("EG2Compr64PosOccLE",
+ makeFPFactory<FPFactoryT<FakeEG2Compr64PosOcc<false> > >));
+
+
+static FPFactoryInit
+initSkipPosbe(std::make_pair("ZcSkipPosOccBE",
+ makeFPFactory<FPFactoryT<FakeZcSkipPosOcc<true> > >));
+
+
+static FPFactoryInit
+initSkipPosle(std::make_pair("ZcSkipPosOccLE",
+ makeFPFactory<FPFactoryT<FakeZcSkipPosOcc<false> > >));
+
+
+static FPFactoryInit
+initSkipPos0be(std::make_pair("Zc2SkipPosOccBE",
+ makeFPFactory<FPFactoryT<FakeZc2SkipPosOcc<true> > >));
+
+
+static FPFactoryInit
+initSkipPos0le(std::make_pair("Zc2SkipPosOccLE",
+ makeFPFactory<FPFactoryT<FakeZc2SkipPosOcc<false> > >));
+
+
+} // namespace fakedata
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/test/fakedata/fakezcfilterocc.h b/searchlib/src/vespa/searchlib/test/fakedata/fakezcfilterocc.h
new file mode 100644
index 00000000000..0e1bcba7680
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/test/fakedata/fakezcfilterocc.h
@@ -0,0 +1,119 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+
+#include "fakeword.h"
+#include "fakeposting.h"
+#include <vespa/searchlib/bitcompression/compression.h>
+#include <vespa/searchlib/bitcompression/posocccompression.h>
+
+namespace search
+{
+
+namespace fakedata
+{
+
+/*
+ * YST style compression of docid list.
+ */
+class FakeZcFilterOcc : public FakePosting
+{
+protected:
+ size_t _docIdsSize;
+ size_t _l1SkipSize;
+ size_t _l2SkipSize;
+ size_t _l3SkipSize;
+ size_t _l4SkipSize;
+ unsigned int _docIdLimit;
+ unsigned int _hitDocs;
+ uint32_t _lastDocId;
+
+ uint64_t _compressedBits;
+ std::pair<uint64_t *, size_t> _compressed;
+ void *_compressedMalloc;
+ uint64_t _featuresSize;
+ const search::bitcompression::PosOccFieldsParams &_fieldsParams;
+ bool _bigEndian;
+protected:
+ void
+ setup(const FakeWord &fw, bool doFeatures, bool dynamicK);
+
+ template <bool bigEndian>
+ void
+ setupT(const FakeWord &fw, bool doFeatures, bool dynamicK);
+
+public:
+ FakeZcFilterOcc(const FakeWord &fw);
+
+ FakeZcFilterOcc(const FakeWord &fw,
+ bool bigEndian,
+ const char *nameSuffix);
+
+ ~FakeZcFilterOcc(void);
+
+ static void
+ forceLink(void);
+
+ /*
+ * Size of posting list, in bits.
+ */
+ size_t
+ bitSize(void) const;
+
+ virtual bool
+ hasWordPositions(void) const;
+
+ /*
+ * Size of posting skip list, in bits.
+ */
+ size_t
+ skipBitSize(void) const;
+
+ size_t
+ l1SkipBitSize(void) const;
+
+ size_t
+ l2SkipBitSize(void) const;
+
+ size_t
+ l3SkipBitSize(void) const;
+
+ size_t
+ l4SkipBitSize(void) const;
+
+ /*
+ * Single posting list performance, without feature unpack.
+ */
+ virtual int
+ lowLevelSinglePostingScan(void) const;
+
+ /*
+ * Single posting list performance, with feature unpack.
+ */
+ virtual int
+ lowLevelSinglePostingScanUnpack(void) const;
+
+ /*
+ * Two posting lists performance (same format) without feature unpack.
+ */
+ virtual int
+ lowLevelAndPairPostingScan(const FakePosting &rhs) const;
+
+ /*
+ * Two posting lists performance (same format) with feature unpack.
+ */
+ virtual int
+ lowLevelAndPairPostingScanUnpack(const FakePosting &rhs) const;
+
+
+ /*
+ * Iterator factory, for current query evaluation framework.
+ */
+ virtual search::queryeval::SearchIterator *
+ createIterator(const fef::TermFieldMatchDataArray &matchData) const;
+};
+
+} // namespace fakedata
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/test/fakedata/fpfactory.cpp b/searchlib/src/vespa/searchlib/test/fakedata/fpfactory.cpp
new file mode 100644
index 00000000000..94ccd4cd891
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/test/fakedata/fpfactory.cpp
@@ -0,0 +1,120 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".fpfactory");
+#include "fakeegcompr64filterocc.h"
+#include "fakefilterocc.h"
+#include "fakezcbfilterocc.h"
+#include "fakezcfilterocc.h"
+#include "fakememtreeocc.h"
+#include "fpfactory.h"
+#include "fakewordset.h"
+
+namespace search
+{
+
+namespace fakedata
+{
+
+using index::Schema;
+
+FPFactory::~FPFactory(void)
+{
+}
+
+void
+FPFactory::setup(const FakeWordSet &fws)
+{
+ std::vector<const FakeWord *> v;
+
+ for (uint32_t wc = 0; wc < fws._words.size(); ++wc) {
+ std::vector<FakeWord *>::const_iterator fwi(fws._words[wc].begin());
+ std::vector<FakeWord *>::const_iterator fwe(fws._words[wc].end());
+ while (fwi != fwe) {
+ v.push_back(*fwi);
+ ++fwi;
+ }
+ }
+ setup(v);
+}
+
+
+void
+FPFactory::setup(const std::vector<const FakeWord *> &fws)
+{
+ (void) fws;
+}
+
+
+typedef std::map<const std::string, FPFactoryMaker *const>
+FPFactoryMap;
+
+static FPFactoryMap *fpFactoryMap = NULL;
+
+/*
+ * Posting list factory glue.
+ */
+
+FPFactory *
+getFPFactory(const std::string &name, const Schema &schema)
+{
+ if (fpFactoryMap == NULL)
+ return NULL;
+
+ FPFactoryMap::const_iterator i(fpFactoryMap->find(name));
+
+ if (i != fpFactoryMap->end())
+ return i->second(schema);
+ else
+ return NULL;
+}
+
+
+std::vector<std::string>
+getPostingTypes(void)
+{
+ std::vector<std::string> res;
+
+ if (fpFactoryMap != NULL)
+ for (FPFactoryMap::const_iterator i(fpFactoryMap->begin());
+ i != fpFactoryMap->end();
+ ++i)
+ res.push_back(i->first);
+ return res;
+}
+
+
+FPFactoryInit::FPFactoryInit(const FPFactoryMapEntry &fpFactoryMapEntry)
+ : _key(fpFactoryMapEntry.first)
+{
+ if (fpFactoryMap == NULL)
+ fpFactoryMap = new FPFactoryMap;
+ fpFactoryMap->insert(fpFactoryMapEntry);
+}
+
+FPFactoryInit::~FPFactoryInit(void)
+{
+ assert(fpFactoryMap != NULL);
+ size_t eraseRes = fpFactoryMap->erase(_key);
+ assert(eraseRes == 1);
+ (void) eraseRes;
+ if (fpFactoryMap->empty()) {
+ delete fpFactoryMap;
+ fpFactoryMap = NULL;
+ }
+}
+
+void
+FPFactoryInit::forceLink(void)
+{
+ FakeEGCompr64FilterOcc::forceLink();
+ FakeFilterOcc::forceLink();
+ FakeZcbFilterOcc::forceLink();
+ FakeZcFilterOcc::forceLink();
+ FakeMemTreeOcc::forceLink();
+};
+
+} // namespace fakedata
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/test/fakedata/fpfactory.h b/searchlib/src/vespa/searchlib/test/fakedata/fpfactory.h
new file mode 100644
index 00000000000..fe09e653e26
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/test/fakedata/fpfactory.h
@@ -0,0 +1,84 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <map>
+#include <vector>
+#include <string>
+#include "fakeposting.h"
+
+namespace search
+{
+
+namespace fakedata
+{
+
+class FakeWord;
+class FakeWordSet;
+
+class FPFactory
+{
+public:
+ virtual
+ ~FPFactory(void);
+
+ virtual FakePosting::SP
+ make(const FakeWord &fw) = 0;
+
+ virtual void
+ setup(const FakeWordSet &fws);
+
+ virtual void
+ setup(const std::vector<const FakeWord *> &fws);
+};
+
+template<class P>
+class FPFactoryT : public FPFactory
+{
+public:
+ FPFactoryT(const index::Schema &schema)
+ : FPFactory()
+ {
+ (void) schema;
+ }
+
+ virtual FakePosting::SP
+ make(const FakeWord &fw)
+ {
+ return FakePosting::SP(new P(fw));
+ }
+};
+
+typedef FPFactory *(FPFactoryMaker)(const index::Schema &schema);
+
+typedef std::pair<const std::string, FPFactoryMaker *const>
+FPFactoryMapEntry;
+
+template <class F>
+static FPFactory *
+makeFPFactory(const index::Schema &schema)
+{
+ return new F(schema);
+}
+
+FPFactory *
+getFPFactory(const std::string &name, const index::Schema &schema);
+
+std::vector<std::string>
+getPostingTypes(void);
+
+class FPFactoryInit
+{
+ std::string _key;
+public:
+ FPFactoryInit(const FPFactoryMapEntry &fpFactoryMapEntry);
+
+ ~FPFactoryInit(void);
+
+ static void
+ forceLink(void);
+};
+
+} // namespace fakedata
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/test/initrange.cpp b/searchlib/src/vespa/searchlib/test/initrange.cpp
new file mode 100644
index 00000000000..30508915d3e
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/test/initrange.cpp
@@ -0,0 +1,185 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include "initrange.h"
+#include <vespa/vespalib/testkit/test_kit.h>
+#include <vespa/searchlib/queryeval/emptysearch.h>
+#include <vespa/searchlib/queryeval/truesearch.h>
+
+namespace search {
+namespace test {
+
+using namespace search::queryeval;
+using std::make_unique;
+
+class DocIdIterator : public SearchIterator
+{
+public:
+ DocIdIterator(const InitRangeVerifier::DocIds & docIds, bool strict) :
+ _strict(strict),
+ _currIndex(0),
+ _docIds(docIds)
+ { }
+
+ void initRange(uint32_t beginId, uint32_t endId) override {
+ SearchIterator::initRange(beginId, endId);
+ _currIndex = 0;
+ if (_strict) {
+ doSeek(beginId);
+ }
+ }
+
+ void doSeek(uint32_t docId) override {
+ while ((_currIndex < _docIds.size()) && (_docIds[_currIndex] < docId)) {
+ _currIndex++;
+ }
+ if ((_currIndex < _docIds.size()) && (_docIds[_currIndex] < getEndId())) {
+ if (_docIds[_currIndex] == docId || _strict) {
+ setDocId(_docIds[_currIndex]);
+ }
+ } else {
+ setAtEnd();
+ }
+ }
+
+ void doUnpack(uint32_t docid) { (void) docid; }
+
+ vespalib::Trinary is_strict() const override {
+ return _strict ? vespalib::Trinary::True : vespalib::Trinary::False;
+ }
+
+private:
+ const bool _strict;
+ uint32_t _currIndex;
+ const InitRangeVerifier::DocIds _docIds;
+};
+
+InitRangeVerifier::InitRangeVerifier() :
+ _trueTfmd(),
+ _docIds()
+{
+ // (0),1 and 10,11 and 20,21 .... 200,201 etc are hits
+ // 0 is of course invalid.
+ for (size_t i(0); (i*10+1) < getDocIdLimit(); i++) {
+ if (i > 0) {
+ _docIds.push_back(i * 10);
+ }
+ _docIds.push_back(i*10 + 1);
+ }
+}
+
+InitRangeVerifier::DocIds
+InitRangeVerifier::invert(const DocIds & docIds, uint32_t docIdlimit)
+{
+ DocIds inverted;
+ inverted.reserve(docIdlimit);
+ for (size_t i(1), next(0); i < docIdlimit; i++) {
+ if (next < docIds.size()) {
+ if (i >= docIds[next]) {
+ if (i == docIds[next++]) {
+ continue;
+ }
+ }
+ }
+ inverted.push_back(i);
+ }
+ return inverted;
+}
+
+SearchIterator::UP
+InitRangeVerifier::createIterator(const DocIds &docIds, bool strict) const
+{
+ return make_unique<DocIdIterator>(docIds, strict);
+}
+
+SearchIterator::UP
+InitRangeVerifier::createEmptyIterator() const
+{
+ return make_unique<EmptySearch>();
+}
+
+SearchIterator::UP
+InitRangeVerifier::createFullIterator() const
+{
+ return make_unique<TrueSearch>(_trueTfmd);
+}
+
+void
+InitRangeVerifier::verify(SearchIterator * iterator) const
+{
+ SearchIterator::UP up(iterator);
+ verify(*up);
+}
+
+void
+InitRangeVerifier::verify(SearchIterator & iterator) const
+{
+ ASSERT_TRUE(iterator.is_strict() != vespalib::Trinary::Undefined);
+ if (iterator.is_strict() == vespalib::Trinary::True) {
+ verify(iterator, true);
+ }
+ verify(iterator, false);
+}
+
+void
+InitRangeVerifier::verify(SearchIterator & iterator, bool strict) const
+{
+ verify(iterator, Ranges({{1, 202}}), strict);
+ verify(iterator, Ranges({{1, 202}}), strict);
+ for (uint32_t rangeWidth : { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 100, 202 }) {
+ Ranges ranges;
+ for (uint32_t sum(1); sum < getDocIdLimit(); sum += rangeWidth) {
+ ranges.emplace_back(sum, std::min(sum+rangeWidth, getDocIdLimit()));
+ }
+ verify(iterator, ranges, strict);
+ std::reverse(ranges.begin(), ranges.end());
+ verify(iterator, ranges, strict);
+ }
+}
+
+void
+InitRangeVerifier::verify(SearchIterator & iterator, const Ranges & ranges, bool strict) const
+{
+ DocIds result = search(iterator, ranges, strict);
+ ASSERT_EQUAL(_docIds.size(), result.size());
+ for (size_t i(0); i < _docIds.size(); i++) {
+ EXPECT_EQUAL(_docIds[i], result[i]);
+ }
+}
+
+InitRangeVerifier::DocIds
+InitRangeVerifier::search(SearchIterator & it, const Ranges & ranges, bool strict)
+{
+ DocIds result;
+ for (Range range: ranges) {
+ DocIds part = strict ? searchStrict(it, range) : searchRelaxed(it, range);
+ result.insert(result.end(), part.begin(), part.end());
+ }
+ std::sort(result.begin(), result.end());
+ return result;
+}
+
+InitRangeVerifier::DocIds
+InitRangeVerifier::searchRelaxed(SearchIterator & it, Range range)
+{
+ DocIds result;
+ it.initRange(range.first, range.second);
+ for (uint32_t docid = range.first; docid < range.second; ++docid) {
+ if (it.seek(docid)) {
+ result.emplace_back(docid);
+ }
+ }
+ return result;
+}
+
+InitRangeVerifier::DocIds
+InitRangeVerifier::searchStrict(SearchIterator & it, Range range)
+{
+ DocIds result;
+ it.initRange(range.first, range.second);
+ for (uint32_t docId = it.seekFirst(range.first); docId < range.second; docId = it.seekNext(docId + 1)) {
+ result.push_back(docId);
+ }
+ return result;
+}
+
+}
+}
diff --git a/searchlib/src/vespa/searchlib/test/initrange.h b/searchlib/src/vespa/searchlib/test/initrange.h
new file mode 100644
index 00000000000..eb04977d605
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/test/initrange.h
@@ -0,0 +1,38 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/searchlib/queryeval/searchiterator.h>
+#include <vespa/searchlib/fef/termfieldmatchdata.h>
+
+namespace search {
+namespace test {
+
+class InitRangeVerifier {
+public:
+ typedef queryeval::SearchIterator SearchIterator;
+ typedef std::vector<uint32_t> DocIds;
+ typedef std::pair<uint32_t, uint32_t> Range;
+ typedef std::vector<Range> Ranges;
+
+ static DocIds invert(const DocIds & docIds, uint32_t docIdlimit);
+ SearchIterator::UP createIterator(const DocIds &docIds, bool strict) const;
+ SearchIterator::UP createEmptyIterator() const;
+ SearchIterator::UP createFullIterator() const;
+ InitRangeVerifier();
+ const DocIds & getExpectedDocIds() const { return _docIds; }
+ uint32_t getDocIdLimit() const { return 207; }
+ void verify(SearchIterator & iterator) const;
+ /// Convenience that takes ownership of the pointer.
+ void verify(SearchIterator * iterator) const;
+private:
+ void verify(SearchIterator & iterator, bool strict) const;
+ void verify(SearchIterator & iterator, const Ranges & ranges, bool strict) const;
+ static DocIds search(SearchIterator & iterator, const Ranges & ranges, bool strict);
+ static DocIds searchRelaxed(SearchIterator & search, Range range);
+ static DocIds searchStrict(SearchIterator & search, Range range);
+ mutable search::fef::TermFieldMatchData _trueTfmd;
+ DocIds _docIds;
+};
+
+}
+}
diff --git a/searchlib/src/vespa/searchlib/test/memoryindex/CMakeLists.txt b/searchlib/src/vespa/searchlib/test/memoryindex/CMakeLists.txt
new file mode 100644
index 00000000000..75453abc693
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/test/memoryindex/CMakeLists.txt
@@ -0,0 +1,5 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_library(searchlib_searchlib_test_memoryindex INTERFACE
+ SOURCES
+ DEPENDS
+)
diff --git a/searchlib/src/vespa/searchlib/test/memoryindex/ordereddocumentinserter.h b/searchlib/src/vespa/searchlib/test/memoryindex/ordereddocumentinserter.h
new file mode 100644
index 00000000000..5f2ec0c033a
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/test/memoryindex/ordereddocumentinserter.h
@@ -0,0 +1,119 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/memoryindex/iordereddocumentinserter.h>
+
+namespace search
+{
+
+namespace memoryindex
+{
+
+namespace test
+{
+
+class OrderedDocumentInserter : public IOrderedDocumentInserter
+{
+ std::stringstream _ss;
+ bool _first;
+ bool _verbose;
+ uint32_t _fieldId;
+
+ void
+ addComma()
+ {
+ if (!_first) {
+ _ss << ",";
+ } else {
+ _first = false;
+ }
+ }
+public:
+ OrderedDocumentInserter()
+ : _ss(),
+ _first(true),
+ _verbose(false),
+ _fieldId(0)
+ {
+ }
+
+ virtual void
+ setNextWord(const vespalib::stringref word) override
+ {
+ addComma();
+ _ss << "w=" << word;
+ }
+
+ void
+ setFieldId(uint32_t fieldId)
+ {
+ _fieldId = fieldId;
+ }
+
+ virtual void
+ add(uint32_t docId,
+ const index::DocIdAndFeatures &features) override
+ {
+ (void) features;
+ addComma();
+ _ss << "a=" << docId;
+ if (_verbose) {
+ _ss << "(";
+ auto wpi = features._wordPositions.begin();
+ bool firstElement = true;
+ for (auto &el : features._elements) {
+ if (!firstElement) {
+ _ss << ",";
+ }
+ firstElement = false;
+ _ss << "e=" << el.getElementId() << ",w=" <<
+ el.getWeight() << ",l=" <<
+ el.getElementLen() << "[";
+ bool firstWordPos = true;
+ for (uint32_t i = 0; i < el.getNumOccs(); ++i) {
+ if (!firstWordPos) {
+ _ss << ",";
+ }
+ firstWordPos = false;
+ _ss << wpi->getWordPos();
+ }
+ _ss << "]";
+ }
+ _ss << ")";
+ }
+ }
+
+ virtual void
+ remove(uint32_t docId) override
+ {
+ addComma();
+ _ss << "r=" << docId;
+ }
+
+ virtual void flush() override { }
+ virtual void rewind() override {
+ addComma();
+ _ss << "f=" << _fieldId;
+ }
+
+ std::string
+ toStr(void) const
+ {
+ return _ss.str();
+ }
+
+ void
+ reset()
+ {
+ _ss.str("");
+ _first = true;
+ _verbose = false;
+ }
+
+ void setVerbose() { _verbose = true; }
+};
+
+} // namespace test
+} // namespace memoryindex
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/test/statefile.cpp b/searchlib/src/vespa/searchlib/test/statefile.cpp
new file mode 100644
index 00000000000..005145c5cb3
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/test/statefile.cpp
@@ -0,0 +1,48 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/searchlib/util/statefile.h>
+#include <iostream>
+#include <fstream>
+#include <string>
+#include "statefile.h"
+
+namespace search
+{
+
+namespace test
+{
+
+namespace statefile
+{
+
+vespalib::string
+readState(StateFile &sf)
+{
+ std::vector<char> buf;
+ sf.readState(buf);
+ return vespalib::string(buf.begin(), buf.end());
+}
+
+
+std::vector<vespalib::string>
+readHistory(const char *name)
+{
+ std::vector<vespalib::string> res;
+ std::ifstream is(name);
+ std::string line;
+ while (!is.eof()) {
+ std::getline(is, line);
+ if (is.eof() && line.empty()) {
+ break;
+ }
+ res.push_back(line + "\n");
+ }
+ return res;
+}
+
+
+}
+
+}
+
+}
diff --git a/searchlib/src/vespa/searchlib/test/statefile.h b/searchlib/src/vespa/searchlib/test/statefile.h
new file mode 100644
index 00000000000..a5aee2ff2a3
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/test/statefile.h
@@ -0,0 +1,22 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/vespalib/stllike/string.h>
+
+namespace search
+{
+
+class StateFile;
+
+namespace test
+{
+
+namespace statefile
+{
+
+vespalib::string readState(StateFile &sf);
+std::vector<vespalib::string> readHistory(const char *name);
+
+}
+
+}
+
+}
diff --git a/searchlib/src/vespa/searchlib/test/statestring.cpp b/searchlib/src/vespa/searchlib/test/statestring.cpp
new file mode 100644
index 00000000000..e1c8df43c4b
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/test/statestring.cpp
@@ -0,0 +1,98 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <sstream>
+#include "statestring.h"
+
+namespace search
+{
+
+namespace test
+{
+
+namespace statestring
+{
+
+bool
+testStartPos(vespalib::string &s, size_t pos)
+{
+ return (pos < s.size() && (pos == 0 || s[pos - 1] == ' '));
+}
+
+
+size_t
+findStartPos(vespalib::string &s, const vespalib::string &key)
+{
+ size_t pos = 0;
+ while (pos < s.size()) {
+ pos = s.find(key, pos);
+ if (testStartPos(s, pos)) {
+ break;
+ }
+ ++pos;
+ }
+ return pos;
+}
+
+
+size_t
+scanBreakPos(vespalib::string &s, size_t pos)
+{
+ while (pos < s.size() && s[pos] != ' ' && s[pos] != '\n') {
+ ++pos;
+ }
+ return pos;
+}
+
+
+void
+normalizeTimestamp(vespalib::string &s)
+{
+ size_t pos = findStartPos(s, "ts=");
+ if (pos < s.size()) {
+ size_t npos = scanBreakPos(s, pos + 3);
+ s.replace(pos, npos - pos, "ts=0.0");
+ return;
+ }
+}
+
+
+void
+normalizeAddr(vespalib::string &s, void *addr)
+{
+ size_t pos = findStartPos(s, "addr=");
+ if (pos < s.size()) {
+ size_t npos = scanBreakPos(s, pos + 5);
+ std::ostringstream os;
+ os << "addr=0x";
+ os.width(16);
+ os.fill('0');
+ os << std::hex << reinterpret_cast<unsigned long>(addr);
+ s.replace(pos, npos - pos, os.str());
+ return;
+ }
+}
+
+
+void
+normalizeTimestamps(std::vector<vespalib::string> &sv)
+{
+ for (auto &s : sv) {
+ normalizeTimestamp(s);
+ }
+}
+
+
+void
+normalizeAddrs(std::vector<vespalib::string> &sv, void *addr)
+{
+ for (auto &s : sv) {
+ normalizeAddr(s, addr);
+ }
+}
+
+
+}
+
+}
+
+}
diff --git a/searchlib/src/vespa/searchlib/test/statestring.h b/searchlib/src/vespa/searchlib/test/statestring.h
new file mode 100644
index 00000000000..ad5d70186dc
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/test/statestring.h
@@ -0,0 +1,24 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/vespalib/stllike/string.h>
+
+namespace search
+{
+
+namespace test
+{
+
+namespace statestring
+{
+
+void normalizeTimestamp(vespalib::string &s);
+void normalizeAddr(vespalib::string &s, void *addr);
+void normalizeTimestamps(std::vector<vespalib::string> &sv);
+void normalizeAddrs(std::vector<vespalib::string> &sv, void *addr);
+
+}
+
+}
+
+}
+
+
diff --git a/searchlib/src/vespa/searchlib/transactionlog/.gitignore b/searchlib/src/vespa/searchlib/transactionlog/.gitignore
new file mode 100644
index 00000000000..ee8938b6bf4
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/transactionlog/.gitignore
@@ -0,0 +1,6 @@
+*.So
+*.exe
+*.ilk
+*.pdb
+.depend*
+Makefile
diff --git a/searchlib/src/vespa/searchlib/transactionlog/CMakeLists.txt b/searchlib/src/vespa/searchlib/transactionlog/CMakeLists.txt
new file mode 100644
index 00000000000..6b01f773124
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/transactionlog/CMakeLists.txt
@@ -0,0 +1,14 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_library(searchlib_translog OBJECT
+ SOURCES
+ common.cpp
+ domain.cpp
+ domainpart.cpp
+ nosyncproxy.cpp
+ session.cpp
+ trans_log_server_explorer.cpp
+ translogclient.cpp
+ translogserver.cpp
+ translogserverapp.cpp
+ DEPENDS
+)
diff --git a/searchlib/src/vespa/searchlib/transactionlog/OWNERS b/searchlib/src/vespa/searchlib/transactionlog/OWNERS
new file mode 100644
index 00000000000..1037590124e
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/transactionlog/OWNERS
@@ -0,0 +1 @@
+balder
diff --git a/searchlib/src/vespa/searchlib/transactionlog/common.cpp b/searchlib/src/vespa/searchlib/transactionlog/common.cpp
new file mode 100644
index 00000000000..6ff2aee8ee7
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/transactionlog/common.cpp
@@ -0,0 +1,107 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include "common.h"
+
+namespace search
+{
+
+namespace transactionlog
+{
+
+using vespalib::nbostream;
+
+int makeDirectory(const char * dir)
+{
+ int retval(-1);
+
+ FastOS_StatInfo st;
+ if ( FastOS_File::Stat(dir, &st) ) {
+ retval = st._isDirectory ? 0 : -2;
+ } else {
+ retval = FastOS_File::MakeDirectory(dir) ? 0 : -3;
+ }
+
+ return retval;
+}
+
+int64_t SerialNumRange::cmp(const SerialNumRange & b) const
+{
+ int64_t diff(0);
+ if ( ! (contains(b) || b.contains(*this)) ) {
+ diff = _from - b._from;
+ }
+ return diff;
+}
+
+Packet::Packet(const void * buf, size_t sz) :
+ _count(0),
+ _range(),
+ _limit(sz),
+ _buf(static_cast<const char *>(buf), sz, true)
+{
+ nbostream os(_buf.c_str(), sz, true);
+ while ( os.size() > 0 ) {
+ Entry e;
+ e.deserialize(os);
+ if (_range.to() == 0) {
+ _range.from(e.serial());
+ }
+ _range.to(e.serial());
+ _count++;
+ }
+}
+
+bool Packet::merge(const Packet & packet)
+{
+ bool retval(_range.to() < packet._range.from());
+ if (retval) {
+ _count += packet._count;
+ _range.to(packet._range.to());
+ _buf.write(packet.getHandle().c_str(), packet.getHandle().size());
+ }
+ return retval;
+}
+
+nbostream & Packet::Entry::deserialize(nbostream & os)
+{
+ _valid = false;
+ int32_t len(0);
+ os >> _unique >> _type >> len;
+ _data = vespalib::ConstBufferRef(os.peek(), len);
+ os.adjustReadPos(len);
+ _valid = true;
+ return os;
+}
+
+nbostream & Packet::Entry::serialize(nbostream & os) const
+{
+ os << _unique << _type << static_cast<uint32_t>(_data.size());
+ os.write(_data.c_str(), _data.size());
+ return os;
+}
+
+Packet::Entry::Entry(SerialNum u, Type t, const vespalib::ConstBufferRef & d) :
+ _unique(u),
+ _type(t),
+ _valid(true),
+ _data(d)
+{
+}
+
+
+bool Packet::add(const Packet::Entry & e)
+{
+ bool retval((_buf.size() < _limit) && (_range.to() < e.serial()));
+ if (retval) {
+ if (_buf.empty()) {
+ _range.from(e.serial());
+ }
+ e.serialize(_buf);
+ _count++;
+ _range.to(e.serial());
+ }
+ return retval;
+}
+
+}
+}
diff --git a/searchlib/src/vespa/searchlib/transactionlog/common.h b/searchlib/src/vespa/searchlib/transactionlog/common.h
new file mode 100644
index 00000000000..ae6f27f39a1
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/transactionlog/common.h
@@ -0,0 +1,100 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/searchlib/common/serialnum.h>
+#include <vespa/vespalib/objects/nbostream.h>
+#include <vespa/vespalib/util/buffer.h>
+
+namespace search {
+namespace transactionlog {
+
+/// This represents a type of the entry. Fx update,remove
+typedef uint32_t Type;
+/// A channel represents one data stream.
+
+class RPC
+{
+public:
+enum Result { OK, FULL, ERROR };
+};
+
+class SerialNumRange
+{
+public:
+ SerialNumRange() : _from(0), _to(0) { }
+ SerialNumRange(SerialNum f) : _from(f), _to(f ? f-1 : f) { }
+ SerialNumRange(SerialNum f, SerialNum t) : _from(f), _to(t) { }
+ bool operator == (const SerialNumRange & b) const { return cmp(b) == 0; }
+ bool operator < (const SerialNumRange & b) const { return cmp(b) < 0; }
+ bool operator > (const SerialNumRange & b) const { return cmp(b) > 0; }
+ bool operator <= (const SerialNumRange & b) const { return cmp(b) <= 0; }
+ bool operator >= (const SerialNumRange & b) const { return cmp(b) >= 0; }
+ SerialNum from() const { return _from; }
+ SerialNum to() const { return _to; }
+ void from(SerialNum v) { _from = v; }
+ void to(SerialNum v) { _to = v; }
+
+ bool contains(SerialNum s) const {
+ return (_from <= s) && (s <= _to);
+ }
+
+ bool contains(const SerialNumRange & b) const {
+ return (_from <= b._from) && (b._to <= _to);
+ }
+private:
+ int64_t cmp(const SerialNumRange & b) const;
+ SerialNum _from;
+ SerialNum _to;
+};
+
+class Packet
+{
+public:
+ class Entry
+ {
+ public:
+ Entry() : _unique(0), _type(0), _valid(false), _data() { }
+ Entry(SerialNum u, Type t, const vespalib::ConstBufferRef & d);
+ SerialNum serial() const { return _unique; }
+ Type type() const { return _type; }
+ bool valid() const { return _valid; }
+ size_t serializedSize() const { return sizeof(SerialNum) + sizeof(Type) + sizeof(uint32_t) + _data.size(); }
+ const vespalib::ConstBufferRef & data() const { return _data; }
+ vespalib::nbostream & deserialize(vespalib::nbostream & is);
+ vespalib::nbostream & serialize(vespalib::nbostream & os) const;
+ private:
+ SerialNum _unique;
+ Type _type;
+ bool _valid;
+ vespalib::ConstBufferRef _data;
+ };
+public:
+ Packet(size_t m=0xf000) : _count(0), _range(), _limit(m), _buf(m) { }
+ Packet(const void * buf, size_t sz);
+ bool add(const Entry & data);
+ void close() { }
+ void clear() { _buf.clear(); _count = 0; _range.from(0); _range.to(0); }
+ const SerialNumRange & range() const { return _range; }
+ const vespalib::nbostream & getHandle() const { return _buf; }
+ size_t size() const { return _count; }
+ bool empty() const { return _count == 0; }
+ size_t sizeBytes() const { return _buf.size(); }
+ bool merge(const Packet & packet);
+private:
+ size_t _count;
+ SerialNumRange _range;
+ size_t _limit;
+ vespalib::nbostream _buf;
+};
+
+int makeDirectory(const char * dir);
+
+class Writer {
+public:
+ virtual ~Writer() { }
+ virtual void commit(const vespalib::string & domainName, const Packet & packet) = 0;
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/transactionlog/domain.cpp b/searchlib/src/vespa/searchlib/transactionlog/domain.cpp
new file mode 100644
index 00000000000..be1de99efef
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/transactionlog/domain.cpp
@@ -0,0 +1,405 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/searchlib/transactionlog/domain.h>
+#include <limits>
+#include <vespa/vespalib/util/vstringfmt.h>
+#include <vespa/vespalib/util/stringfmt.h>
+#include <vespa/vespalib/objects/nbostream.h>
+#include <vespa/fastlib/io/bufferedfile.h>
+#include <stdexcept>
+#include <vespa/log/log.h>
+#include <vespa/vespalib/util/closuretask.h>
+
+LOG_SETUP(".transactionlog.domain");
+
+using vespalib::string;
+using vespalib::make_string;
+using vespalib::make_vespa_string;
+using vespalib::LockGuard;
+using vespalib::makeTask;
+using vespalib::makeClosure;
+using vespalib::Monitor;
+using vespalib::MonitorGuard;
+using search::common::FileHeaderContext;
+using std::runtime_error;
+
+namespace search
+{
+
+namespace transactionlog
+{
+
+Domain::Domain(const string &domainName,
+ const string & baseDir,
+ vespalib::ThreadStackExecutor & executor,
+ uint64_t domainPartSize,
+ bool useFsync,
+ DomainPart::Crc defaultCrcType,
+ const FileHeaderContext &fileHeaderContext) :
+ _defaultCrcType(defaultCrcType),
+ _executor(executor),
+ _count(0),
+ _sessionId(1),
+ _useFsync(useFsync),
+ _syncMonitor(),
+ _pendingSync(false),
+ _name(domainName),
+ _domainPartSize(domainPartSize),
+ _parts(),
+ _lock(),
+ _sessionLock(),
+ _sessions(),
+ _baseDir(baseDir),
+ _fileHeaderContext(fileHeaderContext),
+ _markedDeleted(false)
+{
+ int retval(0);
+ if ((retval = makeDirectory(_baseDir.c_str())) != 0) {
+ throw runtime_error(make_string("Failed creating basedirectory %s r(%d), e(%d)", _baseDir.c_str(), retval, errno));
+ }
+ if ((retval = makeDirectory(dir().c_str())) != 0) {
+ throw runtime_error(make_string("Failed creating domaindir %s r(%d), e(%d)", dir().c_str(), retval, errno));
+ }
+ SerialNumList partIdVector = scanDir();
+ const int64_t lastPart = partIdVector.empty() ? 0 : partIdVector.back();
+ for (const int64_t partId : partIdVector) {
+ if ( partId != -1) {
+ _executor.execute(makeTask(makeClosure(this, &Domain::addPart, partId, partId == lastPart)));
+ }
+ }
+ _executor.sync();
+ if (_parts.empty() || _parts.crbegin()->second->isClosed()) {
+ _parts[lastPart].reset(new DomainPart(_name, dir(), lastPart, _useFsync, _defaultCrcType, _fileHeaderContext, false));
+ }
+}
+
+void Domain::addPart(int64_t partId, bool isLastPart) {
+ DomainPart::SP dp(new DomainPart(_name, dir(), partId, _useFsync, _defaultCrcType, _fileHeaderContext, isLastPart));
+ if (dp->size() == 0) {
+ // Only last domain part is allowed to be truncated down to
+ // empty size.
+ assert(isLastPart);
+ dp->erase(dp->range().to() + 1);
+ } else {
+ {
+ LockGuard guard(_lock);
+ _count += dp->size();
+ _parts[partId] = dp;
+ }
+ if (! isLastPart) {
+ dp->close();
+ }
+ }
+}
+
+class Sync : public vespalib::Executor::Task
+{
+public:
+ Sync(Monitor &syncMonitor, const DomainPart::SP &dp, bool &pendingSync) :
+ _syncMonitor(syncMonitor),
+ _dp(dp),
+ _pendingSync(pendingSync)
+ { }
+private:
+ void run() override {
+ _dp->sync();
+ MonitorGuard guard(_syncMonitor);
+ _pendingSync = false;
+ guard.broadcast();
+ }
+
+ Monitor & _syncMonitor;
+ DomainPart::SP _dp;
+ bool & _pendingSync;
+};
+
+Domain::~Domain() { }
+
+DomainInfo
+Domain::getDomainInfo() const
+{
+ LockGuard guard(_lock);
+ DomainInfo info(SerialNumRange(begin(), end()), count(), byteSize());
+ for (const auto &entry: _parts) {
+ const DomainPart &part = *entry.second;
+ info.parts.emplace_back(PartInfo(part.range(), part.size(),
+ part.byteSize(), part.fileName()));
+ }
+ return info;
+}
+
+SerialNum Domain::begin() const
+{
+ SerialNum s(0);
+ if ( ! _parts.empty() ) {
+ s = _parts.begin()->second->range().from();
+ }
+ return s;
+}
+
+SerialNum Domain::end() const
+{
+ SerialNum s(0);
+ if ( ! _parts.empty() ) {
+ s = _parts.rbegin()->second->range().to();
+ }
+ return s;
+}
+
+size_t Domain::byteSize() const
+{
+ size_t size = 0;
+ for (const auto &entry : _parts) {
+ const DomainPart &part = *entry.second;
+ size += part.byteSize();
+ }
+ return size;
+}
+
+SerialNum
+Domain::getSynced(void) const
+{
+ SerialNum s(0);
+ LockGuard guard(_lock);
+ if (_parts.empty()) {
+ return s;
+ }
+ DomainPartList::const_iterator it(_parts.end());
+ --it;
+ s = it->second->getSynced();
+ if (s == 0 && it != _parts.begin()) {
+ --it;
+ s = it->second->getSynced();
+ }
+ return s;
+}
+
+
+void
+Domain::triggerSyncNow(void)
+{
+ MonitorGuard guard(_syncMonitor);
+ if (!_pendingSync) {
+ _pendingSync = true;
+ DomainPart::SP dp(_parts.rbegin()->second);
+ _executor.execute(Sync::UP(new Sync(_syncMonitor, dp, _pendingSync)));
+ }
+}
+
+DomainPart::SP Domain::findPart(SerialNum s)
+{
+ LockGuard guard(_lock);
+ DomainPartList::iterator it(_parts.upper_bound(s));
+ if (!_parts.empty() && it != _parts.begin()) {
+ DomainPartList::iterator prev(it);
+ --prev;
+ if (prev->second->range().to() > s) {
+ return prev->second;
+ }
+ }
+ if (it != _parts.end()) {
+ return it->second;
+ }
+ return DomainPart::SP();
+}
+
+uint64_t Domain::size() const
+{
+ LockGuard guard(_lock);
+ return size(guard);
+}
+
+uint64_t Domain::size(const LockGuard & guard) const
+{
+ (void) guard;
+ uint64_t sz(0);
+ for (const auto & part : _parts) {
+ sz += part.second->size();
+ }
+ return sz;
+}
+
+SerialNum Domain::findOldestActiveVisit() const
+{
+ SerialNum oldestActive(std::numeric_limits<SerialNum>::max());
+ LockGuard guard(_sessionLock);
+ for (const auto & pair : _sessions) {
+ Session * session(pair.second.get());
+ if (!session->inSync()) {
+ oldestActive = std::min(oldestActive, session->range().from());
+ }
+ }
+ return oldestActive;
+}
+
+void Domain::cleanSessions()
+{
+ if ( _sessions.empty()) {
+ return;
+ }
+ LockGuard guard(_sessionLock);
+ for (SessionList::iterator it(_sessions.begin()), mt(_sessions.end()); it != mt; ) {
+ Session * session(it->second.get());
+ if ((!session->continous() && session->inSync())) {
+ _sessions.erase(it++);
+ } else if (session->finished()) {
+ _sessions.erase(it++);
+ } else {
+ it++;
+ }
+ }
+}
+
+void Domain::commit(const Packet & packet)
+{
+ DomainPart::SP dp(_parts.rbegin()->second);
+ vespalib::nbostream is(packet.getHandle().c_str(), packet.getHandle().size(), true);
+ Packet::Entry entry;
+ entry.deserialize(is);
+ if (dp->byteSize() > _domainPartSize) {
+ triggerSyncNow();
+ {
+ MonitorGuard guard(_syncMonitor);
+ while (_pendingSync) {
+ guard.wait();
+ }
+ }
+ dp->close();
+ dp.reset(new DomainPart(_name, dir(), entry.serial(), _useFsync, _defaultCrcType, _fileHeaderContext, false));
+ {
+ LockGuard guard(_lock);
+ _parts[entry.serial()] = dp;
+ }
+ dp = _parts.rbegin()->second;
+ }
+ size_t oldSz(dp->size());
+ dp->commit(entry.serial(), packet);
+ cleanSessions();
+ // If commit fails no updates should be sent to subscribers either.
+ // Is is better to keep a consistent behaviour.
+ _count += dp->size() - oldSz;
+
+ LockGuard guard(_sessionLock);
+ for (auto & it : _sessions) {
+ const Session::SP & session(it.second);
+ if (session->continous()) {
+ if (session->ok()) {
+ Session::enQ(session, entry.serial(), packet);
+ }
+ }
+ }
+}
+
+bool Domain::erase(const SerialNum & to)
+{
+ bool retval(true);
+ /// Do not erase the last element
+ for (DomainPartList::iterator it(_parts.begin()); (_parts.size() > 1) && (it->second.get()->range().to() < to); it = _parts.begin()) {
+ DomainPart::SP dp(it->second);
+ {
+ LockGuard guard(_lock);
+ _parts.erase(it);
+ }
+ retval = retval && dp->erase(to);
+ }
+ if (_parts.begin()->second->range().to() >= to) {
+ _parts.begin()->second->erase(to);
+ }
+ return retval;
+}
+
+int Domain::visit(const Domain::SP & domain, const SerialNum & from, const SerialNum & to, FRT_Supervisor & supervisor, FNET_Connection *conn)
+{
+ assert(this == domain.get());
+ cleanSessions();
+ SerialNumRange range(from, to);
+ Session * session = new Session(_sessionId++, range, domain, supervisor, conn);
+ LockGuard guard(_sessionLock);
+ _sessions[session->id()] = Session::SP(session);
+ return session->id();
+}
+
+int Domain::startSession(int sessionId)
+{
+ int retval(-1);
+ LockGuard guard(_sessionLock);
+ SessionList::iterator found = _sessions.find(sessionId);
+ if (found != _sessions.end()) {
+ if ( execute(Session::createTask(found->second)).get() == nullptr ) {
+ retval = 0;
+ } else {
+ _sessions.erase(sessionId);
+ }
+ }
+ return retval;
+}
+
+int Domain::closeSession(int sessionId)
+{
+ int retval(-1);
+ {
+ LockGuard guard(_sessionLock);
+ SessionList::iterator found = _sessions.find(sessionId);
+ if (found != _sessions.end()) {
+ retval = 1;
+ _executor.sync();
+ }
+ }
+ if (retval == 1) {
+ FastOS_Thread::Sleep(10);
+ LockGuard guard(_sessionLock);
+ SessionList::iterator found = _sessions.find(sessionId);
+ if (found != _sessions.end()) {
+ _sessions.erase(sessionId);
+ retval = 0;
+ } else {
+ retval = 0;
+ }
+ }
+ return retval;
+}
+
+int Domain::subscribe(const Domain::SP & domain, const SerialNum & from, FRT_Supervisor & supervisor, FNET_Connection *conn)
+{
+ assert(this == domain.get());
+ cleanSessions();
+ SerialNumRange range(from, end());
+ Session * session = new Session(_sessionId++, range, domain, supervisor, conn, true);
+ LockGuard guard(_sessionLock);
+ _sessions[session->id()] = Session::SP(session);
+ return session->id();
+}
+
+
+Domain::SerialNumList
+Domain::scanDir(void)
+{
+ SerialNumList res;
+
+ FastOS_DirectoryScan dirScan(dir().c_str());
+
+ const char *wantPrefix = _name.c_str();
+ size_t wantPrefixLen = strlen(wantPrefix);
+
+ while (dirScan.ReadNext()) {
+ const char *ename = dirScan.GetName();
+ if (strcmp(ename, ".") == 0 ||
+ strcmp(ename, "..") == 0)
+ continue;
+ if (strncmp(ename, wantPrefix, wantPrefixLen) != 0)
+ continue;
+ if (ename[wantPrefixLen] != '-')
+ continue;
+ const char *p = ename + wantPrefixLen + 1;
+ uint64_t num = strtoull(p, NULL, 10);
+ string checkName = make_string("%s-%016" PRIu64, _name.c_str(), num);
+ if (strcmp(checkName.c_str(), ename) != 0)
+ continue;
+ res.push_back(static_cast<SerialNum>(num));
+ }
+ std::sort(res.begin(), res.end());
+ return res;
+}
+
+}
+}
diff --git a/searchlib/src/vespa/searchlib/transactionlog/domain.h b/searchlib/src/vespa/searchlib/transactionlog/domain.h
new file mode 100644
index 00000000000..6309d7113f1
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/transactionlog/domain.h
@@ -0,0 +1,125 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/searchlib/transactionlog/domainpart.h>
+#include <vespa/searchlib/transactionlog/session.h>
+#include <vespa/vespalib/util/threadstackexecutor.h>
+
+namespace search {
+namespace transactionlog {
+
+struct PartInfo {
+ SerialNumRange range;
+ size_t count;
+ size_t byteSize;
+ vespalib::string file;
+ PartInfo(SerialNumRange range_in, size_t count_in,
+ size_t byteSize_in,
+ vespalib::stringref file_in)
+ : range(range_in), count(count_in), byteSize(byteSize_in),
+ file(file_in) {}
+};
+
+struct DomainInfo {
+ SerialNumRange range;
+ size_t count;
+ size_t byteSize;
+ std::vector<PartInfo> parts;
+ DomainInfo(SerialNumRange range_in, size_t count_in, size_t byteSize_in)
+ : range(range_in), count(count_in), byteSize(byteSize_in), parts() {}
+ DomainInfo()
+ : range(), count(0), byteSize(0), parts() {}
+};
+
+typedef std::map<vespalib::string, DomainInfo> DomainStats;
+
+class Domain
+{
+public:
+ typedef std::shared_ptr<Domain> SP;
+ Domain(const vespalib::string &name,
+ const vespalib::string &baseDir,
+ vespalib::ThreadStackExecutor & executor,
+ uint64_t domainPartSize,
+ bool useFsync,
+ DomainPart::Crc defaultCrcType,
+ const common::FileHeaderContext &fileHeaderContext);
+
+ virtual ~Domain();
+
+ DomainInfo getDomainInfo() const;
+
+ const vespalib::string & name() const { return _name; }
+ bool erase(const SerialNum & to);
+
+ void commit(const Packet & packet);
+ int
+ visit(const Domain::SP & self,
+ const SerialNum & from,
+ const SerialNum & to,
+ FRT_Supervisor & supervisor,
+ FNET_Connection *conn);
+
+ int subscribe(const Domain::SP & self, const SerialNum & from, FRT_Supervisor & supervisor, FNET_Connection *conn);
+
+ SerialNum begin() const;
+ SerialNum end() const;
+ SerialNum getSynced(void) const;
+ void triggerSyncNow(void);
+ bool getMarkedDeleted(void) const { return _markedDeleted; }
+ void markDeleted(void) { _markedDeleted = true; }
+
+ uint64_t count() const { return _count; }
+ size_t byteSize() const;
+ size_t getNumSessions() const { return _sessions.size(); }
+
+ int startSession(int sessionId);
+ int closeSession(int sessionId);
+
+ SerialNum findOldestActiveVisit() const;
+ DomainPart::SP findPart(SerialNum s);
+
+ static vespalib::string
+ getDir(const vespalib::string & base, const vespalib::string & domain) {
+ return base + "/" + domain;
+ }
+ vespalib::Executor::Task::UP execute(vespalib::Executor::Task::UP task) {
+ return _executor.execute(std::move(task));
+ }
+ uint64_t size() const;
+private:
+ uint64_t size(const vespalib::LockGuard & guard) const;
+ void cleanSessions();
+ vespalib::string dir() const { return getDir(_baseDir, _name); }
+ void addPart(int64_t partId, bool isLastPart);
+
+ typedef std::vector<SerialNum> SerialNumList;
+
+ SerialNumList scanDir(void);
+
+ typedef std::map<int, Session::SP > SessionList;
+ typedef std::map<int64_t, DomainPart::SP > DomainPartList;
+ typedef vespalib::ThreadStackExecutor Executor;
+
+ DomainPart::Crc _defaultCrcType;
+ Executor & _executor;
+ uint64_t _count;
+ int _sessionId;
+ const bool _useFsync;
+ vespalib::Monitor _syncMonitor;
+ bool _pendingSync;
+ vespalib::string _name;
+ uint64_t _domainPartSize;
+ DomainPartList _parts;
+ vespalib::Lock _lock;
+ vespalib::Lock _sessionLock;
+ SessionList _sessions;
+ vespalib::string _baseDir;
+ const common::FileHeaderContext &_fileHeaderContext;
+ bool _markedDeleted;
+ bool _urgentSync;
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/transactionlog/domainpart.cpp b/searchlib/src/vespa/searchlib/transactionlog/domainpart.cpp
new file mode 100644
index 00000000000..274a3495e73
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/transactionlog/domainpart.cpp
@@ -0,0 +1,681 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/searchlib/transactionlog/domainpart.h>
+#include <vespa/vespalib/util/crc.h>
+#include <vespa/vespalib/xxhash/xxhash.h>
+#include <vespa/vespalib/util/vstringfmt.h>
+#include <vespa/vespalib/util/stringfmt.h>
+#include <vespa/vespalib/data/fileheader.h>
+#include <vespa/searchlib/common/fileheadercontext.h>
+#include <vespa/vespalib/objects/nbostream.h>
+#include <vespa/fastlib/io/bufferedfile.h>
+#include <stdexcept>
+#include <vespa/log/log.h>
+
+LOG_SETUP(".transactionlog.domainpart");
+
+using vespalib::make_string;
+using vespalib::FileHeader;
+using vespalib::string;
+using vespalib::getLastErrorString;
+using vespalib::IllegalHeaderException;
+using vespalib::LockGuard;
+using vespalib::nbostream;
+using search::common::FileHeaderContext;
+using std::runtime_error;
+
+namespace search
+{
+
+namespace transactionlog
+{
+
+
+namespace
+{
+
+void
+handleSync(FastOS_FileInterface &file) __attribute__ ((noinline));
+
+string
+handleWriteError(const char *text,
+ FastOS_FileInterface &file,
+ int64_t lastKnownGoodPos,
+ const Packet::Entry &entry,
+ int bufLen) __attribute__ ((noinline));
+
+bool
+handleReadError(const char *text,
+ FastOS_FileInterface &file,
+ ssize_t len,
+ ssize_t rlen,
+ int64_t lastKnownGoodPos,
+ bool allowTruncate) __attribute__ ((noinline));
+
+bool
+addPacket(Packet &packet,
+ const Packet::Entry &e) __attribute__ ((noinline));
+
+bool
+tailOfFileIsZero(FastOS_FileInterface &file, int64_t lastKnownGoodPos) __attribute__ ((noinline));
+
+bool
+addPacket(Packet &packet, const Packet::Entry &e)
+{
+ LOG(spam, "Adding serial #%" PRIu64 ", of type %d and size %zd into packet of size %zu and %zu bytes",
+ e.serial(), e.type(), e.data().size(), packet.size(), packet.sizeBytes());
+ return ! packet.add(e);
+}
+
+void
+handleSync(FastOS_FileInterface &file)
+{
+ if ( file.IsOpened() && ! file.Sync() ) {
+ int osError = errno;
+ throw runtime_error(make_string("Failed to synchronize file '%s' of size %" PRId64 " due to '%s'. "
+ "Does not know how to handle this so throwing an exception.",
+ file.GetFileName(), file.GetSize(), FastOS_File::getErrorString(osError).c_str()));
+ }
+}
+
+string
+handleWriteError(const char *text,
+ FastOS_FileInterface &file,
+ int64_t lastKnownGoodPos,
+ const Packet::Entry &entry,
+ int bufLen)
+{
+ string last(FastOS_File::getLastErrorString());
+ string e(make_string("%s. File '%s' at position %" PRId64 " for entry %" PRIu64 " of length %u. "
+ "OS says '%s'. Rewind to last known good position %" PRId64 ".",
+ text, file.GetFileName(), file.GetPosition(), entry.serial(), bufLen,
+ last.c_str(), lastKnownGoodPos));
+ LOG(error, "%s", e.c_str());
+ if ( ! file.SetPosition(lastKnownGoodPos) ) {
+ last = FastOS_File::getLastErrorString();
+ throw runtime_error(make_string("Failed setting position %" PRId64 " of file '%s' of size %" PRId64 ": OS says '%s'",
+ lastKnownGoodPos, file.GetFileName(), file.GetSize(), last.c_str()));
+ }
+ handleSync(file);
+ return e;
+}
+
+string
+getError(FastOS_FileInterface & f)
+{
+ return make_string("File '%s' of size %ld has last error of '%s'.",
+ f.GetFileName(), f.GetSize(), FastOS_File::getLastErrorString().c_str());
+}
+
+bool
+tailOfFileIsZero(FastOS_FileInterface &file, int64_t lastKnownGoodPos)
+{
+ ssize_t rest(file.GetSize() - lastKnownGoodPos);
+ if (rest < 0 || rest > 0x100000) {
+ return false;
+ }
+ std::vector<char> buf(rest, 0);
+ file.ReadBuf(&buf[0], buf.size(), lastKnownGoodPos);
+ for (char c : buf) {
+ if (c != 0) {
+ return false;
+ }
+ }
+ return true;
+}
+
+bool
+handleReadError(const char *text,
+ FastOS_FileInterface &file,
+ ssize_t len,
+ ssize_t rlen,
+ int64_t lastKnownGoodPos,
+ bool allowTruncate)
+{
+ bool retval(true);
+ if (rlen != -1) {
+ string e;
+ if (len == rlen) {
+ e = make_string("Error in data read of size %zd bytes at pos %" PRId64 " trying to read %s. ",
+ len, file.GetPosition() - rlen, text);
+ } else {
+ e = make_string("Short Read. Got only %zd of %zd bytes at pos %" PRId64 " trying to read %s. ",
+ rlen, len, file.GetPosition() - rlen, text);
+ }
+ e += getError(file);
+ if (!allowTruncate) {
+ LOG(error, "%s", e.c_str());
+ throw runtime_error(e);
+ }
+ // Short read. Log error, Truncate, continue.
+ e += make_string(" Truncate to %" PRId64 " and continue", lastKnownGoodPos);
+ LOG(error, "%s", e.c_str());
+ FastOS_File truncateFile(file.GetFileName());
+ file.Close();
+ if ( truncateFile.OpenWriteOnlyExisting()) {
+ if (truncateFile.SetSize(lastKnownGoodPos)) {
+ if (truncateFile.Close()) {
+ if (file.OpenReadOnly()) {
+ if (file.SetPosition(lastKnownGoodPos)) {
+ retval = false;
+ } else {
+ throw runtime_error(make_string("Failed setting position %" PRId64 ". %s", lastKnownGoodPos, getError(file).c_str()));
+ }
+ } else {
+ throw runtime_error(make_string("Failed reopening file after truncate: %s", getError(file).c_str()));
+ }
+ } else {
+ throw runtime_error(make_string("Failed closing truncated file: %s", getError(truncateFile).c_str()));
+ }
+ } else {
+ throw runtime_error(make_string("Failed truncating to %" PRId64 ": %s", lastKnownGoodPos, getError(truncateFile).c_str()));
+ }
+ } else {
+ throw runtime_error(make_string("Failed opening for truncating: %s", getError(file).c_str()));
+ }
+ } else {
+ // Some kind of IO error throw exception.
+ string errString = FastOS_File::getLastErrorString();
+ throw runtime_error(make_string("IO error when reading %zd bytes at pos %" PRId64 "trying to read %s."
+ " Last known good position is %" PRId64 ": %s",
+ len, file.GetPosition(), text, lastKnownGoodPos, getError(file).c_str()));
+ }
+ return retval;
+}
+
+}
+
+int64_t
+DomainPart::buildPacketMapping(bool allowTruncate)
+{
+ Fast_BufferedFile transLog;
+ transLog.EnableDirectIO();
+ if ( ! transLog.OpenReadOnly(_transLog.GetFileName())) {
+ throw runtime_error(make_string("Failed opening '%s' for buffered readinf with direct io.", transLog.GetFileName()));
+ }
+ int64_t fSize(transLog.GetSize());
+ int64_t currPos(0);
+ try {
+ FileHeader header;
+ _headerLen = header.readFile(transLog);
+ transLog.SetPosition(_headerLen);
+ currPos = _headerLen;
+ } catch (const IllegalHeaderException &e) {
+ transLog.SetPosition(0);
+ try {
+ FileHeader::FileReader fr(transLog);
+ uint32_t header2Len = FileHeader::readSize(fr);
+ if (header2Len <= fSize)
+ e.throwSelf(); // header not truncated
+ } catch (const IllegalHeaderException &e2) {
+ }
+ if (fSize > 0) {
+ // Truncate file (dropping header) if cannot even read
+ // header length, or if header has been truncated.
+ handleReadError("file header", transLog, 0, FileHeader::getMinSize(), 0, allowTruncate);
+ }
+ }
+ while ((currPos < fSize)) {
+ Packet packet;
+ SerialNum firstSerial(0);
+ SerialNum lastSerial(0);
+ int64_t firstPos(currPos);
+ bool full(false);
+ vespalib::DefaultAlloc buf;
+ for(size_t i(0); !full && (currPos < fSize); i++) {
+ Packet::Entry e;
+ if (read(transLog, e, buf, allowTruncate)) {
+ if (e.valid()) {
+ if (i == 0) {
+ firstSerial = e.serial();
+ if (currPos == _headerLen) {
+ _range.from(firstSerial);
+ }
+ }
+ try {
+ full = addPacket(packet, e);
+ if ( ! full ) {
+ lastSerial = e.serial();
+ currPos = transLog.GetPosition();
+ _sz++;
+ } else {
+ transLog.SetPosition(currPos);
+ }
+ } catch (const std::exception & ex) {
+ throw runtime_error(make_string("%s : Failed creating packet for list %s(%" PRIu64 ") at pos(%" PRIu64 ", %" PRIu64 ")",
+ ex.what(), transLog.GetFileName(), fSize, currPos, transLog.GetPosition()));
+ }
+ } else {
+ throw runtime_error(make_string("Invalid entry reading file %s(%" PRIu64 ") at pos(%" PRIu64 ", %" PRIu64 ")",
+ transLog.GetFileName(), fSize, currPos, transLog.GetPosition()));
+ }
+ } else {
+ if (transLog.GetSize() != fSize) {
+ fSize = transLog.GetSize();
+ } else {
+ throw runtime_error(make_string("Failed reading file %s(%" PRIu64 ") at pos(%" PRIu64 ", %" PRIu64 ")",
+ transLog.GetFileName(), fSize, currPos, transLog.GetPosition()));
+ }
+ }
+ }
+ packet.close();
+ if (!packet.empty()) {
+ _packets[firstSerial] = packet;
+ _range.to(lastSerial);
+ {
+ LockGuard guard(_lock);
+ _skipList.push_back(SkipInfo(firstSerial, firstPos));
+ }
+ }
+ }
+ transLog.Close();
+ return currPos;
+}
+
+DomainPart::DomainPart(const string & name,
+ const string & baseDir,
+ SerialNum s,
+ bool useFsync,
+ Crc defaultCrc,
+ const FileHeaderContext &fileHeaderContext,
+ bool allowTruncate) :
+ _defaultCrc(defaultCrc),
+ _useFsync(useFsync),
+ _lock(),
+ _fileLock(),
+ _range(s),
+ _sz(0),
+ _byteSize(0),
+ _packets(),
+ _fileName(make_string("%s/%s-%016" PRIu64, baseDir.c_str(), name.c_str(), s)),
+ _transLog(_fileName.c_str()),
+ _skipList(),
+ _headerLen(0),
+ _writeLock(),
+ _writtenSerial(0),
+ _syncedSerial(0)
+{
+ if (_transLog.OpenReadOnly()) {
+ int64_t currPos = buildPacketMapping(allowTruncate);
+ if ( ! _transLog.Close() ) {
+ throw runtime_error(make_string("Failed closing file '%s' after reading.", _transLog.GetFileName()));
+ }
+ if ( ! _transLog.OpenWriteOnlyExisting() ) {
+ string e(make_string("Failed opening existing file '%s' for writing: %s", _transLog.GetFileName(), getLastErrorString().c_str()));
+ LOG(error, "%s", e.c_str());
+ throw runtime_error(e);
+ }
+ if (currPos == 0) {
+ // Previous header was truncated. Write new one.
+ writeHeader(fileHeaderContext);
+ currPos = _headerLen;
+ }
+ _byteSize = currPos;
+ } else {
+ if ( ! _transLog.OpenWriteOnly()) {
+ string e(make_string("Failed opening new file '%s' for writing: '%s'", _transLog.GetFileName(), getLastErrorString().c_str()));
+
+ LOG(error, "%s", e.c_str());
+ throw runtime_error(e);
+ }
+ writeHeader(fileHeaderContext);
+ _byteSize = _headerLen;
+ }
+ if ( ! _transLog.SetPosition(_transLog.GetSize()) ) {
+ throw runtime_error(make_string("Failed moving write pointer to the end of the file %s(%" PRIu64 ").",
+ _transLog.GetFileName(), _transLog.GetSize()));
+ }
+ handleSync(_transLog);
+ _writtenSerial = _range.to();
+ _syncedSerial = _writtenSerial;
+}
+
+DomainPart::~DomainPart()
+{
+ close();
+}
+
+void
+DomainPart::writeHeader(const FileHeaderContext &fileHeaderContext)
+{
+ typedef vespalib::GenericHeader::Tag Tag;
+ FileHeader header;
+ assert(_transLog.IsOpened());
+ assert(_transLog.IsWriteMode());
+ assert(_transLog.GetPosition() == 0);
+ fileHeaderContext.addTags(header, _transLog.GetFileName());
+ header.putTag(Tag("desc", "Transaction log domain part file"));
+ _headerLen = header.writeFile(_transLog);
+}
+
+bool
+DomainPart::close()
+{
+ bool retval(false);
+ {
+ LockGuard guard(_fileLock);
+ /*
+ * Sync old domainpart before starting writing new, to avoid
+ * hole. XXX: Feed latency spike due to lack of delayed open
+ * for new domainpart.
+ */
+ handleSync(_transLog);
+ _transLog.dropFromCache();
+ retval = _transLog.Close();
+ LockGuard wguard(_writeLock);
+ _syncedSerial = _writtenSerial;
+ }
+ if ( ! retval ) {
+ throw runtime_error(make_string("Failed closing file '%s' of size %" PRId64 ".",
+ _transLog.GetFileName(), _transLog.GetSize()));
+ }
+ {
+ LockGuard guard(_lock);
+ _packets.clear();
+ }
+ return retval;
+}
+
+bool
+DomainPart::openAndFind(FastOS_FileInterface &file, const SerialNum &from)
+{
+ bool retval(file.OpenReadOnly(_transLog.GetFileName()));
+ if (retval) {
+ int64_t pos(_headerLen);
+ LockGuard guard(_lock);
+ for(SkipList::const_iterator it(_skipList.begin()), mt(_skipList.end());
+ (it < mt) && (it->id() <= from);
+ it++)
+ {
+ pos = it->filePos();
+ }
+ retval = file.SetPosition(pos);
+ }
+ return retval;
+}
+
+bool
+DomainPart::erase(SerialNum to)
+{
+ bool retval(true);
+ if (to > _range.to()) {
+ close();
+ _transLog.Delete();
+ } else {
+ _range.from(std::max(to, _range.from()));
+ }
+ return retval;
+}
+
+void
+DomainPart::commit(SerialNum firstSerial, const Packet &packet)
+{
+ int64_t firstPos(_transLog.GetPosition());
+ nbostream h(packet.getHandle().c_str(), packet.getHandle().size(), true);
+ if (_range.from() == 0) {
+ _range.from(firstSerial);
+ }
+ for (size_t i(0); h.size() > 0; i++) {
+ //LOG(spam,
+ //"Pos(%d) Len(%d), Lim(%d), Remaining(%d)",
+ //h.getPos(), h.getLength(), h.getLimit(), h.getRemaining());
+ Packet::Entry entry;
+ entry.deserialize(h);
+ if (_range.to() < entry.serial()) {
+ write(_transLog, entry);
+ _sz++;
+ _range.to(entry.serial());
+ } else {
+ throw runtime_error(make_string("Incomming serial number(%ld) must be bigger than the last one (%ld).",
+ entry.serial(), _range.to()));
+ }
+ }
+ if (_useFsync) {
+ sync();
+ }
+
+ bool merged(false);
+ LockGuard guard(_lock);
+ if ( ! _packets.empty() ) {
+ Packet & lastPacket = _packets.rbegin()->second;
+ if (lastPacket.sizeBytes() < 0xf000) {
+ if ( ! (merged = lastPacket.merge(packet)) ) {
+ LOG(error, "Failed merging packet [%" PRIu64 ", %" PRIu64 "] with [%" PRIu64 ", %" PRIu64 "]",
+ lastPacket.range().from(), lastPacket.range().to(),
+ packet.range().from(), packet.range().to());
+ }
+ }
+ }
+ if (! merged ) {
+ _packets[firstSerial] = packet;
+ _skipList.push_back(SkipInfo(firstSerial, firstPos));
+ }
+}
+
+void DomainPart::sync()
+{
+ SerialNum syncSerial(0);
+ {
+ LockGuard guard(_writeLock);
+ syncSerial = _writtenSerial;
+ }
+ LockGuard guard(_fileLock);
+ handleSync(_transLog);
+ LockGuard wguard(_writeLock);
+ if (_syncedSerial < syncSerial) {
+ _syncedSerial = syncSerial;
+ }
+}
+
+bool
+DomainPart::visit(SerialNumRange &r, Packet &packet)
+{
+ bool retval(false);
+ LockGuard guard(_lock);
+ LOG(debug, "Visit r(%" PRIu64 ", %" PRIu64 "] Checking %" PRIu64 " packets",
+ r.from(), r.to(), uint64_t(_packets.size()));
+ if ( ! isClosed() ) {
+ PacketList::const_iterator start(_packets.lower_bound(r.from() + 1));
+ PacketList::const_iterator end(_packets.upper_bound(r.to()));
+ if (start != _packets.end()) {
+ if ( ! start->second.range().contains(r.from() + 1) &&
+ (start != _packets.begin())) {
+ PacketList::const_iterator prev(start);
+ prev--;
+ if (prev->second.range().contains(r.from() + 1)) {
+ start--;
+ }
+ }
+ } else {
+ if (!_packets.empty())
+ start--;
+ }
+ if ( start != _packets.end() && start->first <= r.to()) {
+ PacketList::const_iterator next(start);
+ next++;
+ if ((r.from() < start->first) &&
+ ((next != end) || ((next != _packets.end()) && ((r.to() + 1) == next->first))))
+ {
+ packet = start->second;
+ LOG(debug, "Visit whole packet[%" PRIu64 ", %" PRIu64 "]", packet.range().from(), packet.range().to());
+ if (next != _packets.end()) {
+ r.from(next->first - 1);
+ retval = true;
+ } else {
+ /// This is the very last package. Can safely finish.
+ }
+ } else {
+ const nbostream & tmp = start->second.getHandle();
+ nbostream h(tmp.c_str(), tmp.size(), true);
+ LOG(debug, "Visit partial[%" PRIu64 ", %" PRIu64 "] (%zd, %zd, %zd)",
+ start->second.range().from(), start->second.range().to(), h.rp(), h.size(), h.capacity());
+ Packet newPacket(h.size());
+ for (; (h.size() > 0) && (r.from() < r.to()); ) {
+ Packet::Entry e;
+ e.deserialize(h);
+ if (r.from() < e.serial()) {
+ if (e.serial() <= r.to()) {
+ LOG(spam, "Adding serial #%" PRIu64 ", of type %d and size %zd into packet of size %zu and %zu bytes",
+ e.serial(), e.type(), e.data().size(), newPacket.size(), newPacket.sizeBytes());
+ if (newPacket.add(e)) {
+ r.from(e.serial());
+ } else {
+ throw runtime_error("Could not add entry to packet. Here is some mumbo jumbo. Fix.");
+ }
+ } else {
+ // Force breakout on visiting empty interval.
+ r.from(r.to());
+ }
+ }
+ }
+ newPacket.close();
+ packet = newPacket;
+ retval = next != _packets.end();
+ }
+ } else {
+ packet.close();
+ }
+ } else {
+ /// File has been closed must continue from file.
+ retval = true;
+ }
+ return retval;
+}
+
+
+bool
+DomainPart::visit(FastOS_FileInterface &file, SerialNumRange &r, Packet &packet)
+{
+ bool retval(true);
+ if ( ! file.IsOpened() ) {
+ retval = openAndFind(file, r.from() + 1);
+ }
+ if (retval) {
+ Packet newPacket;
+ vespalib::DefaultAlloc buf;
+ for (bool full(false);!full && retval && (r.from() < r.to());) {
+ Packet::Entry e;
+ int64_t fPos = file.GetPosition();
+ retval = read(file, e, buf, false);
+ if (retval &&
+ e.valid() &&
+ (r.from() < e.serial()) &&
+ (e.serial() <= r.to())) {
+ try {
+ full = addPacket(newPacket, e);
+ } catch (const std::exception & ex) {
+ throw runtime_error(make_string("%s : Failed creating packet for visit %s(%" PRIu64 ") at pos(%" PRIu64 ", %" PRIu64 ")",
+ ex.what(), file.GetFileName(), file.GetSize(), fPos, file.GetPosition()));
+ }
+ if ( !full ) {
+ r.from(e.serial());
+ } else {
+ if ( ! file.SetPosition(fPos) ) {
+ throw runtime_error(make_string("Failed setting read position for file '%s' of size %" PRId64 " from %" PRId64 " to %" PRId64 ".",
+ file.GetFileName(), file.GetSize(), file.GetPosition(), fPos));
+ }
+ }
+ }
+ }
+ newPacket.close();
+ packet = newPacket;
+ }
+
+ return retval;
+}
+
+void
+DomainPart::write(FastOS_FileInterface &file, const Packet::Entry &entry)
+{
+ int64_t lastKnownGoodPos(file.GetPosition());
+ int32_t crc(0);
+ uint32_t len(entry.serializedSize() + sizeof(crc));
+ nbostream os;
+ os << static_cast<uint8_t>(_defaultCrc);
+ os << len;
+ size_t start(os.size());
+ entry.serialize(os);
+ size_t end(os.size());
+ crc = calcCrc(_defaultCrc, os.c_str()+start, end - start);
+ os << crc;
+ size_t osSize = os.size();
+ assert(osSize == len + sizeof(len) + sizeof(uint8_t));
+
+ LockGuard guard(_writeLock);
+ if ( ! file.CheckedWrite(os.c_str(), osSize) ) {
+ throw runtime_error(handleWriteError("Failed writing the entry.", file, lastKnownGoodPos, entry, end - start));
+ }
+ _writtenSerial = entry.serial();
+ _byteSize.store(lastKnownGoodPos + osSize, std::memory_order_release);
+}
+
+bool
+DomainPart::read(FastOS_FileInterface &file,
+ Packet::Entry &entry,
+ vespalib::DefaultAlloc & buf,
+ bool allowTruncate)
+{
+ bool retval(true);
+ char tmp[5];
+ int64_t lastKnownGoodPos(file.GetPosition());
+ size_t rlen = file.Read(tmp, sizeof(tmp));
+ nbostream his(tmp, sizeof(tmp));
+ uint8_t version(-1);
+ uint32_t len(0);
+ his >> version >> len;
+ if ((retval = (rlen == sizeof(tmp)))) {
+ if ( ! (retval = (version == ccitt_crc32) || version == xxh64)) {
+ vespalib::string msg(make_string("Version mismatch. Expected 'ccitt_crc32=1' or 'xxh64=2',"
+ " got %d from '%s' at position %ld",
+ version, file.GetFileName(), lastKnownGoodPos));
+ if ((version == 0) && (len == 0) && tailOfFileIsZero(file, lastKnownGoodPos)) {
+ LOG(warning, "%s", msg.c_str());
+ return handleReadError("packet version", file, sizeof(tmp), rlen, lastKnownGoodPos, allowTruncate);
+ } else {
+ throw runtime_error(msg);
+ }
+ }
+ if (len > buf.size()) {
+ vespalib::DefaultAlloc(len).swap(buf);
+ }
+ rlen = file.Read(buf.get(), len);
+ retval = rlen == len;
+ if (!retval) {
+ retval = handleReadError("packet blob", file, len, rlen, lastKnownGoodPos, allowTruncate);
+ } else {
+ nbostream is(buf.get(), len, true);
+ entry.deserialize(is);
+ int32_t crc(0);
+ is >> crc;
+ int32_t crcVerify(calcCrc(static_cast<Crc>(version), buf.get(), len - sizeof(crc)));
+ if (crc != crcVerify) {
+ throw runtime_error(make_string("Got bad crc for packet from '%s' (len pos=%" PRId64 ", len=%d) : crcVerify = %d, expected %d",
+ file.GetFileName(), file.GetPosition() - len - sizeof(len),
+ static_cast<int>(len), static_cast<int>(crcVerify), static_cast<int>(crc)));
+ }
+ }
+ } else {
+ if (rlen == 0) {
+ // Eof
+ } else {
+ retval = handleReadError("packet length", file, sizeof(len), rlen, lastKnownGoodPos, allowTruncate);
+ }
+ }
+ return retval;
+}
+
+int32_t DomainPart::calcCrc(Crc version, const void * buf, size_t sz)
+{
+ if (version == xxh64) {
+ return static_cast<int32_t>(XXH64(buf, sz, 0ll));
+ } else if (version == ccitt_crc32) {
+ vespalib::crc_32_type calculator;
+ calculator.process_bytes(buf, sz);
+ return calculator.checksum();
+ } else {
+ assert(false);
+ }
+}
+
+}
+}
diff --git a/searchlib/src/vespa/searchlib/transactionlog/domainpart.h b/searchlib/src/vespa/searchlib/transactionlog/domainpart.h
new file mode 100644
index 00000000000..04041a2cba0
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/transactionlog/domainpart.h
@@ -0,0 +1,123 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include "common.h"
+#include <map>
+#include <vector>
+#include <atomic>
+#include <vespa/fastos/fastos.h>
+#include <vespa/vespalib/util/sync.h>
+#include <vespa/vespalib/util/memory.h>
+
+namespace search
+{
+
+namespace common
+{
+
+class FileHeaderContext;
+
+}
+
+
+namespace transactionlog
+{
+
+class DomainPart {
+private:
+ DomainPart(const DomainPart &);
+ DomainPart& operator=(const DomainPart &);
+
+public:
+ enum Crc {
+ ccitt_crc32=1,
+ xxh64=2
+ };
+ typedef std::shared_ptr<DomainPart> SP;
+ DomainPart(const vespalib::string &name,
+ const vespalib::string &baseDir,
+ SerialNum s,
+ bool useFsync,
+ Crc defaultCrc,
+ const common::FileHeaderContext &FileHeaderContext,
+ bool allowTruncate);
+
+ ~DomainPart();
+
+ const vespalib::string &fileName() const { return _fileName; }
+ void commit(SerialNum firstSerial, const Packet &packet);
+ bool erase(SerialNum to);
+ bool visit(SerialNumRange &r, Packet &packet);
+ bool visit(FastOS_FileInterface &file, SerialNumRange &r, Packet &packet);
+ bool close();
+ void sync();
+ SerialNumRange range() const { return _range; }
+
+ SerialNum getSynced(void) const {
+ vespalib::LockGuard guard(_writeLock);
+ return _syncedSerial;
+ }
+
+ size_t size() const { return _sz; }
+ size_t byteSize() const {
+ return _byteSize.load(std::memory_order_acquire);
+ }
+ bool isClosed() const { return ! _transLog.IsOpened(); }
+private:
+ bool openAndFind(FastOS_FileInterface &file, const SerialNum &from);
+ int64_t buildPacketMapping(bool allowTruncate);
+
+ static bool
+ read(FastOS_FileInterface &file,
+ Packet::Entry &entry,
+ vespalib::DefaultAlloc &buf,
+ bool allowTruncate);
+
+ void write(FastOS_FileInterface &file, const Packet::Entry &entry);
+ static int32_t calcCrc(Crc crc, const void * buf, size_t len);
+ void writeHeader(const common::FileHeaderContext &fileHeaderContext);
+
+ class SkipInfo
+ {
+ public:
+ SkipInfo(SerialNum s, uint64_t p) :
+ _id(s),
+ _pos(p)
+ {
+ }
+
+ bool operator ==(const SkipInfo &b) const { return cmp(b) == 0; }
+ bool operator <(const SkipInfo &b) const { return cmp(b) < 0; }
+ bool operator >(const SkipInfo &b) const { return cmp(b) > 0; }
+ bool operator <=(const SkipInfo &b) const { return cmp(b) <= 0; }
+ bool operator >=(const SkipInfo &b) const { return cmp(b) >= 0; }
+ int64_t filePos() const { return _pos; }
+ SerialNum id() const { return _id; }
+ private:
+ int64_t cmp(const SkipInfo & b) const { return _id - b._id; }
+ SerialNum _id;
+ uint64_t _pos;
+ };
+ typedef std::vector<SkipInfo> SkipList;
+ typedef std::map<SerialNum, Packet> PacketList;
+ const Crc _defaultCrc;
+ const bool _useFsync;
+ vespalib::Lock _lock;
+ vespalib::Lock _fileLock;
+ SerialNumRange _range;
+ size_t _sz;
+ std::atomic<uint64_t> _byteSize;
+ PacketList _packets;
+ vespalib::string _fileName;
+ FastOS_File _transLog;
+ SkipList _skipList;
+ uint32_t _headerLen;
+ vespalib::Lock _writeLock;
+ // Protected by _writeLock
+ SerialNum _writtenSerial;
+ SerialNum _syncedSerial;
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/transactionlog/nosyncproxy.cpp b/searchlib/src/vespa/searchlib/transactionlog/nosyncproxy.cpp
new file mode 100644
index 00000000000..2b8e2935752
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/transactionlog/nosyncproxy.cpp
@@ -0,0 +1,28 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include "nosyncproxy.h"
+
+namespace search
+{
+namespace transactionlog
+{
+
+NoSyncProxy::NoSyncProxy(void)
+{
+}
+
+
+NoSyncProxy::~NoSyncProxy(void)
+{
+}
+
+
+void
+NoSyncProxy::sync(SerialNum syncTo)
+{
+ (void) syncTo;
+}
+
+}
+
+}
diff --git a/searchlib/src/vespa/searchlib/transactionlog/nosyncproxy.h b/searchlib/src/vespa/searchlib/transactionlog/nosyncproxy.h
new file mode 100644
index 00000000000..0c8faba2979
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/transactionlog/nosyncproxy.h
@@ -0,0 +1,27 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "syncproxy.h"
+
+namespace search
+{
+namespace transactionlog
+{
+
+class NoSyncProxy : public SyncProxy
+{
+public:
+ NoSyncProxy(void);
+
+ virtual
+ ~NoSyncProxy(void);
+
+ virtual void
+ sync(SerialNum syncTo);
+};
+
+}
+
+}
+
diff --git a/searchlib/src/vespa/searchlib/transactionlog/session.cpp b/searchlib/src/vespa/searchlib/transactionlog/session.cpp
new file mode 100644
index 00000000000..bbb786b25c3
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/transactionlog/session.cpp
@@ -0,0 +1,275 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/searchlib/transactionlog/session.h>
+#include <vespa/searchlib/transactionlog/domain.h>
+#include <vespa/fastlib/io/bufferedfile.h>
+#include <vespa/vespalib/util/closuretask.h>
+#include <vespa/log/log.h>
+
+LOG_SETUP(".transactionlog.session");
+
+using vespalib::LockGuard;
+
+namespace search {
+namespace transactionlog {
+
+namespace {
+ const double NEVER(-1.0);
+}
+
+vespalib::Executor::Task::UP
+Session::createTask(const Session::SP & session)
+{
+ if (session->continous()) {
+ return Task::UP(new SubscribeTask(session));
+ } else {
+ return Task::UP(new VisitTask(session));
+ }
+}
+
+void
+Session::SubscribeTask::run()
+{
+ _session->subscribe();
+}
+
+void
+Session::VisitTask::run()
+{
+ _session->visitOnly();
+}
+
+void
+Session::SendTask::run()
+{
+ _session->sendPending();
+}
+
+bool
+Session::inSync() const
+{
+ return _inSync;
+}
+
+void
+Session::visit()
+{
+ LOG(debug, "[%d] : Visiting %" PRIu64 " - %" PRIu64, _id, _range.from(), _range.to());
+ for (DomainPart::SP dpSafe = _domain->findPart(_range.from()); dpSafe.get() && (_range.from() < _range.to()) && (dpSafe.get()->range().from() <= _range.to()); dpSafe = _domain->findPart(_range.from())) {
+ // Must use findPart and iterate until no candidate parts found.
+ DomainPart * dp(dpSafe.get());
+ LOG(debug, "[%d] : Visiting the interval %" PRIu64 " - %" PRIu64 " in domain part [%" PRIu64 ", %" PRIu64 "]", _id, _range.from(), _range.to(), dp->range().from(), dp->range().to());
+ Fast_BufferedFile file;
+ file.EnableDirectIO();
+ for(bool more(true); ok() && more && (_range.from() < _range.to()); ) {
+ LOG(debug, "[%d] : Visiting the interval %" PRIu64 " - %" PRIu64 " in subpart", _id, _range.from(), _range.to());
+ Packet packet;
+ if (dp->isClosed()) {
+ more = dp->visit(file, _range, packet);
+ } else {
+ more = dp->visit(_range, packet);
+ }
+ if (packet.getHandle().size() > 0) {
+ LOG(debug, "[%d] : Sending the interval %" PRIu64 " - %" PRIu64 ". Packet : [%" PRIu64 ", %" PRIu64 "]", _id, _range.from(), _range.to(), packet.range().from(), packet.range().to());
+ send(packet);
+ }
+ }
+ // Nothing more in this DomainPart, force switch to next one.
+ if (_range.from() < dp->range().to()) {
+ _range.from(std::min(dp->range().to(), _range.to()));
+ }
+ }
+
+ LOG(debug, "[%d] : Done visiting, starting subscribe %" PRIu64 " - %" PRIu64, _id, _range.from(), _range.to());
+}
+
+void
+Session::visitOnly()
+{
+ visit();
+ sendDone();
+ finalize();
+}
+
+void
+Session::enQ(const SP & session, SerialNum serial, const Packet & packet)
+{
+ LockGuard guard(session->_lock);
+ session->_packetQ.push_back(QPacket(serial,packet));
+ if (session->_inSync) {
+ session->_domain->execute(Task::UP(new SendTask(session)));
+ }
+}
+
+void
+Session::subscribe()
+{
+ visit();
+ sendPending();
+ sendSync();
+}
+
+void
+Session::sendPending()
+{
+ for (;;) {
+ QPacket packet;
+ {
+ LockGuard guard(_lock);
+ if (_packetQ.empty() || !ok())
+ break;
+ packet = std::move(_packetQ.front());
+ _packetQ.pop_front();
+ }
+ sendPacket(packet._serial, *packet._packet);
+ }
+}
+
+void
+Session::sendPacket(SerialNum serial, const Packet & packet)
+{
+ if (_range.from() < serial) {
+ send(packet);
+ } else {
+ LOG(debug, "[%d] : Skipping %" PRIu64 ". Last sent is %" PRIu64, _id, serial, _range.from());
+ }
+}
+
+void
+Session::finalize()
+{
+ if (!ok()) {
+ LOG(error, "[%d] : Error in %s(%" PRIu64 " - %" PRIu64 "), stopping since I have no idea on what to do.", _id, (continous() ? "subscriber" : "visitor"), _range.from(), _range.to());
+ }
+ LOG(debug, "[%d] : Stopped %" PRIu64 " - %" PRIu64, _id, _range.from(), _range.to());
+ _finished = true;
+}
+
+int32_t
+Session::rpc(FRT_RPCRequest * req)
+{
+ int32_t retval(-7);
+ LOG(debug, "rpc %s starting.", req->GetMethodName());
+ FRT_Supervisor::InvokeSync(_supervisor.GetTransport(), _connection, req, NEVER);
+ if (req->GetErrorCode() == FRTE_NO_ERROR) {
+ retval = (req->GetReturn()->GetValue(0)._intval32);
+ LOG(debug, "rpc %s = %d\n", req->GetMethodName(), retval);
+ } else if (req->GetErrorCode() == FRTE_RPC_TIMEOUT) {
+ LOG(warning, "rpc %s timed out. Will allow to continue: error(%d): %s\n", req->GetMethodName(), req->GetErrorCode(), req->GetErrorMessage());
+ retval = -req->GetErrorCode();
+ } else {
+ if (req->GetErrorCode() != FRTE_RPC_CONNECTION) {
+ LOG(warning, "rpc %s: error(%d): %s\n", req->GetMethodName(), req->GetErrorCode(), req->GetErrorMessage());
+ }
+ retval = -req->GetErrorCode();
+ _ok = false;
+ }
+ return retval;
+}
+
+void
+Session::RequestDone(FRT_RPCRequest * req)
+{
+ _ok = (req->GetErrorCode() == FRTE_NO_ERROR);
+ if (req->GetErrorCode() != FRTE_NO_ERROR) {
+ LOG(warning, "rpcAsync failed %s: error(%d): %s\n", req->GetMethodName(), req->GetErrorCode(), req->GetErrorMessage());
+ } else {
+ int32_t retval = req->GetReturn()->GetValue(0)._intval32;
+ if (retval != RPC::OK) {
+ LOG(error, "Return value != OK in RequestDone for method '%s'", req->GetMethodName());
+ }
+ }
+ req->SubRef();
+}
+
+int32_t
+Session::rpcAsync(FRT_RPCRequest * req)
+{
+ int32_t retval(-7);
+ LOG(debug, "rpcAsync %s starting.", req->GetMethodName());
+ FRT_Supervisor::InvokeAsync(_supervisor.GetTransport(), _connection, req, NEVER, this);
+ if (ok()) {
+ LOG(debug, "rpcAsync %s OK", req->GetMethodName());
+ retval = 0;
+ } else {
+ LOG(warning, "rpcAsync %s FAILED", req->GetMethodName());
+ }
+ return retval;
+}
+
+Session::Session(int sId, const SerialNumRange & r, const Domain::SP & d,
+ FRT_Supervisor & supervisor, FNET_Connection *conn, bool subscriber) :
+ _supervisor(supervisor),
+ _connection(conn),
+ _domain(d),
+ _range(r),
+ _id(sId),
+ _subscriber(subscriber),
+ _inSync(false),
+ _ok(true),
+ _finished(false),
+ _packetQ()
+{
+ _connection->AddRef();
+}
+
+Session::~Session()
+{
+ _connection->SubRef();
+}
+
+bool
+Session::send(const Packet & packet)
+{
+ FRT_RPCRequest *req = _supervisor.AllocRPCRequest();
+ req->SetMethodName("visitCallback");
+ req->GetParams()->AddString(_domain->name().c_str());
+ req->GetParams()->AddInt32(id());
+ req->GetParams()->AddData(packet.getHandle().c_str(), packet.getHandle().size());
+ return send(req, true);
+}
+
+bool
+Session::send(FRT_RPCRequest * req, bool wait)
+{
+ int32_t retval(-1);
+ if (wait) {
+ retval = rpc(req);
+ if ( ! ((retval == RPC::OK) || (retval == FRTE_RPC_CONNECTION)) ) {
+ LOG(error, "Return value != OK(%d) in send for method 'visitCallback'.", retval);
+ }
+ req->SubRef();
+ } else {
+ retval = rpcAsync(req);
+ }
+ return (retval == RPC::OK);
+}
+
+bool
+Session::sendSync()
+{
+ FRT_RPCRequest *req = _supervisor.AllocRPCRequest();
+ req->SetMethodName("syncCallback");
+ req->GetParams()->AddString(_domain->name().c_str());
+ req->GetParams()->AddInt32(id());
+ bool retval(send(req, true));
+ LockGuard guard(_lock);
+ _inSync = true;
+ return retval;
+}
+
+bool
+Session::sendDone()
+{
+ FRT_RPCRequest *req = _supervisor.AllocRPCRequest();
+ req->SetMethodName("eofCallback");
+ req->GetParams()->AddString(_domain->name().c_str());
+ req->GetParams()->AddInt32(id());
+ bool retval(send(req, true));
+ LockGuard guard(_lock);
+ _inSync = true;
+ return retval;
+}
+
+}
+}
diff --git a/searchlib/src/vespa/searchlib/transactionlog/session.h b/searchlib/src/vespa/searchlib/transactionlog/session.h
new file mode 100644
index 00000000000..69d22e69fc1
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/transactionlog/session.h
@@ -0,0 +1,94 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include "common.h"
+#include <vespa/fnet/frt/frt.h>
+#include <vespa/vespalib/util/executor.h>
+#include <vespa/vespalib/util/sync.h>
+#include <deque>
+
+namespace search {
+namespace transactionlog {
+
+class Domain;
+typedef std::shared_ptr<Domain> DomainSP;
+
+class Session : public FRT_IRequestWait,
+ public vespalib::noncopyable
+{
+private:
+ typedef vespalib::Executor::Task Task;
+
+public:
+ typedef std::shared_ptr<Session> SP;
+ Session(int sId, const SerialNumRange & r, const DomainSP & d, FRT_Supervisor & supervisor, FNET_Connection *conn, bool subscriber=false);
+ virtual ~Session();
+ const SerialNumRange & range() const { return _range; }
+ int id() const { return _id; }
+ bool inSync() const;
+ bool continous() const { return _subscriber; }
+ bool ok() const { return _ok; }
+ bool finished() const { return _finished || (_connection->GetState() != FNET_Connection::FNET_CONNECTED);}
+ static void enQ(const SP & session, SerialNum serial, const Packet & packet);
+ static Task::UP createTask(const Session::SP & session);
+private:
+ struct QPacket {
+ QPacket() : _serial(0), _packet() {}
+ QPacket(SerialNum s, const Packet & p)
+ : _serial(s),
+ _packet(new Packet(p))
+ {
+ }
+ SerialNum _serial;
+ std::unique_ptr<Packet> _packet;
+ };
+ class VisitTask : public Task {
+ public:
+ VisitTask(const Session::SP & session) : _session(session) { }
+ private:
+ virtual void run();
+ Session::SP _session;
+ };
+ class SubscribeTask : public Task {
+ public:
+ SubscribeTask(const Session::SP & session) : _session(session) { }
+ private:
+ virtual void run();
+ Session::SP _session;
+ };
+ class SendTask : public Task {
+ public:
+ SendTask(const Session::SP & session) : _session(session) { }
+ virtual void run();
+ private:
+ Session::SP _session;
+ };
+ bool send(FRT_RPCRequest * req, bool wait);
+ virtual void RequestDone(FRT_RPCRequest *req);
+ bool send(const Packet & packet);
+ void sendPacket(SerialNum serial, const Packet & packet);
+ bool sendDone();
+ bool sendSync();
+ void sendPending();
+ void visit();
+ void visitOnly();
+ void subscribe();
+ void finalize();
+ int32_t rpc(FRT_RPCRequest * req);
+ int32_t rpcAsync(FRT_RPCRequest * req);
+ FRT_Supervisor & _supervisor;
+ FNET_Connection * _connection;
+ DomainSP _domain;
+ SerialNumRange _range;
+ int _id;
+ bool _subscriber;
+ bool _inSync;
+ bool _ok;
+ bool _finished;
+ std::deque<QPacket> _packetQ;
+ vespalib::Lock _lock;
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/transactionlog/syncproxy.h b/searchlib/src/vespa/searchlib/transactionlog/syncproxy.h
new file mode 100644
index 00000000000..baf533518e7
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/transactionlog/syncproxy.h
@@ -0,0 +1,27 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/common/serialnum.h>
+
+namespace search
+{
+namespace transactionlog
+{
+
+class SyncProxy
+{
+public:
+ virtual
+ ~SyncProxy(void)
+ {
+ }
+
+ virtual void
+ sync(SerialNum syncTo) = 0;
+};
+
+}
+
+}
+
diff --git a/searchlib/src/vespa/searchlib/transactionlog/trans_log_server_explorer.cpp b/searchlib/src/vespa/searchlib/transactionlog/trans_log_server_explorer.cpp
new file mode 100644
index 00000000000..49c16940be5
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/transactionlog/trans_log_server_explorer.cpp
@@ -0,0 +1,71 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "trans_log_server_explorer.h"
+#include "domain.h"
+#include <vespa/vespalib/data/slime/slime.h>
+
+using vespalib::slime::Inserter;
+using vespalib::slime::Cursor;
+
+namespace search {
+namespace transactionlog {
+
+namespace {
+
+struct DomainExplorer : vespalib::StateExplorer {
+ Domain::SP domain;
+ DomainExplorer(Domain::SP domain_in) : domain(std::move(domain_in)) {}
+ virtual void get_state(const Inserter &inserter, bool full) const override {
+ Cursor &state = inserter.insertObject();
+ DomainInfo info = domain->getDomainInfo();
+ state.setLong("from", info.range.from());
+ state.setLong("to", info.range.to());
+ state.setLong("count", info.count);
+ state.setLong("byteSize", info.byteSize);
+ if (full) {
+ Cursor &array = state.setArray("parts");
+ for (const PartInfo &part_in: info.parts) {
+ Cursor &part = array.addObject();
+ part.setLong("from", part_in.range.from());
+ part.setLong("to", part_in.range.to());
+ part.setLong("count", part_in.count);
+ part.setLong("byteSize", part_in.byteSize);
+ part.setString("file", part_in.file);
+ {
+ FastOS_StatInfo stat_info;
+ FastOS_File::Stat(part_in.file.c_str(), &stat_info);
+ part.setString("lastModified", fastos::TimeStamp::asString(stat_info._modifiedTime));
+ }
+ }
+ }
+ }
+};
+
+} // namespace search::transactionlog::<unnamed>
+
+void
+TransLogServerExplorer::get_state(const Inserter &inserter, bool full) const
+{
+ (void) full;
+ inserter.insertObject();
+}
+
+std::vector<vespalib::string>
+TransLogServerExplorer::get_children_names() const
+{
+ return _server->getDomainNames();
+}
+
+std::unique_ptr<vespalib::StateExplorer>
+TransLogServerExplorer::get_child(vespalib::stringref name) const
+{
+ Domain::SP domain = _server->findDomain(name);
+ if (!domain) {
+ return std::unique_ptr<vespalib::StateExplorer>(nullptr);
+ }
+ return std::unique_ptr<vespalib::StateExplorer>(new DomainExplorer(std::move(domain)));
+}
+
+} // namespace search::transactionlog
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/transactionlog/trans_log_server_explorer.h b/searchlib/src/vespa/searchlib/transactionlog/trans_log_server_explorer.h
new file mode 100644
index 00000000000..8d3f7080385
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/transactionlog/trans_log_server_explorer.h
@@ -0,0 +1,27 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "translogserver.h"
+#include <vespa/vespalib/net/state_explorer.h>
+
+namespace search {
+namespace transactionlog {
+
+/**
+ * Class used to explore the state of a transaction log server.
+ */
+class TransLogServerExplorer : public vespalib::StateExplorer
+{
+private:
+ TransLogServer::SP _server;
+
+public:
+ TransLogServerExplorer(TransLogServer::SP server) : _server(std::move(server)) {}
+ virtual void get_state(const vespalib::slime::Inserter &inserter, bool full) const override;
+ virtual std::vector<vespalib::string> get_children_names() const override;
+ virtual std::unique_ptr<StateExplorer> get_child(vespalib::stringref name) const override;
+};
+
+} // namespace search::transactionlog
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/transactionlog/translogclient.cpp b/searchlib/src/vespa/searchlib/transactionlog/translogclient.cpp
new file mode 100644
index 00000000000..47a2897fba6
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/transactionlog/translogclient.cpp
@@ -0,0 +1,402 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/searchlib/transactionlog/translogclient.h>
+#include <vespa/log/log.h>
+#include <stdexcept>
+
+LOG_SETUP(".translogclient");
+
+namespace search {
+namespace transactionlog {
+
+namespace {
+ const double NEVER(-1.0);
+}
+
+using vespalib::LockGuard;
+
+TransLogClient::TransLogClient(const vespalib::string & rpcTarget) :
+ _rpcTarget(rpcTarget),
+ _sessions(),
+ _supervisor(),
+ _target(NULL)
+{
+ reconnect();
+ exportRPC(_supervisor);
+ _supervisor.Start();
+}
+
+TransLogClient::~TransLogClient()
+{
+ disconnect();
+ _supervisor.ShutDown(true);
+}
+
+bool TransLogClient::reconnect()
+{
+ disconnect();
+ _target = _supervisor.Get2WayTarget(_rpcTarget.c_str());
+ return isConnected();
+}
+
+void TransLogClient::disconnect()
+{
+ if (_target) {
+ _target->SubRef();
+ }
+}
+
+bool TransLogClient::create(const vespalib::string & domain)
+{
+ FRT_RPCRequest *req = _supervisor.AllocRPCRequest();
+ req->SetMethodName("createDomain");
+ req->GetParams()->AddString(domain.c_str());
+ int32_t retval(rpc(req));
+ req->SubRef();
+ return (retval == 0);
+}
+
+bool TransLogClient::remove(const vespalib::string & domain)
+{
+ FRT_RPCRequest *req = _supervisor.AllocRPCRequest();
+ req->SetMethodName("deleteDomain");
+ req->GetParams()->AddString(domain.c_str());
+ int32_t retval(rpc(req));
+ req->SubRef();
+ return (retval == 0);
+}
+
+TransLogClient::Session::UP TransLogClient::open(const vespalib::string & domain)
+{
+ Session::UP session;
+ FRT_RPCRequest *req = _supervisor.AllocRPCRequest();
+ req->SetMethodName("openDomain");
+ req->GetParams()->AddString(domain.c_str());
+ int32_t retval(rpc(req));
+ if (retval == 0) {
+ session.reset(new Session(domain, *this));
+ }
+ req->SubRef();
+ return session;
+}
+
+TransLogClient::Subscriber::UP TransLogClient::createSubscriber(const vespalib::string & domain, TransLogClient::Session::Callback & callBack)
+{
+ return TransLogClient::Subscriber::UP(new Subscriber(domain, *this, callBack));
+}
+
+TransLogClient::Visitor::UP TransLogClient::createVisitor(const vespalib::string & domain, TransLogClient::Session::Callback & callBack)
+{
+ return TransLogClient::Visitor::UP(new Visitor(domain, *this, callBack));
+}
+
+bool TransLogClient::listDomains(std::vector<vespalib::string> & dir)
+{
+ FRT_RPCRequest *req = _supervisor.AllocRPCRequest();
+ req->SetMethodName("listDomains");
+ int32_t retval(rpc(req));
+ if (retval == 0) {
+ char * s = req->GetReturn()->GetValue(1)._string._str;
+ for (const char * d(strsep(&s, "\n")); d && (*d != '\0'); d = strsep(&s, "\n")) {
+ dir.push_back(d);
+ }
+ }
+ req->SubRef();
+ return (retval == 0);
+}
+
+int32_t TransLogClient::rpc(FRT_RPCRequest * req)
+{
+ int32_t retval(-7);
+ if (_target) {
+ _target->InvokeSync(req, NEVER);
+ if (req->GetErrorCode() == FRTE_NO_ERROR) {
+ retval = (req->GetReturn()->GetValue(0)._intval32);
+ LOG(debug, "rpc %s = %d", req->GetMethodName(), retval);
+ } else {
+ LOG(warning, "%s: error(%d): %s", req->GetMethodName(), req->GetErrorCode(), req->GetErrorMessage());
+ }
+ } else {
+ retval = -6;
+ }
+ return retval;
+}
+
+TransLogClient::Session * TransLogClient::findSession(const vespalib::string & domainName, int sessionId)
+{
+ SessionKey key(domainName, sessionId);
+ SessionMap::iterator found(_sessions.find(key));
+ Session * session((found != _sessions.end()) ? found->second : NULL);
+ return session;
+}
+
+void TransLogClient::exportRPC(FRT_Supervisor & supervisor)
+{
+ FRT_ReflectionBuilder rb( & supervisor);
+
+ //-- Visit Callbacks -----------------------------------------------------------
+ rb.DefineMethod("visitCallback", "six", "i", false, FRT_METHOD(TransLogClient::visitCallbackRPC), this);
+ rb.MethodDesc("Will return data asked from a subscriber/visitor.");
+ rb.ParamDesc("name", "The name of the domain.");
+ rb.ParamDesc("session", "Session handle.");
+ rb.ParamDesc("packet", "The data packet.");
+ rb.ReturnDesc("result", "A resultcode(int) of the operation. Non zero number indicates error.");
+
+ //-- Visit Callbacks -----------------------------------------------------------
+ rb.DefineMethod("syncCallback", "si", "i", false, FRT_METHOD(TransLogClient::syncCallbackRPC), this);
+ rb.MethodDesc("Will tell you that now you are uptodate on the subscribtion.");
+ rb.ParamDesc("name", "The name of the domain.");
+ rb.ParamDesc("session", "Session handle.");
+ rb.ReturnDesc("result", "A resultcode(int) of the operation. Non zero number indicates error.");
+
+ //-- Visit Callbacks -----------------------------------------------------------
+ rb.DefineMethod("eofCallback", "si", "i", false, FRT_METHOD(TransLogClient::eofCallbackRPC), this);
+ rb.MethodDesc("Will tell you that you are done with the visitor.");
+ rb.ParamDesc("name", "The name of the domain.");
+ rb.ParamDesc("session", "Session handle.");
+ rb.ReturnDesc("result", "A resultcode(int) of the operation. Non zero number indicates error.");
+}
+
+void TransLogClient::visitCallbackRPC(FRT_RPCRequest *req)
+{
+ uint32_t retval(uint32_t(-1));
+ FRT_Values & params = *req->GetParams();
+ FRT_Values & ret = *req->GetReturn();
+ const char * domainName = params[0]._string._str;
+ int32_t sessionId(params[1]._intval32);
+ LOG(spam, "visitCallback(%s, %d)(%d)", domainName, sessionId, params[2]._data._len);
+ Session * session(findSession(domainName, sessionId));
+ if (session != NULL) {
+ Packet packet(params[2]._data._buf, params[2]._data._len);
+ retval = session->visit(packet);
+ }
+ ret.AddInt32(retval);
+ LOG(debug, "visitCallback(%s, %d)=%d done", domainName, sessionId, retval);
+}
+
+void TransLogClient::syncCallbackRPC(FRT_RPCRequest *req)
+{
+ uint32_t retval(uint32_t(-1));
+ FRT_Values & params = *req->GetParams();
+ FRT_Values & ret = *req->GetReturn();
+ const char * domainName = params[0]._string._str;
+ int32_t sessionId(params[1]._intval32);
+ LOG(debug, "syncCallback(%s, %d)", domainName, sessionId);
+ LockGuard guard(_lock);
+ Session * session(findSession(domainName, sessionId));
+ if (session != NULL) {
+ session->inSync();
+ retval = 0;
+ }
+ ret.AddInt32(retval);
+ LOG(debug, "syncCallback(%s, %d)=%d done", domainName, sessionId, retval);
+}
+
+void TransLogClient::eofCallbackRPC(FRT_RPCRequest *req)
+{
+ uint32_t retval(uint32_t(-1));
+ FRT_Values & params = *req->GetParams();
+ FRT_Values & ret = *req->GetReturn();
+ const char * domainName = params[0]._string._str;
+ int32_t sessionId(params[1]._intval32);
+ LOG(debug, "eofCallback(%s, %d)", domainName, sessionId);
+ Session * session(findSession(domainName, sessionId));
+ if (session != NULL) {
+ session->eof();
+ retval = 0;
+ }
+ ret.AddInt32(retval);
+ LOG(debug, "eofCallback(%s, %d)=%d done", domainName, sessionId, retval);
+}
+
+
+TransLogClient::Session::Session(const vespalib::string & domain, TransLogClient & tlc) :
+ _tlc(tlc),
+ _domain(domain),
+ _sessionId(0)
+{
+}
+
+TransLogClient::Session::~Session()
+{
+ close();
+ clear();
+}
+
+bool TransLogClient::Session::commit(const vespalib::ConstBufferRef & buf)
+{
+ bool retval(true);
+ if (buf.size() != 0) {
+ FRT_RPCRequest *req = _tlc._supervisor.AllocRPCRequest();
+ req->SetMethodName("domainCommit");
+ req->GetParams()->AddString(_domain.c_str());
+ req->GetParams()->AddData(buf.c_str(), buf.size());
+ int retcode = _tlc.rpc(req);
+ retval = (retcode == 0);
+ if (retval) {
+ req->SubRef();
+ } else {
+ vespalib::string msg;
+ if (req->GetReturn() != 0) {
+ msg = req->GetReturn()->GetValue(1)._string._str;
+ } else {
+ msg = vespalib::make_string("Clientside error %s: error(%d): %s", req->GetMethodName(), req->GetErrorCode(), req->GetErrorMessage());
+ }
+ req->SubRef();
+ throw std::runtime_error(vespalib::make_string("commit failed with code %d. server says: %s", retcode, msg.c_str()));
+ }
+ }
+ return retval;
+}
+
+bool TransLogClient::Session::status(SerialNum & b, SerialNum & e, size_t & count)
+{
+ FRT_RPCRequest *req = _tlc._supervisor.AllocRPCRequest();
+ req->SetMethodName("domainStatus");
+ req->GetParams()->AddString(_domain.c_str());
+ int32_t retval(_tlc.rpc(req));
+ if (retval == 0) {
+ b = req->GetReturn()->GetValue(1)._intval64;
+ e = req->GetReturn()->GetValue(2)._intval64;
+ count = req->GetReturn()->GetValue(3)._intval64;
+ }
+ req->SubRef();
+ return (retval == 0);
+}
+
+bool TransLogClient::Session::erase(const SerialNum & to)
+{
+ FRT_RPCRequest *req = _tlc._supervisor.AllocRPCRequest();
+ req->SetMethodName("domainPrune");
+ req->GetParams()->AddString(_domain.c_str());
+ req->GetParams()->AddInt64(to);
+ int32_t retval(_tlc.rpc(req));
+ req->SubRef();
+ if (retval == 1) {
+ LOG(warning, "Prune to %" PRIu64 " denied since there were active visitors in that area", to);
+ }
+ return (retval == 0);
+}
+
+
+bool
+TransLogClient::Session::sync(const SerialNum &syncTo, SerialNum &syncedTo)
+{
+ FRT_RPCRequest *req = _tlc._supervisor.AllocRPCRequest();
+ req->SetMethodName("domainSync");
+ FRT_Values & params = *req->GetParams();
+ params.AddString(_domain.c_str());
+ params.AddInt64(syncTo);
+ int32_t retval(_tlc.rpc(req));
+ if (retval == 0) {
+ syncedTo = req->GetReturn()->GetValue(1)._intval64;
+ }
+ req->SubRef();
+ return (retval == 0);
+}
+
+
+void TransLogClient::Session::clear()
+{
+ if (_sessionId > 0) {
+ LockGuard guard(_tlc._lock);
+ _tlc._sessions.erase(SessionKey(_domain, _sessionId));
+ }
+ _sessionId = 0;
+}
+
+int TransLogClient::SessionKey::cmp(const TransLogClient::SessionKey & b) const
+{
+ int diff(strcmp(_domain.c_str(), b._domain.c_str()));
+ if (diff == 0) {
+ diff = _sessionId - b._sessionId;
+ }
+ return diff;
+}
+
+TransLogClient::Subscriber::Subscriber(const vespalib::string & domain, TransLogClient & tlc, Callback & callBack) :
+ Session(domain, tlc),
+ _callback(callBack)
+{
+}
+
+TransLogClient::Subscriber::~Subscriber()
+{
+}
+
+TransLogClient::Visitor::Visitor(const vespalib::string & domain, TransLogClient & tlc, Callback & callBack) :
+ Subscriber(domain, tlc, callBack)
+{
+}
+
+bool TransLogClient::Session::init(FRT_RPCRequest *req)
+{
+ int32_t retval(_tlc.rpc(req));
+ req->SubRef();
+ if (retval > 0) {
+ clear();
+ _sessionId = retval;
+ SessionKey key(_domain, _sessionId);
+ {
+ LockGuard guard(_tlc._lock);
+ _tlc._sessions[key] = this;
+ }
+ retval = run();
+ }
+ return (retval > 0);
+}
+
+bool TransLogClient::Visitor::visit(const SerialNum & from, const SerialNum & to)
+{
+ FRT_RPCRequest *req = _tlc._supervisor.AllocRPCRequest();
+ req->SetMethodName("domainVisit");
+ req->GetParams()->AddString(_domain.c_str());
+ req->GetParams()->AddInt64(from);
+ req->GetParams()->AddInt64(to);
+ return init(req);
+}
+
+bool TransLogClient::Subscriber::subscribe(const SerialNum & from)
+{
+ FRT_RPCRequest *req = _tlc._supervisor.AllocRPCRequest();
+ req->SetMethodName("domainSubscribe");
+ req->GetParams()->AddString(_domain.c_str());
+ req->GetParams()->AddInt64(from);
+ return init(req);
+}
+
+bool TransLogClient::Session::run()
+{
+ FRT_RPCRequest *req = _tlc._supervisor.AllocRPCRequest();
+ req->SetMethodName("domainSessionRun");
+ req->GetParams()->AddString(_domain.c_str());
+ req->GetParams()->AddInt32(_sessionId);
+ int32_t retval(_tlc.rpc(req));
+ req->SubRef();
+ return (retval == 0);
+}
+
+bool TransLogClient::Session::close()
+{
+ int retval(0);
+ if (_sessionId > 0) {
+ do {
+ FRT_RPCRequest *req = _tlc._supervisor.AllocRPCRequest();
+ req->SetMethodName("domainSessionClose");
+ req->GetParams()->AddString(_domain.c_str());
+ req->GetParams()->AddInt32(_sessionId);
+ if ( (retval = _tlc.rpc(req)) > 0) {
+ FastOS_Thread::Sleep(10);
+ }
+ req->SubRef();
+ } while ( retval == 1 );
+ }
+ return (retval == 0);
+}
+
+TransLogClient::Visitor::~Visitor()
+{
+}
+
+}
+}
diff --git a/searchlib/src/vespa/searchlib/transactionlog/translogclient.h b/searchlib/src/vespa/searchlib/transactionlog/translogclient.h
new file mode 100644
index 00000000000..702a7cd260f
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/transactionlog/translogclient.h
@@ -0,0 +1,140 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include "common.h"
+#include <vespa/document/util/bytebuffer.h>
+#include <vespa/fnet/frt/frt.h>
+#include <map>
+#include <vector>
+#include <vespa/vespalib/util/sync.h>
+#include <vespa/vespalib/util/buffer.h>
+
+namespace search {
+namespace transactionlog {
+
+class TransLogClient : private FRT_Invokable
+{
+private:
+ TransLogClient(const TransLogClient &);
+ TransLogClient& operator=(const TransLogClient &);
+
+public:
+ class Session
+ {
+ public:
+ class Callback {
+ public:
+ virtual ~Callback() { }
+ virtual RPC::Result receive(const Packet & packet) = 0;
+ virtual void inSync() { }
+ virtual void eof() { }
+ };
+ public:
+ typedef std::unique_ptr<Session> UP;
+ typedef std::shared_ptr<Session> SP;
+
+ Session(const vespalib::string & domain, TransLogClient & tlc);
+ virtual ~Session();
+ /// You can commit data of any registered type to any channel.
+ bool commit(const vespalib::ConstBufferRef & packet);
+ /// Will erase all entries prior to <to>
+ bool erase(const SerialNum & to);
+ bool status(SerialNum & b, SerialNum & e, size_t & count);
+
+ bool sync(const SerialNum &syncTo, SerialNum &syncedTo);
+
+ virtual RPC::Result visit(const Packet & ) { return RPC::OK; }
+ virtual void inSync() { }
+ virtual void eof() { }
+ bool close();
+ void clear();
+ const vespalib::string & getDomain() const { return _domain; }
+ const TransLogClient & getTLC() const { return _tlc; }
+ protected:
+ bool init(FRT_RPCRequest * req);
+ bool run();
+ TransLogClient & _tlc;
+ vespalib::string _domain;
+ int _sessionId;
+ };
+ /// Here you connect to the incomming data getting everything from <from>
+ class Subscriber : public Session
+ {
+ public:
+ typedef std::unique_ptr<Subscriber> UP;
+ typedef std::shared_ptr<Subscriber> SP;
+
+ Subscriber(const vespalib::string & domain, TransLogClient & tlc, Callback & callBack);
+ bool subscribe(const SerialNum & from);
+ virtual ~Subscriber();
+ virtual RPC::Result visit(const Packet & packet) { return _callback.receive(packet); }
+ virtual void inSync() { _callback.inSync(); }
+ virtual void eof() { _callback.eof(); }
+ private:
+ Callback & _callback;
+ };
+ /// Here you read the incomming data getting everything from <from>
+ class Visitor : public Subscriber
+ {
+ public:
+ typedef std::unique_ptr<Visitor> UP;
+ typedef std::shared_ptr<Visitor> SP;
+
+ Visitor(const vespalib::string & domain, TransLogClient & tlc, Callback & callBack);
+ bool visit(const SerialNum & from, const SerialNum & to);
+ virtual ~Visitor();
+ };
+public:
+ typedef std::unique_ptr<TransLogClient> UP;
+
+ TransLogClient(const vespalib::string & rpctarget);
+ virtual ~TransLogClient();
+
+ /// Here you create a new domain
+ bool create(const vespalib::string & domain);
+ /// Here you remove a domain
+ bool remove(const vespalib::string & domain);
+ /// Here you open an existing domain
+ Session::UP open(const vespalib::string & domain);
+ /// Here you can get a list of available domains.
+ bool listDomains(std::vector<vespalib::string> & dir);
+ /// Here you get a subscriber
+ Subscriber::UP createSubscriber(const vespalib::string & domain, Session::Callback & callBack);
+ Visitor::UP createVisitor(const vespalib::string & domain, Session::Callback & callBack);
+
+ bool isConnected() const { return (_target != NULL) && _target->IsValid(); }
+ void disconnect();
+ bool reconnect();
+ const vespalib::string &getRPCTarget() const { return _rpcTarget; }
+private:
+ void exportRPC(FRT_Supervisor & supervisor);
+ void visitCallbackRPC(FRT_RPCRequest *req);
+ void syncCallbackRPC(FRT_RPCRequest *req);
+ void eofCallbackRPC(FRT_RPCRequest *req);
+ int32_t rpc(FRT_RPCRequest * req);
+ Session * findSession(const vespalib::string & domain, int sessionId);
+
+ class SessionKey
+ {
+ public:
+ SessionKey(const vespalib::string & domain, int sessionId) : _domain(domain), _sessionId(sessionId) { }
+ bool operator < (const SessionKey & b) const { return cmp(b) < 0; }
+ private:
+ int cmp(const SessionKey & b) const;
+ vespalib::string _domain;
+ int _sessionId;
+ };
+
+ typedef std::map< SessionKey, Session * > SessionMap;
+
+ vespalib::string _rpcTarget;
+ SessionMap _sessions;
+ //Brute force lock for subscriptions. For multithread safety.
+ vespalib::Lock _lock;
+ FRT_Supervisor _supervisor;
+ FRT_Target * _target;
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/transactionlog/translogserver.cpp b/searchlib/src/vespa/searchlib/transactionlog/translogserver.cpp
new file mode 100644
index 00000000000..79b7413c1b4
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/transactionlog/translogserver.cpp
@@ -0,0 +1,672 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/searchlib/transactionlog/translogserver.h>
+#include <fstream>
+#include <vespa/vespalib/util/vstringfmt.h>
+#include <vespa/vespalib/util/stringfmt.h>
+#include <stdexcept>
+#include <vespa/log/log.h>
+#include <vespa/vespalib/io/fileutil.h>
+
+LOG_SETUP(".transactionlog.server");
+
+using vespalib::make_string;
+using vespalib::stringref;
+using vespalib::make_vespa_string;
+using vespalib::IllegalArgumentException;
+using search::common::FileHeaderContext;
+
+namespace search
+{
+
+namespace transactionlog
+{
+
+namespace
+{
+
+class SyncHandler : public FNET_Task
+{
+ FRT_RPCRequest & _req;
+ Domain::SP _domain;
+ TransLogServer::Session::SP _session;
+ SerialNum _syncTo;
+
+public:
+ SyncHandler(FRT_Supervisor *supervisor,
+ FRT_RPCRequest *req,const Domain::SP &domain,
+ const TransLogServer::Session::SP &session,
+ SerialNum syncTo);
+
+ ~SyncHandler(void);
+ void PerformTask(void) override;
+};
+
+
+SyncHandler::SyncHandler(FRT_Supervisor *supervisor,
+ FRT_RPCRequest *req,
+ const Domain::SP &domain,
+ const TransLogServer::Session::SP &session,
+ SerialNum syncTo)
+ : FNET_Task(supervisor->GetScheduler()),
+ _req(*req),
+ _domain(domain),
+ _session(session),
+ _syncTo(syncTo)
+{
+}
+
+
+SyncHandler::~SyncHandler(void)
+{
+}
+
+
+void
+SyncHandler::PerformTask(void)
+{
+ SerialNum synced(_domain->getSynced());
+ if (_session->getDown() ||
+ _domain->getMarkedDeleted() ||
+ synced >= _syncTo) {
+ FRT_Values &rvals = *_req.GetReturn();
+ rvals.AddInt32(0);
+ rvals.AddInt64(synced);
+ _req.Return();
+ delete this;
+ } else {
+ _domain->triggerSyncNow();
+ Schedule(0.05); // Retry in 0.05 seconds
+ }
+}
+
+}
+
+
+
+TransLogServer::TransLogServer(const vespalib::string &name,
+ int listenPort,
+ const vespalib::string &baseDir,
+ const FileHeaderContext &fileHeaderContext,
+ uint64_t domainPartSize,
+ bool useFsync,
+ size_t maxThreads,
+ DomainPart::Crc defaultCrcType)
+ : FRT_Invokable(),
+ _name(name),
+ _baseDir(baseDir),
+ _domainPartSize(domainPartSize),
+ _useFsync(useFsync),
+ _defaultCrcType(defaultCrcType),
+ _executor(maxThreads, 128*1024),
+ _threadPool(8192, 1),
+ _supervisor(),
+ _domains(),
+ _reqQ(),
+ _fileHeaderContext(fileHeaderContext)
+{
+ int retval(0);
+ if ((retval = makeDirectory(_baseDir.c_str())) == 0) {
+ if ((retval = makeDirectory(dir().c_str())) == 0) {
+ std::ifstream domainDir(domainList().c_str());
+ while (domainDir.good() && !domainDir.eof()) {
+ vespalib::string domainName;
+ domainDir >> domainName;
+ if ( ! domainName.empty()) {
+ try {
+ Domain::SP domain(new Domain(domainName,
+ dir(),
+ _executor,
+ _domainPartSize,
+ _useFsync,
+ _defaultCrcType,
+ _fileHeaderContext));
+ _domains[domain->name()] = domain;
+ } catch (const std::exception & e) {
+ LOG(warning, "Failed creating %s domain on startup. Exception = %s", domainName.c_str(), e.what());
+ }
+ }
+ }
+ exportRPC(_supervisor);
+ char listenSpec[32];
+ sprintf(listenSpec, "tcp/%d", listenPort);
+ bool listenOk(false);
+ for (int i(600); !listenOk && i; i--) {
+ if (_supervisor.Listen(listenSpec)) {
+ _supervisor.Start();
+ listenOk = true;
+ } else {
+ LOG(warning, "Failed listening at port %s trying for %d seconds more.", listenSpec, i);
+ FastOS_Thread::Sleep(1000);
+ }
+ }
+ if ( ! listenOk ) {
+ throw std::runtime_error(make_string("Failed listening at port %s. Giving up. Requires manual intervention.", listenSpec));
+ }
+ } else {
+ throw std::runtime_error(make_string("Failed creating tls dir %s r(%d), e(%d). Requires manual intervention.", dir().c_str(), retval, errno));
+ }
+ } else {
+ throw std::runtime_error(make_string("Failed creating tls base dir %s r(%d), e(%d). Requires manual intervention.", _baseDir.c_str(), retval, errno));
+ }
+ start(_threadPool);
+}
+
+TransLogServer::~TransLogServer()
+{
+ stop();
+ join();
+ _supervisor.ShutDown(true);
+}
+
+bool TransLogServer::onStop()
+{
+ LOG(info, "Stopping TLS");
+ _reqQ.push(NULL);
+ return true;
+}
+
+void TransLogServer::run()
+{
+ FRT_RPCRequest *req(NULL);
+ bool hasPacket(false);
+ logMetric();
+ do {
+ for (req = NULL; (hasPacket = _reqQ.pop(req, 60000)) && (req != NULL); req = NULL) {
+ bool immediate = true;
+ if (strcmp(req->GetMethodName(), "domainSessionClose") == 0) {
+ domainSessionClose(req);
+ } else if (strcmp(req->GetMethodName(), "domainSubscribe") == 0) {
+ domainSubscribe(req);
+ } else if (strcmp(req->GetMethodName(), "domainVisit") == 0) {
+ domainVisit(req);
+ } else if (strcmp(req->GetMethodName(), "createDomain") == 0) {
+ createDomain(req);
+ } else if (strcmp(req->GetMethodName(), "deleteDomain") == 0) {
+ deleteDomain(req);
+ } else if (strcmp(req->GetMethodName(), "openDomain") == 0) {
+ openDomain(req);
+ } else if (strcmp(req->GetMethodName(), "listDomains") == 0) {
+ listDomains(req);
+ } else if (strcmp(req->GetMethodName(), "domainStatus") == 0) {
+ domainStatus(req);
+ } else if (strcmp(req->GetMethodName(), "domainCommit") == 0) {
+ domainCommit(req);
+ } else if (strcmp(req->GetMethodName(), "domainPrune") == 0) {
+ domainPrune(req);
+ } else if (strcmp(req->GetMethodName(), "domainSessionRun") == 0) {
+ domainSessionRun(req);
+ } else if (strcmp(req->GetMethodName(), "domainSync") == 0) {
+ immediate = false;
+ domainSync(req);
+ } else {
+ LOG(warning, "Received unknown RPC command %s", req->GetMethodName());
+ }
+ if (immediate) {
+ req->Return();
+ }
+ }
+ logMetric();
+ } while (running() && !(hasPacket && (req == NULL)));
+ LOG(info, "TLS Stopped");
+}
+
+void TransLogServer::logMetric() const
+{
+ Guard domainGuard(_lock);
+ for (DomainList::const_iterator it(_domains.begin()), mt(_domains.end()); it != mt; it++) {
+ vespalib::string prefix("translogserver." + it->first + ".serialnum.");
+ EV_COUNT((prefix + "last").c_str(), it->second->end());
+ EV_COUNT((prefix + "first").c_str(), it->second->begin());
+ EV_VALUE((prefix + "numused").c_str(), it->second->size());
+ EV_COUNT((prefix + "count").c_str(), it->second->count());
+ }
+}
+
+DomainStats
+TransLogServer::getDomainStats() const
+{
+ DomainStats retval;
+ Guard domainGuard(_lock);
+ for (const auto &elem : _domains) {
+ retval[elem.first] = elem.second->getDomainInfo();
+ }
+ return retval;
+}
+
+std::vector<vespalib::string>
+TransLogServer::getDomainNames()
+{
+ std::vector<vespalib::string> names;
+ Guard guard(_lock);
+ for(const auto &domain: _domains) {
+ names.push_back(domain.first);
+ }
+ return names;
+}
+
+Domain::SP
+TransLogServer::findDomain(const stringref &domainName)
+{
+ Guard domainGuard(_lock);
+ Domain::SP domain;
+ DomainList::iterator found(_domains.find(domainName));
+ if (found != _domains.end()) {
+ domain = found->second;
+ }
+ return domain;
+}
+
+void TransLogServer::exportRPC(FRT_Supervisor & supervisor)
+{
+ _supervisor.SetSessionInitHook(FRT_METHOD(TransLogServer::initSession), this);
+ _supervisor.SetSessionFiniHook(FRT_METHOD(TransLogServer::finiSession), this);
+ _supervisor.SetSessionDownHook(FRT_METHOD(TransLogServer::downSession), this);
+ FRT_ReflectionBuilder rb( & supervisor);
+
+ //-- Create Domain -----------------------------------------------------------
+ rb.DefineMethod("createDomain", "s", "i", true, FRT_METHOD(TransLogServer::relayToThreadRPC), this);
+ rb.MethodDesc("Create a new domain.");
+ rb.ParamDesc("name", "The name of the domain.");
+ rb.ReturnDesc("handle", "A handle(int) to the domain. Negative number indicates error.");
+
+ //-- Delete Domain -----------------------------------------------------------
+ rb.DefineMethod("deleteDomain", "s", "is", true, FRT_METHOD(TransLogServer::relayToThreadRPC), this);
+ rb.MethodDesc("Create a new domain.");
+ rb.ParamDesc("name", "The name of the domain.");
+ rb.ReturnDesc("retval", "0 on success. Negative number indicates error.");
+ rb.ReturnDesc("errormsg", "Message describing the error, if any.");
+
+ //-- Open Domain -----------------------------------------------------------
+ rb.DefineMethod("openDomain", "s", "i", true, FRT_METHOD(TransLogServer::relayToThreadRPC), this);
+ rb.MethodDesc("Open an existing domain.");
+ rb.ParamDesc("name", "The name of the domain.");
+ rb.ReturnDesc("handle", "A handle(int) to the domain. Negative number indicates error.");
+
+ //-- List Domains -----------------------------------------------------------
+ rb.DefineMethod("listDomains", "", "is", true, FRT_METHOD(TransLogServer::relayToThreadRPC), this);
+ rb.MethodDesc("Will return a list of all the domains.");
+ rb.ReturnDesc("result", "A resultcode(int) of the operation. Negative number indicates error.");
+ rb.ReturnDesc("domains", "List of all the domains in a newline separated string");
+
+ //-- Domain Status -----------------------------------------------------------
+ rb.DefineMethod("domainStatus", "s", "illl", true, FRT_METHOD(TransLogServer::relayToThreadRPC), this);
+ rb.MethodDesc("This will return key status information about the domain.");
+ rb.ParamDesc("name", "The name of the domain.");
+ rb.ReturnDesc("result", "A resultcode(int) of the operation. Negative number indicates error.");
+ rb.ReturnDesc("begin", "The id of the first element in the log.");
+ rb.ReturnDesc("end", "The id of the last element in the log.");
+ rb.ReturnDesc("size", "Number of elements in the log.");
+
+ //-- Domain Commit -----------------------------------------------------------
+ rb.DefineMethod("domainCommit", "sx", "is", true, FRT_METHOD(TransLogServer::relayToThreadRPC), this);
+ rb.MethodDesc("Will commit the data to the log.");
+ rb.ParamDesc("name", "The name of the domain.");
+ rb.ParamDesc("packet", "The data to commit to the domain.");
+ rb.ReturnDesc("result", "A resultcode(int) of the operation. Negative number indicates error.");
+ rb.ReturnDesc("message", "A textual description of the result code.");
+
+ //-- Domain Prune -----------------------------------------------------------
+ rb.DefineMethod("domainPrune", "sl", "i", true, FRT_METHOD(TransLogServer::relayToThreadRPC), this);
+ rb.MethodDesc("Will erase all operations prior to the serial number.");
+ rb.ParamDesc("name", "The name of the domain.");
+ rb.ParamDesc("to", "Will erase all up and including.");
+ rb.ReturnDesc("result", "A resultcode(int) of the operation. Negative number indicates error.");
+
+ //-- Domain Subscribe -----------------------------------------------------------
+ rb.DefineMethod("domainSubscribe", "sl", "i", true, FRT_METHOD(TransLogServer::relayToThreadRPC), this);
+ rb.MethodDesc("This will create a subscription. It will live till the connection is closed.");
+ rb.ParamDesc("name", "The name of the domain.");
+ rb.ParamDesc("from", "Will return all entries following(not including) <from>.");
+ rb.ReturnDesc("result", "A resultcode(int) of the operation. Negative number indicates error. Positive number is the sessionid");
+
+ //-- Domain Visit -----------------------------------------------------------
+ rb.DefineMethod("domainVisit", "sll", "i", true, FRT_METHOD(TransLogServer::relayToThreadRPC), this);
+ rb.MethodDesc("This will create a visitor that return all operations in the range.");
+ rb.ParamDesc("name", "The name of the domain.");
+ rb.ParamDesc("from", "Will return all entries following(not including) <from>.");
+ rb.ParamDesc("to", "Will return all entries including <to>.");
+ rb.ReturnDesc("result", "A resultcode(int) of the operation. Negative number indicates error. Positive number is the sessionid");
+
+ //-- Domain Session Run -----------------------------------------------------------
+ rb.DefineMethod("domainSessionRun", "si", "i", true, FRT_METHOD(TransLogServer::relayToThreadRPC), this);
+ rb.MethodDesc("This will start the session thread.");
+ rb.ParamDesc("name", "The name of the domain.");
+ rb.ParamDesc("sessionid", "The session identifier.");
+ rb.ReturnDesc("result", "A resultcode(int) of the operation. Negative number indicates error.");
+
+ //-- Domain Session Close -----------------------------------------------------------
+ rb.DefineMethod("domainSessionClose", "si", "i", true, FRT_METHOD(TransLogServer::relayToThreadRPC), this);
+ rb.MethodDesc("This will close the session.");
+ rb.ParamDesc("name", "The name of the domain.");
+ rb.ParamDesc("sessionid", "The session identifier.");
+ rb.ReturnDesc("result", "A resultcode(int) of the operation. Negative number indicates error. 1 means busy -> retry. 0 is OK.");
+
+ //-- Domain Sync --
+ rb.DefineMethod("domainSync", "sl", "il", true,
+ FRT_METHOD(TransLogServer::relayToThreadRPC), this);
+ rb.MethodDesc("Sync domain to given entry");
+ rb.ParamDesc("name", "The name of the domain.");
+ rb.ParamDesc("syncto", "Entry to sync to");
+ rb.ReturnDesc("result",
+ "A resultcode(int) of the operation. "
+ "Negative number indicates error.");
+ rb.ReturnDesc("syncedto", "Entry synced to");
+}
+
+void TransLogServer::createDomain(FRT_RPCRequest *req)
+{
+ uint32_t retval(0);
+ FRT_Values & params = *req->GetParams();
+ FRT_Values & ret = *req->GetReturn();
+
+ const char * domainName = params[0]._string._str;
+ LOG(debug, "createDomain(%s)", domainName);
+
+ Guard createDeleteGuard(_fileLock);
+ Domain::SP domain(findDomain(domainName));
+ if ( !domain ) {
+ try {
+ domain.reset(new Domain(domainName,
+ dir(),
+ _executor,
+ _domainPartSize,
+ _useFsync,
+ _defaultCrcType,
+ _fileHeaderContext));
+ {
+ Guard domainGuard(_lock);
+ _domains[domain->name()] = domain;
+ }
+ std::ofstream domainDir(domainList().c_str(), std::ios::app);
+ domainDir << domain->name() << std::endl;
+ } catch (const std::exception & e) {
+ LOG(warning, "Failed creating %s domain. Exception = %s", domainName, e.what());
+ retval = uint32_t(-1);
+ }
+ }
+
+ ret.AddInt32(retval);
+}
+
+void TransLogServer::deleteDomain(FRT_RPCRequest *req)
+{
+ uint32_t retval(0);
+ vespalib::string msg("ok");
+ FRT_Values & params = *req->GetParams();
+ FRT_Values & ret = *req->GetReturn();
+
+ const char * domainName = params[0]._string._str;
+ LOG(debug, "deleteDomain(%s)", domainName);
+
+ Guard createDeleteGuard(_fileLock);
+ Domain::SP domain(findDomain(domainName));
+ if ( !domain || (domain->getNumSessions() == 0)) {
+ try {
+ if (domain) {
+ domain->markDeleted();
+ Guard domainGuard(_lock);
+ _domains.erase(domainName);
+ }
+ vespalib::rmdir(Domain::getDir(dir(), domainName).c_str(), true);
+ std::ofstream domainDir(domainList().c_str(), std::ios::trunc);
+ Guard domainGuard(_lock);
+ for (DomainList::const_iterator it(_domains.begin()), mt(_domains.end()); it != mt; it++) {
+ domainDir << it->first << std::endl;
+ }
+ } catch (const std::exception & e) {
+ msg = make_vespa_string("Failed deleting %s domain. Exception = %s", domainName, e.what());
+ retval = -1;
+ LOG(warning, "%s", msg.c_str());
+ }
+ } else {
+ retval = -2;
+ msg = vespalib::make_vespa_string("Domain '%s' is open. Can not delete open domains.", domainName);
+ LOG(warning, "%s", msg.c_str());
+ }
+ ret.AddInt32(retval);
+ ret.AddString(msg.c_str());
+}
+
+void TransLogServer::openDomain(FRT_RPCRequest *req)
+{
+ uint32_t retval(0);
+ FRT_Values & params = *req->GetParams();
+ FRT_Values & ret = *req->GetReturn();
+
+ const char * domainName = params[0]._string._str;
+ LOG(debug, "openDomain(%s)", domainName);
+
+ Domain::SP domain(findDomain(domainName));
+ if ( !domain ) {
+ retval = uint32_t(-1);
+ }
+
+ ret.AddInt32(retval);
+}
+
+void TransLogServer::listDomains(FRT_RPCRequest *req)
+{
+ FRT_Values & ret = *req->GetReturn();
+ LOG(debug, "listDomains()");
+
+ vespalib::string domains;
+ Guard domainGuard(_lock);
+ for(DomainList::const_iterator it(_domains.begin()), mt(_domains.end()); it != mt; it++) {
+ domains += it->second->name();
+ domains += "\n";
+ }
+ ret.AddInt32(0);
+ ret.AddString(domains.c_str());
+}
+
+void TransLogServer::domainStatus(FRT_RPCRequest *req)
+{
+ FRT_Values & params = *req->GetParams();
+ FRT_Values & ret = *req->GetReturn();
+ const char * domainName = params[0]._string._str;
+ LOG(debug, "domainStatus(%s)", domainName);
+ Domain::SP domain(findDomain(domainName));
+ if (domain) {
+ ret.AddInt32(0);
+ ret.AddInt64(domain->begin());
+ ret.AddInt64(domain->end());
+ ret.AddInt64(domain->size());
+ } else {
+ ret.AddInt32(uint32_t(-1));
+ ret.AddInt64(0);
+ ret.AddInt64(0);
+ ret.AddInt64(0);
+ }
+}
+
+void TransLogServer::commit(const vespalib::string & domainName, const Packet & packet)
+{
+ Domain::SP domain(findDomain(domainName));
+ if (domain) {
+ domain->commit(packet);
+ } else {
+ throw IllegalArgumentException("Could not find domain " + domainName);
+ }
+}
+
+void TransLogServer::domainCommit(FRT_RPCRequest *req)
+{
+ FRT_Values & params = *req->GetParams();
+ FRT_Values & ret = *req->GetReturn();
+ const char * domainName = params[0]._string._str;
+ LOG(debug, "domainCommit(%s)(%d)", domainName, params[1]._data._len);
+ Domain::SP domain(findDomain(domainName));
+ if (domain) {
+ Packet packet(params[1]._data._buf, params[1]._data._len);
+ try {
+ domain->commit(packet);
+ ret.AddInt32(0);
+ ret.AddString("ok");
+ } catch (const std::exception & e) {
+ ret.AddInt32(-2);
+ ret.AddString(make_string("Exception during commit on %s : %s", domainName, e.what()).c_str());
+ }
+ } else {
+ ret.AddInt32(-1);
+ ret.AddString(make_string("Could not find domain %s", domainName).c_str());
+ }
+}
+
+void TransLogServer::domainSubscribe(FRT_RPCRequest *req)
+{
+ uint32_t retval(uint32_t(-1));
+ FRT_Values & params = *req->GetParams();
+ FRT_Values & ret = *req->GetReturn();
+ const char * domainName = params[0]._string._str;
+ LOG(debug, "domainSubscribe(%s)", domainName);
+ Domain::SP domain(findDomain(domainName));
+ if (domain) {
+ SerialNum from(params[1]._intval64);
+ LOG(debug, "domainSubscribe(%s, %" PRIu64 ")", domainName, from);
+ retval = domain->subscribe(domain, from, _supervisor, req->GetConnection());
+ }
+ ret.AddInt32(retval);
+}
+
+void TransLogServer::domainVisit(FRT_RPCRequest *req)
+{
+ uint32_t retval(uint32_t(-1));
+ FRT_Values & params = *req->GetParams();
+ FRT_Values & ret = *req->GetReturn();
+ const char * domainName = params[0]._string._str;
+ LOG(debug, "domainVisit(%s)", domainName);
+ Domain::SP domain(findDomain(domainName));
+ if (domain) {
+ SerialNum from(params[1]._intval64);
+ SerialNum to(params[2]._intval64);
+ LOG(debug, "domainVisit(%s, %" PRIu64 ", %" PRIu64 ")", domainName, from, to);
+ retval = domain->visit(domain, from, to, _supervisor, req->GetConnection());
+ }
+ ret.AddInt32(retval);
+}
+
+void TransLogServer::domainSessionRun(FRT_RPCRequest *req)
+{
+ uint32_t retval(uint32_t(-1));
+ FRT_Values & params = *req->GetParams();
+ FRT_Values & ret = *req->GetReturn();
+ const char * domainName = params[0]._string._str;
+ int sessionId(params[1]._intval32);
+ LOG(debug, "domainSessionRun(%s, %d)", domainName, sessionId);
+ Domain::SP domain(findDomain(domainName));
+ if (domain) {
+ LOG(debug, "Valid domain domainSessionRun(%s, %d)", domainName, sessionId);
+ retval = domain->startSession(sessionId);
+ }
+ ret.AddInt32(retval);
+}
+
+void TransLogServer::relayToThreadRPC(FRT_RPCRequest *req)
+{
+ req->Detach();
+ _reqQ.push(req);
+}
+
+void TransLogServer::domainSessionClose(FRT_RPCRequest *req)
+{
+ uint32_t retval(uint32_t(-1));
+ FRT_Values & params = *req->GetParams();
+ FRT_Values & ret = *req->GetReturn();
+ const char * domainName = params[0]._string._str;
+ int sessionId(params[1]._intval32);
+ LOG(debug, "domainSessionClose(%s, %d)", domainName, sessionId);
+ Domain::SP domain(findDomain(domainName));
+ if (domain) {
+ LOG(debug, "Valid domain domainSessionClose(%s, %d)", domainName, sessionId);
+ retval = domain->closeSession(sessionId);
+ }
+ LOG(debug, "domainSessionClose(%s, %d) = %d", domainName, sessionId, retval);
+ ret.AddInt32(retval);
+}
+
+void TransLogServer::domainPrune(FRT_RPCRequest *req)
+{
+ uint32_t retval(uint32_t(-1));
+ FRT_Values & params = *req->GetParams();
+ FRT_Values & ret = *req->GetReturn();
+ const char * domainName = params[0]._string._str;
+ LOG(debug, "domainPrune(%s)", domainName);
+ Domain::SP domain(findDomain(domainName));
+ if (domain) {
+ SerialNum to(params[1]._intval64);
+ SerialNum oldestActive = domain->findOldestActiveVisit();
+ if (oldestActive < to) {
+ retval = 1;
+ } else if (domain->erase(to)) {
+ retval = 0;
+ }
+ }
+ ret.AddInt32(retval);
+}
+
+
+const TransLogServer::Session::SP &
+TransLogServer::getSession(FRT_RPCRequest *req)
+{
+ FNET_Connection *conn = req->GetConnection();
+ void *vctx = conn->GetContext()._value.VOIDP;
+ Session::SP *sessionspp = static_cast<Session::SP *>(vctx);
+ return *sessionspp;
+}
+
+
+void
+TransLogServer::initSession(FRT_RPCRequest *req)
+{
+ req->GetConnection()->SetContext(new Session::SP(new Session()));
+}
+
+
+void
+TransLogServer::finiSession(FRT_RPCRequest *req)
+{
+ FNET_Connection *conn = req->GetConnection();
+ void *vctx = conn->GetContext()._value.VOIDP;
+ conn->GetContextPT()->_value.VOIDP = NULL;
+ Session::SP *sessionspp = static_cast<Session::SP *>(vctx);
+ delete sessionspp;
+}
+
+
+void
+TransLogServer::downSession(FRT_RPCRequest *req)
+{
+ getSession(req)->setDown();
+}
+
+
+void
+TransLogServer::domainSync(FRT_RPCRequest *req)
+{
+ FRT_Values & params = *req->GetParams();
+ const char * domainName = params[0]._string._str;
+ SerialNum syncTo(params[1]._intval64);
+ LOG(debug, "domainSync(%s, %" PRIu64 ")", domainName, syncTo);
+ Domain::SP domain(findDomain(domainName));
+ Session::SP session(getSession(req));
+
+ if (domain.get() == nullptr) {
+ FRT_Values &rvals = *req->GetReturn();
+ rvals.AddInt32(0);
+ rvals.AddInt64(0);
+ req->Return();
+ return;
+ }
+
+ SyncHandler *syncHandler = new SyncHandler(&_supervisor,
+ req,
+ domain,
+ session,
+ syncTo);
+
+ syncHandler->ScheduleNow();
+}
+
+}
+}
diff --git a/searchlib/src/vespa/searchlib/transactionlog/translogserver.h b/searchlib/src/vespa/searchlib/transactionlog/translogserver.h
new file mode 100644
index 00000000000..98a24393814
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/transactionlog/translogserver.h
@@ -0,0 +1,110 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/searchlib/transactionlog/domain.h>
+#include <vespa/vespalib/util/document_runnable.h>
+#include <vespa/document/util/queue.h>
+#include <mutex>
+
+namespace search
+{
+
+namespace common
+{
+
+class FileHeaderContext;
+
+}
+
+namespace transactionlog
+{
+
+class TransLogServerExplorer;
+
+class TransLogServer : public document::Runnable, private FRT_Invokable, public Writer
+{
+public:
+ friend class TransLogServerExplorer;
+ typedef std::unique_ptr<TransLogServer> UP;
+ typedef std::shared_ptr<TransLogServer> SP;
+
+ TransLogServer(const vespalib::string &name,
+ int listenPort,
+ const vespalib::string &baseDir,
+ const common::FileHeaderContext &fileHeaderContext,
+ uint64_t domainPartSize=0x10000000,
+ bool useFsync=false,
+ size_t maxThreads=4,
+ DomainPart::Crc defaultCrc=DomainPart::xxh64);
+ virtual ~TransLogServer();
+ uint64_t getDomainPartSize() const { return _domainPartSize; }
+ uint64_t setDomainPartSize();
+ DomainStats getDomainStats() const;
+
+ virtual void commit(const vespalib::string & domainName, const Packet & packet);
+
+
+ class Session
+ {
+ bool _down;
+ public:
+ typedef std::shared_ptr<Session> SP;
+
+ Session(void) : _down(false) { }
+ bool getDown(void) const { return _down; }
+ void setDown(void) { _down = true; }
+ };
+
+private:
+ virtual bool onStop();
+ virtual void run();
+ void exportRPC(FRT_Supervisor & supervisor);
+ void relayToThreadRPC(FRT_RPCRequest *req);
+
+ void createDomain(FRT_RPCRequest *req);
+ void deleteDomain(FRT_RPCRequest *req);
+ void openDomain(FRT_RPCRequest *req);
+ void listDomains(FRT_RPCRequest *req);
+
+ void domainStatus(FRT_RPCRequest *req);
+ void domainCommit(FRT_RPCRequest *req);
+ void domainSessionRun(FRT_RPCRequest *req);
+ void domainPrune(FRT_RPCRequest *req);
+ void domainVisit(FRT_RPCRequest *req);
+ void domainSubscribe(FRT_RPCRequest *req);
+ void domainSessionClose(FRT_RPCRequest *req);
+ void domainSync(FRT_RPCRequest *req);
+
+ void initSession(FRT_RPCRequest *req);
+ void finiSession(FRT_RPCRequest *req);
+ void downSession(FRT_RPCRequest *req);
+
+ void logMetric() const;
+ std::vector<vespalib::string> getDomainNames();
+ Domain::SP findDomain(const vespalib::stringref &name);
+ vespalib::string dir() const { return _baseDir + "/" + _name; }
+ vespalib::string domainList() const { return dir() + "/" + _name + ".domains"; }
+
+ static const Session::SP & getSession(FRT_RPCRequest *req);
+
+ typedef std::map<vespalib::string, Domain::SP > DomainList;
+
+ vespalib::string _name;
+ vespalib::string _baseDir;
+ const uint64_t _domainPartSize;
+ const bool _useFsync;
+ const DomainPart::Crc _defaultCrcType;
+ vespalib::ThreadStackExecutor _executor;
+ FastOS_ThreadPool _threadPool;
+ FRT_Supervisor _supervisor;
+ DomainList _domains;
+ mutable std::mutex _lock; // Protects _domains
+ std::mutex _fileLock; // Protects the creating and deleting domains including file system operations.
+ document::Queue<FRT_RPCRequest *> _reqQ;
+ const common::FileHeaderContext &_fileHeaderContext;
+ using Guard = std::lock_guard<std::mutex>;
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/transactionlog/translogserverapp.cpp b/searchlib/src/vespa/searchlib/transactionlog/translogserverapp.cpp
new file mode 100644
index 00000000000..33918e373d1
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/transactionlog/translogserverapp.cpp
@@ -0,0 +1,68 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/searchlib/transactionlog/translogserverapp.h>
+#include <vespa/log/log.h>
+
+LOG_SETUP(".translogserverapp");
+
+using search::common::FileHeaderContext;
+
+namespace search
+{
+
+namespace transactionlog
+{
+
+TransLogServerApp::TransLogServerApp(const config::ConfigUri & tlsConfigUri,
+ const FileHeaderContext & fileHeaderContext)
+ : _tls(),
+ _tlsConfig(),
+ _tlsConfigFetcher(tlsConfigUri.getContext()),
+ _fileHeaderContext(fileHeaderContext)
+{
+ _tlsConfigFetcher.subscribe<searchlib::TranslogserverConfig>(tlsConfigUri.getConfigId(), this);
+ _tlsConfigFetcher.start();
+}
+
+namespace {
+
+DomainPart::Crc getCrc(searchlib::TranslogserverConfig::Crcmethod crcType)
+{
+ switch (crcType) {
+ case searchlib::TranslogserverConfig::ccitt_crc32:
+ return DomainPart::ccitt_crc32;
+ case searchlib::TranslogserverConfig::xxh64:
+ return DomainPart::xxh64;
+ }
+ assert(false);
+}
+
+}
+
+void TransLogServerApp::start()
+{
+ std::shared_ptr<searchlib::TranslogserverConfig> c = _tlsConfig.get();
+ _tls.reset(new TransLogServer(c->servername,
+ c->listenport,
+ c->basedir,
+ _fileHeaderContext,
+ c->filesizemax,
+ c->usefsync,
+ c->maxthreads,
+ getCrc(c->crcmethod)));
+}
+
+TransLogServerApp::~TransLogServerApp()
+{
+ _tlsConfigFetcher.close();
+}
+
+void TransLogServerApp::configure(std::unique_ptr<searchlib::TranslogserverConfig> cfg)
+{
+ LOG(config, "configure Transaction Log Server %s at port %d", cfg->servername.c_str(), cfg->listenport);
+ _tlsConfig.set(cfg.release());
+ _tlsConfig.latch();
+}
+
+}
+}
diff --git a/searchlib/src/vespa/searchlib/transactionlog/translogserverapp.h b/searchlib/src/vespa/searchlib/transactionlog/translogserverapp.h
new file mode 100644
index 00000000000..5478fee61ed
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/transactionlog/translogserverapp.h
@@ -0,0 +1,46 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/searchlib/transactionlog/translogserver.h>
+#include <vespa/searchlib/config/config-translogserver.h>
+#include <vespa/config/helper/configfetcher.h>
+#include <vespa/vespalib/util/ptrholder.h>
+
+namespace search
+{
+
+namespace common
+{
+
+class FileHeaderContext;
+
+}
+
+namespace transactionlog
+{
+
+class TransLogServerApp : public config::IFetcherCallback<searchlib::TranslogserverConfig>
+{
+private:
+ TransLogServer::SP _tls;
+ vespalib::PtrHolder<searchlib::TranslogserverConfig> _tlsConfig;
+ config::ConfigFetcher _tlsConfigFetcher;
+ const common::FileHeaderContext & _fileHeaderContext;
+
+ void configure(std::unique_ptr<searchlib::TranslogserverConfig> cfg);
+
+public:
+ typedef std::unique_ptr<TransLogServerApp> UP;
+
+ TransLogServerApp(const config::ConfigUri & tlsConfigUri,
+ const common::FileHeaderContext &fileHeaderContext);
+ ~TransLogServerApp();
+
+ TransLogServer::SP getTransLogServer() const { return _tls; }
+
+ void start();
+};
+
+} // namespace transactionlog
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/util/.gitignore b/searchlib/src/vespa/searchlib/util/.gitignore
new file mode 100644
index 00000000000..ee8938b6bf4
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/util/.gitignore
@@ -0,0 +1,6 @@
+*.So
+*.exe
+*.ilk
+*.pdb
+.depend*
+Makefile
diff --git a/searchlib/src/vespa/searchlib/util/CMakeLists.txt b/searchlib/src/vespa/searchlib/util/CMakeLists.txt
new file mode 100644
index 00000000000..a7d9d2290a3
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/util/CMakeLists.txt
@@ -0,0 +1,25 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_library(searchlib_util OBJECT
+ SOURCES
+ bufferwriter.cpp
+ comprbuffer.cpp
+ comprfile.cpp
+ dirtraverse.cpp
+ drainingbufferwriter.cpp
+ filealign.cpp
+ fileheadertk.cpp
+ filekit.cpp
+ filesizecalculator.cpp
+ fileutil.cpp
+ foldedstringcompare.cpp
+ ioerrorhandler.cpp
+ logutil.cpp
+ rawbuf.cpp
+ sigbushandler.cpp
+ slime_output_raw_buf_adapter.cpp
+ statebuf.cpp
+ statefile.cpp
+ stringenum.cpp
+ url.cpp
+ DEPENDS
+)
diff --git a/searchlib/src/vespa/searchlib/util/bufferwriter.cpp b/searchlib/src/vespa/searchlib/util/bufferwriter.cpp
new file mode 100644
index 00000000000..21e1d16a079
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/util/bufferwriter.cpp
@@ -0,0 +1,43 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "bufferwriter.h"
+
+namespace search
+{
+
+BufferWriter::BufferWriter()
+ : _cur(nullptr),
+ _end(nullptr),
+ _start(nullptr)
+{
+}
+
+
+BufferWriter::~BufferWriter()
+{
+}
+
+
+void
+BufferWriter::writeSlow(const void *src, size_t len)
+{
+ size_t residue = len;
+ const char *csrc = static_cast<const char *>(src);
+ for (;;) {
+ size_t maxLen = freeLen();
+ if (residue <= maxLen) {
+ writeFast(csrc, residue);
+ break;
+ }
+ if (maxLen != 0) {
+ writeFast(csrc, maxLen);
+ csrc += maxLen;
+ residue -= maxLen;
+ }
+ flush();
+ }
+}
+
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/util/bufferwriter.h b/searchlib/src/vespa/searchlib/util/bufferwriter.h
new file mode 100644
index 00000000000..b4183f779ed
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/util/bufferwriter.h
@@ -0,0 +1,55 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+namespace search
+{
+
+/**
+ * Abstract class to write to a buffer with an abstract backing store
+ * and abstract backing buffer. Each time backing buffer is full,
+ * flush() is called to resize it or drain it to the backing store.
+ */
+class BufferWriter
+{
+ char *_cur;
+ char *_end;
+ char *_start;
+protected:
+ void rewind() { _cur = _start; }
+
+ void setup(void *start, size_t len) {
+ _start = static_cast<char *>(start);
+ _end = _start + len;
+ rewind();
+ }
+
+ size_t freeLen() const { return _end - _cur; }
+ size_t usedLen() const { return _cur - _start; }
+
+ void writeFast(const void *src, size_t len)
+ {
+ __builtin_memcpy(_cur, src, len);
+ _cur += len;
+ }
+
+ void writeSlow(const void *src, size_t len);
+
+public:
+ BufferWriter();
+
+ virtual ~BufferWriter();
+
+ virtual void flush() = 0;
+
+ void write(const void *src, size_t len)
+ {
+ if (__builtin_expect(len <= freeLen(), true)) {
+ writeFast(src, len);
+ return;
+ }
+ writeSlow(src, len);
+ }
+};
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/util/comprbuffer.cpp b/searchlib/src/vespa/searchlib/util/comprbuffer.cpp
new file mode 100644
index 00000000000..b76cfc2674d
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/util/comprbuffer.cpp
@@ -0,0 +1,147 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright (C) 2002-2003 Fast Search & Transfer ASA
+// Copyright (C) 2003 Overture Services Norway AS
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+#include <vespa/searchlib/util/comprbuffer.h>
+#include <vespa/vespalib/objects/nbostream.h>
+
+LOG_SETUP(".comprbuffer");
+
+namespace search
+{
+
+using vespalib::nbostream;
+
+ComprBuffer::ComprBuffer(uint32_t unitSize)
+ : _comprBuf(NULL),
+ _comprBufSize(0),
+ _unitSize(unitSize),
+ _comprBufMalloc(NULL)
+{
+}
+
+
+ComprBuffer::~ComprBuffer(void)
+{
+ dropComprBuf();
+}
+
+
+void
+ComprBuffer::dropComprBuf(void)
+{
+ free(_comprBufMalloc);
+ _comprBuf = NULL;
+ _comprBufMalloc = NULL;
+}
+
+
+void
+ComprBuffer::allocComprBuf(size_t comprBufSize,
+ size_t preferredFileAlignment,
+ FastOS_FileInterface *file,
+ bool padBefore)
+{
+ comprBufSize = _aligner.setupAlign(comprBufSize,
+ _unitSize,
+ file,
+ preferredFileAlignment);
+ _comprBufSize = comprBufSize;
+ _padBefore = padBefore;
+ allocComprBuf();
+}
+
+void
+ComprBuffer::allocComprBuf(void)
+{
+ dropComprBuf();
+ /*
+ * Add padding after normal buffer, to allow buffer to be completely
+ * full before normal flushes for encoding. Any spillover into padding
+ * area should be copied to start of buffer after write. This allows
+ * for better alignment of write operations since buffer writes can then
+ * normally write full buffers.
+ *
+ * For read, the padding after normal buffer gives some slack for the
+ * decoder prefetch at end of file.
+ */
+ size_t paddingAfter = minimumPadding() * _unitSize;
+ size_t paddingBefore = 0;
+ if (_padBefore) {
+ /*
+ * Add padding before normal buffer, to allow last data at end of
+ * buffer to be copied to the padding area before the normal buffer
+ * prior to a full buffer read. This allows for better alignment of
+ * read operations since buffer reads can then normally read full
+ * buffers.
+ */
+ paddingBefore = paddingAfter + 2 * _unitSize;
+ size_t memalign = FastOS_File::getMaxDirectIOMemAlign();
+ if (paddingBefore < memalign)
+ paddingBefore = memalign;
+ }
+ size_t fullpadding = paddingAfter + paddingBefore;
+ size_t allocLen = _comprBufSize * _unitSize + fullpadding;
+ void *alignedBuf = FastOS_File::allocateGenericDirectIOBuffer(allocLen,
+ _comprBufMalloc);
+ memset(alignedBuf, 0, allocLen);
+ /*
+ * Set pointer to the start of normal buffer, which should be properly
+ * aligned in memory for direct IO.
+ */
+ _comprBuf = reinterpret_cast<void *>
+ (static_cast<char *>(alignedBuf) + paddingBefore);
+}
+
+
+void
+ComprBuffer::expandComprBuf(uint32_t overflowUnits)
+{
+ size_t newSize = static_cast<size_t>(_comprBufSize) * 2;
+ assert(static_cast<unsigned int>(newSize) == newSize);
+ if (newSize < 16)
+ newSize = 16;
+ size_t paddingAfter = minimumPadding() * _unitSize;
+ assert(overflowUnits <= minimumPadding());
+ void *newBuf = malloc(newSize * _unitSize + paddingAfter);
+ size_t oldLen = (static_cast<size_t>(_comprBufSize) + overflowUnits) *
+ _unitSize;
+ if (oldLen > 0)
+ memcpy(newBuf, _comprBuf, oldLen);
+ free(_comprBufMalloc);
+ _comprBuf = _comprBufMalloc = newBuf;
+ _comprBufSize = newSize;
+}
+
+
+void
+ComprBuffer::referenceComprBuf(const ComprBuffer &rhs)
+{
+ _comprBuf = rhs._comprBuf;
+ _comprBufSize = rhs._comprBufSize;
+}
+
+
+void
+ComprBuffer::checkPointWrite(nbostream &out)
+{
+ _aligner.checkPointWrite(out);
+ out << _comprBufSize << _unitSize << _padBefore;
+}
+
+
+void
+ComprBuffer::checkPointRead(nbostream &in)
+{
+ _aligner.checkPointRead(in);
+ uint32_t unitSize;
+ in >> _comprBufSize >> unitSize >> _padBefore;
+ assert(unitSize == _unitSize);
+
+ allocComprBuf();
+}
+
+
+}
diff --git a/searchlib/src/vespa/searchlib/util/comprbuffer.h b/searchlib/src/vespa/searchlib/util/comprbuffer.h
new file mode 100644
index 00000000000..43a67bcfb97
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/util/comprbuffer.h
@@ -0,0 +1,98 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright (C) 1999-2003 Fast Search & Transfer ASA
+// Copyright (C) 2003 Overture Services Norway AS
+
+#pragma once
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/searchlib/util/filealign.h>
+
+namespace search {
+
+class ComprBuffer
+{
+private:
+ ComprBuffer(const ComprBuffer &);
+
+ ComprBuffer &
+ operator=(const ComprBuffer &);
+
+ void
+ allocComprBuf(void);
+public:
+ void *_comprBuf;
+ size_t _comprBufSize;
+ uint32_t _unitSize; // Size of unit in bytes, doubles up as alignment
+ bool _padBefore;
+ void *_comprBufMalloc;
+ FileAlign _aligner;
+
+ ComprBuffer(uint32_t unitSize);
+
+ virtual
+ ~ComprBuffer(void);
+
+ void
+ dropComprBuf(void);
+
+ void
+ allocComprBuf(size_t comprBufSize,
+ size_t preferredFileAlignment,
+ FastOS_FileInterface *const file,
+ bool padbefore);
+
+ static size_t
+ minimumPadding(void)
+ {
+ return 8;
+ }
+
+ uint32_t
+ getUnitBitSize(void) const
+ {
+ return _unitSize * 8;
+ }
+
+ bool
+ getPadBefore(void) const
+ {
+ return _padBefore;
+ }
+
+ bool
+ getCheckPointResumed(void) const
+ {
+ return _aligner.getCheckPointResumed();
+ }
+
+ /*
+ * When encoding to memory instead of file, the compressed buffer must
+ * be able to grow.
+ */
+ void
+ expandComprBuf(uint32_t overflowUnits);
+
+ /*
+ * For unit testing only. Reference data owned by rhs, only works as
+ * long as rhs is live and unchanged.
+ */
+ void
+ referenceComprBuf(const ComprBuffer &rhs);
+
+ /**
+ * Checkpoint write. Used at semi-regular intervals during indexing
+ * to allow for continued indexing after an interrupt.
+ */
+ void
+ checkPointWrite(vespalib::nbostream &out);
+
+ /**
+ * Checkpoint read. Used when resuming indexing after an interrupt.
+ *
+ */
+ void
+ checkPointRead(vespalib::nbostream &in);
+};
+
+}
+
diff --git a/searchlib/src/vespa/searchlib/util/comprfile.cpp b/searchlib/src/vespa/searchlib/util/comprfile.cpp
new file mode 100644
index 00000000000..719b423861c
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/util/comprfile.cpp
@@ -0,0 +1,650 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright (C) 2002-2003 Fast Search & Transfer ASA
+// Copyright (C) 2003 Overture Services Norway AS
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+#include <vespa/searchlib/util/comprbuffer.h>
+#include <vespa/searchlib/util/comprfile.h>
+#include <vespa/vespalib/objects/nbostream.h>
+
+LOG_SETUP(".comprbuffer");
+
+namespace search
+{
+
+using vespalib::nbostream;
+
+void
+ComprFileReadBase::ReadComprBuffer(uint64_t stopOffset,
+ bool readAll,
+ ComprFileDecodeContext &decodeContext,
+ int &bitOffset,
+ FastOS_FileInterface &file,
+ uint64_t &fileReadByteOffset,
+ uint64_t fileSize,
+ ComprBuffer &cbuf)
+{
+ assert(cbuf._comprBuf != NULL);
+
+ bool isretryread = false;
+
+ retry:
+ if (decodeContext.lastChunk())
+ return; // Already reached end of file.
+ int remainingUnits = decodeContext.remainingUnits();
+
+ // There's a good amount of data here already.
+ if (remainingUnits >
+ static_cast<ssize_t>(ComprBuffer::minimumPadding())) //FIX! Tune
+ return;
+
+ // Assert that file read offset is aligned on unit boundary
+ assert((static_cast<size_t>(fileReadByteOffset) &
+ (cbuf._unitSize - 1)) == 0);
+ // Get direct IO file alignment
+ size_t fileDirectIOAlign = cbuf._aligner.getDirectIOFileAlign();
+ // calculate number of pad units before requested start
+ int padBeforeUnits = static_cast<int>
+ (static_cast<size_t>(fileReadByteOffset) &
+ (fileDirectIOAlign - 1)) / cbuf._unitSize;
+ // No padding before if at end of file.
+ if (fileReadByteOffset >= fileSize)
+ padBeforeUnits = 0;
+ // Continuation reads starts at aligned boundary.
+ assert(remainingUnits == 0 || padBeforeUnits == 0);
+
+ if (readAll)
+ stopOffset = fileSize << 3;
+ else if (!isretryread) {
+ stopOffset += 8 * cbuf.getUnitBitSize(); // XXX: Magic integer
+ // Realign stop offset to direct IO alignment boundary
+ uint64_t fileDirectIOBitAlign =
+ static_cast<uint64_t>(fileDirectIOAlign) << 3;
+ if ((stopOffset & (fileDirectIOBitAlign - 1)) != 0)
+ stopOffset += fileDirectIOBitAlign -
+ (stopOffset & (fileDirectIOBitAlign - 1));
+ }
+
+ bool isMore = true;
+ if (stopOffset >= (fileSize << 3)) {
+ stopOffset = fileSize << 3;
+ isMore = false;
+ }
+
+ int64_t readBits = static_cast<int64_t>(stopOffset) -
+ (static_cast<int64_t>(fileReadByteOffset) << 3) +
+ padBeforeUnits * cbuf.getUnitBitSize();
+ int64_t bufferBits = cbuf._comprBufSize * cbuf.getUnitBitSize();
+ if (readBits > 0 && (bufferBits < readBits))
+ {
+ isMore = true;
+ readBits = bufferBits;
+ }
+
+ int extraRemainingUnits = 0;
+ if (bitOffset == -1) {
+ // Ensure that compressed data for current position is still available
+ // in buffer form.
+ extraRemainingUnits = 2;
+ }
+ // Move remaining integers to padding area before start of buffer
+ if (remainingUnits + extraRemainingUnits > 0)
+ memmove(static_cast<char *>(cbuf._comprBuf) -
+ (remainingUnits + extraRemainingUnits) * cbuf._unitSize,
+ static_cast<const char *>(decodeContext.getUnitPtr()) -
+ extraRemainingUnits * cbuf._unitSize,
+ (remainingUnits + extraRemainingUnits) * cbuf._unitSize);
+
+ // Adjust file position to direct IO boundary if needed before read
+ if (padBeforeUnits != 0) {
+ fileReadByteOffset -= padBeforeUnits * cbuf._unitSize;
+ file.SetPosition(fileReadByteOffset);
+ }
+ int readUnits0 = 0;
+ if (readBits > 0)
+ readUnits0 = static_cast<int>((readBits + cbuf.getUnitBitSize() - 1) /
+ cbuf.getUnitBitSize());
+
+ // Try to align end of read to an alignment boundary
+ int readUnits = cbuf._aligner.adjustElements(fileReadByteOffset /
+ cbuf._unitSize, readUnits0);
+ if (readUnits < readUnits0)
+ isMore = true;
+
+ if (readUnits > 0) {
+ int64_t padBytes = fileReadByteOffset +
+ static_cast<int64_t>(readUnits) * cbuf._unitSize -
+ fileSize;
+ if (!isMore && padBytes > 0) {
+ // Pad reading of file written with smaller unit size with
+ // NUL bytes.
+ file.ReadBuf(cbuf._comprBuf, readUnits * cbuf._unitSize -
+ padBytes);
+ memset(static_cast<char *>(cbuf._comprBuf) +
+ readUnits * cbuf._unitSize - padBytes,
+ 0,
+ padBytes);
+ } else
+ file.ReadBuf(cbuf._comprBuf, readUnits * cbuf._unitSize);
+ }
+ // If at end of file then add units of zero bits as padding
+ if (!isMore)
+ memset(static_cast<char *>(cbuf._comprBuf) +
+ readUnits * cbuf._unitSize,
+ 0,
+ cbuf._unitSize * ComprBuffer::minimumPadding());
+
+ assert(remainingUnits + readUnits >= 0);
+ decodeContext.afterRead(static_cast<char *>(cbuf._comprBuf) +
+ (padBeforeUnits - remainingUnits) *
+ static_cast<int32_t>(cbuf._unitSize),
+ (remainingUnits + readUnits - padBeforeUnits),
+ fileReadByteOffset +
+ readUnits * cbuf._unitSize,
+ isMore);
+ fileReadByteOffset += readUnits * cbuf._unitSize;
+ if (!isretryread &&
+ decodeContext.endOfChunk() &&
+ isMore) {
+ isretryread = true;
+ goto retry; // Alignment caused too short read
+ }
+
+ if (bitOffset != -1) {
+ decodeContext.setupBits(bitOffset);
+ bitOffset = -1;
+ }
+
+}
+
+
+void
+ComprFileReadBase::SetPosition(uint64_t newPosition,
+ uint64_t stopOffset,
+ bool readAll,
+ ComprFileDecodeContext &decodeContext,
+ int &bitOffset,
+ FastOS_FileInterface &file,
+ uint64_t &fileReadByteOffset,
+ uint64_t fileSize,
+ ComprBuffer &cbuf)
+{
+ int64_t pos;
+ uint64_t oldPosition;
+
+ oldPosition = decodeContext.getBitPos(bitOffset, fileReadByteOffset);
+ assert(oldPosition == decodeContext.getBitPosV());
+ if (newPosition == oldPosition)
+ return;
+ if (newPosition > oldPosition && newPosition <= (fileReadByteOffset << 3)) {
+ size_t skip = newPosition - oldPosition;
+ if (skip < 2 * cbuf.getUnitBitSize()) {
+ // Cached bits might still be needed, just read and ignore bits
+ if (decodeContext.endOfChunk())
+ ReadComprBuffer(stopOffset,
+ readAll,
+ decodeContext,
+ bitOffset,
+ file,
+ fileReadByteOffset,
+ fileSize,
+ cbuf);
+ decodeContext.skipBits(skip);
+ assert(decodeContext.getBitPos(bitOffset,
+ fileReadByteOffset) == newPosition);
+ assert(decodeContext.getBitPosV() == newPosition);
+ return;
+ }
+ // Cached bits not needed, skip to new position in buffer
+ size_t left = (fileReadByteOffset << 3) - newPosition;
+ decodeContext.adjUnitPtr((left + cbuf.getUnitBitSize() - 1) /
+ cbuf.getUnitBitSize());
+ bitOffset = static_cast<int>
+ (static_cast<uint32_t>(newPosition) &
+ (cbuf.getUnitBitSize() - 1));
+ // We might now be at end of chunk, read more if needed in order
+ // for setupBits() to be safe.
+ if (decodeContext.endOfChunk())
+ ReadComprBuffer(stopOffset,
+ readAll,
+ decodeContext,
+ bitOffset,
+ file,
+ fileReadByteOffset,
+ fileSize,
+ cbuf);
+ // Only call SetupBits() if ReadComprBuffer() didn't do it.
+ if (bitOffset != -1) {
+ decodeContext.setupBits(bitOffset);
+ bitOffset = -1;
+ }
+ assert(decodeContext.getBitPos(bitOffset,
+ fileReadByteOffset) == newPosition);
+ assert(decodeContext.getBitPosV() == newPosition);
+ return;
+ }
+ pos = newPosition / cbuf.getUnitBitSize();
+ pos *= cbuf._unitSize;
+ fileReadByteOffset = pos;
+ bitOffset = static_cast<int>(static_cast<uint32_t>(newPosition) &
+ (cbuf.getUnitBitSize() - 1));
+
+ assert(pos <= static_cast<int64_t>(fileSize));
+
+ file.SetPosition(pos);
+ assert(pos == file.GetPosition());
+
+ decodeContext.emptyBuffer(newPosition);
+ assert(decodeContext.getBitPos(bitOffset,
+ fileReadByteOffset) == newPosition);
+ assert(decodeContext.getBitPosV() == newPosition);
+}
+
+
+void
+ComprFileWriteBase::
+WriteComprBuffer(ComprFileEncodeContext &encodeContext,
+ ComprBuffer &cbuf,
+ FastOS_FileInterface &file,
+ uint64_t &fileWriteByteOffset,
+ bool flushSlack)
+{
+ assert(cbuf._comprBuf != NULL);
+
+ int chunkUsedUnits = encodeContext.getUsedUnits(cbuf._comprBuf);
+
+ if (chunkUsedUnits == 0)
+ return;
+ int chunkSizeNormalMax = encodeContext.getNormalMaxUnits(cbuf._comprBuf);
+ int chunksize = chunkUsedUnits;
+ /*
+ * Normally, only flush the normal buffer and copy the slack
+ * after the buffer to the start of buffer.
+ */
+ if (!flushSlack && chunksize > chunkSizeNormalMax)
+ chunksize = chunkSizeNormalMax;
+ assert(static_cast<unsigned int>(chunksize) <= cbuf._comprBufSize ||
+ (flushSlack &&
+ static_cast<unsigned int>(chunksize) <= cbuf._comprBufSize +
+ ComprBuffer::minimumPadding()));
+ file.WriteBuf(cbuf._comprBuf, cbuf._unitSize * chunksize);
+
+ int remainingUnits = chunkUsedUnits - chunksize;
+ assert(remainingUnits == 0 ||
+ (!flushSlack &&
+ static_cast<unsigned int>(remainingUnits) <=
+ ComprBuffer::minimumPadding()));
+ // Copy any slack after buffer to the start of the buffer
+ if (remainingUnits > 0)
+ memmove(cbuf._comprBuf,
+ static_cast<char *>(cbuf._comprBuf) +
+ chunksize * cbuf._unitSize,
+ cbuf._unitSize * remainingUnits);
+
+ fileWriteByteOffset += chunksize * cbuf._unitSize;
+ encodeContext.afterWrite(cbuf,
+ remainingUnits,
+ fileWriteByteOffset);
+}
+
+
+ComprFileReadContext::
+ComprFileReadContext(ComprFileDecodeContext &decodeContext)
+ : ComprBuffer(decodeContext.getUnitByteSize()),
+ _decodeContext(&decodeContext),
+ _fileSize(0),
+ _fileReadByteOffset(0),
+ _bitOffset(0),
+ _stopOffset(0),
+ _readAll(true),
+ _checkPointOffsetValid(false),
+ _file(NULL),
+ _checkPointOffset(0)
+{
+}
+
+
+ComprFileReadContext::
+ComprFileReadContext(uint32_t unitSize)
+ : ComprBuffer(unitSize),
+ _decodeContext(NULL),
+ _fileSize(0),
+ _fileReadByteOffset(0),
+ _bitOffset(0),
+ _stopOffset(0),
+ _readAll(true),
+ _checkPointOffsetValid(false),
+ _file(NULL),
+ _checkPointOffset(0)
+{
+}
+
+
+ComprFileReadContext::~ComprFileReadContext(void)
+{
+}
+
+
+void
+ComprFileReadContext::readComprBuffer(uint64_t stopOffset, bool readAll)
+{
+ search::ComprFileReadBase::ReadComprBuffer(stopOffset,
+ readAll,
+ *_decodeContext,
+ _bitOffset,
+ *_file,
+ _fileReadByteOffset,
+ _fileSize,
+ *this);
+}
+
+
+void
+ComprFileReadContext::readComprBuffer(void)
+{
+ search::ComprFileReadBase::ReadComprBuffer(_stopOffset,
+ _readAll,
+ *_decodeContext,
+ _bitOffset,
+ *_file,
+ _fileReadByteOffset,
+ _fileSize,
+ *this);
+}
+
+
+void
+ComprFileReadContext::setPosition(uint64_t newPosition,
+ uint64_t stopOffset,
+ bool readAll)
+{
+ setStopOffset(stopOffset, readAll);
+ search::ComprFileReadBase::SetPosition(newPosition,
+ stopOffset,
+ readAll,
+ *_decodeContext,
+ _bitOffset,
+ *_file,
+ _fileReadByteOffset,
+ _fileSize,
+ *this);
+}
+
+
+void
+ComprFileReadContext::setPosition(uint64_t newPosition)
+{
+ search::ComprFileReadBase::SetPosition(newPosition,
+ _stopOffset,
+ _readAll,
+ *_decodeContext,
+ _bitOffset,
+ *_file,
+ _fileReadByteOffset,
+ _fileSize,
+ *this);
+}
+
+
+void
+ComprFileReadContext::allocComprBuf(unsigned int comprBufSize,
+ size_t preferredFileAlignment)
+{
+ ComprBuffer::allocComprBuf(comprBufSize, preferredFileAlignment,
+ _file, true);
+}
+
+
+void
+ComprFileReadContext::referenceWriteContext(const ComprFileWriteContext &rhs)
+{
+ ComprFileEncodeContext *e = rhs.getEncodeContext();
+ ComprFileDecodeContext *d = getDecodeContext();
+
+ assert(e != NULL);
+ int usedUnits = e->getUsedUnits(rhs._comprBuf);
+ assert(usedUnits >= 0);
+
+ referenceComprBuf(rhs);
+ setBufferEndFilePos(static_cast<uint64_t>(usedUnits) * _unitSize);
+ setFileSize(static_cast<uint64_t>(usedUnits) * _unitSize);
+ if (d != NULL) {
+ d->afterRead(_comprBuf,
+ usedUnits,
+ static_cast<uint64_t>(usedUnits) * _unitSize,
+ false);
+ d->setupBits(0);
+ setBitOffset(-1);
+ assert(d->getBitPosV() == 0);
+ }
+}
+
+
+void
+ComprFileReadContext::copyWriteContext(const ComprFileWriteContext &rhs)
+{
+ ComprFileEncodeContext *e = rhs.getEncodeContext();
+ ComprFileDecodeContext *d = getDecodeContext();
+
+ assert(e != NULL);
+ int usedUnits = e->getUsedUnits(rhs._comprBuf);
+ assert(usedUnits >= 0);
+
+ dropComprBuf();
+ allocComprBuf(usedUnits, 32768);
+ assert(_comprBufSize >= static_cast<unsigned int>(usedUnits));
+ memcpy(_comprBuf, rhs._comprBuf,
+ static_cast<size_t>(usedUnits) * _unitSize);
+ setBufferEndFilePos(static_cast<uint64_t>(usedUnits) * _unitSize);
+ setFileSize(static_cast<uint64_t>(usedUnits) * _unitSize);
+ if (d != NULL) {
+ d->afterRead(_comprBuf,
+ usedUnits,
+ static_cast<uint64_t>(usedUnits) * _unitSize,
+ false);
+ d->setupBits(0);
+ setBitOffset(-1);
+ assert(d->getBitPosV() == 0);
+ }
+}
+
+
+void
+ComprFileReadContext::referenceReadContext(const ComprFileReadContext &rhs)
+{
+ ComprFileDecodeContext *d = getDecodeContext();
+
+ int usedUnits = rhs.getBufferEndFilePos() / _unitSize;
+ assert(usedUnits >= 0);
+ assert(static_cast<uint64_t>(usedUnits) * _unitSize ==
+ rhs.getBufferEndFilePos());
+
+ referenceComprBuf(rhs);
+ setBufferEndFilePos(static_cast<uint64_t>(usedUnits) * _unitSize);
+ setFileSize(static_cast<uint64_t>(usedUnits) * _unitSize);
+ if (d != NULL) {
+ d->afterRead(_comprBuf,
+ usedUnits,
+ static_cast<uint64_t>(usedUnits) * _unitSize,
+ false);
+ d->setupBits(0);
+ setBitOffset(-1);
+ assert(d->getBitPosV() == 0);
+ }
+}
+
+
+void
+ComprFileReadContext::copyReadContext(const ComprFileReadContext &rhs)
+{
+ ComprFileDecodeContext *d = getDecodeContext();
+
+ int usedUnits = rhs.getBufferEndFilePos() / _unitSize;
+ assert(usedUnits >= 0);
+ assert(static_cast<uint64_t>(usedUnits) * _unitSize ==
+ rhs.getBufferEndFilePos());
+
+ dropComprBuf();
+ allocComprBuf(usedUnits, 32768);
+ assert(_comprBufSize >= static_cast<unsigned int>(usedUnits));
+ memcpy(_comprBuf, rhs._comprBuf,
+ static_cast<size_t>(usedUnits) * _unitSize);
+ setBufferEndFilePos(static_cast<uint64_t>(usedUnits) * _unitSize);
+ setFileSize(static_cast<uint64_t>(usedUnits) * _unitSize);
+ if (d != NULL) {
+ d->afterRead(_comprBuf,
+ usedUnits,
+ static_cast<uint64_t>(usedUnits) * _unitSize,
+ false);
+ d->setupBits(0);
+ setBitOffset(-1);
+ assert(d->getBitPosV() == 0);
+ }
+}
+
+
+void
+ComprFileReadContext::checkPointWrite(nbostream &out)
+{
+ ComprBuffer::checkPointWrite(out);
+ ComprFileDecodeContext &d = *_decodeContext;
+ d.checkPointWrite(out);
+ uint64_t bitOffset = d.getBitPosV();
+ out << bitOffset;
+}
+
+
+void
+ComprFileReadContext::checkPointRead(nbostream &in)
+{
+ ComprBuffer::checkPointRead(in);
+ ComprFileDecodeContext &d = *_decodeContext;
+ d.checkPointRead(in);
+ in >> _checkPointOffset; // Cannot seek until file is opened
+ _checkPointOffsetValid = true;
+}
+
+ComprFileWriteContext::
+ComprFileWriteContext(ComprFileEncodeContext &encodeContext)
+ : ComprBuffer(encodeContext.getUnitByteSize()),
+ _encodeContext(&encodeContext),
+ _file(NULL),
+ _fileWriteByteOffset(0)
+{
+}
+
+
+ComprFileWriteContext::
+ComprFileWriteContext(uint32_t unitSize)
+ : ComprBuffer(unitSize),
+ _encodeContext(NULL),
+ _file(NULL),
+ _fileWriteByteOffset(0)
+{
+}
+
+
+ComprFileWriteContext::~ComprFileWriteContext(void)
+{
+}
+
+
+void
+ComprFileWriteContext::writeComprBuffer(bool flushSlack)
+{
+ if (_file != NULL) {
+ search::ComprFileWriteBase::WriteComprBuffer(*_encodeContext,
+ *this,
+ *_file,
+ _fileWriteByteOffset,
+ flushSlack);
+ return;
+ }
+
+ int chunkUsedUnits = _encodeContext->getUsedUnits(_comprBuf);
+ int chunkSizeNormalMax = _encodeContext->getNormalMaxUnits(_comprBuf);
+
+ if (chunkUsedUnits >= chunkSizeNormalMax) {
+ int overflowUnits = chunkUsedUnits - chunkSizeNormalMax;
+ expandComprBuf(overflowUnits);
+ }
+
+ _encodeContext->afterWrite(*this,
+ chunkUsedUnits,
+ 0);
+}
+
+
+std::pair<void *, size_t>
+ComprFileWriteContext::grabComprBuffer(void *&comprBufMalloc)
+{
+ assert(_file == NULL);
+ std::pair<void *, size_t> res =
+ std::make_pair(_comprBuf, _encodeContext->getUsedUnits(_comprBuf));
+ comprBufMalloc = _comprBufMalloc;
+ _comprBuf = _comprBufMalloc = NULL;
+ _comprBufSize = 0;
+ return res;
+}
+
+
+void
+ComprFileWriteContext::allocComprBuf(unsigned int comprBufSize,
+ size_t preferredFileAlignment)
+{
+ ComprBuffer::allocComprBuf(comprBufSize, preferredFileAlignment,
+ _file, false);
+}
+
+
+void
+ComprFileWriteContext::allocComprBuf(void)
+{
+ allocComprBuf(32768, 32768);
+}
+
+
+void
+ComprFileWriteContext::checkPointWrite(nbostream &out)
+{
+ ComprBuffer::checkPointWrite(out);
+ ComprFileEncodeContext &e = *_encodeContext;
+ uint64_t bufferStartFilePos = getBufferStartFilePos();
+ uint64_t usedSize = e.getUsedUnits(_comprBuf) *
+ e.getUnitByteSize();
+ out << bufferStartFilePos << usedSize;
+ e.checkPointWrite(out);
+ if (usedSize != 0) {
+ out.write(_comprBuf, usedSize);
+ }
+ uint64_t bitOffset = e.getBitPosV();
+ out << bitOffset;
+}
+
+
+void
+ComprFileWriteContext::checkPointRead(nbostream &in)
+{
+ ComprBuffer::checkPointRead(in);
+ ComprFileEncodeContext &e = *_encodeContext;
+ uint64_t bufferStartFilePos = 0;
+ uint64_t usedSize = 0;
+ in >> bufferStartFilePos >> usedSize;
+ e.checkPointRead(in);
+ if (usedSize != 0) {
+ assert((usedSize % e.getUnitByteSize()) == 0);
+ assert(_comprBufSize >= usedSize / e.getUnitByteSize());
+ in.read(_comprBuf, usedSize);
+ }
+ setBufferStartFilePos(bufferStartFilePos);
+ e.afterWrite(*this, usedSize / e.getUnitByteSize(), bufferStartFilePos);
+ uint64_t bitOffset = 0;
+ in >> bitOffset;
+ uint64_t writeOffset = e.getBitPosV();
+ assert(bitOffset == writeOffset);
+ (void) writeOffset;
+}
+
+
+}
diff --git a/searchlib/src/vespa/searchlib/util/comprfile.h b/searchlib/src/vespa/searchlib/util/comprfile.h
new file mode 100644
index 00000000000..8c05884a0f1
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/util/comprfile.h
@@ -0,0 +1,456 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright (C) 1999-2003 Fast Search & Transfer ASA
+// Copyright (C) 2003 Overture Services Norway AS
+
+#pragma once
+
+#include <vespa/fastos/fastos.h>
+#include <utility>
+#include <vespa/searchlib/util/filealign.h>
+#include <vespa/searchlib/util/comprbuffer.h>
+
+namespace vespalib
+{
+
+class nbostream;
+
+}
+
+
+namespace search {
+
+class ComprFileWriteContext;
+
+class ComprFileDecodeContext
+{
+public:
+ virtual
+ ~ComprFileDecodeContext(void)
+ {
+ }
+
+ /**
+ *
+ * Check if the chunk referenced by the decode context was the
+ * last chunk in the file (e.g. _valE > _realValE)
+ */
+ virtual bool
+ lastChunk(void) const = 0;
+
+ /**
+ * Check if we're at the end of the current chunk (e.g. _valI >= _valE)
+ */
+ virtual bool
+ endOfChunk(void) const = 0;
+
+ /**
+ * Get remaining units in buffer (e.g. _realValE - _valI)
+ */
+
+ virtual int32_t
+ remainingUnits(void) const = 0;
+
+ /**
+ * Get unit ptr (e.g. _valI) from decode context.
+ */
+ virtual const void *
+ getUnitPtr(void) const = 0;
+
+ /**
+ * Setup unit buffer in decode context after read.
+ */
+ virtual void
+ afterRead(const void *start,
+ size_t bufferUnits,
+ uint64_t bufferEndFilePos,
+ bool isMore) = 0;
+
+ /**
+ * Setup for bitwise reading.
+ */
+ virtual void
+ setupBits(int bitOffset) = 0;
+
+ virtual uint64_t
+ getBitPos(int bitOffset,
+ uint64_t bufferEndFilePos) const = 0;
+
+ virtual uint64_t
+ getBitPosV(void) const = 0;
+
+ virtual
+ void skipBits(int bits) = 0;
+
+ virtual void
+ adjUnitPtr(int newRemainingUnits) = 0;
+
+ virtual void
+ emptyBuffer(uint64_t newBitPosition) = 0;
+
+ /**
+ * Get size of each unit (typically 4 or 8)
+ */
+ virtual uint32_t
+ getUnitByteSize(void) const = 0;
+
+ /**
+ * Checkpoint write. Used at semi-regular intervals during indexing
+ * to allow for continued indexing after an interrupt. Caller must
+ * save position.
+ */
+ virtual void
+ checkPointWrite(vespalib::nbostream &out) = 0;
+
+ /**
+ * Checkpoint read. Used when resuming indexing after an interrupt.
+ * Caller must restore position.
+ */
+ virtual void
+ checkPointRead(vespalib::nbostream &in) = 0;
+};
+
+class ComprFileReadBase
+{
+public:
+ static void ReadComprBuffer(uint64_t stopOffset,
+ bool readAll,
+ ComprFileDecodeContext &decodeContext,
+ int &bitOffset,
+ FastOS_FileInterface &file,
+ uint64_t &fileReadByteOffset,
+ uint64_t fileSize,
+ ComprBuffer &cbuf);
+ static void SetPosition(uint64_t newPosition,
+ uint64_t stopOffset,
+ bool readAll,
+ ComprFileDecodeContext &decodeContext,
+ int &bitOffset,
+ FastOS_FileInterface &file,
+ uint64_t &fileReadByteOffset,
+ uint64_t fileSize,
+ ComprBuffer &cbuf);
+
+protected:
+ virtual ~ComprFileReadBase(void) { }
+};
+
+
+class ComprFileReadContext : public ComprBuffer
+{
+private:
+ ComprFileDecodeContext *_decodeContext;
+ uint64_t _fileSize;
+ uint64_t _fileReadByteOffset;
+ int _bitOffset;
+ uint64_t _stopOffset;
+ bool _readAll;
+ bool _checkPointOffsetValid; // Set only if checkpoint has been read
+ FastOS_FileInterface *_file;
+ uint64_t _checkPointOffset; // bit offset saved by checkPointRead
+
+public:
+ ComprFileReadContext(ComprFileDecodeContext &decodeContext);
+
+ ComprFileReadContext(uint32_t unitSize);
+
+ ~ComprFileReadContext(void);
+
+ void
+ readComprBuffer(uint64_t stopOffset, bool readAll);
+
+ void
+ readComprBuffer(void);
+
+ void
+ setPosition(uint64_t newPosition,
+ uint64_t stopOffset,
+ bool readAll);
+
+ void
+ setPosition(uint64_t newPosition);
+
+ void
+ allocComprBuf(unsigned int comprBufSize,
+ size_t preferredFileAlignment);
+
+ void
+ setDecodeContext(ComprFileDecodeContext *decodeContext)
+ {
+ _decodeContext = decodeContext;
+ }
+
+ ComprFileDecodeContext *
+ getDecodeContext(void) const
+ {
+ return _decodeContext;
+ }
+
+ void
+ setFile(FastOS_FileInterface *file)
+ {
+ _file = file;
+ }
+
+ FastOS_FileInterface *
+ getFile(void) const
+ {
+ return _file;
+ }
+
+ /**
+ * Get file offset for end of compressed buffer.
+ */
+ uint64_t
+ getBufferEndFilePos(void) const
+ {
+ return _fileReadByteOffset;
+ }
+
+ /**
+ * Set file offset for end of compressed byffer.
+ */
+ void
+ setBufferEndFilePos(uint64_t bufferEndFilePos)
+ {
+ _fileReadByteOffset = bufferEndFilePos;
+ }
+
+ void
+ setBitOffset(int bitOffset)
+ {
+ _bitOffset = bitOffset;
+ }
+
+ void
+ setFileSize(uint64_t fileSize)
+ {
+ _fileSize = fileSize;
+ }
+
+ /*
+ * Set stop offset for sequential read.
+ */
+ void
+ setStopOffset(uint64_t stopOffset, bool readAll)
+ {
+ _stopOffset = stopOffset;
+ _readAll = readAll;
+ }
+
+ /*
+ * For unit testing only. Reference data owned by rhs, only works as
+ * long as rhs is live and unchanged.
+ */
+ void
+ referenceReadContext(const ComprFileReadContext &rhs);
+
+ /*
+ * For unit testing only. Copy data owned by rhs.
+ */
+ void
+ copyReadContext(const ComprFileReadContext &rhs);
+
+ /*
+ * For unit testing only. Reference data owned by rhs, only works as
+ * long as rhs is live and unchanged.
+ */
+ void
+ referenceWriteContext(const ComprFileWriteContext &rhs);
+
+ /*
+ * For unit testing only. Copy data owned by rhs.
+ */
+ void
+ copyWriteContext(const ComprFileWriteContext &rhs);
+
+ /**
+ * Checkpoint write. Used at semi-regular intervals during indexing
+ * to allow for continued indexing after an interrupt.
+ */
+ void
+ checkPointWrite(vespalib::nbostream &out);
+
+ /**
+ * Checkpoint read. Used when resuming indexing after an interrupt.
+ */
+ void
+ checkPointRead(vespalib::nbostream &in);
+
+ bool
+ getCheckPointOffsetValid(void) const
+ {
+ return _checkPointOffsetValid;
+ }
+
+ uint64_t
+ getCheckPointOffset(void) const
+ {
+ return _checkPointOffset;
+ }
+};
+
+
+class ComprFileEncodeContext
+{
+public:
+ virtual
+ ~ComprFileEncodeContext(void)
+ {
+ }
+
+ /**
+ * Get number of used units (e.g. _valI - start)
+ */
+ virtual int
+ getUsedUnits(void *start) = 0;
+
+ /**
+ * Get normal full buffer size (e.g. _valE - start)
+ */
+ virtual int
+ getNormalMaxUnits(void *start) = 0;
+
+ /**
+ * Adjust buffer after write (e.g. _valI, _fileWriteBias)
+ */
+ virtual void
+ afterWrite(ComprBuffer &cbuf,
+ uint32_t remainingUnits,
+ uint64_t bufferStartFilePos) = 0;
+
+
+ /**
+ * Adjust buffer size to align end of buffer.
+ */
+ virtual void
+ adjustBufSize(ComprBuffer &cbuf) = 0;
+
+ /**
+ * Get size of each unit (typically 4 or 8)
+ */
+ virtual uint32_t
+ getUnitByteSize(void) const = 0;
+
+ /**
+ * Checkpoint write. Used at semi-regular intervals during indexing
+ * to allow for continued indexing after an interrupt. Caller must
+ * save position, although partial unit is saved.
+ */
+ virtual void
+ checkPointWrite(vespalib::nbostream &out) = 0;
+
+ /**
+ * Checkpoint read. Used when resuming indexing after an interrupt.
+ * Caller must restore positon, although partial unit is restored.
+ */
+ virtual void
+ checkPointRead(vespalib::nbostream &in) = 0;
+
+ virtual uint64_t
+ getBitPosV(void) const = 0;
+};
+
+class ComprFileWriteBase
+{
+public:
+ static void WriteComprBuffer(ComprFileEncodeContext &encodeContext,
+ ComprBuffer &cbuf,
+ FastOS_FileInterface &file,
+ uint64_t &fileWriteByteOffset,
+ bool flushSlack);
+
+protected:
+ virtual ~ComprFileWriteBase(void) { }
+};
+
+
+class ComprFileWriteContext : public ComprBuffer
+{
+private:
+ ComprFileEncodeContext *_encodeContext;
+ FastOS_FileInterface *_file;
+ uint64_t _fileWriteByteOffset; // XXX: Migrating from encode context
+
+public:
+ ComprFileWriteContext(ComprFileEncodeContext &encodeContext);
+
+ ComprFileWriteContext(uint32_t unitSize);
+
+ ~ComprFileWriteContext(void);
+
+ void
+ writeComprBuffer(bool flushSlack);
+
+ void
+ allocComprBuf(unsigned int comprBufSize,
+ size_t preferredFileAlignment);
+
+ void
+ allocComprBuf(void);
+
+ void
+ setEncodeContext(ComprFileEncodeContext *encodeContext)
+ {
+ _encodeContext = encodeContext;
+ }
+
+ ComprFileEncodeContext *
+ getEncodeContext(void) const
+ {
+ return _encodeContext;
+ }
+
+ void
+ setFile(FastOS_FileInterface *file)
+ {
+ _file = file;
+ }
+
+ FastOS_FileInterface *
+ getFile(void) const
+ {
+ return _file;
+ }
+
+ /**
+ * Get file offset for start of compressed buffer.
+ */
+ uint64_t
+ getBufferStartFilePos(void) const
+ {
+ return _fileWriteByteOffset;
+ }
+
+ /**
+ * Set file offset for start of compressed byffer.
+ */
+ void
+ setBufferStartFilePos(uint64_t bufferStartFilePos)
+ {
+ _fileWriteByteOffset = bufferStartFilePos;
+ }
+
+ /**
+ * Grab compressed buffer from write context. This is only legal when
+ * no file is attached.
+ */
+ std::pair<void *, size_t>
+ grabComprBuffer(void *&comprBufMalloc);
+
+ /**
+ * Checkpoint write. Used at semi-regular intervals during indexing
+ * to allow for continued indexing after an interrupt.
+ */
+ void
+ checkPointWrite(vespalib::nbostream &out);
+
+ /**
+ * Checkpoint read. Used when resuming indexing after an interrupt.
+ */
+ void
+ checkPointRead(vespalib::nbostream &in);
+};
+
+
+}
+
diff --git a/searchlib/src/vespa/searchlib/util/dirtraverse.cpp b/searchlib/src/vespa/searchlib/util/dirtraverse.cpp
new file mode 100644
index 00000000000..a6c716a13cd
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/util/dirtraverse.cpp
@@ -0,0 +1,289 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright (C) 2002-2003 Fast Search & Transfer ASA
+// Copyright (C) 2003 Overture Services Norway AS
+
+#include <vespa/fastos/fastos.h>
+#include "dirtraverse.h"
+
+namespace search
+{
+
+extern "C" {
+static int cmpname(const void *av, const void *bv)
+{
+ const DirectoryTraverse::Name *const a =
+ *(const DirectoryTraverse::Name *const *) av;
+ const DirectoryTraverse::Name *const b =
+ *(const DirectoryTraverse::Name *const *) bv;
+ return strcmp(a->_name, b->_name);
+}
+}
+
+
+DirectoryTraverse::Name *
+DirectoryTraverse::Name::sort(Name *head,
+ int count)
+{
+ Name *nl;
+ Name **names;
+ int i;
+
+ names = new Name *[count];
+ i = 0;
+ for(nl = head; nl != NULL; nl = nl->_next)
+ names[i++] = nl;
+ assert(i == count);
+ qsort(names, count, sizeof(Name *), cmpname);
+ for (i = 0; i < count; i++) {
+ if (i + 1 < count)
+ names[i]->_next = names[i + 1];
+ else
+ names[i]->_next = NULL;
+ }
+ head = names[0];
+ delete [] names;
+ return head;
+}
+
+
+void
+DirectoryTraverse::QueueDir(const char *name)
+{
+ Name *n = new Name(name);
+ if (_dirTail == NULL)
+ _dirHead = n;
+ else
+ _dirTail->_next = n;
+ _dirTail = n;
+}
+
+
+void
+DirectoryTraverse::PushDir(const char *name)
+{
+ Name *n = new Name(name);
+ n->_next = _pdirHead;
+ _pdirHead = n;
+}
+
+
+void
+DirectoryTraverse::PushRemoveDir(const char *name)
+{
+ Name *n = new Name(name);
+ n->_next = _rdirHead;
+ _rdirHead = n;
+}
+
+
+void
+DirectoryTraverse::PushPushedDirs(void)
+{
+ Name *n;
+ while (_pdirHead != NULL) {
+ n = _pdirHead;
+ _pdirHead = n->_next;
+ n->_next = _dirHead;
+ _dirHead = n;
+ if (_dirTail == NULL)
+ _dirTail = n;
+ }
+}
+
+
+DirectoryTraverse::Name *
+DirectoryTraverse::UnQueueDir(void)
+{
+ Name *n;
+ PushPushedDirs();
+ if (_dirHead == NULL)
+ return NULL;
+ n = _dirHead;
+ _dirHead = n->_next;
+ n->_next = NULL;
+ if (_dirHead == NULL)
+ _dirTail = NULL;
+ return n;
+}
+
+DirectoryTraverse::Name *
+DirectoryTraverse::UnQueueName(void)
+{
+ Name *n;
+ if (_nameHead == NULL)
+ return NULL;
+ n = _nameHead;
+ _nameHead = n->_next;
+ n->_next = NULL;
+ _nameCount--;
+ return n;
+}
+
+
+void
+DirectoryTraverse::ScanSingleDir(void)
+{
+ assert(_nameHead == NULL);
+ assert(_nameCount == 0);
+ delete _curDir;
+ free(_fullDirName);
+ _fullDirName = NULL;
+ _curDir = UnQueueDir();
+ if (_curDir == NULL)
+ return;
+ _fullDirName = (char *) malloc(strlen(_baseDir) + 1 +
+ strlen(_curDir->_name) + 1);
+ strcpy(_fullDirName, _baseDir);
+ if (_curDir->_name[0] != '\0') {
+ strcat(_fullDirName, "/");
+ strcat(_fullDirName, _curDir->_name);
+ }
+ FastOS_DirectoryScan *dirscan = new FastOS_DirectoryScan(_fullDirName);
+ while (dirscan->ReadNext()) {
+ const char *name = dirscan->GetName();
+ if (strcmp(name, ".") == 0 ||
+ strcmp(name, "..") == 0)
+ continue;
+ Name *nl = new Name(name);
+ nl->_next = _nameHead;
+ _nameHead = nl;
+ _nameCount++;
+ }
+ if (_nameCount > 1)
+ _nameHead = _nameHead->sort(_nameHead, _nameCount);
+ delete dirscan;
+}
+
+
+bool
+DirectoryTraverse::NextName(void)
+{
+ delete _curName;
+ _curName = NULL;
+ while (_nameHead == NULL && (_dirHead != NULL || _pdirHead != NULL))
+ ScanSingleDir();
+ if (_nameHead == NULL)
+ return false;
+ _curName = UnQueueName();
+ free(_fullName);
+ _fullName = (char *) malloc(strlen(_fullDirName) + 1 +
+ strlen(_curName->_name) + 1);
+ strcpy(_fullName, _fullDirName);
+ _relName = _fullName + strlen(_baseDir) + 1;
+ strcat(_fullName, "/");
+ strcat(_fullName, _curName->_name);
+ return true;
+}
+
+
+bool
+DirectoryTraverse::NextRemoveDir(void)
+{
+ Name *curName;
+
+ delete _curName;
+ _curName = NULL;
+ if (_rdirHead == NULL)
+ return false;
+ curName = _rdirHead;
+ _rdirHead = curName->_next;
+ free(_fullName);
+ _fullName = (char *) malloc(strlen(_baseDir) + 1 +
+ strlen(curName->_name) + 1);
+ strcpy(_fullName, _baseDir);
+ _relName = _fullName + strlen(_baseDir) + 1;
+ strcat(_fullName, "/");
+ strcat(_fullName, curName->_name);
+ delete curName;
+ return true;
+}
+
+
+bool
+DirectoryTraverse::RemoveTree(void)
+{
+ FastOS_StatInfo statInfo;
+
+ while (NextName()) {
+ const char *relname = GetRelName();
+ const char *fullname = GetFullName();
+ if (FastOS_File::Stat(fullname, &statInfo)) {
+ if (statInfo._isDirectory) {
+ PushDir(relname);
+ PushRemoveDir(relname);
+ } else {
+ FastOS_File::Delete(fullname);
+ }
+ }
+ }
+ while (NextRemoveDir()) {
+ const char *fullname = GetFullName();
+ FastOS_File::RemoveDirectory(fullname);
+ }
+ FastOS_File::RemoveDirectory(_baseDir);
+ return true;
+}
+
+uint64_t
+DirectoryTraverse::GetTreeSize()
+{
+ FastOS_StatInfo statInfo;
+ uint64_t size = 0;
+ const uint64_t blockSize = 4096;
+
+ while (NextName()) {
+ const char *relname = GetRelName();
+ const char *fullname = GetFullName();
+ if (FastOS_File::Stat(fullname, &statInfo)) {
+ uint64_t adjSize = ((statInfo._size + blockSize - 1) / blockSize) * blockSize;
+ size += adjSize;
+ if (statInfo._isDirectory) {
+ PushDir(relname);
+ }
+ }
+ }
+ return size;
+}
+
+DirectoryTraverse::DirectoryTraverse(const char *baseDir)
+ : _baseDir(NULL),
+ _nameHead(NULL),
+ _nameCount(0),
+ _dirHead(NULL),
+ _dirTail(NULL),
+ _pdirHead(NULL),
+ _rdirHead(NULL),
+ _curDir(NULL),
+ _curName(NULL),
+ _fullDirName(NULL),
+ _fullName(NULL),
+ _relName(NULL)
+{
+ _baseDir = strdup(baseDir);
+ QueueDir("");
+ ScanSingleDir();
+}
+
+
+DirectoryTraverse::~DirectoryTraverse(void)
+{
+ free(_fullDirName);
+ free(_fullName);
+ free(_baseDir);
+ delete _curDir;
+ delete _curName;
+ PushPushedDirs();
+ while (_dirHead != NULL)
+ delete UnQueueDir();
+ while (_nameHead != NULL)
+ delete UnQueueName();
+ while (_rdirHead != NULL) {
+ Name *n;
+ n = _rdirHead;
+ _rdirHead = n->_next;
+ n->_next = NULL;
+ delete n;
+ }
+}
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/util/dirtraverse.h b/searchlib/src/vespa/searchlib/util/dirtraverse.h
new file mode 100644
index 00000000000..550da2fa7d1
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/util/dirtraverse.h
@@ -0,0 +1,67 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright (C) 2002-2003 Fast Search & Transfer ASA
+// Copyright (C) 2003 Overture Services Norway AS
+
+#pragma once
+
+namespace search
+{
+
+class DirectoryTraverse
+{
+private:
+ DirectoryTraverse(const DirectoryTraverse &);
+ DirectoryTraverse& operator=(const DirectoryTraverse &);
+
+public:
+ class Name
+ {
+ private:
+ Name(const Name &);
+ Name& operator=(const Name &);
+
+ public:
+ char *_name;
+ Name *_next;
+ explicit Name(const char *name)
+ : _name(NULL),
+ _next(NULL)
+ {
+ _name = strdup(name);
+ }
+ ~Name(void) { free(_name); }
+ static Name *sort(Name *head, int count);
+ };
+private:
+ char *_baseDir;
+ Name *_nameHead;
+ int _nameCount;
+ Name *_dirHead;
+ Name *_dirTail;
+ Name *_pdirHead;
+ Name *_rdirHead;
+ Name *_curDir;
+ Name *_curName;
+ char *_fullDirName;
+ char *_fullName;
+ char *_relName;
+public:
+ const char *GetFullName(void) const { return _fullName; }
+ const char *GetRelName(void) const { return _relName; }
+ void QueueDir(const char *name);
+ void PushDir(const char *name);
+ void PushRemoveDir(const char *name);
+ void PushPushedDirs(void);
+ Name *UnQueueDir(void);
+ Name *UnQueueName(void);
+ void ScanSingleDir(void);
+ bool NextName(void);
+ bool NextRemoveDir(void);
+ bool RemoveTree(void);
+ uint64_t GetTreeSize(); // Returns size of directory in bytes
+ explicit DirectoryTraverse(const char *baseDir);
+ ~DirectoryTraverse(void);
+};
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/util/drainingbufferwriter.cpp b/searchlib/src/vespa/searchlib/util/drainingbufferwriter.cpp
new file mode 100644
index 00000000000..9c1150917a7
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/util/drainingbufferwriter.cpp
@@ -0,0 +1,41 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "drainingbufferwriter.h"
+
+namespace search
+{
+
+DrainingBufferWriter::DrainingBufferWriter()
+ : BufferWriter(),
+ _buf(),
+ _bytesWritten(0),
+ _incompleteBuffers(0)
+{
+ _buf.resize(BUFFER_SIZE);
+ setup(&_buf[0], _buf.size());
+}
+
+
+DrainingBufferWriter::~DrainingBufferWriter()
+{
+}
+
+
+void
+DrainingBufferWriter::flush() {
+ // measure overhead above this flush method
+ assert(_incompleteBuffers == 0); // all previous buffers must have been full
+ size_t nowLen = usedLen();
+ if (nowLen != _buf.size()) {
+ // buffer is not full, only allowed for last buffer
+ ++_incompleteBuffers;
+ }
+ if (nowLen == 0) {
+ return; // empty buffer
+ }
+ _bytesWritten += nowLen;
+ rewind();
+}
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/util/drainingbufferwriter.h b/searchlib/src/vespa/searchlib/util/drainingbufferwriter.h
new file mode 100644
index 00000000000..2c471ce2148
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/util/drainingbufferwriter.h
@@ -0,0 +1,32 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "bufferwriter.h"
+#include <vector>
+
+namespace search
+{
+
+/**
+ * Class to write to a "drain" buffer, used to measure performance of
+ * BufferWriter and measure number of bytes written.
+ */
+class DrainingBufferWriter : public BufferWriter
+{
+ std::vector<char> _buf;
+ size_t _bytesWritten;
+ uint32_t _incompleteBuffers;
+public:
+ static constexpr size_t BUFFER_SIZE = 262144;
+
+ DrainingBufferWriter();
+
+ virtual ~DrainingBufferWriter();
+
+ virtual void flush() override;
+
+ size_t getBytesWritten() const { return _bytesWritten; }
+};
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/util/filealign.cpp b/searchlib/src/vespa/searchlib/util/filealign.cpp
new file mode 100644
index 00000000000..e3a7b85f0f0
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/util/filealign.cpp
@@ -0,0 +1,145 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/searchlib/util/filealign.h>
+#include <vespa/vespalib/objects/nbostream.h>
+
+namespace search
+{
+
+using vespalib::nbostream;
+
+namespace {
+
+size_t
+gcd(size_t a, size_t b)
+{
+ size_t remainder;
+
+ for (;;) {
+ remainder = a % b;
+ if (remainder == 0)
+ return b;
+ a = b;
+ b = remainder;
+ }
+}
+
+
+size_t
+getMinBlocking(size_t elementsize, size_t alignment)
+{
+ return alignment / gcd(alignment, elementsize);
+}
+
+}
+
+
+FileAlign::FileAlign(void)
+ : _directIOFileAlign(1),
+ _preferredFileAlign(1),
+ _minDirectIOSize(1),
+ _minAlignedSize(1),
+ _elemSize(1),
+ _directIOMemAlign(1),
+ _directio(false),
+ _checkPointResumed(false)
+{
+}
+
+
+FileAlign::~FileAlign(void)
+{
+}
+
+
+size_t
+FileAlign::adjustSize(int64_t offset, size_t size)
+{
+ if (_directio && (offset & (_directIOFileAlign - 1)) != 0) {
+ // Align end of IO to direct IO boundary
+ assert(offset % _elemSize == 0);
+ size_t maxSize = _minDirectIOSize - (offset % _minDirectIOSize);
+ if (size > maxSize)
+ size = maxSize;
+ } else if ((offset & (_preferredFileAlign - 1)) != 0) {
+ // Align end of IO to preferred boundary
+ assert(offset % _elemSize == 0);
+ size_t tailLen = (offset + size) % _minAlignedSize;
+ if (tailLen < size)
+ size -= tailLen;
+ }
+ assert(size % _elemSize == 0);
+ return size;
+}
+
+
+size_t
+FileAlign::adjustElements(int64_t eoffset, size_t esize)
+{
+ return adjustSize(eoffset * _elemSize, esize * _elemSize) / _elemSize;
+}
+
+
+size_t
+FileAlign::setupAlign(size_t elements,
+ size_t elemSize,
+ FastOS_FileInterface *file,
+ size_t preferredFileAlignment)
+{
+ size_t memoryAlignment;
+ size_t transferGranularity;
+ size_t transferMaximum;
+
+ if (file != NULL) {
+ _directio =
+ file->GetDirectIORestrictions(memoryAlignment,
+ transferGranularity,
+ transferMaximum);
+ } else
+ _directio = false;
+ if (_directio) {
+ _directIOFileAlign = transferGranularity;
+ _directIOMemAlign = memoryAlignment;
+ if (preferredFileAlignment < _directIOFileAlign)
+ preferredFileAlignment = _directIOFileAlign;
+ } else {
+ _directIOFileAlign = 1;
+ _directIOMemAlign = 1;
+ }
+ if (preferredFileAlignment < 4096)
+ preferredFileAlignment = 4096;
+ _preferredFileAlign = preferredFileAlignment;
+
+ size_t minDirectIOElements = getMinBlocking(elemSize, _directIOFileAlign);
+ size_t minAlignedElements = getMinBlocking(elemSize, _preferredFileAlign);
+
+ if (elements % minAlignedElements != 0)
+ elements += minAlignedElements - (elements % minAlignedElements);
+ _minDirectIOSize = minDirectIOElements * elemSize;
+ _minAlignedSize = minAlignedElements * elemSize;
+ _elemSize = elemSize;
+ return elements;
+}
+
+
+void
+FileAlign::checkPointWrite(nbostream &out)
+{
+ out << _directIOFileAlign << _preferredFileAlign <<
+ _minDirectIOSize << _minAlignedSize << _elemSize <<
+ _directIOMemAlign << _directio;
+}
+
+
+void
+FileAlign::checkPointRead(nbostream &in)
+{
+ in >> _directIOFileAlign >> _preferredFileAlign >>
+ _minDirectIOSize >> _minAlignedSize >> _elemSize >>
+ _directIOMemAlign >> _directio;
+ _checkPointResumed = true;
+}
+
+
+}
diff --git a/searchlib/src/vespa/searchlib/util/filealign.h b/searchlib/src/vespa/searchlib/util/filealign.h
new file mode 100644
index 00000000000..2bad98c9dd3
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/util/filealign.h
@@ -0,0 +1,138 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+namespace vespalib
+{
+
+class nbostream;
+
+}
+
+namespace search
+{
+
+class FileAlign
+{
+private:
+ size_t _directIOFileAlign;
+ size_t _preferredFileAlign;
+ size_t _minDirectIOSize;
+ size_t _minAlignedSize;
+ size_t _elemSize;
+ size_t _directIOMemAlign;
+ bool _directio;
+ bool _checkPointResumed;
+
+
+public:
+ FileAlign(void);
+
+ ~FileAlign(void);
+
+ /**
+ * Adjust number of bytes for IO (read or write), reducing
+ * number of bytes if it helps making end of IO matching
+ * an alignment boundary.
+ *
+ * @param offset position of start of IO, measured in bytes
+ * @param size number of bytes for IO
+ *
+ * @return adjusted number of bytes for IO
+ */
+ size_t
+ adjustSize(int64_t offset, size_t size);
+
+ /**
+ * Adjust number of elements for IO (read or write), reducing
+ * number of elements if it helps making end of IO matching
+ * an alignment boundary.
+ *
+ * @param eoffset position of start of IO, measured in elements
+ * @param esize number of elements for IO
+ *
+ * @return adjusted number of elements for IO
+ */
+ size_t
+ adjustElements(int64_t eoffset, size_t esize);
+
+ /**
+ * Setup alignment
+ *
+ * @param elements suggested number of elements in buffer
+ * @param elemSize size of each elements
+ * @param file File interface for IO
+ * @param preferredFileAlignment prefered alignment for IO
+ *
+ * @return adjusted number of elements in buffer
+ */
+ size_t
+ setupAlign(size_t elements,
+ size_t elemSize,
+ FastOS_FileInterface *file,
+ size_t preferredFileAlignment);
+
+ bool
+ getDirectIO(void) const
+ {
+ return _directio;
+ }
+
+ bool
+ getCheckPointResumed(void) const
+ {
+ return _checkPointResumed;
+ }
+
+ size_t
+ getDirectIOFileAlign(void) const
+ {
+ return _directIOFileAlign;
+ }
+
+ size_t
+ getDirectIOMemAlign(void) const
+ {
+ return _directIOMemAlign;
+ }
+
+ size_t
+ getMinDirectIOSize(void) const
+ {
+ return _minDirectIOSize;
+ }
+
+ size_t
+ getMinAlignedSize(void) const
+ {
+ return _minAlignedSize;
+ }
+
+ size_t
+ getPreferredFileAlign(void) const
+ {
+ return _preferredFileAlign;
+ }
+
+ size_t
+ getElemSize(void) const
+ {
+ return _elemSize;
+ }
+
+ /**
+ * Checkpoint write. Used at semi-regular intervals during indexing
+ * to allow for continued indexing after an interrupt.
+ */
+ void
+ checkPointWrite(vespalib::nbostream &out);
+
+ /**
+ * Checkpoint read. Used when resuming indexing after an interrupt.
+ */
+ void
+ checkPointRead(vespalib::nbostream &in);
+};
+
+}
+
diff --git a/searchlib/src/vespa/searchlib/util/fileheadertk.cpp b/searchlib/src/vespa/searchlib/util/fileheadertk.cpp
new file mode 100644
index 00000000000..d1ae39d15c7
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/util/fileheadertk.cpp
@@ -0,0 +1,23 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include "fileheadertk.h"
+
+using namespace search;
+using vespalib::GenericHeader;
+
+void
+FileHeaderTk::addVersionTags(vespalib::GenericHeader &header)
+{
+#ifdef V_TAG
+ header.putTag(GenericHeader::Tag("version-tag", V_TAG));;
+ header.putTag(GenericHeader::Tag("version-date", V_TAG_DATE));;
+ header.putTag(GenericHeader::Tag("version-pkg", V_TAG_PKG));;
+ header.putTag(GenericHeader::Tag("version-arch", V_TAG_ARCH));;
+ header.putTag(GenericHeader::Tag("version-system", V_TAG_SYSTEM));
+ header.putTag(GenericHeader::Tag("version-system-rev", V_TAG_SYSTEM_REV));
+ header.putTag(GenericHeader::Tag("version-builder", V_TAG_BUILDER));
+ header.putTag(GenericHeader::Tag("version-component", V_TAG_COMPONENT));
+#else
+ (void)header;
+#endif
+}
diff --git a/searchlib/src/vespa/searchlib/util/fileheadertk.h b/searchlib/src/vespa/searchlib/util/fileheadertk.h
new file mode 100644
index 00000000000..8b88df997eb
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/util/fileheadertk.h
@@ -0,0 +1,23 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/vespalib/data/fileheader.h>
+
+namespace search {
+
+/**
+ * This class offers convenience methods to add tags to a GenericHeader.
+ */
+class FileHeaderTk {
+public:
+ /**
+ * Adds all available version tags to the given header. These tags are set by the build environment and
+ * describe things such as build time, build tag, builder, etc.
+ *
+ * @param header The header to add tags to.
+ */
+ static void addVersionTags(vespalib::GenericHeader &header);
+};
+
+}
+
diff --git a/searchlib/src/vespa/searchlib/util/filekit.cpp b/searchlib/src/vespa/searchlib/util/filekit.cpp
new file mode 100644
index 00000000000..df509f2ea96
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/util/filekit.cpp
@@ -0,0 +1,108 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright (C) 1998-2003 Fast Search & Transfer ASA
+// Copyright (C) 2003 Overture Services Norway AS
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+#include <vespa/searchlib/util/filekit.h>
+#include <vespa/vespalib/util/error.h>
+#include <memory>
+#include <string>
+LOG_SETUP(".filekit");
+
+namespace search
+{
+
+using vespalib::getLastErrorString;
+
+bool
+FileKit::createStamp(const vespalib::stringref &name)
+{
+ FastOS_File stamp;
+ FastOS_StatInfo statInfo;
+ bool statres;
+
+ statres = FastOS_File::Stat(name.c_str(), &statInfo);
+
+ if (!statres && (statInfo._error != FastOS_StatInfo::FileNotFound)) {
+ LOG(error, "FATAL: Could not check stamp file %s: %s",
+ name.c_str(), getLastErrorString().c_str());
+ return false;
+ }
+ if (statres && statInfo._size > 0) {
+ LOG(error, "FATAL: Stamp file not empty: %s", name.c_str());
+ return false;
+ }
+
+ if (!stamp.OpenWriteOnlyTruncate(name.c_str())) {
+ LOG(error, "FATAL: Could not create stamp file %s: %s",
+ name.c_str(), getLastErrorString().c_str());
+ return false;
+ }
+ return true;
+}
+
+
+bool
+FileKit::hasStamp(const vespalib::stringref &name)
+{
+ FastOS_StatInfo statInfo;
+ bool statres;
+
+ statres = FastOS_File::Stat(name.c_str(), &statInfo);
+
+ if (!statres && (statInfo._error != FastOS_StatInfo::FileNotFound)) {
+ LOG(error, "FATAL: Could not check stamp file %s: %s",
+ name.c_str(), getLastErrorString().c_str());
+ return false;
+ }
+ return statres;
+}
+
+
+bool
+FileKit::removeStamp(const vespalib::stringref &name)
+{
+ FastOS_StatInfo statInfo;
+ bool deleteres;
+ bool statres;
+
+ statres = FastOS_File::Stat(name.c_str(), &statInfo);
+
+ if (!statres && (statInfo._error != FastOS_StatInfo::FileNotFound)) {
+ LOG(error, "FATAL: Could not check stamp file %s: %s",
+ name.c_str(), getLastErrorString().c_str());
+ return false;
+ }
+ if (statres && statInfo._size > 0) {
+ LOG(error, "FATAL: Stamp file not empty: %s", name.c_str());
+ return false;
+ }
+
+ do {
+ deleteres = FastOS_File::Delete(name.c_str());
+ //FIX! errno
+ } while (!deleteres && errno == EINTR);
+
+ if (!deleteres &&
+ FastOS_File::GetLastError() != FastOS_File::ERR_ENOENT) {
+ LOG(error, "FATAL: Could not remove stamp file %s: %s",
+ name.c_str(), getLastErrorString().c_str());
+ return false;
+ }
+ return true;
+}
+
+
+fastos::TimeStamp
+FileKit::getModificationTime(const vespalib::stringref &name)
+{
+ FastOS_StatInfo statInfo;
+ if (FastOS_File::Stat(name.c_str(), &statInfo)) {
+ return fastos::TimeStamp(statInfo._modifiedTimeNS);
+ }
+ return fastos::TimeStamp();
+}
+
+
+}
diff --git a/searchlib/src/vespa/searchlib/util/filekit.h b/searchlib/src/vespa/searchlib/util/filekit.h
new file mode 100644
index 00000000000..fb2332529d4
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/util/filekit.h
@@ -0,0 +1,35 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright (C) 1998-2003 Fast Search & Transfer ASA
+// Copyright (C) 2003 Overture Services Norway AS
+
+#pragma once
+
+#include <vespa/vespalib/stllike/string.h>
+
+namespace search {
+
+class FileKit
+{
+private:
+ static bool _syncFiles;
+public:
+ static bool
+ createStamp(const vespalib::stringref &name);
+
+ static bool
+ hasStamp(const vespalib::stringref &name);
+
+ static bool
+ removeStamp(const vespalib::stringref &name);
+
+ /**
+ * Returns the modification time of the given file/directory,
+ * or time stamp 0 if stating of file/directory fails.
+ **/
+ static fastos::TimeStamp
+ getModificationTime(const vespalib::stringref &name);
+
+};
+
+}
+
diff --git a/searchlib/src/vespa/searchlib/util/filesizecalculator.cpp b/searchlib/src/vespa/searchlib/util/filesizecalculator.cpp
new file mode 100644
index 00000000000..4ae53c03430
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/util/filesizecalculator.cpp
@@ -0,0 +1,59 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".searchlib.util.filesizecalculator");
+
+#include "filesizecalculator.h"
+#include <vespa/vespalib/data/fileheader.h>
+
+namespace search
+{
+
+namespace {
+
+const vespalib::string fileBitSizeTag = "fileBitSize";
+
+bool byteAligned(uint64_t bitSize)
+{
+ return ((bitSize % 8) == 0);
+}
+
+}
+
+bool
+FileSizeCalculator::extractFileSize(const vespalib::GenericHeader &header,
+ size_t headerLen,
+ vespalib::string fileName, size_t &fileSize)
+{
+ if (!header.hasTag(fileBitSizeTag)) {
+ return true;
+ }
+ uint64_t fileBitSize = header.getTag(fileBitSizeTag).asInteger();
+ uint64_t fileByteSize = fileBitSize / 8;
+ if (!byteAligned(fileBitSize)) {
+ LOG(error,
+ "Bad header file size tag for %s, fileBitSize=%" PRIu64
+ " which is not a multiple of 8",
+ fileName.c_str(), fileBitSize);
+ return false;
+ }
+ if (fileByteSize < headerLen) {
+ LOG(error,
+ "Bad header file size tag for %s, fileBitSize=%" PRIu64
+ " but header is %" PRIu64 "bits",
+ fileName.c_str(), fileBitSize, headerLen * 8);
+ return false;
+ }
+ if (fileByteSize > fileSize) {
+ LOG(error,
+ "Bad header file size tag for %s, fileBitSize=%" PRIu64
+ " but whole file size is %" PRIu64 "bits",
+ fileName.c_str(), fileBitSize, fileSize * 8);
+ return false;
+ }
+ fileSize = fileByteSize;
+ return true;
+}
+
+}
diff --git a/searchlib/src/vespa/searchlib/util/filesizecalculator.h b/searchlib/src/vespa/searchlib/util/filesizecalculator.h
new file mode 100644
index 00000000000..233f1fe56f3
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/util/filesizecalculator.h
@@ -0,0 +1,26 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/vespalib/stllike/string.h>
+
+namespace vespalib { class GenericHeader; }
+
+namespace search
+{
+
+/*
+ * Class to calculate logical file size of a file based on header tags
+ * and physical file size. Logical file size can be smaller than
+ * physical file size due to padding for directio alignment
+ * constraints.
+ */
+class FileSizeCalculator
+{
+public:
+ static bool
+ extractFileSize(const vespalib::GenericHeader &header, size_t headerLen,
+ vespalib::string fileName, size_t &fileSize);
+};
+
+}
diff --git a/searchlib/src/vespa/searchlib/util/fileutil.cpp b/searchlib/src/vespa/searchlib/util/fileutil.cpp
new file mode 100644
index 00000000000..d3407ba030b
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/util/fileutil.cpp
@@ -0,0 +1,176 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/searchlib/util/fileutil.h>
+#include <vespa/vespalib/util/stringfmt.h>
+#include <vespa/vespalib/util/exceptions.h>
+#include <vespa/vespalib/util/guard.h>
+#include <vespa/vespalib/data/fileheader.h>
+#include "filesizecalculator.h"
+#include <stdexcept>
+#include <vespa/log/log.h>
+LOG_SETUP(".searchlib.util.fileutil");
+
+using vespalib::make_string;
+using vespalib::IllegalStateException;
+using vespalib::GenericHeader;
+using vespalib::FileDescriptor;
+using vespalib::getLastErrorString;
+
+namespace search
+{
+
+
+FileUtil::LoadedMmap::LoadedMmap(const vespalib::string &fileName)
+ : LoadedBuffer(NULL, 0),
+ _mapBuffer(NULL),
+ _mapSize(0)
+{
+ FileDescriptor fd(open(fileName.c_str(), O_RDONLY, 0664));
+ if (fd.valid()) {
+ struct stat stbuf;
+ int res = fstat(fd.fd(), &stbuf);
+ if (res == 0) {
+ size_t sz = stbuf.st_size;
+ if (sz) {
+ void *tmpBuffer = mmap(NULL, sz,
+ PROT_READ, MAP_PRIVATE,
+ fd.fd(), 0);
+ if (tmpBuffer != MAP_FAILED) {
+ _mapSize = sz;
+ _mapBuffer = tmpBuffer;
+ uint32_t hl = GenericHeader::getMinSize();
+ bool badHeader = true;
+ if (sz >= hl) {
+ GenericHeader::MMapReader rd(static_cast<const char *>
+ (tmpBuffer), sz);
+ _header = std::make_unique<GenericHeader>();
+ size_t headerLen = _header->read(rd);
+ if ((headerLen <= _mapSize) &&
+ FileSizeCalculator::extractFileSize(*_header,
+ headerLen,
+ fileName,
+ sz)) {
+ _size = sz - headerLen;
+ _buffer = static_cast<char *>
+ (_mapBuffer) + headerLen;
+ badHeader = false;
+ }
+ }
+ if (badHeader) {
+ throw IllegalStateException(
+ make_string("bad file header: %s",
+ fileName.c_str()));
+ }
+ } else {
+ throw IllegalStateException(
+ make_string("Failed mmaping '%s'"
+ " of size %" PRIu64 " errno(%d)",
+ fileName.c_str(),
+ static_cast<uint64_t>(sz),
+ errno));
+ }
+ }
+ } else {
+ throw IllegalStateException(
+ make_string("Failed fstat '%s' of fd %d with result = %d",
+ fileName.c_str(), fd.fd(), res));
+ }
+ } else {
+ throw IllegalStateException(
+ make_string("Failed opening '%s' for reading errno(%d)",
+ fileName.c_str(), errno));
+ }
+}
+
+
+
+FileUtil::LoadedMmap::~LoadedMmap()
+{
+ madvise(_mapBuffer, _mapSize, MADV_DONTNEED);
+ munmap(_mapBuffer, _mapSize);
+}
+
+
+std::unique_ptr<Fast_BufferedFile>
+FileUtil::openFile(const vespalib::string &fileName)
+{
+ std::unique_ptr<Fast_BufferedFile> file(new Fast_BufferedFile());
+ file->EnableDirectIO();
+ if (!file->OpenReadOnly(fileName.c_str())) {
+ LOG(error, "could not open %s: %s",
+ file->GetFileName(), getLastErrorString().c_str());
+ file->Close();
+ throw IllegalStateException(
+ make_string("Failed opening '%s' for direct IO reading.",
+ file->GetFileName()));
+ }
+ return file;
+}
+
+
+FileUtil::LoadedBuffer::UP
+FileUtil::loadFile(const vespalib::string &fileName)
+{
+ LoadedBuffer::UP data(new LoadedMmap(fileName));
+ FastOS_File file(fileName.c_str());
+ if (!file.OpenReadOnly()) {
+ LOG(error, "could not open %s: %s",
+ file.GetFileName(), getLastErrorString().c_str());
+ }
+ file.Close();
+ return data;
+}
+
+
+void FileReaderBase::handleError(ssize_t numRead, size_t wanted)
+{
+ if (numRead == 0) {
+ throw std::runtime_error(vespalib::make_string("Trying to read past EOF of file %s", _file.GetFileName()));
+ } else {
+ throw std::runtime_error(vespalib::make_string("Partial read(%zd of %zu) of file %s", numRead, wanted, _file.GetFileName()));
+ }
+}
+
+void FileWriterBase::handleError(ssize_t numRead, size_t wanted)
+{
+ if (numRead == 0) {
+ throw std::runtime_error(vespalib::make_string("Failed writing anything to file %s", _file.GetFileName()));
+ } else {
+ throw std::runtime_error(vespalib::make_string("Partial read(%zd of %zu) of file %s", numRead, wanted, _file.GetFileName()));
+ }
+}
+
+SequentialFileArray::SequentialFileArray(const vespalib::string & fname) :
+ _backingFile(),
+ _name(fname)
+{
+ _backingFile.EnableDirectIO();
+}
+
+void SequentialFileArray::rewind()
+{
+ assert(_backingFile.SetPosition(0));
+}
+
+void SequentialFileArray::close()
+{
+ _backingFile.Close();
+}
+
+void SequentialFileArray::erase()
+{
+ close();
+ FastOS_File::Delete(_backingFile.GetFileName());
+}
+
+void SequentialFileArray::openReadOnly()
+{
+ _backingFile.ReadOpen(_name.c_str());
+}
+
+void SequentialFileArray::openWriteOnly()
+{
+ _backingFile.OpenWriteOnlyTruncate(_name.c_str());
+}
+
+}
diff --git a/searchlib/src/vespa/searchlib/util/fileutil.h b/searchlib/src/vespa/searchlib/util/fileutil.h
new file mode 100644
index 00000000000..f809da8da5c
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/util/fileutil.h
@@ -0,0 +1,389 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/fastlib/io/bufferedfile.h>
+#include <vector>
+#include <memory>
+#include <vespa/vespalib/data/fileheader.h>
+#include <vespa/vespalib/util/array.h>
+#include <vespa/vespalib/stllike/string.h>
+
+using vespalib::GenericHeader;
+
+namespace search {
+
+/**
+ * Util class with static functions for handling attribute data files.
+ **/
+class FileUtil
+{
+public:
+ /**
+ * Buffer class with content loaded from file.
+ **/
+ class LoadedBuffer
+ {
+ private:
+ LoadedBuffer(const LoadedBuffer & rhs);
+
+ LoadedBuffer &
+ operator =(const LoadedBuffer & rhs);
+ protected:
+ void * _buffer;
+ size_t _size;
+ std::unique_ptr<GenericHeader> _header;
+ public:
+ typedef std::unique_ptr<LoadedBuffer> UP;
+
+ LoadedBuffer(void * buf, size_t sz)
+ : _buffer(buf),
+ _size(sz),
+ _header(nullptr)
+ {
+ }
+
+ virtual
+ ~LoadedBuffer()
+ {
+ }
+
+ const void *
+ buffer() const
+ {
+ return _buffer;
+ }
+
+ const char *
+ c_str() const
+ {
+ return static_cast<const char *>(_buffer);
+ }
+
+ size_t
+ size() const
+ {
+ return _size;
+ }
+
+ bool
+ empty() const
+ {
+ return _size == 0;
+ }
+
+ size_t
+ size(size_t elemSize) const
+ {
+ return _size/elemSize;
+ }
+
+ const GenericHeader &
+ getHeader() const
+ {
+ return *_header;
+ }
+ };
+
+ /**
+ * Buffer class with content mmapped from file.
+ **/
+ class LoadedMmap : public LoadedBuffer
+ {
+ void * _mapBuffer;
+ size_t _mapSize;
+ public:
+ LoadedMmap(const vespalib::string &fileName);
+
+ virtual
+ ~LoadedMmap();
+ };
+
+ /**
+ * Opens and returns the file with the given name for reading.
+ * Enables direct IO on the file.
+ **/
+ static std::unique_ptr<Fast_BufferedFile>
+ openFile(const vespalib::string &fileName);
+
+ /**
+ * Loads and returns the file with the given name.
+ * Mmaps the file into the returned buffer.
+ **/
+ static LoadedBuffer::UP
+ loadFile(const vespalib::string &fileName);
+};
+
+class FileReaderBase
+{
+public:
+ FileReaderBase(FastOS_FileInterface & file) : _file(file) { }
+ ssize_t read(void *buf, size_t sz) {
+ ssize_t numRead = _file.Read(buf, sz);
+ if (numRead != ssize_t(sz)) {
+ handleError(numRead, sz);
+ }
+ return numRead;
+ }
+private:
+ void handleError(ssize_t numRead, size_t wanted);
+ FastOS_FileInterface & _file;
+};
+
+class FileWriterBase
+{
+public:
+ FileWriterBase(FastOS_FileInterface & file) : _file(file) { }
+ ssize_t write(const void *buf, size_t sz) {
+ ssize_t numWritten = _file.Write2(buf, sz);
+ if (numWritten != ssize_t(sz)) {
+ handleError(numWritten, sz);
+ }
+ return numWritten;
+ }
+protected:
+ void handleError(ssize_t numWritten, size_t wanted);
+private:
+ FastOS_FileInterface & _file;
+};
+
+template <typename T>
+class FileReader : public FileReaderBase
+{
+public:
+ FileReader(FastOS_FileInterface & file) : FileReaderBase(file) { }
+ T readHostOrder() {
+ T result;
+ read(&result, sizeof(result));
+ return result;
+ }
+};
+
+class SequentialFileArray
+{
+public:
+ SequentialFileArray(const vespalib::string & fname);
+ virtual ~SequentialFileArray() { close(); }
+ const vespalib::string & getName() const { return _name; }
+ void rewind();
+ void close();
+ void erase();
+protected:
+ void openReadOnly();
+ void openWriteOnly();
+ mutable Fast_BufferedFile _backingFile;
+ vespalib::string _name;
+};
+
+template <typename T>
+class SequentialFileArrayRead : public SequentialFileArray
+{
+public:
+ SequentialFileArrayRead(const vespalib::string & fname);
+ T getNext() const { return _fileReader.readHostOrder(); }
+ bool hasNext() const { return _backingFile.BytesLeft() >= sizeof(T); }
+ size_t size() const { return _backingFile.GetSize()/sizeof(T); }
+private:
+ mutable FileReader<T> _fileReader;
+};
+
+template <typename T>
+class SequentialFileArrayWrite : public SequentialFileArray
+{
+public:
+ SequentialFileArrayWrite(const vespalib::string & fname);
+ void push_back(const T & v) { _count++; _fileWriter.write(&v, sizeof(v)); }
+ size_t size() const { return _count; }
+ bool empty() const { return _count == 0; }
+private:
+ size_t _count;
+ FileWriterBase _fileWriter;
+};
+
+template <typename T>
+SequentialFileArrayRead<T>::SequentialFileArrayRead(const vespalib::string & fname) :
+ SequentialFileArray(fname),
+ _fileReader(_backingFile)
+{
+ openReadOnly();
+}
+
+template <typename T>
+SequentialFileArrayWrite<T>::SequentialFileArrayWrite(const vespalib::string & fname) :
+ SequentialFileArray(fname),
+ _count(0),
+ _fileWriter(_backingFile)
+{
+ openWriteOnly();
+}
+
+template <typename T, typename S>
+class MergeSorter
+{
+public:
+ MergeSorter(const vespalib::string & name, size_t chunkSize);
+ void push_back(const T & v);
+ void commit() { sortChunk(); merge(); }
+ const vespalib::string & getName() const { return _name; }
+ void rewind() { }
+private:
+ vespalib::string genName(size_t n);
+ void merge();
+ void sortChunk();
+
+ std::vector<T> _chunk;
+ size_t _chunkCount;
+ vespalib::string _name;
+};
+
+template <typename T, typename S>
+MergeSorter<T, S>::MergeSorter(const vespalib::string & name, size_t chunkSize) :
+ _chunk(),
+ _chunkCount(0),
+ _name(name + ".sorted")
+{
+ _chunk.reserve(chunkSize);
+}
+
+template <typename T, typename S>
+void MergeSorter<T, S>::push_back(const T & v)
+{
+ if (_chunk.size() < _chunk.capacity()) {
+ _chunk.push_back(v);
+ if (_chunk.size() == _chunk.capacity()) {
+ sortChunk();
+ }
+ }
+}
+
+template <typename T, typename S>
+vespalib::string MergeSorter<T, S>::genName(size_t n)
+{
+ char tmp[32];
+ sprintf(tmp, ".%zd", n);
+ vespalib::string fname(_name);
+ fname += tmp;
+ return fname;
+}
+
+template <typename T, typename S>
+void MergeSorter<T, S>::merge()
+{
+ S sorter;
+ std::vector< SequentialFileArrayRead<T> *> fileParts;
+ size_t count(0);
+ for(size_t i(0); i < _chunkCount; i++) {
+ std::unique_ptr< SequentialFileArrayRead<T> > part(new SequentialFileArrayRead<T>(genName(i)));
+ size_t sz = part->size();
+ if (sz > 0) {
+ fileParts.push_back(part.release());
+ } else {
+ part->erase();
+ }
+ count += sz;
+ }
+
+ std::vector<T> cachedValue;
+ for(size_t i(0), m(fileParts.size()); i < m; i++) {
+ cachedValue.push_back(fileParts[i]->getNext());
+ }
+ SequentialFileArrayWrite<T> merged(_name);
+ for(size_t j(0); j < count; j++) {
+ size_t firstIndex(0);
+ for(size_t i(1), m(cachedValue.size()); i < m; i++) {
+ if (sorter.cmp(cachedValue[i], cachedValue[firstIndex])) {
+ firstIndex = i;
+ }
+ }
+ merged.push_back(cachedValue[firstIndex]);
+ if ( ! fileParts[firstIndex]->hasNext() ) {
+ fileParts[firstIndex]->erase();
+ delete fileParts[firstIndex];
+ fileParts.erase(fileParts.begin()+firstIndex);
+ cachedValue.erase(cachedValue.begin()+firstIndex);
+ } else {
+ cachedValue[firstIndex] = fileParts[firstIndex]->getNext();
+ }
+ }
+}
+
+template <typename T, typename S>
+void MergeSorter<T, S>::sortChunk()
+{
+ S sorter;
+ sorter.sort(&_chunk[0], _chunk.size());
+ FastOS_File chunkFile(genName(_chunkCount).c_str());
+ chunkFile.EnableDirectIO();
+ if (chunkFile.OpenWriteOnlyTruncate()) {
+ chunkFile.CheckedWrite(&_chunk[0], _chunk.size()*sizeof(_chunk[0]));
+ }
+ chunkFile.Close();
+ _chunkCount++;
+ _chunk.clear();
+}
+
+template <typename T>
+class SequentialReadModifyWriteInterface
+{
+public:
+ typedef T Type;
+ virtual ~SequentialReadModifyWriteInterface() { }
+ virtual const T & read() = 0;
+ virtual void write(const T & v) = 0;
+ virtual bool next() = 0;
+ virtual bool empty() const { return size() == 0; }
+ virtual size_t size() const = 0;
+ virtual void rewind() = 0;
+};
+
+template <typename T, typename A=vespalib::HeapAlloc>
+class SequentialReadModifyWriteVector : public SequentialReadModifyWriteInterface<T>, public vespalib::Array<T, A>
+{
+private:
+ typedef vespalib::Array<T, A> Vector;
+public:
+ SequentialReadModifyWriteVector() : Vector(), _rp(0), _wp(0) { }
+ SequentialReadModifyWriteVector(size_t sz) : Vector(sz), _rp(0), _wp(0) { }
+ virtual const T & read() { return (*this)[_rp]; }
+ virtual void write(const T & v) { (*this)[_wp++] = v; }
+ virtual bool next() { _rp++; return _rp < Vector::size(); }
+ virtual bool empty() const { return Vector::empty(); }
+ virtual size_t size() const { return Vector::size(); }
+ virtual void rewind() { _rp = 0; _wp = 0; }
+private:
+ size_t _rp;
+ size_t _wp;
+};
+
+template <typename T, typename R, typename W>
+class SequentialReaderWriter : public SequentialReadModifyWriteInterface<T>
+{
+public:
+ SequentialReaderWriter(R & reader, W & writer) :
+ _reader(reader),
+ _writer(writer)
+ {
+ next();
+ }
+ virtual const T & read() { return _lastRead; }
+ virtual void write(const T & v) { _writer.push_back(v); }
+ virtual bool next() {
+ bool hasMore(_reader.hasNext());
+ if (hasMore) {
+ _lastRead = _reader.getNext();
+ }
+ return hasMore;
+ }
+ virtual size_t size() const { return _reader.size(); }
+ virtual void rewind() {
+ _reader.rewind();
+ next();
+ _writer.rewind();
+ }
+private:
+ T _lastRead;
+ R & _reader;
+ W & _writer;
+};
+
+}
+
diff --git a/searchlib/src/vespa/searchlib/util/foldedstringcompare.cpp b/searchlib/src/vespa/searchlib/util/foldedstringcompare.cpp
new file mode 100644
index 00000000000..ac63d1a7a64
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/util/foldedstringcompare.cpp
@@ -0,0 +1,82 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "foldedstringcompare.h"
+#include <vespa/vespalib/text/utf8.h>
+#include <vespa/vespalib/text/lowercase.h>
+
+using vespalib::LowerCase;
+
+namespace search {
+
+size_t
+FoldedStringCompare::
+size(const char *key) const
+{
+ return vespalib::Utf8ReaderForZTS::countChars(key);
+}
+
+int
+FoldedStringCompare::
+compareFolded(const char *key, const char *okey) const
+{
+ vespalib::Utf8ReaderForZTS kreader(key);
+ vespalib::Utf8ReaderForZTS oreader(okey);
+
+ for (;;) {
+ uint32_t kval = LowerCase::convert(kreader.getChar());
+ uint32_t oval = LowerCase::convert(oreader.getChar());
+
+ if (kval != oval) {
+ if (kval < oval) {
+ return -1;
+ } else {
+ return 1;
+ }
+ }
+ if (kval == 0) {
+ return 0;
+ }
+ }
+}
+
+
+int
+FoldedStringCompare::
+compareFoldedPrefix(const char *key, const char *okey, size_t prefixLen) const
+{
+ vespalib::Utf8ReaderForZTS kreader(key);
+ vespalib::Utf8ReaderForZTS oreader(okey);
+
+ for (size_t j = 0; j < prefixLen; ++j ) {
+ uint32_t kval = LowerCase::convert(kreader.getChar());
+ uint32_t oval = LowerCase::convert(oreader.getChar());
+
+ if (kval != oval) {
+ if (kval < oval) {
+ return -1;
+ } else {
+ return 1;
+ }
+ }
+ if (kval == 0) return 0;
+ }
+ // reached end of prefix
+ return 0;
+}
+
+
+int
+FoldedStringCompare::
+compare(const char *key, const char *okey) const
+{
+ int res;
+
+ res = compareFolded(key, okey);
+ if (res != 0)
+ return res;
+ return strcmp(key, okey);
+}
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/util/foldedstringcompare.h b/searchlib/src/vespa/searchlib/util/foldedstringcompare.h
new file mode 100644
index 00000000000..1904e0ca940
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/util/foldedstringcompare.h
@@ -0,0 +1,58 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/fastos/fastos.h>
+
+
+namespace search {
+
+class FoldedStringCompare
+{
+public:
+ FoldedStringCompare(void) {}
+
+ /**
+ * count number of UCS-4 characters in utf8 string
+ *
+ * @param key NUL terminated utf8 string
+ * @return integer number of symbols in utf8 string before NUL
+ */
+ size_t size(const char *key) const;
+
+ /**
+ * Compare utf8 key with utf8 other key after folding both
+ *
+ * @param key NUL terminated utf8 string
+ * @param okey NUL terminated utf8 string
+ * @return integer -1 if key < okey, 0 if key == okey, 1 if key > okey
+ **/
+ int compareFolded(const char *key, const char *okey) const;
+
+ /**
+ * Compare utf8 key with utf8 other key after folding both.
+ *
+ * @param key NUL terminated utf8 string
+ * @param okey NUL terminated utf8 string
+ * @param prefixLen max number of symbols to compare before
+ * considering keys identical.
+ *
+ * @return integer -1 if key < okey, 0 if key == okey, 1 if key > okey
+ */
+ int compareFoldedPrefix(const char *key,
+ const char *okey,
+ size_t prefixLen) const;
+
+ /*
+ * Compare utf8 key with utf8 other key after folding both, if
+ * they seem equal then fall back to comparing without folding.
+ *
+ * @param key NUL terminated utf8 string
+ * @param okey NUL terminated utf8 string
+ * @return integer -1 if key < okey, 0 if key == okey, 1 if key > okey
+ */
+ int compare(const char *key, const char *okey) const;
+};
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/util/inline.h b/searchlib/src/vespa/searchlib/util/inline.h
new file mode 100644
index 00000000000..85e2e096406
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/util/inline.h
@@ -0,0 +1,5 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/vespalib/util/inline.h>
+
diff --git a/searchlib/src/vespa/searchlib/util/ioerrorhandler.cpp b/searchlib/src/vespa/searchlib/util/ioerrorhandler.cpp
new file mode 100644
index 00000000000..a7f548a7c67
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/util/ioerrorhandler.cpp
@@ -0,0 +1,96 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "ioerrorhandler.h"
+#include "statebuf.h"
+#include "statefile.h"
+#include <atomic>
+
+namespace search
+{
+
+
+IOErrorHandler *IOErrorHandler::_instance = nullptr;
+
+namespace
+{
+
+std::atomic<int> nesting;
+
+}
+
+void
+IOErrorHandler::trap(void)
+{
+ _instance = this;
+ FastOS_File::SetFailedHandler(forward);
+ _trapped = true;
+}
+
+
+void
+IOErrorHandler::untrap(void)
+{
+#ifdef notyet
+ FastOS_File::SetFailedHandler(nullptr);
+#endif
+ _trapped = false;
+ _instance = nullptr;
+}
+
+
+void
+IOErrorHandler::forward(const char *op, const char *file,
+ int error, int64_t offset, size_t len, ssize_t rlen)
+{
+ nesting++;
+ IOErrorHandler *instance = _instance;
+ if (instance) {
+ instance->handle(op, file, error, offset, len, rlen);
+ }
+ nesting--;
+}
+
+
+void
+IOErrorHandler::handle(const char *op, const char *file,
+ int error, int64_t offset, size_t len, ssize_t rlen)
+{
+ std::vector<char> buf(4096);
+ StateBuf sb(&buf[0], buf.size());
+ sb.appendKey("state") << "down";
+ sb.appendTimestamp();
+ sb.appendKey("operation") << op;
+ sb.appendKey("file") << file;
+ sb.appendKey("error") << error;
+ sb.appendKey("offset") << offset;
+ sb.appendKey("len") << len;
+ sb.appendKey("rlen") << rlen;
+ sb << '\n';
+ if (_stateFile != nullptr) {
+ _stateFile->addState(sb.base(), sb.size(), false);
+ }
+ _fired = true;
+ sleep(3);
+}
+
+
+IOErrorHandler::IOErrorHandler(StateFile *stateFile)
+ : _stateFile(stateFile),
+ _trapped(false),
+ _fired(false)
+{
+ trap();
+}
+
+
+IOErrorHandler::~IOErrorHandler()
+{
+ untrap();
+ // Drain callbacks
+ while (nesting != 0) {
+ sleep(1);
+ }
+}
+
+}
diff --git a/searchlib/src/vespa/searchlib/util/ioerrorhandler.h b/searchlib/src/vespa/searchlib/util/ioerrorhandler.h
new file mode 100644
index 00000000000..acdc0d12282
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/util/ioerrorhandler.h
@@ -0,0 +1,61 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+namespace search
+{
+
+class StateFile;
+
+/*
+ * Class used to handle io error callsbacks from fastos.
+ */
+class IOErrorHandler
+{
+ static IOErrorHandler *_instance;
+ StateFile *_stateFile;
+ bool _trapped;
+ bool _fired;
+
+ using FailedHandler = void (*)(const char *op,
+ const char *file,
+ int error,
+ int64_t offset,
+ size_t len,
+ ssize_t rlen);
+ void
+ trap();
+
+ void
+ untrap();
+
+ static void
+ forward(const char *op,
+ const char *file,
+ int error,
+ int64_t offset,
+ size_t len,
+ ssize_t rlen);
+
+ void
+ handle(const char *op,
+ const char *file,
+ int error,
+ int64_t offset,
+ size_t len,
+ ssize_t rlen);
+
+public:
+ IOErrorHandler(StateFile *stateFile);
+
+ ~IOErrorHandler();
+
+ bool
+ fired() const
+ {
+ return _fired;
+ }
+};
+
+
+}
diff --git a/searchlib/src/vespa/searchlib/util/logutil.cpp b/searchlib/src/vespa/searchlib/util/logutil.cpp
new file mode 100644
index 00000000000..f8dd7120aac
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/util/logutil.cpp
@@ -0,0 +1,54 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".searchlib.util.logutil");
+
+#include "logutil.h"
+#include <vespa/searchlib/util/dirtraverse.h>
+#include <vector>
+
+using vespalib::JSONStringer;
+
+namespace search {
+namespace util {
+
+vespalib::string
+LogUtil::extractLastElements(const vespalib::string & path, size_t numElems)
+{
+ std::vector<vespalib::string> elems;
+ for (size_t pos = 0; pos < path.size(); ) {
+ size_t fpos = path.find('/', pos);
+ if (fpos == vespalib::string::npos) {
+ fpos = path.size();
+ }
+ size_t len = fpos - pos;
+ if (len > 0) {
+ elems.push_back(path.substr(pos, len));
+ }
+ pos = fpos + 1;
+ }
+ vespalib::string retval;
+ if (numElems >= elems.size() && path[0] == '/') {
+ retval.append("/");
+ }
+ size_t num = std::min(numElems, elems.size());
+ size_t pos = elems.size() - num;
+ for (size_t i = 0; i < num; ++i) {
+ if (i != 0) retval.append("/");
+ retval.append(elems[pos + i]);
+ }
+ return retval;
+}
+
+void
+LogUtil::logDir(JSONStringer & jstr, const vespalib::string & path, size_t numElems)
+{
+ jstr.beginObject();
+ jstr.appendKey("dir").appendString(LogUtil::extractLastElements(path, numElems));
+ search::DirectoryTraverse dirt(path.c_str());
+ jstr.appendKey("size").appendInt64(dirt.GetTreeSize());
+ jstr.endObject();
+}
+
+} // namespace util
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/util/logutil.h b/searchlib/src/vespa/searchlib/util/logutil.h
new file mode 100644
index 00000000000..dc984277c16
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/util/logutil.h
@@ -0,0 +1,29 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/vespalib/util/jsonwriter.h>
+
+namespace search {
+namespace util {
+
+class LogUtil {
+public:
+ /**
+ * Extract the last num elements from the given path and
+ * return a new path with these elements.
+ **/
+ static vespalib::string extractLastElements(const vespalib::string & path, size_t numElems);
+
+ /**
+ * Log the given directory (with size) to the given json stringer.
+ *
+ * @param jstr the json stringer to log into.
+ * @param path the path of the directory to log.
+ * @param numElems the last number of elements from the path to log.
+ **/
+ static void logDir(vespalib::JSONStringer & jstr, const vespalib::string & path, size_t numElems);
+};
+
+} // namespace util
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/util/memorytub.h b/searchlib/src/vespa/searchlib/util/memorytub.h
new file mode 100644
index 00000000000..40068e8e674
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/util/memorytub.h
@@ -0,0 +1,94 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright (C) 2002-2003 Fast Search & Transfer ASA
+// Copyright (C) 2003 Overture Services Norway AS
+
+#pragma once
+
+#if defined(MEMTUB_CLASS) || defined(MEMTUB_REFCLASS) || defined(MEMTUB_CHUNK) || defined(MEMTUB_LIMIT)
+#error "Memory tub 'template' parameters collide with other defines..."
+#endif
+
+#include <new>
+
+#include <vespa/fastos/fastos.h>
+
+namespace search {
+namespace util {
+
+class IMemTub
+{
+public:
+ /**
+ * Destructor. No cleanup needed for base class.
+ */
+ virtual ~IMemTub(void) { }
+
+ virtual void *TubAlloc(size_t size) = 0;
+ virtual void AddRef() = 0;
+ virtual void SubRef() = 0;
+ static uint32_t Align(uint32_t size)
+ {
+ return ((size + (sizeof(char *) - 1))
+ & ~(sizeof(char *) - 1));
+ }
+};
+
+}
+}
+
+inline void *
+operator new(size_t size, search::util::IMemTub *tub)
+{
+ return tub->TubAlloc(size);
+}
+
+inline void *
+operator new[](size_t size, search::util::IMemTub *tub)
+{
+ return tub->TubAlloc(size);
+}
+
+#define MEMTUB_CLASS MicroMemoryTub
+#define MEMTUB_REFCLASS MicroMemoryTubRefCnt
+#define MEMTUB_CHUNK (8192 - 256)
+#define MEMTUB_LIMIT 2048
+#include <vespa/searchlib/util/memorytub_impl.h>
+
+#define MEMTUB_CLASS TinyMemoryTub
+#define MEMTUB_REFCLASS TinyMemoryTubRefCnt
+#define MEMTUB_CHUNK (16384 - 256)
+#define MEMTUB_LIMIT 4096
+#include <vespa/searchlib/util/memorytub_impl.h>
+
+#define MEMTUB_CLASS SmallMemoryTub
+#define MEMTUB_REFCLASS SmallMemoryTubRefCnt
+#define MEMTUB_CHUNK (32768 - 256)
+#define MEMTUB_LIMIT 8192
+#include <vespa/searchlib/util/memorytub_impl.h>
+
+#define MEMTUB_CLASS MediumMemoryTub
+#define MEMTUB_REFCLASS MediumMemoryTubRefCnt
+#define MEMTUB_CHUNK (65536 - 256)
+#define MEMTUB_LIMIT 16384
+#include <vespa/searchlib/util/memorytub_impl.h>
+
+#define MEMTUB_CLASS LargeMemoryTub
+#define MEMTUB_REFCLASS LargeMemoryTubRefCnt
+#define MEMTUB_CHUNK (131072 - 256)
+#define MEMTUB_LIMIT 32768
+#include <vespa/searchlib/util/memorytub_impl.h>
+
+#define MEMTUB_CLASS HugeMemoryTub
+#define MEMTUB_REFCLASS HugeMemoryTubRefCnt
+#define MEMTUB_CHUNK (262144 - 256)
+#define MEMTUB_LIMIT 65536
+#include <vespa/searchlib/util/memorytub_impl.h>
+
+namespace search {
+namespace util {
+
+class DocSumMemoryPool : public SmallMemoryTub {};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/util/memorytub_impl.h b/searchlib/src/vespa/searchlib/util/memorytub_impl.h
new file mode 100644
index 00000000000..802a34cf976
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/util/memorytub_impl.h
@@ -0,0 +1,202 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright (C) 2003-2003 Fast Search & Transfer ASA
+// Copyright (C) 2003 Overture Services Norway AS
+
+#if !defined(MEMTUB_CLASS) || !defined(MEMTUB_REFCLASS) || !defined(MEMTUB_CHUNK) || !defined(MEMTUB_LIMIT)
+#error "Missing 'template' parameter(s)..."
+#endif
+
+
+namespace search {
+namespace util {
+
+/**
+ * These classes are used to speed up allocation and deallocation of
+ * memory. The poor mans template HACK is in honor of AIX. The denial
+ * of array alloc operations is in honor of Microsoft (VC++).
+ **/
+class MEMTUB_CLASS : public IMemTub
+{
+private:
+ MEMTUB_CLASS(const MEMTUB_CLASS &);
+ MEMTUB_CLASS& operator=(const MEMTUB_CLASS &);
+
+public:
+
+ struct AllocInfo {
+ private:
+ AllocInfo(const AllocInfo &);
+ AllocInfo &operator=(const AllocInfo &);
+
+ public:
+ AllocInfo *_next;
+ void *_data;
+ uint32_t _size;
+
+ AllocInfo(AllocInfo *next, void *data, uint32_t size)
+ : _next(next), _data(data), _size(size) {}
+ };
+
+ struct Chunk {
+ private:
+ Chunk(const Chunk &);
+ Chunk &operator=(const Chunk &);
+ public:
+ uint32_t _used;
+ Chunk *_next;
+ char _data[MEMTUB_CHUNK];
+
+ void *Alloc(size_t size)
+ {
+ size_t alignedsize = Align(size);
+ if (_used + alignedsize <= sizeof(_data)) {
+ void *ret = &_data[_used];
+ _used += alignedsize;
+ return ret;
+ }
+ return NULL;
+ }
+ Chunk(uint32_t used,
+ Chunk *next)
+ : _used(used),
+ _next(next)
+ {
+ }
+ };
+
+private:
+
+ Chunk _fixedChunk;
+ Chunk *_chunkHead;
+ AllocInfo *_allocHead;
+
+ void *SlowAlloc(size_t size) {
+ Chunk *chunk = static_cast<Chunk *>(malloc(sizeof(Chunk)));
+ assert(chunk != NULL);
+ chunk->_used = 0;
+ chunk->_next = _chunkHead;
+ _chunkHead = chunk;
+ return _chunkHead->Alloc(size);
+ }
+ void *SmallAlloc(size_t size) {
+ void *tmp = _chunkHead->Alloc(size);
+ return (tmp != NULL) ? tmp : SlowAlloc(size);
+ }
+ void *BigAlloc(size_t size) {
+ void *ret = malloc(size);
+ assert(ret != NULL);
+ _allocHead = new (SmallAlloc(sizeof(AllocInfo))) AllocInfo(_allocHead, ret, size);
+ return ret;
+ }
+
+public:
+ MEMTUB_CLASS()
+ : _fixedChunk(0, NULL),
+ _chunkHead(&_fixedChunk),
+ _allocHead(NULL)
+ {
+ assert(MEMTUB_CHUNK >= MEMTUB_LIMIT * 2);
+ assert(MEMTUB_LIMIT >= sizeof(AllocInfo));
+ }
+
+ uint32_t GetChunkSize() const { return MEMTUB_CHUNK; }
+ uint32_t GetAllocLimit() const { return MEMTUB_LIMIT; }
+
+ inline bool InTub(const void *pt) const {
+ const char *p = static_cast<const char *>(pt);
+
+ for (Chunk *chunk = _chunkHead; chunk != NULL; chunk = chunk->_next)
+ if (p >= chunk->_data &&
+ p < chunk->_data + chunk->_used)
+ return true;
+
+ for (AllocInfo *info = _allocHead; info != NULL; info = info->_next)
+ if (p >= static_cast<char *>(info->_data) &&
+ p < static_cast<char *>(info->_data) + info->_size)
+ return true;
+
+ return false;
+ }
+
+ void *Alloc(size_t size) {
+ return (size > MEMTUB_LIMIT) ? BigAlloc(size) : SmallAlloc(size);
+ }
+
+ void Reset()
+ {
+ for (AllocInfo *info = _allocHead;
+ info != NULL; info = info->_next) {
+ free(info->_data);
+ }
+ _allocHead = NULL;
+ while (_chunkHead != &_fixedChunk) {
+ Chunk *tmp = _chunkHead;
+ _chunkHead = tmp->_next;
+ free(tmp);
+ }
+ _fixedChunk._used = 0;
+ }
+
+ virtual ~MEMTUB_CLASS()
+ {
+ Reset();
+ }
+
+ // IMemTub implementation
+ virtual void *TubAlloc(size_t size) {
+ return Alloc(size);
+ }
+ virtual void AddRef() {}
+ virtual void SubRef() {}
+};
+
+
+class MEMTUB_REFCLASS : public MEMTUB_CLASS
+{
+private:
+ FastOS_Mutex _lock;
+ int _refcnt;
+
+public:
+ MEMTUB_REFCLASS() : _lock(), _refcnt(1) {}
+ virtual ~MEMTUB_REFCLASS() { assert(_refcnt == 0); }
+ virtual void AddRef()
+ {
+ _lock.Lock();
+ _refcnt++;
+ _lock.Unlock();
+ }
+ virtual void SubRef()
+ {
+ _lock.Lock();
+ assert(_refcnt > 0);
+ if (--_refcnt > 0) {
+ _lock.Unlock();
+ return;
+ }
+ _lock.Unlock();
+ delete this;
+ }
+};
+
+}
+}
+
+inline void *
+operator new(size_t size, search::util::MEMTUB_CLASS *tub)
+{
+ return tub->Alloc(size);
+}
+
+
+inline void *
+operator new[](size_t size, search::util::MEMTUB_CLASS *tub)
+{
+ return tub->Alloc(size);
+}
+
+
+#undef MEMTUB_CLASS
+#undef MEMTUB_REFCLASS
+#undef MEMTUB_CHUNK
+#undef MEMTUB_LIMIT
diff --git a/searchlib/src/vespa/searchlib/util/memoryusage.h b/searchlib/src/vespa/searchlib/util/memoryusage.h
new file mode 100644
index 00000000000..cc331f73d2a
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/util/memoryusage.h
@@ -0,0 +1,123 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+namespace search {
+
+class MemoryUsage {
+private:
+ size_t _allocatedBytes;
+ size_t _usedBytes;
+ size_t _deadBytes;
+ size_t _allocatedBytesOnHold;
+
+public:
+ MemoryUsage()
+ : _allocatedBytes(0),
+ _usedBytes(0),
+ _deadBytes(0),
+ _allocatedBytesOnHold(0)
+ {
+ }
+
+ MemoryUsage(size_t allocated,
+ size_t used,
+ size_t dead,
+ size_t onHold)
+ : _allocatedBytes(allocated),
+ _usedBytes(used),
+ _deadBytes(dead),
+ _allocatedBytesOnHold(onHold)
+ {
+ }
+
+ size_t
+ allocatedBytes(void) const
+ {
+ return _allocatedBytes;
+ }
+
+ size_t
+ usedBytes(void) const
+ {
+ return _usedBytes;
+ }
+
+ size_t
+ deadBytes(void) const
+ {
+ return _deadBytes;
+ }
+
+ size_t
+ allocatedBytesOnHold(void) const
+ {
+ return _allocatedBytesOnHold;
+ }
+
+ void
+ incAllocatedBytes(size_t inc)
+ {
+ _allocatedBytes += inc;
+ }
+
+ void
+ decAllocatedBytes(size_t dec)
+ {
+ _allocatedBytes -= dec;
+ }
+
+ void
+ incUsedBytes(size_t inc)
+ {
+ _usedBytes += inc;
+ }
+
+ void
+ incDeadBytes(size_t inc)
+ {
+ _deadBytes += inc;
+ }
+
+ void
+ incAllocatedBytesOnHold(size_t inc)
+ {
+ _allocatedBytesOnHold += inc;
+ }
+
+ void
+ setAllocatedBytes(size_t alloc)
+ {
+ _allocatedBytes = alloc;
+ }
+
+ void
+ setUsedBytes(size_t used)
+ {
+ _usedBytes = used;
+ }
+
+ void
+ setDeadBytes(size_t dead)
+ {
+ _deadBytes = dead;
+ }
+
+ void
+ setAllocatedBytesOnHold(size_t onHold)
+ {
+ _allocatedBytesOnHold = onHold;
+ }
+
+ void
+ merge(const MemoryUsage & rhs)
+ {
+ _allocatedBytes += rhs._allocatedBytes;
+ _usedBytes += rhs._usedBytes;
+ _deadBytes += rhs._deadBytes;
+ _allocatedBytesOnHold += rhs._allocatedBytesOnHold;
+ }
+};
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/util/postingpriorityqueue.h b/searchlib/src/vespa/searchlib/util/postingpriorityqueue.h
new file mode 100644
index 00000000000..42519e10b9f
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/util/postingpriorityqueue.h
@@ -0,0 +1,258 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vector>
+
+namespace search
+{
+
+/*
+ * Provide priority queue semantics for a set of posting inputs.
+ */
+template <class IN>
+class PostingPriorityQueue
+{
+public:
+ class Ref
+ {
+ IN *_ref;
+ public:
+ Ref(IN *ref)
+ : _ref(ref)
+ {
+ }
+
+ bool
+ operator<(const Ref &rhs) const
+ {
+ return *_ref < *rhs._ref;
+ }
+
+ IN *
+ get(void) const
+ {
+ return _ref;
+ }
+ };
+
+ typedef std::vector<Ref> Vector;
+ Vector _vec;
+
+ PostingPriorityQueue(void)
+ : _vec()
+ {
+ }
+
+ bool
+ empty(void) const
+ {
+ return _vec.empty();
+ }
+
+ void
+ clear(void)
+ {
+ _vec.clear();
+ }
+
+ void
+ initialAdd(IN *it)
+ {
+ _vec.push_back(Ref(it));
+ }
+
+ /*
+ * Sort vector after a set of initial add operations, so lowest()
+ * and adjust() can be used.
+ */
+ void
+ sort(void)
+ {
+ std::sort(_vec.begin(), _vec.end());
+ }
+
+ /*
+ * Return lowest value. Assumes vector is sorted.
+ */
+ IN *
+ lowest(void) const
+ {
+ return _vec.front().get();
+ }
+
+ /*
+ * The vector might no longer be sorted since the first element has changed
+ * value. Perform adjustments to make vector sorted again.
+ */
+ void
+ adjust(void);
+
+
+ template <class OUT>
+ void
+ mergeHeap(OUT &out) __attribute__((noinline));
+
+ template <class OUT>
+ static void
+ mergeOne(OUT &out, IN &in) __attribute__((noinline));
+
+ template <class OUT>
+ static void
+ mergeTwo(OUT &out, IN &in1, IN &in2) __attribute__((noinline));
+
+ template <class OUT>
+ static void
+ mergeSmall(OUT &out,
+ typename Vector::iterator ib,
+ typename Vector::iterator ie)
+ __attribute__((noinline));
+
+ template <class OUT>
+ void
+ merge(OUT &out, uint32_t heapLimit) __attribute__((noinline));
+};
+
+
+template <class IN>
+void
+PostingPriorityQueue<IN>::adjust(void)
+{
+ typedef typename Vector::iterator VIT;
+ if (!_vec.front().get()->isValid()) {
+ _vec.erase(_vec.begin()); // Iterator no longer valid
+ return;
+ }
+ if (_vec.size() == 1) // Only one iterator left
+ return;
+ // Peform binary search to find first element higher than changed value
+ VIT gt = std::upper_bound(_vec.begin() + 1, _vec.end(), _vec.front());
+ VIT to = _vec.begin();
+ VIT from = to;
+ ++from;
+ Ref changed = *to; // Remember changed value
+ while (from != gt) { // Shift elements to make space for changed value
+ *to = *from;
+ ++from;
+ ++to;
+ }
+ *to = changed; // Save changed value at right location
+}
+
+
+template <class IN>
+template <class OUT>
+void
+PostingPriorityQueue<IN>::mergeHeap(OUT &out)
+{
+ while (!empty()) {
+ IN *low = lowest();
+ low->write(out);
+ low->read();
+ adjust();
+ }
+}
+
+
+template <class IN>
+template <class OUT>
+void
+PostingPriorityQueue<IN>::mergeOne(OUT &out, IN &in)
+{
+ while (in.isValid()) {
+ in.write(out);
+ in.read();
+ }
+}
+
+template <class IN>
+template <class OUT>
+void
+PostingPriorityQueue<IN>::mergeTwo(OUT &out, IN &in1, IN &in2)
+{
+ for (;;) {
+ IN &low = in2 < in1 ? in2 : in1;
+ low.write(out);
+ low.read();
+ if (!low.isValid())
+ break;
+ }
+}
+
+
+template <class IN>
+template <class OUT>
+void
+PostingPriorityQueue<IN>::mergeSmall(OUT &out,
+ typename Vector::iterator ib,
+ typename Vector::iterator ie)
+{
+ for (;;) {
+ typename Vector::iterator i = ib;
+ IN *low = i->get();
+ for (++i; i != ie; ++i)
+ if (*i->get() < *low)
+ low = i->get();
+ low->write(out);
+ low->read();
+ if (!low->isValid())
+ break;
+ }
+}
+
+
+template <class IN>
+template <class OUT>
+void
+PostingPriorityQueue<IN>::merge(OUT &out, uint32_t heapLimit)
+{
+ if (_vec.empty())
+ return;
+ for (typename Vector::iterator i = _vec.begin(), ie = _vec.end(); i != ie;
+ ++i) {
+ assert(i->get()->isValid());
+ }
+ if (_vec.size() >= heapLimit) {
+ sort();
+ void (PostingPriorityQueue::*mergeHeapFunc)(OUT &out) =
+ &PostingPriorityQueue::mergeHeap;
+ (this->*mergeHeapFunc)(out);
+ return;
+ }
+ for (;;) {
+ if (_vec.size() == 1) {
+ void (*mergeOneFunc)(OUT &out, IN &in) =
+ &PostingPriorityQueue<IN>::mergeOne;
+ (*mergeOneFunc)(out, *_vec.front().get());
+ _vec.clear();
+ return;
+ }
+ if (_vec.size() == 2) {
+ void (*mergeTwoFunc)(OUT &out, IN &in1, IN &in2) =
+ &PostingPriorityQueue<IN>::mergeTwo;
+ (*mergeTwoFunc)(out, *_vec[0].get(), *_vec[1].get());
+ } else {
+ void (*mergeSmallFunc)(OUT &out,
+ typename Vector::iterator ib,
+ typename Vector::iterator ie) =
+ &PostingPriorityQueue::mergeSmall;
+ (*mergeSmallFunc)(out, _vec.begin(), _vec.end());
+ }
+ for (typename Vector::iterator i = _vec.begin(), ie = _vec.end();
+ i != ie; ++i) {
+ if (!i->get()->isValid()) {
+ _vec.erase(i);
+ break;
+ }
+ }
+ for (typename Vector::iterator i = _vec.begin(), ie = _vec.end();
+ i != ie; ++i) {
+ assert(i->get()->isValid());
+ }
+ assert(!_vec.empty());
+ }
+}
+
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/util/rand48.h b/searchlib/src/vespa/searchlib/util/rand48.h
new file mode 100644
index 00000000000..91fcf1b03e9
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/util/rand48.h
@@ -0,0 +1,44 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright (C) 2003 Fast Search & Transfer ASA
+// Copyright (C) 2003 Overture Services Norway AS
+
+#pragma once
+
+#include <vespa/fastos/fastos.h>
+
+namespace search {
+
+/*
+ * Simple random generator based on lrand48() spec.
+ */
+class Rand48
+{
+private:
+ uint64_t _state;
+public:
+ void
+ srand48(long seed)
+ {
+ _state = ((static_cast<uint64_t>(seed & 0xffffffffu)) << 16) + 0x330e;
+ }
+
+ Rand48(void)
+ : _state(0)
+ {
+ srand48(0x1234abcd);
+ };
+ void iterate(void) {
+ _state = (UINT64_C(0x5DEECE66D) * _state + 0xb) &
+ UINT64_C(0xFFFFFFFFFFFF);
+ }
+ /*
+ * Return value from 0 to 2^31 - 1
+ */
+ long lrand48(void) {
+ iterate();
+ return static_cast<long>(_state >> 17);
+ }
+};
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/util/randomgenerator.h b/searchlib/src/vespa/searchlib/util/randomgenerator.h
new file mode 100644
index 00000000000..84bab6f03c9
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/util/randomgenerator.h
@@ -0,0 +1,63 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/searchlib/util/rand48.h>
+#include <string>
+#include <vector>
+
+namespace search {
+class RandomGenerator
+{
+private:
+ Rand48 _rnd;
+
+public:
+ RandomGenerator() : _rnd() {}
+
+ RandomGenerator(long seed) : _rnd() {
+ _rnd.srand48(seed);
+ }
+
+ void srand(long seed) {
+ _rnd.srand48(seed);
+ }
+
+ uint32_t rand(uint32_t min, uint32_t max) {
+ assert(min <= max);
+ uint32_t divider = max - min + 1;
+ return (divider == 0 ? _rnd.lrand48() : min + _rnd.lrand48() % divider);
+ }
+
+ vespalib::string getRandomString(uint32_t minLen, uint32_t maxLen) {
+ uint32_t len = rand(minLen, maxLen);
+ vespalib::string retval;
+ for (uint32_t i = 0; i < len; ++i) {
+ char c = static_cast<char>(rand('a', 'z'));
+ retval.push_back(c);
+ }
+ return retval;
+ }
+
+ void fillRandomStrings(std::vector<vespalib::string> & vec, uint32_t numStrings,
+ uint32_t minLen, uint32_t maxLen) {
+ vec.clear();
+ vec.reserve(numStrings);
+ for (uint32_t i = 0; i < numStrings; ++i) {
+ vec.push_back(getRandomString(minLen, maxLen));
+ }
+ }
+
+ template <typename T>
+ void fillRandomIntegers(std::vector<T> & vec, uint32_t numValues) {
+ vec.clear();
+ vec.reserve(numValues);
+ for (uint32_t i = 0; i < numValues; ++i) {
+ vec.push_back(static_cast<T>(_rnd.lrand48()));
+ }
+ }
+};
+
+} // search
+
diff --git a/searchlib/src/vespa/searchlib/util/rawbuf.cpp b/searchlib/src/vespa/searchlib/util/rawbuf.cpp
new file mode 100644
index 00000000000..1a19792604f
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/util/rawbuf.cpp
@@ -0,0 +1,360 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright (C) 1998-2003 Fast Search & Transfer ASA
+// Copyright (C) 2003 Overture Services Norway AS
+
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/searchlib/util/rawbuf.h>
+#include <vespa/vespalib/util/compress.h>
+
+namespace search {
+
+static inline size_t smin(size_t a, size_t b) { return (a < b) ? a : b; }
+
+RawBuf::RawBuf(size_t size)
+ : _bufStart(NULL),
+ _bufEnd(NULL),
+ _bufFillPos(NULL),
+ _bufDrainPos(NULL),
+ _initialBufStart(NULL),
+ _initialSize(size)
+{
+ if (size > 0) {
+ _bufStart = static_cast<char *>(malloc(size));
+ }
+ _bufEnd = _bufStart + size;
+ _bufDrainPos = _bufFillPos = _bufStart;
+}
+
+
+RawBuf::RawBuf(char *start, size_t size)
+ : _bufStart(NULL),
+ _bufEnd(NULL),
+ _bufFillPos(NULL),
+ _bufDrainPos(NULL),
+ _initialBufStart(start),
+ _initialSize(size)
+{
+ _bufStart = start;
+ _bufEnd = _bufStart + size;
+ _bufDrainPos = _bufFillPos = _bufStart;
+}
+
+
+RawBuf::~RawBuf(void)
+{
+ if (_bufStart != _initialBufStart)
+ free(_bufStart);
+}
+
+
+/**
+ * Allocate a new buffer at least as large as the parameter value,
+ * move any content to the new and delete the old buffer.
+ */
+void
+RawBuf::expandBuf(size_t needlen)
+{
+ size_t size = (_bufEnd - _bufStart) * 2;
+ if (size < 1)
+ size = 2;
+ needlen += _bufEnd - _bufStart;
+ while (size < needlen)
+ size *= 2;
+ char* nbuf = static_cast<char *>(malloc(size));
+ if (_bufFillPos != _bufDrainPos)
+ memcpy(nbuf, _bufDrainPos, _bufFillPos - _bufDrainPos);
+ _bufFillPos = _bufFillPos - _bufDrainPos + nbuf;
+ _bufDrainPos = nbuf;
+ if (_bufStart != _initialBufStart)
+ free(_bufStart);
+ _bufStart = nbuf;
+ _bufEnd = _bufStart + size;
+}
+
+
+/**
+ * Put 'data' of 'len'gth into the buffer. If insufficient room,
+ * make the buffer larger.
+ */
+void
+RawBuf::append(const void *data, size_t len)
+{
+ ensureSize(len);
+ memcpy(_bufFillPos, data, len);
+ _bufFillPos += len;
+}
+
+void
+RawBuf::append(uint8_t byte)
+{
+ ensureSize(1);
+ *_bufFillPos++ = byte;
+}
+
+void
+RawBuf::appendCompressedPositiveNumber(uint64_t n)
+{
+ size_t len(vespalib::compress::Integer::compressedPositiveLength(n));
+ ensureSize(len);
+ _bufFillPos += vespalib::compress::Integer::compressPositive(n, _bufFillPos);
+}
+
+void
+RawBuf::appendCompressedNumber(int64_t n)
+{
+ size_t len(vespalib::compress::Integer::compressedLength(n));
+ ensureSize(len);
+ _bufFillPos += vespalib::compress::Integer::compress(n, _bufFillPos);
+}
+
+
+/**
+ * Has the entire contents of the buffer been used up, i.e. freed?
+ */
+bool
+RawBuf::IsEmpty(void)
+{
+ return _bufFillPos == _bufDrainPos;
+}
+
+
+/**
+ * Free 'len' bytes from the start of the contents. (These
+ * have presumably been written or read.)
+ */
+void
+RawBuf::Drain(size_t len)
+{
+ _bufDrainPos += len;
+ if (_bufDrainPos == _bufFillPos)
+ reset();
+}
+
+
+/**
+ * Compact any free space from the beginning of the buffer, by
+ * copying the contents to the start of the buffer.
+ * If the resulting buffer doesn't have room for 'len' more
+ * bytes of contents, make it large enough.
+ */
+void
+RawBuf::preAlloc(size_t len)
+{
+ size_t curfree = _bufEnd - _bufFillPos;
+ if (curfree >= len)
+ return;
+ if (_bufEnd - _bufStart < len + _bufFillPos - _bufDrainPos) {
+ expandBuf(len);
+ assert(_bufEnd - _bufStart >= len + _bufFillPos - _bufDrainPos);
+ curfree = _bufEnd - _bufFillPos;
+ if (curfree >= len)
+ return;
+ }
+ memmove(_bufStart, _bufDrainPos, _bufFillPos - _bufDrainPos);
+ _bufFillPos -= (_bufDrainPos - _bufStart);
+ _bufDrainPos = _bufStart;
+ assert(static_cast<size_t>(_bufEnd -_bufFillPos) >= len);
+}
+
+
+void
+RawBuf::Compact(void)
+{
+ if (_bufDrainPos == _bufStart)
+ return;
+ if (_bufFillPos != _bufDrainPos)
+ memmove(_bufStart, _bufDrainPos, _bufFillPos - _bufDrainPos);
+ _bufFillPos -= (_bufDrainPos - _bufStart);
+ _bufDrainPos = _bufStart;
+}
+
+
+void
+RawBuf::Reuse(void)
+{
+ if (static_cast<size_t>(_bufEnd - _bufStart) > _initialSize * 4) {
+ free(_bufStart);
+ if (_initialSize > 0) {
+ if (_initialBufStart != NULL)
+ _bufStart = _initialBufStart;
+ else
+ _bufStart = static_cast<char *>(malloc(_initialSize));
+ assert(_bufStart != NULL);
+ } else
+ _bufStart = NULL;
+ _bufEnd = _bufStart + _initialSize;
+ }
+ _bufDrainPos = _bufFillPos = _bufStart;
+}
+
+
+void
+RawBuf::operator+=(const char *src)
+{
+ while (*src) {
+ char *cachedBufFillPos = _bufFillPos;
+ const char *cachedBufEnd = _bufEnd;
+ while (cachedBufFillPos < cachedBufEnd && *src)
+ *cachedBufFillPos++ = *src++;
+ _bufFillPos = cachedBufFillPos;
+ if (_bufFillPos >= _bufEnd)
+ expandBuf(1);
+ }
+}
+
+
+void
+RawBuf::operator+=(const RawBuf& buffer)
+{
+ size_t nbytes = buffer.GetUsedLen();
+ if (nbytes == 0)
+ return;
+
+ while (GetFreeLen() < nbytes)
+ expandBuf(nbytes);
+ memcpy(_bufFillPos, buffer._bufDrainPos, nbytes);
+ _bufFillPos += nbytes;
+}
+
+
+bool
+RawBuf::operator==(const RawBuf &buffer)
+{
+ size_t nbytes = buffer.GetUsedLen();
+ if (nbytes != GetUsedLen())
+ return false;
+
+ const char *p, *t;
+ for (p=_bufDrainPos, t=buffer._bufDrainPos; p<_bufFillPos; p++, t++) {
+ if (*p != *t)
+ return false;
+ }
+
+ return true;
+}
+
+/**
+ * Append the value of param 'num' to the buffer, as a decimal
+ * number right adjusted in a field of width 'fieldw', remaining
+ * space filled with 'fill' characters.
+ */
+void
+RawBuf::addNum(size_t num, size_t fieldw, char fill)
+{
+ char buf1[20];
+ char *p = buf1;
+ do {
+ *p++ = '0' + (num % 10);
+ num /= 10;
+ } while (num != 0);
+ size_t plen = p - buf1;
+ size_t wantlen = fieldw;
+ if (plen > wantlen)
+ wantlen = plen;
+ if (_bufFillPos + wantlen >= _bufEnd)
+ expandBuf(wantlen);
+ char *cachedBufFillPos = _bufFillPos;
+ while (plen < wantlen) {
+ *cachedBufFillPos++ = fill;
+ wantlen--;
+ }
+ while (p > buf1) {
+ *cachedBufFillPos++ = *--p;
+ }
+ _bufFillPos = cachedBufFillPos;
+}
+
+
+void
+RawBuf::addNum32(int32_t num, size_t fieldw, char fill)
+{
+ char buf1[11];
+ uint32_t unum = num >= 0 ? num : -num;
+ char *p = buf1;
+ do {
+ *p++ = '0' + (unum % 10);
+ unum /= 10;
+ } while (unum != 0);
+ if (num < 0)
+ *p++ = '-';
+ size_t plen = p - buf1;
+ size_t wantlen = fieldw;
+ if (plen > wantlen)
+ wantlen = plen;
+ if (_bufFillPos + wantlen >= _bufEnd)
+ expandBuf(wantlen);
+ char *cachedBufFillPos = _bufFillPos;
+ while (plen < wantlen) {
+ *cachedBufFillPos++ = fill;
+ wantlen--;
+ }
+ while (p > buf1) {
+ *cachedBufFillPos++ = *--p;
+ }
+ _bufFillPos = cachedBufFillPos;
+}
+
+
+
+void
+RawBuf::addNum64(int64_t num, size_t fieldw, char fill)
+{
+ char buf1[21];
+ uint64_t unum = num >= 0 ? num : -num;
+ char *p = buf1;
+ do {
+ *p++ = '0' + (unum % 10);
+ unum /= 10;
+ } while (unum != 0);
+ if (num < 0)
+ *p++ = '-';
+ size_t plen = p - buf1;
+ size_t wantlen = fieldw;
+ if (plen > wantlen)
+ wantlen = plen;
+ if (_bufFillPos + wantlen >= _bufEnd)
+ expandBuf(wantlen);
+ char *cachedBufFillPos = _bufFillPos;
+ while (plen < wantlen) {
+ *cachedBufFillPos++ = fill;
+ wantlen--;
+ }
+ while (p > buf1) {
+ *cachedBufFillPos++ = *--p;
+ }
+ _bufFillPos = cachedBufFillPos;
+}
+
+
+void
+RawBuf::addHitRank(HitRank num)
+{
+ char buf1[100];
+ snprintf(buf1, sizeof(buf1), "%g", static_cast<double>(num));
+ append(buf1, strlen(buf1));
+}
+
+
+void
+RawBuf::addSignedHitRank(SignedHitRank num)
+{
+ char buf1[100];
+ snprintf(buf1, sizeof(buf1), "%g", static_cast<double>(num));
+ append(buf1, strlen(buf1));
+}
+
+/**
+ * Read from the indicated file into the buffer, no more that the
+ * given number of bytes and no more than will fit in the buffer.
+ */
+size_t
+RawBuf::readFile(FastOS_File &file, size_t maxlen)
+{
+ size_t got = file.Read(_bufFillPos, smin((_bufEnd - _bufFillPos), maxlen));
+ if (got > 0)
+ _bufFillPos += got;
+ return got;
+}
+
+}
diff --git a/searchlib/src/vespa/searchlib/util/rawbuf.h b/searchlib/src/vespa/searchlib/util/rawbuf.h
new file mode 100644
index 00000000000..39e791a070f
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/util/rawbuf.h
@@ -0,0 +1,163 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright (C) 1998-2003 Fast Search & Transfer ASA
+// Copyright (C) 2003 Overture Services Norway AS
+
+#pragma once
+
+#include <vespa/fastos/fastos.h>
+
+#include <vespa/searchlib/common/hitrank.h>
+
+namespace search {
+/**
+ * A buffer with an input point and an output point. The space
+ * is dynamically allocated by the constructor, and can be extended
+ * when needed. Buffer contents may be moved around when there is
+ * insufficient room.
+ */
+
+class FASTOS_LOADABLE_EXPORT RawBuf
+{
+private:
+ RawBuf(const RawBuf &);
+ RawBuf& operator=(const RawBuf &);
+
+ char* _bufStart; // ref. to start of buffer (don't move this!)
+ char* _bufEnd; // ref. to byte after last in buffer (don't mo)
+ char* _bufFillPos; // ref. to byte where next should be put in
+ char* _bufDrainPos; // ref. to next byte to take out of buffer
+ char* _initialBufStart;
+ size_t _initialSize;
+public:
+
+ RawBuf(char *start, size_t size);// Initially use provided buffer
+ RawBuf(size_t size); // malloc-s given size, assigns to _bufStart
+ ~RawBuf(void); // Frees _bufStart, i.e. the char[].
+
+ void operator+=(const char *src);
+ void operator+=(const RawBuf& buffer);
+ bool operator==(const RawBuf &buffer);
+ void addNum(size_t num, size_t fieldw, char fill);
+ void addNum32(int32_t num, size_t fieldw, char fill);
+ void addNum64(int64_t num, size_t fieldw, char fill);
+
+ void addHitRank(HitRank num);
+ void addSignedHitRank(SignedHitRank num);
+
+ void append(const void *data, size_t len);
+ void append(uint8_t byte);
+ void appendLong(uint64_t n);
+ void appendCompressedPositiveNumber(uint64_t n);
+ void appendCompressedNumber(int64_t n);
+ bool IsEmpty(void); // Return whether all written.
+ void expandBuf(size_t needlen);
+ size_t GetFreeLen(void) const { return _bufEnd - _bufFillPos; }
+ size_t GetDrainLen(void) const { return _bufDrainPos - _bufStart; }
+ const char *GetDrainPos(void) const { return _bufDrainPos; }
+ const char *GetFillPos(void) const { return _bufFillPos; }
+ char * GetWritableFillPos(void) const { return _bufFillPos; }
+ char * GetWritableFillPos(size_t len) { preAlloc(len); return _bufFillPos; }
+ char * GetWritableDrainPos(size_t offset) { return _bufDrainPos + offset; }
+ void truncate(size_t offset) { _bufFillPos = _bufDrainPos + offset; }
+ void preAlloc(size_t len); // Ensure room for 'len' more bytes.
+ size_t readFile(FastOS_File &file, size_t maxlen);
+ void reset(void) { _bufDrainPos = _bufFillPos = _bufStart; }
+ void Compact(void);
+ void Reuse(void);
+ size_t GetUsedAndDrainLen(void) const { return _bufFillPos - _bufStart; }
+ size_t GetUsedLen(void) const { return _bufFillPos - _bufDrainPos; }
+ void Drain(size_t len); // Adjust drain pos.
+ void Fill(size_t len) { _bufFillPos += len; }
+
+ void ensureSize(size_t size) {
+ if (static_cast<size_t>(_bufEnd - _bufFillPos) < size) {
+ expandBuf(size);
+ assert(static_cast<size_t>(_bufEnd - _bufFillPos) >= size);
+ }
+ }
+
+ /**
+ * Convert from interNet highendian order at 'src', to unsigned integers
+ */
+ static uint16_t InetTo16(const unsigned char *src) {
+ return (static_cast<uint16_t>(*src) << 8) + *(src + 1);
+ };
+ static uint16_t InetTo16(const char* src) {
+ return InetTo16(reinterpret_cast<const unsigned char *>(src));
+ };
+ static uint32_t InetTo32(const unsigned char* src) {
+ return (((((static_cast<uint32_t>(*src) << 8) + *(src + 1)) << 8)
+ + *(src + 2)) << 8) + *(src + 3);
+ };
+ static uint32_t InetTo32(const char* src) {
+ return InetTo32(reinterpret_cast<const unsigned char *>(src));
+ };
+
+ /**
+ * Convert unsigned int.s 'src', to interNet highendian order, at 'dst'
+ * or _bufFillPos. Update or return ref to next char after those filled in.
+ */
+ static unsigned char* ToInet(uint16_t src, unsigned char* dst) {
+ *(dst + 1) = static_cast<unsigned char>(src); // The least significant 8 bits
+ src >>= 8; // of 'src' are stored.
+ *dst = static_cast<unsigned char>(src);
+ return dst + 2;
+ };
+ void Put16ToInet(uint16_t src) {
+ ensureSize(2);
+ _bufFillPos = reinterpret_cast<char *>
+ (ToInet(src,
+ reinterpret_cast<unsigned char*>(_bufFillPos)));
+ };
+ static unsigned char* ToInet(uint32_t src, unsigned char* dst) {
+ *(dst + 3) = src; // The least significant 8 bits
+ src >>= 8; // of 'src' are stored.
+ *(dst + 2) = src;
+ src >>= 8;
+ *(dst + 1) = src;
+ src >>= 8;
+ *dst = src;
+ return dst + 4;
+ };
+ void PutToInet(uint32_t src) {
+ ensureSize(4);
+ _bufFillPos = reinterpret_cast<char *>
+ (ToInet(src,
+ reinterpret_cast<unsigned char*>(_bufFillPos)));
+ };
+
+ static unsigned char* ToInet(uint64_t src, unsigned char* dst) {
+ ToInet(static_cast<uint32_t>(src >> 32), dst);
+ ToInet(static_cast<uint32_t>(src & 0xffffffffull), dst + 4);
+ return dst + 8;
+ };
+ void Put64ToInet(uint64_t src) {
+ ensureSize(8);
+ _bufFillPos = reinterpret_cast<char *>
+ (ToInet(src,
+ reinterpret_cast<unsigned char*>(_bufFillPos)));
+ };
+
+ /**
+ * Check that char-s are loaded to and stored from the 8 least
+ * significant bits of a 32 bit value, and that shift works the usual
+ * way. (It is placed in this class to keep it out of view.)
+ */
+ static void CheckHardware(void) {
+ uint32_t i = 0xe2345678;
+ unsigned char b = 67, // 'C'
+ c = 65, // 'A'
+ d = 66; // 'B'
+ unsigned char* p = &c;
+ assert(sizeof(uint32_t) == 4 &&
+ sizeof(long int) == 8 &&
+ static_cast<uint32_t>(*p) << 16 == 4259840);
+ *p = i >> 16;
+ if ( !(b == 67 && c == 52 && d == 66)) {
+ abort();
+ }
+ };
+};
+
+}
+
diff --git a/searchlib/src/vespa/searchlib/util/runnable.h b/searchlib/src/vespa/searchlib/util/runnable.h
new file mode 100644
index 00000000000..437a2333231
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/util/runnable.h
@@ -0,0 +1,42 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/vespalib/util/sync.h>
+
+namespace search {
+
+class Runnable : public FastOS_Runnable
+{
+protected:
+ vespalib::Monitor _cond;
+ bool _done;
+ bool _stopped;
+
+public:
+ Runnable() :
+ _cond(), _done(false), _stopped(false)
+ { }
+ void Run(FastOS_ThreadInterface *, void *) {
+ doRun();
+
+ vespalib::MonitorGuard guard(_cond);
+ _stopped = true;
+ guard.broadcast();
+ }
+ virtual void doRun() = 0;
+ void stop() {
+ vespalib::MonitorGuard guard(_cond);
+ _done = true;
+ }
+ void join() {
+ vespalib::MonitorGuard guard(_cond);
+ while (!_stopped) {
+ guard.wait();
+ }
+ }
+};
+
+} // search
+
diff --git a/searchlib/src/vespa/searchlib/util/searchable_stats.h b/searchlib/src/vespa/searchlib/util/searchable_stats.h
new file mode 100644
index 00000000000..a7d5764de7c
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/util/searchable_stats.h
@@ -0,0 +1,44 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+namespace search {
+
+/**
+ * Simple statistics for a single Searchable component. Used for
+ * internal aggregation before inserting numbers into the metrics
+ * framework.
+ **/
+class SearchableStats
+{
+private:
+ size_t _memoryUsage;
+ size_t _docsInMemory;
+ size_t _sizeOnDisk;
+
+public:
+ SearchableStats() : _memoryUsage(0), _docsInMemory(0), _sizeOnDisk(0) {}
+ SearchableStats &memoryUsage(size_t value) {
+ _memoryUsage = value;
+ return *this;
+ }
+ size_t memoryUsage() const { return _memoryUsage; }
+ SearchableStats &docsInMemory(size_t value) {
+ _docsInMemory = value;
+ return *this;
+ }
+ size_t docsInMemory() const { return _docsInMemory; }
+ SearchableStats &sizeOnDisk(size_t value) {
+ _sizeOnDisk = value;
+ return *this;
+ }
+ size_t sizeOnDisk() const { return _sizeOnDisk; }
+ SearchableStats &add(const SearchableStats &rhs) {
+ _memoryUsage += rhs._memoryUsage;
+ _docsInMemory += rhs._docsInMemory;
+ _sizeOnDisk += rhs._sizeOnDisk;
+ return *this;
+ }
+};
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/util/sigbushandler.cpp b/searchlib/src/vespa/searchlib/util/sigbushandler.cpp
new file mode 100644
index 00000000000..7670388572a
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/util/sigbushandler.cpp
@@ -0,0 +1,168 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "statefile.h"
+#include "statebuf.h"
+#include "sigbushandler.h"
+#include <setjmp.h>
+#include <system_error>
+#include <mutex>
+#include <atomic>
+
+namespace search
+{
+
+
+SigBusHandler *SigBusHandler::_instance = nullptr;
+
+
+namespace
+{
+
+std::atomic<int> sigBusNesting;
+
+class TryLockGuard
+{
+ bool _gotLock;
+public:
+ TryLockGuard() noexcept
+ : _gotLock(false)
+ {
+ int expzero = 0;
+ _gotLock = sigBusNesting.compare_exchange_strong(expzero, 1);
+ }
+
+ ~TryLockGuard() noexcept
+ {
+ if (_gotLock) {
+ sigBusNesting = 0;
+ }
+ }
+
+ bool
+ gotLock() const noexcept
+ {
+ return _gotLock;
+ }
+};
+
+
+/*
+ * Write string to standard error using only async signal safe methods.
+ */
+void
+mystderr(const char *msg) noexcept
+{
+ const char *p = msg;
+ while (*p != '\0') {
+ ++p;
+ }
+ write(STDERR_FILENO, msg, static_cast<size_t>(p - msg));
+}
+
+}
+
+void
+SigBusHandler::trap(void)
+{
+ struct sigaction sa;
+ _instance = this;
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_sigaction = SigBusHandler::forward;
+ sa.sa_flags = SA_SIGINFO;
+ sigemptyset(&sa.sa_mask);
+ sigaddset(&sa.sa_mask, SIGBUS);
+ sigaction(SIGBUS, &sa, nullptr);
+ _trapped = true;
+}
+
+
+void
+SigBusHandler::untrap(void)
+{
+ struct sigaction sa;
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_handler = SIG_DFL;
+ sa.sa_flags = 0;
+ sigemptyset(&sa.sa_mask);
+ sigaction(SIGBUS, &sa, nullptr);
+ _trapped = false;
+ _instance = nullptr;
+}
+
+
+void
+SigBusHandler::forward(int sig, siginfo_t *si, void *ucv)
+{
+ _instance->handle(sig, si, ucv);
+}
+
+
+void
+SigBusHandler::handle(int sig, siginfo_t *si, void *ucv)
+{
+ (void) sig;
+ (void) ucv;
+
+ StateBuf sb(_buf, sizeof(_buf));
+ bool raced = false;
+ do {
+ // Protect against multiple threads.
+ TryLockGuard guard;
+ if (!guard.gotLock()) {
+ raced = true;
+ break;
+ }
+ sb.appendKey("state") << "down";
+ sb.appendTimestamp();
+ sb.appendKey("operation") << "sigbus";
+ sb.appendKey("errno") << static_cast<long>(si->si_errno);
+ sb.appendKey("code") << static_cast<long>(si->si_code);
+ if (si->si_code != 0) {
+ sb.appendAddr(si->si_addr);
+ }
+ sb << '\n';
+ // TODO: Report backing store file, for quick diagnostics.
+ if (_stateFile != nullptr) {
+ _stateFile->addState(sb.base(), sb.size(), true);
+ }
+ _fired = true;
+ } while (0);
+ if (raced) {
+ mystderr("SIGBUS handler call race, ignoring signal\n");
+ sleep(5);
+ return;
+ }
+ untrap(); // Further bus errors will trigger core dump
+
+ if (_unwind != nullptr) {
+ // Unit test is using siglongjmp based unwinding
+ sigjmp_buf *unwind = _unwind;
+ _unwind = nullptr;
+ siglongjmp(*unwind, 1);
+ } else {
+ // Normal case, sleep 3 seconds (i.e. allow main thread to detect
+ // issue and notify cluster controller) before returning and
+ // likely core dumping.
+ sleep(3);
+ }
+}
+
+
+SigBusHandler::SigBusHandler(StateFile *stateFile)
+ : _stateFile(stateFile),
+ _unwind(nullptr),
+ _trapped(false),
+ _fired(false)
+{
+ trap();
+}
+
+
+SigBusHandler::~SigBusHandler()
+{
+ untrap();
+}
+
+
+}
diff --git a/searchlib/src/vespa/searchlib/util/sigbushandler.h b/searchlib/src/vespa/searchlib/util/sigbushandler.h
new file mode 100644
index 00000000000..49c7879b10a
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/util/sigbushandler.h
@@ -0,0 +1,60 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <setjmp.h>
+
+namespace search
+{
+
+class StateFile;
+
+/*
+ * Class used to handle SIGBUS signals, which are generated on IO errors
+ * on backing file for a memory map.
+ */
+class SigBusHandler
+{
+ static SigBusHandler *_instance;
+ StateFile *_stateFile;
+ sigjmp_buf *_unwind;
+ bool _trapped;
+ bool _fired;
+ char _buf[2048];
+
+ void
+ trap();
+
+ void
+ untrap();
+
+ static void
+ forward(int sig, siginfo_t *si, void *ucv);
+
+ void
+ handle(int sig, siginfo_t *si, void *ucv);
+
+public:
+ SigBusHandler(StateFile *stateFile);
+
+ ~SigBusHandler();
+
+ bool
+ fired() const
+ {
+ return _fired;
+ }
+
+ /*
+ * Setup siglongjmp based unwinding, used by unit tests.
+ */
+ void
+ setUnwind(sigjmp_buf *unwind)
+ {
+ _unwind = unwind;
+ }
+};
+
+
+}
+
diff --git a/searchlib/src/vespa/searchlib/util/slime_output_raw_buf_adapter.cpp b/searchlib/src/vespa/searchlib/util/slime_output_raw_buf_adapter.cpp
new file mode 100644
index 00000000000..c86303e5a97
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/util/slime_output_raw_buf_adapter.cpp
@@ -0,0 +1,8 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "slime_output_raw_buf_adapter.h"
+
+namespace search {
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/util/slime_output_raw_buf_adapter.h b/searchlib/src/vespa/searchlib/util/slime_output_raw_buf_adapter.h
new file mode 100644
index 00000000000..9ab88d07c28
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/util/slime_output_raw_buf_adapter.h
@@ -0,0 +1,24 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/vespalib/data/slime/output.h>
+#include "rawbuf.h"
+
+namespace search {
+
+class SlimeOutputRawBufAdapter : public ::vespalib::slime::Output
+{
+private:
+ RawBuf &_buf;
+
+public:
+ SlimeOutputRawBufAdapter(RawBuf &buf) : _buf(buf) {}
+ virtual char *exchange(char *, size_t commit, size_t reserve) {
+ _buf.Fill(commit);
+ return _buf.GetWritableFillPos(reserve);
+ }
+};
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/util/sort.h b/searchlib/src/vespa/searchlib/util/sort.h
new file mode 100644
index 00000000000..70324731ca5
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/util/sort.h
@@ -0,0 +1,143 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright (C) 1998-2003 Fast Search & Transfer ASA
+// Copyright (C) 2003 Overture Services Norway AS
+
+
+#pragma once
+
+#include <algorithm>
+#include <vespa/searchlib/util/inline.h>
+
+namespace search {
+/* Various sorting-related functions */
+
+template <typename T, typename Compare>
+inline always_inline__ T *
+median3(T *a, T *b, T *c, Compare *compobj)
+{
+ return Compare::Compare(compobj, *a, *b) < 0 ?
+ (Compare::Compare(compobj, *b, *c) < 0 ? b : Compare::Compare(compobj, *a, *c) < 0 ? c : a) :
+ (Compare::Compare(compobj, *b, *c) > 0 ? b : Compare::Compare(compobj, *a, *c) > 0 ? c : a);
+}
+
+
+template <typename T, typename Compare>
+void
+insertion_sort(T a[], unsigned int n, Compare *compobj)
+{
+ unsigned int i, j;
+ T _swap;
+
+ for (i=1; i<n ; i++)
+ {
+ _swap = a[i];
+ j = i;
+ while (Compare::Compare(compobj, _swap, a[j-1]) < 0)
+ {
+ a[j] = a[j-1];
+ if(!(--j)) break;
+ }
+ a[j] = _swap;
+ }
+}
+
+template <int InsertSortLevel, int Median9Level, typename T,
+ typename Compare>
+void
+qsort(T *a, unsigned int n, Compare *compobj)
+{
+ for (;;) {
+ if (n < InsertSortLevel) {
+ insertion_sort<T, Compare>(a, n, compobj);
+ return;
+ }
+ T *middle = a + (n/2);
+ T *left = a;
+ T *right = a + n - 1;
+ if (n > Median9Level) {
+ size_t s = n/8;
+ left = median3<T, Compare>
+ (left, left + s, left + 2*s, compobj);
+ middle = median3<T, Compare>
+ (middle - s, middle, middle+s, compobj);
+ right = median3<T, Compare>
+ (right - 2*s, right - s, right, compobj);
+ }
+ middle = median3<T, Compare>(left, middle, right, compobj);
+ T *pa, *pb, *pc, *pd;
+ pa = pb = a;
+ pc = pd = a + n - 1;
+ T swap;
+ T pivot = *middle;
+ int r;
+ for (;;) {
+ while (pb <= pc && (r = Compare::Compare(compobj, *pb, pivot)) <= 0) {
+ if (r == 0) {
+ swap = *pa;
+ *pa = *pb;
+ *pb = swap;
+ pa++;
+ }
+ pb++;
+ }
+ while (pb <= pc && (r = Compare::Compare(compobj, *pc, pivot)) >= 0) {
+ if (r == 0) {
+ swap = *pc;
+ *pc = *pd;
+ *pd = swap;
+ pd--;
+ }
+ pc--;
+ }
+ if (pb > pc)
+ break;
+ swap = *pb;
+ *pb = *pc;
+ *pc = swap;
+ pb++;
+ pc--;
+ }
+ right = a + n;
+ int s = std::min(pa - a, pb - pa);
+ T *swapa = a;
+ T *swapb = pb-s;
+ T *swapaend = a + s;
+ while (swapa < swapaend) {
+ T tmp = *swapa;
+ *swapa++ = *swapb;
+ *swapb++ = tmp;
+ }
+ s = std::min(pd - pc, right - pd - 1);
+ swapa = pb;
+ swapb = right - s;
+ swapaend = pb + s;
+ while (swapa < swapaend) {
+ T tmp = *swapa;
+ *swapa++ = *swapb;
+ *swapb++ = tmp;
+ }
+ // Recurse on the smaller partition.
+ if (pb - pa < pd - pc) {
+ if ((s = pb - pa) > 1)
+ qsort<InsertSortLevel, Median9Level, T, Compare>
+ (a, s, compobj);
+ if ((s = pd - pc) > 1) {
+ a = right - s;
+ n = s;
+ continue;
+ }
+ } else {
+ if ((s = pd - pc) > 1)
+ qsort<InsertSortLevel, Median9Level, T, Compare>
+ (right - s, s, compobj);
+ if ((s = pb - pa) > 1) {
+ n = s;
+ continue;
+ }
+ }
+ break;
+ }
+}
+
+}
+
diff --git a/searchlib/src/vespa/searchlib/util/statebuf.cpp b/searchlib/src/vespa/searchlib/util/statebuf.cpp
new file mode 100644
index 00000000000..69021f3b5f5
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/util/statebuf.cpp
@@ -0,0 +1,215 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "statebuf.h"
+
+static const char *hexx = "0123456789abcdef";
+
+namespace search
+{
+
+void
+StateBuf::overflow() noexcept
+{
+ abort();
+}
+
+
+
+StateBuf::StateBuf(void *buf, size_t bufLen) noexcept
+ : _start(static_cast<char *>(buf)),
+ _cur(static_cast<char *>(buf)),
+ _end(static_cast<char *>(buf) + bufLen)
+{
+}
+
+
+StateBuf &
+StateBuf::operator<<(const char *s) noexcept
+{
+ for (const char *p = s; *p != '\0'; ++p) {
+ *this << *p;
+ }
+ return *this;
+}
+
+
+StateBuf &
+StateBuf::appendQuoted(const char *s) noexcept
+{
+ *this << '"';
+ for (const char *p = s; *p != '\0'; ++p) {
+ switch (*p) {
+ case '\\':
+ *this << '\\' << '\\';
+ break;
+ case '\n':
+ *this << '\\' << 'n';
+ break;
+ case '"':
+ *this << '\\' << '"';
+ break;
+ default:
+ *this << *p;
+ }
+ }
+ *this << '"';
+ return *this;
+}
+
+
+StateBuf &
+StateBuf::appendKey(const char *s) noexcept
+{
+ if (_cur != _start) {
+ *this << ' ';
+ }
+ *this << s << '=';
+ return *this;
+}
+
+
+StateBuf &
+StateBuf::operator<<(unsigned long val) noexcept
+{
+ char buf[22];
+ char *p = buf;
+ for (; val != 0; ++p) {
+ *p = '0' + (val % 10);
+ val /= 10;
+ }
+ if (p == buf) {
+ *this << '0';
+ }
+ while (p != buf) {
+ --p;
+ *this << *p;
+ }
+ return *this;
+}
+
+
+StateBuf &
+StateBuf::operator<<(long val) noexcept
+{
+ if (val < 0) {
+ *this << '-' << static_cast<unsigned long>(- val);
+ } else {
+ *this << static_cast<unsigned long>(val);
+ }
+ return *this;
+}
+
+
+StateBuf &
+StateBuf::operator<<(unsigned int val) noexcept
+{
+ *this << static_cast<unsigned long>(val);
+ return *this;
+}
+
+
+StateBuf &
+StateBuf::operator<<(int val) noexcept
+{
+ *this << static_cast<long>(val);
+ return *this;
+}
+
+
+StateBuf &
+StateBuf::appendDecFraction(unsigned long val, unsigned int width) noexcept
+{
+ char buf[22];
+ if (width > sizeof(buf)) {
+ abort();
+ }
+ char *p = buf;
+ char *pe = buf + width;
+ for (; p != pe; ++p) {
+ *p = '0' + (val % 10);
+ val /= 10;
+ }
+ while (p != buf) {
+ --p;
+ *this << *p;
+ }
+ return *this;
+}
+
+StateBuf &
+StateBuf::appendHex(unsigned long val) noexcept
+{
+ *this << "0x";
+ for (int shft = 64; shft != 0;) {
+ shft -= 4;
+ *this << hexx[(val >> shft) & 15];
+ }
+ return *this;
+}
+
+
+StateBuf &
+StateBuf::operator<<(const struct timespec &ts) noexcept
+{
+ (*this << static_cast<unsigned long>(ts.tv_sec) << '.').
+ appendDecFraction(static_cast<unsigned long>(ts.tv_nsec), 9);
+ return *this;
+}
+
+
+StateBuf &
+StateBuf::appendTimestamp(const struct timespec &ts) noexcept
+{
+ appendKey("ts") << ts;
+ return *this;
+}
+
+
+StateBuf &
+StateBuf::appendTimestamp() noexcept
+{
+ struct timespec ts;
+ /*
+ * clock_gettime() is supposed to be async signal safe.
+ * gettimeofday() is not documented to be async signal safe.
+ */
+ int gtres = clock_gettime(CLOCK_REALTIME, &ts);
+ if (gtres != 0) {
+ abort();
+ }
+ appendTimestamp(ts);
+ return *this;
+}
+
+
+StateBuf &
+StateBuf::appendAddr(void *addr) noexcept
+{
+ appendKey("addr");
+ appendHex(reinterpret_cast<unsigned long>(addr));
+ return *this;
+}
+
+
+size_t
+StateBuf::size() const noexcept
+{
+ return _cur - _start;
+};
+
+
+const char *
+StateBuf::base() const noexcept
+{
+ return _start;
+}
+
+
+std::string
+StateBuf::str() const
+{
+ return std::string(_start, _cur);
+}
+
+}
diff --git a/searchlib/src/vespa/searchlib/util/statebuf.h b/searchlib/src/vespa/searchlib/util/statebuf.h
new file mode 100644
index 00000000000..0e2df4f8d7b
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/util/statebuf.h
@@ -0,0 +1,92 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <string>
+
+namespace search
+{
+
+/**
+ * Class used to serialize application state in a mostly safe manner.
+ *
+ * Only async signal safe methods can be called, except for unit test
+ * helper methods (str).
+ *
+ */
+class StateBuf
+{
+ char *_start;
+ char *_cur;
+ char *_end;
+
+ void
+ overflow() noexcept __attribute__((__noinline__, __noreturn__));
+
+public:
+ StateBuf(void *buf, size_t bufLen) noexcept;
+
+ inline StateBuf &
+ operator<<(char c) noexcept __attribute__((__always_inline__))
+ {
+ if (__builtin_expect(_cur != _end, true)) {
+ *_cur++ = c;
+ return *this;
+ }
+ overflow();
+ }
+
+
+ StateBuf &
+ operator<<(const char *s) noexcept;
+
+ StateBuf &
+ appendQuoted(const char *s) noexcept;
+
+ StateBuf &
+ appendKey(const char *s) noexcept;
+
+ StateBuf &
+ operator<<(const struct timespec &ts) noexcept;
+
+ StateBuf &
+ appendTimestamp(const struct timespec &ts) noexcept;
+
+ StateBuf &
+ appendTimestamp() noexcept;
+
+ StateBuf &
+ appendAddr(void *addr) noexcept;
+
+ StateBuf &
+ operator<<(unsigned long val) noexcept;
+
+ StateBuf &
+ operator<<(long val) noexcept;
+
+ StateBuf &
+ operator<<(unsigned int val) noexcept;
+
+ StateBuf &
+ operator<<(int val) noexcept;
+
+ StateBuf &
+ appendDecFraction(unsigned long val, unsigned int width) noexcept;
+
+ StateBuf &
+ appendHex(unsigned long val) noexcept;
+
+ size_t
+ size() const noexcept;
+
+ const char *
+ base() const noexcept;
+
+ /*
+ * Unit test helper methods.
+ */
+ std::string
+ str() const;
+};
+
+}
diff --git a/searchlib/src/vespa/searchlib/util/statefile.cpp b/searchlib/src/vespa/searchlib/util/statefile.cpp
new file mode 100644
index 00000000000..d093a036f19
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/util/statefile.cpp
@@ -0,0 +1,460 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include "statefile.h"
+#include <system_error>
+#include <mutex>
+
+using Mutex = std::mutex;
+using Guard = std::lock_guard<Mutex>;
+
+namespace search
+{
+
+namespace
+{
+
+Mutex stateMutex;
+
+/*
+ * Assumes that std::atomic implementation is lock free, which it is
+ * for gcc 4.9.2. Usage is not async signal safe unless the
+ * implementation is lock free.
+ */
+std::atomic<int> nestingCount;
+
+int
+myopen(const char *name) noexcept
+{
+ int fd = open(name, O_CREAT | O_CLOEXEC | O_SYNC | O_RDWR, 0644);
+ if (fd < 0) {
+ std::error_code ec(errno, std::system_category());
+ fprintf(stderr,
+ "Could not open %s: %s\n", name, ec.message().c_str());
+ abort();
+ }
+ return fd;
+}
+
+
+void
+myfstat(const char *name, int fd, struct stat &stbuf) noexcept
+{
+ int fsres = fstat(fd, &stbuf);
+ if (fsres != 0) {
+ std::error_code ec(errno, std::system_category());
+ fprintf(stderr, "Could not fstat %s: %s\n", name, ec.message().c_str());
+ abort();
+ }
+}
+
+
+void
+mypread(const char *name, int fd, void *buf, size_t bufLen, int64_t offset) noexcept
+{
+ ssize_t rres = pread(fd, buf, bufLen, offset);
+ if (static_cast<size_t>(rres) != bufLen) {
+ if (rres >= 0) {
+ fprintf(stderr,
+ "Could not read %zu bytes from %s offset %" PRId64
+ ": short read (%zd)\n",
+ bufLen, name, offset, rres);
+ } else {
+ std::error_code ec(errno, std::system_category());
+ fprintf(stderr,
+ "Could not read %zu bytes from %s offset %" PRId64 ": %s\n",
+ bufLen, name, offset, ec.message().c_str());
+ }
+ abort();
+ }
+}
+
+
+void
+mypwrite(const char *name, int fd, const void *buf, size_t bufLen,
+ int64_t offset) noexcept
+{
+ ssize_t wres = pwrite(fd, buf, bufLen, offset);
+ if (static_cast<size_t>(wres) != bufLen) {
+ if (wres >= 0) {
+ fprintf(stderr,"Could not write %zu bytes to %s offset %" PRId64
+ ": short write (%zd)\n",
+ bufLen, name, offset, wres);
+ } else {
+ std::error_code ec(errno, std::system_category());
+ fprintf(stderr,
+ "Could not write %zu bytes to %s offset %" PRId64 ": %s\n",
+ bufLen, name, offset, ec.message().c_str());
+ }
+ abort();
+ }
+}
+
+
+void
+myclose(const char *name, int fd) noexcept
+{
+ int closeres = close(fd);
+ if (closeres != 0) {
+ std::error_code ec(errno, std::system_category());
+ fprintf(stderr, "Could not close %s: %s\n",
+ name, ec.message().c_str());
+ abort();
+ }
+}
+
+
+void
+myfsync(const char *name, int fd) noexcept
+{
+ int fsyncres = fsync(fd);
+ if (fsyncres != 0) {
+ std::error_code ec(errno, std::system_category());
+ fprintf(stderr, "Could not fsync %s: %s\n",
+ name, ec.message().c_str());
+ abort();
+ }
+}
+
+
+void
+myunlink(const char *name) noexcept
+{
+ int unlinkres = unlink(name);
+ if (unlinkres != 0 && errno != ENOENT) {
+ std::error_code ec(errno, std::system_category());
+ fprintf(stderr, "Could not unlink %s: %s\n",
+ name, ec.message().c_str());
+ abort();
+ }
+}
+
+
+/*
+ * Write string to standard error using only async signal safe methods.
+ */
+void
+mystderr(const char *msg) noexcept
+{
+ const char *p = msg;
+ while (*p != '\0') {
+ ++p;
+ }
+ write(STDERR_FILENO, msg, static_cast<size_t>(p - msg));
+}
+
+
+/*
+ * Get async signal safe spinlock.
+ */
+void
+getLock() noexcept
+{
+ int expzero = 0;
+ while (!nestingCount.compare_exchange_weak(expzero, 1)) {
+ expzero = 0;
+ sleep(1);
+ }
+}
+
+
+/*
+ * Release async signal safe spinlock
+ */
+void
+releaseLock() noexcept
+{
+ nestingCount = 0;
+}
+
+class SpinGuard
+{
+public:
+ SpinGuard() noexcept
+ {
+ getLock();
+ }
+
+ ~SpinGuard() noexcept
+ {
+ releaseLock();
+ }
+};
+
+}
+
+
+StateFile::StateFile(const std::string &name)
+ : _name(nullptr),
+ _historyName(nullptr),
+ _gen(0)
+{
+ _name = strdup(name.c_str());
+ std::string historyName = name + ".history";
+ _historyName = strdup(historyName.c_str());
+ zeroPad();
+ fixupHistory();
+}
+
+
+StateFile::~StateFile()
+{
+ free(_name);
+ free(_historyName);
+}
+
+
+void
+StateFile::erase(const std::string &name)
+{
+ std::string historyName = name + ".history";
+ myunlink(name.c_str());
+ myunlink(historyName.c_str());
+}
+
+
+void
+StateFile::readRawState(std::vector<char> &buf)
+{
+ struct stat stbuf;
+ Guard guard(stateMutex); // Serialize states
+ SpinGuard spinGuard;
+ int fd = myopen(_name);
+ myfstat(_name, fd, stbuf);
+ buf.resize(stbuf.st_size);
+ mypread(_name, fd, &buf[0], buf.size(), 0);
+ myclose(_name, fd);
+}
+
+
+void
+StateFile::trimState(std::vector<char> &buf)
+{
+ auto newBufEnd = buf.cbegin();
+ auto bufEnd = buf.cend();
+ for (auto p = buf.cbegin(); p != bufEnd; ++p) {
+ if (*p == '\n') { // End of state string
+ newBufEnd = p + 1;
+ break; // stop scanning after first state
+ }
+ if (*p == '\0') { // padding encountered, stop scanning for end
+ break;
+ }
+ }
+ size_t newStateSize = newBufEnd - buf.cbegin();
+ buf.resize(newStateSize);
+}
+
+
+void
+StateFile::readState(std::vector<char> &buf)
+{
+ readRawState(buf);
+ trimState(buf);
+}
+
+
+void
+StateFile::trimHistory(std::vector<char> &history, const char *name, int hfd,
+ std::vector<char> &lastHistoryState)
+{
+ auto historyEnd = history.cend();
+ auto prevHistoryEnd = history.cbegin();
+ auto newHistoryEnd = history.cbegin();
+ for (auto p = history.cbegin(); p != historyEnd; ++p) {
+ if (*p == '\n') { // End of state string
+ prevHistoryEnd = newHistoryEnd;
+ newHistoryEnd = p + 1;
+ }
+ if (*p == '\0') { // corruption, stop scanning for end
+ break;
+ }
+ }
+ std::vector<char> historyEntry(prevHistoryEnd, newHistoryEnd);
+ size_t newHistSize = newHistoryEnd - history.cbegin();
+ if (newHistSize != history.size()) {
+ int ftruncres = ftruncate(hfd, newHistSize);
+ if (ftruncres != 0) {
+ std::error_code ec(errno, std::system_category());
+ fprintf(stderr, "Could not truncate %s: %s\n",
+ name, ec.message().c_str());
+ abort();
+ }
+ history.resize(newHistSize);
+ }
+ historyEntry.swap(lastHistoryState);
+}
+
+/*
+ * Fixup history after failed append, e.g. trucated write caused partial
+ * last state.
+ */
+void
+StateFile::fixupHistory()
+{
+ struct stat sthbuf;
+ int hfd = myopen(_historyName);
+ myfstat(_historyName, hfd, sthbuf);
+ std::vector<char> history(sthbuf.st_size);
+ mypread(_historyName, hfd, &history[0], history.size(), 0);
+ std::vector<char> lastHistory;
+ trimHistory(history, _historyName, hfd, lastHistory);
+ std::vector<char> buf;
+ readState(buf);
+ if (!buf.empty() && buf != lastHistory) {
+ mypwrite(_historyName, hfd, &buf[0], buf.size(), history.size());
+ myfsync(_historyName, hfd);
+ }
+ myclose(_historyName, hfd);
+ if (buf.empty() && !lastHistory.empty()) {
+ // Restore state in main state file from last state in history.
+ int fd = myopen(_name);
+ mypwrite(_name, fd, &lastHistory[0], lastHistory.size(), 0);
+ myfsync(_name, fd);
+ myclose(_name, fd);
+ }
+}
+
+
+void
+StateFile::zeroPad()
+{
+ struct stat stbuf;
+ int minSize = 4096;
+ int fd = myopen(_name);
+ myfstat(_name, fd, stbuf);
+ std::vector<char> buf(minSize);
+ if (stbuf.st_size < minSize) {
+ int padSize = minSize - stbuf.st_size;
+ mypwrite(_name, fd, &buf[0], padSize, stbuf.st_size);
+ myfsync(_name, fd);
+ }
+ myclose(_name, fd);
+}
+
+
+void
+StateFile::checkState(const char *buf, size_t bufLen) noexcept
+{
+ const char *pe = buf + bufLen;
+ for (const char *p = buf; p < pe; ++p) {
+ if (*p == '\n') {
+ if (p != buf + bufLen - 1) {
+ mystderr("statefile state corrupted: early newline\n");
+ abort();
+ }
+ return;
+ }
+ if (*p == '\0') {
+ mystderr("statefile state corrupted: nul byte found\n");
+ abort();
+ }
+ }
+ mystderr("statefile state corrupted: missing newline at end\n");
+ abort();
+}
+
+
+void
+StateFile::internalAddSignalState(const char *buf, size_t bufLen,
+ const char *name,
+ int appendFlag,
+ const char *openerr,
+ const char *writeerr,
+ const char *fsyncerr,
+ const char *closeerr) noexcept
+{
+ // Write to main state file, overwriting previous state
+ int fd = open(name, O_CREAT | O_CLOEXEC | O_SYNC | O_RDWR | appendFlag,
+ 0644);
+ if (fd < 0) {
+ mystderr(openerr);
+ abort();
+ }
+ ssize_t wres = write(fd, buf, bufLen);
+ if (static_cast<size_t>(wres) != bufLen) {
+ mystderr(writeerr);
+ abort();
+ }
+ int fsyncres = fsync(fd);
+ if (fsyncres != 0) {
+ mystderr(fsyncerr);
+ abort();
+ }
+ int closeres = close(fd);
+ if (closeres != 0) {
+ mystderr(closeerr);
+ abort();
+ }
+}
+
+/*
+ * Write state string to file. State string contains one newline, at the end.
+ *
+ * Async signal safe functions used:
+ * open(), write(), fsync(), close()
+ *
+ * Is in signal handler, thus cannot throw exception.
+ */
+void
+StateFile::addSignalState(const char *buf, size_t bufLen) noexcept
+{
+ checkState(buf, bufLen);
+ SpinGuard spinGuard;
+ // Write to main state file, overwriting previous state
+ internalAddSignalState(buf, bufLen, _name, 0,
+ "Could not open statefile for read/write\n",
+ "Error writing to statefile\n",
+ "Error syncing statefile\n",
+ "Error closing statefile\n");
+ // Write to state file history, appending
+ internalAddSignalState(buf, bufLen, _historyName, O_APPEND,
+ "Could not open statefile history for read/write\n",
+ "Error writing to statefile history\n",
+ "Error syncing statefile history\n",
+ "Error closing statefile history\n");
+ ++_gen;
+}
+
+/*
+ * Write state string to file. State string contains one newline, at the end.
+ */
+void
+StateFile::addState(const char *buf, size_t bufLen, bool signal)
+{
+ if (signal) {
+ // In signal context, degraded error reporting on state file failures
+ addSignalState(buf, bufLen);
+ return;
+ }
+ checkState(buf, bufLen);
+ Guard guard(stateMutex); // Serialize states
+ SpinGuard spinGuard;
+ {
+ // Write to main state file, overwriting previous state
+ int fd = myopen(_name);
+ mypwrite(_name, fd, buf, bufLen, 0);
+ myfsync(_name, fd);
+ myclose(_name, fd);
+ }
+ {
+ // Write to state file history, appending
+ int hfd = myopen(_historyName);
+ struct stat sthbuf;
+ myfstat(_historyName, hfd, sthbuf);
+ mypwrite(_historyName, hfd, buf, bufLen, sthbuf.st_size);
+ myfsync(_historyName, hfd);
+ myclose(_historyName, hfd);
+ }
+ ++_gen;
+}
+
+
+int
+StateFile::getGen() const
+{
+ return _gen;
+}
+
+
+}
diff --git a/searchlib/src/vespa/searchlib/util/statefile.h b/searchlib/src/vespa/searchlib/util/statefile.h
new file mode 100644
index 00000000000..fddfc5bb8fc
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/util/statefile.h
@@ -0,0 +1,106 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <string>
+#include <atomic>
+
+namespace search
+{
+
+/*
+ * Class used to store application state in a mostly safe manner.
+ *
+ * It maintaines two files, one file with zero-padding at end to store
+ * last state, and another file with history of states.
+ *
+ * State files can not be shared between processes, file locking is not
+ * async signal safe.
+ *
+ * Standalone implementation (doesn't use fastos or vespalib) to
+ * ensure that we don't trigger callback hooks in fastos.
+ *
+ */
+class StateFile
+{
+ char *_name;
+ char *_historyName;
+ std::atomic<int> _gen;
+
+ /*
+ * Zero pad file, to ensure that a later write won't run out of space.
+ */
+ void
+ zeroPad();
+
+ /*
+ * Read state file to buffer in raw form, including padding.
+ */
+ void
+ readRawState(std::vector<char> &buf);
+
+ /*
+ * Trim padding and everything after state (i.e. stop at first newline).
+ */
+ static void
+ trimState(std::vector<char> &buf);
+
+ /*
+ * Trim partial state from end of history.
+ */
+ static void
+ trimHistory(std::vector<char> &history, const char *historyName, int hfd,
+ std::vector<char> &lastHistoryState);
+
+ /*
+ * Fixup history: trim partial state from end and append current state
+ * in state file to history if different from last state in history.
+ * If main state file doesn't have a state but history has a state then
+ * restore main state from history.
+ */
+ void
+ fixupHistory();
+
+ /*
+ * Check that state doesn't contain nul bytes or early newline and
+ * that it is terminated by a newline at end.
+ */
+ void
+ checkState(const char *buf, size_t bufLen) noexcept;
+
+ void
+ internalAddSignalState(const char *buf, size_t bufLen,
+ const char *name,
+ int appendFlag,
+ const char *openerr,
+ const char *writeerr,
+ const char *fsyncerr,
+ const char *closeerr) noexcept;
+
+ void
+ addSignalState(const char *buf, size_t bufLen) noexcept;
+public:
+ StateFile(const std::string &name);
+
+ ~StateFile();
+
+ void
+ addState(const char *buf, size_t bufLen, bool signal);
+
+ static void
+ erase(const std::string &name);
+
+ /*
+ * Read state file to buffer and trim it down to a state.
+ */
+ void
+ readState(std::vector<char> &buf);
+
+ /*
+ * Get current state generation (bumped whenever new state is written).
+ */
+ int
+ getGen(void) const;
+};
+
+}
diff --git a/searchlib/src/vespa/searchlib/util/stringenum.cpp b/searchlib/src/vespa/searchlib/util/stringenum.cpp
new file mode 100644
index 00000000000..c89312e4276
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/util/stringenum.cpp
@@ -0,0 +1,131 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright (C) 2001-2003 Fast Search & Transfer ASA
+// Copyright (C) 2003 Overture Services Norway AS
+
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".seachlib.util.stringenum");
+#include <vespa/searchlib/util/stringenum.h>
+#include <vespa/fastlib/io/bufferedfile.h>
+
+namespace search {
+namespace util {
+
+static inline char *
+StripString(char *str)
+{
+ char *first = NULL; // first non-space char
+ char *last = NULL; // last non-space char
+
+ if (str == NULL)
+ return NULL;
+
+ for (; *str != '\0' && isspace(*str); str++);
+ first = str;
+
+ for (; *str != '\0'; str++)
+ if (!isspace(*str))
+ last = str;
+
+ if (last != NULL)
+ *(last + 1) = '\0';
+
+ return first;
+}
+
+StringEnum::~StringEnum()
+{
+}
+
+void
+StringEnum::CreateReverseMapping() const
+{
+ _reverseMap.resize(_numEntries);
+
+ for (Map::const_iterator it = _mapping.begin();
+ it != _mapping.end();
+ it++)
+ {
+ assert(it->second >= 0);
+ assert(it->second < (int)_numEntries);
+ _reverseMap[it->second] = it->first.c_str();
+ }
+}
+
+
+bool
+StringEnum::Save(const char *filename)
+{
+ char str[1024];
+
+ Fast_BufferedFile file;
+ file.WriteOpen(filename);
+ if (!file.IsOpened())
+ return false;
+
+ file.SetSize(0);
+ sprintf(str, "%d\n", _numEntries);
+ file.WriteString(str);
+
+ for (uint32_t i = 0; i < _numEntries; i++) {
+ file.WriteString(Lookup(i));
+ file.WriteString("\n");
+ }
+
+ file.Close();
+ return true;
+}
+
+
+bool
+StringEnum::Load(const char *filename)
+{
+ char line[1024];
+ char *pt;
+ uint32_t entries; // from first line of file
+ uint32_t lineNumber; // current line in file
+ uint32_t entryCnt; // # entries obtained from file
+
+ Clear();
+
+ Fast_BufferedFile file;
+ if (!file.OpenReadOnly(filename))
+ return false;
+
+ lineNumber = 0;
+ entryCnt = 0;
+
+ pt = StripString(file.ReadLine(line, sizeof(line)));
+ if (pt == NULL || *pt == '\0')
+ return false;
+ lineNumber++;
+
+ entries = atoi(pt);
+
+ while (!file.Eof()) {
+ pt = StripString(file.ReadLine(line, sizeof(line)));
+ if (pt == NULL) // end of input ?
+ break;
+ lineNumber++;
+ if (*pt == '\0') // empty line ?
+ continue;
+
+ uint32_t tmp = _numEntries;
+ if (static_cast<uint32_t>(Add(pt)) != tmp) {
+ LOG(error, "(%s:%d) duplicate enum entry: %s", filename, lineNumber, pt);
+ }
+ entryCnt++;
+ }
+
+ file.Close();
+ if (entries != _numEntries
+ || entries != entryCnt) {
+ Clear();
+ return false;
+ }
+ return true;
+}
+
+}
+}
diff --git a/searchlib/src/vespa/searchlib/util/stringenum.h b/searchlib/src/vespa/searchlib/util/stringenum.h
new file mode 100644
index 00000000000..86a84261b10
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/util/stringenum.h
@@ -0,0 +1,147 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright (C) 2001-2003 Fast Search & Transfer ASA
+// Copyright (C) 2003 Overture Services Norway AS
+
+#pragma once
+
+#include <vector>
+#include <vespa/vespalib/stllike/hash_map.h>
+
+namespace search {
+namespace util {
+
+/**
+ * An object of this class represents an enumeration of a set of
+ * strings. This is useful for mapping a set of strings into a
+ * continuous range of integers.
+ **/
+class StringEnum
+{
+private:
+ StringEnum(const StringEnum &);
+ StringEnum& operator=(const StringEnum &);
+ typedef vespalib::hash_map<vespalib::string, int> Map;
+
+ uint32_t _numEntries;
+ Map _mapping;
+ mutable std::vector<const char *> _reverseMap;
+
+ /**
+ * Create a reverse mapping that enables the user to map integers
+ * into strings. This method is called by the Lookup(int) method.
+ **/
+ void CreateReverseMapping() const;
+
+public:
+
+ /**
+ * Create an empty string enumeration.
+ **/
+ StringEnum()
+ : _numEntries(0),
+ _mapping(),
+ _reverseMap()
+ {
+ }
+
+ /**
+ * Destructor.
+ **/
+ ~StringEnum();
+
+
+ /**
+ * Discard all entries held by this object.
+ **/
+ void Clear()
+ {
+ _reverseMap.clear();
+ _mapping.clear();
+ _numEntries = 0;
+ }
+
+
+ /**
+ * Add a string to this enumeration. Equal strings will get the same
+ * enumerated value. Different string will get different enumerated
+ * values. The set of values returned from multiple invocations of
+ * this method will always be a contiuous range beginning at 0.
+ *
+ * @return the enumerated value for the given string.
+ * @param str string you want to add.
+ **/
+ int Add(const char *str)
+ {
+ Map::const_iterator found(_mapping.find(str));
+ if (found != _mapping.end()) {
+ return found->second;
+ } else {
+ int value = _numEntries++;
+ _mapping[str] = value;
+ return value;
+ }
+ }
+
+
+ /**
+ * Obtain the enumerated value for the given string.
+ *
+ * @return enumerated value or -1 if not present.
+ * @param str the string to look up.
+ **/
+ int Lookup(const char *str) const
+ {
+ Map::const_iterator found(_mapping.find(str));
+ return (found != _mapping.end()) ? found->second : -1;
+ }
+
+
+ /**
+ * Obtain the string for the given enumerated value.
+ *
+ * @return string or NULL if out of range.
+ * @param value the enumerated value to look up.
+ **/
+ const char *Lookup(uint32_t value) const
+ {
+ if (value >= _numEntries)
+ return NULL;
+
+ if (_numEntries > _reverseMap.size())
+ CreateReverseMapping();
+
+ return _reverseMap[value];
+ }
+
+
+ /**
+ * Obtain the number of entries currently present in this
+ * enumeration.
+ *
+ * @return current number of entries.
+ **/
+ uint32_t GetNumEntries() const { return _numEntries; }
+
+
+ /**
+ * Save the enumeration currently held by this object to file.
+ *
+ * @return success(true)/fail(false).
+ * @param filename name of save file.
+ **/
+ bool Save(const char *filename);
+
+
+ /**
+ * Load an enumeration from file. The loaded enumeration will
+ * replace the one currently held by this object.
+ *
+ * @return success(true)/fail(false).
+ * @param filename name of file to load.
+ **/
+ bool Load(const char *filename);
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/util/url.cpp b/searchlib/src/vespa/searchlib/util/url.cpp
new file mode 100644
index 00000000000..d60ed222305
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/util/url.cpp
@@ -0,0 +1,555 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright (C) 2000-2003 Fast Search & Transfer ASA
+// Copyright (C) 2003 Overture Services Norway AS
+
+/*
+ * Note for bugs / fixes:
+ * Please update fastserver4/test/regress/url/testurl.cpp
+ * with test for new url's when bugs are discovered/fixed.
+ */
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+#include <vespa/searchlib/util/url.h>
+
+LOG_SETUP(".searchlib.util.url");
+
+namespace search {
+namespace util {
+bool
+URL::IsAlphaChar(unsigned char c) // According to RFC2396
+{
+ return (c>='A' && c<='Z') || (c>='a' && c<='z');
+}
+
+bool
+URL::IsDigitChar(unsigned char c) // According to RFC2396
+{
+ return (c>='0' && c<='9');
+}
+
+bool
+URL::IsMarkChar(unsigned char c) // According to RFC2396
+{
+ return (c=='-' || c=='_' || c=='.' || c=='!' || c=='~' ||
+ c=='*' || c=='\'' || c=='(' || c==')');
+}
+
+bool
+URL::IsUnreservedChar(unsigned char c) // According to RFC2396
+{
+ return (IsAlphaChar(c) ||
+ IsDigitChar(c) ||
+ IsMarkChar(c));
+}
+
+bool
+URL::IsEscapedChar(unsigned char c) // According to RFC2396
+{
+ // Cheat! Shoud be ('%' hex hex)
+ return (c=='%');
+}
+
+bool
+URL::IsReservedChar(unsigned char c) // According to RFC2396
+{
+ return (c==';' || c=='/' || c=='?' || c==':' || c=='@' ||
+ c=='&' || c=='=' || c=='+' || c=='$' || c==',');
+}
+
+bool
+URL::IsPChar(unsigned char c) // According to RFC2068
+{
+ return (IsUnreservedChar(c) ||
+ IsEscapedChar(c) ||
+ (c==':' || c=='@' || c=='&' || c=='=' || c=='+' ||
+ c=='$' || c==','));
+}
+bool
+URL::IsUricChar(unsigned char c) // According to RFC2068
+{
+ return (IsUnreservedChar(c) ||
+ IsEscapedChar(c) ||
+ IsReservedChar(c));
+}
+
+
+
+
+
+bool
+URL::IsSchemeChar(unsigned char c) // According to RFC2068
+{
+ return (IsAlphaChar(c) ||
+ IsDigitChar(c) ||
+ c=='+' || c=='-' || c=='.');
+}
+
+bool
+URL::IsHostChar(unsigned char c) // According to RFC2068
+{
+ return (IsAlphaChar(c) ||
+ IsDigitChar(c) ||
+ c=='.' || c=='+' || c=='-');
+}
+
+bool
+URL::IsPortChar(unsigned char c) // According to RFC2068
+{
+ return IsDigitChar(c);
+}
+
+bool
+URL::IsPathChar(unsigned char c) // According to RFC2068
+{
+ return (IsPChar(c) ||
+ c=='/' || c==';');
+}
+
+bool
+URL::IsFileNameChar(unsigned char c) // According to RFC2068
+{
+ return IsPChar(c);
+}
+
+bool
+URL::IsParamChar(unsigned char c) // According to RFC2068
+{
+ return IsPChar(c) || c=='/';
+}
+
+bool
+URL::IsParamsChar(unsigned char c) // According to RFC2068
+{
+ return IsParamChar(c) || c==';';
+}
+
+bool
+URL::IsQueryChar(unsigned char c) // According to RFC2068
+{
+ return IsUricChar(c);
+}
+
+bool
+URL::IsFragmentChar(unsigned char c) // According to RFC2068
+{
+ return IsUricChar(c);
+}
+
+bool
+URL::IsTokenChar(unsigned char c) // According to FAST URL tokenization
+{
+ return (IsAlphaChar(c) ||
+ IsDigitChar(c) ||
+ c == '_' || c == '-');
+}
+
+unsigned char *
+URL::ParseURLPart(unsigned char *src,
+ unsigned char *dest,
+ unsigned int destsize,
+ bool (*IsPartChar)(unsigned char c))
+{
+ unsigned char *p = src;
+ unsigned int len = 0;
+
+ while (IsPartChar(*p) && len<destsize-1) {
+ len++;
+ p++;
+ }
+ if (len > 0) {
+ strncpy(reinterpret_cast<char *>(dest),
+ reinterpret_cast<char *>(src), len);
+ dest[len] = '\0';
+ }
+
+ return p;
+}
+
+
+URL::URL(const unsigned char *url, size_t len) :
+ _maintld(_emptystring),
+ _tld(reinterpret_cast<const unsigned char *>("")),
+ _domain(reinterpret_cast<const unsigned char *>("")),
+ _tldregion(reinterpret_cast<const unsigned char *>("")),
+ _pathDepth(0),
+ _startScheme(&_token[sizeof(_token) - 1]),
+ _startHost(&_token[sizeof(_token) - 1]),
+ _startDomain(&_token[sizeof(_token) - 1]),
+ _startMainTld(&_token[sizeof(_token)-1]),
+ _startPort(&_token[sizeof(_token)-1]),
+ _startPath(&_token[sizeof(_token)-1]),
+ _startFileName(&_token[sizeof(_token) - 1]),
+ _startExtension(&_token[sizeof(_token) - 1]),
+ _startParams(&_token[sizeof(_token) - 1]),
+ _startQuery(&_token[sizeof(_token) - 1]),
+ _startFragment(&_token[sizeof(_token) - 1]),
+ _startAddress(&_token[sizeof(_token) - 1]),
+ _tokenPos(_url),
+ _gotCompleteURL(false)
+{
+ Reset();
+ if (url != NULL)
+ SetURL(url, len);
+}
+
+
+void
+URL::Reset(void)
+{
+ _gotCompleteURL = false;
+
+ _emptystring[0] = '\0';
+
+ _url[0] = '\0';
+ _scheme[0] = '\0';
+ _host[0] = '\0';
+ _siteowner[0] = '\0';
+ _port[0] = '\0';
+ _path[0] = '\0';
+ _filename[0] = '\0';
+ _extension[0] = '\0';
+ _params[0] = '\0';
+ _query[0] = '\0';
+ _fragment[0] = '\0';
+ _address[0] = '\0';
+ _maintld = _emptystring; // Hack needed to please langid.
+ _tld = (const unsigned char *) "";
+ _domain = (const unsigned char *) "";
+ _tldregion = (const unsigned char *) "";
+ _pathDepth = 0;
+
+ _token[0] = '\0';
+
+ _startScheme = &_token[sizeof(_token)-1];
+ _startHost = &_token[sizeof(_token)-1];
+ _startDomain = &_token[sizeof(_token)-1];
+ _startMainTld = &_token[sizeof(_token)-1];
+ _startPort = &_token[sizeof(_token)-1];
+ _startPath = &_token[sizeof(_token)-1];
+ _startFileName = &_token[sizeof(_token)-1];
+ _startExtension = &_token[sizeof(_token)-1];
+ _startParams = &_token[sizeof(_token)-1];
+ _startQuery = &_token[sizeof(_token)-1];
+ _startFragment = &_token[sizeof(_token)-1];
+ _startAddress = &_token[sizeof(_token)-1];
+
+ _tokenPos = _url;
+}
+
+void
+URL::SetURL(const unsigned char *url, size_t length)
+{
+ int len = 0;
+ unsigned char
+ *p, *ptmp, *siteowner = 0, *filename = 0, *extension = 0;
+
+ Reset();
+ if (length > MAX_URL_LEN) {
+ LOG(warning,
+ "Max link size overflow: len=%lu, max=%d",
+ static_cast<unsigned long>(length), MAX_URL_LEN);
+ length = MAX_URL_LEN;
+ }
+ if (length == 0)
+ length = MAX_URL_LEN;
+
+ strncpy(reinterpret_cast<char *>(_url),
+ reinterpret_cast<const char *>(url), length);
+ _url[length] = '\0';
+
+ p = _url;
+
+ // Look for ':' as the first non-scheme-char character. If so => scheme
+ for (p = _url, len = 0; *p != '\0' && IsSchemeChar(*p); p++, len++)
+ ;
+
+ if (*p++ == ':') {
+ strncpy(reinterpret_cast<char *>(_scheme),
+ reinterpret_cast<char *>(_url), len);
+ _scheme[len] = '\0';
+ _startScheme = _url;
+ } else
+ p = _url;
+
+ // get host name
+ if ((strncasecmp(reinterpret_cast<char *>(_scheme), "http", 4) == 0 &&
+ p[0] == '/' && p[1] == '/') ||
+ strncasecmp(reinterpret_cast<char *>(_url), "www.", 4) == 0) {
+ if (p[0] == '/' && p[1] == '/')
+ p += 2;
+ _startHost = p;
+ p = ParseURLPart(p, _host, sizeof(_host), IsHostChar);
+
+ // Locate siteowner. eg. 'www.sony.com' => 'sony'
+ if (_host[0] != '\0') {
+ unsigned char *pso;
+
+ int solen = 0;
+
+ // First check entries from config.
+ siteowner = pso = _host;
+
+ for (solen = 0; *pso != '\0'; pso++, solen++) {
+ if (*pso == '.') {
+ siteowner = pso + 1;
+ solen = -1;
+ }
+ }
+ _domain = siteowner;
+ _startDomain = _startHost + (siteowner - _host);
+ _startMainTld = _startDomain;
+
+ // Locate main-tld info.
+ ptmp = reinterpret_cast<unsigned char *>
+ (strrchr(reinterpret_cast<char *>(_host), '.'));
+ if (ptmp != NULL) {
+ _maintld = &ptmp[1];
+ _startMainTld = _startHost + (_maintld - _host);
+ if (*_tld == '\0') {
+ _tld = _maintld;
+ }
+ }
+
+ // If siteowner is not found in config entries use second latest word in host.
+ if (_siteowner[0] == '\0') {
+ pso = reinterpret_cast<unsigned char *>
+ (strrchr(reinterpret_cast<char *>(_host), '.'));
+ if (pso != NULL && pso > _host) {
+ pso--;
+ solen = 0;
+ while (pso > _host && *pso != '.') {
+ solen++;
+ pso--;
+ }
+ if (*pso != '.')
+ solen++;
+ else
+ pso++;
+ if (solen > 0) {
+ strncpy(reinterpret_cast<char *>(_siteowner),
+ reinterpret_cast<char *>(pso), solen);
+ _siteowner[solen] = '\0';
+ _startDomain = _startHost + (pso - _host);
+ _domain = pso;
+ }
+ }
+ }
+ }
+
+ // Parse port number
+ if (*p == ':') {
+ p++;
+ _startPort = p;
+ p = ParseURLPart(p, _port, sizeof(_port), IsDigitChar);
+ }
+ }
+
+ if (_scheme[0] == '\0' ||
+ strncasecmp(reinterpret_cast<char *>(_scheme), "http", 4) == 0) {
+ // Handle http url.
+
+ // Parse path, filename, extension.
+ _startPath = p;
+ p = ParseURLPart(p, _path, sizeof(_path), IsPathChar);
+
+ filename = _path;
+ if (IsFileNameChar(*filename))
+ _pathDepth++;
+ for (ptmp = _path ; *ptmp != '\0' && *ptmp != ';' ; ptmp++)
+ if (*ptmp == '/') {
+ filename = ptmp + 1;
+ if (IsFileNameChar(*filename))
+ _pathDepth++;
+ }
+ _startFileName = _startPath + (filename - _path);
+ ParseURLPart(filename, _filename, sizeof(_filename), IsFileNameChar);
+
+ extension = reinterpret_cast<unsigned char *>
+ (strrchr(reinterpret_cast<char *>(_filename), '.'));
+ if (extension != NULL) {
+ extension++;
+ strcpy(reinterpret_cast<char *>(_extension),
+ reinterpret_cast<char *>(extension));
+ _startExtension = _startFileName + (extension - _filename);
+ }
+
+ // Parse params part.
+ if ((ptmp = reinterpret_cast<unsigned char *>
+ (strchr(reinterpret_cast<char *>(_path), ';'))) != NULL) {
+ ptmp++;
+ _startParams = _startPath + (ptmp - _path);
+ ParseURLPart(ptmp, _params, sizeof(_params), IsParamsChar);
+ }
+
+ // Parse query part.
+ if (*p == '?') {
+ p++;
+ _startQuery = p;
+ p = ParseURLPart(p, _query, sizeof(_query), IsQueryChar);
+ }
+
+ // Parse fragment part
+ if (*p == '#') {
+ p++;
+ _startFragment = p;
+ p = ParseURLPart(p, _fragment, sizeof(_fragment), IsFragmentChar);
+ }
+
+ // stuff the rest into address
+ _startAddress = p;
+ strncpy(reinterpret_cast<char *>(_address),
+ reinterpret_cast<char *>(p), sizeof(_address) - 1);
+ _address[sizeof(_address) - 1] = '\0';
+ } else {
+ _startAddress = p;
+ strncpy(reinterpret_cast<char *>(_address),
+ reinterpret_cast<char *>(p), sizeof(_address) - 1);
+ _address[sizeof(_address) - 1] = '\0';
+ }
+}
+
+bool
+URL::IsBaseURL(void) const
+{
+ return (_scheme[0] != '\0' &&
+ _host[0] != '\0' &&
+ _path[0] == '/');
+}
+
+const unsigned char *
+URL::GetToken(URL_CONTEXT &ctx)
+{
+ int i = 0;
+
+ // Skip whitespace
+ while (!IsTokenChar(*_tokenPos) && *_tokenPos != '\0')
+ _tokenPos++;
+
+ while (IsTokenChar(*_tokenPos))
+ _token[i++] = *_tokenPos++;
+ _token[i] = '\0';
+
+ ctx = URL_SCHEME;
+ if (_tokenPos > _startHost)
+ ctx = URL_HOST;
+ if (_tokenPos > _startDomain)
+ ctx = URL_DOMAIN;
+ if (_tokenPos > _startMainTld)
+ ctx = URL_MAINTLD;
+ if (_tokenPos > _startPort)
+ ctx = URL_PORT;
+ if (_tokenPos > _startPath)
+ ctx = URL_PATH;
+ if (_tokenPos > _startFileName)
+ ctx = URL_FILENAME;
+ if (_tokenPos > _startExtension)
+ ctx = URL_EXTENSION;
+ if (_tokenPos > _startParams)
+ ctx = URL_PARAMS;
+ if (_tokenPos > _startQuery)
+ ctx = URL_QUERY;
+ if (_tokenPos > _startFragment)
+ ctx = URL_FRAGMENT;
+ if (_tokenPos > _startAddress)
+ ctx = URL_ADDRESS;
+
+ if (_token[0] != '\0')
+ return _token;
+ else
+ return NULL;
+}
+
+const char *
+URL::ContextName(URL_CONTEXT ctx)
+{
+ switch (ctx) {
+ case URL_SCHEME:
+ return "SCHEME";
+ case URL_HOST:
+ return "HOST";
+ case URL_DOMAIN:
+ return "DOMAIN";
+ case URL_MAINTLD:
+ return "MAINTLD";
+ case URL_PORT:
+ return "PORT";
+ case URL_PATH:
+ return "PATH";
+ case URL_FILENAME:
+ return "FILENAME";
+ case URL_EXTENSION:
+ return "EXTENSION";
+ case URL_PARAMS:
+ return "PARAMS";
+ case URL_QUERY:
+ return "QUERY";
+ case URL_FRAGMENT:
+ return "FRAGMENT";
+ case URL_ADDRESS:
+ return "ADDRESS";
+ }
+
+ return "UNKNOWN";
+}
+
+void
+URL::Dump(void)
+{
+ printf("URL: '%s'\n", _url);
+
+ if (_scheme[0] != '\0')
+ printf(" scheme: '%s'\n", _scheme);
+ if (_host[0] != '\0')
+ printf(" host: '%s'\n", _host);
+ if (_domain[0] != '\0')
+ printf(" domain: '%s'\n", _domain);
+ if (_siteowner[0] != '\0')
+ printf(" siteowner: '%s'\n", _siteowner);
+ if (_maintld[0] != '\0')
+ printf(" maintld: '%s'\n", _maintld);
+ if (_tld[0] != '\0')
+ printf(" tld: '%s'\n", _tld);
+ if (_tldregion[0] != '\0')
+ printf(" tldregion: '%s'\n", _tldregion);
+ if (_port[0] != '\0')
+ printf(" port: '%s'\n", _port);
+ if (_path[0] != '\0')
+ printf(" path: '%s'\n", _path);
+ if (_pathDepth != 0)
+ printf(" pathdepth: '%d'\n", _pathDepth);
+ if (_filename[0] != '\0')
+ printf(" filename: '%s'\n", _filename);
+ if (_extension[0] != '\0')
+ printf(" extension: '%s'\n", _extension);
+ if (_params[0] != '\0')
+ printf(" params: '%s'\n", _params);
+ if (_query[0] != '\0')
+ printf(" query: '%s'\n", _query);
+ if (_fragment[0] != '\0')
+ printf(" fragment: '%s'\n", _fragment);
+ if (_address[0] != '\0')
+ printf(" address: '%s'\n", _address);
+
+ printf("_startScheme: '%s'\n", _startScheme);
+ printf("_startHost: '%s'\n", _startHost);
+ printf("_startDomain: '%s'\n", _startDomain);
+ printf("_startMainTld: '%s'\n", _startMainTld);
+ printf("_startPort: '%s'\n", _startPort);
+ printf("_startPath: '%s'\n", _startPath);
+ printf("_startFileName: '%s'\n", _startFileName);
+ printf("_startExtension: '%s'\n", _startExtension);
+ printf("_startParams: '%s'\n", _startParams);
+ printf("_startQuery: '%s'\n", _startQuery);
+ printf("_startFragment: '%s'\n", _startFragment);
+ printf("_startAddress: '%s'\n", _startAddress);
+
+ const unsigned char *token;
+ URL_CONTEXT ctx;
+ while ((token = GetToken(ctx)) != NULL) {
+ printf("TOKEN: %s '%s'\n", ContextName(ctx), token);
+ }
+}
+
+}
+}
diff --git a/searchlib/src/vespa/searchlib/util/url.h b/searchlib/src/vespa/searchlib/util/url.h
new file mode 100644
index 00000000000..f700f0f79f1
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/util/url.h
@@ -0,0 +1,277 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright (C) 2000-2003 Fast Search & Transfer ASA
+// Copyright (C) 2003 Overture Services Norway AS
+#pragma once
+
+#ifndef MAX_URL_LEN
+#define MAX_URL_LEN 4096
+#endif
+
+/**
+ * Class that parses URL's and split them into
+ * a number of subelements. Detects different types
+ * of "URL's", such as http:, https:, ftp:, mailto:,
+ * file:, etc. Only http: and https: URL's are
+ * processed into smaller subelements. For http: and
+ * https: URL's, the parser tries to locate the name
+ * of the owner of the domain ('siteowner') by
+ * extracting the last word before the TLD from the
+ * domain part of the URL. A list of TLD's may be
+ * loaded to improve the siteowner extraction algorithm.
+ * The class handles relative as well as absolute URL's.
+ *
+ * Note that memory consumption is quite high for this version,
+ * roughly 40kB / instance.
+ */
+
+namespace search {
+namespace util {
+
+class URL
+{
+private:
+ URL(const URL &);
+ URL& operator=(const URL &);
+
+public:
+ enum URL_CONTEXT {
+ URL_SCHEME,
+ URL_HOST,
+ URL_DOMAIN,
+ URL_MAINTLD,
+ URL_PORT,
+ URL_PATH,
+ URL_FILENAME,
+ URL_EXTENSION,
+ URL_PARAMS,
+ URL_QUERY,
+ URL_FRAGMENT,
+ URL_ADDRESS
+ };
+
+protected:
+
+ unsigned char _url[MAX_URL_LEN+1];
+ unsigned char _scheme[MAX_URL_LEN+1];
+ unsigned char _host[MAX_URL_LEN+1];
+ unsigned char _siteowner[MAX_URL_LEN+1];
+ unsigned char _port[MAX_URL_LEN+1];
+ unsigned char _path[MAX_URL_LEN+1];
+ unsigned char _filename[MAX_URL_LEN+1];
+ unsigned char _extension[MAX_URL_LEN+1];
+ unsigned char _params[MAX_URL_LEN+1];
+ unsigned char _query[MAX_URL_LEN+1];
+ unsigned char _fragment[MAX_URL_LEN+1];
+ unsigned char _address[MAX_URL_LEN+1];
+ unsigned char *_maintld;
+ const unsigned char *_tld;
+ const unsigned char *_domain;
+ const unsigned char *_tldregion;
+ unsigned char _emptystring[1];
+ int _pathDepth;
+ unsigned char _token[MAX_URL_LEN+1];
+
+ unsigned char *_startScheme;
+ unsigned char *_startHost;
+ unsigned char *_startDomain;
+ unsigned char *_startMainTld;
+ unsigned char *_startPort;
+ unsigned char *_startPath;
+ unsigned char *_startFileName;
+ unsigned char *_startExtension;
+ unsigned char *_startParams;
+ unsigned char *_startQuery;
+ unsigned char *_startFragment;
+ unsigned char *_startAddress;
+ unsigned char *_tokenPos;
+
+ bool _gotCompleteURL;
+
+ void Reset(void);
+
+ static inline unsigned char *ParseURLPart(unsigned char *url,
+ unsigned char *buf,
+ unsigned int bufsize,
+ bool (*IsPartChar)(unsigned char c));
+
+public:
+ static inline bool IsAlphaChar(unsigned char c);
+ static inline bool IsDigitChar(unsigned char c);
+ static inline bool IsMarkChar(unsigned char c);
+ static inline bool IsUnreservedChar(unsigned char c);
+ static inline bool IsEscapedChar(unsigned char c);
+ static inline bool IsReservedChar(unsigned char c);
+ static inline bool IsUricChar(unsigned char c);
+ static inline bool IsPChar(unsigned char c);
+
+ static inline bool IsSchemeChar(unsigned char c);
+ static inline bool IsHostChar(unsigned char c);
+ static inline bool IsPortChar(unsigned char c);
+ static inline bool IsPathChar(unsigned char c);
+ static inline bool IsFileNameChar(unsigned char c);
+ static inline bool IsParamsChar(unsigned char c);
+ static inline bool IsParamChar(unsigned char c);
+ static inline bool IsQueryChar(unsigned char c);
+ static inline bool IsFragmentChar(unsigned char c);
+
+ static inline bool IsTokenChar(unsigned char c);
+
+ /**
+ * Defautl constructor. Optionally, the URL to be parsed may be given
+ * as a parameter.
+ *
+ * @param url The URL to parse.
+ * @param length The length of url.
+ */
+ URL(const unsigned char *url=0, size_t length=0);
+
+ /**
+ * Use a new URL to be parsed and split into subelements.
+ *
+ * @param url The URL to parse.
+ * @param length The length of url.
+ */
+ void SetURL(const unsigned char *url, size_t length=0);
+
+ /**
+ * Check if the current URL is a base (absolute) URL.
+ *
+ * @return true if this is an absolute URL, false otherwise.
+ */
+ bool IsBaseURL(void) const;
+
+ /**
+ * Get a pointer to the current URL.
+ * @return Pointer to string containing the URL, "" if none set.
+ */
+ const unsigned char *GetURL() const {return _url;}
+
+ /**
+ * Get the scheme part of the current URL (e.g. "http", "mailto", etc).
+ * @return Pointer to string containing the scheme, "" if none found.
+ */
+ const unsigned char *GetScheme() const {return _scheme;}
+
+ /**
+ * Get the host part of the current URL.
+ * @return Pointer to string containing the host name, "" if none found.
+ */
+ const unsigned char *GetHost() const {return _host;}
+
+ /**
+ * Get the domain part of the current URL.
+ * @return Pointer to string containing the domain name, "" if none found.
+ */
+ const unsigned char *GetDomain() const {return _domain;}
+
+ /**
+ * Get the siteowner part of the current URL.
+ * @return Pointer to string containing the siteowner, "" if none found.
+ */
+ const unsigned char *GetSiteOwner() const {return _siteowner;}
+
+ /**
+ * Get the region correlated to the document tld. I.e. 'no', 'com', etc.
+ * @return Pointer to string containing the tld name, "" if none found.
+ */
+ const unsigned char *GetMainTLD() const {return _maintld;}
+ unsigned char *GetMainTLD_NoConst() const {return _maintld;}
+
+ /**
+ * Similar til GetMainTLD, but includes tld's taken from the tldlist file;
+ * may return strings like 'co.uk.'.
+ * @return Pointer to string containing the tld name, "" if none found.
+ */
+ const unsigned char *GetTLD() const {return _tld;}
+
+ /**
+ * Get the region correlated to the document tld. I.e. 'europe' for '.no'.
+ * @return Pointer to string containing the region name, "" if none found.
+ */
+ const unsigned char *GetTLDRegion() const {return _tldregion;}
+
+ /**
+ * Get the port part of the current URL.
+ * @return Pointer to string containing the port, "" if none found.
+ */
+ const unsigned char *GetPort() const {return _port;}
+
+ /**
+ * Get the path part of the current URL.
+ * @return Pointer to string containing the path, "" if none found.
+ */
+ const unsigned char *GetPath() const {return _path;}
+
+ /**
+ * Get the path part of the current URL.
+ * @return Pointer to string containing the path, "" if none found.
+ */
+ unsigned int GetPathDepth() const {return _pathDepth;}
+
+ /**
+ * Get the filename part of the current URL.
+ * @return Pointer to string containing the filename, "" if none found.
+ */
+ const unsigned char *GetFilename() const {return _filename;}
+
+ /**
+ * Get the filename extension of the current URL.
+ * @return Pointer to string containing the extension, "" if none found.
+ */
+ const unsigned char *GetExtension() const {return _extension;}
+
+ /**
+ * Get the params information part of the current URL. This is the part
+ * of the URL located between the filename and the params parts of the URL.
+ * @return Pointer to string containing the params part, "" if none found.
+ */
+ const unsigned char *GetParams() const {return _params;}
+
+ /**
+ * Get the query information part of the current URL. This is the part
+ * of the URL located between the path and the fragment parts of the URL.
+ * @return Pointer to string containing the param part, "" if none found.
+ */
+ const unsigned char *GetQuery() const {return _query;}
+
+ /**
+ * Get the fragment part of the current URL. This is
+ * treated as everythin behind any '#' character in the URL.
+ * @return Pointer to string containing the fragment, "" if none found.
+ */
+ const unsigned char *GetFragment() const {return _fragment;}
+
+ /**
+ * Get the adress part of the current URL. In the current version,
+ * this is everything behind the type field if different from
+ * http: and https:.
+ * @return Pointer to string containing the address, "" if none found.
+ */
+ const unsigned char *GetAddress() const {return _address;}
+
+ /**
+ * Get tokens with corresponding context information from the current url.
+ * The first call to this function will return the first token in the url.
+ * This function may be called repetedly untill the value NULL is returned.
+ * @return Pointer to string containing the token, NULL when all tokens have
+ * been returned.
+ */
+ const unsigned char *GetToken(URL_CONTEXT &ctx);
+
+ /**
+ * Get a pointer to a string that contains the name of a given context.
+ * @return Pointer to string containing the name of a given contexttoken.
+ */
+ const char *ContextName(URL_CONTEXT ctx);
+
+ /**
+ * Dump the contents of the URL and subelements to stdout. Only
+ * elements that contains information are shown.
+ */
+ void Dump(void);
+};
+
+}
+}
+
+
diff --git a/searchlib/testrun/.gitignore b/searchlib/testrun/.gitignore
new file mode 100644
index 00000000000..559f57dccbe
--- /dev/null
+++ b/searchlib/testrun/.gitignore
@@ -0,0 +1,9 @@
+test-report.html
+test-report.html.*
+test.*.*.desc
+test.*.*.file.*
+test.*.*.files.html
+test.*.*.log
+tmp.*
+/test.*.*.result
+Makefile